예제 #1
0
def test_add_to_graph_not_supported(custom_bundle):
    """A loaded Bundle's RDF store is read-only: adding a triple must raise.

    Builds a 'dep' bundle holding one quad, a 'test' bundle depending on it,
    then attempts a write through the loaded Bundle and expects ZODB's
    ReadOnlyError to propagate out of the transaction.
    """
    dep_desc = Descriptor.load('''
    id: dep
    includes:
      - http://example.com/ctx
    ''')

    test_desc = Descriptor.load('''
    id: test
    dependencies:
      - dep
    ''')

    # Seed the dependency bundle with a single quad in the included context
    depgraph = ConjunctiveGraph()
    ctx_graph = depgraph.get_context('http://example.com/ctx')
    quad = (URIRef('http://example.org/sub'),
            URIRef('http://example.org/prop'),
            URIRef('http://example.org/obj'), ctx_graph)
    depgraph.add(quad)

    with custom_bundle(dep_desc, graph=depgraph) as depbun, \
            custom_bundle(test_desc, bundles_directory=depbun.bundles_directory) as testbun, \
            Bundle('test', bundles_directory=testbun.bundles_directory) as bnd:

        # The write attempt must be rejected by the read-only store
        with pytest.raises(ZODB.POSException.ReadOnlyError):
            with transaction.manager:
                bnd.rdf.add((URIRef('http://example.org/sub'),
                             URIRef('http://example.org/prop'),
                             URIRef('http://example.org/obj')))
예제 #2
0
def test_transitive_dep_null_context_triples_no_imports(custom_bundle):
    """Triples from a transitive dependency are visible in the bundle's graph.

    dep_dep includes the context; dep depends on dep_dep; test depends on dep.
    The quad's triple (sans context) should be exactly what 'test' exposes.
    """
    dep_dep_desc = Descriptor.load('''
    id: dep_dep
    includes:
      - http://example.com/ctx
    ''')

    dep_desc = Descriptor.load('''
    id: dep
    dependencies:
      - dep_dep
    ''')

    test_desc = Descriptor.load('''
    id: test
    dependencies:
      - dep
    ''')

    # One quad in the context included by the transitive dependency
    depgraph = ConjunctiveGraph()
    ctx_graph = depgraph.get_context('http://example.com/ctx')
    quad = (URIRef('http://example.org/sub'),
            URIRef('http://example.org/prop'),
            URIRef('http://example.org/obj'), ctx_graph)
    depgraph.add(quad)

    with custom_bundle(dep_dep_desc, graph=depgraph) as depdepbun, \
            custom_bundle(dep_desc, bundles_directory=depdepbun.bundles_directory) as depbun, \
            custom_bundle(test_desc, bundles_directory=depbun.bundles_directory) as testbun, \
            Bundle('test', bundles_directory=testbun.bundles_directory) as bnd:
        # Only the seeded triple should come back from the combined graph
        assert set([quad[:3]]) == set(bnd.rdf.triples((None, None, None)))
예제 #3
0
    def convert_gml(self, ttl_output_file, uri_part, specific_part):
            """
            Pelagios conversion GML to TTL
            @type       ttl_output_file: string
            @param      ttl_output_file: Absolute path to TTL output file
            @type       uri_part: string
            @param      uri_part: URI for the region to be displayed (e.g. http://earkdev.ait.ac.at/earkweb/sip2aip/working_area/sip2aip/34809536-b9f8-4c51-83d1-ef365ca658f5/)
            @type       specific_part: string
            @param      specific_part: Specific part that distinguishes the URI from other URIs (e.g. 1994)
            """
            cito_ns = Namespace("http://purl.org/spar/cito")
            cnt_ns = Namespace("http://www.w3.org/2011/content#")
            dcterms_ns = Namespace("http://purl.org/dc/terms/")
            foaf_ns = Namespace("http://xmlns.com/foaf/0.1/")
            geo_ns = Namespace("http://www.w3.org/2003/01/geo/wgs84_pos#")
            geosparql_ns = Namespace("http://www.opengis.net/ont/geosparql#")
            gn_ns = Namespace("http://www.geonames.org/ontology#")
            lawd_ns = Namespace("http://lawd.info/ontology/")
            rdfs_ns = Namespace("http://www.w3.org/2000/01/rdf-schema#")
            skos_ns = Namespace("http://www.w3.org/2004/02/skos/core#")

            slovenia = URIRef("http://earkdev.ait.ac.at/earkweb/sip2aip/working_area/sip2aip/5c6f5563-7665-4719-a2b6-4356ea033c1d/#place/Slovenia")

            store = IOMemory()

            g = ConjunctiveGraph(store=store)
            g.bind("cito", cito_ns)
            g.bind("cnt", cnt_ns)
            g.bind("dcterms", dcterms_ns)
            g.bind("foaf", foaf_ns)
            g.bind("geo", geo_ns)
            g.bind("geosparql", geosparql_ns)
            g.bind("gn", gn_ns)
            g.bind("lawd", lawd_ns)
            g.bind("rdfs", rdfs_ns)
            g.bind("skos", skos_ns)

            graph_slovenian_districts = Graph(store=store, identifier=slovenia)
            gml_to_wkt = GMLtoWKT(self.gml_file)
            district_included = {}
            i = 1
            print "Processing GML file: %s" % self.gml_file
            for district_wkt in gml_to_wkt.get_wkt_linear_ring():
                techname = whsp_to_unsc(district_wkt["name"])
                print "District %d: %s" % (i, whsp_to_unsc(district_wkt["name"]))
                if techname not in district_included:
                    district = URIRef("%s#place/%s/%s" % (uri_part, whsp_to_unsc(district_wkt["name"]), specific_part))
                    graph_slovenian_districts.add((district, RDF.type, lawd_ns.Place))
                    graph_slovenian_districts.add((district, dcterms_ns['isPartOf'], slovenia))
                    graph_slovenian_districts.add((district, dcterms_ns['temporal'], Literal(str(district_wkt["year"]))))
                    graph_slovenian_districts.add((district, gn_ns['countryCode'], Literal(u'SI')))
                    graph_slovenian_districts.add((district, rdfs_ns['label'], Literal(district_wkt["name"], lang=u'si')))
                    polygons = BNode()
                    graph_slovenian_districts.add((district, geosparql_ns['hasGeometry'], polygons))
                    g.add((polygons, geosparql_ns['asWKT'], Literal(district_wkt["polygon"])))
                    district_included[techname] = True
                i += 1
            with open(ttl_output_file, 'w') as f:
                f.write(g.serialize(format='n3'))
            f.close()
예제 #4
0
def test_triples_choices_context_not_included(custom_bundle):
    """triples_choices over a context the bundle does not include yields nothing.

    NOTE(review): the descriptor includes 'http://example.com/ctxg' while the
    quad is placed in 'http://example.com/ctx' -- presumably deliberate, so
    the queried context is absent from the dependency; confirm with the suite.
    """
    dep_desc = Descriptor.load('''
    id: dep
    includes:
      - http://example.com/ctxg
    ''')

    test_desc = Descriptor.load('''
    id: test
    dependencies:
      - dep
    ''')

    depgraph = ConjunctiveGraph()
    ctx_graph = depgraph.get_context('http://example.com/ctx')
    quad = (URIRef('http://example.org/sub'),
            URIRef('http://example.org/prop'),
            URIRef('http://example.org/obj'), ctx_graph)
    depgraph.add(quad)

    with custom_bundle(dep_desc, graph=depgraph) as depbun, \
            custom_bundle(test_desc, bundles_directory=depbun.bundles_directory) as testbun, \
            Bundle('test', bundles_directory=testbun.bundles_directory) as bnd:
        match = False
        # Query constrained to the non-included context: must produce no rows
        for x in bnd.rdf.triples_choices((URIRef('http://example.org/sub'),
                                          URIRef('http://example.org/prop'),
                                          [URIRef('http://example.org/obj')]),
                                         context=ctx_graph):
            match = True
        assert not match
예제 #5
0
def main(fd, store_type=None, store_id=None, graph_id=None, gzipped=False):
    """
    Converts MARC21 data stored in fd to a RDFlib graph.
    """
    from rdflib import plugin

    if store_type:
        msg = "Need a {} identifier for a disk-based store."
        assert store_id, msg.format('store')
        assert graph_id, msg.format('graph')
        store = plugin.get(store_type, Store)(store_id)
    else:
        store = 'default'

    graph = Graph(store=store, identifier=graph_id)

    try:
        records = MARCReader(open(fd))

        for i, triple in enumerate(process_records(records)):
            graph.add(triple)
            if i % 100 == 0:
                graph.commit()
            if i % 10000 == 0:
                print i

    finally:
        graph.commit()

    return graph
예제 #6
0
def make_graph():
    """Assemble the InPhO RDF graph.

    Binds the project namespaces, declares pairwise owl:disjointWith axioms
    between the top-level entity kinds, then populates thinkers, ideas and
    journals.
    """
    g = Graph()

    # namespace prefix table (order preserved from the original bind sequence)
    prefixes = [
        ("inpho", "http://inpho.cogs.indiana.edu/"),
        ("thinker", "http://inpho.cogs.indiana.edu/thinker/"),
        ("journal", "http://inpho.cogs.indiana.edu/journal/"),
        ("foaf", "http://xmlns.com/foaf/0.1/"),
        ("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"),
        ("rdfs", "http://www.w3.org/TR/rdf-schema/#"),
        ("owl", "http://www.w3.org/2002/07/owl#"),
        ("idea", "http://inpho.cogs.indiana.edu/idea/"),
        ("skos", "http://www.w3.org/2004/02/skos/core#"),
        ("db", "http://dbpedia.org/"),
        ("dc", "http://purl.org/dc/elements/1.1/"),
        # user namespace currently doesn't exist?
        ("user", "http://inpho.cogs.indiana.edu/user/"),
    ]
    for prefix, uri in prefixes:
        g.bind(prefix, uri)

    # OWL disjoints: every pair of top-level kinds is mutually disjoint
    for a, b in combinations(["thinker", "journal", "idea", "user"], 2):
        g.add((inpho[a], owl['disjointWith'], inpho[b]))

    g = populate_thinkers(g)
    g = populate_ideas(g)
    g = populate_journals(g)

    return g
예제 #7
0
def output_to_oac(fileid, dir, metadata, annotations):
    """Write OAC annotations for a JSTOR article's citations to a Turtle file.

    fileid: source text filename ending in ``.txt``; the output swaps it for ``.ttl``.
    dir: output directory prefix (note: shadows the builtin ``dir``; kept for
        interface compatibility).
    metadata: dict with at least ``citations`` (list of dicts carrying
        ``label`` and ``ctsurn``) and ``doi``.
    annotations: currently unused -- kept for interface compatibility.

    Fixes over the previous version: the output file is opened with a context
    manager (the old open/close pair leaked the handle on a serialization
    error), and the mixed tab/space indentation is normalized.
    """
    # import libraries
    from rdflib import Namespace, BNode, Literal, URIRef, RDF, RDFS
    from rdflib.graph import Graph, ConjunctiveGraph
    from rdflib.plugins.memory import IOMemory
    # declare namespaces
    oac = Namespace("http://www.w3.org/ns/oa#")
    perseus = Namespace("http://data.perseus.org/citations/")
    myanno = Namespace("http://hellespont.org/annotations/jstor")
    store = IOMemory()
    # initialise the graph
    g = ConjunctiveGraph(store=store)
    # bind namespaces
    g.bind("oac", oac)
    g.bind("perseus", perseus)
    g.bind("myanno", myanno)
    # one oa:Annotation per citation, targeting the JSTOR article by DOI
    for n, ann in enumerate(metadata["citations"]):
        anno1 = URIRef(myanno["#%i" % n])
        g.add((anno1, RDF.type, oac["Annotation"]))
        g.add((anno1, oac["hasTarget"], URIRef("%s%s" % ("http://jstor.org/stable/", metadata["doi"]))))
        g.add((anno1, RDFS.label, Literal(ann["label"])))
        g.add((anno1, oac["hasBody"], perseus[ann["ctsurn"]]))
        g.add((anno1, oac["motivatedBy"], oac["linking"]))
    fname = "%s%s" % (dir, fileid.replace(".txt", ".ttl"))
    with open(fname, "w") as f:
        f.write(g.serialize(format="turtle"))
예제 #8
0
파일: db.py 프로젝트: drewp/commentserve
    def writeFile(self, stmts, ctx, fileWords):
        """Write *stmts* into context *ctx* of a fresh graph and serialize it.

        The output filename is derived from *fileWords*. Fixes two defects:
        ``Graph.add`` takes a single triple, not ``*stmts`` with a ``context``
        kwarg, and the serialize call was made on the non-existent
        ``graph.graph`` attribute.
        """
        outfile = "commentstore/post-%s.nt" % ("-".join(fileWords))
        graph = ConjunctiveGraph()

        # add every statement to the named context within the conjunctive graph
        target = graph.get_context(ctx)
        for stmt in stmts:
            target.add(stmt)
        # NOTE(review): extension is .nt but format is n3 -- preserved as-is
        graph.serialize(outfile, format='n3')
        log.info("wrote new comment to %s", outfile)
예제 #9
0
 def test_pretty_xmlliteral(self):
     """Well-formed XML literals serialize inline with rdf:parseType="Literal"."""
     graph = ConjunctiveGraph()
     xml_literal = Literal(u'''<p xmlns="http://www.w3.org/1999/xhtml">See also <a href="#aring">Å</a></p>''', datatype=RDF.XMLLiteral)
     graph.add((BNode(), RDF.value, xml_literal))
     serialized = graph.serialize(format='pretty-xml')
     needle = u'''<rdf:value rdf:parseType="Literal"><p xmlns="http://www.w3.org/1999/xhtml">See also <a href="#aring">Å</a></p></rdf:value>'''.encode('utf-8')
     assert needle in serialized
예제 #10
0
 def test_pretty_broken_xmlliteral(self):
     """Ill-formed XML literals fall back to an escaped rdf:datatype form."""
     graph = ConjunctiveGraph()
     broken = Literal(u'''<p ''', datatype=RDF.XMLLiteral)
     graph.add((BNode(), RDF.value, broken))
     serialized = graph.serialize(format='pretty-xml')
     needle = u'''<rdf:value rdf:datatype="http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral">&lt;p '''.encode('utf-8')
     assert needle in serialized
예제 #11
0
def _mangled_copy(g):
    "Makes a copy of the graph, replacing all bnodes with the bnode ``_blank``."
    def _squash(term):
        # collapse every BNode to the shared ``_blank`` marker
        return _blank if isinstance(term, BNode) else term

    gcopy = ConjunctiveGraph()
    for triple in g:
        gcopy.add(tuple(_squash(term) for term in triple))
    return gcopy
예제 #12
0
def _mangled_copy(g):
    "Makes a copy of the graph, replacing all bnodes with the bnode ``_blank``."
    gcopy = ConjunctiveGraph()
    for subj, pred, obj in g:
        gcopy.add((
            _blank if isinstance(subj, BNode) else subj,
            _blank if isinstance(pred, BNode) else pred,
            _blank if isinstance(obj, BNode) else obj,
        ))
    return gcopy
예제 #13
0
 def test_pretty_broken_xmlliteral(self):
     """Ill-formed XML literals are escaped with an explicit rdf:datatype."""
     # given:
     g = ConjunctiveGraph()
     g.add((BNode(), RDF.value, Literal("""<p """,
                                        datatype=RDF.XMLLiteral)))
     # when:
     xmlrepr = g.serialize(format="pretty-xml")
     # then: invalid XML cannot use parseType="Literal", so it is escaped
     assert (
         """<rdf:value rdf:datatype="http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral">&lt;p """
         .encode("utf-8") in xmlrepr)
예제 #14
0
파일: db.py 프로젝트: drewp/commentserve
    def writeFile(self, stmts, ctx, fileWords):
        """Serialize *stmts* as n3 and insert a comment document into mongo.

        While accumulating the graph, denormalizes the reply topic
        (SIOC.has_reply subject) and the creation time (DCTERMS.created,
        parsed) into the mongo document alongside the raw n3.
        """
        g = ConjunctiveGraph()
        doc = {'ctx' : ctx}

        for s in stmts:
            g.add(s)
            if s[1] == SIOC.has_reply:
                doc['topic'] = s[0]
            if s[1] == DCTERMS.created: # expecting 2 of these, but same value
                doc['created'] = parse(s[2])

        doc['n3'] = g.serialize(format="n3")
        self.mongo['comment'].insert(doc, safe=True)
예제 #15
0
def test_escaping_of_triple_doublequotes():
    """
    Issue 186 - Check escaping of multiple doublequotes.
    A serialization/deserialization roundtrip of a certain class of
    Literals fails when there are both, newline characters and multiple subsequent
    quotation marks in the lexical form of the Literal. In this case invalid N3
    is emitted by the serializer, which in turn cannot be parsed correctly.
    """
    g=ConjunctiveGraph()
    # newline + five consecutive double quotes: the problematic combination
    g.add((URIRef('http://foobar'), URIRef('http://fooprop'), Literal('abc\ndef"""""')))
    # assert g.serialize(format='n3') == '@prefix ns1: <http:// .\n\nns1:foobar ns1:fooprop """abc\ndef\\"\\"\\"\\"\\"""" .\n\n'
    g2=ConjunctiveGraph()
    g2.parse(data=g.serialize(format='n3'), format='n3')
    # roundtrip must preserve the graph exactly
    assert g.isomorphic(g2) is True
예제 #16
0
def test_escaping_of_triple_doublequotes():
    """
    Issue 186 - Check escaping of multiple doublequotes.
    A serialization/deserialization roundtrip of a certain class of
    Literals fails when there are both, newline characters and multiple subsequent
    quotation marks in the lexical form of the Literal. In this case invalid N3
    is emitted by the serializer, which in turn cannot be parsed correctly.
    """
    g = ConjunctiveGraph()
    # newline + five consecutive double quotes: the problematic combination
    g.add((URIRef('http://foobar'), URIRef('http://fooprop'),
           Literal('abc\ndef"""""')))
    # assert g.serialize(format='n3') == '@prefix ns1: <http:// .\n\nns1:foobar ns1:fooprop """abc\ndef\\"\\"\\"\\"\\"""" .\n\n'
    g2 = ConjunctiveGraph()
    g2.parse(data=g.serialize(format='n3'), format='n3')
    # roundtrip must preserve the graph exactly
    assert g.isomorphic(g2) is True
예제 #17
0
파일: inference.py 프로젝트: drewp/homeauto
def infer(graph: ConjunctiveGraph, rules: ConjunctiveGraph):
    """
    returns new graph of inferred statements.

    Repeatedly applies every log:implies rule whose antecedent statements are
    all present in the working set, until a full pass adds nothing new.

    Fixes over the previous version: the duplicated ``if containsSetup:``
    branches are merged, and the unreachable FuXi-based code that sat after
    the ``return`` is removed.
    """
    log.info(
        f'Begin inference of graph len={len(graph)} with rules len={len(rules)}:'
    )

    # working set = input graph plus everything implied so far
    workingSet = ConjunctiveGraph()
    workingSet.addN(graph.quads())

    implied = ConjunctiveGraph()

    delta = 1
    while delta > 0:
        # measure growth of `implied` across one pass over the rules
        delta = -len(implied)

        for r in rules:
            if r[1] == LOG['implies']:
                # NOTE(review): assumes r[0]/r[2] are graph-valued formula
                # terms that can be iterated for their statements
                containsSetup = all(st in workingSet for st in r[0])
                if containsSetup:
                    log.info(f'  Rule {r[0]} -> present={containsSetup}')
                    for st in r[0]:
                        log.info(
                            f'     {st[0].n3()} {st[1].n3()} {st[2].n3()}')

                    log.info(f'  ...implies {len(r[2])} statements')
                    for st in r[2]:
                        workingSet.add(st)
                        implied.add(st)
            else:
                log.info(f'  {r}')
        delta += len(implied)
        log.info(f'  this inference round added {delta} more implied stmts')
    log.info(f'{len(implied)} stmts implied:')
    for st in implied:
        log.info(f'  {st}')
    return implied
예제 #18
0
def write_graph(data_handle, out_handle, format='n3'):
    """Convert records from data_handle to RDF and write them to out_handle.

    Emits a '.' to stderr per record, replaced by the running count on every
    1000th record. Returns the number of records processed.
    """
    graph = Graph()
    count = 0
    for record in generate_records(data_handle):
        count += 1
        # progress: dot per record, numeric count at each multiple of 1000
        if count % 1000:
            sys.stderr.write(".")
        else:
            sys.stderr.write(str(count))
        for triple in get_triples(record):
            graph.add(triple)
        graph.commit()
    # base URI is derived from the current Django site
    current_site = Site.objects.get_current()
    domain = 'https://%s' % current_site.domain
    out_handle.write(graph.serialize(format=format, base=domain, include_base=True))
    return count
예제 #19
0
 def test_pretty_xmlliteral(self):
     """Well-formed XML literals serialize inline with rdf:parseType="Literal"."""
     # given:
     g = ConjunctiveGraph()
     g.add((
         BNode(),
         RDF.value,
         Literal(
             u"""<p xmlns="http://www.w3.org/1999/xhtml">See also <a href="#aring">Å</a></p>""",
             datatype=RDF.XMLLiteral,
         ),
     ))
     # when:
     xmlrepr = g.serialize(format="pretty-xml")
     # then: the XML payload appears verbatim inside the rdf:value element
     assert (
         u"""<rdf:value rdf:parseType="Literal"><p xmlns="http://www.w3.org/1999/xhtml">See also <a href="#aring">Å</a></p></rdf:value>"""
         .encode("utf-8") in xmlrepr)
예제 #20
0
    def post(self):
        query = self.request.get("content")
        nrOfResults = self.request.get("amount")

        try:
            number = int(nrOfResults)
        except ValueError:
            number = 0

        literals = re.findall(r'"(.+?)"',query)

        urls = processLiterals(literals, number)

        graph = ConjunctiveGraph()

        for url in urls:
            # Original URL fetch
            xmlresult = urlfetch.fetch(url,deadline=60,method=urlfetch.GET)

            if xmlresult.status_code == 200:

                iwa = Namespace('http://iwa2012-18-2.appspot.com/#')
                idns = Namespace('http://iwa2012-18-2.appspot.com/id/#')
                venuens = Namespace('http://iwa2012-18-2.appspot.com/venueid/#')

                tree = etree.fromstring(xmlresult.content)
                for event in tree.findall('events/event'):
                    id = event.attrib['id']
                    title = event.find('title')
                    url = event.find('url')
                    venueid = event.find('venue_id')
                    venueurl = event.find('venue_url')
                    venuename = event.find('venue_name')

                    graph.add((idns[id], iwa['hasTitle'], Literal(title.text)))
                    graph.add((idns[id], iwa['hasUrl'], Literal(url.text)))
                    graph.add((venuens[id], iwa['hasVenueName'], Literal(venuename.text)))
                    graph.add((venuens[id], iwa['hasUrl'], Literal(venueurl.text)))
                    graph.add((idns[id], iwa['atVenue'], venuens[id])))

            else:
                print "Something went wrong with the connection to the Eventful server. Status code: " + xml.status_code

        print graph.serialize()
예제 #21
0
def rdf_description(name, notation='xml' ):
    """
    Funtion takes  title of node, and rdf notation.
    """
    valid_formats = ["xml", "n3", "ntriples", "trix"]
    default_graph_uri = "http://gstudio.gnowledge.org/rdfstore"
    configString = "/var/tmp/rdfstore"

    # Get the Sleepycat plugin.
    store = plugin.get('Sleepycat', Store)('rdfstore')

    # Open previously created store, or create it if it doesn't exist yet
    graph = Graph(store="Sleepycat",
               identifier = URIRef(default_graph_uri))
    path = mkdtemp()
    rt = graph.open(path, create=False)
    if rt == NO_STORE:
    #There is no underlying Sleepycat infrastructure, create it
        graph.open(path, create=True)
    else:
        assert rt == VALID_STORE, "The underlying store is corrupt"


    # Now we'll add some triples to the graph & commit the changes
    rdflib = Namespace('http://sbox.gnowledge.org/gstudio/')
    graph.bind("gstudio", "http://gnowledge.org/")
    exclusion_fields = ["id", "rght", "node_ptr_id", "image", "lft", "_state", "_altnames_cache", "_tags_cache", "nid_ptr_id", "_mptt_cached_fields"]
    node=Objecttype.objects.get(title=name)
    node_dict=node.__dict__

    subject=str(node_dict['id'])
    for key in node_dict:
        if key not in exclusion_fields:
            predicate=str(key)
            pobject=str(node_dict[predicate])
            graph.add((rdflib[subject], rdflib[predicate], Literal(pobject)))
     
     
    graph.commit()

    print graph.serialize(format=notation)

    graph.close()
예제 #22
0
def rdf_description(name, notation='xml'):
    """
    Funtion takes  title of node, and rdf notation.

    Serializes every non-excluded attribute of the Objecttype titled *name*
    as a triple in a Sleepycat-backed graph and prints it in *notation*.

    NOTE(review): ``store`` is instantiated but never passed to Graph (which
    receives the plugin name "Sleepycat" instead), and ``valid_formats`` /
    ``configString`` are unused -- confirm whether this is intentional.
    """
    valid_formats = ["xml", "n3", "ntriples", "trix"]
    default_graph_uri = "http://gstudio.gnowledge.org/rdfstore"
    configString = "/var/tmp/rdfstore"

    # Get the Sleepycat plugin.
    store = plugin.get('Sleepycat', Store)('rdfstore')

    # Open previously created store, or create it if it doesn't exist yet
    graph = Graph(store="Sleepycat", identifier=URIRef(default_graph_uri))
    path = mkdtemp()
    rt = graph.open(path, create=False)
    if rt == NO_STORE:
        #There is no underlying Sleepycat infrastructure, create it
        graph.open(path, create=True)
    else:
        assert rt == VALID_STORE, "The underlying store is corrupt"

    # Now we'll add some triples to the graph & commit the changes
    rdflib = Namespace('http://sbox.gnowledge.org/gstudio/')
    graph.bind("gstudio", "http://gnowledge.org/")
    exclusion_fields = [
        "id", "rght", "node_ptr_id", "image", "lft", "_state",
        "_altnames_cache", "_tags_cache", "nid_ptr_id", "_mptt_cached_fields"
    ]
    node = Objecttype.objects.get(title=name)
    node_dict = node.__dict__

    subject = str(node_dict['id'])
    for key in node_dict:
        if key not in exclusion_fields:
            predicate = str(key)
            pobject = str(node_dict[predicate])
            graph.add((rdflib[subject], rdflib[predicate], Literal(pobject)))

    graph.commit()

    print graph.serialize(format=notation)

    graph.close()
예제 #23
0
파일: app.py 프로젝트: edsu/mediatypes
 def get(self):
     """Serve every known MediaType as RDF/XML."""
     ns = Namespace('http://purl.org/NET/mediatype#')
     graph = ConjunctiveGraph()
     for mt in models.MediaType.all():
         subject = URIRef(mt.uri)
         graph.add((subject, RDF.type, ns['MediaType']))
         graph.add((subject, RDFS.label, Literal(mt.name)))
         # optional cross-references, added only when present
         for see_also in (mt.rfc_url, mt.application_url):
             if see_also:
                 graph.add((subject, RDFS.seeAlso, URIRef(see_also)))
     self.response.headers['Content-Type'] = 'application/rdf+xml'
     graph.serialize(self.response.out)
예제 #24
0
    def testSerialize(self):
        """Round-trip two named graphs plus the default context through TriX."""
        store_one = URIRef('store:1')
        res_one = URIRef('resource:1')
        res_two = URIRef('resource:2')

        label = URIRef('predicate:label')

        graph_one = Graph(identifier=store_one)
        graph_one.add((res_one, label, Literal("label 1", lang="en")))
        graph_one.add((res_one, label, Literal("label 2")))

        store_two = URIRef('store:2')
        graph_two = Graph(identifier=store_two)
        graph_two.add((res_two, label, Literal("label 3")))

        # merge both graphs, as quads, into one conjunctive graph
        combined = ConjunctiveGraph()
        for triple in graph_one.triples((None, None, None)):
            combined.addN([triple + (graph_one,)])
        for triple in graph_two.triples((None, None, None)):
            combined.addN([triple + (graph_two,)])
        res_three = URIRef('resource:3')
        combined.add((res_three, label, Literal(4)))

        serialized = combined.serialize(format='trix')
        reparsed = ConjunctiveGraph()
        from StringIO import StringIO

        reparsed.parse(StringIO(serialized), format='trix')

        for q in reparsed.quads((None, None, None)):
            # TODO: Fix once getGraph/getContext is in conjunctive graph
            if isinstance(q[3].identifier, URIRef):
                target = Graph(store=combined.store, identifier=q[3].identifier)
            else:
                # BNode, this is a bit ugly: we cannot match the bnode to the
                # right graph automagically. Here there is only one anonymous
                # graph -- the default context -- but that is not always so.
                target = combined.default_context
            self.assertTrue(q[0:3] in target)
예제 #25
0
    def testSerialize(self):
        """Round-trip two named graphs plus the default context through TriX."""

        s1 = URIRef('store:1')
        r1 = URIRef('resource:1')
        r2 = URIRef('resource:2')

        label = URIRef('predicate:label')

        g1 = Graph(identifier=s1)
        g1.add((r1, label, Literal("label 1", lang="en")))
        g1.add((r1, label, Literal("label 2")))

        s2 = URIRef('store:2')
        g2 = Graph(identifier=s2)
        g2.add((r2, label, Literal("label 3")))

        # merge both graphs, as quads, into one conjunctive graph
        g = ConjunctiveGraph()
        for s, p, o in g1.triples((None, None, None)):
            g.addN([(s, p, o, g1)])
        for s, p, o in g2.triples((None, None, None)):
            g.addN([(s, p, o, g2)])
        r3 = URIRef('resource:3')
        g.add((r3, label, Literal(4)))

        r = g.serialize(format='trix')
        g3 = ConjunctiveGraph()
        from StringIO import StringIO

        g3.parse(StringIO(r), format='trix')

        # every reparsed quad must be findable in its originating context
        for q in g3.quads((None, None, None)):
            # TODO: Fix once getGraph/getContext is in conjunctive graph
            if isinstance(q[3].identifier, URIRef):
                tg = Graph(store=g.store, identifier=q[3].identifier)
            else:
                # BNode, this is a bit ugly
                # we cannot match the bnode to the right graph automagically
                # here I know there is only one anonymous graph,
                # and that is the default one, but this is not always the case
                tg = g.default_context
            self.assertTrue(q[0:3] in tg)
예제 #26
0
 def track(self, resource):
     """Print a Turtle description of *resource*, following PROV links.

     Issues DESCRIBE queries against the configured SPARQL endpoint and
     walks (stack-wise: ``queue.pop()`` takes the most recently added item)
     any PROV-linked resources that fall inside the 'data' namespace.
     """
     graph = ConjunctiveGraph()
     sparql = SPARQLWrapper(self.conf.get_SPARQL())

     queue = [resource]
     while len(queue) != 0:
         target = queue.pop()
         # fill in the query template with the target and the graph names
         query = DESCRIBE_QUERY.replace('__RESOURCE__', target.n3())
         query = query.replace('__RELEASE__', self.conf.get_graph_name('release'))
         query = query.replace('__RULES__', self.conf.get_graph_name('rules'))
         query = query.replace('__RAW_DATA__', self.conf.get_graph_name('raw-data'))
         sparql.setQuery(query)
         results = sparql.query().convert()
         for statement in results:
             # Add the statement to the graph
             graph.add(statement)

             # If the relate to another resource we describe, queue it
             (_,p,o) = statement
             if p.startswith(PROV):
                 if o.startswith(self.conf.get_namespace('data')):
                     queue.append(o)

     print graph.serialize(format='turtle')
예제 #27
0
def test_quad_not_in_dependency(custom_bundle):
    """A quad in a context the dependency does not include stays invisible.

    The descriptor includes 'http://example.com/ctx', but the quad is placed
    in 'http://example.com/other_ctx', so it must not surface in the bundle.
    """
    dep_desc = Descriptor.load('''
    id: dep
    includes:
      - http://example.com/ctx
    ''')

    test_desc = Descriptor.load('''
    id: test
    dependencies:
      - dep
    ''')

    depgraph = ConjunctiveGraph()
    # deliberately a context that is NOT in the descriptor's includes
    ctx_graph = depgraph.get_context('http://example.com/other_ctx')
    quad = (URIRef('http://example.org/sub'),
            URIRef('http://example.org/prop'),
            URIRef('http://example.org/obj'), ctx_graph)
    depgraph.add(quad)

    with custom_bundle(dep_desc, graph=depgraph) as depbun, \
            custom_bundle(test_desc, bundles_directory=depbun.bundles_directory) as testbun, \
            Bundle('test', bundles_directory=testbun.bundles_directory) as bnd:
        assert quad not in bnd.rdf
예제 #28
0
class ContextTestCase(unittest.TestCase):
    """Exercise rdflib named-graph (context) behaviour for one store.

    Every test operates through ``self.graph`` (a ConjunctiveGraph over
    ``self.store``) and per-context ``Graph`` views bound to the same
    store, checking that triples added or removed through one context are
    reflected correctly in the conjunctive view and vice versa.

    Fix: the deprecated ``assertEquals``/``assert_`` aliases (removed in
    Python 3.12) are replaced with ``assertEqual``/``assertTrue``.
    """
    #store = 'Memory'
    store = 'default'
    slow = True
    tmppath = None

    def setUp(self):
        """Open a fresh store (MySQL or a temp-dir backed one) and define
        the URIRef fixtures shared by all tests."""
        self.graph = ConjunctiveGraph(store=self.store)
        if self.store == "MySQL":
            from mysql import configString
            from rdflib.store.MySQL import MySQL
            path = configString
            MySQL().destroy(path)
        else:
            self.tmppath = mkdtemp()
        self.graph.open(self.tmppath, create=True)
        self.michel = URIRef(u'michel')
        self.tarek = URIRef(u'tarek')
        self.bob = URIRef(u'bob')
        self.likes = URIRef(u'likes')
        self.hates = URIRef(u'hates')
        self.pizza = URIRef(u'pizza')
        self.cheese = URIRef(u'cheese')

        self.c1 = URIRef(u'context-1')
        self.c2 = URIRef(u'context-2')

        # delete the graph for each test!
        self.graph.remove((None, None, None))

    def tearDown(self):
        self.graph.close()
        shutil.rmtree(self.tmppath)

    def get_context(self, identifier):
        """Return a Graph view over our store for the given context id."""
        assert isinstance(identifier, URIRef) or \
               isinstance(identifier, BNode), type(identifier)
        # NOTE(review): passes the TestCase itself as namespace_manager;
        # harmless here since namespaces are never used, but looks
        # accidental -- confirm before reusing this helper elsewhere.
        return Graph(store=self.graph.store,
                     identifier=identifier,
                     namespace_manager=self)

    def addStuff(self):
        """Populate context c1 with the seven fixture triples."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        graph = Graph(self.graph.store, c1)

        graph.add((tarek, likes, pizza))
        graph.add((tarek, likes, cheese))
        graph.add((michel, likes, pizza))
        graph.add((michel, likes, cheese))
        graph.add((bob, likes, cheese))
        graph.add((bob, hates, pizza))
        graph.add((bob, hates, michel))  # gasp!

    def removeStuff(self):
        """Remove exactly the triples that addStuff() created."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        graph = Graph(self.graph.store, c1)

        graph.remove((tarek, likes, pizza))
        graph.remove((tarek, likes, cheese))
        graph.remove((michel, likes, pizza))
        graph.remove((michel, likes, cheese))
        graph.remove((bob, likes, cheese))
        graph.remove((bob, hates, pizza))
        graph.remove((bob, hates, michel))  # gasp!

    def addStuffInMultipleContexts(self):
        """Add one identical triple to the default context, c1 and c2."""
        c1 = self.c1
        c2 = self.c2
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        # add to default context
        self.graph.add(triple)
        # add to context 1
        graph = Graph(self.graph.store, c1)
        graph.add(triple)
        # add to context 2
        graph = Graph(self.graph.store, c2)
        graph.add(triple)

    def testConjunction(self):
        """len() of the conjunctive graph counts distinct triples once."""
        self.addStuffInMultipleContexts()
        triple = (self.pizza, self.likes, self.pizza)
        # add to context 1
        graph = Graph(self.graph.store, self.c1)
        graph.add(triple)
        self.assertEqual(len(self.graph), len(graph))

    def testAdd(self):
        self.addStuff()

    def testRemove(self):
        self.addStuff()
        self.removeStuff()

    def testLenInOneContext(self):
        c1 = self.c1
        # make sure context is empty

        self.graph.remove_context(self.get_context(c1))
        graph = Graph(self.graph.store, c1)
        oldLen = len(self.graph)

        for i in range(0, 10):
            graph.add((BNode(), self.hates, self.hates))
        self.assertEqual(len(graph), oldLen + 10)
        self.assertEqual(len(self.get_context(c1)), oldLen + 10)
        self.graph.remove_context(self.get_context(c1))
        self.assertEqual(len(self.graph), oldLen)
        self.assertEqual(len(graph), 0)

    def testLenInMultipleContexts(self):
        oldLen = len(self.graph)
        self.addStuffInMultipleContexts()

        # addStuffInMultipleContexts is adding the same triple to
        # three different contexts. So it's only + 1
        self.assertEqual(len(self.graph), oldLen + 1)

        graph = Graph(self.graph.store, self.c1)
        self.assertEqual(len(graph), oldLen + 1)

    def testRemoveInMultipleContexts(self):
        c1 = self.c1
        c2 = self.c2
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        self.addStuffInMultipleContexts()

        # triple should be still in store after removing it from c1 + c2
        self.assertTrue(triple in self.graph)
        graph = Graph(self.graph.store, c1)
        graph.remove(triple)
        self.assertTrue(triple in self.graph)
        graph = Graph(self.graph.store, c2)
        graph.remove(triple)
        self.assertTrue(triple in self.graph)
        self.graph.remove(triple)
        # now gone!
        self.assertTrue(triple not in self.graph)

        # add again and see if remove without context removes all triples!
        self.addStuffInMultipleContexts()
        self.graph.remove(triple)
        self.assertTrue(triple not in self.graph)

    def testContexts(self):
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        self.addStuffInMultipleContexts()

        def cid(c):
            return c.identifier

        self.assertTrue(self.c1 in map(cid, self.graph.contexts()))
        self.assertTrue(self.c2 in map(cid, self.graph.contexts()))

        contextList = map(cid, list(self.graph.contexts(triple)))
        self.assertTrue(self.c1 in contextList)
        self.assertTrue(self.c2 in contextList)

    def testRemoveContext(self):
        c1 = self.c1

        self.addStuffInMultipleContexts()
        self.assertEqual(len(Graph(self.graph.store, c1)), 1)
        self.assertEqual(len(self.get_context(c1)), 1)

        self.graph.remove_context(self.get_context(c1))
        self.assertTrue(self.c1 not in self.graph.contexts())

    def testRemoveAny(self):
        Any = None
        self.addStuffInMultipleContexts()
        self.graph.remove((Any, Any, Any))
        self.assertEqual(len(self.graph), 0)

    def testTriples(self):
        """Exhaustive triples() pattern matching, with and without context."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        asserte = self.assertEqual
        triples = self.graph.triples
        graph = self.graph
        c1graph = Graph(self.graph.store, c1)
        c1triples = c1graph.triples
        Any = None

        self.addStuff()

        # unbound subjects with context
        asserte(len(list(c1triples((Any, likes, pizza)))), 2)
        asserte(len(list(c1triples((Any, hates, pizza)))), 1)
        asserte(len(list(c1triples((Any, likes, cheese)))), 3)
        asserte(len(list(c1triples((Any, hates, cheese)))), 0)

        # unbound subjects without context, same results!
        asserte(len(list(triples((Any, likes, pizza)))), 2)
        asserte(len(list(triples((Any, hates, pizza)))), 1)
        asserte(len(list(triples((Any, likes, cheese)))), 3)
        asserte(len(list(triples((Any, hates, cheese)))), 0)

        # unbound objects with context
        asserte(len(list(c1triples((michel, likes, Any)))), 2)
        asserte(len(list(c1triples((tarek, likes, Any)))), 2)
        asserte(len(list(c1triples((bob, hates, Any)))), 2)
        asserte(len(list(c1triples((bob, likes, Any)))), 1)

        # unbound objects without context, same results!
        asserte(len(list(triples((michel, likes, Any)))), 2)
        asserte(len(list(triples((tarek, likes, Any)))), 2)
        asserte(len(list(triples((bob, hates, Any)))), 2)
        asserte(len(list(triples((bob, likes, Any)))), 1)

        # unbound predicates with context
        asserte(len(list(c1triples((michel, Any, cheese)))), 1)
        asserte(len(list(c1triples((tarek, Any, cheese)))), 1)
        asserte(len(list(c1triples((bob, Any, pizza)))), 1)
        asserte(len(list(c1triples((bob, Any, michel)))), 1)

        # unbound predicates without context, same results!
        asserte(len(list(triples((michel, Any, cheese)))), 1)
        asserte(len(list(triples((tarek, Any, cheese)))), 1)
        asserte(len(list(triples((bob, Any, pizza)))), 1)
        asserte(len(list(triples((bob, Any, michel)))), 1)

        # unbound subject, objects with context
        asserte(len(list(c1triples((Any, hates, Any)))), 2)
        asserte(len(list(c1triples((Any, likes, Any)))), 5)

        # unbound subject, objects without context, same results!
        asserte(len(list(triples((Any, hates, Any)))), 2)
        asserte(len(list(triples((Any, likes, Any)))), 5)

        # unbound predicates, objects with context
        asserte(len(list(c1triples((michel, Any, Any)))), 2)
        asserte(len(list(c1triples((bob, Any, Any)))), 3)
        asserte(len(list(c1triples((tarek, Any, Any)))), 2)

        # unbound predicates, objects without context, same results!
        asserte(len(list(triples((michel, Any, Any)))), 2)
        asserte(len(list(triples((bob, Any, Any)))), 3)
        asserte(len(list(triples((tarek, Any, Any)))), 2)

        # unbound subjects, predicates with context
        asserte(len(list(c1triples((Any, Any, pizza)))), 3)
        asserte(len(list(c1triples((Any, Any, cheese)))), 3)
        asserte(len(list(c1triples((Any, Any, michel)))), 1)

        # unbound subjects, predicates without context, same results!
        asserte(len(list(triples((Any, Any, pizza)))), 3)
        asserte(len(list(triples((Any, Any, cheese)))), 3)
        asserte(len(list(triples((Any, Any, michel)))), 1)

        # all unbound with context
        asserte(len(list(c1triples((Any, Any, Any)))), 7)
        # all unbound without context, same result!
        asserte(len(list(triples((Any, Any, Any)))), 7)

        for c in [graph, self.get_context(c1)]:
            # unbound subjects
            asserte(set(c.subjects(likes, pizza)), set((michel, tarek)))
            asserte(set(c.subjects(hates, pizza)), set((bob, )))
            asserte(set(c.subjects(likes, cheese)), set([tarek, bob, michel]))
            asserte(set(c.subjects(hates, cheese)), set())

            # unbound objects
            asserte(set(c.objects(michel, likes)), set([cheese, pizza]))
            asserte(set(c.objects(tarek, likes)), set([cheese, pizza]))
            asserte(set(c.objects(bob, hates)), set([michel, pizza]))
            asserte(set(c.objects(bob, likes)), set([cheese]))

            # unbound predicates
            asserte(set(c.predicates(michel, cheese)), set([likes]))
            asserte(set(c.predicates(tarek, cheese)), set([likes]))
            asserte(set(c.predicates(bob, pizza)), set([hates]))
            asserte(set(c.predicates(bob, michel)), set([hates]))

            asserte(set(c.subject_objects(hates)),
                    set([(bob, pizza), (bob, michel)]))
            asserte(
                set(c.subject_objects(likes)),
                set([(tarek, cheese), (michel, cheese), (michel, pizza),
                     (bob, cheese), (tarek, pizza)]))

            asserte(set(c.predicate_objects(michel)),
                    set([(likes, cheese), (likes, pizza)]))
            asserte(set(c.predicate_objects(bob)),
                    set([(likes, cheese), (hates, pizza), (hates, michel)]))
            asserte(set(c.predicate_objects(tarek)),
                    set([(likes, cheese), (likes, pizza)]))

            asserte(set(c.subject_predicates(pizza)),
                    set([(bob, hates), (tarek, likes), (michel, likes)]))
            asserte(set(c.subject_predicates(cheese)),
                    set([(bob, likes), (tarek, likes), (michel, likes)]))
            asserte(set(c.subject_predicates(michel)), set([(bob, hates)]))

            asserte(
                set(c),
                set([(bob, hates, michel), (bob, likes, cheese),
                     (tarek, likes, pizza), (michel, likes, pizza),
                     (michel, likes, cheese), (bob, hates, pizza),
                     (tarek, likes, cheese)]))

        # remove stuff and make sure the graph is empty again
        self.removeStuff()
        asserte(len(list(c1triples((Any, Any, Any)))), 0)
        asserte(len(list(triples((Any, Any, Any)))), 0)
예제 #29
0
파일: rdfize.py 프로젝트: edsu/muldicat
def convert(muldicat_csv):
    """Build a SKOS graph from the MULDICAT spreadsheet export.

    *muldicat_csv* is a path to a UTF-8 encoded CSV file.  Returns the
    populated ConjunctiveGraph.  Python 2 code (``print`` statement).
    """
    g = ConjunctiveGraph()
    g.bind('skos', SKOS)
    g.bind('dct', DCT)

    # add concept scheme
    g.add((muldicat, RDF.type, SKOS.ConceptScheme))
    g.add((muldicat, DCT.title, Literal("Multilingual Dictionary of Cataloging Terms and Concepts", lang="en")))
    g.add((muldicat, DCT.description, Literal(description, datatype=XHTML)))
    g.add((muldicat, DCT.modified, Literal(datetime.date.today())))

    # work through each row of the spreadsheet, adding concepts as we go
    subject = None
    for row in unicode_csv_reader(codecs.open(muldicat_csv, encoding='utf-8')):
        # strip whitespace from row
        row = [cell.strip() for cell in row]

        # older version of the table had an unused ID column
        if len(row) == 8:
            print "popping"
            row.pop(0)

        if row[0] == 'Language':
            continue
        elif row == [u'', u'', u'', u'', u'', u'', u'', u'']:
            continue
        else:
            # columns: language, label, definition, see (alt labels),
            # see-also (relations), source, last-modified date
            lang, label, definition, see, see_also, source, modified  = row
            lang = languages.get(lang, None)
            label = label.strip()
            if not lang or not label:
                continue

            # use the english label to form part of the URI for the concept 
            # hopefully not too controversial?
            if lang == 'en':
                subject = make_id(label)

            # NOTE(review): ``subject`` carries over from the previous
            # English row; rows appearing before the first English row
            # would be added with subject=None -- confirm the input
            # always leads with the English entry.
            g.add((subject, RDF.type, SKOS.Concept))
            g.add((subject, SKOS.prefLabel, Literal(label, lang=lang)))
            g.add((subject, SKOS.inScheme, muldicat))

            if definition:
                g.add((subject, SKOS.definition, Literal(definition, lang=lang)))

            if source:
                g.add((subject, DCT.source, Literal(source, lang=lang)))

            if modified:
                date = datetime.datetime.strptime(modified, '%Y%m%d').date()

                # only record the latest last modification date for the concept
                existing_date = g.value(subject, DCT.modified)
                if not existing_date and date:
                    g.add((subject, DCT.modified, Literal(date)))
                elif date and existing_date and date > existing_date.toPython():
                    g.remove((subject, DCT.modified, existing_date))
                    g.add((subject, DCT.modified, Literal(date)))

            # the "see" column is a comma-separated list of alternate labels
            for alt_label in see.split(','):
                if not alt_label:
                    continue
                alt_label = alt_label.strip()
                g.add((subject, SKOS.altLabel, Literal(alt_label, lang=lang)))

            # link up relations if we have the english label
            if lang == 'en' and see_also:
                for s in see_also.split(','):
                    s = s.strip()
                    # entries look like "Label [BT]" / "Label [NT]" / "Label [RT]"
                    match = re.match(r'(.*) \[(.*?)\]', s)
                    if not match:
                        continue
                    label, reltype = match.groups()
                    reltype = reltype.strip('[]') # some are formatted wrong

                    object = make_id(label)

                    # BT = broader term, NT = narrower term, RT = related term;
                    # each relation is asserted in both directions
                    if reltype == 'BT':
                        g.add((subject, SKOS.broader, object))
                        g.add((object, SKOS.narrower, subject))
                    elif reltype == 'NT':
                        g.add((subject, SKOS.narrower, object))
                        g.add((object, SKOS.broader, subject))
                    elif reltype == 'RT':
                        g.add((subject, SKOS.related, object))
                        g.add((object, SKOS.related, subject))
                    else:
                        raise RuntimeError(reltype)
    return g
예제 #30
0
def rdf_all(notation='xml'):
    """
    Serialize the nodes of the store to RDF.

    ``notation`` is the rdflib serialization format (one of
    ``valid_formats`` below).

    NOTE(review): the original body mixed tabs and spaces so badly it was
    a SyntaxError on Python 3; this is the most faithful readable
    reconstruction.  In particular, the triple-adding block at the bottom
    sat *outside* the node loop and therefore only serialized the fields
    of the last node -- that behaviour is preserved here, but it looks
    like a latent bug; confirm with the author before "fixing" it.
    """
    valid_formats = ["xml", "n3", "ntriples", "trix"]
    default_graph_uri = "http://gstudio.gnowledge.org/rdfstore"

    configString = "/var/tmp/rdfstore"

    # Get the IOMemory plugin.
    store = plugin.get('IOMemory', Store)('rdfstore')

    # Open previously created store, or create it if it doesn't exist yet
    graph = Graph(store="IOMemory",
                  identifier=URIRef(default_graph_uri))
    path = mkdtemp()
    rt = graph.open(path, create=False)
    if rt == NO_STORE:
        graph.open(path, create=True)
    else:
        assert rt == VALID_STORE, "The underlying store is corrupt"

    # Now we'll add some triples to the graph & commit the changes
    graph.bind("gstudio", "http://gnowledge.org/")
    # Bookkeeping model fields that must not become RDF predicates.
    exclusion_fields = ["id", "rght", "node_ptr_id", "image", "lft",
                        "_state", "_altnames_cache", "_tags_cache",
                        "nid_ptr_id", "_mptt_cached_fields"]

    # Map a node's reftype to the Django model used to re-fetch it by
    # title (collapses the original duplicated elif chain).
    model_for_type = {
        'Gbobject': Gbobject,
        'None': Gbobject,
        'Processes': Gbobject,
        'System': Gbobject,
        'Objecttype': Objecttype,
        'Attributetype': Attributetype,
        'Complement': Complement,
        'Union': Union,
        'Intersection': Intersection,
        'Expression': Expression,
        'Processtype': Processtype,
        'Systemtype': Systemtype,
        'AttributeSpecification': AttributeSpecification,
        'RelationSpecification': RelationSpecification,
    }
    # Types published under the fixed gstudio namespace rather than a
    # per-node link.
    fixed_namespace_models = {
        'Attribute': Attribute,
        'Relationtype': Relationtype,
        'Metatype': Metatype,
    }

    rdflib = None
    node_dict = {}
    for node in NID.objects.all():
        node_dict = node.ref.__dict__
        node_type = node.reftype
        try:
            if node_type in model_for_type:
                node = model_for_type[node_type].objects.get(title=node)
                # Resolve the node's own namespace and dump it to the
                # triple store.
                rdflib = link(node)
                url_addr = link1(node)
                a = fstore_dump(url_addr)
            elif node_type in fixed_namespace_models:
                node = fixed_namespace_models[node_type].objects.get(title=node)
                rdflib = Namespace('http://sbox.gnowledge.org/gstudio/')
                url_addr = 'http://sbox.gnowledge.org/gstudio/'
                a = fstore_dump(url_addr)
        except Exception:
            # The original used a bare ``except`` and silently fell back
            # to the fixed namespace for these types on any error.
            if node_type in fixed_namespace_models:
                rdflib = Namespace('http://sbox.gnowledge.org/gstudio/')

    # NOTE(review): intentionally outside the loop (matches the original
    # indentation) -- only the final node's fields end up as triples.
    if node_dict and rdflib is not None:
        subject = str(node_dict['id'])
        for key in node_dict:
            if key not in exclusion_fields:
                predicate = str(key)
                pobject = str(node_dict[predicate])
                graph.add((rdflib[subject], rdflib[predicate], Literal(pobject)))

    rdf_code = graph.serialize(format=notation)

    graph.commit()
    graph.close()
예제 #31
0
from rdflib.term import Literal, BNode, URIRef
from rdflib.graph import ConjunctiveGraph
from rdflib.namespace import Namespace

DC = Namespace(u"http://purl.org/dc/elements/1.1/")
FOAF = Namespace(u"http://xmlns.com/foaf/0.1/")

# Tiny fixture graph: Alice has only a foaf:givenName while Bob also has
# a dc:date, so only Bob's row survives a FILTER(bound(?date)) query.
graph = ConjunctiveGraph()
s = BNode()
graph.add((s, FOAF['givenName'], Literal('Alice')))
b = BNode()
graph.add((b, FOAF['givenName'], Literal('Bob')))
graph.add((b, DC['date'], Literal("2005-04-04T04:04:04Z")))


def test_bound():
    """FILTER(bound(?date)) keeps only solutions where the OPTIONAL matched."""
    query = """PREFIX foaf: <http://xmlns.com/foaf/0.1/>
    PREFIX dc:  <http://purl.org/dc/elements/1.1/>
    PREFIX xsd:  <http://www.w3.org/2001/XMLSchema#>
    SELECT ?name
    WHERE { ?x foaf:givenName  ?name .
                    OPTIONAL { ?x dc:date ?date } .
                    FILTER ( bound(?date) ) }"""
    rows = list(graph.query(query))
    wanted = [(Literal('Bob', lang=None, datatype=None), )]
    assert rows == wanted, "Expected %s but got %s" % (wanted, rows)


if __name__ == '__main__':
    # Allow running this example directly as a script.
    test_bound()
예제 #32
0
class Hisco2RDF():
    '''
    Scrapes the HISCO Web site
    The hierarchy goes as "master > minor > rubri > micro"
    '''
    def __init__(self):
        """Prepare the output graph and the SQLite page cache."""
        # The graph to store the data, with all output prefixes bound.
        self.graph = ConjunctiveGraph()
        for prefix, namespace in (('skos', SKOS),
                                  ('hisco', HISCO),
                                  ('dcterms', DCTERMS),
                                  ('sdmx-dimension', SDMX_DIMENSION),
                                  ('sdmx-code', SDMX_CODE),
                                  ('qb', QB)):
            self.graph.namespace_manager.bind(prefix, namespace)

        # SQLite DB for the cache of downloaded pages
        self.cache = sqlite3.connect('cache.db')
        db_cursor = self.cache.cursor()
        db_cursor.execute("CREATE TABLE IF NOT EXISTS  page (url text, html text)")
        self.cache.commit()
    
    def __del__(self):
        # Release the SQLite cache connection when the scraper is GC'd.
        self.cache.close()
        
    def get_page(self, url):
        """Return a BeautifulSoup document for *url*, caching the raw HTML.

        Each page is fetched over HTTP at most once; the body is stored
        in the SQLite ``page`` table and replayed from there afterwards.
        """
        #log.debug("Load %s" % url)

        c = self.cache.cursor()
        c.execute("SELECT * FROM page WHERE url = ?", (url,))
        res = c.fetchone()
        doc = None
        if res is None:  # cache miss: fetch and remember (was ``== None``)
            doc = requests.get(url).content
            c.execute("INSERT INTO page VALUES (?,?)", (url, doc))
            self.cache.commit()
        else:
            (_, doc) = res
        return BeautifulSoup(doc)

    def save_output(self):
        """Finish the Data Cube metadata and write the graph to ../hisco.ttl."""
        # Add more things needed for DataCubes
        dimprop = HISCO['occupation']
        self.graph.add((dimprop, RDF.type, QB['DimensionProperty']))
        self.graph.add((dimprop, RDFS.range, SKOS.Collection))
        self.graph.add((dimprop, QB['Concept'], SKOS.Collection))
        self.graph.add((dimprop, RDFS.label, Literal('Occupation code', lang='en')))
        self.graph.add((dimprop, RDFS.comment, Literal('The HISCO group of the occupation', lang='en')))

        # Print to the screen
        #outfile = sys.stdout.buffer
        #self.graph.serialize(destination=outfile, format='n3')

        # Save to the file; the context manager guarantees the handle is
        # closed even if serialization raises (original leaked it then).
        with open('../hisco.ttl', "wb") as outfile:
            self.graph.serialize(destination=outfile, format='n3')
        
    def parse_hisco_tree(self):
        '''
        Parse the hisco tree

        Walks the four-level hierarchy "major > minor > rubri > micro":
        major groups are scraped from the root tree page, then each
        level's links are followed down with _parse_records_table.
        Groups become skos:ConceptScheme resources, micro groups become
        skos:Collection resources, each linked to its parent via
        skos:related.
        '''
        # Load the page
        doc = self.get_page(ROOT + HISCO_TREE)

        # Find the major groups
        major_groups = []
        major_group = None
        for table in doc.find_all('table', attrs={'border':'0'}):
            for row in table.find_all('tr'):
                for col in row.find_all('td'):
                    # Skip empty rows
                    if len(col.text) == 1:
                        continue
                    # We are starting a new group
                    if col.text.startswith('Majorgroup'):
                        # Save the one we were building if any
                        if major_group != None:
                            major_groups.append(major_group)
                        m = re.search("Majorgroup ([^ ]*) ", col.text)
                        major_group = {}
                        major_group['title'] = col.text
                        # e.g. "0/1" becomes "0-1" so it is URI-safe
                        major_group['code'] = m.group(1).replace('/', '-')
                    # We have a description
                    if col.text.startswith('Workers'):
                        major_group['description'] = col.text
                    # We have links to minor
                    if col.text.startswith('List Minor'):
                        link = col.find_all('a')[0]['href']
                        major_group.setdefault('links', [])
                        major_group['links'].append(link)
        # Add the last group in the making
        if major_group != None:
            major_groups.append(major_group)

        # Add the groups to the graph
        for group in major_groups:
            major_group_uri = self._get_group_uri(group['code'])
            self.graph.add((major_group_uri, RDF.type, SKOS['ConceptScheme']))
            self.graph.add((major_group_uri, DCTERMS.title, Literal(group['title'])))
            self.graph.add((major_group_uri, DCTERMS.description, Literal(group['description'])))

        # Now move onto the minor groups following the links
        for major_group in major_groups:
            major_group_uri = self._get_group_uri(major_group['code'])

            for minor_link in major_group['links']:
                # Look for the minor groups (code is 2 digits at this level)
                minor_groups = self._parse_records_table(minor_link, 2)

                # Add the groups to the graph
                for minor_group in minor_groups:
                    minor_group_uri = self._get_group_uri(minor_group['code'])
                    self.graph.add((minor_group_uri, RDF.type, SKOS['ConceptScheme']))
                    self.graph.add((minor_group_uri, RDFS.label, Literal(minor_group['title'])))
                    self.graph.add((minor_group_uri, DCTERMS.description, Literal(minor_group['description'])))
                    self.graph.add((major_group_uri, SKOS.related, minor_group_uri))

                    # Got one level deeper into the rubri
                    for rubri_link in minor_group['links']:
                        # Look for the minor groups
                        rubri_groups = self._parse_records_table(rubri_link, 3)

                        # Add the groups to the graph
                        for rubri_group in rubri_groups:
                            rubri_group_uri =  self._get_group_uri(rubri_group['code'])
                            self.graph.add((rubri_group_uri, RDF.type, SKOS['ConceptScheme']))
                            self.graph.add((rubri_group_uri, RDFS.label, Literal(rubri_group['title'])))
                            self.graph.add((rubri_group_uri, DCTERMS.description, Literal(rubri_group['description'])))
                            self.graph.add((minor_group_uri, SKOS.related, rubri_group_uri))

                            # And one deeper for the micro
                            for micro_link in rubri_group['links']:
                                # Look for the minor groups
                                micro_groups = self._parse_records_table(micro_link, 5)

                                # Add the groups to the graph; micros are
                                # Collections rather than ConceptSchemes
                                for micro_group in micro_groups:
                                    hisco_uri = self._get_hisco_uri(micro_group['code'])
                                    self.graph.add((hisco_uri, RDF.type, SKOS['Collection']))
                                    self.graph.add((hisco_uri, RDFS.label, Literal(micro_group['title'])))
                                    self.graph.add((hisco_uri, DCTERMS.description, Literal(micro_group['description'])))
                                    self.graph.add((rubri_group_uri, SKOS.related, hisco_uri))
                
    def parse_occupational_titles(self):
        '''
        Scrape the section of the site about occupational titles
        Last page = http://historyofwork.iisg.nl/list_hiswi.php?step=1845&publish=Y&modus=ftsearch

        Iterates over the paginated list (following each page's "Next"
        link), turning every title row into a skos:Concept that is a
        member of its HISCO code collection, enriched from the title's
        details page (gender, status, English translation).
        '''
        parsed_status_page = set()
        next_page = OCCUPATIONAL_TITLES

        while next_page != None:
            log.info("Parse titles %s" % next_page)

            # Load the page
            doc = self.get_page(ROOT + next_page)

            # Find the right table
            table = doc.find('table', attrs={'cellspacing':'0', 'cellpadding':'2', 'border':'0'})

            # Look for all the titles
            for row in table.find_all('tr')[1:]:  # Skip the header
                cols = row.find_all('td')
                occupation_title = cols[1].text
                details_page_link = cols[1].find_all('a')[0]['href']
                language = LANG_MAP[cols[2].text]
                hisco_code = cols[3].text.replace('*', '')

                # Get the DB index from details_page_link
                m = re.search('know_id=([^&]*)&', details_page_link)
                occupation_index = m.group(1)

                # Add the concept to the graph
                resource = self._get_occupation_title_uri(occupation_index)
                self.graph.add((resource, RDF.type, SKOS['Concept']))
                self.graph.add((resource, SKOS.prefLabel, Literal(occupation_title, lang=language)))
                self.graph.add((resource, SKOS.member, self._get_hisco_uri(hisco_code)))

                # Get more information about the title and add it as a member of the collection
                details_page = self.get_page(ROOT + details_page_link)
                details_table = details_page.find('table', attrs={'cellspacing':'8', 'cellpadding':'0'})
                # Map of label text -> value cell; entries are deleted as
                # they are consumed so leftovers can be inspected below.
                keyvalues = {}
                for details_row in details_table.find_all('tr'):
                    details_cols = details_row.find_all('td')
                    keyvalues[details_cols[0].text.strip()] = details_cols[-1]

                # We already dealt with these two
                del keyvalues['Hisco code']
                del keyvalues['Occupational title']

                # TODO Country , use refArea

                # TODO Language

                # Do we know the gender ?
                if 'Gender' in keyvalues:
                    sex = SDMX_CODE['sex-U'] # Also applies to "Male/Female"
                    if keyvalues['Gender'].text.strip() == 'Male':
                        sex = SDMX_CODE['sex-M']
                    elif keyvalues['Gender'].text.strip() == 'Female':
                        sex = SDMX_CODE['sex-F']
                    self.graph.add((resource, SDMX_DIMENSION['sex'], sex))
                    del keyvalues['Gender']

                # Do we know the status ?
                if 'Status' in keyvalues:
                    # Add the status
                    status = keyvalues['Status'].text.strip()
                    self.graph.add((resource, HISCO['status'], self._get_status_uri(status)))
                    # Parse the status page if necessary
                    status_page = keyvalues['Status'].find_all('a')[0]['href']
                    if status_page not in parsed_status_page:
                        self._parse_status_page(status_page)
                        parsed_status_page.add(status_page)
                    del keyvalues['Status']

                # TODO Relation

                # TODO Product

                # TODO Provenance

                # Do we have a translation in English ?
                if 'Translation' in keyvalues:
                    trans = Literal(keyvalues['Translation'].text.strip().replace('´', "'"), lang='en')
                    self.graph.add((resource, SKOS.altLabel, trans))
                    del keyvalues['Translation']

                # Print whatever is left
                #if len(keyvalues.keys()) != 0:
                #    log.info(keyvalues.keys())

            # Look for the "next" link
            next_table = doc.find('table', class_='nextprev')
            next_page = None
            for link in next_table.find_all('a'):
                if 'Next' in link.text:
                    next_page = link['href']
            
    def _parse_status_page(self, url):
        '''
        Parses a status page such as http://historyofwork.iisg.nl/status.php?int02=32

        Extracts the status code, its label and its per-language translations
        from the <pre> block and adds them to self.graph, then (re)asserts the
        generic HISCO "Status" class and "status" property descriptions.
        '''
        # Work-around broken content
        if url == 'status.php?int02=15':
            return

        # Load the page
        doc = self.get_page(ROOT + url)

        # Each pattern replaces the original re.match + re.search pair:
        # an anchored ("^...") search is equivalent to a match, so a single
        # grouped match call suffices. Compiled once, outside the loop.
        code_line = re.compile(r"^([0-9]*) ([a-zA-Z]*)")
        translation_line = re.compile(r"^([A-Z]{2}):\t([a-zA-Z]*)")

        # Find the data about this status
        status_uri = None
        for line in doc.find('pre').text.split('\n'):
            m = code_line.match(line)
            if m:
                status_uri = self._get_status_uri(m.group(1))
                self.graph.add((status_uri, RDF.type, HISCO['Status']))
                self.graph.add((status_uri, RDFS.label, Literal(m.group(2))))
                self.graph.add((status_uri, SKOS.prefLabel, Literal(m.group(2))))
                self.graph.add((status_uri, SKOS.notation, Literal(m.group(1))))
            m = translation_line.match(line)
            if m:
                # NOTE(review): assumes a code line always precedes its
                # translation lines; otherwise status_uri is still None here
                # (same behaviour as the original code).
                lang_code = m.group(1).lower()
                label = Literal(m.group(2), lang=lang_code)
                self.graph.add((status_uri, SKOS.altLabel, label))

        # Describe the class
        status_class = HISCO['Status']
        descr = doc.find('table', attrs={'width': '600'}).text.strip().split('\r\n')
        self.graph.add((status_class, RDF.type, RDFS.Class))
        self.graph.add((status_class, RDFS.label, Literal("Status code")))
        self.graph.add((status_class, DCTERMS.comment, Literal(descr[1])))

        # Describe the property
        status_property = HISCO['status']
        self.graph.add((status_property, RDF.type, RDF.Property))
        self.graph.add((status_property, RDFS.label, Literal("status associated to the occupation")))
        self.graph.add((status_property, RDFS.range, HISCO['Status']))
        self.graph.add((status_property, RDFS.domain, SKOS.Concept))
        
    def _parse_records_table(self, url, size):
        '''
        Parse a listing page into a list of group records.

        Minor, Rubri and Micro have the same structure except an additional
        column for Micro with links to the titles.

        Returns a list of dicts with keys 'code', 'title', 'links',
        'description' and, when a "Display Titles" column is present,
        'titles_link'. Returns an empty list when the page has no table.
        '''
        # Load the page
        doc = self.get_page(ROOT + url)

        # Find the right table
        table = doc.find('table', attrs={'cellspacing': '8', 'cellpadding': '0'})

        # If we can't find the table return an empty list
        # work around for http://historyofwork.iisg.nl/list_micro.php?keywords=920&keywords_qt=lstrict
        if table is None:
            return []

        # A cell whose text starts with `size` digits opens a new record;
        # the following cells hold its details. Pattern compiled once.
        code_pattern = re.compile("[0-9]{%d}" % size)

        # Look for the minor groups
        groups = []
        group = None
        columns = table.find_all('td')
        for index in range(0, len(columns)):
            # New group
            if code_pattern.match(columns[index].text):
                if group is not None:
                    groups.append(group)
                group = {}
                group['code'] = columns[index].text
                group['title'] = columns[index + 1].text
                link = columns[index + 1].find_all('a')[0]['href']
                group.setdefault('links', [])
                group['links'].append(link)
                group['description'] = columns[index + 2].text
                if columns[index + 3].text == "Display Titles":
                    link = columns[index + 3].find_all('a')[0]['href']
                    group['titles_link'] = link
        # Bug fix: only flush the trailing group when one was actually
        # started -- the original unconditionally appended, returning
        # [None] for tables with no matching rows.
        if group is not None:
            groups.append(group)

        return groups
            
    def _get_group_uri(self, code):
        """Return the HISCO resource for an occupational group code."""
        local_name = 'group-%s' % code
        return HISCO[local_name]
    
    def _get_hisco_uri(self, code):
        """Return the HISCO resource for a full HISCO occupation code."""
        local_name = 'hisco-%s' % code
        return HISCO[local_name]
    
    def _get_occupation_title_uri(self, code):
        """Return the HISCO resource for an occupational title code."""
        local_name = 'occupation-%s' % code
        return HISCO[local_name]
    
    def _get_status_uri(self, code):
        """Return the HISCO resource for a status code."""
        local_name = 'status-%s' % code
        return HISCO[local_name]
예제 #33
0
def make_rdf_graph(movies):
    """Build an RDF graph describing *movies*.

    Every movie dict contributes a title triple and a directed_by link;
    each actor is tied to the movie through an anonymous performance node.
    Returns the populated ConjunctiveGraph.
    """
    graph = ConjunctiveGraph()

    graph.bind('fb', FB)
    graph.bind('dc', DC)
    for film in movies:

        # One named node per movie, labelled with its title
        film_node = IVA_MOVIE[film['id']]
        graph.add((film_node, DC['title'], Literal(film['title'])))

        # The director gets a named node, a name, and a link from the movie
        director_node = IVA_PERSON[film['director']['id']]
        graph.add((film_node, FB['film.film.directed_by'], director_node))
        graph.add((director_node, DC['title'], Literal(film['director']['name'])))

        for cast_member in film['actors']:
            # The performance node is a blank node -- it has no URI
            performance = BNode()

            # The performance is connected to the actor and the movie
            actor_node = IVA_PERSON[cast_member['id']]

            graph.add((actor_node, DC['title'], Literal(cast_member['name'])))
            graph.add((performance, FB['film.performance.actor'], actor_node))
            # If you had the name of the role, you could also add it to the
            # performance node, e.g.
            # graph.add((performance, FB['film.performance.role'], Literal('Carrie Bradshaw')))

            graph.add((film_node, FB['film.film.performances'], performance))

    return graph
from rdflib.graph import ConjunctiveGraph
from rdflib.namespace import Namespace, RDF, XSD
from rdflib.term import BNode, Literal

import rdflib

# Namespaces for the demo: Dublin Core plus a custom function namespace
DC = Namespace(u"http://purl.org/dc/elements/1.1/")
FUNC = Namespace(u"http://example.org/functions#")

# Small demo graph: four anonymous nodes carrying the values 0..3
graph = ConjunctiveGraph()
graph.add((BNode(), RDF.value, Literal(0)))
graph.add((BNode(), RDF.value, Literal(1)))
graph.add((BNode(), RDF.value, Literal(2)))
graph.add((BNode(), RDF.value, Literal(3)))

# Datatype URIs that rdflib maps to Python numeric types (`long` makes
# this Python-2-only); used to recognise numeric literals in func_even.
# _toPythonMapping is a private rdflib table -- may change across versions.
from rdflib.term import _toPythonMapping
NUMERIC_TYPES = [type_uri for type_uri in _toPythonMapping if \
                 _toPythonMapping[type_uri] in (int, float, long)]


def func_even(a):
    """SPARQL extension: return an xsd:boolean literal -- is *a* even?

    Raises TypeError when the argument is not a numeric literal.
    """
    # Should this be required, or be done automatically?
    from rdfextras.sparql.sparqlOperators import getValue
    node = getValue(a)

    # Guard clause: reject anything that is not a numeric Literal
    if not (isinstance(node, Literal) and node.datatype in NUMERIC_TYPES):
        raise TypeError(a)
    is_even = node.toPython() % 2 == 0
    return Literal(int(is_even), datatype=XSD.boolean)

예제 #35
0
class TestLevelDBConjunctiveGraphCore(unittest.TestCase):
    """Exercise core ConjunctiveGraph behaviour against the LevelDB store."""

    def setUp(self):
        """Open (creating if needed) a LevelDB-backed graph at configString."""
        store = "LevelDB"
        self.graph = ConjunctiveGraph(store=store)
        self.path = configString
        self.graph.open(self.path, create=True)

    def tearDown(self):
        """Destroy the store and remove any on-disk leftovers."""
        self.graph.destroy(self.path)
        try:
            self.graph.close()
        except Exception:
            # Bug fix: was a bare `except:` which also swallowed
            # KeyboardInterrupt/SystemExit. Closing an already-destroyed
            # store may legitimately fail, so ordinary errors are ignored.
            pass
        if getattr(self, 'path', False) and self.path is not None:
            if os.path.exists(self.path):
                if os.path.isdir(self.path):
                    for f in os.listdir(self.path):
                        os.unlink(self.path + '/' + f)
                    os.rmdir(self.path)
                elif len(self.path.split(':')) == 1:
                    os.unlink(self.path)
                else:
                    os.remove(self.path)

    def test_namespaces(self):
        """Bound prefixes must show up in the namespaces() listing."""
        # NOTE(review): "http://http://" looks like a typo in the dc URL,
        # but it is the literal the test binds, so it is left unchanged.
        self.graph.bind("dc", "http://http://purl.org/dc/elements/1.1/")
        self.graph.bind("foaf", "http://xmlns.com/foaf/0.1/")
        self.assert_(len(list(self.graph.namespaces())) == 5)
        self.assert_(('foaf', rdflib.term.URIRef(u'http://xmlns.com/foaf/0.1/')
                      ) in list(self.graph.namespaces()))

    def test_readable_index(self):
        """Smoke-test the human-readable index formatter."""
        print(readable_index(111))

    def test_triples_context_reset(self):
        """triples() with the store as context still sees committed triples."""
        michel = rdflib.URIRef(u'michel')
        likes = rdflib.URIRef(u'likes')
        pizza = rdflib.URIRef(u'pizza')
        cheese = rdflib.URIRef(u'cheese')
        self.graph.add((michel, likes, pizza))
        self.graph.add((michel, likes, cheese))
        self.graph.commit()
        ntriples = self.graph.triples((None, None, None),
                                      context=self.graph.store)
        self.assert_(len(list(ntriples)) == 2)

    def test_remove_context_reset(self):
        """Removing a triple via the store drops it from the graph."""
        michel = rdflib.URIRef(u'michel')
        likes = rdflib.URIRef(u'likes')
        pizza = rdflib.URIRef(u'pizza')
        cheese = rdflib.URIRef(u'cheese')
        self.graph.add((michel, likes, pizza))
        self.graph.add((michel, likes, cheese))
        self.graph.commit()
        self.graph.store.remove((michel, likes, cheese), self.graph.store)
        self.graph.commit()
        self.assert_(
            len(
                list(
                    self.graph.triples((None, None, None),
                                       context=self.graph.store))) == 1)

    def test_remove_db_exception(self):
        """__len__ with a context argument must not corrupt iteration."""
        michel = rdflib.URIRef(u'michel')
        likes = rdflib.URIRef(u'likes')
        pizza = rdflib.URIRef(u'pizza')
        cheese = rdflib.URIRef(u'cheese')
        self.graph.add((michel, likes, pizza))
        self.graph.add((michel, likes, cheese))
        self.graph.commit()
        self.graph.store.__len__(context=self.graph.store)
        self.assert_(
            len(
                list(
                    self.graph.triples((None, None, None),
                                       context=self.graph.store))) == 2)
예제 #36
0
def to_RDF(records, base_namespace, lang_codes=None,skosxl=False):
	"""
	docstring for as_RDF
	"""
	from rdflib import Namespace, BNode, Literal, URIRef,RDF,RDFS
	from rdflib.graph import Graph, ConjunctiveGraph
	from rdflib.plugins.memory import IOMemory
	print >> sys.stderr, base_namespace
	store = IOMemory()
	g = ConjunctiveGraph(store=store)
	skos = Namespace('http://www.w3.org/2004/02/skos/core#')
	skosxl = Namespace('http://www.w3.org/2008/05/skos-xl#')
	base = Namespace(base_namespace)
	g.bind('skos',skos)
	g.bind('skosxl',skosxl)
	g.bind('base',base)
	scheme_label = "schemes/1"
	thesaurus = URIRef(base[scheme_label])
	g.add((thesaurus,RDF.type, skos["ConceptScheme"]))
	for n,record in enumerate(records):
		label_counter = 1
		try:
			if(record is not None):
				uri = URIRef(base["%s/concepts/%i"%(scheme_label,int(record['id']))])
				g.add((uri, RDF.type, skos['Concept']))
				g.add((uri,skos["inScheme"],thesaurus))
				if(record['broader_id'] is not None):
					broader_uri = URIRef(base["%s/concepts/%i"%(scheme_label,int(record['broader_id']))])
					g.add((uri,skos['broader'],broader_uri)) 
					g.add((broader_uri,skos['narrower'],uri))
				else:
					g.add((uri,skos["topConceptOf"],thesaurus))
				if(record['hidden_label'] is not None):
					if(skosxl):
						label_uri = URIRef("%s#l%i"%(uri,label_counter))
						g.add((label_uri,RDF.type,skosxl["Label"]))
						g.add((label_uri,skosxl["literalForm"],Literal(record['hidden_label'])))
						g.add((uri,skosxl["hiddenLabel"],label_uri))
						label_counter += 1
					else:
						g.add((uri,skos["hiddenLabel"],Literal(record['hidden_label'])))
				if(record['labels'] is not None):
					# when transforming into SKOS-XL append the hiddenLabel to the preferredLabel@de
					# of a given term. This way it becomes possible to use the hiddenLabel to distinguish
					# between concepts with the same label but different provenance (i.e. they are found 
					# within different branches of the same thesaurus tree. 
					if(skosxl):
						label_uri = URIRef("%s#l%i"%(uri,label_counter))
						g.add((label_uri,RDF.type,skosxl["Label"]))
						g.add((label_uri,skosxl["literalForm"],Literal("%s (%s)"%(record['labels']["ger"],record['hidden_label']),lang=lang_codes["ger"])))
						g.add((uri,skosxl["prefLabel"],label_uri))
						label_counter += 1
					for lang in record['labels'].keys():
						if(skosxl):
							label_uri = URIRef("%s#l%i"%(uri,label_counter))
							g.add((label_uri,RDF.type,skosxl["Label"]))
							g.add((label_uri,skosxl["literalForm"],Literal(record['labels'][lang],lang=lang_codes[lang])))
							g.add((uri,skosxl["prefLabel"],label_uri))
							label_counter += 1
						else:
							g.add((uri,skos["prefLabel"],Literal(record['labels'][lang],lang=lang_codes[lang])))
				if(record['anon_nodes'] is not None):
					for node_id,node in record['anon_nodes']:
						temp = URIRef(base["%s/concepts/%s"%(scheme_label,node_id)])
						print >> sys.stderr, temp
						g.add((temp,RDF.type,skos['Concept']))
						g.add((temp,skos["inScheme"],thesaurus))
						g.add((temp,skos['broader'],uri))
						if(skosxl):
							label_uri = URIRef("%s#l%i"%(temp,label_counter))
							g.add((label_uri,RDF.type,skosxl["Label"]))
							g.add((label_uri,skosxl["literalForm"],Literal(node,lang="de")))
							g.add((temp,skosxl["prefLabel"],label_uri))
							label_counter += 1
							# added extra preferredLabel@de with hiddenLabel betwen brackets 
							label_uri = URIRef("%s#l%i"%(temp,label_counter))
							g.add((label_uri,RDF.type,skosxl["Label"]))
							g.add((label_uri,skosxl["literalForm"],Literal("%s (%s)"%(node,record['hidden_label']),lang="de")))
							g.add((temp,skosxl["prefLabel"],label_uri))
							label_counter += 1
						else:
							g.add((temp,skos["prefLabel"],Literal(node,lang="de")))
				print >> sys.stderr, "Record %s converted into RDF (%i/%i)"%(record['id'],n,len(records))
		except Exception, e:
			print >> sys.stderr, "Failed converting record %s with error %s (%i/%i)"%(record['id'],str(e),n,len(records))
예제 #37
0
    def query_lode(self,id):
        """Map a thinker's InPhO relations onto DBpedia and return RDF/XML.

        Loads the InPhO LODE dump, finds owl:sameAs links between InPhO
        thinkers and DBpedia resources, queries DBpedia for the mapped
        properties, and returns a serialized graph of triples not already
        present on the local entity.

        NOTE(review): this method mixes tab and space indentation
        (Python 2 tab==8 semantics); code is intentionally left
        byte-identical -- only comments were added.
        """
        var = "http://inpho.cogs.indiana.edu/thinker/"+id
        # initialize dictionaries to store temporray results
        dbPropResults = {}
        inpho_DB = {}
        DB_inpho = {}
	dbpedia_web = {}
        triples={}

        # init graphs for LODE and mapped data
        gLODE = ConjunctiveGraph()
        gReturn = ConjunctiveGraph()
        # import InPhO data
        gLODE.parse("http://inphodata.cogs.indiana.edu/lode/out_n3.20140207.rdf", format="n3")

        # builds a set of triples with the inpho id as the first entry and the
        # dbpedia id as the second 
        resultsLODE = gLODE.query("""
            SELECT ?thinker_LODE ?thinkerDB
            WHERE { ?thinker_LODE owl:sameAs ?thinkerDB 
                    FILTER (regex(str(?thinker_LODE),"http://inpho.cogs.indiana.edu","i")
                    && regex(str(?thinkerDB),"http://dbpedia.org/resource/","i")).
                   }
            """)
        
        # load in property mapping between inpho-dbpedia
        # (tab-separated file: dbpedia property, inpho property, web name)
        prop_map_filename = config.get_data_path('rdf_map.txt')
        with open(prop_map_filename,'r') as f:
            dbprops=csv.reader(f,delimiter='\t')
            for dbprop in dbprops:
                dbPropResults[dbprop[1]] = dbprop[0]
		dbpedia_web[dbprop[1].split(":")[1]]=dbprop[2]
		

        # iterate through triples and store mappings
        for triple in resultsLODE: 
            inpho_DB[str(triple[0])] = str(triple[1])#store the results in key as inpho url and value as dbpedia url
            DB_inpho[str(triple[1])] = str(triple[0])#store the results in key as dbpedia url and value as inpho url 
	   
	
	
        # queries for all relationships in dbpedia
        sparqlDB = SPARQLWrapper("http://inpho-dataserve.cogs.indiana.edu:8890/sparql/")
        sparqlDB.setReturnFormat(JSON)
        # Only the thinker matching `var` is queried against DBpedia
        for inpho,DB in inpho_DB.iteritems():
            predicate = {}
            #for dbprop in dbPropResults:
            if(str(DB_inpho.get(DB))== var):
		for dbprop in dbPropResults:
                    sparqlDB.setQuery(""" PREFIX dbpprop: <http://dbpedia.org/ontology/>
                                      SELECT ?b  WHERE { <"""+DB+"""> """+dbprop+""" ?b.
                                                        FILTER (regex(str(?b),"dbpedia.org/resource/","i")).
                                                        }""")
                    resultsDB = sparqlDB.query().convert()
                    predicate[dbprop] = resultsDB["results"]["bindings"]
            	triples[DB] = predicate
        
        #retrieve native python object
        c.entity = h.fetch_obj(Entity, id, new_id=True)
	existing_predicate_list=[]
	existing_object_list=[]

        predicates_to_compare = ['influenced', 'influenced_by', 'teachers', 'students']

        # Record the relations the local entity already has so that
        # duplicates can be filtered out of the returned graph below.
        for subject,predicate in triples.iteritems():
            for predicate1, objectn in predicate.iteritems():
                predicate_to_match=predicate1.split(":")[1]
	        attr=getattr(c.entity,dbpedia_web[predicate_to_match])
              
		for attr1 in attr:
               	        if(dbpedia_web[predicate_to_match] in predicates_to_compare) :
				existing_predicate_list.append(dbpedia_web[predicate_to_match] +':'+attr1.wiki)


        # maps from dbpedia relationships back to inpho relationships
        for subject,predicate in triples.iteritems():
            #attr = getattr(c.entity, predicate)
	    #raise Exception
		
	    for predicate1, objectn in predicate.iteritems():
		
	      
				
	
                for object1 in objectn:                       
		   #temp_str=dbpedia_web[predicate1.split(":")[1]] + ':'+str(object1['b']['value']).split("/")[len(str(object1['b']['value']).split("/"))-1].replace("_"," ")
		   temp_str=dbpedia_web[predicate1.split(":")[1]] + ':'+str(object1['b']['value']).split("/")[len(str(object1['b']['value']).split("/"))-1]

                   
	#	   raise Exception
	           if temp_str not in existing_predicate_list:     
		  # returns the inphoid for the object
                   	DB_Entry = DB_inpho.get(object1['b']['value'])#reverse lookup for the inpho data check	    

                    	# if there is not an inpho id, leave it as the dbpedia id
                   	if(DB_Entry == None):
                        	gReturn.add((URIRef(subject),URIRef(dbPropResults.get(predicate1)),URIRef(object1['b']['value'])))
                   	else:
                        	# return the properly mapped id
                        	# TODO: use attr to filter DB_Entry
                        	gReturn.add((URIRef(subject),URIRef(dbPropResults.get(predicate1)),URIRef(DB_Entry)))
                     
                      #  if "Francisco" in str(object1['b']['value']).split("/")[len(str(object1['b']['value']).split("/"))-1].replace("_", ):
		   
#        raise Exception                  
        return gReturn.serialize();
예제 #38
0
class ContextTest(test.TestCase):
    """
    Testing different contexts.

    Heavily based on https://github.com/RDFLib/rdflib-postgresql/blob/master/test/context_case.py
    """
    # Store configuration for the Django-backed rdflib store
    store_name = "Django"
    storetest = True
    path = ""
    create = True

    # Shared fixture terms used across all test methods
    michel = URIRef(u'michel')
    tarek = URIRef(u'tarek')
    bob = URIRef(u'bob')
    likes = URIRef(u'likes')
    hates = URIRef(u'hates')
    pizza = URIRef(u'pizza')
    cheese = URIRef(u'cheese')
    c1 = URIRef(u'context-1')
    c2 = URIRef(u'context-2')

    def setUp(self):
        """Open a fresh graph, destroying any previous on-disk state."""
        self.graph = ConjunctiveGraph(store=self.store_name)
        self.graph.destroy(self.path)
        self.graph.open(self.path, create=self.create)

    def tearDown(self):
        """Destroy and close the store."""
        self.graph.destroy(self.path)
        self.graph.close()

    def get_context(self, identifier):
        """Return a Graph view on the shared store for `identifier`."""
        assert isinstance(identifier, URIRef) or isinstance(identifier, BNode), type(identifier)
        return Graph(store=self.graph.store, identifier=identifier, namespace_manager=self)

    def addStuff(self):
        """Add the seven fixture triples to context c1."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        graph = Graph(self.graph.store, c1)

        graph.add((tarek, likes, pizza))
        graph.add((tarek, likes, cheese))
        graph.add((michel, likes, pizza))
        graph.add((michel, likes, cheese))
        graph.add((bob, likes, cheese))
        graph.add((bob, hates, pizza))
        graph.add((bob, hates, michel))

    def removeStuff(self):
        """Remove the seven fixture triples from context c1."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        graph = Graph(self.graph.store, c1)

        graph.remove((tarek, likes, pizza))
        graph.remove((tarek, likes, cheese))
        graph.remove((michel, likes, pizza))
        graph.remove((michel, likes, cheese))
        graph.remove((bob, likes, cheese))
        graph.remove((bob, hates, pizza))
        graph.remove((bob, hates, michel))

    def addStuffInMultipleContexts(self):
        """Add the same triple to the default context, c1 and c2."""
        c1 = self.c1
        c2 = self.c2
        triple = (self.pizza, self.hates, self.tarek)

        # add to default context
        self.graph.add(triple)
        # add to context 1
        graph = Graph(self.graph.store, c1)
        graph.add(triple)
        # add to context 2
        graph = Graph(self.graph.store, c2)
        graph.add(triple)

    def testConjunction(self):
        """The conjunctive graph sees the union of all contexts."""
        self.addStuffInMultipleContexts()
        triple = (self.pizza, self.likes, self.pizza)
        # add to context 1
        graph = Graph(self.graph.store, self.c1)
        graph.add(triple)

        self.assertEquals(len(graph), 2)
        self.assertEquals(len(self.graph), 2)

    def testAdd(self):
        self.addStuff()

    def testRemove(self):
        self.addStuff()
        self.removeStuff()

    def testLenInOneContext(self):
        """len() of a single context counts only that context's triples."""
        c1 = self.c1
        # make sure context is empty

        self.graph.remove_context(self.get_context(c1))
        graph = Graph(self.graph.store, c1)
        oldLen = len(self.graph)

        for _ in range(0, 10):
            graph.add((BNode(), self.hates, self.hates))
        self.assertEquals(len(graph), oldLen + 10)
        self.assertEquals(len(self.get_context(c1)), oldLen + 10)
        self.graph.remove_context(self.get_context(c1))
        self.assertEquals(len(self.graph), oldLen)
        self.assertEquals(len(graph), 0)

    def testLenInMultipleContexts(self):
        oldLen = len(self.graph)
        self.addStuffInMultipleContexts()

        # addStuffInMultipleContexts is adding the same triple to
        # three different contexts. So it's only + 1
        self.assertEquals(len(self.graph), oldLen + 1)

        graph = Graph(self.graph.store, self.c1)
        self.assertEquals(len(graph), oldLen + 1)

    def testRemoveInMultipleContexts(self):
        """Removing per context leaves the triple until all contexts drop it."""
        c1 = self.c1
        c2 = self.c2
        triple = (self.pizza, self.hates, self.tarek)

        self.addStuffInMultipleContexts()

        # triple should be still in store after removing it from c1 + c2
        self.assertIn(triple, self.graph)
        graph = Graph(self.graph.store, c1)
        graph.remove(triple)
        self.assertIn(triple, self.graph)
        graph = Graph(self.graph.store, c2)
        graph.remove(triple)
        self.assertIn(triple, self.graph)
        self.graph.remove(triple)
        # now gone!
        self.assertNotIn(triple, self.graph)

        # add again and see if remove without context removes all triples!
        self.addStuffInMultipleContexts()
        self.graph.remove(triple)
        self.assertNotIn(triple, self.graph)

    def testContexts(self):
        """contexts() lists contexts, optionally filtered by a triple."""
        triple = (self.pizza, self.hates, self.tarek)

        self.addStuffInMultipleContexts()

        def cid(c):
            # contexts() may yield Graphs or plain identifiers
            # (`basestring` makes this Python-2-only)
            if not isinstance(c, basestring):
                return c.identifier
            return c

        self.assertIn(self.c1, [cid(c) for c in self.graph.contexts()])
        self.assertIn(self.c2, [cid(c) for c in self.graph.contexts()])

        contextList = [cid(c) for c in self.graph.contexts(triple)]
        self.assertIn(self.c1, contextList)
        self.assertIn(self.c2, contextList)

    def testRemoveContext(self):
        c1 = self.c1

        self.addStuffInMultipleContexts()
        self.assertEquals(len(Graph(self.graph.store, c1)), 1)
        self.assertEquals(len(self.get_context(c1)), 1)

        self.graph.remove_context(self.get_context(c1))
        self.assert_(self.c1 not in self.graph.contexts())

    def testRemoveAny(self):
        """Wildcard remove empties the whole conjunctive graph."""
        Any = None
        self.addStuffInMultipleContexts()
        self.graph.remove((Any, Any, Any))
        self.assertEquals(len(self.graph), 0)

    def testTriples(self):
        """Exhaustive triples()/accessor checks, with and without context."""
        triples = self.graph.triples
        graph = self.graph
        c1graph = Graph(self.graph.store, self.c1)
        c1triples = c1graph.triples
        Any = None

        self.addStuff()

        # unbound subjects with context
        self.assertEquals(len(list(c1triples((Any, self.likes, self.pizza)))), 2)
        self.assertEquals(len(list(c1triples((Any, self.hates, self.pizza)))), 1)
        self.assertEquals(len(list(c1triples((Any, self.likes, self.cheese)))), 3)
        self.assertEquals(len(list(c1triples((Any, self.hates, self.cheese)))), 0)

        # unbound subjects without context, same results!
        self.assertEquals(len(list(triples((Any, self.likes, self.pizza)))), 2)
        self.assertEquals(len(list(triples((Any, self.hates, self.pizza)))), 1)
        self.assertEquals(len(list(triples((Any, self.likes, self.cheese)))), 3)
        self.assertEquals(len(list(triples((Any, self.hates, self.cheese)))), 0)

        # unbound objects with context
        self.assertEquals(len(list(c1triples((self.michel, self.likes, Any)))), 2)
        self.assertEquals(len(list(c1triples((self.tarek, self.likes, Any)))), 2)
        self.assertEquals(len(list(c1triples((self.bob, self.hates, Any)))), 2)
        self.assertEquals(len(list(c1triples((self.bob, self.likes, Any)))), 1)

        # unbound objects without context, same results!
        self.assertEquals(len(list(triples((self.michel, self.likes, Any)))), 2)
        self.assertEquals(len(list(triples((self.tarek, self.likes, Any)))), 2)
        self.assertEquals(len(list(triples((self.bob, self.hates, Any)))), 2)
        self.assertEquals(len(list(triples((self.bob, self.likes, Any)))), 1)

        # unbound predicates with context
        self.assertEquals(len(list(c1triples((self.michel, Any, self.cheese)))), 1)
        self.assertEquals(len(list(c1triples((self.tarek, Any, self.cheese)))), 1)
        self.assertEquals(len(list(c1triples((self.bob, Any, self.pizza)))), 1)
        self.assertEquals(len(list(c1triples((self.bob, Any, self.michel)))), 1)

        # unbound predicates without context, same results!
        self.assertEquals(len(list(triples((self.michel, Any, self.cheese)))), 1)
        self.assertEquals(len(list(triples((self.tarek, Any, self.cheese)))), 1)
        self.assertEquals(len(list(triples((self.bob, Any, self.pizza)))), 1)
        self.assertEquals(len(list(triples((self.bob, Any, self.michel)))), 1)

        # unbound subject, objects with context
        self.assertEquals(len(list(c1triples((Any, self.hates, Any)))), 2)
        self.assertEquals(len(list(c1triples((Any, self.likes, Any)))), 5)

        # unbound subject, objects without context, same results!
        self.assertEquals(len(list(triples((Any, self.hates, Any)))), 2)
        self.assertEquals(len(list(triples((Any, self.likes, Any)))), 5)

        # unbound predicates, objects with context
        self.assertEquals(len(list(c1triples((self.michel, Any, Any)))), 2)
        self.assertEquals(len(list(c1triples((self.bob, Any, Any)))), 3)
        self.assertEquals(len(list(c1triples((self.tarek, Any, Any)))), 2)

        # unbound predicates, objects without context, same results!
        self.assertEquals(len(list(triples((self.michel, Any, Any)))), 2)
        self.assertEquals(len(list(triples((self.bob, Any, Any)))), 3)
        self.assertEquals(len(list(triples((self.tarek, Any, Any)))), 2)

        # unbound subjects, predicates with context
        self.assertEquals(len(list(c1triples((Any, Any, self.pizza)))), 3)
        self.assertEquals(len(list(c1triples((Any, Any, self.cheese)))), 3)
        self.assertEquals(len(list(c1triples((Any, Any, self.michel)))), 1)

        # unbound subjects, predicates without context, same results!
        self.assertEquals(len(list(triples((Any, Any, self.pizza)))), 3)
        self.assertEquals(len(list(triples((Any, Any, self.cheese)))), 3)
        self.assertEquals(len(list(triples((Any, Any, self.michel)))), 1)

        # all unbound with context
        self.assertEquals(len(list(c1triples((Any, Any, Any)))), 7)
        # all unbound without context, same result!
        self.assertEquals(len(list(triples((Any, Any, Any)))), 7)

        # Accessor helpers must agree between the conjunctive graph and c1
        for c in [graph, self.get_context(self.c1)]:
            # unbound subjects
            self.assertEquals(set(c.subjects(self.likes, self.pizza)), {self.michel, self.tarek})
            self.assertEquals(set(c.subjects(self.hates, self.pizza)), {self.bob})
            self.assertEquals(set(c.subjects(self.likes, self.cheese)), {self.tarek, self.bob, self.michel})
            self.assertEquals(set(c.subjects(self.hates, self.cheese)), set())

            # unbound objects
            self.assertEquals(set(c.objects(self.michel, self.likes)), {self.cheese, self.pizza})
            self.assertEquals(set(c.objects(self.tarek, self.likes)), {self.cheese, self.pizza})
            self.assertEquals(set(c.objects(self.bob, self.hates)), {self.michel, self.pizza})
            self.assertEquals(set(c.objects(self.bob, self.likes)), {self.cheese})

            # unbound predicates
            self.assertEquals(set(c.predicates(self.michel, self.cheese)), {self.likes})
            self.assertEquals(set(c.predicates(self.tarek, self.cheese)), {self.likes})
            self.assertEquals(set(c.predicates(self.bob, self.pizza)), {self.hates})
            self.assertEquals(set(c.predicates(self.bob, self.michel)), {self.hates})

            self.assertEquals(set(c.subject_objects(self.hates)), {(self.bob, self.pizza), (self.bob, self.michel)})
            self.assertEquals(set(c.subject_objects(self.likes)),
                    {(self.tarek, self.cheese), (self.michel, self.cheese), (self.michel, self.pizza), (self.bob, self.cheese), (self.tarek, self.pizza)})

            self.assertEquals(set(c.predicate_objects(self.michel)), {(self.likes, self.cheese), (self.likes, self.pizza)})
            self.assertEquals(set(c.predicate_objects(self.bob)), {(self.likes, self.cheese), (self.hates, self.pizza), (self.hates, self.michel)})
            self.assertEquals(set(c.predicate_objects(self.tarek)), {(self.likes, self.cheese), (self.likes, self.pizza)})

            self.assertEquals(set(c.subject_predicates(self.pizza)), {(self.bob, self.hates), (self.tarek, self.likes), (self.michel, self.likes)})
            self.assertEquals(set(c.subject_predicates(self.cheese)), {(self.bob, self.likes), (self.tarek, self.likes), (self.michel, self.likes)})
            self.assertEquals(set(c.subject_predicates(self.michel)), {(self.bob, self.hates)})

            self.assertEquals(set(c), {(self.bob, self.hates, self.michel), (self.bob, self.likes, self.cheese), (self.tarek, self.likes, self.pizza),
                (self.michel, self.likes, self.pizza), (self.michel, self.likes, self.cheese), (self.bob, self.hates, self.pizza),
                (self.tarek, self.likes, self.cheese)})

        # remove stuff and make sure the graph is empty again
        self.removeStuff()
        self.assertEquals(len(list(c1triples((Any, Any, Any)))), 0)
        self.assertEquals(len(list(triples((Any, Any, Any)))), 0)
예제 #39
0
# Load up RDFLib

from rdflib import *
from rdflib.graph import ConjunctiveGraph
from rdflib.namespace import Namespace
from rdflib.term import URIRef
from rdflib.parser import StringInputSource

# Firstly, it doesn't have to be so complex.
# Here we create a "Graph" of our work.
# Think of it as a blank piece of graph paper!

primer = ConjunctiveGraph()
myNS = Namespace('#')

primer.add((myNS.pat, myNS.knows, myNS.jo))
# or:
# BUG FIX: `long(24)` is Python 2 only (NameError under Python 3).
# Wrap the plain value in Literal so rdflib stores a typed RDF literal.
primer.add((myNS['pat'], myNS['age'], Literal(24)))

# Now, with just that, lets see how the system
# recorded *way* too many details about what
# you just asserted as fact.
#

from pprint import pprint
pprint(list(primer))

# just think .whatever((s, p, o))
# here we report on what we know

pprint(list(primer.subjects()))
예제 #40
0
default_graph_uri = "http://id.southampton.ac.uk/dataset/places"
# BUG FIX: the original line was `configString = ` with no value — a
# SyntaxError. Supply the MySQL connection string the store plugin expects;
# adjust the credentials for your environment.
configString = "host=localhost,user=username,password=password,db=rdfstore"

# Get the mysql plugin. You may have to install the python mysql libraries
store = plugin.get('MySQL', Store)('rdfstore')

# Open previously created store, or create it if it doesn't exist yet
rt = store.open(configString, create=False)
if rt == NO_STORE:
    # There is no underlying MySQL infrastructure, create it
    store.open(configString, create=True)
else:
    assert rt == VALID_STORE, "The underlying store is corrupted"

# There is a store, use it
graph = Graph(store, identifier=URIRef(default_graph_uri))

print("Triples in graph before add: %s" % len(graph))

# Now we'll add some triples to the graph & commit the changes
rdflib = Namespace('http://rdflib.net/test/')
graph.add((rdflib['pic:1'], rdflib['name'], Literal('Jane & Bob')))
graph.add((rdflib['pic:2'], rdflib['name'], Literal('Squirrel in Tree')))
graph.commit()

# BUG FIX: the format string was missing its conversion type ("%" -> "%s"),
# which raises ValueError at runtime.
print("Triples in graph after add: %s" % len(graph))

# display the graph in RDF/XML
print(graph.serialize())
예제 #41
0
def locationtoturtle(ellist, meta):
    """Serialize a list of location records plus dataset metadata as Turtle.

    Prints an HTTP Content-type header followed by the serialized graph to
    stdout — this appears intended to run as a CGI script.

    :param ellist: iterable of location objects (attributes: id, name,
        address, category, lat, long, tel, note, opening, closing)
    :param meta: object carrying dataset metadata (creator, created,
        version, valid, source)
    """
    rdf = Graph()
    cs = Namespace("http://cs.unibo.it/ontology/")
    colon = Namespace("http://www.essepuntato.it/resource/")
    dcterms = Namespace("http://purl.org/dc/terms/")
    xsd = Namespace("http://www.w3.org/2001/XMLSchema#")
    this = Namespace("http://vitali.web.cs.unibo.it/twiki/pub/TechWeb12/DataSource2/posteBO2011.ttl#")
    vcard = Namespace("http://www.w3.org/2006/vcard/ns#")
    rdf.bind("vcard", vcard)
    rdf.bind("cs", cs)
    rdf.bind("", colon)
    rdf.bind("dcterms", dcterms)
    rdf.bind("xsd", xsd)
    rdf.bind("this", this)
    # Dataset-level metadata triples.
    rdf.add((this["metadata"], dcterms["creator"], Literal(meta.creator)))
    rdf.add((this["metadata"], dcterms["created"], Literal(meta.created, datatype=XSD.date)))
    rdf.add((this["metadata"], dcterms["description"], Literal(meta.version)))
    rdf.add((this["metadata"], dcterms["valid"], Literal(meta.valid, datatype=XSD.date)))
    rdf.add((this["metadata"], dcterms["source"], Literal(meta.source)))
    # One vCard per location; tel and note are optional fields.
    for location in ellist:
        rdf.add((colon[location.id], vcard["fn"], Literal(location.name)))
        rdf.add((colon[location.id], vcard["extended-address"], Literal(location.address)))
        rdf.add((colon[location.id], vcard["category"], Literal(location.category)))
        rdf.add((colon[location.id], vcard["latitude"], Literal(location.lat)))
        rdf.add((colon[location.id], vcard["longitude"], Literal(location.long)))
        if location.tel:
            rdf.add((colon[location.id], vcard["tel"], Literal(location.tel)))
        if location.note:
            rdf.add((colon[location.id], vcard["note"], Literal(location.note)))
        rdf.add((colon[location.id], cs["opening"], Literal(location.opening)))
        rdf.add((colon[location.id], cs["closing"], Literal(location.closing)))
    print("Content-type: text/turtle; charset=UTF-8\n")
    # BUG FIX: `print rdf.serialize(...)` was Python 2 print-statement
    # syntax — a SyntaxError under Python 3. Call print() as a function.
    print(rdf.serialize(format="n3"))
예제 #42
0
def make_property_graph(properties, args):
    """Build a graph of OWL property declarations harvested from ontologies.

    Parses a fixed list of ontologies, copies their object / annotation /
    datatype property declarations into a fresh graph, adds a few hardcoded
    Monarch properties, and rewrites the dataset's ontology-header URI from
    ``args.input`` to ``args.output``.

    :param properties: collection passed through to add_property_to_graph
    :param args: namespace with ``input`` and ``output`` path attributes
    :return: a ConjunctiveGraph containing only property declarations
    """
    graph = ConjunctiveGraph()
    output_graph = ConjunctiveGraph()

    ontologies = [
        'https://raw.githubusercontent.com/monarch-initiative/SEPIO-ontology/master/src/ontology/sepio.owl',
        'https://raw.githubusercontent.com/monarch-initiative/GENO-ontology/develop/src/ontology/geno.owl',
        'http://purl.obolibrary.org/obo/ro.owl',
        'http://purl.obolibrary.org/obo/iao.owl',
        'http://purl.obolibrary.org/obo/ero.owl',
        'https://raw.githubusercontent.com/jamesmalone/OBAN/master/ontology/oban_core.ttl',
        'http://purl.obolibrary.org/obo/pco.owl',
        'http://purl.obolibrary.org/obo/xco.owl'
    ]

    for ontology in ontologies:
        print("parsing: " + ontology)
        try:
            graph.parse(ontology, format=rdflib_util.guess_format(ontology))
        except SAXParseException as e:
            # Some ".owl" files are actually turtle; retry with that format.
            logger.error(e)
            logger.error('Retrying: ' + ontology)
            graph.parse(ontology, format="turtle")
        except OSError as e:  # URLError:
            # simple retry
            logger.error(e)
            logger.error('Retrying: ' + ontology)
            graph.parse(ontology, format=rdflib_util.guess_format(ontology))

    # Get object properties
    output_graph = add_property_to_graph(
        graph.subjects(RDF['type'], OWL['ObjectProperty']),
        output_graph, OWL['ObjectProperty'], properties)

    # Get annotation properties
    output_graph = add_property_to_graph(
        graph.subjects(RDF['type'], OWL['AnnotationProperty']),
        output_graph, OWL['AnnotationProperty'], properties)

    # Get data properties
    output_graph = add_property_to_graph(
        graph.subjects(RDF['type'], OWL['DatatypeProperty']),
        output_graph, OWL['DatatypeProperty'], properties)

    # Hardcoded properties
    output_graph.add(
        (URIRef('https://monarchinitiative.org/MONARCH_cliqueLeader'),
            RDF['type'], OWL['AnnotationProperty']))

    output_graph.add(
        (URIRef('https://monarchinitiative.org/MONARCH_anonymous'),
            RDF['type'], OWL['AnnotationProperty']))

    # Check monarch data triple
    data_url = "https://data.monarchinitiative.org/ttl/{0}".format(
        re.sub(r".*/", "", args.input))
    new_url = "https://data.monarchinitiative.org/ttl/{0}".format(
        re.sub(r".*/", "", args.output))
    if (URIRef(data_url), RDF.type, OWL['Ontology']) in output_graph:
        # BUG FIX: Graph.remove() takes a single (s, p, o) tuple, not three
        # positional arguments — the original call raised TypeError.
        output_graph.remove((URIRef(data_url), RDF.type, OWL['Ontology']))

    output_graph.add((URIRef(new_url), RDF.type, OWL['Ontology']))

    for row in output_graph.predicates(
            DC['source'], OWL['AnnotationProperty']):
        if row == RDF['type']:
            output_graph.remove(
                (DC['source'], RDF['type'], OWL['AnnotationProperty']))

    output_graph.add((DC['source'], RDF['type'], OWL['ObjectProperty']))

    return output_graph
예제 #43
0
class InMemoryStorage(object):
    """In-memory RDF store wrapping an rdflib ConjunctiveGraph.

    Context arguments may be either a string key into the module-level
    ``graph_dict`` or a URIRef naming a context graph directly; a falsy
    context targets the default (whole) graph.
    """

    def __init__(self):
        self.g = ConjunctiveGraph(store=IOMemory())

        # Register serialization prefixes (order preserved; 'qb' rebinds
        # the cube namespace after 'cube').
        for prefix, ns in (("lada", ns_lada), ('data', ns_data),
                           ('cube', ns_cube), ('qb', ns_cube),
                           ('lcd', ns_lcd), ('xsd', ns_xsd),
                           ('qb4cc', ns_qb4cc), ('skos', ns_skos)):
            self.g.bind(prefix, ns)

        # Namespace bindings handed to SPARQL queries.
        self.initNs = {
            'lada': ns_lada,
            'data': ns_data,
            'qb': ns_cube,
            'lcd': ns_lcd,
            'xsd': ns_xsd,
            'qb4cc': ns_qb4cc,
            'skos': ns_skos
        }

    def _context_graph(self, context):
        """Resolve a context key (graph_dict name or URIRef) to its graph."""
        if type(context) is str:
            return self.g.get_context(graph_dict[context])
        return self.g.get_context(context)

    def _concatenate_graphs(self, graphs):
        """Merge the named context graphs into one fresh Graph."""
        merged = Graph()
        for key in graphs:
            if key in graph_dict:
                merged += self.g.get_context(graph_dict[key])
            elif type(key) is URIRef:
                merged += self.g.get_context(key)
        return merged

    def add_triple(self, triple, context):
        """Add one triple, to a named context or the default graph."""
        if not context:
            self.g.add(triple)
        else:
            self._context_graph(context).add(triple)

    def add_graph(self, graph, context):
        """Merge all triples of *graph* into a context or the default graph."""
        if context:
            target = self._context_graph(context)
            target += graph
        else:
            self.g += graph

    def add_file(self, file, format, context):
        """Parse a file into a context or the default graph."""
        if not context:
            self.g.parse(file, format=format)
        else:
            self._context_graph(context).parse(file, format=format)

    def query(self, queryString, contexts):
        """Run a SPARQL query against one context, a merged list, or all."""
        if not contexts:
            return self.g.query(queryString, initNs=self.initNs)
        if type(contexts) is list:
            target = self._concatenate_graphs(contexts)
        else:
            target = self._context_graph(contexts)
        return target.query(queryString, initNs=self.initNs)

    def value(self, subject, predicate, context):
        """Return a single value for (subject, predicate) in the chosen scope."""
        source = self._context_graph(context) if context else self.g
        return source.value(subject, predicate)

    def remove(self, triple_pattern, contexts):
        """Remove matching triples from a context list, one named context,
        or the default graph (non-list contexts must be graph_dict keys)."""
        if not contexts:
            self.g.remove(triple_pattern)
        elif type(contexts) is list:
            self._concatenate_graphs(contexts).remove(triple_pattern)
        else:
            self.g.get_context(graph_dict[contexts]).remove(triple_pattern)

    def clear(self, context):
        """Drop an entire context, or wipe the default graph."""
        if context:
            self.g.remove_context(self._context_graph(context))
        else:
            self.g.remove((None, None, None))

    def count_triples(self):
        """Return the total number of triples in the store."""
        return sum(1 for _ in self.g)

    def export(self, context):
        """Serialize a named context to '<context>.ttl' as Turtle."""
        if type(context) is str:
            self.g.get_context(graph_dict[context]).serialize(context + ".ttl", format="turtle")
예제 #44
0
파일: entity.py 프로젝트: inpho/inphosite
    def query_lode(self, id):
        """Map a thinker's InPhO relations onto DBpedia and return new RDF.

        Loads the InPhO LODE dump, finds owl:sameAs links between InPhO
        thinkers and DBpedia resources, queries DBpedia for related entities
        via the mapped properties, and returns a serialized graph of the
        triples not already present on the native entity.

        :param id: InPhO thinker id (string); note this shadows builtins.id,
            kept for interface compatibility.
        :return: serialized RDF (bytes/str from Graph.serialize)
        """
        var = "http://inpho.cogs.indiana.edu/thinker/" + id
        # initialize dictionaries to store temporray results
        dbPropResults = {}
        inpho_DB = {}
        DB_inpho = {}
        dbpedia_web = {}
        triples = {}

        # init graphs for LODE and mapped data
        gLODE = ConjunctiveGraph()
        gReturn = ConjunctiveGraph()
        # import InPhO data
        gLODE.parse("http://inphodata.cogs.indiana.edu/lode/out_n3.20140207.rdf", format="n3")

        # builds a set of triples with the inpho id as the first entry and the
        # dbpedia id as the second
        resultsLODE = gLODE.query(
            """
            SELECT ?thinker_LODE ?thinkerDB
            WHERE { ?thinker_LODE owl:sameAs ?thinkerDB 
                    FILTER (regex(str(?thinker_LODE),"http://inpho.cogs.indiana.edu","i")
                    && regex(str(?thinkerDB),"http://dbpedia.org/resource/","i")).
                   }
            """
        )

        # load in property mapping between inpho-dbpedia
        prop_map_filename = config.get_data_path("rdf_map.txt")
        with open(prop_map_filename, "r") as f:
            dbprops = csv.reader(f, delimiter="\t")
            for dbprop in dbprops:
                dbPropResults[dbprop[1]] = dbprop[0]
                dbpedia_web[dbprop[1].split(":")[1]] = dbprop[2]

        # iterate through triples and store mappings
        for triple in resultsLODE:
            inpho_DB[str(triple[0])] = str(triple[1])  # store the results in key as inpho url and value as dbpedia url
            DB_inpho[str(triple[1])] = str(triple[0])  # store the results in key as dbpedia url and value as inpho url

        # queries for all relationships in dbpedia
        sparqlDB = SPARQLWrapper("http://inpho-dataserve.cogs.indiana.edu:8890/sparql/")
        sparqlDB.setReturnFormat(JSON)
        # BUG FIX: dict.iteritems() is Python 2 only; use items() throughout.
        for inpho, DB in inpho_DB.items():
            predicate = {}
            # for dbprop in dbPropResults:
            if str(DB_inpho.get(DB)) == var:
                for dbprop in dbPropResults:
                    sparqlDB.setQuery(
                        """ PREFIX dbpprop: <http://dbpedia.org/ontology/>
                                      SELECT ?b  WHERE { <"""
                        + DB
                        + """> """
                        + dbprop
                        + """ ?b.
                                                        FILTER (regex(str(?b),"dbpedia.org/resource/","i")).
                                                        }"""
                    )
                    resultsDB = sparqlDB.query().convert()
                    predicate[dbprop] = resultsDB["results"]["bindings"]
                triples[DB] = predicate

                # retrieve native python object
        c.entity = h.fetch_obj(Entity, id, new_id=True)
        existing_predicate_list = []
        existing_object_list = []

        predicates_to_compare = ["influenced", "influenced_by", "teachers", "students"]

        # collect relations that already exist on the native entity so we
        # do not duplicate them in the returned graph
        for subject, predicate in triples.items():
            for predicate1, objectn in predicate.items():
                predicate_to_match = predicate1.split(":")[1]
                attr = getattr(c.entity, dbpedia_web[predicate_to_match])

                for attr1 in attr:
                    if dbpedia_web[predicate_to_match] in predicates_to_compare:
                        existing_predicate_list.append(dbpedia_web[predicate_to_match] + ":" + attr1.wiki)

                        # maps from dbpedia relationships back to inpho relationships
        for subject, predicate in triples.items():
            # attr = getattr(c.entity, predicate)
            # raise Exception

            for predicate1, objectn in predicate.items():

                for object1 in objectn:
                    # temp_str=dbpedia_web[predicate1.split(":")[1]] + ':'+str(object1['b']['value']).split("/")[len(str(object1['b']['value']).split("/"))-1].replace("_"," ")
                    temp_str = (
                        dbpedia_web[predicate1.split(":")[1]]
                        + ":"
                        + str(object1["b"]["value"]).split("/")[len(str(object1["b"]["value"]).split("/")) - 1]
                    )

                    # 	   raise Exception
                    if temp_str not in existing_predicate_list:
                        # returns the inphoid for the object
                        DB_Entry = DB_inpho.get(object1["b"]["value"])  # reverse lookup for the inpho data check

                        # if there is not an inpho id, leave it as the dbpedia id
                        # (use `is None`, not `== None`, per PEP 8)
                        if DB_Entry is None:
                            gReturn.add(
                                (URIRef(subject), URIRef(dbPropResults.get(predicate1)), URIRef(object1["b"]["value"]))
                            )
                        else:
                            # return the properly mapped id
                            # TODO: use attr to filter DB_Entry
                            gReturn.add((URIRef(subject), URIRef(dbPropResults.get(predicate1)), URIRef(DB_Entry)))

                        #  if "Francisco" in str(object1['b']['value']).split("/")[len(str(object1['b']['value']).split("/"))-1].replace("_", ):

        #        raise Exception
        return gReturn.serialize()
예제 #45
0
    # Read the tab-separated mapping file; each row carries a raw IMDB id
    # and a DBpedia/wiki URI. NOTE(review): the enclosing statement (likely
    # a `with open(...)` block binding `fd`) is above this fragment.
    data = csv.DictReader(fd, delimiter="\t", quotechar='"', escapechar='')
    for r in data:
        raw_id = r['raw_id']

        # Check if valid with regex
        # Accepts an optional "tt" prefix followed by a 7-10 digit id.
        match = re.match(r"^(tt)*(?P<id>\d{7,10}).*", raw_id)
        if not match:
            # Malformed id: record it and move on.
            progress.count()
            wrongs.append(raw_id)
            continue

        # Group 2 is the digits-only id (same as the named group "id").
        imdb_id = match.group(2)
        film_node = n['Movie/tt' + imdb_id]

        # Create a node for dbpedia
        uri = r['uri']
        wiki_node = URIRef(uri)
        g.add((film_node, n['has' + source + 'Node'], wiki_node))

        progress.count()
        if progress.finished():
            break

# Persist the accumulated graph and report run statistics.
g.serialize(destination=outfile, format='turtle')
end = time.time()

print('Wrong formatted IMDB IDs found: ', len(wrongs))
print(wrongs)
print("Total Items Processed: ", progress.total)
print("Total Time: ", end - start)
g.close()
예제 #46
0
    def encode_container(self, bundle, container=None, identifier=None):
        """Encode a PROV bundle's records into an RDF graph.

        Creates (or extends) a ConjunctiveGraph, binds the bundle's
        namespaces, and emits one set of triples per record: a typed node
        for identified records, qualified-relation bnodes for relations,
        and plain attribute triples otherwise. Predicate URIs are rewritten
        per record type (e.g. prov:time -> prov:atTime) to match PROV-O.

        :param bundle: PROV bundle whose namespaces and _records are encoded
        :param container: existing graph to extend, or None to create one
        :param identifier: identifier for a newly created graph
        :return: the populated container graph
        """
        if container is None:
            container = ConjunctiveGraph(identifier=identifier)
            nm = container.namespace_manager
            nm.bind('prov', PROV.uri)

        for namespace in bundle.namespaces:
            container.bind(namespace.prefix, namespace.uri)

        # Records without an explicit identifier get a generated anonymous id.
        id_generator = AnonymousIDGenerator()
        real_or_anon_id = lambda record: record._identifier.uri if \
            record._identifier else id_generator.get_anon_id(record)

        for record in bundle._records:
            rec_type = record.get_type()
            if hasattr(record, 'identifier') and record.identifier:
                identifier = URIRef(six.text_type(real_or_anon_id(record)))
                container.add((identifier, RDF.type, URIRef(rec_type.uri)))
            else:
                identifier = None
            if record.attributes:
                bnode = None
                formal_objects = []
                used_objects = []
                all_attributes = list(record.formal_attributes) + list(
                    record.attributes)
                # A relation needs a qualified form when it carries extra
                # attributes or formal attributes beyond subject/object.
                formal_qualifiers = False
                for attrid, (attr, value) in enumerate(
                        list(record.formal_attributes)):
                    if (identifier is not None and value is not None) or \
                            (identifier is None and value is not None and attrid > 1):
                        formal_qualifiers = True
                has_qualifiers = len(
                    record.extra_attributes) > 0 or formal_qualifiers
                for idx, (attr, value) in enumerate(all_attributes):
                    if record.is_relation():
                        # Pick the direct predicate for this relation type
                        # from the PROV or PROVONE vocabulary.
                        if rec_type.namespace.prefix == 'prov':
                            pred = URIRef(PROV[PROV_N_MAP[rec_type]].uri)
                        else:
                            pred = URIRef(PROVONE[PROVONE_N_MAP[rec_type]].uri)
                        # create bnode relation
                        if bnode is None:
                            valid_formal_indices = set()
                            for idx, (key, val) in enumerate(
                                    record.formal_attributes):
                                formal_objects.append(key)
                                if val:
                                    valid_formal_indices.add(idx)
                            used_objects = [record.formal_attributes[0][0]]
                            subj = None
                            if record.formal_attributes[0][1]:
                                subj = URIRef(
                                    record.formal_attributes[0][1].uri)
                            if identifier is None and subj is not None:
                                try:
                                    obj_val = record.formal_attributes[1][1]
                                    obj_attr = URIRef(
                                        record.formal_attributes[1][0].uri)
                                    # TODO: Why is obj_attr above not used anywhere?
                                except IndexError:
                                    obj_val = None
                                # Emit the unqualified (direct) triple when the
                                # relation is simple enough to not need a
                                # qualified bnode form.
                                if obj_val and (rec_type not in {
                                        PROV_END, PROV_START, PROV_USAGE,
                                        PROV_GENERATION, PROV_DERIVATION,
                                        PROV_ASSOCIATION, PROV_INVALIDATION
                                } or (valid_formal_indices == {0, 1}
                                      and len(record.extra_attributes) == 0)):
                                    used_objects.append(
                                        record.formal_attributes[1][0])
                                    obj_val = self.encode_rdf_representation(
                                        obj_val)
                                    # prov:alternateOf is encoded in reverse.
                                    if rec_type == PROV_ALTERNATE:
                                        subj, obj_val = obj_val, subj
                                    container.add((subj, pred, obj_val))
                                    if rec_type == PROV_MENTION:
                                        if record.formal_attributes[2][1]:
                                            used_objects.append(
                                                record.formal_attributes[2][0])
                                            obj_val = self.encode_rdf_representation(
                                                record.formal_attributes[2][1])
                                            container.add(
                                                (subj,
                                                 URIRef(
                                                     PROV['asInBundle'].uri),
                                                 obj_val))
                                        has_qualifiers = False
                            if rec_type in [PROV_ALTERNATE]:
                                continue
                            if subj and (has_qualifiers or identifier):
                                # Build the qualified form: subj --qualifiedX-->
                                # (identifier or fresh bnode) rdf:type X.
                                qualifier = rec_type._localpart
                                rec_uri = rec_type.uri
                                # Revision/Quotation/PrimarySource specialize
                                # the qualified Derivation class.
                                for attr_name, val in record.extra_attributes:
                                    if attr_name == PROV['type']:
                                        if PROV['Revision'] == val or \
                                              PROV['Quotation'] == val or \
                                                PROV['PrimarySource'] == val:
                                            qualifier = val._localpart
                                            rec_uri = val.uri
                                            if identifier is not None:
                                                container.remove(
                                                    (identifier, RDF.type,
                                                     URIRef(rec_type.uri)))
                                QRole = URIRef(PROV['qualified' +
                                                    qualifier].uri)
                                if identifier is not None:
                                    container.add((subj, QRole, identifier))
                                else:
                                    bnode = identifier = BNode()
                                    container.add((subj, QRole, identifier))
                                    container.add(
                                        (identifier, RDF.type, URIRef(rec_uri)
                                         ))  # reset identifier to BNode
                        if value is not None and attr not in used_objects:
                            # Map the attribute to its PROV-O predicate,
                            # with per-record-type special cases below.
                            if attr in formal_objects:
                                pred = attr2rdf(attr)
                            elif attr == PROV['role']:
                                pred = URIRef(PROV['hadRole'].uri)
                            elif attr == PROV['plan']:
                                pred = URIRef(PROV['hadPlan'].uri)
                            elif attr == PROV['type']:
                                pred = RDF.type
                            elif attr == PROV['label']:
                                pred = RDFS.label
                            elif isinstance(attr, pm.QualifiedName):
                                pred = URIRef(attr.uri)
                            else:
                                pred = self.encode_rdf_representation(attr)
                            if PROV['plan'].uri in pred:
                                pred = URIRef(PROV['hadPlan'].uri)
                            if PROV['informant'].uri in pred:
                                pred = URIRef(PROV['activity'].uri)
                            if PROV['responsible'].uri in pred:
                                pred = URIRef(PROV['agent'].uri)
                            if rec_type == PROV_DELEGATION and \
                                            PROV['activity'].uri in pred:
                                pred = URIRef(PROV['hadActivity'].uri)
                            if (rec_type in [PROV_END, PROV_START] and
                                            PROV['trigger'].uri in pred) or\
                                (rec_type in [PROV_USAGE] and
                                         PROV['used'].uri in pred):
                                pred = URIRef(PROV['entity'].uri)
                            if rec_type in [
                                    PROV_GENERATION, PROV_END, PROV_START,
                                    PROV_USAGE, PROV_INVALIDATION
                            ]:
                                if PROV['time'].uri in pred:
                                    pred = URIRef(PROV['atTime'].uri)
                                if PROV['ender'].uri in pred:
                                    pred = URIRef(PROV['hadActivity'].uri)
                                if PROV['starter'].uri in pred:
                                    pred = URIRef(PROV['hadActivity'].uri)
                                if PROV['location'].uri in pred:
                                    pred = URIRef(PROV['atLocation'].uri)
                            if rec_type in [PROV_ACTIVITY]:
                                if PROV_ATTR_STARTTIME in pred:
                                    pred = URIRef(PROV['startedAtTime'].uri)
                                if PROV_ATTR_ENDTIME in pred:
                                    pred = URIRef(PROV['endedAtTime'].uri)
                            if rec_type == PROV_DERIVATION:
                                if PROV['activity'].uri in pred:
                                    pred = URIRef(PROV['hadActivity'].uri)
                                if PROV['generation'].uri in pred:
                                    pred = URIRef(PROV['hadGeneration'].uri)
                                if PROV['usage'].uri in pred:
                                    pred = URIRef(PROV['hadUsage'].uri)
                                if PROV['usedEntity'].uri in pred:
                                    pred = URIRef(PROV['entity'].uri)
                            container.add(
                                (identifier, pred,
                                 self.encode_rdf_representation(value)))
                        continue
                    # Non-relation records: emit plain attribute triples.
                    if value is None:
                        continue
                    if isinstance(value, pm.ProvRecord):
                        obj = URIRef(six.text_type(real_or_anon_id(value)))
                    else:
                        #  Assuming this is a datetime value
                        obj = self.encode_rdf_representation(value)
                    if attr == PROV['location']:
                        pred = URIRef(PROV['atLocation'].uri)
                        # NOTE(review): this branch is dead code (`False and`);
                        # kept as-is, presumably disabled intentionally.
                        if False and isinstance(value,
                                                (URIRef, pm.QualifiedName)):
                            if isinstance(value, pm.QualifiedName):
                                value = URIRef(value.uri)
                            container.add((identifier, pred, value))
                        else:
                            container.add(
                                (identifier, pred,
                                 self.encode_rdf_representation(obj)))
                        continue
                    if attr == PROV['type']:
                        pred = RDF.type
                    elif attr == PROV['label']:
                        pred = RDFS.label
                    elif attr == PROV_ATTR_STARTTIME:
                        pred = URIRef(PROV['startedAtTime'].uri)
                    elif attr == PROV_ATTR_ENDTIME:
                        pred = URIRef(PROV['endedAtTime'].uri)
                    else:
                        pred = self.encode_rdf_representation(attr)
                    container.add((identifier, pred, obj))
        return container
예제 #47
0
    def build_graph(self):
        """Assemble the SIOC/FOAF RDF description of this MarkMail message
        and hand the finished graph to set_graph()."""
        g = ConjunctiveGraph()
        for prefix, ns in (('sioc', SIOC), ('foaf', FOAF), ('rdfs', RDFS),
                           ('dct', DCT), ('mvcb', MVCB)):
            g.bind(prefix, ns)

        generator = URIRef("http://swaml.berlios.de/doap#swaml")
        doc = URIRef(self.base)
        message = URIRef(self.get_uri())

        triples = [
            (doc, RDF.type, FOAF["Document"]),
            # FIXME: this label should go out of this api
            (doc, RDFS.label, Literal("RDF version of the message '%s' retrieved from MarkMail API" % self.key)),
            (doc, MVCB.generatorAgent, generator),
            (message, RDF.type, SIOC.Post),
            (message, RDF.type, SIOCT.MailMessage),
            (doc, FOAF.primaryTopic, message),
            (message, SIOC.id, Literal(self.key)),
            (message, SIOC.link, URIRef("http://markmail.org/message/%s" % self.key)),
            (message, DCT.title, Literal(self.title)),
            (message, SIOC.content, Literal(self.content)),
        ]
        for triple in triples:
            g.add(triple)

        self.set_graph(g)
예제 #48
0
def make_property_graph(properties, args):
    """Build a graph of OWL property declarations harvested from ontologies.

    Parses a fixed list of ontologies, copies their object / annotation /
    datatype property declarations into a fresh graph, adds a few hardcoded
    Monarch properties, and rewrites the dataset's ontology-header URI from
    ``args.input`` to ``args.output``.

    :param properties: collection passed through to add_property_to_graph
    :param args: namespace with ``input`` and ``output`` path attributes
    :return: a ConjunctiveGraph containing only property declarations
    """
    graph = ConjunctiveGraph()
    output_graph = ConjunctiveGraph()

    GH = 'https://raw.githubusercontent.com'
    OBO = 'https://purl.obolibrary.org/obo'
    ontologies = [
        OBO + '/sepio.owl',
        OBO + '/geno.owl',
        OBO + '/iao.owl',
        OBO + '/ero.owl',
        OBO + '/pco.owl',
        OBO + '/xco.owl',
        OBO + '/ro.owl',
        GH + '/jamesmalone/OBAN/master/ontology/oban_core.ttl',
    ]

    for ontology in ontologies:
        print("parsing: " + ontology)
        try:
            graph.parse(ontology, format=rdflib_util.guess_format(ontology))
        except SAXParseException as e:
            # Some ".owl" files are actually turtle; retry with that format.
            logger.error(e)
            logger.error('Retrying: ' + ontology)
            graph.parse(ontology, format="turtle")
        except OSError as e:  # URLError:
            # simple retry
            logger.error(e)
            logger.error('Retrying: ' + ontology)
            graph.parse(ontology, format=rdflib_util.guess_format(ontology))

    # Get object properties
    output_graph = add_property_to_graph(
        graph.subjects(RDF['type'], OWL['ObjectProperty']), output_graph,
        OWL['ObjectProperty'], properties)

    # Get annotation properties
    output_graph = add_property_to_graph(
        graph.subjects(RDF['type'], OWL['AnnotationProperty']), output_graph,
        OWL['AnnotationProperty'], properties)

    # Get data properties
    output_graph = add_property_to_graph(
        graph.subjects(RDF['type'], OWL['DatatypeProperty']), output_graph,
        OWL['DatatypeProperty'], properties)

    # Hardcoded properties
    output_graph.add(
        (URIRef('https://monarchinitiative.org/MONARCH_cliqueLeader'),
         RDF['type'], OWL['AnnotationProperty']))

    output_graph.add(
        (URIRef('https://monarchinitiative.org/MONARCH_anonymous'),
         RDF['type'], OWL['AnnotationProperty']))

    # Check monarch data triple
    data_url = "https://data.monarchinitiative.org/ttl/{0}".format(
        re.sub(r".*/", "", args.input))
    new_url = "https://data.monarchinitiative.org/ttl/{0}".format(
        re.sub(r".*/", "", args.output))
    if (URIRef(data_url), RDF.type, OWL['Ontology']) in output_graph:
        # BUG FIX: Graph.remove() takes a single (s, p, o) tuple, not three
        # positional arguments — the original call raised TypeError.
        output_graph.remove((URIRef(data_url), RDF.type, OWL['Ontology']))

    output_graph.add((URIRef(new_url), RDF.type, OWL['Ontology']))

    for row in output_graph.predicates(DC['source'],
                                       OWL['AnnotationProperty']):
        if row == RDF['type']:
            output_graph.remove(
                (DC['source'], RDF['type'], OWL['AnnotationProperty']))

    output_graph.add((DC['source'], RDF['type'], OWL['ObjectProperty']))

    return output_graph
예제 #49
0
def rdf_description(name, notation='xml'):
    """
    Build and print an RDF description of the node with the given title.

    Looks up the node by ``title``, picks the matching model class from its
    ``reftype``, adds one triple per (non-excluded) model field to an
    in-memory graph and serializes it in the requested ``notation``.

    :param name: title of the node to describe.
    :param notation: rdflib serialization format (e.g. 'xml', 'n3').
    :return: the serialized RDF as produced by ``graph.serialize``.
    """
    default_graph_uri = "http://gstudio.gnowledge.org/rdfstore"

    # Open (or create) an IOMemory-backed store in a fresh temp directory.
    graph = Graph(store="IOMemory", identifier=URIRef(default_graph_uri))
    path = mkdtemp()
    rt = graph.open(path, create=False)
    if rt == NO_STORE:
        graph.open(path, create=True)
    else:
        assert rt == VALID_STORE, "The underlying store is corrupt"

    graph.bind("gstudio", "http://gnowledge.org/")

    # Internal bookkeeping fields that must not become RDF predicates.
    exclusion_fields = [
        "id", "rght", "node_ptr_id", "image", "lft", "_state",
        "_altnames_cache", "_tags_cache", "nid_ptr_id", "_mptt_cached_fields"
    ]

    # Node types whose namespace comes from link(node).
    # NOTE(review): 'None', 'Processes' and 'System' were all resolved through
    # the Gbobject model in the original elif chain — preserved here; confirm.
    linked_models = {
        'Gbobject': Gbobject,
        'None': Gbobject,
        'Processes': Gbobject,
        'System': Gbobject,
        'Objecttype': Objecttype,
        'Attributetype': Attributetype,
        'Complement': Complement,
        'Union': Union,
        'Intersection': Intersection,
        'Expression': Expression,
        'Processtype': Processtype,
        'Systemtype': Systemtype,
        'AttributeSpecification': AttributeSpecification,
        'RelationSpecification': RelationSpecification,
    }
    # Node types that only get the fixed gstudio namespace.
    namespace_only_models = {
        'Attribute': Attribute,
        'Relationtype': Relationtype,
        'Metatype': Metatype,
    }

    # Verify the type of node, then re-fetch it through its concrete model.
    node = NID.objects.get(title=name)
    node_type = node.reftype

    if node_type in linked_models:
        node = linked_models[node_type].objects.get(title=name)
        rdflib = link(node)
    elif node_type in namespace_only_models:
        node = namespace_only_models[node_type].objects.get(title=name)
        rdflib = Namespace('http://sbox.gnowledge.org/gstudio/')
    else:
        # Unknown type: keep the NID instance, use the default namespace.
        rdflib = Namespace('http://sbox.gnowledge.org/gstudio/')

    node_dict = node.__dict__

    # One triple per model field: <ns:id> <ns:field> "value".
    subject = str(node_dict['id'])
    for key in node_dict:
        if key not in exclusion_fields:
            predicate = str(key)
            pobject = str(node_dict[predicate])
            graph.add((rdflib[subject], rdflib[predicate], Literal(pobject)))

    rdf_code = graph.serialize(format=notation)

    graph.commit()
    print(rdf_code)
    graph.close()
    # Bug fix: the serialized RDF was computed but never returned.
    return rdf_code
예제 #50
0
    def build_graph(self):
        """
        Build the RDF graph describing this thread and its messages.

        Creates a sioc:Thread for the MarkMail thread plus one sioc:Post
        (with a blank-node creator account) per entry in ``self.messages``,
        then stores the result via ``self.set_graph``.
        """
        graph = ConjunctiveGraph()
        graph.bind('sioc', SIOC)
        graph.bind('foaf', FOAF)
        graph.bind('rdfs', RDFS)
        graph.bind('dct', DCT)
        graph.bind('mvcb', MVCB)

        swaml = URIRef("http://swaml.berlios.de/doap#swaml")
        doc = URIRef("%s/thread/%s" % (self.base, self.key))
        graph.add((doc, RDF.type, FOAF["Document"]))
        graph.add((doc, RDFS.label, Literal("RDF version of the thread '%s' retrieved from MarkMail API" % self.key))) #FIXME: this should go out of this api
        graph.add((doc, MVCB.generatorAgent, swaml))
        thread = URIRef("%s/thread/%s#thread" % (self.base, self.key))
        graph.add((thread, RDF.type, SIOC["Thread"]))
        graph.add((doc, FOAF["primaryTopic"], thread))

        graph.add((thread, SIOC.id, Literal(self.key)))
        graph.add((thread, SIOC.link, URIRef(self.homepage)))
        graph.add((thread, DCT.title, Literal(self.title)))
        # Bug fix: XSD.Integer was passed positionally, landing in Literal's
        # *lang* slot instead of *datatype*; the XSD datatype is lowercase.
        # NOTE(review): sioc:num_items is the standard term; `num_item` kept
        # as-is to avoid a vocabulary change — confirm against the SIOC spec.
        graph.add((thread, SIOC.num_item, Literal(len(self.messages), datatype=XSD.integer)))
        for message in self.messages:
            url = "%s/message/%s" % (self.base, message["id"])
            post = URIRef("%s#message" % url)
            graph.add((post, RDF.type, SIOC.Post))
            graph.add((post, RDFS.seeAlso, URIRef(url)))
            graph.add((thread, SIOC.container_of, post))
            graph.add((post, SIOC.has_container, thread))
            graph.add((post, SIOC.id, Literal(self.key)))
            graph.add((post, SIOC.link, URIRef("http://markmail.org%s" % message["url"])))
            # The author is modeled as an anonymous sioc:UserAccount.
            author = BNode()
            graph.add((post, SIOC.has_creator, author))
            graph.add((author, RDF.type, SIOC.UserAccount))
            graph.add((author, SIOC.name, Literal(message["from"])))
            graph.add((post, DCT.created, Literal(message["date"], datatype=XSD.dateTime)))

        self.set_graph(graph)
예제 #51
0
class Hisco2RDF():
    '''
    Scrapes the HISCO Web site into a SKOS/DataCube RDF graph.

    The occupation hierarchy goes as "master > minor > rubri > micro";
    each level is followed by link and added to ``self.graph``. Downloaded
    pages are cached in a local SQLite database to avoid re-fetching.
    '''
    def __init__(self):
        # The graph to store the data
        self.graph = ConjunctiveGraph()
        self.graph.namespace_manager.bind('skos', SKOS)
        self.graph.namespace_manager.bind('hisco', HISCO)
        self.graph.namespace_manager.bind('dcterms', DCTERMS)
        self.graph.namespace_manager.bind('sdmx-dimension', SDMX_DIMENSION)
        self.graph.namespace_manager.bind('sdmx-code', SDMX_CODE)
        self.graph.namespace_manager.bind('qb', QB)

        # SQLite DB for the cache
        self.cache = sqlite3.connect('cache.db')
        cursor = self.cache.cursor()
        cursor.execute(
            "CREATE TABLE IF NOT EXISTS  page (url text, html text)")
        self.cache.commit()

    def __del__(self):
        # Close the page cache when the scraper is garbage-collected.
        self.cache.close()

    def get_page(self, url):
        """Return the page at *url* as BeautifulSoup, via the SQLite cache."""
        #log.debug("Load %s" % url)

        c = self.cache.cursor()
        c.execute("SELECT * FROM page WHERE url = ?", (url, ))
        res = c.fetchone()
        doc = None
        # Cache miss: fetch over HTTP and store the raw bytes for next time.
        if res == None:
            doc = requests.get(url).content
            c.execute("INSERT INTO page VALUES (?,?)", (url, doc))
            self.cache.commit()
        else:
            (_, doc) = res
        # NOTE(review): no explicit parser is passed to BeautifulSoup, so the
        # "best available" parser is used — results may vary per machine.
        return BeautifulSoup(doc)

    def save_output(self):
        """Finish the graph with DataCube metadata and write it to a file."""
        # Add more things needed for DataCubes
        dimprop = HISCO['occupation']
        self.graph.add((dimprop, RDF.type, QB['DimensionProperty']))
        self.graph.add((dimprop, RDFS.range, SKOS.Collection))
        self.graph.add((dimprop, QB['Concept'], SKOS.Collection))
        self.graph.add(
            (dimprop, RDFS.label, Literal('Occupation code', lang='en')))
        self.graph.add((dimprop, RDFS.comment,
                        Literal('The HISCO group of the occupation',
                                lang='en')))

        # Print to the screen
        #outfile = sys.stdout.buffer
        #self.graph.serialize(destination=outfile, format='n3')

        # Save to the file
        outfile = open('../hisco.ttl', "wb")
        self.graph.serialize(destination=outfile, format='n3')
        outfile.close()

    def parse_hisco_tree(self):
        '''
        Parse the hisco tree: major groups on the root page, then follow
        links down through minor > rubri > micro, adding each level to the
        graph and relating it to its parent with skos:related.
        '''
        # Load the page
        doc = self.get_page(ROOT + HISCO_TREE)

        # Find the major groups
        major_groups = []
        major_group = None
        # The page lays the groups out in borderless tables; walk every cell.
        for table in doc.find_all('table', attrs={'border': '0'}):
            for row in table.find_all('tr'):
                for col in row.find_all('td'):
                    # Skip empty rows
                    if len(col.text) == 1:
                        continue
                    # We are starting a new group
                    if col.text.startswith('Majorgroup'):
                        # Save the one we were building if any
                        if major_group != None:
                            major_groups.append(major_group)
                        m = re.search("Majorgroup ([^ ]*) ", col.text)
                        major_group = {}
                        major_group['title'] = col.text
                        # Codes like "7/8/9" become "7-8-9" for use in URIs.
                        major_group['code'] = m.group(1).replace('/', '-')
                    # We have a description
                    if col.text.startswith('Workers'):
                        major_group['description'] = col.text
                    # We have links to minor
                    if col.text.startswith('List Minor'):
                        link = col.find_all('a')[0]['href']
                        major_group.setdefault('links', [])
                        major_group['links'].append(link)
        # Add the last group in the making
        if major_group != None:
            major_groups.append(major_group)

        # Add the groups to the graph
        for group in major_groups:
            major_group_uri = self._get_group_uri(group['code'])
            self.graph.add((major_group_uri, RDF.type, SKOS['ConceptScheme']))
            self.graph.add(
                (major_group_uri, DCTERMS.title, Literal(group['title'])))
            self.graph.add((major_group_uri, DCTERMS.description,
                            Literal(group['description'])))

        # Now move onto the minor groups following the links
        for major_group in major_groups:
            major_group_uri = self._get_group_uri(major_group['code'])

            for minor_link in major_group['links']:
                # Look for the minor groups (2-digit codes)
                minor_groups = self._parse_records_table(minor_link, 2)

                # Add the groups to the graph
                for minor_group in minor_groups:
                    minor_group_uri = self._get_group_uri(minor_group['code'])
                    self.graph.add(
                        (minor_group_uri, RDF.type, SKOS['ConceptScheme']))
                    self.graph.add((minor_group_uri, RDFS.label,
                                    Literal(minor_group['title'])))
                    self.graph.add((minor_group_uri, DCTERMS.description,
                                    Literal(minor_group['description'])))
                    self.graph.add(
                        (major_group_uri, SKOS.related, minor_group_uri))

                    # Got one level deeper into the rubri (3-digit codes)
                    for rubri_link in minor_group['links']:
                        # Look for the minor groups
                        rubri_groups = self._parse_records_table(rubri_link, 3)

                        # Add the groups to the graph
                        for rubri_group in rubri_groups:
                            rubri_group_uri = self._get_group_uri(
                                rubri_group['code'])
                            self.graph.add((rubri_group_uri, RDF.type,
                                            SKOS['ConceptScheme']))
                            self.graph.add((rubri_group_uri, RDFS.label,
                                            Literal(rubri_group['title'])))
                            self.graph.add(
                                (rubri_group_uri, DCTERMS.description,
                                 Literal(rubri_group['description'])))
                            self.graph.add((minor_group_uri, SKOS.related,
                                            rubri_group_uri))

                            # And one deeper for the micro (5-digit codes)
                            for micro_link in rubri_group['links']:
                                # Look for the minor groups
                                micro_groups = self._parse_records_table(
                                    micro_link, 5)

                                # Add the groups to the graph; micro groups
                                # are SKOS Collections, not ConceptSchemes.
                                for micro_group in micro_groups:
                                    hisco_uri = self._get_hisco_uri(
                                        micro_group['code'])
                                    self.graph.add((hisco_uri, RDF.type,
                                                    SKOS['Collection']))
                                    self.graph.add(
                                        (hisco_uri, RDFS.label,
                                         Literal(micro_group['title'])))
                                    self.graph.add(
                                        (hisco_uri, DCTERMS.description,
                                         Literal(micro_group['description'])))
                                    self.graph.add((rubri_group_uri,
                                                    SKOS.related, hisco_uri))

    def parse_occupational_titles(self):
        '''
        Scrape the section of the site about occupational titles
        Last page = http://historyofwork.iisg.nl/list_hiswi.php?step=1845&publish=Y&modus=ftsearch
        '''
        # Status pages already parsed, so each is only visited once.
        parsed_status_page = set()
        next_page = OCCUPATIONAL_TITLES

        # Walk the paginated listing until there is no "Next" link.
        while next_page != None:
            log.info("Parse titles %s" % next_page)

            # Load the page
            doc = self.get_page(ROOT + next_page)

            # Find the right table
            table = doc.find('table',
                             attrs={
                                 'cellspacing': '0',
                                 'cellpadding': '2',
                                 'border': '0'
                             })

            # Look for all the titles
            for row in table.find_all('tr')[1:]:  # Skip the header
                cols = row.find_all('td')
                occupation_title = cols[1].text
                details_page_link = cols[1].find_all('a')[0]['href']
                language = LANG_MAP[cols[2].text]
                hisco_code = cols[3].text.replace('*', '')

                # Get the DB index from details_page_link
                m = re.search('know_id=([^&]*)&', details_page_link)
                occupation_index = m.group(1)

                # Add the concept to the graph
                resource = self._get_occupation_title_uri(occupation_index)
                self.graph.add((resource, RDF.type, SKOS['Concept']))
                self.graph.add((resource, SKOS.prefLabel,
                                Literal(occupation_title, lang=language)))
                self.graph.add(
                    (resource, SKOS.member, self._get_hisco_uri(hisco_code)))

                # Get more information about the title and add it as a member of the collection
                details_page = self.get_page(ROOT + details_page_link)
                details_table = details_page.find('table',
                                                  attrs={
                                                      'cellspacing': '8',
                                                      'cellpadding': '0'
                                                  })
                # Map "label cell" -> "value cell" for every detail row.
                keyvalues = {}
                for details_row in details_table.find_all('tr'):
                    details_cols = details_row.find_all('td')
                    keyvalues[details_cols[0].text.strip()] = details_cols[-1]

                # We already dealt with these two
                del keyvalues['Hisco code']
                del keyvalues['Occupational title']

                # TODO Country , use refArea

                # TODO Language

                # Do we know the gender ?
                if 'Gender' in keyvalues:
                    sex = SDMX_CODE['sex-U']  # Also applies to "Male/Female"
                    if keyvalues['Gender'].text.strip() == 'Male':
                        sex = SDMX_CODE['sex-M']
                    elif keyvalues['Gender'].text.strip() == 'Female':
                        sex = SDMX_CODE['sex-F']
                    self.graph.add((resource, SDMX_DIMENSION['sex'], sex))
                    del keyvalues['Gender']

                # Do we know the status ?
                if 'Status' in keyvalues:
                    # Add the status
                    status = keyvalues['Status'].text.strip()
                    self.graph.add((resource, HISCO['status'],
                                    self._get_status_uri(status)))
                    # Parse the status page if necessary
                    status_page = keyvalues['Status'].find_all('a')[0]['href']
                    if status_page not in parsed_status_page:
                        self._parse_status_page(status_page)
                        parsed_status_page.add(status_page)
                    del keyvalues['Status']

                # TODO Relation

                # TODO Product

                # TODO Provenance

                # Do we have a translation in English ?
                if 'Translation' in keyvalues:
                    trans = Literal(
                        keyvalues['Translation'].text.strip().replace(
                            '´', "'"),
                        lang='en')
                    self.graph.add((resource, SKOS.altLabel, trans))
                    del keyvalues['Translation']

                # Print whatever is left
                #if len(keyvalues.keys()) != 0:
                #    log.info(keyvalues.keys())

            # Look for the "next" link
            next_table = doc.find('table', class_='nextprev')
            next_page = None
            for link in next_table.find_all('a'):
                if 'Next' in link.text:
                    next_page = link['href']

    def _parse_status_page(self, url):
        '''
        Parses a status page such as http://historyofwork.iisg.nl/status.php?int02=32
        '''

        # Work-around broken content
        if url == 'status.php?int02=15':
            return

        # Load the page
        doc = self.get_page(ROOT + url)

        # Find the data about this status.  The content is a <pre> block of
        # "CODE Label" lines followed by "XX:<tab>translation" lines.
        status_uri = None
        for line in doc.find('pre').text.split('\n'):
            # "32 Label" style line: defines the status itself.
            if re.match("^[0-9]* [a-zA-Z]*", line):
                m = re.search("^([0-9]*) ([a-zA-Z]*)", line)
                status_uri = self._get_status_uri(m.group(1))
                self.graph.add((status_uri, RDF.type, HISCO['Status']))
                self.graph.add((status_uri, RDFS.label, Literal(m.group(2))))
                self.graph.add(
                    (status_uri, SKOS.prefLabel, Literal(m.group(2))))
                self.graph.add(
                    (status_uri, SKOS.notation, Literal(m.group(1))))
            # "EN:<tab>label" style line: a translated label for the status
            # defined above (relies on status_uri from the previous match).
            if re.match("^[A-Z]{2}:\t[a-zA-Z]*", line):
                m = re.search("^([A-Z]{2}):\t([a-zA-Z]*)", line)
                lang_code = m.group(1).lower()
                label = Literal(m.group(2), lang=lang_code)
                self.graph.add((status_uri, SKOS.altLabel, label))

        # Describe the class
        status_class = HISCO['Status']
        descr = doc.find('table', attrs={
            'width': '600'
        }).text.strip().split('\r\n')
        self.graph.add((status_class, RDF.type, RDFS.Class))
        self.graph.add((status_class, RDFS.label, Literal("Status code")))
        self.graph.add((status_class, DCTERMS.comment, Literal(descr[1])))

        # Describe the property
        status_property = HISCO['status']
        self.graph.add((status_property, RDF.type, RDF.Property))
        self.graph.add((status_property, RDFS.label,
                        Literal("status associated to the occupation")))
        self.graph.add((status_property, RDFS.range, HISCO['Status']))
        self.graph.add((status_property, RDFS.domain, SKOS.Concept))

    def _parse_records_table(self, url, size):
        '''
        Minor, Rubri and Micro have the same structure except an additional
        column for Micro with links to the titles

        Returns a list of dicts with keys 'code', 'title', 'description',
        'links' and (for micro) optionally 'titles_link'.
        '''
        # Load the page
        doc = self.get_page(ROOT + url)

        # Find the right table
        table = doc.find('table',
                         attrs={
                             'cellspacing': '8',
                             'cellpadding': '0'
                         })

        # If we can't find the table return an empty list
        # work around for http://historyofwork.iisg.nl/list_micro.php?keywords=920&keywords_qt=lstrict
        if table == None:
            return []

        # Look for the minor groups
        groups = []
        group = None
        columns = table.find_all('td')
        for index in range(0, len(columns)):
            # New group: a cell starting with a `size`-digit code.
            if re.match("[0-9]{%d}" % size, columns[index].text):
                if group != None:
                    groups.append(group)
                group = {}
                group['code'] = columns[index].text
                group['title'] = columns[index + 1].text
                link = columns[index + 1].find_all('a')[0]['href']
                group.setdefault('links', [])
                group['links'].append(link)
                group['description'] = columns[index + 2].text
                if columns[index + 3].text == "Display Titles":
                    link = columns[index + 3].find_all('a')[0]['href']
                    group['titles_link'] = link
        # NOTE(review): if no cell matched, `group` is still None here and a
        # None entry gets appended — callers would crash on group['code'];
        # confirm whether an empty table without codes can occur.
        groups.append(group)

        return groups

    # URI helpers: mint HISCO-namespace URIs for each entity kind.

    def _get_group_uri(self, code):
        return HISCO['group-%s' % code]

    def _get_hisco_uri(self, code):
        return HISCO['hisco-%s' % code]

    def _get_occupation_title_uri(self, code):
        return HISCO['occupation-%s' % code]

    def _get_status_uri(self, code):
        return HISCO['status-%s' % code]
예제 #52
0
    def encode_container(self, bundle, container=None, identifier=None):
        """
        Encode a PROV bundle's records into an RDF graph.

        :param bundle: the PROV bundle whose namespaces and records to encode.
        :param container: optional existing graph to add to; when None a new
            ConjunctiveGraph is created with *identifier*.
        :param identifier: graph identifier used only when *container* is None.
        :return: the populated graph.

        NOTE(review): this method is a dense translation of PROV-N semantics
        into the PROV-O vocabulary; predicate rewrites below follow the
        PROV-O qualified-pattern mapping — statement order matters.
        """
        if container is None:
            container = ConjunctiveGraph(identifier=identifier)
            nm = container.namespace_manager
            nm.bind('prov', PROV.uri)

        # Mirror the bundle's namespace prefixes into the output graph.
        for namespace in bundle.namespaces:
            container.bind(namespace.prefix, namespace.uri)

        # Records without an identifier get a generated anonymous id.
        id_generator = AnonymousIDGenerator()
        real_or_anon_id = lambda record: record._identifier.uri if \
            record._identifier else id_generator.get_anon_id(record)

        for record in bundle._records:
            rec_type = record.get_type()
            if hasattr(record, 'identifier') and record.identifier:
                identifier = URIRef(text_type(real_or_anon_id(record)))
                container.add((identifier, RDF.type, URIRef(rec_type.uri)))
            else:
                identifier = None
            if record.attributes:
                bnode = None
                formal_objects = []
                used_objects = []
                all_attributes = list(record.formal_attributes) + list(record.attributes)
                # A relation needs the qualified (reified) pattern when any
                # formal attribute beyond subject/object carries a value.
                formal_qualifiers = False
                for attrid, (attr, value) in enumerate(list(record.formal_attributes)):
                    if (identifier is not None and value is not None) or \
                            (identifier is None and value is not None and attrid > 1):
                        formal_qualifiers = True
                has_qualifiers = len(record.extra_attributes) > 0 or formal_qualifiers
                for idx, (attr, value) in enumerate(all_attributes):
                    if record.is_relation():
                        pred = URIRef(PROV[PROV_N_MAP[rec_type]].uri)
                        # create bnode relation
                        if bnode is None:
                            valid_formal_indices = set()
                            for idx, (key, val) in enumerate(record.formal_attributes):
                                formal_objects.append(key)
                                if val:
                                    valid_formal_indices.add(idx)
                            # Formal attribute 0 is the relation's subject.
                            used_objects = [record.formal_attributes[0][0]]
                            subj = None
                            if record.formal_attributes[0][1]:
                                subj = URIRef(record.formal_attributes[0][1].uri)
                            if identifier is None and subj is not None:
                                try:
                                    # Formal attribute 1 is the direct object.
                                    obj_val = record.formal_attributes[1][1]
                                    obj_attr = URIRef(record.formal_attributes[1][0].uri)
                                except IndexError:
                                    obj_val = None
                                # Emit the unqualified triple subj--pred-->obj
                                # unless this record type always requires the
                                # qualified form.
                                if obj_val and (rec_type not in [PROV_END,
                                                                PROV_START,
                                                                PROV_USAGE,
                                                                PROV_GENERATION,
                                                                PROV_DERIVATION,
                                                                PROV_INVALIDATION] or
                                                (valid_formal_indices == {0, 1} and
                                                 len(record.extra_attributes) == 0)):
                                    used_objects.append(record.formal_attributes[1][0])
                                    obj_val = self.encode_rdf_representation(obj_val)
                                    # prov:alternateOf is stated in the reverse
                                    # direction, so swap subject and object.
                                    if rec_type == PROV_ALTERNATE:
                                        subj, obj_val = obj_val, subj
                                    container.add((subj, pred, obj_val))
                                    if rec_type == PROV_MENTION:
                                        if record.formal_attributes[2][1]:
                                            used_objects.append(record.formal_attributes[2][0])
                                            obj_val = self.encode_rdf_representation(record.formal_attributes[2][1])
                                            container.add((subj, URIRef(PROV['asInBundle'].uri), obj_val))
                                        has_qualifiers = False
                            if rec_type in [PROV_ALTERNATE]: #, PROV_ASSOCIATION]:
                                continue
                            if subj and (has_qualifiers or identifier):  #and (len(record.extra_attributes) > 0 or                                                            identifier):
                                # Qualified pattern: subj prov:qualifiedX _:q .
                                qualifier = rec_type._localpart
                                rec_uri = rec_type.uri
                                # A prov:type of Revision/Quotation/PrimarySource
                                # refines the derivation's qualifier class.
                                for attr_name, val in record.extra_attributes:
                                    if attr_name == PROV['type']:
                                        if PROV['Revision'] == val or \
                                              PROV['Quotation'] == val or \
                                                PROV['PrimarySource'] == val:
                                            qualifier = val._localpart
                                            rec_uri = val.uri
                                            if identifier is not None:
                                                container.remove((identifier,
                                                                  RDF.type,
                                                                  URIRef(rec_type.uri)))
                                QRole = URIRef(PROV['qualified' + qualifier].uri)
                                if identifier is not None:
                                    container.add((subj, QRole, identifier))
                                else:
                                    bnode = identifier = BNode()
                                    container.add((subj, QRole, identifier))
                                    container.add((identifier, RDF.type,
                                                   URIRef(rec_uri)))
                                               # reset identifier to BNode
                        # Attach the remaining attributes to the qualified node,
                        # rewriting PROV-N attribute names to PROV-O predicates.
                        if value is not None and attr not in used_objects:
                            if attr in formal_objects:
                                pred = attr2rdf(attr)
                            elif attr == PROV['role']:
                                pred = URIRef(PROV['hadRole'].uri)
                            elif attr == PROV['plan']:
                                pred = URIRef(PROV['hadPlan'].uri)
                            elif attr == PROV['type']:
                                pred = RDF.type
                            elif attr == PROV['label']:
                                pred = RDFS.label
                            elif isinstance(attr, QualifiedName):
                                pred = URIRef(attr.uri)
                            else:
                                pred = self.encode_rdf_representation(attr)
                            # Per-record-type predicate fix-ups (PROV-O names
                            # differ from the PROV-N formal attribute names).
                            if PROV['plan'].uri in pred:
                                pred = URIRef(PROV['hadPlan'].uri)
                            if PROV['informant'].uri in pred:
                                pred = URIRef(PROV['activity'].uri)
                            if PROV['responsible'].uri in pred:
                                pred = URIRef(PROV['agent'].uri)
                            if rec_type == PROV_DELEGATION and PROV['activity'].uri in pred:
                                pred = URIRef(PROV['hadActivity'].uri)
                            if (rec_type in [PROV_END, PROV_START] and PROV['trigger'].uri in pred) or\
                                (rec_type in [PROV_USAGE] and PROV['used'].uri in pred):
                                pred = URIRef(PROV['entity'].uri)
                            if rec_type in [PROV_GENERATION, PROV_END,
                                            PROV_START, PROV_USAGE,
                                            PROV_INVALIDATION]:
                                if PROV['time'].uri in pred:
                                    pred = URIRef(PROV['atTime'].uri)
                                if PROV['ender'].uri in pred:
                                    pred = URIRef(PROV['hadActivity'].uri)
                                if PROV['starter'].uri in pred:
                                    pred = URIRef(PROV['hadActivity'].uri)
                                if PROV['location'].uri in pred:
                                    pred = URIRef(PROV['atLocation'].uri)
                            if rec_type in [PROV_ACTIVITY]:
                                if PROV_ATTR_STARTTIME in pred:
                                    pred = URIRef(PROV['startedAtTime'].uri)
                                if PROV_ATTR_ENDTIME in pred:
                                    pred = URIRef(PROV['endedAtTime'].uri)
                            if rec_type == PROV_DERIVATION:
                                if PROV['activity'].uri in pred:
                                    pred = URIRef(PROV['hadActivity'].uri)
                                if PROV['generation'].uri in pred:
                                    pred = URIRef(PROV['hadGeneration'].uri)
                                if PROV['usage'].uri in pred:
                                    pred = URIRef(PROV['hadUsage'].uri)
                                if PROV['usedEntity'].uri in pred:
                                    pred = URIRef(PROV['entity'].uri)
                            container.add((identifier, pred,
                                           self.encode_rdf_representation(value)))
                        continue
                    # Non-relation records: plain attribute triples.
                    if value is None:
                        continue
                    if isinstance(value, ProvRecord):
                        obj = URIRef(text_type(real_or_anon_id(value)))
                    else:
                        #  Assuming this is a datetime value
                        obj = self.encode_rdf_representation(value)
                    if attr == PROV['location']:
                        pred = URIRef(PROV['atLocation'].uri)
                        # NOTE(review): the first branch is disabled by the
                        # literal `False` — apparently dead code kept on purpose.
                        if False and isinstance(value, (URIRef, QualifiedName)):
                            if isinstance(value, QualifiedName):
                                value = URIRef(value.uri)
                            container.add((identifier, pred, value))
                        else:
                            container.add((identifier, pred,
                                           self.encode_rdf_representation(obj)))
                        continue
                    if attr == PROV['type']:
                        pred = RDF.type
                    elif attr == PROV['label']:
                        pred = RDFS.label
                    elif attr == PROV_ATTR_STARTTIME:
                        pred = URIRef(PROV['startedAtTime'].uri)
                    elif attr == PROV_ATTR_ENDTIME:
                        pred = URIRef(PROV['endedAtTime'].uri)
                    else:
                        pred = self.encode_rdf_representation(attr)
                    container.add((identifier, pred, obj))
        return container
예제 #53
0
class TestKyotoCabinetConjunctiveGraphCore(unittest.TestCase):
    """Smoke tests for a ConjunctiveGraph backed by the KyotoCabinet store."""

    def setUp(self):
        """Open a fresh KyotoCabinet-backed graph at the configured path."""
        store = "KyotoCabinet"
        self.graph = ConjunctiveGraph(store=store)
        self.path = configString
        self.graph.open(self.path, create=True)

    def tearDown(self):
        """Destroy the store, then remove whatever files it left on disk."""
        self.graph.destroy(self.path)
        try:
            self.graph.close()
        except Exception:
            # Closing after destroy may legitimately fail; cleanup continues.
            pass
        if getattr(self, "path", False) and self.path is not None:
            if os.path.exists(self.path):
                if os.path.isdir(self.path):
                    for f in os.listdir(self.path):
                        os.unlink(self.path + "/" + f)
                    os.rmdir(self.path)
                elif len(self.path.split(":")) == 1:
                    os.unlink(self.path)
                else:
                    os.remove(self.path)

    def test_namespaces(self):
        """Bound prefixes appear in the graph's namespace list."""
        # Bug fix: the DC namespace URI previously had a doubled scheme
        # ("http://http://purl.org/dc/elements/1.1/").
        self.graph.bind("dc", "http://purl.org/dc/elements/1.1/")
        self.graph.bind("foaf", "http://xmlns.com/foaf/0.1/")
        self.assertTrue(len(list(self.graph.namespaces())) == 5)
        self.assertTrue(("foaf", rdflib.term.URIRef(u"http://xmlns.com/foaf/0.1/")) in list(self.graph.namespaces()))

    def test_play_journal(self):
        """play_journal is not implemented by the KyotoCabinet store."""
        self.assertRaises(NotImplementedError, self.graph.store.play_journal, {"graph": self.graph})

    def test_readable_index(self):
        # Only checks that readable_index does not raise for a sample value.
        print(readable_index(111))

    def test_triples_context_reset(self):
        """Triples added via the graph are visible when querying with the store as context."""
        michel = rdflib.URIRef(u"michel")
        likes = rdflib.URIRef(u"likes")
        pizza = rdflib.URIRef(u"pizza")
        cheese = rdflib.URIRef(u"cheese")
        self.graph.add((michel, likes, pizza))
        self.graph.add((michel, likes, cheese))
        self.graph.commit()
        ntriples = self.graph.triples((None, None, None), context=self.graph.store)
        self.assertTrue(len(list(ntriples)) == 2)

    def test_remove_context_reset(self):
        """Removing one triple through the store leaves the other intact."""
        michel = rdflib.URIRef(u"michel")
        likes = rdflib.URIRef(u"likes")
        pizza = rdflib.URIRef(u"pizza")
        cheese = rdflib.URIRef(u"cheese")
        self.graph.add((michel, likes, pizza))
        self.graph.add((michel, likes, cheese))
        self.graph.commit()
        self.graph.store.remove((michel, likes, cheese), self.graph.store)
        self.graph.commit()
        self.assertTrue(len(list(self.graph.triples((None, None, None), context=self.graph.store))) == 1)

    def test_remove_db_exception(self):
        """Calling __len__ with an explicit context does not disturb later reads."""
        michel = rdflib.URIRef(u"michel")
        likes = rdflib.URIRef(u"likes")
        pizza = rdflib.URIRef(u"pizza")
        cheese = rdflib.URIRef(u"cheese")
        self.graph.add((michel, likes, pizza))
        self.graph.add((michel, likes, cheese))
        self.graph.commit()
        self.graph.store.__len__(context=self.graph.store)
        self.assertTrue(len(list(self.graph.triples((None, None, None), context=self.graph.store))) == 2)
예제 #54
0
class RDFAggregator(Aggregator):
    """Aggregator that accumulates the RDF triples of added objects into a
    single ConjunctiveGraph and serializes their union.

    NOTE(review): this is Python 2 code (uses ``unicode``); porting to
    Python 3 would require replacing it with ``str``.
    """

    def __init__(self, *args, **kw):
        """Initialize the RDF aggregator.
        """
        # NOTE(review): the base class is initialized with the 'csv' format
        # string — confirm this is intentional for an RDF aggregator.
        super(RDFAggregator, self).__init__('csv', *args, **kw)
        self.aggregator = ConjunctiveGraph()
        # Bind the prefixes used when serializing the aggregated graph.
        self.aggregator.bind(u'owl', OWL)
        self.aggregator.bind(u'lic', LIC)
        self.aggregator.bind(u'siorg', SIORG)
        self.aggregator.bind(u'siafi', SIAFI)
        self.aggregator.bind(u'geo', GEO)
        self.aggregator.bind(u'dbpedia', DBPEDIA)
        self.aggregator.bind(u'dbprop', DBPROP)
        self.aggregator.bind(u'dbo', DBONT)
        self.aggregator.bind(u'void', VOID)
        self.aggregator.bind(u'foaf', FOAF)
        self.aggregator.bind(u'vcard', VCARD)
    def add(self, obj):
        """Add the object's triples to the aggregator graph.
        """
        if getattr(obj, 'repr_rdf', None):
            # The object provides its own RDF representation method.
            triplas = obj.repr_rdf()
            for t in triplas:
                self.aggregator.add(t)
        else:
            # No such method; build triples heuristically from known attributes.
            subject = obj.uri
            doc = obj.doc_uri
            # If the document URI equals the subject URI, skip the document links.
            if doc == subject:
                doc = None
            class_uri = getattr(obj.__class__, '__class_uri__', None)
            # Set of attribute names the class exposes for serialization.
            expostos = getattr(obj.__class__,self.atributo_serializar, set())
            prop_map = getattr(obj.__class__, '__rdf_prop__', {})
            g = self.aggregator
            #  class membership (rdf:type)
            if class_uri:
                g.add((URIRef(subject), RDF['type'], URIRef(class_uri)))
            # document <-> primary-topic links
            if doc:
                g.add((URIRef(doc), RDF['type'], FOAF['Document']))
                g.add((URIRef(subject), FOAF['isPrimaryTopicOf'], URIRef(doc)))
                g.add((URIRef(doc), FOAF['primaryTopic'], URIRef(subject)))
            #  name (rdfs:label), unless a custom mapping overrides 'nome'
            if getattr(obj, 'nome', None):
                if getattr(obj, '__rdf_prop__', None) is None or \
                        obj.__rdf_prop__.get('nome', None) is None:
                    g.add((URIRef(subject), RDFS['label'], Literal(obj.nome)))
            #  geographic location (WGS84 lat/long)
            if getattr(obj, 'geo_ponto', None):
                ponto = obj.geo_ponto
                if ponto:
                    g.add((URIRef(subject), GEO['lat'], Literal(ponto['lat'])))
                    g.add((URIRef(subject), GEO['long'], Literal(ponto['lon'])))
            #  exposed properties
            for atr in expostos:
                if atr in prop_map.keys():
                    if getattr(prop_map[atr], '__call__', None):
                        # The property's triples are produced by a function.
                        triplas = prop_map[atr](obj)
                        if triplas:
                            for t in triplas:
                                g.add(t)
                    elif prop_map[atr].get('metodo', None):
                        # The property's triples are produced by a method on the object.
                        m = getattr(obj, prop_map[atr]['metodo'])
                        triplas = m(atr)
                        if triplas:
                            for t in triplas:
                                g.add(t)
                    elif prop_map[atr].get('pred_uri', None):
                        # The property corresponds to a single triple.
                        pred_uri = prop_map[atr]['pred_uri']
                        # NOTE(review): 'object' shadows the builtin of the same name.
                        object = getattr(obj, atr, None)
                        if object:
                            obj_uri = getattr(object, 'uri', lambda: None)()
                            obj_cls_uri = getattr(object, '__class_uri__', None)
                            # Does the related object define its own URI?
                            if obj_uri:
                                g.add((URIRef(subject), URIRef(pred_uri), URIRef(obj_uri)))
                            elif obj_cls_uri:
                                # No URI, but the class has one: represent the
                                # object as a typed blank node.
                                bn = BNode()
                                g.add((URIRef(subject), URIRef(pred_uri), bn))
                                g.add((bn, RDF['type'], URIRef(obj_cls_uri)))
                                g.add((bn, RDFS['comment'], Literal(unicode(obj))))
                            else:
                                # Otherwise treat the property value as a plain literal.
                                g.add((URIRef(subject), URIRef(pred_uri), Literal(unicode(object))))
    def serialize(self, format="n3"):
        """Return the serialization of the RDF aggregator (union of the graphs).
        """
        # Map user-facing format names onto rdflib serializer names;
        # anything unrecognized falls back to n3.
        format_map = {
            'xml': 'xml',
            'rdf': 'pretty-xml',
            'rdf/xml': 'pretty-xml',
            'ttl': 'n3',
            'n3': 'n3',
            'nt': 'nt',
        }
        f = format_map.get(format, 'n3')
        current_url = self.dataset_split.get('current_url', '') # URL of the current document
        dataset_url = self.dataset_split.get('dataset_url', '') # URL of the dataset as a whole
        next_url = self.dataset_split.get('next_url', '') # URL of the next page
        # The dataset URI is the document URL plus a "#dataset" fragment.
        if current_url:
            self.aggregator.add((URIRef(current_url+"#dataset"),RDF['type'],VOID['Dataset']))
            self.aggregator.add((URIRef(current_url),RDF['type'],VOID['DatasetDescription']))
            self.aggregator.add((URIRef(current_url),FOAF['primaryTopic'],URIRef(current_url+"#dataset")))
            if next_url:
                self.aggregator.add((URIRef(current_url+"#dataset"),RDFS['seeAlso'],URIRef(next_url+"#dataset")))
        if next_url:
            self.aggregator.add((URIRef(next_url+"#dataset"),RDF['type'], VOID['Dataset']))
            self.aggregator.add((URIRef(next_url),RDF['type'],VOID['DatasetDescription']))
            self.aggregator.add((URIRef(next_url),FOAF['primaryTopic'],URIRef(next_url+"#dataset")))
        if dataset_url:
            self.aggregator.add((URIRef(dataset_url+"#dataset"),RDF['type'], VOID['Dataset']))
            self.aggregator.add((URIRef(dataset_url),RDF['type'],VOID['DatasetDescription']))
            self.aggregator.add((URIRef(dataset_url),FOAF['primaryTopic'],URIRef(dataset_url+"#dataset")))
            # Page datasets are declared subsets of the whole dataset.
            if current_url:
                self.aggregator.add((URIRef(dataset_url+"#dataset"),VOID['subset'],URIRef(current_url+"#dataset")))
            if next_url:
                self.aggregator.add((URIRef(dataset_url+"#dataset"),VOID['subset'],URIRef(next_url+"#dataset")))
        return self.aggregator.serialize(format=f)
예제 #55
0
from rdflib.graph import ConjunctiveGraph
from rdflib.namespace import Namespace, RDF
from rdflib.term import BNode, Literal

DC = Namespace(u"http://purl.org/dc/elements/1.1/")
FUNC = Namespace(u"http://example.org/functions#")

_XSD_NS = Namespace('http://www.w3.org/2001/XMLSchema#')

# Build a small fixture graph of four integer-valued statements for the
# extension-function tests below.
graph = ConjunctiveGraph()
graph.add((BNode(), RDF.value, Literal(0)))
graph.add((BNode(), RDF.value, Literal(1)))
graph.add((BNode(), RDF.value, Literal(2)))
graph.add((BNode(), RDF.value, Literal(3)))

# Bug fix: ``long`` exists only on Python 2 and made this module raise
# NameError at import time on Python 3; fall back to ``int`` there.
try:
    long
except NameError:
    long = int

# Collect the XSD datatype URIs that rdflib maps to Python numeric types.
from rdflib.term import _toPythonMapping
NUMERIC_TYPES = [type_uri for type_uri in _toPythonMapping
                 if _toPythonMapping[type_uri] in (int, float, long)]

def func_even(a):
    """SPARQL extension function: xsd:boolean literal saying whether *a* is even.

    Raises TypeError for anything that is not a numeric literal.
    """
    # Resolve the node to a concrete value first.
    # (Open question kept from the original: should the SPARQL engine
    # perform this resolution automatically?)
    from rdflib.sparql.sparqlOperators import getValue
    resolved = getValue(a)

    # Guard clause: reject non-literals and non-numeric datatypes.
    if not (isinstance(resolved, Literal) and resolved.datatype in NUMERIC_TYPES):
        raise TypeError(a)

    is_even = resolved.toPython() % 2 == 0
    return Literal(int(is_even), datatype=_XSD_NS.boolean)

def test_even_extension():
    res = list(graph.query("""
예제 #56
0
class ContextTest(TestCase):
    """
    Testing different contexts.

    Heavily based on https://github.com/RDFLib/rdflib-postgresql/blob/master/test/context_case.py

    Modernized to use assertEqual/assertTrue: the assertEquals and assert_
    aliases are deprecated and removed in Python 3.12.
    """  # noqa: E501
    store_name = "Django"
    storetest = True
    path = ""
    create = True

    # Shared fixture terms.
    michel = URIRef(u'michel')
    tarek = URIRef(u'tarek')
    bob = URIRef(u'bob')
    likes = URIRef(u'likes')
    hates = URIRef(u'hates')
    pizza = URIRef(u'pizza')
    cheese = URIRef(u'cheese')
    c1 = URIRef(u'context-1')
    c2 = URIRef(u'context-2')

    def setUp(self):
        """Open a clean graph on the configured store."""
        self.graph = ConjunctiveGraph(store=self.store_name)
        self.graph.destroy(self.path)
        self.graph.open(self.path, create=self.create)

    def tearDown(self):
        """Destroy and close the graph."""
        self.graph.destroy(self.path)
        self.graph.close()

    def get_context(self, identifier):
        """Return a Graph bound to *identifier* on the shared store."""
        assert isinstance(identifier, URIRef) or isinstance(identifier, BNode), type(identifier)  # noqa: E501
        return Graph(store=self.graph.store, identifier=identifier, namespace_manager=self)  # noqa: E501

    def addStuff(self):
        """Add the seven fixture triples to context c1."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        graph = Graph(self.graph.store, c1)

        graph.add((tarek, likes, pizza))
        graph.add((tarek, likes, cheese))
        graph.add((michel, likes, pizza))
        graph.add((michel, likes, cheese))
        graph.add((bob, likes, cheese))
        graph.add((bob, hates, pizza))
        graph.add((bob, hates, michel))

    def removeStuff(self):
        """Remove the seven fixture triples from context c1."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        graph = Graph(self.graph.store, c1)

        graph.remove((tarek, likes, pizza))
        graph.remove((tarek, likes, cheese))
        graph.remove((michel, likes, pizza))
        graph.remove((michel, likes, cheese))
        graph.remove((bob, likes, cheese))
        graph.remove((bob, hates, pizza))
        graph.remove((bob, hates, michel))

    def addStuffInMultipleContexts(self):
        """Add one identical triple to the default context, c1 and c2."""
        c1 = self.c1
        c2 = self.c2
        triple = (self.pizza, self.hates, self.tarek)

        # add to default context
        self.graph.add(triple)
        # add to context 1
        graph = Graph(self.graph.store, c1)
        graph.add(triple)
        # add to context 2
        graph = Graph(self.graph.store, c2)
        graph.add(triple)

    def testConjunction(self):
        self.addStuffInMultipleContexts()
        triple = (self.pizza, self.likes, self.pizza)
        # add to context 1
        graph = Graph(self.graph.store, self.c1)
        graph.add(triple)

        self.assertEqual(len(graph), 2)
        self.assertEqual(len(self.graph), 2)

    def testAdd(self):
        self.addStuff()

    def testRemove(self):
        self.addStuff()
        self.removeStuff()

    def testLenInOneContext(self):
        c1 = self.c1
        # make sure context is empty

        self.graph.remove_context(self.get_context(c1))
        graph = Graph(self.graph.store, c1)
        oldLen = len(self.graph)

        for _ in range(0, 10):
            graph.add((BNode(), self.hates, self.hates))
        self.assertEqual(len(graph), oldLen + 10)
        self.assertEqual(len(self.get_context(c1)), oldLen + 10)
        self.graph.remove_context(self.get_context(c1))
        self.assertEqual(len(self.graph), oldLen)
        self.assertEqual(len(graph), 0)

    def testLenInMultipleContexts(self):
        oldLen = len(self.graph)
        self.addStuffInMultipleContexts()

        # addStuffInMultipleContexts is adding the same triple to
        # three different contexts. So it's only + 1
        self.assertEqual(len(self.graph), oldLen + 1)

        graph = Graph(self.graph.store, self.c1)
        self.assertEqual(len(graph), oldLen + 1)

    def testRemoveInMultipleContexts(self):
        c1 = self.c1
        c2 = self.c2
        triple = (self.pizza, self.hates, self.tarek)

        self.addStuffInMultipleContexts()

        # triple should be still in store after removing it from c1 + c2
        self.assertIn(triple, self.graph)
        graph = Graph(self.graph.store, c1)
        graph.remove(triple)
        self.assertIn(triple, self.graph)
        graph = Graph(self.graph.store, c2)
        graph.remove(triple)
        self.assertIn(triple, self.graph)
        self.graph.remove(triple)
        # now gone!
        self.assertNotIn(triple, self.graph)

        # add again and see if remove without context removes all triples!
        self.addStuffInMultipleContexts()
        self.graph.remove(triple)
        self.assertNotIn(triple, self.graph)

    def testContexts(self):
        triple = (self.pizza, self.hates, self.tarek)

        self.addStuffInMultipleContexts()

        def cid(c):
            # contexts() may yield Graph objects or raw identifiers.
            if not isinstance(c, str):
                return c.identifier
            return c

        self.assertIn(self.c1, [cid(c) for c in self.graph.contexts()])
        self.assertIn(self.c2, [cid(c) for c in self.graph.contexts()])

        contextList = [cid(c) for c in self.graph.contexts(triple)]
        self.assertIn(self.c1, contextList)
        self.assertIn(self.c2, contextList)

    def testRemoveContext(self):
        c1 = self.c1

        self.addStuffInMultipleContexts()
        self.assertEqual(len(Graph(self.graph.store, c1)), 1)
        self.assertEqual(len(self.get_context(c1)), 1)

        self.graph.remove_context(self.get_context(c1))
        # NOTE(review): contexts() yields Graph objects, so this membership
        # test compares a URIRef against graphs; it passes, but comparing
        # identifiers (as testContexts does via cid) would be more precise.
        self.assertTrue(self.c1 not in self.graph.contexts())

    def testRemoveAny(self):
        Any = None
        self.addStuffInMultipleContexts()
        self.graph.remove((Any, Any, Any))
        self.assertEqual(len(self.graph), 0)

    def testTriples(self):
        triples = self.graph.triples
        graph = self.graph
        c1graph = Graph(self.graph.store, self.c1)
        c1triples = c1graph.triples
        Any = None

        self.addStuff()

        # unbound subjects with context
        self.assertEqual(
            len(
                list(
                    c1triples((Any, self.likes, self.pizza))
                )
            ), 2
        )
        self.assertEqual(
            len(
                list(
                    c1triples((Any, self.hates, self.pizza))
                )
            ), 1
        )
        self.assertEqual(
            len(
                list(
                    c1triples((Any, self.likes, self.cheese))
                )
            ), 3
        )
        self.assertEqual(
            len(
                list(
                    c1triples((Any, self.hates, self.cheese))
                )
            ), 0
        )

        # unbound subjects without context, same results!
        self.assertEqual(len(list(triples((Any, self.likes, self.pizza)))), 2)
        self.assertEqual(len(list(triples((Any, self.hates, self.pizza)))), 1)
        self.assertEqual(len(list(triples((Any, self.likes, self.cheese)))), 3)
        self.assertEqual(len(list(triples((Any, self.hates, self.cheese)))), 0)

        # unbound objects with context
        self.assertEqual(len(list(c1triples((self.michel, self.likes, Any)))), 2)
        self.assertEqual(len(list(c1triples((self.tarek, self.likes, Any)))), 2)
        self.assertEqual(len(list(c1triples((self.bob, self.hates, Any)))), 2)
        self.assertEqual(len(list(c1triples((self.bob, self.likes, Any)))), 1)

        # unbound objects without context, same results!
        self.assertEqual(len(list(triples((self.michel, self.likes, Any)))), 2)
        self.assertEqual(len(list(triples((self.tarek, self.likes, Any)))), 2)
        self.assertEqual(len(list(triples((self.bob, self.hates, Any)))), 2)
        self.assertEqual(len(list(triples((self.bob, self.likes, Any)))), 1)

        # unbound predicates with context
        self.assertEqual(len(list(c1triples((self.michel, Any, self.cheese)))), 1)
        self.assertEqual(len(list(c1triples((self.tarek, Any, self.cheese)))), 1)
        self.assertEqual(len(list(c1triples((self.bob, Any, self.pizza)))), 1)
        self.assertEqual(len(list(c1triples((self.bob, Any, self.michel)))), 1)

        # unbound predicates without context, same results!
        self.assertEqual(len(list(triples((self.michel, Any, self.cheese)))), 1)
        self.assertEqual(len(list(triples((self.tarek, Any, self.cheese)))), 1)
        self.assertEqual(len(list(triples((self.bob, Any, self.pizza)))), 1)
        self.assertEqual(len(list(triples((self.bob, Any, self.michel)))), 1)

        # unbound subject, objects with context
        self.assertEqual(len(list(c1triples((Any, self.hates, Any)))), 2)
        self.assertEqual(len(list(c1triples((Any, self.likes, Any)))), 5)

        # unbound subject, objects without context, same results!
        self.assertEqual(len(list(triples((Any, self.hates, Any)))), 2)
        self.assertEqual(len(list(triples((Any, self.likes, Any)))), 5)

        # unbound predicates, objects with context
        self.assertEqual(len(list(c1triples((self.michel, Any, Any)))), 2)
        self.assertEqual(len(list(c1triples((self.bob, Any, Any)))), 3)
        self.assertEqual(len(list(c1triples((self.tarek, Any, Any)))), 2)

        # unbound predicates, objects without context, same results!
        self.assertEqual(len(list(triples((self.michel, Any, Any)))), 2)
        self.assertEqual(len(list(triples((self.bob, Any, Any)))), 3)
        self.assertEqual(len(list(triples((self.tarek, Any, Any)))), 2)

        # unbound subjects, predicates with context
        self.assertEqual(len(list(c1triples((Any, Any, self.pizza)))), 3)
        self.assertEqual(len(list(c1triples((Any, Any, self.cheese)))), 3)
        self.assertEqual(len(list(c1triples((Any, Any, self.michel)))), 1)

        # unbound subjects, predicates without context, same results!
        self.assertEqual(len(list(triples((Any, Any, self.pizza)))), 3)
        self.assertEqual(len(list(triples((Any, Any, self.cheese)))), 3)
        self.assertEqual(len(list(triples((Any, Any, self.michel)))), 1)

        # all unbound with context
        self.assertEqual(len(list(c1triples((Any, Any, Any)))), 7)
        # all unbound without context, same result!
        self.assertEqual(len(list(triples((Any, Any, Any)))), 7)

        for c in [graph, self.get_context(self.c1)]:
            # unbound subjects
            self.assertEqual(set(c.subjects(self.likes, self.pizza)), {self.michel, self.tarek})
            self.assertEqual(set(c.subjects(self.hates, self.pizza)), {self.bob})
            self.assertEqual(set(c.subjects(self.likes, self.cheese)), {self.tarek, self.bob, self.michel})
            self.assertEqual(set(c.subjects(self.hates, self.cheese)), set())

            # unbound objects
            self.assertEqual(set(c.objects(self.michel, self.likes)), {self.cheese, self.pizza})
            self.assertEqual(set(c.objects(self.tarek, self.likes)), {self.cheese, self.pizza})
            self.assertEqual(set(c.objects(self.bob, self.hates)), {self.michel, self.pizza})
            self.assertEqual(set(c.objects(self.bob, self.likes)), {self.cheese})

            # unbound predicates
            self.assertEqual(
                set(
                    c.predicates(self.michel, self.cheese)
                ),
                {self.likes}
            )
            self.assertEqual(
                set(
                    c.predicates(self.tarek, self.cheese)
                ),
                {self.likes}
            )
            self.assertEqual(set(c.predicates(self.bob, self.pizza)), {self.hates})
            self.assertEqual(set(c.predicates(self.bob, self.michel)), {self.hates})

            self.assertEqual(set(c.subject_objects(self.hates)), {(self.bob, self.pizza), (self.bob, self.michel)})
            self.assertEqual(set(c.subject_objects(self.likes)),
                    {(self.tarek, self.cheese), (self.michel, self.cheese), (self.michel, self.pizza), (self.bob, self.cheese), (self.tarek, self.pizza)})

            self.assertEqual(set(c.predicate_objects(self.michel)), {(self.likes, self.cheese), (self.likes, self.pizza)})
            self.assertEqual(set(c.predicate_objects(self.bob)), {(self.likes, self.cheese), (self.hates, self.pizza), (self.hates, self.michel)})
            self.assertEqual(set(c.predicate_objects(self.tarek)), {(self.likes, self.cheese), (self.likes, self.pizza)})

            self.assertEqual(set(c.subject_predicates(self.pizza)), {(self.bob, self.hates), (self.tarek, self.likes), (self.michel, self.likes)})
            self.assertEqual(set(c.subject_predicates(self.cheese)), {(self.bob, self.likes), (self.tarek, self.likes), (self.michel, self.likes)})
            self.assertEqual(set(c.subject_predicates(self.michel)), {(self.bob, self.hates)})

            self.assertEqual(set(c), {(self.bob, self.hates, self.michel), (self.bob, self.likes, self.cheese), (self.tarek, self.likes, self.pizza),
                (self.michel, self.likes, self.pizza), (self.michel, self.likes, self.cheese), (self.bob, self.hates, self.pizza),
                (self.tarek, self.likes, self.cheese)})

        # remove stuff and make sure the graph is empty again
        self.removeStuff()
        self.assertEqual(len(list(c1triples((Any, Any, Any)))), 0)
        self.assertEqual(len(list(triples((Any, Any, Any)))), 0)
예제 #57
0
def createRDF(username, city, artists, locationInformation, events, genres):
    """Build an RDF graph describing a user's liked artists and genres,
    the points of interest of a city, and a set of upcoming events.

    Events missing date information get a placeholder rdfs:label instead.
    """
    graph = ConjunctiveGraph()

    rdfs = Namespace('http://www.w3.org/2000/01/rdf-schema#')
    iwa = Namespace('http://iwa2012-18-project.appspot.com/')
    lfm = Namespace('http://iwa2012-18-project.appspot.com/lastfm/')
    ev = Namespace('http://iwa2012-18-project.appspot.com/event/')
    # DBPedia link to artists, genres and cities
    dbp = Namespace('http://dbpedia.org/resource/')

    def dbp_ref(name):
        # DBPedia resource identifiers use underscores instead of spaces.
        return dbp[name.replace(" ", "_")]

    user_uri = lfm[username]
    city_uri = dbp_ref(city)

    # Liked artists, each labelled with its plain name.
    for artist in artists:
        artist_uri = dbp_ref(artist)
        graph.add((user_uri, iwa['likesArtist'], artist_uri))
        graph.add((artist_uri, rdfs['label'], Literal(artist)))

    # Points of interest of the city.
    for location in locationInformation:
        graph.add((city_uri, iwa['poi'], Literal(location)))

    # Events: (id, date, label, artist, venue, genre-list) tuples.
    for event in events:
        event_uri = ev[event[0]]
        try:
            when = event[1]
            date_text = "%s-%s-%s" % (when.year, when.month, when.day)
            graph.add((event_uri, ev['onDate'], Literal(date_text, datatype=XSD.date)))
            graph.add((event_uri, rdfs['label'], Literal(event[2])))
            graph.add((event_uri, ev['artist'], dbp_ref(event[3])))
            graph.add((event_uri, ev['venue'], Literal(event[4])))
            graph.add((event_uri, ev['city'], city_uri))

            for eventGenre in event[5]:
                graph.add((event_uri, ev['genre'], dbp_ref(eventGenre)))

        except AttributeError:
            # The event tuple lacked a proper date object.
            graph.add((event_uri, rdfs['label'], Literal("Event is missing information")))

    # Liked genres, each labelled with its plain name.
    for genre in genres:
        genre_uri = dbp_ref(genre)
        graph.add((user_uri, iwa['likesGenre'], genre_uri))
        graph.add((genre_uri, rdfs['label'], Literal(genre)))

    graph.add((city_uri, rdfs['label'], Literal(city)))

    return graph