Example #1
    def create_ontology(self, tr, predicate, subClass, address, booktitle):
        LDT = Namespace("http://www.JceFinalProjectOntology.com/")
        ut = Namespace("http://www.JceFinalProjectOntology.com/subject/#")
        usubClass = URIRef("http://www.JceFinalProjectOntology.com/subject/" +
                           subClass.strip() + '#')
        #LDT.subClass=LDT[subClass]
        print(ut)
        print(usubClass)

        store = IOMemory()

        sty = LDT[predicate]
        g = rdflib.Graph(store=store, identifier=LDT)
        t = ConjunctiveGraph(store=store, identifier=ut)
        print('Triples in graph before add: ', len(t))
        #g.add((LDT,RDF.type,RDFS.Class))
        g.add((URIRef(LDT), RDF.type, RDFS.Class))
        g.add((URIRef(LDT), RDFS.label, Literal("JFPO")))
        g.add((URIRef(LDT), RDFS.comment, Literal('class of all properties')))
        for v in self.symbols.values():
            if self.if_compoTerm(v) == True:
                vs = self.splitTerms(v)[0]
            else:
                vs = v
            g.add((LDT[vs], RDF.type, RDF.Property))
            g.add((LDT[vs], RDFS.label, Literal('has' + vs)))
            g.add((LDT[vs], RDFS.comment, Literal(v)))
            g.add((LDT[vs], RDFS.range, OWL.Class))
            g.add((LDT[vs], RDFS.domain, Literal(vs)))
        g.bind('JFPO', LDT)
        #g.commit()
        g.serialize('trtst.rdf', format='turtle')

        t.add((ut[tr], RDF.type, OWL.Class))
        t.add((ut[tr], RDFS.subClassOf, OWL.Thing))
        t.add((ut[tr], RDFS.label, Literal(tr)))
        t.add((ut[tr], DC.title, Literal(booktitle)))
        t.add((ut[tr], DC.source, Literal(address)))

        t.add((ut[tr], DC[predicate], URIRef(usubClass)))
        t.add((ut[tr], LDT[predicate], RDF.Property))
        t.add((ut[tr], LDT.term(predicate), URIRef(usubClass)))

        t.add((usubClass, RDF.type, OWL.Class))
        t.add((usubClass, RDFS.subClassOf, OWL.Thing))
        t.add((usubClass, RDFS.subClassOf, URIRef(sty)))
        t.add((usubClass, RDFS.label, Literal(subClass)))

        #tc=Graph(store=store,identifier=usubClass)
        t.bind("dc", "http://purl.org/dc/elements/1.1/")
        t.bind('JFPO', LDT)
        t.commit()
        #print(t.serialize(format='pretty-xml'))

        t.serialize('test2.owl', format='turtle')
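A note on the pattern above: g and t are two named graphs layered over the same IOMemory store, which is why they can be serialized separately while sharing triple storage. A minimal, self-contained sketch of that pattern (assuming rdflib 5.x, where IOMemory still exists; rdflib 6+ uses the default Memory store instead):

from rdflib import ConjunctiveGraph, Graph, Literal, Namespace, URIRef
from rdflib.plugins.memory import IOMemory  # rdflib < 6

EX = Namespace("http://example.org/")
store = IOMemory()
g1 = Graph(store=store, identifier=URIRef("http://example.org/g1"))
g2 = Graph(store=store, identifier=URIRef("http://example.org/g2"))
g1.add((EX.a, EX.p, Literal("in graph 1")))
g2.add((EX.b, EX.p, Literal("in graph 2")))
# A ConjunctiveGraph over the same store sees both named graphs at once.
print(len(g1), len(g2), len(ConjunctiveGraph(store=store)))  # 1 1 2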
Example #2
def locationtoturtle(ellist, meta):
    rdf = Graph()
    cs = Namespace("http://cs.unibo.it/ontology/")
    colon = Namespace("http://www.essepuntato.it/resource/")
    dcterms = Namespace("http://purl.org/dc/terms/")
    xsd = Namespace("http://www.w3.org/2001/XMLSchema#")
    this = Namespace("http://vitali.web.cs.unibo.it/twiki/pub/TechWeb12/DataSource2/posteBO2011.ttl#")
    vcard = Namespace("http://www.w3.org/2006/vcard/ns#")
    rdf.bind("vcard", vcard)
    rdf.bind("cs", cs)
    rdf.bind("", colon)
    rdf.bind("dcterms", dcterms)
    rdf.bind("xsd", xsd)
    rdf.bind("this", this)
    rdf.add((this["metadata"], dcterms["creator"], Literal(meta.creator)))
    rdf.add((this["metadata"], dcterms["created"], Literal(meta.created,datatype=XSD.date)))
    rdf.add((this["metadata"], dcterms["description"], Literal(meta.version)))
    rdf.add((this["metadata"], dcterms["valid"], Literal(meta.valid,datatype=XSD.date)))
    rdf.add((this["metadata"], dcterms["source"], Literal(meta.source)))
    for location in ellist:
        rdf.add((colon[location.id], vcard["fn"], Literal(location.name)))
        rdf.add((colon[location.id], vcard["extended-address"], Literal(location.address)))
        rdf.add((colon[location.id], vcard["category"], Literal(location.category)))
        rdf.add((colon[location.id], vcard["latitude"], Literal(location.lat)))
        rdf.add((colon[location.id], vcard["longitude"], Literal(location.long)))
        if(location.tel):
            rdf.add((colon[location.id], vcard["tel"], Literal(location.tel)))
        if(location.note):
            rdf.add((colon[location.id], vcard["note"], Literal(location.note)))
        rdf.add((colon[location.id], cs["opening"], Literal(location.opening)))
        rdf.add((colon[location.id], cs["closing"], Literal(location.closing)))
    print("Content-type: text/turtle; charset=UTF-8\n")
    print(rdf.serialize(format="n3"))
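locationtoturtle assumes that meta and each element of ellist expose specific attributes; the dataclasses below are hypothetical stand-ins sketching the expected shapes, not the project's real types:

from dataclasses import dataclass

@dataclass
class Meta:  # hypothetical stand-in for the metadata argument
    creator: str
    created: str  # ISO date string, e.g. "2011-05-01"
    version: str
    valid: str
    source: str

@dataclass
class Location:  # hypothetical stand-in for one element of ellist
    id: str
    name: str
    address: str
    category: str
    lat: str
    long: str
    opening: str
    closing: str
    tel: str = ""
    note: str = ""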
Example #3
def parse_and_serialize(input_files,
                        input_format,
                        guess,
                        outfile,
                        output_format,
                        ns_bindings,
                        store_conn="",
                        store_type=None):

    if store_type:
        store = plugin.get(store_type, Store)()
        store.open(store_conn)
        graph = ConjunctiveGraph(store)
    else:
        store = None
        graph = ConjunctiveGraph()

    for prefix, uri in list(ns_bindings.items()):
        graph.namespace_manager.bind(prefix, uri, override=False)

    for fpath in input_files:
        use_format, kws = _format_and_kws(input_format)
        if fpath == '-':
            fpath = sys.stdin
        elif not input_format and guess:
            use_format = guess_format(fpath) or DEFAULT_INPUT_FORMAT
        graph.parse(fpath, format=use_format, **kws)

    if outfile:
        output_format, kws = _format_and_kws(output_format)
        kws.setdefault('base', None)
        graph.serialize(destination=outfile, format=output_format, **kws)

    if store:
        store.rollback()
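_format_and_kws is a helper (as in rdflib's rdfpipe tool) that splits a spec like "json-ld:context=ctx.json" into a format name plus parser/serializer keyword arguments. A minimal sketch of that contract, offered as an assumption rather than the exact implementation:

def _format_and_kws(fmt):
    # "n3:base=http://example.org/,flag" -> ("n3", {"base": "http://example.org/", "flag": True})
    fmt, _, opts = (fmt or "").partition(":")
    kws = {}
    if opts:
        for opt in opts.split(","):
            key, sep, value = opt.partition("=")
            kws[key] = value if sep else True
    return fmt or None, kws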
Example #5
    def test_turtle_namespace_prefixes(self):

        g = ConjunctiveGraph()
        n3 = \
            """
        @prefix _9: <http://data.linkedmdb.org/resource/movie/> .
        @prefix p_9: <urn:test:> .
        @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .

        p_9:a p_9:b p_9:c .

        <http://data.linkedmdb.org/resource/director/1> a
        <http://data.linkedmdb.org/resource/movie/director>;
            rdfs:label "Cecil B. DeMille (Director)";
            _9:director_name "Cecil B. DeMille" ."""

        g.parse(data=n3, format='n3')
        turtle = g.serialize(format="turtle")

        # Check round-tripping, just for kicks.
        g = ConjunctiveGraph()
        g.parse(data=turtle, format='turtle')
        # Shouldn't have got to here
        s = g.serialize(format="turtle")

        self.assertTrue(b('@prefix _9') not in s)
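The b(...) wrapper is a compatibility helper from rdflib's test utilities that turns a native string into the bytes object serialize() returned in older rdflib versions; a rough, assumed equivalent:

def b(s):
    # approximate stand-in for rdflib's old py3compat.b test helper
    return s.encode("latin-1")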
Example #7
def serialize_store(db_conn, filename):
    rdfstore = rdflib.plugin.get('Sleepycat', rdflib.store.Store)()
    # rdflib can create the necessary structures if the store is empty
    rdfstore.open(db_conn, create=False)
    cg = ConjunctiveGraph(store=rdfstore)
    with open(filename, 'wb') as f:
        cg.serialize(f)
    return True
Example #9
def extract_rdfa(url, outfile=sys.stdout, parser="rdfa", serializer="n3"):
    """
    Extract RDFa from a given URL

    Parsers are listed at https://rdflib.readthedocs.org/en/4.1.0/plugin_parsers.html
    Serializers are listed at https://rdflib.readthedocs.org/en/4.1.0/plugin_serializers.html
    """
    store = None
    graph = ConjunctiveGraph()
    graph.parse(url, format=parser)
    graph.serialize(destination=outfile, format=serializer)
Example #11
 def get(self):
     g = ConjunctiveGraph()
     ns = Namespace('http://purl.org/NET/mediatype#')
     for mt in models.MediaType.all():
         g.add((URIRef(mt.uri), RDF.type, ns['MediaType']))
         g.add((URIRef(mt.uri), RDFS.label, Literal(mt.name)))
         if mt.rfc_url:
             g.add((URIRef(mt.uri), RDFS.seeAlso, URIRef(mt.rfc_url)))
         if mt.application_url:
             g.add((URIRef(mt.uri), RDFS.seeAlso, URIRef(mt.application_url)))
     self.response.headers['Content-Type'] = 'application/rdf+xml'
     g.serialize(self.response.out)
Example #12
    def convert_gml(self, ttl_output_file, uri_part, specific_part):
            """
            Pelagios conversion GML to TTL
            @type       ttl_output_file: string
            @param      ttl_output_file: Absolute path to TTL output file
            @type       uri_part: string
            @param      uri_part: URI for the region to be displayed (e.g. http://earkdev.ait.ac.at/earkweb/sip2aip/working_area/sip2aip/34809536-b9f8-4c51-83d1-ef365ca658f5/)
            @type       specific_part: string
            @param      specific_part: Specific part that distinguishes the URI from other URIs (e.g. 1994)
            """
            cito_ns = Namespace("http://purl.org/spar/cito")
            cnt_ns = Namespace("http://www.w3.org/2011/content#")
            dcterms_ns = Namespace("http://purl.org/dc/terms/")
            foaf_ns = Namespace("http://xmlns.com/foaf/0.1/")
            geo_ns = Namespace("http://www.w3.org/2003/01/geo/wgs84_pos#")
            geosparql_ns = Namespace("http://www.opengis.net/ont/geosparql#")
            gn_ns = Namespace("http://www.geonames.org/ontology#")
            lawd_ns = Namespace("http://lawd.info/ontology/")
            rdfs_ns = Namespace("http://www.w3.org/2000/01/rdf-schema#")
            skos_ns = Namespace("http://www.w3.org/2004/02/skos/core#")

            slovenia = URIRef("http://earkdev.ait.ac.at/earkweb/sip2aip/working_area/sip2aip/5c6f5563-7665-4719-a2b6-4356ea033c1d/#place/Slovenia")

            store = IOMemory()

            g = ConjunctiveGraph(store=store)
            g.bind("cito", cito_ns)
            g.bind("cnt", cnt_ns)
            g.bind("dcterms", dcterms_ns)
            g.bind("foaf", foaf_ns)
            g.bind("geo", geo_ns)
            g.bind("geosparql", geosparql_ns)
            g.bind("gn", gn_ns)
            g.bind("lawd", lawd_ns)
            g.bind("rdfs", rdfs_ns)
            g.bind("skos", skos_ns)

            graph_slovenian_districts = Graph(store=store, identifier=slovenia)
            gml_to_wkt = GMLtoWKT(self.gml_file)
            district_included = {}
            i = 1
            print("Processing GML file: %s" % self.gml_file)
            for district_wkt in gml_to_wkt.get_wkt_linear_ring():
                techname = whsp_to_unsc(district_wkt["name"])
                print("District %d: %s" % (i, techname))
                if techname not in district_included:
                    district = URIRef("%s#place/%s/%s" % (uri_part, techname, specific_part))
                    graph_slovenian_districts.add((district, RDF.type, lawd_ns.Place))
                    graph_slovenian_districts.add((district, dcterms_ns['isPartOf'], slovenia))
                    graph_slovenian_districts.add((district, dcterms_ns['temporal'], Literal(str(district_wkt["year"]))))
                    graph_slovenian_districts.add((district, gn_ns['countryCode'], Literal(u'SI')))
                    graph_slovenian_districts.add((district, rdfs_ns['label'], Literal(district_wkt["name"], lang=u'si')))
                    polygons = BNode()
                    graph_slovenian_districts.add((district, geosparql_ns['hasGeometry'], polygons))
                    g.add((polygons, geosparql_ns['asWKT'], Literal(district_wkt["polygon"])))
                    district_included[techname] = True
                i += 1
            with open(ttl_output_file, 'w') as f:
                f.write(g.serialize(format='n3'))
Example #13
    def test_issue_250(self):
        """

        https://github.com/RDFLib/rdflib/issues/250

        When I have a ConjunctiveGraph with the default namespace set,
        for example

        import rdflib
        g = rdflib.ConjunctiveGraph()
        g.bind(None, "http://defaultnamespace")

        then the Trix serializer binds the default namespace twice in its XML
        output, once for the Trix namespace and once for the namespace I used:

        print(g.serialize(format='trix').decode('UTF-8'))

        <?xml version="1.0" encoding="utf-8"?>
        <TriX
          xmlns:xml="http://www.w3.org/XML/1998/namespace"
          xmlns="http://defaultnamespace"
          xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
          xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
          xmlns="http://www.w3.org/2004/03/trix/trix-1/"
        />

        """

        graph = ConjunctiveGraph()
        graph.bind(None, "http://defaultnamespace")
        sg = graph.serialize(format='trix').decode('UTF-8')
        self.assertTrue(
            'xmlns="http://defaultnamespace"' not in sg, sg)
        self.assertTrue(
            'xmlns="http://www.w3.org/2004/03/trix/trix-1/' in sg, sg)
Example #14
class SiocWiki(object):
    def __init__(self, uri, title=None, created=None):
        self.graph = Graph()
        self.graph.bind('sioc', SIOC)
        self.graph.bind('dc', DC)
        self.graph.bind('dcterms', DCTERMS)
        self.graph.bind('rdf', RDF)
        
        self._add_site(uri, title)
        
        
    def _add_site(self, uri, title):
        node = URIRef(uri)
        self.graph.add((node, RDF.type, SIOC['Site']))
        self.graph.add((node, DC['title'], Literal(title)))
        return node
        
    def add_page(self, content, title, uri, updated):
        node = URIRef(uri)
        self.graph.add((node, RDF.type, SIOC['Wiki']))      
        self.graph.add((node, SIOC['link'], URIRef(uri)))
        self.graph.add((node, DC['title'], Literal(title)))
        self.graph.add((node, DC['content'], Literal(content)))
        self.graph.add((node, DCTERMS['updated'], Literal(updated)))
    
    def to_str(self):
        return self.graph.serialize(format="pretty-xml")
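SiocWiki relies on module-level SIOC, DC, and DCTERMS Namespace objects, plus rdflib's Graph, URIRef, Literal, and RDF imports. A hypothetical usage sketch with those assumed definitions:

from rdflib import Namespace

SIOC = Namespace("http://rdfs.org/sioc/ns#")
DC = Namespace("http://purl.org/dc/elements/1.1/")
DCTERMS = Namespace("http://purl.org/dc/terms/")

wiki = SiocWiki("http://example.org/wiki", title="Example Wiki")
wiki.add_page(content="Hello, world.", title="FrontPage",
              uri="http://example.org/wiki/FrontPage",
              updated="2012-01-01T00:00:00Z")
print(wiki.to_str())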
Example #16
def check_n3_serialize(fpath, fmt, verbose=False):
    g = ConjunctiveGraph()
    _parse_or_report(verbose, g, fpath, format=fmt)
    if verbose:
        for t in g:
            print(t)
        print("========================================")
        print("Parsed OK!")
    s = g.serialize(format='n3')
    if verbose:
        print(s)
    g2 = ConjunctiveGraph()
    _parse_or_report(verbose, g2, data=s, format='n3')
    if verbose:
        print(g2.serialize())
    crapCompare(g, g2)
Example #17
def check_nt_serialize(fpath, fmt, verbose=False):
    g = ConjunctiveGraph()
    _parse_or_report(verbose, g, fpath, format=fmt)
    if verbose:
        for t in g:
            print(t)
        print("========================================")
        print("Parsed OK!")
    s = g.serialize(format='nt')
    if verbose:
        print("Serialized to: ", s)
    g2 = ConjunctiveGraph()
    _parse_or_report(verbose, g2, data=s, format='nt')
    if verbose:
        print(g2.serialize())
    crapCompare(g, g2)
Example #18
def output_to_oac(fileid, dir, metadata, annotations):
	"""
	TODO
	"""
	# import libraries
	from rdflib import Namespace, BNode, Literal, URIRef,RDF,RDFS
	from rdflib.graph import Graph, ConjunctiveGraph
	from rdflib.plugins.memory import IOMemory
	# declare namespaces
	oac = Namespace("http://www.w3.org/ns/oa#")
	perseus = Namespace("http://data.perseus.org/citations/")
	myanno = Namespace("http://hellespont.org/annotations/jstor")
	store = IOMemory()
	# initialise the graph
	g = ConjunctiveGraph(store=store)
	# bind namespaces
	g.bind("oac",oac)
	g.bind("perseus",perseus)
	g.bind("myanno",myanno)
	for n,ann in enumerate(metadata["citations"]):
	    anno1 = URIRef(myanno["#%i"%n])
	    g.add((anno1, RDF.type,oac["Annotation"]))
	    g.add((anno1, oac["hasTarget"],URIRef("%s%s"%("http://jstor.org/stable/",metadata["doi"]))))
	    g.add((anno1, RDFS.label, Literal(ann["label"])))
	    g.add((anno1,oac["hasBody"],perseus[ann["ctsurn"]]))
	    g.add((anno1,oac["motivatedBy"],oac["linking"]))
	fname = "%s%s" % (dir, fileid.replace(".txt", ".ttl"))
	with open(fname, "w") as f:
		f.write(g.serialize(format="turtle"))
	return
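output_to_oac expects metadata to carry a "doi" plus a "citations" list of dicts with "label" and "ctsurn" keys (the annotations argument is never used). A hypothetical call, with made-up identifiers, for illustration:

metadata = {
    "doi": "10.2307/123456",  # made-up DOI
    "citations": [
        {"label": "Hom. Il. 1.1",
         "ctsurn": "urn:cts:greekLit:tlg0012.tlg001:1.1.1"},
    ],
}
output_to_oac("paper.txt", "out/", metadata, annotations=None)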
Example #19
        def view(name=None, format=None, view=None):
            self.db.store.nsBindings = {}
            content_type = None
            if format is not None:
                if format in extensions:
                    content_type = extensions[format]
                else:
                    name = '.'.join([name, format])
            #argstring = '&'.join(["%s=%s"%(k,v) for k,v in request.args.iteritems(multi=True) if k != 'value'])
            if name is not None:
                #if len(argstring) > 0:
                #    name = name + "?" + argstring
                entity = self.NS.local[name]
            elif 'uri' in request.args:
                entity = URIRef(request.args['uri'])
            else:
                entity = self.NS.local.Home

            #print(request.method, 'view()', entity, view)
            if request.method == 'POST':
                print("uploading file", entity)
                if len(request.files) == 0:
                    flash('No file uploaded')
                    return redirect(request.url)
                upload_type = rdflib.URIRef(request.form['upload_type'])
                self.add_files(entity, [y for x, y in request.files.items(multi=True)],
                               upload_type=upload_type)
                url = "/about?%s" % urlencode(dict(uri=str(entity), view="view"))
                print("redirecting to", url)
                return redirect(url)
            elif request.method == 'DELETE':
                self.delete_file(entity)
                return '', 204
            elif request.method == 'GET':
                resource = self.get_resource(entity)

                # 'view' is the default view
                fileid = resource.value(self.NS.whyis.hasFileID)
                if fileid is not None and 'view' not in request.args:
                    print(resource.identifier, fileid)
                    f = self.file_depot.get(fileid)
                    fsa = FileServeApp(f, self.config["file_archive"].get("cache_max_age",3600*24*7))
                    return fsa
            
                if content_type is None:
                    content_type = request.headers['Accept'] if 'Accept' in request.headers else 'text/turtle'
                #print entity

                fmt = sadi.mimeparse.best_match([mt for mt in list(dataFormats.keys()) if mt is not None],content_type)
                if 'view' in request.args or fmt in htmls:
                    return render_view(resource)
                elif fmt in dataFormats:
                    output_graph = ConjunctiveGraph()
                    result, status, headers = render_view(resource, view='describe')
                    output_graph.parse(data=result, format="json-ld")
                    return output_graph.serialize(format=dataFormats[fmt]), 200, {'Content-Type':content_type}
                #elif 'view' in request.args or sadi.mimeparse.best_match(htmls, content_type) in htmls:
                else:
                    return render_view(resource)
Example #20
def main():
    import optparse
    prs = optparse.OptionParser()

    prs.add_option("-i", "--ipython", action="store_true")
    prs.add_option("--print-store", action="store_true")
    prs.add_option("--drop-store", action="store_true")

    (opts, args) = prs.parse_args()

    logging.getLogger().setLevel(logging.DEBUG)

    initialize_rdflib()
    store = store_from_connstr(DEFAULT_STORE_URI)

    if opts.drop_store:
        store.destroy(DEFAULT_STORE_URI)
        exit(0)

    # Create a new named graph
    graph_uri = get_session_uri()
    graph = Graph(store, identifier=URIRef(graph_uri))  # !

    if opts.print_store:
        print(store)

        cmt = """
        contexts = sorted(set(graph.contexts()))
        print "Contexts:"
        for c in contexts:
            print c
        print ""

        for c in contexts:
            print '\n\n', c
            for t in graph.triples((None,None,None), context=c):
                print t
        """

        print(graph.serialize(format='n3'))

    if opts.ipython:
        #import sys
        import IPython
        IPython.Shell.IPShellEmbed(argv=args)(local_ns=locals(),
                                              global_ns=globals())
Example #21
 def test_pretty_broken_xmlliteral(self):
     # given:
     g = ConjunctiveGraph()
     g.add((BNode(), RDF.value, Literal(u'''<p ''', datatype=RDF.XMLLiteral)))
     # when:
     xmlrepr = g.serialize(format='pretty-xml')
     # then:
     assert u'''<rdf:value rdf:datatype="http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral">&lt;p '''.encode('utf-8') in xmlrepr
Example #22
 def test_pretty_xmlliteral(self):
     # given:
     g = ConjunctiveGraph()
     g.add((BNode(), RDF.value, Literal(u'''<p xmlns="http://www.w3.org/1999/xhtml">See also <a href="#aring">Å</a></p>''', datatype=RDF.XMLLiteral)))
     # when:
     xmlrepr = g.serialize(format='pretty-xml')
     # then:
     assert u'''<rdf:value rdf:parseType="Literal"><p xmlns="http://www.w3.org/1999/xhtml">See also <a href="#aring">Å</a></p></rdf:value>'''.encode('utf-8') in xmlrepr
Example #23
    def post(self):
        query = self.request.get("content")
        nrOfResults = self.request.get("amount")

        try:
            number = int(nrOfResults)
        except ValueError:
            number = 0

        literals = re.findall(r'"(.+?)"',query)

        urls = processLiterals(literals, number)

        graph = ConjunctiveGraph()

        for url in urls:
            # Original URL fetch
            xmlresult = urlfetch.fetch(url,deadline=60,method=urlfetch.GET)

            if xmlresult.status_code == 200:

                iwa = Namespace('http://iwa2012-18-2.appspot.com/#')
                idns = Namespace('http://iwa2012-18-2.appspot.com/id/#')
                venuens = Namespace('http://iwa2012-18-2.appspot.com/venueid/#')

                tree = etree.fromstring(xmlresult.content)
                for event in tree.findall('events/event'):
                    id = event.attrib['id']
                    title = event.find('title')
                    url = event.find('url')
                    venueid = event.find('venue_id')
                    venueurl = event.find('venue_url')
                    venuename = event.find('venue_name')

                    graph.add((idns[id], iwa['hasTitle'], Literal(title.text)))
                    graph.add((idns[id], iwa['hasUrl'], Literal(url.text)))
                    graph.add((venuens[id], iwa['hasVenueName'], Literal(venuename.text)))
                    graph.add((venuens[id], iwa['hasUrl'], Literal(venueurl.text)))
                    graph.add((idns[id], iwa['atVenue'], venuens[id]))

            else:
                print("Something went wrong with the connection to the Eventful server. Status code: " + str(xmlresult.status_code))

    print(graph.serialize())
Example #25
def rdf_description(name, notation='xml'):
    """
    Funtion takes  title of node, and rdf notation.
    """
    valid_formats = ["xml", "n3", "ntriples", "trix"]
    default_graph_uri = "http://gstudio.gnowledge.org/rdfstore"
    configString = "/var/tmp/rdfstore"

    # Get the Sleepycat plugin.
    store = plugin.get('Sleepycat', Store)('rdfstore')

    # Open previously created store, or create it if it doesn't exist yet
    graph = Graph(store="Sleepycat", identifier=URIRef(default_graph_uri))
    path = mkdtemp()
    rt = graph.open(path, create=False)
    if rt == NO_STORE:
        #There is no underlying Sleepycat infrastructure, create it
        graph.open(path, create=True)
    else:
        assert rt == VALID_STORE, "The underlying store is corrupt"

    # Now we'll add some triples to the graph & commit the changes
    gstudio_ns = Namespace('http://sbox.gnowledge.org/gstudio/')  # avoid shadowing the rdflib module
    graph.bind("gstudio", "http://gnowledge.org/")
    exclusion_fields = [
        "id", "rght", "node_ptr_id", "image", "lft", "_state",
        "_altnames_cache", "_tags_cache", "nid_ptr_id", "_mptt_cached_fields"
    ]
    node = Objecttype.objects.get(title=name)
    node_dict = node.__dict__

    subject = str(node_dict['id'])
    for key in node_dict:
        if key not in exclusion_fields:
            predicate = str(key)
            pobject = str(node_dict[predicate])
            graph.add((gstudio_ns[subject], gstudio_ns[predicate], Literal(pobject)))

    graph.commit()

    print(graph.serialize(format=notation))

    graph.close()
Example #27
    def writeFile(self, stmts, ctx, fileWords):
        g = ConjunctiveGraph()
        doc = {'ctx' : ctx}

        for s in stmts:
            g.add(s)
            if s[1] == SIOC.has_reply:
                doc['topic'] = s[0]
            if s[1] == DCTERMS.created: # expecting 2 of these, but same value
                doc['created'] = parse(s[2])

        doc['n3'] = g.serialize(format="n3")
        self.mongo['comment'].insert(doc, safe=True)
Example #28
def gnis2rdf(gnisfilename, rdffilename):
    gnisfile = open(gnisfilename, "r", newline="")
    store = ConjunctiveGraph(identifier="temp")

    if not gnisfile:
        print("Error opening gnis file!")
        return False

    gnisreader = csv.reader(gnisfile, delimiter="|")

    # Drop first row
    next(gnisreader)

    for r in gnisreader:
        InsertGNISFeature(r, store)

    # Add prefixes to store
    store.bind("gnis", gnis)
    store.bind("geo", geo)

    print("Serializing rdf...")
    store.serialize(destination=rdffilename, format="n3")
    print("created " + str(len(store)) + " triples")
Example #29
def test_escaping_of_triple_doublequotes():
    """
    Issue 186 - Check escaping of multiple doublequotes.
    A serialization/deserialization roundtrip of a certain class of
    Literals fails when there are both newline characters and multiple
    subsequent quotation marks in the lexical form of the Literal. In this case invalid N3
    is emitted by the serializer, which in turn cannot be parsed correctly.
    """
    g = ConjunctiveGraph()
    g.add((URIRef('http://foobar'), URIRef('http://fooprop'),
           Literal('abc\ndef"""""')))
    # assert g.serialize(format='n3') == '@prefix ns1: <http:// .\n\nns1:foobar ns1:fooprop """abc\ndef\\"\\"\\"\\"\\"""" .\n\n'
    g2 = ConjunctiveGraph()
    g2.parse(data=g.serialize(format='n3'), format='n3')
    assert g.isomorphic(g2) is True
Example #31
 def track(self, resource):
     graph = ConjunctiveGraph()
     sparql = SPARQLWrapper(self.conf.get_SPARQL())
     
     queue = [resource]
     while len(queue) != 0:
         target = queue.pop()   
         query = DESCRIBE_QUERY.replace('__RESOURCE__', target.n3())
         query = query.replace('__RELEASE__', self.conf.get_graph_name('release'))
         query = query.replace('__RULES__', self.conf.get_graph_name('rules'))
         query = query.replace('__RAW_DATA__', self.conf.get_graph_name('raw-data'))
         sparql.setQuery(query)
         results = sparql.query().convert()
         for statement in results:
             # Add the statement to the graph
             graph.add(statement)
             
             # If the relate to another resource we describe, queue it
             (_,p,o) = statement
             if p.startswith(PROV):
                 if o.startswith(self.conf.get_namespace('data')):
                     queue.append(o)
                 
     print(graph.serialize(format='turtle'))
Example #33
def gatherAndExportUserData(repo_name, userId, userToken):
	store = IOMemory()

	g=ConjunctiveGraph(store=store)
	g.bind("av",ns)
	g.bind("sc",sc)
	g.bind("dbo",dbo)
	g.bind("fb",fb)

	createGraphForFBUser(store,repo_name,userId,userToken)

	graphString = g.serialize(format="n3")
	with open("user.ttl","w") as f:
		f.write(graphString)

	response = sesame.import_content(repo_name,graphString)
Example #34
def write_graph(data_handle, out_handle, format='n3'):
    graph = Graph()
    count = 0
    for record in generate_records(data_handle):
        count += 1
        if count % 1000:
            sys.stderr.write(".")
        else:
            sys.stderr.write(str(count))
        for triple in get_triples(record):
            graph.add(triple)
        graph.commit()
    current_site = Site.objects.get_current()
    domain = 'https://%s' % current_site.domain
    out_handle.write(graph.serialize(format=format, base=domain, include_base=True))
    return count
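write_graph assumes a Django Site for the base URI and two helpers, generate_records and get_triples; the stand-ins below are hypothetical, sketched for a one-JSON-object-per-line input:

import json

from rdflib import Literal, Namespace, URIRef

EX = Namespace("http://example.org/")

def generate_records(data_handle):
    # one JSON object per line of the input stream
    for line in data_handle:
        yield json.loads(line)

def get_triples(record):
    # map each field of a record to one triple about its URI
    subject = URIRef(record["uri"])
    for key, value in record.get("fields", {}).items():
        yield (subject, EX[key], Literal(value))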
Example #36
    def testSerialize(self):

      s1 = URIRef('store:1')
      r1 = URIRef('resource:1')
      r2 = URIRef('resource:2')

      label = URIRef('predicate:label')

      g1 = Graph(identifier = s1)
      g1.add((r1, label, Literal("label 1", lang="en")))
      g1.add((r1, label, Literal("label 2")))

      s2 = URIRef('store:2')
      g2 = Graph(identifier = s2)
      g2.add((r2, label, Literal("label 3")))

      g = ConjunctiveGraph()
      for s,p,o in g1.triples((None, None, None)):
        g.addN([(s,p,o,g1)])
      for s,p,o in g2.triples((None, None, None)):
        g.addN([(s,p,o,g2)])
      r3 = URIRef('resource:3')
      g.add((r3, label, Literal(4)))
      
      
      r = g.serialize(format='trix')
      g3 = ConjunctiveGraph()
      from io import StringIO

      g3.parse(StringIO(r), format='trix')

      for q in g3.quads((None,None,None)):
        # TODO: Fix once getGraph/getContext is in conjunctive graph
        if isinstance(q[3].identifier, URIRef): 
          tg=Graph(store=g.store, identifier=q[3].identifier)
        else:
          # BNode, this is a bit ugly
          # we cannot match the bnode to the right graph automagically
          # here I know there is only one anonymous graph, 
          # and that is the default one, but this is not always the case
          tg=g.default_context
        self.assertTrue(q[0:3] in tg)
Example #37
def gatherAndExportGenreData(repo_name):
	store = IOMemory()

	g=ConjunctiveGraph(store=store)
	g.bind("av",ns)
	g.bind("sc",sc)
	g.bind("dbo",dbo)
	g.bind("fb",fb)

	genreRelations = dbpedia.getDBpediaGenreRelations()
	genreNames = dbpedia.getDbpediaMusicGenres()
	createGraphForGenres(store,genreNames,genreRelations)


	graphString = g.serialize(format="n3")

	with open("genres.ttl","w") as f:
		f.write(graphString)

	response = sesame.import_content(repo_name,graphString)
Example #39
def processHEAD(request, return_content=False):
    '''
        Returns an HttpResponse
    '''
    graph = get_graph_from_request(request)
    accept = http_accept(request)

    if accept not in FORMAT_MAP.values():
        return HttpResponse(status=406)

    if graph is None:
        g = ConjunctiveGraph(store=CharmeMiddleware.get_store())
    else:
        g = generate_graph(CharmeMiddleware.get_store(), URIRef(graph))

    content = g.serialize(format=rdf_format_from_mime(accept))

    if return_content:
        return HttpResponse(content=content)
    return HttpResponse()
Example #40
def gatherAndExportGlobalData(repo_name):
	store = IOMemory()

	g=ConjunctiveGraph(store=store)
	g.bind("av",ns)
	g.bind("sc",sc)
	g.bind("dbo",dbo)
	g.bind("fb",fb)

	venues = importVenuesFromFile("fb_data_stuff/venues.txt")
	events = importEventsFromDirectory("fb_data_stuff/events/")
	

	createGraphForEvents(store,repo_name,events)
	createGraphForVenues(store,venues)
	createGraphForEventArtistsAndGenres(store,repo_name,events)

	graphString = g.serialize(format="n3")

	with open("global.ttl","w") as f:
		f.write(graphString)
Example #41
def generictest(testFile):
    func_name = __name__ = __doc__ = id = 'test_sparql.' + \
                os.path.splitext(testFile)[0][8:].translate(
                                                    str.maketrans('-/', '__'))
    store = plugin.get(STORE,Store)()
    bootStrapStore(store)
    store.commit()
    prefix = testFile.split('.rq')[-1]
    manifestPath = '/'.join(testFile.split('/')[:-1]+['manifest.n3'])
    manifestPath2 = '/'.join(testFile.split('/')[:-1]+['manifest.ttl'])
    queryFileName = testFile.split('/')[-1]
    store = plugin.get(STORE,Store)()
    store.open(configString,create=False)
    assert len(store) == 0
    manifestG=ConjunctiveGraph(store)
    if not os.path.exists(manifestPath):
        assert os.path.exists(manifestPath2)
        manifestPath = manifestPath2
    manifestG.default_context.parse(open(manifestPath),
                                    publicID=URIRef(TEST_BASE),
                                    format='n3')
    manifestData = manifestG.query(
                      MANIFEST_QUERY,
                      processor='sparql',
                      initBindings={'query' : TEST_BASE[queryFileName]},
                      initNs=manifestNS,
                      DEBUG = False)
    store.rollback()
    store.close()
    for source,testCaseName,testCaseComment,expectedRT in manifestData:
        if expectedRT:
            expectedRT = '/'.join(testFile.split('/')[:-1] + \
                                    [expectedRT.replace(TEST_BASE,'')])
        if source:
            source = '/'.join(testFile.split('/')[:-1] + \
                                    [source.replace(TEST_BASE,'')])
        testCaseName = testCaseComment or testCaseName
        # log.debug("## Source: %s ##"%source)
        # log.debug("## Test: %s ##"%testCaseName)
        # log.debug("## Result: %s ##"%expectedRT)
        #Expected results
        if expectedRT:
            store = plugin.get(STORE,Store)()
            store.open(configString,create=False)
            resultG=ConjunctiveGraph(store).default_context
            log.debug("###"*10)
            log.debug("parsing: %s" % open(expectedRT).read())
            log.debug("###"*10)
            assert len(store) == 0
            # log.debug("## Parsing (%s) ##"%(expectedRT))
            if not trialAndErrorRTParse(resultG,expectedRT,DEBUG):
                log.debug(
                    "Unexpected result format (for %s), skipping" % \
                                                    (expectedRT))
                store.rollback()
                store.close()
                continue
            log.debug("## Done .. ##")
            rtVars = [rtVar for rtVar in 
                        resultG.objects(None,RESULT_NS.resultVariable)]
            bindings = []
            resultSetNode = resultG.value(predicate=RESULT_NS.value,
                                          object=RESULT_NS.ResultSet)
            for solutionNode in resultG.objects(resultSetNode,
                                                RESULT_NS.solution):
                bindingDict = dict([(key,None) for key in rtVars])
                for bindingNode in resultG.objects(solutionNode,
                                                   RESULT_NS.binding):
                    value = resultG.value(subject=bindingNode,
                                          predicate=RESULT_NS.value)
                    name  = resultG.value(subject=bindingNode,
                                          predicate=RESULT_NS.variable)
                    bindingDict[name] = value
                rbinds = [bindingDict[vName] for vName in rtVars]
                # print("Rbinds", rbinds)
                if len(rbinds) > 1 and (
                    isinstance(rbinds, list) or isinstance(rbinds, tuple)):
                    bindings.append(frozenset(rbinds))
                elif len(rbinds) == 1 and (
                    isinstance(rbinds, list) or isinstance(rbinds, tuple)):
                    bindings.append(rbinds[0])
                else:
                    bindings.append(rbinds)
                # bindings.append(tuple([bindingDict[vName] for vName in rtVars]))
            log.debug(open(expectedRT).read())
            store.rollback()
            store.close()
        if testFile in tests2Skip.keys():
            log.debug("Skipping test (%s) %s\n" % \
                        (testFile, tests2Skip[testFile]))
            raise SkipTest("Skipping test (%s) %s\n" % \
                        (testFile, tests2Skip[testFile]))
        query = open(testFile).read()
        log.debug("### %s (%s) ###" % (testCaseName,testFile))
        log.debug(query)
        p = parse(query)#,DEBUG_PARSE)
        log.debug(p)
        if EVALUATE and source:
            log.debug("### Source Graph: ###")
            log.debug(open(source).read())
            store = plugin.get(STORE,Store)()
            store.open(configString,create=False)
            g = ConjunctiveGraph(store)
            try:
                g.parse(open(source),format='n3')
            except:
                log.debug("Unexpected data format (for %s), skipping" % \
                                                                (source))
                store.rollback()
                store.close()
                continue
            rt = g.query(query,
                         processor='sparql',
                         DEBUG = False)
            if expectedRT:
                try:
                    result = rt.result
                except AttributeError:
                    result = rt
                if isinstance(result, Graph):
                    resgraph = open(graphtests[testFile]).read()
                    store = plugin.get(STORE,Store)()
                    store.open(configString,create=False)
                    g = ConjunctiveGraph(store)
                    g.parse(data=resgraph,format="n3")
                    assert result == g, \
                            "### Test Failed: ###\n\nB:\n%s\n\nR:\n%s\n\n" % \
                                    (g.serialize(format="n3"), 
                                     result.serialize(format="n3"))
                else:
                    # result = [r[0] for r in result if isinstance(r, (tuple, list))]
                    def stab(r):
                        if isinstance(r, (tuple, list)):
                            return frozenset(r)
                        else:
                            return r
                    results = set(
                        [stab(r) for r in result])
                    assert set(bindings).difference(results) == set([]) or set(bindings) == results, \
                            "### Test Failed: ###\n\nB:\n%s\n\nR:\n%s\n\n" % \
                                    (set(bindings), results)
                log.debug("### Test Passed: ###")
            store.rollback()
Example #42
    def query_lode(self, id):
        var = "http://inpho.cogs.indiana.edu/thinker/" + id
        # initialize dictionaries to store temporray results
        dbPropResults = {}
        inpho_DB = {}
        DB_inpho = {}
        dbpedia_web = {}
        triples = {}

        # init graphs for LODE and mapped data
        gLODE = ConjunctiveGraph()
        gReturn = ConjunctiveGraph()
        # import InPhO data
        gLODE.parse("http://inphodata.cogs.indiana.edu/lode/out_n3.20140207.rdf", format="n3")

        # builds a set of triples with the inpho id as the first entry and the
        # dbpedia id as the second
        resultsLODE = gLODE.query(
            """
            SELECT ?thinker_LODE ?thinkerDB
            WHERE { ?thinker_LODE owl:sameAs ?thinkerDB 
                    FILTER (regex(str(?thinker_LODE),"http://inpho.cogs.indiana.edu","i")
                    && regex(str(?thinkerDB),"http://dbpedia.org/resource/","i")).
                   }
            """
        )

        # load in property mapping between inpho-dbpedia
        prop_map_filename = config.get_data_path("rdf_map.txt")
        with open(prop_map_filename, "r") as f:
            dbprops = csv.reader(f, delimiter="\t")
            for dbprop in dbprops:
                dbPropResults[dbprop[1]] = dbprop[0]
                dbpedia_web[dbprop[1].split(":")[1]] = dbprop[2]

        # iterate through triples and store mappings
        for triple in resultsLODE:
            inpho_DB[str(triple[0])] = str(triple[1])  # store the results in key as inpho url and value as dbpedia url
            DB_inpho[str(triple[1])] = str(triple[0])  # store the results in key as dbpedia url and value as inpho url

        # queries for all relationships in dbpedia
        sparqlDB = SPARQLWrapper("http://inpho-dataserve.cogs.indiana.edu:8890/sparql/")
        sparqlDB.setReturnFormat(JSON)
        for inpho, DB in inpho_DB.items():
            predicate = {}
            # for dbprop in dbPropResults:
            if str(DB_inpho.get(DB)) == var:
                for dbprop in dbPropResults:
                    sparqlDB.setQuery(
                        """ PREFIX dbpprop: <http://dbpedia.org/ontology/>
                                      SELECT ?b  WHERE { <"""
                        + DB
                        + """> """
                        + dbprop
                        + """ ?b.
                                                        FILTER (regex(str(?b),"dbpedia.org/resource/","i")).
                                                        }"""
                    )
                    resultsDB = sparqlDB.query().convert()
                    predicate[dbprop] = resultsDB["results"]["bindings"]
                triples[DB] = predicate

                # retrieve native python object
        c.entity = h.fetch_obj(Entity, id, new_id=True)
        existing_predicate_list = []
        existing_object_list = []

        predicates_to_compare = ["influenced", "influenced_by", "teachers", "students"]

        for subject, predicate in triples.items():
            for predicate1, objectn in predicate.items():
                predicate_to_match = predicate1.split(":")[1]
                attr = getattr(c.entity, dbpedia_web[predicate_to_match])

                for attr1 in attr:
                    if dbpedia_web[predicate_to_match] in predicates_to_compare:
                        existing_predicate_list.append(dbpedia_web[predicate_to_match] + ":" + attr1.wiki)

                        # maps from dbpedia relationships back to inpho relationships
        for subject, predicate in triples.items():
            # attr = getattr(c.entity, predicate)
            # raise Exception

            for predicate1, objectn in predicate.items():

                for object1 in objectn:
                    # temp_str=dbpedia_web[predicate1.split(":")[1]] + ':'+str(object1['b']['value']).split("/")[len(str(object1['b']['value']).split("/"))-1].replace("_"," ")
                    temp_str = (
                        dbpedia_web[predicate1.split(":")[1]]
                        + ":"
                        + str(object1["b"]["value"]).split("/")[len(str(object1["b"]["value"]).split("/")) - 1]
                    )

                    # 	   raise Exception
                    if temp_str not in existing_predicate_list:
                        # returns the inphoid for the object
                        DB_Entry = DB_inpho.get(object1["b"]["value"])  # reverse lookup for the inpho data check

                        # if there is not an inpho id, leave it as the dbpedia id
                        if DB_Entry is None:
                            gReturn.add(
                                (URIRef(subject), URIRef(dbPropResults.get(predicate1)), URIRef(object1["b"]["value"]))
                            )
                        else:
                            # return the properly mapped id
                            # TODO: use attr to filter DB_Entry
                            gReturn.add((URIRef(subject), URIRef(dbPropResults.get(predicate1)), URIRef(DB_Entry)))

                        #  if "Francisco" in str(object1['b']['value']).split("/")[len(str(object1['b']['value']).split("/"))-1].replace("_", ):

        #        raise Exception
        return gReturn.serialize()
Example #43
gNode1.add((SNode2, hs['EndTime'],  Literal("2012-06-19T01:52:02Z")))


gNode1.add((SNode3,hs['hasTemperature'], Literal('64')))
gNode1.add((SNode3, hs['hasLight'], Literal('67')))
gNode1.add((SNode3, hs['hasHumidity'], Literal('88')))
gNode1.add((SNode3, hs['Located'], Literal('')))
gNode1.add((SNode3, hs['StartTime'],  Literal("2012-06-19T01:52:02Z")))
gNode1.add((SNode3, hs['EndTime'],  Literal("2012-06-19T01:52:02Z")))



# separate and display the graphs in n3
print(gNode1.serialize(format='n3'))
print("===================")

# Display full graph
print(g.serialize(format='n3'))
Example #44
    # add a graph for Mary's facts to the Conjunctive Graph
    gmary = Graph(store=store, identifier=cmary)
    # Mary's graph only contains the URI of the person she love, not his cute name
    gmary.add((mary, ns["hasName"], Literal("Mary")))
    gmary.add((mary, ns["loves"], john))

    # add a graph for John's facts to the Conjunctive Graph
    gjohn = Graph(store=store, identifier=cjohn)
    # John's graph contains his cute name
    gjohn.add((john, ns["hasCuteName"], Literal("Johnny Boy")))

    # enumerate contexts
    for c in g.contexts():
        print("-- %s " % c)

    # separate graphs
    print(gjohn.serialize(format="n3"))
    print("===================")
    print(gmary.serialize(format="n3"))
    print("===================")

    # full graph
    print(g.serialize(format="n3"))

    # query the conjunction of all graphs
    xx = None
    for x in g[mary : ns.loves / ns.hasCuteName]:  # type: ignore[misc]
        xx = x
    print("Q: Who does Mary love?")
    print("A: Mary loves {}".format(xx))
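This example and the next are fragments of rdflib's conjunctive-graph demo and assume setup along these lines (names follow rdflib's examples/conjunctive_graphs.py; IOMemory is the rdflib < 6 in-memory store, and the LOVE namespace in the next example plays the role of ns):

from rdflib import ConjunctiveGraph, Namespace, URIRef
from rdflib.plugins.memory import IOMemory  # rdflib < 6

ns = Namespace("http://love.com#")
mary = URIRef("http://love.com/lovers/mary")
john = URIRef("http://love.com/lovers/john")
cmary = URIRef("http://love.com/lovers/mary")  # context for Mary's graph
cjohn = URIRef("http://love.com/lovers/john")  # context for John's graph

store = IOMemory()
g = ConjunctiveGraph(store=store)
g.bind("love", ns)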
Example #45
    gmary.add((mary, LOVE.loves, john))

    # Add a graph containing John's facts to the Conjunctive Graph
    gjohn = Graph(store=store, identifier=cjohn)
    # John's graph contains his cute name
    gjohn.add((john, LOVE.hasCuteName, Literal("Johnny Boy")))

    # Enumerate contexts
    print("Contexts:")
    for c in g.contexts():
        print(f"-- {c.identifier} ")
    print("===================")
    # Separate graphs
    print("John's Graph:")
    print(gjohn.serialize())
    print("===================")
    print("Mary's Graph:")
    print(gmary.serialize())
    print("===================")

    print("Full Graph")
    print(g.serialize())
    print("===================")

    print("Query the conjunction of all graphs:")
    xx = None
    for x in g[mary : LOVE.loves / LOVE.hasCuteName]:
        xx = x
    print("Q: Who does Mary love?")
    print("A: Mary loves {}".format(xx))
Exemple #47
0
class Hisco2RDF():
    '''
    Scrapes the HISCO Web site
    The hierarchy goes as "major > minor > rubri > micro"
    '''
    def __init__(self):
        # The graph to store the data
        self.graph = ConjunctiveGraph()
        self.graph.namespace_manager.bind('skos', SKOS)
        self.graph.namespace_manager.bind('hisco', HISCO)
        self.graph.namespace_manager.bind('dcterms', DCTERMS)
        self.graph.namespace_manager.bind('sdmx-dimension', SDMX_DIMENSION)
        self.graph.namespace_manager.bind('sdmx-code', SDMX_CODE)
        self.graph.namespace_manager.bind('qb', QB)
        
        # SQLite DB for the cache
        self.cache = sqlite3.connect('cache.db')
        cursor = self.cache.cursor()
        cursor.execute("CREATE TABLE IF NOT EXISTS  page (url text, html text)")
        self.cache.commit()
    
    def __del__(self):
        self.cache.close()
        
    def get_page(self, url):
        #log.debug("Load %s" % url)
        
        c = self.cache.cursor()
        c.execute("SELECT * FROM page WHERE url = ?", (url,))
        res = c.fetchone()
        doc = None
        if res == None:
            doc = requests.get(url).content
            c.execute("INSERT INTO page VALUES (?,?)", (url, doc))
            self.cache.commit()
        else:
            (_, doc) = res            
        return BeautifulSoup(doc)

    def save_output(self):
        # Add more things needed for DataCubes
        dimprop = HISCO['occupation']
        self.graph.add((dimprop, RDF.type, QB['DimensionProperty']))
        self.graph.add((dimprop, RDFS.range, SKOS.Collection))
        self.graph.add((dimprop, QB['Concept'], SKOS.Collection))
        self.graph.add((dimprop, RDFS.label, Literal('Occupation code', lang='en')))
        self.graph.add((dimprop, RDFS.comment, Literal('The HISCO group of the occupation', lang='en')))
        
        
        # Print to the screen
        #outfile = sys.stdout.buffer
        #self.graph.serialize(destination=outfile, format='n3')
        
        # Save to the file
        outfile = open('../hisco.ttl', "wb")
        self.graph.serialize(destination=outfile, format='n3')
        outfile.close()
        
    def parse_hisco_tree(self):
        '''
        Parse the hisco tree
        '''
        # Load the page
        doc = self.get_page(ROOT + HISCO_TREE)
        
        # Find the major groups
        major_groups = []
        major_group = None
        for table in doc.find_all('table', attrs={'border':'0'}):
            for row in table.find_all('tr'):
                for col in row.find_all('td'):
                    # Skip empty rows
                    if len(col.text) == 1:
                        continue
                    # We are starting a new group
                    if col.text.startswith('Majorgroup'):
                        # Save the one we were building if any
                        if major_group != None:
                            major_groups.append(major_group)
                        m = re.search("Majorgroup ([^ ]*) ", col.text)
                        major_group = {}
                        major_group['title'] = col.text
                        major_group['code'] = m.group(1).replace('/', '-')
                    # We have a description
                    if col.text.startswith('Workers'):
                        major_group['description'] = col.text
                    # We have links to minor
                    if col.text.startswith('List Minor'):
                        link = col.find_all('a')[0]['href']
                        major_group.setdefault('links', [])
                        major_group['links'].append(link)
        # Add the last group in the making
        if major_group != None:
            major_groups.append(major_group)

        # Add the groups to the graph
        for group in major_groups:
            major_group_uri = self._get_group_uri(group['code'])
            self.graph.add((major_group_uri, RDF.type, SKOS['ConceptScheme']))
            self.graph.add((major_group_uri, DCTERMS.title, Literal(group['title'])))
            self.graph.add((major_group_uri, DCTERMS.description, Literal(group['description'])))
            
        # Now move onto the minor groups following the links
        for major_group in major_groups:
            major_group_uri = self._get_group_uri(major_group['code'])
            
            for minor_link in major_group['links']:
                # Look for the minor groups
                minor_groups = self._parse_records_table(minor_link, 2)
        
                # Add the groups to the graph
                for minor_group in minor_groups:
                    minor_group_uri = self._get_group_uri(minor_group['code'])
                    self.graph.add((minor_group_uri, RDF.type, SKOS['ConceptScheme']))
                    self.graph.add((minor_group_uri, RDFS.label, Literal(minor_group['title'])))
                    self.graph.add((minor_group_uri, DCTERMS.description, Literal(minor_group['description'])))
                    self.graph.add((major_group_uri, SKOS.related, minor_group_uri))

                    # Got one level deeper into the rubri
                    for rubri_link in minor_group['links']:
                        # Look for the minor groups
                        rubri_groups = self._parse_records_table(rubri_link, 3)
                        
                        # Add the groups to the graph
                        for rubri_group in rubri_groups:
                            rubri_group_uri =  self._get_group_uri(rubri_group['code'])
                            self.graph.add((rubri_group_uri, RDF.type, SKOS['ConceptScheme']))
                            self.graph.add((rubri_group_uri, RDFS.label, Literal(rubri_group['title'])))
                            self.graph.add((rubri_group_uri, DCTERMS.description, Literal(rubri_group['description'])))
                            self.graph.add((minor_group_uri, SKOS.related, rubri_group_uri))
    
                            # And one deeper for the micro
                            for micro_link in rubri_group['links']:
                                # Look for the minor groups
                                micro_groups = self._parse_records_table(micro_link, 5)
                                
                                # Add the groups to the graph
                                for micro_group in micro_groups:
                                    hisco_uri = self._get_hisco_uri(micro_group['code'])
                                    self.graph.add((hisco_uri, RDF.type, SKOS['Collection']))
                                    self.graph.add((hisco_uri, RDFS.label, Literal(micro_group['title'])))
                                    self.graph.add((hisco_uri, DCTERMS.description, Literal(micro_group['description'])))
                                    self.graph.add((rubri_group_uri, SKOS.related, hisco_uri))
                
    def parse_occupational_titles(self):
        '''
        Scrape the section of the site about occupational titles
        Last page = http://historyofwork.iisg.nl/list_hiswi.php?step=1845&publish=Y&modus=ftsearch
        '''
        parsed_status_page = set()
        next_page = OCCUPATIONAL_TITLES
        
        while next_page != None:
            log.info("Parse titles %s" % next_page)
                
            # Load the page
            doc = self.get_page(ROOT + next_page)
                
            # Find the right table
            table = doc.find('table', attrs={'cellspacing':'0', 'cellpadding':'2', 'border':'0'})
    
            # Look for all the titles 
            for row in table.find_all('tr')[1:]:  # Skip the header
                cols = row.find_all('td')
                occupation_title = cols[1].text
                details_page_link = cols[1].find_all('a')[0]['href']
                language = LANG_MAP[cols[2].text]
                hisco_code = cols[3].text.replace('*', '')
                
                # Get the DB index from details_page_link
                m = re.search('know_id=([^&]*)&', details_page_link)
                occupation_index = m.group(1)
                
                # Add the concept to the graph
                resource = self._get_occupation_title_uri(occupation_index)
                self.graph.add((resource, RDF.type, SKOS['Concept']))
                self.graph.add((resource, SKOS.prefLabel, Literal(occupation_title, lang=language)))
                self.graph.add((resource, SKOS.member, self._get_hisco_uri(hisco_code)))
                
                # Get more information about the title and add it as a member of the collection
                details_page = self.get_page(ROOT + details_page_link)
                details_table = details_page.find('table', attrs={'cellspacing':'8', 'cellpadding':'0'})
                keyvalues = {}
                for details_row in details_table.find_all('tr'):
                    details_cols = details_row.find_all('td')
                    keyvalues[details_cols[0].text.strip()] = details_cols[-1]
                    
                # We already dealt with these two
                del keyvalues['Hisco code']
                del keyvalues['Occupational title']
                
                # TODO Country , use refArea
                
                # TODO Language
                
                # Do we know the gender ?
                if 'Gender' in keyvalues:
                    sex = SDMX_CODE['sex-U'] # Also applies to "Male/Female"
                    if keyvalues['Gender'].text.strip() == 'Male':
                        sex = SDMX_CODE['sex-M'] 
                    elif keyvalues['Gender'].text.strip() == 'Female':
                        sex = SDMX_CODE['sex-F']
                    self.graph.add((resource, SDMX_DIMENSION['sex'], sex))
                    del keyvalues['Gender']
                
                # Do we know the status ?
                if 'Status' in keyvalues:
                    # Add the status
                    status = keyvalues['Status'].text.strip()
                    self.graph.add((resource, HISCO['status'], self._get_status_uri(status)))
                    # Parse the status page if necessary
                    status_page = keyvalues['Status'].find_all('a')[0]['href']
                    if status_page not in parsed_status_page:
                        self._parse_status_page(status_page)
                        parsed_status_page.add(status_page)
                    del keyvalues['Status']
                
                # TODO Relation  
                
                # TODO Product
                  
                # TODO Provenance
                
                # Do we have a translation in English ?
                if 'Translation' in keyvalues:
                    trans = Literal(keyvalues['Translation'].text.strip().replace('´', "'"), lang='en')
                    self.graph.add((resource, SKOS.altLabel, trans))
                    del keyvalues['Translation']
                
                # Print whatever is left
                #if len(keyvalues.keys()) != 0:
                #    log.info(keyvalues.keys())
                    
            # Look for the "next" link
            next_table = doc.find('table', class_='nextprev')
            next_page = None
            for link in next_table.find_all('a'):
                if 'Next' in link.text:
                    next_page = link['href']
            
    def _parse_status_page(self, url):
        '''
        Parses a status page such as http://historyofwork.iisg.nl/status.php?int02=32
        '''
        
        # Work-around broken content
        if url == 'status.php?int02=15':
            return
        
        # Load the page
        doc = self.get_page(ROOT + url)
        
        # Find the data about this status
        status_uri = None
        for line in doc.find('pre').text.split('\n'):
            if re.match("^[0-9]* [a-zA-Z]*", line):
                m = re.search("^([0-9]*) ([a-zA-Z]*)", line)
                status_uri = self._get_status_uri(m.group(1))
                self.graph.add((status_uri, RDF.type, HISCO['Status']))
                self.graph.add((status_uri, RDFS.label, Literal(m.group(2))))
                self.graph.add((status_uri, SKOS.prefLabel, Literal(m.group(2))))
                self.graph.add((status_uri, SKOS.notation, Literal(m.group(1))))
            if re.match("^[A-Z]{2}:\t[a-zA-Z]*", line):
                m = re.search("^([A-Z]{2}):\t([a-zA-Z]*)", line)
                lang_code = m.group(1).lower()
                label = Literal(m.group(2), lang = lang_code)
                self.graph.add((status_uri, SKOS.altLabel, label))
                
        # Describe the class
        status_class = HISCO['Status']
        descr = doc.find('table', attrs={'width':'600'}).text.strip().split('\r\n')
        self.graph.add((status_class, RDF.type, RDFS.Class))
        self.graph.add((status_class, RDFS.label, Literal("Status code")))
        self.graph.add((status_class, DCTERMS.comment, Literal(descr[1])))
        
        # Describe the property
        status_property = HISCO['status']
        self.graph.add((status_property, RDF.type, RDF.Property))
        self.graph.add((status_property, RDFS.label, Literal("status associated to the occupation")))
        self.graph.add((status_property, RDFS.range, HISCO['Status']))
        self.graph.add((status_property, RDFS.domain, SKOS.Concept))
        
    def _parse_records_table(self, url, size):
        '''
        Minor, Rubri and Micro have the same structure except an additional
        column for Micro with links to the titles
        '''
        # Load the page
        doc = self.get_page(ROOT + url)
        
        # Find the right table
        table = doc.find('table', attrs={'cellspacing':'8', 'cellpadding':'0'})
        
        # If we can't find the table return an empty list
        # work around for http://historyofwork.iisg.nl/list_micro.php?keywords=920&keywords_qt=lstrict
        if table == None:
            return []
        
        # Look for the minor groups
        groups = []
        group = None
        columns = table.find_all('td')
        for index in range(0, len(columns)):
            # New group
            if re.match("[0-9]{%d}" % size, columns[index].text):
                if group != None:
                    groups.append(group)
                group = {}
                group['code'] = columns[index].text
                group['title'] = columns[index + 1].text
                link = columns[index + 1].find_all('a')[0]['href']
                group.setdefault('links', [])
                group['links'].append(link)
                group['description'] = columns[index + 2].text
                if columns[index + 3].text == "Display Titles":
                    link = columns[index + 3].find_all('a')[0]['href']
                    group['titles_link'] = link
        groups.append(group)
        
        return groups
            
    def _get_group_uri(self, code):
        return HISCO['group-%s' % code]
    
    def _get_hisco_uri(self, code):
        return HISCO['hisco-%s' % code]
    
    def _get_occupation_title_uri(self, code):
        return HISCO['occupation-%s' % code]
    
    def _get_status_uri(self, code):
        return HISCO['status-%s' % code]
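The get_page() method above implements a simple scrape cache: each fetched page is stored in SQLite so repeated runs do not re-download anything. A stripped-down sketch of the same pattern, assuming requests and beautifulsoup4 are installed (table and file names are illustrative):

import sqlite3
import requests
from bs4 import BeautifulSoup

cache = sqlite3.connect('cache.db')
cache.execute("CREATE TABLE IF NOT EXISTS page (url text, html text)")

def get_page(url):
    # look the URL up in the cache first
    row = cache.execute("SELECT html FROM page WHERE url = ?", (url,)).fetchone()
    if row is None:
        # cache miss: fetch over HTTP and remember the result
        html = requests.get(url).content
        cache.execute("INSERT INTO page VALUES (?, ?)", (url, html))
        cache.commit()
    else:
        (html,) = row
    return BeautifulSoup(html, 'html.parser')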
Exemple #48
0
def DoTheTestMemory():
    ns = Namespace("http://love.com#")

    # AssertionError: ConjunctiveGraph must be backed by a context aware store.
    mary = URIRef("http://love.com/lovers/mary")
    john = URIRef("http://love.com/lovers/john")

    cmary = URIRef("http://love.com/lovers/context_mary")
    cjohn = URIRef("http://love.com/lovers/context_john")

    # my_store = Memory()
    store_input = IOMemory()

    gconjunctive = ConjunctiveGraph(store=store_input)
    gconjunctive.bind("love", ns)

    # add a graph for Mary's facts to the Conjunctive Graph
    gmary = Graph(store=store_input, identifier=cmary)
    # Mary's graph only contains the URI of the person she love, not his cute name
    gmary.add((mary, ns["hasName"], Literal("Mary")))
    gmary.add((mary, ns["loves"], john))

    # add a graph for John's facts to the Conjunctive Graph
    gjohn = Graph(store=store_input, identifier=cjohn)
    # John's graph contains his cute name
    gjohn.add((john, ns["hasCuteName"], Literal("Johnny Boy")))

    # enumerate contexts
    print("Input contexts")
    for c in gconjunctive.contexts():
        print("-- %s " % c)

    # separate graphs
    if False:
        print("===================")
        print("GJOHN")
        print(gjohn.serialize(format="n3").decode("utf-8"))
        print("===================")
        print("GMARY")
        print(gmary.serialize(format="n3").decode("utf-8"))
        print("===================")

    # full graph
    print("===================")
    print("GCONJUNCTIVE NATIVE")
    print(gconjunctive.serialize(format="n3").decode("utf-8"))

    # query the conjunction of all graphs
    xx = None
    for x in gconjunctive[mary:ns.loves / ns.hasCuteName]:
        xx = x
    print("Q: Who does Mary love?")
    print("A: Mary loves {}".format(xx))

    # Next, save a single subgraph, then reload it; the result should be the same.
    gjohn.serialize(destination='gjohn_copy.xml', format='xml')
    gmary.serialize(destination='gmary_copy.xml', format='xml')

    gjohn_copy = Graph()
    gjohn_copy.parse('gjohn_copy.xml', format='xml')
    gmary_copy = Graph()
    gmary_copy.parse('gmary_copy.xml', format='xml')

    if True:
        print("===================")
        print("GJOHN")
        print(gjohn_copy.serialize(format="n3").decode("utf-8"))
        print("===================")
        print("GMARY")
        print(gmary_copy.serialize(format="n3").decode("utf-8"))
        print("===================")

    print("===================")
    print("GCONJUNCTIVE WITH QUADS")
    print(list(gconjunctive.quads(None)))
    print("===================")

    gconjunctive.serialize(destination='gconjunctive_copy.xml', format='xml')

    gconjunctive_copy = ConjunctiveGraph()
    gconjunctive_copy.parse('gconjunctive_copy.xml', format='xml')

    print("===================")
    print("GCONJUNCTIVE AS CONJUNCTIVE")
    print(gconjunctive_copy.serialize(format="n3").decode("utf-8"))
    print("Output contexts")
    for c in gconjunctive_copy.contexts():
        print("-- %s " % c)
    print("===================")

    gconjunctive_graph_copy = Graph()
    gconjunctive_graph_copy.parse('gconjunctive_copy.xml', format='xml')

    print("===================")
    print("GCONJUNCTIVE AS GRAPH")
    print(gconjunctive_graph_copy.serialize(format="n3").decode("utf-8"))
    #print("Output contexts")
    #for c in gconjunctive_graph_copy.contexts():
    #    print("-- %s " % c)
    print("===================")
Exemple #49
0
    def query_lode(self, id):
        var = "http://inpho.cogs.indiana.edu/thinker/" + id
        # initialize dictionaries to store temporary results
        dbPropResults = {}
        inpho_DB = {}
        DB_inpho = {}
        dbpedia_web = {}
        triples = {}

        # init graphs for LODE and mapped data
        gLODE = ConjunctiveGraph()
        gReturn = ConjunctiveGraph()
        # import InPhO data
        gLODE.parse("http://inphodata.cogs.indiana.edu/lode/out_n3.20140207.rdf",
                    format="n3")

        # builds a set of triples with the inpho id as the first entry and the
        # dbpedia id as the second
        resultsLODE = gLODE.query("""
            SELECT ?thinker_LODE ?thinkerDB
            WHERE { ?thinker_LODE owl:sameAs ?thinkerDB
                    FILTER (regex(str(?thinker_LODE),"http://inpho.cogs.indiana.edu","i")
                    && regex(str(?thinkerDB),"http://dbpedia.org/resource/","i")).
                   }
            """)

        # load in property mapping between inpho-dbpedia
        prop_map_filename = config.get_data_path('rdf_map.txt')
        with open(prop_map_filename, 'r') as f:
            dbprops = csv.reader(f, delimiter='\t')
            for dbprop in dbprops:
                dbPropResults[dbprop[1]] = dbprop[0]
                dbpedia_web[dbprop[1].split(":")[1]] = dbprop[2]

        # iterate through triples and store the mappings in both directions:
        # key inpho url -> value dbpedia url, and the reverse
        for triple in resultsLODE:
            inpho_DB[str(triple[0])] = str(triple[1])
            DB_inpho[str(triple[1])] = str(triple[0])

        # queries for all relationships in dbpedia
        sparqlDB = SPARQLWrapper("http://inpho-dataserve.cogs.indiana.edu:8890/sparql/")
        sparqlDB.setReturnFormat(JSON)
        for inpho, DB in inpho_DB.items():
            predicate = {}
            if str(DB_inpho.get(DB)) == var:
                for dbprop in dbPropResults:
                    sparqlDB.setQuery(""" PREFIX dbpprop: <http://dbpedia.org/ontology/>
                                      SELECT ?b  WHERE { <""" + DB + """> """ + dbprop + """ ?b.
                                                        FILTER (regex(str(?b),"dbpedia.org/resource/","i")).
                                                        }""")
                    resultsDB = sparqlDB.query().convert()
                    predicate[dbprop] = resultsDB["results"]["bindings"]
                triples[DB] = predicate

        # retrieve native python object
        c.entity = h.fetch_obj(Entity, id, new_id=True)
        existing_predicate_list = []
        existing_object_list = []

        predicates_to_compare = ['influenced', 'influenced_by', 'teachers', 'students']

        # collect the relationships the native object already has
        for subject, predicate in triples.items():
            for predicate1, objectn in predicate.items():
                predicate_to_match = predicate1.split(":")[1]
                attr = getattr(c.entity, dbpedia_web[predicate_to_match])
                for attr1 in attr:
                    if dbpedia_web[predicate_to_match] in predicates_to_compare:
                        existing_predicate_list.append(
                            dbpedia_web[predicate_to_match] + ':' + attr1.wiki)

        # maps from dbpedia relationships back to inpho relationships
        for subject, predicate in triples.items():
            for predicate1, objectn in predicate.items():
                for object1 in objectn:
                    temp_str = (dbpedia_web[predicate1.split(":")[1]] + ':'
                                + str(object1['b']['value']).split("/")[-1])

                    if temp_str not in existing_predicate_list:
                        # reverse lookup for the inpho id of the object
                        DB_Entry = DB_inpho.get(object1['b']['value'])

                        # if there is not an inpho id, leave it as the dbpedia id
                        if DB_Entry == None:
                            gReturn.add((URIRef(subject),
                                         URIRef(dbPropResults.get(predicate1)),
                                         URIRef(object1['b']['value'])))
                        else:
                            # return the properly mapped id
                            # TODO: use attr to filter DB_Entry
                            gReturn.add((URIRef(subject),
                                         URIRef(dbPropResults.get(predicate1)),
                                         URIRef(DB_Entry)))

        return gReturn.serialize()
    g = ConjunctiveGraph(store=store)
    g.bind("love", ns)

    gmary = Graph(store=store, identifier=cmary)

    gmary.add((mary, ns['hasName'], Literal("Mary")))
    gmary.add((mary, ns['loves'], john))

    gjohn = Graph(store=store, identifier=cjohn)
    gjohn.add((john, ns['hasName'], Literal("John")))

    #enumerate contexts
    for c in g.contexts():
        print("-- %s " % c)

    #separate graphs
    print(gjohn.serialize(format='n3'))
    print("===================")
    print(gmary.serialize(format='n3'))
    print("===================")

    #full graph
    print(g.serialize(format='xml'))

    # query the conjunction of all graphs

    print('Mary loves:')
    for x in g[mary:ns.loves / ns.hasName]:
        print(x)
    data = csv.DictReader(fd, delimiter="\t", quotechar='"', escapechar=None)
    for r in data:
        raw_id = r['raw_id']

        # Check if valid with regex
        match = re.match(r"^(tt)*(?P<id>\d{7,10}).*", raw_id)
        if not match:
            progress.count()
            wrongs.append(raw_id)
            continue

        imdb_id = match.group(2)
        film_node = n['Movie/tt' + imdb_id]

        # Create a node for dbpedia
        uri = r['uri']
        wiki_node = URIRef(uri)
        g.add((film_node, n['has' + source + 'Node'], wiki_node))

        progress.count()
        if progress.finished():
            break

g.serialize(destination=outfile, format='turtle')
end = time.time()

print('Wrong formatted IMDB IDs found: ', len(wrongs))
print(wrongs)
print("Total Items Processed: ", progress.total)
print("Total Time: ", end - start)
g.close()
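query_lode() above drives a remote endpoint through SPARQLWrapper. A minimal, self-contained sketch of that query pattern (the public DBpedia endpoint and the example resource/property are used here purely as an illustration):

from SPARQLWrapper import SPARQLWrapper, JSON

sparql = SPARQLWrapper("https://dbpedia.org/sparql")
sparql.setReturnFormat(JSON)
sparql.setQuery("""
    SELECT ?b
    WHERE { <http://dbpedia.org/resource/Plato> <http://dbpedia.org/ontology/influenced> ?b . }
    LIMIT 5
""")
# query() runs the request; convert() parses the JSON response into a dict
results = sparql.query().convert()
for binding in results["results"]["bindings"]:
    print(binding["b"]["value"])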
Exemple #52
0
class Hisco2RDF():
    '''
    Scrapes the HISCO Web site
    The hierarchy goes as "major > minor > rubri > micro"
    '''
    def __init__(self):
        # The graph to store the data
        self.graph = ConjunctiveGraph()
        self.graph.namespace_manager.bind('skos', SKOS)
        self.graph.namespace_manager.bind('hisco', HISCO)
        self.graph.namespace_manager.bind('dcterms', DCTERMS)
        self.graph.namespace_manager.bind('sdmx-dimension', SDMX_DIMENSION)
        self.graph.namespace_manager.bind('sdmx-code', SDMX_CODE)
        self.graph.namespace_manager.bind('qb', QB)

        # SQLite DB for the cache
        self.cache = sqlite3.connect('cache.db')
        cursor = self.cache.cursor()
        cursor.execute(
            "CREATE TABLE IF NOT EXISTS  page (url text, html text)")
        self.cache.commit()

    def __del__(self):
        self.cache.close()

    def get_page(self, url):
        #log.debug("Load %s" % url)

        c = self.cache.cursor()
        c.execute("SELECT * FROM page WHERE url = ?", (url, ))
        res = c.fetchone()
        doc = None
        if res == None:
            doc = requests.get(url).content
            c.execute("INSERT INTO page VALUES (?,?)", (url, doc))
            self.cache.commit()
        else:
            (_, doc) = res
        return BeautifulSoup(doc)

    def save_output(self):
        # Add more things needed for DataCubes
        dimprop = HISCO['occupation']
        self.graph.add((dimprop, RDF.type, QB['DimensionProperty']))
        self.graph.add((dimprop, RDFS.range, SKOS.Collection))
        self.graph.add((dimprop, QB['Concept'], SKOS.Collection))
        self.graph.add(
            (dimprop, RDFS.label, Literal('Occupation code', lang='en')))
        self.graph.add((dimprop, RDFS.comment,
                        Literal('The HISCO group of the occupation',
                                lang='en')))

        # Print to the screen
        #outfile = sys.stdout.buffer
        #self.graph.serialize(destination=outfile, format='n3')

        # Save to the file
        outfile = open('../hisco.ttl', "wb")
        self.graph.serialize(destination=outfile, format='n3')
        outfile.close()

    def parse_hisco_tree(self):
        '''
        Parse the hisco tree
        '''
        # Load the page
        doc = self.get_page(ROOT + HISCO_TREE)

        # Find the major groups
        major_groups = []
        major_group = None
        for table in doc.find_all('table', attrs={'border': '0'}):
            for row in table.find_all('tr'):
                for col in row.find_all('td'):
                    # Skip empty rows
                    if len(col.text) == 1:
                        continue
                    # We are starting a new group
                    if col.text.startswith('Majorgroup'):
                        # Save the one we were building if any
                        if major_group != None:
                            major_groups.append(major_group)
                        m = re.search("Majorgroup ([^ ]*) ", col.text)
                        major_group = {}
                        major_group['title'] = col.text
                        major_group['code'] = m.group(1).replace('/', '-')
                    # We have a description
                    if col.text.startswith('Workers'):
                        major_group['description'] = col.text
                    # We have links to minor
                    if col.text.startswith('List Minor'):
                        link = col.find_all('a')[0]['href']
                        major_group.setdefault('links', [])
                        major_group['links'].append(link)
        # Add the last group in the making
        if major_group != None:
            major_groups.append(major_group)

        # Add the groups to the graph
        for group in major_groups:
            major_group_uri = self._get_group_uri(group['code'])
            self.graph.add((major_group_uri, RDF.type, SKOS['ConceptScheme']))
            self.graph.add(
                (major_group_uri, DCTERMS.title, Literal(group['title'])))
            self.graph.add((major_group_uri, DCTERMS.description,
                            Literal(group['description'])))

        # Now move onto the minor groups following the links
        for major_group in major_groups:
            major_group_uri = self._get_group_uri(major_group['code'])

            for minor_link in major_group['links']:
                # Look for the minor groups
                minor_groups = self._parse_records_table(minor_link, 2)

                # Add the groups to the graph
                for minor_group in minor_groups:
                    minor_group_uri = self._get_group_uri(minor_group['code'])
                    self.graph.add(
                        (minor_group_uri, RDF.type, SKOS['ConceptScheme']))
                    self.graph.add((minor_group_uri, RDFS.label,
                                    Literal(minor_group['title'])))
                    self.graph.add((minor_group_uri, DCTERMS.description,
                                    Literal(minor_group['description'])))
                    self.graph.add(
                        (major_group_uri, SKOS.related, minor_group_uri))

                    # Got one level deeper into the rubri
                    for rubri_link in minor_group['links']:
                        # Look for the minor groups
                        rubri_groups = self._parse_records_table(rubri_link, 3)

                        # Add the groups to the graph
                        for rubri_group in rubri_groups:
                            rubri_group_uri = self._get_group_uri(
                                rubri_group['code'])
                            self.graph.add((rubri_group_uri, RDF.type,
                                            SKOS['ConceptScheme']))
                            self.graph.add((rubri_group_uri, RDFS.label,
                                            Literal(rubri_group['title'])))
                            self.graph.add(
                                (rubri_group_uri, DCTERMS.description,
                                 Literal(rubri_group['description'])))
                            self.graph.add((minor_group_uri, SKOS.related,
                                            rubri_group_uri))

                            # And one deeper for the micro
                            for micro_link in rubri_group['links']:
                                # Look for the minor groups
                                micro_groups = self._parse_records_table(
                                    micro_link, 5)

                                # Add the groups to the graph
                                for micro_group in micro_groups:
                                    hisco_uri = self._get_hisco_uri(
                                        micro_group['code'])
                                    self.graph.add((hisco_uri, RDF.type,
                                                    SKOS['Collection']))
                                    self.graph.add(
                                        (hisco_uri, RDFS.label,
                                         Literal(micro_group['title'])))
                                    self.graph.add(
                                        (hisco_uri, DCTERMS.description,
                                         Literal(micro_group['description'])))
                                    self.graph.add((rubri_group_uri,
                                                    SKOS.related, hisco_uri))

    def parse_occupational_titles(self):
        '''
        Scrape the section of the site about occupational titles
        Last page = http://historyofwork.iisg.nl/list_hiswi.php?step=1845&publish=Y&modus=ftsearch
        '''
        parsed_status_page = set()
        next_page = OCCUPATIONAL_TITLES

        while next_page != None:
            log.info("Parse titles %s" % next_page)

            # Load the page
            doc = self.get_page(ROOT + next_page)

            # Find the right table
            table = doc.find('table',
                             attrs={
                                 'cellspacing': '0',
                                 'cellpadding': '2',
                                 'border': '0'
                             })

            # Look for all the titles
            for row in table.find_all('tr')[1:]:  # Skip the header
                cols = row.find_all('td')
                occupation_title = cols[1].text
                details_page_link = cols[1].find_all('a')[0]['href']
                language = LANG_MAP[cols[2].text]
                hisco_code = cols[3].text.replace('*', '')

                # Get the DB index from details_page_link
                m = re.search('know_id=([^&]*)&', details_page_link)
                occupation_index = m.group(1)

                # Add the concept to the graph
                resource = self._get_occupation_title_uri(occupation_index)
                self.graph.add((resource, RDF.type, SKOS['Concept']))
                self.graph.add((resource, SKOS.prefLabel,
                                Literal(occupation_title, lang=language)))
                self.graph.add(
                    (resource, SKOS.member, self._get_hisco_uri(hisco_code)))

                # Get more information about the title and add it as a member of the collection
                details_page = self.get_page(ROOT + details_page_link)
                details_table = details_page.find('table',
                                                  attrs={
                                                      'cellspacing': '8',
                                                      'cellpadding': '0'
                                                  })
                keyvalues = {}
                for details_row in details_table.find_all('tr'):
                    details_cols = details_row.find_all('td')
                    keyvalues[details_cols[0].text.strip()] = details_cols[-1]

                # We already dealt with these two
                del keyvalues['Hisco code']
                del keyvalues['Occupational title']

                # TODO Country , use refArea

                # TODO Language

                # Do we know the gender ?
                if 'Gender' in keyvalues:
                    sex = SDMX_CODE['sex-U']  # Also applies to "Male/Female"
                    if keyvalues['Gender'].text.strip() == 'Male':
                        sex = SDMX_CODE['sex-M']
                    elif keyvalues['Gender'].text.strip() == 'Female':
                        sex = SDMX_CODE['sex-F']
                    self.graph.add((resource, SDMX_DIMENSION['sex'], sex))
                    del keyvalues['Gender']

                # Do we know the status ?
                if 'Status' in keyvalues:
                    # Add the status
                    status = keyvalues['Status'].text.strip()
                    self.graph.add((resource, HISCO['status'],
                                    self._get_status_uri(status)))
                    # Parse the status page if necessary
                    status_page = keyvalues['Status'].find_all('a')[0]['href']
                    if status_page not in parsed_status_page:
                        self._parse_status_page(status_page)
                        parsed_status_page.add(status_page)
                    del keyvalues['Status']

                # TODO Relation

                # TODO Product

                # TODO Provenance

                # Do we have a translation in English ?
                if 'Translation' in keyvalues:
                    trans = Literal(
                        keyvalues['Translation'].text.strip().replace(
                            '´', "'"),
                        lang='en')
                    self.graph.add((resource, SKOS.altLabel, trans))
                    del keyvalues['Translation']

                # Print whatever is left
                #if len(keyvalues.keys()) != 0:
                #    log.info(keyvalues.keys())

            # Look for the "next" link
            next_table = doc.find('table', class_='nextprev')
            next_page = None
            for link in next_table.find_all('a'):
                if 'Next' in link.text:
                    next_page = link['href']

    def _parse_status_page(self, url):
        '''
        Parses a status page such as http://historyofwork.iisg.nl/status.php?int02=32
        '''

        # Work-around broken content
        if url == 'status.php?int02=15':
            return

        # Load the page
        doc = self.get_page(ROOT + url)

        # Find the data about this status
        status_uri = None
        for line in doc.find('pre').text.split('\n'):
            if re.match("^[0-9]* [a-zA-Z]*", line):
                m = re.search("^([0-9]*) ([a-zA-Z]*)", line)
                status_uri = self._get_status_uri(m.group(1))
                self.graph.add((status_uri, RDF.type, HISCO['Status']))
                self.graph.add((status_uri, RDFS.label, Literal(m.group(2))))
                self.graph.add(
                    (status_uri, SKOS.prefLabel, Literal(m.group(2))))
                self.graph.add(
                    (status_uri, SKOS.notation, Literal(m.group(1))))
            if re.match("^[A-Z]{2}:\t[a-zA-Z]*", line):
                m = re.search("^([A-Z]{2}):\t([a-zA-Z]*)", line)
                lang_code = m.group(1).lower()
                label = Literal(m.group(2), lang=lang_code)
                self.graph.add((status_uri, SKOS.altLabel, label))

        # Describe the class
        status_class = HISCO['Status']
        descr = doc.find('table', attrs={
            'width': '600'
        }).text.strip().split('\r\n')
        self.graph.add((status_class, RDF.type, RDFS.Class))
        self.graph.add((status_class, RDFS.label, Literal("Status code")))
        self.graph.add((status_class, DCTERMS.comment, Literal(descr[1])))

        # Describe the property
        status_property = HISCO['status']
        self.graph.add((status_property, RDF.type, RDF.Property))
        self.graph.add((status_property, RDFS.label,
                        Literal("status associated to the occupation")))
        self.graph.add((status_property, RDFS.range, HISCO['Status']))
        self.graph.add((status_property, RDFS.domain, SKOS.Concept))

    def _parse_records_table(self, url, size):
        '''
        Minor, Rubri and Micro have the same structure except an additional
        column for Micro with links to the titles
        '''
        # Load the page
        doc = self.get_page(ROOT + url)

        # Find the right table
        table = doc.find('table',
                         attrs={
                             'cellspacing': '8',
                             'cellpadding': '0'
                         })

        # If we can't find the table return an empty list
        # work around for http://historyofwork.iisg.nl/list_micro.php?keywords=920&keywords_qt=lstrict
        if table == None:
            return []

        # Look for the minor groups
        groups = []
        group = None
        columns = table.find_all('td')
        for index in range(0, len(columns)):
            # New group
            if re.match("[0-9]{%d}" % size, columns[index].text):
                if group != None:
                    groups.append(group)
                group = {}
                group['code'] = columns[index].text
                group['title'] = columns[index + 1].text
                link = columns[index + 1].find_all('a')[0]['href']
                group.setdefault('links', [])
                group['links'].append(link)
                group['description'] = columns[index + 2].text
                if columns[index + 3].text == "Display Titles":
                    link = columns[index + 3].find_all('a')[0]['href']
                    group['titles_link'] = link
        groups.append(group)

        return groups

    def _get_group_uri(self, code):
        return HISCO['group-%s' % code]

    def _get_hisco_uri(self, code):
        return HISCO['hisco-%s' % code]

    def _get_occupation_title_uri(self, code):
        return HISCO['occupation-%s' % code]

    def _get_status_uri(self, code):
        return HISCO['status-%s' % code]
Exemple #53
0
    gmary = Graph(
        store=store, identifier=cmary
    )  # Create a graph for Mary in which to store her properties
    gmary.add((mary, ns['hasName'], Literal("Mary")))
    gmary.add((mary, ns['loves'], john))
    gjohn = Graph(store=store, identifier=cjohn)
    gjohn.add((john, ns['hasName'], Literal("John")))

    # With the graphs for Mary and John built, show their contents

    print('# Contents of the conjunctive graph')
    print()
    for c in g.contexts():
        print("-- %s " % c)
    print()
    print("# Contents of John's graph in N3 notation")
    print(gjohn.serialize(format='n3'))
    print("===================")
    print("# Contents of Mary's graph in N3 notation")
    print(gmary.serialize(format='n3'))
    print("===================")
    # full graph
    print(g.serialize(format='n3'))
    # query the conjunction of all graphs: follow the ns.loves predicate,
    # then read the ns.hasName property of its target
    print('Mary loves:')
    for x in g[mary:ns.loves / ns.hasName]:
        print(x)
Exemple #54
0
class RDFAggregator(Aggregator):
    def __init__(self, *args, **kw):
        """Initializes the RDF aggregator.
        """
        super(RDFAggregator, self).__init__('csv', *args, **kw)
        self.aggregator = ConjunctiveGraph()
        self.aggregator.bind('owl', OWL)
        self.aggregator.bind('lic', LIC)
        self.aggregator.bind('siorg', SIORG)
        self.aggregator.bind('siafi', SIAFI)
        self.aggregator.bind('geo', GEO)
        self.aggregator.bind('dbpedia', DBPEDIA)
        self.aggregator.bind('dbprop', DBPROP)
        self.aggregator.bind('dbo', DBONT)
        self.aggregator.bind('void', VOID)
        self.aggregator.bind('foaf', FOAF)
        self.aggregator.bind('vcard', VCARD)
    def add(self, obj):
        """Adds the object's triples to the aggregator graph.
        """
        if getattr(obj, 'repr_rdf', None):
            # the object provides its own RDF representation
            triplas = obj.repr_rdf()
            for t in triplas:
                self.aggregator.add(t)
        else:
            # the object has no such method; try to build triples heuristically
            subject = obj.uri
            doc = obj.doc_uri
            if doc == subject:
                doc = None
            class_uri = getattr(obj.__class__, '__class_uri__', None)
            expostos = getattr(obj.__class__, self.atributo_serializar, set())
            prop_map = getattr(obj.__class__, '__rdf_prop__', {})
            g = self.aggregator
            #  class
            if class_uri:
                g.add((URIRef(subject), RDF['type'], URIRef(class_uri)))
            # document
            if doc:
                g.add((URIRef(doc), RDF['type'], FOAF['Document']))
                g.add((URIRef(subject), FOAF['isPrimaryTopicOf'], URIRef(doc)))
                g.add((URIRef(doc), FOAF['primaryTopic'], URIRef(subject)))
            #  name
            if getattr(obj, 'nome', None):
                if getattr(obj, '__rdf_prop__', None) is None or \
                        obj.__rdf_prop__.get('nome', None) is None:
                    g.add((URIRef(subject), RDFS['label'], Literal(obj.nome)))
            #  geographic location
            if getattr(obj, 'geo_ponto', None):
                ponto = obj.geo_ponto
                if ponto:
                    g.add((URIRef(subject), GEO['lat'], Literal(ponto['lat'])))
                    g.add((URIRef(subject), GEO['long'], Literal(ponto['lon'])))
            #  properties
            for atr in expostos:
                if atr in prop_map.keys():
                    if getattr(prop_map[atr], '__call__', None):
                        # the property's triples are produced by a function
                        triplas = prop_map[atr](obj)
                        if triplas:
                            for t in triplas:
                                g.add(t)
                    elif prop_map[atr].get('metodo', None):
                        # the property's triples are produced by a method
                        m = getattr(obj, prop_map[atr]['metodo'])
                        triplas = m(atr)
                        if triplas:
                            for t in triplas:
                                g.add(t)
                    elif prop_map[atr].get('pred_uri', None):
                        # the property corresponds to a single triple
                        pred_uri = prop_map[atr]['pred_uri']
                        object = getattr(obj, atr, None)
                        if object:
                            obj_uri = getattr(object, 'uri', lambda: None)()
                            obj_cls_uri = getattr(object, '__class_uri__', None)
                            # does the object have a URI of its own?
                            if obj_uri:
                                g.add((URIRef(subject), URIRef(pred_uri), URIRef(obj_uri)))
                            elif obj_cls_uri:
                                # no URI, but the class has one:
                                # try to create a blank node
                                bn = BNode()
                                g.add((URIRef(subject), URIRef(pred_uri), bn))
                                g.add((bn, RDF['type'], URIRef(obj_cls_uri)))
                                g.add((bn, RDFS['comment'], Literal(str(obj))))
                            else:
                                # otherwise, treat the property as a literal
                                g.add((URIRef(subject), URIRef(pred_uri), Literal(str(object))))
    def serialize(self, format="n3"):
        """Returns the serialization of the RDF aggregator (the union of the graphs).
        """
        format_map = {
            'xml': 'xml',
            'rdf': 'pretty-xml',
            'rdf/xml': 'pretty-xml',
            'ttl': 'n3',
            'n3': 'n3',
            'nt': 'nt',
        }
        f = format_map.get(format, 'n3')
        current_url = self.dataset_split.get('current_url', '')  # URL of the current document
        dataset_url = self.dataset_split.get('dataset_url', '')  # URL of the dataset as a whole
        next_url = self.dataset_split.get('next_url', '')  # URL of the next page
        # the dataset URI: the document URL with #dataset appended
        if current_url:
            self.aggregator.add((URIRef(current_url+"#dataset"), RDF['type'], VOID['Dataset']))
            self.aggregator.add((URIRef(current_url), RDF['type'], VOID['DatasetDescription']))
            self.aggregator.add((URIRef(current_url), FOAF['primaryTopic'], URIRef(current_url+"#dataset")))
            if next_url:
                self.aggregator.add((URIRef(current_url+"#dataset"), RDFS['seeAlso'], URIRef(next_url+"#dataset")))
        if next_url:
            self.aggregator.add((URIRef(next_url+"#dataset"), RDF['type'], VOID['Dataset']))
            self.aggregator.add((URIRef(next_url), RDF['type'], VOID['DatasetDescription']))
            self.aggregator.add((URIRef(next_url), FOAF['primaryTopic'], URIRef(next_url+"#dataset")))
        if dataset_url:
            self.aggregator.add((URIRef(dataset_url+"#dataset"), RDF['type'], VOID['Dataset']))
            self.aggregator.add((URIRef(dataset_url), RDF['type'], VOID['DatasetDescription']))
            self.aggregator.add((URIRef(dataset_url), FOAF['primaryTopic'], URIRef(dataset_url+"#dataset")))
            if current_url:
                self.aggregator.add((URIRef(dataset_url+"#dataset"), VOID['subset'], URIRef(current_url+"#dataset")))
            if next_url:
                self.aggregator.add((URIRef(dataset_url+"#dataset"), VOID['subset'], URIRef(next_url+"#dataset")))
        return self.aggregator.serialize(format=f)
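The serialize() method above describes each page of a paginated dataset with VoID. A minimal sketch of that bookkeeping with placeholder URLs (the example.org addresses are illustrative):

from rdflib import ConjunctiveGraph, Namespace, URIRef
from rdflib.namespace import RDF

VOID = Namespace("http://rdfs.org/ns/void#")
FOAF = Namespace("http://xmlns.com/foaf/0.1/")

g = ConjunctiveGraph()
dataset_url = "http://example.org/data"          # placeholder
current_url = "http://example.org/data?page=1"   # placeholder

# the document describes the dataset; the dataset URI is the URL plus #dataset
g.add((URIRef(current_url), RDF['type'], VOID['DatasetDescription']))
g.add((URIRef(current_url + "#dataset"), RDF['type'], VOID['Dataset']))
g.add((URIRef(current_url), FOAF['primaryTopic'], URIRef(current_url + "#dataset")))
# each page is a void:subset of the dataset as a whole
g.add((URIRef(dataset_url + "#dataset"), VOID['subset'], URIRef(current_url + "#dataset")))
print(g.serialize(format='n3'))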
Exemple #55
0
 def testQuotedSerialization(self):
     g = ConjunctiveGraph()
     g.parse(data=test_data, format="n3")
     g.serialize(format="n3")
Exemple #56
0
# with the object init (and it added some namespaces as well)
# By default, your main namespace is the URI of your
# current working directory, so lets make that simpler:

myNS = Namespace(URIRef('http://www.w3.org/2000/10/swap/Primer#'))
primer.bind('', myNS)
primer.bind('owl', 'http://www.w3.org/2002/07/owl#')
primer.bind('dc', 'http://purl.org/dc/elements/1.1/')
primer.bind('swap', 'http://www.w3.org/2000/10/swap/')
sourceCode = StringInputSource(mySource, myNS)

# Lets load it up!

primer.parse(sourceCode, format='n3')

# Now you can query, either directly straight into a list:

[(x, y, z) for x, y, z in primer]

# or spit it back out (mostly) the way we created it:

print(primer.serialize(format='n3'))

# for more insight into things already done, lets see the namespaces

list(primer.namespaces())

# lets ask something about the data

list(primer.objects(myNS.pat, myNS.child))
Exemple #57
0
def rdf_description(name, notation='xml'):
    """
    Function takes the title of a node and an RDF notation.
    """
    valid_formats = ["xml", "n3", "ntriples", "trix"]
    default_graph_uri = "http://gstudio.gnowledge.org/rdfstore"
    # default_graph_uri = "http://example.com/"
    configString = "/var/tmp/rdfstore"

    # Get the IOMemory plugin.
    store = plugin.get('IOMemory', Store)('rdfstore')

    # Open previously created store, or create it if it doesn't exist yet
    graph = Graph(store="IOMemory", identifier=URIRef(default_graph_uri))
    path = mkdtemp()
    rt = graph.open(path, create=False)
    if rt == NO_STORE:
        graph.open(path, create=True)
    else:
        assert rt == VALID_STORE, "The underlying store is corrupt"

    # Now we'll add some triples to the graph & commit the changes
    #rdflib = Namespace('http://sbox.gnowledge.org/gstudio/')
    graph.bind("gstudio", "http://gnowledge.org/")
    exclusion_fields = [
        "id", "rght", "node_ptr_id", "image", "lft", "_state",
        "_altnames_cache", "_tags_cache", "nid_ptr_id", "_mptt_cached_fields"
    ]

    #verifies the type of node

    node = NID.objects.get(title=name)
    node_type = node.reftype

    if (node_type == 'Gbobject'):
        node = Gbobject.objects.get(title=name)
        rdflib = link(node)
    elif (node_type == 'None'):
        node = Gbobject.objects.get(title=name)
        rdflib = link(node)

    elif (node_type == 'Processes'):
        node = Gbobject.objects.get(title=name)
        rdflib = link(node)

    elif (node_type == 'System'):
        node = Gbobject.objects.get(title=name)
        rdflib = link(node)

    elif (node_type == 'Objecttype'):
        node = Objecttype.objects.get(title=name)
        rdflib = link(node)

    elif (node_type == 'Attributetype'):
        node = Attributetype.objects.get(title=name)
        rdflib = link(node)

    elif (node_type == 'Complement'):
        node = Complement.objects.get(title=name)
        rdflib = link(node)

    elif (node_type == 'Union'):
        node = Union.objects.get(title=name)
        rdflib = link(node)

    elif (node_type == 'Intersection'):
        node = Intersection.objects.get(title=name)
        rdflib = link(node)

    elif (node_type == 'Expression'):
        node = Expression.objects.get(title=name)
        rdflib = link(node)

    elif (node_type == 'Processtype'):
        node = Processtype.objects.get(title=name)
        rdflib = link(node)

    elif (node_type == 'Systemtype'):
        node = Systemtype.objects.get(title=name)
        rdflib = link(node)

    elif (node_type == 'AttributeSpecification'):
        node = AttributeSpecification.objects.get(title=name)
        rdflib = link(node)

    elif (node_type == 'RelationSpecification'):
        node = RelationSpecification.objects.get(title=name)
        rdflib = link(node)

    elif (node_type == 'Attribute'):
        node = Attribute.objects.get(title=name)
        rdflib = Namespace('http://sbox.gnowledge.org/gstudio/')

    elif (node_type == 'Relationtype'):
        node = Relationtype.objects.get(title=name)
        rdflib = Namespace('http://sbox.gnowledge.org/gstudio/')

    elif (node_type == 'Metatype'):
        node = Metatype.objects.get(title=name)
        rdflib = Namespace('http://sbox.gnowledge.org/gstudio/')
    else:
        rdflib = Namespace('http://sbox.gnowledge.org/gstudio/')

    node_dict = node.__dict__

    subject = str(node_dict['id'])
    for key in node_dict:
        if key not in exclusion_fields:
            predicate = str(key)
            pobject = str(node_dict[predicate])
            graph.add((rdflib[subject], rdflib[predicate], Literal(pobject)))

    rdf_code = graph.serialize(format=notation)

    graph.commit()
    print(rdf_code)
    graph.close()