Example #1
def load_single(orcid_id, person_uri, person_id, person_class, data_path, endpoint, username, password,
                namespace=None, skip_person=False, confirmed_orcid_id=False):
    with Store(data_path) as store:
        #Crosswalk
        (graph, profile, person_uri) = default_execute(orcid_id, namespace=namespace, person_uri=person_uri,
                                                       person_id=person_id, skip_person=skip_person,
                                                       person_class=person_class, confirmed_orcid_id=confirmed_orcid_id)

        graph_filepath = os.path.join(data_path, "%s.ttl" % orcid_id.lower())
        previous_graph = Graph(namespace_manager=ns_manager)
        #Load last graph
        if os.path.exists(graph_filepath):
            log.debug("Loading previous graph %s", graph_filepath)
            previous_graph.parse(graph_filepath, format="turtle")

        #Diff against last graph
        (both_graph, delete_graph, add_graph) = graph_diff(previous_graph, graph)

        #SPARQL Update
        log.info("Adding %s, deleting %s triples for %s", len(add_graph), len(delete_graph), orcid_id)
        sparql_delete(delete_graph, endpoint, username, password)
        sparql_insert(add_graph, endpoint, username, password)

        #Save new last graph
        log.debug("Saving new graph %s", graph_filepath)
        with codecs.open(graph_filepath, "w") as out:
            graph.serialize(format="turtle", destination=out)

        #Touch
        store.touch(orcid_id)

        return graph, add_graph, delete_graph
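
Example #1 leans on graph_diff to work out which triples to delete and which to add; assuming that is rdflib.compare.graph_diff, a minimal sketch of the diff step looks like this (sparql_delete/sparql_insert above are project-specific helpers):

from rdflib import Graph
from rdflib.compare import graph_diff

previous = Graph().parse(data="<urn:a> <urn:p> <urn:b> . <urn:a> <urn:p> <urn:c> .", format="turtle")
current = Graph().parse(data="<urn:a> <urn:p> <urn:b> . <urn:a> <urn:p> <urn:d> .", format="turtle")

# graph_diff returns (triples in both, triples only in the first, triples only in the second)
both_graph, delete_graph, add_graph = graph_diff(previous, current)
print(len(delete_graph), "to delete,", len(add_graph), "to add")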
Example #2
    def parse(self, basefile):
        # Find out possible skeleton entries by loading the entire
        # graph of resource references, and find resources that only
        # exist as objects.
        #
        # Note: if we used download_from_triplestore we know that this list
        #       is clean -- we could just iterate the graph w/o filtering
        g = Graph()
        self.log.info("Parsing %s" % basefile)
        g.parse(self.store.downloaded_path(basefile), format="nt")
        self.log.info("Compiling object set")
        # create a uri -> True dict mapping
        objects = dict.fromkeys((str(o).split("#")[0] for (s, p, o) in g), True)
        self.log.info("Compiling subject set")
        subjects = dict.fromkeys((str(s).split("#")[0] for (s, p, o) in g), True)
        self.log.info("%s objects, %s subjects. Iterating through existing objects" %
                      (len(objects), len(subjects)))

        for o in objects:
            if not o.startswith(self.config.url):
                continue
            if '9999:999' in o:
                continue
            if o in subjects:
                continue
            for repo in otherrepos:
                skelbase = repo.basefile_from_uri(o)
                if skelbase:
                    skel = repo.triples_from_uri(o)  # need to impl
                    with self.store.open_distilled(skelbase, "wb") as fp:
                        fp.write(skel.serialize(format="pretty-xml"))

                    self.log.info("Created skel for %s" % o)
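
The dict-building idiom above is brittle; a self-contained sketch of the same "objects that never appear as subjects" computation with plain sets (the store/repo helpers in the example are project-specific and omitted):

from rdflib import Graph, URIRef

g = Graph().parse(data="""
@prefix ex: <http://example.org/> .
ex:a ex:p ex:b .
ex:b ex:p ex:c .
""", format="turtle")

subjects = {str(s).split("#")[0] for s in g.subjects()}
objects = {str(o).split("#")[0] for o in g.objects() if isinstance(o, URIRef)}
skeleton_candidates = objects - subjects  # URIs that only ever occur as objects, here ex:c
print(skeleton_candidates)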
Example #3
File: rdf.py Project: qood/vgmdb
def generate_artistlist(config, data):
	g = Graph('IOMemory', BNode())
	for artist_data in data['artists']:
		artist = URIRef(link(artist_data['link'])+"#subject")
		add_lang_names(g, artist, artist_data['names'], rel=[FOAF.name])
		g.add((artist, RDF.type, SCHEMA.MusicGroup))
	return g
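
link and add_lang_names in Example #3 (and #5 below) are vgmdb helpers; a rough sketch of the naming step with plain rdflib calls, using a made-up artist URI as a stand-in:

from rdflib import Graph, Literal, Namespace, URIRef
from rdflib.namespace import FOAF, RDF

SCHEMA = Namespace("http://schema.org/")
g = Graph()
artist = URIRef("http://example.org/artist/77#subject")  # hypothetical stand-in for link(...)
g.add((artist, RDF.type, SCHEMA.MusicGroup))
# roughly what add_lang_names(g, artist, names, rel=[FOAF.name]) would emit
for lang, name in {"en": "Example Band", "ja": "サンプルバンド"}.items():
    g.add((artist, FOAF.name, Literal(name, lang=lang)))
print(g.serialize(format="turtle"))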
Example #4
 def test_post_no_type_to_root(self):
     graph = Graph()
     created = BNode()
     graph.add((self.my_ktbs.uri, RDFS.seeAlso, created))
     graph.add((created, RDF.type, KTBS.Base))
     with assert_raises(RdfRestException):
         self.my_ktbs.post_graph(graph)
Example #5
File: rdf.py Project: qood/vgmdb
def generate_productlist(config, data):
	g = Graph('IOMemory', BNode())
	for product_data in data['products']:
		product = URIRef(link(product_data['link'])+"#subject")
		add_lang_names(g, product, product_data['names'], rel=[SCHEMA.name, DCTERMS.title])
		g.add((product, RDF.type, SCHEMA.CreativeWork))
	return g
Example #6
def find_location(textlocation):
    """
    Returns a 2-tuple containing the rdflib node for textlocation as returned
    by the GeoNames API search, and the RDF graph with its GeoNames description.
    Raises NotFoundException if textlocation was not found in GeoNames.
    """
    payload = {'q' : textlocation,
            'username' : 'edsa_project',
            'featureClass' : 'P',
            'isNameRequired' : 'true',
            'maxRows' : '1'} 
    #TODO: For extra precision, countries need to be translated to ISO-3166.
    # The problem is that US locations have the state.

    r = requests.get('http://api.geonames.org/searchRDF', params=payload)

    g = Graph()
    g.parse(data=r.text, format="xml")

    spquery= """
        SELECT DISTINCT ?iri WHERE {?iri gn:name ?y}
    """
    qres = g.query(spquery)
    iri = ''
    for row in qres:
        iri = row.iri
    if iri == '':
        raise NotFoundException("Could not find " + textlocation)
    else:
        return (iri,g)
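
The SPARQL query in find_location relies on the gn: prefix being picked up from the parsed GeoNames response; passing initNs makes that explicit. A small sketch with inline sample data standing in for the searchRDF response:

from rdflib import Graph, Namespace

GN = Namespace("http://www.geonames.org/ontology#")
g = Graph().parse(
    data='<http://sws.geonames.org/2950159/> <http://www.geonames.org/ontology#name> "Berlin" .',
    format="turtle")

for row in g.query("SELECT DISTINCT ?iri WHERE { ?iri gn:name ?name }", initNs={"gn": GN}):
    print(row.iri)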
Example #7
 def _read_id_from_install_rdf(self, installrdfpath):
     from rdflib import Graph
     rdf = Graph()
     installrdf = rdf.parse(file=file(installrdfpath))
     for i in installrdf.all_nodes():
         if re.search(".*@.*\..*", i):
             return i.decode()
Example #8
    def testGraphAdd(self):
        g1 = self.graph
        g2 = Graph(store=g1.store)

        tarek = self.tarek
        # michel = self.michel
        bob = self.bob
        likes = self.likes
        # hates = self.hates
        pizza = self.pizza
        cheese = self.cheese

        g1.add((tarek, likes, pizza))

        g2.add((bob, likes, cheese))

        g3 = g1 + g2

        self.assertEquals(len(g3), 2)
        self.assertEquals((tarek, likes, pizza) in g3, True)
        self.assertEquals((tarek, likes, cheese) in g3, False)

        self.assertEquals((bob, likes, cheese) in g3, True)

        g1 += g2

        self.assertEquals(len(g1), 2)
        self.assertEquals((tarek, likes, pizza) in g1, True)
        self.assertEquals((tarek, likes, cheese) in g1, False)

        self.assertEquals((bob, likes, cheese) in g1, True)
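
Outside the test harness the same operators work on any two graphs, and rdflib also overloads - and * for set difference and intersection; a quick sketch:

from rdflib import Graph, URIRef

EX = "http://example.org/"
g1 = Graph()
g1.add((URIRef(EX + "tarek"), URIRef(EX + "likes"), URIRef(EX + "pizza")))
g2 = Graph()
g2.add((URIRef(EX + "bob"), URIRef(EX + "likes"), URIRef(EX + "cheese")))

union = g1 + g2            # new graph with both triples
difference = union - g2    # triples only in g1
intersection = g1 * union  # triples present in both
print(len(union), len(difference), len(intersection))  # 2 1 1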
Example #9
def fragment(gp, broker, agora, channel, updating, gen):
    try:
        gp_match = re.search(r'\{(.*)\}', gp).groups(0)
        if len(gp_match) != 1:
            raise click.ClickException('Invalid graph pattern')

        STOA = {
            "broker_host": broker[0],
            "broker_port": broker[1],
            "agora_host": agora[0],
            "agora_port": agora[1],
            "exchange": channel[0],
            "topic_pattern": channel[1],
            "response_prefix": channel[2]
        }

        tps = re.split('\. ', gp_match[0])

        prefixes, fragment_gen = get_fragment_generator(*tps, monitoring=30, STOA=STOA, updating=updating, gen=gen)
        graph = Graph()
        for prefix in prefixes:
            graph.bind(prefix, prefixes[prefix])
            click.echo('@prefix {}: <{}> .'.format(prefix, prefixes[prefix]))
        click.echo('')

        for chunk in fragment_gen:
            if chunk is not None:
                headers, (c, s, p, o) = chunk
                triple = u'{} {} {} .'.format(s.n3(graph.namespace_manager), p.n3(graph.namespace_manager),
                                              o.n3(graph.namespace_manager))
                click.echo(triple)
    except Exception as e:
        raise click.ClickException('There was a problem with the request: {}'.format(e.message))
Example #10
    def parse(self):
        if "workflowBundle.ttl" in self.zip.namelist():
            format = "n3" 
            rootfile = "workflowBundle.ttl"
        elif "workflowBundle.rdf" in self.zip.namelist():
            rootfile = "workflowBundle.rdf"
            format = "xml" 
        else:
            raise Scufl2Error("Can't find workflowBundle.ttl or "
                              "workflowBundle.rdf")

        self.uri = "file://" + urllib.pathname2url(os.path.abspath(self.filename)) + "/"
        early_graph = Graph()    
        rdf_file = self.zip.open(rootfile)
        early_graph.parse(rdf_file, self.uri, format=format)
        sameBaseAs = list(early_graph.objects(subject=URIRef(self.uri), predicate=Scufl2NS.sameBaseAs))

        if not sameBaseAs:
            # Fall back to the file:/// URIs   
            self.graph = early_graph
        else:    
            # Use the sameBaseAs as the base
            self.uri = sameBaseAs[0]
            self.graph = Graph()
            # Reparse it
            rdf_file = self.zip.open(rootfile)
            self.graph.parse(rdf_file, self.uri, format=format)

        self.parse_all_graphs(self.uri)
Example #11
 def __init__(self, filepath, uri):
     graph.__init__(self)
     self.parse(filepath, format='turtle')
     self.filename = os.path.basename(filepath)
     self.uri = uri
     print(" - Resource {0} has {1} triples.".format(
                                                 self.filename, len(self)))
Example #12
File: xtr.py Project: mpetyx/pyrif
def main():

    graph = Graph()
    graph.parse(sys.argv[1], format="n3")

    if len(sys.argv) > 2:
        doc = URIRef(sys.argv[2])
    else:
        docs = []
        for c in (RIF.Document, RIF.BLDDocument, 
                  RIF.PRDDocument, RIF.CoreDocument):
            for x in graph.subjects(RDF.type, c):
                docs.append(x)
        if len(docs) == 1:
            doc = docs[0]
        elif len(docs) > 1:
            print >>sys.stderr, "Input contains multiple Document nodes"
            print >>sys.stderr, indent+",".join([repr(x) for x in docs])
            print >>sys.stderr, "Name one on the command line to select it"
            sys.exit(1)
        elif len(docs) < 1:
            print >>sys.stderr, "Input contains no Document nodes"
            for (s,p,o) in graph:
                print s,p,o
            sys.exit(1)

    out = sys.stdout
    to_rif(out, graph, doc, root=True)
Example #13
 def testConjunction(self):
     self.addStuffInMultipleContexts()
     triple = (self.pizza, self.likes, self.pizza)
     # add to context 1
     graph = Graph(self.graph.store, self.c1)
     graph.add(triple)
     self.assertEquals(len(self.graph), len(graph))
Example #14
def create_store_with_identifier(identifier):
    ident = URIRef(identifier)
    store = plugin.get("SQLAlchemy", Store)(identifier=ident)
    graph = Graph(store, identifier=ident)
    uri = Literal(os.environ.get("DATABASE_URL"))
    graph.open(uri, create=True)
    graph.parse(join(join(settings.BASE_DIR, 'static'), 'output.xml'))
Example #15
def annotateConfidence(target, un, con, com):
    # thisAnnotation id is the full string, eg:
    # http://chartex.org/user/jjon/annotation/dc9d7cbdd0ebefb583e46fc2b79bc8cedde34d68
    # the last element being a hash (hashlib.sha1(oa:hastarget).hexdigest()) of this full string:
    # http://chartex.org/graphid/Person_11139might_bePerson_11339 (this triple is actually in there -- why? weird!)
    target = re.sub('[<>]', '', target)
    thisAnnotationURI =  "http://chartex.org/user/%s/annotation/%s" % (un, sha1(target).hexdigest())
    confidence = Literal(con) if con == 'nochange' else Literal(con,datatype=XSD.decimal)
    #TODO: if no change, create no confidenceMetric triple for the annotation OR insert original decimal value
    
    if (int(annotationExists('<' + thisAnnotationURI + '>')) > 0):
        return ("You've already annotated this statement: %s \nPresumably you could make a separate annotation with a different username. If you start doing that, you should keep track of all your usernames. When we have authentication and session logic, this won't be necessary.\n\nAnnotation triples:\n" % (target,), getSingleConfidenceAnnotation('<' + thisAnnotationURI + '>', 'application/rdf+xml'))
    else:
        thisann = URIRef(thisAnnotationURI)
        g = Graph()
        bodyNode = BNode()
        triples = [
            (thisann, RDF.type, oa.Annotation),
            (thisann, oa.hasTarget, URIRef(target)),
            (thisann, oa.hasBody, bodyNode),
            (bodyNode, chartex.suggestedConfidenceMetric, confidence),
            (bodyNode, chartex.userComment, Literal(com))
        ]
        for t in triples: g.add(t)
        r = requests.post(
            AGVM_VC_REPO + "/statements",
            headers={'Content-Type': 'text/turtle'},
            data=g.serialize(format='turtle'),
            auth=AG_AUTH
        )

        return (g.serialize(format='pretty-xml'), r.__dict__)
Example #16
def test_graph_prefix():
    """
    This is issue https://github.com/RDFLib/rdflib/issues/313
    """

    g1 = Graph()
    g1.parse(data="""
    @prefix : <urn:ns1:> .
    :foo <p> 42.
    """, format="n3")

    g2 = Graph()
    g2.parse(data="""
    @prefix : <urn:somethingelse:> .
    <urn:ns1:foo> <p> 42.
    """, format="n3")

    assert isomorphic(g1, g2)

    q_str = ("""
    PREFIX : <urn:ns1:>
    SELECT ?val
    WHERE { :foo ?p ?val }
    """)
    q_prepared = prepareQuery(q_str)

    expected = [(Literal(42),)]

    eq_(list(g1.query(q_prepared)), expected)
    eq_(list(g2.query(q_prepared)), expected)

    eq_(list(g1.query(q_str)), expected)
    eq_(list(g2.query(q_str)), expected)
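
prepareQuery also accepts initNs, which saves repeating PREFIX lines when the same namespaces are used across many queries; a small sketch along the lines of the test above:

from rdflib import Graph, Literal, Namespace
from rdflib.plugins.sparql import prepareQuery

NS1 = Namespace("urn:ns1:")
g = Graph().parse(data="@prefix : <urn:ns1:> . :foo <urn:ns1:p> 42 .", format="turtle")

q = prepareQuery("SELECT ?val WHERE { ns1:foo ?p ?val }", initNs={"ns1": NS1})
assert list(g.query(q)) == [(Literal(42),)]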
Example #17
def open_store(identifier):
    ident = URIRef(identifier)
    store = plugin.get("SQLAlchemy", Store)(identifier=ident)
    graph = Graph(store, identifier=ident)
    uri = Literal(os.environ.get("DATABASE_URL"))
    graph.open(uri, create=False)
    return graph
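
Examples #14 and #17 assume the rdflib-sqlalchemy plugin and a DATABASE_URL environment variable; a self-contained sketch of the same open/create pattern against a local SQLite file (hypothetical URL and identifier):

from rdflib import Graph, Literal, URIRef, plugin
from rdflib.store import Store

ident = URIRef("http://example.org/graph")
store = plugin.get("SQLAlchemy", Store)(identifier=ident)  # requires rdflib-sqlalchemy
graph = Graph(store, identifier=ident)
graph.open(Literal("sqlite:///rdflib.sqlite"), create=True)  # stand-in for DATABASE_URL

graph.add((URIRef("http://example.org/s"), URIRef("http://example.org/p"), Literal("o")))
graph.commit()
print(len(graph))
graph.close()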
Example #18
class RDFPage(Page):

    format = None

    def __init__(self, response):
        self.data = Graph()
        self.links = FilterableList()
        self.queries = FilterableList(base_url=response.url)
        super(RDFPage, self).__init__(response)

    def extract_data(self):
        self.data = Graph()
        self.data.parse(data=self.response.text, format=self.format, publicID=self.url)

    def extract_links(self):
        for p, o in self.data.predicate_objects(URIRef(self.url)):
            if isinstance(o, URIRef):
                link = Link(p.toPython(), o.toPython())
                self.links.append(link)

    def extract_queries(self):
        rows = self.data.query('''
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX schema: <http://schema.org/>
        PREFIX hydra: <http://www.w3.org/ns/hydra/core#>
        SELECT ?rel ?template
        WHERE {
            ?url ?rel ?action .
            ?action rdf:type hydra:IriTemplate .
            ?action hydra:template ?template .
        }
        ''')

        for rel, template in rows:
            self.queries.append(Query(str(rel), str(template), base_url=self.response.url))
Example #19
 def rdf_get(self, departments):
     us_dept = URIRef('https://en.wikipedia.org/wiki/List_of_federal_agencies_in_the_United_States')
     g = Graph()
     for dept in departments:
         this_dept = URIRef('http://127.0.0.1:5000/departments/{0}'.format(urllib.quote(dept)))
         g.add((this_dept, RDF.type, us_dept,))
     return g.serialize(format='n3')
Example #20
    def test_history_turtle(self):
        with self.client as client:
            res = client.patch(
                '/d/',
                data=self.patch,
                content_type='application/json',
                headers={'Authorization': 'Bearer '
                         + 'NTAwNWViMTgtYmU2Yi00YWMwLWIwODQtMDQ0MzI4OWIzMzc4'})
            res = client.post(
                urlparse(res.headers['Location']).path + 'merge',
                buffered=True,
                headers={'Authorization': 'Bearer '
                         + 'ZjdjNjQ1ODQtMDc1MC00Y2I2LThjODEtMjkzMmY1ZGFhYmI4'})

        res1 = self.client.get('/history.ttl')
        self.assertEqual(res1.status_code, http.client.OK)
        self.assertEqual(res1.headers['Content-Type'], 'text/turtle')
        self.assertEqual(
            res1.headers['Cache-Control'],
            'public, max-age={}'.format(cache.SHORT_TIME))
        self.assertEqual(
            res1.headers['Content-Disposition'],
            'attachment; filename="periodo-history.ttl"')

        g = Graph()
        g.parse(data=res1.get_data(as_text=True), format='turtle')
        self.assertIn((HOST['h#patch-1'],
                       FOAF.page, HOST['patches/1/patch.jsonpatch']), g)
        self.assertIn((HOST['d'],
                       DCTERMS.provenance, HOST['h#changes']), g)

        res3 = self.client.get('/history.ttl/')
        self.assertEqual(res3.status_code, http.client.NOT_FOUND)
Example #21
def get_response(project_uri, query_string, include_n3=True):
    d = {
        'results': list(),
    }

    project_graph = projects.get_project_graph(project_uri)
    graph = Graph()

    query_set = SearchQuerySet().models(Text).filter(
        content=AutoQuery(query_string), project__exact=project_uri
    )

    highlighter = Highlighter(query_string, html_tag='span', css_class=CSS_RESULT_MATCH_CLASS)
    title_highlighter = TitleHighlighter(query_string, html_tag='span', css_class=CSS_RESULT_MATCH_CLASS)

    d['spelling_suggestion'] = query_set.spelling_suggestion()

    for result in query_set:
        text_uri = URIRef(result.get_stored_fields()['identifier'])

        if annotations.has_annotation_link(project_graph, text_uri) or projects.is_top_level_project_resource(project_uri, text_uri):
            d['results'].append(search_result_to_dict(result, project_uri, highlighter, title_highlighter))

            if include_n3:
                graph += utils.metadata_triples(project_graph, text_uri)

    if include_n3:
        d['n3'] = graph.serialize(format='n3')

    return d
Example #22
    def test_period_json(self):
        res1 = self.client.get('/trgkvwbjd.json')
        self.assertEqual(res1.status_code, http.client.OK)
        self.assertEqual(res1.headers['Content-Type'], 'application/json')
        self.assertEqual(
            res1.headers['Content-Disposition'],
            'attachment; filename="periodo-period-trgkvwbjd.json"')
        context = json.loads(res1.get_data(as_text=True))['@context']
        self.assertEqual(context, [
            'http://localhost.localdomain:5000/c',
            {'@base': 'http://n2t.net/ark:/99152/'}])

        res2 = self.client.get('/trgkvwbjd.jsonld')
        self.assertEqual(res2.status_code, http.client.OK)
        self.assertEqual(res2.headers['Content-Type'], 'application/ld+json')
        self.assertEqual(
            res2.headers['Content-Disposition'],
            'attachment; filename="periodo-period-trgkvwbjd.json"')

        jsonld = json.loads(res1.get_data(as_text=True))
        context = json.loads(self.client.get('/c', buffered=True)
                             .get_data(as_text=True))
        g = Graph().parse(
            data=json.dumps({**jsonld, **context}), format='json-ld')
        self.assertIsNone(
            g.value(predicate=RDF.type, object=SKOS.ConceptScheme))
        self.assertIn((PERIODO['p0trgkvwbjd'],
                       FOAF.isPrimaryTopicOf, HOST['trgkvwbjd.json']), g)
        self.assertIn((HOST['trgkvwbjd.json'],
                       VOID.inDataset, HOST['d']), g)
        self.assertIn((PERIODO['p0trgkvwbjd'],
                       SKOS.inScheme, PERIODO['p0trgkv']), g)
        res3 = self.client.get('/trgkvwbjd.json.html')
        self.assertEqual(res3.status_code, http.client.OK)
        self.assertEqual(res3.headers['Content-Type'], 'text/html')
Example #23
def ext_json():
    rdfUrl = ''
    tok = Tokenizer()
    if request.method == 'POST':
        rdf = request.form['data']
        status_test = "0"#request.form['status']
        filters = ""#request.form['exculdeurls']
        #rdf = "http://jpp.no-ip.org/MAD_J.rdf"
        try:
            #r = requests.get(rdf)
            gg = Graph()
            #g.load(rdfUrl)
            rdf_content = StringIO.StringIO(rdf.encode('utf-8'))
            #print rdf_content.readline()
            gg.parse(rdf_content,  format="xml")
            ext = Extractor(gg)
            uris = ext.getUris()
            mapping = MapFactory()
            for uri in uris:
                term = tok.tokenized_url(uri)
                uri_status = ""
                if status_test == "1":
                    uri_status = ext.testUri(uri)
                else:
                    uri_status = "N/A"  
                uri_lookup = str(uri)+"\"" 
                lnum = ext.get_lines(rdf_content, uri_lookup)          
                ent = MapEntry(uri, term, "", lnum, uri_status)
                mapping.add(ent)
            jsonized_result = json.dumps(mapping.get())              
            return Response(jsonized_result, mimetype='application/json')
        except requests.exceptions.ConnectionError:
            X2Rwarning = 'X2R Warning: The requested URL raises ConnectionError~!!!'
            return X2Rwarning
Example #24
def all_products():
  params = request.args.get('categoria')
  g = Graph()
  try:
    g.parse('prueba.rdf', format='xml')
  except Exception,e:
    print str(e)
Example #25
    def __init__(self, err, data, namespace=None):
        self.err = err
        self.manifest = u"urn:mozilla:install-manifest"
        self.namespace = namespace or "http://www.mozilla.org/2004/em-rdf"

        if isinstance(data, types.StringTypes):
            data = StringIO(data)  # Wrap data in a pseudo-file

        from rdflib.plugins.parsers import rdfxml
        orig_create_parser = rdfxml.create_parser

        try:
            # Patch rdflib to not resolve URL entities.
            def create_parser(*args, **kwargs):
                parser = orig_create_parser(*args, **kwargs)
                parser.setEntityResolver(AddonRDFEntityResolver(err))
                return parser
            rdfxml.create_parser = create_parser

            # Load up and parse the file in XML format.
            graph = Graph()
            graph.parse(data, format="xml")
            self.rdf = graph

        except ParserError as ex:
            # Re-raise the exception in a local exception type.
            raise RDFException(message=ex.message)
        except SAXParseException as ex:
            # Raise the SAX parse exceptions so we get some line info.
            raise RDFException(orig_exception=ex)
        finally:
            # If we fail, we don't want to sully up the creation function.
            rdfxml.create_parser = orig_create_parser
Example #26
 def test_post_no_type_to_base(self):
     graph = Graph()
     created = BNode()
     graph.add((self.my_ktbs.uri, RDFS.seeAlso, created))
     graph.add((created, RDF.type, KTBS.hasModel)) # in correct NS
     with assert_raises(RdfRestException):
         self.my_ktbs.post_graph(graph)
Example #27
    def test_load_from_model(self):
        """Can we round trip through a RDF model"""
        model = Graph()
        path = '/root/42BW9AAXX/C1-38/Project_12345/'
        filename = '12345_AAATTT_L003_R1_001.fastq.gz'
        seq = sequences.parse_fastq(path, filename)
        seq.save_to_model(model)

        seq_id = 'file://'+path+filename
        seqNode = URIRef(seq_id)
        libNode = URIRef('http://localhost/library/12345')
        model.add((seqNode, libraryOntology['library'], libNode))
        seq2 = sequences.SequenceFile.load_from_model(model, seq_id)

        self.assertEqual(seq.flowcell, seq2.flowcell)
        self.assertEqual(seq.flowcell, '42BW9AAXX')
        self.assertEqual(seq.filetype, seq2.filetype)
        self.assertEqual(seq2.filetype, 'split_fastq')
        self.assertEqual(seq.lane, seq2.lane)
        self.assertEqual(seq2.lane, '3')
        self.assertEqual(seq.read, seq2.read)
        self.assertEqual(seq2.read, 1)
        self.assertEqual(seq.project, seq2.project)
        self.assertEqual(seq2.project, '12345')
        self.assertEqual(seq.index, seq2.index)
        self.assertEqual(seq2.index, 'AAATTT')
        self.assertEqual(seq.split, seq2.split)
        self.assertEqual(seq2.split, '001')
        self.assertEqual(seq.cycle, seq2.cycle)
        self.assertEqual(seq.pf, seq2.pf)
        self.assertEqual(seq2.libraryNode, libNode)
        self.assertEqual(seq.path, seq2.path)
Example #28
    def test_dataset_description_linksets(self):
        res = self.client.get('/.well-known/void')
        self.assertEqual(res.status_code, http.client.OK)
        self.assertEqual(res.headers['Content-Type'], 'text/turtle')
        g = Graph()
        g.parse(format='turtle', data=res.get_data(as_text=True))
        # http://dbpedia.org/void/Dataset
        q = sparql.prepareQuery('''
SELECT ?triples
WHERE {
  ?linkset a void:Linkset .
  ?linkset void:subset <http://n2t.net/ark:/99152/p0d> .
  ?linkset void:subjectsTarget <http://n2t.net/ark:/99152/p0d> .
  ?linkset void:linkPredicate ?predicate .
  ?linkset void:objectsTarget ?dataset .
  ?linkset void:triples ?triples .
}
''', initNs={'void': VOID})
        dbpedia = URIRef('http://dbpedia.org/void/Dataset')
        triples = next(iter(g.query(
            q, initBindings={'dataset': dbpedia,
                             'predicate': DCTERMS.spatial})))['triples'].value
        self.assertEqual(triples, 3)

        worldcat = URIRef('http://purl.oclc.org/dataset/WorldCat')
        triples = next(iter(g.query(
            q, initBindings={'dataset': worldcat,
                             'predicate': DCTERMS.isPartOf})))['triples'].value
        self.assertEqual(triples, 1)
Example #29
 def test_post_bad_type_to_base(self):
     graph = Graph()
     created = BNode()
     graph.add((self.my_ktbs.uri, KTBS.contains, created))
     graph.add((created, RDF.type, RDFS.Resource))
     with assert_raises(RdfRestException):
         self.my_ktbs.post_graph(graph)
Example #30
    def test_period_turtle(self):
        res1 = self.client.get('/trgkvwbjd.ttl')
        self.assertEqual(res1.status_code, http.client.OK)
        self.assertEqual(res1.headers['Content-Type'], 'text/turtle')
        self.assertEqual(
            res1.headers['Cache-Control'],
            'public, max-age={}'.format(cache.SHORT_TIME))
        self.assertEqual(
            res1.headers['Content-Disposition'],
            'attachment; filename="periodo-period-trgkvwbjd.ttl"')

        g = Graph().parse(data=res1.get_data(as_text=True), format='turtle')
        self.assertIsNone(
            g.value(predicate=RDF.type, object=SKOS.ConceptScheme))
        self.assertIn((PERIODO['p0trgkvwbjd'],
                       FOAF.isPrimaryTopicOf, HOST['trgkvwbjd.ttl']), g)
        self.assertIn((HOST['trgkvwbjd.ttl'],
                       VOID.inDataset, HOST['d']), g)
        self.assertIn((PERIODO['p0trgkvwbjd'],
                       SKOS.inScheme, PERIODO['p0trgkv']), g)
        res2 = self.client.get('/trgkvwbjd.ttl.html')
        self.assertEqual(res2.status_code, http.client.OK)
        self.assertEqual(res2.headers['Content-Type'], 'text/html')
        self.assertEqual(
            res2.headers['Cache-Control'],
            'public, max-age={}'.format(cache.SHORT_TIME))
Example #31
 def setUp(self):
     self.manifest = manifest = Graph(store=self.store)
     manifest.open(self.path)
     manifest.load(
         cached_file(
             "http://www.w3.org/2000/10/rdf-tests/rdfcore/Manifest.rdf"))
Example #32
class SDOGraphSetupTestCase(unittest.TestCase):
    @classmethod
    def loadGraphs(self):
        from rdflib import Graph
        import rdflib
        self.rdflib_data = Graph()
        store = getMasterStore()
        graphs = list(store.graphs())
        log.info("Loading test graph from MasterStore")
        for g in graphs:
            id = str(g.identifier)
            if not id.startswith("http://"):  #skip some internal graphs
                continue
            self.rdflib_data += g

    @classmethod
    def setUpClass(self):
        log.info("Graph tests require rdflib.")
        try:
            log.info("Trying to import rdflib...")
            import rdflib
            from rdflib import Graph
        except Exception as e:
            raise unittest.SkipTest(
                "Need rdflib installed to do graph tests: %s" % e)
        SDOGraphSetupTestCase.loadGraphs()

    def test_graphsLoaded(self):
        self.assertTrue(
            len(self.rdflib_data) > 0,
            "Graph rdflib_data should have some triples in it.")

    # SPARQLResult http://rdflib.readthedocs.org/en/latest/apidocs/rdflib.plugins.sparql.html
    # "A list of dicts (solution mappings) is returned"

    def test_found_sixplus_inverseOf(self):
        inverseOf_results = self.rdflib_data.query(
            "select ?x ?y where { ?x <http://schema.org/inverseOf> ?y }")
        log.info("inverseOf result count: %s" % len(inverseOf_results))
        self.assertTrue(
            len(inverseOf_results) >= 6,
            "Six or more inverseOf expected. Found: %s " %
            len(inverseOf_results))

    def test_even_number_inverseOf(self):

        inverseOf_results = self.rdflib_data.query(
            "select ?x ?y where { ?x <http://schema.org/inverseOf> ?y }")
        self.assertTrue(
            len(inverseOf_results) % 2 == 0,
            "Even number of inverseOf triples expected. Found: %s " %
            len(inverseOf_results))

    def test_non_equal_inverseOf(self):
        results = self.rdflib_data.query(
            "select ?x ?y where { ?x <http://schema.org/inverseOf> ?y }")
        for result in results:
            self.assertTrue(
                result[0] != result[1],
                "%s should not be equal to %s" % (result[0], result[1]))

    def test_non_equal_supercededBy(self):
        results = self.rdflib_data.query(
            "select ?x ?y where { ?x <http://schema.org/supercededBy> ?y }")
        for result in results:
            self.assertTrue(
                result[0] != result[1],
                "%s should not be equal to %s" % (result[0], result[1]))

    @unittest.expectedFailure  # autos
    def test_needlessDomainIncludes(self):
        global warnings
        # check immediate subtypes don't declare same domainIncludes
        # TODO: could we use property paths here to be more thorough?
        # rdfs:subClassOf+ should work but seems not to.
        ndi1 = ('''SELECT ?prop ?c1 ?c2 
           WHERE { 
           ?prop <http://schema.org/domainIncludes> ?c1 .
           ?prop <http://schema.org/domainIncludes> ?c2 .
           ?c1 rdfs:subClassOf ?c2 .
           FILTER (?c1 != ?c2) .
           FILTER NOT EXISTS { ?term <http://schema.org/isPartOf> <http://attic.schema.org> .}
           }
           ORDER BY ?prop ''')
        ndi1_results = self.rdflib_data.query(ndi1)
        if (len(ndi1_results) > 0):
            for row in ndi1_results:
                warn = "WARNING property %s defining domain, %s, [which is subclassOf] %s unnecessarily" % (
                    row["prop"], row["c1"], row["c2"])
                warnings.append(warn)
                log.info(warn + "\n")
        self.assertEqual(
            len(ndi1_results), 0,
            "No subtype need redeclare a domainIncludes of its parents. Found: %s "
            % len(ndi1_results))

    @unittest.expectedFailure
    def test_needlessRangeIncludes(self):
        global warnings
        # as above, but for range. We excuse URL as it is special, not best seen as a Text subtype.
        # check immediate subtypes don't declare same domainIncludes
        # TODO: could we use property paths here to be more thorough?
        nri1 = ('''SELECT ?prop ?c1 ?c2 
         WHERE { 
         ?prop <http://schema.org/rangeIncludes> ?c1 .
         ?prop <http://schema.org/rangeIncludes> ?c2 .
         ?c1 rdfs:subClassOf ?c2 .
         FILTER (?c1 != ?c2) .
         FILTER (?c1 != <http://schema.org/URL>) .
        FILTER NOT EXISTS { ?term <http://schema.org/isPartOf> <http://attic.schema.org> .}
             }
             ORDER BY ?prop ''')
        nri1_results = self.rdflib_data.query(nri1)
        if (len(nri1_results) > 0):
            for row in nri1_results:
                warn = "WARNING property %s defining range, %s, [which is subclassOf] %s unnecessarily" % (
                    row["prop"], row["c1"], row["c2"])
                warnings.append(warn)
                log.info(warn + "\n")
        self.assertEqual(
            len(nri1_results), 0,
            "No subtype need redeclare a rangeIncludes of its parents. Found: %s"
            % len(nri1_results))


#  def test_supersededByAreLabelled(self):
#    supersededByAreLabelled_results = self.rdflib_data.query("select ?x ?y ?z where { ?x <http://schema.org/supersededBy> ?y . ?y <http://schema.org/name> ?z }")
#    self.assertEqual(len(inverseOf_results ) % 2 == 0, True, "Even number of inverseOf triples expected. Found: %s " % len(inverseOf_results ) )

    def test_validRangeIncludes(self):
        nri1 = ('''SELECT ?prop ?c1
     WHERE {
         ?prop <http://schema.org/rangeIncludes> ?c1 .
         OPTIONAL{
            ?c1 rdf:type ?c2 .
            ?c1 rdf:type rdfs:Class .
         }.
         FILTER (!BOUND(?c2))
        FILTER NOT EXISTS { ?prop <http://schema.org/isPartOf> <http://attic.schema.org> .}
                 }
                 ORDER BY ?prop ''')
        nri1_results = self.rdflib_data.query(nri1)
        for row in nri1_results:
            log.info("Property %s invalid rangeIncludes value: %s\n" %
                     (row["prop"], row["c1"]))
        self.assertEqual(
            len(nri1_results), 0,
            "RangeIncludes should define valid type. Found: %s" %
            len(nri1_results))

    def test_validDomainIncludes(self):
        nri1 = ('''SELECT ?prop ?c1
     WHERE {
         ?prop <http://schema.org/domainIncludes> ?c1 .
         OPTIONAL{
            ?c1 rdf:type ?c2 .
            ?c1 rdf:type rdfs:Class .
         }.
         FILTER (!BOUND(?c2))
        FILTER NOT EXISTS { ?prop <http://schema.org/isPartOf> <http://attic.schema.org> .}
                 }
                 ORDER BY ?prop ''')
        nri1_results = self.rdflib_data.query(nri1)
        for row in nri1_results:
            log.info("Property %s invalid domainIncludes value: %s\n" %
                     (row["prop"], row["c1"]))
        self.assertEqual(
            len(nri1_results), 0,
            "DomainIncludes should define valid type. Found: %s" %
            len(nri1_results))

    # These are place-holders for more sophisticated SPARQL-expressed checks.
    @unittest.expectedFailure
    def test_readSchemaFromRDFa(self):
        self.assertTrue(
            True, False,
            "We should know how to locally get /docs/schema_org_rdfa.html but this requires fixes to api.py."
        )

    #@unittest.expectedFailure
    def test_simpleLabels(self):
        s = ""
        complexLabels = self.rdflib_data.query(
            "select distinct ?term ?label where { ?term rdfs:label ?label  FILTER regex(?label,'[^a-zA-Z0-9_ ]','i'). } "
        )
        for row in complexLabels:
            s += (" term %s has complex label: %s\n" %
                  (row["term"], row["label"]))
        self.assertTrue(
            len(complexLabels) == 0,
            "No complex term labels expected; alphanumeric only please. Found: %s Details: %s\n"
            % (len(complexLabels), s))
        # Whitespace is tolerated, for now.
        # we don't deal well with non definitional uses of rdfs:label yet - non terms are flagged up.
        # https://github.com/schemaorg/schemaorg/issues/1136

    #
    # TODO: https://github.com/schemaorg/schemaorg/issues/662
    #
    # self.assertEqual(len(ndi1_results), 0, "No domainIncludes or rangeIncludes value should lack a type. Found: %s " % len(ndi1_results ) )

    def test_labelMatchesTermId(self):
        nri1 = ('''select ?term ?label where { 
       ?term rdfs:label ?label.
       BIND(STR(?term) AS ?strVal)
       FILTER(STRLEN(?strVal) >= 18 && SUBSTR(?strVal, 1, 18) = "http://schema.org/")
       FILTER(SUBSTR(?strVal, 19) != STR(?label))
    }
    ORDER BY ?term  ''')
        nri1_results = self.rdflib_data.query(nri1)
        if len(nri1_results):
            log.info("Label matching errors:")
            for row in nri1_results:
                log.info("Term '%s' has non-matching label: '%s'" %
                         (row["term"], row["label"]))
        self.assertEqual(
            len(nri1_results), 0,
            "Term should have matching rdfs:label. Found: %s" %
            len(nri1_results))

    def test_superTypesExist(self):
        nri1 = ('''select ?term ?super where { 
       ?term rdfs:subClassOf ?super.
       ?term rdf:type rdfs:Class.
       FILTER NOT EXISTS { ?super rdf:type rdfs:Class }
       
       BIND(STR(?term) AS ?strVal)
       FILTER(STRLEN(?strVal) >= 18 && SUBSTR(?strVal, 1, 18) = "http://schema.org/")
       
       BIND(STR(?super) AS ?superStrVal)
       FILTER(STRLEN(?superStrVal) >= 18 && SUBSTR(?superStrVal, 1, 18) = "http://schema.org/")
        FILTER NOT EXISTS { ?term <http://schema.org/isPartOf> <http://attic.schema.org> .}
    }
    ORDER BY ?term  ''')
        nri1_results = self.rdflib_data.query(nri1)
        if len(nri1_results):
            log.info("Invalid SuperType errors!!!\n")
            for row in nri1_results:
                log.info("Term '%s' has nonexistent supertype: '%s'" %
                         (row["term"], row["super"]))
        self.assertEqual(
            len(nri1_results), 0,
            "Types with nonexistent SuperTypes. Found: %s" % len(nri1_results))

    def test_superPropertiesExist(self):
        nri1 = ('''select ?term ?super where { 
       ?term rdf:type rdf:Property.
       ?term rdfs:subPropertyOf ?super.
       FILTER NOT EXISTS { ?super rdf:type rdf:Property }
       
       BIND(STR(?term) AS ?strVal)
       FILTER(STRLEN(?strVal) >= 18 && SUBSTR(?strVal, 1, 18) = "http://schema.org/")

       BIND(STR(?super) AS ?superStrVal)
       FILTER(STRLEN(?superStrVal) >= 18 && SUBSTR(?superStrVal, 1, 18) = "http://schema.org/")
        FILTER NOT EXISTS { ?term <http://schema.org/isPartOf> <http://attic.schema.org> .}
    }
    ORDER BY ?term  ''')
        nri1_results = self.rdflib_data.query(nri1)
        if len(nri1_results):
            log.info("Invalid Super-Property errors!!!\n")
            for row in nri1_results:
                log.info("Term '%s' has nonexistent super-property: '%s'" %
                         (row["term"], row["super"]))
        self.assertEqual(
            len(nri1_results), 0,
            "Properties with nonexistent SuperProperties. Found: %s" %
            len(nri1_results))

    def test_selfReferencingInverse(self):
        nri1 = ('''select ?term ?inverse where { 
       ?term rdf:type rdf:Property.
       ?term <http://schema.org/inverseOf> ?inverse.
       
       BIND(STR(?term) AS ?strVal)
       FILTER(STRLEN(?strVal) >= 18 && SUBSTR(?strVal, 1, 18) = "http://schema.org/")
       
       FILTER(str(?term) = str(?inverse))
        FILTER NOT EXISTS { ?term <http://schema.org/isPartOf> <http://attic.schema.org> .}

    }
    ORDER BY ?term  ''')
        nri1_results = self.rdflib_data.query(nri1)
        if len(nri1_results):
            log.info("Self referencing inverseOf errors!!!\n")
            for row in nri1_results:
                log.info("Term '%s' is defined as inverseOf self" %
                         (row["term"]))
        self.assertEqual(
            len(nri1_results), 0,
            "Types with self referencing inverseOf Found: %s" %
            len(nri1_results))

    def test_sameInverseAndSupercededByTarget(self):
        nri1 = ('''select ?term ?inverse ?super where { 
       ?term rdf:type rdf:Property.
       ?term <http://schema.org/inverseOf> ?inverse.
       ?term <http://schema.org/supercededBy> ?super.
       
       BIND(STR(?term) AS ?strVal)
       FILTER(STRLEN(?strVal) >= 18 && SUBSTR(?strVal, 1, 18) = "http://schema.org/")
       
       FILTER(str(?inverse) = str(?super))
        FILTER NOT EXISTS { ?term <http://schema.org/isPartOf> <http://attic.schema.org> .}

    }
    ORDER BY ?term  ''')
        nri1_results = self.rdflib_data.query(nri1)
        if len(nri1_results):
            log.info("InverseOf supercededBy shared target errors!!!\n")
            for row in nri1_results:
                log.info(
                    "Term '%s' defined as inverseOf AND supercededBy %s" %
                    (row["term"], row["inverse"]))
        self.assertEqual(
            len(nri1_results), 0,
            "Types with inverseOf supercededBy shared target Found: %s" %
            len(nri1_results))

    @unittest.expectedFailure
    def test_commentEndWithPeriod(self):
        nri1 = ('''select ?term ?com where { 
       ?term rdfs:comment ?com.
       
       BIND(STR(?term) AS ?strVal)
       FILTER(STRLEN(?strVal) >= 18 && SUBSTR(?strVal, 1, 18) = "http://schema.org/")

       FILTER regex(str(?com), '[^.]$')
    }
    ORDER BY ?term  ''')
        nri1_results = self.rdflib_data.query(nri1)
        if len(nri1_results):
            log.info("Comment without ending '.' errors!!!\n")
            for row in nri1_results:
                log.info("Term '%s' has a comment without an ending '.'" %
                         (row["term"]))
        self.assertEqual(
            len(nri1_results), 0,
            "Comment without ending '.' Found: %s" % len(nri1_results))

    def test_typeLabelCase(self):
        nri1 = ('''select ?term ?label where { 
       ?term rdf:type rdfs:Class.
       ?term rdfs:label ?label.
       
       BIND(STR(?term) AS ?strVal)
       FILTER(STRLEN(?strVal) >= 18 && SUBSTR(?strVal, 1, 18) = "http://schema.org/")

       FILTER (!regex(str(?label), '^[0-9]*[A-Z].*'))
    }
    ORDER BY ?term  ''')
        nri1_results = self.rdflib_data.query(nri1)
        if len(nri1_results):
            log.info("Type label [A-Z] errors!!!\n")
            for row in nri1_results:
                log.info(
                    "Type '%s' has a label without upper case 1st character" %
                    (row["term"]))
        self.assertEqual(
            len(nri1_results), 0,
            "Type label not [A-Z] 1st non-numeric char Found: %s" %
            len(nri1_results))

    def test_propertyLabelCase(self):
        nri1 = ('''select ?term ?label where { 
       ?term rdf:type rdf:Property.
       ?term rdfs:label ?label.
       
       BIND(STR(?term) AS ?strVal)
       FILTER(STRLEN(?strVal) >= 18 && SUBSTR(?strVal, 1, 18) = "http://schema.org/")

       FILTER (!regex(str(?label), '^[0-9]*[a-z].*'))
    }
    ORDER BY ?term  ''')
        nri1_results = self.rdflib_data.query(nri1)
        if len(nri1_results):
            log.info("Property label [a-z] errors!!!\n")
            for row in nri1_results:
                log.info(
                    "Property '%s' has a label without lower case 1st non-numeric character"
                    % (row["term"]))
        self.assertEqual(
            len(nri1_results), 0,
            "Property label not [a-z] 1st char Found: %s" % len(nri1_results))

    def test_superTypeInAttic(self):
        nri1 = ('''select ?term ?super where { 
       {
           ?term rdfs:subClassOf ?super.
       }
       UNION
       {
           ?term rdfs:subPropertyOf ?super.
       }
       ?super <http://schema.org/isPartOf> <http://attic.schema.org> .
       FILTER NOT EXISTS { ?term <http://schema.org/isPartOf> <http://attic.schema.org> .}
    }
    ORDER BY ?term  ''')
        nri1_results = self.rdflib_data.query(nri1)
        if len(nri1_results):
            log.info("Super-term in attic errors!!!\n")
            for row in nri1_results:
                log.info("Term '%s' is sub-term of %s a term in attic" %
                         (row["term"], row["super"]))
        self.assertEqual(len(nri1_results), 0,
                         "Super-term in attic  Found: %s" % len(nri1_results))

    def test_referenceTermInAttic(self):
        nri1 = ('''select ?term ?rel ?ref where { 
       {
           ?term <http://schema.org/domainIncludes> ?ref.
           ?term ?rel ?ref.
       }
       UNION
       {
           ?term <http://schema.org/rangeIncludes> ?ref.
           ?term ?rel ?ref.
       }
       UNION
       {
           ?term <http://schema.org/inverseOf> ?ref.
           ?term ?rel ?ref.
       }
       UNION
       {
           ?term <http://schema.org/supercededBy> ?ref.
           ?term ?rel ?ref.
       }
       ?ref <http://schema.org/isPartOf> <http://attic.schema.org> .
       FILTER NOT EXISTS { ?term <http://schema.org/isPartOf> <http://attic.schema.org> .}
    }
    ORDER BY ?term  ''')
        nri1_results = self.rdflib_data.query(nri1)
        if len(nri1_results):
            log.info("Reference to attic term errors!!!\n")
            for row in nri1_results:
                log.info(
                    "Term '%s' makes a %s reference to %s a term in attic" %
                    (row["term"], row["rel"], row["ref"]))
        self.assertEqual(
            len(nri1_results), 0,
            "Reference to attic term  Found: %s" % len(nri1_results))

    def test_termIn2PlusExtensions(self):
        nri1 = ('''select ?term (count(?part) as ?count) where { 
        ?term <http://schema.org/isPartOf> ?part.
    }
    GROUP BY ?term
    HAVING (count(?part) > 1)
    ORDER BY ?term
     ''')
        nri1_results = self.rdflib_data.query(nri1)
        if len(nri1_results):
            log.info("Term in +1 extensions errors!!!\n")
            for row in nri1_results:
                log.info("Term '%s' isPartOf %s extensions" %
                         (row["term"], row["count"]))
        self.assertEqual(
            len(nri1_results), 0,
            "Term in +1 extensions  Found: %s" % len(nri1_results))

    def test_termNothttps(self):
        nri1 = ('''select distinct ?term where {
      ?term ?p ?o.
      FILTER strstarts(str(?term),"https://schema.org")
    }
    ORDER BY ?term
     ''')
        nri1_results = self.rdflib_data.query(nri1)
        if len(nri1_results):
            log.info("Term defined as https errors!!!\n")
            for row in nri1_results:
                log.info("Term '%s' is defined as https " % (row["term"]))
        self.assertEqual(
            len(nri1_results), 0,
            "Term defined as https  Found: %s" % len(nri1_results))

    def test_targetNothttps(self):
        nri1 = ('''prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    prefix schema: <http://schema.org/>
    select ?term ?target where {
  
      ?term schema:domainIncludes | 
            schema:rangeIncludes |
            rdfs:subClassOf |
            rdfs:subPropertyOf |
            schema:supercededBy |
            schema:inverseOf ?target.
      filter strstarts(str(?target),"https://schema.org")
    }
    ORDER BY ?term
     ''')
        nri1_results = self.rdflib_data.query(nri1)
        if len(nri1_results):
            log.info("Target defined as https errors!!!\n")
            for row in nri1_results:
                log.info("Term '%s' references term %s  as https " %
                         (row["term"], row["target"]))
        self.assertEqual(
            len(nri1_results), 0,
            "Term defined as https  Found: %s" % len(nri1_results))

    @unittest.expectedFailure
    def test_EnumerationWithoutEnums(self):
        nri1 = ('''select ?term where { 
        ?term rdfs:subClassOf/rdfs:subClassOf* <http://schema.org/Enumeration> .
        FILTER NOT EXISTS { ?enum a ?term. }
        FILTER NOT EXISTS { ?term <http://schema.org/isPartOf> <http://attic.schema.org> .}
    } 
    ORDER BY ?term  ''')
        nri1_results = self.rdflib_data.query(nri1)
        if len(nri1_results):
            log.info(
                "Enumeration Type without Enumeration value(s) errors!!!\n")
            for row in nri1_results:
                log.info("Enumeration Type '%s' has no matching enum values" %
                         (row["term"]))
        self.assertEqual(
            len(nri1_results), 0,
            "Enumeration Type without Enumeration value(s)    Found: %s" %
            len(nri1_results))
Example #33
#! /usr/bin/env python

from rdflib import Graph, URIRef
from rdflib.plugins.stores import sparqlstore

endpoint = 'http://<IP>:7200/repositories/SciGraph'
store = sparqlstore.SPARQLStore()
store.open(endpoint)

graph_name_ref = URIRef(u'http://www.springernature.com/scigraph/graphs/articles.dds')
ng = Graph(store,identifier=graph_name_ref)
store.bind('sg', 'http://www.springernature.com/scigraph/ontologies/core/')


q = 'select ?s ?t  where { ?s a sg:Article . ?s sg:title ?t  } limit 10 '
print(q)

for s, t in ng.query(q):
    print('article Id: ' + s + '\t article Title: ' + t)
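
The same query can pass its prefix per call via initNs instead of store.bind; a sketch reusing the placeholder endpoint from Example #33:

from rdflib import Graph, Namespace, URIRef
from rdflib.plugins.stores import sparqlstore

SG = Namespace("http://www.springernature.com/scigraph/ontologies/core/")
store = sparqlstore.SPARQLStore("http://<IP>:7200/repositories/SciGraph")
ng = Graph(store, identifier=URIRef("http://www.springernature.com/scigraph/graphs/articles.dds"))

q = "select ?s ?t where { ?s a sg:Article . ?s sg:title ?t } limit 10"
for s, t in ng.query(q, initNs={"sg": SG}):
    print("article Id: %s\tarticle Title: %s" % (s, t))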
Example #34
from rdflib.namespace import RDF
from rdflib import Graph, URIRef, BNode
import sys
import re

g = Graph()
g.parse(sys.argv[1], format="turtle")

roots = set([s for s, p, o in g if len(list(g.subjects(None, s))) == 0])


def make_name(n):
    s = str(n)
    i = 0
    if '#' in s:
        i = s.rindex('#')
    if '/' in s:
        i = max(i, s.rindex('/'))
    if i + 1 < len(s) and s[i + 1].isdigit():
        return "Synset " + s[i + 1:]
    t = s[i + 1:]
    if t == "":
        return "_node"
    else:
        return s[i + 1:]


def write_obj(o):
    if isinstance(o, URIRef):
        return "<%s>" % str(o)
    elif isinstance(o, BNode):
Example #35
import bs4
import requests
import time
import os
import urllib.parse
import csv
import glob
from rdflib import URIRef, BNode, Literal, Graph
from rdflib.namespace import RDF, RDFS, FOAF, XSD
from rdflib import Namespace

files = glob.glob("data/kotobank_kani/*.json")

arr = []

all = Graph()

t = "https://nakamura196.github.io/hi_person/term/type/Kani.json"

subject = URIRef(t)

stmt = (subject, URIRef("http://www.w3.org/2000/01/rdf-schema#label"), Literal("官位"))

all.add(stmt)

stmt = (subject, URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), URIRef("http://www.w3.org/2000/01/rdf-schema#Class"))

all.add(stmt)

path = t.replace("https://nakamura196.github.io/hi_person", "../docs")
Example #36
def fix_reference(timestamp, accept, citing, cited, reference):
    rf, of, cp, cdh = create_resources()
    s = Storer(cp.graph_set(),
               context_map={context_path: context_file_path},
               dir_split=dir_split_number,
               n_file_item=items_per_file,
               default_dir=default_dir)

    r_text = unquote(reference)

    g_add_be = Graph(identifier=base_iri + "be/")
    g_remove_be = Graph(identifier=base_iri + "be/")
    g_add_br = Graph(identifier=base_iri + "br/")
    g_remove_br = Graph(identifier=base_iri + "br/")

    ref_res = rf.retrieve_reference(base_iri + citing, base_iri + cited)
    g_add_be.add((ref_res, GraphEntity.has_content, Literal(r_text)))
    ref_res_text = rf.retrieve_reference_text(ref_res)
    g_remove_be.add((ref_res, GraphEntity.has_content, ref_res_text))

    if accept == "false":
        citing_res = URIRef(base_iri + citing)
        cited_res = URIRef(base_iri + cited)
        cur_time = datetime.fromtimestamp(
            int(timestamp)).strftime('%Y-%m-%dT%H:%M:%S')
        mod_date = str(rf.retrieve_modification_date(ref_res))

        if cur_time == mod_date:  # It didn't exist before
            cur_dir_path, cur_file_path = s.dir_and_file_paths(
                g_remove_br, base_dir, base_iri)
            cur_g = s.load(cur_file_path)
            # use distinct loop names so the Storer bound to `s` above is not clobbered
            for subj, pred, obj in cur_g.triples((cited_res, None, None)):
                if pred != RDF.type or obj != GraphEntity.expression:
                    g_remove_br.add((subj, pred, obj))  # add() takes a single triple tuple

        else:  # It exists already
            new_cited = URIRef(
                str(cp.graph_set().add_br(cp.name, doi_curator,
                                          bcite_base_iri)))
            gen_prov_and_store_data(cp, rf, timestamp)
            g_remove_br.add((citing_res, GraphEntity.cites, cited_res))
            g_remove_be.add((ref_res, GraphEntity.references, cited_res))

            g_add_br.add((citing_res, GraphEntity.cites, new_cited))
            g_add_be.add((ref_res, GraphEntity.references, new_cited))

    s.update(g_add_be, g_remove_be, base_dir, base_iri, context_path,
             temp_dir_for_rdf_loading)
    s.update(g_add_br, g_remove_br, base_dir, base_iri, context_path,
             temp_dir_for_rdf_loading)
    s.update_all([g_add_br, g_add_be], [g_remove_br, g_remove_be],
                 triplestore_url, base_dir)

    return timestamp, accept, citing, cited, quote(ref_res_text)
Example #37
class GraphEntity(AbstractEntity):
    BIRO: ClassVar[Namespace] = Namespace("http://purl.org/spar/biro/")
    C4O: ClassVar[Namespace] = Namespace("http://purl.org/spar/c4o/")
    CO: ClassVar[Namespace] = Namespace("http://purl.org/co/")
    CITO: ClassVar[Namespace] = Namespace("http://purl.org/spar/cito/")
    DATACITE: ClassVar[Namespace] = Namespace("http://purl.org/spar/datacite/")
    DCTERMS: ClassVar[Namespace] = Namespace("http://purl.org/dc/terms/")
    DEO: ClassVar[Namespace] = Namespace("http://purl.org/spar/deo/")
    DOCO: ClassVar[Namespace] = Namespace("http://purl.org/spar/doco/")
    FABIO: ClassVar[Namespace] = Namespace("http://purl.org/spar/fabio/")
    FOAF: ClassVar[Namespace] = Namespace("http://xmlns.com/foaf/0.1/")
    FRBR: ClassVar[Namespace] = Namespace("http://purl.org/vocab/frbr/core#")
    LITERAL: ClassVar[Namespace] = Namespace(
        "http://www.essepuntato.it/2010/06/literalreification/")
    OA: ClassVar[Namespace] = Namespace("http://www.w3.org/ns/oa#")
    OCO: ClassVar[Namespace] = Namespace("https://w3id.org/oc/ontology/")
    PRISM: ClassVar[Namespace] = Namespace(
        "http://prismstandard.org/namespaces/basic/2.0/")
    PRO: ClassVar[Namespace] = Namespace("http://purl.org/spar/pro/")

    iri_has_subtitle: ClassVar[URIRef] = FABIO.hasSubtitle
    iri_has_publication_date: ClassVar[URIRef] = PRISM.publicationDate
    iri_bibliographic_reference: ClassVar[URIRef] = BIRO.BibliographicReference
    iri_references: ClassVar[URIRef] = BIRO.references
    iri_denotes: ClassVar[URIRef] = C4O.denotes
    iri_has_content: ClassVar[URIRef] = C4O.hasContent
    iri_intextref_pointer: ClassVar[URIRef] = C4O.InTextReferencePointer
    iri_is_context_of: ClassVar[URIRef] = C4O.isContextOf
    iri_singleloc_pointer_list: ClassVar[
        URIRef] = C4O.SingleLocationPointerList
    iri_has_element: ClassVar[URIRef] = CO.element
    iri_citation: ClassVar[URIRef] = CITO.Citation
    iri_cites: ClassVar[URIRef] = CITO.cites
    iri_citation_characterisation: ClassVar[
        URIRef] = CITO.hasCitationCharacterisation
    iri_has_citing_entity: ClassVar[URIRef] = CITO.hasCitingEntity
    iri_has_cited_entity: ClassVar[URIRef] = CITO.hasCitedEntity
    iri_oci: ClassVar[URIRef] = DATACITE.oci
    iri_doi: ClassVar[URIRef] = DATACITE.doi
    iri_pmid: ClassVar[URIRef] = DATACITE.pmid
    iri_pmcid: ClassVar[URIRef] = DATACITE.pmcid
    iri_orcid: ClassVar[URIRef] = DATACITE.orcid
    iri_xpath: ClassVar[URIRef] = DATACITE["local-resource-identifier-scheme"]
    iri_intrepid: ClassVar[URIRef] = DATACITE["intrepid"]
    iri_xmlid: ClassVar[URIRef] = DATACITE["local-resource-identifier-scheme"]
    iri_has_identifier: ClassVar[URIRef] = DATACITE.hasIdentifier
    iri_identifier: ClassVar[URIRef] = DATACITE.Identifier
    iri_isbn: ClassVar[URIRef] = DATACITE.isbn
    iri_issn: ClassVar[URIRef] = DATACITE.issn
    iri_url: ClassVar[URIRef] = DATACITE.url
    iri_uses_identifier_scheme: ClassVar[
        URIRef] = DATACITE.usesIdentifierScheme
    iri_title: ClassVar[URIRef] = DCTERMS["title"]
    iri_caption: ClassVar[URIRef] = DEO.Caption
    iri_discourse_element: ClassVar[URIRef] = DEO.DiscourseElement
    iri_footnote: ClassVar[URIRef] = DOCO.Footnote
    iri_paragraph: ClassVar[URIRef] = DOCO.Paragraph
    iri_part: ClassVar[URIRef] = DOCO.Part
    iri_section: ClassVar[URIRef] = DOCO.Section
    iri_section_title: ClassVar[URIRef] = DOCO.SectionTitle
    iri_sentence: ClassVar[URIRef] = DOCO.Sentence
    iri_table: ClassVar[URIRef] = DOCO.Table
    iri_text_chunk: ClassVar[URIRef] = DOCO.TextChunk
    iri_academic_proceedings: ClassVar[URIRef] = FABIO.AcademicProceedings
    iri_book: ClassVar[URIRef] = FABIO.Book
    iri_book_chapter: ClassVar[URIRef] = FABIO.BookChapter
    iri_book_series: ClassVar[URIRef] = FABIO.BookSeries
    iri_book_set: ClassVar[URIRef] = FABIO.BookSet
    iri_data_file: ClassVar[URIRef] = FABIO.DataFile
    iri_expression: ClassVar[URIRef] = FABIO.Expression
    iri_expression_collection: ClassVar[URIRef] = FABIO.ExpressionCollection
    iri_has_sequence_identifier: ClassVar[URIRef] = FABIO.hasSequenceIdentifier
    iri_journal: ClassVar[URIRef] = FABIO.Journal
    iri_journal_article: ClassVar[URIRef] = FABIO.JournalArticle
    iri_journal_issue: ClassVar[URIRef] = FABIO.JournalIssue
    iri_journal_volume: ClassVar[URIRef] = FABIO.JournalVolume
    iri_manifestation: ClassVar[URIRef] = FABIO.Manifestation
    iri_proceedings_paper: ClassVar[URIRef] = FABIO.ProceedingsPaper
    iri_reference_book: ClassVar[URIRef] = FABIO.ReferenceBook
    iri_reference_entry: ClassVar[URIRef] = FABIO.ReferenceEntry
    iri_report_document: ClassVar[URIRef] = FABIO.ReportDocument
    iri_series: ClassVar[URIRef] = FABIO.Series
    iri_specification_document: ClassVar[URIRef] = FABIO.SpecificationDocument
    iri_thesis: ClassVar[URIRef] = FABIO.Thesis
    iri_agent: ClassVar[URIRef] = FOAF.Agent
    iri_family_name: ClassVar[URIRef] = FOAF.familyName
    iri_given_name: ClassVar[URIRef] = FOAF.givenName
    iri_name: ClassVar[URIRef] = FOAF.name
    iri_embodiment: ClassVar[URIRef] = FRBR.embodiment
    iri_part_of: ClassVar[URIRef] = FRBR.partOf
    iri_contains_reference: ClassVar[URIRef] = FRBR.part
    iri_contains_de: ClassVar[URIRef] = FRBR.part
    iri_has_literal_value: ClassVar[URIRef] = LITERAL.hasLiteralValue
    iri_ending_page: ClassVar[URIRef] = PRISM.endingPage
    iri_starting_page: ClassVar[URIRef] = PRISM.startingPage
    iri_author: ClassVar[URIRef] = PRO.author
    iri_editor: ClassVar[URIRef] = PRO.editor
    iri_is_held_by: ClassVar[URIRef] = PRO.isHeldBy
    iri_publisher: ClassVar[URIRef] = PRO.publisher
    iri_is_document_context_for: ClassVar[URIRef] = PRO.isDocumentContextFor
    iri_role_in_time: ClassVar[URIRef] = PRO.RoleInTime
    iri_with_role: ClassVar[URIRef] = PRO.withRole
    iri_note: ClassVar[URIRef] = OA.Annotation
    iri_has_body: ClassVar[URIRef] = OA.hasBody
    iri_has_annotation: ClassVar[
        URIRef] = OCO.hasAnnotation  # inverse of OA.hasTarget
    iri_has_next: ClassVar[URIRef] = OCO.hasNext
    iri_archival_document: ClassVar[URIRef] = FABIO.ArchivalDocument
    iri_viaf: ClassVar[URIRef] = DATACITE.viaf
    iri_crossref: ClassVar[
        URIRef] = DATACITE.crossref  # TODO: add to datacite!
    iri_wikidata: ClassVar[
        URIRef] = DATACITE.wikidata  # TODO: add to datacite!
    iri_wikipedia: ClassVar[
        URIRef] = DATACITE.wikipedia  # TODO: add to datacite!
    iri_has_edition: ClassVar[URIRef] = PRISM.edition
    iri_relation: ClassVar[URIRef] = DCTERMS.relation
    iri_has_citation_creation_date: ClassVar[
        URIRef] = CITO.hasCitationCreationDate
    iri_has_citation_time_span: ClassVar[URIRef] = CITO.hasCitationTimeSpan
    iri_digital_manifestation: ClassVar[URIRef] = FABIO.DigitalManifestation
    iri_print_object: ClassVar[URIRef] = FABIO.PrintObject
    iri_has_url: ClassVar[URIRef] = FRBR.exemplar
    iri_self_citation: ClassVar[URIRef] = CITO.SelfCitation
    iri_affiliation_self_citation: ClassVar[
        URIRef] = CITO.AffiliationSelfCitation
    iri_author_network_self_citation: ClassVar[
        URIRef] = CITO.AuthorNetworkSelfCitation
    iri_author_self_citation: ClassVar[URIRef] = CITO.AuthorSelfCitation
    iri_funder_self_citation: ClassVar[URIRef] = CITO.FunderSelfCitation
    iri_journal_self_citation: ClassVar[URIRef] = CITO.JournalSelfCitation
    iri_journal_cartel_citation: ClassVar[URIRef] = CITO.JournalCartelCitation
    iri_distant_citation: ClassVar[URIRef] = CITO.DistantCitation
    iri_has_format: ClassVar[URIRef] = DCTERMS["format"]

    short_name_to_type_iri: ClassVar[Dict[str, URIRef]] = {
        'an': iri_note,
        'ar': iri_role_in_time,
        'be': iri_bibliographic_reference,
        'br': iri_expression,
        'ci': iri_citation,
        'de': iri_discourse_element,
        'id': iri_identifier,
        'pl': iri_singleloc_pointer_list,
        'ra': iri_agent,
        're': iri_manifestation,
        'rp': iri_intextref_pointer
    }

    def __init__(self,
                 g: Graph,
                 g_set: GraphSet,
                 res: URIRef = None,
                 res_type: URIRef = None,
                 resp_agent: str = None,
                 source: str = None,
                 count: str = None,
                 label: str = None,
                 short_name: str = "",
                 preexisting_graph: Graph = None) -> None:
        super(GraphEntity, self).__init__()
        self.g: Graph = g
        self.resp_agent: str = resp_agent
        self.source: str = source
        self.short_name: str = short_name
        self.g_set: GraphSet = g_set
        self.preexisting_graph: Graph = Graph(identifier=g.identifier)
        self._merge_list: Tuple[GraphEntity] = tuple()
        # FLAGS
        self._to_be_deleted: bool = False
        self._was_merged: bool = False

        # If res was not specified, create from scratch the URI reference for this entity,
        # otherwise use the provided one
        if res is None:
            self.res = self._generate_new_res(g, count)
        else:
            self.res = res

        if g_set is not None:
            # If not already done, register this GraphEntity instance inside the GraphSet
            if self.res not in g_set.res_to_entity:
                g_set.res_to_entity[self.res] = self

        if preexisting_graph is not None:
            # Triples inside self.g are entirely replaced by triples from preexisting_graph.
            # This takes priority over any other initialization of self.g.
            # It's fundamental that the preexisting graph gets passed as an argument of the constructor:
            # allowing the user to set this value later through a method would mean that the user could
            # set the preexisting graph AFTER having modified self.g (which would not make sense).
            self.remove_every_triple()
            for p, o in preexisting_graph.predicate_objects(self.res):
                self.g.add((self.res, p, o))
                self.preexisting_graph.add((self.res, p, o))
        else:
            # Add mandatory information to the entity graph
            self._create_type(res_type)
            if label is not None:
                self.create_label(label)

    @staticmethod
    def _generate_new_res(g: Graph, count: str) -> URIRef:
        return URIRef(str(g.identifier) + count)

    @property
    def to_be_deleted(self) -> bool:
        return self._to_be_deleted

    @property
    def was_merged(self) -> bool:
        return self._was_merged

    @property
    def merge_list(self) -> Tuple[GraphEntity]:
        return self._merge_list

    def mark_as_to_be_deleted(self) -> None:
        # Here we must REMOVE triples pointing
        # to 'self' [THIS CANNOT BE UNDONE]:
        for res, entity in self.g_set.res_to_entity.items():
            triples_list: List[Tuple] = list(
                entity.g.triples((res, None, self.res)))
            for triple in triples_list:
                entity.g.remove(triple)

        self._to_be_deleted = True

    def merge(self, other: GraphEntity) -> None:
        # Here we must REDIRECT triples pointing
        # to 'other' to make them point to 'self':
        for res, entity in self.g_set.res_to_entity.items():
            triples_list: List[Tuple] = list(
                entity.g.triples((res, None, other.res)))
            for triple in triples_list:
                entity.g.remove(triple)
                new_triple = (triple[0], triple[1], self.res)
                entity.g.add(new_triple)

        types: List[URIRef] = other.get_types()
        for cur_type in types:
            self._create_type(cur_type)

        label: Optional[str] = other.get_label()
        if label is not None:
            self.create_label(label)

        self._was_merged = True
        self._merge_list = (*self._merge_list, other)

        # 'other' must be deleted AFTER the redirection of
        # triples pointing to it, since mark_as_to_be_deleted
        # also removes every triple pointing to 'other'
        other.mark_as_to_be_deleted()

    def commit_changes(self):
        self.preexisting_graph = Graph(identifier=self.g.identifier)
        if self._to_be_deleted:
            self.remove_every_triple()
        else:
            for triple in self.g.triples((self.res, None, None)):
                self.preexisting_graph.add(triple)
        self._to_be_deleted = False
        self._was_merged = False
        self._merge_list = tuple()
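To make the merge and commit lifecycle above concrete, here is a minimal usage sketch; it assumes `a` and `b` are two GraphEntity instances already registered in the same GraphSet (their construction is omitted and the names are purely illustrative):

# Redirect every triple that points at b.res so that it points at a.res,
# copy b's types and label onto a, and mark b for deletion.
a.merge(b)
assert a.was_merged and b in a.merge_list
assert b.to_be_deleted

# Snapshot a.g into a.preexisting_graph and reset the flags; b, being marked
# for deletion, has every remaining triple removed instead.
a.commit_changes()
b.commit_changes()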
Example #38
0
    def wrapper(self, doc):
        # call the actual function that creates the doc data
        oldbasefile = doc.basefile
        ret = f(self, doc)
        if doc.basefile != oldbasefile:
            # means that basefile was adjusted.  Touch the old parsed
            # path first so we don't regenerate.
            with self.store.open_parsed(oldbasefile, "w"):
                pass
            # move any intermediate files (in particular extracted
            # image backgrounds from PDF files) that might be
            # needed later.
            old_intermediate = self.store.intermediate_path(oldbasefile)
            new_intermediate = self.store.intermediate_path(doc.basefile)
            if self.store.storage_policy == "dir":
                old_intermediate = os.path.dirname(old_intermediate)
                new_intermediate = os.path.dirname(new_intermediate)
            if os.path.exists(
                    old_intermediate) and not os.path.exists(new_intermediate):
                util.ensure_dir(new_intermediate)
                os.rename(old_intermediate, new_intermediate)
        # now render that doc data as files (JSON, XHTML, RDF/XML)
        if self.config.serializejson == True:
            with self.store.open_serialized(doc.basefile, "wb") as fp:
                r = serialize(doc, format="json")  # should be a (unicode) str
                fp.write(r.encode('utf-8'))
            self.log.debug(
                "%s: Created %s" %
                (doc.basefile, self.store.serialized_path(doc.basefile)))
        # css file + background images + png renderings of text
        resources = self.create_external_resources(doc)
        if resources:
            cssuris = [
                cssuri(doc.uri, x) for x in resources if x.endswith(".css")
            ]
        else:
            cssuris = []
        if cssuris:
            doc.cssuris = cssuris
        updated = self.render_xhtml(doc, self.store.parsed_path(doc.basefile))
        if updated:
            self.log.debug(
                "%s: Created %s" %
                (doc.basefile, self.store.parsed_path(doc.basefile)))

        # Extract all triples on the XHTML/RDFa data to a separate
        # RDF/XML file
        distilled_graph = Graph()
        with codecs.open(self.store.parsed_path(doc.basefile),
                         encoding="utf-8") as fp:  # unicode
            distilled_graph.parse(data=fp.read(),
                                  format="rdfa",
                                  publicID=doc.uri)

        # The act of parsing from RDFa binds a lot of namespaces
        # in the graph in an unnecessary manner. In particular, it
        # binds both 'dc' and 'dcterms' to
        # 'http://purl.org/dc/terms/', which makes serialization
        # less than predictable. Blow these prefixes away.
        distilled_graph.bind("dc", URIRef("http://purl.org/dc/elements/1.1/"))
        distilled_graph.bind(
            "dcterms",
            URIRef("http://example.org/this-prefix-should-not-be-used"))

        util.ensure_dir(self.store.distilled_path(doc.basefile))
        with open(self.store.distilled_path(doc.basefile),
                  "wb") as distilled_file:
            # print("============distilled===============")
            # print(distilled_graph.serialize(format="turtle").decode('utf-8'))
            distilled_graph.serialize(distilled_file, format="pretty-xml")
        self.log.debug('%s: %s triples extracted to %s', doc.basefile,
                       len(distilled_graph),
                       self.store.distilled_path(doc.basefile))

        # Validate that all required triples are present (we check
        # distilled_graph, but we could just as well check doc.meta)
        required = sorted(set(self.get_required_predicates(doc)))
        for p in required:
            x = distilled_graph.value(URIRef(doc.uri), p)
            if not x:
                self.log.warning("%s: Metadata is missing a %s triple" %
                                 (doc.basefile, distilled_graph.qname(p)))
        if 'validaterdfa' in self.config and self.config.validaterdfa:
            # Validate that all triples specified in doc.meta and any
            # .meta property on any body object is present in the
            # XHTML+RDFa file.  NOTE: graph_diff has suddenly become
            # glacial on medium-large graphs (> ~1000 triples). Maybe we
            # don't have to validate them?
            huge_graph = False
            for g in iterate_graphs(doc.body):
                doc.meta += g
                if len(doc.meta) > 1000:
                    huge_graph = True
                    break
            if huge_graph:
                self.log.warning("%s: Graph seems huge, skipping validation" %
                                 doc.basefile)
            else:
                # self.log.debug("%s: diffing graphs" % doc.basefile)
                (in_both, in_first,
                 in_second) = graph_diff(doc.meta, distilled_graph)
                self.log.debug("%s: graphs diffed (-%s, +%s)" %
                               (doc.basefile, len(in_first), len(in_second)))

                if in_first:  # original metadata not present in the XHTML file
                    self.log.warning(
                        "%s: %d triple(s) from the original metadata was "
                        "not found in the serialized XHTML file:\n%s",
                        doc.basefile, len(in_first),
                        in_first.serialize(format="n3").decode("utf-8"))

        # Validate that entry.title and entry.id have been filled
        # (they might come from doc.meta and doc.uri, or from other sources)
        entry = DocumentEntry(self.store.documententry_path(doc.basefile))
        if not entry.id:
            self.log.warning("%s: entry.id missing" % doc.basefile)
        if not entry.title:
            self.log.warning("%s: entry.title missing" % doc.basefile)
        return ret
Example #39
0
    def export_html(self, model_view='pdm'):
        """
        Exports this instance in HTML, according to a given model from the list of supported models.

        :param model_view: string of one of the model view names available for Site objects ['pdm', 'prov', 'dc']
        :return: HTML string
        """
        if model_view == 'pdm':
            view_title = 'PDM Ontology view'
            sample_table_html = render_template(
                'class_site_pdm.html',
                site_no=self.site_no,
                description=self.description,
                wkt=self._generate_wkt(),
                state=None,  # TODO: calculate
                site_type_alink=self._make_vocab_alink(self.site_type),
                entry_date=self.entry_date
            )
        elif model_view == 'prov':
            view_title = 'PROV Ontology view'
            prov_turtle = self.export_rdf('prov', 'text/turtle')
            g = Graph().parse(data=prov_turtle, format='turtle')

            sample_table_html = render_template(
                'class_site_prov.html',
                visjs=self._make_vsjs(g),
                prov_turtle=prov_turtle,
            )
        else:  # elif model_view == 'dc':
            view_title = 'Dublin Core view'

            sample_table_html = render_template(
                'class_site_dc.html',
                identifier=self.site_no,
                description=self.description,
                date=self.entry_date,
                type=self.site_type,
                wkt=self._generate_wkt(),
                creator='<a href="{}">Geoscience Australia</a>'.format(Site.URI_GA),
                publisher='<a href="{}">Geoscience Australia</a>'.format(Site.URI_GA),
            )

        # add in the Pingback header links as they are valid for all HTML views
        pingback_uri = conf.URI_SITE_INSTANCE_BASE + self.site_no + "/pingback"
        headers = {
            'Link': '<{}>;rel = "http://www.w3.org/ns/prov#pingback"'.format(pingback_uri)
        }

        return Response(
            render_template(
                'page_site.html',
                view=model_view,
                site_no=self.site_no,
                entry_date=self.entry_date,
                view_title=view_title,
                sample_table_html=sample_table_html,
                date_now=datetime.now().strftime('%d %B %Y'),
                gm_key=conf.GOOGLE_MAPS_API_KEY,
                google_maps_js=self._generate_google_map_js(),
                lat=self.centroid_y,
                lon=self.centroid_x,
                geometry_type=self.geometry_type,
                coords=self.coords
            ),
            headers=headers
        )
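A hypothetical Flask wiring sketch for the method above (the route, the Site constructor call, and the `_view` query parameter are assumptions, not taken from the project):

from flask import Flask, request

app = Flask(__name__)

@app.route('/site/<site_no>')
def site_instance(site_no):
    site = Site(site_no)                     # hypothetical constructor
    view = request.args.get('_view', 'pdm')  # 'pdm', 'prov' or 'dc'
    return site.export_html(model_view=view)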
Example #40
0
                results.add((test, RESULT["test"], uri))
                results.add((test, RESULT["system"], system))
                if not result:
                    results.add((test, RDF.type, RESULT["PassingRun"]))
                else:
                    results.add((test, RDF.type, RESULT["FailingRun"]))
                total += 1
                num_failed += result
        self.assertEqual(num_failed, 0,
                         "Failed: %s of %s." % (num_failed, total))


RESULT = Namespace("http://www.w3.org/2002/03owlt/resultsOntology#")
FOAF = Namespace("http://xmlns.com/foaf/0.1/")

results = Graph()

system = BNode("system")
results.add((system, FOAF["homepage"], URIRef("http://rdflib.net/")))
results.add((system, RDFS.label, Literal("RDFLib")))
results.add((system, RDFS.comment, Literal("")))

if __name__ == "__main__":
    manifest = Graph()
    manifest.load(
        cached_file(
            "http://www.w3.org/2000/10/rdf-tests/rdfcore/Manifest.rdf"))
    import sys
    import getopt
    try:
        optlist, args = getopt.getopt(sys.argv[1:], 'h:', ["help"])
Example #41
0
    EnumeratedClass,
    OWL_NS,
    Property,
    Restriction,
)
from FuXi.Syntax.InfixOWL import some
from FuXi.Syntax.InfixOWL import max

from rdflib.namespace import (
    Namespace,
    NamespaceManager,
)
from pprint import pformat

exNs = Namespace('http://example.com/')
namespace_manager = NamespaceManager(Graph())
namespace_manager.bind('ex', exNs, override=False)
namespace_manager.bind('owl', OWL_NS, override=False)
g = Graph()
g.namespace_manager = namespace_manager

# Now we have an empty Graph, we can construct OWL classes in it using the
# Python classes defined in this module

a = Class(exNs.Opera, graph=g)

# Now we can assert rdfs:subClassOf and owl:equivalentClass relationships
# (in the underlying graph) with other classes using the subClassOf and
# equivalentClass descriptors which can be set to a list of objects for
# the corresponding predicates.
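For example, a short continuation of the sketch above (exNs.MusicalWork is an illustrative class IRI, not one defined earlier):

a.subClassOf = [exNs.MusicalWork]  # asserts (ex:Opera rdfs:subClassOf ex:MusicalWork) in g
print(pformat(list(g)))            # inspect the triples asserted so far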
Example #42
0
def forwards_func(apps, schema_editor):
    # We get the model from the versioned app registry;
    # if we directly import it, it'll be the wrong version
    extensions = [
        os.path.join(settings.ONTOLOGY_PATH, x) for x in settings.ONTOLOGY_EXT
    ]
    management.call_command('load_ontology',
                            source=os.path.join(settings.ONTOLOGY_PATH,
                                                settings.ONTOLOGY_BASE),
                            version=settings.ONTOLOGY_BASE_VERSION,
                            ontology_name=settings.ONTOLOGY_BASE_NAME,
                            id=settings.ONTOLOGY_BASE_ID,
                            extensions=','.join(extensions),
                            verbosity=0)

    Ontology = apps.get_model("models", "Ontology")
    Node = apps.get_model("models", "Node")
    Edge = apps.get_model("models", "Edge")

    for ontology in Ontology.objects.filter(parentontology=None):
        g = Graph()
        g.parse(ontology.path.path)
        for extension in Ontology.objects.filter(parentontology=ontology):
            g.parse(extension.path.path)

        ontology_classes = set()
        ontology_properties = set()
        for ontology_property, p, o in g.triples((None, None, RDF.Property)):
            ontology_properties.add(ontology_property)
            for s, p, domain_class in g.triples(
                (ontology_property, RDFS.domain, None)):
                ontology_classes.add(domain_class)
            for s, p, range_class in g.triples(
                (ontology_property, RDFS.range, None)):
                ontology_classes.add(range_class)

        for ontology_class, p, o in g.triples((None, None, RDFS.Class)):
            ontology_classes.add(ontology_class)

        for ontology_class in ontology_classes:
            for node in Node.objects.filter(
                    ontologyclass=str(ontology_class).split('/')[-1],
                    graph__in=ontology.graphs.all()):
                node.ontologyclass = ontology_class
                node.save()

        for ontology_property in ontology_properties:
            for edge in Edge.objects.filter(
                    ontologyproperty=str(ontology_property).split('/')[-1],
                    graph__in=ontology.graphs.all()):
                edge.ontologyproperty = ontology_property
                edge.save()

    # index base Arches concept
    arches_concept = Concept().get(id='00000000-0000-0000-0000-000000000001',
                                   include=['label'])
    arches_concept.index()

    DValueType = apps.get_model("models", "DValueType")
    DValueType.objects.create(valuetype='identifier',
                              category='identifiers',
                              namespace='dcterms',
                              datatype='text')
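A function with this (apps, schema_editor) signature is typically wired into a Django data migration via migrations.RunPython; a minimal sketch follows (the app label and dependency are illustrative assumptions):

from django.db import migrations


class Migration(migrations.Migration):

    dependencies = [
        ("models", "0001_initial"),  # hypothetical previous migration
    ]

    operations = [
        migrations.RunPython(forwards_func),
    ]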
Example #43
0
from rdflib import Graph, Literal, RDF, URIRef
import requests

# create a Graph
g = Graph()
g.parse("GuitarShop.owl", format='turtle')

# loop through each triple in the graph (subj, pred, obj)
for subj, pred, obj in g:
    # sanity check: every triple yielded by the iteration must be in the Graph
    if (subj, pred, obj) not in g:
        raise Exception("It better be!")
    if g.label(subj):
        print(g.label(subj))

# print the number of "triples" in the Graph
print("graph has {} statements.".format(len(g)))
# prints graph has 86 statements.

# print out the entire Graph in the RDF Turtle format
# print(g.serialize(format="turtle").decode("utf-8"))

res = g.query(
    """PREFIX : <http://webprotege.stanford.edu/GuitarShop#>
    SELECT ?guitar
    WHERE {
        ?guitar :hasManufacturer :Fender; 
                :hasPrice ?price .
        FILTER(?price > 500)
    }""")
Example #44
0
    def export_rdf(self, model_view='pdm', rdf_mime='text/turtle'):
        """
        Exports this instance in RDF, according to a given model from the list of supported models,
        in a given rdflib RDF format

        :param model_view: string of one of the model view names available for Sample objects ['igsn', 'dc', '',
            'default']
        :param rdf_mime: string of one of the rdflib serialization formats ['n3', 'nquads', 'nt', 'pretty-xml', 'trig',
            'trix', 'turtle', 'xml'], from http://rdflib3.readthedocs.io/en/latest/plugin_serializers.html
        :return: RDF string
        """

        '''
        <http://pid.geoscience.gov.au/site/9810> a <http://vocabulary.odm2.org/samplingfeaturetype/borehole>, <http://www.w3.org/2002/07/owl#NamedIndividual> ;
            samfl:samplingElevation [ a samfl:Elevation ;
            samfl:elevation "231.69716"^^xsd:float ;
            samfl:verticalDatum "http://spatialreference.org/ref/epsg/4283/"^^xsd:anyUri ] ;
            geosp:hasGeometry [ 
                a geosp:Geometry ;
                geosp:asWKT "SRID=GDA94;POINT(143.36786389 -25.94903611)"^^geosp:wktLiteral 
            ] .
        
        <http://registry.it.csiro.au/sandbox/csiro/oznome/feature/earth-realm/lithosphere> a sosa:FeatureOfInterest ;
            skos:exactMatch <http://sweetontology.net/realmGeol/Lithosphere> .
        
        <http://vocabulary.odm2.org/samplingfeaturetype/borehole> rdfs:subClassOf sosa:Sample .
        '''
        # things that are applicable to all model views; the graph and some namespaces
        g = Graph()
        GEO = Namespace('http://www.opengis.net/ont/geosparql#')
        g.bind('geo', GEO)

        # URI for this site
        this_site = URIRef(conf.URI_SITE_INSTANCE_BASE + self.site_no)
        g.add((this_site, RDF.type, URIRef(self.site_type)))
        g.add((this_site, RDF.type, URIRef('http://www.w3.org/2002/07/owl#NamedIndividual')))
        g.add((this_site, RDFS.label, Literal('Site ' + self.site_no, datatype=XSD.string)))
        g.add((this_site, RDFS.comment, Literal(self.description, datatype=XSD.string)))
        site_geometry = BNode()
        g.add((this_site, GEO.hasGeometry, site_geometry))
        g.add((site_geometry, RDF.type, GEO.Geometry))
        g.add((site_geometry, GEO.asWKT, Literal(self._generate_wkt(), datatype=GEO.wktLiteral)))

        return g.serialize(format=LDAPI.get_rdf_parser_for_mimetype(rdf_mime))
Example #45
0
    def _test_load_graph_size(self, file: str, graph_serialisation: str, queries: list):
        g = Graph()
        g.parse(file, format=graph_serialisation)
        self._test_graph_size(g, queries, file)
Example #46
0
File: rdf.py Project: vanceinc/Pynt
    def openfile(self, append=False):
        """Make sure self.outfile is a valid and open FileType"""
        self.graph = Graph()
        self.outfile = self.graph  # we use the graph as the output object.
Example #47
0
    def test_meta_output(self):
        """ Generate a context AND a jsonld for the metamodel and make sure it parses as RDF """
        cwd = os.path.abspath(os.path.join(os.path.dirname(__file__)))
        jsonld_path = os.path.join(self.testdir_path, 'metajson.jsonld')
        rdf_path = os.path.join(self.testdir_path, 'metardf.ttl')
        yaml_path = os.path.abspath(os.path.join(cwd, '..', '..', 'meta.yaml'))
        meta_context_path = os.path.join(self.testdir_path,
                                         'metacontext.jsonld')

        # Generate the context and JSON-LD renderings of the metamodel
        with open(meta_context_path, 'w') as tfile:
            tfile.write(ContextGenerator(yaml_path).serialize())
        with open(jsonld_path, 'w') as tfile:
            tfile.write(
                JSONLDGenerator(yaml_path).serialize(
                    context=meta_context_path))
        g = Graph()
        g.load(jsonld_path, format="json-ld")
        g.serialize(rdf_path, format="ttl")
        g.bind('bioentity', BIOENTITY)
        new_ttl = g.serialize(format="turtle").decode()
        new_g = Graph()
        new_g.parse(data=new_ttl, format="turtle")
        self.check_size(
            g, new_g,
            URIRef("https://biolink.github.io/metamodel/ontology/meta.ttl"), 8,
            71, 0, "meta")
Example #48
0
#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-
from rdflib import Graph
g = Graph()
#g.parse("http://bigasterisk.com/foaf.rdf")

#g.parse("demo.nt", format="nt")

#g.parse("http://bigasterisk.com/foaf.rdf")
my_data = '''
<rdf:RDF
   xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'
   xmlns:rdfs='http://www.w3.org/2000/01/rdf-schema#'>
   <rdf:Description>
     <rdfs:label>Example</rdfs:label>
     <rdfs:comment>This is really just an example.</rdfs:comment>
   </rdf:Description>
 </rdf:RDF>
 '''

import tempfile
file_name = tempfile.mktemp()
f = file(file_name, "w")
f.write(my_data)
f.close()
g = Graph()
result = g.parse(data=my_data, format="application/rdf+xml")
print len(g)

g1 = Graph()
result = g1.parse(location='demo.xml', format="application/rdf+xml")
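As a small illustrative addition, the temporary file written above can be handed to Graph.parse in the same way:

g2 = Graph()
g2.parse(file_name, format="application/rdf+xml")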
Example #49
0
def main():
    parser = ArgumentParser()
    parser.add_argument('graph_path', help='path to the graph json file')
    parser.add_argument('hypotheses_path',
                        help='path to the hypotheses json directory')
    parser.add_argument('kb_path', help='path to the TA2 KB file (in AIF)')
    parser.add_argument('output_dir', help='path to output directory')
    parser.add_argument('run_id', help='TA3 run ID')
    parser.add_argument('sin_id_prefix',
                        help='prefix of SIN IDs to name the final hypotheses')
    parser.add_argument('--top',
                        default=50,
                        type=int,
                        help='number of top hypothesis to output')
    parser.add_argument(
        '-f',
        '--force',
        action='store_true',
        default=False,
        help='If specified, overwrite existing output files without warning')

    args = parser.parse_args()

    json_graph = JsonGraph.from_dict(
        util.read_json_file(args.graph_path, 'JSON graph'))

    graph_mappings = json_graph.build_cluster_member_mappings()

    hypotheses_file_paths = util.get_file_list(args.hypotheses_path,
                                               suffix='.json',
                                               sort=True)

    # TODO: there is a known bug in rdflib that
    #  rdflib.Literal("2008", datatype=rdflib.XSD.gYear) would be parsed into
    #  rdflib.term.Literal(u'2008-01-01', datatype=rdflib.XSD.gYear) automatically,
    #  because a `parse_date` function is invoked for all rdflib.XSD.gYear literals.
    #  This is a temporary workaround to patch the _toPythonMapping locally.
    #  c.f.: https://github.com/RDFLib/rdflib/issues/806
    # noinspection PyProtectedMember
    rdflib.term._toPythonMapping.pop(rdflib.XSD['gYear'])

    print('Reading kb from {}'.format(args.kb_path))
    kb_graph = Graph()
    kb_graph.parse(args.kb_path, format='ttl')

    kb_nodes_by_category = catalogue_kb_nodes(kb_graph)

    kb_stmt_key_mapping = index_statement_nodes(
        kb_graph, kb_nodes_by_category['Statement'])
    kb_cm_key_mapping = index_cluster_membership_nodes(
        kb_graph, kb_nodes_by_category['ClusterMembership'])
    kb_type_stmt_key_mapping = index_type_statement_nodes(
        kb_graph, kb_nodes_by_category['TypeStatement'])

    output_dir = util.get_output_dir(args.output_dir,
                                     overwrite_warning=not args.force)

    run_id = args.run_id
    sin_id_prefix = args.sin_id_prefix

    for hypotheses_file_path in hypotheses_file_paths:
        hypotheses_json = util.read_json_file(hypotheses_file_path,
                                              'hypotheses')

        print('Found {} hypotheses with probability {}'.format(
            len(hypotheses_json['probs']), hypotheses_json['probs']))

        soin_id = sin_id_prefix + '_' + hypotheses_file_path.stem.split('_')[0]
        frame_id = soin_id + '_F1'

        top_count = 0
        for hypothesis_idx, prob in sorted(enumerate(hypotheses_json['probs']),
                                           key=itemgetter(1),
                                           reverse=True):
            if prob <= 0.0:
                hypothesis_weight = math.exp(prob / 2.0)
            else:
                hypothesis_weight = 0.0001

            hypothesis = hypotheses_json['support'][hypothesis_idx]

            top_count += 1
            hypothesis_id = '{}_hypothesis_{:0>3d}'.format(frame_id, top_count)

            subgraph = build_subgraph_for_hypothesis(
                kb_graph=kb_graph,
                kb_nodes_by_category=kb_nodes_by_category,
                kb_stmt_key_mapping=kb_stmt_key_mapping,
                kb_cm_key_mapping=kb_cm_key_mapping,
                kb_type_stmt_key_mapping=kb_type_stmt_key_mapping,
                json_graph=json_graph,
                graph_mappings=graph_mappings,
                hypothesis=hypothesis,
                hypothesis_id=hypothesis_id,
                hypothesis_weight=hypothesis_weight)

            output_path = output_dir / '{}.{}.{}.H{:0>3d}.ttl'.format(
                run_id, soin_id, frame_id, top_count)
            print('Writing hypothesis #{:>2d} with prob {:>6.2f} to {}'.format(
                top_count, prob, output_path))
            with open(output_path, 'w') as fout:
                fout.write(print_graph(subgraph))

            if top_count >= args.top:
                break
Example #50
0
def main():
    '''Converts Organisation XMLs to Turtle files and stores these to local folder.'''

    # Settings
    xml_folder = "/home/iati/xml/organisations/"
    turtle_folder = "/home/iati/organisation/"
    Iati = Namespace("http://purl.org/collections/iati/")

    if not os.path.isdir(turtle_folder):
        os.makedirs(turtle_folder)

    document_count = 1
    organisation_count = 1

    # Retrieve XML files from the XML folder
    for document in glob.glob(xml_folder + '*.xml'):

        organisation_ids = []

        doc_fail = False

        doc_id = str(document.rsplit('/', 1)[1])[:-4]
        doc_folder = turtle_folder + doc_id + '/'

        if not os.path.isdir(doc_folder):
            os.makedirs(doc_folder)

        provenance = Graph()
        provenance.bind('iati', Iati)

        # Parse the XML file
        try:
            xml = ET.parse(document)
        except ET.ParseError:
            print "Could not parse file " + document
            doc_fail = True

        if not doc_fail == True:
            root = xml.getroot()
            version = AttributeHelper.attribute_key(root, 'version')

            if (root.tag == 'iati-organisations') or (root.tag
                                                      == 'organisations'):

                # Convert each organisation in XML file to RDFLib Graph
                for organisation in xml.findall('iati-organisation'):

                    try:
                        converter = IatiConverter.ConvertOrganisation(
                            organisation)
                        graph, id, last_updated = converter.convert(Iati)
                    except TypeError as e:
                        print "Error in " + document + ":" + str(e)

                    print "Progress: Organisation #" + str(
                        organisation_count) + " in document #" + str(
                            document_count)

                    if (not graph == None) and (not id == None):
                        # Write organisation to Turtle and store in local folder
                        graph_turtle = graph.serialize(format='turtle')

                        with open(
                                doc_folder + str(id.replace('/', '%2F')) +
                                '.ttl', 'w') as turtle_file:
                            turtle_file.write(graph_turtle)

                    organisation_count += 1
                    organisation_ids.append(id)

                for organisation in xml.findall('organisation'):

                    try:
                        converter = IatiConverter.ConvertOrganisation(
                            organisation)
                        graph, id, last_updated = converter.convert(Iati)
                    except TypeError as e:
                        print "Error in " + document + ":" + str(e)

                    print "Progress: Organisation #" + str(
                        organisation_count) + " in document #" + str(
                            document_count)

                    if (not graph == None) and (not id == None):
                        # Write organisation to Turtle and store in local folder
                        graph_turtle = graph.serialize(format='turtle')

                        with open(
                                doc_folder + str(id.replace('/', '%2F')) +
                                '.ttl', 'w') as turtle_file:
                            turtle_file.write(graph_turtle)

                    organisation_count += 1
                    organisation_ids.append(id)

            elif (root.tag == 'iati-organisation') or (root.tag
                                                       == 'organisation'):

                try:
                    converter = IatiConverter.ConvertOrganisation(
                        xml.getroot())
                    graph, id, last_updated = converter.convert(Iati)
                except TypeError as e:
                    print "Error in " + document + ":" + str(e)

                print "Progress: Organisation #" + str(
                    organisation_count) + " in document #" + str(
                        document_count)

                if (not graph == None) and (not id == None):
                    # Write organisation to Turtle and store in local folder
                    graph_turtle = graph.serialize(format='turtle')

                    with open(
                            doc_folder + str(id.replace('/', '%2F')) + '.ttl',
                            'w') as turtle_file:
                        turtle_file.write(graph_turtle)

                    # The following outputs enable the Virtuoso Bulk loader process to put files into the right graphs.
                    with open(
                            doc_folder + str(id.replace('/', '%2F')) +
                            '.ttl.graph', 'w') as graph_file:
                        graph_file.write(
                            str(Iati) + 'graph/organisation/' + str(id))

                organisation_count += 1
                organisation_ids.append(id)

            document_count += 1

            # Add provenance from corresponding JSON file
            json_document = document[:-4] + '.json'

            try:
                with open(json_document, 'r') as open_json_doc:
                    json_parsed = json.load(open_json_doc)
            except:
                print "Could not parse file " + json_document
                json_parsed = None

            provenance_converter = IatiConverter.ConvertProvenance(
                'organisation', json_parsed, provenance, doc_id, last_updated,
                version, organisation_ids)
            provenance = provenance_converter.convert(Iati)

            # Write provenance graph to Turtle and store in local folder
            provenance_turtle = provenance.serialize(format='turtle')

            with open(doc_folder + 'provenance-' + doc_id + '.ttl',
                      'w') as turtle_file:
                turtle_file.write(provenance_turtle)

            # The following outputs enable the Virtuoso Bulk loader process to put files into the right graphs.
            with open(doc_folder + 'provenance-' + doc_id + '.ttl.graph',
                      'w') as graph_file:
                graph_file.write(str(Iati) + 'graph/provenance/')

    print "Done!"
Example #51
0
def convert_graph(options, closureClass=None) :
	"""
	Entry point for external scripts (CGI or command line) to parse an RDF file(s), possibly execute OWL and/or RDFS closures,
	and serialize back the result in some format.
	Note that this entry point can be used requiring no entailment at all;
	because both the input and the output format for the package can be RDF/XML or Turtle, such usage would
	simply mean a format conversion.
	
	If OWL 2 RL processing is required, that also means that the owl:imports statements are interpreted. Ie,
	ontologies can be spread over several files. Note, however, that the output of the process would then include all 
	imported ontologies, too.

	@param options: object with specific attributes, namely:
	  - options.sources: list of uris or file names for the source data; for each one if the name ends with 'ttl', it is considered to be turtle, RDF/XML otherwise (this can be overwritten by the options.iformat, though)
	  - options.text: direct Turtle encoding of a graph as a text string (useful, eg, for a CGI call using a text field)
	  - options.owlClosure: can be yes or no
	  - options.rdfsClosure: can be yes or no
	  - options.owlExtras: can be yes or no; whether the extra rules beyond OWL 2 RL are used or not.
	  - options.axioms: whether relevant axiomatic triples are added before chaining (can be a boolean, or the strings "yes" or "no")
	  - options.daxioms: further datatype axiomatic triples are added to the output (can be a boolean, or the strings "yes" or "no")
	  - options.format: output format, can be "turtle" or "rdfxml"
	  - options.iformat: input format, can be "turtle", "rdfa", "json", "rdfxml", or "auto". "auto" means that the suffix of the file is considered: '.ttl', '.html', '.json' or '.jsonld' respectively, with 'xml' as a fallback
	  - options.trimming: whether the extension to OWLRL should also include trimming
	@param closureClass: explicit class reference. If set, this overrides the various different other options to be used as an extension.
	"""

	def __check_yes_or_true(opt) :
		return opt is True or opt == "yes" or opt == "Yes" or opt == "True" or opt == "true"

	import warnings

	warnings.filterwarnings("ignore")
	if len(options.sources) == 0 and (options.text is None or len(options.text.strip()) == 0) :
		raise Exception("No graph specified either via a URI or text")

	graph = Graph()

	# Just to be sure that this attribute does not create issues with older versions of the service...
	# the try statement should be removed, eventually...
	iformat = AUTO
	try :
		iformat = options.iformat
	except :
		# exception can be raised if that attribute is not used at all, true for older versions
		pass

	# similar measure with the possible usage of the 'source' options
	try :
		if options.source is not None:
			options.sources.append(options.source)
	except:
		# exception can be raised if that attribute is not used at all, true for newer versions
		pass

	# Get the sources first. Note that a possible error is filtered out, namely to process the same file twice. This is done
	# by turning the input arguments into a set...
	for inp in set(options.sources):
		__parse_input(iformat, inp, graph)

	# add the possible extra text (ie, the text input on the HTML page)
	if options.text is not None:
		graph.parse(StringIO.StringIO(options.text), format="n3")

	# Get all the options right
	# noinspection PyPep8Naming
	owlClosure  = __check_yes_or_true(options.owlClosure)
	# noinspection PyPep8Naming
	rdfsClosure = __check_yes_or_true(options.rdfsClosure)
	# noinspection PyPep8Naming
	owlExtras   = __check_yes_or_true(options.owlExtras)
	try:
		trimming = __check_yes_or_true(options.trimming)
	except :
		trimming = False
	axioms  = __check_yes_or_true(options.axioms)
	daxioms = __check_yes_or_true(options.daxioms)

	if owlClosure:
		interpret_owl_imports(iformat, graph)

	# adds to the 'beauty' of the output
	graph.bind("owl", "http://www.w3.org/2002/07/owl#")
	graph.bind("xsd", "http://www.w3.org/2001/XMLSchema#")

	#@@@@ some smarter choice should be used later to decide what the closure class is!!! That should
	# also control the import management. Eg, if the superclass includes OWL...
	if closureClass is not None :
		closure_class = closureClass
	else :
		closure_class = return_closure_class(owlClosure, rdfsClosure, owlExtras, trimming)

	DeductiveClosure(closure_class, improved_datatypes=True, rdfs_closure=rdfsClosure, axiomatic_triples=axioms, datatype_axioms=daxioms).expand(graph)

	if options.format == TURTLE:
		return graph.serialize(format="turtle")
	elif options.format == JSON:
		if json_ld_available :
			return graph.serialize(format="json-ld")
		else:
			raise Exception("JSON-LD serializer is not available")
	else:
		return graph.serialize(format="pretty-xml")
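A minimal invocation sketch (added for illustration): the attribute names follow the docstring above, TURTLE and AUTO are the module-level format constants referenced in the code, and the input file name is purely hypothetical.

from argparse import Namespace  # a convenient stand-in for the caller's options object

opts = Namespace(
    sources=["ontology.ttl"],  # hypothetical input file
    source=None,
    text=None,
    owlClosure="yes",
    rdfsClosure="no",
    owlExtras="no",
    axioms="no",
    daxioms="no",
    trimming="no",
    format=TURTLE,
    iformat=AUTO,
)
print(convert_graph(opts))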
Example #52
0
def build_subgraph_for_hypothesis(kb_graph, kb_nodes_by_category,
                                  kb_stmt_key_mapping, kb_cm_key_mapping,
                                  kb_type_stmt_key_mapping, json_graph,
                                  graph_mappings, hypothesis, hypothesis_id,
                                  hypothesis_weight):
    member_to_clusters = graph_mappings['member_to_clusters']
    cluster_to_prototype = graph_mappings['cluster_to_prototype']

    # Set of all KB edge statement nodes
    kb_edge_stmt_set = set()
    # Set of all KB type statement nodes
    kb_type_stmt_set = set()

    # Mapping from KB edge statement nodes to importance values
    kb_stmt_importance = {}

    # Set of all ERE node labels
    ere_set = set()
    # Mapping from ERE node labels to importance values
    ere_importance = {}

    # logging.info('Processing all statements')
    for stmt_label, stmt_weight in zip(hypothesis['statements'],
                                       hypothesis['statementWeights']):
        # Rescale the stmt_weight to get the importance value
        if stmt_weight <= 0.0:
            stmt_weight = math.exp(stmt_weight / 100.0)
        else:
            stmt_weight = 0.0001

        assert json_graph.is_statement(stmt_label)
        stmt_entry = json_graph.node_dict[stmt_label]

        stmt_subj = stmt_entry.subject
        stmt_pred = stmt_entry.predicate
        stmt_obj = stmt_entry.object
        assert stmt_subj is not None and stmt_pred is not None and stmt_obj is not None

        # Find the statement node in the KB
        kb_stmt_id = URIRef(stmt_label)
        if kb_stmt_id not in kb_nodes_by_category['Statement']:
            kb_stmt_pred = RDF.type if stmt_pred == 'type' else LDC_ONT.term(
                stmt_pred)
            kb_stmt_id = next(
                iter(kb_stmt_key_mapping[(URIRef(stmt_subj), kb_stmt_pred,
                                          URIRef(stmt_obj))]))

        # Add the subject of any statement to ere_set
        ere_set.add(stmt_subj)

        # Update the importance value of the subject of any statement based on stmt_weight
        if stmt_subj not in ere_importance or ere_importance[
                stmt_subj] < stmt_weight:
            ere_importance[stmt_subj] = stmt_weight

        if stmt_pred == 'type':
            if kb_stmt_id is not None:
                # Add kb_stmt_id to the set of KB type statement nodes
                kb_type_stmt_set.add(kb_stmt_id)
                # kb_type_stmt_dict[stmt_subj].add(kb_stmt_id)

        else:
            if kb_stmt_id is not None:
                # Add kb_stmt_id to the set of KB edge statement nodes
                kb_edge_stmt_set.add(kb_stmt_id)
                # Update the importance value of the edge statement
                kb_stmt_importance[kb_stmt_id] = stmt_weight

            # Add the object of edge statements to ere_set
            ere_set.add(stmt_obj)

            # Update the importance value of the object of edge statements based on stmt_weight
            if stmt_obj not in ere_importance or ere_importance[
                    stmt_obj] < stmt_weight:
                ere_importance[stmt_obj] = stmt_weight

    # Set of all SameAsCluster node labels
    same_as_cluster_set = set()
    # Set of all KB ClusterMembership nodes
    kb_cluster_membership_set = set()

    # Set of all ERE node labels that are prototypes
    proto_ere_set = set()
    # Mapping from ERE prototype node labels to importance values
    proto_importance = {}

    # logging.info('Processing all EREs and clusters')
    cluster_memberships = hypothesis.get('clusterMemberships', None)
    if cluster_memberships is None:
        for ere in ere_set:
            ere_weight = ere_importance.get(ere, 0.0)
            for cluster in member_to_clusters[ere]:
                # Add all corresponding cluster label of each ERE node to same_as_cluster_set
                same_as_cluster_set.add(cluster)

                # Find the ClusterMembership node in the KB
                kb_cluster_membership_set.update(
                    kb_cm_key_mapping[URIRef(cluster),
                                      URIRef(ere)])

                proto_ere = cluster_to_prototype[cluster]
                if proto_ere not in proto_importance or proto_importance[
                        proto_ere] < ere_weight:
                    proto_importance[proto_ere] = ere_weight
    else:
        for member, cluster in cluster_memberships:
            same_as_cluster_set.add(cluster)
            kb_cluster_membership_set.update(kb_cm_key_mapping[URIRef(cluster),
                                                               URIRef(member)])

            # Add the prototype of each SameAsCluster node to ere_set
            proto_ere = cluster_to_prototype[cluster]
            proto_ere_set.add(proto_ere)

            # Find the type statement node for the prototype
            proto_type_stmt_id_list = kb_type_stmt_key_mapping[URIRef(
                proto_ere)]
            highest_granularity_level = max([
                len(type_ont.split('.'))
                for _, type_ont in proto_type_stmt_id_list
            ])
            for type_stmt_id, type_ont in proto_type_stmt_id_list:
                if len(type_ont.split('.')) == highest_granularity_level:
                    kb_type_stmt_set.add(type_stmt_id)

            # Find the ClusterMembership node for the prototype in the KB
            kb_cluster_membership_set.update(
                kb_cm_key_mapping[URIRef(cluster),
                                  URIRef(proto_ere)])

            member_weight = ere_importance.get(member, 0.0)
            if proto_ere not in proto_importance or proto_importance[
                    proto_ere] < member_weight:
                proto_importance[proto_ere] = member_weight

    # Add all prototype ERE labels to ere_set
    ere_set |= proto_ere_set

    # All triples to be added to the subgraph
    # logging.info('Extracting all content triples')
    all_triples = set()

    for kb_stmt_id in kb_edge_stmt_set:
        all_triples.update(triples_for_edge_stmt(kb_graph, kb_stmt_id))

    for kb_stmt_id in kb_type_stmt_set:
        all_triples.update(triples_for_type_stmt(kb_graph, kb_stmt_id))

    # logging.info('Extracting triples for all EREs')
    # Add triples for all EREs
    for ere in ere_set:
        kb_ere_id = URIRef(ere)
        all_triples.update(triples_for_ere(kb_graph, kb_ere_id))

    # logging.info('Extracting triples for all SameAsClusters')
    # Add triples for all SameAsClusters
    for cluster in same_as_cluster_set:
        kb_cluster_id = URIRef(cluster)
        all_triples.update(triples_for_cluster(kb_graph, kb_cluster_id))

    # logging.info('Extracting triples for all ClusterMemberships')
    # Add triples for all ClusterMemberships
    for kb_cm_id in kb_cluster_membership_set:
        all_triples.update(triples_for_cluster_membership(kb_graph, kb_cm_id))

    # logging.info('Constructing a subgraph')
    # Start building the subgraph
    subgraph = Graph()

    # Bind all prefixes of kb_graph to the subgraph
    for prefix, namespace in kb_graph.namespaces():
        if str(namespace) not in [AIDA, LDC, LDC_ONT]:
            subgraph.bind(prefix, namespace)
    # Bind the AIDA, LDC, LDC_ONT, and UTEXAS namespaces to the subgraph
    subgraph.bind('aida', AIDA, override=True)
    subgraph.bind('ldc', LDC, override=True)
    subgraph.bind('ldcOnt', LDC_ONT, override=True)
    subgraph.bind('utexas', UTEXAS)

    # logging.info('Adding hypothesis related triples to the subgraph')
    # Add triple for the aida:Hypothesis node and its type
    kb_hypothesis_id = UTEXAS.term(hypothesis_id)
    subgraph.add((kb_hypothesis_id, RDF.type, AIDA.Hypothesis))

    # Add triple for the hypothesis importance value
    subgraph.add((kb_hypothesis_id, AIDA.importance,
                  Literal(hypothesis_weight, datatype=XSD.double)))

    # Add triple for the aida:Subgraph node and its type
    kb_subgraph_id = UTEXAS.term(hypothesis_id + '_subgraph')
    subgraph.add((kb_hypothesis_id, AIDA.hypothesisContent, kb_subgraph_id))
    subgraph.add((kb_subgraph_id, RDF.type, AIDA.Subgraph))

    # Add all EREs as contents of the aida:Subgraph node
    for ere in ere_set:
        kb_ere_id = URIRef(ere)
        subgraph.add((kb_subgraph_id, AIDA.subgraphContains, kb_ere_id))

    # logging.info('Adding all content triples to the subgraph')
    # Add all triples
    for triple in all_triples:
        subgraph.add(triple)

    # Add importance values for all edge statements
    for kb_stmt_id, importance in kb_stmt_importance.items():
        subgraph.add((kb_stmt_id, AIDA.importance,
                      Literal(importance, datatype=XSD.double)))

    # Add importance values for all prototype EREs
    for proto_ere, proto_weight in proto_importance.items():
        kb_proto_id = URIRef(proto_ere)
        subgraph.add((kb_proto_id, AIDA.importance,
                      Literal(proto_weight, datatype=XSD.double)))

    # Compute handles for Entity clusters
    proto_handles = compute_handle_mapping(ere_set, json_graph,
                                           member_to_clusters,
                                           cluster_to_prototype)

    for proto_ere, handle in proto_handles.items():
        kb_proto_id = URIRef(proto_ere)
        if len(
                list(
                    subgraph.objects(subject=kb_proto_id,
                                     predicate=AIDA.handle))) == 0:
            subgraph.add(
                (kb_proto_id, AIDA.handle, Literal(handle,
                                                   datatype=XSD.string)))

    return subgraph
Example #53
0
class Model(object):
    def __init__(self):
        self.graph = Graph()
        self.top_nodes = []
        self.serializedResources = []

    def createResource(self, id=None):
        return Resource(id=id, model=self)

    def createProperty(self, id=None):
        return Property(id)

    def createLiteral(self, element):
        return Literal(element)

    def createTypedLiteral(self, element, type):
        return self._convert_element(element, type)

    def createOrderedList(self):
        return OrderedList()

    def createItemizedList(self):
        return ItemizedList()

    def createLinkedList(self):
        return LinkedList()

    def createNudeList(self):
        return NudeList()

    def _append_to_graph(self, subject, predicate, object):

        if isinstance(predicate, Property):
            self.graph.add((subject, URIRef(predicate.id), object))
        else:
            self.graph.add((subject, predicate, object))

    def _add_statement(self, statement):
        if statement is not None:
            #print "Adding statement", statement
            self._append_to_graph(statement.getSubject(),
                                  statement.getPredicate(),
                                  statement.getObject())

    # Append various types of elements to the graph. This is the main method for determining
    # the type of a serializable object and creating the appropriate triple for it. All methods
    # that need to make additions to the graph should use it, unless they handle raw triples
    # in the form of the s-p-o of a statement that embeds them.
    def _add_element(self, object, predicate, subject=None):
        from SmartAPI.rdf.Variant import Variant
        from SmartAPI.common.Tools import Tools

        if isinstance(object, Resource) and (object
                                             not in self.serializedResources):
            self.serializedResources.append(object)

            n = object.getNode()

            for p in object.listProperties():
                if p is not None:
                    if isinstance(p, list):
                        for pe in p:
                            self._add_statement(pe)
                            if pe.getResource() is not None:
                                self._add_element(pe.getResource(),
                                                  pe.getPredicate(), n)
                    else:
                        self._add_statement(p)
                        if p.getResource() is not None:
                            self._add_element(p.getResource(),
                                              p.getPredicate(), n)

            for l in object.listLiterals():
                self._add_statement(l)

        elif isinstance(object, Resource) and (object
                                               in self.serializedResources):
            pass

        elif isinstance(object, Property):
            if subject is not None and predicate is not None and object.id is not None:
                self._append_to_graph(subject, predicate, URIRef(object.id))

        elif isinstance(object, Literal):
            if subject is not None and predicate is not None:
                self._append_to_graph(subject, predicate, object)

        elif isinstance(object, RdfLiteral):
            if subject is not None and predicate is not None:
                self._append_to_graph(subject, predicate, object.getValue())

        elif isinstance(object, Variant):
            if subject is not None and predicate is not None:
                self._append_to_graph(subject, predicate, object.asTerm())

        elif isinstance(object, URIRef):
            if subject is not None:
                self._append_to_graph(subject, predicate, object)

        else:
            if subject is not None:
                self._append_to_graph(subject, predicate, Literal(object))
        """
        elif isinstance(object, list):  # ordered list is the default for raw lists
            self._add_ordered_list(object, predicate, subject)

        elif isinstance(object, OrderedList):
            self._add_ordered_list(object, predicate, subject)
        
        elif isinstance(object, LinkedList):
            self._add_linked_list(object, predicate, subject)
            
        elif isinstance(object, ItemizedList):
            self._add_itemized_list(object, predicate, subject)
        """

    def _convert_element(self, element, type):
        return Literal(element, datatype=URIRef(type))

    def is_list(self, node):
        item = self.graph.value(subject=node, predicate=RDF.first)
        return item is not None

    def parse_list(self, container, parent_node=None, klass=None, first=None):
        # Dispatch to the matching list parser based on which marker predicate
        # hangs off the first list node.
        if first is None and parent_node is not None:
            first = self.graph.value(subject=parent_node, predicate=RDF.first)
        if first is not None:
            # rdf:rest present -> a conventional RDF collection (linked list)
            arr = self.graph.value(subject=parent_node, predicate=RDF.rest)
            if arr:
                return self._parse_linked_list(container, first, arr, klass)

            # smartapi:rawArray -> a nude list (items JSON-encoded in rdf:value)
            arr = self.graph.value(subject=first,
                                   predicate=URIRef(NS.SMARTAPI + "rawArray"))
            if arr:
                return self._parse_nude_list(container, first, klass)

            # smartapi:array -> an itemized (unordered) list
            arr = self.graph.value(subject=first,
                                   predicate=URIRef(NS.SMARTAPI + "array"))
            if arr:
                return self._parse_itemized_list(container, first, klass)

            # smartapi:indexedArray -> an ordered list with explicit indices
            arr = self.graph.value(subject=first,
                                   predicate=URIRef(NS.SMARTAPI +
                                                    "indexedArray"))
            if arr:
                return self._parse_ordered_list(container, first, klass)

        return None

    def _parse_list_entry(self, entry, klass=None, from_nude=False):
        from SmartAPI.model.ValueObject import ValueObject
        from SmartAPI.rdf.Variant import Variant
        from SmartAPI.common.Tools import Tools

        if from_nude and klass is not None:
            item = klass()
            item.fromNude(entry)
            return item

        if isinstance(entry, Literal):
            return Variant(entry.toPython())
        elif isinstance(entry, URIRef):
            if entry == RDF.nil:
                return None
            return Variant(entry)
        else:
            if klass is None:
                types = []
                sl = self.listStatements(subject=entry,
                                         predicate=URIRef(PROPERTY.RDF_TYPE),
                                         object=None)
                for s in sl:
                    types.append(s.getResource().toString())
                klass = Tools().mapper.getClass(types, default=Variant)

            item = klass()
            for s in self.find_statements_for_node(entry):
                if s.predicate == NS.SMARTAPI + "valueObject":
                    itemv = ValueObject()
                    for sv in self.find_statements_for_node(s.object):
                        itemv.parse(sv)
                    item.addValueObject(itemv)
                else:
                    item.parseStatement(s)
            return item

    def _parse_linked_list(self, container, value, next, klass):
        if value is not None:
            item = self._parse_list_entry(value, klass)
            if item: container.append(item)

        while next is not None:
            value = self.graph.value(subject=next, predicate=RDF.first)
            next = self.graph.value(subject=next, predicate=RDF.rest)
            if value is not None:
                item = self._parse_list_entry(value, klass)
                if item: container.append(item)

        return 'LinkedList'

    def _parse_nude_list(self, container, current, klass):
        # A nude list keeps its items as a single JSON-encoded string in the
        # rdf:value of the smartapi:rawArray node.
        arr = self.graph.value(subject=current,
                               predicate=URIRef(NS.SMARTAPI + "rawArray"))
        if arr:
            value = self.graph.value(subject=arr, predicate=RDF.value)
            v = simplejson.loads(value.toPython())
            for o in v:
                container.append(
                    self._parse_list_entry(o, klass, from_nude=True))

        return 'NudeList'

    def _parse_itemized_list(self, container, current, klass):

        arr = self.graph.value(subject=current,
                               predicate=URIRef(NS.SMARTAPI + "array"))
        size = self.graph.value(subject=current,
                                predicate=URIRef(NS.SMARTAPI + "size"))
        if arr:
            for s, p, o in self.graph.triples((arr, RDF.value, None)):
                container.append(self._parse_list_entry(o, klass))

        return 'ItemizedList'

    def _parse_ordered_list(self, container, current, klass):
        arr = self.graph.value(subject=current,
                               predicate=URIRef(NS.SMARTAPI + "indexedArray"))
        size = self.graph.value(subject=current,
                                predicate=URIRef(NS.SMARTAPI + "size"))
        if arr and size:
            # prefill
            for i in range(size.toPython()):
                container.append(None)
            #container = [None] * size.toPython()
            for s, p, o in self.graph.triples(
                (arr, URIRef(NS.SMARTAPI + "entry"), None)):
                index = self.graph.value(subject=o,
                                         predicate=URIRef(NS.SMARTAPI +
                                                          "index"))
                value = self.graph.value(subject=o, predicate=RDF.value)
                container[index.toPython()] = self._parse_list_entry(
                    value, klass)

        return 'OrderedList'

    # obsolete?
    """
    def _add_linked_list(self, rdflist, predicate, subject):
        from SmartAPI.model.Obj import Obj
        from SmartAPI.rdf.Variant import Variant

        elements = rdflist.get_items()
        current = lst = BNode()
        self.graph.add((subject, URIRef(predicate.id), lst))
        l = len(elements)
        for index, var in enumerate(elements):
            if isinstance(var, Variant):  # support lists with raw values (not just wrapped inside Evaluation
                self.graph.add((current, RDF.first, var.asTerm()))
            elif isinstance(var, Obj):
                self._add_element(var.serialize(self), RDF.first, subject = current)
            elif isinstance(var, Resource):
                var_node = BNode()
                for p in var.properties:
                    self._add_element(p[1], URIRef(p[0]), subject = var_node)
                self.graph.add((current, RDF.first, var_node))
            else:
                self.graph.add((current, RDF.first, Literal(var)))
            
            next = RDF.nil if index == l-1 else BNode()  # last item
            self.graph.add((current, RDF.rest, next))
            current = next
    """

    def add(self, statement):
        if isinstance(statement, list):
            for l in statement:
                self._add_element(l, None)
        else:
            self._add_element(statement, None)

    def findSubject(self, predicate, object):
        return Resource(model=self,
                        node=self.graph.value(predicate=predicate,
                                              object=object))

    def findObject(self, subject, predicate):
        return Statement(node=self.graph.value(subject=subject,
                                               predicate=predicate),
                         subject=subject,
                         predicate=predicate)

    def find_statements_for_node(self, node, predicate=None):
        r = []

        for s, p, o in self.graph.triples((node, predicate, None)):
            r.append(
                Statement(model=self,
                          subject=s,
                          predicate=p,
                          object=o,
                          resource=Resource(model=self, node=o)))
        return r

    def listStatements(self, subject=None, predicate=None, object=None):
        r = []
        for s, p, o in self.graph.triples((subject, predicate, object)):
            r.append(
                Statement(model=self,
                          subject=s,
                          predicate=p,
                          object=o,
                          resource=Resource(model=self, node=o)))
        return r

    def serialize(self, format=SERIALIZATION.JSON_LD):
        return self.graph.serialize(format=format)

    def parse(self, data=None, file=None, format=SERIALIZATION.JSON_LD):
        if data is not None:
            try:
                if format == SERIALIZATION.JSON_LD:
                    json = simplejson.loads(data)
                    if isinstance(json, dict) and '@graph' in json and '@context' in json:
                        self.graph.parse(data=simplejson.dumps(json['@graph']),
                                         format='json-ld',
                                         context=json['@context'])
                    else:
                        self.graph.parse(data=data, format='json-ld')
                # other formats
                else:
                    self.graph.parse(data=data, format=format)
            except Exception:
                print("Could not read the input data into a graph")
                traceback.print_exc()
                #traceback.print_stack()
            return

        elif file is not None:
            try:
                with open(file) as f:
                    self.graph.parse(f, format=format)
            except Exception:
                print("Could not read the file into a model")
                traceback.print_exc()
            return
        print("No input to parse into a graph")
Example #54
0
from datetime import datetime

from rdflib import Graph, URIRef, Literal, BNode, RDF, Namespace
from rdflib.namespace import FOAF, DOAP, DC

from nose.tools import nottest

EARL = Namespace("http://www.w3.org/ns/earl#")

report = Graph()

report.bind("foaf", FOAF)
report.bind("earl", EARL)
report.bind("doap", DOAP)
report.bind("dc", DC)

me = URIRef("http://gromgull.net/me")
report.add((me, RDF.type, FOAF.Person))
report.add((me, FOAF.homepage, URIRef("http://gromgull.net")))
report.add((me, FOAF.name, Literal("Gunnar Aastrand Grimnes")))

rdflib = URIRef("https://github.com/RDFLib/rdflib")

report.add((rdflib, DOAP.homepage, rdflib))
report.add((rdflib, DOAP.name, Literal("rdflib")))
report.add((rdflib, DOAP.developer, me))
report.add((rdflib, RDF.type, DOAP.Project))

now = Literal(datetime.now())
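Example #54 stops right after creating the timestamp; a hedged continuation showing how one EARL assertion could be recorded with the terms bound above (the test URI is purely illustrative):

assertion = BNode()
result = BNode()
report.add((assertion, RDF.type, EARL.Assertion))
report.add((assertion, EARL.assertedBy, me))
report.add((assertion, EARL.subject, rdflib))
report.add((assertion, EARL.test, URIRef("http://example.org/tests#test-001")))  # illustrative test URI
report.add((assertion, EARL.result, result))
report.add((result, RDF.type, EARL.TestResult))
report.add((result, EARL.outcome, EARL.passed))
report.add((result, DC.date, now))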

Example #55
0
def main():
    with open(JSON) as infile:
        data = json.load(infile)
    gr = Graph()
    gr.namespace_manager.bind('skos', SKOS)
    gr.namespace_manager.bind('qml', QML)
    gr.add ( (SCHEME , RDF.type , SKOS.ConceptScheme))
    gr.add ( (SCHEME , RDFS.label , Literal('Quality Indicators Dictionary and Markup Language - QualityML')))
    for c in data['class']:
        name = c['id']
        id = URIRef(QML[name])
        gr.add( (id, RDF.type, SKOS.Concept ))
        gr.add( (id, RDF.type, QML.Class ))
        gr.add( (id, SKOS.topConceptOf, SCHEME ))
        gr.add( (id, SKOS.inScheme, SCHEME ))
        gr.add ( (SCHEME , SKOS.hasTopConcept , id ))
        label(gr, id, c )

    for i in data['indicator'] :
        name = i['id']
        qc = URIRef(QML[i['class']]) 
        id = URIRef(QML[name])
        gr.add( (id, RDF.type, SKOS.Concept ))
        gr.add( (id, RDF.type, QML.Indicator ))
        gr.add( (id, SKOS.broader, qc ))
        gr.add( (id, SKOS.inScheme, SCHEME ))
        label(gr, id, i )

    for m in data['measure'] :
        name = m['id']
        qc = URIRef(QML[m['class']])  # the measure's own class, not the last indicator's
        id = URIRef(QML["/".join( ('measure',name))])
        gr.add( (id, RDF.type, SKOS.Concept ))
        gr.add( (id, RDF.type, QML.Measure ))
        gr.add( (id, SKOS.broader, qc ))
        gr.add( (id, SKOS.inScheme, SCHEME ))
        label(gr, id, m )
        
    with open ('qml.ttl' , "w") as outfile:
        outfile.write(gr.serialize(format='turtle'))
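The label() helper called throughout Example #55 is not shown; a plausible minimal version, assuming the JSON records carry a name and an optional description field (both field names are guesses), might look like:

def label(gr, id, record):
    # hypothetical helper: attach human-readable SKOS labels to a concept
    if 'name' in record:
        gr.add((id, SKOS.prefLabel, Literal(record['name'], lang='en')))
    if 'description' in record:
        gr.add((id, SKOS.definition, Literal(record['description'], lang='en')))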
Example #56
0
    def __init__(self):
        self.graph = Graph()
        self.top_nodes = []
        self.serializedResources = []
Example #57
0
    assert cr["warnings"] == ["warn", "warn", "warn"]
    cr.add_info(None)
    cr.add_warning(None)
    assert cr["info"] == ["info", "info", "info"]
    assert cr["warnings"] == ["warn", "warn", "warn"]
    cr.add_result(cr)  # results all the way down
    assert cr["results"][0]["name"] == "test"
    assert cr["results"][0]["description"] == "test results of a test"
    assert not cr["results"][0]["passes"]
    assert cr["results"][0]["info"] == ["info", "info", "info"]
    assert cr["results"][0]["warnings"] == ["warn", "warn", "warn"]


from rdflib import Graph, URIRef, Literal, BNode

sg = Graph()  # use OCX RDFS as schema graph b/c it is small
sg.parse(location="tests/input/schema.ttl", format="turtle")
dg = Graph()  # a data graph for tests
dg.parse(location="tests/input/data.ttl", format="turtle")
dc = DataChecks(dg, sg)


def test_init():
    assert type(dc) == DataChecks
    assert len(dc.graph) == len(dg)
    assert dc.schema_graph.isomorphic(sg)  # will fail if BNodes in sg


def test_find_primary_entities():
    result = dc.find_primary_entities([])
    assert result["name"] == "primary entities present"
Example #58
0
from rdflib import Graph, Literal, Namespace, RDF, URIRef, BNode
import ast #.literal_eval
import csv
import pprint


graph = Graph()
skos = Namespace('http://www.w3.org/2004/02/skos/core#')
rdfs = Namespace('http://www.w3.org/2000/01/rdf-schema#')
rdf = Namespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#')
schema = Namespace('https://schema.org/')
eaccpf = Namespace('http://culturalis.org/eac-cpf#')
dbo = Namespace('http://dbpedia.org/ontology/')
rdaad = Namespace('http://rdaregistry.info/Elements/a/datatype/')
djo = Namespace('http://dijest.ac.il/ontology/')
djr = Namespace('http://dijest.ac.il/resource/')
owl = Namespace('http://www.w3.org/2002/07/owl#')


graph.bind('skos', skos)
graph.bind('rdfs', rdfs)
graph.bind('rdf', rdf)
graph.bind('schema', schema)
graph.bind('eac-cpf', eaccpf)
graph.bind('dbo', dbo)
graph.bind('rdaad', rdaad)
graph.bind('djo', djo)
graph.bind('djr', djr)
graph.bind('owl', owl)

#basis_uri = 'http://dijest.ac.il/person/'
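The snippet above only creates the graph and binds prefixes; a hedged illustration of how one person record might then be added with these bindings (the resource URI and property choices are assumptions, not part of the original script):

person = URIRef(djr['person/0001'])  # illustrative URI under the djr namespace
graph.add((person, RDF.type, schema.Person))
graph.add((person, rdfs.label, Literal('Example Person', lang='en')))
graph.add((person, owl.sameAs, URIRef('http://dbpedia.org/resource/Example_Person')))
print(graph.serialize(format='turtle'))  # str in rdflib >= 6; older versions return bytes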
Example #59
0
classType = sys.argv[2]

endpoint_uri = config['Mandatory']['endpointURI']
graph_uri = config['Mandatory']['graphURI']

# Set up endpoint and access to triple store
sparql = SPARQLWrapper(endpoint_uri)
sparql.setReturnFormat(JSON)
sparql.setMethod(POST)
store = SPARQLUpdateStore(endpoint_uri, endpoint_uri)

# Specify the (named) graph we're working with
sparql.addDefaultGraph(graph_uri)

# Create an in memory graph
g = Graph(store, identifier=graph_uri)

query = "select ?p ?o where {<" + URI + "> ?p ?o}"
properties = g.query(query)

# Configurations mappings
mapping = ConfigParser()
mapping.read('mapping_fields.ini')

propURI = ""
props = ""
for row in properties:
    propURI = str(row[0])
    if propURI != "http://www.w3.org/1999/02/22-rdf-syntax-ns#type":
        for key in mapping[classType]:
            if mapping[classType][key] == propURI:
Example #60
0
tei = {'tei': 'http://www.tei-c.org/ns/1.0'}

from rdflib import Graph, Literal, BNode, Namespace, URIRef

from rdflib.namespace import RDF, RDFS, XSD, DCTERMS, OWL

agrelon = Namespace("https://d-nb.info/standards/elementset/agrelon#")
crm = Namespace("http://www.cidoc-crm.org/cidoc-crm/")
frbroo = Namespace("http://iflastandards.info/ns/fr/frbr/frbroo/")
pro = Namespace("http://purl.org/spar/pro/")
proles = Namespace("http://www.essepuntato.it/2013/10/politicalroles/")
prov = Namespace("http://www.w3.org/ns/prov#")
schema = Namespace("https://schema.org/")
tvc = Namespace("http://www.essepuntato.it/2012/04/tvc/")

g = Graph()

g.bind("agrelon", agrelon)
g.bind("crm", crm)
g.bind("frbroo", frbroo)
g.bind("dcterms", DCTERMS)
g.bind("schema", schema)
g.bind("owl", OWL)
g.bind("pro", pro)
g.bind("proles", proles)
g.bind("prov", prov)
g.bind("tvc", tvc)

#############################
#                           #
#        Persons            #