Code Example #1
File: test_nquads.py Project: zqhead/rdflib
 def test_parse_shared_bnode_context(self):
     bnode_ctx = dict()
     g = ConjunctiveGraph()
     h = ConjunctiveGraph()
     g.parse(self.data, format="nquads", bnode_context=bnode_ctx)
     self.data.seek(0)
     h.parse(self.data, format="nquads", bnode_context=bnode_ctx)
     self.assertEqual(set(h.subjects()), set(g.subjects()))
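The shared bnode_context above keeps blank-node labels stable across parses. A self-contained sketch of the same idea, assuming rdflib 5.0+ (where the N-Quads parser accepts bnode_context) and a hypothetical one-line dataset:

from rdflib import ConjunctiveGraph

nq = '_:a <http://example.org/p> "v" <http://example.org/g> .'
ctx = {}  # shared blank-node context
g = ConjunctiveGraph()
h = ConjunctiveGraph()
g.parse(data=nq, format="nquads", bnode_context=ctx)
h.parse(data=nq, format="nquads", bnode_context=ctx)
# both parses map _:a to the same BNode, so the subject sets coincide
assert set(g.subjects()) == set(h.subjects())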
Code Example #2
File: test_nquads.py Project: zqhead/rdflib
 def test_parse_distinct_bnode_context(self):
     g = ConjunctiveGraph()
     g.parse(self.data, format="nquads", bnode_context=dict())
     s1 = set(g.subjects())
     self.data.seek(0)
     g.parse(self.data, format="nquads", bnode_context=dict())
     s2 = set(g.subjects())
     self.assertNotEqual(set(), s2 - s1)
Code Example #3
File: test_nquads.py Project: zqhead/rdflib
 def test_parse_distinct_bnode_contexts_between_graphs(self):
     g = ConjunctiveGraph()
     h = ConjunctiveGraph()
     g.parse(self.data, format="nquads")
     s1 = set(g.subjects())
     self.data.seek(0)
     h.parse(self.data, format="nquads")
     s2 = set(h.subjects())
     self.assertNotEqual(s1, s2)
Code Example #4
def verify_rdf(rdf_output):
    g = ConjunctiveGraph()
    g.parse(data=rdf_output, format="turtle")
    assert len(g) == 6
    assert len(set(g.subjects())) == 2
    assert len(set(g.predicates())) == 3
    assert len(set(g.objects())) == 6
Code Example #5
def test_null_values_with_single_string():
    csvw = CSVW(csv_path="tests/null1.csv",
                metadata_path="tests/null1.single.csv-metadata.json")
    rdf_contents = csvw.to_rdf()
    g = ConjunctiveGraph()
    g.parse(data=rdf_contents, format="turtle")

    # There should be no subject NA
    all_subjects = {x for x in g.subjects()}
    assert subj_ns['null_key'] not in all_subjects
    assert subj_ns['1'] in all_subjects
    assert len(all_subjects) == 4

    # Null valued objects should not be created
    all_objects = {x for x in g.objects()}
    assert Literal('null_key', datatype=XSD.token) not in all_objects
    assert Literal('null_sector') not in all_objects
    assert Literal('null_id', datatype=XSD.token) not in all_objects
    assert Literal('PUBLIC') in all_objects
    assert Literal('12', datatype=XSD.token) in all_objects

    # Spot-check that some triples do not exist while others from the same row do
    null_key_lit = Literal('null_id', datatype=XSD.token)
    assert len(list(g.triples((subj_ns['2'], id_uri, null_key_lit)))) == 0

    priv_lit = Literal('PRIVATE')
    assert len(list(g.triples((subj_ns['2'], sect_uri, priv_lit)))) == 1

    null_sector_lit = Literal('null_sector')
    assert len(list(g.triples((subj_ns['3'], sect_uri, null_sector_lit)))) == 0

    twelve_lit = Literal('12', datatype=XSD.token)
    assert len(list(g.triples((subj_ns['3'], id_uri, twelve_lit)))) == 1
Code Example #6
def get_mediator_vocabs(userid):
    vocabs = {}
    if not os.path.isfile(os.path.join(ag.mediatorsdir, '%s.rdf'%userid)):
        "Cannot find file %s"%os.path.join(ag.mediatorsdir, '%s.rdf'%userid)
        return vocabs
    #Get list of vocabularies created by userid
    graph = Graph()
    graph.parse(os.path.join(ag.mediatorsdir, '%s.rdf'%userid))
    for v in graph.subjects(namespaces['dcterms']['mediator'], None):
        k = v.split('/')[-1]
        svn_src = "http://damssupport.ouls.ox.ac.uk/trac/vocab/browser/trunks/internalVocabularies/%s"%k
        vocabs[k] = (v, svn_src)
    return vocabs
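For reference, graph.subjects(predicate, object) yields the subject of every triple matching the given pattern, which is what the lookup above relies on; a minimal sketch with hypothetical URIs:

from rdflib import Graph, URIRef, Literal

g = Graph()
mediator = URIRef("http://example.org/mediator")
g.add((URIRef("http://example.org/vocab/a"), mediator, Literal("alice")))
g.add((URIRef("http://example.org/vocab/b"), mediator, Literal("bob")))
for v in g.subjects(mediator, None):  # every subject carrying this predicate
    print(v.split('/')[-1])           # -> a, b (in any order)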
Code Example #7
    def get_rdf_metadata(self, uniprot_id):
        """Retrieve RDF metadata for the given UniProt accession.

        XXX Not finished. XML parsing looks to be more straightforward
        """
        from rdflib import ConjunctiveGraph as Graph
        url_base = "%s/uniprot/%s.rdf"
        full_url = url_base % (self._server, uniprot_id)
        graph = Graph()
        with self._get_open_handle(full_url) as in_handle:
            graph.parse(in_handle)
        main_subject = [s for s in list(set(graph.subjects())) if
                s.split('/')[-1] == uniprot_id][0]
        for sub, pred, obj in graph:
            print("%s %s %s" % (sub, pred, obj))
Code Example #8
def test_default():
    csvw = CSVW(csv_path='tests/virtual1.csv',
                metadata_path='tests/virtual1.default.csv-metadata.json')
    rdf_output = csvw.to_rdf()
    g = ConjunctiveGraph()
    g.parse(data=rdf_output, format="turtle")

    all_subjects = {x for x in g.subjects()}
    assert len(all_subjects) == 4

    ns = Namespace("http://example.org/")
    assert ns['sub-1'] in all_subjects
    assert ns['sub-2'] in all_subjects
    assert len(list(g.triples((ns['sub-1'], ns['obj-1'], ns['myvalue'])))) == 1
    assert len(list(g.triples((ns['sub-2'], ns['obj-2'], ns['myvalue'])))) == 1
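Since g.triples() returns a generator, a bare [g.triples(...)] is a one-element list regardless of how many triples match; the generator must be materialized before len() counts anything, as the assertions above do. Reusing the test's names, a sketch:

matches = list(g.triples((ns['sub-1'], ns['obj-1'], ns['myvalue'])))
assert len(matches) == 1
# membership testing avoids counting altogether:
assert (ns['sub-1'], ns['obj-1'], ns['myvalue']) in g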
Code Example #9
def get_mediator_details(userid):
    #Get mediator_details - firstname, lastname, department, email
    details = {}
    details['userid'] = userid
    details['uri'] = None
    details['name'] = None
    details['fname'] = None
    details['lname'] = None
    details['title'] = None
    details['email'] = None
    details['dept'] = []
    if userid.startswith('uuid'):
        userid = get_mediator_account(userid)
        details['userid'] = userid
        if not userid:
            return details
    if not os.path.isfile(os.path.join(ag.mediatorsdir, '%s.rdf'%userid)):
        return details
    graph = Graph()
    graph.parse(os.path.join(ag.mediatorsdir, '%s.rdf'%userid))
    t = ''
    f = ''
    l = ''
    for title in graph.objects(None, namespaces['foaf']['title']):
        if title.strip():
            t = title
            details['title'] = t
    for fname in graph.objects(None, namespaces['foaf']['firstName']):
        if fname.strip():
            f = fname
            details['fname'] = fname
    for lname in graph.objects(None, namespaces['foaf']['lastName']):
        if lname.strip():
            l = lname
            details['lname'] = lname
    details['name'] = "%s %s %s"%(t, f, l)
    details['name'] = details['name'].strip()
    if not details['name']:
        details['name'] = userid
    for email in graph.objects(None, namespaces['foaf']['mbox']):
        details['email'] = email
    for dept in graph.objects(None, namespaces['dcterms']['isPartOf']):
        details['dept'].append(dept)
    for uri in graph.subjects(namespaces['foaf']['account'], None):
        details['uri'] = uri
    return details
Code Example #10
File: generators.py Project: poulp/randomfoOd
    def generate(cls, n):
        graph = ConjunctiveGraph()
        load_rdf_file(STORE['utensils'], graph)

        all_uris = set(graph.subjects())
        n = min(n, len(all_uris))
        selected_uris = sample(all_uris, n)

        # Retrieve the requested utensils from the graph
        selected_triples = chain(*map(graph.triples, ((uri, None, None) for uri in selected_uris)))
        map(rdfSubject.db.add, selected_triples)

        utensils = [Utensil(uri) for uri in selected_uris]

        # Retrieve the actions of these utensils
        ActionGenerator.generate(utensils)

        return utensils
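This generator targets Python 2: under Python 3, map() is lazy, so the rdfSubject.db.add side effects would never run. A Python 3-safe equivalent, as a sketch assuming the same names:

# force evaluation with an explicit loop instead of map()
for triple in selected_triples:
    rdfSubject.db.add(triple)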
Code Example #11
File: test_formats.py Project: vishalbelsare/pycsvw
def verify_rdf_contents(contents, fmt):
    g = ConjunctiveGraph()
    g.parse(data=contents, format=fmt)

    books = Namespace('http://www.books.org/')
    isbn = Namespace("http://www.books.org/isbn/")

    # Check number of all triples
    assert sum(
        1 for _ in g.triples((None, None,
                              None))) == NUM_SUBJECTS * NUM_TRIPLES_PER_SUBJ

    # Check number of subjects
    subjs = set(g.subjects())
    expected_subjs = ["0062316095", "0374532508", "1610391845", "0374275637"]
    assert len(subjs) == len(expected_subjs)
    for s in expected_subjs:
        assert isbn[s] in subjs

        # Verify isbn number is a positive integer
        s_isbn = list(g.triples((isbn[s], books['isbnnumber'], None)))
        assert len(s_isbn) == 1
        s_isbn_val = s_isbn[0][2]
        assert isinstance(s_isbn_val, Literal)
        assert s_isbn_val.datatype == XSD.positiveInteger
        # Verify pages is an unsignedShort
        s_page = list(g.triples((isbn[s], books['pagecount'], None)))
        assert len(s_page) == 1
        s_page_val = s_page[0][2]
        assert isinstance(s_page_val, Literal)
        assert s_page_val.datatype == XSD.unsignedShort
        # Verify hardcover is a boolean
        s_hardcover = list(g.triples((isbn[s], books['hardcover'], None)))
        assert len(s_hardcover) == 1
        s_hardcover_val = s_hardcover[0][2]
        assert isinstance(s_hardcover_val, Literal)
        assert s_hardcover_val.datatype == XSD.boolean
        # Verify price is a decimal
        s_price = list(g.triples((isbn[s], books['price'], None)))
        assert len(s_price) == 1
        s_price_val = s_price[0][2]
        assert isinstance(s_price_val, Literal)
        assert s_price_val.datatype == XSD.decimal
Code Example #12
def verify_rdf(rdf_output):
    ids_ns = Namespace("http://foo.example.org/CSV/People-IDs/")
    ages_ns = Namespace("http://foo.example.org/CSV/People-Ages/")
    g = ConjunctiveGraph()
    g.parse(data=rdf_output, format="turtle")

    all_subjects = {x for x in g.subjects()}
    assert len(all_subjects) == 2

    bob_subj = ids_ns['1']
    joe_subj = ids_ns['2']
    assert bob_subj in all_subjects
    assert joe_subj in all_subjects

    # Bob's details
    assert len(list(g.triples((bob_subj, ids_ns.id, Literal(1))))) == 1
    assert len(list(g.triples((bob_subj, ids_ns.name, Literal("Bob"))))) == 1
    assert len(list(g.triples((bob_subj, ages_ns.age, Literal(34))))) == 1

    # Joe's details
    assert len(list(g.triples((joe_subj, ids_ns.id, Literal(2))))) == 1
    assert len(list(g.triples((joe_subj, ids_ns.name, Literal("Joe"))))) == 1
    assert len(list(g.triples((joe_subj, ages_ns.age, Literal(54))))) == 1
Code Example #13
    raise


# Test6: ontology is internally consistent with respect to domains, ranges, etc

# step 1: find all the classes.
rdftype = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
rdfsdomain = URIRef("http://www.w3.org/2000/01/rdf-schema#domain")
rdfsrange = URIRef("http://www.w3.org/2000/01/rdf-schema#range")
rdfsresource = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#Resource")
rdfssco = URIRef("http://www.w3.org/2000/01/rdf-schema#subClassOf")
asColl = URIRef("http://www.w3.org/ns/activitystreams#OrderedCollection")
skosConcept = URIRef("http://www.w3.org/2004/02/skos/core#Concept")

otherClasses = [asColl, skosConcept]
classes = list(g.subjects(rdftype, URIRef("http://www.w3.org/2000/01/rdf-schema#Class")))
props = list(g.subjects(rdftype, URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#Property")))

for p in props:
    domains = list(g.objects(p, rdfsdomain))
    for d in domains:
        assert(d in classes)

for p in props:
    ranges = list(g.objects(p, rdfsrange))
    for r in ranges:
        if r not in classes and not str(r).startswith("http://www.w3.org/2001/XMLSchema#") and \
                r != rdfsresource:
            print("Found inconsistent property: %s has unknown range" % p)

for c in classes:
Code Example #14
File: rdfdb.py Project: t00m/KB4IT
class KB4ITGraph:
    """
    This class creates an RDF graph based on the attributes of each doc.
    It also has convenience functions to query the graph.
    """
    def __init__(self, path=None):
        """
        If no path is passed, it builds a graph in memory. Otherwise, it
        creates a persistent graph on disk.
        """
        if path is not None:
            # Create persistent Graph in disk
            self.path = path
            self.graph = ConjunctiveGraph('Sleepycat', URIRef("kb4it://"))
            graph_path = path + SEP + 'kb4it.graph'
            self.graph.store.open(graph_path)
        else:
            # Create Graph in Memory
            self.graph = ConjunctiveGraph('IOMemory')

        # Assign namespaces to the Namespace Manager of this graph
        namespace_manager = NamespaceManager(ConjunctiveGraph())
        for ns in NSBINDINGS:
            namespace_manager.bind(ns, NSBINDINGS[ns])
        self.graph.namespace_manager = namespace_manager


    def __uniq_sort(self, result):
        alist = list(result)
        aset = set(alist)
        alist = list(aset)
        alist.sort()
        return alist


    def subjects(self, predicate, object):
        """
        Returns a sorted list of unique subjects given a predicate
        and an object.
        """
        return self.__uniq_sort(self.graph.subjects(predicate, object))


    def predicates(self, subject=None, object=None):
        """
        Returns a sorted list of unique predicates given a subject
        and an object.
        """
        return self.__uniq_sort(self.graph.predicates(subject, object))


    def objects(self, subject, predicate):
        """
        Returns a sorted list of unique objects given a subject
        and a predicate.
        """
        return self.__uniq_sort(self.graph.objects(subject, predicate))


    def value(self, subject=None, predicate=None, object=None, default=None, any=True):
        """
        Returns a value given the subject and the predicate.
        """
        return self.graph.value(subject, predicate, object, default, any)


    def add_document(self, doc):
        """
        Add a new document to the graph.
        """
        subject = URIRef(doc)
        predicate = RDF['type']
        object = URIRef(KB4IT['Document'])
        self.graph.add([subject, predicate, object])


    def add_document_attribute(self, doc, attribute, value):
        """
        Add a new attribute to a document
        """
        predicate = 'has%s' % attribute
        subject = URIRef(doc)
        predicate = KB4IT[predicate]
        object = Literal(value)
        self.graph.add([subject, predicate, object])


    def get_attributes(self):
        """
        Get all predicates except RDF.type and hasTitle.
        """
        blacklist = set()
        blacklist.add(RDF['type'])
        blacklist.add(KB4IT['hasTitle'])
        alist = list(self.graph.predicates(None, None))
        aset = set(alist) - blacklist
        alist = list(aset)
        alist.sort()
        return alist


    def serialize(self):
        """
        Serialize the graph to pretty-xml format.
        """
        return self.graph.serialize(format='pretty-xml')


    def close(self):
        """
        Close the graph if it is persistent.
        FIXME: check if it is open
        """
        self.graph.store.close()
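A minimal usage sketch for the class above (in-memory store; the document URI and attribute name are hypothetical):

kb = KB4ITGraph()  # no path, so the graph lives in memory
kb.add_document('kb4it://docs/example')
kb.add_document_attribute('kb4it://docs/example', 'Author', 'alice')
print(kb.get_attributes())  # -> [KB4IT['hasAuthor']]; RDF.type and hasTitle are blacklisted
print(kb.serialize())       # pretty-xml dump of the whole graph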
Code Example #15
def create_vocab_statusfile(userid, vocabprefix, vocabfile, baseuri, update=False, using_uuid=False, refvocab=False):
    vocab_uri = URIRef("http://vocab.ox.ac.uk/%s"%vocabprefix)
    vocabdir = os.path.join(ag.vocabulariesdir, str(vocabprefix))
    vocabstatusfile = os.path.join(vocabdir, "status.rdf")
    vocab_file_name = os.path.basename(vocabfile)
    vocabfile_uri = URIRef("http://vocab.ox.ac.uk/%s/%s"%(vocabprefix, vocab_file_name))

    #Add vocab in mediator file
    graph = Graph()
    mediatorfile = os.path.join(ag.mediatorsdir, '%s.rdf'%userid)
    graph.parse(mediatorfile)
    user_uri = []
    for uri in graph.subjects(namespaces['foaf']['account'], Literal(userid)):
        if not uri in user_uri:
            user_uri.append(uri)
    user_uri = URIRef(user_uri[0])
    graph.add((vocab_uri, namespaces['dcterms']['mediator'], URIRef(user_uri)))
    rdf_str = None
    rdf_str = graph.serialize()
    f = codecs.open(mediatorfile, 'w', 'utf-8')
    f.write(rdf_str)
    f.close()

    #Add vocab in vocab status file
    graph = Graph()
    if update and os.path.isfile(vocabstatusfile):
        graph.parse(vocabstatusfile)
    for prefix, url in namespaces.items():
        graph.bind(prefix, URIRef(url))
    graph.add((vocab_uri, namespaces['dcterms']['mediator'], URIRef(user_uri)))
    graph.add((user_uri, namespaces['foaf']['account'], Literal(userid)))
    graph.add((vocab_uri, namespaces['dcterms']['hasFormat'], URIRef(vocabfile_uri)))
    graph.add((vocab_uri, namespaces['vann']['preferredNamespaceUri'], URIRef(baseuri)))
    graph.add((vocab_uri, namespaces['vann']['preferredNamespacePrefix'], Literal(vocabprefix)))
    graph.add((vocab_uri, namespaces['skos']['editorialNote'], Literal(vocab_editorial_descriptions[0])))
    if refvocab:
        add_ref_vocab(vocabprefix, refvocab)
        graph.add((vocab_uri, namespaces['dcterms']['isVersionOf'], URIRef(refvocab)))
    # get mimetype of file
    if os.path.isfile(vocabfile):
        graph.add((vocabfile_uri, namespaces['nfo']['fileUrl'], Literal('file://%s'%vocabfile)))
        graph.add((vocabfile_uri, namespaces['nfo']['fileName'], Literal(vocab_file_name)))
        mt = None
        if check_rdf(vocabfile):
            mt = 'application/rdf+xml'
            graph.add((vocabfile_uri, namespaces['dcterms']['conformsTo'], Literal(mt)))
            graph.add((vocabfile_uri, namespaces['skos']['editorialNote'], Literal(vocab_editorial_descriptions[3])))
        elif check_n3(vocabfile):
            mt = 'text/rdf+nt'
            root, ext = os.path.splitext(vocabfile)
            if ext == '.rdf':
                rdffile = "%s_2.rdf"%root
            else:
                rdffile = "%s.rdf"%root
            converttordf = convert_n3_rdf(vocabfile, rdffile)
            if converttordf and os.path.isfile(rdffile):
                rdf_file_name = os.path.basename(rdffile)
                rdffile_uri = URIRef("http://vocab.ox.ac.uk/%s/%s"%(vocabprefix, rdf_file_name))
                graph.add((vocab_uri, namespaces['dcterms']['hasFormat'], URIRef(rdffile_uri)))
                graph.add((rdffile_uri, namespaces['nfo']['fileUrl'], Literal('file://%s'%rdffile)))
                graph.add((rdffile_uri, namespaces['nfo']['fileName'], Literal(rdf_file_name)))
                graph.add((rdffile_uri, namespaces['dcterms']['conformsTo'], Literal('application/rdf+xml')))
                graph.add((rdffile_uri, namespaces['skos']['editorialNote'], Literal(vocab_editorial_descriptions[3])))
                graph.add((rdffile_uri, namespaces['dcterms']['format'], Literal('application/rdf+xml')))
        else:
            mt1 = mimetypes.guess_type(vocabfile)
            mt2 = get_file_mimetype(vocabfile)
            if mt1[0]:
                mt = mt1[0]
            else:
                mt = mt2
            if str(mt) == 'application/rdf+xml':
                graph.add((vocabfile_uri, namespaces['skos']['editorialNote'], Literal(vocab_editorial_descriptions[2])))
            else:
                graph.add((vocab_uri, namespaces['skos']['editorialNote'], Literal(vocab_editorial_descriptions[1])))
        if mt:
            graph.add((vocabfile_uri, namespaces['dcterms']['format'], Literal(mt)))
    rdf_str = None
    rdf_str = graph.serialize()
    f = codecs.open(vocabstatusfile, 'w', 'utf-8')
    f.write(rdf_str)
    f.close()
    return True
Code Example #16
    def add_property_axioms(graph, properties):
        ontology_graph = ConjunctiveGraph()
        GH = 'https://raw.githubusercontent.com'
        OBO = 'http://purl.obolibrary.org/obo'
        ontologies = [
            OBO + '/sepio.owl',
            OBO + '/geno.owl',
            OBO + '/iao.owl',
            OBO + '/ero.owl',
            OBO + '/pco.owl',
            OBO + '/xco.owl',
            OBO + '/ro.owl',
            GH + '/jamesmalone/OBAN/master/ontology/oban_core.ttl',
        ]

        # random timeouts can waste hours. (too many redirects?)
        # there is a timeout param in urllib.request,
        # but it is not exposed by rdflib.parsing
        # so retry once on URLError
        for ontology in ontologies:
            LOG.info("parsing: " + ontology)
            try:
                ontology_graph.parse(ontology,
                                     format=rdflib_util.guess_format(ontology))
            except SAXParseException as e:
                LOG.error(e)
                LOG.error('Retrying as turtle: ' + ontology)
                ontology_graph.parse(ontology, format="turtle")
            except OSError as e:  # URLError:
                # simple retry
                LOG.error(e)
                LOG.error('Retrying: ' + ontology)
                ontology_graph.parse(ontology,
                                     format=rdflib_util.guess_format(ontology))

        # Get object properties
        graph = GraphUtils.add_property_to_graph(
            ontology_graph.subjects(RDF['type'], OWL['ObjectProperty']), graph,
            OWL['ObjectProperty'], properties)

        # Get annotation properties
        graph = GraphUtils.add_property_to_graph(
            ontology_graph.subjects(RDF['type'], OWL['AnnotationProperty']),
            graph, OWL['AnnotationProperty'], properties)

        # Get data properties
        graph = GraphUtils.add_property_to_graph(
            ontology_graph.subjects(RDF['type'], OWL['DatatypeProperty']),
            graph, OWL['DatatypeProperty'], properties)

        for row in graph.predicates(DCTERMS['source'],
                                    OWL['AnnotationProperty']):
            if row == RDF['type']:
                graph.remove((DCTERMS['source'], RDF['type'],
                              OWL['AnnotationProperty']))
        graph.add((DCTERMS['source'], RDF['type'], OWL['ObjectProperty']))

        # Hardcoded properties
        graph.add(
            (URIRef('https://monarchinitiative.org/MONARCH_cliqueLeader'),
             RDF['type'], OWL['AnnotationProperty']))

        graph.add((URIRef('https://monarchinitiative.org/MONARCH_anonymous'),
                   RDF['type'], OWL['AnnotationProperty']))

        return graph
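The retry-on-URLError pattern in the loop above can be factored into a small helper; a sketch (not part of the original code), using only graph.parse:

def parse_with_retry(graph, source, fmt=None, retries=1):
    # retry transient network failures; re-raise once retries are exhausted
    for attempt in range(retries + 1):
        try:
            return graph.parse(source, format=fmt)
        except OSError:
            if attempt == retries:
                raise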
Code Example #17
        resource_to_remove.add(URIRef(item))

        while len(resource_to_remove):
            res = resource_to_remove.pop()
            for s, p, o in g.triples((res, None, None)):
                g.remove((s, p, o))
                if type(o) is URIRef and "/br/" not in str(o):
                    resource_to_remove.add(o)

    full_info_dir = info_dir + args.prefix + sep

    print("Generate data compliant with the OCDM.")
    gs = GraphSet(base_iri, context_path)
    entity_count = 1000
    counter = 0
    for s in g.subjects():
        if counter == entity_count:
            store_all(gs)
            counter = 0
            gs = GraphSet(base_iri, context_path)

        with open(args.done, "a") as f:
            s_string = str(s)
            if s_string not in done:
                entity = None
                if "/ar/" in s_string:
                    entity = gs.add_ar(agent_name,
                                       source_agent=args.source_agent,
                                       source=args.source,
                                       res=s)
                elif "/be/" in s_string:
Code Example #18
class BerkeleyDBTestCase(unittest.TestCase):
    def setUp(self):
        if not has_bsddb:
            self.skipTest("skipping as berkeleydb is missing")
        self.store_name = "BerkeleyDB"
        self.path = mktemp()
        self.g = ConjunctiveGraph(store=self.store_name)
        self.rt = self.g.open(self.path, create=True)
        assert self.rt == VALID_STORE, "The underlying store is corrupt"
        assert (
            len(self.g) == 0
        ), "There must be zero triples in the graph just after store (file) creation"
        data = """
                PREFIX : <https://example.org/>

                :a :b :c .
                :d :e :f .
                :d :g :h .
                """
        self.g.parse(data=data, format="ttl")

    def tearDown(self):
        self.g.close()

    def test_write(self):
        assert (
            len(self.g) == 3
        ), "There must be three triples in the graph after the first data chunk parse"
        data2 = """
                PREFIX : <https://example.org/>

                :d :i :j .
                """
        self.g.parse(data=data2, format="ttl")
        assert (
            len(self.g) == 4
        ), "There must be four triples in the graph after the second data chunk parse"
        data3 = """
                PREFIX : <https://example.org/>

                :d :i :j .
                """
        self.g.parse(data=data3, format="ttl")
        assert (
            len(self.g) == 4
        ), "There must still be four triples in the graph after the thrd data chunk parse"

    def test_read(self):
        sx = None
        for s in self.g.subjects(
                predicate=URIRef("https://example.org/e"),
                object=URIRef("https://example.org/f"),
        ):
            sx = s
        assert sx == URIRef("https://example.org/d")

    def test_sparql_query(self):
        q = """
            PREFIX : <https://example.org/>

            SELECT (COUNT(*) AS ?c)
            WHERE {
                :d ?p ?o .
            }"""

        c = 0
        for row in self.g.query(q):
            c = int(row.c)
        assert c == 2, "SPARQL COUNT must return 2"

    def test_sparql_insert(self):
        q = """
            PREFIX : <https://example.org/>

            INSERT DATA {
                :x :y :z .
            }"""

        self.g.update(q)
        assert len(self.g) == 4, "After extra triple insert, length must be 4"

    def test_multigraph(self):
        q = """
            PREFIX : <https://example.org/>

            INSERT DATA {
                GRAPH :m {
                    :x :y :z .
                }
                GRAPH :n {
                    :x :y :z .
                }
            }"""

        self.g.update(q)

        q = """
            SELECT (COUNT(?g) AS ?c)
            WHERE {
                SELECT DISTINCT ?g
                WHERE {
                    GRAPH ?g {
                        ?s ?p ?o
                    }
                }
            }
            """
        c = 0
        for row in self.g.query(q):
            c = int(row.c)
        assert c == 3, "SPARQL COUNT must return 3 (default, :m & :n)"

    def test_open_shut(self):
        assert len(self.g) == 3, "Initially we must have 3 triples from setUp"
        self.g.close()
        self.g = None

        # reopen the graph
        self.g = ConjunctiveGraph("BerkeleyDB")
        self.g.open(self.path, create=False)
        assert (
            len(self.g) == 3
        ), "After close and reopen, we should still have the 3 originally added triples"
Code Example #19
    (filmNS['directedBy'], rdfsRange, filmNS['Director']),
]

graph = ConjunctiveGraph()
for triple in schemaTriples:
    graph.add(triple)


def isSubClassOf(subClass, superClass, graph):
    if subClass == superClass: return True
    for parentClass in graph.objects(subClass, rdfsSubClassOf):
        if isSubClassOf(parentClass, superClass, graph): return True
    return False


pprint.pprint(list(graph.subjects(rdfType, owlClass)))
pprint.pprint(list(graph.subjects(rdfType, owlObjectProperty)))
pprint.pprint(list(graph.subjects(rdfType, owlDatatypeProperty)))

print(isSubClassOf(filmNS['Actor'], filmNS['Person'], graph))
print(isSubClassOf(filmNS['Film'], filmNS['Person'], graph))

owl_filename = "film_ontology.owl"
with open(owl_filename, "w") as owl_file:
    xml_string = str(graph.serialize(format="xml"), "utf-8")
    owl_file.write(xml_string)

# Define a blank node for the performance
performance = BNode('_:perf1')

filmTriples = [
Code Example #20
File: clone_factory.py Project: yanlirock/event-kge
global_modules_dict = dict()
global_fes_dict = dict()
entities_dict = dict()

remove_original = False
clone_g = ConjunctiveGraph()

# TODO: remove NamedIndividual for all entities
for s, p, o in original_g.triples((None, RDF.type, OWL.NamedIndividual)):
    original_g.remove((s, p, o))

for i, clone in enumerate(clones):
    if i == len(clones) - 1:
        remove_original = True
    # copy all device entities
    for dev in original_g.subjects(RDF.type, device):
        # their associated triples
        for s, p, o in original_g.triples((dev, None, None)):
            new_s = clone + '-' + unicode(s).split('#')[1]
            new_s = amberg_ns[new_s]
            if p in (RDF.type, amberg_ns['hasSkill']):
                if remove_original:
                    original_g.remove((s, p, o))
                clone_g.add((new_s, p, o))
            elif p in (hasPart, amberg_ns['connectsTo']):
                new_o = clone + '-' + unicode(o).split('#')[1]
                new_o = amberg_ns[new_o]
                if remove_original:
                    original_g.remove((s, p, o))
                clone_g.add((new_s, p, new_o))
Code Example #21
File: citationstorer.py Project: opencitations/index
    def __load_citations_from_rdf_file(data_f_path, prov_f_path, service_name,
                                       id_type, id_shape, citation_type):
        citation_data = Graph()
        citation_data.load(data_f_path, format="nt11")

        citation_prov = ConjunctiveGraph()
        citation_prov.load(prov_f_path, format="nquads")

        for cit_ent in citation_data.subjects(RDF.type, Citation.citation):
            prov_entity = None
            snapshot = 0

            for entity in citation_prov.subjects(Citation.specialization_of,
                                                 cit_ent):
                entity_snapshot = int(sub("^.+/se/(.+)$", "\\1", entity))
                if prov_entity is None or snapshot < entity_snapshot:
                    prov_entity = entity
                    snapshot = entity_snapshot

            invalidated = None
            update = None
            creation_date = None
            timespan = None
            for en in citation_prov.objects(prov_entity,
                                            Citation.invalidated_at_time):
                invalidated = str(en)
            for en in citation_prov.objects(prov_entity,
                                            Citation.has_update_query):
                update = str(en)
            for en in citation_data.objects(
                    cit_ent, Citation.has_citation_creation_date):
                creation_date = str(en)
            for en in citation_data.objects(cit_ent,
                                            Citation.has_citation_time_span):
                timespan = str(en)

            c = Citation(
                sub("^.+/ci/(.+)$", "\\1", str(cit_ent)),
                str(
                    list(
                        citation_data.objects(cit_ent,
                                              Citation.has_citing_entity))[0]),
                None,
                str(
                    list(
                        citation_data.objects(cit_ent,
                                              Citation.has_cited_entity))[0]),
                None, creation_date, timespan, entity_snapshot,
                str(
                    list(
                        citation_prov.objects(prov_entity,
                                              Citation.was_attributed_to))[0]),
                str(
                    list(
                        citation_prov.objects(
                            prov_entity, Citation.had_primary_source))[0]),
                str(
                    list(
                        citation_prov.objects(prov_entity,
                                              Citation.generated_at_time))[0]),
                service_name, id_type, id_shape, citation_type,
                Citation.journal_self_citation in citation_data.objects(
                    cit_ent, RDF.type), Citation.author_self_citation
                in citation_data.objects(cit_ent, RDF.type), invalidated,
                str(
                    list(
                        citation_prov.objects(prov_entity,
                                              Citation.description))[0]),
                update)

            yield c
Code Example #22
File: swap_primer.py Project: mobilemadman2/rdflib-1
    primer.add((myNS.pat, myNS.knows, myNS.jo))
    # or:
    primer.add((myNS['pat'], myNS['age'], Literal(24)))

    # Now, with just that, let's see how the system
    # recorded *way* too many details about what
    # you just asserted as fact.
    #

    from pprint import pprint
    pprint(list(primer))

    # just think .whatever((s, p, o))
    # here we report on what we know

    pprint(list(primer.subjects()))
    pprint(list(primer.predicates()))
    pprint(list(primer.objects()))

    # and other things that make sense

    # what do we know about pat?
    pprint(list(primer.predicate_objects(myNS.pat)))

    # who is what age?
    pprint(list(primer.subject_objects(myNS.age)))

    # Okay, so let's now work with a bigger
    # dataset from the example, and start
    # with a fresh new graph.
Code Example #23
File: GraphUtils.py Project: DoctorBud/dipper
    def add_property_axioms(graph, properties):
        ontology_graph = ConjunctiveGraph()
        GH = 'https://raw.githubusercontent.com'
        MI = '/monarch-initiative'
        ontologies = [
            GH + MI + '/SEPIO-ontology/master/src/ontology/sepio.owl',
            GH + MI + '/GENO-ontology/develop/src/ontology/geno.owl',
            GH + '/oborel/obo-relations/master/ro.owl',
            'http://purl.obolibrary.org/obo/iao.owl',
            'http://purl.obolibrary.org/obo/ero.owl',
            GH + '/jamesmalone/OBAN/master/ontology/oban_core.ttl',
            'http://purl.obolibrary.org/obo/pco.owl',
            'http://purl.obolibrary.org/obo/xco.owl'
        ]

        # random timeouts can waste hours. (too many redirects?)
        # there is a timeout param in urllib.request,
        # but it is not exposed by rdflib.parsing
        # so retry once on URLError
        for ontology in ontologies:
            logger.info("parsing: " + ontology)
            try:
                ontology_graph.parse(
                    ontology, format=rdflib_util.guess_format(ontology))
            except SAXParseException as e:
                logger.error(e)
                logger.error('Retrying as turtle: ' + ontology)
                ontology_graph.parse(ontology, format="turtle")
            except OSError as e:  # URLError:
                # simple retry
                logger.error(e)
                logger.error('Retrying: ' + ontology)
                ontology_graph.parse(
                    ontology, format=rdflib_util.guess_format(ontology))

        # Get object properties
        graph = GraphUtils.add_property_to_graph(
            ontology_graph.subjects(RDF['type'], OWL['ObjectProperty']),
            graph, OWL['ObjectProperty'], properties)

        # Get annotation properties
        graph = GraphUtils.add_property_to_graph(
            ontology_graph.subjects(RDF['type'], OWL['AnnotationProperty']),
            graph, OWL['AnnotationProperty'], properties)

        # Get data properties
        graph = GraphUtils.add_property_to_graph(
            ontology_graph.subjects(RDF['type'], OWL['DatatypeProperty']),
            graph, OWL['DatatypeProperty'], properties)

        for row in graph.predicates(DC['source'], OWL['AnnotationProperty']):
            if row == RDF['type']:
                graph.remove(
                    (DC['source'], RDF['type'], OWL['AnnotationProperty']))
        graph.add((DC['source'], RDF['type'], OWL['ObjectProperty']))

        # Hardcoded properties
        graph.add((
            URIRef('https://monarchinitiative.org/MONARCH_cliqueLeader'),
            RDF['type'], OWL['AnnotationProperty']))

        graph.add((URIRef('https://monarchinitiative.org/MONARCH_anonymous'),
                  RDF['type'], OWL['AnnotationProperty']))

        return graph
Code Example #24
    def test_get_history(self):
        with open(filepath('test-patch-adds-items.json')) as f:
            patch = f.read()

        with self.client as client:
            res1 = client.patch(
                '/d/',
                data=patch,
                content_type='application/json',
                headers={'Authorization': 'Bearer '
                         + 'NTAwNWViMTgtYmU2Yi00YWMwLWIwODQtMDQ0MzI4OWIzMzc4'})
            patch_url = urlparse(res1.headers['Location']).path
            client.post(
                patch_url + 'merge',
                headers={'Authorization': 'Bearer '
                         + 'ZjdjNjQ1ODQtMDc1MC00Y2I2LThjODEtMjkzMmY1ZGFhYmI4'})
            res2 = client.get('/h')
            self.assertEqual(res2.status_code, http.client.OK)
            self.assertEqual(
                res2.headers['Content-Type'], 'application/ld+json')
            jsonld = res2.get_data(as_text=True)

        g = ConjunctiveGraph()
        g.parse(format='json-ld', data=jsonld)

        # Initial data load
        self.assertIn(  # None means any
            (PERIODO['p0h#change-1'], PROV.endedAtTime, None), g)
        self.assertIn(
            (PERIODO['p0h#change-1'], PROV.used, PERIODO['p0d?version=0']), g)
        self.assertIn(
            (PERIODO['p0d?version=0'],
             PROV.specializationOf, PERIODO['p0d']), g)
        self.assertIn(
            (PERIODO['p0h#change-1'], PROV.used, PERIODO['p0h#patch-1']), g)
        self.assertIn(
            (PERIODO['p0h#patch-1'],
             FOAF.page, PERIODO['p0patches/1/patch.jsonpatch']), g)
        self.assertIn(
            (PERIODO['p0h#change-1'],
             PROV.generated, PERIODO['p0d?version=1']), g)
        self.assertIn(
            (PERIODO['p0d?version=1'],
             PROV.specializationOf, PERIODO['p0d']), g)
        self.assertIn(
            (PERIODO['p0h#change-1'],
             PROV.generated, PERIODO['p0trgkv?version=1']), g)
        self.assertIn(
            (PERIODO['p0trgkv?version=1'],
             PROV.specializationOf, PERIODO['p0trgkv']), g)
        self.assertIn(
            (PERIODO['p0h#change-1'],
             PROV.generated, PERIODO['p0trgkvwbjd?version=1']), g)
        self.assertIn(
            (PERIODO['p0trgkvwbjd?version=1'],
             PROV.specializationOf, PERIODO['p0trgkvwbjd']), g)

        # Change from first submitted patch
        self.assertIn(  # None means any
            (PERIODO['p0h#change-2'], PROV.startedAtTime, None), g)
        self.assertIn(  # None means any
            (PERIODO['p0h#change-2'], PROV.endedAtTime, None), g)
        start = g.value(
            subject=PERIODO['p0h#change-2'],
            predicate=PROV.startedAtTime)
        self.assertEqual(start.datatype, XSD.dateTime)
        self.assertRegex(start.value.isoformat(), W3CDTF)
        end = g.value(
            subject=PERIODO['p0h#change-2'],
            predicate=PROV.endedAtTime)
        self.assertEqual(end.datatype, XSD.dateTime)
        self.assertRegex(end.value.isoformat(), W3CDTF)
        self.assertIn(
            (PERIODO['p0h#change-2'], PROV.wasAssociatedWith,
             URIRef('http://orcid.org/1234-5678-9101-112X')), g)
        self.assertIn(
            (PERIODO['p0h#change-2'], PROV.wasAssociatedWith,
             URIRef('http://orcid.org/1211-1098-7654-321X')), g)
        for association in g.subjects(
                predicate=PROV.agent,
                object=URIRef('http://orcid.org/1234-5678-9101-112X')):
            role = g.value(subject=association, predicate=PROV.hadRole)
            self.assertIn(role, (PERIODO['p0v#submitted'],
                                 PERIODO['p0v#updated']))
        merger = g.value(
            predicate=PROV.agent,
            object=URIRef('http://orcid.org/1211-1098-7654-321X'))
        self.assertIn(
            (PERIODO['p0h#change-2'], PROV.qualifiedAssociation, merger), g)
        self.assertIn(
            (merger, PROV.hadRole, PERIODO['p0v#merged']), g)
        self.assertIn(
            (PERIODO['p0h#change-2'], PROV.used, PERIODO['p0d?version=1']), g)
        self.assertIn(
            (PERIODO['p0d?version=1'],
             PROV.specializationOf, PERIODO['p0d']), g)
        self.assertIn(
            (PERIODO['p0h#change-2'], PROV.used, PERIODO['p0h#patch-2']), g)
        self.assertIn(
            (PERIODO['p0h#patch-2'],
             FOAF.page, PERIODO['p0patches/2/patch.jsonpatch']), g)
        self.assertIn(
            (PERIODO['p0h#change-2'],
             PROV.generated, PERIODO['p0d?version=2']), g)
        self.assertIn(
            (PERIODO['p0d?version=2'],
             PROV.specializationOf, PERIODO['p0d']), g)
        self.assertIn(
            (PERIODO['p0h#change-2'],
             PROV.generated, PERIODO['p0trgkv?version=2']), g)
        self.assertIn(
            (PERIODO['p0trgkv?version=2'],
             PROV.specializationOf, PERIODO['p0trgkv']), g)
        self.assertIn(
            (PERIODO['p0trgkv?version=2'],
             PROV.wasRevisionOf, PERIODO['p0trgkv?version=1']), g)

        entities = 0
        for _, _, version in g.triples(
                (PERIODO['p0h#change-2'], PROV.generated, None)):
            entity = g.value(subject=version, predicate=PROV.specializationOf)
            self.assertEqual(str(entity) + '?version=2', str(version))
            entities += 1
        self.assertEqual(entities, 5)
Code Example #25
File: fileio.py Project: NetherNova/grakelasso
def create_graph(filelist, output_train, output_test, pos_graphs, cv, predicate, ob):
	global relation_counter
	relation_counter = 1000000
	global entity_counter
	global local_entity_counter
	global local_entity_map
	global id_to_uri
	id_to_uri = dict()

	entity_counter = 0
	entity_map = dict()
	relation_map = dict()
	graph_labels_train = []
	graph_labels_test = []
	filelist = np.array(filelist)
	i_fold = 0
	for train_index, test_index in cross_validation.KFold(len(filelist), n_folds=cv):
		train = True
		test = True
		filelist_train = filelist[train_index]
		filelist_test = filelist[test_index]

		output_train_tmp = output_train + str(i_fold) + ".txt"
		output_test_tmp = output_test + str(i_fold) + ".txt"

		# delete train and test output files
		try:
			os.remove(output_train_tmp)
		except OSError:
			pass
		try:
			os.remove(output_test_tmp)
		except OSError:
			pass
		# First round train then test
		while train or test:
			graph_labels_tmp = []
			filelist_tmp = None
			graph_labels_list_tmp = None
			if train:
				filelist_tmp = filelist_train
				output_tmp = output_train_tmp
				train = False
				graph_labels_list_tmp = graph_labels_train
			else:
				filelist_tmp = filelist_test
				output_tmp = output_test_tmp
				test = False
				graph_labels_list_tmp = graph_labels_test
			for f in filelist_tmp:
				num = int(f.split("_")[1])
				labels = pos_graphs[num]
				graph_labels_tmp.append(labels)
				g = ConjunctiveGraph()
				g.load(open(f, "rb"))
				operations = list(g.subjects(predicate, ob))
				with open(output_tmp, "a") as tf:
					o = operations[0]
					entity_set = set()
					edge_set = []
					local_entity_counter = 0
					local_entity_map = dict()
					dfs_triples(entity_set, entity_map, edge_set, relation_map, g, o)
					#id = list(g.objects(o, ID))[0]
					tf.write("t")
					tf.write("\n")
					for (local_id, global_id) in sorted(entity_set, key=lambda x: x[0]):
						tf.write("v" + " " + str(local_id) + " " + str(global_id))
						tf.write("\n")
					for (s,p,o) in edge_set:
						tf.write("e" + " " + str(s) + " " + str(o) + " " + str(p))
						tf.write("\n")
			graph_labels_list_tmp.append(graph_labels_tmp)
		i_fold += 1
	return id_to_uri, graph_labels_train, graph_labels_test
Code Example #26
    def test_get_history(self):
        with open(filepath('test-patch-adds-items.json')) as f:
            patch = f.read()

        with self.client as client:
            res1 = client.patch(
                '/d/',
                data=patch,
                content_type='application/json',
                headers={'Authorization': 'Bearer '
                         + 'NTAwNWViMTgtYmU2Yi00YWMwLWIwODQtMDQ0MzI4OWIzMzc4'})

            patch_url = urlparse(res1.headers['Location']).path

            client.post(
                patch_url + 'messages',
                data='{"message": "Here is my patch"}',
                content_type='application/json',
                headers={'Authorization': 'Bearer '
                         + 'NTAwNWViMTgtYmU2Yi00YWMwLWIwODQtMDQ0MzI4OWIzMzc4'})

            client.post(
                patch_url + 'messages',
                data='{"message": "Looks good to me"}',
                content_type='application/json',
                headers={'Authorization': 'Bearer '
                         + 'ZjdjNjQ1ODQtMDc1MC00Y2I2LThjODEtMjkzMmY1ZGFhYmI4'})

            client.post(
                patch_url + 'merge',
                buffered=True,
                headers={'Authorization': 'Bearer '
                         + 'ZjdjNjQ1ODQtMDc1MC00Y2I2LThjODEtMjkzMmY1ZGFhYmI4'})

            res3 = client.get('/h', headers={'Accept': 'application/ld+json'})
            self.assertEqual(res3.status_code, http.client.SEE_OTHER)
            self.assertEqual(
                urlparse(res3.headers['Location']).path, '/h.jsonld')

            res4 = client.get('/history.jsonld?inline-context')
            self.assertEqual(res4.status_code, http.client.OK)
            self.assertEqual(
                res4.headers['Content-Type'], 'application/ld+json')
            jsonld = res4.get_data(as_text=True)

        g = ConjunctiveGraph()
        g.parse(format='json-ld', data=jsonld)

        # Initial data load
        self.assertIn(  # None means any
            (HOST['h#change-1'], PROV.endedAtTime, None), g)
        self.assertIn(
            (HOST['h#change-1'], PROV.used, HOST['d?version=0']), g)
        self.assertIn(
            (HOST['d?version=0'],
             PROV.specializationOf, HOST['d']), g)
        self.assertIn(
            (HOST['h#change-1'], RDFS.seeAlso, HOST['h#patch-request-1']), g)
        self.assertIn(
            (HOST['h#patch-request-1'], FOAF.page, HOST['patches/1/']), g)
        self.assertNotIn(
            (HOST['h#patch-request-1'],
             AS.replies, HOST['h#patch-request-1-comments']), g)
        self.assertIn(
            (HOST['h#change-1'], PROV.used, HOST['h#patch-1']), g)
        self.assertIn(
            (HOST['h#patch-1'],
             FOAF.page, HOST['patches/1/patch.jsonpatch']), g)
        self.assertIn(
            (HOST['h#change-1'],
             PROV.generated, HOST['d?version=1']), g)
        self.assertIn(
            (HOST['d?version=1'],
             PROV.specializationOf, HOST['d']), g)

        # Change from first submitted patch
        self.assertIn(  # None means any
            (HOST['h#change-2'], PROV.startedAtTime, None), g)
        self.assertIn(  # None means any
            (HOST['h#change-2'], PROV.endedAtTime, None), g)
        start = g.value(
            subject=HOST['h#change-2'],
            predicate=PROV.startedAtTime)
        self.assertEqual(start.datatype, XSD.dateTime)
        self.assertRegex(start.value.isoformat(), W3CDTF)
        end = g.value(
            subject=HOST['h#change-2'],
            predicate=PROV.endedAtTime)
        self.assertEqual(end.datatype, XSD.dateTime)
        self.assertRegex(end.value.isoformat(), W3CDTF)
        self.assertIn(
            (HOST['h#change-2'], PROV.wasAssociatedWith,
             URIRef('https://orcid.org/1234-5678-9101-112X')), g)
        self.assertIn(
            (HOST['h#change-2'], PROV.wasAssociatedWith,
             URIRef('https://orcid.org/1211-1098-7654-321X')), g)
        for association in g.subjects(
                predicate=PROV.agent,
                object=URIRef('https://orcid.org/1234-5678-9101-112X')):
            role = g.value(subject=association, predicate=PROV.hadRole)
            self.assertIn(role, (HOST['v#submitted'],
                                 HOST['v#updated']))
        merger = g.value(
            predicate=PROV.agent,
            object=URIRef('https://orcid.org/1211-1098-7654-321X'))
        self.assertIn(
            (HOST['h#change-2'], PROV.qualifiedAssociation, merger), g)
        self.assertIn(
            (merger, PROV.hadRole, HOST['v#merged']), g)
        self.assertIn(
            (HOST['h#change-2'], PROV.used, HOST['d?version=1']), g)
        self.assertIn(
            (HOST['d?version=1'],
             PROV.specializationOf, HOST['d']), g)
        self.assertIn(
            (HOST['h#change-2'], RDFS.seeAlso, HOST['h#patch-request-2']), g)
        self.assertIn(
            (HOST['h#patch-request-2'], FOAF.page, HOST['patches/2/']), g)
        self.assertIn(
            (HOST['h#patch-request-2'],
             AS.replies, HOST['h#patch-request-2-comments']), g)
        commentCount = g.value(
            subject=HOST['h#patch-request-2-comments'],
            predicate=AS.totalItems)
        self.assertEqual(commentCount.value, 2)
        self.assertIn(
            (HOST['h#patch-request-2-comments'],
             AS.first, HOST['h#patch-request-2-comment-1']), g)
        self.assertIn(
            (HOST['h#patch-request-2-comments'],
             AS.last, HOST['h#patch-request-2-comment-2']), g)
        self.assertIn(
            (HOST['h#patch-request-2-comments'],
             AS.items, HOST['h#patch-request-2-comment-1']), g)
        self.assertIn(
            (HOST['h#patch-request-2-comments'],
             AS.items, HOST['h#patch-request-2-comment-2']), g)
        self.assertIn(
            (HOST['h#patch-request-2-comment-1'], RDF.type, AS.Note), g)
        self.assertIn(
            (HOST['h#patch-request-2-comment-1'],
             AS.attributedTo,
             URIRef('https://orcid.org/1234-5678-9101-112X')), g)
        self.assertIn(  # None means any
            (HOST['h#patch-request-2-comment-1'], AS.published, None), g)
        comment1_media_type = g.value(
            subject=HOST['h#patch-request-2-comment-1'],
            predicate=AS.mediaType)
        self.assertEqual(comment1_media_type.value, 'text/plain')
        comment1_content = g.value(
            subject=HOST['h#patch-request-2-comment-1'],
            predicate=AS.content)
        self.assertEqual(comment1_content.value, 'Here is my patch')
        self.assertIn(
            (HOST['h#patch-request-2-comment-2'], RDF.type, AS.Note), g)
        self.assertIn(
            (HOST['h#patch-request-2-comment-2'],
             AS.attributedTo,
             URIRef('https://orcid.org/1211-1098-7654-321X')), g)
        self.assertIn(  # None means any
            (HOST['h#patch-request-2-comment-2'], AS.published, None), g)
        comment2_media_type = g.value(
            subject=HOST['h#patch-request-2-comment-2'],
            predicate=AS.mediaType)
        self.assertEqual(comment2_media_type.value, 'text/plain')
        comment2_content = g.value(
            subject=HOST['h#patch-request-2-comment-2'],
            predicate=AS.content)
        self.assertEqual(comment2_content.value, 'Looks good to me')
        self.assertIn(
            (HOST['h#change-2'], PROV.used, HOST['h#patch-2']), g)
        self.assertIn(
            (HOST['h#patch-2'],
             FOAF.page, HOST['patches/2/patch.jsonpatch']), g)
        self.assertIn(
            (HOST['h#change-2'],
             PROV.generated, HOST['d?version=2']), g)
        self.assertIn(
            (HOST['d?version=2'],
             PROV.specializationOf, HOST['d']), g)
Code Example #27
File: sparta_ex.py Project: Pfiver/RNA-Seqlyze
import urllib
from rdflib import ConjunctiveGraph as Graph
import sparta

url = 'http://www.gopubmed.org/GoMeshPubMed/gomeshpubmed/Search/RDF?q=18463287&type=RdfExportAll'
gopubmed_handle = urllib.urlopen(url)
graph = Graph()
graph.parse(gopubmed_handle)
gopubmed_handle.close()

graph_subjects = list(set(graph.subjects()))
sparta_factory = sparta.ThingFactory(graph)
for subject in graph_subjects:
    sparta_graph = sparta_factory(subject)
    print subject, [unicode(i) for i in sparta_graph.dc_title][0]
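The snippet above is Python 2 (urllib.urlopen, print statement, unicode). A rough Python 3 equivalent of the fetch-and-parse portion, as a sketch against the same URL:

from urllib.request import urlopen
from rdflib import ConjunctiveGraph as Graph

url = ('http://www.gopubmed.org/GoMeshPubMed/gomeshpubmed/'
       'Search/RDF?q=18463287&type=RdfExportAll')
graph = Graph()
with urlopen(url) as gopubmed_handle:
    graph.parse(gopubmed_handle)
graph_subjects = list(set(graph.subjects()))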
Code Example #28

rdfGraph = ConjunctiveGraph()
try:
    rdfGraph.parse("yoga-ontology.rdf", format="xml")
except Exception as e:
    print("Error:", e)

ns = Namespace('http://webprotege.stanford.edu/')
asana = ns.RD1UGDkMwbwNp3Nh9Gy5W3M  # root element of classification
sukhasana = ns.R8SV4CeNnDntt7K2HTjws64  # element to delete
description = ns.R7zDsGb0eQYf6uJETHG3qBx  # predicate for dataprop (description)
newAsana = ns.newElement1

positiveAffect = next(rdfGraph.subjects(RDFS.label,
                                        Literal("положительно влияет на", "ru")))
negativeAffect = next(rdfGraph.subjects(RDFS.label,
                                        Literal("отрицательно влияет на", "ru")))
backbone = next(rdfGraph.subjects(RDFS.label,
                                  Literal("Позвоночник", lang="ru")))

print(f"\nLabel of root element is {rdfGraph.label(asana)}")

print("\nFull information of root element:")
for po in rdfGraph.predicate_objects(asana):
    print(po)

print("\nGetting all instances for type of root element")
printElements(rdfGraph)
Code Example #29
s = graph.serialize(format='n3')
#print(s)
#print("graph has %s statements." % len(graph))


def isSubClassOf(subClass, superClass, graph):
    if subClass == superClass: return True
    # check every parent class; returning False inside the loop would stop
    # after the first parent and miss other superclass paths
    for parentClass in graph.objects(subClass, rdfsSubClassOf):
        if isSubClassOf(parentClass, superClass, graph):
            return True
    return False


# print the list of all classes in the OWL file
print(list(graph.subjects(rdfType, owlClass)))
# print the list of all object properties in the OWL file
print(list(graph.subjects(rdfType, owlObjectProperty)))
# print the list of all data properties

# list(graph.subjects(rdfType, owlObjectProperty))
#define a blank node
performance = BNode('_:perf1')
# define data as triples


def TruyVan(query, graph, instances=None):
    if instances is None: instances = set()
    for instance in graph.subjects(rdfType, query):
        instances.add(instance)
    for subClass in graph.subjects(rdfsSubClassOf, query):
Code Example #30
def get_vocab_base(vocabfile):
    graph = Graph()
    try:
        graph.parse(vocabfile)
    except:
        graph = None
        graph = Graph()
        try:
            graph.parse(vocabfile, format="n3")
        except:
            return (None, None, None)
    identifier = None
    for v in graph.objects(None, namespaces['dc']['identifier']):
        identifier = v
    if not identifier:
        for v in graph.objects(None, namespaces['dcterms']['identifier']):
            identifier = v

    base = None
    if not base:
        for s in graph.subjects(namespaces['rdf']['type'], namespaces['owl']['Ontology']):
            base = s
            break
    if not base:
        for s in graph.subjects(namespaces['dc']['title'], None):
            base = s
            break
    if not base:
        for s in graph.subjects(namespaces['dcterms']['title'], None):
            base = s
            break
    if not base:
        for s in graph.subjects(namespaces['dc']['creator'], None):
            base = s
            break
    if not base:
        for s in graph.subjects(namespaces['dcterms']['creator'], None):
            base = s
            break
    if not base:
        for v in graph.objects(None, namespaces['vann']['preferredNamespaceUri']):
            base = v
            break
    if not base:
        for v in graph.namespaces():
            if v[0] == '':
                base = v[1]
                break

    prefix = None
    vocab_prefixes = graph.objects(None, namespaces['vann']['preferredNamespacePrefix'])
    for vp in vocab_prefixes:
        prefix = vp
    if not prefix and base:
        for v in graph.namespaces():
            if str(v[1]) == str(base):
                prefix = v[0]
                break
    if not prefix and base:
        prefix = base.strip().strip('/').split('/')[-1].strip('#').strip(' ')
    if base:
        base = base.strip()
        if (base[-1]!="/" and base[-1]!="#"):
            base += "#"
    return (identifier, base, prefix)
Code Example #31
def graph_plan(plan, fountain):
    plan_graph = ConjunctiveGraph()
    plan_graph.bind('agora', AGORA)
    prefixes = plan.get('prefixes')
    ef_plan = plan.get('plan')
    tree_lengths = {}
    s_trees = set([])
    patterns = {}

    for (prefix, u) in prefixes.items():
        plan_graph.bind(prefix, u)

    def __get_pattern_node(p):
        if p not in patterns:
            patterns[p] = BNode('tp_{}'.format(len(patterns)))
        return patterns[p]

    def __inc_tree_length(tree, l):
        if tree not in tree_lengths:
            tree_lengths[tree] = 0
        tree_lengths[tree] += l

    def __add_variable(p_node, vid, subject=True):
        sub_node = BNode(str(vid).replace('?', 'var_'))
        if subject:
            plan_graph.add((p_node, AGORA.subject, sub_node))
        else:
            plan_graph.add((p_node, AGORA.object, sub_node))
        plan_graph.set((sub_node, RDF.type, AGORA.Variable))
        plan_graph.set((sub_node, RDFS.label, Literal(str(vid), datatype=XSD.string)))

    def include_path(elm, p_seeds, p_steps):
        elm_uri = __extend_uri(prefixes, elm)
        path_g = plan_graph.get_context(elm_uri)
        b_tree = BNode(elm_uri)
        s_trees.add(b_tree)
        path_g.set((b_tree, RDF.type, AGORA.SearchTree))
        path_g.set((b_tree, AGORA.fromType, elm_uri))

        for seed in p_seeds:
            path_g.add((b_tree, AGORA.hasSeed, URIRef(seed)))

        previous_node = b_tree
        __inc_tree_length(b_tree, len(p_steps))
        for j, step in enumerate(p_steps):
            prop = step.get('property')
            b_node = BNode(previous_node.n3() + prop)
            if j < len(p_steps) - 1 or pattern[1] == RDF.type:
                path_g.add((b_node, AGORA.onProperty, __extend_uri(prefixes, prop)))
            path_g.add((b_node, AGORA.expectedType, __extend_uri(prefixes, step.get('type'))))
            path_g.add((previous_node, AGORA.next, b_node))
            previous_node = b_node

        p_node = __get_pattern_node(pattern)
        path_g.add((previous_node, AGORA.byPattern, p_node))

    for i, tp_plan in enumerate(ef_plan):
        paths = tp_plan.get('paths')
        pattern = tp_plan.get('pattern')
        hints = tp_plan.get('hints')
        context = BNode('space_{}'.format(tp_plan.get('context')))
        for path in paths:
            steps = path.get('steps')
            seeds = path.get('seeds')
            if not len(steps) and len(seeds):
                include_path(pattern[2], seeds, steps)
            elif len(steps):
                ty = steps[0].get('type')
                include_path(ty, seeds, steps)

        for t in s_trees:
            plan_graph.set((t, AGORA.length, Literal(tree_lengths.get(t, 0), datatype=XSD.integer)))

        pattern_node = __get_pattern_node(pattern)
        plan_graph.add((context, AGORA.definedBy, pattern_node))
        plan_graph.set((context, RDF.type, AGORA.SearchSpace))
        plan_graph.add((pattern_node, RDF.type, AGORA.TriplePattern))
        (sub, pred, obj) = pattern

        if isinstance(sub, BNode):
            __add_variable(pattern_node, str(sub))
        elif isinstance(sub, URIRef):
            plan_graph.add((pattern_node, AGORA.subject, sub))

        if isinstance(obj, BNode):
            __add_variable(pattern_node, str(obj), subject=False)
        elif isinstance(obj, Literal):
            node = BNode(str(obj).replace(' ', ''))
            plan_graph.add((pattern_node, AGORA.object, node))
            plan_graph.set((node, RDF.type, AGORA.Literal))
            plan_graph.set((node, AGORA.value, Literal(str(obj), datatype=XSD.string)))
        else:
            plan_graph.add((pattern_node, AGORA.object, obj))

        plan_graph.add((pattern_node, AGORA.predicate, pred))
        if pred == RDF.type:
            if 'check' in hints:
                plan_graph.add((pattern_node, AGORA.checkType, Literal(hints['check'], datatype=XSD.boolean)))

        sub_expected = plan_graph.subjects(predicate=AGORA.expectedType)
        for s in sub_expected:
            expected_types = list(plan_graph.objects(s, AGORA.expectedType))
            for et in expected_types:
                plan_graph.remove((s, AGORA.expectedType, et))
            q_expected_types = [plan_graph.qname(t) for t in expected_types]
            expected_types = [d for d in expected_types if
                              not set.intersection(set(fountain.get_type(plan_graph.qname(d)).get('super')),
                                                   set(q_expected_types))]
            for et in expected_types:
                plan_graph.add((s, AGORA.expectedType, et))

    return plan_graph
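
# A minimal invocation sketch (an addition; the input shapes are inferred from
# the code above, not documented by the source): plan needs 'prefixes' and
# 'plan' keys, and fountain must expose get_type. With an empty plan list the
# fountain is never consulted, so the call just binds the prefixes:
#   empty_plan = {'prefixes': {'foaf': 'http://xmlns.com/foaf/0.1/'}, 'plan': []}
#   g = graph_plan(empty_plan, fountain=None)
#   print(g.serialize(format='turtle'))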
コード例 #33
class PreProcessor(object):
    def __init__(self, kg_path):
        self.kg_path = kg_path
        self.ent_dict = dict()
        self.rel_dict = dict()
        self.g = ConjunctiveGraph()
        self.unique_msgs = self.ent_dict.copy()

    def load_knowledge_graph(self,
                             format='xml',
                             exclude_rels=[],
                             clean_schema=True,
                             amberg_params=None,
                             excluded_entities=None):
        self.g.load(self.kg_path, format=format)
        # remove triples with excluded relation
        remove_rel_triples(self.g, exclude_rels)
        # remove triples with relations between class-level constructs
        if clean_schema:
            remove_rel_triples(self.g, schema_relations)
        if excluded_entities is not None:
            remove_ent_triples(self.g, excluded_entities)
        if amberg_params:
            path_to_events = amberg_params[0]
            max_events = amberg_params[1]
            self.merged = get_merged_dataframe(path_to_events, max_events)
            self.unique_msgs, unique_vars, unique_mods, unique_fes = get_unique_entities(
                self.merged)
            update_amberg_ontology(self.g, self.ent_dict, self.unique_msgs,
                                   unique_mods, unique_fes, unique_vars,
                                   self.merged)

        self.update_entity_relation_dictionaries()

    def update_entity_relation_dictionaries(self):
        """
        Given an existing entity dictionary, update it to *ontology*
        :param ontology:
        :param ent_dict: the existing entity dictionary
        :return:
        """
        ent_counter = 0
        fixed_ids = set(self.ent_dict.values())
        # sorting ensures equal random splits on equal seeds
        for h in sorted(
                set(self.g.subjects(None, None)).union(
                    set(self.g.objects(None, None)))):
            uni_h = unicode(h)
            if uni_h not in self.ent_dict:
                while ent_counter in fixed_ids:
                    ent_counter += 1
                self.ent_dict.setdefault(uni_h, ent_counter)
                ent_counter += 1
        # add new relations to dict
        for r in sorted(set(self.g.predicates(None, None))):
            uni_r = unicode(r)
            if uni_r not in self.rel_dict:
                self.rel_dict.setdefault(uni_r, len(self.rel_dict))

    def load_unique_msgs_from_txt(self, path, max_events=None):
        """
        Assuming csv text files with two columns
        :param path:
        :return:
        """
        with open(path, "rb") as f:
            for line in f:
                split = line.split(',')
                try:
                    emb_id = int(split[1].strip())
                except (ValueError, IndexError):
                    print("Error reading id of {0} in given dictionary".format(
                        line))
                    # skip this event entity; treat it as a common entity later on
                    continue
                self.ent_dict[split[0]] = emb_id
        # sort ascending w.r.t. embedding id, in case of later stripping
        # self.ent_dict = sorted(self.ent_dict.items(), key=operator.itemgetter(1), reverse=False)
        self.unique_msgs = self.ent_dict.copy()
        if max_events is not None:
            all_msgs = sorted(self.unique_msgs.items(),
                              key=operator.itemgetter(1),
                              reverse=False)
            self.unique_msgs = dict(all_msgs[:max_events])
            excluded_events = dict(all_msgs[max_events:]).keys()
            return excluded_events

    def prepare_sequences(self, path_to_input, use_dict=True):
        """
        Dumps pickle for sequences and dictionary
        :param data_frame:
        :param file_name:
        :param index:
        :param classification_event:
        :return:
        """
        print("Preparing sequential data...")
        with open(path_to_input, "rb") as f:
            result = []
            for line in f:
                entities = line.split(',')
                if use_dict:
                    result.append([
                        int(e.strip()) for e in entities
                        if int(e.strip()) in self.unique_msgs.values()
                    ])
                else:
                    result.append([int(e.strip()) for e in entities])
        print("Processed {0} sequences".format(len(result)))
        return result

    def get_vocab_size(self):
        return len(self.unique_msgs)

    def get_ent_dict(self):
        return self.ent_dict

    def get_rel_dict(self):
        return self.rel_dict

    def get_kg(self):
        return self.g

    def get_unique_msgs(self):
        return self.unique_msgs

    def get_merged(self):
        return self.merged
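
# A brief usage sketch (an addition; the path is hypothetical, and helpers
# such as remove_rel_triples come from the example's own module):
#   pp = PreProcessor("knowledge_graph.xml")
#   pp.load_knowledge_graph(format='xml')
#   print(len(pp.get_ent_dict()), len(pp.get_rel_dict()))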
コード例 #34
def test_multiple_value_urls_in_virtual():
    csvw = CSVW(csv_path="tests/value_urls.csv",
                metadata_path="tests/value_urls.csv-metadata.json")
    rdf_contents = csvw.to_rdf(fmt="nt")
    g = ConjunctiveGraph()
    g.parse(data=rdf_contents, format="nt")

    # Test subjects
    all_subjects = list(g.subjects())
    s_amount = NS['amount']
    s_desc = NS['description']
    s_id = NS['id']
    assert s_amount in all_subjects
    assert s_desc in all_subjects
    assert s_id in all_subjects

    # Test descriptions
    p_def = NS['definition']
    assert len(list(g.triples(
        (s_amount, p_def, Literal("the amount paid"))))) == 1
    assert len(
        list(g.triples(
            (s_desc, p_def, Literal("description of the expense"))))) == 1
    assert len(list(g.triples((s_id, p_def, Literal("transaction id"))))) == 1

    # Test each is a element type
    o_element = NS['element']
    assert len(list(g.triples((s_amount, RDF.type, o_element)))) == 1
    assert len(list(g.triples((s_desc, RDF.type, o_element)))) == 1
    assert len(list(g.triples((s_id, RDF.type, o_element)))) == 1

    # Test that range is specified
    r_amount = NS['element/amount-RANGE']
    r_desc = NS['element/description-RANGE']
    r_id = NS['element/id-RANGE']

    assert len(list(g.triples((s_amount, RDFS.range, r_amount)))) == 1
    assert len(list(g.triples((s_desc, RDFS.range, r_desc)))) == 1
    assert len(list(g.triples((s_id, RDFS.range, r_id)))) == 1

    # Range is another subject
    assert r_amount in all_subjects
    assert r_desc in all_subjects
    assert r_id in all_subjects

    # Range is a OWL datatype of specified type
    assert len(list(g.triples((r_amount, OWL.onDatatype, XSD.decimal)))) == 1
    assert len(list(g.triples((r_desc, OWL.onDatatype, XSD.string)))) == 1
    assert len(list(g.triples((r_id, OWL.onDatatype, XSD.integer)))) == 1

    # Check the restrictions for amount
    rest_amount_node = list(g.triples((r_amount, OWL.withRestrictions, None)))
    rest_amount_node = rest_amount_node[0][2]
    assert isinstance(rest_amount_node, BNode)
    assert len(list(g.triples(
        (rest_amount_node, RDF.first, XSD.decimal)))) == 1
    rest_amount_node = next(
        g.objects(subject=rest_amount_node, predicate=RDF.rest))
    assert len(list(g.triples(
        (rest_amount_node, RDF.first, XSD.MaxLength)))) == 1
    rest_amount_node = next(
        g.objects(subject=rest_amount_node, predicate=RDF.rest))
    assert len(
        list(
            g.triples((rest_amount_node, RDF.first,
                       Literal(10, datatype=XSD.nonNegativeInteger))))) == 1
    rest_amount_node = next(
        g.objects(subject=rest_amount_node, predicate=RDF.rest))
    assert len(list(g.triples(
        (rest_amount_node, RDF.first, XSD.MinLength)))) == 1
    rest_amount_node = next(
        g.objects(subject=rest_amount_node, predicate=RDF.rest))
    assert len(
        list(
            g.triples((rest_amount_node, RDF.first,
                       Literal(1, datatype=XSD.nonNegativeInteger))))) == 1
    rest_amount_node = next(
        g.objects(subject=rest_amount_node, predicate=RDF.rest))
    assert len(list(g.triples((rest_amount_node, RDF.first, None)))) == 0
    assert len(list(g.triples((rest_amount_node, RDF.rest, None)))) == 0

    # Check the restrictions for description
    rest_desc_node = list(g.triples((r_desc, OWL.withRestrictions, None)))
    rest_desc_node = rest_desc_node[0][2]
    assert isinstance(rest_desc_node, BNode)
    assert len(list(g.triples((rest_desc_node, RDF.first, XSD.string)))) == 1
    rest_desc_node = next(g.objects(subject=rest_desc_node,
                                    predicate=RDF.rest))
    assert len(list(g.triples(
        (rest_desc_node, RDF.first, XSD.MaxLength)))) == 1
    rest_desc_node = next(g.objects(subject=rest_desc_node,
                                    predicate=RDF.rest))
    assert len(
        list(
            g.triples((rest_desc_node, RDF.first,
                       Literal(100, datatype=XSD.nonNegativeInteger))))) == 1
    rest_desc_node = next(g.objects(subject=rest_desc_node,
                                    predicate=RDF.rest))
    assert len(list(g.triples((rest_desc_node, RDF.first, None)))) == 0
    assert len(list(g.triples((rest_desc_node, RDF.rest, None)))) == 0

    # Check the restrictions for id
    rest_id_node = list(g.triples((r_id, OWL.withRestrictions, None)))
    rest_id_node = rest_id_node[0][2]
    assert isinstance(rest_id_node, BNode)
    assert len(list(g.triples((rest_id_node, RDF.first, XSD.integer)))) == 1
    rest_id_node = next(g.objects(subject=rest_id_node, predicate=RDF.rest))
    assert len(list(g.triples((rest_id_node, RDF.first, XSD.MinLength)))) == 1
    rest_id_node = next(g.objects(subject=rest_id_node, predicate=RDF.rest))
    assert len(
        list(
            g.triples((rest_id_node, RDF.first,
                       Literal(0, datatype=XSD.nonNegativeInteger))))) == 1
    rest_id_node = next(g.objects(subject=rest_id_node, predicate=RDF.rest))
    assert len(list(g.triples((rest_id_node, RDF.first, None)))) == 0
    assert len(list(g.triples((rest_id_node, RDF.rest, None)))) == 0

    # Check constant value for each
    const_prop = NS['another-list-value-with-constants']
    for s in [r_amount, r_id, r_desc]:
        constant_node = list(g.triples((s, const_prop, None)))
        constant_node = constant_node[0][2]
        assert isinstance(constant_node, BNode)
        assert len(list(g.triples(
            (constant_node, RDF.first, XSD.Length)))) == 1
        constant_node = next(
            g.objects(subject=constant_node, predicate=RDF.rest))
        assert len(
            list(
                g.triples((constant_node, RDF.first,
                           Literal(1, datatype=XSD.nonNegativeInteger))))) == 1
        constant_node = next(
            g.objects(subject=constant_node, predicate=RDF.rest))
        assert len(list(g.triples((constant_node, RDF.first, None)))) == 0
        assert len(list(g.triples((constant_node, RDF.rest, None)))) == 0

    # Verify that empty valueUrl does not end up in graph or rdf contents
    assert NS['empty-list-predicate1'] not in list(g.objects())
    assert "empty-list-predicate1" not in rdf_contents

    # Verify that empty valueUrl does not end up in graph
    assert NS['empty-list-predicate2'] not in list(g.objects())
    assert "empty-list-predicate2" not in rdf_contents

    # Test total number of lists through rdf:nils in order to verify each list
    # ends up with a nil
    test_num_lists = 3 * 3  # 3 rows and 3 virtual list valued columns
    nil_text = "<http://www.w3.org/1999/02/22-rdf-syntax-ns#nil> ."
    assert rdf_contents.count(nil_text) == test_num_lists