Beispiel #1
0
    def serialize(self, add, delete):
        """Serialize additions and deletions as an eccenca revision commit.

        ``add`` and ``delete`` map graph URIs to collections of triples.
        Returns the change description serialized as a TriG string.
        """
        commit_ns = Namespace("urn:commit:" + str(uuid.uuid1()) + ":")
        eccrev = Namespace("https://vocab.eccenca.com/revision/")

        graph = ConjunctiveGraph()
        NamespaceManager(graph).bind('eccrev', eccrev, override=False)

        graph.add((commit_ns.term(""), RDF.type, eccrev.Commit))

        # Visit every graph touched by either the additions or the deletions.
        for graph_uri in set(delete.keys()) | set(add.keys()):
            removed = delete.get(graph_uri)
            added = add.get(graph_uri)
            if not removed and not added:
                continue
            revision_ns = Namespace("urn:revision:" + str(uuid.uuid1()) + ":")
            revision = revision_ns.term("")
            graph.add((commit_ns.term(""), eccrev.hasRevision, revision))
            graph.add((revision, RDF.type, eccrev.Revision))
            if str(graph_uri) != 'http://quitdiff.default/':
                graph.add((revision, eccrev.hasRevisionGraph, graph_uri))
            if removed:
                delete_graph = revision_ns.term(":delete")
                graph.add((revision, eccrev.deltaDelete, delete_graph))
                for triple in removed:
                    graph.add(triple + (delete_graph,))
            if added:
                insert_graph = revision_ns.term(":insert")
                graph.add((revision, eccrev.deltaInsert, insert_graph))
                for triple in added:
                    graph.add(triple + (insert_graph,))

        return graph.serialize(format="trig").decode("utf-8")
def make_component_hierarchy(component_map, component_part_map):
    """Build an RDF graph of component names and part-of relations.

    ``component_map`` maps component ids to display names;
    ``component_part_map`` maps a component id to the ids it is part of.
    """
    graph = rdflib.Graph()
    indra_ns = 'http://sorger.med.harvard.edu/indra/'
    locations = Namespace(indra_ns + 'locations/')
    relations = Namespace(indra_ns + 'relations/')
    part_of = relations.term('partof')
    has_name = relations.term('hasName')
    for comp_id, comp_name in component_map.items():
        subject = locations.term(comp_id)
        graph.add((subject, has_name, Literal(comp_name)))
        # Missing entries in the part map mean the component has no parent.
        for sup_id in component_part_map.get(comp_id) or ():
            graph.add((subject, part_of, locations.term(sup_id)))
    return graph
Beispiel #3
0
def make_component_hierarchy(component_map, component_part_map):
    """Create an RDF graph linking entity components to names and parents."""
    indra_ns = 'http://sorger.med.harvard.edu/indra/'
    entities = Namespace(indra_ns + 'entities/')
    relations = Namespace(indra_ns + 'relations/')
    part_of = relations.term('partof')
    has_name = relations.term('hasName')
    g = rdflib.Graph()
    for component_id, name in component_map.items():
        node = entities.term(component_id)
        g.add((node, has_name, Literal(name)))
        parents = component_part_map.get(component_id)
        if parents is not None:
            for parent_id in parents:
                g.add((node, part_of, entities.term(parent_id)))
    return g
Beispiel #4
0
    def export(self, entities, entity_namespace, ontology_namespace,
               export_language):
        """Serialize *entities* as CIDOC-CRM typed, French-labelled resources.

        Each entity gets a fresh UUID-based URI in *entity_namespace*, a
        type from the exporter's entity-type mapping, and an rdfs:label.
        Returns the graph serialized in *export_language*.
        """
        entity_ns = Namespace(entity_namespace)
        ontology_ns = Namespace(ontology_namespace)
        graph = Graph()

        type_map = CIDOCCRMExporter.__entity_type_to_ontlogy_type
        for entity in entities:
            node = entity_ns.term(str(uuid.uuid4()))
            graph.add((node, RDF.type,
                       ontology_ns.term(type_map[entity.entity_type])))
            graph.add((node, RDFS.label, Literal(entity.name, lang='fr')))
        return graph.serialize(format=export_language).decode('utf-8')
Beispiel #5
0
    def create_ontology(self, tr, predicate, subClass, address, booktitle):
        """Build and serialize two linked RDF graphs for a catalogued term.

        Writes a property-schema graph to 'trtst.rdf' and a subject graph
        (term *tr* typed as an OWL class, linked to *subClass*) to
        'test2.owl', both in Turtle format.

        Parameters
        ----------
        tr : str
            Subject term to describe.
        predicate : str
            Property name relating the term to its subclass.
        subClass : str
            Name of the subclass the term belongs to.
        address : str
            Source address, recorded as dc:source.
        booktitle : str
            Title, recorded as dc:title.
        """
        LDT = Namespace("http://www.JceFinalProjectOntology.com/")
        ut = Namespace("http://www.JceFinalProjectOntology.com/subject/#")
        usubClass = URIRef("http://www.JceFinalProjectOntology.com/subject/" +
                           subClass.strip() + '#')
        print(ut)
        print(usubClass)

        # One shared store backs both graphs so they serialize independently.
        store = IOMemory()

        sty = LDT[predicate]
        g = rdflib.Graph(store=store, identifier=LDT)
        t = ConjunctiveGraph(store=store, identifier=ut)
        print('Triples in graph before add: ', len(t))

        # Schema graph: the ontology class plus one property per symbol.
        g.add((URIRef(LDT), RDF.type, RDFS.Class))
        g.add((URIRef(LDT), RDFS.label, Literal("JFPO")))
        g.add((URIRef(LDT), RDFS.comment, Literal('class of all properties')))
        for v in self.symbols.values():
            # Compound terms contribute only their first component.
            vs = self.splitTerms(v)[0] if self.if_compoTerm(v) else v
            g.add((LDT[vs], RDF.type, RDF.Property))
            g.add((LDT[vs], RDFS.label, Literal('has' + vs)))
            g.add((LDT[vs], RDFS.comment, Literal(v)))
            g.add((LDT[vs], RDFS.range, OWL.Class))
            g.add((LDT[vs], RDFS.domain, Literal(vs)))
        g.bind('JFPO', LDT)
        g.serialize('trtst.rdf', format='turtle')

        # Subject graph: describe the term itself.
        t.add((ut[tr], RDF.type, OWL.Class))
        t.add((ut[tr], RDFS.subClassOf, OWL.Thing))
        t.add((ut[tr], RDFS.label, Literal(tr)))
        t.add((ut[tr], DC.title, Literal(booktitle)))
        t.add((ut[tr], DC.source, Literal(address)))
        # An RDF graph is a set of triples, so adding this once suffices
        # (the original repeated the add three times).
        t.add((ut[tr], DC[predicate], URIRef(usubClass)))
        t.add((ut[tr], LDT[predicate], RDF.Property))
        t.add((ut[tr], LDT.term(predicate), URIRef(usubClass)))

        t.add((usubClass, RDF.type, OWL.Class))
        t.add((usubClass, RDFS.subClassOf, OWL.Thing))
        t.add((usubClass, RDFS.subClassOf, URIRef(sty)))
        t.add((usubClass, RDFS.label, Literal(subClass)))

        # Fix: the DC namespace URI previously contained a doubled scheme
        # ("http://http://purl.org/..."), producing a broken prefix binding.
        t.bind("dc", "http://purl.org/dc/elements/1.1/")
        t.bind('JFPO', LDT)
        t.commit()

        t.serialize('test2.owl', format='turtle')
    def create_ontology(self, tr, predicate, subClass, address, booktitle):
        """Create the JFPO property-schema and subject-description graphs.

        Serializes the property schema to 'trtst.rdf' and the subject graph
        to 'test2.owl' (both Turtle).  *tr* is the subject term, *predicate*
        relates it to *subClass*; *address* and *booktitle* are recorded as
        dc:source and dc:title respectively.
        """
        LDT = Namespace("http://www.JceFinalProjectOntology.com/")
        ut = Namespace("http://www.JceFinalProjectOntology.com/subject/#")
        usubClass = URIRef("http://www.JceFinalProjectOntology.com/subject/" +
                           subClass.strip() + '#')
        print(ut)
        print(usubClass)

        # Both graphs share a single in-memory store.
        store = IOMemory()

        sty = LDT[predicate]
        g = rdflib.Graph(store=store, identifier=LDT)
        t = ConjunctiveGraph(store=store, identifier=ut)
        print('Triples in graph before add: ', len(t))

        g.add((URIRef(LDT), RDF.type, RDFS.Class))
        g.add((URIRef(LDT), RDFS.label, Literal("JFPO")))
        g.add((URIRef(LDT), RDFS.comment, Literal('class of all properties')))
        for v in self.symbols.values():
            # Compound terms are reduced to their first component.
            vs = self.splitTerms(v)[0] if self.if_compoTerm(v) else v
            g.add((LDT[vs], RDF.type, RDF.Property))
            g.add((LDT[vs], RDFS.label, Literal('has' + vs)))
            g.add((LDT[vs], RDFS.comment, Literal(v)))
            g.add((LDT[vs], RDFS.range, OWL.Class))
            g.add((LDT[vs], RDFS.domain, Literal(vs)))
        g.bind('JFPO', LDT)
        g.serialize('trtst.rdf', format='turtle')

        t.add((ut[tr], RDF.type, OWL.Class))
        t.add((ut[tr], RDFS.subClassOf, OWL.Thing))
        t.add((ut[tr], RDFS.label, Literal(tr)))
        t.add((ut[tr], DC.title, Literal(booktitle)))
        t.add((ut[tr], DC.source, Literal(address)))
        # The same triple was added three times in the original; a graph is
        # a set, so once is enough.
        t.add((ut[tr], DC[predicate], URIRef(usubClass)))
        t.add((ut[tr], LDT[predicate], RDF.Property))
        t.add((ut[tr], LDT.term(predicate), URIRef(usubClass)))

        t.add((usubClass, RDF.type, OWL.Class))
        t.add((usubClass, RDFS.subClassOf, OWL.Thing))
        t.add((usubClass, RDFS.subClassOf, URIRef(sty)))
        t.add((usubClass, RDFS.label, Literal(subClass)))

        # Fix: dc namespace URI previously contained a doubled "http://".
        t.bind("dc", "http://purl.org/dc/elements/1.1/")
        t.bind('JFPO', LDT)
        t.commit()

        t.serialize('test2.owl', format='turtle')
Beispiel #7
0
def main():
    """Build the INDRA modification-type hierarchy and save it.

    Every modification kind is declared an 'isa' child of the generic
    'modification' entity; the graph is written via save_hierarchy().
    """
    indra_ns = 'http://sorger.med.harvard.edu/indra/'
    relations = Namespace(indra_ns + 'relations/')
    entities = Namespace(indra_ns + 'entities/')
    isa = relations.term('isa')

    graph = Graph()
    modification = entities.term('modification')
    for mod_type in ('phosphorylation', 'ubiquitination', 'sumoylation',
                     'acetylation', 'hydroxylation'):
        graph.add((entities.term(mod_type), isa, modification))

    save_hierarchy(graph, hierarchy_path)
Beispiel #8
0
    def serialize(self, add, delete):
        """Serialize additions and deletions as a TopBraid diff document.

        ``add`` and ``delete`` map graph URIs to collections of triples.
        Each changed graph becomes an owl:Ontology named graph whose content
        is the set of reified diff:AddedTripleDiff / diff:DeletedTripleDiff
        statements.  Returns the result as a TriG string.
        """
        diff = Namespace("http://topbraid.org/diff#")

        g = ConjunctiveGraph()

        namespace_manager = NamespaceManager(g)
        namespace_manager.bind('diff', diff, override=False)
        namespace_manager.bind('owl', OWL, override=False)

        for graphUri in set(delete.keys()) | set(add.keys()):
            deleted = delete.get(graphUri)
            added = add.get(graphUri)
            if not deleted and not added:
                continue
            changeset = Namespace("urn:diff:" + str(uuid.uuid1()))
            graphTerm = changeset.term("")
            if str(graphUri) != 'http://quitdiff.default/':
                g.add((graphTerm, OWL.imports, graphUri, graphTerm))
            g.add((graphTerm, RDF.type, OWL.Ontology, graphTerm))
            g.add((graphTerm, OWL.imports, diff.term(""), graphTerm))
            # Reify each deleted triple as a diff:DeletedTripleDiff node.
            # (The original also kept per-graph counters that were
            # incremented but never read; that dead code is removed.)
            for triple in deleted or ():
                node = BNode()
                g.add((node, RDF.type, diff.DeletedTripleDiff, graphTerm))
                g.add((node, RDF.subject, triple[0], graphTerm))
                g.add((node, RDF.predicate, triple[1], graphTerm))
                g.add((node, RDF.object, triple[2], graphTerm))
            # Reify each added triple as a diff:AddedTripleDiff node.
            for triple in added or ():
                node = BNode()
                g.add((node, RDF.type, diff.AddedTripleDiff, graphTerm))
                g.add((node, RDF.subject, triple[0], graphTerm))
                g.add((node, RDF.predicate, triple[1], graphTerm))
                g.add((node, RDF.object, triple[2], graphTerm))

        return g.serialize(format="trig").decode("utf-8")
        print("%s of %s processed" % (i, entities.count()))
    converter.convert_entity(e)


def get_uri_by_object_id(objectId):
    """Return the URI and rdf:type stored for a Mongo object id in graph g.

    Returns a dict with 'uri' and 'type' for the first match, or None
    when no entity with that mongo_id exists.
    """
    g.bind("lobbyOntology",
           'https://studi.f4.htw-berlin.de/~s0539710/lobbyradar/ontology#')
    query = (
        "SELECT ?uri ?type WHERE { ?uri lobbyOntology:mongo_id '%s' ; "
        "<http://www.w3.org/1999/02/22-rdf-syntax-ns#type> ?type }"
        % str(objectId))
    for row in g.query(query):
        # Only the first result row is relevant.
        return {'uri': row['uri'], 'type': row['type']}


positions_map = {
    u"Vorstand": lobbyOntology.term("executiveOf"),
    u"Mitglied": org.term("memberOf"),
    u"Ordentliches Mitglied": lobbyOntology.term("fullMemberOf"),
    u"Stellvertretendes Mitglied": lobbyOntology.term("deputyMemberOf"),
    u"Arbeitsverh\xe4ltnis": lobbyOntology.term("employeeOf"),
    u"Aufsichtsratsmitglied": lobbyOntology.term("supervisoryMemberOf"),
    u"Mitglied des Aufsichtsrates": lobbyOntology.term("supervisoryMemberOf"),
    u"Mitglied des Aufsichtsrats": lobbyOntology.term("supervisoryMemberOf"),
    u"Mitglied des Kuratoriums": lobbyOntology.term("kuratoriumMemberOf"),
    u"Mitglied des Stiftungsrates": lobbyOntology.term("kuratoriumMemberOf"),
    u"Mitglied des Beirates": lobbyOntology.term("advisoryMemberOf"),
    u"Mitglied des Vorstandes": lobbyOntology.term("executiveOf"),
    u"Vorstandsmitglied": lobbyOntology.term("executiveOf"),
    u"Staatssekret\xe4r": lobbyOntology.term("secretaryOf"),
    u'Parlamentarischer Staatssekret\xe4r': lobbyOntology.term("secretaryOf"),
    u"Mitglied im Rundfunkrat": lobbyOntology.term("mediaAdvisoryMemberOf"),
import sys
import json
from os.path import join, dirname, abspath
from rdflib import Graph, Namespace, Literal
from indra.sources import sofia


# Note that this is just a placeholder, it doesn't resolve as a URL
sofia_ns = Namespace('http://cs.cmu.edu/sofia/')
indra_ns = 'http://sorger.med.harvard.edu/indra/'
indra_rel_ns = Namespace(indra_ns + 'relations/')
isa = indra_rel_ns.term('isa')


def save_ontology(g, path):
    """Write graph *g* to *path* as sorted N-Triples with blank lines removed."""
    with open(path, 'wb') as out_file:
        serialized = g.serialize(format='nt')
        # Collapse doubled newlines and trim the trailing empty line.
        serialized = serialized.replace(b'\n\n', b'\n').strip()
        # Sort the rows so the output is deterministic and diff-friendly.
        out_file.write(b'\n'.join(sorted(serialized.split(b'\n'))))


def build_ontology(ont_json, rdf_path):
    G = Graph()
    for top_key, entries in ont_json.items():
        for entry_key, examples in entries.items():
            if '/' in entry_key:
Beispiel #11
0
import hashlib

if __name__ == '__main__':
    # Command-line entry point: convert plain SKOS prefLabels in an RDF file
    # into reified SKOS-XL label resources and write the result back out.
    arguments = docopt(__doc__, version='SKOS2XL 1.0')
    infile = arguments['--infile']
    outfile = arguments['--outfile']
    # Base URI under which the new label resources are minted.
    uri = arguments['--uri']

    g = Graph()

    xl = Namespace('http://www.w3.org/2008/05/skos-xl#')
    base = Namespace(uri)

    nsm = NamespaceManager(g)
    # do we make the assumption that no SKOS-XL is already bound?
    nsm.bind('xl', xl, override=False)

    # Infer the RDF serialization from the input filename.
    file_format = guess_format(infile)

    g.parse(infile, format=file_format)

    # For every skos:prefLabel triple, mint an xl:Label resource and attach
    # it to the concept via xl:prefLabel, keeping the literal as literalForm.
    for s, o in g.subject_objects(predicate=SKOS.prefLabel):
        concept = Resource(g, s)
        # Hash the label text so identical labels map to the same URI;
        # the language tag is included in the URI to keep translations apart.
        hsh = hashlib.md5(o.encode('utf-8')).hexdigest()
        label = Resource(g, base.term('label/' + o.language + '_' + hsh))
        label.add(RDF.type, xl.Label)
        label.add(xl.literalForm, o)
        concept.add(xl.prefLabel, label)

    # Write the augmented graph in the same format as the input.
    g.serialize(outfile, format=file_format)
Beispiel #12
0
def main(search=None, cache=None, identifiers=None):
    """Harvest RKD image and thesaurus data and serialize it as TriG.

    Parameters
    ----------
    search : str, optional
        Search URL; every record in its result set is fetched and converted.
    cache : dict, optional
        Pre-fetched documents (keyed by id) converted without fetching.
    identifiers : list, optional
        Explicit record identifiers to fetch from the API.

    Side effects: reads and rewrites the 'rkdthesaurus.json' and
    'imagecache.json' cache files and writes 'rkdportraits14751825.trig'.
    """
    # Bug fix: the original signature used a mutable default
    # (identifiers=[]), which is shared across calls; use None as sentinel.
    if identifiers is None:
        identifiers = []

    ns = Namespace("https://data.create.humanities.uva.nl/id/rkd/")

    ds = Dataset()
    ds.bind('rdfs', RDFS)
    ds.bind('schema', schema)
    ds.bind('sem', sem)
    ds.bind('bio', bio)
    ds.bind('foaf', foaf)
    ds.bind('void', void)
    ds.bind('skos', SKOS)
    ds.bind('owl', OWL)
    ds.bind('dc', dc)

    ds.bind('rkdArtist', URIRef("https://data.rkd.nl/artists/"))
    ds.bind('rkdThes', nsThesaurus)
    ds.bind('rkdPerson', nsPerson)
    ds.bind('rkdImage', URIRef("https://rkd.nl/explore/images/"))
    ds.bind('rkdThumb', URIRef("https://images.rkd.nl/rkd/thumb/650x650/"))

    ds.bind('aat', URIRef("http://vocab.getty.edu/aat/"))

    ## First the images

    g = rdfSubject.db = ds.graph(identifier=ns)

    # Load cache thesaurus
    if os.path.isfile('rkdthesaurus.json'):
        with open('rkdthesaurus.json') as infile:
            thesaurusDict = json.load(infile)
    else:
        thesaurusDict = dict()

    # Load cache images
    if os.path.isfile('imagecache.json'):
        with open('imagecache.json') as infile:
            imageCache = json.load(infile)
    else:
        imageCache = dict()

    # to fetch all identifiers from the search
    if search:
        thesaurusDict, imageCache = parseURL(search,
                                             thesaurusDict=thesaurusDict,
                                             imageCache=imageCache)
    elif cache:
        # assume that everything in the thesaurus is also cached
        for doc in cache.values():
            parseData(doc, thesaurusDict=thesaurusDict)
    elif identifiers:
        for i in identifiers:
            thesaurusDict, imageCache = parseURL(APIURL + str(i),
                                                 thesaurusDict=thesaurusDict,
                                                 imageCache=imageCache)

    # Any images without labels?
    # These were not included in the search, but fetch them anyway.
    print("Finding referred images that were not included")
    q = """
    PREFIX schema: <http://schema.org/>
    SELECT ?uri WHERE {
        ?role a schema:Role ; schema:isRelatedTo ?uri .

        FILTER NOT EXISTS { ?uri schema:name ?name }
    }
    """
    images = g.query(q)
    print(f"Found {len(images)}!")
    for i in images:
        identifier = str(i['uri']).replace('https://rkd.nl/explore/images/',
                                           '')
        thesaurusDict, imageCache = parseURL(
            "https://api.rkd.nl/api/record/images/" + str(identifier),
            thesaurusDict=thesaurusDict,
            imageCache=imageCache)

    ## Then the thesaurus
    print("Converting the thesaurus")
    rdfSubject.db = ds.graph(identifier=ns.term('thesaurus/'))

    ids = list(thesaurusDict.keys())
    for i in ids:
        _, thesaurusDict = getThesaurus(i, thesaurusDict, 'concept')

    # Save updated cache
    with open('rkdthesaurus.json', 'w') as outfile:
        json.dump(thesaurusDict, outfile)

    with open('imagecache.json', 'w') as outfile:
        json.dump(imageCache, outfile)

    ## Serialize
    print("Serializing!")
    ds.serialize('rkdportraits14751825.trig', format='trig')
def main():
    """Build the INDRA activity-type hierarchy and save it.

    Declares 'isa' links from each activity kind to its parent type and
    writes the graph via save_hierarchy().
    """
    indra_ns = 'http://sorger.med.harvard.edu/indra/'
    relations = Namespace(indra_ns + 'relations/')
    activities = Namespace(indra_ns + 'activities/')
    isa = relations.term('isa')

    graph = Graph()
    hierarchy = (
        ('transcription', 'activity'),
        ('catalytic', 'activity'),
        ('gtpbound', 'activity'),
        ('kinase', 'catalytic'),
        ('phosphatase', 'catalytic'),
        ('gef', 'catalytic'),
        ('gap', 'catalytic'),
    )
    for child, parent in hierarchy:
        graph.add((activities.term(child), isa, activities.term(parent)))

    save_hierarchy(graph, hierarchy_path)
Beispiel #14
0
saaInventory = Namespace(
    "https://data.goldenagents.org/datasets/montiasgpi/Inventory/")
saaItem = Namespace(
    "https://data.goldenagents.org/datasets/montiasgpi/Inventory/Item/")

tgn = Namespace("http://vocab.getty.edu/tgn/")

ARCHIVE_DESCRIPTIONS = 'data/GPI/getty_dutch_archival_descriptions_utf8.csv'
ARCHIVE_ITEMS = 'data/GPI/getty_dutch_archival_contents_utf8.csv'

############################################################
# Mapping to the Getty Thesaurus of Geographic Names (TGN) #
############################################################

COUNTRIES = {
    'Netherlands': tgn.term('7016845'),
    'Belgium': tgn.term('1000063'),
    'Germany': tgn.term('7000084')
}

CITIES = {
    'Alkmaar': tgn.term('7007057'),
    'Amsterdam': tgn.term('7006952'),
    'Antwerp': tgn.term('7007856'),
    'Dordrecht': tgn.term('7006798'),
    'Haarlem': tgn.term('7007048'),
    'Hamburg': tgn.term('7005289'),
    'Hoorn': tgn.term('7007056'),
    'Leiden': tgn.term('7006809'),
    'Hague, The': tgn.term('7006810'),
    'Utrecht': tgn.term('7006926'),
Beispiel #15
0
)
div_items_list = pq(d).find('.feed-list').find('li').find('div').filter(
    '.item')
for item in div_items_list:

    dates = pq(item).find('div').filter('.left-col.calendar').find('span')
    month = pq(dates).filter('.month').text()
    year = pq(dates).filter('.year').text()
    day = pq(dates).filter('.day').text()
    time = pq(
        pq(item).find('div').filter('.middle-col.desc.hidden-xs').find(
            'span').filter('.meta')[1]).text().replace(' Time: ', '')
    datestr = '{}-{}-{} {}'.format(year, month, day, time)

    description = pq(item).find('div').find('span').filter('.desc').text()
    link = pq(item).find('div').filter('.middle-col.desc.hidden-xs').find(
        'a').attr('href')
    title = pq(item).find('div').filter('.middle-col.desc.hidden-xs').find(
        'a').filter('.title').text()
    type = pq(
        pq(item).find('div').filter('.middle-col.desc.hidden-xs').find(
            'span').filter('.meta').outerHtml()).text()
    event = URIRef(link)
    dt = datetime.strptime(datestr.lower(), '%Y-%b-%d %H:%M')

    g.add((event, FOAF.title, Literal(title)))
    g.add((event, n.term('description'), Literal(description)))
    g.add((event, n.term('date'), Literal(dt, datatype=XSD.date)))
    g.add((event, n.term('type'), Literal(type)))

print(g.serialize(format='application/rdf+xml'))
entity = entities.find_one({'name': 'Rudolf Henke'})
all_entities = entities.find()
for i,e in enumerate(all_entities):
    if (i % (entities.count() / 20) == 0):
        print("%s of %s processed" % (i, entities.count()))
    converter.convert_entity(e)

def get_uri_by_object_id(objectId):
    """Return the URI and rdf:type recorded in graph g for a Mongo object id.

    Yields a {'uri': ..., 'type': ...} dict for the first match, or None
    when nothing matches.
    """
    g.bind("lobbyOntology", 'https://studi.f4.htw-berlin.de/~s0539710/lobbyradar/ontology#')
    sparql = "SELECT ?uri ?type WHERE { ?uri lobbyOntology:mongo_id '%s' ; <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> ?type }" % str(objectId)
    for row in g.query(sparql):
        return {'uri': row['uri'], 'type': row['type']}


positions_map = {
    u"Vorstand": lobbyOntology.term("executiveOf"),
    u"Mitglied": org.term("memberOf"),
    u"Ordentliches Mitglied": lobbyOntology.term("fullMemberOf"),
    u"Stellvertretendes Mitglied": lobbyOntology.term("deputyMemberOf"),
    u"Arbeitsverh\xe4ltnis": lobbyOntology.term("employeeOf"),
    u"Aufsichtsratsmitglied": lobbyOntology.term("supervisoryMemberOf"),
    u"Mitglied des Aufsichtsrates": lobbyOntology.term("supervisoryMemberOf"),
    u"Mitglied des Aufsichtsrats": lobbyOntology.term("supervisoryMemberOf"),
    u"Mitglied des Kuratoriums": lobbyOntology.term("kuratoriumMemberOf"),
    u"Mitglied des Stiftungsrates": lobbyOntology.term("kuratoriumMemberOf"),
    u"Mitglied des Beirates": lobbyOntology.term("advisoryMemberOf"),
    u"Mitglied des Vorstandes": lobbyOntology.term("executiveOf"),
    u"Vorstandsmitglied": lobbyOntology.term("executiveOf"),
    u"Staatssekret\xe4r": lobbyOntology.term("secretaryOf"),
    u'Parlamentarischer Staatssekret\xe4r': lobbyOntology.term("secretaryOf"),
    u"Mitglied im Rundfunkrat": lobbyOntology.term("mediaAdvisoryMemberOf"),
Beispiel #17
0
import sys
from rdflib import Graph, Namespace, Literal
import csv
import urllib2

if __name__ == '__main__':
    indra_ns = 'http://sorger.med.harvard.edu/indra/'
    if len(sys.argv) > 1:
        proteins_file = sys.argv[1]
    else:
        proteins_file = '../../data/ras_pathway_proteins.csv'
    rn = Namespace(indra_ns + 'relations/')
    en = Namespace(indra_ns + 'entities/')
    g = Graph()

    has_name = rn.term('hasName')
    has_long_name = rn.term('hasLongName')
    has_synonym = rn.term('hasSynonym')
    isa = rn.term('isa')

    # Read BEL family names
    res = urllib2.urlopen('http://resource.belframework.org/belframework/'+\
        'latest-release/namespace/selventa-protein-families.belns')
    belns_text = res.read()
    start = belns_text.find('[Values]')
    lines = belns_text[start:].split('\n')
    bel_family_names = []
    for l in lines:
        if l.endswith(' Family|P'):
            family_name = l[:-2].replace(' ', '_').replace('/', '_')
            bel_family_names.append(family_name)
Beispiel #18
0
        predicate_mapping,
        category_mapping,
        property_mapping,
    ]

    for mapping in mappings:
        for key, value in mapping.items():
            if iri.lower() == key.lower():
                return value

    return contract(iri)

OBO = Namespace('http://purl.obolibrary.org/obo/')

top_level_terms = {
    OBO.term('CL_0000000'): 'cell',
    OBO.term('UBERON_0001062'): 'anatomical_entity',
    OBO.term('PATO_0000001'): 'quality',
    OBO.term('NCBITaxon_131567'): 'organism',
    OBO.term('CLO_0000031'): 'cell_line',
    OBO.term('MONDO_0000001'): 'disease',
    OBO.term('CHEBI_23367'): 'molecular_entity',
    OBO.term('CHEBI_23888'): 'drug',
    OBO.term('UPHENO_0001001'): 'phenotypic_feature',
    OBO.term('GO_0008150'): 'biological_process',
    OBO.term('GO_0009987'): 'cellular_process',
    OBO.term('GO_0005575'): 'cellular_component',
    OBO.term('GO_0003674'): 'molecular_function',
    OBO.term('SO_0000704'): 'gene',
    OBO.term('GENO_0000002'): 'variant_locus',
    OBO.term('GENO_0000536'): 'genotype',
Beispiel #19
0
                # print("Dominios", dir(property_i))
                # print("Rangos", property_i.ranges)
                list_all_class[domain.uri].add_property(property_object)

contextos = []
list_all_class_string = []
from rdflib import URIRef, BNode, Literal, Graph, RDF, Namespace

FOAF.knows
g = Graph()
rr = Namespace('http://www.w3.org/ns/r2rml#')
g.namespace_manager.bind("rr", rr)
for clase_i in list_all_class.items():
    # contextos.append({"name": clase_i[1].get_formated_name(), "uri": clase_i[1].uri})
    class_current = URIRef("#{}".format(clase_i[1].get_formated_name()))
    g.add((class_current, RDF.type, rr.term('TriplesMap')))
    g.add((class_current, Literal('rr:logicalTable'),
           Literal(clase_i[1].get_formated_name())))

    g.add((class_current, Literal('rr:subjectMap'),
           Literal(clase_i[1].get_formated_name())))

    subMap = BNode()

    g.add((class_current, Literal('rr:subjectMap'), subMap))
    g.add((subMap, rr.template,
           Literal(clase_i[1].uri.replace("#", "/") + "/{ID}")))
    g.add((subMap, rr.termType, rr.IRI))
    g.add((subMap, rr.term('class'), Literal("<" + clase_i[1].uri + ">")))
    for property_i in clase_i[1].properties:
        # contextos.append({"name": property_i.get_formated_name(), "uri": property_i.uri})
Beispiel #20
0
class RdfSink(Sink):
    """
    RdfSink is responsible for writing data as records
    to an RDF serialization.

    .. note::
        Currently only RDF N-Triples serialization is supported.

    Parameters
    ----------
    owner: Transformer
        Transformer to which the GraphSink belongs
    filename: str
        The filename to write to
    format: str
        The file format (``nt``)
    compression: str
        The compression type (``gz``)
    reify_all_edges: bool
        Whether or not to reify all the edges
    kwargs: Any
        Any additional arguments

    """
    def __init__(
        self,
        owner,
        filename: str,
        format: str = "nt",
        compression: Optional[bool] = None,
        reify_all_edges: bool = False,
        **kwargs: Any,
    ):
        """Open *filename* for writing and set up namespaces and caches.

        Raises
        ------
        ValueError
            If *format* is anything other than ``"nt"``.
        """
        super().__init__(owner)
        if format not in {"nt"}:
            raise ValueError(
                f"Only RDF N-Triples ('nt') serialization supported.")
        # Namespaces are resolved through the prefix manager's prefix map.
        self.DEFAULT = Namespace(self.prefix_manager.prefix_map[""])
        # self.OBO = Namespace('http://purl.obolibrary.org/obo/')
        self.OBAN = Namespace(self.prefix_manager.prefix_map["OBAN"])
        self.PMID = Namespace(self.prefix_manager.prefix_map["PMID"])
        self.BIOLINK = Namespace(self.prefix_manager.prefix_map["biolink"])
        self.toolkit = get_toolkit()
        # Maps IRIs back to property names for non-Biolink predicates;
        # populated later via set_reverse_predicate_mapping().
        self.reverse_predicate_mapping = {}
        self.property_types = get_biolink_property_types()
        self.cache = {}
        self.reify_all_edges = reify_all_edges
        # Types that identify an association/reified-edge node.
        self.reification_types = {
            RDF.Statement,
            self.BIOLINK.Association,
            self.OBAN.association,
        }
        # Output is gzip-compressed only when compression == "gz".
        if compression == "gz":
            f = gzip.open(filename, "wb")
        else:
            f = open(filename, "wb")
        self.FH = f
        self.encoding = "ascii"

    def set_reverse_predicate_mapping(self, m: Dict) -> None:
        """
        Set reverse predicate mappings.

        Use this method to register predicates that are not part of the
        Biolink Model: for each (property name, IRI) pair in *m*, the IRI
        is mapped back to the property name as a URIRef.

        Parameters
        ----------
        m: Dict
            A dictionary where the keys are property names and values
            are their corresponding IRI.

        """
        self.reverse_predicate_mapping.update(
            {iri: URIRef(prop_name) for prop_name, iri in m.items()})

    def set_property_types(self, m: Dict) -> None:
        """
        Set export type for properties that are not in
        Biolink Model.

        Parameters
        ----------
        m: Dict
            A dictionary where the keys are property names and values
            are their corresponding types.

        """
        for prop_name, prop_type in m.items():
            element_uri, _, predicate, fallback_name = process_predicate(
                self.prefix_manager, prop_name)
            # Prefer the Biolink element URI, then the predicate,
            # then the raw property name.
            key = element_uri or predicate or fallback_name
            self.property_types[key] = prop_type

    def write_node(self, record: Dict) -> None:
        """
        Write a node record as triples.

        Every property of the record except ``id`` and ``iri`` becomes one
        triple per value, with the record's ``id`` as subject.

        Parameters
        ----------
        record: Dict
            A node record

        """
        for k, v in record.items():
            # 'id' becomes the subject of every triple; 'iri' is skipped.
            if k in {"id", "iri"}:
                continue
            (
                element_uri,
                canonical_uri,
                predicate,
                property_name,
            ) = self.process_predicate(k)
            if element_uri is None:
                # not a biolink predicate
                if k in self.reverse_predicate_mapping:
                    prop_uri = self.reverse_predicate_mapping[k]
                    # prop_uri = self.prefix_manager.contract(prop_uri)
                else:
                    prop_uri = k
            else:
                # Prefer the canonical URI when one exists.
                prop_uri = canonical_uri if canonical_uri else element_uri
            prop_type = self._get_property_type(prop_uri)
            log.debug(
                f"prop {k} has prop_uri {prop_uri} and prop_type {prop_type}")
            prop_uri = self.uriref(prop_uri)
            # Multi-valued properties emit one triple per element.
            if isinstance(v, (list, set, tuple)):
                for x in v:
                    value_uri = self._prepare_object(k, prop_type, x)
                    self._write_triple(self.uriref(record["id"]), prop_uri,
                                       value_uri)
            else:
                value_uri = self._prepare_object(k, prop_type, v)
                self._write_triple(self.uriref(record["id"]), prop_uri,
                                   value_uri)

    def _write_triple(self, s: URIRef, p: URIRef, o: Union[URIRef,
                                                           Literal]) -> None:
        """
        Serialize a triple.

        Formats the triple as a single N-Triples row and writes it straight
        to the open file handle using the sink's encoding.

        Parameters
        ----------
        s: rdflib.URIRef
            The subject
        p: rdflib.URIRef
            The predicate
        o: Union[rdflib.URIRef, rdflib.Literal]
            The object

        """
        self.FH.write(
            _nt_row((s, p, o)).encode(self.encoding, "_rdflib_nt_escape"))

    def write_edge(self, record: Dict) -> None:
        """
        Write an edge record as triples.

        An edge is either written as a plain subject-predicate-object triple,
        or reified into a node (with the edge properties attached to that
        node) when ``self.reify_all_edges`` is set, or when the edge's type,
        association_type, or any of its categories matches a known
        association type.

        Parameters
        ----------
        record: Dict
            An edge record

        """
        ecache = []
        # Association CURIEs from both the configured reification types and
        # the Biolink Model toolkit.
        associations = set(
            [self.prefix_manager.contract(x) for x in self.reification_types])
        associations.update([
            str(x)
            for x in set(self.toolkit.get_all_associations(formatted=True))
        ])
        if self.reify_all_edges:
            reify = True
        else:
            # BUGFIX: the original tested `any(record["category"]) in
            # associations`, which checks whether the *boolean* result of
            # any() is in the set — always False. We now test each category.
            reify = (("type" in record and record["type"] in associations)
                     or ("association_type" in record
                         and record["association_type"] in associations)
                     or ("category" in record
                         and any(c in associations
                                 for c in record["category"])))
        if reify:
            reified_node = self.reify(record["subject"], record["object"],
                                      record)
            # The core s-p-o triple is cached and written last, after all
            # the reified node's property triples.
            ecache.append((reified_node["subject"],
                           reified_node["predicate"],
                           reified_node["object"]))
            self._write_reified_properties(reified_node)
        else:
            s = self.uriref(record["subject"])
            p = self.uriref(record["predicate"])
            o = self.uriref(record["object"])
            self._write_triple(s, p, o)
        for t in ecache:
            self._write_triple(t[0], t[1], t[2])

    def _write_reified_properties(self, reified_node: Dict) -> None:
        """
        Write all properties of a reified edge node as triples.

        Factors out the property-serialization loop that was previously
        duplicated across both reification branches of ``write_edge``.

        Parameters
        ----------
        reified_node: Dict
            The reified node, as produced by ``self.reify``

        """
        n = reified_node["id"]
        for prop, value in reified_node.items():
            # Bookkeeping keys are not serialized as properties.
            if prop in {"id", "association_id", "edge_key"}:
                continue
            (
                element_uri,
                canonical_uri,
                predicate,
                property_name,
            ) = self.process_predicate(prop)
            if element_uri:
                # Prefer the canonical (slot_uri) form when available.
                prop_uri = canonical_uri if canonical_uri else element_uri
            elif prop in self.reverse_predicate_mapping:
                prop_uri = self.reverse_predicate_mapping[prop]
            else:
                prop_uri = predicate
            prop_type = self._get_property_type(prop)
            log.debug(
                f"prop {prop} has prop_uri {prop_uri} and prop_type {prop_type}"
            )
            prop_uri = self.uriref(prop_uri)
            # Multi-valued properties produce one triple per value.
            values = value if isinstance(value, list) else [value]
            for x in values:
                value_uri = self._prepare_object(prop, prop_type, x)
                self._write_triple(URIRef(n), prop_uri, value_uri)

    def uriref(self, identifier: str) -> URIRef:
        """
        Generate a rdflib.URIRef for a given string.

        Parameters
        ----------
        identifier: str
            Identifier as string.

        Returns
        -------
        rdflib.URIRef
            URIRef form of the input ``identifier``

        """
        # UUID URNs pass through untouched.
        if identifier.startswith("urn:uuid:"):
            return URIRef(identifier)
        # Known property names map directly to their URIs.
        if identifier in reverse_property_mapping:
            return URIRef(reverse_property_mapping[identifier])
        # Otherwise treat the identifier as an entity.
        fixed = identifier
        if fixed.startswith(":"):
            # TODO: this should be handled upstream by prefixcommons-py
            fixed = fixed.replace(":", "", 1)
        if " " in identifier:
            fixed = fixed.replace(" ", "_")

        if self.prefix_manager.is_curie(fixed):
            uri = self.prefix_manager.expand(fixed)
            # Expansion returning the input unchanged means the prefix is
            # unknown; fall back to the default namespace.
            if fixed == uri:
                uri = self.DEFAULT.term(fixed)
        elif self.prefix_manager.is_iri(fixed):
            uri = fixed
        else:
            uri = self.DEFAULT.term(fixed)
        return URIRef(uri)

    def _prepare_object(self, prop: str, prop_type: str,
                        value: Any) -> rdflib.term.Identifier:
        """
        Prepare the object of a triple.

        URI-typed properties yield a URIRef when the value is a CURIE or a
        valid IRI; everything else is serialized as a typed Literal.

        Parameters
        ----------
        prop: str
            property name
        prop_type: str
            property type
        value: Any
            property value

        Returns
        -------
        rdflib.term.Identifier
            An instance of rdflib.term.Identifier

        """
        if prop_type in ("uriorcurie", "xsd:anyURI"):
            if isinstance(value, str):
                if PrefixManager.is_curie(value):
                    return self.uriref(value)
                if PrefixManager.is_iri(value):
                    # An IRI-looking string may still be malformed; only
                    # emit a URIRef when it validates.
                    if _is_valid_uri(value):
                        return URIRef(value)
                    return Literal(value)
            return Literal(value)
        if prop_type.startswith("xsd"):
            return Literal(value, datatype=self.prefix_manager.expand(prop_type))
        # Unknown property types default to xsd:string literals.
        return Literal(value,
                       datatype=self.prefix_manager.expand("xsd:string"))

    def _get_property_type(self, p: str) -> str:
        """
        Get type for a given property name.

        Parameters
        ----------
        p: str
            property name

        Returns
        -------
        str
            The type for property name

        """
        # TODO: this should be properly defined in the model
        default_uri_types = {
            "biolink:type",
            "biolink:category",
            "biolink:subject",
            "biolink:object",
            "biolink:relation",
            "biolink:predicate",
            "rdf:type",
            "rdf:subject",
            "rdf:predicate",
            "rdf:object",
        }
        if p in default_uri_types:
            t = "uriorcurie"
        else:
            if p in self.property_types:
                t = self.property_types[p]
            elif f":{p}" in self.property_types:
                t = self.property_types[f":{p}"]
            elif f"biolink:{p}" in self.property_types:
                t = self.property_types[f"biolink:{p}"]
            else:
                t = "xsd:string"
            # if value:
            #     if isinstance(value, (list, set, tuple)):
            #         x = value[0]
            #         if self.graph.has_node(x):
            #             t = 'uriorcurie'
            #         else:
            #             t = 'xsd:string'
            #     else:
            #         if self.graph.has_node(value):
            #             t = 'uriorcurie'
            #         else:
            #             t = 'xsd:string'
        return t

    def process_predicate(self, p: Optional[Union[URIRef, str]]) -> Tuple:
        """
        Process a predicate where the method checks if there is a mapping in Biolink Model.

        Results are memoized in ``self.cache`` keyed by the raw predicate,
        so repeated predicates skip the Biolink lookup entirely.

        Parameters
        ----------
        p: Optional[Union[URIRef, str]]
            The predicate

        Returns
        -------
        Tuple
            A tuple that contains the Biolink CURIE (if available), the Biolink slot_uri CURIE (if available),
            the CURIE form of p, the reference of p

        """
        if p in self.cache:
            # already processed this predicate before; pull from cache
            element_uri = self.cache[p]["element_uri"]
            canonical_uri = self.cache[p]["canonical_uri"]
            predicate = self.cache[p]["predicate"]
            property_name = self.cache[p]["property_name"]
        else:
            # haven't seen this property before; map to element
            if self.prefix_manager.is_iri(p):
                # IRIs are contracted to their CURIE form first.
                predicate = self.prefix_manager.contract(str(p))
            else:
                predicate = None
            if self.prefix_manager.is_curie(p):
                # p is already a CURIE; its reference is the property name.
                # Note: this overwrites any contraction computed above.
                property_name = self.prefix_manager.get_reference(p)
                predicate = p
            else:
                if predicate and self.prefix_manager.is_curie(predicate):
                    # p was an IRI that contracted to a CURIE.
                    property_name = self.prefix_manager.get_reference(
                        predicate)
                else:
                    # Plain name: use as-is and synthesize a bare CURIE.
                    property_name = p
                    predicate = f":{p}"
            element = self.get_biolink_element(p)
            canonical_uri = None
            if element:
                if isinstance(element, SlotDefinition):
                    # predicate corresponds to a biolink slot
                    if element.definition_uri:
                        element_uri = self.prefix_manager.contract(
                            element.definition_uri)
                    else:
                        element_uri = (
                            f"biolink:{sentencecase_to_snakecase(element.name)}"
                        )
                    if element.slot_uri:
                        canonical_uri = element.slot_uri
                elif isinstance(element, ClassDefinition):
                    # this will happen only when the IRI is actually
                    # a reference to a class
                    element_uri = self.prefix_manager.contract(
                        element.class_uri)
                else:
                    element_uri = f"biolink:{sentencecase_to_camelcase(element.name)}"
                # Attributes use snake_case CURIEs regardless of element kind.
                if "biolink:Attribute" in get_biolink_ancestors(element.name):
                    element_uri = f"biolink:{sentencecase_to_snakecase(element.name)}"
                if not predicate:
                    predicate = element_uri
            else:
                # no mapping to biolink model;
                # look at predicate mappings
                element_uri = None
                if p in self.reverse_predicate_mapping:
                    property_name = self.reverse_predicate_mapping[p]
                    predicate = f":{property_name}"
            # Memoize for subsequent calls with the same predicate.
            self.cache[p] = {
                "element_uri": element_uri,
                "canonical_uri": canonical_uri,
                "predicate": predicate,
                "property_name": property_name,
            }
        return element_uri, canonical_uri, predicate, property_name

    def get_biolink_element(self, predicate: Any) -> Optional[Element]:
        """
        Returns a Biolink Model element for a given predicate.

        The predicate is normalized to a CURIE (if it is an IRI) and then to
        its reference part before being looked up in the toolkit. When no
        direct element exists, a lookup by mapping is attempted as a
        fallback; mapping errors are logged and yield ``None``.

        Parameters
        ----------
        predicate: Any
            The CURIE of a predicate

        Returns
        -------
        Optional[Element]
            The corresponding Biolink Model element

        """
        toolkit = get_toolkit()
        # Normalize: IRI -> CURIE, CURIE -> reference.
        curie = (self.prefix_manager.contract(predicate)
                 if self.prefix_manager.is_iri(predicate) else predicate)
        reference = (self.prefix_manager.get_reference(curie)
                     if self.prefix_manager.is_curie(curie) else curie)
        element = toolkit.get_element(reference)
        if not element:
            # Fall back to resolving via Biolink mappings.
            try:
                mapping = toolkit.get_element_by_mapping(predicate)
                if mapping:
                    element = toolkit.get_element(mapping)
            except ValueError as e:
                self.owner.log_error(
                    entity=str(predicate),
                    error_type=ErrorType.INVALID_EDGE_PREDICATE,
                    message=str(e))
                element = None
        return element

    def reify(self, u: str, v: str, data: Dict) -> Dict:
        """
        Create a node representation of an edge.

        The returned dict is a copy of ``data`` with ``subject``,
        ``predicate``, and ``object`` replaced by their URIRef forms,
        an ``id`` (from ``data['id']`` or a freshly generated UUID),
        a ``type`` of ``biolink:Association``, and any ``category``
        removed.

        Parameters
        ----------
        u: str
            Subject
        v: str
            Object
        data: Dict
            Edge data

        Returns
        -------
        Dict
            The reified node

        """
        s = self.uriref(u)
        p = self.uriref(data["predicate"])
        o = self.uriref(v)

        if "id" in data:
            node_id = self.uriref(data["id"])
        else:
            # generate a UUID for the reified node
            node_id = self.uriref(generate_uuid())
        reified_node = data.copy()
        if "category" in reified_node:
            # category is dropped; the node is typed as an Association below
            del reified_node["category"]
        reified_node["id"] = node_id
        reified_node["type"] = "biolink:Association"
        reified_node["subject"] = s
        reified_node["predicate"] = p
        reified_node["object"] = o
        return reified_node

    def finalize(self) -> None:
        """
        Perform any operations after writing the file.

        Closes the underlying output file handle.
        """
        handle = self.FH
        handle.close()