Example #1
    def format_adjective_satelites(self):
        """"""

        count = 0

        self.logger.info(f"start formatting AdjectiveSatelliteSynset")
        satellite_synsets = self.graph.subjects(
            RDF.type, SCHEMA.AdjectiveSatelliteSynset)
        for synset in satellite_synsets:
            if synset.endswith("-a"):
                count += 1
                new_synset = URIRef(synset[:-2] + "-s")  # swap only the trailing "-a"
                self.logger.debug(
                    f"replacing '{synset.n3()}' by '{new_synset.n3()}'")
                self._replace_node(synset, new_synset,
                                   "format_adjective_satelites")
                # replace synset id
                synset_id = synset.split("synset-")[-1]
                new_synset_id = new_synset.split("synset-")[-1]
                self._drop_triple(
                    (new_synset, SCHEMA.synsetId, Literal(synset_id)),
                    "format_adjective_satelites")
                self._add_triple(
                    (new_synset, SCHEMA.synsetId, Literal(new_synset_id)),
                    "format_adjective_satelites")
            else:
                self.logger.warning(
                    f"ill formed AdjectiveSatelliteSynset '{synset.n3()}'")

        # resulting added and removed triples
        self.logger.info(f"action applied to {count} valid synsets"
                         f"\n\ttotal: {self.added_triples} triples added"
                         f"\n\ttotal: {self.removed_triples} triples removed")
Example #2
 def test_html_decoded_entity_xhtml(self):
     if platform.system() == "Java":
         raise SkipTest('problem with HTML entities for html5lib in Jython')
     g = ConjunctiveGraph()
     g.parse(data=htmlentitydecode(html), format='rdfa')
     self.assertEqual(len(g), 1)
     self.assertEqual(g.value(URIRef("http://example.com"),
                               URIRef("http://purl.org/dc/terms/title")
                               ), u"Exampl\xe9")
Example #3
 def test_html_entity_xhtml(self):
     if sys.version_info[0] == 3:
         raise SkipTest('minidom parser strips HTML entities in Python 3.2')
     if platform.system() == "Java":
         raise SkipTest('problem with HTML entities for html5lib in Jython')
     g = ConjunctiveGraph()
     warnings.simplefilter('ignore', UserWarning)
     g.parse(data=html, format='rdfa')
     self.assertEqual(len(g), 1)
     self.assertTrue(
         g.value(URIRef("http://example.com"),
                 URIRef("http://purl.org/dc/terms/title")).eq(u"Exampl"))
Example #4
def main():
    parser = argparse.ArgumentParser(
        description='OMIA integration test',
        formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('--input',
                        '-i',
                        type=str,
                        required=True,
                        help='Location of input ttl file')

    args = parser.parse_args()

    graph = ConjunctiveGraph()
    graph.parse(args.input, format=rdflib_util.guess_format(args.input))

    # "is model of": "RO:0003301"
    # is_model_of = URIRef('OBO:RO_0003301')
    is_model_of = URIRef('http://purl.obolibrary.org/obo/RO_0003301')

    # if we curie_map & globaltt here we could ...
    # (pfx lcl) = globaltt["is model of"].split(':')
    # iri = curie_map[pfx] + '_'.join((pfx, lcl))
    # is_model_of = URIRef(iri)

    models = graph.subject_objects(is_model_of)
    model_len = len(set(list(models)))

    if model_len < EXPECTED_PAIRS:
        LOG.error("Not enough <RO:is model of> predicates in graph: found {}, "
                  "expected {} check omia log for warnings".format(
                      model_len, EXPECTED_PAIRS))
        exit(1)
    # else:
    #    LOG.info(
    #        "Found {} model_of predicates in graph, expected at least: {}".format(
    #            model_len, EXPECTED_PAIRS))

    breed = 'https://monarchinitiative.org/model/OMIA-breed:758'
    disease = 'http://omim.org/entry/305100'

    omim_diseases = graph.objects(subject=URIRef(breed), predicate=is_model_of)

    if list(omim_diseases) != [URIRef(disease)]:
        LOG.error("Missing breed to omim triple for %s", breed)
        LOG.error(list(omim_diseases))
        exit(1)

    LOG.info("PASSED")
Example #5
def main():
    parser = argparse.ArgumentParser(
        description='OMIA integration test',
        formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument(
        '--input', '-i', type=str, required=True,
        help='Location of input ttl file')

    args = parser.parse_args()

    graph = ConjunctiveGraph()
    graph.parse(args.input, format=rdflib_util.guess_format(args.input))

    model_of = URIRef('http://purl.obolibrary.org/obo/RO_0003301')

    models = graph.subject_objects(model_of)
    model_len = len(list(models))

    if model_len < EXPECTED_PAIRS:
        logger.error("Not enough model_of predicates in graph:"
                     " {} expected {} check omia log for"
                     " warnings".format(model_len, EXPECTED_PAIRS))
        exit(1)
    else:
        logger.info("PASSED")
Example #6
def canvases(uri):
    g = Graph(store=rdfstore(), identifier=URIRef(uri))

    subjs = [str(i) for i in g.subjects(NS.rdf['type'], NS.sc['Canvas'])]
    if len(subjs) == 0:
        subjs = [str(i) for i in g.subjects(NS.rdf['type'], NS.dms['Canvas'])]

    return subjs
Example #7
    def _build_graph(self, uri, expressions):
        for e in expressions:
            if len(e["then"]) == 0:
                continue
            # collect the objects reachable from uri via this expression's predicate
            uris = set()
            for o in self.graph.objects(URIRef(uri), URIRef(e["qname_expanded"])):
                uris.add(o)
            # fetch each referenced resource, add it to the graph, and recurse
            # into this expression's own "then" branch
            for u in uris:
                resource = self.resources.get(u)
                self.graph.parse(data=resource, format="n3")
                self._build_graph(u, e["then"])
Example #8
def main():
    parser = argparse.ArgumentParser(
        description='OMIA integration test',
        formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('--input',
                        '-i',
                        type=str,
                        required=True,
                        help='Location of input ttl file')

    args = parser.parse_args()

    graph = ConjunctiveGraph()
    graph.parse(args.input, format=rdflib_util.guess_format(args.input))

    model_of = URIRef('http://purl.obolibrary.org/obo/RO_0003301')

    models = graph.subject_objects(model_of)
    model_len = len(list(models))

    if model_len < EXPECTED_PAIRS:
        logger.error("Not enough model_of predicates in graph:"
                     " {} expected {} check omia log for"
                     " warnings".format(model_len, EXPECTED_PAIRS))
        exit(1)

    omim_diseases = graph.objects(
        subject=URIRef('https://monarchinitiative.org/model/OMIA-breed:18'),
        predicate=model_of)

    if list(omim_diseases) != [
            URIRef('http://purl.obolibrary.org/obo/OMIM_275220')
    ]:
        logger.error(
            "Missing breed to omim triple for {}".format('OMIA-breed:18'))
        exit(1)

    logger.info("PASSED")
Example #9
    def words_unique_pos(self):
        """"""

        count = 0
        nomlex_map = {"n": SCHEMA.noun, "v": SCHEMA.verb}

        self.logger.info(f"start formatting Words to unique POS")
        words = self._get_all_words()
        for word, in words:
            count += 1
            # accesses word POS
            senses = list(self.graph.subjects(SCHEMA.word, word))
            word_pos = {self._get_pos(sense) for sense in senses}

            # adds pos n or v if nomlex
            for nomlex_pos, nomlex_pred in nomlex_map.items():
                if (None, nomlex_pred, word) in self.graph:
                    word_pos.add(nomlex_pos)

            # splits word given its POS
            for pos in word_pos:
                self.logger.debug(
                    f"format word '{word.n3()}' with pos '{pos}'")
                new_word = URIRef(f"{word.toPython()}-{pos}")

                # copy predications
                self._copy_subject(word, new_word, "copy_word")

                # replace suitable senses
                senses_pos = [s for s in senses if self._get_pos(s) == pos]
                for sense in senses_pos:
                    self._add_triple((sense, SCHEMA.word, new_word),
                                     "copy_senses")

                # copies nomlex predications
                for nomlex_pos, nomlex_pred in nomlex_map.items():
                    if nomlex_pos == pos:
                        for subject in self.graph.subjects(nomlex_pred, word):
                            self._add_triple((subject, nomlex_pred, new_word))

                # add property pos
                self._add_triple((new_word, SCHEMA.pos, Literal(pos)),
                                 "property_pos")

            # after splitting drops old word
            self._drop_node(word, "drop_word")

        # resulting added and removed triples
        self.logger.info(f"action applied to {count} words"
                         f"\n\ttotal: {self.added_triples} triples added"
                         f"\n\ttotal: {self.removed_triples} triples removed")
Example #10
def create_graph(graph_id=URIRef(SCRIPT_NAME + '_GraphID')):
    memory_store = plugin.get('IOMemory', Store)()
    g = Graph(store=memory_store, identifier=graph_id)

    g.bind('mo', MO)
    g.bind('tl', TL)
    g.bind('event', EVENT)
    g.bind('dtl', DTL)
    g.bind('dc', DC)
    g.bind('xsd', XSD)
    g.bind('foaf', FOAF)
    g.bind('olo', OLO)
    g.bind('dbp', DBP)
    g.bind('rel', REL)
    g.bind('lj', LJ)

    return g
Example #11
    def fix_links_to_satelites(self, name=""):
        """"""
        count = 0

        query = "SELECT ?s2 WHERE { VALUES ?p { owns:adverbPertainsTo owns:derivationallyRelated owns:classifiesByUsage owns:classifiesByTopic owns:classifiesByRegion } ?s1 ?p ?s2 . ?s1 a owns:WordSense . FILTER NOT EXISTS { ?s2 a ?t . } }"
        result = self.graph.query(query)

        for sense, in result:
            new_sense = URIRef(sense.replace("-a-", "-s-"))
            if sense == new_sense:
                continue
            if (new_sense, RDF.type, SCHEMA.WordSense) in self.graph:
                count += 1
                self._replace_node(sense, new_sense, name)
            else:
                self.logger.warning(
                    f"sense {sense.n3()} not replaced by {new_sense.n3()}: undefined new sense"
                )

        # how many actions
        return count
Example #12
def output_process(args):
    prefix, temp_file, output_properties, base_dir = args
    graph = Graph()
    with open(temp_file) as fp:
        graph.parse(format='turtle', data=prefix + fp.read())
    output_fp = {}
    for s, p, o in graph:
        exclude_if = any([
            o in IGNORE_CLASSES,
            isinstance(o, BNode),
            list(graph.objects(o, URIRef('http://www.w3.org/2002/07/owl#onProperty')))
        ])
        if p in output_properties and not exclude_if:
            output = output_properties[p]
            if output not in output_fp:
                fd, file_path = tempfile.mkstemp(dir=os.path.join(base_dir, output))
                # reuse the fd from mkstemp instead of leaking it
                output_fp[output] = os.fdopen(fd, 'w')
            fp = output_fp[output]
            fp.write(json.dumps({'s': s.n3(), 'o': o.n3()}))
            fp.write('\n')
    for fp in output_fp.values():
        fp.close()
Example #13
    def build(task):
        logger.info('Building mappings')

        g = load_mappings_from_file(task.file_dep, uri_filter)

        if target.endswith('.nt'):
            stream = BytesIO()
            g.serialize(stream, format='nt')
            with open(target, 'wb') as fp:
                stream.seek(0)
                fp.writelines(sorted(stream.readlines()))

        elif target.endswith('.ttl'):
            for pf in prefixes:
                g.namespace_manager.bind(pf[0], URIRef(pf[1]))

            serializer = OrderedTurtleSerializer(g)
            with open(task.targets[0], 'wb') as fp:
                serializer.serialize(fp)
        else:
            raise Error('Unknown file ext')

        logger.info('Wrote %s', task.targets[0])
Example #14
 def create_uri(self, forWhat, uid):
     uri = "http://www.DTL.org/JE/" + forWhat + "/" + str(uid)
     return URIRef(uri)
Example #15
def create_uri(forWhat, uid):
    uri = "http://www.DTL.org/DISCOGRAPHY/" + forWhat + "/" + str(uid)
    return URIRef(uri)
Example #16
 def _build_query(self, s, depth, expressions, bindings):
     
     for e in expressions:
         
         # These are common to all queries.
         if depth == 0:
             bindings["s"] = URIRef(s)
         if depth == 0 and e["qname_expanded"] != "":
             bindings["p"] = URIRef(e["qname_expanded"])
         if depth == 1 and e["qname_expanded"] != "":
             bindings["p2"] = URIRef(e["qname_expanded"])
         
         if e["qname"] == ".":
             '''
                 Example:
                 {
                     "datatype": "xsd:string",
                     "filters": [],
                     "function": "",
                     "langtag_filter": "",
                     "qname": ".",
                     "qname_expanded": "",
                     "quotedtext": "",
                     "returnVal": "uri",
                     "then": []
                 },
             '''
             e["query"] = "."
             # e["values"] = [[s]]
             e["values"] = [URIRef(s)]
         elif len(e["then"]) == 0 and e["qname_expanded"] != "":
             
             if depth == 0:
                 '''
                 Depth is 0 (top) and there are no sub selects.
                 The most basic query.
                 These all should have a valid qname_expanded entry, yes?
                 Example:
                     {
                         "datatype": "xsd:string",
                         "function": "",
                         "langtag_filter": "",
                         "qname": "aic:objectTerm",
                         "qname_expanded": "http://definitions.artic.edu/ontology/1.0/objectTerm",
                         "quotedtext": "",
                         "returnVal": "objectTerm_uri",
                         "then": []
                     },
                 '''
                 q = self.preparedQueries["depth0_basic"]
             elif depth == 1:
                 '''
                 Example:
                     {
                         "datatype": "xsd:string",
                         "function": "",
                         "langtag_filter": "",
                         "qname": "aic:objectTerm",
                         "qname_expanded": "http://definitions.artic.edu/ontology/1.0/objectTerm",
                         "quotedtext": "",
                         "returnVal": "objectTerm_uid",
                         "then": [
                             {
                                 "function": "",
                                 "langtag_filter": "",
                                 "qname": "aic:uid",
                                 "qname_expanded": "http://definitions.artic.edu/ontology/1.0/uid",
                                 "quotedtext": "",
                                 "then": []
                             }
                         ]
                     },
                 '''
                 q = self.preparedQueries["depth1_basic"]
             
             e["query"] = q._original_args[0]
             values = self.graph.query(q, initBindings=bindings)
             e["values"] = values
         
         elif len(e["then"]) > 0 and e["qname_expanded"] == "":
             # qname_expanded is null, but there is depth here.
             # Probably a function.
             '''
                 Example:
                 {
                     "datatype": "xsd:string",
                     "function": "",
                     "langtag_filter": "",
                     "qname": "aic:objectTitle",
                     "qname_expanded": "http://definitions.artic.edu/ontology/1.0/objectTitle",
                     "quotedtext": "",
                     "returnVal": "objectTitle",
                     "then": [
                         {
                             "function": "fn:concat",
                             "langtag_filter": "",
                             "qname": "fn:concat(skos:prefLabel,\" (\",aic:languageText,\")\")",
                             "qname_expanded": "",
                             "quotedtext": "",
                             "then": [
                                 {
                                     "function": "",
                                     "langtag_filter": "",
                                     "qname": "skos:prefLabel",
                                     "qname_expanded": "http://www.w3.org/2004/02/skos/core#prefLabel",
                                     "quotedtext": "",
                                     "then": []
                                 },
                                 {
                                     "function": "",
                                     "langtag_filter": "",
                                     "qname": "",
                                     "qname_expanded": "",
                                     "quotedtext": "\" (\"",
                                     "then": []
                                 },
                                 {
                                     "function": "",
                                     "langtag_filter": "",
                                     "qname": "aic:languageText",
                                     "qname_expanded": "http://definitions.artic.edu/ontology/1.0/languageText",
                                     "quotedtext": "",
                                     "then": []
                                 },
                                 {
                                     "function": "",
                                     "langtag_filter": "",
                                     "qname": "",
                                     "qname_expanded": "",
                                     "quotedtext": "\")\"",
                                     "then": []
                                 }
                             ]
                         }
                     ]
                 },
             '''
             
             count = 1
             for e1 in e["then"]:
                 if e1["qname_expanded"] != "":
                     bindings["possible_p" + str(count)] = e1["qname_expanded"]
                     bindings["o" + str(count)] = e1["qname"].replace(':', '')
                     count += 1
             
             q = self.preparedQueries["depth1_optional"]
             
             e["query"] = q._original_args[0]
             values = self.graph.query(q, initBindings=bindings)
             e["values"] = values
         
         else:
             new_depth = depth + 1
             self._build_query(s, new_depth, e["then"], bindings)
Example #17
       OPTIONAL {?film dbpedia-owl:language ?language}
       OPTIONAL {?film dbpedia-owl:starring ?actor}
       OPTIONAL {?film dbpedia-owl:musicComposer ?musicComposer}
       OPTIONAL {?film dbpedia-owl:director ?director}
       OPTIONAL {?film dbpedia-owl:producer ?producer}
       OPTIONAL {?film dbpedia-owl:cinematography ?cinematography}
       OPTIONAL {?film dbpedia-owl:distributor ?distributor}
       OPTIONAL {?film dbpedia-owl:country ?country}
       OPTIONAL {?film dbpedia-owl:writer ?screenwriter}
       FILTER (LANG(?name)="en")
       FILTER (LANG(?abstract)="en")
       }
       LIMIT 100000
       """
sparql.setQuery(construct_query)
sparql.setReturnFormat(RDF)

# creating the RDF store and graph
memory_store = IOMemory()
graph_id = URIRef("http://www.semanticweb.org/store/movie")
g = Graph(store=memory_store, identifier=graph_id)
rdflib.plugin.register('sparql', rdflib.query.Processor,
                       'rdfextras.sparql.processor', 'Processor')
rdflib.plugin.register('sparql', rdflib.query.Result, 'rdfextras.sparql.query',
                       'SPARQLQueryResult')

# merging results and saving the store
g = sparql.query().convert()
g.parse("ontology.owl")
g.serialize("result_basic.owl", "xml")
Example #18
      ?determination_method dis:name ?determination_methodLabel .
      ?doctor dis:name ?doctorLabel .
      ?risk_factor dis:name ?risk_factorLabel .
      ?test dis:name ?testLabel .
      ?determination_method dis:name ?determination_methodLabel .
      ?country dis:name ?countryLabel .

      }
       WHERE{
       ?d wdt:P31 wd:Q200779 .
       OPTIONAL {?d wdt:P828 ?cause}
       OPTIONAL {?d wdt:P1995 ?specialty}

       SERVICE wikibase:label { bd:serviceParam wikibase:language "en". } 
       }"""

sparql.setQuery(construct_query)
sparql.setReturnFormat(RDF)

memory_store = IOMemory()
graph_id = URIRef('http://www.semanticweb.org/store/diseases')
g = Graph(store=memory_store, identifier=graph_id)
rdflib.plugin.register('sparql', rdflib.query.Processor,
                       'rdfextras.sparql.processor', 'Processor')
rdflib.plugin.register('sparql', rdflib.query.Result, 'rdfextras.sparql.query',
                       'SPARQLQueryResult')

g = sparql.query().convert()
g.parse("diseases_populated_2.owl", format="xml")
g.serialize("diseases_populated_3.owl", "xml")
Example #19
import pprint
import datetime
import sys, os
from rdflib import Namespace, URIRef
from ontology import is_a, load_ontology, parser

DEBUG = False


DBOWL = Namespace("http://dbpedia.org/ontology/")
SCHEMA = Namespace("http://schema.org/")
GRS = Namespace("http://www.georss.org/georss/")
DBPPROP = Namespace("http://dbpedia.org/property/")
GEO = Namespace("http://www.w3.org/2003/01/geo/wgs84_pos#")

GRSPOINT = GRS['point']
LABEL = URIRef('http://www.w3.org/2000/01/rdf-schema#label')
COMMENT = URIRef('http://www.w3.org/2000/01/rdf-schema#comment')

NOW = datetime.datetime.now()
TODAY = datetime.date.today()
ORIGIN = datetime.date(1, 1, 1)
timestamp = NOW.strftime('%Y-%m-%dT%H-%M-%S')
dumpfilename = 'dbpedia-%s.json.bz2' % timestamp

print('Populating DBPedia dump file %s' % dumpfilename)

entries = {}
redirects = set()

facets = {
    'http://schema.org/Event': 'event',
Example #20
n3data = """\
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix : <http://goonmill.org/2007/skill.n3#> .

:Foo a rdfs:Class .

:bar a :Foo ."""

ask_query = """\
ASK { 
    <http://goonmill.org/2007/skill.n3#bar> \
        a \
        <http://goonmill.org/2007/skill.n3#Foo> 
}"""

alicecontext = URIRef("http://example.org/foaf/aliceFoaf")

alicegraph = """\
# Named graph: http://example.org/foaf/aliceFoaf
@prefix  foaf:     <http://xmlns.com/foaf/0.1/> .
@prefix  rdf:      <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix  rdfs:     <http://www.w3.org/2000/01/rdf-schema#> .

_:a  foaf:name     "Alice" .
_:a  foaf:mbox     <mailto:[email protected]> .
_:a  foaf:knows    _:b .

_:b  foaf:name     "Bob" .
_:b  foaf:mbox     <mailto:[email protected]> .
_:b  foaf:nick     "Bobby" .
_:b  rdfs:seeAlso  <http://example.org/foaf/bobFoaf> ."""
Example #21
       OPTIONAL {?VideoGame dbo:producer ?producer}
       OPTIONAL {?VideoGame dbo:writer ?writer}
       OPTIONAL {?VideoGame dbo:genre ?genre}
       OPTIONAL {?VideoGame dbo:computingPlatform ?platform}      
       OPTIONAL {?VideoGame dbo:gameEngine ?engine}
       OPTIONAL {?VideoGame dbo:releaseDate ?releaseDate}
       OPTIONAL {?VideoGame dbp:artist ?artist}
       OPTIONAL {?VideoGame dbp:programmer ?programmer}
       OPTIONAL {?VideoGame dbp:award ?award}
       OPTIONAL {?VideoGame dbp:ign ?ign}
       }
       """

sparql.setQuery(construct_query)
sparql.setReturnFormat(RDF)

# Creating the RDF store and graph
memory_store = IOMemory()
graph_id = URIRef(
    "http://www.semanticweb.org/mengchizhang/ontologies/VideoGame")
g = Graph(store=memory_store, identifier=graph_id)
rdflib.plugin.register('sparql', rdflib.query.Processor,
                       'rdfextras.sparql.processor', 'Processor')
rdflib.plugin.register('sparql', rdflib.query.Result, 'rdfextras.sparql.query',
                       'SPARQLQueryResult')

# Merging results and saving the store
g = sparql.query().convert()
g.parse("VideoGame.owl")
g.serialize("example_final.owl", "xml")
Example #22
       OPTIONAL {?videoGame dbpedia-owl:genre ?genre}
       OPTIONAL {?videoGame foaf:name ?name}
       OPTIONAL {?platform foaf:name ?name}
       OPTIONAL {?videoGame dbpedia-owl:abstract ?abstract}
       OPTIONAL {?platform dbpedia-owl:abstract ?abstract}
       OPTIONAL {?videoGame dbpprop:developer ?developer}
       OPTIONAL {?platform dbpprop:developer ?developer}
       OPTIONAL {?videoGame dbpprop:media ?media}
       OPTIONAL {?videoGame dbpprop:publisher ?publisher}
       FILTER (LANG(?abstract)="en")

    }
      """

sparql.setQuery(construct_query)
sparql.setReturnFormat(RDF)

# creating the RDF store and graph
memory_store = IOMemory()
graph_id = URIRef("http://www.semanticweb.org/store/project1")
g = Graph(store=memory_store, identifier=graph_id)
rdflib.plugin.register('sparql', rdflib.query.Processor,
                       'rdfextras.sparql.processor', 'Processor')
rdflib.plugin.register('sparql', rdflib.query.Result, 'rdfextras.sparql.query',
                       'SPARQLQueryResult')

# merging results and saving the store
g = sparql.query().convert()
g.parse("project.owl")
g.serialize("new.owl", "xml")
Example #23
def create_uri(forWhat, uid):
    uri = IRI + forWhat + "/" + str(uid)
    return URIRef(uri)
Example #24
def make_property_graph(properties, args):
    graph = ConjunctiveGraph()
    output_graph = ConjunctiveGraph()

    GH = 'https://raw.githubusercontent.com'
    OBO = 'https://purl.obolibrary.org/obo'
    ontologies = [
        OBO + '/sepio.owl',
        OBO + '/geno.owl',
        OBO + '/iao.owl',
        OBO + '/ero.owl',
        OBO + '/pco.owl',
        OBO + '/xco.owl',
        OBO + '/ro.owl',
        GH + '/jamesmalone/OBAN/master/ontology/oban_core.ttl',
    ]

    for ontology in ontologies:
        print("parsing: " + ontology)
        try:
            graph.parse(ontology, format=rdflib_util.guess_format(ontology))
        except SAXParseException as e:
            logger.error(e)
            logger.error('Retrying: ' + ontology)
            graph.parse(ontology, format="turtle")
        except OSError as e:  # URLError:
            # simple retry
            logger.error(e)
            logger.error('Retrying: ' + ontology)
            graph.parse(ontology, format=rdflib_util.guess_format(ontology))

    # Get object properties
    output_graph = add_property_to_graph(
        graph.subjects(RDF['type'], OWL['ObjectProperty']), output_graph,
        OWL['ObjectProperty'], properties)

    # Get annotation properties
    output_graph = add_property_to_graph(
        graph.subjects(RDF['type'], OWL['AnnotationProperty']), output_graph,
        OWL['AnnotationProperty'], properties)

    # Get data properties
    output_graph = add_property_to_graph(
        graph.subjects(RDF['type'], OWL['DatatypeProperty']), output_graph,
        OWL['DatatypeProperty'], properties)

    # Hardcoded properties
    output_graph.add(
        (URIRef('https://monarchinitiative.org/MONARCH_cliqueLeader'),
         RDF['type'], OWL['AnnotationProperty']))

    output_graph.add(
        (URIRef('https://monarchinitiative.org/MONARCH_anonymous'),
         RDF['type'], OWL['AnnotationProperty']))

    # Check monarch data triple
    data_url = "https://data.monarchinitiative.org/ttl/{0}".format(
        re.sub(r".*/", "", args.input))
    new_url = "https://data.monarchinitiative.org/ttl/{0}".format(
        re.sub(r".*/", "", args.output))
    if (URIRef(data_url), RDF.type, OWL['Ontology']) in output_graph:
        output_graph.remove((URIRef(data_url), RDF.type, OWL['Ontology']))

    output_graph.add((URIRef(new_url), RDF.type, OWL['Ontology']))

    for row in output_graph.predicates(DC['source'],
                                       OWL['AnnotationProperty']):
        if row == RDF['type']:
            output_graph.remove(
                (DC['source'], RDF['type'], OWL['AnnotationProperty']))

    output_graph.add((DC['source'], RDF['type'], OWL['ObjectProperty']))

    return output_graph
Example #25
def update_fuseki(config, files):
    """
    The current procedure first dumps the enriched graph to a temporary file in a dir accessible by
    the web server, then loads the file using the SPARQL LOAD operation.

    I first tried pushing the enriched graph directly to the update endpoint
    without writing a temporary file, but that approach failed for two reasons:
     - Using INSERT DATA with "lots" of triples (>> 20k) caused Fuseki to give a 500 response.
     - Using INSERT DATA with chunks of 20k triples worked well... when there were no blank nodes.
       If the same bnode were referenced in two different chunks, it would end up as *two* bnodes.
       Since we're using bnodes in RDF lists, many lists ended up broken. From the SPARQL ref.:

            Variables in QuadDatas are disallowed in INSERT DATA requests (see Notes 8 in the grammar).
            That is, the INSERT DATA statement only allows to insert ground triples. Blank nodes in
            QuadDatas are assumed to be disjoint from the blank nodes in the Graph Store,
            i.e., will be inserted with "fresh" blank nodes.

    Using tdbloader would be another option, but then we would still need a temp file,
    we would also need to put that file on a volume accessible to the Docker container,
    and we would need to shut down the server while loading the file. And it's a
    solution tied to Fuseki.

    I'm not aware of any limit on how large a graph Fuseki can load with the LOAD
    operation. I guess we'll find out.
    """

    if config['dumps_dir'] is None:
        raise Exception("The 'dumps_dir' option must be set")

    if config['dumps_dir_url'] is None:
        raise Exception("The 'dumps_dir_url' option must be set")

    tmpfile = '{}/import_{}.ttl'.format(config['dumps_dir'].rstrip('/'),
                                        config['basename'])
    tmpfile_url = '{}/import_{}.ttl'.format(
        config['dumps_dir_url'].rstrip('/'), config['basename'])

    tc = enrich_and_concat(files, tmpfile)

    c0 = get_graph_count(config)

    store = SPARQLUpdateStore('{}/sparql'.format(config['fuseki']),
                              '{}/update'.format(config['fuseki']))
    graph_uri = URIRef(config['graph'])
    graph = Graph(store, graph_uri)

    logger.info("Fuseki: Loading %d triples into <%s> from %s", tc, graph_uri,
                tmpfile_url)

    # CLEAR GRAPH first to make sure all blank nodes are erased
    # https://github.com/scriptotek/emnesok/issues/70
    store.update('CLEAR GRAPH <{}>'.format(graph_uri))

    store.update('LOAD <{}> INTO GRAPH <{}>'.format(tmpfile_url, graph_uri))

    c1 = get_graph_count(config)
    if c0 == c1:
        logger.info('Fuseki: Graph <%s> updated, number of concepts unchanged',
                    config['graph'])
    else:
        logger.info(
            'Fuseki: Graph <%s> updated, number of concepts changed from %d to %d.',
            config['graph'], c0, c1)

    invalidate_varnish_cache(config['basename'])
    logger.info('Invalidated Varnish cache for %s', config['basename'])
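
The blank-node pitfall described in update_fuseki's docstring is easy to reproduce. Below is a minimal sketch, assuming a local Fuseki dataset at http://localhost:3030/ds (both endpoint URLs are placeholders): the same _:shared label sent in two separate INSERT DATA requests ends up as two unrelated blank nodes, which is why update_fuseki writes a temp file and issues a single LOAD instead.

from rdflib.plugins.stores.sparqlstore import SPARQLUpdateStore

# Placeholder endpoints for a local Fuseki dataset named 'ds'.
store = SPARQLUpdateStore('http://localhost:3030/ds/sparql',
                          'http://localhost:3030/ds/update')

# Two separate update requests that both use the label _:shared.
store.update('INSERT DATA { <http://example.org/s> <http://example.org/p> _:shared }')
store.update('INSERT DATA { _:shared <http://example.org/q> "x" }')

# Per SPARQL 1.1 Update, each request mints "fresh" blank nodes, so the two
# occurrences of _:shared do not denote the same node; an RDF list split
# across such chunks comes back broken.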
Example #26
       OPTIONAL {?book dbpedia-owl:releaseDate ?firstPublished}
       OPTIONAL {?book dbpedia-owl:country ?country}
       OPTIONAL {?book dbpedia-owl:language ?lang}
       OPTIONAL {?book dbpedia-owl:abstract ?abstract}
       OPTIONAL {?book dbpedia-owl:isbn ?isbn}
       OPTIONAL {?book dbpedia-owl:numberOfPages ?pages}
       OPTIONAL {?book dbpedia-owl:mediaType ?mediaType}
       OPTIONAL {?book dbpprop:genre ?genre}
       FILTER ( regex(str(?author), "Franz_Kafka") || regex(str(?author), "Mark_Twain") )
       FILTER (LANG(?abstract)="en")
      }"""

sparql.setQuery(construct_query)
sparql.setReturnFormat(RDF)

# creating the RDF store and graph
memory_store = IOMemory()
graph_id = URIRef("http://bedilico.us/store/book")
g = Graph(store=memory_store, identifier=graph_id)
rdflib.plugin.register('sparql', rdflib.query.Processor,
                       'rdfextras.sparql.processor', 'Processor')
rdflib.plugin.register('sparql', rdflib.query.Result, 'rdfextras.sparql.query',
                       'SPARQLQueryResult')

# merging results and saving the store

g = sparql.query().convert()

g.parse("gutenbuch.owl")
g.serialize("bookShelf_from_dbpedia.owl", "xml")
execfile("gb_hook.py")