Example #1
    def test_single_column_sheet_generates_string_value_triple(
            self, test_data_dir):
        kgiri_base = "https://kg.your-company.kom"  # Don't use fixture for this since the .xslx file uses this one
        args = argparse.Namespace(
            input=f'{test_data_dir}/single_sheet_single_column.xlsx',
            verbose=True,
            ignored_values=list(),
            ignored_prefixes=list(),
            skip_sheets=list(),
            kgiri_base=kgiri_base,
            kgiri_prefix="abc",
            data_source_code="def",
            key_column_number=1,
            strip_any_prefix=False,
            output=None)
        parser = ekglib.XlsxParser(args)

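        # Collect every triple typing a node as ekglib.RAW.StringValue and compare with the expected set.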
        rdf_string_value = ekglib.RAW.StringValue
        actual = set(
            parser.g.triples((None, RDF.type, term.URIRef(rdf_string_value))))
        assert actual == {
            (term.URIRef("%s/id/data-name-popular-value-1" % kgiri_base),
             RDF.type, rdf_string_value),
            (term.URIRef("%s/id/data-name-boring-value-2" % kgiri_base),
             RDF.type, rdf_string_value),
            (term.URIRef("%s/id/data-name-random-value-3" % kgiri_base),
             RDF.type, rdf_string_value)
        }
Example #2
    def _uri(self, uri, rand=True):
        """
        Get URI for the item we are inserting
        """
        try:
            uri = str(uri)
        except Exception:  # coercion to str is best-effort
            pass

        try:
            return self._uri_map[uri]
        except KeyError:  # not cached yet; fall through and build it
            pass

        if uri == self._added_uri:
            self._uri_map[uri] = term.URIRef(self._update_namespace(
                self._namespace + self._uri_prefix + self._row[self._uidi]))

        else:
            if rand and uri.startswith(self._namespace + "n"):
                if self._next_id:
                    new_id = self._next_id
                    self._next_id += 1
                else:
                    new_id = self.sparql.get_available_id()

                self._uri_map[uri] = term.URIRef(self._update_namespace(
                    "%s%s%s" % (self._namespace, "n", new_id)))

            else:
                return term.URIRef(self._update_namespace(uri))

        return self._uri_map[uri]
Example #3
# Assumed imports for this snippet (elided in the original); to_iri comes from
# the iribaker package, and RepresentsInt is a helper defined elsewhere.
from rdflib import Graph, Literal, Namespace, RDF, XSD, term
from iribaker import to_iri


def makegraph(codebook, variable, vocab_name):

    base = 'http://data.socialhistory.org/resource/' + vocab_name + '/'
    vrb_iri = to_iri(base + variable + '/')
    VCB_NAMESPACE = Namespace(vrb_iri)
    SKOS = Namespace('http://www.w3.org/2004/02/skos/core#')

    g = Graph()
    g.bind(vocab_name, VCB_NAMESPACE)
    g.bind('skos', SKOS)

    # The variable itself becomes a skos:ConceptScheme that carries its definition.
    g.add((VCB_NAMESPACE[variable], RDF.type, SKOS['ConceptScheme']))
    g.add((VCB_NAMESPACE[variable], SKOS['definition'], Literal(codebook['def'][0])))
    if len(codebook) == 1:
        return g

    for i in range(len(codebook['code'])):
        
        iri = to_iri(VCB_NAMESPACE[str(codebook['code'][i])])

        g.add((term.URIRef(iri), RDF.type, SKOS['Concept']))
        g.add((term.URIRef(iri), SKOS['inScheme'], VCB_NAMESPACE[variable]))
        g.add((term.URIRef(iri), SKOS['prefLabel'], Literal(codebook['label'][i])))


        if RepresentsInt(codebook['code'][i]): 
            g.add((term.URIRef(iri), RDF.value, Literal(codebook['code'][i], datatype=XSD.int)))
    
    return g
Example #4
 def test_has_only_mlr_values(self):
     triples = list(self.graph.triples((term.URIRef(TEST_ID), None, None)))
     predicates = set(p for (s, p, o) in triples)
     extra_predicates = []
     for p in predicates:
         if p == term.URIRef('http://www.inria.fr/acacia/corese#graph'):
             continue
         if p not in Element_names:
             extra_predicates.append(p)
     assert not extra_predicates, extra_predicates
Example #5
    def test_substitutions(self):
        g = TemplateGraph('test.cfg', 'people')
        g._read_config()
        g._row = ['one', 'two', 'three']

        trm = term.URIRef('http://somewhere.edu/$VAR001x')
        s = g._fill_in(trm)
        assert (s == term.URIRef('http://somewhere.edu/twox'))

        s = g._replace(term.URIRef('http://somewhere.edu/per987-x'), '987', 3)

        assert (s == term.URIRef('http://somewhere.edu/per$VAR003-x'))
Example #6
    def get_definition(self, owl_term, add_links=True):
        definition = list(self.graph.objects(owl_term, OBO_DEFINITION))
        definition = definition + \
            list(self.graph.objects(owl_term, SKOS_DEFINITION))

        if definition:
            if len(definition) > 1:
                warnings.warn('Multiple definitions for ' +
                              self.get_label(owl_term) + ': ' +
                              ",".join(definition))
            definition = unicode(definition[0])
        else:
            definition = ""

        # Add a link to the term's entry in the document if the definition refers to a term
        if add_links:
            # definition = re.sub(
            # "("+"|".join(self.labels.keys())+")", "[\\1]", definition)
            terms = re.findall(r'\'.*?\'', definition)
            for mterm in sorted(set(terms), key=len, reverse=True):
                literal = Literal(mterm.replace("'", ""))
                if str(literal) in self.labels:
                    purl = self.labels[str(literal)]
                    if "#" in purl and \
                            not self.is_deprecated(term.URIRef(mterm)):
                        definition = definition.replace(
                            mterm, "<a title=" + purl.split("#")[1] + ">" +
                            mterm.replace("[", "").replace("]", "") + "</a>")

        # Remove final dot if present
        if definition:
            if definition[-1] == ".":
                definition = definition[:-1]

        return definition
Example #7
    def test_multiple_matches(self):
        '''
        http://www.w3.org/TR/rdf-sparql-query/#MultipleMatches
        '''
        g = create_graph("""
        @prefix foaf:  <http://xmlns.com/foaf/0.1/> .

        _:a  foaf:name   "Johnny Lee Outlaw" .
        _:a  foaf:mbox   <mailto:jlow@example.com> .
        _:b  foaf:name   "Peter Goodguy" .
        _:b  foaf:mbox   <mailto:peter@example.org> .
        _:c  foaf:mbox   <mailto:carol@example.org> .
        """)
        results = list(
            g.query("""
        PREFIX foaf:   <http://xmlns.com/foaf/0.1/>
        SELECT ?name ?mbox
        WHERE
          { ?x foaf:name ?name .
            ?x foaf:mbox ?mbox }
        """))
        expected_results = [
            (term.Literal(name), term.URIRef(mbox))
            for name, mbox in [("Johnny Lee Outlaw", "mailto:jlow@example.com"),
                               ("Peter Goodguy", "mailto:peter@example.org")]
        ]
        results.sort()
        expected_results.sort()
        self.assertEqual(results, expected_results)
Example #8
 def _uriref(self, list_type, identifier):
     '''
     Create a URIRef of the given list type and id.
     @param list_type Singular form of the list name (e.g. movie for the movies list)
     @param identifier Identifier of the node
     '''
     partial_uri = urllib.parse.quote(identifier)
     return term.URIRef(f"http://imdb.org/{list_type}/{partial_uri}")
Example #9
 def test_match_literal_arbitrary_type(self):
     g = create_graph(self.data)
     results = list(
         g.query("""
     SELECT ?v WHERE { ?v ?p "abc"^^<http://example.org/datatype#specialDatatype> }
     """))
     expected_results = [(term.URIRef('http://example.org/ns#z'), )]
     self.assertEqual(results, expected_results)
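This test and the following one query an elided `self.data` fixture; judging by the expected bindings, it presumably resembles the literal-matching dataset from the SPARQL recommendation, along these lines:

data = """
@prefix dt:  <http://example.org/datatype#> .
@prefix ns:  <http://example.org/ns#> .
@prefix :    <http://example.org/ns#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

:x ns:p "cat"@en .
:y ns:p "42"^^xsd:integer .
:z ns:p "abc"^^dt:specialDatatype .
"""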
Example #10
 def test_match_literal_numeric_type(self):
     g = create_graph(self.data)
     results = list(
         g.query("""
     SELECT ?v WHERE { ?v ?p 42 }
     """))
     expected_results = [(term.URIRef('http://example.org/ns#y'), )]
     self.assertEqual(results, expected_results)
Example #11
def check_rule_with_data(rule_path, data_path):

    r = load_yamldown(rule_path)
    schema = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                          "../../../metadata/rules.schema.yml")
    validate(r, schema)
    if not rule.sparql_from(r):
        raise Exception("No SPARQL impl for rule {}".format(rule_path))

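    # Parsing with a publicID loads the data into the named graph
    # <http://geneontology.org/rules/test>, which is then tagged with a graphType triple.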
    g = rdflib.graph.ConjunctiveGraph()
    test_data_graph = g.parse(data_path,
                              format="ttl",
                              publicID="http://geneontology.org/rules/test")
    test_data_graph.add((term.URIRef("http://geneontology.org/rules/test"),
                         term.URIRef("http://geneontology.org/graphType"),
                         term.URIRef("http://geneontology.org/gafCam")))
    results = g.query(rule.sparql_from(r))
    return results
Example #12
 def test_codomain(self):
     wrong_codomain_type = []
     for predicate in MLR_codomain.iterkeys():
         for s, p, o in self.graph.triples(
             (None, term.URIRef(predicate), None)):
             for s2, p2, o2 in self.graph.triples((o, RDF.type, None)):
                 if o2 not in MLR_codomain[predicate]:
                     wrong_codomain_type.append((predicate, o2))
     assert not wrong_codomain_type, wrong_codomain_type
Example #13
 def _expand_qname(self, qname):
     """expand a qualified name's namespace prefix to include the resolved
     namespace root url"""
     if type(qname) is not rt.URIRef:
         raise TypeError("Cannot expand qname of type %s, must be URIRef" %
                         type(qname))
     for ns in self.graph.namespaces():
         if ns[0] == qname.split(':')[0]:
             return rt.URIRef("%s%s" % (ns[1], qname.split(':')[-1]))
     return qname
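For reference, the same expansion can be reproduced against a plain rdflib graph (a hypothetical standalone sketch, not the original class):

from rdflib import Graph, term

g = Graph()
g.bind("foaf", "http://xmlns.com/foaf/0.1/")

qname = term.URIRef("foaf:name")
prefix, _, local = qname.partition(":")
for ns_prefix, ns_uri in g.namespaces():
    if str(ns_prefix) == prefix:
        # Prepend the resolved namespace root to the local part.
        print(term.URIRef("%s%s" % (ns_uri, local)))  # http://xmlns.com/foaf/0.1/name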
Example #14
 def test_has_all_mlr_values(self):
     triples = list(self.graph.triples((term.URIRef(TEST_ID), None, None)))
     #sys.stderr.write(`triples`)
     predicates = set(p for (s, p, o) in triples)
     missing_predicates = []
     for (p, n) in Element_names.iteritems():
         if p in Known_Missing:
             continue
         if p not in predicates:
             missing_predicates.append((p, n))
     assert not missing_predicates, missing_predicates
Example #15
def populateValue(g, datasetId, ds, data, p, o, iriCache):

    ## Skip the following IRIs, as they are handled separately (getResearcher, getProtocols, etc.)
    skipIri = [
        term.URIRef('http://uri.interlex.org/temp/uris/contributorTo'),
        term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
        term.URIRef('http://uri.interlex.org/temp/uris/hasUriApi'),
        term.URIRef('http://uri.interlex.org/temp/uris/hasUriHuman'),
        term.URIRef('http://uri.interlex.org/temp/uris/hasProtocol'),
        term.URIRef('http://uri.interlex.org/temp/uris/wasUpdatedAtTime')]
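    # The predicate IRI with its prefix stripped becomes the field name in the output dict.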
    key = strip_iri(p.strip())

    if p in skipIri:
        return

    if isinstance(o, term.URIRef):
        value = iri_lookup(g, o.strip(), iriCache)
        if value:
            if isinstance(value, dict) and 'curie' in value:
                ds['term'][value['curie']] = value
                value = value['curie']
            # if isinstance(value, dict) and 'iri' in value:
            #     key = strip_iri(value['iri'])
            #     ds['term'][key] = value
            #     value = key

            if key in arrayProps:
                array = data.setdefault(key, [])
                array.append(value)
            else:
                if key in data:
                    log.warning('Unexpected creation of array for:  %s - %s - %s', datasetId, key, value)
                    log.warning('Existing value for this key     :  %s - %s - %s', datasetId, key, data[key])
                    log.warning('----- Will use the shortest value -----')
                    if len(value) < len(data[key]):
                        data[key] = value
                else:
                    data[key] = value

    elif isinstance(o, term.Literal):
        value = strip_iri(o.strip())
        if key in arrayProps:
            array = data.setdefault(key, [])
            array.append(value)
        else:
            if key in data:
                log.warning('Unexpected creation of array for:  %s - %s - %s', datasetId, key, value)
                log.warning('Existing value for this key     :  %s - %s - %s', datasetId, key, data[key])
                log.warning('----- Will use the shortest value -----')
                if len(value) < len(data[key]):
                    data[key] = value
            else:
                data[key] = value

    elif isinstance(o, term.BNode):
        data[key] = parseMeasure(datasetId, g, o, {'value': '', 'unit': ''})

    else:
        raise Exception('Unknown RDF term: %s' % type(o))
Example #16
 def _replace(self, t, text, n):
     """
     Replace the substring given in text with $VAR###
     :param t: Subject or Object of triple to have the replacement done.
     :param text: The text to replace
     :param n: Integer value to put in the ### part of $VAR###
      :returns: The given subject or object with the replacement done.
     """
     if isinstance(t, term.URIRef):
         return term.URIRef(t.replace(text, "$VAR%03d" % n))
     elif isinstance(t, term.Literal):
         return term.Literal(t.replace(text, "$VAR%03d" % n),
                             datatype=t.datatype)
     return t
Example #17
def print_1968_launches():
    ### Print the links to the actual pages about each spacecraft launched in 1968

    # load the term for the predicate we are going to filter (Dublin Core Standard)
    DUBLIN = term.URIRef(u'http://purl.org/dc/terms/subject')

    # same procedure as above
    g2 = Graph()
    result2 = g2.parse(
        u'http://dbpedia.org/resource/Category:Spacecraft_launched_in_1968')

    print("graph has %s statements." % len(g2))

    for s in g2.subjects(predicate=DUBLIN, object=None):
        print(s)
Example #18
    def parse_ontology(self):
        """place the ontology graph into a set of custom data structures
        for use by the validator"""
        start = time.clock()
        log.info("Parsing ontology file for {}".format(
            self.__class__.__name__))
        for subj, pred, obj in self._schema_nodes():
            if subj not in self.attributes_by_class:
                if obj == rt.URIRef(
                        self.lexicon['class']) and pred == rt.URIRef(
                            self.lexicon['type']):
                    self.attributes_by_class[subj] = []

            leaves = [(subj, pred, obj)]
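            # If the object is a blank node, follow it down to its deepest concrete nodes first.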
            if type(obj) == rt.BNode:
                leaves = deepest_node((subj, pred, obj), self.graph)

            for s, p, o in leaves:
                if o not in self.attributes_by_class:
                    self.attributes_by_class[o] = []
                if pred == rt.URIRef(self.lexicon['domain']):
                    self.attributes_by_class[o].append(subj)
        log.info("Ontology parsing complete in {}".format(
            (time.clock() - start) * 1000))
Example #19
def test_export():

    out = "TEST"  #None
    try:
        #staticpath = global_settings.STATIC_PATH
        #return staticpath[:len(staticpath)-staticpath[::-1].find('/')]
        #return staticpath[:len(staticpath)-staticpath[::-1].find('/')-1] + '/b2note_api/test_rdf.rdf'
        #nf = open(os.path.join(staticpath[:len(staticpath)-staticpath[::-1].find('/')-1], '/b2note_api/test_rdf.rdf'), 'w')
        apipath = "/bsc/public/b2note_project/b2note_devel/"
        nf = open(apipath + "b2note_api/test_rdf.rdf", "w")
        nf.write('''<?xml version="1.0" encoding="UTF-8"?>
<rdf:RDF
   xmlns:oa="http://www.w3.org/ns/oa#"
   xmlns:as="http://www.w3.org/ns/activitystreams#"
   xmlns:foaf="http://xmlns.com/foaf/0.1/"
   xmlns:dcterms="http://purl.org/dc/terms/"
   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
>
</rdf:RDF>
''')

        nf.close()

        t_start = datetime.datetime.now()

        annL = None
        annL = retrieve_annotation_jsonld_from_api()

        t_api = datetime.datetime.now()

        if annL:
            # Replace field name "type" by "@type" for rdflib-jsonld correct processing
            annL = addarobase_totypefieldname(annL)
            # B2SHARE sends file urls containing whitespace characters
            # that rdflib refuses to serialize; replace them with %20
            if annL:
                for ann in annL:
                    if isinstance(ann, dict):
                        if "target" in ann.keys():
                            if isinstance(ann["target"], dict):
                                if "source" in ann["target"].keys():
                                    if isinstance(ann["target"]["source"],
                                                  (str, unicode)):
                                        if ann["target"]["source"].find(
                                                " ") > 0:
                                            ann["target"]["source"] = ann[
                                                "target"]["source"].replace(
                                                    " ", "%20")
                                if "id" in ann["target"].keys():
                                    if isinstance(ann["target"]["id"],
                                                  (str, unicode)):
                                        if ann["target"]["id"].find(" ") > 0:
                                            ann["target"]["id"] = ann[
                                                "target"]["id"].replace(
                                                    " ", "%20")
            else:
                print(
                    "export_to_triplestore function, no annotation list from addarobase function."
                )
                stdlogger.error(
                    "export_to_triplestore function, no annotation list from addarobase function."
                )
                return None
        else:
            print(
                "export_to_triplestore function, no annotation list retrieved."
            )
            stdlogger.error(
                "export_to_triplestore function, no annotation list retrieved."
            )
            return None

        t_nospace = datetime.datetime.now()

        # Re-set blank node ids in existing graph
        prog = 0
        bnc = 0
        nf = open(apipath + "b2note_api/test_rdf.rdf", "r")
        nRDF = nf.read()
        nf.close()
        while '''rdf:nodeID="''' in nRDF[prog:]:
            b = prog + nRDF[prog:].find('''rdf:nodeID="''') + len(
                '''rdf:nodeID="''')
            f = b + nRDF[b:].find('''"''')
            prog = b
            old_node_id = nRDF[b:f]
            if old_node_id[:len("B2NOTEBLANKNODE")] != "B2NOTEBLANKNODE":
                new_node_id = "B2NOTEBLANKNODE" + str(bnc)
                nRDF = nRDF.replace(old_node_id, new_node_id)
                bnc += 1
        nf = open(apipath + "b2note_api/test_rdf.rdf", "w")
        nf.write(nRDF)
        nf.close()

        g = None
        nRDF = None
        if annL:
            for ann in annL:
                # Build-up graph from jsonld list of annotations
                g = Graph().parse(data=json.dumps(ann), format='json-ld')

                if g:
                    # The library adds a trailing slash character to the Software homepage url
                    for s, p, o in g.triples(
                        (None, None, term.URIRef(u"https://b2note.bsc.es/"))):
                        g.add((s, p, term.URIRef(u"https://b2note.bsc.es")))
                    for s, p, o in g.triples(
                        (None, None, term.URIRef(u"https://b2note.bsc.es/"))):
                        g.remove(
                            (s, p, term.URIRef(u"https://b2note.bsc.es/")))
                else:
                    print(
                        "export_to_triplestore function, no graph parsed from json-ld."
                    )
                    stdlogger.error(
                        "export_to_triplestore function, no graph parsed from json-ld."
                    )
                    return None

                files = None
                if g:
                    files = g.serialize(format='xml')

                descr = None
                if files:
                    b = files.find('''<rdf:Description''')
                    b = b - files[:b][::-1].find('''>''') + 1
                    f = files.find('''</rdf:RDF>''')
                    descr = files[b:f]
                    if descr:
                        prog = 0
                        while '''rdf:nodeID="''' in descr[prog:]:
                            b = prog + descr[prog:].find(
                                '''rdf:nodeID="''') + len('''rdf:nodeID="''')
                            f = b + descr[b:].find('''"''')
                            prog = b
                            old_node_id = descr[b:f]
                            print(prog, bnc, b, f, old_node_id)  # debug output
                            if old_node_id[:len("B2NOTEBLANKNODE"
                                                )] != "B2NOTEBLANKNODE":
                                new_node_id = "B2NOTEBLANKNODE" + str(bnc)
                                descr = descr.replace(old_node_id, new_node_id)
                                bnc += 1
                else:
                    print(
                        "export_to_triplestore function, no graph from removing trailing slash from software homepage url."
                    )
                    stdlogger.error(
                        "export_to_triplestore function, no graph from removing trailing slash from software homepage url."
                    )
                    return None

                if descr:
                    nf = open(apipath + "b2note_api/test_rdf.rdf", "r")
                    nRDF = nf.read()
                    nf.close()

                    nf = open(apipath + "b2note_api/test_rdf.rdf", "w")
                    nf.write(nRDF[:nRDF.find('''</rdf:RDF>''')] + files[b:f] +
                             '\n' + '''</rdf:RDF>''')
                    nf.close()

                else:
                    print(
                        "export_to_triplestore function, no annotation description extracted from serilalized RDF."
                    )
                    stdlogger.error(
                        "export_to_triplestore function, no annotation description extracted from serilalized RDF."
                    )
                    return None

            t_makegraph = datetime.datetime.now()

            R = None
            if nRDF:
                R = httpPutRdfXmlFileContentToOpenVirtuoso(
                    'http://opseudat03.bsc.es:8890/DAV/home/b2note/rdf_sink/annotations.rdf',
                    virtuoso_settings['VIRTUOSO_B2NOTE_USR'],
                    virtuoso_settings['VIRTUOSO_B2NOTE_PWD'], nRDF)
            else:
                print(
                    "export_to_triplestore function, replacement RDF was not constructed."
                )
                stdlogger.error(
                    "export_to_triplestore function, replacement RDF was not constructed."
                )
                return None

            t_sending = datetime.datetime.now()

            if R is not None:
                return '''
                <h1>DONE</h1>
                <br>
                <p>Nb annotations: ''' + str(len(annL)) + '''</p>
                <p>t_start: 0, 0, ''' + str(t_start) + '''</p>
                <p>t_api: ''' + str(t_api - t_start) + ''', ''' + str(
                    t_api - t_start) + ''', ''' + str(t_api) + '''</p>
                <p>t_nospace: ''' + str(t_nospace - t_api) + ''', ''' + str(
                        t_nospace -
                        t_start) + ''', ''' + str(t_nospace) + '''</p>
                <p>t_makegraph: ''' + str(t_makegraph - t_api) + ''', ''' + str(
                            t_makegraph -
                            t_start) + ''', ''' + str(t_makegraph) + '''</p>
                <p>t_sending: ''' + str(t_sending - t_api) + ''', ''' + str(
                                t_sending -
                                t_start) + ''', ''' + str(t_sending) + '''</p>
                <br>
                <pre>''' + R.text + '''</pre>
                <br>
                <p>Example query:<p>
                <pre>SELECT DISTINCT ?file ?free_text ?semantic_label
FROM &#60;urn:dav:home:b2note:rdf_sink>
WHERE {
 ?s ?p &#60;http://www.w3.org/ns/oa#Annotation>.
 ?s &#60;http://www.w3.org/ns/oa#hasTarget> ?file.
 ?s &#60;http://www.w3.org/ns/oa#hasBody> ?b.
 OPTIONAL{
  ?b &#60;http://www.w3.org/1999/02/22-rdf-syntax-ns#value> ?free_text.
 }
 OPTIONAL{
  ?b &#60;http://www.w3.org/1999/02/22-rdf-syntax-ns#type> &#60;http://www.w3.org/ns/oa#Composite>.
  ?b &#60;http://www.w3.org/ns/activitystreams#items> ?d.
  ?d &#60;http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> ?e.
  ?e &#60;http://www.w3.org/1999/02/22-rdf-syntax-ns#first> ?f.
  ?f &#60;http://www.w3.org/1999/02/22-rdf-syntax-ns#value> ?semantic_label.
 }
}
LIMIT 50
</pre>
'''

    except Exception:
        print("export_to_triplestore function, did not complete.")
        stdlogger.error("export_to_triplestore function, did not complete.")
        return False

    return out
Example #20
    def test_get_resmap(self):
        response = self.client.get("/hsapi/resource/{pid}/map/".format(pid=self.pid),
                                   format='json')
        # Note: this presumes that there is always a single redirection.
        # This might not be true if we utilize systems other than iRODS.
        self.assertEqual(response.status_code, status.HTTP_302_FOUND)
        response2 = self.client.get(response.url)
        self.assertEqual(response2.status_code, status.HTTP_200_OK)

        # collect response from stream
        output = ""
        while True:
            try:
                output += response2.streaming_content.next()
            except StopIteration:
                break

        # parse as simple RDF graph
        g = Graph()
        g.parse(data=output)

        documents = g.triples(
            (None, term.URIRef(u'http://purl.org/spar/cito/documents'), None)
        )

        # check for "documents" node
        doclen = 0
        for s, p, o in documents:

            doclen += 1
            self.assertTrue(isinstance(s, term.URIRef))
            subject = s.split('/')
            subject = subject[len(subject)-1]
            self.assertEqual(subject, "resourcemetadata.xml")

            self.assertTrue(isinstance(o, term.Literal))
            object = o.split('/')
            object = object[len(object)-1]
            self.assertEqual(object, "resourcemap.xml#aggregation")

        self.assertEqual(doclen, 1)

        # now create a file in the resource map
        txt_file_name = 'test.txt'
        txt_file_path = os.path.join(self.tmp_dir, txt_file_name)
        txt = open(txt_file_path, 'w')
        txt.write("Hello World.\n")
        txt.close()

        # Upload the new resource file
        params = {'file': (txt_file_name,
                           open(txt_file_path),
                           'text/plain')}
        url = "/hsapi/resource/{pid}/files/".format(pid=self.pid)
        response = self.client.post(url, params)
        self.assertEqual(response.status_code, status.HTTP_201_CREATED)
        content = json.loads(response.content)
        self.assertEqual(content['resource_id'], self.pid)

        # download the resource map and
        # Make sure the new file appears in the resource map
        response = self.client.get("/hsapi/resource/{pid}/map/".format(pid=self.pid))
        self.assertEqual(response.status_code, status.HTTP_302_FOUND)
        response2 = self.client.get(response.url)
        self.assertEqual(response2.status_code, status.HTTP_200_OK)

        # collect the map from the stream
        output = ""
        while True:
            try:
                output += response2.streaming_content.next()
            except StopIteration:
                break

        # parse as a simple RDF file of triples
        g = Graph()
        g.parse(data=output)

        # check that the graph contains an appropriate "documents" node
        documents = g.triples(
            (None, term.URIRef(u'http://purl.org/spar/cito/documents'), None)
        )

        doclen = 0
        for s, p, o in documents:

            doclen += 1
            self.assertTrue(isinstance(s, term.URIRef))
            subject = s.split('/')
            subject = subject[len(subject)-1]
            self.assertEqual(subject, "resourcemetadata.xml")

            self.assertTrue(isinstance(o, term.Literal))
            object = o.split('/')
            object = object[len(object)-1]
            self.assertEqual(object, "resourcemap.xml#aggregation")

        self.assertEqual(doclen, 1)

        formats = g.triples(
            (None, term.URIRef(u'http://purl.org/dc/elements/1.1/format'), None)
        )

        # check that MIME types are correctly defined
        fmtlen = 0
        for s, p, o in formats:
            fmtlen += 1
            subject = s.split('/')
            subject = subject[len(subject)-1]
            self.assertTrue(isinstance(o, term.Literal))
            if (subject == 'test.txt'):
                self.assertEqual(str(o), u'text/plain')
            else:
                self.assertEqual(str(o), u'application/rdf+xml')

        # pigeonhole principle: if there are three, then one is the file in question
        self.assertEqual(fmtlen, 3)
Example #21
def export_to_triplestore():
    out = None
    try:

        annL = None
        annL = retrieve_annotation_jsonld_from_api()

        if annL:
            # Replace field name "type" by "@type" for rdflib-jsonld correct processing
            annL = addarobase_totypefieldname(annL)
            # B2SHARE sends file urls containing whitespace characters
            # that rdflib refuses to serialize; replace them with %20
            for ann in annL:
                if isinstance(ann, dict):
                    if "target" in ann.keys():
                        if isinstance(ann["target"], dict):
                            if "source" in ann["target"].keys():
                                if isinstance(ann["target"]["source"],
                                              (str, unicode)):
                                    if ann["target"]["source"].find(" ") > 0:
                                        ann["target"]["source"] = ann[
                                            "target"]["source"].replace(
                                                " ", "%20")
                            if "id" in ann["target"].keys():
                                if isinstance(ann["target"]["id"],
                                              (str, unicode)):
                                    if ann["target"]["id"].find(" ") > 0:
                                        ann["target"]["id"] = ann["target"][
                                            "id"].replace(" ", "%20")
        else:
            print(
                "export_to_triplestore function, no annotation list retrieved."
            )
            stdlogger.error(
                "export_to_triplestore function, no annotation list retrieved."
            )
            return None

        g = None
        if annL:
            # Build-up graph from jsonld list of annotations
            g = Graph().parse(data=json.dumps(annL), format='json-ld')
        else:
            print(
                "export_to_triplestore function, no annotation list from addarobase function."
            )
            stdlogger.error(
                "export_to_triplestore function, no annotation list from addarobase function."
            )
            return None

        if g:
            # The library adds a trailing slash character to the Software homepage url
            for s, p, o in g.triples(
                (None, None, term.URIRef(u"https://b2note.bsc.es/"))):
                g.add((s, p, term.URIRef(u"https://b2note.bsc.es")))
            for s, p, o in g.triples(
                (None, None, term.URIRef(u"https://b2note.bsc.es/"))):
                g.remove((s, p, term.URIRef(u"https://b2note.bsc.es/")))
        else:
            print(
                "export_to_triplestore function, no graph parsed from json-ld."
            )
            stdlogger.error(
                "export_to_triplestore function, no graph parsed from json-ld."
            )
            return None

        files = None
        if g:
            files = g.serialize(format='xml')
        else:
            print(
                "export_to_triplestore function, no graph from removing trailing slash from software homepage url."
            )
            stdlogger.error(
                "export_to_triplestore function, no graph from removing trailing slash from software homepage url."
            )
            return None

        # CLEAR previous graph
        graph_urn = "urn:dav:home:b2note:rdf_sink"
        q = urllib.quote_plus('CLEAR GRAPH <' + graph_urn + '>')
        url = 'http://opseudat03.bsc.es:8890/sparql?query=' + q
        rc = None
        rc = requests.get(url,
                          auth=HTTPBasicAuth(
                              virtuoso_settings['VIRTUOSO_B2NOTE_USR'],
                              virtuoso_settings['VIRTUOSO_B2NOTE_PWD']))

        R = None
        if rc and rc.text and isinstance(
                rc.text,
            (str, unicode)) and rc.text.find("Clear graph &lt;" + graph_urn +
                                             "&gt; -- done") > 0:
            R = httpPutRdfXmlFileContentToOpenVirtuoso(
                'http://opseudat03.bsc.es:8890/DAV/home/b2note/rdf_sink/annotations.rdf',
                virtuoso_settings['VIRTUOSO_B2NOTE_USR'],
                virtuoso_settings['VIRTUOSO_B2NOTE_PWD'], files)
        else:
            print(
                "export_to_triplestore function, call to CLEAR previous GRAPH on triplestore failed."
            )
            stdlogger.error(
                "export_to_triplestore function, call to CLEAR previous GRAPH on triplestore failed."
            )
            return None

        if R is not None:
            print "export_to_triplestore function, completed publishing of B2Note annotations to Open Virtuoso triplestore."
            return '''
                <h1>B2NOTE triplestore data update</h1>
                <p>Completed publishing annotations to B2NOTE Open Virtuoso triplestore.</p>
                <p>SPARQL endpoint: <a href="http://opseudat03.bsc.es:8890/sparql" target="_blank">http://opseudat03.bsc.es:8890/sparql</a></p>
                <p>Example query:<p>
                <pre>SELECT DISTINCT ?file ?free_text ?semantic_label
FROM &#60;urn:dav:home:b2note:rdf_sink>
WHERE {
 ?s ?p &#60;http://www.w3.org/ns/oa#Annotation>.
 ?s &#60;http://www.w3.org/ns/oa#hasTarget> ?file.
 ?s &#60;http://www.w3.org/ns/oa#hasBody> ?b.
 OPTIONAL{
  ?b &#60;http://www.w3.org/1999/02/22-rdf-syntax-ns#value> ?free_text.
 }
 OPTIONAL{
  ?b &#60;http://www.w3.org/1999/02/22-rdf-syntax-ns#type> &#60;http://www.w3.org/ns/oa#Composite>.
  ?b &#60;http://www.w3.org/ns/activitystreams#items> ?d.
  ?d &#60;http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> ?e.
  ?e &#60;http://www.w3.org/1999/02/22-rdf-syntax-ns#first> ?f.
  ?f &#60;http://www.w3.org/1999/02/22-rdf-syntax-ns#value> ?semantic_label.
 }
}
LIMIT 50
</pre>'''
        else:
            print "export_to_triplestore function, could not send rdf/xml file content to Open Virtuoso rdf-sink."
            stdlogger.error(
                "export_to_triplestore function, could not send rdf/xml file content to Open Virtuoso rdf-sink."
            )
            return None

    except Exception:
        print("export_to_triplestore function, did not complete.")
        stdlogger.error("export_to_triplestore function, did not complete.")
        return False

    return out
Example #22
        while True:
            found = False
            for p, o in self.schema_def.ontology[superclass]:
                if self.schema_def.lexicon['subclass'] == str(p):
                    found = True
                    classes.append(o)
                    superclass = o
            if not found:
                break
        return classes

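    # Note: the tuple-unpacking parameter in the signature below is Python 2-only syntax.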
    def _is_instance(self, (subj, pred, obj)):
        """helper, returns the class type of subj"""
        input_pred_ns = self._namespace_from_uri(self._expand_qname(pred))
        triples = self.graph.triples(
            (subj, rt.URIRef(self.schema_def.lexicon['type']), None))
        if triples:
            for tr in triples:
                triple_obj_ns = self._namespace_from_uri(
                    self._expand_qname(tr[2]))
                if input_pred_ns == triple_obj_ns:  # match namespaces
                    return tr[2]  # return the object

    def _field_name_from_uri(self, uri):
        """helper, returns the name of an attribute (without namespace prefix)"""
        # TODO - should use graph API
        uri = str(uri)
        parts = uri.split('#')
        if len(parts) == 1:
            return uri.split('/')[-1] or uri
        return parts[-1]
Example #23
'''
Created on 14 Jun 2012

@author: AYODELE-M.AKINGBULU
'''
from rdflib import Graph, term, namespace
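# Note: the Sleepycat store is backed by Berkeley DB; the bsddb/bsddb3 package must be installed.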
graph = Graph(store='Sleepycat', identifier='test')
graph.open("somefolder", create=True)
graph.add((term.URIRef('http://www.google.com/'), namespace.RDFS.label,
           term.Literal('Google home page')))
graph.add((term.URIRef('http://wikipedia.org/'), namespace.RDFS.label,
           term.Literal('Wikipedia home page')))
graph.close()
graph = Graph(store='Sleepycat', identifier='test')
graph.open("somefolder")
print len(graph)

print "things in a_graph"
for s, p, o in graph:
    print s, p, o
import sys
import pandas as pd

from rdflib import Graph, term

geography_codes_register = "https://statistics.gov.scot/downloads/graph?uri=http://statistics.gov.scot/graph/standard-geography-code-register"
geography_codes = Graph()
official_name_predicate = term.URIRef(
    "http://statistics.data.gov.uk/def/statistical-geography#officialname")


def init_graph():
    """ Initialise the graph with latest geography codes from scot gov. """
    print("Initialising Geography Register this can take circa 20 seconds")
    geography_codes.parse(geography_codes_register, "nt")


def get_official_name(feature_code: str):
    """ Function extracts a feature codes official name from the Scot Gov geography register"""
    subject = term.URIRef(
        f"http://statistics.gov.scot/id/statistical-geography/{feature_code}")
    return geography_codes.value(subject, official_name_predicate)


def write_geo_names_csv(filename: str):
    """ Parse csv file, and lookup official names for each featurecode in the file """
    pd.set_option("display.max_columns", None)
    pd.set_option("display.width", 400)
    stats_df = pd.read_csv(filename)
    print(stats_df.head())
Example #26
def test_example(g):
    test('Some triples have been loaded',
         len(g))
    test('A person has been defined',
         g.subjects(RDF.type, term.URIRef('http://xmlns.com/foaf/0.1/Person')))
    print('All tests passed. Well done!')
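The `test` helper is elided; a minimal sketch of what these calls assume (a labelled truthiness check) could be:

def test(message, result):
    # Fail with a readable label when the check is falsy.
    assert result, message

Note that `g.subjects(...)` returns a generator, which is always truthy, so a stricter helper would materialise the result (e.g. `list(result)`) before asserting.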
Example #27
def main(data,lang):
	track=0
	nif=rdflib.Namespace("http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#")
	if lang != "en" :
		nlp = spacy.load(''+lang+'_core_news_sm')
	else:	
		nlp = spacy.load('en_core_web_sm')	
		
	for filename in os.listdir('Files/Input'+lang+'/'):
		if(track < int(data)):	
			graph2=rdflib.Graph()
			graph2.parse('Files/Input'+lang+'/'+filename,format='nt')
			g=Graph()
			name=filename.split(".")[0]
			for s,p,o in graph2:
				if type(o)==rdflib.term.Literal and nif.isString in p:
					sentences = nlp(o.encode().decode('utf-8'))
					for i in sentences.sents:
						try:
							BII=o.encode(sys.stdout.encoding, errors='replace').index(i.text.encode(sys.stdout.encoding, errors='replace'))
							EII=o.encode(sys.stdout.encoding, errors='replace').index(i.text.encode(sys.stdout.encoding, errors='replace'))+len(i.text.encode(sys.stdout.encoding, errors='replace'))
							inner=nlp(i.text.encode().decode('utf-8'))
							offset=0
							for ing in inner:
								offset = i.text.encode().decode('utf-8').index(ing.text.encode().decode('utf-8'),offset)
								BI= offset+ BII
								EI=BI +len(ing.text.encode().decode('utf-8'))
								offset=offset+len(ing.text.encode().decode('utf-8'))
								hello="http://purl.org/olia/olia.owl#"+ ing.pos_
								if ing.text.encode().decode('utf-8') not in string.punctuation:
									g.add([rdflib.term.URIRef("http://dbpedia.org/resource/"+name+"?dbpv=2016-10&nif=word_"+str(BI)+"_"+str(EI)),RDF.type,nif.Word])
									g.add([rdflib.term.URIRef("http://dbpedia.org/resource/"+name+"?dbpv=2016-10&nif=word_"+str(BI)+"_"+str(EI)),nif.beginIndex,rdflib.term.Literal(str(BI))])
									g.add([rdflib.term.URIRef("http://dbpedia.org/resource/"+name+"?dbpv=2016-10&nif=word_"+str(BI)+"_"+str(EI)),nif.endIndex,	rdflib.term.Literal(str(EI))])
									g.add([rdflib.term.URIRef("http://dbpedia.org/resource/"+name+"?dbpv=2016-10&nif=word_"+str(BI)+"_"+str(EI)),nif.anchorOf,rdflib.term.Literal(ing.text.encode().decode('utf-8'))])
									g.add([rdflib.term.URIRef("http://dbpedia.org/resource/"+name+"?dbpv=2016-10&nif=word_"+str(BI)+"_"+str(EI)),nif.referenceContext,rdflib.term.URIRef("http://dbpedia.org/resource/"+name+"?dbpv=2016-10&nif=context")])
									g.add([rdflib.term.URIRef("http://dbpedia.org/resource/"+name+"?dbpv=2016-10&nif=word_"+str(BI)+"_"+str(EI)),nif.oliaCategory,term.URIRef(hello)])                         								
						except:
							pass
			g.bind("nif",nif)        
			g.serialize(destination='Files/POS/'+filename,format="turtle")
			track=track+1
	print("Your Output is stored in POS Folder via spacyio")
Example #28
def URIRef(value: Any) -> Identifier:
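    # Thin typed wrapper: annotated call sites get back the broader Identifier type.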
    return term.URIRef(value)  # type: ignore
Example #29
data_path = 'annotated_images'
images = []

try:
    images = load_images(data_path)
    dicom_file_name = glob(data_path + '/*')
    print('Total of %d DICOM images.' % len(dicom_file_name))
except Exception:
    print(
        'Check the value of "data_path". If "data_path" is correct, then most likely '
        'no images have been added yet. Add images.')

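# Assumed context for this snippet (elided in the original), for illustration only:
#   from glob import glob
#   from rdflib import term
#   BASE = 'http://example.org/dicom/'  # hypothetical namespace base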
# ***************************************************Terms Begin********************************************************
# Classes
dicom_image = term.URIRef(BASE + 'DicomImage')

# Study Types
localizer = term.URIRef(BASE + 'Localizer')
general = term.URIRef(BASE + 'General')

# Annotation Status
annotated = term.URIRef(BASE + 'Annotated')
not_annotated = term.URIRef(BASE + 'NotAnnotated')

# Part of Body
head = term.URIRef(BASE + 'Head')
neck = term.URIRef(BASE + 'Neck')
chest = term.URIRef(BASE + 'Chest')
abdomen = term.URIRef(BASE + 'Abdomen')
pelvis = term.URIRef(BASE + 'Pelvis')
Example #30
                        )
                    ])
                    g.add([
                        rdflib.term.URIRef("http://dbpedia.org/resource/" +
                                           name + "?dbpv=2016-10&nif=word_" +
                                           str(BI) + "_" + str(EI)),
                        nif.oliaLink,
                        rdflib.term.URIRef("http://purl.org/olia/penn.owl#" +
                                           tagged[i][count][1])
                    ])
                    g.add([
                        rdflib.term.URIRef("http://dbpedia.org/resource/" +
                                           name + "?dbpv=2016-10&nif=word_" +
                                           str(BI) + "_" + str(EI)),
                        nif.oliaCategory,
                        term.URIRef(hello)
                    ])
                    g.add([
                        rdflib.term.URIRef("http://dbpedia.org/resource/" +
                                           name + "?dbpv=2016-10&nif=word_" +
                                           str(BI) + "_" + str(EI)),
                        nif.oliaCategory,
                        term.URIRef(hell)
                    ])
                    count = count + 1
            except:
                pass

g.bind("nif", nif)
#print(g.serialize(format="turtle"))
g.serialize(destination='Files/Search/' + name + "-pos.ttl", format="turtle")