Ejemplo n.º 1
0
 def test_to_python_ym_duration(self):
     """toPython() on an xsd:yearMonthDuration literal yields a Duration."""
     lit = Literal("P1Y2M", datatype=XSD.yearMonthDuration)
     value = lit.toPython()
     self.assertTrue(isinstance(value, Duration))
     self.assertEqual(value, parse_duration("P1Y2M"))
Ejemplo n.º 2
0
 def setUp(self):
     """Create a Literal typed as xsd:dateTime for the tests to use."""
     datatype_uri = URIRef('http://www.w3.org/2001/XMLSchema#dateTime')
     self.x = Literal("2008-12-01T18:02:00Z", datatype=datatype_uri)
Ejemplo n.º 3
0
 def test_util_from_n3_expectliteralanddtype(self):
     """from_n3 keeps the (unexpanded) datatype URI of a typed literal."""
     n3_string = '"true"^^xsd:boolean'
     parsed = util.from_n3(n3_string, default=None, backend=None)
     expected = Literal('true', datatype=URIRef('xsd:boolean'))
     self.assertEqual(parsed, expected)
Ejemplo n.º 4
0
    g = Graph()
    g.bind("foaf", FOAF)
    # FOAF-namespace predicates used for the scraped IMDb data
    predicate1 = URIRef(f"http://xmlns.com/foaf/0.1/rating")
    predicate2 = URIRef(f"http://xmlns.com/foaf/0.1/votes")
    predicate3 = URIRef(f"http://xmlns.com/foaf/0.1/episode")
    # IMDb "ratings.list" dumps are latin-1 encoded
    with open(f'ratings.list', encoding='latin-1') as f:
        for line_number, line in enumerate(f):
            # groups: 1=votes, 2=rating, 3=title, 4=year, 6=episode tag
            info = re.match(
                '^ +[0-9.]+ +([0-9]+) +([0-9.]+) +"?([^"\n]+)"? \(([0-9]+)[^\)]*\)( {(.+)})?',
                line)
            if info:
                # print(info.group(1),info.group(2),info.group(3))
                year_string = info.group(4).strip()
                # `year` comes from the enclosing scope (not visible here)
                if year_string == str(year):
                    movie_string = info.group(3).strip()
                    movie_name = Literal(movie_string, datatype=XSD.string)
                    # movie IRI minted from the percent-encoded title
                    movie = URIRef(
                        f"http://imdb.org/movie/{urllib.parse.quote(movie_string)}"
                    )
                    g.add((movie, FOAF.name, movie_name))
                    rating_string = info.group(2).strip()
                    # NOTE(review): group(2) is the rating (e.g. "7.5") yet is
                    # typed xsd:integer, while group(1) (the vote count) is
                    # typed xsd:float below -- the datatypes look swapped;
                    # confirm against the producer of ratings.list.
                    rating_name = Literal(rating_string, datatype=XSD.integer)
                    g.add((movie, predicate1, rating_name))
                    votes_string = info.group(1).strip()
                    votes_name = Literal(votes_string, datatype=XSD.float)
                    g.add((movie, predicate2, votes_name))
                    if info.group(6):
                        episode_string = info.group(6).strip()
                        episode_name = Literal(episode_string)
                        g.add((movie, predicate3, episode_name))
Ejemplo n.º 5
0
 def contains(cls, value):
     """Wrap *value* in a plain (untyped) RDF Literal."""
     wrapped = Literal(value)
     return wrapped
Ejemplo n.º 6
0
    def gen_graph(self,
                  serial='turtle',
                  namespace=None,
                  implicit_class_target=False):
        """Generate a SHACL shapes graph from the extracted ontology data.

        Builds NodeShapes for every class and PropertyShapes for every
        property recorded by extract_classes()/extract_props(), translates
        OWL cardinality and value restrictions into SHACL constraints, and
        prints the result as Turtle.

        serial: kept for interface compatibility (output is always Turtle).
        namespace: optional (uri, prefix) pair used as the shape namespace;
            falls back to http://www.example.org/ when absent or malformed.
        implicit_class_target: when True, classes themselves become
            NodeShapes (implicit class targets); otherwise EX[label] shapes
            with sh:targetClass links are minted.
        """
        self.gen_prefix_bindings()
        self.extract_props()
        self.extract_classes()
        self.extract_restrictions()
        ng = Graph()

        # Copy prefix bindings from the source graph.
        # BUGFIX: the loop variable was previously named `namespace`,
        # shadowing the parameter and clobbering the caller's value before
        # it was inspected below.
        for prefix, ns in self.G.namespace_manager.namespaces():
            ng.bind(prefix, ns)

        SH = Namespace('http://www.w3.org/ns/shacl#')
        ng.bind('sh', SH)

        EX = Namespace('http://www.example.org/')
        ng.bind('ex', EX)

        if namespace is not None:
            # `!= False` kept on purpose: uri_validator may return a
            # non-bool value -- TODO confirm its contract.
            if self.uri_validator(namespace[0]) != False:
                uri = namespace[0]
                # ensure the namespace URI ends with a separator
                if namespace[0][-1] not in ['#', '/', '\\']:
                    uri = namespace[0] + '/'
                EX = Namespace(uri)
                ng.bind(namespace[1], EX)
            else:
                print('##malformed URI, using http://example.org/ instead...')
                EX = Namespace('http://www.example.org/')
                ng.bind('ex', EX)
        else:
            EX = Namespace('http://www.example.org/')
            ng.bind('ex', EX)

        # add class Node Shapes
        for c in self.CLASSES.keys():
            subject = c
            clabel = self.CLASSES[c]['label']

            if not implicit_class_target:
                subject = EX[clabel]
                ng.add((subject, SH.targetClass, c))
            else:
                ng.add((subject, RDF.type, RDFS.Class))
                # Copy rdfs:subClassOf
                for t in self.G.triples((subject, RDFS.subClassOf, None)):
                    ng.add(t)

            ng.add((subject, RDF.type, SH.NodeShape))
            ng.add((subject, SH.nodeKind, SH.BlankNodeOrIRI))
            if self.CLASSES[c]['definition'] is not None:
                ng.add((subject, SH.description,
                        Literal((self.CLASSES[c]['definition']))))

        # add Property Shapes
        for p in self.PROPS.keys():
            label = self.PROPS[p]['label']
            # copy rdfs:label as property shape names
            for o in self.G.objects(subject=p, predicate=RDFS.label):
                ng.add((EX[label], SH.name, o))
            ng.add((EX[label], RDF.type, SH.PropertyShape))
            ng.add((EX[label], SH.path, p))

            if OWL.FunctionalProperty in self.PROPS[p]['type']:
                ng.add((EX[label], SH.maxCount, Literal(1)))

            if OWL.InverseFunctionalProperty in self.PROPS[p]['type']:
                ng.add((EX[label], SH.path, BNode(p + 'inverse')))
                ng.add((BNode(p + 'inverse'), SH.inversePath, p))
                ng.add((BNode(p + 'inverse'), SH.maxCount, Literal(1)))

            if self.PROPS[p]['range_value'] is not None:
                rang = self.PROPS[p]['range_value']
                st = BNode()
                ng.add((EX[label], SH['in'], st))
                Collection(ng, st, [Literal(x) for x in rang])

            if self.PROPS[p]['range'] is not None:
                rang = self.PROPS[p]['range']
                if rang in self.datatypes:
                    ng.add((EX[label], SH.datatype, rang))
                else:
                    ng.add((EX[label], SH['class'], rang))

            if self.PROPS[p]['e_prop'] is not None:
                for x in self.PROPS[p]['e_prop']:
                    ng.add((EX[label], SH.equals, x))

            # create range unions using sh:or (hand-built RDF list).
            # The two original branches were identical except for the
            # constraint predicate, so they are merged here.
            if self.PROPS[p]['range_union'] is not None:
                rang = self.PROPS[p]['range_union']
                if set(rang).issubset(self.datatypes):
                    constraint = SH['datatype']
                else:
                    constraint = SH['class']

                st = BNode(label + str(0) + 'a')
                ng.add((EX[label], EX['or'], st))

                for x, y in enumerate(rang):
                    if x == 0:
                        ng.add((st, RDF.first,
                                BNode(label + str(x) + '_name')))
                        ng.add((BNode(label + str(x) + '_name'),
                                constraint, y))
                        ng.add((st, RDF.rest,
                                BNode(label + str(x + 1) + 'a')))
                    else:
                        ng.add((BNode(label + str(x) + 'a'), RDF.first,
                                BNode(label + str(x) + '_name')))
                        ng.add((BNode(label + str(x) + '_name'),
                                constraint, y))
                    if x + 1 == len(rang):
                        ng.add((BNode(label + str(x) + 'a'), RDF.rest,
                                RDF.nil))
                    else:
                        ng.add((BNode(label + str(x) + 'a'), RDF.rest,
                                BNode(label + str(x + 1) + 'a')))

            if self.PROPS[p]['definition'] is not None:
                ng.add((EX[label], SH.description,
                        Literal((self.PROPS[p]['definition']))))

            # attach property shapes to their domain class shapes
            if self.PROPS[p]['domain'] is not None:
                subject = self.PROPS[p]['domain']
                if subject in self.CLASSES.keys():
                    plabel = self.PROPS[p]['label']
                    if implicit_class_target:
                        ng.add((subject, SH.property, EX[plabel]))
                    else:
                        dlabel = self.CLASSES[subject]['label']
                        ng.add((EX[dlabel], SH.property, EX[plabel]))

            if self.PROPS[p]['domain_union'] is not None:
                for d in self.PROPS[p]['domain_union']:
                    if d in self.CLASSES.keys():
                        plabel = self.PROPS[p]['label']
                        if implicit_class_target:
                            ng.add((d, SH.property, EX[plabel]))
                        else:
                            dlabel = self.CLASSES[d]['label']
                            ng.add((EX[dlabel], SH.property, EX[plabel]))

        # translate OWL restrictions into SHACL constraints
        for r in self.REST.keys():
            blank = BNode()

            ng.add((EX[self.sh_label_gen(self.REST[r]['onClass'])],
                    SH.property, blank))
            ng.add((blank, SH.path, self.REST[r]['onProp']))
            if self.REST[r]['type'] in [OWL.cardinality]:
                ng.add((blank, SH.minCount,
                        Literal(self.REST[r]['value'], datatype=XSD.integer)))
                ng.add((blank, SH.maxCount,
                        Literal(self.REST[r]['value'], datatype=XSD.integer)))
            elif self.REST[r]['type'] in [OWL.minCardinality]:
                ng.add((blank, SH.minCount,
                        Literal(self.REST[r]['value'], datatype=XSD.integer)))
            elif self.REST[r]['type'] in [OWL.maxCardinality]:
                ng.add((blank, SH.maxCount,
                        Literal(self.REST[r]['value'], datatype=XSD.integer)))

            elif self.REST[r]['type'] in [OWL.allValuesFrom]:
                if type(self.REST[r]['value']) == BNode:
                    # anonymous class: look for an owl:unionOf list and
                    # rebuild it as an sh:or RDF list
                    for sub1, pred1, ob1 in self.G.triples(
                        (self.REST[r]['value'], None, None)):
                        if pred1 == OWL.unionOf:
                            union_c = Collection(self.G, ob1)
                            dummy = r + self.REST[r]['value']
                            nest = BNode(dummy + str(0) + 'a')
                            ng.add((blank, SH['or'], nest))
                            for x, y in enumerate(union_c):
                                if x == 0:
                                    ng.add((nest, RDF.first,
                                            BNode(dummy + str(x) + '_name')))
                                    ng.add((BNode(dummy + str(x) + '_name'),
                                            SH['class'], y))
                                    ng.add((nest, RDF.rest,
                                            BNode(dummy + str(x + 1) + 'a')))
                                else:
                                    ng.add((BNode(dummy + str(x) + 'a'),
                                            RDF.first,
                                            BNode(dummy + str(x) + '_name')))
                                    ng.add((BNode(dummy + str(x) + '_name'),
                                            SH['class'], y))
                                # BUGFIX: originally `x == len(rang)`, which
                                # read the stale `rang` left over from the
                                # property loop and never nil-terminated the
                                # list; terminate at the last union member,
                                # mirroring the range_union logic above.
                                if x + 1 == len(union_c):
                                    ng.add((BNode(dummy + str(x) + 'a'),
                                            RDF.rest, RDF.nil))
                                else:
                                    ng.add(
                                        (BNode(dummy + str(x) + 'a'), RDF.rest,
                                         BNode(dummy + str(x + 1) + 'a')))
                elif type(self.REST[r]['value']) in self.datatypes:
                    ng.add((blank, SH['datatype'], self.REST[r]['value']))
                else:
                    ng.add((blank, SH['class'], self.REST[r]['value']))

            elif self.REST[r]['type'] in [OWL.someValuesFrom]:
                ng.add((blank, SH['qualifiedMinCount'],
                        Literal(1, datatype=XSD.integer)))
                ng.add((blank, SH['qualifiedValueShape'],
                        BNode('count' + str(r))))
                ng.add((BNode('count' + str(r)), SH['class'],
                        self.REST[r]['value']))
            else:
                # unrecognised restriction types are silently skipped
                pass

        print(ng.serialize(format='ttl').decode())
Ejemplo n.º 7
0
def convertTerm(term, queryProlog):
    """
    Convert a parsed SPARQL triple component into the corresponding rdflib
    term (URIRef / BNode / Literal / Variable), resolving QNames and
    relative IRIs against the prefix/base declarations in *queryProlog*.
    Values of unrecognised types are returned unchanged.
    """
    #from rdfextras.sparql.sql.RdfSqlBuilder import BNodeRef
    if isinstance(term, Variable):
        if hasattr(
                queryProlog,
                'variableBindings') and term in queryProlog.variableBindings:
            # Resolve pre-bound variables at SQL generation time for
            # SPARQL-to-SQL invocations
            rt = queryProlog.variableBindings.get(term, term)
            # wrap told BNodes so downstream stages can tell them apart
            return isinstance(rt, BNode) and BNodeRef(rt) or rt
        else:
            return term
    elif isinstance(term, BNodeRef):
        return term
    elif isinstance(term, BNode):
        #from rdfextras.sparql.sql.RdfSqlBuilder import RdfSqlBuilder
        #if isinstance(queryProlog,RdfSqlBuilder):
        #    return BNode(term + '_bnode') # ensure namespace doesn't overlap with variables
        return term
    elif isinstance(term, QName):
        #QNames and QName prefixes are the same in the grammar
        if not term.prefix:
            # no prefix: resolve against the default prefix and/or BASE
            if queryProlog is None:
                return URIRef(term.localname)
            else:
                if queryProlog.baseDeclaration and u'' in queryProlog.prefixBindings and queryProlog.prefixBindings[
                        u'']:
                    # default prefix itself is relative to BASE
                    base = URIRef(Resolver().normalize(
                        queryProlog.prefixBindings[u''],
                        queryProlog.baseDeclaration))
                elif queryProlog.baseDeclaration:
                    base = queryProlog.baseDeclaration
                else:
                    base = queryProlog.prefixBindings[u'']
                return URIRef(Resolver().normalize(term.localname, base))
        elif term.prefix == '_':
            #Told BNode See: http://www.w3.org/2001/sw/DataAccess/issues#bnodeRef
            #from rdfextras.sparql.sql.RdfSqlBuilder import RdfSqlBuilder, EVAL_OPTION_ALLOW_BNODE_REF, BNodeRef
            # if isinstance(queryProlog,RdfSqlBuilder):
            #     if queryProlog.UseEvalOption(EVAL_OPTION_ALLOW_BNODE_REF):
            #         # this is a 'told' BNode referencing a BNode in the data set (i.e. previously returned by a query)
            #         return BNodeRef(term.localname)
            #     else:
            #          # follow the spec and treat it as a variable
            #         return BNode(term.localname + '_bnode')  # ensure namespace doesn't overlap with variables
            import warnings
            warnings.warn(
                "The verbatim interpretation of explicit bnode identifiers is contrary to (current) DAWG stance",
                SyntaxWarning)
            return SessionBNode(term.localname)
        else:
            # ordinary prefixed name: expand via the prefix bindings
            return URIRef(Resolver().normalize(
                term.localname, queryProlog.prefixBindings[term.prefix]))
    elif isinstance(term, QNamePrefix):
        if queryProlog is None:
            return URIRef(term)
        else:
            if queryProlog.baseDeclaration is None:
                return URIRef(term)
            return URIRef(Resolver().normalize(term,
                                               queryProlog.baseDeclaration))
    elif isinstance(term, ParsedString):
        return Literal(term)
    elif isinstance(term, ParsedDatatypedLiteral):
        # datatype may itself be a QName that needs resolving
        dT = term.dataType
        if isinstance(dT, QName):
            dT = convertTerm(dT, queryProlog)
        return Literal(term.value, datatype=dT)
    elif isinstance(term, IRIRef) and queryProlog.baseDeclaration:
        return URIRef(Resolver().normalize(term, queryProlog.baseDeclaration))
    else:
        return term
Ejemplo n.º 8
0
    # top level object container for sensors, default class is SmartObject
    sensors = baseObject.create({
        'resourceName': 'sensors',
        'resourceClass': 'SmartObject'
    })

    # weather resource under sensors for the weather sensor
    # create a default class SmartObject for the weather sensor cluster
    weather = sensors.create({
        'resourceName': 'rhvWeather-01',
        'resourceClass': 'SmartObject'
    })

    # example description in simple link-format like concepts
    # (relative URIRefs name the resources; objects are plain Literals)
    baseObject.Description.set(
        (URIRef('sensors/rhvWeather-01'), RDFS.Class, Literal('SmartObject')))
    baseObject.Description.set(
        (URIRef('sensors/rhvWeather-01'), RDF.type, Literal('SensorSystem')))
    baseObject.Description.set(
        (URIRef('sensors/rhvWeather-01'), RDFS.Resource, Literal('Weather')))
    #
    # per-sensor entries: each child resource gets a type and a Resource label
    baseObject.Description.set(
        (URIRef('sensors/rhvWeather-01/outdoor_temperature'), RDF.type,
         Literal('sensor')))
    baseObject.Description.set(
        (URIRef('sensors/rhvWeather-01/outdoor_temperature'), RDFS.Resource,
         Literal('temperature')))
    baseObject.Description.set(
        (URIRef('sensors/rhvWeather-01/outdoor_humidity'), RDF.type,
         Literal('sensor')))
    baseObject.Description.set(
Ejemplo n.º 9
0
proposed = []
# Column layout of the legal_Locations import CSV
Location_schema = namedtuple(
    'Legal_Location', ('Term', 'Label', 'ParentTerm', 'Alpha2', 'Alpha3',
                       'Numeric', 'M49', 'broader', 'narrower', 'created',
                       'modified', 'status', 'contributors', 'resolution'))
concepts = extract_terms_from_csv(f'{IMPORT_CSV_PATH}/legal_Locations.csv',
                                  Location_schema)
for row in concepts:
    # non-accepted terms are collected separately and skipped
    if row.status not in VOCAB_TERM_ACCEPT:
        proposed.append(row.Term)
        continue
    term = BASE[row.Term]
    # the parent is declared with a dpv: prefix in the CSV; strip it for DPVO
    parent = row.ParentTerm.replace("dpv:", "")
    graph.add((term, RDF.type, DPVO[f'{parent}']))
    graph.add((term, RDF.type, OWL.NamedIndividual))
    graph.add((term, DCT.title, Literal(row.Label, lang='en')))
    graph.add((term, RDFS.label, Literal(row.Label, lang='en')))
    # ISO codes are only emitted when Alpha2 is present; Alpha3/Numeric are
    # assumed to accompany it -- TODO confirm the CSV guarantees this
    if row.Alpha2:
        graph.add(
            (term, BASE.iso_alpha2, Literal(row.Alpha2, datatype=XSD.string)))
        graph.add(
            (term, BASE.iso_alpha3, Literal(row.Alpha3, datatype=XSD.string)))
        graph.add(
            (term, BASE.iso_numeric, Literal(row.Numeric,
                                             datatype=XSD.string)))
    if row.M49:
        graph.add((term, BASE.un_m49, Literal(row.M49, datatype=XSD.string)))
    # broader terms are a comma-separated list of prefixed names
    parents = [p.strip() for p in row.broader.split(',') if p]
    for item in parents:
        print(f'item: {item}')
        prefix, parent = item.split(':')
Ejemplo n.º 10
0
 def test_duration_sum(self):
     """Adding a duration Literal and a parsed duration yields their sum."""
     left = Literal("P1Y2M4DT5H6M7S", datatype=XSD.duration)
     right = Literal("P1Y2M4DT5H6M7S", datatype=XSD.duration).toPython()
     expected = Literal("P2Y4M8DT10H12M14S", datatype=XSD.duration)
     self.assertEqual(left + right, expected)
Ejemplo n.º 11
0
 def testNoDanglingPoint(self):
     """A double literal like 0.88 must serialise without a dangling point."""
     lit = Literal("0.88", datatype=_XSD_DOUBLE)
     rendered = _literal_n3(lit, use_plain=True)
     print(rendered)
     self.assertTrue(rendered in ["8.8e-01", "0.88"], rendered)
Ejemplo n.º 12
0
 def test_duration_le(self):
     """A shorter duration compares strictly less than a longer one."""
     shorter = Literal("P4DT5H6M7S", datatype=XSD.duration)
     longer = Literal("P8DT10H12M14S", datatype=XSD.duration)
     self.assertTrue(shorter < longer)
Ejemplo n.º 13
0
 def test_equality(self):
     """Durations written with weeks equal their day-normalised form."""
     with_weeks = Literal("P1Y2M3W4DT5H6M7S", datatype=XSD.duration)
     with_days = Literal("P1Y2M25DT5H6M7S", datatype=XSD.duration)
     self.assertTrue(with_weeks == with_days)
Ejemplo n.º 14
0
 def test_to_python_ymdhms_duration(self):
     """toPython() on a full xsd:duration literal yields a Duration."""
     lit = Literal("P1Y2M4DT5H6M7S", datatype=XSD.duration)
     value = lit.toPython()
     self.assertTrue(isinstance(value, Duration))
     self.assertEqual(value, parse_duration("P1Y2M4DT5H6M7S"))
Ejemplo n.º 15
0
 def setUp(self):
     """Create one instance of each rdflib term type plus plain strings."""
     self.uriref = URIRef("http://example.org/")
     self.bnode = BNode()
     # a Literal whose lexical form happens to look like a URI
     self.literal = Literal("http://example.org/")
     self.python_literal = u"http://example.org/"
     self.python_literal_2 = u"foo"
Ejemplo n.º 16
0
def add_common_triples_for_all_terms(term, graph):
    '''Adds triples for any term to graph
    Common triples are those shared by Class and Property
    term: data structure of term; is object with attributes
    graph: rdflib graph
    returns: None'''

    # rdfs:label
    graph.add(
        (BASE[f'{term.term}'], RDFS.label, Literal(term.rdfs_label,
                                                   lang='en')))
    # dct:description
    graph.add((BASE[f'{term.term}'], DCT.description,
               Literal(term.dct_description, lang='en')))
    # rdfs:seeAlso
    # TODO: use relation field for relevant terms
    # currently this considers all terms that are related to use rdfs:seeAlso
    # the next column contains the relation, parse and use that
    if term.rdfs_seealso:
        links = [l.strip() for l in term.rdfs_seealso.split(',')]
        for link in links:
            if link.startswith('http'):
                graph.add((BASE[f'{term.term}'], RDFS.seeAlso, URIRef(link)))
            elif ':' in link:
                # assuming something like rdfs:Resource
                prefix, label = link.split(':')
                # gets the namespace from registered ones and create URI
                # will throw an error if namespace is not registered
                # dpv internal terms are expected to have the prefix i.e. dpv:term
                link = NAMESPACES[prefix][f'{label}']
                graph.add((BASE[f'{term.term}'], RDFS.seeAlso, link))
            else:
                # neither URL nor prefixed name: keep it as a string literal
                graph.add((BASE[f'{term.term}'], RDFS.seeAlso,
                           Literal(link, datatype=XSD.string)))
    # rdfs:comment
    if term.rdfs_comment:
        graph.add((BASE[f'{term.term}'], RDFS.comment,
                   Literal(term.rdfs_comment, lang='en')))
    # rdfs:isDefinedBy
    if term.rdfs_isdefinedby:
        links = [
            l.strip() for l in term.rdfs_isdefinedby.replace('(', '').replace(
                ')', '').split(',')
        ]
        # entries come in (label, link) pairs; consuming the iterator twice
        # per pass walks them pairwise.  NOTE(review): an odd number of
        # entries raises StopIteration here -- input assumed well-formed.
        link_iterator = iter(links)
        for label in link_iterator:
            link = next(link_iterator)
            # add link to a temp file so that the label can be displayed in HTML
            if link not in LINKS:
                LINKS[link] = label
            # add link to graph
            if link.startswith('http'):
                graph.add((BASE[f'{term.term}'], DCT.source, URIRef(link)))
            else:
                graph.add((BASE[f'{term.term}'], DCT.source,
                           Literal(link, datatype=XSD.string)))
    # dct:created
    graph.add((BASE[f'{term.term}'], DCT.created,
               Literal(term.dct_created, datatype=XSD.date)))
    # dct:modified
    if term.dct_modified:
        graph.add((BASE[f'{term.term}'], DCT.modified,
                   Literal(term.dct_modified, datatype=XSD.date)))
    # sw:term_status
    graph.add((BASE[f'{term.term}'], SW.term_status,
               Literal(term.sw_termstatus, lang='en')))
    # dct:creator
    if term.dct_creator:
        authors = [a.strip() for a in term.dct_creator.split(',')]
        for author in authors:
            graph.add((BASE[f'{term.term}'], DCT.creator,
                       Literal(author, datatype=XSD.string)))
    # is defined by this vocabulary
    graph.add((BASE[f'{term.term}'], RDFS.isDefinedBy, BASE['']))
    # resolution
    # do nothing

    return None
Ejemplo n.º 17
0
For example:

>>> from rdflib import Literal,XSD
>>> Literal("01", datatype=XSD.int)
rdflib.term.Literal(u'1', datatype=rdflib.term.URIRef(u'http://www.w3.org/2001/XMLSchema#integer'))

This flag may be changed at any time, but will only affect literals
created after that time, previously created literals will remain
(un)normalized.

"""


DAWG_LITERAL_COLLATION = False
"""
DAWG_LITERAL_COLLATION determines how literals are ordered or compared
to each other.

In SPARQL, applying the >,<,>=,<= operators to literals of
incompatible data-types is an error, i.e:

Literal(2)>Literal('cake') is neither true nor false, but an error.

This is a problem in PY3, where lists of Literals of incompatible
types can no longer be sorted.

Setting this flag to True gives you strict DAWG/SPARQL compliance,
setting it to False will order Literals with incompatible datatypes by
datatype URI
Ejemplo n.º 18
0
def add_triples_for_properties(properties, graph):
    '''Adds triples for properties to graph
    properties: list of CSV data rows
    graph: rdflib graph
    returns: None'''

    proposed = []
    for prop in properties:
        # only record accepted classes
        if prop.sw_termstatus not in VOCAB_TERM_ACCEPT:
            if prop.sw_termstatus == 'proposed':
                proposed.append(prop.term)
            continue
        # rdf:type
        DEBUG(prop.term)
        graph.add((BASE[f'{prop.term}'], RDF.type, RDF.Property))
        if prop.rdfs_domain or prop.rdfs_range:
            graph.add((BASE[f'{prop.term}'], RDF.type, OWL.ObjectProperty))
        else:
            graph.add((BASE[f'{prop.term}'], RDF.type, OWL.AnnotationProperty))
        # rdfs:domain
        if prop.rdfs_domain:
            # assuming something like rdfs:Resource
            prefix, label = prop.rdfs_domain.split(':')
            if 'o__' in prefix:
                # explicit owl declaration
                link = prefix.replace('o__')
                link = NAMESPACES[prefix][f'{label}']
            elif prefix == 'dpv':
                if label == 'Concept':
                    link = OWL.Thing
                else:
                    link = NAMESPACES_DPV_OWL[f'{prefix}'][f'{label}']
            else:
                link = NAMESPACES[prefix][f'{label}']
            # gets the namespace from registered ones and create URI
            # will throw an error if namespace is not registered
            # dpv internal terms are expected to have the prefix i.e. dpv:term
            graph.add((BASE[f'{prop.term}'], RDFS.domain, link))
        # rdfs:range
        if prop.rdfs_range:
            # assuming something like rdfs:Resource
            prefix, label = prop.rdfs_range.split(':')
            if 'o__' in prefix:
                # explicit owl declaration
                link = prefix.replace('o__')
                link = NAMESPACES[prefix][f'{label}']
            elif prefix == 'dpv':
                if label == 'Concept':
                    link = OWL.Thing
                else:
                    link = NAMESPACES_DPV_OWL[f'{prefix}'][f'{label}']
            else:
                link = NAMESPACES[prefix][f'{label}']
            # gets the namespace from registered ones and create URI
            # will throw an error if namespace is not registered
            # dpv internal terms are expected to have the prefix i.e. dpv:term
            graph.add((BASE[f'{prop.term}'], RDFS.range, link))
        # rdfs:subPropertyOf
        if prop.rdfs_subpropertyof:
            parents = [p.strip() for p in prop.rdfs_subpropertyof.split(',')]
            for parent in parents:
                if parent == 'dpv:Relation':
                    continue
                if parent.startswith('http'):
                    graph.add((BASE[f'{prop.term}'], RDFS.subPropertyOf,
                               URIRef(parent)))
                elif ':' in parent:
                    # assuming something like rdfs:Resource
                    prefix, term = parent.split(':')
                    # gets the namespace from registered ones and create URI
                    # will throw an error if namespace is not registered
                    # dpv internal terms are expected to have the prefix i.e. dpv:term
                    if 'o__' in prefix:
                        # explicit owl declaration
                        parent = prefix.replace('o__')
                        parent = NAMESPACES[prefix][f'{term}']
                    elif prefix == 'dpv':
                        parent = NAMESPACES_DPV_OWL[f'{prefix}'][f'{term}']
                    else:
                        parent = NAMESPACES[prefix][f'{term}']
                    graph.add(
                        (BASE[f'{prop.term}'], RDFS.subPropertyOf, parent))
                else:
                    graph.add((BASE[f'{prop.term}'], RDFS.subPropertyOf,
                               Literal(parent, datatype=XSD.string)))
        add_common_triples_for_all_terms(prop, graph)

    return proposed
Ejemplo n.º 19
0
def mapToOperator(expr, prolog, combinationArg=None, constraint=False):
    """
    Reduces certain expressions (operator expressions, function calls,
    terms, and combinator expressions) into strings of their Python
    equivalent for later evaluation.

    expr: the parsed SPARQL expression node to translate
    prolog: query prolog carrying prefix/base declarations
    combinationArg: optional argument name appended as an invocation
    constraint: True when translating inside a FILTER constraint
    """
    #print expr, type(expr), constraint
    # py2-era "cond and a or b" conditional: invocation only when requested
    combinationInvokation = combinationArg and '(%s)' % combinationArg or ""
    if isinstance(expr, ListRedirect):
        expr = expr.reduce()
    if isinstance(expr, UnaryOperator):
        return UnaryOperatorMapping[type(expr)] % (mapToOperator(
            expr.argument, prolog, combinationArg, constraint=constraint))
    elif isinstance(expr, BinaryOperator):
        return BinaryOperatorMapping[type(expr)] % (
            mapToOperator(
                expr.left, prolog, combinationArg, constraint=constraint),
            mapToOperator(
                expr.right, prolog, combinationArg,
                constraint=constraint), combinationInvokation)
    elif isinstance(expr, (Variable, Unbound)):
        if constraint:
            return """operators.EBV(rdflib.Variable("%s"))%s""" % (
                expr.n3(), combinationInvokation)
        else:
            return '"?%s"' % expr
    elif isinstance(expr, ParsedREGEXInvocation):
        return 'operators.regex(%s,%s%s)%s' % (
            mapToOperator(
                expr.arg1, prolog, combinationArg, constraint=constraint),
            mapToOperator(
                expr.arg2, prolog, combinationArg, constraint=constraint),
            expr.arg3 and ',"' + str(expr.arg3) + '"'
            or '', combinationInvokation)
    elif isinstance(expr, BuiltinFunctionCall):
        normBuiltInName = FUNCTION_NAMES[expr.name].lower()
        normBuiltInName = CAMEL_CASE_BUILTINS.get(
            normBuiltInName, 'operators.' + normBuiltInName)
        return "%s(%s)%s" % (normBuiltInName, ",".join(
            [mapToOperator(i, prolog, combinationArg, constraint=constraint)
             for i in expr.arguments]), combinationInvokation)
    elif isinstance(expr, ParsedDatatypedLiteral):
        lit = Literal(expr.value, datatype=convertTerm(expr.dataType, prolog))
        if constraint:
            return """operators.EBV(%r)%s""" % (lit, combinationInvokation)
        else:
            return repr(lit)
    elif isinstance(expr, Literal):
        return repr(expr)
    elif isinstance(expr, URIRef):
        import warnings
        warnings.warn(
            "There is the possibility of __repr__ being deprecated in python3K",
            DeprecationWarning,
            stacklevel=3)
        return repr(expr)
    elif isinstance(expr, QName):
        if expr[:2] == '_:':
            return repr(BNode(expr[2:]))
        else:
            return "'%s'" % convertTerm(expr, prolog)
    elif isinstance(expr, basestring):
        return "'%s'" % convertTerm(expr, prolog)
    elif isinstance(expr, ParsedAdditiveExpressionList):
        return 'Literal(%s)' % (operators.addOperator(
            [mapToOperator(item, prolog, combinationArg='i',
                           constraint=constraint) for item in expr],
            combinationArg))
    elif isinstance(expr, FunctionCall):
        # NOTE(review): fUri is only bound when expr.name is a QName; other
        # name types would raise NameError below -- presumably the grammar
        # guarantees a QName here; confirm upstream.
        if isinstance(expr.name, QName):
            fUri = convertTerm(expr.name, prolog)
        if fUri in XSDToPython:
            return "operators.XSDCast(%s,'%s')%s" % (mapToOperator(
                expr.arguments[0],
                prolog,
                combinationArg='i',
                constraint=constraint), fUri, combinationInvokation)
        #@@FIXME The hook for extension functions goes here
        if fUri not in prolog.extensionFunctions:
            import warnings
            warnings.warn(
                "Use of unregistered extension function: %s" % (fUri),
                UserWarning, 1)
        else:
            # BUGFIX: the original raised `NotImplemented(...)` -- that is a
            # non-callable constant, not an exception type, so raising it is
            # itself a TypeError.  NotImplementedError is the intended type.
            raise NotImplementedError(
                "Extension Mechanism hook not yet completely hooked up..")
    else:
        if isinstance(expr, ListRedirect):
            expr = expr.reduce()
            if expr.pyBooleanOperator:
                return expr.pyBooleanOperator.join([
                    mapToOperator(i, prolog, constraint=constraint)
                    for i in expr
                ])
        raise Exception("What do i do with %s (a %s)?" %
                        (expr, type(expr).__name__))
Ejemplo n.º 20
0
def cast_value(v, **kws):
    """Coerce *v* into an rdflib ``Literal``.

    Values that are already ``Literal`` instances are returned untouched;
    anything else is wrapped via ``Literal(v, **kws)`` (e.g. to attach a
    ``datatype`` or ``lang``).
    """
    if isinstance(v, Literal):
        return v
    return Literal(v, **kws)
Ejemplo n.º 21
0
    def test_removeN_subjects(self):
        """removeN() with subject-only quads must delete every matching triple."""
        subj1 = URIRef('http://foo.com/subject1')
        subj2 = URIRef('http://foo.com/subject2')

        self.rdflib_graph.add((subj1, URIRef('http://bar.com/pred1'), Literal('obj1')))
        self.rdflib_graph.add((subj2, URIRef('http://bar.com/pred2'), Literal('obj2')))
        self.rdflib_graph.commit()

        self.assertTrue(len(self.rdflib_graph) == 2)
        # Wildcard predicate/object/context: only the subject constrains the match.
        self.rdflib_graph.removeN([(subj1, None, None, None),
                                   (subj2, None, None, None)])
        self.assertTrue(len(self.rdflib_graph) == 0)
Ejemplo n.º 22
0
def extract_deprecated(g: Graph):
    """Yield every subject in *g* marked with ``owl:deprecated true``."""
    # Equivalent to the slice form g[:OWL.deprecated:Literal(True)].
    return g.subjects(OWL.deprecated, Literal(True))
Ejemplo n.º 23
0
 def contains(cls, value):
     # NOTE(review): despite the name, this does not test membership — it
     # builds and returns a Literal typed with the class's datatype,
     # presumably for use as a comparison/lookup term; confirm with callers.
     return Literal(value, datatype=cls._DATATYPE)
 # Build foaf triples from an IMDb plain-text list file (latin-1 encoded).
 # NOTE(review): this fragment appears truncated — the `try:` below has no
 # visible `except`; `map`, `key`, `year` and `g` are defined elsewhere.
 predicate = URIRef(f"http://xmlns.com/foaf/0.1/{key}_of")
 with open(f'{map[key]}.list', encoding='latin-1') as f:
     # Person state carries across lines: the person column is only present
     # on the first row of each person's filmography block.
     person = None
     person_added = False
     person_name = None
     person_string = None
     for line in f:
         try:
             #print(line)
             # Groups: 2 = person (optional), 3 = movie title, 4 = year,
             # 6 = episode info inside {...} (optional).
             info = re.match(
                 '^(([^\t\n]+\t+)|\t+)"?([^"\n]+)"? \(([0-9?]{4})[^\)]*\)( {([^}]+)})?.*$',
                 line)
             if info:
                 if info.group(2):
                     # New person block: remember their name/URI for the
                     # following continuation lines.
                     person_string = info.group(2).strip()
                     person_name = Literal(person_string,
                                           datatype=XSD.string)
                     person = URIRef(
                         f"http://imdb.org/{key}/{urllib.parse.quote(person_string)}"
                     )
                     # NOTE(review): year/movie handling is nested under the
                     # person branch, so continuation lines (no person column)
                     # are ignored — possibly unintended; confirm.
                     year_string = info.group(4).strip()
                     if year_string == str(year):
                         movie_string = info.group(3).strip()
                         movie_name = Literal(movie_string,
                                              datatype=XSD.string)
                         movie = URIRef(
                             f"http://imdb.org/movie/{urllib.parse.quote(movie_string)}"
                         )
                         g.add((movie, FOAF.name, movie_name))
                         #print(person_string, movie_string, year_string)
                         g.add((person, FOAF.name, person_name))
                         g.add((person, predicate, movie))
Ejemplo n.º 25
0
def rdf_description(name, notation='xml'):
    """Serialize the node titled *name* as RDF in the given *notation*.

    Looks up the NID node, re-fetches it as its concrete model class based on
    ``reftype``, emits one triple per non-excluded ``__dict__`` field into an
    IOMemory-backed graph, prints the serialization and closes the graph.

    :param name: title of the node to describe.
    :param notation: rdflib serialization format (one of ``valid_formats``).
    """
    valid_formats = ["xml", "n3", "ntriples", "trix"]  # accepted notations
    default_graph_uri = "http://gstudio.gnowledge.org/rdfstore"

    # Get the IOMemory plugin.
    store = plugin.get('IOMemory', Store)('rdfstore')

    # Open previously created store, or create it if it doesn't exist yet
    graph = Graph(store="IOMemory", identifier=URIRef(default_graph_uri))
    path = mkdtemp()
    rt = graph.open(path, create=False)
    if rt == NO_STORE:
        graph.open(path, create=True)
    else:
        assert rt == VALID_STORE, "The underlying store is corrupt"

    graph.bind("gstudio", "http://gnowledge.org/")
    # Internal Django/MPTT bookkeeping fields that must not become triples.
    exclusion_fields = [
        "id", "rght", "node_ptr_id", "image", "lft", "_state",
        "_altnames_cache", "_tags_cache", "nid_ptr_id", "_mptt_cached_fields"
    ]

    # Resolve the concrete model class from the node's reftype.
    node = NID.objects.get(title=name)
    node_type = node.reftype

    # Types whose namespace comes from link(node).
    linked_models = {
        'Gbobject': Gbobject,
        'None': Gbobject,
        'Processes': Gbobject,
        'System': Gbobject,
        'Objecttype': Objecttype,
        'Attributetype': Attributetype,
        'Complement': Complement,
        'Union': Union,
        'Intersection': Intersection,
        'Expression': Expression,
        'Processtype': Processtype,
        'Systemtype': Systemtype,
        'AttributeSpecification': AttributeSpecification,
        'RelationSpecification': RelationSpecification,
    }
    # Types that use the fixed gstudio namespace instead of link().
    namespace_models = {
        'Attribute': Attribute,
        'Relationtype': Relationtype,
        'Metatype': Metatype,
    }

    if node_type in linked_models:
        node = linked_models[node_type].objects.get(title=name)
        rdflib = link(node)
    elif node_type in namespace_models:
        node = namespace_models[node_type].objects.get(title=name)
        rdflib = Namespace('http://sbox.gnowledge.org/gstudio/')
    else:
        # Unknown reftype: keep the NID node, use the default namespace.
        rdflib = Namespace('http://sbox.gnowledge.org/gstudio/')

    node_dict = node.__dict__

    subject = str(node_dict['id'])
    for key in node_dict:
        if key not in exclusion_fields:
            predicate = str(key)
            pobject = str(node_dict[predicate])
            graph.add((rdflib[subject], rdflib[predicate], Literal(pobject)))

    rdf_code = graph.serialize(format=notation)

    graph.commit()
    print(rdf_code)  # was a Python-2 print statement; now valid Python 3
    graph.close()
Ejemplo n.º 26
0
def check_ro(base_path: Path, nested: bool = False) -> None:
    """Validate a CWLProv research object rooted at *base_path*.

    Parses ``metadata/manifest.json`` as JSON-LD into an RDF graph and asserts
    the expected structure: the RO subject, the CWLProv profile version, the
    aggregated workflow/provenance files, and the OA annotations.  With
    ``nested=True`` additionally requires provenance annotations for
    sub-workflow runs.  Raises AssertionError on the first mismatch.
    """
    manifest_file = base_path / "metadata" / "manifest.json"
    assert manifest_file.is_file(), f"Can't find {manifest_file}"
    arcp_root = find_arcp(base_path)
    base = urllib.parse.urljoin(arcp_root, "metadata/manifest.json")
    g = Graph()

    # Avoid resolving JSON-LD context https://w3id.org/bundle/context
    # so this test works offline
    context = Path(get_data("tests/bundle-context.jsonld")).as_uri()
    with open(manifest_file, encoding="UTF-8") as fh:
        jsonld = fh.read()
        # replace with file:/// URI
        jsonld = jsonld.replace("https://w3id.org/bundle/context", context)
    g.parse(data=jsonld, format="json-ld", publicID=base)
    if os.environ.get("DEBUG"):
        print("Parsed manifest:\n\n")
        g.serialize(sys.stdout, format="ttl")
    _ro = None

    # The RO URI is whichever subject claims to be described by the manifest.
    for _ro in g.subjects(ORE.isDescribedBy, URIRef(base)):
        break
    assert _ro is not None, "Can't find RO with ore:isDescribedBy"

    profile = None
    for dc in g.objects(_ro, DCTERMS.conformsTo):
        profile = dc
        break
    assert profile is not None, "Can't find profile with dct:conformsTo"
    assert profile == URIRef(
        provenance_constants.CWLPROV_VERSION), ("Unexpected cwlprov version " +
                                                profile)

    # Partition aggregates into RO-internal paths and external data URIs.
    paths = []
    externals = []
    for aggregate in g.objects(_ro, ORE.aggregates):
        if not arcp.is_arcp_uri(aggregate):
            externals.append(aggregate)
            # Won't check external URIs existence here
            # TODO: Check they are not relative!
            continue
        lfile = _arcp2file(base_path, aggregate)
        paths.append(os.path.relpath(lfile, base_path))
        assert os.path.isfile(lfile), f"Can't find aggregated {lfile}"

    assert paths, "Didn't find any arcp aggregates"
    assert externals, "Didn't find any data URIs"

    # Every provenance serialization format must be aggregated.
    for ext in ["provn", "xml", "json", "jsonld", "nt", "ttl"]:
        f = "metadata/provenance/primary.cwlprov.%s" % ext
        assert f in paths, "provenance file missing " + f

    for f in [
            "workflow/primary-job.json",
            "workflow/packed.cwl",
            "workflow/primary-output.json",
    ]:
        assert f in paths, "workflow file missing " + f
    # Can't test snapshot/ files directly as their name varies

    # TODO: check urn:hash::sha1 thingies
    # TODO: Check OA annotations

    packed = urllib.parse.urljoin(arcp_root, "/workflow/packed.cwl")
    primary_job = urllib.parse.urljoin(arcp_root, "/workflow/primary-job.json")
    primary_prov_nt = urllib.parse.urljoin(
        arcp_root, "/metadata/provenance/primary.cwlprov.nt")
    uuid = arcp.parse_arcp(arcp_root).uuid

    # oa:highlighting annotations must all target the packed workflow.
    highlights = set(g.subjects(OA.motivatedBy, OA.highlighting))
    assert highlights, "Didn't find highlights"
    for h in highlights:
        assert (h, OA.hasTarget, URIRef(packed)) in g

    # oa:describing annotations link the RO root to the run's UUID URN.
    describes = set(g.subjects(OA.motivatedBy, OA.describing))
    for d in describes:
        assert (d, OA.hasBody, URIRef(arcp_root)) in g
        assert (d, OA.hasTarget, URIRef(uuid.urn)) in g

    # oa:linking annotations tie workflow + job inputs to the run.
    linked = set(g.subjects(OA.motivatedBy, OA.linking))
    for link in linked:
        assert (link, OA.hasBody, URIRef(packed)) in g
        assert (link, OA.hasBody, URIRef(primary_job)) in g
        assert (link, OA.hasTarget, URIRef(uuid.urn)) in g

    has_provenance = set(g.subjects(OA.hasBody, URIRef(primary_prov_nt)))
    for p in has_provenance:
        assert (p, OA.hasTarget, URIRef(uuid.urn)) in g
        assert (p, OA.motivatedBy, PROV.has_provenance) in g
        # Check all prov elements are listed
        formats = set()
        for prov in g.objects(p, OA.hasBody):
            assert (
                prov,
                DCTERMS.conformsTo,
                URIRef(provenance_constants.CWLPROV_VERSION),
            ) in g
            # NOTE: DC.format is a Namespace method and does not resolve like other terms
            formats.update(set(g.objects(prov, DC["format"])))
        assert formats, "Could not find media types"
        # Exact set of media types for the six provenance serializations.
        expected = {
            Literal(f)
            for f in (
                "application/json",
                "application/ld+json",
                "application/n-triples",
                'text/provenance-notation; charset="UTF-8"',
                'text/turtle; charset="UTF-8"',
                "application/xml",
            )
        }
        assert formats == expected, "Did not match expected PROV media types"

    if nested:
        # Check for additional PROVs
        # Let's try to find the other wf run ID
        otherRuns = set()
        for p in g.subjects(OA.motivatedBy, PROV.has_provenance):
            if (p, OA.hasTarget, URIRef(uuid.urn)) in g:
                continue
            otherRuns.update(set(g.objects(p, OA.hasTarget)))
        assert otherRuns, "Could not find nested workflow run prov annotations"
Ejemplo n.º 27
0
 def test_util_from_n3_expectliteralandlangdtype(self):
     """A literal with both a language tag and a datatype parses to a Literal
     that compares equal to one built from the datatype alone."""
     n3_string = '"michel"@fr^^xsd:fr'
     parsed = util.from_n3(n3_string, default=None, backend=None)
     self.assert_(isinstance(parsed, Literal))
     self.assertEqual(parsed, Literal('michel', datatype=URIRef('xsd:fr')))
Ejemplo n.º 28
0
    def map(raw_data):
        """Map a Calisphere JSON record (file-like *raw_data*) into DPLA-MAP
        RDF and return the graph serialized as Turtle."""
        MAP = Namespace('http://dp.la/about/map/')
        EDM = Namespace('http://www.europeana.eu/schemas/edm/')
        ORE = Namespace('http://www.openarchives.org/ore/terms/')

        graph = rdflib.Graph()
        for prefix, namespace in (('dc', DC), ('rdf', RDF), ('skos', SKOS),
                                  ('map', MAP), ('edm', EDM), ('ore', ORE),
                                  ('dcterms', DCTERMS)):
            graph.bind(prefix, namespace)

        data = json.load(raw_data)

        # The item landing page is the web resource shown to end users.
        item = URIRef(data['url_item'])
        graph.add((item, RDF.type, EDM['WebResource']))

        # Fixed contributor record for CDL.
        cdl = URIRef('http://dp.la/api/contributor/cdl')
        graph.add((cdl, RDF.type, EDM['Agent']))
        graph.add((cdl, SKOS['prefLabel'], Literal('California Digital Library')))

        if 'reference_image_md5' in data:
            thumb = URIRef('https://thumbnails.calisphere.org/clip/150x150/' +
                           data['reference_image_md5'])
            graph.add((thumb, RDF.type, EDM['WebResource']))

        # ORE aggregation ties the original record and source resource together.
        aggregation = BNode()
        graph.add((aggregation, RDF.type, ORE['Aggregation']))

        original_record = BNode()
        graph.add((aggregation, MAP['originalRecord'], original_record))
        graph.add((original_record, RDF.type, EDM['WebResource']))

        source_resource = BNode()
        graph.add((aggregation, EDM.aggregatedCHO, source_resource))
        graph.add((source_resource, RDF.type, MAP.SourceResource))

        for title in data.get('title_ss', []):
            graph.add((source_resource, DCTERMS.title, Literal(title)))

        for date in data.get('date_ss', []):
            time_span = BNode()
            graph.add((source_resource, DC.date, time_span))
            graph.add((time_span, RDF.type, EDM.TimeSpan))
            graph.add((time_span, MAP.providedLabel, Literal(date)))

        for identifier in data.get('identifier_ss', []):
            graph.add((source_resource, DC.identifier, Literal(identifier)))

        # The item URL always doubles as an identifier.
        graph.add((source_resource, DC.identifier, Literal(data['url_item'])))

        for rights in data.get('rights_ss', []):
            graph.add((source_resource, DC.rights, Literal(rights)))

        for contributor in data.get('contributor_ss', []):
            agent = BNode()
            graph.add((source_resource, DCTERMS.contributor, agent))
            graph.add((agent, RDF.type, EDM.Agent))
            graph.add((agent, MAP.providedLabel, Literal(contributor)))

        for creator in data.get('creator_ss', []):
            agent = BNode()
            graph.add((source_resource, DCTERMS.creator, agent))
            graph.add((agent, RDF.type, EDM.Agent))
            graph.add((agent, MAP.providedLabel, Literal(creator)))

        for collection in data.get('collection_name', []):
            collection_node = BNode()
            graph.add((source_resource, DCTERMS.isPartOf, collection_node))
            graph.add((collection_node, RDF.type, DCTERMS.Collection))
            graph.add((collection_node, DCTERMS.title, Literal(collection)))

        for publisher in data.get('publisher_ss', []):
            graph.add((source_resource, DCTERMS.publisher, Literal(publisher)))

        for resource_type in data.get('type', []):
            graph.add((source_resource, DCTERMS.type, Literal(resource_type)))

        # Provider label: "campus, repository" when both exist, else repository.
        if 'campus_name' in data and 'repository_name' in data:
            provider = data['campus_name'][0] + ', ' + data['repository_name'][0]
        elif 'repository_name' in data:
            provider = data['repository_name'][0]
        else:
            provider = None

        if provider is not None:
            provider_node = BNode()
            graph.add((aggregation, EDM.dataProvider, provider_node))
            graph.add((provider_node, RDF.type, EDM.Agent))
            graph.add((provider_node, MAP.providedLabel, Literal(provider)))

        graph.add((aggregation, EDM.isShownAt, item))

        if 'reference_image_md5' in data:
            image_url = ("https://thumbnails.calisphere.org/clip/150x150/" +
                         data['reference_image_md5'])
            graph.add((aggregation, EDM.preview, URIRef(image_url)))

        return graph.serialize(format='turtle')
Ejemplo n.º 29
0
 def test_util_from_n3_expectliteralwithdatatypefromint(self):
     """A bare integer token parses into a Literal equal to Literal(42)."""
     parsed = util.from_n3('42')
     self.assertEqual(parsed, Literal(42))
Ejemplo n.º 30
0
 def test_to_python_timedelta(self):
     """xsd:dayTimeDuration literals convert to a native timedelta."""
     duration_literal = Literal("P4DT5H6M7S", datatype=XSD.dayTimeDuration)
     native = duration_literal.toPython()
     self.assertTrue(isinstance(native, timedelta))
     self.assertEqual(native, parse_duration("P4DT5H6M7S"))