def test_timezone_offset(self):
    """A dateTime literal with a +03:00 offset round-trips through toPython()."""
    lexical = "2010-02-10T12:36:00+03:00"
    lit = Literal(lexical, datatype=URIRef('http://www.w3.org/2001/XMLSchema#dateTime'))
    value = lit.toPython()
    self.assertTrue(isinstance(value, datetime))
    self.assertEqual(value.isoformat(), lexical)
def test_timezone_offset_millisecond(self):
    """Sub-second precision survives the offset-aware dateTime round-trip."""
    lexical = "2011-01-16T19:39:18.239743+01:00"
    lit = Literal(lexical, datatype=URIRef('http://www.w3.org/2001/XMLSchema#dateTime'))
    value = lit.toPython()
    self.assertTrue(isinstance(value, datetime))
    self.assertEqual(value.isoformat(), lexical)
def test_to_python(self):
    """A naive (no-timezone) dateTime literal round-trips through toPython()."""
    lexical = "2008-12-01T18:02:00"
    lit = Literal(lexical, datatype=URIRef('http://www.w3.org/2001/XMLSchema#dateTime'))
    value = lit.toPython()
    self.assertTrue(isinstance(value, datetime))
    self.assertEqual(value.isoformat(), lexical)
def test_timezone_z(self):
    """A 'Z'-suffixed dateTime literal parses as UTC (+00:00).

    Fix: ``assert_`` and ``assertEquals`` are deprecated unittest aliases
    (removed in Python 3.12); use ``assertTrue``/``assertEqual``.
    """
    dt = "2008-12-01T18:02:00.522630Z"
    l = Literal(dt, datatype=URIRef("http://www.w3.org/2001/XMLSchema#dateTime"))
    self.assertTrue(isinstance(l.toPython(), datetime))
    # isodate renders the UTC zone back as 'Z' with this format string
    self.assertEqual(
        datetime_isoformat(l.toPython(),
                           DATE_EXT_COMPLETE + "T" + "%H:%M:%S.%f" + TZ_EXT),
        dt)
    # datetime.isoformat() spells UTC as an explicit +00:00 offset
    self.assertEqual(l.toPython().isoformat(),
                     "2008-12-01T18:02:00.522630+00:00")
def test_timezone_offset_to_utc(self):
    """An offset-aware dateTime converts correctly to UTC via astimezone()."""
    lit = Literal("2010-02-10T12:36:00+03:00",
                  datatype=URIRef('http://www.w3.org/2001/XMLSchema#dateTime'))
    as_utc = lit.toPython().astimezone(UTC)
    # 12:36 at +03:00 is 09:36 UTC
    self.assertEqual(datetime_isoformat(as_utc), "2010-02-10T09:36:00Z")
def test_microseconds(self):
    """A datetime with microseconds becomes a lossless xsd:dateTime literal."""
    original = datetime(2009, 6, 15, 23, 37, 6, 522630)
    lit = Literal(original)
    # microsecond precision must appear in the lexical form, and the
    # datatype must be inferred as xsd:dateTime
    self.assertEqual(text_type(lit), '2009-06-15T23:37:06.522630')
    self.assertEqual(lit.datatype, XSD.dateTime)
    # ...and converting back must reproduce the exact original datetime
    self.assertEqual(lit.toPython(), original)
def test_microseconds(self):
    """Round-trip a microsecond datetime, skipping unsupported platforms."""
    import platform
    # Jython and CPython 2.5 cannot format %f microseconds
    # (condition simplified: A or (not A and B) is equivalent to A or B)
    if platform.system() == 'Java' or sys.version_info[:2] == (2, 5):
        from nose import SkipTest
        raise SkipTest('datetime microseconds unsupported in Python2.5 and Jython')
    original = datetime(2009, 6, 15, 23, 37, 6, 522630)
    lit = Literal(original)
    # a datetime with microseconds serializes losslessly as xsd:dateTime
    self.assertEqual(text_type(lit), '2009-06-15T23:37:06.522630')
    self.assertEqual(lit.datatype, XSD.dateTime)
    self.assertEqual(lit.toPython(), original)
def test_microseconds(self):
    """A datetime with microseconds becomes a lossless xsd:dateTime literal.

    Fixes: ``assertEquals`` is a deprecated alias of ``assertEqual``;
    ``l.title()`` only matched by accident (str.title() happens to be a
    no-op on this lexical form) — compare the string form explicitly;
    ``assertTrue(l2, l.toPython())`` passed the second value as the failure
    *message*, so it asserted nothing — use ``assertEqual``.
    """
    dt1 = datetime(2009, 6, 15, 23, 37, 6, 522630)
    l = Literal(dt1)
    # datetime with microseconds should be cast as a literal with
    # XML Schema dateTime as the literal datatype
    self.assertEqual(str(l), '2009-06-15T23:37:06.522630')
    self.assertEqual(l.datatype, XSD.dateTime)
    # 2.6.0 added the %f format to datetime.strftime, so we should have
    # a lossless conversion back to the datetime
    # http://bugs.python.org/issue1982
    if sys.version_info >= (2, 6, 0):
        dt2 = l.toPython()
        self.assertEqual(dt2, dt1)
    else:
        # otherwise, we just get back an equivalent literal again
        l2 = Literal('2009-06-15T23:37:06.522630', datatype=XSD.dateTime)
        self.assertEqual(l2, l.toPython())
class IdentifierEquality(unittest.TestCase):
    """Pins down the (asymmetric-looking but deliberate) equality semantics
    between URIRef, Literal, plain Python strings, and graph membership.

    Fix: ``testE2`` used ``assertTrue(x, True)`` — the second argument is the
    failure *message*, not an expected value — replaced with a plain
    ``assertTrue``.
    """

    def setUp(self):
        # one fixture of each identifier flavour, sharing the same text
        self.uriref = URIRef("http://example.org/")
        self.bnode = BNode()
        self.literal = Literal("http://example.org/")
        self.python_literal = u"http://example.org/"
        self.python_literal_2 = u"foo"

    def testA(self):
        # a URIRef never equals a Literal, even with identical text
        self.assertEqual(self.uriref == self.literal, False)

    def testB(self):
        # ...and the comparison is symmetric
        self.assertEqual(self.literal == self.uriref, False)

    def testC(self):
        # a URIRef never equals a plain string
        self.assertEqual(self.uriref == self.python_literal, False)

    def testD(self):
        self.assertEqual(self.python_literal == self.uriref, False)

    def testE(self):
        # Literal.__eq__ is strict: a Literal is not == a plain string
        self.assertEqual(self.literal == self.python_literal, False)

    def testE2(self):
        # ...but value-based comparison via Literal.eq() does match
        self.assertTrue(self.literal.eq(self.python_literal))

    def testF(self):
        self.assertEqual(self.python_literal == self.literal, False)

    def testG(self):
        # plain strings are not members of a set of URIRefs
        self.assertEqual("foo" in CORE_SYNTAX_TERMS, False)

    def testH(self):
        self.assertEqual(
            URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#RDF")
            in CORE_SYNTAX_TERMS,
            True)

    def testI(self):
        # URIRef and Literal objects with the same text are distinct
        # graph nodes, so both triples are stored
        g = Graph()
        g.add((self.uriref, RDF.value, self.literal))
        g.add((self.uriref, RDF.value, self.uriref))
        self.assertEqual(len(g), 2)
def test_util_from_n3_expectliteralwithescapedquote(self):
    """from_n3 unescapes ``\\"`` inside a quoted literal.

    Fix: the original ``assertTrue(res, Literal('\\"', lang="en"))`` passed
    the Literal as the failure *message* and compared nothing; it also
    expected a language tag that the input does not carry. from_n3 replaces
    ``\\"`` with ``"``, so the expected value is a plain ``Literal('"')``.
    """
    s = '"\\""'
    res = util.from_n3(s, default=None, backend=None)
    self.assertTrue(isinstance(res, Literal))
    self.assertEqual(res, Literal('"'))
def test_util_from_n3_expectliteralandlangdtype(self):
    """A datatype suffix takes precedence over a preceding language tag."""
    res = util.from_n3('"michel"@fr^^xsd:fr', default=None, backend=None)
    self.assertTrue(isinstance(res, Literal))
    # the @fr tag is discarded in favour of the ^^xsd:fr datatype
    self.assertEqual(res, Literal("michel", datatype=XSD["fr"]))
def test_util_from_n3_expectliteralwithdatatypefromint(self):
    """A bare integer token parses into an integer-typed Literal."""
    self.assertEqual(util.from_n3("42"), Literal(42))
def convert_to_aleph(input_dict):
    """Convert an n3 examples/background-knowledge input into Aleph ILP files.

    Fix: ``example_annotations.next()`` is Python-2-only generator syntax
    (AttributeError on Python 3) and was inconsistently mixed with the
    built-in ``next()`` two lines below — use ``next()`` throughout.

    :param input_dict: dict with keys 'examples' (n3 text), 'bk_file'
        (iterable of n3 ontologies), 'positive_class', and optional
        'settings' (falls back to ALEPH_SETTINGS)
    :return: dict with 'background', 'bk', 'pos' and 'neg' entries
    """
    return_dict = {'background': ''}
    data = rdflib.Graph()
    prepare(data)
    print("parsing examples")
    data.parse(data=input_dict['examples'], format='n3')
    print("parsing bk")
    for ontology in input_dict['bk_file']:
        data.parse(data=ontology, format='n3')
    settings = input_dict['settings'] if 'settings' in input_dict else ALEPH_SETTINGS
    generalizations = defaultdict(list)
    annotations = defaultdict(list)

    print("going through generalization predicates")
    generelization_predicates = list(
        data.subjects(predicate=RDF.type,
                      object=HEDWIG.GeneralizationPredicate))
    for predicate in generelization_predicates:
        for sub, obj in data.subject_objects(predicate=predicate):
            if user_defined(sub) and user_defined(obj):
                generalizations[sub].append(obj)

    print("going through examples")
    pos = ''
    neg = ''
    positive_class = Literal(input_dict['positive_class'])
    for example in data.subjects(predicate=RDF.type, object=HEDWIG.Example):
        positive = (example, HEDWIG.class_label, positive_class) in data
        if positive:
            pos += 'positive(\'%s\').\n' % example
        else:
            neg += 'positive(\'%s\').\n' % example
        for annotation_link in data.objects(subject=example,
                                            predicate=HEDWIG.annotated_with):
            example_annotations = data.objects(subject=annotation_link,
                                               predicate=HEDWIG.annotation)
            # each annotation link must carry exactly one annotation
            annotation = next(example_annotations)
            if next(example_annotations, None) is not None:
                raise Exception(
                    "Unable to parse data - annotations for example %s are unclear"
                    % example)
            annotations[example].append(annotation)

    print("writing bk")
    # Aleph mode/determination declarations for the 'positive' target predicate
    bk = ':- modeh(1, positive(+instance)).\n'
    bk += ':- mode(*, annotated_with(+instance, #annotation)).\n'
    bk += ':- determination(positive/1, annotated_with/2).\n'
    bk += '\n\n'
    for setting in settings:
        bk += ':- set(%s, %s).\n' % (setting, settings[setting])
    bk += '\n\n'
    # encode the generalization hierarchy as implication rules
    for sub_concept in generalizations:
        for super_concept in generalizations[sub_concept]:
            bk += 'annotated_with(X, \'%s\') :- annotated_with(X, \'%s\').\n' % (
                super_concept, sub_concept)
    bk += '\n'

    print("writing pos and neg")
    i = 0
    print(len(annotations))
    for example in annotations:
        i += 1
        if i % 1000 == 0:
            print(i)  # progress indicator for large example sets
        for concept in annotations[example]:
            bk += 'annotated_with(\'%s\', \'%s\').\n' % (example, concept)
    return_dict['bk'] = bk
    return_dict['pos'] = pos
    return_dict['neg'] = neg
    print("done!!!")
    return return_dict
def addSuccess(self, test, capt):
    """Record a passing test as an EARL TestResult in the report graph."""
    result = BNode()  # TODO: coin URIRef
    for predicate, obj in (
        (RDFS.label, Literal(test)),
        (RDFS.comment, Literal(type(test))),
        (RDF.type, EARL.TestResult),
        (EARL.outcome, EARL["pass"]),
    ):
        self.graph.add((result, predicate, obj))
def testOmitsMissingLang(self) -> None:
    """repr() of a datatyped literal shows datatype= but no lang= part."""
    lit = Literal("foo", datatype=URIRef("http://example.com/"))
    expected = ("rdflib.term.Literal('foo', "
                "datatype=rdflib.term.URIRef('http://example.com/'))")
    self.assertEqual(repr(lit), expected)
def testTrueBoolean(self) -> None:
    """Mixed-case 'tRue' and '1' both map to a truthy xsd:boolean value."""
    for lexical in ("tRue", "1"):
        self.assertTrue(Literal(lexical, datatype=_XSD_BOOLEAN).value)
def testOmitsMissingDatatypeAndLang(self) -> None:
    """repr() of a plain literal shows neither datatype= nor lang=."""
    rendered = repr(Literal("foo"))
    self.assertEqual(rendered, "rdflib.term.Literal('foo')")
# top level object container for sensors, default class is SmartObject sensors = baseObject.create({ 'resourceName': 'sensors', 'resourceClass': 'SmartObject' }) #weather resource under sensors for the weather sensor # create a default class SmartObject for the weather sensor cluster weather = sensors.create({ 'resourceName': 'rhvWeather-01', 'resourceClass': 'SmartObject' }) # example description in simple link-format like concepts baseObject.Description.set( (URIRef('sensors/rhvWeather-01'), RDFS.Class, Literal('SmartObject'))) baseObject.Description.set( (URIRef('sensors/rhvWeather-01'), RDF.type, Literal('SensorSystem'))) baseObject.Description.set( (URIRef('sensors/rhvWeather-01'), RDFS.Resource, Literal('Weather'))) # baseObject.Description.set( (URIRef('sensors/rhvWeather-01/outdoor_temperature'), RDF.type, Literal('sensor'))) baseObject.Description.set( (URIRef('sensors/rhvWeather-01/outdoor_temperature'), RDFS.Resource, Literal('temperature'))) baseObject.Description.set( (URIRef('sensors/rhvWeather-01/outdoor_humidity'), RDF.type, Literal('sensor'))) baseObject.Description.set(
def plum_x(self, paper: Paper) -> Graph:
    """Fetch PlumX altmetrics for *paper* from the Scopus API and return them
    as an RDF graph of IOnt indicators.

    Fixes: the bare ``except:`` also swallowed SystemExit/KeyboardInterrupt —
    narrowed to ``except Exception``; ``json.loads(output)`` was executed
    unconditionally *and then again* inside the ``output is not None`` guard,
    so a None response crashed before the guard — parse once, inside it.

    :param paper: source record; only its DOI and timestamp are used
    :return: graph of indicators (empty on any fetch/parse failure)
    """
    g = Graph()
    g.bind("sd", ScholalryData)
    g.bind("iont", IOnt)
    g.bind("covid", AltmetricsCOVID)
    doi = paper.get_doi()
    timestamp = paper.get_timestamp()
    paper = URIRef("https://doi.org/" + doi)
    g.add((paper, RDF.type, ScholalryData.Document))
    g.add((paper, DC.created, Literal(timestamp)))
    headers = {
        'X-ELS-Insttoken': self.__insttoken,
        'X-ELS-APIKey': self.__api_key
    }
    endpoint = ScopusAPIClient.PLUMX_ENDPOINT + "/%s"
    params = doi
    request = urllib.request.Request(endpoint % params, headers=headers)
    try:
        response = urllib.request.urlopen(request)
        output = response.read()
        if output is not None:
            js = json.loads(output)
            if "count_categories" in js:
                cats = js["count_categories"]
                for cat in cats:
                    name = cat["name"].lower()
                    total = cat["total"]
                    # one top-level indicator node per PlumX category
                    indicator = URIRef(AltmetricsCOVID + doi + "_" + name)
                    g.add((paper, IOnt.hasIndicator, indicator))
                    g.add((indicator, RDF.type, IOnt.Indicator))
                    g.add((indicator, RDFS.label, Literal(cat["name"])))
                    g.add((indicator, IOnt.hasSource, AltmetricsCOVID.plumx))
                    g.add((indicator, IOnt.basedOnMetric,
                           URIRef(AltmetricsCOVID["name"])))
                    g.add((URIRef(AltmetricsCOVID["name"]), RDF.type,
                           IOnt.Metric))
                    g.add((indicator, IOnt.hasIndicatorValue,
                           URIRef(AltmetricsCOVID + doi + "_" + name + "_value")))
                    g.add((URIRef(AltmetricsCOVID + doi + "_" + name + "_value"),
                           RDF.type, IOnt.IndicatorValue))
                    g.add((URIRef(AltmetricsCOVID + doi + "_" + name + "_value"),
                           IOnt.indicatorValue,
                           Literal(total, datatype=XSD.integer)))
                    if "count_types" in cat:
                        # second level: per-metric breakdown of the category
                        for m in cat["count_types"]:
                            name_2 = m["name"].lower()
                            total_2 = m["total"]
                            level_2_indicator = URIRef(
                                AltmetricsCOVID + doi + "_" + name_2)
                            g.add((level_2_indicator, RDF.type, IOnt.Indicator))
                            g.add((level_2_indicator, RDFS.label,
                                   Literal(m["name"])))
                            g.add((indicator, IOnt.hasSource,
                                   AltmetricsCOVID.plumx))
                            g.add((indicator, IOnt.hasSubIndicator,
                                   level_2_indicator))
                            g.add((indicator, IOnt.basedOnMetric,
                                   URIRef(AltmetricsCOVID["name"])))
                            g.add((level_2_indicator, IOnt.hasIndicatorValue,
                                   URIRef(AltmetricsCOVID + doi + "_" + name_2 + "_value")))
                            g.add((URIRef(AltmetricsCOVID + doi + "_" + name_2 + "_value"),
                                   RDF.type, IOnt.IndicatorValue))
                            g.add((URIRef(AltmetricsCOVID + doi + "_" + name_2 + "_value"),
                                   IOnt.indicatorValue,
                                   Literal(total_2, datatype=XSD.integer)))
                            # third level: the metric's source
                            level_3_indicator = URIRef(
                                AltmetricsCOVID + doi + "_" + name_2 + "_source")
                            g.add((level_3_indicator, RDF.type, IOnt.Indicator))
                            g.add((level_3_indicator, RDFS.label,
                                   Literal(m["name"] + " source")))
                            g.add((indicator, IOnt.hasSource,
                                   AltmetricsCOVID.plumx))
                            g.add((level_2_indicator, IOnt.hasSubIndicator,
                                   level_3_indicator))
                            g.add((indicator, IOnt.basedOnMetric,
                                   URIRef(AltmetricsCOVID["name"])))
                            g.add((level_3_indicator, IOnt.hasIndicatorValue,
                                   URIRef(AltmetricsCOVID + doi + "_" + name_2 + "_value")))
    except Exception:
        log.error("No altmetrics available for paper %s." % doi)
    return g
def test_to_python_ym_duration(self):
    """xsd:yearMonthDuration literals convert to isodate Duration objects."""
    lexical = "P1Y2M"
    converted = Literal(lexical, datatype=XSD.yearMonthDuration).toPython()
    self.assertTrue(isinstance(converted, Duration))
    self.assertEqual(converted, parse_duration(lexical))
def testNoDanglingPoint(self):
    """confirms the fix for https://github.com/RDFLib/rdflib/issues/237

    Fix: ``assert_`` is a deprecated unittest alias (removed in Python
    3.12) — use ``assertTrue``, matching the modernized sibling test.
    """
    vv = Literal("0.88", datatype=_XSD_DOUBLE)
    out = vv._literal_n3(use_plain=True)
    # both renderings are acceptable; neither leaves a dangling point
    self.assertTrue(out in ["8.8e-01", "0.88"], out)
def setUp(self):
    """Create one fixture of each identifier flavour used by the tests."""
    example = u"http://example.org/"
    self.uriref = URIRef(example)
    self.literal = Literal(example)
    self.python_literal = example
    self.python_literal_2 = u"foo"
    self.bnode = BNode()
def add_val(self, subj: Node, pred: URIRef, json_obj: JsonObj, json_key: str,
            valuetype: Optional[URIRef] = None) -> Optional[BNode]:
    """ Add the RDF representation of val to the graph as a target of subj, pred.
    Note that FHIR lists are represented as a list of BNODE objects with a fhir:index discriminant
    :param subj: graph subject
    :param pred: predicate
    :param json_obj: object containing json_key
    :param json_key: name of the value in the JSON resource
    :param valuetype: value type if NOT determinable by predicate
    :return: value node if target is a BNode else None
    """
    # Missing key: warn and skip rather than fail the whole conversion.
    if json_key not in json_obj:
        print("Expecting to find object named '{}' in JSON:".format(
            json_key))
        print(json_obj._as_json_dumps())
        print("entry skipped")
        return None
    val = json_obj[json_key]
    if isinstance(val, List):
        # FHIR lists: one BNode per element, ordered via fhir:index.
        list_idx = 0
        for lv in val:
            entry_bnode = BNode()
            # TODO: this is getting messy. Refactor and clean this up
            if pred == FHIR.Bundle.entry:
                # Bundle entries are special-cased: the contained resource
                # gets its own URI (fullUrl) and is emitted recursively.
                entry_subj = URIRef(lv.fullUrl)
                self.add(entry_bnode, FHIR.index, Literal(list_idx))
                self.add_val(entry_bnode, FHIR.Bundle.entry.fullUrl, lv,
                             'fullUrl')
                self.add(entry_bnode, FHIR.Bundle.entry.resource, entry_subj)
                self.add(subj, pred, entry_bnode)
                entry_mv = FHIRMetaVocEntry(self._vocabulary,
                                            FHIR.BundleEntryComponent)
                # Emit the remaining BundleEntryComponent predicates
                # (everything except the two handled above).
                for k, p in entry_mv.predicates().items():
                    if k not in ['resource', 'fullUrl'] and k in lv:
                        print("---> adding {}".format(k))
                        self.add_val(subj, p, lv, k)
                # Render the contained resource into the same graph.
                FHIRResource(self._vocabulary, None, self._base_uri,
                             lv.resource, self._g, False,
                             self._replace_narrative_text, False,
                             resource_uri=entry_subj)
            else:
                self.add(entry_bnode, FHIR.index, Literal(list_idx))
                if isinstance(lv, JsonObj):
                    # Composite element: recurse into its structure.
                    self.add_value_node(entry_bnode, pred, lv, valuetype)
                else:
                    # Atomic element: type it from the predicate's metadata
                    # when available.
                    vt = self._meta.predicate_type(pred)
                    atom_type = self._meta.primitive_datatype_nostring(
                        vt) if vt else None
                    self.add(entry_bnode, FHIR.value,
                             Literal(lv, datatype=atom_type))
                self.add(subj, pred, entry_bnode)
            list_idx += 1
    else:
        # Scalar value: explicit valuetype overrides the predicate's type.
        vt = self._meta.predicate_type(
            pred) if not valuetype else valuetype
        if self._meta.is_atom(pred):
            # Optionally truncate long narrative text before emitting.
            if self._replace_narrative_text and pred == FHIR.Narrative.div and len(
                    val) > 120:
                val = REPLACED_NARRATIVE_TEXT
            self.add(subj, pred, Literal(val))
        else:
            v = BNode()
            if self._meta.is_primitive(vt):
                self.add(
                    v, FHIR.value,
                    Literal(
                        str(val),
                        datatype=self._meta.primitive_datatype_nostring(
                            vt, val)))
            else:
                self.add_value_node(v, pred, val, valuetype)
            self.add(subj, pred, v)
            # References additionally get a resolvable link
            # (on the subject for Reference, on the value node for
            # RelatedArtifact — presumably intentional; verify).
            if pred == FHIR.Reference.reference:
                self.add_reference(subj, val)
            elif pred == FHIR.RelatedArtifact.resource:
                self.add_reference(v, val)
            self.add_extension_val(v, json_obj, json_key)
            return v
    return None
for Literal.__new__ can override this. For example: >>> from rdflib import Literal,XSD >>> Literal("01", datatype=XSD.int) rdflib.term.Literal(u'1', datatype=rdflib.term.URIRef(u'http://www.w3.org/2001/XMLSchema#integer')) This flag may be changed at any time, but will only affect literals created after that time, previously created literals will remain (un)normalized. """ DAWG_LITERAL_COLLATION = False """ DAWG_LITERAL_COLLATION determines how literals are ordered or compared to each other. In SPARQL, applying the >,<,>=,<= operators to literals of incompatible data-types is an error, i.e: Literal(2)>Literal('cake') is neither true nor false, but an error. This is a problem in PY3, where lists of Literals of incompatible types can no longer be sorted. Setting this flag to True gives you strict DAWG/SPARQL compliance, setting it to False will order Literals with incompatible datatypes by datatype URI
def add_extension_val(self,
                      subj: Node,
                      json_obj: Union[JsonObj, List[JsonObjTypes]],
                      key: str,
                      pred: Optional[URIRef] = None) -> None:
    """ Add any extensions for the supplied object.  This can be called in
    following situations:
    1) Single extended value
       "key" : (value), "_key" : { "extension": [ { "url": "http://...", "value[x]": "......" } ] }
    2) Single extension only
       "_key" : { "extension": [ { "url": "http://...", "value[x]": "......" } ] }
    3) Multiple extended values: (TBD)
    4) Multiple extensions only
       "_key" : [ { "extension": [ { "url": "http://...", "value[x]": "......" } ] } ]

    Fix: ``raise NotImplemented(...)`` raised a TypeError —
    ``NotImplemented`` is a constant, not an exception class; use
    ``NotImplementedError`` (matching the other raise in this method).

    :param subj: Node containing subject
    :param json_obj: Object (potentially) containing "_key"
    :param key: name of element that is possibly extended (as indicated by "_" prefix)
    :param pred: predicate for the contained elements. Only used in situations 3) (?) and 4
    """
    extendee_name = "_" + key
    if extendee_name in json_obj:
        if not isinstance(subj, BNode):
            raise NotImplementedError(
                "Extension to something other than a simple BNode")
        if isinstance(json_obj[extendee_name], list):
            # cases 3 / 4: a list of extension containers
            if not pred:
                raise NotImplementedError("Case 3 not implemented")
            entry_idx = 0
            for extension in json_obj[extendee_name]:
                entry = BNode()
                # fhir:index preserves list ordering
                self.add(entry, FHIR.index, Literal(entry_idx))
                self.add_val(entry, FHIR.Element.extension, extension,
                             'extension')
                self.add(subj, pred, entry)
                entry_idx += 1
        elif 'fhir_comments' in json_obj[extendee_name] and len(
                json_obj[extendee_name]) == 1:
            # TODO: determine whether and how fhir comments should be
            # represented in RDF. For the moment we just drop them.
            print("fhir_comment ignored")
            print(json_obj[extendee_name]._as_json_dumps())
        else:
            # cases 1 / 2: a single extension container
            self.add_val(subj, FHIR.Element.extension,
                         json_obj[extendee_name], 'extension')
doctest.testmod() if __name__ == "__main__": test() from rdflib import Graph g = Graph() c = Collection(g, BNode()) assert len(c) == 0 c = Collection(g, BNode(), [Literal("1"), Literal("2"), Literal("3"), Literal("4")]) assert len(c) == 4 assert c[1] == Literal("2"), c[1] del c[1] assert list(c) == [Literal("1"), Literal("3"), Literal("4")], list(c) try: del c[500] except IndexError: pass c.append(Literal("5"))
try: object = Literal(atts[att], language) except Error, e: self.error(e.msg) elif att == RDF.type: # S2 predicate = RDF.type object = absolutize(atts[RDF.type]) elif att in NODE_ELEMENT_ATTRIBUTES: continue elif att in PROPERTY_ATTRIBUTE_EXCEPTIONS: # S3 self.error("Invalid property attribute URI: %s" % att) continue # for when error does not throw an exception else: predicate = absolutize(att) try: object = Literal(atts[att], language) except Error, e: self.error(e.msg) self.store.add((subject, predicate, object)) current.subject = subject def node_element_end(self, name, qname): self.parent.object = self.current.subject def property_element_start(self, name, qname, attrs): name, atts = self.convert(name, qname, attrs) current = self.current absolutize = self.absolutize # Cheap hack so 2to3 doesn't turn it into __next__
def testOmitsMissingDatatype(self) -> None:
    """repr() of a language-tagged literal shows lang= but no datatype=."""
    lit = Literal("foo", lang="en")
    self.assertEqual(repr(lit), "rdflib.term.Literal('foo', lang='en')")
def node_element_start(self, name, qname, attrs): name, atts = self.convert(name, qname, attrs) current = self.current absolutize = self.absolutize # Cheap hack so 2to3 doesn't turn it into __next__ next = getattr(self, 'next') next.start = self.property_element_start next.end = self.property_element_end if name in NODE_ELEMENT_EXCEPTIONS: self.error("Invalid node element URI: %s" % name) if RDF.ID in atts: if RDF.about in atts or RDF.nodeID in atts: self.error( "Can have at most one of rdf:ID, rdf:about, and rdf:nodeID" ) id = atts[RDF.ID] if not is_ncname(id): self.error("rdf:ID value is not a valid NCName: %s" % id) subject = absolutize("#%s" % id) if subject in self.ids: self.error("two elements cannot use the same ID: '%s'" % subject) self.ids[subject] = 1 # IDs can only appear once within a document elif RDF.nodeID in atts: if RDF.ID in atts or RDF.about in atts: self.error( "Can have at most one of rdf:ID, rdf:about, and rdf:nodeID" ) nodeID = atts[RDF.nodeID] if not is_ncname(nodeID): self.error("rdf:nodeID value is not a valid NCName: %s" % nodeID) if self.preserve_bnode_ids is False: if nodeID in self.bnode: subject = self.bnode[nodeID] else: subject = BNode() self.bnode[nodeID] = subject else: subject = BNode(nodeID) elif RDF.about in atts: if RDF.ID in atts or RDF.nodeID in atts: self.error( "Can have at most one of rdf:ID, rdf:about, and rdf:nodeID" ) subject = absolutize(atts[RDF.about]) else: subject = BNode() if name != RDF.Description: # S1 self.store.add((subject, RDF.type, absolutize(name))) language = current.language for att in atts: if not att.startswith(str(RDFNS)): predicate = absolutize(att) try: object = Literal(atts[att], language) except Error, e: self.error(e.msg) elif att == RDF.type: # S2 predicate = RDF.type object = absolutize(atts[RDF.type])
def testNoDanglingPoint(self) -> None:
    """confirms the fix for https://github.com/RDFLib/rdflib/issues/237"""
    literal = Literal("0.88", datatype=_XSD_DOUBLE)
    rendered = literal._literal_n3(use_plain=True)
    # either rendering is acceptable; neither leaves a dangling point
    self.assertTrue(rendered == "8.8e-01" or rendered == "0.88", rendered)
def property_element_start(self, name, qname, attrs):
    # SAX handler for the start of an RDF/XML property element: resolves the
    # predicate, then dispatches on rdf:resource / rdf:nodeID / rdf:parseType
    # to decide how the object will be built and which handlers process the
    # element's children.
    name, atts = self.convert(name, qname, attrs)
    current = self.current
    absolutize = self.absolutize

    # Cheap hack so 2to3 doesn't turn it into __next__
    next = getattr(self, 'next')
    object = None
    current.data = None
    current.list = None

    # Resolve the predicate; rdf:li expands to the next container
    # membership property (rdf:_1, rdf:_2, ...).
    if not name.startswith(str(RDFNS)):
        current.predicate = absolutize(name)
    elif name == RDF.li:
        current.predicate = current.next_li()
    elif name in PROPERTY_ELEMENT_EXCEPTIONS:
        self.error("Invalid property element URI: %s" % name)
    else:
        current.predicate = absolutize(name)

    # rdf:ID on a property element reifies the statement.
    # NOTE(review): message typo — 'a value NCName' presumably meant
    # 'a valid NCName'.
    id = atts.get(RDF.ID, None)
    if id is not None:
        if not is_ncname(id):
            self.error("rdf:ID value is not a value NCName: %s" % id)
        current.id = absolutize("#%s" % id)
    else:
        current.id = None

    resource = atts.get(RDF.resource, None)
    nodeID = atts.get(RDF.nodeID, None)
    parse_type = atts.get(RDF.parseType, None)
    if resource is not None and nodeID is not None:
        self.error(
            "Property element cannot have both rdf:nodeID and rdf:resource"
        )
    if resource is not None:
        # rdf:resource: the object is a URI reference.
        object = absolutize(resource)
        next.start = self.node_element_start
        next.end = self.node_element_end
    elif nodeID is not None:
        # rdf:nodeID: the object is a (possibly shared) blank node.
        if not is_ncname(nodeID):
            self.error("rdf:nodeID value is not a valid NCName: %s" %
                       nodeID)
        if self.preserve_bnode_ids is False:
            # map document nodeIDs to fresh BNodes, reusing them on repeat
            if nodeID in self.bnode:
                object = self.bnode[nodeID]
            else:
                subject = BNode()
                self.bnode[nodeID] = subject
                object = subject
        else:
            object = subject = BNode(nodeID)
        next.start = self.node_element_start
        next.end = self.node_element_end
    else:
        if parse_type is not None:
            # rdf:parseType changes how the element content is interpreted.
            for att in atts:
                if att != RDF.parseType and att != RDF.ID:
                    self.error("Property attr '%s' now allowed here" % att)
            if parse_type == "Resource":
                # content is a set of property elements on a fresh BNode
                current.subject = object = BNode()
                current.char = self.property_element_char
                next.start = self.property_element_start
                next.end = self.property_element_end
            elif parse_type == "Collection":
                # content is an RDF collection (linked list ending in rdf:nil)
                current.char = None
                object = current.list = RDF.nil  # BNode() #self.parent.subject
                next.start = self.node_element_start
                next.end = self.list_node_element_end
            else:  # if parse_type=="Literal":
                # All other values are treated as Literal
                # See: http://www.w3.org/TR/rdf-syntax-grammar/
                # #parseTypeOtherPropertyElt
                object = Literal("", datatype=RDF.XMLLiteral)
                current.char = self.literal_element_char
                current.declared = {XMLNS: 'xml'}
                next.start = self.literal_element_start
                next.char = self.literal_element_char
                next.end = self.literal_element_end
            current.object = object
            return
        else:
            # plain content: either character data (literal) or a nested
            # node element; decided when children/characters arrive
            object = None
            current.char = self.property_element_char
            next.start = self.node_element_start
            next.end = self.node_element_end

    datatype = current.datatype = atts.get(RDF.datatype, None)
    language = current.language
    if datatype is not None:
        # TODO: check that there are no atts other than datatype and id
        datatype = absolutize(datatype)
    else:
        # property attributes: each one becomes a statement about the
        # (possibly freshly minted) object BNode
        for att in atts:
            if not att.startswith(str(RDFNS)):
                predicate = absolutize(att)
            elif att in PROPERTY_ELEMENT_ATTRIBUTES:
                continue
            elif att in PROPERTY_ATTRIBUTE_EXCEPTIONS:
                self.error("""Invalid property attribute URI: %s""" % att)
            else:
                predicate = absolutize(att)

            if att == RDF.type:
                o = URIRef(atts[att])
            else:
                if datatype is not None:
                    language = None
                o = Literal(atts[att], language, datatype)

            if object is None:
                object = BNode()
            self.store.add((object, predicate, o))
    if object is None:
        # no object yet: expect character data for a plain literal
        current.data = ""
        current.object = None
    else:
        current.data = None
        current.object = object
def testFalseBoolean(self) -> None:
    """Mixed-case 'falsE' and '0' both map to a falsy xsd:boolean value."""
    for lexical in ("falsE", "0"):
        self.assertFalse(Literal(lexical, datatype=_XSD_BOOLEAN).value)
def endElementNS(self, name, qname):
    # SAX close-tag handler for TriX documents. self.state appears to track
    # nesting: 0 = outside TriX, 1 = inside <TriX>, 2 = inside <graph>,
    # 3 = reading the graph identifier, 4 = collecting a triple's terms
    # — inferred from the transitions below; TODO confirm against the
    # corresponding startElementNS handler.
    if name[0] != str(TRIXNS):
        self.error(
            "Only elements in the TriX namespace are allowed. %s!=%s" %
            (name[0], TRIXNS))
    if name[1] == "uri":
        if self.state == 3:
            # a <uri> at graph level names the graph itself
            self.graph = Graph(store=self.store,
                               identifier=URIRef(self.chars.strip()))
            self.state = 2
        elif self.state == 4:
            # a <uri> inside <triple> contributes one term
            self.triple += [URIRef(self.chars.strip())]
        else:
            self.error(
                "Illegal internal self.state - This should never " +
                "happen if the SAX parser ensures XML syntax correctness")
    elif name[1] == "id":
        if self.state == 3:
            # blank-node graph identifier
            self.graph = Graph(self.store,
                               identifier=self.get_bnode(
                                   self.chars.strip()))
            self.state = 2
        elif self.state == 4:
            self.triple += [self.get_bnode(self.chars.strip())]
        else:
            self.error(
                "Illegal internal self.state - This should never " +
                "happen if the SAX parser ensures XML syntax correctness")
    elif name[1] == "plainLiteral" or name[1] == "typedLiteral":
        if self.state == 4:
            # lang/datatype were captured by the corresponding start handler
            self.triple += [
                Literal(self.chars, lang=self.lang, datatype=self.datatype)
            ]
        else:
            self.error("This should never happen if the SAX parser " +
                       "ensures XML syntax correctness")
    elif name[1] == "triple":
        if self.state == 4:
            # a complete triple must have exactly subject/predicate/object
            if len(self.triple) != 3:
                self.error("Triple has wrong length, got %d elements: %s" %
                           (len(self.triple), self.triple))
            self.graph.add(self.triple)
            # self.store.store.add(self.triple,context=self.graph)
            # self.store.addN([self.triple+[self.graph]])
            self.state = 2
        else:
            self.error("This should never happen if the SAX parser " +
                       "ensures XML syntax correctness")
    elif name[1] == "graph":
        self.graph = None
        self.state = 1
    elif name[1] == "TriX":
        self.state = 0
    else:
        self.error("Unexpected close element")
def n3_to_nx_hyper(data, positive_class):
    """Build a networkx hypergraph-style representation of an n3 dataset.

    Each RDF statement is reified as an intermediate node ('r_...' for
    generalization links, 'a_...' for annotations) with typed edges to its
    subject, object and predicate.

    Fixes: ``annotations.next()`` is Python-2-only generator syntax
    (AttributeError on Python 3) and was mixed with the built-in ``next()``
    two lines below — use ``next()`` throughout. Dead locals ``x = 1`` and
    the write-only counter ``i`` removed.

    :param data: rdflib Graph with HEDWIG-annotated examples
    :param positive_class: class label selecting the target examples
    :return: (graph, target_nodes, non_target_annotations, generalization_predicates)
    """
    return_non_targets = defaultdict(list)
    return_graph = nx.Graph()
    positive_class = Literal(positive_class)
    generelization_predicates = list(
        data.subjects(predicate=RDF.type,
                      object=HEDWIG.GeneralizationPredicate))
    for predicate in generelization_predicates:
        for sub, obj in data.subject_objects(predicate=predicate):
            if user_defined(sub) and user_defined(obj):
                # reify the statement as a relation node with typed edges
                relation_node = 'r_%s-%s' % (sub[-7:], obj[-7:])
                assert relation_node not in return_graph
                return_graph.add_node(relation_node)
                return_graph.add_edge(relation_node, sub, type='subject')
                return_graph.add_edge(relation_node, obj, type='object')
                return_graph.add_edge(relation_node, predicate,
                                      type='predicate')
    target_nodes = set()
    for example in data.subjects(predicate=RDF.type, object=HEDWIG.Example):
        if (example, HEDWIG.class_label, positive_class) in data:
            target_nodes.add(example)
            for annotation_link in data.objects(
                    subject=example, predicate=HEDWIG.annotated_with):
                annotations = data.objects(subject=annotation_link,
                                           predicate=HEDWIG.annotation)
                # each annotation link must carry exactly one annotation
                annotation = next(annotations)
                if next(annotations, None) is not None:
                    raise Exception(
                        "Unable to parse data - annotations for example %s are unclear"
                        % example)
                if annotation not in return_graph:
                    raise Exception(
                        "Data - BK synchronization error: annotation %s does not appear in the Background "
                        "knowledge!"
                        % annotation)
                # VERSION 1: reify the annotation as its own node
                annotation_node = 'a_%s-%s' % (example[-7:], annotation[-7:])
                assert annotation_node not in return_graph
                return_graph.add_node(annotation_node)
                return_graph.add_edge(annotation_node, example, type='object')
                return_graph.add_edge(annotation_node, annotation,
                                      type='subject')
                return_graph.add_edge(annotation_node, 'annotates',
                                      type='predicate')
                # VERSION 2:
                # return_graph.add_edge(example, annotation)
        else:
            # non-target examples: only record their annotations
            for annotation_link in data.objects(
                    subject=example, predicate=HEDWIG.annotated_with):
                annotations = data.objects(subject=annotation_link,
                                           predicate=HEDWIG.annotation)
                annotation = next(annotations)
                if next(annotations, None) is not None:
                    raise Exception(
                        "Unable to parse data - annotations for example %s are unclear"
                        % example)
                if annotation not in return_graph:
                    raise Exception(
                        "Data - BK synchronization error: annotation %s does not appear in the Background "
                        "knowledge!"
                        % annotation)
                return_non_targets[example].append(annotation)
    return return_graph, target_nodes, return_non_targets, generelization_predicates
def gen_list(s: URIRef, p: URIRef, objs: List[str]) -> List[Tuple[URIRef, URIRef, Literal]]:
    """Build (s, p, o) triples: URIRef objects pass through, everything
    else is wrapped as a Literal."""
    triples = []
    for obj in objs:
        node = obj if isinstance(obj, URIRef) else Literal(obj)
        triples.append((s, p, node))
    return triples
def setUp(self):
    """Fixture: an xsd:dateTime Literal with an explicit Zulu timezone."""
    xsd_datetime = URIRef("http://www.w3.org/2001/XMLSchema#dateTime")
    self.x = Literal("2008-12-01T18:02:00Z", datatype=xsd_datetime)
def gen_lit_list(objs: List[str]) -> List[Literal]:
    """Wrap each non-URIRef value as a Literal; URIRefs pass through."""
    nodes = []
    for obj in objs:
        nodes.append(obj if isinstance(obj, URIRef) else Literal(obj))
    return nodes
def test_util_from_n3_expectliteralanddtype(self):
    """An ^^xsd:boolean-typed n3 string parses to an equivalent typed Literal."""
    res = util.from_n3('"true"^^xsd:boolean', default=None, backend=None)
    expected = Literal("true", datatype=XSD["boolean"])
    # eq() compares by value, tolerating datatype-normalization differences
    self.assertTrue(res.eq(expected))
def test_permutations(self):
    """Exercise triples() with every wildcard pattern over the fixture graph.

    Fix: several calls passed subject/predicate/object as three positional
    arguments — ``g.triples(None, None, o6)`` — where rdflib's
    ``Graph.triples`` takes a single ``(s, p, o)`` tuple (as the other calls
    in this test already did); wrapped them all in tuples.
    """
    g = self.build_graph()
    # fixed subject
    self.assertEqual([(s6, p6, o6), (s6, p6, o6), (s6, p6, Literal(1)),
                      (s6, p6, Literal(2)), (s6, p7, o6), (s6, p7, o7)],
                     sorted(g.triples((s6, None, None))))
    # fixed predicate
    self.assertEqual([(s6, p6, o6), (s6, p6, o6), (s6, p6, Literal(1)),
                      (s6, p6, Literal(2))],
                     sorted(g.triples((None, p6, None))))
    self.assertEqual([(s6, p7, o6), (s6, p7, o7)],
                     sorted(g.triples((None, p7, None))))
    # fixed object
    self.assertEqual([(s6, p6, o6), (s6, p6, o6), (s6, p7, o6)],
                     sorted(g.triples((None, None, o6))))
    self.assertEqual([(s6, p7, o7)], sorted(g.triples((None, None, o7))))
    # two terms fixed
    self.assertEqual([(s6, p6, o6), (s6, p6, o6), (s6, p6, Literal(1)),
                      (s6, p6, Literal(2))],
                     sorted(g.triples((s6, p6, None))))
    self.assertEqual([(s6, p6, o6)], sorted(g.triples((None, p6, o6))))
    self.assertEqual([(s6, p6, o6), (s6, p7, o6)],
                     sorted(g.triples((s6, None, o6))))
    # fully specified
    self.assertEqual([(s6, p6, o6)], sorted(g.triples((s6, p6, o6))))
    # full wildcard enumerates the entire graph
    self.assertEqual([(s1, p1, o1), (s1, p1, Literal('a')),
                      (s1, p1, Literal('b')), (s1, p1, Literal('b')),
                      (s1, p1, Literal('c')), (s1, p1, Literal('d')),
                      (s1, p1, Literal('e')), (s2, p2, o2),
                      (s2, p2, Literal('a')), (s2, p2, Literal('b')),
                      (s2, p2, Literal('c')), (s2, p2, Literal('d')),
                      (s2, p2, Literal('e')),
                      (s4, p4, Literal(17.0, datatype=XSD.decimal)),
                      (s5, p5, o5), (s5, p5, Literal(1)),
                      (s5, p5, Literal(2)), (s6, p6, o6), (s6, p6, o6),
                      (s6, p6, Literal(1)), (s6, p6, Literal(2)),
                      (s6, p7, o6), (s6, p7, o7)],
                     sorted(g.triples((None, None, None))))
def test_util_from_n3_expectliteralmultiline(self):
    """Triple-quoted n3 literals keep embedded newlines and the language tag.

    Fix: ``assertTrue(res, Literal(...))`` passed the Literal as the
    failure *message* and compared nothing; the from_n3 doctest shows the
    equality holds, so assert it for real.
    """
    s = '"""multi\nline\nstring"""@en'
    res = util.from_n3(s, default=None, backend=None)
    self.assertEqual(res, Literal("multi\nline\nstring", lang="en"))
def from_n3(s, default=None, backend=None, nsm=None):
    r'''Creates the Identifier corresponding to the given n3 string.

    :param s: the n3-serialized term (URIRef, Literal, BNode, formula, ...)
    :param default: value returned when ``s`` is empty/falsy
    :param backend: store passed on to Graph/QuotedGraph for formula terms
    :param nsm: NamespaceManager used to resolve prefixed names; a default
        one is created on demand

    >>> from_n3('<http://ex.com/foo>') == URIRef('http://ex.com/foo')
    True
    >>> from_n3('"foo"@de') == Literal('foo', lang='de')
    True
    >>> from_n3('"""multi\nline\nstring"""@en') == Literal(
    ...     'multi\nline\nstring', lang='en')
    True
    >>> from_n3('42') == Literal(42)
    True
    >>> from_n3(Literal(42).n3()) == Literal(42)
    True
    >>> from_n3('"42"^^xsd:integer') == Literal(42)
    True
    >>> from rdflib import RDFS
    >>> from_n3('rdfs:label') == RDFS['label']
    True
    >>> nsm = NamespaceManager(Graph())
    >>> nsm.bind('dbpedia', 'http://dbpedia.org/resource/')
    >>> berlin = URIRef('http://dbpedia.org/resource/Berlin')
    >>> from_n3('dbpedia:Berlin', nsm=nsm) == berlin
    True
    '''
    if not s:
        return default
    if s.startswith("<"):
        # Angle-bracketed IRI.
        # Hack: this should correctly handle strings with either native unicode
        # characters, or \u1234 unicode escapes.
        return URIRef(
            s[1:-1].encode("raw-unicode-escape").decode("unicode-escape"))
    elif s.startswith('"'):
        # Quoted literal, possibly followed by @lang or ^^datatype.
        if s.startswith('"""'):
            quotes = '"""'
        else:
            quotes = '"'
        # Split off everything after the closing quotes (lang tag / datatype).
        value, rest = s.rsplit(quotes, 1)
        value = value[len(quotes):]  # strip leading quotes
        datatype = None
        language = None
        # as a given datatype overrules lang-tag check for it first
        dtoffset = rest.rfind("^^")
        if dtoffset >= 0:
            # found a datatype
            # datatype has to come after lang-tag so ignore everything before
            # see: http://www.w3.org/TR/2011/WD-turtle-20110809/
            # #prod-turtle2-RDFLiteral
            # NOTE: resolved recursively, so prefixed datatypes work too.
            datatype = from_n3(rest[dtoffset + 2:], default, backend, nsm)
        else:
            if rest.startswith("@"):
                language = rest[1:]  # strip leading at sign
        value = value.replace(r"\"", '"')  # unescape embedded quotes
        # Hack: this should correctly handle strings with either native unicode
        # characters, or \u1234 unicode escapes.
        value = value.encode("raw-unicode-escape").decode("unicode-escape")
        return Literal(value, language, datatype)
    elif s == "true" or s == "false":
        # Bare boolean shorthand.
        return Literal(s == "true")
    elif s.isdigit():
        # Bare integer shorthand.
        return Literal(int(s))
    elif s.startswith("{"):
        # N3 formula -> quoted graph over the given backend.
        identifier = from_n3(s[1:-1])
        return QuotedGraph(backend, identifier)
    elif s.startswith("["):
        # Graph identifier in square brackets.
        identifier = from_n3(s[1:-1])
        return Graph(backend, identifier)
    elif s.startswith("_:"):
        # Blank node label.
        return BNode(s[2:])
    elif ":" in s:
        # Prefixed name, e.g. rdfs:label; resolve via the namespace manager.
        if nsm is None:
            # instantiate default NamespaceManager and rely on its defaults
            nsm = NamespaceManager(Graph())
        prefix, last_part = s.split(":", 1)
        # NOTE(review): raises KeyError when the prefix is unbound — presumably
        # intentional, but confirm against callers.
        ns = dict(nsm.namespaces())[prefix]
        return Namespace(ns)[last_part]
    else:
        # Anything else is treated as a bare blank-node label.
        return BNode(s)
def test_to_python_timedelta(self):
    """An xsd:dayTimeDuration literal converts to a Python timedelta."""
    iso_duration = "P4DT5H6M7S"
    lit = Literal(iso_duration, datatype=XSD.dayTimeDuration)
    value = lit.toPython()
    self.assertTrue(isinstance(value, timedelta))
    self.assertEqual(value, parse_duration(iso_duration))
def citation_count(self, paper: Paper) -> Graph:
    """Query the Scopus citation-count API for *paper* and return the result
    as an RDF graph of indicator triples.

    :param paper: source document; its DOI and timestamp are read
    :return: a Graph describing the paper and (when the API responds with a
        citation count) a three-level indicator structure; on any request or
        parse failure the error is logged and the partial graph is returned.

    Bug fixes vs. the original: the bare ``except:`` (which also swallowed
    ``KeyboardInterrupt``/``SystemExit``) is narrowed to ``except Exception``,
    and the ``paper`` parameter is no longer shadowed by its URIRef.
    """
    g = Graph()
    g.bind("sd", ScholalryData)
    g.bind("iont", IOnt)
    g.bind("covid", AltmetricsCOVID)
    doi = paper.get_doi()
    timestamp = paper.get_timestamp()
    paper_uri = URIRef("https://doi.org/" + doi)
    g.add((paper_uri, RDF.type, ScholalryData.Document))
    g.add((paper_uri, DC.created, Literal(timestamp)))

    headers = {
        'X-ELS-Insttoken': self.__insttoken,
        'X-ELS-APIKey': self.__api_key
    }
    endpoint = ScopusAPIClient.CITATION_COUNT_ENDPOINT + "?%s"
    params = urllib.parse.urlencode({'doi': doi})
    request = urllib.request.Request(endpoint % params, headers=headers)
    try:
        response = urllib.request.urlopen(request)
        output = response.read()
        if output is not None:
            js = json.loads(output)
            # Defensive descent into the expected response envelope.
            docu = js.get("citation-count-response", {}).get("document", {})
            if "citation-count" in docu:
                self._add_citation_indicators(
                    g, doi, paper_uri, docu["citation-count"])
    except Exception:
        # Best-effort: any network/JSON failure leaves the base graph intact.
        log.error("No citation count available for paper %s." % doi)
    return g

def _add_citation_indicators(self, g: Graph, doi: str, paper_uri: URIRef,
                             citation_count) -> None:
    """Attach the three-level citation indicator structure for *doi* to *g*.

    Graph.add has set semantics, so re-adding shared triples (metric type,
    indicator value) is harmless and keeps this helper self-contained.
    """
    metric = AltmetricsCOVID.citation_count
    value_uri = URIRef(AltmetricsCOVID + doi + "_citations_value")
    g.add((metric, RDF.type, IOnt.Metric))
    g.add((value_uri, RDF.type, IOnt.IndicatorValue))
    g.add((value_uri, IOnt.indicatorValue,
           Literal(citation_count, datatype=XSD.integer)))

    # Level 1: generic "Citations" indicator attached to the paper.
    indicator = URIRef(AltmetricsCOVID + doi + "_citations")
    g.add((paper_uri, IOnt.hasIndicator, indicator))
    g.add((indicator, RDF.type, IOnt.Indicator))
    g.add((indicator, RDFS.label, Literal("Citations")))
    g.add((indicator, IOnt.hasSource, AltmetricsCOVID.scopus))
    g.add((indicator, IOnt.basedOnMetric, metric))
    g.add((indicator, IOnt.hasIndicatorValue, value_uri))

    # Level 2: "Citations indexes" sub-indicator.
    level_2 = URIRef(AltmetricsCOVID + doi + "_citation-indexes")
    g.add((level_2, RDF.type, IOnt.Indicator))
    g.add((level_2, RDFS.label, Literal("Citations indexes")))
    g.add((level_2, IOnt.hasSource, AltmetricsCOVID.scopus))
    g.add((indicator, IOnt.hasSubIndicator, level_2))
    g.add((level_2, IOnt.basedOnMetric, metric))
    g.add((level_2, IOnt.hasIndicatorValue, value_uri))

    # Level 3: Scopus-specific citation count.
    level_3 = URIRef(AltmetricsCOVID + doi + "_scopus")
    g.add((level_3, RDF.type, IOnt.Indicator))
    g.add((level_3, RDFS.label, Literal("Scopus citation count")))
    g.add((level_3, IOnt.hasSource, AltmetricsCOVID.scopus))
    g.add((level_2, IOnt.hasSubIndicator, level_3))
    g.add((level_3, IOnt.basedOnMetric, metric))
    g.add((level_3, IOnt.hasIndicatorValue, value_uri))
def test_to_python_ymdhms_duration(self):
    """An xsd:duration literal with year/month parts becomes a Duration."""
    iso_duration = "P1Y2M4DT5H6M7S"
    lit = Literal(iso_duration, datatype=XSD.duration)
    value = lit.toPython()
    self.assertTrue(isinstance(value, Duration))
    self.assertEqual(value, parse_duration(iso_duration))
def test_timezone(self):
    """A Zulu-suffixed xsd:dateTime literal converts to a datetime.

    Bug fixes: drops the obsolete ``sys.version_info >= (2, 6, 0)`` guard —
    on older interpreters it made the test silently pass with no assertions
    at all — and replaces the deprecated ``assert_`` alias with
    ``assertTrue``.
    """
    l = Literal(
        "2008-12-01T18:02:00.522630Z",
        datatype=URIRef("http://www.w3.org/2001/XMLSchema#dateTime"))
    self.assertTrue(isinstance(l.toPython(), datetime))