def relaxed_compare(graph1, graph2):
    """Compare two graphs, but treat untyped literals as strings.

    :param graph1: first rdflib Graph
    :param graph2: second rdflib Graph
    :return: True if the graphs are equal under relaxed literal comparison
    """
    # If graphs are really identical, comparison is true.
    if graph1 == graph2:
        return True
    # Different triple counts can never be equal.
    if len(graph1) != len(graph2):
        return False
    # Otherwise compare only the triples that differ between the graphs.
    in_both, in_first, in_second = graph_diff(graph1, graph2)
    # Each extra triple on one side must have a value-equal counterpart
    # (same subject and predicate) on the other; Literal.eq() treats an
    # untyped literal as equal to the matching xsd:string literal.
    # (Removed the no-op ``else: pass`` clauses from the original loops.)
    for s, p, o in in_first:
        v = in_second.value(subject=s, predicate=p)
        # NOTE(review): v is None when no counterpart exists -- confirm
        # that o.eq(None) behaves as intended for this data.
        if not o.eq(v):
            return False
    for s, p, o in in_second:
        v = in_first.value(subject=s, predicate=p)
        if not o.eq(v):
            return False
    # No mismatches found: the graphs are equal.
    return True
def load_single(orcid_id, person_uri, person_id, person_class, data_path,
                endpoint, username, password, namespace=None,
                skip_person=False, confirmed_orcid_id=False):
    """Crosswalk one ORCID record to RDF, diff it against the previously
    saved snapshot, and push only the delta to a SPARQL endpoint.

    :param orcid_id: ORCID identifier to load
    :param person_uri: URI of the person resource (may be None)
    :param person_id: local identifier for the person
    :param person_class: RDF class to use for the person
    :param data_path: directory holding the per-ORCID Turtle snapshots
    :param endpoint: SPARQL Update endpoint URL
    :param username: endpoint username
    :param password: endpoint password
    :param namespace: base namespace for minted URIs (optional)
    :param skip_person: if True, skip generating person-level triples
    :param confirmed_orcid_id: whether the ORCID id has been confirmed
    :return: (full graph, graph of added triples, graph of deleted triples)
    """
    with Store(data_path) as store:
        # Crosswalk the ORCID profile into an RDF graph.
        (graph, profile, person_uri) = default_execute(
            orcid_id, namespace=namespace, person_uri=person_uri,
            person_id=person_id, skip_person=skip_person,
            person_class=person_class, confirmed_orcid_id=confirmed_orcid_id)
        graph_filepath = os.path.join(data_path, "%s.ttl" % orcid_id.lower())
        previous_graph = Graph(namespace_manager=ns_manager)
        # Load the last saved graph, if any (first run leaves it empty).
        if os.path.exists(graph_filepath):
            log.debug("Loading previous graph %s", graph_filepath)
            previous_graph.parse(graph_filepath, format="turtle")
        # Diff against last graph: deletes come from the previous graph,
        # adds from the newly crosswalked one.
        (both_graph, delete_graph, add_graph) = graph_diff(previous_graph, graph)
        # SPARQL Update: deletes are applied before inserts.
        log.info("Adding %s, deleting %s triples for %s",
                 len(add_graph), len(delete_graph), orcid_id)
        sparql_delete(delete_graph, endpoint, username, password)
        sparql_insert(add_graph, endpoint, username, password)
        # Save new last graph so the next run diffs against this state.
        log.debug("Saving new graph %s", graph_filepath)
        with codecs.open(graph_filepath, "w") as out:
            graph.serialize(format="turtle", destination=out)
        # Record that this ORCID id was processed.
        store.touch(orcid_id)
        return graph, add_graph, delete_graph
def update_graph(q):
    """Update global GRAPHS dictionary with the current data for the given
    Q number.

    Downloads the entity data from wikidata in Turtle form, parses it and
    then does a diff (to stdout) with any current version of that entity.

    FIXME -- THIS IS NOT THE RIGHT WAY TO GET THE GRAPH DATA!
    DON'T KNOW HOW TO GET CURRENT RDF.
    """
    global GRAPHS
    try:
        r = requests.get(url=WIKIDATA_ENTITY_BASE + q,
                         headers={'Accept': 'text/turtle'})
        g_new = Graph()
        g_new.parse(data=r.text, format='turtle')
    except Exception:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
        # are no longer swallowed; failed fetch/parse is reported and the
        # entity is skipped.
        print("Update for %s failed" % (q))
        return
    if q in GRAPHS:
        # We already hold a version of this entity: print a summary diff.
        g_old = GRAPHS[q]
        print("%s: old, %d -> new, %d triples" % (q, len(g_old), len(g_new)))
        in_both, in_old, in_new = graph_diff(g_old, g_new)
        print("%s: < %d, == %d, > %d"
              % (q, len(in_old), len(in_both), len(in_new)))
        for s, p, o in in_old:
            print("< %s %s %s" % (str(s), str(p), str(o)))
        for s, p, o in in_new:
            print("> %s %s %s" % (str(s), str(p), str(o)))
    else:
        print("%s: new, %d triples" % (q, len(g_new)))
    GRAPHS[q] = g_new
def test_compare_triples():
    """Fetch each URL in every registered MIME type and verify that the
    served triples match the local dump file (nothing unique to server)."""
    for mime, fext in MIME_TYPES.items():
        dump_path = path.join(DUMP_DIR, path.basename(mime))
        for url in URLs:
            if six.PY2:
                url_path = urlparse.urlparse(url).path
            else:
                url_path = urlparse(url).path
            fname = path.join(dump_path,
                              '%s.%s' % (path.basename(url_path), fext))
            request = Request(url)
            request.add_header('Accept', mime)
            response = urlopen(request)
            g_fdp.parse(data=response.read(), format=mime)
            g_dump.parse(fname, format=mime)
            both, first, second = graph_diff(g_fdp, g_dump)
            n_first = len(first)
            # n_second = len(second)
            # n_both = len(both)
            assert_equals(
                n_first, 0,
                '{} triple(s) different from reference:\n\n{}===\n{}\n'.format(
                    n_first,
                    first.serialize(format='turtle'),
                    second.serialize(format='turtle')))
def merge_graphs(g: Graph, g2: Graph, map_uri_from=None, map_uri_to=None,
                 args: Optional[AttribDict] = None):
    """Merge two graphs (g2 into g), but taking care to replace certain
    properties that are known to take a single value, and mapping URIs
    where needed.

    :param g: target graph, mutated in place
    :param g2: source graph whose extra triples are merged into ``g``
    :param map_uri_from: URI to rewrite wherever it appears as subject/object
    :param map_uri_to: replacement URI for ``map_uri_from``
    :param args: options; ``args.baseuri`` resolves relative URIs
    """
    merged = 0
    remapped = 0
    removed = 0
    # Hoist the URIRef construction out of the per-triple loop.
    do_map = bool(map_uri_from and map_uri_to)
    uri_from = URIRef(map_uri_from) if do_map else None
    uri_to = URIRef(map_uri_to) if do_map else None
    both, first, second = graph_diff(g, g2)
    for (s, p, o) in second:
        s = handle_rel_uri(s, args.baseuri)
        p = handle_rel_uri(p, args.baseuri)
        o = handle_rel_uri(o, args.baseuri, prop=p)
        if do_map:
            if s == uri_from:
                s = uri_to
                remapped += 1
            if o == uri_from:
                remapped += 1
                o = uri_to
        if p in SINGULAR_PROPERTIES:
            # Remove existing triples in the graph. Materialize the match
            # list first: removing from ``g`` while iterating g.triples()
            # mutates the store being iterated (bug in the original).
            for triple in list(g.triples((s, p, None))):
                g.remove(triple)
                removed += 1
        g.add((s, p, o))
        merged += 1
    # Renamed the ambiguous locals ``i``/``l`` (``l`` shadows easily
    # misread glyphs) without changing the printed output.
    print(
        f"  Merged {merged} of {len(g2)} triples, removed {removed} superseded values, remapped {remapped} uris",
        file=sys.stderr)
def testB(self):
    """Curiously, this one passes, even before the fix in issue 151"""
    graph = rdflib.Graph()
    triple = (rdflib.URIRef("urn:a"),
              rdflib.URIRef("urn:p"),
              rdflib.Literal(u'\xe9'))
    graph.add(triple)
    # Diffing a graph against itself must not raise.
    diff = graph_diff(graph, graph)
def testA(self):
    """with bnode"""
    graph = rdflib.Graph()
    triple = (rdflib.BNode(),
              rdflib.URIRef("urn:p"),
              rdflib.Literal(u"\xe9"))
    graph.add(triple)
    # Self-diff with a blank node subject must not raise.
    diff = graph_diff(graph, graph)
def test_compare_triples():
    """For every MIME type, fetch each URL and assert the server response
    contains no triples that are absent from the local dump file."""
    for mime, fext in MIME_TYPES.items():
        dump_path = path.join(DUMP_DIR, path.basename(mime))
        for url in URLs:
            parsed_path = (urlparse.urlparse(url).path if six.PY2
                           else urlparse(url).path)
            fname = path.join(
                dump_path, '%s.%s' % (path.basename(parsed_path), fext))
            req = Request(url)
            req.add_header('Accept', mime)
            res = urlopen(req)
            g_fdp.parse(data=res.read(), format=mime)
            g_dump.parse(fname, format=mime)
            both, first, second = graph_diff(g_fdp, g_dump)
            n_first = len(first)
            # n_second = len(second)
            # n_both = len(both)
            assert_equals(
                n_first, 0,
                '{} triple(s) different from reference:\n\n{}===\n{}\n'.format(
                    n_first,
                    first.serialize(format='turtle'),
                    second.serialize(format='turtle')))
def rdf_comparator(self, old_data: str, new_data: str) -> bool:
    """Compare two Turtle documents for isomorphism, ignoring the
    MMNS.generation_date metadata triples.

    :param old_data: baseline graph, serialized as Turtle
    :param new_data: candidate graph, serialized as Turtle
    :return: True if the graphs match; otherwise the assertion fails
    """
    old_graph = Graph()
    new_graph = Graph()
    old_graph.parse(data=old_data, format="turtle")
    new_graph.parse(data=new_data, format="turtle")
    old_iso = to_isomorphic(old_graph)
    # Remove the metadata specific triples
    for t in list(old_iso.triples((None, MMNS.generation_date, None))):
        old_iso.remove(t)
    new_iso = to_isomorphic(new_graph)
    for t in list(new_iso.triples((None, MMNS.generation_date, None))):
        new_iso.remove(t)
    # Graph compare takes a Looong time
    in_both, in_old, in_new = graph_diff(old_iso, new_iso)
    # if old_iso != new_iso:
    #   in_both, in_old, in_new = graph_diff(old_iso, new_iso)
    old_len = len(list(in_old))
    new_len = len(list(in_new))
    if old_len or new_len:
        if old_len:
            print("----- Old graph only -----")
            self._print_triples(in_old)
        if new_len:
            # Fixed typo in the printed heading ("Grapn" -> "Graph").
            print("----- New Graph Only -----")
            self._print_triples(in_new)
        self.assertTrue(False, "RDF file mismatch")
        return False
    return True
def check_graph(graph, type):
    """check if update or create by comparision with live graph"""
    # NOTE: parameter name `type` shadows the builtin; kept as-is because
    # it is part of the public interface.
    if not graph:
        return False
    # Collect every distinct subject mentioned in the changeset graph.
    result = graph.query("""SELECT DISTINCT ?subject WHERE {?subject ?b ?c}""")
    """for every subject of changeset graph try to find other triples in DBpedia live to differ between add/update/delete"""
    events = []
    for subject in result:
        resource = subject[0]
        if(resource.find(DBpedia.DBPEDIAURL) == 0):
            # apply only for resources on server with DBPEDIA URL
            live_resource = DBpedia.liveize(resource)
            # online version of dbpedia live have different URIs as changeset URIs
            onl_graph = rdflib.Graph()
            #try:
            # Fetch the live description of the resource. NOTE(review):
            # the protective try/except is commented out, so a fetch
            # failure aborts the whole function -- confirm intended.
            onl_graph.parse(live_resource)
            onl_iso = to_isomorphic(onl_graph)
            loc_iso = to_isomorphic(graph)
            # in_onl = triples only in the live graph,
            # in_loc = triples only in the local changeset graph.
            in_both, in_onl, in_loc = graph_diff(onl_iso, loc_iso)
            event_type = "notupdated"
            event = None
            for res_of_diff, b, c in in_onl:
                # if live graph has more triples about resource it should be an update
                if(str(live_resource) == str(res_of_diff)):
                    event_type = "update"
                    break;
            if(event_type == "notupdated" and type == "added"):
                event = ResourceChange(uri=str(live_resource), changetype="CREATE")
            elif(event_type == "update" and type == "added"):
                event = ResourceChange(uri=str(live_resource), changetype="UPDATE")
            else:
                # NOTE(review): this branch is also reached when
                # event_type == "notupdated" and type != "added" --
                # confirm DELETE is the intended classification there.
                event = ResourceChange(uri=str(live_resource), changetype="DELETE")
            events.append(event)
            #except Exception as e:
            #print "Error parsing %s: %s" % (live_resource,e)
            #self.notify_observers(event)
    return events
def turtle(test):
    # Run a single W3C Turtle test case: parse ``test.action`` as Turtle
    # and, for eval tests, compare the result against the NT reference.
    g = Graph()
    try:
        # Resolve relative IRIs against the canonical W3C test-suite base.
        base = 'http://www.w3.org/2013/TurtleTests/'+split_uri(test.action)[1]
        g.parse(test.action, publicID=base, format='turtle')
        if not test.syntax:
            raise AssertionError("Input shouldn't have parsed!")
        if test.result:  # eval test
            res = Graph()
            res.parse(test.result, format='nt')
            if verbose:
                # Verbose mode: dump a triple-level diff before failing.
                both, first, second = graph_diff(g, res)
                if not first and not second:
                    return
                print("Diff:")
                #print "%d triples in both"%len(both)
                print("Turtle Only:")
                for t in first:
                    print(t)
                print("--------------------")
                print("NT Only")
                for t in second:
                    print(t)
                raise Exception('Graphs do not match!')
            assert isomorphic(g, res), 'graphs must be the same'
    except:
        # Negative-syntax tests are expected to fail parsing; re-raise
        # only when the input was supposed to be valid.
        if test.syntax:
            raise
def calcPrecisionRecall2(cano_dbp, cano_lift, size_bgp_dbp, size_bgp_lift):
    """Compute precision/recall of a deduced BGP against a ground truth.

    :param cano_dbp: canonicalized graph of the ground-truth BGP
    :param cano_lift: canonicalized graph of the deduced BGP
    :param size_bgp_dbp: number of triple patterns in the ground truth
    :param size_bgp_lift: number of triple patterns in the deduction
    :return: (precision, recall) tuple
    """
    if cano_dbp == cano_lift:
        # If ground truth and deduction is equal then precision and recall are 1
        precision = 1
        recall = 1
    else:
        in_both, in_first, in_second = graph_diff(cano_dbp, cano_lift)
        b = len(in_both)  # b has the number of well deduced triple patterns
        for s, p, o in in_first:
            for ss, pp, oo in in_second:
                # NOTE(review): the and/or grouping below is not fully
                # parenthesized; verify it matches the intended rule of
                # "two components equal, the third a variable on both sides".
                if ((isinstance(s, Variable) and isinstance(ss, Variable))
                        and p == pp and o == oo) or \
                        (s == ss and p == pp
                         and (isinstance(o, Variable) and isinstance(oo, Variable))
                         or (s == ss
                             and (isinstance(p, Variable) and isinstance(pp, Variable))
                             and o == oo)):
                    # b is incremented with triple patterns whose variables
                    # were canonized differently because the size of the BGP
                    # and that have two things in common (subject, predicate,
                    # or object);
                    b += 1
                    break
        try:
            precision = b / size_bgp_lift  # How many deduced triple patterns are relevant
        except ZeroDivisionError:
            precision = 0
            print("Division by zero in precision with size_bgp_lift")
        try:
            recall = b / size_bgp_dbp  # How many relevant triple patterns are deduced
        except ZeroDivisionError:
            recall = 0
            print("Division by zero in recall with size_bgp_dbp")
    return (precision, recall)
def turtle(test):
    # Python 2 variant of the W3C Turtle test runner: parse test.action
    # and, for eval tests, compare against the NT reference.
    g = Graph()
    try:
        # Resolve relative IRIs against the canonical W3C test-suite base.
        base = 'http://www.w3.org/2013/TurtleTests/'+split_uri(test.action)[1]
        g.parse(test.action, publicID=base, format='turtle')
        if not test.syntax:
            raise AssertionError("Input shouldn't have parsed!")
        if test.result:  # eval test
            res = Graph()
            res.parse(test.result, format='nt')
            if verbose:
                # Verbose mode: dump a triple-level diff before failing.
                both, first, second = graph_diff(g, res)
                if not first and not second:
                    return
                print "Diff:"
                #print "%d triples in both"%len(both)
                print "Turtle Only:"
                for t in first:
                    print t
                print "--------------------"
                print "NT Only"
                for t in second:
                    print t
                raise Exception('Graphs do not match!')
            assert isomorphic(g, res), 'graphs must be the same'
    except:
        # Negative-syntax tests are expected to fail parsing; re-raise
        # only when the input was supposed to be valid.
        if test.syntax:
            raise
def test_subsets(self) -> None:
    """
    This test verifies that `graph_diff` returns the correct values for
    two graphs, `g0` and `g1` where the triples in `g0` is a subset of
    the triples in `g1`. The expectation is that graph_diff reports that
    there are no triples only in `g0`, and that there are triples that
    occur in both `g0` and `g1`, and that there are triples only in `g1`.
    """
    subset_triples: _TripleSetT = set()
    node = BNode()
    subset_triples.add((node, FOAF.name, Literal("Golan Trevize")))
    subset_triples.add((node, RDF.type, FOAF.Person))
    g0 = Graph()
    g0 += subset_triples

    superset_triples: _TripleSetT = set(subset_triples)
    node = BNode()
    superset_triples.add((node, FOAF.name, Literal("Janov Pelorat")))
    superset_triples.add((node, RDF.type, FOAF.Person))
    g1 = Graph()
    g1 += superset_triples

    diff_result = graph_diff(g0, g1)
    in_both, in_first, in_second = GraphHelper.triple_sets(diff_result)
    self.assertFalse(in_first)
    self.assertTrue(in_second)
    self.assertTrue(in_both)
def assertEqualGraphs(self, want, got, exact=True):
    """Assert that two RDF graphs are identical (isomorphic).

    :param want: The graph as expected, as an
                 :py:class:`~rdflib.graph.Graph` object or the filename
                 of a serialized graph
    :param got: The actual graph, as an
                :py:class:`~rdflib.graph.Graph` object or the filename
                of a serialized graph
    :param exact: Whether to require that the graphs are exactly alike
                  (True) or only if all triples in `want` exists in `got`
                  (False)
    :type exact: bool
    """
    def _loadgraph(filename):
        # Load and parse a serialized graph from disk.
        g = rdflib.Graph()
        # we must read the data ourself, providing a non-ascii
        # filename to Graph.parse fails deep in rdflib internals
        format = guess_format(filename)
        if format == "nt":
            data = util.readfile(filename, "r", encoding="utf-8")
        else:
            data = util.readfile(filename, "rb")
        g.parse(data=data, format=format)
        return g

    if not isinstance(want, rdflib.Graph):
        want = _loadgraph(want)
    if not isinstance(got, rdflib.Graph):
        got = _loadgraph(got)

    (in_both, in_first, in_second) = graph_diff(want, got)

    # Build a unified "-/+" listing of the differing triples.
    msg = ""
    if in_first:
        for (s, p, o) in sorted(in_first, key=lambda t: (t[0], t[1], t[2])):
            msg += "- %s %s %s\n" % (s.n3(), p.n3(), o.n3())
    if (exact and in_second) or in_first:
        for (s, p, o) in sorted(in_second, key=lambda t: (t[0], t[1], t[2])):
            msg += "+ %s %s %s\n" % (s.n3(), p.n3(), o.n3())

    # Missing triples always fail; extra triples only fail in exact mode.
    if (len(in_first) > 0) or (len(in_second) > 0 and exact):
        if len(in_first) > 0:
            msg = "%s expected triples were not found\n" % len(in_first) + msg
        if len(in_second) > 0:
            msg = "%s unexpected triples were found\n" % len(in_second) + msg
        # The alternative difflib-based rendering in the original was dead
        # code behind a hard-coded ``ntdiff = True`` flag; removed.
        msg = "%r != %r\n" % (want, got) + msg
        return self.fail(msg)
def testB(self):
    """Curiously, this one passes, even before the fix in issue 151"""
    graph = rdflib.Graph()
    subject = rdflib.URIRef("urn:a")
    predicate = rdflib.URIRef("urn:p")
    graph.add((subject, predicate, rdflib.Literal("\xe9")))
    # Self-diff must complete without raising.
    diff = graph_diff(graph, graph)
def _dump_diff(g1, g2):
    """Print a three-way diff of ``g1`` and ``g2`` as sorted Turtle."""
    diff_parts = graph_diff(g1, g2)
    labels = ("\nin both:", "\nin first:", "\nin second:")
    for label, part in zip(labels, diff_parts):
        print(label)
        _dump_turtle_sorted(part)
def _dump_diff(g1: Graph, g2: Graph) -> None:
    """Show which triples are shared and which are unique to each graph."""
    in_both, in_first, in_second = graph_diff(g1, g2)
    sections = [
        ("\nin both:", in_both),
        ("\nin first:", in_first),
        ("\nin second:", in_second),
    ]
    for heading, part in sections:
        print(heading)
        _dump_turtle(part)
def compare_full_graphs(self, gt_graph, other_graph, owl, include=False,
                        raise_now=False, reconcile=True, to_ignore=None):
    '''Compare gt_graph and other_graph.

    :param gt_graph: ground-truth graph
    :param other_graph: graph to compare against the ground truth
    :param owl: ontology used to render readable names in messages
    :param include: if True, only report triples missing from other_graph
    :param raise_now: if True, raise immediately when differences exist
    :param reconcile: if True, reconcile the graphs before diffing
    :param to_ignore: collection of predicates to skip (default: none)
    :return: accumulated difference message ("" when the graphs agree)
    '''
    # Bug fix: with the default to_ignore=None, ``p not in to_ignore``
    # raised TypeError. Normalize to an empty tuple.
    if to_ignore is None:
        to_ignore = ()
    my_exception = ""
    # We reconcile gt_graph with other_graph
    if reconcile:
        gt_graph, other_graph = self._reconcile_graphs(
            gt_graph, other_graph)
    in_both, in_gt, in_other = graph_diff(gt_graph, other_graph)
    exc_missing = list()
    for s, p, o in in_gt:
        # If there is a corresponding s,p check if
        # there is an equivalent o
        for o_other in in_other.objects(s, p):
            same_json_array, close_float, same_str = \
                self._same_json_or_float(o, o_other)
            if same_json_array or close_float or same_str:
                # Remove equivalent o from other as well
                in_other.remove((s, p, o_other))
                break
        else:
            if (p not in to_ignore):
                exc_missing.append(
                    "\nMissing :\t '%s %s %s'"
                    % (self.get_readable_name(owl, gt_graph, s),
                       self.get_readable_name(owl, gt_graph, p),
                       self.get_readable_name(owl, gt_graph, o)))
    exc_added = list()
    if not include:
        for s, p, o in in_other:
            if p not in to_ignore:
                exc_added.append(
                    "\nAdded :\t '%s %s %s'"
                    % (self.get_readable_name(owl, other_graph, s),
                       self.get_readable_name(owl, other_graph, p),
                       self.get_readable_name(owl, other_graph, o)))
    my_exception += "".join(sorted(exc_missing) + sorted(exc_added))
    if raise_now and my_exception:
        raise Exception(my_exception)
    return my_exception
def test_graph_diff(g1, g2):
    # Verify g2 adds nothing beyond g1: diff the isomorphic forms and
    # require the "only in second" partition to be empty.
    in_both, only_in_first, only_in_second = graph_diff(to_isomorphic(g1),
                                                        to_isomorphic(g2))
    # Reuse each source graph's prefixes so the diff serializes readably.
    only_in_first.namespace_manager = g1.namespace_manager
    only_in_second.namespace_manager = g2.namespace_manager
    ok_(len(only_in_second) == 0, f"""
<<< {only_in_first.serialize(format='n3').decode('utf-8')}
===
{only_in_second.serialize(format='n3').decode('utf-8')}
>>>
""")
def compare_graphs(self):
    """
    Name: compare_graphs
    Description: Iterate through RDF graphs and populate subject,
                 predicate, object lists.
    Parameters: None.
    Return: None.
    """
    print("Loading graphs for comparison...")
    # Parse the custom glossary and the tool output into separate graphs.
    graph = self.__graph_parse__(self.custom_gloss, self.gloss_format)
    graph_tool = self.__graph_parse__(self.tool_output, self.tool_format)
    # NOTE(review): the diff results are bound to locals but never stored
    # or returned -- confirm whether follow-up processing was intended.
    in_both, in_graph, in_graph_tool = compare.graph_diff(graph, graph_tool)
def complete_new_graph(cls, service, uri, parameters, new_graph,
                       resource=None):
    """I implement :meth:`ILocalResource.complete_new_graph`.

    If new_graph contains only a wikitext property, then all corresponding
    triples are generated. If new_graph contains other triples and either
    no wikitext *or* the same wikitext as previously, then the wikitext is
    updated to reflect the triples. If new_graph contains other triples
    and a wikitext different from the previous one, then the wikitext and
    the triples *have* to be consistent, or a InvalidDataError will be
    raised.
    """
    assert resource is not None  # topics can only be created by PUT
    wikitexts = list(new_graph.objects(uri, SW.wikitext))
    if len(wikitexts) > 1:
        # leave it to WithCardinalityMixin to raise an error
        return
    if len(wikitexts) == 0:
        new_wikitext = None
    else:
        new_wikitext = unicode(wikitexts[0])
    if new_wikitext is not None and len(new_graph) == 1:
        # wikitext only: parse other triples from it
        wikitext_to_triples(resource, new_wikitext, into=new_graph)
        return
    if new_wikitext is not None and new_wikitext != resource.wikitext:
        # wikitext *and* triples were changed: they must be consistent
        from_text = wikitext_to_triples(resource, new_wikitext)
        from_text.add((uri, SW.wikitext, wikitexts[0]))
        if not isomorphic(from_text, new_graph):
            raise InvalidDataError("wikitext and triples are inconsistent")
        else:
            return
    # new_wikitext is either None or equal to old wikitext,
    # so we focus on the triples of new_graph
    if new_wikitext is None:
        # Carry the stored wikitext over so the diff below ignores it.
        old_wikitext = resource.get_state().value(uri, SW.wikitext)
        new_graph.add((uri, SW.wikitext, old_wikitext))
        new_wikitext = unicode(old_wikitext)
    # Diff the requested state against the stored state and fold the
    # triple-level changes back into the wikitext representation.
    _, added, removed = graph_diff(new_graph, resource.get_state())
    if added:
        new_wikitext = add_triples(resource, new_wikitext, added)
    if removed:
        new_wikitext = ban_triples(resource, new_wikitext, removed)
    if added or removed:
        new_graph.set((uri, SW.wikitext, Literal(new_wikitext)))
def do_algorithm(self, source_content: str, target_content: str) -> List[SyncOperation]:
    """Compute the sync operations that transform the source triples into
    the target triples: removals followed by additions."""
    source_iso = to_isomorphic(
        Graph().parse(format='turtle', data=source_content))
    target_iso = to_isomorphic(
        Graph().parse(format='turtle', data=target_content))
    _, removals_graph, additions_graph = graph_diff(source_iso, target_iso)
    # Preserve the original helper-call order (adds built first).
    add_ops = self._create_add_ops_from(additions_graph)
    remove_ops = self._create_remove_ops_from(removals_graph)
    return remove_ops + add_ops
def compare_full_graphs(self, gt_graph, other_graph, owl, include=False,
                        raise_now=False, reconcile=True, to_ignore=None):
    '''Compare gt_graph and other_graph.

    :param gt_graph: ground-truth graph
    :param other_graph: graph to compare against the ground truth
    :param owl: ontology used to render readable names in messages
    :param include: if True, only report triples missing from other_graph
    :param raise_now: if True, raise immediately when differences exist
    :param reconcile: if True, reconcile the graphs before diffing
    :param to_ignore: collection of predicates to skip (default: none)
    :return: accumulated difference message ("" when the graphs agree)
    '''
    # Bug fix: with the default to_ignore=None, ``p not in to_ignore``
    # raised TypeError. Normalize to an empty tuple.
    if to_ignore is None:
        to_ignore = ()
    my_exception = ""
    # We reconcile gt_graph with other_graph
    if reconcile:
        gt_graph, other_graph = self._reconcile_graphs(
            gt_graph, other_graph)
    in_both, in_gt, in_other = graph_diff(gt_graph, other_graph)
    exc_missing = list()
    for s, p, o in in_gt:
        # If there is a corresponding s,p check if
        # there is an equivalent o
        for o_other in in_other.objects(s, p):
            same_json_array, close_float, same_str = \
                self._same_json_or_float(o, o_other)
            if same_json_array or close_float or same_str:
                # Remove equivalent o from other as well
                in_other.remove((s, p, o_other))
                break
        else:
            if (p not in to_ignore):
                exc_missing.append(
                    "\nMissing :\t '%s %s %s'" % (
                        self.get_readable_name(owl, gt_graph, s),
                        self.get_readable_name(owl, gt_graph, p),
                        self.get_readable_name(owl, gt_graph, o)
                    ))
    exc_added = list()
    if not include:
        for s, p, o in in_other:
            if p not in to_ignore:
                exc_added.append(
                    "\nAdded :\t '%s %s %s'" % (
                        self.get_readable_name(owl, other_graph, s),
                        self.get_readable_name(owl, other_graph, p),
                        self.get_readable_name(owl, other_graph, o)
                    ))
    my_exception += "".join(sorted(exc_missing) + sorted(exc_added))
    if raise_now and my_exception:
        raise Exception(my_exception)
    return my_exception
def process_file(file): root, filename = os.path.split(file) rdf_file = os.path.splitext(filename)[0]+'.nt' rdf_path = root + '/' + rdf_file clean_file(file) try: xml = etree.parse(file) rdf = transform(xml) g = Graph() g.parse(StringInputSource(rdf),"xml") # If the graph already exists then we want to generate some diffs before overwriting it: these can be used generating changesets when uploading to a datastore existing = False if os.path.exists(rdf_path): print "Comparing graphs" go = Graph() go.parse(rdf_path,format='nt') existing = True elif os.path.exists(root+'/archive/'+rdf_file): print "Comparing with archived graph" go = Graph() go.parse(root+'/archive/'+rdf_file,format='nt') existing = True if existing: both, old, new = graph_diff(go,g) if(len(old)): # old.serialize(rdf_path+'_old',format='nt') #Uncomment if you want a non-reified version of the statements cs = BatchChangeSet() cs.setCreatorName('IATI Update Scripts') cs.setChangeReason('Statements to remove from'+file) for (s,p,o) in old.triples((None, None, None)): cs.remove(s,p,o) cs.getGraph().serialize(rdf_path+'_csremove',format='nt') if(len(new)): # new.serialize(rdf_path+'_new',format='nt') #Uncomment if you want a non-reified version of the statements cs = BatchChangeSet() cs.setCreatorName('IATI Update Scripts') cs.setChangeReason('Statements to add from '+file) for (s,p,o) in new.triples((None, None, None)): cs.remove(s,p,o) cs.getGraph().serialize(rdf_path+'_csadd',format='nt') g.serialize(rdf_path,format='nt') except Exception, e: print "Error processing file "+ file print e
def compare_graphs(actual, expected):
    """Assert that ``actual`` and ``expected`` are isomorphic, printing a
    two-way diff of the differing triples when they are not."""
    actual_iso, expected_iso = to_isomorphic(actual), to_isomorphic(expected)
    matches = actual_iso == expected_iso
    if not matches:
        _, only_actual, only_expected = graph_diff(actual_iso, expected_iso)
        print("The actual and expected graphs differ")
        print("----- Contents of actual graph not in expected graph -----")
        dump_ttl_sorted(only_actual)
        print("----- Contents of expected graph not in actual graph -----")
        dump_ttl_sorted(only_expected)
    assert actual_iso == expected_iso
def sync_named_graph(self, name, incoming, size=BATCH_SIZE):
    """
    Pass in incoming data and sync with existing data in named graph.
    """
    current = self.get_existing(name)
    unchanged, to_add, to_delete = graph_diff(incoming, current)
    # Only the add/delete partitions are needed; drop the shared triples.
    del unchanged
    added = self.bulk_add(name, to_add, size=size)
    logger.info("Adding {} triples to {}.".format(added, name))
    removed = self.bulk_remove(name, to_delete, size=size)
    logger.info("Removed {} triples from {}.".format(removed, name))
    return added, removed
def compare_rdf(expected: Union[Graph, str], actual: Union[Graph, str],
                fmt: Optional[str] = "turtle") -> Optional[str]:
    """
    Compare expected to actual, returning a string if there is a difference

    :param expected: expected RDF. Can be Graph, file name, uri or text
    :param actual: actual RDF. Can be Graph, file name, uri or text
    :param fmt: RDF format
    :return: None if they match else summary of difference
    """
    def rem_metadata(g: Graph) -> IsomorphicGraph:
        # Remove list declarations from target
        for s in g.subjects(RDF.type, RDF.List):
            g.remove((s, RDF.type, RDF.List))
        g_iso = to_isomorphic(g)
        return g_iso

    expected_graph = to_graph(expected, fmt)
    expected_isomorphic = rem_metadata(expected_graph)
    actual_graph = to_graph(actual, fmt)
    actual_isomorphic = rem_metadata(actual_graph)

    # Graph compare takes a Looong time
    in_both, in_old, in_new = graph_diff(expected_isomorphic, actual_isomorphic)
    # if old_iso != new_iso:
    #  in_both, in_old, in_new = graph_diff(old_iso, new_iso)
    old_len = len(list(in_old))
    if old_len:
        # Drop known-ignorable triples from the "missing" side, warning so
        # the silently tolerated differences remain visible in the log.
        for t in triples_to_ignore:
            if t in in_old:
                print(f"WARNING: {t} removed from expected graph")
                in_old.remove(t)
        old_len = len(in_old)
    new_len = len(list(in_new))
    if old_len and new_len:
        # Try to reconcile blank-node subject renaming between the two
        # residual graphs before reporting a mismatch.
        fix_subject_bnodes(in_old, in_new)
        old_len = len(in_old)
        new_len = len(in_new)
    if old_len or new_len:
        # Capture the report via stdout redirection so print_triples'
        # output lands in the returned string.
        txt = StringIO()
        with redirect_stdout(txt):
            print("----- Missing Triples -----")
            if old_len:
                print_triples(in_old)
            print("----- Added Triples -----")
            if new_len:
                print_triples(in_new)
        return txt.getvalue()
    return None
def trig(test):
    # Run a single W3C TriG test case: parse ``test.action`` as TriG and,
    # for eval tests, compare against the NQuads reference.
    g = ConjunctiveGraph()
    try:
        # Resolve relative IRIs against the canonical test-suite base.
        base = "http://www.w3.org/2013/TriGTests/" + split_uri(test.action)[1]
        g.parse(test.action, publicID=base, format="trig")
        if not test.syntax:
            raise AssertionError("Input shouldn't have parsed!")
        if test.result:  # eval test
            res = ConjunctiveGraph()
            res.parse(test.result, format="nquads")
            if verbose:
                # Verbose mode: dump both serializations and a
                # triple-level diff before failing.
                both, first, second = graph_diff(g, res)
                if not first and not second:
                    return
                print("===============================")
                print("TriG")
                print(g.serialize(format="nquads"))
                print("===============================")
                print("NQuads")
                print(res.serialize(format="nquads"))
                print("===============================")
                print("Diff:")
                # print "%d triples in both"%len(both)
                print("TriG Only:")
                for t in first:
                    print(t)
                print("--------------------")
                print("NQuads Only")
                for t in second:
                    print(t)
                raise Exception("Graphs do not match!")
            assert isomorphic(
                g, res
            ), "graphs must be the same, expected\n%s\n, got\n%s" % (
                g.serialize(),
                res.serialize(),
            )
    except:
        # Negative-syntax tests are expected to raise; re-raise only when
        # the input should have parsed.
        if test.syntax:
            raise
def compare_rdf(expected: Union[Graph, str], actual: Union[Graph, str],
                fmt: Optional[str] = "turtle") -> Optional[str]:
    """
    Compare expected to actual, returning a string if there is a difference

    :param expected: expected RDF. Can be Graph, file name, uri or text
    :param actual: actual RDF. Can be Graph, file name, uri or text
    :param fmt: RDF format
    :return: None if they match else summary of difference
    """
    def rem_metadata(g: Graph) -> IsomorphicGraph:
        # Remove list declarations from target
        for s in g.subjects(RDF.type, RDF.List):
            g.remove((s, RDF.type, RDF.List))
        # Strip volatile generator metadata so reruns compare equal.
        for t in g:
            if t[1] in (LINKML.generation_date, LINKML.source_file_date,
                        LINKML.source_file_size, TYPE.generation_date,
                        TYPE.source_file_date, TYPE.source_file_size):
                g.remove(t)
        g_iso = to_isomorphic(g)
        return g_iso

    # Bypass compare if settings have turned it off
    if SKIP_RDF_COMPARE:
        print(f"tests/utils/compare_rdf.py: {SKIP_RDF_COMPARE_REASON}")
        return None

    expected_graph = to_graph(expected, fmt)
    expected_isomorphic = rem_metadata(expected_graph)
    actual_graph = to_graph(actual, fmt)
    actual_isomorphic = rem_metadata(actual_graph)

    # Graph compare takes a Looong time
    in_both, in_old, in_new = graph_diff(expected_isomorphic, actual_isomorphic)
    # if old_iso != new_iso:
    #  in_both, in_old, in_new = graph_diff(old_iso, new_iso)
    old_len = len(list(in_old))
    new_len = len(list(in_new))
    if old_len or new_len:
        # Capture the report via stdout redirection so print_triples'
        # output lands in the returned string.
        txt = StringIO()
        with redirect_stdout(txt):
            print("----- Missing Triples -----")
            if old_len:
                print_triples(in_old)
            print("----- Added Triples -----")
            if new_len:
                print_triples(in_new)
        return txt.getvalue()
    return None
def main():
    """Parse two RDF files, canonicalize them, and print the triples
    unique to each file as sorted N-Triples."""
    values = ap.parse_args()
    first_format = guess_format(values.file1)
    second_format = guess_format(values.file2)
    first_graph: Graph = Graph().parse(values.file1, format=first_format)
    second_graph: Graph = Graph().parse(values.file2, format=second_format)
    first_iso: IsomorphicGraph = to_isomorphic(first_graph)
    second_iso: IsomorphicGraph = to_isomorphic(second_graph)
    _in_both, in_first, in_second = graph_diff(first_iso, second_iso)
    print(f"Only in {values.file1}")
    dump_nt_sorted(in_first)
    print(f"Only in {values.file2}")
    dump_nt_sorted(in_second)
def post_updates(named_graph, graph): """ Function for posting the data. """ #Define the VIVO store query_endpoint = os.environ['VIVO_URL'] + '/api/sparqlQuery' update_endpoint = os.environ['VIVO_URL'] + '/api/sparqlUpdate' vstore = SyncVStore( os.environ['VIVO_EMAIL'], os.environ['VIVO_PASSWORD'] ) vstore.open((query_endpoint, update_endpoint)) existing = vstore.get_existing(named_graph) # Get the URIs for statements that will be additions. changed_uris = set([u for u in graph.subjects()]) # Get the statements from the deletes that apply to this # incremental update. This will be the posted deletes. remove_graph = Graph() # Remove all triples related to the changed uris. for curi in changed_uris: for pred, obj in existing.predicate_objects(subject=curi): remove_graph.add((curi, pred, obj)) # Diff both, adds, deletes = graph_diff(graph, remove_graph) num_additions = len(adds) num_remove = len(deletes) if (num_additions == 0) and (num_remove == 0): logger.info("No updates to {}.".format(named_graph)) else: #print adds.serialize(format='n3') #print '-' * 10 #print deletes.serialize(format='n3') if num_additions > 0: logger.info("Will add {} triples to {}.".format(num_additions, named_graph)) vstore.bulk_add(named_graph, adds) if num_remove > 0: logger.info("Will remove {} triples from {}.".format(num_remove, named_graph)) vstore.bulk_remove(named_graph, deletes) return True
def recheck(self) -> bool:
    """
    Recompute the differences between the graphs

    :returns: True if changes were detected
    """
    # Only re-diff when the previous check failed and a graph reports
    # having changed since then.
    if not self.passed and self.changed:
        # Clear the change flags before recomputing so subsequent calls
        # are no-ops until something changes again.
        self.expected_graph.changed = False
        self.actual_graph.changed = False
        if not self.known_match:
            # Snapshot the previous partition sizes to detect movement.
            cur_lens = (self.both_len, self.old_len, self.new_len)
            self.in_both, self.in_old, self.in_new = graph_diff(
                self.expected_graph, self.actual_graph)
            self._upd_lens()
            self.passed = not self.diffs_exist
            # Changed iff any of the three partition sizes moved.
            return cur_lens != (self.both_len, self.old_len, self.new_len)
    return False
def graphdiff(first, second):
    """
    Diff between graph instances, should be replaced/included in quit diff

    :param first: old InMemoryAggregatedGraph (or None)
    :param second: new InMemoryAggregatedGraph (or None)
    :return: OrderedDict mapping context IRI -> list of
             ('additions'|'removals', triple generator) pairs
    """
    from rdflib.compare import to_isomorphic, graph_diff
    diffs = OrderedDict()
    iris = set()
    # Collect the context IRIs present on either side.
    if first is not None and isinstance(first, InMemoryAggregatedGraph):
        first_identifiers = list((g.identifier for g in first.graphs()))
        iris = iris.union(first_identifiers)
    if second is not None and isinstance(second, InMemoryAggregatedGraph):
        second_identifiers = list((g.identifier for g in second.graphs()))
        iris = iris.union(second_identifiers)
    for iri in sorted(list(iris)):
        changes = diffs.get(iri, [])
        if (first is not None and iri in first_identifiers) and (
                second is not None and iri in second_identifiers):
            # Context exists on both sides: compute a real triple diff.
            g1 = first.get_context(iri)
            g2 = second.get_context(iri)
            in_both, in_first, in_second = graph_diff(to_isomorphic(g1),
                                                      to_isomorphic(g2))
            # NOTE(review): the generator expressions below are lazy; they
            # are only valid while the diff graphs stay alive and unchanged.
            if len(in_second) > 0:
                changes.append(
                    ('additions', ((s, p, o) for s, p, o in in_second)))
            if len(in_first) > 0:
                changes.append(
                    ('removals', ((s, p, o) for s, p, o in in_first)))
        elif first is not None and iri in first_identifiers:
            # Context disappeared entirely: every triple is a removal.
            changes.append(
                ('removals', ((s, p, o) for s, p, o in first.get_context(iri))))
        elif second is not None and iri in second_identifiers:
            # Context is brand new: every triple is an addition.
            changes.append(
                ('additions', ((s, p, o) for s, p, o in second.get_context(iri))))
        else:
            continue
        diffs[iri] = changes
    return diffs
def trig(test):
    # Run a single W3C TriG test case: parse ``test.action`` as TriG and,
    # for eval tests, compare against the NQuads reference.
    g = ConjunctiveGraph()
    try:
        # Resolve relative IRIs against the canonical test-suite base.
        base = 'http://www.w3.org/2013/TriGTests/'+split_uri(test.action)[1]
        g.parse(test.action, publicID=base, format='trig')
        if not test.syntax:
            raise AssertionError("Input shouldn't have parsed!")
        if test.result:  # eval test
            res = ConjunctiveGraph()
            res.parse(test.result, format='nquads')
            if verbose:
                # Verbose mode: dump both serializations and a
                # triple-level diff before failing.
                both, first, second = graph_diff(g, res)
                if not first and not second:
                    return
                print('===============================')
                print('TriG')
                print(g.serialize(format='nquads'))
                print('===============================')
                print('NQuads')
                print(res.serialize(format='nquads'))
                print('===============================')
                print("Diff:")
                #print "%d triples in both"%len(both)
                print("TriG Only:")
                for t in first:
                    print(t)
                print("--------------------")
                print("NQuads Only")
                for t in second:
                    print(t)
                raise Exception('Graphs do not match!')
            assert isomorphic(g, res), 'graphs must be the same'
    except:
        # Negative-syntax tests are expected to raise; re-raise only when
        # the input should have parsed.
        if test.syntax:
            raise
def trig(test):
    # Python 2 variant of the W3C TriG test runner: parse test.action and,
    # for eval tests, compare against the NQuads reference.
    g = ConjunctiveGraph()
    try:
        # Resolve relative IRIs against the canonical test-suite base.
        base = 'http://www.w3.org/2013/TriGTests/' + split_uri(test.action)[1]
        g.parse(test.action, publicID=base, format='trig')
        if not test.syntax:
            raise AssertionError("Input shouldn't have parsed!")
        if test.result:  # eval test
            res = ConjunctiveGraph()
            res.parse(test.result, format='nquads')
            if verbose:
                # Verbose mode: dump both serializations and a
                # triple-level diff before failing.
                both, first, second = graph_diff(g, res)
                if not first and not second:
                    return
                print '==============================='
                print 'TriG'
                print g.serialize(format='nquads')
                print '==============================='
                print 'NQuads'
                print res.serialize(format='nquads')
                print '==============================='
                print "Diff:"
                #print "%d triples in both"%len(both)
                print "TriG Only:"
                for t in first:
                    print t
                print "--------------------"
                print "NQuads Only"
                for t in second:
                    print t
                raise Exception('Graphs do not match!')
            assert isomorphic(g, res), 'graphs must be the same'
    except:
        # Negative-syntax tests are expected to raise; re-raise only when
        # the input should have parsed.
        if test.syntax:
            raise
def turtle_equal(a, b):
    """
    Given two strings representing turtle-encoded RDF, check whether they
    represent the same graph.

    On mismatch, prints the three graph_diff partitions to aid debugging.
    """
    ga = Graph().parse(format='turtle', data=a)
    # FIX: removed a leftover debug loop that unconditionally printed
    # every triple of `ga` on each call.
    gb = Graph().parse(format='turtle', data=b)
    eq = isomorphic(ga, gb)
    if not eq:
        both, first, second = graph_diff(ga, gb)
        print("Present in both:")
        print(both)
        print("Present in first:")
        print(first)
        print("Present in second:")
        print(second)
    return eq
def find_diff(g_rdf, g0_rdf):
    """Compare two RDF graphs, matching remaining triples leniently with
    Literal.eq() (so e.g. typed vs. plain literal forms can still match).

    Returns (graphs_equal, in_both, in_first_only, in_second_only).
    """
    graphs_equal = True
    in_both, in_first, in_second = graph_diff(g_rdf, g0_rdf)
    # Serialize each "only in one graph" side to sorted N-Triples lines.
    # NOTE(review): [1:] drops the first sorted element -- presumably the
    # empty line that splitlines() yields from the serializer output;
    # confirm against the rdflib version in use.
    g1 = sorted(in_first.serialize(format='nt').splitlines())[1:]
    g2 = sorted(in_second.serialize(format='nt').splitlines())[1:]
    # Compare literals
    if len(g1) != len(g2):
        graphs_equal = False
    # matching_indices[0]: indices in g1 that found a match;
    # matching_indices[1]: the corresponding indices in g2 (each used once).
    matching_indices = [[], []]
    for idx in range(len(g1)):
        # Re-parse the single N-Triples line back into an (s, p, o) tuple.
        g1_stmt = list(rl.ConjunctiveGraph().parse(BytesIO(g1[idx]),
                                                   format='nt'))[0]
        match_found = False
        for idx2 in range(len(g2)):
            if idx2 in matching_indices[1]:
                continue  # already paired with an earlier g1 statement
            g2_stmt = list(rl.ConjunctiveGraph().parse(BytesIO(g2[idx2]),
                                                       format='nt'))[0]
            try:
                # Lenient term-by-term comparison via Literal/Node.eq().
                all_match = all([g1_stmt[i].eq(g2_stmt[i]) for i in range(3)])
            except TypeError as e:
                # eq() can raise on incompatible term types; treat as no match.
                #logger.info(e, g1_stmt, g2_stmt)
                all_match = False
            if all_match:
                matching_indices[0].append(idx)
                matching_indices[1].append(idx2)
                match_found = True
                break
        if not match_found:
            graphs_equal = False
    # Statements that found a lenient match are folded into in_both; the
    # rest are collected into fresh "only" graphs.
    in_first2 = rl.ConjunctiveGraph()
    for idx in range(len(g1)):
        if idx in matching_indices[0]:
            in_both.parse(BytesIO(g1[idx]), format='nt')
        else:
            in_first2.parse(BytesIO(g1[idx]), format='nt')
    in_second2 = rl.ConjunctiveGraph()
    for idx in range(len(g2)):
        if not idx in matching_indices[1]:
            in_second2.parse(BytesIO(g2[idx]), format='nt')
    #logger.info(in_first2)
    #logger.info(in_second2)
    return graphs_equal, in_both, in_first2, in_second2
def run_grit_test(rdfxml_fpath, grit_fpath):
    """Transform rdfxml_fpath with the GRIT XSLT and assert the result
    matches grit_fpath; if isograph is available, also round-trip the
    spec'd Grit back to RDF via GRDDL and compare the graphs."""
    source_doc = etree.parse(rdfxml_fpath)
    speced_grit = etree.parse(grit_fpath)
    actual_grit = GRIT_XSLT(source_doc)
    assert canonical_str(actual_grit) == canonical_str(speced_grit), \
        "Grit from <%s> doesn't equal specified result in <%s>" % (
            rdfxml_fpath, grit_fpath)
    if not isograph:
        return
    gleaned_rdf = GRDDL_XSLT(speced_grit)
    gleaned_graph = isograph(gleaned_rdf)
    ref_graph = isograph(source_doc)

    def diff():
        # Render the two "only in one side" graphs as sorted N-Triples.
        only_graphs = graph_diff(gleaned_graph, ref_graph)[1:]
        rendered = tuple(
            "\n".join(sorted(g.serialize(format='nt').splitlines()))
            for g in only_graphs)
        return "\nOnly in gleaned:%s\nOnly in spec:%s\n" % rendered

    assert gleaned_graph == ref_graph, \
        "RDF from GRDDL:ed <%s> doesn't equal original RDF. Diff: %s" % (
            grit_fpath, diff())
def graphdiff(first, second):
    """
    Diff between graph instances, should be replaced/included in quit diff

    Returns an OrderedDict mapping each context IRI to a list of
    ('additions' | 'removals', triple-generator) pairs describing how to
    get from `first` to `second`.
    """
    from rdflib.compare import to_isomorphic, graph_diff

    diffs = OrderedDict()
    iris = set()
    # FIX: initialize both identifier lists up front. The original only
    # bound them inside the isinstance-guarded branches, so a non-None
    # argument that was not an InMemoryAggregatedGraph left the name
    # unbound and the membership tests below raised NameError.
    first_identifiers = []
    second_identifiers = []

    if first is not None and isinstance(first, InMemoryAggregatedGraph):
        first_identifiers = [g.identifier for g in first.graphs()]
        iris = iris.union(first_identifiers)
    if second is not None and isinstance(second, InMemoryAggregatedGraph):
        second_identifiers = [g.identifier for g in second.graphs()]
        iris = iris.union(second_identifiers)

    for iri in sorted(iris):
        changes = diffs.get(iri, [])
        in_first_ctx = first is not None and iri in first_identifiers
        in_second_ctx = second is not None and iri in second_identifiers
        if in_first_ctx and in_second_ctx:
            # Context exists on both sides: take a proper isomorphic diff.
            g1 = first.get_context(iri)
            g2 = second.get_context(iri)
            in_both, in_first, in_second = graph_diff(to_isomorphic(g1),
                                                      to_isomorphic(g2))
            if len(in_second) > 0:
                changes.append(('additions', ((s, p, o) for s, p, o in in_second)))
            if len(in_first) > 0:
                changes.append(('removals', ((s, p, o) for s, p, o in in_first)))
        elif in_first_ctx:
            # Context vanished entirely: every triple is a removal.
            changes.append(('removals', ((s, p, o) for s, p, o in first.get_context(iri))))
        elif in_second_ctx:
            # Context is brand new: every triple is an addition.
            changes.append(('additions', ((s, p, o) for s, p, o in second.get_context(iri))))
        else:
            continue
        diffs[iri] = changes
    return diffs
def difftool(self, local, remote, merged, base, diffFormat='sparql'):
    """Load up to four isomorphic graph sets, diff `local` against
    `remote` per named-graph URI, and print the serialized diff.

    merged/base are loaded but not used by the diff computation below
    (signature kept for existing callers).
    """
    if local:
        self.local = self.readIsomorphicGraph(local)
    if remote:
        self.remote = self.readIsomorphicGraph(remote)
    if merged:
        self.merged = self.readIsomorphicGraph(merged)
    if base:
        self.base = self.readIsomorphicGraph(base)

    add = {}
    remove = {}
    graphUris = set(self.local.keys()) | set(self.remote.keys())
    for uri in graphUris:
        # Membership tested on the dicts directly (was `in ....keys()`);
        # the original's dangling no-op `else: True` branch is dropped --
        # it was unreachable anyway since uri comes from the union above.
        if uri in self.local and uri in self.remote:
            localGraph = self.local[uri]
            remoteGraph = self.remote[uri]
            in_both, in_first, in_second = compare.graph_diff(localGraph, remoteGraph)
            add[uri] = in_second
            remove[uri] = in_first
        elif uri in self.local:
            # Graph only exists locally: remove it entirely.
            remove[uri] = self.local[uri]
        elif uri in self.remote:
            # Graph only exists remotely: add it entirely.
            add[uri] = self.remote[uri]

    # Resolve the serializer class by format name, e.g. 'sparql' ->
    # quit_diff.serializer.SparqlDiff.SparqlDiff
    module = diffFormat.title() + "Diff"
    diff = getattr(import_module('quit_diff.serializer.' + module), module)
    diffSerializer = diff()
    print(diffSerializer.serialize(add, remove))
def compute_added_and_removed(new_graph, old_graph, added=None, removed=None):
    """I compute the graphs of added triples and of removed triples.

    For overridden versions of `check_new_graph` that require `added` and
    `removed` to be set, I should be called as::

        added, removed = self._compute_added_and_removed(
            new_graph, old_graph, added, removed)

    If `added` and `removed` are not None, this method will simply return
    them, preventing the overhead of computing them again. However, it is
    important to call this function *before* the call to
    ``super(...).check_new_graph``, because the result is not transmitted
    to the calling function. So to ensure that the computation happens
    only once, it must be performed at the highest level that needs it.
    """
    # Fast path: caller already computed the pair -- both must be set.
    if added is not None:
        assert removed is not None
        return added, removed
    # Slow path: neither may be set; derive both from a fresh diff.
    assert removed is None
    _, added, removed = graph_diff(new_graph, old_graph)
    return added, removed
def assertEqualGraphs(self, want, got, exact=True):
    """Assert that two RDF graphs are identical (isomorphic).

    :param want: The graph as expected, as an
                 :py:class:`~rdflib.graph.Graph` object or the filename
                 of a serialized graph
    :param got: The actual graph, as an
                :py:class:`~rdflib.graph.Graph` object or the filename
                of a serialized graph
    :param exact: Whether to require that the graphs are exactly alike
                  (True) or only if all triples in want exists in got
                  (False)
    :type exact: bool
    """
    def _loadgraph(filename):
        # Parse a serialized graph file into an rdflib Graph.
        g = rdflib.Graph()
        # we must read the data ourself, providing a non-ascii
        # filename to Graph.parse fails deep in rdflib internals
        g.parse(data=util.readfile(filename, "rb"),
                format=guess_format(filename))
        return g

    # Accept either Graph objects or filenames for both arguments.
    if not isinstance(want, rdflib.Graph):
        want = _loadgraph(want)
    if not isinstance(got, rdflib.Graph):
        got = _loadgraph(got)

    (in_both, in_first, in_second) = graph_diff(want, got)
    msg = ""
    # "-" lines: expected triples missing from `got`.
    if in_first:
        for (s, p, o) in sorted(in_first, key=lambda t: (t[0], t[1], t[2])):
            msg += "- %s %s %s\n" % (s.n3(), p.n3(), o.n3())
    # "+" lines: triples in `got` but not in `want`. Shown when exact
    # matching is requested, or whenever some expected triples were
    # missing (to give context for the failure).
    if (exact and in_second) or in_first:
        for (s, p, o) in sorted(in_second, key=lambda t: (t[0], t[1], t[2])):
            msg += "+ %s %s %s\n" % (s.n3(), p.n3(), o.n3())
    # Fail only if expected triples are missing, or (in exact mode)
    # unexpected triples are present.
    if ((len(in_first) > 0) or (len(in_second) > 0 and exact)):
        # Prepend summary counts and the two graphs' reprs to the triple
        # listing built above, most general information first.
        if len(in_first) > 0:
            msg = "%s expected triples were not found\n" % len(in_first) + msg
        if len(in_second) > 0:
            msg = "%s unexpected triples were found\n" % len(in_second) + msg
        msg = "%r != %r\n" % (want, got) + msg
        return self.fail(msg)
# Command-line entry: extract RDF from a saved lagen.nu XHTML document and
# optionally diff it against previously extracted RDF files. (Python 2.)
if not args:
    print "USAGE: %s FILE [rdf...]" % p.basename(cmd)
    print "Where FILE is a local copy of <https://lagen.nu/1976:725>. Get it by doing e.g.:"
    print " $ /usr/bin/curl -sk 'https://lagen.nu/1976:725' > /tmp/sfs-1976_725.xhtml"
    print
    print "If additional local rdf files are supplied, a diff of the " \
        "extracted data and the supplied data is output (instead of just the " \
        "extracted data)."
    exit()

# Parse the XHTML document into an RDF graph.
docpath = args[0]
graph = fsdoc_to_graph(docpath)

from rdfextras.tools.pathutils import guess_format

# Load any additional RDF files into one comparison graph, guessing the
# serialization format from each filename.
cmp_graph = Graph()
for fpath in args[1:]:
    cmp_graph.load(fpath, format=guess_format(fpath))

if cmp_graph:
    # Show only statements extracted from the document that are not
    # already present in the supplied RDF (graph_diff's "in_first" part).
    from rdflib.compare import graph_diff
    in_both, in_first, in_second = graph_diff(graph, cmp_graph)
    print "# %s new statements:" % len(in_first)
    # Carry over the document graph's prefix bindings for readable N3.
    for pfx, uri in graph.namespaces():
        in_first.bind(pfx, uri)
    print in_first.serialize(format='n3')
else:
    print "# Nothing to compare against. New RDF is:"
    print graph.serialize(format='n3')
#!/usr/bin/env python from os import path as p, popen from rdflib import Graph, compare scriptpath = lambda lpath: p.join(p.dirname(__file__), lpath) real = Graph().parse(popen("sh %s" % scriptpath("run.sh")), format='n3') expected = Graph().parse(scriptpath("expected.ttl"), format='n3') diff = compare.graph_diff(expected, real) assert not diff[1] and not diff[2], "Expected: %s Got: %s" % tuple( g.serialize(format='n3') for g in diff[1:]) print "Ok."
#If the old data has been uploaded then it will be in /archive/ #If it has not yet been uploaded it will be in the main path. Prefer the main path version... rdf_path = exec_path + '/online.nt' #Now check if either really does exist, and make the changesets if os.path.exists(rdf_path): print "Comparing with old data" og = Graph() try: og.parse(rdf_path,format='nt') except Exception, e: print "Failed reading archived online data" print e print "Running graph diff - new data against archived data" both, old, new = graph_diff(og,ng) if(len(old)): cs = BatchChangeSet() cs.setCreatorName('R4D Update Scripts') cs.setChangeReason('Statements to remove from'+dirList[0]) for (s,p,o) in old.triples((None, None, None)): cs.remove(s,p,o) print "Saving triples for removal to changeset" cs.getGraph().serialize(rdf_path+'_csremove',format='nt') if(len(new)): cs = BatchChangeSet() cs.setCreatorName('IATI Update Scripts') cs.setChangeReason('Statements to add from '+dirList[0]) for (s,p,o) in new.triples((None, None, None)): cs.remove(s,p,o) print "Saving new triples to changeset"
def my_graph_diff(graph1, graph2): """Compares graph2 to graph1 and highlights everything that changed. Colored if pygments available""" # quick fix for wrong type if not type(graph1) == type(graph2) == rdflib.Graph: if type(graph1) == rdflib.ConjunctiveGraph: g1contexts = list(graph1.contexts()) assert len(g1contexts) == 1 graph1 = g1contexts[0] if type(graph2) == rdflib.ConjunctiveGraph: g2contexts = list(graph2.contexts()) assert len(g2contexts) == 1 graph2 = g2contexts[0] # Return if both graphs are isomorphic iso1 = compare.to_isomorphic(graph1) iso2 = compare.to_isomorphic(graph2) if graph1.identifier == graph2.identifier: str_bit = u"The 2 '%s' Graphs" % graph1.identifier else: str_bit = (u"Graphs '%s' and '%s'" % (graph1.identifier, graph2.identifier)) if iso1 == iso2: logger.debug(u"%s are isomorphic" % str_bit) return print(u"Differences between %s." % str_bit) in_both, in_first, in_second = compare.graph_diff(iso1, iso2) def dump_nt_sorted(g): return sorted(g.serialize(format='nt').splitlines()) sorted_first = dump_nt_sorted(in_first) sorted_second = dump_nt_sorted(in_second) import difflib diff = difflib.unified_diff( sorted_first, sorted_second, u'Original', u'Current', lineterm='' ) try: from pygments import highlight from pygments.formatters import terminal from pygments.lexers import web lexer = web.XmlLexer() formatter = terminal.TerminalFormatter() print(highlight(u'\n'.join(diff), lexer, formatter)) except ImportError: logger.info("Install pygments for colored diffs") print(u'\n'.join(diff)) except UnicodeDecodeError: print(u"Only in first", unicode(sorted_first)) print(u"Only in second", unicode(sorted_second))
from rdflib import Graph
from rdflib.compare import to_isomorphic, graph_diff
import sys

# Choose the two Turtle files to diff from the command line, falling back
# to hard-coded defaults when fewer than two paths are given.
if len(sys.argv) >= 3:
    F1 = sys.argv[1]
    F2 = sys.argv[2]
else:
    F1 = "/home/barry/Downloads/instance.ttl"
    F2 = "/home/barry/Downloads/t2.ttl"

graph_a = Graph()
graph_a.parse(F1, format="turtle")
graph_b = Graph()
graph_b.parse(F2, format="turtle")

# Diff the isomorphic forms so blank nodes are matched structurally.
in_both, in_first, in_second = graph_diff(to_isomorphic(graph_a),
                                          to_isomorphic(graph_b))

# A third CLI argument flips the output to "triples only in the first
# file"; otherwise print what is only in the second.
if len(sys.argv) == 4:
    print(in_first.serialize(format="n3").decode('utf-8'))
else:
    print(in_second.serialize(format="n3").decode('utf-8'))
def testA(self):
    """with bnode"""
    graph = rdflib.Graph()
    # A single triple whose subject is a blank node and whose object is a
    # non-ASCII literal.
    triple = (rdflib.BNode(), rdflib.URIRef("urn:p"), rdflib.Literal(u'\xe9'))
    graph.add(triple)
    # Diffing a graph against itself must not raise.
    diff = graph_diff(graph, graph)
# Compare two releases of schema.org from rdflib import Graph from rdflib.compare import to_isomorphic, graph_diff if __name__ == '__main__': sdons = 'http://schema.org/' g1 = Graph() g2 = Graph() p = Graph() # first = str(sys.argv[1]) # second = str(sys.argv[2] first = 'data/releases/2.2/schema.rdfa' second = 'data/releases/3.0/schema.rdfa' g1.parse(first, format='rdfa', pgraph=p)#, charset="utf8") g2.parse(second, format='rdfa', pgraph=p)#, charset="utf8") in_both, in_first, in_second = graph_diff(g1, g2) in_both.bind('schema', sdons) in_first.bind('schema', sdons) in_second.bind('schema', sdons) print in_both.serialize(format="n3") # print in_first.serialize(format="n3") # print in_second.serialize(format="n3")