Example #1
def check_graph(graph, change_type):
    """Check whether each changed resource is an update or a create by comparison with the live graph."""
    if not graph:
        return False
    result = graph.query("""SELECT DISTINCT ?subject WHERE {?subject ?b ?c}""")
    # For every subject of the changeset graph, try to find further triples in
    # DBpedia Live to distinguish between add/update/delete.
    events = []
    for subject in result:
        resource = subject[0]
        if resource.find(DBpedia.DBPEDIAURL) == 0:  # apply only to resources with the DBpedia URL
            # The online version of DBpedia Live uses different URIs than the changesets.
            live_resource = DBpedia.liveize(resource)
            onl_graph = rdflib.Graph()
            try:
                onl_graph.parse(live_resource)
                onl_iso = to_isomorphic(onl_graph)
                loc_iso = to_isomorphic(graph)
                in_both, in_onl, in_loc = graph_diff(onl_iso, loc_iso)
                event_type = "notupdated"
                event = None
                for res_of_diff, b, c in in_onl:
                    # If the live graph has more triples about the resource, it is an update.
                    if str(live_resource) == str(res_of_diff):
                        event_type = "update"
                        break
                if event_type == "notupdated" and change_type == "added":
                    event = ResourceChange(uri=str(live_resource), changetype="CREATE")
                elif event_type == "update" and change_type == "added":
                    event = ResourceChange(uri=str(live_resource), changetype="UPDATE")
                else:
                    event = ResourceChange(uri=str(live_resource), changetype="DELETE")
                events.append(event)
            except Exception as e:
                print("Error parsing %s: %s" % (live_resource, e))
    return events
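DBpedia.liveize and ResourceChange come from the surrounding project and are not shown here; hypothetical stand-ins, just to make the example readable:

# Hypothetical stand-ins; the real project defines these elsewhere.
class DBpedia:
    DBPEDIAURL = "http://dbpedia.org/resource/"    # illustrative value
    LIVEURL = "http://live.dbpedia.org/resource/"  # illustrative value

    @staticmethod
    def liveize(uri):
        # Map a static DBpedia resource URI onto its DBpedia Live twin.
        return str(uri).replace(DBpedia.DBPEDIAURL, DBpedia.LIVEURL)

class ResourceChange:
    def __init__(self, uri, changetype):
        self.uri = uri
        self.changetype = changetype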
Example #2
 def rdf_comparator(self, old_data: str, new_data: str) -> bool:
     old_graph = Graph()
     new_graph = Graph()
     old_graph.parse(data=old_data, format="turtle")
     new_graph.parse(data=new_data, format="turtle")
     old_iso = to_isomorphic(old_graph)
     # Remove the metadata specific triples
     for t in list(old_iso.triples((None, MMNS.generation_date, None))):
         old_iso.remove(t)
     new_iso = to_isomorphic(new_graph)
     for t in list(new_iso.triples((None, MMNS.generation_date, None))):
         new_iso.remove(t)
     # Graph compare takes a Looong time
     in_both, in_old, in_new = graph_diff(old_iso, new_iso)
     # if old_iso != new_iso:
     #     in_both, in_old, in_new = graph_diff(old_iso, new_iso)
     old_len = len(list(in_old))
     new_len = len(list(in_new))
     if old_len or new_len:
         if old_len:
             print("----- Old graph only -----")
             self._print_triples(in_old)
         if new_len:
             print("----- New Grapn Only -----")
             self._print_triples(in_new)
         self.assertTrue(False, "RDF file mismatch")
         return False
     return True
Example #3
def graphsAreTheSame(g1, g2, preprocessupdate=None):
    g1Copy = clone_graph(g1)
    g2Copy = clone_graph(g2)
    if preprocessupdate:
        g1Copy.update(preprocessupdate)
        g2Copy.update(preprocessupdate)
    return compare.to_isomorphic(g1Copy) == compare.to_isomorphic(g2Copy)
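clone_graph is assumed by graphsAreTheSame above; a minimal sketch, plus a hypothetical preprocessupdate that strips volatile timestamps before comparing:

from rdflib import Graph

def clone_graph(g):
    # Copy every triple into a fresh Graph so the SPARQL update cannot
    # mutate the caller's graph.
    copy = Graph()
    for triple in g:
        copy.add(triple)
    return copy

# Illustrative call: ignore dcterms:modified when testing equality.
# same = graphsAreTheSame(g1, g2,
#     "PREFIX dcterms: <http://purl.org/dc/terms/> "
#     "DELETE WHERE { ?s dcterms:modified ?o }")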
Example #4
def run_test(t_identifier, expected_output, source_type):
    expected_output_graph = ConjunctiveGraph()

    if os.path.isfile(config["properties"]["output_results"]):
        os.system("rm " + config["properties"]["output_results"])

    if expected_output:
        expected_output_graph.parse("./output.nq", format="nquads")

    os.system("mkdir results/" + t_identifier)
    os.system(config["properties"]["engine_command"] + " > results/" +
              t_identifier + "/" + source_type + ".log")

    # if there is output file
    if os.path.isfile(config["properties"]["output_results"]):
        extension = config["properties"]["output_results"].split(".")[-1]
        os.system("cp " + config["properties"]["output_results"] +
                  " results/" + t_identifier + "/output-" + source_type + "." +
                  extension)
        # and expected output is true
        if expected_output:
            output_graph = ConjunctiveGraph()
            iso_expected = compare.to_isomorphic(expected_output_graph)
            # trying to parse the output (e.g., not valid RDF)
            try:
                output_graph.parse(
                    config["properties"]["output_results"],
                    format=config["properties"]["output_format"])
                iso_output = compare.to_isomorphic(output_graph)
                # and graphs are equal
                if iso_expected == iso_output:
                    result = passed
                # and graphs are distinct
                else:
                    print("Output RDF does not match with the expected RDF")
                    result = failed
            # output is not valid RDF
            except:
                print("Output RDF is invalid")
                result = failed

        # and expected output is false
        else:
            print("Output RDF found but none was expected")
            result = failed
    # if there is no output file
    else:
        # and expected output is true
        if expected_output:
            print("No RDF output found while output was expected")
            result = failed
        # expected output is false
        else:
            result = passed

    results.append([
        config["tester"]["tester_name"], config["engine"]["engine_name"],
        source_type, t_identifier, result
    ])
    print(t_identifier + "," + result)
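run_test leans on module-level state that is not shown; an illustrative shape for it (all values hypothetical):

# Hypothetical module-level state assumed by run_test.
config = {
    "properties": {
        "output_results": "engine-output.nq",             # where the engine writes
        "output_format": "nquads",                        # rdflib format name
        "engine_command": "run-engine --mapping map.ttl", # shell command under test
    },
    "tester": {"tester_name": "example-tester"},
    "engine": {"engine_name": "example-engine"},
}
passed, failed = "passed", "failed"
results = []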
Example #5
def test_issue682_signing_named_graphs():
    ns = Namespace("http://love.com#")

    mary = BNode()
    john = URIRef("http://love.com/lovers/john#")

    cmary = URIRef("http://love.com/lovers/mary#")
    cjohn = URIRef("http://love.com/lovers/john#")

    store = IOMemory()

    g = ConjunctiveGraph(store=store)
    g.bind("love", ns)

    gmary = Graph(store=store, identifier=cmary)

    gmary.add((mary, ns['hasName'], Literal("Mary")))
    gmary.add((mary, ns['loves'], john))

    gjohn = Graph(store=store, identifier=cjohn)
    gjohn.add((john, ns['hasName'], Literal("John")))

    ig = to_isomorphic(g)
    igmary = to_isomorphic(gmary)

    assert len(igmary) == len(gmary)
    assert len(ig) == len(g)
    assert len(igmary) < len(ig)
    assert ig.graph_digest() != igmary.graph_digest()
Example #6
    def test_issue655(self):
        # make sure that inf and nan are serialized correctly
        dt = XSD['double'].n3()
        self.assertEqual(
            Literal(float("inf"))._literal_n3(True),
            '"INF"^^%s' % dt
        )
        self.assertEqual(
            Literal(float("-inf"))._literal_n3(True),
            '"-INF"^^%s' % dt
        )
        self.assertEqual(
            Literal(float("nan"))._literal_n3(True),
            '"NaN"^^%s' % dt
        )

        dt = XSD['decimal'].n3()
        self.assertEqual(
            Literal(Decimal("inf"))._literal_n3(True),
            '"INF"^^%s' % dt
        )
        self.assertEqual(
            Literal(Decimal("-inf"))._literal_n3(True),
            '"-INF"^^%s' % dt
        )
        self.assertEqual(
            Literal(Decimal("nan"))._literal_n3(True),
            '"NaN"^^%s' % dt
        )

        self.assertEqual(
            Literal("inf", datatype=XSD['decimal'])._literal_n3(True),
            '"INF"^^%s' % dt
        )

        # assert that non-numerical literals aren't changed
        self.assertEqual(
            Literal('inf')._literal_n3(True),
            '"inf"'
        )
        self.assertEqual(
            Literal('nan')._literal_n3(True),
            '"nan"'
        )

        PROV = Namespace('http://www.w3.org/ns/prov#')

        bob = URIRef("http://example.org/object/Bob")

        # g1 is a simple graph with an infinite and a NaN value
        g1 = Graph()
        g1.add((bob, PROV.value, Literal(float("inf"))))
        g1.add((bob, PROV.value, Literal(float("nan"))))

        # Build g2 out of the deserialisation of g1 serialisation
        g2 = Graph()
        g2.parse(data=g1.serialize(format='turtle'), format='turtle')

        self.assertTrue(to_isomorphic(g1) == to_isomorphic(g2))
Example #7
 def run_test(self, tested_file=None, result_file=None, metadata_url=None, mode=CONST_STANDARD_MODE):
     result_graph_url = self._tests_location + result_file
     metadata_url = self._tests_location + metadata_url if metadata_url is not None else None
     csv_url = self._tests_location + tested_file if tested_file is not None else None
     converted = CSVWConverter.to_rdf(csv_url, metadata_url, mode)
     expected = Graph()
     expected.parse(result_graph_url)
     self.change_urls_in_result(expected)
     self.assertEqual(to_isomorphic(converted), to_isomorphic(expected))
Example #8
def test_graph_diff(g1, g2):
    in_both, only_in_first, only_in_second = graph_diff(to_isomorphic(g1), to_isomorphic(g2))
    only_in_first.namespace_manager = g1.namespace_manager
    only_in_second.namespace_manager = g2.namespace_manager
    ok_(len(only_in_second) == 0, f"""
<<<
{only_in_first.serialize(format='n3').decode('utf-8')}
===
{only_in_second.serialize(format='n3').decode('utf-8')}
>>>
""")
Example #9
def test_compiler_v1_4():
    compiler = RDFCompiler()
    with open("tests/data/metadata_v14.ttl", "r") as _input_file:
        expected_graph = Graph()
        expected_graph.parse(data=_input_file.read(), format="ttl")
        _ = compiler.visit(metadata_v_1_4)
        expected = to_isomorphic(expected_graph)
        got = to_isomorphic(compiler.graph)
        for (t1, t2) in _squashed_graphs_triples(expected, got):
            assert t1 == t2
        assert isomorphic(expected, got)
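_squashed_graphs_triples is imported from elsewhere in the project; a plausible sketch, assuming it simply aligns both graphs' triples in sorted order so a mismatch surfaces as a failing pairwise assertion:

def _squashed_graphs_triples(g1, g2):
    # rdflib terms are totally ordered, so sorting yields a stable alignment.
    for t1, t2 in zip(sorted(g1), sorted(g2)):
        yield t1, t2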
Example #10
def test_creates_entity_with_type(factory):
    ross = factory("rf_me")
    ross.rdf_type.add(factory('foaf_Person'))

    expected = Graph()
    expected.add((
        URIRef('http://rossfenning.co.uk/#me'),
        URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
        URIRef('http://xmlns.com/foaf/0.1/Person')))

    assert to_isomorphic(factory.store) == to_isomorphic(expected)
Example #11
    def test_load_single(self, mock_sparql_delete, mock_sparql_insert):
        with Store(self.data_path) as store:
            store.add("0000-0003-1527-0030")
            (orcid_id, active, last_update, person_uri, person_id, person_class, confirmed) = \
                store["0000-0003-1527-0030"]
            self.assertIsNone(last_update)

        graph1, add_graph1, delete_graph1 = load_single(
            "0000-0003-1527-0030", None, None, None, self.data_path,
            "http://vivo.mydomain.edu/sparql", "*****@*****.**", "password")

        self.assertEqual(319, len(add_graph1))
        self.assertEqual(0, len(delete_graph1))

        self.assertEqual(to_isomorphic(graph1), to_isomorphic(add_graph1))

        with Store(self.data_path) as store:
            # Last update now set
            (orcid_id, active, last_update, person_uri, person_id, person_class, confirmed) = \
                store["0000-0003-1527-0030"]
            self.assertIsNotNone(last_update)

        # Make sure turtle file created
        self.assertTrue(
            os.path.exists(
                os.path.join(self.data_path, "0000-0003-1527-0030.ttl")))

        # Now change a fact and run again. Changed fact is provided by vcr recording.
        # Changed year of Amherst degree.
        # Had to rig the Accept-Encoding to create the vcr recording with:
        # r = requests.get('https://pub.orcid.org/v2.0/%s' % orcid,
        #                  headers={"Accept": "application/json", "Accept-Encoding": "identity"})

        graph2, add_graph2, delete_graph2 = load_single(
            "0000-0003-1527-0030", None, None, None, self.data_path,
            "http://vivo.mydomain.edu/sparql", "*****@*****.**", "password")

        self.assertEqual(319, len(graph2))
        self.assertEqual(17, len(add_graph2))
        self.assertEqual(17, len(delete_graph2))

        mock_sparql_insert.assert_has_calls([
            call(add_graph1, "http://vivo.mydomain.edu/sparql",
                 "*****@*****.**", "password"),
            call(add_graph2, "http://vivo.mydomain.edu/sparql",
                 "*****@*****.**", "password")
        ])
        mock_sparql_delete.assert_has_calls([
            call(delete_graph1, "http://vivo.mydomain.edu/sparql",
                 "*****@*****.**", "password"),
            call(delete_graph2, "http://vivo.mydomain.edu/sparql",
                 "*****@*****.**", "password")
        ])
Example #12
    def do_algorithm(self, source_content: str,
                     target_content: str) -> List[SyncOperation]:
        source_g = Graph().parse(format='turtle', data=source_content)
        target_g = Graph().parse(format='turtle', data=target_content)
        source_g_iso = to_isomorphic(source_g)
        target_g_iso = to_isomorphic(target_g)
        _, removals_graph, additions_graph = graph_diff(
            source_g_iso, target_g_iso)

        additions_ops = self._create_add_ops_from(additions_graph)
        removals_ops = self._create_remove_ops_from(removals_graph)
        return removals_ops + additions_ops
Example #13
def test_uses_alias(factory):
    factory.addAlias('favourite_cheese', 'http://rossfenning.co.uk/#favourite-cheese')

    ross = factory("rf_me")
    ross.favourite_cheese.add('Stinking Bishop')

    expected = Graph()
    expected.add((
        URIRef('http://rossfenning.co.uk/#me'),
        URIRef('http://rossfenning.co.uk/#favourite-cheese'),
        Literal('Stinking Bishop')))

    assert to_isomorphic(factory.store) == to_isomorphic(expected)
Example #14
def compare_graphs(actual, expected):
    actual_iso = to_isomorphic(actual)
    expected_iso = to_isomorphic(expected)

    if actual_iso != expected_iso:
        _, in_first, in_second = graph_diff(actual_iso, expected_iso)
        print("The actual and expected graphs differ")
        print("----- Contents of actual graph not in expected graph -----")
        dump_ttl_sorted(in_first)
        print("----- Contents of expected graph not in actual graph -----")
        dump_ttl_sorted(in_second)

    assert actual_iso == expected_iso
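dump_ttl_sorted is not defined in this snippet; a plausible sketch, assuming it just prints the graph one triple per line in a stable order (N-Triples sorts cleanly, unlike Turtle):

def dump_ttl_sorted(graph):
    # rdflib >= 6 returns str from serialize(); older versions return bytes.
    for line in sorted(graph.serialize(format="nt").splitlines()):
        if line.strip():
            print(line)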
Example #15
def test_adds_props_during_construction(store):
    factory = ThingFactory(store)

    # We must use a list for the value: foaf:name is not a functional property,
    # so it may have multiple values (only functional properties are limited to one)
    _ = factory("rf_me", foaf_name=['Ross Fenning'])

    expected = Graph()
    expected.add((
        URIRef('http://rossfenning.co.uk/#me'),
        URIRef('http://xmlns.com/foaf/0.1/name'),
        Literal('Ross Fenning')))

    assert to_isomorphic(store) == to_isomorphic(expected)
Example #16
def main():
    values = ap.parse_args()
    format1 = guess_format(values.file1)
    format2 = guess_format(values.file2)
    g1: Graph = Graph().parse(values.file1, format=format1)
    g2: Graph = Graph().parse(values.file2, format=format2)
    iso1: IsomorphicGraph = to_isomorphic(g1)
    iso2: IsomorphicGraph = to_isomorphic(g2)
    _in_both, in_first, in_second = graph_diff(iso1, iso2)
    print(f"Only in {values.file1}")
    dump_nt_sorted(in_first)

    print(f"Only in {values.file2}")
    dump_nt_sorted(in_second)
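The ap parser and dump_nt_sorted helper are assumed by main() above but not shown; one plausible setup:

import argparse

# Hypothetical argument parser consumed by main().
ap = argparse.ArgumentParser(description="Show triples unique to each of two RDF files")
ap.add_argument("file1")
ap.add_argument("file2")

def dump_nt_sorted(g):
    # One N-Triples line per triple, sorted for deterministic output
    # (rdflib >= 6 serializes to str; older versions need .decode()).
    for line in sorted(g.serialize(format="nt").splitlines()):
        print(line)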
Example #17
    def process(self, _, f):
        local_file = download(f, "btc-2014", "archives/")
        graph = ConjunctiveGraph('Sleepycat')
        # first time create the store:
        graph.open(local_file+".triplestore", create = True)
        for line in gzip.GzipFile(fileobj=open(local_file, 'rb')):
            try:
                graph.parse(data=line, format="nquads")
            except Exception as e:
                sys.stderr.write("ERROR: %s %s\n"%(line, e))

        results = {}
        for context in graph.contexts():
            uri = context.identifier
            # Initialise stats before the try so the except block can always record the error.
            stats = collections.defaultdict(str)
            stats["id"] = uri
            try:
                g = ConjunctiveGraph()
                g += context
                sys.stderr.write("Processing %s with %s triples...\n" % (uri, len(g)))
                sys.stderr.flush()

                ig = to_isomorphic(g)
                graph_digest = ig.graph_digest(stats)

                sys.stderr.write("Processed %s with %s triples in %s sec.\n" % (uri, len(g), stats['to_hash_runtime']))
            except Exception as e:
                sys.stderr.write("ERROR: %s %s\n" % (uri, e))
                stats['error'] = str(e)
            sys.stderr.flush()
            stats_line = [unicode(stats[c]).encode('ascii', 'ignore') for c in stat_cols]
            results[uri] = stats_line
        sys.stderr.write("Digested %s into %s graphs.\n"%(f, len(results)))
        results_string = '\n'.join([','.join(row) for row in results.values()])
        yield "benchmark", results_string
Example #18
    def digest_graph(self, uri, nquads):
        nquads = '\n'.join(list(nquads))
        sys.stderr.write("Processing %s (%d)...\n" % (uri, len(nquads)))
        stats = collections.defaultdict(str)
        stats["id"] = uri
        try:
            g = ConjunctiveGraph()
            g.parse(data=nquads, format="nquads")
            stats['ontology'] = g.value(predicate=RDF.type,
                                        object=OWL.Class) is not None
            sys.stderr.flush()
            stats['lines'] = len(nquads.split('\n'))
            ig = to_isomorphic(g)
            graph_digest = ig.graph_digest(stats)
            #stats['graph_digest'] = graph_digest
            # Log success inside the try block: g is undefined if parsing failed.
            sys.stderr.write("Processed %s with %s triples in %s sec.\n" %
                             (uri, len(g), stats['to_hash_runtime']))
        except Exception as e:
            sys.stderr.write("ERROR: %s %s\n" % (stats['id'], e))
            stats['error'] = str(e)

        stats_line = [str(stats[c]) for c in stat_cols]

        result_string = StringIO.StringIO()
        result_writer = csv.writer(result_string)
        result_writer.writerow(stats_line)
        yield "benchmark", result_string.getvalue()
Example #20
    def digest_multigraph(self, _, nquads):
        try:
            nquads = '\n'.join(list(nquads))
            g = ConjunctiveGraph()
            g.parse(data=nquads, format="nquads")
        except:
            return

        result = None
        
        for graph in g.contexts():
            sys.stderr.write("Processing %s...\n"%graph.identifier)
            stats = collections.defaultdict(str)
            stats["id"] = graph.identifier
            try:
                ig = to_isomorphic(graph)
                graph_digest = ig.graph_digest(stats)
                #stats['graph_digest'] = graph_digest
            except Exception as e:
                sys.stderr.write("ERROR: %s %s\n"%(stats['id'], e))
                stats['error'] = str(e)
            #if stats['to_hash_runtime'] > 0.1:
            sys.stderr.write("Processed %s with %s triples in %s sec.\n"%(graph.identifier, len(graph), stats['to_hash_runtime']))
        
            stats_line = [str(stats[c]) for c in stat_cols]

            result_string = StringIO.StringIO()
            result_writer = csv.writer(result_string)
            result_writer.writerow(stats_line)
            if result is None:
                result = result_string.getvalue()
            else:
                result = result + "\n" + result_string.getvalue()
        yield "benchmark", result
Example #21
def process_one(uri):
    stats = collections.defaultdict(str)
    stats["id"] = uri
    stats['debug'] = True
    try:
        g = ConjunctiveGraph()
        g.parse(data=requests.get(uri,
                                  headers={
                                      "Accept": "application/rdf+xml"
                                  }).text,
                format="xml")
        #print g.serialize(format="turtle")
        stats['lines'] = len(g)
        sys.stderr.write("Processing %s (%d)...\n" % (uri, stats['lines']))
        sys.stderr.flush()
        stats['ontology'] = g.value(predicate=RDF.type,
                                    object=OWL.Class) is not None
        ig = to_isomorphic(g)
        graph_digest = ig.graph_digest(stats)
        sys.stderr.write("Processed %s with %s triples in %s sec.\n" %
                         (uri, len(g), stats['to_hash_runtime']))
    except Exception as e:
        sys.stderr.write("ERROR: %s %s\n" % (uri, e))
        stats['error'] = str(e)
    sys.stderr.flush()
    print('\n'.join(
        ['%s:\t%s' % (key, str(value)) for key, value in stats.items()]))
    return [str(stats[c]) for c in stat_cols]
Example #22
    def write(self):
        doc_node = self.create_doc()
        # Add creation info
        creation_info_node = self.create_creation_info()
        ci_triple = (doc_node, self.spdx_namespace.creationInfo,
                     creation_info_node)
        self.graph.add(ci_triple)
        # Add review info
        review_nodes = self.reviews()
        for review in review_nodes:
            self.graph.add((doc_node, self.spdx_namespace.reviewed, review))
        # Add extracted licenses
        licenses = map(self.create_extracted_license,
                       self.document.extracted_licenses)
        for lic in licenses:
            self.graph.add(
                (doc_node, self.spdx_namespace.hasExtractedLicensingInfo, lic))
        # Add files
        files = self.files()
        for file_node in files:
            self.graph.add(
                (doc_node, self.spdx_namespace.referencesFile, file_node))
        self.add_file_dependencies()
        # Add package
        package_node = self.packages()
        package_triple = (doc_node, self.spdx_namespace.describesPackage,
                          package_node)
        self.graph.add(package_triple)

        # normalize the graph to ensure that the sort order is stable
        self.graph = to_isomorphic(self.graph)

        # Write file
        self.graph.serialize(self.out, 'pretty-xml', encoding='utf-8')
Example #23
 def digest_graphs(self, _, line):
     if ".nq" not in line:
         return
     #sys.stderr.write("Downloading %s\n"%line)
     #local_file = download(line, "btc-2014", "archives/")
     #sys.stderr.write("Downloaded %s\n"%line)
     #f = gzip.GzipFile(fileobj=open(local_file, 'rb'))
     #allGraphs = ConjunctiveGraph(store='Sleepycat')
     #allGraphs.open("local_store", create=True)
     #for line in f:
     #    try:
     #        allGraphs.parse(data=line, format="nquads")
     #    except:
     #        sys.stderr.write( "BAD LINE: %s"% line)
     #    #    pass
     #sys.stderr.write("Parsed %s"%line)
     #yield 'benchmark', ','.join(stat_cols)
     #results = StringIO.StringIO()
     #resultsWriter = csv.writer(results)
     results = {}
     graphs = collections.defaultdict(str)
     i = 0
     #for g in allGraphs.contexts():
     #    i += 1
     for uri, lines in self.segment_graphs(_, line):
         #if uri is None:
         #    continue
         i += 1
         graphs[uri] += '\n' + lines
         stats = collections.defaultdict(str)
         stats["id"] = uri
         try:
             g = ConjunctiveGraph()
             g.parse(data=graphs[uri], format="nquads")
             sys.stderr.flush()
             if len(g) == 0:
                 sys.stderr.write("%s (%d)" % (graphs[uri], len(g)))
             stats['lines'] = len(graphs[uri].split('\n'))
             ig = to_isomorphic(g)
             graph_digest = ig.graph_digest(stats)
             #stats['graph_digest'] = graph_digest
         except Exception as e:
             sys.stderr.write("ERROR: %s %s\n" % (stats['id'], e))
             stats['error'] = str(e)
         sys.stderr.write("Processed %s with %s triples in %s sec.\n" %
                          (uri, len(g), stats['to_hash_runtime']))
         stats_line = [str(stats[c]) for c in stat_cols]
         results[uri] = stats_line
         #resultsWriter.writerow(stats_line)
     try:
         shutil.rmtree(store_dir)
     except:
         pass
     sys.stderr.write("Digested %s into %s graphs.\n" % (line, len(graphs)))
     results_string = StringIO.StringIO()
     results_writer = csv.writer(results_string)
     for result in results.values():
         results_writer.writerow(result)
     yield "benchmark", results_string.getvalue()
Example #24
def main():
    from rdflib import Graph
    from rdflib.compare import to_isomorphic, graph_diff

    with open(sys.argv[1]) as f:
        d1 = f.read()
    with open(sys.argv[2]) as f:
        d2 = f.read()

    print('Loading graph 1 from ' + sys.argv[1])
    g1 = Graph().parse(format='n3', data=d1)

    print('Loading graph 2 from ' + sys.argv[2])
    g2 = Graph().parse(format='n3', data=d2)

    iso1 = to_isomorphic(g1)
    iso2 = to_isomorphic(g2)
Example #25
def get_digest_value(rdf, mimetype):
    graph = Graph()
    graph.load(StringIO(rdf),format=mimetype)
    stats = {}
    ig = to_isomorphic(graph)
    result = ig.graph_digest(stats)
    print(stats)
    return result
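A quick call of get_digest_value; despite the parameter name mimetype, the value is passed straight to rdflib as a format name, so "turtle", "xml", or "n3" is what it expects. (Graph.load also assumes an older rdflib; newer releases use Graph.parse.)

ttl = '<http://example.org/s> <http://example.org/p> "o" .'
digest = get_digest_value(ttl, "turtle")  # prints the stats dict as a side effect
print(digest)                             # canonical digest of the graph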
Example #28
def graphdiff(first, second):
    """
    Diff between graph instances, should be replaced/included in quit diff
    """
    from rdflib.compare import to_isomorphic, graph_diff

    diffs = OrderedDict()
    iris = set()

    if first is not None and isinstance(first, InMemoryAggregatedGraph):
        first_identifiers = list((g.identifier for g in first.graphs()))
        iris = iris.union(first_identifiers)
    if second is not None and isinstance(second, InMemoryAggregatedGraph):
        second_identifiers = list((g.identifier for g in second.graphs()))
        iris = iris.union(second_identifiers)

    for iri in sorted(list(iris)):
        changes = diffs.get(iri, [])

        if (first is not None and iri in first_identifiers) and (
                second is not None and iri in second_identifiers):
            g1 = first.get_context(iri)
            g2 = second.get_context(iri)
            in_both, in_first, in_second = graph_diff(to_isomorphic(g1),
                                                      to_isomorphic(g2))

            if len(in_second) > 0:
                changes.append(
                    ('additions', ((s, p, o) for s, p, o in in_second)))
            if len(in_first) > 0:
                changes.append(
                    ('removals', ((s, p, o) for s, p, o in in_first)))
        elif first is not None and iri in first_identifiers:
            changes.append(
                ('removals', ((s, p, o)
                              for s, p, o in first.get_context(iri))))
        elif second is not None and iri in second_identifiers:
            changes.append(
                ('additions', ((s, p, o)
                               for s, p, o in second.get_context(iri))))
        else:
            continue

        diffs[iri] = changes
    return diffs
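The OrderedDict returned by graphdiff maps each graph IRI to ('additions' | 'removals', triple generator) pairs; consuming it could look like this (first_store and second_store stand for hypothetical InMemoryAggregatedGraph instances):

for iri, changes in graphdiff(first_store, second_store).items():
    for kind, triples in changes:
        print("%s (%s):" % (iri, kind))
        for s, p, o in triples:
            print("    %s %s %s ." % (s, p, o))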
Example #30
 def rem_metadata(g: Graph) -> IsomorphicGraph:
     g_iso = to_isomorphic(g)
     for t in list(g_iso.triples((None, METAMODEL_NAMESPACE.generation_date, None))):
         g_iso.remove(t)
     for t in list(g_iso.triples((None, METAMODEL_NAMESPACE.source_file_date, None))):
         g_iso.remove(t)
     for t in list(g_iso.triples((None, METAMODEL_NAMESPACE.source_file_size, None))):
         g_iso.remove(t)
     return g_iso
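A sketch of how rem_metadata would typically be used: strip the volatile metadata triples on both sides, then rely on IsomorphicGraph equality, which canonicalizes blank nodes before comparing:

def graphs_match(expected, actual):
    # Equality of IsomorphicGraph instances compares canonical forms,
    # so blank node labels cannot cause false mismatches.
    return rem_metadata(expected) == rem_metadata(actual)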
Example #31
    def rdf_comparator(self, old_data: str, new_data: str,
                       new_file: str) -> None:
        """
        RDF comparator.  Compare two graphs and, if they don't match, save a turtle image of new_data in
        new_file and raise an error
        :param old_data: Turtle representation of expected RDF
        :param new_data: Turtle representation of actual RDF
        :param new_file: Save actual RDF here if mismatch
        :return:
        """
        old_graph = Graph()
        new_graph = Graph()
        old_graph.parse(data=old_data, format="turtle")
        new_graph.parse(data=new_data, format="turtle")
        old_iso = to_isomorphic(old_graph)
        # Remove the metadata specific triples
        for t in list(
                old_iso.triples(
                    (None, METAMODEL_NAMESPACE.generation_date, None))):
            old_iso.remove(t)
        new_iso = to_isomorphic(new_graph)
        for t in list(
                new_iso.triples(
                    (None, METAMODEL_NAMESPACE.generation_date, None))):
            new_iso.remove(t)

        # Graph compare takes a Looong time
        in_both, in_old, in_new = graph_diff(old_iso, new_iso)
        # if old_iso != new_iso:
        #     in_both, in_old, in_new = graph_diff(old_iso, new_iso)
        old_len = len(list(in_old))
        new_len = len(list(in_new))
        if old_len or new_len:
            if old_len:
                print("----- Old graph only -----")
                self._print_triples(in_old)
            if new_len:
                print("----- New Grapn Only -----")
                self._print_triples(in_new)
            with open(new_file, 'w') as newf:
                newf.write(new_data)
            self.assertTrue(False, "RDF file mismatch")
Example #32
 def rem_metadata(g: Graph) -> IsomorphicGraph:
     # Remove list declarations from target
     for s in g.subjects(RDF.type, RDF.List):
         g.remove((s, RDF.type, RDF.List))
     for t in g:
         if t[1] in (LINKML.generation_date, LINKML.source_file_date,
                     LINKML.source_file_size, TYPE.generation_date,
                     TYPE.source_file_date, TYPE.source_file_size):
             g.remove(t)
     g_iso = to_isomorphic(g)
     return g_iso
Example #33
    def test_load_single(self, mock_sparql_delete, mock_sparql_insert):
        with Store(self.data_path) as store:
            store.add("0000-0003-1527-0030")
            (orcid_id, active, last_update, person_uri, person_id, person_class, confirmed) = \
                store["0000-0003-1527-0030"]
            self.assertIsNone(last_update)

        graph1, add_graph1, delete_graph1 = load_single("0000-0003-1527-0030", None, None, None, self.data_path,
                                                        "http://vivo.mydomain.edu/sparql", "*****@*****.**",
                                                        "password")

        self.assertEqual(232, len(add_graph1))
        self.assertEqual(0, len(delete_graph1))

        self.assertEqual(to_isomorphic(graph1), to_isomorphic(add_graph1))

        with Store(self.data_path) as store:
            #Last update now set
            (orcid_id, active, last_update, person_uri, person_id, person_class, confirmed) = \
                store["0000-0003-1527-0030"]
            self.assertIsNotNone(last_update)

        #Make sure turtle file created
        self.assertTrue(os.path.exists(os.path.join(self.data_path, "0000-0003-1527-0030.ttl")))

        #Now change a fact and run again. Changed fact is provided by vcr recording.
        #Changed year of Amherst degree.
        graph2, add_graph2, delete_graph2 = load_single("0000-0003-1527-0030", None, None, None,
                                                        self.data_path, "http://vivo.mydomain.edu/sparql",
                                                        "*****@*****.**", "password")

        self.assertEqual(232, len(graph2))
        self.assertEqual(17, len(add_graph2))
        self.assertEqual(17, len(delete_graph2))

        mock_sparql_insert.assert_has_calls([
            call(add_graph1, "http://vivo.mydomain.edu/sparql", "*****@*****.**", "password"),
            call(add_graph2, "http://vivo.mydomain.edu/sparql", "*****@*****.**", "password")])
        mock_sparql_delete.assert_has_calls([
            call(delete_graph1, "http://vivo.mydomain.edu/sparql", "*****@*****.**", "password"),
            call(delete_graph2, "http://vivo.mydomain.edu/sparql", "*****@*****.**", "password")])
Example #35
    def test_graphs_equal(self):
        self.origin = reset_store_and_setup_origin()

        # TODO: make this file callable and with parameter: check URIs
        self.origin.GET(only_follow_uris=[])

        g1 = self.origin._graph
        g2 = self.origin.get_graph()

        # normal rdflib.compare does not work correctly with
        # ConjunctiveGraph, unless there is only one graph within it
        self.assertEqual(len(list(g1.contexts())), 1)
        self.assertEqual(len(list(g2.contexts())), 1)

        self.assertEqual(len(g1), len(g2))

        self.assertEqual(compare.to_isomorphic(g1), compare.to_isomorphic(g2))

        nsbindungs_orig = dict(g1.namespace_manager.namespaces())
        nsbindungs_new = dict(g2.namespace_manager.namespaces())
        self.assertEqual(nsbindungs_orig, nsbindungs_new)
Example #36
    def _are_added_triples(self, cur_subj):
        subj = cur_subj
        cur_subj_g = cur_subj.g
        prev_subj_g = Graph()
        query = "CONSTRUCT {<%s> ?p ?o} WHERE {<%s> ?p ?o}" % (subj, subj)
        result = self.ts.query(query)

        if result:
            for s, p, o in result:
                prev_subj_g.add((s, p, o))

            iso1 = to_isomorphic(prev_subj_g)
            iso2 = to_isomorphic(cur_subj_g)
            if iso1 == iso2:  # the graphs are the same
                return None
            else:
                in_both, in_first, in_second = graph_diff(iso1, iso2)
                query_string = u"INSERT DATA { GRAPH <%s> { " % cur_subj_g.identifier
                query_string += in_second.serialize(
                    format="nt11", encoding="utf-8").decode("utf-8")
                return query_string.replace('\n\n', '') + "} }"
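The symmetric update for removed triples is not part of the snippet, but it could be built from in_first the same way (a sketch; nt11 keeps one triple per line):

def build_delete_query(graph_identifier, in_first):
    # Hypothetical counterpart to the INSERT DATA above, wrapping the
    # triples that disappeared in a DELETE DATA update.
    payload = in_first.serialize(format="nt11", encoding="utf-8").decode("utf-8")
    query = u"DELETE DATA { GRAPH <%s> { " % graph_identifier + payload
    return query.replace('\n\n', '') + "} }"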
Example #37
def canonicalize(resource):
    digest = to_isomorphic(resource.graph).graph_digest()
    #canonical = list(iso[:RDFS.label:resource.label()])[0]
    #print str(canonical), '\n', iso.serialize(format='turtle')
    skolemized = _prefix[str(digest)]
    for s, p, o in resource.graph.triples((None, None, resource.identifier)):
        resource.graph.remove((s, p, o))
        resource.graph.add((s, p, skolemized))
    for s, p, o in resource.graph.triples((resource.identifier, None, None)):
        resource.graph.remove((s, p, o))
        resource.graph.add((skolemized, p, o))
    return resource.graph.resource(skolemized)
Example #40
    def write(self):
        doc_node = self.create_doc()
        # Add creation info
        creation_info_node = self.create_creation_info()
        ci_triple = (doc_node, self.spdx_namespace.creationInfo,
                     creation_info_node)
        self.graph.add(ci_triple)
        # Add review info
        review_nodes = self.reviews()
        for review in review_nodes:
            self.graph.add((doc_node, self.spdx_namespace.reviewed, review))
        # Add external document references info
        ext_doc_ref_nodes = self.ext_doc_refs()
        for ext_doc_ref in ext_doc_ref_nodes:
            ext_doc_ref_triple = (
                doc_node,
                self.spdx_namespace.externalDocumentRef,
                ext_doc_ref,
            )
            self.graph.add(ext_doc_ref_triple)
        # Add extracted licenses
        licenses = map(self.create_extracted_license,
                       self.document.extracted_licenses)
        for lic in licenses:
            self.graph.add(
                (doc_node, self.spdx_namespace.hasExtractedLicensingInfo, lic))
        # Add files
        files = self.files()
        for file_node in files:
            self.graph.add(
                (doc_node, self.spdx_namespace.referencesFile, file_node))
        self.add_file_dependencies()
        # Add package
        for package_node in self.packages():
            package_triple = (doc_node, self.spdx_namespace.describesPackage,
                              package_node)
            self.graph.add(package_triple)
        """# Add relationship
        relate_node = self.relationships()
        relate_triple = (doc_node, self.spdx_namespace.relationship, relate_node)
        self.graph.add(relate_triple)"""
        # Add snippet
        snippet_nodes = self.snippets()
        for snippet in snippet_nodes:
            self.graph.add((doc_node, self.spdx_namespace.Snippet, snippet))

        # normalize the graph to ensure that the sort order is stable
        self.graph = to_isomorphic(self.graph)

        # Write file
        self.graph.serialize(self.out, "pretty-xml", encoding="utf-8")
Example #41
    def readIsomorphicGraph(self, file):
        graph = ConjunctiveGraph(identifier='')

        # check if we handle a directory or a separate file
        if isdir(file):
            # for a better readability rename variable
            dir = file
            for path, dirs, files in walk(file):
                for file in files:
                    absfile = join(path, file)
                    format = rdflib.util.guess_format(absfile)

                    if format is not None:
                        graph.parse(absfile,
                                    format=format,
                                    publicID=self.nsQuitDiff)

        elif isfile(file):
            format = rdflib.util.guess_format(file)

            if format is not None:
                graph.parse(file, format=format, publicID=self.nsQuitDiff)

        contextDict = {}
        contextDict[self.nsQuitDiff] = Graph()

        for subgraph in graph.contexts():
            # TODO we have to copy all the triples to a new ConjunctiveGraph
            # because https://rdflib.readthedocs.io/en/stable/_modules/rdflib/compare.html takes the complete store
            # and thus doesn't support quads
            triples = subgraph.triples((None, None, None))
            if isinstance(subgraph.identifier, BNode) or str(
                    subgraph.identifier) == self.nsQuitDiff:
                subgraphConjunctive = contextDict[self.nsQuitDiff]
            else:
                try:
                    subgraphConjunctive = contextDict[subgraph.identifier]
                except KeyError:
                    contextDict[subgraph.identifier] = ConjunctiveGraph()
                    subgraphConjunctive = contextDict[subgraph.identifier]

            for triple in triples:
                subgraphConjunctive.add(triple)
            # end TODO hack

        graphDict = {}

        for identifier, graph in contextDict.items():
            graphDict[identifier] = compare.to_isomorphic(graph)

        return graphDict
Example #43
 def worker(q, finished_tasks, dl_lock):
     try:
         while True:
             stats = q.get()
             og = Graph()
             try:
                 og.load(stats["download_url"])
                 print(stats["ontology"], stats["id"])
                 ig = to_isomorphic(og)
                 graph_digest = ig.graph_digest(stats)
                 finished_tasks.put(stats)
             except Exception as e:
                 print("ERROR", stats["id"], e)
                 stats["error"] = str(e)
                 finished_tasks.put(stats)
     except queue.Empty:
         pass
Example #44
 def worker(q, finished_tasks, dl_lock):
     try:
         while True:
             stats = q.get()
             og = Graph()
             try:
                 og.load(stats['download_url'])
                 print stats['ontology'], stats['id']
                 ig = to_isomorphic(og)
                 graph_digest = ig.graph_digest(stats)
                 finished_tasks.put(stats)
             except Exception as e:
                 print 'ERROR', stats['id'], e
                 stats['error'] = str(e)
                 finished_tasks.put(stats)
     except Empty:
         pass
Example #46
def digest_graph(uri, turtle):
    stats = collections.defaultdict(str)
    stats["id"] = uri
    #stats['lines'] = len(nquads.split('\n'))
    #sys.stderr.flush()
    try:
        g = ConjunctiveGraph()
        g.parse(data=turtle, format="turtle")
        #sys.stderr.write("Processing %s (%d)...\n"%(uri, len(g)))
        sys.stderr.write('.')
        sys.stderr.flush()
        stats['ontology'] = g.value(predicate=RDF.type, object=OWL.Class) is not None
        ig = to_isomorphic(g)
        graph_digest = ig.graph_digest(stats)
        #sys.stderr.write("Processed %s with %s triples in %s sec.\n"%(uri, len(g), stats['to_hash_runtime']))
    except Exception as e:
        sys.stderr.write("ERROR: %s %s\n"%(uri, e))
        sys.stderr.flush()
        stats['error'] = str(e)
        #print nquads
    return [unicode(stats[c]).encode("ascii","ignore") for c in stat_cols]
Example #48
 def worker(q, finished_tasks, dl_lock):
     try:
         while True:
             stats = q.get()
             og = Graph()
             try:
                 try:
                     dl_lock.acquire()
                     og.load(stats['download_url'] + "?apikey=%s" % apikey)
                 finally:
                     dl_lock.release()
                 print(stats['ontology'], stats['id'])
                 ig = to_isomorphic(og)
                 graph_digest = ig.graph_digest(stats)
                 finished_tasks.put(stats)
             except Exception as e:
                 print('ERROR', stats['id'], e)
                 stats['error'] = str(e)
                 finished_tasks.put(stats)
     except Empty:
         pass
Example #49
    def GET(
        self,
        GRAPH_SIZE_LIMIT=30000,
        only_follow_uris=None,
        handle_owl_imports=False,
        raise_errors=True,
        skip_urls=None,
        httphandler=None,
    ):

        if not self.uri:
            raise Exception("Please provide URI first")

        if skip_urls is not None and self.uri.encode("utf8") in skip_urls:
            self.add_error("Skipped")
            self.processed = True
            return

        logger.info(u"GET %s..." % self.uri)

        if self.has_unsaved_changes():
            if self.processed:
                raise Exception("Please save all changes before querying "
                                "again. Merging not supported yet")
            else:
                logger.warning("There were Resource objects created before "
                               "processing the resource's origin.")

        now = datetime.datetime.now()
        # self.timedelta = datetime.timedelta(minutes=1)
        if hasattr(self, "timedelta") and hasattr(self, 'last_processed'):
            time_since_last_processed = now - self.last_processed
            if (time_since_last_processed < self.timedelta):
                logger.info(
                    "Not processing %s again because was processed only %s ago" % (self.uri, time_since_last_processed))
                return
            self.last_processed = now

        try:
            data = self.backend.GET(self.uri, httphandler=httphandler)
        except urllib2.HTTPError as e:
            if e.code in [
                401,
                403,
                503,  # Service Temporarily Unavailable
                404,  # Not Found
            ]:
                self.add_error(e.code)
            if raise_errors:
                raise e
            else:
                return
        except urllib2.URLError as e:
            self.add_error("timeout")
            if raise_errors:
                raise e
            else:
                return
        except ContentNegotiationError as e:
            logger.error(e.message)
            if raise_errors:
                raise e
            else:
                return

        graph = rdflib.graph.ConjunctiveGraph(identifier=self.uri)

        try:
            if data:
                # Important: Do not pass data=data without publicID=uri because
                # relative URIs (#deri) won't be an absolute uri in that case!
                publicID = self.uri

                reference_time = datetime.datetime.now()

                graph.parse(data=data, publicID=publicID, format=self.backend.format)

                now = datetime.datetime.now()
                self.graph_parse_time = now - reference_time

                # normal rdflib.compare does not work correctly with
                # ConjunctiveGraph, unless there is only one graph within that
        except SAXParseException as e:
            self.add_error("SAXParseException")
            logger.error("SAXParseException: %s" % self)
            if raise_errors:
                raise e
            else:
                return
        except rdflib.exceptions.ParserError as e:
            self.add_error("ParserError")
            logger.error("ParserError: %s" % self)
            if raise_errors:
                raise e
            else:
                return
        except IOError as e:
            self.add_error("IOError")
            logger.error("IOError: %s" % self)
            if raise_errors:
                raise e
            else:
                return

        self.processed = True

        if hasattr(self, "errors"):
            delattr(self, "errors")

        g_length = len(graph)

        if g_length > 0:
            if len(list(graph.contexts())) > 1:
                # detect problems with graph contexts: rdflib can only
                # compare graphs with one context. If a graph has more
                # contexts this might result in wrong comparisons of graphs
                # Still ignored here as ldtools is more robust by doing so.
                logger.error("The graph has more than one context. This"
                             "might cause problems comparing the graphs!")

        if g_length > GRAPH_SIZE_LIMIT:
            logger.error("Maximum graph size exceeded. Thr graph is %s "
                         "triples big. Limit is set to %s. The aquired "
                         "graph exceeds that! Pass GRAPH_SIZE_LIMIT to set it "
                         "differently." % (g_length, GRAPH_SIZE_LIMIT))
            return

        if hasattr(self, "_graph"):
            # we already assured that there are no unsaved_changes
            # --> get_graph() == _graph

            logger.info(u"Already crawled: %s. Comparing graphs..." % self.uri)

            if compare.to_isomorphic(self._graph) ==\
               compare.to_isomorphic(graph):
                return
            else:
                logging.warning("GET retrieved updates for %s!" % self.uri)
                my_graph_diff(self._graph, graph)

                for resource in self.get_resources():
                    resource.delete()
                delattr(self, "handled")

        if hasattr(self, "handled"):
            return

        self._graph = graph

        graph_handler = GraphHandler(
            only_follow_uris=only_follow_uris,
            handle_owl_imports=handle_owl_imports,
            origin=self)
        graph_handler.populate_resources(graph=graph)

        self.handled = True
Example #50
def my_graph_diff(graph1, graph2):
    """Compares graph2 to graph1 and highlights everything that changed.
    Colored if pygments available"""

    # quick fix for wrong type
    if not type(graph1) == type(graph2) == rdflib.Graph:
        if type(graph1) == rdflib.ConjunctiveGraph:
            g1contexts = list(graph1.contexts())
            assert len(g1contexts) == 1
            graph1 = g1contexts[0]
        if type(graph2) == rdflib.ConjunctiveGraph:
            g2contexts = list(graph2.contexts())
            assert len(g2contexts) == 1
            graph2 = g2contexts[0]

    # Return if both graphs are isomorphic
    iso1 = compare.to_isomorphic(graph1)
    iso2 = compare.to_isomorphic(graph2)

    if graph1.identifier == graph2.identifier:
        str_bit = u"The 2 '%s' Graphs" % graph1.identifier
    else:
        str_bit = (u"Graphs '%s' and '%s'"
                   % (graph1.identifier, graph2.identifier))

    if iso1 == iso2:
        logger.debug(u"%s are isomorphic" % str_bit)
        return

    print(u"Differences between %s." % str_bit)

    in_both, in_first, in_second = compare.graph_diff(iso1, iso2)

    def dump_nt_sorted(g):
        return sorted(g.serialize(format='nt').splitlines())

    sorted_first = dump_nt_sorted(in_first)
    sorted_second = dump_nt_sorted(in_second)

    import difflib

    diff = difflib.unified_diff(
        sorted_first,
        sorted_second,
        u'Original',
        u'Current',
        lineterm=''
    )

    try:
        from pygments import highlight
        from pygments.formatters import terminal
        from pygments.lexers import web

        lexer = web.XmlLexer()
        formatter = terminal.TerminalFormatter()
        print(highlight(u'\n'.join(diff), lexer, formatter))
    except ImportError:
        logger.info("Install pygments for colored diffs")
        print(u'\n'.join(diff))
    except UnicodeDecodeError:
        print(u"Only in first", unicode(sorted_first))
        print(u"Only in second", unicode(sorted_second))
Example #51
 def isograph(doc):
     g = ConjunctiveGraph().parse(data=etree.tostring(doc))
     return to_isomorphic(g)
Example #52
from rdflib import Graph
from rdflib.compare import to_isomorphic, graph_diff
import sys

if len(sys.argv) >= 3:
   F1 = sys.argv[1]
   F2 = sys.argv[2]
else:
   F1 = "/home/barry/Downloads/instance.ttl"
   F2 = "/home/barry/Downloads/t2.ttl"
g1 = Graph()
g1.parse(F1, format="turtle")

g2 = Graph()
g2.parse(F2, format="turtle")

iso1 = to_isomorphic(g1)
iso2 = to_isomorphic(g2)

in_both, in_first, in_second = graph_diff(iso1, iso2)

if len(sys.argv) == 4:
   print(in_first.serialize(format="n3").decode('utf-8'))
else:
   print(in_second.serialize(format="n3").decode('utf-8'))
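The .decode('utf-8') calls in this script assume an older rdflib where serialize() returns bytes; on rdflib 6+ it returns str and decode would fail. A version-tolerant sketch:

def serialized_text(graph, fmt="n3"):
    # serialize() returns bytes on rdflib < 6 and str on rdflib >= 6.
    data = graph.serialize(format=fmt)
    return data.decode("utf-8") if isinstance(data, bytes) else data

print(serialized_text(in_second))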