def _log(template, *args):
    """Print one progress line prefixed with the current local time.

    ``template`` is a %-style format string and ``args`` are the values for
    its placeholders. The emitted line has the form ``[<timestamp>] <text>``.
    """
    timestamp = strftime("%a, %d %b %Y %H:%M:%S", localtime())
    # A single parenthesised argument behaves identically as a Python 2
    # print statement and as a Python 3 print() call.
    print(('[%s] ' + template) % ((timestamp,) + args))


configParser = ConfigurationParser(options.configfile)

_log('Starting analysis...')

sparql_analyzer = DumpAnalyzer(configParser.sparql_endpoint,
                               configParser.db_identifier,
                               configParser.db_configstring)
# Alternative analyzer, kept for reference:
#sparql_analyzer = SPARQLAnalyzer('http://lod.b3kat.de/sparql', 'b3kat')
sparql_analyzer.open()
try:
    sparql_analyzer.load_graph()

    _log('URI pattern: %s', sparql_analyzer.uri_pattern)
    _log('Number of triples: %s', len(sparql_analyzer.graph))

    # NOTE: despite the ``num_`` prefix these names hold the collections
    # returned by the analyzer; only their lengths are reported.
    num_classes = sparql_analyzer.get_classes()
    _log('Number of classes: %s', len(num_classes))

    num_properties = sparql_analyzer.get_properties()
    _log('Number of properties: %s', len(num_properties))

    num_subjects = sparql_analyzer.get_subjects()
    _log('Number of subjects: %s', len(num_subjects))

    num_objects = sparql_analyzer.get_objects()
    _log('Number of objects: %s', len(num_objects))

    num_entities = sparql_analyzer.get_entities()
    _log('Number of entities: %s', len(num_entities))

    num_linksets = sparql_analyzer.get_linksets()
    _log('Linksets: ')
    # Two-level mapping: outer key -> {inner key: value}. Distinct loop
    # variables are used so the inner loop no longer shadows the outer one.
    for target, link_counts in num_linksets.items():
        print('|%s' % (target,))
        for predicate, count in link_counts.items():
            print('|--------%s: %s' % (predicate, count))

    _log('Finished!')
finally:
    # Release the analyzer even when one of the steps above fails.
    sparql_analyzer.close()
class DumpAnalyzerTestCase(unittest.TestCase):
    """Compare DumpAnalyzer results for ``morelab.rdf`` with fixed values.

    One analyzer is opened and loaded once for the whole class and shared by
    every test; it is closed again in ``tearDownClass``.
    """

    @classmethod
    def setUpClass(cls):
        """Create, open and load the analyzer shared by all tests."""
        # NOTE(review): the connection string (including credentials) is
        # hard-coded; the tests presumably need a matching local PostgreSQL
        # store - confirm before running elsewhere.
        cls.sparql_analyzer = DumpAnalyzer(
            'morelab.rdf',
            'test',
            'user=postgres password=p0stgr3s host=localhost dbname=rdfstore')
        cls.sparql_analyzer.open()
        try:
            cls.sparql_analyzer.load_graph()
        except Exception:
            # unittest does not run tearDownClass when setUpClass raises, so
            # close the already opened analyzer here before propagating.
            cls.sparql_analyzer.close()
            raise

    @classmethod
    def tearDownClass(cls):
        """Close the shared analyzer."""
        cls.sparql_analyzer.close()

    def test_get_classes(self):
        """get_classes() yields 13 items."""
        expected_classes = 13
        result_classes = len(self.sparql_analyzer.get_classes())
        self.assertEqual(result_classes, expected_classes)

    def test_get_properties(self):
        """get_properties() yields 79 items."""
        expected_properties = 79
        result_properties = len(self.sparql_analyzer.get_properties())
        self.assertEqual(result_properties, expected_properties)

    def test_get_subjects(self):
        """get_subjects() yields 241 items."""
        expected_subjects = 241
        result_subjects = len(self.sparql_analyzer.get_subjects())
        self.assertEqual(result_subjects, expected_subjects)

    def test_get_objects(self):
        """get_objects() yields 1341 items."""
        expected_objects = 1341
        result_objects = len(self.sparql_analyzer.get_objects())
        self.assertEqual(result_objects, expected_objects)

    def test_get_class_instances(self):
        """get_class_instances() yields 57 items for swrc:Article."""
        expected_class_instances = 57
        result_class_instances = len(self.sparql_analyzer.get_class_instances(
            'http://swrc.ontoware.org/ontology#Article'))
        self.assertEqual(result_class_instances, expected_class_instances)

    def test_get_property_count(self):
        """get_property_count() yields 478 items for foaf:maker."""
        expected_property_count = 478
        result_property_count = len(self.sparql_analyzer.get_property_count(
            'http://xmlns.com/foaf/0.1/maker'))
        self.assertEqual(result_property_count, expected_property_count)

    def test_get_all_links(self):
        """get_all_links() yields 1877 items."""
        expected_links = 1877
        result_links = len(self.sparql_analyzer.get_all_links())
        self.assertEqual(result_links, expected_links)

    def test_get_uri_pattern(self):
        """Element 1 of get_uri_pattern() is the morelab resource prefix."""
        expected_uri_pattern = 'http://www.morelab.deusto.es/resource/'
        result_uri_pattern = self.sparql_analyzer.get_uri_pattern()
        self.assertEqual(result_uri_pattern[1], expected_uri_pattern)

    def test_get_entities(self):
        """get_entities() yields 192 items."""
        expected_entities = 192
        result_entities = len(self.sparql_analyzer.get_entities())
        self.assertEqual(result_entities, expected_entities)

    def test_get_outgoing_links(self):
        """get_outgoing_links() yields 997 items."""
        expected_outgoing_links = 997
        result_outgoing_links = len(self.sparql_analyzer.get_outgoing_links())
        self.assertEqual(result_outgoing_links, expected_outgoing_links)

    # TODO: disabled test, kept for reference. It uses ``url_list``, which is
    # not defined anywhere in this test case.
    #
    # def test_get_patterns(self):
    #     expected_pattern = 'http://www.morelab.deusto.es/resource/'
    #     result_pattern = self.sparql_analyzer.get_patterns(url_list)
    #     self.assertEqual(result_pattern, expected_pattern)

    def test_get_linksets(self):
        """get_linksets() equals the expected two-level mapping."""
        # Written as a plain dict literal; the previous eval() of a constant
        # string produced exactly this value.
        expected_linksets = {
            'http://sws.geonames.org/': {
                'http://xmlns.com/foaf/0.1/based_near': 12,
                'http://www.w3.org/2000/10/swap/pim/contact#nearestAirport': 1,
            },
            'http://dbpedia.org/': {
                'http://xmlns.com/foaf/0.1/interest': 1,
            },
            'http://www.wikier.org/': {
                'http://xmlns.com/foaf/0.1/isDescribedIn': 2,
            },
            'http://littera.deusto.es/prof/abaitua/': {
                'http://www.w3.org/2000/01/rdf-schema#seeAlso': 1,
                'http://xmlns.com/foaf/0.1/knows': 12,
            },
            'http://www.w3.org/2001/': {
                'http://xmlns.com/foaf/0.1/interest': 9,
            },
            'http://data.bibbase.org/author/': {
                'http://www.w3.org/2002/07/owl#sameAs': 1,
            },
            'http://dblp.l3s.de/d2r/resource/': {
                'http://purl.org/dc/terms/partOf': 22,
                'http://purl.org/dc/elements/1.1/creator': 32,
                'http://swrc.ontoware.org/ontology#journal': 14,
                'http://xmlns.com/foaf/0.1/maker': 32,
                'http://swrc.ontoware.org/ontology#series': 22,
                'http://www.w3.org/2002/07/owl#sameAs': 41,
            },
            'http://dx.doi.org/': {
                'http://www.w3.org/2000/01/rdf-schema#seeAlso': 18,
            },
        }
        result_linksets = self.sparql_analyzer.get_linksets()
        self.assertDictEqual(result_linksets, expected_linksets)