"""
Tests for SSD construction, mapping, JSON serialisation and server-side
evaluation.
"""
# NOTE: the import paths below are assumptions about the project layout;
# adjust them to wherever these names live in this code base.
import csv
import datetime
import logging
import os
import tarfile
from pprint import pprint

from serene.elements import ClassNode, Column, DataNode, DataSet, Ontology, SSD
from serene.elements.semantics.base import KARMA_DEFAULT_NS
from serene.elements.semantics.ssd import SSDJsonWriter
from serene.endpoints import DataSetEndpoint, OntologyEndpoint, SSDEndpoint
from tests.utils import TestWithServer

_logger = logging.getLogger(__name__)


def initSsdEndpoint(session):
    """Build a shared fixture: the endpoints, the businessInfo dataset, an
    example ontology and a locally constructed SSD."""
    result = {}
    result["datasetEndpoint"] = DataSetEndpoint(session)
    dataset_path = os.path.join(os.path.dirname(__file__),
                                "resources", "data", "businessInfo.csv")
    result["dataset"] = result["datasetEndpoint"].upload(dataset_path)

    result["ontologyEndpoint"] = OntologyEndpoint(session)
    result["ontology"] = result["ontologyEndpoint"].upload(
        Ontology()
        .uri("http://www.semanticweb.org/serene/example_ontology")
        .owl_class("Place", ["name", "postalCode"])
        .owl_class("City", is_a="Place")
        .owl_class("Person", {"name": str, "phone": int,
                              "birthDate": datetime.datetime})
        .owl_class("Organization", {"name": str, "phone": int, "email": str})
        .owl_class("Event", {"startDate": datetime.datetime,
                             "endDate": datetime.datetime})
        .owl_class("State", is_a="Place")
        .link("Person", "bornIn", "Place")
        .link("Organization", "ceo", "Person")
        .link("Place", "isPartOf", "Place")
        .link("Person", "livesIn", "Place")
        .link("Event", "location", "Place")
        .link("Organization", "location", "Place")
        .link("Organization", "operatesIn", "City")
        .link("Place", "nearby", "Place")
        .link("Event", "organizer", "Person")
        .link("City", "state", "State")
        .link("Person", "worksFor", "Organization"))

    result["ssdEndpoint"] = SSDEndpoint(session, result["datasetEndpoint"],
                                        result["ontologyEndpoint"])
    result["localSsd"] = (SSD(result["dataset"], result["ontology"],
                              name="business-info")
                          .map(Column("company"), DataNode(ClassNode("Organization"), "name"))
                          .map(Column("ceo"), DataNode(ClassNode("Person"), "name"))
                          .map(Column("city"), DataNode(ClassNode("City"), "name"))
                          .map(Column("state"), DataNode(ClassNode("State"), "name"))
                          .link("Organization", "operatesIn", "City")
                          .link("Organization", "ceo", "Person")
                          .link("City", "state", "State"))
    return result
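
# A minimal usage sketch for the fixture above, assuming the TestWithServer
# base class used throughout this module (it supplies self._session). The
# class and test names here are hypothetical, not part of the original suite.
class ExampleSsdFixtureUsage(TestWithServer):
    def setUp(self):
        # build the shared endpoints, dataset, ontology and local SSD once
        self._fixture = initSsdEndpoint(self._session)

    def test_upload_local_ssd(self):
        # upload the locally built "business-info" SSD and check it stored
        uploaded = self._fixture["ssdEndpoint"].upload(self._fixture["localSsd"])
        self.assertTrue(uploaded.stored)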
class TestEvaluateSSD(TestWithServer):
    """
    Tests the comparison of SSDs
    """
    def __init__(self, method_name="runTest"):
        super().__init__(method_name)
        self._datasets = None
        self._ontologies = None

        path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "resources")
        self._test_owl = os.path.join(path, 'owl', 'dataintegration_report_ontology.ttl')
        self._museum_owl_dir = os.path.join(path, 'owl', 'museum_edm')
        self._business_file = os.path.join(path, 'data', 'businessInfo.csv')
        self._business_ssd = os.path.join(path, 'ssd', 'businessInfo.ssd')
        self._cities_file = os.path.join(path, 'data', 'getCities.csv')
        self._cities_ssd = os.path.join(path, 'ssd', 'getCities.ssd')
        self._tricky_cities_ssd = os.path.join(path, 'ssd', 'tricky.ssd')
        self._objects_owl = os.path.join(path, 'owl', 'objects.ttl')
        self._paintings_file = os.path.join(path, 'data', 'paintings.csv')
        self._paintings_ssd = os.path.join(path, 'ssd', 'paintings.ssd')
        self._museum_file = os.path.join(path, 'data', 'museum.csv')
        self._museum_ssd = os.path.join(path, 'ssd', 'museum.ssd')
        self._ssd_path = os.path.join(path, 'ssd')
        self._data_path = os.path.join(path, 'data')

    def setUp(self):
        self._datasets = DataSetEndpoint(self._session)
        self._ontologies = OntologyEndpoint(self._session)
        self._ssds = SSDEndpoint(self._session, self._datasets, self._ontologies)
        self._clear_storage()

    def _clear_storage(self):
        """Removes all server elements"""
        for ssd in self._ssds.items:
            self._ssds.remove(ssd)
        for ds in self._datasets.items:
            self._datasets.remove(ds)
        for on in self._ontologies.items:
            self._ontologies.remove(on)

    def tearDown(self):
        """
        Be sure to remove all storage elements once a test is finished...
        :return:
        """
        self._clear_storage()

    def test_evaluate_business(self):
        """
        Tests evaluation for business
        :return:
        """
        dataset = self._datasets.upload(self._business_file)
        on = self._ontologies.upload(self._test_owl)
        assert issubclass(type(dataset), DataSet)
        ontology = self._ontologies.items[0]

        new_json = dataset.bind_ssd(self._business_ssd, [ontology],
                                    str(ontology._prefixes['']))
        empty_ssd = SSD(dataset, on)
        ssd = empty_ssd.update(new_json, self._datasets, self._ontologies)

        self.assertEqual(len(ssd.class_nodes), 4)
        self.assertEqual(len(ssd.data_nodes), 4)
        self.assertEqual(len(ssd.data_links), 4)    # these are only data properties
        self.assertEqual(len(ssd.object_links), 3)  # these are only object properties

        res = self._ssds.compare(ssd, ssd)
        print(res)
        self.assertEqual(res['precision'], 1)
        self.assertEqual(res['recall'], 1)
        self.assertEqual(res['jaccard'], 1)

    def test_evaluate_tricky_cities(self):
        """
        Here the ssd has two class nodes of the same type
        :return:
        """
        self._datasets.upload(self._cities_file)
        on = self._ontologies.upload(self._test_owl)
        dataset = self._datasets.items[0]
        assert issubclass(type(dataset), DataSet)
        ontology = self._ontologies.items[0]

        new_json = dataset.bind_ssd(self._tricky_cities_ssd, [ontology],
                                    str(ontology._prefixes['']))
        empty_ssd = SSD(dataset, on)
        ssd = empty_ssd.update(new_json, self._datasets, self._ontologies)

        self.assertEqual(len(ssd.class_nodes), 2)
        self.assertEqual(len(ssd.data_nodes), 2)
        self.assertEqual(len(ssd.mappings), 2)
        self.assertEqual(len(ssd.object_links), 1)  # these are only object properties

        res = self._ssds.compare(ssd, ssd)
        print(res)
        self.assertEqual(res['precision'], 1)
        self.assertEqual(res['recall'], 1)
        self.assertEqual(res['jaccard'], 1)

    def test_ssd_evaluate_tricky_cities(self):
        """
        Here the ssd has two class nodes of the same type.
        We use the evaluate method on the SSD itself rather than the
        server's compare endpoint.
        :return:
        """
        self._datasets.upload(self._cities_file)
        on = self._ontologies.upload(self._test_owl)
        dataset = self._datasets.items[0]
        assert issubclass(type(dataset), DataSet)
        ontology = self._ontologies.items[0]

        new_json = dataset.bind_ssd(self._tricky_cities_ssd, [ontology],
                                    str(ontology._prefixes['']))
        empty_ssd = SSD(dataset, on)
        ssd = empty_ssd.update(new_json, self._datasets, self._ontologies)

        self.assertEqual(len(ssd.class_nodes), 2)
        self.assertEqual(len(ssd.data_nodes), 2)
        self.assertEqual(len(ssd.mappings), 2)
        self.assertEqual(len(ssd.object_links), 1)  # these are only object properties

        res = ssd.evaluate(ssd)
        print(res)
        self.assertEqual(res['precision'], 1)
        self.assertEqual(res['recall'], 1)
        self.assertEqual(res['jaccard'], 1)

    def test_evaluate_country_names(self):
        """
        Tests evaluation for country_names
        :return:
        """
        path = os.path.join(self._data_path, "country_names.csv")
        dataset = self._datasets.upload(path)
        on = self._ontologies.upload(self._test_owl)
        assert issubclass(type(dataset), DataSet)

        ssd_path = os.path.join(self._ssd_path, "country_names.ssd")
        new_json = dataset.bind_ssd(ssd_path, [on], str(on._prefixes['']))
        empty_ssd = SSD(dataset, on)
        ssd = empty_ssd.update(new_json, self._datasets, self._ontologies)
        pprint(ssd.json)

        self.assertEqual(len(ssd.class_nodes), 1)
        self.assertEqual(len(ssd.data_nodes), 4)
        self.assertEqual(len(ssd.mappings), 4)
        # self.assertEqual(len(ssd.links), 4)       # class and data links
        self.assertEqual(len(ssd.data_links), 4)    # these are only data properties
        self.assertEqual(len(ssd.object_links), 0)  # these are only object properties

        res = self._ssds.compare(ssd, ssd, False, False)
        print(res)
        self.assertEqual(res['precision'], 1)
        self.assertEqual(res['recall'], 1)
        self.assertEqual(res['jaccard'], 1)

    def test_evaluate_country_names_zero(self):
        """
        Tests evaluation for country_names.
        If we tell the comparison to ignore everything, all scores will be 0.
        :return:
        """
        path = os.path.join(self._data_path, "country_names.csv")
        dataset = self._datasets.upload(path)
        on = self._ontologies.upload(self._test_owl)

        ssd_path = os.path.join(self._ssd_path, "country_names.ssd")
        new_json = dataset.bind_ssd(ssd_path, [on], str(on._prefixes['']))
        empty_ssd = SSD(dataset, on)
        ssd = empty_ssd.update(new_json, self._datasets, self._ontologies)

        res = self._ssds.compare(ssd, ssd, True, True)
        print(res)
        self.assertEqual(res['precision'], 0)
        self.assertEqual(res['recall'], 0)
        self.assertEqual(res['jaccard'], 0)

    def test_evaluate_places_dif(self):
        """
        Tests evaluation for places_dif
        :return:
        """
        path = os.path.join(self._data_path, "places_dif.csv")
        dataset = self._datasets.upload(path)
        on = self._ontologies.upload(self._test_owl)
        assert issubclass(type(dataset), DataSet)

        ssd_path = os.path.join(self._ssd_path, "places_dif.ssd")
        new_json = dataset.bind_ssd(ssd_path, [on], str(on._prefixes['']))
        empty_ssd = SSD(dataset, on)
        ssd = empty_ssd.update(new_json, self._datasets, self._ontologies)
        pprint(ssd.json)

        self.assertEqual(len(ssd.class_nodes), 4)
        self.assertEqual(len(ssd.data_nodes), 4)
        self.assertEqual(len(ssd.mappings), 4)
        self.assertEqual(len(ssd.links), 7)         # class and data links
        self.assertEqual(len(ssd.data_links), 4)    # these are only data properties
        self.assertEqual(len(ssd.object_links), 3)  # these are only object properties

        res = self._ssds.compare(ssd, ssd)
        print(res)
        self.assertEqual(res['precision'], 1)
        self.assertEqual(res['recall'], 1)
        self.assertEqual(res['jaccard'], 1)

    def test_evaluate_places_mix(self):
        """
        Tests evaluation for places_mix
        :return:
        """
        path = os.path.join(self._data_path, "places_mix.csv")
        dataset = self._datasets.upload(path)
        on = self._ontologies.upload(self._test_owl)
        assert issubclass(type(dataset), DataSet)

        ssd_path = os.path.join(self._ssd_path, "places_mix.ssd")
        new_json = dataset.bind_ssd(ssd_path, [on], str(on._prefixes['']))
        empty_ssd = SSD(dataset, on)
        ssd = empty_ssd.update(new_json, self._datasets, self._ontologies)
        pprint(ssd.json)

        self.assertEqual(len(ssd.class_nodes), 2)
        self.assertEqual(len(ssd.data_nodes), 4)
        self.assertEqual(len(ssd.mappings), 4)
        self.assertEqual(len(ssd.links), 5)         # class and data links
        self.assertEqual(len(ssd.data_links), 4)    # these are only data properties
        self.assertEqual(len(ssd.object_links), 1)  # these are only object properties

        res = self._ssds.compare(ssd, ssd, False, True)
        print(res)
        self.assertEqual(res['precision'], 1)
        self.assertEqual(res['recall'], 1)
        self.assertEqual(res['jaccard'], 1)

    def test_evaluate_paintings(self):
        """
        Here we have a class node with no data nodes
        :return:
        """
        self._datasets.upload(self._paintings_file)
        on = self._ontologies.upload(self._objects_owl)
        dataset = self._datasets.items[0]
        # print(dataset)
        assert issubclass(type(dataset), DataSet)
        ontology = self._ontologies.items[0]
        # print("namespaces: ", ontology._prefixes)
        # print("class nodes: ", list(ontology._iclass_nodes()))
        # print("data nodes: ", list(ontology._idata_nodes()))
        # print("links: ", list(ontology._ilinks()))

        new_json = dataset.bind_ssd(self._paintings_ssd, [ontology],
                                    str(ontology._prefixes['']))
        # pprint(new_json)
        empty_ssd = SSD(dataset, on)
        ssd = empty_ssd.update(new_json, self._datasets, self._ontologies)
        # pprint(ssd.json)

        self.assertEqual(len(ssd.class_nodes), 3)
        self.assertEqual(len(ssd.links), 4)
        self.assertEqual(len(ssd.data_links), 2)
        self.assertEqual(len(ssd.object_links), 2)
        self.assertEqual(len(ssd.data_nodes), 2)
        self.assertEqual(len(ssd.mappings), 2)

        res = self._ssds.compare(ssd, ssd, True, True)
        print(res)
        self.assertEqual(res['precision'], 1)
        self.assertEqual(res['recall'], 1)
        self.assertEqual(res['jaccard'], 1)

    def test_evaluate_museum(self):
        """
        Here we have a class node with no data nodes, a list of ontologies,
        and a class-instance link. Not all columns from the file get mapped.
        :return:
        """
        dataset = self._datasets.upload(self._museum_file)
        ontologies = []
        for path in os.listdir(self._museum_owl_dir):
            f = os.path.join(self._museum_owl_dir, path)
            ontologies.append(self._ontologies.upload(f))

        new_json = dataset.bind_ssd(self._museum_ssd, ontologies, KARMA_DEFAULT_NS)
        empty_ssd = SSD(dataset, ontologies)
        ssd = empty_ssd.update(new_json, self._datasets, self._ontologies)

        self.assertEqual(len(ssd.class_nodes), 6)
        self.assertEqual(len(ssd.links), 14)  # class instance, data property, object property
        self.assertEqual(len(ssd.data_nodes), 10)
        self.assertEqual(len(ssd.mappings), 10)

        res = self._ssds.compare(ssd, ssd, False, False)
        print(res)
        self.assertEqual(res['precision'], 1)
        self.assertEqual(res['recall'], 1)
        self.assertEqual(res['jaccard'], 1)
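
# The bind-then-update sequence in the tests above repeats almost verbatim.
# A small module-level helper like this one could factor it out; it is a
# sketch only (the name and signature are assumptions, not part of the
# original suite).
def bind_reference_ssd(dataset, ontologies, ssd_path, namespace,
                       dataset_endpoint, ontology_endpoint):
    """Bind a reference .ssd file to a dataset and return the parsed SSD."""
    new_json = dataset.bind_ssd(ssd_path, ontologies, namespace)
    empty_ssd = SSD(dataset, ontologies)
    return empty_ssd.update(new_json, dataset_endpoint, ontology_endpoint)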
class TestMuseum(TestWithServer):
    """
    Tests upload and binding of the museum benchmark
    (datasets, ontologies and SSDs)
    """
    def __init__(self, method_name="runTest"):
        super().__init__(method_name)
        self._datasets = None
        self._ontologies = None
        self._benchmark_path = os.path.join(os.path.dirname(os.path.dirname(__file__)),
                                            "resources", "museum_benchmark")
        self._museum_owl_dir = os.path.join(self._benchmark_path, "owl")
        self._museum_data = os.path.join(self._benchmark_path, 'dataset')
        self._museum_ssd = os.path.join(self._benchmark_path, 'ssd')

    def setUp(self):
        self._datasets = DataSetEndpoint(self._session)
        self._ontologies = OntologyEndpoint(self._session)
        self._ssds = SSDEndpoint(self._session, self._datasets, self._ontologies)
        self._clear_storage()
        _logger.debug("SetUp complete")

    def tearDown(self):
        """
        Be sure to remove all storage elements once a test is finished...
        :return:
        """
        self._clear_storage()

    def _add_owls(self):
        # add ontologies
        ontologies = []
        for path in os.listdir(self._museum_owl_dir):
            f = os.path.join(self._museum_owl_dir, path)
            ontologies.append(self._ontologies.upload(f))
        return ontologies

    def _add_datasets(self, ontologies):
        # we need to unzip the data first
        if os.path.exists(os.path.join(self._museum_data, "data.tar.gz")):
            with tarfile.open(os.path.join(self._museum_data, "data.tar.gz")) as f:
                f.extractall(path=self._museum_data)

        # add datasets with their ssds
        for ds in os.listdir(self._museum_data):
            if ds.endswith(".gz"):
                continue  # skip the archive itself
            ds_f = os.path.join(self._museum_data, ds)
            ds_name = os.path.splitext(ds)[0]
            ssd_f = os.path.join(self._museum_ssd, ds_name + ".ssd")

            _logger.debug("Adding dataset: {}".format(ds_f))
            dataset = self._datasets.upload(ds_f, description="museum_benchmark")

            _logger.debug("Adding ssd: {}".format(ssd_f))
            new_json = dataset.bind_ssd(ssd_f, ontologies, KARMA_DEFAULT_NS)
            empty_ssd = SSD(dataset, ontologies)
            ssd = empty_ssd.update(new_json, self._datasets, self._ontologies)
            self._ssds.upload(ssd)
            # remove the extracted csv dataset once it has been uploaded
            os.remove(ds_f)

    def _clear_storage(self):
        """Removes all server elements"""
        for ssd in self._ssds.items:
            self._ssds.remove(ssd)
        for ds in self._datasets.items:
            self._datasets.remove(ds)
        for on in self._ontologies.items:
            self._ontologies.remove(on)

    def test_evaluate_museum(self):
        """
        Tests that the museum benchmark can be uploaded.
        :return:
        """
        ontologies = self._add_owls()
        self._add_datasets(ontologies)
        self.assertEqual(len(self._ssds.items), 29)
        self.assertEqual(len(self._datasets.items), 29)
        self.assertEqual(len(self._ontologies.items), 11)

    def test_mappings(self):
        """
        Get label data for museum benchmark files
        :return:
        """
        ontologies = self._add_owls()
        self._add_datasets(ontologies)
        self.assertEqual(len(self._ssds.items), 29)
        self.assertEqual(len(self._datasets.items), 29)
        self.assertEqual(len(self._ontologies.items), 11)

        for ssd in self._ssds.items:
            print("name: ", ssd.name)
            print("mappings: ", ssd.mappings)
            csv_file = ssd.name + ".columnmap.txt"
            with open(csv_file, "w+") as csvfile:
                csvwriter = csv.writer(csvfile, dialect="excel")
                csvwriter.writerow(["key", "column_name", "source_name", "semantic_type"])
                for key, value in ssd.mappings.items():
                    label = key.class_node.label + "---" + key.label
                    column = value.name
                    print("column: ", column, ", label:", label)
                    csvwriter.writerow([column, column, ssd.name, label])
        # deliberately fail so the test runner surfaces the generated output
        self.fail()

    def test_s27(self):
        """
        Checks that the s27-s-the-huntington SSD survives an upload round trip.
        :return:
        """
        ontologies = self._add_owls()
        self.assertEqual(len(self._ontologies.items), 11)
        self._add_datasets(ontologies)
        self.assertEqual(len(self._ssds.items), 29)
        self.assertEqual(len(self._datasets.items), 29)

        # first grab the JSON object from the dataset...
        datasets = self._datasets.items
        s27_ds = None
        for ds in datasets:
            if "s27-s-the-huntington.json" in ds.filename:
                s27_ds = ds

        # secondly grab the reference SSD
        s27_ssd = os.path.join(self._benchmark_path, "ssd",
                               "s27-s-the-huntington.json.ssd")

        # bind the dataset and ssd together...
        new_json = s27_ds.bind_ssd(s27_ssd, ontologies, KARMA_DEFAULT_NS)

        # create a new ssd and update with the reference JSON
        empty_ssd = SSD(s27_ds, ontologies)
        ssd = empty_ssd.update(new_json, self._datasets, self._ontologies)

        # upload to the server and ensure it is parsed correctly...
        f = self._ssds.upload(ssd)
        self.assertEqual(set(ssd.mappings), set(f.mappings))
class TestSSD(TestWithServer):
    """
    Tests the SSD class
    """
    def __init__(self, method_name="runTest"):
        super().__init__(method_name)
        self._datasets = None
        self._ontologies = None
        self._ssds = None

        path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "resources")
        self._test_file = os.path.join(path, 'data', 'businessInfo.csv')
        self._test_owl = os.path.join(path, 'owl', 'dataintegration_report_ontology.ttl')
        self._test_ssd = os.path.join(path, 'ssd', 'businessInfo.ssd')

    def setUp(self):
        self._datasets = DataSetEndpoint(self._session)
        self._ontologies = OntologyEndpoint(self._session)
        self._ssds = SSDEndpoint(self._session, self._datasets, self._ontologies)
        self._clear_storage()
        assert os.path.isfile(self._test_file)

    def _clear_storage(self):
        """Removes all server elements"""
        for ssd in self._ssds.items:
            self._ssds.remove(ssd)
        for ds in self._datasets.items:
            self._datasets.remove(ds)
        for on in self._ontologies.items:
            self._ontologies.remove(on)

    def tearDown(self):
        """
        Be sure to remove all storage elements once a test is finished...
        :return:
        """
        self._clear_storage()

    def test_create(self):
        """
        Tests the SSD creation
        :return:
        """
        self._build_simple()

    def _build_simple(self):
        ds = self._datasets.upload(self._test_file)
        on = self._ontologies.upload(self._test_owl)
        single = SSD(dataset=ds, ontology=on)
        self.assertEqual(len(single.data_nodes), 0)
        self.assertEqual(len(single.links), 0)
        self.assertEqual(len(single.columns), 4)
        return single

    def test_map_simple(self):
        """
        Tests the map function for SSD mapping with one map
        :return:
        """
        simple = self._build_simple()
        simple.map(Column("ceo"), DataNode(ClassNode("Person"), "name"))
        self.assertEqual(len(simple.class_nodes), 1)
        self.assertEqual(len(simple.data_nodes), 1)
        self.assertEqual(len(simple.data_links), 1)
        self.assertEqual(len(simple.object_links), 0)

    def test_map_full(self):
        """
        Tests the map function for SSD mapping with a full map
        :return:
        """
        simple = self._build_simple()
        (simple
         .map(Column("company"), DataNode(ClassNode("Organization"), "name"))
         .map(Column("ceo"), DataNode(ClassNode("Person"), "name"))
         .map(Column("city"), DataNode(ClassNode("City"), "name"))
         .map(Column("state"), DataNode(ClassNode("State"), "name")))
        self.assertEqual(len(simple.class_nodes), 4)
        self.assertEqual(len(simple.data_nodes), 4)
        self.assertEqual(len(simple.data_links), 4)
        self.assertEqual(len(simple.object_links), 0)

    def test_duplicate_column_block(self):
        """
        Tests that the map function can only map a column once
        :return:
        """
        simple = self._build_simple()
        with self.assertRaises(Exception):
            (simple
             .map(Column("company"), "Organization.name")
             .map(Column("ceo"), "Person.name")
             .map(Column("city"), "City.name")
             .map(Column("state"), "State.name")
             .map(Column("company"), "State.name")
             .map(Column("company"), "Person.name"))

    def test_duplicate_data_node_block(self):
        """
        Tests that the map function can only map a data node once
        :return:
        """
        simple = self._build_simple()
        with self.assertRaises(Exception):
            (simple
             .map(Column("company"), "Organization.name")
             .map(Column("ceo"), "Person.name")
             .map(Column("city"), "Person.name")
             .map(Column("state"), "State.name"))

    def test_map_short_hand(self):
        """
        Tests the map function for SSD mapping with short-hand strings
        :return:
        """
        simple = self._build_simple()
        (simple
         .map("company", "Organization.name")
         .map("ceo", "Person.name")
         .map("city", "City.name")
         .map("state", "State.name"))
        self.assertEqual(len(simple.class_nodes), 4)
        self.assertEqual(len(simple.data_nodes), 4)
        self.assertEqual(len(simple.data_links), 4)
        self.assertEqual(len(simple.object_links), 0)

    def test_remove(self):
        """
        Tests the removal function when removing data nodes and columns
        :return:
        """
        simple = self._build_simple()
        (simple
         .map("company", "Organization.name")
         .map("ceo", "Person.name")
         .map("city", "City.name")
         .map("state", "State.name")
         .remove(DataNode(ClassNode("Person"), "name")))
        self.assertEqual(len(simple.class_nodes), 3)
        self.assertEqual(len(simple.data_nodes), 3)
        self.assertEqual(len(simple.data_links), 3)
        self.assertEqual(len(simple.object_links), 0)

    def test_remove_column(self):
        """
        Tests the removal function when removing data nodes and columns
        :return:
        """
        simple = self._build_simple()
        (simple
         .map("company", "Organization.name")
         .map("ceo", "Person.name")
         .map("city", "City.name")
         .map("state", "State.name")
         .remove(DataNode(ClassNode("Person"), "name"))
         .remove(Column("city")))
        self.assertEqual(len(simple.class_nodes), 2)
        self.assertEqual(len(simple.data_nodes), 2)
        self.assertEqual(len(simple.data_links), 2)
        self.assertEqual(len(simple.object_links), 0)

    def test_remove_restore(self):
        """
        Tests the removal function and then adding back
        :return:
        """
        simple = self._build_simple()
        (simple
         .map("company", "Organization.name")
         .map("ceo", "Person.name")
         .map("city", "City.name")
         .map("state", "State.name")
         .remove(DataNode(ClassNode("Person"), "name"))
         .remove(Column("city"))
         .map("ceo", "Person.name")
         .map("city", "City.name"))
        self.assertEqual(len(simple.class_nodes), 4)
        self.assertEqual(len(simple.data_nodes), 4)
        self.assertEqual(len(simple.data_links), 4)
        self.assertEqual(len(simple.object_links), 0)

    def test_remove_links(self):
        """
        Tests that a link can be removed after being added
        :return:
        """
        simple = self._build_simple()
        (simple
         .map("company", "Organization.name")
         .map("ceo", "Person.name")
         .map("city", "City.name")
         .map("state", "State.name")
         .link("Organization", "ceo", "Person")
         .remove_link("ceo"))
        self.assertEqual(len(simple.class_nodes), 4)
        self.assertEqual(len(simple.data_nodes), 4)
        self.assertEqual(len(simple.data_links), 4)
        self.assertEqual(len(simple.object_links), 0)

    def test_map_multi_instance(self):
        """
        Tests the map function with multiple instances
        :return:
        """
        simple = self._build_simple()
        (simple
         .map(Column("company"), DataNode(ClassNode("Person", 0), "name"))
         .map(Column("ceo"), DataNode(ClassNode("Person", 1), "name"))
         .map(Column("city"), DataNode(ClassNode("Person", 2), "name"))
         .map(Column("state"), DataNode(ClassNode("Person", 3), "name")))
        self.assertEqual(len(simple.class_nodes), 4)
        self.assertEqual(len(simple.data_nodes), 4)
        self.assertEqual(len(simple.data_links), 4)
        self.assertEqual(len(simple.object_links), 0)

    def test_map_multi_instance_links(self):
        """
        Tests the map function with multiple instances in the links
        :return:
        """
        simple = self._build_simple()
        (simple
         .map(Column("company"), DataNode(ClassNode("City", 0), "name"))
         .map(Column("ceo"), DataNode(ClassNode("City", 1), "name"))
         .map(Column("city"), DataNode(ClassNode("City", 2), "name"))
         .map(Column("state"), DataNode(ClassNode("City", 3), "name"))
         .link(ClassNode("City", 0), "nearby", ClassNode("City", 1))
         .link(ClassNode("City", 1), "nearby", ClassNode("City", 2))
         .link(ClassNode("City", 2), "nearby", ClassNode("City", 3)))
        self.assertEqual(len(simple.class_nodes), 4)
        self.assertEqual(len(simple.data_nodes), 4)
        self.assertEqual(len(simple.data_links), 4)
        self.assertEqual(len(simple.object_links), 3)

    def test_map_ambiguous_class(self):
        """
        An ambiguous mapping (several columns to the same data node,
        with no index given) should raise an error rather than upload.
        :return:
        """
        simple = self._build_simple()
        with self.assertRaises(Exception):
            (simple
             .map("English", "Place.name")
             .map("German", "Place.name")
             .map("French", "Place.name")
             .map("Russian", "Place.name"))

    def test_map_multi_data_prop(self):
        """
        Multiple data properties should be allowed on a single class
        if an index is specified
        """
        simple = self._build_simple()
        # should be ok.
        (simple
         .map("company", DataNode(ClassNode("Place"), "name", 0))
         .map("ceo", DataNode(ClassNode("Place"), "name", 1))
         .map("city", DataNode(ClassNode("Place"), "name", 2))
         .map("state", DataNode(ClassNode("Place"), "name", 3)))
        self.assertEqual(len(simple.class_nodes), 1)
        self.assertEqual(len(simple.data_nodes), 4)
        self.assertEqual(len(simple.data_links), 4)
        self.assertEqual(len(simple.object_links), 0)

    def test_map_overwrite(self):
        """
        Tests that re-specifying a link or class node overwrites the original
        :return:
        """
        simple = self._build_simple()
        (simple
         .map("company", "Person.name")
         .map("ceo", "Person.birthDate")
         .map("city", "City.name")
         .map("state", "State.name")
         .link("City", "state", "State")
         .link("Person", "bornIn", "City")
         .link("City", "state", "State")
         .link("Person", "bornIn", "City"))
        self.assertEqual(len(simple.class_nodes), 3)
        self.assertEqual(len(simple.data_nodes), 4)
        self.assertEqual(len(simple.data_links), 4)
        self.assertEqual(len(simple.object_links), 2)

    def test_map_links(self):
        """
        Tests the map function for SSD mapping with a full map and links
        :return:
        """
        simple = self._build_simple()
        (simple
         .map("company", "Organization.name")
         .map("ceo", "Person.name")
         .map("city", "City.name")
         .map("state", "State.name")
         .link("City", "state", "State")
         .link("Organization", "location", "City")
         .link("Person", "worksFor", "Organization"))
        self.assertEqual(len(simple.class_nodes), 4)
        self.assertEqual(len(simple.data_nodes), 4)
        self.assertEqual(len(simple.data_links), 4)
        self.assertEqual(len(simple.object_links), 3)

    def test_default_map_prefix(self):
        """
        Tests that the map function adds the default prefixes when missing
        :return:
        """
        simple = self._build_simple()
        (simple
         .map("company", "Organization.name")
         .map("ceo", "Person.name")
         .map("city", "City.name")
         .map("state", "State.name"))
        prefixes = [z.prefix for z in simple.data_nodes]
        self.assertEqual(prefixes, [simple.default_namespace for _ in prefixes])
        prefixes = [z.prefix for z in simple.class_nodes]
        self.assertEqual(prefixes, [simple.default_namespace for _ in prefixes])

    def test_default_link_prefix(self):
        """
        Tests that the link function adds the default prefixes when missing
        :return:
        """
        simple = self._build_simple()
        (simple
         .map("company", "Organization.name")
         .map("ceo", "Person.name")
         .map("city", DataNode(ClassNode("City", prefix=simple.default_namespace), "name"))
         .map("state", DataNode(ClassNode("State", prefix=simple.default_namespace), "name"))
         .link("City", "isPartOf", "State"))
        # the last link should use the default namespace; if not, there
        # will be an ambiguity error...
        prefixes = [z.prefix for z in simple.data_nodes]
        self.assertEqual(prefixes, [simple.default_namespace for _ in prefixes])
        prefixes = [z.prefix for z in simple.class_nodes]
        self.assertEqual(prefixes, [simple.default_namespace for _ in prefixes])
        self.assertEqual(len(simple.class_nodes), 4)
        self.assertEqual(len(simple.data_nodes), 4)
        self.assertEqual(len(simple.data_links), 4)
        self.assertEqual(len(simple.object_links), 1)

    def test_stored(self):
        """
        Tests that the storage flag resets when changes are made locally
        :return:
        """
        simple = self._build_simple()
        (simple
         .map("company", "Organization.name")
         .map("ceo", "Person.name")
         .map("city", "City.name")
         .map("state", "State.name")
         .link("Organization", "ceo", "Person")
         .link("Person", "livesIn", "City")
         .link("City", "state", "State"))
        uploaded = self._ssds.upload(simple)
        self.assertTrue(uploaded.stored)
        uploaded.link("City", "isPartOf", "State")
        self.assertFalse(uploaded.stored)
        self._ssds.remove(uploaded)

    def test_map_link_with_no_data_nodes(self):
        """
        Tests the link function when there are no data nodes
        :return:
        """
        simple = self._build_simple()
        # Person (which has no data nodes) provides the only links:
        #
        #          .---- Person ----.
        #         /                  \
        #  Organization             Place        Event
        #       |                  /     \         |
        #     name              name  postalCode endDate
        (simple
         .map("company", "Organization.name")
         .map("city", "Place.name")
         .map("state", "Place.postalCode")
         .map("ceo", "Event.endDate")
         .link("Person", "livesIn", "Place")
         .link("Organization", "ceo", "Person"))
        self.assertEqual(len(simple.class_nodes), 4)
        self.assertEqual(len(simple.data_nodes), 4)
        self.assertEqual(len(simple.data_links), 4)
        self.assertEqual(len(simple.object_links), 2)

    def test_json_writer(self):
        """
        Tests the json writer
        :return:
        """
        simple = self._build_simple()
        (simple
         .map("company", "Organization.name")
         .map("ceo", "Person.name")
         .map("city", "City.name")
         .map("state", "State.name")
         .link("City", "state", "State")
         .link("Organization", "location", "City")
         .link("Person", "worksFor", "Organization"))

        j = SSDJsonWriter(simple).to_dict()
        now = datetime.datetime.now().strftime(format="%Y-%m-%dT%H:%M:%S.%f")
        j["dateCreated"] = now
        j["dateModified"] = now

        test = self._build_simple().update(j, self._datasets, self._ontologies)

        self.assertEqual(len(simple.class_nodes), 4)
        self.assertEqual(len(simple.data_nodes), 4)
        self.assertEqual(len(simple.object_links), 3)
        self.assertEqual(len(simple.data_links), 4)
        self.assertSetEqual(set(simple.class_nodes), set(test.class_nodes))
        self.assertSetEqual(set(simple.data_nodes), set(test.data_nodes))
        self.assertSetEqual(set(simple.object_links), set(test.object_links))
        self.assertDictEqual(simple.mappings, test.mappings)
        self.assertSetEqual(set(simple.data_links), set(test.data_links))
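
# A condensed, stand-alone sketch of the chained mapping API exercised by
# TestSSD above, assuming a live session and the businessInfo resources used
# throughout this module; the function name is hypothetical, not part of the
# original suite.
def build_business_ssd(session, business_csv, ontology_ttl):
    datasets = DataSetEndpoint(session)
    ontologies = OntologyEndpoint(session)
    ds = datasets.upload(business_csv)
    on = ontologies.upload(ontology_ttl)
    return (SSD(dataset=ds, ontology=on)
            .map("company", "Organization.name")  # short-hand for Column -> DataNode
            .map("ceo", "Person.name")
            .map("city", "City.name")
            .map("state", "State.name")
            .link("Organization", "ceo", "Person")
            .link("Person", "worksFor", "Organization"))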
class TestSSDJson(TestWithServer):
    """
    Tests the JsonReader/JsonWriter for SSD
    """
    def __init__(self, method_name="runTest"):
        super().__init__(method_name)
        self._datasets = None
        self._ontologies = None

        path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "resources")
        self._test_owl = os.path.join(path, 'owl', 'dataintegration_report_ontology.ttl')
        self._museum_owl_dir = os.path.join(path, 'owl', 'museum_edm')
        self._business_file = os.path.join(path, 'data', 'businessInfo.csv')
        self._business_ssd = os.path.join(path, 'ssd', 'businessInfo.ssd')
        self._cities_file = os.path.join(path, 'data', 'getCities.csv')
        self._cities_ssd = os.path.join(path, 'ssd', 'getCities.ssd')
        self._tricky_cities_ssd = os.path.join(path, 'ssd', 'tricky.ssd')
        self._objects_owl = os.path.join(path, 'owl', 'objects.ttl')
        self._paintings_file = os.path.join(path, 'data', 'paintings.csv')
        self._paintings_ssd = os.path.join(path, 'ssd', 'paintings.ssd')
        self._museum_file = os.path.join(path, 'data', 'museum.csv')
        self._museum_ssd = os.path.join(path, 'ssd', 'museum.ssd')
        self._ssd_path = os.path.join(path, 'ssd')
        self._data_path = os.path.join(path, 'data')

    def setUp(self):
        self._datasets = DataSetEndpoint(self._session)
        self._ontologies = OntologyEndpoint(self._session)
        self._ssds = SSDEndpoint(self._session, self._datasets, self._ontologies)
        self._clear_storage()

    def _clear_storage(self):
        """Removes all server elements"""
        for ssd in self._ssds.items:
            self._ssds.remove(ssd)
        for ds in self._datasets.items:
            self._datasets.remove(ds)
        for on in self._ontologies.items:
            self._ontologies.remove(on)

    def tearDown(self):
        """
        Be sure to remove all storage elements once a test is finished...
        :return:
        """
        self._clear_storage()

    def test_business(self):
        """
        Tests business info
        :return:
        """
        dataset = self._datasets.upload(self._business_file)
        ontology = self._ontologies.upload(self._test_owl)
        assert issubclass(type(dataset), DataSet)

        new_json = dataset.bind_ssd(self._business_ssd, [ontology],
                                    str(ontology._prefixes['']))
        empty_ssd = SSD(dataset, [ontology])
        ssd = empty_ssd.update(new_json, self._datasets, self._ontologies)
        pprint(ssd.json)

        self.assertEqual(len(ssd.class_nodes), 4)
        self.assertEqual(len(ssd.data_nodes), 4)
        self.assertEqual(len(ssd.mappings), 4)
        self.assertEqual(len(ssd.links), 7)         # class and data links
        self.assertEqual(len(ssd.data_links), 4)    # these are only data properties
        self.assertEqual(len(ssd.object_links), 3)  # these are only object properties

        uploaded = self._ssds.upload(ssd)
        self.assertTrue(uploaded.stored)
        self.assertEqual(len(self._ssds.items), 1)
        self.assertEqual(len(uploaded.class_nodes), 4)
        self.assertEqual(len(uploaded.data_nodes), 4)
        self.assertEqual(len(uploaded.mappings), 4)
        self.assertEqual(len(uploaded.links), 7)         # class and data links
        self.assertEqual(len(uploaded.data_links), 4)    # these are only data properties
        self.assertEqual(len(uploaded.object_links), 3)  # these are only object properties

    def test_tricky_cities(self):
        """
        Here the ssd has two class nodes of the same type
        :return:
        """
        self._datasets.upload(self._cities_file)
        on = self._ontologies.upload(self._test_owl)
        dataset = self._datasets.items[0]
        assert issubclass(type(dataset), DataSet)
        ontology = self._ontologies.items[0]

        new_json = dataset.bind_ssd(self._tricky_cities_ssd, [ontology],
                                    str(ontology._prefixes['']))
        empty_ssd = SSD(dataset, on)
        ssd = empty_ssd.update(new_json, self._datasets, self._ontologies)
        # pprint(ssd.json)

        self.assertEqual(len(ssd.class_nodes), 2)
        self.assertEqual(len(ssd.data_nodes), 2)
        self.assertEqual(len(ssd.mappings), 2)
        self.assertEqual(len(ssd.object_links), 1)  # these are only object properties

        uploaded = self._ssds.upload(ssd)
        self.assertTrue(uploaded.stored)
        self.assertEqual(len(self._ssds.items), 1)
        self.assertEqual(len(uploaded.class_nodes), 2)
        self.assertEqual(len(uploaded.data_nodes), 2)
        self.assertEqual(len(uploaded.mappings), 2)
        self.assertEqual(len(uploaded.object_links), 1)  # these are only object properties

    def test_evaluate_country_names(self):
        """
        Tests the upload round trip for country_names
        :return:
        """
        path = os.path.join(self._data_path, "country_names.csv")
        dataset = self._datasets.upload(path)
        on = self._ontologies.upload(self._test_owl)
        assert issubclass(type(dataset), DataSet)

        ssd_path = os.path.join(self._ssd_path, "country_names.ssd")
        new_json = dataset.bind_ssd(ssd_path, [on], str(on._prefixes['']))
        empty_ssd = SSD(dataset, on)
        ssd = empty_ssd.update(new_json, self._datasets, self._ontologies)

        self.assertEqual(len(ssd.class_nodes), 1)
        self.assertEqual(len(ssd.data_nodes), 4)
        self.assertEqual(len(ssd.mappings), 4)
        self.assertEqual(len(ssd.data_links), 4)    # these are only data properties
        self.assertEqual(len(ssd.object_links), 0)  # these are only object properties

        uploaded = self._ssds.upload(ssd)
        self.assertTrue(uploaded.stored)
        self.assertEqual(len(uploaded.class_nodes), 1)
        self.assertEqual(len(uploaded.data_nodes), 4)
        self.assertEqual(len(uploaded.mappings), 4)
        self.assertEqual(len(uploaded.data_links), 4)    # these are only data properties
        self.assertEqual(len(uploaded.object_links), 0)  # these are only object properties

    def test_evaluate_places_dif(self):
        """
        Tests the upload round trip for places_dif
        :return:
        """
        path = os.path.join(self._data_path, "places_dif.csv")
        dataset = self._datasets.upload(path)
        on = self._ontologies.upload(self._test_owl)
        assert issubclass(type(dataset), DataSet)

        ssd_path = os.path.join(self._ssd_path, "places_dif.ssd")
        new_json = dataset.bind_ssd(ssd_path, [on], str(on._prefixes['']))
        empty_ssd = SSD(dataset, on)
        ssd = empty_ssd.update(new_json, self._datasets, self._ontologies)

        self.assertEqual(len(ssd.class_nodes), 4)
        self.assertEqual(len(ssd.data_nodes), 4)
        self.assertEqual(len(ssd.mappings), 4)
        self.assertEqual(len(ssd.links), 7)         # class and data links
        self.assertEqual(len(ssd.data_links), 4)    # these are only data properties
        self.assertEqual(len(ssd.object_links), 3)  # these are only object properties

        uploaded = self._ssds.upload(ssd)
        self.assertTrue(uploaded.stored)
        self.assertEqual(len(uploaded.class_nodes), 4)
        self.assertEqual(len(uploaded.data_nodes), 4)
        self.assertEqual(len(uploaded.mappings), 4)
        self.assertEqual(len(uploaded.links), 7)         # class and data links
        self.assertEqual(len(uploaded.data_links), 4)    # these are only data properties
        self.assertEqual(len(uploaded.object_links), 3)  # these are only object properties

    def test_evaluate_places_mix(self):
        """
        Tests the upload round trip for places_mix
        :return:
        """
        path = os.path.join(self._data_path, "places_mix.csv")
        dataset = self._datasets.upload(path)
        on = self._ontologies.upload(self._test_owl)
        assert issubclass(type(dataset), DataSet)

        ssd_path = os.path.join(self._ssd_path, "places_mix.ssd")
        new_json = dataset.bind_ssd(ssd_path, [on], str(on._prefixes['']))
        empty_ssd = SSD(dataset, on)
        ssd = empty_ssd.update(new_json, self._datasets, self._ontologies)

        self.assertEqual(len(ssd.class_nodes), 2)
        self.assertEqual(len(ssd.data_nodes), 4)
        self.assertEqual(len(ssd.mappings), 4)
        self.assertEqual(len(ssd.links), 5)         # class and data links
        self.assertEqual(len(ssd.data_links), 4)    # these are only data properties
        self.assertEqual(len(ssd.object_links), 1)  # these are only object properties

        uploaded = self._ssds.upload(ssd)
        self.assertTrue(uploaded.stored)
        self.assertEqual(len(uploaded.class_nodes), 2)
        self.assertEqual(len(uploaded.data_nodes), 4)
        self.assertEqual(len(uploaded.mappings), 4)
        self.assertEqual(len(uploaded.links), 5)         # class and data links
        self.assertEqual(len(uploaded.data_links), 4)    # these are only data properties
        self.assertEqual(len(uploaded.object_links), 1)  # these are only object properties

    def test_evaluate_paintings(self):
        """
        Here we have a class node with no data nodes
        :return:
        """
        self._datasets.upload(self._paintings_file)
        on = self._ontologies.upload(self._objects_owl)
        dataset = self._datasets.items[0]
        assert issubclass(type(dataset), DataSet)
        ontology = self._ontologies.items[0]

        new_json = dataset.bind_ssd(self._paintings_ssd, [ontology],
                                    str(ontology._prefixes['']))
        empty_ssd = SSD(dataset, on)
        ssd = empty_ssd.update(new_json, self._datasets, self._ontologies)

        self.assertEqual(len(ssd.class_nodes), 3)
        self.assertEqual(len(ssd.links), 4)
        self.assertEqual(len(ssd.data_links), 2)
        self.assertEqual(len(ssd.object_links), 2)
        self.assertEqual(len(ssd.data_nodes), 2)
        self.assertEqual(len(ssd.mappings), 2)

        uploaded = self._ssds.upload(ssd)
        self.assertTrue(uploaded.stored)
        self.assertEqual(len(uploaded.class_nodes), 3)
        self.assertEqual(len(uploaded.links), 4)
        self.assertEqual(len(uploaded.data_links), 2)
        self.assertEqual(len(uploaded.object_links), 2)
        self.assertEqual(len(uploaded.data_nodes), 2)
        self.assertEqual(len(uploaded.mappings), 2)

    def test_evaluate_museum(self):
        """
        Here we have a class node with no data nodes, a list of ontologies,
        and a class-instance link. Not all columns from the file get mapped.
        :return:
        """
        dataset = self._datasets.upload(self._museum_file)
        ontologies = []
        for path in os.listdir(self._museum_owl_dir):
            f = os.path.join(self._museum_owl_dir, path)
            ontologies.append(self._ontologies.upload(f))
        assert issubclass(type(dataset), DataSet)
        self.assertEqual(len(ontologies), 11)

        new_json = dataset.bind_ssd(self._museum_ssd, ontologies, KARMA_DEFAULT_NS)
        empty_ssd = SSD(dataset, ontologies)
        ssd = empty_ssd.update(new_json, self._datasets, self._ontologies)

        self.assertEqual(len(ssd.class_nodes), 6)
        self.assertEqual(len(ssd.links), 14)        # class instance, data property, object property
        self.assertEqual(len(ssd.data_links), 9)    # class instance, data property
        self.assertEqual(len(ssd.object_links), 5)  # object property
        self.assertEqual(len(ssd.data_nodes), 10)
        self.assertEqual(len(ssd.mappings), 10)

        uploaded = self._ssds.upload(ssd)
        self.assertTrue(uploaded.stored)
        self.assertEqual(len(uploaded.class_nodes), 6)
        self.assertEqual(len(uploaded.links), 14)        # class instance, data property, object property
        self.assertEqual(len(uploaded.data_links), 9)    # class instance, data property
        self.assertEqual(len(uploaded.object_links), 5)  # object property
        self.assertEqual(len(uploaded.data_nodes), 10)
        self.assertEqual(len(uploaded.mappings), 10)

    def test_show_multi_levels(self):
        """
        Renders an SSD whose semantic model has many levels of class nodes,
        for manual inspection of the output.
        """
        data_path = os.path.join(os.path.dirname(os.path.dirname(__file__)),
                                 "resources", "museum_benchmark", "data_copy",
                                 "s07-s-13.json.csv")
        dataset = self._datasets.upload(data_path)

        ontologies = []
        for path in os.listdir(self._museum_owl_dir):
            f = os.path.join(self._museum_owl_dir, path)
            ontologies.append(self._ontologies.upload(f))
        self.assertEqual(len(ontologies), 11)

        ssd_path = os.path.join(self._ssd_path, "s07_many_levels.ssd")
        new_json = dataset.bind_ssd(ssd_path, ontologies, KARMA_DEFAULT_NS)
        empty_ssd = SSD(dataset, ontologies)
        ssd = empty_ssd.update(new_json, self._datasets, self._ontologies)

        ssd.show()
        print(ssd)
        # deliberately fail so the test runner surfaces the rendered output
        self.fail()