def test_map_multi_instance_links(self):
    """
    Verify that several distinct instances of the same class
    ("City" 0..3) can each be mapped to a column and chained
    together with pairwise object links.

    :return:
    """
    ssd = self._build_simple()
    columns = ["company", "ceo", "city", "state"]
    # Map each column onto its own numbered City instance.
    for index, column_name in enumerate(columns):
        ssd.map(Column(column_name), DataNode(ClassNode("City", index), "name"))
    # Chain the four City instances: 0->1, 1->2, 2->3.
    for index in range(3):
        ssd.link(ClassNode("City", index), "nearby", ClassNode("City", index + 1))
    self.assertEqual(len(ssd.class_nodes), 4)
    self.assertEqual(len(ssd.data_nodes), 4)
    self.assertEqual(len(ssd.data_links), 4)
    self.assertEqual(len(ssd.object_links), 3)
def test_duplicate_data_node_block(self):
    """
    Verify that mapping the same data node ("Person.name") to two
    different columns raises an exception.

    :return:
    """
    ssd = self._build_simple()
    mappings = [
        ("company", "Organization.name"),
        ("ceo", "Person.name"),
        ("city", "Person.name"),  # duplicate data node -> should raise
        ("state", "State.name"),
    ]
    with self.assertRaises(Exception):
        for column_name, node in mappings:
            ssd.map(Column(column_name), node)
def test_eq(self):
    """
    Checks that Column equality is based on the updated state
    (json payload) rather than on the construction-time name.

    :return:
    """
    first = self.column.update(self.column_json, self.sample_ds)
    # Different constructor name, same payload -> must compare equal.
    second = Column("testee2").update(self.column_json, self.sample_ds)
    self.assertEqual(first, second)
def test_map_full(self):
    """
    Verify SSD mapping when every column is mapped to a data node
    on a distinct class.

    :return:
    """
    ssd = self._build_simple()
    pairs = [
        ("company", "Organization"),
        ("ceo", "Person"),
        ("city", "City"),
        ("state", "State"),
    ]
    for column_name, class_label in pairs:
        ssd.map(Column(column_name), DataNode(ClassNode(class_label), "name"))
    self.assertEqual(len(ssd.class_nodes), 4)
    self.assertEqual(len(ssd.data_nodes), 4)
    self.assertEqual(len(ssd.data_links), 4)
    # No links were declared, so there are no object links.
    self.assertEqual(len(ssd.object_links), 0)
def test_add(self):
    """
    Verify that Octopus.add accepts SSDs and ontologies (marking the
    octopus as not stored) and rejects unknown argument types.

    :return:
    """
    octo = self._octopus()
    self.assertEqual(octo.stored, True)

    # Upload a test dataset...
    data_path = os.path.join("tests", "resources", "data")
    postal_code_ds = self._serene.datasets.upload(
        os.path.join(data_path, 'postalCodeLookup.csv'))

    # Upload a test ssd built over that dataset.
    postal_code_ssd = (
        self._serene.SSD(postal_code_ds, self.ontology, name='postal-code')
        .map(Column("zipcode"), "Place.postalCode")
        .map(Column("city"), "City.name")
        .map(Column("state"), "State.name"))

    # Adding the SSD grows the collection and un-stores the octopus.
    ssd_count = len(octo.ssds)
    octo.add(postal_code_ssd)
    self.assertEqual(ssd_count + 1, len(octo.ssds))
    self.assertEqual(octo.stored, False)

    # Adding an ontology behaves the same way.
    test_ontology = self._ontology()
    onto_count = len(octo.ontologies)
    octo.add(test_ontology)
    self.assertEqual(onto_count + 1, len(octo.ontologies))
    self.assertEqual(octo.stored, False)

    # Anything else is rejected.
    with self.assertRaises(ValueError):
        octo.add("junk")
def test_map_simple(self):
    """
    Verify SSD mapping with a single column-to-data-node map.

    :return:
    """
    ssd = self._build_simple()
    ssd.map(Column("ceo"), DataNode(ClassNode("Person"), "name"))
    expectations = (
        ("class_nodes", 1),
        ("data_nodes", 1),
        ("data_links", 1),
        ("object_links", 0),
    )
    for attr, expected in expectations:
        self.assertEqual(len(getattr(ssd, attr)), expected)
def __init__(self, json):
    """
    Initialize a DataSet object with a json response

    :param json: dict parsed from the server's dataset response;
        expected keys: 'id', 'columns', 'filename', 'path',
        'typeMap', 'description', 'dateCreated', 'dateModified'.
        Missing keys raise KeyError.
    """
    self.id = json['id']
    # Each raw column dict is folded into a Column object; the
    # placeholder name '--' is overwritten by update() from the payload.
    self.columns = [Column('--').update(c, self) for c in json['columns']]
    self.filename = json['filename']
    self.path = json['path']
    self.type_map = json['typeMap']
    self.description = json['description']
    self.date_created = convert_datetime(json['dateCreated'])
    self.date_modified = convert_datetime(json['dateModified'])
    # One DataFrame column per dataset column, filled from sample values.
    # NOTE(review): assumes all columns carry equal-length samples —
    # pd.DataFrame raises otherwise; confirm against the server contract.
    self.sample = pd.DataFrame({c.name: c.sample for c in self.columns})
    # True while this local object mirrors the stored server state.
    self._stored = True
def test_remove_column(self):
    """
    Verify that removing a data node and a column both drop the
    corresponding mappings from the SSD.

    :return:
    """
    ssd = self._build_simple()
    mappings = [
        ("company", "Organization.name"),
        ("ceo", "Person.name"),
        ("city", "City.name"),
        ("state", "State.name"),
    ]
    for column_name, node in mappings:
        ssd.map(column_name, node)
    ssd.remove(DataNode(ClassNode("Person"), "name"))
    ssd.remove(Column("city"))
    # Two of the four mappings were removed.
    self.assertEqual(len(ssd.class_nodes), 2)
    self.assertEqual(len(ssd.data_nodes), 2)
    self.assertEqual(len(ssd.data_links), 2)
    self.assertEqual(len(ssd.object_links), 0)
def _ssds(self, datasets, ontology):
    """
    Build and upload the example SSDs used by the tests.

    :param datasets: sequence of five uploaded datasets, indexed in
        the fixed order business-info, employee-addr, cities,
        employees, postal-code
    :param ontology: the ontology every SSD is built against
    :return: list of the uploaded SSD objects, in upload order
    """
    business = (
        self._serene.SSD(datasets[0], ontology, name='business-info')
        .map(Column("company"), DataNode(ClassNode("Organization"), "name"))
        .map(Column("ceo"), DataNode(ClassNode("Person"), "name"))
        .map(Column("city"), DataNode(ClassNode("City"), "name"))
        .map(Column("state"), DataNode(ClassNode("State"), "name"))
        .link("Organization", "operatesIn", "City")
        .link("Organization", "ceo", "Person")
        .link("City", "state", "State"))

    employee_addr = (
        self._serene.SSD(datasets[1], ontology, name='employee-addr')
        .map("name", "Person.name")
        .map("address", "Place.name")
        .map("postcode", "Place.postalCode")
        .link("Person", "livesIn", "Place"))

    cities = (
        self._serene.SSD(datasets[2], ontology, name='cities')
        .map(Column("city"), DataNode(ClassNode("City"), "name"))
        .map(Column("state"), DataNode(ClassNode("State"), "name"))
        .link("City", "state", "State"))

    employees = (
        self._serene.SSD(datasets[3], ontology, name='employees')
        .map(Column("employer"), DataNode(ClassNode("Organization"), "name"))
        .map(Column("employee"), DataNode(ClassNode("Person"), "name"))
        .link("Person", "worksFor", "Organization"))

    postal = (
        self._serene.SSD(datasets[4], ontology, name='postal-code')
        .map(Column("zipcode"), "Place.postalCode")
        .map(Column("city"), "City.name")
        .map(Column("state"), "State.name")
        .link("City", "state", "State")
        .link("City", "isPartOf", "Place"))

    ssds = [business, employee_addr, cities, employees, postal]
    for ssd in ssds:
        self._serene.ssds.upload(ssd)
    return ssds
def setUp(self):
    """Create a fresh Column fixture before every test."""
    fixture_name = "testee"
    self.column = Column(fixture_name)
class TestColumn(unittest.TestCase):
    """
    Tests the Column class
    """
    def __init__(self, methodName='runTest'):
        """
        Initializes the Column testing suite with a sample dataset
        and a reference column payload.
        """
        super().__init__(methodName)
        company_column = {
            'datasetID': 2035625835,
            'id': 1234567890,
            'index': 0,
            'logicalType': 'string',
            'name': 'company',
            'path': 'tests/resources/data/businessinfo.csv',
            'sample': ['Data61'],
            'size': 59
        }
        ceo_column = {
            'datasetID': 2035625835,
            'id': 123456789,
            'index': 1,
            'logicalType': 'string',
            'name': 'ceo',
            'path': 'tests/resources/data/businessinfo.csv',
            'sample': ['Garv Mcowen'],
            'size': 59
        }
        self.sample_ds = DataSet({
            'dateCreated': '2017-03-16T15:29:03.388',
            'dateModified': '2017-03-16T15:29:03.388',
            'description': '',
            'filename': 'businessInfo.csv',
            'id': 2035625835,
            'path': 'tests/resources/data/businessinfo.csv',
            'typeMap': {},
            'columns': [company_column, ceo_column]
        })
        self.column_json = {
            'datasetID': 2035625835,
            'id': 1246005714,
            'index': 0,
            'logicalType': 'string',
            'name': 'company',
            'path': 'tests/resources/data/businessinfo.csv',
            'sample': ['Data61'],
            'size': 59
        }
        self.column = None

    def setUp(self):
        """Build a fresh Column fixture for every test."""
        self.column = Column("testee")

    def test_update(self):
        """
        Tests that the column is updated correctly from json.

        :return:
        """
        self.column.update(self.column_json, self.sample_ds)
        checks = [
            (self.column.index, "index"),
            (self.column.filename, "path"),
            (self.column.name, "name"),
            (self.column.id, "id"),
            (self.column.size, "size"),
            (self.column.datasetID, "datasetID"),
            (self.column.sample, "sample"),
            (self.column.logicalType, "logicalType"),
        ]
        for actual, key in checks:
            self.assertEqual(actual, self.column_json[key])

    def test_repr(self):
        """Tests that the string is output correctly"""
        self.assertEqual("Column(testee)", repr(self.column))

    def test_eq(self):
        """
        Checks that the Column equality is operating correctly
        :return:
        """
        updated = self.column.update(self.column_json, self.sample_ds)
        other = Column("testee2").update(self.column_json, self.sample_ds)
        self.assertEqual(updated, other)

    def test_hash(self):
        """
        Tests that the hash codes of Column are calculated correctly
        :return:
        """
        updated = self.column.update(self.column_json, self.sample_ds)
        key = (self.column_json["name"],
               self.column_json["id"],
               self.column_json["datasetID"])
        self.assertEqual(hash(updated), hash(key))