def testColumnsWithoutData(self):
    tdf = TicDatFactory(data=[["a"], ["b"]])
    for x in ["", "x"]:
        file = os.path.join(_scratchDir, "no_data.xls" + x)
        tdf.xls.write_file(tdf.TicDat(), file)
        dat = tdf.xls.create_tic_dat(file)
        self.assertFalse(dat._len_dict())

def testDups(self):
    if not self.can_run:
        return
    tdf = TicDatFactory(one=[["a"], ["b", "c"]],
                        two=[["a", "b"], ["c"]],
                        three=[["a", "b", "c"], []])
    tdf2 = TicDatFactory(**{t: [[], ["a", "b", "c"]] for t in tdf.all_tables})
    td = tdf2.TicDat(**{t: [[1, 2, 1], [1, 2, 2], [2, 1, 3], [2, 2, 3], [1, 2, 2], ["new", 1, 2]]
                        for t in tdf.all_tables})
    f = makeCleanPath(os.path.join(_scratchDir, "testDups.db"))
    tdf2.sql.write_db_data(td, f)
    dups = tdf.sql.find_duplicates(f)
    self.assertTrue(dups == {'three': {(1, 2, 2): 2}, 'two': {(1, 2): 3}, 'one': {1: 3, 2: 2}})

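# A minimal sketch (not part of the original suite) of the duplicate-detection idiom the
# testDups variants in this file share: a primary-key-free "loose" factory writes rows that
# collide under the real schema's keys, and find_duplicates on the keyed factory reports a
# {table: {pk: row_count}} dict. The table and file names here are illustrative.
def _find_duplicates_sketch():
    keyed = TicDatFactory(widgets=[["name"], ["size"]])
    loose = TicDatFactory(widgets=[[], ["name", "size"]])  # no pks, so dup rows can be written
    dat = loose.TicDat(widgets=[["a", 1], ["a", 2], ["b", 3]])
    path = makeCleanPath(os.path.join(_scratchDir, "find_dups_sketch.db"))
    loose.sql.write_db_data(dat, path)
    return keyed.sql.find_duplicates(path)  # expect {'widgets': {'a': 2}}
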
def _test_generic_free_copy(self, ticDat, tdf, skip_tables=None):
    assert all(tdf.primary_key_fields.get(t) for t in tdf.all_tables)
    replace_name = lambda f: "name_" if f == "name" else f
    clean_tdf = TicDatFactory(**{t: [list(map(replace_name, pks)), dfs]
                                 for t, (pks, dfs) in tdf.schema().items()})
    temp_tdf = TicDatFactory(**{t: v if t in (skip_tables or []) else '*'
                                for t, v in clean_tdf.schema().items()})
    temp_dat = temp_tdf.TicDat(**{t: getattr(ticDat, t) for t in (skip_tables or [])})
    for t in temp_tdf.generic_tables:
        setattr(temp_dat, t,
                getattr(clean_tdf.copy_to_pandas(ticDat, drop_pk_columns=False), t))
    generic_free_dat, _ = utils.create_generic_free(temp_dat, temp_tdf)
    check_dat = clean_tdf.TicDat()
    for t in temp_tdf.generic_tables:
        for r in getattr(generic_free_dat, t):
            pks = clean_tdf.primary_key_fields[t]
            getattr(check_dat, t)[r[pks[0]] if len(pks) == 1 else tuple(r[_] for _ in pks)] = \
                {df: r[df] for df in clean_tdf.data_fields.get(t, [])}
    for t in (skip_tables or []):
        for k, v in getattr(generic_free_dat, t).items():
            getattr(check_dat, t)[k] = v
    self.assertTrue(clean_tdf._same_data(check_dat, clean_tdf.copy_tic_dat(ticDat)))

def test_nullables(self):
    core_path = os.path.join(_scratchDir, "nullables")
    pdf = PanDatFactory(table_with_stuffs=[["field one"], ["field two"]])
    pdf.set_data_type("table_with_stuffs", "field one")
    pdf.set_data_type("table_with_stuffs", "field two", number_allowed=False,
                      strings_allowed='*', nullable=True)
    dat = TicDatFactory(**pdf.schema()).TicDat(
        table_with_stuffs=[[101, "022"], [202, None], [303, "111"]])
    dat = TicDatFactory(**pdf.schema()).copy_to_pandas(dat, drop_pk_columns=False)
    self.assertFalse(pdf.find_data_type_failures(dat))
    for attr, path in [["csv", core_path + "_csv"], ["xls", core_path + ".xlsx"],
                       ["sql", core_path + ".db"], ["json", core_path + ".json"]]:
        f_or_d = "directory" if attr == "csv" else "file"
        write_func, write_kwargs = utils._get_write_function_and_kwargs(pdf, path, f_or_d)
        write_func(dat, path, **write_kwargs)
        dat_1 = utils._get_dat_object(pdf, "create_pan_dat", path, f_or_d, False)
        self.assertTrue(pdf._same_data(dat, dat_1, nans_are_same_for_data_rows=True))

def testCaseSpaceTableNames(self):
    tdf = TicDatFactory(table_one=[["a"], ["b", "c"]],
                        table_two=[["this", "that"], []])
    dir_path = os.path.join(_scratchDir, "case_space")
    dat = tdf.TicDat(table_one=[['a', 2, 3], ['b', 5, 6]],
                     table_two=[["a", "b"], ["c", "d"], ["x", "z"]])
    tdf.csv.write_directory(dat, makeCleanDir(dir_path), case_space_table_names=True)
    self.assertTrue(all(os.path.exists(os.path.join(dir_path, _ + ".csv"))
                        for _ in ["Table One", "Table Two"]))
    self.assertFalse(any(os.path.exists(os.path.join(dir_path, _ + ".csv"))
                         for _ in ["table_one", "table_two"]))
    self.assertTrue(tdf._same_data(dat, tdf.csv.create_tic_dat(dir_path)))
    tdf.csv.write_directory(dat, makeCleanDir(dir_path), case_space_table_names=False)
    self.assertFalse(any(os.path.exists(os.path.join(dir_path, _ + ".csv"))
                         for _ in ["Table One", "Table Two"]))
    self.assertTrue(all(os.path.exists(os.path.join(dir_path, _ + ".csv"))
                        for _ in ["table_one", "table_two"]))
    self.assertTrue(tdf._same_data(dat, tdf.csv.create_tic_dat(dir_path)))

def testDups(self):
    if not self.can_run:
        return
    for hack, raw_data in list(product(*(([True, False],) * 2))):
        tdf = TicDatFactory(one=[["a"], ["b", "c"]],
                            two=[["a", "b"], ["c"]],
                            three=[["a", "b", "c"], []])
        tdf2 = TicDatFactory(**{t: [[], ["a", "b", "c"]] for t in tdf.all_tables})
        td = tdf2.TicDat(**{t: [[1, 2, 1], [1, 2, 2], [2, 1, 3], [2, 2, 3], [1, 2, 2], ["new", 1, 2]]
                            for t in tdf.all_tables})
        dups = tdf.opalytics.find_duplicates(create_inputset_mock(tdf2, td, hack),
                                             raw_data=raw_data)
        self.assertTrue(dups == {'three': {(1, 2, 2): 2}, 'two': {(1, 2): 3}, 'one': {1: 3, 2: 2}})

def test_parameters(self):
    core_path = os.path.join(_scratchDir, "parameters")
    pdf = PanDatFactory(parameters=[["Key"], ["Value"]])
    pdf.add_parameter("Something", 100)
    pdf.add_parameter("Different", 'boo', strings_allowed='*', number_allowed=False)
    dat = TicDatFactory(**pdf.schema()).TicDat(
        parameters=[["Something", float("inf")], ["Different", "inf"]])
    dat = TicDatFactory(**pdf.schema()).copy_to_pandas(dat, drop_pk_columns=False)
    for attr, path in [["sql", core_path + ".db"], ["csv", core_path + "_csv"],
                       ["json", core_path + ".json"], ["xls", core_path + ".xlsx"]]:
        func = "write_directory" if attr == "csv" else "write_file"
        getattr(getattr(pdf, attr), func)(dat, path)
        dat_1 = getattr(pdf, attr).create_pan_dat(path)
        self.assertTrue(pdf._same_data(dat, dat_1))
    core_path = os.path.join(_scratchDir, "parameters_two")
    dat = TicDatFactory(**pdf.schema()).TicDat(
        parameters=[["Something", float("inf")], ["Different", "05701"]])
    dat = TicDatFactory(**pdf.schema()).copy_to_pandas(dat, drop_pk_columns=False)
    for attr, path in [["sql", core_path + ".db"], ["csv", core_path + "_csv"],
                       ["xls", core_path + ".xlsx"], ["json", core_path + ".json"]]:
        func = "write_directory" if attr == "csv" else "write_file"
        getattr(getattr(pdf, attr), func)(dat, path)
        dat_1 = getattr(pdf, attr).create_pan_dat(path)
        self.assertTrue(pdf._same_data(dat, dat_1))

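# A minimal sketch (not part of the original suite) of the parameters idiom test_parameters
# uses: add_parameter registers a typed key/value row in the special "parameters" table, and
# find_data_row_failures (used the same way in test_datetime below) validates those rows.
# The parameter names here are illustrative.
def _parameters_sketch():
    pdf = PanDatFactory(parameters=[["Key"], ["Value"]])
    pdf.add_parameter("Max Widgets", 100)  # numeric parameter, default 100
    pdf.add_parameter("Mode", "fast", number_allowed=False, strings_allowed='*')
    tdf = TicDatFactory(**pdf.schema())
    dat = tdf.copy_to_pandas(tdf.TicDat(parameters=[["Max Widgets", 7], ["Mode", "slow"]]),
                             drop_pk_columns=False)
    assert not pdf.find_data_row_failures(dat)  # both rows satisfy their parameter types
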
def testTryCreateSpace(self):
    def test_(schema_factory, data_factory):
        tdf = TicDatFactory(**schema_factory())
        dat = tdf.copy_tic_dat(data_factory())
        mapping = tlingo._try_create_space_case_mapping(tdf, dat)["mapping"]
        remapdat = tlingo._apply_space_case_mapping(tdf, dat, {v: k for k, v in mapping.items()})
        mapmapdat = tlingo._apply_space_case_mapping(tdf, remapdat, mapping)
        self.assertTrue(tdf._same_data(dat, mapmapdat))
        self.assertFalse(tdf._same_data(dat, remapdat))

    test_(dietSchema, dietData)
    test_(netflowSchema, netflowData)
    test_(sillyMeSchema, lambda: TicDatFactory(**sillyMeSchema()).TicDat(**sillyMeData()))
    tdf = TicDatFactory(**dietSchema())
    dat = tdf.copy_tic_dat(dietData())
    dat.foods["ice_cream"] = dat.foods["ice cream"]
    dat.categories["ICE CREAM"] = {}
    dat.categories["fAt"] = dat.categories["fat"]
    failures = tlingo._try_create_space_case_mapping(tdf, dat)["failures"]
    self.assertTrue(failures == {'ICE_CREAM': ('ICE CREAM', 'ice cream', 'ice_cream'),
                                 'FAT': ('fAt', 'fat')})

def testDataTypes_two(self):
    tdf = TicDatFactory(**dietSchema())
    pdf = PanDatFactory(**tdf.schema())

    def makeIt():
        rtn = tdf.TicDat()
        rtn.foods["a"] = 12
        rtn.foods["b"] = None
        rtn.foods[None] = 101
        rtn.categories["1"] = {"maxNutrition": 100, "minNutrition": 40}
        rtn.categories["2"] = [10, 20]
        for f, p in itertools.product(rtn.foods, rtn.categories):
            rtn.nutritionQuantities[f, p] = 5
        rtn.nutritionQuantities['a', 2] = 12
        return tdf.copy_to_pandas(rtn, drop_pk_columns=False)

    dat = makeIt()
    errs = pdf.find_data_type_failures(dat)
    self.assertTrue(len(errs) == 2 and not pdf.find_duplicates(dat))
    dat_copied = pdf.copy_pan_dat(dat)
    pdf.replace_data_type_failures(dat)
    self.assertTrue(pdf._same_data(dat, dat_copied, epsilon=0.00001))
    pdf2 = pdf.clone()
    pdf2.set_default_value("foods", "name", "a")
    pdf2.set_default_value("nutritionQuantities", "food", "a")
    pdf2.replace_data_type_failures(dat_copied)
    self.assertFalse(pdf._same_data(dat, dat_copied, epsilon=0.00001))
    self.assertFalse(pdf.find_data_type_failures(dat_copied))
    dups = pdf.find_duplicates(dat_copied)
    self.assertTrue(len(dups) == 2 and len(dups["foods"]) == 1
                    and len(dups["nutritionQuantities"]) == 2)

    from pandas import isnull

    def noneify(iter_of_tuples):
        return {tuple(None if isnull(_) else _ for _ in tuple_) for tuple_ in iter_of_tuples}

    self.assertTrue(noneify(errs['nutritionQuantities', 'food'].itertuples(index=False)) ==
                    {(None, "1", 5), (None, "2", 5)})
    self.assertTrue(noneify(errs['foods', 'name'].itertuples(index=False)) == {(None, 101)})
    pdf = PanDatFactory(**tdf.schema())
    pdf.set_data_type("foods", "name", nullable=True, strings_allowed='*')
    pdf.set_data_type("nutritionQuantities", "food", nullable=True, strings_allowed='*')
    self.assertFalse(pdf.find_data_type_failures(dat))
    pdf.set_data_type("foods", "cost", nullable=False)
    errs = pdf.find_data_type_failures(dat)
    self.assertTrue(len(errs) == 1)
    self.assertTrue(noneify(errs['foods', 'cost'].itertuples(index=False)) == {('b', None)})

def testDups(self):
    if not _can_accdb_unit_test:
        return
    tdf = TicDatFactory(one=[["a"], ["b", "c"]],
                        two=[["a", "b"], ["c"]],
                        three=[["a", "b", "c"], []])
    tdf2 = TicDatFactory(**{t: [[], ["a", "b", "c"]] for t in tdf.all_tables})
    td = tdf2.TicDat(**{t: [[1, 2, 1], [1, 2, 2], [2, 1, 3], [2, 2, 3], [1, 2, 2], [11, 1, 2]]
                        for t in tdf.all_tables})
    f = makeCleanPath(os.path.join(_scratchDir, "testDups.accdb"))
    tdf2.mdb.write_file(td, f)
    # shutil.copy(f, "dups.accdb")  # uncomment to make readonly test file as .accdb
    dups = tdf.mdb.find_duplicates(f)
    self.assertTrue(dups == {'three': {(1, 2, 2): 2}, 'two': {(1, 2): 3}, 'one': {1: 3, 2: 2}})

def testSortedTables(self):
    test1 = TicDatFactory(table3=[["PK3", "FK1", "FK2"], ["Val D"]],
                          table2=[["PK2"], ["Val A", "Val B"]],
                          table1=[["PK1"], ["Val C"]])
    test1.add_foreign_key("table3", "table1", ["FK1", "PK1"])
    test1.add_foreign_key("table3", "table2", ["FK2", "PK2"])
    self.assertTrue(tlingo._sorted_tables(test1)[-1] == 'table3')

def testSpacey(self):
    if not _can_unit_test:
        return
    tdf = TicDatFactory(**spacesSchema())
    spacesData = {
        "a_table": {1: {"a Data 3": 3, "a Data 2": 2, "a Data 1": 1},
                    22: (1.1, 12, 12),
                    0.23: (11, 12, 11)},
        "b_table": {("1", "2", "3"): 1, ("a", "b", "b"): 12},
        "c_table": (("1", "2", "3", 4),
                    {"c Data 4": 55, "c Data 2": "b", "c Data 3": "c", "c Data 1": "a"},
                    ("a", "b", "12", 24))
    }
    dat = tdf.TicDat(**spacesData)
    filePath = "spaces.accdb"
    self.assertFalse(tdf.mdb.find_duplicates(filePath))
    dat2 = tdf.mdb.create_tic_dat(filePath, freeze_it=True)
    self.assertTrue(tdf._same_data(dat, dat2))

def testSpacey(self):
    if not _can_accdb_unit_test:
        return
    tdf = TicDatFactory(**spacesSchema())
    spacesData = {
        "a_table": {1: {"a Data 3": 3, "a Data 2": 2, "a Data 1": 1},
                    22: (1.1, 12, 12),
                    0.23: (11, 12, 11)},
        "b_table": {("1", "2", "3"): 1, ("a", "b", "b"): 12},
        "c_table": (("1", "2", "3", 4),
                    {"c Data 4": 55, "c Data 2": "b", "c Data 3": "c", "c Data 1": "a"},
                    ("a", "b", "12", 24))
    }
    dat = tdf.TicDat(**spacesData)
    filePath = makeCleanPath(os.path.join(_scratchDir, "spacey.accdb"))
    tdf.mdb.write_schema(filePath,
                         a_table={"a Field": "double"},
                         c_table={"c Data 1": "text", "c Data 2": "text",
                                  "c Data 3": "text", "c Data 4": "int"})
    tdf.mdb.write_file(dat, filePath)
    self.assertFalse(tdf.mdb.find_duplicates(filePath))
    dat2 = tdf.mdb.create_tic_dat(filePath, freeze_it=True)
    self.assertTrue(tdf._same_data(dat, dat2))
    with py.connect(_connection_str(filePath)) as con:
        for t in tdf.all_tables:
            con.cursor().execute("SELECT * INTO [%s] FROM %s" % (t.replace("_", " "), t)).commit()
            con.cursor().execute("DROP TABLE %s" % t).commit()
    # shutil.copy(filePath, "spaces.accdb")  # uncomment to make readonly test file as .accdb
    dat3 = tdf.mdb.create_tic_dat(filePath, freeze_it=True)
    self.assertTrue(tdf._same_data(dat, dat3))

def _test_generic_copy(self, ticDat, tdf, skip_tables=None):
    assert all(tdf.primary_key_fields.get(t) for t in tdf.all_tables)
    path = makeCleanDir(os.path.join(_scratchDir, "generic_copy"))
    replace_name = lambda f: "name_" if f == "name" else f
    clean_tdf = TicDatFactory(**{t: [list(map(replace_name, pks)), dfs]
                                 for t, (pks, dfs) in tdf.schema().items()})
    temp_tdf = TicDatFactory(**{t: v if t in (skip_tables or []) else '*'
                                for t, v in clean_tdf.schema().items()})
    temp_dat = temp_tdf.TicDat(**{t: getattr(ticDat, t) for t in (skip_tables or [])})
    for t in temp_tdf.generic_tables:
        setattr(temp_dat, t,
                getattr(clean_tdf.copy_to_pandas(ticDat, drop_pk_columns=False), t))
    temp_tdf.sql.write_db_data(temp_dat, os.path.join(path, "f.db"))
    temp_tdf.sql.write_sql_file(temp_dat, os.path.join(path, "f1.sql"), include_schema=False)
    temp_tdf.sql.write_sql_file(temp_dat, os.path.join(path, "f2.sql"), include_schema=True)
    for file_name, includes_schema in [("f.db", False), ("f1.sql", False), ("f2.sql", True)]:
        file_path = os.path.join(path, file_name)
        if file_path.endswith(".db"):
            self.assertFalse(temp_tdf.sql.find_duplicates(file_path))
            read_dat = temp_tdf.sql.create_tic_dat(file_path)
        else:
            read_dat = temp_tdf.sql.create_tic_dat_from_sql(file_path, includes_schema)
        generic_free_dat, _ = utils.create_generic_free(read_dat, temp_tdf)
        check_dat = clean_tdf.TicDat()
        for t in temp_tdf.generic_tables:
            for r in getattr(generic_free_dat, t):
                pks = clean_tdf.primary_key_fields[t]
                getattr(check_dat, t)[r[pks[0]] if len(pks) == 1 else tuple(r[_] for _ in pks)] = \
                    {df: r[df] for df in clean_tdf.data_fields.get(t, [])}
        for t in (skip_tables or []):
            for k, v in getattr(generic_free_dat, t).items():
                getattr(check_dat, t)[k] = v
        self.assertTrue(clean_tdf._same_data(check_dat, clean_tdf.copy_tic_dat(ticDat)))

def testDenormalizedErrors(self):
    if not self.canRun:
        return
    c = clean_denormalization_errors
    f = utils.find_denormalized_sub_table_failures
    tdf = TicDatFactory(**spacesSchema())
    dat = tdf.TicDat(**spacesData())
    p = lambda: tdf.copy_to_pandas(dat, drop_pk_columns=False).b_table
    self.assertFalse(f(p(), "b Field 1", ("b Field 2", "b Field 3")))
    dat.b_table[2, 2, 3] = "boger"
    self.assertFalse(f(p(), "b Field 1", ("b Field 2", "b Field 3")))
    chk = f(p(), "b Field 2", ("b Field 1", "b Field 3"))
    self.assertTrue(c(chk) == {2: {'b Field 1': {1, 2}}})
    dat.b_table[2, 2, 4] = "boger"
    dat.b_table[1, 'b', 'b'] = "boger"
    chk = f(p(), ["b Field 2"], ("b Field 1", "b Field 3", "b Data"))
    self.assertTrue(c(chk) == c({2: {'b Field 3': (3, 4), 'b Data': (1, 'boger'),
                                     'b Field 1': (1, 2)},
                                 'b': {'b Data': ('boger', 12), 'b Field 1': ('a', 1)}}))
    ex = self.firesException(lambda: f(p(), ["b Data"], "wtf"))
    self.assertTrue("wtf isn't a column" in ex)
    p = lambda: tdf.copy_to_pandas(dat, drop_pk_columns=False).c_table
    chk = f(p(), pk_fields=["c Data 1", "c Data 2"], data_fields=["c Data 3", "c Data 4"])
    self.assertTrue(c(chk) == {('a', 'b'): {'c Data 3': {'c', 12}, 'c Data 4': {24, 'd'}}})
    dat.c_table.append((1, 2, 3, 4))
    dat.c_table.append((1, 2, 1, 4))
    dat.c_table.append((1, 2, 1, 5))
    dat.c_table.append((1, 2, 3, 6))
    chk = f(p(), pk_fields=["c Data 1", "c Data 2"], data_fields=["c Data 3", "c Data 4"])
    self.assertTrue(c(chk) == {('a', 'b'): {'c Data 3': {'c', 12}, 'c Data 4': {24, 'd'}},
                               (1, 2): {'c Data 3': {3, 1}, 'c Data 4': {4, 5, 6}}})

def testFindDups(self):
    pdf = PanDatFactory(**sillyMeSchema())
    tdf = TicDatFactory(**{k: [[], list(pkfs) + list(dfs)]
                           for k, (pkfs, dfs) in sillyMeSchema().items()})
    rows = [(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)]
    ticDat = tdf.TicDat(**{t: rows for t in tdf.all_tables})
    panDat = pdf.copy_pan_dat(copy_to_pandas_with_reset(tdf, ticDat))
    dups = pdf.find_duplicates(panDat)
    self.assertTrue(set(dups) == {'a'} and set(dups['a']['aField']) == {1})
    dups = pdf.find_duplicates(panDat, as_table=False, keep=False)
    self.assertTrue(set(dups) == {'a'} and dups['a'].value_counts()[True] == 2)
    dups = pdf.find_duplicates(panDat, as_table=False)
    self.assertTrue(set(dups) == {'a'} and dups['a'].value_counts()[True] == 1)
    rows = [(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40), (1, 2, 3, 40)]
    ticDat = tdf.TicDat(**{t: rows for t in tdf.all_tables})
    panDat = pdf.copy_pan_dat(copy_to_pandas_with_reset(tdf, ticDat))
    dups = pdf.find_duplicates(panDat, keep=False)
    self.assertTrue(set(dups) == {'a', 'b'} and set(dups['a']['aField']) == {1})
    dups = pdf.find_duplicates(panDat, as_table=False, keep=False)
    self.assertTrue({k: v.value_counts()[True] for k, v in dups.items()} == {'a': 3, 'b': 2})

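# Aside (illustrative, not from the original suite): the keep argument above appears to follow
# pandas.DataFrame.duplicated semantics, which this pure-pandas sketch demonstrates. With
# keep="first" (the default) only the later copies of a repeated key are flagged; with
# keep=False every copy is flagged.
def _duplicated_semantics_sketch():
    import pandas as pd
    df = pd.DataFrame({"aField": [1, 1, 10]})
    assert df.duplicated(keep="first").tolist() == [False, True, False]
    assert df.duplicated(keep=False).tolist() == [True, True, False]
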
def testDups(self):
    if not self.can_run:
        return
    for verbose in [True, False]:
        tdf = TicDatFactory(one=[["a"], ["b", "c"]],
                            two=[["a", "b"], ["c"]],
                            three=[["a", "b", "c"], []])
        tdf2 = TicDatFactory(**{t: [[], ["a", "b", "c"]] for t in tdf.all_tables})
        td = tdf2.TicDat(**{t: [[1, 2, 1], [1, 2, 2], [2, 1, 3], [2, 2, 3], [1, 2, 2], ["new", 1, 2]]
                            for t in tdf.all_tables})
        writePath = os.path.join(makeCleanDir(os.path.join(_scratchDir, "dups")), "file.json")
        tdf2.json.write_file(td, writePath, verbose=verbose)
        dups = tdf.json.find_duplicates(writePath)
        self.assertTrue(dups == {'three': {(1, 2, 2): 2}, 'two': {(1, 2): 3}, 'one': {1: 3, 2: 2}})

def testDietWithInfFlagging(self):
    diet_pdf = PanDatFactory(**dietSchema())
    addDietDataTypes(diet_pdf)
    tdf = TicDatFactory(**dietSchema())
    dat = tdf.copy_to_pandas(tdf.copy_tic_dat(dietData()), drop_pk_columns=False)
    diet_pdf.set_infinity_io_flag(999999999)
    core_path = os.path.join(_scratchDir, "diet_with_inf_flagging")
    diet_pdf.sql.write_file(dat, core_path + ".db")
    diet_pdf.csv.write_directory(dat, core_path + "_csv")
    diet_pdf.json.write_file(dat, core_path + ".json")
    diet_pdf.xls.write_file(dat, core_path + ".xlsx")
    for attr, f in [["sql", core_path + ".db"], ["csv", core_path + "_csv"],
                    ["json", core_path + ".json"], ["xls", core_path + ".xlsx"]]:
        dat_1 = getattr(diet_pdf, attr).create_pan_dat(f)
        self.assertTrue(diet_pdf._same_data(dat, dat_1, epsilon=1e-5))
        pdf = diet_pdf.clone()
        dat_1 = getattr(pdf, attr).create_pan_dat(f)
        self.assertTrue(pdf._same_data(dat, dat_1, epsilon=1e-5))
        pdf = PanDatFactory(**diet_pdf.schema())
        dat_1 = getattr(pdf, attr).create_pan_dat(f)
        self.assertFalse(pdf._same_data(dat, dat_1, epsilon=1e-5))
        protein = dat_1.categories["name"] == "protein"
        self.assertTrue(list(dat_1.categories[protein]["maxNutrition"])[0] == 999999999)
        dat_1.categories.loc[protein, "maxNutrition"] = float("inf")
        self.assertTrue(pdf._same_data(dat, dat_1, epsilon=1e-5))

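# A minimal sketch (not part of the original suite) of the infinity-flagging round trip
# testDietWithInfFlagging exercises: with set_infinity_io_flag(N), float("inf") is written to
# file as N, and a factory carrying the same flag reads N back as float("inf"). The schema
# and file name here are illustrative.
def _infinity_flag_sketch():
    pdf = PanDatFactory(bounds=[["name"], ["upper"]])
    pdf.set_infinity_io_flag(999999999)
    tdf = TicDatFactory(**pdf.schema())
    dat = tdf.copy_to_pandas(tdf.TicDat(bounds=[["x", float("inf")]]), drop_pk_columns=False)
    path = os.path.join(_scratchDir, "inf_flag_sketch.json")
    pdf.json.write_file(dat, path)  # the "x" row lands on disk with upper == 999999999
    return pdf.json.create_pan_dat(path)  # the flagged value reads back as float("inf")
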
def _testPdfReproduction(self, pdf):
    def _tdfs_same(pdf, pdf2):
        self.assertTrue(pdf.schema() == pdf2.schema())
        self.assertTrue(set(pdf.foreign_keys) == set(pdf2.foreign_keys))
        self.assertTrue(pdf.data_types == pdf2.data_types)
        self.assertTrue(pdf.default_values == pdf2.default_values)

    _tdfs_same(pdf, TicDatFactory.create_from_full_schema(pdf.schema(True)))
    _tdfs_same(pdf, TicDatFactory.create_from_full_schema(_deep_anonymize(pdf.schema(True))))

def testSpaces(self):
    if not self.can_run:
        return
    for hack, raw_data in list(product(*(([True, False],) * 2))):
        tdf = TicDatFactory(**spacesSchema())
        ticDat = tdf.TicDat(**spacesData())
        self.assertTrue(tdf._same_data(ticDat, tdf.opalytics.create_tic_dat(
            create_inputset_mock(tdf, ticDat, hack), raw_data=raw_data)))

def testTwo(self):
    objOrig = dietData()
    staticFactory = TicDatFactory(**dietSchema())
    tables = set(staticFactory.primary_key_fields)
    ticDat = staticFactory.freeze_me(
        staticFactory.TicDat(**{t: getattr(objOrig, t) for t in tables}))
    self.assertTrue(staticFactory.good_tic_dat_object(ticDat))
    for t in tables:
        self._assertSame(getattr(objOrig, t), getattr(ticDat, t),
                         lambda _t: staticFactory.good_tic_dat_table(_t, t))

def testSillyTwoTables(self):
    if not self.can_run:
        return
    tdf = TicDatFactory(**sillyMeSchema())
    ticDat = tdf.TicDat(**sillyMeDataTwoTables())
    filePath = os.path.join(_scratchDir, "sillyMeTwoTables.xls")
    tdf.xls.write_file(ticDat, filePath)
    xlsTicDat = tdf.xls.create_tic_dat(filePath)
    self.assertTrue(tdf._same_data(ticDat, xlsTicDat))

def test_datetime(self):
    core_path = os.path.join(_scratchDir, "parameters")
    pdf = PanDatFactory(table_with_stuffs=[["field one"], ["field two"]],
                        parameters=[["a"], ["b"]])
    pdf.add_parameter("p1", "Dec 15 1970", datetime=True)
    pdf.add_parameter("p2", None, datetime=True, nullable=True)
    pdf.set_data_type("table_with_stuffs", "field one", datetime=True)
    pdf.set_data_type("table_with_stuffs", "field two", datetime=True, nullable=True)
    dat = TicDatFactory(**pdf.schema()).TicDat(
        table_with_stuffs=[[dateutil.parser.parse("July 11 1972"), None],
                           [datetime.datetime.now(), dateutil.parser.parse("Sept 11 2011")]],
        parameters=[["p1", "7/11/1911"], ["p2", None]])
    dat = TicDatFactory(**pdf.schema()).copy_to_pandas(dat, drop_pk_columns=False)
    self.assertFalse(pdf.find_data_type_failures(dat) or pdf.find_data_row_failures(dat))
    for attr, path in [["csv", core_path + "_csv"], ["xls", core_path + ".xlsx"],
                       ["sql", core_path + ".db"], ["json", core_path + ".json"]]:
        func = "write_directory" if attr == "csv" else "write_file"
        getattr(getattr(pdf, attr), func)(dat, path)
        dat_1 = getattr(pdf, attr).create_pan_dat(path)
        self.assertFalse(pdf._same_data(dat, dat_1))
        self.assertFalse(pdf.find_data_type_failures(dat_1) or pdf.find_data_row_failures(dat_1))
        dat_1 = pdf.copy_to_tic_dat(dat_1)
        self.assertTrue(set(dat_1.parameters) == {'p1', 'p2'})
        self.assertTrue(isinstance(dat_1.parameters["p1"]["b"],
                                   (datetime.datetime, numpy.datetime64))
                        and not pd.isnull(dat_1.parameters["p1"]["b"]))
        self.assertTrue(pd.isnull(dat_1.parameters["p2"]["b"]))
        self.assertTrue(all(isinstance(_, (datetime.datetime, numpy.datetime64))
                            and not pd.isnull(_) for _ in dat_1.table_with_stuffs))
        self.assertTrue(all(isinstance(_, (datetime.datetime, numpy.datetime64)) or _ is None
                            or utils.safe_apply(math.isnan)(_)
                            for v in dat_1.table_with_stuffs.values() for _ in v.values()))
        self.assertTrue({pd.isnull(_) for v in dat_1.table_with_stuffs.values()
                         for _ in v.values()} == {True, False})

def testSillyTwoTables(self):
    if not self.can_run:
        return
    tdf = TicDatFactory(**sillyMeSchema())
    ticDat = tdf.TicDat(**sillyMeDataTwoTables())
    dirPath = os.path.join(_scratchDir, "sillyTwoTables")
    tdf.csv.write_directory(ticDat, dirPath)
    self.assertFalse(tdf.csv.find_duplicates(dirPath))
    csvTicDat = tdf.csv.create_tic_dat(dirPath)
    self.assertTrue(tdf._same_data(ticDat, csvTicDat))

def testNetflow(self):
    if not _can_unit_test:
        return
    tdf = TicDatFactory(**netflowSchema())
    addNetflowForeignKeys(tdf)
    ticDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(netflowData(), t) for t in tdf.all_tables}))
    filePath = "netflow.accdb"
    self.assertFalse(tdf.mdb.find_duplicates(filePath))
    mdbTicDat = tdf.mdb.create_tic_dat(filePath, freeze_it=True)
    self.assertTrue(tdf._same_data(ticDat, mdbTicDat))

    def changeIt():
        mdbTicDat.inflow['Pencils', 'Boston']["quantity"] = 12

    self.assertTrue(self.firesException(changeIt))
    self.assertTrue(tdf._same_data(ticDat, mdbTicDat))
    mdbTicDat = tdf.mdb.create_tic_dat(filePath)
    self.assertTrue(tdf._same_data(ticDat, mdbTicDat))
    self.assertFalse(self.firesException(changeIt))
    self.assertFalse(tdf._same_data(ticDat, mdbTicDat))
    pkHacked = netflowSchema()
    pkHacked["nodes"][0] = ["nimrod"]
    tdfHacked = TicDatFactory(**pkHacked)
    self.assertTrue("Unable to recognize field nimrod in table nodes" in
                    self.firesException(lambda: tdfHacked.mdb.create_tic_dat(filePath)))

def testWeirdDiets(self):
    if not self.can_run:
        return
    filePath = os.path.join(_scratchDir, "weirdDiet.db")
    tdf = TicDatFactory(**dietSchema())
    ticDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(dietData(), t)
                                         for t in tdf.primary_key_fields}))
    tdf2 = TicDatFactory(**dietSchemaWeirdCase())
    dat2 = copyDataDietWeirdCase(ticDat)
    tdf2.sql.write_db_data(dat2, filePath, allow_overwrite=True)
    self.assertFalse(tdf2.sql.find_duplicates(filePath))
    sqlTicDat = tdf.sql.create_tic_dat(filePath)
    self.assertTrue(tdf._same_data(ticDat, sqlTicDat))
    tdf3 = TicDatFactory(**dietSchemaWeirdCase2())
    dat3 = copyDataDietWeirdCase2(ticDat)
    tdf3.sql.write_db_data(dat3, makeCleanPath(filePath))
    with sql.connect(filePath) as con:
        con.execute("ALTER TABLE nutrition_quantities RENAME TO [nutrition quantities]")
    sqlTicDat2 = tdf3.sql.create_tic_dat(filePath)
    self.assertTrue(tdf3._same_data(dat3, sqlTicDat2))
    with sql.connect(filePath) as con:
        con.execute("create table nutrition_quantities(boger)")
    self.assertTrue(self.firesException(lambda: tdf3.sql.create_tic_dat(filePath)))

def testWeirdDiets(self):
    if not _can_accdb_unit_test:
        return
    filePath = os.path.join(_scratchDir, "weirdDiet.accdb")
    tdf = TicDatFactory(**dietSchema())
    ticDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(dietData(), t)
                                         for t in tdf.primary_key_fields}))
    tdf2 = TicDatFactory(**dietSchemaWeirdCase())
    dat2 = copyDataDietWeirdCase(ticDat)
    tdf2.mdb.write_file(dat2, filePath, allow_overwrite=True)
    accdbTicDat = tdf.mdb.create_tic_dat(filePath)
    self.assertTrue(tdf._same_data(ticDat, accdbTicDat))
    tdf3 = TicDatFactory(**dietSchemaWeirdCase2())
    dat3 = copyDataDietWeirdCase2(ticDat)
    tdf3.mdb.write_file(dat3, makeCleanPath(filePath))
    with py.connect(_connection_str(filePath)) as con:
        con.cursor().execute("SELECT * INTO [nutrition quantities] FROM nutrition_quantities").commit()
        con.cursor().execute("DROP TABLE nutrition_quantities").commit()
    accdbTicDat2 = tdf3.mdb.create_tic_dat(filePath)
    self.assertTrue(tdf3._same_data(dat3, accdbTicDat2))
    with py.connect(_connection_str(filePath)) as con:
        con.cursor().execute("create table nutrition_quantities (boger int)").commit()
    self.assertTrue(self.firesException(lambda: tdf3.mdb.create_tic_dat(filePath)))

def testMissingTable(self):
    if not self.can_run:
        return
    tdf = TicDatFactory(**dietSchema())
    tdf2 = TicDatFactory(**{k: v for k, v in dietSchema().items()
                            if k != "nutritionQuantities"})
    ticDat2 = tdf2.copy_tic_dat(dietData())
    filePath = makeCleanPath(os.path.join(_scratchDir, "diet_missing.xlsx"))
    tdf2.xls.write_file(ticDat2, filePath)
    ticDat3 = tdf.xls.create_tic_dat(filePath)
    self.assertTrue(tdf2._same_data(ticDat2, ticDat3))
    self.assertTrue(all(hasattr(ticDat3, x) for x in tdf.all_tables))
    self.assertFalse(ticDat3.nutritionQuantities)
    self.assertTrue(ticDat3.categories and ticDat3.foods)
    tdf2 = TicDatFactory(**{k: v for k, v in dietSchema().items() if k == "categories"})
    ticDat2 = tdf2.copy_tic_dat(dietData())
    filePath = makeCleanPath(os.path.join(_scratchDir, "diet_missing.xlsx"))
    tdf2.xls.write_file(ticDat2, filePath)
    ticDat3 = tdf.xls.create_tic_dat(filePath)
    self.assertTrue(tdf2._same_data(ticDat2, ticDat3))
    self.assertTrue(all(hasattr(ticDat3, x) for x in tdf.all_tables))
    self.assertFalse(ticDat3.nutritionQuantities or ticDat3.foods)
    self.assertTrue(ticDat3.categories)

def testSpacesOpalytics(self):
    if not self.can_run:
        return
    for hack, raw_data in list(itertools.product(*(([True, False],) * 2))):
        tdf = TicDatFactory(**spacesSchema())
        ticDat = tdf.TicDat(**spacesData())
        inputset = create_inputset_mock(tdf, ticDat, hack)
        pdf = PanDatFactory(**tdf.schema())
        panDat = pdf.opalytics.create_pan_dat(inputset, raw_data=raw_data)
        self.assertTrue(tdf._same_data(ticDat, pdf.copy_to_tic_dat(panDat)))

def testNetflow(self):
    if not self.can_run:
        return
    for hack, raw_data in list(product(*(([True, False],) * 2))):
        tdf = TicDatFactory(**netflowSchema())
        ticDat = tdf.copy_tic_dat(netflowData())
        self.assertTrue(tdf._same_data(ticDat, tdf.opalytics.create_tic_dat(
            create_inputset_mock(tdf, ticDat, hack), raw_data=raw_data)))
        ticDat.nodes[12] = {}
        self.assertTrue(tdf._same_data(ticDat, tdf.opalytics.create_tic_dat(
            create_inputset_mock(tdf, ticDat, hack), raw_data=raw_data)))

def testThree(self):
    objOrig = netflowData()
    staticFactory = TicDatFactory(**netflowSchema())
    goodTable = lambda t: lambda _t: staticFactory.good_tic_dat_table(_t, t)
    tables = set(staticFactory.primary_key_fields)
    ticDat = staticFactory.freeze_me(
        staticFactory.TicDat(**{t: getattr(objOrig, t) for t in tables}))
    self.assertTrue(staticFactory.good_tic_dat_object(ticDat))
    for t in tables:
        self._assertSame(getattr(objOrig, t), getattr(ticDat, t), goodTable(t))
    objOrig.commodities.append(12.3)
    objOrig.arcs[(1, 2)] = [12]
    self._assertSame(objOrig.nodes, ticDat.nodes, goodTable("nodes"))
    self._assertSame(objOrig.cost, ticDat.cost, goodTable("cost"))
    self.assertTrue(firesException(lambda: self._assertSame(
        objOrig.commodities, ticDat.commodities, goodTable("commodities"))))
    self.assertTrue(firesException(lambda: self._assertSame(
        objOrig.arcs, ticDat.arcs, goodTable("arcs"))))
    ticDat = staticFactory.freeze_me(
        staticFactory.TicDat(**{t: getattr(objOrig, t) for t in tables}))
    for t in tables:
        self._assertSame(getattr(objOrig, t), getattr(ticDat, t), goodTable(t))
    self.assertTrue(ticDat.arcs[1, 2]["capacity"] == 12)
    self.assertTrue(12.3 in ticDat.commodities)
    objOrig.cost[5] = 5
    self.assertTrue("cost cannot be treated as a ticDat table : Inconsistent key lengths"
                    in firesException(lambda: staticFactory.freeze_me(
                        staticFactory.TicDat(**{t: getattr(objOrig, t) for t in tables}))))
    objOrig = netflowData()

    def editMeBadly(t):
        def rtn():
            t.cost["hack"] = 12
        return rtn

    def editMeWell(t):
        def rtn():
            t.cost["hack", "my", "balls"] = 12.12
        return rtn

    self.assertTrue(all(firesException(editMeWell(t)) and firesException(editMeBadly(t))
                        for t in (ticDat, staticFactory.freeze_me(staticFactory.TicDat()))))

    def attributeMe(t):
        def rtn():
            t.boger = "bogerwoger"
        return rtn

    self.assertTrue(firesException(attributeMe(ticDat)) and
                    firesException(attributeMe(staticFactory.freeze_me(staticFactory.TicDat()))))
    mutable = staticFactory.TicDat(**{t: getattr(objOrig, t) for t in tables})
    for t in tables:
        self._assertSame(getattr(objOrig, t), getattr(mutable, t), goodTable(t))
    self.assertTrue(firesException(editMeBadly(mutable)))
    self.assertFalse(firesException(editMeWell(mutable)) or firesException(attributeMe(mutable)))
    self.assertTrue(firesException(lambda: self._assertSame(
        objOrig.cost, mutable.cost, goodTable("cost"))))

def testSilly(self):
    tdf = TicDatFactory(**sillyMeSchema())
    ticDat = tdf.TicDat(**sillyMeData())
    schema2 = sillyMeSchema()
    schema2["b"][0] = ("bField2", "bField1", "bField3")
    schema3 = sillyMeSchema()
    schema3["a"][1] = ("aData2", "aData3", "aData1")
    schema4 = sillyMeSchema()
    schema4["a"][1] = ("aData1", "aData3")
    schema5 = sillyMeSchema()
    _tuple = lambda x: tuple(x) if utils.containerish(x) else (x,)
    for t in ("a", "b"):
        schema5[t][1] = _tuple(schema5[t][1]) + _tuple(schema5[t][0])
    schema5["a"][0], schema5["b"][0] = (), []
    schema6 = sillyMeSchema()
    schema6["d"] = [["dField"], ()]
    tdf2, tdf3, tdf4, tdf5, tdf6 = (TicDatFactory(**x) for x in
                                    (schema2, schema3, schema4, schema5, schema6))
    tdf5.set_generator_tables(("a", "c"))
    filePath = os.path.join(_scratchDir, "silly.db")
    tdf.sql.write_db_data(ticDat, filePath)
    ticDat2 = tdf2.sql.create_tic_dat(filePath)
    self.assertFalse(tdf._same_data(ticDat, ticDat2))
    ticDat3 = tdf3.sql.create_tic_dat(filePath)
    self.assertTrue(tdf._same_data(ticDat, ticDat3))
    ticDat4 = tdf4.sql.create_tic_dat(filePath)
    for t in ["a", "b"]:
        for k, v in getattr(ticDat4, t).items():
            for _k, _v in v.items():
                self.assertTrue(getattr(ticDat, t)[k][_k] == _v)
            if set(v) == set(getattr(ticDat, t)[k]):
                self.assertTrue(t == "b")
            else:
                self.assertTrue(t == "a")
    ticDat5 = tdf5.sql.create_tic_dat(filePath)
    self.assertTrue(tdf5._same_data(tdf._keyless(ticDat), ticDat5))
    self.assertTrue(callable(ticDat5.a) and callable(ticDat5.c) and not callable(ticDat5.b))
    self.assertTrue("table d" in self.firesException(lambda: tdf6.sql.create_tic_dat(filePath)))
    ticDat.a["theboger"] = (1, None, 12)
    tdf.sql.write_db_data(ticDat, makeCleanPath(filePath))
    ticDatNone = tdf.sql.create_tic_dat(filePath, freeze_it=True)
    self.assertTrue(tdf._same_data(ticDat, ticDatNone))
    self.assertTrue(ticDatNone.a["theboger"]["aData2"] == None)

def testSeven(self):
    tdf = TicDatFactory(**dietSchema())

    def makeIt():
        rtn = tdf.TicDat()
        rtn.foods["a"] = {}
        rtn.categories["1"] = {}
        rtn.categories["2"] = [0, 1]
        self.assertTrue(rtn.categories["2"]["minNutrition"] == 0)
        self.assertTrue(rtn.categories["2"]["maxNutrition"] == 1)
        rtn.nutritionQuantities['junk', 1] = {}
        return tdf.freeze_me(rtn)

    td = makeIt()
    self.assertTrue(td.foods["a"]["cost"] == 0 and td.categories["1"].values() == (0, 0)
                    and td.nutritionQuantities['junk', 1]["qty"] == 0)
    tdf = TicDatFactory(**dietSchema())
    tdf.set_default_values(foods={"cost": "dontcare"}, nutritionQuantities={"qty": 100})
    td = makeIt()
    self.assertTrue(td.foods["a"]["cost"] == 'dontcare' and td.categories["1"].values() == (0, 0)
                    and td.nutritionQuantities['junk', 1]["qty"] == 100)
    tdf = TicDatFactory(**dietSchema())
    tdf.set_default_value("categories", "minNutrition", 1)
    tdf.set_default_value("categories", "maxNutrition", 2)
    td = makeIt()
    self.assertTrue(td.foods["a"]["cost"] == 0 and td.categories["1"].values() == (1, 2)
                    and td.nutritionQuantities['junk', 1]["qty"] == 0)

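# A minimal sketch (not part of the original suite) of the default-value behavior testSeven
# exercises: data fields omitted when a row is created are filled from the factory defaults.
# The table and field names here are illustrative.
def _default_value_sketch():
    tdf = TicDatFactory(items=[["name"], ["weight", "label"]])
    tdf.set_default_values(items={"weight": 1.0, "label": "unlabeled"})
    dat = tdf.TicDat()
    dat.items["widget"] = {}  # no data fields supplied
    assert dat.items["widget"]["weight"] == 1.0
    assert dat.items["widget"]["label"] == "unlabeled"
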
def testRowOffsets(self):
    tdf = TicDatFactory(boger=[[], ["the", "big", "boger"]],
                        woger=[[], ["the", "real", "big", "woger"]])
    td = tdf.freeze_me(tdf.TicDat(
        boger=([1, 2, 3], [12, 24, 36], tdf.data_fields["boger"], [100, 200, 400]),
        woger=([[1, 2, 3, 4]] * 4) + [tdf.data_fields["woger"]] +
              ([[100, 200, 300, 400]] * 5)))
    filePath = os.path.join(_scratchDir, "rowoff.xls")
    tdf.xls.write_file(td, filePath)
    td1 = tdf.xls.create_tic_dat(filePath)
    td2 = tdf.xls.create_tic_dat(filePath, {"woger": 5})
    td3 = tdf.xls.create_tic_dat(filePath, {"woger": 5, "boger": 3})
    self.assertTrue(tdf._same_data(td, td1))
    tdCheck = tdf.TicDat(boger=td2.boger, woger=td.woger)
    self.assertTrue(tdf._same_data(td, tdCheck))
    self.assertTrue(all(td2.woger[i]["big"] == 300 for i in range(5)))
    self.assertTrue(all(td3.woger[i]["real"] == 200 for i in range(5)))
    self.assertTrue(td3.boger[0]["big"] == 200 and len(td3.boger) == 1)

def testNetflow(self):
    tdf = TicDatFactory(**netflowSchema())
    ticDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(netflowData(), t)
                                         for t in tdf.primary_key_fields}))
    filePath = os.path.join(_scratchDir, "netflow.xls")
    tdf.xls.write_file(ticDat, filePath)
    xlsTicDat = tdf.xls.create_tic_dat(filePath, freeze_it=True)
    self.assertTrue(tdf._same_data(ticDat, xlsTicDat))

    def changeIt():
        xlsTicDat.inflow['Pencils', 'Boston']["quantity"] = 12

    self.assertTrue(self.firesException(changeIt))
    self.assertTrue(tdf._same_data(ticDat, xlsTicDat))
    xlsTicDat = tdf.xls.create_tic_dat(filePath)
    self.assertTrue(tdf._same_data(ticDat, xlsTicDat))
    self.assertFalse(self.firesException(changeIt))
    self.assertFalse(tdf._same_data(ticDat, xlsTicDat))
    self.assertFalse(tdf.xls.get_duplicates(filePath))
    pkHacked = netflowSchema()
    pkHacked["nodes"][0] = ["nimrod"]
    tdfHacked = TicDatFactory(**pkHacked)
    self.assertTrue(self.firesException(lambda: tdfHacked.xls.write_file(ticDat, filePath)))
    tdfHacked.xls.write_file(ticDat, filePath, allow_overwrite=True)
    self.assertTrue("nodes : name" in
                    self.firesException(lambda: tdf.xls.create_tic_dat(filePath)))

def testNetflow(self):
    tdf = TicDatFactory(**netflowSchema())
    addNetflowForeignKeys(tdf)
    ticDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(netflowData(), t)
                                         for t in tdf.all_tables}))
    filePath = os.path.join(_scratchDir, "netflow.mdb")
    tdf.mdb.write_file(ticDat, filePath)
    mdbTicDat = tdf.mdb.create_tic_dat(filePath, freeze_it=True)
    self.assertTrue(tdf._same_data(ticDat, mdbTicDat))

    def changeIt():
        mdbTicDat.inflow['Pencils', 'Boston']["quantity"] = 12

    self.assertTrue(self.firesException(changeIt))
    self.assertTrue(tdf._same_data(ticDat, mdbTicDat))
    mdbTicDat = tdf.mdb.create_tic_dat(filePath)
    self.assertTrue(tdf._same_data(ticDat, mdbTicDat))
    self.assertFalse(self.firesException(changeIt))
    self.assertFalse(tdf._same_data(ticDat, mdbTicDat))
    pkHacked = netflowSchema()
    pkHacked["nodes"][0] = ["nimrod"]
    tdfHacked = TicDatFactory(**pkHacked)
    ticDatHacked = tdfHacked.TicDat(**{t: getattr(ticDat, t) for t in tdf.all_tables})
    tdfHacked.mdb.write_file(ticDatHacked, makeCleanPath(filePath))
    self.assertTrue(self.firesException(lambda: tdfHacked.mdb.write_file(ticDat, filePath)))
    tdfHacked.mdb.write_file(ticDat, filePath, allow_overwrite=True)
    self.assertTrue("Unable to recognize field name in table nodes" in
                    self.firesException(lambda: tdf.mdb.create_tic_dat(filePath)))

def testNetflow(self):
    tdf = TicDatFactory(**netflowSchema())
    ticDat = tdf.TicDat(**{t: getattr(netflowData(), t) for t in tdf.primary_key_fields})
    dirPath = os.path.join(_scratchDir, "netflow")
    tdf.csv.write_directory(ticDat, dirPath)
    csvTicDat = tdf.csv.create_tic_dat(dirPath, freeze_it=True)
    self.assertFalse(tdf.csv.get_duplicates(dirPath))
    self.assertTrue(tdf._same_data(ticDat, csvTicDat))
    csvTicDat = tdf.csv.create_tic_dat(dirPath, freeze_it=True, headers_present=False)
    self.assertFalse(tdf._same_data(ticDat, csvTicDat))
    tdf.csv.write_directory(ticDat, dirPath, write_header=False, allow_overwrite=True)
    self.assertTrue(self.firesException(lambda: tdf.csv.create_tic_dat(dirPath, freeze_it=True)))
    csvTicDat = tdf.csv.create_tic_dat(dirPath, headers_present=False, freeze_it=True)
    self.assertTrue(tdf._same_data(ticDat, csvTicDat))
    ticDat.nodes[12] = {}
    tdf.csv.write_directory(ticDat, dirPath, allow_overwrite=True)
    csvTicDat = tdf.csv.create_tic_dat(dirPath, freeze_it=True)
    self.assertTrue(tdf._same_data(ticDat, csvTicDat))
    # minor flaw - strings that are floatable get turned into floats when reading csvs
    del ticDat.nodes[12]
    ticDat.nodes['12'] = {}
    self.assertTrue(firesException(lambda: tdf.csv.write_directory(ticDat, dirPath)))
    tdf.csv.write_directory(ticDat, dirPath, allow_overwrite=True)
    csvTicDat = tdf.csv.create_tic_dat(dirPath, freeze_it=True)
    self.assertFalse(tdf._same_data(ticDat, csvTicDat))

def testDiet(self):
    tdf = TicDatFactory(**dietSchema())
    ticDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(dietData(), t)
                                         for t in tdf.primary_key_fields}))
    dirPath = os.path.join(_scratchDir, "diet")
    tdf.csv.write_directory(ticDat, dirPath)
    self.assertFalse(tdf.csv.get_duplicates(dirPath))
    csvTicDat = tdf.csv.create_tic_dat(dirPath)
    self.assertTrue(tdf._same_data(ticDat, csvTicDat))

    def change():
        csvTicDat.categories["calories"]["minNutrition"] = 12

    self.assertFalse(firesException(change))
    self.assertFalse(tdf._same_data(ticDat, csvTicDat))
    self.assertTrue(self.firesException(lambda: tdf.csv.write_directory(
        ticDat, dirPath, dialect="excel_t")).endswith("Invalid dialect excel_t"))
    tdf.csv.write_directory(ticDat, dirPath, dialect="excel-tab", allow_overwrite=True)
    self.assertTrue(self.firesException(lambda: tdf.csv.create_tic_dat(dirPath, freeze_it=True)))
    csvTicDat = tdf.csv.create_tic_dat(dirPath, freeze_it=True, dialect="excel-tab")
    self.assertTrue(firesException(change))
    self.assertTrue(tdf._same_data(ticDat, csvTicDat))

def testNetflow(self):
    tdf = TicDatFactory(**netflowSchema())
    addNetflowForeignKeys(tdf)
    ordered = tdf.sql._ordered_tables()
    self.assertTrue(ordered.index("nodes") <
                    min(ordered.index(_) for _ in ("arcs", "cost", "inflow")))
    self.assertTrue(ordered.index("commodities") <
                    min(ordered.index(_) for _ in ("cost", "inflow")))
    ticDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(netflowData(), t)
                                         for t in tdf.primary_key_fields}))
    filePath = os.path.join(_scratchDir, "netflow.sql")
    tdf.sql.write_db_data(ticDat, filePath)
    sqlTicDat = tdf.sql.create_tic_dat(filePath, freeze_it=True)
    self.assertTrue(tdf._same_data(ticDat, sqlTicDat))

    def changeIt():
        sqlTicDat.inflow['Pencils', 'Boston']["quantity"] = 12

    self.assertTrue(self.firesException(changeIt))
    self.assertTrue(tdf._same_data(ticDat, sqlTicDat))
    sqlTicDat = tdf.sql.create_tic_dat(filePath)
    self.assertTrue(tdf._same_data(ticDat, sqlTicDat))
    self.assertFalse(self.firesException(changeIt))
    self.assertFalse(tdf._same_data(ticDat, sqlTicDat))
    pkHacked = netflowSchema()
    pkHacked["nodes"][0] = ["nimrod"]
    tdfHacked = TicDatFactory(**pkHacked)
    ticDatHacked = tdfHacked.TicDat(**{t: getattr(ticDat, t) for t in tdf.all_tables})
    tdfHacked.sql.write_db_data(ticDatHacked, makeCleanPath(filePath))
    self.assertTrue(self.firesException(lambda: tdfHacked.sql.write_db_data(ticDat, filePath)))
    tdfHacked.sql.write_db_data(ticDat, filePath, allow_overwrite=True)
    self.assertTrue("Unable to recognize field name in table nodes" in
                    self.firesException(lambda: tdf.sql.create_tic_dat(filePath)))
    ticDatNew = tdf.TicDat(**{t: getattr(netflowData(), t) for t in tdf.primary_key_fields})
    ticDatNew.cost['Pencils', 'booger', 'wooger'] = 10
    ticDatNew.cost['junker', 'Detroit', 'New York'] = 20
    ticDatNew.cost['bunker', 'Detroit', 'New Jerk'] = 20
    ticDatNew.arcs['booger', 'wooger'] = 112
    self.assertTrue({f[:2] + f[2][:1]: set(v.native_pks)
                     for f, v in tdf.find_foreign_key_failures(ticDatNew).items()} ==
                    {('arcs', 'nodes', u'destination'): {('booger', 'wooger')},
                     ('arcs', 'nodes', u'source'): {('booger', 'wooger')},
                     ('cost', 'commodities', u'commodity'): {('bunker', 'Detroit', 'New Jerk'),
                                                             ('junker', 'Detroit', 'New York')},
                     ('cost', 'nodes', u'destination'): {('bunker', 'Detroit', 'New Jerk'),
                                                         ('Pencils', 'booger', 'wooger')},
                     ('cost', 'nodes', u'source'): {('Pencils', 'booger', 'wooger')}})

def testDiet(self):
    tdf = TicDatFactory(**dietSchema())
    ticDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(dietData(), t)
                                         for t in tdf.primary_key_fields}))
    filePath = makeCleanPath(os.path.join(_scratchDir, "diet.mdb"))
    tdf.mdb.write_file(ticDat, filePath)
    mdbTicDat = tdf.mdb.create_tic_dat(filePath)
    self.assertTrue(tdf._same_data(ticDat, mdbTicDat))

    def changeit():
        mdbTicDat.categories["calories"]["minNutrition"] = 12

    changeit()
    self.assertFalse(tdf._same_data(ticDat, mdbTicDat))
    self.assertTrue(self.firesException(lambda: tdf.mdb.write_file(ticDat, filePath)))
    tdf.mdb.write_file(ticDat, filePath, allow_overwrite=True)
    mdbTicDat = tdf.mdb.create_tic_dat(filePath, freeze_it=True)
    self.assertTrue(tdf._same_data(ticDat, mdbTicDat))
    self.assertTrue(self.firesException(changeit))
    self.assertTrue(tdf._same_data(ticDat, mdbTicDat))

def testDiet(self):
    tdf = TicDatFactory(**dietSchema())
    ticDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(dietData(), t)
                                         for t in tdf.primary_key_fields}))
    filePath = os.path.join(_scratchDir, "diet.xls")
    tdf.xls.write_file(ticDat, filePath)
    xlsTicDat = tdf.xls.create_tic_dat(filePath)
    self.assertTrue(tdf._same_data(ticDat, xlsTicDat))
    xlsTicDat.categories["calories"]["minNutrition"] = 12
    self.assertFalse(tdf._same_data(ticDat, xlsTicDat))
    self.assertFalse(tdf.xls.get_duplicates(filePath))
    ex = self.firesException(lambda: tdf.xls.create_tic_dat(
        filePath, row_offsets={t: 1 for t in tdf.all_tables}))
    self.assertTrue("field names could not be found" in ex)
    xlsTicDat = tdf.xls.create_tic_dat(filePath, row_offsets={t: 1 for t in tdf.all_tables},
                                       headers_present=False)
    self.assertTrue(tdf._same_data(xlsTicDat, ticDat))
    xlsTicDat = tdf.xls.create_tic_dat(filePath, row_offsets={t: 2 for t in tdf.all_tables},
                                       headers_present=False)
    self.assertFalse(tdf._same_data(xlsTicDat, ticDat))
    self.assertTrue(all(len(getattr(ticDat, t)) - 1 == len(getattr(xlsTicDat, t))
                        for t in tdf.all_tables))

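# Illustrative note (an interpretation, not from the original file): row_offsets tells
# xls.create_tic_dat to skip that many leading spreadsheet rows per table before reading.
# So an offset of 1 with headers_present=False re-reads a normally written sheet (the
# header row is consumed by the offset), while an offset of 2 also drops each table's
# first data row, which is exactly what the assertions above and in testRowOffsets check.
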
def testSilly(self):
    tdf = TicDatFactory(**sillyMeSchema())
    ticDat = tdf.TicDat(**sillyMeData())
    schema2 = sillyMeSchema()
    schema2["b"][0] = ("bField2", "bField1", "bField3")
    schema3 = sillyMeSchema()
    schema3["a"][1] = ("aData2", "aData3", "aData1")
    schema4 = sillyMeSchema()
    schema4["a"][1] = ("aData1", "aData3")
    schema5 = sillyMeSchema()
    _tuple = lambda x: tuple(x) if utils.containerish(x) else (x,)
    for t in ("a", "b"):
        schema5[t][1] = _tuple(schema5[t][1]) + _tuple(schema5[t][0])
    schema5["a"][0], schema5["b"][0] = (), []
    schema6 = sillyMeSchema()
    schema6["d"] = [["dField"], ()]
    tdf2, tdf3, tdf4, tdf5, tdf6 = (TicDatFactory(**x) for x in
                                    (schema2, schema3, schema4, schema5, schema6))
    tdf5.set_generator_tables(("a", "c"))
    filePath = os.path.join(_scratchDir, "silly.xls")
    tdf.xls.write_file(ticDat, filePath)
    ticDat2 = tdf2.xls.create_tic_dat(filePath)
    self.assertFalse(tdf._same_data(ticDat, ticDat2))
    ticDat3 = tdf3.xls.create_tic_dat(filePath)
    self.assertTrue(tdf._same_data(ticDat, ticDat3))
    ticDat4 = tdf4.xls.create_tic_dat(filePath)
    for t in ["a", "b"]:
        for k, v in getattr(ticDat4, t).items():
            for _k, _v in v.items():
                self.assertTrue(getattr(ticDat, t)[k][_k] == _v)
            if set(v) == set(getattr(ticDat, t)[k]):
                self.assertTrue(t == "b")
            else:
                self.assertTrue(t == "a")
    ticDat5 = tdf5.xls.create_tic_dat(filePath)
    self.assertTrue(tdf5._same_data(tdf._keyless(ticDat), ticDat5))
    self.assertTrue(callable(ticDat5.a) and callable(ticDat5.c) and not callable(ticDat5.b))
    ticDat6 = tdf6.xls.create_tic_dat(filePath)
    self.assertTrue(tdf._same_data(ticDat, ticDat6))
    self.assertTrue(firesException(lambda: tdf6._same_data(ticDat, ticDat6)))
    self.assertTrue(hasattr(ticDat6, "d") and utils.dictish(ticDat6.d))

    def writeData(data, write_header=True):
        import xlwt
        book = xlwt.Workbook()
        for t in tdf.all_tables:
            sheet = book.add_sheet(t)
            if write_header:
                for i, f in enumerate(tdf.primary_key_fields.get(t, ()) +
                                      tdf.data_fields.get(t, ())):
                    sheet.write(0, i, f)
            for rowInd, row in enumerate(data):
                for fieldInd, cellValue in enumerate(row):
                    sheet.write(rowInd + (1 if write_header else 0), fieldInd, cellValue)
        if os.path.exists(filePath):
            os.remove(filePath)
        book.save(filePath)

    writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)])
    ticDatMan = tdf.xls.create_tic_dat(filePath, freeze_it=True)
    self.assertTrue(len(ticDatMan.a) == 2 and len(ticDatMan.b) == 3)
    self.assertTrue(ticDatMan.b[1, 20, 30]["bData"] == 40)
    rowCount = tdf.xls.get_duplicates(filePath)
    self.assertTrue(set(rowCount) == {'a'} and set(rowCount["a"]) == {1}
                    and rowCount["a"][1] == 2)
    writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)], write_header=False)
    self.assertTrue(self.firesException(lambda: tdf.xls.create_tic_dat(filePath, freeze_it=True)))
    ticDatMan = tdf.xls.create_tic_dat(filePath, freeze_it=True, headers_present=False)
    self.assertTrue(len(ticDatMan.a) == 2 and len(ticDatMan.b) == 3)
    self.assertTrue(ticDatMan.b[1, 20, 30]["bData"] == 40)
    rowCount = tdf.xls.get_duplicates(filePath, headers_present=False)
    self.assertTrue(set(rowCount) == {'a'} and set(rowCount["a"]) == {1}
                    and rowCount["a"][1] == 2)
    ticDat.a["theboger"] = (1, None, 12)
    tdf.xls.write_file(ticDat, filePath, allow_overwrite=True)
    ticDatNone = tdf.xls.create_tic_dat(filePath, freeze_it=True)
    # THIS IS A FLAW - but a minor one. None's are hard to represent.
    # It is turning into the empty string here.
    # Not sure how to handle this, but documenting for now.
    self.assertFalse(tdf._same_data(ticDat, ticDatNone))
    self.assertTrue(ticDatNone.a["theboger"]["aData2"] == "")
    writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40), (1, 20, 30, 12)])
    rowCount = tdf.xls.get_duplicates(filePath)
    self.assertTrue(set(rowCount) == {'a', 'b'} and set(rowCount["a"]) == {1}
                    and rowCount["a"][1] == 3)
    self.assertTrue(set(rowCount["b"]) == {(1, 20, 30)} and rowCount["b"][1, 20, 30] == 2)

def testOne(self):
    def _cleanIt(x):
        x.foods['macaroni'] = {"cost": 2.09}
        x.foods['milk'] = {"cost": 0.89}
        return x

    dataObj = dietData()
    tdf = TicDatFactory(**dietSchema())
    self.assertTrue(tdf.good_tic_dat_object(dataObj))
    dataObj2 = tdf.copy_tic_dat(dataObj)
    dataObj3 = tdf.copy_tic_dat(dataObj, freeze_it=True)
    dataObj4 = tdf.TicDat(**tdf.as_dict(dataObj3))
    self.assertTrue(all(tdf._same_data(dataObj, x) and dataObj is not x
                        for x in (dataObj2, dataObj3, dataObj4)))
    dataObj = _cleanIt(dataObj)
    self.assertTrue(tdf.good_tic_dat_object(dataObj))
    self.assertTrue(all(tdf._same_data(dataObj, x) and dataObj is not x
                        for x in (dataObj2, dataObj3)))

    def hackit(x):
        x.foods["macaroni"] = 100

    self.assertTrue(self.firesException(lambda: hackit(dataObj3)))
    hackit(dataObj2)
    self.assertTrue(not tdf._same_data(dataObj, dataObj2) and
                    tdf._same_data(dataObj, dataObj3))
    msg = []
    dataObj.foods[("milk", "cookies")] = {"cost": float("inf")}
    dataObj.boger = object()
    self.assertFalse(tdf.good_tic_dat_object(dataObj) or
                     tdf.good_tic_dat_object(dataObj, bad_message_handler=msg.append))
    self.assertTrue({"foods : Inconsistent key lengths"} == set(msg))
    self.assertTrue(all(tdf.good_tic_dat_table(getattr(dataObj, t), t)
                        for t in ("categories", "nutritionQuantities")))
    dataObj = dietData()
    dataObj.categories["boger"] = {"cost": 1}
    dataObj.categories["boger"] = {"cost": 1}
    self.assertFalse(tdf.good_tic_dat_object(dataObj) or
                     tdf.good_tic_dat_object(dataObj, bad_message_handler=msg.append))
    self.assertTrue({'foods : Inconsistent key lengths',
                     'categories : Inconsistent data field name keys.'} == set(msg))
    ex = firesException(lambda: tdf.freeze_me(tdf.TicDat(
        **{t: getattr(dataObj, t) for t in tdf.primary_key_fields}))).message
    self.assertTrue("categories cannot be treated as a ticDat table : "
                    "Inconsistent data field name keys" in ex)

def testFive(self):
    tdf = TicDatFactory(**netflowSchema())
    addNetflowForeignKeys(tdf)
    dat = tdf.freeze_me(tdf.TicDat(**{t: getattr(netflowData(), t) for t in tdf.all_tables}))
    obfudat = tdf.obfusimplify(dat, freeze_it=1)
    self.assertFalse(tdf._same_data(dat, obfudat.copy))
    for (s, d), r in obfudat.copy.arcs.items():
        self.assertFalse((s, d) in dat.arcs)
        self.assertTrue(dat.arcs[obfudat.renamings[s][1],
                                 obfudat.renamings[d][1]]["capacity"] == r["capacity"])
    obfudat = tdf.obfusimplify(dat, freeze_it=1, skip_tables=["commodities", "nodes"])
    self.assertTrue(tdf._same_data(obfudat.copy, dat))
    tdf = TicDatFactory(**netflowSchema())
    addNetflowForeignKeys(tdf)
    mone, one2one = "many-to-one", "one-to-one"
    fk, fkm = _ForeignKey, _ForeignKeyMapping
    self.assertTrue(set(tdf.foreign_keys) ==
                    {fk("arcs", 'nodes', fkm('source', u'name'), mone),
                     fk("arcs", 'nodes', fkm('destination', u'name'), mone),
                     fk("cost", 'nodes', fkm('source', u'name'), mone),
                     fk("cost", 'nodes', fkm('destination', u'name'), mone),
                     fk("cost", 'commodities', fkm('commodity', u'name'), mone),
                     fk("inflow", 'commodities', fkm('commodity', u'name'), mone),
                     fk("inflow", 'nodes', fkm('node', u'name'), mone)})
    tdf.clear_foreign_keys("cost")
    self.assertTrue(set(tdf.foreign_keys) ==
                    {fk("arcs", 'nodes', fkm('source', u'name'), mone),
                     fk("arcs", 'nodes', fkm('destination', u'name'), mone),
                     fk("inflow", 'commodities', fkm('commodity', u'name'), mone),
                     fk("inflow", 'nodes', fkm('node', u'name'), mone)})
    tdf = TicDatFactory(**dietSchema())
    self.assertFalse(tdf.foreign_keys)
    addDietForeignKeys(tdf)
    self.assertTrue(set(tdf.foreign_keys) ==
                    {fk("nutritionQuantities", 'categories', fkm('category', u'name'), mone),
                     fk("nutritionQuantities", 'foods', fkm('food', u'name'), mone)})
    tdf.TicDat()
    self.assertTrue(self.firesException(lambda: tdf.clear_foreign_keys("nutritionQuantities")))
    self.assertTrue(tdf.foreign_keys)
    tdf = TicDatFactory(**dietSchema())
    addDietForeignKeys(tdf)
    tdf.clear_foreign_keys("nutritionQuantities")
    self.assertFalse(tdf.foreign_keys)
    tdf = TicDatFactory(parentTable=[["pk"], ["pd1", "pd2", "pd3"]],
                        goodChild=[["gk"], ["gd1", "gd2"]],
                        badChild=[["bk1", "bk2"], ["bd"]],
                        appendageChild=[["ak"], ["ad1", "ad2"]],
                        appendageBadChild=[["bk1", "bk2"], []])
    tdf.add_foreign_key("goodChild", "parentTable", fkm("gd1", "pk"))
    tdf.add_foreign_key("badChild", "parentTable", ["bk2", "pk"])
    self.assertTrue("many-to-many" in self.firesException(
        lambda: tdf.add_foreign_key("badChild", "parentTable", ["bd", "pd2"])))
    tdf.add_foreign_key("appendageChild", "parentTable", ["ak", "pk"])
    tdf.add_foreign_key("appendageBadChild", "badChild", (("bk2", "bk2"), ("bk1", "bk1")))
    fks = tdf.foreign_keys
    _getfk = lambda t: next(_ for _ in fks if _.native_table == t)
    self.assertTrue(_getfk("goodChild").cardinality == "many-to-one")
    self.assertTrue(_getfk("badChild").cardinality == "many-to-one")
    self.assertTrue(_getfk("appendageChild").cardinality == "one-to-one")
    self.assertTrue(_getfk("appendageBadChild").cardinality == "one-to-one")
    tdf.clear_foreign_keys("appendageBadChild")
    self.assertTrue(tdf.foreign_keys and "appendageBadChild" not in tdf.foreign_keys)
    tdf.clear_foreign_keys()
    self.assertFalse(tdf.foreign_keys)

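# A minimal sketch (not part of the original suite) of the foreign-key round trip testFive
# and testSix exercise: declare a child-to-parent link, then ask the factory for rows that
# reference missing parents. The table names here are illustrative.
def _foreign_key_sketch():
    tdf = TicDatFactory(parent=[["name"], []], child=[["name"], ["parent"]])
    tdf.add_foreign_key("child", "parent", ("parent", "name"))
    dat = tdf.TicDat(parent=[["p1"]], child=[["c1", "p1"], ["c2", "no_such_parent"]])
    assert tdf.find_foreign_key_failures(dat)  # "c2" points at a parent that isn't there
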
def testSix(self):
    tdf = TicDatFactory(plants=[["name"], ["stuff", "otherstuff"]],
                        lines=[["name"], ["plant", "weird stuff"]],
                        line_descriptor=[["name"], ["booger"]],
                        products=[["name"], ["gover"]],
                        production=[["line", "product"], ["min", "max"]],
                        pureTestingTable=[[], ["line", "plant", "product", "something"]],
                        extraProduction=[["line", "product"], ["extramin", "extramax"]],
                        weirdProduction=[["line1", "line2", "product"], ["weirdmin", "weirdmax"]])
    tdf.add_foreign_key("production", "lines", ("line", "name"))
    tdf.add_foreign_key("production", "products", ("product", "name"))
    tdf.add_foreign_key("lines", "plants", ("plant", "name"))
    tdf.add_foreign_key("line_descriptor", "lines", ("name", "name"))
    for f in set(tdf.data_fields["pureTestingTable"]).difference({"something"}):
        tdf.add_foreign_key("pureTestingTable", "%ss" % f, (f, "name"))
    tdf.add_foreign_key("extraProduction", "production",
                        (("line", "line"), ("product", "product")))
    tdf.add_foreign_key("weirdProduction", "production",
                        (("line1", "line"), ("product", "product")))
    tdf.add_foreign_key("weirdProduction", "extraProduction",
                        (("line2", "line"), ("product", "product")))
    goodDat = tdf.TicDat()
    goodDat.plants["Cleveland"] = ["this", "that"]
    goodDat.plants["Newark"]["otherstuff"] = 1
    goodDat.products["widgets"] = goodDat.products["gadgets"] = "shizzle"
    for i, p in enumerate(goodDat.plants):
        goodDat.lines[i]["plant"] = p
    for i, (pl, pd) in enumerate(itertools.product(goodDat.lines, goodDat.products)):
        goodDat.production[pl, pd] = {"min": 1, "max": 10 + i}
    badDat1 = tdf.copy_tic_dat(goodDat)
    badDat1.production["notaline", "widgets"] = [0, 1]
    badDat2 = tdf.copy_tic_dat(badDat1)
    fk, fkm = _ForeignKey, _ForeignKeyMapping
    self.assertTrue(tdf.find_foreign_key_failures(badDat1) ==
                    tdf.find_foreign_key_failures(badDat2) ==
                    {fk('production', 'lines', fkm('line', 'name'), 'many-to-one'):
                     (('notaline',), (('notaline', 'widgets'),))})
    badDat1.lines["notaline"]["plant"] = badDat2.lines["notaline"]["plant"] = "notnewark"
    self.assertTrue(tdf.find_foreign_key_failures(badDat1) ==
                    tdf.find_foreign_key_failures(badDat2) ==
                    {fk('lines', 'plants', fkm('plant', 'name'), 'many-to-one'):
                     (('notnewark',), ('notaline',))})
    tdf.remove_foreign_keys_failures(badDat1, propagate=False)
    tdf.remove_foreign_keys_failures(badDat2, propagate=True)
    self.assertTrue(tdf._same_data(badDat2, goodDat) and
                    not tdf.find_foreign_key_failures(badDat2))
    self.assertTrue(tdf.find_foreign_key_failures(badDat1) ==
                    {fk('production', 'lines', fkm('line', 'name'), 'many-to-one'):
                     (('notaline',), (('notaline', 'widgets'),))})
    tdf.remove_foreign_keys_failures(badDat1, propagate=False)
    self.assertTrue(tdf._same_data(badDat1, goodDat) and
                    not tdf.find_foreign_key_failures(badDat1))
    _ = len(goodDat.lines)
    for i, p in enumerate(goodDat.plants.keys() + goodDat.plants.keys()):
        goodDat.lines[i + _]["plant"] = p
    for l in goodDat.lines:
        if i % 2:
            goodDat.line_descriptor[l] = i + 10
    for i, (l, pl, pdct) in enumerate(sorted(itertools.product(
            goodDat.lines, goodDat.plants, goodDat.products))):
        goodDat.pureTestingTable.append((l, pl, pdct, i))
    self.assertFalse(tdf.find_foreign_key_failures(goodDat))
    badDat = tdf.copy_tic_dat(goodDat)
    badDat.pureTestingTable.append(("j", "u", "nk", "ay"))
    l = len(goodDat.pureTestingTable)
    self.assertTrue(tdf.find_foreign_key_failures(badDat) ==
                    {fk('pureTestingTable', 'plants', fkm('plant', 'name'), 'many-to-one'):
                     (('u',), (l,)),
                     fk('pureTestingTable', 'products', fkm('product', 'name'), 'many-to-one'):
                     (('nk',), (l,)),
                     fk('pureTestingTable', 'lines', fkm('line', 'name'), 'many-to-one'):
                     (('j',), (l,))})
    obfudat = tdf.obfusimplify(goodDat, freeze_it=True)
    self.assertTrue(all(len(getattr(obfudat.copy, t)) == len(getattr(goodDat, t))
                        for t in tdf.all_tables))
    for n in goodDat.plants.keys() + goodDat.lines.keys() + goodDat.products.keys():
        self.assertTrue(n in {_[1] for _ in obfudat.renamings.values()})
        self.assertFalse(n in obfudat.renamings)
    self.assertTrue(obfudat.copy.plants['P2']['otherstuff'] == 1)
    self.assertFalse(tdf._same_data(obfudat.copy, goodDat))
    for k, r in obfudat.copy.line_descriptor.items():
        i = r.values()[0] - 10
        self.assertTrue(i % 2 and (goodDat.line_descriptor[i].values()[0] == i + 10))
    obfudat2 = tdf.obfusimplify(goodDat, {"plants": "P", "lines": "L", "products": "PR"})
    self.assertTrue(tdf._same_data(obfudat.copy, obfudat2.copy))
    obfudat3 = tdf.obfusimplify(goodDat, skip_tables=["plants", "lines", "products"])
    self.assertTrue(tdf._same_data(obfudat3.copy, goodDat))
    obfudat4 = tdf.obfusimplify(goodDat, skip_tables=["lines", "products"])
    self.assertFalse(tdf._same_data(obfudat4.copy, goodDat))
    self.assertFalse(tdf._same_data(obfudat4.copy, obfudat.copy))

def testEight(self):
    tdf = TicDatFactory(**dietSchema())
    def makeIt():
        rtn = tdf.TicDat()
        rtn.foods["a"] = 12
        rtn.foods["b"] = None
        rtn.categories["1"] = {"maxNutrition": 100, "minNutrition": 40}
        rtn.categories["2"] = [10, 20]
        for f, p in itertools.product(rtn.foods, rtn.categories):
            rtn.nutritionQuantities[f, p] = 5
        rtn.nutritionQuantities['a', 2] = 12
        return tdf.freeze_me(rtn)
    dat = makeIt()
    self.assertFalse(tdf.find_data_type_failures(dat))

    tdf = TicDatFactory(**dietSchema())
    tdf.set_data_type("foods", "cost", nullable=False)
    tdf.set_data_type("nutritionQuantities", "qty", min=5, inclusive_min=False, max=12,
                      inclusive_max=True)
    tdf.set_default_value("foods", "cost", 2)
    dat = makeIt()
    failed = tdf.find_data_type_failures(dat)
    self.assertTrue(set(failed) == {('foods', 'cost'), ('nutritionQuantities', 'qty')})
    self.assertTrue(set(failed['nutritionQuantities', 'qty'].pks) ==
                    {('b', '1'), ('a', '2'), ('a', '1'), ('b', '2')})
    self.assertTrue(failed['nutritionQuantities', 'qty'].bad_values == (5,))
    ex = self.firesException(lambda: tdf.replace_data_type_failures(tdf.copy_tic_dat(dat)))
    self.assertTrue(all(_ in ex for _ in ("replacement value", "nutritionQuantities", "qty")))
    fixedDat = tdf.replace_data_type_failures(
        tdf.copy_tic_dat(dat), replacement_values={("nutritionQuantities", "qty"): 5.001})
    self.assertFalse(tdf.find_data_type_failures(fixedDat) or tdf._same_data(fixedDat, dat))
    self.assertTrue(all(fixedDat.nutritionQuantities[pk]["qty"] == 5.001
                        for pk in failed['nutritionQuantities', 'qty'].pks))
    self.assertTrue(fixedDat.foods["a"]["cost"] == 12 and fixedDat.foods["b"]["cost"] == 2 and
                    fixedDat.nutritionQuantities['a', 2]["qty"] == 12)

    tdf = TicDatFactory(**dietSchema())
    tdf.set_data_type("foods", "cost", nullable=False)
    tdf.set_data_type("nutritionQuantities", "qty", min=5, inclusive_min=False, max=12,
                      inclusive_max=True)
    fixedDat2 = tdf.replace_data_type_failures(
        tdf.copy_tic_dat(dat),
        replacement_values={("nutritionQuantities", "qty"): 5.001, ("foods", "cost"): 2})
    self.assertTrue(tdf._same_data(fixedDat, fixedDat2))

    tdf = TicDatFactory(**dietSchema())
    tdf.set_data_type("foods", "cost", nullable=True)
    tdf.set_data_type("nutritionQuantities", "qty", number_allowed=False)
    failed = tdf.find_data_type_failures(dat)
    self.assertTrue(set(failed) == {('nutritionQuantities', 'qty')})
    self.assertTrue(set(failed['nutritionQuantities', 'qty'].pks) == set(dat.nutritionQuantities))
    ex = self.firesException(lambda: tdf.replace_data_type_failures(tdf.copy_tic_dat(dat)))
    self.assertTrue(all(_ in ex for _ in ("replacement value", "nutritionQuantities", "qty")))

    tdf = TicDatFactory(**dietSchema())
    tdf.set_data_type("foods", "cost")
    fixedDat = tdf.replace_data_type_failures(tdf.copy_tic_dat(makeIt()))
    self.assertTrue(fixedDat.foods["a"]["cost"] == 12 and fixedDat.foods["b"]["cost"] == 0)

    tdf = TicDatFactory(**netflowSchema())
    addNetflowForeignKeys(tdf)
    dat = tdf.copy_tic_dat(netflowData(), freeze_it=1)
    self.assertFalse(hasattr(dat.nodes["Detroit"], "arcs_source"))

    tdf = TicDatFactory(**netflowSchema())
    addNetflowForeignKeys(tdf)
    tdf.enable_foreign_key_links()
    dat = tdf.copy_tic_dat(netflowData(), freeze_it=1)
    self.assertTrue(hasattr(dat.nodes["Detroit"], "arcs_source"))

    tdf = TicDatFactory(**netflowSchema())
    def makeIt():
        if not tdf.foreign_keys:
            tdf.enable_foreign_key_links()
            addNetflowForeignKeys(tdf)
        orig = netflowData()
        rtn = tdf.copy_tic_dat(orig)
        # overwrite existing Detroit-sourced arcs, so row counts stay the same
        for n in rtn.nodes["Detroit"].arcs_source:
            rtn.arcs["Detroit", n] = n
        self.assertTrue(all(len(getattr(rtn, t)) == len(getattr(orig, t))
                            for t in tdf.all_tables))
        return tdf.freeze_me(rtn)
    dat = makeIt()
    self.assertFalse(tdf.find_data_type_failures(dat))

    tdf = TicDatFactory(**netflowSchema())
    tdf.set_data_type("arcs", "capacity", strings_allowed="*")
    dat = makeIt()
    self.assertFalse(tdf.find_data_type_failures(dat))

    tdf = TicDatFactory(**netflowSchema())
    tdf.set_data_type("arcs", "capacity", strings_allowed=["Boston", "Seattle", "lumberjack"])
    dat = makeIt()
    failed = tdf.find_data_type_failures(dat)
    self.assertTrue(failed == {('arcs', 'capacity'): (("New York",),
                                                      (("Detroit", "New York"),))})
    fixedDat = tdf.replace_data_type_failures(tdf.copy_tic_dat(makeIt()))
    netflowData_ = tdf.copy_tic_dat(netflowData())
    self.assertFalse(tdf.find_data_type_failures(fixedDat) or tdf._same_data(dat, netflowData_))
    fixedDat = tdf.copy_tic_dat(tdf.replace_data_type_failures(
        tdf.copy_tic_dat(makeIt()), {("arcs", "capacity"): 80, ("cost", "cost"): "imok"}))
    fixedDat.arcs["Detroit", "Boston"] = 100
    fixedDat.arcs["Detroit", "Seattle"] = 120
    self.assertTrue(tdf._same_data(fixedDat, netflowData_))
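# A minimal sketch (hypothetical schema) of the find/replace data-type-failure round trip
# that testEight exercises: declare a field type, locate the offending rows, repair them.
def _data_type_repair_sketch(self):
    tdf = TicDatFactory(parts=[["name"], ["weight"]])
    tdf.set_data_type("parts", "weight", min=0, max=100, inclusive_max=True)
    dat = tdf.TicDat(parts=[["bolt", 5], ["girder", 500]])  # 500 violates max=100
    failed = tdf.find_data_type_failures(dat)
    self.assertTrue(set(failed) == {("parts", "weight")})
    fixed = tdf.replace_data_type_failures(
        tdf.copy_tic_dat(dat), replacement_values={("parts", "weight"): 100})
    self.assertFalse(tdf.find_data_type_failures(fixed))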
def testSilly(self):
    tdf = TicDatFactory(**sillyMeSchema())
    ticDat = tdf.TicDat(**sillyMeData())
    filePath = os.path.join(_scratchDir, "silly.mdb")
    self.assertTrue(firesException(lambda: tdf.mdb.write_file(ticDat, makeCleanPath(filePath))))
    def sillyMeCleanData():
        return {
            "a": {"1": (1, 2, "3"), "b": (12, 12.2, "twelve"), "c": (11, 12, "thirt")},
            "b": {(1, 2, "3"): 1, (3, 4, "b"): 12},
            "c": ((1, "2", 3, 4), (0.2, "b", 0.3, 0.4), (1.2, "b", 12, 24))
        }
    ticDat = tdf.TicDat(**sillyMeCleanData())
    self.assertTrue(firesException(lambda: tdf.mdb.write_file(ticDat, makeCleanPath(filePath))))
    def makeCleanSchema():
        tdf.mdb.write_schema(makeCleanPath(filePath), a={"aData3": "text"},
                             b={"bField1": "int", "bField2": "int"}, c={"cData2": "text"})
        return filePath
    tdf.mdb.write_file(ticDat, makeCleanSchema())
    mdbTicDat = tdf.mdb.create_tic_dat(filePath)
    self.assertTrue(tdf._same_data(ticDat, mdbTicDat))

    schema2 = sillyMeSchema()
    schema2["b"][0] = ("bField2", "bField1", "bField3")
    schema3 = sillyMeSchema()
    schema3["a"][1] = ("aData2", "aData3", "aData1")
    schema4 = sillyMeSchema()
    schema4["a"][1] = ("aData1", "aData3")
    schema5 = sillyMeSchema()
    _tuple = lambda x: tuple(x) if utils.containerish(x) else (x,)
    for t in ("a", "b"):
        schema5[t][1] = _tuple(schema5[t][1]) + _tuple(schema5[t][0])
    schema5["a"][0], schema5["b"][0] = (), []
    schema6 = sillyMeSchema()
    schema6["d"] = [["dField"], ()]
    tdf2, tdf3, tdf4, tdf5, tdf6 = (TicDatFactory(**x) for x in
                                    (schema2, schema3, schema4, schema5, schema6))
    tdf5.set_generator_tables(("a", "c"))
    ticDat2 = tdf2.mdb.create_tic_dat(filePath)
    self.assertFalse(tdf._same_data(ticDat, ticDat2))
    ticDat3 = tdf3.mdb.create_tic_dat(filePath)
    self.assertTrue(tdf._same_data(ticDat, ticDat3))
    ticDat4 = tdf4.mdb.create_tic_dat(filePath)
    for t in ["a", "b"]:
        for k, v in getattr(ticDat4, t).items():
            for _k, _v in v.items():
                self.assertTrue(getattr(ticDat, t)[k][_k] == _v)
            if set(v) == set(getattr(ticDat, t)[k]):
                self.assertTrue(t == "b")
            else:
                self.assertTrue(t == "a")
    ticDat5 = tdf5.mdb.create_tic_dat(filePath)
    self.assertTrue(tdf5._same_data(tdf._keyless(ticDat), ticDat5))
    self.assertTrue(callable(ticDat5.a) and callable(ticDat5.c) and not callable(ticDat5.b))
    self.assertTrue("table d" in self.firesException(lambda: tdf6.mdb.create_tic_dat(filePath)))
    ticDat.a["theboger"] = (1, None, "twelve")
    tdf.mdb.write_file(ticDat, makeCleanSchema())
    ticDatNone = tdf.mdb.create_tic_dat(filePath, freeze_it=True)
    self.assertTrue(tdf._same_data(ticDat, ticDatNone))
    self.assertTrue(ticDatNone.a["theboger"]["aData2"] == None)
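# A minimal sketch (hypothetical schema; like testSilly, it needs a working MS Access/mdb
# setup to actually run) of the mdb flow above: write the schema with explicit column
# types, write the data, then read an equivalent TicDat back.
def _mdb_round_trip_sketch(self):
    tdf = TicDatFactory(parts=[["name"], ["weight"]])
    path = makeCleanPath(os.path.join(_scratchDir, "sketch.mdb"))
    tdf.mdb.write_schema(path, parts={"name": "text"})  # remaining fields use default types
    dat = tdf.TicDat(parts=[["bolt", 5], ["girder", 50]])
    tdf.mdb.write_file(dat, path)
    self.assertTrue(tdf._same_data(dat, tdf.mdb.create_tic_dat(path)))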
def testDiet(self):
    def doTheTests(tdf):
        ticDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(dietData(), t)
                                             for t in tdf.primary_key_fields}))
        filePath = makeCleanPath(os.path.join(_scratchDir, "diet.db"))
        tdf.sql.write_db_data(ticDat, filePath)
        sqlTicDat = tdf.sql.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(ticDat, sqlTicDat))
        def changeit():
            sqlTicDat.categories["calories"]["minNutrition"] = 12
        changeit()
        self.assertFalse(tdf._same_data(ticDat, sqlTicDat))
        self.assertTrue(self.firesException(lambda: tdf.sql.write_db_data(ticDat, filePath)))
        tdf.sql.write_db_data(ticDat, filePath, allow_overwrite=True)
        sqlTicDat = tdf.sql.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, sqlTicDat))
        self.assertTrue(self.firesException(changeit))
        self.assertTrue(tdf._same_data(ticDat, sqlTicDat))
        filePath = makeCleanPath(os.path.join(_scratchDir, "diet.sql"))
        tdf.sql.write_sql_file(ticDat, filePath)
        sqlTicDat = tdf.sql.create_tic_dat_from_sql(filePath)
        self.assertTrue(tdf._same_data(ticDat, sqlTicDat))
        changeit()
        self.assertFalse(tdf._same_data(ticDat, sqlTicDat))
        tdf.sql.write_sql_file(ticDat, filePath, include_schema=True)
        sqlTicDat = tdf.sql.create_tic_dat_from_sql(filePath, includes_schema=True,
                                                    freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, sqlTicDat))
        self.assertTrue(self.firesException(changeit))
        self.assertTrue(tdf._same_data(ticDat, sqlTicDat))
    doTheTests(TicDatFactory(**dietSchema()))

    tdf = TicDatFactory(**dietSchema())
    self.assertFalse(tdf.foreign_keys)
    tdf.set_default_values(categories={'maxNutrition': float("inf"), 'minNutrition': 0.0},
                           foods={'cost': 0.0}, nutritionQuantities={'qty': 0.0})
    addDietForeignKeys(tdf)
    ordered = tdf.sql._ordered_tables()
    self.assertTrue(ordered.index("categories") < ordered.index("nutritionQuantities"))
    self.assertTrue(ordered.index("foods") < ordered.index("nutritionQuantities"))
    ticDat = tdf.TicDat(**{t: getattr(dietData(), t) for t in tdf.primary_key_fields})
    origTicDat = tdf.copy_tic_dat(ticDat)
    self.assertTrue(tdf._same_data(ticDat, origTicDat))
    self.assertFalse(tdf.find_foreign_key_failures(ticDat))
    ticDat.nutritionQuantities['hot dog', 'boger'] = \
        ticDat.nutritionQuantities['junk', 'protein'] = -12
    self.assertTrue(tdf.find_foreign_key_failures(ticDat) ==
        {('nutritionQuantities', 'foods', ('food', 'name'), 'many-to-one'):
             (('junk',), (('junk', 'protein'),)),
         ('nutritionQuantities', 'categories', ('category', 'name'), 'many-to-one'):
             (('boger',), (('hot dog', 'boger'),))})
    self.assertFalse(tdf._same_data(ticDat, origTicDat))
    tdf.remove_foreign_keys_failures(ticDat)
    self.assertFalse(tdf.find_foreign_key_failures(ticDat))
    self.assertTrue(tdf._same_data(ticDat, origTicDat))
    doTheTests(tdf)
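# A minimal sketch (hypothetical schema) of the SQLite round trip at the heart of
# testDiet: write_db_data persists a TicDat, create_tic_dat reads an equivalent one back,
# and rewriting an existing file requires allow_overwrite=True.
def _sql_round_trip_sketch(self):
    tdf = TicDatFactory(foods=[["name"], ["cost"]])
    dat = tdf.TicDat(foods=[["pie", 3.5], ["tea", 1.5]])
    path = makeCleanPath(os.path.join(_scratchDir, "sketch.db"))
    tdf.sql.write_db_data(dat, path)
    self.assertTrue(tdf._same_data(dat, tdf.sql.create_tic_dat(path)))
    tdf.sql.write_db_data(dat, path, allow_overwrite=True)  # second write needs the flag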
def doTest(headersPresent):
    tdf = TicDatFactory(**sillyMeSchema())
    ticDat = tdf.TicDat(**sillyMeData())
    schema2 = sillyMeSchema()
    schema2["b"][0] = ("bField2", "bField1", "bField3")
    schema3 = sillyMeSchema()
    schema3["a"][1] = ("aData2", "aData3", "aData1")
    schema4 = sillyMeSchema()
    schema4["a"][1] = ("aData1", "aData3")
    schema5 = sillyMeSchema()
    _tuple = lambda x: tuple(x) if utils.containerish(x) else (x,)
    for t in ("a", "b"):
        schema5[t][1] = _tuple(schema5[t][1]) + _tuple(schema5[t][0])
    schema5["a"][0], schema5["b"][0] = (), []
    schema5b = sillyMeSchema()
    for t in ("a", "b"):
        schema5b[t][1] = _tuple(schema5b[t][0]) + _tuple(schema5b[t][1])
    schema5b["a"][0], schema5b["b"][0] = (), []
    schema6 = sillyMeSchema()
    schema6["d"] = [("dField",), []]
    tdf2, tdf3, tdf4, tdf5, tdf5b, tdf6 = (TicDatFactory(**x) for x in
        (schema2, schema3, schema4, schema5, schema5b, schema6))
    tdf5.set_generator_tables(["a", "c"])
    tdf5b.set_generator_tables(("a", "c"))
    dirPath = makeCleanDir(os.path.join(_scratchDir, "silly"))
    tdf.csv.write_directory(ticDat, dirPath, write_header=headersPresent)

    ticDat2 = tdf2.csv.create_tic_dat(dirPath, headers_present=headersPresent)
    (self.assertFalse if headersPresent else self.assertTrue)(tdf._same_data(ticDat, ticDat2))

    ticDat3 = tdf3.csv.create_tic_dat(dirPath, headers_present=headersPresent)
    (self.assertTrue if headersPresent else self.assertFalse)(tdf._same_data(ticDat, ticDat3))

    if headersPresent:
        ticDat4 = tdf4.csv.create_tic_dat(dirPath, headers_present=headersPresent)
        for t in ("a", "b"):
            for k, v in getattr(ticDat4, t).items():
                for _k, _v in v.items():
                    self.assertTrue(getattr(ticDat, t)[k][_k] == _v)
                if set(v) == set(getattr(ticDat, t)[k]):
                    self.assertTrue(t == "b")
                else:
                    self.assertTrue(t == "a")
    else:
        self.assertTrue(self.firesException(
            lambda: tdf4.csv.create_tic_dat(dirPath, headers_present=headersPresent)))

    ticDat5 = tdf5.csv.create_tic_dat(dirPath, headers_present=headersPresent)
    (self.assertTrue if headersPresent else self.assertFalse)(
        tdf5._same_data(tdf._keyless(ticDat), ticDat5))
    self.assertTrue(callable(ticDat5.a) and callable(ticDat5.c) and not callable(ticDat5.b))

    ticDat5b = tdf5b.csv.create_tic_dat(dirPath, headers_present=headersPresent)
    self.assertTrue(tdf5b._same_data(tdf._keyless(ticDat), ticDat5b))
    self.assertTrue(callable(ticDat5b.a) and callable(ticDat5b.c) and not callable(ticDat5b.b))

    ticDat6 = tdf6.csv.create_tic_dat(dirPath, headers_present=headersPresent)
    self.assertTrue(tdf._same_data(ticDat, ticDat6))
    self.assertTrue(firesException(lambda: tdf6._same_data(ticDat, ticDat6)))
    self.assertTrue(hasattr(ticDat6, "d") and utils.dictish(ticDat6.d))

    allDataTdf = TicDatFactory(**{t: [[], tdf.primary_key_fields.get(t, ()) +
                                          tdf.data_fields.get(t, ())]
                                  for t in tdf.all_tables})
    def writeData(data):
        td = allDataTdf.TicDat(a=data, b=data, c=data)
        allDataTdf.csv.write_directory(td, dirPath, allow_overwrite=True,
                                       write_header=headersPresent)
    writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)])
    ticDatMan = tdf.csv.create_tic_dat(dirPath, headers_present=headersPresent, freeze_it=True)
    self.assertTrue(len(ticDatMan.a) == 2 and len(ticDatMan.b) == 3)
    self.assertTrue(ticDatMan.b[(1, 20, 30)]["bData"] == 40)
    # find_duplicates, matching the sql duplicate-detection API used elsewhere in the suite
    rowCount = tdf.csv.find_duplicates(dirPath, headers_present=headersPresent)
    self.assertTrue(set(rowCount) == {'a'} and set(rowCount["a"]) == {1}
                    and rowCount["a"][1] == 2)
    writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40), (1, 20, 30, 12)])
    rowCount = tdf.csv.find_duplicates(dirPath, headers_present=headersPresent)
    self.assertTrue(set(rowCount) == {'a', 'b'} and set(rowCount["a"]) == {1}
                    and rowCount["a"][1] == 3)
    self.assertTrue(set(rowCount["b"]) == {(1, 20, 30)} and rowCount["b"][1, 20, 30] == 2)
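# A minimal sketch (hypothetical schema) of the CSV behavior doTest drives: write_header
# and headers_present must agree, since headerless files are matched to fields by position.
def _csv_round_trip_sketch(self):
    tdf = TicDatFactory(parts=[["name"], ["weight"]])
    dat = tdf.TicDat(parts=[["bolt", 5], ["girder", 50]])
    dirPath = makeCleanDir(os.path.join(_scratchDir, "csv_sketch"))
    tdf.csv.write_directory(dat, dirPath, write_header=False)
    dat2 = tdf.csv.create_tic_dat(dirPath, headers_present=False)
    self.assertTrue(tdf._same_data(dat, dat2))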
def testFour(self):
    objOrig = sillyMeData()
    staticFactory = TicDatFactory(**sillyMeSchema())
    goodTable = lambda t: lambda _t: staticFactory.good_tic_dat_table(_t, t)
    tables = set(staticFactory.primary_key_fields)
    ticDat = staticFactory.freeze_me(staticFactory.TicDat(**objOrig))
    self.assertTrue(staticFactory.good_tic_dat_object(ticDat))
    for t in tables:
        self._assertSame(objOrig[t], getattr(ticDat, t), goodTable(t))
    # the as_dict form rebuilds an equivalent (e.g. pickle-able) TicDat
    pickledData = staticFactory.TicDat(**staticFactory.as_dict(ticDat))
    self.assertTrue(staticFactory._same_data(ticDat, pickledData))
    mutTicDat = staticFactory.TicDat()
    for k, v in ticDat.a.items():
        mutTicDat.a[k] = v.values()
    for k, v in ticDat.b.items():
        mutTicDat.b[k] = v.values()[0]
    for r in ticDat.c:
        mutTicDat.c.append(r)
    for t in tables:
        self._assertSame(getattr(mutTicDat, t), getattr(ticDat, t), goodTable(t))
    self.assertTrue("theboger" not in mutTicDat.a)
    mutTicDat.a["theboger"]["aData2"] = 22
    self.assertTrue("theboger" in mutTicDat.a and
                    mutTicDat.a["theboger"].values() == (0, 22, 0))
    newSchema = sillyMeSchema()
    newSchema["a"][1] += ("aData4",)
    newFactory = TicDatFactory(**newSchema)
    def makeNewTicDat():
        return newFactory.TicDat(a=ticDat.a, b=ticDat.b, c=ticDat.c)
    newTicDat = makeNewTicDat()
    self.assertFalse(staticFactory.good_tic_dat_object(newTicDat))
    self.assertTrue(newFactory.good_tic_dat_object(ticDat))
    self.assertTrue(newFactory._same_data(makeNewTicDat(), newTicDat))
    # next(iter(...)) instead of .keys()[0] so this also runs under Python 3
    newTicDat.a[next(iter(ticDat.a))]["aData4"] = 12
    self.assertFalse(newFactory._same_data(makeNewTicDat(), newTicDat))
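# A minimal sketch (hypothetical schema) of the as_dict round trip testFour checks:
# as_dict produces plain builtins suitable for serialization, and TicDat(**...) rebuilds
# an equivalent object.
def _as_dict_round_trip_sketch(self):
    tdf = TicDatFactory(parts=[["name"], ["weight"]])
    dat = tdf.TicDat(parts=[["bolt", 5]])
    self.assertTrue(tdf._same_data(dat, tdf.TicDat(**tdf.as_dict(dat))))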