def testCaseSpaceTableNames(self):
    """csv round-trip honoring the case_space_table_names write flag."""
    tdf = TicDatFactory(table_one=[["a"], ["b", "c"]],
                        table_two=[["this", "that"], []])
    dir_path = os.path.join(_scratchDir, "case_space")
    dat = tdf.TicDat(table_one=[['a', 2, 3], ['b', 5, 6]],
                     table_two=[["a", "b"], ["c", "d"], ["x", "z"]])
    spaced = ["Table One", "Table Two"]
    underscored = ["table_one", "table_two"]
    csv_exists = lambda stem: os.path.exists(os.path.join(dir_path, stem + ".csv"))
    # With the flag on, files are written with spaced, title-cased names.
    tdf.csv.write_directory(dat, makeCleanDir(dir_path), case_space_table_names=True)
    self.assertTrue(all(csv_exists(stem) for stem in spaced))
    self.assertFalse(any(csv_exists(stem) for stem in underscored))
    self.assertTrue(tdf._same_data(dat, tdf.csv.create_tic_dat(dir_path)))
    # With the flag off, the raw underscored table names are used instead.
    tdf.csv.write_directory(dat, makeCleanDir(dir_path), case_space_table_names=False)
    self.assertFalse(any(csv_exists(stem) for stem in spaced))
    self.assertTrue(all(csv_exists(stem) for stem in underscored))
    self.assertTrue(tdf._same_data(dat, tdf.csv.create_tic_dat(dir_path)))
def testMissingTable(self):
    """Reading a directory that lacks some tables yields empty tables for them."""
    if not self.can_run:
        return
    tdf = TicDatFactory(**dietSchema())
    # First pass: schema without the nutritionQuantities table.
    tdf2 = TicDatFactory(**{k: v for k, v in dietSchema().items()
                            if k != "nutritionQuantities"})
    ticDat2 = tdf2.copy_tic_dat(dietData())
    dirPath = os.path.join(_scratchDir, "diet_missing")
    tdf2.csv.write_directory(ticDat2, makeCleanDir(dirPath))
    ticDat3 = tdf.csv.create_tic_dat(dirPath)
    self.assertTrue(tdf2._same_data(ticDat2, ticDat3))
    self.assertTrue(all(hasattr(ticDat3, table) for table in tdf.all_tables))
    self.assertFalse(ticDat3.nutritionQuantities)
    self.assertTrue(ticDat3.categories and ticDat3.foods)
    # Second pass: keep only the categories table.
    tdf2 = TicDatFactory(**{k: v for k, v in dietSchema().items()
                            if k == "categories"})
    ticDat2 = tdf2.copy_tic_dat(dietData())
    tdf2.csv.write_directory(ticDat2, makeCleanDir(dirPath))
    ticDat3 = tdf.csv.create_tic_dat(dirPath)
    self.assertTrue(tdf2._same_data(ticDat2, ticDat3))
    self.assertTrue(all(hasattr(ticDat3, table) for table in tdf.all_tables))
    self.assertFalse(ticDat3.nutritionQuantities or ticDat3.foods)
    self.assertTrue(ticDat3.categories)
def runTheTests(fastOnly=True):
    """Run the xls test suite in a clean scratch directory.

    Skips (with a loud console message) if the TicDatFactory has no xls
    attribute, i.e. the xls support libraries failed to load.

    :param fastOnly: if truthy, only the fast subset of tests is run.
    """
    td = TicDatFactory()
    if not hasattr(td, "xls"):
        # Bug fix: the Python-2 `print stmt` form is a SyntaxError on Python 3.
        # The call form below behaves identically on both interpreters.
        print("!!!!!!!!!FAILING XLS UNIT TESTS DUE TO FAILURE TO LOAD XLS LIBRARIES!!!!!!!!")
        return
    makeCleanDir(_scratchDir)
    runSuite(TestXls, fastOnly=fastOnly)
    shutil.rmtree(_scratchDir)
def runTheTests(fastOnly=True):
    """Run the mdb test suite in a clean scratch directory.

    Skips (with a loud console message) if mdb support is unavailable or
    new mdb files cannot be written on this machine.

    :param fastOnly: if truthy, only the fast subset of tests is run.
    """
    td = TicDatFactory()
    if not hasattr(td, "mdb"):
        # Bug fix: the Python-2 `print stmt` form is a SyntaxError on Python 3.
        # The call form below behaves identically on both interpreters.
        print("!!!!!!!!!FAILING MDB UNIT TESTS DUE TO FAILURE TO LOAD MDB LIBRARIES!!!!!!!!")
        return
    if not td.mdb.can_write_new_file:
        print("!!!!!!!!!FAILING MDB UNIT TESTS DUE TO FAILURE TO WRITE NEW MDB FILES!!!!!!!!")
        return
    makeCleanDir(_scratchDir)
    runSuite(TestMdb, fastOnly=fastOnly)
    shutil.rmtree(_scratchDir)
def _test_generic_copy(self, ticDat, tdf, skip_tables=None):
    """Helper: verify that tables converted to generic ('*') form round-trip
    through sqlite .db files and .sql dumps, reproducing the original data.

    :param ticDat: the data object to round-trip
    :param tdf: its TicDatFactory (every table must have primary key fields)
    :param skip_tables: tables to leave in normal (non-generic) form
    """
    assert all(tdf.primary_key_fields.get(t) for t in tdf.all_tables)
    path = makeCleanDir(os.path.join(_scratchDir, "generic_copy"))
    # Rename any "name" field to "name_" before the pandas copy —
    # presumably avoids a reserved-attribute clash; confirm against copy_to_pandas.
    replace_name = lambda f : "name_" if f == "name" else f
    clean_tdf = TicDatFactory(**{t: [list(map(replace_name, pks)), dfs]
                                 for t, (pks, dfs) in tdf.schema().items()})
    # Every table becomes generic ('*') except those explicitly skipped.
    temp_tdf = TicDatFactory(**{t: v if t in (skip_tables or []) else '*'
                                for t, v in clean_tdf.schema().items()})
    temp_dat = temp_tdf.TicDat(**{t: getattr(ticDat, t) for t in (skip_tables or [])})
    # Generic tables are populated from a pandas copy that keeps pk columns.
    for t in temp_tdf.generic_tables:
        setattr(temp_dat, t,
                getattr(clean_tdf.copy_to_pandas(ticDat, drop_pk_columns=False), t))
    temp_tdf.sql.write_db_data(temp_dat, os.path.join(path, "f.db"))
    temp_tdf.sql.write_sql_file(temp_dat, os.path.join(path, "f1.sql"), include_schema=False)
    temp_tdf.sql.write_sql_file(temp_dat, os.path.join(path, "f2.sql"), include_schema=True)
    # Read each artifact back and rebuild a keyed TicDat from the generic rows.
    for file_name, includes_schema in [("f.db", False), ("f1.sql", False), ("f2.sql", True)]:
        file_path = os.path.join(path, file_name)
        if file_path.endswith(".db"):
            self.assertFalse(temp_tdf.sql.find_duplicates(file_path))
            read_dat = temp_tdf.sql.create_tic_dat(file_path)
        else:
            read_dat = temp_tdf.sql.create_tic_dat_from_sql(file_path, includes_schema)
        generic_free_dat, _ = utils.create_generic_free(read_dat, temp_tdf)
        check_dat = clean_tdf.TicDat()
        for t in temp_tdf.generic_tables:
            for r in getattr(generic_free_dat, t):
                pks = clean_tdf.primary_key_fields[t]
                # Single-field pks key by the value itself; multi-field by tuple.
                getattr(check_dat, t)[r[pks[0]] if len(pks) == 1
                                      else tuple(r[_] for _ in pks)] = \
                    {df: r[df] for df in clean_tdf.data_fields.get(t, [])}
        # Skipped (non-generic) tables are copied over key-by-key unchanged.
        for t in (skip_tables or []):
            for k, v in getattr(generic_free_dat, t).items():
                getattr(check_dat, t)[k] = v
        self.assertTrue(clean_tdf._same_data(check_dat, clean_tdf.copy_tic_dat(ticDat)))
def testDups(self):
    """json find_duplicates reports repeated primary keys per table."""
    if not self.can_run:
        return
    for verbose in (True, False):
        tdf = TicDatFactory(one=[["a"], ["b", "c"]],
                            two=[["a", "b"], ["c"]],
                            three=[["a", "b", "c"], []])
        # A pk-free twin factory lets us write rows with duplicate keys.
        tdf2 = TicDatFactory(**{t: [[], ["a", "b", "c"]] for t in tdf.all_tables})
        rows = [[1, 2, 1], [1, 2, 2], [2, 1, 3], [2, 2, 3], [1, 2, 2], ["new", 1, 2]]
        td = tdf2.TicDat(**{t: rows for t in tdf.all_tables})
        writePath = os.path.join(makeCleanDir(os.path.join(_scratchDir, "dups")),
                                 "file.json")
        tdf2.json.write_file(td, writePath, verbose=verbose)
        dups = tdf.json.find_duplicates(writePath)
        expected = {'three': {(1, 2, 2): 2},
                    'two': {(1, 2): 3},
                    'one': {1: 3, 2: 2}}
        self.assertTrue(dups == expected)
def testDiet(self):
    """json round-trip of the diet schema: freezing, case-variant schemas,
    and strict table-name matching."""
    if not self.can_run:
        return
    for verbose in [True, False]:
        tdf = TicDatFactory(**dietSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(**{t: getattr(dietData(), t)
                          for t in tdf.primary_key_fields}))
        writePath = os.path.join(
            makeCleanDir(os.path.join(_scratchDir, "diet")), "file.json")
        tdf.json.write_file(ticDat, writePath, verbose=verbose)
        self.assertFalse(tdf.json.find_duplicates(writePath))
        jsonTicDat = tdf.json.create_tic_dat(writePath)
        self.assertTrue(tdf._same_data(ticDat, jsonTicDat))

        # change() closes over the *variable* jsonTicDat — after the rebind
        # below it mutates the frozen object instead, so the same callable
        # first succeeds and then fires an exception.
        def change():
            jsonTicDat.categories["calories"]["minNutrition"] = 12
        self.assertFalse(firesException(change))
        self.assertFalse(tdf._same_data(ticDat, jsonTicDat))
        jsonTicDat = tdf.json.create_tic_dat(writePath, freeze_it=True)
        self.assertTrue(firesException(change))
        self.assertTrue(tdf._same_data(ticDat, jsonTicDat))

        # A schema differing only in case still reads the same data.
        tdf2 = TicDatFactory(**dietSchemaWeirdCase())
        dat2 = copyDataDietWeirdCase(ticDat)
        tdf2.json.write_file(dat2, writePath, allow_overwrite=True, verbose=verbose)
        jsonTicDat2 = tdf.json.create_tic_dat(writePath, freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, jsonTicDat2))

        # Rename a table key inside the raw json: the weird-case-2 schema
        # accepts "nutrition quantities", but restoring the old key alongside
        # the schema mismatch makes create_tic_dat fail.
        tdf3 = TicDatFactory(**dietSchemaWeirdCase2())
        dat3 = copyDataDietWeirdCase2(ticDat)
        tdf3.json.write_file(dat3, writePath, allow_overwrite=True, verbose=verbose)
        with open(writePath, "r") as f:
            jdict = json.load(f)
        jdict["nutrition quantities"] = jdict["nutrition_quantities"]
        del (jdict["nutrition_quantities"])
        with open(writePath, "w") as f:
            json.dump(jdict, f)
        jsonDat3 = tdf3.json.create_tic_dat(writePath)
        self.assertTrue(tdf3._same_data(dat3, jsonDat3))
        jdict["nutrition_quantities"] = jdict["nutrition quantities"]
        with open(writePath, "w") as f:
            json.dump(jdict, f)
        self.assertTrue(
            self.firesException(lambda: tdf3.json.create_tic_dat(writePath)))
def testSilly(self):
    """json round-trip of the silly schema in verbose and terse modes."""
    if not self.can_run:
        return
    for verbose in (True, False):
        factory = TicDatFactory(**sillyMeSchema())
        original = factory.TicDat(**sillyMeData())
        # Fresh scratch sub-directory each pass so the write never collides.
        target = os.path.join(makeCleanDir(os.path.join(_scratchDir, "netflow")),
                              "file.json")
        factory.json.write_file(original, target, verbose=verbose)
        reread = factory.json.create_tic_dat(target, freeze_it=True)
        self.assertFalse(factory.json.find_duplicates(target))
        self.assertTrue(factory._same_data(original, reread))
def testNetflow(self):
    """json round-trip of the netflow schema: in-memory round-trip, overwrite
    semantics, and floatable-string key preservation."""
    if not self.can_run:
        return
    for verbose in (True, False):
        factory = TicDatFactory(**netflowSchema())
        dat = factory.TicDat(**{t: getattr(netflowData(), t)
                                for t in factory.primary_key_fields})
        # write_file with "" — presumably returns the json content directly,
        # since create_tic_dat consumes its return value here.
        payload = factory.json.write_file(dat, "")
        self.assertTrue(factory._same_data(dat, factory.json.create_tic_dat(payload),
                                           epsilon=0.0001))
        target = os.path.join(makeCleanDir(os.path.join(_scratchDir, "netflow")),
                              "file.json")
        factory.json.write_file(dat, target, verbose=verbose)
        reread = factory.json.create_tic_dat(target, freeze_it=True)
        self.assertFalse(factory.json.find_duplicates(target))
        self.assertTrue(factory._same_data(dat, reread))
        dat.nodes[12] = {}
        factory.json.write_file(dat, target, verbose=verbose, allow_overwrite=True)
        reread = factory.json.create_tic_dat(target, freeze_it=True)
        self.assertTrue(factory._same_data(dat, reread))
        # unlike csv, json format respects strings that are floatable
        del dat.nodes[12]
        dat.nodes['12'] = {}
        # Writing over an existing file without allow_overwrite must fail.
        self.assertTrue(firesException(
            lambda: factory.json.write_file(dat, target, verbose=verbose)))
        factory.json.write_file(dat, target, allow_overwrite=True, verbose=verbose)
        reread = factory.json.create_tic_dat(target, freeze_it=True)
        self.assertTrue(factory._same_data(dat, reread))
def testNulls(self):
    """csv handling of null values and the infinity_io_flag=None convention."""
    tdf = TicDatFactory(table=[["field one"], ["field two"]])
    for field in ("field one", "field two"):
        tdf.set_data_type("table", field, nullable=True)
    dat = tdf.TicDat(table=[[None, 100], [200, "this"], ["that", 300],
                            [300, None], [400, "that"]])
    dir_path = os.path.join(_scratchDir, "boolDefaults")
    tdf.csv.write_directory(dat, dir_path)
    self.assertTrue(tdf._same_data(dat, tdf.csv.create_tic_dat(dir_path)))
    # With infinity_io_flag None and an inclusive +inf max, the nulls written
    # above read back as +inf.
    tdf = TicDatFactory(table=[["field one"], ["field two"]])
    for field in ("field one", "field two"):
        tdf.set_data_type("table", field, max=float("inf"), inclusive_max=True)
    tdf.set_infinity_io_flag(None)
    dat_inf = tdf.TicDat(table=[[float("inf"), 100], [200, "this"], ["that", 300],
                                [300, float("inf")], [400, "that"]])
    self.assertTrue(tdf._same_data(dat_inf, tdf.csv.create_tic_dat(dir_path)))
    tdf.csv.write_directory(dat_inf, makeCleanDir(dir_path))
    self.assertTrue(tdf._same_data(dat_inf, tdf.csv.create_tic_dat(dir_path)))
    # With an inclusive -inf minimum instead, the same files read back as -inf.
    tdf = TicDatFactory(table=[["field one"], ["field two"]])
    for field in ("field one", "field two"):
        tdf.set_data_type("table", field, min=-float("inf"), inclusive_min=True)
    tdf.set_infinity_io_flag(None)
    reread = tdf.csv.create_tic_dat(dir_path)
    self.assertFalse(tdf._same_data(dat_inf, reread))
    dat_inf = tdf.TicDat(table=[[float("-inf"), 100], [200, "this"], ["that", 300],
                                [300, -float("inf")], [400, "that"]])
    self.assertTrue(tdf._same_data(dat_inf, reread))
def setUpClass(cls):
    """Start the test run with an empty scratch directory."""
    makeCleanDir(_scratchDir)
def doTest(headersPresent) :
    # Exercises csv round-trips under many schema permutations, both with and
    # without header rows in the files. Closure over self from the enclosing test.
    tdf = TicDatFactory(**sillyMeSchema())
    ticDat = tdf.TicDat(**sillyMeData())
    # schema2: table "b" with its primary key fields reordered.
    schema2 = sillyMeSchema()
    schema2["b"][0] = ("bField2", "bField1", "bField3")
    # schema3: table "a" with its data fields reordered.
    schema3 = sillyMeSchema()
    schema3["a"][1] = ("aData2", "aData3", "aData1")
    # schema4: table "a" missing one data field.
    schema4 = sillyMeSchema()
    schema4["a"][1] = ("aData1", "aData3")
    # schema5 / schema5b: pk fields folded into the data fields (pk-less
    # tables), appended vs prepended respectively.
    schema5 = sillyMeSchema()
    _tuple = lambda x : tuple(x) if utils.containerish(x) else (x,)
    for t in ("a", "b") :
        schema5[t][1] = _tuple(schema5[t][1]) + _tuple(schema5[t][0])
    schema5["a"][0], schema5["b"][0] = (), []
    schema5b = sillyMeSchema()
    for t in ("a", "b") :
        schema5b[t][1] = _tuple(schema5b[t][0]) + _tuple(schema5b[t][1])
    schema5b["a"][0], schema5b["b"][0] = (), []
    # schema6: adds an extra table "d" that has no csv file on disk.
    schema6 = sillyMeSchema()
    schema6["d"] = [("dField",), []]
    tdf2, tdf3, tdf4, tdf5, tdf5b, tdf6 = (
        TicDatFactory(**x) for x in
        (schema2, schema3, schema4, schema5, schema5b, schema6))
    tdf5.set_generator_tables(["a", "c"])
    tdf5b.set_generator_tables(("a", "c"))
    dirPath = makeCleanDir(os.path.join(_scratchDir, "silly"))
    tdf.csv.write_directory(ticDat, dirPath, write_header=headersPresent)
    # Reordered pks: equal only when headers let fields be matched by name...
    ticDat2 = tdf2.csv.create_tic_dat(dirPath, headers_present=headersPresent)
    (self.assertFalse if headersPresent else self.assertTrue)(
        tdf._same_data(ticDat, ticDat2))
    # ...and reordered data fields likewise.
    ticDat3 = tdf3.csv.create_tic_dat(dirPath, headers_present=headersPresent)
    (self.assertTrue if headersPresent else self.assertFalse)(
        tdf._same_data(ticDat, ticDat3))
    if headersPresent :
        # A schema missing a field still reads a consistent subset of the data.
        ticDat4 = tdf4.csv.create_tic_dat(dirPath, headers_present=headersPresent)
        for t in ("a", "b") :
            for k, v in getattr(ticDat4, t).items() :
                for _k, _v in v.items() :
                    self.assertTrue(getattr(ticDat, t)[k][_k] == _v)
                # Only table "b" keeps its full field set; "a" lost one field.
                if set(v) == set(getattr(ticDat, t)[k]) :
                    self.assertTrue(t == "b")
                else :
                    self.assertTrue(t == "a")
    else :
        # Without headers the field-count mismatch is an error.
        self.assertTrue(self.firesException(
            lambda : tdf4.csv.create_tic_dat(dirPath, headers_present=headersPresent)))
    ticDat5 = tdf5.csv.create_tic_dat(dirPath, headers_present=headersPresent)
    (self.assertTrue if headersPresent else self.assertFalse)(
        tdf5._same_data(tdf._keyless(ticDat), ticDat5))
    # Generator tables present as callables; ordinary tables do not.
    self.assertTrue(callable(ticDat5.a) and callable(ticDat5.c)
                    and not callable(ticDat5.b))
    ticDat5b = tdf5b.csv.create_tic_dat(dirPath, headers_present=headersPresent)
    self.assertTrue(tdf5b._same_data(tdf._keyless(ticDat), ticDat5b))
    self.assertTrue(callable(ticDat5b.a) and callable(ticDat5b.c)
                    and not callable(ticDat5b.b))
    # The extra table "d" comes back empty (but present) without error.
    ticDat6 = tdf6.csv.create_tic_dat(dirPath, headers_present=headersPresent)
    self.assertTrue(tdf._same_data(ticDat, ticDat6))
    self.assertTrue(firesException(lambda : tdf6._same_data(ticDat, ticDat6)))
    self.assertTrue(hasattr(ticDat6, "d") and utils.dictish(ticDat6.d))
    # Duplicate-row detection: write raw rows via a pk-free factory.
    allDataTdf = TicDatFactory(**{t: [[], tdf.primary_key_fields.get(t, ()) +
                                          tdf.data_fields.get(t, ())]
                                  for t in tdf.all_tables})
    def writeData(data):
        # Writes the same raw rows to tables a, b and c.
        td = allDataTdf.TicDat(a=data, b=data, c=data)
        allDataTdf.csv.write_directory(td, dirPath, allow_overwrite=True,
                                       write_header=headersPresent)
    writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)])
    ticDatMan = tdf.csv.create_tic_dat(dirPath, headers_present=headersPresent,
                                       freeze_it=True)
    self.assertTrue(len(ticDatMan.a) == 2 and len(ticDatMan.b) == 3)
    self.assertTrue(ticDatMan.b[(1, 20, 30)]["bData"] == 40)
    rowCount = tdf.csv.find_duplicates(dirPath, headers_present=headersPresent)
    self.assertTrue(set(rowCount) == {'a'} and set(rowCount["a"]) == {1}
                    and rowCount["a"][1] == 2)
    writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40), (1, 20, 30, 12)])
    rowCount = tdf.csv.find_duplicates(dirPath, headers_present=headersPresent)
    self.assertTrue(set(rowCount) == {'a', 'b'} and set(rowCount["a"]) == {1}
                    and rowCount["a"][1] == 3)
    self.assertTrue(set(rowCount["b"]) == {(1, 20, 30)}
                    and rowCount["b"][1, 20, 30] == 2)
def setUpClass(cls):
    """Start the mdb test run with an empty scratch directory."""
    # uncomment the following line to run on old test machines
    #tdmdb._dbq = "*.mdb"
    makeCleanDir(_scratchDir)
def round_trip():
    # Writes dat_n to a fresh directory and reads it back; closes over
    # tdf, dat_n and dir_path from the enclosing test scope.
    tdf.csv.write_directory(dat_n, makeCleanDir(dir_path))
    return tdf.csv.create_tic_dat(dir_path)
def testCsvSimple(self):
    """PanDat csv round-trips: diet and netflow schemas, generic ('*') tables,
    missing tables, and the European decimal separator."""
    if not self.can_run:
        return
    tdf = TicDatFactory(**dietSchema())
    pdf = PanDatFactory(**dietSchema())
    ticDat = tdf.freeze_me(
        tdf.TicDat(**{t: getattr(dietData(), t)
                      for t in tdf.primary_key_fields}))
    panDat = pan_dat_maker(dietSchema(), ticDat)
    dirPath = os.path.join(_scratchDir, "diet_csv")
    pdf.csv.write_directory(panDat, dirPath)
    panDat2 = pdf.csv.create_pan_dat(dirPath)
    self.assertTrue(pdf._same_data(panDat, panDat2))
    # A fully generic schema reads the same files equivalently.
    pdf2 = PanDatFactory(**{t: '*' for t in pdf.all_tables})
    panDat2 = pdf2.csv.create_pan_dat(dirPath)
    self.assertTrue(pdf._same_data(panDat, panDat2))
    # Missing-table behavior: write without nutritionQuantities, read with the
    # full schema — the absent table comes back empty, not as an error.
    pdf2 = PanDatFactory(**{k: v for k, v in dietSchema().items()
                            if k != "nutritionQuantities"})
    panDat2 = pdf2.copy_pan_dat(panDat)
    dirPath = os.path.join(_scratchDir, "diet_missing_csv")
    pdf2.csv.write_directory(panDat2, dirPath, makeCleanDir(dirPath))
    panDat3 = pdf.csv.create_pan_dat(dirPath)
    self.assertTrue(pdf2._same_data(panDat2, panDat3))
    self.assertTrue(all(hasattr(panDat3, x) for x in pdf.all_tables))
    self.assertFalse(len(panDat3.nutritionQuantities))
    self.assertTrue(len(panDat3.categories) and len(panDat3.foods))
    # Same with only the categories table written.
    pdf2 = PanDatFactory(**{k: v for k, v in dietSchema().items()
                            if k == "categories"})
    panDat2 = pdf2.copy_pan_dat(panDat)
    pdf2.csv.write_directory(panDat2, dirPath, makeCleanDir(dirPath))
    panDat3 = pdf.csv.create_pan_dat(dirPath)
    self.assertTrue(pdf2._same_data(panDat2, panDat3))
    self.assertTrue(all(hasattr(panDat3, x) for x in pdf.all_tables))
    self.assertFalse(len(panDat3.nutritionQuantities) or len(panDat3.foods))
    self.assertTrue(len(panDat3.categories))
    # Netflow schema round-trip, normal and generic.
    tdf = TicDatFactory(**netflowSchema())
    pdf = PanDatFactory(**netflowSchema())
    ticDat = tdf.freeze_me(
        tdf.TicDat(**{t: getattr(netflowData(), t)
                      for t in tdf.primary_key_fields}))
    panDat = pan_dat_maker(netflowSchema(), ticDat)
    dirPath = os.path.join(_scratchDir, "netflow_csv")
    pdf.csv.write_directory(panDat, dirPath)
    panDat2 = pdf.csv.create_pan_dat(dirPath)
    self.assertTrue(pdf._same_data(panDat, panDat2))
    pdf2 = PanDatFactory(**{t: '*' for t in pdf.all_tables})
    pdf2.csv.write_directory(panDat, dirPath)
    panDat2 = pdf2.csv.create_pan_dat(dirPath)
    self.assertTrue(pdf._same_data(panDat, panDat2))
    # decimal="," files only round-trip when the reader uses the same flag.
    tdf = TicDatFactory(**dietSchema())
    pdf = PanDatFactory(**dietSchema())
    ticDat = tdf.freeze_me(
        tdf.TicDat(**{t: getattr(dietData(), t)
                      for t in tdf.primary_key_fields}))
    panDat = pan_dat_maker(dietSchema(), ticDat)
    dirPath = os.path.join(_scratchDir, "diet_csv")
    pdf.csv.write_directory(panDat, dirPath, decimal=",")
    panDat2 = pdf.csv.create_pan_dat(dirPath)
    self.assertFalse(pdf._same_data(panDat, panDat2))
    panDat2 = pdf.csv.create_pan_dat(dirPath, decimal=",")
    self.assertTrue(pdf._same_data(panDat, panDat2))
def doTest(headersPresent) :
    # Exercises csv round-trips under many schema permutations, both with and
    # without header rows in the files. Closure over self from the enclosing
    # test. NOTE(review): this variant calls csv.get_duplicates where the other
    # copy of doTest calls csv.find_duplicates — confirm which API this file's
    # csv routine actually exposes.
    tdf = TicDatFactory(**sillyMeSchema())
    ticDat = tdf.TicDat(**sillyMeData())
    # schema2: table "b" with its primary key fields reordered.
    schema2 = sillyMeSchema()
    schema2["b"][0] = ("bField2", "bField1", "bField3")
    # schema3: table "a" with its data fields reordered.
    schema3 = sillyMeSchema()
    schema3["a"][1] = ("aData2", "aData3", "aData1")
    # schema4: table "a" missing one data field.
    schema4 = sillyMeSchema()
    schema4["a"][1] = ("aData1", "aData3")
    # schema5 / schema5b: pk fields folded into the data fields (pk-less
    # tables), appended vs prepended respectively.
    schema5 = sillyMeSchema()
    _tuple = lambda x : tuple(x) if utils.containerish(x) else (x,)
    for t in ("a", "b") :
        schema5[t][1] = _tuple(schema5[t][1]) + _tuple(schema5[t][0])
    schema5["a"][0], schema5["b"][0] = (), []
    schema5b = sillyMeSchema()
    for t in ("a", "b") :
        schema5b[t][1] = _tuple(schema5b[t][0]) + _tuple(schema5b[t][1])
    schema5b["a"][0], schema5b["b"][0] = (), []
    # schema6: adds an extra table "d" that has no csv file on disk.
    schema6 = sillyMeSchema()
    schema6["d"] = [("dField",), []]
    tdf2, tdf3, tdf4, tdf5, tdf5b, tdf6 = (
        TicDatFactory(**x) for x in
        (schema2, schema3, schema4, schema5, schema5b, schema6))
    tdf5.set_generator_tables(["a", "c"])
    tdf5b.set_generator_tables(("a", "c"))
    dirPath = makeCleanDir(os.path.join(_scratchDir, "silly"))
    tdf.csv.write_directory(ticDat, dirPath, write_header=headersPresent)
    # Reordered pks: equal only when headers let fields be matched by name...
    ticDat2 = tdf2.csv.create_tic_dat(dirPath, headers_present=headersPresent)
    (self.assertFalse if headersPresent else self.assertTrue)(
        tdf._same_data(ticDat, ticDat2))
    # ...and reordered data fields likewise.
    ticDat3 = tdf3.csv.create_tic_dat(dirPath, headers_present=headersPresent)
    (self.assertTrue if headersPresent else self.assertFalse)(
        tdf._same_data(ticDat, ticDat3))
    if headersPresent :
        # A schema missing a field still reads a consistent subset of the data.
        ticDat4 = tdf4.csv.create_tic_dat(dirPath, headers_present=headersPresent)
        for t in ("a", "b") :
            for k, v in getattr(ticDat4, t).items() :
                for _k, _v in v.items() :
                    self.assertTrue(getattr(ticDat, t)[k][_k] == _v)
                # Only table "b" keeps its full field set; "a" lost one field.
                if set(v) == set(getattr(ticDat, t)[k]) :
                    self.assertTrue(t == "b")
                else :
                    self.assertTrue(t == "a")
    else :
        # Without headers the field-count mismatch is an error.
        self.assertTrue(self.firesException(
            lambda : tdf4.csv.create_tic_dat(dirPath, headers_present=headersPresent)))
    ticDat5 = tdf5.csv.create_tic_dat(dirPath, headers_present=headersPresent)
    (self.assertTrue if headersPresent else self.assertFalse)(
        tdf5._same_data(tdf._keyless(ticDat), ticDat5))
    # Generator tables present as callables; ordinary tables do not.
    self.assertTrue(callable(ticDat5.a) and callable(ticDat5.c)
                    and not callable(ticDat5.b))
    ticDat5b = tdf5b.csv.create_tic_dat(dirPath, headers_present=headersPresent)
    self.assertTrue(tdf5b._same_data(tdf._keyless(ticDat), ticDat5b))
    self.assertTrue(callable(ticDat5b.a) and callable(ticDat5b.c)
                    and not callable(ticDat5b.b))
    # The extra table "d" comes back empty (but present) without error.
    ticDat6 = tdf6.csv.create_tic_dat(dirPath, headers_present=headersPresent)
    self.assertTrue(tdf._same_data(ticDat, ticDat6))
    self.assertTrue(firesException(lambda : tdf6._same_data(ticDat, ticDat6)))
    self.assertTrue(hasattr(ticDat6, "d") and utils.dictish(ticDat6.d))
    # Duplicate-row detection: write raw rows via a pk-free factory.
    allDataTdf = TicDatFactory(**{t: [[], tdf.primary_key_fields.get(t, ()) +
                                          tdf.data_fields.get(t, ())]
                                  for t in tdf.all_tables})
    def writeData(data):
        # Writes the same raw rows to tables a, b and c.
        td = allDataTdf.TicDat(a=data, b=data, c=data)
        allDataTdf.csv.write_directory(td, dirPath, allow_overwrite=True,
                                       write_header=headersPresent)
    writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)])
    ticDatMan = tdf.csv.create_tic_dat(dirPath, headers_present=headersPresent,
                                       freeze_it=True)
    self.assertTrue(len(ticDatMan.a) == 2 and len(ticDatMan.b) == 3)
    self.assertTrue(ticDatMan.b[(1, 20, 30)]["bData"] == 40)
    rowCount = tdf.csv.get_duplicates(dirPath, headers_present=headersPresent)
    self.assertTrue(set(rowCount) == {'a'} and set(rowCount["a"]) == {1}
                    and rowCount["a"][1] == 2)
    writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40), (1, 20, 30, 12)])
    rowCount = tdf.csv.get_duplicates(dirPath, headers_present=headersPresent)
    self.assertTrue(set(rowCount) == {'a', 'b'} and set(rowCount["a"]) == {1}
                    and rowCount["a"][1] == 3)
    self.assertTrue(set(rowCount["b"]) == {(1, 20, 30)}
                    and rowCount["b"][1, 20, 30] == 2)