def testFour(self): objOrig = sillyMeData() staticFactory = TicDatFactory(**sillyMeSchema()) goodTable = lambda t : lambda _t : staticFactory.good_tic_dat_table(_t, t) tables = set(staticFactory.primary_key_fields) ticDat = staticFactory.freeze_me(staticFactory.TicDat(**objOrig)) self.assertTrue(staticFactory.good_tic_dat_object(ticDat)) for t in tables : self._assertSame(objOrig[t], getattr(ticDat,t), goodTable(t)) pickedData = staticFactory.TicDat(**staticFactory.as_dict(ticDat)) self.assertTrue(staticFactory._same_data(ticDat, pickedData)) mutTicDat = staticFactory.TicDat() for k,v in ticDat.a.items() : mutTicDat.a[k] = v.values() for k,v in ticDat.b.items() : mutTicDat.b[k] = v.values()[0] for r in ticDat.c: mutTicDat.c.append(r) for t in tables : self._assertSame(getattr(mutTicDat, t), getattr(ticDat,t), goodTable(t)) self.assertTrue("theboger" not in mutTicDat.a) mutTicDat.a["theboger"]["aData2"] =22 self.assertTrue("theboger" in mutTicDat.a and mutTicDat.a["theboger"].values() == (0, 22, 0)) newSchema = sillyMeSchema() newSchema["a"][1] += ("aData4",) newFactory = TicDatFactory(**newSchema) def makeNewTicDat() : return newFactory.TicDat(a=ticDat.a, b=ticDat.b, c=ticDat.c) newTicDat = makeNewTicDat() self.assertFalse(staticFactory.good_tic_dat_object(newTicDat)) self.assertTrue(newFactory.good_tic_dat_object(ticDat)) self.assertTrue(newFactory._same_data(makeNewTicDat(), newTicDat)) newTicDat.a[ticDat.a.keys()[0]]["aData4"]=12 self.assertFalse(newFactory._same_data(makeNewTicDat(), newTicDat))
def testSillyCleaningOpalyticsOne(self): tdf = TicDatFactory(**sillyMeSchema()) tdf.set_data_type("c", "cData4", number_allowed=False, strings_allowed=['d']) ticDat = tdf.TicDat(**sillyMeData()) input_set = create_inputset_mock(tdf, ticDat) pdf = PanDatFactory(**sillyMeSchema()) pdf.set_data_type("c", "cData4", number_allowed=False, strings_allowed=['d']) panDat = pdf.opalytics.create_pan_dat(input_set, raw_data=True) self.assertTrue(tdf._same_data(pdf.copy_to_tic_dat(panDat), ticDat)) panDatPurged = pdf.opalytics.create_pan_dat(input_set, raw_data=False) self.assertFalse( tdf._same_data(pdf.copy_to_tic_dat(panDatPurged), ticDat)) ticDat.c.pop() ticDat.c.pop(0) self.assertTrue( tdf._same_data(pdf.copy_to_tic_dat(panDatPurged), ticDat))
def testFindDups(self): pdf = PanDatFactory(**sillyMeSchema()) tdf = TicDatFactory( **{ k: [[], list(pkfs) + list(dfs)] for k, (pkfs, dfs) in sillyMeSchema().items() }) rows = [(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)] ticDat = tdf.TicDat(**{t: rows for t in tdf.all_tables}) panDat = pdf.copy_pan_dat(copy_to_pandas_with_reset(tdf, ticDat)) dups = pdf.find_duplicates(panDat) self.assertTrue(set(dups) == {'a'} and set(dups['a']['aField']) == {1}) dups = pdf.find_duplicates(panDat, as_table=False, keep=False) self.assertTrue( set(dups) == {'a'} and dups['a'].value_counts()[True] == 2) dups = pdf.find_duplicates(panDat, as_table=False) self.assertTrue( set(dups) == {'a'} and dups['a'].value_counts()[True] == 1) rows = [(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40), (1, 2, 3, 40)] ticDat = tdf.TicDat(**{t: rows for t in tdf.all_tables}) panDat = pdf.copy_pan_dat(copy_to_pandas_with_reset(tdf, ticDat)) dups = pdf.find_duplicates(panDat, keep=False) self.assertTrue( set(dups) == {'a', 'b'} and set(dups['a']['aField']) == {1}) dups = pdf.find_duplicates(panDat, as_table=False, keep=False) self.assertTrue({k: v.value_counts()[True] for k, v in dups.items()} == { 'a': 3, 'b': 2 })
def testTryCreateSpace(self): def test_(schema_factory, data_factory): tdf = TicDatFactory(**schema_factory()) dat = tdf.copy_tic_dat(data_factory()) mapping = tlingo._try_create_space_case_mapping(tdf, dat)["mapping"] remapdat = tlingo._apply_space_case_mapping( tdf, dat, {v: k for k, v in mapping.items()}) mapmapdat = tlingo._apply_space_case_mapping( tdf, remapdat, mapping) self.assertTrue(tdf._same_data(dat, mapmapdat)) self.assertFalse(tdf._same_data(dat, remapdat)) test_(dietSchema, dietData) test_(netflowSchema, netflowData) test_(sillyMeSchema, lambda: TicDatFactory(**sillyMeSchema()).TicDat(**sillyMeData())) tdf = TicDatFactory(**dietSchema()) dat = tdf.copy_tic_dat(dietData()) dat.foods["ice_cream"] = dat.foods["ice cream"] dat.categories["ICE CREAM"] = {} dat.categories["fAt"] = dat.categories["fat"] failures = tlingo._try_create_space_case_mapping(tdf, dat)["failures"] self.assertTrue( failures == { 'ICE_CREAM': ('ICE CREAM', 'ice cream', 'ice_cream'), 'FAT': ('fAt', 'fat') })
def testSillyTwoTables(self): if not self.can_run: return tdf = TicDatFactory(**sillyMeSchema()) ticDat = tdf.TicDat(**sillyMeDataTwoTables()) filePath = os.path.join(_scratchDir, "sillyMeTwoTables.xls") tdf.xls.write_file(ticDat, filePath) xlsTicDat = tdf.xls.create_tic_dat(filePath) self.assertTrue(tdf._same_data(ticDat, xlsTicDat))
def testSillyTwoTables(self): if not self.can_run: return tdf = TicDatFactory(**sillyMeSchema()) ticDat = tdf.TicDat(**sillyMeDataTwoTables()) dirPath = os.path.join(_scratchDir, "sillyTwoTables") tdf.csv.write_directory(ticDat, dirPath) self.assertFalse(tdf.csv.find_duplicates(dirPath)) csvTicDat = tdf.csv.create_tic_dat(dirPath) self.assertTrue(tdf._same_data(ticDat, csvTicDat))
def testSilly(self): tdf = TicDatFactory(**sillyMeSchema()) tdf.enable_foreign_key_links() oldDat = tdf.freeze_me(tdf.TicDat(**sillyMeData())) oldDatStr = create_opl_text(tdf, oldDat) newDat = read_opl_text(tdf, oldDatStr) self.assertTrue(tdf._same_data(oldDat, newDat)) tdf.opl_prepend = "ooooo" oldDatStr = create_opl_text(tdf, oldDat) newDat = read_opl_text(tdf, oldDatStr) self.assertTrue(tdf._same_data(oldDat, newDat))
def testSillyCleaningOpalyticsTwo(self): tdf = TicDatFactory(**sillyMeSchema()) tdf.add_data_row_predicate("c", lambda row: row["cData4"] == 'd') ticDat = tdf.TicDat(**sillyMeData()) input_set = create_inputset_mock(tdf, ticDat) pdf = PanDatFactory(**sillyMeSchema()) pdf.add_data_row_predicate("c", lambda row: row["cData4"] == 'd') panDat = pdf.opalytics.create_pan_dat(input_set, raw_data=True) self.assertTrue(tdf._same_data(pdf.copy_to_tic_dat(panDat), ticDat)) panDatPurged = pdf.opalytics.create_pan_dat(input_set, raw_data=False) self.assertFalse( tdf._same_data(pdf.copy_to_tic_dat(panDatPurged), ticDat)) ticDat.c.pop() ticDat.c.pop(0) self.assertTrue( tdf._same_data(pdf.copy_to_tic_dat(panDatPurged), ticDat))
def testSillyTwoTables(self): if not self.can_run: return for hack, raw_data in list(product(*(([True, False],)*2))): tdf = TicDatFactory(**sillyMeSchema()) ticDat = tdf.TicDat(**sillyMeData()) self.assertTrue(tdf._same_data(ticDat, tdf.opalytics.create_tic_dat( create_inputset_mock(tdf, ticDat, hack), raw_data=raw_data))) ticDat = tdf.TicDat(**sillyMeDataTwoTables()) self.assertTrue(tdf._same_data(ticDat, tdf.opalytics.create_tic_dat( create_inputset_mock(tdf, ticDat, hack), raw_data=raw_data)))
def testSilly(self): if not self.can_run: return for verbose in [True, False]: tdf = TicDatFactory(**sillyMeSchema()) ticDat = tdf.TicDat(**sillyMeData()) writePath = os.path.join( makeCleanDir(os.path.join(_scratchDir, "netflow")), "file.json") tdf.json.write_file(ticDat, writePath, verbose=verbose) jsonTicDat = tdf.json.create_tic_dat(writePath, freeze_it=True) self.assertFalse(tdf.json.find_duplicates(writePath)) self.assertTrue(tdf._same_data(ticDat, jsonTicDat))
def testSilly(self): if not self.can_run: return tdf = TicDatFactory(**sillyMeSchema()) ticDat = tdf.TicDat(**sillyMeData()) schema2 = sillyMeSchema() schema2["b"][0] = ("bField2", "bField1", "bField3") schema3 = sillyMeSchema() schema3["a"][1] = ("aData2", "aData3", "aData1") schema4 = sillyMeSchema() schema4["a"][1] = ("aData1", "aData3") schema5 = sillyMeSchema() _tuple = lambda x: tuple(x) if utils.containerish(x) else (x, ) for t in ("a", "b"): schema5[t][1] = _tuple(schema5[t][1]) + _tuple(schema5[t][0]) schema5["a"][0], schema5["b"][0] = (), [] schema6 = sillyMeSchema() schema6["d"] = [["dField"], ()] tdf2, tdf3, tdf4, tdf5, tdf6 = (TicDatFactory(**x) for x in (schema2, schema3, schema4, schema5, schema6)) tdf5.set_generator_tables(("a", "c")) tdf5 = tdf5.clone() filePath = os.path.join(_scratchDir, "silly.db") tdf.sql.write_db_data(ticDat, filePath) self.assertFalse(tdf.sql.find_duplicates(filePath)) ticDat2 = tdf2.sql.create_tic_dat(filePath) self.assertFalse(tdf._same_data(ticDat, ticDat2)) ticDat3 = tdf3.sql.create_tic_dat(filePath) self.assertTrue(tdf._same_data(ticDat, ticDat3)) ticDat4 = tdf4.sql.create_tic_dat(filePath) for t in ["a", "b"]: for k, v in getattr(ticDat4, t).items(): for _k, _v in v.items(): self.assertTrue(getattr(ticDat, t)[k][_k] == _v) if set(v) == set(getattr(ticDat, t)[k]): self.assertTrue(t == "b") else: self.assertTrue(t == "a") ticDat5 = tdf5.sql.create_tic_dat(filePath) self.assertTrue(tdf5._same_data(tdf._keyless(ticDat), ticDat5)) self.assertTrue( callable(ticDat5.a) and callable(ticDat5.c) and not callable(ticDat5.b)) self.assertTrue("table d" in self.firesException( lambda: tdf6.sql.create_tic_dat(filePath))) ticDat.a["theboger"] = (1, None, 12) tdf.sql.write_db_data(ticDat, makeCleanPath(filePath)) ticDatNone = tdf.sql.create_tic_dat(filePath, freeze_it=True) self.assertTrue(tdf._same_data(ticDat, ticDatNone)) self.assertTrue(ticDatNone.a["theboger"]["aData2"] == None)
def testSillyTwoTables(self): if not self.can_run: return tdf = TicDatFactory(**sillyMeSchema()) tdf.set_data_type("a", "aField", strings_allowed='*', number_allowed=True) ticDat = tdf.TicDat(**sillyMeDataTwoTables()) dirPath = os.path.join(_scratchDir, "sillyTwoTables") tdf.csv.write_directory(ticDat, dirPath) self.assertFalse(tdf.csv.find_duplicates(dirPath)) csvTicDat = tdf.csv.create_tic_dat(dirPath) self.assertTrue(tdf._same_data(ticDat, csvTicDat))
def testSillyCleaningThree(self): tdf = TicDatFactory(**sillyMeSchema()) tdf.add_data_row_predicate("c", lambda row : row["cData4"] != 4) tdf.add_data_row_predicate("c", lambda row : row["cData4"] != 24) ticDat = tdf.TicDat(**sillyMeData()) input_set = create_inputset_mock(tdf, ticDat) self.assertTrue(tdf._same_data(tdf.opalytics.create_tic_dat(input_set, raw_data=True), ticDat)) ticDatPurged = tdf.opalytics.create_tic_dat(input_set) self.assertFalse(tdf._same_data(ticDatPurged, ticDat)) ticDat.c.pop() ticDat.c.pop(0) self.assertTrue(tdf._same_data(ticDatPurged, ticDat))
def testSilly(self): if not self.canRun: return tdf = TicDatFactory(**dict({"d" : [("dData1", "dData2", "dData3", "dData4"),[]], "e" : [["eData"],[]]}, **sillyMeSchema())) ticDat = tdf.copy_to_pandas(tdf.TicDat(**sillyMeData())) self.assertFalse(len(ticDat.d) + len(ticDat.e)) oldDat = tdf.freeze_me(tdf.TicDat(**dict({"d" : {(1,2,3,4):{}, (1, "b","c","d"):{}, ("a", 2,"c","d"):{}}, "e" : {11:{},"boger":{}}}, **sillyMeData()))) ticDat = tdf.copy_to_pandas(oldDat, drop_pk_columns=True) def checkTicDat(): self.assertTrue(len(ticDat.d) ==3 and len(ticDat.e) == 2) self.assertTrue(set(ticDat.d.index.values) == {(1,2,3,4), (1, "b","c","d"), ("a", 2,"c","d")}) self.assertTrue(set(ticDat.e.index.values) == {11,"boger"}) self.assertTrue(len(ticDat.c) == len(oldDat.c) == 3) self.assertTrue(ticDat.c.loc[i] == oldDat.c[i] for i in range(3)) checkTicDat() self.assertFalse(hasattr(ticDat.d, "dData1") or hasattr(ticDat.e, "eData")) ticDat = tdf.copy_to_pandas(oldDat, drop_pk_columns=False) checkTicDat() self.assertTrue(ticDat.e.loc[11].values[0] == 11) if sys.version_info[0] == 2: self.assertTrue(len(ticDat.d.dData1.sloc[1,:,:,:]) == 2) else : # very strange infrequent bug issue that I will investigate later self.assertTrue(len(ticDat.d.dData1.sloc[1]) == 2) ticDat = tdf.copy_to_pandas(oldDat) checkTicDat() if sys.version_info[0] == 2: self.assertTrue(len(ticDat.d.dData1.sloc[1,:,:,:]) == 2) else: self.assertTrue(len(ticDat.d.dData1.sloc[1]) == 2) self.assertTrue(ticDat.e.loc[11].values[0] == 11) self.assertTrue(set(ticDat.d.columns) == {"dData%s"%s for s in range(5)[1:]}) rebornTicDat = tdf.TicDat(**{t:getattr(ticDat, t) for t in tdf.all_tables}) self.assertTrue(tdf._same_data(rebornTicDat, oldDat)) ticDat.b = ticDat.b.bData rebornTicDat = tdf.TicDat(**{t:getattr(ticDat, t) for t in tdf.all_tables}) self.assertTrue(tdf._same_data(rebornTicDat, oldDat))
def testSillyTwoTablesOpalytics(self): if not self.can_run: return for hack, raw_data in list(itertools.product(*(([True, False], ) * 2))): tdf = TicDatFactory(**sillyMeSchema()) ticDat = tdf.TicDat(**sillyMeData()) inputset = create_inputset_mock(tdf, ticDat, hack) pdf = PanDatFactory(**tdf.schema()) panDat = pdf.opalytics.create_pan_dat(inputset, raw_data=raw_data) self.assertTrue(tdf._same_data(ticDat, pdf.copy_to_tic_dat(panDat))) ticDat = tdf.TicDat(**sillyMeDataTwoTables()) inputset = create_inputset_mock(tdf, ticDat, hack) pdf = PanDatFactory(**tdf.schema()) panDat = pdf.opalytics.create_pan_dat(inputset, raw_data=raw_data) self.assertTrue(tdf._same_data(ticDat, pdf.copy_to_tic_dat(panDat)))
def testSilly(self): tdf = TicDatFactory(**sillyMeSchema()) ticDat = tdf.TicDat(**sillyMeData()) schema2 = sillyMeSchema() schema2["b"][0] = ("bField2", "bField1", "bField3") schema3 = sillyMeSchema() schema3["a"][1] = ("aData2", "aData3", "aData1") schema4 = sillyMeSchema() schema4["a"][1] = ("aData1", "aData3") schema5 = sillyMeSchema() _tuple = lambda x : tuple(x) if utils.containerish(x) else (x,) for t in ("a", "b") : schema5[t][1] = _tuple(schema5[t][1]) + _tuple(schema5[t][0]) schema5["a"][0], schema5["b"][0] = (), [] schema6 = sillyMeSchema() schema6["d"] = [["dField"],()] tdf2, tdf3, tdf4, tdf5, tdf6 = (TicDatFactory(**x) for x in (schema2, schema3, schema4, schema5, schema6)) tdf5.set_generator_tables(("a","c")) filePath = os.path.join(_scratchDir, "silly.db") tdf.sql.write_db_data(ticDat, filePath) ticDat2 = tdf2.sql.create_tic_dat(filePath) self.assertFalse(tdf._same_data(ticDat, ticDat2)) ticDat3 = tdf3.sql.create_tic_dat(filePath) self.assertTrue(tdf._same_data(ticDat, ticDat3)) ticDat4 = tdf4.sql.create_tic_dat(filePath) for t in ["a","b"]: for k,v in getattr(ticDat4, t).items() : for _k, _v in v.items() : self.assertTrue(getattr(ticDat, t)[k][_k] == _v) if set(v) == set(getattr(ticDat, t)[k]) : self.assertTrue(t == "b") else : self.assertTrue(t == "a") ticDat5 = tdf5.sql.create_tic_dat(filePath) self.assertTrue(tdf5._same_data(tdf._keyless(ticDat), ticDat5)) self.assertTrue(callable(ticDat5.a) and callable(ticDat5.c) and not callable(ticDat5.b)) self.assertTrue("table d" in self.firesException(lambda : tdf6.sql.create_tic_dat(filePath))) ticDat.a["theboger"] = (1, None, 12) tdf.sql.write_db_data(ticDat, makeCleanPath(filePath)) ticDatNone = tdf.sql.create_tic_dat(filePath, freeze_it=True) self.assertTrue(tdf._same_data(ticDat, ticDatNone)) self.assertTrue(ticDatNone.a["theboger"]["aData2"] == None)
def testSilly(self): if not self.can_run: return tdf = TicDatFactory(**sillyMeSchema()) ticDat = tdf.TicDat(**sillyMeData()) schema2 = sillyMeSchema() schema2["b"][0] = ("bField2", "bField1", "bField3") schema3 = sillyMeSchema() schema3["a"][1] = ("aData2", "aData3", "aData1") schema4 = sillyMeSchema() schema4["a"][1] = ("aData1", "aData3") schema5 = sillyMeSchema() _tuple = lambda x: tuple(x) if utils.containerish(x) else (x, ) for t in ("a", "b"): schema5[t][1] = _tuple(schema5[t][1]) + _tuple(schema5[t][0]) schema5["a"][0], schema5["b"][0] = (), [] schema6 = sillyMeSchema() schema6["d"] = [["dField"], ()] tdf2, tdf3, tdf4, tdf5, tdf6 = (TicDatFactory(**x) for x in (schema2, schema3, schema4, schema5, schema6)) tdf5.set_generator_tables(("a", "c")) filePath = os.path.join(_scratchDir, "silly.xls") tdf.xls.write_file(ticDat, filePath) ticDat2 = tdf2.xls.create_tic_dat(filePath) self.assertFalse(tdf._same_data(ticDat, ticDat2)) ticDat3 = tdf3.xls.create_tic_dat(filePath) self.assertTrue(tdf._same_data(ticDat, ticDat3)) ticDat4 = tdf4.xls.create_tic_dat(filePath) for t in ["a", "b"]: for k, v in getattr(ticDat4, t).items(): for _k, _v in v.items(): self.assertTrue(getattr(ticDat, t)[k][_k] == _v) if set(v) == set(getattr(ticDat, t)[k]): self.assertTrue(t == "b") else: self.assertTrue(t == "a") ticDat5 = tdf5.xls.create_tic_dat(filePath, treat_inf_as_infinity=False) self.assertTrue(tdf5._same_data(tdf._keyless(ticDat), ticDat5)) self.assertTrue( callable(ticDat5.a) and callable(ticDat5.c) and not callable(ticDat5.b)) ticDat6 = tdf6.xls.create_tic_dat(filePath) self.assertTrue(tdf._same_data(ticDat, ticDat6)) self.assertTrue( firesException(lambda: tdf6._same_data(ticDat, ticDat6))) self.assertTrue(hasattr(ticDat6, "d") and utils.dictish(ticDat6.d)) def writeData(data, write_header="same"): assert filePath.endswith(".xls") assert not write_header or write_header in ("lower", "same", "duped") import xlwt book = xlwt.Workbook() for t in tdf.all_tables: sheet = book.add_sheet(t) if write_header: all_fields = tdf.primary_key_fields.get( t, ()) + tdf.data_fields.get(t, ()) for i, f in enumerate( (2 if write_header == "duped" else 1) * all_fields): sheet.write( 0, i, f.lower() if write_header == "lower" or i >= len(all_fields) else f) for rowInd, row in enumerate(data): for fieldInd, cellValue in enumerate( (2 if write_header == "duped" else 1) * row): sheet.write(rowInd + (1 if write_header else 0), fieldInd, cellValue) if os.path.exists(filePath): os.remove(filePath) book.save(filePath) if write_header in [ "lower", "same" ]: # will use pandas to generate the xlsx file version file_path_x = filePath + "x" if os.path.exists(file_path_x): os.remove(file_path_x) writer = utils.pd.ExcelWriter(file_path_x) for t, (pks, dfs) in tdf.schema().items(): fields = pks + dfs if write_header == "lower": fields = [_.lower() for _ in fields] d = {f: [] for f in fields} for row in data: for f, c in zip(fields, row): d[f].append(c) utils.pd.DataFrame(d).to_excel(writer, t, index=False) writer.save() writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)], write_header="duped") self.assertTrue( self.firesException( lambda: tdf.xls.create_tic_dat(filePath, freeze_it=True))) writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)]) ticDatMan = tdf.xls.create_tic_dat(filePath, freeze_it=True) self.assertTrue(len(ticDatMan.a) == 2 and len(ticDatMan.b) == 3) self.assertTrue(ticDatMan.b[1, 20, 30]["bData"] == 40) for f in [filePath, filePath + "x"]: rowCount = tdf.xls.find_duplicates(f) self.assertTrue( set(rowCount) == {'a'} and set(rowCount["a"]) == {1} and rowCount["a"][1] == 2) writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)], write_header="lower") ticDatMan = tdf.xls.create_tic_dat(filePath, freeze_it=True) self.assertTrue(len(ticDatMan.a) == 2 and len(ticDatMan.b) == 3) self.assertTrue(ticDatMan.b[1, 20, 30]["bData"] == 40) for f in [filePath, filePath + "x"]: rowCount = tdf.xls.find_duplicates(f) self.assertTrue( set(rowCount) == {'a'} and set(rowCount["a"]) == {1} and rowCount["a"][1] == 2) writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)], write_header=False) self.assertTrue( self.firesException( lambda: tdf.xls.create_tic_dat(filePath, freeze_it=True))) ticDatMan = tdf.xls.create_tic_dat(filePath, freeze_it=True, headers_present=False) self.assertTrue(len(ticDatMan.a) == 2 and len(ticDatMan.b) == 3) self.assertTrue(ticDatMan.b[1, 20, 30]["bData"] == 40) rowCount = tdf.xls.find_duplicates(filePath, headers_present=False) self.assertTrue( set(rowCount) == {'a'} and set(rowCount["a"]) == {1} and rowCount["a"][1] == 2) ticDat.a["theboger"] = (1, None, 12) tdf.xls.write_file(ticDat, filePath, allow_overwrite=True) ticDatNone = tdf.xls.create_tic_dat(filePath, freeze_it=True) # THIS IS A FLAW - but a minor one. None's are hard to represent. It is turning into the empty string here. # not sure how to handle this, but documenting for now. self.assertFalse(tdf._same_data(ticDat, ticDatNone)) self.assertTrue(ticDatNone.a["theboger"]["aData2"] == "") # the workaround for this flaw is to set the data type to be nullabe but not allow the empty string tdfwa = TicDatFactory(**sillyMeSchema()) tdfwa.set_data_type("a", "aData2", nullable=True) ticDatNone = tdfwa.xls.create_tic_dat(filePath, freeze_it=True) self.assertTrue(tdf._same_data(ticDat, ticDatNone)) self.assertTrue(ticDatNone.a["theboger"]["aData2"] == None) # checking the same thing with .xlsx - using openpyxl, None is indeed recovered even without tdfwa munging! tdf.xls.write_file(ticDat, filePath + "x", allow_overwrite=True) ticDatNone = tdf.xls.create_tic_dat(filePath + "x", freeze_it=True) self.assertTrue(tdf._same_data(ticDat, ticDatNone)) self.assertTrue(ticDatNone.a["theboger"]["aData2"] == None) ticDatNone = tdfwa.xls.create_tic_dat(filePath + "x", freeze_it=True) self.assertTrue(tdf._same_data(ticDat, ticDatNone)) self.assertTrue(ticDatNone.a["theboger"]["aData2"] == None) writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40), (1, 20, 30, 12)]) for f in [filePath, filePath + "x"]: rowCount = tdf.xls.find_duplicates(f) self.assertTrue( set(rowCount) == {'a', 'b'} and set(rowCount["a"]) == {1} and rowCount["a"][1] == 3) self.assertTrue( set(rowCount["b"]) == {(1, 20, 30)} and rowCount["b"][1, 20, 30] == 2)
def testSilly(self): tdf = TicDatFactory(**sillyMeSchema()) ticDat = tdf.TicDat(**sillyMeData()) filePath = os.path.join(_scratchDir, "silly.mdb") self.assertTrue(firesException(lambda : tdf.mdb.write_file(ticDat, makeCleanPath(filePath)))) def sillyMeCleanData() : return { "a" : {"1" : (1, 2, "3"), "b" : (12, 12.2, "twelve"), "c" : (11, 12, "thirt")}, "b" : {(1, 2, "3") : 1, (3, 4, "b") : 12}, "c" : ((1, "2", 3, 4), (0.2, "b", 0.3, 0.4), (1.2, "b", 12, 24) ) } ticDat = tdf.TicDat(**sillyMeCleanData()) self.assertTrue(firesException(lambda : tdf.mdb.write_file(ticDat, makeCleanPath(filePath)))) def makeCleanSchema() : tdf.mdb.write_schema(makeCleanPath(filePath), a={"aData3" : "text"}, b = {"bField1" : "int", "bField2" : "int"}, c={"cData2" : "text"}) return filePath tdf.mdb.write_file(ticDat, makeCleanSchema()) mdbTicDat = tdf.mdb.create_tic_dat(filePath) self.assertTrue(tdf._same_data(ticDat, mdbTicDat)) schema2 = sillyMeSchema() schema2["b"][0] = ("bField2", "bField1", "bField3") schema3 = sillyMeSchema() schema3["a"][1] = ("aData2", "aData3", "aData1") schema4 = sillyMeSchema() schema4["a"][1] = ("aData1", "aData3") schema5 = sillyMeSchema() _tuple = lambda x : tuple(x) if utils.containerish(x) else (x,) for t in ("a", "b") : schema5[t][1] = _tuple(schema5[t][1]) + _tuple(schema5[t][0]) schema5["a"][0], schema5["b"][0] = (), [] schema6 = sillyMeSchema() schema6["d"] = [["dField"],()] tdf2, tdf3, tdf4, tdf5, tdf6 = (TicDatFactory(**x) for x in (schema2, schema3, schema4, schema5, schema6)) tdf5.set_generator_tables(("a","c")) ticDat2 = tdf2.mdb.create_tic_dat(filePath) self.assertFalse(tdf._same_data(ticDat, ticDat2)) ticDat3 = tdf3.mdb.create_tic_dat(filePath) self.assertTrue(tdf._same_data(ticDat, ticDat3)) ticDat4 = tdf4.mdb.create_tic_dat(filePath) for t in ["a","b"]: for k,v in getattr(ticDat4, t).items() : for _k, _v in v.items() : self.assertTrue(getattr(ticDat, t)[k][_k] == _v) if set(v) == set(getattr(ticDat, t)[k]) : self.assertTrue(t == "b") else : self.assertTrue(t == "a") ticDat5 = tdf5.mdb.create_tic_dat(filePath) self.assertTrue(tdf5._same_data(tdf._keyless(ticDat), ticDat5)) self.assertTrue(callable(ticDat5.a) and callable(ticDat5.c) and not callable(ticDat5.b)) self.assertTrue("table d" in self.firesException(lambda : tdf6.mdb.create_tic_dat(filePath))) ticDat.a["theboger"] = (1, None, "twelve") tdf.mdb.write_file(ticDat, makeCleanSchema()) ticDatNone = tdf.mdb.create_tic_dat(filePath, freeze_it=True) self.assertTrue(tdf._same_data(ticDat, ticDatNone)) self.assertTrue(ticDatNone.a["theboger"]["aData2"] == None)
def testSilly(self): if not _can_accdb_unit_test: return tdf = TicDatFactory(**sillyMeSchema()) ticDat = tdf.TicDat(**sillyMeData()) filePath = os.path.join(_scratchDir, "silly.accdb") self.assertTrue(firesException(lambda : tdf.mdb.write_file(ticDat, makeCleanPath(filePath)))) def sillyMeCleanData() : return { "a" : {"1" : (1, 2, "3"), "b" : (12, 12.2, "twelve"), "c" : (11, 12, "thirt")}, "b" : {(1, 2, "3") : 1, (3, 4, "b") : 12}, "c" : ((1, "2", 3, 4), (0.2, "b", 0.3, 0.4), (1.2, "b", 12, 24) ) } ticDat = tdf.TicDat(**sillyMeCleanData()) self.assertTrue(firesException(lambda : tdf.mdb.write_file(ticDat, makeCleanPath(filePath)))) def makeCleanSchema() : tdf.mdb.write_schema(makeCleanPath(filePath), a={"aData3" : "text"}, b = {"bField1" : "int", "bField2" : "int"}, c={"cData2" : "text"}) return filePath tdf.mdb.write_file(ticDat, makeCleanSchema()) self.assertFalse(tdf.mdb.find_duplicates(filePath)) accdbTicDat = tdf.mdb.create_tic_dat(filePath) self.assertTrue(tdf._same_data(ticDat, accdbTicDat)) schema2 = sillyMeSchema() schema2["b"][0] = ("bField2", "bField1", "bField3") schema3 = sillyMeSchema() schema3["a"][1] = ("aData2", "aData3", "aData1") schema4 = sillyMeSchema() schema4["a"][1] = ("aData1", "aData3") schema5 = sillyMeSchema() _tuple = lambda x : tuple(x) if utils.containerish(x) else (x,) for t in ("a", "b") : schema5[t][1] = _tuple(schema5[t][1]) + _tuple(schema5[t][0]) schema5["a"][0], schema5["b"][0] = (), [] schema6 = sillyMeSchema() schema6["d"] = [["dField"],()] tdf2, tdf3, tdf4, tdf5, tdf6 = (TicDatFactory(**x) for x in (schema2, schema3, schema4, schema5, schema6)) tdf5.set_generator_tables(("a","c")) ticDat2 = tdf2.mdb.create_tic_dat(filePath) self.assertFalse(tdf._same_data(ticDat, ticDat2)) ticDat3 = tdf3.mdb.create_tic_dat(filePath) self.assertTrue(tdf._same_data(ticDat, ticDat3)) ticDat4 = tdf4.mdb.create_tic_dat(filePath) for t in ["a","b"]: for k,v in getattr(ticDat4, t).items() : for _k, _v in v.items() : self.assertTrue(getattr(ticDat, t)[k][_k] == _v) if set(v) == set(getattr(ticDat, t)[k]) : self.assertTrue(t == "b") else : self.assertTrue(t == "a") ticDat5 = tdf5.mdb.create_tic_dat(filePath) self.assertTrue(tdf5._same_data(tdf._keyless(ticDat), ticDat5)) self.assertTrue(callable(ticDat5.a) and callable(ticDat5.c) and not callable(ticDat5.b)) self.assertTrue("table d" in self.firesException(lambda : tdf6.mdb.create_tic_dat(filePath))) ticDat.a["theboger"] = (1, None, "twelve") tdf.mdb.write_file(ticDat, makeCleanSchema()) ticDatNone = tdf.mdb.create_tic_dat(filePath, freeze_it=True) self.assertTrue(tdf._same_data(ticDat, ticDatNone)) self.assertTrue(ticDatNone.a["theboger"]["aData2"] == None)
def testNine(self): for schema in (dietSchema(), sillyMeSchema(), netflowSchema()) : d = TicDatFactory(**schema).schema() assert d == {k : map(list, v) for k,v in schema.items()}
def doTest(headersPresent) : tdf = TicDatFactory(**sillyMeSchema()) ticDat = tdf.TicDat(**sillyMeData()) schema2 = sillyMeSchema() schema2["b"][0] = ("bField2", "bField1", "bField3") schema3 = sillyMeSchema() schema3["a"][1] = ("aData2", "aData3", "aData1") schema4 = sillyMeSchema() schema4["a"][1] = ("aData1", "aData3") schema5 = sillyMeSchema() _tuple = lambda x : tuple(x) if utils.containerish(x) else (x,) for t in ("a", "b") : schema5[t][1] = _tuple(schema5[t][1]) + _tuple(schema5[t][0]) schema5["a"][0], schema5["b"][0] = (), [] schema5b = sillyMeSchema() for t in ("a", "b") : schema5b[t][1] = _tuple(schema5b[t][0]) + _tuple(schema5b[t][1]) schema5b["a"][0], schema5b["b"][0] = (), [] schema6 = sillyMeSchema() schema6["d"] = [("dField",),[]] tdf2, tdf3, tdf4, tdf5, tdf5b, tdf6 = (TicDatFactory(**x) for x in (schema2, schema3, schema4, schema5, schema5b, schema6)) tdf5.set_generator_tables(["a", "c"]) tdf5b.set_generator_tables(("a", "c")) dirPath = makeCleanDir(os.path.join(_scratchDir, "silly")) tdf.csv.write_directory(ticDat, dirPath, write_header=headersPresent) ticDat2 = tdf2.csv.create_tic_dat(dirPath, headers_present=headersPresent) (self.assertFalse if headersPresent else self.assertTrue)(tdf._same_data(ticDat, ticDat2)) ticDat3 = tdf3.csv.create_tic_dat(dirPath, headers_present=headersPresent) (self.assertTrue if headersPresent else self.assertFalse)(tdf._same_data(ticDat, ticDat3)) if headersPresent : ticDat4 = tdf4.csv.create_tic_dat(dirPath, headers_present=headersPresent) for t in ("a", "b") : for k,v in getattr(ticDat4, t).items() : for _k, _v in v.items() : self.assertTrue(getattr(ticDat, t)[k][_k] == _v) if set(v) == set(getattr(ticDat, t)[k]) : self.assertTrue(t == "b") else : self.assertTrue(t == "a") else : self.assertTrue(self.firesException(lambda : tdf4.csv.create_tic_dat(dirPath, headers_present=headersPresent))) ticDat5 = tdf5.csv.create_tic_dat(dirPath, headers_present=headersPresent) (self.assertTrue if headersPresent else self.assertFalse)( tdf5._same_data(tdf._keyless(ticDat), ticDat5)) self.assertTrue(callable(ticDat5.a) and callable(ticDat5.c) and not callable(ticDat5.b)) ticDat5b = tdf5b.csv.create_tic_dat(dirPath, headers_present=headersPresent) self.assertTrue(tdf5b._same_data(tdf._keyless(ticDat), ticDat5b)) self.assertTrue(callable(ticDat5b.a) and callable(ticDat5b.c) and not callable(ticDat5b.b)) ticDat6 = tdf6.csv.create_tic_dat(dirPath, headers_present=headersPresent) self.assertTrue(tdf._same_data(ticDat, ticDat6)) self.assertTrue(firesException(lambda : tdf6._same_data(ticDat, ticDat6))) self.assertTrue(hasattr(ticDat6, "d") and utils.dictish(ticDat6.d)) allDataTdf = TicDatFactory(**{t:[[], tdf.primary_key_fields.get(t, ()) + tdf.data_fields.get(t, ())] for t in tdf.all_tables}) def writeData(data): td = allDataTdf.TicDat(a = data, b=data, c=data) allDataTdf.csv.write_directory(td, dirPath, allow_overwrite=True, write_header=headersPresent) writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)]) ticDatMan = tdf.csv.create_tic_dat(dirPath, headers_present=headersPresent, freeze_it=True) self.assertTrue(len(ticDatMan.a) == 2 and len(ticDatMan.b) == 3) self.assertTrue(ticDatMan.b[(1, 20, 30)]["bData"] == 40) rowCount = tdf.csv.get_duplicates(dirPath, headers_present= headersPresent) self.assertTrue(set(rowCount) == {'a'} and set(rowCount["a"]) == {1} and rowCount["a"][1]==2) writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40), (1,20,30,12)]) rowCount = tdf.csv.get_duplicates(dirPath, headers_present=headersPresent) self.assertTrue(set(rowCount) == {'a', 'b'} and set(rowCount["a"]) == {1} and rowCount["a"][1]==3) self.assertTrue(set(rowCount["b"]) == {(1,20,30)} and rowCount["b"][1,20,30]==2)
def testSilly(self): tdf = TicDatFactory(**sillyMeSchema()) ticDat = tdf.TicDat(**sillyMeData()) schema2 = sillyMeSchema() schema2["b"][0] = ("bField2", "bField1", "bField3") schema3 = sillyMeSchema() schema3["a"][1] = ("aData2", "aData3", "aData1") schema4 = sillyMeSchema() schema4["a"][1] = ("aData1", "aData3") schema5 = sillyMeSchema() _tuple = lambda x : tuple(x) if utils.containerish(x) else (x,) for t in ("a", "b") : schema5[t][1] = _tuple(schema5[t][1]) + _tuple(schema5[t][0]) schema5["a"][0], schema5["b"][0] = (), [] schema6 = sillyMeSchema() schema6["d"] = [["dField"],()] tdf2, tdf3, tdf4, tdf5, tdf6 = (TicDatFactory(**x) for x in (schema2, schema3, schema4, schema5, schema6)) tdf5.set_generator_tables(("a","c")) filePath = os.path.join(_scratchDir, "silly.xls") tdf.xls.write_file(ticDat, filePath) ticDat2 = tdf2.xls.create_tic_dat(filePath) self.assertFalse(tdf._same_data(ticDat, ticDat2)) ticDat3 = tdf3.xls.create_tic_dat(filePath) self.assertTrue(tdf._same_data(ticDat, ticDat3)) ticDat4 = tdf4.xls.create_tic_dat(filePath) for t in ["a","b"]: for k,v in getattr(ticDat4, t).items() : for _k, _v in v.items() : self.assertTrue(getattr(ticDat, t)[k][_k] == _v) if set(v) == set(getattr(ticDat, t)[k]) : self.assertTrue(t == "b") else : self.assertTrue(t == "a") ticDat5 = tdf5.xls.create_tic_dat(filePath) self.assertTrue(tdf5._same_data(tdf._keyless(ticDat), ticDat5)) self.assertTrue(callable(ticDat5.a) and callable(ticDat5.c) and not callable(ticDat5.b)) ticDat6 = tdf6.xls.create_tic_dat(filePath) self.assertTrue(tdf._same_data(ticDat, ticDat6)) self.assertTrue(firesException(lambda : tdf6._same_data(ticDat, ticDat6))) self.assertTrue(hasattr(ticDat6, "d") and utils.dictish(ticDat6.d)) def writeData(data, write_header = True): import xlwt book = xlwt.Workbook() for t in tdf.all_tables : sheet = book.add_sheet(t) if write_header : for i,f in enumerate(tdf.primary_key_fields.get(t, ()) + tdf.data_fields.get(t, ())) : sheet.write(0, i, f) for rowInd, row in enumerate(data) : for fieldInd, cellValue in enumerate(row): sheet.write(rowInd+ (1 if write_header else 0), fieldInd, cellValue) if os.path.exists(filePath): os.remove(filePath) book.save(filePath) writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)]) ticDatMan = tdf.xls.create_tic_dat(filePath, freeze_it=True) self.assertTrue(len(ticDatMan.a) == 2 and len(ticDatMan.b) == 3) self.assertTrue(ticDatMan.b[1, 20, 30]["bData"] == 40) rowCount = tdf.xls.get_duplicates(filePath) self.assertTrue(set(rowCount) == {'a'} and set(rowCount["a"]) == {1} and rowCount["a"][1]==2) writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)], write_header=False) self.assertTrue(self.firesException(lambda : tdf.xls.create_tic_dat(filePath, freeze_it=True))) ticDatMan = tdf.xls.create_tic_dat(filePath, freeze_it=True, headers_present=False) self.assertTrue(len(ticDatMan.a) == 2 and len(ticDatMan.b) == 3) self.assertTrue(ticDatMan.b[1, 20, 30]["bData"] == 40) rowCount = tdf.xls.get_duplicates(filePath, headers_present=False) self.assertTrue(set(rowCount) == {'a'} and set(rowCount["a"]) == {1} and rowCount["a"][1]==2) ticDat.a["theboger"] = (1, None, 12) tdf.xls.write_file(ticDat, filePath, allow_overwrite=True) ticDatNone = tdf.xls.create_tic_dat(filePath, freeze_it=True) # THIS IS A FLAW - but a minor one. None's are hard to represent. It is turning into the empty string here. # not sure how to handle this, but documenting for now. self.assertFalse(tdf._same_data(ticDat, ticDatNone)) self.assertTrue(ticDatNone.a["theboger"]["aData2"] == "") writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40), (1,20,30,12)]) rowCount = tdf.xls.get_duplicates(filePath) self.assertTrue(set(rowCount) == {'a', 'b'} and set(rowCount["a"]) == {1} and rowCount["a"][1]==3) self.assertTrue(set(rowCount["b"]) == {(1,20,30)} and rowCount["b"][1,20,30]==2)
def doTest(headersPresent) : tdf = TicDatFactory(**sillyMeSchema()) ticDat = tdf.TicDat(**sillyMeData()) schema2 = sillyMeSchema() schema2["b"][0] = ("bField2", "bField1", "bField3") schema3 = sillyMeSchema() schema3["a"][1] = ("aData2", "aData3", "aData1") schema4 = sillyMeSchema() schema4["a"][1] = ("aData1", "aData3") schema5 = sillyMeSchema() _tuple = lambda x : tuple(x) if utils.containerish(x) else (x,) for t in ("a", "b") : schema5[t][1] = _tuple(schema5[t][1]) + _tuple(schema5[t][0]) schema5["a"][0], schema5["b"][0] = (), [] schema5b = sillyMeSchema() for t in ("a", "b") : schema5b[t][1] = _tuple(schema5b[t][0]) + _tuple(schema5b[t][1]) schema5b["a"][0], schema5b["b"][0] = (), [] schema6 = sillyMeSchema() schema6["d"] = [("dField",),[]] tdf2, tdf3, tdf4, tdf5, tdf5b, tdf6 = (TicDatFactory(**x) for x in (schema2, schema3, schema4, schema5, schema5b, schema6)) tdf5.set_generator_tables(["a", "c"]) tdf5b.set_generator_tables(("a", "c")) dirPath = makeCleanDir(os.path.join(_scratchDir, "silly")) tdf.csv.write_directory(ticDat, dirPath, write_header=headersPresent) ticDat2 = tdf2.csv.create_tic_dat(dirPath, headers_present=headersPresent) (self.assertFalse if headersPresent else self.assertTrue)(tdf._same_data(ticDat, ticDat2)) ticDat3 = tdf3.csv.create_tic_dat(dirPath, headers_present=headersPresent) (self.assertTrue if headersPresent else self.assertFalse)(tdf._same_data(ticDat, ticDat3)) if headersPresent : ticDat4 = tdf4.csv.create_tic_dat(dirPath, headers_present=headersPresent) for t in ("a", "b") : for k,v in getattr(ticDat4, t).items() : for _k, _v in v.items() : self.assertTrue(getattr(ticDat, t)[k][_k] == _v) if set(v) == set(getattr(ticDat, t)[k]) : self.assertTrue(t == "b") else : self.assertTrue(t == "a") else : self.assertTrue(self.firesException(lambda : tdf4.csv.create_tic_dat(dirPath, headers_present=headersPresent))) ticDat5 = tdf5.csv.create_tic_dat(dirPath, headers_present=headersPresent) (self.assertTrue if headersPresent else self.assertFalse)( tdf5._same_data(tdf._keyless(ticDat), ticDat5)) self.assertTrue(callable(ticDat5.a) and callable(ticDat5.c) and not callable(ticDat5.b)) ticDat5b = tdf5b.csv.create_tic_dat(dirPath, headers_present=headersPresent) self.assertTrue(tdf5b._same_data(tdf._keyless(ticDat), ticDat5b)) self.assertTrue(callable(ticDat5b.a) and callable(ticDat5b.c) and not callable(ticDat5b.b)) ticDat6 = tdf6.csv.create_tic_dat(dirPath, headers_present=headersPresent) self.assertTrue(tdf._same_data(ticDat, ticDat6)) self.assertTrue(firesException(lambda : tdf6._same_data(ticDat, ticDat6))) self.assertTrue(hasattr(ticDat6, "d") and utils.dictish(ticDat6.d)) allDataTdf = TicDatFactory(**{t:[[], tdf.primary_key_fields.get(t, ()) + tdf.data_fields.get(t, ())] for t in tdf.all_tables}) def writeData(data): td = allDataTdf.TicDat(a = data, b=data, c=data) allDataTdf.csv.write_directory(td, dirPath, allow_overwrite=True, write_header=headersPresent) writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)]) ticDatMan = tdf.csv.create_tic_dat(dirPath, headers_present=headersPresent, freeze_it=True) self.assertTrue(len(ticDatMan.a) == 2 and len(ticDatMan.b) == 3) self.assertTrue(ticDatMan.b[(1, 20, 30)]["bData"] == 40) rowCount = tdf.csv.find_duplicates(dirPath, headers_present= headersPresent) self.assertTrue(set(rowCount) == {'a'} and set(rowCount["a"]) == {1} and rowCount["a"][1]==2) writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40), (1,20,30,12)]) rowCount = tdf.csv.find_duplicates(dirPath, headers_present=headersPresent) self.assertTrue(set(rowCount) == {'a', 'b'} and set(rowCount["a"]) == {1} and rowCount["a"][1]==3) self.assertTrue(set(rowCount["b"]) == {(1,20,30)} and rowCount["b"][1,20,30]==2)