def testDiet(self):
    """Round-trip the diet data through the opalytics inputset mock.

    Exercises every combination of the hack / raw_data / activeEnabled
    flags, checks that duplicates are absent, that mutable vs frozen
    copies behave as expected, and that an extra schema field ("dmy")
    missing from the inputset is reported by name.
    """
    if not self.can_run:
        return
    # product of three two-valued flags -> 8 flag combinations
    for hack, raw_data, activeEnabled in list(product(*(([True, False],)*3))):
        tdf = TicDatFactory(**dietSchema())
        ticDat = tdf.freeze_me(tdf.copy_tic_dat(dietData()))
        inputset = create_inputset_mock(tdf, ticDat, hack, activeEnabled)
        self.assertFalse(tdf.opalytics.find_duplicates(inputset, raw_data=raw_data))
        ticDat2 = tdf.opalytics.create_tic_dat(inputset, raw_data=raw_data)
        self.assertTrue(tdf._same_data(ticDat, ticDat2))
        # mutation succeeds on the unfrozen copy ...
        def change():
            ticDat2.categories["calories"]["minNutrition"] = 12
        self.assertFalse(firesException(change))
        self.assertFalse(tdf._same_data(ticDat, ticDat2))
        # ... and fails once the copy is created frozen
        ticDat2 = tdf.opalytics.create_tic_dat(inputset, freeze_it=True, raw_data=raw_data)
        self.assertTrue(tdf._same_data(ticDat, ticDat2))
        self.assertTrue(firesException(change))
        self.assertTrue(tdf._same_data(ticDat, ticDat2))
        # widen the schema with an extra data field "dmy" per table
        tdf2 = TicDatFactory(**{k: [pks, list(dfs) + ["dmy"]]
                                for k, (pks, dfs) in tdf.schema().items()})
        _dat = tdf2.copy_tic_dat(ticDat)
        # reading a wider inputset with the narrower schema still works
        self.assertTrue(tdf._same_data(ticDat,
            tdf.opalytics.create_tic_dat(create_inputset_mock(tdf2, _dat, hack),
                                         raw_data=raw_data)))
        # reading the narrower inputset with the wider schema reports the missing field
        ex = self.firesException(lambda: tdf2.opalytics.create_tic_dat(inputset, raw_data=raw_data))
        self.assertTrue("field dmy can't be found" in ex)
def testDiet(self):
    """Round-trip the diet data through the json writer/reader.

    Covers verbose and compact output, duplicate detection, freeze
    semantics, case-insensitive schema matching, and a table name with
    an embedded space (accepted) vs an ambiguous duplicate name
    (rejected).
    """
    if not self.can_run:
        return
    for verbose in [True, False]:
        tdf = TicDatFactory(**dietSchema())
        ticDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(dietData(), t)
                                             for t in tdf.primary_key_fields}))
        writePath = os.path.join(makeCleanDir(os.path.join(_scratchDir, "diet")), "file.json")
        tdf.json.write_file(ticDat, writePath, verbose=verbose)
        self.assertFalse(tdf.json.find_duplicates(writePath))
        jsonTicDat = tdf.json.create_tic_dat(writePath)
        self.assertTrue(tdf._same_data(ticDat, jsonTicDat))
        # mutable copy can be edited; frozen copy cannot
        def change():
            jsonTicDat.categories["calories"]["minNutrition"] = 12
        self.assertFalse(firesException(change))
        self.assertFalse(tdf._same_data(ticDat, jsonTicDat))
        jsonTicDat = tdf.json.create_tic_dat(writePath, freeze_it=True)
        self.assertTrue(firesException(change))
        self.assertTrue(tdf._same_data(ticDat, jsonTicDat))
        # a weird-case schema writes a file the original schema can still read
        tdf2 = TicDatFactory(**dietSchemaWeirdCase())
        dat2 = copyDataDietWeirdCase(ticDat)
        tdf2.json.write_file(dat2, writePath, allow_overwrite=True, verbose=verbose)
        jsonTicDat2 = tdf.json.create_tic_dat(writePath, freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, jsonTicDat2))
        tdf3 = TicDatFactory(**dietSchemaWeirdCase2())
        dat3 = copyDataDietWeirdCase2(ticDat)
        tdf3.json.write_file(dat3, writePath, allow_overwrite=True, verbose=verbose)
        # rename the table key to use a space instead of an underscore
        with open(writePath, "r") as f:
            jdict = json.load(f)
        jdict["nutrition quantities"] = jdict["nutrition_quantities"]
        del (jdict["nutrition_quantities"])
        with open(writePath, "w") as f:
            json.dump(jdict, f)
        jsonDat3 = tdf3.json.create_tic_dat(writePath)
        self.assertTrue(tdf3._same_data(dat3, jsonDat3))
        # with BOTH spellings present the read is ambiguous and must fail
        jdict["nutrition_quantities"] = jdict["nutrition quantities"]
        with open(writePath, "w") as f:
            json.dump(jdict, f)
        self.assertTrue(self.firesException(lambda: tdf3.json.create_tic_dat(writePath)))
def testNetflow(self):
    """Round-trip the netflow data through the csv writer/reader.

    Checks header/headerless modes, overwrite protection, and the
    documented minor flaw that floatable strings come back as floats.

    NOTE(review): this variant calls ``tdf.csv.get_duplicates`` while
    sibling tests use ``find_duplicates`` — presumably different
    library versions; confirm which API this file targets.
    """
    tdf = TicDatFactory(**netflowSchema())
    ticDat = tdf.TicDat(**{t: getattr(netflowData(), t) for t in tdf.primary_key_fields})
    dirPath = os.path.join(_scratchDir, "netflow")
    tdf.csv.write_directory(ticDat, dirPath)
    csvTicDat = tdf.csv.create_tic_dat(dirPath, freeze_it=True)
    self.assertFalse(tdf.csv.get_duplicates(dirPath))
    self.assertTrue(tdf._same_data(ticDat, csvTicDat))
    # reading a headered directory as headerless yields different data
    csvTicDat = tdf.csv.create_tic_dat(dirPath, freeze_it=True, headers_present=False)
    self.assertFalse(tdf._same_data(ticDat, csvTicDat))
    tdf.csv.write_directory(ticDat, dirPath, write_header=False, allow_overwrite=True)
    # reading a headerless directory as headered fails
    self.assertTrue(self.firesException(lambda: tdf.csv.create_tic_dat(dirPath, freeze_it=True)))
    csvTicDat = tdf.csv.create_tic_dat(dirPath, headers_present=False, freeze_it=True)
    self.assertTrue(tdf._same_data(ticDat, csvTicDat))
    ticDat.nodes[12] = {}
    tdf.csv.write_directory(ticDat, dirPath, allow_overwrite=True)
    csvTicDat = tdf.csv.create_tic_dat(dirPath, freeze_it=True)
    self.assertTrue(tdf._same_data(ticDat, csvTicDat))
    # minor flaw - strings that are floatable get turned into floats when reading csvs
    del (ticDat.nodes[12])
    ticDat.nodes['12'] = {}
    # overwrite without allow_overwrite fails
    self.assertTrue(firesException(lambda: tdf.csv.write_directory(ticDat, dirPath)))
    tdf.csv.write_directory(ticDat, dirPath, allow_overwrite=True)
    csvTicDat = tdf.csv.create_tic_dat(dirPath, freeze_it=True)
    self.assertFalse(tdf._same_data(ticDat, csvTicDat))
def testNetflow(self):
    """Round-trip the netflow data through the csv writer/reader.

    Same ground as the ``get_duplicates`` variant, plus generic-copy
    checks and ``find_duplicates``; documents the minor flaw that
    floatable strings come back as floats.
    """
    if not self.can_run:
        return
    tdf = TicDatFactory(**netflowSchema())
    ticDat = tdf.TicDat(**{t: getattr(netflowData(), t) for t in tdf.primary_key_fields})
    self._test_generic_copy(ticDat, tdf)
    self._test_generic_copy(ticDat, tdf, ["arcs", "nodes"])
    dirPath = os.path.join(_scratchDir, "netflow")
    tdf.csv.write_directory(ticDat, dirPath)
    csvTicDat = tdf.csv.create_tic_dat(dirPath, freeze_it=True)
    self.assertFalse(tdf.csv.find_duplicates(dirPath))
    self.assertTrue(tdf._same_data(ticDat, csvTicDat))
    # reading a headered directory as headerless yields different data
    csvTicDat = tdf.csv.create_tic_dat(dirPath, freeze_it=True, headers_present=False)
    self.assertFalse(tdf._same_data(ticDat, csvTicDat))
    tdf.csv.write_directory(ticDat, dirPath, write_header=False, allow_overwrite=True)
    # reading a headerless directory as headered fails
    self.assertTrue(self.firesException(lambda: tdf.csv.create_tic_dat(dirPath, freeze_it=True)))
    csvTicDat = tdf.csv.create_tic_dat(dirPath, headers_present=False, freeze_it=True)
    self.assertTrue(tdf._same_data(ticDat, csvTicDat))
    ticDat.nodes[12] = {}
    tdf.csv.write_directory(ticDat, dirPath, allow_overwrite=True)
    csvTicDat = tdf.csv.create_tic_dat(dirPath, freeze_it=True)
    self.assertTrue(tdf._same_data(ticDat, csvTicDat))
    # minor flaw - strings that are floatable get turned into floats when reading csvs
    del (ticDat.nodes[12])
    ticDat.nodes['12'] = {}
    # overwrite without allow_overwrite fails
    self.assertTrue(firesException(lambda: tdf.csv.write_directory(ticDat, dirPath)))
    tdf.csv.write_directory(ticDat, dirPath, allow_overwrite=True)
    csvTicDat = tdf.csv.create_tic_dat(dirPath, freeze_it=True)
    self.assertFalse(tdf._same_data(ticDat, csvTicDat))
def testDiet(self):
    """Round-trip the diet data through the csv writer/reader.

    Covers dialect validation ("excel_t" rejected, "excel-tab"
    accepted), freeze semantics, case-insensitive file-name matching,
    a table name with an embedded space, and the failure when both the
    spaced and underscored file names are present.
    """
    if not self.can_run:
        return
    tdf = TicDatFactory(**dietSchema())
    ticDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(dietData(), t)
                                         for t in tdf.primary_key_fields}))
    self._test_generic_copy(ticDat, tdf)
    self._test_generic_copy(ticDat, tdf, ["nutritionQuantities"])
    dirPath = os.path.join(_scratchDir, "diet")
    tdf.csv.write_directory(ticDat, dirPath)
    self.assertFalse(tdf.csv.find_duplicates(dirPath))
    csvTicDat = tdf.csv.create_tic_dat(dirPath)
    self.assertTrue(tdf._same_data(ticDat, csvTicDat))
    # mutable copy can be edited; a frozen one (below) cannot
    def change():
        csvTicDat.categories["calories"]["minNutrition"] = 12
    self.assertFalse(firesException(change))
    self.assertFalse(tdf._same_data(ticDat, csvTicDat))
    # bad dialect name is rejected with a specific message
    self.assertTrue(self.firesException(lambda: tdf.csv.write_directory(
        ticDat, dirPath, dialect="excel_t")).endswith("Invalid dialect excel_t"))
    tdf.csv.write_directory(ticDat, dirPath, dialect="excel-tab", allow_overwrite=True)
    # tab-dialect files can't be read with the default dialect
    self.assertTrue(self.firesException(lambda: tdf.csv.create_tic_dat(dirPath, freeze_it=True)))
    csvTicDat = tdf.csv.create_tic_dat(dirPath, freeze_it=True, dialect="excel-tab")
    self.assertTrue(firesException(change))
    self.assertTrue(tdf._same_data(ticDat, csvTicDat))
    # weird-case schema: original schema still reads the files
    tdf2 = TicDatFactory(**dietSchemaWeirdCase())
    dat2 = copyDataDietWeirdCase(ticDat)
    tdf2.csv.write_directory(dat2, dirPath, allow_overwrite=True)
    csvTicDat2 = tdf.csv.create_tic_dat(dirPath, freeze_it=True)
    self.assertTrue(tdf._same_data(ticDat, csvTicDat2))
    # file-name matching is case-insensitive
    os.rename(os.path.join(dirPath, "nutritionquantities.csv"),
              os.path.join(dirPath, "nutritionquantities.csv".upper()))
    csvTicDat2 = tdf.csv.create_tic_dat(dirPath, freeze_it=True)
    self.assertTrue(tdf._same_data(ticDat, csvTicDat2))
    # a space in the file name is accepted in place of an underscore
    tdf3 = TicDatFactory(**dietSchemaWeirdCase2())
    dat3 = copyDataDietWeirdCase2(ticDat)
    tdf3.csv.write_directory(dat3, dirPath, allow_overwrite=True)
    os.rename(os.path.join(dirPath, "nutrition_quantities.csv"),
              os.path.join(dirPath, "nutrition quantities.csv"))
    csvDat3 = tdf3.csv.create_tic_dat(dirPath)
    self.assertTrue(tdf3._same_data(dat3, csvDat3))
    # with BOTH spellings present the read is ambiguous and must fail
    shutil.copy(os.path.join(dirPath, "nutrition quantities.csv"),
                os.path.join(dirPath, "nutrition_quantities.csv"))
    self.assertTrue(self.firesException(lambda: tdf3.csv.create_tic_dat(dirPath)))
def firesException(self, f, troubleshoot=False):
    """Run f via the module-level firesException helper.

    Returns the stringified exception when f raised (after asserting
    the exception class name contains "TicDatError"), otherwise None.
    With troubleshoot set, drop into ipdb and invoke f directly so the
    raised exception can be inspected at the point of failure.
    """
    if troubleshoot:
        import ipdb
        ipdb.set_trace()
        f()
    caught = firesException(f)
    if not caught:
        return None
    self.assertTrue("TicDatError" in type(caught).__name__)
    return str(caught)
def testDiet(self):
    """Round-trip the diet data through the csv writer/reader (short variant).

    Covers dialect validation and freeze semantics.

    NOTE(review): this variant calls ``tdf.csv.get_duplicates`` while
    sibling tests use ``find_duplicates`` — presumably different
    library versions; confirm which API this file targets.
    """
    tdf = TicDatFactory(**dietSchema())
    ticDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(dietData(), t)
                                         for t in tdf.primary_key_fields}))
    dirPath = os.path.join(_scratchDir, "diet")
    tdf.csv.write_directory(ticDat, dirPath)
    self.assertFalse(tdf.csv.get_duplicates(dirPath))
    csvTicDat = tdf.csv.create_tic_dat(dirPath)
    self.assertTrue(tdf._same_data(ticDat, csvTicDat))
    # mutable copy can be edited; the frozen one (below) cannot
    def change():
        csvTicDat.categories["calories"]["minNutrition"] = 12
    self.assertFalse(firesException(change))
    self.assertFalse(tdf._same_data(ticDat, csvTicDat))
    # bad dialect name is rejected with a specific message
    self.assertTrue(self.firesException(lambda: tdf.csv.write_directory(
        ticDat, dirPath, dialect="excel_t")).endswith("Invalid dialect excel_t"))
    tdf.csv.write_directory(ticDat, dirPath, dialect="excel-tab", allow_overwrite=True)
    # tab-dialect files can't be read with the default dialect
    self.assertTrue(self.firesException(lambda: tdf.csv.create_tic_dat(dirPath, freeze_it=True)))
    csvTicDat = tdf.csv.create_tic_dat(dirPath, freeze_it=True, dialect="excel-tab")
    self.assertTrue(firesException(change))
    self.assertTrue(tdf._same_data(ticDat, csvTicDat))
def testNetflow(self):
    """Round-trip the netflow data through the csv writer/reader.

    Variant showing that whether a numeric-looking key is read back as
    a number or a string is controlled by the data type settings
    (``set_data_type`` on nodes.name), rather than being a hard-coded
    flaw.
    """
    if not self.can_run:
        return
    tdf = TicDatFactory(**netflowSchema())
    ticDat = tdf.TicDat(**{t: getattr(netflowData(), t) for t in tdf.primary_key_fields})
    self._test_generic_copy(ticDat, tdf)
    self._test_generic_copy(ticDat, tdf, ["arcs", "nodes"])
    dirPath = os.path.join(_scratchDir, "netflow")
    tdf.csv.write_directory(ticDat, dirPath)
    csvTicDat = tdf.csv.create_tic_dat(dirPath, freeze_it=True)
    self.assertFalse(tdf.csv.find_duplicates(dirPath))
    self.assertTrue(tdf._same_data(ticDat, csvTicDat))
    # reading a headered directory as headerless yields different data
    csvTicDat = tdf.csv.create_tic_dat(dirPath, freeze_it=True, headers_present=False)
    self.assertFalse(tdf._same_data(ticDat, csvTicDat))
    tdf.csv.write_directory(ticDat, dirPath, write_header=False, allow_overwrite=True)
    # reading a headerless directory as headered fails
    self.assertTrue(self.firesException(lambda: tdf.csv.create_tic_dat(dirPath, freeze_it=True)))
    csvTicDat = tdf.csv.create_tic_dat(dirPath, headers_present=False, freeze_it=True)
    self.assertTrue(tdf._same_data(ticDat, csvTicDat))
    # the casting to floats is controlled by data types and default values
    ticDat.nodes[12] = {}
    tdf.csv.write_directory(ticDat, dirPath, allow_overwrite=True)
    csvTicDat = tdf.csv.create_tic_dat(dirPath, freeze_it=True)
    self.assertFalse(tdf._same_data(ticDat, csvTicDat))
    # widening nodes.name to allow numbers recovers the original data
    tdf2 = TicDatFactory(**netflowSchema())
    tdf2.set_data_type("nodes", "name", strings_allowed='*', number_allowed=True)
    csvTicDat = tdf2.csv.create_tic_dat(dirPath, freeze_it=True)
    self.assertTrue(tdf._same_data(ticDat, csvTicDat))
    del (ticDat.nodes[12])
    ticDat.nodes['12'] = {}
    # overwrite without allow_overwrite fails
    self.assertTrue(firesException(lambda: tdf.csv.write_directory(ticDat, dirPath)))
    tdf.csv.write_directory(ticDat, dirPath, allow_overwrite=True)
    csvTicDat = tdf.csv.create_tic_dat(dirPath, freeze_it=True)
    self.assertTrue(tdf._same_data(ticDat, csvTicDat))
def testInjection(self):
    """Check sql writer behavior on quote-laden (injection-style) values.

    Quote characters survive a .db round trip; writing the same data to
    a .sql script and re-reading it raises instead.
    """
    if not self.can_run:
        return
    # single and double quotes, alone and doubled
    problems = ["'", "''", '"', '""']
    tdf = TicDatFactory(boger=[["a"], ["b"]])
    dat = tdf.TicDat()
    # use each problem string both as a key and as a value
    for v, k in enumerate(problems):
        dat.boger[k] = v
        dat.boger[v] = k
    filePath = makeCleanPath(os.path.join(_scratchDir, "injection.db"))
    tdf.sql.write_db_data(dat, filePath)
    dat2 = tdf.sql.create_tic_dat(filePath, freeze_it=True)
    self.assertTrue(tdf._same_data(dat, dat2))
    filePath = makeCleanPath(os.path.join(_scratchDir, "injection.sql"))
    tdf.sql.write_sql_file(dat, filePath)
    # the .sql round trip of these values is expected to fail
    self.assertTrue(firesException(lambda: tdf.sql.create_tic_dat_from_sql(filePath)))
def testNetflow(self):
    """Round-trip the netflow data through the json writer/reader.

    Covers in-memory (empty path) and on-disk round trips, duplicate
    detection, overwrite protection, and the contrast with csv: json
    preserves floatable strings as strings.
    """
    if not self.can_run:
        return
    for verbose in [True, False]:
        tdf = TicDatFactory(**netflowSchema())
        ticDat = tdf.TicDat(**{t: getattr(netflowData(), t) for t in tdf.primary_key_fields})
        # empty path -> write_file returns the json string instead of writing
        self.assertTrue(tdf._same_data(
            ticDat, tdf.json.create_tic_dat(tdf.json.write_file(ticDat, "")),
            epsilon=0.0001))
        writePath = os.path.join(makeCleanDir(os.path.join(_scratchDir, "netflow")), "file.json")
        tdf.json.write_file(ticDat, writePath, verbose=verbose)
        jsonTicDat = tdf.json.create_tic_dat(writePath, freeze_it=True)
        self.assertFalse(tdf.json.find_duplicates(writePath))
        self.assertTrue(tdf._same_data(ticDat, jsonTicDat))
        ticDat.nodes[12] = {}
        tdf.json.write_file(ticDat, writePath, verbose=verbose, allow_overwrite=True)
        jsonTicDat = tdf.json.create_tic_dat(writePath, freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, jsonTicDat))
        # unlike csv, json format respects strings that are floatable
        del (ticDat.nodes[12])
        ticDat.nodes['12'] = {}
        # overwrite without allow_overwrite fails
        self.assertTrue(firesException(lambda: tdf.json.write_file(
            ticDat, writePath, verbose=verbose)))
        tdf.json.write_file(ticDat, writePath, allow_overwrite=True, verbose=verbose)
        jsonTicDat = tdf.json.create_tic_dat(writePath, freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, jsonTicDat))
def testBiggie(self):
    """Check xls vs xlsx behavior on row counts around the .xls limit.

    Small data round-trips through both formats; data with 65537 rows
    (beyond the classic .xls sheet capacity) fails for .xls and
    succeeds for .xlsx.
    """
    if not self.can_run:
        return
    tdf = TicDatFactory(boger=[["the"], ["big", "boger"]],
                        moger=[["the", "big"], ["boger"]],
                        woger=[[], ["the", "big", "boger"]])
    smalldat = tdf.TicDat(
        boger={k: [(k + 1) % 10, (k + 2) % 5] for k in range(100)},
        moger={(k, (k + 1) % 10): (k + 2) % 5 for k in range(75)},
        woger=[[k, (k + 1) % 10, (k + 2) % 5] for k in range(101)])
    filePath = os.path.join(_scratchDir, "smallBiggie.xls")
    tdf.xls.write_file(smalldat, filePath)
    smalldat2 = tdf.xls.create_tic_dat(filePath)
    self.assertTrue(tdf._same_data(smalldat, smalldat2))
    # same round trip with the .xlsx extension
    filePath = makeCleanPath(filePath + "x")
    tdf.xls.write_file(smalldat, filePath)
    smalldat2 = tdf.xls.create_tic_dat(filePath)
    self.assertTrue(tdf._same_data(smalldat, smalldat2))
    bigdat = tdf.TicDat(
        boger={k: [(k + 1) % 10, (k + 2) % 5] for k in range(65537)},
        moger={(k, (k + 1) % 10): (k + 2) % 5 for k in range(75)},
        woger=[[k, (k + 1) % 10, (k + 2) % 5] for k in range(65537)])
    filePath = os.path.join(_scratchDir, "bigBiggie.xls")
    # 65537 rows exceeds .xls capacity -> write must fail
    self.assertTrue(firesException(lambda: tdf.xls.write_file(bigdat, filePath)))
    # .xlsx handles the same data
    filePath = makeCleanPath(filePath + "x")
    tdf.xls.write_file(bigdat, filePath)
    bigdat2 = tdf.xls.create_tic_dat(filePath)
    # the following is just to GD slow
    #self.assertTrue(tdf._same_data(bigdat, bigdat2))
    self.assertTrue(all(len(getattr(bigdat, t)) == len(getattr(bigdat2, t))
                        for t in tdf.all_tables))
def testOne(self):
    """Exercise good_tic_dat_object / copy / freeze semantics on diet data.

    Checks that copies are deep and independent, that frozen copies
    reject mutation, and that malformed tables produce the expected
    bad_message_handler messages and freeze_me exception text.
    """
    def _cleanIt(x):
        # dietData's float costs can lose precision; pin the two values
        # the comparisons below depend on
        x.foods['macaroni'] = {"cost": 2.09}
        x.foods['milk'] = {"cost": 0.89}
        return x
    dataObj = dietData()
    tdf = TicDatFactory(**dietSchema())
    self.assertTrue(tdf.good_tic_dat_object(dataObj))
    dataObj2 = tdf.copy_tic_dat(dataObj)
    dataObj3 = tdf.copy_tic_dat(dataObj, freeze_it=True)
    dataObj4 = tdf.TicDat(**tdf.as_dict(dataObj3))
    self.assertTrue(all(tdf._same_data(dataObj, x) and dataObj is not x
                        for x in (dataObj2, dataObj3, dataObj4)))
    dataObj = _cleanIt(dataObj)
    self.assertTrue(tdf.good_tic_dat_object(dataObj))
    self.assertTrue(all(tdf._same_data(dataObj, x) and dataObj is not x
                        for x in (dataObj2, dataObj3)))
    # mutation fails on the frozen copy, succeeds on the mutable one
    def hackit(x):
        x.foods["macaroni"] = 100
    self.assertTrue(self.firesException(lambda: hackit(dataObj3)))
    hackit(dataObj2)
    self.assertTrue(not tdf._same_data(dataObj, dataObj2) and
                    tdf._same_data(dataObj, dataObj3))
    # corrupt the foods table (tuple key) and attach a junk attribute
    msg = []
    dataObj.foods[("milk", "cookies")] = {"cost": float("inf")}
    dataObj.boger = object()
    self.assertFalse(tdf.good_tic_dat_object(dataObj) or
                     tdf.good_tic_dat_object(dataObj, bad_message_handler=msg.append))
    self.assertTrue({"foods : Inconsistent key lengths"} == set(msg))
    self.assertTrue(all(tdf.good_tic_dat_table(getattr(dataObj, t), t)
                        for t in ("categories", "nutritionQuantities")))
    dataObj = dietData()
    dataObj.categories["boger"] = {"cost": 1}
    dataObj.categories["boger"] = {"cost": 1}
    self.assertFalse(tdf.good_tic_dat_object(dataObj) or
                     tdf.good_tic_dat_object(dataObj, bad_message_handler=msg.append))
    self.assertTrue({'foods : Inconsistent key lengths',
                     'categories : Inconsistent data field name keys.'} == set(msg))
    # BUGFIX: the original read ``firesException(...).message`` —
    # BaseException.message is Python-2-only and raises AttributeError
    # under Python 3; str() of the exception carries the same text.
    ex = str(firesException(lambda: tdf.freeze_me(tdf.TicDat(
        **{t: getattr(dataObj, t) for t in tdf.primary_key_fields}))))
    self.assertTrue("categories cannot be treated as a ticDat table : Inconsistent data field name keys" in ex)
def testJsonSimple(self):
    """Round-trip diet and netflow PanDats through the pandas json writer.

    Covers file and in-memory (empty path) round trips, a generic ('*')
    schema, and the orient='columns' read/write path.
    """
    if not self.can_run:
        return
    tdf = TicDatFactory(**dietSchema())
    pdf = PanDatFactory(**dietSchema())
    ticDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(dietData(), t)
                                         for t in tdf.primary_key_fields}))
    panDat = pan_dat_maker(dietSchema(), ticDat)
    filePath = os.path.join(_scratchDir, "diet.json")
    pdf.json.write_file(panDat, filePath)
    panDat2 = pdf.json.create_pan_dat(filePath)
    self.assertTrue(pdf._same_data(panDat, panDat2, epsilon=1e-5))
    # generic '*' schema can write the same data
    pdf2 = PanDatFactory(**{t: '*' for t in pdf.all_tables})
    pdf2.json.write_file(panDat, filePath)
    panDat2 = pdf2.json.create_pan_dat(filePath)
    self.assertTrue(pdf._same_data(panDat, panDat2, epsilon=1e-5))
    tdf = TicDatFactory(**netflowSchema())
    pdf = PanDatFactory(**netflowSchema())
    ticDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(netflowData(), t)
                                         for t in tdf.primary_key_fields}))
    panDat = pan_dat_maker(netflowSchema(), ticDat)
    filePath = os.path.join(_scratchDir, "netflow.json")
    pdf.json.write_file(panDat, filePath)
    panDat2 = pdf.json.create_pan_dat(filePath)
    self.assertTrue(pdf._same_data(panDat, panDat2, epsilon=1e-5))
    # empty path -> write_file returns the json string instead of writing
    panDat3 = pdf.json.create_pan_dat(pdf.json.write_file(panDat, ""))
    self.assertTrue(pdf._same_data(panDat, panDat3))
    dicted = json.loads(pdf.json.write_file(panDat, ""))
    panDat4 = pdf.PanDat(**dicted)
    self.assertTrue(pdf._same_data(panDat, panDat4))
    pdf2 = PanDatFactory(**{t: '*' for t in pdf.all_tables})
    panDat5 = pdf2.PanDat(**dicted)
    self.assertTrue(pdf._same_data(panDat, panDat5))
    # orient='columns' variant
    tdf = TicDatFactory(**dietSchema())
    pdf = PanDatFactory(**dietSchema())
    ticDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(dietData(), t)
                                         for t in tdf.primary_key_fields}))
    panDat = pan_dat_maker(dietSchema(), ticDat)
    filePath = os.path.join(_scratchDir, "diet.json")
    pdf.json.write_file(panDat, filePath, orient='columns', index=True)
    # the following doesn't generate a TicDatError, which is fine
    self.assertTrue(firesException(lambda: pdf.json.create_pan_dat(filePath)))
    panDat2 = pdf.json.create_pan_dat(filePath, orient='columns')
    self.assertTrue(pdf._same_data(panDat, panDat2, epsilon=1e-5))
    panDat3 = pdf.json.create_pan_dat(pdf.json.write_file(panDat, "", orient='columns'),
                                      orient="columns")
    self.assertTrue(pdf._same_data(panDat, panDat3, epsilon=1e-5))
    dicted = json.loads(pdf.json.write_file(panDat, "", orient='columns'))
    panDat4 = pdf.PanDat(**dicted)
    self.assertTrue(pdf._same_data(panDat, panDat4, epsilon=1e-5))
def firesException(self, f):
    """Run f via the module-level firesException helper.

    Returns the stringified exception when f raised (after asserting
    the exception class name contains "TicDatError"), otherwise None.
    """
    caught = firesException(f)
    if not caught:
        return None
    self.assertTrue("TicDatError" in type(caught).__name__)
    return str(caught)
def testSilly(self):
    """Exercise the xls reader against permuted/sub-set/keyless schemas.

    Writes the silly data once and re-reads it with five schema
    variants; then uses a hand-rolled xlwt/pandas writer (writeData)
    to probe duplicated headers, lower-cased headers, headerless
    sheets, duplicate-row counting, and None handling in .xls vs .xlsx.
    """
    if not self.can_run:
        return
    tdf = TicDatFactory(**sillyMeSchema())
    ticDat = tdf.TicDat(**sillyMeData())
    # schema2: permuted primary key order on table b
    schema2 = sillyMeSchema()
    schema2["b"][0] = ("bField2", "bField1", "bField3")
    # schema3: permuted data field order on table a
    schema3 = sillyMeSchema()
    schema3["a"][1] = ("aData2", "aData3", "aData1")
    # schema4: subset of table a's data fields
    schema4 = sillyMeSchema()
    schema4["a"][1] = ("aData1", "aData3")
    # schema5: keyless — primary keys folded into the data fields
    schema5 = sillyMeSchema()
    _tuple = lambda x: tuple(x) if utils.containerish(x) else (x, )
    for t in ("a", "b"):
        schema5[t][1] = _tuple(schema5[t][1]) + _tuple(schema5[t][0])
    schema5["a"][0], schema5["b"][0] = (), []
    # schema6: extra table d not present in the file
    schema6 = sillyMeSchema()
    schema6["d"] = [["dField"], ()]
    tdf2, tdf3, tdf4, tdf5, tdf6 = (TicDatFactory(**x) for x in
                                    (schema2, schema3, schema4, schema5, schema6))
    tdf5.set_generator_tables(("a", "c"))
    filePath = os.path.join(_scratchDir, "silly.xls")
    tdf.xls.write_file(ticDat, filePath)
    ticDat2 = tdf2.xls.create_tic_dat(filePath)
    self.assertFalse(tdf._same_data(ticDat, ticDat2))
    ticDat3 = tdf3.xls.create_tic_dat(filePath)
    self.assertTrue(tdf._same_data(ticDat, ticDat3))
    ticDat4 = tdf4.xls.create_tic_dat(filePath)
    # table a lost a field under schema4; table b is unchanged
    for t in ["a", "b"]:
        for k, v in getattr(ticDat4, t).items():
            for _k, _v in v.items():
                self.assertTrue(getattr(ticDat, t)[k][_k] == _v)
            if set(v) == set(getattr(ticDat, t)[k]):
                self.assertTrue(t == "b")
            else:
                self.assertTrue(t == "a")
    ticDat5 = tdf5.xls.create_tic_dat(filePath, treat_inf_as_infinity=False)
    self.assertTrue(tdf5._same_data(tdf._keyless(ticDat), ticDat5))
    # generator tables read back as callables
    self.assertTrue(callable(ticDat5.a) and callable(ticDat5.c) and not callable(ticDat5.b))
    ticDat6 = tdf6.xls.create_tic_dat(filePath)
    self.assertTrue(tdf._same_data(ticDat, ticDat6))
    self.assertTrue(firesException(lambda: tdf6._same_data(ticDat, ticDat6)))
    self.assertTrue(hasattr(ticDat6, "d") and utils.dictish(ticDat6.d))

    def writeData(data, write_header="same"):
        # Write `data` rows into every table's sheet via xlwt; with
        # write_header "lower"/"same" also produce the .xlsx twin via
        # pandas. "duped" doubles the header/data columns to provoke
        # a duplicate-field failure; False omits headers entirely.
        assert filePath.endswith(".xls")
        assert not write_header or write_header in ("lower", "same", "duped")
        import xlwt
        book = xlwt.Workbook()
        for t in tdf.all_tables:
            sheet = book.add_sheet(t)
            if write_header:
                all_fields = tdf.primary_key_fields.get(t, ()) + tdf.data_fields.get(t, ())
                for i, f in enumerate((2 if write_header == "duped" else 1) * all_fields):
                    sheet.write(0, i, f.lower() if write_header == "lower" or
                                i >= len(all_fields) else f)
            for rowInd, row in enumerate(data):
                for fieldInd, cellValue in enumerate((2 if write_header == "duped" else 1) * row):
                    sheet.write(rowInd + (1 if write_header else 0), fieldInd, cellValue)
        if os.path.exists(filePath):
            os.remove(filePath)
        book.save(filePath)
        if write_header in ["lower", "same"]:  # will use pandas to generate the xlsx file version
            file_path_x = filePath + "x"
            if os.path.exists(file_path_x):
                os.remove(file_path_x)
            writer = utils.pd.ExcelWriter(file_path_x)
            for t, (pks, dfs) in tdf.schema().items():
                fields = pks + dfs
                if write_header == "lower":
                    fields = [_.lower() for _ in fields]
                d = {f: [] for f in fields}
                for row in data:
                    for f, c in zip(fields, row):
                        d[f].append(c)
                utils.pd.DataFrame(d).to_excel(writer, t, index=False)
            writer.save()

    # duplicated headers make the read fail
    writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)], write_header="duped")
    self.assertTrue(self.firesException(lambda: tdf.xls.create_tic_dat(filePath, freeze_it=True)))
    # normal headers: duplicate primary key collapses rows, and is counted
    writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)])
    ticDatMan = tdf.xls.create_tic_dat(filePath, freeze_it=True)
    self.assertTrue(len(ticDatMan.a) == 2 and len(ticDatMan.b) == 3)
    self.assertTrue(ticDatMan.b[1, 20, 30]["bData"] == 40)
    for f in [filePath, filePath + "x"]:
        rowCount = tdf.xls.find_duplicates(f)
        self.assertTrue(set(rowCount) == {'a'} and set(rowCount["a"]) == {1} and
                        rowCount["a"][1] == 2)
    # header matching is case-insensitive
    writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)], write_header="lower")
    ticDatMan = tdf.xls.create_tic_dat(filePath, freeze_it=True)
    self.assertTrue(len(ticDatMan.a) == 2 and len(ticDatMan.b) == 3)
    self.assertTrue(ticDatMan.b[1, 20, 30]["bData"] == 40)
    for f in [filePath, filePath + "x"]:
        rowCount = tdf.xls.find_duplicates(f)
        self.assertTrue(set(rowCount) == {'a'} and set(rowCount["a"]) == {1} and
                        rowCount["a"][1] == 2)
    # headerless sheets need headers_present=False
    writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)], write_header=False)
    self.assertTrue(self.firesException(lambda: tdf.xls.create_tic_dat(filePath, freeze_it=True)))
    ticDatMan = tdf.xls.create_tic_dat(filePath, freeze_it=True, headers_present=False)
    self.assertTrue(len(ticDatMan.a) == 2 and len(ticDatMan.b) == 3)
    self.assertTrue(ticDatMan.b[1, 20, 30]["bData"] == 40)
    rowCount = tdf.xls.find_duplicates(filePath, headers_present=False)
    self.assertTrue(set(rowCount) == {'a'} and set(rowCount["a"]) == {1} and
                    rowCount["a"][1] == 2)
    ticDat.a["theboger"] = (1, None, 12)
    tdf.xls.write_file(ticDat, filePath, allow_overwrite=True)
    ticDatNone = tdf.xls.create_tic_dat(filePath, freeze_it=True)
    # THIS IS A FLAW - but a minor one. None's are hard to represent. It is turning into the empty string here.
    # not sure how to handle this, but documenting for now.
    self.assertFalse(tdf._same_data(ticDat, ticDatNone))
    self.assertTrue(ticDatNone.a["theboger"]["aData2"] == "")
    # the workaround for this flaw is to set the data type to be nullabe but not allow the empty string
    tdfwa = TicDatFactory(**sillyMeSchema())
    tdfwa.set_data_type("a", "aData2", nullable=True)
    ticDatNone = tdfwa.xls.create_tic_dat(filePath, freeze_it=True)
    self.assertTrue(tdf._same_data(ticDat, ticDatNone))
    self.assertTrue(ticDatNone.a["theboger"]["aData2"] == None)
    # checking the same thing with .xlsx - using openpyxl, None is indeed recovered even without tdfwa munging!
    tdf.xls.write_file(ticDat, filePath + "x", allow_overwrite=True)
    ticDatNone = tdf.xls.create_tic_dat(filePath + "x", freeze_it=True)
    self.assertTrue(tdf._same_data(ticDat, ticDatNone))
    self.assertTrue(ticDatNone.a["theboger"]["aData2"] == None)
    ticDatNone = tdfwa.xls.create_tic_dat(filePath + "x", freeze_it=True)
    self.assertTrue(tdf._same_data(ticDat, ticDatNone))
    self.assertTrue(ticDatNone.a["theboger"]["aData2"] == None)
    # duplicates in both a (3 copies of key 1) and b ((1,20,30) twice)
    writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40), (1, 20, 30, 12)])
    for f in [filePath, filePath + "x"]:
        rowCount = tdf.xls.find_duplicates(f)
        self.assertTrue(set(rowCount) == {'a', 'b'} and set(rowCount["a"]) == {1} and
                        rowCount["a"][1] == 3)
        self.assertTrue(set(rowCount["b"]) == {(1, 20, 30)} and
                        rowCount["b"][1, 20, 30] == 2)
def testDiet(self):
    """Exercise copy_to_pandas on the diet data.

    Verifies field-by-field agreement with the source TicDat, loc vs
    sloc indexing (sloc tolerates missing labels), aggregation
    consistency, and that pandas tables can be rebuilt into TicDats
    under the original, generic ('*'), and mixed schemas.
    """
    if not self.canRun:
        return
    tdf = TicDatFactory(**dietSchema())
    tdf.enable_foreign_key_links()
    oldDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(dietData(), t)
                                         for t in tdf.primary_key_fields}))
    self._test_generic_free_copy(oldDat, tdf)
    self._test_generic_free_copy(oldDat, tdf, ["nutritionQuantities"])
    ticDat = tdf.copy_to_pandas(oldDat)
    for k in oldDat.foods:
        self.assertTrue(oldDat.foods[k]["cost"] == ticDat.foods.cost[k])
    for k in oldDat.categories:
        self.assertTrue(oldDat.categories[k]["minNutrition"] ==
                        ticDat.categories.minNutrition[k])
    for k1, k2 in oldDat.nutritionQuantities:
        self.assertTrue(oldDat.nutritionQuantities[k1, k2]["qty"] ==
                        ticDat.nutritionQuantities.qty[k1, k2])
    nut = ticDat.nutritionQuantities
    # loc raises on labels absent from the index; sloc returns empty instead
    self.assertTrue(firesException(lambda: nut.qty.loc[:, "fatty"]))
    self.assertTrue(firesException(lambda: nut.qty.loc["chickeny", :]))
    self.assertFalse(firesException(lambda: nut.qty.sloc[:, "fatty"]))
    self.assertFalse(firesException(lambda: nut.qty.sloc["chickeny", :]))
    self.assertTrue(0 == sum(nut.qty.sloc[:, "fatty"]) == sum(nut.qty.sloc["chickeny", :]))
    self.assertTrue(sum(nut.qty.sloc[:, "fat"]) == sum(nut.qty.loc[:, "fat"]) ==
                    sum(r["qty"] for (f, c), r in oldDat.nutritionQuantities.items()
                        if c == "fat"))
    self.assertTrue(sum(nut.qty.sloc["chicken", :]) == sum(nut.qty.loc["chicken", :]) ==
                    sum(r["qty"] for (f, c), r in oldDat.nutritionQuantities.items()
                        if f == "chicken"))
    # pandas tables rebuild into an equivalent TicDat
    rebornTicDat = tdf.TicDat(**{t: getattr(ticDat, t) for t in tdf.all_tables})
    self.assertTrue(tdf._same_data(rebornTicDat, oldDat))
    # generic '*' schema: no data types allowed, but data round-trips
    tdf2 = TicDatFactory(**{t: '*' for t in tdf.all_tables})
    self.assertTrue(firesException(lambda: tdf2.set_data_type("nutritionQuantities", "qty")))
    genTicDat = tdf2.TicDat(**{t: getattr(ticDat, t) for t in tdf.all_tables})
    for k in oldDat.categories:
        self.assertTrue(oldDat.categories[k]["minNutrition"] ==
                        genTicDat.categories.minNutrition[k])
    for k1, k2 in oldDat.nutritionQuantities:
        self.assertTrue(oldDat.nutritionQuantities[k1, k2]["qty"] ==
                        genTicDat.nutritionQuantities.qty[k1, k2])
    self.assertFalse(tdf.good_tic_dat_object(genTicDat))
    self.assertTrue(tdf2.good_tic_dat_object(genTicDat))
    rebornTicDat = tdf.TicDat(**{t: getattr(genTicDat, t) for t in tdf.all_tables})
    self.assertTrue(tdf._same_data(rebornTicDat, oldDat))
    rebornGenTicDat = tdf2.TicDat(**tdf2.as_dict(genTicDat))
    # restore the index names lost by the generic round trip
    for t, pks in tdf.primary_key_fields.items():
        getattr(rebornGenTicDat, t).index.names = pks
    rebornTicDat = tdf.TicDat(**{t: getattr(rebornGenTicDat, t) for t in tdf.all_tables})
    self.assertTrue(tdf._same_data(rebornTicDat, oldDat))
    # mixed schema: only categories is generic
    tdf3 = TicDatFactory(**dict(dietSchema(), **{"categories": '*'}))
    self.assertFalse(firesException(lambda: tdf3.set_data_type("nutritionQuantities", "qty")))
    mixTicDat = tdf3.TicDat(**{t: getattr(ticDat, t) for t in tdf.all_tables})
    for k in oldDat.categories:
        self.assertTrue(oldDat.categories[k]["minNutrition"] ==
                        mixTicDat.categories.minNutrition[k])
    for k1, k2 in oldDat.nutritionQuantities:
        self.assertTrue(oldDat.nutritionQuantities[k1, k2]["qty"] ==
                        mixTicDat.nutritionQuantities[k1, k2]["qty"])
    self.assertFalse(tdf2.good_tic_dat_object(mixTicDat))
    self.assertFalse(tdf3.good_tic_dat_object(genTicDat))
    self.assertTrue(tdf3.good_tic_dat_object(mixTicDat))
    rebornTicDat = tdf.TicDat(**{t: getattr(mixTicDat, t) for t in tdf.all_tables})
    self.assertTrue(tdf._same_data(rebornTicDat, oldDat))
def testSilly(self):
    """Exercise the mdb/accdb reader-writer against the silly schemas.

    The raw silly data doesn't fit Access column types, so a cleaned
    data set plus an explicit write_schema is used; then the file is
    re-read under permuted/sub-set/keyless schema variants, and None
    round-tripping is verified.
    """
    if not _can_accdb_unit_test:
        return
    tdf = TicDatFactory(**sillyMeSchema())
    ticDat = tdf.TicDat(**sillyMeData())
    filePath = os.path.join(_scratchDir, "silly.accdb")
    # raw silly data can't be written without an explicit schema
    self.assertTrue(firesException(lambda: tdf.mdb.write_file(ticDat, makeCleanPath(filePath))))
    def sillyMeCleanData():
        # data coerced to types Access can represent
        return {
            "a": {"1": (1, 2, "3"), "b": (12, 12.2, "twelve"), "c": (11, 12, "thirt")},
            "b": {(1, 2, "3"): 1, (3, 4, "b"): 12},
            "c": ((1, "2", 3, 4), (0.2, "b", 0.3, 0.4), (1.2, "b", 12, 24))
        }
    ticDat = tdf.TicDat(**sillyMeCleanData())
    # still fails without write_schema supplying column type overrides
    self.assertTrue(firesException(lambda: tdf.mdb.write_file(ticDat, makeCleanPath(filePath))))
    def makeCleanSchema():
        tdf.mdb.write_schema(makeCleanPath(filePath), a={"aData3": "text"},
                             b={"bField1": "int", "bField2": "int"},
                             c={"cData2": "text"})
        return filePath
    tdf.mdb.write_file(ticDat, makeCleanSchema())
    self.assertFalse(tdf.mdb.find_duplicates(filePath))
    accdbTicDat = tdf.mdb.create_tic_dat(filePath)
    self.assertTrue(tdf._same_data(ticDat, accdbTicDat))
    # schema2: permuted primary key order on table b
    schema2 = sillyMeSchema()
    schema2["b"][0] = ("bField2", "bField1", "bField3")
    # schema3: permuted data field order on table a
    schema3 = sillyMeSchema()
    schema3["a"][1] = ("aData2", "aData3", "aData1")
    # schema4: subset of table a's data fields
    schema4 = sillyMeSchema()
    schema4["a"][1] = ("aData1", "aData3")
    # schema5: keyless — primary keys folded into the data fields
    schema5 = sillyMeSchema()
    _tuple = lambda x: tuple(x) if utils.containerish(x) else (x,)
    for t in ("a", "b"):
        schema5[t][1] = _tuple(schema5[t][1]) + _tuple(schema5[t][0])
    schema5["a"][0], schema5["b"][0] = (), []
    # schema6: extra table d not present in the file
    schema6 = sillyMeSchema()
    schema6["d"] = [["dField"], ()]
    tdf2, tdf3, tdf4, tdf5, tdf6 = (TicDatFactory(**x) for x in
                                    (schema2, schema3, schema4, schema5, schema6))
    tdf5.set_generator_tables(("a", "c"))
    ticDat2 = tdf2.mdb.create_tic_dat(filePath)
    self.assertFalse(tdf._same_data(ticDat, ticDat2))
    ticDat3 = tdf3.mdb.create_tic_dat(filePath)
    self.assertTrue(tdf._same_data(ticDat, ticDat3))
    ticDat4 = tdf4.mdb.create_tic_dat(filePath)
    # table a lost a field under schema4; table b is unchanged
    for t in ["a", "b"]:
        for k, v in getattr(ticDat4, t).items():
            for _k, _v in v.items():
                self.assertTrue(getattr(ticDat, t)[k][_k] == _v)
            if set(v) == set(getattr(ticDat, t)[k]):
                self.assertTrue(t == "b")
            else:
                self.assertTrue(t == "a")
    ticDat5 = tdf5.mdb.create_tic_dat(filePath)
    self.assertTrue(tdf5._same_data(tdf._keyless(ticDat), ticDat5))
    # generator tables read back as callables
    self.assertTrue(callable(ticDat5.a) and callable(ticDat5.c) and not callable(ticDat5.b))
    # missing table d is reported by name
    self.assertTrue("table d" in self.firesException(lambda: tdf6.mdb.create_tic_dat(filePath)))
    # None round-trips through mdb (unlike the xls empty-string flaw)
    ticDat.a["theboger"] = (1, None, "twelve")
    tdf.mdb.write_file(ticDat, makeCleanSchema())
    ticDatNone = tdf.mdb.create_tic_dat(filePath, freeze_it=True)
    self.assertTrue(tdf._same_data(ticDat, ticDatNone))
    self.assertTrue(ticDatNone.a["theboger"]["aData2"] == None)
def testThree(self):
    # Tests TicDat construction/freezing semantics against the mutable
    # netflowData() source object: frozen copies must diverge when the source
    # mutates, frozen objects must reject edits, and malformed keys must raise.
    objOrig = netflowData()
    staticFactory = TicDatFactory(**netflowSchema())
    goodTable = lambda t : lambda _t : staticFactory.good_tic_dat_table(_t, t)
    tables = set(staticFactory.primary_key_fields)
    ticDat = staticFactory.freeze_me(staticFactory.TicDat(**{t:getattr(objOrig,t) for t in tables}))
    self.assertTrue(staticFactory.good_tic_dat_object(ticDat))
    for t in tables :
        self._assertSame(getattr(objOrig, t), getattr(ticDat,t), goodTable(t))
    # mutate the source; tables touched by the mutation should now disagree
    objOrig.commodities.append(12.3)
    objOrig.arcs[(1, 2)] = [12]
    self._assertSame(objOrig.nodes, ticDat.nodes, goodTable("nodes"))
    self._assertSame(objOrig.cost, ticDat.cost, goodTable("cost"))
    self.assertTrue(firesException(lambda : self._assertSame(
        objOrig.commodities, ticDat.commodities, goodTable("commodities")) ))
    self.assertTrue(firesException(lambda : self._assertSame(
        objOrig.arcs, ticDat.arcs, goodTable("arcs")) ))
    # rebuilding from the mutated source picks up the new rows
    ticDat = staticFactory.freeze_me(staticFactory.TicDat(**{t:getattr(objOrig,t) for t in tables}))
    for t in tables :
        self._assertSame(getattr(objOrig, t), getattr(ticDat,t), goodTable(t))
    self.assertTrue(ticDat.arcs[1, 2]["capacity"] == 12)
    self.assertTrue(12.3 in ticDat.commodities)
    # a key of the wrong length must be rejected with a descriptive message
    objOrig.cost[5]=5
    self.assertTrue("cost cannot be treated as a ticDat table : Inconsistent key lengths"
                    in firesException(lambda : staticFactory.freeze_me(staticFactory.TicDat
                                      (**{t:getattr(objOrig,t) for t in tables}))))
    objOrig = netflowData()
    def editMeBadly(t) :
        # writes a 1-part key into the multi-part-key cost table: always invalid
        def rtn() :
            t.cost["hack"] = 12
        return rtn
    def editMeWell(t) :
        # a correctly-shaped key: only valid on a mutable (unfrozen) TicDat
        def rtn() :
            t.cost["hack", "my", "balls"] = 12.12
        return rtn
    self.assertTrue(all(firesException(editMeWell(t)) and firesException(editMeBadly(t))
                        for t in (ticDat, staticFactory.freeze_me(staticFactory.TicDat()))))
    def attributeMe(t) :
        # novel attribute assignment: must be rejected on frozen objects
        def rtn() :
            t.boger="bogerwoger"
        return rtn
    self.assertTrue(firesException(attributeMe(ticDat)) and
                    firesException(attributeMe(staticFactory.freeze_me(staticFactory.TicDat()))))
    # a mutable TicDat accepts well-formed edits and attribute assignment
    mutable = staticFactory.TicDat(**{t:getattr(objOrig,t) for t in tables})
    for t in tables :
        self._assertSame(getattr(objOrig, t), getattr(mutable,t), goodTable(t))
    self.assertTrue(firesException(editMeBadly(mutable)))
    self.assertFalse(firesException(editMeWell(mutable)) or firesException(attributeMe(mutable)))
    # the successful edit above made mutable.cost diverge from the source
    self.assertTrue(firesException(lambda : self._assertSame(
        objOrig.cost, mutable.cost, goodTable("cost")) ))
def firesException(self, f):
    """Invoke f, asserting any raised exception is a TicDatError.

    :param f: zero-argument callable expected (possibly) to raise
    :return: the exception's message text if one fired, else a falsy value (None)
    """
    # delegate to the module-level firesException helper, which returns the
    # caught exception (or a falsy value when nothing was raised)
    e = firesException(f)
    if e :
        self.assertTrue("TicDatError" in e.__class__.__name__)
        # str(e) instead of e.message: BaseException.message was removed in
        # Python 3 (deprecated by PEP 352), while str(e) yields the same text
        # on both Python 2 and 3 for single-argument exceptions.
        return str(e)
def testSilly(self):
    # xls round-trip test for the "silly" schema: write once, then read back
    # under several perturbed schemas; also exercises headerless sheets,
    # duplicate-row detection, and the None -> "" flaw of the xls format.
    tdf = TicDatFactory(**sillyMeSchema())
    ticDat = tdf.TicDat(**sillyMeData())
    # schema2: primary key fields of "b" renamed -> data can no longer match
    schema2 = sillyMeSchema()
    schema2["b"][0] = ("bField2", "bField1", "bField3")
    # schema3: data fields of "a" reordered -> reads back as the same data
    schema3 = sillyMeSchema()
    schema3["a"][1] = ("aData2", "aData3", "aData1")
    # schema4: one data field of "a" dropped
    schema4 = sillyMeSchema()
    schema4["a"][1] = ("aData1", "aData3")
    # schema5: primary keys folded into the data fields (keyless tables)
    schema5 = sillyMeSchema()
    _tuple = lambda x : tuple(x) if utils.containerish(x) else (x,)
    for t in ("a", "b") :
        schema5[t][1] = _tuple(schema5[t][1]) + _tuple(schema5[t][0])
    schema5["a"][0], schema5["b"][0] = (), []
    # schema6: extra table "d" with no backing sheet
    schema6 = sillyMeSchema()
    schema6["d"] = [["dField"],()]
    tdf2, tdf3, tdf4, tdf5, tdf6 = (TicDatFactory(**x) for x in
                                    (schema2, schema3, schema4, schema5, schema6))
    tdf5.set_generator_tables(("a","c"))
    filePath = os.path.join(_scratchDir, "silly.xls")
    tdf.xls.write_file(ticDat, filePath)
    ticDat2 = tdf2.xls.create_tic_dat(filePath)
    self.assertFalse(tdf._same_data(ticDat, ticDat2))
    ticDat3 = tdf3.xls.create_tic_dat(filePath)
    self.assertTrue(tdf._same_data(ticDat, ticDat3))
    ticDat4 = tdf4.xls.create_tic_dat(filePath)
    for t in ["a","b"]:
        for k,v in getattr(ticDat4, t).items() :
            for _k, _v in v.items() :
                self.assertTrue(getattr(ticDat, t)[k][_k] == _v)
            # only table "b" keeps its full field set under schema4
            if set(v) == set(getattr(ticDat, t)[k]) :
                self.assertTrue(t == "b")
            else :
                self.assertTrue(t == "a")
    ticDat5 = tdf5.xls.create_tic_dat(filePath)
    self.assertTrue(tdf5._same_data(tdf._keyless(ticDat), ticDat5))
    # generator tables read back as callables, normal tables do not
    self.assertTrue(callable(ticDat5.a) and callable(ticDat5.c) and not callable(ticDat5.b))
    # the xls reader tolerates the missing "d" sheet (unlike the mdb reader)
    ticDat6 = tdf6.xls.create_tic_dat(filePath)
    self.assertTrue(tdf._same_data(ticDat, ticDat6))
    self.assertTrue(firesException(lambda : tdf6._same_data(ticDat, ticDat6)))
    self.assertTrue(hasattr(ticDat6, "d") and utils.dictish(ticDat6.d))
    def writeData(data, write_header = True):
        # writes the same raw rows to every sheet via xlwt, bypassing ticdat,
        # optionally with a header row; replaces filePath in place
        import xlwt
        book = xlwt.Workbook()
        for t in tdf.all_tables :
            sheet = book.add_sheet(t)
            if write_header :
                for i,f in enumerate(tdf.primary_key_fields.get(t, ()) + tdf.data_fields.get(t, ())) :
                    sheet.write(0, i, f)
            for rowInd, row in enumerate(data) :
                for fieldInd, cellValue in enumerate(row):
                    sheet.write(rowInd+ (1 if write_header else 0), fieldInd, cellValue)
        if os.path.exists(filePath):
            os.remove(filePath)
        book.save(filePath)
    # two rows share primary key 1 in table "a" -> one duplicate expected
    writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)])
    ticDatMan = tdf.xls.create_tic_dat(filePath, freeze_it=True)
    self.assertTrue(len(ticDatMan.a) == 2 and len(ticDatMan.b) == 3)
    self.assertTrue(ticDatMan.b[1, 20, 30]["bData"] == 40)
    # NOTE(review): this file uses xls.get_duplicates while the csv sibling test
    # uses find_duplicates -- presumably an older API name; confirm against the
    # ticdat version this test targets.
    rowCount = tdf.xls.get_duplicates(filePath)
    self.assertTrue(set(rowCount) == {'a'} and set(rowCount["a"]) == {1} and rowCount["a"][1]==2)
    # headerless sheets must fail unless headers_present=False is passed
    writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)], write_header=False)
    self.assertTrue(self.firesException(lambda : tdf.xls.create_tic_dat(filePath, freeze_it=True)))
    ticDatMan = tdf.xls.create_tic_dat(filePath, freeze_it=True, headers_present=False)
    self.assertTrue(len(ticDatMan.a) == 2 and len(ticDatMan.b) == 3)
    self.assertTrue(ticDatMan.b[1, 20, 30]["bData"] == 40)
    rowCount = tdf.xls.get_duplicates(filePath, headers_present=False)
    self.assertTrue(set(rowCount) == {'a'} and set(rowCount["a"]) == {1} and rowCount["a"][1]==2)
    ticDat.a["theboger"] = (1, None, 12)
    tdf.xls.write_file(ticDat, filePath, allow_overwrite=True)
    ticDatNone = tdf.xls.create_tic_dat(filePath, freeze_it=True)
    # THIS IS A FLAW - but a minor one. None's are hard to represent. It is turning into the empty string here.
    # not sure how to handle this, but documenting for now.
    self.assertFalse(tdf._same_data(ticDat, ticDatNone))
    self.assertTrue(ticDatNone.a["theboger"]["aData2"] == "")
    # extra duplicated row bumps counts in both "a" and "b"
    writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40), (1,20,30,12)])
    rowCount = tdf.xls.get_duplicates(filePath)
    self.assertTrue(set(rowCount) == {'a', 'b'} and set(rowCount["a"]) == {1} and rowCount["a"][1]==3)
    self.assertTrue(set(rowCount["b"]) == {(1,20,30)} and rowCount["b"][1,20,30]==2)
def testJsonSimple(self):
    # Round-trips diet and netflow data through PanDatFactory's json writer and
    # reader: file-based and string-based ("" path) round trips, '*'-schema
    # factories, re-fielded schemas, and the 'columns' orient.
    if not self.can_run:
        return
    tdf = TicDatFactory(**dietSchema())
    pdf = PanDatFactory(**dietSchema())
    ticDat = tdf.freeze_me(tdf.TicDat(
        **{t: getattr(dietData(), t) for t in tdf.primary_key_fields}))
    panDat = pan_dat_maker(dietSchema(), ticDat)
    filePath = os.path.join(_scratchDir, "diet.json")
    pdf.json.write_file(panDat, filePath)
    panDat2 = pdf.json.create_pan_dat(filePath)
    # epsilon tolerates float round-off introduced by json serialization
    self.assertTrue(pdf._same_data(panDat, panDat2, epsilon=1e-5))
    # a '*' schema (no declared fields) must still round-trip the same data
    pdf2 = PanDatFactory(**{t: '*' for t in pdf.all_tables})
    pdf2.json.write_file(panDat, filePath)
    panDat2 = pdf2.json.create_pan_dat(filePath)
    self.assertTrue(pdf._same_data(panDat, panDat2, epsilon=1e-5))
    # reading with a re-fielded schema orders columns per that schema
    re_fielded_schema = {
        "categories": (("name", ), ["maxNutrition", "minNutrition"]),
        "foods": [["name"], []],
        "nutritionQuantities": (["food", "category"], ["qty"])
    }
    pdf3 = PanDatFactory(**re_fielded_schema)
    panDat3 = pdf3.json.create_pan_dat(filePath)
    for t, (pks, dfs) in re_fielded_schema.items():
        self.assertTrue(list(pks) + list(dfs) == list(getattr(panDat3, t).columns))
    tdf = TicDatFactory(**netflowSchema())
    pdf = PanDatFactory(**netflowSchema())
    ticDat = tdf.freeze_me(tdf.TicDat(
        **{t: getattr(netflowData(), t) for t in tdf.primary_key_fields}))
    panDat = pan_dat_maker(netflowSchema(), ticDat)
    filePath = os.path.join(_scratchDir, "netflow.json")
    pdf.json.write_file(panDat, filePath)
    panDat2 = pdf.json.create_pan_dat(filePath)
    self.assertTrue(pdf._same_data(panDat, panDat2, epsilon=1e-5))
    # write_file with an empty path returns json text (consumed directly below
    # by create_pan_dat and by json.loads)
    panDat3 = pdf.json.create_pan_dat(pdf.json.write_file(panDat, ""))
    self.assertTrue(pdf._same_data(panDat, panDat3))
    dicted = json.loads(pdf.json.write_file(panDat, ""))
    panDat4 = pdf.PanDat(**dicted)
    self.assertTrue(pdf._same_data(panDat, panDat4))
    pdf2 = PanDatFactory(**{t: '*' for t in pdf.all_tables})
    panDat5 = pdf2.PanDat(**dicted)
    self.assertTrue(pdf._same_data(panDat, panDat5))
    # 'columns' orient: files written that way can't be read with the default orient
    tdf = TicDatFactory(**dietSchema())
    pdf = PanDatFactory(**dietSchema())
    ticDat = tdf.freeze_me(tdf.TicDat(
        **{t: getattr(dietData(), t) for t in tdf.primary_key_fields}))
    panDat = pan_dat_maker(dietSchema(), ticDat)
    filePath = os.path.join(_scratchDir, "diet.json")
    pdf.json.write_file(panDat, filePath, orient='columns', index=True)
    # the following doesn't generate a TicDatError, which is fine
    self.assertTrue(firesException(lambda: pdf.json.create_pan_dat(filePath)))
    panDat2 = pdf.json.create_pan_dat(filePath, orient='columns')
    self.assertTrue(pdf._same_data(panDat, panDat2, epsilon=1e-5))
    panDat3 = pdf.json.create_pan_dat(pdf.json.write_file(panDat, "", orient='columns'),
                                      orient="columns")
    self.assertTrue(pdf._same_data(panDat, panDat3, epsilon=1e-5))
    dicted = json.loads(pdf.json.write_file(panDat, "", orient='columns'))
    panDat4 = pdf.PanDat(**dicted)
    self.assertTrue(pdf._same_data(panDat, panDat4, epsilon=1e-5))
def doTest(headersPresent) :
    # Shared csv driver (nested in a test method: relies on the enclosing
    # scope's self): round-trips sillyMeData through csv.write_directory /
    # create_tic_dat under perturbed schemas, with or without header rows.
    # :param headersPresent: whether csv files are written/read with a header row
    tdf = TicDatFactory(**sillyMeSchema())
    ticDat = tdf.TicDat(**sillyMeData())
    # schema2: primary key fields of "b" renamed
    schema2 = sillyMeSchema()
    schema2["b"][0] = ("bField2", "bField1", "bField3")
    # schema3: data fields of "a" reordered
    schema3 = sillyMeSchema()
    schema3["a"][1] = ("aData2", "aData3", "aData1")
    # schema4: one data field of "a" dropped
    schema4 = sillyMeSchema()
    schema4["a"][1] = ("aData1", "aData3")
    # schema5: primary keys appended after the data fields (keyless tables)
    schema5 = sillyMeSchema()
    _tuple = lambda x : tuple(x) if utils.containerish(x) else (x,)
    for t in ("a", "b") :
        schema5[t][1] = _tuple(schema5[t][1]) + _tuple(schema5[t][0])
    schema5["a"][0], schema5["b"][0] = (), []
    # schema5b: primary keys prepended before the data fields (original order)
    schema5b = sillyMeSchema()
    for t in ("a", "b") :
        schema5b[t][1] = _tuple(schema5b[t][0]) + _tuple(schema5b[t][1])
    schema5b["a"][0], schema5b["b"][0] = (), []
    # schema6: extra table "d" with no backing csv file
    schema6 = sillyMeSchema()
    schema6["d"] = [("dField",),[]]
    tdf2, tdf3, tdf4, tdf5, tdf5b, tdf6 = (TicDatFactory(**x) for x in
                                           (schema2, schema3, schema4, schema5, schema5b, schema6))
    tdf5.set_generator_tables(["a", "c"])
    tdf5b.set_generator_tables(("a", "c"))
    dirPath = makeCleanDir(os.path.join(_scratchDir, "silly"))
    tdf.csv.write_directory(ticDat, dirPath, write_header=headersPresent)
    # without headers the reader matches by position, so renamed keys go unnoticed
    ticDat2 = tdf2.csv.create_tic_dat(dirPath, headers_present=headersPresent)
    (self.assertFalse if headersPresent else self.assertTrue)(tdf._same_data(ticDat, ticDat2))
    # reordered fields only resolve correctly when headers are present
    ticDat3 = tdf3.csv.create_tic_dat(dirPath, headers_present=headersPresent)
    (self.assertTrue if headersPresent else self.assertFalse)(tdf._same_data(ticDat, ticDat3))
    if headersPresent :
        # dropped field: remaining values must still match the original
        ticDat4 = tdf4.csv.create_tic_dat(dirPath, headers_present=headersPresent)
        for t in ("a", "b") :
            for k,v in getattr(ticDat4, t).items() :
                for _k, _v in v.items() :
                    self.assertTrue(getattr(ticDat, t)[k][_k] == _v)
                # only table "b" keeps its full field set under schema4
                if set(v) == set(getattr(ticDat, t)[k]) :
                    self.assertTrue(t == "b")
                else :
                    self.assertTrue(t == "a")
    else :
        # field-count mismatch cannot be reconciled without headers
        self.assertTrue(self.firesException(lambda : tdf4.csv.create_tic_dat(dirPath,
                                            headers_present=headersPresent)))
    ticDat5 = tdf5.csv.create_tic_dat(dirPath, headers_present=headersPresent)
    (self.assertTrue if headersPresent else self.assertFalse)(
        tdf5._same_data(tdf._keyless(ticDat), ticDat5))
    # generator tables read back as callables, normal tables do not
    self.assertTrue(callable(ticDat5.a) and callable(ticDat5.c) and not callable(ticDat5.b))
    # schema5b preserves on-disk field order, so it works with or without headers
    ticDat5b = tdf5b.csv.create_tic_dat(dirPath, headers_present=headersPresent)
    self.assertTrue(tdf5b._same_data(tdf._keyless(ticDat), ticDat5b))
    self.assertTrue(callable(ticDat5b.a) and callable(ticDat5b.c) and not callable(ticDat5b.b))
    # missing "d" csv file is tolerated by the csv reader
    ticDat6 = tdf6.csv.create_tic_dat(dirPath, headers_present=headersPresent)
    self.assertTrue(tdf._same_data(ticDat, ticDat6))
    self.assertTrue(firesException(lambda : tdf6._same_data(ticDat, ticDat6)))
    self.assertTrue(hasattr(ticDat6, "d") and utils.dictish(ticDat6.d))
    # a pk-less factory used to write raw rows (including duplicate keys)
    allDataTdf = TicDatFactory(**{t:[[], tdf.primary_key_fields.get(t, ()) +
                                         tdf.data_fields.get(t, ())]
                                  for t in tdf.all_tables})
    def writeData(data):
        # writes the same raw rows to every table's csv, bypassing pk dedup
        td = allDataTdf.TicDat(a = data, b=data, c=data)
        allDataTdf.csv.write_directory(td, dirPath, allow_overwrite=True,
                                       write_header=headersPresent)
    # two rows share primary key 1 in table "a" -> one duplicate expected
    writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)])
    ticDatMan = tdf.csv.create_tic_dat(dirPath, headers_present=headersPresent, freeze_it=True)
    self.assertTrue(len(ticDatMan.a) == 2 and len(ticDatMan.b) == 3)
    self.assertTrue(ticDatMan.b[(1, 20, 30)]["bData"] == 40)
    # NOTE(review): this copy of the driver calls csv.get_duplicates while a
    # sibling copy calls find_duplicates -- presumably an older API name;
    # confirm against the ticdat version this test targets.
    rowCount = tdf.csv.get_duplicates(dirPath, headers_present= headersPresent)
    self.assertTrue(set(rowCount) == {'a'} and set(rowCount["a"]) == {1} and rowCount["a"][1]==2)
    # extra duplicated row bumps counts in both "a" and "b"
    writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40), (1,20,30,12)])
    rowCount = tdf.csv.get_duplicates(dirPath, headers_present=headersPresent)
    self.assertTrue(set(rowCount) == {'a', 'b'} and set(rowCount["a"]) == {1} and rowCount["a"][1]==3)
    self.assertTrue(set(rowCount["b"]) == {(1,20,30)} and rowCount["b"][1,20,30]==2)
def testSilly(self):
    # mdb round-trip test for the "silly" schema: bad data must be rejected,
    # clean data must survive a write/read cycle, and reading back under
    # deliberately perturbed schemas must behave predictably.
    tdf = TicDatFactory(**sillyMeSchema())
    ticDat = tdf.TicDat(**sillyMeData())
    filePath = os.path.join(_scratchDir, "silly.mdb")
    # sillyMeData() contains values incompatible with the mdb writer -> must throw
    self.assertTrue(firesException(lambda : tdf.mdb.write_file(ticDat, makeCleanPath(filePath))))
    def sillyMeCleanData() :
        # data that is type-consistent per column, but still needs explicit schema overrides
        return {
            "a" : {"1" : (1, 2, "3"), "b" : (12, 12.2, "twelve"), "c" : (11, 12, "thirt")},
            "b" : {(1, 2, "3") : 1, (3, 4, "b") : 12},
            "c" : ((1, "2", 3, 4), (0.2, "b", 0.3, 0.4), (1.2, "b", 12, 24) )
        }
    ticDat = tdf.TicDat(**sillyMeCleanData())
    # still fails without a pre-written schema that overrides the default column types
    self.assertTrue(firesException(lambda : tdf.mdb.write_file(ticDat, makeCleanPath(filePath))))
    def makeCleanSchema() :
        # writes a fresh mdb file with explicit field-type overrides, returns its path
        tdf.mdb.write_schema(makeCleanPath(filePath), a={"aData3" : "text"},
                             b = {"bField1" : "int", "bField2" : "int"}, c={"cData2" : "text"})
        return filePath
    tdf.mdb.write_file(ticDat, makeCleanSchema())
    mdbTicDat = tdf.mdb.create_tic_dat(filePath)
    self.assertTrue(tdf._same_data(ticDat, mdbTicDat))
    # schema2: primary key fields of "b" renamed -> data can no longer match
    schema2 = sillyMeSchema()
    schema2["b"][0] = ("bField2", "bField1", "bField3")
    # schema3: data fields of "a" reordered -> reads back as the same data
    schema3 = sillyMeSchema()
    schema3["a"][1] = ("aData2", "aData3", "aData1")
    # schema4: one data field of "a" dropped
    schema4 = sillyMeSchema()
    schema4["a"][1] = ("aData1", "aData3")
    # schema5: primary keys folded into the data fields (keyless tables)
    schema5 = sillyMeSchema()
    _tuple = lambda x : tuple(x) if utils.containerish(x) else (x,)
    for t in ("a", "b") :
        schema5[t][1] = _tuple(schema5[t][1]) + _tuple(schema5[t][0])
    schema5["a"][0], schema5["b"][0] = (), []
    # schema6: extra table "d" that has no backing mdb table
    schema6 = sillyMeSchema()
    schema6["d"] = [["dField"],()]
    tdf2, tdf3, tdf4, tdf5, tdf6 = (TicDatFactory(**x) for x in
                                    (schema2, schema3, schema4, schema5, schema6))
    tdf5.set_generator_tables(("a","c"))
    ticDat2 = tdf2.mdb.create_tic_dat(filePath)
    self.assertFalse(tdf._same_data(ticDat, ticDat2))
    ticDat3 = tdf3.mdb.create_tic_dat(filePath)
    self.assertTrue(tdf._same_data(ticDat, ticDat3))
    ticDat4 = tdf4.mdb.create_tic_dat(filePath)
    for t in ["a","b"]:
        for k,v in getattr(ticDat4, t).items() :
            for _k, _v in v.items() :
                self.assertTrue(getattr(ticDat, t)[k][_k] == _v)
            # only table "b" keeps its full field set under schema4
            if set(v) == set(getattr(ticDat, t)[k]) :
                self.assertTrue(t == "b")
            else :
                self.assertTrue(t == "a")
    ticDat5 = tdf5.mdb.create_tic_dat(filePath)
    self.assertTrue(tdf5._same_data(tdf._keyless(ticDat), ticDat5))
    # generator tables read back as callables, normal tables do not
    self.assertTrue(callable(ticDat5.a) and callable(ticDat5.c) and not callable(ticDat5.b))
    self.assertTrue("table d" in self.firesException(lambda  : tdf6.mdb.create_tic_dat(filePath)))
    # None survives the mdb round trip (unlike the xls writer, which maps it to "")
    ticDat.a["theboger"] = (1, None, "twelve")
    tdf.mdb.write_file(ticDat, makeCleanSchema())
    ticDatNone = tdf.mdb.create_tic_dat(filePath, freeze_it=True)
    self.assertTrue(tdf._same_data(ticDat, ticDatNone))
    self.assertTrue(ticDatNone.a["theboger"]["aData2"] == None)
def doTest(headersPresent) :
    # Shared csv driver (nested in a test method: relies on the enclosing
    # scope's self): round-trips sillyMeData through csv.write_directory /
    # create_tic_dat under perturbed schemas, with or without header rows.
    # :param headersPresent: whether csv files are written/read with a header row
    tdf = TicDatFactory(**sillyMeSchema())
    ticDat = tdf.TicDat(**sillyMeData())
    # schema2: primary key fields of "b" renamed
    schema2 = sillyMeSchema()
    schema2["b"][0] = ("bField2", "bField1", "bField3")
    # schema3: data fields of "a" reordered
    schema3 = sillyMeSchema()
    schema3["a"][1] = ("aData2", "aData3", "aData1")
    # schema4: one data field of "a" dropped
    schema4 = sillyMeSchema()
    schema4["a"][1] = ("aData1", "aData3")
    # schema5: primary keys appended after the data fields (keyless tables)
    schema5 = sillyMeSchema()
    _tuple = lambda x : tuple(x) if utils.containerish(x) else (x,)
    for t in ("a", "b") :
        schema5[t][1] = _tuple(schema5[t][1]) + _tuple(schema5[t][0])
    schema5["a"][0], schema5["b"][0] = (), []
    # schema5b: primary keys prepended before the data fields (original order)
    schema5b = sillyMeSchema()
    for t in ("a", "b") :
        schema5b[t][1] = _tuple(schema5b[t][0]) + _tuple(schema5b[t][1])
    schema5b["a"][0], schema5b["b"][0] = (), []
    # schema6: extra table "d" with no backing csv file
    schema6 = sillyMeSchema()
    schema6["d"] = [("dField",),[]]
    tdf2, tdf3, tdf4, tdf5, tdf5b, tdf6 = (TicDatFactory(**x) for x in
                                           (schema2, schema3, schema4, schema5, schema5b, schema6))
    tdf5.set_generator_tables(["a", "c"])
    tdf5b.set_generator_tables(("a", "c"))
    dirPath = makeCleanDir(os.path.join(_scratchDir, "silly"))
    tdf.csv.write_directory(ticDat, dirPath, write_header=headersPresent)
    # without headers the reader matches by position, so renamed keys go unnoticed
    ticDat2 = tdf2.csv.create_tic_dat(dirPath, headers_present=headersPresent)
    (self.assertFalse if headersPresent else self.assertTrue)(tdf._same_data(ticDat, ticDat2))
    # reordered fields only resolve correctly when headers are present
    ticDat3 = tdf3.csv.create_tic_dat(dirPath, headers_present=headersPresent)
    (self.assertTrue if headersPresent else self.assertFalse)(tdf._same_data(ticDat, ticDat3))
    if headersPresent :
        # dropped field: remaining values must still match the original
        ticDat4 = tdf4.csv.create_tic_dat(dirPath, headers_present=headersPresent)
        for t in ("a", "b") :
            for k,v in getattr(ticDat4, t).items() :
                for _k, _v in v.items() :
                    self.assertTrue(getattr(ticDat, t)[k][_k] == _v)
                # only table "b" keeps its full field set under schema4
                if set(v) == set(getattr(ticDat, t)[k]) :
                    self.assertTrue(t == "b")
                else :
                    self.assertTrue(t == "a")
    else :
        # field-count mismatch cannot be reconciled without headers
        self.assertTrue(self.firesException(lambda : tdf4.csv.create_tic_dat(dirPath,
                                            headers_present=headersPresent)))
    ticDat5 = tdf5.csv.create_tic_dat(dirPath, headers_present=headersPresent)
    (self.assertTrue if headersPresent else self.assertFalse)(
        tdf5._same_data(tdf._keyless(ticDat), ticDat5))
    # generator tables read back as callables, normal tables do not
    self.assertTrue(callable(ticDat5.a) and callable(ticDat5.c) and not callable(ticDat5.b))
    # schema5b preserves on-disk field order, so it works with or without headers
    ticDat5b = tdf5b.csv.create_tic_dat(dirPath, headers_present=headersPresent)
    self.assertTrue(tdf5b._same_data(tdf._keyless(ticDat), ticDat5b))
    self.assertTrue(callable(ticDat5b.a) and callable(ticDat5b.c) and not callable(ticDat5b.b))
    # missing "d" csv file is tolerated by the csv reader
    ticDat6 = tdf6.csv.create_tic_dat(dirPath, headers_present=headersPresent)
    self.assertTrue(tdf._same_data(ticDat, ticDat6))
    self.assertTrue(firesException(lambda : tdf6._same_data(ticDat, ticDat6)))
    self.assertTrue(hasattr(ticDat6, "d") and utils.dictish(ticDat6.d))
    # a pk-less factory used to write raw rows (including duplicate keys)
    allDataTdf = TicDatFactory(**{t:[[], tdf.primary_key_fields.get(t, ()) +
                                         tdf.data_fields.get(t, ())]
                                  for t in tdf.all_tables})
    def writeData(data):
        # writes the same raw rows to every table's csv, bypassing pk dedup
        td = allDataTdf.TicDat(a = data, b=data, c=data)
        allDataTdf.csv.write_directory(td, dirPath, allow_overwrite=True,
                                       write_header=headersPresent)
    # two rows share primary key 1 in table "a" -> one duplicate expected
    writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)])
    ticDatMan = tdf.csv.create_tic_dat(dirPath, headers_present=headersPresent, freeze_it=True)
    self.assertTrue(len(ticDatMan.a) == 2 and len(ticDatMan.b) == 3)
    self.assertTrue(ticDatMan.b[(1, 20, 30)]["bData"] == 40)
    rowCount = tdf.csv.find_duplicates(dirPath, headers_present= headersPresent)
    self.assertTrue(set(rowCount) == {'a'} and set(rowCount["a"]) == {1} and rowCount["a"][1]==2)
    # extra duplicated row bumps counts in both "a" and "b"
    writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40), (1,20,30,12)])
    rowCount = tdf.csv.find_duplicates(dirPath, headers_present=headersPresent)
    self.assertTrue(set(rowCount) == {'a', 'b'} and set(rowCount["a"]) == {1} and rowCount["a"][1]==3)
    self.assertTrue(set(rowCount["b"]) == {(1,20,30)} and rowCount["b"][1,20,30]==2)