def testDietCleaningOpalytics(self):
    """Check PanDat creation from an input set carrying an ``_active`` field.

    The input set is built from a schema whose ``categories`` table has an
    extra ``_active`` data field, set to False only for ``"fat"``.  A
    ``raw_data=True`` read must equal the untouched diet data; a default
    read must differ from it, and must equal it once ``"fat"`` is popped
    and foreign key failures are removed.
    """
    schema_with_flag = dietSchema()
    schema_with_flag["categories"][-1].append("_active")
    plain_tdf = TicDatFactory(**dietSchema())
    flagged_tdf = TicDatFactory(**schema_with_flag)

    flagged_dat = flagged_tdf.copy_tic_dat(dietData())
    for row in flagged_dat.categories.values():
        row["_active"] = True
    flagged_dat.categories["fat"]["_active"] = False

    expected = plain_tdf.copy_tic_dat(dietData())
    input_set = create_inputset_mock_with_active_hack(flagged_tdf, flagged_dat)
    pdf1 = PanDatFactory(**plain_tdf.schema())

    raw_pan = pdf1.opalytics.create_pan_dat(input_set, raw_data=True)
    self.assertTrue(
        plain_tdf._same_data(pdf1.copy_to_tic_dat(raw_pan), expected))

    purged_pan = pdf1.opalytics.create_pan_dat(input_set)
    self.assertFalse(
        plain_tdf._same_data(pdf1.copy_to_tic_dat(purged_pan), expected))

    # Mirror the expected purge by hand on the reference data.
    expected.categories.pop("fat")
    plain_tdf.remove_foreign_key_failures(expected)
    self.assertTrue(
        plain_tdf._same_data(pdf1.copy_to_tic_dat(purged_pan), expected))
def testMissingTable(self):
    """Read xlsx files that lack some of the diet tables with the full schema.

    A file written from a reduced schema is read back with the full diet
    factory.  The result must match the reduced data, expose every table of
    the full schema as an attribute, and leave the absent tables empty.
    """
    if not self.can_run:
        return
    full_tdf = TicDatFactory(**dietSchema())

    def write_partial_then_read(keep):
        # Write only the tables accepted by ``keep``; read with the full schema.
        partial_tdf = TicDatFactory(
            **{name: spec for name, spec in dietSchema().items() if keep(name)})
        partial_dat = partial_tdf.copy_tic_dat(dietData())
        path = makeCleanPath(os.path.join(_scratchDir, "diet_missing.xlsx"))
        partial_tdf.xls.write_file(partial_dat, path)
        loaded = full_tdf.xls.create_tic_dat(path)
        self.assertTrue(partial_tdf._same_data(partial_dat, loaded))
        self.assertTrue(
            all(hasattr(loaded, tbl) for tbl in full_tdf.all_tables))
        return loaded

    # Only nutritionQuantities is missing from the file.
    loaded = write_partial_then_read(lambda name: name != "nutritionQuantities")
    self.assertFalse(loaded.nutritionQuantities)
    self.assertTrue(loaded.categories and loaded.foods)

    # Only categories is present in the file.
    loaded = write_partial_then_read(lambda name: name == "categories")
    self.assertFalse(loaded.nutritionQuantities or loaded.foods)
    self.assertTrue(loaded.categories)
def testCsvSimple(self):
    """Round-trip diet and netflow PanDat objects through csv directories.

    Covers reading with the declared schema, reading with a generic
    ``'*'`` schema, and writing with ``decimal=","`` (which must only read
    back equal when the same ``decimal`` is given to the reader).
    """
    if not self.can_run:
        return

    def make_pan_dat(schema_factory, data_factory):
        # Build the PanDatFactory and a PanDat derived from frozen TicDat data.
        tdf = TicDatFactory(**schema_factory())
        pdf = PanDatFactory(**schema_factory())
        frozen = tdf.freeze_me(
            tdf.TicDat(**{tbl: getattr(data_factory(), tbl)
                          for tbl in tdf.primary_key_fields}))
        return pdf, pan_dat_maker(schema_factory(), frozen)

    # --- diet, default csv settings
    pdf, original = make_pan_dat(dietSchema, dietData)
    diet_dir = os.path.join(_scratchDir, "diet_csv")
    pdf.csv.write_directory(original, diet_dir)
    reloaded = pdf.csv.create_pan_dat(diet_dir)
    self.assertTrue(pdf._same_data(original, reloaded))

    generic_pdf = PanDatFactory(**{tbl: '*' for tbl in pdf.all_tables})
    reloaded = generic_pdf.csv.create_pan_dat(diet_dir)
    self.assertTrue(pdf._same_data(original, reloaded))

    # --- netflow, including a write through the generic schema
    pdf, original = make_pan_dat(netflowSchema, netflowData)
    netflow_dir = os.path.join(_scratchDir, "netflow_csv")
    pdf.csv.write_directory(original, netflow_dir)
    reloaded = pdf.csv.create_pan_dat(netflow_dir)
    self.assertTrue(pdf._same_data(original, reloaded))

    generic_pdf = PanDatFactory(**{tbl: '*' for tbl in pdf.all_tables})
    generic_pdf.csv.write_directory(original, netflow_dir)
    reloaded = generic_pdf.csv.create_pan_dat(netflow_dir)
    self.assertTrue(pdf._same_data(original, reloaded))

    # --- diet again, written with a comma as the decimal separator
    pdf, original = make_pan_dat(dietSchema, dietData)
    diet_dir = os.path.join(_scratchDir, "diet_csv")
    pdf.csv.write_directory(original, diet_dir, decimal=",")
    reloaded = pdf.csv.create_pan_dat(diet_dir)
    self.assertFalse(pdf._same_data(original, reloaded))
    reloaded = pdf.csv.create_pan_dat(diet_dir, decimal=",")
    self.assertTrue(pdf._same_data(original, reloaded))
def testDiet(self):
    """Exercise ``tdf.opalytics`` on mocked diet input sets.

    Runs once for every True/False combination of ``hack``, ``raw_data``
    and ``activeEnabled``.  Each pass checks duplicate detection, editable
    versus frozen reads, reading an input set that carries an extra field,
    and the error raised when a schema field is absent from the input set.
    """
    if not self.can_run:
        return
    for hack, raw_data, activeEnabled in product([True, False], repeat=3):
        tdf = TicDatFactory(**dietSchema())
        reference = tdf.freeze_me(tdf.copy_tic_dat(dietData()))
        inputset = create_inputset_mock(tdf, reference, hack, activeEnabled)
        self.assertFalse(
            tdf.opalytics.find_duplicates(inputset, raw_data=raw_data))

        loaded = tdf.opalytics.create_tic_dat(inputset, raw_data=raw_data)
        self.assertTrue(tdf._same_data(reference, loaded))

        def change():
            # Reads ``loaded`` at call time, so it follows later rebinding.
            loaded.categories["calories"]["minNutrition"] = 12

        # An unfrozen read can be edited ...
        self.assertFalse(firesException(change))
        self.assertFalse(tdf._same_data(reference, loaded))

        # ... whereas a frozen one rejects the edit and stays intact.
        loaded = tdf.opalytics.create_tic_dat(
            inputset, freeze_it=True, raw_data=raw_data)
        self.assertTrue(tdf._same_data(reference, loaded))
        self.assertTrue(firesException(change))
        self.assertTrue(tdf._same_data(reference, loaded))

        # Same schema with one extra data field, "dmy", on every table.
        wider_schema = {
            tbl: [pks, list(dfs) + ["dmy"]]
            for tbl, (pks, dfs) in tdf.schema().items()
        }
        tdf2 = TicDatFactory(**wider_schema)
        wider_dat = tdf2.copy_tic_dat(reference)
        wider_inputset = create_inputset_mock(tdf2, wider_dat, hack)
        from_wider = tdf.opalytics.create_tic_dat(
            wider_inputset, raw_data=raw_data)
        self.assertTrue(tdf._same_data(reference, from_wider))

        ex = self.firesException(
            lambda: tdf2.opalytics.create_tic_dat(inputset, raw_data=raw_data))
        self.assertTrue("field dmy can't be found" in ex)
def testDictConstructions(self):
    """Build PanDat objects from dict/list renderings of DataFrames.

    For the diet and netflow schemas, PanDat objects are constructed from
    ``to_dict()`` output in several orientations and from lists of row
    lists, and compared with the source PanDat.  A column outside the
    schema ("extra") must survive the ``orient="list"`` construction.
    """
    # ---- diet
    tdf = TicDatFactory(**dietSchema())
    pdf = PanDatFactory(**dietSchema())
    frozen = tdf.freeze_me(
        tdf.TicDat(**{tbl: getattr(dietData(), tbl)
                      for tbl in tdf.primary_key_fields}))
    panDat = pan_dat_maker(dietSchema(), frozen)

    from_default = pdf.PanDat(
        **{tbl: getattr(panDat, tbl).to_dict() for tbl in pdf.all_tables})
    from_lists = pdf.PanDat(
        **{tbl: getattr(panDat, tbl).to_dict(orient="list")
           for tbl in pdf.all_tables})
    from_rows = pdf.PanDat(
        **{tbl: [list(row)
                 for row in getattr(panDat, tbl).itertuples(index=False)]
           for tbl in pdf.all_tables})
    self.assertTrue(
        all(pdf._same_data(panDat, candidate)
            for candidate in [from_default, from_lists, from_rows]))

    panDat.foods["extra"] = 12
    with_extra = pdf.PanDat(
        **{tbl: getattr(panDat, tbl).to_dict(orient="list")
           for tbl in pdf.all_tables})
    self.assertTrue(pdf._same_data(panDat, with_extra))
    self.assertTrue(set(with_extra.foods["extra"]) == {12})

    # ---- netflow
    tdf = TicDatFactory(**netflowSchema())
    pdf = PanDatFactory(**netflowSchema())
    frozen = tdf.freeze_me(
        tdf.TicDat(**{tbl: getattr(netflowData(), tbl)
                      for tbl in tdf.primary_key_fields}))
    panDat = pan_dat_maker(netflowSchema(), frozen)

    from_default = pdf.PanDat(
        **{tbl: getattr(panDat, tbl).to_dict() for tbl in pdf.all_tables})
    from_records = pdf.PanDat(
        **{tbl: getattr(panDat, tbl).to_dict(orient="records")
           for tbl in pdf.all_tables})
    self.assertTrue(
        all(pdf._same_data(panDat, candidate)
            for candidate in [from_default, from_records]))

    panDat.cost["extra"] = "boger"
    with_extra = pdf.PanDat(
        **{tbl: getattr(panDat, tbl).to_dict(orient="list")
           for tbl in pdf.all_tables})
    self.assertTrue(pdf._same_data(panDat, with_extra))
    self.assertTrue(set(with_extra.cost["extra"]) == {"boger"})
def testDietCleaningFive(self):
    """Check purging with a row predicate, a data type and foreign keys.

    The ``raw_data=True`` read must equal the original data.  The
    ``raw_data=False`` read must differ, must still differ after "fat",
    "calories" and "protein" are popped from the reference categories, and
    must match once foreign key failures are removed from the reference.
    """
    tdf = TicDatFactory(**dietSchema())
    tdf.add_data_row_predicate(
        "categories", lambda row: row["maxNutrition"] >= 66)
    tdf.set_data_type("categories", "minNutrition", max=0, inclusive_max=True)
    addDietForeignKeys(tdf)

    reference = tdf.copy_tic_dat(dietData())
    input_set = create_inputset_mock(tdf, reference)

    raw = tdf.opalytics.create_tic_dat(input_set, raw_data=True)
    self.assertTrue(tdf._same_data(raw, reference))

    purged = tdf.opalytics.create_tic_dat(input_set, raw_data=False)
    self.assertFalse(tdf._same_data(purged, reference))

    for category in ("fat", "calories", "protein"):
        reference.categories.pop(category)
    self.assertFalse(tdf._same_data(purged, reference))

    tdf.remove_foreign_keys_failures(reference)
    self.assertTrue(tdf._same_data(purged, reference))
def testDietWithInfFlagging(self):
    """Check the infinity IO flag on .xls and .xlsx round trips.

    With the flag set to 999999999, and on a clone of that factory, the
    files read back equal to the source.  A fresh factory without the flag
    reads 999999999 for protein's ``maxNutrition``; replacing that with
    ``float("inf")`` restores equality.
    """
    tdf = TicDatFactory(**dietSchema())
    source = tdf.copy_tic_dat(dietData())
    tdf.set_infinity_io_flag(999999999)

    xls_path = os.path.join(_scratchDir, "dietInfFlag.xls")
    xlsx_path = os.path.join(_scratchDir, "dietInfFlag.xlsx")
    tdf.xls.write_file(source, xls_path)
    tdf.xls.write_file(source, xlsx_path)

    from_xls = tdf.xls.create_tic_dat(xls_path)
    from_xlsx = tdf.xls.create_tic_dat(xlsx_path)
    self.assertTrue(tdf._same_data(source, from_xls))
    self.assertTrue(tdf._same_data(source, from_xlsx))

    # The clone is expected to read the file the same way.
    tdf = tdf.clone()
    from_xls = tdf.xls.create_tic_dat(xls_path)
    self.assertTrue(tdf._same_data(source, from_xls))

    # A brand-new factory has no flag configured.
    tdf = TicDatFactory(**dietSchema())
    from_xls = tdf.xls.create_tic_dat(xls_path)
    from_xlsx = tdf.xls.create_tic_dat(xlsx_path)
    self.assertFalse(tdf._same_data(source, from_xls))
    self.assertFalse(tdf._same_data(source, from_xlsx))

    flagged_values = {
        loaded.categories["protein"]["maxNutrition"]
        for loaded in [from_xls, from_xlsx]
    }
    self.assertTrue(flagged_values == {999999999})

    for loaded in [from_xls, from_xlsx]:
        loaded.categories["protein"]["maxNutrition"] = float("inf")
    self.assertTrue(tdf._same_data(source, from_xls))
    self.assertTrue(tdf._same_data(source, from_xlsx))
def testTryCreateSpace(self):
    """Exercise the ``tlingo`` space/case mapping helpers.

    For three schemas, applying the inverted mapping and then the mapping
    itself must restore the original data, while the intermediate data
    must differ from it.  Diet data with colliding names must then report
    the expected ``"failures"`` entry.
    """
    def check_round_trip(schema_factory, data_factory):
        tdf = TicDatFactory(**schema_factory())
        dat = tdf.copy_tic_dat(data_factory())
        mapping = tlingo._try_create_space_case_mapping(tdf, dat)["mapping"]
        inverse = {new: old for old, new in mapping.items()}
        remapped = tlingo._apply_space_case_mapping(tdf, dat, inverse)
        restored = tlingo._apply_space_case_mapping(tdf, remapped, mapping)
        self.assertTrue(tdf._same_data(dat, restored))
        self.assertFalse(tdf._same_data(dat, remapped))

    check_round_trip(dietSchema, dietData)
    check_round_trip(netflowSchema, netflowData)
    check_round_trip(
        sillyMeSchema,
        lambda: TicDatFactory(**sillyMeSchema()).TicDat(**sillyMeData()))

    # Add names that differ from existing ones only by case, space or "_".
    tdf = TicDatFactory(**dietSchema())
    dat = tdf.copy_tic_dat(dietData())
    dat.foods["ice_cream"] = dat.foods["ice cream"]
    dat.categories["ICE CREAM"] = {}
    dat.categories["fAt"] = dat.categories["fat"]

    failures = tlingo._try_create_space_case_mapping(tdf, dat)["failures"]
    expected_failures = {
        'ICE_CREAM': ('ICE CREAM', 'ice cream', 'ice_cream'),
        'FAT': ('fAt', 'fat'),
    }
    self.assertTrue(failures == expected_failures)
def testDietCleaningOpalytisThree(self):
    """Check PanDat purging with a row predicate plus foreign keys.

    The TicDat and PanDat factories get the same ``maxNutrition >= 66``
    predicate on ``categories`` and the diet foreign keys.  The raw read
    equals the reference; the purged read only equals it after "fat" is
    popped and foreign key failures are removed from the reference.
    """
    def keeps_row(row):
        return row["maxNutrition"] >= 66

    tdf = TicDatFactory(**dietSchema())
    tdf.add_data_row_predicate("categories", keeps_row)
    addDietForeignKeys(tdf)
    reference = tdf.copy_tic_dat(dietData())

    pdf = PanDatFactory(**tdf.schema())
    pdf.add_data_row_predicate("categories", keeps_row)
    addDietForeignKeys(pdf)

    input_set = create_inputset_mock(tdf, reference)

    raw_pan = pdf.opalytics.create_pan_dat(input_set, raw_data=True)
    self.assertTrue(tdf._same_data(pdf.copy_to_tic_dat(raw_pan), reference))

    purged_pan = pdf.opalytics.create_pan_dat(input_set, raw_data=False)
    self.assertFalse(
        tdf._same_data(pdf.copy_to_tic_dat(purged_pan), reference))

    # Dropping the category alone is not enough ...
    reference.categories.pop("fat")
    self.assertFalse(
        tdf._same_data(pdf.copy_to_tic_dat(purged_pan), reference))

    # ... the rows that referenced it have to go as well.
    tdf.remove_foreign_key_failures(reference)
    self.assertTrue(
        tdf._same_data(pdf.copy_to_tic_dat(purged_pan), reference))
def testDiet(self):
    """Round-trip the diet data through an .accdb file via ``tdf.mdb``.

    Checks duplicate detection, that an unfrozen read is editable, that
    rewriting an existing file needs ``allow_overwrite=True``, and that a
    frozen read rejects edits.
    """
    if not _can_accdb_unit_test:
        return
    tdf = TicDatFactory(**dietSchema())
    reference = tdf.freeze_me(
        tdf.TicDat(**{tbl: getattr(dietData(), tbl)
                      for tbl in tdf.primary_key_fields}))
    accdb_path = makeCleanPath(os.path.join(_scratchDir, "diet.accdb"))
    tdf.mdb.write_file(reference, accdb_path)
    # Uncomment to make readonly test file as .accdb:
    # shutil.copy(accdb_path, "diet.accdb")
    self.assertFalse(tdf.mdb.find_duplicates(accdb_path))

    loaded = tdf.mdb.create_tic_dat(accdb_path)
    self.assertTrue(tdf._same_data(reference, loaded))

    def changeit():
        # Reads ``loaded`` at call time, so it follows later rebinding.
        loaded.categories["calories"]["minNutrition"] = 12

    changeit()
    self.assertFalse(tdf._same_data(reference, loaded))

    # Writing over the existing file without permission must fail.
    overwrite_error = self.firesException(
        lambda: tdf.mdb.write_file(reference, accdb_path))
    self.assertTrue(overwrite_error)
    tdf.mdb.write_file(reference, accdb_path, allow_overwrite=True)

    loaded = tdf.mdb.create_tic_dat(accdb_path, freeze_it=True)
    self.assertTrue(tdf._same_data(reference, loaded))
    self.assertTrue(self.firesException(changeit))
    self.assertTrue(tdf._same_data(reference, loaded))
def testDietOpalytics(self):
    """Exercise ``pdf.opalytics.create_pan_dat`` on mocked diet input sets.

    Runs once for every True/False combination of ``hack``, ``raw_data``
    and ``activeEnabled``.  Each pass checks a plain read, a read of an
    input set carrying an extra "dmy" field, and the error text produced
    when the schema expects "dmy" but the input set lacks it.
    """
    if not self.can_run:
        return
    for hack, raw_data, activeEnabled in itertools.product(
            [True, False], repeat=3):
        tdf = TicDatFactory(**dietSchema())
        reference = tdf.freeze_me(tdf.copy_tic_dat(dietData()))
        inputset = create_inputset_mock(tdf, reference, hack, activeEnabled)

        pdf = PanDatFactory(**dietSchema())
        panDat = pdf.opalytics.create_pan_dat(inputset)
        self.assertFalse(pdf.find_duplicates(panDat))
        round_tripped = pdf.copy_to_tic_dat(panDat)
        self.assertTrue(tdf._same_data(reference, round_tripped))

        # Same schema with one extra data field, "dmy", on every table.
        wider_schema = {
            tbl: [pks, list(dfs) + ["dmy"]]
            for tbl, (pks, dfs) in tdf.schema().items()
        }
        tdf2 = TicDatFactory(**wider_schema)
        wider_dat = tdf2.copy_tic_dat(reference)
        panDat = pdf.opalytics.create_pan_dat(
            create_inputset_mock(tdf2, wider_dat, hack))
        self.assertTrue(
            tdf._same_data(reference, pdf.copy_to_tic_dat(panDat)))

        pdf2 = PanDatFactory(**tdf2.schema())
        ex = self.firesException(
            lambda: pdf2.opalytics.create_pan_dat(inputset, raw_data=raw_data))
        expected_fragments = ["(table, field) pairs missing"] + [
            "'%s', 'dmy'" % tbl for tbl in pdf2.all_tables
        ]
        self.assertTrue(all(fragment in ex for fragment in expected_fragments))
def testWeirdDiets(self):
    """Read db files whose table naming differs from the schema's.

    Data written with the "weird case" schema must be readable through the
    regular diet schema.  For the second variant, the table is renamed to
    ``[nutrition quantities]`` and must still load; adding another table
    named ``nutrition_quantities`` next to it must make the read fail.
    """
    if not self.can_run:
        return
    db_path = os.path.join(_scratchDir, "weirdDiet.db")
    tdf = TicDatFactory(**dietSchema())
    reference = tdf.freeze_me(
        tdf.TicDat(**{tbl: getattr(dietData(), tbl)
                      for tbl in tdf.primary_key_fields}))

    # Variant one: write with the weird schema, read with the regular one.
    tdf2 = TicDatFactory(**dietSchemaWeirdCase())
    dat2 = copyDataDietWeirdCase(reference)
    tdf2.sql.write_db_data(dat2, db_path, allow_overwrite=True)
    self.assertFalse(tdf2.sql.find_duplicates(db_path))
    loaded = tdf.sql.create_tic_dat(db_path)
    self.assertTrue(tdf._same_data(reference, loaded))

    # Variant two: rename the table directly in the database.
    tdf3 = TicDatFactory(**dietSchemaWeirdCase2())
    dat3 = copyDataDietWeirdCase2(reference)
    tdf3.sql.write_db_data(dat3, makeCleanPath(db_path))
    with sql.connect(db_path) as con:
        con.execute(
            "ALTER TABLE nutrition_quantities RENAME TO [nutrition quantities]")
    loaded3 = tdf3.sql.create_tic_dat(db_path)
    self.assertTrue(tdf3._same_data(dat3, loaded3))

    # Both spellings now exist as tables, so the read is expected to fail.
    with sql.connect(db_path) as con:
        con.execute("create table nutrition_quantities(boger)")
    self.assertTrue(
        self.firesException(lambda: tdf3.sql.create_tic_dat(db_path)))
def testDietWithInfFlagging(self):
    """Check the infinity IO flag across the sql, csv, json and xls writers.

    For each format: the flagged factory and its clone read the data back
    equal (within ``epsilon=1e-5``); a fresh factory built from the schema
    alone reads 999999999 for protein's ``maxNutrition``, and replacing
    that with ``float("inf")`` restores equality.
    """
    diet_pdf = PanDatFactory(**dietSchema())
    addDietDataTypes(diet_pdf)
    tdf = TicDatFactory(**dietSchema())
    source = tdf.copy_to_pandas(
        tdf.copy_tic_dat(dietData()), drop_pk_columns=False)
    diet_pdf.set_infinity_io_flag(999999999)

    core_path = os.path.join(_scratchDir, "diet_with_inf_flagging")
    diet_pdf.sql.write_file(source, core_path + ".db")
    diet_pdf.csv.write_directory(source, core_path + "_csv")
    diet_pdf.json.write_file(source, core_path + ".json")
    diet_pdf.xls.write_file(source, core_path + ".xlsx")

    targets = (
        ("sql", core_path + ".db"),
        ("csv", core_path + "_csv"),
        ("json", core_path + ".json"),
        ("xls", core_path + ".xlsx"),
    )
    for io_name, location in targets:
        loaded = getattr(diet_pdf, io_name).create_pan_dat(location)
        self.assertTrue(diet_pdf._same_data(source, loaded, epsilon=1e-5))

        pdf = diet_pdf.clone()
        loaded = getattr(pdf, io_name).create_pan_dat(location)
        self.assertTrue(pdf._same_data(source, loaded, epsilon=1e-5))

        # Built from the schema alone, without the flag being set here.
        pdf = PanDatFactory(**diet_pdf.schema())
        loaded = getattr(pdf, io_name).create_pan_dat(location)
        self.assertFalse(pdf._same_data(source, loaded, epsilon=1e-5))

        is_protein = loaded.categories["name"] == "protein"
        protein_max = list(loaded.categories[is_protein]["maxNutrition"])[0]
        self.assertTrue(protein_max == 999999999)

        loaded.categories.loc[is_protein, "maxNutrition"] = float("inf")
        self.assertTrue(pdf._same_data(source, loaded, epsilon=1e-5))
def testDietCleaningOpalyticsTwo(self):
    """Check PanDat purging with a data type plus foreign keys.

    Both factories get the diet foreign keys and a ``maxNutrition`` data
    type with ``min=66``.  The raw read equals the reference; the purged
    read only equals it after "fat" is popped and foreign key failures
    are removed from the reference.
    """
    type_args = dict(min=66, inclusive_max=True)

    tdf = TicDatFactory(**dietSchema())
    addDietForeignKeys(tdf)
    tdf.set_data_type("categories", "maxNutrition", **type_args)
    reference = tdf.copy_tic_dat(dietData())
    input_set = create_inputset_mock(tdf, reference)

    pdf = PanDatFactory(**dietSchema())
    addDietForeignKeys(pdf)
    pdf.set_data_type("categories", "maxNutrition", **type_args)

    raw_pan = pdf.opalytics.create_pan_dat(input_set, raw_data=True)
    self.assertTrue(tdf._same_data(pdf.copy_to_tic_dat(raw_pan), reference))

    purged_pan = pdf.opalytics.create_pan_dat(input_set, raw_data=False)
    self.assertFalse(
        tdf._same_data(pdf.copy_to_tic_dat(purged_pan), reference))

    # Dropping the category alone is not enough ...
    reference.categories.pop("fat")
    self.assertFalse(
        tdf._same_data(pdf.copy_to_tic_dat(purged_pan), reference))

    # ... the rows that referenced it have to go as well.
    tdf.remove_foreign_key_failures(reference)
    self.assertTrue(
        tdf._same_data(pdf.copy_to_tic_dat(purged_pan), reference))
def testWeirdDiets(self):
    """Read .accdb files whose table naming differs from the schema's.

    Data written with the "weird case" schema must be readable through the
    regular diet schema.  For the second variant, the table is copied to
    ``[nutrition quantities]`` and the original dropped; the read must
    still succeed.  Re-creating ``nutrition_quantities`` next to it must
    make the read fail.
    """
    if not _can_accdb_unit_test:
        return
    accdb_path = os.path.join(_scratchDir, "weirdDiet.accdb")
    tdf = TicDatFactory(**dietSchema())
    reference = tdf.freeze_me(
        tdf.TicDat(**{tbl: getattr(dietData(), tbl)
                      for tbl in tdf.primary_key_fields}))

    # Variant one: write with the weird schema, read with the regular one.
    tdf2 = TicDatFactory(**dietSchemaWeirdCase())
    dat2 = copyDataDietWeirdCase(reference)
    tdf2.mdb.write_file(dat2, accdb_path, allow_overwrite=True)
    loaded = tdf.mdb.create_tic_dat(accdb_path)
    self.assertTrue(tdf._same_data(reference, loaded))

    # Variant two: move the table to a name containing a space.
    tdf3 = TicDatFactory(**dietSchemaWeirdCase2())
    dat3 = copyDataDietWeirdCase2(reference)
    tdf3.mdb.write_file(dat3, makeCleanPath(accdb_path))
    with py.connect(_connection_str(accdb_path)) as con:
        copy_cursor = con.cursor()
        copy_cursor.execute(
            "SELECT * INTO [nutrition quantities] FROM nutrition_quantities"
        ).commit()
        drop_cursor = con.cursor()
        drop_cursor.execute("DROP TABLE nutrition_quantities").commit()
    loaded3 = tdf3.mdb.create_tic_dat(accdb_path)
    self.assertTrue(tdf3._same_data(dat3, loaded3))

    # Both spellings now exist as tables, so the read is expected to fail.
    with py.connect(_connection_str(accdb_path)) as con:
        con.cursor().execute(
            "create table nutrition_quantities (boger int)").commit()
    self.assertTrue(
        self.firesException(lambda: tdf3.mdb.create_tic_dat(accdb_path)))
def doTheTests(tdf):
    """Round-trip the diet data through ``diet.db`` and ``diet.sql``.

    NOTE(review): ``self`` is used but is not a parameter, so this only
    works as a closure inside a test method that provides it -- confirm
    where this definition is meant to live.

    :param tdf: factory whose ``sql`` interface is exercised
    """
    reference = tdf.freeze_me(
        tdf.TicDat(**{tbl: getattr(dietData(), tbl)
                      for tbl in tdf.primary_key_fields}))

    # ---- database file
    db_path = makeCleanPath(os.path.join(_scratchDir, "diet.db"))
    tdf.sql.write_db_data(reference, db_path)
    self.assertFalse(tdf.sql.find_duplicates(db_path))
    loaded = tdf.sql.create_tic_dat(db_path)
    self.assertTrue(tdf._same_data(reference, loaded))

    def changeit():
        # Reads ``loaded`` at call time, so it follows later rebinding.
        loaded.categories["calories"]["minNutrition"] = 12

    changeit()
    self.assertFalse(tdf._same_data(reference, loaded))

    # Writing over the existing file without permission must fail.
    self.assertTrue(
        self.firesException(lambda: tdf.sql.write_db_data(reference, db_path)))
    tdf.sql.write_db_data(reference, db_path, allow_overwrite=True)
    loaded = tdf.sql.create_tic_dat(db_path, freeze_it=True)
    self.assertTrue(tdf._same_data(reference, loaded))
    self.assertTrue(self.firesException(changeit))
    self.assertTrue(tdf._same_data(reference, loaded))

    # ---- SQL text file
    sql_path = makeCleanPath(os.path.join(_scratchDir, "diet.sql"))
    tdf.sql.write_sql_file(reference, sql_path)
    loaded = tdf.sql.create_tic_dat_from_sql(sql_path)
    self.assertTrue(tdf._same_data(reference, loaded))
    changeit()
    self.assertFalse(tdf._same_data(reference, loaded))

    tdf.sql.write_sql_file(
        reference, sql_path, include_schema=True, allow_overwrite=True)
    loaded = tdf.sql.create_tic_dat_from_sql(
        sql_path, includes_schema=True, freeze_it=True)
    self.assertTrue(tdf._same_data(reference, loaded))
    self.assertTrue(self.firesException(changeit))
    self.assertTrue(tdf._same_data(reference, loaded))
def testDiet(self):
    """Exercise the ``sql`` interface with and without foreign keys.

    A nested helper round-trips the diet data through ``diet.db`` and
    ``diet.sql``.  It runs first on a plain diet factory and again on one
    with default values and foreign keys, after the table ordering and
    the foreign-key failure detection/removal have been checked.
    """
    def doTheTests(tdf):
        reference = tdf.freeze_me(
            tdf.TicDat(**{tbl: getattr(dietData(), tbl)
                          for tbl in tdf.primary_key_fields}))

        # ---- database file
        db_path = makeCleanPath(os.path.join(_scratchDir, "diet.db"))
        tdf.sql.write_db_data(reference, db_path)
        loaded = tdf.sql.create_tic_dat(db_path)
        self.assertTrue(tdf._same_data(reference, loaded))

        def changeit():
            # Reads ``loaded`` at call time, so it follows later rebinding.
            loaded.categories["calories"]["minNutrition"] = 12

        changeit()
        self.assertFalse(tdf._same_data(reference, loaded))

        # Writing over the existing file without permission must fail.
        self.assertTrue(
            self.firesException(
                lambda: tdf.sql.write_db_data(reference, db_path)))
        tdf.sql.write_db_data(reference, db_path, allow_overwrite=True)
        loaded = tdf.sql.create_tic_dat(db_path, freeze_it=True)
        self.assertTrue(tdf._same_data(reference, loaded))
        self.assertTrue(self.firesException(changeit))
        self.assertTrue(tdf._same_data(reference, loaded))

        # ---- SQL text file
        sql_path = makeCleanPath(os.path.join(_scratchDir, "diet.sql"))
        tdf.sql.write_sql_file(reference, sql_path)
        loaded = tdf.sql.create_tic_dat_from_sql(sql_path)
        self.assertTrue(tdf._same_data(reference, loaded))
        changeit()
        self.assertFalse(tdf._same_data(reference, loaded))

        tdf.sql.write_sql_file(reference, sql_path, include_schema=True)
        loaded = tdf.sql.create_tic_dat_from_sql(
            sql_path, includes_schema=True, freeze_it=True)
        self.assertTrue(tdf._same_data(reference, loaded))
        self.assertTrue(self.firesException(changeit))
        self.assertTrue(tdf._same_data(reference, loaded))

    doTheTests(TicDatFactory(**dietSchema()))

    tdf = TicDatFactory(**dietSchema())
    self.assertFalse(tdf.foreign_keys)
    tdf.set_default_values(
        categories={'maxNutrition': float("inf"), 'minNutrition': 0.0},
        foods={'cost': 0.0},
        nutritionQuantities={'qty': 0.0})
    addDietForeignKeys(tdf)

    # Both parent tables must be ordered ahead of the child table.
    ordered = tdf.sql._ordered_tables()
    child_position = ordered.index("nutritionQuantities")
    self.assertTrue(ordered.index("categories") < child_position)
    self.assertTrue(ordered.index("foods") < child_position)

    ticDat = tdf.TicDat(
        **{tbl: getattr(dietData(), tbl) for tbl in tdf.primary_key_fields})
    origTicDat = tdf.copy_tic_dat(ticDat)
    self.assertTrue(tdf._same_data(ticDat, origTicDat))
    self.assertFalse(tdf.find_foreign_key_failures(ticDat))

    # Add two rows that each reference a parent key that does not exist.
    ticDat.nutritionQuantities['hot dog', 'boger'] = -12
    ticDat.nutritionQuantities['junk', 'protein'] = -12
    expected_failures = {
        ('nutritionQuantities', 'foods', ('food', 'name'), 'many-to-one'):
            (('junk',), (('junk', 'protein'),)),
        ('nutritionQuantities', 'categories', ('category', 'name'),
         'many-to-one'):
            (('boger',), (('hot dog', 'boger'),)),
    }
    self.assertTrue(tdf.find_foreign_key_failures(ticDat) == expected_failures)
    self.assertFalse(tdf._same_data(ticDat, origTicDat))

    tdf.remove_foreign_keys_failures(ticDat)
    self.assertFalse(tdf.find_foreign_key_failures(ticDat))
    self.assertTrue(tdf._same_data(ticDat, origTicDat))

    doTheTests(tdf)
def doTheTests(tdf):
    """Round-trip the diet data through ``diet.db`` and ``diet.sql``.

    NOTE(review): ``self`` is used but is not a parameter, so this only
    works as a closure inside a test method that provides it -- confirm
    where this definition is meant to live.

    :param tdf: factory whose ``sql`` interface is exercised
    """
    reference = tdf.freeze_me(
        tdf.TicDat(**{tbl: getattr(dietData(), tbl)
                      for tbl in tdf.primary_key_fields}))

    # ---- database file
    db_path = makeCleanPath(os.path.join(_scratchDir, "diet.db"))
    tdf.sql.write_db_data(reference, db_path)
    loaded = tdf.sql.create_tic_dat(db_path)
    self.assertTrue(tdf._same_data(reference, loaded))

    def changeit():
        # Reads ``loaded`` at call time, so it follows later rebinding.
        loaded.categories["calories"]["minNutrition"] = 12

    changeit()
    self.assertFalse(tdf._same_data(reference, loaded))

    # Writing over the existing file without permission must fail.
    self.assertTrue(
        self.firesException(lambda: tdf.sql.write_db_data(reference, db_path)))
    tdf.sql.write_db_data(reference, db_path, allow_overwrite=True)
    loaded = tdf.sql.create_tic_dat(db_path, freeze_it=True)
    self.assertTrue(tdf._same_data(reference, loaded))
    self.assertTrue(self.firesException(changeit))
    self.assertTrue(tdf._same_data(reference, loaded))

    # ---- SQL text file
    sql_path = makeCleanPath(os.path.join(_scratchDir, "diet.sql"))
    tdf.sql.write_sql_file(reference, sql_path)
    loaded = tdf.sql.create_tic_dat_from_sql(sql_path)
    self.assertTrue(tdf._same_data(reference, loaded))
    changeit()
    self.assertFalse(tdf._same_data(reference, loaded))

    tdf.sql.write_sql_file(reference, sql_path, include_schema=True)
    loaded = tdf.sql.create_tic_dat_from_sql(
        sql_path, includes_schema=True, freeze_it=True)
    self.assertTrue(tdf._same_data(reference, loaded))
    self.assertTrue(self.firesException(changeit))
    self.assertTrue(tdf._same_data(reference, loaded))
def testSqlSimple(self):
    """Round-trip diet and netflow PanDat objects through db files.

    Each data set is written and read with its declared schema and also
    read through a generic ``'*'`` schema.  For diet, the generic factory
    rewrites the file before reading it.
    """
    if not self.can_run:
        return

    def make_pan_dat(schema_factory, data_factory):
        # Build the PanDatFactory and a PanDat derived from frozen TicDat data.
        tdf = TicDatFactory(**schema_factory())
        pdf = PanDatFactory(**schema_factory())
        frozen = tdf.freeze_me(
            tdf.TicDat(**{tbl: getattr(data_factory(), tbl)
                          for tbl in tdf.primary_key_fields}))
        return pdf, pan_dat_maker(schema_factory(), frozen)

    # ---- diet
    pdf, original = make_pan_dat(dietSchema, dietData)
    diet_db = os.path.join(_scratchDir, "diet.db")
    pdf.sql.write_file(original, diet_db)
    reloaded = pdf.sql.create_pan_dat(diet_db)
    self.assertTrue(pdf._same_data(original, reloaded))

    generic_pdf = PanDatFactory(**{tbl: '*' for tbl in pdf.all_tables})
    generic_pdf.sql.write_file(original, diet_db)
    reloaded = generic_pdf.sql.create_pan_dat(diet_db)
    self.assertTrue(pdf._same_data(original, reloaded))

    # ---- netflow
    pdf, original = make_pan_dat(netflowSchema, netflowData)
    netflow_db = os.path.join(_scratchDir, "netflow.db")
    pdf.sql.write_file(original, netflow_db)
    reloaded = pdf.sql.create_pan_dat(netflow_db)
    self.assertTrue(pdf._same_data(original, reloaded))

    generic_pdf = PanDatFactory(**{tbl: '*' for tbl in pdf.all_tables})
    reloaded = generic_pdf.sql.create_pan_dat(netflow_db)
    self.assertTrue(pdf._same_data(original, reloaded))
def testTwo(self):
    """Check a frozen TicDat built from ``dietData()`` table by table.

    The frozen object must pass ``good_tic_dat_object`` and every table
    must pass ``self._assertSame`` against the source, with
    ``good_tic_dat_table`` handed over as the per-table checker.
    """
    source = dietData()
    factory = TicDatFactory(**dietSchema())
    table_names = set(factory.primary_key_fields)
    frozen = factory.freeze_me(
        factory.TicDat(**{name: getattr(source, name) for name in table_names}))
    self.assertTrue(factory.good_tic_dat_object(frozen))

    for name in table_names:
        source_table = getattr(source, name)
        frozen_table = getattr(frozen, name)
        self._assertSame(
            source_table,
            frozen_table,
            lambda candidate: factory.good_tic_dat_table(candidate, name))
def testDiet(self):
    """Round-trip the diet data through json files, verbose and not.

    Each pass checks duplicate detection, editable versus frozen reads,
    reading files written from the two "weird case" schemas, and that a
    file holding both "nutrition quantities" and "nutrition_quantities"
    makes the read fail.
    """
    if not self.can_run:
        return
    for verbose in (True, False):
        tdf = TicDatFactory(**dietSchema())
        reference = tdf.freeze_me(
            tdf.TicDat(**{tbl: getattr(dietData(), tbl)
                          for tbl in tdf.primary_key_fields}))
        clean_dir = makeCleanDir(os.path.join(_scratchDir, "diet"))
        json_path = os.path.join(clean_dir, "file.json")
        tdf.json.write_file(reference, json_path, verbose=verbose)
        self.assertFalse(tdf.json.find_duplicates(json_path))

        loaded = tdf.json.create_tic_dat(json_path)
        self.assertTrue(tdf._same_data(reference, loaded))

        def change():
            # Reads ``loaded`` at call time, so it follows later rebinding.
            loaded.categories["calories"]["minNutrition"] = 12

        self.assertFalse(firesException(change))
        self.assertFalse(tdf._same_data(reference, loaded))

        loaded = tdf.json.create_tic_dat(json_path, freeze_it=True)
        self.assertTrue(firesException(change))
        self.assertTrue(tdf._same_data(reference, loaded))

        # First weird-case schema: readable through the regular schema.
        tdf2 = TicDatFactory(**dietSchemaWeirdCase())
        dat2 = copyDataDietWeirdCase(reference)
        tdf2.json.write_file(
            dat2, json_path, allow_overwrite=True, verbose=verbose)
        loaded2 = tdf.json.create_tic_dat(json_path, freeze_it=True)
        self.assertTrue(tdf._same_data(reference, loaded2))

        # Second weird-case schema: rename the table key inside the file.
        tdf3 = TicDatFactory(**dietSchemaWeirdCase2())
        dat3 = copyDataDietWeirdCase2(reference)
        tdf3.json.write_file(
            dat3, json_path, allow_overwrite=True, verbose=verbose)
        with open(json_path, "r") as handle:
            jdict = json.load(handle)
        jdict["nutrition quantities"] = jdict.pop("nutrition_quantities")
        with open(json_path, "w") as handle:
            json.dump(jdict, handle)
        loaded3 = tdf3.json.create_tic_dat(json_path)
        self.assertTrue(tdf3._same_data(dat3, loaded3))

        # Both spellings present at once: the read is expected to fail.
        jdict["nutrition_quantities"] = jdict["nutrition quantities"]
        with open(json_path, "w") as handle:
            json.dump(jdict, handle)
        self.assertTrue(
            self.firesException(lambda: tdf3.json.create_tic_dat(json_path)))
def testMissingTable(self):
    """Read a diet input set with a schema that declares one more table.

    The extra ``missing_table`` has no counterpart in the input set.  The
    read must still match the diet data, with ``missing_table`` empty.
    """
    if not self.can_run:
        return
    tdf = TicDatFactory(**dietSchema())
    reference = tdf.freeze_me(tdf.copy_tic_dat(dietData()))
    inputset = create_inputset_mock(tdf, reference)

    extended_schema = dict(dietSchema())
    extended_schema["missing_table"] = [["a"], ["b"]]
    tdf2 = TicDatFactory(**extended_schema)

    loaded = tdf2.opalytics.create_tic_dat(inputset)
    self.assertTrue(tdf._same_data(reference, loaded))
    self.assertFalse(loaded.missing_table)
def testOne(self):
    """Exercise copying, freezing and validation of diet data objects.

    Covers ``copy_tic_dat`` (plain and frozen), rebuilding through
    ``as_dict``, rejection of edits on a frozen copy, and the messages
    that ``good_tic_dat_object`` passes to ``bad_message_handler`` for
    inconsistent key lengths and inconsistent data field names.
    """
    def _cleanIt(x):
        x.foods['macaroni'] = {"cost": 2.09}
        x.foods['milk'] = {"cost": 0.89}
        return x

    dataObj = dietData()
    tdf = TicDatFactory(**dietSchema())
    self.assertTrue(tdf.good_tic_dat_object(dataObj))
    dataObj2 = tdf.copy_tic_dat(dataObj)
    dataObj3 = tdf.copy_tic_dat(dataObj, freeze_it=True)
    dataObj4 = tdf.TicDat(**tdf.as_dict(dataObj3))
    self.assertTrue(all(tdf._same_data(dataObj, x) and dataObj is not x
                        for x in (dataObj2, dataObj3, dataObj4)))

    dataObj = _cleanIt(dataObj)
    self.assertTrue(tdf.good_tic_dat_object(dataObj))
    self.assertTrue(all(tdf._same_data(dataObj, x) and dataObj is not x
                        for x in (dataObj2, dataObj3)))

    def hackit(x):
        x.foods["macaroni"] = 100

    # The frozen copy refuses the edit; the plain copy accepts it.
    self.assertTrue(self.firesException(lambda: hackit(dataObj3)))
    hackit(dataObj2)
    self.assertTrue(not tdf._same_data(dataObj, dataObj2)
                    and tdf._same_data(dataObj, dataObj3))

    msg = []
    # A tuple key in a table that otherwise has scalar keys.
    dataObj.foods[("milk", "cookies")] = {"cost": float("inf")}
    dataObj.boger = object()
    self.assertFalse(
        tdf.good_tic_dat_object(dataObj)
        or tdf.good_tic_dat_object(dataObj, bad_message_handler=msg.append))
    self.assertEqual({"foods : Inconsistent key lengths"}, set(msg))
    self.assertTrue(all(tdf.good_tic_dat_table(getattr(dataObj, t), t)
                        for t in ("categories", "nutritionQuantities")))

    dataObj = dietData()
    # One assignment is enough; the original repeated this line verbatim.
    dataObj.categories["boger"] = {"cost": 1}
    self.assertFalse(
        tdf.good_tic_dat_object(dataObj)
        or tdf.good_tic_dat_object(dataObj, bad_message_handler=msg.append))
    # ``msg`` still holds the message collected by the earlier check.
    self.assertEqual(
        {'foods : Inconsistent key lengths',
         'categories : Inconsistent data field name keys.'},
        set(msg))

    # str() instead of ``.message``: exceptions lost that attribute in
    # Python 3, while str() yields the message text on Python 2 and 3.
    ex = str(firesException(
        lambda: tdf.freeze_me(
            tdf.TicDat(**{t: getattr(dataObj, t)
                          for t in tdf.primary_key_fields}))))
    self.assertIn(
        "categories cannot be treated as a ticDat table : "
        "Inconsistent data field name keys",
        ex)
def testDietCleaningThree_2(self):
    """Check purging when the input already lacks a referenced category.

    "fat" is popped from the reference data before the input set is
    built.  The raw read equals the reference; the purged read differs
    until foreign key failures are removed from the reference as well.
    """
    tdf = TicDatFactory(**dietSchema())
    addDietForeignKeys(tdf)
    reference = tdf.copy_tic_dat(dietData())
    reference.categories.pop("fat")
    input_set = create_inputset_mock(tdf, reference)

    raw = tdf.opalytics.create_tic_dat(input_set, raw_data=True)
    self.assertTrue(tdf._same_data(raw, reference))

    purged = tdf.opalytics.create_tic_dat(input_set, raw_data=False)
    self.assertFalse(tdf._same_data(purged, reference))

    tdf.remove_foreign_key_failures(reference)
    self.assertTrue(tdf._same_data(purged, reference))
def testDietWithInfFlagging(self):
    """Check the infinity IO flag on a csv directory round trip.

    The flagged factory and its clone must read the data back equal to
    the source; a fresh factory without the flag must not.
    """
    tdf = TicDatFactory(**dietSchema())
    source = tdf.copy_tic_dat(dietData())
    tdf.set_infinity_io_flag(999999999)

    csv_dir = os.path.join(_scratchDir, "dietInfFlag")
    tdf.csv.write_directory(source, csv_dir)
    loaded = tdf.csv.create_tic_dat(csv_dir)
    self.assertTrue(tdf._same_data(source, loaded))

    # The clone is expected to read the directory the same way.
    tdf = tdf.clone()
    loaded = tdf.csv.create_tic_dat(csv_dir)
    self.assertTrue(tdf._same_data(source, loaded))

    # A brand-new factory has no flag configured.
    tdf = TicDatFactory(**dietSchema())
    loaded = tdf.csv.create_tic_dat(csv_dir)
    self.assertFalse(tdf._same_data(source, loaded))
def testXlsSimple(self):
    """Round-trip diet and netflow PanDat objects through xlsx files.

    Also covers a schema lacking ``nutritionQuantities`` (reading the full
    file with it, and reading a file written by it with the full schema)
    and reading through a generic ``'*'`` schema.
    """
    if not self.can_run:
        return

    def make_pan_dat(schema_factory, data_factory):
        # Build the PanDatFactory and a PanDat derived from frozen TicDat data.
        tdf = TicDatFactory(**schema_factory())
        pdf = PanDatFactory(**schema_factory())
        frozen = tdf.freeze_me(
            tdf.TicDat(**{tbl: getattr(data_factory(), tbl)
                          for tbl in tdf.primary_key_fields}))
        return pdf, pan_dat_maker(schema_factory(), frozen)

    # ---- diet, full schema
    pdf, original = make_pan_dat(dietSchema, dietData)
    diet_path = os.path.join(_scratchDir, "diet.xlsx")
    pdf.xls.write_file(original, diet_path)
    reloaded = pdf.xls.create_pan_dat(diet_path)
    self.assertTrue(pdf._same_data(original, reloaded))

    # ---- diet, schema without nutritionQuantities
    pdf_shrunk = PanDatFactory(
        **{name: spec for name, spec in dietSchema().items()
           if name != "nutritionQuantities"})
    self.assertTrue(len(pdf_shrunk.all_tables) == len(pdf.all_tables) - 1)
    reloaded_shrunk = pdf_shrunk.xls.create_pan_dat(diet_path)
    self.assertTrue(pdf_shrunk._same_data(original, reloaded_shrunk))

    shrunk_path = os.path.join(_scratchDir, "diet_shrunk.xlsx")
    # Reading is attempted before anything is written to this path here.
    self.assertTrue(
        self.firesException(lambda: pdf.xls.create_pan_dat(shrunk_path)))
    pdf_shrunk.xls.write_file(original, shrunk_path)
    reloaded_shrunk = pdf.xls.create_pan_dat(shrunk_path)
    self.assertTrue(pdf_shrunk._same_data(original, reloaded_shrunk))

    # ---- diet, generic schema
    generic_pdf = PanDatFactory(**{tbl: '*' for tbl in pdf.all_tables})
    generic_pdf.xls.write_file(original, diet_path)
    reloaded = generic_pdf.xls.create_pan_dat(diet_path)
    self.assertTrue(pdf._same_data(original, reloaded))

    # ---- netflow
    pdf, original = make_pan_dat(netflowSchema, netflowData)
    netflow_path = os.path.join(_scratchDir, "netflow.xlsx")
    pdf.xls.write_file(original, netflow_path)
    reloaded = pdf.xls.create_pan_dat(netflow_path)
    self.assertTrue(pdf._same_data(original, reloaded))

    generic_pdf = PanDatFactory(**{tbl: '*' for tbl in pdf.all_tables})
    reloaded = generic_pdf.xls.create_pan_dat(netflow_path)
    self.assertTrue(pdf._same_data(original, reloaded))
def testDiet(self):
    """Round-trip the diet data through .xls/.xlsx files via ``tdf.xls``.

    Also checks ``treat_inf_as_infinity=False``, duplicate detection, and
    ``row_offsets`` with and without ``headers_present=False``.
    """
    if not self.can_run:
        return
    tdf = TicDatFactory(**dietSchema())
    reference = tdf.freeze_me(
        tdf.TicDat(**{tbl: getattr(dietData(), tbl)
                      for tbl in tdf.primary_key_fields}))
    self._test_generic_copy(reference, tdf)
    self._test_generic_copy(reference, tdf, ["nutritionQuantities"])

    xls_path = os.path.join(_scratchDir, "diet.xls")
    xlsx_path = xls_path + "x"

    tdf.xls.write_file(reference, xls_path)
    loaded = tdf.xls.create_tic_dat(xls_path)
    self.assertTrue(tdf._same_data(reference, loaded))

    tdf.xls.write_file(reference, xlsx_path)
    self.assertTrue(
        tdf._same_data(reference, tdf.xls.create_tic_dat(xlsx_path)))
    without_inf = tdf.xls.create_tic_dat(
        xlsx_path, treat_inf_as_infinity=False)
    self.assertFalse(tdf._same_data(reference, without_inf))

    loaded.categories["calories"]["minNutrition"] = 12
    self.assertFalse(tdf._same_data(reference, loaded))

    self.assertFalse(tdf.xls.find_duplicates(xls_path))

    # Offset of one row while still expecting a header row.
    ex = self.firesException(
        lambda: tdf.xls.create_tic_dat(
            xls_path, row_offsets={tbl: 1 for tbl in tdf.all_tables}))
    self.assertTrue("field names could not be found" in ex)

    # Offset of one row, no header expected: all the data is read.
    loaded = tdf.xls.create_tic_dat(
        xls_path,
        row_offsets={tbl: 1 for tbl in tdf.all_tables},
        headers_present=False)
    self.assertTrue(tdf._same_data(loaded, reference))

    # Offset of two rows, no header expected: one row short per table.
    loaded = tdf.xls.create_tic_dat(
        xls_path,
        row_offsets={tbl: 2 for tbl in tdf.all_tables},
        headers_present=False)
    self.assertFalse(tdf._same_data(loaded, reference))
    self.assertTrue(
        all(len(getattr(reference, tbl)) - 1 == len(getattr(loaded, tbl))
            for tbl in tdf.all_tables))
def testDiet(self):
    """Round-trip the diet data through a directory of csv files.

    Covers: plain write/read, editability of the unfrozen result, rejection
    of the invalid dialect name "excel_t", the "excel-tab" dialect (reading
    it back without naming the dialect must fail), immutability of a result
    read with ``freeze_it=True``, directories written by the two
    "weird case" schema variants, renamed csv files, and the failure
    expected once both "nutrition quantities.csv" and
    "nutrition_quantities.csv" exist in the directory.

    Returns immediately when ``self.can_run`` is falsy.
    """
    if not self.can_run:
        return

    tdf = TicDatFactory(**dietSchema())
    source = tdf.freeze_me(
        tdf.TicDat(
            **{t: getattr(dietData(), t) for t in tdf.primary_key_fields}))
    self._test_generic_copy(source, tdf)
    self._test_generic_copy(source, tdf, ["nutritionQuantities"])

    csv_dir = os.path.join(_scratchDir, "diet")
    tdf.csv.write_directory(source, csv_dir)
    self.assertFalse(tdf.csv.find_duplicates(csv_dir))
    from_csv = tdf.csv.create_tic_dat(csv_dir)
    self.assertTrue(tdf._same_data(source, from_csv))

    def change():
        # Reads ``from_csv`` at call time, so it targets whichever object
        # that name is bound to when invoked.
        from_csv.categories["calories"]["minNutrition"] = 12

    # Unfrozen data: the edit goes through and is detected.
    self.assertFalse(firesException(change))
    self.assertFalse(tdf._same_data(source, from_csv))

    bad_dialect_msg = self.firesException(
        lambda: tdf.csv.write_directory(source, csv_dir, dialect="excel_t"))
    self.assertTrue(bad_dialect_msg.endswith("Invalid dialect excel_t"))

    tdf.csv.write_directory(
        source, csv_dir, dialect="excel-tab", allow_overwrite=True)
    # Reading without naming the dialect is expected to raise.
    self.assertTrue(
        self.firesException(
            lambda: tdf.csv.create_tic_dat(csv_dir, freeze_it=True)))
    from_csv = tdf.csv.create_tic_dat(
        csv_dir, freeze_it=True, dialect="excel-tab")
    # Frozen data: the same edit must now raise and leave the data intact.
    self.assertTrue(firesException(change))
    self.assertTrue(tdf._same_data(source, from_csv))

    # Directory written by the first weird-case schema, read by ``tdf``.
    weird_tdf = TicDatFactory(**dietSchemaWeirdCase())
    weird_dat = copyDataDietWeirdCase(source)
    weird_tdf.csv.write_directory(weird_dat, csv_dir, allow_overwrite=True)
    reread = tdf.csv.create_tic_dat(csv_dir, freeze_it=True)
    self.assertTrue(tdf._same_data(source, reread))

    # Upper-case one file name; the read must still reproduce the data.
    os.rename(
        os.path.join(csv_dir, "nutritionquantities.csv"),
        os.path.join(csv_dir, "nutritionquantities.csv".upper()))
    reread = tdf.csv.create_tic_dat(csv_dir, freeze_it=True)
    self.assertTrue(tdf._same_data(source, reread))

    # Second weird-case schema: underscore replaced by a space in a name.
    weird_tdf_2 = TicDatFactory(**dietSchemaWeirdCase2())
    weird_dat_2 = copyDataDietWeirdCase2(source)
    weird_tdf_2.csv.write_directory(
        weird_dat_2, csv_dir, allow_overwrite=True)
    spaced_file = os.path.join(csv_dir, "nutrition quantities.csv")
    underscored_file = os.path.join(csv_dir, "nutrition_quantities.csv")
    os.rename(underscored_file, spaced_file)
    reread_2 = weird_tdf_2.csv.create_tic_dat(csv_dir)
    self.assertTrue(weird_tdf_2._same_data(weird_dat_2, reread_2))

    # With both spellings present the read is expected to raise.
    shutil.copy(spaced_file, underscored_file)
    self.assertTrue(
        self.firesException(
            lambda: weird_tdf_2.csv.create_tic_dat(csv_dir)))
def testDietWithInfFlagging(self):
    """Check the infinity I/O flag against SQL text and database files.

    The diet data is written by a factory on which
    ``set_infinity_io_flag(999999999)`` was called. That factory and a
    clone of it must read the data back unchanged; a brand-new factory on
    which the flag was never set must not.
    """
    flagged_tdf = TicDatFactory(**dietSchema())
    dat = flagged_tdf.copy_tic_dat(dietData())
    flagged_tdf.set_infinity_io_flag(999999999)

    sql_path = os.path.join(_scratchDir, "dietInfFlag.sql")
    db_path = os.path.join(_scratchDir, "dietInfFlag.db")
    flagged_tdf.sql.write_sql_file(dat, sql_path)
    flagged_tdf.sql.write_db_data(dat, db_path)

    from_sql = flagged_tdf.sql.create_tic_dat_from_sql(sql_path)
    from_db = flagged_tdf.sql.create_tic_dat(db_path)
    self.assertTrue(flagged_tdf._same_data(dat, from_sql))
    self.assertTrue(flagged_tdf._same_data(dat, from_db))

    # A clone must behave like the flagged original.
    cloned_tdf = flagged_tdf.clone()
    from_sql = cloned_tdf.sql.create_tic_dat_from_sql(sql_path)
    self.assertTrue(cloned_tdf._same_data(dat, from_sql))

    # A fresh factory without the flag must read something different.
    plain_tdf = TicDatFactory(**dietSchema())
    from_sql = plain_tdf.sql.create_tic_dat_from_sql(sql_path)
    self.assertFalse(plain_tdf._same_data(dat, from_sql))
def testDietWithInfFlagging(self):
    """Check the infinity I/O flag against JSON files.

    The diet data is written twice (``verbose=True`` and ``verbose=False``)
    by a factory on which ``set_infinity_io_flag(999999999)`` was called.
    That factory and a clone of it must read the data back unchanged; a
    brand-new factory on which the flag was never set must not.
    """
    flagged_tdf = TicDatFactory(**dietSchema())
    dat = flagged_tdf.copy_tic_dat(dietData())
    flagged_tdf.set_infinity_io_flag(999999999)

    verbose_path = os.path.join(_scratchDir, "dietInfFlag_1.json")
    compact_path = os.path.join(_scratchDir, "dietInfFlag_2.json")
    flagged_tdf.json.write_file(dat, verbose_path, verbose=True)
    flagged_tdf.json.write_file(dat, compact_path, verbose=False)

    from_verbose = flagged_tdf.json.create_tic_dat(verbose_path)
    from_compact = flagged_tdf.json.create_tic_dat(compact_path)
    self.assertTrue(flagged_tdf._same_data(dat, from_verbose))
    self.assertTrue(flagged_tdf._same_data(dat, from_compact))

    # A clone must behave like the flagged original.
    cloned_tdf = flagged_tdf.clone()
    from_verbose = cloned_tdf.json.create_tic_dat(verbose_path)
    self.assertTrue(cloned_tdf._same_data(dat, from_verbose))

    # A fresh factory without the flag must read something different.
    plain_tdf = TicDatFactory(**dietSchema())
    from_verbose = plain_tdf.json.create_tic_dat(verbose_path)
    self.assertFalse(plain_tdf._same_data(dat, from_verbose))
def testDiet(self):
    """Round-trip the diet data through an .mdb file.

    Verifies that a plain read reproduces the data and is editable, that
    writing over the existing file raises unless ``allow_overwrite=True``
    is passed, and that data read with ``freeze_it=True`` rejects edits
    and stays equal to the source.
    """
    tdf = TicDatFactory(**dietSchema())
    source = tdf.freeze_me(
        tdf.TicDat(
            **{t: getattr(dietData(), t) for t in tdf.primary_key_fields}))

    mdb_path = makeCleanPath(os.path.join(_scratchDir, "diet.mdb"))
    tdf.mdb.write_file(source, mdb_path)
    from_mdb = tdf.mdb.create_tic_dat(mdb_path)
    self.assertTrue(tdf._same_data(source, from_mdb))

    def changeit():
        # Reads ``from_mdb`` at call time, so it targets whichever object
        # that name is bound to when invoked.
        from_mdb.categories["calories"]["minNutrition"] = 12

    # Unfrozen data accepts the edit, which must then be detected.
    changeit()
    self.assertFalse(tdf._same_data(source, from_mdb))

    # A second write to the same path needs allow_overwrite=True.
    self.assertTrue(
        self.firesException(lambda: tdf.mdb.write_file(source, mdb_path)))
    tdf.mdb.write_file(source, mdb_path, allow_overwrite=True)

    from_mdb = tdf.mdb.create_tic_dat(mdb_path, freeze_it=True)
    self.assertTrue(tdf._same_data(source, from_mdb))
    # Frozen data: the edit must raise and leave the data untouched.
    self.assertTrue(self.firesException(changeit))
    self.assertTrue(tdf._same_data(source, from_mdb))
def testDiet(self):
    """Round-trip the diet data through OPL text.

    The first round trip is expected NOT to reproduce the data. After
    ``categories["protein"]["maxNutrition"]`` is set to 12 (which, per the
    original author's note, removes infinity from the data) the round trip
    must match. Setting ``opl_prepend`` to "pre_" must change the
    generated text while the round trip still matches.
    """
    tdf = TicDatFactory(**dietSchema())
    tdf.enable_foreign_key_links()
    dat = tdf.TicDat(
        **{t: getattr(dietData(), t) for t in tdf.primary_key_fields})

    initial_text = create_opl_text(tdf, dat)
    initial_round_trip = read_opl_text(tdf, initial_text)
    self.assertFalse(tdf._same_data(dat, initial_round_trip))

    # Remove infinity from the data
    dat.categories["protein"]["maxNutrition"] = 12
    finite_text = create_opl_text(tdf, dat)
    finite_round_trip = read_opl_text(tdf, finite_text)
    self.assertTrue(tdf._same_data(dat, finite_round_trip))

    tdf.opl_prepend = "pre_"
    prefixed_text = create_opl_text(tdf, dat)
    prefixed_round_trip = read_opl_text(tdf, prefixed_text)
    self.assertTrue(tdf._same_data(dat, prefixed_round_trip))
    # The prefix has to show up as a difference in the generated text.
    self.assertFalse(finite_text == prefixed_text)
def testJsonCross(self):
    """Exchange diet data between TicDat and PanDat through one JSON file.

    A file written by the PanDatFactory is read by the TicDatFactory with
    ``from_pandas=True``; the file is then overwritten by the
    TicDatFactory with ``to_pandas=True`` and read back by the
    PanDatFactory. Both comparisons use ``epsilon=0.0001``.

    Returns immediately when ``self.can_run`` is falsy.
    """
    if not self.can_run:
        return

    tdf = TicDatFactory(**dietSchema())
    pdf = PanDatFactory(**dietSchema())
    tic_source = tdf.freeze_me(
        tdf.TicDat(
            **{t: getattr(dietData(), t) for t in tdf.primary_key_fields}))
    pan_source = pan_dat_maker(dietSchema(), tic_source)
    json_path = os.path.join(_scratchDir, "diet_cross.json")

    # PanDat writes, TicDat reads.
    pdf.json.write_file(pan_source, json_path)
    tic_loaded = tdf.json.create_tic_dat(json_path, from_pandas=True)
    self.assertTrue(tdf._same_data(tic_source, tic_loaded, epsilon=0.0001))

    # TicDat writes over the same file, PanDat reads.
    tdf.json.write_file(
        tic_source, json_path, allow_overwrite=True, to_pandas=True)
    pan_loaded = pdf.json.create_pan_dat(json_path)
    self.assertTrue(pdf._same_data(pan_source, pan_loaded, epsilon=0.0001))
def testDiet(self):
    """Round-trip the diet data through an .xls file.

    Checks performed:
      * write/read of ``diet.xls`` reproduces the data;
      * editing a loaded field makes the data differ;
      * ``get_duplicates`` reports nothing for the written file;
      * ``row_offsets`` of 1 with headers expected raises an error that
        mentions "field names could not be found";
      * ``row_offsets`` of 1 with ``headers_present=False`` reproduces the
        data, while an offset of 2 yields exactly one fewer row per table.
    """
    tdf = TicDatFactory(**dietSchema())
    frozen = tdf.freeze_me(
        tdf.TicDat(
            **{t: getattr(dietData(), t) for t in tdf.primary_key_fields}))

    xls_path = os.path.join(_scratchDir, "diet.xls")
    tdf.xls.write_file(frozen, xls_path)
    loaded = tdf.xls.create_tic_dat(xls_path)
    self.assertTrue(tdf._same_data(frozen, loaded))

    # The object read from file is editable; an edit must be detected.
    loaded.categories["calories"]["minNutrition"] = 12
    self.assertFalse(tdf._same_data(frozen, loaded))

    self.assertFalse(tdf.xls.get_duplicates(xls_path))

    error_text = self.firesException(lambda: tdf.xls.create_tic_dat(
        xls_path, row_offsets={t: 1 for t in tdf.all_tables}))
    # assertIn shows the actual message when the expected text is missing.
    self.assertIn("field names could not be found", error_text)

    loaded = tdf.xls.create_tic_dat(
        xls_path,
        row_offsets={t: 1 for t in tdf.all_tables},
        headers_present=False)
    self.assertTrue(tdf._same_data(loaded, frozen))

    loaded = tdf.xls.create_tic_dat(
        xls_path,
        row_offsets={t: 2 for t in tdf.all_tables},
        headers_present=False)
    self.assertFalse(tdf._same_data(loaded, frozen))
    # Checked per table so a failure reports the two row counts involved.
    for table in tdf.all_tables:
        self.assertEqual(len(getattr(frozen, table)) - 1,
                         len(getattr(loaded, table)))
def testDiet(self):
    """Read the diet data from the file "diet.accdb" (relative path).

    Verifies that the file has no duplicates, that a plain read matches
    the reference diet data and is editable, and that data read with
    ``freeze_it=True`` rejects edits and stays equal to the reference.

    Returns immediately when the module-level ``_can_unit_test`` is falsy.
    """
    if not _can_unit_test:
        return

    tdf = TicDatFactory(**dietSchema())
    reference = tdf.freeze_me(
        tdf.TicDat(
            **{t: getattr(dietData(), t) for t in tdf.primary_key_fields}))

    accdb_path = "diet.accdb"
    self.assertFalse(tdf.mdb.find_duplicates(accdb_path))
    from_accdb = tdf.mdb.create_tic_dat(accdb_path)
    self.assertTrue(tdf._same_data(reference, from_accdb))

    def changeit():
        # Reads ``from_accdb`` at call time, so it targets whichever
        # object that name is bound to when invoked.
        from_accdb.categories["calories"]["minNutrition"] = 12

    # Unfrozen data accepts the edit, which must then be detected.
    changeit()
    self.assertFalse(tdf._same_data(reference, from_accdb))

    from_accdb = tdf.mdb.create_tic_dat(accdb_path, freeze_it=True)
    self.assertTrue(tdf._same_data(reference, from_accdb))
    # Frozen data: the edit must raise and leave the data untouched.
    self.assertTrue(self.firesException(changeit))
    self.assertTrue(tdf._same_data(reference, from_accdb))
def testDiet(self):
    """Round-trip the diet data through a directory of csv files.

    Covers: plain write/read, editability of the unfrozen result,
    rejection of the invalid dialect name "excel_t", the "excel-tab"
    dialect (reading it back without naming the dialect must fail), and
    immutability of a result read with ``freeze_it=True``.
    """
    tdf = TicDatFactory(**dietSchema())
    source = tdf.freeze_me(
        tdf.TicDat(
            **{t: getattr(dietData(), t) for t in tdf.primary_key_fields}))

    csv_dir = os.path.join(_scratchDir, "diet")
    tdf.csv.write_directory(source, csv_dir)
    self.assertFalse(tdf.csv.get_duplicates(csv_dir))
    from_csv = tdf.csv.create_tic_dat(csv_dir)
    self.assertTrue(tdf._same_data(source, from_csv))

    def change():
        # Reads ``from_csv`` at call time, so it targets whichever object
        # that name is bound to when invoked.
        from_csv.categories["calories"]["minNutrition"] = 12

    # Unfrozen data: the edit goes through and is detected.
    self.assertFalse(firesException(change))
    self.assertFalse(tdf._same_data(source, from_csv))

    bad_dialect_msg = self.firesException(
        lambda: tdf.csv.write_directory(source, csv_dir, dialect="excel_t"))
    self.assertTrue(bad_dialect_msg.endswith("Invalid dialect excel_t"))

    tdf.csv.write_directory(
        source, csv_dir, dialect="excel-tab", allow_overwrite=True)
    # Reading without naming the dialect is expected to raise.
    self.assertTrue(
        self.firesException(
            lambda: tdf.csv.create_tic_dat(csv_dir, freeze_it=True)))
    from_csv = tdf.csv.create_tic_dat(
        csv_dir, freeze_it=True, dialect="excel-tab")
    # Frozen data: the same edit must now raise and leave the data intact.
    self.assertTrue(firesException(change))
    self.assertTrue(tdf._same_data(source, from_csv))
def testLongName(self):
    """Exercise xls I/O with table names carrying a long "b" prefix.

    With the 20-character prefix applied twice to every diet table name,
    ``_verify_differentiable_sheet_names`` must raise. With the prefix
    applied once, the data must survive a write/read cycle through
    "longname.xls" and the file must report no duplicates.
    """
    prefix = "b" * 20

    # Doubled prefix: the sheet-name verification is expected to fail.
    too_long_tdf = TicDatFactory(
        **{prefix * 2 + name: spec for name, spec in dietSchema().items()})
    self.assertTrue(
        self.firesException(
            lambda: too_long_tdf.xls._verify_differentiable_sheet_names()))

    # Single prefix: the normal round trip must work.
    tdf = TicDatFactory(
        **{prefix + name: spec for name, spec in dietSchema().items()})
    source = tdf.freeze_me(
        tdf.TicDat(
            **{
                # Strip the prefix to find the matching diet table.
                t: getattr(dietData(), t.replace(prefix, ""))
                for t in tdf.primary_key_fields
            }))

    xls_path = os.path.join(_scratchDir, "longname.xls")
    tdf.xls.write_file(source, xls_path)
    self.assertFalse(tdf.xls.find_duplicates(xls_path))
    reloaded = tdf.xls.create_tic_dat(xls_path)
    self.assertTrue(tdf._same_data(source, reloaded))