def testNetflow(self):
    """Exercise copy_to_pandas on the netflow schema: partial-table copies, sloc
    lookups against foreign-key links, and TicDat round-trips."""
    if not self.canRun:
        return
    tdf = TicDatFactory(**netflowSchema())
    tdf.enable_foreign_key_links()
    addNetflowForeignKeys(tdf)
    oldDat = tdf.freeze_me(tdf.TicDat(**{t:getattr(netflowData(),t)
                                         for t in tdf.primary_key_fields}))
    self._test_generic_free_copy(oldDat, tdf)
    self._test_generic_free_copy(oldDat, tdf, ["arcs", "nodes"])
    # Copy just two tables; the result should expose exactly those tables and no others.
    ticDat = tdf.copy_to_pandas(oldDat, ["arcs", "cost"])
    self.assertTrue(all(hasattr(ticDat, t) == (t in ["arcs", "cost"]) for t in tdf.all_tables))
    # sloc lookups on the pandas copy must agree with the arcs_source/arcs_destination
    # foreign-key links on the frozen TicDat (Boston: no outgoing arcs, two incoming).
    self.assertTrue(len(ticDat.arcs.capacity.sloc["Boston",:]) ==
                    len(oldDat.nodes["Boston"].arcs_source) == 0)
    self.assertTrue(len(ticDat.arcs.capacity.sloc[:,"Boston"]) ==
                    len(oldDat.nodes["Boston"].arcs_destination) == 2)
    self.assertTrue(all(ticDat.arcs.capacity.sloc[:,"Boston"][src] == r["capacity"]
                        for src, r in oldDat.nodes["Boston"].arcs_destination.items()))
    ticDat = tdf.copy_to_pandas(oldDat, drop_pk_columns=True)
    rebornTicDat = tdf.TicDat(**{t:getattr(ticDat, t) for t in tdf.all_tables})
    # because we have single pk field tables, dropping the pk columns is problematic
    self.assertFalse(tdf._same_data(rebornTicDat, oldDat))
    # but with the default argument all is well
    ticDat = tdf.copy_to_pandas(oldDat)
    rebornTicDat = tdf.TicDat(**{t:getattr(ticDat, t) for t in tdf.all_tables})
    self.assertTrue(tdf._same_data(rebornTicDat, oldDat))
    # With the default copy, inflow keeps its data column and nodes (presumably a
    # pk-only table - confirm against netflowSchema) keeps its pk field as a column.
    self.assertTrue(set(ticDat.inflow.columns) == {"quantity"})
    self.assertTrue(set(ticDat.nodes.columns) == {"name"})
def testDataTypes(self):
    """Exercise PanDatFactory data-type checking: find_data_type_failures with
    nullability, numeric ranges and strings_allowed, plus replace_data_type_failures."""
    if not self.canRun:
        return
    tdf = TicDatFactory(**dietSchema())
    pdf = PanDatFactory(**dietSchema())
    ticdat = tdf.TicDat()
    ticdat.foods["a"] = 12
    ticdat.foods["b"] = None  # null cost - flagged once nullable=False is set below
    ticdat.categories["1"] = {"maxNutrition":100, "minNutrition":40}
    ticdat.categories["2"] = [10,20]
    for f, p in itertools.product(ticdat.foods, ticdat.categories):
        ticdat.nutritionQuantities[f,p] = 5
    ticdat.nutritionQuantities['a', 2] = 12
    pandat = pdf.copy_pan_dat(copy_to_pandas_with_reset(tdf, ticdat))
    # No data types configured yet, so nothing fails and "replacing" is a no-op.
    self.assertFalse(pdf.find_data_type_failures(pandat))
    pandat_copy = pdf.replace_data_type_failures(pdf.copy_pan_dat(pandat))
    self.assertTrue(pdf._same_data(pandat, pandat_copy, epsilon=0.00001))
    # Now require non-null cost and qty in the half-open range (5, 12].
    pdf = PanDatFactory(**dietSchema())
    pdf.set_data_type("foods", "cost", nullable=False)
    pdf.set_data_type("nutritionQuantities", "qty", min=5, inclusive_min=False, max=12,
                      inclusive_max=True)
    failed = pdf.find_data_type_failures(pandat)
    self.assertTrue(set(failed) == {('foods', 'cost'), ('nutritionQuantities', 'qty')})
    self.assertTrue(set(failed['foods', 'cost']["name"]) == {'b'})
    # The four qty=5 rows fail the exclusive min; the ('a', '2') row was bumped to 12.
    self.assertTrue(set({(v["food"], v["category"])
                         for v in failed['nutritionQuantities', 'qty'].T.to_dict().values()})
                    == {('b', '1'), ('a', '2'), ('a', '1'), ('b', '2')})
    # as_table=False returns boolean indicators instead of failure tables.
    failed = pdf.find_data_type_failures(pandat, as_table=False)
    self.assertTrue(4 == failed['nutritionQuantities', 'qty'].value_counts()[True])
    # Replacement values: explicit 5.15 for qty; cost falls back to a 0.0 default.
    fixed = pdf.replace_data_type_failures(pdf.copy_pan_dat(pandat),
                                           {("nutritionQuantities", "qty"): 5.15})
    self.assertTrue(set(fixed.foods["cost"]) == {0.0, 12.0})
    self.assertTrue(set(fixed.nutritionQuantities["qty"]) == {5.15, 12.0})
    # Repeat on the netflow schema, with string values injected into arcs capacity.
    tdf = TicDatFactory(**netflowSchema())
    tdf.enable_foreign_key_links()
    addNetflowForeignKeys(tdf)
    pdf = PanDatFactory(**netflowSchema())
    ticdat = tdf.copy_tic_dat(netflowData())
    for n in ticdat.nodes["Detroit"].arcs_source:
        ticdat.arcs["Detroit", n] = n  # capacity becomes the destination node name (a string)
    pandat = pdf.copy_pan_dat(copy_to_pandas_with_reset(tdf, ticdat))
    self.assertFalse(pdf.find_data_type_failures(pandat))
    pdf = PanDatFactory(**netflowSchema())
    pdf.set_data_type("arcs", "capacity", strings_allowed="*")
    self.assertFalse(pdf.find_data_type_failures(pandat))
    # With a whitelist of strings, only the "New York" capacity string fails.
    pdf = PanDatFactory(**netflowSchema())
    pdf.set_data_type("arcs", "capacity", strings_allowed=["Boston", "Seattle", "lumberjack"])
    failed = pdf.find_data_type_failures(pandat)
    self.assertTrue(set(failed) == {('arcs', 'capacity')})
    self.assertTrue(set({(v["source"], v["destination"])
                         for v in failed['arcs', 'capacity'].T.to_dict().values()})
                    == {("Detroit", "New York")})
    # replace_data_type_failures mutates pandat in place here.
    pdf.replace_data_type_failures(pandat)
    self.assertTrue(set(pandat.arcs["capacity"]) == {120, 'Boston', 0, 'Seattle'})
def testSilly(self):
    """Round-trip the silly-me schema through OPL text, with and without opl_prepend."""
    factory = TicDatFactory(**sillyMeSchema())
    factory.enable_foreign_key_links()
    frozen = factory.freeze_me(factory.TicDat(**sillyMeData()))
    # Serialize/parse once with the default prepend, then again with a custom one;
    # both trips must reproduce the data exactly.
    for prefix in (None, "ooooo"):
        if prefix is not None:
            factory.opl_prepend = prefix
        reborn = read_opl_text(factory, create_opl_text(factory, frozen))
        self.assertTrue(factory._same_data(frozen, reborn))
def testNetflow(self):
    """Round-trip netflow data through OPL text, before and after changing opl_prepend."""
    factory = TicDatFactory(**netflowSchema())
    factory.enable_foreign_key_links()
    tables = {t: getattr(netflowData(), t) for t in factory.primary_key_fields}
    frozen = factory.freeze_me(factory.TicDat(**tables))
    # First trip uses the default prepend, second a custom one; both must be lossless.
    for prefix in (None, "stuff"):
        if prefix is not None:
            factory.opl_prepend = prefix
        reborn = read_opl_text(factory, create_opl_text(factory, frozen))
        self.assertTrue(factory._same_data(frozen, reborn))
def testNetflow_oplrunRequired(self):
    """Solve sample_netflow.mod via opl_run and spot-check the returned solution."""
    # This test requires a working oplrun installation.
    self.assertTrue(_can_run_oplrun_tests)
    input_factory = TicDatFactory(**netflowSchema())
    input_factory.enable_foreign_key_links()
    solution_factory = TicDatFactory(
        flow=[["source", "destination", "commodity"], ["quantity"]],
        parameters=[["paramKey"], ["value"]])
    input_tables = {t: getattr(netflowData(), t) for t in input_factory.primary_key_fields}
    dat = input_factory.TicDat(**input_tables)
    solution = opl_run(get_testing_file_path("sample_netflow.mod"), input_factory, dat,
                       solution_factory)
    # Spot-check the objective value and one flow quantity.
    self.assertTrue(nearlySame(solution.parameters["Total Cost"]["value"], 5500))
    self.assertTrue(nearlySame(solution.flow["Pens", "Detroit", "New York"]["quantity"], 30))
def testDiet(self):
    """OPL text round-trips for diet data: infinite values break the trip,
    finite data survives, and opl_prepend changes text but not parsed data."""
    factory = TicDatFactory(**dietSchema())
    factory.enable_foreign_key_links()
    dat = factory.TicDat(**{t: getattr(dietData(), t) for t in factory.primary_key_fields})
    # The raw diet data is not reproduced by an OPL text round trip (see the
    # infinity removal below).
    reborn = read_opl_text(factory, create_opl_text(factory, dat))
    self.assertFalse(factory._same_data(dat, reborn))
    dat.categories["protein"]["maxNutrition"] = 12  # Remove infinity from the data
    first_text = create_opl_text(factory, dat)
    self.assertTrue(factory._same_data(dat, read_opl_text(factory, first_text)))
    # A custom prepend yields different serialized text but the same parsed data.
    factory.opl_prepend = "pre_"
    second_text = create_opl_text(factory, dat)
    self.assertTrue(factory._same_data(dat, read_opl_text(factory, second_text)))
    self.assertFalse(first_text == second_text)
def testDataPredicates(self):
    """Exercise add_data_row_predicate / find_data_row_failures on diet data
    (both the real schema and a generic '*' schema) and on netflow data."""
    if not self.canRun:
        return
    tdf = TicDatFactory(**dietSchema())
    pdf = PanDatFactory(**dietSchema())
    ticdat = tdf.TicDat()
    ticdat.foods["a"] = 12
    ticdat.foods["b"] = None  # null cost - trips the cost predicate below
    ticdat.categories["1"] = {"maxNutrition":100, "minNutrition":40}
    ticdat.categories["2"] = [21,20]  # max < min - trips the minmax predicate below
    for f, p in itertools.product(ticdat.foods, ticdat.categories):
        ticdat.nutritionQuantities[f,p] = 5
    pandat = pdf.copy_pan_dat(copy_to_pandas_with_reset(tdf, ticdat))
    # No predicates registered on this pdf, so nothing can fail yet.
    self.assertFalse(pdf.find_duplicates(pandat))
    self.assertFalse(pdf.find_data_row_failures(pandat))
    ticdat.nutritionQuantities['a', 2] = 12
    ticdat.categories["3"] = ['a', 100]  # string maxNutrition - an extra minmax failure
    pandat_2 = pdf.copy_pan_dat(copy_to_pandas_with_reset(tdf, ticdat))
    def perform_predicate_checks(sch):
        # Run the same predicate checks against either the real diet schema or a
        # generic one; results (closed over pandat/pandat_2) must be identical.
        pdf = PanDatFactory(**sch)
        pdf.add_data_row_predicate("foods",
                                   lambda row: numericish(row["cost"]) and not isnan(row["cost"]),
                                   "cost")
        good_qty = lambda qty : 5 < qty <= 12
        pdf.add_data_row_predicate("nutritionQuantities", lambda row: good_qty(row["qty"]),
                                   "qty")
        pdf.add_data_row_predicate("categories",
                                   lambda row: row["maxNutrition"] >= row["minNutrition"],
                                   "minmax")
        failed = pdf.find_data_row_failures(pandat)
        self.assertTrue(set(failed) == {('foods', 'cost'), ('nutritionQuantities', 'qty'),
                                        ('categories', 'minmax')})
        self.assertTrue(set(failed['foods', 'cost']["name"]) == {'b'})
        # All four qty=5 rows fail the exclusive lower bound of good_qty.
        self.assertTrue(set({(v["food"], v["category"])
                             for v in failed['nutritionQuantities', 'qty'].T.to_dict().values()})
                        == {('b', '1'), ('a', '2'), ('a', '1'), ('b', '2')})
        self.assertTrue(set(failed['categories', 'minmax']["name"]) == {'2'})
        # as_table=False returns boolean indicators instead of failure tables.
        failed = pdf.find_data_row_failures(pandat, as_table=False)
        self.assertTrue(4 == failed['nutritionQuantities', 'qty'].value_counts()[True])
        # pandat_2 adds category '3' (string maxNutrition) as a second minmax failure.
        failed = pdf.find_data_row_failures(pandat_2)
        self.assertTrue(set(failed['categories', 'minmax']["name"]) == {'2', '3'})
    perform_predicate_checks(dietSchema())
    perform_predicate_checks({t:'*' for t in dietSchema()})
    # Same idea on the netflow schema, with string values injected into arcs capacity.
    tdf = TicDatFactory(**netflowSchema())
    tdf.enable_foreign_key_links()
    addNetflowForeignKeys(tdf)
    pdf = PanDatFactory(**netflowSchema())
    ticdat = tdf.copy_tic_dat(netflowData())
    for n in ticdat.nodes["Detroit"].arcs_source:
        ticdat.arcs["Detroit", n] = n  # capacity becomes the destination node name (a string)
    pandat = pdf.copy_pan_dat(copy_to_pandas_with_reset(tdf, ticdat))
    self.assertFalse(pdf.find_duplicates(pandat))
    self.assertFalse(pdf.find_data_row_failures(pandat))
    # An always-true predicate yields no failures.
    pdf = PanDatFactory(**netflowSchema())
    pdf.add_data_row_predicate("arcs", lambda row: True, "capacity")
    self.assertFalse(pdf.find_data_row_failures(pandat))
    # A whitelist predicate flags only the "New York" string capacity.
    pdf = PanDatFactory(**netflowSchema())
    good_capacity = lambda capacity: numericish(capacity) or capacity in ["Boston", "Seattle",
                                                                          "lumberjack"]
    pdf.add_data_row_predicate("arcs", lambda row: good_capacity(row["capacity"]), "capacity")
    failed = pdf.find_data_row_failures(pandat)
    self.assertTrue(set(failed) == {('arcs', 'capacity')})
    self.assertTrue(set({(v["source"], v["destination"])
                         for v in failed['arcs', 'capacity'].T.to_dict().values()})
                    == {("Detroit", "New York")})
def testDataPredicates(self): # this test won't run properly if the -O flag is applied
    """Exercise data-row predicates including the "Error Message" failure response,
    exception_handling modes, and auto-renaming of clashing Error Message columns."""
    if not self.canRun:
        return
    tdf = TicDatFactory(**dietSchema())
    pdf = PanDatFactory(**dietSchema())
    ticdat = tdf.TicDat()
    ticdat.foods["a"] = 12
    ticdat.foods["b"] = None  # null cost - trips the cost predicate below
    ticdat.categories["1"] = {"maxNutrition":100, "minNutrition":40}
    ticdat.categories["2"] = [21,20]  # max < min - trips the minmax predicate below
    for f, p in itertools.product(ticdat.foods, ticdat.categories):
        ticdat.nutritionQuantities[f,p] = 5
    pandat = pdf.copy_pan_dat(copy_to_pandas_with_reset(tdf, ticdat))
    self.assertFalse(pdf.find_duplicates(pandat))
    self.assertFalse(pdf.find_data_row_failures(pandat))
    ticdat.nutritionQuantities['a', 2] = 12
    ticdat.categories["3"] = ['a', 100]  # string maxNutrition - raises inside minmax predicate
    pandat_2 = pdf.copy_pan_dat(copy_to_pandas_with_reset(tdf, ticdat))
    def perform_predicate_checks(sch):
        # pdf uses plain boolean predicates; pdf2 wraps the same predicates so that
        # a failing row returns an "<name> failed!" message instead of False.
        pdf = PanDatFactory(**sch)
        pdf.add_data_row_predicate("foods",
                                   lambda row: numericish(row["cost"]) and not isnan(row["cost"]),
                                   "cost")
        good_qty = lambda qty : 5 < qty <= 12
        pdf.add_data_row_predicate("nutritionQuantities", lambda row: good_qty(row["qty"]),
                                   "qty")
        pdf.add_data_row_predicate("categories",
                                   lambda row: row["maxNutrition"] >= row["minNutrition"],
                                   "minmax")
        pdf2 = PanDatFactory(**sch)
        def make_error_message_predicate(f, name):
            # Adapt a boolean predicate to the "Error Message" protocol: True on
            # success, a message string on failure.
            def error_message_predicate(row):
                rtn = f(row)
                if rtn:
                    return True
                return f"{name} failed!"
            return error_message_predicate
        for t, preds in pdf._data_row_predicates.items():
            for p_name, rpi in preds.items():
                pdf2.add_data_row_predicate(t,
                                            make_error_message_predicate(rpi.predicate, p_name),
                                            predicate_name=p_name,
                                            predicate_failure_response="Error Message")
        # Both flavors must flag the same tables and the same rows.
        failed = pdf.find_data_row_failures(pandat)
        failed2 = pdf2.find_data_row_failures(pandat)
        self.assertTrue(set(failed) == set(failed2) == {('foods', 'cost'),
                                                        ('nutritionQuantities', 'qty'),
                                                        ('categories', 'minmax')})
        self.assertTrue(set(failed['foods', 'cost']["name"]) ==
                        set(failed2['foods', 'cost']["name"]) == {'b'})
        for f in [failed, failed2]:
            self.assertTrue(set({(v["food"], v["category"])
                                 for v in f['nutritionQuantities', 'qty'].T.to_dict().values()})
                            == {('b', '1'), ('a', '2'), ('a', '1'), ('b', '2')})
            self.assertTrue(set(f['categories', 'minmax']["name"]) == {'2'})
        # The message flavor also carries the generated "Error Message" column.
        for t, n in failed2:
            self.assertTrue(set(failed2[t, n]["Error Message"]) == {f'{n} failed!'})
        for _pdf in [pdf, pdf2]:
            failed = _pdf.find_data_row_failures(pandat, as_table=False)
            self.assertTrue(4 == failed['nutritionQuantities', 'qty'].value_counts()[True])
            # pandat_2's category '3' makes the minmax predicate raise a TypeError,
            # which propagates by default...
            ex = []
            try:
                _pdf.find_data_row_failures(pandat_2)
            except Exception as e:
                ex[:] = [str(e.__class__)]
            self.assertTrue("TypeError" in ex[0])
            # ...but is recorded as a failure row under "Handled as Failure".
            failed = _pdf.find_data_row_failures(pandat_2,
                                                 exception_handling="Handled as Failure")
            self.assertTrue(set(failed['categories', 'minmax']["name"]) == {'2', '3'})
        # With the message flavor, the handled exception text becomes the Error Message.
        failed = pdf2.find_data_row_failures(pandat_2, exception_handling="Handled as Failure")
        df = failed['categories', 'minmax']
        err_str = list(df[df['name'] == '3']["Error Message"])[0]
        self.assertTrue(err_str=="Exception<'>=' not supported between instances of 'int' and 'str'>")
    perform_predicate_checks(dietSchema())
    perform_predicate_checks({t:'*' for t in dietSchema()})
    # Netflow variant: string capacities, whitelist predicate.
    tdf = TicDatFactory(**netflowSchema())
    tdf.enable_foreign_key_links()
    addNetflowForeignKeys(tdf)
    pdf = PanDatFactory(**netflowSchema())
    ticdat = tdf.copy_tic_dat(netflowData())
    for n in ticdat.nodes["Detroit"].arcs_source:
        ticdat.arcs["Detroit", n] = n  # capacity becomes the destination node name (a string)
    pandat = pdf.copy_pan_dat(copy_to_pandas_with_reset(tdf, ticdat))
    self.assertFalse(pdf.find_duplicates(pandat))
    self.assertFalse(pdf.find_data_row_failures(pandat))
    pdf = PanDatFactory(**netflowSchema())
    pdf.add_data_row_predicate("arcs", lambda row: True, "capacity")
    self.assertFalse(pdf.find_data_row_failures(pandat))
    pdf = PanDatFactory(**netflowSchema())
    good_capacity = lambda capacity: numericish(capacity) or capacity in ["Boston", "Seattle",
                                                                          "lumberjack"]
    pdf.add_data_row_predicate("arcs", lambda row: good_capacity(row["capacity"]), "capacity")
    failed = pdf.find_data_row_failures(pandat)
    self.assertTrue(set(failed) == {('arcs', 'capacity')})
    self.assertTrue(set({(v["source"], v["destination"])
                         for v in failed['arcs', 'capacity'].T.to_dict().values()})
                    == {("Detroit", "New York")})
    # When the table already has "Error Message"/"Error Message (1)" columns, the
    # generated message column is renamed to the next free "Error Message (k)".
    pdf = PanDatFactory(table=[[],["Field", "Error Message", "Error Message (1)"]])
    pdf.add_data_row_predicate("table",
                               predicate=lambda row: f"Oops {row['Field']}"
                                                     if row["Field"] > 1 else True,
                               predicate_name="silly",
                               predicate_failure_response="Error Message")
    df = DataFrame({"Field":[2, 1], "Error Message":["what", "go"],
                    "Error Message (1)": ["now", "go"]})
    fails = pdf.find_data_row_failures(pdf.PanDat(table=df))
    df = fails["table", "silly"]
    self.assertTrue(list(df.columns) == ["Field", "Error Message", "Error Message (1)",
                                         "Error Message (2)"])
    self.assertTrue(set(df["Field"]) == {2} and set(df["Error Message (2)"]) == {'Oops 2'})
def testEight(self):
    """Exercise TicDatFactory data-type checking and replace_data_type_failures on
    diet data, then on netflow data, including foreign-key link behavior."""
    tdf = TicDatFactory(**dietSchema())
    def makeIt() :
        # Build a small frozen diet TicDat; foods["b"] has a null cost and one
        # qty is bumped to 12 so the range checks below have something to find.
        rtn = tdf.TicDat()
        rtn.foods["a"] = 12
        rtn.foods["b"] = None
        rtn.categories["1"] = {"maxNutrition":100, "minNutrition":40}
        rtn.categories["2"] = [10,20]
        for f, p in itertools.product(rtn.foods, rtn.categories):
            rtn.nutritionQuantities[f,p] = 5
        rtn.nutritionQuantities['a', 2] = 12
        return tdf.freeze_me(rtn)
    dat = makeIt()
    # No data types configured yet.
    self.assertFalse(tdf.find_data_type_failures(dat))
    # Require non-null cost and qty in the half-open range (5, 12].
    tdf = TicDatFactory(**dietSchema())
    tdf.set_data_type("foods", "cost", nullable=False)
    tdf.set_data_type("nutritionQuantities", "qty", min=5, inclusive_min=False, max=12,
                      inclusive_max=True)
    tdf.set_default_value("foods", "cost", 2)
    dat = makeIt()
    failed = tdf.find_data_type_failures(dat)
    self.assertTrue(set(failed) == {('foods', 'cost'), ('nutritionQuantities', 'qty')})
    self.assertTrue(set(failed['nutritionQuantities', 'qty'].pks) ==
                    {('b', '1'), ('a', '2'), ('a', '1'), ('b', '2')})
    self.assertTrue(failed['nutritionQuantities', 'qty'].bad_values == (5,))
    # With no usable replacement value for qty, replace_data_type_failures raises.
    ex = self.firesException(lambda : tdf.replace_data_type_failures(tdf.copy_tic_dat(dat)))
    self.assertTrue(all(_ in ex for _ in ("replacement value", "nutritionQuantities", "qty")))
    # Explicit replacement for qty; cost falls back to its default value of 2.
    fixedDat = tdf.replace_data_type_failures(tdf.copy_tic_dat(dat),
                                              replacement_values={("nutritionQuantities",
                                                                   "qty"):5.001})
    self.assertFalse(tdf.find_data_type_failures(fixedDat) or tdf._same_data(fixedDat, dat))
    self.assertTrue(all(fixedDat.nutritionQuantities[pk]["qty"] == 5.001
                        for pk in failed['nutritionQuantities', 'qty'].pks))
    self.assertTrue(fixedDat.foods["a"]["cost"] == 12 and fixedDat.foods["b"]["cost"] == 2
                    and fixedDat.nutritionQuantities['a', 2]["qty"] == 12)
    # Passing the cost replacement explicitly gives the same result.
    tdf = TicDatFactory(**dietSchema())
    tdf.set_data_type("foods", "cost", nullable=False)
    tdf.set_data_type("nutritionQuantities", "qty", min=5, inclusive_min=False, max=12,
                      inclusive_max=True)
    fixedDat2 = tdf.replace_data_type_failures(tdf.copy_tic_dat(dat),
                                               replacement_values={("nutritionQuantities",
                                                                    "qty"):5.001,
                                                                   ("foods", "cost") : 2})
    self.assertTrue(tdf._same_data(fixedDat, fixedDat2))
    # number_allowed=False makes every numeric qty a failure.
    tdf = TicDatFactory(**dietSchema())
    tdf.set_data_type("foods", "cost", nullable=True)
    tdf.set_data_type("nutritionQuantities", "qty",number_allowed=False)
    failed = tdf.find_data_type_failures(dat)
    self.assertTrue(set(failed) == {('nutritionQuantities', 'qty')})
    self.assertTrue(set(failed['nutritionQuantities', 'qty'].pks) ==
                    set(dat.nutritionQuantities))
    ex = self.firesException(lambda : tdf.replace_data_type_failures(tdf.copy_tic_dat(dat)))
    self.assertTrue(all(_ in ex for _ in ("replacement value", "nutritionQuantities", "qty")))
    # With a default cost data type, the null cost is replaced by 0.
    tdf = TicDatFactory(**dietSchema())
    tdf.set_data_type("foods", "cost")
    fixedDat = tdf.replace_data_type_failures(tdf.copy_tic_dat(makeIt()))
    self.assertTrue(fixedDat.foods["a"]["cost"] == 12 and fixedDat.foods["b"]["cost"] == 0)
    # arcs_source links only appear once enable_foreign_key_links is called.
    tdf = TicDatFactory(**netflowSchema())
    addNetflowForeignKeys(tdf)
    dat = tdf.copy_tic_dat(netflowData(), freeze_it=1)
    self.assertFalse(hasattr(dat.nodes["Detroit"], "arcs_source"))
    tdf = TicDatFactory(**netflowSchema())
    addNetflowForeignKeys(tdf)
    tdf.enable_foreign_key_links()
    dat = tdf.copy_tic_dat(netflowData(), freeze_it=1)
    self.assertTrue(hasattr(dat.nodes["Detroit"], "arcs_source"))
    tdf = TicDatFactory(**netflowSchema())
    def makeIt() :
        # Rebind makeIt for netflow: set up FK links on first use, then overwrite
        # Detroit's outgoing-arc capacities with the destination node names (strings).
        if not tdf.foreign_keys:
            tdf.enable_foreign_key_links()
            addNetflowForeignKeys(tdf)
        orig = netflowData()
        rtn = tdf.copy_tic_dat(orig)
        for n in rtn.nodes["Detroit"].arcs_source:
            rtn.arcs["Detroit", n] = n
        self.assertTrue(all(len(getattr(rtn, t)) == len(getattr(orig, t))
                            for t in tdf.all_tables))
        return tdf.freeze_me(rtn)
    dat = makeIt()
    self.assertFalse(tdf.find_data_type_failures(dat))
    tdf = TicDatFactory(**netflowSchema())
    tdf.set_data_type("arcs", "capacity", strings_allowed="*")
    dat = makeIt()
    self.assertFalse(tdf.find_data_type_failures(dat))
    # With a whitelist, only the "New York" capacity string fails.
    tdf = TicDatFactory(**netflowSchema())
    tdf.set_data_type("arcs", "capacity", strings_allowed=["Boston", "Seattle", "lumberjack"])
    dat = makeIt()
    failed = tdf.find_data_type_failures(dat)
    self.assertTrue(failed == {('arcs', 'capacity'):(("New York",),
                                                     (("Detroit", "New York"),))})
    fixedDat = tdf.replace_data_type_failures(tdf.copy_tic_dat(makeIt()))
    netflowData_ = tdf.copy_tic_dat(netflowData())
    self.assertFalse(tdf.find_data_type_failures(fixedDat) or
                     tdf._same_data(dat, netflowData_))
    # Replace the bad capacity with 80, then restore the two string-valued arcs by
    # hand; the result should match the original netflow data.
    fixedDat = tdf.copy_tic_dat(tdf.replace_data_type_failures(tdf.copy_tic_dat(makeIt()),
                                                               {("arcs", "capacity"):80,
                                                                ("cost","cost") :"imok"}))
    fixedDat.arcs["Detroit", "Boston"] = 100
    fixedDat.arcs["Detroit", "Seattle"] = 120
    self.assertTrue(tdf._same_data(fixedDat, netflowData_))
def testDiet(self):
    """Exercise copy_to_pandas on the diet schema: value fidelity, loc vs sloc
    lookups, and round-trips through generic ('*') and mixed schemas."""
    if not self.canRun:
        return
    tdf = TicDatFactory(**dietSchema())
    tdf.enable_foreign_key_links()
    oldDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(dietData(), t)
                                         for t in tdf.primary_key_fields}))
    self._test_generic_free_copy(oldDat, tdf)
    self._test_generic_free_copy(oldDat, tdf, ["nutritionQuantities"])
    ticDat = tdf.copy_to_pandas(oldDat)
    # Every data value must survive the copy.
    for k in oldDat.foods:
        self.assertTrue(oldDat.foods[k]["cost"] == ticDat.foods.cost[k])
    for k in oldDat.categories:
        self.assertTrue(oldDat.categories[k]["minNutrition"] ==
                        ticDat.categories.minNutrition[k])
    for k1, k2 in oldDat.nutritionQuantities:
        self.assertTrue(oldDat.nutritionQuantities[k1, k2]["qty"] ==
                        ticDat.nutritionQuantities.qty[k1, k2])
    nut = ticDat.nutritionQuantities
    # loc raises on missing keys; sloc ("safe loc") returns an empty slice instead.
    self.assertTrue(firesException(lambda: nut.qty.loc[:, "fatty"]))
    self.assertTrue(firesException(lambda: nut.qty.loc["chickeny", :]))
    self.assertFalse(firesException(lambda: nut.qty.sloc[:, "fatty"]))
    self.assertFalse(firesException(lambda: nut.qty.sloc["chickeny", :]))
    self.assertTrue(0 == sum(nut.qty.sloc[:, "fatty"]) == sum(nut.qty.sloc["chickeny", :]))
    # On keys that exist, sloc and loc agree with direct summation over the TicDat.
    self.assertTrue(sum(nut.qty.sloc[:, "fat"]) == sum(nut.qty.loc[:, "fat"]) ==
                    sum(r["qty"] for (f, c), r in oldDat.nutritionQuantities.items()
                        if c == "fat"))
    self.assertTrue(sum(nut.qty.sloc["chicken", :]) == sum(nut.qty.loc["chicken", :]) ==
                    sum(r["qty"] for (f, c), r in oldDat.nutritionQuantities.items()
                        if f == "chicken"))
    # The pandas copy can be fed straight back into the TicDatFactory.
    rebornTicDat = tdf.TicDat(**{t: getattr(ticDat, t) for t in tdf.all_tables})
    self.assertTrue(tdf._same_data(rebornTicDat, oldDat))
    # A fully generic factory can't set data types...
    tdf2 = TicDatFactory(**{t: '*' for t in tdf.all_tables})
    self.assertTrue(firesException(lambda: tdf2.set_data_type("nutritionQuantities", "qty")))
    # ...but can still ingest the pandas copy.
    genTicDat = tdf2.TicDat(**{t: getattr(ticDat, t) for t in tdf.all_tables})
    for k in oldDat.categories:
        self.assertTrue(oldDat.categories[k]["minNutrition"] ==
                        genTicDat.categories.minNutrition[k])
    for k1, k2 in oldDat.nutritionQuantities:
        self.assertTrue(oldDat.nutritionQuantities[k1, k2]["qty"] ==
                        genTicDat.nutritionQuantities.qty[k1, k2])
    # The generic object is good for tdf2 but not for the typed tdf.
    self.assertFalse(tdf.good_tic_dat_object(genTicDat))
    self.assertTrue(tdf2.good_tic_dat_object(genTicDat))
    rebornTicDat = tdf.TicDat(**{t: getattr(genTicDat, t) for t in tdf.all_tables})
    self.assertTrue(tdf._same_data(rebornTicDat, oldDat))
    # Round trip through as_dict; index names must be restored by hand.
    rebornGenTicDat = tdf2.TicDat(**tdf2.as_dict(genTicDat))
    for t, pks in tdf.primary_key_fields.items():
        getattr(rebornGenTicDat, t).index.names = pks
    rebornTicDat = tdf.TicDat(**{t: getattr(rebornGenTicDat, t) for t in tdf.all_tables})
    self.assertTrue(tdf._same_data(rebornTicDat, oldDat))
    # Mixed schema: diet schema with only "categories" generic.
    tdf3 = TicDatFactory(**dict(dietSchema(), **{"categories": '*'}))
    self.assertFalse(firesException(lambda: tdf3.set_data_type("nutritionQuantities", "qty")))
    mixTicDat = tdf3.TicDat(**{t: getattr(ticDat, t) for t in tdf.all_tables})
    for k in oldDat.categories:
        self.assertTrue(oldDat.categories[k]["minNutrition"] ==
                        mixTicDat.categories.minNutrition[k])
    for k1, k2 in oldDat.nutritionQuantities:
        self.assertTrue(oldDat.nutritionQuantities[k1, k2]["qty"] ==
                        mixTicDat.nutritionQuantities[k1, k2]["qty"])
    # Each object is only "good" for its own factory.
    self.assertFalse(tdf2.good_tic_dat_object(mixTicDat))
    self.assertFalse(tdf3.good_tic_dat_object(genTicDat))
    self.assertTrue(tdf3.good_tic_dat_object(mixTicDat))
    rebornTicDat = tdf.TicDat(**{t: getattr(mixTicDat, t) for t in tdf.all_tables})
    self.assertTrue(tdf._same_data(rebornTicDat, oldDat))
def testRoundTrips(self):
    """Round-trip TicDat <-> PanDat for diet and netflow, then probe duplicate
    detection and NaN/None behavior in primary-key vs data fields."""
    if not self.canRun:
        return
    tdf = TicDatFactory(**dietSchema())
    tdf.enable_foreign_key_links()
    oldDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(dietData(), t)
                                         for t in tdf.primary_key_fields}))
    pdf = PanDatFactory.create_from_full_schema(tdf.schema(include_ancillary_info=True))
    pan_dat = tdf.copy_to_pandas(oldDat, drop_pk_columns=False)
    self.assertTrue(pdf.good_pan_dat_object(pan_dat))
    tic_dat = pdf.copy_to_tic_dat(pan_dat)
    self.assertTrue(tdf._same_data(oldDat, tic_dat))
    # Same round trip for the netflow schema.
    tdf = TicDatFactory(**netflowSchema())
    tdf.enable_foreign_key_links()
    addNetflowForeignKeys(tdf)
    oldDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(netflowData(), t)
                                         for t in tdf.primary_key_fields}))
    pdf = PanDatFactory.create_from_full_schema(tdf.schema(include_ancillary_info=True))
    pan_dat = tdf.copy_to_pandas(oldDat, drop_pk_columns=False)
    self.assertTrue(pdf.good_pan_dat_object(pan_dat))
    tic_dat = pdf.copy_to_tic_dat(pan_dat)
    self.assertTrue(tdf._same_data(oldDat, tic_dat))
    # Duplicate pk rows (two (1, 10) rows): find_duplicates flags both with
    # keep=False, and copy_to_tic_dat collapses them into one row.
    pdf = PanDatFactory(table=[["a", "b"], ["c"]])
    pan_dat = pdf.PanDat(table=utils.DataFrame({
        "a": [1, 2, 1, 1],
        "b": [10, 10, 10, 11],
        "c": [101, 102, 103, 104]
    }))
    self.assertTrue(len(pdf.find_duplicates(pan_dat, keep=False)["table"]) == 2)
    tic_dat = pdf.copy_to_tic_dat(pan_dat)
    self.assertTrue(len(tic_dat.table) == len(pan_dat.table) - 1)
    # None in a pk field becomes NaN in pandas, so the round trip is lossy.
    tdf = TicDatFactory(**pdf.schema())
    tic_dat = tdf.TicDat(table=[[1, 2, 3], [None, 2, 3], [2, 1, None]])
    self.assertTrue(len(tic_dat.table) == 3)
    tic_dat_two = pdf.copy_to_tic_dat(tdf.copy_to_pandas(tic_dat, drop_pk_columns=False))
    self.assertFalse(tdf._same_data(tic_dat, tic_dat_two))
    tic_dat3 = tdf.TicDat(table=[[1, 2, 3], [float("nan"), 2, 3], [2, 1, float("nan")]])
    # this fails because _same_data isn't smart enough to check against nan in the keys,
    # because float("nan") != float("nan")
    self.assertFalse(tdf._same_data(tic_dat3, tic_dat_two))
    # Single-field pk: the NaNs now live only in data fields.
    pdf = PanDatFactory(table=[["a"], ["b", "c"]])
    tdf = TicDatFactory(**pdf.schema())
    tic_dat = tdf.TicDat(table=[[1, 2, 3], [2, None, 3], [2, 1, None]])
    tic_dat_two = pdf.copy_to_tic_dat(tdf.copy_to_pandas(tic_dat, drop_pk_columns=False))
    self.assertFalse(tdf._same_data(tic_dat, tic_dat_two))
    tic_dat3 = tdf.TicDat(table=[[1, 2, 3], [2, float("nan"), 3], [2, 1, float("nan")]])
    # _same_data works fine in checking nan equivalence in data rows - which maybe
    # is all one can ask for (note the nans_are_same_for_data_rows flag below)
    self.assertTrue(tdf._same_data(tic_dat3, tic_dat_two, nans_are_same_for_data_rows=True))