def testAdditionalFKs(self):
    """Check foreign-key failure detection and removal for pt3, pt4 and pt5.

    Each of pt3, pt4 and pt5 references pt1 through F1 and pt2 through F2.
    Rows with unmatched F1/F2 values are added, must be reported by
    find_foreign_key_failures, and must be gone after
    remove_foreign_key_failures.
    """
    factory = PanDatFactory(
        pt1=[["F1"], []],
        pt2=[["F2"], []],
        pt3=[["F1", "F2"], []],
        pt4=[["F1"], ["F2"]],
        pt5=[[], ["F1", "F2"]],
    )
    for child in ("pt3", "pt4", "pt5"):
        factory.add_foreign_key(child, "pt1", ["F1", "F1"])
        factory.add_foreign_key(child, "pt2", ["F2", "F2"])
    tdf = TicDatFactory(**factory.schema())

    def to_pan_dat(tic_dat):
        # Convert to a PanDat and insist the result has no duplicate rows.
        converted = factory.copy_pan_dat(copy_to_pandas_with_reset(tdf, tic_dat))
        self.assertFalse(factory.find_duplicates(converted))
        return converted

    def table_sizes(dat):
        return {name: len(getattr(dat, name)) for name in tdf.all_tables}

    working = tdf.TicDat(pt1=[1, 2, 3, 4], pt2=[5, 6, 7, 8])
    for f1 in range(1, 5):
        for f2 in range(5, 9):
            working.pt3[f1, f2] = {}
            working.pt4[f1] = f2
            working.pt5.append((f1, f2))

    def inject_bad_pt3_pt4_rows():
        # Rows whose F1 or F2 value has no match in pt1 / pt2.
        working.pt3["no", 6] = working.pt3[1, "no"] = {}
        working.pt4["no"] = 6
        working.pt4["nono"] = 6.01

    pristine = tdf.copy_tic_dat(working, freeze_it=True)
    self.assertFalse(factory.find_foreign_key_failures(to_pan_dat(pristine)))

    # Round one: failures only in the keyed tables pt3 and pt4.
    inject_bad_pt3_pt4_rows()
    pan_dat = to_pan_dat(working)
    first_failures = factory.find_foreign_key_failures(pan_dat)
    self.assertTrue(first_failures)
    factory.remove_foreign_key_failures(pan_dat)
    self.assertFalse(factory.find_foreign_key_failures(pan_dat))
    self.assertTrue(factory._same_data(pan_dat, to_pan_dat(pristine)))

    expected_sizes = table_sizes(pristine)

    # Round two: the same rows again, plus bad rows in the key-less table pt5.
    inject_bad_pt3_pt4_rows()
    working.pt5.append(("no", 6))
    working.pt5.append((1, "no"))
    pan_dat = to_pan_dat(working)
    second_failures = factory.find_foreign_key_failures(pan_dat)
    first_keys = set(first_failures)
    second_keys = set(second_failures)
    # The second round must report everything from the first and more.
    self.assertTrue(first_keys != second_keys and first_keys.issubset(second_failures))
    factory.remove_foreign_key_failures(pan_dat)
    self.assertFalse(factory.find_foreign_key_failures(pan_dat))
    self.assertTrue(table_sizes(pan_dat) == expected_sizes)
def testXToManyTwo(self):
    """Check multi-field foreign keys from three child tables into ``parent``.

    The same scenario runs twice: first with the children linked on F1, F2
    and F3, then linked on F1 and F3 only. In each run one bad row is added
    to every child table, three failing foreign keys are expected, and
    removing the failures must restore the original data.
    """
    child_tables = ("child_one", "child_two", "child_three")
    rows = [[1, 2, 3], [1, 2.1, 3], [4, 5, 6], [4, 5.1, 6], [7, 8, 9]]

    def build_schema(linked_fields):
        schema = PanDatFactory(
            parent=[["F1", "F2"], ["F3"]],
            child_one=[["F1", "F2", "F3"], []],
            child_two=[["F1", "F2"], ["F3"]],
            child_three=[[], ["F1", "F2", "F3"]],
        )
        for child in child_tables:
            # Each field maps onto the same-named field of the parent.
            mappings = [[field] * 2 for field in linked_fields]
            schema.add_foreign_key(child, "parent", mappings)
        return schema

    def check_round_trip(schema, child_two_f3):
        tdf = TicDatFactory(**schema.schema())
        dat = tdf.TicDat(parent=rows, child_one=rows, child_two=rows, child_three=rows)
        self.assertTrue(all(len(getattr(dat, name)) == 5 for name in schema.all_tables))
        clean = schema.copy_pan_dat(copy_to_pandas_with_reset(tdf, dat))
        self.assertFalse(schema.find_foreign_key_failures(clean))

        # One offending row per child table.
        dat.child_one[1, 2, 4] = {}
        dat.child_two[1, 2.2] = child_two_f3
        dat.child_three.append([1, 2, 4])
        dirty = schema.copy_pan_dat(copy_to_pandas_with_reset(tdf, dat))
        failures = schema.find_foreign_key_failures(dirty)
        self.assertTrue(len(failures) == 3)

        schema.remove_foreign_key_failures(dirty)
        self.assertFalse(schema.find_foreign_key_failures(dirty))
        self.assertTrue(schema._same_data(clean, dirty))

    all_fields_schema = build_schema(("F1", "F2", "F3"))
    cardinalities = {key.cardinality for key in all_fields_schema.foreign_keys}
    self.assertTrue(cardinalities == {"one-to-one", "many-to-one"})
    check_round_trip(all_fields_schema, 3)

    two_fields_schema = build_schema(("F1", "F3"))
    check_round_trip(two_fields_schema, 4)
def testBasicFKs(self):
    """Exercise foreign-key failure detection and removal on a plants/lines schema.

    The scenario runs three times, selected by ``cloning``: ``True`` clones
    the factory passing ``None`` as the table argument, ``False`` uses the
    factory as built, and ``"*"`` clones it passing every table name.
    """
    for cloning in [True, False, "*"]:
        pdf = PanDatFactory(
            plants=[["name"], ["stuff", "otherstuff"]],
            lines=[["name"], ["plant", "weird stuff"]],
            line_descriptor=[["name"], ["booger"]],
            products=[["name"], ["gover"]],
            production=[["line", "product"], ["min", "max"]],
            pureTestingTable=[[], ["line", "plant", "product", "something"]],
            extraProduction=[["line", "product"], ["extramin", "extramax"]],
            weirdProduction=[["line1", "line2", "product"], ["weirdmin", "weirdmax"]],
        )
        pdf.add_foreign_key("production", "lines", ("line", "name"))
        pdf.add_foreign_key("production", "products", ("product", "name"))
        pdf.add_foreign_key("lines", "plants", ("plant", "name"))
        pdf.add_foreign_key("line_descriptor", "lines", ("name", "name"))
        # Every pureTestingTable field except "something" points at the
        # table named by that field plus "s" (line -> lines, and so on).
        for f in set(pdf.data_fields["pureTestingTable"]).difference({"something"}):
            pdf.add_foreign_key("pureTestingTable", "%ss" % f, (f, "name"))
        pdf.add_foreign_key("extraProduction", "production",
                            (("line", "line"), ("product", "product")))
        pdf.add_foreign_key("weirdProduction", "production",
                            (("line1", "line"), ("product", "product")))
        pdf.add_foreign_key("weirdProduction", "extraProduction",
                            (("line2", "line"), ("product", "product")))

        self._testPdfReproduction(pdf)
        if cloning:
            # Use the factory's own table names, so this step does not rely
            # on a TicDatFactory left over from an earlier loop pass.
            pdf = pdf.clone(pdf.all_tables if cloning == "*" else None)

        tdf = TicDatFactory(**pdf.schema())
        goodDat = tdf.TicDat()
        goodDat.plants["Cleveland"] = ["this", "that"]
        goodDat.plants["Newark"]["otherstuff"] = 1
        goodDat.products["widgets"] = goodDat.products["gadgets"] = "shizzle"

        for i, plant in enumerate(goodDat.plants):
            goodDat.lines[i]["plant"] = plant

        for i, (line, product) in enumerate(itertools.product(goodDat.lines, goodDat.products)):
            goodDat.production[line, product] = {"min": 1, "max": 10 + i}

        badDat1 = tdf.copy_tic_dat(goodDat)
        badDat1.production["notaline", "widgets"] = [0, 1]
        badDat2 = tdf.copy_tic_dat(badDat1)

        def pan_dat_(tic_dat):
            # Convert to a PanDat and insist the result has no duplicate rows.
            rtn = pdf.copy_pan_dat(copy_to_pandas_with_reset(tdf, tic_dat))
            self.assertFalse(pdf.find_duplicates(rtn))
            return rtn

        fk, fkm = ForeignKey, ForeignKeyMapping
        fk_fails1 = pdf.find_foreign_key_failures(pan_dat_(badDat1))
        fk_fails2 = pdf.find_foreign_key_failures(pan_dat_(badDat2))

        # Only the production -> lines key is broken at this point.
        self.assertTrue(set(fk_fails1) == set(fk_fails2) ==
                        {fk('production', 'lines', fkm('line', 'name'), 'many-to-one')})
        self.assertTrue(
            set(pdf.find_foreign_key_failures(pan_dat_(badDat1), verbosity="Low")) ==
            set(pdf.find_foreign_key_failures(pan_dat_(badDat2), verbosity="Low")) ==
            {('production', 'lines', ('line', 'name'))})
        for row_fails in [next(iter(fails.values())) for fails in [fk_fails1, fk_fails2]]:
            self.assertTrue(set(row_fails["line"]) == {"notaline"} and
                            set(row_fails["product"]) == {"widgets"})

        # Adding the missing line repairs production -> lines, but the new
        # line names a plant that does not exist.
        badDat1.lines["notaline"]["plant"] = badDat2.lines["notaline"]["plant"] = "notnewark"
        fk_fails1 = pdf.find_foreign_key_failures(pan_dat_(badDat1))
        fk_fails2 = pdf.find_foreign_key_failures(pan_dat_(badDat2))
        self.assertTrue(set(fk_fails1) == set(fk_fails2) ==
                        {fk('lines', 'plants', fkm('plant', 'name'), 'many-to-one')})
        for row_fails in [next(iter(fails.values())) for fails in [fk_fails1, fk_fails2]]:
            self.assertTrue(set(row_fails["name"]) == {"notaline"} and
                            set(row_fails["plant"]) == {"notnewark"})

        for bad in [badDat1, badDat2]:
            bad_pan = pdf.remove_foreign_key_failures(pan_dat_(bad))
            self.assertFalse(pdf.find_foreign_key_failures(bad_pan))
            self.assertTrue(pdf._same_data(bad_pan, pan_dat_(goodDat)))

        first_new_line = len(goodDat.lines)
        for i, plant in enumerate(list(goodDat.plants) * 2):
            goodDat.lines[i + first_new_line]["plant"] = plant
        # Enumerate the lines so the parity check uses each line's own index
        # and is not a constant carried over from the loop above.
        for i, line in enumerate(goodDat.lines):
            if i % 2:
                goodDat.line_descriptor[line] = i + 10

        for i, (line, plant, product) in enumerate(
                sorted(itertools.product(goodDat.lines, goodDat.plants, goodDat.products))):
            goodDat.pureTestingTable.append((line, plant, product, i))
        self.assertFalse(pdf.find_foreign_key_failures(pan_dat_(goodDat)))

        badDat = tdf.copy_tic_dat(goodDat)
        badDat.pureTestingTable.append(("j", "u", "nk", "ay"))
        fk_fails = pdf.find_foreign_key_failures(pan_dat_(badDat))
        self.assertTrue(set(fk_fails) == {
            fk('pureTestingTable', 'plants', fkm('plant', 'name'), 'many-to-one'),
            fk('pureTestingTable', 'products', fkm('product', 'name'), 'many-to-one'),
            fk('pureTestingTable', 'lines', fkm('line', 'name'), 'many-to-one')})

        # After transposing, the first column holds the values of the first
        # reported row, which must be the junk row appended above.
        for df in fk_fails.values():
            df = df.T
            c = df.columns[0]
            self.assertTrue({'ay', 'j', 'nk', 'u'} == set(df[c]))
def testXToMany(self):
    """Check foreign keys whose cardinality is many-to-many or one-to-many.

    Part one links position_constraints to roster, positions and innings on
    non-key fields and expects every key to be "many-to-many". Part two links
    table_two to table_one on one field of a two-field key and expects
    "one-to-many". In both parts bad rows are added, counted in the failure
    reports, and then removed.
    """
    def as_pan_dat(pan_factory, tic_factory, tic_dat):
        return pan_factory.copy_pan_dat(copy_to_pandas_with_reset(tic_factory, tic_dat))

    # --- many-to-many ---
    schema = PanDatFactory(
        roster=[["Name"], ["Grade", "Arrival Inning", "Departure Inning",
                           "Min Innings Played", "Max Innings Played"]],
        positions=[["Position"], ["Position Importance", "Position Group",
                                  "Consecutive Innings Only"]],
        innings=[["Inning"], ["Inning Group"]],
        position_constraints=[["Position Group", "Inning Group", "Grade"],
                              ["Min Players", "Max Players"]],
    )
    for parent_table, shared_field in (("roster", "Grade"),
                                       ("positions", "Position Group"),
                                       ("innings", "Inning Group")):
        schema.add_foreign_key("position_constraints", parent_table,
                               [shared_field, shared_field])

    cardinalities = {key.cardinality for key in schema.foreign_keys}
    self.assertTrue(cardinalities == {"many-to-many"})

    tdf = TicDatFactory(**schema.schema())
    dat = tdf.TicDat()
    players = ["bob", "joe", "fred", "alice", "lisa", "joean", "ginny"]
    for index, player in enumerate(players):
        dat.roster[player]["Grade"] = (index % 3) + 1
    dat.roster["dummy"]["Grade"] = "whatevers"
    field_positions = ["pitcher", "catcher", "1b", "2b", "ss", "3b", "lf", "cf", "rf"]
    for index, position in enumerate(field_positions):
        dat.positions[position]["Position Group"] = "PG %s" % ((index % 4) + 1)
    for inning in range(1, 10):
        if inning < 7:
            inning_group = "before stretch"
        else:
            inning_group = "after stretch"
        dat.innings[inning]["Inning Group"] = inning_group
    dat.innings[0] = {}
    position_groups = ["PG %s" % i for i in range(1, 5)]
    inning_groups = ["before stretch", "after stretch"]
    grades = [1, 2, 3]
    for constraint_key in itertools.product(position_groups, inning_groups, grades):
        dat.position_constraints[constraint_key] = {}

    clean = as_pan_dat(schema, tdf, dat)
    self.assertFalse(schema.find_foreign_key_failures(clean))

    dat.position_constraints["no", "no", "no"] = dat.position_constraints[1, 2, 3] = {}
    dirty = as_pan_dat(schema, tdf, dat)
    self.assertFalse(schema._same_data(clean, dirty))

    failures = schema.find_foreign_key_failures(dirty)
    failures_low = schema.find_foreign_key_failures(dirty, verbosity="Low")
    failures_flags = schema.find_foreign_key_failures(dirty, verbosity="Low", as_table=False)
    # Reduce all three report styles to {key: failure count} so they can be
    # compared with each other and with the expected counts.
    counts_default = {tuple(key)[:2] + (tuple(key[2]),): len(found)
                      for key, found in failures.items()}
    counts_low = {key: len(found) for key, found in failures_low.items()}
    counts_flags = {key: flags.count(True) for key, flags in failures_flags.items()}
    expected_counts = {
        ('position_constraints', 'innings', ("Inning Group", "Inning Group")): 2,
        ('position_constraints', 'positions', ("Position Group", "Position Group")): 2,
        ('position_constraints', 'roster', ("Grade", "Grade")): 1,
    }
    self.assertTrue(counts_default == counts_low == counts_flags == expected_counts)

    schema.remove_foreign_key_failures(dirty)
    self.assertFalse(schema.find_foreign_key_failures(dirty))
    self.assertTrue(schema._same_data(clean, dirty))

    # --- one-to-many ---
    schema = PanDatFactory(table_one=[["One", "Two"], []],
                           table_two=[["One"], ["Two"]])
    schema.add_foreign_key("table_two", "table_one", ["One", "One"])
    cardinalities = {key.cardinality for key in schema.foreign_keys}
    self.assertTrue(cardinalities == {"one-to-many"})

    tdf = TicDatFactory(**schema.schema())
    dat = tdf.TicDat(table_one=[[1, 2], [3, 4], [5, 6], [7, 8]],
                     table_two={1: 2, 3: 4, 5: 6})

    clean = as_pan_dat(schema, tdf, dat)
    self.assertFalse(schema.find_foreign_key_failures(clean))

    dat.table_two[9] = 10
    dirty = as_pan_dat(schema, tdf, dat)
    failures = schema.find_foreign_key_failures(dirty)
    failure_counts = {tuple(key)[:2]: len(found) for key, found in failures.items()}
    self.assertTrue(failure_counts == {('table_two', 'table_one'): 1})

    schema.remove_foreign_key_failures(dirty)
    self.assertFalse(schema.find_foreign_key_failures(dirty))
    self.assertTrue(schema._same_data(clean, dirty))