コード例 #1
0
 def testDupsOpalytics(self):
     """Opalytics reader: raw_data=True keeps duplicate rows, raw_data=False purges them.

     Builds tables whose primary keys deliberately collide, feeds them through
     a mocked opalytics inputset, and checks row counts before/after dedup.
     """
     if not self.can_run:
         return
     # 'hack' toggles the inputset mock's alternate representation.
     for hack in [True, False]:
         tdf = TicDatFactory(one=[["a"], ["b", "c"]],
                             two=[["a", "b"], ["c"]],
                             three=[["a", "b", "c"], []])
         # tdf2 has no primary keys, so it can hold the duplicate rows.
         tdf2 = TicDatFactory(
             **{t: [[], ["a", "b", "c"]]
                for t in tdf.all_tables})
         # Six rows per table; [1, 2, 2] appears twice on purpose.
         td = tdf2.TicDat(
             **{
                 t: [[1, 2, 1], [1, 2, 2], [2, 1, 3], [2, 2, 3], [1, 2, 2],
                     ["new", 1, 2]]
                 for t in tdf.all_tables
             })
         inputset = create_inputset_mock(tdf2, td, hack)
         pdf = PanDatFactory(**tdf.schema())
         # raw_data=True: every row survives, duplicates included.
         panDat = pdf.opalytics.create_pan_dat(inputset, raw_data=True)
         self.assertTrue(
             all(len(getattr(panDat, t)) == 6 for t in tdf.all_tables))
         # raw_data=False: rows with duplicated primary keys are dropped.
         panDat = pdf.opalytics.create_pan_dat(inputset, raw_data=False)
         self.assertTrue(
             all(len(getattr(panDat, t)) < 6 for t in tdf.all_tables))
         # The purged read should agree, key-set by key-set, with what the
         # keyed factory makes of the same six rows.
         td_1 = tdf.TicDat(
             **{
                 t: [[1, 2, 1], [1, 2, 2], [2, 1, 3], [2, 2, 3], [1, 2, 2],
                     ["new", 1, 2]]
                 for t in tdf.all_tables
             })
         td_2 = pdf.copy_to_tic_dat(panDat)
         self.assertTrue(
             all(
                 set(getattr(td_1, t)) == set(getattr(td_2, t))
                 for t in tdf.all_tables))
コード例 #2
0
    def testDietCleaningOpalytics(self):
        """The opalytics '_active' column filters rows unless raw_data=True."""
        sch = dietSchema()
        # Add a boolean "_active" data field to the categories table.
        sch["categories"][-1].append("_active")
        tdf1 = TicDatFactory(**dietSchema())
        tdf2 = TicDatFactory(**sch)

        ticDat2 = tdf2.copy_tic_dat(dietData())
        for v in ticDat2.categories.values():
            v["_active"] = True
        # Deactivate one row; the cleaning pass should remove it.
        ticDat2.categories["fat"]["_active"] = False
        ticDat1 = tdf1.copy_tic_dat(dietData())

        input_set = create_inputset_mock_with_active_hack(tdf2, ticDat2)
        pdf1 = PanDatFactory(**tdf1.schema())
        # raw_data=True ignores "_active" entirely, so everything matches.
        panDat = pdf1.opalytics.create_pan_dat(input_set, raw_data=True)
        self.assertTrue(tdf1._same_data(pdf1.copy_to_tic_dat(panDat), ticDat1))

        # Default read honors "_active": the inactive row is gone.
        panDatPurged = pdf1.opalytics.create_pan_dat(input_set)
        self.assertFalse(
            tdf1._same_data(pdf1.copy_to_tic_dat(panDatPurged), ticDat1))

        # After dropping "fat" (and its foreign-key dependents) by hand,
        # the reference data matches the purged read.
        ticDat1.categories.pop("fat")
        tdf1.remove_foreign_key_failures(ticDat1)
        self.assertTrue(
            tdf1._same_data(pdf1.copy_to_tic_dat(panDatPurged), ticDat1))
コード例 #3
0
ファイル: testpandat_io.py プロジェクト: nandi6uc/ticdat
    def test_nullables(self):
        """Nullable string fields survive write/read round trips in every format."""
        core_path = os.path.join(_scratchDir, "nullables")
        pdf = PanDatFactory(table_with_stuffs=[["field one"], ["field two"]])
        pdf.set_data_type("table_with_stuffs", "field one")
        # "field two": strings only, with None explicitly allowed.
        pdf.set_data_type("table_with_stuffs",
                          "field two",
                          number_allowed=False,
                          strings_allowed='*',
                          nullable=True)
        # One row carries None; the leading-zero strings check type fidelity.
        dat = TicDatFactory(**pdf.schema()).TicDat(
            table_with_stuffs=[[101, "022"], [202, None], [303, "111"]])
        dat = TicDatFactory(**pdf.schema()).copy_to_pandas(
            dat, drop_pk_columns=False)
        self.assertFalse(pdf.find_data_type_failures(dat))

        # Round-trip through csv/xls/sql/json and compare; None may come back
        # as NaN, hence nans_are_same_for_data_rows.
        for attr, path in [["csv", core_path + "_csv"],
                           ["xls", core_path + ".xlsx"],
                           ["sql", core_path + ".db"],
                           ["json", core_path + ".json"]]:
            f_or_d = "directory" if attr == "csv" else "file"
            write_func, write_kwargs = utils._get_write_function_and_kwargs(
                pdf, path, f_or_d)
            write_func(dat, path, **write_kwargs)
            dat_1 = utils._get_dat_object(pdf, "create_pan_dat", path, f_or_d,
                                          False)
            self.assertTrue(
                pdf._same_data(dat, dat_1, nans_are_same_for_data_rows=True))
コード例 #4
0
ファイル: testpandat_io.py プロジェクト: nandi6uc/ticdat
    def testSqlSimple(self):
        """Write diet and netflow PanDats to SQLite and read them back unchanged."""
        if not self.can_run:
            return
        tdf = TicDatFactory(**dietSchema())
        pdf = PanDatFactory(**dietSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **{t: getattr(dietData(), t)
                   for t in tdf.primary_key_fields}))
        panDat = pan_dat_maker(dietSchema(), ticDat)
        filePath = os.path.join(_scratchDir, "diet.db")
        pdf.sql.write_file(panDat, filePath)
        sqlPanDat = pdf.sql.create_pan_dat(filePath)
        self.assertTrue(pdf._same_data(panDat, sqlPanDat))
        # A '*' (generic-fields) schema should round trip the same file too.
        pdf2 = PanDatFactory(**{t: '*' for t in pdf.all_tables})
        pdf2.sql.write_file(panDat, filePath)
        sqlPanDat = pdf2.sql.create_pan_dat(filePath)
        self.assertTrue(pdf._same_data(panDat, sqlPanDat))

        # Same round trip for the netflow schema.
        tdf = TicDatFactory(**netflowSchema())
        pdf = PanDatFactory(**netflowSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **
                {t: getattr(netflowData(), t)
                 for t in tdf.primary_key_fields}))
        panDat = pan_dat_maker(netflowSchema(), ticDat)
        filePath = os.path.join(_scratchDir, "netflow.db")
        pdf.sql.write_file(panDat, filePath)
        panDat2 = pdf.sql.create_pan_dat(filePath)
        self.assertTrue(pdf._same_data(panDat, panDat2))
        pdf2 = PanDatFactory(**{t: '*' for t in pdf.all_tables})
        sqlPanDat = pdf2.sql.create_pan_dat(filePath)
        self.assertTrue(pdf._same_data(panDat, sqlPanDat))
コード例 #5
0
ファイル: testpandat_utils.py プロジェクト: adampkehoe/ticdat
 def testFindDups(self):
     """find_duplicates reports repeated keys, as tables or as boolean Series."""
     pdf = PanDatFactory(**sillyMeSchema())
     # tdf has no primary keys, so it can hold the duplicate rows we feed it.
     tdf = TicDatFactory(
         **{
             k: [[], list(pkfs) + list(dfs)]
             for k, (pkfs, dfs) in sillyMeSchema().items()
         })
     # The first two rows collide on table 'a' (aField == 1).
     rows = [(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)]
     ticDat = tdf.TicDat(**{t: rows for t in tdf.all_tables})
     panDat = pdf.copy_pan_dat(copy_to_pandas_with_reset(tdf, ticDat))
     dups = pdf.find_duplicates(panDat)
     self.assertTrue(set(dups) == {'a'} and set(dups['a']['aField']) == {1})
     # keep=False flags every member of the duplicate group (2 rows here) ...
     dups = pdf.find_duplicates(panDat, as_table=False, keep=False)
     self.assertTrue(
         set(dups) == {'a'} and dups['a'].value_counts()[True] == 2)
     # ... while the default leaves the first occurrence unflagged (1 row).
     dups = pdf.find_duplicates(panDat, as_table=False)
     self.assertTrue(
         set(dups) == {'a'} and dups['a'].value_counts()[True] == 1)
     # A fourth row extends the collisions to table 'b' as well.
     rows = [(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40), (1, 2, 3, 40)]
     ticDat = tdf.TicDat(**{t: rows for t in tdf.all_tables})
     panDat = pdf.copy_pan_dat(copy_to_pandas_with_reset(tdf, ticDat))
     dups = pdf.find_duplicates(panDat, keep=False)
     self.assertTrue(
         set(dups) == {'a', 'b'} and set(dups['a']['aField']) == {1})
     dups = pdf.find_duplicates(panDat, as_table=False, keep=False)
     self.assertTrue({k: v.value_counts()[True]
                      for k, v in dups.items()} == {
                          'a': 3,
                          'b': 2
                      })
コード例 #6
0
 def testSpacesOpalytics(self):
     """Round-trip the spaces schema through the mocked opalytics reader.

     Every combination of the mock's 'hack' flag and the reader's raw_data
     flag must reproduce the original data exactly.
     """
     if not self.can_run:
         return
     for hack in (True, False):
         for raw_data in (True, False):
             tdf = TicDatFactory(**spacesSchema())
             ticDat = tdf.TicDat(**spacesData())
             inputset = create_inputset_mock(tdf, ticDat, hack)
             pdf = PanDatFactory(**tdf.schema())
             panDat = pdf.opalytics.create_pan_dat(inputset, raw_data=raw_data)
             round_tripped = pdf.copy_to_tic_dat(panDat)
             self.assertTrue(tdf._same_data(ticDat, round_tripped))
コード例 #7
0
    def testMissingOpalyticsTable(self):
        """A table absent from the inputset comes back empty, not as an error."""
        if not self.can_run:
            return
        tdf = TicDatFactory(**dietSchema())
        ticDat = tdf.freeze_me(tdf.copy_tic_dat(dietData()))
        inputset = create_inputset_mock(tdf, ticDat)

        # The reading schema knows one more table than the inputset provides.
        schema_with_extra = dict(dietSchema(), missing_table=[["a"], ["b"]])
        pdf = PanDatFactory(**schema_with_extra)
        panDat = pdf.opalytics.create_pan_dat(inputset)
        ticDat2 = pdf.copy_to_tic_dat(panDat)
        # Shared tables round trip; the extra table stays empty.
        self.assertTrue(tdf._same_data(ticDat, ticDat2))
        self.assertFalse(ticDat2.missing_table)
コード例 #8
0
    def testDietCleaningOpalytisThree(self):
        """Predicate failures plus FK cleanup are applied unless raw_data=True."""
        tdf = TicDatFactory(**dietSchema())
        # "fat" is evidently the row failing this predicate (popped below).
        tdf.add_data_row_predicate("categories",
                                   lambda row: row["maxNutrition"] >= 66)
        addDietForeignKeys(tdf)
        ticDat = tdf.copy_tic_dat(dietData())

        pdf = PanDatFactory(**tdf.schema())
        pdf.add_data_row_predicate("categories",
                                   lambda row: row["maxNutrition"] >= 66)
        addDietForeignKeys(pdf)

        input_set = create_inputset_mock(tdf, ticDat)

        # Raw read: nothing is cleaned.
        panDat = pdf.opalytics.create_pan_dat(input_set, raw_data=True)
        self.assertTrue(tdf._same_data(pdf.copy_to_tic_dat(panDat), ticDat))

        # Cleaned read: predicate-failing rows (and FK orphans) are dropped.
        panDatPurged = pdf.opalytics.create_pan_dat(input_set, raw_data=False)
        self.assertFalse(
            tdf._same_data(pdf.copy_to_tic_dat(panDatPurged), ticDat))

        # Dropping "fat" alone is not enough; rows referencing it remain.
        ticDat.categories.pop("fat")
        self.assertFalse(
            tdf._same_data(pdf.copy_to_tic_dat(panDatPurged), ticDat))
        # After FK cleanup the hand-cleaned data matches the purged read.
        tdf.remove_foreign_key_failures(ticDat)
        self.assertTrue(
            tdf._same_data(pdf.copy_to_tic_dat(panDatPurged), ticDat))
コード例 #9
0
    def testDietCleaningOpalyticsTwo(self):
        """Data-type violations trigger the same opalytics cleaning as predicates."""
        tdf = TicDatFactory(**dietSchema())
        addDietForeignKeys(tdf)
        # Constrain maxNutrition to [66, inf); "fat" evidently falls outside
        # (it is popped by hand below).
        tdf.set_data_type("categories",
                          "maxNutrition",
                          min=66,
                          inclusive_max=True)
        ticDat = tdf.copy_tic_dat(dietData())

        input_set = create_inputset_mock(tdf, ticDat)
        pdf = PanDatFactory(**dietSchema())
        addDietForeignKeys(pdf)
        pdf.set_data_type("categories",
                          "maxNutrition",
                          min=66,
                          inclusive_max=True)

        # Raw read keeps every row.
        panDat = pdf.opalytics.create_pan_dat(input_set, raw_data=True)
        self.assertTrue(tdf._same_data(pdf.copy_to_tic_dat(panDat), ticDat))

        # Cleaned read drops the out-of-range row, so the data differ.
        panDatPurged = pdf.opalytics.create_pan_dat(input_set, raw_data=False)
        self.assertFalse(
            tdf._same_data(pdf.copy_to_tic_dat(panDatPurged), ticDat))

        # Removing "fat" by hand is not enough until FK failures go too.
        ticDat.categories.pop("fat")
        self.assertFalse(
            tdf._same_data(pdf.copy_to_tic_dat(panDatPurged), ticDat))
        tdf.remove_foreign_key_failures(ticDat)
        self.assertTrue(
            tdf._same_data(pdf.copy_to_tic_dat(panDatPurged), ticDat))
コード例 #10
0
ファイル: testpandat_io.py プロジェクト: nandi6uc/ticdat
 def testDateTimeTwo(self):
     """Excel timestamps read back as an equal set when the field is datetime-typed."""
     file = os.path.join(_scratchDir, "datetime_pd.xls")
     stamps = ["June 13 1960 4:30PM", "Dec 11 1970 1AM",
               "Sept 11 2001 9:30AM"]
     df = utils.pd.DataFrame({"a": [utils.pd.Timestamp(s) for s in stamps]})
     df.to_excel(file, "Cool Runnings")
     pdf = PanDatFactory(cool_runnings=[["a"], []])
     # Declaring the field as datetime makes the xls reader parse it.
     pdf.set_data_type("cool_runnings", "a", datetime=True)
     dat = pdf.xls.create_pan_dat(file)
     self.assertTrue(set(dat.cool_runnings["a"]) == set(df["a"]))
コード例 #11
0
    def testSillyCleaningOpalyticsOne(self):
        """A restrictive data type on cData4 purges non-conforming rows on read."""
        tdf = TicDatFactory(**sillyMeSchema())
        # Only the string 'd' is a legal cData4 value; numbers are rejected.
        tdf.set_data_type("c",
                          "cData4",
                          number_allowed=False,
                          strings_allowed=['d'])
        ticDat = tdf.TicDat(**sillyMeData())

        input_set = create_inputset_mock(tdf, ticDat)

        pdf = PanDatFactory(**sillyMeSchema())
        pdf.set_data_type("c",
                          "cData4",
                          number_allowed=False,
                          strings_allowed=['d'])

        # Raw read: data unchanged.
        panDat = pdf.opalytics.create_pan_dat(input_set, raw_data=True)
        self.assertTrue(tdf._same_data(pdf.copy_to_tic_dat(panDat), ticDat))

        # Cleaned read: the offending rows are gone.
        panDatPurged = pdf.opalytics.create_pan_dat(input_set, raw_data=False)
        self.assertFalse(
            tdf._same_data(pdf.copy_to_tic_dat(panDatPurged), ticDat))

        # Dropping the last and first rows of "c" by hand reproduces the purge.
        ticDat.c.pop()
        ticDat.c.pop(0)
        self.assertTrue(
            tdf._same_data(pdf.copy_to_tic_dat(panDatPurged), ticDat))
コード例 #12
0
 def perform_predicate_checks(sch):
     """Register three row predicates on a factory built from *sch* and verify
     find_data_row_failures reports exactly the expected failing rows.

     NOTE(review): relies on ``self`` and ``pandat`` from an enclosing scope
     (presumably a closure inside a test method) -- confirm before reusing.
     """
     pdf = PanDatFactory(**sch)
     # "cost" must be a real (non-NaN) number.
     pdf.add_data_row_predicate("foods", lambda row: numericish(row["cost"]) and not isnan(row["cost"]), "cost")
     # "qty" must be numeric and lie in the half-open window (5, 12].
     good_qty = lambda qty :  numericish(qty) and 5 < qty <= 12
     pdf.add_data_row_predicate("nutritionQuantities", lambda row: good_qty(row["qty"]), "qty")
     # min/max nutrition must both be numeric with max >= min.
     pdf.add_data_row_predicate("categories",
                                lambda row: all(map(numericish, [row["minNutrition"], row["maxNutrition"]]))
                                            and row["maxNutrition"] >= row["minNutrition"],
                                "minmax")
     failed = pdf.find_data_row_failures(pandat)
     self.assertTrue(set(failed) == {('foods', 'cost'), ('nutritionQuantities', 'qty'), ('categories', 'minmax')})
     self.assertTrue(set(failed['foods', 'cost']["name"]) == {'b'})
     self.assertTrue(set({(v["food"], v["category"])
                          for v in failed['nutritionQuantities', 'qty'].T.to_dict().values()}) ==
                         {('b', '1'), ('a', '2'), ('a', '1'), ('b', '2')})
     self.assertTrue(set(failed['categories', 'minmax']["name"]) == {'2'})
     # as_table=False returns boolean Series; four qty rows fail.
     failed = pdf.find_data_row_failures(pandat, as_table=False)
     self.assertTrue(4 == failed['nutritionQuantities', 'qty'].value_counts()[True])
コード例 #13
0
ファイル: testpandat_io.py プロジェクト: nandi6uc/ticdat
 def testJsonCross(self):
     """Cross-read JSON between the TicDatFactory and PanDatFactory writers."""
     if not self.can_run:
         return
     tdf = TicDatFactory(**dietSchema())
     pdf = PanDatFactory(**dietSchema())
     ticDat = tdf.freeze_me(
         tdf.TicDat(
             **{t: getattr(dietData(), t)
                for t in tdf.primary_key_fields}))
     panDat = pan_dat_maker(dietSchema(), ticDat)
     filePath = os.path.join(_scratchDir, "diet_cross.json")
     # pandas-style JSON written by pdf, read back as a TicDat.
     pdf.json.write_file(panDat, filePath)
     ticDat2 = tdf.json.create_tic_dat(filePath, from_pandas=True)
     self.assertTrue(tdf._same_data(ticDat, ticDat2, epsilon=0.0001))
     # And the reverse: tdf writes pandas-style JSON that pdf reads.
     tdf.json.write_file(ticDat,
                         filePath,
                         allow_overwrite=True,
                         to_pandas=True)
     panDat2 = pdf.json.create_pan_dat(filePath)
     self.assertTrue(pdf._same_data(panDat, panDat2, epsilon=0.0001))
コード例 #14
0
    def testDietOpalytics(self):
        """Exercise the opalytics reader over hack/raw_data/activeEnabled combos."""
        if not self.can_run:
            return
        for hack, raw_data, activeEnabled in list(
                itertools.product(*(([True, False], ) * 3))):
            tdf = TicDatFactory(**dietSchema())
            ticDat = tdf.freeze_me(tdf.copy_tic_dat(dietData()))
            inputset = create_inputset_mock(tdf, ticDat, hack, activeEnabled)

            # Default read: no duplicates and a faithful round trip.
            pdf = PanDatFactory(**dietSchema())
            panDat = pdf.opalytics.create_pan_dat(inputset)
            self.assertFalse(pdf.find_duplicates(panDat))
            ticDat2 = pdf.copy_to_tic_dat(panDat)
            self.assertTrue(tdf._same_data(ticDat, ticDat2))

            # A wider schema (extra "dmy" field per table) can still feed pdf.
            tdf2 = TicDatFactory(
                **{
                    k: [pks, list(dfs) + ["dmy"]]
                    for k, (pks, dfs) in tdf.schema().items()
                })
            _dat = tdf2.copy_tic_dat(ticDat)
            panDat = pdf.opalytics.create_pan_dat(
                create_inputset_mock(tdf2, _dat, hack))

            self.assertTrue(tdf._same_data(ticDat,
                                           pdf.copy_to_tic_dat(panDat)))

            # The reverse must raise: the inputset lacks every "dmy" field,
            # and the message names each missing (table, field) pair.
            pdf2 = PanDatFactory(**tdf2.schema())
            ex = self.firesException(lambda: pdf2.opalytics.create_pan_dat(
                inputset, raw_data=raw_data))
            self.assertTrue(
                all(_ in ex for _ in ["(table, field) pairs missing"] +
                    ["'%s', 'dmy'" % _ for _ in pdf2.all_tables]))
コード例 #15
0
ファイル: testpandat_io.py プロジェクト: nandi6uc/ticdat
    def testXlsSpacey(self):
        """xlsx round trips with case_space_sheet_names for two schemas."""
        if not self.can_run:
            return

        tdf = TicDatFactory(**spacesSchema())
        pdf = PanDatFactory(**spacesSchema())
        ticDat = tdf.TicDat(**spacesData())
        panDat = pan_dat_maker(spacesSchema(), ticDat)
        ext = ".xlsx"
        filePath = os.path.join(_scratchDir, "spaces_2%s" % ext)
        # Sheet names get case/space treatment on write; read must still match.
        pdf.xls.write_file(panDat, filePath, case_space_sheet_names=True)
        panDat2 = pdf.xls.create_pan_dat(filePath)
        self.assertTrue(pdf._same_data(panDat, panDat2))

        # Same exercise with the netflow schema.
        tdf = TicDatFactory(**netflowSchema())
        pdf = PanDatFactory(**netflowSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **
                {t: getattr(netflowData(), t)
                 for t in tdf.primary_key_fields}))
        panDat = pan_dat_maker(netflowSchema(), ticDat)
        filePath = os.path.join(_scratchDir, "spaces_2_2%s" % ext)
        pdf.xls.write_file(panDat, filePath, case_space_sheet_names=True)
        panDat2 = pdf.xls.create_pan_dat(filePath)
        self.assertTrue(pdf._same_data(panDat, panDat2))
コード例 #16
0
ファイル: testpandat_io.py プロジェクト: nandi6uc/ticdat
    def testJsonSpacey(self):
        """JSON round trips: to file, to in-memory string, and via orient='columns'."""
        if not self.can_run:
            return

        tdf = TicDatFactory(**spacesSchema())
        pdf = PanDatFactory(**spacesSchema())
        ticDat = tdf.TicDat(**spacesData())
        panDat = pan_dat_maker(spacesSchema(), ticDat)
        ext = ".json"
        filePath = os.path.join(_scratchDir, "spaces_2%s" % ext)
        pdf.json.write_file(panDat, filePath, case_space_table_names=True)
        panDat2 = pdf.json.create_pan_dat(filePath)
        self.assertTrue(pdf._same_data(panDat, panDat2))
        # With "" as the path, write_file evidently returns the JSON payload,
        # which create_pan_dat consumes directly.
        panDat3 = pdf.json.create_pan_dat(
            pdf.json.write_file(panDat, "", case_space_table_names=True))
        self.assertTrue(pdf._same_data(panDat, panDat3))

        # Same exercise with the netflow schema.
        tdf = TicDatFactory(**netflowSchema())
        pdf = PanDatFactory(**netflowSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **
                {t: getattr(netflowData(), t)
                 for t in tdf.primary_key_fields}))
        panDat = pan_dat_maker(netflowSchema(), ticDat)
        filePath = os.path.join(_scratchDir, "spaces_2_2%s" % ext)
        pdf.json.write_file(panDat, filePath, case_space_table_names=True)
        panDat2 = pdf.json.create_pan_dat(filePath)
        self.assertTrue(pdf._same_data(panDat, panDat2))
        panDat3 = pdf.json.create_pan_dat(
            pdf.json.write_file(panDat, "", case_space_table_names=True))
        self.assertTrue(pdf._same_data(panDat, panDat3))

        # orient='columns' output can rebuild a PanDat via the constructor.
        dicted = json.loads(pdf.json.write_file(panDat, "", orient='columns'))
        panDat4 = pdf.PanDat(**dicted)
        self.assertTrue(pdf._same_data(panDat, panDat4, epsilon=1e-5))
コード例 #17
0
    def testSillyCleaningOpalyticsThree(self):
        """Row predicates purge failing rows unless raw_data=True is requested."""
        tdf = TicDatFactory(**sillyMeSchema())
        # Rows of "c" whose cData4 equals 4 or 24 fail one of these predicates.
        for bad in (4, 24):
            tdf.add_data_row_predicate("c", lambda row, b=bad: row["cData4"] != b)
        ticDat = tdf.TicDat(**sillyMeData())

        input_set = create_inputset_mock(tdf, ticDat)

        pdf = PanDatFactory(**sillyMeSchema())
        for bad in (4, 24):
            pdf.add_data_row_predicate("c", lambda row, b=bad: row["cData4"] != b)

        # Raw read keeps everything.
        raw_read = pdf.opalytics.create_pan_dat(input_set, raw_data=True)
        self.assertTrue(tdf._same_data(pdf.copy_to_tic_dat(raw_read), ticDat))

        # Cleaned read drops the failing rows, so the data no longer match.
        purged_read = pdf.opalytics.create_pan_dat(input_set, raw_data=False)
        self.assertFalse(
            tdf._same_data(pdf.copy_to_tic_dat(purged_read), ticDat))

        # Removing the last and first rows of "c" by hand reproduces the purge.
        ticDat.c.pop()
        ticDat.c.pop(0)
        self.assertTrue(
            tdf._same_data(pdf.copy_to_tic_dat(purged_read), ticDat))
コード例 #18
0
ファイル: testpandat_utils.py プロジェクト: adampkehoe/ticdat
 def test_data_type_max_failures(self):
     """max_failures caps how many data-type failures are collected."""
     pdf = PanDatFactory(table_one=[["Field"], []],
                         table_two=[[], ["Field"]])
     for t in ["table_one", "table_two"]:
         pdf.set_data_type(t, "Field")
     # Each table carries 10 rows failing the default data type -- evidently
     # the negative entries, matching the 10-per-table counts asserted below.
     dat = pdf.PanDat(table_one=DataFrame(
         {"Field": list(range(1, 11)) + [-_ for _ in range(1, 11)]}),
                      table_two=DataFrame(
                          {"Field": [10.1] * 10 + [-2] * 10}))
     # No cap: 10 failures in each of the two tables.
     errs = pdf.find_data_type_failures(dat)
     self.assertTrue(
         len(errs) == 2 and all(len(_) == 10 for _ in errs.values()))
     # Cap at 11: one table fully reported, the other truncated to 1 row.
     errs = pdf.find_data_type_failures(dat, max_failures=11)
     self.assertTrue(len(errs) == 2)
     self.assertTrue(
         any(len(_) == 10 for _ in errs.values())
         and any(len(_) == 1 for _ in errs.values()))
     # Cap at 10: the second table drops out entirely.
     errs = pdf.find_data_type_failures(dat, max_failures=10)
     self.assertTrue(
         len(errs) == 1 and all(len(_) == 10 for _ in errs.values()))
     # Cap at 9: even the remaining table is truncated.
     errs = pdf.find_data_type_failures(dat, max_failures=9)
     self.assertTrue(
         len(errs) == 1 and all(len(_) == 9 for _ in errs.values()))
コード例 #19
0
ファイル: testpandat_io.py プロジェクト: nandi6uc/ticdat
    def testSqlSpaceyTwo(self):
        """SQLite round trips through externally managed connection objects."""
        if not self.can_run:
            return
        self.assertTrue(pandatio.sql,
                        "this unit test requires SQLite installed")

        tdf = TicDatFactory(**spacesSchema())
        pdf = PanDatFactory(**spacesSchema())
        # Hand-built data with mixed numeric/string keys and values.
        ticDat = tdf.TicDat(
            **{
                "a_table": {
                    1: [1, 2, "3"],
                    22.2: (12, 0.12, "something"),
                    0.23: (11, 12, "thirt")
                },
                "b_table": {
                    (1, 2, "foo"): 1,
                    (1012.22, 4, "0012"): 12
                },
                "c_table": (("this", 2, 3, 4), ("that", 102.212, 3, 5.5),
                            ("another", 5, 12.5, 24))
            })
        panDat = pan_dat_maker(spacesSchema(), ticDat)
        ext = ".db"
        filePath = os.path.join(_scratchDir, "spaces_2%s" % ext)
        # Write and read via a caller-supplied connection (db_file_path=None).
        with pandatio.sql.connect(filePath) as con:
            pdf.sql.write_file(panDat,
                               db_file_path=None,
                               con=con,
                               case_space_table_names=True)
        with pandatio.sql.connect(filePath) as con:
            panDat2 = pdf.sql.create_pan_dat(db_file_path=None, con=con)
        self.assertTrue(pdf._same_data(panDat, panDat2))

        # Same exercise with the netflow schema; "" for db_file_path this time.
        tdf = TicDatFactory(**netflowSchema())
        pdf = PanDatFactory(**netflowSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **
                {t: getattr(netflowData(), t)
                 for t in tdf.primary_key_fields}))
        panDat = pan_dat_maker(netflowSchema(), ticDat)
        filePath = os.path.join(_scratchDir, "spaces_2_2%s" % ext)
        with pandatio.sql.connect(filePath) as con:
            pdf.sql.write_file(panDat,
                               db_file_path="",
                               con=con,
                               case_space_table_names=True)
        with pandatio.sql.connect(filePath) as con:
            panDat2 = pdf.sql.create_pan_dat(None, con)
        self.assertTrue(pdf._same_data(panDat, panDat2))
コード例 #20
0
ファイル: testpandat_utils.py プロジェクト: adampkehoe/ticdat
 def test_fk_max_failures(self):
     """max_failures caps how many foreign-key failures are collected."""
     tdf = TicDatFactory(**dietSchema())
     addDietForeignKeys(tdf)
     # 10 nutritionQuantities rows whose food/category targets don't exist,
     # so both foreign keys fail on every row.
     dat = tdf.TicDat(nutritionQuantities=[[f"food_{_}", f"cat_{_}", 10]
                                           for _ in range(10)])
     pan_dat = tdf.copy_to_pandas(dat, drop_pk_columns=False)
     pdf = PanDatFactory.create_from_full_schema(
         tdf.schema(include_ancillary_info=True))
     # Uncapped: both foreign keys report all 10 failing rows.
     errs = pdf.find_foreign_key_failures(pan_dat)
     self.assertTrue(
         len(errs) == 2 and all(len(_) == 10 for _ in errs.values()))
     # Cap at 11: one key fully reported, the other truncated to 1.
     errs = pdf.find_foreign_key_failures(pan_dat, max_failures=11)
     self.assertTrue(
         len(errs) == 2 and set(map(len, errs.values())) == {10, 1})
     # Cap at 10: only one key reported at all.
     errs = pdf.find_foreign_key_failures(pan_dat, max_failures=10)
     self.assertTrue(
         len(errs) == 1 and all(len(_) == 10 for _ in errs.values()))
     # Cap at 9: that key is itself truncated.
     errs = pdf.find_foreign_key_failures(pan_dat, max_failures=9)
     self.assertTrue(
         len(errs) == 1 and all(len(_) == 9 for _ in errs.values()))
コード例 #21
0
ファイル: testpandat_io.py プロジェクト: nandi6uc/ticdat
 def test_missing_tables(self):
     """Reading with a wider schema than was written still recovers the shared table."""
     core_path = os.path.join(_scratchDir, "missing_tables")
     pdf_1 = PanDatFactory(this=[["Something"], ["Another"]])
     # pdf_2 expects an extra table ("that") that pdf_1 never writes.
     wider_schema = dict(pdf_1.schema(), that=[["What", "Ever"], []])
     pdf_2 = PanDatFactory(**wider_schema)
     dat = pdf_1.PanDat(this={
         "Something": ["a", "b", "c"],
         "Another": [2, 3, 5]
     })
     targets = [["sql", core_path + ".db"],
                ["csv", core_path + "_csv"],
                ["json", core_path + ".json"],
                ["xls", core_path + ".xlsx"]]
     for attr, path in targets:
         # csv writes a directory; every other format writes a single file.
         writer_name = "write_directory" if attr == "csv" else "write_file"
         getattr(getattr(pdf_1, attr), writer_name)(dat, path)
         dat_1 = getattr(pdf_2, attr).create_pan_dat(path)
         self.assertTrue(pdf_1._same_data(dat, dat_1))
コード例 #22
0
ファイル: testpandat_io.py プロジェクト: nandi6uc/ticdat
    def testCsvSpacey(self):
        """CSV directory round trips, including a non-default separator."""
        if not self.can_run:
            return
        # NOTE(review): this SQLite check looks copy-pasted from the sql test;
        # csv i/o presumably doesn't need it -- confirm before removing.
        self.assertTrue(pandatio.sql,
                        "this unit test requires SQLite installed")

        tdf = TicDatFactory(**spacesSchema())
        pdf = PanDatFactory(**spacesSchema())
        # Hand-built data with mixed numeric/string keys and values.
        ticDat = tdf.TicDat(
            **{
                "a_table": {
                    1: [1, 2, "3"],
                    22.2: (12, 0.12, "something"),
                    0.23: (11, 12, "thirt")
                },
                "b_table": {
                    (1, 2, "foo"): 1,
                    (1012.22, 4, "0012"): 12
                },
                "c_table": (("this", 2, 3, 4), ("that", 102.212, 3, 5.5),
                            ("another", 5, 12.5, 24))
            })
        panDat = pan_dat_maker(spacesSchema(), ticDat)
        dirPath = os.path.join(_scratchDir, "spaces_2_csv")
        pdf.csv.write_directory(panDat, dirPath, case_space_table_names=True)
        panDat2 = pdf.csv.create_pan_dat(dirPath)
        self.assertTrue(pdf._same_data(panDat, panDat2))

        # Netflow schema with ":" as the field separator on write and read.
        tdf = TicDatFactory(**netflowSchema())
        pdf = PanDatFactory(**netflowSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **
                {t: getattr(netflowData(), t)
                 for t in tdf.primary_key_fields}))
        panDat = pan_dat_maker(netflowSchema(), ticDat)
        dirPath = os.path.join(_scratchDir, "spaces_2_2_csv")
        pdf.csv.write_directory(panDat,
                                dirPath,
                                case_space_table_names=True,
                                sep=":")
        panDat2 = pdf.csv.create_pan_dat(dirPath, sep=":")
        self.assertTrue(pdf._same_data(panDat, panDat2))
コード例 #23
0
    def testVariousCoverages(self):
        """Smoke-test assorted PanDatFactory configuration APIs."""
        # set_default_values stores exactly the dict it was given.
        pdf = PanDatFactory(**dietSchema())
        _d = dict(categories={"minNutrition": 0, "maxNutrition": float("inf")},
                               foods={"cost": 0}, nutritionQuantities={"qty": 0})
        pdf.set_default_values(**_d)
        self.assertTrue(pdf._default_values == _d)
        # After clearing "arcs", only foreign keys on cost/inflow remain.
        pdf = PanDatFactory(**netflowSchema())
        addNetflowForeignKeys(pdf)
        pdf.clear_foreign_keys("arcs")
        self.assertTrue({_[0] for _ in pdf._foreign_keys} == {"cost", "inflow"})

        # Add an unnamed predicate and a named one; passing None with name 0
        # evidently deletes the auto-named predicate, so only "dummy" survives
        # the clone.
        pdf.add_data_row_predicate("arcs", lambda row: True)
        pdf.add_data_row_predicate("arcs", lambda row: True, "dummy")
        pdf.add_data_row_predicate("arcs", None, 0)
        pdf = pdf.clone()
        self.assertTrue(set(pdf._data_row_predicates["arcs"]) == {"dummy"})
コード例 #24
0
    def testDataTypes(self):
        """End-to-end check of set_data_type / find & replace data-type failures."""
        if not self.canRun:
            return
        tdf = TicDatFactory(**dietSchema())
        pdf = PanDatFactory(**dietSchema())

        # Build diet data with a None cost for food "b".
        ticdat = tdf.TicDat()
        ticdat.foods["a"] = 12
        ticdat.foods["b"] = None
        ticdat.categories["1"] = {"maxNutrition":100, "minNutrition":40}
        ticdat.categories["2"] = [10,20]
        for f, p in itertools.product(ticdat.foods, ticdat.categories):
            ticdat.nutritionQuantities[f,p] = 5
        ticdat.nutritionQuantities['a', 2] = 12

        pandat = pdf.copy_pan_dat(copy_to_pandas_with_reset(tdf, ticdat))

        # With no data types configured there is nothing to fail or replace.
        self.assertFalse(pdf.find_data_type_failures(pandat))
        pandat_copy = pdf.replace_data_type_failures(pdf.copy_pan_dat(pandat))
        self.assertTrue(pdf._same_data(pandat, pandat_copy, epsilon=0.00001))

        # A non-nullable cost and a (5, 12] qty window create failures.
        pdf = PanDatFactory(**dietSchema())
        pdf.set_data_type("foods", "cost", nullable=False)
        pdf.set_data_type("nutritionQuantities", "qty", min=5, inclusive_min=False, max=12, inclusive_max=True)
        failed = pdf.find_data_type_failures(pandat)
        self.assertTrue(set(failed) == {('foods', 'cost'), ('nutritionQuantities', 'qty')})
        self.assertTrue(set(failed['foods', 'cost']["name"]) == {'b'})
        self.assertTrue(set({(v["food"], v["category"])
                             for v in failed['nutritionQuantities', 'qty'].T.to_dict().values()}) ==
                            {('b', '1'), ('a', '2'), ('a', '1'), ('b', '2')})

        # as_table=False yields boolean Series; four qty rows fail.
        failed = pdf.find_data_type_failures(pandat, as_table=False)
        self.assertTrue(4 == failed['nutritionQuantities', 'qty'].value_counts()[True])
        # Replacement substitutes defaults, or the 5.15 override for qty.
        fixed = pdf.replace_data_type_failures(pdf.copy_pan_dat(pandat), {("nutritionQuantities", "qty"): 5.15})
        self.assertTrue(set(fixed.foods["cost"]) == {0.0, 12.0})
        self.assertTrue(set(fixed.nutritionQuantities["qty"]) == {5.15, 12.0})

        # Netflow: set some arcs' capacity to string node names via fk links.
        tdf = TicDatFactory(**netflowSchema())
        tdf.enable_foreign_key_links()
        addNetflowForeignKeys(tdf)
        pdf = PanDatFactory(**netflowSchema())
        ticdat = tdf.copy_tic_dat(netflowData())
        for n in ticdat.nodes["Detroit"].arcs_source:
            ticdat.arcs["Detroit", n] = n
        pandat = pdf.copy_pan_dat(copy_to_pandas_with_reset(tdf, ticdat))
        self.assertFalse(pdf.find_data_type_failures(pandat))

        # Any string allowed: still no failures.
        pdf = PanDatFactory(**netflowSchema())
        pdf.set_data_type("arcs", "capacity", strings_allowed="*")
        self.assertFalse(pdf.find_data_type_failures(pandat))

        # Restrict the allowed strings; "New York" now fails and gets replaced.
        pdf = PanDatFactory(**netflowSchema())
        pdf.set_data_type("arcs", "capacity", strings_allowed=["Boston", "Seattle", "lumberjack"])
        failed = pdf.find_data_type_failures(pandat)
        self.assertTrue(set(failed) == {('arcs', 'capacity')})
        self.assertTrue(set({(v["source"], v["destination"])
                             for v in failed['arcs', 'capacity'].T.to_dict().values()}) == {("Detroit", "New York")})
        pdf.replace_data_type_failures(pandat)
        self.assertTrue(set(pandat.arcs["capacity"]) == {120, 'Boston', 0, 'Seattle'})
コード例 #25
0
 def make_pdf():
     """Build a PanDatFactory with typed parameters, cloned via its full schema."""
     factory = PanDatFactory(data_table=[["a"], ["b", "c"]],
                             parameters=[["a"], ["b"]])
     # Numeric parameter capped at 100 (inclusive).
     factory.add_parameter("Something", 100, max=100, inclusive_max=True)
     # Integer-only parameter.
     factory.add_parameter("Another thing", 5, must_be_int=True)
     # No type enforcement at all for this one.
     factory.add_parameter("Untyped thing", "whatever", enforce_type_rules=False)
     # String-only parameter.
     factory.add_parameter("Last", 'boo', number_allowed=False, strings_allowed='*')
     # Round-trip through the full schema to exercise create_from_full_schema.
     return PanDatFactory.create_from_full_schema(factory.schema(True))
コード例 #26
0
    def testDictConstructions(self):
        """PanDat accepts dict / list-dict / records / list-of-lists table inputs."""
        tdf = TicDatFactory(**dietSchema())
        pdf = PanDatFactory(**dietSchema())
        ticDat = tdf.freeze_me(tdf.TicDat(**{t:getattr(dietData(),t) for t in tdf.primary_key_fields}))
        panDat = pan_dat_maker(dietSchema(), ticDat)
        # The same data expressed three different ways builds equal PanDats.
        panDat2 = pdf.PanDat(**{t:getattr(panDat, t).to_dict() for t in pdf.all_tables})
        panDat3 = pdf.PanDat(**{t:getattr(panDat, t).to_dict(orient="list") for t in pdf.all_tables})
        panDat3_1 = pdf.PanDat(**{t:list(map(list, getattr(panDat, t).itertuples(index=False)))
                                  for t in pdf.all_tables})

        self.assertTrue(all(pdf._same_data(panDat, _) for _ in [panDat2, panDat3, panDat3_1]))
        # A column beyond the schema survives the constructor round trip.
        panDat.foods["extra"] = 12
        panDat4 = pdf.PanDat(**{t:getattr(panDat, t).to_dict(orient="list") for t in pdf.all_tables})
        self.assertTrue(pdf._same_data(panDat, panDat4))
        self.assertTrue(set(panDat4.foods["extra"]) == {12})

        # Repeat for the netflow schema, adding orient="records".
        tdf = TicDatFactory(**netflowSchema())
        pdf = PanDatFactory(**netflowSchema())
        ticDat = tdf.freeze_me(tdf.TicDat(**{t:getattr(netflowData(),t) for t in tdf.primary_key_fields}))
        panDat = pan_dat_maker(netflowSchema(), ticDat)
        panDat2 = pdf.PanDat(**{t:getattr(panDat, t).to_dict() for t in pdf.all_tables})
        panDat3 = pdf.PanDat(**{t:getattr(panDat, t).to_dict(orient="records") for t in pdf.all_tables})
        self.assertTrue(all(pdf._same_data(panDat, _) for _ in [panDat2, panDat3]))
        panDat.cost["extra"] = "boger"
        panDat4 = pdf.PanDat(**{t:getattr(panDat, t).to_dict(orient="list") for t in pdf.all_tables})
        self.assertTrue(pdf._same_data(panDat, panDat4))
        self.assertTrue(set(panDat4.cost["extra"]) == {"boger"})
コード例 #27
0
    def testAdditionalFKs(self):
        """Foreign keys from child tables with different primary-key/data-field
        layouts (pt3/pt4/pt5) to two parent tables (pt1, pt2): failures are
        detected, removable, and removal restores the original data."""
        pdf = PanDatFactory(pt1 = [["F1"],[]], pt2 = [["F2"],[]], pt3 = [["F1","F2"],[]],
                            pt4 = [["F1"],["F2"]], pt5 = [[],["F1","F2"]])
        for child in ["pt3", "pt4", "pt5"]:
            pdf.add_foreign_key(child, "pt1", ["F1", "F1"])
            pdf.add_foreign_key(child, "pt2", ["F2", "F2"])
        tdf = TicDatFactory(**pdf.schema())

        def as_pan_dat(tic_dat):
            # Round-trip through pandas; duplicates would muddy the FK checks.
            converted = pdf.copy_pan_dat(copy_to_pandas_with_reset(tdf, tic_dat))
            self.assertFalse(pdf.find_duplicates(converted))
            return converted

        ticDat = tdf.TicDat(pt1=[1, 2, 3, 4], pt2=[5, 6, 7, 8])
        for f1, f2 in itertools.product(range(1, 5), range(5, 9)):
            ticDat.pt3[f1, f2] = {}
            ticDat.pt4[f1] = f2
            ticDat.pt5.append((f1, f2))
        origDat = tdf.copy_tic_dat(ticDat, freeze_it=True)
        self.assertFalse(pdf.find_foreign_key_failures(as_pan_dat(origDat)))

        # Orphan rows injected into pt3 and pt4 only.
        ticDat.pt3["no", 6] = ticDat.pt3[1, "no"] = {}
        ticDat.pt4["no"] = 6
        ticDat.pt4["nono"] = 6.01
        panDat = as_pan_dat(ticDat)
        fails1 = pdf.find_foreign_key_failures(panDat)
        self.assertTrue(fails1)
        pdf.remove_foreign_key_failures(panDat)
        self.assertFalse(pdf.find_foreign_key_failures(panDat))
        self.assertTrue(pdf._same_data(panDat, as_pan_dat(origDat)))

        # Also orphan pt5 rows; the failure set should strictly grow.
        orig_lens = {t: len(getattr(origDat, t)) for t in tdf.all_tables}
        ticDat.pt3["no", 6] = ticDat.pt3[1, "no"] = {}
        ticDat.pt4["no"] = 6
        ticDat.pt4["nono"] = 6.01
        ticDat.pt5.append(("no", 6))
        ticDat.pt5.append((1, "no"))
        panDat = as_pan_dat(ticDat)
        fails2 = pdf.find_foreign_key_failures(panDat)
        self.assertTrue(set(fails1) != set(fails2) and set(fails1).issubset(fails2))
        pdf.remove_foreign_key_failures(panDat)
        self.assertFalse(pdf.find_foreign_key_failures(panDat))
        self.assertTrue({t: len(getattr(panDat, t)) for t in tdf.all_tables} == orig_lens)
コード例 #28
0
    def testBasicFKs(self):
        """Exercise single-field and compound foreign keys across a small production
        schema: failure detection (at both verbosity levels), failure removal, and
        behavior under PanDatFactory cloning."""
        for cloning in [True, False, "*"]:
            # NOTE(review): for cloning == "*" this lambda reads `tdf`, which is only
            # assigned later in the loop body — it works because "*" is the third
            # iteration and `tdf` persists from the previous iteration. Fragile if the
            # iteration order of the list ever changes.
            clone_me_maybe = lambda x : x.clone(tdf.all_tables if cloning == "*" else None) if cloning else x

            pdf = PanDatFactory(plants = [["name"], ["stuff", "otherstuff"]],
                                lines = [["name"], ["plant", "weird stuff"]],
                                line_descriptor = [["name"], ["booger"]],
                                products = [["name"],["gover"]],
                                production = [["line", "product"], ["min", "max"]],
                                pureTestingTable = [[], ["line", "plant", "product", "something"]],
                                extraProduction = [["line", "product"], ["extramin", "extramax"]],
                                weirdProduction = [["line1", "line2", "product"], ["weirdmin", "weirdmax"]])
            pdf.add_foreign_key("production", "lines", ("line", "name"))
            pdf.add_foreign_key("production", "products", ("product", "name"))
            pdf.add_foreign_key("lines", "plants", ("plant", "name"))
            pdf.add_foreign_key("line_descriptor", "lines", ("name", "name"))
            # pureTestingTable's "line"/"plant"/"product" fields each FK to the
            # correspondingly named table ("%ss" pluralizes the field name).
            for f in set(pdf.data_fields["pureTestingTable"]).difference({"something"}):
                pdf.add_foreign_key("pureTestingTable", "%ss"%f, (f,"name"))
            # Compound (two-field) foreign keys.
            pdf.add_foreign_key("extraProduction", "production", (("line", "line"), ("product","product")))
            pdf.add_foreign_key("weirdProduction", "production", (("line1", "line"), ("product","product")))
            pdf.add_foreign_key("weirdProduction", "extraProduction", (("line2","line"), ("product","product")))
            self._testPdfReproduction(pdf)
            pdf = clone_me_maybe(pdf)

            tdf = TicDatFactory(**pdf.schema())
            goodDat = tdf.TicDat()
            goodDat.plants["Cleveland"] = ["this", "that"]
            goodDat.plants["Newark"]["otherstuff"] =1
            goodDat.products["widgets"] = goodDat.products["gadgets"] = "shizzle"

            for i,p in enumerate(goodDat.plants):
                goodDat.lines[i]["plant"] = p

            for i,(pl, pd) in enumerate(itertools.product(goodDat.lines, goodDat.products)):
                goodDat.production[pl, pd] = {"min":1, "max":10+i}

            # badDat1/badDat2 each get one production row referencing a nonexistent line.
            badDat1 = tdf.copy_tic_dat(goodDat)
            badDat1.production["notaline", "widgets"] = [0,1]
            badDat2 = tdf.copy_tic_dat(badDat1)


            def pan_dat_(_):
                # Convert to PanDat; the conversion must not introduce duplicates.
                rtn = pdf.copy_pan_dat(copy_to_pandas_with_reset(tdf, _))
                self.assertFalse(pdf.find_duplicates(rtn))
                return rtn
            fk, fkm = ForeignKey, ForeignKeyMapping
            fk_fails1 = pdf.find_foreign_key_failures(pan_dat_(badDat1))
            fk_fails2 = pdf.find_foreign_key_failures(pan_dat_(badDat2))

            # Only the production->lines FK should fail for the "notaline" row.
            self.assertTrue(set(fk_fails1) == set(fk_fails2) ==
                            {fk('production', 'lines', fkm('line', 'name'), 'many-to-one')})
            self.assertTrue(set(pdf.find_foreign_key_failures(pan_dat_(badDat1), verbosity="Low")) ==
                            set(pdf.find_foreign_key_failures(pan_dat_(badDat2), verbosity="Low")) ==
                             {('production', 'lines', ('line', 'name'))})
            for row_fails in [next(iter(_.values())) for _ in [fk_fails1, fk_fails2]]:
                self.assertTrue(set(row_fails["line"]) == {"notaline"} and set(row_fails["product"]) == {"widgets"})

            # Adding the missing line (pointing at a nonexistent plant) pushes the
            # failure one level up the FK chain: now lines->plants fails instead.
            badDat1.lines["notaline"]["plant"] = badDat2.lines["notaline"]["plant"] = "notnewark"
            fk_fails1 = pdf.find_foreign_key_failures(pan_dat_(badDat1))
            fk_fails2 = pdf.find_foreign_key_failures(pan_dat_(badDat2))
            self.assertTrue(set(fk_fails1) == set(fk_fails2) ==
                            {fk('lines', 'plants', fkm('plant', 'name'), 'many-to-one')})
            for row_fails in [next(iter(_.values())) for _ in [fk_fails1, fk_fails2]]:
                self.assertTrue(set(row_fails["name"]) == {"notaline"} and set(row_fails["plant"]) == {"notnewark"})


            # Removing FK failures (cascading) should restore goodDat exactly.
            for bad in [badDat1, badDat2]:
                bad_pan = pdf.remove_foreign_key_failures(pan_dat_(bad))
                self.assertFalse(pdf.find_foreign_key_failures(bad_pan))
                self.assertTrue(pdf._same_data(bad_pan, pan_dat_(goodDat)))


            _ = len(goodDat.lines)
            for i,p in enumerate(list(goodDat.plants.keys()) + list(goodDat.plants.keys())):
                goodDat.lines[i+_]["plant"] = p
            # NOTE(review): `i` here is the stale final value from the enumerate loop
            # above, so `i%2` is the same for every `l` — line_descriptor is populated
            # either for all lines or for none. Possibly intended as `enumerate`; as
            # written it still produces valid (FK-clean) data, so the test passes.
            for l in goodDat.lines:
                if i%2:
                    goodDat.line_descriptor[l] = i+10

            for i,(l,pl,pdct) in enumerate(sorted(itertools.product(goodDat.lines, goodDat.plants, goodDat.products))):
                goodDat.pureTestingTable.append((l,pl,pdct,i))
            self.assertFalse(pdf.find_foreign_key_failures(pan_dat_(goodDat)))
            # One junk row in the PK-less pureTestingTable trips all three of its FKs.
            badDat = tdf.copy_tic_dat(goodDat)
            badDat.pureTestingTable.append(("j", "u", "nk", "ay"))
            fk_fails = pdf.find_foreign_key_failures(pan_dat_(badDat))
            self.assertTrue(set(fk_fails) ==
                {fk('pureTestingTable', 'plants', fkm('plant', 'name'), 'many-to-one'),
                 fk('pureTestingTable', 'products', fkm('product', 'name'), 'many-to-one'),
                 fk('pureTestingTable', 'lines', fkm('line', 'name'), 'many-to-one')})

            # Each failure table should contain exactly the junk row's values.
            for df in fk_fails.values():
                df = df.T
                c = df.columns[0]
                self.assertTrue({'ay', 'j', 'nk', 'u'} == set(df[c]))
コード例 #29
0
    def testXToManyTwo(self):
        """Multi-field foreign keys from three child-table layouts to a two-field-PK
        parent: failures are found, removable, and removal restores the clean data.
        Runs twice — once with a three-field FK (mixed one-to-one/many-to-one
        cardinalities, asserted) and once with a two-field FK."""
        rows = [[1, 2, 3], [1, 2.1, 3], [4, 5, 6], [4, 5.1, 6], [7, 8, 9]]
        fk_variants = [([["F1"] * 2, ["F2"] * 2, ["F3"] * 2], 3, True),
                       ([["F1"] * 2, ["F3"] * 2], 4, False)]
        for fk_fields, bad_child_two_value, check_cardinalities in fk_variants:
            input_schema = PanDatFactory(parent=[["F1", "F2"], ["F3"]], child_one=[["F1", "F2", "F3"], []],
                                         child_two=[["F1", "F2"], ["F3"]], child_three=[[], ["F1", "F2", "F3"]])
            for child in ["child_one", "child_two", "child_three"]:
                input_schema.add_foreign_key(child, "parent", fk_fields)
            if check_cardinalities:
                self.assertTrue({fk.cardinality for fk in input_schema.foreign_keys} ==
                                {"one-to-one", "many-to-one"})

            tdf = TicDatFactory(**input_schema.schema())
            dat = tdf.TicDat(parent=rows, child_one=rows, child_two=rows, child_three=rows)
            self.assertTrue(all(len(getattr(dat, t)) == 5 for t in input_schema.all_tables))
            orig_pan_dat = input_schema.copy_pan_dat(copy_to_pandas_with_reset(tdf, dat))
            self.assertFalse(input_schema.find_foreign_key_failures(orig_pan_dat))

            # Orphan one row in each child table.
            dat.child_one[1, 2, 4] = {}
            dat.child_two[1, 2.2] = bad_child_two_value
            dat.child_three.append([1, 2, 4])
            new_pan_dat = input_schema.copy_pan_dat(copy_to_pandas_with_reset(tdf, dat))
            self.assertTrue(len(input_schema.find_foreign_key_failures(new_pan_dat)) == 3)
            input_schema.remove_foreign_key_failures(new_pan_dat)
            self.assertFalse(input_schema.find_foreign_key_failures(new_pan_dat))
            self.assertTrue(input_schema._same_data(orig_pan_dat, new_pan_dat))
コード例 #30
0
    def testXToMany(self):
        """Many-to-many foreign keys (on data fields rather than full primary keys)
        and a one-to-many FK: failure detection across verbosity/as_table variants,
        and removal restoring the clean data."""
        input_schema = PanDatFactory (roster = [["Name"],["Grade", "Arrival Inning", "Departure Inning",
                                                          "Min Innings Played", "Max Innings Played"]],
                                      positions = [["Position"],["Position Importance", "Position Group",
                                                                 "Consecutive Innings Only"]],
                                      innings = [["Inning"],["Inning Group"]],
                                      position_constraints = [["Position Group", "Inning Group", "Grade"],
                                                              ["Min Players", "Max Players"]])
        # Each FK maps a position_constraints PK field onto a DATA field of the
        # parent, which is what makes all three cardinalities many-to-many.
        input_schema.add_foreign_key("position_constraints", "roster", ["Grade", "Grade"])
        input_schema.add_foreign_key("position_constraints", "positions", ["Position Group", "Position Group"])
        input_schema.add_foreign_key("position_constraints", "innings", ["Inning Group", "Inning Group"])

        self.assertTrue({fk.cardinality for fk in input_schema.foreign_keys} == {"many-to-many"})

        tdf = TicDatFactory(**input_schema.schema())
        dat = tdf.TicDat()
        for i,p in enumerate(["bob", "joe", "fred", "alice", "lisa", "joean", "ginny"]):
            dat.roster[p]["Grade"] = (i%3)+1
        dat.roster["dummy"]["Grade"]  = "whatevers"
        for i,p in enumerate(["pitcher", "catcher", "1b", "2b", "ss", "3b", "lf", "cf", "rf"]):
            dat.positions[p]["Position Group"] = "PG %s"%((i%4)+1)
        for i in range(1, 10):
            dat.innings[i]["Inning Group"] = "before stretch" if i < 7 else "after stretch"
        dat.innings[0] ={}
        # Every (group, inning-group, grade) combination has matching parent rows.
        for pg, ig, g in itertools.product(["PG %s"%i for i in range(1,5)], ["before stretch", "after stretch"],
                                           [1, 2, 3]):
            dat.position_constraints[pg, ig, g] = {}

        orig_pan_dat = input_schema.copy_pan_dat(copy_to_pandas_with_reset(tdf, dat))
        self.assertFalse(input_schema.find_foreign_key_failures(orig_pan_dat))

        # ("no","no","no") violates all three FKs; (1,2,3) violates only the two
        # non-Grade FKs (grades 1/2/3 exist in roster), hence the 2/2/1 counts below.
        dat.position_constraints["no", "no", "no"] = dat.position_constraints[1, 2, 3] = {}
        new_pan_dat = input_schema.copy_pan_dat(copy_to_pandas_with_reset(tdf, dat))
        self.assertFalse(input_schema._same_data(orig_pan_dat, new_pan_dat))
        fk_fails = input_schema.find_foreign_key_failures(new_pan_dat)
        fk_fails_2 = input_schema.find_foreign_key_failures(new_pan_dat, verbosity="Low")
        fk_fails_3 = input_schema.find_foreign_key_failures(new_pan_dat, verbosity="Low", as_table=False)
        # All three result formats must agree on the per-FK failure counts.
        self.assertTrue({tuple(k)[:2] + (tuple(k[2]),): len(v) for k,v in fk_fails.items()} ==
                        {k:len(v) for k,v in fk_fails_2.items()} ==
                        {k:v.count(True) for k,v in fk_fails_3.items()} ==
                        {('position_constraints', 'innings', ("Inning Group", "Inning Group")): 2,
                         ('position_constraints', 'positions', ("Position Group", "Position Group")): 2,
                         ('position_constraints', 'roster', ("Grade", "Grade")): 1})
        input_schema.remove_foreign_key_failures(new_pan_dat)
        self.assertFalse(input_schema.find_foreign_key_failures(new_pan_dat))
        self.assertTrue(input_schema._same_data(orig_pan_dat, new_pan_dat))

        # One-to-many: the parent field ("One") is only part of table_one's PK.
        input_schema = PanDatFactory(table_one=[["One", "Two"], []],
                                     table_two=[["One"], ["Two"]])
        input_schema.add_foreign_key("table_two", "table_one", ["One", "One"])
        self.assertTrue({fk.cardinality for fk in input_schema.foreign_keys} == {"one-to-many"})

        tdf = TicDatFactory(**input_schema.schema())
        dat = tdf.TicDat(table_one = [[1,2], [3,4], [5,6], [7,8]], table_two = {1:2, 3:4, 5:6})

        orig_pan_dat = input_schema.copy_pan_dat(copy_to_pandas_with_reset(tdf, dat))
        self.assertFalse(input_schema.find_foreign_key_failures(orig_pan_dat))
        # Key 9 has no match in table_one's "One" values.
        dat.table_two[9]=10
        new_pan_dat = input_schema.copy_pan_dat(copy_to_pandas_with_reset(tdf, dat))
        fk_fails = input_schema.find_foreign_key_failures(new_pan_dat)
        self.assertTrue({tuple(k)[:2]:len(v) for k,v in fk_fails.items()} == {('table_two', 'table_one'): 1})
        input_schema.remove_foreign_key_failures(new_pan_dat)
        self.assertFalse(input_schema.find_foreign_key_failures(new_pan_dat))
        self.assertTrue(input_schema._same_data(orig_pan_dat, new_pan_dat))