Example #1
0
    def testDiet(self):
        def doTheTests(tdf) :
            ticDat = tdf.freeze_me(tdf.TicDat(**{t:getattr(dietData(),t) for t in tdf.primary_key_fields}))
            filePath = makeCleanPath(os.path.join(_scratchDir, "diet.db"))
            tdf.sql.write_db_data(ticDat, filePath)
            sqlTicDat = tdf.sql.create_tic_dat(filePath)
            self.assertTrue(tdf._same_data(ticDat, sqlTicDat))
            def changeit() :
                sqlTicDat.categories["calories"]["minNutrition"]=12
            changeit()
            self.assertFalse(tdf._same_data(ticDat, sqlTicDat))

            self.assertTrue(self.firesException(lambda : tdf.sql.write_db_data(ticDat, filePath)))
            tdf.sql.write_db_data(ticDat, filePath, allow_overwrite=True)
            sqlTicDat = tdf.sql.create_tic_dat(filePath, freeze_it=True)
            self.assertTrue(tdf._same_data(ticDat, sqlTicDat))
            self.assertTrue(self.firesException(changeit))
            self.assertTrue(tdf._same_data(ticDat, sqlTicDat))

            filePath = makeCleanPath(os.path.join(_scratchDir, "diet.sql"))
            tdf.sql.write_sql_file(ticDat, filePath)
            sqlTicDat = tdf.sql.create_tic_dat_from_sql(filePath)
            self.assertTrue(tdf._same_data(ticDat, sqlTicDat))
            changeit()
            self.assertFalse(tdf._same_data(ticDat, sqlTicDat))

            tdf.sql.write_sql_file(ticDat, filePath, include_schema=True)
            sqlTicDat = tdf.sql.create_tic_dat_from_sql(filePath, includes_schema=True, freeze_it=True)
            self.assertTrue(tdf._same_data(ticDat, sqlTicDat))
            self.assertTrue(self.firesException(changeit))
            self.assertTrue(tdf._same_data(ticDat, sqlTicDat))

        doTheTests(TicDatFactory(**dietSchema()))

        tdf = TicDatFactory(**dietSchema())
        self.assertFalse(tdf.foreign_keys)
        tdf.set_default_values(categories =  {'maxNutrition': float("inf"), 'minNutrition': 0.0},
                               foods =  {'cost': 0.0},
                               nutritionQuantities =  {'qty': 0.0})
        addDietForeignKeys(tdf)
        ordered = tdf.sql._ordered_tables()
        self.assertTrue(ordered.index("categories") < ordered.index("nutritionQuantities"))
        self.assertTrue(ordered.index("foods") < ordered.index("nutritionQuantities"))

        ticDat = tdf.TicDat(**{t:getattr(dietData(),t) for t in tdf.primary_key_fields})
        origTicDat = tdf.copy_tic_dat(ticDat)
        self.assertTrue(tdf._same_data(ticDat, origTicDat))
        self.assertFalse(tdf.find_foreign_key_failures(ticDat))
        ticDat.nutritionQuantities['hot dog', 'boger'] = ticDat.nutritionQuantities['junk', 'protein'] = -12
        self.assertTrue(tdf.find_foreign_key_failures(ticDat) ==
        {('nutritionQuantities', 'foods', ('food', 'name'), 'many-to-one'): (('junk',), (('junk', 'protein'),)),
         ('nutritionQuantities', 'categories', ('category', 'name'), 'many-to-one'):
             (('boger',), (('hot dog', 'boger'),))})

        self.assertFalse(tdf._same_data(ticDat, origTicDat))
        tdf.remove_foreign_keys_failures(ticDat)
        self.assertFalse(tdf.find_foreign_key_failures(ticDat))
        self.assertTrue(tdf._same_data(ticDat, origTicDat))

        doTheTests(tdf)
    def testDietCleaningOpalytisThree(self):
        tdf = TicDatFactory(**dietSchema())
        tdf.add_data_row_predicate("categories",
                                   lambda row: row["maxNutrition"] >= 66)
        addDietForeignKeys(tdf)
        ticDat = tdf.copy_tic_dat(dietData())

        pdf = PanDatFactory(**tdf.schema())
        pdf.add_data_row_predicate("categories",
                                   lambda row: row["maxNutrition"] >= 66)
        addDietForeignKeys(pdf)

        input_set = create_inputset_mock(tdf, ticDat)

        panDat = pdf.opalytics.create_pan_dat(input_set, raw_data=True)
        self.assertTrue(tdf._same_data(pdf.copy_to_tic_dat(panDat), ticDat))

        panDatPurged = pdf.opalytics.create_pan_dat(input_set, raw_data=False)
        self.assertFalse(
            tdf._same_data(pdf.copy_to_tic_dat(panDatPurged), ticDat))

        ticDat.categories.pop("fat")
        self.assertFalse(
            tdf._same_data(pdf.copy_to_tic_dat(panDatPurged), ticDat))
        tdf.remove_foreign_key_failures(ticDat)
        self.assertTrue(
            tdf._same_data(pdf.copy_to_tic_dat(panDatPurged), ticDat))
    def testDietCleaningOpalyticsTwo(self):
        tdf = TicDatFactory(**dietSchema())
        addDietForeignKeys(tdf)
        tdf.set_data_type("categories",
                          "maxNutrition",
                          min=66,
                          inclusive_max=True)
        ticDat = tdf.copy_tic_dat(dietData())

        input_set = create_inputset_mock(tdf, ticDat)
        pdf = PanDatFactory(**dietSchema())
        addDietForeignKeys(pdf)
        pdf.set_data_type("categories",
                          "maxNutrition",
                          min=66,
                          inclusive_max=True)

        panDat = pdf.opalytics.create_pan_dat(input_set, raw_data=True)
        self.assertTrue(tdf._same_data(pdf.copy_to_tic_dat(panDat), ticDat))

        panDatPurged = pdf.opalytics.create_pan_dat(input_set, raw_data=False)
        self.assertFalse(
            tdf._same_data(pdf.copy_to_tic_dat(panDatPurged), ticDat))

        ticDat.categories.pop("fat")
        self.assertFalse(
            tdf._same_data(pdf.copy_to_tic_dat(panDatPurged), ticDat))
        tdf.remove_foreign_key_failures(ticDat)
        self.assertTrue(
            tdf._same_data(pdf.copy_to_tic_dat(panDatPurged), ticDat))
    def testDietCleaningFive(self):
        tdf = TicDatFactory(**dietSchema())
        tdf.add_data_row_predicate("categories",
                                   lambda row: row["maxNutrition"] >= 66)
        tdf.set_data_type("categories",
                          "minNutrition",
                          max=0,
                          inclusive_max=True)
        addDietForeignKeys(tdf)
        ticDat = tdf.copy_tic_dat(dietData())

        input_set = create_inputset_mock(tdf, ticDat)

        self.assertTrue(
            tdf._same_data(
                tdf.opalytics.create_tic_dat(input_set, raw_data=True),
                ticDat))

        ticDatPurged = tdf.opalytics.create_tic_dat(input_set, raw_data=False)
        self.assertFalse(tdf._same_data(ticDatPurged, ticDat))

        ticDat.categories.pop("fat")
        ticDat.categories.pop("calories")
        ticDat.categories.pop("protein")

        self.assertFalse(tdf._same_data(ticDatPurged, ticDat))
        tdf.remove_foreign_keys_failures(ticDat)
        self.assertTrue(tdf._same_data(ticDatPurged, ticDat))
    def testDietCleaningThree_2(self):
        tdf = TicDatFactory(**dietSchema())
        addDietForeignKeys(tdf)
        ticDat = tdf.copy_tic_dat(dietData())
        ticDat.categories.pop("fat")
        input_set = create_inputset_mock(tdf, ticDat)

        self.assertTrue(tdf._same_data(tdf.opalytics.create_tic_dat(input_set, raw_data=True), ticDat))

        ticDatPurged = tdf.opalytics.create_tic_dat(input_set, raw_data=False)
        self.assertFalse(tdf._same_data(ticDatPurged, ticDat))
        tdf.remove_foreign_key_failures(ticDat)
        self.assertTrue(tdf._same_data(ticDatPurged, ticDat))
Example #6
0
 def test_fk_max_failures(self):
     tdf = TicDatFactory(**dietSchema())
     addDietForeignKeys(tdf)
     dat = tdf.TicDat(nutritionQuantities=[[f"food_{_}", f"cat_{_}", 10]
                                           for _ in range(10)])
     pan_dat = tdf.copy_to_pandas(dat, drop_pk_columns=False)
     pdf = PanDatFactory.create_from_full_schema(
         tdf.schema(include_ancillary_info=True))
     errs = pdf.find_foreign_key_failures(pan_dat)
     self.assertTrue(
         len(errs) == 2 and all(len(_) == 10 for _ in errs.values()))
     errs = pdf.find_foreign_key_failures(pan_dat, max_failures=11)
     self.assertTrue(
         len(errs) == 2 and set(map(len, errs.values())) == {10, 1})
     errs = pdf.find_foreign_key_failures(pan_dat, max_failures=10)
     self.assertTrue(
         len(errs) == 1 and all(len(_) == 10 for _ in errs.values()))
     errs = pdf.find_foreign_key_failures(pan_dat, max_failures=9)
     self.assertTrue(
         len(errs) == 1 and all(len(_) == 9 for _ in errs.values()))
Example #7
0
    def testDiet(self):
        if not self.can_run:
            return
        def doTheTests(tdf) :
            ticDat = tdf.freeze_me(tdf.TicDat(**{t:getattr(dietData(),t) for t in tdf.primary_key_fields}))
            filePath = makeCleanPath(os.path.join(_scratchDir, "diet.db"))
            tdf.sql.write_db_data(ticDat, filePath)
            self.assertFalse(tdf.sql.find_duplicates(filePath))
            sqlTicDat = tdf.sql.create_tic_dat(filePath)
            self.assertTrue(tdf._same_data(ticDat, sqlTicDat))
            def changeit() :
                sqlTicDat.categories["calories"]["minNutrition"]=12
            changeit()
            self.assertFalse(tdf._same_data(ticDat, sqlTicDat))

            self.assertTrue(self.firesException(lambda : tdf.sql.write_db_data(ticDat, filePath)))
            tdf.sql.write_db_data(ticDat, filePath, allow_overwrite=True)
            sqlTicDat = tdf.sql.create_tic_dat(filePath, freeze_it=True)
            self.assertTrue(tdf._same_data(ticDat, sqlTicDat))
            self.assertTrue(self.firesException(changeit))
            self.assertTrue(tdf._same_data(ticDat, sqlTicDat))

            filePath = makeCleanPath(os.path.join(_scratchDir, "diet.sql"))
            tdf.sql.write_sql_file(ticDat, filePath)
            sqlTicDat = tdf.sql.create_tic_dat_from_sql(filePath)
            self.assertTrue(tdf._same_data(ticDat, sqlTicDat))
            changeit()
            self.assertFalse(tdf._same_data(ticDat, sqlTicDat))

            tdf.sql.write_sql_file(ticDat, filePath, include_schema=True, allow_overwrite=True)
            sqlTicDat = tdf.sql.create_tic_dat_from_sql(filePath, includes_schema=True, freeze_it=True)
            self.assertTrue(tdf._same_data(ticDat, sqlTicDat))
            self.assertTrue(self.firesException(changeit))
            self.assertTrue(tdf._same_data(ticDat, sqlTicDat))

        doTheTests(TicDatFactory(**dietSchema()))

        tdf = TicDatFactory(**dietSchema())
        self.assertFalse(tdf.foreign_keys)
        tdf.set_default_values(categories =  {'maxNutrition': float("inf"), 'minNutrition': 0.0},
                               foods =  {'cost': 0.0},
                               nutritionQuantities =  {'qty': 0.0})
        addDietForeignKeys(tdf)
        ordered = tdf.sql._ordered_tables()
        self.assertTrue(ordered.index("categories") < ordered.index("nutritionQuantities"))
        self.assertTrue(ordered.index("foods") < ordered.index("nutritionQuantities"))

        ticDat = tdf.TicDat(**{t:getattr(dietData(),t) for t in tdf.primary_key_fields})
        self._test_generic_copy(ticDat, tdf)
        self._test_generic_copy(ticDat, tdf, ["nutritionQuantities"])
        origTicDat = tdf.copy_tic_dat(ticDat)
        self.assertTrue(tdf._same_data(ticDat, origTicDat))
        self.assertFalse(tdf.find_foreign_key_failures(ticDat))
        ticDat.nutritionQuantities['hot dog', 'boger'] = ticDat.nutritionQuantities['junk', 'protein'] = -12
        self.assertTrue(tdf.find_foreign_key_failures(ticDat) ==
        {('nutritionQuantities', 'foods', ('food', 'name'), 'many-to-one'): (('junk',), (('junk', 'protein'),)),
         ('nutritionQuantities', 'categories', ('category', 'name'), 'many-to-one'):
             (('boger',), (('hot dog', 'boger'),))})

        self.assertFalse(tdf._same_data(ticDat, origTicDat))
        tdf.remove_foreign_key_failures(ticDat)
        self.assertFalse(tdf.find_foreign_key_failures(ticDat))
        self.assertTrue(tdf._same_data(ticDat, origTicDat))

        doTheTests(tdf)
Example #8
0
    def testFive(self):
        tdf = TicDatFactory(**netflowSchema())
        addNetflowForeignKeys(tdf)
        dat = tdf.freeze_me(tdf.TicDat(**{t : getattr(netflowData(), t) for t in tdf.all_tables}))
        obfudat = tdf.obfusimplify(dat, freeze_it=1)
        self.assertFalse(tdf._same_data(dat, obfudat.copy))
        for (s,d),r in obfudat.copy.arcs.items():
            self.assertFalse((s,d) in dat.arcs)
            self.assertTrue(dat.arcs[obfudat.renamings[s][1], obfudat.renamings[d][1]]["capacity"] == r["capacity"])
        obfudat = tdf.obfusimplify(dat, freeze_it=1, skip_tables=["commodities", "nodes"])
        self.assertTrue(tdf._same_data(obfudat.copy, dat))

        tdf = TicDatFactory(**netflowSchema())
        addNetflowForeignKeys(tdf)
        mone, one2one = "many-to-one",  "one-to-one"
        fk, fkm = _ForeignKey, _ForeignKeyMapping
        self.assertTrue(set(tdf.foreign_keys) ==  {fk("arcs", 'nodes', fkm('source',u'name'), mone),
                            fk("arcs", 'nodes', fkm('destination',u'name'), mone),
                            fk("cost", 'nodes', fkm('source',u'name'), mone),
                            fk("cost", 'nodes', fkm('destination',u'name'), mone),
                            fk("cost", 'commodities', fkm('commodity',u'name'), mone),
                            fk("inflow", 'commodities', fkm('commodity',u'name'), mone),
                            fk("inflow", 'nodes', fkm('node',u'name'), mone)})

        tdf.clear_foreign_keys("cost")
        self.assertTrue(set(tdf.foreign_keys) ==  {fk("arcs", 'nodes', fkm('source',u'name'), mone),
                            fk("arcs", 'nodes', fkm('destination',u'name'), mone),
                            fk("inflow", 'commodities', fkm('commodity',u'name'), mone),
                            fk("inflow", 'nodes', fkm('node',u'name'), mone)})

        tdf = TicDatFactory(**dietSchema())
        self.assertFalse(tdf.foreign_keys)
        addDietForeignKeys(tdf)

        self.assertTrue(set(tdf.foreign_keys) == {fk("nutritionQuantities", 'categories', fkm('category',u'name'), mone),
                                                  fk("nutritionQuantities", 'foods', fkm('food',u'name'), mone)})

        tdf.TicDat()
        self.assertTrue(self.firesException(lambda  : tdf.clear_foreign_keys("nutritionQuantities")))
        self.assertTrue(tdf.foreign_keys)
        tdf = TicDatFactory(**dietSchema())
        addDietForeignKeys(tdf)
        tdf.clear_foreign_keys("nutritionQuantities")
        self.assertFalse(tdf.foreign_keys)

        tdf = TicDatFactory(parentTable = [["pk"],["pd1", "pd2", "pd3"]],
                            goodChild = [["gk"], ["gd1", "gd2"]],
                            badChild = [["bk1", "bk2"], ["bd"]],
                            appendageChild = [["ak"], ["ad1", "ad2"]],
                            appendageBadChild = [["bk1", "bk2"], []])
        tdf.add_foreign_key("goodChild", "parentTable", fkm("gd1" , "pk"))
        tdf.add_foreign_key("badChild", "parentTable", ["bk2" , "pk"])
        self.assertTrue("many-to-many" in self.firesException(lambda :
                tdf.add_foreign_key("badChild", "parentTable", ["bd", "pd2"])))
        tdf.add_foreign_key("appendageChild", "parentTable", ["ak", "pk"])
        tdf.add_foreign_key("appendageBadChild", "badChild", (("bk2", "bk2"), ("bk1","bk1")))
        fks = tdf.foreign_keys
        _getfk = lambda t : next(_ for _ in fks if _.native_table == t)
        self.assertTrue(_getfk("goodChild").cardinality == "many-to-one")
        self.assertTrue(_getfk("badChild").cardinality == "many-to-one")
        self.assertTrue(_getfk("appendageChild").cardinality == "one-to-one")
        self.assertTrue(_getfk("appendageBadChild").cardinality == "one-to-one")

        tdf.clear_foreign_keys("appendageBadChild")
        self.assertTrue(tdf.foreign_keys and "appendageBadChild" not in tdf.foreign_keys)
        tdf.clear_foreign_keys()
        self.assertFalse(tdf.foreign_keys)