Example #1
0
 def testSeven(self):
     tdf = TicDatFactory(**dietSchema())
     def makeIt() :
         rtn = tdf.TicDat()
         rtn.foods["a"] = {}
         rtn.categories["1"] = {}
         rtn.categories["2"] = [0,1]
         self.assertTrue(rtn.categories["2"]["minNutrition"] == 0)
         self.assertTrue(rtn.categories["2"]["maxNutrition"] == 1)
         rtn.nutritionQuantities['junk',1] = {}
         return tdf.freeze_me(rtn)
     td = makeIt()
     self.assertTrue(td.foods["a"]["cost"]==0 and td.categories["1"].values() == (0,0) and
                     td.nutritionQuantities['junk',1]["qty"] == 0)
     tdf = TicDatFactory(**dietSchema())
     tdf.set_default_values(foods = {"cost":"dontcare"},nutritionQuantities = {"qty":100} )
     td = makeIt()
     self.assertTrue(td.foods["a"]["cost"]=='dontcare' and td.categories["1"].values() == (0,0) and
                     td.nutritionQuantities['junk',1]["qty"] == 100)
     tdf = TicDatFactory(**dietSchema())
     tdf.set_default_value("categories", "minNutrition", 1)
     tdf.set_default_value("categories", "maxNutrition", 2)
     td = makeIt()
     self.assertTrue(td.foods["a"]["cost"]==0 and td.categories["1"].values() == (1,2) and
                     td.nutritionQuantities['junk',1]["qty"] == 0)
Example #2
0
    def testDiet(self):
        def doTheTests(tdf) :
            ticDat = tdf.freeze_me(tdf.TicDat(**{t:getattr(dietData(),t) for t in tdf.primary_key_fields}))
            filePath = makeCleanPath(os.path.join(_scratchDir, "diet.db"))
            tdf.sql.write_db_data(ticDat, filePath)
            sqlTicDat = tdf.sql.create_tic_dat(filePath)
            self.assertTrue(tdf._same_data(ticDat, sqlTicDat))
            def changeit() :
                sqlTicDat.categories["calories"]["minNutrition"]=12
            changeit()
            self.assertFalse(tdf._same_data(ticDat, sqlTicDat))

            self.assertTrue(self.firesException(lambda : tdf.sql.write_db_data(ticDat, filePath)))
            tdf.sql.write_db_data(ticDat, filePath, allow_overwrite=True)
            sqlTicDat = tdf.sql.create_tic_dat(filePath, freeze_it=True)
            self.assertTrue(tdf._same_data(ticDat, sqlTicDat))
            self.assertTrue(self.firesException(changeit))
            self.assertTrue(tdf._same_data(ticDat, sqlTicDat))

            filePath = makeCleanPath(os.path.join(_scratchDir, "diet.sql"))
            tdf.sql.write_sql_file(ticDat, filePath)
            sqlTicDat = tdf.sql.create_tic_dat_from_sql(filePath)
            self.assertTrue(tdf._same_data(ticDat, sqlTicDat))
            changeit()
            self.assertFalse(tdf._same_data(ticDat, sqlTicDat))

            tdf.sql.write_sql_file(ticDat, filePath, include_schema=True)
            sqlTicDat = tdf.sql.create_tic_dat_from_sql(filePath, includes_schema=True, freeze_it=True)
            self.assertTrue(tdf._same_data(ticDat, sqlTicDat))
            self.assertTrue(self.firesException(changeit))
            self.assertTrue(tdf._same_data(ticDat, sqlTicDat))

        doTheTests(TicDatFactory(**dietSchema()))

        tdf = TicDatFactory(**dietSchema())
        self.assertFalse(tdf.foreign_keys)
        tdf.set_default_values(categories =  {'maxNutrition': float("inf"), 'minNutrition': 0.0},
                               foods =  {'cost': 0.0},
                               nutritionQuantities =  {'qty': 0.0})
        addDietForeignKeys(tdf)
        ordered = tdf.sql._ordered_tables()
        self.assertTrue(ordered.index("categories") < ordered.index("nutritionQuantities"))
        self.assertTrue(ordered.index("foods") < ordered.index("nutritionQuantities"))

        ticDat = tdf.TicDat(**{t:getattr(dietData(),t) for t in tdf.primary_key_fields})
        origTicDat = tdf.copy_tic_dat(ticDat)
        self.assertTrue(tdf._same_data(ticDat, origTicDat))
        self.assertFalse(tdf.find_foreign_key_failures(ticDat))
        ticDat.nutritionQuantities['hot dog', 'boger'] = ticDat.nutritionQuantities['junk', 'protein'] = -12
        self.assertTrue(tdf.find_foreign_key_failures(ticDat) ==
        {('nutritionQuantities', 'foods', ('food', 'name'), 'many-to-one'): (('junk',), (('junk', 'protein'),)),
         ('nutritionQuantities', 'categories', ('category', 'name'), 'many-to-one'):
             (('boger',), (('hot dog', 'boger'),))})

        self.assertFalse(tdf._same_data(ticDat, origTicDat))
        tdf.remove_foreign_keys_failures(ticDat)
        self.assertFalse(tdf.find_foreign_key_failures(ticDat))
        self.assertTrue(tdf._same_data(ticDat, origTicDat))

        doTheTests(tdf)
Example #3
0
 def testTwo(self):
     objOrig = dietData()
     staticFactory = TicDatFactory(**dietSchema())
     tables = set(staticFactory.primary_key_fields)
     ticDat = staticFactory.freeze_me(staticFactory.TicDat(**{t:getattr(objOrig,t) for t in tables}))
     self.assertTrue(staticFactory.good_tic_dat_object(ticDat))
     for t in tables :
         self._assertSame(getattr(objOrig, t), getattr(ticDat,t),
                                 lambda _t : staticFactory.good_tic_dat_table(_t, t))
Example #4
0
    def testDiet(self):
        if not self.can_run:
            return
        tdf = TicDatFactory(**dietSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **{t: getattr(dietData(), t)
                   for t in tdf.primary_key_fields}))
        self._test_generic_copy(ticDat, tdf)
        self._test_generic_copy(ticDat, tdf, ["nutritionQuantities"])
        filePath = os.path.join(_scratchDir, "diet.xls")
        tdf.xls.write_file(ticDat, filePath)
        xlsTicDat = tdf.xls.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(ticDat, xlsTicDat))
        tdf.xls.write_file(ticDat, filePath + "x")
        self.assertFalse(
            tdf._same_data(ticDat, tdf.xls.create_tic_dat(filePath + "x")))
        self.assertTrue(
            tdf._same_data(
                ticDat,
                tdf.xls.create_tic_dat(filePath + "x",
                                       treat_large_as_inf=True)))
        xlsTicDat.categories["calories"]["minNutrition"] = 12
        self.assertFalse(tdf._same_data(ticDat, xlsTicDat))

        self.assertFalse(tdf.xls.find_duplicates(filePath))

        ex = self.firesException(lambda: tdf.xls.create_tic_dat(
            filePath, row_offsets={t: 1
                                   for t in tdf.all_tables}))
        self.assertTrue("field names could not be found" in ex)
        xlsTicDat = tdf.xls.create_tic_dat(
            filePath,
            row_offsets={t: 1
                         for t in tdf.all_tables},
            headers_present=False)
        self.assertTrue(tdf._same_data(xlsTicDat, ticDat))
        xlsTicDat = tdf.xls.create_tic_dat(
            filePath,
            row_offsets={t: 2
                         for t in tdf.all_tables},
            headers_present=False)
        self.assertFalse(tdf._same_data(xlsTicDat, ticDat))
        self.assertTrue(
            all(
                len(getattr(ticDat, t)) - 1 == len(getattr(xlsTicDat, t))
                for t in tdf.all_tables))
    def testDietCleaningTwo(self):
        tdf = TicDatFactory(**dietSchema())
        tdf.set_data_type("categories", "maxNutrition", min=66, inclusive_max=True)
        addDietForeignKeys(tdf)
        ticDat = tdf.copy_tic_dat(dietData())

        input_set = create_inputset_mock(tdf, ticDat)

        self.assertTrue(tdf._same_data(tdf.opalytics.create_tic_dat(input_set, raw_data=True), ticDat))

        ticDatPurged = tdf.opalytics.create_tic_dat(input_set, raw_data=False)
        self.assertFalse(tdf._same_data(ticDatPurged, ticDat))

        ticDat.categories.pop("fat")
        self.assertFalse(tdf._same_data(ticDatPurged, ticDat))
        tdf.remove_foreign_key_failures(ticDat)
        self.assertTrue(tdf._same_data(ticDatPurged, ticDat))
    def testDietCleaningThree(self):
        tdf = TicDatFactory(**dietSchema())
        tdf.add_data_row_predicate("categories", lambda row : row["maxNutrition"] >= 66)
        addDietForeignKeys(tdf)
        ticDat = tdf.copy_tic_dat(dietData())

        input_set = create_inputset_mock(tdf, ticDat)

        self.assertTrue(tdf._same_data(tdf.opalytics.create_tic_dat(input_set, raw_data=True), ticDat))

        ticDatPurged = tdf.opalytics.create_tic_dat(input_set, raw_data=False)
        self.assertFalse(tdf._same_data(ticDatPurged, ticDat))

        ticDat.categories.pop("fat")
        self.assertFalse(tdf._same_data(ticDatPurged, ticDat))
        tdf.remove_foreign_key_failures(ticDat)
        self.assertTrue(tdf._same_data(ticDatPurged, ticDat))
Example #7
0
    def testDiet(self):
        tdf = TicDatFactory(**dietSchema())
        ticDat = tdf.freeze_me(tdf.TicDat(**{t:getattr(dietData(),t) for t in tdf.primary_key_fields}))
        filePath = makeCleanPath(os.path.join(_scratchDir, "diet.mdb"))
        tdf.mdb.write_file(ticDat, filePath)
        mdbTicDat = tdf.mdb.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(ticDat, mdbTicDat))
        def changeit() :
            mdbTicDat.categories["calories"]["minNutrition"]=12
        changeit()
        self.assertFalse(tdf._same_data(ticDat, mdbTicDat))

        self.assertTrue(self.firesException(lambda : tdf.mdb.write_file(ticDat, filePath)))
        tdf.mdb.write_file(ticDat, filePath, allow_overwrite=True)
        mdbTicDat = tdf.mdb.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, mdbTicDat))
        self.assertTrue(self.firesException(changeit))
        self.assertTrue(tdf._same_data(ticDat, mdbTicDat))
Example #8
0
    def testDataTypes_two(self):
        tdf = TicDatFactory(**dietSchema())
        pdf = PanDatFactory(**tdf.schema())
        def makeIt() :
            rtn = tdf.TicDat()
            rtn.foods["a"] = 12
            rtn.foods["b"] = None
            rtn.foods[None] = 101
            rtn.categories["1"] = {"maxNutrition":100, "minNutrition":40}
            rtn.categories["2"] = [10,20]
            for f, p in itertools.product(rtn.foods, rtn.categories):
                rtn.nutritionQuantities[f,p] = 5
            rtn.nutritionQuantities['a', 2] = 12
            return tdf.copy_to_pandas(rtn, drop_pk_columns=False)
        dat = makeIt()
        errs = pdf.find_data_type_failures(dat)
        self.assertTrue(len(errs) == 2 and not pdf.find_duplicates(dat))
        dat_copied = pdf.copy_pan_dat(dat)
        pdf.replace_data_type_failures(dat)
        self.assertTrue(pdf._same_data(dat, dat_copied, epsilon=0.00001))
        pdf2 = pdf.clone()
        pdf2.set_default_value("foods", "name", "a")
        pdf2.set_default_value("nutritionQuantities", "food", "a")
        pdf2.replace_data_type_failures(dat_copied)
        self.assertFalse(pdf._same_data(dat, dat_copied, epsilon=0.00001))
        self.assertFalse(pdf.find_data_type_failures(dat_copied))
        dups = pdf.find_duplicates(dat_copied)
        self.assertTrue(len(dups) == 2 and len(dups["foods"]) == 1 and len(dups["nutritionQuantities"]) == 2)

        from pandas import isnull
        def noneify(iter_of_tuples):
            return {tuple(None if isnull(_) else _ for _ in tuple_) for tuple_ in iter_of_tuples}
        self.assertTrue(noneify(errs['nutritionQuantities', 'food'].itertuples(index=False)) ==
                        {(None, "1", 5), (None, "2", 5)})
        self.assertTrue(noneify(errs['foods', 'name'].itertuples(index=False)) == {(None, 101)})
        pdf = PanDatFactory(**tdf.schema())
        pdf.set_data_type("foods", "name", nullable=True, strings_allowed='*')
        pdf.set_data_type("nutritionQuantities", "food", nullable=True, strings_allowed='*')
        self.assertFalse(pdf.find_data_type_failures(dat))
        pdf.set_data_type("foods", "cost", nullable=False)
        errs = pdf.find_data_type_failures(dat)
        self.assertTrue(len(errs) == 1)
        self.assertTrue(noneify(errs['foods', 'cost'].itertuples(index=False)) == {('b', None)})
Example #9
0
 def testDiet(self):
     tdf = TicDatFactory(**dietSchema())
     tdf.enable_foreign_key_links()
     oldDat = tdf.TicDat(
         **{t: getattr(dietData(), t)
            for t in tdf.primary_key_fields})
     oldDatStr = create_opl_text(tdf, oldDat)
     newDat = read_opl_text(tdf, oldDatStr)
     self.assertFalse(tdf._same_data(oldDat, newDat))
     oldDat.categories["protein"][
         "maxNutrition"] = 12  # Remove infinity from the data
     changedDatStr = create_opl_text(tdf, oldDat)
     changedDat = read_opl_text(tdf, changedDatStr)
     self.assertTrue(tdf._same_data(oldDat, changedDat))
     tdf.opl_prepend = "pre_"
     origStr, changedDatStr = changedDatStr, create_opl_text(tdf, oldDat)
     changedDat = read_opl_text(tdf, changedDatStr)
     self.assertTrue(tdf._same_data(oldDat, changedDat))
     self.assertFalse(origStr == changedDatStr)
    def testDiet(self):
        if not self.can_run:
            return
        for hack, raw_data, activeEnabled in list(
                product(*(([True, False], ) * 3))):
            tdf = TicDatFactory(**dietSchema())
            ticDat = tdf.freeze_me(tdf.copy_tic_dat(dietData()))
            inputset = create_inputset_mock(tdf, ticDat, hack, activeEnabled)
            self.assertFalse(
                tdf.opalytics.find_duplicates(inputset, raw_data=raw_data))
            ticDat2 = tdf.opalytics.create_tic_dat(inputset, raw_data=raw_data)
            self.assertTrue(tdf._same_data(ticDat, ticDat2))

            def change():
                ticDat2.categories["calories"]["minNutrition"] = 12

            self.assertFalse(firesException(change))
            self.assertFalse(tdf._same_data(ticDat, ticDat2))

            ticDat2 = tdf.opalytics.create_tic_dat(inputset,
                                                   freeze_it=True,
                                                   raw_data=raw_data)
            self.assertTrue(tdf._same_data(ticDat, ticDat2))
            self.assertTrue(firesException(change))
            self.assertTrue(tdf._same_data(ticDat, ticDat2))

            tdf2 = TicDatFactory(
                **{
                    k: [pks, list(dfs) + ["dmy"]]
                    for k, (pks, dfs) in tdf.schema().items()
                })
            _dat = tdf2.copy_tic_dat(ticDat)
            self.assertTrue(
                tdf._same_data(
                    ticDat,
                    tdf.opalytics.create_tic_dat(create_inputset_mock(
                        tdf2, _dat, hack),
                                                 raw_data=raw_data)))

            ex = self.firesException(lambda: tdf2.opalytics.create_tic_dat(
                inputset, raw_data=raw_data))
            self.assertTrue("field dmy can't be found" in ex)
Example #11
0
    def testDiet(self):
        tdf = TicDatFactory(**dietSchema())
        ticDat = tdf.freeze_me(tdf.TicDat(**{t:getattr(dietData(),t) for t in tdf.primary_key_fields}))
        filePath = os.path.join(_scratchDir, "diet.xls")
        tdf.xls.write_file(ticDat, filePath)
        xlsTicDat = tdf.xls.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(ticDat, xlsTicDat))
        xlsTicDat.categories["calories"]["minNutrition"]=12
        self.assertFalse(tdf._same_data(ticDat, xlsTicDat))

        self.assertFalse(tdf.xls.get_duplicates(filePath))

        ex = self.firesException(lambda :
                                 tdf.xls.create_tic_dat(filePath, row_offsets={t:1 for t in tdf.all_tables}))
        self.assertTrue("field names could not be found" in ex)
        xlsTicDat = tdf.xls.create_tic_dat(filePath, row_offsets={t:1 for t in tdf.all_tables}, headers_present=False)
        self.assertTrue(tdf._same_data(xlsTicDat, ticDat))
        xlsTicDat = tdf.xls.create_tic_dat(filePath, row_offsets={t:2 for t in tdf.all_tables}, headers_present=False)
        self.assertFalse(tdf._same_data(xlsTicDat, ticDat))
        self.assertTrue(all(len(getattr(ticDat, t))-1 == len(getattr(xlsTicDat, t)) for t in tdf.all_tables))
Example #12
0
 def test_fk_max_failures(self):
     tdf = TicDatFactory(**dietSchema())
     addDietForeignKeys(tdf)
     dat = tdf.TicDat(nutritionQuantities=[[f"food_{_}", f"cat_{_}", 10]
                                           for _ in range(10)])
     pan_dat = tdf.copy_to_pandas(dat, drop_pk_columns=False)
     pdf = PanDatFactory.create_from_full_schema(
         tdf.schema(include_ancillary_info=True))
     errs = pdf.find_foreign_key_failures(pan_dat)
     self.assertTrue(
         len(errs) == 2 and all(len(_) == 10 for _ in errs.values()))
     errs = pdf.find_foreign_key_failures(pan_dat, max_failures=11)
     self.assertTrue(
         len(errs) == 2 and set(map(len, errs.values())) == {10, 1})
     errs = pdf.find_foreign_key_failures(pan_dat, max_failures=10)
     self.assertTrue(
         len(errs) == 1 and all(len(_) == 10 for _ in errs.values()))
     errs = pdf.find_foreign_key_failures(pan_dat, max_failures=9)
     self.assertTrue(
         len(errs) == 1 and all(len(_) == 9 for _ in errs.values()))
Example #13
0
    def testCsvSimple(self):
        if not self.can_run:
            return
        tdf = TicDatFactory(**dietSchema())
        pdf = PanDatFactory(**dietSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **{t: getattr(dietData(), t)
                   for t in tdf.primary_key_fields}))
        panDat = pan_dat_maker(dietSchema(), ticDat)
        dirPath = os.path.join(_scratchDir, "diet_csv")
        pdf.csv.write_directory(panDat, dirPath)
        panDat2 = pdf.csv.create_pan_dat(dirPath)
        self.assertTrue(pdf._same_data(panDat, panDat2))
        pdf2 = PanDatFactory(**{t: '*' for t in pdf.all_tables})
        panDat2 = pdf2.csv.create_pan_dat(dirPath)
        self.assertTrue(pdf._same_data(panDat, panDat2))

        tdf = TicDatFactory(**netflowSchema())
        pdf = PanDatFactory(**netflowSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **
                {t: getattr(netflowData(), t)
                 for t in tdf.primary_key_fields}))
        panDat = pan_dat_maker(netflowSchema(), ticDat)
        dirPath = os.path.join(_scratchDir, "netflow_csv")
        pdf.csv.write_directory(panDat, dirPath)
        panDat2 = pdf.csv.create_pan_dat(dirPath)
        self.assertTrue(pdf._same_data(panDat, panDat2))
        pdf2 = PanDatFactory(**{t: '*' for t in pdf.all_tables})
        pdf2.csv.write_directory(panDat, dirPath)
        panDat2 = pdf2.csv.create_pan_dat(dirPath)
        self.assertTrue(pdf._same_data(panDat, panDat2))

        tdf = TicDatFactory(**dietSchema())
        pdf = PanDatFactory(**dietSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **{t: getattr(dietData(), t)
                   for t in tdf.primary_key_fields}))
        panDat = pan_dat_maker(dietSchema(), ticDat)
        dirPath = os.path.join(_scratchDir, "diet_csv")
        pdf.csv.write_directory(panDat, dirPath, decimal=",")
        panDat2 = pdf.csv.create_pan_dat(dirPath)
        self.assertFalse(pdf._same_data(panDat, panDat2))
        panDat2 = pdf.csv.create_pan_dat(dirPath, decimal=",")
        self.assertTrue(pdf._same_data(panDat, panDat2))
Example #14
0
    def testVariousCoverages(self):
        pdf = PanDatFactory(**dietSchema())
        _d = dict(categories={"minNutrition": 0, "maxNutrition": float("inf")},
                               foods={"cost": 0}, nutritionQuantities={"qty": 0})
        pdf.set_default_values(**_d)
        self.assertTrue(pdf._default_values == _d)
        pdf = PanDatFactory(**netflowSchema())
        addNetflowForeignKeys(pdf)
        pdf.clear_foreign_keys("arcs")
        self.assertTrue({_[0] for _ in pdf._foreign_keys} == {"cost", "inflow"})

        pdf.add_data_row_predicate("arcs", lambda row: True)
        pdf.add_data_row_predicate("arcs", lambda row: True, "dummy")
        pdf.add_data_row_predicate("arcs", None, 0)
        pdf = pdf.clone()
        self.assertTrue(set(pdf._data_row_predicates["arcs"]) == {"dummy"})
        pdf = PanDatFactory(pdf_table_one=[["A Field"], []], pdf_table_two=[["B Field"],[]],
                            pdf_table_three=[["C Field"], []])
        pdf.add_foreign_key("pdf_table_one", "pdf_table_two", ["A Field", "B Field"])
        pdf.add_foreign_key("pdf_table_two", "pdf_table_three", ["B Field", "C Field"])
        pdf.add_foreign_key("pdf_table_three", "pdf_table_one", ["C Field", "A Field"])
Example #15
0
    def testDietCleaningOpalyticsFour(self):
        tdf = TicDatFactory(**dietSchema())
        tdf.add_data_row_predicate("categories",
                                   lambda row: row["maxNutrition"] >= 66)
        tdf.set_data_type("categories",
                          "minNutrition",
                          max=0,
                          inclusive_max=True)
        addDietForeignKeys(tdf)
        ticDat = tdf.copy_tic_dat(dietData())

        input_set = create_inputset_mock(tdf, ticDat)

        pdf = PanDatFactory(**tdf.schema())
        pdf.add_data_row_predicate("categories",
                                   lambda row: row["maxNutrition"] >= 66)
        pdf.set_data_type("categories",
                          "minNutrition",
                          max=0,
                          inclusive_max=True)
        pdf.add_data_row_predicate("categories",
                                   lambda row: row["maxNutrition"] >= 66)
        addDietForeignKeys(pdf)

        panDat = pdf.opalytics.create_pan_dat(input_set, raw_data=True)
        self.assertTrue(tdf._same_data(pdf.copy_to_tic_dat(panDat), ticDat))

        panDatPurged = pdf.opalytics.create_pan_dat(input_set, raw_data=False)
        self.assertFalse(
            tdf._same_data(pdf.copy_to_tic_dat(panDatPurged), ticDat))

        ticDat.categories.pop("fat")
        ticDat.categories.pop("calories")
        ticDat.categories.pop("protein")

        self.assertFalse(
            tdf._same_data(pdf.copy_to_tic_dat(panDatPurged), ticDat))
        tdf.remove_foreign_key_failures(ticDat)
        self.assertTrue(
            tdf._same_data(pdf.copy_to_tic_dat(panDatPurged), ticDat))
Example #16
0
    def testOne(self):
        def _cleanIt(x) :
            x.foods['macaroni'] = {"cost": 2.09}
            x.foods['milk'] = {"cost":0.89}
            return x
        dataObj = dietData()
        tdf = TicDatFactory(**dietSchema())
        self.assertTrue(tdf.good_tic_dat_object(dataObj))
        dataObj2 = tdf.copy_tic_dat(dataObj)
        dataObj3 = tdf.copy_tic_dat(dataObj, freeze_it=True)
        dataObj4 = tdf.TicDat(**tdf.as_dict(dataObj3))
        self.assertTrue(all (tdf._same_data(dataObj, x) and dataObj is not x for x in (dataObj2, dataObj3, dataObj4)))
        dataObj = _cleanIt(dataObj)
        self.assertTrue(tdf.good_tic_dat_object(dataObj))
        self.assertTrue(all (tdf._same_data(dataObj, x) and dataObj is not x for x in (dataObj2, dataObj3)))
        def hackit(x) :
            x.foods["macaroni"] = 100
        self.assertTrue(self.firesException(lambda :hackit(dataObj3)))
        hackit(dataObj2)
        self.assertTrue(not tdf._same_data(dataObj, dataObj2) and  tdf._same_data(dataObj, dataObj3))

        msg = []
        dataObj.foods[("milk", "cookies")] = {"cost": float("inf")}
        dataObj.boger = object()
        self.assertFalse(tdf.good_tic_dat_object(dataObj) or
                         tdf.good_tic_dat_object(dataObj, bad_message_handler =msg.append))
        self.assertTrue({"foods : Inconsistent key lengths"} == set(msg))
        self.assertTrue(all(tdf.good_tic_dat_table(getattr(dataObj, t), t)
                            for t in ("categories", "nutritionQuantities")))

        dataObj = dietData()
        dataObj.categories["boger"] = {"cost":1}
        dataObj.categories["boger"] = {"cost":1}
        self.assertFalse(tdf.good_tic_dat_object(dataObj) or
                         tdf.good_tic_dat_object(dataObj, bad_message_handler=msg.append))
        self.assertTrue({'foods : Inconsistent key lengths',
                         'categories : Inconsistent data field name keys.'} == set(msg))
        ex = firesException(lambda : tdf.freeze_me(tdf.TicDat(**{t:getattr(dataObj,t)
                                                                for t in tdf.primary_key_fields}))).message
        self.assertTrue("categories cannot be treated as a ticDat table : Inconsistent data field name keys" in ex)
Example #17
0
    def testDiet(self):
        tdf = TicDatFactory(**dietSchema())
        ticDat = tdf.freeze_me(tdf.TicDat(**{t:getattr(dietData(),t) for t in tdf.primary_key_fields}))
        dirPath = os.path.join(_scratchDir, "diet")
        tdf.csv.write_directory(ticDat,dirPath)
        self.assertFalse(tdf.csv.get_duplicates(dirPath))
        csvTicDat = tdf.csv.create_tic_dat(dirPath)
        self.assertTrue(tdf._same_data(ticDat, csvTicDat))
        def change() :
            csvTicDat.categories["calories"]["minNutrition"]=12
        self.assertFalse(firesException(change))
        self.assertFalse(tdf._same_data(ticDat, csvTicDat))

        self.assertTrue(self.firesException(lambda  :
            tdf.csv.write_directory(ticDat, dirPath, dialect="excel_t")).endswith(
                                                                        "Invalid dialect excel_t"))

        tdf.csv.write_directory(ticDat, dirPath, dialect="excel-tab", allow_overwrite=True)
        self.assertTrue(self.firesException(lambda : tdf.csv.create_tic_dat(dirPath, freeze_it=True)))
        csvTicDat = tdf.csv.create_tic_dat(dirPath, freeze_it=True, dialect="excel-tab")
        self.assertTrue(firesException(change))
        self.assertTrue(tdf._same_data(ticDat, csvTicDat))
Example #18
0
    def testDiet(self):
        if not _can_accdb_unit_test:
            return
        tdf = TicDatFactory(**dietSchema())
        ticDat = tdf.freeze_me(tdf.TicDat(**{t:getattr(dietData(),t) for t in tdf.primary_key_fields}))
        filePath = makeCleanPath(os.path.join(_scratchDir, "diet.accdb"))
        tdf.mdb.write_file(ticDat, filePath)
        #shutil.copy(filePath, "diet.accdb") #uncomment to make readonly test file as .accdb
        self.assertFalse(tdf.mdb.find_duplicates(filePath))
        accdbTicDat = tdf.mdb.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(ticDat, accdbTicDat))
        def changeit() :
            accdbTicDat.categories["calories"]["minNutrition"]=12
        changeit()
        self.assertFalse(tdf._same_data(ticDat, accdbTicDat))

        self.assertTrue(self.firesException(lambda : tdf.mdb.write_file(ticDat, filePath)))
        tdf.mdb.write_file(ticDat, filePath, allow_overwrite=True)
        accdbTicDat = tdf.mdb.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, accdbTicDat))
        self.assertTrue(self.firesException(changeit))
        self.assertTrue(tdf._same_data(ticDat, accdbTicDat))
Example #19
0
    def testDiet(self):
        if not _can_unit_test:
            return
        tdf = TicDatFactory(**dietSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **{t: getattr(dietData(), t)
                   for t in tdf.primary_key_fields}))
        filePath = "diet.accdb"
        self.assertFalse(tdf.mdb.find_duplicates(filePath))
        mdbTicDat = tdf.mdb.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(ticDat, mdbTicDat))

        def changeit():
            mdbTicDat.categories["calories"]["minNutrition"] = 12

        changeit()
        self.assertFalse(tdf._same_data(ticDat, mdbTicDat))
        mdbTicDat = tdf.mdb.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, mdbTicDat))
        self.assertTrue(self.firesException(changeit))
        self.assertTrue(tdf._same_data(ticDat, mdbTicDat))
Example #20
0
    def testFive(self):
        tdf = TicDatFactory(**netflowSchema())
        addNetflowForeignKeys(tdf)
        dat = tdf.freeze_me(tdf.TicDat(**{t : getattr(netflowData(), t) for t in tdf.all_tables}))
        obfudat = tdf.obfusimplify(dat, freeze_it=1)
        self.assertFalse(tdf._same_data(dat, obfudat.copy))
        for (s,d),r in obfudat.copy.arcs.items():
            self.assertFalse((s,d) in dat.arcs)
            self.assertTrue(dat.arcs[obfudat.renamings[s][1], obfudat.renamings[d][1]]["capacity"] == r["capacity"])
        obfudat = tdf.obfusimplify(dat, freeze_it=1, skip_tables=["commodities", "nodes"])
        self.assertTrue(tdf._same_data(obfudat.copy, dat))

        tdf = TicDatFactory(**netflowSchema())
        addNetflowForeignKeys(tdf)
        mone, one2one = "many-to-one",  "one-to-one"
        fk, fkm = _ForeignKey, _ForeignKeyMapping
        self.assertTrue(set(tdf.foreign_keys) ==  {fk("arcs", 'nodes', fkm('source',u'name'), mone),
                            fk("arcs", 'nodes', fkm('destination',u'name'), mone),
                            fk("cost", 'nodes', fkm('source',u'name'), mone),
                            fk("cost", 'nodes', fkm('destination',u'name'), mone),
                            fk("cost", 'commodities', fkm('commodity',u'name'), mone),
                            fk("inflow", 'commodities', fkm('commodity',u'name'), mone),
                            fk("inflow", 'nodes', fkm('node',u'name'), mone)})

        tdf.clear_foreign_keys("cost")
        self.assertTrue(set(tdf.foreign_keys) ==  {fk("arcs", 'nodes', fkm('source',u'name'), mone),
                            fk("arcs", 'nodes', fkm('destination',u'name'), mone),
                            fk("inflow", 'commodities', fkm('commodity',u'name'), mone),
                            fk("inflow", 'nodes', fkm('node',u'name'), mone)})

        tdf = TicDatFactory(**dietSchema())
        self.assertFalse(tdf.foreign_keys)
        addDietForeignKeys(tdf)

        self.assertTrue(set(tdf.foreign_keys) == {fk("nutritionQuantities", 'categories', fkm('category',u'name'), mone),
                                                  fk("nutritionQuantities", 'foods', fkm('food',u'name'), mone)})

        tdf.TicDat()
        self.assertTrue(self.firesException(lambda  : tdf.clear_foreign_keys("nutritionQuantities")))
        self.assertTrue(tdf.foreign_keys)
        tdf = TicDatFactory(**dietSchema())
        addDietForeignKeys(tdf)
        tdf.clear_foreign_keys("nutritionQuantities")
        self.assertFalse(tdf.foreign_keys)

        tdf = TicDatFactory(parentTable = [["pk"],["pd1", "pd2", "pd3"]],
                            goodChild = [["gk"], ["gd1", "gd2"]],
                            badChild = [["bk1", "bk2"], ["bd"]],
                            appendageChild = [["ak"], ["ad1", "ad2"]],
                            appendageBadChild = [["bk1", "bk2"], []])
        tdf.add_foreign_key("goodChild", "parentTable", fkm("gd1" , "pk"))
        tdf.add_foreign_key("badChild", "parentTable", ["bk2" , "pk"])
        self.assertTrue("many-to-many" in self.firesException(lambda :
                tdf.add_foreign_key("badChild", "parentTable", ["bd", "pd2"])))
        tdf.add_foreign_key("appendageChild", "parentTable", ["ak", "pk"])
        tdf.add_foreign_key("appendageBadChild", "badChild", (("bk2", "bk2"), ("bk1","bk1")))
        fks = tdf.foreign_keys
        _getfk = lambda t : next(_ for _ in fks if _.native_table == t)
        self.assertTrue(_getfk("goodChild").cardinality == "many-to-one")
        self.assertTrue(_getfk("badChild").cardinality == "many-to-one")
        self.assertTrue(_getfk("appendageChild").cardinality == "one-to-one")
        self.assertTrue(_getfk("appendageBadChild").cardinality == "one-to-one")

        tdf.clear_foreign_keys("appendageBadChild")
        self.assertTrue(tdf.foreign_keys and "appendageBadChild" not in tdf.foreign_keys)
        tdf.clear_foreign_keys()
        self.assertFalse(tdf.foreign_keys)
Example #21
0
    def testJsonSimple(self):
        if not self.can_run:
            return
        tdf = TicDatFactory(**dietSchema())
        pdf = PanDatFactory(**dietSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **{t: getattr(dietData(), t)
                   for t in tdf.primary_key_fields}))
        panDat = pan_dat_maker(dietSchema(), ticDat)
        filePath = os.path.join(_scratchDir, "diet.json")
        pdf.json.write_file(panDat, filePath)
        panDat2 = pdf.json.create_pan_dat(filePath)
        self.assertTrue(pdf._same_data(panDat, panDat2, epsilon=1e-5))
        pdf2 = PanDatFactory(**{t: '*' for t in pdf.all_tables})
        pdf2.json.write_file(panDat, filePath)
        panDat2 = pdf2.json.create_pan_dat(filePath)
        self.assertTrue(pdf._same_data(panDat, panDat2, epsilon=1e-5))

        re_fielded_schema = {
            "categories": (("name", ), ["maxNutrition", "minNutrition"]),
            "foods": [["name"], []],
            "nutritionQuantities": (["food", "category"], ["qty"])
        }
        pdf3 = PanDatFactory(**re_fielded_schema)
        panDat3 = pdf3.json.create_pan_dat(filePath)
        for t, (pks, dfs) in re_fielded_schema.items():
            self.assertTrue(
                list(pks) + list(dfs) == list(getattr(panDat3, t).columns))

        tdf = TicDatFactory(**netflowSchema())
        pdf = PanDatFactory(**netflowSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **
                {t: getattr(netflowData(), t)
                 for t in tdf.primary_key_fields}))
        panDat = pan_dat_maker(netflowSchema(), ticDat)
        filePath = os.path.join(_scratchDir, "netflow.json")
        pdf.json.write_file(panDat, filePath)
        panDat2 = pdf.json.create_pan_dat(filePath)
        self.assertTrue(pdf._same_data(panDat, panDat2, epsilon=1e-5))
        panDat3 = pdf.json.create_pan_dat(pdf.json.write_file(panDat, ""))
        self.assertTrue(pdf._same_data(panDat, panDat3))
        dicted = json.loads(pdf.json.write_file(panDat, ""))
        panDat4 = pdf.PanDat(**dicted)
        self.assertTrue(pdf._same_data(panDat, panDat4))
        pdf2 = PanDatFactory(**{t: '*' for t in pdf.all_tables})
        panDat5 = pdf2.PanDat(**dicted)
        self.assertTrue(pdf._same_data(panDat, panDat5))

        tdf = TicDatFactory(**dietSchema())
        pdf = PanDatFactory(**dietSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **{t: getattr(dietData(), t)
                   for t in tdf.primary_key_fields}))
        panDat = pan_dat_maker(dietSchema(), ticDat)
        filePath = os.path.join(_scratchDir, "diet.json")
        pdf.json.write_file(panDat, filePath, orient='columns', index=True)
        # the following doesn't generate a TicDatError, which is fine
        self.assertTrue(
            firesException(lambda: pdf.json.create_pan_dat(filePath)))
        panDat2 = pdf.json.create_pan_dat(filePath, orient='columns')
        self.assertTrue(pdf._same_data(panDat, panDat2, epsilon=1e-5))
        panDat3 = pdf.json.create_pan_dat(pdf.json.write_file(
            panDat, "", orient='columns'),
                                          orient="columns")
        self.assertTrue(pdf._same_data(panDat, panDat3, epsilon=1e-5))
        dicted = json.loads(pdf.json.write_file(panDat, "", orient='columns'))
        panDat4 = pdf.PanDat(**dicted)
        self.assertTrue(pdf._same_data(panDat, panDat4, epsilon=1e-5))
Example #22
0
    def testDataRowPredicatesTwo(self):
        tdf = TicDatFactory(**dietSchema())
        pdf = PanDatFactory(**dietSchema())
        num_calls=[0]
        mess_it_up=[]
        def pre_processor(dat):
            num_calls[0] += 1
            if mess_it_up:
                dat.messing_it_up+=1
            return {t:len(getattr(dat, t)) for t in tdf.all_tables}
        pdf.add_data_row_predicate("foods", lambda row, y: y==12, predicate_kwargs_maker=lambda dat: {"y":12})
        pdf.add_data_row_predicate("categories", lambda row, nutritionQuantities, foods, categories:
                               row["name"] == "fat" or categories == 4,
                               predicate_name="catfat", predicate_kwargs_maker=pre_processor)
        pdf.add_data_row_predicate("foods", lambda row, nutritionQuantities, foods, categories:
                               row["name"] == "pizza" or foods == 9,
                               predicate_name= "foodza", predicate_kwargs_maker=pre_processor)
        def dummy_kwargs_maker(dat):
            if pdf.good_pan_dat_object(dat):
                return {"x":1}
        for t in tdf.all_tables:
            pdf.add_data_row_predicate(t, lambda row, x: x==1, predicate_name=f"dummy_{t}",
                                       predicate_kwargs_maker=dummy_kwargs_maker)
        pandat = pdf.copy_pan_dat(copy_to_pandas_with_reset(tdf, tdf.copy_tic_dat(dietData())))
        self.assertFalse(pdf.find_data_row_failures(pandat))
        self.assertTrue(num_calls[0] == 1)
        pandat.foods = pandat.foods[pandat.foods["name"] != "pizza"].copy()
        pandat.categories = pandat.categories[pandat.categories["name"] != "fat"].copy()
        fails = pdf.find_data_row_failures(pandat)
        self.assertTrue(num_calls[0] == 2)
        self.assertTrue(set(map(tuple, fails)) == {('categories', 'catfat'), ('foods', 'foodza')})
        self.assertTrue(set(fails['categories', 'catfat']["name"]) == set(dietData().categories).difference(["fat"]))
        self.assertTrue(set(fails['foods', 'foodza']["name"]) == set(dietData().foods).difference(["pizza"]))

        mess_it_up.append(1)
        ex = []
        try:
            pdf.find_data_row_failures(pandat)
        except Exception as e:
            ex[:] = [str(e.__class__)]
        self.assertTrue("AttributeError" in ex[0])
        fails = pdf.find_data_row_failures(pandat, exception_handling="Handled as Failure")
        self.assertTrue(set(map(tuple, fails)) == {('categories', 'catfat'), ('foods', 'foodza')})
        self.assertTrue(num_calls[0] == 4)
        for v in fails.values():
            self.assertTrue(v.primary_key == '*' and "no attribute" in v.error_message)
        pdf = pdf.clone()
        fails = pdf.find_data_row_failures(pandat, exception_handling="Handled as Failure")
        self.assertTrue(set(map(tuple, fails)) == {('categories', 'catfat'), ('foods', 'foodza')})
        mess_it_up=[]
        def fail_on_bad_name(row, bad_name):
            if row["name"] == bad_name:
                return f"{bad_name} is bad"
            return True
        pdf.add_data_row_predicate("foods", fail_on_bad_name, predicate_name="baddy",
                                   predicate_kwargs_maker=lambda dat: {"bad_name": sorted(dat.foods["name"])[0]},
                                   predicate_failure_response="Error Message")
        pandat = pdf.copy_pan_dat(copy_to_pandas_with_reset(tdf, tdf.copy_tic_dat(dietData())))
        fails = pdf.find_data_row_failures(pandat)
        self.assertTrue(set(map(tuple, fails)) == {('foods', 'baddy')})
        self.assertTrue(len(fails['foods', 'baddy']) == 1)
        self.assertTrue(list(fails['foods', 'baddy']["Error Message"])[0] == "chicken is bad")
Example #23
0
    def testDataPredicates(self):
        # this test won't run properly if the -O flag is applied
        if not self.canRun:
            return
        tdf = TicDatFactory(**dietSchema())
        pdf = PanDatFactory(**dietSchema())

        ticdat = tdf.TicDat()
        ticdat.foods["a"] = 12
        ticdat.foods["b"] = None
        ticdat.categories["1"] = {"maxNutrition":100, "minNutrition":40}
        ticdat.categories["2"] = [21,20]
        for f, p in itertools.product(ticdat.foods, ticdat.categories):
            ticdat.nutritionQuantities[f,p] = 5


        pandat = pdf.copy_pan_dat(copy_to_pandas_with_reset(tdf, ticdat))
        self.assertFalse(pdf.find_duplicates(pandat))
        self.assertFalse(pdf.find_data_row_failures(pandat))

        ticdat.nutritionQuantities['a', 2] = 12
        ticdat.categories["3"] = ['a', 100]
        pandat_2 = pdf.copy_pan_dat(copy_to_pandas_with_reset(tdf, ticdat))

        def perform_predicate_checks(sch):
            pdf = PanDatFactory(**sch)
            pdf.add_data_row_predicate("foods", lambda row: numericish(row["cost"]) and not isnan(row["cost"]), "cost")
            good_qty = lambda qty : 5 < qty <= 12
            pdf.add_data_row_predicate("nutritionQuantities", lambda row: good_qty(row["qty"]), "qty")
            pdf.add_data_row_predicate("categories",
                                       lambda row: row["maxNutrition"] >= row["minNutrition"],
                                       "minmax")
            pdf2 = PanDatFactory(**sch)
            def make_error_message_predicate(f, name):
                def error_message_predicate(row):
                    rtn = f(row)
                    if rtn:
                        return True
                    return f"{name} failed!"
                return error_message_predicate
            for t, preds in pdf._data_row_predicates.items():
                for p_name, rpi in preds.items():
                    pdf2.add_data_row_predicate(t, make_error_message_predicate(rpi.predicate, p_name),
                                                predicate_name=p_name, predicate_failure_response="Error Message")
            failed = pdf.find_data_row_failures(pandat)
            failed2 = pdf2.find_data_row_failures(pandat)
            self.assertTrue(set(failed) == set(failed2) ==  {('foods', 'cost'),
                                            ('nutritionQuantities', 'qty'), ('categories', 'minmax')})
            self.assertTrue(set(failed['foods', 'cost']["name"]) == set(failed2['foods', 'cost']["name"]) == {'b'})
            for f in [failed, failed2]:
                self.assertTrue(set({(v["food"], v["category"])
                                     for v in f['nutritionQuantities', 'qty'].T.to_dict().values()}) ==
                                    {('b', '1'), ('a', '2'), ('a', '1'), ('b', '2')})
                self.assertTrue(set(f['categories', 'minmax']["name"]) == {'2'})
            for t, n in failed2:
                self.assertTrue(set(failed2[t, n]["Error Message"]) == {f'{n} failed!'})
            for _pdf in [pdf, pdf2]:
                failed = _pdf.find_data_row_failures(pandat, as_table=False)
                self.assertTrue(4 == failed['nutritionQuantities', 'qty'].value_counts()[True])
                ex = []
                try:
                    _pdf.find_data_row_failures(pandat_2)
                except Exception as e:
                    ex[:] = [str(e.__class__)]
                self.assertTrue("TypeError" in ex[0])
                failed = _pdf.find_data_row_failures(pandat_2, exception_handling="Handled as Failure")
                self.assertTrue(set(failed['categories', 'minmax']["name"]) == {'2', '3'})
            failed = pdf2.find_data_row_failures(pandat_2, exception_handling="Handled as Failure")
            df = failed['categories', 'minmax']
            err_str = list(df[df['name'] == '3']["Error Message"])[0]
            self.assertTrue(err_str=="Exception<'>=' not supported between instances of 'int' and 'str'>")

        perform_predicate_checks(dietSchema())
        perform_predicate_checks({t:'*' for t in dietSchema()})

        tdf = TicDatFactory(**netflowSchema())
        tdf.enable_foreign_key_links()
        addNetflowForeignKeys(tdf)
        pdf = PanDatFactory(**netflowSchema())
        ticdat = tdf.copy_tic_dat(netflowData())
        for n in ticdat.nodes["Detroit"].arcs_source:
            ticdat.arcs["Detroit", n] = n
        pandat = pdf.copy_pan_dat(copy_to_pandas_with_reset(tdf, ticdat))
        self.assertFalse(pdf.find_duplicates(pandat))
        self.assertFalse(pdf.find_data_row_failures(pandat))

        pdf = PanDatFactory(**netflowSchema())
        pdf.add_data_row_predicate("arcs", lambda row: True, "capacity")
        self.assertFalse(pdf.find_data_row_failures(pandat))

        pdf = PanDatFactory(**netflowSchema())
        good_capacity = lambda capacity: numericish(capacity) or capacity in ["Boston", "Seattle", "lumberjack"]
        pdf.add_data_row_predicate("arcs", lambda row: good_capacity(row["capacity"]), "capacity")
        failed = pdf.find_data_row_failures(pandat)
        self.assertTrue(set(failed) == {('arcs', 'capacity')})
        self.assertTrue(set({(v["source"], v["destination"])
                             for v in failed['arcs', 'capacity'].T.to_dict().values()}) == {("Detroit", "New York")})

        pdf = PanDatFactory(table=[[],["Field", "Error Message", "Error Message (1)"]])
        pdf.add_data_row_predicate("table", predicate=lambda row: f"Oops {row['Field']}" if row["Field"] > 1 else True,
                                   predicate_name="silly", predicate_failure_response="Error Message")
        df = DataFrame({"Field":[2, 1], "Error Message":["what", "go"], "Error Message (1)": ["now", "go"]})
        fails = pdf.find_data_row_failures(pdf.PanDat(table=df))
        df = fails["table", "silly"]
        self.assertTrue(list(df.columns) == ["Field", "Error Message", "Error Message (1)", "Error Message (2)"])
        self.assertTrue(set(df["Field"]) == {2} and set(df["Error Message (2)"]) == {'Oops 2'})
Example #24
0
    def testDiet(self):
        if not self.can_run:
            return
        tdf = TicDatFactory(**dietSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **{t: getattr(dietData(), t)
                   for t in tdf.primary_key_fields}))
        self._test_generic_copy(ticDat, tdf)
        self._test_generic_copy(ticDat, tdf, ["nutritionQuantities"])
        dirPath = os.path.join(_scratchDir, "diet")
        tdf.csv.write_directory(ticDat, dirPath)
        self.assertFalse(tdf.csv.find_duplicates(dirPath))
        csvTicDat = tdf.csv.create_tic_dat(dirPath)
        self.assertTrue(tdf._same_data(ticDat, csvTicDat))

        def change():
            csvTicDat.categories["calories"]["minNutrition"] = 12

        self.assertFalse(firesException(change))
        self.assertFalse(tdf._same_data(ticDat, csvTicDat))

        self.assertTrue(
            self.firesException(lambda: tdf.csv.write_directory(
                ticDat, dirPath, dialect="excel_t")).endswith(
                    "Invalid dialect excel_t"))

        tdf.csv.write_directory(ticDat,
                                dirPath,
                                dialect="excel-tab",
                                allow_overwrite=True)
        self.assertTrue(
            self.firesException(
                lambda: tdf.csv.create_tic_dat(dirPath, freeze_it=True)))
        csvTicDat = tdf.csv.create_tic_dat(dirPath,
                                           freeze_it=True,
                                           dialect="excel-tab")
        self.assertTrue(firesException(change))
        self.assertTrue(tdf._same_data(ticDat, csvTicDat))

        tdf2 = TicDatFactory(**dietSchemaWeirdCase())
        dat2 = copyDataDietWeirdCase(ticDat)
        tdf2.csv.write_directory(dat2, dirPath, allow_overwrite=True)
        csvTicDat2 = tdf.csv.create_tic_dat(dirPath, freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, csvTicDat2))
        os.rename(os.path.join(dirPath, "nutritionquantities.csv"),
                  os.path.join(dirPath, "nutritionquantities.csv".upper()))
        csvTicDat2 = tdf.csv.create_tic_dat(dirPath, freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, csvTicDat2))

        tdf3 = TicDatFactory(**dietSchemaWeirdCase2())
        dat3 = copyDataDietWeirdCase2(ticDat)
        tdf3.csv.write_directory(dat3, dirPath, allow_overwrite=True)
        os.rename(os.path.join(dirPath, "nutrition_quantities.csv"),
                  os.path.join(dirPath, "nutrition quantities.csv"))
        csvDat3 = tdf3.csv.create_tic_dat(dirPath)
        self.assertTrue(tdf3._same_data(dat3, csvDat3))
        shutil.copy(os.path.join(dirPath, "nutrition quantities.csv"),
                    os.path.join(dirPath, "nutrition_quantities.csv"))
        self.assertTrue(
            self.firesException(lambda: tdf3.csv.create_tic_dat(dirPath)))
Example #25
0
 def testNine(self):
     for schema in (dietSchema(), sillyMeSchema(), netflowSchema()) :
         d = TicDatFactory(**schema).schema()
         assert d == {k : map(list, v) for k,v in schema.items()}
Example #26
0
    def testEight(self):
        tdf = TicDatFactory(**dietSchema())
        def makeIt() :
            rtn = tdf.TicDat()
            rtn.foods["a"] = 12
            rtn.foods["b"] = None
            rtn.categories["1"] = {"maxNutrition":100, "minNutrition":40}
            rtn.categories["2"] = [10,20]
            for f, p in itertools.product(rtn.foods, rtn.categories):
                rtn.nutritionQuantities[f,p] = 5
            rtn.nutritionQuantities['a', 2] = 12
            return tdf.freeze_me(rtn)
        dat = makeIt()
        self.assertFalse(tdf.find_data_type_failures(dat))

        tdf = TicDatFactory(**dietSchema())
        tdf.set_data_type("foods", "cost", nullable=False)
        tdf.set_data_type("nutritionQuantities", "qty", min=5, inclusive_min=False, max=12, inclusive_max=True)
        tdf.set_default_value("foods", "cost", 2)
        dat = makeIt()
        failed = tdf.find_data_type_failures(dat)
        self.assertTrue(set(failed) == {('foods', 'cost'), ('nutritionQuantities', 'qty')})
        self.assertTrue(set(failed['nutritionQuantities', 'qty'].pks) ==
                        {('b', '1'), ('a', '2'), ('a', '1'), ('b', '2')})
        self.assertTrue(failed['nutritionQuantities', 'qty'].bad_values == (5,))
        ex = self.firesException(lambda : tdf.replace_data_type_failures(tdf.copy_tic_dat(dat)))
        self.assertTrue(all(_ in ex for _ in ("replacement value", "nutritionQuantities", "qty")))
        fixedDat = tdf.replace_data_type_failures(tdf.copy_tic_dat(dat),
                            replacement_values={("nutritionQuantities", "qty"):5.001})
        self.assertFalse(tdf.find_data_type_failures(fixedDat) or tdf._same_data(fixedDat, dat))
        self.assertTrue(all(fixedDat.nutritionQuantities[pk]["qty"] == 5.001 for pk in
                            failed['nutritionQuantities', 'qty'].pks))
        self.assertTrue(fixedDat.foods["a"]["cost"] == 12 and fixedDat.foods["b"]["cost"] == 2 and
                        fixedDat.nutritionQuantities['a', 2]["qty"] == 12)

        tdf = TicDatFactory(**dietSchema())
        tdf.set_data_type("foods", "cost", nullable=False)
        tdf.set_data_type("nutritionQuantities", "qty", min=5, inclusive_min=False, max=12, inclusive_max=True)
        fixedDat2 = tdf.replace_data_type_failures(tdf.copy_tic_dat(dat),
                            replacement_values={("nutritionQuantities", "qty"):5.001, ("foods", "cost") : 2})
        self.assertTrue(tdf._same_data(fixedDat, fixedDat2))

        tdf = TicDatFactory(**dietSchema())
        tdf.set_data_type("foods", "cost", nullable=True)
        tdf.set_data_type("nutritionQuantities", "qty",number_allowed=False)
        failed = tdf.find_data_type_failures(dat)
        self.assertTrue(set(failed) == {('nutritionQuantities', 'qty')})
        self.assertTrue(set(failed['nutritionQuantities', 'qty'].pks) == set(dat.nutritionQuantities))
        ex = self.firesException(lambda : tdf.replace_data_type_failures(tdf.copy_tic_dat(dat)))
        self.assertTrue(all(_ in ex for _ in ("replacement value", "nutritionQuantities", "qty")))

        tdf = TicDatFactory(**dietSchema())
        tdf.set_data_type("foods", "cost")
        fixedDat = tdf.replace_data_type_failures(tdf.copy_tic_dat(makeIt()))
        self.assertTrue(fixedDat.foods["a"]["cost"] == 12 and fixedDat.foods["b"]["cost"] == 0)

        tdf = TicDatFactory(**netflowSchema())
        addNetflowForeignKeys(tdf)
        dat = tdf.copy_tic_dat(netflowData(), freeze_it=1)
        self.assertFalse(hasattr(dat.nodes["Detroit"], "arcs_source"))

        tdf = TicDatFactory(**netflowSchema())
        addNetflowForeignKeys(tdf)
        tdf.enable_foreign_key_links()
        dat = tdf.copy_tic_dat(netflowData(), freeze_it=1)
        self.assertTrue(hasattr(dat.nodes["Detroit"], "arcs_source"))

        tdf = TicDatFactory(**netflowSchema())
        def makeIt() :
            if not tdf.foreign_keys:
                tdf.enable_foreign_key_links()
                addNetflowForeignKeys(tdf)
            orig = netflowData()
            rtn = tdf.copy_tic_dat(orig)
            for n in rtn.nodes["Detroit"].arcs_source:
                rtn.arcs["Detroit", n] = n
            self.assertTrue(all(len(getattr(rtn, t)) == len(getattr(orig, t)) for t in tdf.all_tables))
            return tdf.freeze_me(rtn)
        dat = makeIt()
        self.assertFalse(tdf.find_data_type_failures(dat))

        tdf = TicDatFactory(**netflowSchema())
        tdf.set_data_type("arcs", "capacity", strings_allowed="*")
        dat = makeIt()
        self.assertFalse(tdf.find_data_type_failures(dat))

        tdf = TicDatFactory(**netflowSchema())
        tdf.set_data_type("arcs", "capacity", strings_allowed=["Boston", "Seattle", "lumberjack"])
        dat = makeIt()
        failed = tdf.find_data_type_failures(dat)
        self.assertTrue(failed == {('arcs', 'capacity'):(("New York",), (("Detroit", "New York"),))})
        fixedDat = tdf.replace_data_type_failures(tdf.copy_tic_dat(makeIt()))
        netflowData_ = tdf.copy_tic_dat(netflowData())
        self.assertFalse(tdf.find_data_type_failures(fixedDat) or tdf._same_data(dat, netflowData_))
        fixedDat = tdf.copy_tic_dat(tdf.replace_data_type_failures(tdf.copy_tic_dat(makeIt()),
                                        {("arcs", "capacity"):80, ("cost","cost") :"imok"}))
        fixedDat.arcs["Detroit", "Boston"] = 100
        fixedDat.arcs["Detroit", "Seattle"] = 120
        self.assertTrue(tdf._same_data(fixedDat, netflowData_))
Example #27
0
    def testDataTypes(self):
        if not self.canRun:
            return
        tdf = TicDatFactory(**dietSchema())
        pdf = PanDatFactory(**dietSchema())

        ticdat = tdf.TicDat()
        ticdat.foods["a"] = 12
        ticdat.foods["b"] = None
        ticdat.categories["1"] = {"maxNutrition": 100, "minNutrition": 40}
        ticdat.categories["2"] = [10, 20]
        for f, p in itertools.product(ticdat.foods, ticdat.categories):
            ticdat.nutritionQuantities[f, p] = 5
        ticdat.nutritionQuantities['a', 2] = 12

        pandat = pdf.copy_pan_dat(copy_to_pandas_with_reset(tdf, ticdat))

        self.assertFalse(pdf.find_data_type_failures(pandat))
        pandat_copy = pdf.replace_data_type_failures(pdf.copy_pan_dat(pandat))
        self.assertTrue(pdf._same_data(pandat, pandat_copy, epsilon=0.00001))

        pdf = PanDatFactory(**dietSchema())
        pdf.set_data_type("foods", "cost", nullable=False)
        pdf.set_data_type("nutritionQuantities",
                          "qty",
                          min=5,
                          inclusive_min=False,
                          max=12,
                          inclusive_max=True)
        failed = pdf.find_data_type_failures(pandat)
        self.assertTrue(
            set(failed) == {('foods', 'cost'), ('nutritionQuantities', 'qty')})
        self.assertTrue(set(failed['foods', 'cost']["name"]) == {'b'})
        self.assertTrue(
            set({(v["food"], v["category"])
                 for v in failed['nutritionQuantities',
                                 'qty'].T.to_dict().values()}) == {(
                                     'b', '1'), ('a', '2'), ('a',
                                                             '1'), ('b', '2')})

        failed = pdf.find_data_type_failures(pandat, as_table=False)
        self.assertTrue(4 == failed['nutritionQuantities',
                                    'qty'].value_counts()[True])
        fixed = pdf.replace_data_type_failures(
            pdf.copy_pan_dat(pandat), {("nutritionQuantities", "qty"): 5.15})
        self.assertTrue(set(fixed.foods["cost"]) == {0.0, 12.0})
        self.assertTrue(set(fixed.nutritionQuantities["qty"]) == {5.15, 12.0})

        tdf = TicDatFactory(**netflowSchema())
        tdf.enable_foreign_key_links()
        addNetflowForeignKeys(tdf)
        pdf = PanDatFactory(**netflowSchema())
        ticdat = tdf.copy_tic_dat(netflowData())
        for n in ticdat.nodes["Detroit"].arcs_source:
            ticdat.arcs["Detroit", n] = n
        pandat = pdf.copy_pan_dat(copy_to_pandas_with_reset(tdf, ticdat))
        self.assertFalse(pdf.find_data_type_failures(pandat))

        pdf = PanDatFactory(**netflowSchema())
        pdf.set_data_type("arcs", "capacity", strings_allowed="*")
        self.assertFalse(pdf.find_data_type_failures(pandat))

        pdf = PanDatFactory(**netflowSchema())
        pdf.set_data_type("arcs",
                          "capacity",
                          strings_allowed=["Boston", "Seattle", "lumberjack"])
        failed = pdf.find_data_type_failures(pandat)
        self.assertTrue(set(failed) == {('arcs', 'capacity')})
        self.assertTrue(
            set({(v["source"], v["destination"])
                 for v in failed['arcs',
                                 'capacity'].T.to_dict().values()}) == {(
                                     "Detroit", "New York")})
        pdf.replace_data_type_failures(pandat)
        self.assertTrue(
            set(pandat.arcs["capacity"]) == {120, 'Boston', 0, 'Seattle'})
Example #28
0
    def testCsvSimple(self):
        if not self.can_run:
            return
        tdf = TicDatFactory(**dietSchema())
        pdf = PanDatFactory(**dietSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **{t: getattr(dietData(), t)
                   for t in tdf.primary_key_fields}))
        panDat = pan_dat_maker(dietSchema(), ticDat)
        dirPath = os.path.join(_scratchDir, "diet_csv")
        pdf.csv.write_directory(panDat, dirPath)
        panDat2 = pdf.csv.create_pan_dat(dirPath)
        self.assertTrue(pdf._same_data(panDat, panDat2))
        pdf2 = PanDatFactory(**{t: '*' for t in pdf.all_tables})
        panDat2 = pdf2.csv.create_pan_dat(dirPath)
        self.assertTrue(pdf._same_data(panDat, panDat2))

        pdf2 = PanDatFactory(**{
            k: v
            for k, v in dietSchema().items() if k != "nutritionQuantities"
        })
        panDat2 = pdf2.copy_pan_dat(panDat)
        dirPath = os.path.join(_scratchDir, "diet_missing_csv")
        pdf2.csv.write_directory(panDat2, dirPath, makeCleanDir(dirPath))
        panDat3 = pdf.csv.create_pan_dat(dirPath)
        self.assertTrue(pdf2._same_data(panDat2, panDat3))
        self.assertTrue(all(hasattr(panDat3, x) for x in pdf.all_tables))
        self.assertFalse(len(panDat3.nutritionQuantities))
        self.assertTrue(len(panDat3.categories) and len(panDat3.foods))

        pdf2 = PanDatFactory(
            **{k: v
               for k, v in dietSchema().items() if k == "categories"})
        panDat2 = pdf2.copy_pan_dat(panDat)
        pdf2.csv.write_directory(panDat2, dirPath, makeCleanDir(dirPath))
        panDat3 = pdf.csv.create_pan_dat(dirPath)
        self.assertTrue(pdf2._same_data(panDat2, panDat3))
        self.assertTrue(all(hasattr(panDat3, x) for x in pdf.all_tables))
        self.assertFalse(
            len(panDat3.nutritionQuantities) or len(panDat3.foods))
        self.assertTrue(len(panDat3.categories))

        tdf = TicDatFactory(**netflowSchema())
        pdf = PanDatFactory(**netflowSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **
                {t: getattr(netflowData(), t)
                 for t in tdf.primary_key_fields}))
        panDat = pan_dat_maker(netflowSchema(), ticDat)
        dirPath = os.path.join(_scratchDir, "netflow_csv")
        pdf.csv.write_directory(panDat, dirPath)
        panDat2 = pdf.csv.create_pan_dat(dirPath)
        self.assertTrue(pdf._same_data(panDat, panDat2))
        pdf2 = PanDatFactory(**{t: '*' for t in pdf.all_tables})
        pdf2.csv.write_directory(panDat, dirPath)
        panDat2 = pdf2.csv.create_pan_dat(dirPath)
        self.assertTrue(pdf._same_data(panDat, panDat2))

        tdf = TicDatFactory(**dietSchema())
        pdf = PanDatFactory(**dietSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **{t: getattr(dietData(), t)
                   for t in tdf.primary_key_fields}))
        panDat = pan_dat_maker(dietSchema(), ticDat)
        dirPath = os.path.join(_scratchDir, "diet_csv")
        pdf.csv.write_directory(panDat, dirPath, decimal=",")
        panDat2 = pdf.csv.create_pan_dat(dirPath)
        self.assertFalse(pdf._same_data(panDat, panDat2))
        panDat2 = pdf.csv.create_pan_dat(dirPath, decimal=",")
        self.assertTrue(pdf._same_data(panDat, panDat2))
Example #29
0
    def testJsonSimple(self):
        if not self.can_run:
            return
        tdf = TicDatFactory(**dietSchema())
        pdf = PanDatFactory(**dietSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **{t: getattr(dietData(), t)
                   for t in tdf.primary_key_fields}))
        panDat = pan_dat_maker(dietSchema(), ticDat)
        filePath = os.path.join(_scratchDir, "diet.json")
        pdf.json.write_file(panDat, filePath)
        panDat2 = pdf.json.create_pan_dat(filePath)
        self.assertTrue(pdf._same_data(panDat, panDat2, epsilon=1e-5))
        pdf2 = PanDatFactory(**{t: '*' for t in pdf.all_tables})
        pdf2.json.write_file(panDat, filePath)
        panDat2 = pdf2.json.create_pan_dat(filePath)
        self.assertTrue(pdf._same_data(panDat, panDat2, epsilon=1e-5))

        tdf = TicDatFactory(**netflowSchema())
        pdf = PanDatFactory(**netflowSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **
                {t: getattr(netflowData(), t)
                 for t in tdf.primary_key_fields}))
        panDat = pan_dat_maker(netflowSchema(), ticDat)
        filePath = os.path.join(_scratchDir, "netflow.json")
        pdf.json.write_file(panDat, filePath)
        panDat2 = pdf.json.create_pan_dat(filePath)
        self.assertTrue(pdf._same_data(panDat, panDat2, epsilon=1e-5))
        panDat3 = pdf.json.create_pan_dat(pdf.json.write_file(panDat, ""))
        self.assertTrue(pdf._same_data(panDat, panDat3))
        dicted = json.loads(pdf.json.write_file(panDat, ""))
        panDat4 = pdf.PanDat(**dicted)
        self.assertTrue(pdf._same_data(panDat, panDat4))
        pdf2 = PanDatFactory(**{t: '*' for t in pdf.all_tables})
        panDat5 = pdf2.PanDat(**dicted)
        self.assertTrue(pdf._same_data(panDat, panDat5))

        tdf = TicDatFactory(**dietSchema())
        pdf = PanDatFactory(**dietSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **{t: getattr(dietData(), t)
                   for t in tdf.primary_key_fields}))
        panDat = pan_dat_maker(dietSchema(), ticDat)
        filePath = os.path.join(_scratchDir, "diet.json")
        pdf.json.write_file(panDat, filePath, orient='columns', index=True)
        # the following doesn't generate a TicDatError, which is fine
        self.assertTrue(
            firesException(lambda: pdf.json.create_pan_dat(filePath)))
        panDat2 = pdf.json.create_pan_dat(filePath, orient='columns')
        self.assertTrue(pdf._same_data(panDat, panDat2, epsilon=1e-5))
        panDat3 = pdf.json.create_pan_dat(pdf.json.write_file(
            panDat, "", orient='columns'),
                                          orient="columns")
        self.assertTrue(pdf._same_data(panDat, panDat3, epsilon=1e-5))
        dicted = json.loads(pdf.json.write_file(panDat, "", orient='columns'))
        panDat4 = pdf.PanDat(**dicted)
        self.assertTrue(pdf._same_data(panDat, panDat4, epsilon=1e-5))
Example #30
0
    def testDiet(self):
        if not self.canRun:
            return
        tdf = TicDatFactory(**dietSchema())
        tdf.enable_foreign_key_links()
        oldDat = tdf.freeze_me(
            tdf.TicDat(
                **{t: getattr(dietData(), t)
                   for t in tdf.primary_key_fields}))
        self._test_generic_free_copy(oldDat, tdf)
        self._test_generic_free_copy(oldDat, tdf, ["nutritionQuantities"])
        ticDat = tdf.copy_to_pandas(oldDat)
        for k in oldDat.foods:
            self.assertTrue(oldDat.foods[k]["cost"] == ticDat.foods.cost[k])
        for k in oldDat.categories:
            self.assertTrue(oldDat.categories[k]["minNutrition"] ==
                            ticDat.categories.minNutrition[k])
        for k1, k2 in oldDat.nutritionQuantities:
            self.assertTrue(oldDat.nutritionQuantities[k1, k2]["qty"] ==
                            ticDat.nutritionQuantities.qty[k1, k2])
        nut = ticDat.nutritionQuantities
        self.assertTrue(firesException(lambda: nut.qty.loc[:, "fatty"]))
        self.assertTrue(firesException(lambda: nut.qty.loc["chickeny", :]))
        self.assertFalse(firesException(lambda: nut.qty.sloc[:, "fatty"]))
        self.assertFalse(firesException(lambda: nut.qty.sloc["chickeny", :]))
        self.assertTrue(0 == sum(nut.qty.sloc[:, "fatty"]) == sum(nut.qty.sloc[
            "chickeny", :]))
        self.assertTrue(
            sum(nut.qty.sloc[:, "fat"]) == sum(nut.qty.loc[:, "fat"]) == sum(
                r["qty"] for (f, c), r in oldDat.nutritionQuantities.items()
                if c == "fat"))
        self.assertTrue(
            sum(nut.qty.sloc["chicken", :]) == sum(nut.qty.loc["chicken", :])
            == sum(r["qty"]
                   for (f, c), r in oldDat.nutritionQuantities.items()
                   if f == "chicken"))

        rebornTicDat = tdf.TicDat(
            **{t: getattr(ticDat, t)
               for t in tdf.all_tables})
        self.assertTrue(tdf._same_data(rebornTicDat, oldDat))

        tdf2 = TicDatFactory(**{t: '*' for t in tdf.all_tables})
        self.assertTrue(
            firesException(
                lambda: tdf2.set_data_type("nutritionQuantities", "qty")))
        genTicDat = tdf2.TicDat(
            **{t: getattr(ticDat, t)
               for t in tdf.all_tables})

        for k in oldDat.categories:
            self.assertTrue(oldDat.categories[k]["minNutrition"] ==
                            genTicDat.categories.minNutrition[k])
        for k1, k2 in oldDat.nutritionQuantities:
            self.assertTrue(oldDat.nutritionQuantities[k1, k2]["qty"] ==
                            genTicDat.nutritionQuantities.qty[k1, k2])
        self.assertFalse(tdf.good_tic_dat_object(genTicDat))
        self.assertTrue(tdf2.good_tic_dat_object(genTicDat))
        rebornTicDat = tdf.TicDat(
            **{t: getattr(genTicDat, t)
               for t in tdf.all_tables})
        self.assertTrue(tdf._same_data(rebornTicDat, oldDat))
        rebornGenTicDat = tdf2.TicDat(**tdf2.as_dict(genTicDat))
        for t, pks in tdf.primary_key_fields.items():
            getattr(rebornGenTicDat, t).index.names = pks
        rebornTicDat = tdf.TicDat(
            **{t: getattr(rebornGenTicDat, t)
               for t in tdf.all_tables})
        self.assertTrue(tdf._same_data(rebornTicDat, oldDat))

        tdf3 = TicDatFactory(**dict(dietSchema(), **{"categories": '*'}))
        self.assertFalse(
            firesException(
                lambda: tdf3.set_data_type("nutritionQuantities", "qty")))
        mixTicDat = tdf3.TicDat(
            **{t: getattr(ticDat, t)
               for t in tdf.all_tables})
        for k in oldDat.categories:
            self.assertTrue(oldDat.categories[k]["minNutrition"] ==
                            mixTicDat.categories.minNutrition[k])
        for k1, k2 in oldDat.nutritionQuantities:
            self.assertTrue(oldDat.nutritionQuantities[k1, k2]["qty"] ==
                            mixTicDat.nutritionQuantities[k1, k2]["qty"])
        self.assertFalse(tdf2.good_tic_dat_object(mixTicDat))
        self.assertFalse(tdf3.good_tic_dat_object(genTicDat))
        self.assertTrue(tdf3.good_tic_dat_object(mixTicDat))
        rebornTicDat = tdf.TicDat(
            **{t: getattr(mixTicDat, t)
               for t in tdf.all_tables})
        self.assertTrue(tdf._same_data(rebornTicDat, oldDat))
Example #31
0
    def testDiet(self):
        if not self.can_run:
            return
        def doTheTests(tdf) :
            ticDat = tdf.freeze_me(tdf.TicDat(**{t:getattr(dietData(),t) for t in tdf.primary_key_fields}))
            filePath = makeCleanPath(os.path.join(_scratchDir, "diet.db"))
            tdf.sql.write_db_data(ticDat, filePath)
            self.assertFalse(tdf.sql.find_duplicates(filePath))
            sqlTicDat = tdf.sql.create_tic_dat(filePath)
            self.assertTrue(tdf._same_data(ticDat, sqlTicDat))
            def changeit() :
                sqlTicDat.categories["calories"]["minNutrition"]=12
            changeit()
            self.assertFalse(tdf._same_data(ticDat, sqlTicDat))

            self.assertTrue(self.firesException(lambda : tdf.sql.write_db_data(ticDat, filePath)))
            tdf.sql.write_db_data(ticDat, filePath, allow_overwrite=True)
            sqlTicDat = tdf.sql.create_tic_dat(filePath, freeze_it=True)
            self.assertTrue(tdf._same_data(ticDat, sqlTicDat))
            self.assertTrue(self.firesException(changeit))
            self.assertTrue(tdf._same_data(ticDat, sqlTicDat))

            filePath = makeCleanPath(os.path.join(_scratchDir, "diet.sql"))
            tdf.sql.write_sql_file(ticDat, filePath)
            sqlTicDat = tdf.sql.create_tic_dat_from_sql(filePath)
            self.assertTrue(tdf._same_data(ticDat, sqlTicDat))
            changeit()
            self.assertFalse(tdf._same_data(ticDat, sqlTicDat))

            tdf.sql.write_sql_file(ticDat, filePath, include_schema=True, allow_overwrite=True)
            sqlTicDat = tdf.sql.create_tic_dat_from_sql(filePath, includes_schema=True, freeze_it=True)
            self.assertTrue(tdf._same_data(ticDat, sqlTicDat))
            self.assertTrue(self.firesException(changeit))
            self.assertTrue(tdf._same_data(ticDat, sqlTicDat))

        doTheTests(TicDatFactory(**dietSchema()))

        tdf = TicDatFactory(**dietSchema())
        self.assertFalse(tdf.foreign_keys)
        tdf.set_default_values(categories =  {'maxNutrition': float("inf"), 'minNutrition': 0.0},
                               foods =  {'cost': 0.0},
                               nutritionQuantities =  {'qty': 0.0})
        addDietForeignKeys(tdf)
        ordered = tdf.sql._ordered_tables()
        self.assertTrue(ordered.index("categories") < ordered.index("nutritionQuantities"))
        self.assertTrue(ordered.index("foods") < ordered.index("nutritionQuantities"))

        ticDat = tdf.TicDat(**{t:getattr(dietData(),t) for t in tdf.primary_key_fields})
        self._test_generic_copy(ticDat, tdf)
        self._test_generic_copy(ticDat, tdf, ["nutritionQuantities"])
        origTicDat = tdf.copy_tic_dat(ticDat)
        self.assertTrue(tdf._same_data(ticDat, origTicDat))
        self.assertFalse(tdf.find_foreign_key_failures(ticDat))
        ticDat.nutritionQuantities['hot dog', 'boger'] = ticDat.nutritionQuantities['junk', 'protein'] = -12
        self.assertTrue(tdf.find_foreign_key_failures(ticDat) ==
        {('nutritionQuantities', 'foods', ('food', 'name'), 'many-to-one'): (('junk',), (('junk', 'protein'),)),
         ('nutritionQuantities', 'categories', ('category', 'name'), 'many-to-one'):
             (('boger',), (('hot dog', 'boger'),))})

        self.assertFalse(tdf._same_data(ticDat, origTicDat))
        tdf.remove_foreign_key_failures(ticDat)
        self.assertFalse(tdf.find_foreign_key_failures(ticDat))
        self.assertTrue(tdf._same_data(ticDat, origTicDat))

        doTheTests(tdf)
Example #32
0
    def testDefaultAdd(self):
        if not self.can_run:
            return
        tdf = TicDatFactory(**dietSchema())
        pdf = PanDatFactory(**dietSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **{t: getattr(dietData(), t)
                   for t in tdf.primary_key_fields}))
        panDat = pan_dat_maker(dietSchema(), ticDat)
        xlsFilePath = os.path.join(_scratchDir, "diet_add.xlsx")
        pdf.xls.write_file(panDat, xlsFilePath)
        sqlFilePath = os.path.join(_scratchDir, "diet_add.sql")
        pdf.sql.write_file(panDat, sqlFilePath)
        csvDirPath = os.path.join(_scratchDir, "diet_add_csv")
        pdf.csv.write_directory(panDat,
                                csvDirPath,
                                case_space_table_names=True)

        pdf2 = PanDatFactory(
            **{
                k: [p, d] if k != "foods" else [p, list(d) + ["extra"]]
                for k, (p, d) in dietSchema().items()
            })
        ex = self.firesException(lambda: pdf2.xls.create_pan_dat(xlsFilePath))
        self.assertTrue("missing" in ex and "extra" in ex)
        ex = self.firesException(lambda: pdf2.sql.create_pan_dat(sqlFilePath))
        self.assertTrue("missing" in ex and "extra" in ex)
        ex = self.firesException(lambda: pdf2.csv.create_pan_dat(csvDirPath))
        self.assertTrue("missing" in ex and "extra" in ex)
        ex = self.firesException(
            lambda: pdf2.json.create_pan_dat(pdf.json.write_file(panDat, "")))
        self.assertTrue("missing" in ex and "extra" in ex)

        panDat2 = pdf2.sql.create_pan_dat(sqlFilePath,
                                          fill_missing_fields=True)
        self.assertTrue(set(panDat2.foods["extra"]) == {0})
        panDat2.foods.drop("extra", axis=1, inplace=True)
        self.assertTrue(pdf._same_data(panDat, panDat2))

        panDat2 = pdf2.xls.create_pan_dat(xlsFilePath,
                                          fill_missing_fields=True)
        self.assertTrue(set(panDat2.foods["extra"]) == {0})
        panDat2.foods.drop("extra", axis=1, inplace=True)
        self.assertTrue(pdf._same_data(panDat, panDat2))

        panDat2 = pdf2.csv.create_pan_dat(csvDirPath, fill_missing_fields=True)
        self.assertTrue(set(panDat2.foods["extra"]) == {0})
        panDat2.foods.drop("extra", axis=1, inplace=True)
        self.assertTrue(pdf._same_data(panDat, panDat2))

        panDat2 = pdf2.json.create_pan_dat(pdf.json.write_file(panDat, ""),
                                           fill_missing_fields=True)
        self.assertTrue(set(panDat2.foods["extra"]) == {0})
        panDat2.foods.drop("extra", axis=1, inplace=True)
        self.assertTrue(pdf._same_data(panDat, panDat2, epsilon=1e-5))

        pdf3 = PanDatFactory(**pdf2.schema())
        pdf3.set_default_value("foods", "extra", 13)
        panDat3 = pdf3.sql.create_pan_dat(sqlFilePath,
                                          fill_missing_fields=True)
        self.assertTrue(set(panDat3.foods["extra"]) == {13})
        panDat3.foods.drop("extra", axis=1, inplace=True)
        self.assertTrue(pdf._same_data(panDat, panDat3))

        panDat3 = pdf3.xls.create_pan_dat(xlsFilePath,
                                          fill_missing_fields=True)
        self.assertTrue(set(panDat3.foods["extra"]) == {13})
        panDat3.foods.drop("extra", axis=1, inplace=True)
        self.assertTrue(pdf._same_data(panDat, panDat3))

        panDat3 = pdf3.csv.create_pan_dat(csvDirPath, fill_missing_fields=True)
        self.assertTrue(set(panDat3.foods["extra"]) == {13})
        panDat3.foods.drop("extra", axis=1, inplace=True)
        self.assertTrue(pdf._same_data(panDat, panDat3))

        panDat3 = pdf3.json.create_pan_dat(pdf.json.write_file(panDat, ""),
                                           fill_missing_fields=True)
        self.assertTrue(set(panDat3.foods["extra"]) == {13})
        panDat3.foods.drop("extra", axis=1, inplace=True)
        self.assertTrue(pdf._same_data(panDat, panDat3, epsilon=1e-5))
Example #33
0
    def testDataPredicates(self):
        if not self.canRun:
            return
        tdf = TicDatFactory(**dietSchema())
        pdf = PanDatFactory(**dietSchema())

        ticdat = tdf.TicDat()
        ticdat.foods["a"] = 12
        ticdat.foods["b"] = None
        ticdat.categories["1"] = {"maxNutrition":100, "minNutrition":40}
        ticdat.categories["2"] = [21,20]
        for f, p in itertools.product(ticdat.foods, ticdat.categories):
            ticdat.nutritionQuantities[f,p] = 5


        pandat = pdf.copy_pan_dat(copy_to_pandas_with_reset(tdf, ticdat))
        self.assertFalse(pdf.find_duplicates(pandat))
        self.assertFalse(pdf.find_data_row_failures(pandat))

        ticdat.nutritionQuantities['a', 2] = 12
        ticdat.categories["3"] = ['a', 100]
        pandat_2 = pdf.copy_pan_dat(copy_to_pandas_with_reset(tdf, ticdat))

        def perform_predicate_checks(sch):
            pdf = PanDatFactory(**sch)
            pdf.add_data_row_predicate("foods", lambda row: numericish(row["cost"]) and not isnan(row["cost"]), "cost")
            good_qty = lambda qty : 5 < qty <= 12
            pdf.add_data_row_predicate("nutritionQuantities", lambda row: good_qty(row["qty"]), "qty")
            pdf.add_data_row_predicate("categories",
                                       lambda row: row["maxNutrition"] >= row["minNutrition"],
                                       "minmax")
            failed = pdf.find_data_row_failures(pandat)
            self.assertTrue(set(failed) == {('foods', 'cost'), ('nutritionQuantities', 'qty'), ('categories', 'minmax')})
            self.assertTrue(set(failed['foods', 'cost']["name"]) == {'b'})
            self.assertTrue(set({(v["food"], v["category"])
                                 for v in failed['nutritionQuantities', 'qty'].T.to_dict().values()}) ==
                                {('b', '1'), ('a', '2'), ('a', '1'), ('b', '2')})
            self.assertTrue(set(failed['categories', 'minmax']["name"]) == {'2'})
            failed = pdf.find_data_row_failures(pandat, as_table=False)
            self.assertTrue(4 == failed['nutritionQuantities', 'qty'].value_counts()[True])
            failed = pdf.find_data_row_failures(pandat_2)
            self.assertTrue(set(failed['categories', 'minmax']["name"]) == {'2', '3'})

        perform_predicate_checks(dietSchema())
        perform_predicate_checks({t:'*' for t in dietSchema()})

        tdf = TicDatFactory(**netflowSchema())
        tdf.enable_foreign_key_links()
        addNetflowForeignKeys(tdf)
        pdf = PanDatFactory(**netflowSchema())
        ticdat = tdf.copy_tic_dat(netflowData())
        for n in ticdat.nodes["Detroit"].arcs_source:
            ticdat.arcs["Detroit", n] = n
        pandat = pdf.copy_pan_dat(copy_to_pandas_with_reset(tdf, ticdat))
        self.assertFalse(pdf.find_duplicates(pandat))
        self.assertFalse(pdf.find_data_row_failures(pandat))

        pdf = PanDatFactory(**netflowSchema())
        pdf.add_data_row_predicate("arcs", lambda row: True, "capacity")
        self.assertFalse(pdf.find_data_row_failures(pandat))

        pdf = PanDatFactory(**netflowSchema())
        good_capacity = lambda capacity: numericish(capacity) or capacity in ["Boston", "Seattle", "lumberjack"]
        pdf.add_data_row_predicate("arcs", lambda row: good_capacity(row["capacity"]), "capacity")
        failed = pdf.find_data_row_failures(pandat)
        self.assertTrue(set(failed) == {('arcs', 'capacity')})
        self.assertTrue(set({(v["source"], v["destination"])
                             for v in failed['arcs', 'capacity'].T.to_dict().values()}) == {("Detroit", "New York")})
Example #34
0
    def testRoundTrips(self):
        if not self.canRun:
            return
        tdf = TicDatFactory(**dietSchema())
        tdf.enable_foreign_key_links()
        oldDat = tdf.freeze_me(
            tdf.TicDat(
                **{t: getattr(dietData(), t)
                   for t in tdf.primary_key_fields}))
        pdf = PanDatFactory.create_from_full_schema(
            tdf.schema(include_ancillary_info=True))
        pan_dat = tdf.copy_to_pandas(oldDat, drop_pk_columns=False)
        self.assertTrue(pdf.good_pan_dat_object(pan_dat))
        tic_dat = pdf.copy_to_tic_dat(pan_dat)
        self.assertTrue(tdf._same_data(oldDat, tic_dat))

        tdf = TicDatFactory(**netflowSchema())
        tdf.enable_foreign_key_links()
        addNetflowForeignKeys(tdf)
        oldDat = tdf.freeze_me(
            tdf.TicDat(
                **
                {t: getattr(netflowData(), t)
                 for t in tdf.primary_key_fields}))
        pdf = PanDatFactory.create_from_full_schema(
            tdf.schema(include_ancillary_info=True))
        pan_dat = tdf.copy_to_pandas(oldDat, drop_pk_columns=False)
        self.assertTrue(pdf.good_pan_dat_object(pan_dat))
        tic_dat = pdf.copy_to_tic_dat(pan_dat)
        self.assertTrue(tdf._same_data(oldDat, tic_dat))

        pdf = PanDatFactory(table=[["a", "b"], ["c"]])
        pan_dat = pdf.PanDat(table=utils.DataFrame({
            "a": [1, 2, 1, 1],
            "b": [10, 10, 10, 11],
            "c": [101, 102, 103, 104]
        }))
        self.assertTrue(
            len(pdf.find_duplicates(pan_dat, keep=False)["table"]) == 2)
        tic_dat = pdf.copy_to_tic_dat(pan_dat)
        self.assertTrue(len(tic_dat.table) == len(pan_dat.table) - 1)

        tdf = TicDatFactory(**pdf.schema())
        tic_dat = tdf.TicDat(table=[[1, 2, 3], [None, 2, 3], [2, 1, None]])
        self.assertTrue(len(tic_dat.table) == 3)
        tic_dat_two = pdf.copy_to_tic_dat(
            tdf.copy_to_pandas(tic_dat, drop_pk_columns=False))
        self.assertFalse(tdf._same_data(tic_dat, tic_dat_two))
        tic_dat3 = tdf.TicDat(
            table=[[1, 2, 3], [float("nan"), 2, 3], [2, 1, float("nan")]])
        # this fails because _same_data isn't smart enough to check against nan in the keys,
        # because float("nan") != float("nan")
        self.assertFalse(tdf._same_data(tic_dat3, tic_dat_two))

        pdf = PanDatFactory(table=[["a"], ["b", "c"]])
        tdf = TicDatFactory(**pdf.schema())
        tic_dat = tdf.TicDat(table=[[1, 2, 3], [2, None, 3], [2, 1, None]])
        tic_dat_two = pdf.copy_to_tic_dat(
            tdf.copy_to_pandas(tic_dat, drop_pk_columns=False))
        self.assertFalse(tdf._same_data(tic_dat, tic_dat_two))
        tic_dat3 = tdf.TicDat(
            table=[[1, 2, 3], [2, float("nan"), 3], [2, 1, float("nan")]])
        # _same_data works fine in checking nan equivalence in data rows - which maybe
        self.assertTrue(
            tdf._same_data(tic_dat3,
                           tic_dat_two,
                           nans_are_same_for_data_rows=True))