예제 #1
0
    def testDiet(self):
        """Round-trip the diet data through the SQL helpers and check foreign key cleanup.

        The nested helper is run twice: once against a plain diet factory and
        once against a factory that carries default values and the diet
        foreign keys.
        """
        def checkSqlRoundTrips(factory):
            """Write and re-read the diet data via a db file and a SQL text file."""
            tableData = {}
            for tableName in factory.primary_key_fields:
                tableData[tableName] = getattr(dietData(), tableName)
            reference = factory.freeze_me(factory.TicDat(**tableData))

            # --- database file round trip ---
            dbPath = makeCleanPath(os.path.join(_scratchDir, "diet.db"))
            factory.sql.write_db_data(reference, dbPath)
            loaded = factory.sql.create_tic_dat(dbPath)
            self.assertTrue(factory._same_data(reference, loaded))

            def mutateLoaded():
                # ``loaded`` is looked up when this runs, so each call targets
                # whichever data set was read most recently.
                loaded.categories["calories"]["minNutrition"] = 12

            def rewriteDbWithoutOverwrite():
                factory.sql.write_db_data(reference, dbPath)

            mutateLoaded()
            self.assertFalse(factory._same_data(reference, loaded))

            # Writing over an existing db file must fail unless explicitly allowed.
            self.assertTrue(self.firesException(rewriteDbWithoutOverwrite))
            factory.sql.write_db_data(reference, dbPath, allow_overwrite=True)
            loaded = factory.sql.create_tic_dat(dbPath, freeze_it=True)
            self.assertTrue(factory._same_data(reference, loaded))
            # A frozen result rejects edits and is left unchanged.
            self.assertTrue(self.firesException(mutateLoaded))
            self.assertTrue(factory._same_data(reference, loaded))

            # --- SQL text file round trip ---
            sqlPath = makeCleanPath(os.path.join(_scratchDir, "diet.sql"))
            factory.sql.write_sql_file(reference, sqlPath)
            loaded = factory.sql.create_tic_dat_from_sql(sqlPath)
            self.assertTrue(factory._same_data(reference, loaded))
            mutateLoaded()
            self.assertFalse(factory._same_data(reference, loaded))

            factory.sql.write_sql_file(reference, sqlPath, include_schema=True)
            loaded = factory.sql.create_tic_dat_from_sql(
                sqlPath, includes_schema=True, freeze_it=True)
            self.assertTrue(factory._same_data(reference, loaded))
            self.assertTrue(self.firesException(mutateLoaded))
            self.assertTrue(factory._same_data(reference, loaded))

        checkSqlRoundTrips(TicDatFactory(**dietSchema()))

        tdf = TicDatFactory(**dietSchema())
        self.assertFalse(tdf.foreign_keys)
        defaultValues = {
            "categories": {'maxNutrition': float("inf"), 'minNutrition': 0.0},
            "foods": {'cost': 0.0},
            "nutritionQuantities": {'qty': 0.0},
        }
        tdf.set_default_values(**defaultValues)
        addDietForeignKeys(tdf)

        # Both parent tables must be ordered ahead of the child table.
        tableOrder = tdf.sql._ordered_tables()
        for parentTable in ("categories", "foods"):
            self.assertTrue(tableOrder.index(parentTable) < tableOrder.index("nutritionQuantities"))

        dietTables = {}
        for tableName in tdf.primary_key_fields:
            dietTables[tableName] = getattr(dietData(), tableName)
        ticDat = tdf.TicDat(**dietTables)
        pristine = tdf.copy_tic_dat(ticDat)
        self.assertTrue(tdf._same_data(ticDat, pristine))
        self.assertFalse(tdf.find_foreign_key_failures(ticDat))

        # Add one row with an unknown category and one with an unknown food.
        ticDat.nutritionQuantities['hot dog', 'boger'] = -12
        ticDat.nutritionQuantities['junk', 'protein'] = -12
        expectedFailures = {
            ('nutritionQuantities', 'foods', ('food', 'name'), 'many-to-one'):
                (('junk',), (('junk', 'protein'),)),
            ('nutritionQuantities', 'categories', ('category', 'name'), 'many-to-one'):
                (('boger',), (('hot dog', 'boger'),)),
        }
        self.assertTrue(tdf.find_foreign_key_failures(ticDat) == expectedFailures)

        self.assertFalse(tdf._same_data(ticDat, pristine))
        tdf.remove_foreign_keys_failures(ticDat)
        self.assertFalse(tdf.find_foreign_key_failures(ticDat))
        self.assertTrue(tdf._same_data(ticDat, pristine))

        checkSqlRoundTrips(tdf)
예제 #2
0
    def testDietCleaningFive(self):
        """Check opalytics purging with a row predicate and a data type on categories.

        With ``raw_data=True`` the input set must reproduce the source data.
        With ``raw_data=False`` the result must match the source data only
        after the "fat", "calories" and "protein" categories are popped and
        foreign key failures are removed.
        """
        tdf = TicDatFactory(**dietSchema())
        tdf.add_data_row_predicate(
            "categories", lambda row: row["maxNutrition"] >= 66)
        tdf.set_data_type(
            "categories", "minNutrition", max=0, inclusive_max=True)
        addDietForeignKeys(tdf)
        expected = tdf.copy_tic_dat(dietData())

        inputSet = create_inputset_mock(tdf, expected)

        rawDat = tdf.opalytics.create_tic_dat(inputSet, raw_data=True)
        self.assertTrue(tdf._same_data(rawDat, expected))

        purgedDat = tdf.opalytics.create_tic_dat(inputSet, raw_data=False)
        self.assertFalse(tdf._same_data(purgedDat, expected))

        for categoryName in ("fat", "calories", "protein"):
            expected.categories.pop(categoryName)

        # Popping the categories alone is not enough; rows that referenced
        # them must also go before the two data sets agree.
        self.assertFalse(tdf._same_data(purgedDat, expected))
        tdf.remove_foreign_keys_failures(expected)
        self.assertTrue(tdf._same_data(purgedDat, expected))
예제 #3
0
    def testDietCleaning(self):
        """Check opalytics purging driven by an ``_active`` field on categories.

        The input set is built from a schema whose categories table has an
        extra ``_active`` field, set to False for "fat" only. Reading it with
        the plain schema must give the full data when ``raw_data=True``, and
        otherwise the data with "fat" popped and foreign key failures removed.
        """
        activeSchema = dietSchema()
        activeSchema["categories"][-1].append("_active")
        plainTdf = TicDatFactory(**dietSchema())
        activeTdf = TicDatFactory(**activeSchema)

        activeDat = activeTdf.copy_tic_dat(dietData())
        for categoryRow in activeDat.categories.values():
            categoryRow["_active"] = True
        activeDat.categories["fat"]["_active"] = False
        plainDat = plainTdf.copy_tic_dat(dietData())

        inputSet = create_inputset_mock_with_active_hack(activeTdf, activeDat)
        rawDat = plainTdf.opalytics.create_tic_dat(inputSet, raw_data=True)
        self.assertTrue(plainTdf._same_data(rawDat, plainDat))

        purgedDat = plainTdf.opalytics.create_tic_dat(inputSet)
        self.assertFalse(plainTdf._same_data(purgedDat, plainDat))

        plainDat.categories.pop("fat")
        plainTdf.remove_foreign_keys_failures(plainDat)

        self.assertTrue(plainTdf._same_data(purgedDat, plainDat))
예제 #4
0
    def testDietCleaningFour(self):
        """Check opalytics purging of rows that reference a missing category.

        The "fat" category is popped before the input set is built. Reading
        with ``raw_data=True`` must reproduce that data as-is; reading with
        ``raw_data=False`` must match it only after foreign key failures are
        removed.
        """
        tdf = TicDatFactory(**dietSchema())
        addDietForeignKeys(tdf)
        sourceDat = tdf.copy_tic_dat(dietData())
        sourceDat.categories.pop("fat")
        inputSet = create_inputset_mock(tdf, sourceDat)

        rawDat = tdf.opalytics.create_tic_dat(inputSet, raw_data=True)
        self.assertTrue(tdf._same_data(rawDat, sourceDat))

        purgedDat = tdf.opalytics.create_tic_dat(inputSet, raw_data=False)
        self.assertFalse(tdf._same_data(purgedDat, sourceDat))
        tdf.remove_foreign_keys_failures(sourceDat)
        self.assertTrue(tdf._same_data(purgedDat, sourceDat))
예제 #5
0
    def testSix(self):
        """Exercise foreign key failure handling and ``obfusimplify``.

        Builds a schema of plants, lines, products and production-style tables
        linked by single-field and multi-field foreign keys, then checks that:

        * ``find_foreign_key_failures`` reports dangling references,
        * ``remove_foreign_keys_failures`` restores the clean data, in one
          pass with ``propagate=True`` and in two passes with
          ``propagate=False``,
        * ``obfusimplify`` keeps table sizes, records every original entity
          name in its renamings, and honours ``skip_tables``.
        """
        tdf = TicDatFactory(plants = [["name"], ["stuff", "otherstuff"]],
                            lines = [["name"], ["plant", "weird stuff"]],
                            line_descriptor = [["name"], ["booger"]],
                            products = [["name"],["gover"]],
                            production = [["line", "product"], ["min", "max"]],
                            pureTestingTable = [[], ["line", "plant", "product", "something"]],
                            extraProduction = [["line", "product"], ["extramin", "extramax"]],
                            weirdProduction = [["line1", "line2", "product"], ["weirdmin", "weirdmax"]])
        tdf.add_foreign_key("production", "lines", ("line", "name"))
        tdf.add_foreign_key("production", "products", ("product", "name"))
        tdf.add_foreign_key("lines", "plants", ("plant", "name"))
        tdf.add_foreign_key("line_descriptor", "lines", ("name", "name"))
        # Every pureTestingTable field except "something" points at the table
        # named after it ("line" -> "lines", and so on).
        for f in set(tdf.data_fields["pureTestingTable"]).difference({"something"}):
            tdf.add_foreign_key("pureTestingTable", "%ss"%f, (f,"name"))
        tdf.add_foreign_key("extraProduction", "production", (("line", "line"), ("product","product")))
        tdf.add_foreign_key("weirdProduction", "production", (("line1", "line"), ("product","product")))
        tdf.add_foreign_key("weirdProduction", "extraProduction", (("line2","line"), ("product","product")))

        goodDat = tdf.TicDat()
        goodDat.plants["Cleveland"] = ["this", "that"]
        goodDat.plants["Newark"]["otherstuff"] =1
        goodDat.products["widgets"] = goodDat.products["gadgets"] = "shizzle"

        for i,p in enumerate(goodDat.plants):
            goodDat.lines[i]["plant"] = p

        for i,(pl, pd) in enumerate(itertools.product(goodDat.lines, goodDat.products)):
            goodDat.production[pl, pd] = {"min":1, "max":10+i}

        badDat1 = tdf.copy_tic_dat(goodDat)
        badDat1.production["notaline", "widgets"] = [0,1]
        badDat2 = tdf.copy_tic_dat(badDat1)

        fk, fkm = _ForeignKey, _ForeignKeyMapping
        self.assertTrue(tdf.find_foreign_key_failures(badDat1) == tdf.find_foreign_key_failures(badDat2) ==
                        {fk('production', 'lines', fkm('line', 'name'), 'many-to-one'):
                             (('notaline',), (('notaline', 'widgets'),))})
        # Adding the missing line fixes the production reference but the new
        # line itself now points at a plant that does not exist.
        badDat1.lines["notaline"]["plant"] = badDat2.lines["notaline"]["plant"] = "notnewark"
        self.assertTrue(tdf.find_foreign_key_failures(badDat1) == tdf.find_foreign_key_failures(badDat2) ==
                        {fk('lines', 'plants', fkm('plant', 'name'), 'many-to-one'):
                             (('notnewark',), ('notaline',))})
        tdf.remove_foreign_keys_failures(badDat1, propagate=False)
        tdf.remove_foreign_keys_failures(badDat2, propagate=True)
        self.assertTrue(tdf._same_data(badDat2, goodDat) and not tdf.find_foreign_key_failures(badDat2))
        # Without propagation the production row that referenced the removed
        # line is still there, so a second pass is required.
        self.assertTrue(tdf.find_foreign_key_failures(badDat1) ==
                {fk('production', 'lines', fkm('line', 'name'), 'many-to-one'):
                     (('notaline',), (('notaline', 'widgets'),))})

        tdf.remove_foreign_keys_failures(badDat1, propagate=False)
        self.assertTrue(tdf._same_data(badDat1, goodDat) and not tdf.find_foreign_key_failures(badDat1))

        # list(...) keeps this working on Python 3, where keys() returns a
        # view that does not support "+".
        lineOffset = len(goodDat.lines)
        plantNames = list(goodDat.plants.keys())
        for i,p in enumerate(plantNames + plantNames):
            goodDat.lines[i+lineOffset]["plant"] = p
        # Describe the odd-numbered lines only, storing key+10. The condition
        # uses the current line key rather than the leftover index of the
        # previous loop, which is what the descriptor check further down relies on.
        for lineKey in list(goodDat.lines):
            if lineKey%2:
                goodDat.line_descriptor[lineKey] = lineKey+10

        for i,(l,pl,pdct) in enumerate(sorted(itertools.product(goodDat.lines, goodDat.plants, goodDat.products))):
            goodDat.pureTestingTable.append((l,pl,pdct,i))
        self.assertFalse(tdf.find_foreign_key_failures(goodDat))
        badDat = tdf.copy_tic_dat(goodDat)
        badDat.pureTestingTable.append(("j", "u", "nk", "ay"))
        # The appended junk row sits right after the rows copied from goodDat.
        badRowIndex = len(goodDat.pureTestingTable)
        self.assertTrue(tdf.find_foreign_key_failures(badDat) ==
         {fk('pureTestingTable', 'plants', fkm('plant', 'name'), 'many-to-one'): (('u',),(badRowIndex,)),
          fk('pureTestingTable', 'products', fkm('product', 'name'), 'many-to-one'): (('nk',), (badRowIndex,)),
          fk('pureTestingTable', 'lines', fkm('line', 'name'), 'many-to-one'): (('j',), (badRowIndex,))})

        obfudat = tdf.obfusimplify(goodDat, freeze_it=True)
        self.assertTrue(all(len(getattr(obfudat.copy, t)) == len(getattr(goodDat, t))
                            for t in tdf.all_tables))
        # Built once: the second element of each renamings value is compared
        # against the original entity names.
        renamedOriginals = {renaming[1] for renaming in obfudat.renamings.values()}
        for n in itertools.chain(goodDat.plants, goodDat.lines, goodDat.products):
            self.assertTrue(n in renamedOriginals)
            self.assertFalse(n in obfudat.renamings)
        self.assertTrue(obfudat.copy.plants['P2']['otherstuff'] == 1)
        self.assertFalse(tdf._same_data(obfudat.copy, goodDat))
        for k,r in obfudat.copy.line_descriptor.items():
            # list(...) because values() is not indexable on Python 3.
            i = list(r.values())[0] - 10
            self.assertTrue(i%2 and (list(goodDat.line_descriptor[i].values())[0] == i+10))

        obfudat2 = tdf.obfusimplify(goodDat, {"plants": "P", "lines" : "L", "products" :"PR"})
        self.assertTrue(tdf._same_data(obfudat.copy, obfudat2.copy))

        obfudat3 = tdf.obfusimplify(goodDat, skip_tables=["plants", "lines", "products"])
        self.assertTrue(tdf._same_data(obfudat3.copy, goodDat))

        obfudat4 = tdf.obfusimplify(goodDat, skip_tables=["lines", "products"])
        self.assertFalse(tdf._same_data(obfudat4.copy, goodDat))
        self.assertFalse(tdf._same_data(obfudat4.copy, obfudat.copy))
예제 #6
0
    def testDiet(self):
        """Round-trip the diet data through the SQL helpers and check foreign key cleanup.

        Returns immediately when ``self.can_run`` is falsy. The nested helper
        is run once against a plain diet factory and once against a factory
        carrying default values and the diet foreign keys.
        """
        if not self.can_run:
            return

        def checkSqlRoundTrips(factory):
            """Write and re-read the diet data via a db file and a SQL text file."""
            tableData = {}
            for tableName in factory.primary_key_fields:
                tableData[tableName] = getattr(dietData(), tableName)
            reference = factory.freeze_me(factory.TicDat(**tableData))

            # --- database file round trip ---
            dbPath = makeCleanPath(os.path.join(_scratchDir, "diet.db"))
            factory.sql.write_db_data(reference, dbPath)
            self.assertFalse(factory.sql.find_duplicates(dbPath))
            loaded = factory.sql.create_tic_dat(dbPath)
            self.assertTrue(factory._same_data(reference, loaded))

            def mutateLoaded():
                # ``loaded`` is looked up when this runs, so each call targets
                # whichever data set was read most recently.
                loaded.categories["calories"]["minNutrition"] = 12

            def rewriteDbWithoutOverwrite():
                factory.sql.write_db_data(reference, dbPath)

            mutateLoaded()
            self.assertFalse(factory._same_data(reference, loaded))

            # Writing over an existing db file must fail unless explicitly allowed.
            self.assertTrue(self.firesException(rewriteDbWithoutOverwrite))
            factory.sql.write_db_data(reference, dbPath, allow_overwrite=True)
            loaded = factory.sql.create_tic_dat(dbPath, freeze_it=True)
            self.assertTrue(factory._same_data(reference, loaded))
            # A frozen result rejects edits and is left unchanged.
            self.assertTrue(self.firesException(mutateLoaded))
            self.assertTrue(factory._same_data(reference, loaded))

            # --- SQL text file round trip ---
            sqlPath = makeCleanPath(os.path.join(_scratchDir, "diet.sql"))
            factory.sql.write_sql_file(reference, sqlPath)
            loaded = factory.sql.create_tic_dat_from_sql(sqlPath)
            self.assertTrue(factory._same_data(reference, loaded))
            mutateLoaded()
            self.assertFalse(factory._same_data(reference, loaded))

            factory.sql.write_sql_file(
                reference, sqlPath, include_schema=True, allow_overwrite=True)
            loaded = factory.sql.create_tic_dat_from_sql(
                sqlPath, includes_schema=True, freeze_it=True)
            self.assertTrue(factory._same_data(reference, loaded))
            self.assertTrue(self.firesException(mutateLoaded))
            self.assertTrue(factory._same_data(reference, loaded))

        checkSqlRoundTrips(TicDatFactory(**dietSchema()))

        tdf = TicDatFactory(**dietSchema())
        self.assertFalse(tdf.foreign_keys)
        defaultValues = {
            "categories": {'maxNutrition': float("inf"), 'minNutrition': 0.0},
            "foods": {'cost': 0.0},
            "nutritionQuantities": {'qty': 0.0},
        }
        tdf.set_default_values(**defaultValues)
        addDietForeignKeys(tdf)

        # Both parent tables must be ordered ahead of the child table.
        tableOrder = tdf.sql._ordered_tables()
        for parentTable in ("categories", "foods"):
            self.assertTrue(
                tableOrder.index(parentTable) < tableOrder.index("nutritionQuantities"))

        dietTables = {}
        for tableName in tdf.primary_key_fields:
            dietTables[tableName] = getattr(dietData(), tableName)
        ticDat = tdf.TicDat(**dietTables)
        self._test_generic_copy(ticDat, tdf)
        self._test_generic_copy(ticDat, tdf, ["nutritionQuantities"])
        pristine = tdf.copy_tic_dat(ticDat)
        self.assertTrue(tdf._same_data(ticDat, pristine))
        self.assertFalse(tdf.find_foreign_key_failures(ticDat))

        # Add one row with an unknown category and one with an unknown food.
        ticDat.nutritionQuantities['hot dog', 'boger'] = -12
        ticDat.nutritionQuantities['junk', 'protein'] = -12
        expectedFailures = {
            ('nutritionQuantities', 'foods', ('food', 'name'), 'many-to-one'):
                (('junk', ), (('junk', 'protein'), )),
            ('nutritionQuantities', 'categories', ('category', 'name'), 'many-to-one'):
                (('boger', ), (('hot dog', 'boger'), )),
        }
        self.assertTrue(tdf.find_foreign_key_failures(ticDat) == expectedFailures)

        self.assertFalse(tdf._same_data(ticDat, pristine))
        tdf.remove_foreign_keys_failures(ticDat)
        self.assertFalse(tdf.find_foreign_key_failures(ticDat))
        self.assertTrue(tdf._same_data(ticDat, pristine))

        checkSqlRoundTrips(tdf)