Esempio n. 1
0
    def testDateTime(self):
        """Round-trip datetime-typed fields and parameters through JSON.

        Builds a factory whose data fields and parameters are datetime-typed,
        writes a TicDat to JSON, reads it back, and checks that the reloaded
        copy holds parsed datetime objects (not the raw strings that were
        originally supplied).
        """
        tdf = TicDatFactory(table_with_stuffs=[["field one"], ["field two"]],
                            parameters=[["a"], ["b"]])
        # p1 has a string default that must parse as a date; p2 is nullable.
        tdf.add_parameter("p1", "Dec 15 1970", datetime=True)
        tdf.add_parameter("p2", None, datetime=True, nullable=True)
        tdf.set_data_type("table_with_stuffs", "field one", datetime=True)
        tdf.set_data_type("table_with_stuffs",
                          "field two",
                          datetime=True,
                          nullable=True)

        # Mix of date strings, datetime objects, and None -- all of which the
        # datetime data types above should accept without failures.
        dat = tdf.TicDat(table_with_stuffs=[[
            "July 11 1972", None
        ], [datetime.datetime.now(),
            dateutil.parser.parse("Sept 11 2011")]],
                         parameters=[["p1", "7/11/1911"], ["p2", None]])
        self.assertFalse(
            tdf.find_data_type_failures(dat)
            or tdf.find_data_row_failures(dat))

        file_one = os.path.join(_scratchDir, "datetime.json")
        tdf.json.write_file(dat, file_one)
        dat_1 = tdf.json.create_tic_dat(file_one)
        # Not identical: `dat` still holds raw strings (e.g. "7/11/1911")
        # where the reloaded copy holds parsed datetime objects.
        self.assertFalse(tdf._same_data(dat, dat_1))
        self.assertTrue(
            isinstance(dat_1.parameters["p1"]["b"], datetime.datetime))
        # All primary keys of table_with_stuffs come back as datetimes ...
        self.assertTrue(
            all(
                isinstance(_, datetime.datetime)
                for _ in dat_1.table_with_stuffs))
        # ... and all data values are datetimes or None (nullable field two).
        self.assertTrue(
            all(
                isinstance(_, datetime.datetime) or _ is None
                for v in dat_1.table_with_stuffs.values() for _ in v.values()))
Esempio n. 2
0
    def test_numericish_text(self):
        """CSV handling of numeric-looking strings such as "010".

        Default reading coerces numeric-looking text to numbers, so strings
        like "010" do not survive a round trip unchanged; appropriate data
        types and default values preserve them.
        """
        dir_path = os.path.join(_scratchDir, "numericish")
        tdf = TicDatFactory(parameters=[["Key"], ["Value"]])
        dat = tdf.TicDat(
            parameters=[["a", "100"], ["b", "010"], [3, "200"], ["d", "020"]])

        # NOTE: round_trip closes over tdf and dat, so rebinding either name
        # below changes how subsequent round trips write/read the directory.
        def round_trip():
            tdf.csv.write_directory(dat, makeCleanDir(dir_path))
            return tdf.csv.create_tic_dat(dir_path)

        # Plain factory: "100" -> 100 etc. on read, so data no longer matches.
        dat2 = round_trip()
        self.assertFalse(tdf._same_data(dat, dat2))
        tdf = TicDatFactory(parameters=[["Key"], ["Value"]])
        tdf.set_data_type("parameters",
                          "Key",
                          strings_allowed='*',
                          number_allowed=True)
        # An empty-string default changes how Value text is read back --
        # NOTE(review): relies on ticdat's csv inference rules; confirm there.
        tdf.set_default_value("parameters", "Value", "")
        dat2 = round_trip()
        self.assertTrue(tdf._same_data(dat, dat2))
        # Strings-only Value type: text like "010"/"020" is kept verbatim.
        tdf = TicDatFactory(parameters=[["Key"], ["Value"]])
        tdf.set_data_type("parameters",
                          "Value",
                          strings_allowed='*',
                          number_allowed=False)
        dat = tdf.TicDat(parameters=[["a", "100"], ["b", "010"], ["c", "200"],
                                     ["d", "020"]])
        dat2 = round_trip()
        self.assertTrue(tdf._same_data(dat, dat2))
Esempio n. 3
0
    def testNulls(self):
        """Null and infinity handling for MS Access (.accdb) round trips.

        First verifies None values survive a plain write/read. Then, with
        infinity_io_flag set to None, checks that nulls on disk are read back
        as the infinite endpoint implied by each field's data type.
        """
        tdf = TicDatFactory(table=[["field one"], ["field two"]])
        dat = tdf.TicDat(table=[[None, 100], [200, "this"], ["that", 300],
                                [300, None], [400, "that"]])
        file_path = os.path.join(_scratchDir, "nulls.accdb")
        tdf.mdb.write_file(dat, file_path)
        dat_1 = tdf.mdb.create_tic_dat(file_path)
        self.assertTrue(tdf._same_data(dat, dat_1))

        # With max=inf allowed and infinity_io_flag None, the nulls written
        # above read back as +inf -- NOTE(review): per ticdat's infinity-flag
        # convention; confirm against the ticdat docs.
        tdf = TicDatFactory(table=[["field one"], ["field two"]])
        for f in ["field one", "field two"]:
            tdf.set_data_type("table", f, max=float("inf"), inclusive_max=True)
        tdf.set_infinity_io_flag(None)
        dat_inf = tdf.TicDat(
            table=[[float("inf"), 100], [200, "this"], ["that", 300],
                   [300, float("inf")], [400, "that"]])
        dat_1 = tdf.mdb.create_tic_dat(file_path)
        self.assertTrue(tdf._same_data(dat_inf, dat_1))
        # +inf round-trips through the null representation on disk.
        tdf.mdb.write_file(dat_inf, makeCleanPath(file_path))
        dat_1 = tdf.mdb.create_tic_dat(file_path)
        self.assertTrue(tdf._same_data(dat_inf, dat_1))

        # With min=-inf instead, the same file now reads its nulls as -inf,
        # so it no longer matches the +inf data but does match -inf data.
        tdf = TicDatFactory(table=[["field one"], ["field two"]])
        for f in ["field one", "field two"]:
            tdf.set_data_type("table",
                              f,
                              min=-float("inf"),
                              inclusive_min=True)
        tdf.set_infinity_io_flag(None)
        dat_1 = tdf.mdb.create_tic_dat(file_path)
        self.assertFalse(tdf._same_data(dat_inf, dat_1))
        dat_inf = tdf.TicDat(
            table=[[float("-inf"), 100], [200, "this"], ["that", 300],
                   [300, -float("inf")], [400, "that"]])
        self.assertTrue(tdf._same_data(dat_inf, dat_1))
Esempio n. 4
0
    def testSillyCleaningOpalyticsOne(self):
        """Opalytics cleaning of rows whose cData4 violates a strings-only type.

        With raw_data=True the mocked input set comes back unchanged; with
        raw_data=False the offending rows of table "c" are purged, which is
        confirmed by popping the same rows from the original and re-comparing.
        """
        tic_factory = TicDatFactory(**sillyMeSchema())
        pan_factory = PanDatFactory(**sillyMeSchema())
        # Identical restriction on both factories: cData4 may only be 'd'.
        for factory in (tic_factory, pan_factory):
            factory.set_data_type("c",
                                  "cData4",
                                  number_allowed=False,
                                  strings_allowed=['d'])

        tic_dat = tic_factory.TicDat(**sillyMeData())
        mock_input = create_inputset_mock(tic_factory, tic_dat)

        raw_pan = pan_factory.opalytics.create_pan_dat(mock_input,
                                                       raw_data=True)
        self.assertTrue(
            tic_factory._same_data(pan_factory.copy_to_tic_dat(raw_pan),
                                   tic_dat))

        purged_pan = pan_factory.opalytics.create_pan_dat(mock_input,
                                                          raw_data=False)
        self.assertFalse(
            tic_factory._same_data(pan_factory.copy_to_tic_dat(purged_pan),
                                   tic_dat))

        # Drop the last and first rows of "c" -- the ones cleaning removed.
        tic_dat.c.pop()
        tic_dat.c.pop(0)
        self.assertTrue(
            tic_factory._same_data(pan_factory.copy_to_tic_dat(purged_pan),
                                   tic_dat))
Esempio n. 5
0
    def test_empty_text_none(self):
        """Empty-string vs None round trips through xls files.

        By default None is written as empty text and read back as "", so the
        reloaded data matches dat_s rather than dat_n. A nullable Key type
        plus a None default for Value flips that.
        """
        # this is a naive data scientist who isn't using the parameters functionality
        filePath = os.path.join(_scratchDir, "empty.xls")
        tdf = TicDatFactory(parameters=[["Key"], ["Value"]])
        dat_n = tdf.TicDat(parameters=[[None, 100], ["b", 10.01],
                                       ["three", 200], ["d", None]])
        dat_s = tdf.TicDat(
            parameters=[["", 100], ["b", 10.01], ["three", 200], ["d", ""]])

        # NOTE: round_trip closes over tdf, so rebinding tdf below changes
        # how the next round trip reads the file (dat_n is always written).
        def round_trip():
            tdf.xls.write_file(dat_n, filePath, allow_overwrite=True)
            return tdf.xls.create_tic_dat(filePath)

        dat2 = round_trip()
        self.assertTrue(
            tdf._same_data(dat_s, dat2) and not tdf._same_data(dat_n, dat2))
        tdf = TicDatFactory(parameters=[["Key"], ["Value"]])
        tdf.set_data_type("parameters", "Key", nullable=True)
        tdf.set_default_value(
            "parameters", "Value",
            None)  # this default alone will mess with number reading
        dat2 = round_trip()
        self.assertTrue(not tdf._same_data(dat_s, dat2)
                        and tdf._same_data(dat_n, dat2))

        # Generic ('*') table: read back as a DataFrame-like object, hence
        # .shape -- NOTE(review): assumes ticdat's generic tables are pandas
        # backed; confirm against the ticdat docs.
        tdf = TicDatFactory(parameters='*')
        dat = tdf.xls.create_tic_dat(filePath)
        self.assertTrue(dat.parameters.shape == (4, 2))
Esempio n. 6
0
    def testDietCleaningOpalyticsTwo(self):
        """Opalytics cleaning with a data-type restriction plus foreign keys.

        Rows of "categories" whose maxNutrition falls outside [66, inf] are
        purged when raw_data=False, and cascade removal of the child rows that
        referenced them is mirrored with remove_foreign_key_failures.
        """
        tdf = TicDatFactory(**dietSchema())
        addDietForeignKeys(tdf)
        tdf.set_data_type("categories",
                          "maxNutrition",
                          min=66,
                          inclusive_max=True)
        ticDat = tdf.copy_tic_dat(dietData())

        input_set = create_inputset_mock(tdf, ticDat)
        # A parallel PanDatFactory with the identical schema and restriction.
        pdf = PanDatFactory(**dietSchema())
        addDietForeignKeys(pdf)
        pdf.set_data_type("categories",
                          "maxNutrition",
                          min=66,
                          inclusive_max=True)

        # raw_data=True bypasses cleaning entirely.
        panDat = pdf.opalytics.create_pan_dat(input_set, raw_data=True)
        self.assertTrue(tdf._same_data(pdf.copy_to_tic_dat(panDat), ticDat))

        panDatPurged = pdf.opalytics.create_pan_dat(input_set, raw_data=False)
        self.assertFalse(
            tdf._same_data(pdf.copy_to_tic_dat(panDatPurged), ticDat))

        # Removing the offending category alone isn't enough: child tables
        # still hold rows that referenced it ...
        ticDat.categories.pop("fat")
        self.assertFalse(
            tdf._same_data(pdf.copy_to_tic_dat(panDatPurged), ticDat))
        # ... until the dangling foreign-key rows are removed too.
        tdf.remove_foreign_key_failures(ticDat)
        self.assertTrue(
            tdf._same_data(pdf.copy_to_tic_dat(panDatPurged), ticDat))
Esempio n. 7
0
    def testDietCleaningFive(self):
        """Opalytics cleaning driven by a row predicate plus a data type.

        Categories failing either the maxNutrition >= 66 predicate or the
        minNutrition <= 0 type are purged when raw_data=False; the purge is
        mirrored by popping those rows and removing dangling foreign keys.
        """
        tdf = TicDatFactory(**dietSchema())
        tdf.add_data_row_predicate("categories",
                                   lambda row: row["maxNutrition"] >= 66)
        tdf.set_data_type("categories",
                          "minNutrition",
                          max=0,
                          inclusive_max=True)
        addDietForeignKeys(tdf)
        ticDat = tdf.copy_tic_dat(dietData())

        input_set = create_inputset_mock(tdf, ticDat)

        # raw_data=True bypasses cleaning entirely.
        self.assertTrue(
            tdf._same_data(
                tdf.opalytics.create_tic_dat(input_set, raw_data=True),
                ticDat))

        ticDatPurged = tdf.opalytics.create_tic_dat(input_set, raw_data=False)
        self.assertFalse(tdf._same_data(ticDatPurged, ticDat))

        ticDat.categories.pop("fat")
        ticDat.categories.pop("calories")
        ticDat.categories.pop("protein")

        # Still differs: child tables retain rows referencing the popped
        # categories until the foreign-key failures are removed.
        self.assertFalse(tdf._same_data(ticDatPurged, ticDat))
        # Fixed: the API is remove_foreign_key_failures (singular "key"),
        # as used elsewhere in this suite; the prior
        # remove_foreign_keys_failures spelling raises AttributeError.
        tdf.remove_foreign_key_failures(ticDat)
        self.assertTrue(tdf._same_data(ticDatPurged, ticDat))
Esempio n. 8
0
    def testBooleansAndNulls(self):
        """Booleans, nulls, and infinities through JSON (verbose and terse).

        The same data is written with verbose=True (dict-per-row) and
        verbose=False (compact) JSON layouts, and both must round-trip
        identically. Then infinity_io_flag None maps nulls on disk to the
        infinite endpoint implied by each field's data type.
        """
        tdf = TicDatFactory(table=[["field one"], ["field two"]])
        dat = tdf.TicDat(table=[[None, 100], [200, True], [False, 300],
                                [300, None], [400, False]])
        file_one = os.path.join(_scratchDir, "boolDefaults_1.json")
        file_two = os.path.join(_scratchDir, "boolDefaults_2.json")
        tdf.json.write_file(dat, file_one, verbose=True)
        tdf.json.write_file(dat, file_two, verbose=False)
        dat_1 = tdf.json.create_tic_dat(file_one)
        dat_2 = tdf.json.create_tic_dat(file_two)
        self.assertTrue(tdf._same_data(dat, dat_1))
        self.assertTrue(tdf._same_data(dat, dat_2))

        # With max=inf allowed and infinity_io_flag None, the nulls written
        # above read back as +inf -- NOTE(review): per ticdat's infinity-flag
        # convention; confirm against the ticdat docs.
        tdf = TicDatFactory(table=[["field one"], ["field two"]])
        for f in ["field one", "field two"]:
            tdf.set_data_type("table", f, max=float("inf"), inclusive_max=True)
        tdf.set_infinity_io_flag(None)
        dat_inf = tdf.TicDat(
            table=[[float("inf"), 100], [200, True], [False, 300],
                   [300, float("inf")], [400, False]])
        dat_1 = tdf.json.create_tic_dat(file_one)
        dat_2 = tdf.json.create_tic_dat(file_two)
        self.assertTrue(tdf._same_data(dat_inf, dat_1))
        self.assertTrue(tdf._same_data(dat_inf, dat_2))
        # +inf round-trips through the null representation in both layouts.
        tdf.json.write_file(dat_inf,
                            file_one,
                            verbose=True,
                            allow_overwrite=True)
        tdf.json.write_file(dat_inf,
                            file_two,
                            verbose=False,
                            allow_overwrite=True)
        dat_1 = tdf.json.create_tic_dat(file_one)
        dat_2 = tdf.json.create_tic_dat(file_two)
        self.assertTrue(tdf._same_data(dat_inf, dat_1))
        self.assertTrue(tdf._same_data(dat_inf, dat_2))

        # With min=-inf instead, the same files now read nulls as -inf, so
        # they no longer match the +inf data but do match -inf data.
        tdf = TicDatFactory(table=[["field one"], ["field two"]])
        for f in ["field one", "field two"]:
            tdf.set_data_type("table",
                              f,
                              min=-float("inf"),
                              inclusive_min=True)
        tdf.set_infinity_io_flag(None)
        dat_1 = tdf.json.create_tic_dat(file_one)
        dat_2 = tdf.json.create_tic_dat(file_two)
        self.assertFalse(tdf._same_data(dat_inf, dat_1))
        self.assertFalse(tdf._same_data(dat_inf, dat_2))
        dat_inf = tdf.TicDat(
            table=[[float("-inf"), 100], [200, True], [False, 300],
                   [300, -float("inf")], [400, False]])
        self.assertTrue(tdf._same_data(dat_inf, dat_1))
        self.assertTrue(tdf._same_data(dat_inf, dat_2))
Esempio n. 9
0
    def testIntHandling(self):
        """must_be_int data types make xls reads deliver Python ints.

        dat holds whole numbers deliberately entered as floats; dat2 is the
        same data read back through a factory with must_be_int on "big";
        dat3 is read through an untyped factory. All three compare equal as
        data, but differ in the Python types of the "big" values.
        """
        if not self.can_run:
            return
        tdf = TicDatFactory(boger=[["the"], ["big", "boger"]],
                            moger=[["the", "big"], ["boger"]],
                            woger=[[], ["the", "big", "boger"]])
        for t in ["boger", "moger", "woger"]:
            tdf.set_data_type(t, "big", must_be_int=True)
        # "big" appears as a data field (boger, woger) and as part of a
        # primary key (moger); whole-valued floats like 14.0 are used.
        dat = tdf.TicDat(boger={
            1: [1.0, "t"],
            "b": [12, 11.1],
            12.1: [14.0, 15.0]
        },
                         moger={
                             (1, 1.0): "t",
                             ("b", 12): 11.1,
                             (12.1, 14.0): 15.0
                         },
                         woger=[(1, 1.0, "t"), ("b", 12, 11.1),
                                (12.1, 14.0, 15.0)])
        filePath = os.path.join(_scratchDir, "intHandling.xls")
        tdf.xls.write_file(dat, filePath)
        dat2 = tdf.xls.create_tic_dat(filePath)

        # Same schema but no must_be_int type.
        tdf3 = TicDatFactory(boger=[["the"], ["big", "boger"]],
                             moger=[["the", "big"], ["boger"]],
                             woger=[[], ["the", "big", "boger"]])
        dat3 = tdf3.xls.create_tic_dat(filePath)
        self.assertFalse(
            any(map(tdf.find_data_type_failures, [dat, dat2, dat3])))
        self.assertTrue(all(tdf._same_data(dat, _) for _ in [dat2, dat3]))

        # Original in-memory data: some "big" values are floats (1.0, 14.0).
        self.assertFalse(
            all(
                isinstance(r["big"], int)
                for r in list(dat.boger.values()) + list(dat.woger)))
        # must_be_int read: every "big" value comes back as an int.
        self.assertTrue(
            all(
                isinstance(r["big"], int)
                for r in list(dat2.boger.values()) + list(dat2.woger)))
        # Untyped read: every "big" value comes back as a float.
        self.assertFalse(
            any(
                isinstance(r["big"], int)
                for r in list(dat3.boger.values()) + list(dat3.woger)))
        self.assertTrue(
            all(isinstance(_.woger[1]["big"], int) for _ in [dat, dat2]))

        # Same type-vs-read behavior for "big" inside moger's primary key.
        self.assertFalse(all(isinstance(k[-1], int) for k in dat.moger))
        self.assertTrue(any(isinstance(k[-1], int) for k in dat.moger))
        self.assertTrue(all(isinstance(k[-1], int) for k in dat2.moger))
        self.assertFalse(any(isinstance(k[-1], int) for k in dat3.moger))
Esempio n. 10
0
    def testNetflow(self):
        """CSV round trips for the netflow schema, with and without headers.

        Covers duplicate detection, the exception raised when headers are
        expected but absent, and how data types control whether numeric node
        names survive a round trip.
        """
        if not self.can_run:
            return
        tdf = TicDatFactory(**netflowSchema())
        ticDat = tdf.TicDat(
            **{t: getattr(netflowData(), t)
               for t in tdf.primary_key_fields})
        self._test_generic_copy(ticDat, tdf)
        self._test_generic_copy(ticDat, tdf, ["arcs", "nodes"])
        dirPath = os.path.join(_scratchDir, "netflow")
        tdf.csv.write_directory(ticDat, dirPath)
        csvTicDat = tdf.csv.create_tic_dat(dirPath, freeze_it=True)
        self.assertFalse(tdf.csv.find_duplicates(dirPath))
        self.assertTrue(tdf._same_data(ticDat, csvTicDat))
        # Headerless read of a headered directory misinterprets the header
        # row as data, so the result differs.
        csvTicDat = tdf.csv.create_tic_dat(dirPath,
                                           freeze_it=True,
                                           headers_present=False)
        self.assertFalse(tdf._same_data(ticDat, csvTicDat))
        tdf.csv.write_directory(ticDat,
                                dirPath,
                                write_header=False,
                                allow_overwrite=True)
        # Headered read of a headerless directory raises.
        self.assertTrue(
            self.firesException(
                lambda: tdf.csv.create_tic_dat(dirPath, freeze_it=True)))
        csvTicDat = tdf.csv.create_tic_dat(dirPath,
                                           headers_present=False,
                                           freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, csvTicDat))

        # the casting to floats is controlled by data types and default values
        # Node key 12 (a number) does not survive the default round trip ...
        ticDat.nodes[12] = {}
        tdf.csv.write_directory(ticDat, dirPath, allow_overwrite=True)
        csvTicDat = tdf.csv.create_tic_dat(dirPath, freeze_it=True)
        self.assertFalse(tdf._same_data(ticDat, csvTicDat))
        # ... unless the "name" field's data type allows numbers as well.
        tdf2 = TicDatFactory(**netflowSchema())
        tdf2.set_data_type("nodes",
                           "name",
                           strings_allowed='*',
                           number_allowed=True)
        csvTicDat = tdf2.csv.create_tic_dat(dirPath, freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, csvTicDat))

        # Writing the string key '12' alongside existing data first raises
        # (no allow_overwrite), then round-trips cleanly once overwritten.
        del (ticDat.nodes[12])
        ticDat.nodes['12'] = {}
        self.assertTrue(
            firesException(lambda: tdf.csv.write_directory(ticDat, dirPath)))
        tdf.csv.write_directory(ticDat, dirPath, allow_overwrite=True)
        csvTicDat = tdf.csv.create_tic_dat(dirPath, freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, csvTicDat))
Esempio n. 11
0
 def testSillyTwoTables(self):
     """CSV round trip of the two-table silly data set, with no duplicates."""
     if not self.can_run:
         return
     factory = TicDatFactory(**sillyMeSchema())
     # aField may hold arbitrary strings or numbers.
     factory.set_data_type("a",
                           "aField",
                           strings_allowed='*',
                           number_allowed=True)
     original = factory.TicDat(**sillyMeDataTwoTables())
     target_dir = os.path.join(_scratchDir, "sillyTwoTables")
     factory.csv.write_directory(original, target_dir)
     self.assertFalse(factory.csv.find_duplicates(target_dir))
     reloaded = factory.csv.create_tic_dat(target_dir)
     self.assertTrue(factory._same_data(original, reloaded))
Esempio n. 12
0
 def testDateTimeTwo(self):
     """Read a pandas-written xls sheet of timestamps via a datetime type."""
     xls_path = os.path.join(_scratchDir, "datetime_pd.xls")
     stamps = [
         utils.pd.Timestamp(text) for text in (
             "June 13 1960 4:30PM", "Dec 11 1970 1AM", "Sept 11 2001 9:30AM")
     ]
     frame = utils.pd.DataFrame({"a": stamps})
     frame.to_excel(xls_path, "Cool Runnings")
     factory = TicDatFactory(cool_runnings=[["a"], []])
     factory.set_data_type("cool_runnings", "a", datetime=True)
     loaded = factory.xls.create_tic_dat(xls_path)
     # The primary keys read back must be exactly the written timestamps.
     self.assertTrue(set(loaded.cool_runnings) == set(frame["a"]))
Esempio n. 13
0
 def testCreateModText(self):
     """OPL .mod text generation reflects primary keys and field types."""
     factory = TicDatFactory(
         table1=[["string_pk", "num_pk"], ["num_field1", "string_field2"]])
     # num_pk: non-negative float, strictly below infinity.
     factory.set_data_type("table1",
                           "num_pk",
                           min=0,
                           max=float("inf"),
                           inclusive_min=True,
                           inclusive_max=False)
     # string_field2: strings only.
     factory.set_data_type("table1",
                           "string_field2",
                           number_allowed=False,
                           strings_allowed='*')
     mod_text = create_opl_mod_text(factory)
     # Each declaration that must appear in the generated text.
     for snippet in ("key string string_pk;", "key float num_pk;",
                     "float num_field1;", "string string_field2;"):
         self.assertTrue(snippet in mod_text)
Esempio n. 14
0
 def testDateTimeTwo(self):  # this is good test for datetime stuff
     """Round-trip pandas Timestamps through both .xls and .xlsx readers.

     The .xls read must match exactly; the .xlsx read is allowed
     sub-millisecond drift in the round trip.
     """
     file = os.path.join(_scratchDir, "datetime_pd.xls")
     df = utils.pd.DataFrame({
         "a":
         list(
             map(utils.pd.Timestamp, [
                 "June 13 1960 4:30PM", "Dec 11 1970 1AM",
                 "Sept 11 2001 9:30AM"
             ]))
     })
     tdf = TicDatFactory(cool_runnings=[["a"], []])
     tdf.set_data_type("cool_runnings", "a", datetime=True)
     df.to_excel(file, "Cool Runnings")
     dat = tdf.xls.create_tic_dat(file)
     self.assertTrue(set(dat.cool_runnings) == set(df["a"]))
     # Repeat with the xlsx format; compare pairwise after sorting and
     # tolerate tiny floating-point drift rather than exact equality.
     file = file + "x"
     df.to_excel(file, "Cool Runnings")
     dat = tdf.xls.create_tic_dat(file)
     for x, y in zip(sorted(dat.cool_runnings), sorted(set(df["a"]))):
         delta = x - y
         self.assertTrue(abs(delta.total_seconds()) < 1e-4)
Esempio n. 15
0
    def test_empty_text_none(self):
        """Empty-string vs None round trips through csv directories.

        By default None is written as empty text and read back as "", so the
        reloaded data matches dat_s rather than dat_n. Nullable/None-default
        settings change what the empties come back as.
        """
        dir_path = os.path.join(_scratchDir, "empty_text")
        tdf = TicDatFactory(parameters=[["Key"], ["Value"]])
        dat_n = tdf.TicDat(parameters=[[None, 100], ["b", 10.01],
                                       ["three", 200], ["d", None]])
        dat_s = tdf.TicDat(
            parameters=[["", 100], ["b", 10.01], ["three", 200], ["d", ""]])

        # NOTE: round_trip closes over tdf, so rebinding tdf below changes
        # how the next round trip reads the directory (dat_n always written).
        def round_trip():
            tdf.csv.write_directory(dat_n, makeCleanDir(dir_path))
            return tdf.csv.create_tic_dat(dir_path)

        dat2 = round_trip()
        self.assertTrue(
            tdf._same_data(dat_s, dat2) and not tdf._same_data(dat_n, dat2))
        tdf = TicDatFactory(parameters=[["Key"], ["Value"]])
        tdf.set_data_type("parameters", "Key", nullable=True)
        tdf.set_default_value(
            "parameters", "Value",
            None)  # this default alone will mess with number reading
        # Matches neither original exactly, but some Values do read as None.
        dat2 = round_trip()
        self.assertFalse(
            tdf._same_data(dat_s, dat2) or tdf._same_data(dat_n, dat2))
        self.assertTrue(
            any(r["Value"] is None for r in dat2.parameters.values()))
        # With a nullable must_be_int Value type the empties read back as
        # None, reproducing dat_n.
        tdf = TicDatFactory(parameters=[["Key"], ["Value"]])
        tdf.set_data_type("parameters", "Key", nullable=True)
        tdf.set_data_type("parameters",
                          "Value",
                          nullable=True,
                          must_be_int=True)
        dat2 = round_trip()
        self.assertTrue(not tdf._same_data(dat_s, dat2)
                        and tdf._same_data(dat_n, dat2))
Esempio n. 16
0
 def testIssue45(self):
     """Numeric-looking strings ("022", "0044") survive csv round trips when
     the reading factory's data types say the field holds strings.

     dat_nums round-trips through a number-typed factory, dat_strs through a
     strings-only factory; both must compare equal to the originals.
     """
     raw_tdf = TicDatFactory(data=[["a"], ["b"]])
     tdf_nums = TicDatFactory(data=[["a"], ["b"]])
     tdf_nums.set_data_type("data", "a")
     tdf_strs = TicDatFactory(data=[["a"], ["b"]])
     tdf_strs.set_data_type("data",
                            "b",
                            strings_allowed='*',
                            number_allowed=False)
     dat_nums = tdf_nums.TicDat(data=[[1, 2], [3, 4], [22, 44]])
     # Fixed: build the string data with the strings-typed factory (was
     # tdf_nums, a copy/paste slip). TicDat construction itself does not
     # apply data types, so this is a consistency fix, not a behavior change
     # -- NOTE(review): confirm construction stays type-agnostic in ticdat.
     dat_strs = tdf_strs.TicDat(
         data=[["1", "2"], ["3", "4"], ["022", "0044"]])
     dirs = [
         os.path.join(_scratchDir, _)
         for _ in ["dat_nums_csv", "dat_strs_csv"]
     ]
     raw_tdf.csv.write_directory(dat_nums, dirs[0])
     dat_nums_2 = tdf_nums.csv.create_tic_dat(dirs[0])
     raw_tdf.csv.write_directory(dat_strs, dirs[1])
     dat_strs_2 = tdf_strs.csv.create_tic_dat(dirs[1])
     self.assertTrue(raw_tdf._same_data(dat_nums, dat_nums_2))
     self.assertTrue(raw_tdf._same_data(dat_strs, dat_strs_2))
Esempio n. 17
0
        def doTest(headersPresent):
            """Exercise csv round trips of the silly schema against factories
            whose schemas reorder, rename, drop, or add fields and tables.

            headersPresent toggles both writing and reading csv headers,
            which flips several expectations between name-based and
            position-based field matching.
            """
            tdf = TicDatFactory(**sillyMeSchema())
            # Allow any string or number in every primary-key field.
            for t, flds in tdf.primary_key_fields.items():
                for f in flds:
                    tdf.set_data_type(t,
                                      f,
                                      number_allowed=True,
                                      strings_allowed='*')
            ticDat = tdf.TicDat(**sillyMeData())
            # schema2: permuted primary-key field order on table "b".
            schema2 = sillyMeSchema()
            schema2["b"][0] = ("bField2", "bField1", "bField3")
            # schema3: permuted data field order on table "a".
            schema3 = sillyMeSchema()
            schema3["a"][1] = ("aData2", "aData3", "aData1")
            # schema4: drops one data field from table "a".
            schema4 = sillyMeSchema()
            schema4["a"][1] = ("aData1", "aData3")
            # schema5/schema5b: fold the pk fields into the data fields so
            # "a" and "b" become keyless; 5 appends pks, 5b prepends them.
            schema5 = sillyMeSchema()
            _tuple = lambda x: tuple(x) if utils.containerish(x) else (x, )
            for t in ("a", "b"):
                schema5[t][1] = _tuple(schema5[t][1]) + _tuple(schema5[t][0])
            schema5["a"][0], schema5["b"][0] = (), []
            schema5b = sillyMeSchema()
            for t in ("a", "b"):
                schema5b[t][1] = _tuple(schema5b[t][0]) + _tuple(
                    schema5b[t][1])
            schema5b["a"][0], schema5b["b"][0] = (), []
            # schema6: adds an extra table "d" not present in the data.
            schema6 = sillyMeSchema()
            schema6["d"] = [("dField", ), []]

            tdf2, tdf3, tdf4, tdf5, tdf5b, tdf6 = (TicDatFactory(**x)
                                                   for x in (schema2, schema3,
                                                             schema4, schema5,
                                                             schema5b,
                                                             schema6))
            for tdf_ in [tdf2, tdf3, tdf4, tdf5, tdf5b, tdf6]:
                for t, flds in tdf_.primary_key_fields.items():
                    for f in flds:
                        tdf_.set_data_type(t,
                                           f,
                                           number_allowed=True,
                                           strings_allowed='*')
            tdf5.set_generator_tables(["a", "c"])
            tdf5b.set_generator_tables(("a", "c"))

            dirPath = makeCleanDir(os.path.join(_scratchDir, "silly"))
            tdf.csv.write_directory(ticDat,
                                    dirPath,
                                    write_header=headersPresent)

            # Permuted pk order: with headers, fields match by name, so the
            # reordered key tuples make the data differ from the original.
            ticDat2 = tdf2.csv.create_tic_dat(dirPath,
                                              headers_present=headersPresent)
            (self.assertFalse if headersPresent else self.assertTrue)(
                tdf._same_data(ticDat, ticDat2))

            # Permuted data-field order: harmless when headers identify the
            # columns by name.
            ticDat3 = tdf3.csv.create_tic_dat(dirPath,
                                              headers_present=headersPresent)
            (self.assertTrue if headersPresent else self.assertFalse)(
                tdf._same_data(ticDat, ticDat3))

            # Dropped field: readable only when headers identify the columns;
            # the surviving values must match the original row for row.
            if headersPresent:
                ticDat4 = tdf4.csv.create_tic_dat(
                    dirPath, headers_present=headersPresent)
                for t in ("a", "b"):
                    for k, v in getattr(ticDat4, t).items():
                        for _k, _v in v.items():
                            self.assertTrue(getattr(ticDat, t)[k][_k] == _v)
                        if set(v) == set(getattr(ticDat, t)[k]):
                            self.assertTrue(t == "b")
                        else:
                            self.assertTrue(t == "a")
            else:
                self.assertTrue(
                    self.firesException(lambda: tdf4.csv.create_tic_dat(
                        dirPath, headers_present=headersPresent)))

            ticDat5 = tdf5.csv.create_tic_dat(dirPath,
                                              headers_present=headersPresent)
            (self.assertTrue if headersPresent else self.assertFalse)(
                tdf5._same_data(tdf._keyless(ticDat), ticDat5))
            # Generator tables come back as callables; ordinary ones do not.
            self.assertTrue(
                callable(ticDat5.a) and callable(ticDat5.c)
                and not callable(ticDat5.b))

            ticDat5b = tdf5b.csv.create_tic_dat(dirPath,
                                                headers_present=headersPresent)
            self.assertTrue(tdf5b._same_data(tdf._keyless(ticDat), ticDat5b))
            self.assertTrue(
                callable(ticDat5b.a) and callable(ticDat5b.c)
                and not callable(ticDat5b.b))

            ticDat6 = tdf6.csv.create_tic_dat(dirPath,
                                              headers_present=headersPresent)
            self.assertTrue(tdf._same_data(ticDat, ticDat6))
            # Comparing via tdf6 trips over the extra table "d" that ticDat
            # lacks -- NOTE(review): presumed cause of the exception; verify.
            self.assertTrue(
                firesException(lambda: tdf6._same_data(ticDat, ticDat6)))
            self.assertTrue(hasattr(ticDat6, "d") and utils.dictish(ticDat6.d))
            # A factory where every field is a data field, used below to
            # write raw rows (including deliberate duplicates) to csv.
            allDataTdf = TicDatFactory(
                **{
                    t: [[],
                        tdf.primary_key_fields.get(t, ()) +
                        tdf.data_fields.get(t, ())]
                    for t in tdf.all_tables
                })

            def writeData(data):
                """Write the same raw rows into tables a, b, and c."""
                td = allDataTdf.TicDat(a=data, b=data, c=data)
                allDataTdf.csv.write_directory(td,
                                               dirPath,
                                               allow_overwrite=True,
                                               write_header=headersPresent)

            # Two rows share pk 1 in "a"; reading dedups, find_duplicates
            # reports the pk and its occurrence count.
            writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)])
            ticDatMan = tdf.csv.create_tic_dat(dirPath,
                                               headers_present=headersPresent,
                                               freeze_it=True)
            self.assertTrue(len(ticDatMan.a) == 2 and len(ticDatMan.b) == 3)
            self.assertTrue(ticDatMan.b[(1, 20, 30)]["bData"] == 40)
            rowCount = tdf.csv.find_duplicates(dirPath,
                                               headers_present=headersPresent)
            self.assertTrue(
                set(rowCount) == {'a'} and set(rowCount["a"]) == {1}
                and rowCount["a"][1] == 2)

            # Adding a fourth row creates duplicates in "b" as well.
            writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40),
                       (1, 20, 30, 12)])
            rowCount = tdf.csv.find_duplicates(dirPath,
                                               headers_present=headersPresent)
            self.assertTrue(
                set(rowCount) == {'a', 'b'} and set(rowCount["a"]) == {1}
                and rowCount["a"][1] == 3)
            self.assertTrue(
                set(rowCount["b"]) == {(1, 20, 30)}
                and rowCount["b"][1, 20, 30] == 2)
Esempio n. 18
0
    def testSilly(self):
        if not self.can_run:
            return
        tdf = TicDatFactory(**sillyMeSchema())
        ticDat = tdf.TicDat(**sillyMeData())
        schema2 = sillyMeSchema()
        schema2["b"][0] = ("bField2", "bField1", "bField3")
        schema3 = sillyMeSchema()
        schema3["a"][1] = ("aData2", "aData3", "aData1")
        schema4 = sillyMeSchema()
        schema4["a"][1] = ("aData1", "aData3")
        schema5 = sillyMeSchema()
        _tuple = lambda x: tuple(x) if utils.containerish(x) else (x, )
        for t in ("a", "b"):
            schema5[t][1] = _tuple(schema5[t][1]) + _tuple(schema5[t][0])
        schema5["a"][0], schema5["b"][0] = (), []
        schema6 = sillyMeSchema()
        schema6["d"] = [["dField"], ()]

        tdf2, tdf3, tdf4, tdf5, tdf6 = (TicDatFactory(**x)
                                        for x in (schema2, schema3, schema4,
                                                  schema5, schema6))
        tdf5.set_generator_tables(("a", "c"))
        filePath = os.path.join(_scratchDir, "silly.xls")
        tdf.xls.write_file(ticDat, filePath)

        ticDat2 = tdf2.xls.create_tic_dat(filePath)
        self.assertFalse(tdf._same_data(ticDat, ticDat2))

        ticDat3 = tdf3.xls.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(ticDat, ticDat3))

        ticDat4 = tdf4.xls.create_tic_dat(filePath)
        for t in ["a", "b"]:
            for k, v in getattr(ticDat4, t).items():
                for _k, _v in v.items():
                    self.assertTrue(getattr(ticDat, t)[k][_k] == _v)
                if set(v) == set(getattr(ticDat, t)[k]):
                    self.assertTrue(t == "b")
                else:
                    self.assertTrue(t == "a")

        ticDat5 = tdf5.xls.create_tic_dat(filePath,
                                          treat_inf_as_infinity=False)
        self.assertTrue(tdf5._same_data(tdf._keyless(ticDat), ticDat5))
        self.assertTrue(
            callable(ticDat5.a) and callable(ticDat5.c)
            and not callable(ticDat5.b))

        ticDat6 = tdf6.xls.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(ticDat, ticDat6))
        self.assertTrue(
            firesException(lambda: tdf6._same_data(ticDat, ticDat6)))
        self.assertTrue(hasattr(ticDat6, "d") and utils.dictish(ticDat6.d))

        def writeData(data, write_header="same"):
            """Write *data* rows into every table sheet of the .xls test file.

            data: sequence of row tuples; each row is written to every table.
            write_header: "same"  -> write field names as-is,
                          "lower" -> write lower-cased field names,
                          "duped" -> write each field name (and each cell) twice,
                          falsy   -> no header row at all.
            For "lower"/"same" a twin .xlsx file is also generated via pandas.
            """
            assert filePath.endswith(".xls")
            assert not write_header or write_header in ("lower", "same",
                                                        "duped")
            import xlwt
            book = xlwt.Workbook()
            for t in tdf.all_tables:
                sheet = book.add_sheet(t)
                if write_header:
                    all_fields = tdf.primary_key_fields.get(
                        t, ()) + tdf.data_fields.get(t, ())
                    # "duped" repeats the whole field tuple so duplicate-header
                    # handling can be exercised by the caller
                    for i, f in enumerate(
                        (2 if write_header == "duped" else 1) * all_fields):
                        sheet.write(
                            0, i,
                            f.lower() if write_header == "lower"
                            or i >= len(all_fields) else f)
                for rowInd, row in enumerate(data):
                    # data rows start at row 1 only when a header was written
                    for fieldInd, cellValue in enumerate(
                        (2 if write_header == "duped" else 1) * row):
                        sheet.write(rowInd + (1 if write_header else 0),
                                    fieldInd, cellValue)
            if os.path.exists(filePath):
                os.remove(filePath)
            book.save(filePath)
            if write_header in [
                    "lower", "same"
            ]:  # will use pandas to generate the xlsx file version
                file_path_x = filePath + "x"
                if os.path.exists(file_path_x):
                    os.remove(file_path_x)
                writer = utils.pd.ExcelWriter(file_path_x)
                for t, (pks, dfs) in tdf.schema().items():
                    fields = pks + dfs
                    if write_header == "lower":
                        fields = [_.lower() for _ in fields]
                    # build a column dict so every table gets the same rows
                    d = {f: [] for f in fields}
                    for row in data:
                        for f, c in zip(fields, row):
                            d[f].append(c)
                    utils.pd.DataFrame(d).to_excel(writer, t, index=False)
                # NOTE(review): writer.save() was removed in pandas 2.x in
                # favor of writer.close() - assumes an older pandas here
                writer.save()

        writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)],
                  write_header="duped")
        self.assertTrue(
            self.firesException(
                lambda: tdf.xls.create_tic_dat(filePath, freeze_it=True)))

        writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)])
        ticDatMan = tdf.xls.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(len(ticDatMan.a) == 2 and len(ticDatMan.b) == 3)
        self.assertTrue(ticDatMan.b[1, 20, 30]["bData"] == 40)
        for f in [filePath, filePath + "x"]:
            rowCount = tdf.xls.find_duplicates(f)
            self.assertTrue(
                set(rowCount) == {'a'} and set(rowCount["a"]) == {1}
                and rowCount["a"][1] == 2)

        writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)],
                  write_header="lower")
        ticDatMan = tdf.xls.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(len(ticDatMan.a) == 2 and len(ticDatMan.b) == 3)
        self.assertTrue(ticDatMan.b[1, 20, 30]["bData"] == 40)
        for f in [filePath, filePath + "x"]:
            rowCount = tdf.xls.find_duplicates(f)
            self.assertTrue(
                set(rowCount) == {'a'} and set(rowCount["a"]) == {1}
                and rowCount["a"][1] == 2)

        writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)],
                  write_header=False)
        self.assertTrue(
            self.firesException(
                lambda: tdf.xls.create_tic_dat(filePath, freeze_it=True)))
        ticDatMan = tdf.xls.create_tic_dat(filePath,
                                           freeze_it=True,
                                           headers_present=False)
        self.assertTrue(len(ticDatMan.a) == 2 and len(ticDatMan.b) == 3)
        self.assertTrue(ticDatMan.b[1, 20, 30]["bData"] == 40)
        rowCount = tdf.xls.find_duplicates(filePath, headers_present=False)
        self.assertTrue(
            set(rowCount) == {'a'} and set(rowCount["a"]) == {1}
            and rowCount["a"][1] == 2)

        ticDat.a["theboger"] = (1, None, 12)
        tdf.xls.write_file(ticDat, filePath, allow_overwrite=True)
        ticDatNone = tdf.xls.create_tic_dat(filePath, freeze_it=True)
        # THIS IS A FLAW - but a minor one. None's are hard to represent. It is turning into the empty string here.
        # not sure how to handle this, but documenting for now.
        self.assertFalse(tdf._same_data(ticDat, ticDatNone))
        self.assertTrue(ticDatNone.a["theboger"]["aData2"] == "")
        # the workaround for this flaw is to set the data type to be nullabe but not allow the empty string
        tdfwa = TicDatFactory(**sillyMeSchema())
        tdfwa.set_data_type("a", "aData2", nullable=True)
        ticDatNone = tdfwa.xls.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, ticDatNone))
        self.assertTrue(ticDatNone.a["theboger"]["aData2"] == None)

        # checking the same thing with .xlsx - using openpyxl, None is indeed recovered even without tdfwa munging!
        tdf.xls.write_file(ticDat, filePath + "x", allow_overwrite=True)
        ticDatNone = tdf.xls.create_tic_dat(filePath + "x", freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, ticDatNone))
        self.assertTrue(ticDatNone.a["theboger"]["aData2"] == None)
        ticDatNone = tdfwa.xls.create_tic_dat(filePath + "x", freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, ticDatNone))
        self.assertTrue(ticDatNone.a["theboger"]["aData2"] == None)

        writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40),
                   (1, 20, 30, 12)])
        for f in [filePath, filePath + "x"]:
            rowCount = tdf.xls.find_duplicates(f)
            self.assertTrue(
                set(rowCount) == {'a', 'b'} and set(rowCount["a"]) == {1}
                and rowCount["a"][1] == 3)
            self.assertTrue(
                set(rowCount["b"]) == {(1, 20, 30)}
                and rowCount["b"][1, 20, 30] == 2)
Esempio n. 19
0
    def testEight(self):
        """Exercise find_data_type_failures / replace_data_type_failures on
        the diet schema, then foreign-key links and string-typed data-field
        replacement on the netflow schema."""
        tdf = TicDatFactory(**dietSchema())
        def makeIt() :
            # NB: reads tdf from the enclosing scope at call time, so each
            # reassignment of tdf below changes which factory this builds with
            rtn = tdf.TicDat()
            rtn.foods["a"] = 12
            rtn.foods["b"] = None
            rtn.categories["1"] = {"maxNutrition":100, "minNutrition":40}
            rtn.categories["2"] = [10,20]
            for f, p in itertools.product(rtn.foods, rtn.categories):
                rtn.nutritionQuantities[f,p] = 5
            # note: integer key 2 here, vs the string "2" used in the loop above
            rtn.nutritionQuantities['a', 2] = 12
            return tdf.freeze_me(rtn)
        dat = makeIt()
        # the default factory has no data-type rules, so nothing can fail
        self.assertFalse(tdf.find_data_type_failures(dat))

        tdf = TicDatFactory(**dietSchema())
        tdf.set_data_type("foods", "cost", nullable=False)
        tdf.set_data_type("nutritionQuantities", "qty", min=5, inclusive_min=False, max=12, inclusive_max=True)
        tdf.set_default_value("foods", "cost", 2)
        dat = makeIt()
        failed = tdf.find_data_type_failures(dat)
        # foods["b"] has a None cost (not nullable) and qty == 5 violates the
        # exclusive minimum of 5
        self.assertTrue(set(failed) == {('foods', 'cost'), ('nutritionQuantities', 'qty')})
        self.assertTrue(set(failed['nutritionQuantities', 'qty'].pks) ==
                        {('b', '1'), ('a', '2'), ('a', '1'), ('b', '2')})
        self.assertTrue(failed['nutritionQuantities', 'qty'].bad_values == (5,))
        # without a replacement value for qty the repair must raise
        ex = self.firesException(lambda : tdf.replace_data_type_failures(tdf.copy_tic_dat(dat)))
        self.assertTrue(all(_ in ex for _ in ("replacement value", "nutritionQuantities", "qty")))
        fixedDat = tdf.replace_data_type_failures(tdf.copy_tic_dat(dat),
                            replacement_values={("nutritionQuantities", "qty"):5.001})
        self.assertFalse(tdf.find_data_type_failures(fixedDat) or tdf._same_data(fixedDat, dat))
        self.assertTrue(all(fixedDat.nutritionQuantities[pk]["qty"] == 5.001 for pk in
                            failed['nutritionQuantities', 'qty'].pks))
        # foods["b"] cost ends up as 2 (the default set above); the int-keyed
        # ('a', 2) qty of 12 sits inside the allowed range and is untouched
        self.assertTrue(fixedDat.foods["a"]["cost"] == 12 and fixedDat.foods["b"]["cost"] == 2 and
                        fixedDat.nutritionQuantities['a', 2]["qty"] == 12)

        # supplying explicit replacements for both failing fields gives the
        # same result as default-value fallback
        tdf = TicDatFactory(**dietSchema())
        tdf.set_data_type("foods", "cost", nullable=False)
        tdf.set_data_type("nutritionQuantities", "qty", min=5, inclusive_min=False, max=12, inclusive_max=True)
        fixedDat2 = tdf.replace_data_type_failures(tdf.copy_tic_dat(dat),
                            replacement_values={("nutritionQuantities", "qty"):5.001, ("foods", "cost") : 2})
        self.assertTrue(tdf._same_data(fixedDat, fixedDat2))

        # number_allowed=False makes every qty row a failure
        tdf = TicDatFactory(**dietSchema())
        tdf.set_data_type("foods", "cost", nullable=True)
        tdf.set_data_type("nutritionQuantities", "qty",number_allowed=False)
        failed = tdf.find_data_type_failures(dat)
        self.assertTrue(set(failed) == {('nutritionQuantities', 'qty')})
        self.assertTrue(set(failed['nutritionQuantities', 'qty'].pks) == set(dat.nutritionQuantities))
        ex = self.firesException(lambda : tdf.replace_data_type_failures(tdf.copy_tic_dat(dat)))
        self.assertTrue(all(_ in ex for _ in ("replacement value", "nutritionQuantities", "qty")))

        # the vanilla data type replaces the None cost with 0
        tdf = TicDatFactory(**dietSchema())
        tdf.set_data_type("foods", "cost")
        fixedDat = tdf.replace_data_type_failures(tdf.copy_tic_dat(makeIt()))
        self.assertTrue(fixedDat.foods["a"]["cost"] == 12 and fixedDat.foods["b"]["cost"] == 0)

        # foreign key links are not exposed unless explicitly enabled ...
        tdf = TicDatFactory(**netflowSchema())
        addNetflowForeignKeys(tdf)
        dat = tdf.copy_tic_dat(netflowData(), freeze_it=1)
        self.assertFalse(hasattr(dat.nodes["Detroit"], "arcs_source"))

        # ... and appear after enable_foreign_key_links
        tdf = TicDatFactory(**netflowSchema())
        addNetflowForeignKeys(tdf)
        tdf.enable_foreign_key_links()
        dat = tdf.copy_tic_dat(netflowData(), freeze_it=1)
        self.assertTrue(hasattr(dat.nodes["Detroit"], "arcs_source"))

        tdf = TicDatFactory(**netflowSchema())
        def makeIt() :
            # wire up foreign keys whenever the current tdf lacks them
            # (this closure also tracks reassignments of tdf below)
            if not tdf.foreign_keys:
                tdf.enable_foreign_key_links()
                addNetflowForeignKeys(tdf)
            orig = netflowData()
            rtn = tdf.copy_tic_dat(orig)
            # overwrite each Detroit-sourced arc's data with the destination
            # node name, i.e. a string value in a numeric-looking field
            for n in rtn.nodes["Detroit"].arcs_source:
                rtn.arcs["Detroit", n] = n
            self.assertTrue(all(len(getattr(rtn, t)) == len(getattr(orig, t)) for t in tdf.all_tables))
            return tdf.freeze_me(rtn)
        dat = makeIt()
        # no data-type rules yet, so the string capacities are not failures
        self.assertFalse(tdf.find_data_type_failures(dat))

        # any string is an acceptable capacity here
        tdf = TicDatFactory(**netflowSchema())
        tdf.set_data_type("arcs", "capacity", strings_allowed="*")
        dat = makeIt()
        self.assertFalse(tdf.find_data_type_failures(dat))

        # with a whitelist of strings, "New York" is the only offender
        tdf = TicDatFactory(**netflowSchema())
        tdf.set_data_type("arcs", "capacity", strings_allowed=["Boston", "Seattle", "lumberjack"])
        dat = makeIt()
        failed = tdf.find_data_type_failures(dat)
        self.assertTrue(failed == {('arcs', 'capacity'):(("New York",), (("Detroit", "New York"),))})
        fixedDat = tdf.replace_data_type_failures(tdf.copy_tic_dat(makeIt()))
        netflowData_ = tdf.copy_tic_dat(netflowData())
        self.assertFalse(tdf.find_data_type_failures(fixedDat) or tdf._same_data(dat, netflowData_))
        # after replacing with 80 and manually restoring the two mutated
        # arcs, the data matches the pristine netflow data again
        fixedDat = tdf.copy_tic_dat(tdf.replace_data_type_failures(tdf.copy_tic_dat(makeIt()),
                                        {("arcs", "capacity"):80, ("cost","cost") :"imok"}))
        fixedDat.arcs["Detroit", "Boston"] = 100
        fixedDat.arcs["Detroit", "Seattle"] = 120
        self.assertTrue(tdf._same_data(fixedDat, netflowData_))
Esempio n. 20
0
    def testReadModText(self):
        """Parse several small OPL text payloads with read_opl_text, covering
        string/numeric primary keys, tables with no data fields, and mixed
        string/numeric data fields."""
        # reusable data-type settings, to avoid repeating the same kwargs
        nonneg_num = dict(min=0, max=float("inf"),
                          inclusive_min=True, inclusive_max=False)
        str_only = dict(number_allowed=False, strings_allowed='*')

        tdf1 = TicDatFactory(test_1=[["sf1"], ["sf2", "nf1", "nf2"]])
        tdf1.set_data_type("test_1", "sf2", **str_only)
        test_str = 'test_1 =  {<"s1" "s2" 1 2> <"s3" "s4" 0 0>}'
        test_dat = read_opl_text(tdf1, test_str, False)
        self.assertTrue(test_dat.test_1["s1"]["sf2"] == "s2")
        self.assertTrue(test_dat.test_1["s1"]["nf2"] == 2)
        # BUG FIX: this previously checked test_1["s2"], a key absent from
        # the input, which passed only because a non-frozen TicDat inserts a
        # default row (nf1 default 0) on access. "s3" is the row whose
        # parsed nf1 actually is 0.
        self.assertTrue(test_dat.test_1["s3"]["nf1"] == 0)

        # a table with a primary key and no data fields at all
        tdf2 = TicDatFactory(test_2=[["sf1"], []])
        test_str = 'test_2 =  {<"s3">}'
        test_dat = read_opl_text(tdf2, test_str, False)
        self.assertTrue(list(test_dat.test_2.keys())[0] == "s3")

        # numeric primary key, no data fields
        tdf3 = TicDatFactory(test_3=[["nf1"], []])
        tdf3.set_data_type("test_3", "nf1", **nonneg_num)
        test_str = 'test_3 =  {<6> <5>}'
        test_dat = read_opl_text(tdf3, test_str, False)
        self.assertTrue(6 in test_dat.test_3.keys())
        self.assertTrue(5 in test_dat.test_3.keys())
        self.assertTrue(len(test_dat.test_3.keys()) == 2)

        # an all-numeric table; the four identical set_data_type calls from
        # the original are now a single loop
        tdf4 = TicDatFactory(test_4=[["nf1"], ["nf2", "nf3", "nf4"]])
        for field in ["nf1", "nf2", "nf3", "nf4"]:
            tdf4.set_data_type("test_4", field, **nonneg_num)
        test_str = 'test_4 =  {<7 0 809 9>}'
        test_dat = read_opl_text(tdf4, test_str, False)
        self.assertTrue(7 in test_dat.test_4.keys())
        self.assertTrue(len(test_dat.test_4[7]) == 3)
        self.assertTrue(test_dat.test_4[7]["nf3"] == 809)

        # string primary key with a single string data field
        tdf5 = TicDatFactory(test_5=[["sf1"], ["sf2"]])
        tdf5.set_data_type("test_5", "sf2", **str_only)
        test_str = 'test_5 =  {<"s4" "s5">}'
        test_dat = read_opl_text(tdf5, test_str, False)
        self.assertTrue("s4" in test_dat.test_5.keys())
        self.assertTrue(test_dat.test_5["s4"]["sf2"] == "s5")

        # numeric key with a string data field; the row keyed by 0 must also
        # be retrievable via the float 0.0
        tdf6 = TicDatFactory(test_6=[["nf1"], ["sf1"]])
        tdf6.set_data_type("test_6", "nf1", **nonneg_num)
        tdf6.set_data_type("test_6", "sf1", **str_only)
        test_str = 'test_6 =  {<0 "s6">}'
        test_dat = read_opl_text(tdf6, test_str, False)
        self.assertTrue(0 in test_dat.test_6.keys())
        self.assertTrue(test_dat.test_6[0.0]['sf1'] == "s6")
Esempio n. 21
0
    def testDiet(self):
        """Round-trip the diet schema through copy_to_pandas and back into
        TicDat objects, including fully generic ('*') and mixed
        generic/specific factories."""
        if not self.canRun:
            return
        tdf = TicDatFactory(**dietSchema())
        tdf.enable_foreign_key_links()
        oldDat = tdf.freeze_me(
            tdf.TicDat(
                **{t: getattr(dietData(), t)
                   for t in tdf.primary_key_fields}))
        self._test_generic_free_copy(oldDat, tdf)
        self._test_generic_free_copy(oldDat, tdf, ["nutritionQuantities"])
        ticDat = tdf.copy_to_pandas(oldDat)
        # every cell survives the copy into the pandas representation
        for k in oldDat.foods:
            self.assertTrue(oldDat.foods[k]["cost"] == ticDat.foods.cost[k])
        for k in oldDat.categories:
            self.assertTrue(oldDat.categories[k]["minNutrition"] ==
                            ticDat.categories.minNutrition[k])
        for k1, k2 in oldDat.nutritionQuantities:
            self.assertTrue(oldDat.nutritionQuantities[k1, k2]["qty"] ==
                            ticDat.nutritionQuantities.qty[k1, k2])
        nut = ticDat.nutritionQuantities
        # plain .loc raises on labels absent from the index, while the
        # ticdat-provided .sloc instead yields an empty (zero-sum) slice
        self.assertTrue(firesException(lambda: nut.qty.loc[:, "fatty"]))
        self.assertTrue(firesException(lambda: nut.qty.loc["chickeny", :]))
        self.assertFalse(firesException(lambda: nut.qty.sloc[:, "fatty"]))
        self.assertFalse(firesException(lambda: nut.qty.sloc["chickeny", :]))
        self.assertTrue(0 == sum(nut.qty.sloc[:, "fatty"]) == sum(nut.qty.sloc[
            "chickeny", :]))
        # for labels that do exist, sloc and loc agree with the raw data
        self.assertTrue(
            sum(nut.qty.sloc[:, "fat"]) == sum(nut.qty.loc[:, "fat"]) == sum(
                r["qty"] for (f, c), r in oldDat.nutritionQuantities.items()
                if c == "fat"))
        self.assertTrue(
            sum(nut.qty.sloc["chicken", :]) == sum(nut.qty.loc["chicken", :])
            == sum(r["qty"]
                   for (f, c), r in oldDat.nutritionQuantities.items()
                   if f == "chicken"))

        # the pandas tables can be fed straight back into the factory
        rebornTicDat = tdf.TicDat(
            **{t: getattr(ticDat, t)
               for t in tdf.all_tables})
        self.assertTrue(tdf._same_data(rebornTicDat, oldDat))

        # a fully generic factory (every table '*') accepts the same tables
        tdf2 = TicDatFactory(**{t: '*' for t in tdf.all_tables})
        # setting a data type on a generic table raises
        self.assertTrue(
            firesException(
                lambda: tdf2.set_data_type("nutritionQuantities", "qty")))
        genTicDat = tdf2.TicDat(
            **{t: getattr(ticDat, t)
               for t in tdf.all_tables})

        for k in oldDat.categories:
            self.assertTrue(oldDat.categories[k]["minNutrition"] ==
                            genTicDat.categories.minNutrition[k])
        for k1, k2 in oldDat.nutritionQuantities:
            self.assertTrue(oldDat.nutritionQuantities[k1, k2]["qty"] ==
                            genTicDat.nutritionQuantities.qty[k1, k2])
        # the generic object satisfies tdf2 but not the specific tdf
        self.assertFalse(tdf.good_tic_dat_object(genTicDat))
        self.assertTrue(tdf2.good_tic_dat_object(genTicDat))
        rebornTicDat = tdf.TicDat(
            **{t: getattr(genTicDat, t)
               for t in tdf.all_tables})
        self.assertTrue(tdf._same_data(rebornTicDat, oldDat))
        rebornGenTicDat = tdf2.TicDat(**tdf2.as_dict(genTicDat))
        # reset the pandas index names to the primary key fields so the
        # conversion back to a specific TicDat below works
        for t, pks in tdf.primary_key_fields.items():
            getattr(rebornGenTicDat, t).index.names = pks
        rebornTicDat = tdf.TicDat(
            **{t: getattr(rebornGenTicDat, t)
               for t in tdf.all_tables})
        self.assertTrue(tdf._same_data(rebornTicDat, oldDat))

        # a mixed factory: categories generic, the other tables specified
        tdf3 = TicDatFactory(**dict(dietSchema(), **{"categories": '*'}))
        # non-generic tables can still carry data types in a mixed factory
        self.assertFalse(
            firesException(
                lambda: tdf3.set_data_type("nutritionQuantities", "qty")))
        mixTicDat = tdf3.TicDat(
            **{t: getattr(ticDat, t)
               for t in tdf.all_tables})
        for k in oldDat.categories:
            self.assertTrue(oldDat.categories[k]["minNutrition"] ==
                            mixTicDat.categories.minNutrition[k])
        for k1, k2 in oldDat.nutritionQuantities:
            self.assertTrue(oldDat.nutritionQuantities[k1, k2]["qty"] ==
                            mixTicDat.nutritionQuantities[k1, k2]["qty"])
        # the mixed object is only good for tdf3
        self.assertFalse(tdf2.good_tic_dat_object(mixTicDat))
        self.assertFalse(tdf3.good_tic_dat_object(genTicDat))
        self.assertTrue(tdf3.good_tic_dat_object(mixTicDat))
        rebornTicDat = tdf.TicDat(
            **{t: getattr(mixTicDat, t)
               for t in tdf.all_tables})
        self.assertTrue(tdf._same_data(rebornTicDat, oldDat))