コード例 #1
0
ファイル: testxls.py プロジェクト: austin-bren/ticdat
 def testColumnsWithoutData(self):
     """Round-trip an empty TicDat through an .xls and an .xlsx file.

     For each extension the empty data object is written, read back, and
     ``_len_dict()`` of the re-read object is asserted to be falsy.
     """
     factory = TicDatFactory(data=[["a"], ["b"]])
     for suffix in ("", "x"):
         # "" gives no_data.xls, "x" gives no_data.xlsx
         target = os.path.join(_scratchDir, "no_data.xls" + suffix)
         empty_dat = factory.TicDat()
         factory.xls.write_file(empty_dat, target)
         reread = factory.xls.create_tic_dat(target)
         self.assertFalse(reread._len_dict())
コード例 #2
0
 def testDups(self):
     if not self.can_run:
         return
     tdf = TicDatFactory(one=[["a"], ["b", "c"]],
                         two=[["a", "b"], ["c"]],
                         three=[["a", "b", "c"], []])
     tdf2 = TicDatFactory(
         **{t: [[], ["a", "b", "c"]]
            for t in tdf.all_tables})
     td = tdf2.TicDat(
         **{
             t: [[1, 2, 1], [1, 2, 2], [2, 1, 3], [2, 2, 3], [1, 2, 2],
                 ["new", 1, 2]]
             for t in tdf.all_tables
         })
     f = makeCleanPath(os.path.join(_scratchDir, "testDups.db"))
     tdf2.sql.write_db_data(td, f)
     dups = tdf.sql.find_duplicates(f)
     self.assertTrue(dups == {
         'three': {
             (1, 2, 2): 2
         },
         'two': {
             (1, 2): 3
         },
         'one': {
             1: 3,
             2: 2
         }
     })
コード例 #3
0
ファイル: testpandas.py プロジェクト: nandi6uc/ticdat
    def _test_generic_free_copy(self, ticDat, tdf, skip_tables=None):
        assert all(tdf.primary_key_fields.get(t) for t in tdf.all_tables)
        replace_name = lambda f: "name_" if f == "name" else f
        clean_tdf = TicDatFactory(
            **{
                t: [list(map(replace_name, pks)), dfs]
                for t, (pks, dfs) in tdf.schema().items()
            })

        temp_tdf = TicDatFactory(
            **{
                t: v if t in (skip_tables or []) else '*'
                for t, v in clean_tdf.schema().items()
            })
        temp_dat = temp_tdf.TicDat(
            **{t: getattr(ticDat, t)
               for t in (skip_tables or [])})
        for t in temp_tdf.generic_tables:
            setattr(
                temp_dat, t,
                getattr(
                    clean_tdf.copy_to_pandas(ticDat, drop_pk_columns=False),
                    t))
        generic_free_dat, _ = utils.create_generic_free(temp_dat, temp_tdf)
        check_dat = clean_tdf.TicDat()
        for t in temp_tdf.generic_tables:
            for r in getattr(generic_free_dat, t):
                pks = clean_tdf.primary_key_fields[t]
                getattr(check_dat, t)[r[pks[0]] if len(pks) == 1 else tuple(r[_] for _ in pks)] = \
                    {df:r[df] for df in clean_tdf.data_fields.get(t, [])}
        for t in (skip_tables or []):
            for k, v in getattr(generic_free_dat, t).items():
                getattr(check_dat, t)[k] = v
        self.assertTrue(
            clean_tdf._same_data(check_dat, clean_tdf.copy_tic_dat(ticDat)))
コード例 #4
0
ファイル: testpandat_io.py プロジェクト: nandi6uc/ticdat
    def test_nullables(self):
        """Round-trip a table with a nullable string field through csv, xls, sql and json.

        "field two" is declared as strings-only and nullable; the data holds a
        ``None`` in that field. Each re-read object is compared to the original
        with ``nans_are_same_for_data_rows=True``.
        """
        core_path = os.path.join(_scratchDir, "nullables")
        pdf = PanDatFactory(table_with_stuffs=[["field one"], ["field two"]])
        pdf.set_data_type("table_with_stuffs", "field one")
        pdf.set_data_type("table_with_stuffs", "field two",
                          number_allowed=False, strings_allowed='*',
                          nullable=True)

        rows = [[101, "022"], [202, None], [303, "111"]]
        tic_dat = TicDatFactory(**pdf.schema()).TicDat(table_with_stuffs=rows)
        dat = TicDatFactory(**pdf.schema()).copy_to_pandas(
            tic_dat, drop_pk_columns=False)
        self.assertFalse(pdf.find_data_type_failures(dat))

        targets = (
            ("csv", core_path + "_csv"),
            ("xls", core_path + ".xlsx"),
            ("sql", core_path + ".db"),
            ("json", core_path + ".json"),
        )
        for attr, path in targets:
            # csv is written as a directory, everything else as a single file
            if attr == "csv":
                f_or_d = "directory"
            else:
                f_or_d = "file"
            writer_and_kwargs = utils._get_write_function_and_kwargs(
                pdf, path, f_or_d)
            write_func, write_kwargs = writer_and_kwargs
            write_func(dat, path, **write_kwargs)
            reread = utils._get_dat_object(pdf, "create_pan_dat", path,
                                           f_or_d, False)
            matches = pdf._same_data(dat, reread,
                                     nans_are_same_for_data_rows=True)
            self.assertTrue(matches)
コード例 #5
0
 def testCaseSpaceTableNames(self):
     """Check csv file names written with and without ``case_space_table_names``.

     With the flag on, "Table One.csv"/"Table Two.csv" must exist and the
     underscore names must not; with the flag off the reverse must hold. In
     both cases the directory must read back to the same data.
     """
     tdf = TicDatFactory(table_one=[["a"], ["b", "c"]],
                         table_two=[["this", "that"], []])
     dir_path = os.path.join(_scratchDir, "case_space")
     dat = tdf.TicDat(table_one=[['a', 2, 3], ['b', 5, 6]],
                      table_two=[["a", "b"], ["c", "d"], ["x", "z"]])
     spaced_names = ["Table One", "Table Two"]
     plain_names = ["table_one", "table_two"]

     def csv_present(name):
         return os.path.exists(os.path.join(dir_path, name + ".csv"))

     tdf.csv.write_directory(dat, makeCleanDir(dir_path),
                             case_space_table_names=True)
     self.assertTrue(all(csv_present(n) for n in spaced_names))
     self.assertFalse(any(csv_present(n) for n in plain_names))
     reread = tdf.csv.create_tic_dat(dir_path)
     self.assertTrue(tdf._same_data(dat, reread))

     tdf.csv.write_directory(dat, makeCleanDir(dir_path),
                             case_space_table_names=False)
     self.assertFalse(any(csv_present(n) for n in spaced_names))
     self.assertTrue(all(csv_present(n) for n in plain_names))
     reread = tdf.csv.create_tic_dat(dir_path)
     self.assertTrue(tdf._same_data(dat, reread))
コード例 #6
0
 def testDups(self):
     if not self.can_run:
         return
     for hack, raw_data in list(product(*(([True, False], ) * 2))):
         tdf = TicDatFactory(one=[["a"], ["b", "c"]],
                             two=[["a", "b"], ["c"]],
                             three=[["a", "b", "c"], []])
         tdf2 = TicDatFactory(
             **{t: [[], ["a", "b", "c"]]
                for t in tdf.all_tables})
         td = tdf2.TicDat(
             **{
                 t: [[1, 2, 1], [1, 2, 2], [2, 1, 3], [2, 2, 3], [1, 2, 2],
                     ["new", 1, 2]]
                 for t in tdf.all_tables
             })
         dups = tdf.opalytics.find_duplicates(create_inputset_mock(
             tdf2, td, hack),
                                              raw_data=raw_data)
         self.assertTrue(dups == {
             'three': {
                 (1, 2, 2): 2
             },
             'two': {
                 (1, 2): 3
             },
             'one': {
                 1: 3,
                 2: 2
             }
         })
コード例 #7
0
ファイル: testpandat_io.py プロジェクト: nandi6uc/ticdat
 def test_parameters(self):
     """Round-trip a parameters table through sql, csv, json and xls, twice.

     The first pass stores the string "inf" for the "Different" parameter,
     the second stores "05701"; "Something" is ``float("inf")`` in both.
     Every re-read object must equal the written one.
     """
     pdf = PanDatFactory(parameters=[["Key"], ["Value"]])
     pdf.add_parameter("Something", 100)
     pdf.add_parameter("Different", 'boo',
                       strings_allowed='*', number_allowed=False)
     suffixes = {"sql": ".db", "csv": "_csv", "json": ".json", "xls": ".xlsx"}

     def build_dat(different_value):
         tic = TicDatFactory(**pdf.schema()).TicDat(
             parameters=[["Something", float("inf")],
                         ["Different", different_value]])
         return TicDatFactory(**pdf.schema()).copy_to_pandas(
             tic, drop_pk_columns=False)

     def round_trip(dat, core_path, formats):
         for attr in formats:
             path = core_path + suffixes[attr]
             # csv is written as a directory, the other formats as one file
             if attr == "csv":
                 getattr(pdf, attr).write_directory(dat, path)
             else:
                 getattr(pdf, attr).write_file(dat, path)
             reread = getattr(pdf, attr).create_pan_dat(path)
             self.assertTrue(pdf._same_data(dat, reread))

     round_trip(build_dat("inf"),
                os.path.join(_scratchDir, "parameters"),
                ("sql", "csv", "json", "xls"))
     round_trip(build_dat("05701"),
                os.path.join(_scratchDir, "parameters_two"),
                ("sql", "csv", "xls", "json"))
コード例 #8
0
ファイル: testlingo.py プロジェクト: vn8317x/opalytics-ticdat
    def testTryCreateSpace(self):
        """Exercise the tlingo space/case mapping helpers.

        First, for three schemas, applying the inverted mapping and then the
        mapping itself must restore the data, while the intermediate result
        must differ. Second, conflicting keys are added to the diet data and
        the reported "failures" entry is compared to a fixed dictionary.
        """
        def check_round_trip(schema_factory, data_factory):
            factory = TicDatFactory(**schema_factory())
            original = factory.copy_tic_dat(data_factory())
            forward = tlingo._try_create_space_case_mapping(
                factory, original)["mapping"]
            backward = {}
            for key, value in forward.items():
                backward[value] = key
            remapped = tlingo._apply_space_case_mapping(
                factory, original, backward)
            restored = tlingo._apply_space_case_mapping(
                factory, remapped, forward)
            self.assertTrue(factory._same_data(original, restored))
            self.assertFalse(factory._same_data(original, remapped))

        def silly_data():
            return TicDatFactory(**sillyMeSchema()).TicDat(**sillyMeData())

        check_round_trip(dietSchema, dietData)
        check_round_trip(netflowSchema, netflowData)
        check_round_trip(sillyMeSchema, silly_data)

        tdf = TicDatFactory(**dietSchema())
        dat = tdf.copy_tic_dat(dietData())
        # add keys that differ from existing ones only by case or space/underscore
        dat.foods["ice_cream"] = dat.foods["ice cream"]
        dat.categories["ICE CREAM"] = {}
        dat.categories["fAt"] = dat.categories["fat"]
        outcome = tlingo._try_create_space_case_mapping(tdf, dat)
        failures = outcome["failures"]
        expected = {
            'ICE_CREAM': ('ICE CREAM', 'ice cream', 'ice_cream'),
            'FAT': ('fAt', 'fat'),
        }
        self.assertTrue(failures == expected)
コード例 #9
0
ファイル: testpandat_utils.py プロジェクト: adampkehoe/ticdat
    def testDataTypes_two(self):
        """Exercise data type failure detection and replacement on diet data.

        Builds a PanDatFactory from the diet schema, creates data containing a
        ``None`` food key and a ``None`` food value, and checks
        ``find_data_type_failures``, ``replace_data_type_failures`` and
        ``find_duplicates`` before and after default values and nullable data
        types are configured.
        """
        tdf = TicDatFactory(**dietSchema())
        pdf = PanDatFactory(**tdf.schema())

        def makeIt():
            """Return a pandas copy of diet data that includes None entries."""
            rtn = tdf.TicDat()
            rtn.foods["a"] = 12
            rtn.foods["b"] = None
            rtn.foods[None] = 101
            rtn.categories["1"] = {"maxNutrition": 100, "minNutrition": 40}
            rtn.categories["2"] = [10, 20]
            # one nutritionQuantities row for every (food, category) pair
            for f, p in itertools.product(rtn.foods, rtn.categories):
                rtn.nutritionQuantities[f, p] = 5
            rtn.nutritionQuantities['a', 2] = 12
            return tdf.copy_to_pandas(rtn, drop_pk_columns=False)

        dat = makeIt()
        errs = pdf.find_data_type_failures(dat)
        # two failing (table, field) entries are expected, and no duplicates
        self.assertTrue(len(errs) == 2 and not pdf.find_duplicates(dat))
        dat_copied = pdf.copy_pan_dat(dat)
        # replacing failures through pdf is asserted to leave dat equal to its copy
        pdf.replace_data_type_failures(dat)
        self.assertTrue(pdf._same_data(dat, dat_copied, epsilon=0.00001))
        # pdf2 gets "a" as the default for the two failing fields; replacing
        # through it is asserted to change the copy, clear the type failures
        # and introduce duplicates
        pdf2 = pdf.clone()
        pdf2.set_default_value("foods", "name", "a")
        pdf2.set_default_value("nutritionQuantities", "food", "a")
        pdf2.replace_data_type_failures(dat_copied)
        self.assertFalse(pdf._same_data(dat, dat_copied, epsilon=0.00001))
        self.assertFalse(pdf.find_data_type_failures(dat_copied))
        dups = pdf.find_duplicates(dat_copied)
        self.assertTrue(
            len(dups) == 2 and len(dups["foods"]) == 1
            and len(dups["nutritionQuantities"]) == 2)

        from pandas import isnull

        def noneify(iter_of_tuples):
            """Return the tuples as a set, with every null entry replaced by None."""
            return {
                tuple(None if isnull(_) else _ for _ in tuple_)
                for tuple_ in iter_of_tuples
            }

        # errs was computed before any replacement; check the failing rows it holds
        self.assertTrue(
            noneify(errs['nutritionQuantities', 'food'].itertuples(
                index=False)) == {(None, "1", 5), (None, "2", 5)})
        self.assertTrue(
            noneify(errs['foods',
                         'name'].itertuples(index=False)) == {(None, 101)})
        # a fresh factory whose two key fields are declared nullable strings
        pdf = PanDatFactory(**tdf.schema())
        pdf.set_data_type("foods", "name", nullable=True, strings_allowed='*')
        pdf.set_data_type("nutritionQuantities",
                          "food",
                          nullable=True,
                          strings_allowed='*')
        self.assertFalse(pdf.find_data_type_failures(dat))
        # once foods.cost is non-nullable, the row for food 'b' is expected to fail
        pdf.set_data_type("foods", "cost", nullable=False)
        errs = pdf.find_data_type_failures(dat)
        self.assertTrue(len(errs) == 1)
        self.assertTrue(
            noneify(errs['foods',
                         'cost'].itertuples(index=False)) == {('b', None)})
コード例 #10
0
ファイル: testaccdb.py プロジェクト: vn8317x/opalytics-ticdat
 def testDups(self):
     """Check the result of ``mdb.find_duplicates`` on an .accdb file with repeated keys.

     Rows are written through a copy of the schema that has no primary keys,
     then the file is inspected through the keyed schema. Returns immediately
     when ``_can_accdb_unit_test`` is falsy.
     """
     if not _can_accdb_unit_test:
         return
     # NOTE(review): "b, c" is one field name here, whereas the sibling
     # testDups methods declare two data fields "b" and "c" - confirm intent.
     keyed = TicDatFactory(one=[["a"], ["b, c"]],
                           two=[["a", "b"], ["c"]],
                           three=[["a", "b", "c"], []])

     def fresh_rows():
         # A new list for every table, as a per-table literal would give.
         return [[1, 2, 1], [1, 2, 2], [2, 1, 3], [2, 2, 3], [1, 2, 2],
                 [11, 1, 2]]

     # Same table names, but all three fields are data fields.
     keyless_schema = {}
     for table in keyed.all_tables:
         keyless_schema[table] = [[], ["a", "b", "c"]]
     keyless = TicDatFactory(**keyless_schema)

     table_rows = {}
     for table in keyed.all_tables:
         table_rows[table] = fresh_rows()
     dat = keyless.TicDat(**table_rows)

     db_path = makeCleanPath(os.path.join(_scratchDir, "testDups.accdb"))
     keyless.mdb.write_file(dat, db_path)
     # shutil.copy(db_path, "dups.accdb")  # uncomment to make readonly test file as .accdb
     found = keyed.mdb.find_duplicates(db_path)
     expected = {
         'one': {1: 3, 2: 2},
         'two': {(1, 2): 3},
         'three': {(1, 2, 2): 2},
     }
     self.assertTrue(found == expected)
コード例 #11
0
ファイル: testlingo.py プロジェクト: vn8317x/opalytics-ticdat
 def testSortedTables(self):
     """Assert that ``tlingo._sorted_tables`` puts table3 last.

     table3 carries foreign keys into both table1 and table2.
     """
     factory = TicDatFactory(table3=[["PK3", "FK1", "FK2"], ["Val D"]],
                             table2=[["PK2"], ["Val A", "Val B"]],
                             table1=[["PK1"], ["Val C"]])
     links = (("table1", ["FK1", "PK1"]), ("table2", ["FK2", "PK2"]))
     for parent, mapping in links:
         factory.add_foreign_key("table3", parent, mapping)
     ordered = tlingo._sorted_tables(factory)
     self.assertTrue(ordered[-1] == 'table3')
コード例 #12
0
    def testSpacey(self):
        """Read "spaces.accdb" and compare it with hand-built spaces data.

        The file must report no duplicates and must load (frozen) to the same
        data as the literal below. Returns immediately when ``_can_unit_test``
        is falsy.
        """
        if not _can_unit_test:
            return
        tdf = TicDatFactory(**spacesSchema())

        a_rows = {
            1: {"a Data 3": 3, "a Data 2": 2, "a Data 1": 1},
            22: (1.1, 12, 12),
            0.23: (11, 12, 11),
        }
        b_rows = {("1", "2", "3"): 1, ("a", "b", "b"): 12}
        c_rows = (
            ("1", "2", "3", 4),
            {"c Data 4": 55, "c Data 2": "b", "c Data 3": "c", "c Data 1": "a"},
            ("a", "b", "12", 24),
        )
        expected = tdf.TicDat(a_table=a_rows, b_table=b_rows, c_table=c_rows)

        filePath = "spaces.accdb"
        duplicates = tdf.mdb.find_duplicates(filePath)
        self.assertFalse(duplicates)
        loaded = tdf.mdb.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(tdf._same_data(expected, loaded))
コード例 #13
0
    def testSpacey(self):
        """Write spaces data to an .accdb file and read it back, twice.

        After the first round trip every table is copied to a name with the
        underscores replaced by spaces and the original is dropped; the file
        must still load to the same data. Returns immediately when
        ``_can_accdb_unit_test`` is falsy.
        """
        if not _can_accdb_unit_test:
            return
        tdf = TicDatFactory(**spacesSchema())

        a_rows = {
            1: {"a Data 3": 3, "a Data 2": 2, "a Data 1": 1},
            22: (1.1, 12, 12),
            0.23: (11, 12, 11),
        }
        b_rows = {("1", "2", "3"): 1, ("a", "b", "b"): 12}
        c_rows = (
            ("1", "2", "3", 4),
            {"c Data 4": 55, "c Data 2": "b", "c Data 3": "c", "c Data 1": "a"},
            ("a", "b", "12", 24),
        )
        dat = tdf.TicDat(a_table=a_rows, b_table=b_rows, c_table=c_rows)

        filePath = makeCleanPath(os.path.join(_scratchDir, "spacey.accdb"))
        a_types = {"a Field": "double"}
        c_types = {"c Data 1": "text", "c Data 2": "text",
                   "c Data 3": "text", "c Data 4": "int"}
        tdf.mdb.write_schema(filePath, a_table=a_types, c_table=c_types)
        tdf.mdb.write_file(dat, filePath)
        self.assertFalse(tdf.mdb.find_duplicates(filePath))
        first_read = tdf.mdb.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(tdf._same_data(dat, first_read))

        with py.connect(_connection_str(filePath)) as con:
            for table in tdf.all_tables:
                spaced = table.replace("_", " ")
                copy_sql = "SELECT * INTO [%s] FROM %s" % (spaced, table)
                con.cursor().execute(copy_sql).commit()
                drop_sql = "DROP TABLE %s" % table
                con.cursor().execute(drop_sql).commit()
        # shutil.copy(filePath, "spaces.accdb")  # uncomment to make readonly test file as .accdb
        second_read = tdf.mdb.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(tdf._same_data(dat, second_read))
コード例 #14
0
ファイル: testsql.py プロジェクト: vn8317x/opalytics-ticdat
    def _test_generic_copy(self, ticDat, tdf, skip_tables=None):
        assert all(tdf.primary_key_fields.get(t) for t in tdf.all_tables)
        path = makeCleanDir(os.path.join(_scratchDir, "generic_copy"))
        replace_name  = lambda f : "name_" if f == "name" else f
        clean_tdf = TicDatFactory(**{t:[list(map(replace_name, pks)), dfs]
                                     for t,(pks, dfs) in tdf.schema().items()})

        temp_tdf = TicDatFactory(**{t:v if t in (skip_tables or []) else '*'
                                    for t,v in clean_tdf.schema().items()})
        temp_dat = temp_tdf.TicDat(**{t:getattr(ticDat, t) for t in (skip_tables or [])})
        for t in temp_tdf.generic_tables:
            setattr(temp_dat, t, getattr(clean_tdf.copy_to_pandas(ticDat, drop_pk_columns=False) ,t))

        temp_tdf.sql.write_db_data(temp_dat, os.path.join(path, "f.db"))
        temp_tdf.sql.write_sql_file(temp_dat, os.path.join(path, "f1.sql"), include_schema=False)
        temp_tdf.sql.write_sql_file(temp_dat, os.path.join(path, "f2.sql"), include_schema=True)

        for file_name, includes_schema in [("f.db", False), ("f1.sql", False), ("f2.sql", True)]:
            file_path = os.path.join(path, file_name)
            if file_path.endswith(".db"):
                self.assertFalse(temp_tdf.sql.find_duplicates(file_path))
                read_dat = temp_tdf.sql.create_tic_dat(file_path)
            else:
                read_dat = temp_tdf.sql.create_tic_dat_from_sql(file_path, includes_schema)
            generic_free_dat, _ = utils.create_generic_free(read_dat, temp_tdf)
            check_dat = clean_tdf.TicDat()
            for t in temp_tdf.generic_tables:
                for r in getattr(generic_free_dat, t):
                    pks = clean_tdf.primary_key_fields[t]
                    getattr(check_dat, t)[r[pks[0]] if len(pks) == 1 else tuple(r[_] for _ in pks)] = \
                        {df:r[df] for df in clean_tdf.data_fields.get(t, [])}
            for t in (skip_tables or []):
                for k,v in getattr(generic_free_dat, t).items():
                    getattr(check_dat, t)[k] = v
            self.assertTrue(clean_tdf._same_data(check_dat, clean_tdf.copy_tic_dat(ticDat)))
コード例 #15
0
    def testDenormalizedErrors(self):
        """Exercise ``utils.find_denormalized_sub_table_failures`` on spaces data.

        Rows are added to ``b_table`` and ``c_table`` step by step and, after
        each step, the failures reported for a chosen set of key and data
        fields are compared with fixed expectations. Returns immediately when
        ``self.canRun`` is falsy.
        """
        if not self.canRun:
            return
        # short aliases: c post-processes a result, f is the function under test
        c = clean_denormalization_errors
        f = utils.find_denormalized_sub_table_failures
        tdf = TicDatFactory(**spacesSchema())
        dat = tdf.TicDat(**spacesData())
        # p() rebuilds the b_table DataFrame from dat on every call, so later
        # mutations of dat are picked up
        p = lambda :tdf.copy_to_pandas(dat, drop_pk_columns=False).b_table
        self.assertFalse(f(p(),"b Field 1",("b Field 2", "b Field 3")))
        dat.b_table[2,2,3] = "boger"
        self.assertFalse(f(p(), "b Field 1",("b Field 2", "b Field 3")))
        chk = f(p(), "b Field 2",("b Field 1", "b Field 3"))
        self.assertTrue(c(chk) == {2: {'b Field 1': {1, 2}}})
        dat.b_table[2,2,4] = "boger"
        dat.b_table[1,'b','b'] = "boger"
        chk = f(p(), ["b Field 2"],("b Field 1", "b Field 3", "b Data"))
        # the expected value is passed through c as well before comparing
        self.assertTrue(c(chk) == c({2: {'b Field 3': (3, 4), 'b Data': (1, 'boger'), 'b Field 1': (1, 2)},
                                 'b': {'b Data': ('boger', 12), 'b Field 1': ('a', 1)}}))

        # an unknown data field name must produce an exception mentioning it
        ex = self.firesException(lambda : f(p(), ["b Data"],"wtf"))
        self.assertTrue("wtf isn't a column" in ex)


        # from here on p() yields the c_table DataFrame instead
        p = lambda :tdf.copy_to_pandas(dat, drop_pk_columns=False).c_table
        chk = f(p(), pk_fields=["c Data 1", "c Data 2"], data_fields=["c Data 3", "c Data 4"])
        self.assertTrue(c(chk) == {('a', 'b'): {'c Data 3': {'c', 12}, 'c Data 4': {24, 'd'}}})
        dat.c_table.append((1, 2, 3, 4))
        dat.c_table.append((1, 2, 1, 4))
        dat.c_table.append((1, 2, 1, 5))
        dat.c_table.append((1, 2, 3, 6))
        chk = f(p(), pk_fields=["c Data 1", "c Data 2"], data_fields=["c Data 3", "c Data 4"])
        self.assertTrue(c(chk) == {('a', 'b'): {'c Data 3': {'c', 12}, 'c Data 4': {24, 'd'}},
                                   (1,2):{'c Data 3':{3,1}, 'c Data 4':{4,5,6}}})
コード例 #16
0
ファイル: testpandat_utils.py プロジェクト: adampkehoe/ticdat
 def testFindDups(self):
     """Check ``PanDatFactory.find_duplicates`` in table and non-table form.

     The same row set is loaded into every table of a key-less copy of the
     sillyMe schema and converted to a PanDat; duplicates are then looked up
     with different ``as_table`` / ``keep`` settings for two row sets.
     """
     pdf = PanDatFactory(**sillyMeSchema())
     keyless_schema = {}
     for name, (pkfs, dfs) in sillyMeSchema().items():
         keyless_schema[name] = [[], list(pkfs) + list(dfs)]
     tdf = TicDatFactory(**keyless_schema)

     def as_pan_dat(rows):
         tic = tdf.TicDat(**{t: rows for t in tdf.all_tables})
         return pdf.copy_pan_dat(copy_to_pandas_with_reset(tdf, tic))

     def true_count(flags):
         return flags.value_counts()[True]

     panDat = as_pan_dat([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)])
     dups = pdf.find_duplicates(panDat)
     self.assertTrue(set(dups) == {'a'} and set(dups['a']['aField']) == {1})
     dups = pdf.find_duplicates(panDat, as_table=False, keep=False)
     self.assertTrue(set(dups) == {'a'} and true_count(dups['a']) == 2)
     dups = pdf.find_duplicates(panDat, as_table=False)
     self.assertTrue(set(dups) == {'a'} and true_count(dups['a']) == 1)

     # a fourth row is added for the second set of checks
     panDat = as_pan_dat([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40),
                          (1, 2, 3, 40)])
     dups = pdf.find_duplicates(panDat, keep=False)
     self.assertTrue(
         set(dups) == {'a', 'b'} and set(dups['a']['aField']) == {1})
     dups = pdf.find_duplicates(panDat, as_table=False, keep=False)
     counts = {}
     for table, flags in dups.items():
         counts[table] = true_count(flags)
     self.assertTrue(counts == {'a': 3, 'b': 2})
コード例 #17
0
 def testDups(self):
     if not self.can_run:
         return
     for verbose in [True, False]:
         tdf = TicDatFactory(one=[["a"], ["b", "c"]],
                             two=[["a", "b"], ["c"]],
                             three=[["a", "b", "c"], []])
         tdf2 = TicDatFactory(
             **{t: [[], ["a", "b", "c"]]
                for t in tdf.all_tables})
         td = tdf2.TicDat(
             **{
                 t: [[1, 2, 1], [1, 2, 2], [2, 1, 3], [2, 2, 3], [1, 2, 2],
                     ["new", 1, 2]]
                 for t in tdf.all_tables
             })
         writePath = os.path.join(
             makeCleanDir(os.path.join(_scratchDir, "dups")), "file.json")
         tdf2.json.write_file(td, writePath, verbose=verbose)
         dups = tdf.json.find_duplicates(writePath)
         self.assertTrue(dups == {
             'three': {
                 (1, 2, 2): 2
             },
             'two': {
                 (1, 2): 3
             },
             'one': {
                 1: 3,
                 2: 2
             }
         })
コード例 #18
0
ファイル: testpandat_io.py プロジェクト: nandi6uc/ticdat
 def testDietWithInfFlagging(self):
     """Round-trip diet data with an infinity io flag of 999999999.

     The data is written to sql, csv, json and xls. Factories that carry the
     flag (the original and its clone) must read back equal data. A factory
     built from the bare schema must read the protein ``maxNutrition`` as
     999999999, and only matches once that cell is set to infinity.
     """
     diet_pdf = PanDatFactory(**dietSchema())
     addDietDataTypes(diet_pdf)
     tdf = TicDatFactory(**dietSchema())
     tic_dat = tdf.copy_tic_dat(dietData())
     dat = tdf.copy_to_pandas(tic_dat, drop_pk_columns=False)
     diet_pdf.set_infinity_io_flag(999999999)

     core_path = os.path.join(_scratchDir, "diet_with_inf_flagging")
     db_path = core_path + ".db"
     csv_dir = core_path + "_csv"
     json_path = core_path + ".json"
     xlsx_path = core_path + ".xlsx"
     diet_pdf.sql.write_file(dat, db_path)
     diet_pdf.csv.write_directory(dat, csv_dir)
     diet_pdf.json.write_file(dat, json_path)
     diet_pdf.xls.write_file(dat, xlsx_path)

     written = (("sql", db_path), ("csv", csv_dir),
                ("json", json_path), ("xls", xlsx_path))
     for attr, location in written:
         flagged_read = getattr(diet_pdf, attr).create_pan_dat(location)
         self.assertTrue(
             diet_pdf._same_data(dat, flagged_read, epsilon=1e-5))

         pdf = diet_pdf.clone()
         clone_read = getattr(pdf, attr).create_pan_dat(location)
         self.assertTrue(pdf._same_data(dat, clone_read, epsilon=1e-5))

         # a factory rebuilt from the schema alone
         pdf = PanDatFactory(**diet_pdf.schema())
         plain_read = getattr(pdf, attr).create_pan_dat(location)
         self.assertFalse(pdf._same_data(dat, plain_read, epsilon=1e-5))
         is_protein = plain_read.categories["name"] == "protein"
         protein_max = list(
             plain_read.categories[is_protein]["maxNutrition"])[0]
         self.assertTrue(protein_max == 999999999)
         plain_read.categories.loc[is_protein, "maxNutrition"] = float("inf")
         self.assertTrue(pdf._same_data(dat, plain_read, epsilon=1e-5))
コード例 #19
0
 def _testPdfReproduction(self, pdf):
     """Rebuild a factory from ``pdf.schema(True)`` and compare it with ``pdf``.

     The comparison covers schema, foreign keys (as a set), data types and
     default values, and is done for both the full schema and the result of
     ``_deep_anonymize`` applied to it.
     """
     def assert_equivalent(left, right):
         self.assertTrue(left.schema() == right.schema())
         self.assertTrue(set(left.foreign_keys) == set(right.foreign_keys))
         self.assertTrue(left.data_types == right.data_types)
         self.assertTrue(left.default_values == right.default_values)

     rebuilt = TicDatFactory.create_from_full_schema(pdf.schema(True))
     assert_equivalent(pdf, rebuilt)
     rebuilt = TicDatFactory.create_from_full_schema(
         _deep_anonymize(pdf.schema(True)))
     assert_equivalent(pdf, rebuilt)
コード例 #20
0
 def testSpaces(self):
     if not self.can_run:
         return
     for hack, raw_data in list(product(*(([True, False],)*2))):
         tdf = TicDatFactory(**spacesSchema())
         ticDat = tdf.TicDat(**spacesData())
         self.assertTrue(tdf._same_data(ticDat, tdf.opalytics.create_tic_dat(
             create_inputset_mock(tdf, ticDat, hack), raw_data=raw_data)))
コード例 #21
0
ファイル: testutils.py プロジェクト: Dr-Irv/opalytics-ticdat
 def testTwo(self):
     """Build a frozen TicDat from the diet data and compare it table by table.

     The frozen object must pass ``good_tic_dat_object`` and each of its tables
     is handed to ``self._assertSame`` together with the source table and a
     per-table ``good_tic_dat_table`` check.
     """
     source = dietData()
     factory = TicDatFactory(**dietSchema())
     table_names = set(factory.primary_key_fields)
     by_table = {}
     for name in table_names:
         by_table[name] = getattr(source, name)
     frozen = factory.freeze_me(factory.TicDat(**by_table))
     self.assertTrue(factory.good_tic_dat_object(frozen))
     for name in table_names:
         def is_good_table(candidate):
             return factory.good_tic_dat_table(candidate, name)
         self._assertSame(getattr(source, name), getattr(frozen, name),
                          is_good_table)
コード例 #22
0
ファイル: testxls.py プロジェクト: austin-bren/ticdat
 def testSillyTwoTables(self):
     if not self.can_run:
         return
     tdf = TicDatFactory(**sillyMeSchema())
     ticDat = tdf.TicDat(**sillyMeDataTwoTables())
     filePath = os.path.join(_scratchDir, "sillyMeTwoTables.xls")
     tdf.xls.write_file(ticDat, filePath)
     xlsTicDat = tdf.xls.create_tic_dat(filePath)
     self.assertTrue(tdf._same_data(ticDat, xlsTicDat))
コード例 #23
0
ファイル: testpandat_io.py プロジェクト: nandi6uc/ticdat
    def test_datetime(self):
        """Round-trip datetime fields and parameters through csv, xls, sql and json.

        Both fields of ``table_with_stuffs`` and both parameters are declared
        as datetimes (some nullable). After each write/read the data is
        asserted to have no data type or data row failures, and the values
        are checked to be datetime-like or null.
        """
        # NOTE(review): the scratch prefix is "parameters" although this test is
        # about datetimes - confirm it cannot collide with files of other tests
        core_path = os.path.join(_scratchDir, "parameters")
        pdf = PanDatFactory(table_with_stuffs=[["field one"], ["field two"]],
                            parameters=[["a"], ["b"]])
        pdf.add_parameter("p1", "Dec 15 1970", datetime=True)
        pdf.add_parameter("p2", None, datetime=True, nullable=True)
        pdf.set_data_type("table_with_stuffs", "field one", datetime=True)
        pdf.set_data_type("table_with_stuffs",
                          "field two",
                          datetime=True,
                          nullable=True)
        # one row has None in "field two", the other has datetimes in both fields
        dat = TicDatFactory(**pdf.schema()).TicDat(
            table_with_stuffs=[[dateutil.parser.parse("July 11 1972"), None],
                               [
                                   datetime.datetime.now(),
                                   dateutil.parser.parse("Sept 11 2011")
                               ]],
            parameters=[["p1", "7/11/1911"], ["p2", None]])
        dat = TicDatFactory(**pdf.schema()).copy_to_pandas(
            dat, drop_pk_columns=False)
        self.assertFalse(
            pdf.find_data_type_failures(dat)
            or pdf.find_data_row_failures(dat))

        for attr, path in [["csv", core_path + "_csv"],
                           ["xls", core_path + ".xlsx"],
                           ["sql", core_path + ".db"],
                           ["json", core_path + ".json"]]:
            # csv is written as a directory, the other formats as one file
            func = "write_directory" if attr == "csv" else "write_file"
            getattr(getattr(pdf, attr), func)(dat, path)
            dat_1 = getattr(pdf, attr).create_pan_dat(path)
            # the re-read data is asserted NOT to equal the written data,
            # while still being free of type and row failures
            self.assertFalse(pdf._same_data(dat, dat_1))
            self.assertFalse(
                pdf.find_data_type_failures(dat_1)
                or pdf.find_data_row_failures(dat_1))
            dat_1 = pdf.copy_to_tic_dat(dat_1)
            self.assertTrue(set(dat_1.parameters) == {'p1', 'p2'})
            # p1 must come back as a non-null datetime, p2 as null
            self.assertTrue(
                isinstance(dat_1.parameters["p1"]["b"],
                           (datetime.datetime, numpy.datetime64))
                and not pd.isnull(dat_1.parameters["p1"]["b"]))
            self.assertTrue(pd.isnull(dat_1.parameters["p2"]["b"]))
            # every key of table_with_stuffs must be a non-null datetime
            self.assertTrue(
                all(
                    isinstance(_, (datetime.datetime,
                                   numpy.datetime64)) and not pd.isnull(_)
                    for _ in dat_1.table_with_stuffs))
            # every data value must be a datetime, None, or NaN
            self.assertTrue(
                all(
                    isinstance(_, (datetime.datetime, numpy.datetime64))
                    or _ is None or utils.safe_apply(math.isnan)(_)
                    for v in dat_1.table_with_stuffs.values()
                    for _ in v.values()))
            # the data values must include both a null and a non-null entry
            self.assertTrue({
                pd.isnull(_)
                for v in dat_1.table_with_stuffs.values() for _ in v.values()
            } == {True, False})
0
 def testSillyTwoTables(self):
     if not self.can_run:
         return
     tdf = TicDatFactory(**sillyMeSchema())
     ticDat = tdf.TicDat(**sillyMeDataTwoTables())
     dirPath = os.path.join(_scratchDir, "sillyTwoTables")
     tdf.csv.write_directory(ticDat, dirPath)
     self.assertFalse(tdf.csv.find_duplicates(dirPath))
     csvTicDat = tdf.csv.create_tic_dat(dirPath)
     self.assertTrue(tdf._same_data(ticDat, csvTicDat))
コード例 #25
0
    def testNetflow(self):
        """Read "netflow.accdb" and compare it with the netflow sample data.

        Covers a frozen read (a later edit must raise and leave the data
        equal), an unfrozen read (the same edit must succeed and make the data
        differ), and a schema with an unknown primary key field (the read must
        raise with a specific message). Returns immediately when
        ``_can_unit_test`` is falsy.
        """
        if not _can_unit_test:
            return
        tdf = TicDatFactory(**netflowSchema())
        addNetflowForeignKeys(tdf)
        source_tables = {}
        for table in tdf.all_tables:
            source_tables[table] = getattr(netflowData(), table)
        ticDat = tdf.freeze_me(tdf.TicDat(**source_tables))

        filePath = "netflow.accdb"
        self.assertFalse(tdf.mdb.find_duplicates(filePath))
        mdbTicDat = tdf.mdb.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, mdbTicDat))

        def changeIt():
            # reads mdbTicDat at call time, so it follows the rebinding below
            mdbTicDat.inflow['Pencils', 'Boston']["quantity"] = 12

        frozen_edit_error = self.firesException(changeIt)
        self.assertTrue(frozen_edit_error)
        self.assertTrue(tdf._same_data(ticDat, mdbTicDat))

        mdbTicDat = tdf.mdb.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(ticDat, mdbTicDat))
        unfrozen_edit_error = self.firesException(changeIt)
        self.assertFalse(unfrozen_edit_error)
        self.assertFalse(tdf._same_data(ticDat, mdbTicDat))

        # swap the primary key of "nodes" for a field the file does not have
        pkHacked = netflowSchema()
        pkHacked["nodes"][0] = ["nimrod"]
        tdfHacked = TicDatFactory(**pkHacked)
        message = self.firesException(
            lambda: tdfHacked.mdb.create_tic_dat(filePath))
        self.assertTrue(
            "Unable to recognize field nimrod in table nodes" in message)
コード例 #26
0
ファイル: testsql.py プロジェクト: vn8317x/opalytics-ticdat
    def testWeirdDiets(self):
        """Round-trip diet data through a database file using the "weird case" schemas.

        First the data is written with ``dietSchemaWeirdCase`` and read back
        with the plain diet schema. Then it is written with
        ``dietSchemaWeirdCase2``, the ``nutrition_quantities`` table is
        renamed to ``[nutrition quantities]`` and the file is read again.
        Finally a second ``nutrition_quantities`` table is created and the
        read is expected to raise.
        """
        if not self.can_run:
            return
        db_path = os.path.join(_scratchDir, "weirdDiet.db")
        base_factory = TicDatFactory(**dietSchema())
        base_tables = {}
        for name in base_factory.primary_key_fields:
            base_tables[name] = getattr(dietData(), name)
        base_dat = base_factory.freeze_me(base_factory.TicDat(**base_tables))

        # Written through the first variant schema, read through the plain one.
        weird_factory = TicDatFactory(**dietSchemaWeirdCase())
        weird_dat = copyDataDietWeirdCase(base_dat)
        weird_factory.sql.write_db_data(weird_dat, db_path, allow_overwrite=True)
        duplicates = weird_factory.sql.find_duplicates(db_path)
        self.assertFalse(duplicates)
        reloaded = base_factory.sql.create_tic_dat(db_path)
        self.assertTrue(base_factory._same_data(base_dat, reloaded))

        # Second variant: write to a cleaned path, then rename one table.
        weird_factory2 = TicDatFactory(**dietSchemaWeirdCase2())
        weird_dat2 = copyDataDietWeirdCase2(base_dat)
        weird_factory2.sql.write_db_data(weird_dat2, makeCleanPath(db_path))
        rename_statement = "ALTER TABLE nutrition_quantities RENAME TO [nutrition quantities]"
        with sql.connect(db_path) as connection:
            connection.execute(rename_statement)

        reloaded2 = weird_factory2.sql.create_tic_dat(db_path)
        self.assertTrue(weird_factory2._same_data(weird_dat2, reloaded2))

        # With both table spellings present, the read is expected to raise.
        with sql.connect(db_path) as connection:
            connection.execute("create table nutrition_quantities(boger)")

        def read_again():
            return weird_factory2.sql.create_tic_dat(db_path)

        self.assertTrue(self.firesException(read_again))
コード例 #27
0
    def testWeirdDiets(self):
        """Round-trip diet data through an .accdb file using the "weird case" schemas.

        Same shape as the other weird-diet test but through ``tdf.mdb``: write
        with ``dietSchemaWeirdCase`` and read with the plain schema; write with
        ``dietSchemaWeirdCase2``, copy ``nutrition_quantities`` into
        ``[nutrition quantities]`` and drop the original; then re-create a
        ``nutrition_quantities`` table and expect the read to raise.
        """
        if not _can_accdb_unit_test:
            return
        accdb_path = os.path.join(_scratchDir, "weirdDiet.accdb")
        base_factory = TicDatFactory(**dietSchema())
        base_tables = {}
        for name in base_factory.primary_key_fields:
            base_tables[name] = getattr(dietData(), name)
        base_dat = base_factory.freeze_me(base_factory.TicDat(**base_tables))

        weird_factory = TicDatFactory(**dietSchemaWeirdCase())
        weird_dat = copyDataDietWeirdCase(base_dat)
        weird_factory.mdb.write_file(weird_dat, accdb_path, allow_overwrite=True)
        reloaded = base_factory.mdb.create_tic_dat(accdb_path)
        self.assertTrue(base_factory._same_data(base_dat, reloaded))

        weird_factory2 = TicDatFactory(**dietSchemaWeirdCase2())
        weird_dat2 = copyDataDietWeirdCase2(base_dat)
        weird_factory2.mdb.write_file(weird_dat2, makeCleanPath(accdb_path))
        # Each statement runs on its own cursor and is committed immediately.
        rename_statements = (
            "SELECT * INTO [nutrition quantities] FROM nutrition_quantities",
            "DROP TABLE nutrition_quantities",
        )
        with py.connect(_connection_str(accdb_path)) as connection:
            for statement in rename_statements:
                connection.cursor().execute(statement).commit()

        reloaded2 = weird_factory2.mdb.create_tic_dat(accdb_path)
        self.assertTrue(weird_factory2._same_data(weird_dat2, reloaded2))

        # With both table spellings present, the read is expected to raise.
        with py.connect(_connection_str(accdb_path)) as connection:
            connection.cursor().execute("create table nutrition_quantities (boger int)").commit()

        def read_again():
            return weird_factory2.mdb.create_tic_dat(accdb_path)

        self.assertTrue(self.firesException(read_again))
コード例 #28
0
ファイル: testxls.py プロジェクト: austin-bren/ticdat
    def testMissingTable(self):
        """Read xlsx files that lack some of the diet tables.

        A file is written from a factory holding only part of the diet schema
        and read back with the full diet factory. The result must match the
        partial data, expose every table of the full schema as an attribute,
        and have the omitted tables falsy.
        """
        if not self.can_run:
            return
        full_factory = TicDatFactory(**dietSchema())

        def write_subset_then_read(keep_table):
            # Build a factory from the tables accepted by ``keep_table``, write
            # the diet data through it and read it back with the full factory.
            partial_schema = {}
            for table, fields in dietSchema().items():
                if keep_table(table):
                    partial_schema[table] = fields
            partial_factory = TicDatFactory(**partial_schema)
            partial_dat = partial_factory.copy_tic_dat(dietData())
            xlsx_path = makeCleanPath(os.path.join(_scratchDir,
                                                   "diet_missing.xlsx"))
            partial_factory.xls.write_file(partial_dat, xlsx_path)
            reread = full_factory.xls.create_tic_dat(xlsx_path)
            self.assertTrue(partial_factory._same_data(partial_dat, reread))
            self.assertTrue(
                all(hasattr(reread, t) for t in full_factory.all_tables))
            return reread

        # Only nutritionQuantities is left out.
        reread = write_subset_then_read(lambda t: t != "nutritionQuantities")
        self.assertFalse(reread.nutritionQuantities)
        self.assertTrue(reread.categories and reread.foods)

        # Only categories is kept.
        reread = write_subset_then_read(lambda t: t == "categories")
        self.assertFalse(reread.nutritionQuantities or reread.foods)
        self.assertTrue(reread.categories)
コード例 #29
0
 def testSpacesOpalytics(self):
     """Load the "spaces" sample data through the PanDat opalytics reader.

     Runs once for every (hack, raw_data) combination of booleans and asserts
     that the PanDat, converted back to a TicDat, matches the original data.
     """
     if not self.can_run:
         return
     for hack, raw_data in itertools.product([True, False], repeat=2):
         tic_factory = TicDatFactory(**spacesSchema())
         expected = tic_factory.TicDat(**spacesData())
         inputset = create_inputset_mock(tic_factory, expected, hack)
         pan_factory = PanDatFactory(**tic_factory.schema())
         pan_dat = pan_factory.opalytics.create_pan_dat(inputset,
                                                        raw_data=raw_data)
         converted = pan_factory.copy_to_tic_dat(pan_dat)
         self.assertTrue(tic_factory._same_data(expected, converted))
コード例 #30
0
ファイル: testlingo.py プロジェクト: vn8317x/opalytics-ticdat
 def test_(schema_factory, data_factory):
     """Apply a space/case mapping in reverse and then forward again.

     ``self`` is not a parameter here; it is taken from the enclosing scope.
     The inverted mapping must produce data that differs from the original,
     and applying the forward mapping to that result must restore it.
     """
     tdf = TicDatFactory(**schema_factory())
     original = tdf.copy_tic_dat(data_factory())
     created = tlingo._try_create_space_case_mapping(tdf, original)
     forward = created["mapping"]
     inverse = {}
     for key, value in forward.items():
         inverse[value] = key
     remapped = tlingo._apply_space_case_mapping(tdf, original, inverse)
     restored = tlingo._apply_space_case_mapping(tdf, remapped, forward)
     self.assertTrue(tdf._same_data(original, restored))
     self.assertFalse(tdf._same_data(original, remapped))
コード例 #31
0
    def testNetflow(self):
        """Load netflow data through the TicDat opalytics reader.

        For every (hack, raw_data) combination of booleans, the data read
        from a mocked input set must match the source, both before and after
        an extra ``nodes`` row keyed by the integer 12 is added.
        """
        if not self.can_run:
            return
        flags = [True, False]
        for hack, raw_data in product(flags, flags):
            tdf = TicDatFactory(**netflowSchema())
            source = tdf.copy_tic_dat(netflowData())

            def reread():
                # Build a fresh mock from the current state of ``source``.
                inputset = create_inputset_mock(tdf, source, hack)
                return tdf.opalytics.create_tic_dat(inputset, raw_data=raw_data)

            self.assertTrue(tdf._same_data(source, reread()))

            source.nodes[12] = {}
            self.assertTrue(tdf._same_data(source, reread()))
コード例 #32
0
ファイル: testutils.py プロジェクト: Dr-Irv/opalytics-ticdat
    def testThree(self):
        """Exercise frozen and mutable TicDat objects built from the netflow sample data.

        A frozen TicDat is built from ``netflowData()``; the source object is
        then mutated and compared against it, a bad ``cost`` key is shown to
        be rejected at construction, and edits / attribute assignments are
        checked on frozen versus mutable objects.
        """
        objOrig = netflowData()
        staticFactory = TicDatFactory(**netflowSchema())
        # goodTable(t) yields a one-argument checker bound to table name t.
        goodTable = lambda t : lambda _t : staticFactory.good_tic_dat_table(_t, t)
        tables = set(staticFactory.primary_key_fields)
        ticDat = staticFactory.freeze_me(staticFactory.TicDat(**{t:getattr(objOrig,t) for t in tables}))
        self.assertTrue(staticFactory.good_tic_dat_object(ticDat))
        for t in tables :
            self._assertSame(getattr(objOrig, t), getattr(ticDat,t), goodTable(t))

        # Mutate the source after the frozen copy was taken: nodes and cost
        # must still compare the same, while the commodities and arcs
        # comparisons are expected to raise.
        objOrig.commodities.append(12.3)
        objOrig.arcs[(1, 2)] = [12]
        self._assertSame(objOrig.nodes, ticDat.nodes, goodTable("nodes"))
        self._assertSame(objOrig.cost, ticDat.cost, goodTable("cost"))
        self.assertTrue(firesException(lambda : self._assertSame(
            objOrig.commodities, ticDat.commodities, goodTable("commodities")) ))
        self.assertTrue(firesException(lambda : self._assertSame(
            objOrig.arcs, ticDat.arcs, goodTable("arcs")) ))

        # Rebuilding from the mutated source brings the two back in line.
        ticDat = staticFactory.freeze_me(staticFactory.TicDat(**{t:getattr(objOrig,t) for t in tables}))
        for t in tables :
            self._assertSame(getattr(objOrig, t), getattr(ticDat,t), goodTable(t))

        self.assertTrue(ticDat.arcs[1, 2]["capacity"] == 12)
        self.assertTrue(12.3 in ticDat.commodities)

        # A scalar key in cost is expected to be rejected with the message
        # asserted below.
        objOrig.cost[5]=5

        self.assertTrue("cost cannot be treated as a ticDat table : Inconsistent key lengths" in
            firesException(lambda : staticFactory.freeze_me(staticFactory.TicDat
                                    (**{t:getattr(objOrig,t) for t in tables}))))

        objOrig = netflowData()
        # editMeBadly assigns to cost with a single string key, editMeWell
        # with a three-part key; each returns a zero-argument callable.
        def editMeBadly(t) :
            def rtn() :
                t.cost["hack"] = 12
            return rtn
        def editMeWell(t) :
            def rtn() :
                t.cost["hack", "my", "balls"] = 12.12
            return rtn
        # On frozen objects both kinds of edit are expected to raise.
        self.assertTrue(all(firesException(editMeWell(t)) and firesException(editMeBadly(t)) for t in
                            (ticDat, staticFactory.freeze_me(staticFactory.TicDat()))))

        def attributeMe(t) :
            def rtn() :
                t.boger="bogerwoger"
            return rtn

        # Setting a new attribute on a frozen object is expected to raise too.
        self.assertTrue(firesException(attributeMe(ticDat)) and firesException(attributeMe(
                staticFactory.freeze_me(staticFactory.TicDat()))))

        mutable = staticFactory.TicDat(**{t:getattr(objOrig,t) for t in tables})
        for t in tables :
            self._assertSame(getattr(objOrig, t), getattr(mutable,t), goodTable(t))

        # On the mutable object only the single-key edit is expected to raise;
        # after the successful edit, cost no longer compares the same as the source.
        self.assertTrue(firesException(editMeBadly(mutable)))
        self.assertFalse(firesException(editMeWell(mutable)) or firesException(attributeMe(mutable)))
        self.assertTrue(firesException(lambda : self._assertSame(
            objOrig.cost, mutable.cost, goodTable("cost")) ))
コード例 #33
0
ファイル: testsql.py プロジェクト: Dr-Irv/opalytics-ticdat
    def testSilly(self):
        """Write the "silly" sample data to a database file and read it back through variant schemas.

        Variants of ``sillyMeSchema()`` used for reading:

        * schema2 - primary key fields of table ``b`` reordered; the data read
          is expected to differ from the original.
        * schema3 - data fields of table ``a`` reordered; the data read is
          expected to match.
        * schema4 - table ``a`` keeps only ``aData1`` and ``aData3``.
        * schema5 - tables ``a`` and ``b`` have their key fields moved into
          the data fields (no primary key); ``a`` and ``c`` are generator tables.
        * schema6 - adds a table ``d``; reading is expected to raise with a
          message containing ``"table d"``.

        The last section checks that a ``None`` data value is read back as ``None``.
        """
        tdf = TicDatFactory(**sillyMeSchema())
        ticDat = tdf.TicDat(**sillyMeData())
        schema2 = sillyMeSchema()
        schema2["b"][0] = ("bField2", "bField1", "bField3")
        schema3 = sillyMeSchema()
        schema3["a"][1] = ("aData2", "aData3", "aData1")
        schema4 = sillyMeSchema()
        schema4["a"][1] = ("aData1", "aData3")
        schema5 = sillyMeSchema()
        # Normalizes a schema entry to a tuple so the entries can be concatenated.
        _tuple = lambda x : tuple(x) if utils.containerish(x) else (x,)
        for t in ("a", "b") :
            schema5[t][1] = _tuple(schema5[t][1]) + _tuple(schema5[t][0])
        schema5["a"][0], schema5["b"][0] =  (),  []
        schema6 = sillyMeSchema()
        schema6["d"] =  [["dField"],()]

        tdf2, tdf3, tdf4, tdf5, tdf6 = (TicDatFactory(**x) for x in (schema2, schema3, schema4, schema5, schema6))
        tdf5.set_generator_tables(("a","c"))
        filePath = os.path.join(_scratchDir, "silly.db")
        tdf.sql.write_db_data(ticDat, filePath)

        ticDat2 = tdf2.sql.create_tic_dat(filePath)
        self.assertFalse(tdf._same_data(ticDat, ticDat2))

        ticDat3 = tdf3.sql.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(ticDat, ticDat3))

        ticDat4 = tdf4.sql.create_tic_dat(filePath)
        for t in ["a","b"]:
            for k,v in getattr(ticDat4, t).items() :
                # Every value read through the reduced schema must agree with the original.
                for _k, _v in v.items() :
                    self.assertTrue(getattr(ticDat, t)[k][_k] == _v)
                # Only table "b" rows may carry the full set of data fields.
                if set(v) == set(getattr(ticDat, t)[k]) :
                    self.assertTrue(t == "b")
                else :
                    self.assertTrue(t == "a")

        ticDat5 = tdf5.sql.create_tic_dat(filePath)
        self.assertTrue(tdf5._same_data(tdf._keyless(ticDat), ticDat5))
        self.assertTrue(callable(ticDat5.a) and callable(ticDat5.c) and not callable(ticDat5.b))

        self.assertTrue("table d" in self.firesException(lambda  : tdf6.sql.create_tic_dat(filePath)))

        ticDat.a["theboger"] = (1, None, 12)
        tdf.sql.write_db_data(ticDat, makeCleanPath(filePath))
        ticDatNone = tdf.sql.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, ticDatNone))
        # Identity check: None is a singleton, so ``is`` is the correct comparison.
        self.assertTrue(ticDatNone.a["theboger"]["aData2"] is None)
コード例 #34
0
ファイル: testutils.py プロジェクト: Dr-Irv/opalytics-ticdat
 def testSeven(self):
     """Check the values filled in for rows created from empty dicts.

     Runs three times: with no defaults configured, with defaults set through
     ``set_default_values``, and with defaults set through
     ``set_default_value``.
     """
     def build_frozen(factory):
         # One row per table; categories["2"] is given as a list and its two
         # fields are checked before the object is frozen.
         dat = factory.TicDat()
         dat.foods["a"] = {}
         dat.categories["1"] = {}
         dat.categories["2"] = [0,1]
         self.assertTrue(dat.categories["2"]["minNutrition"] == 0)
         self.assertTrue(dat.categories["2"]["maxNutrition"] == 1)
         dat.nutritionQuantities['junk',1] = {}
         return factory.freeze_me(dat)

     def rows_hold(dat, cost, bounds, qty):
         # NOTE(review): relies on the row's values() comparing equal to a
         # tuple - confirm against the row type used by TicDat.
         return (dat.foods["a"]["cost"]==cost and
                 dat.categories["1"].values() == bounds and
                 dat.nutritionQuantities['junk',1]["qty"] == qty)

     plain = TicDatFactory(**dietSchema())
     self.assertTrue(rows_hold(build_frozen(plain), 0, (0,0), 0))

     bulk = TicDatFactory(**dietSchema())
     bulk.set_default_values(foods = {"cost":"dontcare"},nutritionQuantities = {"qty":100} )
     self.assertTrue(rows_hold(build_frozen(bulk), 'dontcare', (0,0), 100))

     single = TicDatFactory(**dietSchema())
     single.set_default_value("categories", "minNutrition", 1)
     single.set_default_value("categories", "maxNutrition", 2)
     self.assertTrue(rows_hold(build_frozen(single), 0, (1,2), 0))
コード例 #35
0
ファイル: testxls.py プロジェクト: Dr-Irv/opalytics-ticdat
    def testRowOffsets(self):
        """Read an xls file with per-table row offsets.

        Both tables have no primary key, and each embeds its own field-name
        tuple as an ordinary data row part-way through. The file is read with
        no offsets, with an offset for ``woger`` only, and with offsets for
        both tables.
        """
        tdf = TicDatFactory(boger = [[],["the", "big", "boger"]],
                            woger = [[], ["the", "real", "big", "woger"]])
        boger_rows = ([1, 2, 3], [12, 24, 36], tdf.data_fields["boger"], [100, 200, 400])
        woger_rows = [[1, 2, 3, 4]]*4 + [tdf.data_fields["woger"]] + [[100, 200, 300, 400]]*5
        written = tdf.freeze_me(tdf.TicDat(boger = boger_rows, woger = woger_rows))
        xls_path = os.path.join(_scratchDir, "rowoff.xls")
        tdf.xls.write_file(written, xls_path)

        no_offsets = tdf.xls.create_tic_dat(xls_path)
        woger_offset = tdf.xls.create_tic_dat(xls_path, {"woger": 5})
        both_offsets = tdf.xls.create_tic_dat(xls_path, {"woger":5, "boger":3})
        self.assertTrue(tdf._same_data(written, no_offsets))

        # boger read without an offset must be unchanged.
        recombined = tdf.TicDat(boger = woger_offset.boger, woger = written.woger)
        self.assertTrue(tdf._same_data(written, recombined))

        # With the woger offset, five rows holding the 100..400 values are expected.
        woger_big_ok = all(woger_offset.woger[i]["big"] == 300 for i in range(5))
        self.assertTrue(woger_big_ok)
        woger_real_ok = all(both_offsets.woger[i]["real"] == 200 for i in range(5))
        self.assertTrue(woger_real_ok)
        self.assertTrue(both_offsets.boger[0]["big"] == 200 and len(both_offsets.boger) == 1)
コード例 #36
0
ファイル: testxls.py プロジェクト: Dr-Irv/opalytics-ticdat
    def testNetflow(self):
        """Round-trip the netflow data through an xls file.

        Checks the frozen and unfrozen reads, the absence of duplicates, the
        overwrite protection of ``write_file``, and the error raised when the
        file was written with a renamed ``nodes`` primary key.
        """
        tdf = TicDatFactory(**netflowSchema())
        source_tables = {}
        for table in tdf.primary_key_fields:
            source_tables[table] = getattr(netflowData(), table)
        expected = tdf.freeze_me(tdf.TicDat(**source_tables))
        xls_path = os.path.join(_scratchDir, "netflow.xls")
        tdf.xls.write_file(expected, xls_path)

        def set_quantity(dat):
            dat.inflow['Pencils', 'Boston']["quantity"] = 12

        # Frozen read: the edit is expected to raise and leave the data equal.
        frozen_read = tdf.xls.create_tic_dat(xls_path, freeze_it=True)
        self.assertTrue(tdf._same_data(expected, frozen_read))
        self.assertTrue(self.firesException(lambda: set_quantity(frozen_read)))
        self.assertTrue(tdf._same_data(expected, frozen_read))

        # Editable read: the edit is expected to succeed and make the data differ.
        editable_read = tdf.xls.create_tic_dat(xls_path)
        self.assertTrue(tdf._same_data(expected, editable_read))
        self.assertFalse(self.firesException(lambda: set_quantity(editable_read)))
        self.assertFalse(tdf._same_data(expected, editable_read))

        self.assertFalse(tdf.xls.get_duplicates(xls_path))

        hacked_schema = netflowSchema()
        hacked_schema["nodes"][0] = ["nimrod"]
        hacked_factory = TicDatFactory(**hacked_schema)
        # Writing over the existing file without allow_overwrite is expected to raise.
        blocked = self.firesException(lambda: hacked_factory.xls.write_file(expected, xls_path))
        self.assertTrue(blocked)
        hacked_factory.xls.write_file(expected, xls_path, allow_overwrite =True)
        read_failure = self.firesException(lambda: tdf.xls.create_tic_dat(xls_path))
        self.assertTrue("nodes : name" in read_failure)
コード例 #37
0
ファイル: testmdb.py プロジェクト: Dr-Irv/opalytics-ticdat
    def testNetflow(self):
        """Round-trip the netflow data through an mdb file.

        Checks the frozen and unfrozen reads, the overwrite protection of
        ``write_file``, and the error raised when the file was written with a
        renamed ``nodes`` primary key.
        """
        tdf = TicDatFactory(**netflowSchema())
        addNetflowForeignKeys(tdf)
        source_tables = {}
        for table in tdf.all_tables:
            source_tables[table] = getattr(netflowData(), table)
        expected = tdf.freeze_me(tdf.TicDat(**source_tables))
        mdb_path = os.path.join(_scratchDir, "netflow.mdb")
        tdf.mdb.write_file(expected, mdb_path)

        def set_quantity(dat):
            dat.inflow['Pencils', 'Boston']["quantity"] = 12

        # Frozen read: the edit is expected to raise and leave the data equal.
        frozen_read = tdf.mdb.create_tic_dat(mdb_path, freeze_it=True)
        self.assertTrue(tdf._same_data(expected, frozen_read))
        self.assertTrue(self.firesException(lambda: set_quantity(frozen_read)))
        self.assertTrue(tdf._same_data(expected, frozen_read))

        # Editable read: the edit is expected to succeed and make the data differ.
        editable_read = tdf.mdb.create_tic_dat(mdb_path)
        self.assertTrue(tdf._same_data(expected, editable_read))
        self.assertFalse(self.firesException(lambda: set_quantity(editable_read)))
        self.assertFalse(tdf._same_data(expected, editable_read))

        hacked_schema = netflowSchema()
        hacked_schema["nodes"][0] = ["nimrod"]
        hacked_factory = TicDatFactory(**hacked_schema)
        hacked_tables = {}
        for table in tdf.all_tables:
            hacked_tables[table] = getattr(expected, table)
        hacked_dat = hacked_factory.TicDat(**hacked_tables)
        hacked_factory.mdb.write_file(hacked_dat, makeCleanPath(mdb_path))
        # A second write to the same path without allow_overwrite is expected to raise.
        blocked = self.firesException(lambda: hacked_factory.mdb.write_file(expected, mdb_path))
        self.assertTrue(blocked)
        hacked_factory.mdb.write_file(expected, mdb_path, allow_overwrite =True)
        read_failure = self.firesException(lambda: tdf.mdb.create_tic_dat(mdb_path))
        self.assertTrue("Unable to recognize field name in table nodes" in read_failure)
コード例 #38
0
ファイル: testcsv.py プロジェクト: Dr-Irv/opalytics-ticdat
    def testNetflow(self):
        """Round-trip the netflow data through a directory of csv files.

        Covers reading with and without headers, writing without headers, an
        integer node key, and a string node key that looks like a number.
        """
        tdf = TicDatFactory(**netflowSchema())
        source_tables = {}
        for table in tdf.primary_key_fields:
            source_tables[table] = getattr(netflowData(), table)
        source = tdf.TicDat(**source_tables)
        csv_dir = os.path.join(_scratchDir, "netflow")
        tdf.csv.write_directory(source, csv_dir)
        reread = tdf.csv.create_tic_dat(csv_dir, freeze_it=True)
        self.assertFalse(tdf.csv.get_duplicates(csv_dir))
        self.assertTrue(tdf._same_data(source, reread))

        # Files written with headers but read as header-less must not match.
        reread = tdf.csv.create_tic_dat(csv_dir, freeze_it= True, headers_present=False)
        self.assertFalse(tdf._same_data(source, reread))

        # Files written without headers: the default read is expected to
        # raise, the header-less read is expected to match.
        tdf.csv.write_directory(source, csv_dir, write_header=False,allow_overwrite=True)
        default_read_error = self.firesException(
            lambda: tdf.csv.create_tic_dat(csv_dir, freeze_it=True))
        self.assertTrue(default_read_error)
        reread = tdf.csv.create_tic_dat(csv_dir, headers_present=False, freeze_it=True)
        self.assertTrue(tdf._same_data(source, reread))

        source.nodes[12] = {}
        tdf.csv.write_directory(source, csv_dir, allow_overwrite=True)
        reread = tdf.csv.create_tic_dat(csv_dir, freeze_it=True)
        self.assertTrue(tdf._same_data(source, reread))

        # Minor flaw: strings that can be parsed as floats are turned into
        # floats when csv files are read, so the '12' key does not survive.
        del(source.nodes[12])
        source.nodes['12'] = {}
        blocked = firesException(lambda: tdf.csv.write_directory(source, csv_dir))
        self.assertTrue(blocked)
        tdf.csv.write_directory(source, csv_dir, allow_overwrite=True)
        reread = tdf.csv.create_tic_dat(csv_dir, freeze_it=True)
        self.assertFalse(tdf._same_data(source, reread))
コード例 #39
0
ファイル: testcsv.py プロジェクト: Dr-Irv/opalytics-ticdat
    def testDiet(self):
        """Round-trip the diet data through a directory of csv files.

        Covers an editable read, an invalid dialect name, and writing and
        reading with the ``excel-tab`` dialect.
        """
        tdf = TicDatFactory(**dietSchema())
        source_tables = {}
        for table in tdf.primary_key_fields:
            source_tables[table] = getattr(dietData(), table)
        expected = tdf.freeze_me(tdf.TicDat(**source_tables))
        csv_dir = os.path.join(_scratchDir, "diet")
        tdf.csv.write_directory(expected,csv_dir)
        self.assertFalse(tdf.csv.get_duplicates(csv_dir))

        def set_min_nutrition(dat):
            dat.categories["calories"]["minNutrition"]=12

        # Editable read: the edit is expected to succeed and make the data differ.
        editable_read = tdf.csv.create_tic_dat(csv_dir)
        self.assertTrue(tdf._same_data(expected, editable_read))
        self.assertFalse(firesException(lambda: set_min_nutrition(editable_read)))
        self.assertFalse(tdf._same_data(expected, editable_read))

        dialect_error = self.firesException(
            lambda: tdf.csv.write_directory(expected, csv_dir, dialect="excel_t"))
        self.assertTrue(dialect_error.endswith("Invalid dialect excel_t"))

        # Tab-dialect files: the default read is expected to raise, the
        # tab-dialect read is expected to match.
        tdf.csv.write_directory(expected, csv_dir, dialect="excel-tab", allow_overwrite=True)
        default_read_error = self.firesException(
            lambda: tdf.csv.create_tic_dat(csv_dir, freeze_it=True))
        self.assertTrue(default_read_error)
        frozen_read = tdf.csv.create_tic_dat(csv_dir, freeze_it=True, dialect="excel-tab")
        self.assertTrue(firesException(lambda: set_min_nutrition(frozen_read)))
        self.assertTrue(tdf._same_data(expected, frozen_read))
コード例 #40
0
ファイル: testsql.py プロジェクト: Dr-Irv/opalytics-ticdat
    def testNetflow(self):
        """Round-trip the netflow data through a database file and check foreign keys.

        Covers table ordering with foreign keys in place, frozen and unfrozen
        reads, overwrite protection of ``write_db_data``, the error raised for
        a file written with a renamed ``nodes`` primary key, and the result of
        ``find_foreign_key_failures`` on data with dangling references.
        """
        tdf = TicDatFactory(**netflowSchema())
        addNetflowForeignKeys(tdf)
        # nodes must come before the tables listed with it, and likewise commodities.
        ordered = tdf.sql._ordered_tables()
        self.assertTrue(ordered.index("nodes") < min(ordered.index(_) for _ in ("arcs", "cost", "inflow")))
        self.assertTrue(ordered.index("commodities") < min(ordered.index(_) for _ in ("cost", "inflow")))
        ticDat = tdf.freeze_me(tdf.TicDat(**{t:getattr(netflowData(),t) for t in tdf.primary_key_fields}))
        filePath = os.path.join(_scratchDir, "netflow.sql")
        tdf.sql.write_db_data(ticDat, filePath)
        sqlTicDat = tdf.sql.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, sqlTicDat))
        # changeIt looks up sqlTicDat at call time, so it follows the rebinding below.
        def changeIt() :
            sqlTicDat.inflow['Pencils', 'Boston']["quantity"] = 12
        # Frozen read: the edit is expected to raise and leave the data equal.
        self.assertTrue(self.firesException(changeIt))
        self.assertTrue(tdf._same_data(ticDat, sqlTicDat))

        # Unfrozen read: the edit is expected to succeed and make the data differ.
        sqlTicDat = tdf.sql.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(ticDat, sqlTicDat))
        self.assertFalse(self.firesException(changeIt))
        self.assertFalse(tdf._same_data(ticDat, sqlTicDat))

        pkHacked = netflowSchema()
        pkHacked["nodes"][0] = ["nimrod"]
        tdfHacked = TicDatFactory(**pkHacked)
        ticDatHacked = tdfHacked.TicDat(**{t : getattr(ticDat, t) for t in tdf.all_tables})
        tdfHacked.sql.write_db_data(ticDatHacked, makeCleanPath(filePath))
        # A second write to the same path without allow_overwrite is expected to raise.
        self.assertTrue(self.firesException(lambda : tdfHacked.sql.write_db_data(ticDat, filePath)))
        tdfHacked.sql.write_db_data(ticDat, filePath, allow_overwrite =True)
        self.assertTrue("Unable to recognize field name in table nodes" in
                        self.firesException(lambda  :tdf.sql.create_tic_dat(filePath)))

        ticDatNew = tdf.TicDat(**{t:getattr(netflowData(),t) for t in tdf.primary_key_fields})

        # Rows added with names ('booger', 'wooger', 'junker', 'bunker',
        # 'New Jerk') that the expected failure sets below report as missing parents.
        ticDatNew.cost['Pencils', 'booger', 'wooger'] =  10
        ticDatNew.cost['junker', 'Detroit', 'New York'] =  20
        ticDatNew.cost['bunker', 'Detroit', 'New Jerk'] =  20
        ticDatNew.arcs['booger', 'wooger'] =  112
        # Each failure key f is reduced to f[:2] plus the first element of f[2]
        # before being compared with the expected dictionary.
        self.assertTrue({f[:2] + f[2][:1] : set(v.native_pks) for
                         f,v in tdf.find_foreign_key_failures(ticDatNew).items()} ==
        {('arcs', 'nodes', u'destination'): {('booger', 'wooger')},
         ('arcs', 'nodes', u'source'): {('booger', 'wooger')},
         ('cost', 'commodities', u'commodity'): {('bunker', 'Detroit', 'New Jerk'),
                                                 ('junker', 'Detroit', 'New York')},
         ('cost', 'nodes', u'destination'): {('bunker', 'Detroit', 'New Jerk'),
                                             ('Pencils', 'booger', 'wooger')},
         ('cost', 'nodes', u'source'): {('Pencils', 'booger', 'wooger')}})
コード例 #41
0
ファイル: testmdb.py プロジェクト: Dr-Irv/opalytics-ticdat
    def testDiet(self):
        """Round-trip the diet data through an mdb file.

        Checks an editable read, the overwrite protection of ``write_file``,
        and that a frozen read rejects edits.
        """
        tdf = TicDatFactory(**dietSchema())
        source_tables = {}
        for table in tdf.primary_key_fields:
            source_tables[table] = getattr(dietData(), table)
        expected = tdf.freeze_me(tdf.TicDat(**source_tables))
        mdb_path = makeCleanPath(os.path.join(_scratchDir, "diet.mdb"))
        tdf.mdb.write_file(expected, mdb_path)

        def set_min_nutrition(dat):
            dat.categories["calories"]["minNutrition"]=12

        # Editable read: the edit goes through and the data then differs.
        editable_read = tdf.mdb.create_tic_dat(mdb_path)
        self.assertTrue(tdf._same_data(expected, editable_read))
        set_min_nutrition(editable_read)
        self.assertFalse(tdf._same_data(expected, editable_read))

        # Writing over the existing file without allow_overwrite is expected to raise.
        blocked = self.firesException(lambda: tdf.mdb.write_file(expected, mdb_path))
        self.assertTrue(blocked)
        tdf.mdb.write_file(expected, mdb_path, allow_overwrite=True)

        # Frozen read: the edit is expected to raise and leave the data equal.
        frozen_read = tdf.mdb.create_tic_dat(mdb_path, freeze_it=True)
        self.assertTrue(tdf._same_data(expected, frozen_read))
        self.assertTrue(self.firesException(lambda: set_min_nutrition(frozen_read)))
        self.assertTrue(tdf._same_data(expected, frozen_read))
コード例 #42
0
ファイル: testxls.py プロジェクト: Dr-Irv/opalytics-ticdat
    def testDiet(self):
        """Round-trip the diet data through an xls file, including row offsets.

        Reads the file normally, then with a row offset of 1 for every table
        (with and without ``headers_present``), then with an offset of 2.
        """
        tdf = TicDatFactory(**dietSchema())
        source_tables = {}
        for table in tdf.primary_key_fields:
            source_tables[table] = getattr(dietData(), table)
        expected = tdf.freeze_me(tdf.TicDat(**source_tables))
        xls_path = os.path.join(_scratchDir, "diet.xls")
        tdf.xls.write_file(expected, xls_path)
        loaded = tdf.xls.create_tic_dat(xls_path)
        self.assertTrue(tdf._same_data(expected, loaded))
        loaded.categories["calories"]["minNutrition"]=12
        self.assertFalse(tdf._same_data(expected, loaded))

        self.assertFalse(tdf.xls.get_duplicates(xls_path))

        def uniform_offsets(count):
            # A fresh {table: count} mapping for every call.
            return {t: count for t in tdf.all_tables}

        # Offset 1 while still expecting headers: the read is expected to raise.
        ex = self.firesException(
            lambda: tdf.xls.create_tic_dat(xls_path, row_offsets=uniform_offsets(1)))
        self.assertTrue("field names could not be found" in ex)

        # Offset 1 with headers_present=False is expected to give the full data.
        loaded = tdf.xls.create_tic_dat(xls_path, row_offsets=uniform_offsets(1), headers_present=False)
        self.assertTrue(tdf._same_data(loaded, expected))

        # Offset 2 is expected to give one row fewer in every table.
        loaded = tdf.xls.create_tic_dat(xls_path, row_offsets=uniform_offsets(2), headers_present=False)
        self.assertFalse(tdf._same_data(loaded, expected))
        one_row_fewer = all(len(getattr(expected, t))-1 == len(getattr(loaded, t))
                            for t in tdf.all_tables)
        self.assertTrue(one_row_fewer)
コード例 #43
0
ファイル: testxls.py プロジェクト: Dr-Irv/opalytics-ticdat
    def testSilly(self):
        """Write the "silly" sample data to an xls file and read it back through variant schemas.

        Variants of ``sillyMeSchema()`` used for reading:

        * schema2 - primary key fields of table ``b`` reordered.
        * schema3 - data fields of table ``a`` reordered.
        * schema4 - table ``a`` keeps only ``aData1`` and ``aData3``.
        * schema5 - tables ``a`` and ``b`` have their key fields moved into
          the data fields (no primary key); ``a`` and ``c`` are generator tables.
        * schema6 - adds a table ``d``.

        The second half rewrites the file by hand with xlwt to check duplicate
        detection and header-less reading, and documents how ``None`` is read back.
        """
        tdf = TicDatFactory(**sillyMeSchema())
        ticDat = tdf.TicDat(**sillyMeData())
        schema2 = sillyMeSchema()
        schema2["b"][0] = ("bField2", "bField1", "bField3")
        schema3 = sillyMeSchema()
        schema3["a"][1] = ("aData2", "aData3", "aData1")
        schema4 = sillyMeSchema()
        schema4["a"][1] = ("aData1", "aData3")
        schema5 = sillyMeSchema()
        # Normalizes a schema entry to a tuple so the entries can be concatenated.
        _tuple = lambda x : tuple(x) if utils.containerish(x) else (x,)
        for t in ("a", "b") :
            schema5[t][1] = _tuple(schema5[t][1]) + _tuple(schema5[t][0])
        schema5["a"][0], schema5["b"][0] =  (),  []
        schema6 = sillyMeSchema()
        schema6["d"] =  [["dField"],()]

        tdf2, tdf3, tdf4, tdf5, tdf6 = (TicDatFactory(**x) for x in (schema2, schema3, schema4, schema5, schema6))
        tdf5.set_generator_tables(("a","c"))
        filePath = os.path.join(_scratchDir, "silly.xls")
        tdf.xls.write_file(ticDat, filePath)

        ticDat2 = tdf2.xls.create_tic_dat(filePath)
        self.assertFalse(tdf._same_data(ticDat, ticDat2))

        ticDat3 = tdf3.xls.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(ticDat, ticDat3))

        ticDat4 = tdf4.xls.create_tic_dat(filePath)
        for t in ["a","b"]:
            for k,v in getattr(ticDat4, t).items() :
                # Every value read through the reduced schema must agree with the original.
                for _k, _v in v.items() :
                    self.assertTrue(getattr(ticDat, t)[k][_k] == _v)
                # Only table "b" rows may carry the full set of data fields.
                if set(v) == set(getattr(ticDat, t)[k]) :
                    self.assertTrue(t == "b")
                else :
                    self.assertTrue(t == "a")

        ticDat5 = tdf5.xls.create_tic_dat(filePath)
        self.assertTrue(tdf5._same_data(tdf._keyless(ticDat), ticDat5))
        self.assertTrue(callable(ticDat5.a) and callable(ticDat5.c) and not callable(ticDat5.b))

        # Reading with the extra table "d" in the schema does not raise here;
        # the result matches under tdf, comparing under tdf6 is expected to
        # raise, and the d attribute is present and dict-like.
        ticDat6 = tdf6.xls.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(ticDat, ticDat6))
        self.assertTrue(firesException(lambda : tdf6._same_data(ticDat, ticDat6)))
        self.assertTrue(hasattr(ticDat6, "d") and utils.dictish(ticDat6.d))

        # Writes the same rows to one sheet per table of tdf, optionally
        # preceded by a header row, replacing any existing file at filePath.
        def writeData(data, write_header = True):
            import xlwt
            book = xlwt.Workbook()
            for t in tdf.all_tables :
                sheet = book.add_sheet(t)
                if write_header :
                    for i,f in enumerate(tdf.primary_key_fields.get(t, ()) + tdf.data_fields.get(t, ())) :
                        sheet.write(0, i, f)
                for rowInd, row in enumerate(data) :
                    for fieldInd, cellValue in enumerate(row):
                        # Data rows start below the header row when one is written.
                        sheet.write(rowInd+ (1 if write_header else 0), fieldInd, cellValue)
            if os.path.exists(filePath):
                os.remove(filePath)
            book.save(filePath)

        # Two of the three rows start with 1, so table "a" is asserted to
        # hold 2 rows and to report key 1 as occurring twice.
        writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)])
        ticDatMan = tdf.xls.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(len(ticDatMan.a) == 2 and len(ticDatMan.b) == 3)
        self.assertTrue(ticDatMan.b[1, 20, 30]["bData"] == 40)
        rowCount = tdf.xls.get_duplicates(filePath)
        self.assertTrue(set(rowCount) == {'a'} and set(rowCount["a"]) == {1} and rowCount["a"][1]==2)

        # Same rows without a header row: the default read is expected to
        # raise, the headers_present=False read gives the same results as above.
        writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)], write_header=False)
        self.assertTrue(self.firesException(lambda  : tdf.xls.create_tic_dat(filePath, freeze_it=True)))
        ticDatMan = tdf.xls.create_tic_dat(filePath, freeze_it=True, headers_present=False)
        self.assertTrue(len(ticDatMan.a) == 2 and len(ticDatMan.b) == 3)
        self.assertTrue(ticDatMan.b[1, 20, 30]["bData"] == 40)
        rowCount = tdf.xls.get_duplicates(filePath, headers_present=False)
        self.assertTrue(set(rowCount) == {'a'} and set(rowCount["a"]) == {1} and rowCount["a"][1]==2)

        ticDat.a["theboger"] = (1, None, 12)
        tdf.xls.write_file(ticDat, filePath, allow_overwrite=True)
        ticDatNone = tdf.xls.create_tic_dat(filePath, freeze_it=True)
        # THIS IS A FLAW - but a minor one. None's are hard to represent. It is turning into the empty string here.
        # not sure how to handle this, but documenting for now.
        self.assertFalse(tdf._same_data(ticDat, ticDatNone))
        self.assertTrue(ticDatNone.a["theboger"]["aData2"] == "")

        # A fourth row repeats key 1 for table "a" (third occurrence) and
        # key (1, 20, 30) for table "b" (second occurrence).
        writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40), (1,20,30,12)])
        rowCount = tdf.xls.get_duplicates(filePath)
        self.assertTrue(set(rowCount) == {'a', 'b'} and set(rowCount["a"]) == {1} and rowCount["a"][1]==3)
        self.assertTrue(set(rowCount["b"]) == {(1,20,30)} and rowCount["b"][1,20,30]==2)
コード例 #44
0
ファイル: testutils.py プロジェクト: Dr-Irv/opalytics-ticdat
    def testOne(self):
        """Exercise copying, validation and freezing with the diet sample data.

        Copies of ``dietData()`` are made in several ways and compared; the
        source object is then corrupted in two different ways and the
        messages reported by ``good_tic_dat_object`` and by ``freeze_me`` are
        asserted.
        """
        # Overwrites two food rows with the values asserted to leave the data unchanged.
        def _cleanIt(x) :
            x.foods['macaroni'] = {"cost": 2.09}
            x.foods['milk'] = {"cost":0.89}
            return x
        dataObj = dietData()
        tdf = TicDatFactory(**dietSchema())
        self.assertTrue(tdf.good_tic_dat_object(dataObj))
        dataObj2 = tdf.copy_tic_dat(dataObj)
        dataObj3 = tdf.copy_tic_dat(dataObj, freeze_it=True)
        dataObj4 = tdf.TicDat(**tdf.as_dict(dataObj3))
        # Every copy must hold the same data while being a distinct object.
        self.assertTrue(all (tdf._same_data(dataObj, x) and dataObj is not x for x in (dataObj2, dataObj3, dataObj4)))
        dataObj = _cleanIt(dataObj)
        self.assertTrue(tdf.good_tic_dat_object(dataObj))
        self.assertTrue(all (tdf._same_data(dataObj, x) and dataObj is not x for x in (dataObj2, dataObj3)))
        def hackit(x) :
            x.foods["macaroni"] = 100
        # The frozen copy is expected to reject the edit; the mutable copy
        # accepts it and then differs from the source.
        self.assertTrue(self.firesException(lambda :hackit(dataObj3)))
        hackit(dataObj2)
        self.assertTrue(not tdf._same_data(dataObj, dataObj2) and  tdf._same_data(dataObj, dataObj3))

        msg = []
        # A tuple key in foods plus an unrelated attribute: only the foods
        # problem is expected in the collected messages.
        dataObj.foods[("milk", "cookies")] = {"cost": float("inf")}
        dataObj.boger = object()
        self.assertFalse(tdf.good_tic_dat_object(dataObj) or
                         tdf.good_tic_dat_object(dataObj, bad_message_handler =msg.append))
        self.assertTrue({"foods : Inconsistent key lengths"} == set(msg))
        self.assertTrue(all(tdf.good_tic_dat_table(getattr(dataObj, t), t)
                            for t in ("categories", "nutritionQuantities")))

        dataObj = dietData()
        # NOTE(review): the assignment below is repeated verbatim on the next
        # line; the second one looks redundant - confirm before removing.
        dataObj.categories["boger"] = {"cost":1}
        dataObj.categories["boger"] = {"cost":1}
        self.assertFalse(tdf.good_tic_dat_object(dataObj) or
                         tdf.good_tic_dat_object(dataObj, bad_message_handler=msg.append))
        # msg still holds the foods message from above, hence two entries.
        self.assertTrue({'foods : Inconsistent key lengths',
                         'categories : Inconsistent data field name keys.'} == set(msg))
        # NOTE(review): ``.message`` is not an attribute of built-in
        # exceptions on Python 3 - confirm what firesException returns here.
        ex = firesException(lambda : tdf.freeze_me(tdf.TicDat(**{t:getattr(dataObj,t)
                                                                for t in tdf.primary_key_fields}))).message
        self.assertTrue("categories cannot be treated as a ticDat table : Inconsistent data field name keys" in ex)
コード例 #45
0
ファイル: testutils.py プロジェクト: Dr-Irv/opalytics-ticdat
    def testFive(self):
        """Check obfusimplify on the netflow data and the factory's foreign key bookkeeping.

        In order, the test covers: obfusimplify with and without skip_tables, the
        foreign_keys reported after addNetflowForeignKeys / addDietForeignKeys,
        clear_foreign_keys for one table and for all tables, and the cardinality
        recorded by add_foreign_key for several parent/child layouts.
        """
        tdf = TicDatFactory(**netflowSchema())
        addNetflowForeignKeys(tdf)
        dat = tdf.freeze_me(tdf.TicDat(**{t : getattr(netflowData(), t) for t in tdf.all_tables}))
        obfudat = tdf.obfusimplify(dat, freeze_it=1)
        self.assertFalse(tdf._same_data(dat, obfudat.copy))
        # every obfuscated arc key must be new, and must map back through renamings to an
        # original arc carrying the same capacity
        for (s,d),r in obfudat.copy.arcs.items():
            self.assertFalse((s,d) in dat.arcs)
            self.assertTrue(dat.arcs[obfudat.renamings[s][1], obfudat.renamings[d][1]]["capacity"] == r["capacity"])
        # with commodities and nodes skipped, the copy is expected to equal the original
        obfudat = tdf.obfusimplify(dat, freeze_it=1, skip_tables=["commodities", "nodes"])
        self.assertTrue(tdf._same_data(obfudat.copy, dat))

        tdf = TicDatFactory(**netflowSchema())
        addNetflowForeignKeys(tdf)
        mone, one2one = "many-to-one",  "one-to-one"
        fk, fkm = _ForeignKey, _ForeignKeyMapping
        self.assertTrue(set(tdf.foreign_keys) ==  {fk("arcs", 'nodes', fkm('source',u'name'), mone),
                            fk("arcs", 'nodes', fkm('destination',u'name'), mone),
                            fk("cost", 'nodes', fkm('source',u'name'), mone),
                            fk("cost", 'nodes', fkm('destination',u'name'), mone),
                            fk("cost", 'commodities', fkm('commodity',u'name'), mone),
                            fk("inflow", 'commodities', fkm('commodity',u'name'), mone),
                            fk("inflow", 'nodes', fkm('node',u'name'), mone)})

        # clearing one native table removes only that table's foreign keys
        tdf.clear_foreign_keys("cost")
        self.assertTrue(set(tdf.foreign_keys) ==  {fk("arcs", 'nodes', fkm('source',u'name'), mone),
                            fk("arcs", 'nodes', fkm('destination',u'name'), mone),
                            fk("inflow", 'commodities', fkm('commodity',u'name'), mone),
                            fk("inflow", 'nodes', fkm('node',u'name'), mone)})

        tdf = TicDatFactory(**dietSchema())
        self.assertFalse(tdf.foreign_keys)
        addDietForeignKeys(tdf)

        self.assertTrue(set(tdf.foreign_keys) == {fk("nutritionQuantities", 'categories', fkm('category',u'name'), mone),
                                                  fk("nutritionQuantities", 'foods', fkm('food',u'name'), mone)})

        # once the factory has produced a TicDat, clear_foreign_keys is expected to raise
        # and to leave the foreign keys in place
        tdf.TicDat()
        self.assertTrue(self.firesException(lambda  : tdf.clear_foreign_keys("nutritionQuantities")))
        self.assertTrue(tdf.foreign_keys)
        # a fresh factory that has not produced a TicDat can still be cleared
        tdf = TicDatFactory(**dietSchema())
        addDietForeignKeys(tdf)
        tdf.clear_foreign_keys("nutritionQuantities")
        self.assertFalse(tdf.foreign_keys)

        tdf = TicDatFactory(parentTable = [["pk"],["pd1", "pd2", "pd3"]],
                            goodChild = [["gk"], ["gd1", "gd2"]],
                            badChild = [["bk1", "bk2"], ["bd"]],
                            appendageChild = [["ak"], ["ad1", "ad2"]],
                            appendageBadChild = [["bk1", "bk2"], []])
        tdf.add_foreign_key("goodChild", "parentTable", fkm("gd1" , "pk"))
        tdf.add_foreign_key("badChild", "parentTable", ["bk2" , "pk"])
        # a mapping between two data fields must be rejected with a "many-to-many" complaint
        self.assertTrue("many-to-many" in self.firesException(lambda :
                tdf.add_foreign_key("badChild", "parentTable", ["bd", "pd2"])))
        tdf.add_foreign_key("appendageChild", "parentTable", ["ak", "pk"])
        tdf.add_foreign_key("appendageBadChild", "badChild", (("bk2", "bk2"), ("bk1","bk1")))
        fks = tdf.foreign_keys
        _getfk = lambda t : next(_ for _ in fks if _.native_table == t)
        self.assertTrue(_getfk("goodChild").cardinality == "many-to-one")
        self.assertTrue(_getfk("badChild").cardinality == "many-to-one")
        self.assertTrue(_getfk("appendageChild").cardinality == "one-to-one")
        self.assertTrue(_getfk("appendageBadChild").cardinality == "one-to-one")

        tdf.clear_foreign_keys("appendageBadChild")
        # foreign_keys holds foreign key records rather than table names, so testing the bare
        # table name for membership could never fail; look at each record's native_table.
        remaining = tdf.foreign_keys
        self.assertTrue(remaining and
                        not any(_.native_table == "appendageBadChild" for _ in remaining))
        tdf.clear_foreign_keys()
        self.assertFalse(tdf.foreign_keys)
コード例 #46
0
ファイル: testutils.py プロジェクト: Dr-Irv/opalytics-ticdat
    def testSix(self):
        """Check foreign key failure detection/removal and obfusimplify on a plants/lines schema.

        Builds a factory with single-field, multi-field and same-name foreign keys, then checks
        find_foreign_key_failures, remove_foreign_keys_failures (with and without propagation)
        and several obfusimplify variants against hand-built data.
        """
        tdf = TicDatFactory(plants = [["name"], ["stuff", "otherstuff"]],
                            lines = [["name"], ["plant", "weird stuff"]],
                            line_descriptor = [["name"], ["booger"]],
                            products = [["name"],["gover"]],
                            production = [["line", "product"], ["min", "max"]],
                            pureTestingTable = [[], ["line", "plant", "product", "something"]],
                            extraProduction = [["line", "product"], ["extramin", "extramax"]],
                            weirdProduction = [["line1", "line2", "product"], ["weirdmin", "weirdmax"]])
        tdf.add_foreign_key("production", "lines", ("line", "name"))
        tdf.add_foreign_key("production", "products", ("product", "name"))
        tdf.add_foreign_key("lines", "plants", ("plant", "name"))
        tdf.add_foreign_key("line_descriptor", "lines", ("name", "name"))
        # "line", "plant" and "product" each point at the table named by adding an "s"
        for f in set(tdf.data_fields["pureTestingTable"]).difference({"something"}):
            tdf.add_foreign_key("pureTestingTable", "%ss"%f, (f,"name"))
        tdf.add_foreign_key("extraProduction", "production", (("line", "line"), ("product","product")))
        tdf.add_foreign_key("weirdProduction", "production", (("line1", "line"), ("product","product")))
        tdf.add_foreign_key("weirdProduction", "extraProduction", (("line2","line"), ("product","product")))

        goodDat = tdf.TicDat()
        goodDat.plants["Cleveland"] = ["this", "that"]
        goodDat.plants["Newark"]["otherstuff"] =1
        goodDat.products["widgets"] = goodDat.products["gadgets"] = "shizzle"

        # one line per plant, keyed by its enumeration index
        for i,p in enumerate(goodDat.plants):
            goodDat.lines[i]["plant"] = p

        for i,(pl, pd) in enumerate(itertools.product(goodDat.lines, goodDat.products)):
            goodDat.production[pl, pd] = {"min":1, "max":10+i}

        badDat1 = tdf.copy_tic_dat(goodDat)
        badDat1.production["notaline", "widgets"] = [0,1]
        badDat2 = tdf.copy_tic_dat(badDat1)

        fk, fkm = _ForeignKey, _ForeignKeyMapping
        self.assertTrue(tdf.find_foreign_key_failures(badDat1) == tdf.find_foreign_key_failures(badDat2) ==
                        {fk('production', 'lines', fkm('line', 'name'), 'many-to-one'):
                             (('notaline',), (('notaline', 'widgets'),))})
        # giving "notaline" a row in lines moves the failure up to the lines -> plants key
        badDat1.lines["notaline"]["plant"] = badDat2.lines["notaline"]["plant"] = "notnewark"
        self.assertTrue(tdf.find_foreign_key_failures(badDat1) == tdf.find_foreign_key_failures(badDat2) ==
                        {fk('lines', 'plants', fkm('plant', 'name'), 'many-to-one'):
                             (('notnewark',), ('notaline',))})
        tdf.remove_foreign_keys_failures(badDat1, propagate=False)
        tdf.remove_foreign_keys_failures(badDat2, propagate=True)
        # with propagation a single pass is expected to restore the good data ...
        self.assertTrue(tdf._same_data(badDat2, goodDat) and not tdf.find_foreign_key_failures(badDat2))
        # ... without it, the production row that referenced the removed line is still failing
        self.assertTrue(tdf.find_foreign_key_failures(badDat1) ==
                {fk('production', 'lines', fkm('line', 'name'), 'many-to-one'):
                     (('notaline',), (('notaline', 'widgets'),))})

        tdf.remove_foreign_keys_failures(badDat1, propagate=False)
        self.assertTrue(tdf._same_data(badDat1, goodDat) and not tdf.find_foreign_key_failures(badDat1))

        _ = len(goodDat.lines)
        # list(...) keeps the concatenation valid whether keys() returns a list or a view
        for i,p in enumerate(list(goodDat.plants.keys()) + list(goodDat.plants.keys())):
            goodDat.lines[i+_]["plant"] = p
        # Describe the odd-numbered lines only, storing key+10. The loop used to test and store
        # the index left over from the previous loop, which gave every line the same value; the
        # check further down (key == value - 10, key odd) shows the per-line value was intended.
        for line in goodDat.lines:
            if line%2:
                goodDat.line_descriptor[line] = line+10

        for i,(l,pl,pdct) in enumerate(sorted(itertools.product(goodDat.lines, goodDat.plants, goodDat.products))):
            goodDat.pureTestingTable.append((l,pl,pdct,i))
        self.assertFalse(tdf.find_foreign_key_failures(goodDat))
        badDat = tdf.copy_tic_dat(goodDat)
        badDat.pureTestingTable.append(("j", "u", "nk", "ay"))
        # the appended junk row sits at index len(goodDat.pureTestingTable) of the keyless table
        l = len(goodDat.pureTestingTable)
        self.assertTrue(tdf.find_foreign_key_failures(badDat) ==
         {fk('pureTestingTable', 'plants', fkm('plant', 'name'), 'many-to-one'): (('u',),(l,)),
          fk('pureTestingTable', 'products', fkm('product', 'name'), 'many-to-one'): (('nk',), (l,)),
          fk('pureTestingTable', 'lines', fkm('line', 'name'), 'many-to-one'): (('j',), (l,))})

        obfudat = tdf.obfusimplify(goodDat, freeze_it=True)
        self.assertTrue(all(len(getattr(obfudat.copy, t)) == len(getattr(goodDat, t))
                            for t in tdf.all_tables))
        # every original name must show up as the second item of some renaming entry,
        # and never as a renaming key
        for n in list(goodDat.plants.keys()) + list(goodDat.lines.keys()) + list(goodDat.products.keys()) :
            self.assertTrue(n in {_[1] for _ in obfudat.renamings.values()})
            self.assertFalse(n in obfudat.renamings)
        self.assertTrue(obfudat.copy.plants['P2']['otherstuff'] == 1)
        self.assertFalse(tdf._same_data(obfudat.copy, goodDat))
        # data values are untouched by the renaming, so value-10 recovers the original line key
        for k,r in obfudat.copy.line_descriptor.items():
            i = list(r.values())[0] - 10
            self.assertTrue(i%2 and (list(goodDat.line_descriptor[i].values())[0] == i+10))

        obfudat2 = tdf.obfusimplify(goodDat, {"plants": "P", "lines" : "L", "products" :"PR"})
        self.assertTrue(tdf._same_data(obfudat.copy, obfudat2.copy))

        obfudat3 = tdf.obfusimplify(goodDat, skip_tables=["plants", "lines", "products"])
        self.assertTrue(tdf._same_data(obfudat3.copy, goodDat))

        obfudat4 = tdf.obfusimplify(goodDat, skip_tables=["lines", "products"])
        self.assertFalse(tdf._same_data(obfudat4.copy, goodDat))
        self.assertFalse(tdf._same_data(obfudat4.copy, obfudat.copy))
コード例 #47
0
ファイル: testutils.py プロジェクト: Dr-Irv/opalytics-ticdat
    def testEight(self):
        """Check data type rules, replacement of failing values, and foreign key links.

        The diet schema drives find_data_type_failures / replace_data_type_failures under
        several set_data_type configurations; the netflow schema then covers
        enable_foreign_key_links and the strings_allowed option.

        Note that both makeIt helpers read ``tdf`` when they are called, not when they are
        defined, so each ``dat = makeIt()`` uses whichever factory was assigned last.
        """
        tdf = TicDatFactory(**dietSchema())
        def makeIt() :
            rtn = tdf.TicDat()
            rtn.foods["a"] = 12
            rtn.foods["b"] = None
            rtn.categories["1"] = {"maxNutrition":100, "minNutrition":40}
            rtn.categories["2"] = [10,20]
            for f, p in itertools.product(rtn.foods, rtn.categories):
                rtn.nutritionQuantities[f,p] = 5
            # integer 2, not the string "2": this adds a fifth row next to ('a', '2')
            rtn.nutritionQuantities['a', 2] = 12
            return tdf.freeze_me(rtn)
        dat = makeIt()
        # no data types set yet, so nothing is expected to fail
        self.assertFalse(tdf.find_data_type_failures(dat))

        tdf = TicDatFactory(**dietSchema())
        tdf.set_data_type("foods", "cost", nullable=False)
        # qty must lie in (5, 12]: the four rows holding 5 fail, the row holding 12 passes
        tdf.set_data_type("nutritionQuantities", "qty", min=5, inclusive_min=False, max=12, inclusive_max=True)
        tdf.set_default_value("foods", "cost", 2)
        dat = makeIt()
        failed = tdf.find_data_type_failures(dat)
        self.assertTrue(set(failed) == {('foods', 'cost'), ('nutritionQuantities', 'qty')})
        self.assertTrue(set(failed['nutritionQuantities', 'qty'].pks) ==
                        {('b', '1'), ('a', '2'), ('a', '1'), ('b', '2')})
        self.assertTrue(failed['nutritionQuantities', 'qty'].bad_values == (5,))
        # without an explicit replacement for qty the call is expected to raise, naming the field
        ex = self.firesException(lambda : tdf.replace_data_type_failures(tdf.copy_tic_dat(dat)))
        self.assertTrue(all(_ in ex for _ in ("replacement value", "nutritionQuantities", "qty")))
        fixedDat = tdf.replace_data_type_failures(tdf.copy_tic_dat(dat),
                            replacement_values={("nutritionQuantities", "qty"):5.001})
        self.assertFalse(tdf.find_data_type_failures(fixedDat) or tdf._same_data(fixedDat, dat))
        self.assertTrue(all(fixedDat.nutritionQuantities[pk]["qty"] == 5.001 for pk in
                            failed['nutritionQuantities', 'qty'].pks))
        # the None cost of food "b" is expected to become 2, the value given to set_default_value
        self.assertTrue(fixedDat.foods["a"]["cost"] == 12 and fixedDat.foods["b"]["cost"] == 2 and
                        fixedDat.nutritionQuantities['a', 2]["qty"] == 12)

        # same rules but no default value: an explicit replacement of 2 must give the same result
        tdf = TicDatFactory(**dietSchema())
        tdf.set_data_type("foods", "cost", nullable=False)
        tdf.set_data_type("nutritionQuantities", "qty", min=5, inclusive_min=False, max=12, inclusive_max=True)
        fixedDat2 = tdf.replace_data_type_failures(tdf.copy_tic_dat(dat),
                            replacement_values={("nutritionQuantities", "qty"):5.001, ("foods", "cost") : 2})
        self.assertTrue(tdf._same_data(fixedDat, fixedDat2))

        # nullable cost accepts None; qty with numbers disallowed fails on every row
        tdf = TicDatFactory(**dietSchema())
        tdf.set_data_type("foods", "cost", nullable=True)
        tdf.set_data_type("nutritionQuantities", "qty",number_allowed=False)
        failed = tdf.find_data_type_failures(dat)
        self.assertTrue(set(failed) == {('nutritionQuantities', 'qty')})
        self.assertTrue(set(failed['nutritionQuantities', 'qty'].pks) == set(dat.nutritionQuantities))
        ex = self.firesException(lambda : tdf.replace_data_type_failures(tdf.copy_tic_dat(dat)))
        self.assertTrue(all(_ in ex for _ in ("replacement value", "nutritionQuantities", "qty")))

        # set_data_type with no options: the None cost is expected to be replaced by 0
        tdf = TicDatFactory(**dietSchema())
        tdf.set_data_type("foods", "cost")
        fixedDat = tdf.replace_data_type_failures(tdf.copy_tic_dat(makeIt()))
        self.assertTrue(fixedDat.foods["a"]["cost"] == 12 and fixedDat.foods["b"]["cost"] == 0)

        # foreign keys alone do not give rows an "arcs_source" attribute ...
        tdf = TicDatFactory(**netflowSchema())
        addNetflowForeignKeys(tdf)
        dat = tdf.copy_tic_dat(netflowData(), freeze_it=1)
        self.assertFalse(hasattr(dat.nodes["Detroit"], "arcs_source"))

        # ... but after enable_foreign_key_links they do
        tdf = TicDatFactory(**netflowSchema())
        addNetflowForeignKeys(tdf)
        tdf.enable_foreign_key_links()
        dat = tdf.copy_tic_dat(netflowData(), freeze_it=1)
        self.assertTrue(hasattr(dat.nodes["Detroit"], "arcs_source"))

        tdf = TicDatFactory(**netflowSchema())
        def makeIt() :
            # the first call on a fresh factory turns on links and adds the foreign keys
            if not tdf.foreign_keys:
                tdf.enable_foreign_key_links()
                addNetflowForeignKeys(tdf)
            orig = netflowData()
            rtn = tdf.copy_tic_dat(orig)
            # overwrite each Detroit arc's data with the name of its destination node
            for n in rtn.nodes["Detroit"].arcs_source:
                rtn.arcs["Detroit", n] = n
            # only existing arcs were overwritten, so no table may have changed size
            self.assertTrue(all(len(getattr(rtn, t)) == len(getattr(orig, t)) for t in tdf.all_tables))
            return tdf.freeze_me(rtn)
        dat = makeIt()
        self.assertFalse(tdf.find_data_type_failures(dat))

        # strings_allowed="*" : the string capacities are not reported as failures
        tdf = TicDatFactory(**netflowSchema())
        tdf.set_data_type("arcs", "capacity", strings_allowed="*")
        dat = makeIt()
        self.assertFalse(tdf.find_data_type_failures(dat))

        # with an explicit list of allowed strings only "New York" is reported
        tdf = TicDatFactory(**netflowSchema())
        tdf.set_data_type("arcs", "capacity", strings_allowed=["Boston", "Seattle", "lumberjack"])
        dat = makeIt()
        failed = tdf.find_data_type_failures(dat)
        self.assertTrue(failed == {('arcs', 'capacity'):(("New York",), (("Detroit", "New York"),))})
        fixedDat = tdf.replace_data_type_failures(tdf.copy_tic_dat(makeIt()))
        netflowData_ = tdf.copy_tic_dat(netflowData())
        self.assertFalse(tdf.find_data_type_failures(fixedDat) or tdf._same_data(dat, netflowData_))
        # replace the failing capacity with 80, then restore the two string capacities by hand;
        # the result is expected to equal the untouched netflow data
        fixedDat = tdf.copy_tic_dat(tdf.replace_data_type_failures(tdf.copy_tic_dat(makeIt()),
                                        {("arcs", "capacity"):80, ("cost","cost") :"imok"}))
        fixedDat.arcs["Detroit", "Boston"] = 100
        fixedDat.arcs["Detroit", "Seattle"] = 120
        self.assertTrue(tdf._same_data(fixedDat, netflowData_))
コード例 #48
0
ファイル: testmdb.py プロジェクト: Dr-Irv/opalytics-ticdat
    def testSilly(self):
        """Round-trip the silly-me data through an .mdb file and read it back with altered schemas.

        After a successful write/read round trip with the original factory, the same file is
        read through factories whose schemas reorder, drop or add fields and tables, and the
        test checks which of those reads still match the original data. A final round trip
        checks that a None data value survives.
        """
        tdf = TicDatFactory(**sillyMeSchema())
        ticDat = tdf.TicDat(**sillyMeData())
        filePath = os.path.join(_scratchDir, "silly.mdb")
        # writing to a freshly cleaned path is expected to raise
        # NOTE(review): presumably because no .mdb file with the schema exists yet -- confirm
        self.assertTrue(firesException(lambda : tdf.mdb.write_file(ticDat, makeCleanPath(filePath))))
        def sillyMeCleanData() :
            return {
                "a" : {"1" : (1, 2, "3"), "b" : (12, 12.2, "twelve"), "c" : (11, 12, "thirt")},
                "b" : {(1, 2, "3") : 1, (3, 4, "b") : 12},
                "c" : ((1, "2", 3, 4), (0.2, "b", 0.3, 0.4), (1.2, "b", 12, 24) )
            }
        ticDat = tdf.TicDat(**sillyMeCleanData())
        self.assertTrue(firesException(lambda : tdf.mdb.write_file(ticDat, makeCleanPath(filePath))))
        def makeCleanSchema() :
            # (re)create the file with explicit types for selected fields, then hand back its path
            tdf.mdb.write_schema(makeCleanPath(filePath), a={"aData3" : "text"},
                        b = {"bField1" : "int", "bField2" : "int"}, c={"cData2" : "text"})
            return filePath
        tdf.mdb.write_file(ticDat, makeCleanSchema())
        mdbTicDat = tdf.mdb.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(ticDat, mdbTicDat))

        # schema2: primary key fields of "b" reordered
        schema2 = sillyMeSchema()
        schema2["b"][0] = ("bField2", "bField1", "bField3")
        # schema3: data fields of "a" reordered
        schema3 = sillyMeSchema()
        schema3["a"][1] = ("aData2", "aData3", "aData1")
        # schema4: "a" without aData2
        schema4 = sillyMeSchema()
        schema4["a"][1] = ("aData1", "aData3")
        # schema5: "a" and "b" have no primary key; their former key fields become trailing data fields
        schema5 = sillyMeSchema()
        _tuple = lambda x : tuple(x) if utils.containerish(x) else (x,)
        for t in ("a", "b") :
            schema5[t][1] = _tuple(schema5[t][1]) + _tuple(schema5[t][0])
        schema5["a"][0], schema5["b"][0] =  (),  []
        # schema6: an extra table "d" that was never written to the file
        schema6 = sillyMeSchema()
        schema6["d"] =  [["dField"],()]

        tdf2, tdf3, tdf4, tdf5, tdf6 = (TicDatFactory(**x) for x in (schema2, schema3, schema4, schema5, schema6))
        tdf5.set_generator_tables(("a","c"))

        ticDat2 = tdf2.mdb.create_tic_dat(filePath)
        self.assertFalse(tdf._same_data(ticDat, ticDat2))

        ticDat3 = tdf3.mdb.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(ticDat, ticDat3))

        ticDat4 = tdf4.mdb.create_tic_dat(filePath)
        for t in ["a","b"]:
            for k,v in getattr(ticDat4, t).items() :
                # every field that was read must agree with the original ...
                for _k, _v in v.items() :
                    self.assertTrue(getattr(ticDat, t)[k][_k] == _v)
                # ... and only table "a" may be missing a field
                if set(v) == set(getattr(ticDat, t)[k]) :
                    self.assertTrue(t == "b")
                else :
                    self.assertTrue(t == "a")

        ticDat5 = tdf5.mdb.create_tic_dat(filePath)
        self.assertTrue(tdf5._same_data(tdf._keyless(ticDat), ticDat5))
        # generator tables come back as callables, the others do not
        self.assertTrue(callable(ticDat5.a) and callable(ticDat5.c) and not callable(ticDat5.b))

        self.assertTrue("table d" in self.firesException(lambda  : tdf6.mdb.create_tic_dat(filePath)))

        ticDat.a["theboger"] = (1, None, "twelve")
        tdf.mdb.write_file(ticDat, makeCleanSchema())
        ticDatNone = tdf.mdb.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, ticDatNone))
        # identity test: the value must be None itself, not merely something equal to it
        self.assertTrue(ticDatNone.a["theboger"]["aData2"] is None)
コード例 #49
0
ファイル: testsql.py プロジェクト: Dr-Irv/opalytics-ticdat
    def testDiet(self):
        """Round-trip the diet data through the sql reader/writers; check foreign key cleanup.

        The round-trip checks run twice: first with a plain diet factory, then with a factory
        that carries default values and the diet foreign keys.
        """
        def dietTables(factory):
            # dietData() is evaluated again for each table that is looked up
            return {name: getattr(dietData(), name) for name in factory.primary_key_fields}

        def doTheTests(tdf):
            source = tdf.freeze_me(tdf.TicDat(**dietTables(tdf)))

            def matchesSource():
                # compares against whatever `loaded` refers to at call time
                return tdf._same_data(source, loaded)

            def bumpMinNutrition():
                loaded.categories["calories"]["minNutrition"] = 12

            # database file: an editable read, then a frozen read after an overwrite
            dbPath = makeCleanPath(os.path.join(_scratchDir, "diet.db"))
            tdf.sql.write_db_data(source, dbPath)
            loaded = tdf.sql.create_tic_dat(dbPath)
            self.assertTrue(matchesSource())
            bumpMinNutrition()
            self.assertFalse(matchesSource())

            # writing onto the existing file must raise unless overwriting is allowed
            self.assertTrue(self.firesException(lambda: tdf.sql.write_db_data(source, dbPath)))
            tdf.sql.write_db_data(source, dbPath, allow_overwrite=True)
            loaded = tdf.sql.create_tic_dat(dbPath, freeze_it=True)
            self.assertTrue(matchesSource())
            self.assertTrue(self.firesException(bumpMinNutrition))
            self.assertTrue(matchesSource())

            # sql text file: the same two passes, the second one with the schema included
            sqlPath = makeCleanPath(os.path.join(_scratchDir, "diet.sql"))
            tdf.sql.write_sql_file(source, sqlPath)
            loaded = tdf.sql.create_tic_dat_from_sql(sqlPath)
            self.assertTrue(matchesSource())
            bumpMinNutrition()
            self.assertFalse(matchesSource())

            tdf.sql.write_sql_file(source, sqlPath, include_schema=True)
            loaded = tdf.sql.create_tic_dat_from_sql(sqlPath, includes_schema=True, freeze_it=True)
            self.assertTrue(matchesSource())
            self.assertTrue(self.firesException(bumpMinNutrition))
            self.assertTrue(matchesSource())

        doTheTests(TicDatFactory(**dietSchema()))

        tdf = TicDatFactory(**dietSchema())
        self.assertFalse(tdf.foreign_keys)
        defaults = {"categories": {'maxNutrition': float("inf"), 'minNutrition': 0.0},
                    "foods": {'cost': 0.0},
                    "nutritionQuantities": {'qty': 0.0}}
        tdf.set_default_values(**defaults)
        addDietForeignKeys(tdf)
        # both parent tables must be ordered ahead of the child table
        ordered = tdf.sql._ordered_tables()
        for parent in ("categories", "foods"):
            self.assertTrue(ordered.index(parent) < ordered.index("nutritionQuantities"))

        ticDat = tdf.TicDat(**dietTables(tdf))
        origTicDat = tdf.copy_tic_dat(ticDat)
        self.assertTrue(tdf._same_data(ticDat, origTicDat))
        self.assertFalse(tdf.find_foreign_key_failures(ticDat))
        # two rows that each reference one missing parent
        ticDat.nutritionQuantities['hot dog', 'boger'] = -12
        ticDat.nutritionQuantities['junk', 'protein'] = -12
        expectedFailures = {
            ('nutritionQuantities', 'foods', ('food', 'name'), 'many-to-one'):
                (('junk',), (('junk', 'protein'),)),
            ('nutritionQuantities', 'categories', ('category', 'name'), 'many-to-one'):
                (('boger',), (('hot dog', 'boger'),))}
        self.assertTrue(tdf.find_foreign_key_failures(ticDat) == expectedFailures)

        self.assertFalse(tdf._same_data(ticDat, origTicDat))
        tdf.remove_foreign_keys_failures(ticDat)
        self.assertFalse(tdf.find_foreign_key_failures(ticDat))
        self.assertTrue(tdf._same_data(ticDat, origTicDat))

        doTheTests(tdf)
コード例 #50
0
ファイル: testcsv.py プロジェクト: Dr-Irv/opalytics-ticdat
        def doTest(headersPresent) :
            """Write the silly-me data to a csv directory and read it back with altered schemas.

            headersPresent is passed on as write_header when writing and as headers_present
            when reading; several expectations below flip depending on it. `self` is taken
            from the enclosing test method.
            """
            tdf = TicDatFactory(**sillyMeSchema())
            ticDat = tdf.TicDat(**sillyMeData())
            # schema2: primary key fields of "b" reordered
            schema2 = sillyMeSchema()
            schema2["b"][0] = ("bField2", "bField1", "bField3")
            # schema3: data fields of "a" reordered
            schema3 = sillyMeSchema()
            schema3["a"][1] = ("aData2", "aData3", "aData1")
            # schema4: "a" without aData2
            schema4 = sillyMeSchema()
            schema4["a"][1] = ("aData1", "aData3")
            # schema5: keyless "a" and "b", data fields first, former key fields last
            schema5 = sillyMeSchema()
            _tuple = lambda x : tuple(x) if utils.containerish(x) else (x,)
            for t in ("a", "b") :
                schema5[t][1] = _tuple(schema5[t][1]) + _tuple(schema5[t][0])
            schema5["a"][0], schema5["b"][0] = (), []
            # schema5b: keyless "a" and "b", former key fields first, data fields last
            schema5b = sillyMeSchema()
            for t in ("a", "b") :
                schema5b[t][1] = _tuple(schema5b[t][0]) + _tuple(schema5b[t][1])
            schema5b["a"][0], schema5b["b"][0] = (), []
            # schema6: an extra table "d" that is never written to the directory
            schema6 = sillyMeSchema()
            schema6["d"] = [("dField",),[]]

            tdf2, tdf3, tdf4, tdf5, tdf5b, tdf6 = (TicDatFactory(**x) for x in
                            (schema2, schema3, schema4, schema5, schema5b, schema6))
            tdf5.set_generator_tables(["a", "c"])
            tdf5b.set_generator_tables(("a", "c"))


            dirPath = makeCleanDir(os.path.join(_scratchDir, "silly"))
            tdf.csv.write_directory(ticDat, dirPath, write_header=headersPresent)

            # reordered key fields: expected to differ with headers and to match without them
            ticDat2 = tdf2.csv.create_tic_dat(dirPath, headers_present=headersPresent)
            (self.assertFalse if headersPresent else self.assertTrue)(tdf._same_data(ticDat, ticDat2))

            # reordered data fields: expected to match with headers and to differ without them
            ticDat3 = tdf3.csv.create_tic_dat(dirPath, headers_present=headersPresent)
            (self.assertTrue if headersPresent else self.assertFalse)(tdf._same_data(ticDat, ticDat3))

            if headersPresent :
                ticDat4 = tdf4.csv.create_tic_dat(dirPath, headers_present=headersPresent)
                for t in ("a", "b") :
                    for k,v in getattr(ticDat4, t).items() :
                        # every field that was read must agree with the original ...
                        for _k, _v in v.items() :
                            self.assertTrue(getattr(ticDat, t)[k][_k] == _v)
                        # ... and only table "a" may be missing a field
                        if set(v) == set(getattr(ticDat, t)[k]) :
                            self.assertTrue(t == "b")
                        else :
                            self.assertTrue(t == "a")
            else :
                # without headers, reading with a dropped field is expected to raise
                self.assertTrue(self.firesException(lambda :
                                    tdf4.csv.create_tic_dat(dirPath, headers_present=headersPresent)))

            # schema5's field order matches the written data only when headers are present
            ticDat5 = tdf5.csv.create_tic_dat(dirPath, headers_present=headersPresent)
            (self.assertTrue if headersPresent else self.assertFalse)(
                                                    tdf5._same_data(tdf._keyless(ticDat), ticDat5))
            # generator tables come back as callables, the others do not
            self.assertTrue(callable(ticDat5.a) and callable(ticDat5.c) and not callable(ticDat5.b))

            # schema5b is expected to match with or without headers
            ticDat5b = tdf5b.csv.create_tic_dat(dirPath, headers_present=headersPresent)
            self.assertTrue(tdf5b._same_data(tdf._keyless(ticDat), ticDat5b))
            self.assertTrue(callable(ticDat5b.a) and callable(ticDat5b.c) and not callable(ticDat5b.b))


            # the unwritten table "d" still reads back as a dict-like attribute; comparing through
            # tdf passes, while comparing through tdf6 is expected to raise
            ticDat6 = tdf6.csv.create_tic_dat(dirPath, headers_present=headersPresent)
            self.assertTrue(tdf._same_data(ticDat, ticDat6))
            self.assertTrue(firesException(lambda : tdf6._same_data(ticDat, ticDat6)))
            self.assertTrue(hasattr(ticDat6, "d") and utils.dictish(ticDat6.d))
            # keyless twin of tdf: every field becomes a data field, so duplicate rows can be written
            allDataTdf = TicDatFactory(**{t:[[], tdf.primary_key_fields.get(t, ()) + tdf.data_fields.get(t, ())]
                             for t in tdf.all_tables})

            def writeData(data):
                # write the same rows into tables a, b and c, replacing the directory contents
                td = allDataTdf.TicDat(a = data, b=data, c=data)
                allDataTdf.csv.write_directory(td, dirPath, allow_overwrite=True, write_header=headersPresent)

            # two of the three rows begin with 1, which is a duplicate key for table "a" only
            writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)])
            ticDatMan = tdf.csv.create_tic_dat(dirPath, headers_present=headersPresent, freeze_it=True)
            self.assertTrue(len(ticDatMan.a) == 2 and len(ticDatMan.b) == 3)
            self.assertTrue(ticDatMan.b[(1, 20, 30)]["bData"] == 40)
            rowCount = tdf.csv.get_duplicates(dirPath, headers_present= headersPresent)
            self.assertTrue(set(rowCount) == {'a'} and set(rowCount["a"]) == {1} and rowCount["a"][1]==2)


            # the added row repeats key 1 for "a" and key (1, 20, 30) for "b"
            writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40), (1,20,30,12)])
            rowCount = tdf.csv.get_duplicates(dirPath, headers_present=headersPresent)
            self.assertTrue(set(rowCount) == {'a', 'b'} and set(rowCount["a"]) == {1} and rowCount["a"][1]==3)
            self.assertTrue(set(rowCount["b"]) == {(1,20,30)} and rowCount["b"][1,20,30]==2)
コード例 #51
0
ファイル: testutils.py プロジェクト: Dr-Irv/opalytics-ticdat
    def testFour(self):
        objOrig = sillyMeData()
        staticFactory = TicDatFactory(**sillyMeSchema())
        goodTable = lambda t : lambda _t : staticFactory.good_tic_dat_table(_t, t)
        tables = set(staticFactory.primary_key_fields)
        ticDat = staticFactory.freeze_me(staticFactory.TicDat(**objOrig))
        self.assertTrue(staticFactory.good_tic_dat_object(ticDat))
        for t in tables :
            self._assertSame(objOrig[t], getattr(ticDat,t), goodTable(t))
        pickedData = staticFactory.TicDat(**staticFactory.as_dict(ticDat))
        self.assertTrue(staticFactory._same_data(ticDat, pickedData))
        mutTicDat = staticFactory.TicDat()
        for k,v in ticDat.a.items() :
            mutTicDat.a[k] = v.values()
        for k,v in ticDat.b.items() :
            mutTicDat.b[k] = v.values()[0]
        for r in ticDat.c:
            mutTicDat.c.append(r)
        for t in tables :
            self._assertSame(getattr(mutTicDat, t), getattr(ticDat,t), goodTable(t))

        self.assertTrue("theboger" not in mutTicDat.a)
        mutTicDat.a["theboger"]["aData2"] =22
        self.assertTrue("theboger" in mutTicDat.a and mutTicDat.a["theboger"].values() == (0, 22, 0))

        newSchema = sillyMeSchema()
        newSchema["a"][1] += ("aData4",)
        newFactory = TicDatFactory(**newSchema)
        def makeNewTicDat() : return newFactory.TicDat(a=ticDat.a, b=ticDat.b, c=ticDat.c)
        newTicDat = makeNewTicDat()
        self.assertFalse(staticFactory.good_tic_dat_object(newTicDat))
        self.assertTrue(newFactory.good_tic_dat_object(ticDat))
        self.assertTrue(newFactory._same_data(makeNewTicDat(), newTicDat))
        newTicDat.a[ticDat.a.keys()[0]]["aData4"]=12
        self.assertFalse(newFactory._same_data(makeNewTicDat(), newTicDat))