Example #1
    def test_time_stamp(self):
        tdf = TicDatFactory(table=[["Blah"], ["Timed Info"]])
        tdf.set_data_type("table", "Timed Info", nullable=True)
        tdf.set_default_value("table", "Timed Info", None)
        dat = tdf.TicDat()
        dat.table[1] = dateutil.parser.parse("2014-05-01 18:47:05.069722")
        dat.table[2] = dateutil.parser.parse("2014-05-02 18:47:05.178768")
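        # write the schema with explicit Postgres column types, then round-trip
        # the data through write_data / create_tic_dat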
        pgtf = tdf.pgsql
        pgtf.write_schema(self.engine,
                          test_schema,
                          forced_field_types={
                              ('table', 'Blah'): "integer",
                              ('table', 'Timed Info'): "timestamp"
                          })
        pgtf.write_data(dat,
                        self.engine,
                        test_schema,
                        dsn=self.postgresql.dsn())
        dat_2 = pgtf.create_tic_dat(self.engine, test_schema)
        self.assertTrue(tdf._same_data(dat, dat_2))
        self.assertTrue(
            all(
                isinstance(row["Timed Info"], datetime.datetime)
                for row in dat_2.table.values()))
        self.assertFalse(
            any(isinstance(k, datetime.datetime) for k in dat_2.table))

        # rebuild the same schema as a pandas-based PanDatFactory and repeat the round trip
        pdf = PanDatFactory.create_from_full_schema(
            tdf.schema(include_ancillary_info=True))

        # helper: rows match when the Blah columns agree and corresponding
        # timestamps differ by no more than a microsecond
        def same_data(pan_dat, pan_dat_2):
            df1, df2 = pan_dat.table, pan_dat_2.table
            if list(df1["Blah"]) != list(df2["Blah"]):
                return False
            for dt1, dt2 in zip(df1["Timed Info"], df2["Timed Info"]):
                delta = dt1 - dt2
                if abs(delta.total_seconds()) > 1e-6:
                    return False
            return True

        # a PanDat read straight from Postgres should match one built from the TicDat copy
        pan_dat = pdf.pgsql.create_pan_dat(self.engine, test_schema)
        pan_dat_2 = pan_dat_maker(tdf.schema(), dat_2)
        self.assertTrue(same_data(pan_dat, pan_dat_2))
        for df in [_.table for _ in [pan_dat, pan_dat_2]]:
            for i in range(len(df)):
                self.assertFalse(
                    isinstance(df.loc[i, "Blah"], datetime.datetime))
                self.assertTrue(
                    isinstance(df.loc[i, "Timed Info"], datetime.datetime))

        pan_dat.table.loc[1, "Timed Info"] = dateutil.parser.parse(
            "2014-05-02 18:48:05.178768")
        self.assertFalse(same_data(pan_dat, pan_dat_2))
        pdf.pgsql.write_data(pan_dat, self.engine, test_schema)
        pan_dat_2 = pdf.pgsql.create_pan_dat(self.engine, test_schema)
        self.assertTrue(same_data(pan_dat, pan_dat_2))

        dat.table[2] = dateutil.parser.parse("2014-05-02 18:48:05.178768")
        self.assertFalse(tdf._same_data(dat, dat_2))
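
These snippets reference names that are defined elsewhere in their test module: test_schema, self.engine, self.postgresql, can_run, and the pan_dat_maker helper. A minimal sketch of the fixture they appear to rely on is below; the schema value, the class name, and the setUp/tearDown details are assumptions inferred from the code, not taken from the source.

    import datetime
    import unittest
    import dateutil.parser
    import pandas as pd
    import sqlalchemy
    import testing.postgresql
    from ticdat import TicDatFactory, PanDatFactory
    # assumed location of the pan_dat_maker helper used in Example #1
    from ticdat.testing.ticdattestutils import pan_dat_maker

    test_schema = "test_pg_schema"  # assumed value

    class TestPostgres(unittest.TestCase):  # assumed class name
        can_run = True  # presumably gated on PostgreSQL actually being available

        def setUp(self):
            # throwaway PostgreSQL instance plus a SQLAlchemy engine pointed at it
            self.postgresql = testing.postgresql.Postgresql()
            self.engine = sqlalchemy.create_engine(self.postgresql.url())

        def tearDown(self):
            self.engine.dispose()
            self.postgresql.stop()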
Example #2
    def test_missing_tables(self):
        # a factory whose schema includes an extra table ("that") can still read
        # from a Postgres schema where only the shared table ("this") was written
        schema = test_schema + "_missing_tables"
        tdf_1 = TicDatFactory(this=[["Something"], ["Another"]])
        pdf_1 = PanDatFactory(**tdf_1.schema())
        tdf_2 = TicDatFactory(
            **dict(tdf_1.schema(), that=[["What", "Ever"], []]))
        pdf_2 = PanDatFactory(**tdf_2.schema())
        dat = tdf_1.TicDat(this=[["a", 2], ["b", 3], ["c", 5]])
        pan_dat = tdf_1.copy_to_pandas(dat, drop_pk_columns=False)
        tdf_1.pgsql.write_schema(self.engine, schema)
        tdf_1.pgsql.write_data(dat, self.engine, schema)
        pg_dat = tdf_2.pgsql.create_tic_dat(self.engine, schema)
        self.assertTrue(tdf_1._same_data(dat, pg_dat))
        pg_pan_dat = pdf_2.pgsql.create_pan_dat(self.engine, schema)
        self.assertTrue(pdf_1._same_data(pan_dat, pg_pan_dat))
Example #3
    def test_pgtd_active_dups(self):
        if not self.can_run:
            return
        schema = test_schema + "_act_dups"
        tdf_1 = TicDatFactory(t_one=[[],
                                     ["Field One", "Field Two", "Da Active"]],
                              t_two=[[], ["Field One", "Da Active"]])
        dat = tdf_1.TicDat(t_one=[["a", "b", True], ["a", "c", True],
                                  ["a", "b", False], ["a", "d", True]],
                           t_two=[["a", True], ["b", False], ["a", False],
                                  ["b", False], ["a", False]])
        self.assertTrue(len(dat.t_one) == 4 and len(dat.t_two) == 5)
        # force the "Da Active" columns to boolean and every other column to text
        tdf_1.pgsql.write_schema(
            self.engine,
            schema,
            include_ancillary_info=False,
            forced_field_types={(t, f): "boolean" if "Active" in f else "text"
                                for t, (pks, dfs) in tdf_1.schema().items()
                                for f in pks + dfs})
        tdf_1.pgsql.write_data(dat, self.engine, schema)
        self.assertTrue(
            tdf_1._same_data(dat,
                             tdf_1.pgsql.create_tic_dat(self.engine, schema),
                             epsilon=1e-8))
        # once primary keys are declared, the raw rows contain duplicates, but
        # none remain when the "da_active" column is taken into account
        tdf = TicDatFactory(t_one=[["Field One", "Field Two"], []],
                            t_two=[["Field One"], []])
        self.assertTrue(tdf.pgsql.find_duplicates(self.engine, schema))
        self.assertFalse(
            tdf.pgsql.find_duplicates(self.engine,
                                      schema,
                                      active_fld="da_active"))
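
For reference, the forced_field_types comprehension above expands to the plain dict below: neither table declares primary keys, so pks is empty and every field is a data field, and only the "Da Active" columns match the "Active" test.

    {("t_one", "Field One"): "text",
     ("t_one", "Field Two"): "text",
     ("t_one", "Da Active"): "boolean",
     ("t_two", "Field One"): "text",
     ("t_two", "Da Active"): "boolean"}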
Example #4
    def testDateTime(self):
        schema = test_schema + "_datetime"
        tdf = TicDatFactory(table_with_stuffs=[["field one"], ["field two"]],
                            parameters=[["a"], ["b"]])
        tdf.add_parameter("p1", "Dec 15 1970", datetime=True)
        tdf.add_parameter("p2", None, datetime=True, nullable=True)
        tdf.set_data_type("table_with_stuffs", "field one", datetime=True)
        tdf.set_data_type("table_with_stuffs",
                          "field two",
                          datetime=True,
                          nullable=True)

        # rows mix parsed datetimes, datetime.now() and None; parameter p1 is
        # given as a plain date string
        dat = tdf.TicDat(table_with_stuffs=[[
            dateutil.parser.parse("July 11 1972"), None
        ], [datetime.datetime.now(),
            dateutil.parser.parse("Sept 11 2011")]],
                         parameters=[["p1", "7/11/1911"], ["p2", None]])
        self.assertFalse(
            tdf.find_data_type_failures(dat)
            or tdf.find_data_row_failures(dat))

        tdf.pgsql.write_schema(self.engine, schema)
        tdf.pgsql.write_data(dat, self.engine, schema)
        dat_1 = tdf.pgsql.create_tic_dat(self.engine, schema)
        # the round trip is not exact: e.g. parameter p1 was written as a string
        # but reads back as a timestamp
        self.assertFalse(
            tdf._same_data(dat, dat_1, nans_are_same_for_data_rows=True))
        self.assertTrue(
            all(
                len(getattr(dat, t)) == len(getattr(dat_1, t))
                for t in tdf.all_tables))
        self.assertFalse(
            tdf.find_data_type_failures(dat_1)
            or tdf.find_data_row_failures(dat_1))
        self.assertTrue(
            isinstance(dat_1.parameters["p1"]["b"], datetime.datetime))
        self.assertTrue(
            all(
                isinstance(_, datetime.datetime)
                for _ in dat_1.table_with_stuffs))
        self.assertTrue(
            len([_ for _ in dat_1.table_with_stuffs if pd.isnull(_)]) == 0)
        self.assertTrue(
            all(
                isinstance(_, datetime.datetime) or pd.isnull(_)
                for v in dat_1.table_with_stuffs.values() for _ in v.values()))
        self.assertTrue(
            len([
                _ for v in dat_1.table_with_stuffs.values()
                for _ in v.values() if pd.isnull(_)
            ]) == 1)
        pdf = PanDatFactory.create_from_full_schema(
            tdf.schema(include_ancillary_info=True))
        pan_dat = pdf.pgsql.create_pan_dat(self.engine, schema)
        dat_2 = pdf.copy_to_tic_dat(pan_dat)
        # pandas can be a real PIA sometimes, hacking around some weird downcasting
        for k in list(dat_2.table_with_stuffs):
            dat_2.table_with_stuffs[pd.Timestamp(
                k)] = dat_2.table_with_stuffs.pop(k)
        self.assertTrue(
            tdf._same_data(dat_1, dat_2, nans_are_same_for_data_rows=True))

        pdf.pgsql.write_data(pan_dat, self.engine, schema)
        dat_3 = pdf.copy_to_tic_dat(
            pdf.pgsql.create_pan_dat(self.engine, schema))
        for k in list(dat_3.table_with_stuffs):
            dat_3.table_with_stuffs[pd.Timestamp(
                k)] = dat_3.table_with_stuffs.pop(k)
        self.assertTrue(
            tdf._same_data(dat_1, dat_3, nans_are_same_for_data_rows=True))
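
The pd.Timestamp re-keying is applied to both dat_2 and dat_3 above; a small helper along these lines (purely illustrative, not part of ticdat) could factor out that workaround:

    import pandas as pd

    def normalize_timestamp_keys(tic_dat_table):
        # re-key a single-primary-key TicDat table in place so that its datetime
        # keys become pd.Timestamp objects, mirroring the workaround used twice above
        for k in list(tic_dat_table):
            tic_dat_table[pd.Timestamp(k)] = tic_dat_table.pop(k)

    # usage: normalize_timestamp_keys(dat_3.table_with_stuffs)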