def testNullsPd(self):
    """Exercise null / infinity handling of a two-field table in PostgreSQL.

    Three factories are pointed at the same database schema in turn:

    1. A factory whose fields are nullable writes rows containing ``None``
       and must read them back unchanged (NaNs treated as equal).
    2. A factory whose fields allow ``+inf`` (infinity I/O flag ``None``)
       reads those rows; the assertions expect ``+inf`` in the cells that
       were written as ``None``.  Writing the ``+inf`` data back and
       re-reading must give the same result.
    3. A factory whose fields allow ``-inf`` (infinity I/O flag ``None``)
       reads the table again; the assertions expect the result to differ
       from the ``+inf`` data and to match the ``-inf`` data.
    """
    field_names = ("field one", "field two")

    def table_with(marker):
        # Same five rows every time; only the two "special" cells vary.
        return {
            "field one": [marker, 200, 0, 300, 400],
            "field two": [100, 109, 300, marker, 0],
        }

    # NOTE(review): the suffix looks borrowed from a bool-defaults test --
    # confirm it cannot collide with another test's schema.
    schema = test_schema + "_bool_defaults_pd"

    # --- 1. nullable fields: write None, read None -------------------------
    nullable_pdf = PanDatFactory(table=[[], list(field_names)])
    for name in field_names:
        nullable_pdf.set_data_type("table", name, nullable=True)
    null_dat = nullable_pdf.PanDat(table=table_with(None))
    nullable_pdf.pgsql.write_schema(
        self.engine, schema, include_ancillary_info=False)
    nullable_pdf.pgsql.write_data(null_dat, self.engine, schema)
    from_db = nullable_pdf.pgsql.create_pan_dat(self.engine, schema)
    self.assertTrue(nullable_pdf._same_data(
        null_dat, from_db, nans_are_same_for_data_rows=True))

    # --- 2. +inf allowed: stored rows are expected to read back as +inf ----
    max_pdf = PanDatFactory(table=[["field one"], ["field two"]])
    for name in field_names:
        max_pdf.set_data_type(
            "table", name, max=float("inf"), inclusive_max=True)
    max_pdf.set_infinity_io_flag(None)
    pos_inf_dat = max_pdf.PanDat(table=table_with(float("inf")))
    from_db = max_pdf.pgsql.create_pan_dat(self.engine, schema)
    self.assertTrue(max_pdf._same_data(pos_inf_dat, from_db))
    # Writing the +inf data and reading it again must be a stable round trip.
    max_pdf.pgsql.write_data(pos_inf_dat, self.engine, schema)
    from_db = max_pdf.pgsql.create_pan_dat(self.engine, schema)
    self.assertTrue(max_pdf._same_data(pos_inf_dat, from_db))

    # --- 3. -inf allowed: the same table is expected to read back as -inf --
    min_pdf = PanDatFactory(table=[["field one"], ["field two"]])
    for name in field_names:
        min_pdf.set_data_type(
            "table", name, min=-float("inf"), inclusive_min=True)
    min_pdf.set_infinity_io_flag(None)
    from_db = min_pdf.pgsql.create_pan_dat(self.engine, schema)
    self.assertFalse(min_pdf._same_data(pos_inf_dat, from_db))
    neg_inf_dat = min_pdf.PanDat(table=table_with(-float("inf")))
    self.assertTrue(min_pdf._same_data(neg_inf_dat, from_db))
def test_nullables(self):
    """Round-trip a table with a nullable, strings-only data field.

    "field one" gets the default data type; "field two" accepts any string
    (no numbers) and may be null.  The same rows are pushed through
    PostgreSQL and read back both as a PanDat and as a TicDat, and each
    result must equal what was written (NaNs treated as equal).
    """
    schema = test_schema + "nullables"
    table = "table_with_stuffs"

    pdf = PanDatFactory(**{table: [["field one"], ["field two"]]})
    pdf.set_data_type(table, "field one")
    pdf.set_data_type(
        table,
        "field two",
        number_allowed=False,
        strings_allowed='*',
        nullable=True,
    )
    # Mirror the pandas factory, data types included, as a TicDatFactory.
    full_schema = pdf.schema(include_ancillary_info=True)
    tdf = TicDatFactory.create_from_full_schema(full_schema)

    rows = [[101, "022"], [202, None], [303, "111"]]
    tic_dat = tdf.TicDat(**{table: rows})
    pan_dat = tdf.copy_to_pandas(tic_dat, drop_pk_columns=False)

    # The input itself must be clean before it is written anywhere.
    self.assertFalse(tdf.find_data_type_failures(tic_dat))
    self.assertFalse(pdf.find_data_type_failures(pan_dat))

    pdf.pgsql.write_schema(self.engine, schema)
    pdf.pgsql.write_data(pan_dat, self.engine, schema)

    pan_from_db = pdf.pgsql.create_pan_dat(self.engine, schema)
    self.assertTrue(pdf._same_data(
        pan_dat, pan_from_db, nans_are_same_for_data_rows=True))

    tic_from_db = tdf.pgsql.create_tic_dat(self.engine, schema)
    self.assertTrue(tdf._same_data(
        tic_dat, tic_from_db, nans_are_same_for_data_rows=True))
def testDietWithInfFlaggingPd(self):
    """Check that the diet data survives PostgreSQL with an infinity flag.

    The diet data is written by a factory whose infinity I/O flag is
    999999999.  Reading through that factory (and through a clone of it)
    must reproduce the data.  Reading through a factory built from the
    plain schema must NOT reproduce it: the protein row's "Max Nutrition"
    is expected to hold the flag value, and replacing that value with
    ``inf`` must restore equality.
    """
    inf_flag = 999999999
    schema = test_schema + "_diet_inf_flagging_pd"

    full_schema = diet_schema.schema(include_ancillary_info=True)
    flagged_pdf = PanDatFactory.create_from_full_schema(full_schema)
    dat = diet_schema.copy_to_pandas(diet_dat, drop_pk_columns=False)
    flagged_pdf.set_infinity_io_flag(inf_flag)

    flagged_pdf.pgsql.write_schema(self.engine, schema)
    flagged_pdf.pgsql.write_data(dat, self.engine, schema)
    from_db = flagged_pdf.pgsql.create_pan_dat(self.engine, schema)
    self.assertTrue(flagged_pdf._same_data(dat, from_db))

    # A clone must read the table exactly as the factory it was cloned from.
    cloned_pdf = flagged_pdf.clone()
    from_db = cloned_pdf.pgsql.create_pan_dat(self.engine, schema)
    self.assertTrue(cloned_pdf._same_data(dat, from_db))

    # Built from the schema without ancillary info -- presumably this means
    # no infinity flag is configured, so the stored flag value shows through.
    plain_pdf = PanDatFactory(**diet_schema.schema())
    raw_dat = plain_pdf.pgsql.create_pan_dat(self.engine, schema)
    self.assertFalse(plain_pdf._same_data(dat, raw_dat))

    protein_rows = raw_dat.categories["Name"] == "protein"
    stored_max = list(raw_dat.categories[protein_rows]["Max Nutrition"])[0]
    self.assertTrue(stored_max == inf_flag)

    # Undo the flagging by hand; the data must then match the original.
    raw_dat.categories.loc[protein_rows, "Max Nutrition"] = float("inf")
    self.assertTrue(plain_pdf._same_data(dat, raw_dat))
def test_missing_tables(self):
    """Read a database that lacks one of the tables the reader expects.

    Data is written with a one-table schema ("this").  It is then read back
    with factories that additionally declare a table "that", which was never
    written.  Both the TicDat and the PanDat read must still match the
    original one-table data.
    """
    schema = test_schema + "_missing_tables"

    # "narrow" factories know only "this"; "wide" ones also declare "that".
    narrow_tdf = TicDatFactory(this=[["Something"], ["Another"]])
    narrow_pdf = PanDatFactory(**narrow_tdf.schema())

    wide_schema = dict(narrow_tdf.schema())
    wide_schema["that"] = [["What", "Ever"], []]
    wide_tdf = TicDatFactory(**wide_schema)
    wide_pdf = PanDatFactory(**wide_tdf.schema())

    tic_dat = narrow_tdf.TicDat(this=[["a", 2], ["b", 3], ["c", 5]])
    pan_dat = narrow_tdf.copy_to_pandas(tic_dat, drop_pk_columns=False)

    # Only the narrow schema is ever written to the database.
    narrow_tdf.pgsql.write_schema(self.engine, schema)
    narrow_tdf.pgsql.write_data(tic_dat, self.engine, schema)

    tic_from_db = wide_tdf.pgsql.create_tic_dat(self.engine, schema)
    self.assertTrue(narrow_tdf._same_data(tic_dat, tic_from_db))

    pan_from_db = wide_pdf.pgsql.create_pan_dat(self.engine, schema)
    self.assertTrue(narrow_pdf._same_data(pan_dat, pan_from_db))
def test_parameters_pd(self):
    """Round-trip a parameters table holding both ``inf`` and ``"inf"``.

    "Something" is declared with a default of 100 and is given the float
    infinity; "Different" is declared strings-only with default 'boo' and is
    given the text "inf".  After a write/read cycle through PostgreSQL the
    PanDat must equal what was written.
    """
    schema = test_schema + "_parameters_pd"

    pdf = PanDatFactory(parameters=[["Key"], ["Value"]])
    pdf.add_parameter("Something", 100)
    pdf.add_parameter(
        "Different", 'boo', strings_allowed='*', number_allowed=False)

    # Build the rows as a TicDat first, then convert to the pandas form.
    parameter_rows = [["Something", float("inf")], ["Different", "inf"]]
    tic_dat = TicDatFactory(**pdf.schema()).TicDat(parameters=parameter_rows)
    pan_dat = TicDatFactory(**pdf.schema()).copy_to_pandas(
        tic_dat, drop_pk_columns=False)

    pdf.pgsql.write_schema(self.engine, schema)
    pdf.pgsql.write_data(pan_dat, self.engine, schema)
    pan_from_db = pdf.pgsql.create_pan_dat(self.engine, schema)

    self.assertTrue(pdf._same_data(pan_dat, pan_from_db))
def test_big_diet_pd(self): if not self.can_run: return tdf = diet_schema pdf = PanDatFactory(**tdf.schema()) pgpf = PostgresPanFactory(pdf) big_dat = diet_schema.copy_tic_dat(diet_dat) for k in range(int(1e5)): big_dat.categories[str(k)] = [0, 100] pan_dat = pan_dat_maker(tdf.schema(), big_dat) schema = "test_pg_big_diet" now = time.time() pgpf.write_schema(self.engine, schema) pgpf.write_data(pan_dat, self.engine, schema) print(f"**&&*{time.time()-now}**&&**") now = time.time() pg_pan_dat = pgpf.create_pan_dat(self.engine, schema) print(f"*&&*{time.time()-now}**&&**") self.assertTrue(pdf._same_data(pan_dat, pg_pan_dat))