def test_test_table_primary_keys(self):
    """Check the key columns of the ``test`` table.

    ``id`` must be unique and free of NaNs, and the combination
    (``date``, ``store_nbr``, ``item_nbr``) must identify every row
    uniquely, with no NaNs in any of its three columns.
    """
    table = FactoryLoader().load("test")

    # Single-column key.
    self.assertTrue(table.id.is_unique)
    self.assertFalse(table.id.hasnans)

    # Composite key: dropping duplicate key tuples must not remove any row.
    composite_key = ["date", "store_nbr", "item_nbr"]
    distinct_keys = table[composite_key].drop_duplicates()
    self.assertEqual(distinct_keys.shape[0], table.shape[0])
    for column in composite_key:
        self.assertFalse(getattr(table, column).hasnans)
def main(sample=False):
    """Build the master dataset and cache it, with its data cube, in one HDF5 file.

    The master table is loaded through ``FactoryLoader`` and written to
    ``data/cache/master_data.hdf`` under the key ``"table"``. The result of
    ``get_data_cube_from_df`` for that table is then added to the same file
    as the ``"data_cube"`` dataset.

    Args:
        sample: Forwarded unchanged to ``FactoryLoader().load``.

    Raises:
        FileNotFoundError: If the directory that should hold the cache file
            does not exist.
    """
    filename = "data/cache/master_data.hdf"
    cache_dir = os.path.dirname(filename)

    # Explicit check instead of ``assert``: assertions are stripped when
    # Python runs with ``-O``. Fail before any data is loaded.
    if not os.path.isdir(cache_dir):
        raise FileNotFoundError(
            f"Cache directory does not exist: {cache_dir!r}"
        )

    df_master = FactoryLoader().load("master", sample=sample)
    # ``key`` is passed by keyword; newer pandas versions deprecate passing
    # it positionally.
    df_master.to_hdf(filename, key="table")

    data_cube = get_data_cube_from_df(df=df_master)
    # Append mode so the table written above is kept alongside the cube.
    with h5py.File(filename, "a") as h:
        h.create_dataset("data_cube", data=data_cube)
if __name__ == "__main__": config = get_custom_project_config() alias = config["alias"] random_seed = config["random_seed"] sample = config["sample"] cuda = config["cuda"] batch_size = config["batch_size"] forecast_horizon = config["forecast_horizon"] learning_rate = config["learning_rate"] log_config(config) wandb.config.update(config) # Load data dependent on time logger.info("Generating time-dependent dataset...") df_master = FactoryLoader().load("master", sample=sample) logger.info( f"Time dataset generated successfully! Shape: {df_master.shape}") logger.info("Converting time-dependent dataset to data cube...") df_master = get_records_cube_from_df(df=df_master) cat_cardinalities_time = { col: len(np.unique(df_master[col])) for col in df_master.dtype.names if col in categorical_feats } logger.info(f"Data cube successfully generated! Shape: {df_master.shape}") # Load static data logger.info("Generating static dataset...") df_master_static = FactoryLoader().load("master_timeless", sample=sample) df_master_static = df_master_static.to_records() cat_cardinalities_timeless = {
def test_prototype_name(self):
    """Loading the name ``"__prototype"`` must raise ``ValueError``."""
    with self.assertRaises(ValueError):
        FactoryLoader().load("__prototype")
def test_incorrect_name(self):
    """Loading the unknown name ``"holidays"`` must raise ``ValueError``."""
    with self.assertRaises(ValueError):
        FactoryLoader().load("holidays")
def test_transactions_table_primary_keys(self):
    """Check the composite key of the ``transactions`` table.

    The pair (``date``, ``store_nbr``) must identify every row uniquely,
    and neither column may contain NaNs.
    """
    transactions = FactoryLoader().load("transactions")

    # Dropping duplicate key pairs must not remove any row.
    key_columns = ["date", "store_nbr"]
    distinct_keys = transactions[key_columns].drop_duplicates()
    self.assertEqual(distinct_keys.shape[0], transactions.shape[0])

    for column in key_columns:
        self.assertFalse(getattr(transactions, column).hasnans)
def test_stores_table_primary_keys(self):
    """``store_nbr`` in the ``stores`` table must be unique and NaN-free."""
    store_numbers = FactoryLoader().load("stores").store_nbr
    self.assertTrue(store_numbers.is_unique)
    self.assertFalse(store_numbers.hasnans)
def test_oil_table_primary_keys(self):
    """``date`` in the ``oil`` table must be unique and NaN-free."""
    oil_dates = FactoryLoader().load("oil").date
    self.assertTrue(oil_dates.is_unique)
    self.assertFalse(oil_dates.hasnans)
def test_items_table_shape(self):
    """The ``items`` table must load with 4100 rows and 3 columns."""
    expected_shape = (4100, 3)
    items = FactoryLoader().load("items")
    self.assertEqual(expected_shape, items.shape)
def test_holidays_events_table_primary_keys(self):
    """``date`` in the ``holidays_events`` table must be unique and NaN-free."""
    event_dates = FactoryLoader().load("holidays_events").date
    self.assertTrue(event_dates.is_unique)
    self.assertFalse(event_dates.hasnans)
def test_transactions_table_shape(self):
    """The ``transactions`` table must load with 83488 rows and 3 columns."""
    expected_shape = (83488, 3)
    transactions = FactoryLoader().load("transactions")
    self.assertEqual(expected_shape, transactions.shape)
def test_train_table_shape(self):
    """The ``train`` table must load with 125497040 rows and 6 columns."""
    expected_shape = (125497040, 6)
    train = FactoryLoader().load("train")
    self.assertEqual(expected_shape, train.shape)
def test_test_table_shape(self):
    """The ``test`` table must load with 3370464 rows and 5 columns."""
    expected_shape = (3370464, 5)
    test_table = FactoryLoader().load("test")
    self.assertEqual(expected_shape, test_table.shape)
def test_stores_table_shape(self):
    """The ``stores`` table must load with 54 rows and 5 columns."""
    expected_shape = (54, 5)
    stores = FactoryLoader().load("stores")
    self.assertEqual(expected_shape, stores.shape)
def test_oil_table_shape(self):
    """The ``oil`` table must load with 1175 rows and 2 columns."""
    expected_shape = (1175, 2)
    oil = FactoryLoader().load("oil")
    self.assertEqual(expected_shape, oil.shape)
def test_holidays_events_table_shape(self):
    """The ``holidays_events`` table must load with 312 rows and 6 columns."""
    expected_shape = (312, 6)
    holidays_events = FactoryLoader().load("holidays_events")
    self.assertEqual(expected_shape, holidays_events.shape)