Exemple #1
0
 def test_test_table_primary_keys(self):
     """Check ``id`` and the (date, store_nbr, item_nbr) triple as keys of the "test" table."""
     table = FactoryLoader().load("test")

     # ``id`` has to be unique and must not contain missing values.
     self.assertTrue(table.id.is_unique)
     self.assertFalse(table.id.hasnans)

     # Dropping duplicated key triples must leave the row count unchanged.
     key_cols = ["date", "store_nbr", "item_nbr"]
     n_distinct_keys = table[key_cols].drop_duplicates().shape[0]
     self.assertEqual(n_distinct_keys, table.shape[0])

     # No component of the composite key may contain missing values.
     for col in key_cols:
         self.assertFalse(getattr(table, col).hasnans)
Exemple #2
0
def main(sample=False):
    """Build the master dataset and cache it, with its data cube, in one HDF file.

    The "master" table is loaded through ``FactoryLoader``, written to
    ``data/cache/master_data.hdf`` under the key ``"table"``, and the result of
    ``get_data_cube_from_df`` is then added to the same file as the
    ``"data_cube"`` dataset.

    Args:
        sample: Forwarded unchanged to ``FactoryLoader().load("master", ...)``.

    Raises:
        AssertionError: If the directory that should hold the cache file does
            not exist.
    """
    filename = "data/cache/master_data.hdf"
    cache_dir = os.path.dirname(filename)
    assert os.path.exists(cache_dir), f"Cache directory not found: {cache_dir}"

    df_master = FactoryLoader().load("master", sample=sample)
    # Pass ``key`` by keyword: recent pandas releases deprecate (and later
    # reject) positional arguments to ``to_hdf`` other than the path.
    df_master.to_hdf(filename, key="table")

    df = get_data_cube_from_df(df=df_master)
    # Mode "a" reopens the file written above read/write, so the cube is added
    # next to the existing "table" node instead of replacing the file.
    with h5py.File(filename, "a") as h:
        h.create_dataset("data_cube", data=df)
Exemple #3
0
# Script entry point: read the project configuration, then build the
# time-dependent and the static datasets.
if __name__ == "__main__":
    # Unpack the individual settings from the project configuration.
    config = get_custom_project_config()
    alias = config["alias"]
    random_seed = config["random_seed"]
    sample = config["sample"]
    cuda = config["cuda"]
    batch_size = config["batch_size"]
    forecast_horizon = config["forecast_horizon"]
    learning_rate = config["learning_rate"]
    log_config(config)
    # Hand the same configuration to wandb's config object.
    wandb.config.update(config)

    # Load data dependent on time
    logger.info("Generating time-dependent dataset...")
    df_master = FactoryLoader().load("master", sample=sample)
    logger.info(
        f"Time dataset generated successfully! Shape: {df_master.shape}")
    logger.info("Converting time-dependent dataset to data cube...")
    # From here on ``df_master`` holds the converted cube, not the loaded table.
    # The comprehension below reads ``dtype.names``, so the cube is presumably a
    # NumPy structured array -- TODO confirm against get_records_cube_from_df.
    df_master = get_records_cube_from_df(df=df_master)
    # Number of distinct values for every field that appears in
    # ``categorical_feats`` (defined outside this block).
    cat_cardinalities_time = {
        col: len(np.unique(df_master[col]))
        for col in df_master.dtype.names if col in categorical_feats
    }
    logger.info(f"Data cube successfully generated! Shape: {df_master.shape}")

    # Load static data
    logger.info("Generating static dataset...")
    df_master_static = FactoryLoader().load("master_timeless", sample=sample)
    # Presumably pandas' ``DataFrame.to_records`` (record-array conversion) --
    # verify the type returned by the loader.
    df_master_static = df_master_static.to_records()
    cat_cardinalities_timeless = {
Exemple #4
0
 def test_prototype_name(self):
     """Requesting the "__prototype" reference is expected to raise ``ValueError``."""
     with self.assertRaises(ValueError):
         FactoryLoader().load("__prototype")
Exemple #5
0
 def test_incorrect_name(self):
     """Requesting the "holidays" reference is expected to raise ``ValueError``."""
     with self.assertRaises(ValueError):
         FactoryLoader().load("holidays")
Exemple #6
0
 def test_transactions_table_primary_keys(self):
     """Check that (date, store_nbr) works as a key of the "transactions" table."""
     transactions = FactoryLoader().load("transactions")

     # Dropping duplicated (date, store_nbr) pairs must not remove any row.
     key_cols = ["date", "store_nbr"]
     n_distinct_keys = transactions[key_cols].drop_duplicates().shape[0]
     self.assertEqual(n_distinct_keys, transactions.shape[0])

     # Neither key column may contain missing values.
     for col in key_cols:
         self.assertFalse(getattr(transactions, col).hasnans)
Exemple #7
0
 def test_stores_table_primary_keys(self):
     """``store_nbr`` must be unique and free of missing values in the "stores" table."""
     store_numbers = FactoryLoader().load("stores").store_nbr
     self.assertTrue(store_numbers.is_unique)
     self.assertFalse(store_numbers.hasnans)
Exemple #8
0
 def test_oil_table_primary_keys(self):
     """``date`` must be unique and free of missing values in the "oil" table."""
     dates = FactoryLoader().load("oil").date
     self.assertTrue(dates.is_unique)
     self.assertFalse(dates.hasnans)
Exemple #9
0
 def test_items_table_shape(self):
     """The "items" table is expected to have shape (4100, 3)."""
     expected_shape = (4100, 3)
     items = FactoryLoader().load("items")
     self.assertEqual(expected_shape, items.shape)
Exemple #10
0
 def test_holidays_events_table_primary_keys(self):
     """``date`` must be unique and free of missing values in the "holidays_events" table."""
     dates = FactoryLoader().load("holidays_events").date
     self.assertTrue(dates.is_unique)
     self.assertFalse(dates.hasnans)
Exemple #11
0
 def test_transactions_table_shape(self):
     """The "transactions" table is expected to have shape (83488, 3)."""
     expected_shape = (83488, 3)
     transactions = FactoryLoader().load("transactions")
     self.assertEqual(expected_shape, transactions.shape)
Exemple #12
0
 def test_train_table_shape(self):
     """The "train" table is expected to have shape (125497040, 6)."""
     expected_shape = (125497040, 6)
     train = FactoryLoader().load("train")
     self.assertEqual(expected_shape, train.shape)
Exemple #13
0
 def test_test_table_shape(self):
     """The "test" table is expected to have shape (3370464, 5)."""
     expected_shape = (3370464, 5)
     test_table = FactoryLoader().load("test")
     self.assertEqual(expected_shape, test_table.shape)
Exemple #14
0
 def test_stores_table_shape(self):
     """The "stores" table is expected to have shape (54, 5)."""
     expected_shape = (54, 5)
     stores = FactoryLoader().load("stores")
     self.assertEqual(expected_shape, stores.shape)
Exemple #15
0
 def test_oil_table_shape(self):
     """The "oil" table is expected to have shape (1175, 2)."""
     expected_shape = (1175, 2)
     oil = FactoryLoader().load("oil")
     self.assertEqual(expected_shape, oil.shape)
Exemple #16
0
 def test_holidays_events_table_shape(self):
     """The "holidays_events" table is expected to have shape (312, 6)."""
     expected_shape = (312, 6)
     holidays_events = FactoryLoader().load("holidays_events")
     self.assertEqual(expected_shape, holidays_events.shape)