def test_arima():
    """Run the ARIMA evaluation on the DFW demand data.

    Removes any existing ``results`` directory, builds a 30-minute
    ``athena.Dataset`` from ``dfw_demand.csv.gz`` and hands it, together
    with an ARIMA transform on the ``vehicles`` column, to
    ``evaluate_ARIMA``.
    """
    # Start from a clean slate so earlier runs cannot leak into this one.
    if os.path.exists("results"):
        shutil.rmtree("results")

    config = {
        'directory': 'results/arima',
        'freq': "30min",
        'params': {
            'arima_order': (1, 0, 0)
        }
    }

    # NOTE: ``predition_length`` is the keyword spelling used for
    # athena.Dataset everywhere in this code; it is kept as-is.
    dataset_options = dict(
        index="timestamp",
        freq="30min",
        max_days=500,
        max_training_days=200,
        predition_length=12,
        test_start_values=["2019-07-27 13:00:00"],
        test_sequence_length=4,
    )
    dataset = athena.Dataset("dfw_demand.csv.gz", **dataset_options)

    target_columns = ['vehicles']
    transform = DataTransformARIMA(target_columns)

    evaluate_ARIMA(dataset, transform, config)
def get_dataset(config):
    """Build an ``athena.Dataset`` from a configuration mapping.

    Args:
        config: Mapping providing the keys ``filename``, ``index``,
            ``freq``, ``max_days``, ``max_training_days``,
            ``prediction_length``, ``test_start_values`` and
            ``test_sequence_length``.

    Returns:
        The ``athena.Dataset`` constructed from those values.

    Raises:
        KeyError: If any of the keys listed above is missing.
    """
    # (Dataset keyword, config key) pairs, in the order they are read.
    # The Dataset keyword is spelled ``predition_length`` while the config
    # key is ``prediction_length``; both spellings are intentional here.
    keyword_to_config_key = (
        ('index', 'index'),
        ('freq', 'freq'),
        ('max_days', 'max_days'),
        ('max_training_days', 'max_training_days'),
        ('predition_length', 'prediction_length'),
        ('test_start_values', 'test_start_values'),
        ('test_sequence_length', 'test_sequence_length'),
    )

    source_file = config['filename']
    options = {
        keyword: config[config_key]
        for keyword, config_key in keyword_to_config_key
    }
    return athena.Dataset(source_file, **options)
def evaluate_model(model, config):
    """Evaluate ``model`` via the gluonts helpers and check the output size.

    Removes any existing ``results`` directory, runs
    ``athena.utils.gluonts.evaluate_gluonts`` on a dataset built from
    ``dfw_demand.csv.gz``, then loads the first CSV that ``glob`` returns
    from ``config['directory']`` and asserts that it has 48 rows.

    Args:
        model: Model object forwarded unchanged to ``evaluate_gluonts``.
        config: Mapping forwarded to ``evaluate_gluonts``; its
            ``'directory'`` entry is where the result CSV is looked up.
    """
    # Start from a clean slate so stale CSVs are not picked up below.
    if os.path.exists("results"):
        shutil.rmtree("results")

    gluonts_utils = athena.utils.gluonts

    dataset_options = dict(
        index="timestamp",
        freq="30min",
        max_training_days=5,
        predition_length=48,
        test_start_values=["2019-07-27 00:00:00"],
        test_sequence_length=1,
    )
    dataset = athena.Dataset("dfw_demand.csv.gz", **dataset_options)
    transform = gluonts_utils.DataTransformGluon(['vehicles'])

    gluonts_utils.evaluate_gluonts(dataset, transform, model, config)

    # Load the first CSV found in the output directory.
    csv_pattern = "{}/*.csv".format(config['directory'])
    first_csv = glob.glob(csv_pattern)[0]
    df = pd.read_csv(first_csv)

    # Expected row count matches the predition_length of 48 requested above.
    assert len(df) == 48
def test_ds():
    """Check cross-validation count, dataset length and training window size."""
    total_days = 500
    training_day_cap = 200
    start_values = ["2019-07-27 00:00:00"]
    sequence_length = 4

    ds = athena.Dataset(
        "dfw_demand.csv.gz",
        index="timestamp",
        freq="30min",
        max_days=total_days,
        max_training_days=training_day_cap,
        predition_length=1,
        test_start_values=start_values,
        test_sequence_length=sequence_length,
    )

    # Number of cv entries: len(test_start_values) * test_sequence_length.
    assert len(ds.cv) == 4

    # The dataset holds exactly max_days worth of rows.
    assert len(ds.df) == 500*ds.rows_per_day

    # Every cv entry trains on at most 200 days.
    for fold in ds.cv:
        training_rows = fold['train_stop'] - fold['train_start']
        assert training_rows/ds.rows_per_day <= 200
def evaluate_model(model, config):
    """Evaluate ``model`` via the sklearn helpers and check the output size.

    Removes any existing ``results`` directory, runs
    ``athena.utils.sklearn.evaluate_sklearn`` on a dataset built from
    ``dfw_demand.csv.gz``, then loads the first CSV that ``glob`` returns
    from ``config['directory']`` and asserts that it has 4 rows.

    Args:
        model: Model object forwarded unchanged to ``evaluate_sklearn``.
        config: Mapping forwarded to ``evaluate_sklearn``; its
            ``'directory'`` entry is where the result CSV is looked up.
    """
    # Start from a clean slate so stale CSVs are not picked up below.
    if os.path.exists("results"):
        shutil.rmtree("results")

    dataset_options = dict(
        index="timestamp",
        freq="30min",
        max_days=500,
        max_training_days=200,
        predition_length=1,
        test_start_values=["2019-07-27 13:00:00"],
        test_sequence_length=4,
    )
    dataset = athena.Dataset("dfw_demand.csv.gz", **dataset_options)

    # The three column lists are passed positionally, as the transform's
    # signature is not visible from here.
    first_columns = ['vehicles']
    second_columns = ['vehicles_1', 'vehicles_2', 'vehicles_3']
    third_columns = ['hour']
    transform = DataTransformSKLearn(
        first_columns, second_columns, third_columns)

    athena.utils.sklearn.evaluate_sklearn(dataset, transform, model, config)

    # Load the first CSV found in the output directory.
    csv_pattern = "{}/*.csv".format(config['directory'])
    first_csv = glob.glob(csv_pattern)[0]
    df = pd.read_csv(first_csv)

    # test_sequence_length * len(test_start_values) = 4 * 1
    assert len(df) == 4