class DeepLearningTest(unittest.TestCase):
    """Test fixture for the deep-learning model code.

    Loads both prepared datasets once at class-definition time; paths are
    relative to this test module's location in the source tree.
    """

    PATH_DATASET_HOURLY = '../../../' + PATH_DATASET.get('hourly')
    PATH_DATASET_DAILY = '../../../' + PATH_DATASET.get('daily')
    df_hourly = load_dataframe_from_csv(PATH_DATASET_HOURLY)
    df_daily = load_dataframe_from_csv(PATH_DATASET_DAILY)

    def test_create_net(self):
        # TODO
        pass
class ModelingUtilsTest(unittest.TestCase):
    """Tests for the modeling utility helpers (train/test splitting).

    Datasets are loaded once at class-definition time; paths are relative
    to this test module's location in the source tree.
    """

    PATH_DATASET_HOURLY = '../../../' + PATH_DATASET.get('hourly')
    PATH_DATASET_DAILY = '../../../' + PATH_DATASET.get('daily')
    df_hourly = load_dataframe_from_csv(PATH_DATASET_HOURLY)
    df_daily = load_dataframe_from_csv(PATH_DATASET_DAILY)

    def test_split_data(self):
        # Dump dtypes for debugging context when the assertion fails.
        print(self.df_hourly.dtypes)
        train, test = split_data(self.df_hourly, test_size=TEST_SIZE)
        # Expected training-set row count for the hourly dataset fixture.
        expected_size = 524
        self.assertEqual(expected_size, train.shape[0])
def runner(args: Namespace) -> None:
    """Train the XGBoost model on the prepared dataset and persist metrics.

    :param args: parsed CLI arguments; must provide ``home_path`` and
        ``sampling_frequency`` ('daily' or 'hourly').
    """
    dataset = load_dataframe_from_csv(
        create_path(args.home_path, PATH_DATASET.get(args.sampling_frequency)))
    xgb_model, metrics = xgboost_model(dataset, args.sampling_frequency)
    # Stringify metric values so the mapping is JSON-serializable.
    metrics = {
        metric_name: str(metric_value)
        for metric_name, metric_value in metrics.items()
    }
    # TODO implement saving the XGB model in
    #   create_path(args.home_path,
    #               PATH_RESULTS[args.sampling_frequency]['xgboost']['model'])
    metrics_path = create_path(
        args.home_path,
        PATH_RESULTS[args.sampling_frequency]['xgboost']['metrics'])
    # TODO refactor metrics persistence as a shared helper function
    with open(metrics_path, 'w') as metrics_file:
        # json.dump writes straight to the file object (idiomatic, avoids
        # building an intermediate string).
        json.dump(metrics, metrics_file)
def runner(args: Namespace) -> None:
    """Prepare the modeling dataset from cleaned bike and weather data.

    Joins the cleaned bike-usage data with cleaned weather data at the
    requested sampling frequency and saves the prepared dataset.

    :param args: parsed CLI arguments; must provide ``home_path`` and
        ``sampling_frequency`` ('daily' or 'hourly').
    :raises ValueError: if ``sampling_frequency`` is not 'daily' or 'hourly'.
    """
    df_bikes_clean = load_dataframe_from_csv(
        create_path(args.home_path, PATH_BIKES_CLEAN),
        parse_dates=[COL_BIKES_DATE])
    df_weather = load_dataframe_from_json(
        create_path(args.home_path, PATH_AEMET_PER_DAY),
        parse_dates=[COL_WEATHER_DATE])
    df_weather = clean_weather_data(df_weather)
    if args.sampling_frequency == 'daily':
        # TODO extract daily/hourly values as constants
        df_prepared = prepare_daily_data(df_bikes_clean, df_weather)
    elif args.sampling_frequency == 'hourly':
        df_prepared = prepare_hourly_data(df_bikes_clean, df_weather)
    else:
        # Previously fell through with df_prepared unbound, producing a
        # confusing NameError at save time; fail fast with a clear message.
        raise ValueError(
            'Unsupported sampling frequency: {!r}; '
            "expected 'daily' or 'hourly'".format(args.sampling_frequency))
    save_dataframe(
        df_prepared,
        create_path(args.home_path, PATH_DATASET.get(args.sampling_frequency)))
def runner(args: Namespace) -> None:
    """Train the deep-learning model on the prepared dataset, then save the
    trained net and its metrics under the results path.

    :param args: parsed CLI arguments; must provide ``home_path`` and
        ``sampling_frequency`` ('daily' or 'hourly').
    """
    dataset = load_dataframe_from_csv(
        create_path(args.home_path, PATH_DATASET.get(args.sampling_frequency)))
    net, metrics = deep_learning_model(dataset)
    # Stringify metric values so the mapping is JSON-serializable.
    metrics = {
        metric_name: str(metric_value)
        for metric_name, metric_value in metrics.items()
    }
    save_model(
        net,
        create_path(
            args.home_path,
            PATH_RESULTS[args.sampling_frequency]['deep-learning']['model']))
    metrics_path = create_path(
        args.home_path,
        PATH_RESULTS[args.sampling_frequency]['deep-learning']['metrics'])
    # TODO refactor metrics persistence as a shared helper function
    with open(metrics_path, 'w') as metrics_file:
        # json.dump writes straight to the file object (idiomatic, avoids
        # building an intermediate string).
        json.dump(metrics, metrics_file)
def main():
    """CLI entry point: parse arguments and run the data-preparation step."""
    print("[data-preparation] Starting ... ")

    parser = argparse.ArgumentParser(
        description='[BiciMad Project] Data Cleaning')
    parser.add_argument('--home-path', type=str, default='.', metavar='H',
                        help='home path')
    parser.add_argument('--sampling-frequency', type=str, default='hourly',
                        metavar='S',
                        help='Sampling frequency of data: daily/hourly ')
    args: Namespace = parser.parse_args()

    home_path = args.home_path
    frequency = args.sampling_frequency
    print("[data-preparation] Setting home path as: {}".format(home_path))
    print("[data-preparation] Preparing [{}] data".format(frequency))

    runner(args)

    output_path = create_path(home_path, PATH_DATASET.get(frequency))
    print("[data-preparation] Success: Prepared data stored in {}.".format(
        output_path))
class XGBoostTest(unittest.TestCase):
    """Test fixture for the XGBoost model code.

    Both prepared datasets are loaded once at class-definition time; paths
    are relative to this test module's location in the source tree.
    """

    PATH_DATASET_DAILY = '../../../' + PATH_DATASET.get('daily')
    df_daily = load_dataframe_from_csv(PATH_DATASET_DAILY)

    PATH_DATASET_HOURLY = '../../../' + PATH_DATASET.get('hourly')
    df_hourly = load_dataframe_from_csv(PATH_DATASET_HOURLY)