예제 #1
0
class DeepLearningTest(unittest.TestCase):
    """Test case for the deep-learning model code.

    The hourly and daily datasets are read while the class body is evaluated
    and kept as class attributes, so every test method can reach them through
    ``self``.
    """

    # Dataset locations: the configured paths prefixed with three parent
    # directory hops.
    PATH_DATASET_HOURLY = '../../../' + PATH_DATASET.get('hourly')
    PATH_DATASET_DAILY = '../../../' + PATH_DATASET.get('daily')

    # Shared fixtures.
    df_hourly = load_dataframe_from_csv(PATH_DATASET_HOURLY)
    df_daily = load_dataframe_from_csv(PATH_DATASET_DAILY)

    def test_create_net(self):
        # TODO: no checks implemented yet; this test passes unconditionally.
        return None
예제 #2
0
class ModelingUtilsTest(unittest.TestCase):
    """Test case for the modeling utilities.

    The hourly and daily datasets are read while the class body is evaluated
    and kept as class attributes shared by the test methods.
    """

    # Dataset locations: the configured paths prefixed with three parent
    # directory hops.
    PATH_DATASET_HOURLY = '../../../' + PATH_DATASET.get('hourly')
    PATH_DATASET_DAILY = '../../../' + PATH_DATASET.get('daily')

    # Shared fixtures.
    df_hourly = load_dataframe_from_csv(PATH_DATASET_HOURLY)
    df_daily = load_dataframe_from_csv(PATH_DATASET_DAILY)

    def test_split_data(self):
        """The training part of the split hourly dataset has 524 rows."""
        # Debug output: column dtypes of the hourly dataset.
        print(self.df_hourly.dtypes)
        train_part, _test_part = split_data(self.df_hourly,
                                            test_size=TEST_SIZE)
        train_rows = train_part.shape[0]
        self.assertEqual(524, train_rows)
예제 #3
0
def runner(args: Namespace) -> None:
    """Load the raw bikes data, clean it and save the cleaned result.

    Args:
        args: Parsed command-line arguments. Only ``args.home_path`` is read;
            it is combined with ``PATH_BIKES_RAW`` (input) and
            ``PATH_BIKES_CLEAN`` (output) through ``create_path``.
    """
    raw_path = create_path(args.home_path, PATH_BIKES_RAW)
    raw_bikes = load_dataframe_from_csv(raw_path)

    # Both cleaning options are always enabled for this step.
    cleaned_bikes = clean_bikes_data(
        raw_bikes,
        without_employees=True,
        remove_outliers=True,
    )

    clean_path = create_path(args.home_path, PATH_BIKES_CLEAN)
    save_dataframe(cleaned_bikes, clean_path)
class AggregationOperationsTest(unittest.TestCase):
    """Test case for the aggregation and dataset-preparation helpers.

    The cleaned bikes data (CSV) and the per-day weather data (JSON) are read
    while the class body is evaluated and shared by all test methods as class
    attributes.
    """

    # Input locations: the configured paths prefixed with three parent
    # directory hops.
    PATH_BIKES_CLEAN = '../../../' + PATH_BIKES_CLEAN
    PATH_WEATHER = '../../../' + PATH_AEMET_PER_DAY

    # Shared fixtures; the date columns are parsed on load.
    df_bikes = load_dataframe_from_csv(
        PATH_BIKES_CLEAN, parse_dates=[COL_BIKES_DATE])
    df_weather = load_dataframe_from_json(
        PATH_WEATHER, parse_dates=[COL_WEATHER_DATE])

    def test_preprocess_rides_per_day(self):
        """Daily aggregation yields 28 rows and a rides column."""
        daily_rides = preprocess_rides_per_day(self.df_bikes)
        self.assertEqual(daily_rides.shape[0], 28)
        self.assertIn(COL_BIKES_RIDES, daily_rides.columns)

    def test_preprocess_rides_per_hour(self):
        """Hourly aggregation yields rides and hour columns and 656 rows."""
        hourly_rides = preprocess_rides_per_hour(self.df_bikes)
        for required_column in (COL_BIKES_RIDES, COL_BIKES_HOUR):
            self.assertIn(required_column, hourly_rides.columns)
        self.assertEqual(hourly_rides.shape[0], 656)

    def test_add_mean_rides_for_day(self):
        """Smoke test: the helper runs on the daily aggregation."""
        daily_rides = preprocess_rides_per_day(self.df_bikes)
        add_mean_rides_for_day(daily_rides)
        # Placeholder assertion; the result of the call above is not checked.
        self.assertEqual(0, 0)

    def test_add_weather_data_per_day(self):
        """Adding weather data brings in rain, mean-temp and mean-wind columns."""
        daily_rides = preprocess_rides_per_day(self.df_bikes)
        with_weather = add_weather_data_per_day(daily_rides, self.df_weather)
        weather_columns = (
            COL_WEATHER_RAIN,
            COL_WEATHER_TEMP_MEAN,
            COL_WEATHER_WIND_MEAN,
        )
        for weather_column in weather_columns:
            self.assertIn(weather_column, with_weather.columns)

    def test_prepare_daily_data(self):
        """The prepared daily dataset no longer carries the date column."""
        prepared = prepare_daily_data(self.df_bikes, self.df_weather)
        self.assertNotIn(COL_BIKES_DATE, prepared.columns)

    def test_get_temperature_model(self):
        """The model built from (20, 24, 10, 12) has coef 2.0, intercept 0.0."""
        model = get_temperature_model(20, 24, 10, 12)
        # Debug output: prediction for a single sample with value 11.0.
        sample = np.array(11.0).reshape(1, -1)
        print(model.predict(sample))
        self.assertAlmostEqual(model.coef_[0], 2.0)
        self.assertAlmostEqual(model.intercept_, 0.0)

    def test_get_temperature_simple(self):
        """The simple temperature helper returns 20.0 for the sample inputs."""
        temperature = get_temperature_simple(7, 20, 15, 35, 6, 14)
        self.assertEqual(20.0, temperature)

    def test_get_hourly_weather(self):
        """Hourly weather derivation adds the hourly temperature column."""
        hourly_rides = preprocess_rides_per_hour(self.df_bikes)
        with_weather = add_weather_data_per_day(hourly_rides, self.df_weather)
        hourly_weather = get_hourly_weather(with_weather)
        self.assertIn(COL_WEATHER_TEMP_HOURLY, hourly_weather.columns)
예제 #5
0
def runner(args: Namespace) -> None:
    """Train the XGBoost model and write its metrics to a JSON file.

    Args:
        args: Parsed command-line arguments. ``args.home_path`` is the base
            passed to ``create_path``; ``args.sampling_frequency`` selects the
            entries of ``PATH_DATASET`` and ``PATH_RESULTS`` and is forwarded
            to ``xgboost_model``.
    """
    frequency = args.sampling_frequency

    dataset_path = create_path(args.home_path, PATH_DATASET.get(frequency))
    dataset = load_dataframe_from_csv(dataset_path)

    # The trained model is currently discarded (see the TODO below).
    _xgb_model, raw_metrics = xgboost_model(dataset, frequency)

    # Every metric value is stringified before JSON serialization.
    serializable_metrics = {}
    for metric_name, metric_value in raw_metrics.items():
        serializable_metrics[metric_name] = str(metric_value)

    # TODO implement saving XGB model in create_path(args.home_path, PATH_RESULTS[args.sampling_frequency]['xgboost']['model']))
    metrics_path = create_path(
        args.home_path, PATH_RESULTS[frequency]['xgboost']['metrics'])
    # TODO refactor this as a function
    with open(metrics_path, 'w') as metrics_file:
        metrics_file.write(json.dumps(serializable_metrics))
class CleaningOperationsTest(unittest.TestCase):
    """Test case for the bikes and weather cleaning operations.

    The raw bikes data (CSV) and the per-day weather data (JSON) are read
    while the class body is evaluated and shared by all test methods as class
    attributes.
    """

    # Input locations: the configured paths prefixed with three parent
    # directory hops.
    PATH_BIKES = '../../../' + PATH_BIKES_RAW
    PATH_WEATHER = '../../../' + PATH_AEMET_PER_DAY
    df_bikes = load_dataframe_from_csv(PATH_BIKES)
    df_weather = load_dataframe_from_json(PATH_WEATHER)

    def test_clean_stations(self):
        # TODO
        pass

    def test_transform_types(self):
        # TODO
        pass

    def test_remove_outliers_travel_time(self):
        """No travel time outside the quantile limits survives the cleaning."""
        # TODO improve test
        expected_shape = 0
        df_bikes = transform_types_bikes(self.df_bikes)
        # The limits are taken from the data *before* outlier removal.
        upper_limit = df_bikes[COL_BIKES_TRAVEL_TIME].quantile(UPPER_QUANTILE)
        lower_limit = df_bikes[COL_BIKES_TRAVEL_TIME].quantile(LOWER_QUANTILE)
        df_bikes_clean = remove_outliers_travel_time(df_bikes)
        clean_travel_time = df_bikes_clean[COL_BIKES_TRAVEL_TIME]
        # A row is an outlier when it lies above the upper limit OR below the
        # lower limit. Combining the two masks with `&` selects nothing
        # whenever upper_limit >= lower_limit, which made the assertion pass
        # no matter what the cleaning step did.
        is_outlier = ((clean_travel_time > upper_limit)
                      | (clean_travel_time < lower_limit))
        self.assertEqual(df_bikes_clean[is_outlier].shape[0], expected_shape)

    def test_clean_date(self):
        """Date cleaning adds day-of-week, day and month columns."""
        # TODO improve test
        df_bikes = transform_types_bikes(self.df_bikes)
        df_bikes_clean = clean_date_bikes(df_bikes)
        self.assertIn(COL_BIKES_DAY_OF_WEEK, df_bikes_clean.columns)
        self.assertIn(COL_BIKES_DAY, df_bikes_clean.columns)
        self.assertIn(COL_BIKES_MONTH, df_bikes_clean.columns)

    def test_filter_out_employees(self):
        # TODO
        pass

    def test_clean_weather_data(self):
        """Cleaned rain, mean-temp and mean-wind columns are float32."""
        df_weather_clean = clean_weather_data(self.df_weather)
        self.assertEqual(df_weather_clean[COL_WEATHER_RAIN].dtype, 'float32')
        self.assertEqual(df_weather_clean[COL_WEATHER_TEMP_MEAN].dtype,
                         'float32')
        self.assertEqual(df_weather_clean[COL_WEATHER_WIND_MEAN].dtype,
                         'float32')
예제 #7
0
def runner(args: Namespace) -> None:
    """Build the modeling dataset for one sampling frequency and save it.

    Loads the cleaned bikes data and the per-day weather data, cleans the
    weather data, prepares the daily or hourly dataset and saves it to the
    ``PATH_DATASET`` entry for the requested frequency.

    Args:
        args: Parsed command-line arguments. ``args.home_path`` is the base
            passed to ``create_path``; ``args.sampling_frequency`` must be
            ``'daily'`` or ``'hourly'``.

    Raises:
        ValueError: If ``args.sampling_frequency`` is neither ``'daily'`` nor
            ``'hourly'``.
    """
    sampling_frequency = args.sampling_frequency
    # Fail fast, before any file is read. Previously an unsupported value left
    # `df_prepared` unbound and surfaced as an UnboundLocalError only after
    # all the input data had been loaded.
    if sampling_frequency not in ('daily', 'hourly'):
        raise ValueError(
            "Unsupported sampling frequency {!r}; expected 'daily' or "
            "'hourly'.".format(sampling_frequency))

    df_bikes_clean = load_dataframe_from_csv(
        create_path(args.home_path, PATH_BIKES_CLEAN),
        parse_dates=[COL_BIKES_DATE])
    df_weather = load_dataframe_from_json(
        create_path(args.home_path, PATH_AEMET_PER_DAY),
        parse_dates=[COL_WEATHER_DATE])
    df_weather = clean_weather_data(df_weather)

    # TODO extract daily/hourly value as constant
    if sampling_frequency == 'daily':
        df_prepared = prepare_daily_data(df_bikes_clean, df_weather)
    else:  # 'hourly' -- guaranteed by the validation above
        df_prepared = prepare_hourly_data(df_bikes_clean, df_weather)

    save_dataframe(
        df_prepared,
        create_path(args.home_path, PATH_DATASET.get(sampling_frequency)))
예제 #8
0
def runner(args: Namespace) -> None:
    """Train the deep-learning model, then save the model and its metrics.

    Args:
        args: Parsed command-line arguments. ``args.home_path`` is the base
            passed to ``create_path``; ``args.sampling_frequency`` selects the
            entries of ``PATH_DATASET`` and ``PATH_RESULTS``.
    """
    frequency = args.sampling_frequency

    dataset_path = create_path(args.home_path, PATH_DATASET.get(frequency))
    dataset = load_dataframe_from_csv(dataset_path)

    net, raw_metrics = deep_learning_model(dataset)

    # Every metric value is stringified before JSON serialization.
    serializable_metrics = {}
    for metric_name, metric_value in raw_metrics.items():
        serializable_metrics[metric_name] = str(metric_value)

    model_path = create_path(
        args.home_path, PATH_RESULTS[frequency]['deep-learning']['model'])
    save_model(net, model_path)

    metrics_path = create_path(
        args.home_path, PATH_RESULTS[frequency]['deep-learning']['metrics'])
    # TODO refactor this as a function
    with open(metrics_path, 'w') as metrics_file:
        metrics_file.write(json.dumps(serializable_metrics))
예제 #9
0
class XGBoostTest(unittest.TestCase):
    """Test case holding the daily and hourly datasets as class attributes.

    Both datasets are read while the class body is evaluated.
    NOTE(review): presumably the fixtures for the XGBoost model tests; no test
    methods are visible in this part of the file -- confirm.
    """
    # Daily dataset: configured path prefixed with three parent directory hops.
    PATH_DATASET_DAILY = '../../../' + PATH_DATASET.get('daily')
    df_daily = load_dataframe_from_csv(PATH_DATASET_DAILY)

    # Hourly dataset, located the same way.
    PATH_DATASET_HOURLY = '../../../' + PATH_DATASET.get('hourly')
    df_hourly = load_dataframe_from_csv(PATH_DATASET_HOURLY)