Example 1
def run_metocean_forecasting_problem(train_file_path,
                                     test_file_path,
                                     forecast_length=1,
                                     max_window_size=32,
                                     is_visualise=False):
    # specify the task to solve
    task_to_solve = Task(
        TaskTypesEnum.ts_forecasting,
        TsForecastingParams(forecast_length=forecast_length,
                            max_window_size=max_window_size))

    full_path_train = os.path.join(str(project_root()), train_file_path)
    dataset_to_train = InputData.from_csv(full_path_train,
                                          task=task_to_solve,
                                          data_type=DataTypesEnum.ts)

    # a dataset for a final validation of the composed model
    full_path_test = os.path.join(str(project_root()), test_file_path)
    dataset_to_validate = InputData.from_csv(full_path_test,
                                             task=task_to_solve,
                                             data_type=DataTypesEnum.ts)

    chain_simple = TsForecastingChain(PrimaryNode('linear'))
    chain_simple.fit(input_data=dataset_to_train, verbose=False)
    rmse_on_valid_simple = calculate_validation_metric(
        chain_simple.predict(dataset_to_validate),
        dataset_to_validate,
        f'full-simple_{forecast_length}',
        is_visualise=is_visualise)
    print(f'RMSE simple: {rmse_on_valid_simple}')

    chain_composite_lstm = get_composite_chain()
    chain_composite_lstm.fit(input_data=dataset_to_train, verbose=False)
    rmse_on_valid_lstm_only = calculate_validation_metric(
        chain_composite_lstm.predict(dataset_to_validate),
        dataset_to_validate,
        f'full-lstm-only_{forecast_length}',
        is_visualise=is_visualise)
    print(f'RMSE LSTM composite: {rmse_on_valid_lstm_only}')

    return rmse_on_valid_simple
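
get_composite_chain() is defined elsewhere in the case script and is not reproduced here; judging by the variable name, the real helper builds a composite chain that includes an LSTM node. A minimal sketch of how a composite chain can be assembled with the same node API (SecondaryNode is assumed to be available alongside PrimaryNode; the node types and topology below are illustrative, not the actual helper):

def get_composite_chain():
    # hypothetical topology: model the trend and the residual parts of the
    # series with separate primary nodes, then merge them in a secondary node
    node_trend = PrimaryNode('trend_data_model')
    node_residual = PrimaryNode('residual_data_model')
    node_final = SecondaryNode('linear',
                               nodes_from=[node_trend, node_residual])
    return TsForecastingChain(node_final)
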
Example 2
def run_metocean_forecasting_problem(train_file_path,
                                     test_file_path,
                                     forecast_length=1,
                                     max_window_size=64,
                                     with_visualisation=True):
    # specify the task to solve
    task_to_solve = Task(
        TaskTypesEnum.ts_forecasting,
        TsForecastingParams(forecast_length=forecast_length,
                            max_window_size=max_window_size,
                            return_all_steps=False))

    full_path_train = os.path.join(str(project_root()), train_file_path)
    dataset_to_train = InputData.from_csv(full_path_train,
                                          task=task_to_solve,
                                          data_type=DataTypesEnum.ts)

    # a dataset for a final validation of the composed model
    full_path_test = os.path.join(str(project_root()), test_file_path)
    dataset_to_validate = InputData.from_csv(full_path_test,
                                             task=task_to_solve,
                                             data_type=DataTypesEnum.ts)

    metric_function = MetricsRepository().metric_by_id(
        RegressionMetricsEnum.RMSE)

    time_limit_min = 10
    available_model_types = [
        'linear', 'ridge', 'lasso', 'rfr', 'dtreg', 'knnreg', 'svr'
    ]

    if max_window_size == 1:
        # reduced configuration used for unit tests
        available_model_types = ['linear', 'ridge']
        time_limit_min = 0.001

    # fit and validate each possible single-model chain
    for model in available_model_types:
        chain = TsForecastingChain(PrimaryNode(model))

        chain.fit(input_data=dataset_to_train, verbose=False)
        calculate_validation_metric(chain.predict(dataset_to_validate),
                                    dataset_to_validate,
                                    is_visualise=with_visualisation,
                                    label=model)

    # static multiscale chain
    multiscale_chain = get_composite_multiscale_chain()

    multiscale_chain.fit(input_data=dataset_to_train, verbose=False)
    calculate_validation_metric(multiscale_chain.predict(dataset_to_validate),
                                dataset_to_validate,
                                is_visualise=with_visualisation,
                                label='Fixed multiscale')

    # static all-in-one ensemble chain
    ens_chain = get_ensemble_chain()
    ens_chain.fit(input_data=dataset_to_train, verbose=False)
    calculate_validation_metric(ens_chain.predict(dataset_to_validate),
                                dataset_to_validate,
                                is_visualise=with_visualisation,
                                label='Ensemble composite')

    # optimized ensemble chain
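    # the GP composer below evolves a population of 10 candidate chains over
    # 10 generations (bounded by the time budget above), combining the listed
    # models into ensembles with depth up to 2 and arity up to 5; single-model
    # chains are excluded from the final result (add_single_model_chains=False)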
    composer_requirements = GPComposerRequirements(
        primary=available_model_types,
        secondary=available_model_types,
        max_arity=5,
        max_depth=2,
        pop_size=10,
        num_of_generations=10,
        crossover_prob=0.8,
        mutation_prob=0.8,
        max_lead_time=datetime.timedelta(minutes=time_limit_min),
        add_single_model_chains=False)

    builder = GPComposerBuilder(task=task_to_solve).with_requirements(
        composer_requirements).with_metrics(metric_function)
    composer = builder.build()

    chain = composer.compose_chain(data=dataset_to_train, is_visualise=False)
    chain.fit_from_scratch(input_data=dataset_to_train, verbose=False)

    if with_visualisation:
        ComposerVisualiser.visualise(chain)

    calculate_validation_metric(chain.predict(dataset_to_validate),
                                dataset_to_validate,
                                is_visualise=with_visualisation,
                                label='Automated ensemble')

    # optimized multiscale chain
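    # unlike the previous run, the trend/residual decomposition models are
    # also allowed as primary nodes, and the GP search (30 generations here)
    # is seeded with the fixed multiscale chain defined above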

    available_model_types_primary = ['trend_data_model', 'residual_data_model']

    available_model_types_secondary = [
        'linear', 'ridge', 'lasso', 'rfr', 'dtreg', 'knnreg', 'svr'
    ]

    available_model_types_all = available_model_types_primary + available_model_types_secondary

    composer_requirements = GPComposerRequirements(
        primary=available_model_types_all,
        secondary=available_model_types_secondary,
        max_arity=5,
        max_depth=2,
        pop_size=10,
        num_of_generations=30,
        crossover_prob=0.8,
        mutation_prob=0.8,
        max_lead_time=datetime.timedelta(minutes=time_limit_min))

    builder = GPComposerBuilder(task=task_to_solve).with_requirements(
        composer_requirements).with_metrics(
            metric_function).with_initial_chain(multiscale_chain)
    composer = builder.build()

    chain = composer.compose_chain(data=dataset_to_train, is_visualise=False)
    chain.fit_from_scratch(input_data=dataset_to_train, verbose=False)

    if with_visualisation:
        visualiser = ChainVisualiser()
        visualiser.visualise(chain)

    rmse_on_valid = calculate_validation_metric(
        chain.predict(dataset_to_validate),
        dataset_to_validate,
        is_visualise=with_visualisation,
        label='Automated multiscale')

    return rmse_on_valid
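
Both examples rely on a calculate_validation_metric() helper that is not listed. A minimal sketch of what it might look like, consistent with the two call styles above (positional label in Example 1, keyword label in Example 2); the RMSE computation and the predicted.predict attribute are assumptions about the case code, not its actual implementation:

import numpy as np


def calculate_validation_metric(predicted, dataset_to_validate,
                                label='', is_visualise=False):
    # hypothetical implementation: compare the forecast against the true
    # target of the validation set and report the RMSE
    real = np.ravel(dataset_to_validate.target)
    forecast = np.ravel(predicted.predict)
    # align lengths in case the forecast covers only the tail of the series
    rmse = float(np.sqrt(np.mean((real[-len(forecast):] - forecast) ** 2)))
    if is_visualise:
        # plotting of the real vs. forecasted series would go here
        pass
    print(f'RMSE ({label}): {rmse:.4f}')
    return rmse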