Example #1
def get_split_data_paths():
    file_path_train = 'test/data/simple_regression_train.csv'
    file_path_test = 'test/data/simple_regression_test.csv'
    full_path_train = os.path.join(str(project_root()), file_path_train)
    full_path_test = os.path.join(str(project_root()), file_path_test)

    return full_path_train, full_path_test
Example #2
def get_cancer_case_data_paths() -> Tuple[str, str]:
    train_file_path = os.path.join('cases', 'data', 'benchmark',
                                   'cancer_train.csv')
    test_file_path = os.path.join('cases', 'data', 'benchmark',
                                  'cancer_test.csv')
    full_train_file_path = os.path.join(str(project_root()), train_file_path)
    full_test_file_path = os.path.join(str(project_root()), test_file_path)

    return full_train_file_path, full_test_file_path
Example #3
def get_scoring_case_data_paths() -> Tuple[str, str]:
    train_file_path = os.path.join('cases', 'data', 'scoring',
                                   'scoring_train.csv')
    test_file_path = os.path.join('cases', 'data', 'scoring',
                                  'scoring_test.csv')
    full_train_file_path = os.path.join(str(project_root()), train_file_path)
    full_test_file_path = os.path.join(str(project_root()), test_file_path)

    return full_train_file_path, full_test_file_path
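The path helpers above are typically consumed by InputData.from_csv, as Example #6 below shows. A minimal usage sketch under that assumption (this block is illustrative and not part of the original listing):

# Usage sketch: feed the returned paths into InputData.from_csv
# (classification Task as in Example #6; illustrative only).
train_path, test_path = get_scoring_case_data_paths()

task = Task(TaskTypesEnum.classification)
train_data = InputData.from_csv(train_path, task=task)
test_data = InputData.from_csv(test_path, task=task)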
Example #4
def test_metocean_forecasting_problem():
    project_root_path = str(project_root())
    full_path_train = os.path.join(project_root_path, 'test/data/simple_time_series.csv')
    full_path_test = full_path_train

    rmse = run_metocean_forecasting_problem(full_path_train, full_path_test,
                                            forecast_length=1, max_window_size=1)
    assert rmse < 50
Example #5
def test_credit_scoring_problem():
    project_root_path = str(project_root())
    full_path_train = os.path.join(project_root_path, 'test/data/simple_classification.csv')
    full_path_test = full_path_train

    roc_auc_test = run_credit_scoring_problem(full_path_train, full_path_test,
                                              max_lead_time=timedelta(minutes=0.1))
    assert roc_auc_test > 0.5
Example #6
def get_scoring_data():
    file_path_train = 'cases/data/scoring/scoring_train.csv'
    full_path_train = join(str(project_root()), file_path_train)

    # a dataset for a final validation of the composed model
    file_path_test = 'cases/data/scoring/scoring_test.csv'
    full_path_test = join(str(project_root()), file_path_test)
    task = Task(TaskTypesEnum.classification)
    train = InputData.from_csv(full_path_train, task=task)
    test = InputData.from_csv(full_path_test, task=task)

    return train, test
Example #7
def get_scoring_data():
    # the dataset was obtained from https://www.kaggle.com/c/GiveMeSomeCredit

    # a dataset that will be used as a train and test set during composition

    file_path_train = 'cases/data/scoring/scoring_train.csv'
    full_path_train = os.path.join(str(project_root()), file_path_train)

    # a dataset for a final validation of the composed model
    file_path_test = 'cases/data/scoring/scoring_test.csv'
    full_path_test = os.path.join(str(project_root()), file_path_test)

    return full_path_train, full_path_test
Example #8
def test_tpot_vs_fedot_example():
    project_root_path = str(project_root())
    file_path_train = os.path.join(project_root_path, 'test/data/simple_classification.csv')
    file_path_test = file_path_train

    auc = run_tpot_vs_fedot_example(file_path_train, file_path_test)
    assert auc > 0.5
Example #9
def run_gapfilling_case(file_path):
    """
    The function runs an example of filling in gaps in a time series of
    air temperature. Real-data case.

    :param file_path: path to the file
    :return: pandas dataframe with columns 'date','with_gap','ridge',
    'composite','temperature'
    """

    # Load dataframe
    full_path = os.path.join(str(project_root()), file_path)
    dataframe = pd.read_csv(full_path)
    dataframe['date'] = pd.to_datetime(dataframe['date'])

    # Filling in gaps based on inverted ridge regression model
    ridge_chain = get_simple_chain()
    ridge_gapfiller = ModelGapFiller(gap_value=-100.0, chain=ridge_chain)
    with_gap_array = np.array(dataframe['with_gap'])
    without_gap_arr_ridge = ridge_gapfiller.forward_inverse_filling(
        with_gap_array)
    dataframe['ridge'] = without_gap_arr_ridge

    # Filling in gaps based on a chain of 5 models
    composite_chain = get_composite_chain()
    composite_gapfiller = ModelGapFiller(gap_value=-100.0,
                                         chain=composite_chain)
    without_gap_composite = composite_gapfiller.forward_filling(with_gap_array)
    dataframe['composite'] = without_gap_composite
    return dataframe
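A hedged usage sketch for run_gapfilling_case; the relative csv path below is a hypothetical placeholder, and the column names follow the docstring above:

# Usage sketch (hypothetical path; gaps must be encoded as -100.0
# in the 'with_gap' column, as run_gapfilling_case assumes).
dataframe = run_gapfilling_case('cases/data/gapfilling/temperature.csv')
print(dataframe[['date', 'temperature', 'ridge', 'composite']].head())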
Example #10
def create_multi_clf_examples_from_excel(file_path: str,
                                         return_df: bool = False):
    """ Return dataframe from excel file or path to the csv file """
    df = pd.read_excel(file_path, engine='openpyxl')
    train, test = split_data(df)
    file_dir_name = file_path.replace('.', '/').split('/')[-2]
    file_csv_name = f'{file_dir_name}.csv'
    directory_names = ['examples', 'data', file_dir_name]

    # Ensure that the target directory exists
    ensure_directory_exists(directory_names)
    if return_df:
        # Need to return dataframe and path to the file in csv format
        path = os.path.join(directory_names[0], directory_names[1],
                            directory_names[2], file_csv_name)
        full_file_path = os.path.join(str(project_root()), path)
        save_file_to_csv(df, full_file_path)
        return df, full_file_path
    else:
        # Need to return only paths to the files with train and test data
        full_train_file_path, full_test_file_path = get_split_data_paths(
            directory_names)
        save_file_to_csv(train, full_train_file_path)
        save_file_to_csv(test, full_test_file_path)
        return full_train_file_path, full_test_file_path
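A short usage sketch showing both return modes of create_multi_clf_examples_from_excel; the xlsx file name is a hypothetical placeholder:

# Usage sketch (hypothetical xlsx path).
train_path, test_path = create_multi_clf_examples_from_excel('multiclass.xlsx')
df, csv_path = create_multi_clf_examples_from_excel('multiclass.xlsx',
                                                    return_df=True)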
Example #11
def test_multiclass_example():
    project_root_path = str(project_root())
    file_path_train = os.path.join(project_root_path,
                                   'test/data/multiclass_classification.csv')

    chain = get_model(file_path_train, cur_lead_time=timedelta(seconds=1))
    assert chain is not None
Example #12
def test_exogenous_ts_example():
    project_root_path = str(project_root())
    path = os.path.join(project_root_path, 'test/data/simple_sea_level.csv')
    run_exogenous_experiment(path_to_file=path,
                             len_forecast=50,
                             with_exog=True,
                             with_visualisation=False)
Example #13
def test_lagged_with_invalid_params_fit_correctly():
    """ The function define a chain with incorrect parameters in the lagged
    transformation. During the training of the chain, the parameter 'window_size'
    is corrected
    """
    window_size = 600
    len_forecast = 50

    # The length of the time series is 500 elements
    project_root_path = str(project_root())
    file_path = os.path.join(project_root_path,
                             'test/data/short_time_series.csv')
    df = pd.read_csv(file_path)
    time_series = np.array(df['sea_height'])

    task = Task(TaskTypesEnum.ts_forecasting,
                TsForecastingParams(forecast_length=len_forecast))

    train_input = InputData(idx=np.arange(0, len(time_series)),
                            features=time_series,
                            target=time_series,
                            task=task,
                            data_type=DataTypesEnum.ts)

    # Get chain with lagged transformation in it
    chain = get_ts_chain(window_size)

    # Fit it
    chain.fit(train_input)

    chain_was_fitted = True
    assert chain_was_fitted
Example #14
def get_kc2_data():
    file_path = 'cases/data/kc2/kc2.csv'
    full_path = join(str(project_root()), file_path)
    task = Task(TaskTypesEnum.classification)
    data = InputData.from_csv(full_path, task=task)
    train, test = train_test_data_setup(data)

    return train, test
Example #15
def get_cholesterol_data():
    file_path = 'cases/data/cholesterol/cholesterol.csv'
    full_path = join(str(project_root()), file_path)
    task = Task(TaskTypesEnum.regression)
    data = InputData.from_csv(full_path, task=task)
    train, test = train_test_data_setup(data)

    return train, test
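A minimal sketch of consuming these getters with a one-node chain, reusing the Chain/PrimaryNode API from Example #24; the 'logit' operation name is an assumption:

# Usage sketch (assumes the Chain/PrimaryNode API shown in Example #24;
# 'logit' as an operation name is an assumption).
train, test = get_kc2_data()

chain = Chain(PrimaryNode('logit'))
chain.fit(input_data=train)
prediction = chain.predict(test)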
Example #16
def test_spam_detection_problem():
    """ Simple launch of spam detection case """
    project_root_path = str(project_root())
    file_path_train = os.path.join(project_root_path,
                                   'test/data/spam_detection.csv')

    # Classification task based on text data
    run_text_problem_from_saved_meta_file(file_path_train)
Example #17
def test_evaluate_individuals():
    project_root_path = str(project_root())
    full_path_train = os.path.join(project_root_path,
                                   'test/data/simple_classification.csv')

    task = Task(TaskTypesEnum.classification)
    dataset_to_compose = InputData.from_csv(full_path_train, task=task)
    available_model_types, _ = OperationTypesRepository().suitable_operation(
        task_type=task.task_type)

    metric_function = ClassificationMetricsEnum.ROCAUC_penalty
    composer_requirements = GPComposerRequirements(
        primary=available_model_types, secondary=available_model_types)

    builder = GPComposerBuilder(task=task).with_requirements(composer_requirements). \
        with_metrics(metric_function)

    composer = builder.build()

    train_data, test_data = train_test_data_setup(
        dataset_to_compose,
        sample_split_ration_for_tasks[dataset_to_compose.task.task_type])
    metric_function_for_nodes = partial(composer.composer_metric,
                                        composer.metrics, train_data,
                                        test_data)
    population = [chain_first(), chain_second(), chain_third(), chain_fourth()]
    max_lead_time = datetime.timedelta(minutes=0.001)
    with CompositionTimer(max_lead_time=max_lead_time) as t:
        evaluate_individuals(individuals_set=population,
                             objective_function=metric_function_for_nodes,
                             is_multi_objective=False,
                             timer=t)
    assert len(population) == 1
    assert population[0].fitness is not None

    population = [chain_first(), chain_second(), chain_third(), chain_fourth()]
    max_lead_time = datetime.timedelta(minutes=5)
    with CompositionTimer(max_lead_time=max_lead_time) as t:
        evaluate_individuals(individuals_set=population,
                             objective_function=metric_function_for_nodes,
                             is_multi_objective=False,
                             timer=t)
    assert len(population) == 4
    assert all([ind.fitness is not None for ind in population])
Example #18
def prepare_input_data(train_file_path, test_file_path, forecast_length):
    """ Function for preparing InputData for train and test algorithm

    :param train_file_path: path to the csv file for training
    :param test_file_path: path to the csv file for validation
    :param forecast_length: forecast length for prediction

    :return dataset_to_train: InputData for train
    :return dataset_to_validate: InputData for validation
    """
    # specify the task to solve
    task_to_solve = Task(TaskTypesEnum.ts_forecasting,
                         TsForecastingParams(forecast_length=forecast_length))

    # Load train and test dataframes
    full_path_train = os.path.join(str(project_root()), train_file_path)
    full_path_test = os.path.join(str(project_root()), test_file_path)
    df_train = pd.read_csv(full_path_train)
    df_test = pd.read_csv(full_path_test)

    # Get idx for train and series for train
    train_feature_ts = np.ravel(np.array(df_train['wind_speed']))
    train_target_ts = np.ravel(np.array(df_train['sea_height']))
    idx_train = np.arange(0, len(train_feature_ts))
    dataset_to_train = InputData(idx=idx_train,
                                 features=train_feature_ts,
                                 target=train_target_ts,
                                 task=task_to_solve,
                                 data_type=DataTypesEnum.ts)

    start_forecast = len(idx_train)
    end_forecast = start_forecast + forecast_length
    idx_test = np.arange(start_forecast, end_forecast)

    test_target_ts = np.ravel(np.array(df_test['sea_height']))
    test_target_ts = test_target_ts[:forecast_length]
    dataset_to_validate = InputData(idx=idx_test,
                                    features=train_feature_ts,
                                    target=test_target_ts,
                                    task=task_to_solve,
                                    data_type=DataTypesEnum.ts)

    return dataset_to_train, dataset_to_validate
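A hedged sketch of feeding prepare_input_data into a forecasting chain, mirroring the TsForecastingChain usage from Example #21 and the metocean file paths from Example #27:

# Usage sketch (paths and forecast_length mirror Example #27;
# the single-node chain mirrors Example #21).
dataset_to_train, dataset_to_validate = prepare_input_data(
    'cases/data/metocean/metocean_data_train.csv',
    'cases/data/metocean/metocean_data_test.csv',
    forecast_length=72)

chain = TsForecastingChain(PrimaryNode('linear'))
chain.fit(input_data=dataset_to_train, verbose=False)
forecast = chain.predict(dataset_to_validate)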
Example #19
def test_chain_from_automl_example():
    project_root_path = str(project_root())
    experimental_repo_file = 'model_repository_with_automl.json'
    with OperationTypesRepository(experimental_repo_file) as _:
        file_path_train = os.path.join(project_root_path, 'test/data/simple_classification.csv')
        file_path_test = file_path_train

        auc = run_chain_from_automl(file_path_train, file_path_test, max_run_time=timedelta(seconds=1))

    assert auc > 0.5
Example #20
def test_multistep_example():
    project_root_path = str(project_root())
    path = os.path.join(project_root_path, 'test/data/simple_sea_level.csv')

    df = pd.read_csv(path)
    time_series = np.array(df['Level'])

    run_multistep_example(time_series,
                          len_forecast=20,
                          future_steps=40,
                          vis=False)
Example #21
def run_metocean_forecasting_problem(train_file_path,
                                     test_file_path,
                                     forecast_length=1,
                                     max_window_size=32,
                                     is_visualise=False):
    # specify the task to solve
    task_to_solve = Task(
        TaskTypesEnum.ts_forecasting,
        TsForecastingParams(forecast_length=forecast_length,
                            max_window_size=max_window_size))

    full_path_train = os.path.join(str(project_root()), train_file_path)
    dataset_to_train = InputData.from_csv(full_path_train,
                                          task=task_to_solve,
                                          data_type=DataTypesEnum.ts)

    # a dataset for a final validation of the composed model
    full_path_test = os.path.join(str(project_root()), test_file_path)
    dataset_to_validate = InputData.from_csv(full_path_test,
                                             task=task_to_solve,
                                             data_type=DataTypesEnum.ts)

    chain_simple = TsForecastingChain(PrimaryNode('linear'))
    chain_simple.fit(input_data=dataset_to_train, verbose=False)
    rmse_on_valid_simple = calculate_validation_metric(
        chain_simple.predict(dataset_to_validate),
        dataset_to_validate,
        f'full-simple_{forecast_length}',
        is_visualise=is_visualise)
    print(f'RMSE simple: {rmse_on_valid_simple}')

    chain_composite_lstm = get_composite_chain()
    chain_composite_lstm.fit(input_data=dataset_to_train, verbose=False)
    rmse_on_valid_lstm_only = calculate_validation_metric(
        chain_composite_lstm.predict(dataset_to_validate),
        dataset_to_validate,
        f'full-lstm-only_{forecast_length}',
        is_visualise=is_visualise)
    print(f'RMSE LSTM composite: {rmse_on_valid_lstm_only}')

    return rmse_on_valid_simple
Example #22
def test_forecasting_model_composing_example():
    project_root_path = str(project_root())
    file_path_train = os.path.join(project_root_path,
                                   'test/data/simple_time_series.csv')
    file_path_test = os.path.join(project_root_path,
                                  'test/data/simple_time_series_test.csv')

    rmse = run_metocean_forecasting_problem(file_path_train,
                                            file_path_test,
                                            max_window_size=1,
                                            forecast_length=4,
                                            with_visualisation=False)
    assert rmse > 0
Example #23
def create_multi_clf_examples_from_excel(file_path: str,
                                         return_df: bool = False):
    df = pd.read_excel(file_path)
    train, test = split_data(df)
    file_dir_name = file_path.replace('.', '/').split('/')[-2]
    file_csv_name = f'{file_dir_name}.csv'
    directory_names = ['examples', 'data', file_dir_name]
    ensure_directory_exists(directory_names)
    if return_df:
        path = os.path.join(directory_names[0], directory_names[1],
                            directory_names[2], file_csv_name)
        full_file_path = os.path.join(str(project_root()), path)
        save_file_to_csv(df, full_file_path)
        return df, full_file_path
    else:
        full_train_file_path, full_test_file_path = get_split_data_paths(
            directory_names)
        save_file_to_csv(train, full_train_file_path)
        save_file_to_csv(test, full_test_file_path)
        return full_train_file_path, full_test_file_path
Example #24
def test_river_levels_problem():
    # Initialise chain for river levels prediction
    node_encoder = PrimaryNode('one_hot_encoding')
    node_scaling = SecondaryNode('scaling', nodes_from=[node_encoder])
    node_ridge = SecondaryNode('ridge', nodes_from=[node_scaling])
    node_lasso = SecondaryNode('lasso', nodes_from=[node_scaling])
    node_final = SecondaryNode('rfr', nodes_from=[node_ridge, node_lasso])

    init_chain = Chain(node_final)

    project_root_path = str(project_root())
    file_path_train = os.path.join(project_root_path,
                                   'test/data/station_levels.csv')

    run_river_experiment(file_path=file_path_train,
                         chain=init_chain,
                         iterations=1,
                         tuner=ChainTuner,
                         tuner_iterations=10)

    is_experiment_finished = True

    assert is_experiment_finished
Example #25
def run_metocean_forecasting_problem(train_file_path,
                                     test_file_path,
                                     forecast_length=1,
                                     max_window_size=64,
                                     with_visualisation=True):
    # specify the task to solve
    task_to_solve = Task(
        TaskTypesEnum.ts_forecasting,
        TsForecastingParams(forecast_length=forecast_length,
                            max_window_size=max_window_size,
                            return_all_steps=False))

    full_path_train = os.path.join(str(project_root()), train_file_path)
    dataset_to_train = InputData.from_csv(full_path_train,
                                          task=task_to_solve,
                                          data_type=DataTypesEnum.ts)

    # a dataset for a final validation of the composed model
    full_path_test = os.path.join(str(project_root()), test_file_path)
    dataset_to_validate = InputData.from_csv(full_path_test,
                                             task=task_to_solve,
                                             data_type=DataTypesEnum.ts)

    metric_function = MetricsRepository().metric_by_id(
        RegressionMetricsEnum.RMSE)

    time_limit_min = 10
    available_model_types = [
        'linear', 'ridge', 'lasso', 'rfr', 'dtreg', 'knnreg', 'svr'
    ]

    if max_window_size == 1:
        # reduced model set and time limit for unit tests
        available_model_types = ['linear', 'ridge']
        time_limit_min = 0.001

    # each possible single-model chain
    for model in available_model_types:
        chain = TsForecastingChain(PrimaryNode(model))

        chain.fit(input_data=dataset_to_train, verbose=False)
        calculate_validation_metric(chain.predict(dataset_to_validate),
                                    dataset_to_validate,
                                    is_visualise=with_visualisation,
                                    label=model)

    # static multiscale chain
    multiscale_chain = get_composite_multiscale_chain()

    multiscale_chain.fit(input_data=dataset_to_train, verbose=False)
    calculate_validation_metric(multiscale_chain.predict(dataset_to_validate),
                                dataset_to_validate,
                                is_visualise=with_visualisation,
                                label='Fixed multiscale')

    # static all-in-one ensemble chain
    ens_chain = get_ensemble_chain()
    ens_chain.fit(input_data=dataset_to_train, verbose=False)
    calculate_validation_metric(ens_chain.predict(dataset_to_validate),
                                dataset_to_validate,
                                is_visualise=with_visualisation,
                                label='Ensemble composite')

    # optimized ensemble chain
    composer_requirements = GPComposerRequirements(
        primary=available_model_types,
        secondary=available_model_types,
        max_arity=5,
        max_depth=2,
        pop_size=10,
        num_of_generations=10,
        crossover_prob=0.8,
        mutation_prob=0.8,
        max_lead_time=datetime.timedelta(minutes=time_limit_min),
        add_single_model_chains=False)

    builder = GPComposerBuilder(task=task_to_solve).with_requirements(
        composer_requirements).with_metrics(metric_function)
    composer = builder.build()

    chain = composer.compose_chain(data=dataset_to_train, is_visualise=False)
    chain.fit_from_scratch(input_data=dataset_to_train, verbose=False)

    if with_visualisation:
        ComposerVisualiser.visualise(chain)

    calculate_validation_metric(chain.predict(dataset_to_validate),
                                dataset_to_validate,
                                is_visualise=with_visualisation,
                                label='Automated ensemble')

    # optimized multiscale chain

    available_model_types_primary = ['trend_data_model', 'residual_data_model']

    available_model_types_secondary = [
        'linear', 'ridge', 'lasso', 'rfr', 'dtreg', 'knnreg', 'svr'
    ]

    available_model_types_all = available_model_types_primary + available_model_types_secondary

    composer_requirements = GPComposerRequirements(
        primary=available_model_types_all,
        secondary=available_model_types_secondary,
        max_arity=5,
        max_depth=2,
        pop_size=10,
        num_of_generations=30,
        crossover_prob=0.8,
        mutation_prob=0.8,
        max_lead_time=datetime.timedelta(minutes=time_limit_min))

    builder = GPComposerBuilder(task=task_to_solve).with_requirements(
        composer_requirements).with_metrics(
            metric_function).with_initial_chain(multiscale_chain)
    composer = builder.build()

    chain = composer.compose_chain(data=dataset_to_train, is_visualise=False)
    chain.fit_from_scratch(input_data=dataset_to_train, verbose=False)

    if with_visualisation:
        visualiser = ChainVisualiser()
        visualiser.visualise(chain)

    rmse_on_valid = calculate_validation_metric(
        chain.predict(dataset_to_validate),
        dataset_to_validate,
        is_visualise=with_visualisation,
        label='Automated multiscale')

    return rmse_on_valid
Example #26
class ComposerVisualiser:
    root_parent_path = os.path.join('../', str(project_root()))
    root_parent_path_dirname = os.path.dirname(root_parent_path)
    temp_path = os.path.join(root_parent_path_dirname, 'tmp/')
    os.makedirs(temp_path, exist_ok=True)
    gif_prefix = 'for_gif_'

    @staticmethod
    def visualise(chain: Chain):
        try:
            chain.sort_nodes()
            graph, node_labels = as_nx_graph(chain=chain)
            pos = node_positions(graph.to_undirected())
            plt.figure(figsize=(10, 16))
            nx.draw(graph,
                    pos=pos,
                    with_labels=True,
                    labels=node_labels,
                    font_size=12,
                    font_family='calibri',
                    font_weight='bold',
                    node_size=7000,
                    width=2.0,
                    node_color=colors_by_node_labels(node_labels),
                    cmap='Set3')
            plt.show()
        except Exception as ex:
            print(f'Visualisation failed with {ex}')

    @staticmethod
    def _visualise_chains(chains, fitnesses):
        fitnesses = deepcopy(fitnesses)
        last_best_chain = chains[0]

        prev_fit = fitnesses[0]

        for ch_id, chain in enumerate(chains):
            graph, node_labels = as_nx_graph(chain=chain)
            pos = node_positions(graph.to_undirected())
            plt.rcParams['axes.titlesize'] = 20
            plt.rcParams['axes.labelsize'] = 20
            plt.rcParams['figure.figsize'] = [10, 10]
            plt.title('Current chain')
            nx.draw(graph,
                    pos=pos,
                    with_labels=True,
                    labels=node_labels,
                    font_size=12,
                    font_family='calibri',
                    font_weight='bold',
                    node_size=scaled_node_size(chain.length),
                    width=2.0,
                    node_color=colors_by_node_labels(node_labels),
                    cmap='Set3')
            path = f'{ComposerVisualiser.temp_path}ch_{ch_id}.png'
            plt.savefig(path, bbox_inches='tight')

            plt.cla()
            plt.clf()
            plt.close('all')

            path_best = f'{ComposerVisualiser.temp_path}best_ch_{ch_id}.png'

            if fitnesses[ch_id] > prev_fit:
                fitnesses[ch_id] = prev_fit
            else:
                last_best_chain = chain
            prev_fit = fitnesses[ch_id]

            best_graph, best_node_labels = as_nx_graph(chain=last_best_chain)
            pos = node_positions(best_graph.to_undirected())
            plt.rcParams['axes.titlesize'] = 20
            plt.rcParams['axes.labelsize'] = 20
            plt.rcParams['figure.figsize'] = [10, 10]
            plt.title(f'Best chain after {round(ch_id)} evals')
            nx.draw(best_graph,
                    pos=pos,
                    with_labels=True,
                    labels=best_node_labels,
                    font_size=12,
                    font_family='calibri',
                    font_weight='bold',
                    node_size=scaled_node_size(chain.length),
                    width=2.0,
                    node_color=colors_by_node_labels(best_node_labels),
                    cmap='Set3')

            plt.savefig(path_best, bbox_inches='tight')

            plt.cla()
            plt.clf()
            plt.close('all')

    @staticmethod
    def _visualise_convergence(fitness_history):
        fitness_history = deepcopy(fitness_history)
        prev_fit = fitness_history[0]
        for fit_id, fit in enumerate(fitness_history):
            if fit > prev_fit:
                fitness_history[fit_id] = prev_fit
            prev_fit = fitness_history[fit_id]
        ts_set = list(range(len(fitness_history)))
        df = pd.DataFrame({
            'ts': ts_set,
            'fitness': [-f for f in fitness_history]
        })

        ind = 0
        for ts in ts_set:
            plt.rcParams['axes.titlesize'] = 20
            plt.rcParams['axes.labelsize'] = 20
            plt.rcParams['figure.figsize'] = [10, 10]

            ind = ind + 1
            plt.plot(df['ts'], df['fitness'], label='Composer')
            plt.xlabel('Evaluation', fontsize=18)
            plt.ylabel('Best ROC AUC', fontsize=18)

            plt.axvline(x=ts, color='black')
            plt.legend(loc='upper left')

            path = f'{ComposerVisualiser.temp_path}{ind}.png'
            plt.savefig(path, bbox_inches='tight')

            plt.cla()
            plt.clf()
            plt.close('all')

    @staticmethod
    def visualise_history(chains, fitnesses):
        print('START VISUALISATION')
        try:
            ComposerVisualiser._clean(with_gif=True)
            ComposerVisualiser._visualise_chains(chains, fitnesses)
            ComposerVisualiser._visualise_convergence(fitnesses)
            ComposerVisualiser._merge_images(len(chains))
            ComposerVisualiser._combine_gifs()
            ComposerVisualiser._clean()
        except Exception as ex:
            print(f'Visualisation failed with {ex}')

    @staticmethod
    def _merge_images(num_images):
        for img_idx in (range(1, num_images)):
            images = list(
                map(Image.open, [
                    f'{ComposerVisualiser.temp_path}ch_{img_idx}.png',
                    f'{ComposerVisualiser.temp_path}best_ch_{img_idx}.png',
                    f'{ComposerVisualiser.temp_path}{img_idx}.png'
                ]))
            widths, heights = zip(*(i.size for i in images))

            total_width = sum(widths)
            max_height = max(heights)

            new_im = Image.new('RGB', (total_width, max_height))

            x_offset = 0
            for im in images:
                new_im.paste(im, (x_offset, 0))
                x_offset += im.size[0]

            new_im.save(
                f'{ComposerVisualiser.temp_path}{ComposerVisualiser.gif_prefix}{img_idx}.png'
            )

    @staticmethod
    def _combine_gifs():
        files = [
            file_name for file_name in iglob(
                f'{ComposerVisualiser.temp_path}{ComposerVisualiser.gif_prefix}*.png'
            )
        ]
        files_idx = [
            int(file_name[len(
                f'{ComposerVisualiser.temp_path}{ComposerVisualiser.gif_prefix}'
            ):(len(file_name) - len('.png'))]) for file_name in iglob(
                f'{ComposerVisualiser.temp_path}{ComposerVisualiser.gif_prefix}*.png'
            )
        ]
        files = [file for _, file in sorted(zip(files_idx, files))]

        with get_writer(
                f'{ComposerVisualiser.temp_path}final_{str(time())}.gif',
                mode='I',
                duration=0.5) as writer:
            for filename in files:
                image = imread(filename)
                writer.append_data(image)

    @staticmethod
    def _clean(with_gif=False):
        try:
            files = glob(f'{ComposerVisualiser.temp_path}*.png')
            if with_gif:
                files += glob(f'{ComposerVisualiser.temp_path}*.gif')
            for file in files:
                remove(file)
        except Exception as ex:
            print(ex)
Example #27
    # (fragment: tail of run_metocean_forecasting_problem; see Example #21 for the full definition)
    print(f'RMSE simple: {rmse_on_valid_simple}')

    chain_composite_lstm = get_composite_chain()
    chain_composite_lstm.fit(input_data=dataset_to_train, verbose=False)
    rmse_on_valid_lstm_only = calculate_validation_metric(
        chain_composite_lstm.predict(dataset_to_validate),
        dataset_to_validate,
        f'full-lstm-only_{forecast_length}',
        is_visualise=is_visualise)
    print(f'RMSE LSTM composite: {rmse_on_valid_lstm_only}')

    return rmse_on_valid_simple


if __name__ == '__main__':
    # the dataset was obtained from NEMO model simulation for sea surface height

    # a dataset that will be used as a train and test set during composition
    file_path_train = 'cases/data/metocean/metocean_data_train.csv'
    full_path_train = os.path.join(str(project_root()), file_path_train)

    # a dataset for a final validation of the composed model
    file_path_test = 'cases/data/metocean/metocean_data_test.csv'
    full_path_test = os.path.join(str(project_root()), file_path_test)

    run_metocean_forecasting_problem(full_path_train,
                                     full_path_test,
                                     forecast_length=72,
                                     max_window_size=72,
                                     is_visualise=True)