Beispiel #1
0
def run_analysis_case(train_data: InputData,
                      test_data: InputData,
                      case_name: str,
                      task,
                      metric,
                      is_composed=False,
                      result_path=None):
    """Fit a chain for the given task and run a structural sensitivity analysis on it."""
    chain = chain_by_task(task=task,
                          metric=metric,
                          data=train_data,
                          is_composed=is_composed)
    chain.fit(train_data)

    # When no output directory is supplied, create a per-case default one.
    if not result_path:
        result_path = join(default_fedot_data_dir(), 'sensitivity',
                           f'{case_name}')
        if not exists(result_path):
            makedirs(result_path)

    # Save a picture of the fitted chain next to the analysis artifacts.
    ChainVisualiser().visualise(chain, save_path=result_path)

    analyzer = ChainStructureAnalyze(
        chain=chain,
        train_data=train_data,
        test_data=test_data,
        all_nodes=True,
        path_to_save=result_path,
        approaches=[NodeDeletionAnalyze,
                    NodeReplaceOperationAnalyze])
    analysis_result = analyzer.analyze()

    print(f'chain analysis result {analysis_result}')
Beispiel #2
0
def results_visualization(history, composed_chains):
    """Render composing-history plots, Pareto/boxplot gifs, and every composed chain."""
    visualiser = ChainVisualiser()
    visualiser.visualise_history(history)
    visualiser.pareto_gif_create(history.archive_history, history.chains)
    visualiser.boxplots_gif_create(history.chains)
    for composed_chain in composed_chains:
        visualiser.visualise(composed_chain)
Beispiel #3
0
def test_extract_objectives():
    """extract_objectives should flip minimised objectives back to positive values."""
    visualiser = ChainVisualiser()
    individuals_count = 5
    populations_count = 3
    fitness_values = (-0.8, 0.1)
    # One weight of -1 per objective (minimisation convention).
    objective_weights = tuple(-1 for _ in fitness_values)
    population = [chain_first() for _ in range(individuals_count)]
    for individual in population:
        individual.fitness = MultiObjFitness(values=fitness_values,
                                             weights=objective_weights)
    individuals_history = [population for _ in range(populations_count)]
    all_objectives = visualiser.extract_objectives(
        individuals=individuals_history, transform_from_minimization=True)
    assert all_objectives[0][0] > 0 and all_objectives[0][2] > 0
    assert all_objectives[1][0] > 0 and all_objectives[1][2] > 0
Beispiel #4
0
def run_chain_ang_history_visualisation(generations=2,
                                        pop_size=10,
                                        with_chain_visualisation=True):
    """Visualise a synthetic composing history and, optionally, a sample chain."""
    # Build the sample chain and a generated history to plot.
    sample_chain = chain_first()
    composing_history = generate_history(generations, pop_size)

    visualiser = ChainVisualiser()
    visualiser.visualise_history(composing_history)
    if with_chain_visualisation:
        visualiser.visualise(sample_chain)
Beispiel #5
0
def run_ts_forecasting_problem(forecast_length: int = 50,
                               with_visualisation: bool = True) -> None:
    """ Launch a time-series forecasting task with chain composing.

    Fits a pre-defined chain as a baseline, then composes an improved chain
    with a GP-based composer and reports the validation metric for both.

    :param forecast_length: length of the forecast
    :param with_visualisation: is it needed to show the plots
    """
    # NOTE(review): path is relative to the script's working directory — verify when relocating.
    file_path = '../cases/data/metocean/metocean_data_test.csv'

    df = pd.read_csv(file_path)
    time_series = np.array(df['sea_height'])

    # Train/test split: the last `forecast_length` points are held out.
    train_part = time_series[:-forecast_length]
    test_part = time_series[-forecast_length:]

    # Prepare data for train and test
    train_input, predict_input, task = prepare_train_test_input(
        train_part, forecast_length)

    # Get chain with pre-defined structure
    init_chain = get_source_chain()

    # Init check: evaluate the hand-made chain before any composing.
    preds = fit_predict_for_chain(chain=init_chain,
                                  train_input=train_input,
                                  predict_input=predict_input)
    display_validation_metric(predicted=preds,
                              real=test_part,
                              actual_values=time_series,
                              is_visualise=with_visualisation)

    # Get available_operations type
    primary_operations, secondary_operations = get_available_operations()

    # Composer parameters
    composer_requirements = GPComposerRequirements(
        primary=primary_operations,
        secondary=secondary_operations,
        max_arity=3,
        max_depth=8,
        pop_size=10,
        num_of_generations=15,
        crossover_prob=0.8,
        mutation_prob=0.8,
        max_lead_time=datetime.timedelta(minutes=10),
        allow_single_operations=False)

    # Mutation operators available to the evolutionary search.
    mutation_types = [
        MutationTypesEnum.parameter_change, MutationTypesEnum.simple,
        MutationTypesEnum.reduce
    ]
    optimiser_parameters = GPChainOptimiserParameters(
        mutation_types=mutation_types)

    # MAE is the optimisation target; composing starts from `init_chain`.
    metric_function = MetricsRepository().metric_by_id(
        RegressionMetricsEnum.MAE)
    builder = GPComposerBuilder(task=task). \
        with_optimiser_parameters(optimiser_parameters).\
        with_requirements(composer_requirements).\
        with_metrics(metric_function).with_initial_chain(init_chain)
    composer = builder.build()

    obtained_chain = composer.compose_chain(data=train_input,
                                            is_visualise=False)

    ################################
    # Obtained chain visualisation #
    ################################
    if with_visualisation:
        visualiser = ChainVisualiser()
        visualiser.visualise(obtained_chain)

    # Evaluate the composed chain the same way as the initial one.
    preds = fit_predict_for_chain(chain=obtained_chain,
                                  train_input=train_input,
                                  predict_input=predict_input)

    display_validation_metric(predicted=preds,
                              real=test_part,
                              actual_values=time_series,
                              is_visualise=with_visualisation)

    display_chain_info(obtained_chain)
Beispiel #6
0
                y_score=predicted_labels,
                multi_class='ovo',
                average='macro'), 3)
    return roc_auc_valid


if __name__ == '__main__':
    # First file trains the model; the other two are used for inference only.
    file_path_first = r'./data/example1.xlsx'
    file_path_second = r'./data/example2.xlsx'
    file_path_third = r'./data/example3.xlsx'

    train_path, test_path = create_multi_clf_examples_from_excel(
        file_path_first)
    test_data = InputData.from_csv(test_path)

    fitted_model = get_model(train_path)

    # Draw the fitted chain structure.
    ChainVisualiser().visualise(fitted_model)

    roc_auc = validate_model_quality(fitted_model, test_path)
    print(f'ROC AUC metric is {roc_auc}')

    prediction_second = apply_model_to_data(fitted_model, file_path_second)
    print(prediction_second['forecast'])

    prediction_third = apply_model_to_data(fitted_model, file_path_third)
    print(prediction_third['forecast'])
Beispiel #7
0
def run_credit_scoring_problem(
        train_file_path,
        test_file_path,
        max_lead_time: datetime.timedelta = datetime.timedelta(minutes=5),
        is_visualise=False,
        with_tuning=False):
    """Compose, optionally tune, fit, and evaluate a chain for the credit-scoring task."""
    task = Task(TaskTypesEnum.classification)
    dataset_to_compose = InputData.from_csv(train_file_path, task=task)
    dataset_to_validate = InputData.from_csv(test_file_path, task=task)

    # Models the framework considers suitable as chain nodes for this task.
    suitable_models, _ = ModelTypesRepository().suitable_model(
        task_type=task.task_type)

    # Quality metric guiding the composition.
    quality_metric = MetricsRepository().metric_by_id(
        ClassificationMetricsEnum.ROCAUC_penalty)

    # Requirements for the evolutionary (GP) search.
    requirements = GPComposerRequirements(
        primary=suitable_models,
        secondary=suitable_models,
        max_arity=3,
        max_depth=3,
        pop_size=20,
        num_of_generations=20,
        crossover_prob=0.8,
        mutation_prob=0.8,
        max_lead_time=max_lead_time)

    optimiser_params = GPChainOptimiserParameters(
        genetic_scheme_type=GeneticSchemeTypesEnum.steady_state)

    # Assemble and build the GP-based composer.
    composer = (GPComposerBuilder(task=task)
                .with_requirements(requirements)
                .with_metrics(quality_metric)
                .with_optimiser_parameters(optimiser_params)
                .build())

    # Composition itself — the most time-consuming step.
    chain_evo_composed = composer.compose_chain(data=dataset_to_compose,
                                                is_visualise=True)

    if with_tuning:
        chain_evo_composed.fine_tune_primary_nodes(
            input_data=dataset_to_compose, iterations=50, verbose=True)

    chain_evo_composed.fit(input_data=dataset_to_compose, verbose=True)

    if is_visualise:
        visualiser = ChainVisualiser()

        composer.log.info('History visualization started')
        visualiser.visualise_history(composer.history)
        composer.log.info('History visualization finished')
        composer.history.write_composer_history_to_csv()

        composer.log.info('Best chain visualization started')
        visualiser.visualise(chain_evo_composed)
        composer.log.info('Best chain visualization finished')

    # Quality assessment of the composed model on the hold-out set.
    roc_on_valid_evo_composed = calculate_validation_metric(
        chain_evo_composed, dataset_to_validate)

    print(f'Composed ROC AUC is {round(roc_on_valid_evo_composed, 3)}')

    return roc_on_valid_evo_composed
Beispiel #8
0
        chain.add_node(root_node_child)
        root_of_tree.nodes_from.append(root_node_child)

    chain.add_node(root_of_tree)
    return chain


def generate_history(generations, pop_size):
    """Build a synthetic composing history of `generations` populations of chains."""
    history = ComposingHistory()
    for gen_id in range(generations):
        population = []
        for ind_id in range(pop_size):
            individual = chain_first()
            # Deterministic dummy fitness, decreasing across the population.
            individual.fitness = 1 / (gen_id * ind_id + 1)
            population.append(individual)
        history.add_to_history(population)
    return history


if __name__ == '__main__':
    # Demo: draw a generated composing history and a sample chain.
    num_generations = 2
    population_size = 10

    demo_chain = chain_first()
    demo_history = generate_history(num_generations, population_size)

    visualiser = ChainVisualiser()
    visualiser.visualise_history(demo_history)
    visualiser.visualise(demo_chain)
def run_credit_scoring_problem(
        train_file_path,
        test_file_path,
        max_lead_time: datetime.timedelta = datetime.timedelta(minutes=5),
        is_visualise=False,
        with_tuning=False,
        cache_path=None):
    """Compose, fit and evaluate a classification chain for the credit-scoring task.

    :param train_file_path: csv with the data used for composing and fitting
    :param test_file_path: csv with the hold-out data used for validation
    :param max_lead_time: wall-clock budget for the composer
    :param is_visualise: whether to draw the history and the best chain
    :param with_tuning: not supported yet; raises NotImplementedError if True
    :param cache_path: optional path for the fitted-operations cache
    :return: ROC AUC of the composed chain on the validation data
    """
    task = Task(TaskTypesEnum.classification)
    dataset_to_compose = InputData.from_csv(train_file_path, task=task)
    dataset_to_validate = InputData.from_csv(test_file_path, task=task)

    # the search of the models provided by the framework that can be used as nodes in a chain for the selected task
    available_model_types = get_operations_for_task(task=task, mode='models')

    # the choice of the metric for the chain quality assessment during composition
    metric_function = ClassificationMetricsEnum.ROCAUC_penalty
    # the choice and initialisation of the GP search
    composer_requirements = GPComposerRequirements(
        primary=available_model_types,
        secondary=available_model_types,
        max_arity=3,
        max_depth=3,
        pop_size=20,
        num_of_generations=20,
        crossover_prob=0.8,
        mutation_prob=0.8,
        max_lead_time=max_lead_time)

    # GP optimiser parameters choice
    scheme_type = GeneticSchemeTypesEnum.parameter_free
    optimiser_parameters = GPChainOptimiserParameters(
        genetic_scheme_type=scheme_type)

    # Create builder for composer and set composer params
    logger = default_log('FEDOT logger', verbose_level=4)
    builder = GPComposerBuilder(task=task).with_requirements(composer_requirements). \
        with_metrics(metric_function).with_optimiser_parameters(optimiser_parameters).with_logger(logger=logger)

    if cache_path:
        builder = builder.with_cache(cache_path)

    # Create GP-based composer
    composer = builder.build()

    # the optimal chain generation by composition - the most time-consuming task
    chain_evo_composed = composer.compose_chain(data=dataset_to_compose,
                                                is_visualise=True)

    if with_tuning:
        # TODO Add tuning
        raise NotImplementedError('Tuning is not supported')

    chain_evo_composed.fit(input_data=dataset_to_compose)

    composer.history.write_composer_history_to_csv()

    if is_visualise:
        visualiser = ChainVisualiser()

        composer.log.debug('History visualization started')
        visualiser.visualise_history(composer.history)
        composer.log.debug('History visualization finished')

        composer.log.debug('Best chain visualization started')
        visualiser.visualise(chain_evo_composed)
        composer.log.debug('Best chain visualization finished')

    # the quality assessment for the obtained composite models
    roc_on_valid_evo_composed = calculate_validation_metric(
        chain_evo_composed, dataset_to_validate)

    print(f'Composed ROC AUC is {round(roc_on_valid_evo_composed, 3)}')

    return roc_on_valid_evo_composed
Beispiel #10
0
 def show(self, path: str = None):
     """Render this chain, optionally saving the picture to `path`."""
     visualiser = ChainVisualiser()
     visualiser.visualise(self, path)
Beispiel #11
0
def run_metocean_forecasting_problem(train_file_path,
                                     test_file_path,
                                     forecast_length: int = 1,
                                     max_window_size: int = 64,
                                     with_visualisation: bool = True):
    """Run metocean time-series forecasting with several chain variants.

    Evaluates, in order: every single-model chain, a fixed multiscale chain,
    a fixed ensemble chain, a GP-composed ensemble chain, and finally a
    GP-composed multiscale chain (seeded with the fixed multiscale one).

    :param train_file_path: training csv path, relative to the project root
    :param test_file_path: validation csv path, relative to the project root
    :param forecast_length: number of steps to forecast ahead
    :param max_window_size: maximal lag window for the ts task
    :param with_visualisation: whether to show plots and chain graphs
    :return: validation metric of the final (automated multiscale) chain
    """
    # specify the task to solve
    task_to_solve = Task(
        TaskTypesEnum.ts_forecasting,
        TsForecastingParams(forecast_length=forecast_length,
                            max_window_size=max_window_size,
                            return_all_steps=False))

    full_path_train = os.path.join(str(project_root()), train_file_path)
    dataset_to_train = InputData.from_csv(full_path_train,
                                          task=task_to_solve,
                                          data_type=DataTypesEnum.ts)

    # a dataset for a final validation of the composed model
    full_path_test = os.path.join(str(project_root()), test_file_path)
    dataset_to_validate = InputData.from_csv(full_path_test,
                                             task=task_to_solve,
                                             data_type=DataTypesEnum.ts)

    # RMSE is the optimisation target for both composer runs below.
    metric_function = MetricsRepository().metric_by_id(
        RegressionMetricsEnum.RMSE)

    time_limit_min = 10
    available_model_types = [
        'linear', 'ridge', 'lasso', 'rfr', 'dtreg', 'knnreg', 'svr'
    ]

    if max_window_size == 1:
        # unit test model: shrink the search space and the time budget
        available_model_types = ['linear', 'ridge']
        time_limit_min = 0.001

    # each possible single-model chain
    for model in available_model_types:
        chain = TsForecastingChain(PrimaryNode(model))

        chain.fit(input_data=dataset_to_train, verbose=False)
        calculate_validation_metric(chain.predict(dataset_to_validate),
                                    dataset_to_validate,
                                    is_visualise=with_visualisation,
                                    label=model)

    # static multiscale chain
    multiscale_chain = get_composite_multiscale_chain()

    multiscale_chain.fit(input_data=dataset_to_train, verbose=False)
    calculate_validation_metric(multiscale_chain.predict(dataset_to_validate),
                                dataset_to_validate,
                                is_visualise=with_visualisation,
                                label='Fixed multiscale')

    # static all-in-one ensemble chain
    ens_chain = get_ensemble_chain()
    ens_chain.fit(input_data=dataset_to_train, verbose=False)
    calculate_validation_metric(ens_chain.predict(dataset_to_validate),
                                dataset_to_validate,
                                is_visualise=with_visualisation,
                                label='Ensemble composite')

    # optimized ensemble chain
    composer_requirements = GPComposerRequirements(
        primary=available_model_types,
        secondary=available_model_types,
        max_arity=5,
        max_depth=2,
        pop_size=10,
        num_of_generations=10,
        crossover_prob=0.8,
        mutation_prob=0.8,
        max_lead_time=datetime.timedelta(minutes=time_limit_min),
        add_single_model_chains=False)

    builder = GPComposerBuilder(task=task_to_solve).with_requirements(
        composer_requirements).with_metrics(metric_function)
    composer = builder.build()

    chain = composer.compose_chain(data=dataset_to_train, is_visualise=False)
    chain.fit_from_scratch(input_data=dataset_to_train, verbose=False)

    if with_visualisation:
        ComposerVisualiser.visualise(chain)

    calculate_validation_metric(chain.predict(dataset_to_validate),
                                dataset_to_validate,
                                is_visualise=with_visualisation,
                                label='Automated ensemble')

    # optimized multiscale chain

    available_model_types_primary = ['trend_data_model', 'residual_data_model']

    available_model_types_secondary = [
        'linear', 'ridge', 'lasso', 'rfr', 'dtreg', 'knnreg', 'svr'
    ]

    available_model_types_all = available_model_types_primary + available_model_types_secondary

    # Second composer run: seeded with the fixed multiscale chain above.
    composer_requirements = GPComposerRequirements(
        primary=available_model_types_all,
        secondary=available_model_types_secondary,
        max_arity=5,
        max_depth=2,
        pop_size=10,
        num_of_generations=30,
        crossover_prob=0.8,
        mutation_prob=0.8,
        max_lead_time=datetime.timedelta(minutes=time_limit_min))

    builder = GPComposerBuilder(task=task_to_solve).with_requirements(
        composer_requirements).with_metrics(
            metric_function).with_initial_chain(multiscale_chain)
    composer = builder.build()

    chain = composer.compose_chain(data=dataset_to_train, is_visualise=False)
    chain.fit_from_scratch(input_data=dataset_to_train, verbose=False)

    if with_visualisation:
        visualiser = ChainVisualiser()
        visualiser.visualise(chain)

    # Only the final chain's metric is returned to the caller.
    rmse_on_valid = calculate_validation_metric(
        chain.predict(dataset_to_validate),
        dataset_to_validate,
        is_visualise=with_visualisation,
        label='Automated multiscale')

    return rmse_on_valid
Beispiel #12
0
 def show(self):
     """Plot the Pareto front stored in this archive's items."""
     visualiser = ChainVisualiser()
     # Objectives are displayed in reversed order to match the axis layout.
     visualiser.visualise_pareto(archive=self.items,
                                 show=True,
                                 objectives_numbers=(1, 0),
                                 objectives_names=self.objective_names[::-1])