Code Example #1
File: gp_composer.py Project: STATAN/FEDOT
    def set_default_composer_params(self):
        if not self._composer.composer_requirements:
            models, _ = ModelTypesRepository().suitable_model(task_type=self.task.task_type)
            self._composer.composer_requirements = GPComposerRequirements(primary=models, secondary=models)
        if not self._composer.metrics:
            metric_function = MetricsRepository().metric_by_id(ClassificationMetricsEnum.ROCAUC_penalty)
            if self.task.task_type in (TaskTypesEnum.regression, TaskTypesEnum.ts_forecasting):
                metric_function = MetricsRepository().metric_by_id(RegressionMetricsEnum.RMSE)
            self._composer.metrics = metric_function
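The metric object returned by MetricsRepository().metric_by_id(...) is a plain callable that the test examples further down in this listing apply to a fitted chain together with reference data. A minimal sketch of that call pattern, assuming a fitted chain and a train InputData instance as in Code Examples #9 and #10 (imports omitted, as in the rest of this listing):

# Illustrative sketch only: `chain` and `train` are assumed to be prepared as in the test examples below.
metric_function = MetricsRepository().metric_by_id(ClassificationMetricsEnum.ROCAUC_penalty)
metric_value = metric_function(chain=chain, reference_data=train)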
Code Example #2
    def get_metrics(self,
                    target: Union[np.ndarray, pd.Series] = None,
                    metric_names: Union[str, List[str]] = None) -> dict:
        """
        Get quality metrics for the fitted graph

        :param target: the array with target values of test data
        :param metric_names: the names of required metrics
        :return: the values of quality metrics
        """
        if metric_names is None:
            metric_names = self.metric_name

        if target is not None:
            if self.test_data is None:
                self.test_data = InputData(
                    idx=range(len(self.prediction.predict)),
                    features=None,
                    target=target[:len(self.prediction.predict)],
                    task=self.train_data.task,
                    data_type=self.train_data.data_type)
            else:
                self.test_data.target = target[:len(self.prediction.predict)]

        real = self.test_data

        # TODO change to sklearn metrics
        if not isinstance(metric_names, List):
            metric_names = [metric_names]

        calculated_metrics = dict()
        for metric_name in metric_names:
            if composer_metrics_mapping[metric_name] is NotImplemented:
                self.log.warn(f'{metric_name} is not available as metric')
            else:
                prediction = self.prediction
                metric_cls = MetricsRepository().metric_class_by_id(
                    composer_metrics_mapping[metric_name])
                if metric_cls.output_mode == 'labels':
                    prediction = self.prediction_labels
                if self.problem.task_type == TaskTypesEnum.ts_forecasting:
                    real.target = real.target[~np.isnan(prediction.predict)]
                    prediction.predict = prediction.predict[
                        ~np.isnan(prediction.predict)]

                metric_value = abs(
                    metric_cls.metric(reference=real, predicted=prediction))
                calculated_metrics[metric_name] = metric_value

        return calculated_metrics
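In user code, get_metrics is usually reached through a fitted FEDOT model object. A hedged sketch, assuming such an object named model, a held-out target array test_target, and that 'roc_auc' and 'f1' are keys of composer_metrics_mapping (the mapping itself is not shown in this listing):

# Sketch only: `model` (exposing the get_metrics shown above) and `test_target` are assumed to exist.
metrics = model.get_metrics(target=test_target, metric_names=['roc_auc', 'f1'])
print(metrics)  # e.g. {'roc_auc': ..., 'f1': ...}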
Code Example #3
File: test_composer.py Project: STATAN/FEDOT
def test_random_composer(data_fixture, request):
    random.seed(1)
    np.random.seed(1)
    data = request.getfixturevalue(data_fixture)
    dataset_to_compose = data
    dataset_to_validate = data

    available_model_types, _ = ModelTypesRepository().suitable_model(
        task_type=TaskTypesEnum.classification)

    metric_function = MetricsRepository().metric_by_id(
        ClassificationMetricsEnum.ROCAUC)

    random_composer = RandomSearchComposer(iter_num=1)
    req = ComposerRequirements(primary=available_model_types,
                               secondary=available_model_types)
    chain_random_composed = random_composer.compose_chain(
        data=dataset_to_compose,
        initial_chain=None,
        composer_requirements=req,
        metrics=metric_function)
    chain_random_composed.fit_from_scratch(input_data=dataset_to_compose)

    predicted_random_composed = chain_random_composed.predict(
        dataset_to_validate)

    roc_on_valid_random_composed = roc_auc(
        y_true=dataset_to_validate.target,
        y_score=predicted_random_composed.predict)

    assert roc_on_valid_random_composed > 0.6
Code Example #4
def test_composer_cv_correct():
    """ Checks if the composer works correctly when using cross validation for
    time series """
    folds = 2
    _, forecast_len, validation_blocks, time_series = configure_experiment()

    primary_operations, secondary_operations = get_available_operations()

    # Composer parameters
    composer_requirements = GPComposerRequirements(
        primary=primary_operations,
        secondary=secondary_operations,
        max_arity=3,
        max_depth=3,
        pop_size=2,
        num_of_generations=2,
        crossover_prob=0.8,
        mutation_prob=0.8,
        timeout=datetime.timedelta(seconds=5),
        cv_folds=folds,
        validation_blocks=validation_blocks)

    init_pipeline = get_simple_ts_pipeline()
    metric_function = MetricsRepository().metric_by_id(
        RegressionMetricsEnum.RMSE)
    builder = GPComposerBuilder(task=time_series.task). \
        with_requirements(composer_requirements). \
        with_metrics(metric_function).with_initial_pipeline(init_pipeline)
    composer = builder.build()

    obtained_pipeline = composer.compose_pipeline(data=time_series,
                                                  is_visualise=False)
    assert isinstance(obtained_pipeline, Pipeline)
Code Example #5
File: multiclass_prediction.py Project: STATAN/FEDOT
def get_model(train_file_path: str,
              cur_lead_time: datetime.timedelta = timedelta(seconds=60)):
    task = Task(task_type=TaskTypesEnum.classification)
    dataset_to_compose = InputData.from_csv(train_file_path, task=task)

    # the search of the models provided by the framework
    # that can be used as nodes in a chain for the selected task
    models_repo = ModelTypesRepository()
    available_model_types, _ = models_repo.suitable_model(
        task_type=task.task_type, tags=['simple'])

    metric_function = MetricsRepository(). \
        metric_by_id(ClassificationMetricsEnum.ROCAUC_penalty)

    composer_requirements = GPComposerRequirements(
        primary=available_model_types,
        secondary=available_model_types,
        max_lead_time=cur_lead_time)

    # Create the genetic programming-based composer that allows finding
    # the optimal structure of the composite model
    builder = GPComposerBuilder(task).with_requirements(
        composer_requirements).with_metrics(metric_function)
    composer = builder.build()

    # run the search for the best suitable model
    chain_evo_composed = composer.compose_chain(data=dataset_to_compose,
                                                is_visualise=False)
    chain_evo_composed.fit(input_data=dataset_to_compose)

    return chain_evo_composed
Code Example #6
File: test_composer.py Project: STATAN/FEDOT
def test_parameter_free_composer_build_chain_correct(data_fixture, request):
    random.seed(1)
    np.random.seed(1)
    data = request.getfixturevalue(data_fixture)
    dataset_to_compose = data
    dataset_to_validate = data
    available_model_types, _ = ModelTypesRepository().suitable_model(
        task_type=TaskTypesEnum.classification)

    metric_function = MetricsRepository().metric_by_id(
        ClassificationMetricsEnum.ROCAUC)

    req = GPComposerRequirements(primary=available_model_types,
                                 secondary=available_model_types,
                                 max_arity=2,
                                 max_depth=2,
                                 pop_size=2,
                                 num_of_generations=1,
                                 crossover_prob=0.4,
                                 mutation_prob=0.5)
    opt_params = GPChainOptimiserParameters(
        genetic_scheme_type=GeneticSchemeTypesEnum.parameter_free)
    builder = GPComposerBuilder(task=Task(
        TaskTypesEnum.classification)).with_requirements(req).with_metrics(
            metric_function).with_optimiser_parameters(opt_params)
    gp_composer = builder.build()
    chain_gp_composed = gp_composer.compose_chain(data=dataset_to_compose)

    chain_gp_composed.fit_from_scratch(input_data=dataset_to_compose)
    predicted_gp_composed = chain_gp_composed.predict(dataset_to_validate)

    roc_on_valid_gp_composed = roc_auc(y_true=dataset_to_validate.target,
                                       y_score=predicted_gp_composed.predict)

    assert roc_on_valid_gp_composed > 0.6
Code Example #7
    def _check_so_improvements(self, offspring: List[Any]) -> Tuple[bool, bool]:
        suppl_metric = MetricsRepository().metric_by_id(ComplexityMetricsEnum.node_num)
        best_in_offspring = self.get_best_individual(offspring, equivalents_from_current_pop=False)
        fitness_improved = best_in_offspring.fitness < self.best_individual.fitness
        complexity_decreased = suppl_metric(best_in_offspring) < suppl_metric(
            self.best_individual) and best_in_offspring.fitness <= self.best_individual.fitness
        return fitness_improved, complexity_decreased
Code Example #8
File: test_quality_metrics.py Project: STATAN/FEDOT
def test_structural_quality_correct():
    chain = default_valid_chain()
    metric_function = MetricsRepository().metric_by_id(
        ComplexityMetricsEnum.structural)
    expected_metric_value = 13
    actual_metric_value = metric_function(chain)
    assert actual_metric_value <= expected_metric_value
Code Example #9
File: test_quality_metrics.py Project: STATAN/FEDOT
def test_regression_quality_metric(data_setup):
    train, _ = data_setup
    chain = default_valid_chain()
    chain.fit(input_data=train)

    metric_function = MetricsRepository().metric_by_id(
        RegressionMetricsEnum.RMSE)
    metric_value = metric_function(chain=chain, reference_data=train)

    metric_function_with_penalty = \
        MetricsRepository().metric_by_id(RegressionMetricsEnum.RMSE_penalty)
    metric_value_with_penalty = \
        metric_function_with_penalty(chain=chain, reference_data=train)

    assert metric_value > 0
    assert metric_value_with_penalty > 0
    assert metric_value < metric_value_with_penalty
Code Example #10
File: test_quality_metrics.py Project: STATAN/FEDOT
def test_classification_quality_metric(data_setup):
    train, _ = data_setup
    chain = default_valid_chain()
    chain.fit(input_data=train)

    metric_function = MetricsRepository().metric_by_id(
        ClassificationMetricsEnum.ROCAUC)
    metric_value = metric_function(chain=chain, reference_data=train)

    metric_function_with_penalty = \
        MetricsRepository().metric_by_id(ClassificationMetricsEnum.ROCAUC_penalty)
    metric_value_with_penalty = \
        metric_function_with_penalty(chain=chain, reference_data=train)

    assert 0.5 < abs(metric_value) < 1.0
    assert 0.5 < abs(metric_value_with_penalty) < 1.0
    assert metric_value < metric_value_with_penalty
Code Example #11
File: test_builder.py Project: STATAN/FEDOT
def test_gp_composer_builder():
    task = Task(TaskTypesEnum.classification)

    available_model_types, _ = ModelTypesRepository().suitable_model(
        task_type=task.task_type)

    metric_function = MetricsRepository().metric_by_id(
        ClassificationMetricsEnum.ROCAUC)

    composer_requirements = GPComposerRequirements(
        primary=available_model_types,
        secondary=available_model_types,
        max_arity=3,
        max_depth=3,
        pop_size=5,
        num_of_generations=4,
        crossover_prob=0.8,
        mutation_prob=1,
        max_lead_time=datetime.timedelta(minutes=5))

    scheme_type = GeneticSchemeTypesEnum.steady_state
    optimiser_parameters = GPChainOptimiserParameters(
        genetic_scheme_type=scheme_type)

    builder_with_custom_params = GPComposerBuilder(
        task=task).with_requirements(composer_requirements).with_metrics(
            metric_function).with_optimiser_parameters(optimiser_parameters)

    composer_with_custom_params = builder_with_custom_params.build()

    assert composer_with_custom_params.optimiser.parameters.genetic_scheme_type == scheme_type
    assert composer_with_custom_params.metrics == metric_function
    assert composer_with_custom_params.composer_requirements.pop_size == 5
    assert composer_with_custom_params.composer_requirements.mutation_prob == 1

    builder_with_default_params = GPComposerBuilder(task=task)
    composer_with_default_params = builder_with_default_params.build()

    default_metric = MetricsRepository().metric_by_id(
        ClassificationMetricsEnum.ROCAUC_penalty)

    assert composer_with_default_params.optimiser.parameters.genetic_scheme_type == GeneticSchemeTypesEnum.generational
    assert composer_with_default_params.metrics == default_metric
    assert composer_with_default_params.composer_requirements.pop_size == 20
    assert composer_with_default_params.composer_requirements.mutation_prob == 0.8
Code Example #12
def test_regression_quality_metric(data_setup):
    train, _ = data_setup
    pipeline = default_valid_pipeline()
    pipeline.fit(input_data=train)

    for metric in RegressionMetricsEnum:
        metric_function = MetricsRepository().metric_by_id(metric)
        metric_value = metric_function(pipeline=pipeline, reference_data=train)
        assert metric_value > 0
Code Example #13
def test_classification_quality_metric(data_setup):
    train, _ = data_setup
    chain = default_valid_chain()
    chain.fit(input_data=train)

    for metric in ClassificationMetricsEnum:
        metric_function = MetricsRepository().metric_by_id(metric)
        metric_value = metric_function(chain=chain, reference_data=train)
        assert 0 < abs(metric_value) < sys.maxsize
Code Example #14
def run_class_scoring_case(sa_class: str, is_composed: bool = False, path_to_save=None):
    train_data, test_data = get_scoring_data()
    task = Task(TaskTypesEnum.classification)
    # the choice of the metric for the pipeline quality assessment during composition
    metric_function = MetricsRepository().metric_by_id(ClassificationMetricsEnum.ROCAUC_penalty)

    if is_composed:
        case = f'scoring_composed_{sa_class}'
        is_composed = True
Code Example #15
def run_regr_case(sa_class: str, is_composed: bool = False, path_to_save=None):
    train_data, test_data = get_cholesterol_data()
    task = Task(TaskTypesEnum.regression)
    # the choice of the metric for the pipeline quality assessment during composition
    metric_function = MetricsRepository().metric_by_id(RegressionMetricsEnum.RMSE)

    if is_composed:
        case = f'cholesterol_composed_{sa_class}'
        is_composed = True
Code Example #16
def test_regression_quality_metric(data_setup):
    train, _ = data_setup
    chain = default_valid_chain()
    chain.fit(input_data=train)

    for metric in RegressionMetricsEnum:
        metric_function = MetricsRepository().metric_by_id(metric)
        metric_value = metric_function(chain=chain, reference_data=train)
        assert metric_value > 0
Code Example #17
    def composer_metric(self, metrics, train_data: Union[InputData,
                                                         MultiModalData],
                        test_data: Union[InputData, MultiModalData],
                        pipeline: Pipeline) -> Optional[Tuple[Any]]:
        try:
            validate(pipeline)
            pipeline.log = self.log

            if type(metrics) is not list:
                metrics = [metrics]

            if self.cache is not None:
                # TODO improve cache
                pipeline.fit_from_cache(self.cache)

            if not pipeline.is_fitted:
                self.log.debug(
                    f'Pipeline {pipeline.root_node.descriptive_id} fit started'
                )
                pipeline.fit(input_data=train_data,
                             time_constraint=self.composer_requirements.
                             max_pipeline_fit_time)
                try:
                    self.cache.save_pipeline(pipeline)
                except Exception as ex:
                    self.log.info(f'Cache can not be saved: {ex}. Continue.')

            evaluated_metrics = ()
            for metric in metrics:
                if callable(metric):
                    metric_func = metric
                else:
                    metric_func = MetricsRepository().metric_by_id(metric)
                evaluated_metrics = evaluated_metrics + (metric_func(
                    pipeline, reference_data=test_data), )

            self.log.debug(
                f'Pipeline {pipeline.root_node.descriptive_id} with metrics: {list(evaluated_metrics)}'
            )

            # enforce memory cleaning
            pipeline.unfit()
            gc.collect()
        except Exception as ex:
            self.log.info(f'Pipeline assessment warning: {ex}. Continue.')
            evaluated_metrics = None

        return evaluated_metrics
Code Example #18
def run_regr_case(is_composed: bool = False, path_to_save=None):
    train_data, test_data = get_cholesterol_data()
    task = Task(TaskTypesEnum.regression)
    # the choice of the metric for the chain quality assessment during composition
    metric_function = MetricsRepository().metric_by_id(
        RegressionMetricsEnum.RMSE)

    if is_composed:
        case = 'cholesterol_composed'
        run_analysis_case(train_data,
                          test_data,
                          case,
                          task,
                          metric=metric_function,
                          is_composed=True,
                          result_path=path_to_save)
Code Example #19
def run_class_kc2_case(is_composed: bool = False, path_to_save=None):
    train_data, test_data = get_kc2_data()
    task = Task(TaskTypesEnum.classification)
    # the choice of the metric for the chain quality assessment during composition
    metric_function = MetricsRepository().metric_by_id(
        ClassificationMetricsEnum.ROCAUC_penalty)

    if is_composed:
        case = 'kc2_composed'
        run_analysis_case(train_data,
                          test_data,
                          case,
                          task,
                          metric=metric_function,
                          is_composed=True,
                          result_path=path_to_save)
Code Example #20
def run_credit_scoring_problem(train_file_path, test_file_path,
                               max_lead_time: datetime.timedelta = datetime.timedelta(minutes=5),
                               gp_optimiser_params: Optional[GPChainOptimiserParameters] = None, pop_size=None,
                               generations=None):
    dataset_to_compose = InputData.from_csv(train_file_path)
    dataset_to_validate = InputData.from_csv(test_file_path)

    available_model_types, _ = ModelTypesRepository(). \
        suitable_model(task_type=TaskTypesEnum.classification)

    # the choice of the metric for the chain quality assessment during composition
    metric_function = MetricsRepository().metric_by_id(ClassificationMetricsEnum.ROCAUC)

    if gp_optimiser_params:
        optimiser_parameters = gp_optimiser_params
    else:
        selection_types = [SelectionTypesEnum.tournament]
        crossover_types = [CrossoverTypesEnum.subtree]
        mutation_types = [MutationTypesEnum.simple, MutationTypesEnum.growth, MutationTypesEnum.reduce]
        regularization_type = RegularizationTypesEnum.decremental
        optimiser_parameters = GPChainOptimiserParameters(selection_types=selection_types,
                                                          crossover_types=crossover_types,
                                                          mutation_types=mutation_types,
                                                          regularization_type=regularization_type)
    composer_requirements = GPComposerRequirements(
        primary=available_model_types,
        secondary=available_model_types, max_arity=4,
        max_depth=3, pop_size=pop_size, num_of_generations=generations,
        crossover_prob=0.8, mutation_prob=0.8, max_lead_time=max_lead_time)

    # Create GP-based composer
    composer = GPComposer()

    chain_evo_composed = composer.compose_chain(data=dataset_to_compose,
                                                initial_chain=None,
                                                composer_requirements=composer_requirements,
                                                metrics=metric_function, optimiser_parameters=optimiser_parameters,
                                                is_visualise=False)
    chain_evo_composed.fit(input_data=dataset_to_compose, verbose=True)

    roc_on_valid_evo_composed = calculate_validation_metric(chain_evo_composed, dataset_to_validate)

    print(f'Composed ROC AUC is {round(roc_on_valid_evo_composed, 3)}')

    return roc_on_valid_evo_composed, chain_evo_composed, composer
Code Example #21
File: test_composer.py Project: STATAN/FEDOT
def test_composition_time(data_fixture, request):
    random.seed(1)
    np.random.seed(1)
    data = request.getfixturevalue(data_fixture)
    task = Task(TaskTypesEnum.classification)
    models_impl = ['mlp', 'knn']
    metric_function = MetricsRepository().metric_by_id(
        ClassificationMetricsEnum.ROCAUC)

    req_terminated_evolution = GPComposerRequirements(
        primary=models_impl,
        secondary=models_impl,
        max_arity=2,
        max_depth=2,
        pop_size=2,
        num_of_generations=5,
        crossover_prob=0.9,
        mutation_prob=0.9,
        max_lead_time=datetime.timedelta(minutes=0.000001))

    builder = GPComposerBuilder(task).with_requirements(
        req_terminated_evolution).with_metrics(metric_function)

    gp_composer_terminated_evolution = builder.build()

    _ = gp_composer_terminated_evolution.compose_chain(data=data)

    req_completed_evolution = GPComposerRequirements(primary=models_impl,
                                                     secondary=models_impl,
                                                     max_arity=2,
                                                     max_depth=2,
                                                     pop_size=2,
                                                     num_of_generations=2,
                                                     crossover_prob=0.4,
                                                     mutation_prob=0.5)

    builder = GPComposerBuilder(task).with_requirements(
        req_completed_evolution).with_metrics(metric_function)
    gp_composer_completed_evolution = builder.build()

    _ = gp_composer_completed_evolution.compose_chain(data=data)

    assert len(gp_composer_terminated_evolution.history.chains) == len(
        gp_composer_completed_evolution.history.chains)
Code Example #22
def _obtain_metric(task: Task, composer_metric: Union[str, Callable]):
    # the choice of the metric for the pipeline quality assessment during composition
    if composer_metric is None:
        composer_metric = MetricByTask(task.task_type).metric_cls.get_value

    if isinstance(composer_metric, str) or isinstance(composer_metric, Callable):
        composer_metric = [composer_metric]

    metric_function = []
    for specific_metric in composer_metric:
        if isinstance(specific_metric, Callable):
            specific_metric_function = specific_metric
        else:
            metric_id = composer_metrics_mapping.get(specific_metric, None)
            if metric_id is None:
                raise ValueError(f'Incorrect metric {specific_metric}')
            specific_metric_function = MetricsRepository().metric_by_id(metric_id)
        metric_function.append(specific_metric_function)
    return metric_function
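A short usage sketch for the helper above; the string key 'roc_auc' is assumed to be present in composer_metrics_mapping, which is not shown in this listing:

# Hedged sketch: relies on _obtain_metric defined above; 'roc_auc' is an assumed mapping key.
task = Task(TaskTypesEnum.classification)
metric_functions = _obtain_metric(task=task, composer_metric='roc_auc')
# Each entry is a callable, e.g. metric_functions[0](pipeline, reference_data=test_data) as in Code Example #17.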
Code Example #23
def metric_evaluation(pipeline, train_data: InputData, test_data: InputData,
                      metrics: list, evaluated_metrics: list, vb_number: int = None):
    """ Pipeline training and metrics assessment

    :param pipeline: pipeline for validation
    :param train_data: InputData for train
    :param test_data: InputData for validation
    :param metrics: list with metrics for evaluation
    :param evaluated_metrics: list with metrics values
    :param vb_number: number of validation blocks
    """
    pipeline.fit_from_scratch(train_data)

    for index, metric in enumerate(metrics):
        if callable(metric):
            metric_func = metric
        else:
            metric_func = MetricsRepository().metric_by_id(metric)
        metric_value = metric_func(pipeline, reference_data=test_data, validation_blocks=vb_number)
        evaluated_metrics[index].extend([metric_value])
    return evaluated_metrics
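A hedged driver for metric_evaluation; the pipeline, the train/test InputData and the exact metric ids are assumed to be prepared elsewhere:

# Sketch only: `pipeline`, `train_data` and `test_data` are assumed to exist.
metrics = [RegressionMetricsEnum.RMSE, RegressionMetricsEnum.MAE]
evaluated_metrics = [[] for _ in metrics]  # one accumulator list per metric id
evaluated_metrics = metric_evaluation(pipeline, train_data, test_data,
                                      metrics, evaluated_metrics, vb_number=3)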
Code Example #24
    def composer_metric(self, metrics, train_data: InputData,
                        test_data: InputData,
                        chain: Chain) -> Optional[Tuple[Any]]:
        try:
            validate(chain)
            chain.log = self.log

            if type(metrics) is not list:
                metrics = [metrics]

            if self.cache is not None:
                # TODO improve cache
                chain.fit_from_cache(self.cache)

            if not chain.is_fitted:
                self.log.debug(
                    f'Chain {chain.root_node.descriptive_id} fit started')
                chain.fit(input_data=train_data,
                          time_constraint=self.composer_requirements.
                          max_chain_fit_time)
                self.cache.save_chain(chain)

            evaluated_metrics = ()
            for metric in metrics:
                if callable(metric):
                    metric_func = metric
                else:
                    metric_func = MetricsRepository().metric_by_id(metric)
                evaluated_metrics = evaluated_metrics + (metric_func(
                    chain, reference_data=test_data), )

            self.log.debug(
                f'Chain {chain.root_node.descriptive_id} with metrics: {list(evaluated_metrics)}'
            )

        except Exception as ex:
            self.log.info(f'Chain assessment warning: {ex}. Continue.')
            evaluated_metrics = None

        return evaluated_metrics
Code Example #25
File: test_composer.py Project: STATAN/FEDOT
def test_fixed_structure_composer(data_fixture, request):
    random.seed(1)
    np.random.seed(1)
    data = request.getfixturevalue(data_fixture)
    dataset_to_compose = data
    dataset_to_validate = data

    available_model_types = ['logit', 'lda', 'knn']

    metric_function = MetricsRepository().metric_by_id(
        ClassificationMetricsEnum.ROCAUC)

    req = GPComposerRequirements(primary=available_model_types,
                                 secondary=available_model_types,
                                 pop_size=2,
                                 num_of_generations=1,
                                 crossover_prob=0.4,
                                 mutation_prob=0.5,
                                 add_single_model_chains=False)

    reference_chain = get_class_chain()
    builder = FixedStructureComposerBuilder(
        task=Task(TaskTypesEnum.classification)).with_initial_chain(
            reference_chain).with_metrics(metric_function).with_requirements(
                req)
    composer = builder.build()

    chain_composed = composer.compose_chain(data=dataset_to_compose)
    chain_composed.fit_from_scratch(input_data=dataset_to_compose)

    predicted_random_composed = chain_composed.predict(dataset_to_validate)

    roc_on_valid_random_composed = roc_auc(
        y_true=dataset_to_validate.target,
        y_score=predicted_random_composed.predict)

    assert roc_on_valid_random_composed > 0.6
    assert chain_composed.depth == reference_chain.depth
    assert chain_composed.length == reference_chain.length
Code Example #26
def run_ts_forecasting_problem(forecast_length=50,
                               with_visualisation=True) -> None:
    """ Function launch time series task with composing

    :param forecast_length: length of the forecast
    :param with_visualisation: whether the plots should be shown
    """
    file_path = '../cases/data/metocean/metocean_data_test.csv'

    df = pd.read_csv(file_path)
    time_series = np.array(df['sea_height'])

    # Train/test split
    train_part = time_series[:-forecast_length]
    test_part = time_series[-forecast_length:]

    # Prepare data for train and test
    train_input, predict_input, task = prepare_train_test_input(
        train_part, forecast_length)

    # Get chain with pre-defined structure
    init_chain = get_source_chain()

    # Init check
    preds = fit_predict_for_chain(chain=init_chain,
                                  train_input=train_input,
                                  predict_input=predict_input)
    display_validation_metric(predicted=preds,
                              real=test_part,
                              actual_values=time_series,
                              is_visualise=with_visualisation)

    # Get available_operations type
    primary_operations, secondary_operations = get_available_operations()

    # Composer parameters
    composer_requirements = GPComposerRequirements(
        primary=primary_operations,
        secondary=secondary_operations,
        max_arity=3,
        max_depth=8,
        pop_size=10,
        num_of_generations=15,
        crossover_prob=0.8,
        mutation_prob=0.8,
        max_lead_time=datetime.timedelta(minutes=10),
        allow_single_operations=False)

    mutation_types = [
        MutationTypesEnum.parameter_change, MutationTypesEnum.simple,
        MutationTypesEnum.reduce
    ]
    optimiser_parameters = GPChainOptimiserParameters(
        mutation_types=mutation_types)

    metric_function = MetricsRepository().metric_by_id(
        RegressionMetricsEnum.MAE)
    builder = GPComposerBuilder(task=task). \
        with_optimiser_parameters(optimiser_parameters).\
        with_requirements(composer_requirements).\
        with_metrics(metric_function).with_initial_chain(init_chain)
    composer = builder.build()

    obtained_chain = composer.compose_chain(data=train_input,
                                            is_visualise=False)

    ################################
    # Obtained chain visualisation #
    ################################
    if with_visualisation:
        visualiser = ChainVisualiser()
        visualiser.visualise(obtained_chain)

    preds = fit_predict_for_chain(chain=obtained_chain,
                                  train_input=train_input,
                                  predict_input=predict_input)

    display_validation_metric(predicted=preds,
                              real=test_part,
                              actual_values=time_series,
                              is_visualise=with_visualisation)

    display_chain_info(obtained_chain)
Code Example #27
File: b_fedot.py Project: yujiimt/FEDOT-benchmarks
def run_fedot(params: 'ExecutionParams'):
    train_file_path = params.train_file
    test_file_path = params.test_file
    case_label = params.case_label
    task_type = params.task

    if task_type == TaskTypesEnum.classification:
        metric = ClassificationMetricsEnum.ROCAUC
    elif task_type == TaskTypesEnum.regression:
        metric = RegressionMetricsEnum.RMSE
    else:
        raise NotImplementedError()

    metric_func = MetricsRepository().metric_by_id(metric)

    task = Task(task_type)
    dataset_to_compose = InputData.from_csv(train_file_path, task=task)
    dataset_to_validate = InputData.from_csv(test_file_path, task=task)

    models_hyperparameters = get_models_hyperparameters()['FEDOT']
    cur_lead_time = models_hyperparameters['MAX_RUNTIME_MINS']

    saved_model_name = f'fedot_{case_label}_{task_type.name}_{cur_lead_time}_{metric.name}'
    loaded_model = load_fedot_model(saved_model_name)

    if not loaded_model:
        generations = models_hyperparameters['GENERATIONS']
        population_size = models_hyperparameters['POPULATION_SIZE']

        # the search of the models provided by the framework that can be used as nodes in a chain
        models_repo = ModelTypesRepository()
        available_model_types, _ = models_repo.suitable_model(task.task_type)

        heavy_models = ['svc', 'multinb', 'tfidf', 'qda']
        available_model_types = [
            model for model in available_model_types
            if model not in heavy_models
        ]

        # the choice and initialisation of the GP search
        composer_requirements = GPComposerRequirements(
            primary=available_model_types,
            secondary=available_model_types,
            max_arity=3,
            max_depth=2,
            pop_size=population_size,
            num_of_generations=generations,
            crossover_prob=0.8,
            mutation_prob=0.8,
            max_lead_time=datetime.timedelta(minutes=cur_lead_time),
            add_single_model_chains=True)

        # Create GP-based composer
        builder = GPComposerBuilder(task).with_requirements(
            composer_requirements).with_metrics(metric_func)
        gp_composer = builder.build()

        chain_gp_composed = gp_composer.compose_chain(data=dataset_to_compose)

        chain_gp_composed.fit_from_scratch(input_data=dataset_to_compose)
        save_fedot_model(chain_gp_composed, saved_model_name)
    else:
        chain_gp_composed = loaded_model

    evo_predicted = chain_gp_composed.predict(dataset_to_validate)
    evo_predicted_labels = chain_gp_composed.predict(dataset_to_validate,
                                                     output_mode='labels')

    return dataset_to_validate.target, evo_predicted.predict, evo_predicted_labels.predict
Code Example #28
def run_ts_forecasting_problem(forecast_length=50,
                               with_visualisation=True,
                               cv_folds=None) -> None:
    """ Function launch time series task with composing

    :param forecast_length: length of the forecast
    :param with_visualisation: whether the plots should be shown
    :param cv_folds: whether cross validation should be applied and, if so,
    how many folds to use
    """
    file_path = '../cases/data/metocean/metocean_data_test.csv'

    df = pd.read_csv(file_path)
    time_series = np.array(df['sea_height'])

    # Train/test split
    train_part = time_series[:-forecast_length]
    test_part = time_series[-forecast_length:]

    # Prepare data for train and test
    train_input, predict_input, task = prepare_train_test_input(
        train_part, forecast_length)

    # Get pipeline with pre-defined structure
    init_pipeline = get_source_pipeline()

    # Init check
    preds = fit_predict_for_pipeline(pipeline=init_pipeline,
                                     train_input=train_input,
                                     predict_input=predict_input)
    display_validation_metric(predicted=preds,
                              real=test_part,
                              actual_values=time_series,
                              is_visualise=with_visualisation)

    # Get available_operations type
    primary_operations, secondary_operations = get_available_operations()

    # Composer parameters
    composer_requirements = GPComposerRequirements(
        primary=primary_operations,
        secondary=secondary_operations,
        max_arity=3,
        max_depth=8,
        pop_size=10,
        num_of_generations=10,
        crossover_prob=0.8,
        mutation_prob=0.8,
        timeout=datetime.timedelta(minutes=10),
        cv_folds=cv_folds,
        validation_blocks=3)

    mutation_types = [
        parameter_change_mutation, MutationTypesEnum.simple,
        MutationTypesEnum.reduce
    ]
    optimiser_parameters = GPGraphOptimiserParameters(
        mutation_types=mutation_types)

    metric_function = MetricsRepository().metric_by_id(
        RegressionMetricsEnum.RMSE)
    builder = GPComposerBuilder(task=task). \
        with_optimiser_parameters(optimiser_parameters).\
        with_requirements(composer_requirements).\
        with_metrics(metric_function).with_initial_pipeline(init_pipeline)
    composer = builder.build()

    obtained_pipeline = composer.compose_pipeline(data=train_input,
                                                  is_visualise=False)

    ###################################
    # Obtained pipeline visualisation #
    ###################################
    if with_visualisation:
        obtained_pipeline.show()

    preds = fit_predict_for_pipeline(pipeline=obtained_pipeline,
                                     train_input=train_input,
                                     predict_input=predict_input)

    display_validation_metric(predicted=preds,
                              real=test_part,
                              actual_values=time_series,
                              is_visualise=with_visualisation)

    obtained_pipeline.print_structure()
Code Example #29
def run_credit_scoring_problem(
        train_file_path,
        test_file_path,
        max_lead_time: datetime.timedelta = datetime.timedelta(minutes=5),
        is_visualise=False,
        with_tuning=False):
    task = Task(TaskTypesEnum.classification)
    dataset_to_compose = InputData.from_csv(train_file_path, task=task)
    dataset_to_validate = InputData.from_csv(test_file_path, task=task)

    # the search of the models provided by the framework that can be used as nodes in a chain for the selected task
    available_model_types, _ = ModelTypesRepository().suitable_model(
        task_type=task.task_type)

    # the choice of the metric for the chain quality assessment during composition
    metric_function = MetricsRepository().metric_by_id(
        ClassificationMetricsEnum.ROCAUC_penalty)

    # the choice and initialisation of the GP search
    composer_requirements = GPComposerRequirements(
        primary=available_model_types,
        secondary=available_model_types,
        max_arity=3,
        max_depth=3,
        pop_size=20,
        num_of_generations=20,
        crossover_prob=0.8,
        mutation_prob=0.8,
        max_lead_time=max_lead_time)

    # GP optimiser parameters choice
    scheme_type = GeneticSchemeTypesEnum.steady_state
    optimiser_parameters = GPChainOptimiserParameters(
        genetic_scheme_type=scheme_type)

    # Create builder for composer and set composer params
    builder = GPComposerBuilder(
        task=task).with_requirements(composer_requirements).with_metrics(
            metric_function).with_optimiser_parameters(optimiser_parameters)

    # Create GP-based composer
    composer = builder.build()

    # the optimal chain generation by composition - the most time-consuming task
    chain_evo_composed = composer.compose_chain(data=dataset_to_compose,
                                                is_visualise=True)

    if with_tuning:
        chain_evo_composed.fine_tune_primary_nodes(
            input_data=dataset_to_compose, iterations=50, verbose=True)

    chain_evo_composed.fit(input_data=dataset_to_compose, verbose=True)

    if is_visualise:
        visualiser = ChainVisualiser()

        composer.log.info('History visualization started')
        visualiser.visualise_history(composer.history)
        composer.log.info('History visualization finished')
        composer.history.write_composer_history_to_csv()

        composer.log.info('Best chain visualization started')
        visualiser.visualise(chain_evo_composed)
        composer.log.info('Best chain visualization finished')

    # the quality assessment for the obtained composite models
    roc_on_valid_evo_composed = calculate_validation_metric(
        chain_evo_composed, dataset_to_validate)

    print(f'Composed ROC AUC is {round(roc_on_valid_evo_composed, 3)}')

    return roc_on_valid_evo_composed
Code Example #30
def run_river_composer_experiment(file_path, init_pipeline, file_to_save,
                                  iterations=20, tuner=None):
    """ Function launch experiment for river level prediction. Composing and
    tuner processes are available for such experiment.

    :param file_path: path to the file with river level data
    :param init_pipeline: pipeline to start composing process
    :param file_to_save: path to the file and file name to save report
    :param iterations: number of iterations to process
    :param tuner: tuner to apply after the composing process (NodesTuner or
    PipelineTuner); None disables tuning.
    """

    # Read dataframe and prepare train and test data
    data = InputData.from_csv(file_path, target_columns='level_station_2',
                              task=Task(TaskTypesEnum.regression),
                              columns_to_drop=['date'])
    train_input, predict_input = train_test_data_setup(data)
    y_data_test = np.array(predict_input.target)

    available_secondary_operations = ['ridge', 'lasso', 'dtreg',
                                      'xgbreg', 'adareg', 'knnreg',
                                      'linear', 'svr', 'poly_features',
                                      'scaling', 'ransac_lin_reg', 'rfe_lin_reg',
                                      'pca', 'ransac_non_lin_reg',
                                      'rfe_non_lin_reg', 'normalization']
    available_primary_operations = ['one_hot_encoding']

    # Report arrays
    obtained_pipelines = []
    depths = []
    maes = []
    for i in range(0, iterations):
        print(f'Iteration {i}\n')

        composer_requirements = GPComposerRequirements(
            primary=available_primary_operations,
            secondary=available_secondary_operations, max_arity=3,
            max_depth=8, pop_size=10, num_of_generations=5,
            crossover_prob=0.8, mutation_prob=0.8,
            timeout=datetime.timedelta(minutes=5))

        metric_function = MetricsRepository().metric_by_id(
            RegressionMetricsEnum.MAE)
        builder = GPComposerBuilder(task=data.task). \
            with_requirements(composer_requirements). \
            with_metrics(metric_function).with_initial_pipeline(init_pipeline)
        composer = builder.build()

        obtained_pipeline = composer.compose_pipeline(data=train_input, is_visualise=False)

        # Display info about obtained pipeline
        obtained_models, depth = get_pipeline_info(pipeline=obtained_pipeline)

        preds = fit_predict_for_pipeline(pipeline=obtained_pipeline,
                                         train_input=train_input,
                                         predict_input=predict_input)

        mse_value = mean_squared_error(y_data_test, preds, squared=False)
        mae_value = mean_absolute_error(y_data_test, preds)
        print(f'Obtained metrics for current iteration {i}:')
        print(f'RMSE - {mse_value:.2f}')
        print(f'MAE - {mae_value:.2f}\n')

        if tuner is not None:
            print(f'Start tuning process ...')
            pipeline_tuner = tuner(pipeline=obtained_pipeline, task=data.task,
                                   iterations=100)
            tuned_pipeline = pipeline_tuner.tune_pipeline(input_data=train_input,
                                                          loss_function=mean_absolute_error)

            preds_tuned = fit_predict_for_pipeline(pipeline=tuned_pipeline,
                                                   train_input=train_input,
                                                   predict_input=predict_input)

            mse_value = mean_squared_error(y_data_test, preds_tuned, squared=False)
            mae_value = mean_absolute_error(y_data_test, preds_tuned)

            print(f'Obtained metrics for current iteration {i} after tuning:')
            print(f'RMSE - {mse_value:.2f}')
            print(f'MAE - {mae_value:.2f}\n')

        obtained_pipelines.append(obtained_models)
        maes.append(mae_value)
        depths.append(depth)

    report = pd.DataFrame({'Pipeline': obtained_pipelines,
                           'Depth': depths,
                           'MAE': maes})
    report.to_csv(file_to_save, index=False)