def results_visualization(history, composed_chains):
    """Draw everything available for a finished composition run.

    A single ``ChainVisualiser`` is used for all outputs: first
    ``visualise_history`` for ``history``, then ``pareto_gif_create`` and
    ``boxplots_gif_create`` fed from the history's ``archive_history`` and
    ``chains`` attributes, and finally ``visualise`` once for every item
    of ``composed_chains``.
    """
    chain_visualiser = ChainVisualiser()

    # Outputs derived from the composing history.
    chain_visualiser.visualise_history(history)
    chain_visualiser.pareto_gif_create(history.archive_history,
                                       history.chains)
    chain_visualiser.boxplots_gif_create(history.chains)

    # One visualisation per composed chain, in the order given.
    for composed_chain in composed_chains:
        chain_visualiser.visualise(composed_chain)
def run_chain_ang_history_visualisation(generations=2, pop_size=10,
                                        with_chain_visualisation=True):
    """Visualise a generated composing history and, optionally, a chain.

    The chain comes from ``chain_first()`` and the history from
    ``generate_history(generations, pop_size)``. The history is always
    visualised; the chain is visualised only when
    ``with_chain_visualisation`` is truthy.
    """
    # Both inputs are prepared up front, before any drawing starts.
    demo_chain = chain_first()
    demo_history = generate_history(generations, pop_size)

    chain_visualiser = ChainVisualiser()
    chain_visualiser.visualise_history(demo_history)

    if not with_chain_visualisation:
        return
    chain_visualiser.visualise(demo_chain)
def run_credit_scoring_problem(
        train_file_path,
        test_file_path,
        max_lead_time: datetime.timedelta = datetime.timedelta(minutes=5),
        is_visualise=False,
        with_tuning=False):
    """Compose, fit and validate a chain for the credit scoring task.

    Both CSV files are loaded as classification data. A GP-based composer
    is configured and used to compose a chain on the training data; the
    chain is optionally fine-tuned, then fitted on the training data and
    scored on the validation data.

    :param train_file_path: CSV file the chain is composed and fitted on
    :param test_file_path: CSV file the validation metric is computed on
    :param max_lead_time: forwarded to ``GPComposerRequirements``
    :param is_visualise: when truthy, the composing history and the
        resulting chain are visualised and the history is written to CSV
    :param with_tuning: when truthy, primary nodes are fine-tuned before
        the final fit
    :return: the value returned by ``calculate_validation_metric`` for
        the composed chain (reported in the printout as ROC AUC)
    """
    task = Task(TaskTypesEnum.classification)
    train_data = InputData.from_csv(train_file_path, task=task)
    validation_data = InputData.from_csv(test_file_path, task=task)

    # Models the framework offers for this task; the same list is used
    # for both primary and secondary nodes of the chain.
    candidate_models, _ = ModelTypesRepository().suitable_model(
        task_type=task.task_type)

    # Metric used to assess chain quality during composition.
    quality_metric = MetricsRepository().metric_by_id(
        ClassificationMetricsEnum.ROCAUC_penalty)

    # Settings of the GP search.
    requirements = GPComposerRequirements(
        primary=candidate_models,
        secondary=candidate_models,
        max_arity=3,
        max_depth=3,
        pop_size=20,
        num_of_generations=20,
        crossover_prob=0.8,
        mutation_prob=0.8,
        max_lead_time=max_lead_time)

    # Settings of the GP optimiser.
    optimiser_parameters = GPChainOptimiserParameters(
        genetic_scheme_type=GeneticSchemeTypesEnum.steady_state)

    # Assemble the composer step by step through its builder.
    builder = GPComposerBuilder(task=task)
    builder = builder.with_requirements(requirements)
    builder = builder.with_metrics(quality_metric)
    builder = builder.with_optimiser_parameters(optimiser_parameters)
    composer = builder.build()

    # Composition is the most time-consuming step of the whole run.
    chain_evo_composed = composer.compose_chain(data=train_data,
                                                is_visualise=True)

    if with_tuning:
        chain_evo_composed.fine_tune_primary_nodes(input_data=train_data,
                                                   iterations=50,
                                                   verbose=True)

    chain_evo_composed.fit(input_data=train_data, verbose=True)

    if is_visualise:
        visualiser = ChainVisualiser()

        composer.log.info('History visualization started')
        visualiser.visualise_history(composer.history)
        composer.log.info('History visualization finished')

        composer.history.write_composer_history_to_csv()

        composer.log.info('Best chain visualization started')
        visualiser.visualise(chain_evo_composed)
        composer.log.info('Best chain visualization finished')

    # Quality of the composed chain on the validation data.
    roc_on_valid_evo_composed = calculate_validation_metric(
        chain_evo_composed, validation_data)
    print(f'Composed ROC AUC is {round(roc_on_valid_evo_composed, 3)}')

    return roc_on_valid_evo_composed
chain.add_node(root_node_child) root_of_tree.nodes_from.append(root_node_child) chain.add_node(root_of_tree) return chain def generate_history(generations, pop_size): history = ComposingHistory() for gen in range(generations): new_pop = [] for idx in range(pop_size): chain = chain_first() chain.fitness = 1 / (gen * idx + 1) new_pop.append(chain) history.add_to_history(new_pop) return history if __name__ == '__main__': generations = 2 pop_size = 10 chain = chain_first() history = generate_history(generations, pop_size) visualiser = ChainVisualiser() visualiser.visualise_history(history) visualiser.visualise(chain)
def run_credit_scoring_problem(
        train_file_path,
        test_file_path,
        max_lead_time: datetime.timedelta = datetime.timedelta(minutes=5),
        is_visualise=False,
        with_tuning=False,
        cache_path=None):
    """Compose, fit and validate a chain for the credit scoring task.

    Both CSV files are loaded as classification data. A GP-based composer
    is configured and used to compose a chain on the training data; the
    chain is then fitted on the training data, the composer history is
    written to CSV, and the chain is scored on the validation data.

    :param train_file_path: CSV file the chain is composed and fitted on
    :param test_file_path: CSV file the validation metric is computed on
    :param max_lead_time: forwarded to ``GPComposerRequirements``
    :param is_visualise: when truthy, the composing history and the
        resulting chain are visualised after fitting
    :param with_tuning: tuning is not implemented; any truthy value is
        rejected
    :param cache_path: when truthy, passed to the builder's ``with_cache``
    :return: the value returned by ``calculate_validation_metric`` for
        the composed chain (reported in the printout as ROC AUC)
    :raises NotImplementedError: if ``with_tuning`` is truthy
    """
    if with_tuning:
        # TODO Add tuning
        # Fail fast: reject the unsupported option before loading data and
        # running the composition, which is the most time-consuming step.
        raise NotImplementedError('Tuning is not supported')

    task = Task(TaskTypesEnum.classification)
    dataset_to_compose = InputData.from_csv(train_file_path, task=task)
    dataset_to_validate = InputData.from_csv(test_file_path, task=task)

    # the search of the models provided by the framework that can be used
    # as nodes in a chain for the selected task
    available_model_types = get_operations_for_task(task=task, mode='models')

    # the choice of the metric for the chain quality assessment during
    # composition
    metric_function = ClassificationMetricsEnum.ROCAUC_penalty

    # the choice and initialisation of the GP search
    composer_requirements = GPComposerRequirements(
        primary=available_model_types,
        secondary=available_model_types,
        max_arity=3,
        max_depth=3,
        pop_size=20,
        num_of_generations=20,
        crossover_prob=0.8,
        mutation_prob=0.8,
        max_lead_time=max_lead_time)

    # GP optimiser parameters choice
    scheme_type = GeneticSchemeTypesEnum.parameter_free
    optimiser_parameters = GPChainOptimiserParameters(
        genetic_scheme_type=scheme_type)

    # Create builder for composer and set composer params
    logger = default_log('FEDOT logger', verbose_level=4)
    builder = (GPComposerBuilder(task=task)
               .with_requirements(composer_requirements)
               .with_metrics(metric_function)
               .with_optimiser_parameters(optimiser_parameters)
               .with_logger(logger=logger))

    if cache_path:
        builder = builder.with_cache(cache_path)

    # Create GP-based composer
    composer = builder.build()

    # the optimal chain generation by composition - the most time-consuming
    # task
    # NOTE(review): is_visualise is passed as True regardless of this
    # function's own is_visualise argument - confirm this is intended.
    chain_evo_composed = composer.compose_chain(data=dataset_to_compose,
                                                is_visualise=True)

    chain_evo_composed.fit(input_data=dataset_to_compose)

    # The history is written whether or not visualisation was requested.
    composer.history.write_composer_history_to_csv()

    if is_visualise:
        visualiser = ChainVisualiser()

        composer.log.debug('History visualization started')
        visualiser.visualise_history(composer.history)
        composer.log.debug('History visualization finished')

        composer.log.debug('Best chain visualization started')
        visualiser.visualise(chain_evo_composed)
        composer.log.debug('Best chain visualization finished')

    # the quality assessment for the obtained composite models
    roc_on_valid_evo_composed = calculate_validation_metric(
        chain_evo_composed, dataset_to_validate)

    print(f'Composed ROC AUC is {round(roc_on_valid_evo_composed, 3)}')

    return roc_on_valid_evo_composed