def test_classification_quality_improvement():
    """Check that a composed model outperforms the predefined 'xgboost' baseline.

    Both models are fitted on the scoring train CSV and scored on the scoring
    test CSV. The test passes when the composed model's ROC AUC is strictly
    greater than the baseline's, and the baseline's ROC AUC is not lower than
    the expected reference value.
    """
    # Reference value the baseline ROC AUC must reach.
    expected_baseline_quality = 0.823
    problem = 'classification'

    # Locations of the train/test samples inside the project tree.
    train_data_path = f'{project_root()}/cases/data/scoring/scoring_train.csv'
    test_data_path = f'{project_root()}/cases/data/scoring/scoring_test.csv'

    # Baseline: no composing, the predefined 'xgboost' model is fitted as-is.
    baseline_model = Fedot(problem=problem)
    baseline_model.fit(
        features=train_data_path,
        target='target',
        predefined_model='xgboost',
    )
    baseline_model.predict_proba(features=test_data_path)
    baseline_metrics = baseline_model.get_metrics()

    # Settings handed to the composer for the automated run.
    composer_params = dict(
        max_depth=3,
        max_arity=3,
        pop_size=20,
        num_of_generations=20,
        learning_time=10,
        with_tuning=True,
    )

    auto_model = Fedot(
        problem=problem,
        composer_params=composer_params,
        seed=42,
        verbose_level=4,
    )
    auto_model.fit(features=train_data_path, target='target')
    auto_model.predict_proba(features=test_data_path)
    auto_metrics = auto_model.get_metrics()

    auto_roc_auc = auto_metrics['roc_auc']
    print(auto_roc_auc)

    baseline_roc_auc = baseline_metrics['roc_auc']
    assert auto_roc_auc > baseline_roc_auc >= expected_baseline_quality
def test_baseline_with_api():
    """Smoke-test the API with the predefined 'xgboost' model on classification data.

    Verifies that one prediction is produced per test-sample target value and
    that the reported 'f1' metric is positive.
    """
    # The third element returned by get_dataset is not needed here.
    train_data, test_data, _ = get_dataset('classification')

    # Set up the framework for the classification task.
    model = Fedot(problem='classification')

    # Fit without optimisation: the predefined 'xgboost' model is used directly.
    model.fit(
        features=train_data,
        target='target',
        predefined_model='xgboost',
    )

    # Predict on the test sample; there must be one output per target value.
    predicted = model.predict_proba(features=test_data)
    assert len(predicted) == len(test_data.target)

    # The quality metric for the test sample must be positive.
    metrics = model.get_metrics(metric_names='f1')
    assert metrics['f1'] > 0
def run_classification_example(timeout=None):
    """Run the baseline and the automated classification models on scoring data.

    The metrics of both models are printed.

    :param timeout: value forwarded as ``timeout`` to both ``Fedot`` instances
    :return: output of ``predict_proba`` of the automated model on the test data
    """
    problem = 'classification'
    train_data_path = f'{fedot_project_root()}/cases/data/scoring/scoring_train.csv'
    test_data_path = f'{fedot_project_root()}/cases/data/scoring/scoring_test.csv'

    # Baseline: the predefined 'xgboost' model.
    baseline = Fedot(problem=problem, timeout=timeout)
    baseline.fit(
        features=train_data_path,
        target='target',
        predefined_model='xgboost',
    )
    baseline.predict(features=test_data_path)
    print(baseline.get_metrics())

    # Automated run: no predefined model is passed to fit.
    automl = Fedot(problem=problem, seed=42, timeout=timeout)
    automl.fit(features=train_data_path, target='target')
    prediction = automl.predict_proba(features=test_data_path)
    print(automl.get_metrics())

    return prediction
def run_classification_multiobj_example(with_plot=True):
    """Run a classification example with two composer metrics on Hill Valley data.

    The composer is configured with the 'f1' and 'node_num' metrics. Metrics
    computed against the test target are printed.

    :param with_plot: if truthy, ``show()`` is called on the model's ``best_models``
    :return: output of ``predict_proba`` on the test features
    """
    train_frame = pd.read_csv(
        f'{project_root()}/examples/data/Hill_Valley_with_noise_Training.data')
    test_frame = pd.read_csv(
        f'{project_root()}/examples/data/Hill_Valley_with_noise_Testing.data')

    # Detach the target column so the test frame holds features only.
    target = test_frame.pop('class')

    problem = 'classification'
    metrics = ['f1', 'node_num']

    auto_model = Fedot(
        problem=problem,
        learning_time=2,
        preset='light',
        composer_params={'metric': metrics},
        seed=42,
    )
    auto_model.fit(features=train_frame, target='class')
    prediction = auto_model.predict_proba(features=test_frame)
    print(auto_model.get_metrics(target))

    if with_plot:
        auto_model.best_models.show()

    return prediction
def run_additional_learning_example():
    """Fit a model on part of the scoring data, then restart from its pipeline.

    Steps:
      1. Fit a model on the first 1000 train rows, starting from a
         'scaling' -> 'logit' pipeline, and print its metrics.
      2. Take the resulting pipeline, show it and unfit it.
      3. Run two further models on the first 5000 train rows: one whose
         initial pipeline wraps the previous pipeline in an ``AtomizedModel``
         node placed after 'scaling', and one whose initial pipeline is a
         deep copy of the previous pipeline. For each, the resulting pipeline
         is shown and the metrics are printed.
    """
    train_data_path = f'{fedot_project_root()}/cases/data/scoring/scoring_train.csv'
    test_data_path = f'{fedot_project_root()}/cases/data/scoring/scoring_test.csv'

    train_data = pd.read_csv(train_data_path)
    test_data = pd.read_csv(test_data_path)
    # Detach the target column so the test frame holds features only.
    test_data_target = test_data.pop('target')

    problem = 'classification'

    # --- first run: small train subset, simple starting pipeline ---
    start_pipeline = Pipeline(
        SecondaryNode('logit', nodes_from=[PrimaryNode('scaling')]))
    auto_model = Fedot(
        problem=problem,
        seed=42,
        preset='light',
        timeout=5,
        composer_params={'initial_pipeline': start_pipeline},
    )
    # Copies are passed so the original frames are not handed to the model.
    auto_model.fit(features=deepcopy(train_data.head(1000)), target='target')
    auto_model.predict_proba(features=deepcopy(test_data))
    print('auto_model',
          auto_model.get_metrics(target=deepcopy(test_data_target)))

    # --- take the obtained pipeline and reset its fitted state ---
    prev_model = auto_model.current_pipeline
    prev_model.show()
    prev_model.unfit()

    # Two alternative starting points built from the previous pipeline.
    atomized_model = Pipeline(
        SecondaryNode(operation_type=AtomizedModel(prev_model),
                      nodes_from=[PrimaryNode('scaling')]))
    non_atomized_model = deepcopy(prev_model)

    # --- follow-up runs: larger train subset ---
    train_data = train_data.head(5000)
    timeout = 1

    follow_up_runs = (
        ('auto_model_from_atomized', atomized_model),
        ('auto_model_from_pipeline', non_atomized_model),
    )
    for label, initial_pipeline in follow_up_runs:
        model = Fedot(
            problem=problem,
            seed=42,
            preset='light',
            timeout=timeout,
            composer_params={'initial_pipeline': initial_pipeline},
            verbose_level=2,
        )
        model.fit(features=deepcopy(train_data), target='target')
        model.predict_proba(features=deepcopy(test_data))
        model.current_pipeline.show()
        print(label, model.get_metrics(deepcopy(test_data_target)))