def create_pipeline() -> Pipeline:
    """Build a three-layer classification pipeline whose root is a knn node."""
    # First (primary) layer
    logit_node = PrimaryNode('logit')
    lda_node = PrimaryNode('lda')
    lda_node.custom_params = {'n_components': 1}
    xgboost_node = PrimaryNode('xgboost')
    knn_node = PrimaryNode('knn')
    knn_node.custom_params = {'n_neighbors': 9}

    # Second layer
    knn_mid = SecondaryNode('knn', nodes_from=[lda_node, knn_node])
    knn_mid.custom_params = {'n_neighbors': 5}
    logit_mid = SecondaryNode('logit', nodes_from=[xgboost_node, lda_node])

    # Third layer
    lda_deep = SecondaryNode('lda', nodes_from=[logit_mid, knn_mid, logit_node])
    lda_deep.custom_params = {'n_components': 1}
    xgboost_deep = SecondaryNode('xgboost',
                                 nodes_from=[logit_node, logit_mid, knn_node])

    # Root node
    knn_root = SecondaryNode('knn', nodes_from=[lda_deep, xgboost_deep])
    knn_root.custom_params = {'n_neighbors': 8}
    return Pipeline(knn_root)
def get_composite_pipeline(composite_flag: bool = True) -> Pipeline:
    """Return a pipeline of one or two CNN branches joined by a random forest.

    :param composite_flag: if True, use both CNN branches; otherwise only the first
    """
    deep_cnn = PrimaryNode('cnn')
    deep_cnn.custom_params = {'image_shape': (28, 28, 1),
                              'architecture': 'deep',
                              'num_classes': 10,
                              'epochs': 15,
                              'batch_size': 128}
    simple_cnn = PrimaryNode('cnn')
    # NOTE(review): this branch uses the key 'architecture_type' while the one
    # above uses 'architecture' — confirm which key the cnn operation expects.
    simple_cnn.custom_params = {'image_shape': (28, 28, 1),
                                'architecture_type': 'simplified',
                                'num_classes': 10,
                                'epochs': 10,
                                'batch_size': 128}
    if composite_flag:
        head = SecondaryNode('rf', nodes_from=[deep_cnn, simple_cnn])
    else:
        head = SecondaryNode('rf', nodes_from=[deep_cnn])
    return Pipeline(head)
def get_pipeline():
    """Two lagged branches (long and short window) merged by a final ridge node."""
    long_lagged = PrimaryNode('lagged')
    long_lagged.custom_params = {'window_size': 120}
    short_lagged = PrimaryNode('lagged')
    short_lagged.custom_params = {'window_size': 10}
    ridge_branch = SecondaryNode('ridge', nodes_from=[long_lagged])
    dtreg_branch = SecondaryNode('dtreg', nodes_from=[short_lagged])
    head = SecondaryNode('ridge', nodes_from=[ridge_branch, dtreg_branch])
    return Pipeline(head)
def test_multi_modal_pipeline():
    """Fit and predict a multi-modal (image + table + text) classification pipeline.

    Bug fix: the original created a first `image_node` (a PrimaryNode('cnn')
    with epochs=1 params) that was immediately rebound by the SecondaryNode
    version below — dead code, now removed.
    """
    task = Task(TaskTypesEnum.classification)
    images_size = (128, 128)
    files_path = os.path.join('test', 'data', 'multi_modal')
    path = os.path.join(str(fedot_project_root()), files_path)
    train_num, _, train_img, _, train_text, _ = \
        prepare_multi_modal_data(path, task, images_size, with_split=False)

    # image branch
    ds_image = PrimaryNode('data_source_img')
    image_node = SecondaryNode('cnn', nodes_from=[ds_image])
    image_node.custom_params = {'image_shape': (images_size[0], images_size[1], 1),
                                'architecture': 'simplified',
                                'num_classes': 2,
                                'epochs': 15,
                                'batch_size': 128}

    # table branch
    ds_table = PrimaryNode('data_source_table')
    scaling_node = SecondaryNode('scaling', nodes_from=[ds_table])
    numeric_node = SecondaryNode('rf', nodes_from=[scaling_node])

    # text branch
    ds_text = PrimaryNode('data_source_text')
    node_text_clean = SecondaryNode('text_clean', nodes_from=[ds_text])
    text_node = SecondaryNode('tfidf', nodes_from=[node_text_clean])

    pipeline = Pipeline(SecondaryNode('logit',
                                      nodes_from=[numeric_node, image_node, text_node]))

    fit_data = MultiModalData({
        'data_source_img': train_img,
        'data_source_table': train_num,
        'data_source_text': train_text
    })

    pipeline.fit(fit_data)
    prediction = pipeline.predict(fit_data)

    assert prediction is not None
def get_simple_pipeline():
    """Function returns simple pipeline (lagged -> ridge)."""
    lagged = PrimaryNode('lagged')
    lagged.custom_params = {'window_size': 150}
    ridge = SecondaryNode('ridge', nodes_from=[lagged])
    return Pipeline(ridge)
def test_forecast_with_exog():
    """Forecast with an exogenous variable and compare against the hold-out series."""
    train_source_ts, predict_source_ts, train_exog_ts, predict_exog_ts, ts_test = \
        synthetic_with_exogenous_ts()

    # Lagged transformation of the endogenous series
    lagged_node = PrimaryNode('lagged')
    lagged_node.custom_params = {'window_size': window_size}
    # Exogenous data source
    exog_node = PrimaryNode('exog_ts_data_source')

    head = SecondaryNode('linear', nodes_from=[lagged_node, exog_node])
    pipeline = Pipeline(head)

    pipeline.fit(input_data=MultiModalData({
        'exog_ts_data_source': train_exog_ts,
        'lagged': train_source_ts}))
    forecast = pipeline.predict(input_data=MultiModalData({
        'exog_ts_data_source': predict_exog_ts,
        'lagged': predict_source_ts}))
    prediction = np.ravel(np.array(forecast.predict))

    assert tuple(prediction) == tuple(ts_test)
def run_one_model_with_specific_evaluation_mod(train_data, test_data, mode: str = None):
    """ Runs the example with one model svc.

    :param train_data: train data for pipeline training
    :param test_data: test data for pipeline training
    :param mode: pass gpu flag to make gpu evaluation
    """
    problem = 'classification'
    baseline_model = (Fedot(problem=problem, preset='gpu') if mode == 'gpu'
                      else Fedot(problem=problem))

    svc_node = PrimaryNode('svc')
    # the custom params are needed to make probability evaluation available
    # otherwise an error is occurred
    svc_node.custom_params = {'kernel': 'rbf', 'C': 10, 'gamma': 1,
                              'cache_size': 2000, 'probability': True}
    preset_pipeline = Pipeline(svc_node)

    start = datetime.now()
    baseline_model.fit(features=train_data, target='target',
                       predefined_model=preset_pipeline)
    print(f'Completed with custom params in: {datetime.now() - start}')

    baseline_model.predict(features=test_data)
    print(baseline_model.get_metrics())
def get_ts_pipeline(window_size):
    """ Function return pipeline with lagged transformation in it """
    lagged = PrimaryNode('lagged')
    lagged.custom_params = {'window_size': window_size}
    root = SecondaryNode('ridge', nodes_from=[lagged])
    return Pipeline(root)
def get_simple_ts_pipeline(model_root: str = 'ridge', window_size: int = 20):
    """Return a lagged -> `model_root` forecasting pipeline.

    :param model_root: operation name for the final node
    :param window_size: lagged transformation window size
    """
    lagged = PrimaryNode('lagged')
    lagged.custom_params = {'window_size': window_size}
    root = SecondaryNode(model_root, nodes_from=[lagged])
    return Pipeline(root)
def get_composite_pipeline():
    """ The function returns prepared pipeline of 5 models

    :return: Pipeline object
    """
    wide_lagged = PrimaryNode('lagged')
    wide_lagged.custom_params = {'window_size': 150}
    narrow_lagged = PrimaryNode('lagged')
    narrow_lagged.custom_params = {'window_size': 100}
    linear_first = SecondaryNode('linear', nodes_from=[wide_lagged])
    linear_second = SecondaryNode('linear', nodes_from=[narrow_lagged])
    head = SecondaryNode('ridge', nodes_from=[linear_first, linear_second])
    return Pipeline(head)
def get_stlarima_pipeline():
    """Return a single-node pipeline containing an stl_arima model."""
    stl_node = PrimaryNode('stl_arima')
    stl_node.custom_params = {'period': 80, 'p': 2, 'd': 1, 'q': 0}
    return Pipeline(stl_node)
def get_simple_short_lagged_pipeline():
    """Simple lagged -> linear forecasting pipeline with a short window."""
    lagged = PrimaryNode('lagged')
    # Use 4 elements in time series as predictors
    lagged.custom_params = {'window_size': 4}
    root = SecondaryNode('linear', nodes_from=[lagged])
    return Pipeline(root)
def get_multiscale_pipeline():
    """Two-scale pipeline: a raw lagged branch and a gaussian-smoothed branch."""
    # First branch: lagged transform of the raw series
    raw_lagged = PrimaryNode('lagged')
    raw_lagged.custom_params = {'window_size': 20}
    raw_ridge = SecondaryNode('ridge', nodes_from=[raw_lagged])

    # Second branch, which will try to make prediction based on smoothed ts
    smoothing = PrimaryNode('gaussian_filter')
    smoothing.custom_params = {'sigma': 3}
    smooth_lagged = SecondaryNode('lagged', nodes_from=[smoothing])
    smooth_lagged.custom_params = {'window_size': 100}
    smooth_ridge = SecondaryNode('ridge', nodes_from=[smooth_lagged])

    head = SecondaryNode('linear', nodes_from=[raw_ridge, smooth_ridge])
    return Pipeline(head)
def get_non_refinement_pipeline(lagged):
    """ Create 4-level pipeline without decompose operation """
    lagged_node = PrimaryNode('lagged')
    lagged_node.custom_params = {'window_size': lagged}
    lasso_node = SecondaryNode('lasso', nodes_from=[lagged_node])
    dtreg_node = SecondaryNode('dtreg', nodes_from=[lagged_node])
    dtreg_node.custom_params = {'max_depth': 3}
    head = SecondaryNode('ridge', nodes_from=[lasso_node, dtreg_node])
    return Pipeline(head)
def get_stlarima_nemo_pipeline():
    """Return a pipeline joining stl_arima and an exogenous (nemo) data source::

        stl_arima \\
                    linear
        nemo      /
    """
    arima_node = PrimaryNode('stl_arima')
    arima_node.custom_params = {'period': 80, 'p': 2, 'd': 1, 'q': 0}
    nemo_node = PrimaryNode('exog_ts_data_source')
    head = SecondaryNode('linear', nodes_from=[arima_node, nemo_node])
    return Pipeline(head)
def create_pipeline() -> Pipeline:
    """Build a pipeline in which logit and lda primaries feed a final xgboost node."""
    result = Pipeline()
    logit_node = PrimaryNode('logit')
    lda_node = PrimaryNode('lda')
    lda_node.custom_params = {'n_components': 1}
    boosting_node = SecondaryNode('xgboost')
    # NOTE(review): 'n_components' is an lda hyperparameter; setting it on the
    # xgboost node looks like a copy-paste slip — confirm it is intentional.
    boosting_node.custom_params = {'n_components': 1}
    boosting_node.nodes_from = [logit_node, lda_node]
    result.add_node(boosting_node)
    return result
def test_forecast_with_sparse_lagged():
    """Fit and forecast with a sparse_lagged -> linear pipeline.

    Bug fix: the original ended with `is_forecasted = True; assert is_forecasted`,
    a vacuous assertion that could never fail; assert on the actual forecast instead.
    """
    # Exogenous parts of the synthetic data set are not used in this test
    train_source_ts, predict_source_ts, _, _, _ = synthetic_with_exogenous_ts()

    # Source data for lagged node
    node_lagged = PrimaryNode('sparse_lagged')
    # Set window size for lagged transformation
    node_lagged.custom_params = {'window_size': window_size}
    node_final = SecondaryNode('linear', nodes_from=[node_lagged])
    pipeline = Pipeline(node_final)

    pipeline.fit(input_data=MultiModalData({'sparse_lagged': train_source_ts}))
    forecast = pipeline.predict(
        input_data=MultiModalData({'sparse_lagged': predict_source_ts}))

    assert forecast is not None
def get_refinement_pipeline(lagged):
    """ Create 4-level pipeline with decompose operation """
    lagged_node = PrimaryNode('lagged')
    lagged_node.custom_params = {'window_size': lagged}
    lasso_node = SecondaryNode('lasso', nodes_from=[lagged_node])
    decompose_node = SecondaryNode('decompose', nodes_from=[lagged_node, lasso_node])
    dtreg_node = SecondaryNode('dtreg', nodes_from=[decompose_node])
    dtreg_node.custom_params = {'max_depth': 3}

    # Pipelines with different outputs
    pipeline_with_decompose_finish = Pipeline(dtreg_node)
    pipeline_with_main_finish = Pipeline(lasso_node)

    # Combining branches with different targets (T and T_decomposed)
    head = SecondaryNode('ridge', nodes_from=[lasso_node, dtreg_node])
    full_pipeline = Pipeline(head)
    return pipeline_with_main_finish, pipeline_with_decompose_finish, full_pipeline
def run_pipeline_with_specific_evaluation_mode(train_data: InputData, test_data: InputData,
                                               mode: str = None):
    """ Runs the example with 3-node pipeline.

    :param train_data: train data for pipeline training
    :param test_data: test data for pipeline training
    :param mode: pass gpu flag to make gpu evaluation
    """
    problem = 'classification'
    baseline_model = (Fedot(problem=problem, preset='gpu') if mode == 'gpu'
                      else Fedot(problem=problem))

    svc_node = PrimaryNode('svc')
    # probability=True is required so the svc node can emit probabilities
    svc_node.custom_params = {'kernel': 'rbf', 'C': 10, 'gamma': 1,
                              'cache_size': 2000, 'probability': True}
    logit_node = PrimaryNode('logit')
    rf_node = SecondaryNode('rf', nodes_from=[svc_node, logit_node])
    preset_pipeline = Pipeline(rf_node)

    start = datetime.now()
    baseline_model.fit(features=train_data, target='target',
                       predefined_model=preset_pipeline)
    print(f'Completed with custom params in: {datetime.now() - start}')

    baseline_model.predict(features=test_data)
    print(baseline_model.get_metrics())
def run_gapfilling_example():
    """ This function runs an example of filling in gaps in synthetic data

    :return arrays_dict: dictionary with 4 keys ('ridge', 'local_poly',
    'batch_poly', 'linear') that can be used to get arrays without gaps
    :return gap_data: an array with gaps
    :return real_data: an array with actual values in gaps
    """
    # Get synthetic time series
    gap_data, real_data = get_array_with_gaps()

    # Filling in gaps using pipeline from FEDOT
    lagged_node = PrimaryNode('lagged')
    lagged_node.custom_params = {'window_size': 100}
    ridge_node = SecondaryNode('ridge', nodes_from=[lagged_node])
    ridge_gapfiller = ModelGapFiller(gap_value=-100.0,
                                     pipeline=Pipeline(ridge_node))
    without_gap_arr_ridge = ridge_gapfiller.forward_inverse_filling(gap_data)

    # Filling in gaps using simple methods such as polynomial approximation
    simple_gapfill = SimpleGapFiller(gap_value=-100.0)
    without_gap_local_poly = simple_gapfill.local_poly_approximation(gap_data, 4, 150)
    without_gap_batch_poly = simple_gapfill.batch_poly_approximation(gap_data, 4, 150)
    without_gap_linear = simple_gapfill.linear_interpolation(gap_data)

    arrays_dict = {'ridge': without_gap_arr_ridge,
                   'local_poly': without_gap_local_poly,
                   'batch_poly': without_gap_batch_poly,
                   'linear': without_gap_linear}
    return arrays_dict, gap_data, real_data
def get_statsmodels_pipeline():
    """Single-node autoregression pipeline with custom lag parameters."""
    ar_node = PrimaryNode('ar')
    ar_node.custom_params = {'lag_1': 20, 'lag_2': 100}
    return Pipeline(ar_node)