def test_data_model_types_forecasting_pipeline_fit():
    """Check that a saved forecasting pipeline template keeps one
    operation template per pipeline node."""
    train_data, _ = get_ts_data(forecast_length=10)

    pipeline = get_multiscale_pipeline()
    pipeline.fit(train_data)
    pipeline.save('data_model_forecasting')

    # The template must mirror the pipeline structure exactly
    assert len(PipelineTemplate(pipeline).operation_templates) == len(pipeline.nodes)
def configure_experiment():
    """Generate a 100-element time series for a 5-step-ahead forecast.

    Returns a tuple of (logger, forecast horizon, number of
    validation blocks, time series data).
    """
    validation_blocks = 3  # default number of validation blocks
    forecast_len = 5

    # 105 steps = 100 for the series itself + forecast_len ahead
    time_series, _ = get_ts_data(n_steps=105, forecast_length=forecast_len)
    log = default_log(__name__)

    return log, forecast_len, validation_blocks, time_series
def test_ts_pipeline_with_stats_model():
    """Tests PipelineTuner for time series forecasting task with AR model."""
    train_data, _ = get_ts_data(n_steps=200, forecast_length=5)

    ar_pipeline = Pipeline(PrimaryNode('ar'))

    # Tune AR model
    tuner_ar = PipelineTuner(pipeline=ar_pipeline, task=train_data.task, iterations=3)
    tuned_ar_pipeline = tuner_ar.tune_pipeline(input_data=train_data,
                                               loss_function=mse)

    # BUGFIX: the original asserted a constant (`is_tuning_finished = True`),
    # which could never fail; assert on the actual tuning result instead.
    assert tuned_ar_pipeline is not None
def get_dataset(task_type: str):
    """Build (train, test, threshold) fixtures for the given ML task type.

    :param task_type: one of 'regression', 'classification',
        'clustering', 'ts_forecasting'
    :raises ValueError: on an unknown task type
    """
    if task_type == 'regression':
        synthetic = get_synthetic_regression_data()
        train_data, test_data = train_test_data_setup(synthetic)
        # 5% of the target spread is considered an acceptable error
        threshold = np.std(test_data.target) * 0.05
    elif task_type == 'classification':
        iris = get_iris_data()
        train_data, test_data = train_test_data_setup(iris, shuffle_flag=True)
        threshold = 0.95
    elif task_type == 'clustering':
        synthetic = get_synthetic_input_data(n_samples=1000)
        train_data, test_data = train_test_data_setup(synthetic)
        threshold = 0.5
    elif task_type == 'ts_forecasting':
        train_data, test_data = get_ts_data(forecast_length=5)
        threshold = np.std(test_data.target)
    else:
        raise ValueError('Incorrect type of machine learning task')

    return train_data, test_data, threshold
def test_ts_models_fit_correct():
    """Fit every repository time-series model and check its test MAE
    stays within a variance-based threshold."""
    train_data, test_data = get_ts_data(forecast_length=5)
    logger = default_log('default_test_logger')

    with OperationTypesRepository() as repo:
        model_names, _ = repo.suitable_operation(
            task_type=TaskTypesEnum.ts_forecasting, tags=['time_series'])

    # Loose upper bound: twice the target variance
    mae_threshold = np.var(test_data.target) * 2

    for model_name in model_names:
        logger.info(f"Test time series model: {model_name}.")

        model = Model(operation_type=model_name)
        # Use a copy of the train data so models cannot mutate the fixture
        fitted_operation, _ = model.fit(data=deepcopy(train_data))
        test_pred = model.predict(fitted_operation=fitted_operation,
                                  data=test_data,
                                  is_fit_pipeline_stage=False)
        mae_value_test = mean_absolute_error(y_true=test_data.target,
                                             y_pred=test_pred.predict[0])

        assert mae_value_test < mae_threshold
def test_ts_forecasting_decomposition():
    """Check that after the decompose operation the full pipeline models
    the original (shifted) target rather than the decomposed residuals.
    """
    # Synthetic forecasting data, shifted by a constant offset so the
    # full pipeline's level is clearly separated from the residual level
    train_data, _ = get_ts_data(forecast_length=5)
    offset = 150
    train_data.features = train_data.features + offset
    train_data.target = train_data.target + offset

    _, pipeline_decompose_finish, pipeline = get_refinement_pipeline(lagged=10)

    pipeline.fit(train_data)
    pipeline_decompose_finish.fit(train_data)

    full_level = np.mean(pipeline.predict(train_data).predict)
    decompose_level = np.mean(pipeline_decompose_finish.predict(train_data).predict)

    # The full pipeline must track the +150-shifted target, so its mean
    # prediction sits well above the decomposed (residual) branch's mean
    assert full_level > (decompose_level + 100)