def test_ts_forecasting_lagged_data_operation():
    """Fit a 'lagged' -> 'ridge' pipeline and check the forecast length.

    The forecast, once flattened, must contain as many elements as the
    flattened third value returned by ``get_time_series``.
    """
    fit_data, forecast_data, expected_target = get_time_series()

    # Linear pipeline: lagged transformation feeding a ridge model
    lagged = PrimaryNode('lagged')
    ridge = SecondaryNode('ridge', nodes_from=[lagged])
    ts_pipeline = Pipeline(ridge)

    ts_pipeline.fit_from_scratch(fit_data)
    forecast = np.ravel(ts_pipeline.predict(forecast_data).predict)

    expected_length = len(np.ravel(expected_target))
    assert len(forecast) == expected_length
def test_ts_forecasting_smoothing_data_operation():
    """Check every operation tagged 'smoothing' inside a forecasting pipeline.

    For each operation name returned by the repository for the 'smoothing'
    tag, a ``<smoothing> -> lagged -> ridge`` pipeline is fitted from scratch
    and the flattened forecast must have the same length as the flattened
    test target.
    """
    train_input, predict_input, y_test = get_time_series()
    expected_length = len(np.ravel(y_test))

    model_names, _ = OperationTypesRepository().operations_with_tag(
        tags=['smoothing'])

    for smoothing_operation in model_names:
        node_smoothing = PrimaryNode(smoothing_operation)
        node_lagged = SecondaryNode('lagged', nodes_from=[node_smoothing])
        node_ridge = SecondaryNode('ridge', nodes_from=[node_lagged])
        pipeline = Pipeline(node_ridge)

        pipeline.fit_from_scratch(train_input)
        predicted_output = pipeline.predict(predict_input)
        predicted = np.ravel(predicted_output.predict)

        # Name the operation so a failure inside the loop is attributable
        assert len(predicted) == expected_length, (
            f"Operation '{smoothing_operation}': forecast length "
            f"{len(predicted)} != expected {expected_length}"
        )
def test_classification_data_operations():
    """Check each classification-suitable operation as a primary node.

    Every operation name returned by the repository for the classification
    task is placed before a 'logit' node; the pipeline is fitted from scratch
    and the prediction must contain as many items as ``y_test``.
    """
    train_input, predict_input, y_test = get_small_classification_dataset()

    model_names, _ = OperationTypesRepository().suitable_operation(
        task_type=TaskTypesEnum.classification)

    for data_operation in model_names:
        node_data_operation = PrimaryNode(data_operation)
        node_final = SecondaryNode('logit', nodes_from=[node_data_operation])
        pipeline = Pipeline(node_final)

        # Fit and predict for pipeline
        pipeline.fit_from_scratch(train_input)
        predicted_output = pipeline.predict(predict_input)
        predicted = predicted_output.predict

        # Name the operation so a failure inside the loop is attributable
        assert len(predicted) == len(y_test), (
            f"Operation '{data_operation}': prediction length "
            f"{len(predicted)} != expected {len(y_test)}"
        )
def test_inf_and_nan_absence_after_pipeline_fitting_from_scratch():
    """Check that predictions contain neither inf nor NaN values.

    Each regression-suitable operation returned by the repository is placed
    before a 'linear' node. The pipeline is fitted and applied to the data
    from ``get_nan_inf_data`` (presumably containing NaN/inf values; the
    helper is defined outside this block), and the prediction must be free
    of inf and NaN.
    """
    train_input = get_nan_inf_data()

    model_names, _ = OperationTypesRepository().suitable_operation(
        task_type=TaskTypesEnum.regression)

    for data_operation in model_names:
        node_data_operation = PrimaryNode(data_operation)
        node_final = SecondaryNode('linear', nodes_from=[node_data_operation])
        pipeline = Pipeline(node_final)

        # Fit and predict for pipeline (the same data is used for both steps)
        pipeline.fit_from_scratch(train_input)
        predicted_output = pipeline.predict(train_input)
        predicted = predicted_output.predict

        # Name the operation so a failure inside the loop is attributable
        assert not np.any(np.isinf(predicted)), (
            f"Operation '{data_operation}': prediction contains inf values"
        )
        assert not np.any(np.isnan(predicted)), (
            f"Operation '{data_operation}': prediction contains NaN values"
        )
def test_data_merge_in_pipeline():
    """Check that the pipeline works correctly when tables change dynamically
    during the fit process.

    The 'scaling' node feeds both 'ransac_lin_reg' and the final 'ridge' node,
    so the final node has to merge two parent outputs.
    """
    #       ridge
    #      /     \  (merge operation)
    #     |   ransac_lin_reg (remove several lines in table)
    #      \     /
    #      scaling
    node_scaling = PrimaryNode('scaling')
    node_lin_ransac = SecondaryNode('ransac_lin_reg', nodes_from=[node_scaling])
    node_final = SecondaryNode('ridge',
                               nodes_from=[node_lin_ransac, node_scaling])
    pipeline = Pipeline(node_final)

    features_options = {'informative': 2, 'bias': 2.0}
    # Only the train part is used here; the test part is discarded
    x_train, y_train, _, _ = get_regression_dataset(
        features_options=features_options,
        samples_amount=100,
        features_amount=5)

    # Define regression task
    task = Task(TaskTypesEnum.regression)

    # Prepare data to train the model
    train_input = InputData(idx=np.arange(0, len(x_train)),
                            features=x_train,
                            target=y_train,
                            task=task,
                            data_type=DataTypesEnum.table)

    # Fit and predict
    pipeline.fit_from_scratch(train_input)
    prediction = pipeline.predict(train_input)

    assert prediction is not None