def test_config_variations_3(): df = pd.DataFrame([range(300), range(1000, 1300)]).T df["time"] = pd.date_range("2018-01-01", periods=len(df), freq="H") config.update({ "data": df, "datetime_column": "time", "freq": "", "predicted_column": False, "datalength": 100, "other_columns": True, "analyzeit": 2, "default_other_columns_length": 5, "data_transform": None, "repeatit": 10, "error_criterion": "max_error", "multiprocessing": None, "remove_outliers": None, "remove_nans_threshold": 0.8, "remove_nans_or_replace": "mean", "optimization": True, "optimization_variable": "default_n_steps_in", "optimization_values": [3, 6, 9], "plot_all_optimized_models": True, "correlation_threshold": 0.4, "standardizeit": "standardize", "predicts": 7, "smoothit": None, }) assert validate_result(predictit.predict().predictions)
def test_presets(): config.update({ "data": "https://www.ncdc.noaa.gov/cag/global/time-series/globe/land_ocean/ytd/12/1880-2016.json", "request_datatype_suffix": ".json", "predicted_table": "data", "data_orientation": "index", "predicted_column": 0, "datalength": 100, }) config.update({"use_config_preset": "fast"}) preset_result = predictit.predict().predictions assert validate_result(preset_result) config.update({"use_config_preset": "normal"}) assert validate_result(preset_result)
def test_config_variations_2(): config.update({ "data": "test_random", "predicted_column": "", "predicts": 4, "analyzeit": 1, "datetime_column": "", "freq": "M", "datalength": 100, "default_n_steps_in": 3, "data_transform": "difference", "error_criterion": "rmse", "remove_outliers": 3, "print_number_of_models": None, "print_table": "detailed", "plot_library": "matplotlib", "show_plot": True, "print_time_table": True, "correlation_threshold": 0.2, "data_extension": None, "optimizeit": True, "optimizeit_limit": 0.1, "optimizeit_details": 2, "optimizeit_plot": True, "standardizeit": None, "multiprocessing": "pool", "confidence_interval": None, "trace_processes_memory": True, "used_models": ["Bayes ridge regression"], "models_parameters": { "Bayes ridge regression": { "model": "BayesianRidge", "n_iter": 300, "alpha_1": 1.0e-6, "alpha_2": 1.0e-6, "lambda_1": 1.0e-6, "lambda_2": 1.0e-6, } }, "fragments": 4, "iterations": 2, "models_parameters_limits": { "Bayes ridge regression": { "alpha_1": [0.1e-6, 3e-6], "model": ["BayesianRidge", "LassoRegression", "LinearRegression"], } }, }) config.hyperparameter_optimization.optimizeit_limit = 10 assert validate_result(predictit.predict().predictions)
def test_default_config_and_outputs(): config.reset() results_default = predictit.predict() # Validate tables validate_tables = True for i in [ results_default.tables.detailed, results_default.tables.simple, results_default.tables.time, ]: if not isinstance(i, str) or len(i) < 20: validate_tables = False assert validate_result(results_default.predictions) assert validate_result(results_default.best_prediction) assert validate_result(results_default.results_df) assert validate_tables assert np.isnan(results_default.with_history["Predicted column"].iloc[-1]) assert not np.isnan((results_default.with_history.iloc[-1, -1]))
def test_config_variations_1(): config.update({ "embedding": "label", "plot_library": "plotly", "data": "https://raw.githubusercontent.com/jbrownlee/Datasets/master/daily-min-temperatures.csv", "predicted_column": "Temp", "datetime_column": "Date", "freq": "D", "error_criterion": "dtw", "max_imported_length": 300, "remove_nans_threshold": 0.85, "remove_nans_or_replace": "neighbor", "trace_processes_memory": False, "print_number_of_models": 10, "add_fft_columns": 16, "data_extension": { "differences": True, "second_differences": True, "multiplications": True, "rolling_means": 10, "rolling_stds": 10, "mean_distances": True, }, "analyzeit": 3, "correlation_threshold": 0.2, "optimizeit": False, "standardizeit": "01", "multiprocessing": "process", "smoothit": (19, 2), "power_transformed": True, "analyze_seasonal_decompose": { "period": 32, "model": "additive" }, "confidence_interval": 0.6, }) assert validate_result(predictit.predict().predictions)
def test_main_multiple(): data = pd.read_csv( "https://raw.githubusercontent.com/jbrownlee/Datasets/master/daily-min-temperatures.csv" ).iloc[:120] data["Another col"] = data["Temp"] + 16 config_test = config.copy() config_test.update({ "datetime_column": "Date", "freqs": ["D", "H"], "data": data, "predicted_columns": ["*"], "remove_nans_threshold": 0.9, "remove_nans_or_replace": 2, "optimization": False, }) result = predictit.predict_multiple_columns(config=config_test) result_dataframe = next(iter(result.best_predictions_dataframes.values())) assert (result.results and len(result.results) == 4 and validate_result(result_dataframe) and len(result_dataframe.columns) == 2)
def test_config_inputs(): predict_with_params = predictit.predict(predicts=3) data = [ pd.DataFrame(np.random.randn(100, 3), columns=["a", "b", "c"]), pd.DataFrame(np.random.randn(80, 3), columns=["e", "b", "c"]), ] predict_with_positional_params = predictit.predict(data, "b") config_test = config.copy() config_test.output.predicts = 4 predict_config_as_param = predictit.predict(config=config_test) cli_args_str = ( "python predictit/main.py --used_function predict --data " "'https://raw.githubusercontent.com/jbrownlee/Datasets/master/daily-min-temperatures.csv' " "--predicted_column 'Temp' ") result_cli = subprocess.check_output(cli_args_str.split(" ")) assert len(predict_with_params.best_prediction) == 3 assert len(predict_config_as_param.best_prediction) == 4 assert validate_result(predict_with_positional_params.predictions) assert "Best model is" in str(result_cli)