def test_global_modeling_split_df():
    """Smoke-test ``split_df`` for a single dataframe and a dict of dataframes.

    The first 512 rows of ``PEYTON_FILE`` are read and three consecutive
    128-row slices are deep-copied from them. ``split_df`` is then called
    with one slice on its own, with all three slices in a dict, and with the
    same dict plus ``local_split=True``. The returned pairs are unpacked but
    not inspected; the test passes when none of the calls raises.
    """
    log.info("Global Modeling - Split df")
    source = pd.read_csv(PEYTON_FILE, nrows=512)

    # Consecutive, non-overlapping 128-row windows starting at rows 0, 128, 256.
    first, second, third = (
        source.iloc[start : start + 128, :].copy(deep=True) for start in (0, 128, 256)
    )
    datasets = {"dataset1": first, "dataset2": second, "dataset3": third}

    model = NeuralProphet(n_forecasts=2, n_lags=3)

    log.info("split df with single df")
    train_part, val_part = model.split_df(first)

    log.info("split df with dict of dataframes")
    train_part, val_part = model.split_df(datasets)

    log.info("split df with dict of dataframes - local_split")
    train_part, val_part = model.split_df(datasets, local_split=True)
def test_train_eval_test(self):
    """Split, fit twice (with and without per-epoch validation), then test.

    Reads 95 rows of ``PEYTON_FILE``, runs them through
    ``df_utils.check_dataframe`` and the model's missing-data handling, and
    splits off a 10% hold-out with ``inputs_overbleed=True``. The model is
    fitted on the training part once with ``validate_each_epoch=True`` and
    once without, then evaluated on the hold-out. Both metric tables are
    written to the debug log; nothing is asserted.
    """
    log.info("testing: Train Eval Test")
    model = NeuralProphet(n_lags=10, n_forecasts=3, ar_sparsity=0.1, epochs=3, batch_size=32)

    data = pd.read_csv(PEYTON_FILE, nrows=95)
    data = df_utils.check_dataframe(data, check_y=False)
    data = model._handle_missing_data(data, freq="D", predicting=False)
    train_part, test_part = model.split_df(data, freq="D", valid_p=0.1, inputs_overbleed=True)

    # The first fit only exercises the validate-each-epoch path; the metrics
    # that get logged below come from the second, plain fit.
    model.fit(train_part, freq="D", validate_each_epoch=True, valid_p=0.1)
    fit_metrics = model.fit(train_part, freq="D")
    test_metrics = model.test(test_part)

    as_fixed = "{:6.3f}".format  # one-argument float formatter for to_string
    fit_table = fit_metrics.to_string(float_format=as_fixed)
    test_table = test_metrics.to_string(float_format=as_fixed)
    log.debug("Metrics: train/eval: \n {}".format(fit_table))
    log.debug("Metrics: test: \n {}".format(test_table))
def check_split(df_in, df_len_expected, n_lags, n_forecasts, freq, p=0.1):
    """Assert that ``split_df`` yields the expected number of train/test samples.

    Args:
        df_in: dataframe to split; it is first passed through
            ``df_utils.check_dataframe`` and the model's missing-data handling.
        df_len_expected: length the dataframe must have after that
            preprocessing.
        n_lags: ``n_lags`` given to the ``NeuralProphet`` instance.
        n_forecasts: ``n_forecasts`` given to the ``NeuralProphet`` instance.
        freq: frequency string forwarded to the missing-data handling and to
            ``split_df``.
        p: validation proportion. It is used both for the actual split and
            for the expected sample counts.

    Raises:
        AssertionError: if the preprocessed length or any sample count
            differs from the expected value.
    """
    m = NeuralProphet(
        n_lags=n_lags,
        n_forecasts=n_forecasts,
    )
    df_in = df_utils.check_dataframe(df_in, check_y=False)
    df_in = m._handle_missing_data(df_in, freq=freq, predicting=False)
    assert df_len_expected == len(df_in)

    total_samples = len(df_in) - n_lags - 2 * n_forecasts + 2
    # Split with the caller's proportion. A hard-coded 0.1 here would make the
    # expectations below wrong whenever p differs from the default.
    df_train, df_test = m.split_df(df_in, freq=freq, valid_p=p, inputs_overbleed=True)

    # Samples per part are derived from the part's row count.
    n_train = len(df_train) - n_lags - n_forecasts + 1
    n_test = len(df_test) - n_lags - n_forecasts + 1
    assert total_samples == n_train + n_test

    # At least one test sample is expected, however small p is.
    n_test_expected = max(1, int(total_samples * p))
    n_train_expected = total_samples - n_test_expected

    assert n_train == n_train_expected
    assert n_test == n_test_expected
def test_train_eval_test(self):
    """Split, fit with per-epoch validation, then evaluate on the hold-out.

    Reads 512 rows of ``PEYTON_FILE`` and splits off a 10% hold-out with
    ``inputs_overbleed=True``; no ``freq`` is passed to ``split_df``. The
    model is fitted on the training part with ``validate_each_epoch=True``
    and evaluated on the hold-out. Both metric tables are written to the
    debug log; nothing is asserted.
    """
    log.info("testing: Train Eval Test")
    model = NeuralProphet(n_lags=14, n_forecasts=7, ar_sparsity=0.1, epochs=2)

    data = pd.read_csv(PEYTON_FILE, nrows=512)
    train_part, test_part = model.split_df(data, valid_p=0.1, inputs_overbleed=True)

    fit_metrics = model.fit(train_part, freq="D", validate_each_epoch=True, valid_p=0.1)
    test_metrics = model.test(test_part)

    as_fixed = "{:6.3f}".format  # one-argument float formatter for to_string
    fit_table = fit_metrics.to_string(float_format=as_fixed)
    test_table = test_metrics.to_string(float_format=as_fixed)
    log.debug("Metrics: train/eval: \n {}".format(fit_table))
    log.debug("Metrics: test: \n {}".format(test_table))
def test_infer_frequency():
    """Exercise ``split_df`` and ``fit`` with omitted, matching and differing ``freq``.

    Scenarios, in order:
      * ``split_df`` on 50 rows of ``PEYTON_FILE`` with no ``freq``, with
        ``freq="D"`` and with ``freq="5D"``;
      * an unevenly spaced subset: ``split_df`` without ``freq`` must raise
        ``ValueError``, while passing ``freq="H"`` must not;
      * ``fit`` on a dict of two identical dataframes;
      * ``fit`` on a dict whose second dataframe has ``"M"``-spaced
        timestamps: accepted with ``n_lags=0``, ``ValueError`` with
        ``n_lags=2``;
      * ``split_df`` without ``freq`` on dataframes re-stamped with each of
        several further frequency aliases.
    """
    daily = pd.read_csv(PEYTON_FILE, nrows=102)[:50]
    model = NeuralProphet()

    # freq omitted
    train_part, test_part = model.split_df(daily)
    log.debug("freq automatically set")

    # freq given as "D"
    train_part, test_part = model.split_df(daily, freq="D")
    log.debug("freq is equal to ideal")

    # freq given as "5D"
    train_part, test_part = model.split_df(daily, freq="5D")
    log.debug("freq is set even though is different than the ideal")

    # Geometrically spaced row positions give an unevenly spaced dataframe.
    uneven_rows = np.unique(np.geomspace(1, 40, 20, dtype=int))
    uneven = daily.iloc[uneven_rows, :]
    with pytest.raises(ValueError):
        model.split_df(uneven)

    # The same uneven dataframe is accepted once a freq is given explicitly.
    train_part, test_part = model.split_df(uneven, freq="H")
    log.debug("freq is set even with not definable freq")

    # Dict of two identical dataframes.
    same_freq = {"df1": daily, "df2": daily}
    model = NeuralProphet()
    model.fit(same_freq, epochs=5)
    log.debug("freq is set for list of dataframes")

    # Dict whose second dataframe carries "M"-spaced timestamps; n_lags=0.
    monthly = daily.copy(deep=True)
    monthly["ds"] = pd.date_range(start="1994-12-01", periods=daily.shape[0], freq="M")
    mixed_freq = {"df1": daily, "df2": monthly}
    model = NeuralProphet(n_lags=0, epochs=5)
    model.fit(mixed_freq, epochs=5)
    log.debug("freq is set for list of dataframes(n_lags=0)")

    # The same mixed dict must be rejected when n_lags=2.
    model = NeuralProphet(n_lags=2)
    with pytest.raises(ValueError):
        model.fit(mixed_freq, epochs=5)

    # Further frequency aliases, split with the n_lags=2 model from above.
    # The dataframe holds at most 50 rows, so this slice keeps all of them.
    daily = daily.iloc[:200, :]
    n_rows = daily.shape[0]
    for alias in ("M", "MS", "Y", "YS", "Q", "QS", "B", "BH"):
        restamped = daily.copy(deep=True)
        restamped["ds"] = pd.date_range(start="1994-12-01", periods=n_rows, freq=alias)
        train_part, test_part = model.split_df(restamped)
    log.debug("freq is set for all the exceptions")