import logging

import numpy as np
import pandas as pd
import pytest

from neuralprophet import NeuralProphet, df_utils

log = logging.getLogger("NP.test")
# Assumed path to the Peyton Manning sample dataset shipped with NeuralProphet;
# adjust to your local copy.
PEYTON_FILE = "wp_log_peyton_manning.csv"


def test_global_modeling_split_df():
    ### GLOBAL MODELING - SPLIT DF
    log.info("Global Modeling - Split df")
    df = pd.read_csv(PEYTON_FILE, nrows=512)
    df1 = df.iloc[:128, :].copy(deep=True)
    df2 = df.iloc[128:256, :].copy(deep=True)
    df3 = df.iloc[256:384, :].copy(deep=True)
    df_dict = {"dataset1": df1, "dataset2": df2, "dataset3": df3}
    m = NeuralProphet(n_forecasts=2, n_lags=3)
    log.info("split df with single df")
    df_train, df_val = m.split_df(df1)
    log.info("split df with dict of dataframes")
    df_train, df_val = m.split_df(df_dict)
    log.info("split df with dict of dataframes - local_split")
    df_train, df_val = m.split_df(df_dict, local_split=True)
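    # Hedged sketch (an addition, not part of the original test): the split
    # dict could be fed straight back to the same API for global-model
    # training and evaluation; the freq and epochs values are illustrative
    # assumptions.
    metrics = m.fit(df_train, freq="D", epochs=2)
    val_metrics = m.test(df_val)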
Example #2
    def test_train_eval_test(self):
        log.info("testing: Train Eval Test")
        m = NeuralProphet(
            n_lags=10,
            n_forecasts=3,
            ar_sparsity=0.1,
            epochs=3,
            batch_size=32,
        )
        df = pd.read_csv(PEYTON_FILE, nrows=95)
        df = df_utils.check_dataframe(df, check_y=False)
        df = m._handle_missing_data(df, freq="D", predicting=False)
        df_train, df_test = m.split_df(df, freq="D", valid_p=0.1, inputs_overbleed=True)
        # The first fit holds out valid_p of the training data and evaluates it
        # each epoch; the second fit call runs without per-epoch validation and
        # overwrites the returned metrics.
        metrics = m.fit(df_train, freq="D", validate_each_epoch=True, valid_p=0.1)
        metrics = m.fit(df_train, freq="D")
        val_metrics = m.test(df_test)
        log.debug("Metrics: train/eval: \n {}".format(metrics.to_string(float_format=lambda x: "{:6.3f}".format(x))))
        log.debug("Metrics: test: \n {}".format(val_metrics.to_string(float_format=lambda x: "{:6.3f}".format(x))))
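        # Hedged sketch (an addition, not from the original test): after
        # fitting, forecasts for the held-out horizon could be produced via
        # make_future_dataframe + predict; the periods and
        # n_historic_predictions values are illustrative assumptions.
        future = m.make_future_dataframe(df_train, periods=3, n_historic_predictions=10)
        forecast = m.predict(future)
        log.debug("Forecast tail: \n {}".format(forecast.tail().to_string()))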
Example #3
def check_split(df_in, df_len_expected, n_lags, n_forecasts, freq, p=0.1):
    m = NeuralProphet(
        n_lags=n_lags,
        n_forecasts=n_forecasts,
    )
    df_in = df_utils.check_dataframe(df_in, check_y=False)
    df_in = m._handle_missing_data(df_in, freq=freq, predicting=False)
    assert df_len_expected == len(df_in)

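    # With inputs_overbleed, the test series reuses the last n_lags training
    # points as inputs, so no samples are lost to lags at the split boundary;
    # the boundary still costs n_forecasts - 1 samples because forecast
    # targets cannot span it, which gives the total below.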
    total_samples = len(df_in) - n_lags - 2 * n_forecasts + 2
    df_train, df_test = m.split_df(df_in, freq=freq, valid_p=p, inputs_overbleed=True)
    n_train = len(df_train) - n_lags - n_forecasts + 1
    n_test = len(df_test) - n_lags - n_forecasts + 1
    assert total_samples == n_train + n_test

    n_test_expected = max(1, int(total_samples * p))
    n_train_expected = total_samples - n_test_expected
    assert n_train == n_train_expected
    assert n_test == n_test_expected
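
# Hedged usage sketch (an addition): a concrete call with the arithmetic
# spelled out. With 100 clean daily rows, n_lags=5, n_forecasts=3:
# total_samples = 100 - 5 - 2*3 + 2 = 91, n_test = max(1, int(91 * 0.1)) = 9,
# n_train = 91 - 9 = 82. df_len_expected=100 assumes _handle_missing_data
# neither drops nor adds rows for this slice.
df_daily = pd.read_csv(PEYTON_FILE, nrows=100)
check_split(df_in=df_daily, df_len_expected=100, n_lags=5, n_forecasts=3, freq="D")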
Example #4
    def test_train_eval_test(self):
        log.info("testing: Train Eval Test")
        m = NeuralProphet(
            n_lags=14,
            n_forecasts=7,
            ar_sparsity=0.1,
            epochs=2,
        )
        df = pd.read_csv(PEYTON_FILE, nrows=512)
        df_train, df_test = m.split_df(df, valid_p=0.1, inputs_overbleed=True)

        metrics = m.fit(df_train, freq="D", validate_each_epoch=True, valid_p=0.1)
        val_metrics = m.test(df_test)
        log.debug("Metrics: train/eval: \n {}".format(metrics.to_string(float_format=lambda x: "{:6.3f}".format(x))))
        log.debug("Metrics: test: \n {}".format(val_metrics.to_string(float_format=lambda x: "{:6.3f}".format(x))))
def test_infer_frequency():
    df = pd.read_csv(PEYTON_FILE, nrows=102)[:50]
    m = NeuralProphet()
    # Check if freq is set automatically
    df_train, df_test = m.split_df(df)
    log.debug("freq automatically set")
    # Check if freq is set when equal to the original
    df_train, df_test = m.split_df(df, freq="D")
    log.debug("freq is equal to ideal")
    # Check if freq is set when it differs from the inferred one
    df_train, df_test = m.split_df(df, freq="5D")
    log.debug("freq is set even though it differs from the ideal")
    # Assert that unevenly spaced data raises a ValueError
    index = np.unique(np.geomspace(1, 40, 20, dtype=int))
    df_uneven = df.iloc[index, :]
    with pytest.raises(ValueError):
        m.split_df(df_uneven)
    # Check if freq is set when given explicitly for a df without a definable freq
    df_train, df_test = m.split_df(df_uneven, freq="H")
    log.debug("freq is set even with a non-definable freq")
    # Check if freq is set for a dict of dataframes
    df_dict = {"df1": df, "df2": df}
    m = NeuralProphet()
    m.fit(df_dict, epochs=5)
    log.debug("freq is set for dict of dataframes")
    # Check if freq is set for a dict with a different freq when n_lags=0
    df1 = df.copy(deep=True)
    time_range = pd.date_range(start="1994-12-01",
                               periods=df.shape[0],
                               freq="M")
    df1["ds"] = time_range
    df_dict = {"df1": df, "df2": df1}
    m = NeuralProphet(n_lags=0, epochs=5)
    m.fit(df_dict, epochs=5)
    log.debug("freq is set for list of dataframes(n_lags=0)")
    # Assert that automatic frequency inference fails for a dict with different freqs
    m = NeuralProphet(n_lags=2)
    with pytest.raises(ValueError):
        m.fit(df_dict, epochs=5)
    # Check frequencies that are handled as special cases
    frequencies = ["M", "MS", "Y", "YS", "Q", "QS", "B", "BH"]
    df = df.iloc[:200, :]
    for freq in frequencies:
        df1 = df.copy(deep=True)
        time_range = pd.date_range(start="1994-12-01",
                                   periods=df.shape[0],
                                   freq=freq)
        df1["ds"] = time_range
        df_train, df_test = m.split_df(df1)
    log.debug("freq is set for all the exceptions")