# Example 1
 def test_train_eval_test(self):
     """Smoke-test the full train / validate / test workflow.

     Builds a small AR model, cleans and splits a 95-row slice of the
     Peyton dataset, fits with per-epoch validation, then evaluates on
     the held-out test split. Only checks that the pipeline runs and
     logs the resulting metric tables.
     """
     log.info("testing: Train Eval Test")
     m = NeuralProphet(
         n_lags=10,
         n_forecasts=3,
         ar_sparsity=0.1,
         epochs=3,
         batch_size=32,
     )
     df = pd.read_csv(PEYTON_FILE, nrows=95)
     # check_y=False: missing targets are tolerated here; they are
     # imputed/dropped by _handle_missing_data below.
     df = df_utils.check_dataframe(df, check_y=False)
     df = m._handle_missing_data(df, freq="D", predicting=False)
     df_train, df_test = m.split_df(df,
                                    freq="D",
                                    valid_p=0.1,
                                    inputs_overbleed=True)
     # Single fit with per-epoch validation. (Previously a second plain
     # m.fit() call retrained the already-fitted model and discarded
     # these metrics.)
     metrics = m.fit(df_train,
                     freq="D",
                     validate_each_epoch=True,
                     valid_p=0.1)
     val_metrics = m.test(df_test)
     log.debug("Metrics: train/eval: \n {}".format(
         metrics.to_string(float_format=lambda x: "{:6.3f}".format(x))))
     log.debug("Metrics: test: \n {}".format(
         val_metrics.to_string(float_format=lambda x: "{:6.3f}".format(x))))
    def test_time_dataset(self):
        """Tabularize a univariate time series and log the input shapes."""
        # Load a stored time series; the air-passengers file serves here.
        raw = pd.read_csv(AIR_FILE, index_col=False)
        log.debug("Infile shape: {}".format(raw.shape))

        lags = 3
        horizon = 1
        holdout_fraction = 0.2
        df_train, df_val = df_utils.split_df(raw,
                                             lags,
                                             horizon,
                                             holdout_fraction,
                                             inputs_overbleed=True)

        # Normalize the training portion, then build the tabular dataset.
        cleaned = df_utils.check_dataframe(df_train)
        data_params = df_utils.init_data_params(cleaned, normalize="minmax")
        cleaned = df_utils.normalize(cleaned, data_params)
        inputs, targets = time_dataset.tabularize_univariate_datetime(
            cleaned,
            n_lags=lags,
            n_forecasts=horizon,
        )
        shape_descriptions = [
            "{}: {}".format(name, arr.shape) for name, arr in inputs.items()
        ]
        log.debug("tabularized inputs: {}".format("; ".join(shape_descriptions)))
def test_df_utils_func():
    """Smoke-test df_utils helpers: threshold split and normalization init."""
    log.info("testing: df_utils Test")
    df = pd.read_csv(PEYTON_FILE, nrows=95)
    df = df_utils.check_dataframe(df, check_y=False)

    # Exercise find_time_threshold and the timestamp-based split.
    df_dict, _ = df_utils.prep_copy_df_dict(df)
    time_threshold = df_utils.find_time_threshold(df_dict,
                                                  n_lags=2,
                                                  valid_p=0.2,
                                                  inputs_overbleed=True)
    df_train, df_val = df_utils.split_considering_timestamp(
        df_dict,
        n_lags=2,
        n_forecasts=2,
        inputs_overbleed=True,
        threshold_time_stamp=time_threshold)

    # Initialize data params for each supported normalization mode in turn.
    for mode in ("soft", "soft1", "standardize"):
        global_data_params = df_utils.init_data_params(df_dict, normalize=mode)

    log.debug("Time Threshold: \n {}".format(time_threshold))
    log.debug("Df_train: \n {}".format(type(df_train)))
    log.debug("Df_val: \n {}".format(type(df_val)))
        def check_split(df_in,
                        df_len_expected,
                        n_lags,
                        n_forecasts,
                        freq,
                        p=0.1):
            """Split df_in and verify sample counts on both sides of the split."""
            model = NeuralProphet(
                n_lags=n_lags,
                n_forecasts=n_forecasts,
            )
            df_in = df_utils.check_dataframe(df_in, check_y=False)
            df_in = model._handle_missing_data(df_in, freq=freq, predicting=False)
            assert df_len_expected == len(df_in)

            # Total usable samples across the whole series (with overbleed).
            total_samples = len(df_in) - n_lags - 2 * n_forecasts + 2
            train_part, test_part = model.split_df(df_in,
                                                   freq=freq,
                                                   valid_p=0.1,
                                                   inputs_overbleed=True)
            # Samples available within each split on its own.
            n_train = len(train_part) - n_lags - n_forecasts + 1
            n_test = len(test_part) - n_lags - n_forecasts + 1
            assert total_samples == n_train + n_test

            # The test side gets fraction p of the samples, at least one.
            n_test_expected = max(1, int(total_samples * p))
            n_train_expected = total_samples - n_test_expected
            assert n_train == n_train_expected
            assert n_test == n_test_expected