import logging

import numpy as np
import pandas as pd

from neuralprophet import NeuralProphet, df_utils, time_dataset

log = logging.getLogger("NP.test")

# PEYTON_FILE and AIR_FILE are module-level constants pointing at the example
# CSV files; their definitions live elsewhere in the test suite.


def test_df_utils_func():
    log.info("testing: df_utils Test")
    df = pd.read_csv(PEYTON_FILE, nrows=95)
    df = df_utils.check_dataframe(df, check_y=False)

    # test find_time_threshold
    df_dict, _ = df_utils.prep_copy_df_dict(df)
    time_threshold = df_utils.find_time_threshold(df_dict, n_lags=2, valid_p=0.2, inputs_overbleed=True)
    df_train, df_val = df_utils.split_considering_timestamp(
        df_dict, n_lags=2, n_forecasts=2, inputs_overbleed=True, threshold_time_stamp=time_threshold
    )

    # init data params with several normalization modes on the df dict
    global_data_params = df_utils.init_data_params(df_dict, normalize="soft")
    global_data_params = df_utils.init_data_params(df_dict, normalize="soft1")
    global_data_params = df_utils.init_data_params(df_dict, normalize="standardize")

    log.debug("Time Threshold: \n {}".format(time_threshold))
    log.debug("Df_train: \n {}".format(type(df_train)))
    log.debug("Df_val: \n {}".format(type(df_val)))

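# Illustrative sketch (not library code): the modes exercised above are
# assumed to behave as follows -- "standardize" gives zero mean and unit
# variance, "soft" maps the series minimum to 0.0 and the 95th quantile to
# 1.0, and "soft1" maps the minimum to 0.1 and the 90th quantile to 0.9.
# A minimal reimplementation of "soft" under those assumptions:
def _soft_normalize_sketch(y, q=0.95):
    """Hypothetical helper: scale min(y) to 0.0 and the q-quantile to 1.0."""
    shift = np.min(y)
    scale = np.quantile(y, q) - shift
    return (y - shift) / scale if scale != 0 else y - shift
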
def test_time_dataset():
    # manually load any file that stores a time series, for example:
    df_in = pd.read_csv(AIR_FILE, index_col=False)
    log.debug("Infile shape: {}".format(df_in.shape))

    n_lags = 3
    n_forecasts = 1
    valid_p = 0.2
    df_train, df_val = df_utils.split_df(df_in, n_lags, n_forecasts, valid_p, inputs_overbleed=True)

    # create a tabularized dataset from the time series
    df = df_utils.check_dataframe(df_train)
    data_params = df_utils.init_data_params(df, normalize="minmax")
    df = df_utils.normalize(df, data_params)
    inputs, targets = time_dataset.tabularize_univariate_datetime(
        df,
        n_lags=n_lags,
        n_forecasts=n_forecasts,
    )
    log.debug(
        "tabularized inputs: {}".format(
            "; ".join(["{}: {}".format(inp, values.shape) for inp, values in inputs.items()])
        )
    )

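# Hedged shape-check sketch: with n_lags=3 and n_forecasts=1, each sample is
# expected to carry a window of 3 lagged values and a single target. The keys
# of `inputs` depend on the model configuration; "lags" is an assumption here.
def _check_tabularized_shapes_sketch(inputs, targets, n_lags=3, n_forecasts=1):
    n_samples = targets.shape[0]
    assert targets.shape == (n_samples, n_forecasts)
    if "lags" in inputs:
        assert inputs["lags"].shape == (n_samples, n_lags)
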
def test_normalize():
    # probe normalization on near-constant series shifted to sit at, or within
    # float epsilon of, zero
    for add in [0, -1, 0.00000001, -0.99999999]:
        length = 1000
        days = pd.date_range(start="2017-01-01", periods=length)
        y = np.zeros(length)
        y[1] = 1
        y = y + add
        df = pd.DataFrame({"ds": days, "y": y})
        m = NeuralProphet(normalize="soft")
        data_params = df_utils.init_data_params(
            df,
            normalize=m.normalize,
            covariates_config=m.config_covar,
            regressor_config=m.regressors_config,
            events_config=m.events_config,
        )
        df_norm = df_utils.normalize(df, data_params)

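# Hedged sanity sketch: the offsets above probe that data-param initialization
# never divides by a (near-)zero scale. Assuming the normalized series is
# exposed in a "y_scaled" column (an assumption, not verified here):
def _assert_finite_after_normalize_sketch(df_norm):
    assert np.all(np.isfinite(df_norm["y_scaled"].values))
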
# Method of the normalization config object (accessed as m.config_normalization
# in the tests in this file).
def init_data_params(self, df_dict, covariates_config=None, regressor_config=None, events_config=None):
    if len(df_dict) == 1:
        if not self.global_normalization:
            log.info("Setting normalization to global as only one dataframe provided for training.")
            self.global_normalization = True
    self.local_data_params, self.global_data_params = df_utils.init_data_params(
        df_dict=df_dict,
        normalize=self.normalize,
        covariates_config=covariates_config,
        regressor_config=regressor_config,
        events_config=events_config,
        global_normalization=self.global_normalization,
        global_time_normalization=self.global_time_normalization,
    )

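# Hedged usage sketch for the method above (assumes the config object carries
# the attributes referenced in this file: normalize, global_normalization,
# global_time_normalization, unknown_data_normalization; constructor signature
# is an assumption, so the sketch is left commented out):
#
#   norm_config = Normalization(
#       normalize="soft",
#       global_normalization=False,
#       global_time_normalization=False,
#       unknown_data_normalization=False,
#   )
#   norm_config.init_data_params(df_utils.prep_copy_df_dict(df)[0])
#   # with a single dataframe, global_normalization is forced to True above
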
def test_normalize():
    length = 100
    days = pd.date_range(start="2017-01-01", periods=length)
    y = np.ones(length)
    y[1] = 0
    y[2] = 2
    y[3] = 3.3
    df = pd.DataFrame({"ds": days, "y": y})
    m = NeuralProphet(normalize="soft")

    # with config
    m.config_normalization.init_data_params(
        df_utils.prep_copy_df_dict(df)[0], m.config_covar, m.regressors_config, m.events_config
    )
    df_norm = m._normalize(df_utils.prep_copy_df_dict(df)[0])
    m.config_normalization.unknown_data_normalization = True
    df_norm = m._normalize(df_utils.prep_copy_df_dict(df)[0])
    m.config_normalization.unknown_data_normalization = False

    # using config for utils
    df_norm = df_utils.normalize(df.copy(deep=True), m.config_normalization.global_data_params)
    df_norm = df_utils.normalize(
        df_utils.prep_copy_df_dict(df)[0]["__df__"], m.config_normalization.local_data_params["__df__"]
    )

    # with utils
    local_data_params, global_data_params = df_utils.init_data_params(
        df_dict=df_utils.prep_copy_df_dict(df)[0],
        normalize=m.config_normalization.normalize,
        covariates_config=m.config_covar,
        regressor_config=m.regressors_config,
        events_config=m.events_config,
        global_normalization=m.config_normalization.global_normalization,
        global_time_normalization=m.config_normalization.global_time_normalization,
    )
    df_norm = df_utils.normalize(df.copy(deep=True), global_data_params)
    df_norm = df_utils.normalize(
        df_utils.prep_copy_df_dict(df)[0]["__df__"], local_data_params["__df__"]
    )

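# Note (inferred from the indexing above, not verified): prep_copy_df_dict
# wraps a single DataFrame into a dict under the internal key "__df__", which
# is why the local data params are looked up with that key. A quick sanity
# sketch under that assumption:
def _prep_copy_df_dict_sketch(df):
    df_dict, _ = df_utils.prep_copy_df_dict(df)  # second return value ignored, as in the tests above
    assert "__df__" in df_dict  # assumed internal key for an unnamed dataframe
    return df_dict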