# Series normalization: centre each series on its own mean and scale by its
# own std. The 1e-7 term keeps the division finite when a std is zero, and
# np.nan_to_num cleans the remaining non-finite values in place (copy=False).
series = np.nan_to_num(
    (series - np.expand_dims(series_mean, 1))
    / (np.expand_dims(series_std, 1) + 1e-7),
    copy=False,
).astype("float32")

# Lag features: one block per lag, smoothing only for lags above 100.
from deepseries.functional import make_lags, batch_autocorr, get_valid_start_end

series_lags = np.nan_to_num(
    np.concatenate(
        [make_lags(series, lag, use_smooth=bool(lag > 100)) for lag in LAGS],
        1,
    ).transpose([0, 2, 1])
)
corr = batch_autocorr(series, LAGS, start, end, 1.05)

# Series statistic features: mean and std of each per-series statistic,
# taken across all series.
series_mean_mean = series_mean.mean()
series_mean_std = series_mean.std()

series_std_mean = series_std.mean()
series_std_std = series_std.std()

series_skew_mean = series_skew.mean()
series_skew_std = series_skew.std()

series_kurt_mean = series_kurt.mean()
series_kurt_std = series_kurt.std()

# Standardise the per-series statistics with the values computed above.
xy_series_mean = (series_mean - series_mean_mean) / series_mean_std
xy_series_std = (series_std - series_std_mean) / series_std_std
xy_series_skew = (series_skew - series_skew_mean) / series_skew_std
# NOTE(review): whitespace-collapsed excerpt; it begins inside a helper whose
# header is not shown and ends in the middle of a call.
# Visible steps, in order:
#   1. helper tail: when fill_zero is set, pass x_norm through np.nan_to_num,
#      then return (x_norm, mu, std)
#   2. load the data frames and take the raw series / price arrays
#   3. build NaN and zero masks; derive starts/ends from the NaN mask
#   4. build lag features and lag autocorrelations (threshold=1.05) for LAGS
#   5. normalize the autocorrelations along axis 0 and wrap them in Values
#   6. insert a new axis 1, keep index DROP_BEFORE onward along the last axis,
#      normalize along axis 2, then rescale the lag features with the returned
#      series_mean / series_std and pass them through np.nan_to_num
#   7. split the time index with forward_split into trn_idx and a second part,
#      which is split again into val_idx and test_idx
if fill_zero: x_norm = np.nan_to_num(x_norm) return x_norm, mu, std df_series, df_price, df_calendar, df_product, df_sub = load_data() series = df_series.values price = df_price.values series_is_nan = np.isnan(series) series_is_zero = series == 0 starts, ends = F.get_valid_start_end(series_is_nan) series_lags = F.make_lags(series, LAGS, use_smooth=True) series_lags_corr = F.batch_autocorr(series, LAGS, starts, ends, threshold=1.05) series_lags_corr = normalize(series_lags_corr, axis=0)[0] series_lags_corr = Values(series_lags_corr, name='series_lags_corr') series, series_mean, series_std = normalize(series[:, np.newaxis, DROP_BEFORE:], axis=2) series_lags = np.nan_to_num( (series_lags[:, :, DROP_BEFORE:] - series_mean) / series_std) series_lags = Values(series_lags, 'xy_lags') time_idxes = np.arange(series.shape[2]) trn_idx, val_idx = forward_split(time_idxes, ENC_LEN, VALID_LEN + TEST_LEN) val_idx, test_idx = forward_split(val_idx, ENC_LEN, TEST_LEN) trn_dl = create_seq2seq_data_loader(series, enc_len=ENC_LEN,
# NOTE(review): whitespace-collapsed excerpt; the final corr_365 call is cut off.
# Reshapes the 15-minute power values to (62, -1, 4 * 24), takes the daily
# values as xy_daily, sets the split/window constants, derives starts/ends
# from the NaN mask of xy_daily, and calls F.batch_autocorr on xy_daily for
# lags 7, 14 and 365 (fifth argument 1.05, smoothing disabled for 7 and 14).
xy_15min = power_15min.values.reshape(62, -1, 4 * 24) # (62, 1082, 96) xy_daily = power_daily.values N_TEST = 30 N_VALID = 2 DEC_LEN = 2 ENC_LEN = 7 drop_before = 1000 starts, ends = F.get_valid_start_end(np.isnan(xy_daily)) corr_7 = F.batch_autocorr(xy_daily, 7, starts, ends, 1.05, use_smooth=False, smooth_offset=None) corr_14 = F.batch_autocorr(xy_daily, 14, starts, ends, 1.05, use_smooth=False, smooth_offset=None) corr_365 = F.batch_autocorr(xy_daily, 365, starts, ends, 1.05,
def get_autocorr(self, n_lags, threshold=1.5, backoffset=0, use_smooth=False):
    """Compute autocorrelations for the stored series and cache the result.

    Calls ``F.batch_autocorr`` with ``self.series``, ``self.starts`` and
    ``self.ends`` and stores whatever it returns on ``self.autocorr``.

    Args:
        n_lags: Lag specification, forwarded unchanged as the second
            argument of ``F.batch_autocorr``.
        threshold: Forwarded unchanged as the fifth positional argument.
        backoffset: Forwarded unchanged as the sixth positional argument.
        use_smooth: Forwarded unchanged as the seventh positional argument.

    Returns:
        ``self``, so that calls can be chained.
    """
    # Arguments are forwarded positionally, in exactly this order.
    autocorr_args = (
        self.series,
        n_lags,
        self.starts,
        self.ends,
        threshold,
        backoffset,
        use_smooth,
    )
    self.autocorr = F.batch_autocorr(*autocorr_args)
    return self