starts, ends, 1.05, use_smooth=False, smooth_offset=None)
# NOTE(review): the line above is the tail of a call that begins before this
# chunk; it is kept verbatim.

# Autocorrelation features at lag 365 via F.batch_autocorr; unlike the call
# ending above, this one enables smoothing (use_smooth=True, smooth_offset=2).
# NOTE(review): the exact meaning of the 1.05 argument and of the smoothing
# options is defined by F.batch_autocorr -- confirm there.
corr_365 = F.batch_autocorr(xy_daily, 365, starts, ends, 1.05, use_smooth=True, smooth_offset=2)

# Join the three per-lag results along axis 1, then normalise along axis 0 and
# keep only the first item that normalize() returns.
xy_daily_auto_corr = np.concatenate([corr_7, corr_14, corr_365], 1)
xy_daily_auto_corr = normalize(xy_daily_auto_corr, 0)[0]

# Output of F.make_lags for lags 7, 14 and 365, trimmed to start at
# `drop_before` on the last axis, normalised along axis 2 (first returned item
# only), and finally with its last two axes swapped.
xy_lags = normalize(F.make_lags(xy_daily, [7, 14, 365])[:, :, drop_before:], axis=2)[0].transpose([0, 2, 1])

# float32 mask: 1.0 where xy_15min is neither NaN nor exactly 0, 0.0 elsewhere;
# positions before `drop_before` on axis 1 are dropped.
weights = (~np.bitwise_or(np.isnan(xy_15min), xy_15min == 0)).astype("float32")[:, drop_before:]
# Disabled weighting variants, left in place as found:
# weights[:, :, np.where((power.columns >= "2020-02-01") & (power.columns < "2020-03-01"), 1, 0)] = 0.01
# weights = weights * xy_mean / xy_mean.mean()
# weights = weights.transpose([0, 2, 1])

# One integer id per row of `weights` (0 .. len(weights) - 1), shaped as a
# column vector of shape (len(weights), 1).
xy_cat = np.expand_dims(np.arange(len(weights)), 1)


def get_holiday_features(dts):
    # NOTE(review): only the start of this function is visible in this chunk;
    # presumably these are the holiday names it keeps -- confirm against the
    # rest of the body, including how `dts` is consumed.
    select_holidays = [
        "Spring Festival", "National Day", "Labour Day", "New Year's Day",
        "Mid-autumn Festival", "Tomb-sweeping Day"
    ]
# Per-series skewness and kurtosis from the masked-array statistics, taken
# along axis 1; `.data` takes the underlying array of each result. Kurtosis is
# capped at 10 from above (no lower bound).
series_skew = sp.stats.mstats.skew(series_valid_masked, axis=1).data
series_kurt = np.clip(
    sp.stats.mstats.kurtosis(series_valid_masked, axis=1).data, None, 10)

# series normalization
# Standardise every row with its own mean and std (the 1e-7 keeps the divisor
# non-zero), replace NaNs in place on the temporary result (copy=False), and
# cast to float32.
series = np.nan_to_num(
    (series - np.expand_dims(series_mean, 1))
    / (np.expand_dims(series_std, 1) + 1e-7),
    copy=False,
).astype("float32")

# lag
from deepseries.functional import (
    batch_autocorr,
    get_valid_start_end,
    make_lags,
)

# One make_lags call per entry of LAGS, with smoothing enabled only for lags
# above 100. The pieces are joined along axis 1, the last two axes are then
# swapped, and remaining NaNs are replaced.
series_lags = np.concatenate(
    [make_lags(series, lag, use_smooth=lag > 100) for lag in LAGS], 1
).transpose([0, 2, 1])
series_lags = np.nan_to_num(series_lags)

# NOTE(review): the meaning of the 1.05 argument is defined by batch_autocorr
# -- confirm there.
corr = batch_autocorr(series, LAGS, start, end, 1.05)

# series statistic features
# Scalar mean and std of each per-series statistic across all series.
series_mean_mean = series_mean.mean()
series_mean_std = series_mean.std()
series_std_mean = series_std.mean()
series_std_std = series_std.std()
series_skew_mean = series_skew.mean()
series_skew_std = series_skew.std()
series_kurt_mean = series_kurt.mean()
series_kurt_std = series_kurt.std()
# NOTE(review): the next three statements are the tail of a normalisation
# helper whose `def` line precedes this chunk, so their original indentation
# cannot be recovered here. They standardise `x` with `mu` and `std`, pass the
# result through np.nan_to_num when `fill_zero` is truthy, and return the
# triple (x_norm, mu, std).
x_norm = (x - mu) / std
if fill_zero:
    x_norm = np.nan_to_num(x_norm)
return x_norm, mu, std


# load_data() yields five objects; only df_series and df_price are used in the
# statements visible in this chunk.
df_series, df_price, df_calendar, df_product, df_sub = load_data()

series = df_series.values
price = df_price.values
# Boolean masks over the raw series: NaN entries and exact zeros.
series_is_nan = np.isnan(series)
series_is_zero = series == 0
# NOTE(review): presumably the first/last valid position of each series,
# derived from the NaN mask -- confirm in F.get_valid_start_end.
starts, ends = F.get_valid_start_end(series_is_nan)
# Lags and their autocorrelations are computed on the raw series, i.e. before
# `series` is trimmed and normalised below.
series_lags = F.make_lags(series, LAGS, use_smooth=True)
series_lags_corr = F.batch_autocorr(series, LAGS, starts, ends, threshold=1.05)
# Normalise along axis 0, keeping only the first item returned by normalize().
series_lags_corr = normalize(series_lags_corr, axis=0)[0]
series_lags_corr = Values(series_lags_corr, name='series_lags_corr')

# Insert a new axis at position 1, drop everything before DROP_BEFORE on the
# last axis, and normalise along axis 2; the mean and std are kept for reuse.
series, series_mean, series_std = normalize(series[:, np.newaxis, DROP_BEFORE:], axis=2)
# Trim the lag array the same way and scale it with the statistics of the
# series itself rather than its own; NaNs in the result are replaced by
# np.nan_to_num.
series_lags = np.nan_to_num(
    (series_lags[:, :, DROP_BEFORE:] - series_mean) / series_std)
series_lags = Values(series_lags, 'xy_lags')

# Indices along the last axis of the normalised series.
time_idxes = np.arange(series.shape[2])
# Two successive forward_split calls: the first separates a training part from
# the rest using VALID_LEN + TEST_LEN, the second splits that rest again using
# TEST_LEN. NOTE(review): the exact overlap/window semantics involving ENC_LEN
# are defined by forward_split -- confirm there.
trn_idx, val_idx = forward_split(time_idxes, ENC_LEN, VALID_LEN + TEST_LEN)
val_idx, test_idx = forward_split(val_idx, ENC_LEN, TEST_LEN)
# NOTE(review): the call below continues past the end of this chunk; it is
# kept verbatim.
trn_dl = create_seq2seq_data_loader(series,