starts,
                           ends,
                           1.05,
                           use_smooth=False,
                           smooth_offset=None)
# Autocorrelation of the daily data at lag 365, with smoothing switched on
# (smooth_offset=2).
corr_365 = F.batch_autocorr(
    xy_daily, 365, starts, ends, 1.05, use_smooth=True, smooth_offset=2)

# Join the three per-lag correlation arrays along axis 1, then keep only the
# first item returned by `normalize` (called with 0 as its second argument).
xy_daily_auto_corr = normalize(
    np.concatenate((corr_7, corr_14, corr_365), axis=1), 0)[0]

# Lag features for lags 7/14/365: cut the first `drop_before` entries of the
# last axis, normalize along axis 2, then swap the last two axes.
xy_lags = normalize(
    F.make_lags(xy_daily, [7, 14, 365])[:, :, drop_before:],
    axis=2,
)[0].transpose(0, 2, 1)

# 1.0 wherever `xy_15min` is neither NaN nor exactly zero, 0.0 elsewhere;
# the first `drop_before` entries along axis 1 are cut off afterwards.
weights = (~np.isnan(xy_15min)
           & (xy_15min != 0)).astype("float32")[:, drop_before:]
# Commented-out weight adjustments (not executed):
# weights[:, :, np.where((power.columns >= "2020-02-01") & (power.columns < "2020-03-01"), 1, 0)] = 0.01
# weights = weights * xy_mean / xy_mean.mean()
# weights = weights.transpose([0, 2, 1])

# Column vector 0..len(weights)-1: one integer id per row of `weights`.
xy_cat = np.arange(len(weights))[:, np.newaxis]


def get_holiday_features(dts):
    # NOTE(review): only the opening of this function is visible in this
    # excerpt; how `dts` is consumed and what is returned cannot be confirmed
    # from here. Presumably `dts` holds dates/timestamps — TODO confirm.

    # Names of the holidays this function singles out.
    select_holidays = [
        "Spring Festival", "National Day", "Labour Day", "New Year's Day",
        "Mid-autumn Festival", "Tomb-sweeping Day"
    ]
Beispiel #2
0
# Skewness and kurtosis of the masked series along axis 1; the kurtosis is
# capped at 10 from above (no lower bound).
series_skew = sp.stats.mstats.skew(series_valid_masked, axis=1).data
series_kurt = np.clip(
    sp.stats.mstats.kurtosis(series_valid_masked, axis=1).data,
    a_min=None,
    a_max=10,
)

# series normalization: subtract the mean and divide by the std (plus a small
# epsilon to avoid division by zero), broadcasting both over axis 1.
# Non-finite results are replaced in place on the temporary (copy=False)
# before the cast to float32.
series = np.nan_to_num(
    (series - np.expand_dims(series_mean, axis=1))
    / (np.expand_dims(series_std, axis=1) + 1e-7),
    copy=False,
).astype("float32")

# lag
from deepseries.functional import make_lags, batch_autocorr, get_valid_start_end

# One lag array per entry of LAGS (smoothing only for lags above 100),
# concatenated along axis 1, last two axes swapped, non-finite values replaced.
series_lags = np.nan_to_num(
    np.concatenate(
        [make_lags(series, lag, use_smooth=bool(lag > 100)) for lag in LAGS],
        1,
    ).transpose([0, 2, 1]))
corr = batch_autocorr(series, LAGS, start, end, 1.05)

# series statistic features: mean and std of each statistic array.
series_mean_mean, series_mean_std = series_mean.mean(), series_mean.std()
series_std_mean, series_std_std = series_std.mean(), series_std.std()
series_skew_mean, series_skew_std = series_skew.mean(), series_skew.std()
series_kurt_mean, series_kurt_std = series_kurt.mean(), series_kurt.std()
    x_norm = (x - mu) / std
    if fill_zero:
        x_norm = np.nan_to_num(x_norm)
    return x_norm, mu, std


# Load the raw tables and pull the series and price frames out as arrays.
df_series, df_price, df_calendar, df_product, df_sub = load_data()

series, price = df_series.values, df_price.values

# Element-wise masks over the raw series: NaN entries and exact zeros.
series_is_nan = np.isnan(series)
series_is_zero = series == 0

# Lag features and lag autocorrelations are derived from the raw series,
# before `series` is rebound to its normalized form further down.
starts, ends = F.get_valid_start_end(series_is_nan)
series_lags = F.make_lags(series, LAGS, use_smooth=True)
series_lags_corr = Values(
    normalize(
        F.batch_autocorr(series, LAGS, starts, ends, threshold=1.05),
        axis=0,
    )[0],
    name='series_lags_corr',
)

# Drop the first DROP_BEFORE entries of the last axis, insert a singleton
# axis at position 1, and normalize along axis 2.
series, series_mean, series_std = normalize(
    np.expand_dims(series[:, DROP_BEFORE:], 1), axis=2)

# Scale the lag features with the mean/std returned for the series itself and
# replace non-finite values.
series_lags = Values(
    np.nan_to_num(
        (series_lags[:, :, DROP_BEFORE:] - series_mean) / series_std),
    'xy_lags',
)

# Two successive forward splits over the positions of the last axis: the
# second output of the first call is split again into validation and test
# indices.
time_idxes = np.arange(series.shape[2])
trn_idx, val_idx = forward_split(time_idxes, ENC_LEN, VALID_LEN + TEST_LEN)
val_idx, test_idx = forward_split(val_idx, ENC_LEN, TEST_LEN)
trn_dl = create_seq2seq_data_loader(series,