예제 #1
0
# series normalization

# Standardize every series with its own mean/std, both expanded along axis 1
# so they broadcast against `series`. The 1e-7 added to the std keeps the
# denominator non-zero; nan_to_num is called with copy=False and the result
# is cast to float32.
series = np.nan_to_num(
    (series - np.expand_dims(series_mean, 1))
    / (np.expand_dims(series_std, 1) + 1e-7),
    copy=False,
).astype("float32")

# lag
from deepseries.functional import make_lags, batch_autocorr, get_valid_start_end

# One make_lags() result per entry of LAGS; smoothing is requested only for
# lags above 100. The results are joined on axis 1, the last two axes are
# swapped, and NaNs are replaced via nan_to_num.
series_lags = np.nan_to_num(
    np.concatenate(
        [make_lags(series, lag, use_smooth=bool(lag > 100)) for lag in LAGS],
        axis=1,
    ).transpose((0, 2, 1))
)
corr = batch_autocorr(series, LAGS, start, end, 1.05)

# series statistic features

# Mean and standard deviation of each per-series statistic, taken over the
# whole statistic array.
series_mean_mean, series_mean_std = series_mean.mean(), series_mean.std()
series_std_mean, series_std_std = series_std.mean(), series_std.std()
series_skew_mean, series_skew_std = series_skew.mean(), series_skew.std()
series_kurt_mean, series_kurt_std = series_kurt.mean(), series_kurt.std()

# Standardized (z-scored) versions of the per-series statistics.
xy_series_mean = (series_mean - series_mean_mean) / series_mean_std
xy_series_std = (series_std - series_std_mean) / series_std_std
xy_series_skew = (series_skew - series_skew_mean) / series_skew_std
    if fill_zero:
        x_norm = np.nan_to_num(x_norm)
    return x_norm, mu, std


df_series, df_price, df_calendar, df_product, df_sub = load_data()

# Raw value arrays behind the series and price frames.
series, price = df_series.values, df_price.values

# Boolean masks over the raw series.
series_is_nan = np.isnan(series)
series_is_zero = (series == 0)

# Lag features and lag autocorrelations are computed from the raw series,
# i.e. before `series` is rebound to its normalized form further down.
starts, ends = F.get_valid_start_end(series_is_nan)
series_lags = F.make_lags(series, LAGS, use_smooth=True)
# Only the first element returned by normalize() is kept for the
# autocorrelations, which are then wrapped in a named Values object.
series_lags_corr = Values(
    normalize(
        F.batch_autocorr(series, LAGS, starts, ends, threshold=1.05),
        axis=0,
    )[0],
    name='series_lags_corr',
)

# Insert a new axis at position 1, drop the first DROP_BEFORE steps of the
# last axis, and normalize along axis 2. normalize() yields the normalized
# array together with the mean and std that were used.
series, series_mean, series_std = normalize(
    series[:, np.newaxis, DROP_BEFORE:], axis=2)

# The lags are trimmed the same way and scaled with the series' own
# mean/std; NaNs are replaced via nan_to_num before wrapping.
series_lags = Values(
    np.nan_to_num(
        (series_lags[:, :, DROP_BEFORE:] - series_mean) / series_std),
    'xy_lags',
)

# Indices along the last axis of the normalized series, split first into
# train / (valid + test) and then the held-out part into valid / test.
time_idxes = np.arange(series.shape[2])
trn_idx, val_idx = forward_split(time_idxes, ENC_LEN, VALID_LEN + TEST_LEN)
val_idx, test_idx = forward_split(val_idx, ENC_LEN, TEST_LEN)
trn_dl = create_seq2seq_data_loader(series,
                                    enc_len=ENC_LEN,
# Reshape the 15-minute readings into 62 leading entries with 4 * 24 = 96
# values on the last axis (presumably one day of quarter-hour samples —
# TODO confirm).
xy_15min = power_15min.values.reshape(62, -1, 4 * 24)  # (62, 1082, 96)
xy_daily = power_daily.values

N_TEST, N_VALID = 30, 2
DEC_LEN, ENC_LEN = 2, 7

drop_before = 1000

# Autocorrelation of the daily values at lags 7 and 14, computed with
# identical settings (threshold 1.05, no smoothing) over the start/end
# positions derived from the NaN mask.
starts, ends = F.get_valid_start_end(np.isnan(xy_daily))
corr_7, corr_14 = (
    F.batch_autocorr(
        xy_daily,
        n_lag,
        starts,
        ends,
        1.05,
        use_smooth=False,
        smooth_offset=None,
    )
    for n_lag in (7, 14)
)
corr_365 = F.batch_autocorr(xy_daily,
                            365,
                            starts,
                            ends,
                            1.05,
예제 #4
0
 def get_autocorr(self, n_lags, threshold=1.5, backoffset=0, use_smooth=False):
     """Compute the autocorrelation and store it on ``self.autocorr``.

     ``self.series``, ``self.starts`` and ``self.ends`` are forwarded to
     ``F.batch_autocorr`` together with the arguments below, all passed
     positionally in this order.

     Args:
         n_lags: Lag specification handed to ``F.batch_autocorr``.
         threshold: Fifth positional argument of ``F.batch_autocorr``.
         backoffset: Sixth positional argument of ``F.batch_autocorr``.
         use_smooth: Seventh positional argument of ``F.batch_autocorr``.

     Returns:
         ``self``, so that calls can be chained.
     """
     autocorr_args = (
         self.series,
         n_lags,
         self.starts,
         self.ends,
         threshold,
         backoffset,
         use_smooth,
     )
     self.autocorr = F.batch_autocorr(*autocorr_args)
     return self