Ejemplo n.º 1
0
 def __init__(self, series, mask=None, mask_zero=True, mask_nan=True):
     """Store the series, build its mask and derive the valid span bounds.

     Args:
         series: Data to wrap; forwarded unchanged to ``F.mask_zero_nan``
             and ``F.get_valid_start_end``.
         mask: Optional extra mask. When given, it is combined with the
             zero/NaN mask through ``np.bitwise_or``; when ``None`` the
             zero/NaN mask is used on its own.
         mask_zero: Forwarded to ``F.mask_zero_nan`` and also kept on the
             instance as ``self.mask_zero``.
         mask_nan: Forwarded to ``F.mask_zero_nan``.
     """
     self.series = series

     # The zero/NaN mask is needed whether or not a caller mask is supplied.
     zero_nan_mask = F.mask_zero_nan(series, mask_zero, mask_nan)
     self.mask = (
         zero_nan_mask if mask is None else np.bitwise_or(mask, zero_nan_mask)
     )
     self.mask_zero = mask_zero

     starts, ends = F.get_valid_start_end(self.series, self.mask)
     self.starts = starts
     self.ends = ends
     self.valid_lens = ends - starts

     # Start out unset (presumably filled in later by other methods).
     self.autocorr = self.trend = self.max_T = None
Ejemplo n.º 2
0
    print(f"calendar shape {cal.shape}")

    submission = pd.read_csv(os.path.join(DIR, "sample_submission.csv"))
    print(f"submisson shape {submission.shape}")
    return xy, price, cal, product, submission


# Unpack the five objects returned by load_data().
df_series, df_price, df_calendar, df_product, df_sub = load_data()
# Underlying value arrays of the series and price tables.
series = df_series.values
price = df_price.values

# Series state: int8 indicator arrays (1 where the condition holds, else 0).
series_nan = np.isnan(series).astype("int8")
series_zero = (series == 0).astype("int8")
# The combined "NaN or zero" indicator is what get_valid_start_end receives.
# NOTE(review): presumably yields the first/last valid position per series
# -- confirm against the helper's definition.
start, end = get_valid_start_end(np.bitwise_or(series_nan, series_zero))

# Series statistics, reduced along axis=1. Only NaN entries are masked out
# here; zeros still take part in the statistics.

series_valid_masked = np.ma.masked_array(series, mask=series_nan.astype(bool))

# `.data` takes the plain underlying array of each masked result.
series_mean = series_valid_masked.mean(axis=1).data
series_std = series_valid_masked.std(axis=1).data
series_skew = sp.stats.mstats.skew(series_valid_masked, axis=1).data
# Kurtosis is capped from above at 10; no lower bound is applied.
series_kurt = np.clip(
    sp.stats.mstats.kurtosis(series_valid_masked, axis=1).data, None, 10)

# series normalization

series = np.nan_to_num((series - np.expand_dims(series_mean, 1)) /
                       (np.expand_dims(series_std, 1) + 1e-7),
    "1D").sum().reset_index()
# Long -> wide: one row per 'cid', one column per 'data_time', cells taken
# from 'value'.
power_daily = power_daily.pivot(index='cid',
                                columns='data_time',
                                values='value')

# Regroup the 15-minute values so the last axis has length 4 * 24 = 96
# (presumably the readings of one day); the first axis is fixed at 62.
xy_15min = power_15min.values.reshape(62, -1, 4 * 24)  # (62, 1082, 96)
xy_daily = power_daily.values

# Split and window sizes.
# NOTE(review): units are presumably steps of the daily series -- confirm
# where these constants are consumed.
N_TEST = 30
N_VALID = 2
DEC_LEN = 2
ENC_LEN = 7

drop_before = 1000

# Valid span per series, derived from the NaN mask of the daily matrix
# (exact semantics are defined by F.get_valid_start_end).
starts, ends = F.get_valid_start_end(np.isnan(xy_daily))
# Two F.batch_autocorr calls that differ only in the second positional
# argument (7 vs. 14, presumably the lag). Smoothing is disabled in both.
# NOTE(review): 1.05 looks like a threshold -- confirm against the helper.
corr_7 = F.batch_autocorr(xy_daily,
                          7,
                          starts,
                          ends,
                          1.05,
                          use_smooth=False,
                          smooth_offset=None)
corr_14 = F.batch_autocorr(xy_daily,
                           14,
                           starts,
                           ends,
                           1.05,
                           use_smooth=False,
                           smooth_offset=None)
corr_365 = F.batch_autocorr(xy_daily,
    std = np.nanstd(x, axis, keepdims=True)
    x_norm = (x - mu) / std
    if fill_zero:
        x_norm = np.nan_to_num(x_norm)
    return x_norm, mu, std


# Unpack the five objects returned by load_data().
df_series, df_price, df_calendar, df_product, df_sub = load_data()

# Underlying value arrays of the series and price tables.
series = df_series.values
price = df_price.values

# Boolean masks with the same shape as `series`.
series_is_nan = np.isnan(series)
series_is_zero = series == 0

# Only the NaN mask (not the zero mask) is used to find the valid span.
starts, ends = F.get_valid_start_end(series_is_nan)
# Lag features and lag autocorrelations are built from the raw series,
# before `series` is rebound to its normalized form below.
series_lags = F.make_lags(series, LAGS, use_smooth=True)
series_lags_corr = F.batch_autocorr(series, LAGS, starts, ends, threshold=1.05)
# Normalize along axis 0 and keep only the first returned value.
series_lags_corr = normalize(series_lags_corr, axis=0)[0]
series_lags_corr = Values(series_lags_corr, name='series_lags_corr')

# Insert a singleton axis at position 1, drop the first DROP_BEFORE entries
# of the last axis, then normalize along axis 2. `series` is rebound to the
# normalized result; the other two values are kept for reuse on the lags.
series, series_mean, series_std = normalize(series[:, np.newaxis,
                                                   DROP_BEFORE:],
                                            axis=2)
# Apply the same trim and the series' own mean/std to the lag features;
# nan_to_num replaces NaNs in the result with 0.
series_lags = np.nan_to_num(
    (series_lags[:, :, DROP_BEFORE:] - series_mean) / series_std)
series_lags = Values(series_lags, 'xy_lags')

# Index positions along the last axis of the normalized series, split in two
# stages: first train vs. the rest, then the rest into validation vs. test.
# NOTE(review): forward_split's exact windowing (how ENC_LEN is used) is
# defined elsewhere -- confirm before relying on index overlap behaviour.
time_idxes = np.arange(series.shape[2])
trn_idx, val_idx = forward_split(time_idxes, ENC_LEN, VALID_LEN + TEST_LEN)
val_idx, test_idx = forward_split(val_idx, ENC_LEN, TEST_LEN)