def add_is_off_column(df, metadata):
    """Add an ``is_day_off`` column to ``df`` in place.

    Nothing happens when the column is already present. Otherwise
    ``_is_day_off`` is evaluated once per row, using that row's
    ``series_id`` and ``weekday`` values together with ``metadata``, and
    the collected results are stored as the new column. A progress bar
    labelled 'Adding day off' tracks the pass over the rows.

    The function always returns None; ``df`` itself is modified.
    """
    if 'is_day_off' in df.columns:
        return
    progress = tqdm(df.iterrows(), total=len(df), desc='Adding day off')
    flags = [
        _is_day_off(record['series_id'], record['weekday'], metadata)
        for _, record in progress
    ]
    df['is_day_off'] = flags
Exemple #2
0
def _prepare_is_day_off(window, df, metadata, series_id):
    """Build the day-off feature for the known days plus the days to predict.

    The known part is every 24th value of ``df.is_holiday`` (presumably one
    value per day of hourly data -- confirm against the caller). It is
    followed by one entry per day in ``WINDOW_TO_PRED_DAYS[window]``,
    stepping forward from the last timestamp and weekday found in ``df``;
    such a day is flagged when ``_is_day_off`` is truthy for its weekday
    or, failing that, when ``_is_holiday`` is truthy for its date.

    Returns a float32 array with a leading axis of length 1 in which every
    entry equal to 0 has been replaced by -1.
    """
    history = df.is_holiday.values[::24].tolist()
    date = df.timestamp.values[-1]
    weekday = df.weekday.values[-1]

    upcoming = []
    for _ in range(WINDOW_TO_PRED_DAYS[window]):
        date = _get_next_date(date)
        weekday = _get_next_weekday(weekday)
        # The holiday lookup only runs when the weekday rule says "working".
        upcoming.append(
            _is_day_off(series_id, weekday, metadata) or _is_holiday(date))

    flags = np.array(history + upcoming, dtype=np.float32)
    # Encode "not off" as -1 instead of 0.
    flags[flags == 0] = -1
    return flags[np.newaxis, ...]
def _prepare_future_day_off(window, df, metadata, series_id):
    """Build the day-off feature for the days to predict only.

    One flag is produced per day in ``WINDOW_TO_PRED_DAYS[window]``,
    stepping forward from the last timestamp and weekday found in ``df``.
    A day is flagged when ``_is_day_off`` is truthy for its weekday or,
    failing that, when ``_is_holiday`` is truthy for its date.

    Returns a float32 array whose layout depends on ``window``:
        * 'hourly': each flag repeated 24 times, shape (1, 24 * n_days, 1)
        * 'weekly': flags split into two rows, shape (1, 2, n_days // 2)
        * anything else: shape (1, n_days, 1)
    """
    date = df.timestamp.values[-1]
    weekday = df.weekday.values[-1]

    collected = []
    for _ in range(WINDOW_TO_PRED_DAYS[window]):
        date = _get_next_date(date)
        weekday = _get_next_weekday(weekday)
        # The holiday lookup only runs when the weekday rule says "working".
        collected.append(
            _is_day_off(series_id, weekday, metadata) or _is_holiday(date))
    flags = np.array(collected, dtype=np.float32)

    if window == 'hourly':
        # One flag per hour, then a trailing feature axis.
        shaped = np.repeat(flags, 24, axis=0)[:, np.newaxis]
    elif window == 'weekly':
        shaped = flags.reshape(2, -1)
    else:
        shaped = flags[:, np.newaxis]
    # Leading axis of length 1.
    return shaped[np.newaxis, ...]
def visualize_idx(idx, train, train_arrange, preds, metadata):
    """Plot one series' consumption and overlay its predictions.

    ``train_arrange.loc[idx]`` supplies the series to draw (``series_id``),
    the slice of that series (``train_start_idx`` up to ``val_end_idx``),
    the resolution (``window``) and the score shown in the title
    (``nmae``).

    Consumption is drawn batch by batch: 24 points per batch for
    'hourly', one point per batch for 'daily'. A batch is orange when
    ``_is_day_off`` is truthy for the weekday of its first point and blue
    otherwise, with a marker on that first point. ``preds[idx]`` is drawn
    in green over the last ``len(preds[idx])`` dates, with a marker every
    ``batch_size`` points.

    Raises:
        ValueError: if the row's window is neither 'hourly' nor 'daily'.
            Previously this case failed with an UnboundLocalError part-way
            through plotting.
    """
    row = train_arrange.loc[idx]
    window = row['window']
    # Validate before touching the figure so a bad row leaves no partial plot.
    if window not in ('hourly', 'daily'):
        raise ValueError(
            "Unsupported window %r: expected 'hourly' or 'daily'" % (window,))

    df = train[train.series_id == row['series_id']]
    span = slice(row['train_start_idx'], row['val_end_idx'])
    consumption = df.consumption.values[span]
    dates = df.timestamp.values[span]
    weekdays = df.weekday.values[span]

    if window == 'hourly':
        batch_size = 24
    else:
        batch_size = 1
        # Keep one entry per 24 rows; group_sum presumably sums each run of
        # 24 consecutive values -- confirm against its definition.
        weekdays = weekdays[::24]
        dates = dates[::24]
        consumption = group_sum(consumption, 24)

    prediction = preds[idx]
    prediction_dates = dates[-len(prediction):]
    plt.plot(prediction_dates, prediction, color='green', lw=3)
    plt.plot(prediction_dates[::batch_size],
             prediction[::batch_size],
             'o',
             color='green',
             lw=3)

    for i in range(len(dates) // batch_size):
        start = i * batch_size
        # The slice runs one point into the next batch so that consecutive
        # segments connect.
        stop = start + batch_size + 1
        if _is_day_off(row['series_id'], weekdays[start], metadata):
            color = 'orange'
        else:
            color = 'blue'
        plt.plot(dates[start:stop], consumption[start:stop], color=color)
        # Marker on the first point of the batch only.
        plt.plot(dates[start:start + 1],
                 consumption[start:start + 1],
                 'o',
                 color=color)
    plt.title('%i Nmae: %.3f' % (idx, row['nmae']))
 def _is_day_off(self, weekday, series_id, date):
     """Return the day-off flag for one day, converted with ``int()``.

     Delegates to the module-level ``_is_day_off`` function (note its
     argument order: series id first, then weekday) using
     ``self._metadata``. When ``self._use_holidays`` is truthy and that
     result is falsy, the result of ``_is_holiday(date)`` is used instead.
     """
     # Inside the method body this name resolves to the module-level
     # function, not to this method.
     day_off = _is_day_off(series_id, weekday, self._metadata)
     if self._use_holidays and not day_off:
         day_off = _is_holiday(date)
     return int(day_off)