from keras.callbacks import EarlyStopping
import cPickle as pickle
import time
import sys
import pandas as pd

np.random.seed(1337)  # for reproducibility
from deepst_flow.config import Config
DATAPATH = Config().DATAPATH

if len(sys.argv) != 4:
    print(__doc__)
    sys.exit(-1)

fname = sys.argv[1]
data, timestamps = load_stdata(os.path.join(DATAPATH, '{}.h5'.format(fname)))

T = 48
slot_time = 24. * 60 / 48
# setting
nb_flow = 2
seq_len = 3

data = data[:, :nb_flow]

preprocessing_name = sys.argv[2]
model_name = sys.argv[3]

# load TCN and MMS
fpkl = open(preprocessing_name, 'rb')
mmn = pickle.load(fpkl)
def period_trend_closeness(len_closeness=3,
                           len_trend=3,
                           TrendInterval=7,
                           len_period=3,
                           PeriodInterval=1):
    print("start: period_trend_closeness")
    model_name = sys.argv[1]
    steps = 24
    # Period = 7

    T = 48  # lenofday
    # len_seq = 3
    nb_flow = 2
    # nb_days = 120
    # divide data into two subsets:
    # Train: ~ 2015.06.21 & Test: 2015.06.22 ~ 2015.06.28
    # len_train = T * (nb_days - 7)
    len_test = T * 7

    data, timestamps = load_stdata(
        os.path.join(DATAPATH, 'traffic_flow_bj15_nomissing.h5'))
    print(timestamps)
    # remove a certain day which has not 48 timestamps
    data, timestamps = remove_incomplete_days(data, timestamps, T)
    data = data[:, :nb_flow]
    # minmax_scale
    data_train = data[-len_test:]
    mmn = MinMaxNormalization()
    mmn.fit(data_train)
    data = mmn.transform(data)

    st = STMatrix(data, timestamps, T)

    # save TCN and MMS
    fpkl = open('preprocessing.pkl', 'wb')
    for obj in [mmn]:  # [tcn, mmn]:
        pickle.dump(obj, fpkl)
    fpkl.close()

    depends = [
        range(1, len_closeness + 1),
        [PeriodInterval * T * j for j in xrange(1, len_period + 1)],
        [TrendInterval * T * j for j in xrange(1, len_trend + 1)]
    ]
    if len_closeness > 0:
        c_conf = (nb_flow, len_closeness, 32, 32)
    else:
        c_conf = None
    if len_period > 0:
        p_conf = (nb_flow, len_period, 32, 32)
    else:
        p_conf = None
    if len_trend > 0:
        t_conf = (nb_flow, len_trend, 32, 32)
    else:
        t_conf = None
    generator = seqCNN_CPT2(c_conf=c_conf, p_conf=p_conf, t_conf=t_conf)

    adam = Adam()
    generator.compile(loss='mean_absolute_error', optimizer=adam)
    generator.load_weights(model_name)

    # instance-based dataset --> sequences with format as (X, Y) where X is a sequence of images and Y is an image.
    offset_frame = pd.DateOffset(minutes=24 * 60 // T)
    Y_test = st.data[-(len_test + steps - 1):]
    Y_pd_timestamps = st.pd_timestamps[-(len_test + steps - 1):]

    XC = []
    XP = []
    XT = []
    for pd_timestamp in Y_pd_timestamps:
        # x = [st.get_matrix(pd_timestamp - j * offset_frame) for j in depends]
        # X_test.append(np.vstack(x))
        x_c = [
            st.get_matrix(pd_timestamp - j * offset_frame) for j in depends[0]
        ]
        x_p = [
            st.get_matrix(pd_timestamp - j * offset_frame) for j in depends[1]
        ]
        x_t = [
            st.get_matrix(pd_timestamp - j * offset_frame) for j in depends[2]
        ]
        if len_closeness > 0:
            XC.append(np.vstack(x_c))
        if len_period > 0:
            XP.append(np.vstack(x_p))
        if len_trend > 0:
            XT.append(np.vstack(x_t))
    if len_closeness > 0:
        XC = np.asarray(XC)
    if len_period > 0:
        XP = np.asarray(XP)
    if len_trend > 0:
        XT = np.asarray(XT)
    print(XC.shape, XP.shape, XT.shape)

    # X_test = np.asarray(X_test)
    XAll = []
    for l, X_ in zip([len_closeness, len_period, len_trend], [XC, XP, XT]):
        if l > 0:
            XAll.append(X_)

    Y_true = mmn.inverse_transform(Y_test[-len_test:])
    Y_hats = []

    # for i in xrange(len(XAll[0])):
    #    x = []
    #    for _X in XAll:
    #        x.append([_X[i]])

    for k in xrange(1, steps + 1):
        print("\n\n==%d-step rmse==" % k)
        ts = time.time()
        # k^th predicted sequence
        Y_hat = generator.predict(XAll)
        Y_hats.append(copy(Y_hat))
        print('Y_hat shape', Y_hat.shape)
        # eval
        Y_pred = mmn.inverse_transform(Y_hat[-len_test:])
        rmse(Y_true, Y_pred)
        X_hat = []
        for _X in XAll:
            X_hat.append(copy(_X[1:]))
        # X_hat = [XC[1:], XP[1:], XT[1:]]  # copy(X_test[1:])
        '''
        # for j in xrange(len_closeness-1, 0):
        for j in xrange(1, min(k, len_closeness) + 1):
            # last sequence -j
            if j == 1:
                X_hat[0][:, -1 * nb_flow:] = Y_hats[-j][:-j]
            else:
                X_hat[0][:, nb_flow*(-j):nb_flow*(-j+1)] = Y_hats[-j][:-j]
        '''

        XC_hat = X_hat[0]
        len_replace = min(k, len_closeness)

        for j in xrange(len_replace):
            # XC_hat[:, nb_flow*(j):nb_flow*(j+1)] = Y_hats[-(j+1)][:-(len_replace-j)]
            XC_hat[:, nb_flow * (j):nb_flow *
                   (j + 1)] = Y_hats[-(j + 1)][:-(j + 1)]
            # XC_hat[:, nb_flow*(j):nb_flow*(j+1)] = Y_hats[j][:-(j+1)]
        # for j in xrange(1, + 1):
        #    XC_hat[:, ] =

        # for j in xrange(1, min(k, len_closeness) + 1):
        # Y^\hat _t replace
        #    X_hat[0][:, nb_flow*(j-1):nb_flow*j] = Y_hats[-j][:-j]

        XAll = X_hat
        print("\nelapsed time (eval): ", time.time() - ts)
Exemple #3
0
def period_trend(period=1, trend=1):
    model_name = sys.argv[1]
    steps = 24
    Period = 7

    T = 48  # lenofday
    len_seq = 3
    nb_flow = 4
    nb_days = 120
    # divide data into two subsets:
    # Train: ~ 2015.06.21 & Test: 2015.06.22 ~ 2015.06.28
    len_train = T * (nb_days - 7)
    len_test = T * 7

    data, timestamps = load_stdata(os.path.join(DATAPATH, 'traffic_flow_bj15_nomissing.h5'))
    print(timestamps)
    # remove a certain day which has not 48 timestamps
    data, timestamps = remove_incomplete_days(data, timestamps, T)

    # minmax_scale
    data_train = data[:len_train]
    mmn = MinMaxNormalization()
    mmn.fit(data_train)
    data = mmn.transform(data)

    st = STMatrix(data, timestamps, T)

    # save TCN and MMS
    fpkl = open('preprocessing.pkl', 'wb')
    for obj in [mmn]:  # [tcn, mmn]:
        pickle.dump(obj, fpkl)
    fpkl.close()

    if period == 1 and trend == 1:
        depends = [1, 2, 3, Period*T, Period*T+1, Period*T+2, Period*T+3]
        len_close = 3
    elif period == 1:
        depends = [1] + [Period * T * j for j in xrange(1, len_seq+1)]
        len_close = 1
    elif trend == 1:
        depends = range(1, 1+len_seq)
        len_close = 3
    else:
        depends = [1]
        len_close = 1
    # else:
    #    print("unknown args")
    #    sys.exit(-1)

    generator = generator_model(nb_flow, len(depends), 32, 32)
    adam = Adam()
    generator.compile(loss='mean_absolute_error', optimizer=adam)
    generator.load_weights(model_name)

    # instance-based dataset --> sequences with format as (X, Y) where X is a sequence of images and Y is an image.
    offset_frame = pd.DateOffset(minutes=24 * 60 // T)
    Y_test = st.data[-(len_test+steps-1):]
    Y_pd_timestamps = st.pd_timestamps[-(len_test+steps-1):]

    X_test = []
    for pd_timestamp in Y_pd_timestamps:
        x = [st.get_matrix(pd_timestamp - j * offset_frame) for j in depends]
        X_test.append(np.vstack(x))
    X_test = np.asarray(X_test)

    Y_true = mmn.inverse_transform(Y_test[-len_test:])

    Y_hats = []
    for k in xrange(1, steps+1):
        print("\n\n==%d-step rmse==" % k)
        ts = time.time()
        Y_hat = generator.predict(X_test)
        Y_hats.append(copy(Y_hat))
        print('Y_hat shape', Y_hat.shape, 'X_test shape:', X_test.shape)
        # eval
        Y_pred = mmn.inverse_transform(Y_hat[-len_test:])
        rmse(Y_true, Y_pred)
        X_test_hat = copy(X_test[1:])
        for j in xrange(1, min(k, len_close) + 1):
            # Y^\hat _t replace
            X_test_hat[:, nb_flow*(j-1):nb_flow*j] = Y_hats[-j][:-j]

        X_test = copy(X_test_hat)
        print("\nelapsed time (eval): ", time.time() - ts)