Beispiel #1
0
def _report_scores(model, label, X, Y, batch_size, mmn):
    """Evaluate ``model`` on ``(X, Y)`` and print the scores for one split.

    Args:
        model: object exposing ``evaluate(X, Y, batch_size=..., verbose=...)``
            whose result is indexable, with the loss at index 0 and the RMSE
            metric at index 1 (as used by the print below).
        label: split name used as the prefix of the printed line
            (e.g. ``'Train'`` or ``'Test'``).
        X: model inputs for the split.
        Y: targets for the split.
        batch_size: batch size forwarded to ``model.evaluate``.
        mmn: normalizer exposing ``_max`` and ``_min``; used to rescale the
            normalized RMSE to the value printed as "rmse (real)".
    """
    score = model.evaluate(X, Y, batch_size=batch_size, verbose=0)
    # NOTE(review): the (max - min) / 2 factor presumably undoes a scaling of
    # the data to [-1, 1] -- confirm against the normalizer implementation.
    print('%s score: %.6f rmse (norm): %.6f rmse (real): %.6f' %
          (label, score[0], score[1], score[1] * (mmn._max - mmn._min) / 2.))


def main():
    """Load TaxiBJ, train the model in two phases and print evaluation scores.

    Steps, in order:
      1. Load the dataset, from the HDF5 cache when ``CACHEDATA`` is set and
         the cache file exists; otherwise via ``TaxiBJ.load_data`` (and then
         write the cache if ``CACHEDATA`` is set).
      2. Build the model with ``build_model(external_dim)``.
      3. Train with a 10% validation split, early stopping and a
         best-only checkpoint, both monitoring ``val_rmse``.
      4. Reload the best checkpoint and print train/test scores.
      5. Continue training on the full training set (no validation split)
         with a best-only checkpoint monitoring ``rmse``.
      6. Print train/test scores of the final model.

    Weights are written under ``MODEL`` and the training histories are
    pickled under ``path_result``. All hyper-parameters are read from
    module-level names.
    """
    # load data
    print("loading data...")
    ts = time.time()
    fname = os.path.join(
        DATAPATH, 'CACHE',
        'TaxiBJ_C{}_P{}_T{}.h5'.format(len_closeness, len_period, len_trend))
    if os.path.exists(fname) and CACHEDATA:
        X_train, Y_train, X_test, Y_test, mmn, external_dim, timestamp_train, timestamp_test = read_cache(
            fname)
        print("load %s successfully" % fname)
    else:
        X_train, Y_train, X_test, Y_test, mmn, external_dim, timestamp_train, timestamp_test = TaxiBJ.load_data(
            T=T,
            nb_flow=nb_flow,
            len_closeness=len_closeness,
            len_period=len_period,
            len_trend=len_trend,
            len_test=len_test,
            preprocess_name='preprocessing.pkl',
            meta_data=True,
            meteorol_data=True,
            holiday_data=True)
        if CACHEDATA:
            cache(fname, X_train, Y_train, X_test, Y_test, external_dim,
                  timestamp_train, timestamp_test)

    # One entry per T samples; the first 8 characters of each timestamp are
    # printed (presumably the YYYYMMDD part -- verify against the data).
    print("\n days (test): ", [v[:8] for v in timestamp_test[0::T]])
    print("\nelapsed time (loading data): %.3f seconds\n" % (time.time() - ts))

    print('=' * 10)
    print("compiling model...")
    print(
        "**at the first time, it takes a few minites to compile if you use [Theano] as the backend**"
    )

    ts = time.time()
    model = build_model(external_dim)
    hyperparams_name = 'c{}.p{}.t{}.resunit{}.lr{}'.format(
        len_closeness, len_period, len_trend, nb_residual_unit, lr)
    fname_param = os.path.join('MODEL', '{}.best.h5'.format(hyperparams_name))

    early_stopping = EarlyStopping(monitor='val_rmse', patience=2, mode='min')
    model_checkpoint = ModelCheckpoint(fname_param,
                                       monitor='val_rmse',
                                       verbose=0,
                                       save_best_only=True,
                                       mode='min')

    print("\nelapsed time (compiling model): %.3f seconds\n" %
          (time.time() - ts))

    print('=' * 10)
    print("training model...")
    ts = time.time()
    history = model.fit(X_train,
                        Y_train,
                        nb_epoch=nb_epoch,
                        batch_size=batch_size,
                        validation_split=0.1,
                        callbacks=[early_stopping, model_checkpoint],
                        verbose=1)
    model.save_weights(os.path.join('MODEL', '{}.h5'.format(hyperparams_name)),
                       overwrite=True)
    # Use a context manager so the history file is flushed and closed even if
    # pickling raises.
    history_path = os.path.join(path_result,
                                '{}.history.pkl'.format(hyperparams_name))
    with open(history_path, 'wb') as history_file:
        pickle.dump(history.history, history_file)
    print("\nelapsed time (training): %.3f seconds\n" % (time.time() - ts))

    print('=' * 10)
    print('evaluating using the model that has the best loss on the valid set')
    ts = time.time()
    # Switch from the last-epoch weights to the best checkpoint saved above.
    model.load_weights(fname_param)
    _report_scores(model, 'Train', X_train, Y_train,
                   Y_train.shape[0] // 48, mmn)
    _report_scores(model, 'Test', X_test, Y_test, Y_test.shape[0], mmn)
    print("\nelapsed time (eval): %.3f seconds\n" % (time.time() - ts))

    print('=' * 10)
    print("training model (cont)...")
    ts = time.time()
    fname_param = os.path.join('MODEL',
                               '{}.cont.best.h5'.format(hyperparams_name))
    # No validation split in this phase, so the checkpoint monitors the
    # training metric 'rmse' instead of 'val_rmse'.
    model_checkpoint = ModelCheckpoint(fname_param,
                                       monitor='rmse',
                                       verbose=0,
                                       save_best_only=True,
                                       mode='min')
    history = model.fit(X_train,
                        Y_train,
                        nb_epoch=nb_epoch_cont,
                        verbose=1,
                        batch_size=batch_size,
                        callbacks=[model_checkpoint])
    cont_history_path = os.path.join(
        path_result, '{}.cont.history.pkl'.format(hyperparams_name))
    with open(cont_history_path, 'wb') as history_file:
        pickle.dump(history.history, history_file)
    model.save_weights(os.path.join('MODEL',
                                    '{}_cont.h5'.format(hyperparams_name)),
                       overwrite=True)
    print("\nelapsed time (training cont): %.3f seconds\n" %
          (time.time() - ts))

    print('=' * 10)
    print('evaluating using the final model')
    # Start the timer before both evaluations so the reported time covers the
    # same work as the first evaluation section.
    ts = time.time()
    _report_scores(model, 'Train', X_train, Y_train,
                   Y_train.shape[0] // 48, mmn)
    _report_scores(model, 'Test', X_test, Y_test, Y_test.shape[0], mmn)
    print("\nelapsed time (eval cont): %.3f seconds\n" % (time.time() - ts))
# ---------------------------------------------------------------------------
# Load the TaxiBJ dataset at module level.
#
# When caching is enabled and the cache file already exists, the arrays are
# read back with read_cache(); otherwise they are rebuilt with
# TaxiBJ.load_data() and, if caching is enabled, written to the cache file.
# ---------------------------------------------------------------------------
print("loading data...")
ts = time.time()
fname = os.path.join(
    path_cache,
    'TaxiBJ_C{}_P{}_T{}.h5'.format(len_closeness, len_period, len_trend))

if not os.path.exists(fname) or not CACHEDATA:
    # No usable cache: build the dataset from the raw files.
    (X_train, Y_train, X_test, Y_test, mmn, external_dim, timestamp_train,
     timestamp_test) = TaxiBJ.load_data(T=T,
                                        nb_flow=nb_flow,
                                        len_closeness=len_closeness,
                                        len_period=len_period,
                                        len_trend=len_trend,
                                        len_test=len_test,
                                        preprocess_name='preprocessing.pkl',
                                        meta_data=True,
                                        meteorol_data=True,
                                        holiday_data=True,
                                        datapath=DATAPATH)
    if CACHEDATA:
        cache(fname, X_train, Y_train, X_test, Y_test, external_dim,
              timestamp_train, timestamp_test)
else:
    # Cache hit: restore everything from the cache file.
    (X_train, Y_train, X_test, Y_test, mmn, external_dim, timestamp_train,
     timestamp_test) = read_cache(fname)
    print("load %s successfully" % fname)

# Print the first 8 characters of every T-th test timestamp.
print("\n days (test): ", [stamp[:8] for stamp in timestamp_test[::T]])
print("\nelapsed time (loading data): %.3f seconds\n" % (time.time() - ts))

print('=' * 10)

# training-test-evaluation iterations
def taxibj_evaluation():
    """Evaluate the saved ST-ResNet TaxiBJ model and write the scores to CSV.

    Loads the TaxiBJ dataset with only the meta-data external features
    enabled (meteorology and holiday features are disabled), using the
    ``TaxiBJ_onlyMeta_*`` cache file when it exists. It then builds the model,
    loads the weights from
    ``../best_models/ST-ResNet/TaxiBJ.c3.p1.t1.resunit12.iter8.cont.best.h5``,
    runs ``multi_step_2D`` with ``step=5`` on the test split and saves each
    entry of the result to ``results/taxibj_step<k>.csv`` (k starting at 1).

    Takes no arguments and returns ``None``; all settings are local constants.
    """
    # parameters
    DATAPATH = '../data'  # data path (hard-coded; edit here to use another location)
    CACHEDATA = True  # cache data or NOT
    path_cache = os.path.join(DATAPATH, 'CACHE', 'ST-ResNet')  # cache path
    T = 48  # number of time intervals in one day
    len_closeness = 3  # length of closeness dependent sequence
    len_period = 1  # length of period dependent sequence
    len_trend = 1  # length of trend dependent sequence
    nb_residual_unit = 12  # paper says 12 for taxiBJ

    nb_flow = 2
    days_test = 7 * 4
    len_test = T * days_test
    map_height, map_width = 32, 32  # grid size
    if CACHEDATA:
        # makedirs also creates the missing parent '<DATAPATH>/CACHE' and is a
        # no-op when the directory already exists.
        os.makedirs(path_cache, exist_ok=True)

    # load data
    print("loading data...")
    fname = os.path.join(
        path_cache,
        'TaxiBJ_onlyMeta_C{}_P{}_T{}.h5'.format(len_closeness, len_period,
                                                len_trend))
    if os.path.exists(fname) and CACHEDATA:
        X_train, Y_train, X_test, Y_test, mmn, external_dim, timestamp_train, timestamp_test = read_cache(
            fname, 'preprocessing_taxibj.pkl')
        print("load %s successfully" % fname)
    else:
        X_train, Y_train, X_test, Y_test, mmn, external_dim, timestamp_train, timestamp_test = TaxiBJ.load_data(
            T=T,
            nb_flow=nb_flow,
            len_closeness=len_closeness,
            len_period=len_period,
            len_trend=len_trend,
            len_test=len_test,
            preprocess_name='preprocessing_taxibj.pkl',
            meta_data=True,
            meteorol_data=False,
            holiday_data=False,
            datapath=DATAPATH)
        if CACHEDATA:
            cache(fname, X_train, Y_train, X_test, Y_test, external_dim,
                  timestamp_train, timestamp_test)

    # Print the first 8 characters of every T-th test timestamp.
    print("\n days (test): ", [v[:8] for v in timestamp_test[0::T]])
    print('=' * 10)

    # build model
    model = build_model(external_dim, nb_residual_unit, map_height, map_width,
                        len_closeness, len_period, len_trend)

    model_fname = 'TaxiBJ.c3.p1.t1.resunit12.iter8.cont.best.h5'
    model.load_weights(os.path.join('../best_models', 'ST-ResNet',
                                    model_fname))

    # evaluate and save results
    dict_multi_score = multi_step_2D(model,
                                     X_test,
                                     Y_test,
                                     mmn,
                                     len_closeness,
                                     step=5)

    # Make sure the output directory exists before writing any CSV.
    results_dir = 'results'
    os.makedirs(results_dir, exist_ok=True)

    # Entries are addressed by position 0..n-1; file names are 1-based.
    for i in range(len(dict_multi_score)):
        csv_name = os.path.join(results_dir, f'taxibj_step{i+1}.csv')
        save_to_csv(dict_multi_score[i], csv_name)
Beispiel #4
0
def taxibj_evaluation():
    """Evaluate the saved MST3D TaxiBJ model and write the scores to CSV.

    Loads the TaxiBJ dataset with meta-data, meteorology and holiday features
    enabled, using the cache file under ``../data/CACHE/MST3D`` when it
    exists. It then builds the model, loads the weights from
    ``../best_models/MST3D/TaxiBJ.c4.p4.t4.iter6.best.h5``, runs
    ``multi_step_2D`` with ``step=5`` on the test split and saves each entry
    of the result to ``results/taxibj_step<k>.csv`` (k starting at 1).

    Takes no arguments and returns ``None``; all settings are local constants.
    """
    # parameters
    DATAPATH = '../data'
    CACHEDATA = True  # cache data or NOT
    T = 48  # number of time intervals in one day
    len_closeness = 4  # length of closeness dependent sequence - should be 6
    len_period = 4  # length of period dependent sequence
    len_trend = 4  # length of trend dependent sequence

    nb_flow = 2
    days_test = 7 * 4
    len_test = T * days_test
    map_height, map_width = 32, 32  # grid size

    path_cache = os.path.join(DATAPATH, 'CACHE', 'MST3D')  # cache path
    if CACHEDATA:
        # makedirs also creates the missing parent '<DATAPATH>/CACHE' and is a
        # no-op when the directory already exists.
        os.makedirs(path_cache, exist_ok=True)

    # load data
    print("loading data...")
    ts = time.time()
    fname = os.path.join(
        path_cache, 'TaxiBJ_C{}_P{}_T{}.h5'.format(len_closeness, len_period,
                                                   len_trend))
    if os.path.exists(fname) and CACHEDATA:
        X_train, Y_train, X_test, Y_test, mmn, external_dim, timestamp_train, timestamp_test = read_cache(
            fname, 'preprocessing.pkl')
        print("load %s successfully" % fname)
    else:
        X_train, Y_train, X_test, Y_test, mmn, external_dim, timestamp_train, timestamp_test = TaxiBJ.load_data(
            T=T,
            nb_flow=nb_flow,
            len_closeness=len_closeness,
            len_period=len_period,
            len_trend=len_trend,
            len_test=len_test,
            preprocess_name='preprocessing.pkl',
            meta_data=True,
            meteorol_data=True,
            holiday_data=True,
            datapath=DATAPATH)
        if CACHEDATA:
            cache(fname, X_train, Y_train, X_test, Y_test, external_dim,
                  timestamp_train, timestamp_test)

    # Print the first 8 characters of every T-th test timestamp.
    print("\n days (test): ", [v[:8] for v in timestamp_test[0::T]])
    print("\nelapsed time (loading data): %.3f seconds\n" % (time.time() - ts))

    print('=' * 10)
    # NOTE(review): overrides the normalizer's stored maximum with a fixed
    # value regardless of what was loaded -- presumably the known TaxiBJ
    # maximum; confirm before reusing with other data.
    mmn._max = 1292  # just to be sure it's correct

    # build model
    model = build_model('BJ', len_closeness, len_period, len_trend, nb_flow,
                        map_height, map_width, external_dim)
    model_fname = 'TaxiBJ.c4.p4.t4.iter6.best.h5'
    model.load_weights(os.path.join('../best_models', 'MST3D', model_fname))

    # evaluate and save results
    dict_multi_score = multi_step_2D(model,
                                     X_test,
                                     Y_test,
                                     mmn,
                                     len_closeness,
                                     step=5)

    # Make sure the output directory exists before writing any CSV.
    results_dir = 'results'
    os.makedirs(results_dir, exist_ok=True)

    # Entries are addressed by position 0..n-1; file names are 1-based.
    for i in range(len(dict_multi_score)):
        csv_name = os.path.join(results_dir, f'taxibj_step{i+1}.csv')
        save_to_csv(dict_multi_score[i], csv_name)