コード例 #1
0
def taxibj_evaluation():
    """Evaluate the stored Pred-CNN weights on the TaxiBJ test split.

    Steps:
      1. Load the TaxiBJ arrays, from the HDF5 cache when it exists and
         caching is enabled, otherwise through ``TaxiBJ.load_data`` (the
         freshly loaded arrays are then written to the cache).
      2. Rebuild the network with the hyper-parameters below and load the
         weights stored under ``../best_models/Pred-CNN``.
      3. Run ``multi_step_2D`` on the test arrays and write one CSV per
         returned entry to ``results/taxibj_step<k>.csv`` (k starts at 1).

    Side effects: creates the ``RET``, ``MODEL`` and ``results`` directories
    and, when caching is enabled, the cache directory, if they are missing.
    """
    # parameters
    DATAPATH = '../data'
    T = 48  # number of time intervals in one day
    CACHEDATA = True  # cache data or NOT

    lr = 0.0001  # learning rate
    len_c = 4  # length of closeness dependent sequence
    len_p = 0  # length of period dependent sequence
    len_t = 0  # length of trend dependent sequence
    input_length = len_c + len_p + len_t
    num_hidden = 64
    filter_size = (3, 3)
    encoder_length = 4
    decoder_length = 6

    nb_flow = 2  # number of flow types, passed to the loader and the model
    days_test = 7 * 4  # four weeks of data for the test split
    len_test = T * days_test
    len_val = 2 * len_test

    map_height, map_width = 32, 32

    path_cache = os.path.join(DATAPATH, 'CACHE', 'Pred-CNN')  # cache path
    path_result = 'RET'
    path_model = 'MODEL'
    path_csv = 'results'  # destination of the per-step CSV files

    # os.makedirs creates missing parent directories too (the cache path is
    # nested, so os.mkdir would fail when '../data/CACHE' does not exist) and
    # exist_ok=True makes the call a no-op when the directory is already there.
    for directory in (path_result, path_model, path_csv):
        os.makedirs(directory, exist_ok=True)
    if CACHEDATA:
        os.makedirs(path_cache, exist_ok=True)

    # load data
    print("loading data...")
    preprocess_name = 'preprocessing_taxibj.pkl'
    fname = os.path.join(
        path_cache, 'TaxiBJ_C{}_P{}_T{}.h5'.format(len_c, len_p, len_t))
    if CACHEDATA and os.path.exists(fname):
        (X_train_all, Y_train_all, X_train, Y_train,
         X_val, Y_val, X_test, Y_test, mmn, external_dim,
         timestamp_train_all, timestamp_train, timestamp_val,
         timestamp_test) = read_cache(fname, preprocess_name)
        print("load %s successfully" % fname)
    else:
        (X_train_all, Y_train_all, X_train, Y_train,
         X_val, Y_val, X_test, Y_test, mmn, external_dim,
         timestamp_train_all, timestamp_train, timestamp_val,
         timestamp_test) = TaxiBJ.load_data(
            T=T, nb_flow=nb_flow, len_closeness=len_c, len_period=len_p,
            len_trend=len_t, len_test=len_test, len_val=len_val,
            preprocess_name=preprocess_name, meta_data=True,
            datapath=DATAPATH)
        if CACHEDATA:
            cache(fname, X_train_all, Y_train_all, X_train, Y_train,
                  X_val, Y_val, X_test, Y_test, external_dim,
                  timestamp_train_all, timestamp_train, timestamp_val,
                  timestamp_test)

    # build model and load weights
    model = build_model(input_length, map_height, map_width, nb_flow,
                        encoder_length, decoder_length, num_hidden,
                        filter_size, lr)
    # The weight file is named after the sequence lengths (iteration 0), so
    # derive it from the parameters above instead of repeating the numbers.
    model_fname = 'TaxiBJ.c{}.p{}.t{}.iter0.best.h5'.format(
        len_c, len_p, len_t)
    model.load_weights(os.path.join('../best_models', 'Pred-CNN', model_fname))

    # evaluate and save results
    dict_multi_score = multi_step_2D(model, X_test, Y_test, mmn, len_c, step=5)

    # The container type returned by multi_step_2D is not visible here, so
    # keep subscripting with 0..n-1 rather than iterating over it directly.
    for i in range(len(dict_multi_score)):
        csv_name = os.path.join(path_csv, f'taxibj_step{i+1}.csv')
        save_to_csv(dict_multi_score[i], csv_name)
コード例 #2
0
    timestamp_train_all, timestamp_train, timestamp_val, timestamp_test = TaxiBJ.load_data(
        T=T, nb_flow=nb_flow, len_closeness=len_c, len_period=len_p, len_trend=len_t, len_test=len_test,
        len_val=len_val, preprocess_name=preprocess_name, meta_data=True, datapath=DATAPATH)
    if CACHEDATA:
        cache(fname, X_train_all, Y_train_all, X_train, Y_train, X_val, Y_val, X_test, Y_test,
                external_dim, timestamp_train_all, timestamp_train, timestamp_val, timestamp_test)

# print("\n days (test): ", [v[:8] for v in timestamp_test[0::T]])

# training-test-evaluation iterations
# Runs 10 iterations (i = 0..9). Each iteration builds a fresh model, derives
# its own checkpoint file name and seeds the random generators from i.
for i in range(0,10):
    print('=' * 10)
    print("compiling model...")

    # build model
    model = build_model(input_length, map_height, map_width, nb_flow, encoder_length,
                        decoder_length, num_hidden, filter_size, lr)

    # Name of this run, built from the sequence lengths and the iteration
    # index; the best weights go to '<path_model>/<name>.best.h5'.
    hyperparams_name = 'TaxiBJ.c{}.p{}.t{}.iter{}'.format(
        len_c, len_p, len_t, i)
    fname_param = os.path.join(path_model, '{}.best.h5'.format(hyperparams_name))
    print(hyperparams_name)

    # Both callbacks watch 'val_rmse' (lower is better): early stopping uses
    # a patience of 25 epochs, the checkpoint keeps only the best weights.
    early_stopping = EarlyStopping(monitor='val_rmse', patience=25, mode='min')
    model_checkpoint = ModelCheckpoint(
        fname_param, monitor='val_rmse', verbose=0, save_best_only=True, mode='min')

    print('=' * 10)
    # train model
    # Seed NumPy and TensorFlow with i * 18 so every iteration uses its own seed.
    # NOTE(review): the model is already built at this point, so any weights
    # created inside build_model are presumably not covered by this seed -
    # confirm whether the seeding should happen before build_model.
    np.random.seed(i*18)
    tf.random.set_seed(i*18)
    print("training model...")
コード例 #3
0
def bikenyc_evaluation():
    """Evaluate the stored Pred-CNN weights on the BikeNYC test split.

    Steps:
      1. Load the BikeNYC arrays, from the HDF5 cache when it exists and
         caching is enabled, otherwise through ``BikeNYC.load_data`` (the
         freshly loaded arrays are then written to the cache).
      2. Rebuild the network with the hyper-parameters below and load the
         weights stored under ``../best_models/Pred-CNN``.
      3. Run ``multi_step_2D`` on the test arrays and write one CSV per
         returned entry to ``results/bikenyc_step<k>.csv`` (k starts at 1).

    Side effects: creates the ``results`` directory and, when caching is
    enabled, the cache directory, if they are missing.
    """
    # parameters
    DATAPATH = '../data'
    T = 24  # number of time intervals in one day
    CACHEDATA = True  # cache data or NOT

    lr = 0.0001  # learning rate
    len_c = 4  # length of closeness dependent sequence
    len_p = 0  # length of period dependent sequence
    len_t = 0  # length of trend dependent sequence
    input_length = len_c + len_p + len_t
    num_hidden = 64
    filter_size = (3, 3)
    encoder_length = 2
    decoder_length = 3

    nb_flow = 2  # there are two types of flows: new-flow and end-flow
    # divide data into two subsets: Train & Test, of which the test set is the
    # last 10 days
    days_test = 10
    len_test = T * days_test
    len_val = 2 * len_test

    map_height, map_width = 16, 8  # grid size

    path_cache = os.path.join(DATAPATH, 'CACHE', 'Pred-CNN')  # cache path
    path_csv = 'results'  # destination of the per-step CSV files

    # os.makedirs creates missing parent directories too (the cache path is
    # nested, so os.mkdir would fail when '../data/CACHE' does not exist) and
    # exist_ok=True makes the call a no-op when the directory is already there.
    os.makedirs(path_csv, exist_ok=True)
    if CACHEDATA:
        os.makedirs(path_cache, exist_ok=True)

    # load data
    print("loading data...")
    preprocess_name = 'preprocessing_bikenyc.pkl'
    fname = os.path.join(
        path_cache, 'BikeNYC_C{}_P{}_T{}.h5'.format(len_c, len_p, len_t))
    if CACHEDATA and os.path.exists(fname):
        (X_train_all, Y_train_all, X_train, Y_train,
         X_val, Y_val, X_test, Y_test, mmn, external_dim,
         timestamp_train_all, timestamp_train, timestamp_val,
         timestamp_test) = read_cache(fname, preprocess_name)
        print("load %s successfully" % fname)
    else:
        (X_train_all, Y_train_all, X_train, Y_train,
         X_val, Y_val, X_test, Y_test, mmn, external_dim,
         timestamp_train_all, timestamp_train, timestamp_val,
         timestamp_test) = BikeNYC.load_data(
            T=T, nb_flow=nb_flow, len_closeness=len_c, len_period=len_p,
            len_trend=len_t, len_test=len_test, len_val=len_val,
            preprocess_name=preprocess_name, meta_data=True,
            datapath=DATAPATH)
        if CACHEDATA:
            cache(fname, X_train_all, Y_train_all, X_train, Y_train,
                  X_val, Y_val, X_test, Y_test, external_dim,
                  timestamp_train_all, timestamp_train, timestamp_val,
                  timestamp_test)

    # build model and load weights
    model = build_model(input_length, map_height, map_width, nb_flow,
                        encoder_length, decoder_length, num_hidden,
                        filter_size, lr)

    # The weight file is named after the sequence lengths (iteration 0), so
    # derive it from the parameters above instead of repeating the numbers.
    model_fname = 'BikeNYC.c{}.p{}.t{}.iter0.best.h5'.format(
        len_c, len_p, len_t)
    model.load_weights(os.path.join('../best_models', 'Pred-CNN', model_fname))

    # evaluate and save results
    dict_multi_score = multi_step_2D(model, X_test, Y_test, mmn, len_c, step=5)

    # The container type returned by multi_step_2D is not visible here, so
    # keep subscripting with 0..n-1 rather than iterating over it directly.
    for i in range(len(dict_multi_score)):
        csv_name = os.path.join(path_csv, f'bikenyc_step{i+1}.csv')
        save_to_csv(dict_multi_score[i], csv_name)
コード例 #4
0
def train_model(lr,
                batch_size,
                num_hidden,
                encoder_length,
                decoder_length,
                save_results=False,
                i=''):
    """Train one Pred-CNN configuration and return a score to maximise.

    The raw arguments are first mapped to the discrete hyper-parameters that
    are actually used. The model is then fitted on ``X_train_all`` /
    ``Y_train_all`` with ``(X_test, Y_test)`` as validation data, the
    checkpoint with the lowest ``val_rmse`` is reloaded and evaluated on the
    test arrays.

    Relies on module-level names defined elsewhere in the file: the data
    arrays, ``mmn``, ``nb_epoch``, ``path_result``, ``input_length``,
    ``map_height``, ``map_width``, ``nb_flow``, ``filter_size``, ``len_c``,
    ``len_p``, ``len_t``, ``build_model`` and ``evaluate``.

    Args:
        lr: learning rate; rounded to 5 decimals.
        batch_size: multiplier; the batch size used is ``16 * int(batch_size)``.
        num_hidden: exponent; the value used is ``2 ** int(num_hidden)``.
        encoder_length: truncated to ``int``.
        decoder_length: truncated to ``int``.
        save_results: when true, additionally compute the metrics returned
            by ``evaluate``, append them as one row to
            ``results/pred-cnn_taxiNYC_results.csv`` and clear the Keras
            session.
        i: iteration index, or ``''`` / ``None`` when the call is not a
            numbered iteration. A numbered iteration (including ``0``) is
            announced on stdout and seeds NumPy and TensorFlow with
            ``i * 18``. The value is also embedded in the output file names.

    Returns:
        ``1.0 - score[1]``, where ``score`` is the result of
        ``model.evaluate`` on the test arrays (``score[1]`` is the value
        printed as "rmse (norm)"). The result is the same whether or not
        ``save_results`` is set.
    """
    # get discrete parameters
    num_hidden = 2 ** int(num_hidden)
    encoder_length = int(encoder_length)
    decoder_length = int(decoder_length)
    batch_size = 16 * int(batch_size)
    lr = round(lr, 5)

    # Iteration 0 is a valid index, so compare against the "no iteration"
    # sentinels instead of relying on truthiness (0 is falsy).
    has_iteration = i is not None and i != ''

    # Make sure the output folders exist before Keras / pickle write to them.
    model_dir = 'MODEL'
    os.makedirs(model_dir, exist_ok=True)
    os.makedirs(path_result, exist_ok=True)

    # build model
    model = build_model(input_length, map_height, map_width, nb_flow,
                        encoder_length, decoder_length, num_hidden,
                        filter_size, lr)
    hyperparams_name = 'TaxiNYC{}.c{}.p{}.t{}.num_hidden_{}.encoder_length_{}.decoder_length_{}.lr_{}.batchsize_{}'.format(
        i, len_c, len_p, len_t, num_hidden, encoder_length, decoder_length, lr,
        batch_size)
    fname_param = os.path.join(model_dir,
                               '{}.best.h5'.format(hyperparams_name))

    # Only the checkpoint callback is passed to fit(); it keeps the weights
    # with the lowest 'val_rmse'.
    model_checkpoint = ModelCheckpoint(fname_param,
                                       monitor='val_rmse',
                                       verbose=0,
                                       save_best_only=True,
                                       mode='min')

    # train model
    print("training model...")
    ts = time.time()
    if has_iteration:
        print(f'Iteration {i}')
        # NOTE(review): the model is already built here, so weights created
        # inside build_model are presumably not covered by this seed -
        # confirm whether seeding should happen before build_model.
        np.random.seed(i * 18)
        tf.random.set_seed(i * 18)
    history = model.fit(
        X_train_all,
        Y_train_all,
        epochs=nb_epoch,
        batch_size=batch_size,
        validation_data=(X_test, Y_test),
        callbacks=[model_checkpoint],
        verbose=2)
    model.save_weights(os.path.join(model_dir,
                                    '{}.h5'.format(hyperparams_name)),
                       overwrite=True)
    history_path = os.path.join(path_result,
                                '{}.history.pkl'.format(hyperparams_name))
    # Context manager so the file handle is closed even if pickling fails.
    with open(history_path, 'wb') as history_file:
        pickle.dump(history.history, history_file)
    print("\nelapsed time (training): %.3f seconds\n" % (time.time() - ts))

    # evaluate with the best checkpoint of this run
    model.load_weights(fname_param)
    score = model.evaluate(X_test,
                           Y_test,
                           batch_size=Y_test.shape[0],
                           verbose=0)
    print('Test score: %.6f rmse (norm): %.6f rmse (real): %.6f' %
          (score[0], score[1], score[1] * (mmn._max - mmn._min) / 2.))

    if save_results:
        print(
            'evaluating using the model that has the best loss on the valid set'
        )
        # The best weights are already loaded above; no need to reload them.
        Y_pred = model.predict(X_test)  # compute predictions

        # Kept in its own variable so the value returned to the optimiser
        # below is still derived from model.evaluate().
        detailed_score = evaluate(Y_test, Y_pred, mmn,
                                  rmse_factor=1)  # evaluate performance

        # save to csv
        results_dir = 'results'
        csv_name = os.path.join(results_dir, 'pred-cnn_taxiNYC_results.csv')
        os.makedirs(results_dir, exist_ok=True)
        write_header = not os.path.isfile(csv_name)
        with open(csv_name, 'a', encoding="utf-8") as file:
            if write_header:
                file.write('iteration,'
                           'rsme_in,rsme_out,rsme_tot,'
                           'mape_in,mape_out,mape_tot,'
                           'ape_in,ape_out,ape_tot')
                file.write("\n")
            file.write(
                f'{i},{detailed_score[0]},{detailed_score[1]},'
                f'{detailed_score[2]},{detailed_score[3]},'
                f'{detailed_score[4]},{detailed_score[5]},'
                f'{detailed_score[6]},{detailed_score[7]},'
                f'{detailed_score[8]}')
            file.write("\n")
        K.clear_session()

    # bayes opt is a maximization algorithm, to minimize validation_loss, return 1-this
    bayes_opt_score = 1.0 - score[1]

    return bayes_opt_score