def main():
    # load data
    print("loading data...")
    ts = time.time()
    fname = os.path.join(
        DATAPATH, 'CACHE',
        'TaxiBJ_C{}_P{}_T{}.h5'.format(len_closeness, len_period, len_trend))
    if os.path.exists(fname) and CACHEDATA:
        X_train, Y_train, X_test, Y_test, mmn, external_dim, timestamp_train, timestamp_test = read_cache(
            fname)
        print("load %s successfully" % fname)
    else:
        X_train, Y_train, X_test, Y_test, mmn, external_dim, timestamp_train, timestamp_test = TaxiBJ.load_data(
            T=T, nb_flow=nb_flow, len_closeness=len_closeness,
            len_period=len_period, len_trend=len_trend, len_test=len_test,
            preprocess_name='preprocessing.pkl', meta_data=True,
            meteorol_data=True, holiday_data=True)
        if CACHEDATA:
            cache(fname, X_train, Y_train, X_test, Y_test,
                  external_dim, timestamp_train, timestamp_test)
    print("\n days (test): ", [v[:8] for v in timestamp_test[0::T]])
    print("\nelapsed time (loading data): %.3f seconds\n" % (time.time() - ts))

    # compile model
    print('=' * 10)
    print("compiling model...")
    print("**the first time, it may take a few minutes to compile if you use [Theano] as the backend**")
    ts = time.time()
    model = build_model(external_dim)
    hyperparams_name = 'c{}.p{}.t{}.resunit{}.lr{}'.format(
        len_closeness, len_period, len_trend, nb_residual_unit, lr)
    fname_param = os.path.join('MODEL', '{}.best.h5'.format(hyperparams_name))
    early_stopping = EarlyStopping(monitor='val_rmse', patience=2, mode='min')
    model_checkpoint = ModelCheckpoint(fname_param, monitor='val_rmse',
                                       verbose=0, save_best_only=True,
                                       mode='min')
    print("\nelapsed time (compiling model): %.3f seconds\n" % (time.time() - ts))

    # train with early stopping on the validation rmse
    print('=' * 10)
    print("training model...")
    ts = time.time()
    history = model.fit(X_train, Y_train,
                        nb_epoch=nb_epoch,
                        batch_size=batch_size,
                        validation_split=0.1,
                        callbacks=[early_stopping, model_checkpoint],
                        verbose=1)
    model.save_weights(os.path.join('MODEL', '{}.h5'.format(hyperparams_name)),
                       overwrite=True)
    pickle.dump(history.history, open(os.path.join(
        path_result, '{}.history.pkl'.format(hyperparams_name)), 'wb'))
    print("\nelapsed time (training): %.3f seconds\n" % (time.time() - ts))

    # evaluate with the best checkpoint
    print('=' * 10)
    print('evaluating using the model that has the best loss on the valid set')
    ts = time.time()
    model.load_weights(fname_param)
    score = model.evaluate(X_train, Y_train,
                           batch_size=Y_train.shape[0] // 48, verbose=0)
    print('Train score: %.6f rmse (norm): %.6f rmse (real): %.6f' %
          (score[0], score[1], score[1] * (mmn._max - mmn._min) / 2.))
    score = model.evaluate(X_test, Y_test,
                           batch_size=Y_test.shape[0], verbose=0)
    print('Test score: %.6f rmse (norm): %.6f rmse (real): %.6f' %
          (score[0], score[1], score[1] * (mmn._max - mmn._min) / 2.))
    print("\nelapsed time (eval): %.3f seconds\n" % (time.time() - ts))

    # continue training without a validation split, checkpointing on train rmse
    print('=' * 10)
    print("training model (cont)...")
    ts = time.time()
    fname_param = os.path.join('MODEL',
                               '{}.cont.best.h5'.format(hyperparams_name))
    model_checkpoint = ModelCheckpoint(fname_param, monitor='rmse', verbose=0,
                                       save_best_only=True, mode='min')
    history = model.fit(X_train, Y_train,
                        nb_epoch=nb_epoch_cont,
                        verbose=1,
                        batch_size=batch_size,
                        callbacks=[model_checkpoint])
    pickle.dump(history.history, open(os.path.join(
        path_result, '{}.cont.history.pkl'.format(hyperparams_name)), 'wb'))
    model.save_weights(os.path.join('MODEL',
                                    '{}_cont.h5'.format(hyperparams_name)),
                       overwrite=True)
    print("\nelapsed time (training cont): %.3f seconds\n" % (time.time() - ts))

    # final evaluation
    print('=' * 10)
    print('evaluating using the final model')
    score = model.evaluate(X_train, Y_train,
                           batch_size=Y_train.shape[0] // 48, verbose=0)
    print('Train score: %.6f rmse (norm): %.6f rmse (real): %.6f' %
          (score[0], score[1], score[1] * (mmn._max - mmn._min) / 2.))
    ts = time.time()
    score = model.evaluate(X_test, Y_test,
                           batch_size=Y_test.shape[0], verbose=0)
    print('Test score: %.6f rmse (norm): %.6f rmse (real): %.6f' %
          (score[0], score[1], score[1] * (mmn._max - mmn._min) / 2.))
    print("\nelapsed time (eval cont): %.3f seconds\n" % (time.time() - ts))
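# The de-normalization factor (mmn._max - mmn._min) / 2. in the evaluation
# prints above assumes the targets were min-max scaled into [-1, 1]. Below is
# a minimal sketch of such a scaler, matching the _max/_min attributes
# referenced here; the repo's own class may differ in details.
class MinMaxNormalization(object):
    """Scale data into [-1, 1]: X' = 2 * (X - min) / (max - min) - 1."""

    def fit(self, X):
        self._min, self._max = X.min(), X.max()

    def transform(self, X):
        X = 1. * (X - self._min) / (self._max - self._min)
        return X * 2. - 1.

    def inverse_transform(self, X):
        return (X + 1.) / 2. * (self._max - self._min) + self._min

# Because the mapping is affine with slope 2 / (max - min), an RMSE computed
# on normalized data converts to the real scale by multiplying with
# (mmn._max - mmn._min) / 2., which is exactly what the prints above do.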
print("loading data...") ts = time.time() fname = os.path.join( path_cache, 'TaxiBJ_C{}_P{}_T{}.h5'.format(len_closeness, len_period, len_trend)) if os.path.exists(fname) and CACHEDATA: X_train, Y_train, X_test, Y_test, mmn, external_dim, timestamp_train, timestamp_test = read_cache( fname) print("load %s successfully" % fname) else: X_train, Y_train, X_test, Y_test, mmn, external_dim, timestamp_train, timestamp_test = TaxiBJ.load_data( T=T, nb_flow=nb_flow, len_closeness=len_closeness, len_period=len_period, len_trend=len_trend, len_test=len_test, preprocess_name='preprocessing.pkl', meta_data=True, meteorol_data=True, holiday_data=True, datapath=DATAPATH) if CACHEDATA: cache(fname, X_train, Y_train, X_test, Y_test, external_dim, timestamp_train, timestamp_test) print("\n days (test): ", [v[:8] for v in timestamp_test[0::T]]) print("\nelapsed time (loading data): %.3f seconds\n" % (time.time() - ts)) print('=' * 10) # training-test-evaluation iterations
def taxibj_evaluation():
    # parameters
    DATAPATH = '../data'  # data path; you may set your own via the DATAPATH environment variable
    CACHEDATA = True  # cache data or NOT
    path_cache = os.path.join(DATAPATH, 'CACHE', 'ST-ResNet')  # cache path
    T = 48  # number of time intervals in one day
    lr = 0.0002  # learning rate
    len_closeness = 3  # length of closeness dependent sequence
    len_period = 1  # length of period dependent sequence
    len_trend = 1  # length of trend dependent sequence
    nb_residual_unit = 12  # the paper uses 12 residual units for TaxiBJ
    nb_flow = 2
    days_test = 7 * 4
    len_test = T * days_test
    map_height, map_width = 32, 32  # grid size

    if CACHEDATA and os.path.isdir(path_cache) is False:
        os.mkdir(path_cache)

    # load data
    print("loading data...")
    fname = os.path.join(
        path_cache,
        'TaxiBJ_onlyMeta_C{}_P{}_T{}.h5'.format(len_closeness, len_period, len_trend))
    if os.path.exists(fname) and CACHEDATA:
        X_train, Y_train, X_test, Y_test, mmn, external_dim, timestamp_train, timestamp_test = read_cache(
            fname, 'preprocessing_taxibj.pkl')
        print("load %s successfully" % fname)
    else:
        X_train, Y_train, X_test, Y_test, mmn, external_dim, timestamp_train, timestamp_test = TaxiBJ.load_data(
            T=T, nb_flow=nb_flow, len_closeness=len_closeness,
            len_period=len_period, len_trend=len_trend, len_test=len_test,
            preprocess_name='preprocessing_taxibj.pkl', meta_data=True,
            meteorol_data=False, holiday_data=False, datapath=DATAPATH)
        if CACHEDATA:
            cache(fname, X_train, Y_train, X_test, Y_test,
                  external_dim, timestamp_train, timestamp_test)
    print("\n days (test): ", [v[:8] for v in timestamp_test[0::T]])
    print('=' * 10)

    # build model and load the pretrained weights
    model = build_model(external_dim, nb_residual_unit, map_height,
                        map_width, len_closeness, len_period, len_trend)
    model_fname = 'TaxiBJ.c3.p1.t1.resunit12.iter8.cont.best.h5'
    model.load_weights(os.path.join('../best_models', 'ST-ResNet', model_fname))

    # evaluate and save results
    dict_multi_score = multi_step_2D(model, X_test, Y_test, mmn,
                                     len_closeness, step=5)
    for i in range(len(dict_multi_score)):
        csv_name = os.path.join('results', f'taxibj_step{i+1}.csv')
        save_to_csv(dict_multi_score[i], csv_name)
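# multi_step_2D comes from the repo's evaluation utilities and is not shown in
# this section. A plausible sketch, assuming it scores forecasts 1..step
# intervals ahead by feeding each normalized prediction back into the newest
# closeness frame (the first nb_flow channels of X[0]) while keeping the
# period/trend inputs fixed; the repo's actual implementation may differ.
import numpy as np


def multi_step_2D(model, X_test, Y_test, mmn, len_closeness, step=5):
    X = [x.copy() for x in X_test]       # avoid mutating the caller's arrays
    nb_flow = Y_test.shape[1]
    factor = (mmn._max - mmn._min) / 2.  # normalized -> real-scale errors
    scores = []
    for s in range(step):
        n = len(Y_test) - s              # rows with a ground truth s steps ahead
        y_pred = model.predict([x[:n] for x in X])
        y_true = Y_test[s:]
        scores.append({
            'rmse': float(np.sqrt(np.mean((y_pred - y_true) ** 2)) * factor),
            'mae': float(np.mean(np.abs(y_pred - y_true)) * factor),
        })
        # roll the closeness channel one interval forward: the oldest of the
        # len_closeness frames is dropped, the prediction becomes the newest
        X[0][:n] = np.roll(X[0][:n], nb_flow, axis=1)
        X[0][:n, :nb_flow] = y_pred
    return scores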
def taxibj_evaluation():
    # parameters
    DATAPATH = '../data'
    CACHEDATA = True  # cache data or NOT
    T = 48  # number of time intervals in one day
    lr = 0.0002  # learning rate
    len_closeness = 4  # length of closeness dependent sequence - should be 6
    len_period = 4  # length of period dependent sequence
    len_trend = 4  # length of trend dependent sequence
    nb_flow = 2
    days_test = 7 * 4
    len_test = T * days_test
    map_height, map_width = 32, 32  # grid size
    path_cache = os.path.join(DATAPATH, 'CACHE', 'MST3D')  # cache path
    if CACHEDATA and os.path.isdir(path_cache) is False:
        os.mkdir(path_cache)

    # load data
    print("loading data...")
    ts = time.time()
    fname = os.path.join(
        path_cache,
        'TaxiBJ_C{}_P{}_T{}.h5'.format(len_closeness, len_period, len_trend))
    if os.path.exists(fname) and CACHEDATA:
        X_train, Y_train, X_test, Y_test, mmn, external_dim, timestamp_train, timestamp_test = read_cache(
            fname, 'preprocessing.pkl')
        print("load %s successfully" % fname)
    else:
        X_train, Y_train, X_test, Y_test, mmn, external_dim, timestamp_train, timestamp_test = TaxiBJ.load_data(
            T=T, nb_flow=nb_flow, len_closeness=len_closeness,
            len_period=len_period, len_trend=len_trend, len_test=len_test,
            preprocess_name='preprocessing.pkl', meta_data=True,
            meteorol_data=True, holiday_data=True, datapath=DATAPATH)
        if CACHEDATA:
            cache(fname, X_train, Y_train, X_test, Y_test,
                  external_dim, timestamp_train, timestamp_test)
    print("\n days (test): ", [v[:8] for v in timestamp_test[0::T]])
    print("\nelapsed time (loading data): %.3f seconds\n" % (time.time() - ts))
    print('=' * 10)

    mmn._max = 1292  # just to be sure it's correct

    # build model and load the pretrained weights
    model = build_model('BJ', len_closeness, len_period, len_trend, nb_flow,
                        map_height, map_width, external_dim)
    model_fname = 'TaxiBJ.c4.p4.t4.iter6.best.h5'
    model.load_weights(os.path.join('../best_models', 'MST3D', model_fname))

    # evaluate and save results
    dict_multi_score = multi_step_2D(model, X_test, Y_test, mmn,
                                     len_closeness, step=5)
    for i in range(len(dict_multi_score)):
        csv_name = os.path.join('results', f'taxibj_step{i+1}.csv')
        save_to_csv(dict_multi_score[i], csv_name)
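# save_to_csv persists one step's score dict; a minimal sketch, assuming a
# flat {metric: value} dict as produced per step above (the repo's version
# may also write a header row or append to existing files).
import csv
import os


def save_to_csv(score, csv_name):
    # create the results directory on first use
    os.makedirs(os.path.dirname(csv_name) or '.', exist_ok=True)
    with open(csv_name, 'w', newline='') as f:
        writer = csv.writer(f)
        for metric, value in score.items():
            writer.writerow([metric, value])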