def multi_pred(sess, y_pred, seq, batch_size, n_his, n_pred, step_idx, dynamic_batch=True):
    '''
    Multi-step prediction: run the one-step model n_pred times per batch,
    feeding every output back into the input window.
    :param sess: tf.Session().
    :param y_pred: fetch handed to sess.run() to obtain one prediction step.
    :param seq: np.ndarray, [len_seq, n_frame, n_route, C_0].
    :param batch_size: int, the size of batch.
    :param n_his: int, size of historical records for training.
    :param n_pred: int, the length of prediction.
    :param step_idx: int or list, index for prediction slice.
    :param dynamic_batch: bool, whether changes the batch size in the last one if its length is less than the default.
    :return: tuple of three items:
             the predictions selected by step_idx along the first axis,
             the full prediction array [n_pred, len_inputs, n_route, C_0],
             and int, the size of the second axis of that array.
    '''

    def _rollout(batch):
        # Work on a copy so that the source data is never modified.
        window = np.copy(batch[:, 0:n_his + 1, :, :])
        steps = []
        for _ in range(n_pred):
            step_out = sess.run(y_pred,
                                feed_dict={'data_input:0': window, 'keep_prob:0': 1.0})
            if isinstance(step_out, list):
                step_out = np.array(step_out[0])
            # Drop the oldest frame, then store the new prediction as the
            # most recent historical frame.
            window[:, 0:n_his - 1, :, :] = window[:, 1:n_his, :, :]
            window[:, n_his - 1, :, :] = step_out
            steps.append(step_out)
        return steps

    chunk = min(batch_size, len(seq))
    rollouts = [_rollout(batch)
                for batch in gen_batch(seq, chunk, dynamic_batch=dynamic_batch)]
    # forecasts -> [n_pred, len_inputs, n_route, C_0]
    forecasts = np.concatenate(rollouts, axis=1)
    n_samples = forecasts.shape[1]
    return forecasts[step_idx], forecasts, n_samples
def multi_pred(exe, gw, gf, program, y_pred, seq, batch_size,
               n_his, n_pred, step_idx, dynamic_batch=True):
    """Multi-step prediction.

    For each batch a graph is built from the first n_his frames, then the
    program is run n_pred times; every output is written back into the
    input window before the next run.

    step_idx is accepted but not used by this function.
    Returns the concatenated predictions and the size of their second axis.
    """
    chunk = min(batch_size, len(seq))
    rollouts = []
    for batch in gen_batch(seq, chunk, dynamic_batch=dynamic_batch):
        # Copy (as float32) so that the source data is never modified.
        window = np.copy(batch[:, 0:n_his + 1, :, :]).astype(np.float32)
        feed = gw.to_feed(gf.build_graph(batch[:, 0:n_his, :, :]))

        steps = []
        for _ in range(n_pred):
            feed['input'] = window
            fetched = exe.run(program, feed=feed, fetch_list=[y_pred])
            step_out = np.array(fetched[0]) if isinstance(fetched, list) else fetched
            # Drop the oldest frame, then store the new prediction as the
            # most recent historical frame.
            window[:, 0:n_his - 1, :, :] = window[:, 1:n_his, :, :]
            window[:, n_his - 1, :, :] = step_out
            steps.append(step_out)
        rollouts.append(steps)

    # forecasts -> [n_pred, len(seq), n_route, C_0]
    forecasts = np.concatenate(rollouts, axis=1)
    return forecasts, forecasts.shape[1]
def multi_pred(sess, y_pred, seq, batch_size, n_his, n_pred, step_idx, dynamic_batch=True, shrinkFirst=False):
    '''
    Multi-step prediction obtained with a single sess.run() per batch.
    :param sess: tf.Session().
    :param y_pred: fetch handed to sess.run().
    :param seq: np.ndarray, [len_seq, n_frame, n_route, C_0].
    :param batch_size: int, the size of batch.
    :param n_his: int, size of historical records for training.
    :param n_pred: int, the length of prediction.
    :param step_idx: int or list, index for prediction slice (not used by this function).
    :param dynamic_batch: bool, whether changes the batch size in the last one if its length is less than the default.
    :param shrinkFirst: bool, if true return only the first entry of the prediction array.
    :return: (pred_array[0], pred_array.shape[1]) when shrinkFirst is true,
             otherwise (pred_array, pred_array.shape[0]).
    '''
    chunk = min(batch_size, len(seq))
    # Each batch is fed with n_his + n_pred frames.
    pred_list = [
        sess.run(y_pred,
                 feed_dict={'data_input:0': batch[:, 0:n_his + n_pred, :, :],
                            'keep_prob:0': 1.0})
        for batch in gen_batch(seq, chunk, dynamic_batch=dynamic_batch)
    ]

    # NOTE(review): this debug print stacks every batch into one array; it may
    # fail when the last batch is smaller than the others -- confirm.
    print(f'pred_arry.shape={np.array(pred_list).shape}')
    pred_array = np.concatenate(pred_list, axis=1)
    print(f'pred_arry.shape`={pred_array.shape}')

    # NOTE(review): the second item is shape[0] here but shape[1] in the
    # shrinkFirst branch -- confirm that the difference is intended.
    if not shrinkFirst:
        return pred_array, pred_array.shape[0]
    return pred_array[0], pred_array.shape[1]
def model_train(inputs, blocks, args, sum_path='./output/tensorboard', output_dim=1):
    '''
    Train the base model and run inference after every epoch.
    :param inputs: instance of class Dataset, data source for training.
    :param blocks: list, channel configs of st_conv blocks.
    :param args: instance of class argparse, args for training; the fields read here are
                 n_route, seq_len, horizon, ks, kt, batch_size, epoch, inf_mode, opt, lr and save.
    :param sum_path: str, directory under which the 'train' summary writer is created.
    :param output_dim: forwarded unchanged to build_model().
    :raises ValueError: if args.opt is neither 'RMSProp' nor 'ADAM',
                        or args.inf_mode is neither 'sep' nor 'merge'.
    '''
    n, n_his, n_pred = args.n_route, args.seq_len, args.horizon
    Ks, Kt = args.ks, args.kt
    batch_size, epoch, inf_mode, opt = args.batch_size, args.epoch, args.inf_mode, args.opt

    # Placeholder for model training
    x = tf.compat.v1.placeholder(tf.float32, [None, n_his + 1, n, 3], name='data_input')
    keep_prob = tf.compat.v1.placeholder(tf.float32, name='keep_prob')

    # Define model loss, for one step forecasting...
    train_loss, pred = build_model(x, n_his, Ks, Kt, blocks, keep_prob, output_dim=output_dim)
    tf.summary.scalar('train_loss', train_loss)
    # copy loss just using the previous step as current step prediction
    copy_loss = tf.add_n(tf.get_collection('copy_loss'))
    tf.summary.scalar('copy_loss', copy_loss)

    # Learning rate settings
    global_steps = tf.Variable(0, trainable=False)
    len_train = inputs.get_len('train')
    # Number of batches per epoch (ceiling division). Kept an integer so that
    # decay_steps and the summary step are never floats.
    epoch_step = (len_train + batch_size - 1) // batch_size
    # Learning rate decay with rate 0.7 every 5 epochs.
    lr = tf.train.exponential_decay(args.lr, global_steps, decay_steps=5 * epoch_step,
                                    decay_rate=0.7, staircase=True)
    tf.summary.scalar('learning_rate', lr)
    step_op = tf.assign_add(global_steps, 1)
    # The optimizer op is created under a control dependency on step_op.
    with tf.control_dependencies([step_op]):
        if opt == 'RMSProp':
            train_op = tf.train.RMSPropOptimizer(lr).minimize(train_loss)
        elif opt == 'ADAM':
            train_op = tf.train.AdamOptimizer(lr).minimize(train_loss)
        else:
            raise ValueError(f'ERROR: optimizer "{opt}" is not defined.')

    merged = tf.summary.merge_all()

    with tf.Session() as sess:
        writer = tf.summary.FileWriter(pjoin(sum_path, 'train'), sess.graph)
        sess.run(tf.global_variables_initializer())

        if inf_mode == 'sep':
            # for inference mode 'sep', the type of step index is int.
            step_idx = n_pred - 1
            tmp_idx = [step_idx]
            min_val = min_va_val = np.array([4e1, 1e5, 1e5])
        elif inf_mode == 'merge':
            # for inference mode 'merge', the type of step index is np.ndarray.
            step_idx = tmp_idx = np.arange(n_pred)
            min_val = min_va_val = np.array([4e1, 1e5, 1e5] * len(step_idx))
        else:
            raise ValueError(f'ERROR: test mode "{inf_mode}" is not defined.')

        for i in range(epoch):
            start_time = time.time()
            for j, x_batch in enumerate(
                    gen_batch(inputs.get_data('train'), batch_size, dynamic_batch=True, shuffle=True)):
                feed = {x: x_batch[:, 0:n_his + 1, :, :], keep_prob: 1.0}
                summary, _ = sess.run([merged, train_op], feed_dict=feed)
                writer.add_summary(summary, i * epoch_step + j)
                if j % 50 == 0:
                    loss_value = sess.run([train_loss, copy_loss], feed_dict=feed)
                    print(
                        f'Epoch {i:2d}, Step {j:3d}: [model_loss: {loss_value[0]:.3f}, copy_loss: {loss_value[1]:.3f}]',
                        flush=True)
            print(f'Epoch {i:2d} Training Time {time.time() - start_time:.3f}s')

            start_time = time.time()
            min_va_val, min_val = \
                model_inference(sess, pred, inputs, batch_size, n_his, n_pred, step_idx, min_va_val, min_val)

            # Slice by position in tmp_idx, not by the step value itself: in
            # 'sep' mode the arrays start with only three entries, so indexing
            # with ix * 3 would yield an empty slice whenever n_pred > 1.
            # Assumes three metrics (MAPE, MAE, RMSE) per evaluated step, as
            # the initial values above suggest.
            for pos, ix in enumerate(tmp_idx):
                va = min_va_val[pos * 3:(pos + 1) * 3]
                te = min_val[pos * 3:(pos + 1) * 3]
                print(f'Time Step {ix + 1}: '
                      f'MAPE {va[0]:7.3%}, {te[0]:7.3%}; '
                      f'MAE  {va[1]:4.3f}, {te[1]:4.3f}; '
                      f'RMSE {va[2]:6.3f}, {te[2]:6.3f}.')
            print(f'Epoch {i:2d} Inference Time {time.time() - start_time:.3f}s')

            if (i + 1) % args.save == 0:
                model_save(sess, global_steps, 'STGCN')
        writer.close()
    print('Training model finished!')
def main(args):
    """Build the STGCN train/inference programs, then train and evaluate.

    Reads from args: input_file, label_file, city_file, n_route, n_his,
    n_pred, n_val, n_test, use_cuda, batch_size, lr, opt, inf_mode, epochs.

    Raises:
        ValueError: if args.opt is neither 'RMSProp' nor 'ADAM', or
            args.inf_mode is neither 'sep' nor 'merge'.
    """
    PeMS = data_gen_custom(args.input_file, args.label_file, args.city_file,
                           args.n_route, args.n_his, args.n_pred,
                           (args.n_val, args.n_test))
    log.info(PeMS.get_stats())
    log.info(PeMS.get_len('train'))

    gf = GraphFactory(args)

    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    train_program = fluid.Program()
    startup_program = fluid.Program()

    with fluid.program_guard(train_program, startup_program):
        gw = pgl.graph_wrapper.GraphWrapper(
            "gw",
            place,
            node_feat=[('norm', [None, 1], "float32")],
            edge_feat=[('weights', [None, 1], "float32")])

        model = STGCNModel(args, gw)
        train_loss, y_pred = model.forward()

    # Clone before the optimizer is added below.
    infer_program = train_program.clone(for_test=True)

    with fluid.program_guard(train_program, startup_program):
        epoch_step = int(PeMS.get_len('train') / args.batch_size) + 1
        lr = fl.exponential_decay(learning_rate=args.lr,
                                  decay_steps=5 * epoch_step,
                                  decay_rate=0.7,
                                  staircase=True)
        if args.opt == 'RMSProp':
            fluid.optimizer.RMSPropOptimizer(lr).minimize(train_loss)
        elif args.opt == 'ADAM':
            fluid.optimizer.Adam(lr).minimize(train_loss)
        else:
            # Without this no optimizer would be added and the loop below
            # would run without updating the model.
            raise ValueError(f'ERROR: optimizer "{args.opt}" is not defined.')

    exe = fluid.Executor(place)
    exe.run(startup_program)

    if args.inf_mode == 'sep':
        # for inference mode 'sep', the type of step index is int.
        step_idx = args.n_pred - 1
        tmp_idx = [step_idx]
        min_val = min_va_val = np.array([4e1, 1e5, 1e5])
    elif args.inf_mode == 'merge':
        # for inference mode 'merge', the type of step index is np.ndarray.
        step_idx = tmp_idx = np.arange(3, args.n_pred + 1, 3) - 1
        # Repeat the three initial values once per evaluated step; multiplying
        # the array by len(step_idx) would scale the values instead.
        min_val = min_va_val = np.array([4e1, 1e5, 1e5] * len(step_idx))
    else:
        raise ValueError(f'ERROR: test mode "{args.inf_mode}" is not defined.')

    for epoch in range(1, args.epochs + 1):
        for idx, x_batch in enumerate(
                gen_batch(PeMS.get_data('train'),
                          args.batch_size,
                          dynamic_batch=True,
                          shuffle=True)):
            # The graph is built from the n_his historical frames only; the
            # model input additionally carries one more frame.
            history = np.array(x_batch[:, 0:args.n_his, :, :], dtype=np.float32)
            feed = gw.to_feed(gf.build_graph(history))
            feed['input'] = np.array(x_batch[:, 0:args.n_his + 1, :, :],
                                     dtype=np.float32)
            b_loss, b_lr = exe.run(train_program,
                                   feed=feed,
                                   fetch_list=[train_loss, lr])

            if idx % 5 == 0:
                log.info("epoch %d | step %d | lr %.6f | loss %.6f" %
                         (epoch, idx, b_lr[0], b_loss[0]))

        min_va_val, min_val = \
            model_inference(exe, gw, gf, infer_program, y_pred, PeMS, args,
                            step_idx, min_va_val, min_val)

        # Slice by position in tmp_idx (three metrics per evaluated step).
        # For 'merge' this equals the former ix - 2:ix + 1 slices; for 'sep'
        # it also works when n_pred != 3.
        for pos, ix in enumerate(tmp_idx):
            va = min_va_val[pos * 3:(pos + 1) * 3]
            te = min_val[pos * 3:(pos + 1) * 3]
            print(f'Time Step {ix + 1}: '
                  f'MAPE {va[0]:7.3%}, {te[0]:7.3%}; '
                  f'MAE  {va[1]:4.3f}, {te[1]:4.3f}; '
                  f'RMSE {va[2]:6.3f}, {te[2]:6.3f}.')

        if epoch % 5 == 0:
            model_test(exe, gw, gf, infer_program, y_pred, PeMS, args)
def model_train(inputs, blocks, args, sum_path='./output/tensorboard', load_path='./output/models/', load=False):
    '''
    Train the base model, optionally resuming from a saved checkpoint.
    :param inputs: instance of class Dataset, data source for training.
    :param blocks: list, channel configs of st_conv blocks.
    :param args: instance of class argparse, args for training; the fields read here are
                 n_route, n_his, n_pred, ks, kt, batch_size, epoch, inf_mode, opt, lr and save.
    :param sum_path: str, directory under which the 'train' summary writer is created.
    :param load_path: str, checkpoint directory, only read when load is true.
    :param load: bool, restore variables from load_path instead of initializing them.
    :raises ValueError: if args.opt is neither 'RMSProp' nor 'ADAM',
                        or args.inf_mode is neither 'sep' nor 'merge'.
    :raises FileNotFoundError: if load is true and load_path holds no checkpoint.
    '''
    n, n_his, n_pred = args.n_route, args.n_his, args.n_pred
    Ks, Kt = args.ks, args.kt
    batch_size, epoch, inf_mode, opt = args.batch_size, args.epoch, args.inf_mode, args.opt

    # Placeholder for model training
    x = tf.placeholder(tf.float32, [None, n_his + n_pred, n, 1], name='data_input')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')

    # Define model loss
    train_loss, pred = build_model(x, n_his, Ks, Kt, blocks, keep_prob)
    tf.summary.scalar('train_loss', train_loss)
    copy_loss = tf.add_n(tf.get_collection('copy_loss'))
    tf.summary.scalar('copy_loss', copy_loss)

    # Learning rate settings
    global_steps = tf.Variable(0, trainable=False)
    len_train = inputs.get_len('train')
    # Number of batches per epoch (ceiling division). Kept an integer so that
    # decay_steps and the summary step are never floats.
    epoch_step = (len_train + batch_size - 1) // batch_size
    # Learning rate decay with rate 0.7 every 5 epochs.
    lr = tf.train.exponential_decay(args.lr, global_steps, decay_steps=5 * epoch_step,
                                    decay_rate=0.7, staircase=True)
    tf.summary.scalar('learning_rate', lr)
    step_op = tf.assign_add(global_steps, 1)
    # The optimizer op is created under a control dependency on step_op.
    with tf.control_dependencies([step_op]):
        if opt == 'RMSProp':
            train_op = tf.train.RMSPropOptimizer(lr).minimize(train_loss)
        elif opt == 'ADAM':
            train_op = tf.train.AdamOptimizer(lr).minimize(train_loss)
        else:
            raise ValueError(f'ERROR: optimizer "{opt}" is not defined.')

    merged = tf.summary.merge_all()

    saver = tf.train.Saver(max_to_keep=3)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        writer = tf.summary.FileWriter(pjoin(sum_path, 'train'), sess.graph)
        if load:
            ckpt = tf.train.get_checkpoint_state(load_path)
            # Fail with a clear message rather than an AttributeError on None.
            if ckpt is None:
                raise FileNotFoundError(f'No checkpoint found in "{load_path}".')
            saver.restore(sess, ckpt.model_checkpoint_path)
            print(f'>> Loading saved model from {load_path} ...')
        else:
            sess.run(tf.global_variables_initializer())

        if inf_mode == 'sep':
            # for inference mode 'sep', the type of step index is int.
            step_idx = n_pred - 1
            tmp_idx = [step_idx]
            min_val = min_va_val = np.array([4e1, 1e5, 1e5])
        elif inf_mode == 'merge':
            # for inference mode 'merge', the type of step index is np.ndarray.
            # NOTE(review): the evaluated steps are fixed to the first eight,
            # independent of n_pred -- confirm whether this should follow n_pred.
            step_idx = tmp_idx = np.arange(8)
            min_val = min_va_val = np.array([4e1, 1e5, 1e5] * len(step_idx))
        else:
            raise ValueError(f'ERROR: test mode "{inf_mode}" is not defined.')

        for i in range(epoch):
            start_time = time.time()
            for j, x_batch in enumerate(
                    gen_batch(inputs.get_data('train'), batch_size, dynamic_batch=True, shuffle=True)):
                feed = {x: x_batch[:, 0:n_his + n_pred, :, :], keep_prob: 1.0}
                summary, _ = sess.run([merged, train_op], feed_dict=feed)
                writer.add_summary(summary, i * epoch_step + j)
                if j % 50 == 0:
                    loss_value = sess.run([train_loss, copy_loss], feed_dict=feed)
                    print(f'Epoch {i:2d}, Step {j:3d}: [{loss_value[0]:.3f}, {loss_value[1]:.3f}]')
            print(f'Epoch {i:2d} Training Time {time.time() - start_time:.3f}s')

            if (i + 1) % args.save == 0:
                model_save(sess, global_steps, 'STGCN')

            # Inference only runs on every fifth epoch.
            if (i + 1) % 5 != 0:
                continue

            start_time = time.time()
            min_va_val, min_val = \
                model_inference(sess, pred, inputs, batch_size, n_his, n_pred, step_idx, min_va_val, min_val)
            # Three consecutive validation metrics are printed per step.
            for pos, ix in enumerate(tmp_idx):
                va = min_va_val[pos * 3:(pos + 1) * 3]
                print(f'Time Step {ix + 1}: '
                      f'MAPE {va[0]:7.3%}; '
                      f'MAE  {va[1]:4.3f}; '
                      f'RMSE {va[2]:6.3f}.')
            print(f'Epoch {i:2d} Inference Time {time.time() - start_time:.3f}s')
        writer.close()
    print('Training model finished!')
def transient_model_train(inputs, blocks, args, x_mean, scenario_data, test_data, sum_path='./output/tensorboard'):
    '''
    Train the base model, run the transient inference after every epoch and
    write the total training time to './output/models/training_time.txt'.
    :param inputs: instance of class Dataset, data source for training.
    :param blocks: list, channel configs of st_conv blocks.
    :param args: instance of class argparse, args for training; the fields read here are
                 n_route, n_his, n_pred, ks, kt, feature, batch_size, epoch, inf_mode, opt,
                 p_start_t, val_pred_length, test_pred_length and lr.
    :param x_mean: forwarded unchanged to transient_model_inference().
    :param scenario_data: forwarded unchanged to transient_model_inference().
    :param test_data: forwarded unchanged to transient_model_inference().
    :param sum_path: str, directory under which the 'train' summary writer is created.
    :raises ValueError: if args.opt is neither 'RMSProp' nor 'ADAM',
                        or args.inf_mode is neither 'sep' nor 'merge'.
    '''
    n, n_his, n_pred = args.n_route, args.n_his, args.n_pred
    Ks, Kt = args.ks, args.kt
    model_feature = args.feature
    batch_size, epoch, inf_mode, opt = args.batch_size, args.epoch, args.inf_mode, args.opt
    prediction_start_time = args.p_start_t
    val_pred_length = args.val_pred_length
    test_pred_length = args.test_pred_length

    # Placeholder for model training
    # placeholder data_input the size dimension is fixed
    x = tf.placeholder(tf.float32, [None, n_his + 1, n, 1], name='data_input')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')

    # Define model loss
    train_loss, pred = build_model(x, n_his, Ks, Kt, blocks, keep_prob)
    tf.summary.scalar('train_loss', train_loss)
    copy_loss = tf.add_n(tf.get_collection('copy_loss'))
    tf.summary.scalar('copy_loss', copy_loss)

    # Learning rate settings
    global_steps = tf.Variable(0, trainable=False)
    len_train = inputs.get_len('train')
    # Number of batches per epoch (ceiling division). Kept an integer so that
    # decay_steps and the summary step are never floats.
    epoch_step = (len_train + batch_size - 1) // batch_size
    # Learning rate decay with rate 0.7 every 5 epochs.
    lr = tf.train.exponential_decay(args.lr, global_steps, decay_steps=5 * epoch_step,
                                    decay_rate=0.7, staircase=True)
    tf.summary.scalar('learning_rate', lr)
    step_op = tf.assign_add(global_steps, 1)
    # The optimizer op is created under a control dependency on step_op.
    with tf.control_dependencies([step_op]):
        if opt == 'RMSProp':
            train_op = tf.train.RMSPropOptimizer(lr).minimize(train_loss)
        elif opt == 'ADAM':
            train_op = tf.train.AdamOptimizer(lr).minimize(train_loss)
        else:
            raise ValueError(f'ERROR: optimizer "{opt}" is not defined.')

    merged = tf.summary.merge_all()

    with tf.Session() as sess:
        writer = tf.summary.FileWriter(pjoin(sum_path, 'train'), sess.graph)
        # To resume training from a checkpoint, restore a tf.train.Saver here
        # instead of running the initializer.
        sess.run(tf.global_variables_initializer())

        if inf_mode == 'sep':
            # for inference mode 'sep', the type of step index is int.
            step_idx = n_pred - 1
            min_val = min_va_val = np.array([4e1, 1e5, 1e5])
        elif inf_mode == 'merge':
            # for inference mode 'merge', the type of step index is np.ndarray.
            step_idx = np.arange(n_pred)
            min_val = min_va_val = np.array([4e1, 1e5, 1e5])
        else:
            raise ValueError(f'ERROR: test mode "{inf_mode}" is not defined.')

        train_time_starter = time.time()
        for i in range(epoch):
            start_time = time.time()
            for j, x_batch in enumerate(
                    gen_batch(inputs.get_data('train'), batch_size, dynamic_batch=True, shuffle=True)):
                feed = {x: x_batch[:, 0:n_his + 1, :, :], keep_prob: 1.0}
                summary, _ = sess.run([merged, train_op], feed_dict=feed)
                writer.add_summary(summary, i * epoch_step + j)
                if j % 50 == 0:
                    loss_value = sess.run([train_loss, copy_loss], feed_dict=feed)
                    print(f'Epoch {i:2d}, Step {j:3d}: [Train_loss {loss_value[0]:.3f}]')
            print(f'Epoch {i:2d} Training Time {time.time() - start_time:.3f}s')

            # The returned metrics are carried into the next epoch's call;
            # this function itself does not print them.
            min_va_val, min_val = \
                transient_model_inference(sess, pred, inputs, batch_size, n_his, n_pred, step_idx,
                                          min_va_val, min_val, prediction_start_time, x_mean, i,
                                          global_steps, model_feature, val_pred_length,
                                          test_pred_length, scenario_data, test_data)
        writer.close()

        total_train_time = time.time() - train_time_starter
        load_path = './output/models/'
        with open(load_path + "training_time.txt", mode='w') as fh:
            fh.write('Training Time %s seconds.\n' % (total_train_time))
    print('Training model finished!')