def main(args):
    """Run testing.

    Reads the "test" split, restores the model inside a TF session, runs the
    evaluation, and prints every metric in sorted key order. Two summary
    lines follow: the names of the selected key metrics, then their values
    in the same order.

    Args:
        args: run configuration. Fields read here: gpuid, load_best,
            scene_grids, use_grids, show_center_only, show_grid_acc_at_T,
            per_scene_eval.
    """
    test_data = pred_utils.read_data(args, "test")
    print("total test samples:%s" % test_data.num_examples)

    model = pred_models.get_model(args, gpuid=args.gpuid)

    # Session setup: soft placement, on-demand GPU memory, single visible GPU.
    session_config = tf.ConfigProto(allow_soft_placement=True)
    session_config.gpu_options.allow_growth = True
    visible_gpus = ",".join("%s" % gpu for gpu in [args.gpuid])
    session_config.gpu_options.visible_device_list = "%s" % visible_gpus

    with tf.Session(config=session_config) as sess:
        # load the graph and variables
        pred_utils.initialize(
            load=True, load_best=args.load_best, args=args, sess=sess)
        tester = pred_models.Tester(model, args, sess)
        perf = pred_utils.evaluate(test_data, args, sess, tester)

    print("performance:")

    # Collect the metric names that go into the two summary lines.
    key_metrics = []
    for grid_idx in range(len(args.scene_grids)):
        if args.use_grids[grid_idx]:
            key_metrics.extend([
                "grid%d_acc" % grid_idx,
                "grid%d_traj_ade" % grid_idx,
                "grid%d_traj_fde" % grid_idx,
            ])
            if args.show_center_only:
                key_metrics.extend([
                    "grid%d_centerOnly_traj_ade" % grid_idx,
                    "grid%d_centerOnly_traj_fde" % grid_idx,
                ])
            if args.show_grid_acc_at_T:
                # min, max length, then 2 second, 4 second
                show_T = [0, 4, 9, 11]
                key_metrics.extend(
                    "grid%d_acc_@T=%d" % (grid_idx, t) for t in show_T)

    if args.per_scene_eval:
        scenes = ["0000", "0002", "0400", "0401", "0500"]
        key_metrics.extend("%s_ade" % scene for scene in scenes)
        key_metrics.extend("%s_fde" % scene for scene in scenes)

    # Print all metrics; remember the selected ones for the summary lines.
    selected_names = []
    selected_values = []
    for name in sorted(perf.keys()):
        print("%s, %s" % (name, perf[name]))
        if name in key_metrics:
            selected_names.append(name)
            selected_values.append("%s" % perf[name])

    print(" ".join(selected_names))
    print(" ".join(selected_values))
def main(args):
    """Run training.

    Builds the model, then trains it inside a TF session. Every
    ``args.save_period`` steps (and on the first step when resuming via
    ``args.load`` / ``args.load_best``) the model is checkpointed and
    evaluated on the "val" split; whenever the validation metric
    ``grid<val_grid_num>_traj_ade`` improves (smaller is better), a separate
    "best" checkpoint is written. If ``args.check_model`` is set, the
    non-optimizer variables are printed and the function returns without
    training.

    Args:
        args: run configuration. Fields read here: gpuid, check_model,
            save_period, load, load_best, batch_size, num_epochs,
            val_grid_num, loss_moving_avg_step, use_grids, save_dir_model,
            save_dir_best_model. ``args.train_num_examples`` is set as a
            side effect.
    """
    # summary of validation performance, and the training loss
    val_perf = []

    train_data = pred_utils.read_data(args, "train")
    val_data = pred_utils.read_data(args, "val")
    args.train_num_examples = train_data.num_examples

    # construct model under gpu0
    model = pred_models.get_model(args, gpuid=args.gpuid)

    if args.check_model:
        print("--------------- Model Weights -----------------")
        # Variables whose name contains one of these tags are not shown.
        hidden_tags = [
            "Adam", "beta1_power", "beta2_power", "Adam_1", "Adadelta_1",
            "Adadelta", "Momentum", "global_step"
        ]
        for var in tf.global_variables():
            if any(tag in var.name for tag in hidden_tags):
                continue
            print("%s %s\n" % (var.name, var.get_shape()))
        return

    trainer = pred_models.Trainer(model, args)
    tester = pred_models.Tester(model, args)
    saver = tf.train.Saver(max_to_keep=5)
    bestsaver = tf.train.Saver(max_to_keep=5)

    save_period = args.save_period  # also the eval period

    # start training!
    tfconfig = tf.ConfigProto(allow_soft_placement=True)
    tfconfig.gpu_options.allow_growth = True
    tfconfig.gpu_options.visible_device_list = "%s" % (",".join(
        ["%s" % i for i in [args.gpuid]]))

    with tf.Session(config=tfconfig) as sess:
        pred_utils.initialize(
            load=args.load, load_best=args.load_best, args=args, sess=sess)

        # the total step (iteration) the model will run
        # total / batchSize * epoch
        steps_per_epoch = math.ceil(
            train_data.num_examples / float(args.batch_size))
        num_steps = int(steps_per_epoch) * args.num_epochs

        print(" batch_size:%s, epoch:%s, %s step every epoch, total step:%s,"
              " eval/save every %s steps" % (
                  args.batch_size, args.num_epochs, steps_per_epoch,
                  num_steps, args.save_period))

        # average displacement error, smaller better
        metric = "grid%d_traj_ade" % args.val_grid_num
        # remember the best eval acc during training
        best = {metric: 999999, "step": -1}

        finalperf = None
        is_start = True
        loss = pred_utils.FIFO_ME(args.loss_moving_avg_step)
        wd_loss = pred_utils.FIFO_ME(args.loss_moving_avg_step)

        # Two trackers per enabled grid. Each one must be a distinct object:
        # repeating a list with `* 2` would only repeat references, so two
        # different losses would be pushed into the same moving average.
        # NOTE(review): assumes trainer.step returns 2 * sum(use_grids)
        # grid losses -- confirm against the Trainer implementation.
        pred_grid_loss = [
            pred_utils.FIFO_ME(args.loss_moving_avg_step)
            for _ in range(sum(args.use_grids) * 2)
        ]

        # Stays None when the batch generator yields nothing.
        global_step = None

        # get_batches is a generator, run on the fly
        batches = train_data.get_batches(args.batch_size, num_steps=num_steps)
        for batch in tqdm(batches, total=num_steps, ascii=True):
            global_step = sess.run(model.global_step) + 1  # start from 0

            # if load from existing model, save if first
            resumed_first_step = is_start and (args.load or args.load_best)
            if global_step % save_period == 0 or resumed_first_step:
                tqdm.write("\tsaving model %s..." % global_step)
                saver.save(sess, args.save_dir_model, global_step=global_step)
                tqdm.write("\tdone")

                evalperf = pred_utils.evaluate(val_data, args, sess, tester)

                tqdm.write(
                    ("\tmoving average of %s steps: loss:%s, wd_loss:%s,"
                     " pred_grid_loss:%s,"
                     " eval on validation:%s,"
                     " (best %s:%s at step %s) ") % (
                         args.loss_moving_avg_step, loss, wd_loss,
                         pred_grid_loss,
                         ["%s: %.4f" % (k, evalperf[k])
                          for k in sorted(evalperf.keys())],
                         metric, best[metric], best["step"]))

                # remember the best acc
                if evalperf[metric] < best[metric]:
                    best[metric] = evalperf[metric]
                    best["step"] = global_step
                    # save the best model
                    tqdm.write("\t saving best model...")
                    bestsaver.save(sess, args.save_dir_best_model,
                                   global_step=global_step)
                    tqdm.write("\t done.")

                val_perf.append((loss, evalperf))
                finalperf = evalperf
                is_start = False

            this_loss, _, this_wd_loss, this_pred_grid_loss = \
                trainer.step(sess, batch)

            if math.isnan(this_loss):
                print("nan loss.")
                print(this_pred_grid_loss)
                sys.exit()

            # add to moving average
            loss.put(this_loss)
            wd_loss.put(this_wd_loss)
            for i, tracker in enumerate(pred_grid_loss):
                tracker.put(this_pred_grid_loss[i])

        # Save the last state unless the loop's periodic save already did.
        if global_step is not None and global_step % save_period != 0:
            saver.save(sess, args.save_dir_model, global_step=global_step)

        # finalperf is still None when no evaluation ran (e.g. a fresh run
        # shorter than save_period); report None rather than crashing.
        final_value = finalperf[metric] if finalperf is not None else None
        print("best eval on val %s: %s at %s step, final step %s %s is %s" % (
            metric, best[metric], best["step"], global_step, metric,
            final_value))