# Checkpointing and logging at the end of a training epoch. The opening of the
# first torch.save call was cut off in this fragment; saving
# policy.state_dict() here is a reconstruction inferred from the second call,
# which saves the full model object under a 'full-model-' prefix.
torch.save(
    policy.state_dict(),
    os.path.join(
        model_dir,
        'pg-{}-TSP{}-epoch-{}.pt'.format(str(id), args.n_points, epoch)))
torch.save(
    policy,
    os.path.join(
        model_dir,
        'full-model-pg-{}-TSP{}-epoch-{}.pt'.format(
            str(id), args.n_points, epoch)))
best_running_reward = val_rwd_log.exp_avg
val_best_dist = val_best_dist_log.val
best_gap = gap

if epoch % args.log_interval == 0:
    # flush training and validation metrics to the external logger
    train_rwd_log.log(log)
    train_init_dist_log.log(log)
    train_best_dist_log.log(log)
    train_policy_loss_log.log(log)
    train_entropy_loss_log.log(log)
    train_value_loss_log.log(log)
    train_loss_log.log(log)
    val_rwd_log.log(log)
    val_init_dist_log.log(log)
    val_best_dist_log.log(log)
    # print a console summary in green (ANSI escape \033[1;32;40m); the
    # original print call was truncated here and is closed minimally
    print(
        '\033[1;32;40m Train - epoch:{} |rwd: {:.2f}'.format(
            epoch, train_rwd_log.val))
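# --- Hedged sketch (not part of the original script): a minimal metric-log
# class consistent with how the *_log objects are used above, i.e. .val holds
# the most recent value, .exp_avg an exponential moving average, and .log(log)
# pushes the value to an external writer. The class name, the 0.9 smoothing
# factor, and the add_scalar-style writer interface are all illustrative
# assumptions, not the project's actual implementation.
class MetricLog:
    def __init__(self, name, beta=0.9):
        self.name = name
        self.beta = beta
        self.val = 0.0
        self.exp_avg = 0.0

    def update(self, value):
        # record a new sample and fold it into the moving average
        self.val = float(value)
        self.exp_avg = self.beta * self.exp_avg + (1.0 - self.beta) * self.val

    def log(self, writer):
        # hand the latest value to the writer (assumed SummaryWriter-like)
        writer.add_scalar(self.name, self.val)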
# Train one dynamics predictor per skip-step horizon. EnvironmentState,
# StateDataset, Predictor, AverageMeter, and Timer are project-specific
# classes; torch and DataLoader are the only external dependencies.
import torch
from torch.utils.data import DataLoader

step_list = [1, 8, 16, 24, 50, 100]
print('training start now: ')
for n_step in step_list:
    env = EnvironmentState()
    state_set = StateDataset(env, skip_step=n_step, size=7000000,
                             random_torque=True, remove_torque=False)
    state_set_loader = DataLoader(state_set, batch_size=1024)
    predictor = Predictor(state_set.output_size, 2)
    predictor.stepper.recurrent_step = 0
    avm = AverageMeter()
    with Timer():
        for i, s in enumerate(state_set_loader):
            # regression target: change in state dims 12:14 across the step
            d = s['s1'][:, 12:14] - s['s0'][:, 12:14]  # with torques
            r, loss = predictor.optimize(s['s0'], d)
            avm.log(loss)
            if i % 10 == 0:
                # note: i counts mini-batches; the rmse readout treats the
                # returned loss as an MSE
                print(
                    f'epoch {i}: trn loss {avm.value:.4f} {avm.std:.4f}, '
                    f'rmse {avm.value**0.5:.4f}, '
                    f'pred to stat error ratio: {torch.mean(torch.abs(r / d)):.4f} '
                    f'max_d {torch.max(d):.2f} {torch.min(d):.2f}, '
                    f'max_r {torch.max(r):.2f} {torch.min(r):.2f}'
                )
    predictor.save(f'predictor{n_step}step.pt')
print('training finished.')
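# --- Hedged sketch (assumption, not the project's code): minimal AverageMeter
# and Timer helpers matching how they are used above: avm.log(loss) records a
# sample, avm.value / avm.std expose the running mean and standard deviation,
# and Timer() times the wrapped block as a context manager. This uses
# Welford's online algorithm; the real classes may differ.
import time

class AverageMeter:
    def __init__(self):
        self.n = 0
        self.mean = 0.0
        self.m2 = 0.0  # running sum of squared deviations (Welford)

    def log(self, x):
        # incorporate one sample into the running mean/variance
        x = float(x)
        self.n += 1
        delta = x - self.mean
        self.mean += delta / self.n
        self.m2 += delta * (x - self.mean)

    @property
    def value(self):
        return self.mean

    @property
    def std(self):
        return (self.m2 / self.n) ** 0.5 if self.n > 1 else 0.0

class Timer:
    def __enter__(self):
        self.start = time.perf_counter()
        return self

    def __exit__(self, *exc):
        # report wall-clock time for the wrapped block
        print(f'elapsed: {time.perf_counter() - self.start:.1f}s')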