def main(_):
    """Entry point: build a behavior-cloning policy, validate, and render.

    Reads all configuration from FLAGS (logdir, env_name, vis flags, ...).
    State-based envs get an NNPolicy; envs with 'Visual' in their name get
    a CNNPolicy over a stack of past frames.  Training is left as a TODO
    scaffold; the model is evaluated on seeded validation envs and can be
    rendered / saved as a GIF.
    """
    logdir = Path(FLAGS.logdir) / FLAGS.env_name
    logdir.mkdir(parents=True, exist_ok=True)
    device = (torch.device('cuda:0') if torch.cuda.is_available()
              else torch.device('cpu'))
    torch.set_num_threads(4)
    dt = load_data()

    # Visual variants of the environment are identified by their name.
    visual = 'Visual' in FLAGS.env_name
    # Hoisted: both branches need the same env dimensions.
    state_dim, action_dim, discrete = get_dims(FLAGS.env_name)
    if not visual:
        # Setup your model.
        model = NNPolicy(state_dim, [16, 32, 64], action_dim, discrete)
        # TODO: Train your model.
        # train_model(model, logdir, dt['states'], dt['actions'], device,
        #             discrete)
    else:
        # Stack as many past images to represent the state.
        stack_states = 2
        c, h, w = state_dim
        model = CNNPolicy(stack_states, (c, h, w), [16, 32, 64], action_dim,
                          discrete)
        # TODO: Train your model.
        # model = train_model_cnn(model, stack_states, logdir, dt['states'],
        #                         dt['actions'], device, discrete)
    model = model.eval()

    # Setting up validation environments, one per validation episode,
    # each seeded deterministically so runs are comparable.
    val_envs = [gym.make(FLAGS.env_name)
                for _ in range(FLAGS.num_episodes_val)]
    for i, env in enumerate(val_envs):
        env.seed(i + 1000)
    val(model, device, val_envs, FLAGS.episode_len, visual)
    for env in val_envs:
        env.close()

    if FLAGS.vis or FLAGS.vis_save:
        env_vis = gym.make(FLAGS.env_name)
        state, g, gif, info = test_model_in_env(
            model, env_vis, FLAGS.episode_len, device,
            vis=FLAGS.vis, vis_save=FLAGS.vis_save, visual=visual)
        if FLAGS.vis_save:
            gif[0].save(fp=f'{logdir}/vis-{env_vis.unwrapped.spec.id}.gif',
                        format='GIF', append_images=gif, save_all=True,
                        duration=50, loop=0)
        env_vis.close()
def main(_):
    """Sweep the training-episode budget and plot reward/metric curves.

    For each budget in `epochs`, trains a fresh model via train(), scores
    it on freshly-seeded validation environments, collects the results,
    and plots them.  Optionally renders / saves a rollout of the last
    trained model as a GIF.
    """
    logdir = Path(FLAGS.logdir) / FLAGS.env_name
    logdir.mkdir(parents=True, exist_ok=True)
    device = (torch.device('cuda:0') if torch.cuda.is_available()
              else torch.device('cpu'))
    torch.set_num_threads(4)
    dt = load_data()
    visual = 'Visual' in FLAGS.env_name

    # Training budgets (number of demonstration episodes) to sweep over.
    epochs = [15, 30, 60, 120, 250]
    rewards = []
    metrics = []
    # Pre-bind so the names exist even if the sweep list were emptied.
    model = None
    metric_name = None
    for e in epochs:
        FLAGS.num_episodes_train = e  # train() reads its budget from FLAGS.
        model = train(dt, visual, device)

        # Fresh, deterministically-seeded validation environments so each
        # budget is evaluated under identical conditions.
        val_envs = [gym.make(FLAGS.env_name)
                    for _ in range(FLAGS.num_episodes_val)]
        for i, env in enumerate(val_envs):
            env.seed(i + 1000)
        reward, metric, metric_name = val(model, device, val_envs,
                                          FLAGS.episode_len, visual)
        rewards.append(reward)
        metrics.append(metric)
        for env in val_envs:
            env.close()

    plot_result(rewards, metrics, epochs, metric_name)

    if FLAGS.vis or FLAGS.vis_save:
        env_vis = gym.make(FLAGS.env_name)
        state, g, gif, info = test_model_in_env(
            model, env_vis, FLAGS.episode_len, device,
            vis=FLAGS.vis, vis_save=FLAGS.vis_save, visual=visual)
        if FLAGS.vis_save:
            gif[0].save(fp=f'{logdir}/vis-{env_vis.unwrapped.spec.id}.gif',
                        format='GIF', append_images=gif, save_all=True,
                        duration=50, loop=0)
        env_vis.close()
def main(_):
    """Train an RL policy (DQN or actor-critic per FLAGS.algo) and render it.

    Seeds torch and the training envs from FLAGS.seed; validation envs use
    a fixed 1000-offset so evaluation is comparable across training seeds.

    Raises:
        ValueError: if FLAGS.algo is neither 'dqn' nor 'ac'.
    """
    torch.manual_seed(FLAGS.seed)
    logdir = Path(FLAGS.logdir) / f'seed{FLAGS.seed}'
    logdir.mkdir(parents=True, exist_ok=True)

    # Setup training environments.
    train_envs = [gym.make(FLAGS.env_name)
                  for _ in range(FLAGS.num_train_envs)]
    for i, env in enumerate(train_envs):
        env.seed(i + FLAGS.seed)

    # Setting up validation environments.
    val_envs = [gym.make(FLAGS.env_name) for _ in range(FLAGS.num_episodes)]
    for i, env in enumerate(val_envs):
        env.seed(i + 1000)

    def val_fn(model, device):
        # Evaluate the given model on the shared validation envs.
        return val(model, device, val_envs, FLAGS.episode_len)

    torch.set_num_threads(1)
    device = (torch.device('cuda:0') if torch.cuda.is_available()
              else torch.device('cpu'))
    state_dim, action_dim = get_dims(FLAGS.env_name)

    if FLAGS.algo == 'dqn':
        # n_models > 1 would allow an ensemble; a single network by default.
        n_models = 1
        models, targets = [], []
        for _ in range(n_models):
            models.append(
                DQNPolicy(state_dim, [16, 32, 64], action_dim, device))
            models[-1].to(device)
        for _ in range(n_models):
            targets.append(
                DQNPolicy(state_dim, [16, 32, 64], action_dim, device))
            targets[-1].to(device)
        train_model_dqn(models, targets, state_dim, action_dim, train_envs,
                        FLAGS.gamma, device, logdir, val_fn)
        model = models[0]
    elif FLAGS.algo == 'ac':
        model = ActorCriticPolicy(state_dim, [16, 32, 64], action_dim)
        train_model_ac(model, train_envs, FLAGS.gamma, device, logdir, val_fn)
    else:
        # Fail fast: the original fell through and crashed later on an
        # undefined `model`, far from the misconfiguration.
        raise ValueError(f'Unknown FLAGS.algo: {FLAGS.algo!r}')

    for env in train_envs:
        env.close()
    for env in val_envs:
        env.close()

    if FLAGS.vis or FLAGS.vis_save:
        env_vis = gym.make(FLAGS.env_name)
        state, g, gif, info = test_model_in_env(
            model, env_vis, FLAGS.episode_len, device,
            vis=FLAGS.vis, vis_save=FLAGS.vis_save)
        if FLAGS.vis_save:
            gif[0].save(fp=f'{logdir}/vis-{env_vis.unwrapped.spec.id}.gif',
                        format='GIF', append_images=gif, save_all=True,
                        duration=50, loop=0)
        env_vis.close()
# =============BEGIN OF THE LEARNING LOOP=================== # # initialization best_acc = 0. for epoch in range(opt.n_epoch): # update learning rate lrScheduler.step() # train train_loss, train_acc_rot = train(train_loader, model, opt.bin_size, opt.shape, criterion_azi, criterion_ele, criterion_inp, criterion_reg, optimizer) # evaluate eval_loss, eval_acc_rot, _, _ = val(eval_loader, model, opt.bin_size, opt.shape, criterion_azi, criterion_ele, criterion_inp, criterion_reg) # update best_acc and save checkpoint is_best = eval_acc_rot > best_acc best_acc = max(best_acc, eval_acc_rot) losses[epoch, :] = [train_loss, eval_loss] accuracies[epoch, :] = [train_acc_rot, eval_acc_rot] save_checkpoint( { 'epoch': epoch, 'state_dict': model.state_dict(), 'best_acc': best_acc, 'optimizer': optimizer.state_dict(), 'losses': losses, 'accuracies': accuracies
def main(_):
    """Train a behavior-cloning policy on demonstrations and evaluate it.

    State-based envs get an NNPolicy trained inline with per-sample
    updates over the demonstration data; visual envs get a (still
    untrained, TODO) CNNPolicy over stacked frames.  The model is then
    validated on seeded envs and optionally rendered / saved as a GIF.
    """
    logdir = Path(FLAGS.logdir) / FLAGS.env_name
    logdir.mkdir(parents=True, exist_ok=True)
    device = (torch.device('cuda:0') if torch.cuda.is_available()
              else torch.device('cpu'))
    torch.set_num_threads(4)
    dt = load_data()
    visual = 'Visual' in FLAGS.env_name

    state_dim, action_dim, discrete = get_dims(FLAGS.env_name)
    if not visual:
        # Setup your model.
        model = NNPolicy(state_dim, [16, 32, 64], action_dim, discrete)
        # BUG FIX: the training data is moved to `device`; the model must
        # live there too or the first forward pass fails under CUDA.
        model = model.to(device)
        if discrete:
            criterion = nn.CrossEntropyLoss()
        else:
            criterion = nn.MSELoss()
        optimizer = optim.Adam(model.parameters(), lr=1e-4)
        model.train()
        for epoch in range(FLAGS.num_episodes_train):
            total_loss = 0.
            # One demonstration episode per "epoch":
            # assumes dt['states'] / dt['actions'] are indexed
            # [episode, step, ...] — TODO confirm against load_data().
            states = torch.from_numpy(
                dt['states']).float().to(device)[epoch]
            if discrete:
                # CrossEntropyLoss needs integer class targets.
                actions = torch.from_numpy(
                    dt['actions']).long().to(device)[epoch]
            else:
                # BUG FIX: continuous targets must stay float for MSELoss;
                # the original cast to .long() unconditionally.
                actions = torch.from_numpy(
                    dt['actions']).float().to(device)[epoch]
            for i in range(states.size(0)):
                output = model(states[i]).view(1, -1)
                # BUG FIX: give the target a leading batch dim so it
                # matches the (1, C) logits — a 0-dim target is rejected
                # by CrossEntropyLoss and silently broadcast by MSELoss.
                target = actions[i].view(1) if discrete \
                    else actions[i].view(1, -1)
                loss = criterion(output, target)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                # BUG FIX: accumulate a plain float; summing the tensor
                # kept every step's autograd graph alive (memory growth).
                total_loss += loss.item()
            if epoch % 20 == 0:
                print("Epoch:", epoch, "Loss:", total_loss)
        model = model.eval()
    else:
        # Stack as many past images to represent the state.
        stack_states = 2
        c, h, w = state_dim
        model = CNNPolicy(stack_states, (c, h, w), [16, 32, 64], action_dim,
                          discrete)
        # TODO: Train your model.
        # model = train_model_cnn(model, stack_states, logdir, dt['states'],
        #                         dt['actions'], device, discrete)
        model = model.eval()

    # Setting up validation environments, each seeded deterministically
    # so runs are comparable.
    val_envs = [gym.make(FLAGS.env_name)
                for _ in range(FLAGS.num_episodes_val)]
    for i, env in enumerate(val_envs):
        env.seed(i + 1000)
    val(model, device, val_envs, FLAGS.episode_len, visual)
    for env in val_envs:
        env.close()

    if FLAGS.vis or FLAGS.vis_save:
        env_vis = gym.make(FLAGS.env_name)
        state, g, gif, info = test_model_in_env(
            model, env_vis, FLAGS.episode_len, device,
            vis=FLAGS.vis, vis_save=FLAGS.vis_save, visual=visual)
        if FLAGS.vis_save:
            gif[0].save(fp=f'{logdir}/vis-{env_vis.unwrapped.spec.id}.gif',
                        format='GIF', append_images=gif, save_all=True,
                        duration=50, loop=0)
        env_vis.close()