Example #1
0
def main(_):
    """Build an imitation-learning policy for FLAGS.env_name, evaluate it on
    seeded validation environments, and optionally visualize / save a rollout GIF.
    """
    out_dir = Path(FLAGS.logdir) / FLAGS.env_name
    out_dir.mkdir(parents=True, exist_ok=True)

    if torch.cuda.is_available():
        device = torch.device('cuda:0')
    else:
        device = torch.device('cpu')
    torch.set_num_threads(4)

    data = load_data()
    is_visual = 'Visual' in FLAGS.env_name

    state_dim, action_dim, discrete = get_dims(FLAGS.env_name)
    if is_visual:
        # Stack this many past frames to form the policy input state.
        n_stack = 2
        channels, height, width = state_dim
        model = CNNPolicy(n_stack, (channels, height, width), [16, 32, 64],
                          action_dim, discrete)
        # TODO: Train your model
        # model = train_model_cnn(model, n_stack, out_dir, data['states'],
        #                         data['actions'], device, discrete)
    else:
        # Setup your model.
        model = NNPolicy(state_dim, [16, 32, 64], action_dim, discrete)
        # TODO: Train your model.
        # train_model(model, out_dir, data['states'], data['actions'], device,
        #             discrete)
    model = model.eval()

    # Validation: one env per episode, deterministically seeded at 1000+i.
    eval_envs = []
    for idx in range(FLAGS.num_episodes_val):
        env = gym.make(FLAGS.env_name)
        env.seed(idx + 1000)
        eval_envs.append(env)
    val(model, device, eval_envs, FLAGS.episode_len, is_visual)
    for env in eval_envs:
        env.close()

    if FLAGS.vis or FLAGS.vis_save:
        vis_env = gym.make(FLAGS.env_name)
        state, g, gif, info = test_model_in_env(
            model, vis_env, FLAGS.episode_len, device,
            vis=FLAGS.vis, vis_save=FLAGS.vis_save, visual=is_visual)
        if FLAGS.vis_save:
            # First frame carries the save call; remaining frames are appended.
            gif[0].save(fp=f'{out_dir}/vis-{vis_env.unwrapped.spec.id}.gif',
                        format='GIF',
                        append_images=gif,
                        save_all=True,
                        duration=50,
                        loop=0)
        vis_env.close()
Example #2
0
def main(_):
    """Sweep the number of training episodes, validate each resulting model,
    plot reward/metric versus data size, and optionally visualize the last model.
    """
    out_dir = Path(FLAGS.logdir) / FLAGS.env_name
    out_dir.mkdir(parents=True, exist_ok=True)

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    torch.set_num_threads(4)

    data = load_data()
    is_visual = 'Visual' in FLAGS.env_name

    sweep = [15, 30, 60, 120, 250]
    all_rewards = []
    all_metrics = []
    for n_train in sweep:
        # train() reads the episode count from this flag.
        FLAGS.num_episodes_train = n_train
        model = train(data, is_visual, device)

        # Fresh, deterministically seeded validation envs per sweep point.
        eval_envs = []
        for idx in range(FLAGS.num_episodes_val):
            env = gym.make(FLAGS.env_name)
            env.seed(idx + 1000)
            eval_envs.append(env)
        reward, metric, metric_name = val(model, device, eval_envs,
                                          FLAGS.episode_len, is_visual)
        all_rewards.append(reward)
        all_metrics.append(metric)
        for env in eval_envs:
            env.close()

    plot_result(all_rewards, all_metrics, sweep, metric_name)

    if FLAGS.vis or FLAGS.vis_save:
        vis_env = gym.make(FLAGS.env_name)
        state, g, gif, info = test_model_in_env(
            model, vis_env, FLAGS.episode_len, device, vis=FLAGS.vis,
            vis_save=FLAGS.vis_save, visual=is_visual)
        if FLAGS.vis_save:
            gif[0].save(fp=f'{out_dir}/vis-{vis_env.unwrapped.spec.id}.gif',
                        format='GIF', append_images=gif,
                        save_all=True, duration=50, loop=0)
        vis_env.close()
Example #3
0
def main(_):
    """Train a policy on FLAGS.env_name with DQN or actor-critic, evaluate it,
    and optionally visualize / save a rollout GIF.

    Raises:
        ValueError: if FLAGS.algo is not 'dqn' or 'ac'.
    """
    # Fail fast on an unknown algorithm. Previously `model` was left unbound
    # for a bad FLAGS.algo, crashing later with a confusing NameError (and
    # leaking the already-created environments).
    if FLAGS.algo not in ('dqn', 'ac'):
        raise ValueError(f'Unknown FLAGS.algo: {FLAGS.algo!r} '
                         "(expected 'dqn' or 'ac')")

    torch.manual_seed(FLAGS.seed)
    logdir = Path(FLAGS.logdir) / f'seed{FLAGS.seed}'
    logdir.mkdir(parents=True, exist_ok=True)

    # Setup training environments, seeded relative to FLAGS.seed.
    train_envs = [
        gym.make(FLAGS.env_name) for _ in range(FLAGS.num_train_envs)
    ]
    for i, env in enumerate(train_envs):
        env.seed(i + FLAGS.seed)

    # Validation environments use a fixed 1000+i seed so evaluation is
    # comparable across training seeds.
    val_envs = [gym.make(FLAGS.env_name) for _ in range(FLAGS.num_episodes)]
    for i, env in enumerate(val_envs):
        env.seed(i + 1000)

    def val_fn(model, device):
        """Evaluate `model` on the shared validation envs."""
        return val(model, device, val_envs, FLAGS.episode_len)

    torch.set_num_threads(1)
    device = torch.device(
        'cuda:0') if torch.cuda.is_available() else torch.device('cpu')

    state_dim, action_dim = get_dims(FLAGS.env_name)

    if FLAGS.algo == 'dqn':
        # One online network and one target network per ensemble member.
        n_models = 1
        models, targets = [], []
        for _ in range(n_models):
            models.append(
                DQNPolicy(state_dim, [16, 32, 64], action_dim, device))
            models[-1].to(device)
        for _ in range(n_models):
            targets.append(
                DQNPolicy(state_dim, [16, 32, 64], action_dim, device))
            targets[-1].to(device)

        train_model_dqn(models, targets, state_dim, action_dim, train_envs,
                        FLAGS.gamma, device, logdir, val_fn)
        model = models[0]
    else:  # FLAGS.algo == 'ac', guaranteed by the check at the top
        model = ActorCriticPolicy(state_dim, [16, 32, 64], action_dim)
        train_model_ac(model, train_envs, FLAGS.gamma, device, logdir, val_fn)

    for env in train_envs:
        env.close()
    for env in val_envs:
        env.close()

    if FLAGS.vis or FLAGS.vis_save:
        env_vis = gym.make(FLAGS.env_name)
        state, g, gif, info = test_model_in_env(model,
                                                env_vis,
                                                FLAGS.episode_len,
                                                device,
                                                vis=FLAGS.vis,
                                                vis_save=FLAGS.vis_save)
        if FLAGS.vis_save:
            gif[0].save(fp=f'{logdir}/vis-{env_vis.unwrapped.spec.id}.gif',
                        format='GIF',
                        append_images=gif,
                        save_all=True,
                        duration=50,
                        loop=0)
        env_vis.close()
Example #4
0
# =============BEGIN OF THE LEARNING LOOP=================== #
# initialization
# Best validation accuracy seen so far; used to flag checkpoints as best.
best_acc = 0.

for epoch in range(opt.n_epoch):
    # update learning rate
    # NOTE(review): stepping the scheduler BEFORE train() means epoch 0 already
    # applies a decayed LR; recent PyTorch expects scheduler.step() after
    # optimizer.step() within the epoch — confirm this order is intended.
    lrScheduler.step()

    # train
    train_loss, train_acc_rot = train(train_loader, model, opt.bin_size,
                                      opt.shape, criterion_azi, criterion_ele,
                                      criterion_inp, criterion_reg, optimizer)

    # evaluate
    eval_loss, eval_acc_rot, _, _ = val(eval_loader, model, opt.bin_size,
                                        opt.shape, criterion_azi,
                                        criterion_ele, criterion_inp,
                                        criterion_reg)

    # update best_acc and save checkpoint
    is_best = eval_acc_rot > best_acc
    best_acc = max(best_acc, eval_acc_rot)
    # Per-epoch [train, eval] history; `losses`/`accuracies` are presumably
    # pre-allocated (opt.n_epoch, 2) arrays — defined outside this snippet.
    losses[epoch, :] = [train_loss, eval_loss]
    accuracies[epoch, :] = [train_acc_rot, eval_acc_rot]
    save_checkpoint(
        {
            'epoch': epoch,
            'state_dict': model.state_dict(),
            'best_acc': best_acc,
            'optimizer': optimizer.state_dict(),
            'losses': losses,
            'accuracies': accuracies
            # NOTE(review): snippet is truncated here — the save_checkpoint(...)
            # call (and its closing braces/arguments) continues beyond this view.
def _train_bc(model, dt, device, discrete):
    """Behavior-clone `model` from demonstration data, one sample at a time.

    Uses CrossEntropyLoss for discrete action spaces and MSELoss otherwise.
    Mutates `model` in place (moved to `device` and trained).
    """
    # Bug fix: the training tensors live on `device`, so the model must too —
    # the original left the model on CPU and would crash under CUDA.
    model = model.to(device)

    criterion = nn.CrossEntropyLoss() if discrete else nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-4)

    # Hoist the host->device copy of the whole dataset out of the epoch loop
    # (the original re-copied the full arrays to `device` every epoch).
    states_all = torch.from_numpy(dt['states']).float().to(device)
    # CrossEntropyLoss needs integer class targets; MSELoss needs float
    # targets. The original cast actions to long unconditionally, which
    # breaks the continuous-action (MSE) path.
    if discrete:
        actions_all = torch.from_numpy(dt['actions']).long().to(device)
    else:
        actions_all = torch.from_numpy(dt['actions']).float().to(device)

    # assumes dt['states']/dt['actions'] are indexed [episode, step, ...]
    # with at least FLAGS.num_episodes_train episodes — TODO confirm.
    for epoch in range(FLAGS.num_episodes_train):
        total_loss = 0.
        states = states_all[epoch, :]
        actions = actions_all[epoch, :]

        for i in range(states.size()[0]):
            output = model(states[i]).view(1, -1)
            loss = criterion(output, actions[i])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Bug fix: accumulate a plain float. `total_loss += loss` kept
            # every iteration's autograd graph alive (memory leak).
            total_loss += loss.item()
        if epoch % 20 == 0:
            print("Epoch:", epoch, "Loss:", total_loss)


def main(_):
    """Behavior-clone a policy from demonstration data for FLAGS.env_name,
    validate it on seeded environments, and optionally visualize a rollout.
    """
    logdir = Path(FLAGS.logdir) / FLAGS.env_name
    logdir.mkdir(parents=True, exist_ok=True)

    device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
    torch.set_num_threads(4)

    dt = load_data()
    visual = 'Visual' in FLAGS.env_name

    if not visual:
        # Setup and train the low-dimensional-state policy.
        state_dim, action_dim, discrete = get_dims(FLAGS.env_name)
        model = NNPolicy(state_dim, [16, 32, 64], action_dim, discrete)
        _train_bc(model, dt, device, discrete)
        model = model.eval()
    else:
        state_dim, action_dim, discrete = get_dims(FLAGS.env_name)
        # Stack as many past images to represent the state
        stack_states = 2
        c, h, w = state_dim
        model = CNNPolicy(stack_states, (c, h, w), [16, 32, 64], action_dim, discrete)
        # TODO: Train your model
        # model =  train_model_cnn(model, stack_states, logdir,  dt['states'],
        #                          dt['actions'], device, discrete)
        model = model.eval()

    # Setting up validation environments, deterministically seeded at 1000+i.
    val_envs = [gym.make(FLAGS.env_name) for _ in range(FLAGS.num_episodes_val)]
    for i, env in enumerate(val_envs):
        env.seed(i + 1000)
    val(model, device, val_envs, FLAGS.episode_len, visual)
    for env in val_envs:
        env.close()

    if FLAGS.vis or FLAGS.vis_save:
        env_vis = gym.make(FLAGS.env_name)
        state, g, gif, info = test_model_in_env(
            model, env_vis, FLAGS.episode_len, device, vis=FLAGS.vis,
            vis_save=FLAGS.vis_save, visual=visual)
        if FLAGS.vis_save:
            gif[0].save(fp=f'{logdir}/vis-{env_vis.unwrapped.spec.id}.gif',
                        format='GIF', append_images=gif,
                        save_all=True, duration=50, loop=0)
        env_vis.close()