Example #1
def train():
    env = gym.make(opts.env_name)
    obs_dim = env.observation_space.low.size
    action_dim = env.action_space.low.size
    input_size = obs_dim + action_dim

    epoch = 2000  # default : 3000
    qf_criterion = torch.nn.MSELoss()
    dataloader = DataLoader(
        # ScatterDataset(path='reg_data/test_data.npy'),
        GymDataset(env, opts.ood_test, opts.env_name),
        batch_size=400,
        shuffle=True,
        num_workers=8,
    )

    ## Choose the training model
    model = FlattenMlp_Dropout(
        input_size=input_size,
        output_size=1,
        hidden_sizes=[256, 256],
    ).cuda()

    print(model)

    ## Choose the optimizer to train
    # optim = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.95, weight_decay=0.) # default
    # optim = torch.optim.Adam(model.parameters(), lr=1e-2)
    optim = torch.optim.Adam(model.parameters(), lr=1e-3)
    loss_buffer = []

    for ep in range(epoch):
        for i, data in enumerate(dataloader):
            obs_act = Variable(data['obs_act'].type(Tensor))
            next_obs_act = Variable(data['next_obs_act'].type(Tensor))
            rewards = Variable(data['rewards'].type(Tensor))
            terminals = Variable(data['terminals'].type(Tensor))

            # loss, output, stats = criterion(model, input_, target_) # default

            # Bootstrapped TD target: y = r + (1 - done) * gamma * Q(s', a'), with no gradient through the target.
            target_q_values = model(next_obs_act).detach()
            y_target = rewards + (1. - terminals) * discount * target_q_values
            y_target = y_target.detach()
            y_pred = model(obs_act)
            loss = qf_criterion(y_pred, y_target)

            optim.zero_grad()
            loss.backward()
            optim.step()

            loss_buffer.append(loss.item())
        # Running mean over all batches seen so far (the buffer is not reset at each epoch).
        print('[Epoch : %d/%d] [loss : %f] ' %
              (ep, epoch, np.mean(np.array(loss_buffer))))

        if ep % 20 == 0:
            torch.save(model.state_dict(),
                       '{}/{}/model_{}.pt'.format(path, opts.env_name, ep))

    test()
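
A minimal sketch of what the FlattenMlp_Dropout class used above might look like. The class itself is not part of the snippet; the layer layout and the dropout rate below are assumptions for illustration, not the original definition:

import torch
import torch.nn as nn

class FlattenMlp_Dropout(nn.Module):
    """MLP that concatenates its inputs and applies dropout after each hidden layer
    (assumed interface; drop_p=0.1 is an illustrative default)."""

    def __init__(self, input_size, output_size, hidden_sizes, drop_p=0.1):
        super().__init__()
        layers = []
        last_size = input_size
        for h in hidden_sizes:
            layers += [nn.Linear(last_size, h), nn.ReLU(), nn.Dropout(p=drop_p)]
            last_size = h
        layers.append(nn.Linear(last_size, output_size))
        self.net = nn.Sequential(*layers)

    def forward(self, *inputs):
        # Concatenate all inputs along the last dimension, then run the MLP.
        flat = torch.cat(inputs, dim=-1)
        return self.net(flat)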
Example #2
def train():
    epoch = 2000 # default : 3000
    qf_criterion = torch.nn.MSELoss()
    dataloader = DataLoader(
        GymDataset(),
        batch_size=400,
        shuffle=True,
        num_workers=8,
    )

    for md in range(Num_ensemble):
        print('Training Model Num : %d'%(md))

        model = FlattenMlp_Dropout(
            input_size=23,
            output_size=1,
            hidden_sizes=[256, 256],
        )
        # Note: unlike Example #1, the model is not moved to the GPU here; call .cuda() if Tensor is a CUDA tensor type.

        ## Choose the optimizer to train

        optim = torch.optim.Adam(model.parameters(), lr=1e-3)
        loss_buffer = []

        for ep in range(epoch):
            for i, data in enumerate(dataloader):
                obs_act = Variable(data['obs_act'].type(Tensor))
                next_obs_act = Variable(data['next_obs_act'].type(Tensor))
                rewards = Variable(data['rewards'].type(Tensor))
                terminals = Variable(data['terminals'].type(Tensor))

                target_q_values = model(next_obs_act).detach()
                y_target = rewards + (1. - terminals) * discount * target_q_values
                y_target = y_target.detach()
                y_pred = model(obs_act)
                loss = qf_criterion(y_pred, y_target)

                optim.zero_grad()
                loss.backward()
                optim.step()

                # print('[Epoch : %d/%d] [Batch : %d/%d] [loss : %f] [q : %f]' % (ep, epoch, i, len(dataloader), loss.item(), y_repr.item()))
                loss_buffer.append(loss.item())
            print('[Epoch : %d/%d] [loss : %f] ' % (ep, epoch, np.mean(np.array(loss_buffer))))
            if ep % 20 == 0:
                # Include the ensemble index so checkpoints from different ensemble members do not overwrite each other.
                torch.save(model.state_dict(), './dropout_128/rl_dropout_%d_%d.pt' % (md, ep))

    test()
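
GymDataset is also external to both snippets. Below is a minimal sketch of a Dataset with the dict interface the training loops expect ('obs_act', 'next_obs_act', 'rewards', 'terminals'); the constructor arguments (raw transition arrays) are an assumption for illustration and differ from the env/opts arguments used above:

import numpy as np
from torch.utils.data import Dataset

class GymDataset(Dataset):
    """Offline transitions exposed as concatenated (state, action) pairs (assumed layout)."""

    def __init__(self, observations, actions, rewards, next_observations, next_actions, terminals):
        self.obs_act = np.concatenate([observations, actions], axis=-1).astype(np.float32)
        self.next_obs_act = np.concatenate([next_observations, next_actions], axis=-1).astype(np.float32)
        self.rewards = rewards.reshape(-1, 1).astype(np.float32)
        self.terminals = terminals.reshape(-1, 1).astype(np.float32)

    def __len__(self):
        return len(self.rewards)

    def __getitem__(self, idx):
        return {
            'obs_act': self.obs_act[idx],
            'next_obs_act': self.next_obs_act[idx],
            'rewards': self.rewards[idx],
            'terminals': self.terminals[idx],
        }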