def train():
    """Fit a dropout Q-network on transitions from a single gym environment.

    Builds the env named by ``opts.env_name``, trains ``FlattenMlp_Dropout``
    on (s,a) -> Q targets via one-step TD bootstrapping with an MSE loss,
    checkpoints every 20 epochs to ``{path}/{opts.env_name}/model_{ep}.pt``,
    and runs ``test()`` after each checkpoint.

    Relies on module-level names: gym, opts, DataLoader, GymDataset,
    FlattenMlp_Dropout, Tensor, discount, path, test.
    """
    env = gym.make(opts.env_name)
    # Flattened observation/action sizes drive the network input width.
    obs_dim = env.observation_space.low.size
    action_dim = env.action_space.low.size
    input_size = obs_dim + action_dim

    epoch = 2000  # default : 3000
    qf_criterion = torch.nn.MSELoss()
    dataloader = DataLoader(
        GymDataset(env, opts.ood_test, opts.env_name),
        batch_size=400,
        shuffle=True,
        num_workers=8,
    )

    # Q(s, a) regressor with MC-dropout layers (uncertainty at eval time).
    model = FlattenMlp_Dropout(
        input_size=input_size,
        output_size=1,
        hidden_sizes=[256, 256],
    ).cuda()
    print(model)

    optim = torch.optim.Adam(model.parameters(), lr=1e-3)

    # NOTE(review): loss_buffer is never cleared, so the per-epoch print below
    # is a running mean over ALL batches since epoch 0, not the current epoch.
    loss_buffer = []
    for ep in range(epoch):
        for i, data in enumerate(dataloader):
            # torch.autograd.Variable is a deprecated no-op since PyTorch 0.4;
            # tensors are used directly.
            obs_act = data['obs_act'].type(Tensor)
            next_obs_act = data['next_obs_act'].type(Tensor)
            rewards = data['rewards'].type(Tensor)
            terminals = data['terminals'].type(Tensor)

            # One-step TD target, bootstrapped from the SAME network (no
            # separate target net). no_grad replaces the original pair of
            # .detach() calls — no gradient flows through the target.
            with torch.no_grad():
                target_q_values = model(next_obs_act)
                y_target = rewards + (1. - terminals) * discount * target_q_values

            y_pred = model(obs_act)
            loss = qf_criterion(y_pred, y_target)

            optim.zero_grad()
            loss.backward()
            optim.step()

            loss_buffer.append(loss.item())

        print('[Epoch : %d/%d] [loss : %f] ' % (ep, epoch, np.mean(np.array(loss_buffer))))

        if ep % 20 == 0:
            torch.save(model.state_dict(), '{}/{}/model_{}.pt'.format(path, opts.env_name, ep))
            # NOTE(review): in the original (whitespace-mangled) source test()
            # trails torch.save; evaluating at each checkpoint is the most
            # plausible reading — confirm against upstream.
            test()
def train():
    """Train an ensemble of dropout Q-networks on a fixed GymDataset.

    Trains ``Num_ensemble`` independent ``FlattenMlp_Dropout`` models with
    one-step TD targets and MSE loss, checkpointing each member every 20
    epochs under ``./dropout_128/`` and running ``test()`` after each save.

    Relies on module-level names: DataLoader, GymDataset, Num_ensemble,
    FlattenMlp_Dropout, Tensor, discount, test.
    """
    epoch = 2000  # default : 3000
    qf_criterion = torch.nn.MSELoss()
    dataloader = DataLoader(
        GymDataset(),
        batch_size=400,
        shuffle=True,
        num_workers=8,
    )

    for md in range(Num_ensemble):
        print('Training Model Num : %d' % (md))

        # Fresh member per iteration; input_size=23 is hard-coded to the
        # dataset's obs+act width here (the sibling version derives it).
        # NOTE(review): model stays on CPU while the single-env variant calls
        # .cuda() — confirm the device of `Tensor` matches.
        model = FlattenMlp_Dropout(
            input_size=23,
            output_size=1,
            hidden_sizes=[256, 256],
        )

        optim = torch.optim.Adam(model.parameters(), lr=1e-3)

        loss_buffer = []  # reset per ensemble member
        for ep in range(epoch):
            for i, data in enumerate(dataloader):
                # Variable() wrappers dropped — deprecated no-ops since 0.4.
                obs_act = data['obs_act'].type(Tensor)
                next_obs_act = data['next_obs_act'].type(Tensor)
                rewards = data['rewards'].type(Tensor)
                terminals = data['terminals'].type(Tensor)

                # One-step TD target bootstrapped from the same network;
                # no_grad replaces the original redundant double .detach().
                with torch.no_grad():
                    target_q_values = model(next_obs_act)
                    y_target = rewards + (1. - terminals) * discount * target_q_values

                y_pred = model(obs_act)
                loss = qf_criterion(y_pred, y_target)

                optim.zero_grad()
                loss.backward()
                optim.step()

                loss_buffer.append(loss.item())

            print('[Epoch : %d/%d] [loss : %f] ' % (ep, epoch, np.mean(np.array(loss_buffer))))

            if ep % 20 == 0:
                # BUG FIX: the original path was keyed only by epoch
                # ('rl_dropout_%d.pt' % ep), so each ensemble member
                # overwrote the previous one's checkpoints and only the last
                # model survived. The member index is now part of the name.
                torch.save(model.state_dict(), './dropout_128/rl_dropout_%d_%d.pt' % (md, ep))
                # NOTE(review): test() trails torch.save in the mangled
                # original; per-checkpoint evaluation is the most plausible
                # reading — confirm against upstream.
                test()