def unc_premodel(env, env_name, model_name):
    """Load a pre-trained uncertainty model (MC dropout, rank-1, or SWAG)."""
    path = './uncertainty_modeling/rl_uncertainty'
    obs_dim = env.observation_space.low.size
    action_dim = env.action_space.low.size
    input_size = obs_dim + action_dim
    model = None
    if model_name == 'mc_dropout':
        model = FlattenMlp_Dropout(  # Check the dropout layer!
            input_size=input_size,
            output_size=1,
            hidden_sizes=[256, 256],
        ).cuda()
    if model_name == 'rank1':
        model = Model(x_dim=input_size, h_dim=10, y_dim=1, n=10).cuda()
    if model_name == 'swag':
        kwargs = {"dimensions": [200, 50, 50, 50],
                  "output_dim": 1,
                  "input_dim": input_size}
        args = list()
        model = SWAG(RegNetBase, subspace_type="pca", *args, **kwargs,
                     subspace_kwargs={"max_rank": 10, "pca_rank": 10})
        model.cuda()
    if model is None:
        raise AttributeError('unknown model_name: {}'.format(model_name))
    model.load_state_dict(torch.load(
        '{}/{}/model/{}/model_200.pt'.format(path, model_name, env_name)))
    if model_name == 'swag':
        # Draw one weight sample from the fitted posterior subspace.
        model.sample(scale=10.)
    return model
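# A minimal usage sketch (not from the source): the env id below is
# hypothetical, and `unc_premodel` expects a checkpoint at
# <path>/<model_name>/model/<env_name>/model_200.pt.
#
#     env = gym.make('halfcheetah-medium-v0')            # hypothetical env id
#     unc_model = unc_premodel(env, 'halfcheetah-medium-v0', 'mc_dropout')
#     obs_act = torch.randn(1, env.observation_space.low.size +
#                              env.action_space.low.size).cuda()
#     q_sample = unc_model(obs_act)                      # one stochastic forward pass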
def train():
    env = gym.make(opts.env_name)
    obs_dim = env.observation_space.low.size
    action_dim = env.action_space.low.size
    input_size = obs_dim + action_dim
    epoch = 2000  # default: 3000
    qf_criterion = torch.nn.MSELoss()
    dataloader = DataLoader(
        # ScatterDataset(path='reg_data/test_data.npy'),
        GymDataset(env, opts.ood_test, opts.env_name),
        batch_size=400,
        shuffle=True,
        num_workers=8,
    )

    ## Choose the training model
    model = FlattenMlp_Dropout(
        input_size=input_size,
        output_size=1,
        hidden_sizes=[256, 256],
    ).cuda()
    print(model)

    ## Choose the optimizer to train
    # optim = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.95, weight_decay=0.)  # default
    # optim = torch.optim.Adam(model.parameters(), lr=1e-2)
    optim = torch.optim.Adam(model.parameters(), lr=1e-3)

    loss_buffer = []
    for ep in range(epoch):
        for i, data in enumerate(dataloader):
            obs_act = Variable(data['obs_act'].type(Tensor))
            next_obs_act = Variable(data['next_obs_act'].type(Tensor))
            rewards = Variable(data['rewards'].type(Tensor))
            terminals = Variable(data['terminals'].type(Tensor))

            # One-step TD target: y = r + gamma * (1 - done) * Q(s', a').
            # `discount` and `path` are module-level globals in the source.
            target_q_values = model(next_obs_act).detach()
            y_target = rewards + (1. - terminals) * discount * target_q_values
            y_target = y_target.detach()
            y_pred = model(obs_act)
            loss = qf_criterion(y_pred, y_target)

            optim.zero_grad()
            loss.backward()
            optim.step()
            loss_buffer.append(loss.item())
        print('[Epoch : %d/%d] [loss : %f]' % (ep, epoch, np.mean(np.array(loss_buffer))))
        if ep % 20 == 0:
            torch.save(model.state_dict(),
                       '{}/{}/model_{}.pt'.format(path, opts.env_name, ep))
    test()
def test():
    env = gym.make(opts.env_name)
    obs_dim = env.observation_space.low.size
    action_dim = env.action_space.low.size
    input_size = obs_dim + action_dim

    ## Load the testing dataset
    dataloader = DataLoader(
        # ScatterDataset(path='reg_data/test_data.npy'),
        GymDataset(env, opts.ood_test, opts.env_name),
        batch_size=400,
        shuffle=True,
        num_workers=8,
    )

    ## Choose the trained model
    model = FlattenMlp_Dropout(
        input_size=input_size,
        output_size=1,
        hidden_sizes=[256, 256],
    ).cuda()
    model.load_state_dict(torch.load(
        '{}/{}/model_100.pt'.format(path, opts.env_name)))  # if not handling an ensemble

    for i, data in enumerate(dataloader):
        id_obs_act = Variable(data['id_obs_act'].type(Tensor))
        ood_obs_act = Variable(data['ood_obs_act'].type(Tensor))
        with torch.no_grad():
            id_trajectories, ood_trajectories = [], []
            # 10 stochastic forward passes through the dropout network for
            # in-distribution (id) and out-of-distribution (ood) inputs.
            for _ in range(10):
                id_output_ = model(id_obs_act).cpu().numpy().T
                ood_output_ = model(ood_obs_act).cpu().numpy().T
                id_trajectories.append(id_output_[:1, :])
                ood_trajectories.append(ood_output_[:1, :])
            id_trajectories = np.vstack(id_trajectories)
            ood_trajectories = np.vstack(ood_trajectories)
            # id_sigma = np.std(id_trajectories, axis=0)
            # ood_sigma = np.std(ood_trajectories, axis=0)
            # Predictive variance across the MC samples: E[Q^2] - (E[Q])^2.
            id_sigma = np.mean(id_trajectories ** 2, axis=0) - np.mean(id_trajectories, axis=0) ** 2
            ood_sigma = np.mean(ood_trajectories ** 2, axis=0) - np.mean(ood_trajectories, axis=0) ** 2
            print('id_sigma : {}, ood_sigma : {}'.format(np.mean(id_sigma), np.mean(ood_sigma)))
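# Note (a minimal sketch, not from the source): the two mean-of-squares lines
# above compute the biased sample variance, so np.var gives the same result.
# MC dropout also relies on dropout staying active at test time, which
# FlattenMlp_Dropout is assumed to guarantee; with a plain nn.Dropout module
# one would need to keep it in train() mode while sampling.
#
#     id_sigma = np.var(id_trajectories, axis=0)     # same result as above
#     ood_sigma = np.var(ood_trajectories, axis=0)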
def train():
    epoch = 2000  # default: 3000
    qf_criterion = torch.nn.MSELoss()
    dataloader = DataLoader(
        GymDataset(),
        batch_size=400,
        shuffle=True,
        num_workers=8,
    )

    # `Num_ensemble` and `discount` are module-level globals in the source.
    for md in range(Num_ensemble):
        print('Training Model Num : %d' % (md))
        model = FlattenMlp_Dropout(
            input_size=23,
            output_size=1,
            hidden_sizes=[256, 256],
        )

        ## Choose the optimizer to train
        optim = torch.optim.Adam(model.parameters(), lr=1e-3)

        loss_buffer = []
        for ep in range(epoch):
            for i, data in enumerate(dataloader):
                obs_act = Variable(data['obs_act'].type(Tensor))
                next_obs_act = Variable(data['next_obs_act'].type(Tensor))
                rewards = Variable(data['rewards'].type(Tensor))
                terminals = Variable(data['terminals'].type(Tensor))

                # One-step TD target, as in the version above.
                target_q_values = model(next_obs_act).detach()
                y_target = rewards + (1. - terminals) * discount * target_q_values
                y_target = y_target.detach()
                y_pred = model(obs_act)
                loss = qf_criterion(y_pred, y_target)

                optim.zero_grad()
                loss.backward()
                optim.step()
                loss_buffer.append(loss.item())
            print('[Epoch : %d/%d] [loss : %f]' % (ep, epoch, np.mean(np.array(loss_buffer))))
            if ep % 20 == 0:
                # NOTE: the checkpoint name does not include the ensemble
                # index `md`, so successive members overwrite each other.
                torch.save(model.state_dict(), './dropout_128/rl_dropout_%d.pt' % (ep))
    test()
def test():
    ## Choose the trained model
    # NOTE: hidden_sizes here ([128, 128]) must match the training-time
    # network ([256, 256] in train() above) or load_state_dict will fail.
    model = FlattenMlp_Dropout(
        input_size=23,
        output_size=1,
        hidden_sizes=[128, 128],
    )
    dataloader = DataLoader(
        GymDataset_test(),
        batch_size=1000,
        shuffle=True,
        num_workers=8,
    )
    model.load_state_dict(torch.load('./dropout_128/rl_dropout_60.pt'))  # if not handling an ensemble

    for i, data in enumerate(dataloader):
        id_obs_act = Variable(data['id_obs_act'].type(Tensor))
        ood_obs_act = Variable(data['ood_obs_act'].type(Tensor))
        with torch.no_grad():
            id_trajectories, ood_trajectories = [], []
            # 10 stochastic forward passes per input batch.
            for _ in range(10):
                id_output_ = model(id_obs_act).cpu().numpy().T
                ood_output_ = model(ood_obs_act).cpu().numpy().T
                id_trajectories.append(id_output_[:1, :])
                ood_trajectories.append(ood_output_[:1, :])
            id_trajectories = np.vstack(id_trajectories)
            ood_trajectories = np.vstack(ood_trajectories)
            # id_sigma = np.std(id_trajectories, axis=0)
            # ood_sigma = np.std(ood_trajectories, axis=0)
            # Predictive variance across the MC samples: E[Q^2] - (E[Q])^2.
            id_sigma = np.mean(id_trajectories ** 2, axis=0) - np.mean(id_trajectories, axis=0) ** 2
            ood_sigma = np.mean(ood_trajectories ** 2, axis=0) - np.mean(ood_trajectories, axis=0) ** 2
            print('id_sigma : {}, ood_sigma : {}'.format(np.mean(id_sigma), np.mean(ood_sigma)))
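# The single-checkpoint load above ignores the ensemble that train() loops
# over. A minimal sketch of ensemble-style loading (the `m%d` checkpoint
# suffix is hypothetical and would require changing the torch.save call in
# train() to match):
#
#     models = []
#     for md in range(Num_ensemble):
#         m = FlattenMlp_Dropout(input_size=23, output_size=1, hidden_sizes=[128, 128])
#         m.load_state_dict(torch.load('./dropout_128/rl_dropout_m%d_60.pt' % md))
#         models.append(m)
#     # Disagreement across members then replaces the MC-dropout variance:
#     preds = np.vstack([m(id_obs_act).detach().cpu().numpy().T[:1, :] for m in models])
#     id_sigma = np.var(preds, axis=0)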
def experiment(variant):
    eval_env = gym.make(variant['env_name'])
    expl_env = eval_env
    obs_dim = expl_env.observation_space.low.size
    action_dim = eval_env.action_space.low.size

    M = variant['layer_size']
    qf1 = FlattenMlp_Dropout(
        input_size=obs_dim + action_dim,
        output_size=1,
        hidden_sizes=[M, M],
    )
    qf2 = FlattenMlp_Dropout(
        input_size=obs_dim + action_dim,
        output_size=1,
        hidden_sizes=[M, M],
    )
    target_qf1 = FlattenMlp_Dropout(
        input_size=obs_dim + action_dim,
        output_size=1,
        hidden_sizes=[M, M],
    )
    target_qf2 = FlattenMlp_Dropout(
        input_size=obs_dim + action_dim,
        output_size=1,
        hidden_sizes=[M, M],
    )
    policy = TanhGaussianPolicy(
        obs_dim=obs_dim,
        action_dim=action_dim,
        hidden_sizes=[M, M],
    )
    vae_policy = VAEPolicy(
        obs_dim=obs_dim,
        action_dim=action_dim,
        hidden_sizes=[750, 750],
        latent_dim=action_dim * 2,
    )
    eval_path_collector = CustomMDPPathCollector(eval_env)
    expl_path_collector = MdpPathCollector(expl_env, policy)

    buffer_filename = None
    if variant['buffer_filename'] is not None:
        buffer_filename = variant['buffer_filename']

    replay_buffer = EnvReplayBuffer(variant['replay_buffer_size'], expl_env)
    load_hdf5(eval_env.unwrapped.get_dataset(), replay_buffer,
              max_size=variant['replay_buffer_size'])

    trainer = UWACTrainer(
        env=eval_env,
        policy=policy,
        qf1=qf1,
        qf2=qf2,
        target_qf1=target_qf1,
        target_qf2=target_qf2,
        vae=vae_policy,
        **variant['trainer_kwargs'])
    algorithm = TorchBatchRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        replay_buffer=replay_buffer,
        batch_rl=True,
        q_learning_alg=True,
        **variant['algorithm_kwargs'])
    algorithm.to(ptu.device)
    algorithm.train()
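# A minimal sketch of a `variant` dict that satisfies the lookups above.
# Only the key names come from the source; every value is illustrative.
#
#     variant = dict(
#         env_name='halfcheetah-medium-v0',   # hypothetical d4rl env id
#         layer_size=256,
#         buffer_filename=None,
#         replay_buffer_size=int(1e6),
#         trainer_kwargs=dict(),              # forwarded to UWACTrainer
#         algorithm_kwargs=dict(),            # forwarded to TorchBatchRLAlgorithm
#     )
#     experiment(variant)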
from collections import OrderedDict

import numpy as np
import torch
import torch.optim as optim
from torch import autograd
from torch import nn as nn

import rlkit.torch.pytorch_util as ptu
from rlkit.core.eval_util import create_stats_ordered_dict
from rlkit.torch.networks import FlattenMlp_Dropout
from rlkit.torch.torch_rl_algorithm import TorchTrainer

# Module-level uncertainty network, loaded from an absolute checkpoint path.
model = FlattenMlp_Dropout(
    input_size=23,
    output_size=1,
    hidden_sizes=[256, 256],
).cuda()
model.load_state_dict(
    torch.load(
        '/home/user/Documents/Workspace-Changyeop/Workspace/AdvancedDL/AI602_Project/bear/rlkit/torch/sac/rl_dropout_140.pt'
    ))


def uncertainty(state, action, rep, beta):
    with torch.no_grad():
        batch_size = state.shape[0]
        # Tile each (state, action) pair `rep` times so that a single batched
        # forward pass yields `rep` stochastic dropout samples per input.
        state_cp = state.unsqueeze(1).repeat(1, rep, 1).view(
            state.shape[0] * rep, state.shape[1])
        action_cp = action.unsqueeze(1).repeat(1, rep, 1).view(
            action.shape[0] * rep, action.shape[1])
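        # (The function body is cut off in the source at this point. A minimal
        # sketch of one plausible continuation follows, assuming the tiled
        # pairs are scored by the module-level dropout network and the
        # per-input MC variance is returned; the reshape and the role of
        # `beta` are assumptions, not the authors' code.)
        #
        #     obs_act = torch.cat([state_cp, action_cp], dim=1)
        #     q_samples = model(obs_act).view(batch_size, rep)  # rep samples per pair
        #     var = q_samples.var(dim=1, unbiased=False)        # MC-dropout variance
        #     return var  # e.g. scaled or clipped with beta to weight the critic loss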