def unc_premodel(env, env_name, model_name):
    """Build an uncertainty-estimation model and load its pretrained weights.

    Args:
        env: gym-style environment; its observation/action space sizes define
            the network input dimension.
        env_name: environment name used to locate the checkpoint directory.
        model_name: one of 'mc_dropout', 'rank1', 'swag'.

    Returns:
        The CUDA model with weights loaded from
        ``{path}/{model_name}/model/{env_name}/model_200.pt``.

    Raises:
        AttributeError: if ``model_name`` is not a supported name.
    """
    path = './uncertainty_modeling/rl_uncertainty'
    obs_dim = env.observation_space.low.size
    action_dim = env.action_space.low.size
    input_size = obs_dim + action_dim
    model = None
    if model_name == 'mc_dropout':
        model = FlattenMlp_Dropout(  # Check the dropout layer!
            input_size=input_size,
            output_size=1,
            hidden_sizes=[256, 256],
        ).cuda()
    elif model_name == 'rank1':
        model = Model(x_dim=input_size, h_dim=10, y_dim=1, n=10).cuda()
    elif model_name == 'swag':
        kwargs = {"dimensions": [200, 50, 50, 50],
                  "output_dim": 1,
                  "input_dim": input_size}
        model = SWAG(RegNetBase, subspace_type="pca", **kwargs,
                     subspace_kwargs={"max_rank": 10, "pca_rank": 10})
        model.cuda()
    if model is None:
        # Keep AttributeError for backward compatibility with existing callers,
        # but say what went wrong.
        raise AttributeError('unknown model_name: {}'.format(model_name))
    model.load_state_dict(torch.load('{}/{}/model/{}/model_200.pt'.format(path, model_name, env_name)))
    if model_name == 'swag':
        # SWAG needs a posterior weight sample drawn before it can be evaluated.
        model.sample(scale=10.)
    return model
# Example #2
def test():
    """Compare MC-dropout predictive variance on in- vs out-of-distribution data.

    Loads a trained dropout Q-network, runs 10 stochastic forward passes per
    batch, and prints the mean predictive variance for the in-distribution
    (id) and out-of-distribution (ood) inputs of each batch.
    """
    env = gym.make(opts.env_name)
    obs_dim = env.observation_space.low.size
    action_dim = env.action_space.low.size
    input_size = obs_dim + action_dim

    ## Test dataset: paired id / ood (state, action) batches.
    dataloader = DataLoader(
        GymDataset(env, opts.ood_test, opts.env_name),
        batch_size=400,
        shuffle=True,
        num_workers=8,
    )

    ## Trained model (MC dropout — presumably dropout stays active at
    ## inference so repeated passes differ; verify FlattenMlp_Dropout).
    model = FlattenMlp_Dropout(
        input_size=input_size,
        output_size=1,
        hidden_sizes=[256, 256],
    ).cuda()

    model.load_state_dict(
        torch.load("{}/{}/model_100.pt".format(
            path, opts.env_name)))  # if not handling ensemble

    for data in dataloader:
        id_obs_act = Variable(data['id_obs_act'].type(Tensor))
        ood_obs_act = Variable(data['ood_obs_act'].type(Tensor))
        with torch.no_grad():
            id_trajectories, ood_trajectories = [], []
            # 10 stochastic forward passes per batch. NOTE: the original used
            # `for i in range(10)`, shadowing the dataloader index — fixed by
            # using a throwaway name.
            for _ in range(10):
                id_output_ = model(id_obs_act).cpu().numpy().T
                ood_output_ = model(ood_obs_act).cpu().numpy().T
                id_trajectories.append(id_output_[:1, :])
                ood_trajectories.append(ood_output_[:1, :])
            id_trajectories = np.vstack(id_trajectories)
            ood_trajectories = np.vstack(ood_trajectories)

            # Predictive variance across the MC samples; np.var is the same
            # population variance as the hand-rolled E[x^2] - E[x]^2.
            id_sigma = np.var(id_trajectories, axis=0)
            ood_sigma = np.var(ood_trajectories, axis=0)

            print('id_sigma : {}, ood_sigma : {}'.format(
                np.mean(id_sigma), np.mean(ood_sigma)))
# Example #3
def test():
    """Compare MC-dropout predictive variance on in- vs out-of-distribution data.

    Loads a trained dropout network (fixed 23-d input), runs 10 stochastic
    forward passes per batch, and prints the mean predictive variance for the
    in-distribution (id) and out-of-distribution (ood) inputs of each batch.
    """
    ## Trained model (input_size=23 is hard-coded for the target env —
    ## presumably obs_dim + action_dim; verify against the dataset).
    model = FlattenMlp_Dropout(
            input_size=23,
            output_size=1,
            hidden_sizes=[128, 128],
        )

    dataloader = DataLoader(
        GymDataset_test(),
        batch_size=1000,
        shuffle=True,
        num_workers=8,
    )

    model.load_state_dict(torch.load("./dropout_128/rl_dropout_" + str(60) + ".pt"))  # if not handling ensemble

    for data in dataloader:
        id_obs_act = Variable(data['id_obs_act'].type(Tensor))
        ood_obs_act = Variable(data['ood_obs_act'].type(Tensor))
        with torch.no_grad():
            id_trajectories, ood_trajectories = [], []
            # 10 stochastic forward passes per batch. NOTE: the original used
            # `for i in range(10)`, shadowing the dataloader index — fixed by
            # using a throwaway name.
            for _ in range(10):
                id_output_ = model(id_obs_act).cpu().numpy().T
                ood_output_ = model(ood_obs_act).cpu().numpy().T
                id_trajectories.append(id_output_[:1, :])
                ood_trajectories.append(ood_output_[:1, :])
            id_trajectories = np.vstack(id_trajectories)
            ood_trajectories = np.vstack(ood_trajectories)

            # Predictive variance across the MC samples; np.var is the same
            # population variance as the hand-rolled E[x^2] - E[x]^2.
            id_sigma = np.var(id_trajectories, axis=0)
            ood_sigma = np.var(ood_trajectories, axis=0)

            print('id_sigma : {}, ood_sigma : {}'.format(np.mean(id_sigma), np.mean(ood_sigma)))
# Example #4
import torch
import torch.optim as optim
from torch import nn as nn
from rlkit.torch.networks import FlattenMlp_Dropout
import rlkit.torch.pytorch_util as ptu
from rlkit.core.eval_util import create_stats_ordered_dict
from rlkit.torch.torch_rl_algorithm import TorchTrainer
from torch import autograd

# Module-level dropout network consumed by `uncertainty()` below.
# NOTE(review): the checkpoint path is an absolute, machine-specific path —
# this module will only import successfully on that machine; consider making
# it configurable.
model = FlattenMlp_Dropout(
    input_size=23,  # hard-coded input dim — presumably obs_dim + action_dim; verify
    output_size=1,
    hidden_sizes=[256, 256],
).cuda()
model.load_state_dict(
    torch.load(
        '/home/user/Documents/Workspace-Changyeop/Workspace/AdvancedDL/AI602_Project/bear/rlkit/torch/sac/rl_dropout_140.pt'
    ))


def uncertainty(state, action, rep, beta):
    with torch.no_grad():
        batch_size = state.shape[0]
        state_cp = state.unsqueeze(1).repeat(1, rep,
                                             1).view(state.shape[0] * rep,
                                                     state.shape[1])
        action_cp = action.unsqueeze(1).repeat(1, rep,
                                               1).view(action.shape[0] * rep,
                                                       action.shape[1])

        target_qf1 = model(torch.cat([state_cp, action_cp], dim=1))  # BTx1
        target_qf1 = target_qf1.view(batch_size, rep, 1)  # BxTx1