Code Example #1
def unc_premodel(env, env_name, model_name):
    path = './uncertainty_modeling/rl_uncertainty'
    obs_dim = env.observation_space.low.size
    action_dim = env.action_space.low.size
    input_size = obs_dim + action_dim
    model = None
    if model_name == 'mc_dropout':
        model = FlattenMlp_Dropout(  # Check the dropout layer!
            input_size=input_size,
            output_size=1,
            hidden_sizes=[256, 256],
        ).cuda()
    if model_name == 'rank1':
        model = Model(x_dim=input_size, h_dim=10, y_dim=1, n=10).cuda()
    if model_name == 'swag':
        kwargs = {"dimensions": [200, 50, 50, 50],
                  "output_dim": 1,
                  "input_dim": input_size}
        args = list()
        model = SWAG(RegNetBase, subspace_type="pca", *args, **kwargs,
                     subspace_kwargs={"max_rank": 10, "pca_rank": 10})
        model.cuda()
    if model is None:
        raise AttributeError('unknown model_name: {}'.format(model_name))
    else:
        model.load_state_dict(torch.load('{}/{}/model/{}/model_200.pt'.format(path, model_name, env_name)))
        if model_name == 'swag':
            model.sample(scale=10.)
        return model
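A minimal usage sketch (not part of the project): assuming a d4rl-style environment name and an existing checkpoint under the directory layout above, the returned MC-dropout critic can be queried several times on the same input to get a predictive variance, as the test() examples below do. The environment name and dummy input here are illustrative assumptions.

import gym
import numpy as np
import torch

env = gym.make('halfcheetah-medium-v0')  # hypothetical d4rl-style env name
model = unc_premodel(env, 'halfcheetah-medium-v0', 'mc_dropout')

obs_dim = env.observation_space.low.size
action_dim = env.action_space.low.size
obs_act = torch.randn(1, obs_dim + action_dim).cuda()  # dummy (state, action) input

with torch.no_grad():
    # Dropout stays active because unc_premodel leaves the network in train mode.
    samples = np.stack([model(obs_act).cpu().numpy() for _ in range(10)])
uncertainty = samples.var(axis=0)  # predictive variance across the 10 passes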
Code Example #2
def train():
    env = gym.make(opts.env_name)
    obs_dim = env.observation_space.low.size
    action_dim = env.action_space.low.size
    input_size = obs_dim + action_dim

    epoch = 2000  # default : 3000
    qf_criterion = torch.nn.MSELoss()
    dataloader = DataLoader(
        # ScatterDataset(path='reg_data/test_data.npy'),
        GymDataset(env, opts.ood_test, opts.env_name),
        batch_size=400,
        shuffle=True,
        num_workers=8,
    )

    ## Choose the training model
    model = FlattenMlp_Dropout(
        input_size=input_size,
        output_size=1,
        hidden_sizes=[256, 256],
    ).cuda()

    print(model)

    ## Choose the optimizer to train
    # optim = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.95, weight_decay=0.) # default
    # optim = torch.optim.Adam(model.parameters(), lr=1e-2)
    optim = torch.optim.Adam(model.parameters(), lr=1e-3)
    loss_buffer = []

    for ep in range(epoch):
        for i, data in enumerate(dataloader):
            obs_act = Variable(data['obs_act'].type(Tensor))
            next_obs_act = Variable(data['next_obs_act'].type(Tensor))
            rewards = Variable(data['rewards'].type(Tensor))
            terminals = Variable(data['terminals'].type(Tensor))

            # loss, output, stats = criterion(model, input_, target_) # default

            target_q_values = model(next_obs_act).detach()
            y_target = rewards + (1. - terminals) * discount * target_q_values
            y_target = y_target.detach()
            y_pred = model(obs_act)
            loss = qf_criterion(y_pred, y_target)

            optim.zero_grad()
            loss.backward()
            optim.step()

            loss_buffer.append(loss.item())
        print('[Epoch : %d/%d] [loss : %f] ' %
              (ep, epoch, np.mean(np.array(loss_buffer))))

        if ep % 20 == 0:
            torch.save(model.state_dict(),
                       '{}/{}/model_{}.pt'.format(path, opts.env_name, ep))

    test()
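For reference, the regression target in this snippet is the standard one-step temporal-difference backup, y = r + (1 - terminal) * discount * Q(s', a'), bootstrapped from the same dropout network rather than a separate target network. `opts`, `path`, `discount` and `Tensor` are presumably module-level globals of the original script and are not defined inside the function. Note also that `loss_buffer` is never cleared, so the printed loss is a running mean over all epochs so far.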
Code Example #3
File: ood_test.py  Project: junmokane/AI602_Project
def test():
    env = gym.make(opts.env_name)
    obs_dim = env.observation_space.low.size
    action_dim = env.action_space.low.size
    input_size = obs_dim + action_dim
    ## Choose the trained model
    dataloader = DataLoader(
        # ScatterDataset(path='reg_data/test_data.npy'),
        GymDataset(env, opts.ood_test, opts.env_name),
        batch_size=400,
        shuffle=True,
        num_workers=8,
    )

    ## Choose the training model
    model = FlattenMlp_Dropout(
        input_size=input_size,
        output_size=1,
        hidden_sizes=[256, 256],
    ).cuda()

    model.load_state_dict(
        torch.load("{}/{}/model_100.pt".format(
            path, opts.env_name)))  # if not handling ensemble

    for i, data in enumerate(dataloader):
        id_obs_act = Variable(data['id_obs_act'].type(Tensor))
        ood_obs_act = Variable(data['ood_obs_act'].type(Tensor))
        # if i == 0 :
        with torch.no_grad():
            ## Load testing dataset
            id_trajectories, ood_trajectories = [], []
            ## Iterative test for each model
            for _ in range(10):  # 10 stochastic forward passes; avoid shadowing the dataloader index i
                id_output_ = model(id_obs_act).cpu().numpy().T
                ood_output_ = model(ood_obs_act).cpu().numpy().T
                id_trajectories.append(id_output_[:1, :])
                ood_trajectories.append(ood_output_[:1, :])
            id_trajectories = np.vstack(id_trajectories)
            ood_trajectories = np.vstack(ood_trajectories)

            # id_sigma = np.std(id_trajectories, axis=0)
            # ood_sigma = np.std(ood_trajectories, axis=0)
            id_sigma = np.mean(id_trajectories**2, axis=0) - np.mean(
                id_trajectories, axis=0)**2
            ood_sigma = np.mean(ood_trajectories**2, axis=0) - np.mean(
                ood_trajectories, axis=0)**2

            print('id_sigma : {}, ood_sigma : {}'.format(
                np.mean(id_sigma), np.mean(ood_sigma)))
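The two lines computing id_sigma and ood_sigma above are the biased sample variance E[x^2] - (E[x])^2 over the 10 dropout passes (the commented-out lines would give the standard deviation instead). If only the variance is wanted, np.var is equivalent:

id_sigma = np.var(id_trajectories, axis=0)    # same as mean(x**2) - mean(x)**2 with ddof=0
ood_sigma = np.var(ood_trajectories, axis=0)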
Code Example #4
def train():
    epoch = 2000 # default : 3000
    qf_criterion = torch.nn.MSELoss()
    dataloader = DataLoader(
        GymDataset(),
        batch_size=400,
        shuffle=True,
        num_workers=8,
    )

    for md in range(Num_ensemble):
        print('Training Model Num : %d' % md)

        model = FlattenMlp_Dropout(
            input_size=23,
            output_size=1,
            hidden_sizes=[256, 256],
        )

        ## Choose the optimizer to train

        optim = torch.optim.Adam(model.parameters(), lr=1e-3)
        loss_buffer = []

        for ep in range(epoch):
            for i, data in enumerate(dataloader):
                obs_act = Variable(data['obs_act'].type(Tensor))
                next_obs_act = Variable(data['next_obs_act'].type(Tensor))
                rewards = Variable(data['rewards'].type(Tensor))
                terminals = Variable(data['terminals'].type(Tensor))

                target_q_values = model(next_obs_act).detach()
                y_target = rewards + (1. - terminals) * discount * target_q_values
                y_target = y_target.detach()
                y_pred = model(obs_act)
                loss = qf_criterion(y_pred, y_target)

                optim.zero_grad()
                loss.backward()
                optim.step()

                # print('[Epoch : %d/%d] [Batch : %d/%d] [loss : %f] [q : %f]' % (ep, epoch, i, len(dataloader), loss.item(), y_repr.item()))
                loss_buffer.append(loss.item())
            print('[Epoch : %d/%d] [loss : %f] ' % (ep, epoch, np.mean(np.array(loss_buffer))))
            if ep % 20 == 0:
                # NOTE: the filename does not include the ensemble index `md`,
                # so each ensemble member overwrites the previous one's checkpoints.
                torch.save(model.state_dict(), './dropout_128/rl_dropout_%d.pt' % ep)

    test()
Code Example #5
def test():
    ## Choose the trained model
    model = FlattenMlp_Dropout(
        input_size=23,
        output_size=1,
        hidden_sizes=[128, 128],
    )

    dataloader = DataLoader(
        GymDataset_test(),
        batch_size=1000,
        shuffle=True,
        num_workers=8,
    )

    model.load_state_dict(torch.load("./dropout_128/rl_dropout_" + str(60) + ".pt"))  # if not handling ensemble

    for i, data in enumerate(dataloader):
        id_obs_act = Variable(data['id_obs_act'].type(Tensor))
        ood_obs_act = Variable(data['ood_obs_act'].type(Tensor))
        # if i == 0 :
        with torch.no_grad():
            ## Load testing dataset
            id_trajectories, ood_trajectories = [], []
            ## Iterative test for each model
            for _ in range(10):  # 10 stochastic forward passes; avoid shadowing the dataloader index i
                id_output_ = model(id_obs_act).cpu().numpy().T
                ood_output_ = model(ood_obs_act).cpu().numpy().T
                id_trajectories.append(id_output_[:1, :])
                ood_trajectories.append(ood_output_[:1, :])
            id_trajectories = np.vstack(id_trajectories)
            ood_trajectories = np.vstack(ood_trajectories)

            # id_sigma = np.std(id_trajectories, axis=0)
            # ood_sigma = np.std(ood_trajectories, axis=0)
            id_sigma = np.mean(id_trajectories**2, axis=0) - np.mean(id_trajectories, axis=0) ** 2
            ood_sigma = np.mean(ood_trajectories ** 2, axis=0) - np.mean(ood_trajectories, axis=0) ** 2

            print('id_sigma : {}, ood_sigma : {}'.format(np.mean(id_sigma), np.mean(ood_sigma)))
Code Example #6
def experiment(variant):
    eval_env = gym.make(variant['env_name'])
    expl_env = eval_env

    obs_dim = expl_env.observation_space.low.size
    action_dim = eval_env.action_space.low.size

    M = variant['layer_size']
    qf1 = FlattenMlp_Dropout(
        input_size=obs_dim + action_dim,
        output_size=1,
        hidden_sizes=[M, M],
    )
    qf2 = FlattenMlp_Dropout(
        input_size=obs_dim + action_dim,
        output_size=1,
        hidden_sizes=[M, M],
    )
    target_qf1 = FlattenMlp_Dropout(
        input_size=obs_dim + action_dim,
        output_size=1,
        hidden_sizes=[M, M],
    )
    target_qf2 = FlattenMlp_Dropout(
        input_size=obs_dim + action_dim,
        output_size=1,
        hidden_sizes=[M, M],
    )
    policy = TanhGaussianPolicy(
        obs_dim=obs_dim,
        action_dim=action_dim,
        hidden_sizes=[M, M],
    )
    vae_policy = VAEPolicy(
        obs_dim=obs_dim,
        action_dim=action_dim,
        hidden_sizes=[750, 750],
        latent_dim=action_dim * 2,
    )
    eval_path_collector = CustomMDPPathCollector(eval_env)
    expl_path_collector = MdpPathCollector(
        expl_env,
        policy,
    )
    buffer_filename = None
    if variant['buffer_filename'] is not None:
        buffer_filename = variant['buffer_filename']

    replay_buffer = EnvReplayBuffer(
        variant['replay_buffer_size'],
        expl_env,
    )
    load_hdf5(eval_env.unwrapped.get_dataset(),
              replay_buffer,
              max_size=variant['replay_buffer_size'])

    trainer = UWACTrainer(env=eval_env,
                          policy=policy,
                          qf1=qf1,
                          qf2=qf2,
                          target_qf1=target_qf1,
                          target_qf2=target_qf2,
                          vae=vae_policy,
                          **variant['trainer_kwargs'])
    algorithm = TorchBatchRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        replay_buffer=replay_buffer,
        batch_rl=True,
        q_learning_alg=True,
        **variant['algorithm_kwargs'])
    algorithm.to(ptu.device)
    algorithm.train()
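A hedged sketch of the variant dict that experiment() reads; the keys come from the function body above, but the concrete values are illustrative assumptions, not the project's actual defaults.

variant = dict(
    env_name='halfcheetah-medium-v0',   # must be a d4rl-style env: get_dataset() is called on it
    layer_size=256,                     # hidden width M of the dropout Q-networks
    buffer_filename=None,
    replay_buffer_size=int(1e6),
    trainer_kwargs=dict(),              # forwarded to UWACTrainer
    algorithm_kwargs=dict(),            # forwarded to TorchBatchRLAlgorithm
)
experiment(variant)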
Code Example #7
from collections import OrderedDict

import numpy as np
import torch
import torch.optim as optim
from torch import nn as nn
from rlkit.torch.networks import FlattenMlp_Dropout
import rlkit.torch.pytorch_util as ptu
from rlkit.core.eval_util import create_stats_ordered_dict
from rlkit.torch.torch_rl_algorithm import TorchTrainer
from torch import autograd

model = FlattenMlp_Dropout(
    input_size=23,
    output_size=1,
    hidden_sizes=[256, 256],
).cuda()
model.load_state_dict(
    torch.load(
        '/home/user/Documents/Workspace-Changyeop/Workspace/AdvancedDL/AI602_Project/bear/rlkit/torch/sac/rl_dropout_140.pt'
    ))


def uncertainty(state, action, rep, beta):
    with torch.no_grad():
        batch_size = state.shape[0]
        state_cp = state.unsqueeze(1).repeat(1, rep,
                                             1).view(state.shape[0] * rep,
                                                     state.shape[1])
        action_cp = action.unsqueeze(1).repeat(1, rep,
                                               1).view(action.shape[0] * rep,