Ejemplo n.º 1
0
    def __init__(self, obs_shape, action_space, hidden_size=100, embed_size=0, base=None, base_kwargs=None):
        """Assemble the feature extractor (``self.base``) and action head (``self.dist``).

        Args:
            obs_shape: Observation shape. Its length picks the default base
                (3 -> ``CNNBase``, 1 -> ``MLPBase``); ``obs_shape[0]`` and
                ``obs_shape[1:]`` are forwarded to the base constructor.
            action_space: Action space; its class name selects the head.
            hidden_size: Forwarded positionally to the base constructor.
            embed_size: Forwarded positionally to the base constructor.
            base: Optional base class/factory overriding the default choice.
            base_kwargs: Optional extra keyword arguments for the base.

        Raises:
            NotImplementedError: If ``base`` is None and ``obs_shape`` has
                neither 1 nor 3 entries.
        """
        super(Policy, self).__init__()
        base_kwargs = {} if base_kwargs is None else base_kwargs

        if base is None:
            obs_rank = len(obs_shape)
            if obs_rank == 3:
                base = CNNBase
            elif obs_rank == 1:
                base = MLPBase
            else:
                raise NotImplementedError

        self.base = base(obs_shape[0], obs_shape[1:], hidden_size, embed_size, **base_kwargs)

        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            head_cls, num_outputs = Categorical, action_space.n
        elif space_kind == "MultiBinary":
            head_cls, num_outputs = Bernoulli, action_space.shape[0]
        else:
            # "Box" and every other space type share the DiagGaussian head.
            head_cls, num_outputs = DiagGaussian, action_space.shape[0]
        self.dist = head_cls(self.base.output_size, num_outputs)
Ejemplo n.º 2
0
def create_output_distribution(action_space, output_size):
    """Return the distribution head matching ``action_space``.

    The head is selected by the class name of ``action_space``:
    ``Discrete`` -> ``Categorical``; ``Box`` and ``MultiDiscrete`` ->
    ``DiagGaussian``; ``MultiBinary`` -> ``Bernoulli``. A ``Tuple`` space is
    handled recursively: one head per sub-space, wrapped together in a
    ``DistributionGeneratorTuple``.

    Args:
        action_space: The action space to build a head for.
        output_size: Passed as the first argument to every head constructor.

    Raises:
        NotImplementedError: For any other kind of action space.
    """
    space_kind = action_space.__class__.__name__

    if space_kind == "Discrete":
        return Categorical(output_size, action_space.n)
    if space_kind in ("Box", "MultiDiscrete"):
        return DiagGaussian(output_size, action_space.shape[0])
    if space_kind == "MultiBinary":
        return Bernoulli(output_size, action_space.shape[0])
    if space_kind == "Tuple":
        sub_dists = tuple(
            create_output_distribution(sub_space, output_size)
            for sub_space in action_space
        )
        return DistributionGeneratorTuple(sub_dists)
    raise NotImplementedError
Ejemplo n.º 3
0
    def __init__(self, obs_shape, ac_shape, base=None, base_kwargs=None):
        """Build the base network and a ``DiagGaussian`` action head.

        Args:
            obs_shape: Observation shape; its length selects the default base
                (3 -> ``CNNBase``, 1 -> ``MLPBase``) and ``obs_shape[0]`` is
                passed to the base constructor.
            ac_shape: Action shape; ``ac_shape[0]`` is the head's output count.
            base: Optional base class/factory used instead of the default.
            base_kwargs: Optional keyword arguments for the base constructor.

        Raises:
            NotImplementedError: If ``base`` is None and ``len(obs_shape)`` is
                neither 1 nor 3.
        """
        super(Policy, self).__init__()
        kwargs = base_kwargs if base_kwargs is not None else {}

        base_cls = base
        if base_cls is None:
            rank = len(obs_shape)
            if rank not in (1, 3):
                raise NotImplementedError
            base_cls = CNNBase if rank == 3 else MLPBase

        self.base = base_cls(obs_shape[0], **kwargs)
        self.dist = DiagGaussian(self.base.output_size, ac_shape[0])
Ejemplo n.º 4
0
    def __init__(self,
                 obs_shape,
                 action_space,
                 is_leaf,
                 base=None,
                 base_kwargs=None):
        """Create the ``OpsBase`` network and the matching action head.

        Args:
            obs_shape: Observation shape; ``obs_shape[0]`` goes to ``OpsBase``.
            action_space: Passed to ``OpsBase`` and used (by class name) to
                choose the distribution head.
            is_leaf: Forwarded to ``OpsBase`` as the ``is_leaf`` keyword.
            base: Accepted but not used; the base is always ``OpsBase``.
            base_kwargs: Optional extra keyword arguments for ``OpsBase``.

        Raises:
            NotImplementedError: If the action space is not ``Discrete``,
                ``Box`` or ``MultiBinary``.
        """
        super(OpsPolicy, self).__init__()
        extra_kwargs = {} if base_kwargs is None else base_kwargs

        self.base = OpsBase(obs_shape[0], action_space, is_leaf=is_leaf, **extra_kwargs)

        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            head_cls, num_outputs = Categorical, action_space.n
        elif space_kind == "Box":
            head_cls, num_outputs = DiagGaussian, action_space.shape[0]
        elif space_kind == "MultiBinary":
            head_cls, num_outputs = Bernoulli, action_space.shape[0]
        else:
            raise NotImplementedError
        self.dist = head_cls(self.base.output_size, num_outputs)
Ejemplo n.º 5
0
    def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
        """Build the base network and an action head, including ``MultiDiscrete``.

        Args:
            obs_shape: Observation shape; its length selects the default base
                (3 -> ``CNNBase``, 1 -> ``MLPBase``) and ``obs_shape[0]`` is
                passed to the base constructor.
            action_space: Action space. ``Discrete``, ``Box`` and
                ``MultiBinary`` are matched by class name; ``MultiDiscrete``
                is matched with ``isinstance`` against ``gym.spaces``.
            base: Optional base class/factory used instead of the default.
            base_kwargs: Optional keyword arguments for the base constructor.

        Raises:
            NotImplementedError: For an unsupported observation rank or
                action space.
        """
        super(Policy, self).__init__()
        base_kwargs = {} if base_kwargs is None else base_kwargs

        if base is None:
            obs_rank = len(obs_shape)
            if obs_rank == 3:
                base = CNNBase
            elif obs_rank == 1:
                base = MLPBase
            else:
                raise NotImplementedError

        self.base = base(obs_shape[0], **base_kwargs)

        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            self.dist = Categorical(self.base.output_size, action_space.n)
        elif space_kind == "Box":
            self.dist = DiagGaussian(self.base.output_size, action_space.shape[0])
        elif space_kind == "MultiBinary":
            self.dist = Bernoulli(self.base.output_size, action_space.shape[0])
        elif isinstance(action_space, gym.spaces.MultiDiscrete):
            # The head receives the summed size of all sub-actions plus the
            # per-dimension sizes themselves.
            per_dim_sizes = action_space.nvec
            total_size = int(np.sum(per_dim_sizes))
            self.dist = MultiCategoricalDistribution(self.base.output_size, total_size, per_dim_sizes)
        else:
            raise NotImplementedError
Ejemplo n.º 6
0
    def __init__(self,
                 obs_shape,
                 action_space,
                 agent_num,
                 agent_i,
                 base=None,
                 base_kwargs=None):
        """Build a multi-agent base network and the matching action head.

        Args:
            obs_shape: Observation shape; its length selects the default base
                (3 -> ``CNNBase``, 1 -> ``MLPBase``).
            action_space: Action space; its class name selects the head.
            agent_num: Passed to the base constructor as second argument.
            agent_i: Accepted but not used in this constructor.
            base: Optional base class/factory used instead of the default.
            base_kwargs: Optional keyword arguments for the base constructor.

        Raises:
            NotImplementedError: For an unsupported observation rank or
                action space.
        """
        super(Policy, self).__init__()
        base_kwargs = base_kwargs if base_kwargs is not None else {}

        if base is None:
            obs_rank = len(obs_shape)
            if obs_rank == 3:
                base = CNNBase
            elif obs_rank == 1:
                base = MLPBase
            else:
                raise NotImplementedError

        # Actor input dimension is num_state; critic input is
        # num_state * agent_num, hence agent_num is handed to the base.
        self.base = base(obs_shape[0], agent_num, **base_kwargs)

        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            head_cls, num_outputs = Categorical, action_space.n
        elif space_kind == "Box":
            head_cls, num_outputs = DiagGaussian, action_space.shape[0]
        elif space_kind == "MultiBinary":
            head_cls, num_outputs = Bernoulli, action_space.shape[0]
        else:
            raise NotImplementedError
        self.dist = head_cls(self.base.output_size, num_outputs)
Ejemplo n.º 7
0
    def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
        """Wrap an existing base object with an action head and a critic layer.

        Unlike a factory-style constructor, ``base`` is stored as-is (it is
        not called), so it must already expose ``output_size``.

        Args:
            obs_shape: Accepted but not used in this constructor.
            action_space: Action space; its class name selects the head.
            base: Ready-made base object assigned to ``self.base``.
                NOTE(review): the default ``None`` has no ``output_size``,
                so callers are presumably expected to always pass a base.
            base_kwargs: Accepted for interface compatibility; not used.

        Raises:
            NotImplementedError: If the action space is not ``Discrete``,
                ``Box`` or ``MultiBinary``.
        """
        super(Policy, self).__init__()
        if base_kwargs is None:
            base_kwargs = {}

        self.base = base

        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            head_cls, num_outputs = Categorical, action_space.n
        elif space_kind == "Box":
            head_cls, num_outputs = DiagGaussian, action_space.shape[0]
        elif space_kind == "MultiBinary":
            head_cls, num_outputs = Bernoulli, action_space.shape[0]
        else:
            raise NotImplementedError
        self.dist = head_cls(self.base.output_size, num_outputs)

        def _zero_bias(x):
            return nn.init.constant_(x, 0)

        # Critic layer with a fixed 512-wide input, orthogonal weights and
        # zero bias.
        self.critic_linear = init(nn.Linear(512, 1), nn.init.orthogonal_, _zero_bias)
Ejemplo n.º 8
0
    def __init__(self,
                 obs_shape,
                 action_space,
                 action_activation=None,
                 base=None,
                 base_kwargs=None):
        """Build an action-conditioned base network and its action head.

        The number of actions and an ``action_embedding`` callable are added
        to the keyword arguments handed to the base constructor.

        Args:
            obs_shape: Observation shape; its length selects the default base
                (3 -> ``CNNBase``, 1 -> ``MLPBase``) and ``obs_shape[0]`` is
                passed to the base constructor.
            action_space: Action space. Only ``Discrete`` and ``Box``
                (matched by class name) are supported.
            action_activation: Forwarded as ``activation`` to
                ``DiagGaussian`` for ``Box`` spaces.
            base: Optional base class/factory used instead of the default.
            base_kwargs: Keyword arguments for the base. Must contain
                ``"hidden_size"``; for ``Discrete`` spaces it must also
                contain ``"is_ref"``. The mapping is copied, so the caller's
                dict is never modified.

        Raises:
            NotImplementedError: For an unsupported observation rank or
                action space (``MultiBinary`` included).
            KeyError: If a required ``base_kwargs`` entry is missing.
        """
        super(Policy, self).__init__()
        # Work on a private copy: the keys injected below must not leak into
        # a dict the caller may reuse for other policies.
        base_kwargs = {} if base_kwargs is None else dict(base_kwargs)

        if base is None:
            if len(obs_shape) == 3:
                base = CNNBase
            elif len(obs_shape) == 1:
                base = MLPBase
            else:
                raise NotImplementedError

        space_name = action_space.__class__.__name__
        if space_name == "Discrete":
            num_outputs = action_space.n

            def action_embedding(actions):
                # One-hot encode, then drop the second-to-last axis.
                # NOTE(review): presumably actions arrive with a trailing
                # singleton dimension -- confirm against the caller.
                return torch.nn.functional.one_hot(
                    actions, num_classes=num_outputs).squeeze(-2)
        elif space_name == "Box":
            num_outputs = action_space.shape[0]

            def action_embedding(actions):
                return actions
        else:
            # "MultiBinary" has no action embedding defined, so it is
            # rejected together with every other unknown space.
            raise NotImplementedError

        base_kwargs["num_actions"] = num_outputs
        base_kwargs["action_embedding"] = action_embedding

        self.ob_dim = obs_shape[0]
        self.h_dim = base_kwargs["hidden_size"]
        self.ac_dim = num_outputs

        self.base = base(obs_shape[0], **base_kwargs)

        if space_name == "Discrete":
            self.dist = Categorical(self.base.output_size,
                                    num_outputs,
                                    is_ref=base_kwargs["is_ref"])
        else:
            # Only "Box" can reach this point.
            self.dist = DiagGaussian(self.base.output_size,
                                     num_outputs,
                                     activation=action_activation)

        self.obs_rms = None
Ejemplo n.º 9
0
    def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
        """Build the base network (optionally ``CNNMnist``) and the action head.

        Args:
            obs_shape: Observation shape. The whole shape (not just
                ``obs_shape[0]``) is passed to the base constructor.
            action_space: Action space; its class name selects the head.
            base: ``'Mnist'`` selects ``CNNMnist``; ``None`` selects
                ``CNNBase``/``MLPBase`` from ``len(obs_shape)``; any other
                value is called directly as the base factory.
            base_kwargs: Optional keyword arguments for the base constructor.

        Raises:
            NotImplementedError: For an unsupported observation rank or
                action space.
        """
        super(Policy, self).__init__()
        base_kwargs = base_kwargs if base_kwargs is not None else {}

        if base == 'Mnist':
            base = CNNMnist
        if base is None:
            obs_rank = len(obs_shape)
            if obs_rank == 3:
                base = CNNBase
            elif obs_rank == 1:
                base = MLPBase
            else:
                raise NotImplementedError

        # Deliberate deviation noted by the original author: the base gets
        # the full obs_shape where it used to get obs_shape[0].
        self.base = base(obs_shape, **base_kwargs)
        print("obs_shape[0]:", obs_shape[0])

        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            self.dist = Categorical(self.base.output_size, action_space.n)
            return
        if space_kind == "Box":
            self.dist = DiagGaussian(self.base.output_size, action_space.shape[0])
            return
        if space_kind == "MultiBinary":
            self.dist = Bernoulli(self.base.output_size, action_space.shape[0])
            return
        raise NotImplementedError
Ejemplo n.º 10
0
    def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
        """Build the base network and the matching action head.

        Args:
            obs_shape: Observation shape; its length selects the default base
                (3 -> ``CNNBase``, 1 -> ``MLPBase``) and ``obs_shape[0]`` is
                passed to the base constructor.
            action_space: Action space; its class name selects the head.
            base: Optional base class/factory used instead of the default.
            base_kwargs: Optional keyword arguments for the base constructor.
                An ``'is_genesis'`` entry is forwarded only when truthy. The
                mapping is copied, so the caller's dict is never modified.

        Raises:
            NotImplementedError: For an unsupported observation rank or
                action space.
        """
        super(Policy, self).__init__()
        # Copy so that stripping 'is_genesis' below does not alter a dict the
        # caller may reuse (a second construction used to hit KeyError).
        base_kwargs = {} if base_kwargs is None else dict(base_kwargs)

        if base is None:
            if len(obs_shape) == 3:
                base = CNNBase
            elif len(obs_shape) == 1:
                base = MLPBase
            else:
                raise NotImplementedError

        # This key is needed to configure the CNN correctly for Genesis, but
        # causes problems with other domains. A missing key is treated the
        # same as a falsy one.
        if not base_kwargs.get('is_genesis', False):
            base_kwargs.pop('is_genesis', None)

        self.base = base(obs_shape[0], **base_kwargs)

        if action_space.__class__.__name__ == "Discrete":
            num_outputs = action_space.n
            self.dist = Categorical(self.base.output_size, num_outputs)
        elif action_space.__class__.__name__ == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(self.base.output_size, num_outputs)
        elif action_space.__class__.__name__ == "MultiBinary":
            num_outputs = action_space.shape[0]
            self.dist = Bernoulli(self.base.output_size, num_outputs)
        else:
            raise NotImplementedError
Ejemplo n.º 11
0
 def __init__(self,
              obs_shape,
              feat_dim,
              action_space,
              base=None,
              base_kwargs=None):
     """Build the base network and a (masked) action head.

     Args:
         obs_shape: Observation shape; with no explicit ``base`` its length
             selects the network (3 -> ``CNNBase`` or ``CombinedBase``,
             1 -> ``MLPBase``).
         feat_dim: For 3-entry observation shapes, ``0`` selects ``CNNBase``;
             any other value selects ``CombinedBase`` and is passed to it.
         action_space: Action space; its class name selects the head.
         base: Optional base class/factory. When given it is called as
             ``base(obs_shape[0], **base_kwargs)``.
         base_kwargs: Optional keyword arguments for the base constructor.

     Raises:
         NotImplementedError: For an unsupported observation rank or
             action space.
     """
     super(Policy, self).__init__()
     if base_kwargs is None:
         base_kwargs = {}
     if base is None:
         if len(obs_shape) == 3:
             if feat_dim == 0:
                 self.base = CNNBase(obs_shape[0], **base_kwargs)
             else:
                 self.base = CombinedBase(obs_shape[0], feat_dim,
                                          **base_kwargs)
         elif len(obs_shape) == 1:
             self.base = MLPBase(obs_shape[0], **base_kwargs)
         else:
             raise NotImplementedError
     else:
         # A caller-supplied base used to be dropped, leaving self.base unset
         # and failing on self.base.output_size below. Instantiate it the
         # same way the default single-input bases are built.
         # NOTE(review): assumes custom bases accept (obs_shape[0], **kwargs).
         self.base = base(obs_shape[0], **base_kwargs)

     if action_space.__class__.__name__ == "Discrete":
         num_outputs = action_space.n
         self.dist = Categorical_masked(self.base.output_size, num_outputs)
         self.act_dim = action_space.n
         # log(number of actions), stored for use outside this constructor.
         self.ent_denom = math.log(self.act_dim)
     elif action_space.__class__.__name__ == "Box":
         num_outputs = action_space.shape[0]
         self.dist = DiagGaussian(self.base.output_size, num_outputs)
     elif action_space.__class__.__name__ == "MultiBinary":
         num_outputs = action_space.shape[0]
         self.dist = Bernoulli(self.base.output_size, num_outputs)
     else:
         raise NotImplementedError
Ejemplo n.º 12
0
    def __init__(self,
                 obs_shape,
                 action_space,
                 base=None,
                 base_kwargs=None,
                 model='base'):
        """Build the base network (plain or deep CNN) and the action head.

        Args:
            obs_shape: Observation shape; its length selects the default base
                and ``obs_shape[0]`` is passed to the base constructor.
            action_space: Action space; its class name selects the head.
            base: Optional base class/factory used instead of the default.
            base_kwargs: Optional keyword arguments for the base constructor.
            model: For 3-entry observation shapes, ``'base'`` selects
                ``CNNBase`` and ``'resnet'`` selects ``CNNDeep``.

        Raises:
            Exception: If ``model`` is unknown for a 3-entry observation shape.
            NotImplementedError: For an unsupported observation rank or
                action space.
        """
        super(Policy, self).__init__()
        base_kwargs = {} if base_kwargs is None else base_kwargs

        if base is None:
            obs_rank = len(obs_shape)
            if obs_rank == 3:
                if model == 'base':
                    base = CNNBase
                elif model == 'resnet':
                    base = CNNDeep
                else:
                    raise Exception('Model not implemented')
            elif obs_rank == 1:
                base = MLPBase
            else:
                raise NotImplementedError

        self.base = base(obs_shape[0], **base_kwargs)

        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            head_cls, num_outputs = Categorical, action_space.n
        elif space_kind == "Box":
            head_cls, num_outputs = DiagGaussian, action_space.shape[0]
        elif space_kind == "MultiBinary":
            head_cls, num_outputs = Bernoulli, action_space.shape[0]
        else:
            raise NotImplementedError
        self.dist = head_cls(self.base.output_size, num_outputs)
Ejemplo n.º 13
0
    def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
        """Build the base network, the action head and some bookkeeping state.

        Args:
            obs_shape: Observation shape; its length selects the default base
                (3 -> ``CNNBase``, 1 -> ``MLPBase``) and ``obs_shape[0]`` is
                passed to the base constructor.
            action_space: Action space; its class name selects the head.
            base: Optional base class/factory used instead of the default.
            base_kwargs: Optional keyword arguments for the base constructor.

        Raises:
            NotImplementedError: For an unsupported observation rank or
                action space.
        """
        super(Policy, self).__init__()
        if base_kwargs is None:
            base_kwargs = {}
        if base is None:
            if len(obs_shape) == 3:
                base = CNNBase
            elif len(obs_shape) == 1:
                base = MLPBase
            else:
                raise NotImplementedError

        # Counters / placeholders initialised here and filled in elsewhere.
        self.tt = 0
        self.nn = 0
        self.visionmodel = None
        # Keep the history tensor on the GPU when one exists, but fall back
        # to the CPU so that construction no longer fails on CUDA-less hosts.
        hist_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.knob_target_hist = torch.zeros(1, 3, device=hist_device)
        self.base = base(obs_shape[0], **base_kwargs)

        if action_space.__class__.__name__ == "Discrete":
            num_outputs = action_space.n
            self.dist = Categorical(self.base.output_size, num_outputs)
        elif action_space.__class__.__name__ == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(self.base.output_size, num_outputs)
        elif action_space.__class__.__name__ == "MultiBinary":
            num_outputs = action_space.shape[0]
            self.dist = Bernoulli(self.base.output_size, num_outputs)
        else:
            raise NotImplementedError
Ejemplo n.º 14
0
    def __init__(self, obs_shape, action_space, base=None, base_kwargs=None, base_encoder='simple'):
        """Build the base network (chosen by ``base_encoder``) and the action head.

        Args:
            obs_shape: Observation shape; its length selects the family of
                default bases and ``obs_shape[0]`` is passed to the base.
            action_space: Action space; its class name selects the head.
            base: Optional base class/factory used instead of the default.
            base_kwargs: Optional keyword arguments for the base constructor.
            base_encoder: For 1-entry observation shapes, the first of the
                substrings ``'mlp'``, ``'deep'``, ``'attn'``, ``'special'``,
                ``'MHSA'`` found in this string selects the base. Ignored
                when ``base`` is given or the observation shape has 3 entries.

        Raises:
            NotImplementedError: For an unsupported observation rank, an
                unrecognised ``base_encoder`` or an unsupported action space.
        """
        super(Policy, self).__init__()
        if base_kwargs is None:
            base_kwargs = {}
        if base is None:
            if len(obs_shape) == 3:
                base = CNNBase
            elif len(obs_shape) == 1:
                # Substring matching, checked in this order.
                if 'mlp' in base_encoder:
                    base = MLPBase
                elif 'deep' in base_encoder:
                    base = DeepMLPBase
                elif 'attn' in base_encoder:
                    base = AttnMLP
                elif 'special' in base_encoder:
                    base = SpecialMLP
                elif 'MHSA' in base_encoder:
                    base = MHeadAttnModel
                else:
                    # Without this branch base stayed None and the call below
                    # failed with "'NoneType' object is not callable".
                    raise NotImplementedError(
                        "Unsupported base_encoder: {!r}".format(base_encoder))
            else:
                raise NotImplementedError

        self.base = base(obs_shape[0], **base_kwargs)

        if action_space.__class__.__name__ == "Discrete":
            num_outputs = action_space.n
            self.dist = Categorical(self.base.output_size, num_outputs)
        elif action_space.__class__.__name__ == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(self.base.output_size, num_outputs)
        elif action_space.__class__.__name__ == "MultiBinary":
            num_outputs = action_space.shape[0]
            self.dist = Bernoulli(self.base.output_size, num_outputs)
        else:
            raise NotImplementedError
Ejemplo n.º 15
0
    def __init__(self,
                 obs_shape,
                 action_space,
                 base=None,
                 base_kwargs=None,
                 is_minigrid=False,
                 use_rew=False):
        """Build the base network (with a MiniGrid CNN option) and the action head.

        Args:
            obs_shape: Observation shape; its length selects the default base
                and ``obs_shape[0]`` is passed to the base constructor.
            action_space: Action space; its class name selects the head.
            base: Optional base class/factory used instead of the default.
            base_kwargs: Optional keyword arguments for the base constructor.
            is_minigrid: For 3-entry observation shapes, selects
                ``CNN_minigrid`` instead of ``CNNBase``.
            use_rew: Stored on the instance as ``self.use_rew``.

        Raises:
            NotImplementedError: For an unsupported observation rank or
                action space.
        """
        super(Policy, self).__init__()
        base_kwargs = {} if base_kwargs is None else base_kwargs

        if base is None:
            obs_rank = len(obs_shape)
            if obs_rank == 3:
                base = CNN_minigrid if is_minigrid else CNNBase
            elif obs_rank == 1:
                base = MLPBase
            else:
                raise NotImplementedError

        self.base = base(obs_shape[0], **base_kwargs)
        self.use_rew = use_rew

        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            head_cls, num_outputs = Categorical, action_space.n
        elif space_kind == "Box":
            head_cls, num_outputs = DiagGaussian, action_space.shape[0]
        elif space_kind == "MultiBinary":
            head_cls, num_outputs = Bernoulli, action_space.shape[0]
        else:
            raise NotImplementedError
        self.dist = head_cls(self.base.output_size, num_outputs)
Ejemplo n.º 16
0
    def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
        """Wrap the chosen base in an ``Adaptor`` and attach the action head.

        Args:
            obs_shape: Observation shape; its length selects the default base
                (3 -> ``CNNBase``, 1 -> ``MLPBase``). The whole shape is
                passed to ``Adaptor``.
            action_space: Action space; its class name selects the head.
            base: Optional base class/factory handed to ``Adaptor``.
            base_kwargs: Optional extra keyword arguments for ``Adaptor``.

        Raises:
            NotImplementedError: For an unsupported observation rank or
                action space.
        """
        super(Policy, self).__init__()
        base_kwargs = base_kwargs if base_kwargs is not None else {}

        if base is None:
            obs_rank = len(obs_shape)
            if obs_rank == 3:
                base = CNNBase
            elif obs_rank == 1:
                base = MLPBase
            else:
                raise NotImplementedError

        self.base = Adaptor(base=base, obs_shape=obs_shape, n_classes=512, **base_kwargs)
        # The critic layer is attached to the adaptor itself, sized from the
        # adaptor's output width.
        feature_size = self.base.output_size
        self.base.critic_linear = nn.Linear(feature_size, 1)

        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            head_cls, num_outputs = Categorical, action_space.n
        elif space_kind == "Box":
            head_cls, num_outputs = DiagGaussian, action_space.shape[0]
        elif space_kind == "MultiBinary":
            head_cls, num_outputs = Bernoulli, action_space.shape[0]
        else:
            raise NotImplementedError
        self.dist = head_cls(self.base.output_size, num_outputs)
Ejemplo n.º 17
0
    def __init__(self, obs_shape, action_space, base=None, base_kwargs=None, key_value = 0):
        """Build the base network (optionally ``KeyValueBase``) and the action head.

        Args:
            obs_shape: Observation shape; its length selects the default base
                and ``obs_shape[0]`` is passed to the base constructor.
            action_space: Action space; its class name selects the head.
            base: Optional base class/factory; ignored when ``key_value == 1``.
            base_kwargs: Optional keyword arguments for the base constructor.
            key_value: When equal to ``1``, ``KeyValueBase`` is used
                regardless of ``base``.

        Raises:
            NotImplementedError: For an unsupported observation rank or
                action space.
        """
        super(Policy, self).__init__()
        base_kwargs = {} if base_kwargs is None else base_kwargs

        if key_value == 1:
            base = KeyValueBase
        elif base is None:
            obs_rank = len(obs_shape)
            if obs_rank == 3:
                # Swap CNNBase for RLINE here if RLINE is wanted.
                base = CNNBase
            elif obs_rank == 1:
                base = MLPBase
            else:
                raise NotImplementedError

        self.base = base(obs_shape[0], **base_kwargs)

        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            head_cls, num_outputs = Categorical, action_space.n
        elif space_kind == "Box":
            head_cls, num_outputs = DiagGaussian, action_space.shape[0]
        elif space_kind == "MultiBinary":
            head_cls, num_outputs = Bernoulli, action_space.shape[0]
        else:
            raise NotImplementedError
        self.dist = head_cls(self.base.output_size, num_outputs)
Ejemplo n.º 18
0
    def __init__(self,
                 obs_space,
                 obs_process,
                 obs_module,
                 action_space,
                 base_kwargs=None):
        """Build an ``NNBase2`` feature extractor, a critic layer and the action head.

        Args:
            obs_space: Stored on the instance and passed to ``NNBase2``.
            obs_process: Stored on the instance and passed to ``NNBase2``.
            obs_module: Stored on the instance and passed to ``NNBase2``.
            action_space: Action space; its class name selects the head.
            base_kwargs: Optional extra keyword arguments for ``NNBase2``.

        Raises:
            NotImplementedError: If the action space is not ``Discrete``,
                ``Box`` or ``MultiBinary``.
        """
        super(Policy, self).__init__()
        self.obs_space, self.obs_process, self.obs_module = obs_space, obs_process, obs_module

        extra_kwargs = {} if base_kwargs is None else base_kwargs

        # The base consumes all observations and yields one feature vector.
        self.base = NNBase2(obs_space, obs_process, obs_module, **extra_kwargs)

        def _zero_bias(x):
            return nn.init.constant_(x, 0)

        # Critic layer: orthogonal weights, zero bias.
        self.critic_linear = init(nn.Linear(self.base.output_size, 1), nn.init.orthogonal_, _zero_bias)

        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            head_cls, num_outputs = Categorical, action_space.n
        elif space_kind == "Box":
            head_cls, num_outputs = DiagGaussian, action_space.shape[0]
        elif space_kind == "MultiBinary":
            head_cls, num_outputs = Bernoulli, action_space.shape[0]
        else:
            raise NotImplementedError
        self.dist = head_cls(self.base.output_size, num_outputs)
Ejemplo n.º 19
0
    def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
        """Build the action head and a base fed with actions plus two observations.

        The base input width is the action dimension plus twice
        ``obs_shape[0]``; the head input width is the fixed
        ``self.hidden_size`` (256).

        Args:
            obs_shape: Observation shape; only ``obs_shape[0]`` is used.
            action_space: Action space; its class name selects the head.
            base: Optional base class/factory; defaults to ``MLPBase``.
            base_kwargs: Accepted for interface compatibility; not forwarded.

        Raises:
            NotImplementedError: If the action space is not ``Discrete``,
                ``Box`` or ``MultiBinary``.
        """
        super(APolicy, self).__init__()
        if base_kwargs is None:
            base_kwargs = {}
        base_cls = MLPBase if base is None else base

        self.hidden_size = 256

        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            head_cls, num_outputs = Categorical, action_space.n
        elif space_kind == "Box":
            head_cls, num_outputs = DiagGaussian, action_space.shape[0]
        elif space_kind == "MultiBinary":
            head_cls, num_outputs = Bernoulli, action_space.shape[0]
        else:
            raise NotImplementedError

        # Action dimension plus two observation-sized inputs.
        num_inputs = num_outputs + obs_shape[0] * 2
        self.dist = head_cls(self.hidden_size, num_outputs)

        self.base = base_cls(num_inputs, num_outputs)
Ejemplo n.º 20
0
 def __init__(self, action_space, base=None, base_kwargs=None):
     """Build an ``MLPBase`` network with a two-part action head.

     Args:
         action_space: Accepted but not used in this constructor.
         base: Accepted but ignored; ``MLPBase`` is always used.
         base_kwargs: Optional keyword arguments for ``MLPBase``.
     """
     super(Policy, self).__init__()
     extra_kwargs = base_kwargs if base_kwargs is not None else {}

     # The observation size comes from the shared 'obs_size' setting.
     self.base = MLPBase(get_v('obs_size'), **extra_kwargs)

     # Two heads, each with two outputs: a DiagGaussian and a Categorical.
     gaussian_head = DiagGaussian(self.base.output_size, 2)
     categorical_head = Categorical(self.base.output_size, 2)
     self.dist = nn.ModuleList([gaussian_head, categorical_head])
Ejemplo n.º 21
0
    def __init__(self, num_inputs, num_outputs, base=None, base_kwargs=None):
        """Build the base network and a ``DiagGaussian`` action head.

        Args:
            num_inputs: Passed as first argument to the base constructor.
            num_outputs: Output count of the ``DiagGaussian`` head.
            base: Optional base class/factory; defaults to ``MLPBase``.
            base_kwargs: Optional keyword arguments for the base constructor.
        """
        super(Policy, self).__init__()
        extra_kwargs = {} if base_kwargs is None else base_kwargs
        base_cls = MLPBase if base is None else base

        self.base = base_cls(num_inputs, **extra_kwargs)
        self.dist = DiagGaussian(self.base.output_size, num_outputs)
Ejemplo n.º 22
0
    def __init__(self, obs_shape, action_space, hidden_size):
        """Build a single-layer base with value/action heads and a distribution.

        Args:
            obs_shape: Observation shape; ``obs_shape[0]`` is the input width
                of the linear base.
            action_space: Action space; its class name selects the
                distribution. For any other space no ``self.dist`` is set.
            hidden_size: Output width of the base and input width of both
                linear heads.
        """
        super(Policy, self).__init__()

        space_kind = action_space.__class__.__name__
        head_cls = None
        if space_kind == "Discrete":
            head_cls, num_outputs = Categorical, action_space.n
        elif space_kind == "Box":
            head_cls, num_outputs = DiagGaussian, action_space.shape[0]
        elif space_kind == "MultiBinary":
            head_cls, num_outputs = Bernoulli, action_space.shape[0]

        if head_cls is not None:
            # The distribution's input width equals its output width here.
            self.dist = head_cls(num_outputs, num_outputs)

        self.base = nn.Linear(obs_shape[0], hidden_size)
        self.value_head = nn.Linear(hidden_size, 1)
        self.action_head = nn.Linear(hidden_size, 2)
Ejemplo n.º 23
0
    def __init__(self,
                 obs_shape,
                 action_space,
                 other_cars=False,
                 ego_dim=None,
                 beta_dist=False,
                 base=None,
                 base_kwargs=None):
        """Build the base network and the action head (optionally a Beta head).

        Args:
            obs_shape: Observation shape; its length selects the default base
                (3 -> ``CNNBase``, 1 -> ``MLPBase``) and ``obs_shape[0]`` is
                passed to the base constructor.
            action_space: Action space; its class name selects the head. For
                ``Box`` its ``high``/``low`` are stored as ``self.hi_lim`` /
                ``self.lo_lim``.
            other_cars: Forwarded to the base as a keyword argument.
            ego_dim: Forwarded to the base as a keyword argument.
            beta_dist: For ``Box`` spaces, use ``BetaDist`` instead of
                ``DiagGaussian`` and store the entropy of ``Beta(20, 20)``
                as ``self.entropy_lb``.
            base: Optional base class/factory used instead of the default.
            base_kwargs: Optional extra keyword arguments for the base.

        Raises:
            NotImplementedError: For an unsupported observation rank or
                action space.
        """
        super(Policy, self).__init__()
        base_kwargs = {} if base_kwargs is None else base_kwargs

        if base is None:
            obs_rank = len(obs_shape)
            if obs_rank == 3:
                base = CNNBase
            elif obs_rank == 1:
                base = MLPBase
            else:
                raise NotImplementedError

        self.base = base(obs_shape[0], other_cars=other_cars, ego_dim=ego_dim, **base_kwargs)

        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            self.dist = Categorical(self.base.output_size, action_space.n)
        elif space_kind == "Box":
            num_outputs = action_space.shape[0]
            self.beta_dist = beta_dist
            if not self.beta_dist:
                self.dist = DiagGaussian(self.base.output_size, num_outputs)
            else:
                self.dist = BetaDist(self.base.output_size, num_outputs)
                reference_entropy = torch.distributions.Beta(20, 20).entropy()
                self.entropy_lb = Variable(reference_entropy.float())
            self.hi_lim = action_space.high
            self.lo_lim = action_space.low
        elif space_kind == "MultiBinary":
            self.dist = Bernoulli(self.base.output_size, action_space.shape[0])
        else:
            raise NotImplementedError
Ejemplo n.º 24
0
    def __init__(self, obs_shape, action_space, IAM=False, RNN=False, base_kwargs=None):
        """Pick a base network from the IAM/RNN flags and attach the action head.

        The chosen base and action-space kind are reported with ``print``.

        Args:
            obs_shape: Observation shape; its length (3 or 1) selects the
                family of bases and ``obs_shape[0]`` is passed to the base.
            action_space: Action space; its class name selects the head.
            IAM: Selects ``IAMBaseCNN`` (3 entries) or ``IAMBase`` (1 entry).
            RNN: For 1-entry shapes without ``IAM``, selects ``RNNBase``.
            base_kwargs: Optional keyword arguments for the base constructor.

        Raises:
            NotImplementedError: For an unsupported observation rank or
                action space.
        """
        super(IAMPolicy, self).__init__()
        base_kwargs = {} if base_kwargs is None else base_kwargs
        self.IAM = IAM
        self.recurrent = RNN

        obs_rank = len(obs_shape)
        if obs_rank not in (1, 3):
            raise NotImplementedError

        if obs_rank == 3:
            if self.IAM:
                print("Using IAMBaseCNN")
                base = IAMBaseCNN
            else:
                print("Using CNNBase")
                base = CNNBase
        elif self.IAM:
            print("Using IAMBase")
            base = IAMBase
        elif self.recurrent:
            print("Using RNNBase")
            base = RNNBase
        else:
            print("Using MLPBase")
            base = MLPBase
        self.base = base(obs_shape[0], **base_kwargs)

        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            print("discrete")
            head_cls, num_outputs = Categorical, action_space.n
        elif space_kind == "Box":
            print("Box")
            head_cls, num_outputs = DiagGaussian, action_space.shape[0]
        elif space_kind == "MultiBinary":
            print("MultiBinary")
            head_cls, num_outputs = Bernoulli, action_space.shape[0]
        else:
            raise NotImplementedError
        # In this class output_size is invoked as a method of the base.
        self.dist = head_cls(self.base.output_size(), num_outputs)
Ejemplo n.º 25
0
    def __init__(self,
                 obs_shape,
                 action_space,
                 base=None,
                 base_kwargs=None,
                 navi=False,
                 hidden_size=64,
                 n_layers=2):
        """Build the base network and the matching action head.

        Args:
            obs_shape: Observation shape; its length selects the default base
                (3 -> ``CNNBase``, 1 -> ``MLPBase``) and ``obs_shape[0]`` is
                passed to the base constructor.
            action_space: Action space; its class name selects the head.
            base: Optional base class/factory used instead of the default.
            base_kwargs: Optional extra keyword arguments for the base.
            navi: For ``Discrete`` spaces, use a fixed head input width of
                ``256 * 10`` instead of the base's ``output_size``.
            hidden_size: Forwarded to the base as a keyword argument.
            n_layers: Forwarded to the base as a keyword argument.

        Raises:
            NotImplementedError: For an unsupported observation rank or
                action space.
        """
        super(Policy, self).__init__()
        if base_kwargs is None:
            base_kwargs = {}
        if base is None:
            if len(obs_shape) == 3:
                # TODO(add hidden size)
                base = CNNBase
            elif len(obs_shape) == 1:
                base = MLPBase
            else:
                raise NotImplementedError

        # `base` is a class/factory, so type(base).__name__ would only ever
        # report the metaclass ("type"). Report the base's own name, falling
        # back to its type for callables without __name__ (e.g. partials).
        print("DEV: PPO using base:",
              getattr(base, "__name__", type(base).__name__))
        self.base = base(obs_shape[0],
                         hidden_size=hidden_size,
                         n_layers=n_layers,
                         **base_kwargs)

        if action_space.__class__.__name__ == "Discrete":
            num_outputs = action_space.n
            net_outputs = self.base.output_size
            if navi:
                net_outputs = 256 * 10
            self.dist = Categorical(net_outputs, num_outputs)
        elif action_space.__class__.__name__ == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(self.base.output_size, num_outputs)
        elif action_space.__class__.__name__ == "MultiBinary":
            num_outputs = action_space.shape[0]
            self.dist = Bernoulli(self.base.output_size, num_outputs)
        else:
            raise NotImplementedError
Ejemplo n.º 26
0
    def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
        """Build a base selected by name and the matching action head.

        The base input width is ``obs_shape[0] * 2`` because the previous
        observation is appended to the input.

        Args:
            obs_shape: Observation shape; only ``obs_shape[0]`` is used.
            action_space: Action space; its class name selects the head.
            base: One of ``'mlp'`` (``MLPBase``), ``'shared'``
                (``SharedBase``) or ``'osc'`` (``OscBase``).
            base_kwargs: Optional keyword arguments for the base constructor.

        Raises:
            NotImplementedError: For an unknown ``base`` name (including the
                default ``None``) or an unsupported action space.
        """
        super(Policy, self).__init__()
        base_kwargs = {} if base_kwargs is None else base_kwargs

        if base == 'mlp':
            base_cls = MLPBase
        elif base == 'shared':
            base_cls = SharedBase
        elif base == 'osc':
            # For this base the previous observation includes the sim time.
            base_cls = OscBase
        else:
            raise NotImplementedError

        # Doubled input width: current plus previous observation.
        self.base = base_cls(obs_shape[0] * 2, **base_kwargs)

        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            head_cls, num_outputs = Categorical, action_space.n
        elif space_kind == "Box":
            head_cls, num_outputs = DiagGaussian, action_space.shape[0]
        elif space_kind == "MultiBinary":
            head_cls, num_outputs = Bernoulli, action_space.shape[0]
        else:
            raise NotImplementedError
        self.dist = head_cls(self.base.output_size, num_outputs)
Ejemplo n.º 27
0
    def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
        """Build a base fed with action + observation features and a Gaussian head.

        The base input size is the action size plus ``obs_shape[0]``; the
        ``DiagGaussian`` head is created with ``(hidden_size, obs_shape[0])``.
        Presumably this predicts the next observation from (observation,
        action) -- confirm against the forward pass.

        Args:
            obs_shape: Observation shape; only ``obs_shape[0]`` is read here.
            action_space: Action space object, dispatched on its class name
                (``"Discrete"``, ``"Box"`` or ``"MultiBinary"``).
            base: Callable used to build the base; ``MLPBase`` when ``None``.
            base_kwargs: Accepted for interface compatibility.

        Raises:
            NotImplementedError: If the action space class name is not one
                of the supported values.
        """
        super(Model, self).__init__()

        self.hidden_size = 128

        # NOTE(review): base_kwargs is normalised here but is never forwarded
        # to the base constructor below -- confirm whether that is intended.
        if base_kwargs is None:
            base_kwargs = {}
        base_cls = MLPBase if base is None else base

        # Size contributed by the action: `n` for Discrete spaces, the first
        # shape entry for Box and MultiBinary spaces.
        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            action_dim = action_space.n
        elif space_kind in ("Box", "MultiBinary"):
            action_dim = action_space.shape[0]
        else:
            raise NotImplementedError
        num_inputs = action_dim + obs_shape[0]

        # Keep this order: the head is registered before the base.
        self.dist = DiagGaussian(self.hidden_size, obs_shape[0])
        self.base = base_cls(num_inputs, obs_shape[0])
Ejemplo n.º 28
0
    def __init__(self,
                 obs_shape,
                 action_space,
                 zero_last_layer=False,
                 base=None,
                 base_kwargs=None,
                 dist=None):
        """Set up the feature base and the action-distribution head.

        Args:
            obs_shape: Observation shape. When ``base`` is ``None`` it picks
                the base class: ``E2EBase`` if ``obs_shape[0]`` is a tuple,
                ``CNNBase`` for length 3, ``MLPBase`` for length 1.
            action_space: Action space object, dispatched on its class name
                (``"Discrete"``, ``"Box"`` or ``"MultiBinary"``).
            zero_last_layer: Passed to ``DiagGaussian`` as ``zll`` (only used
                for ``"Box"`` action spaces).
            base: If given, stored as ``self.base`` as-is (it is not called),
                and ``base_kwargs`` is not used.
            base_kwargs: Extra keyword arguments for the automatically chosen
                base class; ``None`` means no extra arguments.
            dist: If given, replaces the default head after it was built.

        Raises:
            NotImplementedError: If no base class matches ``obs_shape`` or
                the action space class name is unsupported. The latter is
                raised even when ``dist`` is supplied.
        """
        super(Policy, self).__init__()
        if base_kwargs is None:
            base_kwargs = {}

        if base is not None:
            # A ready-made base object was supplied; use it unchanged.
            self.base = base
        else:
            if isinstance(obs_shape[0], tuple):
                base_cls = E2EBase
            elif len(obs_shape) == 3:
                base_cls = CNNBase
            elif len(obs_shape) == 1:
                base_cls = MLPBase
            else:
                raise NotImplementedError
            self.base = base_cls(obs_shape, **base_kwargs)

        # Choose the default head for this kind of action space.
        head_kwargs = {}
        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            head_cls, num_outputs = Categorical, action_space.n
        elif space_kind == "Box":
            head_cls, num_outputs = DiagGaussian, action_space.shape[0]
            head_kwargs["zll"] = zero_last_layer
        elif space_kind == "MultiBinary":
            head_cls, num_outputs = Bernoulli, action_space.shape[0]
        else:
            raise NotImplementedError

        # The default head is always constructed and assigned first; an
        # explicit `dist` then takes its place.
        self.dist = head_cls(self.base.output_size, num_outputs, **head_kwargs)
        if dist is not None:
            self.dist = dist
Ejemplo n.º 29
0
    def __init__(self,
                 obs_shape,
                 action_space,
                 base=None,
                 base_kwargs=None,
                 load_expert=None,
                 env_name=None,
                 rl_baseline_zoo_dir=None,
                 expert_algo=None,
                 normalize=True):
        """Build the policy network and optionally load expert weights.

        Args:
            obs_shape: Observation shape. ``obs_shape[0]`` is passed to the
                base; when ``base`` is ``None`` its length picks ``CNNBase``
                (3) or ``MLPBase`` (1).
            action_space: Action space object, dispatched on its class name.
                ``"Discrete"`` and ``"Box"`` are supported.
            base: Class used to build ``self.base``. When ``None`` it is
                ``DuckieTownCNN`` for ``env_name == 'duckietown'``, otherwise
                chosen from ``obs_shape``.
            base_kwargs: Extra keyword arguments for the base constructor;
                ``None`` means no extra arguments.
            load_expert: When equal to ``True`` (and ``env_name`` is neither
                ``'duckietown'`` nor ``'highway-v0'``) weights are copied
                from a stable_baselines model.
            env_name: Environment name; also the file stem of the expert
                model.
            rl_baseline_zoo_dir: Root directory; the expert is read from
                ``<rl_baseline_zoo_dir>/trained_agents/<expert_algo>/<env_name>.pkl``.
            expert_algo: Sub-directory name of the expert algorithm.
            normalize: Forwarded to the base constructor.

        Raises:
            NotImplementedError: If no base matches ``obs_shape``, the action
                space is unsupported, or expert loading is requested for a
                base other than ``CNNBase``/``MLPBase``.
            Exception: For ``"MultiBinary"`` action spaces.
            ImportError: If the expert for ``env_name`` needs TRPO but it
                could not be imported.
        """
        super(Policy, self).__init__()

        # TODO: Pass these parameters in
        self.epsilon = 0.1
        self.dril = True

        if base_kwargs is None:
            base_kwargs = {}
        if base is None:
            if env_name in ['duckietown']:
                base = DuckieTownCNN
            elif len(obs_shape) == 3:
                base = CNNBase
            elif len(obs_shape) == 1:
                base = MLPBase
            else:
                raise NotImplementedError

        self.base = base(obs_shape[0], normalize=normalize, **base_kwargs)

        # `self.action_space` stores the class *name* of the action space.
        self.action_space = None
        if action_space.__class__.__name__ == "Discrete":
            num_outputs = action_space.n
            self.dist = Categorical(self.base.output_size, num_outputs)
            self.action_space = "Discrete"
        elif action_space.__class__.__name__ == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(self.base.output_size, num_outputs)
            self.action_space = "Box"
        elif action_space.__class__.__name__ == "MultiBinary":
            raise Exception('Error')
        else:
            raise NotImplementedError

        # Kept as an equality test (not truthiness) so that non-bool values
        # of `load_expert` behave exactly as before.
        if load_expert == True and env_name not in [  # noqa: E712
                'duckietown', 'highway-v0'
        ]:
            print('[Loading Expert --- Base]')
            model_path = os.path.join(rl_baseline_zoo_dir, 'trained_agents',
                                      f'{expert_algo}')

            # TRPO is optional: it is only importable when mpi4py is
            # available. Remember the failure so a clear error can be raised
            # if TRPO turns out to be required.
            trpo_import_error = None
            try:
                import mpi4py  # noqa: F401 -- availability probe only
                from stable_baselines import TRPO
            except ImportError as err:
                TRPO = None
                trpo_import_error = err

            from stable_baselines import PPO2

            model_path = f'{model_path}/{env_name}.pkl'
            if env_name in ['AntBulletEnv-v0']:
                if TRPO is None:
                    raise ImportError(
                        f'The expert for {env_name} is a TRPO model, but '
                        'TRPO could not be imported (mpi4py and '
                        'stable_baselines are required)'
                    ) from trpo_import_error
                baselines_model = TRPO.load(model_path)
            else:
                baselines_model = PPO2.load(model_path)
            for key, value in baselines_model.get_parameters().items():
                print(key, value.shape)

            # Convert the expert parameters into a state dict for this
            # module; only the two standard bases have a converter.
            if base.__name__ == 'CNNBase':
                print(['Loading CNNBase expert model'])
                params = copy_cnn_weights(baselines_model)
            elif base.__name__ == 'MLPBase':
                print(['Loading MLPBase expert model'])
                params = copy_mlp_weights(baselines_model)
            else:
                raise NotImplementedError(
                    f'No expert weight converter for base {base.__name__!r}')

            # First try the network as built. If the expert weights do not
            # fit, rebuild the base with one extra input and retry.
            try:
                self.load_state_dict(params)
            except RuntimeError:
                # NOTE(review): the rebuilt base is created without
                # `normalize` -- confirm this is intentional.
                self.base = base(obs_shape[0] + 1, **base_kwargs)
                self.load_state_dict(params)
                self.obs_shape = obs_shape[0] + 1
            else:
                self.obs_shape = obs_shape[0]
Ejemplo n.º 30
0
    def __init__(self,
                 obs_shape,
                 action_space,
                 architecture,
                 state_channels,
                 hidden_size,
                 recurse_depth=1,
                 pool_inject=False,
                 **kwargs):
        """Assemble encoder, transition module, critic and action head.

        Args:
            obs_shape: ``(channels, height, width)`` of the observation.
                Height and width must be equal and either 84 or 64.
            action_space: Action space object, dispatched on its class name
                (``"Discrete"``, ``"Box"`` or ``"MultiBinary"``).
            architecture: ``"ff"``, ``"rnn"`` or ``"crnn"``; selects the
                transition module.
            state_channels: Output channels of the last encoder convolution,
                also passed to the transition module.
            hidden_size: Passed to the transition module; also the input
                size of the critic layer and the action head.
            recurse_depth: Passed to the ``"rnn"`` and ``"crnn"`` transitions.
            pool_inject: Passed to the ``"crnn"`` transition only.
            **kwargs: Accepted and ignored.

        Raises:
            NotImplementedError: For an unsupported image size, architecture
                or action space.
        """
        super(RecurrentPolicy, self).__init__()

        def relu_init(module):
            # Hand the module to `init` with orthogonal_, a zero-constant
            # initialiser and the gain computed for ReLU.
            return init(module, nn.init.orthogonal_,
                        lambda x: nn.init.constant_(x, 0),
                        nn.init.calculate_gain('relu'))

        def plain_init(module):
            # Same as `relu_init` but without passing a gain.
            return init(module, nn.init.orthogonal_,
                        lambda x: nn.init.constant_(x, 0))

        num_inputs, im_size = obs_shape[0], obs_shape[1]
        assert im_size == obs_shape[2]
        self.spatial_latent_size = (7, 7)
        self.hidden_size = hidden_size
        self.architecture = architecture
        self.is_recurrent = architecture in ("rnn", "crnn")

        # Each row is (in_channels, out_channels, kernel, stride, padding).
        # Spatial sizes: 84 -> 20 -> 9 -> 7 and 64 -> 16 -> 9 -> 7.
        if im_size == 84:
            conv_specs = (
                (num_inputs, 32, 8, 4, 0),
                (32, 64, 4, 2, 0),
                (64, state_channels, 3, 1, 0),
            )
        elif im_size == 64:
            conv_specs = (
                (num_inputs, 32, 6, 4, 1),
                (32, 64, 4, 2, 2),
                (64, state_channels, 3, 1, 0),
            )
        else:
            raise NotImplementedError

        # Every convolution is followed by a ReLU.
        encoder_layers = []
        for in_ch, out_ch, kernel, stride, padding in conv_specs:
            conv = nn.Conv2d(in_ch, out_ch, kernel,
                             stride=stride, padding=padding)
            encoder_layers += [relu_init(conv), nn.ReLU()]
        self.encoder = nn.Sequential(*encoder_layers)

        # Select the transition class and the keyword arguments it takes.
        transition_kwargs = {"state_channels": state_channels}
        if architecture == "ff":
            transition_cls = NoTransition
        elif architecture == "rnn":
            transition_cls = RNNTransition
            transition_kwargs["recurse_depth"] = recurse_depth
        elif architecture == "crnn":
            transition_cls = CRNNTransition
            transition_kwargs.update(recurse_depth=recurse_depth,
                                     pool_inject=pool_inject)
        else:
            raise NotImplementedError
        self.transition = transition_cls(hidden_size, **transition_kwargs)

        self.critic_linear = plain_init(nn.Linear(self.hidden_size, 1))

        # Action head, chosen by the class name of the action space.
        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            head_cls, num_outputs = Categorical, action_space.n
        elif space_kind == "Box":
            head_cls, num_outputs = DiagGaussian, action_space.shape[0]
        elif space_kind == "MultiBinary":
            head_cls, num_outputs = Bernoulli, action_space.shape[0]
        else:
            raise NotImplementedError
        self.dist = head_cls(self.hidden_size, num_outputs)

        self.train()