Example #1
0
    def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
        """Build the feature base and the action-distribution head.

        Args:
            obs_shape: Observation shape. Its length selects the default base
                and its first entry is passed to the base constructor.
            action_space: Action space; its class decides which head is built.
            base: Optional base class. When ``None``, ``CNNBase`` is used for
                3-D shapes and ``MLPBase`` for 1-D shapes.
            base_kwargs: Optional keyword arguments forwarded to the base.

        Raises:
            NotImplementedError: If no default base exists for ``obs_shape``
                or the action space is not one of the handled kinds.
        """
        super(Policy, self).__init__()
        base_kwargs = {} if base_kwargs is None else base_kwargs

        if base is None:
            rank = len(obs_shape)
            if rank == 3:
                base = CNNBase
            elif rank == 1:
                base = MLPBase
            else:
                raise NotImplementedError

        self.base = base(obs_shape[0], **base_kwargs)

        # The first three kinds are matched by class name; MultiDiscrete is
        # matched with isinstance against gym.spaces.
        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            self.dist = Categorical(self.base.output_size, action_space.n)
        elif space_kind == "Box":
            self.dist = DiagGaussian(self.base.output_size,
                                     action_space.shape[0])
        elif space_kind == "MultiBinary":
            self.dist = Bernoulli(self.base.output_size,
                                  action_space.shape[0])
        elif isinstance(action_space, gym.spaces.MultiDiscrete):
            nvec = action_space.nvec
            # The head receives the summed category count plus nvec itself.
            self.dist = MultiCategoricalDistribution(self.base.output_size,
                                                     int(np.sum(nvec)),
                                                     nvec)
        else:
            raise NotImplementedError
    def __init__(self,
                 obs_shape,
                 action_space,
                 is_leaf,
                 base=None,
                 base_kwargs=None):
        """Build an ``OpsBase`` feature extractor and the distribution head.

        Args:
            obs_shape: Observation shape; only ``obs_shape[0]`` is used.
            action_space: Action space, passed to ``OpsBase`` and used to
                choose the distribution head.
            is_leaf: Forwarded to ``OpsBase`` as the ``is_leaf`` keyword.
            base: Accepted but not used here; the base is always ``OpsBase``.
            base_kwargs: Optional extra keyword arguments for ``OpsBase``.

        Raises:
            NotImplementedError: If the action space kind is not handled.
        """
        super(OpsPolicy, self).__init__()
        extra_kwargs = base_kwargs if base_kwargs is not None else {}

        self.base = OpsBase(obs_shape[0],
                            action_space,
                            is_leaf=is_leaf,
                            **extra_kwargs)

        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            num_outputs = action_space.n
            head_cls = Categorical
        elif space_kind == "Box":
            num_outputs = action_space.shape[0]
            head_cls = DiagGaussian
        elif space_kind == "MultiBinary":
            num_outputs = action_space.shape[0]
            head_cls = Bernoulli
        else:
            raise NotImplementedError
        self.dist = head_cls(self.base.output_size, num_outputs)
    def __init__(self,
                 obs_shape,
                 action_space,
                 agent_num,
                 agent_i,
                 base=None,
                 base_kwargs=None):
        """Build the base (given the agent count) and the distribution head.

        Args:
            obs_shape: Observation shape. Its length selects the default base
                and ``obs_shape[0]`` is passed to the base constructor.
            action_space: Action space used to choose the distribution head.
            agent_num: Number of agents, passed as the second positional
                argument to the base.
            agent_i: Accepted but not used in this constructor.
            base: Optional base class; defaults to ``CNNBase`` (3-D shapes)
                or ``MLPBase`` (1-D shapes).
            base_kwargs: Optional keyword arguments forwarded to the base.

        Raises:
            NotImplementedError: If no default base exists for ``obs_shape``
                or the action space kind is not handled.
        """
        super(Policy, self).__init__()
        base_kwargs = {} if base_kwargs is None else base_kwargs

        if base is None:
            rank = len(obs_shape)
            if rank == 3:
                base = CNNBase
            elif rank == 1:
                base = MLPBase
            else:
                raise NotImplementedError

        # Per the original author's note: the actor input dimension is
        # num_state, while the critic input is num_state * agent_num.
        self.base = base(obs_shape[0], agent_num, **base_kwargs)

        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            self.dist = Categorical(self.base.output_size, action_space.n)
        elif space_kind == "Box":
            self.dist = DiagGaussian(self.base.output_size,
                                     action_space.shape[0])
        elif space_kind == "MultiBinary":
            self.dist = Bernoulli(self.base.output_size,
                                  action_space.shape[0])
        else:
            raise NotImplementedError
Example #4
0
    def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
        """Wrap an already-constructed base with a distribution head and critic.

        Unlike constructors that instantiate a base class, this one stores
        ``base`` as given, so it must already be a built module exposing
        ``output_size``.

        Args:
            obs_shape: Accepted for interface compatibility; not used here.
            action_space: Action space used to choose the distribution head.
            base: Already-constructed base module. Required despite the
                ``None`` default.
            base_kwargs: Accepted for interface compatibility; not used here.

        Raises:
            ValueError: If ``base`` is ``None``.
            NotImplementedError: If the action space kind is not handled.
        """
        super(Policy, self).__init__()
        if base_kwargs is None:
            base_kwargs = {}
        if base is None:
            # Fail early with a clear message instead of the opaque
            # "'NoneType' object has no attribute 'output_size'" below.
            raise ValueError(
                "Policy requires an already-constructed `base` module; "
                "got None")
        self.base = base

        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            num_outputs = action_space.n
            self.dist = Categorical(self.base.output_size, num_outputs)
        elif space_kind == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(self.base.output_size, num_outputs)
        elif space_kind == "MultiBinary":
            num_outputs = action_space.shape[0]
            self.dist = Bernoulli(self.base.output_size, num_outputs)
        else:
            raise NotImplementedError

        def init_(module):
            return init(module, nn.init.orthogonal_,
                        lambda x: nn.init.constant_(x, 0))

        # NOTE(review): the critic input width is hard-coded to 512 rather
        # than taken from self.base.output_size - confirm it matches the base.
        self.critic_linear = init_(nn.Linear(512, 1))
Example #5
0
    def __init__(self,
                 obs_shape,
                 action_space,
                 action_activation=None,
                 base=None,
                 base_kwargs=None):
        """Build a base that also receives an action-embedding function.

        ``num_actions`` and ``action_embedding`` are added to the keyword
        arguments passed to the base. They are written into a private copy,
        so the caller's ``base_kwargs`` dict is left untouched.

        Args:
            obs_shape: Observation shape. Its length selects the default base
                and ``obs_shape[0]`` is passed to the base constructor.
            action_space: ``Discrete`` or ``Box`` action space.
            action_activation: Forwarded to ``DiagGaussian`` as ``activation``
                (Box spaces only).
            base: Optional base class; defaults to ``CNNBase`` (3-D shapes)
                or ``MLPBase`` (1-D shapes).
            base_kwargs: Keyword arguments for the base. Must contain
                ``"hidden_size"``, and ``"is_ref"`` for Discrete spaces.

        Raises:
            NotImplementedError: If no default base exists for ``obs_shape``
                or the action space is not ``Discrete``/``Box``
                (``MultiBinary`` is explicitly unsupported).
            KeyError: If a required ``base_kwargs`` entry is missing.
        """
        super(Policy, self).__init__()
        # Copy so the keys injected below do not leak into the caller's dict.
        base_kwargs = {} if base_kwargs is None else dict(base_kwargs)
        if base is None:
            if len(obs_shape) == 3:
                base = CNNBase
            elif len(obs_shape) == 1:
                base = MLPBase
            else:
                raise NotImplementedError

        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            num_outputs = action_space.n

            def action_embedding(actions):
                # One-hot encode the indices, then drop the second-to-last
                # axis of the result.
                return torch.nn.functional.one_hot(
                    actions, num_classes=num_outputs).squeeze(-2)
        elif space_kind == "Box":
            num_outputs = action_space.shape[0]

            def action_embedding(actions):
                # Continuous actions are used as their own embedding.
                return actions
        else:
            # MultiBinary and every other kind are unsupported.
            raise NotImplementedError

        base_kwargs["num_actions"] = num_outputs
        base_kwargs["action_embedding"] = action_embedding

        self.ob_dim = obs_shape[0]
        self.h_dim = base_kwargs["hidden_size"]
        self.ac_dim = num_outputs

        self.base = base(obs_shape[0], **base_kwargs)

        # Only Discrete and Box reach this point; the former MultiBinary
        # branch here was unreachable and has been dropped.
        if space_kind == "Discrete":
            self.dist = Categorical(self.base.output_size,
                                    num_outputs,
                                    is_ref=base_kwargs["is_ref"])
        else:
            self.dist = DiagGaussian(self.base.output_size,
                                     num_outputs,
                                     activation=action_activation)

        self.obs_rms = None
Example #6
0
    def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
        """Build the feature base and the action-distribution head.

        The ``is_genesis`` entry of ``base_kwargs`` is forwarded to the base
        only when it is truthy; otherwise it is stripped. A missing key is
        treated as false, and the caller's dict is never modified.

        Args:
            obs_shape: Observation shape. Its length selects the default base
                and ``obs_shape[0]`` is passed to the base constructor.
            action_space: Action space used to choose the distribution head.
            base: Optional base class; defaults to ``CNNBase`` (3-D shapes)
                or ``MLPBase`` (1-D shapes).
            base_kwargs: Optional keyword arguments forwarded to the base.

        Raises:
            NotImplementedError: If no default base exists for ``obs_shape``
                or the action space kind is not handled.
        """
        super(Policy, self).__init__()
        # Work on a copy so removing 'is_genesis' does not alter the
        # caller's dict.
        base_kwargs = {} if base_kwargs is None else dict(base_kwargs)
        if base is None:
            if len(obs_shape) == 3:
                base = CNNBase
            elif len(obs_shape) == 1:
                base = MLPBase
            else:
                raise NotImplementedError

        # This key is needed to configure the CNN correctly for Genesis, but
        # causes problems with other domains. Using .get/.pop keeps the
        # default base_kwargs=None (and dicts without the key) from raising
        # KeyError.
        if not base_kwargs.get('is_genesis', False):
            base_kwargs.pop('is_genesis', None)

        self.base = base(obs_shape[0], **base_kwargs)

        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            num_outputs = action_space.n
            self.dist = Categorical(self.base.output_size, num_outputs)
        elif space_kind == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(self.base.output_size, num_outputs)
        elif space_kind == "MultiBinary":
            num_outputs = action_space.shape[0]
            self.dist = Bernoulli(self.base.output_size, num_outputs)
        else:
            raise NotImplementedError
    def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
        """Build the feature base and the action-distribution head.

        Args:
            obs_shape: Observation shape. For 3-D shapes the last entry picks
                the default base (84 -> ``CNNBase``, 64 -> ``CNNBase64``);
                1-D shapes default to ``MLPBase``.
            action_space: Action space used to choose the distribution head.
            base: Optional base class overriding the default choice.
            base_kwargs: Optional keyword arguments forwarded to the base.

        Raises:
            NotImplementedError: If no default base exists for ``obs_shape``
                or the action space kind is not handled.
        """
        super(Policy, self).__init__()
        base_kwargs = {} if base_kwargs is None else base_kwargs

        if base is None:
            rank = len(obs_shape)
            if rank == 3:
                last_dim = obs_shape[-1]
                if last_dim == 84:
                    base = CNNBase
                elif last_dim == 64:
                    base = CNNBase64
                else:
                    raise NotImplementedError
            elif rank == 1:
                base = MLPBase
            else:
                raise NotImplementedError

        self.base = base(obs_shape[0], **base_kwargs)

        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            self.dist = Categorical(self.base.output_size, action_space.n)
        elif space_kind == "Box":
            self.dist = DiagGaussian(self.base.output_size,
                                     action_space.shape[0])
        elif space_kind == "MultiBinary":
            self.dist = Bernoulli(self.base.output_size,
                                  action_space.shape[0])
        else:
            raise NotImplementedError
Example #8
0
    def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
        """Build the feature base, the distribution head and bookkeeping state.

        Args:
            obs_shape: Observation shape. Its length selects the default base
                and ``obs_shape[0]`` is passed to the base constructor.
            action_space: Action space used to choose the distribution head.
            base: Optional base class; defaults to ``CNNBase`` (3-D shapes)
                or ``MLPBase`` (1-D shapes).
            base_kwargs: Optional keyword arguments forwarded to the base.

        Raises:
            NotImplementedError: If no default base exists for ``obs_shape``
                or the action space kind is not handled.
        """
        super(Policy, self).__init__()
        if base_kwargs is None:
            base_kwargs = {}
        if base is None:
            if len(obs_shape) == 3:
                base = CNNBase
            elif len(obs_shape) == 1:
                base = MLPBase
            else:
                raise NotImplementedError

        # Counters and an optional vision model; they are only initialised
        # here and are presumably updated elsewhere in the class.
        self.tt = 0
        self.nn = 0
        self.visionmodel = None
        # Keep the tensor on the GPU when one exists, but fall back to the
        # CPU so construction no longer crashes on CUDA-less machines.
        hist_device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")
        self.knob_target_hist = torch.zeros(1, 3, device=hist_device)
        self.base = base(obs_shape[0], **base_kwargs)

        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            num_outputs = action_space.n
            self.dist = Categorical(self.base.output_size, num_outputs)
        elif space_kind == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(self.base.output_size, num_outputs)
        elif space_kind == "MultiBinary":
            num_outputs = action_space.shape[0]
            self.dist = Bernoulli(self.base.output_size, num_outputs)
        else:
            raise NotImplementedError
Example #9
0
    def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
        """Build the distribution head and a base over an enlarged input.

        The base input width is the action size plus twice ``obs_shape[0]``.
        The head is sized from the fixed ``hidden_size`` (256) rather than
        from the base.

        Args:
            obs_shape: Observation shape; only ``obs_shape[0]`` is used.
            action_space: Action space used to choose the distribution head.
            base: Optional base class; defaults to ``MLPBase``.
            base_kwargs: Accepted but not forwarded to the base.

        Raises:
            NotImplementedError: If the action space kind is not handled.
        """
        super(APolicy, self).__init__()
        if base_kwargs is None:
            base_kwargs = {}
        base = MLPBase if base is None else base

        self.hidden_size = 256

        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            num_outputs = action_space.n
            self.dist = Categorical(self.hidden_size, num_outputs)
        elif space_kind == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(self.hidden_size, num_outputs)
        elif space_kind == "MultiBinary":
            num_outputs = action_space.shape[0]
            self.dist = Bernoulli(self.hidden_size, num_outputs)
        else:
            raise NotImplementedError

        # Same width for every kind: action size + 2 * observation size.
        num_inputs = num_outputs + obs_shape[0] * 2
        self.base = base(num_inputs, num_outputs)
Example #10
0
    def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
        """Build the feature base and the action-distribution head.

        Args:
            obs_shape: Observation shape. The whole shape (not just its first
                entry) is passed to the base constructor.
            action_space: Action space used to choose the distribution head.
            base: Base class, the string ``'Mnist'`` (selects ``CNNMnist``),
                or ``None`` for ``CNNBase`` (3-D) / ``MLPBase`` (1-D).
            base_kwargs: Optional keyword arguments forwarded to the base.

        Raises:
            NotImplementedError: If no default base exists for ``obs_shape``
                or the action space kind is not handled.
        """
        super(Policy, self).__init__()
        base_kwargs = {} if base_kwargs is None else base_kwargs

        if base == 'Mnist':
            base = CNNMnist
        if base is None:
            rank = len(obs_shape)
            if rank == 3:
                base = CNNBase
            elif rank == 1:
                base = MLPBase
            else:
                raise NotImplementedError

        # NOTE: this variant passes the full obs_shape; the original code
        # passed obs_shape[0] (changed by Lia, per the original TODO).
        self.base = base(obs_shape, **base_kwargs)
        print("obs_shape[0]:", obs_shape[0])

        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            num_outputs = action_space.n
            head_cls = Categorical
        elif space_kind == "Box":
            num_outputs = action_space.shape[0]
            head_cls = DiagGaussian
        elif space_kind == "MultiBinary":
            num_outputs = action_space.shape[0]
            head_cls = Bernoulli
        else:
            raise NotImplementedError
        self.dist = head_cls(self.base.output_size, num_outputs)
    def __init__(self, obs_shape, action_space, base=None, base_kwargs=None, base_encoder='simple'):
        """Build the feature base and the action-distribution head.

        Args:
            obs_shape: Observation shape. Its length selects the default base
                and ``obs_shape[0]`` is passed to the base constructor.
            action_space: Action space used to choose the distribution head.
            base: Optional base class overriding the default choice.
            base_kwargs: Optional keyword arguments forwarded to the base.
            base_encoder: For 1-D observations with no explicit ``base``,
                selects the encoder by substring: ``'mlp'``, ``'deep'``,
                ``'attn'``, ``'special'`` or ``'MHSA'`` (checked in that
                order). The default ``'simple'`` matches none of them, so a
                1-D observation needs either ``base`` or a recognised
                ``base_encoder``.

        Raises:
            NotImplementedError: If no base can be chosen for ``obs_shape`` /
                ``base_encoder`` or the action space kind is not handled.
        """
        super(Policy, self).__init__()
        if base_kwargs is None:
            base_kwargs = {}
        if base is None:
            if len(obs_shape) == 3:
                base = CNNBase
            elif len(obs_shape) == 1:
                if 'mlp' in base_encoder:
                    base = MLPBase
                elif 'deep' in base_encoder:
                    base = DeepMLPBase
                elif 'attn' in base_encoder:
                    base = AttnMLP
                elif 'special' in base_encoder:
                    base = SpecialMLP
                elif 'MHSA' in base_encoder:
                    base = MHeadAttnModel
                else:
                    # Previously fell through with base=None and failed
                    # later with "'NoneType' object is not callable".
                    raise NotImplementedError(
                        "unknown base_encoder {!r} for 1-D observations"
                        .format(base_encoder))
            else:
                raise NotImplementedError

        self.base = base(obs_shape[0], **base_kwargs)

        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            num_outputs = action_space.n
            self.dist = Categorical(self.base.output_size, num_outputs)
        elif space_kind == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(self.base.output_size, num_outputs)
        elif space_kind == "MultiBinary":
            num_outputs = action_space.shape[0]
            self.dist = Bernoulli(self.base.output_size, num_outputs)
        else:
            raise NotImplementedError
Example #12
0
    def __init__(self,
                 obs_shape,
                 action_space,
                 base=None,
                 base_kwargs=None,
                 is_minigrid=False,
                 use_rew=False):
        """Build the feature base and the action-distribution head.

        Args:
            obs_shape: Observation shape. Its length selects the default base
                and ``obs_shape[0]`` is passed to the base constructor.
            action_space: Action space used to choose the distribution head.
            base: Optional base class overriding the default choice.
            base_kwargs: Optional keyword arguments forwarded to the base.
            is_minigrid: For 3-D shapes, pick ``CNN_minigrid`` instead of
                ``CNNBase`` as the default base.
            use_rew: Stored unchanged on ``self.use_rew``.

        Raises:
            NotImplementedError: If no default base exists for ``obs_shape``
                or the action space kind is not handled.
        """
        super(Policy, self).__init__()
        base_kwargs = {} if base_kwargs is None else base_kwargs

        if base is None:
            rank = len(obs_shape)
            if rank == 3:
                base = CNN_minigrid if is_minigrid else CNNBase
            elif rank == 1:
                base = MLPBase
            else:
                raise NotImplementedError

        self.base = base(obs_shape[0], **base_kwargs)
        self.use_rew = use_rew

        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            self.dist = Categorical(self.base.output_size, action_space.n)
        elif space_kind == "Box":
            self.dist = DiagGaussian(self.base.output_size,
                                     action_space.shape[0])
        elif space_kind == "MultiBinary":
            self.dist = Bernoulli(self.base.output_size,
                                  action_space.shape[0])
        else:
            raise NotImplementedError
Example #13
0
    def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
        """Wrap the chosen base in an ``Adaptor`` and add the head and critic.

        Args:
            obs_shape: Observation shape. Its length selects the default base
                class; the whole shape is passed to ``Adaptor``.
            action_space: Action space used to choose the distribution head.
            base: Optional base class; defaults to ``CNNBase`` (3-D shapes)
                or ``MLPBase`` (1-D shapes).
            base_kwargs: Optional keyword arguments forwarded to ``Adaptor``.

        Raises:
            NotImplementedError: If no default base exists for ``obs_shape``
                or the action space kind is not handled.
        """
        super(Policy, self).__init__()
        base_kwargs = {} if base_kwargs is None else base_kwargs

        if base is None:
            rank = len(obs_shape)
            if rank == 3:
                base = CNNBase
            elif rank == 1:
                base = MLPBase
            else:
                raise NotImplementedError

        adaptor = Adaptor(base=base,
                          obs_shape=obs_shape,
                          n_classes=512,
                          **base_kwargs)
        self.base = adaptor
        # The critic head is attached to the adaptor itself, not to self.
        adaptor.critic_linear = nn.Linear(adaptor.output_size, 1)

        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            self.dist = Categorical(self.base.output_size, action_space.n)
        elif space_kind == "Box":
            self.dist = DiagGaussian(self.base.output_size,
                                     action_space.shape[0])
        elif space_kind == "MultiBinary":
            self.dist = Bernoulli(self.base.output_size,
                                  action_space.shape[0])
        else:
            raise NotImplementedError
Example #14
0
    def __init__(self, obs_shape, action_space, base=None, base_kwargs=None, key_value = 0):
        """Build the feature base and the action-distribution head.

        Args:
            obs_shape: Observation shape. Its length selects the default base
                and ``obs_shape[0]`` is passed to the base constructor.
            action_space: Action space used to choose the distribution head.
            base: Optional base class; ignored when ``key_value == 1``.
            base_kwargs: Optional keyword arguments forwarded to the base.
            key_value: When equal to 1, ``KeyValueBase`` is used regardless
                of ``base``.

        Raises:
            NotImplementedError: If no default base exists for ``obs_shape``
                or the action space kind is not handled.
        """
        super(Policy, self).__init__()
        base_kwargs = {} if base_kwargs is None else base_kwargs

        if key_value == 1:
            base = KeyValueBase
        elif base is None:
            rank = len(obs_shape)
            if rank == 3:
                # Swap CNNBase for RLINE here if you want RLINE.
                base = CNNBase
            elif rank == 1:
                base = MLPBase
            else:
                raise NotImplementedError
        self.base = base(obs_shape[0], **base_kwargs)

        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            num_outputs = action_space.n
            head_cls = Categorical
        elif space_kind == "Box":
            num_outputs = action_space.shape[0]
            head_cls = DiagGaussian
        elif space_kind == "MultiBinary":
            num_outputs = action_space.shape[0]
            head_cls = Bernoulli
        else:
            raise NotImplementedError
        self.dist = head_cls(self.base.output_size, num_outputs)
Example #15
0
    def __init__(self,
                 obs_space,
                 obs_process,
                 obs_module,
                 action_space,
                 base_kwargs=None):
        """Build an ``NNBase2`` base, a critic head and the distribution head.

        Args:
            obs_space: Stored on ``self`` and passed to ``NNBase2``.
            obs_process: Stored on ``self`` and passed to ``NNBase2``.
            obs_module: Stored on ``self`` and passed to ``NNBase2``.
            action_space: Action space used to choose the distribution head.
            base_kwargs: Optional keyword arguments forwarded to ``NNBase2``.

        Raises:
            NotImplementedError: If the action space kind is not handled.
        """
        super(Policy, self).__init__()
        self.obs_space = obs_space
        self.obs_process = obs_process
        self.obs_module = obs_module

        extra_kwargs = {} if base_kwargs is None else base_kwargs

        # The base takes all of the observations and produces a single
        # feature vector.
        self.base = NNBase2(obs_space, obs_process, obs_module,
                            **extra_kwargs)

        def init_(module):
            return init(module, nn.init.orthogonal_,
                        lambda x: nn.init.constant_(x, 0))

        self.critic_linear = init_(nn.Linear(self.base.output_size, 1))

        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            self.dist = Categorical(self.base.output_size, action_space.n)
        elif space_kind == "Box":
            self.dist = DiagGaussian(self.base.output_size,
                                     action_space.shape[0])
        elif space_kind == "MultiBinary":
            self.dist = Bernoulli(self.base.output_size,
                                  action_space.shape[0])
        else:
            raise NotImplementedError
Example #16
0
def create_output_distribution(action_space, output_size):
    """Return the distribution head matching ``action_space``.

    The space is identified by its class name. ``Tuple`` spaces are handled
    recursively: one head is built per sub-space, all with the same
    ``output_size``, and the results are wrapped in a
    ``DistributionGeneratorTuple``.

    Args:
        action_space: The action space (or a ``Tuple`` of spaces).
        output_size: Input width given to every head.

    Returns:
        The constructed distribution head.

    Raises:
        NotImplementedError: If the space's class name is not handled.
    """
    space_kind = action_space.__class__.__name__

    if space_kind == "Discrete":
        return Categorical(output_size, action_space.n)
    if space_kind == "Box":
        return DiagGaussian(output_size, action_space.shape[0])
    if space_kind == "MultiBinary":
        return Bernoulli(output_size, action_space.shape[0])
    if space_kind == "MultiDiscrete":
        # NOTE(review): MultiDiscrete is mapped to a DiagGaussian head, the
        # same as Box - confirm this is intentional.
        return DiagGaussian(output_size, action_space.shape[0])
    if space_kind == "Tuple":
        sub_heads = []
        for sub_space in action_space:
            sub_heads.append(
                create_output_distribution(sub_space, output_size))
        return DistributionGeneratorTuple(tuple(sub_heads))

    raise NotImplementedError
Example #17
0
 def __init__(self,
              obs_shape,
              feat_dim,
              action_space,
              base=None,
              base_kwargs=None):
     """Build the feature base and the action-distribution head.

     Args:
         obs_shape: Observation shape. Its length selects the default base
             and ``obs_shape[0]`` is passed to the base constructor.
         feat_dim: For 3-D observations, 0 selects ``CNNBase``; any other
             value selects ``CombinedBase`` and is passed to it.
         action_space: Action space used to choose the distribution head.
         base: Optional base. A class is instantiated as
             ``base(obs_shape[0], **base_kwargs)``; any other object is
             used as an already-built base.
         base_kwargs: Optional keyword arguments forwarded to the base.

     Raises:
         NotImplementedError: If no default base exists for ``obs_shape``
             or the action space kind is not handled.
     """
     super(Policy, self).__init__()
     if base_kwargs is None:
         base_kwargs = {}
     if base is None:
         if len(obs_shape) == 3:
             if feat_dim == 0:
                 self.base = CNNBase(obs_shape[0], **base_kwargs)
             else:
                 self.base = CombinedBase(obs_shape[0], feat_dim,
                                          **base_kwargs)
         elif len(obs_shape) == 1:
             self.base = MLPBase(obs_shape[0], **base_kwargs)
         else:
             raise NotImplementedError
     elif isinstance(base, type):
         # A caller-supplied base used to be ignored, leaving self.base
         # unset and failing below with AttributeError.
         self.base = base(obs_shape[0], **base_kwargs)
     else:
         self.base = base

     space_kind = action_space.__class__.__name__
     if space_kind == "Discrete":
         num_outputs = action_space.n
         self.dist = Categorical_masked(self.base.output_size, num_outputs)
         self.act_dim = action_space.n
         # log(number of actions), stored as an entropy denominator.
         self.ent_denom = math.log(self.act_dim)
     elif space_kind == "Box":
         num_outputs = action_space.shape[0]
         self.dist = DiagGaussian(self.base.output_size, num_outputs)
     elif space_kind == "MultiBinary":
         num_outputs = action_space.shape[0]
         self.dist = Bernoulli(self.base.output_size, num_outputs)
     else:
         raise NotImplementedError
Example #18
0
    def __init__(self, obs_shape, action_space, hidden_size):
        """Build the distribution head and three linear layers.

        The head's input width equals its output width. For an action space
        other than Discrete/Box/MultiBinary no ``self.dist`` is created and
        no error is raised.

        Args:
            obs_shape: Observation shape; ``obs_shape[0]`` is the input width
                of ``self.base``.
            action_space: Action space used to choose the distribution head.
            hidden_size: Output width of ``self.base`` and input width of the
                value and action heads.
        """
        super(Policy, self).__init__()

        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            width = action_space.n
            self.dist = Categorical(width, width)
        elif space_kind == "Box":
            width = action_space.shape[0]
            self.dist = DiagGaussian(width, width)
        elif space_kind == "MultiBinary":
            width = action_space.shape[0]
            self.dist = Bernoulli(width, width)

        self.base = nn.Linear(obs_shape[0], hidden_size)
        self.value_head = nn.Linear(hidden_size, 1)
        # NOTE(review): the action head width is fixed at 2 regardless of
        # the action space - confirm this is intended.
        self.action_head = nn.Linear(hidden_size, 2)
Example #19
0
    def __init__(self,
                 obs_shape,
                 action_space,
                 other_cars=False,
                 ego_dim=None,
                 beta_dist=False,
                 base=None,
                 base_kwargs=None):
        """Build the feature base and the action-distribution head.

        Args:
            obs_shape: Observation shape. Its length selects the default base
                and ``obs_shape[0]`` is passed to the base constructor.
            action_space: Action space used to choose the distribution head.
            other_cars: Forwarded to the base as a keyword argument.
            ego_dim: Forwarded to the base as a keyword argument.
            beta_dist: For Box spaces, use ``BetaDist`` instead of
                ``DiagGaussian``. Only stored on ``self`` for Box spaces.
            base: Optional base class; defaults to ``CNNBase`` (3-D shapes)
                or ``MLPBase`` (1-D shapes).
            base_kwargs: Optional keyword arguments forwarded to the base.

        Raises:
            NotImplementedError: If no default base exists for ``obs_shape``
                or the action space kind is not handled.
        """
        super(Policy, self).__init__()
        base_kwargs = {} if base_kwargs is None else base_kwargs

        if base is None:
            rank = len(obs_shape)
            if rank == 3:
                base = CNNBase
            elif rank == 1:
                base = MLPBase
            else:
                raise NotImplementedError

        self.base = base(obs_shape[0],
                         other_cars=other_cars,
                         ego_dim=ego_dim,
                         **base_kwargs)

        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            self.dist = Categorical(self.base.output_size, action_space.n)
        elif space_kind == "Box":
            num_outputs = action_space.shape[0]
            self.beta_dist = beta_dist
            if not self.beta_dist:
                self.dist = DiagGaussian(self.base.output_size, num_outputs)
            else:
                self.dist = BetaDist(self.base.output_size, num_outputs)
                # Entropy of a Beta(20, 20) distribution, kept as a float
                # tensor.
                reference_entropy = torch.distributions.Beta(20, 20).entropy()
                self.entropy_lb = Variable(reference_entropy.float())
            # Keep the Box bounds alongside the head.
            self.hi_lim = action_space.high
            self.lo_lim = action_space.low
        elif space_kind == "MultiBinary":
            self.dist = Bernoulli(self.base.output_size,
                                  action_space.shape[0])
        else:
            raise NotImplementedError
Example #20
0
    def __init__(self, obs_shape, action_space, IAM=False, RNN=False, base_kwargs=None):
        """Pick a base from the flags, then build the distribution head.

        The chosen base and the action-space kind are each announced with a
        ``print``. ``output_size`` is called as a method on the base here.

        Args:
            obs_shape: Observation shape. Its length (3 or 1) selects the
                base family and ``obs_shape[0]`` is passed to the base.
            action_space: Action space used to choose the distribution head.
            IAM: Select ``IAMBaseCNN`` (3-D) or ``IAMBase`` (1-D).
            RNN: For 1-D shapes without ``IAM``, select ``RNNBase``. Stored
                as ``self.recurrent``.
            base_kwargs: Optional keyword arguments forwarded to the base.

        Raises:
            NotImplementedError: If ``obs_shape`` is neither 3-D nor 1-D or
                the action space kind is not handled.
        """
        super(IAMPolicy, self).__init__()
        base_kwargs = {} if base_kwargs is None else base_kwargs
        self.IAM = IAM
        self.recurrent = RNN

        rank = len(obs_shape)
        if rank not in (1, 3):
            raise NotImplementedError

        if rank == 3:
            if self.IAM:
                print("Using IAMBaseCNN")
                base = IAMBaseCNN
            else:
                print("Using CNNBase")
                base = CNNBase
        elif self.IAM:
            print("Using IAMBase")
            base = IAMBase
        elif self.recurrent:
            print("Using RNNBase")
            base = RNNBase
        else:
            print("Using MLPBase")
            base = MLPBase
        self.base = base(obs_shape[0], **base_kwargs)

        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            print("discrete")
            self.dist = Categorical(self.base.output_size(), action_space.n)
        elif space_kind == "Box":
            print("Box")
            self.dist = DiagGaussian(self.base.output_size(),
                                     action_space.shape[0])
        elif space_kind == "MultiBinary":
            print("MultiBinary")
            self.dist = Bernoulli(self.base.output_size(),
                                  action_space.shape[0])
        else:
            raise NotImplementedError
Example #21
0
    def __init__(self,
                 obs_shape,
                 action_space,
                 base=None,
                 base_kwargs=None,
                 navi=False,
                 hidden_size=64,
                 n_layers=2):
        """Build the feature base and the action-distribution head.

        Args:
            obs_shape: Observation shape. Its length selects the default base
                and ``obs_shape[0]`` is passed to the base constructor.
            action_space: Action space used to choose the distribution head.
            base: Optional base class; defaults to ``CNNBase`` (3-D shapes)
                or ``MLPBase`` (1-D shapes).
            base_kwargs: Optional keyword arguments forwarded to the base.
            navi: For Discrete spaces, size the head input as ``256 * 10``
                instead of ``self.base.output_size``.
            hidden_size: Forwarded to the base as a keyword argument.
            n_layers: Forwarded to the base as a keyword argument.

        Raises:
            NotImplementedError: If no default base exists for ``obs_shape``
                or the action space kind is not handled.
        """
        super(Policy, self).__init__()
        if base_kwargs is None:
            base_kwargs = {}
        if base is None:
            if len(obs_shape) == 3:
                # TODO(add hidden size)
                base = CNNBase
            elif len(obs_shape) == 1:
                base = MLPBase
            else:
                raise NotImplementedError

        # `base` is a class here, so type(base).__name__ would report the
        # metaclass ("type"). Use the class's own name, falling back to the
        # type name for non-class callables.
        base_name = getattr(base, "__name__", type(base).__name__)
        print("DEV: PPO using base:", base_name)
        self.base = base(obs_shape[0],
                         hidden_size=hidden_size,
                         n_layers=n_layers,
                         **base_kwargs)

        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            num_outputs = action_space.n
            net_outputs = self.base.output_size
            if navi:
                # NOTE(review): hard-coded head input width for navi mode -
                # confirm it matches what the navi base produces.
                net_outputs = 256 * 10
            self.dist = Categorical(net_outputs, num_outputs)
        elif space_kind == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(self.base.output_size, num_outputs)
        elif space_kind == "MultiBinary":
            num_outputs = action_space.shape[0]
            self.dist = Bernoulli(self.base.output_size, num_outputs)
        else:
            raise NotImplementedError
Example #22
0
    def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
        """Build a string-selected base and the action-distribution head.

        The base input width is ``obs_shape[0] * 2`` because the previous
        observation is appended to the input.

        Args:
            obs_shape: Observation shape; only ``obs_shape[0]`` is used.
            action_space: Action space used to choose the distribution head.
            base: One of ``'mlp'`` (``MLPBase``), ``'shared'``
                (``SharedBase``) or ``'osc'`` (``OscBase``).
            base_kwargs: Optional keyword arguments forwarded to the base.

        Raises:
            NotImplementedError: If ``base`` is not a recognised name or the
                action space kind is not handled.
        """
        super(Policy, self).__init__()
        base_kwargs = {} if base_kwargs is None else base_kwargs

        if base == 'mlp':
            base_cls = MLPBase
        elif base == 'shared':
            base_cls = SharedBase
        elif base == 'osc':
            # For this base the previous observation includes the sim time.
            base_cls = OscBase
        else:
            raise NotImplementedError

        # Doubled input width: current observation + previous observation.
        self.base = base_cls(obs_shape[0] * 2, **base_kwargs)

        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            self.dist = Categorical(self.base.output_size, action_space.n)
        elif space_kind == "Box":
            self.dist = DiagGaussian(self.base.output_size,
                                     action_space.shape[0])
        elif space_kind == "MultiBinary":
            self.dist = Bernoulli(self.base.output_size,
                                  action_space.shape[0])
        else:
            raise NotImplementedError
Example #23
0
    def __init__(self,
                 obs_shape,
                 action_space,
                 zero_last_layer=False,
                 base=None,
                 base_kwargs=None,
                 dist=None):
        """Build (or adopt) the base and the action-distribution head.

        Args:
            obs_shape: Observation shape. When no ``base`` is given it picks
                the default class and is passed whole to its constructor.
            action_space: Action space used to choose the default head.
            zero_last_layer: Forwarded to ``DiagGaussian`` as ``zll`` (Box
                spaces only).
            base: Optional base that is stored as-is (not instantiated).
                When ``None``: ``E2EBase`` if ``obs_shape[0]`` is a tuple,
                else ``CNNBase`` (3-D) or ``MLPBase`` (1-D).
            base_kwargs: Optional keyword arguments for a default base.
            dist: Optional head that replaces the default one. The default
                head is still constructed first.

        Raises:
            NotImplementedError: If no default base exists for ``obs_shape``
                or the action space kind is not handled.
        """
        super(Policy, self).__init__()
        base_kwargs = {} if base_kwargs is None else base_kwargs

        if base is not None:
            self.base = base
        else:
            if isinstance(obs_shape[0], tuple):
                base = E2EBase
            elif len(obs_shape) == 3:
                base = CNNBase
            elif len(obs_shape) == 1:
                base = MLPBase
            else:
                raise NotImplementedError
            self.base = base(obs_shape, **base_kwargs)

        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            self.dist = Categorical(self.base.output_size, action_space.n)
        elif space_kind == "Box":
            self.dist = DiagGaussian(self.base.output_size,
                                     action_space.shape[0],
                                     zll=zero_last_layer)
        elif space_kind == "MultiBinary":
            self.dist = Bernoulli(self.base.output_size,
                                  action_space.shape[0])
        else:
            raise NotImplementedError

        # A caller-supplied head wins over the default built above.
        if dist is not None:
            self.dist = dist
Example #24
0
    def __init__(self, action_space, env_name, device):
        """Set up a distribution head over a fixed random feature vector.

        Args:
            action_space: Action space used to choose the distribution head.
            env_name: Environment name; a name containing ``'NoFrameskip'``
                gets a feature size of 1024, any other name gets 64.
            device: Stored on ``self.device``.

        Raises:
            NotImplementedError: If the action space kind is not handled.
        """
        super(RandomAgent, self).__init__()
        self.action_space = action_space
        self.env_name = env_name
        self.device = device

        self.feature_size = 1024 if 'NoFrameskip' in self.env_name else 64

        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            self.dist = Categorical(self.feature_size, self.action_space.n)
        elif space_kind == "Box":
            self.dist = DiagGaussian(self.feature_size,
                                     self.action_space.shape[0])
        elif space_kind == "MultiBinary":
            self.dist = Bernoulli(self.feature_size,
                                  self.action_space.shape[0])
        else:
            raise NotImplementedError

        # Random feature vector of length feature_size, sampled once here.
        self.actor_features = torch.rand(self.feature_size)
    def __init__(self,
                 obs_shape,
                 action_space,
                 architecture,
                 state_channels,
                 hidden_size,
                 recurse_depth=1,
                 pool_inject=False,
                 **kwargs):
        """Build the conv encoder, transition module, critic and head.

        Args:
            obs_shape: Observation shape indexed as
                ``(channels, size, size)``; the two sizes must be equal and
                either 84 or 64.
            action_space: Action space used to choose the distribution head.
            architecture: ``"ff"``, ``"rnn"`` or ``"crnn"``; selects the
                transition module. The latter two mark the policy recurrent.
            state_channels: Output channels of the last encoder convolution,
                also passed to the transition module.
            hidden_size: Passed to the transition module and used as the
                input width of the critic and the distribution head.
            recurse_depth: Forwarded to the ``"rnn"``/``"crnn"`` transition.
            pool_inject: Forwarded to the ``"crnn"`` transition.
            **kwargs: Accepted and ignored.

        Raises:
            AssertionError: If ``obs_shape[1] != obs_shape[2]``.
            NotImplementedError: If the image size, architecture or action
                space kind is not handled.
        """
        super(RecurrentPolicy, self).__init__()

        def relu_init(module):
            # Project `init` helper with orthogonal_, a zero-constant
            # initializer and the ReLU gain.
            return init(module, nn.init.orthogonal_,
                        lambda x: nn.init.constant_(x, 0),
                        nn.init.calculate_gain('relu'))

        num_inputs, im_size = obs_shape[0], obs_shape[1]
        assert im_size == obs_shape[2]
        self.spatial_latent_size = (7, 7)
        self.hidden_size = hidden_size
        self.architecture = architecture
        self.is_recurrent = architecture in ["rnn", "crnn"]

        # (in_channels, out_channels, kernel_size, extra Conv2d kwargs)
        if im_size == 84:
            # Spatial size: 84 -> 20 -> 9 -> 7.
            conv_specs = (
                (num_inputs, 32, 8, {"stride": 4}),
                (32, 64, 4, {"stride": 2}),
                (64, state_channels, 3, {"stride": 1}),
            )
        elif im_size == 64:
            # Spatial size: 64 -> 16 -> 9 -> 7.
            conv_specs = (
                (num_inputs, 32, 6, {"stride": 4, "padding": 1}),
                (32, 64, 4, {"stride": 2, "padding": 2}),
                (64, state_channels, 3, {"stride": 1}),
            )
        else:
            raise NotImplementedError

        encoder_layers = []
        for c_in, c_out, kernel, extra in conv_specs:
            encoder_layers.append(
                relu_init(nn.Conv2d(c_in, c_out, kernel, **extra)))
            encoder_layers.append(nn.ReLU())
        self.encoder = nn.Sequential(*encoder_layers)

        if architecture == "ff":
            self.transition = NoTransition(hidden_size,
                                           state_channels=state_channels)
        elif architecture == "rnn":
            self.transition = RNNTransition(hidden_size,
                                            state_channels=state_channels,
                                            recurse_depth=recurse_depth)
        elif architecture == "crnn":
            self.transition = CRNNTransition(hidden_size,
                                             state_channels=state_channels,
                                             recurse_depth=recurse_depth,
                                             pool_inject=pool_inject)
        else:
            raise NotImplementedError

        def plain_init(module):
            # Same helper, without the extra gain argument.
            return init(module, nn.init.orthogonal_,
                        lambda x: nn.init.constant_(x, 0))

        self.critic_linear = plain_init(nn.Linear(self.hidden_size, 1))

        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            self.dist = Categorical(self.hidden_size, action_space.n)
        elif space_kind == "Box":
            self.dist = DiagGaussian(self.hidden_size, action_space.shape[0])
        elif space_kind == "MultiBinary":
            self.dist = Bernoulli(self.hidden_size, action_space.shape[0])
        else:
            raise NotImplementedError

        self.train()