Esempio n. 1
0
def create_output_distribution(action_space, output_size):
    """Build the distribution head that matches ``action_space``.

    The space is identified by its class name. ``Tuple`` spaces are handled
    recursively: one head is created per sub-space and the heads are wrapped
    in a ``DistributionGeneratorTuple``.

    Args:
        action_space: Space object. Only its class name, its ``n`` or
            ``shape`` attribute and (for ``Tuple``) iteration are used here.
        output_size: Forwarded as the first argument of every head.

    Returns:
        The constructed distribution head.

    Raises:
        NotImplementedError: If the class name is not one of the handled
            space types.
    """
    space_name = action_space.__class__.__name__

    if space_name == "Discrete":
        return Categorical(output_size, action_space.n)
    if space_name == "Box":
        return DiagGaussian(output_size, action_space.shape[0])
    if space_name == "MultiBinary":
        return Bernoulli(output_size, action_space.shape[0])
    if space_name == "MultiDiscrete":
        # NOTE(review): MultiDiscrete gets a Gaussian head, exactly like Box;
        # confirm this is intended.
        return DiagGaussian(output_size, action_space.shape[0])
    if space_name == "Tuple":
        sub_heads = tuple(
            create_output_distribution(sub_space, output_size)
            for sub_space in action_space)
        return DistributionGeneratorTuple(sub_heads)
    raise NotImplementedError
Esempio n. 2
0
    def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
        """Set up the feature base, bookkeeping attributes and action head.

        Args:
            obs_shape: Observation shape; its length picks the default base
                (3 -> ``CNNBase``, 1 -> ``MLPBase``) and ``obs_shape[0]`` is
                the first argument given to the base.
            action_space: Space whose class name (``Discrete``, ``Box`` or
                ``MultiBinary``) selects the distribution head.
            base: Callable used to build the base; inferred from
                ``obs_shape`` when ``None``.
            base_kwargs: Optional extra keyword arguments for ``base``.

        Raises:
            NotImplementedError: For an observation rank without a default
                base, or an unsupported action space.
        """
        super(Policy, self).__init__()
        if base_kwargs is None:
            base_kwargs = {}
        if base is None:
            if len(obs_shape) == 3:
                base = CNNBase
            elif len(obs_shape) == 1:
                base = MLPBase
            else:
                raise NotImplementedError

        # Bookkeeping attributes; how they are used is outside this block.
        self.tt = 0
        self.nn = 0
        self.visionmodel = None
        # Prefer the GPU, but fall back to the CPU so that constructing the
        # policy does not fail on machines without CUDA.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.knob_target_hist = torch.zeros(1, 3, device=device)
        self.base = base(obs_shape[0], **base_kwargs)

        if action_space.__class__.__name__ == "Discrete":
            num_outputs = action_space.n
            self.dist = Categorical(self.base.output_size, num_outputs)
        elif action_space.__class__.__name__ == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(self.base.output_size, num_outputs)
        elif action_space.__class__.__name__ == "MultiBinary":
            num_outputs = action_space.shape[0]
            self.dist = Bernoulli(self.base.output_size, num_outputs)
        else:
            raise NotImplementedError
Esempio n. 3
0
    def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
        """Build the feature base and the action-distribution head.

        Args:
            obs_shape: Observation shape; its length picks the default base
                (3 -> ``CNNBase``, 1 -> ``MLPBase``) and ``obs_shape[0]`` is
                passed to the base.
            action_space: Space whose class name (``Discrete``, ``Box`` or
                ``MultiBinary``) selects the distribution head.
            base: Callable used to build the base; inferred from
                ``obs_shape`` when ``None``.
            base_kwargs: Optional keyword arguments for ``base``. May contain
                ``'is_genesis'``; the key is forwarded only when truthy.

        Raises:
            NotImplementedError: For an observation rank without a default
                base, or an unsupported action space.
        """
        super(Policy, self).__init__()
        # Copy so that dropping 'is_genesis' below never alters the caller's
        # dictionary.
        base_kwargs = {} if base_kwargs is None else dict(base_kwargs)
        if base is None:
            if len(obs_shape) == 3:
                base = CNNBase
            elif len(obs_shape) == 1:
                base = MLPBase
            else:
                raise NotImplementedError

        # This key is needed to configure the CNN correctly for Genesis, but
        # causes problems with other domains. A missing key is treated like a
        # falsy one instead of raising KeyError.
        if not base_kwargs.get('is_genesis', False):
            base_kwargs.pop('is_genesis', None)

        self.base = base(obs_shape[0], **base_kwargs)

        if action_space.__class__.__name__ == "Discrete":
            num_outputs = action_space.n
            self.dist = Categorical(self.base.output_size, num_outputs)
        elif action_space.__class__.__name__ == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(self.base.output_size, num_outputs)
        elif action_space.__class__.__name__ == "MultiBinary":
            num_outputs = action_space.shape[0]
            self.dist = Bernoulli(self.base.output_size, num_outputs)
        else:
            raise NotImplementedError
Esempio n. 4
0
    def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
        """Build the feature base and the action-distribution head.

        Args:
            obs_shape: Observation shape; rank 3 defaults to ``CNNBase`` and
                rank 1 to ``MLPBase``. ``obs_shape[0]`` is passed to the base.
            action_space: ``Discrete``, ``Box`` or ``MultiBinary`` (matched by
                class name) or an instance of ``gym.spaces.MultiDiscrete``.
            base: Callable used to build the base; inferred when ``None``.
            base_kwargs: Optional keyword arguments for ``base``.

        Raises:
            NotImplementedError: For an unsupported observation rank or
                action space.
        """
        super(Policy, self).__init__()
        base_kwargs = {} if base_kwargs is None else base_kwargs

        if base is None:
            rank = len(obs_shape)
            if rank == 3:
                base = CNNBase
            elif rank == 1:
                base = MLPBase
            else:
                raise NotImplementedError

        self.base = base(obs_shape[0], **base_kwargs)

        space_name = action_space.__class__.__name__
        if space_name == "Discrete":
            self.dist = Categorical(self.base.output_size, action_space.n)
        elif space_name == "Box":
            self.dist = DiagGaussian(self.base.output_size,
                                     action_space.shape[0])
        elif space_name == "MultiBinary":
            self.dist = Bernoulli(self.base.output_size,
                                  action_space.shape[0])
        elif isinstance(action_space, gym.spaces.MultiDiscrete):
            # The head receives the summed size of all sub-actions plus the
            # per-sub-action sizes themselves.
            branch_sizes = action_space.nvec
            total_size = int(np.sum(branch_sizes))
            self.dist = MultiCategoricalDistribution(self.base.output_size,
                                                     total_size,
                                                     branch_sizes)
        else:
            raise NotImplementedError
Esempio n. 5
0
    def __init__(self,
                 obs_shape,
                 action_space,
                 is_leaf,
                 base=None,
                 base_kwargs=None):
        """Build an ``OpsBase`` feature extractor and the matching action head.

        Args:
            obs_shape: Observation shape; ``obs_shape[0]`` is passed to
                ``OpsBase``.
            action_space: Forwarded to ``OpsBase``; its class name
                (``Discrete``, ``Box`` or ``MultiBinary``) also selects the
                distribution head.
            is_leaf: Forwarded to ``OpsBase`` as ``is_leaf``.
            base: Not used in this constructor.
            base_kwargs: Optional keyword arguments for ``OpsBase``.

        Raises:
            NotImplementedError: For an unsupported action space.
        """
        super(OpsPolicy, self).__init__()
        extra_kwargs = {} if base_kwargs is None else base_kwargs

        self.base = OpsBase(obs_shape[0],
                            action_space,
                            is_leaf=is_leaf,
                            **extra_kwargs)

        space_name = action_space.__class__.__name__
        if space_name == "Discrete":
            head_cls, num_outputs = Categorical, action_space.n
        elif space_name == "Box":
            head_cls, num_outputs = DiagGaussian, action_space.shape[0]
        elif space_name == "MultiBinary":
            head_cls, num_outputs = Bernoulli, action_space.shape[0]
        else:
            raise NotImplementedError
        self.dist = head_cls(self.base.output_size, num_outputs)
Esempio n. 6
0
    def __init__(self,
                 obs_shape,
                 action_space,
                 base=None,
                 base_kwargs=None,
                 num_agents=1):
        """Build one shared base with a separate actor and critic head per agent.

        Args:
            obs_shape: Observation shape; only rank 3 has a default base
                (``CNNBase``). ``obs_shape[2]`` is passed to the base.
            action_space: Space whose ``n`` gives the number of categorical
                outputs of every actor head.
            base: Callable used to build the base; inferred when ``None``.
            base_kwargs: Optional keyword arguments for ``base``.
            num_agents: Number of actor heads and critic heads to create.

        Raises:
            NotImplementedError: If ``base`` is ``None`` and ``obs_shape`` is
                not rank 3.
        """
        super(PolicyShareBase, self).__init__()
        base_kwargs = {} if base_kwargs is None else base_kwargs

        if base is None:
            if len(obs_shape) != 3:
                raise NotImplementedError
            base = CNNBase

        def _zero_bias(bias):
            return nn.init.constant_(bias, 0)

        def _init_layer(layer):
            # Orthogonal weights, zero bias.
            return init(layer, nn.init.orthogonal_, _zero_bias)

        self.num_agents = num_agents
        self.base = base(obs_shape[2], **base_kwargs)

        num_outputs = action_space.n
        self.dists = nn.ModuleList()
        for _ in range(num_agents):
            self.dists.append(Categorical(self.base.output_size, num_outputs))

        self.critic_linears = nn.ModuleList()
        for _ in range(num_agents):
            self.critic_linears.append(
                _init_layer(nn.Linear(self.base.output_size, 1)))
    def __init__(self,
                 obs_shape,
                 action_space,
                 agent_num,
                 agent_i,
                 base=None,
                 base_kwargs=None):
        """Build a base that is told the agent count, plus the action head.

        Args:
            obs_shape: Observation shape; rank 3 defaults to ``CNNBase`` and
                rank 1 to ``MLPBase``. ``obs_shape[0]`` is passed to the base.
            action_space: Space whose class name (``Discrete``, ``Box`` or
                ``MultiBinary``) selects the distribution head.
            agent_num: Passed to the base as its second positional argument.
            agent_i: Not used in this constructor.
            base: Callable used to build the base; inferred when ``None``.
            base_kwargs: Optional keyword arguments for ``base``.

        Raises:
            NotImplementedError: For an unsupported observation rank or
                action space.
        """
        super(Policy, self).__init__()
        base_kwargs = {} if base_kwargs is None else base_kwargs

        if base is None:
            rank = len(obs_shape)
            if rank == 3:
                base = CNNBase
            elif rank == 1:
                base = MLPBase
            else:
                raise NotImplementedError

        # The actor input dimension is num_state; the critic input dimension
        # is num_state * agent_num.
        self.base = base(obs_shape[0], agent_num, **base_kwargs)

        space_name = action_space.__class__.__name__
        if space_name == "Discrete":
            head_cls, num_outputs = Categorical, action_space.n
        elif space_name == "Box":
            head_cls, num_outputs = DiagGaussian, action_space.shape[0]
        elif space_name == "MultiBinary":
            head_cls, num_outputs = Bernoulli, action_space.shape[0]
        else:
            raise NotImplementedError
        self.dist = head_cls(self.base.output_size, num_outputs)
Esempio n. 8
0
    def __init__(self,
                 coord_size,
                 input_size=(1, 1),
                 action_space=1,
                 hidden_size=1,
                 window_size=1,
                 action_embedding=0):
        """Build the network, the coordinate-wise action head and the critic.

        Args:
            coord_size: Passed to ``BasicNet`` and ``Categorical``; also
                scales the critic input size.
            input_size: Pair ``(#lstm_input, #mlp_input)``.
            action_space: Second argument of the ``Categorical`` head.
            hidden_size: Hidden size of the network; also stored as
                ``recurrent_hidden_state_size``.
            window_size: Forwarded to ``BasicNet``.
            action_embedding: Added to the first entry of ``input_size`` for
                the network input only.
        """
        super().__init__()
        # TODO: should change "batch_size" to coord_size

        lstm_inputs, mlp_inputs = input_size[0], input_size[1]
        # Per-coordinate feature width consumed by both the actor head and
        # the critic head.
        feature_width = lstm_inputs * hidden_size + mlp_inputs + 1

        self.net = BasicNet(coord_size,
                            input_size=(lstm_inputs + action_embedding,
                                        mlp_inputs),
                            hidden_size=hidden_size,
                            window_size=window_size)
        # will coordinate-wisely return distributions
        self.action_distribution = Categorical(feature_width,
                                               action_space,
                                               coord_size=coord_size)
        self.critic = CriticHead(coord_size * feature_width)

        self.coord_size = coord_size
        self.input_size = input_size
        self.action_space = action_space
        self.hidden_size = hidden_size
        self.window_size = window_size
        self.action_embedding_size = action_embedding
        self.recurrent_hidden_state_size = hidden_size
Esempio n. 9
0
    def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
        """Build the feature base (given the full ``obs_shape``) and the action head.

        Args:
            obs_shape: Observation shape. Passed whole to the base; its
                length picks the default base (3 -> ``CNNBase``,
                1 -> ``MLPBase``).
            action_space: Space whose class name (``Discrete``, ``Box`` or
                ``MultiBinary``) selects the distribution head.
            base: Callable used to build the base, the string ``'Mnist'`` to
                select ``CNNMnist``, or ``None`` to infer it.
            base_kwargs: Optional keyword arguments for the base.

        Raises:
            NotImplementedError: For an unsupported observation rank or
                action space.
        """
        super(Policy, self).__init__()
        base_kwargs = {} if base_kwargs is None else base_kwargs

        if base == 'Mnist':
            base = CNNMnist
        if base is None:
            rank = len(obs_shape)
            if rank == 3:
                base = CNNBase
            elif rank == 1:
                base = MLPBase
            else:
                raise NotImplementedError

        # TODO: the upstream original passed obs_shape[0] here; this version
        # passes the complete shape.
        self.base = base(obs_shape, **base_kwargs)
        print("obs_shape[0]:", obs_shape[0])

        space_name = action_space.__class__.__name__
        if space_name == "Discrete":
            head_cls, num_outputs = Categorical, action_space.n
        elif space_name == "Box":
            head_cls, num_outputs = DiagGaussian, action_space.shape[0]
        elif space_name == "MultiBinary":
            head_cls, num_outputs = Bernoulli, action_space.shape[0]
        else:
            raise NotImplementedError
        self.dist = head_cls(self.base.output_size, num_outputs)
Esempio n. 10
0
    def __init__(self,
                 obs_space,
                 obs_process,
                 obs_module,
                 action_space,
                 base_kwargs=None):
        """Build an ``NNBase2`` base, a linear critic and the action head.

        Args:
            obs_space: Stored on the instance and passed to ``NNBase2``.
            obs_process: Stored on the instance and passed to ``NNBase2``.
            obs_module: Stored on the instance and passed to ``NNBase2``.
            action_space: Space whose class name (``Discrete``, ``Box`` or
                ``MultiBinary``) selects the distribution head.
            base_kwargs: Optional keyword arguments for ``NNBase2``.

        Raises:
            NotImplementedError: For an unsupported action space.
        """
        super(Policy, self).__init__()
        self.obs_space = obs_space
        self.obs_process = obs_process
        self.obs_module = obs_module

        extra_kwargs = {} if base_kwargs is None else base_kwargs

        # base takes all of the observations and produces a single feature
        # vector
        self.base = NNBase2(obs_space, obs_process, obs_module, **extra_kwargs)

        def _zero_bias(bias):
            return nn.init.constant_(bias, 0)

        # Critic: one linear layer with orthogonal weights and zero bias.
        self.critic_linear = init(nn.Linear(self.base.output_size, 1),
                                  nn.init.orthogonal_, _zero_bias)

        space_name = action_space.__class__.__name__
        if space_name == "Discrete":
            head_cls, num_outputs = Categorical, action_space.n
        elif space_name == "Box":
            head_cls, num_outputs = DiagGaussian, action_space.shape[0]
        elif space_name == "MultiBinary":
            head_cls, num_outputs = Bernoulli, action_space.shape[0]
        else:
            raise NotImplementedError
        self.dist = head_cls(self.base.output_size, num_outputs)
Esempio n. 11
0
    def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
        """Build the feature base and the action-distribution head.

        Args:
            obs_shape: Observation shape. For rank 3 the last dimension picks
                the default base (84 -> ``CNNBase``, 64 -> ``CNNBase64``);
                rank 1 defaults to ``MLPBase``. ``obs_shape[0]`` is passed to
                the base.
            action_space: Space whose class name (``Discrete``, ``Box`` or
                ``MultiBinary``) selects the distribution head.
            base: Callable used to build the base; inferred when ``None``.
            base_kwargs: Optional keyword arguments for ``base``.

        Raises:
            NotImplementedError: For an observation shape without a default
                base, or an unsupported action space.
        """
        super(Policy, self).__init__()
        base_kwargs = {} if base_kwargs is None else base_kwargs

        if base is None:
            rank = len(obs_shape)
            if rank == 1:
                base = MLPBase
            elif rank == 3:
                last_dim = obs_shape[-1]
                if last_dim == 84:
                    base = CNNBase
                elif last_dim == 64:
                    base = CNNBase64
                else:
                    raise NotImplementedError
            else:
                raise NotImplementedError

        self.base = base(obs_shape[0], **base_kwargs)

        space_name = action_space.__class__.__name__
        if space_name == "Discrete":
            head_cls, num_outputs = Categorical, action_space.n
        elif space_name == "Box":
            head_cls, num_outputs = DiagGaussian, action_space.shape[0]
        elif space_name == "MultiBinary":
            head_cls, num_outputs = Bernoulli, action_space.shape[0]
        else:
            raise NotImplementedError
        self.dist = head_cls(self.base.output_size, num_outputs)
Esempio n. 12
0
    def __init__(self,
                 obs_shape,
                 action_space,
                 action_activation=None,
                 base=None,
                 base_kwargs=None):
        """Build an action-aware base and the action-distribution head.

        The base additionally receives ``num_actions`` and an
        ``action_embedding`` callable: one-hot encoding for ``Discrete``
        spaces, identity for ``Box`` spaces.

        Args:
            obs_shape: Observation shape; rank 3 defaults to ``CNNBase`` and
                rank 1 to ``MLPBase``. ``obs_shape[0]`` is passed to the base.
            action_space: ``Discrete`` or ``Box`` space (matched by class
                name).
            action_activation: Forwarded to ``DiagGaussian`` as
                ``activation`` for ``Box`` spaces.
            base: Callable used to build the base; inferred when ``None``.
            base_kwargs: Keyword arguments for ``base``. Must contain
                ``'hidden_size'``; ``'is_ref'`` is additionally read for
                ``Discrete`` spaces. The caller's dictionary is not modified.

        Raises:
            NotImplementedError: For an unsupported observation rank, or any
                action space other than ``Discrete`` / ``Box`` (including
                ``MultiBinary``).
            KeyError: If a required ``base_kwargs`` entry is missing.
        """
        super(Policy, self).__init__()
        # Copy so the keys injected below do not leak into the caller's dict.
        base_kwargs = {} if base_kwargs is None else dict(base_kwargs)
        if base is None:
            if len(obs_shape) == 3:
                base = CNNBase
            elif len(obs_shape) == 1:
                base = MLPBase
            else:
                raise NotImplementedError

        space_name = action_space.__class__.__name__
        if space_name == "Discrete":
            num_outputs = action_space.n

            def action_embedding(actions):
                return torch.nn.functional.one_hot(
                    actions, num_classes=num_outputs).squeeze(-2)
        elif space_name == "Box":
            num_outputs = action_space.shape[0]

            def action_embedding(actions):
                return actions
        else:
            # No action embedding is defined for MultiBinary or any other
            # space, so construction stops here.
            raise NotImplementedError

        base_kwargs["num_actions"] = num_outputs
        base_kwargs["action_embedding"] = action_embedding

        self.ob_dim = obs_shape[0]
        self.h_dim = base_kwargs["hidden_size"]
        self.ac_dim = num_outputs

        self.base = base(obs_shape[0], **base_kwargs)

        # Only Discrete and Box can reach this point.
        if space_name == "Discrete":
            self.dist = Categorical(self.base.output_size,
                                    num_outputs,
                                    is_ref=base_kwargs["is_ref"])
        else:
            self.dist = DiagGaussian(self.base.output_size,
                                     num_outputs,
                                     activation=action_activation)

        self.obs_rms = None
Esempio n. 13
0
    def __init__(self,
                 obs_shape,
                 action_space,
                 base=None,
                 base_kwargs=None,
                 is_minigrid=False,
                 use_rew=False):
        """Build the feature base and the action-distribution head.

        Args:
            obs_shape: Observation shape. Rank 3 defaults to ``CNN_minigrid``
                when ``is_minigrid`` is truthy and to ``CNNBase`` otherwise;
                rank 1 defaults to ``MLPBase``. ``obs_shape[0]`` is passed to
                the base.
            action_space: Space whose class name (``Discrete``, ``Box`` or
                ``MultiBinary``) selects the distribution head.
            base: Callable used to build the base; inferred when ``None``.
            base_kwargs: Optional keyword arguments for ``base``.
            is_minigrid: Selects the rank-3 default base (see above).
            use_rew: Stored on the instance as ``use_rew``.

        Raises:
            NotImplementedError: For an unsupported observation rank or
                action space.
        """
        super(Policy, self).__init__()
        base_kwargs = {} if base_kwargs is None else base_kwargs

        if base is None:
            rank = len(obs_shape)
            if rank == 3:
                base = CNN_minigrid if is_minigrid else CNNBase
            elif rank == 1:
                base = MLPBase
            else:
                raise NotImplementedError

        self.base = base(obs_shape[0], **base_kwargs)
        self.use_rew = use_rew

        space_name = action_space.__class__.__name__
        if space_name == "Discrete":
            head_cls, num_outputs = Categorical, action_space.n
        elif space_name == "Box":
            head_cls, num_outputs = DiagGaussian, action_space.shape[0]
        elif space_name == "MultiBinary":
            head_cls, num_outputs = Bernoulli, action_space.shape[0]
        else:
            raise NotImplementedError
        self.dist = head_cls(self.base.output_size, num_outputs)
Esempio n. 14
0
    def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
        """Build the action head and a base fed with actions and observations.

        The base input width is the action dimension plus twice
        ``obs_shape[0]``; the head input width is the fixed ``hidden_size``
        of 256.

        Args:
            obs_shape: Observation shape; only ``obs_shape[0]`` is used.
            action_space: Space whose class name (``Discrete``, ``Box`` or
                ``MultiBinary``) selects the distribution head.
            base: Callable invoked as ``base(num_inputs, num_outputs)``;
                defaults to ``MLPBase``.
            base_kwargs: Accepted but not forwarded to the base.

        Raises:
            NotImplementedError: For an unsupported action space.
        """
        super(APolicy, self).__init__()
        if base_kwargs is None:
            base_kwargs = {}
        base = MLPBase if base is None else base

        self.hidden_size = 256

        space_name = action_space.__class__.__name__
        if space_name == "Discrete":
            head_cls, num_outputs = Categorical, action_space.n
        elif space_name == "Box":
            head_cls, num_outputs = DiagGaussian, action_space.shape[0]
        elif space_name == "MultiBinary":
            head_cls, num_outputs = Bernoulli, action_space.shape[0]
        else:
            raise NotImplementedError

        # In every supported case the base sees the action dimension plus two
        # observation-sized blocks.
        num_inputs = num_outputs + obs_shape[0] * 2
        self.dist = head_cls(self.hidden_size, num_outputs)

        self.base = base(num_inputs, num_outputs)
Esempio n. 15
0
    def __init__(self, obs_shape, action_space, base=None, base_kwargs=None, key_value = 0):
        """Build the feature base and the action-distribution head.

        Args:
            obs_shape: Observation shape; rank 3 defaults to ``CNNBase`` and
                rank 1 to ``MLPBase``. ``obs_shape[0]`` is passed to the base.
            action_space: Space whose class name (``Discrete``, ``Box`` or
                ``MultiBinary``) selects the distribution head.
            base: Callable used to build the base; inferred when ``None``.
                Ignored when ``key_value == 1``.
            base_kwargs: Optional keyword arguments for the base.
            key_value: When equal to 1, ``KeyValueBase`` is used regardless
                of ``base``.

        Raises:
            NotImplementedError: For an unsupported observation rank or
                action space.
        """
        super(Policy, self).__init__()
        base_kwargs = {} if base_kwargs is None else base_kwargs

        if key_value == 1:
            base = KeyValueBase
        elif base is None:
            rank = len(obs_shape)
            if rank == 3:
                # Swap CNNBase for RLINE here if you want RLINE.
                base = CNNBase
            elif rank == 1:
                base = MLPBase
            else:
                raise NotImplementedError

        self.base = base(obs_shape[0], **base_kwargs)

        space_name = action_space.__class__.__name__
        if space_name == "Discrete":
            head_cls, num_outputs = Categorical, action_space.n
        elif space_name == "Box":
            head_cls, num_outputs = DiagGaussian, action_space.shape[0]
        elif space_name == "MultiBinary":
            head_cls, num_outputs = Bernoulli, action_space.shape[0]
        else:
            raise NotImplementedError
        self.dist = head_cls(self.base.output_size, num_outputs)
Esempio n. 16
0
    def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
        """Wrap an already-built base with an action head and a linear critic.

        Args:
            obs_shape: Not used in this constructor.
            action_space: Space whose class name (``Discrete``, ``Box`` or
                ``MultiBinary``) selects the distribution head.
            base: Stored as ``self.base`` without being called; its
                ``output_size`` attribute is read, so the default ``None``
                fails with ``AttributeError``.
            base_kwargs: Accepted but not used.

        Raises:
            NotImplementedError: For an unsupported action space.
        """
        super(Policy, self).__init__()
        if base_kwargs is None:
            base_kwargs = {}

        # The base is used as given; it is not constructed from obs_shape.
        self.base = base

        space_name = action_space.__class__.__name__
        if space_name == "Discrete":
            head_cls, num_outputs = Categorical, action_space.n
        elif space_name == "Box":
            head_cls, num_outputs = DiagGaussian, action_space.shape[0]
        elif space_name == "MultiBinary":
            head_cls, num_outputs = Bernoulli, action_space.shape[0]
        else:
            raise NotImplementedError
        self.dist = head_cls(self.base.output_size, num_outputs)

        def _zero_bias(bias):
            return nn.init.constant_(bias, 0)

        # NOTE(review): the critic input width is fixed at 512 rather than
        # taken from self.base.output_size; confirm the base emits 512
        # features.
        self.critic_linear = init(nn.Linear(512, 1), nn.init.orthogonal_,
                                  _zero_bias)
Esempio n. 17
0
    def __init__(self, obs_shape, action_space, base=None, base_kwargs=None, base_encoder='simple'):
        """Build the feature base and the action-distribution head.

        Args:
            obs_shape: Observation shape; rank 3 defaults to ``CNNBase``,
                rank 1 selects a base from ``base_encoder``. ``obs_shape[0]``
                is passed to the base.
            action_space: Space whose class name (``Discrete``, ``Box`` or
                ``MultiBinary``) selects the distribution head.
            base: Callable used to build the base; inferred when ``None``.
            base_kwargs: Optional keyword arguments for ``base``.
            base_encoder: For rank-1 observations without an explicit
                ``base``, the first of the substrings ``'mlp'``, ``'deep'``,
                ``'attn'``, ``'special'``, ``'MHSA'`` found in this string
                selects ``MLPBase``, ``DeepMLPBase``, ``AttnMLP``,
                ``SpecialMLP`` or ``MHeadAttnModel`` respectively.

        Raises:
            NotImplementedError: For an unsupported observation rank, an
                unsupported action space, or a ``base_encoder`` that matches
                none of the known substrings (this includes the default
                ``'simple'``, which previously failed later with a
                ``TypeError`` because ``base`` stayed ``None``).
        """
        super(Policy, self).__init__()
        if base_kwargs is None:
            base_kwargs = {}
        if base is None:
            if len(obs_shape) == 3:
                base = CNNBase
            elif len(obs_shape) == 1:
                if 'mlp' in base_encoder:
                    base = MLPBase
                elif 'deep' in base_encoder:
                    base = DeepMLPBase
                elif 'attn' in base_encoder:
                    base = AttnMLP
                elif 'special' in base_encoder:
                    base = SpecialMLP
                elif 'MHSA' in base_encoder:
                    base = MHeadAttnModel
                else:
                    # Fail here with a clear error instead of calling None
                    # below.
                    raise NotImplementedError(
                        "Unknown base_encoder: {!r}".format(base_encoder))
            else:
                raise NotImplementedError

        self.base = base(obs_shape[0], **base_kwargs)

        if action_space.__class__.__name__ == "Discrete":
            num_outputs = action_space.n
            self.dist = Categorical(self.base.output_size, num_outputs)
        elif action_space.__class__.__name__ == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(self.base.output_size, num_outputs)
        elif action_space.__class__.__name__ == "MultiBinary":
            num_outputs = action_space.shape[0]
            self.dist = Bernoulli(self.base.output_size, num_outputs)
        else:
            raise NotImplementedError
Esempio n. 18
0
    def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
        """Wrap the chosen base in an ``Adaptor`` and add critic and action heads.

        Args:
            obs_shape: Observation shape; rank 3 defaults to ``CNNBase`` and
                rank 1 to ``MLPBase``. Passed whole to ``Adaptor``.
            action_space: Space whose class name (``Discrete``, ``Box`` or
                ``MultiBinary``) selects the distribution head.
            base: Base handed to ``Adaptor``; inferred when ``None``.
            base_kwargs: Optional keyword arguments for ``Adaptor``.

        Raises:
            NotImplementedError: For an unsupported observation rank or
                action space.
        """
        super(Policy, self).__init__()
        base_kwargs = {} if base_kwargs is None else base_kwargs

        if base is None:
            rank = len(obs_shape)
            if rank == 3:
                base = CNNBase
            elif rank == 1:
                base = MLPBase
            else:
                raise NotImplementedError

        adaptor = Adaptor(base=base,
                          obs_shape=obs_shape,
                          n_classes=512,
                          **base_kwargs)
        self.base = adaptor
        # The critic layer is attached to the adaptor itself, not to this
        # policy object.
        adaptor.critic_linear = nn.Linear(adaptor.output_size, 1)

        space_name = action_space.__class__.__name__
        if space_name == "Discrete":
            head_cls, num_outputs = Categorical, action_space.n
        elif space_name == "Box":
            head_cls, num_outputs = DiagGaussian, action_space.shape[0]
        elif space_name == "MultiBinary":
            head_cls, num_outputs = Bernoulli, action_space.shape[0]
        else:
            raise NotImplementedError
        self.dist = head_cls(self.base.output_size, num_outputs)
Esempio n. 19
0
 def __init__(self, action_space, base=None, base_kwargs=None):
     """Build an ``MLPBase`` and a pair of distribution heads.

     Args:
         action_space: Not used in this constructor.
         base: Ignored; ``MLPBase`` is always used.
         base_kwargs: Optional keyword arguments for ``MLPBase``.
     """
     super(Policy, self).__init__()
     extra_kwargs = {} if base_kwargs is None else base_kwargs

     # The observation size is obtained through get_v('obs_size') rather than
     # from an argument.
     self.base = MLPBase(get_v('obs_size'), **extra_kwargs)

     # Two heads: a Gaussian one and a categorical one, each with 2 outputs.
     heads = [
         DiagGaussian(self.base.output_size, 2),
         Categorical(self.base.output_size, 2),
     ]
     self.dist = nn.ModuleList(heads)
Esempio n. 20
0
    def __init__(self, obs_shape, action_space, hidden_size):
        """Build the action head and three linear layers.

        Args:
            obs_shape: Observation shape; ``obs_shape[0]`` is the input width
                of the base layer.
            action_space: Space whose class name (``Discrete``, ``Box`` or
                ``MultiBinary``) selects the distribution head.
            hidden_size: Output width of the base layer and input width of
                the value and action layers.
        """
        super(Policy, self).__init__()

        space_name = action_space.__class__.__name__
        if space_name == "Discrete":
            head_cls, num_outputs = Categorical, action_space.n
        elif space_name == "Box":
            head_cls, num_outputs = DiagGaussian, action_space.shape[0]
        elif space_name == "MultiBinary":
            head_cls, num_outputs = Bernoulli, action_space.shape[0]
        else:
            # NOTE(review): any other space silently leaves self.dist unset;
            # confirm this is intended.
            head_cls = None

        if head_cls is not None:
            # The head's input width equals its output width here.
            self.dist = head_cls(num_outputs, num_outputs)

        self.base = nn.Linear(obs_shape[0], hidden_size)
        self.value_head = nn.Linear(hidden_size, 1)
        self.action_head = nn.Linear(hidden_size, 2)
    def __init__(self, occ_obs_shape, sign_obs_shape, state_rep, action_space,
                 recurrent_policy):
        """Build the base for the chosen state representation and a categorical head.

        Args:
            occ_obs_shape: Passed to ``CNNBase`` when ``state_rep`` is
                ``'full'``.
            sign_obs_shape: Passed to ``MLPBase`` (``'sign'`` / ``'original'``)
                or ``CNNBase`` (``'full'``).
            state_rep: One of ``'sign'``, ``'original'`` or ``'full'``.
            action_space: Space whose ``n`` gives the number of categorical
                outputs.
            recurrent_policy: Forwarded to the selected base.

        Raises:
            NotImplementedError: If ``state_rep`` is not a supported value.
        """
        super(Policy, self).__init__()

        if state_rep in ['sign', 'original']:
            self.base = MLPBase(sign_obs_shape, recurrent_policy)
        elif state_rep == 'full':
            self.base = CNNBase(occ_obs_shape, sign_obs_shape,
                                recurrent_policy)
        else:
            # ``NotImplemented`` is a comparison sentinel, not an exception;
            # calling it raises TypeError. Raise the real exception class.
            raise NotImplementedError(
                'Only implemented sign, original, and full state representation'
            )

        num_outputs = action_space.n  # 2
        self.dist = Categorical(self.base.output_size, num_outputs)
    def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
        """Adopt an already-built base and create an argument-less categorical head.

        Args:
            obs_shape: Not used in this constructor.
            action_space: Not used in this constructor.
            base: Stored as ``self.base`` without being called; its
                ``train()`` method is invoked, so the default ``None`` fails
                with ``AttributeError``.
            base_kwargs: Accepted but not used.
        """
        super(Policy, self).__init__()
        base_kwargs = {} if base_kwargs is None else base_kwargs

        # The base is used as given; it is not constructed from obs_shape.
        self.base = base
        self.base.train()

        self.dist = Categorical()
Esempio n. 23
0
    def __init__(self,
                 obs_shape,
                 action_space,
                 other_cars=False,
                 ego_dim=None,
                 beta_dist=False,
                 base=None,
                 base_kwargs=None):
        """Build the feature base and the action-distribution head.

        Args:
            obs_shape: Observation shape; rank 3 defaults to ``CNNBase`` and
                rank 1 to ``MLPBase``. ``obs_shape[0]`` is passed to the base.
            action_space: Space whose class name (``Discrete``, ``Box`` or
                ``MultiBinary``) selects the distribution head. For ``Box``
                its ``high`` / ``low`` are stored as ``hi_lim`` / ``lo_lim``.
            other_cars: Forwarded to the base.
            ego_dim: Forwarded to the base.
            beta_dist: For ``Box`` spaces, use ``BetaDist`` instead of
                ``DiagGaussian`` and store ``entropy_lb``.
            base: Callable used to build the base; inferred when ``None``.
            base_kwargs: Optional keyword arguments for ``base``.

        Raises:
            NotImplementedError: For an unsupported observation rank or
                action space.
        """
        super(Policy, self).__init__()
        base_kwargs = {} if base_kwargs is None else base_kwargs

        if base is None:
            rank = len(obs_shape)
            if rank == 3:
                base = CNNBase
            elif rank == 1:
                base = MLPBase
            else:
                raise NotImplementedError

        self.base = base(obs_shape[0],
                         other_cars=other_cars,
                         ego_dim=ego_dim,
                         **base_kwargs)

        space_name = action_space.__class__.__name__
        if space_name == "Discrete":
            self.dist = Categorical(self.base.output_size, action_space.n)
        elif space_name == "Box":
            num_outputs = action_space.shape[0]
            # NOTE: self.beta_dist is only assigned for Box spaces.
            self.beta_dist = beta_dist
            if not self.beta_dist:
                self.dist = DiagGaussian(self.base.output_size, num_outputs)
            else:
                self.dist = BetaDist(self.base.output_size, num_outputs)
                # Entropy of a Beta(20, 20) distribution, kept as a float
                # tensor.
                reference_beta = torch.distributions.Beta(20, 20)
                self.entropy_lb = Variable(reference_beta.entropy().float())
            self.hi_lim = action_space.high
            self.lo_lim = action_space.low
        elif space_name == "MultiBinary":
            self.dist = Bernoulli(self.base.output_size,
                                  action_space.shape[0])
        else:
            raise NotImplementedError
Esempio n. 24
0
    def __init__(self,
                 obs_shape,
                 action_space,
                 base=None,
                 base_kwargs=None,
                 navi=False,
                 hidden_size=64,
                 n_layers=2):
        """Build the feature base and the action-distribution head.

        Args:
            obs_shape: Observation shape; rank 3 defaults to ``CNNBase`` and
                rank 1 to ``MLPBase``. ``obs_shape[0]`` is passed to the base.
            action_space: Space whose class name (``Discrete``, ``Box`` or
                ``MultiBinary``) selects the distribution head.
            base: Callable used to build the base; inferred when ``None``.
            base_kwargs: Optional keyword arguments for ``base``.
            navi: For ``Discrete`` spaces, use a fixed head input width of
                ``256 * 10`` instead of ``self.base.output_size``.
            hidden_size: Forwarded to the base.
            n_layers: Forwarded to the base.

        Raises:
            NotImplementedError: For an unsupported observation rank or
                action space.
        """
        super(Policy, self).__init__()
        if base_kwargs is None:
            base_kwargs = {}
        if base is None:
            if len(obs_shape) == 3:
                # TODO(add hidden size)
                base = CNNBase
            elif len(obs_shape) == 1:
                base = MLPBase
            else:
                raise NotImplementedError

        # ``base`` is normally a class, for which ``type(base).__name__`` is
        # always "type"; report the base's own name and fall back to its type
        # name only for callables that have no ``__name__``.
        print("DEV: PPO using base:",
              getattr(base, "__name__", type(base).__name__))
        self.base = base(obs_shape[0],
                         hidden_size=hidden_size,
                         n_layers=n_layers,
                         **base_kwargs)

        if action_space.__class__.__name__ == "Discrete":
            num_outputs = action_space.n
            net_outputs = self.base.output_size
            if navi:
                net_outputs = 256 * 10
            self.dist = Categorical(net_outputs, num_outputs)
        elif action_space.__class__.__name__ == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(self.base.output_size, num_outputs)
        elif action_space.__class__.__name__ == "MultiBinary":
            num_outputs = action_space.shape[0]
            self.dist = Bernoulli(self.base.output_size, num_outputs)
        else:
            raise NotImplementedError
Esempio n. 25
0
    def __init__(self, obs_shape, action_space, IAM=False, RNN=False, base_kwargs=None):
        """Choose a base from the observation rank and flags, then add the action head.

        Args:
            obs_shape: Observation shape. Rank 3 selects ``IAMBaseCNN`` or
                ``CNNBase``; rank 1 selects ``IAMBase``, ``RNNBase`` or
                ``MLPBase``. ``obs_shape[0]`` is passed to the base.
            action_space: Space whose class name (``Discrete``, ``Box`` or
                ``MultiBinary``) selects the distribution head.
            IAM: Stored as ``self.IAM``; takes priority over ``RNN`` when
                choosing the base.
            RNN: Stored as ``self.recurrent``; only consulted for rank-1
                observations when ``IAM`` is falsy.
            base_kwargs: Optional keyword arguments for the base.

        Raises:
            NotImplementedError: For an unsupported observation rank or
                action space.
        """
        super(IAMPolicy, self).__init__()
        base_kwargs = {} if base_kwargs is None else base_kwargs
        self.IAM = IAM
        self.recurrent = RNN

        rank = len(obs_shape)
        if rank not in (3, 1):
            raise NotImplementedError

        if rank == 3:
            if self.IAM:
                print("Using IAMBaseCNN")
                base = IAMBaseCNN
            else:
                print("Using CNNBase")
                base = CNNBase
        elif self.IAM:
            print("Using IAMBase")
            base = IAMBase
        elif self.recurrent:
            print("Using RNNBase")
            base = RNNBase
        else:
            print("Using MLPBase")
            base = MLPBase
        self.base = base(obs_shape[0], **base_kwargs)

        # Unlike a plain attribute, output_size is called as a method on
        # these bases.
        space_name = action_space.__class__.__name__
        if space_name == "Discrete":
            print("discrete")
            self.dist = Categorical(self.base.output_size(), action_space.n)
        elif space_name == "Box":
            print("Box")
            self.dist = DiagGaussian(self.base.output_size(),
                                     action_space.shape[0])
        elif space_name == "MultiBinary":
            print("MultiBinary")
            self.dist = Bernoulli(self.base.output_size(),
                                  action_space.shape[0])
        else:
            raise NotImplementedError
Esempio n. 26
0
    def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
        """Build a base selected by name and the action-distribution head.

        The base input width is ``obs_shape[0] * 2`` because the previous
        observation is appended to the input (for ``'osc'`` the previous
        observation includes the simulation time).

        Args:
            obs_shape: Observation shape; only ``obs_shape[0]`` is used.
            action_space: Space whose class name (``Discrete``, ``Box`` or
                ``MultiBinary``) selects the distribution head.
            base: One of ``'mlp'``, ``'shared'`` or ``'osc'``, selecting
                ``MLPBase``, ``SharedBase`` or ``OscBase``.
            base_kwargs: Optional keyword arguments for the base.

        Raises:
            NotImplementedError: For an unknown ``base`` (including the
                default ``None``) or an unsupported action space.
        """
        super(Policy, self).__init__()
        base_kwargs = {} if base_kwargs is None else base_kwargs

        if base == 'mlp':
            base_cls = MLPBase
        elif base == 'shared':
            base_cls = SharedBase
        elif base == 'osc':
            base_cls = OscBase
        else:
            raise NotImplementedError

        # Doubled width: the previous observation is added to the input.
        self.base = base_cls(obs_shape[0] * 2, **base_kwargs)

        space_name = action_space.__class__.__name__
        if space_name == "Discrete":
            head_cls, num_outputs = Categorical, action_space.n
        elif space_name == "Box":
            head_cls, num_outputs = DiagGaussian, action_space.shape[0]
        elif space_name == "MultiBinary":
            head_cls, num_outputs = Bernoulli, action_space.shape[0]
        else:
            raise NotImplementedError
        self.dist = head_cls(self.base.output_size, num_outputs)
Esempio n. 27
0
 def __init__(self,
              obs_shape,
              action_space,
              architecture,
              state_channels,
              hidden_size,
              recurse_depth=1,
              pool_inject=False,
              **kwargs):
     """Run the parent constructor, then attach the two ``*_cog`` heads.

     Every argument is forwarded unchanged to the parent ``__init__``.
     Afterwards this adds ``critic_cog`` (a linear layer mapping
     ``self.hidden_size`` to 1) and ``actor_cog`` (a ``Categorical`` with 2
     outputs), and finally calls ``self.train()``.

     ``self.hidden_size`` is read here, so it must exist once the parent
     constructor has returned.
     """
     parent_kwargs = dict(recurse_depth=recurse_depth,
                          pool_inject=pool_inject)
     parent_kwargs.update(kwargs)
     super(PolicyNetwork2AM, self).__init__(obs_shape, action_space,
                                            architecture, state_channels,
                                            hidden_size, **parent_kwargs)

     def _zero_const(tensor):
         # Fill the given tensor with the constant 0.
         return nn.init.constant_(tensor, 0)

     def _apply_init(module):
         # Delegates to the project's ``init`` helper; presumably the second
         # argument initialises weights and the third biases -- confirm
         # against ``init``'s definition.
         return init(module, nn.init.orthogonal_, _zero_const)

     cog_value_layer = nn.Linear(self.hidden_size, 1)
     self.critic_cog = _apply_init(cog_value_layer)
     self.actor_cog = Categorical(self.hidden_size, 2)
     self.train()
Esempio n. 28
0
    def __init__(self,
                 obs_shape,
                 action_space,
                 zero_last_layer=False,
                 base=None,
                 base_kwargs=None,
                 dist=None):
        """Set up ``self.base`` and ``self.dist`` for this policy.

        Args:
            obs_shape: Observation shape. Used to pick a default base when
                ``base`` is ``None`` and passed whole to that base.
            action_space: Action space, dispatched on its class name
                (``"Discrete"``, ``"Box"`` or ``"MultiBinary"``).
            zero_last_layer: Forwarded as ``zll`` to ``DiagGaussian``; only
                used for ``"Box"`` action spaces.
            base: If given, stored as ``self.base`` as-is (it is not called
                here). If ``None``, a base class is chosen from
                ``obs_shape`` and instantiated with ``base_kwargs``.
            base_kwargs: Keyword arguments for the auto-selected base;
                ignored when ``base`` is supplied.
            dist: If given, replaces the head built from ``action_space``.

        Raises:
            NotImplementedError: If no default base matches ``obs_shape`` or
                the action-space class name is unsupported.
        """
        super(Policy, self).__init__()
        base_kwargs = {} if base_kwargs is None else base_kwargs

        if base is not None:
            self.base = base
        else:
            # Pick a default base class from the structure of obs_shape.
            if isinstance(obs_shape[0], tuple):
                base_cls = E2EBase
            elif len(obs_shape) == 3:
                base_cls = CNNBase
            elif len(obs_shape) == 1:
                base_cls = MLPBase
            else:
                raise NotImplementedError
            self.base = base_cls(obs_shape, **base_kwargs)

        # Resolve the head class, its output count and any extra keyword
        # arguments first, then construct it in one place.
        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            num_outputs = action_space.n
            head_cls, head_kwargs = Categorical, {}
        elif space_kind == "Box":
            num_outputs = action_space.shape[0]
            head_cls, head_kwargs = DiagGaussian, {"zll": zero_last_layer}
        elif space_kind == "MultiBinary":
            num_outputs = action_space.shape[0]
            head_cls, head_kwargs = Bernoulli, {}
        else:
            raise NotImplementedError
        self.dist = head_cls(self.base.output_size, num_outputs,
                             **head_kwargs)

        # NOTE(review): the default head above is always built before this
        # override, so an unsupported action space raises even when ``dist``
        # is supplied -- confirm that is intended.
        if dist is not None:
            self.dist = dist
Esempio n. 29
0
    def __init__(self, action_space, env_name, device):
        """Store the arguments and build a distribution head plus features.

        Args:
            action_space: Action space, dispatched on its class name
                (``"Discrete"``, ``"Box"`` or ``"MultiBinary"``).
            env_name: Environment name; ``feature_size`` is 1024 when it
                contains ``'NoFrameskip'`` and 64 otherwise.
            device: Stored on ``self.device``; not otherwise used in this
                constructor.

        Raises:
            NotImplementedError: If the action-space class name is not
                supported.
        """
        super(RandomAgent, self).__init__()
        self.action_space = action_space
        self.env_name = env_name
        self.device = device

        self.feature_size = 1024 if 'NoFrameskip' in self.env_name else 64

        # Decide which head to build and how many outputs it needs, then
        # construct it once below.
        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            num_outputs = self.action_space.n
            head_cls = Categorical
        elif space_kind == "Box":
            num_outputs = self.action_space.shape[0]
            head_cls = DiagGaussian
        elif space_kind == "MultiBinary":
            num_outputs = self.action_space.shape[0]
            head_cls = Bernoulli
        else:
            raise NotImplementedError
        self.dist = head_cls(self.feature_size, num_outputs)

        # Random 1-D tensor of length feature_size, drawn once here.
        # ``device`` is not passed to ``torch.rand``.
        self.actor_features = torch.rand(self.feature_size)
Esempio n. 30
0
    def __init__(self,
                 obs_shape,
                 action_space,
                 architecture,
                 state_channels,
                 hidden_size,
                 recurse_depth=1,
                 pool_inject=False,
                 **kwargs):
        """Build the conv encoder, transition module, critic and action head.

        Args:
            obs_shape: Indexed as ``obs_shape[0]`` (input channels of the
                first conv layer) and ``obs_shape[1]`` / ``obs_shape[2]``
                (must be equal; only 84 and 64 are supported).
            action_space: Action space, dispatched on its class name
                (``"Discrete"``, ``"Box"`` or ``"MultiBinary"``).
            architecture: ``"ff"``, ``"rnn"`` or ``"crnn"``; selects the
                transition module. ``is_recurrent`` is true for the latter
                two.
            state_channels: Output channels of the last conv layer, also
                passed to the transition module.
            hidden_size: Input width of the critic layer and of the action
                head; also passed to the transition module.
            recurse_depth: Forwarded to the ``"rnn"`` and ``"crnn"``
                transitions.
            pool_inject: Forwarded to the ``"crnn"`` transition only.
            **kwargs: Accepted but not used by this constructor.

        Raises:
            NotImplementedError: For an unsupported image size,
                architecture, or action-space class name.
        """
        super(RecurrentPolicy, self).__init__()

        def _zero_const(tensor):
            # Fill the given tensor with the constant 0.
            return nn.init.constant_(tensor, 0)

        def _init_with_relu_gain(module):
            # Project ``init`` helper, called with the 'relu' gain.
            return init(module, nn.init.orthogonal_, _zero_const,
                        nn.init.calculate_gain('relu'))

        def _init_default_gain(module):
            # Same helper, without an explicit gain argument.
            return init(module, nn.init.orthogonal_, _zero_const)

        num_inputs = obs_shape[0]
        im_size = obs_shape[1]
        assert im_size == obs_shape[2]
        # Both encoders below end at a 7x7 spatial size (see the size notes).
        self.spatial_latent_size = (7, 7)
        self.hidden_size = hidden_size
        self.architecture = architecture
        self.is_recurrent = architecture in ("rnn", "crnn")

        # Each entry is (out_channels, kernel_size, stride, padding).
        if im_size == 84:
            # Spatial size per Conv2d output formula: 84 -> 20 -> 9 -> 7.
            conv_specs = (
                (32, 8, 4, 0),
                (64, 4, 2, 0),
                (state_channels, 3, 1, 0),
            )
        elif im_size == 64:
            # Spatial size per Conv2d output formula: 64 -> 16 -> 9 -> 7.
            conv_specs = (
                (32, 6, 4, 1),
                (64, 4, 2, 2),
                (state_channels, 3, 1, 0),
            )
        else:
            raise NotImplementedError

        # Conv + ReLU pairs, created in order so layer indices inside the
        # Sequential match a hand-written conv/relu/conv/relu/conv/relu.
        encoder_layers = []
        in_channels = num_inputs
        for out_channels, kernel_size, stride, padding in conv_specs:
            conv = nn.Conv2d(in_channels,
                             out_channels,
                             kernel_size,
                             stride=stride,
                             padding=padding)
            encoder_layers.append(_init_with_relu_gain(conv))
            encoder_layers.append(nn.ReLU())
            in_channels = out_channels
        self.encoder = nn.Sequential(*encoder_layers)

        # Pick the transition class and accumulate only the keyword
        # arguments that class is given.
        transition_kwargs = {"state_channels": state_channels}
        if architecture == "ff":
            transition_cls = NoTransition
        elif architecture == "rnn":
            transition_cls = RNNTransition
            transition_kwargs["recurse_depth"] = recurse_depth
        elif architecture == "crnn":
            transition_cls = CRNNTransition
            transition_kwargs["recurse_depth"] = recurse_depth
            transition_kwargs["pool_inject"] = pool_inject
        else:
            raise NotImplementedError
        self.transition = transition_cls(hidden_size, **transition_kwargs)

        self.critic_linear = _init_default_gain(
            nn.Linear(self.hidden_size, 1))

        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            num_outputs = action_space.n
            head_cls = Categorical
        elif space_kind == "Box":
            num_outputs = action_space.shape[0]
            head_cls = DiagGaussian
        elif space_kind == "MultiBinary":
            num_outputs = action_space.shape[0]
            head_cls = Bernoulli
        else:
            raise NotImplementedError
        self.dist = head_cls(self.hidden_size, num_outputs)

        self.train()