def __init__(self, args, cent_obs_space, device=torch.device("cpu")):
    """Build the recurrent critic: feature base (CNN or MLP), optional RNN,
    and a scalar value head (PopArt-normalized when enabled)."""
    super(R_Critic, self).__init__()
    self.hidden_size = args.hidden_size
    self._use_orthogonal = args.use_orthogonal
    self._use_naive_recurrent_policy = args.use_naive_recurrent_policy
    self._use_recurrent_policy = args.use_recurrent_policy
    self._recurrent_N = args.recurrent_N
    self._use_popart = args.use_popart
    # kwargs used when moving incoming tensors onto this module's device
    self.tpdv = dict(dtype=torch.float32, device=device)

    # index 0 -> xavier, index 1 -> orthogonal (selected by the bool flag)
    weight_init_fn = [nn.init.xavier_uniform_, nn.init.orthogonal_][self._use_orthogonal]

    # image-like (3-dim) observations get a CNN base, otherwise an MLP
    obs_shape = get_shape_from_obs_space(cent_obs_space)
    base_cls = CNNBase if len(obs_shape) == 3 else MLPBase
    self.base = base_cls(args, obs_shape)

    if self._use_naive_recurrent_policy or self._use_recurrent_policy:
        self.rnn = RNNLayer(self.hidden_size, self.hidden_size,
                            self._recurrent_N, self._use_orthogonal)

    def init_(m):
        # zero-bias initialization with the chosen weight scheme
        return init(m, weight_init_fn, lambda x: nn.init.constant_(x, 0))

    # value head: PopArt keeps a running normalization of the targets
    if self._use_popart:
        self.v_out = init_(PopArt(self.hidden_size, 1, device=device))
    else:
        self.v_out = init_(nn.Linear(self.hidden_size, 1))

    self.to(device)
def __init__(self, args, obs_space, action_space, device=torch.device("cpu")):
    """Build the recurrent actor: feature base (CNN or MLP), optional RNN,
    and an action head matching the given action space."""
    super(R_Actor, self).__init__()
    self.hidden_size = args.hidden_size
    self._gain = args.gain
    self._use_orthogonal = args.use_orthogonal
    self._use_policy_active_masks = args.use_policy_active_masks
    self._use_naive_recurrent_policy = args.use_naive_recurrent_policy
    self._use_recurrent_policy = args.use_recurrent_policy
    self._recurrent_N = args.recurrent_N
    # kwargs used when moving incoming tensors onto this module's device
    self.tpdv = dict(dtype=torch.float32, device=device)

    # image-like (3-dim) observations get a CNN base, otherwise an MLP
    obs_shape = get_shape_from_obs_space(obs_space)
    base_cls = CNNBase if len(obs_shape) == 3 else MLPBase
    self.base = base_cls(args, obs_shape)

    if self._use_naive_recurrent_policy or self._use_recurrent_policy:
        self.rnn = RNNLayer(self.hidden_size, self.hidden_size,
                            self._recurrent_N, self._use_orthogonal)

    # action head handles Discrete/Box/etc. spaces internally
    self.act = ACTLayer(action_space, self.hidden_size,
                        self._use_orthogonal, self._gain)

    self.to(device)
def __init__(self, args, obs_space, action_space, hidden_size=64, use_recurrent_policy=True):
    """Build a lightweight actor: feature base (CNN or MLP), optional RNN,
    and an action head.

    NOTE(review): unlike the R_* variants, this constructor takes no device,
    sets tpdv without one, and does not call self.to(...) — presumably the
    caller handles device placement; confirm before relying on it.
    """
    super(actor, self).__init__()
    self.hidden_size = hidden_size
    self._use_recurrent_policy = use_recurrent_policy
    self._gain = args.gain
    self._use_orthogonal = args.use_orthogonal
    self._recurrent_N = args.recurrent_N
    # dtype-only conversion kwargs (no device key here)
    self.tpdv = dict(dtype=torch.float32)

    # image-like (3-dim) observations get a CNN base, otherwise an MLP
    obs_shape = get_shape_from_obs_space(obs_space)
    base_cls = CNNBase if len(obs_shape) == 3 else MLPBase
    self.base = base_cls(args, obs_shape)

    if self._use_recurrent_policy:
        self.rnn = RNNLayer(self.hidden_size, self.hidden_size,
                            self._recurrent_N, self._use_orthogonal)

    self.act = ACTLayer(action_space, self.hidden_size,
                        self._use_orthogonal, self._gain)
def __init__(self, args, obs_space, share_obs_space, action_space, device=torch.device("cpu"), cat_self=True):
    """Build a joint actor-critic model: separate observation preparation for
    local and shared observations, a common MLP trunk, optional RNN, plus a
    value head and an action head."""
    super(R_Model, self).__init__()
    self._gain = args.gain
    self._use_orthogonal = args.use_orthogonal
    self._use_ReLU = args.use_ReLU
    self._recurrent_N = args.recurrent_N
    self._use_naive_recurrent_policy = args.use_naive_recurrent_policy
    self._use_recurrent_policy = args.use_recurrent_policy
    self._use_centralized_V = args.use_centralized_V
    self.hidden_size = args.hidden_size
    self.device = device
    # kwargs used when moving incoming tensors onto this module's device
    self.tpdv = dict(dtype=torch.float32, device=device)

    # index 0 -> xavier, index 1 -> orthogonal (selected by the bool flag)
    weight_init_fn = [nn.init.xavier_uniform_, nn.init.orthogonal_][self._use_orthogonal]

    # local observation encoder: CNN for image-like (3-dim) shapes, else MLP
    obs_shape = get_shape_from_obs_space(obs_space)
    base_cls = CNNBase if len(obs_shape) == 3 else MLPBase
    self.obs_prep = base_cls(args, obs_shape)

    # shared-observation encoder; falls back to the local encoder when the
    # critic is not centralized
    if self._use_centralized_V:
        share_obs_shape = get_shape_from_obs_space(share_obs_space)
        self.share_obs_prep = base_cls(args, share_obs_shape, cat_self)
    else:
        self.share_obs_prep = self.obs_prep

    # common trunk shared by the value and action heads
    self.common = MLPLayer(self.hidden_size, self.hidden_size, layer_N=0,
                           use_orthogonal=self._use_orthogonal, use_ReLU=self._use_ReLU)

    if self._use_naive_recurrent_policy or self._use_recurrent_policy:
        self.rnn = RNNLayer(self.hidden_size, self.hidden_size,
                            self._recurrent_N, self._use_orthogonal)

    def init_(m):
        # zero-bias initialization with the chosen weight scheme
        return init(m, weight_init_fn, lambda x: nn.init.constant_(x, 0))

    # scalar value head
    self.v_out = init_(nn.Linear(self.hidden_size, 1))

    # action head handles Discrete/Box/etc. spaces internally
    self.act = ACTLayer(action_space, self.hidden_size,
                        self._use_orthogonal, self._gain)

    self.to(self.device)
def __init__(self, args, cent_obs_space, action_space, device=torch.device("cpu")):
    """Build a recurrent Q head: feature base over the centralized
    observation, optional RNN, a hidden projection layer, and per-action
    output layer(s).

    For MultiDiscrete spaces one linear head is created per sub-action and
    ``self.multi_discrete`` is set; for Discrete/Box/MultiBinary a single
    ``action_out`` head is created.
    """
    super(R_Q_Head, self).__init__()
    self.hidden_size = args.hidden_size
    self._gain = args.gain
    self._use_orthogonal = args.use_orthogonal
    self._use_naive_recurrent_policy = args.use_naive_recurrent_policy
    self._use_recurrent_policy = args.use_recurrent_policy
    self._recurrent_N = args.recurrent_N
    # kwargs used when moving incoming tensors onto this module's device
    self.tpdv = dict(dtype=torch.float32, device=device)

    # index 0 -> xavier, index 1 -> orthogonal (selected by the bool flag)
    init_method = [nn.init.xavier_uniform_, nn.init.orthogonal_][self._use_orthogonal]

    # GRFootball supplies a flat global state vector rather than a gym space
    if args.env_name == 'GRFootball':
        cent_obs_space = [args.state_shape]
    else:
        cent_obs_space = get_shape_from_obs_space(cent_obs_space)
    base = CNNBase if len(cent_obs_space) == 3 else MLPBase
    self.base = base(args, cent_obs_space)

    if self._use_naive_recurrent_policy or self._use_recurrent_policy:
        self.rnn = RNNLayer(self.hidden_size, self.hidden_size,
                            self._recurrent_N, self._use_orthogonal)

    def init_(m):
        # zero-bias initialization with the chosen weight scheme
        return init(m, init_method, lambda x: nn.init.constant_(x, 0))

    # NOTE: despite the name, v_out is a hidden-to-hidden projection here
    # (hidden_size -> hidden_size), not a scalar value head.
    self.v_out = init_(nn.Linear(self.hidden_size, self.hidden_size))

    # BUG FIX: multi_discrete was only assigned inside the MultiDiscrete
    # branch, so reading it after constructing with any other space raised
    # AttributeError. Default it to False up front.
    self.multi_discrete = False

    # self.act = ACTLayer(action_space, self.hidden_size, self._use_orthogonal, self._gain)
    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        self.action_out = nn.Linear(self.hidden_size, action_space.n)
    elif space_kind in ("Box", "MultiBinary"):
        self.action_out = nn.Linear(self.hidden_size, action_space.shape[0])
    elif space_kind == "MultiDiscrete":
        self.multi_discrete = True
        # one output head per sub-action dimension
        action_dims = action_space.high - action_space.low + 1
        self.action_outs = nn.ModuleList(
            [nn.Linear(self.hidden_size, action_dim) for action_dim in action_dims]
        )
    # unsupported spaces fall through with no action head (matches original
    # behavior); callers will fail at use time rather than construction time

    self.to(device)
def __init__(self, args, obs_space, action_space, device=torch.device("cpu")):
    """Build the recurrent actor with optional mixed (Dict) observations,
    influence MLP branch, and an auxiliary policy value head.

    Raises:
        AttributeError: if ``args`` lacks any of the expected flags.
    """
    super(R_Actor, self).__init__()
    self.hidden_size = args.hidden_size
    self._gain = args.gain
    self._use_orthogonal = args.use_orthogonal
    self._activation_id = args.activation_id
    self._use_policy_active_masks = args.use_policy_active_masks
    self._use_naive_recurrent_policy = args.use_naive_recurrent_policy
    self._use_recurrent_policy = args.use_recurrent_policy
    self._use_influence_policy = args.use_influence_policy
    self._influence_layer_N = args.influence_layer_N
    self._use_policy_vhead = args.use_policy_vhead
    # BUG FIX: _use_popart was read in the vhead branch below but never
    # assigned, so enabling use_policy_vhead raised AttributeError.
    self._use_popart = args.use_popart
    self._recurrent_N = args.recurrent_N
    # kwargs used when moving incoming tensors onto this module's device
    self.tpdv = dict(dtype=torch.float32, device=device)

    obs_shape = get_shape_from_obs_space(obs_space)

    # Dict observation spaces get the mixed CNN+MLP base; plain spaces get
    # a CNN for image-like (3-dim) shapes, otherwise an attention-capable MLP
    if 'Dict' in obs_shape.__class__.__name__:
        self._mixed_obs = True
        self.base = MIXBase(args, obs_shape, cnn_layers_params=args.cnn_layers_params)
    else:
        self._mixed_obs = False
        self.base = CNNBase(args, obs_shape) if len(obs_shape) == 3 \
            else MLPBase(args, obs_shape, use_attn_internal=args.use_attn_internal, use_cat_self=True)

    input_size = self.base.output_size

    if self._use_naive_recurrent_policy or self._use_recurrent_policy:
        self.rnn = RNNLayer(input_size, self.hidden_size,
                            self._recurrent_N, self._use_orthogonal)
        input_size = self.hidden_size

    # influence branch: a separate MLP over the raw observation vector whose
    # features are concatenated with the recurrent features
    if self._use_influence_policy:
        self.mlp = MLPLayer(obs_shape[0], self.hidden_size,
                            self._influence_layer_N, self._use_orthogonal, self._activation_id)
        input_size += self.hidden_size

    self.act = ACTLayer(action_space, input_size, self._use_orthogonal, self._gain)

    # auxiliary value head attached to the policy features
    if self._use_policy_vhead:
        init_method = [nn.init.xavier_uniform_, nn.init.orthogonal_][self._use_orthogonal]

        def init_(m):
            # zero-bias initialization with the chosen weight scheme
            return init(m, init_method, lambda x: nn.init.constant_(x, 0))

        if self._use_popart:
            self.v_out = init_(PopArt(input_size, 1, device=device))
        else:
            self.v_out = init_(nn.Linear(input_size, 1))

    self.to(device)
def __init__(self, args, obs_space, action_space, device=torch.device("cpu")):
    """Build the recurrent actor: feature base (CNN or MLP), optional RNN,
    an action head, and an optional auxiliary policy value head."""
    super(R_Actor, self).__init__()
    self.hidden_size = args.hidden_size
    self._gain = args.gain
    self._use_orthogonal = args.use_orthogonal
    self._use_policy_active_masks = args.use_policy_active_masks
    self._use_naive_recurrent_policy = args.use_naive_recurrent_policy
    self._use_recurrent_policy = args.use_recurrent_policy
    self._use_policy_vhead = args.use_policy_vhead
    self._recurrent_N = args.recurrent_N
    # kwargs used when moving incoming tensors onto this module's device
    self.tpdv = dict(dtype=torch.float32, device=device)

    # image-like (3-dim) observations get a CNN base, otherwise an MLP
    obs_shape = get_shape_from_obs_space(obs_space)
    base_cls = CNNBase if len(obs_shape) == 3 else MLPBase
    self.base = base_cls(args, obs_shape)

    if self._use_naive_recurrent_policy or self._use_recurrent_policy:
        self.rnn = RNNLayer(self.hidden_size, self.hidden_size,
                            self._recurrent_N, self._use_orthogonal)

    # action head handles Discrete/Box/etc. spaces internally
    self.act = ACTLayer(action_space, self.hidden_size,
                        self._use_orthogonal, self._gain)

    # optional scalar value head attached to the policy trunk
    if self._use_policy_vhead:
        # index 0 -> xavier, index 1 -> orthogonal (selected by the bool flag)
        weight_init_fn = [nn.init.xavier_uniform_, nn.init.orthogonal_][self._use_orthogonal]

        def init_(m):
            # zero-bias initialization with the chosen weight scheme
            return init(m, weight_init_fn, lambda x: nn.init.constant_(x, 0))

        self.v_out = init_(nn.Linear(self.hidden_size, 1))

    self.to(device)