# Value network with a separate feature extractor: f_net embeds the raw
# observation and v_net maps the embedding to a scalar state value.
def __init__(
    self,
    obs_dim: int,
    hidden_sizes: Union[List[int], Tuple[int]],
    activation: torch.nn.Module,
):
    super().__init__()
    # obs_dim -> hidden_sizes[0] feature extractor.
    self.f_net = mlp([obs_dim, hidden_sizes[0]], activation)
    # hidden_sizes[1:] -> 1 value head.
    self.v_net = mlp(list(hidden_sizes[1:]) + [1], activation)
    self.apply(init_weights)

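# The constructors in this file call two helpers, `mlp` and `init_weights`,
# that are not part of this excerpt. The sketch below is an assumption with
# hypothetical names: it follows the common SpinningUp-style signature
# mlp(sizes, activation, output_activation), which matches the call sites here
# (e.g. mlp([obs_dim + feature_dim] + list(hidden_sizes), activation, activation)).
def _mlp_sketch(sizes, activation, output_activation=nn.Identity):
    # Linear layers of the given widths, each followed by an activation; the
    # final layer uses `output_activation` instead.
    layers = []
    for i in range(len(sizes) - 1):
        act = activation if i < len(sizes) - 2 else output_activation
        layers += [nn.Linear(sizes[i], sizes[i + 1]), act()]
    return nn.Sequential(*layers)


def _init_weights_sketch(m):
    # One common choice; the repository's actual `init_weights` may differ.
    if isinstance(m, nn.Linear):
        nn.init.orthogonal_(m.weight)
        nn.init.zeros_(m.bias)
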
# Gaussian actor for a continuous (Box) action space with a 1-D convolutional
# feature extractor for the LiDAR part of the observation.
def __init__(
    self,
    obs_dim: int,
    act_space: Box,
    hidden_sizes: Union[List[int], Tuple[int]],
    activation: torch.nn.Module,
    history_len: int,
    feature_dim: int = 25,
):
    super().__init__()
    act_dim = act_space.shape[0]
    # Action bounds, kept as tensors for rescaling the policy output.
    self.act_high = torch.as_tensor(act_space.high)
    self.act_low = torch.as_tensor(act_space.low)
    # Trunk over the concatenated [state, lidar feature] vector; the second
    # `activation` argument is the output activation of the last hidden layer.
    self.net = mlp(
        [obs_dim + feature_dim] + list(hidden_sizes),
        activation,
        activation,
    )
    self.mu_layer = nn.Linear(hidden_sizes[-1], act_dim)
    # Compress the (history_len, num_rays) LiDAR stack into a feature_dim-sized
    # vector using circularly padded convolutions.
    self.lidar_features = nn.Sequential(
        nn.Conv1d(history_len, 1, 4, 2, 2, padding_mode="circular"),
        nn.Conv1d(1, 1, 4, 2, 2, padding_mode="circular"),
        nn.AdaptiveAvgPool1d(feature_dim),
    )
    # State-independent log-std. Note the shape is set before wrapping in
    # nn.Parameter; calling .unsqueeze(0) on the Parameter afterwards would
    # leave a plain tensor that is never registered (or trained) as a parameter.
    self.log_std = nn.Parameter(-0.5 * torch.ones(1, act_dim))
    self.history_len = history_len
    self.apply(init_weights)

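# Shape walkthrough for the LiDAR extractor above. The beam count (360) and
# obs_dim (10) are illustrative values, not taken from the source, and the
# squeeze/concatenate step is an assumption about the forward pass, which is
# not part of this excerpt.
def _lidar_shape_demo(history_len: int = 5, feature_dim: int = 25):
    lidar_features = nn.Sequential(
        nn.Conv1d(history_len, 1, 4, 2, 2, padding_mode="circular"),
        nn.Conv1d(1, 1, 4, 2, 2, padding_mode="circular"),
        nn.AdaptiveAvgPool1d(feature_dim),
    )
    scan = torch.rand(8, history_len, 360)    # (batch, history_len, num_rays)
    feat = lidar_features(scan)               # (8, 1, feature_dim)
    feat = feat.squeeze(1)                    # (8, feature_dim)
    state = torch.rand(8, 10)                 # (batch, obs_dim), obs_dim = 10
    return torch.cat([state, feat], dim=-1)   # (8, obs_dim + feature_dim)
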
# Discrete-action head: an MLP that maps the observation to one output per
# action of a gym Discrete space.
def __init__(
    self,
    obs_dim: int,
    act_space: Discrete,
    hidden_sizes: Union[List[int], Tuple[int]] = (256, 256),
    activation: nn.Module = nn.ReLU,
):
    super().__init__()
    self.deviation_net = mlp(
        [obs_dim] + list(hidden_sizes) + [act_space.n],
        activation,
    )
    self.apply(init_weights)

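# Assumed usage of the discrete head above: treat the per-action outputs of
# `deviation_net` as logits of a categorical policy. The real forward pass is
# not part of this excerpt, so this is only a sketch.
def _categorical_policy_sketch(model, obs):
    logits = model.deviation_net(obs)          # (batch, act_space.n)
    return torch.distributions.Categorical(logits=logits)
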
# Centralized critic for multi-agent training: scores the concatenation of all
# agents' observations with a single scalar value.
def __init__(
    self,
    obs_dim: int,
    hidden_sizes: Union[List[int], Tuple[int]],
    activation: torch.nn.Module,
    nagents: int,
):
    super().__init__()
    self.v_net = mlp(
        [obs_dim * nagents] + list(hidden_sizes) + [1],
        activation,
    )
    self.nagents = nagents
    self.apply(init_weights)

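# Usage sketch (assumed calling convention, not shown in this excerpt): the
# centralized critic takes the per-agent observations concatenated along the
# feature axis, so a (batch, nagents, obs_dim) tensor is flattened to
# (batch, nagents * obs_dim) before being passed to v_net.
def _centralized_value_sketch(critic, obs):
    batch = obs.shape[0]
    return critic.v_net(obs.reshape(batch, -1))  # -> (batch, 1)
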
# Critic counterpart of the LiDAR actor above: the same convolutional LiDAR
# extractor feeds an MLP value head.
def __init__(
    self,
    obs_dim: int,
    hidden_sizes: Union[List[int], Tuple[int]],
    activation: torch.nn.Module,
    history_len: int,
    feature_dim: int = 25,
):
    super().__init__()
    # [state, lidar feature] -> hidden_sizes[0].
    self.feature_net = mlp(
        [obs_dim + feature_dim, hidden_sizes[0]],
        activation,
    )
    self.lidar_features = nn.Sequential(
        nn.Conv1d(history_len, 1, 4, 2, 2, padding_mode="circular"),
        nn.Conv1d(1, 1, 4, 2, 2, padding_mode="circular"),
        nn.AdaptiveAvgPool1d(feature_dim),
    )
    # hidden_sizes[0] -> ... -> 1 scalar state value.
    self.v_net = mlp(
        list(hidden_sizes) + [1],
        activation,
    )
    self.history_len = history_len
    self.apply(init_weights)

# Plain Gaussian actor for a Box action space (no LiDAR extractor): an MLP
# trunk, a linear mean head, and a state-independent log-std parameter.
def __init__(
    self,
    obs_dim: int,
    act_space: Box,
    hidden_sizes: Union[List[int], Tuple[int]] = (256, 256),
    activation: torch.nn.Module = torch.nn.ReLU,
):
    super().__init__()
    act_dim = act_space.shape[0]
    self.act_high = torch.as_tensor(act_space.high)
    self.act_low = torch.as_tensor(act_space.low)
    self.net = mlp(
        [obs_dim] + list(hidden_sizes),
        activation,
    )
    self.mu_layer = nn.Linear(hidden_sizes[-1], act_dim)
    self.log_std = nn.Parameter(-0.5 * torch.ones(act_dim))
    self.apply(init_weights)

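# Assumed usage of the Gaussian actor above: combine the mean head with the
# state-independent log-std to form a Normal distribution. The real class is
# expected to also rescale samples into [act_low, act_high]; that step is
# omitted here because the forward pass is not part of this excerpt.
def _gaussian_policy_sketch(actor, obs):
    mu = actor.mu_layer(actor.net(obs))        # (batch, act_dim)
    std = torch.exp(actor.log_std)             # (act_dim,)
    dist = torch.distributions.Normal(mu, std)
    action = dist.sample()
    return action, dist.log_prob(action).sum(dim=-1)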