Example #1
 def __init__(
         self,
         image_shape,
         output_size,
         fc_sizes=512,
         dueling=False,
         use_maxpool=False,
         channels=None,  # None uses default.
         kernel_sizes=None,
         strides=None,
         paddings=None,
         ):
     super().__init__()
     self.dueling = dueling
     c, h, w = image_shape
     self.conv = Conv2dModel(
         in_channels=c,
         channels=channels or [32, 64, 64],
         kernel_sizes=kernel_sizes or [8, 4, 3],
         strides=strides or [4, 2, 1],
         paddings=paddings or [0, 1, 1],
         use_maxpool=use_maxpool,
     )
     conv_out_size = self.conv.conv_out_size(h, w)
     if dueling:
         self.head = DuelingHeadModel(conv_out_size, fc_sizes, output_size)
     else:
         self.head = MlpModel(conv_out_size, fc_sizes, output_size)
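The head's input width comes from conv_out_size(h, w). As a hedged illustration (plain PyTorch, hypothetical 4 x 84 x 84 input; the library's internals may differ), the same number can be obtained by a dry run of the default conv stack:

import torch

conv = torch.nn.Sequential(
    torch.nn.Conv2d(4, 32, kernel_size=8, stride=4, padding=0), torch.nn.ReLU(),
    torch.nn.Conv2d(32, 64, kernel_size=4, stride=2, padding=1), torch.nn.ReLU(),
    torch.nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1), torch.nn.ReLU(),
)
with torch.no_grad():
    conv_out_size = conv(torch.zeros(1, 4, 84, 84)).flatten(1).shape[1]
print(conv_out_size)  # flattened feature size fed to the MLP or dueling head (6400 here)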
Example #2
 def __init__(
         self,
         observation_shape,
         action_size,
         hidden_sizes=None,  # None for default (see below).
         lstm_size=256,
         nonlinearity=torch.nn.ReLU,
         normalize_observation=False,
         norm_obs_clip=10,
         norm_obs_var_clip=1e-6,
 ):
     super().__init__()
     self._obs_n_dim = len(observation_shape)
     self._action_size = action_size
     hidden_sizes = hidden_sizes or [256, 256]
     mlp_input_size = int(np.prod(observation_shape))
     self.mlp = MlpModel(
         input_size=mlp_input_size,
         hidden_sizes=hidden_sizes,
         output_size=None,
         nonlinearity=nonlinearity,
     )
     mlp_output_size = hidden_sizes[-1] if hidden_sizes else mlp_input_size
     self.lstm = torch.nn.LSTM(mlp_output_size + action_size + 1, lstm_size)
     self.head = torch.nn.Linear(lstm_size, action_size * 2 + 1)
     if normalize_observation:
         self.obs_rms = RunningMeanStdModel(observation_shape)
         self.norm_obs_clip = norm_obs_clip
         self.norm_obs_var_clip = norm_obs_var_clip
     self.normalize_observation = normalize_observation
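A minimal sketch (assumed time-major [T, B, ...] inputs, one-hot previous action, scalar previous reward) of why the LSTM input width is mlp_output_size + action_size + 1:

import torch

T, B, feat, act = 5, 4, 256, 3
lstm = torch.nn.LSTM(feat + act + 1, 256)
features = torch.randn(T, B, feat)          # MLP output
prev_action = torch.randn(T, B, act)        # e.g. one-hot previous action
prev_reward = torch.randn(T, B, 1)          # previous scalar reward
lstm_input = torch.cat([features, prev_action, prev_reward], dim=-1)
lstm_out, (h, c) = lstm(lstm_input)         # lstm_out: [T, B, 256]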
Example #3
 def __init__(
     self,
     image_shape,
     output_size,
     fc_sizes=512,
     dueling=False,
     use_maxpool=False,
     channels=None,  # None uses default.
     kernel_sizes=None,
     strides=None,
     paddings=None,
 ):
     """Instantiates the neural network according to arguments; network defaults
     stored within this method."""
     super().__init__()
     self.dueling = dueling
     c, h, w = image_shape
     self.conv = Conv2dModel(
         in_channels=c,
         channels=channels or [32, 64, 64],
         kernel_sizes=kernel_sizes or [8, 4, 3],
         strides=strides or [4, 2, 1],
         paddings=paddings or [0, 1, 1],
         use_maxpool=use_maxpool,
     )
     conv_out_size = self.conv.conv_out_size(h, w)
     if dueling:
         self.head = DuelingHeadModel(conv_out_size, fc_sizes, output_size)
     else:
         self.head = MlpModel(conv_out_size, fc_sizes, output_size)
Example #4
    def __init__(
        self,
        image_shape,
        latent_size,
        use_fourth_layer=True,
        skip_connections=True,
        hidden_sizes=None,
        kiaming_init=True,
    ):
        super().__init__()
        c, h, w = image_shape
        self.conv = DmlabConv2dModel(
            in_channels=c,
            use_fourth_layer=True,
            skip_connections=skip_connections,
            use_maxpool=False,
        )
        self._output_size = self.conv.output_size(h, w)
        self._output_shape = self.conv.output_shape(h, w)

        self.head = MlpModel(  # gets to z_t, not necessarily c_t
            input_size=self._output_size,
            hidden_sizes=hidden_sizes,
            output_size=latent_size,
        )
        if kiaming_init:
            self.apply(weight_init)
Example #5
    def __init__(
        self,
        observation_shape,
        action_size,
        hidden_sizes=[64, 64],  # mlp after lstm
        fc_sizes=64,  # Between conv and lstm
        channels=None,
        kernel_sizes=None,
        strides=None,
        paddings=None,
        use_maxpool=False,
    ):
        """Instantiate neural net according to inputs."""
        super().__init__()
        self._obs_ndim = len(observation_shape)

        self.conv = Conv2dHeadModel(
            image_shape=observation_shape,
            channels=channels or [4, 8],
            kernel_sizes=kernel_sizes or [8, 4],
            strides=strides or [4, 2],
            paddings=paddings or [0, 1],
            use_maxpool=use_maxpool,
            hidden_sizes=fc_sizes,  # Applies nonlinearity at end.
        )  # image -> conv (ReLU) -> linear (fc_sizes) -> ReLU
        self.mlp = MlpModel(
            input_size=self.conv.output_size + action_size,
            hidden_sizes=hidden_sizes,
            output_size=1,
        )
Example #6
 def __init__(
     self,
     image_shape,
     latent_size,
     channels,
     kernel_sizes,
     strides,
     paddings=None,
     hidden_sizes=None,  # usually None; NOT the same as anchor MLP
     kiaming_init=True,
 ):
     super().__init__()
     c, h, w = image_shape
     self.conv = Conv2dModel(
         in_channels=c,
         channels=channels,
         kernel_sizes=kernel_sizes,
         strides=strides,
         paddings=paddings,
         use_maxpool=False,
     )
     self._output_size = self.conv.conv_out_size(h, w)
     self._output_shape = self.conv.conv_out_shape(h, w)
     self.head = MlpModel(
         input_size=self._output_size,
         hidden_sizes=hidden_sizes,
         output_size=latent_size,
     )
     if kiaming_init:
         self.apply(weight_init)
Example #7
    def __init__(
        self,
        observation_shape,
        action_size,
        policy_hidden_sizes=None,
        policy_hidden_nonlinearity=torch.nn.Tanh,
        value_hidden_sizes=None,
        value_hidden_nonlinearity=torch.nn.Tanh,
        init_log_std=0.,
        min_std=0.,
        normalize_observation=False,
        norm_obs_clip=10,
        norm_obs_var_clip=1e-6,
        policy_inputs_indices=None,
    ):
        super().__init__()
        self.min_std = min_std
        self._obs_ndim = len(observation_shape)
        input_size = int(np.prod(observation_shape))
        self.policy_inputs_indices = policy_inputs_indices if policy_inputs_indices is not None else list(
            range(input_size))

        policy_hidden_sizes = [
            400, 300
        ] if policy_hidden_sizes is None else policy_hidden_sizes
        value_hidden_sizes = [
            400, 300
        ] if value_hidden_sizes is None else value_hidden_sizes
        self.mu = MlpModel(input_size=len(self.policy_inputs_indices),
                           hidden_sizes=policy_hidden_sizes,
                           output_size=action_size,
                           nonlinearity=policy_hidden_nonlinearity)
        self.v = MlpModel(
            input_size=input_size,
            hidden_sizes=value_hidden_sizes,
            output_size=1,
            nonlinearity=value_hidden_nonlinearity,
        )
        self._log_std = torch.nn.Parameter(
            (np.log(np.exp(init_log_std) - self.min_std)) *
            torch.ones(action_size))
        if normalize_observation:
            self.obs_rms = RunningMeanStdModel(observation_shape)
            self.norm_obs_clip = norm_obs_clip
            self.norm_obs_var_clip = norm_obs_var_clip
        self.normalize_observation = normalize_observation
Example #8
 def __init__(self, latent_size, action_size, hidden_sizes):
     super().__init__()
     self.head = MlpModel(
         input_size=latent_size + action_size,
         hidden_sizes=hidden_sizes,
         output_size=latent_size * 2,
     )
     self._latent_size = latent_size
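A hedged sketch of how the 2 * latent_size output is commonly used (assumption: it parameterizes a diagonal Gaussian over the next latent, split into mean and log-std halves):

import torch

latent_size = 16
head_out = torch.randn(8, latent_size * 2)               # head output for a batch of 8
mu, log_std = head_out.chunk(2, dim=-1)
next_latent = mu + log_std.exp() * torch.randn_like(mu)  # reparameterized sample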
Example #9
 def __init__(self, latent_size, local_size, anchor_hidden_sizes):
     super().__init__()
     self.anchor_mlp = MlpModel(
         input_size=latent_size,
         hidden_sizes=anchor_hidden_sizes,
         output_size=latent_size,
     )
     self.W = torch.nn.Linear(latent_size, local_size, bias=False)
Example #10
 def __init__(
         self,
         input_size,
         hidden_sizes,
         output_size,
         grad_scale=2**(-1 / 2),
 ):
     super().__init__()
     if isinstance(hidden_sizes, int):
         hidden_sizes = [hidden_sizes]
     self.advantage_hidden = MlpModel(input_size, hidden_sizes)
     self.advantage_out = torch.nn.Linear(hidden_sizes[-1],
                                          output_size,
                                          bias=False)
     self.advantage_bias = torch.nn.Parameter(torch.zeros(1))
     self.value = MlpModel(input_size, hidden_sizes, output_size=1)
     self._grad_scale = grad_scale
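For reference, a standalone sketch of the standard dueling aggregation these value/advantage streams typically feed (assumption: Q = V + A - mean(A); the snippet's advantage_bias and grad_scale are omitted):

import torch

B, n_actions = 8, 6
value = torch.randn(B, 1)              # output of the value stream
advantage = torch.randn(B, n_actions)  # output of the advantage stream
q = value + advantage - advantage.mean(dim=-1, keepdim=True)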
Example #11
 def __init__(
         self,
         observation_shape,
         action_size,
         hidden_sizes=None,
         lstm_size=None,
         lstm_skip=True,
         constraint=True,
         hidden_nonlinearity="tanh",  # or "relu"
         mu_nonlinearity="tanh",
         init_log_std=0.,
         normalize_observation=True,
         var_clip=1e-6,
         ):
     super().__init__()
     if hidden_nonlinearity == "tanh":  # So these can be strings in config file.
         hidden_nonlinearity = torch.nn.Tanh
     elif hidden_nonlinearity == "relu":
         hidden_nonlinearity = torch.nn.ReLU
     else:
         raise ValueError(f"Unrecognized hidden_nonlinearity string: {hidden_nonlinearity}")
     if mu_nonlinearity == "tanh":  # So these can be strings in config file.
         mu_nonlinearity = torch.nn.Tanh
     elif mu_nonlinearity == "relu":
         mu_nonlinearity = torch.nn.ReLU
     else:
         raise ValueError(f"Unrecognized mu_nonlinearity string: {mu_nonlinearity}")
     self._obs_ndim = len(observation_shape)
     input_size = int(np.prod(observation_shape))
     self.body = MlpModel(
         input_size=input_size,
         hidden_sizes=hidden_sizes or [256, 256],
         nonlinearity=hidden_nonlinearity,
     )
     last_size = self.body.output_size
     if lstm_size:
         lstm_input_size = last_size + action_size + 1
         self.lstm = torch.nn.LSTM(lstm_input_size, lstm_size)
         last_size = lstm_size
     else:
         self.lstm = None
     mu_linear = torch.nn.Linear(last_size, action_size)
     if mu_nonlinearity is not None:
         self.mu = torch.nn.Sequential(mu_linear, mu_nonlinearity())
     else:
         self.mu = mu_linear
     self.value = torch.nn.Linear(last_size, 1)
     if constraint:
         self.constraint = torch.nn.Linear(last_size, 1)
     else:
         self.constraint = None
     self.log_std = torch.nn.Parameter(init_log_std *
         torch.ones(action_size))
     self._lstm_skip = lstm_skip
     if normalize_observation:
         self.obs_rms = RunningMeanStdModel(observation_shape)
         self.var_clip = var_clip
     self.normalize_observation = normalize_observation
Example #12
 def __init__(
     self,
     observation_shape,
     action_size,
     hidden_sizes=None,  # None for default (see below).
     hidden_nonlinearity=torch.nn.Tanh,  # Module form.
     mu_nonlinearity=torch.nn.Tanh,  # Module form.
     init_log_std=0.,
     normalize_observation=True,
     norm_obs_clip=10,
     norm_obs_var_clip=1e-6,
     baselines_init=True,  # Orthogonal initialization of sqrt(2) until last layer, then 0.01 for policy, 1 for value
 ):
     """Instantiate neural net modules according to inputs."""
     super().__init__()
     self._obs_ndim = len(observation_shape)
     input_size = int(np.prod(observation_shape))
     hidden_sizes = hidden_sizes or [64, 64]
     inits_mu = inits_v = None
     if baselines_init:
         inits_mu = (np.sqrt(2), 0.01)
         inits_v = (np.sqrt(2), 1.)
     mu_mlp = torch.jit.script(
         MlpModel(input_size=input_size,
                  hidden_sizes=hidden_sizes,
                  output_size=action_size,
                  nonlinearity=hidden_nonlinearity,
                  inits=inits_mu))
     if mu_nonlinearity is not None:
         self.mu = torch.nn.Sequential(mu_mlp, mu_nonlinearity())
     else:
         self.mu = mu_mlp
     self.v = torch.jit.script(
         MlpModel(input_size=input_size,
                  hidden_sizes=hidden_sizes,
                  output_size=1,
                  nonlinearity=hidden_nonlinearity,
                  inits=inits_v))
     self.log_std = torch.nn.Parameter(init_log_std *
                                       torch.ones(action_size))
     if normalize_observation:
         self.obs_rms = RunningMeanStdModel(observation_shape)
         self.norm_obs_clip = norm_obs_clip
         self.norm_obs_var_clip = norm_obs_var_clip
     self.normalize_observation = normalize_observation
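A brief sketch (assumption: a diagonal Gaussian policy with a state-independent, learned log std) of how mu and log_std are typically combined into an action distribution:

import torch

action_size = 4
mu = torch.zeros(2, action_size)                        # network output, batch of 2
log_std = torch.nn.Parameter(torch.zeros(action_size))  # shared across the batch
dist = torch.distributions.Normal(mu, log_std.exp().expand_as(mu))
action = dist.sample()
log_prob = dist.log_prob(action).sum(-1)                # per-sample log-probability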
Example #13
class POMDPRnnShared0Rnn(nn.Module):
    def __init__(self,
                 input_classes: int,
                 output_size: int,
                 rnn_type: str = 'gru',
                 rnn_size: int = 256,
                 hidden_sizes: [List, Tuple] = None,
                 baselines_init: bool = True,
                 layer_norm: bool = False,
                 prev_action: int = 2,
                 prev_reward: int = 2,
                 ):
        super().__init__()
        self._obs_dim = 0
        self.rnn_is_lstm = rnn_type != 'gru'
        self.preprocessor = tscr(OneHotLayer(input_classes))
        rnn_class = get_rnn_class(rnn_type, layer_norm)
        rnn_input_size = input_classes
        if prev_action: rnn_input_size += output_size  # Use previous action as input
        if prev_reward: rnn_input_size += 1  # Use previous reward as input
        self.rnn = rnn_class(rnn_input_size, rnn_size)  # Concat action, reward
        self.body = MlpModel(rnn_size, hidden_sizes, None, nn.ReLU, None)
        self.pi = nn.Sequential(nn.Linear(self.body.output_size, output_size), nn.Softmax(-1))
        self.v = nn.Linear(self.body.output_size, 1)
        if baselines_init:
            self.rnn.apply(apply_init); self.body.apply(apply_init)
            self.pi.apply(partial(apply_init, gain=O_INIT_VALUES['pi']))
            self.v.apply(partial(apply_init, gain=O_INIT_VALUES['v']))
        self.body, self.pi, self.v = tscr(self.body), tscr(self.pi), tscr(self.v)
        self.p_a = prev_action > 0
        self.p_r = prev_reward > 0

    def forward(self, observation, prev_action, prev_reward, init_rnn_state):
        lead_dim, T, B, _ = infer_leading_dims(observation, self._obs_dim)
        if init_rnn_state is not None and self.rnn_is_lstm: init_rnn_state = tuple(init_rnn_state)  # namedarraytuple -> tuple (h, c)
        oh = self.preprocessor(observation)  # Leave in TxB format for lstm
        inp_list = [oh.view(T,B,-1)] + ([prev_action.view(T, B, -1)] if self.p_a else []) + ([prev_reward.view(T, B, 1)] if self.p_r else [])
        rnn_input = torch.cat(inp_list, dim=2)
        rnn_out, next_rnn_state = self.rnn(rnn_input, init_rnn_state)
        rnn_out = rnn_out.view(T*B, -1)
        rnn_out = self.body(rnn_out)
        pi, v = self.pi(rnn_out), self.v(rnn_out).squeeze(-1)
        pi, v = restore_leading_dims((pi, v), lead_dim, T, B)
        if self.rnn_is_lstm: next_rnn_state = RnnState(next_rnn_state)
        return pi, v, next_rnn_state
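The forward pass above leans on the infer_leading_dims / restore_leading_dims idiom; a minimal plain-PyTorch sketch of the same idea (assumed [T, B, ...] input) is:

import torch

T, B, H = 5, 4, 16
x = torch.randn(T, B, H)
flat = x.view(T * B, H)            # feedforward modules see an ordinary batch
y = torch.nn.Linear(H, 2)(flat)
y = y.view(T, B, -1)               # restore the [T, B] leading dims afterwards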
Example #14
 def __init__(self,
              input_classes: int,
              output_size: int,
              hidden_sizes: [List, Tuple, None] = None,
              inits: [(float, float, float), None] = (np.sqrt(2), 1., 0.01),
              nonlinearity: nn.Module = nn.ReLU,
              shared_processor: bool = False
              ):
     super().__init__()
     self._obs_ndim = 0
     if shared_processor:
         self.preprocessor = tscr(nn.Sequential(OneHotLayer(input_classes), MlpModel(input_classes, hidden_sizes, None, nonlinearity, inits[:-1] if inits is not None else inits)))
         self.v = tscr(layer_init(nn.Linear(hidden_sizes[-1], 1), inits[1]) if inits else nn.Linear(hidden_sizes[-1], 1))
         self.pi = tscr(nn.Sequential(layer_init(nn.Linear(hidden_sizes[-1], output_size), inits[1]) if inits else nn.Linear(hidden_sizes[-1], output_size), nn.Softmax(-1)))
     else:
         self.preprocessor = tscr(OneHotLayer(input_classes))
         self.v = tscr(MlpModel(input_classes, hidden_sizes, 1, nonlinearity, inits[:-1] if inits is not None else inits))
         self.pi = tscr(nn.Sequential(MlpModel(input_classes, hidden_sizes, output_size, nonlinearity, inits[0::2] if inits is not None else inits), nn.Softmax(-1)))
Example #15
 def __init__(
     self,
     input_size,
     action_size,
     hidden_sizes,
 ):
     super().__init__()
     self.mlp1 = MlpModel(
         input_size=input_size + action_size,
         hidden_sizes=hidden_sizes,
         output_size=1,
     )
     self.mlp2 = MlpModel(
         input_size=input_size + action_size,
         hidden_sizes=hidden_sizes,
         output_size=1,
     )
     self.apply(weight_init)
Example #16
 def __init__(
         self,
         input_shape,  # Must be 1D
         hidden_sizes,
         output_size=None,
         nonlinearity=torch.nn.Identity):
     """Instantiate MLP feature extractor. Does not support parameter sharing with base network."""
     super().__init__()
     self.extractor = MlpModel(input_shape, hidden_sizes, output_size,
                               nonlinearity)
Example #17
 def __init__(self,
              input_shape,
              output_size,
              hidden_sizes=[256, 256],
              action_mask=True):
     """Instantiates the neural network according to arguments; network defaults
     stored within this method."""
     super().__init__()
     self.head = MlpModel(input_shape, hidden_sizes, output_size)
     self.action_mask = action_mask
Example #18
 def __init__(self, latent_size, anchor_hidden_sizes):
     super().__init__()
     if anchor_hidden_sizes is not None:
         self.anchor_mlp = MlpModel(
             input_size=latent_size,
             hidden_sizes=anchor_hidden_sizes,
             output_size=latent_size,
         )
     else:
         self.anchor_mlp = None
     self.W = torch.nn.Linear(latent_size, latent_size, bias=False)
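A hedged sketch of how a bias-free W is often used for contrastive scoring (assumption: CURL/CPC-style bilinear logits, anchor_i^T W positive_j, with matching pairs on the diagonal):

import torch

latent_size, B = 32, 8
W = torch.nn.Linear(latent_size, latent_size, bias=False)
anchor = torch.randn(B, latent_size)
positive = torch.randn(B, latent_size)
logits = W(anchor) @ positive.t()                 # [B, B] similarity matrix
labels = torch.arange(B)                          # positives on the diagonal
loss = torch.nn.functional.cross_entropy(logits, labels)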
Example #19
class CartpoleFfModel(torch.nn.Module):

    def __init__(
            self,
            image_shape,
            output_size,
            fc_sizes=[64, 64],
            basis=None,
            gain_type="xavier",
            out=None,
            ):
        super().__init__()

        input_size = image_shape[0]
        # Main body
        self.head = MlpModel(input_size, fc_sizes)
        # Policy output
        self.pi = torch.nn.Linear(fc_sizes[-1], output_size)
        # Value output
        self.value = torch.nn.Linear(fc_sizes[-1], 1)

        if gain_type == "xavier":
            self.head.apply(weight_init)
            self.pi.apply(weight_init)
            self.value.apply(weight_init)


    def forward(self, in_state, prev_action, prev_reward):
        """Feedforward layers process as [T*B,H]. Return same leading dims as
        input, can be [T,B], [B], or []."""
        state = in_state.type(torch.float)  # Expect torch.uint8 inputs
        # Infer (presence of) leading dimensions: [T,B], [B], or [].
        lead_dim, T, B, state_shape = infer_leading_dims(state, 1)

        base = self.head(state.view(T * B, -1))
        pi = F.softmax(self.pi(base), dim=-1)
        v = self.value(base).squeeze(-1)

        # Restore leading dimensions: [T,B], [B], or [], as input.
        pi, v = restore_leading_dims((pi, v), lead_dim, T, B)
        return pi, v
Example #20
    def __init__(
        self,
        observation_shape,
        action_size,
        hidden_sizes=None,  # None for default (see below).
        hidden_nonlinearity=torch.nn.Tanh,  # Module form.
        mu_nonlinearity=torch.nn.Tanh,  # Module form.
        init_log_std=0.,
        pooling="average",
    ):
        super().__init__()
        self._obs_ndim = len(observation_shape)
        self._n_pop = observation_shape[0]

        input_size = int(observation_shape[-1])
        output_size = int(action_size[-1])
        hidden_sizes = hidden_sizes or [64, 64]

        self.pooling = pooling
        # self.pool = self.make_pooler(pooling)
        if self.pooling is not None:
            input_size *= 2

        mu_mlp = MlpModel(
            input_size=input_size,
            hidden_sizes=hidden_sizes,
            output_size=output_size,
            nonlinearity=hidden_nonlinearity,
        )
        if mu_nonlinearity is not None:
            self.mu = torch.nn.Sequential(mu_mlp, mu_nonlinearity())
        else:
            self.mu = mu_mlp
        self.v = MlpModel(
            input_size=input_size,
            hidden_sizes=hidden_sizes,
            output_size=1,
            nonlinearity=hidden_nonlinearity,
        )
        self.log_std = torch.nn.Parameter(init_log_std *
                                          torch.ones(action_size))
Example #21
    def __init__(
            self,
            observation_shape,
            action_size,
            hidden_sizes=None,  # None for default (see below).
            hidden_nonlinearity=torch.nn.Tanh,  # Module form.
            mu_nonlinearity=torch.nn.Tanh,  # Module form.
            init_log_std=0.,
    ):
        super().__init__()
        assert hasattr(observation_shape, 'camera'), "VisionFfModel requires observation to contain 'camera' attr"
        assert hasattr(observation_shape,
                       'robot_state'), "VisionFfModel requires observation to contain 'robot_state' attr"
        self.height, self.width, self.channels = observation_shape.camera
        robot_state_shape = observation_shape.robot_state[0]
        self.conv = Conv2dModel(
            in_channels=self.channels,
            channels=[9, 18],
            kernel_sizes=[3, 3],
            strides=[2, 2],
            paddings=[1, 1],
        )
        conv_out_size = self.conv.conv_out_size(self.height, self.width)
        robot_state_out = 256
        self.robot_state_mlp = MlpModel(
            input_size=robot_state_shape,
            hidden_sizes=[256, ],
            output_size=robot_state_out
        )
        self.mu_head = MlpModel(
            input_size=robot_state_out + conv_out_size,
            hidden_sizes=[256, ],
            output_size=action_size
        )
        self.value_head = MlpModel(
            input_size=robot_state_out + conv_out_size,
            hidden_sizes=[256, ],
            output_size=1
        )

        self.log_std = torch.nn.Parameter(init_log_std * torch.ones(action_size))
Example #22
 def __init__(self,
              input_shape,
              output_size,
              fc_sizes=[128, 128, 128],
              dueling=False):
     """Instantiates the neural network according to arguments; network defaults
     stored within this method."""
     super().__init__()
     if dueling:
         self.head = DuelingHeadModel(input_shape, fc_sizes, output_size)
     else:
         self.head = MlpModel(input_shape, fc_sizes, output_size)
Example #23
 def __init__(self,
              input_shape: Tuple,
              output_size: int,
              hidden_sizes: [List, Tuple, None] = None,
              nonlinearity: nn.Module = nn.ReLU
              ):
     super().__init__()
      self._obs_ndim = 2  # All bsuite observations are 2-D (even shape (1, 1))
     input_size = input_shape[0] * input_shape[1]
     self.preprocessor = MlpModel(input_size, hidden_sizes, None, nonlinearity)
     self.v = tscr(nn.Linear(self.preprocessor.output_size, 1))
     self.pi = tscr(nn.Sequential(nn.Linear(self.preprocessor.output_size, output_size), nn.Softmax(-1)))
Example #24
 def __init__(
     self,
     observation_shape,
     action_size,
     hidden_sizes=None,  # None for default (see below).
     hidden_nonlinearity=torch.nn.Tanh,  # Module form.
     mu_nonlinearity=torch.nn.Tanh,  # Module form.
     init_log_std=0.,
     normalize_observation=False,
     norm_obs_clip=10,
     norm_obs_var_clip=1e-6,
 ):
     """Instantiate neural net modules according to inputs."""
     super().__init__()
     self._obs_ndim = len(observation_shape)
     input_size = int(np.prod(observation_shape))
     hidden_sizes = hidden_sizes or [64, 64]
     mu_mlp = MlpModel(
         input_size=input_size,
         hidden_sizes=hidden_sizes,
         output_size=action_size,
         nonlinearity=hidden_nonlinearity,
     )
     if mu_nonlinearity is not None:
         self.mu = torch.nn.Sequential(mu_mlp, mu_nonlinearity())
     else:
         self.mu = mu_mlp
     self.v = MlpModel(
         input_size=input_size,
         hidden_sizes=hidden_sizes,
         output_size=1,
         nonlinearity=hidden_nonlinearity,
     )
     self.log_std = torch.nn.Parameter(init_log_std *
                                       torch.ones(action_size))
     if normalize_observation:
         self.obs_rms = RunningMeanStdModel(observation_shape)
         self.norm_obs_clip = norm_obs_clip
         self.norm_obs_var_clip = norm_obs_var_clip
     self.normalize_observation = normalize_observation
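A short sketch of the observation normalization these flags typically control (assumptions: batch statistics stand in for the running mean/variance, norm_obs_var_clip floors the variance, norm_obs_clip bounds the normalized values):

import torch

obs = torch.randn(32, 8)
mean, var = obs.mean(dim=0), obs.var(dim=0)            # stand-ins for running statistics
var = var.clamp(min=1e-6)                              # norm_obs_var_clip
norm_obs = ((obs - mean) / var.sqrt()).clamp(-10, 10)  # norm_obs_clip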
Example #25
class BsuiteRnnShared1Rnn(nn.Module):
    def __init__(self,
                 input_shape: Tuple,
                 output_size: int,
                 rnn_type: str = 'gru',
                 rnn_size: int = 256,
                 hidden_sizes: [List, Tuple] = None,
                 baselines_init: bool = True,
                 layer_norm: bool = False
                 ):
        super().__init__()
        self._obs_dim = 2
        self.rnn_is_lstm = rnn_type != 'gru'
        input_size = int(np.prod(input_shape))
        rnn_class = get_rnn_class(rnn_type, layer_norm)
        self.body = MlpModel(input_size, hidden_sizes, None, nn.ReLU, None)
        self.rnn = rnn_class(self.body.output_size + output_size + 1, rnn_size)  # Concat action, reward
        self.pi = nn.Sequential(nn.ReLU(), nn.Linear(rnn_size, output_size), nn.Softmax(-1))
        self.v = nn.Sequential(nn.ReLU(), nn.Linear(rnn_size, 1))
        if baselines_init:
            self.rnn.apply(apply_init); self.body.apply(apply_init)
            self.pi.apply(partial(apply_init, gain=O_INIT_VALUES['pi']))
            self.v.apply(partial(apply_init, gain=O_INIT_VALUES['v']))
        self.body, self.pi, self.v = tscr(self.body), tscr(self.pi), tscr(self.v)

    def forward(self, observation, prev_action, prev_reward, init_rnn_state):
        lead_dim, T, B, _ = infer_leading_dims(observation, self._obs_dim)
        if init_rnn_state is not None and self.rnn_is_lstm: init_rnn_state = tuple(init_rnn_state)  # namedarraytuple -> tuple (h, c)
        features = self.body(observation.view(T*B, -1))
        rnn_input = torch.cat([
            features.view(T,B,-1),
            prev_action.view(T, B, -1),  # Assumed onehot.
            prev_reward.view(T, B, 1),
            ], dim=2)
        rnn_out, next_rnn_state = self.rnn(rnn_input, init_rnn_state)
        rnn_out = rnn_out.view(T*B, -1)
        pi, v = self.pi(rnn_out), self.v(rnn_out).squeeze(-1)
        pi, v = restore_leading_dims((pi, v), lead_dim, T, B)
        if self.rnn_is_lstm: next_rnn_state = RnnState(next_rnn_state)
        return pi, v, next_rnn_state
Example #26
    def __init__(
        self,
        image_shape,
        action_size,
        hidden_sizes=512,
        stop_conv_grad=False,
        channels=None,  # Defaults below.
        kernel_sizes=None,
        strides=None,
        paddings=None,
        kiaming_init=True,
        normalize_conv_out=False,
    ):
        super().__init__()
        c, h, w = image_shape
        self.conv = Conv2dModel(
            in_channels=c,
            channels=channels or [32, 64, 64],
            kernel_sizes=kernel_sizes or [8, 4, 3],
            strides=strides or [4, 2, 1],
            paddings=paddings,
        )
        self._conv_out_size = self.conv.conv_out_size(h=h, w=w)
        self.pi_v_mlp = MlpModel(
            input_size=self._conv_out_size,
            hidden_sizes=hidden_sizes,
            output_size=action_size + 1,
        )
        if kiaming_init:
            self.apply(weight_init)

        self.stop_conv_grad = stop_conv_grad
        logger.log("Model stopping gradient at CONV." if stop_conv_grad else
                   "Modeul using gradients on all parameters.")
        if normalize_conv_out:
            # Haven't seen this make a difference yet.
            logger.log("Model normalizing conv output across all pixels.")
            self.conv_rms = RunningMeanStdModel((1, ))
            self.var_clip = 1e-6
        self.normalize_conv_out = normalize_conv_out
Example #27
 def __init__(self,
              input_shape: Tuple,
              output_size: int,
              rnn_type: str = 'gru',
              rnn_size: int = 256,
              hidden_sizes: [List, Tuple] = None,
              baselines_init: bool = True,
              layer_norm: bool = False
              ):
     super().__init__()
     self._obs_dim = 2
     self.rnn_is_lstm = rnn_type != 'gru'
     input_size = int(np.prod(input_shape))
     rnn_class = get_rnn_class(rnn_type, layer_norm)
     self.rnn = rnn_class(input_size + output_size + 1, rnn_size)  # Concat action, reward
     pi_inits = (O_INIT_VALUES['base'], O_INIT_VALUES['pi']) if baselines_init else None
     v_inits = (O_INIT_VALUES['base'], O_INIT_VALUES['v']) if baselines_init else None
     self.pi = nn.Sequential(MlpModel(rnn_size, hidden_sizes, output_size, nn.ReLU, pi_inits), nn.Softmax(-1))
     self.v = nn.Sequential(MlpModel(rnn_size, hidden_sizes, 1, nn.ReLU, v_inits))
     if baselines_init:
         self.rnn.apply(apply_init)
     self.pi, self.v = tscr(self.pi), tscr(self.v)
Example #28
 def __init__(
     self,
     observation_shape,
     hidden_sizes,
     action_size,
 ):
     super().__init__()
     self._obs_ndim = len(observation_shape)
     self.mlp = MlpModel(
         input_size=int(np.prod(observation_shape)) + action_size,
         hidden_sizes=hidden_sizes,
         output_size=1,
     )
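A minimal sketch of the state-action critic pattern above (assumption: the flattened observation and the action are concatenated before the MLP, which outputs a scalar Q-value):

import torch

obs = torch.randn(8, 11)                      # hypothetical flattened observations
action = torch.randn(8, 3)
q_net = torch.nn.Sequential(
    torch.nn.Linear(11 + 3, 64), torch.nn.ReLU(), torch.nn.Linear(64, 1))
q = q_net(torch.cat([obs, action], dim=-1)).squeeze(-1)   # one Q-value per sample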
Example #29
 def __init__(
         self,
         observation_shape,
         hidden_sizes,
         action_size=None,  # Unused but accept kwarg.
 ):
     super().__init__()
     self._obs_ndim = len(observation_shape)
     self.mlp = MlpModel(
         input_size=int(np.prod(observation_shape)),
         hidden_sizes=hidden_sizes,
         output_size=1,
     )
Example #30
 def __init__(
     self,
     observation_shape,
     action_size,
     hidden_sizes=None,  # None for default (see below).
     init_log_std=0.,
     normalize_observation=False,
     linear_value_output=True,
     norm_obs_clip=10,
     full_covariance=False,
     norm_obs_var_clip=1e-6,
 ):
     """Instantiate neural net modules according to inputs."""
     super().__init__()
     self._obs_ndim = len(observation_shape.state)
     input_size = int(np.prod(observation_shape.state))
     self.full_covariance = full_covariance
     hidden_sizes = hidden_sizes or [256, 256]
     self.action_size = action_size
     self.shared_features_dim = 256
     self.softplus = torch.nn.Softplus()
     self.shared_mlp = MlpModel(
         input_size=input_size,
         hidden_sizes=[512, self.shared_features_dim])
     self.mu_head = MlpModel(
         input_size=input_size,
         hidden_sizes=[256, 256],
         # output_size=action_size * 2,
          output_size=(action_size + np.sum(1 + np.arange(self.action_size)))
          if full_covariance else 2 * action_size)
     self.layer_norm = torch.nn.LayerNorm(input_size)
     # list(self.mu_head.parameters())[-1].data = list(self.mu_head.parameters())[-1].data / 100
     # list(self.mu_head.parameters())[-2].data = list(self.mu_head.parameters())[-2].data / 100
     self.v_head = MlpModel(
         input_size=input_size,
         hidden_sizes=[256, 256],
         output_size=1 if linear_value_output else None,
     )