Example #1
 def __init__(self,
              input_shape: Tuple,
              output_size: int,
              rnn_type: str = 'gru',
              rnn_size: int = 256,
              hidden_sizes: Union[List, Tuple] = None,
              baselines_init: bool = True,
              layer_norm: bool = False
              ):
     super().__init__()
     self._obs_dim = 2
     self.rnn_is_lstm = rnn_type != 'gru'
     input_size = int(np.prod(input_shape))
     rnn_class = get_rnn_class(rnn_type, layer_norm)
     self.body_pi = MlpModel(input_size, hidden_sizes, None, nn.ReLU, None)
     self.body_v = MlpModel(input_size, hidden_sizes, None, nn.ReLU, None)
     self.rnn_pi = rnn_class(self.body_pi.output_size + output_size + 1, rnn_size)  # Concat action, reward
     self.rnn_v = rnn_class(self.body_v.output_size + output_size + 1, rnn_size)
     self.pi = nn.Sequential(nn.ReLU(), nn.Linear(rnn_size, output_size), nn.Softmax(-1))  # Need to activate after lstm
     self.v = nn.Sequential(nn.ReLU(), nn.Linear(rnn_size, 1))
     if baselines_init:
         self.body_pi.apply(apply_init); self.body_v.apply(apply_init)
         self.rnn_pi.apply(apply_init); self.rnn_v.apply(apply_init)
         self.pi.apply(partial(apply_init, O_INIT_VALUES['pi']))
         self.v.apply(partial(apply_init, O_INIT_VALUES['v']))
     self.body_pi, self.body_v, self.pi, self.v = tscr(self.body_pi), tscr(self.body_v), tscr(self.pi), tscr(self.v)
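A minimal stand-alone sketch (not from the source project) of the pattern that the "+ output_size + 1" RNN input size encodes: the previous action is one-hot encoded and concatenated with the previous reward and the body features before the recurrent step. All names and shapes below are illustrative.

    import torch
    import torch.nn.functional as F

    # Illustrative sizes: T time steps, B batch, MLP body feature width.
    T, B, feat_size, n_actions, rnn_size = 5, 4, 128, 6, 256

    features = torch.randn(T, B, feat_size)            # stand-in for the body_pi output
    prev_action = torch.randint(0, n_actions, (T, B))  # integer previous actions
    prev_reward = torch.randn(T, B)                     # previous scalar rewards

    # One-hot the action and append the reward as one extra channel,
    # matching an RNN input size of feat_size + n_actions + 1.
    rnn_input = torch.cat([
        features,
        F.one_hot(prev_action, n_actions).float(),
        prev_reward.unsqueeze(-1),
    ], dim=-1)

    rnn = torch.nn.GRU(feat_size + n_actions + 1, rnn_size)
    out, h = rnn(rnn_input)  # out: (T, B, rnn_size)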
Example #2
 def __init__(
     self,
     observation_shape,
     action_size,
     hidden_sizes=None,  # None for default (see below).
     hidden_nonlinearity=torch.nn.Tanh,  # Module form.
     mu_nonlinearity=torch.nn.Tanh,  # Module form.
     init_log_std=0.,
 ):
     """Instantiate neural net modules according to inputs."""
     super().__init__()
     self._obs_ndim = len(observation_shape)
     input_size = int(np.prod(observation_shape))
     hidden_sizes = hidden_sizes or [64, 64]
     mu_mlp = MlpModel(
         input_size=input_size,
         hidden_sizes=hidden_sizes,
         output_size=action_size,
         nonlinearity=hidden_nonlinearity,
     )
     if mu_nonlinearity is not None:
         self.mu = torch.nn.Sequential(mu_mlp, mu_nonlinearity())
     else:
         self.mu = mu_mlp
     self.v = MlpModel(
         input_size=input_size,
         hidden_sizes=hidden_sizes,
         output_size=1,
         nonlinearity=hidden_nonlinearity,
     )
     self.log_std = torch.nn.Parameter(init_log_std *
                                       torch.ones(action_size))
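For context, a hedged sketch of how mu and the state-independent log_std above are typically consumed to form a diagonal Gaussian policy; the forward pass below is illustrative and is not the original model's code.

    import torch

    obs_dim, action_size = 8, 2
    mu_net = torch.nn.Sequential(
        torch.nn.Linear(obs_dim, 64), torch.nn.Tanh(),
        torch.nn.Linear(64, action_size), torch.nn.Tanh(),
    )
    log_std = torch.nn.Parameter(torch.zeros(action_size))  # init_log_std = 0.

    obs = torch.randn(16, obs_dim)
    dist = torch.distributions.Normal(mu_net(obs), log_std.exp())  # shared std across states
    action = dist.sample()
    log_prob = dist.log_prob(action).sum(-1)  # sum over independent action dims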
Example #3
    def __init__(
            self,
            observation_shape,
            hidden_sizes,
            action_size,
            n_tile=20,
    ):
        super().__init__()
        self._obs_ndim = 1
        self._n_tile = n_tile
        input_dim = int(np.sum(observation_shape))

        self._action_size = action_size
        self.mlp_loc = MlpModel(
            input_size=input_dim,
            hidden_sizes=hidden_sizes,
            output_size=4
        )
        self.mlp_delta = MlpModel(
            input_size=input_dim + 4 * n_tile,
            hidden_sizes=hidden_sizes,
            output_size=3 * 2,
        )

        self.delta_distribution = Gaussian(
            dim=3,
            squash=True,
            min_std=np.exp(MIN_LOG_STD),
            max_std=np.exp(MAX_LOG_STD),
        )
        self.cat_distribution = Categorical(4)

        self._counter = 0
Example #4
 def __init__(
         self,
         observation_shape,
         hidden_sizes,
         action_size,
 ):
     super().__init__()
     assert hasattr(observation_shape, 'state'), "mcp model requires observation dict to contain state attribute"
     assert hasattr(observation_shape, 'goal'), "mcp model requires observation to contain goal attribute"
     self.height, self.width = observation_shape.goal
     # self.conv = Conv2dModel(
     #     in_channels=1,
     #     channels=[8, 20],
     #     kernel_sizes=[5, 4],
     #     strides=[3, 3],
     #     paddings=[1, 1],
     # )
     # conv_out_size = self.conv.conv_out_size(self.height, self.width)
     self.robot_state_mlp = MlpModel(
         input_size=observation_shape.state[0],
         hidden_sizes=[512],
         output_size=256
     )
     self.mlp = MlpModel(
         input_size=256 + observation_shape.goal.relative_target[0] + action_size,
         hidden_sizes=[256],
         output_size=1
     )
Example #5
 def __init__(
         self,
         observation_shape,
         action_size,
         fc_sizes=512,
         dueling=False,
         use_maxpool=False,
         channels=None,  # None uses default.
         kernel_sizes=None,
         strides=None,
         paddings=None,
         linear_value_output=True):
     """Instantiates the neural network according to arguments; network defaults
     stored within this method."""
     super().__init__()
     self.dueling = dueling
     c, h, w = observation_shape
     self.conv = Conv2dModel(
         in_channels=c,
         channels=channels or [32, 64, 64],
         kernel_sizes=kernel_sizes or [3, 2, 2],
         strides=strides or [2, 1, 1],
         paddings=paddings or [0, 0, 0],
         use_maxpool=use_maxpool,
     )
     conv_out_size = self.conv.conv_out_size(h, w)
     self.pi_head = MlpModel(conv_out_size, [256], action_size)
     self.value_head = MlpModel(conv_out_size, [256], 1 if linear_value_output else None)
Example #6
    def __init__(
            self,
            observation_shape,
            hidden_sizes,
            action_size,
            n_tile=50,
            loc_size=2,
            delta_size=3,
    ):
        super().__init__()
        self._obs_ndim = 1
        input_dim = int(np.sum(observation_shape))
        self._n_tile = n_tile
        self._loc_size = loc_size
        self._delta_size = delta_size

        # self._obs_ndim = len(observation_shape)
        # input_dim = int(np.prod(observation_shape))

        assert action_size == loc_size + delta_size  # first loc_size dims (location), then delta_size dims (delta)
        self._action_size = action_size

        self.mlp_loc = MlpModel(
            input_size=input_dim,
            hidden_sizes=hidden_sizes,
            output_size=loc_size * 2
        )
        self.mlp_delta = MlpModel(
            input_size=input_dim + loc_size * n_tile,
            hidden_sizes=hidden_sizes,
            output_size=delta_size * 2,
        )

        self._counter = 0
Example #7
    def __init__(
            self,
            observation_shape,
            hidden_sizes,
            action_size,
    ):
        super().__init__()
        assert hasattr(observation_shape, 'state'), "vision model requires observation dict to contain state attribute"
        assert hasattr(observation_shape, 'goal'), "vision model requires observation to contain goal attribute"
        assert hasattr(observation_shape.goal, 'camera'), 'vision model requires camera observation'
        self.height, self.width = observation_shape.goal.camera
        robot_state_shape = observation_shape.state[0]
        self.conv = Conv2dModel(
            in_channels=1,
            channels=[9, 18],
            kernel_sizes=[3, 3],
            strides=[2, 2],
            paddings=[1, 1],
        )
        conv_out_size = self.conv.conv_out_size(self.height, self.width)
        robot_state_out = 256
        self.robot_state_mlp = MlpModel(
            input_size=robot_state_shape,
            hidden_sizes=[],
            output_size=robot_state_out
        )
        self.mu_head = MlpModel(
            input_size=robot_state_out + conv_out_size,
            hidden_sizes=[256, ],
            output_size=action_size
        )

        init_log_std = 0.
        self.log_std = torch.nn.Parameter(init_log_std * torch.ones(action_size))
Example #8
 def __init__(self,
              input_size,
              hidden_sizes,
              output_size,
              n_atoms,
              grad_scale=2**(-1 / 2),
              fc_1_V=None):
     """
     Dueling distributional C51 head copied from rlpyt
     """
     super().__init__()
     if isinstance(hidden_sizes, int):
         hidden_sizes = [hidden_sizes]
     self.advantage_hidden = MlpModel(input_size, hidden_sizes)
     self.advantage_out = torch.nn.Linear(hidden_sizes[-1],
                                          output_size * n_atoms,
                                          bias=False)
     self.advantage_bias = torch.nn.Parameter(torch.zeros(n_atoms))
     if fc_1_V is None:
         self.value = MlpModel(input_size,
                               hidden_sizes,
                               output_size=n_atoms)
     else:
         self.value = nn.Sequential(
             *[fc_1_V,
               nn.ReLU(),
               nn.Linear(hidden_sizes[0], n_atoms)])
     self._grad_scale = grad_scale
     self._output_size = output_size
     self._n_atoms = n_atoms
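A hedged sketch of how a dueling distributional (C51-style) head like this one commonly combines its streams into per-action atom probabilities; the source's actual forward pass, including its grad_scale handling, may differ.

    import torch

    B, n_actions, n_atoms = 32, 6, 51
    advantage = torch.randn(B, n_actions, n_atoms)  # advantage_out reshaped per action/atom
    value = torch.randn(B, 1, n_atoms)              # value stream, broadcast over actions

    # Dueling combination: subtract the mean advantage so the value stream carries
    # the shared part, then softmax over atoms for a distribution per action.
    logits = value + advantage - advantage.mean(dim=1, keepdim=True)
    p = torch.softmax(logits, dim=-1)               # (B, n_actions, n_atoms)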
Example #9
 def __init__(self,
              input_classes: int,
              output_size: int,
              rnn_type: str = 'gru',
              rnn_size: int = 256,
              hidden_sizes: Union[List, Tuple] = None,
              baselines_init: bool = True,
              layer_norm: bool = False,
              prev_action: int = 3,
              prev_reward: int = 3,
              ):
     super().__init__()
     self._obs_dim = 0
     self.rnn_is_lstm = rnn_type != 'gru'
     self.preprocessor = tscr(OneHotLayer(input_classes))
     rnn_class = get_rnn_class(rnn_type, layer_norm)
     self.body_pi = MlpModel(input_classes, hidden_sizes, None, nn.ReLU, None)
     self.body_v = MlpModel(input_classes, hidden_sizes, None, nn.ReLU, None)
     # prev_action / prev_reward codes: 1 -> feed to the pi RNN only, 2 -> the v RNN only, 3 -> both.
     rnn_input_size_pi = self.body_pi.output_size + (prev_action in [1, 3]) * output_size + (prev_reward in [1, 3])
     rnn_input_size_v = self.body_v.output_size + (prev_action in [2, 3]) * output_size + (prev_reward in [2, 3])
     self.rnn_pi = rnn_class(rnn_input_size_pi, rnn_size)  # Concat action, reward
     self.rnn_v = rnn_class(rnn_input_size_v, rnn_size)
     self.pi = nn.Sequential(nn.ReLU(), nn.Linear(rnn_size, output_size), nn.Softmax(-1))  # Need to activate after lstm
     self.v = nn.Sequential(nn.ReLU(), nn.Linear(rnn_size, 1))
     if baselines_init:
         self.body_pi.apply(apply_init); self.body_v.apply(apply_init)
         self.rnn_pi.apply(apply_init); self.rnn_v.apply(apply_init)
         self.pi.apply(partial(apply_init, O_INIT_VALUES['pi']))
         self.v.apply(partial(apply_init, O_INIT_VALUES['v']))
     self.body_pi, self.body_v, self.pi, self.v = tscr(self.body_pi), tscr(self.body_v), tscr(self.pi), tscr(self.v)
     self.p_a = prev_action
     self.p_r = prev_reward
Example #10
 def __init__(self,
              input_classes: int,
              output_size: int,
              rnn_type: str = 'gru',
              rnn_size: int = 256,
              hidden_sizes: Union[List, Tuple] = None,
              baselines_init: bool = True,
              layer_norm: bool = False,
              prev_action: int = 2,
              prev_reward: int = 2,
              ):
     super().__init__()
     self._obs_dim = 0
     self.rnn_is_lstm = rnn_type != 'gru'
     self.preprocessor = tscr(OneHotLayer(input_classes))
     rnn_class = get_rnn_class(rnn_type, layer_norm)
     rnn_input_size = input_classes
     if prev_action: rnn_input_size += output_size  # Use previous action as input
     if prev_reward: rnn_input_size += 1  # Use previous reward as input
     self.rnn = rnn_class(rnn_input_size, rnn_size)  # Concat action, reward
     pi_inits = (O_INIT_VALUES['base'], O_INIT_VALUES['pi']) if baselines_init else None
     v_inits = (O_INIT_VALUES['base'], O_INIT_VALUES['v']) if baselines_init else None
     self.pi = nn.Sequential(MlpModel(rnn_size, hidden_sizes, output_size, nn.ReLU, pi_inits), nn.Softmax(-1))
     self.v = nn.Sequential(MlpModel(rnn_size, hidden_sizes, 1, nn.ReLU, v_inits))
     if baselines_init:
         self.rnn.apply(apply_init)
     self.pi, self.v = tscr(self.pi), tscr(self.v)
     self.p_a = prev_action > 0
     self.p_r = prev_reward > 0
Example #11
 def __init__(
     self,
     observation_shape,
     output_size,
     fc_size=512,  # Between mlp and lstm.
     lstm_size=512,
     head_size=256,
     dueling=False,
 ):
     """Instantiates the neural network according to arguments; network defaults
     stored within this method."""
     super().__init__()
     self._obs_n_dim = len(observation_shape)
     self.dueling = dueling
     self.mlp = MlpModel(
         int(np.prod(observation_shape)),  # flatten the observation shape into an int input size
         [256],
         output_size=fc_size,
         nonlinearity=torch.nn.Tanh  # Match spinningup
     )
     self.lstm = torch.nn.LSTM(fc_size + output_size + 1, lstm_size)
     if dueling:
         self.head = DuelingHeadModel(lstm_size, head_size, output_size)
     else:
         self.head = MlpModel(lstm_size, head_size, output_size=output_size)
Example #12
 def __init__(
     self,
     observation_shape,
     action_size,
     linear_value_output=True,
     sequence_length=64,
     seperate_value_network=True,
     size='medium',
 ):
     super().__init__()
     self.state_size = np.prod(observation_shape.state)
     self.action_size = action_size
     self.sequence_length = sequence_length
     self.transformer_dim = SIZES[size]['dim']
     self.depth = SIZES[size]['depth']
     self.cmem_ratio = SIZES[size]['cmem_ratio']
     self.cmem_length = self.sequence_length // self.cmem_ratio
     memory_layers = range(1, self.depth + 1)
     self.transformer = CompressiveTransformerPyTorch(
         num_tokens=20000,
         emb_dim=self.state_size,  # embedding dimensions, embedding factorization from Albert paper
         dim=self.transformer_dim,
         heads=SIZES[size]['num_heads'],
         depth=self.depth,
         seq_len=self.sequence_length,
         mem_len=self.sequence_length,  # memory length
         # cmem_len=self.cmem_length,  # compressed memory buffer length
         # cmem_ratio=self.cmem_ratio,  # compressed memory ratio, 4 was recommended in paper
         reconstruction_loss_weight=1,  # weight to place on compressed memory reconstruction loss
         gru_gated_residual=True,
         # whether to gate the residual intersection, from 'Stabilizing Transformer for RL' paper
         memory_layers=memory_layers,
     )
     self.transformer.token_emb = torch.nn.Identity()  # don't use token embedding in compressive transformer
     self.transformer.to_logits = torch.nn.Identity()
     # self.input_layer_norm = torch.nn.LayerNorm(self.state_size)
     self.input_layer_norm = torch.nn.Identity()
     # self.output_layer_norm = torch.nn.LayerNorm(self.transformer_dim)
     self.output_layer_norm = torch.nn.Identity()
     self.softplus = torch.nn.Softplus()
     self.pi_head = MlpModel(input_size=self.transformer_dim,
                             hidden_sizes=[256],
                             output_size=2 * action_size)
     self.value_head = MlpModel(input_size=self.transformer_dim,
                                hidden_sizes=[256],
                                output_size=1 if linear_value_output else None)
     self.mask = torch.ones((self.sequence_length, self.sequence_length),
                            dtype=torch.int8).triu()
Example #13
    def __init__(
        self,
        observation_shape,
        action_size,
        fc_sizes=32,
        use_maxpool=False,
        channels=None,  # None uses default.
        kernel_sizes=None,
        strides=None,
        paddings=None,
        hidden_nonlinearity=torch.nn.Tanh,  # Module form.
        # mu_nonlinearity=torch.nn.Tanh,  # Module form.
        mu_nonlinearity=None,
        init_log_std=0.,
    ):
        """Instantiate neural net module according to inputs."""
        super().__init__()

        self.conv = Conv2dHeadModel(
            image_shape=observation_shape,
            channels=channels or [16, 32],
            kernel_sizes=kernel_sizes or [8, 4],
            strides=strides or [4, 2],
            paddings=paddings or [0, 1],
            use_maxpool=use_maxpool,
            hidden_sizes=fc_sizes,  # Applies nonlinearity at end.
        )
        mu_mlp = MlpModel(
            input_size=self.conv.output_size,
            hidden_sizes=fc_sizes,
            output_size=action_size,
            nonlinearity=hidden_nonlinearity,
        )
        # print(self.conv.output_size)
        # print('Num of encoder parameters: %d' % sum(p.numel() for p in self.conv.parameters() if p.requires_grad))
        # print('Num of encoder parameters: %d' % sum(p.numel() for p in mu_mlp.parameters() if p.requires_grad))
        if mu_nonlinearity is not None:
            self.mu = torch.nn.Sequential(mu_mlp, mu_nonlinearity())
        else:
            self.mu = mu_mlp
        self.v = MlpModel(
            input_size=self.conv.output_size,
            hidden_sizes=fc_sizes,
            output_size=1,
            nonlinearity=hidden_nonlinearity,
        )

        lstm_size = 256  # assumed width; lstm_size is not an argument of this constructor
        self.lstm = torch.nn.LSTM(self.conv.output_size + action_size + 1, lstm_size)  # conv features stand in for the undefined mlp_output_size
        self.head = torch.nn.Linear(lstm_size, action_size * 2 + 1)

        self.log_std = torch.nn.Parameter(init_log_std *
                                          torch.ones(action_size))
Example #14
    def __init__(self, observation_shape, action_size,
                 policy_hidden_sizes=None, policy_hidden_nonlinearity=torch.nn.Tanh,
                 value_hidden_sizes=None, value_hidden_nonlinearity=torch.nn.Tanh, ):
        super().__init__()
        self._obs_ndim = len(observation_shape)
        input_size = int(np.prod(observation_shape))

        policy_hidden_sizes = [400, 300] if policy_hidden_sizes is None else policy_hidden_sizes
        value_hidden_sizes = [400, 300] if value_hidden_sizes is None else value_hidden_sizes
        self.pi = MlpModel(input_size=input_size, hidden_sizes=policy_hidden_sizes, output_size=action_size,
                           nonlinearity=policy_hidden_nonlinearity)
        self.v = MlpModel(input_size=input_size, hidden_sizes=value_hidden_sizes, output_size=1,
                          nonlinearity=value_hidden_nonlinearity, )
Example #15
    def __init__(
        self,
        observation_shape,
        action_size,
        hidden_sizes=None,  # None for default (see below).
        hidden_nonlinearity=torch.nn.Tanh,  # Module form.
        mu_nonlinearity=torch.nn.Tanh,  # Module form.
        init_log_std=0.,
        pooling="average",
    ):
        super().__init__()
        self._obs_ndim = len(observation_shape)
        self._n_pop = observation_shape[0]

        input_size = int(observation_shape[-1])
        output_size = int(action_size[-1])
        hidden_sizes = hidden_sizes or [64]

        # import pdb; pdb.set_trace()
        self.encoder = MlpModel(input_size=input_size,
                                hidden_sizes=hidden_sizes * 2,
                                output_size=None,
                                nonlinearity=hidden_nonlinearity)

        self.pooling = pooling

        input_size = 64  # matches the encoder output width (last hidden size, 64 with the defaults)
        if self.pooling is not None:
            input_size *= 2

        mu_mlp = MlpModel(
            input_size=input_size,
            hidden_sizes=hidden_sizes,
            output_size=output_size,
            nonlinearity=hidden_nonlinearity,
        )

        if mu_nonlinearity is not None:
            self.mu = torch.nn.Sequential(mu_mlp, mu_nonlinearity())
        else:
            self.mu = mu_mlp

        self.v = MlpModel(
            input_size=input_size,
            hidden_sizes=hidden_sizes,
            output_size=1,
            nonlinearity=hidden_nonlinearity,
        )
        self.log_std = torch.nn.Parameter(init_log_std *
                                          torch.ones(action_size))
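The input_size *= 2 above implies that each entity's encoding is concatenated with a pooled population encoding before the mu and value MLPs. A small illustrative sketch of that pattern (shapes and the pooling choice are assumptions):

    import torch

    B, n_pop, feat = 8, 10, 64                      # feat matches the encoder output width
    encoded = torch.randn(B, n_pop, feat)           # per-entity encoder output
    pooled = encoded.mean(dim=1, keepdim=True)      # "average" pooling over the population
    joint = torch.cat([encoded, pooled.expand_as(encoded)], dim=-1)  # (B, n_pop, 2 * feat)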
Example #16
 def __init__(
     self,
     image_shape,
     output_size,
     fc_size=512,  # Between conv and lstm.
     lstm_size=512,
     head_size=512,
     dueling=False,
     use_maxpool=False,
     channels=None,  # None uses default.
     kernel_sizes=None,
     strides=None,
     paddings=None,
 ):
     super().__init__()
     self.dueling = dueling
     self.conv = Conv2dHeadModel(
         image_shape=image_shape,
         channels=channels or [32, 64, 64],
         kernel_sizes=kernel_sizes or [8, 4, 3],
         strides=strides or [4, 2, 1],
         paddings=paddings or [0, 1, 1],
         use_maxpool=use_maxpool,
         hidden_sizes=fc_size,
     )
     self.lstm = torch.nn.LSTM(self.conv.output_size + output_size + 1,
                               lstm_size)
     if dueling:
         self.head = DuelingHeadModel(lstm_size, head_size, output_size)
     else:
         self.head = MlpModel(lstm_size, head_size, output_size=output_size)
Example #17
 def __init__(
     self,
     image_shape,
     output_size,
     fc_sizes=512,
     dueling=False,
     use_maxpool=False,
     channels=None,  # None uses default.
     kernel_sizes=None,
     strides=None,
     paddings=None,
 ):
     """Instantiates the neural network according to arguments; network defaults
     stored within this method."""
     super().__init__()
     self.dueling = dueling
     c, h, w = image_shape
     self.conv = Conv2dModel(
         in_channels=c,
         channels=channels or [32, 64, 64],
         kernel_sizes=kernel_sizes or [8, 4, 3],
         strides=strides or [4, 2, 1],
         paddings=paddings or [0, 1, 1],
         use_maxpool=use_maxpool,
     )
     conv_out_size = self.conv.conv_out_size(h, w)
     if dueling:
         self.head = DuelingHeadModel(conv_out_size, fc_sizes, output_size)
     else:
         self.head = MlpModel(conv_out_size, fc_sizes, output_size)
Example #18
 def __init__(
         self,
         image_shape,
         channels,
         kernel_sizes,
         strides,
         hidden_sizes,
         output_size=None,  # if None: nonlinearity applied to output.
         paddings=None,
         nonlinearity=torch.nn.ReLU,
         use_maxpool=False,
         ):
     super().__init__()
     c, h, w = image_shape
     self.conv = Conv2dModel(
         in_channels=c,
         channels=channels,
         kernel_sizes=kernel_sizes,
         strides=strides,
         paddings=paddings,
         nonlinearity=nonlinearity,
         use_maxpool=use_maxpool,
     )
     conv_out_size = self.conv.conv_out_size(h, w)
     if hidden_sizes or output_size:
         self.head = MlpModel(conv_out_size, hidden_sizes,
             output_size=output_size, nonlinearity=nonlinearity)
         if output_size is not None:
             self._output_size = output_size
         else:
             self._output_size = (hidden_sizes if
                 isinstance(hidden_sizes, int) else hidden_sizes[-1])
     else:
         self.head = lambda x: x
         self._output_size = conv_out_size
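If a conv_out_size helper like the one used above is not available, the flattened conv output width can be computed generically with a dummy forward pass; a small sketch under assumed image and layer sizes:

    import torch

    conv = torch.nn.Sequential(
        torch.nn.Conv2d(3, 32, kernel_size=8, stride=4), torch.nn.ReLU(),
        torch.nn.Conv2d(32, 64, kernel_size=4, stride=2), torch.nn.ReLU(),
    )
    c, h, w = 3, 84, 84
    with torch.no_grad():
        conv_out_size = conv(torch.zeros(1, c, h, w)).flatten(1).shape[1]  # 5184 for these sizes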
Example #19
 def __init__(
         self,
         image_shape,
         output_size,
         fc_sizes=512,
         dueling=False,
         use_maxpool=False,
         channels=None,  # None uses default.
         kernel_sizes=None,
         strides=None,
         paddings=None,
         ):
     super().__init__()
     self.dueling = dueling
     c, h, w = image_shape
     self.conv = Conv2dModel(
         in_channels=c,
         channels=channels or [32, 64, 64],
         kernel_sizes=kernel_sizes or [8, 4, 3],
         strides=strides or [4, 2, 1],
         paddings=paddings or [0, 1, 1],
         use_maxpool=use_maxpool,
     )
     conv_out_size = self.conv.conv_out_size(h, w)
     if dueling:
         self.head = DuelingHeadModel(conv_out_size, fc_sizes, output_size)
     else:
         self.head = MlpModel(conv_out_size, fc_sizes, output_size)
Example #20
 def __init__(
     self,
     observation_shape,
     action_size,
     hidden_sizes=None,  # None for default (see below).
     lstm_size=256,
     nonlinearity=torch.nn.ReLU,
     normalize_observation=False,
     norm_obs_clip=10,
     norm_obs_var_clip=1e-6,
 ):
     super().__init__()
     self._obs_n_dim = len(observation_shape)
     self._action_size = action_size
     hidden_sizes = hidden_sizes or [256, 256]
     mlp_input_size = int(np.prod(observation_shape))
     self.mlp = MlpModel(
         input_size=mlp_input_size,
         hidden_sizes=hidden_sizes,
         output_size=None,
         nonlinearity=nonlinearity,
     )
     mlp_output_size = hidden_sizes[-1] if hidden_sizes else mlp_input_size
     self.lstm = torch.nn.LSTM(mlp_output_size + action_size + 1, lstm_size)
     self.head = torch.nn.Linear(lstm_size, action_size * 2 + 1)
     if normalize_observation:
         self.obs_rms = RunningMeanStdModel(observation_shape)
         self.norm_obs_clip = norm_obs_clip
         self.norm_obs_var_clip = norm_obs_var_clip
     self.normalize_observation = normalize_observation
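The linear head above emits action_size * 2 + 1 units; in models of this style that is typically a packed mean, log-std, and value. A hedged sketch of the split (the actual ordering used by the source is an assumption):

    import torch

    B, action_size, lstm_size = 8, 4, 256
    head = torch.nn.Linear(lstm_size, action_size * 2 + 1)
    lstm_out = torch.randn(B, lstm_size)

    mu, log_std, value = head(lstm_out).split([action_size, action_size, 1], dim=-1)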
Example #21
    def __init__(
            self,
            observation_shape,
            action_size,
            hidden_sizes=[64,64],  # mlp after lstm
            fc_sizes=128, # Between conv and lstm
            lstm_size=64,
            channels=[8, 16],
            kernel_sizes=[8, 4],
            strides=[4, 2],
            paddings=[0, 1],
            use_maxpool=False,
            ):
        """Instantiate neural net according to inputs."""
        super().__init__()
        self._obs_ndim = len(observation_shape)

        self.conv = Conv2dHeadModel(
            image_shape=observation_shape,
            channels=channels,
            kernel_sizes=kernel_sizes,
            strides=strides,
            paddings=paddings,
            use_maxpool=use_maxpool,
            hidden_sizes=fc_sizes,  # Applies nonlinearity at end.
        )   # image -> conv (ReLU) -> linear (fc_sizes) -> ReLU

        self.lstm = torch.nn.LSTM(self.conv.output_size + action_size + 1 + action_size, lstm_size)   # Input to LSTM: conv_output + prev_action + prev_reward + action
        self.mlp = MlpModel(
            input_size=lstm_size,
            hidden_sizes=hidden_sizes,
            output_size=1,
        )
Example #22
 def __init__(
     self,
     image_shape,
     latent_size,
     channels,
     kernel_sizes,
     strides,
     paddings=None,
     hidden_sizes=None,  # usually None; NOT the same as anchor MLP
     kiaming_init=True,
 ):
     super().__init__()
     c, h, w = image_shape
     self.conv = Conv2dModel(
         in_channels=c,
         channels=channels,
         kernel_sizes=kernel_sizes,
         strides=strides,
         paddings=paddings,
         use_maxpool=False,
     )
     self._output_size = self.conv.conv_out_size(h, w)
     self._output_shape = self.conv.conv_out_shape(h, w)
     self.head = MlpModel(
         input_size=self._output_size,
         hidden_sizes=hidden_sizes,
         output_size=latent_size,
     )
     if kiaming_init:
         self.apply(weight_init)
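A hedged sketch of what a weight_init function applied via self.apply(...) typically does when Kaiming initialization is requested; the project's actual weight_init may differ.

    import torch

    def weight_init(m):
        # Kaiming-initialize linear/conv weights and zero the biases; other modules are untouched.
        if isinstance(m, (torch.nn.Linear, torch.nn.Conv2d)):
            torch.nn.init.kaiming_uniform_(m.weight, nonlinearity='relu')
            if m.bias is not None:
                torch.nn.init.zeros_(m.bias)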
Example #23
    def __init__(
        self,
        image_shape,
        latent_size,
        use_fourth_layer=True,
        skip_connections=True,
        hidden_sizes=None,
        kiaming_init=True,
    ):
        super().__init__()
        c, h, w = image_shape
        self.conv = DmlabConv2dModel(
            in_channels=c,
            use_fourth_layer=True,
            skip_connections=skip_connections,
            use_maxpool=False,
        )
        self._output_size = self.conv.output_size(h, w)
        self._output_shape = self.conv.output_shape(h, w)

        self.head = MlpModel(  # gets to z_t, not necessarily c_t
            input_size=self._output_size,
            hidden_sizes=hidden_sizes,
            output_size=latent_size,
        )
        if kiaming_init:
            self.apply(weight_init)
Example #24
 def __init__(
     self,
     image_shape,
     output_size,
     fc_size=512,  # Between conv and lstm.
     lstm_size=512,
     head_size=512,
     dueling=False,
     use_maxpool=False,
     channels=None,  # None uses default.
     kernel_sizes=None,
     strides=None,
     paddings=None,
 ):
     """Instantiates the neural network according to arguments; network defaults
     stored within this method."""
     super().__init__()
     self.dueling = dueling
     self.conv = Conv2dHeadModel(
         image_shape=image_shape,
         channels=channels or [32, 64, 64],
         kernel_sizes=kernel_sizes or [8, 4, 3],
         strides=strides or [4, 2, 1],
         paddings=paddings or [0, 1, 1],
         use_maxpool=use_maxpool,
         hidden_sizes=fc_size,  # ReLU applied here (Steven).
     )
     self.lstm = torch.nn.LSTM(self.conv.output_size + output_size + 1,
                               lstm_size)
     if dueling:
         self.head = DuelingHeadModel(lstm_size, head_size, output_size)
     else:
         self.head = MlpModel(lstm_size, head_size, output_size=output_size)
Example #25
    def __init__(
        self,
        observation_shape,
        output_size,
        hidden_sizes=None,  # None for default (see below).
        lstm_size=256,
        nonlinearity=torch.nn.ReLU,
        normalize_observation=False,
        norm_obs_clip=10,
        norm_obs_var_clip=1e-6,
    ):
        """Instantiate neural net module according to inputs."""
        super().__init__()
        self._obs_n_dim = len(observation_shape)
        hidden_sizes = hidden_sizes or [256, 256]
        mlp_input_size = int(np.prod(observation_shape))
        self.mlp = MlpModel(
            input_size=mlp_input_size,
            hidden_sizes=hidden_sizes,
            output_size=None,
            nonlinearity=nonlinearity,
        )

        mlp_output_size = hidden_sizes[-1] if hidden_sizes else mlp_input_size
        self.lstm = torch.nn.LSTM(mlp_output_size + output_size + 1, lstm_size)
        self.pi = torch.nn.Linear(lstm_size, output_size)
        self.value = torch.nn.Linear(lstm_size, 1)
        if normalize_observation:
            self.obs_rms = RunningMeanStdModel(observation_shape)
            self.norm_obs_clip = norm_obs_clip
            self.norm_obs_var_clip = norm_obs_var_clip
        self.normalize_observation = normalize_observation
Example #26
 def __init__(
         self,
         input_size,
         hidden_sizes,
         output_size,
         grad_scale=2 ** (-1 / 2),
         ):
     super().__init__()
     if isinstance(hidden_sizes, int):
         hidden_sizes = [hidden_sizes]
     self.advantage_hidden = MlpModel(input_size, hidden_sizes)
     self.advantage_out = torch.nn.Linear(hidden_sizes[-1], output_size,
         bias=False)
     self.advantage_bias = torch.nn.Parameter(torch.zeros(1))
     self.value = MlpModel(input_size, hidden_sizes, output_size=1)
     self._grad_scale = grad_scale
Example #27
 def __init__(
     self,
     image_shape,
     latent_size,
     channels=None,
     kernel_sizes=None,
     strides=None,
     paddings=None,
     hidden_sizes=None,
     kiaming_init=True,
 ):
     super().__init__()
     c, h, w = image_shape
     self.conv = Conv2dStdimModel(
         in_channels=c,
         channels=channels or [32, 64, 64],
         kernel_sizes=kernel_sizes or [8, 4, 3],
         strides=strides or [4, 2, 1],
         paddings=paddings,
         use_maxpool=False,
     )
     self._output_size = self.conv.conv_out_size(h, w)
     self._output_shape = self.conv.conv_out_shape(h, w)
     self._conv_layer_shapes = self.conv.conv_layer_shapes(h, w)
     self.head = MlpModel(
         input_size=self._output_size,
         hidden_sizes=hidden_sizes,
         output_size=latent_size,
     )
     if kiaming_init:
         self.apply(weight_init)
Example #28
File: conv.py Project: zren96/rlpyt
    def __init__(
        self,
        observation_shape,
        action_size,
        hidden_sizes=[64, 64],  # mlp after lstm
        fc_sizes=64,  # Between conv and lstm
        channels=None,
        kernel_sizes=None,
        strides=None,
        paddings=None,
        use_maxpool=False,
    ):
        """Instantiate neural net according to inputs."""
        super().__init__()
        self._obs_ndim = len(observation_shape)

        self.conv = Conv2dHeadModel(
            image_shape=observation_shape,
            channels=channels or [4, 8],
            kernel_sizes=kernel_sizes or [8, 4],
            strides=strides or [4, 2],
            paddings=paddings or [0, 1],
            use_maxpool=use_maxpool,
            hidden_sizes=fc_sizes,  # Applies nonlinearity at end.
        )  # image -> conv (ReLU) -> linear (fc_sizes) -> ReLU
        self.mlp = MlpModel(
            input_size=self.conv.output_size + action_size,
            hidden_sizes=hidden_sizes,
            output_size=1,
        )
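A minimal stand-alone sketch of the Q(s, a) critic pattern above, where the action is concatenated with the encoded observation before the final MLP; names and sizes below are illustrative.

    import torch

    B, feat_size, action_size = 16, 64, 3
    conv_features = torch.randn(B, feat_size)   # stand-in for self.conv(image)
    action = torch.randn(B, action_size)

    q_mlp = torch.nn.Sequential(
        torch.nn.Linear(feat_size + action_size, 64), torch.nn.ReLU(),
        torch.nn.Linear(64, 1),
    )
    q = q_mlp(torch.cat([conv_features, action], dim=-1))  # (B, 1)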
Example #29
    def __init__(
            self,
            observation_shape,
            hidden_sizes,
            action_size,
            all_corners=False
            ):
        super().__init__()
        self._obs_ndim = 1
        self._all_corners = all_corners
        input_dim = int(np.sum(observation_shape))

        print('all corners', self._all_corners)
        delta_dim = 12 if all_corners else 3
        self._delta_dim = delta_dim
        self.mlp = MlpModel(
            input_size=input_dim,
            hidden_sizes=hidden_sizes,
            output_size=2 * delta_dim + 4,  # delta_dim means + delta_dim stds, plus 4 category probs
        )

        self.delta_distribution = Gaussian(
            dim=delta_dim,
            squash=True,
            min_std=np.exp(MIN_LOG_STD),
            max_std=np.exp(MAX_LOG_STD),
        )
        self.cat_distribution = Categorical(4)
Example #30
 def __init__(self,
              input_classes: int,
              output_size: int,
              rnn_type: str = 'gru',
              rnn_size: int = 256,
              hidden_sizes: Union[List, Tuple] = None,
              baselines_init: bool = True,
              layer_norm: bool = False,
              prev_action: int = 2,
              prev_reward: int = 2,
              ):
     super().__init__()
     self._obs_dim = 0
     self.rnn_is_lstm = rnn_type != 'gru'
     self.preprocessor = tscr(OneHotLayer(input_classes))
     rnn_class = get_rnn_class(rnn_type, layer_norm)
     self.body = MlpModel(input_classes, hidden_sizes, None, nn.ReLU, None)
     rnn_input_size = self.body.output_size
     if prev_action: rnn_input_size += output_size  # Use previous action as input
     if prev_reward: rnn_input_size += 1  # Use previous reward as input
     self.rnn = rnn_class(rnn_input_size, rnn_size)  # Concat action, reward
     self.pi = nn.Sequential(nn.ReLU(), nn.Linear(rnn_size, output_size), nn.Softmax(-1))
     self.v = nn.Sequential(nn.ReLU(), nn.Linear(rnn_size, 1))
     if baselines_init:
         self.rnn.apply(apply_init); self.body.apply(apply_init)
         self.pi.apply(partial(apply_init, gain=O_INIT_VALUES['pi']))
         self.v.apply(partial(apply_init, gain=O_INIT_VALUES['v']))
     self.body, self.pi, self.v = tscr(self.body), tscr(self.pi), tscr(self.v)
     self.p_a = prev_action > 0
     self.p_r = prev_reward > 0