def __init__(self,
                 units=128,
                 state_dim=2,
                 use_states=False,
                 missing_modalities=None,
                 add_R_noise=1e-6):
        """Measurement-model variant with full-span average-pool image head.

        Args:
            units (int): # of hidden units in network layers.
            state_dim (int): dimension of the filter state.
            use_states (bool): accepted for signature compatibility only;
                see NOTE below.
            missing_modalities: modality name(s) to drop (list or single
                name), forwarded to the parent constructor.
            add_R_noise (float): noise magnitude forwarded to the parent.
        """
        # NOTE(review): `use_states` is hard-coded to False in this super()
        # call, so the constructor argument is ignored — confirm intentional.
        super().__init__(units=units,
                         state_dim=state_dim,
                         use_states=False,
                         missing_modalities=missing_modalities,
                         add_R_noise=add_R_noise)

        # Image encoder: 1 -> 32 -> 16 -> 2 channels; all convs are padded so
        # the spatial resolution is preserved.
        self.observation_image_layers = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5,
                      padding=2),
            nn.ReLU(inplace=True),
            resblocks.Conv2d(channels=32, kernel_size=3),
            nn.Conv2d(in_channels=32,
                      out_channels=16,
                      kernel_size=3,
                      padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=16, out_channels=2, kernel_size=3,
                      padding=1),
        )
        # Spanning pools: on a 32x32 map, kernel (32, 2) -> 1x16 and (2, 32)
        # -> 16x1 per channel (stride defaults to kernel size). With 2
        # channels each pool yields 32 values; presumably the two pooled
        # outputs are concatenated in the forward pass to feed the
        # Linear(32 * 2, units) below — TODO confirm against forward().
        self.gap_h = nn.AvgPool2d((32, 2))
        self.gap_w = nn.AvgPool2d((2, 32))

        self.gap_layers = nn.Sequential(
            nn.Linear(32 * 2, units),
            nn.ReLU(inplace=True),
            resblocks.Linear(units),
        )
def __init__(self,
                 units=128,
                 state_dim=2,
                 use_states=False,
                 missing_modalities=None,
                 add_R_noise=1e-6):
        """Measurement-model variant with a strided average-pool image head.

        Args:
            units (int): # of hidden units in network layers.
            state_dim (int): dimension of the filter state.
            use_states (bool): accepted for signature compatibility only;
                see NOTE below.
            missing_modalities: modality name(s) to drop (list or single
                name), forwarded to the parent constructor.
            add_R_noise (float): noise magnitude forwarded to the parent.
        """
        # NOTE(review): the third positional argument (use_states) is
        # hard-coded to False; the constructor argument is ignored — confirm.
        super().__init__(units, state_dim, False, missing_modalities,
                         add_R_noise)

        # Image encoder: 1 -> 32 -> 16 -> 2 channels, then a strided average
        # pool and a linear projection down to `units` features.
        self.observation_image_layers = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5,
                      padding=2),
            nn.ReLU(inplace=True),
            resblocks.Conv2d(channels=32, kernel_size=3),
            nn.Conv2d(in_channels=32,
                      out_channels=16,
                      kernel_size=3,
                      padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=16, out_channels=2, kernel_size=3,
                      padding=1),
            nn.AvgPool2d(5, 3),  # 32x32 -> 10x10: floor((32 - 5) / 3) + 1
            nn.Flatten(),
            nn.Linear(10 * 10 * 2, units),
            nn.ReLU(inplace=True),
            resblocks.Linear(units),
        )
    def __init__(self, state_dim=2, units=64, missing_modalities=[]):
        super().__init__()

        obs_pos_dim = 3
        obs_sensors_dim = 7
        self.state_dim = state_dim

        # Missing modalities
        self.modalities = set(["image", 'gripper_sensors', 'gripper_pos'])
        if missing_modalities:
            if type(missing_modalities) == list:
                self.modalities -= set(missing_modalities)
            else:
                assert missing_modalities in self.modalities
                self.modalities -= set([missing_modalities])

        self.observation_image_layers = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5,
                      padding=2),
            nn.ReLU(inplace=True),
            resblocks.Conv2d(channels=32, kernel_size=3),
            nn.Conv2d(in_channels=32,
                      out_channels=16,
                      kernel_size=3,
                      padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=16, out_channels=8, kernel_size=3,
                      padding=1),
            nn.Flatten(),  # 32 * 32 * 8
            nn.Linear(8 * 32 * 32, units),
            nn.ReLU(inplace=True),
            resblocks.Linear(units),
        )
        self.observation_pos_layers = nn.Sequential(
            nn.Linear(obs_pos_dim, units),
            resblocks.Linear(units),
        )
        self.observation_sensors_layers = nn.Sequential(
            nn.Linear(obs_sensors_dim, units),
            resblocks.Linear(units),
        )
        self.state_layers = nn.Sequential(nn.Linear(state_dim, units), )

        self.shared_layers = nn.Sequential(
            nn.Linear(units * (len(self.modalities) + 1), units),
            nn.ReLU(inplace=True),
            resblocks.Linear(units),
            resblocks.Linear(units),
            nn.Linear(units, 1),
            # nn.LogSigmoid()
        )

        self.units = units
# --- Example 4 (scraped-sample separator) ---
def observation_image_layers(units: int, spanning_avg_pool: bool = False) -> nn.Module:
    """Create an image encoder block.

    Args:
        units (int): # of hidden units in network layers.
        spanning_avg_pool (bool): if True, finish with full width/height
            average pooling; otherwise flatten the whole feature map.

    Returns:
        nn.Module: Encoder block.
    """
    # Convolutional trunk shared by both variants.
    trunk = [
        nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5, padding=2),
        nn.ReLU(inplace=True),
        resblocks.Conv2d(channels=32, kernel_size=3),
        nn.Conv2d(in_channels=32, out_channels=16, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
    ]
    if spanning_avg_pool:
        # Full width/height average-pool head.
        head = [
            nn.Conv2d(in_channels=16, out_channels=2, kernel_size=3, padding=1),
            _DualSpanningAvgPool(rows=32, cols=32, reduce_size=2),
            nn.Linear(32 * 2, units),
        ]
    else:
        # Default head: flatten the 8-channel 32x32 map.
        head = [
            nn.Conv2d(in_channels=16, out_channels=8, kernel_size=3, padding=1),
            nn.Flatten(),  # 32 * 32 * 8
            nn.Linear(8 * 32 * 32, units),
        ]
    return nn.Sequential(
        *trunk,
        *head,
        nn.ReLU(inplace=True),
        resblocks.Linear(units),
    )
    def __init__(self, use_prev_state=True, units=32):
        """Dynamics model: encode state/control/observations and fuse them
        into a direct next-state prediction.

        Args:
            use_prev_state (bool): stored flag; presumably gates whether the
                previous state is fed through `state_layers` in the forward
                pass — TODO confirm against forward().
            units (int): # of hidden units in network layers.
        """
        super().__init__()

        self.use_prev_state = use_prev_state
        self.units = units

        obs_pos_dim = 7
        obs_sensors_dim = 7
        state_dim = 2
        control_dim = 7

        # State and control encoders each emit units // 2 features.
        self.state_layers = nn.Sequential(
            nn.Linear(state_dim, units // 2),
            nn.ReLU(inplace=True),
            resblocks.Linear(units // 2),
        )
        self.control_layers = nn.Sequential(
            nn.Linear(control_dim, units // 2),
            nn.ReLU(inplace=True),
            resblocks.Linear(units // 2),
        )
        # Lightweight image encoder: 1 -> 4 -> 1 channels on a 32x32 image.
        self.observation_image_layers = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=4, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            resblocks.Conv2d(channels=4),
            nn.Conv2d(in_channels=4, out_channels=1, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Flatten(),  # 32 * 32 = 1024
            nn.Linear(1024, units),
            nn.ReLU(inplace=True),
            resblocks.Linear(units),
        )
        self.observation_pose_layers = nn.Sequential(
            nn.Linear(obs_pos_dim, units),
            resblocks.Linear(units),
        )
        self.observation_sensors_layers = nn.Sequential(
            nn.Linear(obs_sensors_dim, units),
            resblocks.Linear(units),
        )
        # Fusion input: 2 * (units // 2) from state+control plus 3 * units
        # from the three observation encoders.
        self.shared_layers = nn.Sequential(
            nn.Linear((units // 2) * 2 + units * 3, units),
            nn.ReLU(inplace=True),
            resblocks.Linear(units),
            resblocks.Linear(units),
            nn.Linear(units, state_dim),  # Directly output new state
        )
# --- Example 6 (scraped-sample separator) ---
def test_resblock_smoke_test():
    """Make sure we can build all resblocks."""
    # Every registered activation must construct with both inplace settings.
    for activation in resblocks.Base._activation_types.keys():
        for inplace in (True, False):
            resblocks.Linear(
                20,
                3,
                activation=activation,
                activations_inplace=inplace,
            )
            resblocks.Conv2d(channels=20,
                             bottleneck_channels=3,
                             kernel_size=5,
                             activation=activation,
                             activations_inplace=inplace)
# --- Example 7 (scraped-sample separator) ---
 def __init__(self,
              c_in,
              c_out,
              kernel_size,
              h_out,
              w_out,
              batch_norm=True,
              **kwargs):
     """Transposed-conv block: ConvTranspose2d + LeakyReLU + residual conv,
     normalized either with BatchNorm2d or LayerNorm.

     Args:
         c_in (int): input channels.
         c_out (int): output channels.
         kernel_size: kernel size for the transposed convolution.
         h_out (int): output height (used only to size LayerNorm).
         w_out (int): output width (used only to size LayerNorm).
         batch_norm (bool): use BatchNorm2d instead of LayerNorm.
         **kwargs: forwarded to nn.ConvTranspose2d (e.g. stride, padding).
     """
     super(_ConvT2dLayerNorm, self).__init__()
     self._convt = nn.ConvTranspose2d(c_in, c_out, kernel_size, **kwargs)
     self._act = nn.LeakyReLU()
     self._resblock = resblocks.Conv2d(c_out, activation="leaky_relu")
     # Two normalization layers: presumably one after the transposed conv and
     # one after the resblock — TODO confirm against forward().
     if batch_norm:
         self._ln = nn.BatchNorm2d(c_out)
         self._ln_post = nn.BatchNorm2d(c_out)
     else:
         # LayerNorm needs the exact output shape, hence h_out/w_out.
         self._ln = nn.LayerNorm([c_out, h_out, w_out])
         self._ln_post = nn.LayerNorm([c_out, h_out, w_out])
# --- Example 8 (scraped-sample separator) ---
def observation_image_layers(units: int) -> nn.Module:
    """Create an image encoder block.

    Args:
        units (int): # of hidden units in network layers.

    Returns:
        nn.Module: Encoder block.
    """
    # Assemble the encoder as a list, then wrap it in a single Sequential.
    layers = [
        nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5, padding=2),
        nn.ReLU(inplace=True),
        resblocks.Conv2d(channels=32, kernel_size=3),
        nn.Conv2d(in_channels=32, out_channels=16, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(in_channels=16, out_channels=8, kernel_size=3, padding=1),
        nn.Flatten(),  # 32 * 32 * 8
        nn.Linear(8 * 32 * 32, units),
        nn.ReLU(inplace=True),
        resblocks.Linear(units),
    ]
    return nn.Sequential(*layers)
# --- Example 9 (scraped-sample separator) ---
def get_simple_encoder_and_dsd_decoder(
    in_channels: int,
    network_channels: List[int],
    img_size: int,
    latent_dim: int,
    pixel_res: int,
    cond_channels: int = 0,
) -> Tuple[nn.Module, nn.Module]:
    """Get an encoder and decoder for use in a VAE.

    Args:
        in_channels: # of channels in the input image.
        network_channels: output channels of each strided encoder conv;
            each conv halves the spatial resolution.
        img_size: input image height/width (assumed square).
        latent_dim: latent dimensionality; the encoder emits 2 * latent_dim
            values (diagonal-Gaussian parameters).
        pixel_res: # of discrete intensity levels the decoder predicts
            per pixel.
        cond_channels: extra conditioning channels concatenated to the
            encoder input / decoder latent. Defaults to 0.

    Returns:
        Tuple of (encoder, decoder) modules.
    """
    # Construct Encoder
    # Output is diagonal covariance Gaussian
    layers: List[nn.Module] = []
    in_c = in_channels + cond_channels
    for i, c in enumerate(network_channels):
        # Each strided block halves the resolution: img_size / 2**(i + 1).
        layers.append(
            _Conv2dLayerNorm(
                in_c,
                c,
                3,
                img_size // (2**(i + 1)),
                img_size // (2**(i + 1)),
                padding=1,
                stride=2,
            ))
        in_c = c
    layers.append(nn.Flatten(start_dim=-3))  # out: (..., in_c)
    # NOTE(review): the Linear below assumes the final feature map is 1x1
    # spatially (len(network_channels) == log2(img_size)) — confirm callers.
    layers.append(nn.Linear(in_c, 2 * latent_dim))
    encoder = nn.Sequential(*layers)

    # Construct Decoder
    # Output is discrete log-softmax distribution
    # > see: "Pixel Recurrent Neural Networks", sec. 5.3
    div_base = 2
    # i runs from log2(pixel_res) - 1 down to 1, so each pix layer widens
    # channels from pixel_res // 2**(i+1) to pixel_res // 2**i at fixed
    # spatial size; the last one ends at pixel_res // div_base channels.
    factor_list = reversed(range(1, int(np.log(pixel_res) / np.log(div_base))))
    pix_layers = [
        _ConvT2dLayerNorm(
            pixel_res // div_base**(i + 1),
            pixel_res // div_base**i,
            3,
            img_size,
            img_size,
            padding=1,
        ) for i in factor_list
    ]
    decoder = nn.Sequential(
        # Project the latent to a 64-channel (img_size/4)^2 feature map.
        nn.Linear(latent_dim + cond_channels, 4 * img_size * img_size),
        _Reshape((-1, 64, int(img_size / 4), int(img_size / 4))),
        nn.LeakyReLU(),
        nn.LayerNorm([64, img_size // 4, img_size // 4]),
        # Two stride-2 transposed convs upsample back to full resolution.
        _ConvT2dLayerNorm(64, 16, 2, img_size // 2, img_size // 2, stride=2),
        _ConvT2dLayerNorm(16, 1, 2, img_size, img_size, stride=2),
        *pix_layers,
        nn.ConvTranspose2d(pixel_res // div_base, pixel_res, 3, padding=1),
        nn.LeakyReLU(),
        nn.LayerNorm([pixel_res, img_size, img_size]),
        resblocks.Conv2d(pixel_res,
                         pixel_res * div_base,
                         activation="leaky_relu"),
        # Channel axis holds the per-pixel categorical distribution.
        nn.LogSoftmax(dim=-3),
    )
    return encoder, decoder
    def __init__(self, units=32):
        """LSTM baseline: encode image/pose/sensor/control inputs, fuse them,
        unroll an LSTM, and decode the hidden state to a state estimate.

        Args:
            units (int): # of hidden units in network layers.

        Fix over original: ``super().__init__()`` is now called before any
        attribute assignment, per ``nn.Module`` convention.
        """
        super().__init__()

        obs_pos_dim = 3
        obs_sensors_dim = 7
        control_dim = 7
        self.state_dim = 2

        self.lstm_hidden_dim = 4
        self.lstm_num_layers = 2
        self.units = units

        # Observation encoders
        self.image_rows = 32
        self.image_cols = 32
        self.observation_image_layers = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5,
                      padding=2),
            nn.ReLU(inplace=True),
            resblocks.Conv2d(channels=32, kernel_size=3),
            nn.Conv2d(in_channels=32,
                      out_channels=16,
                      kernel_size=3,
                      padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=16, out_channels=8, kernel_size=3,
                      padding=1),
            nn.Flatten(),  # 32 * 32 * 8
            nn.Linear(8 * 32 * 32, units),
            nn.ReLU(inplace=True),
            resblocks.Linear(units),
        )
        self.observation_pose_layers = nn.Sequential(
            nn.Linear(obs_pos_dim, units),
            nn.ReLU(inplace=True),
            resblocks.Linear(units),
        )
        self.observation_sensors_layers = nn.Sequential(
            nn.Linear(obs_sensors_dim, units),
            nn.ReLU(inplace=True),
            resblocks.Linear(units),
        )

        # Control layers
        self.control_layers = nn.Sequential(
            nn.Linear(control_dim, units),
            nn.ReLU(inplace=True),
            resblocks.Linear(units),
        )

        # Fusion layer: image + pose + sensors + control -> units
        self.fusion_layers = nn.Sequential(
            nn.Linear(units * 4, units),
            nn.ReLU(inplace=True),
            resblocks.Linear(units),
        )

        # LSTM layers
        self.lstm = nn.LSTM(units,
                            self.lstm_hidden_dim,
                            self.lstm_num_layers,
                            batch_first=True)

        # Output layer: LSTM hidden state -> state estimate.
        self.output_layers = nn.Sequential(
            nn.Linear(self.lstm_hidden_dim, units),
            nn.ReLU(inplace=True),
            nn.Linear(units, self.state_dim),
        )
# --- Example 11 (scraped-sample separator) ---
    def __init__(self,
                 state_dim=2,
                 units=32,
                 use_softmax=True,
                 use_log_softmax=False):
        """Measurement model with either a softmax head or per-modality
        proposal heads.

        Args:
            state_dim (int): dimension of the filter state.
            units (int): # of hidden units in network layers.
            use_softmax (bool): build the single shared softmax-style head;
                otherwise build separate force/image proposal heads.
            use_log_softmax (bool): stored flag for the forward pass.
        """
        super().__init__()

        obs_pose_dim = 3
        obs_sensors_dim = 7
        self.state_dim = state_dim
        self.use_softmax = use_softmax
        self.use_log_softmax = use_log_softmax

        # Image encoder: 32x32 grayscale -> `units` features.
        self.observation_image_layers = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5,
                      padding=2),
            nn.ReLU(inplace=True),
            resblocks.Conv2d(channels=32, kernel_size=3),
            nn.Conv2d(in_channels=32,
                      out_channels=16,
                      kernel_size=3,
                      padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=16, out_channels=2, kernel_size=3,
                      padding=1),
            nn.Flatten(),  # 2 * 32 * 32 (fixed: comment previously said 8 ch)
            nn.Linear(2 * 32 * 32, units),
            nn.ReLU(inplace=True),
            resblocks.Linear(units),
        )

        self.observation_pose_layers = nn.Sequential(
            nn.Linear(obs_pose_dim, units),
            resblocks.Linear(units, activation='leaky_relu'),
        )
        self.observation_sensors_layers = nn.Sequential(
            nn.Linear(obs_sensors_dim, units),
            resblocks.Linear(units, activation='leaky_relu'),
        )

        # TODO: the +1 only works for state_dim == 2; in general it should be
        # state_dim + state_dim * (state_dim - 1) / 2.
        if self.use_softmax:
            self.shared_layers = nn.Sequential(
                nn.Linear(units * 3, units),
                nn.ReLU(inplace=True),
                resblocks.Linear(units, units),
                resblocks.Linear(units, units),
                resblocks.Linear(units, units),
                nn.Linear(units, 2 * (self.state_dim + 1)),
            )
        else:
            self.shared_layers = nn.Sequential(
                nn.Linear(units * 3, units * 3),
                nn.ReLU(inplace=True),
                resblocks.Linear(3 * units),
            )

            self.force_prop_layer = nn.Sequential(
                nn.Linear(units, units),
                nn.ReLU(inplace=True),
                nn.Linear(units, self.state_dim),
                nn.Sigmoid(),
            )

            self.image_prop_layer = nn.Sequential(
                nn.Linear(units, units),
                nn.ReLU(inplace=True),
                nn.Linear(units, self.state_dim),
                nn.Sigmoid(),
            )

        # Weight init. BUG FIX: in the original, the BatchNorm2d branch was
        # nested inside the conv branch (so it could never run) and
        # `m.bias.data.zero_()` executed unconditionally for conv modules,
        # which would fail for bias-free convolutions.
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
                nn.init.kaiming_normal_(m.weight.data)
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

        self.units = units
# --- Example 12 (scraped-sample separator) ---
    def __init__(self,
                 units=128,
                 state_dim=2,
                 use_states=False,
                 missing_modalities=None,
                 add_R_noise=1e-6):
        """EKF measurement model with optional missing modalities.

        Args:
            units (int): # of hidden units in network layers.
            state_dim (int): dimension of the filter state.
            use_states (bool): stored flag; whether states are used in the
                measurement update (see forward pass).
            missing_modalities: modality name(s) to drop; a list of names or
                a single name. Defaults to None (keep all modalities).
            add_R_noise (float): per-dimension noise floor added to R.
        """
        super().__init__()

        obs_pose_dim = 3
        obs_sensors_dim = 7
        image_dim = (32, 32)

        # We do not use states for EKF
        self.state_dim = state_dim
        # if we want to use states in measurement model update
        self.use_states = use_states

        # Missing modalities: start from the full set, remove those requested.
        self.modalities = set(["image", 'gripper_sensors', 'gripper_pos'])
        if missing_modalities:
            if isinstance(missing_modalities, list):
                self.modalities -= set(missing_modalities)
            else:
                assert missing_modalities in self.modalities
                self.modalities -= set([missing_modalities])

        # Image encoder: 32x32 grayscale -> `units` features.
        self.observation_image_layers = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5,
                      padding=2),
            nn.ReLU(inplace=True),
            resblocks.Conv2d(channels=32, kernel_size=3),
            nn.Conv2d(in_channels=32,
                      out_channels=16,
                      kernel_size=3,
                      padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=16, out_channels=2, kernel_size=3,
                      padding=1),
            nn.Flatten(),  # 32 * 32 * 2
            nn.Linear(2 * 32 * 32, units),
            nn.ReLU(inplace=True),
            resblocks.Linear(units),
        )

        self.observation_pose_layers = nn.Sequential(
            nn.Linear(obs_pose_dim, units),
            resblocks.Linear(units),
        )
        self.observation_sensors_layers = nn.Sequential(
            nn.Linear(obs_sensors_dim, units),
            resblocks.Linear(units),
        )

        # Fusion head: one `units`-wide vector per active modality.
        self.shared_layers = nn.Sequential(
            nn.Linear(units * (len(self.modalities)), units * 2),
            nn.ReLU(inplace=True),
            resblocks.Linear(2 * units),
            resblocks.Linear(2 * units),
        )

        # Measurement-noise (R) head.
        self.r_layer = nn.Sequential(
            nn.Linear(units, self.state_dim),
            nn.ReLU(inplace=True),
            resblocks.Linear(self.state_dim),
            nn.Linear(self.state_dim, self.state_dim),
        )

        # Innovation / measurement (z) head.
        self.z_layer = nn.Sequential(
            nn.Linear(units, self.state_dim),
            nn.ReLU(inplace=True),
            resblocks.Linear(self.state_dim),
            nn.Linear(self.state_dim, self.state_dim),
        )

        self.units = units

        # NOTE(review): plain tensor, not a registered buffer — it will not
        # follow .to(device) or appear in state_dict; confirm intentional.
        self.add_R_noise = torch.ones(state_dim) * add_R_noise
# --- Example 13 (scraped-sample separator) ---
    def __init__(self,
                 units=16,
                 state_dim=2,
                 use_states=False,
                 use_spatial_softmax=False):
        """Deprecated measurement model; superseded by panda_models.py.

        Args:
            units (int): # of hidden units in network layers.
            state_dim (int): dimension of the filter state.
            use_states (bool): if True, the shared head expects the state
                encoding as an extra `units`-wide input.
            use_spatial_softmax (bool): use a SpatialSoftmax image head
                instead of flatten + linear.
        """
        super().__init__()
        print("Currently deprecated. Use models in panda_models.py")

        obs_pose_dim = 3
        obs_sensors_dim = 7
        # NOTE(review): image_dim is unused in this constructor.
        image_dim = (32, 32)

        self.state_dim = state_dim
        self.use_states = use_states

        # Shared-head input width: image + pose + sensors (+ state if used).
        if self.use_states:
            shared_layer_dim = units * 4
        else:
            shared_layer_dim = units * 3

        if use_spatial_softmax:
            # Image head with spatial-softmax keypoints: 16 channels -> 16
            # (x, y) coordinates -> `units` features.
            self.observation_image_layers = nn.Sequential(
                nn.Conv2d(in_channels=1,
                          out_channels=32,
                          kernel_size=5,
                          padding=2),
                nn.ReLU(inplace=True),
                resblocks.Conv2d(channels=32, kernel_size=3),
                nn.Conv2d(in_channels=32,
                          out_channels=16,
                          kernel_size=3,
                          padding=1),
                nn.ReLU(inplace=True),
                nn.Conv2d(in_channels=16,
                          out_channels=16,
                          kernel_size=3,
                          padding=1),
                spatial_softmax.SpatialSoftmax(32, 32, 16),
                nn.Linear(16 * 2, units),
                nn.ReLU(inplace=True),
                resblocks.Linear(units),
            )
        else:
            # Default image head: flatten the 8-channel 32x32 map.
            self.observation_image_layers = nn.Sequential(
                nn.Conv2d(in_channels=1,
                          out_channels=32,
                          kernel_size=5,
                          padding=2),
                nn.ReLU(inplace=True),
                resblocks.Conv2d(channels=32, kernel_size=3),
                nn.Conv2d(in_channels=32,
                          out_channels=16,
                          kernel_size=3,
                          padding=1),
                nn.ReLU(inplace=True),
                nn.Conv2d(in_channels=16,
                          out_channels=8,
                          kernel_size=3,
                          padding=1),
                nn.Flatten(),  # 32 * 32 = 1024
                nn.Linear(8 * 32 * 32, units),
                nn.ReLU(inplace=True),
                resblocks.Linear(units),
            )

        self.observation_pose_layers = nn.Sequential(
            nn.Linear(obs_pose_dim, units),
            resblocks.Linear(units, activation='leaky_relu'),
        )
        self.observation_sensors_layers = nn.Sequential(
            nn.Linear(obs_sensors_dim, units),
            resblocks.Linear(units, activation='leaky_relu'),
        )
        self.state_layers = nn.Sequential(nn.Linear(self.state_dim, units), )

        # Fusion head over the concatenated modality encodings.
        self.shared_layers = nn.Sequential(
            nn.Linear(shared_layer_dim, units * 2),
            nn.ReLU(inplace=True),
            resblocks.Linear(2 * units),
            resblocks.Linear(2 * units),
        )

        # Measurement-noise (R) head.
        self.r_layer = nn.Sequential(
            nn.Linear(units, self.state_dim),
            nn.ReLU(inplace=True),
            resblocks.Linear(self.state_dim),
        )

        # Innovation / measurement (z) head.
        self.z_layer = nn.Sequential(
            nn.Linear(units, self.state_dim),
            nn.ReLU(inplace=True),
            resblocks.Linear(self.state_dim),
        )

        self.units = units