def __init__(self, units=128, state_dim=2, use_states=False, missing_modalities=None, add_R_noise=1e-6):
    """Measurement-model variant whose image head uses spanning average pools.

    NOTE(review): `use_states` is accepted but the parent is always
    constructed with use_states=False, so the argument is effectively
    ignored — confirm this is intentional.

    Args:
        units (int): Hidden width of the dense layers.
        state_dim (int): State dimensionality (forwarded to the parent).
        use_states (bool): Ignored; parent is built with False.
        missing_modalities: Modality name(s) to drop (forwarded to parent).
        add_R_noise (float): Noise magnitude (forwarded to parent).
    """
    super().__init__(units=units, state_dim=state_dim, use_states=False,
                     missing_modalities=missing_modalities,
                     add_R_noise=add_R_noise)
    # Conv stack 1 -> 32 -> 16 -> 2 channels; padding preserves spatial size.
    self.observation_image_layers = nn.Sequential(
        nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5, padding=2),
        nn.ReLU(inplace=True),
        resblocks.Conv2d(channels=32, kernel_size=3),
        nn.Conv2d(in_channels=32, out_channels=16, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(in_channels=16, out_channels=2, kernel_size=3, padding=1),
    )
    # Each avg pool gives us 16x2 values (assumes 32x32 feature maps:
    # kernel (32, 2) -> 1x16 per channel, kernel (2, 32) -> 16x1 per
    # channel — TODO confirm against forward()).
    self.gap_h = nn.AvgPool2d((32, 2))  # pools the full height
    self.gap_w = nn.AvgPool2d((2, 32))  # pools the full width
    # Dense head over the pooled features; input 32 * 2 = 64 presumably
    # comes from flattening/concatenating both pools in forward() — verify.
    self.gap_layers = nn.Sequential(
        nn.Linear(32 * 2, units),
        nn.ReLU(inplace=True),
        resblocks.Linear(units),
    )
def __init__(self, units=128, state_dim=2, use_states=False, missing_modalities=None, add_R_noise=1e-6):
    """Measurement-model variant whose image head downsamples via avg pooling.

    The conv stack reduces the image to 2 channels, an AvgPool2d(5, 3)
    shrinks it to 10x10, and a dense head maps the flattened result to a
    `units`-dimensional feature. All other behavior comes from the parent,
    which is always constructed with use_states=False.
    """
    super().__init__(units, state_dim, False, missing_modalities, add_R_noise)

    image_encoder = [
        nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5, padding=2),
        nn.ReLU(inplace=True),
        resblocks.Conv2d(channels=32, kernel_size=3),
        nn.Conv2d(in_channels=32, out_channels=16, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(in_channels=16, out_channels=2, kernel_size=3, padding=1),
        # Kernel 5, stride 3: a 32x32 map becomes 10x10.
        nn.AvgPool2d(5, 3),
        nn.Flatten(),
        nn.Linear(10 * 10 * 2, units),
        nn.ReLU(inplace=True),
        resblocks.Linear(units),
    ]
    self.observation_image_layers = nn.Sequential(*image_encoder)
def __init__(self, state_dim=2, units=64, missing_modalities=None):
    """Multimodal fusion network producing a single scalar output.

    Encodes image / gripper-pos / gripper-sensor observations and the state
    into `units`-dim features each, then maps their concatenation to one
    scalar (LogSigmoid head is present but commented out).

    Args:
        state_dim (int): Dimensionality of the state input.
        units (int): Hidden width of each encoder branch.
        missing_modalities (list | str, optional): Modality name(s) to drop
            from {"image", "gripper_sensors", "gripper_pos"}. Defaults to
            None (keep all).

    Fix: the default was previously a mutable `[]` — a classic Python
    pitfall (shared across calls). `None` is behaviorally identical here
    because both are falsy.
    """
    super().__init__()
    obs_pos_dim = 3
    obs_sensors_dim = 7
    self.state_dim = state_dim

    # Determine which observation modalities are active.
    self.modalities = set(["image", 'gripper_sensors', 'gripper_pos'])
    if missing_modalities:
        if isinstance(missing_modalities, list):
            self.modalities -= set(missing_modalities)
        else:
            # Single modality name passed as a string.
            assert missing_modalities in self.modalities
            self.modalities -= set([missing_modalities])

    self.observation_image_layers = nn.Sequential(
        nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5, padding=2),
        nn.ReLU(inplace=True),
        resblocks.Conv2d(channels=32, kernel_size=3),
        nn.Conv2d(in_channels=32, out_channels=16, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(in_channels=16, out_channels=8, kernel_size=3, padding=1),
        nn.Flatten(),  # 8 * 32 * 32
        nn.Linear(8 * 32 * 32, units),
        nn.ReLU(inplace=True),
        resblocks.Linear(units),
    )
    self.observation_pos_layers = nn.Sequential(
        nn.Linear(obs_pos_dim, units),
        resblocks.Linear(units),
    )
    self.observation_sensors_layers = nn.Sequential(
        nn.Linear(obs_sensors_dim, units),
        resblocks.Linear(units),
    )
    self.state_layers = nn.Sequential(nn.Linear(state_dim, units), )

    # One `units`-wide feature per active modality, plus the state branch.
    self.shared_layers = nn.Sequential(
        nn.Linear(units * (len(self.modalities) + 1), units),
        nn.ReLU(inplace=True),
        resblocks.Linear(units),
        resblocks.Linear(units),
        nn.Linear(units, 1),
        # nn.LogSigmoid()
    )
    self.units = units
def observation_image_layers(units: int, spanning_avg_pool: bool = False) -> nn.Module:
    """Create an image encoder block.

    Both variants share the same conv prefix; they differ only in how the
    16-channel feature map is reduced to a `units`-dimensional vector.

    Args:
        units (int): # of hidden units in network layers.
        spanning_avg_pool (bool): If True, reduce with full-width/height
            average pools; otherwise flatten and use a dense layer.

    Returns:
        nn.Module: Encoder block.
    """
    shared_prefix = [
        nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5, padding=2),
        nn.ReLU(inplace=True),
        resblocks.Conv2d(channels=32, kernel_size=3),
        nn.Conv2d(in_channels=32, out_channels=16, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
    ]
    if spanning_avg_pool:
        # Architecture with full width/height average pools
        reduction = [
            nn.Conv2d(in_channels=16, out_channels=2, kernel_size=3, padding=1),
            _DualSpanningAvgPool(rows=32, cols=32, reduce_size=2),
            nn.Linear(32 * 2, units),
        ]
    else:
        # Default model: flatten the full 8-channel map
        reduction = [
            nn.Conv2d(in_channels=16, out_channels=8, kernel_size=3, padding=1),
            nn.Flatten(),  # 32 * 32 * 8
            nn.Linear(8 * 32 * 32, units),
        ]
    return nn.Sequential(
        *shared_prefix,
        *reduction,
        nn.ReLU(inplace=True),
        resblocks.Linear(units),
    )
def __init__(self, use_prev_state=True, units=32):
    """Network fusing state, control, and observations into a new state.

    State and control are encoded at half width (units // 2); image, pose,
    and sensor observations at full width. The concatenated features are
    mapped directly to a 2-D state output.
    """
    super().__init__()
    self.use_prev_state = use_prev_state
    self.units = units

    pose_dim = 7
    sensors_dim = 7
    state_dim = 2
    control_dim = 7
    half = units // 2

    def _half_width_encoder(in_dim):
        # Shared shape for the state and control encoders.
        return nn.Sequential(
            nn.Linear(in_dim, half),
            nn.ReLU(inplace=True),
            resblocks.Linear(half),
        )

    self.state_layers = _half_width_encoder(state_dim)
    self.control_layers = _half_width_encoder(control_dim)

    self.observation_image_layers = nn.Sequential(
        nn.Conv2d(in_channels=1, out_channels=4, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        resblocks.Conv2d(channels=4),
        nn.Conv2d(in_channels=4, out_channels=1, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.Flatten(),  # 32 * 32 = 1024
        nn.Linear(1024, units),
        nn.ReLU(inplace=True),
        resblocks.Linear(units),
    )
    self.observation_pose_layers = nn.Sequential(
        nn.Linear(pose_dim, units),
        resblocks.Linear(units),
    )
    self.observation_sensors_layers = nn.Sequential(
        nn.Linear(sensors_dim, units),
        resblocks.Linear(units),
    )

    # Two half-width features (state, control) + three full-width
    # observation features feed the shared head.
    self.shared_layers = nn.Sequential(
        nn.Linear(half * 2 + units * 3, units),
        nn.ReLU(inplace=True),
        resblocks.Linear(units),
        resblocks.Linear(units),
        nn.Linear(units, state_dim),  # Directly output new state
        # nn.LogSigmoid()
    )
def test_resblock_smoke_test():
    """Smoke test: every (activation, inplace) combination must construct."""
    for activation in resblocks.Base._activation_types.keys():
        for inplace in (True, False):
            # Linear resblock: 20 units, bottleneck of 3.
            resblocks.Linear(20, 3, activation=activation, activations_inplace=inplace)
            # Conv resblock with an explicit bottleneck and kernel size.
            resblocks.Conv2d(
                channels=20,
                bottleneck_channels=3,
                kernel_size=5,
                activation=activation,
                activations_inplace=inplace,
            )
def __init__(self, c_in, c_out, kernel_size, h_out, w_out, batch_norm=True, **kwargs):
    """Transposed-conv block: convT -> activation -> norm -> residual -> norm.

    Args:
        c_in (int): Input channels.
        c_out (int): Output channels.
        kernel_size (int): Transposed-convolution kernel size.
        h_out (int): Output height (sizes LayerNorm when batch_norm=False).
        w_out (int): Output width (likewise).
        batch_norm (bool): Use BatchNorm2d instead of LayerNorm.
        **kwargs: Forwarded to nn.ConvTranspose2d (e.g. stride, padding).

    Fix: replaced legacy `super(_ConvT2dLayerNorm, self).__init__()` with
    the equivalent Python 3 zero-argument form.
    """
    super().__init__()
    self._convt = nn.ConvTranspose2d(c_in, c_out, kernel_size, **kwargs)
    self._act = nn.LeakyReLU()
    self._resblock = resblocks.Conv2d(c_out, activation="leaky_relu")
    # Two separate norm layers: one presumably applied before the resblock
    # and one after (see forward() — not visible here).
    if batch_norm:
        self._ln = nn.BatchNorm2d(c_out)
        self._ln_post = nn.BatchNorm2d(c_out)
    else:
        self._ln = nn.LayerNorm([c_out, h_out, w_out])
        self._ln_post = nn.LayerNorm([c_out, h_out, w_out])
def observation_image_layers(units: int) -> nn.Module:
    """Create an image encoder block.

    Three conv layers (with one residual conv block) expand a 1-channel
    input to 8 channels, then a dense head maps the flattened 8 * 32 * 32
    features to a `units`-dimensional vector.

    Args:
        units (int): # of hidden units in network layers.

    Returns:
        nn.Module: Encoder block.
    """
    conv_stack = [
        nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5, padding=2),
        nn.ReLU(inplace=True),
        resblocks.Conv2d(channels=32, kernel_size=3),
        nn.Conv2d(in_channels=32, out_channels=16, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(in_channels=16, out_channels=8, kernel_size=3, padding=1),
    ]
    dense_head = [
        nn.Flatten(),  # 32 * 32 * 8
        nn.Linear(8 * 32 * 32, units),
        nn.ReLU(inplace=True),
        resblocks.Linear(units),
    ]
    return nn.Sequential(*conv_stack, *dense_head)
def get_simple_encoder_and_dsd_decoder( in_channels: int, network_channels: List[int], img_size: int, latent_dim: int, pixel_res: int, cond_channels: int = 0, ) -> Tuple[nn.Module, nn.Module]: """Get and encoder and decoder for use in a VAE.""" # Construct Encoder # Output is diagonal covariance Gaussian layers: List[nn.Module] = [] in_c = in_channels + cond_channels for i, c in enumerate(network_channels): layers.append( _Conv2dLayerNorm( in_c, c, 3, img_size // (2**(i + 1)), img_size // (2**(i + 1)), padding=1, stride=2, )) in_c = c layers.append(nn.Flatten(start_dim=-3)) # out: (..., in_c) layers.append(nn.Linear(in_c, 2 * latent_dim)) encoder = nn.Sequential(*layers) # Construct Decoder # Output is discrete log-softmax distribution # > see: "Pixel Recurrent Neural Networks", sec. 5.3 div_base = 2 factor_list = reversed(range(1, int(np.log(pixel_res) / np.log(div_base)))) pix_layers = [ _ConvT2dLayerNorm( pixel_res // div_base**(i + 1), pixel_res // div_base**i, 3, img_size, img_size, padding=1, ) for i in factor_list ] decoder = nn.Sequential( nn.Linear(latent_dim + cond_channels, 4 * img_size * img_size), _Reshape((-1, 64, int(img_size / 4), int(img_size / 4))), nn.LeakyReLU(), nn.LayerNorm([64, img_size // 4, img_size // 4]), _ConvT2dLayerNorm(64, 16, 2, img_size // 2, img_size // 2, stride=2), _ConvT2dLayerNorm(16, 1, 2, img_size, img_size, stride=2), *pix_layers, nn.ConvTranspose2d(pixel_res // div_base, pixel_res, 3, padding=1), nn.LeakyReLU(), nn.LayerNorm([pixel_res, img_size, img_size]), resblocks.Conv2d(pixel_res, pixel_res * div_base, activation="leaky_relu"), nn.LogSoftmax(dim=-3), ) return encoder, decoder
def __init__(self, units=32):
    """LSTM-based fusion network regressing a 2-D state.

    Encodes image / pose / sensor observations and the control input into
    `units`-dim features each, fuses the four features, runs a 2-layer
    LSTM, and decodes the hidden state to the 2-D state estimate.

    Args:
        units (int): Hidden width of the encoder/fusion/output layers.

    Fix: `super().__init__()` is now called before any attribute
    assignment. The original assigned `self.state_dim` first, which only
    works for plain ints and would raise if a module/parameter were ever
    assigned before nn.Module is initialized.
    """
    super().__init__()

    obs_pos_dim = 3
    obs_sensors_dim = 7
    control_dim = 7
    self.state_dim = 2
    self.lstm_hidden_dim = 4
    self.lstm_num_layers = 2
    self.units = units

    # Observation encoders
    self.image_rows = 32
    self.image_cols = 32
    self.observation_image_layers = nn.Sequential(
        nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5, padding=2),
        nn.ReLU(inplace=True),
        resblocks.Conv2d(channels=32, kernel_size=3),
        nn.Conv2d(in_channels=32, out_channels=16, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(in_channels=16, out_channels=8, kernel_size=3, padding=1),
        nn.Flatten(),  # 32 * 32 * 8
        nn.Linear(8 * 32 * 32, units),
        nn.ReLU(inplace=True),
        resblocks.Linear(units),
    )
    self.observation_pose_layers = nn.Sequential(
        nn.Linear(obs_pos_dim, units),
        nn.ReLU(inplace=True),
        resblocks.Linear(units),
    )
    self.observation_sensors_layers = nn.Sequential(
        nn.Linear(obs_sensors_dim, units),
        nn.ReLU(inplace=True),
        resblocks.Linear(units),
    )

    # Control layers
    self.control_layers = nn.Sequential(
        nn.Linear(control_dim, units),
        nn.ReLU(inplace=True),
        resblocks.Linear(units),
    )

    # Fusion layer over the four concatenated feature vectors
    self.fusion_layers = nn.Sequential(
        nn.Linear(units * 4, units),
        nn.ReLU(inplace=True),
        resblocks.Linear(units),
    )

    # LSTM layers
    self.lstm = nn.LSTM(units, self.lstm_hidden_dim, self.lstm_num_layers,
                        batch_first=True)

    # Define the output layer
    self.output_layers = nn.Sequential(
        nn.Linear(self.lstm_hidden_dim, units),
        nn.ReLU(inplace=True),
        # resblocks.Linear(units),
        nn.Linear(units, self.state_dim),
    )
def __init__(self, state_dim=2, units=32, use_softmax=True, use_log_softmax=False):
    """Multimodal network with per-modality weighting/proposal heads.

    Encodes image / pose / sensor observations into `units`-dim features.
    With `use_softmax`, a shared head emits 2 * (state_dim + 1) outputs
    (presumably weight logits — see TODO below); otherwise a 3*units-wide
    shared feature presumably feeds the sigmoid-bounded force/image heads
    in forward() — verify.

    Args:
        state_dim (int): Dimensionality of the state.
        units (int): Hidden width of each encoder branch.
        use_softmax (bool): Select the softmax-logit shared head.
        use_log_softmax (bool): Stored flag; effect not visible here.
    """
    super().__init__()
    obs_pose_dim = 3
    obs_sensors_dim = 7
    self.state_dim = state_dim
    self.use_softmax = use_softmax
    self.use_log_softmax = use_log_softmax
    self.observation_image_layers = nn.Sequential(
        nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5, padding=2),
        nn.ReLU(inplace=True),
        resblocks.Conv2d(channels=32, kernel_size=3),
        nn.Conv2d(in_channels=32, out_channels=16, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(in_channels=16, out_channels=2, kernel_size=3, padding=1),
        nn.Flatten(),  # 2 * 32 * 32 = 2048 (final conv has 2 channels)
        nn.Linear(2 * 32 * 32, units),
        nn.ReLU(inplace=True),
        resblocks.Linear(units),
    )
    self.observation_pose_layers = nn.Sequential(
        nn.Linear(obs_pose_dim, units),
        resblocks.Linear(units, activation='leaky_relu'),
    )
    self.observation_sensors_layers = nn.Sequential(
        nn.Linear(obs_sensors_dim, units),
        resblocks.Linear(units, activation='leaky_relu'),
    )
    # TODO: the +1 only works for state_dim == 2; in general it should be
    # self.state_dim + state_dim * (state_dim - 1) / 2.
    if self.use_softmax:
        self.shared_layers = nn.Sequential(
            nn.Linear(units * 3, units),
            nn.ReLU(inplace=True),
            resblocks.Linear(units, units),
            resblocks.Linear(units, units),
            resblocks.Linear(units, units),
            nn.Linear(units, 2 * (self.state_dim + 1)),
        )
    else:
        self.shared_layers = nn.Sequential(
            nn.Linear(units * 3, units * 3),
            nn.ReLU(inplace=True),
            resblocks.Linear(3 * units),
        )
    # Sigmoid-bounded per-state-dim proposals from force and image features.
    self.force_prop_layer = nn.Sequential(
        nn.Linear(units, units),
        nn.ReLU(inplace=True),
        nn.Linear(units, self.state_dim),
        nn.Sigmoid(),
    )
    self.image_prop_layer = nn.Sequential(
        nn.Linear(units, units),
        nn.ReLU(inplace=True),
        nn.Linear(units, self.state_dim),
        nn.Sigmoid(),
    )
    # Kaiming init for conv layers; unit-weight/zero-bias for batch norms.
    for m in self.modules():
        if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d):
            nn.init.kaiming_normal_(m.weight.data)
            if m.bias is not None:
                m.bias.data.zero_()
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()
    self.units = units
def __init__(self, units=128, state_dim=2, use_states=False, missing_modalities=None, add_R_noise=1e-6):
    """EKF measurement model: observations -> measurement z and noise R.

    Encodes the active observation modalities, fuses them through a shared
    trunk, and feeds separate heads for the measurement (`z_layer`) and
    its noise (`r_layer`).

    Args:
        units (int): Hidden width of each encoder branch.
        state_dim (int): State dimensionality; output size of z/R heads.
        use_states (bool): Stored flag for using states in the measurement
            update (effect lives in forward(), not visible here).
        missing_modalities (list | str, optional): Modality name(s) to drop
            from {"image", "gripper_sensors", "gripper_pos"}.
        add_R_noise (float): Magnitude of the constant noise floor tensor.
    """
    super().__init__()
    obs_pose_dim = 3
    obs_sensors_dim = 7
    image_dim = (32, 32)
    # We do not use states for EKF
    self.state_dim = state_dim
    # if we want to use states in measurement model update
    self.use_states = use_states

    # Missing modalities
    self.modalities = set(["image", 'gripper_sensors', 'gripper_pos'])
    if missing_modalities:
        if type(missing_modalities) == list:
            self.modalities -= set(missing_modalities)
        else:
            # Single modality name passed as a string.
            assert missing_modalities in self.modalities
            self.modalities -= set([missing_modalities])

    self.observation_image_layers = nn.Sequential(
        nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5, padding=2),
        nn.ReLU(inplace=True),
        resblocks.Conv2d(channels=32, kernel_size=3),
        nn.Conv2d(in_channels=32, out_channels=16, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(in_channels=16, out_channels=2, kernel_size=3, padding=1),
        nn.Flatten(),  # 32 * 32 * 2
        nn.Linear(2 * 32 * 32, units),
        nn.ReLU(inplace=True),
        resblocks.Linear(units),
    )
    self.observation_pose_layers = nn.Sequential(
        nn.Linear(obs_pose_dim, units),
        # nn.ReLU(inplace=True),
        resblocks.Linear(units),
    )
    self.observation_sensors_layers = nn.Sequential(
        nn.Linear(obs_sensors_dim, units),
        # nn.ReLU(inplace=True),
        resblocks.Linear(units),
    )

    # Shared trunk sized by the number of active modalities; outputs
    # 2 * units features. NOTE(review): r_layer/z_layer each take `units`
    # inputs, so forward() presumably splits the trunk output into two
    # halves — confirm.
    self.shared_layers = nn.Sequential(
        nn.Linear(units * (len(self.modalities)), units * 2),
        nn.ReLU(inplace=True),
        resblocks.Linear(2 * units),
        resblocks.Linear(2 * units),
    )
    self.r_layer = nn.Sequential(
        nn.Linear(units, self.state_dim),
        nn.ReLU(inplace=True),
        resblocks.Linear(self.state_dim),
        nn.Linear(self.state_dim, self.state_dim),
    )
    self.z_layer = nn.Sequential(
        nn.Linear(units, self.state_dim),
        nn.ReLU(inplace=True),
        resblocks.Linear(self.state_dim),
        nn.Linear(self.state_dim, self.state_dim),
    )
    self.units = units
    # for m in self.modules():
    #     if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d):
    #         nn.init.kaiming_normal_(m.weight.data)
    #         if m.bias is not None:
    #             m.bias.data.zero_()
    #     elif isinstance(m, nn.BatchNorm2d):
    #         m.weight.data.fill_(1)
    #         m.bias.data.zero_()

    # NOTE(review): plain tensor, not a registered buffer — it will not
    # follow the module across .to(device) / state_dict; confirm intended.
    self.add_R_noise = torch.ones(state_dim) * add_R_noise
def __init__(self, units=16, state_dim=2, use_states=False, use_spatial_softmax=False):
    """Deprecated EKF measurement model (superseded by panda_models.py).

    Args:
        units (int): Hidden width of each encoder branch.
        state_dim (int): State dimensionality; output size of z/R heads.
        use_states (bool): If True, the shared head also consumes the
            `units`-wide state feature from `state_layers`.
        use_spatial_softmax (bool): Select the spatial-softmax image head
            instead of flatten + dense.
    """
    super().__init__()
    print("Currently deprecated. Use models in panda_models.py")
    obs_pose_dim = 3
    obs_sensors_dim = 7
    image_dim = (32, 32)  # NOTE(review): unused local, kept for reference
    self.state_dim = state_dim
    self.use_states = use_states

    # Shared head consumes one extra `units`-wide feature when states are
    # included (3 observation branches + optional state branch).
    if self.use_states:
        shared_layer_dim = units * 4
    else:
        shared_layer_dim = units * 3

    if use_spatial_softmax:
        self.observation_image_layers = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            resblocks.Conv2d(channels=32, kernel_size=3),
            nn.Conv2d(in_channels=32, out_channels=16, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=16, out_channels=16, kernel_size=3, padding=1),
            # Presumably 2 expected coordinates per channel -> 16 * 2
            # features, matching the following Linear — confirm against
            # spatial_softmax.SpatialSoftmax.
            spatial_softmax.SpatialSoftmax(32, 32, 16),
            nn.Linear(16 * 2, units),
            nn.ReLU(inplace=True),
            resblocks.Linear(units),
        )
    else:
        self.observation_image_layers = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            resblocks.Conv2d(channels=32, kernel_size=3),
            nn.Conv2d(in_channels=32, out_channels=16, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=16, out_channels=8, kernel_size=3, padding=1),
            nn.Flatten(),  # 8 * 32 * 32 = 8192
            nn.Linear(8 * 32 * 32, units),
            nn.ReLU(inplace=True),
            resblocks.Linear(units),
        )
    self.observation_pose_layers = nn.Sequential(
        nn.Linear(obs_pose_dim, units),
        resblocks.Linear(units, activation='leaky_relu'),
    )
    self.observation_sensors_layers = nn.Sequential(
        nn.Linear(obs_sensors_dim, units),
        resblocks.Linear(units, activation='leaky_relu'),
    )
    self.state_layers = nn.Sequential(nn.Linear(self.state_dim, units), )

    # Fused trunk outputs 2 * units features. NOTE(review): r_layer and
    # z_layer each take `units` inputs, so forward() presumably splits the
    # trunk output into two halves — confirm.
    self.shared_layers = nn.Sequential(
        nn.Linear(shared_layer_dim, units * 2),
        nn.ReLU(inplace=True),
        resblocks.Linear(2 * units),
        resblocks.Linear(2 * units),
    )
    self.r_layer = nn.Sequential(
        nn.Linear(units, self.state_dim),
        nn.ReLU(inplace=True),
        resblocks.Linear(self.state_dim),
    )
    self.z_layer = nn.Sequential(
        nn.Linear(units, self.state_dim),
        nn.ReLU(inplace=True),
        resblocks.Linear(self.state_dim),
    )
    self.units = units