def __init__(self, z_dim, initialize_weights=True):
    """
    Build a four-stage convolutional encoder with a linear output head.

    Every stage is created by ``conv2d`` with a 7x7 kernel and stride 2.
    The stage outputs are flattened and projected to ``z_dim`` features.

    Args:
        z_dim: Number of output features of the final linear layer.
        initialize_weights: When truthy, ``init_weights`` is applied to
            all sub-modules once they have been constructed.
    """
    super().__init__()
    self.z_dim = z_dim

    # All four stages share the same kernel size and stride.
    shared_conv_args = {"kernel_size": 7, "stride": 2}
    self.conv_1 = conv2d(1, 16, **shared_conv_args)
    self.conv_2 = conv2d(16, 32, **shared_conv_args)
    self.conv_3 = conv2d(32, 32, **shared_conv_args)
    self.conv_4 = conv2d(32, 32, **shared_conv_args)

    self.flatten = Flatten()

    # NOTE(review): presumably 32 channels over a 10 x 8 feature map after
    # the last stage -- confirm against the expected input resolution.
    flattened_size = 32 * 10 * 8
    self.linear = nn.Linear(flattened_size, self.z_dim)

    if initialize_weights:
        init_weights(self.modules())
def __init__(
    self,
    z_dim,
    initailize_weights=True,
    *,
    initialize_weights=None,
):
    """
    Image encoder taken from Making Sense of Vision and Touch.

    Builds six stride-2 ``conv2d`` stages followed by a linear layer that
    maps ``4 * z_dim`` features to ``2 * z_dim`` features.

    Args:
        z_dim: Latent size. It sets the output width of the last conv
            stage and the input/output widths of the linear layer.
        initailize_weights: Original (misspelled) flag, kept so existing
            callers keep working. When truthy, ``init_weights`` is applied
            to all sub-modules.
        initialize_weights: Correctly spelled keyword-only alias of
            ``initailize_weights``. When it is not ``None`` it takes
            precedence over the misspelled flag.
    """
    super().__init__()
    self.z_dim = z_dim

    # Prefer the correctly spelled flag when the caller supplied it.
    if initialize_weights is None:
        should_init = initailize_weights
    else:
        should_init = initialize_weights

    # NOTE(review): the first two positional arguments are presumably the
    # input/output channel counts -- confirm against conv2d's signature.
    self.img_conv1 = conv2d(3, 16, kernel_size=7, stride=2)
    self.img_conv2 = conv2d(16, 32, kernel_size=5, stride=2)
    self.img_conv3 = conv2d(32, 64, kernel_size=5, stride=2)
    # The remaining stages rely on conv2d's default kernel size.
    self.img_conv4 = conv2d(64, 64, stride=2)
    self.img_conv5 = conv2d(64, 128, stride=2)
    self.img_conv6 = conv2d(128, self.z_dim, stride=2)

    self.img_encoder = nn.Linear(4 * self.z_dim, 2 * self.z_dim)
    self.flatten = Flatten()

    if should_init:
        init_weights(self.modules())
def __init__(
    self,
    z_dim,
    initailize_weights=True,
    *,
    initialize_weights=None,
):
    """
    Simplified Depth Encoder taken from Making Sense of Vision and Touch.

    Builds six stride-2 ``conv2d`` stages followed by a linear layer that
    maps ``16 * z_dim`` features to ``2 * z_dim`` features.

    Args:
        z_dim: Latent size. It sets the output width of the last conv
            stage and the input/output widths of the linear layer.
        initailize_weights: Original (misspelled) flag, kept so existing
            callers keep working. When truthy, ``init_weights`` is applied
            to all sub-modules.
        initialize_weights: Correctly spelled keyword-only alias of
            ``initailize_weights``. When it is not ``None`` it takes
            precedence over the misspelled flag.
    """
    super().__init__()
    self.z_dim = z_dim

    # Prefer the correctly spelled flag when the caller supplied it.
    if initialize_weights is None:
        should_init = initailize_weights
    else:
        should_init = initialize_weights

    # NOTE(review): the first two positional arguments are presumably the
    # input/output channel counts -- confirm against conv2d's signature.
    self.depth_conv1 = conv2d(1, 32, kernel_size=3, stride=2)
    self.depth_conv2 = conv2d(32, 64, kernel_size=3, stride=2)
    self.depth_conv3 = conv2d(64, 64, kernel_size=4, stride=2)
    # The remaining stages rely on conv2d's default kernel size.
    self.depth_conv4 = conv2d(64, 64, stride=2)
    self.depth_conv5 = conv2d(64, 128, stride=2)
    self.depth_conv6 = conv2d(128, self.z_dim, stride=2)

    self.depth_encoder = nn.Linear(16 * self.z_dim, 2 * self.z_dim)
    self.flatten = Flatten()

    if should_init:
        init_weights(self.modules())