Example #1
    def __init__(self, z_dim, initialize_weights=True):
        super().__init__()
        self.z_dim = z_dim

        self.conv_1 = conv2d(1, 16, kernel_size=7, stride=2)
        self.conv_2 = conv2d(16, 32, kernel_size=7, stride=2)
        self.conv_3 = conv2d(32, 32, kernel_size=7, stride=2)
        self.conv_4 = conv2d(32, 32, kernel_size=7, stride=2)
        self.flatten = Flatten()

        self.linear = nn.Linear(32 * 10 * 8, self.z_dim)
        if initialize_weights:
            init_weights(self.modules())
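The nn.Linear(32 * 10 * 8, self.z_dim) layer pins down the expected input geometry: four stride-2 convolutions shrink the spatial dimensions by a factor of 16, so a 1-channel input of roughly 160 x 128 ends up as a 32 x 10 x 8 feature map. The excerpt stops at __init__; the forward pass below is an illustrative sketch of how these layers would typically be chained, not necessarily the original implementation:

    def forward(self, x):
        # x: (batch, 1, 160, 128) -> (batch, 32, 10, 8) after four stride-2 convs
        out = self.conv_1(x)
        out = self.conv_2(out)
        out = self.conv_3(out)
        out = self.conv_4(out)
        out = self.flatten(out)   # (batch, 32 * 10 * 8)
        return self.linear(out)   # (batch, z_dim)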
Example #2
    def __init__(self, z_dim, initialize_weights=True):
        """
        Decodes the optical flow and optical flow mask.
        """
        super().__init__()

        self.optical_flow_conv = conv2d(2 * z_dim, 64, kernel_size=1, stride=1)

        self.img_deconv6 = deconv(64, 64)
        self.img_deconv5 = deconv(64, 32)
        self.img_deconv4 = deconv(162, 32)
        self.img_deconv3 = deconv(98, 32)
        self.img_deconv2 = deconv(98, 32)

        self.predict_optical_flow6 = predict_flow(64)
        self.predict_optical_flow5 = predict_flow(162)
        self.predict_optical_flow4 = predict_flow(98)
        self.predict_optical_flow3 = predict_flow(98)
        self.predict_optical_flow2 = predict_flow(66)

        # Upsample the predicted 2-channel flow from each scale to the next finer one.
        self.upsampled_optical_flow6_to_5 = nn.ConvTranspose2d(
            2, 2, kernel_size=4, stride=2, padding=1, bias=False
        )
        self.upsampled_optical_flow5_to_4 = nn.ConvTranspose2d(
            2, 2, kernel_size=4, stride=2, padding=1, bias=False
        )
        self.upsampled_optical_flow4_to_3 = nn.ConvTranspose2d(
            2, 2, kernel_size=4, stride=2, padding=1, bias=False
        )
        self.upsampled_optical_flow3_to_2 = nn.ConvTranspose2d(
            2, 2, kernel_size=4, stride=2, padding=1, bias=False
        )

        self.predict_optical_flow2_mask = nn.Conv2d(66,
                                                    1,
                                                    kernel_size=3,
                                                    stride=1,
                                                    padding=1,
                                                    bias=False)

        if initialize_weights:
            init_weights(self.modules())
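The channel counts above follow a FlowNet-style coarse-to-fine scheme: at each scale the decoder concatenates the matching encoder feature map, the 32-channel deconvolution output, and the 2-channel upsampled flow, hence 162 = 128 + 32 + 2, 98 = 64 + 32 + 2, and 66 = 32 + 32 + 2. The deconv and predict_flow helpers are not shown in the excerpt; the definitions below are a sketch of how they are commonly written in FlowNet-style PyTorch code, and the exact kernel sizes and activation are assumptions:

import torch.nn as nn

def deconv(in_planes, out_planes):
    # Double the spatial resolution; the LeakyReLU slope is an assumption.
    return nn.Sequential(
        nn.ConvTranspose2d(in_planes, out_planes, kernel_size=4, stride=2, padding=1, bias=False),
        nn.LeakyReLU(0.1, inplace=True),
    )

def predict_flow(in_planes):
    # Project the concatenated features to a 2-channel (dx, dy) flow field.
    return nn.Conv2d(in_planes, 2, kernel_size=3, stride=1, padding=1, bias=False)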
Example #3
    def __init__(self, z_dim, initialize_weights=True):
        """
        Image encoder taken from Making Sense of Vision and Touch
        """
        super().__init__()
        self.z_dim = z_dim

        self.img_conv1 = conv2d(3, 16, kernel_size=7, stride=2)
        self.img_conv2 = conv2d(16, 32, kernel_size=5, stride=2)
        self.img_conv3 = conv2d(32, 64, kernel_size=5, stride=2)
        self.img_conv4 = conv2d(64, 64, stride=2)
        self.img_conv5 = conv2d(64, 128, stride=2)
        self.img_conv6 = conv2d(128, self.z_dim, stride=2)
        self.img_encoder = nn.Linear(4 * self.z_dim, 2 * self.z_dim)
        self.flatten = Flatten()

        if initialize_weights:
            init_weights(self.modules())
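The nn.Linear(4 * self.z_dim, 2 * self.z_dim) layer implies a 2 x 2 spatial map after the sixth convolution, i.e. an RGB input of roughly 128 x 128 (six stride-2 convolutions divide each side by 64). Below is an illustrative forward pass under that assumption; the original likely also returns the intermediate feature maps that the flow decoder in Example #2 consumes as skip connections:

    def forward(self, image):
        # image: (batch, 3, 128, 128)
        out_conv1 = self.img_conv1(image)
        out_conv2 = self.img_conv2(out_conv1)
        out_conv3 = self.img_conv3(out_conv2)
        out_conv4 = self.img_conv4(out_conv3)
        out_conv5 = self.img_conv5(out_conv4)
        out_conv6 = self.img_conv6(out_conv5)   # (batch, z_dim, 2, 2)
        flat = self.flatten(out_conv6)          # (batch, 4 * z_dim)
        return self.img_encoder(flat)           # (batch, 2 * z_dim)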
Example #4
    def __init__(self, z_dim, initialize_weights=True):
        """
        Simplified Depth Encoder taken from Making Sense of Vision and Touch
        """
        super().__init__()
        self.z_dim = z_dim

        self.depth_conv1 = conv2d(1, 32, kernel_size=3, stride=2)
        self.depth_conv2 = conv2d(32, 64, kernel_size=3, stride=2)
        self.depth_conv3 = conv2d(64, 64, kernel_size=4, stride=2)
        self.depth_conv4 = conv2d(64, 64, stride=2)
        self.depth_conv5 = conv2d(64, 128, stride=2)
        self.depth_conv6 = conv2d(128, self.z_dim, stride=2)

        self.depth_encoder = nn.Linear(16 * self.z_dim, 2 * self.z_dim)
        self.flatten = Flatten()

        if initialize_weights:
            init_weights(self.modules())
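Here the same arithmetic gives a 4 x 4 spatial map, so nn.Linear(16 * self.z_dim, 2 * self.z_dim) corresponds to a depth input of roughly 256 x 256. A matching illustrative forward pass, again assumed rather than taken from the original source:

    def forward(self, depth):
        # depth: (batch, 1, 256, 256)
        out = self.depth_conv1(depth)
        out = self.depth_conv2(out)
        out = self.depth_conv3(out)
        out = self.depth_conv4(out)
        out = self.depth_conv5(out)
        out = self.depth_conv6(out)       # (batch, z_dim, 4, 4)
        flat = self.flatten(out)          # (batch, 16 * z_dim)
        return self.depth_encoder(flat)   # (batch, 2 * z_dim)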
    def test_same_shape_with_dilation(self):
        # A 3x3 kernel with dilation=2 should still preserve the 5x5 spatial size,
        # which requires the conv2d helper to choose its padding with dilation in mind.
        x = torch.randn(1, 1, 5, 5)
        conv = conv2d(1, 1, 3, dilation=2)
        with torch.no_grad():
            out = conv(x)
        assert out.shape[2:] == x.shape[2:]
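For this assertion to hold, the conv2d helper used throughout the examples has to apply "same" padding that accounts for dilation. A minimal sketch of such a helper, assuming an odd default kernel size and a LeakyReLU activation (both assumptions; the project's actual utility may differ):

import torch.nn as nn

def conv2d(in_channels, out_channels, kernel_size=3, stride=1, dilation=1):
    # padding = dilation * (kernel_size - 1) // 2 keeps H and W unchanged at
    # stride 1, so the dilation test above passes; stride 2 then halves them.
    return nn.Sequential(
        nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=dilation * (kernel_size - 1) // 2,
            dilation=dilation,
        ),
        nn.LeakyReLU(0.1, inplace=True),
    )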