def __init__(self,
                 image_emb_size,
                 input_num_channels,
                 image_height,
                 image_width,
                 text_emb_size,
                 using_recurrence=False):
        """Create the layers of the text-kernel ResNet image module.

        Args:
            image_emb_size: Output size of the final dense layer.
            input_num_channels: Number of channels of the input image.
            image_height: Height of the input image.
            image_width: Width of the input image.
            text_emb_size: Input size of the dense layer that maps a text
                embedding to kernel weights.
            using_recurrence: Flag stored on the module; it is not used by
                the constructor itself.
        """
        super(ImageTextKernelResnetModule, self).__init__()
        self.input_dims = (input_num_channels, image_height, image_width)
        self.using_recurrence = using_recurrence

        # Maps the text embedding to 16 * 3 * 7 * 7 values, i.e. enough
        # weights for 16 kernels of shape 3x7x7.
        self.dense_text_to_kernel = nn.Linear(text_emb_size, 16 * 3 * 7 * 7)
        # NOTE(review): torch.FloatTensor(sizes) allocates the tensor without
        # initializing it; presumably it is initialized elsewhere - confirm.
        self.global_kernel = nn.Parameter(torch.FloatTensor(16, 3, 7, 7))

        self.norm1 = nn.InstanceNorm2d(input_num_channels)
        self.conv1 = nn.Conv2d(input_num_channels, 32, 7, stride=2, padding=3)

        # Size bookkeeping: each of the four stages below is counted as
        # halving height and width. Floor division keeps h and w integers on
        # Python 3 (true division would hand nn.Linear a float size).
        h, w = (image_height // 2, image_width // 2)

        block1 = nn.Sequential(blocks.ResBlockStrided(32, 32),
                               blocks.ResBlock(32))
        h, w = h // 2, w // 2

        block2 = nn.Sequential(blocks.ResBlockStrided(32, 32),
                               blocks.ResBlock(32))
        h, w = h // 2, w // 2

        block3 = nn.Sequential(blocks.ResBlockStrided(32, 32), )
        h, w = h // 2, w // 2

        self.resnet_blocks = nn.Sequential(block1, block2, block3)
        self.norm2 = nn.InstanceNorm2d(32)
        # Input size is the tracked h x w map times 32 feature maps.
        self.dense = nn.Linear(h * w * 32, image_emb_size)
        self.global_id = 0
Esempio n. 2
0
    def __init__(self, image_emb_size, input_num_channels,
                 image_height, image_width, using_recurrence=False):
        """Create the layers of the position ResNet image module.

        Args:
            image_emb_size: Output size of the final dense layer.
            input_num_channels: Number of channels of the input image.
            image_height: Height of the input image.
            image_width: Width of the input image.
            using_recurrence: Flag stored on the module; it is not used by
                the constructor itself.
        """
        super(ImagePositionResnetModule, self).__init__()
        self.input_dims = (input_num_channels, image_height, image_width)
        self.using_recurrence = using_recurrence

        self.norm1 = nn.InstanceNorm2d(input_num_channels)
        self.conv1 = nn.Conv2d(input_num_channels, 32, 7, stride=2, padding=3)
        # Size bookkeeping: each of the four stages is counted as halving
        # height and width. Floor division keeps h and w integers on
        # Python 3 (true division would hand nn.Linear a float size).
        h, w = (image_height // 2, image_width // 2)

        block1 = nn.Sequential(
            blocks.ResBlockStrided(32, 32),
            blocks.ResBlock(32)
        )
        h, w = h // 2, w // 2

        block2 = nn.Sequential(
            blocks.ResBlockStrided(32, 32),
            blocks.ResBlock(32)
        )
        h, w = h // 2, w // 2

        block3 = nn.Sequential(
            blocks.ResBlockStrided(32, 32),
        )
        h, w = h // 2, w // 2

        self.resnet_blocks = nn.Sequential(block1, block2, block3)
        self.norm2 = nn.InstanceNorm2d(32)
        # Input size is the tracked h x w map times 32 feature maps.
        self.dense = nn.Linear(h * w * 32, image_emb_size)
        self.global_id = 0
        # 1x1 convolution reducing the 32 feature maps to a single channel.
        self.ryan_top_layer_conv = nn.Conv2d(32, 1, 1, stride=1)
Esempio n. 3
0
    def __init__(self,
                 image_emb_size,
                 input_num_channels,
                 image_height,
                 image_width,
                 text_emb_size,
                 using_recurrence=False):
        """Create the layers of the attention ResNet image module.

        Args:
            image_emb_size: Output size of the final dense layer.
            input_num_channels: Number of channels of the input image.
            image_height: Height of the input image.
            image_width: Width of the input image.
            text_emb_size: Input size of each per-head attention dense layer.
            using_recurrence: Flag stored on the module; it is not used by
                the constructor itself.
        """
        super(ImageAttentionResnetModule, self).__init__()
        self.input_dims = (input_num_channels, image_height, image_width)
        self.using_recurrence = using_recurrence
        self.num_attention_heads = 5

        self.norm1 = nn.InstanceNorm2d(input_num_channels)
        self.conv1 = nn.Conv2d(input_num_channels, 32, 7, stride=2, padding=3)
        # Size bookkeeping: each of the four stages is counted as halving
        # height and width. Floor division keeps h and w integers on
        # Python 3 (true division would hand nn.Linear a float size).
        h, w = (image_height // 2, image_width // 2)

        block1 = nn.Sequential(blocks.ResBlockStrided(32, 32),
                               blocks.ResBlock(32))
        h, w = h // 2, w // 2

        block2 = nn.Sequential(blocks.ResBlockStrided(32, 32),
                               blocks.ResBlock(32))
        h, w = h // 2, w // 2

        block3 = nn.Sequential(blocks.ResBlockStrided(32, 32), )
        h, w = h // 2, w // 2
        self.num_feature_maps = 32

        self.final_h = h
        self.final_w = w

        # One dense layer per attention head, each mapping the text embedding
        # to final_h * final_w values (one per tracked spatial location).
        self.attention_dense_layers = nn.ModuleList()
        # `range` works on both Python 2 and 3; `xrange` is Python 2 only.
        for _ in range(self.num_attention_heads):
            dense_layer = nn.Linear(text_emb_size, self.final_h * self.final_w)
            self.attention_dense_layers.append(dense_layer)

        self.resnet_blocks = nn.Sequential(block1, block2, block3)

        # Input size is the tracked h x w map times the number of heads.
        self.final_dense = nn.Linear(h * w * self.num_attention_heads,
                                     image_emb_size)
Esempio n. 4
0
    def __init__(self, image_emb_size, input_num_channels,
                 image_height, image_width, text_emb_size,
                 using_recurrence=False):
        """Create the layers of the GA ResNet image module.

        Args:
            image_emb_size: Output size of the final dense layer.
            input_num_channels: Number of channels of the input image.
            image_height: Height of the input image.
            image_width: Width of the input image.
            text_emb_size: Input size of the dense layer applied to the text
                embedding.
            using_recurrence: Flag stored on the module; it is not used by
                the constructor itself.
        """
        super(ImageGAResnetModule, self).__init__()
        self.input_dims = (input_num_channels, image_height, image_width)
        self.using_recurrence = using_recurrence

        self.norm1 = nn.InstanceNorm2d(input_num_channels)
        self.conv1 = nn.Conv2d(input_num_channels, 32, 7, stride=2, padding=3)
        # Size bookkeeping: each of the four stages is counted as halving
        # height and width. Floor division keeps h and w integers on
        # Python 3 (true division would hand nn.Linear a float size).
        h, w = (image_height // 2, image_width // 2)

        block1 = nn.Sequential(
            blocks.ResBlockStrided(32, 32),
            blocks.ResBlock(32)
        )
        h, w = h // 2, w // 2

        block2 = nn.Sequential(
            blocks.ResBlockStrided(32, 32),
            blocks.ResBlock(32)
        )
        h, w = h // 2, w // 2

        block3 = nn.Sequential(
            blocks.ResBlockStrided(32, 32),
        )
        h, w = h // 2, w // 2
        self.num_feature_maps = 32

        self.final_h = h
        self.final_w = w

        self.resnet_blocks = nn.Sequential(block1, block2, block3)
        self.norm2 = nn.InstanceNorm2d(self.num_feature_maps)

        # Maps the text embedding to one value per feature map.
        self.text_dense = nn.Linear(text_emb_size, self.num_feature_maps)
        # Input size is the tracked h x w map times the feature-map count.
        self.final_dense = nn.Linear(h * w * self.num_feature_maps, image_emb_size)
        self.global_id = 0
Esempio n. 5
0
    def __init__(self,
                 image_emb_size,
                 input_num_channels,
                 image_height,
                 image_width,
                 using_recurrence=False):
        """Create the layers of the plain ResNet image module.

        Args:
            image_emb_size: Output size of the final dense layer.
            input_num_channels: Number of channels of the input image.
            image_height: Height of the input image.
            image_width: Width of the input image.
            using_recurrence: Flag stored on the module; it is not used by
                the constructor itself.
        """
        super(ImageResnetModule, self).__init__()
        self.input_dims = (input_num_channels, image_height, image_width)
        self.using_recurrence = using_recurrence

        feature_maps = 32

        # Stem: instance norm followed by a strided 7x7 convolution.
        self.norm1 = nn.InstanceNorm2d(input_num_channels)
        self.conv1 = nn.Conv2d(input_num_channels, feature_maps,
                               kernel_size=7, stride=2, padding=3)

        # Three residual stages, built in the same order as they are chained.
        stage1 = nn.Sequential(
            blocks.ResBlockStrided(feature_maps, feature_maps),
            blocks.ResBlock(feature_maps))
        stage2 = nn.Sequential(
            blocks.ResBlockStrided(feature_maps, feature_maps),
            blocks.ResBlock(feature_maps))
        stage3 = nn.Sequential(
            blocks.ResBlockStrided(feature_maps, feature_maps))

        self.resnet_blocks = nn.Sequential(stage1, stage2, stage3)
        self.norm2 = nn.InstanceNorm2d(feature_maps)

        # The stem and each of the three stages are counted as halving the
        # spatial size, hence four successive floor divisions by two.
        out_h, out_w = image_height, image_width
        for _ in range(4):
            out_h, out_w = out_h // 2, out_w // 2

        self.dense = nn.Linear(out_h * out_w * feature_maps, image_emb_size)
        self.global_id = 0