def __init__(self, image_emb_size, input_num_channels, image_height, image_width,
             text_emb_size, using_recurrence=False):
    """Create the layers of ImageTextKernelResnetModule.

    Args:
        image_emb_size: Output size of the final linear layer (``self.dense``).
        input_num_channels: Channel count of the input image; consumed by the
            first instance norm and the first convolution.
        image_height: Input image height, used to size ``self.dense``.
        image_width: Input image width, used to size ``self.dense``.
        text_emb_size: Input size of ``self.dense_text_to_kernel``.
        using_recurrence: Stored on the instance; not otherwise used here.
    """
    super(ImageTextKernelResnetModule, self).__init__()
    self.input_dims = (input_num_channels, image_height, image_width)
    self.using_recurrence = using_recurrence

    # Projects a text embedding to 16 * 3 * 7 * 7 values, i.e. enough for
    # 16 kernels of shape 3x7x7 (the reshape itself is presumably done in
    # forward() -- not visible from this constructor).
    self.dense_text_to_kernel = nn.Linear(text_emb_size, 16 * 3 * 7 * 7)
    # NOTE(review): torch.FloatTensor(16, 3, 7, 7) allocates uninitialized
    # storage; confirm this parameter is initialized elsewhere before use.
    self.global_kernel = nn.Parameter(torch.FloatTensor(16, 3, 7, 7))

    self.norm1 = nn.InstanceNorm2d(input_num_channels)
    self.conv1 = nn.Conv2d(input_num_channels, 32, 7, stride=2, padding=3)

    # Track the spatial size with floor division so h and w stay integers:
    # true division would yield floats and nn.Linear requires an int size.
    # The bookkeeping assumes every strided stage halves height and width.
    h, w = image_height // 2, image_width // 2
    block1 = nn.Sequential(blocks.ResBlockStrided(32, 32), blocks.ResBlock(32))
    h, w = h // 2, w // 2
    block2 = nn.Sequential(blocks.ResBlockStrided(32, 32), blocks.ResBlock(32))
    h, w = h // 2, w // 2
    block3 = nn.Sequential(blocks.ResBlockStrided(32, 32))
    h, w = h // 2, w // 2

    self.resnet_blocks = nn.Sequential(block1, block2, block3)
    self.norm2 = nn.InstanceNorm2d(32)
    # Flattened (h * w * 32) feature map -> image embedding.
    self.dense = nn.Linear(h * w * 32, image_emb_size)
    self.global_id = 0
def __init__(self, image_emb_size, input_num_channels, image_height, image_width,
             using_recurrence=False):
    """Create the layers of ImagePositionResnetModule.

    Args:
        image_emb_size: Output size of the final linear layer (``self.dense``).
        input_num_channels: Channel count of the input image; consumed by the
            first instance norm and the first convolution.
        image_height: Input image height, used to size ``self.dense``.
        image_width: Input image width, used to size ``self.dense``.
        using_recurrence: Stored on the instance; not otherwise used here.
    """
    super(ImagePositionResnetModule, self).__init__()
    self.input_dims = (input_num_channels, image_height, image_width)
    self.using_recurrence = using_recurrence

    self.norm1 = nn.InstanceNorm2d(input_num_channels)
    self.conv1 = nn.Conv2d(input_num_channels, 32, 7, stride=2, padding=3)

    # Track the spatial size with floor division so h and w stay integers:
    # true division would yield floats and nn.Linear requires an int size.
    # The bookkeeping assumes every strided stage halves height and width.
    h, w = image_height // 2, image_width // 2
    block1 = nn.Sequential(blocks.ResBlockStrided(32, 32), blocks.ResBlock(32))
    h, w = h // 2, w // 2
    block2 = nn.Sequential(blocks.ResBlockStrided(32, 32), blocks.ResBlock(32))
    h, w = h // 2, w // 2
    block3 = nn.Sequential(blocks.ResBlockStrided(32, 32))
    h, w = h // 2, w // 2

    self.resnet_blocks = nn.Sequential(block1, block2, block3)
    self.norm2 = nn.InstanceNorm2d(32)
    # Flattened (h * w * 32) feature map -> image embedding.
    self.dense = nn.Linear(h * w * 32, image_emb_size)
    self.global_id = 0

    # 1x1 convolution collapsing the 32 feature maps into a single channel.
    self.ryan_top_layer_conv = nn.Conv2d(32, 1, 1, stride=1)
def __init__(self, image_emb_size, input_num_channels, image_height, image_width,
             text_emb_size, using_recurrence=False):
    """Create the layers of ImageAttentionResnetModule.

    Args:
        image_emb_size: Output size of ``self.final_dense``.
        input_num_channels: Channel count of the input image; consumed by the
            first instance norm and the first convolution.
        image_height: Input image height, used to derive ``self.final_h``.
        image_width: Input image width, used to derive ``self.final_w``.
        text_emb_size: Input size of each attention linear layer.
        using_recurrence: Stored on the instance; not otherwise used here.
    """
    super(ImageAttentionResnetModule, self).__init__()
    self.input_dims = (input_num_channels, image_height, image_width)
    self.using_recurrence = using_recurrence
    self.num_attention_heads = 5

    self.norm1 = nn.InstanceNorm2d(input_num_channels)
    self.conv1 = nn.Conv2d(input_num_channels, 32, 7, stride=2, padding=3)

    # Track the spatial size with floor division so h and w stay integers:
    # true division would yield floats and nn.Linear requires int sizes.
    # The bookkeeping assumes every strided stage halves height and width.
    h, w = image_height // 2, image_width // 2
    block1 = nn.Sequential(blocks.ResBlockStrided(32, 32), blocks.ResBlock(32))
    h, w = h // 2, w // 2
    block2 = nn.Sequential(blocks.ResBlockStrided(32, 32), blocks.ResBlock(32))
    h, w = h // 2, w // 2
    block3 = nn.Sequential(blocks.ResBlockStrided(32, 32))
    h, w = h // 2, w // 2

    self.num_feature_maps = 32
    self.final_h = h
    self.final_w = w

    # One linear layer per attention head, each mapping the text embedding
    # to final_h * final_w values (one per spatial location).
    # `range` instead of `xrange` keeps this working on Python 3 as well.
    self.attention_dense_layers = nn.ModuleList()
    for _ in range(self.num_attention_heads):
        dense_layer = nn.Linear(text_emb_size, self.final_h * self.final_w)
        self.attention_dense_layers.append(dense_layer)

    self.resnet_blocks = nn.Sequential(block1, block2, block3)
    # Input is sized as h * w values per attention head.
    self.final_dense = nn.Linear(h * w * self.num_attention_heads, image_emb_size)
def __init__(self, image_emb_size, input_num_channels, image_height, image_width,
             text_emb_size, using_recurrence=False):
    """Create the layers of ImageGAResnetModule.

    Args:
        image_emb_size: Output size of ``self.final_dense``.
        input_num_channels: Channel count of the input image; consumed by the
            first instance norm and the first convolution.
        image_height: Input image height, used to derive ``self.final_h``.
        image_width: Input image width, used to derive ``self.final_w``.
        text_emb_size: Input size of ``self.text_dense``.
        using_recurrence: Stored on the instance; not otherwise used here.
    """
    super(ImageGAResnetModule, self).__init__()
    self.input_dims = (input_num_channels, image_height, image_width)
    self.using_recurrence = using_recurrence

    self.norm1 = nn.InstanceNorm2d(input_num_channels)
    self.conv1 = nn.Conv2d(input_num_channels, 32, 7, stride=2, padding=3)

    # Track the spatial size with floor division so h and w stay integers:
    # true division would yield floats and nn.Linear requires an int size.
    # The bookkeeping assumes every strided stage halves height and width.
    h, w = image_height // 2, image_width // 2
    block1 = nn.Sequential(blocks.ResBlockStrided(32, 32), blocks.ResBlock(32))
    h, w = h // 2, w // 2
    block2 = nn.Sequential(blocks.ResBlockStrided(32, 32), blocks.ResBlock(32))
    h, w = h // 2, w // 2
    block3 = nn.Sequential(blocks.ResBlockStrided(32, 32))
    h, w = h // 2, w // 2

    self.num_feature_maps = 32
    self.final_h = h
    self.final_w = w

    self.resnet_blocks = nn.Sequential(block1, block2, block3)
    self.norm2 = nn.InstanceNorm2d(self.num_feature_maps)
    # Maps the text embedding to one value per feature map.
    self.text_dense = nn.Linear(text_emb_size, self.num_feature_maps)
    # Flattened (h * w * num_feature_maps) feature map -> image embedding.
    self.final_dense = nn.Linear(h * w * self.num_feature_maps, image_emb_size)
    self.global_id = 0
def __init__(self, image_emb_size, input_num_channels, image_height, image_width,
             using_recurrence=False):
    """Set up the layers of ImageResnetModule.

    Args:
        image_emb_size: Output size of the final linear layer (``self.dense``).
        input_num_channels: Channel count of the input image; consumed by the
            first instance norm and the first convolution.
        image_height: Input image height, used to size ``self.dense``.
        image_width: Input image width, used to size ``self.dense``.
        using_recurrence: Stored on the instance; not otherwise used here.
    """
    super(ImageResnetModule, self).__init__()
    self.input_dims = (input_num_channels, image_height, image_width)
    self.using_recurrence = using_recurrence

    feature_channels = 32

    self.norm1 = nn.InstanceNorm2d(input_num_channels)
    self.conv1 = nn.Conv2d(
        input_num_channels, feature_channels, 7, stride=2, padding=3)

    # Three residual stages, built in order; the last has no plain ResBlock.
    stages = [
        nn.Sequential(
            blocks.ResBlockStrided(feature_channels, feature_channels),
            blocks.ResBlock(feature_channels)),
        nn.Sequential(
            blocks.ResBlockStrided(feature_channels, feature_channels),
            blocks.ResBlock(feature_channels)),
        nn.Sequential(
            blocks.ResBlockStrided(feature_channels, feature_channels)),
    ]

    # Spatial bookkeeping: one floor-halving for conv1 plus one for each of
    # the three strided stages.
    out_h, out_w = image_height, image_width
    for _ in range(4):
        out_h, out_w = out_h // 2, out_w // 2

    self.resnet_blocks = nn.Sequential(*stages)
    self.norm2 = nn.InstanceNorm2d(feature_channels)
    # Flattened (out_h * out_w * feature_channels) map -> image embedding.
    self.dense = nn.Linear(out_h * out_w * feature_channels, image_emb_size)
    self.global_id = 0