def forward(self, x, lengths):
    """Encode a batch of padded, variable-length captions.

    Args:
        x: word ids, fed to ``self.embed``; batch-first.
        lengths: caption lengths, forwarded to ``pack_padded_sequence`` and
            used to locate the last valid RNN step of every caption.

    Returns:
        The ``l2norm``-ed embedding of each caption's final RNN output
        (element-wise absolute value when ``self.use_abs`` is set).
    """
    # Embed word ids to vectors
    embedded = self.embed(x)
    packed = pack_padded_sequence(embedded, lengths, batch_first=True)

    # Forward propagate RNN
    rnn_out, _ = self.rnn(packed)

    # Back to a padded batch-first tensor; the returned lengths are not needed
    seq_out, _ = pad_packed_sequence(rnn_out, batch_first=True)

    # Index (length - 1) of the last valid step, broadcast over the
    # self.gru_units feature positions so gather picks a whole row per caption
    last_step = torch.LongTensor(lengths).view(-1, 1, 1)
    last_step = Variable(
        last_step.expand(embedded.size(0), 1, self.gru_units) - 1)

    # Follow the device of the RNN output rather than global CUDA
    # availability: a CPU model on a CUDA-capable machine must keep the
    # index on the CPU, otherwise gather fails with a device mismatch.
    if seq_out.is_cuda:
        last_step = last_step.cuda(seq_out.get_device())

    # Reshape *final* output to (batch_size, hidden_size)
    out = torch.gather(seq_out, 1, last_step).squeeze(1)

    if self.fc:
        out = self.fc(out)

    # normalization in the joint embedding space
    outnormed = l2norm(out)

    # take absolute value, used by order embeddings
    if self.use_abs:
        outnormed = torch.abs(outnormed)

    return outnormed
def forward(self, inputs, lengths):
    """Encode captions with the RNN followed by attention over its outputs.

    Args:
        inputs: word ids, fed to ``self.embed``; batch-first.
        lengths: caption lengths, forwarded to ``pack_padded_sequence``.

    Returns:
        The ``l2norm``-ed output of ``self.fc`` (element-wise absolute value
        when ``self.use_abs`` is set).

    Side effect:
        Stores the second value returned by ``self.attention`` in
        ``self.attention_weights``.
    """
    # Word ids -> vectors, packed for the RNN
    embedded = self.embed(inputs)
    packed_in = pack_padded_sequence(embedded, lengths, batch_first=True)

    # Run the RNN and unpack to a padded batch-first tensor
    packed_out, _ = self.rnn(packed_in)
    states, _ = pad_packed_sequence(packed_out, batch_first=True)

    # Attention receives a mask of positions whose word id is 0
    # (presumably the padding id; confirm against the vocabulary)
    attended, weights = self.attention(states, (inputs == 0))
    self.attention_weights = weights

    # Flatten everything but the batch dimension before the projection
    flat = attended.view(inputs.size(0), -1)
    projected = self.fc(flat)

    # normalization in the joint embedding space
    embedding = l2norm(projected)

    # absolute value is used by order embeddings
    return torch.abs(embedding) if self.use_abs else embedding
def forward(self, inputs, lengths):
    """Encode captions from two attended conv branches plus attended embeddings.

    Args:
        inputs: word ids, fed to ``self.embed``; batch-first.
        lengths: unused by this method.

    Returns:
        The ``l2norm``-ed output of ``self.fc`` applied to the concatenated
        branch features (element-wise absolute value when ``self.use_abs``
        is set).

    Side effect:
        Sets ``self.attention_weights`` (see the review note below).
    """
    # Embed word ids to vectors, then move features ahead of time for conv
    embedded = self.embed(inputs)
    feats_first = embedded.permute(0, 2, 1)  # [B, F, T]

    # Branch 1: conv1 with its last time step dropped, then attention.
    # Original note: 10 * 100 = 1000 features after flattening.
    branch1 = self.conv1(feats_first)[:, :, :-1]
    branch1, _ = self.att_conv1(branch1.permute(0, 2, 1), (inputs == 0))
    branch1 = branch1.view(branch1.size(0), -1)

    # Branch 2: conv2, then attention.
    # Original note: 10 * 100 = 1000 features after flattening.
    branch2 = self.conv2(feats_first)
    branch2, _ = self.att_conv2(branch2.permute(0, 2, 1), (inputs == 0))
    branch2 = branch2.view(branch2.size(0), -1)

    # Branch 3: attention directly over the embeddings.
    # Original note: 10 * 300 = 3000 features after flattening.
    branch3, _ = self.att_emb(embedded, (inputs == 0))
    # NOTE(review): this stores the first value returned by att_emb (the one
    # that is flattened and fed to fc below); the second returned value is
    # discarded. Confirm this is the intended content of attention_weights.
    self.attention_weights = branch3
    branch3 = branch3.view(branch3.size(0), -1)

    # Original note: [B, 5000]
    combined = torch.cat([branch1, branch2, branch3], 1)
    projected = self.fc(combined)

    # normalization in the joint embedding space
    embedding = l2norm(projected)

    # absolute value is used by order embeddings
    if self.use_abs:
        return torch.abs(embedding)
    return embedding
def forward(self, x, lengths):
    """Encode captions with parallel initial convolutions and conv towers.

    Args:
        x: word ids, fed to ``self.embed``; batch-first.
        lengths: unused by this method.

    Returns:
        The ``l2norm``-ed output of ``self.fc`` (element-wise absolute value
        when ``self.use_abs`` is set).

    Side effect:
        Replaces ``self.outputs`` with ``{'output': <returned tensor>}``.
    """

    def _global_max_pool(feature_map):
        # Max over the whole last dimension, then drop that dimension
        pooled = F.max_pool1d(feature_map, feature_map.size(-1))
        return pooled.view(pooled.size(0), pooled.size(1))

    # Embed word ids to vectors and swap the last two dimensions for conv
    embedded = self.embed(x).permute(0, 2, 1)

    # Every initial conv sees the same input; results are joined on dim 1
    stem = torch.cat([conv(embedded) for conv in self.init_convs], 1)

    # One globally max-pooled feature vector per tower, joined on the last dim
    tower_feats = [_global_max_pool(tower(stem)) for tower in self.towers]
    latent = self.fc(torch.cat(tower_feats, -1))

    # normalization in the joint embedding space
    out = l2norm(latent)

    # take absolute value, used by order embeddings
    if self.use_abs:
        out = torch.abs(out)

    self.outputs = {'output': out}
    return out
def forward(self, images):
    """Extract image feature vectors with the CNN and project them.

    Args:
        images: input passed to ``self.cnn``.

    Returns:
        The projected features; ``l2norm``-ed unless ``self.no_imgnorm`` is
        set, and made element-wise absolute when ``self.use_abs`` is set.
    """
    # CNN features, normalized in the image embedding space
    cnn_feats = l2norm(self.cnn(images))

    # linear projection to the joint embedding space
    joint = self.fc(cnn_feats)

    # normalization in the joint embedding space (can be switched off)
    if not self.no_imgnorm:
        joint = l2norm(joint)

    # absolute value of the embedding is used in order embeddings
    return torch.abs(joint) if self.use_abs else joint
def forward(self, images):
    """Project precomputed image features into the joint embedding space.

    Args:
        images: precomputed features passed straight to ``self.fc``; they
            are assumed to be l2-normalized already.

    Returns:
        The projected features; ``l2norm``-ed unless ``self.no_imgnorm`` is
        set, and made element-wise absolute when ``self.use_abs`` is set.
    """
    projected = self.fc(images)

    # normalize in the joint embedding space unless disabled
    if self.no_imgnorm:
        embedding = projected
    else:
        embedding = l2norm(projected)

    # absolute value of the embedding is used in order embeddings
    if self.use_abs:
        return torch.abs(embedding)
    return embedding
def __init__(self, phase, base, loc, conf, extras, num_classes, pretrained=True):
    """Store the configuration and register the layer groups.

    Args:
        phase: stored unchanged as ``self.phase``.
        base: layers wrapped in ``nn.ModuleList`` as ``self.base``.
        loc: layers wrapped in ``nn.ModuleList`` as ``self.loc``.
        conf: layers wrapped in ``nn.ModuleList`` as ``self.conf``.
        extras: layers wrapped in ``nn.ModuleList`` as ``self.extras``.
        num_classes: stored unchanged as ``self.num_classes``.
        pretrained: accepted but not used in this constructor.
    """
    super(SSD, self).__init__()

    self.phase, self.num_classes = phase, num_classes
    self.size = 300  # fixed value; not configurable through the arguments

    # Registration order (base, extras, loc, conf) is kept as-is because it
    # determines the order of the registered sub-modules.
    layer_groups = (
        ("base", base),
        ("extras", extras),
        ("loc", loc),
        ("conf", conf),
    )
    for attr_name, layers in layer_groups:
        setattr(self, attr_name, nn.ModuleList(layers))

    # presumably a normalization layer for 512 channels with scale 20;
    # confirm against the definition of l2norm
    self.l2norm = l2norm(512, 20)
def forward(self, inputs, lengths):
    """Encode captions by attending directly over the word embeddings.

    Args:
        inputs: word ids, fed to ``self.embed``; batch-first.
        lengths: unused by this method.

    Returns:
        The ``l2norm``-ed caption features, passed through ``self.fc`` first
        when it is truthy (element-wise absolute value when ``self.use_abs``
        is set).

    Side effect:
        Stores the second value returned by ``self.attention`` in
        ``self.attention_weights``.
    """
    # Embed word ids to vectors
    embedded = self.embed(inputs)

    # Attention receives a mask of positions whose word id is 0
    # (presumably the padding id; confirm against the vocabulary)
    attended, weights = self.attention(embedded, (inputs == 0))
    self.attention_weights = weights

    # Flatten everything but the batch dimension
    features = attended.view(inputs.size(0), -1)

    # Optional projection
    if self.fc:
        features = self.fc(features)

    # normalization in the joint embedding space
    embedding = l2norm(features)

    # absolute value is used by order embeddings
    return torch.abs(embedding) if self.use_abs else embedding