def __init__(self, img_dim, embed_size, data_name, use_abs=False, no_imgnorm=False):
    """Create the layers of the precomputed-feature image encoder.

    Args:
        img_dim: Input feature size of the linear layer ``fc``.
        embed_size: Output size of ``fc``; also used as the GRU input and
            hidden size and as the channel count of every ``Rs_GCN`` layer.
        data_name: Dataset identifier. When it equals ``'f30k_precomp'`` a
            ``BatchNorm1d`` layer over ``embed_size`` features is added.
        use_abs: Stored on the instance; not used by the constructor itself.
        no_imgnorm: Stored on the instance; not used by the constructor
            itself.
    """
    super(EncoderImagePrecompAttn, self).__init__()

    # Plain configuration values kept on the instance.
    self.data_name = data_name
    self.embed_size = embed_size
    self.use_abs = use_abs
    self.no_imgnorm = no_imgnorm

    # Linear projection. init_weights() is defined elsewhere in the class and
    # is deliberately invoked right after `fc` exists, before any other layer
    # is built, so the construction order stays as it was.
    self.fc = nn.Linear(img_dim, embed_size)
    self.init_weights()

    # GSR: single-layer, batch-first GRU.
    self.img_rnn = nn.GRU(
        input_size=embed_size,
        hidden_size=embed_size,
        num_layers=1,
        batch_first=True,
    )

    # GCN reasoning: four layers sharing the same channel configuration.
    gcn_kwargs = dict(in_channels=embed_size, inter_channels=embed_size)
    self.Rs_GCN_1 = Rs_GCN(**gcn_kwargs)
    self.Rs_GCN_2 = Rs_GCN(**gcn_kwargs)
    self.Rs_GCN_3 = Rs_GCN(**gcn_kwargs)
    self.Rs_GCN_4 = Rs_GCN(**gcn_kwargs)

    # Batch normalisation is only created for the f30k_precomp dataset.
    if data_name == 'f30k_precomp':
        self.bn = nn.BatchNorm1d(embed_size)
def __init__(self, vocab_size, word_dim, embed_size=2048, inter_channels=2048,
             use_atten=False,
             word_vec='./vocab/word2vec300d_init_threshold4.npy'):
    """Create the layers of the text encoder.

    Args:
        vocab_size: Number of rows of the embedding table.
        word_dim: Width of each word embedding and input size of ``fc``.
        embed_size: Hidden size of the bidirectional GRU.
        inter_channels: Output size of ``fc``; also the GRU input size and
            the channel count of every ``Rs_GCN`` layer.
        use_atten: Stored on the instance; not used by the constructor
            itself.
        word_vec: Path of a ``.npy`` file whose array is copied into the
            first ``vocab_size`` rows of the embedding table.
    """
    super(EncoderTextPrecompAttn, self).__init__()

    # Plain configuration values kept on the instance.
    self.embed_size = embed_size
    self.use_atten = use_atten

    # Word embedding table, overwritten with the vectors stored on disk.
    self.embed = nn.Embedding(vocab_size, word_dim)
    pretrained = np.load(word_vec)
    self.embed.weight.data[:vocab_size] = torch.from_numpy(pretrained)

    # Linear projection. init_weights() is defined elsewhere in the class and
    # runs after the embedding has been loaded and `fc` created, before the
    # remaining layers are built.
    self.fc = nn.Linear(word_dim, inter_channels)
    self.init_weights()

    # GSR: single-layer, batch-first, bidirectional GRU. The attribute keeps
    # the name `img_rnn` even though this is the text encoder.
    self.img_rnn = nn.GRU(
        input_size=inter_channels,
        hidden_size=embed_size,
        num_layers=1,
        batch_first=True,
        bidirectional=True,
    )

    # GCN reasoning: four layers sharing the same channel configuration.
    gcn_kwargs = dict(in_channels=inter_channels, inter_channels=inter_channels)
    self.Rs_GCN_1 = Rs_GCN(**gcn_kwargs)
    self.Rs_GCN_2 = Rs_GCN(**gcn_kwargs)
    self.Rs_GCN_3 = Rs_GCN(**gcn_kwargs)
    self.Rs_GCN_4 = Rs_GCN(**gcn_kwargs)
def __init__(self, img_dim, embed_size, inter_channels=2048, use_atten=False,
             use_box=False, use_label=False):
    """Create the layers of the image encoder with optional box/label heads.

    Args:
        img_dim: Accepted for interface compatibility; the linear projection
            that consumed it is disabled, so this constructor does not use it.
        embed_size: Hidden size of the bidirectional GRU, output size of
            ``fc_box`` and input size of ``fc_class``.
        inter_channels: GRU input size and channel count of every ``Rs_GCN``
            layer.
        use_atten: Stored on the instance; not used by the constructor
            itself.
        use_box: When true, adds ``fc_box``, a linear layer from 6 inputs to
            ``embed_size`` outputs.
        use_label: When true, adds ``fc_class``, a linear layer from
            ``embed_size`` inputs to 33 outputs.
    """
    super(EncoderImagePrecompAttn, self).__init__()

    # Plain configuration values kept on the instance.
    self.embed_size = embed_size
    self.use_atten = use_atten
    self.use_box = use_box
    self.use_label = use_label

    # Optional heads, created only when the matching flag is set.
    if use_box:
        self.fc_box = nn.Linear(6, embed_size)
    if use_label:
        self.fc_class = nn.Linear(embed_size, 33)

    # NOTE: the `fc` projection (img_dim -> inter_channels) and the
    # accompanying init_weights() call are intentionally not created here.

    # GSR: single-layer, batch-first, bidirectional GRU.
    self.img_rnn = nn.GRU(
        input_size=inter_channels,
        hidden_size=embed_size,
        num_layers=1,
        batch_first=True,
        bidirectional=True,
    )

    # GCN reasoning: four layers sharing the same channel configuration.
    gcn_kwargs = dict(in_channels=inter_channels, inter_channels=inter_channels)
    self.Rs_GCN_1 = Rs_GCN(**gcn_kwargs)
    self.Rs_GCN_2 = Rs_GCN(**gcn_kwargs)
    self.Rs_GCN_3 = Rs_GCN(**gcn_kwargs)
    self.Rs_GCN_4 = Rs_GCN(**gcn_kwargs)