def __init__(self, params):
    super(ABotEncoder, self).__init__()
    self.params = params
    # Projects the VGG image features down to the image embedding size.
    self.image_encoder = ABot.ImageEncoder(params['vgg_out'],
                                           params['image_embed_size'])
    self.question_encoder = ABot.QuestionEncoder(
        params['embedding_size'],
        params['hidden_dim'],
        num_layers=params['num_layers'],
        rnn=params['rnn_type'],
        batch_first=params['batch_first'])
    self.fact_encoder = ABot.FactEncoder(params['embedding_size'],
                                         params['hidden_dim'],
                                         num_layers=params['num_layers'],
                                         rnn=params['rnn_type'],
                                         batch_first=params['batch_first'])
    self.history_encoder = ABot.HistoryEncoder(
        params['hidden_dim'],
        params['hidden_dim'],
        num_layers=params['num_layers'],
        rnn=params['rnn_type'],
        batch_first=params['batch_first'])
    # self.attention_layer = ABot.Attention(
    #     params['hidden_dim'], params['hidden_dim'], params['hidden_dim'],
    #     params['filter_size'], params['image_dim'], params['image_feature'],
    #     params['image_embed_size'])
    self.history_attention = ABot.HistoryAttention(params['hidden_dim'], 1)
    # The input size (hidden_dim * 2 + image_embed_size) suggests this layer
    # fuses the question state, the attended history, and the image embedding.
    self.linear = nn.Linear(
        params['hidden_dim'] * 2 + params['image_embed_size'],
        params['embedding_size'])
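A minimal sketch of the params keys this constructor reads; the concrete values below are illustrative assumptions, not taken from the original configuration.

example_params = {
    'vgg_out': 4096,          # size of the VGG feature vector fed to ImageEncoder
    'image_embed_size': 300,  # image embedding dimension
    'embedding_size': 300,    # word embedding size used by the question/fact encoders
    'hidden_dim': 512,        # RNN hidden size shared by the encoders
    'num_layers': 2,
    'rnn_type': 'LSTM',
    'batch_first': True,
}
# encoder = ABotEncoder(example_params)  # requires the ABot modules from this codebase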
Example #2
def __init__(self, params):
    super(ABotDecoder, self).__init__()
    self.params = params
    self.answer_decoder = ABot.AnswerDecoder(
        self.params['embed_size'],
        self.params['hidden_dim'],
        self.params['vocab_size'],
        num_layers=self.params['num_layers'],
        rnn=self.params['rnn_type'],
        batch_first=self.params['batch_first'])
    # self.linear = nn.Linear(self.params['hidden_dim'], self.params['embed_size'])
    self.attention = ABot.DecoderAttention(self.params['hidden_dim'],
                                           self.params['embed_size'])
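Note that ABotDecoder reads params['embed_size'] while ABotEncoder above reads params['embedding_size'], so a shared params dict has to provide both keys; they appear to be used interchangeably in this codebase.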
Example #3
def __init__(self, params):
    super(ABotEncoder, self).__init__()
    self.params = params
    self.image_encoder = ABot.ImageEncoder(params['vgg_out'],
                                           params['image_embed_size'])
    self.question_encoder = ABot.QuestionEncoder(
        params['embedding_size'],
        params['hidden_dim'],
        num_layers=params['num_layers'],
        rnn=params['rnn_type'],
        batch_first=params['batch_first'])
    self.fact_encoder = ABot.FactEncoder(params['embedding_size'],
                                         params['hidden_dim'],
                                         num_layers=params['num_layers'],
                                         rnn=params['rnn_type'],
                                         batch_first=params['batch_first'])
    self.history_encoder = ABot.HistoryEncoder(
        params['hidden_dim'] * 2 + params['image_embed_size'],
        params['hidden_dim'],
        num_layers=params['num_layers'],
        rnn=params['rnn_type'],
        batch_first=params['batch_first'])
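# The script below continues from earlier setup that is not shown here: it
# assumes `import torch`, `import torch.nn as nn`, `import numpy as np`, the
# project modules (ABot, ABot_Encoder, ABot_Decoder, QBot, QBot_Encoder,
# QBot_Decoder, Discriminator), and that `params`, `USE_CUDA`, `gpu`, and
# `data` were defined above.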
params['USE_CUDA'] = USE_CUDA
params['gpu'] = gpu

compute_ranks = False
current_epoch_ABot = 26
current_epoch_QBot = 23

#Define Models
AEncoder = ABot_Encoder.ABotEncoder(params)
ADecoder = ABot_Decoder.ABotDecoder(params)
QEncoder = QBot_Encoder.QBotEncoder(params)
QDecoder = QBot_Decoder.QBotDecoder(params)
embedding_weights = np.random.random(
    (params['vocab_size'], params['embed_size']))
embedding_weights[0, :] = np.zeros((1, params['embed_size']))
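# Row 0 is zeroed, presumably the padding index, so pad tokens map to the zero vector.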
ABot_embedding_layer = ABot.EmbeddingLayer(embedding_weights)
QBot_embedding_layer = QBot.EmbeddingLayer(embedding_weights)
sampler = ABot.GumbelSampler()
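ABot.GumbelSampler is this repository's own module; as a reference point, here is a minimal sketch of the standard (straight-through) Gumbel-softmax trick such a sampler typically implements, using PyTorch's built-in helper. Shapes and temperature below are illustrative assumptions.

import torch
import torch.nn.functional as F

example_logits = torch.randn(2, 10)                            # (batch, vocab) scores
soft = F.gumbel_softmax(example_logits, tau=1.0)               # differentiable soft samples
hard = F.gumbel_softmax(example_logits, tau=1.0, hard=True)    # straight-through one-hot samples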
embedding_weights_discr = np.random.random(
    (params['vocab_size'], params['embed_size']))
embedding_weights_discr[0, :] = np.zeros((1, params['embed_size']))
print(embedding_weights_discr)
discriminator = Discriminator.Discriminator(params, embedding_weights_discr)

#Criterion
criterion = {}
# reduce/size_average are deprecated; the modern equivalents are
# reduce=False -> reduction='none' and size_average=False -> reduction='sum'.
criterion['CrossEntropyLoss'] = nn.CrossEntropyLoss(reduction='none')
criterion['HingeEmbeddingLoss'] = nn.HingeEmbeddingLoss(margin=0.0,
                                                        reduction='sum')
criterion['MSELoss'] = nn.MSELoss(reduction='sum')
criterion['BCELoss'] = nn.BCELoss(reduction='sum')
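The unreduced CrossEntropyLoss (reduction='none') is typically kept per-token so padded positions can be masked out before summing. A minimal sketch with illustrative dummy tensors (logits, targets, and the pad-index assumption are not from the original training loop):

import torch

example_logits = torch.randn(2, 5, 10)            # decoder scores (batch, seq_len, vocab)
example_targets = torch.randint(0, 10, (2, 5))    # gold token indices
token_loss = criterion['CrossEntropyLoss'](example_logits.view(-1, 10),
                                           example_targets.view(-1))
mask = (example_targets.view(-1) != 0).float()    # assume index 0 is the pad token
loss = (token_loss * mask).sum() / mask.sum().clamp(min=1.0)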
params['beamLen'] = 20
params['word2ind'] = data.word2ind
params['ind2word'] = data.ind2word
params['USE_CUDA'] = USE_CUDA
params['gpu'] = gpu
params['filter_size'] = 3
compute_ranks = False

#Define Models
AEncoder = ABot_Encoder.ABotEncoder(params)
ADecoder = ABot_Decoder.ABotDecoder(params)
embedding_weights = np.random.random(
    (params['vocab_size'], params['embed_size']))
embedding_weights[0, :] = np.zeros((1, params['embed_size']))
print(embedding_weights)
embedding_layer = ABot.EmbeddingLayer(embedding_weights)

if compute_ranks:
    checkpoint = torch.load(
        '../outputs/supervised_ABot_V2_LSTM_2Layers_test_10')
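    # Assumption: if this is ever run without a GPU, pass map_location='cpu' to
    # torch.load to avoid CUDA deserialization errors.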
    print("DONE")
    AEncoder.load_state_dict(checkpoint['AEncoder'])
    ADecoder.load_state_dict(checkpoint['ADecoder'])
    embedding_layer.load_state_dict(checkpoint['embedding_layer'])
#Criterion
criterion = nn.CrossEntropyLoss(reduction='none')  # per-token losses (modern spelling of reduce=False)

#Optimizer
optimizer = torch.optim.Adam([{
    'params': AEncoder.parameters()
}, {