def train_model(graph_obj, gnn_obj, pos_nbr_dict, neg_nbr_dict, num_epochs=10):
    print(graph_obj)
    # Optimizer
    opt = torch.optim.Adam(list(gnn_obj.parameters()))
    pbar = tqdm(range(num_epochs))
    for e in pbar:
        opt.zero_grad()
        triplets = generate_training_triplets(pos_nbr_dict, neg_nbr_dict)
        # Forward
        gnn_obj(graph_obj)
        idx_t = LT(triplets[:, 0])
        emb_t = graph_obj.ndata['features'][idx_t, :]
        idx_p = LT(triplets[:, 1])
        idx_n = LT(triplets[:, 2])
        emb_p = graph_obj.ndata['features'][idx_p, :]
        emb_n = graph_obj.ndata['features'][idx_n, :]
        loss_val = triplet_loss(emb_t, emb_p, emb_n)
        loss_val.backward()
        opt.step()
        pbar.set_postfix(
            {'Loss': '{:.4f}'.format(np.mean(loss_val.cpu().data.numpy()))})
    return
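# The functions in this file rely on short tensor aliases defined elsewhere;
# a minimal sketch of the assumed header (an illustration, not the actual
# module preamble): LT/FT are the usual LongTensor/FloatTensor shorthands,
# and V/Var wrap tensors in autograd Variables, pre-0.4 PyTorch style.
import numpy as np
import torch
import torch.nn.functional as F
from torch.autograd import Variable
from tqdm import tqdm

LT = torch.LongTensor   # integer index tensors
FT = torch.FloatTensor  # float tensors (masks, scores)
V = Var = Variable      # autograd wrapper used throughout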
def train_model(self, train_x_pos, train_x_neg, batch_size=512, epochs=10,
                log_interval=100):
    self.model.mode = 'train'
    bs = batch_size
    opt = torch.optim.Adam(list(self.model.parameters()))
    num_batches = train_x_pos.shape[0] // bs + 1
    idx = np.arange(train_x_pos.shape[0])
    loss_value_history = []
    for e in tqdm(range(epochs)):
        np.random.shuffle(idx)
        for b in range(num_batches):
            opt.zero_grad()
            b_idx = idx[b * bs:(b + 1) * bs]
            if len(b_idx) == 0:  # guard against an empty final slice
                continue
            x_p = LT(train_x_pos[b_idx]).to(self.device)
            x_n = LT(train_x_neg[b_idx]).to(self.device)
            # The model returns a likelihood-style score; negate to minimize.
            loss = -self.model(x_p, x_n)
            loss.backward()
            opt.step()
            loss_value_history.append(loss.cpu().data.numpy().tolist())
            if b % log_interval == 0:
                print('Epoch {} batch {} Loss {:.4f}'.format(
                    e, b, loss.cpu().data.numpy()))
    try:
        import matplotlib.pyplot as plt
        y = loss_value_history
        x = np.arange(len(loss_value_history))
        plt.plot(x, y, 'r')
        plt.ylabel('Loss')
        plt.xlabel('Batch')
        plt.show()
        plt.close()
    except ImportError:
        pass
    self.model.mode = 'test'
    return
def train(self):
    """Run the training loop.

    Returns:
        None.
    """
    batch_count = self.iteration
    process_bar = tqdm(range(int(self.iteration)))
    for i in process_bar:
        e1, e2, rel_ids, ground_truth = self.data.generate_batch_sampling(
            self.neg_sample_size)
        e1 = Variable(LT(e1))
        e2 = Variable(LT(e2))
        rel_ids = Variable(LT(rel_ids))
        ground_truth = Variable(LT(ground_truth))
        if self.use_cuda:
            e1 = e1.cuda()
            e2 = e2.cuda()
            ground_truth = ground_truth.cuda()
            rel_ids = rel_ids.cuda()
        self.optimizer.zero_grad()
        loss = self.Hin2Vec_model.forward(e1, e2, rel_ids, ground_truth)
        loss.backward()
        self.optimizer.step()
        process_bar.set_description("Loss: {:.4f}".format(
            loss.data.cpu().numpy()))
    # --- Save embedding ---- #
    self.Hin2Vec_model.save_embedding(self.output_file_name,
                                      use_cuda=self.use_cuda)
def train_model(w2v_object, words, contexts, LR=0.01, num_epochs=10,
                batch_size=512):
    opt = torch.optim.Adam(list(w2v_object.parameters()), LR)
    idx = np.arange(words.shape[0], dtype=int)
    bs = batch_size
    for epoch in tqdm(range(num_epochs)):
        np.random.shuffle(idx)
        num_batches = idx.shape[0] // batch_size + 1
        for b in range(num_batches):
            b_idx = idx[b * bs:(b + 1) * bs].astype(int)
            if len(b_idx) == 0:  # guard against an empty final slice
                continue
            b_w = LT(words[b_idx])
            b_c = LT(contexts[b_idx])
            opt.zero_grad()
            _loss = w2v_object(b_w, b_c)
            _loss.backward()
            opt.step()
            if (b + 1) % 200 == 0:
                print('Batch {} Loss {:.4f}'.format(
                    b + 1, np.mean(_loss.data.numpy())))
    return w2v_object
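# A hypothetical usage sketch for train_model above. ToyW2V is an assumption
# (it is not part of this file): any nn.Module whose forward returns a scalar
# loss for (word, context) index batches would work.
import torch.nn as nn

class ToyW2V(nn.Module):
    def __init__(self, vocab_size, dim=16):
        super().__init__()
        self.in_emb = nn.Embedding(vocab_size, dim)
        self.out_emb = nn.Embedding(vocab_size, dim)

    def forward(self, words, contexts):
        # Toy objective without negative sampling: -log sigmoid(score).
        score = (self.in_emb(words) * self.out_emb(contexts)).sum(dim=1)
        return -F.logsigmoid(score).mean()

# (word, context) index pairs, e.g. extracted with a window of size 1.
toy_words = np.array([0, 1, 2, 3])
toy_contexts = np.array([1, 2, 3, 0])
toy_model = train_model(ToyW2V(vocab_size=4), toy_words, toy_contexts,
                        num_epochs=2)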
def __getitem__(self, index):
    """Return one (context, question, answer-span) example as tensors."""
    context = self.context_data[index]
    context_ids, context_len = self.sentence_tokenids(
        context, max_len=config.context_len)
    question = self.quesiton_data[index]
    question_ids, question_len = self.sentence_tokenids(
        question, max_len=config.question_len)
    answer_span = self.answer_span_data[index]
    return (LT(context_ids), LT([context_len]),
            LT(question_ids), LT([question_len]),
            LT(answer_span))
def evaluate(model, docs, pairs):
    """Evaluate a model by generating a prediction for each pair and
    counting how often the argmax matches the gold continuation."""
    correct = 0
    for index, _, summary, continuation in pairs:
        scores = model.forward(Var(LT(docs[index])), Var(LT(summary)), False)
        predict = scores.data.numpy().argmax(axis=1)[0]
        if predict == int(continuation[0]):
            correct += 1
    return correct, len(pairs), correct / len(pairs)
def predict(self, question, k=3):
    """Return the top-k answers for `question` with similarity scores."""
    self.model.eval()
    q_inps, q_lens = self.vect.transform([question], return_lengths=True)
    q_inps, q_lens = V(LT(q_inps)), LT(q_lens)
    question_h = self.model.encode(1, (q_inps, q_lens), use_cuda=False)
    if self.similarity in ('bilinear', 'mlp'):
        question_h = question_h.expand_as(self._encoded_answers)
    sim = self.model.sim(question_h, self._encoded_answers)
    values, indices = sim.topk(k, dim=0, sorted=True, largest=True)
    indices = torch.squeeze(indices.data).numpy()
    values = torch.squeeze(values.data).numpy()
    return self._answers[indices], values
def fill_batch(docs, batch):
    """Turn data from training samples into a batch that can be read by
    the neural network."""
    sequences = []
    summaries = []
    continuations = []
    for index, _, summary, continuation in batch:
        sequences.append(docs[index])
        summaries.append(summary)
        continuations.append(continuation)
    return LT(sequences), LT(summaries), LT(continuations).squeeze(1)
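# Usage sketch for fill_batch. Note that LT(sequences) only builds a
# rectangular tensor, so the caller is assumed to have padded/truncated all
# documents (and summaries) to a common length beforehand.
toy_docs = [[1, 2, 3, 4], [5, 6, 7, 8]]
toy_batch = [(0, None, [1, 2], [3]), (1, None, [5, 6], [7])]
seqs, sums, conts = fill_batch(toy_docs, toy_batch)
# seqs: 2 x 4 LongTensor, sums: 2 x 2, conts: 1-D targets of shape [2]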
def fit(self, questions, answers):
    questions, answers = np.asarray(questions), np.asarray(answers)
    self.to_ix.fit(np.append(questions, answers))
    questions_numpy = self.to_ix.transform(questions)
    answers_numpy = self.to_ix.transform(answers)
    num_embeddings, hdim = self.to_ix.vocabulary_size_, self.hidden_size

    def make_embedder():
        # Both towers share the same architecture: a mean-pooled embedding
        # bag followed by a three-layer MLP.
        return nn.Sequential(
            nn.EmbeddingBag(num_embeddings, hdim, mode='mean'),
            nn.Linear(hdim, hdim), nn.Dropout(0.2), nn.ReLU(),
            nn.Linear(hdim, hdim), nn.Dropout(0.2), nn.ReLU(),
            nn.Linear(hdim, hdim))

    self.embedding1 = make_embedder().cuda()
    self.embedding2 = make_embedder().cuda()
    self.optimizer1 = optim.Adam(self.embedding1.parameters(), lr=self.lr)
    self.optimizer2 = optim.Adam(self.embedding2.parameters(), lr=self.lr)
    n_samples = questions_numpy.shape[0]
    batch_size = self.n_negative + 1
    try:
        self.last_epoch = -1
        for epoch in range(self.n_epochs):
            q_shuffled, a_shuffled = shuffle(questions_numpy, answers_numpy)
            q_shuffled, a_shuffled = V(LT(q_shuffled)), V(LT(a_shuffled))
            for start in range(0, n_samples, batch_size):
                end = start + batch_size
                if end > n_samples:
                    break
                q_batch = q_shuffled[start:end]
                a_batch = a_shuffled[start:end]
                self.partial_fit(q_batch, a_batch)
            self.last_epoch = epoch
    except KeyboardInterrupt:
        print("STAHP!")
    finally:
        print()
        self.embedding1 = self.embedding1.cpu()
        self.embedding2 = self.embedding2.cpu()
        self.embedding2.eval()
        self._embedded_answers = self.embedding2(V(LT(answers_numpy)))
        self._answers = answers
def forward(self, web_page, examples):
    e_logits = self._encoding_model(web_page, examples, logits_only=True)
    a_logits = self._alignment_model(web_page, examples, logits_only=True)
    # Normalize
    e_logprobs = F.log_softmax(e_logits, dim=1)
    a_logprobs = F.log_softmax(a_logits, dim=1)
    logits = e_logprobs * self._weight[0] + a_logprobs * self._weight[1]
    # Filter the candidates
    node_filter_mask = self.node_filter(web_page, examples[0].web_page_code)
    log_node_filter_mask = V(FT([0. if x else -999999.
                                 for x in node_filter_mask]))
    logits = logits + log_node_filter_mask
    # Losses and predictions
    targets = V(LT([web_page.xid_to_ref.get(x.target_xid, 0)
                    for x in examples]))
    mask = V(FT([int(x.target_xid in web_page.xid_to_ref
                     and node_filter_mask[web_page.xid_to_ref[x.target_xid]])
                 for x in examples]))
    losses = self.loss(logits, targets) * mask
    if not np.isfinite(losses.data.sum()):
        logging.warning('Losses contain NaN')
    # num_phrases x top_k
    top_k = min(self.top_k, len(web_page.nodes))
    predictions = torch.topk(logits, top_k, dim=1)[1]
    return logits, losses, predictions
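# The additive log-mask trick used above, in isolation: adding a large
# negative constant to filtered-out positions drives their softmax
# probability to (numerically) zero without any in-place masking.
mask_demo_logits = FT([[1.0, 2.0, 3.0]])
keep = [True, False, True]
masked = mask_demo_logits + FT([0. if k else -999999. for k in keep])
print(F.softmax(masked, dim=1))  # position 1 gets ~0 probability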
def forward_o(self, data, owords_indicator, owords_numerals,
              owords_numeral_length):
    v = LT(data)
    v = v.cuda() if self.ovectors.weight.is_cuda else v
    embed = self.ovectors(v)
    if owords_numerals.size()[0] == 0:
        return embed
    owords_numerals = (owords_numerals.cuda()
                       if self.is_cuda else owords_numerals)
    owords_numeral_length = (owords_numeral_length.cuda()
                             if self.is_cuda else owords_numeral_length)
    owords_numeral_length_permuted, perm_idx = owords_numeral_length.sort(
        0, descending=True)
    owords_numerals_permuted = owords_numerals[perm_idx]
    packed_input = pack_padded_sequence(owords_numerals_permuted,
                                        owords_numeral_length_permuted,
                                        batch_first=True)
    invert_perm_idx = self.invert_permutation(perm_idx)
    assert t.equal(owords_numerals_permuted[invert_perm_idx], owords_numerals)
    assert owords_indicator.sum() == owords_numerals.size()[0]
    if self.scheme == 'LSTM':
        _, (hn, cn) = self.digital_RNN_o(packed_input)
    else:
        _, hn = self.digital_RNN_o(packed_input)
    embed[owords_indicator] = hn.squeeze(0)[invert_perm_idx]
    return embed
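# The sort -> pack -> unsort pattern above, in isolation. Older
# pack_padded_sequence required lengths in descending order, so the batch is
# sorted first and the RNN output is scattered back with the inverse
# permutation; a sketch of how invert_permutation could be implemented:
lengths = torch.LongTensor([2, 5, 3])
sorted_lengths, perm_idx = lengths.sort(0, descending=True)  # [5, 3, 2]
invert_perm_idx = torch.zeros_like(perm_idx)
invert_perm_idx[perm_idx] = torch.arange(perm_idx.size(0))
assert torch.equal(sorted_lengths[invert_perm_idx], lengths)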
def forward(self, x):
    # x = [[11, 21, 31], [12, 22, 32]]
    # x.size() == [seq_len, batch_size]
    x = self.embedding(
        Variable(LT([list(x_) for x_ in x]).cuda(), requires_grad=False))
    # x.size() == [seq_len, batch_size, e_dim]
    batch_size = x.size()[1]
    h = self.init_hidden(batch_size)
    c = self.init_hidden(batch_size)
    h = h.cuda()
    c = c.cuda()
    x = x.contiguous()
    out, (h, c) = self.encoder(x, (h, c))
    # out.size() == [seq_len, batch_size, h_dim]
    h = h.squeeze()
    # h.size() == [batch_size, h_dim]
    out = self.fc1(h)  # h_dim -> 2048
    if self.biLSTM:
        out = torch.cat((out[0], out[1]), 1)
    out = self.bn(out)       # 2048 -> 2048
    out = F.leaky_relu(out)  # activation
    out = self.dropout(out)
    out = self.fc2(out)      # 2048 -> 1
    # out.size() == [batch_size, 1]
    return out
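# forward() above calls self.init_hidden, which is not shown in this file.
# A minimal sketch of such a helper; n_layers and h_dim are assumed
# attribute names (the class body is not available here):
def init_hidden(self, batch_size):
    num_directions = 2 if self.biLSTM else 1
    return torch.zeros(self.n_layers * num_directions, batch_size,
                       self.h_dim)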
def forward_i(self, data, iword_indicator, iword_numerals,
              iword_numeral_length):
    v = LT(data)
    v = v.cuda() if self.is_cuda else v
    embed = self.ivectors(v)  # B x T x F
    if iword_numerals.size()[0] == 0:
        return embed
    iword_numerals = iword_numerals.cuda() if self.is_cuda else iword_numerals
    iword_numeral_length = (iword_numeral_length.cuda()
                            if self.is_cuda else iword_numeral_length)
    iword_numeral_length_permuted, perm_idx = iword_numeral_length.sort(
        0, descending=True)
    iword_numerals_permuted = iword_numerals[perm_idx]
    packed_input = pack_padded_sequence(iword_numerals_permuted,
                                        iword_numeral_length_permuted,
                                        batch_first=True)
    invert_perm_idx = self.invert_permutation(perm_idx)
    # assert t.equal(iword_numerals_permuted[invert_perm_idx], iword_numerals)
    # assert iword_indicator.sum() == iword_numerals.size()[0]
    if self.scheme == 'LSTM':
        _, (hn, cn) = self.digital_RNN_i(packed_input)
    else:
        _, hn = self.digital_RNN_i(packed_input)  # TODO: how to check?
    embed[iword_indicator] = hn.squeeze(0)[invert_perm_idx]
    return embed
def find_next_word(self, summary, i, model, sequence):
    # Score the next position given the last C generated tokens.
    # torch.topk(..., 2) keeps a runner-up candidate (e.g. to avoid <unk>),
    # but only the top-scoring index is appended here.
    summary_i = Variable(LT([summary[i - self.C:i]]))
    scores = model.forward(sequence, summary_i, True)
    prob, index = torch.topk(scores.data, 2)
    summary.append(index[0][0])
    return summary
def train_one_epoch_autoencoder(self, obj):
    print('objective: %s' % obj)
    non_obj = 'src' if obj == 'tgt' else 'tgt'
    w2i = self.converters[obj]['w2i']
    # i2w = self.converters[obj]['i2w']
    embedder = self.embedders[obj]
    embedder_optim = self.optims[obj]
    losses = []
    for batch in tqdm(self.train_dataloader):
        # add noise
        org_batch = copy.deepcopy(batch)
        batch[obj] = [sent_noise.run(s) for s in org_batch[obj]]
        batch[non_obj] = org_batch[obj]
        # convert string to ids
        batch = utils.prepare_batch(batch, w2i, w2i)
        inputs, targets, input_lengths, target_lengths = \
            utils.pad_to_batch(batch, w2i, w2i)
        start_decode = \
            Variable(LT([[w2i['<s>']] * inputs.size(0)])).transpose(0, 1)
        self.encoder.zero_grad()
        self.decoder.zero_grad()
        embedder.zero_grad()
        if self.args.use_cuda:
            inputs = inputs.cuda()
            targets = targets.cuda()
            start_decode = start_decode.cuda()
        output, hidden_c = self.encoder(embedder, inputs, input_lengths)
        preds = self.decoder(embedder, start_decode, hidden_c,
                             targets.size(1), output, None, True)
        loss = self.loss_func(preds, targets.view(-1))
        losses.append(loss.data[0])
        loss.backward()
        nn.utils.clip_grad_norm(self.encoder.parameters(), 50.0)
        nn.utils.clip_grad_norm(self.decoder.parameters(), 50.0)
        self.enc_optim.step()
        self.dec_optim.step()
        embedder_optim.step()
        # print(np.mean(losses))
        # preds = preds.view(inputs.size(0), targets.size(1), -1)
        # preds_max = torch.max(preds, 2)[1]
        # print(' '.join([i2w[p] for p in preds_max.data[0].tolist()]))
        # print(' '.join([i2w[p] for p in preds_max.data[1].tolist()]))
    return np.mean(losses)
def __getitem__(self, idx):
    # Rasterize the first four pages at 100 dpi and convert to grayscale.
    pdfSheetImages = pdf2image.convert_from_path(self.pdfPaths[idx],
                                                 dpi=100)[:4]
    pdfSheetImages = [p.convert(mode='L') for p in pdfSheetImages]
    pdfSheetImages = [self.dataTransform(p) for p in pdfSheetImages]
    # Stack pages into one array and scale pixel values to [-1, 1].
    array = np.stack([np.asarray(p) for p in pdfSheetImages], 0)
    tensor = from_numpy((array.astype('float32') - 127.5) / 127.5)
    label = LT([self.labels[idx]])
    return tensor, label
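# The pixel scaling above, in isolation: (x - 127.5) / 127.5 maps 8-bit
# values into [-1, 1], i.e. 0 -> -1.0, 127.5 -> 0.0, 255 -> 1.0.
demo_pixels = np.array([0, 127.5, 255], dtype='float32')
print((demo_pixels - 127.5) / 127.5)  # [-1.  0.  1.]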
def train(self):
    """Run the training loop with linear learning-rate decay.

    Returns:
        None.
    """
    batch_count = self.iteration
    process_bar = tqdm(range(int(self.iteration)))
    for i in process_bar:
        e1, e2, rel_ids, ground_truth = self.data.generate_batch_sampling(
            self.neg_sample_size)
        e1 = Variable(LT(e1))
        e2 = Variable(LT(e2))
        rel_ids = Variable(LT(rel_ids))
        ground_truth = Variable(LT(ground_truth))
        if self.use_cuda:
            e1 = e1.cuda()
            e2 = e2.cuda()
            ground_truth = ground_truth.cuda()
            rel_ids = rel_ids.cuda()
        self.optimizer.zero_grad()
        loss = self.Hin2Vec_model.forward(e1, e2, rel_ids, ground_truth)
        loss.backward()
        self.optimizer.step()
        process_bar.set_description(
            "Loss: %0.8f, lr: %0.6f" % (
                loss.item() / self.batch_size,
                self.optimizer.param_groups[0]['lr']))
        if i * self.batch_size % 10000 == 0:
            # Linear decay of the learning rate over the full run.
            lr = self.initial_lr * (1.0 - 1.0 * i / batch_count)
            for param_group in self.optimizer.param_groups:
                param_group['lr'] = lr
    # --- Save embedding ---- #
    self.Hin2Vec_model.save_embedding(
        self.output_file_name, use_cuda=self.use_cuda)
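# The learning-rate schedule above, in isolation: a linear decay from
# initial_lr toward zero over batch_count iterations (values illustrative).
demo_initial_lr, demo_batch_count = 0.025, 10000
for step in (0, 5000, 9999):
    print(demo_initial_lr * (1.0 - 1.0 * step / demo_batch_count))
# 0.025, 0.0125, ~2.5e-06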
def _get_neighbors(self, web_page):
    """Get indices of at most |max_neighbors| neighbors for each relation.

    Args:
        web_page (WebPage)
    Returns:
        neighbors: SequenceBatch of shape num_nodes x ??? containing the
            neighbor refs (??? is at most max_neighbors * len(neighbor_rels))
        rels: SequenceBatch of shape num_nodes x ??? containing the
            relation indices
    """
    G = web_page.graph
    batch_neighbors = [[] for _ in range(len(web_page.nodes))]
    batch_rels = [[] for _ in range(len(web_page.nodes))]
    for src, tgts in G.nodes.items():
        # Group by relation
        rel_to_tgts = defaultdict(list)
        for tgt, rels in tgts.items():
            for rel in rels:
                rel_to_tgts[rel].append(tgt)
        # Sample if needed
        for rel, index in self._neighbor_rels.items():
            tgts = rel_to_tgts[rel]
            random.shuffle(tgts)
            if not tgts:
                continue
            if len(tgts) > self._max_neighbors:
                tgts = tgts[:self._max_neighbors]
            batch_neighbors[src].extend(tgts)
            batch_rels[src].extend([index] * len(tgts))
    # Create SequenceBatches
    max_len = max(len(x) for x in batch_neighbors)
    batch_mask = []
    for neighbors, rels in zip(batch_neighbors, batch_rels):
        assert len(neighbors) == len(rels)
        this_len = len(neighbors)
        batch_mask.append([1.] * this_len + [0.] * (max_len - this_len))
        neighbors.extend([0] * (max_len - this_len))
        rels.extend([0] * (max_len - this_len))
    return (SequenceBatch(V(LT(batch_neighbors)), V(FT(batch_mask))),
            SequenceBatch(V(LT(batch_rels)), V(FT(batch_mask))))
def predict(self, question, k=1):
    self.embedding1.eval()
    q_numpy = self.to_ix.transform([question])
    embedded_question = self.embedding1(V(LT(q_numpy)))
    dist = F.pairwise_distance(
        embedded_question.expand_as(self._embedded_answers),
        self._embedded_answers, p=self.p)
    values, indices = dist.topk(k, sorted=True, largest=False, dim=0)
    indices = indices.data.squeeze().numpy()
    values = values.data.squeeze().numpy()
    return self._answers[indices], values
def forward_o(self, data, owords_indicator, owords_numerals):
    v = LT(data)
    v = v.cuda(self.ivectors.weight.device) if self.is_cuda else v
    embed = self.ovectors(v)
    if owords_numerals.size()[0] == 0:
        return embed
    numeral_embed = self.get_numeral_embed_batch(owords_numerals)
    # [num_of_numerals x prototype_size] x [prototype_size x embedding_size]
    # => [num_of_numerals x embedding_size]
    embed[owords_indicator] = numeral_embed
    return embed
def pad_to_batch(batch, xw2i, yw2i):
    src, tgt = batch['src'], batch['tgt']
    batch = list(zip(src, tgt))
    sorted_batch = sorted(batch, key=lambda b: b[0].size(1), reverse=True)
    x, y = list(zip(*sorted_batch))
    max_x = max([s.size(1) for s in x])
    max_y = max([s.size(1) for s in y])
    x_p, y_p = [], []
    for i in range(len(batch)):
        if x[i].size(1) < max_x:
            x_p.append(torch.cat([
                x[i],
                Variable(LT([xw2i['<PAD>']] *
                            (max_x - x[i].size(1)))).view(1, -1)
            ], 1))
        else:
            x_p.append(x[i])
        if y[i].size(1) < max_y:
            y_p.append(torch.cat([
                y[i],
                Variable(LT([yw2i['<PAD>']] *
                            (max_y - y[i].size(1)))).view(1, -1)
            ], 1))
        else:
            y_p.append(y[i])
    input_var = torch.cat(x_p)
    target_var = torch.cat(y_p)
    input_len = [
        list(map(lambda s: s == 0, t.data)).count(False) for t in input_var
    ]
    target_len = [
        list(map(lambda s: s == 0, t.data)).count(False) for t in target_var
    ]
    return input_var, target_var, input_len, target_len
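# Usage sketch for pad_to_batch; each sentence is assumed to arrive as a
# 1 x L Variable of token ids. Note the length computation above counts
# non-zero ids, so '<PAD>' is assumed to map to index 0.
demo_w2i = {'<PAD>': 0, '<s>': 1, 'hello': 2, 'world': 3}
demo_batch = {
    'src': [Variable(LT([[2, 3]])), Variable(LT([[2]]))],
    'tgt': [Variable(LT([[3, 2]])), Variable(LT([[3]]))],
}
inp, tgt, inp_len, tgt_len = pad_to_batch(demo_batch, demo_w2i, demo_w2i)
# inp: 2 x 2 matrix right-padded with 0; inp_len == [2, 1]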
def train_one_epoch_translator(self, _from='src', _to='tgt'):
    print('%s -> %s' % (_from, _to))
    sw2i = self.converters[_from]['w2i']
    tw2i = self.converters[_to]['w2i']
    # ti2w = self.converters[_to]['i2w']
    src_embedder = self.embedders[_from]
    tgt_embedder = self.embedders[_to]
    src_embedder_optim = self.optims['src']
    tgt_embedder_optim = self.optims['tgt']
    losses = []
    for batch in tqdm(self.train_dataloader):
        batch = utils.prepare_batch(batch, sw2i, tw2i)
        inputs, targets, input_lengths, target_lengths = \
            utils.pad_to_batch(batch, sw2i, tw2i)
        start_decode = \
            Variable(LT([[tw2i['<s>']] * targets.size(0)])).transpose(0, 1)
        self.encoder.zero_grad()
        self.decoder.zero_grad()
        src_embedder.zero_grad()
        tgt_embedder.zero_grad()
        if self.args.use_cuda:
            inputs = inputs.cuda()
            targets = targets.cuda()
            start_decode = start_decode.cuda()
        output, hidden_c = self.encoder(src_embedder, inputs, input_lengths)
        preds = self.decoder(tgt_embedder, start_decode, hidden_c,
                             targets.size(1), output, None, True)
        loss = self.loss_func(preds, targets.view(-1))
        losses.append(loss.data[0])
        loss.backward()
        nn.utils.clip_grad_norm(self.encoder.parameters(), 50.0)
        nn.utils.clip_grad_norm(self.decoder.parameters(), 50.0)
        self.enc_optim.step()
        self.dec_optim.step()
        src_embedder_optim.step()
        tgt_embedder_optim.step()
    print(np.mean(losses))
def forward(self, x):
    # x = [[11, 21, 31], [12, 22, 32]]
    # x.size() == [seq_len, batch_size]
    x = self.embedding(
        Variable(LT([list(x_) for x_ in x]).cuda(), requires_grad=False))
    x = x.permute(1, 0, 2)
    x = x.unsqueeze(1)
    x = [F.relu(conv(x)).squeeze(3) for conv in self.convs1]
    # [(N, Co, W), ...] * len(Ks)
    x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x]
    # [(N, Co), ...] * len(Ks)
    x = torch.cat(x, 1)
    x = self.dropout(x)  # (N, len(Ks)*Co)
    out = self.fc1(x)    # (N, C)
    return out
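# self.convs1 used above is not defined in this snippet; a common
# construction (an assumption, Kim-style text CNN) is one Conv2d per kernel
# size spanning the full embedding dimension:
import torch.nn as nn
Ks, Co, e_dim = [3, 4, 5], 100, 300  # kernel sizes, out channels, emb dim
convs1 = nn.ModuleList([nn.Conv2d(1, Co, (k, e_dim)) for k in Ks])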
def forward_o(self, data, owords_indicator, owords_numerals):
    v = LT(data)
    v = v.cuda(self.ivectors.weight.device) if self.is_cuda else v
    embed = self.ovectors(v)
    if owords_numerals.size()[0] == 0 or self.gmm_posterior is None:
        return embed
    prototype_weights = self.get_numeral_embed_weights_batch(
        owords_numerals)  # [num_of_numerals x prototype_size]
    numeral_embed = t.matmul(prototype_weights, self.oprototypes_embeddings)
    # [num_of_numerals x prototype_size] x [prototype_size x embedding_size]
    # => [num_of_numerals x embedding_size]
    embed[owords_indicator] = numeral_embed
    return embed
def forward_i(self, data, iword_indicator, iword_numerals):
    v = LT(data)
    v = v.cuda(self.ivectors.weight.device) if self.is_cuda else v
    embed = self.ivectors(v)
    if iword_numerals.size()[0] == 0:
        return embed
    # Earlier prototype-weighting variant, kept for reference:
    # prototype_weights = self.get_numeral_embed_weights_batch(iword_numerals)
    # numeral_embed = t.matmul(prototype_weights, self.iprototypes_embeddings)
    numeral_embed = self.get_numeral_embed_batch(iword_numerals)
    embed[iword_indicator] = numeral_embed
    return embed
def score_samples(self, x_test):
    bs = 507  # scoring batch size
    results = []
    num_batches = x_test.shape[0] // bs + 1
    idx = np.arange(x_test.shape[0])
    for b in range(num_batches):
        b_idx = idx[b * bs:(b + 1) * bs]
        if len(b_idx) == 0:
            break
        x = LT(x_test[b_idx]).to(self.device)
        score_values = self.model(x)
        vals = score_values.cpu().data.numpy().tolist()
        results.extend(vals)
    return results
def predict(self, x_test):
    self.model.mode = 'test'
    self.model.eval()
    bs = 495  # scoring batch size
    results = []
    num_batches = x_test.shape[0] // bs + 1
    idx = np.arange(x_test.shape[0])
    for b in range(num_batches):
        b_idx = idx[b * bs:(b + 1) * bs]
        if len(b_idx) == 0:
            break
        x = LT(x_test[b_idx]).to(self.device)
        score_values = self.model(x)
        vals = score_values.cpu().data.numpy().tolist()
        results.extend(vals)
    return results
def decode(self, sequence, model, sentences=False):
    """Given a sequence and a model, generate a summary in a greedy
    manner."""
    # Initialize the summary with enough starting tags
    summary = [self.word2idx['<s>']] * self.C
    sequence = Variable(LT([[self.word2idx[w] for w in sequence]]))
    # Greedily select the word with the highest probability
    for i in range(self.C, self.length + self.C - 1):
        summary = self.find_next_word(summary, i, model, sequence)
        if self.word2idx['</s>'] in summary:
            break
    # Indices to words
    summary = [self.idx2word[w] for w in summary[self.C - 1:]]
    return summary
def translate(self, sents, input_lang):
    output_lang = 'src' if input_lang == 'tgt' else 'tgt'
    sw2i = self.converters[input_lang]['w2i']
    tw2i = self.converters[output_lang]['w2i']
    ti2w = self.converters[output_lang]['i2w']
    src_embedder = self.prev_embedders[input_lang]
    tgt_embedder = self.prev_embedders[output_lang]
    encoder = self.prev_encoder
    decoder = self.prev_decoder
    batch = {'src': sents, 'tgt': sents}
    batch = utils.prepare_batch(batch, sw2i, tw2i)
    inputs, targets, input_lengths, target_lengths = \
        utils.pad_to_batch(batch, sw2i, tw2i)
    start_decode = \
        Variable(LT([[tw2i['<s>']] * targets.size(0)])).transpose(0, 1)
    if self.args.use_cuda:
        inputs = inputs.cuda()
        targets = targets.cuda()
        start_decode = start_decode.cuda()
    output, hidden_c = encoder(src_embedder, inputs, input_lengths)
    max_length = 50
    preds = decoder(tgt_embedder, start_decode, hidden_c, max_length,
                    output, None, True)
    preds = preds.view(inputs.size(0), max_length, -1)
    preds_max = torch.max(preds, 2)[1]
    result_sents = []
    for i in range(len(sents)):
        result_sent = ' '.join(
            [ti2w.get(p, '<UNK>') for p in preds_max.data[i].tolist()])
        result_sents.append(result_sent)
    return result_sents
def calc_test_loss(self, log_dict):
    sw2i = self.sw2i
    tw2i = self.tw2i
    losses = []
    for batch in self.test_dataloader:
        batch = utils.prepare_batch(batch, sw2i, tw2i)
        inputs, targets, input_lengths, target_lengths = \
            utils.pad_to_batch(batch, sw2i, tw2i)
        start_decode = \
            Variable(LT([[tw2i['<s>']] * targets.size(0)]),
                     requires_grad=False).transpose(0, 1)
        if self.args.use_cuda:
            inputs = inputs.cuda()
            targets = targets.cuda()
            start_decode = start_decode.cuda()
        output, hidden_c = self.encoder(self.src_embedder, inputs,
                                        input_lengths)
        preds = self.decoder(self.tgt_embedder, start_decode, hidden_c,
                             targets.size(1), output, None, True)
        loss = self.loss_func(preds, targets.view(-1))
        losses.append(loss.data[0])
    log_dict['test_loss'] = np.mean(losses)
    log_dict['sample_translation'] = {}
    log_dict['sample_translation']['src'] = \
        ' '.join([self.si2w[p] for p in inputs.data[0].tolist()])
    log_dict['sample_translation']['tgt'] = \
        ' '.join([self.ti2w[p] for p in targets.data[0].tolist()])
    preds = preds.view(inputs.size(0), targets.size(1), -1)
    preds_max = torch.max(preds, 2)[1]
    log_dict['sample_translation']['prediction'] = \
        ' '.join([self.ti2w[p] for p in preds_max.data[0].tolist()])
    return log_dict