def get_tree_score(self, crf_score, scores, best_parse, partition, batch_sen):
    # crf_score: encoder (CRF) potentials; scores: decoder scores used for the likelihood.
    # best_parse[0] holds the predicted head of each token, best_parse[1] the predicted tags.
    best_parse = np.array(list(best_parse), dtype=int)
    _, batch_size, sentence_length = best_parse.shape
    tree_score = []
    likelihood = 0.0
    for sentence_id in range(batch_size):
        sentence_tree_score = []
        if self.prior_weight > 0 and self.training:
            # Remove the prior that was added to the decoder scores before computing the likelihood.
            scores[sentence_id] -= self.prior_dict[batch_sen[sentence_id]]
        for i in range(1, sentence_length):
            head_id = best_parse[0, sentence_id, i]
            head_tag = best_parse[1, sentence_id, head_id]
            mod_tag = best_parse[1, sentence_id, i]
            # Encoder score of the chosen (head, modifier, head-tag, modifier-tag) arc.
            sentence_tree_score.append(crf_score[sentence_id, head_id, i, head_tag, mod_tag])
            # Decoder score of the same arc.
            likelihood += scores[sentence_id, head_id, i, head_tag, mod_tag]
        # Log-likelihood of the best tree: its total score minus the log partition.
        likelihood -= param.get_scalar(partition[sentence_id].cpu(), 0)
        if self.training:
            self.partition_table[batch_sen[sentence_id]] = param.get_scalar(
                partition[sentence_id].cpu(), 0)
            self.encoder_score_table[batch_sen[sentence_id]] = crf_score[
                sentence_id].cpu().data.numpy()
        tree_score.append(torch.sum(torch.cat(sentence_tree_score)))
    tree_score = torch.cat(tree_score)
    return tree_score, likelihood
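# A minimal, self-contained sketch (not part of the original model) of the quantity
# get_tree_score accumulates per sentence: the log-likelihood of the chosen tree is the
# sum of its arc scores minus the log partition. The toy arc scores and log partition
# below are illustrative assumptions, not values from this codebase.
def _tree_log_likelihood_sketch():
    # (head, modifier) -> log-score for a 3-token toy sentence
    arc_scores = {(0, 1): -0.5, (1, 2): -0.7}
    chosen_tree = [(0, 1), (1, 2)]   # arcs of the best parse
    log_partition = -0.9             # assumed log Z over all candidate trees
    tree_score = sum(arc_scores[a] for a in chosen_tree)
    return tree_score - log_partition   # analogous to `likelihood` above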
def evaluate(self, batch_pos, batch_words, batch_sen, crf_scores):
    batch_size, sentence_length = batch_pos.data.shape
    if not self.initial_Flag:
        scores = np.copy(crf_scores.cpu().data.numpy())
    else:
        scores = np.zeros((batch_size, sentence_length, sentence_length,
                           self.tag_num, self.tag_num))
    for sentence_id in range(batch_size):
        for i in range(sentence_length):
            for j in range(sentence_length):
                # Skip the root as modifier and self-attachments.
                if j == 0:
                    continue
                if i == j:
                    continue
                word_id = param.get_scalar(batch_words[sentence_id][j].cpu(), 0)
                h_pos_id = param.get_scalar(batch_pos[sentence_id][i].cpu(), 0)
                m_pos_id = param.get_scalar(batch_pos[sentence_id][j].cpu(), 0)
                # Bucket the head-modifier distance and direction.
                if abs(i - j) > self.dist_dim - 1:
                    dist = self.dist_dim - 1
                else:
                    dist = abs(i - j) - 1
                if self.dir_flag:
                    dir = 0 if i > j else 1
                else:
                    dir = 0
                if self.recons_param is not None:
                    # log P(modifier POS | head POS, head tag, distance, direction),
                    # varying over head tags, broadcast over modifier tags.
                    scores[sentence_id, i, j, :, :] += np.log(
                        self.recons_param[h_pos_id, :, m_pos_id, dist, dir]).reshape(self.tag_num, 1)
                if self.use_lex:
                    # log P(word | modifier POS, modifier tag),
                    # varying over modifier tags, broadcast over head tags.
                    scores[sentence_id, i, j, :, :] += np.log(
                        self.lex_param[m_pos_id, :, word_id].reshape(1, self.tag_num))
        if self.prior_weight > 0 and self.training:
            prior_score = self.prior_dict[batch_sen[sentence_id]]
            scores[sentence_id] += prior_score
        if self.training:
            self.tree_param[batch_sen[sentence_id]] = scores[sentence_id]
    return scores
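# A small numpy sketch (illustrative only; the shapes and distributions are assumptions,
# not the trained parameters of this model) of how `evaluate` fills one decoder score
# cell: the reconstruction term varies with the head tag and is broadcast down the
# columns, while the lexical term varies with the modifier tag and is broadcast across
# the rows.
def _decoder_cell_sketch(tag_num=3):
    import numpy as np
    recons = np.random.dirichlet(np.ones(tag_num))   # toy P(m_pos | h_pos, h_tag, dist, dir)
    lex = np.random.dirichlet(np.ones(tag_num))      # toy P(word | m_pos, m_tag)
    cell = np.zeros((tag_num, tag_num))              # indexed [head_tag, modifier_tag]
    cell += np.log(recons).reshape(tag_num, 1)       # broadcast over modifier tags
    cell += np.log(lex).reshape(1, tag_num)          # broadcast over head tags
    return cell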
def crf_train(self, sentences, best_trees, tree_params):
    batch_loss = 0.0
    section_loss = 0.0
    etotal = 0
    epoch_loss = 0.0
    training_likelihood = 0.0
    param.init_param(sentences, self.vocab, self.tag_table, self.recons_param,
                     self.lex_param, self.distdim)
    random.shuffle(sentences)
    start = time.time()
    for iSentence, sentence in enumerate(sentences):
        print iSentence
        if iSentence % 10 == 0 and iSentence != 0:
            print 'Loss', param.get_scalar(section_loss / etotal, 0), 'Time', time.time() - start
            start = time.time()
            section_loss = 0.0
            etotal = 0
        best_parse, encoder_loss, log_likelihood = self.forward(sentence, tree_params)
        best_trees[sentence] = best_parse
        batch_loss += encoder_loss
        section_loss += encoder_loss
        epoch_loss += encoder_loss
        etotal += sentence.size
        training_likelihood += log_likelihood
        if iSentence % self.batch == 0 and iSentence != 0:
            inner_start = time.time()
            batch_loss.backward()
            print 'time cost in one backward', time.time() - inner_start
            inner_start = time.time()
            # Clip gradients before the optimizer step so the clipping takes effect.
            nn.utils.clip_grad_norm(self.parameters(), 2.0)
            self.trainer.step()
            batch_loss = 0.0
            # print 'time cost in one update', time.time() - inner_start
            self.trainer.zero_grad()
    # Flush the last, possibly incomplete, batch.
    if param.get_scalar(batch_loss, 0) > 0:
        batch_loss.backward()
        nn.utils.clip_grad_norm(self.parameters(), 2.0)
        self.trainer.step()
        self.trainer.zero_grad()
    print 'Iteration loss:', param.get_scalar(epoch_loss / iSentence, 0)
    print 'Training likelihood', training_likelihood
def forward(self, sentence, tree_params):
    # Build word/POS/external-embedding representations, with word dropout
    # scaled by corpus frequency.
    for entry in sentence.entries:
        c = float(self.wordsCount.get(entry.norm, 0))
        dropFlag = (random.random() < (c / (0.25 + c)))
        wordvec = self.wlookup(
            scalar(int(self.vocab.get(entry.norm, 0)) if dropFlag else 0)
        ) if self.wdims > 0 else None
        posvec = self.plookup(scalar(int(self.pos[entry.pos]))) if self.pdims > 0 else None
        evec = None
        if self.external_embedding is not None:
            evec = self.elookup(
                scalar(self.extrnd.get(entry.form, self.extrnd.get(entry.norm, 0))
                       if (dropFlag or (random.random() < 0.5)) else 0))
        entry.vec = cat([wordvec, posvec, evec])
        entry.lstms = [entry.vec, entry.vec]
        entry.unary_potential = None

    # Run the forward and backward LSTMs over the sentence.
    lstm_forward = RNNState(self.builders[0])
    lstm_backward = RNNState(self.builders[1])
    for entry, rentry in zip(sentence.entries, reversed(sentence.entries)):
        lstm_forward = lstm_forward.next(entry.vec)
        lstm_backward = lstm_backward.next(rentry.vec)
        entry.lstms[1] = lstm_forward()
        rentry.lstms[0] = lstm_backward()

    # Project the BiLSTM states and score each candidate tag for the entry.
    for entry in sentence.entries:
        entry.hidden = torch.mm(cat([entry.lstms[0], entry.lstms[1]]),
                                self.hidLayer) + self.hidBias
        tag_list = Variable(torch.LongTensor(self.tag_table.get(entry.pos)))
        tag_emb = torch.index_select(self.tlookup.weight, 0, tag_list)
        entry.unary_potential = self.activation(torch.mm(entry.hidden, torch.t(tag_emb)))

    start = time.time()
    scores, crf_scores = self.evaluate(sentence)
    # print "time cost in firing features", time.time() - start
    start = time.time()
    best_parse = eisner_parser.parse_proj(scores)
    # print "time cost in parsing", time.time() - start
    start = time.time()
    partition_score, inside_scores = eisner_parser.partition_inside(crf_scores)
    # print "time cost in computing partition", time.time() - start
    tree_params[sentence] = (scores, crf_scores.data.numpy(),
                             param.get_scalar(partition_score, 0))
    encoder_loss, log_likelihood = self.get_loss(best_parse, partition_score,
                                                 crf_scores, scores)
    return best_parse, encoder_loss, log_likelihood
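# A shape-only sketch (dimensions are assumptions) of how each token's unary potentials
# are produced in `forward`: the projected BiLSTM state is scored against the embeddings
# of the tags licensed for that token's POS.
def _unary_potential_sketch(hidden_dim=4, num_licensed_tags=3):
    import torch
    hidden = torch.randn(1, hidden_dim)                    # projected BiLSTM state for one token
    tag_emb = torch.randn(num_licensed_tags, hidden_dim)   # embeddings of the licensed tags
    return torch.mm(hidden, torch.t(tag_emb))              # (1, num_licensed_tags) potentials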
def get_loss(self, best_parse, partition_score, crf_scores, scores):
    # Negative log-likelihood of the best parse under the CRF encoder:
    # loss = -(score(best tree) - log Z).
    encoder_loss = Variable(torch.FloatTensor([0]))
    log_likelihood = 0.0
    for i, h in enumerate(best_parse[0]):
        if h == -1:
            continue
        h = int(h)
        m_tag_id = int(best_parse[1][i])
        h_tag_id = int(best_parse[1][h])
        encoder_loss = encoder_loss + crf_scores[h, i, h_tag_id, m_tag_id]
        log_likelihood += scores[h, i, h_tag_id, m_tag_id]
    encoder_loss = -(encoder_loss - partition_score)
    log_likelihood -= param.get_scalar(partition_score, 0)
    return encoder_loss, log_likelihood
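# A toy sketch of the quantity `get_loss` returns (the scores below are made up for
# illustration): given scores s_t for all candidate trees, the CRF loss of the selected
# tree t* is -(s_{t*} - log sum_t exp(s_t)), i.e. its negative log-probability, which is
# always non-negative.
def _crf_loss_sketch():
    import math
    candidate_tree_scores = [2.0, 1.0, 0.5]        # assumed scores of all candidate trees
    best = max(candidate_tree_scores)
    log_partition = math.log(sum(math.exp(s) for s in candidate_tree_scores))
    return -(best - log_partition)                 # >= 0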
        sub_batch_feats_v = utils.list2Variable(sub_batch_feats, options.gpu)
        sub_batch_loss, sub_batch_likelihood, l1 = dependency_tagging_model(
            sub_batch_words_v, sub_batch_pos_v, None, sub_batch_sen, sub_batch_feats_v)
        batch_loss_list.append(sub_batch_loss)
        batch_likelihood += sub_batch_likelihood
    # Sum the sub-batch losses and update the encoder parameters once per batch.
    batch_loss = torch.cat(batch_loss_list)
    batch_loss = torch.sum(batch_loss)
    training_likelihood += batch_likelihood
    l1 = l1.double()
    # batch_loss += options.lam * l1
    batch_loss.backward()
    dependency_tagging_model.trainer.step()
    dependency_tagging_model.trainer.zero_grad()
    iter_loss += param.get_scalar(batch_loss.cpu(), 0)
iter_loss /= tot_batch
print ' loss for this iteration ', iter_loss
print 'likelihood for this iteration ', training_likelihood
if options.do_eval:
    do_eval(dependency_tagging_model, w2i, pos, feats, options)
print 'To train decoder'
if options.dir_flag:
    dir_dim = 2
else:
    dir_dim = 1
dependency_tagging_model.train()
for n in range(options.d_pass):
    print 'Decoder training iteration ', n