def get_tree_score(self, crf_score, scores, best_parse, partition,
                    batch_sen):
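     # For each sentence: sum the encoder CRF potentials along the best parse
     # (tree_score) and accumulate the log-likelihood of that parse under
     # `scores` minus the per-sentence log partition. During training the
     # partition value and encoder scores are cached for later iterations.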
     best_parse = np.array(list(best_parse), dtype=int)
     _, batch_size, sentence_length = best_parse.shape
     tree_score = []
     likelihood = 0.0
     for sentence_id in range(batch_size):
         sentence_tree_score = []
         if self.prior_weight > 0 and self.training:
             scores[sentence_id] -= self.prior_dict[batch_sen[sentence_id]]
         for i in range(1, sentence_length):
             head_id = best_parse[0, sentence_id, i]
             sentence_tree_score.append(crf_score[sentence_id,
                                                  best_parse[0, sentence_id,
                                                             i], i,
                                                  best_parse[1, sentence_id,
                                                             head_id],
                                                  best_parse[1, sentence_id,
                                                             i]])
             likelihood += scores[sentence_id, best_parse[0, sentence_id,
                                                          i], i,
                                  best_parse[1, sentence_id, head_id],
                                  best_parse[1, sentence_id, i]]
         likelihood -= param.get_scalar(partition[sentence_id].cpu(), 0)
         if self.training:
             self.partition_table[
                 batch_sen[sentence_id]] = param.get_scalar(
                     partition[sentence_id].cpu(), 0)
             self.encoder_score_table[batch_sen[sentence_id]] = crf_score[
                 sentence_id].cpu().data.numpy()
         tree_score.append(torch.sum(torch.cat(sentence_tree_score)))
     tree_score = torch.cat(tree_score)
     return tree_score, likelihood
 def evaluate(self, batch_pos, batch_words, batch_sen, crf_scores):
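     # Build per-arc decoder scores: start from a copy of the encoder CRF
     # scores (or zeros on the initial pass), then add log reconstruction
     # probabilities (head POS, modifier POS, distance, direction) and,
     # optionally, log lexical probabilities (modifier tag -> word).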
     batch_size, sentence_length = batch_pos.data.shape
     if not self.initial_Flag:
         scores = np.copy(crf_scores.cpu().data.numpy())
     else:
         scores = np.zeros((batch_size, sentence_length, sentence_length,
                            self.tag_num, self.tag_num))
     for sentence_id in range(batch_size):
         for i in range(sentence_length):
             for j in range(sentence_length):
                 # skip the root position as a modifier and self-arcs before
                 # doing any index lookups
                 if j == 0 or i == j:
                     continue
                 word_id = param.get_scalar(
                     batch_words[sentence_id][j].cpu(), 0)
                 h_pos_id = param.get_scalar(
                     batch_pos[sentence_id][i].cpu(), 0)
                 m_pos_id = param.get_scalar(
                     batch_pos[sentence_id][j].cpu(), 0)
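                 # bucket the linear distance between head i and modifier j
                 # (capped at dist_dim - 1) and the arc direction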
                 if abs(i - j) > self.dist_dim - 1:
                     dist = self.dist_dim - 1
                 else:
                     dist = abs(i - j) - 1
                 if self.dir_flag:
                     if i > j:
                         dir = 0
                     else:
                         dir = 1
                 else:
                     dir = 0
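                 # reconstruction term, indexed by head POS, head tag, modifier
                 # POS, distance and direction; the reshape broadcasts it over
                 # the modifier-tag dimension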
                 if self.recons_param is not None:
                     scores[sentence_id, i, j, :, :] += np.log(
                         self.recons_param[h_pos_id, :, m_pos_id, dist,
                                           dir]).reshape(self.tag_num, 1)
                 if self.use_lex:
                     scores[sentence_id, i, j, :, :] += np.log(
                         self.lex_param[m_pos_id, :,
                                        word_id].reshape(1, self.tag_num))
         if self.prior_weight > 0 and self.training:
             prior_score = self.prior_dict[batch_sen[sentence_id]]
             scores[sentence_id] += prior_score
         if self.training:
             self.tree_param[batch_sen[sentence_id]] = scores[sentence_id]
     return scores
Example #3
    def crf_train(self, sentences, best_trees, tree_params):
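        # One pass over the training sentences: run the encoder forward pass,
        # accumulate the CRF loss, and update the parameters every self.batch
        # sentences, with a final update for any leftover accumulated loss.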
        batch_loss = 0.0
        section_loss = 0.0
        etotal = 0
        epoch_loss = 0.0
        training_likelihood = 0.0

        param.init_param(sentences, self.vocab, self.tag_table,
                         self.recons_param, self.lex_param, self.distdim)
        random.shuffle(sentences)
        start = time.time()
        for iSentence, sentence in enumerate(sentences):
            print iSentence
            if iSentence % 10 == 0 and iSentence != 0:
                print 'Loss', param.get_scalar(section_loss / etotal,
                                               0), 'Time', time.time() - start
                start = time.time()
                section_loss = 0.0
                etotal = 0
            best_parse, encoder_loss, log_likelihood = self.forward(
                sentence, tree_params)
            best_trees[sentence] = best_parse
            batch_loss += encoder_loss
            section_loss += encoder_loss
            epoch_loss += encoder_loss
            etotal += sentence.size
            training_likelihood += log_likelihood
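            # every self.batch sentences, backpropagate the accumulated loss,
            # clip the gradients and take an optimizer step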
            if iSentence % self.batch == 0 and iSentence != 0:
                inner_start = time.time()
                batch_loss.backward()
                print 'time cost in one backward', time.time() - inner_start
                inner_start = time.time()
                # clip gradients before stepping so the clipped values are the
                # ones the optimizer applies
                nn.utils.clip_grad_norm(self.parameters(), 2.0)
                self.trainer.step()
                batch_loss = 0.0
                #print 'time cost in one update',time.time()-inner_start
            self.trainer.zero_grad()
        if param.get_scalar(batch_loss, 0) > 0:
            batch_loss.backward()
            nn.utils.clip_grad_norm(self.parameters(), 2.0)
            self.trainer.step()
        self.trainer.zero_grad()
        print 'Iteration loss:', param.get_scalar(epoch_loss / iSentence, 0)
        print 'Training likelihood', training_likelihood
Example #4
    def forward(self, sentence, tree_params):
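        # Embed each token (word, POS and optional external embeddings, with
        # frequency-based word dropout), run a bidirectional LSTM, compute
        # per-token unary potentials over candidate tags, then score, parse
        # with the Eisner algorithm and compute the partition function.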
        for entry in sentence.entries:
            c = float(self.wordsCount.get(entry.norm, 0))
            dropFlag = (random.random() < (c / (0.25 + c)))
            wordvec = self.wlookup(
                scalar(int(self.vocab.get(entry.norm, 0)) if dropFlag else 0)
            ) if self.wdims > 0 else None
            posvec = self.plookup(scalar(int(
                self.pos[entry.pos]))) if self.pdims > 0 else None
            evec = None
            if self.external_embedding is not None:
                evec = self.elookup(
                    scalar(
                        self.extrnd.
                        get(entry.form, self.extrnd.get(entry.norm, 0)) if (
                            dropFlag or (random.random() < 0.5)) else 0))
            entry.vec = cat([wordvec, posvec, evec])
            entry.lstms = [entry.vec, entry.vec]
            entry.unary_potential = None

        # initialise the forward and backward LSTM states once, after the
        # embedding loop, rather than re-creating them for every token
        lstm_forward = RNNState(self.builders[0])
        lstm_backward = RNNState(self.builders[1])

        for entry, rentry in zip(sentence.entries, reversed(sentence.entries)):
            lstm_forward = lstm_forward.next(entry.vec)
            lstm_backward = lstm_backward.next(rentry.vec)

            entry.lstms[1] = lstm_forward()
            rentry.lstms[0] = lstm_backward()

        for entry in sentence.entries:
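            # project the concatenated forward/backward LSTM states through the
            # hidden layer and score every candidate tag for this token's POS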
            entry.hidden = torch.mm(cat([entry.lstms[0], entry.lstms[1]]),
                                    self.hidLayer) + self.hidBias
            tag_list = Variable(torch.LongTensor(self.tag_table.get(
                entry.pos)))
            tag_emb = torch.index_select(self.tlookup.weight, 0, tag_list)
            entry.unary_potential = self.activation(
                torch.mm(entry.hidden, torch.t(tag_emb)))
        start = time.time()
        scores, crf_scores = self.evaluate(sentence)
        #print "time cost in firing features",time.time()-start
        start = time.time()
        best_parse = eisner_parser.parse_proj(scores)
        #print "time cost in parsing", time.time() - start
        start = time.time()
        partition_score, inside_scores = eisner_parser.partition_inside(
            crf_scores)
        #print "time cost in computing partition", time.time() - start
        tree_params[sentence] = (scores, crf_scores.data.numpy(),
                                 param.get_scalar(partition_score, 0))
        encoder_loss, log_likelihood = self.get_loss(best_parse,
                                                     partition_score,
                                                     crf_scores, scores)

        return best_parse, encoder_loss, log_likelihood
Example #5
 def get_loss(self, best_parse, partition_score, crf_scores, scores):
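     # Encoder loss is the negative log-probability of the best parse under
     # the CRF: -(sum of arc potentials along the tree - log partition).
     # log_likelihood applies the same computation to the combined `scores`.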
     encoder_loss = Variable(torch.FloatTensor([0]))
     log_likelihood = 0.0
     for i, h in enumerate(best_parse[0]):
         if h == -1:
             continue
         h = int(h)
         m_tag_id = int(best_parse[1][i])
         h_tag_id = int(best_parse[1][h])
         encoder_loss = encoder_loss + crf_scores[h, i, h_tag_id, m_tag_id]
         log_likelihood += scores[h, i, h_tag_id, m_tag_id]
     # print 'tree_score',param.get_scalar(tree_loss, 0)
     encoder_loss = -(encoder_loss - partition_score)
     #encoder_loss = partition_score
     log_likelihood -= param.get_scalar(partition_score, 0)
     # print 'partition',param.get_scalar(partition_score, 0)
     return encoder_loss, log_likelihood
                    sub_batch_feats_v = utils.list2Variable(sub_batch_feats, options.gpu)
                    sub_batch_loss, sub_batch_likelihood, l1 = dependency_tagging_model(sub_batch_words_v,
                                                                                        sub_batch_pos_v, None,
                                                                                        sub_batch_sen,
                                                                                        sub_batch_feats_v)
                    batch_loss_list.append(sub_batch_loss)
                    batch_likelihood += sub_batch_likelihood
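                # combine the sub-batch losses into a single scalar and
                # backpropagate once per batch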
                batch_loss = torch.cat(batch_loss_list)
                batch_loss = torch.sum(batch_loss)
                training_likelihood += batch_likelihood
                l1 = l1.double()
                # batch_loss += options.lam*l1
                batch_loss.backward()
                dependency_tagging_model.trainer.step()
                dependency_tagging_model.trainer.zero_grad()
                iter_loss += param.get_scalar(batch_loss.cpu(), 0)
            iter_loss /= tot_batch
            print ' loss for this iteration ', iter_loss

            print 'likelihood for this iteration ', training_likelihood

        if options.do_eval:
            do_eval(dependency_tagging_model, w2i, pos, feats, options)
        print 'To train decoder'
        if options.dir_flag:
            dir_dim = 2
        else:
            dir_dim = 1
        dependency_tagging_model.train()
        for n in range(options.d_pass):
            print 'Decoder training iteration ', n