Ejemplo n.º 1
0
    def do_eval(dep_model, w2i, pos, feats, options):
        """Run the dependency model on the development set and score it.

        Args:
            dep_model: trained model; put into eval mode here. Its forward
                pass fills ``dep_model.parse_results`` as a side effect.
            w2i: word-to-index vocabulary mapping.
            pos: POS-tag-to-index mapping.
            feats: feature mapping consumed by ``utils.construct_feats``.
            options: run configuration (dev path, batchsize, gpu, output,
                sample_idx, log).

        NOTE(review): this relies on ``utils``, ``os`` and ``epoch`` from
        the enclosing scope — confirm it stays a nested function.
        """
        # print(...) is valid in both Python 2 and 3 for a single argument.
        print("====================================")
        print('Do evaluation on development set')
        eval_sentences = utils.read_data(options.dev, True)
        dep_model.eval()  # disable training-only behavior (e.g. dropout)
        eval_data_list = []
        devpath = os.path.join(
            options.output,
            'test_pred' + str(epoch + 1) + '_' + str(options.sample_idx))
        # Each entry: [word ids, pos ids, [sentence index], feature ids].
        for eval_sen_idx, s in enumerate(eval_sentences):
            s_word, s_pos = s.set_data_list(w2i, pos)
            s_feats = utils.construct_feats(feats, s)
            eval_data_list.append([s_word, s_pos, [eval_sen_idx], s_feats])
        eval_batch_data = utils.construct_batch_data(eval_data_list,
                                                     options.batchsize)

        for batch_id, one_batch in enumerate(eval_batch_data):
            eval_batch_words = [s[0] for s in one_batch]
            eval_batch_pos = [s[1] for s in one_batch]
            eval_batch_sen = [s[2][0] for s in one_batch]
            eval_batch_feats = [s[3] for s in one_batch]
            eval_batch_words_v = utils.list2Variable(eval_batch_words,
                                                     options.gpu)
            eval_batch_pos_v = utils.list2Variable(eval_batch_pos,
                                                   options.gpu)
            eval_batch_feats_v = utils.list2Variable(eval_batch_feats,
                                                     options.gpu)
            # Forward pass only; predictions accumulate on the model.
            dep_model(eval_batch_words_v, eval_batch_pos_v, None,
                      eval_batch_sen, eval_batch_feats_v)
        test_res = dep_model.parse_results
        utils.eval(test_res, eval_sentences, devpath,
                   options.log + '_' + str(options.sample_idx), epoch)
        print("====================================")
Ejemplo n.º 2
0
 def computing_crf_scores(self, lstm_feats, batch_size, sentence_length):
     """Combine LSTM unary potentials with learned transition scores.

     Args:
         lstm_feats: unary potentials, reshaped here to
             (batch_size, sentence_length * tag_num).
         batch_size: number of sentences in the batch.
         sentence_length: padded sentence length.

     Returns:
         Double tensor of shape
         (batch, sent_len, sent_len, tag_num, tag_num), moved to CUDA
         when available.
     """
     unary_potentials = lstm_feats.contiguous().view(
         batch_size, sentence_length * self.tag_num)
     # The original computed log(ger(exp(u), exp(u))) per sentence, which
     # is mathematically u[i] + u[j] but can overflow/underflow in exp().
     # The batched outer sum below is exact, stable, and avoids the
     # per-sentence Python loop and torch.cat.
     crf_scores = (unary_potentials.unsqueeze(2) +
                   unary_potentials.unsqueeze(1))
     crf_scores = crf_scores.contiguous().view(
         batch_size, sentence_length, sentence_length, self.tag_num,
         self.tag_num)
     transition_map_var = utils.list2Variable(self.transition_map, self.gpu)
     transition_var = self.transitions(transition_map_var)
     # Shape (1, 1, 1, tag, tag) broadcasts over batch and both sentence
     # axes in the addition below (equivalent to the former repeat()).
     transition_var = transition_var.view(
         1, 1, 1, self.tag_num, self.tag_num)
     crf_scores = crf_scores + transition_var
     crf_scores = crf_scores.double()
     if torch.cuda.is_available():
         crf_scores = crf_scores.cuda()
     return crf_scores
Ejemplo n.º 3
0
    def do_eval(dep_model, w2i, pos, options):
        """Run the tagging/parsing model on the development set and score it.

        Args:
            dep_model: trained model; put into eval mode here. Its forward
                pass fills ``dep_model.parse_results`` as a side effect.
            w2i: word-to-index vocabulary mapping.
            pos: POS-tag-to-index mapping.
            options: run configuration (dev path, batchsize, gpu, output,
                sample_idx, log, use_trigram).

        NOTE(review): this relies on ``utils``, ``os`` and ``epoch`` from
        the enclosing scope — confirm it stays a nested function.
        """
        # print(...) is valid in both Python 2 and 3 for a single argument.
        print("====================================")
        print('Do evaluation on development set')
        eval_sentences = utils.read_data(options.dev, True)
        dep_model.eval()  # disable training-only behavior (e.g. dropout)
        eval_data_list = []
        devpath = os.path.join(
            options.output,
            'test_pred' + str(epoch + 1) + '_' + str(options.sample_idx))
        # Each entry: [word ids, pos ids, [sentence index], (trigram ids)].
        for eval_sen_idx, s in enumerate(eval_sentences):
            s_word, s_pos = s.set_data_list(w2i, pos)
            s_data_list = [s_word, s_pos, [eval_sen_idx]]
            if options.use_trigram:
                s_data_list.append(utils.construct_trigram(s_pos, pos))
            eval_data_list.append(s_data_list)
        eval_batch_data = utils.construct_batch_data(eval_data_list,
                                                     options.batchsize)

        for batch_id, one_batch in enumerate(eval_batch_data):
            eval_batch_words = [s[0] for s in one_batch]
            eval_batch_pos = [s[1] for s in one_batch]
            eval_batch_sen = [s[2][0] for s in one_batch]
            if options.use_trigram:
                batch_trigram = [s[3] for s in one_batch]
                batch_trigram_v = utils.list2Variable(batch_trigram,
                                                      options.gpu)
            else:
                batch_trigram_v = None
            eval_batch_words_v = utils.list2Variable(eval_batch_words,
                                                     options.gpu)
            eval_batch_pos_v = utils.list2Variable(eval_batch_pos,
                                                   options.gpu)
            # Forward pass only; predictions accumulate on the model.
            dep_model(eval_batch_words_v, eval_batch_pos_v, None,
                      eval_batch_sen, batch_trigram_v)
        test_res = dep_model.parse_results
        utils.eval(test_res, eval_sentences, devpath,
                   options.log + '_' + str(options.sample_idx), epoch)
        print("====================================")
Ejemplo n.º 4
0
     enumerate(batch_data), mininterval=2,
     desc=' -Tot it %d (epoch %d)' % (tot_batch, 0), leave=False, file=sys.stdout):
 # NOTE(review): fragment of a training loop — the `for ... in tqdm(`
 # header these two argument lines belong to starts above this view;
 # `options`, `utils`, `torch` and `dependency_tagging_model` come from
 # the enclosing scope.
 # batch_words, batch_pos, batch_sen, batch_feats = [s[0] for s in one_batch], [s[1] for s in one_batch], \
 #                                                  [s[2][0] for s in one_batch], [s[3] for s in one_batch]
 # batch_words_v = utils.list2Variable(batch_words, options.gpu)
 # batch_pos_v = utils.list2Variable(batch_pos, options.gpu)
 # batch_feats_v = utils.list2Variable(batch_feats, options.gpu)
 batch_loss_list = []
 batch_likelihood = 0.0
 # Split the batch into sub-batches, presumably to bound memory per
 # forward pass — confirm against construct_batch_data.
 sub_batch_data = utils.construct_batch_data(one_batch, options.sub_batch_size)
 for one_sub_batch in sub_batch_data:
     # Columns per sentence: [word ids, pos ids, [sentence idx], feat ids].
     sub_batch_words, sub_batch_pos, sub_batch_sen, sub_batch_feats = [s[0] for s in one_sub_batch], \
                                                                      [s[1] for s in one_sub_batch], \
                                                                      [s[2][0] for s in one_sub_batch], \
                                                                      [s[3] for s in one_sub_batch]
     sub_batch_words_v = utils.list2Variable(sub_batch_words, options.gpu)
     sub_batch_pos_v = utils.list2Variable(sub_batch_pos, options.gpu)
     sub_batch_feats_v = utils.list2Variable(sub_batch_feats, options.gpu)
     sub_batch_loss, sub_batch_likelihood, l1 = dependency_tagging_model(sub_batch_words_v,
                                                                         sub_batch_pos_v, None,
                                                                         sub_batch_sen,
                                                                         sub_batch_feats_v)
     batch_loss_list.append(sub_batch_loss)
     batch_likelihood += sub_batch_likelihood
 # Sum the per-sub-batch losses into one scalar for a single backward pass.
 batch_loss = torch.cat(batch_loss_list)
 batch_loss = torch.sum(batch_loss)
 training_likelihood += batch_likelihood
 # NOTE(review): `l1` here is only the LAST sub-batch's value, and the
 # regularization term below is disabled — confirm this is intended.
 l1 = l1.double()
 # batch_loss += options.lam*l1
 batch_loss.backward()
 dependency_tagging_model.trainer.step()
Ejemplo n.º 5
0
    def compute_trans(self, batch_pos, trans_masks, trans_back_masks):
        """Build the transition score tensor from POS, tag and distance features.

        Concatenates head/modifier POS embeddings, tag embeddings and a
        clipped-distance embedding for every (head, modifier) pair, feeds
        them through two direction-specific hidden layers sharing one output
        projection, masks invalid positions, and sums both directions.

        Args:
            batch_pos: LongTensor (batch_size, sentence_length) of POS ids.
            trans_masks: byte mask; True entries of the forward matrix are
                zeroed via masked_fill.
            trans_back_masks: same for the backward matrix.

        Returns:
            (trans_matrix, dist_emb) where trans_matrix has shape
            (batch, sent_len, sent_len, tag_num, tag_num).
        """
        trans_var = []
        batch_size, sentence_length = batch_pos.data.shape
        # POS features, expanded into head and modifier views.
        pos_emb = self.plookup(batch_pos)
        pos_emb_h, pos_emb_m = utils.compute_trans('sentence', batch_size,
                                                   sentence_length,
                                                   self.tag_num, pos_emb)
        trans_var.append(pos_emb_h)
        trans_var.append(pos_emb_m)
        # Tag features: one embedding per tag id 0..tag_num-1.
        tag_var = utils.list2Variable(list(range(self.tag_num)), self.gpu)
        tag_emb = self.tlookup(tag_var)
        tag_emb_h, tag_emb_m = utils.compute_trans('tag', batch_size,
                                                   sentence_length,
                                                   self.tag_num, tag_emb)
        trans_var.append(tag_emb_h)
        trans_var.append(tag_emb_m)
        # Distance feature: |head - modifier| clipped at self.dist_num.
        dist_list = [[min(abs(i - j), self.dist_num)
                      for j in range(sentence_length)]
                     for i in range(sentence_length)]
        dist_var = utils.list2Variable(dist_list, self.gpu)
        dist_emb = self.dlookup(dist_var)
        dist_emb = utils.compute_trans('global', batch_size, sentence_length,
                                       self.tag_num, dist_emb)
        trans_var.append(dist_emb)
        trans = torch.cat(trans_var, dim=5)

        # Two direction-specific MLPs; both reuse self.out_layer.
        trans_hidden = F.relu(self.feat2trans(trans))
        trans_hidden_back = F.relu(self.feat2trans_back(trans))
        trans_matrix_forward = self.out_layer(trans_hidden)
        trans_matrix_back = self.out_layer(trans_hidden_back)
        trans_matrix_forward = trans_matrix_forward.contiguous().view(
            batch_size, sentence_length, sentence_length, self.tag_num,
            self.tag_num)
        trans_matrix_back = trans_matrix_back.contiguous().view(
            batch_size, sentence_length, sentence_length, self.tag_num,
            self.tag_num)
        # Zero out entries invalid for each direction.
        trans_matrix_forward = trans_matrix_forward.masked_fill(
            trans_masks, 0.0)
        trans_matrix_back = trans_matrix_back.masked_fill(
            trans_back_masks, 0.0)

        trans_matrix = trans_matrix_forward + trans_matrix_back
        return trans_matrix, dist_emb
Ejemplo n.º 6
0
 # NOTE(review): fragment of a training loop — the enclosing
 # `for one_batch in ...:` header is above this view; `options`, `utils`,
 # `torch` and `dependencyTaggingPl_model` come from that scope.
 # if options.use_trigram:
 #     batch_trigram = [s[3] for s in one_batch]
 #     batch_trigram_v = utils.list2Variable(batch_trigram,options.gpu)
 # else:
 #     batch_trigram_v = None
 # batch_loss, batch_likelihood = dependencyTaggingPl_model(batch_words_v, batch_pos_v, None,
 #                                                          batch_sen, batch_trigram_v)
 batch_loss_list = []
 batch_likelihood = 0.0
 # Split the batch into sub-batches, presumably to bound memory per
 # forward pass — confirm against construct_batch_data.
 sub_batch_data = utils.construct_batch_data(
     one_batch, options.sub_batch_size)
 for one_sub_batch in sub_batch_data:
     # Columns per sentence: [word ids, pos ids, [sentence idx], (trigrams)].
     sub_batch_words, sub_batch_pos, sub_batch_sen = [s[0] for s in one_sub_batch], \
                                                     [s[1] for s in one_sub_batch], \
                                                     [s[2][0] for s in one_sub_batch]
     sub_batch_words_v = utils.list2Variable(
         sub_batch_words, options.gpu)
     sub_batch_pos_v = utils.list2Variable(
         sub_batch_pos, options.gpu)
     # Trigram features are optional; pass None when disabled.
     if options.use_trigram:
         sub_batch_trigram = [s[3] for s in one_sub_batch]
         sub_batch_trigram_v = utils.list2Variable(
             sub_batch_trigram, options.gpu)
     else:
         sub_batch_trigram_v = None
     sub_batch_loss, sub_batch_likelihood = dependencyTaggingPl_model(
         sub_batch_words_v, sub_batch_pos_v, None,
         sub_batch_sen, sub_batch_trigram_v)
     batch_loss_list.append(sub_batch_loss)
     batch_likelihood += sub_batch_likelihood
 # Sum the per-sub-batch losses into one scalar (backward pass presumably
 # follows below this view).
 batch_loss = torch.cat(batch_loss_list)
 batch_loss = torch.sum(batch_loss)