コード例 #1
0
def test_match_size_2():
    """Print the match and candidate sizes for a repeated-character candidate.

    Builds n-grams with ``n_size = 1`` for a candidate consisting of one
    character repeated six times and for a reference sentence, hands both to
    ``get_match_size`` and prints the two values it returns.
    """
    n_size = 1
    candidate, reference = '我我我我我我', '中华人民的一份子我'
    candidate_ngrams = get_ngram(candidate, n_size)
    reference_ngrams = get_ngram(reference, n_size)
    match_size, cand_size = get_match_size(candidate_ngrams, reference_ngrams)
    # Emit both results in the same order and format as separate print calls.
    report = (('match size: {}', match_size), ('cand size: {}', cand_size))
    for template, value in report:
        print(template.format(value))
コード例 #2
0
 def add_entity_bonus(self, cand, entity_ref):
     """Add the entity bonus for one candidate to the accumulated counts.

     For every ``n_size`` in ``range(self.n_size)`` the candidate and each
     entity reference are turned into n-grams with ``label='ENTITY'``. The
     match size reported by ``common.get_match_size``, scaled by
     ``self.beta``, is then added to both ``self.match_ngram[n_size]`` and
     ``self.candi_ngram[n_size]``.

     Arguments:
         cand -- candidate answer handed to ``common.get_ngram``
         entity_ref -- iterable of entity references
     """
     for order in range(self.n_size):
         candidate_ngrams = common.get_ngram(cand, order, label='ENTITY')
         reference_ngrams = [
             common.get_ngram(entity, order, label='ENTITY')
             for entity in entity_ref
         ]
         matched, _ = common.get_match_size(candidate_ngrams, reference_ngrams)
         # The same weighted match count goes into both totals; the
         # candidate size returned above is not used here.
         bonus = self.beta * matched
         self.match_ngram[order] += bonus
         self.candi_ngram[order] += bonus
コード例 #3
0
 def add_yn_bonus(self, cand, ref_list, yn_label, yn_ref):
     """Add the yes/no bonus for one candidate to the accumulated counts.

     For every ``n_size`` in ``range(self.n_size)`` the candidate is turned
     into n-grams with ``label=yn_label``; the reference at each position of
     ``yn_ref`` is taken from ``ref_list`` at the same index and turned into
     n-grams with that position's label. The match size reported by
     ``common.get_match_size``, scaled by ``self.alpha``, is added to both
     ``self.match_ngram[n_size]`` and ``self.candi_ngram[n_size]``.

     Arguments:
         cand -- candidate answer handed to ``common.get_ngram``
         ref_list -- references, indexed by position in ``yn_ref``
         yn_label -- label applied to the candidate's n-grams
         yn_ref -- per-reference labels, one for each indexed reference
     """
     for order in range(self.n_size):
         candidate_ngrams = common.get_ngram(cand, order, label=yn_label)
         # Index ref_list by position so a too-short ref_list still raises.
         reference_ngrams = [
             common.get_ngram(ref_list[position], order, label=ref_label)
             for position, ref_label in enumerate(yn_ref)
         ]
         matched, _ = common.get_match_size(candidate_ngrams, reference_ngrams)
         # The same weighted match count goes into both totals; the
         # candidate size returned above is not used here.
         bonus = self.alpha * matched
         self.match_ngram[order] += bonus
         self.candi_ngram[order] += bonus
コード例 #4
0
 def count_ngram(self, cand, ref_list, n_size):
     """Accumulate match and candidate n-gram counts for one ``n_size``.

     The candidate and every entry of ``ref_list`` are turned into n-grams
     via ``common.get_ngram``; the two values returned by
     ``common.get_match_size`` are added to ``self.match_ngram[n_size]`` and
     ``self.candi_ngram[n_size]`` respectively.

     Arguments:
         cand -- candidate answer
         ref_list -- iterable of reference answers
         n_size -- n-gram size key used for both running totals
     """
     candidate_ngrams = common.get_ngram(cand, n_size)
     reference_ngrams = [common.get_ngram(ref, n_size) for ref in ref_list]
     # First time this n_size is seen: start both running totals at zero.
     if n_size not in self.match_ngram:
         self.match_ngram[n_size] = self.candi_ngram[n_size] = 0
     matched, candidate_total = common.get_match_size(
         candidate_ngrams, reference_ngrams
     )
     self.match_ngram[n_size] += matched
     self.candi_ngram[n_size] += candidate_total
コード例 #5
0
    def count_ngram(self, cand: str, ref: str, n_size: int):
        """Count overlapping subsequences and store the totals in the dicts.

        The two values returned by ``common.get_match_size`` for the
        candidate and reference n-grams are added to
        ``self.match_ngram[n_size]`` and ``self.candi_ngram[n_size]``.

        Arguments:
            cand {str} -- candidate answer
            ref {str} -- reference answer
            n_size {int} -- size of the subsequences
        """
        candidate_ngrams = common.get_ngram(cand, n_size)
        reference_ngrams = common.get_ngram(ref, n_size)

        # First time this n_size is seen: start both running totals at zero.
        if n_size not in self.match_ngram:
            self.match_ngram[n_size] = self.candi_ngram[n_size] = 0

        matched, candidate_total = common.get_match_size(
            candidate_ngrams, reference_ngrams
        )
        self.match_ngram[n_size] += matched
        self.candi_ngram[n_size] += candidate_total
コード例 #6
0
def test_ngram_2():
    """Print what ``get_ngram`` returns for a sample sentence, ``n_size`` 1."""
    print(get_ngram('中华人民共和国', 1))
コード例 #7
0
def test_ngram_1():
    """Print what ``get_ngram`` returns for a sample sentence, ``n_size`` 2."""
    print(get_ngram('我是孙维松', 2))