Example #1
0
    def sentence_score_cobalt(self, sentence1, sentence2, alignments, word_level_scores):
        """Return a weighted F-mean over the aligned words of two sentences.

        Every aligned pair contributes
        ``max(similarity - penalty_mean, self.minimal_aligned_relatedness)``.
        On each side that contribution is weighted by ``self.delta`` when the
        aligned word is not a function word (per ``word_sim.function_word``)
        and by ``1 - self.delta`` when it is. The two weighted sums are divided
        by sentence lengths weighted the same way, giving a ``sentence1``-side
        ratio (precision) and a ``sentence2``-side ratio (recall), combined as
        ``1 / ((1 - alpha) / precision + alpha / recall)``.

        Args:
            sentence1: Sequence of words exposing a ``form`` attribute.
            sentence2: Sequence of words exposing a ``form`` attribute.
            alignments: ``alignments[0]`` is iterated as pairs; item 0 indexes
                ``sentence1`` and item 1 indexes ``sentence2``, both offset by
                one (the code subtracts 1 before indexing).
            word_level_scores: Indexed in parallel with ``alignments[0]``; each
                entry exposes ``similarity`` and ``penalty_mean``.

        Returns:
            The F-mean, or 0 when precision, recall, or the combined
            denominator is zero.
        """

        def is_function(word):
            return word_sim.function_word(word.form)

        function_count1 = sum(1 for word in sentence1 if is_function(word))
        function_count2 = sum(1 for word in sentence2 if is_function(word))
        content_count1 = len(sentence1) - function_count1
        content_count2 = len(sentence2) - function_count2

        length1 = self.delta * content_count1 + ((1.0 - self.delta) * function_count1)
        length2 = self.delta * content_count2 + ((1.0 - self.delta) * function_count2)

        matched1 = 0
        matched2 = 0
        for idx, pair in enumerate(alignments[0]):
            # Decide the sentence1-side weight first, mirroring the lookup
            # order of the straightforward per-side formulation.
            if is_function(sentence1[pair[0] - 1]):
                weight1 = 1 - self.delta
            else:
                weight1 = self.delta

            # The same relatedness value feeds both sides; only the weight
            # depends on which sentence's word is a function word.
            scores = word_level_scores[idx]
            relatedness = max(
                scores.similarity - scores.penalty_mean,
                self.minimal_aligned_relatedness,
            )
            matched1 += weight1 * relatedness

            if is_function(sentence2[pair[1] - 1]):
                weight2 = 1 - self.delta
            else:
                weight2 = self.delta
            matched2 += weight2 * relatedness

        # A zero weighted length leaves the raw weighted sum as the ratio.
        precision = matched1 if length1 == 0 else matched1 / length1
        recall = matched2 if length2 == 0 else matched2 / length2

        # Guard before dividing: either ratio being zero short-circuits to 0.
        if precision == 0 or recall == 0:
            return 0

        denominator = ((1.0 - self.alpha) / precision) + (self.alpha / recall)
        if denominator == 0:
            return 0

        return 1.0 / denominator
    def run(self, cand, ref):
        """Count content words in each unaligned chunk and store the counts.

        The second item of every pair in ``cand['alignments'][0]`` is taken as
        an aligned token position. Walking those positions in ascending order,
        each gap between consecutive positions produces one entry: the number
        of gap tokens for which ``word_sim.function_word`` is false. One more
        entry covers the tokens after the last aligned position. Collection
        stops once ``AbstractChunkFeature.chunk_number`` entries exist; a
        shorter list is zero-padded to that length. The result is passed to
        ``AbstractChunkFeature.set_value``.

        Args:
            cand: Mapping providing ``'alignments'`` and ``'tokens'``.
            ref: Mapping providing ``'tokens'``.
        """
        chunk_limit = AbstractChunkFeature.chunk_number

        def content_count(tokens, start, stop):
            # Number of tokens at indices start .. stop-1 that are not
            # function words.
            return sum(
                1 for j in range(start, stop)
                if not word_sim.function_word(tokens[j])
            )

        counted = []

        # Position of the last aligned token seen; 0 means "none yet".
        # Positions appear to be 1-based: a first position greater than 1 is
        # treated as a leading gap.
        prev = 0

        for i in sorted(a[1] for a in cand['alignments'][0]):
            if i > prev + 1:
                # The unaligned tokens sit at 1-based positions prev+1 .. i-1,
                # i.e. 0-based indices prev .. i-2. The earlier end bound
                # ``i - prev - 1`` was the gap length, not an index, so it was
                # only right for the leading gap (prev == 0).
                # NOTE(review): this gap reads ref['tokens'] while the trailing
                # chunk below reads cand['tokens'] - confirm which sentence the
                # positions actually refer to.
                counted.append(content_count(ref['tokens'], prev, i - 1))
            if len(counted) == chunk_limit:
                break
            prev = i

        if prev < len(cand['tokens']) and len(counted) < chunk_limit:
            # NOTE(review): the end bound len - 1 leaves the final token out of
            # the trailing chunk; kept as-is because it is unclear whether that
            # is intentional - confirm.
            counted.append(
                content_count(cand['tokens'], prev, len(cand['tokens']) - 1)
            )

        # Pad with zeros so the feature always has chunk_limit entries.
        counted.extend([0] * (chunk_limit - len(counted)))

        AbstractChunkFeature.set_value(self, counted)