def search_candidates(self, input_string):
    """Verify leaf occurrences against their ancestors and record survivors.

    For every index stored in ``leaf.indices`` of every leaf in
    ``self.leaves``, the ``parent`` chain of the leaf is walked upwards.
    At each ancestor a window of ``input_string`` is derived from the
    index and the ``start``/``end`` offsets of the leaf and the ancestor,
    and compared with ``parent.pattern`` through ``EditDistance.compute``.
    The ancestor accepts the window when the distance is at most
    ``parent.error``.  When the initial window is rejected, the window is
    first widened to the left by up to ``parent.error`` positions and, if
    that also fails, widened to the right by up to ``parent.error``
    positions.  An occurrence that every ancestor accepts is stored in
    ``self.indicesDict`` as ``{window_start: input_string[start:end]}``
    using the window accepted by the topmost ancestor.

    NOTE(review): presumably ``leaf.indices`` holds positions in
    ``input_string`` where the leaf's piece of the pattern was found, and
    ``parent.error`` is a non-negative int -- confirm against the code that
    builds the tree.

    Args:
        input_string: The text whose windows are checked against the
            ancestors' patterns.

    Returns:
        None.  Results are written to ``self.indicesDict``.
    """
    edit_distance = EditDistance()
    text_len = len(input_string)

    for leaf in self.leaves:
        if leaf.parent is None:
            # Without an ancestor no window is ever computed, so there is
            # nothing meaningful to record (previously this path reached
            # the final store with an unbound or stale window end).
            continue

        for index in leaf.indices:
            node = leaf.parent
            start = end = 0
            accepted = True

            while accepted and node is not None:
                # Window the ancestor would span, clamped to the string.
                start = max(index - (leaf.start - node.start), 0)
                end = min(index + (node.end - leaf.start) + 1, text_len)
                pattern = node.pattern
                budget = node.error

                if edit_distance.compute(input_string[start:end],
                                         pattern) <= budget:
                    node = node.parent
                    continue

                # Widen to the left, farthest start first.  The lower bound
                # is clamped at 0: a negative start would make the slice
                # wrap around and read from the end of the string.
                for wider_start in range(max(start - budget, 0), start):
                    if edit_distance.compute(input_string[wider_start:end],
                                             pattern) <= budget:
                        start = wider_start
                        break
                else:
                    # Left widening failed; widen to the right, farthest
                    # end first.  Ends past the string would all yield the
                    # same slice, so the upper bound is clamped.
                    for wider_end in range(min(end + budget, text_len),
                                           end, -1):
                        if edit_distance.compute(
                                input_string[start:wider_end],
                                pattern) <= budget:
                            end = wider_end
                            break
                    else:
                        accepted = False

                if accepted:
                    node = node.parent

            if accepted:
                self.indicesDict[start] = input_string[start:end]
Esempio n. 2
0
    def edit_distance(self):
        """Average the edit distance between paired ``gt`` and ``gen`` entries.

        For every key of ``self.gt``, the first element of the ``self.gt``
        entry and of the ``self.gen`` entry under the same key is split on
        whitespace, and the two token lists are passed to
        ``EditDistance.compute``.  The operations reported afterwards by
        ``EditDistance.operations`` are tallied per operation code.

        NOTE(review): the codes ``'m'``, ``'i'``, ``'d'``, ``'r'`` presumably
        stand for match / insert / delete / replace -- confirm against
        ``EditDistance.operations``.

        Returns:
            A 4-tuple ``(mean_dist, mean_norm_dist, op_count, op_count_norm)``:
            the mean distance, the mean of each distance divided by the
            longer token count of its pair, the per-code operation counts
            averaged over the examples, and the same counts with every
            operation weighted by ``1 / longer token count``.  All averages
            divide by ``max(len(self.gt), 1)``.
        """
        scorer = EditDistance()

        op_kinds = ('m', 'i', 'd', 'r')
        raw_ops = dict.fromkeys(op_kinds, 0)
        scaled_ops = dict.fromkeys(op_kinds, 0)

        # Never divide by zero, even when there are no examples at all.
        denominator = float(max(len(self.gt), 1))

        dist_sum = 0
        scaled_dist_sum = 0
        for key in self.gt.keys():
            reference = self.gt[key][0].split()
            generated = self.gen[key][0].split()

            # Length of the longer sequence, floored at 1 for empty pairs.
            longest = float(max(len(reference), len(generated), 1))

            dist = scorer.compute(reference, generated)
            dist_sum += dist
            scaled_dist_sum += dist / longest

            for op in scorer.operations():
                raw_ops[op] += 1
                scaled_ops[op] += 1.0 / longest

        # Turn the accumulated totals into per-example averages.
        for op in raw_ops:
            raw_ops[op] /= denominator
            scaled_ops[op] /= denominator

        return (dist_sum / denominator, scaled_dist_sum / denominator,
                raw_ops, scaled_ops)