def search_candidates(self, input_string):
    """Verify every leaf occurrence against its ancestors and record survivors.

    For each ``leaf`` in ``self.leaves`` and each ``index`` in
    ``leaf.indices`` the chain of ``parent`` links is walked upwards.  For
    every ancestor a window of ``input_string`` is derived by shifting
    ``index`` by the distance between ``leaf.start`` and the ancestor's
    ``start`` / ``end``.  The ancestor is accepted when
    ``EditDistance().compute(window, ancestor.pattern)`` does not exceed
    ``ancestor.error``.  If the base window is rejected, the window is
    retried with its left edge moved outwards by up to ``ancestor.error``
    positions (widest first), and then with its right edge moved outwards
    by up to ``ancestor.error`` positions (widest first).

    When every ancestor is accepted, the window of the topmost ancestor is
    stored as ``self.indicesDict[window_start] = input_string[start:end]``.

    Args:
        input_string: The text the leaf indices refer to.

    Returns:
        None.  Results are written to ``self.indicesDict``.
    """
    distance_calc = EditDistance()
    text_len = len(input_string)

    for leaf in self.leaves:
        for index in leaf.indices:
            node = leaf.parent
            if node is None:
                # Without an ancestor no window is ever computed.  The
                # previous code then hit an unbound (or stale) end position
                # and wrote a bogus entry under key -1.
                # NOTE(review): confirm whether parentless leaves should be
                # recorded directly instead of skipped.
                continue

            leaf_start = leaf.start
            start = end = 0
            is_candidate = True

            while is_candidate and node is not None:
                pattern = node.pattern
                budget = node.error

                # Window implied by the ancestor's extent, clamped to the text.
                start = max(0, index - (leaf_start - node.start))
                end = min(text_len, index + (node.end - leaf_start) + 1)

                if distance_calc.compute(input_string[start:end], pattern) <= budget:
                    node = node.parent
                    continue

                matched = False

                # Widen to the left, widest window first.  The lower bound is
                # clamped at 0: a negative slice start would wrap around to
                # the end of the string and test an unrelated window.
                for widened_start in range(max(0, start - budget), start):
                    window = input_string[widened_start:end]
                    if distance_calc.compute(window, pattern) <= budget:
                        start = widened_start
                        matched = True
                        break

                if not matched:
                    # Widen to the right, widest window first.  Ends past the
                    # text would only repeat an already-tested slice, so the
                    # upper bound is clamped to the text length.
                    for widened_end in range(min(text_len, end + budget), end, -1):
                        window = input_string[start:widened_end]
                        if distance_calc.compute(window, pattern) <= budget:
                            end = widened_end
                            matched = True
                            break

                if matched:
                    node = node.parent
                else:
                    # No window within the error budget fits this ancestor.
                    is_candidate = False

            if is_candidate:
                self.indicesDict[start] = input_string[start:end]
def edit_distance(self):
    """Average token-level edit distance between ``self.gt`` and ``self.gen``.

    For every key of ``self.gt`` the first entry of ``self.gt[key]`` and of
    ``self.gen[key]`` is split on whitespace and the two token lists are
    passed to ``EditDistance.compute``.  The operations reported by
    ``EditDistance.operations`` are tallied per label ('m', 'i', 'd', 'r';
    presumably match / insert / delete / replace -- confirm against
    ``EditDistance``).

    Returns:
        A 4-tuple ``(mean_dist, mean_norm_dist, op_count, op_count_norm)``:
        the mean distance, the mean of distances divided by the longer
        token count of each pair, the mean per-label operation counts, and
        the mean per-label counts weighted by ``1 / longer token count``.
        The example count and the per-pair length are both floored at 1 so
        empty inputs do not divide by zero.
    """
    scorer = EditDistance()

    op_labels = ('m', 'i', 'd', 'r')
    raw_ops = {label: 0 for label in op_labels}
    scaled_ops = {label: 0 for label in op_labels}

    # Floor at one example so an empty ground-truth set yields zeros.
    example_count = float(max(len(self.gt), 1))

    dist_sum = 0
    norm_dist_sum = 0
    for key in self.gt.keys():
        ref_tokens = self.gt[key][0].split()
        hyp_tokens = self.gen[key][0].split()

        # Length of the longer sequence, floored at 1.0 for two empty strings.
        longest = max(float(max(len(ref_tokens), len(hyp_tokens))), 1.0)

        dist = scorer.compute(ref_tokens, hyp_tokens)
        dist_sum += dist
        norm_dist_sum += dist / longest

        per_op_weight = 1.0 / longest
        for op in scorer.operations():
            raw_ops[op] += 1
            scaled_ops[op] += per_op_weight

    mean_dist = dist_sum / example_count
    mean_norm_dist = norm_dist_sum / example_count
    op_count = {op: total / example_count for op, total in raw_ops.items()}
    op_count_norm = {
        op: total / example_count for op, total in scaled_ops.items()
    }
    return mean_dist, mean_norm_dist, op_count, op_count_norm