def get_all_equal_edits(self):
    """Return one edit per run of adjacent alignment steps sharing an operation.

    Consecutive entries of ``self.align_seq`` are grouped by their first
    element (the operation label). Runs labelled ``"M"`` are skipped; every
    other run is handed to ``self.merge_edits`` and the first merged entry,
    minus its leading label, is used to build an ``Edit``.

    Returns:
        A list of ``Edit`` objects, in alignment order.
    """
    collected = []
    for operation, run in groupby(self.align_seq, lambda step: step[0]):
        # "M" runs (presumably matches) never produce an edit.
        if operation == "M":
            continue
        combined = self.merge_edits(list(run))
        span = combined[0][1:]  # drop the operation label, keep the rest
        collected.append(Edit(self.orig, self.cor, span))
    return collected
def get_all_merge_edits(self):
    """Return one edit per run of adjacent non-``"M"`` alignment steps.

    Entries of ``self.align_seq`` are grouped only by whether their operation
    label equals ``"M"``, so neighbouring steps with different non-``"M"``
    labels land in the same run. Each such run is passed to
    ``self.merge_edits`` and the first merged entry, minus its leading label,
    is used to build an ``Edit``.

    Returns:
        A list of ``Edit`` objects, in alignment order.
    """

    def is_m_step(step):
        return step[0] == "M"

    collected = []
    for is_m_run, run in groupby(self.align_seq, is_m_step):
        # Runs made up of "M" steps contribute nothing.
        if is_m_run:
            continue
        combined = self.merge_edits(list(run))
        span = combined[0][1:]  # drop the operation label, keep the rest
        collected.append(Edit(self.orig, self.cor, span))
    return collected
def import_edit(self, orig, cor, edit, min=True, old_cat=False):
    """Turn a raw edit sequence into an ``Edit``, optionally minimised/classified.

    Args:
        orig: Passed through to ``Edit`` as its first argument.
        cor: Passed through to ``Edit`` as its second argument.
        edit: A sequence of 4 items ``[o_start, o_end, c_start, c_end]`` or
            5 items with a trailing category ``cat``.
        min: When true, the edit is replaced by ``edit.minimise()``. The name
            shadows the builtin but is kept because callers may pass it by
            keyword.
        old_cat: When false, the edit is replaced by ``self.classify(edit)``;
            when true, the classification step is skipped.

    Returns:
        The resulting edit object.

    Raises:
        ValueError: If ``edit`` has neither 4 nor 5 items. ``ValueError`` is a
            subclass of ``Exception``, so callers catching ``Exception``
            continue to work.
    """
    field_count = len(edit)
    if field_count == 4:
        # No error type supplied.
        edit = Edit(orig, cor, edit)
    elif field_count == 5:
        # Fifth item carries an existing error type.
        edit = Edit(orig, cor, edit[:4], edit[4])
    else:
        raise ValueError(
            "Edit not of the form: "
            "[o_start, o_end, c_start, c_end, (cat)]"
        )
    if min:
        edit = edit.minimise()
    if not old_cat:
        edit = self.classify(edit)
    return edit
def get_rule_edits(alignment):
    """Convert an alignment into a list of edits using ``process_seq``.

    Entries of ``alignment.align_seq`` are grouped by the first character of
    their operation label: ``"M"`` runs, ``"T"`` runs (the label carries
    further characters after the ``T``), and runs of everything else.

    * ``"M"`` runs are ignored.
    * ``"T"`` runs yield one edit per entry.
    * Any other run is handed to ``process_seq`` and each sequence it returns
      becomes an edit.

    Args:
        alignment: Object exposing ``align_seq``, ``orig`` and ``cor``.

    Returns:
        A list of ``Edit`` objects, in alignment order.
    """

    def run_key(step):
        # Only "M" and "T" keep their own key; every other label maps to False
        # so that neighbouring non-M/non-T steps fall into a single run.
        lead = step[0][0]
        return lead if lead in {"M", "T"} else False

    collected = []
    for kind, run in groupby(alignment.align_seq, run_key):
        steps = list(run)
        if kind == "M":
            continue
        if kind == "T":
            # Each T step stands alone.
            sequences = steps
        else:
            sequences = process_seq(steps, alignment)
        collected.extend(
            Edit(alignment.orig, alignment.cor, seq[1:]) for seq in sequences
        )
    return collected
def get_all_split_edits(self):
    """Return a separate edit for every non-``"M"`` alignment step.

    Each entry of ``self.align_seq`` whose operation label (first element) is
    not ``"M"`` becomes its own ``Edit``, built from the entry without that
    leading label. No steps are merged.

    Returns:
        A list of ``Edit`` objects, in alignment order.
    """
    return [
        Edit(self.orig, self.cor, step[1:])
        for step in self.align_seq
        if step[0] != "M"
    ]