def correct_errors(seqs):
    """Pair each suspect read with the trusted reads one mismatch away.

    A read is trusted when it occurs at least twice in ``seqs``. A read
    that occurs once is skipped if it equals the ``reverse_complement`` of
    a trusted read; otherwise it is paired with every candidate (a trusted
    read or the reverse complement of one) for which ``hamming`` reports
    exactly 1.

    Args:
        seqs: Iterable of hashable reads (counted with ``Counter``).

    Returns:
        A list of ``(read, fix)`` tuples. A read appears once per matching
        candidate, so it may show up several times or not at all. Reads
        are visited in first-seen order; the order of fixes for one read
        follows set iteration order.

    NOTE(review): a read seen once whose reverse complement is also seen
    only once is still treated as suspect here -- confirm that is intended.
    """
    tally = Counter(seqs)

    # Split the distinct reads by multiplicity, keeping first-seen order.
    trusted = []
    singletons = []
    for read, count in tally.items():
        if count >= 2:
            trusted.append(read)
        else:
            singletons.append(read)

    trusted_rc = {reverse_complement(read) for read in trusted}
    trusted_set = set(trusted)
    # The candidate pool does not depend on the read being checked, so it
    # is built once rather than per read.
    candidates = trusted_set | trusted_rc

    corrections = []
    for read in singletons:
        if read in trusted_rc:
            # Already accounted for as the reverse complement of a trusted read.
            continue
        corrections.extend(
            (read, fix) for fix in candidates if hamming(read, fix) == 1
        )
    return corrections
def p_distance(s1, s2):
    """Return ``hamming(s1, s2)`` divided by the length of ``s1``.

    Args:
        s1: First sequence; its length is the denominator.
        s2: Second sequence, passed to ``hamming`` together with ``s1``.

    Returns:
        The quotient ``hamming(s1, s2) / len(s1)``.

    Note:
        An empty ``s1`` makes the denominator zero, so the division raises.
        Presumably both sequences are expected to have the same length --
        verify against ``hamming`` and the callers.
    """
    mismatches = hamming(s1, s2)
    return mismatches / len(s1)