Ejemplo n.º 1
0
if __name__ == "__main__":

    # Largest Hamming distance at which a known index is still reported.
    mismatches = 1
    allowed = "ACGTN-"
    for record in sys.stdin:
        fields = record.strip().split()

        # The index is the first whitespace-separated column that, once
        # upper-cased, consists only of A/C/G/T/N/- characters.
        index = None
        for col in fields:
            candidate = col.upper()
            if all(c in allowed for c in candidate):
                index = candidate
                break
        if index is None:
            # No sequence-like column on this line: nothing is printed for it.
            continue

        # Collect (name, distance) for every known index within `mismatches`.
        names = []
        for name, sequence in BASIC_LOOKUP.items():
            try:
                dist = hamming_distance(index, sequence)
                if dist <= mismatches:
                    names.append((name, dist))
            except Exception:
                # Best effort: a candidate that cannot be compared is skipped.
                # NOTE(review): presumably this covers sequences of a
                # different length -- confirm against hamming_distance.
                # `Exception` (rather than a bare `except`) lets
                # KeyboardInterrupt/SystemExit stop the script.
                continue

        # Echo the input columns, then append one column per distance
        # 0..mismatches with the sorted, comma-separated matching names.
        by_distance = [
            ",".join(sorted(name for name, dist in names if dist == i))
            for i in range(mismatches + 1)
        ]
        print("\t".join(fields + by_distance))
Ejemplo n.º 2
0
# Create the CSV report and write its header row.
# NOTE(review): the handle is not closed anywhere in the visible code --
# confirm it is closed (or flushed at interpreter exit) further down.
_report_handle = open('index_overlaps.csv', 'wb')
outfile = csv.writer(_report_handle)
outfile.writerow(
    ['Index', 'Sequence', 'Identical', 'One mismatch', 'Two mismatches']
)

def _split_key(key):
    """Split *key* into ``(prefix, number)`` at its trailing run of digits.

    ``number`` is the integer value of the trailing digits. A key with no
    trailing digits gets ``-1`` so that it sorts before every numbered key
    sharing the same prefix; an all-digit key gets an empty prefix.
    """
    pos = len(key)
    while pos > 0 and key[pos - 1].isdigit():
        pos -= 1
    digits = key[pos:]
    return (key[:pos], int(digits) if digits else -1)


def compare(key1, key2):
    """Three-way comparison of two keys by text prefix, then numeric suffix.

    Keys are ordered first by their non-numeric prefix and then by the
    integer value of their trailing digits, so ``"A2"`` sorts before
    ``"A10"``.

    Returns a negative number, zero, or a positive number when *key1* sorts
    before, equal to, or after *key2* (the ``cmp`` convention).
    """
    k1 = _split_key(key1)
    k2 = _split_key(key2)
    # Portable spelling of cmp(k1, k2); the builtin exists only on Python 2.
    return (k1 > k2) - (k1 < k2)

# Index names ordered by compare(); iterating the dict yields its keys.
keys = sorted(BASIC_LOOKUP, cmp=compare)

for key1 in keys:
    seq1 = BASIC_LOOKUP[key1]
    identical = []
    one = []
    two = []
    for key2 in keys:
        if key1 == key2: continue
        seq2 = BASIC_LOOKUP[key2]
        mismatch = 0
        for n1, n2 in zip(seq1, seq2):
            if n1 != n2: mismatch += 1
        if mismatch == 0:
            identical.append(key2)
        elif mismatch == 1: