Beispiel #1
0
def pileup2(read_pos_tuple_list):
    """Count bases per position over all reads and return the consensus.

    Every element of `read_pos_tuple_list` is indexed as
    ``(position, encoded_read)``.  The encoded read is decoded with
    ``utils.integer_to_key(encoded_read, c.READ_SIZE)`` and each decoded
    character that is one of 'A', 'C', 'G', 'T' increments the matching
    column of a ``(c.OUTPUT_SIZE, 4)`` count matrix at row
    ``position + index_within_read``.  Any other character is ignored.

    The count matrix is handed to ``consensus`` and the returned pieces
    are joined into a single string, which is the return value.
    """
    # Column of the count matrix that each base is tallied in.
    column_of_base = {'A': 0, 'C': 1, 'G': 2, 'T': 3}
    counts = np.zeros((c.OUTPUT_SIZE, 4))

    for processed, entry in enumerate(read_pos_tuple_list, 1):
        bases = utils.integer_to_key(entry[1], c.READ_SIZE)
        origin = entry[0]

        for shift, base in enumerate(bases):
            column = column_of_base.get(base)
            if column is not None:
                counts[origin + shift][column] += 1

        # Progress report: fraction of reads handled so far.
        if processed % 100000 == 0:
            print('done: {:.2f}'.format(float(processed) / len(read_pos_tuple_list)))

    return ''.join(consensus(counts))
Beispiel #2
0
def _pad_read(read, offset, total_length):
    """Return `read` as a list of characters padded with '.' to `total_length`.

    `offset` dots are placed in front of the read; the remaining positions
    after the read are filled with dots as well.
    """
    trailing = total_length - offset - len(read)
    return ['.'] * offset + list(read) + ['.'] * trailing


def get_donor_for_stretch(stretch, ref, pos_to_read):
    """Iteratively build a donor sequence around `stretch` from nearby reads.

    Parameters:
        stretch: indexable pair; ``stretch[0]`` and ``stretch[1]`` are used as
            the start and end positions of the stretch in `ref`.
        ref: reference sequence; sliced and measured with ``len``.
        pos_to_read: mapping from a reference position to an encoded read,
            decoded here with ``utils.integer_to_key(..., c.READ_SIZE)``.
            Positions that raise ``KeyError`` are skipped.

    Returns:
        ``(start, donor.strip('.'))`` where ``start`` is
        ``stretch[0] - c.READ_SIZE``, or ``None`` when the stretch is skipped:
        it is longer than 20, the window falls outside `ref`, too few or more
        than 250 reads start inside the window, or a previously chosen read
        can no longer be removed from the candidate list.

    Procedure: the donor starts as dots with its first ``c.READ_SIZE``
    characters seeded from `ref`.  In each of six rounds the acceptance
    threshold is raised by 3 (starting from -40), every remaining read is
    scored at every offset with ``hamming_ignore_dots_list_of_char``, reads
    whose lowest score is below the threshold are placed at their
    lowest-scoring offset, and ``pileup_ignore_dots`` merges the placed reads
    with the current donor to produce the donor for the next round.
    """
    STRETCH_LIMIT = 20
    MAX_READ_TUPLES = 250
    ROUNDS = 6

    stretch_length = stretch[1] - stretch[0]
    MARGIN_LEFT = c.READ_SIZE
    MARGIN_RIGHT = stretch_length + 8

    if stretch_length > STRETCH_LIMIT:
        print('{} is over stretch limit, skipping.'.format(stretch))
        return

    start = stretch[0] - MARGIN_LEFT
    end = stretch[1] + MARGIN_RIGHT
    if start < 0 or end > len(ref):
        return

    # Collect the reads that start inside the window.  Reads starting in the
    # last READ_SIZE positions are left out: we don't want the extras on the
    # right.
    read_tuples = []
    for i in range(start, end - c.READ_SIZE):
        try:
            read_tuples.append((i, utils.integer_to_key(pos_to_read[i], c.READ_SIZE)))
        except KeyError:
            # Deliberate best-effort: no read is recorded for this position.
            continue

    if len(read_tuples) < stretch_length:
        print('skipping {} low read tuple count'.format(stretch))
        return
    elif len(read_tuples) > MAX_READ_TUPLES:
        print('skipping {} HIGH read tuple count'.format(stretch))
        return

    # Seed: the left margin of the donor is copied straight from the reference.
    donor = ['.'] * (MARGIN_RIGHT + MARGIN_LEFT + stretch_length + 1)
    donor[0:c.READ_SIZE] = list(ref[start:start + c.READ_SIZE])

    threshold = -40
    to_be_removed = []
    for _ in range(ROUNDS):
        threshold += 3

        # Reads placed in the previous round are no longer candidates.
        for item in to_be_removed:
            try:
                read_tuples.remove(item)
            except ValueError:
                print('Value not in list problem. repetitive region')
                return
        to_be_removed = []

        chosen_ones = []
        donor_length = len(donor)
        for read_tuple in read_tuples:
            if read_tuple is None:
                continue
            read = read_tuple[1]

            # Score the read at every offset except the right-most one.
            hams = [
                hamming_ignore_dots_list_of_char(
                    donor, _pad_read(read, offset, donor_length))
                for offset in range(donor_length - len(read))
            ]
            if not hams:
                # The read is not shorter than the donor, so there is no
                # offset to try; skip it instead of failing in min().
                continue

            best = min(hams)
            if best < threshold:
                # list.index returns the first (left-most) best offset.
                chosen_ones.append(
                    _pad_read(read, hams.index(best), donor_length))
                to_be_removed.append(read_tuple)

        # The merged pile-up becomes the seed for the next round.
        donor = pileup_ignore_dots(chosen_ones, donor)

    # NOTE(review): strip() requires pileup_ignore_dots to return a
    # string-like object rather than a list -- confirm against its definition.
    return (start, donor.strip('.'))