Exemple #1
0
def last_part():
    """Recompute changes from the saved donor pieces and write them out.

    Loads the donor entries stored in the ``donors_<c.DATASET>`` msgpack
    file, drops the empty ones, and for each remaining ``(pos, donor)`` entry
    compares ``donor`` with the slice of ``ref`` that starts at ``pos`` and
    has the same length, using ``dependencies.identify_changes``. The changes
    of every donor whose score is below 10 are offset by ``pos`` and
    collected; the collected list is handed to ``utils.write_indels``
    together with ``DO_NUMBER``.

    ``ref``, ``c``, ``DO_NUMBER``, ``msgpack``, ``dependencies`` and ``utils``
    are names this function expects to find at module level.
    """
    # Read through a context manager so the handle is closed even when
    # decoding fails (the old ``file(...)`` call never closed it).
    with open('donors_{}'.format(c.DATASET), 'rb') as donor_file:
        the_donors = msgpack.load(donor_file)

    # Discard empty/falsy entries before processing.
    the_donors = [entry for entry in the_donors if entry]

    good_changes = []
    for donor_tuple in the_donors:
        pos = donor_tuple[0]
        donor = donor_tuple[1]
        ref_piece = ref[pos:pos + len(donor)]
        changes, score = dependencies.identify_changes(ref_piece, donor, 0)
        if score < 10:
            for cc in changes:
                # Offset field 3 by the donor's start; when that field is
                # missing or cannot be added to, fall back to field 2.
                # NOTE(review): presumably these fields hold the change's
                # position relative to the donor piece -- confirm against
                # dependencies.identify_changes.
                try:
                    cc[3] += pos
                except (IndexError, TypeError):
                    cc[2] += pos

            good_changes.extend(changes)

    utils.write_indels(good_changes, DO_NUMBER)
Exemple #2
0
    #

    # Compare every donor piece with the matching slice of the reference and
    # collect the changes of the donors that score well enough.
    good_changes = []
    for donor_tuple in the_donors:
        pos = donor_tuple[0]
        donor = donor_tuple[1]
        ref_piece = ref[pos:pos + len(donor)]
        changes, score = dependencies.identify_changes(ref_piece, donor, -1)
        if score < len(donor) * 0.4:  # thanks leah
            # Build a new list instead of calling changes.remove() inside the
            # loop: removing from a list while iterating over it skips the
            # element that follows each removed one, which then received
            # neither the '.' check nor the position offset.
            kept = []
            for cc in changes:
                # Changes whose field 2 is '.' are dropped.
                if cc[2] == '.':
                    continue
                # Offset field 3 by the donor's start; when that field is
                # missing or cannot be added to, fall back to field 2.
                # NOTE(review): presumably these fields hold the change's
                # position relative to the donor piece -- confirm.
                try:
                    cc[3] += pos
                except (IndexError, TypeError):
                    cc[2] += pos
                kept.append(cc)

            good_changes.extend(kept)

    for change in good_changes:
        print(change)

    utils.write_indels(good_changes, DO_NUMBER)

    # Write through a context manager so the data is flushed and the handle
    # closed deterministically.
    with open('donors_{}'.format(c.DATASET), 'wb') as donor_file:
        msgpack.dump(the_donors, donor_file)

    print('done?')
Exemple #3
0
    # Assemble the donor pieces and discard the empty/falsy ones.
    the_donors = pileup.generate_donor_pieces(sub, ref, d)
    the_donors = [entry for entry in the_donors if entry]

    print("done assembling, SW time...")

    # Compare every donor piece with the matching slice of the reference and
    # collect the changes of the donors that score well enough.
    good_changes = []
    count = 0.0  # float so the progress ratio below is a true division
    for donor_tuple in the_donors:
        pos = donor_tuple[0]
        donor = donor_tuple[1]
        ref_piece = ref[pos : pos + len(donor)]
        changes, score = dependencies.identify_changes(ref_piece, donor, -1)
        if score < len(donor) * 0.4:  # thanks leah
            # Build a new list instead of calling changes.remove() inside the
            # loop: removing from a list while iterating over it skips the
            # element that follows each removed one, which then received
            # neither the "." check nor the position offset.
            kept = []
            for cc in changes:
                # Changes whose field 2 is "." are dropped.
                if cc[2] == ".":
                    continue
                # Offset field 3 by the donor's start; when that field is
                # missing or cannot be added to, fall back to field 2.
                # NOTE(review): presumably these fields hold the change's
                # position relative to the donor piece -- confirm.
                try:
                    cc[3] += pos
                except (IndexError, TypeError):
                    cc[2] += pos
                kept.append(cc)

            good_changes.extend(kept)
        count += 1
        # Report the processed fraction every 100 donors.
        if count % 100 == 0:
            print("SW progress: {}".format(count / len(the_donors)))

    utils.write_indels(good_changes, job_number)
    # Write through a context manager so the data is flushed and the handle
    # closed deterministically.
    with open("donors_{}_part_{}".format(c.DATASET, job_number), "wb") as donor_file:
        msgpack.dump(the_donors, donor_file)