def last_part():
    """Reload saved donor pieces, align them to the reference and write the changes.

    Reads the msgpack file ``donors_<c.DATASET>``, drops empty entries, and
    for every remaining ``(pos, donor)`` entry compares ``donor`` with the
    equally long slice of ``ref`` that starts at ``pos``. The change records
    of every alignment scoring below 10 have ``pos`` added to their position
    field and are handed to ``utils.write_indels`` together with
    ``DO_NUMBER``.

    ``ref``, ``c``, ``DO_NUMBER``, ``msgpack``, ``dependencies`` and ``utils``
    are read from module scope; the function takes no arguments and returns
    ``None``.
    """
    # A context manager closes the handle even when decoding fails; the
    # original ``file(...)`` call left it open for the garbage collector.
    with open('donors_{}'.format(c.DATASET), 'rb') as donor_file:
        the_donors = msgpack.load(donor_file)

    # clean up: discard empty donor entries
    the_donors = [donors for donors in the_donors if donors]

    good_changes = []
    for donor_tuple in the_donors:
        pos = donor_tuple[0]
        donor = donor_tuple[1]
        ref_piece = ref[pos:pos + len(donor)]
        changes, score = dependencies.identify_changes(ref_piece, donor, 0)
        if score >= 10:
            continue

        for cc in changes:
            # NOTE(review): presumably longer records keep their position at
            # index 3 and shorter ones at index 2 -- confirm against
            # dependencies.identify_changes.
            try:
                cc[3] += pos
            except (IndexError, TypeError):
                # Narrowed from a bare ``except:`` so that KeyboardInterrupt
                # and SystemExit are no longer swallowed here.
                cc[2] += pos
        good_changes.extend(changes)

    utils.write_indels(good_changes, DO_NUMBER)
# good_changes = [] for donor_tuple in the_donors: donor = donor_tuple[1] pos = donor_tuple[0] ref_piece = ref[pos:pos+len(donor)] changes, score = dependencies.identify_changes(ref_piece,donor,-1) if score < len(donor) * 0.4: # thanks leah #visualize_lines(donor,pos,ref, pos) #print 'C.\n{}'.format(changes) for cc in changes: try: if cc[2] == '.': changes.remove(cc) continue cc[3] += pos except: cc[2] += pos good_changes.extend(changes) for _ in good_changes: print _ utils.write_indels(good_changes, DO_NUMBER) msgpack.dump(the_donors,file('donors_{}'.format(c.DATASET),'wb')) print 'done?'
# Assemble the donor pieces for this job, align each piece against the
# reference, write the accepted changes and save the donor pieces to disk.
the_donors = pileup.generate_donor_pieces(sub, ref, d)
the_donors = [donors for donors in the_donors if donors]  # drop empty entries
print("done assembling, SW time...")

good_changes = []
count = 0.0  # float, so the progress ratio below is a true division on Python 2
for donor_tuple in the_donors:
    pos = donor_tuple[0]
    donor = donor_tuple[1]
    ref_piece = ref[pos:pos + len(donor)]
    changes, score = dependencies.identify_changes(ref_piece, donor, -1)
    if score < len(donor) * 0.4:  # thanks leah
        # Collect the surviving records in a new list. The original called
        # changes.remove(cc) while iterating over changes, which made the
        # loop skip the record after every removed one, so that record was
        # written out without pos added to it.
        kept_changes = []
        for cc in changes:
            if cc[2] == ".":
                continue  # records whose third field is "." are discarded
            # NOTE(review): presumably longer records keep their position at
            # index 3 and shorter ones at index 2 -- confirm against
            # dependencies.identify_changes.
            try:
                cc[3] += pos
            except (IndexError, TypeError):
                # Narrowed from a bare ``except:`` so that KeyboardInterrupt
                # and SystemExit are no longer swallowed here.
                cc[2] += pos
            kept_changes.append(cc)
        good_changes.extend(kept_changes)
    count += 1
    if count % 100 == 0:
        print("SW progress: {}".format(count / len(the_donors)))

utils.write_indels(good_changes, job_number)

# A context manager flushes and closes the output file deterministically; the
# original ``file(...)`` handle was never closed explicitly.
with open("donors_{}_part_{}".format(c.DATASET, job_number), "wb") as donor_file:
    msgpack.dump(the_donors, donor_file)