def write_bam_byChr(bamFile, chr, excludeBamFile, editBamFile, modifyReadsName, deleteReadsName,
                    addReadsName):
    """Split the reads of one reference sequence into "exclude" and "edit" BAMs.

    Every read yielded by ``bam.fetch(chr)`` is classified by its key name
    (``getKeyName(read)``).  The three name collections are checked in this
    order and the first match wins:

    * in ``modifyReadsName`` -> written to ``editBamFile`` only;
    * in ``deleteReadsName`` -> written to neither BAM; the key name is
      written as one line of the text file ``editBamFile + ".del"``;
    * in ``addReadsName``    -> written to both ``editBamFile`` and
      ``excludeBamFile``;
    * anything else          -> written to ``excludeBamFile`` only.

    Both output BAMs are created with the input BAM as header template.  The
    reference name and the number of reads visited are printed to stdout.

    NOTE: ``chr`` shadows the builtin of the same name inside this function;
    the parameter name is kept because it is part of the signature.
    NOTE: a later ``def write_bam_byChr`` in this module rebinds the name.

    :param bamFile: path of the input BAM (must support ``fetch`` by region).
    :param chr: reference sequence name passed to ``fetch``.
    :param excludeBamFile: path of the BAM receiving untouched/added reads.
    :param editBamFile: path of the BAM receiving reads to be edited.
    :param modifyReadsName: iterable of key names of reads to modify.
    :param deleteReadsName: iterable of key names of reads to delete.
    :param addReadsName: iterable of key names of reads to duplicate.
    """
    print(chr)

    # One membership test per read: hash the names once instead of scanning
    # a list for every read.
    modify_names = frozenset(modifyReadsName)
    delete_names = frozenset(deleteReadsName)
    add_names = frozenset(addReadsName)

    total = 0
    handles = []
    try:
        bam = pysam.AlignmentFile(bamFile, 'rb')
        handles.append(bam)
        excludeBam = pysam.AlignmentFile(excludeBamFile, 'wb', template=bam)
        handles.append(excludeBam)
        editBam = pysam.AlignmentFile(editBamFile, 'wb', template=bam)
        handles.append(editBam)
        delete = open(editBamFile + ".del", 'w')
        handles.append(delete)

        for read in bam.fetch(chr):
            total += 1
            keyname = getKeyName(read)
            if keyname in modify_names:
                editBam.write(read)
            elif keyname in delete_names:
                delete.write(keyname + "\n")
            elif keyname in add_names:
                editBam.write(read)
                excludeBam.write(read)
            else:
                excludeBam.write(read)
    finally:
        # Close whatever was opened, even if the loop failed part-way.
        for handle in reversed(handles):
            handle.close()

    # Two spaces after the colon are intentional (keeps the log line stable).
    print("Total reads:  %d" % total)
def write_bam_byChr(bamFile, chr, excludeBamFile, editBamFile, total_modify_reads_file,
                    total_delete_reads_file, total_add_reads_file):
    """Split the reads of one reference sequence into "exclude" and "edit" BAMs.

    The names of reads to modify / delete / add are obtained with
    ``get_name_list(<file>, chr)`` for each of the three files.  Every read
    yielded by ``bam.fetch(chr)`` is then classified by ``getKeyName(read)``;
    the collections are checked in this order and the first match wins:

    * modify -> written to ``editBamFile`` only;
    * delete -> written to neither BAM; the key name is written as one line
      of the text file ``editBamFile + ".del"``;
    * add    -> written to both ``editBamFile`` and ``excludeBamFile``;
    * anything else -> written to ``excludeBamFile`` only.

    Both output BAMs are created with the input BAM as header template.  The
    reference name and the number of reads visited are printed to stdout.

    NOTE: ``chr`` shadows the builtin of the same name inside this function;
    the parameter name is kept because it is part of the signature.

    :param bamFile: path of the input BAM (must support ``fetch`` by region).
    :param chr: reference sequence name passed to ``fetch`` and
        ``get_name_list``.
    :param excludeBamFile: path of the BAM receiving untouched/added reads.
    :param editBamFile: path of the BAM receiving reads to be edited.
    :param total_modify_reads_file: file handed to ``get_name_list`` for the
        reads to modify.
    :param total_delete_reads_file: same, for the reads to delete.
    :param total_add_reads_file: same, for the reads to duplicate.
    """
    print(chr)

    # Load the name collections before creating any output file, and hash
    # them once: membership is tested for every read in the loop below.
    # Assumes get_name_list returns an iterable of hashable key names
    # (TODO confirm against its definition).
    modify_names = frozenset(get_name_list(total_modify_reads_file, chr))
    delete_names = frozenset(get_name_list(total_delete_reads_file, chr))
    add_names = frozenset(get_name_list(total_add_reads_file, chr))

    total = 0
    handles = []
    try:
        bam = pysam.AlignmentFile(bamFile, 'rb')
        handles.append(bam)
        excludeBam = pysam.AlignmentFile(excludeBamFile, 'wb', template=bam)
        handles.append(excludeBam)
        editBam = pysam.AlignmentFile(editBamFile, 'wb', template=bam)
        handles.append(editBam)
        delete = open(editBamFile + ".del", 'w')
        handles.append(delete)

        for read in bam.fetch(chr):
            total += 1
            keyname = getKeyName(read)
            if keyname in modify_names:
                editBam.write(read)
            elif keyname in delete_names:
                delete.write(keyname + "\n")
            elif keyname in add_names:
                editBam.write(read)
                excludeBam.write(read)
            else:
                excludeBam.write(read)
    finally:
        # Close whatever was opened, even if the loop failed part-way.
        for handle in reversed(handles):
            handle.close()

    # Two spaces after the colon are intentional (keeps the log line stable).
    print("Total reads:  %d" % total)
def _get_write(total_reads, reads_file_out, reads_pair_out):
    """Dump a collection of read groups to two text files.

    For every group in ``total_reads``:

    * ``str(read)`` of each member is written to ``reads_file_out`` exactly
      as returned (no separator is added by this function);
    * one line holding the comma-joined key names (``getKeyName(read)``) of
      the group's members is written to ``reads_pair_out``.

    Both files are truncated/created, and are closed even when a read fails
    to serialise.

    :param total_reads: iterable of read groups (each an iterable of reads).
    :param reads_file_out: path of the file receiving the stringified reads.
    :param reads_pair_out: path of the file receiving one name line per group.
    """
    with open(reads_file_out, 'w') as reads_file, open(reads_pair_out, 'w') as reads_pair:
        for read_pair in total_reads:
            names = []
            for read in read_pair:
                names.append(getKeyName(read))
                reads_file.write(str(read))
            reads_pair.write("%s\n" % ",".join(names))
def merge_edit_reads(total_modify_reads, total_add_reads, total_delete_reads):
    """Flatten three collections of read groups into lists of key names.

    Each argument is an iterable of read groups (each group an iterable of
    reads).  For every read, ``getKeyName(read)`` is collected in encounter
    order.

    NOTE: the return order is ``(modify, delete, add)``, which differs from
    the parameter order ``(modify, add, delete)``.

    :param total_modify_reads: read groups to be modified.
    :param total_add_reads: read groups to be added.
    :param total_delete_reads: read groups to be deleted.
    :return: tuple ``(modify_names, delete_names, add_names)`` of lists.
    """
    def key_names(read_groups):
        # Flatten groups -> reads -> key names, preserving order.
        return [getKeyName(read) for read_pair in read_groups for read in read_pair]

    total_modify_readname_list = key_names(total_modify_reads)
    total_add_readname_list = key_names(total_add_reads)
    total_delete_readname_list = key_names(total_delete_reads)
    return total_modify_readname_list, total_delete_readname_list, total_add_readname_list
def get_write_reads(total_modify_reads, total_delete_reads, total_add_reads,
                    total_reads_file_dict, total_reads_list_dict):
    """Append read groups to already-open per-category output handles.

    The categories ``'modify'``, ``'delete'`` and ``'add'`` are processed in
    that order.  For each one, the handle stored under the category key in
    ``total_reads_file_dict`` receives ``str(read)`` for every read, and the
    handle stored under the same key in ``total_reads_list_dict`` receives
    one line per group with the comma-joined key names of its members.

    The handles are neither opened nor closed here.

    :param total_modify_reads: read groups for the ``'modify'`` category.
    :param total_delete_reads: read groups for the ``'delete'`` category.
    :param total_add_reads: read groups for the ``'add'`` category.
    :param total_reads_file_dict: category -> writable handle for reads.
    :param total_reads_list_dict: category -> writable handle for name lines.
    """
    categories = (
        ('modify', total_modify_reads),
        ('delete', total_delete_reads),
        ('add', total_add_reads),
    )
    for category, groups in categories:
        seq_out = total_reads_file_dict[category]
        names_out = total_reads_list_dict[category]
        for group in groups:
            key_names = []
            for member in group:
                key_names.append(getKeyName(member))
                seq_out.write(str(member))
            names_out.write("%s\n" % ",".join(key_names))
def write_sub_bam(chrom_list, used_bam_file_tmp, exclude_bam_file_tmp, out_dir,
                  total_modify_readname_list, total_delete_readname_list,
                  total_add_readname_list, process):
    """Split a BAM per reference sequence in parallel, then merge the pieces.

    For every name in ``chrom_list`` a ``write_bam_byChr`` task is submitted
    to a process pool; it produces ``<out_dir>/exclude_<chrom>.bam`` and
    ``<out_dir>/used_<chrom>.bam``.  Afterwards:

    * ``exclude_bam_file_tmp`` plus all per-chromosome exclude BAMs are
      merged into ``<out_dir>/exclude.bam``;
    * the per-chromosome used BAMs are merged into ``<out_dir>/used.bam``
      (when there is exactly one, it is used directly without merging);
    * the used BAM is sorted with prefix ``<out_dir>/used.sort`` and
      ``<out_dir>/used.sort.bam`` is indexed and read back into memory
      (assumes ``bamSort`` writes ``<prefix>.bam`` - TODO confirm).

    :param chrom_list: reference sequence names to process.
    :param used_bam_file_tmp: input BAM handed to every worker.
    :param exclude_bam_file_tmp: BAM that is always part of the exclude merge.
    :param out_dir: directory receiving all outputs.
    :param total_modify_readname_list: passed through to ``write_bam_byChr``.
    :param total_delete_readname_list: passed through to ``write_bam_byChr``.
    :param total_add_readname_list: passed through to ``write_bam_byChr``.
    :param process: number of worker processes (converted with ``int``).
    :return: ``(used_reads, used_bam_file, exclude_bam_file)`` where
        ``used_reads`` maps ``getKeyName(read)`` to the read from the sorted
        used BAM (a later read with the same key replaces an earlier one).
    :raises Exception: whatever a worker raised is re-raised here.
    """
    write_bam_pool = Pool(int(process))
    exclude_bam_list = [exclude_bam_file_tmp]
    usedBamList = []
    pending = []
    for chrom in chrom_list:
        excludeBam_chr = "%s/exclude_%s.bam" % (out_dir, chrom)
        exclude_bam_list.append(excludeBam_chr)
        usedBam_chr = "%s/used_%s.bam" % (out_dir, chrom)
        usedBamList.append(usedBam_chr)
        pending.append(write_bam_pool.apply_async(
            write_bam_byChr,
            args=(used_bam_file_tmp, chrom, excludeBam_chr, usedBam_chr,
                  total_modify_readname_list, total_delete_readname_list,
                  total_add_readname_list)))
    write_bam_pool.close()
    write_bam_pool.join()
    # apply_async swallows worker exceptions unless the result is fetched;
    # surface them here rather than merging incomplete BAM files.
    for result in pending:
        result.get()

    exclude_bam_file = os.path.join(out_dir, "exclude.bam")
    bamMerge(exclude_bam_list, exclude_bam_file)

    used_bam_file = os.path.join(out_dir, "used.bam")
    if len(usedBamList) != 1:
        bamMerge(usedBamList, used_bam_file)
    else:
        used_bam_file = usedBamList[0]

    bamSort(used_bam_file, os.path.join(out_dir, "used.sort"))
    used_sort_bam_file = os.path.join(out_dir, "used.sort.bam")
    bamIndex(used_sort_bam_file)

    used_reads = {}
    used_bam = pysam.AlignmentFile(used_sort_bam_file, 'rb')
    try:
        for read in used_bam.fetch():
            used_reads[getKeyName(read)] = read
    finally:
        used_bam.close()
    return used_reads, used_bam_file, exclude_bam_file
def merge_edit_bam(bam_file, out_dir, is_single, total_modify_reads, total_add_reads, used_reads,
                   seqer, aligner, aligner_index, flow_order, lib_key, barcode, tag):
    """Write the edited reads to ``edit.bam``, remap them and index the result.

    Each edited read is looked up by key name in ``used_reads``; a deep copy
    of that original alignment is renamed (and, where noted below, given the
    edited sequence/qualities) and written to ``<out_dir>/edit.bam``.  The
    mapping from new to original read names is logged to
    ``<out_dir>/readname_convert.txt``.

    Single-end (``is_single`` true):

    * modified reads take the edited sequence and qualities; the new name is
      the part of the edited read's name before the first ``":"`` followed by
      ``":"`` and ``get_new_readname()``; ``deal_life_reads`` is applied when
      ``seqer == "life"`` and ``add_tag`` when ``tag`` is truthy;
    * added reads keep the original sequence and are only renamed.

    Paired-end: modified and added pairs are handled identically - both mates
    take the edited sequence/qualities and share one new name; ``add_tag`` is
    applied when ``tag`` is truthy.

    ``edit.bam`` is then remapped to ``<out_dir>/edit.remap.bam``.  For
    paired-end data ``bamAddRG`` additionally produces
    ``<out_dir>/edit.remap.addRG.bam``.  The final BAM is indexed.

    NOTE: a later ``def merge_edit_bam`` in this module rebinds the name.

    :param bam_file: source BAM; used as header template and by ``bamAddRG``.
    :param out_dir: directory receiving all outputs.
    :param is_single: true for single-end data.
    :param total_modify_reads: list of read groups with edited sequences.
    :param total_add_reads: list of read groups to add.
    :param used_reads: mapping key name -> original aligned read.
    :param seqer: sequencer name; only the value ``"life"`` is special-cased.
    :param aligner: passed through to ``remap``.
    :param aligner_index: passed through to ``remap``.
    :param flow_order: passed through to ``deal_life_reads``.
    :param lib_key: passed through to ``deal_life_reads``.
    :param barcode: passed through to ``deal_life_reads``.
    :param tag: when truthy, ``add_tag`` is applied to rewritten reads.
    :return: path of the final (indexed) BAM.
    :raises KeyError: if an edited read's key name is missing in ``used_reads``.
    """
    edit_bam_file = os.path.join(out_dir, "edit.bam")
    readname_convert_file = os.path.join(out_dir, "readname_convert.txt")
    edit_remap_bam_file = os.path.join(out_dir, "edit.remap.bam")
    # new read name -> {strand: rewritten read}; handed to bamAddRG below.
    edit_bam_reads = {}

    bam = pysam.AlignmentFile(bam_file, 'rb')
    try:
        edit_bam = pysam.AlignmentFile(edit_bam_file, 'wb', template=bam)
        try:
            with open(readname_convert_file, 'w') as fout_convert:
                if is_single:
                    for read_pair in total_modify_reads:
                        read1 = read_pair[0]
                        orig_read1 = used_reads[getKeyName(read1)]
                        new_read1 = copy.deepcopy(orig_read1)
                        new_read1.query_sequence = read1.query_sequence
                        new_read1.query_qualities = read1.query_qualities
                        new_name = read1.query_name.split(":")[0] + ":" + get_new_readname()
                        new_read1.query_name = new_name
                        if seqer == "life":
                            new_read1 = deal_life_reads(new_read1, flow_order, lib_key, barcode)
                        if tag:
                            new_read1 = add_tag(new_read1)
                        edit_bam.write(new_read1)
                        fout_convert.write("%s: %s, %s, %s-%s\n" % (
                            new_name, orig_read1.query_name, new_read1.is_read1,
                            new_read1.reference_start, new_read1.reference_end))
                        strand = getReadStrand(new_read1)
                        edit_bam_reads.setdefault(new_name, dict())[strand] = new_read1

                    for read_pair in total_add_reads:
                        read1 = read_pair[0]
                        orig_read1 = used_reads[getKeyName(read1)]
                        # Added single-end reads keep the original sequence.
                        new_read1 = copy.deepcopy(orig_read1)
                        new_name = get_new_readname()
                        new_read1.query_name = new_name
                        edit_bam.write(new_read1)
                        fout_convert.write("%s: %s, %s, %s-%s\n" % (
                            new_name, orig_read1.query_name, new_read1.is_read1,
                            new_read1.reference_start, new_read1.reference_end))
                        strand = getReadStrand(new_read1)
                        edit_bam_reads.setdefault(new_name, dict())[strand] = new_read1
                else:
                    for read_pair in total_modify_reads + total_add_reads:
                        read1 = read_pair[0]
                        read2 = read_pair[1]
                        orig_read1 = used_reads[getKeyName(read1)]
                        orig_read2 = used_reads[getKeyName(read2)]
                        orig_read1_name = orig_read1.query_name
                        orig_read2_name = orig_read2.query_name
                        new_read1 = copy.deepcopy(orig_read1)
                        new_read2 = copy.deepcopy(orig_read2)
                        new_read1.query_sequence = read1.query_sequence
                        new_read1.query_qualities = read1.query_qualities
                        new_read2.query_sequence = read2.query_sequence
                        new_read2.query_qualities = read2.query_qualities
                        new_name = get_new_readname()
                        new_read1.query_name = new_name
                        new_read2.query_name = new_name

                        # Registered before add_tag is applied (order kept
                        # deliberately; add_tag may return a new object).
                        strand1 = getReadStrand(new_read1)
                        strand2 = getReadStrand(new_read2)
                        by_strand = edit_bam_reads.setdefault(new_name, dict())
                        by_strand[strand1] = new_read1
                        by_strand[strand2] = new_read2

                        if tag:
                            new_read1 = add_tag(new_read1)
                            new_read2 = add_tag(new_read2)
                        fout_convert.write("%s: %s, %s, %s, %s, %s-%s, %s-%s\n" % (
                            new_name, orig_read1_name, orig_read2_name,
                            new_read1.is_read1, new_read2.is_read2,
                            new_read1.reference_start, new_read1.reference_end,
                            new_read2.reference_start, new_read2.reference_end,
                        ))
                        edit_bam.write(new_read1)
                        edit_bam.write(new_read2)
        finally:
            # edit.bam must be finalised before it is handed to remap().
            edit_bam.close()

        remap(aligner_index, edit_bam_file, edit_remap_bam_file, aligner, is_single)

        if not is_single:
            editRemapBam_addRG_File = os.path.join(out_dir, "edit.remap.addRG.bam")
            editRemap = pysam.AlignmentFile(edit_remap_bam_file, 'rb')
            try:
                bamAddRG(editRemap, edit_bam_reads, bam, editRemapBam_addRG_File)
            finally:
                editRemap.close()
        else:
            editRemapBam_addRG_File = edit_remap_bam_file
    finally:
        # The source BAM stays open until bamAddRG has used it.
        bam.close()

    bamIndex(editRemapBam_addRG_File)
    return editRemapBam_addRG_File
def merge_edit_bam(bam_file, out_dir, is_single, total_modify_reads_file, total_add_reads_file,
                   used_bam_file, total_modify_list, total_add_list, seqer, aligner,
                   aligner_index, flow_order, lib_key, barcode, tag):
    """Write the edited reads to ``edit.bam``, remap and sort them.

    Steps:

    1. Every read of ``used_bam_file`` is loaded into a dict keyed by
       ``getKeyName(read)``.
    2. ``get_sequence_dict`` is called on the modify and add read files; each
       call yields two mappings indexed by read name (new sequence and new
       qualities).
    3. ``total_modify_list`` and then ``total_add_list`` are read line by
       line.  Each line is a comma-separated list of read names; the first
       name (single-end) or first two names (paired-end) are used.  Lines
       that are blank, too short, or name a read absent from the used BAM
       are skipped.  For the remaining lines a deep copy of each original
       read gets the new sequence/qualities and a fresh shared name from
       ``get_new_readname()``, is logged to
       ``<out_dir>/readname_convert.txt`` and written to
       ``<out_dir>/edit.bam``.  ``deal_life_reads`` is applied only to
       single-end modified reads when ``seqer == "life"``.
    4. The first ``@RG`` header line of ``bam_file`` (or ``None`` when there
       is none) is extracted via ``samtools``/``grep`` into
       ``<out_dir>/bam.header`` and passed to ``remap``, which produces
       ``<out_dir>/edit.remap.bam``; that file is sorted with prefix
       ``<out_dir>/edit.remap.sort``.

    :param bam_file: source BAM; header template and source of the @RG line.
    :param out_dir: directory receiving all outputs.
    :param is_single: true for single-end data.
    :param total_modify_reads_file: input of ``get_sequence_dict`` (modify).
    :param total_add_reads_file: input of ``get_sequence_dict`` (add).
    :param used_bam_file: BAM holding the original reads to be rewritten.
    :param total_modify_list: text file of comma-separated read names (modify).
    :param total_add_list: text file of comma-separated read names (add).
    :param seqer: sequencer name; only the value ``"life"`` is special-cased.
    :param aligner: passed through to ``remap``.
    :param aligner_index: passed through to ``remap``.
    :param flow_order: passed through to ``deal_life_reads``.
    :param lib_key: passed through to ``deal_life_reads``.
    :param barcode: passed through to ``deal_life_reads``.
    :param tag: not used by this function.
    :return: ``<out_dir>/edit.remap.sort.bam`` (assumes ``bamSort`` writes
        ``<prefix>.bam`` - TODO confirm).
    :raises KeyError: if a listed read has no entry in the sequence or
        quality mapping.
    """
    edit_bam_file = os.path.join(out_dir, "edit.bam")
    readname_convert_file = os.path.join(out_dir, "readname_convert.txt")
    mates_per_line = 1 if is_single else 2

    # Original alignments, keyed by read key name.
    used_reads = {}
    used_bam = pysam.AlignmentFile(used_bam_file, 'rb')
    try:
        for read in used_bam.fetch():
            used_reads[getKeyName(read)] = read
    finally:
        used_bam.close()

    modify_reads_seq, modify_reads_quan = get_sequence_dict(total_modify_reads_file)
    add_reads_seq, add_reads_quan = get_sequence_dict(total_add_reads_file)

    def rewrite_from_list(list_file, seq_by_name, qual_by_name, handle_life):
        # Rewrite every usable line of `list_file` into edit_bam/fout_convert.
        with open(list_file) as fin:
            for line in fin:
                names = line.strip().split(",")[:mates_per_line]
                # Blank or truncated line: nothing to rewrite.
                if len(names) < mates_per_line:
                    continue
                if any(name not in used_reads for name in names):
                    continue

                originals = [used_reads[name] for name in names]
                new_reads = []
                for name, orig_read in zip(names, originals):
                    new_read = copy.deepcopy(orig_read)
                    # Sequence first, then qualities.
                    new_read.query_sequence = seq_by_name[name]
                    new_read.query_qualities = qual_by_name[name]
                    new_reads.append(new_read)

                # All mates of one line share a single new name.
                new_name = get_new_readname()
                for idx, new_read in enumerate(new_reads):
                    new_read.query_name = new_name
                    if handle_life:
                        new_reads[idx] = deal_life_reads(
                            new_read, flow_order, lib_key, barcode)

                for orig_read, new_read in zip(originals, new_reads):
                    fout_convert.write("%s: %s, %s, %s-%s\n" % (
                        new_name, orig_read.query_name, new_read.is_read1,
                        new_read.reference_start, new_read.reference_end))
                for new_read in new_reads:
                    edit_bam.write(new_read)

    bam = pysam.AlignmentFile(bam_file, 'rb')
    try:
        edit_bam = pysam.AlignmentFile(edit_bam_file, 'wb', template=bam)
        try:
            with open(readname_convert_file, 'w') as fout_convert:
                rewrite_from_list(total_modify_list, modify_reads_seq, modify_reads_quan,
                                  bool(is_single) and seqer == "life")
                rewrite_from_list(total_add_list, add_reads_seq, add_reads_quan, False)
        finally:
            # edit.bam must be finalised before it is handed to remap().
            edit_bam.close()
    finally:
        bam.close()

    header = os.path.join(out_dir, 'bam.header')
    # NOTE(review): both paths are interpolated unquoted into a shell command;
    # paths containing spaces or shell metacharacters will break this call.
    os.system('samtools view -H %s|grep "^@RG" > %s' % (bam_file, header))
    with open(header, 'r') as header_in:
        # Only the first @RG line is used; an empty file means "no read group".
        head = header_in.readline().rstrip() or None

    edit_remap_bam_file = os.path.join(out_dir, "edit.remap.bam")
    remap(aligner_index, edit_bam_file, edit_remap_bam_file, aligner, is_single, head)

    edit_remap_bam_sorted_prefix = os.path.join(out_dir, "edit.remap.sort")
    edit_remap_bam_sorted_file = os.path.join(out_dir, "edit.remap.sort.bam")
    bamSort(edit_remap_bam_file, edit_remap_bam_sorted_prefix)
    return edit_remap_bam_sorted_file