def write_sub_bam(chrom_list, used_bam_file_tmp, exclude_bam_file_tmp, out_dir, total_modify_reads_file,
                  total_delete_reads_file, total_add_reads_file, process):
    """Run ``write_bam_byChr`` once per chromosome in a worker pool, then merge the results.

    For every chromosome in ``chrom_list`` a task is submitted with the paths
    ``<out_dir>/exclude_<chrom>.bam`` and ``<out_dir>/used_<chrom>.bam``.
    After all tasks finish:

    * ``exclude_bam_file_tmp`` and every per-chromosome exclude path are
      merged into ``<out_dir>/exclude.bam``;
    * the per-chromosome used paths are merged into ``<out_dir>/used.bam``
      (the merge is skipped when there is exactly one such path, which is
      then used directly);
    * the resulting used BAM is sorted with prefix ``<out_dir>/used.sort``
      and ``<out_dir>/used.sort.bam`` is indexed.

    :param chrom_list: iterable of chromosome names, one task per entry.
    :param used_bam_file_tmp: path forwarded to every ``write_bam_byChr`` task.
    :param exclude_bam_file_tmp: path of an existing exclude BAM that is
        merged together with the per-chromosome exclude files.
    :param out_dir: directory that receives all files created here.
    :param total_modify_reads_file: forwarded unchanged to ``write_bam_byChr``.
    :param total_delete_reads_file: forwarded unchanged to ``write_bam_byChr``.
    :param total_add_reads_file: forwarded unchanged to ``write_bam_byChr``.
    :param process: number of worker processes (converted with ``int``).
    :return: ``(used_sort_bam_file, exclude_bam_file)`` paths.
    :raises Exception: whatever a ``write_bam_byChr`` worker raised is
        re-raised here once the pool has been joined.
    """
    write_bam_pool = Pool(int(process))
    exclude_bam_list = [exclude_bam_file_tmp]
    usedBamList = []
    async_results = []
    for chrom in chrom_list:
        excludeBam_chr = "%s/exclude_%s.bam" % (out_dir, chrom)
        exclude_bam_list.append(excludeBam_chr)
        usedBam_chr = "%s/used_%s.bam" % (out_dir, chrom)
        usedBamList.append(usedBam_chr)
        async_results.append(write_bam_pool.apply_async(
            write_bam_byChr,
            args=(used_bam_file_tmp, chrom, excludeBam_chr, usedBam_chr, total_modify_reads_file,
                  total_delete_reads_file, total_add_reads_file)))
    write_bam_pool.close()
    write_bam_pool.join()

    # apply_async stores worker exceptions inside the AsyncResult; without
    # get() they are dropped and the failure only shows up later as a merge
    # problem on missing/incomplete per-chromosome files.
    for async_result in async_results:
        async_result.get()

    exclude_bam_file = os.path.join(out_dir, "exclude.bam")
    bamMerge(exclude_bam_list, exclude_bam_file)

    used_bam_file = os.path.join(out_dir, "used.bam")
    if len(usedBamList) != 1:
        bamMerge(usedBamList, used_bam_file)
    else:
        # A single per-chromosome file needs no merge; sort it directly.
        used_bam_file = usedBamList[0]

    bamSort(used_bam_file, os.path.join(out_dir, "used.sort"))
    used_sort_bam_file = os.path.join(out_dir, "used.sort.bam")
    bamIndex(used_sort_bam_file)
    return used_sort_bam_file, exclude_bam_file
def get_reads_by_region(bam_file, sv_list, out_dir):
    """Write the region BED for ``sv_list`` and split ``bam_file`` by that BED.

    Steps, in order:

    1. ``write_region_bed`` is called with ``<out_dir>/consider_region.bed``
       and ``sv_list``; its return value is handed back as the chromosome list.
    2. ``getRegionReads`` is called with the input BAM, the BED path and the
       two paths ``<out_dir>/used_tmp.bam`` / ``<out_dir>/exclude_tmp.bam``.
    3. ``bamIndex`` is run on ``<out_dir>/used_tmp.bam``.

    :param bam_file: path of the input BAM.
    :param sv_list: SV records forwarded to ``write_region_bed``.
    :param out_dir: directory that receives the BED and the two BAM files.
    :return: ``(chrom_list, used_bam_file_tmp, exclude_bam_file_tmp)``.
    """
    def in_out_dir(file_name):
        # All artefacts of this step live directly under out_dir.
        return os.path.join(out_dir, file_name)

    bed_path = in_out_dir("consider_region.bed")
    chromosomes = write_region_bed(bed_path, sv_list)

    used_tmp_path = in_out_dir("used_tmp.bam")
    exclude_tmp_path = in_out_dir("exclude_tmp.bam")
    getRegionReads(bam_file, bed_path, used_tmp_path, exclude_tmp_path)
    bamIndex(used_tmp_path)

    return chromosomes, used_tmp_path, exclude_tmp_path
def main(run_args):
    """Drive the mutation spike-in pipeline from parsed command-line arguments.

    The steps mirror the progress messages printed below:

    1. ``get_haplotypes`` builds the haplotype list from the mutation file.
    2. ``deal_haplotype_multi`` selects the reads to edit; if it returns no
       reads a warning is printed and the function returns early.
    3. ``reads_modify`` edits the chosen reads in place.
    4. ``reads_replace`` writes edited / untouched reads and remaps the
       edited ones.
    5. The remapped and the excluded BAM are merged into
       ``<outdir>/edit.sorted.bam``, which is then indexed.

    Intermediate files go to ``<outdir>/tempDir``; rejected mutations are
    logged to ``<outdir>/invalid_mutation.txt`` and the success list path is
    ``<outdir>/success_list.txt``.

    :param run_args: argument namespace; the attributes read here are
        ``outdir``, ``bamfile``, ``reffasta``, ``mutfile``, ``haplosize``,
        ``snpfrac``, ``process``, ``mindepth``, ``minmutreads``, ``minmapq``,
        ``diffcover``, ``single``, ``multmapfilter``, ``aligner``,
        ``alignerIndex``, ``seqer``, ``floworder``, ``libkey``, ``barcode``
        and ``tag``.
    :return: ``None``.
    """
    temp_out_dir = os.path.join(run_args.outdir, "tempDir")
    # Create the directory (and missing parents) without a shell so that
    # paths containing spaces or shell metacharacters work and failures
    # raise instead of being ignored.
    if not os.path.isdir(temp_out_dir):
        os.makedirs(temp_out_dir)

    invalid_log_file = os.path.join(run_args.outdir, 'invalid_mutation.txt')
    invalid_log = InvalidLog(invalid_log_file)

    # step1: deal with mutfile and get haplotypes
    print("step1: deal with mutfile and get haplotypes")
    haplotype_list = get_haplotypes(run_args.bamfile, run_args.reffasta, run_args.mutfile,
                                    int(run_args.haplosize), float(run_args.snpfrac), invalid_log)

    # step2: deal haplotypes and get total_chosen_reads, total_chosen_reads_muts
    print("step2: deal haplotypes and get total_chosen_reads, total_chosen_reads_muts")
    success_list_file = os.path.join(run_args.outdir, 'success_list.txt')
    total_chosen_reads, total_chosen_reads_muts = deal_haplotype_multi(
        run_args.bamfile, haplotype_list, temp_out_dir, run_args.reffasta, int(run_args.process),
        int(run_args.mindepth), int(run_args.minmutreads), int(run_args.minmapq), float(run_args.diffcover),
        run_args.single, run_args.multmapfilter, run_args.aligner, run_args.alignerIndex, invalid_log,
        success_list_file)
    # The invalid-mutation log is not used after this point.
    invalid_log.close()

    if len(total_chosen_reads) == 0:
        print("Warning: No reads to deal with of all these sv, checkout your sv file")
        return

    # step3: modify the reads in total_chosen_reads itself
    print("step3: modify the reads in total_chosen_reads itself")
    reads_modify(total_chosen_reads, total_chosen_reads_muts, run_args.reffasta, int(run_args.process))

    # step4: write edited reads to the edit file and all other reads to the
    # exclude file, then remap the edit file to the reference
    print("step4: write edited reads to edited file and exclude reads to exclude file ,than remap edited file to reference")
    edit_remap_bam_file, exclude_bam_file = reads_replace(
        run_args.bamfile, total_chosen_reads, run_args.seqer, run_args.floworder, run_args.libkey,
        run_args.barcode, run_args.tag, temp_out_dir, run_args.aligner, run_args.alignerIndex, run_args.single)

    # step5: merge the remapped edit BAM with the exclude BAM and index the result
    print("step5: merge remap.edit.bam and exclude exclude.bam and sort")
    out_bam_file = os.path.join(run_args.outdir, "edit.sorted.bam")
    bamMerge([edit_remap_bam_file, exclude_bam_file], out_bam_file)
    bamIndex(out_bam_file)

    print("Edit Bam is completed! Result see %s and valid mutation see %s. Invalid mutation can't be spike in see %s." % (
        out_bam_file, success_list_file, invalid_log_file))
def reads_replace(bam_file, total_chosen_reads, seqer, flow_order, lib_key, barcode, tag, out_dir, aligner,
                  aligner_index, is_single):
    """Split ``bam_file`` into edited and untouched reads and remap the edited ones.

    Every fetched read whose name is a key of ``total_chosen_reads`` is
    collected (at most one read per name and strand; a later read with the
    same name and strand replaces the earlier one).  If an edited version
    exists for that strand, its sequence and qualities are copied onto the
    read, optionally followed by ``deal_life_reads`` (when ``seqer`` is
    ``"life"``) and ``add_tag`` (when ``tag`` is truthy).  Collected reads
    are written to ``<out_dir>/edit.bam``; all reads whose name was not
    collected are written to ``<out_dir>/exclude.bam``.  ``edit.bam`` is then
    remapped, sorted and indexed.

    :param bam_file: path of the input BAM; ``fetch()`` is used, so pysam
        needs an index for it.
    :param total_chosen_reads: mapping ``read name -> {strand: edited read}``;
        the edited reads must expose ``query_sequence`` / ``query_qualities``.
    :param seqer: sequencer name; only the value ``"life"`` is special-cased.
    :param flow_order: forwarded to ``deal_life_reads``.
    :param lib_key: forwarded to ``deal_life_reads``.
    :param barcode: forwarded to ``deal_life_reads``.
    :param tag: truthy to tag edited reads and run ``bam_add_tag`` at the end.
    :param out_dir: directory that receives every file created here.
    :param aligner: forwarded to ``remap``.
    :param aligner_index: forwarded to ``remap``.
    :param is_single: forwarded to ``remap``.
    :return: ``(edit_remap_bam_path, exclude_bam_path)``.
    """
    edit_bam_file = os.path.join(out_dir, "edit.bam")
    exclude_bam_file = os.path.join(out_dir, "exclude.bam")

    bam = pysam.AlignmentFile(bam_file)
    try:
        # Pass 1: collect the reads to edit, keyed by name and strand.
        edit_bam_reads = {}
        for read in bam.fetch():
            read_name = read.query_name
            if read_name not in total_chosen_reads:
                continue
            strand = getReadStrand(read)
            chosen_by_strand = total_chosen_reads[read_name]
            if strand in chosen_by_strand:
                my_read = chosen_by_strand[strand]
                # Sequence first, qualities second: same order as before.
                read.query_sequence = my_read.query_sequence
                read.query_qualities = my_read.query_qualities
                if seqer == "life":
                    read = deal_life_reads(read, flow_order, lib_key, barcode)
                if tag:
                    read = add_tag(read)
            # Reads of a chosen name without an edited version for this
            # strand are stored unmodified so they are remapped as well.
            edit_bam_reads.setdefault(read_name, {})[strand] = read

        # write edited reads into edit.bam
        edit_bam = pysam.AlignmentFile(edit_bam_file, 'wb', template=bam)
        try:
            for reads_by_strand in edit_bam_reads.values():
                for read in reads_by_strand.values():
                    edit_bam.write(read)
        finally:
            edit_bam.close()

        # Pass 2: write not edited reads into exclude.bam
        exclude_bam = pysam.AlignmentFile(exclude_bam_file, 'wb', template=bam)
        try:
            for read in bam.fetch():
                if read.query_name not in edit_bam_reads:
                    exclude_bam.write(read)
        finally:
            exclude_bam.close()
    finally:
        # The input handle was previously never closed.
        bam.close()

    # remap the edited reads; the first @RG header line (if any) is passed on
    header = os.path.join(out_dir, 'bam.header')
    os.system('samtools view -H %s|grep "^@RG" > %s' % (bam_file, header))
    with open(header, 'r') as header_handle:
        head = header_handle.readline().rstrip()
    if not head:
        head = None

    edit_remap_bam_file = os.path.join(out_dir, "edit.remap.bam")
    remap(aligner_index, edit_bam_file, edit_remap_bam_file, aligner, is_single, header=head)

    edit_remap_bam_sorted_prefix = os.path.join(out_dir, "edit.remap.sort")
    edit_remap_bam_sorted_file = os.path.join(out_dir, "edit.remap.sort.bam")
    bamSort(edit_remap_bam_file, edit_remap_bam_sorted_prefix)
    bamIndex(edit_remap_bam_sorted_file)

    if tag:
        # NOTE(review): input and output path are identical here
        # ("edit.remap.sort.bam"), and the index built above is not refreshed
        # afterwards - confirm bam_add_tag handles in-place rewriting.
        edit_remap_addtag_file = os.path.join(out_dir, "edit.remap.sort.bam")
        bam_add_tag(edit_remap_bam_sorted_file, edit_remap_addtag_file)
    else:
        edit_remap_addtag_file = edit_remap_bam_sorted_file
    return edit_remap_addtag_file, exclude_bam_file
def main(run_args):
    """Drive the SV spike-in pipeline from parsed command-line arguments.

    The steps mirror the progress messages printed below:

    0. ``check_sv_file`` validates the SV file; the process exits with the
       message ``"no sv list to deal with"`` when nothing is left.
    1. ``get_insertSize_range`` determines the insert size.
    2. ``deal_sv`` produces the modify / delete / add read files and lists.
    3. ``get_reads_by_region`` and ``write_sub_bam`` split the input BAM
       into a "used" and an "exclude" BAM.
    4. ``merge_edit_bam`` merges the edited reads and remaps them.
    5. The remapped and the excluded BAM are merged into
       ``<outdir>/edit.sorted.bam``, which is then indexed.

    Intermediate files go to ``<outdir>/tempDir``; rejected SVs are logged to
    ``<outdir>/invalid_mutation.txt``, the run log is ``<outdir>/run.log``
    and the success list path is ``<outdir>/success_list.txt``.

    :param run_args: argument namespace; the attributes read here are
        ``outdir``, ``svfile``, ``reffasta``, ``bamfile``, ``readlength``,
        ``single``, ``minmapq``, ``multmapfilter``, ``mindepth``,
        ``minmutreads``, ``process``, ``seqer``, ``aligner``,
        ``alignerIndex``, ``floworder``, ``libkey``, ``barcode`` and ``tag``.
    :return: ``None``.
    :raises SystemExit: when ``check_sv_file`` yields no usable SV.
    """
    # makedirs (not mkdir) so a nested, not yet existing outdir works too.
    if not os.path.exists(run_args.outdir):
        os.makedirs(run_args.outdir)
    invalid_log_file = os.path.join(run_args.outdir, 'invalid_mutation.txt')
    invalid_log = InvalidLog(invalid_log_file)

    run_log_file = os.path.join(run_args.outdir, 'run.log')
    # NOTE(review): run_log is never closed in this function - confirm
    # whether RunLog needs an explicit close like InvalidLog.
    run_log = RunLog(run_log_file)

    temp_out_dir = os.path.join(run_args.outdir, "tempDir")
    if not os.path.exists(temp_out_dir):
        os.makedirs(temp_out_dir)

    # step0: prepare sv list
    sv_list = check_sv_file(run_args.svfile, run_args.reffasta, invalid_log)
    if not sv_list:
        # Close the log before leaving so the rejection reasons are on disk.
        invalid_log.close()
        # Same effect as exit(msg), but does not depend on the `site` helper.
        raise SystemExit("no sv list to deal with")

    # step1: get insert size of paired reads
    print("step1: get insert size of paired reads")
    insert_size = get_insertSize_range(run_args.bamfile, run_args.readlength, run_args.single)

    # step2: deal with sv and get total edited reads
    print("step2: deal with sv and get total edited reads")
    success_file = os.path.join(run_args.outdir, 'success_list.txt')
    (total_modify_reads_file, total_delete_reads_file, total_add_reads_file,
     total_modify_list, total_delete_list, total_add_list) = deal_sv(
        run_args.bamfile, run_args.reffasta, sv_list, run_args.single, int(run_args.minmapq),
        run_args.multmapfilter, int(run_args.mindepth), int(run_args.minmutreads), int(run_args.readlength),
        temp_out_dir, insert_size, invalid_log, run_log, success_file)
    # The invalid-SV log is not used after this point.
    invalid_log.close()

    # step3: get reads by region bed and write bam file
    print("step3: get reads by region bed and write bam file")
    chrom_list, used_bam_file_tmp, exclude_bam_file_tmp = get_reads_by_region(
        run_args.bamfile, sv_list, temp_out_dir)

    # write reads which may be used to used.bam and reads that should not be used to exclude.bam
    used_bam_file, exclude_bam_file = write_sub_bam(
        chrom_list, used_bam_file_tmp, exclude_bam_file_tmp, temp_out_dir, total_modify_reads_file,
        total_delete_reads_file, total_add_reads_file, int(run_args.process))

    # step4: merge edited reads and remap to new bam, consider about the tag, RG, life reads
    print("step4: merge edited reads and remap to new bam, consider about the tag, RG, life reads")
    edit_remap_bam_file = merge_edit_bam(
        run_args.bamfile, temp_out_dir, run_args.single, total_modify_reads_file, total_add_reads_file,
        used_bam_file, total_modify_list, total_add_list, run_args.seqer, run_args.aligner,
        run_args.alignerIndex, run_args.floworder, run_args.libkey, run_args.barcode, run_args.tag)

    # step5: remapped edit reads and merge
    print("step5: remapped edit reads and merge")
    out_bam_file = os.path.join(run_args.outdir, "edit.sorted.bam")
    bamMerge([edit_remap_bam_file, exclude_bam_file], out_bam_file)
    bamIndex(out_bam_file)

    print("Edit Bam is completed! Result see %s and valid mutation see %s. Invalid mutation can't be spike in see %s." % (
        out_bam_file, success_file, invalid_log_file))