def bamAddRG(editRemap, editBamReads, templateBamFile, outBamFile):
    """Write the remapped reads to ``outBamFile`` with read-group data restored.

    The output header is the header of ``editRemap`` with its ``"RG"`` entry
    replaced by the ``"RG"`` entry of ``templateBamFile``.  Every record
    returned by ``editRemap.fetch()`` is passed to ``readAddRG`` together with
    the corresponding pre-remap record (or ``None`` when the read name is not
    in ``editBamReads``) and the read groups from ``_getRGs``; the record that
    ``readAddRG`` returns is written to the output.

    Args:
        editRemap: open alignment file containing the remapped reads.
        editBamReads: mapping ``query_name -> {strand: read}`` of the reads as
            they were before remapping.  ``strand`` is whatever
            ``getReadStrand`` returns for a read.
        templateBamFile: open alignment file that supplies the ``"RG"`` header
            entry and the read groups.
        outBamFile: path of the BAM file to create.

    Raises:
        KeyError: if a read name is present in ``editBamReads`` but the strand
            of the remapped read is not.
    """
    head = editRemap.header
    head["RG"] = templateBamFile.header["RG"]
    addRGBam = pysam.AlignmentFile(outBamFile, 'wb', header=head)
    # Close the writer even when a helper raises half-way through, so the
    # handle is released and the data written so far is flushed.
    try:
        RG = _getRGs(templateBamFile)
        for read in editRemap.fetch():
            readName = read.query_name
            strand = getReadStrand(read)
            if readName in editBamReads:
                orig = editBamReads[readName][strand]
            else:
                orig = None
            addRGBam.write(readAddRG(read, orig, RG))
    finally:
        addRGBam.close()
def deal_haplotype(bam_file, haplotype, reffasta, haplotype_prefix, mindepth, minmutreads, minmapq, diffcover,
                   is_single, is_multmapfilter, aligner, aligner_index, **kwargs):
    """Pick reads covering one haplotype, edit them, and check the remapped coverage.

    Steps, in order:

    1. Fetch the records of ``bam_file`` for ``haplotype.chrom`` between
       ``haplotype.start`` and ``haplotype.end + 1``.  Every fetched record
       counts towards ``depth``; only records that have a reference start and
       are neither secondary nor supplementary are kept as candidates,
       grouped by query name and strand.
    2. Fail if ``depth`` is below ``mindepth`` or if
       ``int(depth * haplotype.freq)`` is below ``minmutreads``.
    3. Let ``pick_reads`` choose the reads to mutate (and their mates).
    4. Apply ``editRead`` with ``haplotype.mutList`` to each chosen read and
       write the edited records to ``<haplotype_prefix>.chosen.edited.bam``.
    5. Realign that file with ``remap`` into
       ``<haplotype_prefix>.chosen.remap.bam`` and let ``judge_coverdiff``
       decide whether the result is acceptable.

    Args:
        bam_file: path of the input BAM (opened with mode ``'rb'``).
        haplotype: object providing ``chrom``, ``start``, ``end``, ``freq``
            and ``mutList``.
        reffasta: reference handle forwarded to ``editRead``.
        haplotype_prefix: path prefix for the two temporary BAM files.
        mindepth: minimum number of fetched records (converted with ``int``).
        minmutreads: minimum number of reads to mutate (converted with ``int``).
        minmapq: forwarded to ``pick_reads``.
        diffcover: forwarded to ``judge_coverdiff`` after ``float`` conversion.
        is_single: truthy for single-end data; mates are then never written.
        is_multmapfilter: forwarded to ``pick_reads``.
        aligner: forwarded to ``remap``.
        aligner_index: forwarded to ``remap`` as the genome index.
        **kwargs: accepted and ignored.

    Returns:
        On success the 4-tuple
        ``(my_chosen_reads, my_mate_reads, real_mut_reads_num, depth)`` where
        ``my_chosen_reads`` maps ``query_name -> {strand: Read}`` and
        ``my_mate_reads`` maps ``query_name -> Read``.
        On failure the 2-tuple ``(False, message)``.
    """

    def _failure(reason):
        # All failure messages share the same location prefix.
        return False, "haplotype in position %s:%s-%s: %s" % (
            haplotype.chrom, haplotype.start, haplotype.end, reason)

    reads_dict = OrderedDict()
    # NOTE(review): this handle is intentionally not closed here.  The objects
    # returned on success are built from its records, and whether they stay
    # usable after close() depends on ``Read`` and the pysam version in use.
    bam = pysam.AlignmentFile(bam_file, 'rb')
    depth = 0
    for read in bam.fetch(reference=haplotype.chrom, start=haplotype.start, end=haplotype.end + 1):
        depth += 1
        # 2048 (0x800) is the SAM flag bit for a supplementary alignment.
        if read.reference_start is None or read.is_secondary or read.flag & 2048:
            continue
        strand = getReadStrand(read)
        reads_dict.setdefault(read.query_name, {})[strand] = read

    # judge depth and mut reads whether qualified
    if depth < int(mindepth):
        print("depth less than min depth!")
        return _failure("depth less than min depth(%s)" % (mindepth,))
    mut_reads_num = int(depth * haplotype.freq)
    if mut_reads_num < int(minmutreads):
        print("mutation reads num less than minmutreads!")
        return _failure("mut reads less than min mut reads(%s)" % (minmutreads,))

    print("start pick reads")
    res = pick_reads(bam, reads_dict, mut_reads_num, is_single, minmapq, is_multmapfilter)
    if res[0] is False:
        return _failure(res[1])
    chosen_reads, mate_reads = res
    print("end pick reads")

    # edit
    my_chosen_reads = {}
    my_mate_reads = {}
    tmp_bam_file = haplotype_prefix + ".chosen.edited.bam"
    tmp_bam = pysam.AlignmentFile(tmp_bam_file, 'wb', template=bam)
    chosen_reads_num = 0
    real_mut_reads_num = 0
    # Release the temporary writer even if editing or writing raises.
    try:
        for readName, readInfo in chosen_reads.items():
            my_chosen_reads[readName] = {}
            edited_wrappers = {}  # strand -> Read wrapper handed to editRead
            edited_records = {}   # strand -> pysam record carrying the new sequence
            for strand, read in readInfo.items():
                my_read = Read(read)
                res = editRead(my_read, reffasta, haplotype.mutList)
                if res is False:
                    continue
                real_mut_reads_num += 1
                sequence, quality, _shift = res
                read.query_sequence = sequence
                read.query_qualities = quality
                edited_wrappers[strand] = my_read
                edited_records[strand] = read

            # Decide whether this read name is emitted and whether its
            # untouched mate has to accompany it.
            if is_single:
                emit, with_mate = True, False
            else:
                edited_count = len(edited_wrappers)
                with_mate = edited_count == 1 and readName in mate_reads
                emit = with_mate or edited_count == 2
            if not emit:
                continue

            for strand in edited_wrappers:
                my_chosen_reads[readName][strand] = edited_wrappers[strand]
                tmp_bam.write(edited_records[strand])
                chosen_reads_num += 1
            if with_mate:
                mate_read = mate_reads[readName]
                my_mate_reads[readName] = Read(mate_read)
                tmp_bam.write(mate_read)
    finally:
        tmp_bam.close()

    # alignment and judge coverdiff whether qualified
    chosen_bam_file = haplotype_prefix + ".chosen.remap.bam"
    genome_index = aligner_index
    remap(genome_index, tmp_bam_file, chosen_bam_file, aligner, is_single)
    chosen_bam = pysam.AlignmentFile(chosen_bam_file)
    try:
        qualified = bool(
            judge_coverdiff(bam, depth, chosen_bam, chosen_reads_num, haplotype, float(diffcover)))
    finally:
        # The remapped BAM is only needed for the coverage comparison.
        chosen_bam.close()

    if qualified:
        return my_chosen_reads, my_mate_reads, real_mut_reads_num, depth
    return _failure("coverdiff is less than minDiffCover")
def reads_replace(bam_file, total_chosen_reads, seqer, flow_order, lib_key, barcode, tag, out_dir, aligner,
                  aligner_index, is_single):
    """Split ``bam_file`` into edited and untouched reads and remap the edited ones.

    Every record whose query name appears in ``total_chosen_reads`` is
    collected.  If its strand is also present, the record takes the sequence
    and qualities of the stored edited read and is post-processed with
    ``deal_life_reads`` (when ``seqer == "life"``) and ``add_tag`` (when
    ``tag`` is truthy).  The collected records go to ``<out_dir>/edit.bam``;
    all records with other names go to ``<out_dir>/exclude.bam``.  The edited
    file is then remapped, sorted and indexed.

    Args:
        bam_file: path of the input BAM; ``fetch()`` is run over it twice.
        total_chosen_reads: mapping ``query_name -> {strand: edited_read}``;
            each edited read provides ``query_sequence`` and
            ``query_qualities``.
        seqer: sequencer name; only the value ``"life"`` has an effect here.
        flow_order, lib_key, barcode: forwarded to ``deal_life_reads``.
        tag: truthy to tag edited reads and run ``bam_add_tag`` on the result.
        out_dir: directory receiving all output files.
        aligner, aligner_index, is_single: forwarded to ``remap``.

    Returns:
        Tuple ``(edited_remapped_sorted_bam_path, exclude_bam_path)``.
    """
    import subprocess

    bam = pysam.AlignmentFile(bam_file)
    try:
        edit_bam_reads = {}
        for read in bam.fetch():
            read_name = read.query_name
            if read_name not in total_chosen_reads:
                continue
            strand = getReadStrand(read)
            edited_by_strand = total_chosen_reads[read_name]
            if strand in edited_by_strand:
                my_read = edited_by_strand[strand]
                read.query_sequence = my_read.query_sequence
                read.query_qualities = my_read.query_qualities
                if seqer == "life":
                    read = deal_life_reads(read, flow_order, lib_key, barcode)
                if tag:
                    read = add_tag(read)
            # Reads of a chosen name are stored whether or not this strand
            # was edited, so they stay together in edit.bam.
            edit_bam_reads.setdefault(read_name, {})[strand] = read

        # write edited reads into edit.bam
        edit_bam_file = os.path.join(out_dir, "edit.bam")
        edit_bam = pysam.AlignmentFile(edit_bam_file, 'wb', template=bam)
        try:
            for readInfo in edit_bam_reads.values():
                for read in readInfo.values():
                    edit_bam.write(read)
        finally:
            edit_bam.close()

        # write not edited reads into exclude.bam
        exclude_bam_file = os.path.join(out_dir, "exclude.bam")
        exclude_bam = pysam.AlignmentFile(exclude_bam_file, 'wb', template=bam)
        try:
            for read in bam.fetch():
                if read.query_name not in edit_bam_reads:
                    exclude_bam.write(read)
        finally:
            exclude_bam.close()
    finally:
        # Only file paths are used from here on.
        bam.close()

    # remap the edited reads
    # The @RG header lines are extracted without a shell so that paths
    # containing spaces or shell metacharacters are handled correctly.  As
    # before, a failing samtools call simply results in "no read group".
    header = os.path.join(out_dir, 'bam.header')
    try:
        proc = subprocess.Popen(['samtools', 'view', '-H', bam_file],
                                stdout=subprocess.PIPE, universal_newlines=True)
        header_text = proc.communicate()[0]
    except OSError:
        header_text = ''
    rg_lines = [line + '\n' for line in header_text.split('\n') if line.startswith('@RG')]
    with open(header, 'w') as header_fh:
        header_fh.writelines(rg_lines)
    # Only the first @RG line is forwarded to the aligner.
    head = rg_lines[0].rstrip() if rg_lines else None

    edit_remap_bam_file = os.path.join(out_dir, "edit.remap.bam")
    remap(aligner_index, edit_bam_file, edit_remap_bam_file, aligner, is_single, header=head)
    edit_remap_bam_sorted_prefix = os.path.join(out_dir, "edit.remap.sort")
    edit_remap_bam_sorted_file = os.path.join(out_dir, "edit.remap.sort.bam")
    bamSort(edit_remap_bam_file, edit_remap_bam_sorted_prefix)
    bamIndex(edit_remap_bam_sorted_file)

    if tag:
        # NOTE(review): this output path is identical to the sorted input
        # path, so bam_add_tag is asked to read and write the same file, and
        # the index built above may be stale afterwards.  Confirm that
        # bam_add_tag copes with this.
        edit_remap_addtag_file = os.path.join(out_dir, "edit.remap.sort.bam")
        bam_add_tag(edit_remap_bam_sorted_file, edit_remap_addtag_file)
    else:
        edit_remap_addtag_file = edit_remap_bam_sorted_file
    return edit_remap_addtag_file, exclude_bam_file