Exemple #1
def run(args):
    """Scan MspI sites, trim the end-repaired C and write the filter report.

    Workflow: validate the command-line options, decide from the first line
    returned by ``check.check_mapping_file_flag`` whether the input is
    single- or paired-end, load the reference, run ``parser_trim_sambam``
    over the mapping files, then produce the MspI M-bias plot and the
    filter report.

    Args:
        args: Object whose ``parse_args()`` returns the options namespace.
            The attributes read here are ``sam_file`` (comma-separated
            paths), ``ref_file``, ``samtools``, ``name``, ``bsm``,
            ``dige_site``, ``remove_overlap`` and ``not_mapping``.
    """
    options = args.parse_args()

    # NOTE(review): error() is presumably fatal; if it returns, the later
    # steps still run with the invalid inputs -- confirm against its
    # definition.
    if len(options.sam_file) == 0:
        error("Missing the SAM file, use -s or --sam option.")
    else:
        # Several mapping files may be given as one comma-separated string.
        options.sam_file = options.sam_file.split(',')
    for s in options.sam_file:
        if not os.path.isfile(s):
            error("Can't open the SAM file: " + s)

    if len(options.ref_file) == 0:
        error("Missing the reference genome fasta file, use -r or --ref option.")
    elif not os.path.isfile(options.ref_file):
        error("Can't open the ref file: " + options.ref_file)

    # The samtools location is used as a path prefix, so make sure a
    # non-empty value ends with a slash.
    if len(options.samtools) != 0 and options.samtools[-1] != '/':
        options.samtools += '/'

    if len(options.name) == 0:
        error("Missing the output file name, use -n or --name options.")

    sam_inf = options.sam_file
    ref_file = options.ref_file
    bsm = options.bsm
    s_path = options.samtools
    name = options.name
    dige_site = options.dige_site
    remove_overlap = options.remove_overlap
    not_mapping = options.not_mapping

    info("Get the all parameter!!")

    # Check the input mapping files: the second tab-separated field of the
    # first line decides between paired-end ('p' present) and single-end.
    sam_format, read_inf = check.check_mapping_file_flag(sam_inf[0], s_path)
    pre_flag = read_inf.readline().split('\t')[1]
    if 'p' in pre_flag:
        single_on = False
        info("The input mapping files are paired-end sequencing!")
    else:
        single_on = True
        info("The input mapping files are single-end sequencing!")

    # Get reference information.
    ref = GR.get_ref(ref_file)

    # Scan MspI site and trim the end-repaired C.
    (dige_dict, all_reads, all_mapping_bp, not_mapping_reads,
     filter_not_mapping_reads, filter_MspI_endrepair_bp,
     filter_remove_overlap_bp) = parser_trim_sambam(
        sam_inf, ref, bsm, s_path, dige_site, single_on, remove_overlap,
        not_mapping, name)

    # Produce MspI Mbias plot.
    RR.generator(dige_dict, single_on, name)

    # Produce the filter report.
    report(all_reads, all_mapping_bp, not_mapping_reads,
           filter_not_mapping_reads, filter_MspI_endrepair_bp,
           filter_remove_overlap_bp, single_on, name)
Exemple #2
def run(args):
    """Scan MspI sites, trim the end-repaired C and write the filter report.

    Workflow: validate the command-line options, decide from the first line
    returned by ``check.check_mapping_file_flag`` whether the input is
    single- or paired-end, load the reference, run ``parser_trim_sambam``
    over the mapping files, then produce the MspI M-bias plot and the
    filter report.

    Args:
        args: Object whose ``parse_args()`` returns the options namespace.
            The attributes read here are ``sam_file`` (comma-separated
            paths), ``ref_file``, ``samtools``, ``name``, ``bsm``,
            ``dige_site``, ``remove_overlap`` and ``not_mapping``.
    """
    options = args.parse_args()

    # NOTE(review): error() is presumably fatal; if it returns, the later
    # steps still run with the invalid inputs -- confirm against its
    # definition.
    if len(options.sam_file) == 0:
        error("Missing the SAM file, use -s or --sam option.")
    else:
        # Several mapping files may be given as one comma-separated string.
        options.sam_file = options.sam_file.split(',')
    for s in options.sam_file:
        if not os.path.isfile(s):
            error("Can't open the SAM file: " + s)

    if len(options.ref_file) == 0:
        error("Missing the reference genome fasta file, use -r or --ref option.")
    elif not os.path.isfile(options.ref_file):
        error("Can't open the ref file: " + options.ref_file)

    # The samtools location is used as a path prefix, so make sure a
    # non-empty value ends with a slash.
    if len(options.samtools) != 0 and options.samtools[-1] != '/':
        options.samtools += '/'

    if len(options.name) == 0:
        error("Missing the output file name, use -n or --name options.")

    sam_inf = options.sam_file
    ref_file = options.ref_file
    bsm = options.bsm
    s_path = options.samtools
    name = options.name
    dige_site = options.dige_site
    remove_overlap = options.remove_overlap
    not_mapping = options.not_mapping

    info("Get the all parameter!!")

    # Check the input mapping files: the second tab-separated field of the
    # first line decides between paired-end ('p' present) and single-end.
    sam_format, read_inf = check.check_mapping_file_flag(sam_inf[0], s_path)
    pre_flag = read_inf.readline().split('\t')[1]
    if 'p' in pre_flag:
        single_on = False
        info("The input mapping files are paired-end sequencing!")
    else:
        single_on = True
        info("The input mapping files are single-end sequencing!")

    # Get reference information.
    ref = GR.get_ref(ref_file)

    # Scan MspI site and trim the end-repaired C.
    (dige_dict, all_reads, all_mapping_bp, not_mapping_reads,
     filter_not_mapping_reads, filter_MspI_endrepair_bp,
     filter_remove_overlap_bp) = parser_trim_sambam(
        sam_inf, ref, bsm, s_path, dige_site, single_on, remove_overlap,
        not_mapping, name)

    # Produce MspI Mbias plot.
    RR.generator(dige_dict, single_on, name)

    # Produce the filter report.
    report(all_reads, all_mapping_bp, not_mapping_reads,
           filter_not_mapping_reads, filter_MspI_endrepair_bp,
           filter_remove_overlap_bp, single_on, name)
Exemple #3
def run(args):
    """Alternative module: use the strategy in Bis-SNP to trim 5' bisulfite conversion failures.

    Steps:
      1. Validate the command-line options.
      2. Decide single- vs paired-end from the first line returned by
         ``check.check_mapping_file_flag``.
      3. When ``filter_dup`` is set, collect read locations from every
         input and call ``DR.duplicate_report`` to obtain ``max_cov``.
      4. Stream every input through ``NF.nonuniform_filter``, writing to
         ``<input without its last 4 characters>_<name>_filter.sam``.
      5. Call ``NR.nonuniform_generator`` on the trim positions and write
         ``<name>_BSeQC_nonuniform_filter_report.txt``.

    Args:
        args: Object whose ``parse_args()`` returns the options namespace.
            The attributes read here are ``sam_file`` (comma-separated
            paths), ``ref_file``, ``samtools``, ``name``, ``bsm``,
            ``remove_overlap``, ``filter_dup``, ``p_poisson``, ``gsize``
            and ``not_mapping``.
    """
    options = args.parse_args()

    # NOTE(review): error() is presumably fatal; if it returns, the later
    # steps still run with the invalid inputs -- confirm.
    if len(options.sam_file) == 0:
        error("Missing the SAM file, use -s or --sam option.")
    else:
        # Several mapping files may be given as one comma-separated string.
        options.sam_file = options.sam_file.split(',')
    for s in options.sam_file:
        if not os.path.isfile(s):
            error("Can't open the SAM file: " + s)

    if len(options.ref_file) == 0:
        error("Missing the reference genome fasta file, use -r or --ref option.")
    elif not os.path.isfile(options.ref_file):
        error("Can't open the ref file: " + options.ref_file)

    # The samtools location is used as a path prefix.
    if len(options.samtools) != 0 and options.samtools[-1] != '/':
        options.samtools += '/'

    if len(options.name) == 0:
        error("Missing the output file name, use -n or --name options.")

    sam_inf = options.sam_file
    ref_file = options.ref_file
    bsm = options.bsm
    s_path = options.samtools
    name = options.name
    remove_overlap = options.remove_overlap
    filter_dup = options.filter_dup
    p_poisson = options.p_poisson
    gsize = options.gsize
    not_mapping = options.not_mapping

    info("Get the all parameter!!")

    # Check the input mapping files: the second tab-separated field of the
    # first line decides between paired-end ('p' present) and single-end.
    sam_format, read_inf = check.check_mapping_file_flag(sam_inf[0], s_path)
    pre_flag = read_inf.readline().split('\t')[1]
    if 'p' in pre_flag:
        single_on = False
        info("The input mapping files are paired-end sequencing!")
    else:
        single_on = True
        info("The input mapping files are single-end sequencing!")

    loc_dict = {}
    if filter_dup:
        # If filter_dup is True, the duplicate reads are assessed and shown
        # in Dup_dis.pdf.
        info("The filter_dup has been set True.")
        info("Assess the duplicate reads...")
        locate = LI.Loc_single if single_on else LI.Loc_paired
        for sam in sam_inf:
            sam_format, read_inf = check.check_mapping_file(sam, s_path)
            for read in read_inf:
                loc_dict = locate(read, loc_dict, bsm)
        max_cov = DR.duplicate_report(loc_dict, gsize, p_poisson, name)
        info('Get the duplicate reads distribution!')

    # Get reference information.
    ref = GR.get_ref(ref_file)
    trim_position = []

    filter_duplicate_reads = 0
    filter_nonuniform_trim_bp_CG = 0
    filter_remove_overlap_bp = 0
    filter_not_mapping_reads = 0
    all_reads = 0
    not_mapping_reads = 0
    all_mapping_bp = 0

    # Filter the 5' bisulfite failure.
    for sam in sam_inf:
        out_sam = sam[:-4] + '_' + name + '_filter.sam'
        # The context manager closes the filtered SAM file even if a read
        # fails to parse (it was previously left open).
        with open(out_sam, 'w') as out:
            record_mate = {}
            sam_format, read_inf = check.check_mapping_file_header(sam, s_path)

            for read in read_inf:
                if read.startswith('@'):
                    # SAM header line.
                    # NOTE(review): copying the header to the output is an
                    # assumption; the original statements here were lost.
                    out.write(read)
                    continue
                all_reads += 1  # record the read number (2013-06-20)

                # Get the read information for trimming.
                # If the read isn't unique mapping, we get an empty list ([]).
                # In: single unique mapping read  Out: [flag,strand,chr,pos,CIGAR,seq,score]
                # In: paired unique mapping read  Out: [flag,strand,chr,pos1,CIGAR,pos2,insert,seq,score]
                read_info = RI(read, bsm).extract_information()

                if len(read_info) == 0:
                    not_mapping_reads += 1
                    if not_mapping:
                        # Keep the not_unique mapping reads (or not paired
                        # mapping).
                        # NOTE(review): writing the read through unchanged
                        # is assumed -- confirm.
                        out.write(read)
                    else:
                        # record the not mapping read number (2013-06-20)
                        filter_not_mapping_reads += 1
                    # Nothing to trim; read_info has no sequence field.
                    continue

                if len(loc_dict) > 0:
                    # --filter_dup has been set True: remove duplicates.
                    duplicate, loc_dict = DF(read_info, loc_dict, max_cov,
                                             single_on)
                else:
                    duplicate = False

                # record the mapping read basepair (2013-06-20); the
                # sequence is field 5 (single-end) or field 7 (paired-end).
                seq_index = 5 if single_on else 7
                all_mapping_bp += len(read_info[seq_index])

                # NOTE(review): the last argument is inferred from the
                # returned tuple (accumulators are passed in and handed
                # back) -- confirm against NF.nonuniform_filter.
                (record_mate, trim_position, filter_nonuniform_trim_bp_CG,
                 filter_duplicate_reads,
                 filter_remove_overlap_bp) = NF.nonuniform_filter(
                    read, out, read_info, ref, remove_overlap, duplicate,
                    single_on, record_mate, trim_position,
                    filter_nonuniform_trim_bp_CG, filter_duplicate_reads,
                    filter_remove_overlap_bp)
            del record_mate
    NR.nonuniform_generator(trim_position, name)

    # trim_position[i] is multiplied by i to obtain the trimmed basepairs.
    filter_nonuniform_trim_bp = sum(
        i * count for i, count in enumerate(trim_position))

    def percent(part, whole):
        # Empty inputs give a zero denominator; report 0.00% instead of
        # raising ZeroDivisionError.
        if not whole:
            return 0.0
        return float(part) / whole * 100

    unique_reads = all_reads - not_mapping_reads

    # Produce the filter report.
    info('Produce the report file...')
    with open(name + "_BSeQC_nonuniform_filter_report.txt", 'w') as report_out:

        def write_ratio(template, count, whole):
            report_out.write(template % (count, percent(count, whole), "%"))

        report_out.write('Total reads: %d\n' % all_reads)
        if single_on:
            write_ratio('Not unique mapping reads: %d(%.2f%s all reads)\n',
                        not_mapping_reads, all_reads)
            write_ratio('Unique mapping reads: %d(%.2f%s all reads)\n',
                        unique_reads, all_reads)
            write_ratio('Skip not unique mapping reads: %d(%.2f%s all reads)\n',
                        filter_not_mapping_reads, all_reads)
            report_out.write('In unique mapping reads:\n')
            report_out.write('All unique mapping basepairs: %d\n' % all_mapping_bp)
            write_ratio('Filter Duplicate reads: %d(%.2f%s of unique mapping reads)\n',
                        filter_duplicate_reads, unique_reads)
            write_ratio("Filter 5' nonconversion basepairs: %d(%.2f%s of unique mapping basepairs)\n",
                        filter_nonuniform_trim_bp, all_mapping_bp)
            write_ratio("Filter 5' nonconversion CpG basepairs: %d(%.2f%s of unique mapping basepairs)\n",
                        filter_nonuniform_trim_bp_CG, all_mapping_bp)
        else:
            write_ratio('Not unique paired mapping reads: %d(%.2f%s)\n',
                        not_mapping_reads, all_reads)
            write_ratio('Unique paired mapping reads: %d(%.2f%s)\n',
                        unique_reads, all_reads)
            write_ratio('Skip not paired unique mapping reads: %d(%.2f%s)\n',
                        filter_not_mapping_reads, all_reads)
            report_out.write('In unique paired mapping reads:\n')
            report_out.write('All unique paired mapping basepairs: %d\n' % all_mapping_bp)
            # Fixed: the percentage used to be computed as
            # dup / (all_reads - not_mapping_reads * 100) because of a
            # misplaced parenthesis; it now matches the single-end branch.
            write_ratio('Filter Duplicate reads: %d(%.2f%s of unique paired mapping reads)\n',
                        filter_duplicate_reads, unique_reads)
            write_ratio("Filter 5' nonconversion basepairs: %d(%.2f%s of unique mapping basepairs)\n",
                        filter_nonuniform_trim_bp, all_mapping_bp)
            write_ratio("Filter 5' nonconversion CpG basepairs: %d(%.2f%s of unique mapping basepairs)\n",
                        filter_nonuniform_trim_bp_CG, all_mapping_bp)
            write_ratio('Filter overlapped basepairs: %d(%.2f%s of unique paired mapping basepairs)\n',
                        filter_remove_overlap_bp, all_mapping_bp)
    info('Get the report file!')
Exemple #4
def run(args):
    # Command-line entry point: validates the options, builds the QC report
    # (M-bias, optionally together with the duplicate distribution) and,
    # when any filtering option applies, filters the SAM files through
    # QF.filter_sam.
    # NOTE(review): this copy appears to have lost lines (several `else:`
    # branches, some `error(` / `info(` openers and the tails of a few
    # calls). Each suspicious spot is flagged below; the code itself is left
    # untouched because the missing call arguments cannot be recovered from
    # this file.
    options = args.parse_args()
    if len(options.sam_file) == 0:
        error("Missing the SAM file, use -s or --sam option.")
        # NOTE(review): an `else:` presumably belongs before the split; as
        # written the comma split only happens on the empty-string path.
        options.sam_file = options.sam_file.split(',')
    for s in options.sam_file:
        if not os.path.isfile(s):
            error("Can't open the SAM file: " + s)

    if len(options.ref_file) == 0:
        # NOTE(review): the `error(` opener for the next string and an
        # `else:` before the isfile check seem to be missing.
            "Missing the reference genome fasta file, use -r or --ref option.")
        if not os.path.isfile(options.ref_file):
            error("Can't open the ref file: " + options.ref_file)

    # Make sure a non-empty samtools location ends with a slash.
    if len(options.samtools) != 0:
        if options.samtools[-1] != '/':
            options.samtools += '/'

    if len(options.name) == 0:
        error("Missing the output file name, use -n or --name options.")

    # NOTE(review): the message says "ref file" although the path being
    # checked is the trimming file.
    if len(options.trim_file) != 0 and not os.path.isfile(options.trim_file):
        error("Can't open the ref file: " + options.trim_file)

    # read_length arrives as one comma-separated string.
    options.read_length = options.read_length.split(',')
    sam_inf = options.sam_file
    ref_file = options.ref_file
    bsm = options.bsm
    s_path = options.samtools
    name = options.name
    read_l = options.read_length
    auto = options.automatically
    pvalue = options.pvalue
    drift = options.drift
    trim_file = options.trim_file
    remove_overlap = options.remove_overlap
    filter_dup = options.filter_dup
    p_poisson = options.p_poisson
    gsize = options.gsize
    not_mapping = options.not_mapping

    info("Get the all parameter!!")

    #check the input mapping files
    # The second tab-separated field of the first line decides the mode:
    # a 'p' in it means paired-end.
    sam_format, read_inf = check.check_mapping_file_flag(sam_inf[0], s_path)
    pre_flag = read_inf.readline().split('\t')[1]
    if 'p' in pre_flag:
        single_on = False
        info("The input mapping files are paired-end sequencing!")
        # NOTE(review): an `else:` presumably belongs here; as written
        # single_on is immediately overwritten with True.
        single_on = True
        info("The input mapping files are single-end sequencing!")

    if filter_dup:
        ## if filter_up is TRUE, the duplicate reads will be assessed and shown in Dup_dis.pdf
        ## and the loc_dict & max_cov will be used in the trimming step
        if len(trim_file) != 0:
            # NOTE(review): the bare strings below look like arguments of
            # `info(` calls whose opener lines are missing, and an `else:`
            # seems to be missing before the "has been set True" message.
                "The trimming file has been defined. But the filter_dup has been set True."
            info("QC_report will just generate Dup distribution!!")
                "And the user defined trimming file will be used in the trimming step!!"
            info("The filter_dup has been set True.")
                "QC_report not only includes Mbias plot, Mbias table and trimming file, but also Dup distribution."
        # NOTE(review): the argument list of this call is not closed in this
        # copy -- confirm the trailing argument(s) against
        # QR.QC_Report_Mbias_Dup.
        QC_report_MD = QR.QC_Report_Mbias_Dup(sam_inf, ref_file, bsm, s_path,
                                              name, read_l, single_on, pvalue,
                                              drift, trim_file, p_poisson,
        strand_t, loc_dict, max_cov = QC_report_MD.generator()

        # NOTE(review): the branch below reports filter_dup as False, so an
        # `else:` pairing with `if filter_dup:` is presumably missing above.
        if len(trim_file) != 0:
            info("The trimming file has been defined. So Ignore the ")
            "The filter_dup has been set False!! QC_report only includes Mbias plot, Mbias table and trimming file."
            "And ignore the collection of the location information for removing duplicate reads!!"
        # NOTE(review): this call is also left unclosed; its trailing
        # argument(s) are unknown from this file.
        QC_report_M = QR.QC_Report_Mias(sam_inf, ref_file, bsm, s_path, name,
                                        read_l, single_on, pvalue, drift,
        strand_t = QC_report_M.generator()
        #no duplicate location information
        loc_dict = {}
        max_cov = 10000

    # Filtering runs when any applicable option is set: auto/filter_dup for
    # single-end, auto/filter_dup/remove_overlap for paired-end.
    if ((auto or filter_dup) and single_on) or (
        (auto or filter_dup or remove_overlap) and not single_on):
        ## for single-end: qc_filter Mbias or filter duplicate reads
        ## for paired-end: qc_filter Mbias, keep one copy of the overlapping segment, or filter duplicate reads
        info("Start to filter read...")
        # NOTE(review): each status pair below (enabled / ignored) presumably
        # had an `else:` between the two messages; some `info(` openers are
        # missing as well.
        if auto:
            info("Automatically trim Mbias...")
            info("--auto has been set %s ! Ignore trimming Mbias!!" % auto)
        if filter_dup:
            info("Filter duplicate reads...")
                "--filter_dup has been set %s ! Ignore removing duplicate reads!!"
                % filter_dup)
        if remove_overlap and not single_on:
            info("Keep one copy of the overlapping segment...")
        if not remove_overlap and not single_on:
                "--remove_overlap has been set %s ! Ignore removing one copy of the overlapping segment!!"
                % remove_overlap)
        if not_mapping:
            info("Keep the not_unique mapping reads!")
            info("Remove the not_unique mapping reads!!")
        # NOTE(review): the argument list of filter_sam is not closed in
        # this copy -- trailing argument(s) unknown.
        QF.filter_sam(sam_inf, ref_file, bsm, strand_t, read_l, single_on,
                      name, s_path, auto, remove_overlap, loc_dict, max_cov,
        info("Get the filtered SAM file!")
        # NOTE(review): the "Skip ..." messages below presumably belong to
        # an `else:` of the outer filtering condition, with a further
        # `else:` separating the single-end and paired-end wording.
        if single_on:
            info("Skip the trimming Mbias and removing duplicate reads!!")
            info("Not BSeQC filter report!!")
                "Skip the trimming Mbias, removing duplicate reads and removing one copy of the overlapping segment!!"
            info("Not BSeQC filter report!!")
Exemple #5
def run(args):
    # Command-line entry point: validates the options, builds the QC report
    # (M-bias, optionally together with the duplicate distribution) and,
    # when any filtering option applies, filters the SAM files through
    # QF.filter_sam.
    # NOTE(review): this copy appears to have lost lines (several `else:`
    # branches, an `info(` opener and the tails of two calls). Each
    # suspicious spot is flagged below; the code itself is left untouched
    # because the missing call arguments cannot be recovered from this file.
    options = args.parse_args()
    if len(options.sam_file) == 0:
        error("Missing the SAM file, use -s or --sam option.")
        # NOTE(review): an `else:` presumably belongs before the split; as
        # written the comma split only happens on the empty-string path.
        options.sam_file = options.sam_file.split(',')
    for s in options.sam_file:
        if not os.path.isfile(s):
            error("Can't open the SAM file: " + s)

    if len(options.ref_file) == 0:
        error("Missing the reference genome fasta file, use -r or --ref option.")
        # NOTE(review): an `else:` presumably belongs before the isfile
        # check.
        if not os.path.isfile(options.ref_file):
            error("Can't open the ref file: " + options.ref_file)

    # Make sure a non-empty samtools location ends with a slash.
    if len(options.samtools) != 0:
        if options.samtools[-1] != '/':
            options.samtools += '/'

    if len(options.name) == 0:
        error("Missing the output file name, use -n or --name options.")

    # NOTE(review): the message says "ref file" although the path being
    # checked is the trimming file.
    if len(options.trim_file) != 0 and not os.path.isfile(options.trim_file):
        error("Can't open the ref file: " + options.trim_file)

    # read_length arrives as one comma-separated string.
    options.read_length = options.read_length.split(',')
    sam_inf = options.sam_file
    ref_file = options.ref_file
    bsm = options.bsm
    s_path = options.samtools
    name = options.name
    read_l = options.read_length
    auto = options.automatically
    pvalue = options.pvalue
    drift = options.drift
    trim_file = options.trim_file
    remove_overlap = options.remove_overlap
    filter_dup = options.filter_dup
    p_poisson = options.p_poisson
    gsize = options.gsize
    not_mapping = options.not_mapping

    info("Get the all parameter!!")

    #check the input mapping files
    # The second tab-separated field of the first line decides the mode:
    # a 'p' in it means paired-end.
    sam_format, read_inf = check.check_mapping_file_flag(sam_inf[0], s_path)
    pre_flag = read_inf.readline().split('\t')[1]
    if 'p' in pre_flag:
        single_on = False
        info("The input mapping files are paired-end sequencing!")
        # NOTE(review): an `else:` presumably belongs here; as written
        # single_on is immediately overwritten with True.
        single_on = True
        info("The input mapping files are single-end sequencing!")

    if filter_dup:
        ## if filter_up is TRUE, the duplicate reads will be assessed and shown in Dup_dis.pdf
        ## and the loc_dict & max_cov will be used in the trimming step
        if len(trim_file) != 0:
            info("The trimming file has been defined. But the filter_dup has been set True.")
            info("QC_report will just generate Dup distribution!!")
            info("And the user defined trimming file will be used in the trimming step!!")
            # NOTE(review): an `else:` seems to be missing before the two
            # messages below (they describe the no-trim-file case).
            info("The filter_dup has been set True.")
            info("QC_report not only includes Mbias plot, Mbias table and trimming file, but also Dup distribution.")
        QC_report_MD = QR.QC_Report_Mbias_Dup(sam_inf, ref_file, bsm, s_path, name, read_l, single_on, pvalue, drift,
                                              p_poisson, gsize)
        strand_t, loc_dict, max_cov = QC_report_MD.generator()

        # NOTE(review): the branch below reports filter_dup as False, so an
        # `else:` pairing with `if filter_dup:` is presumably missing above.
        if len(trim_file) != 0:
            info("The trimming file has been defined. So Ignore the ")
        info("The filter_dup has been set False!! QC_report only includes Mbias plot, Mbias table and trimming file.")
        info("And ignore the collection of the location information for removing duplicate reads!!")
        # NOTE(review): the argument list of this call is not closed in this
        # copy -- its trailing argument(s) are unknown from this file.
        QC_report_M = QR.QC_Report_Mias(sam_inf, ref_file, bsm, s_path, name, read_l, single_on, pvalue, drift,
        strand_t = QC_report_M.generator()
        #no duplicate location information
        loc_dict = {}
        max_cov = 10000

    # Filtering runs when any applicable option is set: auto/filter_dup for
    # single-end, auto/filter_dup/remove_overlap for paired-end.
    if ((auto or filter_dup) and single_on) or ((auto or filter_dup or remove_overlap) and not single_on):
    ## for single-end: qc_filter Mbias or filter duplicate reads
    ## for paired-end: qc_filter Mbias, keep one copy of the overlapping segment, or filter duplicate reads
        info("Start to filter read...")
        # NOTE(review): each status pair below (enabled / ignored) presumably
        # had an `else:` between the two messages.
        if auto:
            info("Automatically trim Mbias...")
            info("--auto has been set %s ! Ignore trimming Mbias!!" % auto)
        if filter_dup:
            info("Filter duplicate reads...")
            info("--filter_dup has been set %s ! Ignore removing duplicate reads!!" % filter_dup)
        if remove_overlap and not single_on:
            info("Keep one copy of the overlapping segment...")
        if not remove_overlap and not single_on:
            # NOTE(review): the `info(` opener for the next string seems to
            # be missing.
                "--remove_overlap has been set %s ! Ignore removing one copy of the overlapping segment!!" % remove_overlap)
        if not_mapping:
            info("Keep the not_unique mapping reads!")
            info("Remove the not_unique mapping reads!!")
        # NOTE(review): the argument list of filter_sam is not closed in
        # this copy -- trailing argument(s) unknown.
        QF.filter_sam(sam_inf, ref_file, bsm, strand_t, read_l, single_on, name, s_path, auto, remove_overlap, loc_dict, max_cov,
        info("Get the filtered SAM file!")
        # NOTE(review): the "Skip ..." messages below presumably belong to
        # an `else:` of the outer filtering condition, with a further
        # `else:` separating the single-end and paired-end wording.
        if single_on:
            info("Skip the trimming Mbias and removing duplicate reads!!")
            info("Not BSeQC filter report!!")
            info("Skip the trimming Mbias, removing duplicate reads and removing one copy of the overlapping segment!!")
            info("Not BSeQC filter report!!")
Exemple #6
def run(args):
    """Alternative module: use the strategy in Bis-SNP to trim 5' bisulfite conversion failures.

    Steps:
      1. Validate the command-line options.
      2. Decide single- vs paired-end from the first line returned by
         ``check.check_mapping_file_flag``.
      3. When ``filter_dup`` is set, collect read locations from every
         input and call ``DR.duplicate_report`` to obtain ``max_cov``.
      4. Stream every input through ``NF.nonuniform_filter``, writing to
         ``<input without its last 4 characters>_<name>_filter.sam``.
      5. Call ``NR.nonuniform_generator`` on the trim positions and write
         ``<name>_BSeQC_nonuniform_filter_report.txt``.

    Args:
        args: Object whose ``parse_args()`` returns the options namespace.
            The attributes read here are ``sam_file`` (comma-separated
            paths), ``ref_file``, ``samtools``, ``name``, ``bsm``,
            ``remove_overlap``, ``filter_dup``, ``p_poisson``, ``gsize``
            and ``not_mapping``.
    """
    options = args.parse_args()

    # NOTE(review): error() is presumably fatal; if it returns, the later
    # steps still run with the invalid inputs -- confirm.
    if len(options.sam_file) == 0:
        error("Missing the SAM file, use -s or --sam option.")
    else:
        # Several mapping files may be given as one comma-separated string.
        options.sam_file = options.sam_file.split(',')
    for s in options.sam_file:
        if not os.path.isfile(s):
            error("Can't open the SAM file: " + s)

    if len(options.ref_file) == 0:
        error("Missing the reference genome fasta file, use -r or --ref option.")
    elif not os.path.isfile(options.ref_file):
        error("Can't open the ref file: " + options.ref_file)

    # The samtools location is used as a path prefix.
    if len(options.samtools) != 0 and options.samtools[-1] != '/':
        options.samtools += '/'

    if len(options.name) == 0:
        error("Missing the output file name, use -n or --name options.")

    sam_inf = options.sam_file
    ref_file = options.ref_file
    bsm = options.bsm
    s_path = options.samtools
    name = options.name
    remove_overlap = options.remove_overlap
    filter_dup = options.filter_dup
    p_poisson = options.p_poisson
    gsize = options.gsize
    not_mapping = options.not_mapping

    info("Get the all parameter!!")

    # Check the input mapping files: the second tab-separated field of the
    # first line decides between paired-end ('p' present) and single-end.
    sam_format, read_inf = check.check_mapping_file_flag(sam_inf[0], s_path)
    pre_flag = read_inf.readline().split('\t')[1]
    if 'p' in pre_flag:
        single_on = False
        info("The input mapping files are paired-end sequencing!")
    else:
        single_on = True
        info("The input mapping files are single-end sequencing!")

    loc_dict = {}
    if filter_dup:
        # If filter_dup is True, the duplicate reads are assessed and shown
        # in Dup_dis.pdf.
        info("The filter_dup has been set True.")
        info("Assess the duplicate reads...")
        locate = LI.Loc_single if single_on else LI.Loc_paired
        for sam in sam_inf:
            sam_format, read_inf = check.check_mapping_file(sam, s_path)
            for read in read_inf:
                loc_dict = locate(read, loc_dict, bsm)
        max_cov = DR.duplicate_report(loc_dict, gsize, p_poisson, name)
        info('Get the duplicate reads distribution!')

    # Get reference information.
    ref = GR.get_ref(ref_file)
    trim_position = []

    filter_duplicate_reads = 0
    filter_nonuniform_trim_bp_CG = 0
    filter_remove_overlap_bp = 0
    filter_not_mapping_reads = 0
    all_reads = 0
    not_mapping_reads = 0
    all_mapping_bp = 0

    # Filter the 5' bisulfite failure.
    for sam in sam_inf:
        out_sam = sam[:-4] + '_' + name + '_filter.sam'
        # The context manager closes the filtered SAM file even if a read
        # fails to parse (it was previously left open).
        with open(out_sam, 'w') as out:
            record_mate = {}
            sam_format, read_inf = check.check_mapping_file_header(sam, s_path)

            for read in read_inf:
                if read.startswith('@'):
                    # SAM header line.
                    # NOTE(review): copying the header to the output is an
                    # assumption; the original statements here were lost.
                    out.write(read)
                    continue
                all_reads += 1  # record the read number (2013-06-20)

                # Get the read information for trimming.
                # If the read isn't unique mapping, we get an empty list ([]).
                # In: single unique mapping read  Out: [flag,strand,chr,pos,CIGAR,seq,score]
                # In: paired unique mapping read  Out: [flag,strand,chr,pos1,CIGAR,pos2,insert,seq,score]
                read_info = RI(read, bsm).extract_information()

                if len(read_info) == 0:
                    not_mapping_reads += 1
                    if not_mapping:
                        # Keep the not_unique mapping reads (or not paired
                        # mapping).
                        # NOTE(review): writing the read through unchanged
                        # is assumed -- confirm.
                        out.write(read)
                    else:
                        # record the not mapping read number (2013-06-20)
                        filter_not_mapping_reads += 1
                    # Nothing to trim; read_info has no sequence field.
                    continue

                if len(loc_dict) > 0:
                    # --filter_dup has been set True: remove duplicates.
                    duplicate, loc_dict = DF(read_info, loc_dict, max_cov,
                                             single_on)
                else:
                    duplicate = False

                # record the mapping read basepair (2013-06-20); the
                # sequence is field 5 (single-end) or field 7 (paired-end).
                seq_index = 5 if single_on else 7
                all_mapping_bp += len(read_info[seq_index])

                # NOTE(review): the last argument is inferred from the
                # returned tuple (accumulators are passed in and handed
                # back) -- confirm against NF.nonuniform_filter.
                (record_mate, trim_position, filter_nonuniform_trim_bp_CG,
                 filter_duplicate_reads,
                 filter_remove_overlap_bp) = NF.nonuniform_filter(
                    read, out, read_info, ref, remove_overlap, duplicate,
                    single_on, record_mate, trim_position,
                    filter_nonuniform_trim_bp_CG, filter_duplicate_reads,
                    filter_remove_overlap_bp)
            del record_mate
    NR.nonuniform_generator(trim_position, name)

    # trim_position[i] is multiplied by i to obtain the trimmed basepairs.
    filter_nonuniform_trim_bp = sum(
        i * count for i, count in enumerate(trim_position))

    def percent(part, whole):
        # Empty inputs give a zero denominator; report 0.00% instead of
        # raising ZeroDivisionError.
        if not whole:
            return 0.0
        return float(part) / whole * 100

    unique_reads = all_reads - not_mapping_reads

    # Produce the filter report.
    info('Produce the report file...')
    with open(name + "_BSeQC_nonuniform_filter_report.txt", 'w') as report_out:

        def write_ratio(template, count, whole):
            report_out.write(template % (count, percent(count, whole), "%"))

        report_out.write('Total reads: %d\n' % all_reads)
        if single_on:
            write_ratio('Not unique mapping reads: %d(%.2f%s all reads)\n',
                        not_mapping_reads, all_reads)
            write_ratio('Unique mapping reads: %d(%.2f%s all reads)\n',
                        unique_reads, all_reads)
            write_ratio('Skip not unique mapping reads: %d(%.2f%s all reads)\n',
                        filter_not_mapping_reads, all_reads)
            report_out.write('In unique mapping reads:\n')
            report_out.write('All unique mapping basepairs: %d\n' % all_mapping_bp)
            write_ratio('Filter Duplicate reads: %d(%.2f%s of unique mapping reads)\n',
                        filter_duplicate_reads, unique_reads)
            write_ratio("Filter 5' nonconversion basepairs: %d(%.2f%s of unique mapping basepairs)\n",
                        filter_nonuniform_trim_bp, all_mapping_bp)
            write_ratio("Filter 5' nonconversion CpG basepairs: %d(%.2f%s of unique mapping basepairs)\n",
                        filter_nonuniform_trim_bp_CG, all_mapping_bp)
        else:
            write_ratio('Not unique paired mapping reads: %d(%.2f%s)\n',
                        not_mapping_reads, all_reads)
            write_ratio('Unique paired mapping reads: %d(%.2f%s)\n',
                        unique_reads, all_reads)
            write_ratio('Skip not paired unique mapping reads: %d(%.2f%s)\n',
                        filter_not_mapping_reads, all_reads)
            report_out.write('In unique paired mapping reads:\n')
            report_out.write('All unique paired mapping basepairs: %d\n' % all_mapping_bp)
            # Fixed: the percentage used to be computed as
            # dup / (all_reads - not_mapping_reads * 100) because of a
            # misplaced parenthesis; it now matches the single-end branch.
            write_ratio('Filter Duplicate reads: %d(%.2f%s of unique paired mapping reads)\n',
                        filter_duplicate_reads, unique_reads)
            write_ratio("Filter 5' nonconversion basepairs: %d(%.2f%s of unique mapping basepairs)\n",
                        filter_nonuniform_trim_bp, all_mapping_bp)
            write_ratio("Filter 5' nonconversion CpG basepairs: %d(%.2f%s of unique mapping basepairs)\n",
                        filter_nonuniform_trim_bp_CG, all_mapping_bp)
            write_ratio('Filter overlapped basepairs: %d(%.2f%s of unique paired mapping basepairs)\n',
                        filter_remove_overlap_bp, all_mapping_bp)
    info('Get the report file!')