Ejemplo n.º 1
0
def sam_to_bam(input_fastq_files,
               input_sam_file,
               output_bam_file,
               quals,
               multihits,
               pe_sr_mode=False,
               softclip=True,
               keep_unmapped=True):
    """Convert a SAM file to BAM while re-associating reads with FASTQ records.

    Alignments are read from `input_sam_file`, regrouped by the alignment
    ordering helpers using the FASTQ input, and written to `output_bam_file`
    (the SAM file is used as the header template for the BAM file).

    Args:
        input_fastq_files: sequence of FASTQ file paths. A single path selects
            `fix_sr_alignment_ordering`; otherwise `fix_alignment_ordering`
            is used.
        input_sam_file: path of the SAM file to read.
        output_bam_file: path of the BAM file to write.
        quals: quality format identifier; quality conversion is requested
            from the FASTQ parser only when `softclip` is true and this
            differs from `SANGER_FORMAT`.
        multihits: threshold compared against the 'XM' tag of unmapped reads.
            Reads with XM below it are counted as unmapped, the others as
            multimapping.
            NOTE(review): presumably the aligner's multihit ceiling - confirm.
        pe_sr_mode: forwarded unchanged to `fix_alignment_ordering`.
        softclip: when true, `soft_pad_read` is applied to every read with
            its FASTQ record.
        keep_unmapped: when false, unmapped reads whose XM tag is below
            `multihits` are not written to the BAM file.

    Returns:
        None. Counts are reported through `logging.debug`.
    """
    num_unmapped = 0
    num_multihits = 0
    num_frags = 0
    # quality conversion only matters when the FASTQ qualities are going to
    # be copied onto the reads and are not already in Sanger format
    if softclip and (quals != SANGER_FORMAT):
        kwargs = {"convert_quals": True, "qual_format": quals}
    else:
        kwargs = {"convert_quals": False}
    samfh = None
    bamfh = None
    fastq_handles = []
    fqiters = []
    try:
        samfh = pysam.Samfile(input_sam_file, "r")
        bamfh = pysam.Samfile(output_bam_file, "wb", template=samfh)
        # keep the raw file objects so they can be closed explicitly;
        # closing the parser iterators alone does not close the files
        for fq in input_fastq_files:
            fastq_fh = open(fq)
            fastq_handles.append(fastq_fh)
            fqiters.append(parse_fastq_record(fastq_fh, **kwargs))
        # handle single-read and paired-end
        if len(fqiters) == 1:
            reorder_func = fix_sr_alignment_ordering(samfh, fqiters[0])
        else:
            reorder_func = fix_alignment_ordering(samfh, fqiters, pe_sr_mode)
        # iterate through buffer
        for bufitems in reorder_func:
            num_frags += 1
            for bufitem in bufitems:
                for r in bufitem.reads:
                    # softclip uses the fastq record to replace the sequence
                    # and quality scores of the read
                    if softclip:
                        soft_pad_read(bufitem.fqrec, r)
                    # keep statistics of unmapped/multimapped reads and
                    # suppress output if 'keep_unmapped' is False
                    if r.is_unmapped:
                        if r.opt('XM') < multihits:
                            num_unmapped += 1
                            if not keep_unmapped:
                                continue
                        else:
                            num_multihits += 1
                    bamfh.write(r)
    finally:
        # release every resource even when conversion fails part-way
        for fqiter in fqiters:
            fqiter.close()
        for fastq_fh in fastq_handles:
            fastq_fh.close()
        if bamfh is not None:
            bamfh.close()
        if samfh is not None:
            samfh.close()
    logging.debug("Found %d fragments", num_frags)
    logging.debug("\t%d unmapped reads", num_unmapped)
    logging.debug("\t%d multimapping (>%dX) reads", num_multihits, multihits)
Ejemplo n.º 2
0
def sam_to_bam(input_fastq_files, input_sam_file, output_bam_file,
               quals, multihits, pe_sr_mode=False, softclip=True,
               keep_unmapped=True):
    """Convert a SAM file to BAM while re-associating reads with FASTQ records.

    Alignments are read from `input_sam_file`, regrouped by the alignment
    ordering helpers using the FASTQ input, and written to `output_bam_file`
    (the SAM file is used as the header template for the BAM file).

    Args:
        input_fastq_files: sequence of FASTQ file paths. A single path selects
            `fix_sr_alignment_ordering`; otherwise `fix_alignment_ordering`
            is used.
        input_sam_file: path of the SAM file to read.
        output_bam_file: path of the BAM file to write.
        quals: quality format identifier; quality conversion is requested
            from the FASTQ parser only when `softclip` is true and this
            differs from `SANGER_FORMAT`.
        multihits: threshold compared against the 'XM' tag of unmapped reads.
            Reads with XM below it are counted as unmapped, the others as
            multimapping.
            NOTE(review): presumably the aligner's multihit ceiling - confirm.
        pe_sr_mode: forwarded unchanged to `fix_alignment_ordering`.
        softclip: when true, `soft_pad_read` is applied to every read with
            its FASTQ record.
        keep_unmapped: when false, unmapped reads whose XM tag is below
            `multihits` are not written to the BAM file.

    Returns:
        None. Counts are reported through `logging.debug`.
    """
    num_unmapped = 0
    num_multihits = 0
    num_frags = 0
    # quality conversion only matters when the FASTQ qualities are going to
    # be copied onto the reads and are not already in Sanger format
    if softclip and (quals != SANGER_FORMAT):
        kwargs = {"convert_quals": True, "qual_format": quals}
    else:
        kwargs = {"convert_quals": False}
    samfh = None
    bamfh = None
    fastq_handles = []
    fqiters = []
    try:
        samfh = pysam.Samfile(input_sam_file, "r")
        bamfh = pysam.Samfile(output_bam_file, "wb", template=samfh)
        # keep the raw file objects so they can be closed explicitly;
        # closing the parser iterators alone does not close the files
        for fq in input_fastq_files:
            fastq_fh = open(fq)
            fastq_handles.append(fastq_fh)
            fqiters.append(parse_fastq_record(fastq_fh, **kwargs))
        # handle single-read and paired-end
        if len(fqiters) == 1:
            reorder_func = fix_sr_alignment_ordering(samfh, fqiters[0])
        else:
            reorder_func = fix_alignment_ordering(samfh, fqiters, pe_sr_mode)
        # iterate through buffer
        for bufitems in reorder_func:
            num_frags += 1
            for bufitem in bufitems:
                for r in bufitem.reads:
                    # softclip uses the fastq record to replace the sequence
                    # and quality scores of the read
                    if softclip:
                        soft_pad_read(bufitem.fqrec, r)
                    # keep statistics of unmapped/multimapped reads and
                    # suppress output if 'keep_unmapped' is False
                    if r.is_unmapped:
                        if r.opt('XM') < multihits:
                            num_unmapped += 1
                            if not keep_unmapped:
                                continue
                        else:
                            num_multihits += 1
                    bamfh.write(r)
    finally:
        # release every resource even when conversion fails part-way
        for fqiter in fqiters:
            fqiter.close()
        for fastq_fh in fastq_handles:
            fastq_fh.close()
        if bamfh is not None:
            bamfh.close()
        if samfh is not None:
            samfh.close()
    logging.debug("Found %d fragments", num_frags)
    logging.debug("\t%d unmapped reads", num_unmapped)
    logging.debug("\t%d multimapping (>%dX) reads", num_multihits, multihits)
Ejemplo n.º 3
0
def sam_stdin_to_bam(output_bam_file,
                     input_fastq_file,
                     multihits,
                     is_paired=True,
                     keep_unmapped=True):
    """Convert SAM records read from standard input into a BAM file.

    Alignments are read from stdin ("-"), regrouped against the FASTQ file
    by the alignment ordering helpers, and written to `output_bam_file`
    (the SAM stream is used as the header template for the BAM file).

    Args:
        output_bam_file: path of the BAM file to write.
        input_fastq_file: path of the FASTQ file handed to the ordering
            helper as an open file object.
        multihits: threshold compared against the 'XM' tag of unmapped reads.
            Reads with XM below it are counted as unmapped, the others as
            multimapping.
            NOTE(review): presumably the aligner's multihit ceiling - confirm.
        is_paired: when true, `fix_pe_alignment_ordering` is used; otherwise
            `fix_sr_alignment_ordering`.
        keep_unmapped: when false, unmapped reads whose XM tag is below
            `multihits` are not written to the BAM file.

    Returns:
        None. Counts are reported through `logging`.
    """
    num_unmapped = 0
    num_multihits = 0
    samfh = None
    bamfh = None
    fastq_fh = None
    try:
        samfh = pysam.Samfile("-", "r")
        bamfh = pysam.Samfile(output_bam_file, "wb", template=samfh)
        fastq_fh = open(input_fastq_file)
        if is_paired:
            # the paired helper yields one extra nesting level (a group of
            # read lists per fragment); flatten it so that both modes share
            # the same filtering loop
            read_groups = (reads
                           for pe_reads in fix_pe_alignment_ordering(
                               samfh, fastq_fh, is_paired=is_paired)
                           for reads in pe_reads)
        else:
            read_groups = fix_sr_alignment_ordering(samfh, fastq_fh)
        for reads in read_groups:
            for r in reads:
                if r.is_unmapped:
                    if r.opt('XM') < multihits:
                        num_unmapped += 1
                        if not keep_unmapped:
                            continue
                    else:
                        # only reads at or above the threshold are
                        # multimapping; counting every kept unmapped read
                        # here would inflate the statistic
                        num_multihits += 1
                bamfh.write(r)
    finally:
        # release every resource even when conversion fails part-way
        if fastq_fh is not None:
            fastq_fh.close()
        if bamfh is not None:
            bamfh.close()
        if samfh is not None:
            samfh.close()
    logging.debug("[SAMTOBAM] Filtered %d unmapped reads", num_unmapped)
    logging.debug("[SAMTOBAM] Found %d multimapping (>%d) reads",
                  num_multihits, multihits)
    logging.info("[SAMTOBAM] Finished converting SAM -> BAM")
Ejemplo n.º 4
0
def sam_stdin_to_bam(output_bam_file, input_fastq_file, multihits,
                     is_paired=True, keep_unmapped=True):
    """Convert SAM records read from standard input into a BAM file.

    Alignments are read from stdin ("-"), regrouped against the FASTQ file
    by the alignment ordering helpers, and written to `output_bam_file`
    (the SAM stream is used as the header template for the BAM file).

    Args:
        output_bam_file: path of the BAM file to write.
        input_fastq_file: path of the FASTQ file handed to the ordering
            helper as an open file object.
        multihits: threshold compared against the 'XM' tag of unmapped reads.
            Reads with XM below it are counted as unmapped, the others as
            multimapping.
            NOTE(review): presumably the aligner's multihit ceiling - confirm.
        is_paired: when true, `fix_pe_alignment_ordering` is used; otherwise
            `fix_sr_alignment_ordering`.
        keep_unmapped: when false, unmapped reads whose XM tag is below
            `multihits` are not written to the BAM file.

    Returns:
        None. Counts are reported through `logging`.
    """
    num_unmapped = 0
    num_multihits = 0
    samfh = None
    bamfh = None
    fastq_fh = None
    try:
        samfh = pysam.Samfile("-", "r")
        bamfh = pysam.Samfile(output_bam_file, "wb", template=samfh)
        fastq_fh = open(input_fastq_file)
        if is_paired:
            # the paired helper yields one extra nesting level (a group of
            # read lists per fragment); flatten it so that both modes share
            # the same filtering loop
            read_groups = (reads
                           for pe_reads in fix_pe_alignment_ordering(
                               samfh, fastq_fh, is_paired=is_paired)
                           for reads in pe_reads)
        else:
            read_groups = fix_sr_alignment_ordering(samfh, fastq_fh)
        for reads in read_groups:
            for r in reads:
                if r.is_unmapped:
                    if r.opt('XM') < multihits:
                        num_unmapped += 1
                        if not keep_unmapped:
                            continue
                    else:
                        # only reads at or above the threshold are
                        # multimapping; counting every kept unmapped read
                        # here would inflate the statistic
                        num_multihits += 1
                bamfh.write(r)
    finally:
        # release every resource even when conversion fails part-way
        if fastq_fh is not None:
            fastq_fh.close()
        if bamfh is not None:
            bamfh.close()
        if samfh is not None:
            samfh.close()
    logging.debug("[SAMTOBAM] Filtered %d unmapped reads", num_unmapped)
    logging.debug("[SAMTOBAM] Found %d multimapping (>%d) reads",
                  num_multihits, multihits)
    logging.info("[SAMTOBAM] Finished converting SAM -> BAM")