Esempio n. 1
0
def fix_insert_size(in_bam, config):
    """
    Rewrite the PI tag of the first read group so it holds the insert size.

    Tophat sets PI in the RG to be the inner distance size, but the SAM spec
    states it should be the insert size. The corrected value is computed as
    the existing PI plus twice the estimated read length, and every record
    of in_bam is copied into a new file that carries the corrected header.

    in_bam -- path to the alignment file produced by Tophat
    config -- not used by this function; kept so callers need not change

    Returns the path of the ".pi_fixed.bam" file (reused if it already
    exists), or in_bam unchanged when the first read group has no PI tag.
    """
    base = os.path.splitext(in_bam)[0]
    fixed_file = base + ".pi_fixed.bam"
    if file_exists(fixed_file):
        return fixed_file
    # Output is written under this scratch name and only moved to fixed_file
    # once every record has been copied.
    header_file = base + ".header.sam"
    read_length = bam.estimate_read_length(in_bam)
    # Only the header is needed from this handle; the with block closes it on
    # every path, including the early return further down.
    with bam.open_samfile(in_bam) as bam_handle:
        header = bam_handle.header.copy()
    # NOTE(review): the edits below rely on the header behaving like a
    # mutable dict of lists -- confirm against the pysam version in use.
    rg_dict = header['RG'][0]
    if 'PI' not in rg_dict:
        return in_bam
    # inner distance + one read length on each side = insert size
    rg_dict['PI'] = int(rg_dict['PI']) + 2 * read_length
    header['RG'][0] = rg_dict
    with pysam.Samfile(header_file, "wb", header=header) as out_handle:
        with bam.open_samfile(in_bam) as in_handle:
            for record in in_handle:
                out_handle.write(record)
    shutil.move(header_file, fixed_file)
    return fixed_file
Esempio n. 2
0
def fix_insert_size(in_bam, config):
    """
    Correct the read group PI value written by Tophat.

    Tophat sets PI in the RG to be the inner distance size, but the SAM spec
    states it should be the insert size. This copies the alignments into a
    new file whose header has PI in the first read group increased by twice
    the estimated read length.

    in_bam -- path to the Tophat alignment file
    config -- accepted but not read here

    Returns the ".pi_fixed.bam" path; if that file already exists it is
    returned without redoing the work. Returns in_bam itself when the first
    read group carries no PI tag.
    """
    prefix = os.path.splitext(in_bam)[0]
    fixed_file = prefix + ".pi_fixed.bam"
    if file_exists(fixed_file):
        return fixed_file
    # Intermediate output path; it is moved onto fixed_file at the very end.
    header_file = prefix + ".header.sam"
    read_length = bam.estimate_read_length(in_bam)
    # Read the header inside a with block so the handle is released even
    # when the function returns early because PI is absent.
    with bam.open_samfile(in_bam) as bam_handle:
        header = bam_handle.header.copy()
    # NOTE(review): assumes header supports dict-style item access and
    # in-place modification -- verify for the installed pysam.
    rg_dict = header['RG'][0]
    if 'PI' not in rg_dict:
        return in_bam
    inner_distance = int(rg_dict['PI'])
    # The insert spans the inner distance plus both reads.
    rg_dict['PI'] = inner_distance + 2 * read_length
    header['RG'][0] = rg_dict
    with pysam.Samfile(header_file, "wb", header=header) as out_handle:
        with bam.open_samfile(in_bam) as in_handle:
            for record in in_handle:
                out_handle.write(record)
    shutil.move(header_file, fixed_file)
    return fixed_file
Esempio n. 3
0
def pick_kmersize(fq):
    """
    Choose a kmer size for fq, following https://www.biostars.org/p/201474/

    The size is half the estimated read length (rounded), capped at 31,
    and bumped up by one whenever that would otherwise be an even number.
    The read length is estimated with the bam helper when fq is a BAM file
    and with the fastq helper otherwise.
    """
    if bam.is_bam(fq):
        estimate = bam.estimate_read_length
    else:
        estimate = fastq.estimate_read_length
    half = int(round(estimate(fq) / 2))
    # 31 is the ceiling; shorter reads fall back to half their length
    k = min(half, 31)
    # the returned size is always odd
    return k + 1 if k % 2 == 0 else k
Esempio n. 4
0
def pick_kmersize(fq):
    """
    Return an odd kmer size suited to the reads in fq.

    Based on https://www.biostars.org/p/201474/ -- use 31 unless the reads
    are short, in which case roughly half the read length is used instead.
    An even result is increased by one before being returned.
    """
    max_kmer = 31
    is_bam_input = bam.is_bam(fq)
    if is_bam_input:
        length = bam.estimate_read_length(fq)
    else:
        length = fastq.estimate_read_length(fq)
    candidate = int(round(length / 2))
    if candidate >= max_kmer:
        candidate = max_kmer
    # odd sizes are returned as they are; even ones move up to the next odd
    if candidate % 2 != 0:
        return candidate
    return candidate + 1