def check_division(data_folder, adaID, fragment, seq_run, qual_min=35,
                   reference='HXB2', maxreads=-1, VERBOSE=0, minor_allele=False):
    '''Check division into fragments: plot the coverage of one fragment.

    Parameters:
       data_folder: passed to the filename helpers to locate the input files
       adaID: passed to the filename helpers; also shown in the plot title
       fragment: fragment name; also shown in the plot title. If a file is
                 missing and the name starts with 'F3', the path is retried
                 with 'F3a' replaced by 'F3' (old nomenclature)
       seq_run: only used in the plot title
       qual_min: forwarded to get_allele_counts_insertions_from_file
       reference: not used by this function, kept so existing calls still work
       maxreads: forwarded to get_allele_counts_insertions_from_file; also
                 shown in the plot title
       VERBOSE: forwarded to get_allele_counts_insertions_from_file
       minor_allele: forwarded to plot_coverage

    Nothing is returned: the outcome is the call to plot_coverage.
    '''
    ref_fn = get_reference_premap_filename(data_folder, adaID, fragment)

    # FIXME: old nomenclature for F3a
    if (not os.path.isfile(ref_fn)) and fragment.startswith('F3'):
        ref_fn = ref_fn.replace('F3a', 'F3')

    # Only the length of the reference is handed to the counter below
    refseq = SeqIO.read(ref_fn, 'fasta')

    # Scan reads
    input_filename = get_divided_filename(data_folder, adaID, fragment, type='bam')

    # FIXME: old nomenclature for F3a
    if (not os.path.isfile(input_filename)) and fragment.startswith('F3'):
        input_filename = input_filename.replace('F3a', 'F3')

    # Forward qual_min so that the threshold chosen by the caller is honoured
    # instead of being silently ignored. The insertions (second element of
    # the result) are not needed for the coverage plot.
    counts, _ = get_allele_counts_insertions_from_file(input_filename,
                                                       len(refseq),
                                                       qual_min=qual_min,
                                                       maxreads=maxreads,
                                                       VERBOSE=VERBOSE)

    # Plot results
    labels = [('run', seq_run),
              ('adaID', adaID),
              ('fragment', fragment),
              ('maxreads', maxreads)]
    title = ', '.join(' '.join((name, str(value))) for name, value in labels)
    plot_coverage(counts, suptitle=title, minor_allele=minor_allele)
Esempio n. 2
0
def get_allele_counts(data_folder, adaID, fragment, VERBOSE=0, maxreads=1e10):
    '''Extract allele and insert counts from the filtered, mapped BAM file.

    The consensus (looked up with trim_primers=True) is read to obtain its
    length; the counting itself is delegated to
    get_allele_counts_insertions_from_file, whose result is returned as is.

    Parameters:
       data_folder, adaID, fragment: passed to the filename helpers
       VERBOSE: forwarded to the lower-level function
       maxreads: forwarded to the lower-level function
    '''
    # NOTE(review): qual_min is neither a parameter nor a local of this
    # function, so it has to resolve at module scope - confirm it exists.

    # Reference: consensus sequence with trimmed primers
    consensus_fn = get_consensus_filename(data_folder, adaID, fragment,
                                          trim_primers=True)
    consensus = SeqIO.read(consensus_fn, 'fasta')

    # BAM file with the mapped reads
    # Note: the reads should already be filtered of unmapped stuff at this point
    bam_fn = get_mapped_filename(data_folder, adaID, fragment,
                                 type='bam', filtered=True)
    bam_missing = not os.path.isfile(bam_fn)
    if bam_missing:
        # No BAM on disk yet: let convert_sam_to_bam handle this path
        convert_sam_to_bam(bam_fn)

    # Delegate the actual counting
    result = get_allele_counts_insertions_from_file(bam_fn,
                                                    len(consensus),
                                                    qual_min=qual_min,
                                                    maxreads=maxreads,
                                                    VERBOSE=VERBOSE)
    return result
def get_allele_counts(data_folder, adaID, fragment, VERBOSE=0, maxreads=1e10):
    """Extract allele and insert counts from the filtered, mapped BAM file.

    The length of the consensus (looked up with trim_primers=True) and the
    path of the BAM file are handed to
    get_allele_counts_insertions_from_file; its result is returned unchanged.

    Parameters:
       data_folder, adaID, fragment: passed to the filename helpers
       VERBOSE: forwarded to the lower-level function
       maxreads: forwarded to the lower-level function
    """
    # NOTE(review): qual_min is not defined inside this function; it must be
    # available at module scope for the final call to work - please confirm.

    # Consensus sequence used as reference
    consensus_path = get_consensus_filename(
        data_folder, adaID, fragment, trim_primers=True
    )
    consensus_record = SeqIO.read(consensus_path, "fasta")

    # Mapped reads
    # Note: the reads should already be filtered of unmapped stuff at this point
    mapped_path = get_mapped_filename(
        data_folder, adaID, fragment, type="bam", filtered=True
    )
    if os.path.isfile(mapped_path):
        pass
    else:
        # The BAM is not there yet: convert_sam_to_bam is called on its path
        convert_sam_to_bam(mapped_path)

    # Hand over to the lower-level function
    return get_allele_counts_insertions_from_file(
        mapped_path,
        len(consensus_record),
        qual_min=qual_min,
        maxreads=maxreads,
        VERBOSE=VERBOSE,
    )
Esempio n. 4
0
def check_division(data_folder,
                   adaID,
                   fragment,
                   seq_run,
                   qual_min=35,
                   reference='HXB2',
                   maxreads=-1,
                   VERBOSE=0,
                   minor_allele=False):
    '''Check division into fragments: plot the coverage of one fragment.

    Parameters:
       data_folder: passed to the filename helpers to locate the input files
       adaID: passed to the filename helpers; also shown in the plot title
       fragment: fragment name; also shown in the plot title. If a file is
                 missing and the name starts with 'F3', the path is retried
                 with 'F3a' replaced by 'F3' (old nomenclature)
       seq_run: only used in the plot title
       qual_min: forwarded to get_allele_counts_insertions_from_file
       reference: not used by this function, kept so existing calls still work
       maxreads: forwarded to get_allele_counts_insertions_from_file; also
                 shown in the plot title
       VERBOSE: forwarded to get_allele_counts_insertions_from_file
       minor_allele: forwarded to plot_coverage

    Nothing is returned: the outcome is the call to plot_coverage.
    '''
    uses_f3_naming = fragment.startswith('F3')

    ref_fn = get_reference_premap_filename(data_folder, adaID, fragment)

    # FIXME: old nomenclature for F3a
    if uses_f3_naming and not os.path.isfile(ref_fn):
        ref_fn = ref_fn.replace('F3a', 'F3')

    # Only the length of the reference is handed to the counter below
    refseq = SeqIO.read(ref_fn, 'fasta')

    # Scan reads
    input_filename = get_divided_filename(data_folder,
                                          adaID,
                                          fragment,
                                          type='bam')

    # FIXME: old nomenclature for F3a
    if uses_f3_naming and not os.path.isfile(input_filename):
        input_filename = input_filename.replace('F3a', 'F3')

    # Forward qual_min so that the threshold chosen by the caller is honoured
    # instead of being silently ignored. The insertions (second element of
    # the result) are not needed for the coverage plot.
    counts, _ = get_allele_counts_insertions_from_file(input_filename,
                                                       len(refseq),
                                                       qual_min=qual_min,
                                                       maxreads=maxreads,
                                                       VERBOSE=VERBOSE)

    # Plot results
    title_fields = [
        ('run', seq_run),
        ('adaID', adaID),
        ('fragment', fragment),
        ('maxreads', maxreads),
    ]
    title = ', '.join(
        ' '.join((name, str(value))) for name, value in title_fields)
    plot_coverage(counts, suptitle=title, minor_allele=minor_allele)