Ejemplo n.º 1
0
def pos_counter_by_pos(bam_fpath, positions):
    '''It yields a Counter of in-read base offsets for each requested position.

    The alignment file found in bam_fpath is traversed column by column with
    AlignmentFile.pileup().  For every column whose
    (reference name, reference_pos) tuple is found in positions a Counter is
    yielded: its keys are the offsets that the piled-up base has inside each
    read and its values the number of reads with that offset.  For reverse
    reads the offset is mirrored (query_length - query_position - 1).

    Reads that have no query position in the column (query_position is None,
    which pysam documents for deletions and reference skips) are not counted.

    As a side effect the matching position and, for every counted read, its
    offset and query name are printed to standard output.

    positions can be any container that supports the "in" operator with
    (reference name, reference position) tuples.
    The alignment file is closed once the generator finishes or is discarded.
    '''
    alignmentfile = AlignmentFile(bam_fpath)
    # reference id -> reference name, filled lazily so getrname is only
    # called once per reference instead of once per pileup column
    ref_name_index = {}
    try:
        for pileup_col in alignmentfile.pileup():
            ref_id = pileup_col.reference_id
            try:
                ref_name = ref_name_index[ref_id]
            except KeyError:
                ref_name = alignmentfile.getrname(ref_id)
                ref_name_index[ref_id] = ref_name

            position = (ref_name, pileup_col.reference_pos)
            if position not in positions:
                continue

            pos_counter = Counter()
            # A parenthesized single expression prints the same text in
            # Python 2 and in Python 3
            print(position)
            for pileup_read in pileup_col.pileups:
                query_pos = pileup_read.query_position
                if query_pos is None:
                    # There is no read base in this column, so there is no
                    # offset to count (and the mirroring below would fail)
                    continue
                alignment = pileup_read.alignment
                if alignment.is_reverse:
                    base_pos = alignment.query_length - query_pos - 1
                else:
                    base_pos = query_pos
                print('%s %s' % (base_pos, alignment.qname))
                pos_counter[base_pos] += 1
            yield pos_counter
    finally:
        alignmentfile.close()
Ejemplo n.º 2
0
def downgrade_read_edges(in_fpath,
                         out_fpath,
                         size,
                         qual_to_substract=QUAL_TO_SUBSTRACT):
    '''It writes a BAM in which the reads have gone through the edge downgrade.

    Every read found in in_fpath is handed to _downgrade_edge_qualities
    together with size and qual_to_substract and it is afterwards written to
    out_fpath.  The output is opened in 'wb' mode using the input file as
    template.

    NOTE(review): the return value of _downgrade_edge_qualities is ignored
    and the very same read object is written, so the helper presumably
    modifies the read in place; confirm against its definition.

    Both files are explicitly closed, even if the processing fails, so the
    output does not depend on the garbage collector to be flushed.
    '''
    in_sam = AlignmentFile(in_fpath)
    try:
        out_sam = AlignmentFile(out_fpath, 'wb', template=in_sam)
        try:
            for aligned_read in in_sam:
                _downgrade_edge_qualities(aligned_read,
                                          size,
                                          qual_to_substract=qual_to_substract)
                out_sam.write(aligned_read)
        finally:
            out_sam.close()
    finally:
        in_sam.close()
Ejemplo n.º 3
0
def downgrade_read_edges(in_fpath,
                         out_fpath,
                         size,
                         qual_to_substract=QUAL_TO_SUBSTRACT):
    '''It writes a BAM in which the reads have gone through the edge downgrade.

    Every read found in in_fpath is handed to _downgrade_edge_qualities
    together with size and qual_to_substract and it is afterwards written to
    out_fpath.  The output is opened in 'wb' mode using the input file as
    template.

    A RuntimeError is raised as soon as a read carrying the
    LEFT_DOWNGRADED_TAG or the RIGTH_DOWNGRADED_TAG tag is found.  The reads
    processed before that one will already have been written to out_fpath.

    NOTE(review): the return value of _downgrade_edge_qualities is ignored
    and the very same read object is written, so the helper presumably
    modifies the read in place; confirm against its definition.

    Both files are explicitly closed, also when the error is raised, so the
    output does not depend on the garbage collector to be flushed.
    '''
    in_sam = AlignmentFile(in_fpath)
    try:
        out_sam = AlignmentFile(out_fpath, 'wb', template=in_sam)
        try:
            for aligned_read in in_sam:
                if (aligned_read.has_tag(LEFT_DOWNGRADED_TAG)
                        or aligned_read.has_tag(RIGTH_DOWNGRADED_TAG)):
                    msg = 'Edge qualities already downgraded\n'
                    raise RuntimeError(msg)

                _downgrade_edge_qualities(aligned_read,
                                          size,
                                          qual_to_substract=qual_to_substract)
                out_sam.write(aligned_read)
        finally:
            out_sam.close()
    finally:
        in_sam.close()
Ejemplo n.º 4
0
def classify_mapped_reads(bam_fhand,
                          mate_distance,
                          settings=get_setting('CHIMERAS_SETTINGS')):
    '''It yields (pair, kind) tuples for the mates found in a bam file.

    The alignments of bam_fhand.name are grouped by query name and split
    into mates.  Each group is tagged as NON_CHIMERIC, CHIMERA or UNKNOWN:
    the non chimeric test is tried first, the chimeric one second and UNKNOWN
    is used when both fail.

    pair is a list with the primary alignment of every mate converted by
    alignedread_to_seqitem.  The groups for which any of those items is None
    are not yielded.

    settings should have the keys MAX_CLIPPING, MAX_PE_LEN and
    MATE_DISTANCE_VARIATION.  The accepted mate distance range is
    mate_distance +- MATE_DISTANCE_VARIATION.
    '''
    bamfile = AlignmentFile(bam_fhand.name)

    clipping_limit = settings['MAX_CLIPPING']
    pe_len_limit = settings['MAX_PE_LEN']
    tolerance = settings['MATE_DISTANCE_VARIATION']
    distance_window = [mate_distance - tolerance, mate_distance + tolerance]
    ref_lengths = _get_ref_lengths(bamfile)

    for qname_group in _group_alignments_reads_by_qname(bamfile):
        alignments_by_mate = _split_mates(qname_group)

        # The order of the tests matters: chimeras are only looked for in
        # the groups that fail the non chimeric test
        if _mates_are_not_chimeric(alignments_by_mate, clipping_limit,
                                   distance_window, bamfile, ref_lengths):
            kind = NON_CHIMERIC
        elif _mates_are_chimeric(alignments_by_mate, bamfile, clipping_limit,
                                 pe_len_limit, ref_lengths):
            kind = CHIMERA
        else:
            kind = UNKNOWN

        pair = []
        for mate_alignments in alignments_by_mate:
            primary = _get_primary_alignment(mate_alignments)
            pair.append(alignedread_to_seqitem(primary))

        if None in pair:
            continue
        yield pair, kind
0
def mapped_count_by_rg(bam_fpaths, mapqx=None):
    '''It counts the mapped and unmapped reads of every read group.

    It returns a dict indexed by read group ID.  The values are IntCounters
    with the keys 'mapped' and 'unmapped' and, only when mapqx is given,
    'bigger_mapqx', which counts the reads with a mapq >= mapqx.

    When a bam has no read groups in its header, the base name of the file
    (without extension) is used as read group.  That base name is also used
    for the reads that have no read group of their own.  A read group that
    was not announced by the header gets its counter when its first read is
    found.

    NOTE(review): the counters of the read groups listed in a bam header are
    created anew for every file, so a read group ID shared by several bams
    only keeps the counts of the last one.  This is the original behaviour.
    '''
    do_mapqx = mapqx is not None

    def new_counter():
        'It creates an empty counter with all the expected keys'
        counter = IntCounter({'unmapped': 0, 'mapped': 0})
        if do_mapqx:
            counter['bigger_mapqx'] = 0
        return counter

    counter_by_rg = {}
    for bam_fpath in bam_fpaths:
        # It has to be calculated for every bam, it is the fallback read
        # group for the reads without one even if the header has read groups
        bam_basename = os.path.splitext(os.path.basename(bam_fpath))[0]
        bam = AlignmentFile(bam_fpath, 'rb')
        try:
            readgroups = get_bam_readgroups(bam)
            if readgroups is None:
                readgroups = [bam_basename]
            else:
                readgroups = [rg['ID'] for rg in readgroups]
            for readgroup in readgroups:
                counter_by_rg[readgroup] = new_counter()

            for read in bam:
                rg = get_rg_from_alignedread(read)
                if rg is None:
                    rg = bam_basename
                try:
                    counter = counter_by_rg[rg]
                except KeyError:
                    # read group not announced by the header
                    counter = counter_by_rg[rg] = new_counter()
                if do_mapqx and read.mapq >= mapqx:
                    counter['bigger_mapqx'] += 1
                if read.is_unmapped:
                    counter['unmapped'] += 1
                else:
                    counter['mapped'] += 1
        finally:
            bam.close()
    return counter_by_rg
Ejemplo n.º 6
0
def calculate_distance_distribution_in_bam(bam_fhand,
                                           max_clipping,
                                           max_distance=None):
    '''It counts the mate distances found in a bam split by orientation.

    The alignments of bam_fhand.name are grouped by query name and split
    into mates.  Every combination of an alignment of the first mate with an
    alignment of the second one, taking only the ones returned by
    _get_totally_mapped_alignments with the given max_clipping, is considered
    when both lie in the same reference.

    It returns a dict with the keys 'outies', 'innies' and 'others'.  Each
    value is an IntCounter indexed by distance.  When max_distance is given
    only the distances smaller than it are counted.
    '''
    bamfile = AlignmentFile(bam_fhand.name)
    kinds = ('outies', 'innies', 'others')
    stats = dict((kind, IntCounter()) for kind in kinds)

    for qname_group in _group_alignments_reads_by_qname(bamfile):
        mates = _split_mates(qname_group)
        for read1 in _get_totally_mapped_alignments(mates[0], max_clipping):
            for read2 in _get_totally_mapped_alignments(mates[1],
                                                        max_clipping):
                if read1.rname != read2.rname:
                    continue
                pair = [read1, read2]
                distance = _find_distance(pair)
                if _mates_are_outies(pair):
                    kind = 'outies'
                elif _mates_are_innies(pair):
                    kind = 'innies'
                else:
                    kind = 'others'
                if max_distance is not None and not max_distance > distance:
                    continue
                stats[kind][distance] += 1
    return stats
Ejemplo n.º 7
0
 def __call__(self, trim_packet):
     '''It trims the alignments of the bam and returns them as a packet.

     _pre_trim is called with the trim_packet, then the alignments found in
     self._bam_fhand.name are grouped by query name and split into mates.
     The result of _do_trim for each mate is stored inside a list of its
     own.  Finally _post_trim is called.

     The returned dict has those results in SEQS_PASSED and the ORPHAN_SEQS
     of the given trim_packet untouched.
     '''
     self._pre_trim(trim_packet)
     bamfile = AlignmentFile(self._bam_fhand.name)
     trimmed_seqs = [
         [self._do_trim(mate_alignments)]
         for qname_group in _group_alignments_reads_by_qname(bamfile)
         for mate_alignments in _split_mates(qname_group)
     ]
     self._post_trim()
     orphans = trim_packet[ORPHAN_SEQS]
     return {SEQS_PASSED: trimmed_seqs, ORPHAN_SEQS: orphans}
Ejemplo n.º 8
0
def pos_counter_by_pos(bam_fpath, positions):
    '''It yields a Counter of in-read base offsets for each requested position.

    The alignment file found in bam_fpath is traversed column by column with
    AlignmentFile.pileup().  For every column whose
    (reference name, reference_pos) tuple is found in positions a Counter is
    yielded: its keys are the offsets that the piled-up base has inside each
    read and its values the number of reads with that offset.  For reverse
    reads the offset is mirrored (query_length - query_position - 1).

    Reads that have no query position in the column (query_position is None,
    which pysam documents for deletions and reference skips) are not counted.

    As a side effect the matching position and, for every counted read, its
    offset and query name are printed to standard output.

    positions can be any container that supports the "in" operator with
    (reference name, reference position) tuples.
    The alignment file is closed once the generator finishes or is discarded.
    '''
    alignmentfile = AlignmentFile(bam_fpath)
    # reference id -> reference name, filled lazily so getrname is only
    # called once per reference instead of once per pileup column
    ref_name_index = {}
    try:
        for pileup_col in alignmentfile.pileup():
            ref_id = pileup_col.reference_id
            try:
                ref_name = ref_name_index[ref_id]
            except KeyError:
                ref_name = alignmentfile.getrname(ref_id)
                ref_name_index[ref_id] = ref_name

            position = (ref_name, pileup_col.reference_pos)
            if position not in positions:
                continue

            pos_counter = Counter()
            # A parenthesized single expression prints the same text in
            # Python 2 and in Python 3
            print(position)
            for pileup_read in pileup_col.pileups:
                query_pos = pileup_read.query_position
                if query_pos is None:
                    # There is no read base in this column, so there is no
                    # offset to count (and the mirroring below would fail)
                    continue
                alignment = pileup_read.alignment
                if alignment.is_reverse:
                    base_pos = alignment.query_length - query_pos - 1
                else:
                    base_pos = query_pos
                print('%s %s' % (base_pos, alignment.qname))
                pos_counter[base_pos] += 1
            yield pos_counter
    finally:
        alignmentfile.close()
Ejemplo n.º 9
0
def sort_by_position_in_ref(in_fhand, index_fpath, directory=None,
                            tempdir=None):
    '''It yields the reads of a file as they appear in a sorted mapping.

    The sequences of in_fhand.name are mapped as unpaired reads with bowtie2
    (--very-fast, plus -f when the detected format is a fasta one) against
    index_fpath.  The mapping is written by map_process_to_sortedbam into a
    temporary bam and every alignment found in it is yielded after being
    converted by alignedread_to_seqitem.

    tempdir is handed to map_process_to_sortedbam.  directory is not used, it
    is only kept so the existing callers do not break.

    This is a generator: nothing is mapped until the first item is requested.
    The temporary bam and the alignment file are released as soon as the
    generator finishes or is discarded.

    NOTE(review): an earlier comment here said that this was changed from
    bowtie to bwa mem and that the test does not work well, but the code maps
    with bowtie2.  Check it out.
    '''
    in_fpath = in_fhand.name
    # The handle is only needed to guess the format
    with open(in_fpath) as format_fhand:
        is_fasta = 'fasta' in get_format(format_fhand)

    extra_params = ['--very-fast']
    if is_fasta:
        extra_params.append('-f')
    bowtie2_process = map_with_bowtie2(index_fpath, paired_fpaths=None,
                                       unpaired_fpath=in_fpath,
                                       extra_params=extra_params)
    out_fhand = NamedTemporaryFile()
    try:
        map_process_to_sortedbam(bowtie2_process, out_fhand.name,
                                 tempdir=tempdir)
        samfile = AlignmentFile(out_fhand.name)
        try:
            for aligned_read in samfile:
                yield alignedread_to_seqitem(aligned_read)
        finally:
            samfile.close()
    finally:
        # Closing the temporary file removes it from the disk
        out_fhand.close()
Ejemplo n.º 10
0
 def _prepare_bams(self, bam_fpaths):
     '''It collects the read groups and reference lengths of the bams.

     It sets three attributes:
       - _bams: a list with a {'fpath': ..., 'rgs': ...} dict per bam, in
         the order given by bam_fpaths.
       - _rgs: a dict indexed by read group ID with the read group dicts.
         Each read group dict gets a 'bam' key with the index of its bam in
         _bams.  If an ID is found in several bams, the last one is kept.
       - _ref_lens: a dict with the length of every reference, taken from
         the last bam.

     A bam with no read groups gets one with None as ID and str(None) in
     the field named by self.bam_rg_field_for_vcf_sample.

     Every bam is closed once its header has been read.  With no bams at
     all, the three attributes are left empty.
     '''
     bams = []
     rgs = {}
     ref_lens = {}
     for idx, bam_fpath in enumerate(bam_fpaths):
         bam = AlignmentFile(bam_fpath)
         try:
             rgs_ = get_bam_readgroups(bam)
             if rgs_ is None:
                 rgs_ = [{'ID': None,
                          self.bam_rg_field_for_vcf_sample: str(None)}]
             bams.append({'fpath': bam_fpath, 'rgs': rgs_})
             for read_group in rgs_:
                 read_group['bam'] = idx
                 rgs[read_group['ID']] = read_group
             # We have to assume that all bams have the same references,
             # so the ones of the last bam are the ones kept
             ref_lens = dict(zip(bam.references, bam.lengths))
         finally:
             bam.close()
     self._bams = bams
     self._rgs = rgs
     self._ref_lens = ref_lens
Ejemplo n.º 11
0
def _count_mate_distances_in_bamfile(bamfile, max_clipping, max_distance):
    'It counts the mate distances of an open alignment file by orientation'
    stats = {
        'outies': IntCounter(),
        'innies': IntCounter(),
        'others': IntCounter()
    }
    for grouped_mates in _group_alignments_reads_by_qname(bamfile):
        mates = _split_mates(grouped_mates)
        for aligned_read1 in _get_totally_mapped_alignments(
                mates[0], max_clipping):
            for aligned_read2 in _get_totally_mapped_alignments(
                    mates[1], max_clipping):
                if aligned_read1.rname != aligned_read2.rname:
                    continue
                aligned_reads = [aligned_read1, aligned_read2]
                distance = _find_distance(aligned_reads)
                if _mates_are_outies(aligned_reads):
                    kind = 'outies'
                elif _mates_are_innies(aligned_reads):
                    kind = 'innies'
                else:
                    kind = 'others'
                if max_distance is None or max_distance > distance:
                    stats[kind][distance] += 1
    return stats


def calculate_distance_distribution(interleave_fhand,
                                    index_fpath,
                                    max_clipping,
                                    max_distance=None,
                                    tempdir=None,
                                    threads=None):
    '''It maps interleaved reads and counts the distances between mates.

    The reads of interleave_fhand.name are mapped with bwa mem (-a -M)
    against index_fpath, using the given threads, and the result is written
    sorted by query name into a temporary bam (tempdir is handed to
    map_process_to_sortedbam).

    The alignments are then grouped by query name and split into mates.
    Every combination of an alignment of the first mate with an alignment of
    the second one, taking only the ones returned by
    _get_totally_mapped_alignments with the given max_clipping, is considered
    when both lie in the same reference.

    It returns a dict with the keys 'outies', 'innies' and 'others'.  Each
    value is an IntCounter indexed by distance.  When max_distance is given
    only the distances smaller than it are counted.

    The alignment file and the temporary bam are released before returning.
    '''
    bam_fhand = NamedTemporaryFile(suffix='.bam')
    try:
        extra_params = ['-a', '-M']
        bwa = map_with_bwamem(index_fpath,
                              interleave_fpath=interleave_fhand.name,
                              extra_params=extra_params,
                              threads=threads)
        map_process_to_sortedbam(bwa,
                                 bam_fhand.name,
                                 key='queryname',
                                 tempdir=tempdir)
        bamfile = AlignmentFile(bam_fhand.name)
        try:
            return _count_mate_distances_in_bamfile(bamfile, max_clipping,
                                                    max_distance)
        finally:
            bamfile.close()
    finally:
        # Closing the temporary file removes it from the disk
        bam_fhand.close()
Ejemplo n.º 12
0
def _get_mapped_reads(bam_fpath, min_mapq=0):
    '''It returns a list with the query names of the mapped reads of a bam.

    The names keep the order of the file.  When min_mapq is not zero (or any
    other false value) only the reads with a mapq strictly greater than
    min_mapq are kept.
    '''
    bam = AlignmentFile(bam_fpath)
    mapped_names = []
    for read in bam:
        if read.is_unmapped:
            continue
        if min_mapq and not read.mapq > min_mapq:
            continue
        mapped_names.append(read.qname)
    return mapped_names