Ejemplo n.º 1
0
    def clip(self, read):
        """Trim a read to the start/end offsets recorded for its query name.

        Offsets are looked up by ``read.qname`` in ``self.start_trim_dict``
        and ``self.end_trim_dict``.  A missing entry means that end is left
        alone (start defaults to 0, end to the sentinel -1).

        The read is modified in place: ``seq``, ``qual`` and ``cigar`` are
        replaced, and after a start trim ``pos`` is recomputed so that the
        alignment end (``aend``) is unchanged.  Each trim performed is
        reported to ``self.stats``.

        Args:
            read: alignment record exposing ``qname``, ``seq``, ``qual``,
                ``cigar``, ``pos`` and ``aend``.
                NOTE(review): presumably a pysam aligned read -- confirm.

        Returns:
            The same ``read`` object that was passed in.
        """
        first_base_pos = self.start_trim_dict.get(read.qname, 0)
        last_base_pos = self.end_trim_dict.get(read.qname, -1)

        # cache original details, otherwise they go to None
        seq, qual, cig, end_pos = read.seq, read.qual, read.cigar, read.aend

        if first_base_pos:
            self.stats.start_trim(self.external_id)
            read.seq = seq[first_base_pos:]
            # a read may have no qualities; slicing None would raise
            if qual:
                read.qual = qual[first_base_pos:]
            read.cigar = cigar.trim_cigar(cig, len(read.seq), start=True)
            # move the start so that the alignment end stays where it was
            read.pos = end_pos - cigar.ref_length(read.cigar)
            assert read.aend == end_pos

            # recache the trimmed fields so the end trim works on them
            seq, qual, cig = read.seq, read.qual, read.cigar

            # the end offset was relative to the untrimmed sequence
            if last_base_pos != -1:
                last_base_pos = last_base_pos - first_base_pos

        if last_base_pos != -1:
            self.stats.end_trim(self.external_id)
            read.seq = seq[:last_base_pos]
            if qual:
                read.qual = qual[:last_base_pos]
            read.cigar = cigar.trim_cigar(cig, len(read.seq))

        return read
Ejemplo n.º 2
0
    def clip(self, read):
        """Trim a read in place to this amplicon's trim window.

        The start and end trim indices are obtained from
        ``self._find_position`` using ``self.trim_start`` / ``self.trim_end``
        and ``read.positions``.  Soft clips are stripped before trimming.
        Each trim performed is reported to ``self.stats``.

        Returns:
            ``None`` when neither index is truthy (the read is skipped),
            otherwise the same ``read`` object with ``seq``, ``qual``,
            ``cigar`` and (after a start trim) ``pos`` updated.
        """
        # work out the trim indices within the read
        aligned = read.positions
        start_idx = self._find_position(self.trim_start, aligned)
        end_idx = self._find_position(self.trim_end, aligned, lower=False)

        # exclude primer-only reads
        # TODO: may be better to check the read start end for this
        if not start_idx and not end_idx:
            return None

        # snapshot the fields up front, otherwise they go to None
        bases = read.seq
        quals = read.qual
        ops = read.cigar
        ref_end = read.aend

        # pileup position ignores soft clips, so strip them
        ops, bases, quals = cigar.remove_soft(ops, bases, quals)

        if start_idx:
            self.stats.start_trim(self.external_id)
            read.seq = bases[start_idx:]
            if quals:
                read.qual = quals[start_idx:]
            kept = len(read.seq)
            read.cigar = cigar.trim_cigar(ops, kept, start=True)
            # shift the start so the alignment end is unchanged
            read.pos = ref_end - cigar.ref_length(read.cigar)
            assert read.aend == ref_end

            # take a fresh snapshot of the trimmed read
            bases = read.seq
            quals = read.qual
            ops = read.cigar
            ref_end = read.aend

            # the end index was relative to the untrimmed sequence
            if end_idx is not None:
                end_idx = end_idx - start_idx

        if end_idx is None:
            return read

        self.stats.end_trim(self.external_id)
        read.seq = bases[:end_idx]
        if quals:
            read.qual = quals[:end_idx]
        read.cigar = cigar.trim_cigar(ops, len(read.seq))
        return read