Example #1
0
    def alignPrimerToSequence(self):
        """
        Align the relevant primer to the base calls of a lone trace file.

        Nothing is done when self.numseqs is 2 or when the selected primer
        is the empty string.  Otherwise self.alignedseqs[0] and
        self.seqindexes[0] are replaced with the versions produced by the
        primer alignment (which may contain extra gaps), and the aligned
        primer, after being passed through trimAlignedPrimerEnds(), is
        stored as self.alignedprimers.
        """
        if self.numseqs == 2:
            return

        # A reverse-complemented trace is searched for the forward primer;
        # any other trace is searched for the reverse complement of the
        # reverse primer.
        if self.seqtraces[0].isReverseComplemented():
            target = self.settings.getForwardPrimer()
        else:
            revprimer = self.settings.getReversePrimer()
            target = sequencetrace.reverseCompSequence(revprimer)

        if target == '':
            return

        # A gap penalty of -12 (rather than -6) is used because the harsher
        # penalty seems to improve the odds of the primer landing in the
        # correct location.
        aligner = PairwiseAlignment()
        aligner.setGapPenalty(-12)
        aligner.setSequences(target, self.alignedseqs[0])
        aligner.doAlignment()
        primer_row, trace_row = aligner.getAlignedSequences()

        # Adopt the re-aligned trace sequence along with its index list.
        self.alignedseqs[0] = trace_row
        self.seqindexes[0] = aligner.getAlignedSeqIndexes()[1]

        # Leading and trailing gap characters of the aligned primer are
        # swapped for spaces before it is saved.
        self.alignedprimers = self.trimAlignedPrimerEnds(primer_row)
Example #2
0
    def makeConsensusSequence(self):
        """
        Build the consensus sequence for the loaded trace(s).

        The steps are: (1) fill self.alignedseqs and self.seqindexes, by
        pairwise-aligning the two traces' base calls when self.numseqs is 2
        or by copying the single trace's base calls otherwise; (2) align
        the primers if both a forward and a reverse primer are configured;
        (3) call the consensus routine selected by the number of traces and
        the configured consensus algorithm; and (4) apply whichever
        trimming steps the settings request.
        """
        min_confscore = self.settings.getMinConfScore()

        # Get the raw sequences and align the forward/reverse traces if we
        # have both.
        if self.numseqs == 2:
            align = PairwiseAlignment()
            align.setSequences(self.seqtraces[0].getBaseCalls(),
                               self.seqtraces[1].getBaseCalls())
            align.doAlignment()
            self.alignedseqs[0], self.alignedseqs[1] = (
                align.getAlignedSequences())
            self.seqindexes[0], self.seqindexes[1] = (
                align.getAlignedSeqIndexes())
        else:
            self.alignedseqs[0] = self.seqtraces[0].getBaseCalls()
            # Store a real list rather than a bare range object: under
            # Python 3 a range is immutable, whereas the index lists are
            # modified in place elsewhere (e.g., with insert()).
            self.seqindexes[0] = list(range(len(self.alignedseqs[0])))

        # Primer alignment is only attempted when both primers are set.
        haveprimers = (self.settings.getForwardPrimer() != ''
                       and self.settings.getReversePrimer() != '')
        if haveprimers:
            if self.numseqs == 1:
                self.alignPrimerToSequence()
            else:
                self.alignPrimersToAlignment()

        # Build the consensus sequence.
        if self.numseqs == 1:
            self.makeSingleConsensus(min_confscore)
        elif self.settings.getConsensusAlgorithm() == 'Bayesian':
            self.makeBayesianConsensus(min_confscore)
        else:
            self.makeLegacyConsensus(min_confscore)

        # Do sequence trimming, if requested.
        if self.settings.getTrimConsensus():
            if self.settings.getTrimPrimers() and haveprimers:
                if self.numseqs == 1:
                    self.trimPrimerFromSequence()
                else:
                    self.trimPrimersFromAlignment()

            if self.settings.getTrimEndGaps():
                self.trimEndGaps()

            if self.settings.getDoQualityTrim():
                winsize, basecnt = self.settings.getQualityTrimParams()
                self.trimConsensus(winsize, basecnt)
Example #3
0
    def alignPrimerToSequence(self):
        """
        Align the relevant primer to the base calls of a lone trace file.

        Nothing is done when self.numseqs is 2 or when the selected primer
        is the empty string.  Otherwise self.alignedseqs[0] and
        self.seqindexes[0] are replaced with the versions produced by the
        primer alignment (which may contain extra gaps), and the aligned
        primer, after being passed through trimAlignedPrimerEnds(), is
        stored as self.alignedprimers.
        """
        if self.numseqs == 2:
            return

        # A reverse-complemented trace is searched for the forward primer;
        # any other trace is searched for the reverse complement of the
        # reverse primer.
        if self.seqtraces[0].isReverseComplemented():
            target = self.settings.getForwardPrimer()
        else:
            revprimer = self.settings.getReversePrimer()
            target = sequencetrace.reverseCompSequence(revprimer)

        if target == '':
            return

        # A gap penalty of -12 (rather than -6) is used because the harsher
        # penalty seems to improve the odds of the primer landing in the
        # correct location.
        aligner = PairwiseAlignment()
        aligner.setGapPenalty(-12)
        aligner.setSequences(target, self.alignedseqs[0])
        aligner.doAlignment()
        primer_row, trace_row = aligner.getAlignedSequences()

        # Adopt the re-aligned trace sequence along with its index list.
        self.alignedseqs[0] = trace_row
        self.seqindexes[0] = aligner.getAlignedSeqIndexes()[1]

        # Leading and trailing gap characters of the aligned primer are
        # swapped for spaces before it is saved.
        self.alignedprimers = self.trimAlignedPrimerEnds(primer_row)
Example #4
0
    def makeConsensusSequence(self):
        """
        Build the consensus sequence for the loaded trace(s).

        The steps are: (1) fill self.alignedseqs and self.seqindexes, by
        pairwise-aligning the two traces' base calls when self.numseqs is 2
        or by copying the single trace's base calls otherwise; (2) align
        the primers if both a forward and a reverse primer are configured;
        (3) call the consensus routine selected by the number of traces and
        the configured consensus algorithm; and (4) apply whichever
        trimming steps the settings request.
        """
        min_confscore = self.settings.getMinConfScore()

        # Get the raw sequences and align the forward/reverse traces if we
        # have both.
        if self.numseqs == 2:
            align = PairwiseAlignment()
            align.setSequences(self.seqtraces[0].getBaseCalls(),
                               self.seqtraces[1].getBaseCalls())
            align.doAlignment()
            self.alignedseqs[0], self.alignedseqs[1] = (
                align.getAlignedSequences())
            self.seqindexes[0], self.seqindexes[1] = (
                align.getAlignedSeqIndexes())
        else:
            self.alignedseqs[0] = self.seqtraces[0].getBaseCalls()
            # Store a real list rather than a bare range object: under
            # Python 3 a range is immutable, whereas the index lists are
            # modified in place elsewhere (e.g., with insert()).
            self.seqindexes[0] = list(range(len(self.alignedseqs[0])))

        # Primer alignment is only attempted when both primers are set.
        haveprimers = (self.settings.getForwardPrimer() != ''
                       and self.settings.getReversePrimer() != '')
        if haveprimers:
            if self.numseqs == 1:
                self.alignPrimerToSequence()
            else:
                self.alignPrimersToAlignment()

        # Build the consensus sequence.
        if self.numseqs == 1:
            self.makeSingleConsensus(min_confscore)
        elif self.settings.getConsensusAlgorithm() == 'Bayesian':
            self.makeBayesianConsensus(min_confscore)
        else:
            self.makeLegacyConsensus(min_confscore)

        # Do sequence trimming, if requested.
        if self.settings.getTrimConsensus():
            if self.settings.getTrimPrimers() and haveprimers:
                if self.numseqs == 1:
                    self.trimPrimerFromSequence()
                else:
                    self.trimPrimersFromAlignment()

            if self.settings.getTrimEndGaps():
                self.trimEndGaps()

            if self.settings.getDoQualityTrim():
                winsize, basecnt = self.settings.getQualityTrimParams()
                self.trimConsensus(winsize, basecnt)
Example #5
0
    def alignPrimersToAlignment(self):
        """
        Aligns the forward and reverse primer sequences to the forward/reverse
        sequencing trace alignment.  Searching is limited to the non-overlapping
        end regions of the alignment because the primers can only be in these
        regions.  Also adjusts the alignment and alignment indices to
        accommodate the alignment with the primers, if extra gaps are needed.
        An alignment-length string containing the aligned primers in the
        appropriate locations is saved as self.alignedprimers.  The aligned
        primers and end gap regions of the trace alignment are also saved in a
        list as self.pr_alignments =
        [[fwd_align, left_end], [rev_align, right_end]].

        Returns without modifying anything if self.numseqs is not 2, or if
        the forward aligned sequence or the reverse aligned sequence is
        empty or consists entirely of gap characters.
        """
        if self.numseqs != 2:
            return

        # Figure out which trace is the reverse trace.
        if self.seqtraces[0].isReverseComplemented():
            rev, fwd = 0, 1
        else:
            rev, fwd = 1, 0

        fwdseq = self.alignedseqs[fwd]
        revseq = self.alignedseqs[rev]

        # Get the portions of the trace sequences that are in the end gaps, as
        # these regions are where the primers will be located.  We don't use
        # the getEndGapStart() methods because we want to be sure that we're
        # operating on the correct sequence.
        #
        # lgapstart is the number of leading gap characters in the forward
        # sequence; rgapstart is the position of the last non-gap character
        # in the reverse sequence (-1 if there is none).  Computing these
        # from stripped lengths avoids indexing past the end of an all-gap
        # or empty sequence.
        lgapstart = len(fwdseq) - len(fwdseq.lstrip('-'))
        rgapstart = len(revseq.rstrip('-')) - 1

        # Check if either sequence is empty.  If so, we can't proceed.
        if lgapstart == len(fwdseq) or rgapstart < 0:
            return

        # Get the left and right ends that should contain the primers.
        leftend = revseq[0:lgapstart]
        rightend = fwdseq[rgapstart + 1:]

        # Align the forward primer sequence to the left end gap sequence.  Using
        # a harsher gap penalty (-9 instead of -6) seems to generally produce
        # more useful alignments.
        forward = self.settings.getForwardPrimer()
        align = PairwiseAlignment()
        align.setGapPenalty(-9)
        align.setSequences(forward, leftend)
        align.doAlignment()
        fwdaligned, lendaligned = align.getAlignedSequences()

        # Align the reverse complemented reverse primer sequence to the
        # right end gap sequence.
        reverse = sequencetrace.reverseCompSequence(
            self.settings.getReversePrimer())
        align.setSequences(reverse, rightend)
        align.doAlignment()
        revaligned, rendaligned = align.getAlignedSequences()

        # Replace starting and ending gap characters in the aligned primer
        # sequences with spaces.
        fwdaligned = self.trimAlignedPrimerEnds(fwdaligned)
        revaligned = self.trimAlignedPrimerEnds(revaligned)

        # If the primer alignment introduced gaps into the end gap region of
        # the trace alignment, update the alignment and alignment indices to
        # include the extra gaps.
        for index, char in enumerate(lendaligned):
            if char != '-':
                continue

            # Update the starts of the end gaps to reflect the new gaps
            # that are added to the left side of the alignment.
            lgapstart += 1
            rgapstart += 1

            # Update both aligned sequences and sequence indexes.
            for seqnum in range(2):
                seq = self.alignedseqs[seqnum]
                self.alignedseqs[seqnum] = seq[0:index] + '-' + seq[index:]
                # NOTE(review): an index of exactly 0 is left unchanged by
                # the "> 0" test below; confirm this matches the gap index
                # encoding produced by getAlignedSeqIndexes().
                sindex = self.seqindexes[seqnum][index]
                if sindex > 0:
                    sindex = (sindex * -1) - 1
                self.seqindexes[seqnum].insert(index, sindex)

        # Do the same for the right end region.
        for index, char in enumerate(rendaligned):
            if char != '-':
                continue

            gappos = rgapstart + index + 1

            # Update both aligned sequences and sequence indexes.
            for seqnum in range(2):
                seq = self.alignedseqs[seqnum]
                self.alignedseqs[seqnum] = seq[0:gappos] + '-' + seq[gappos:]
                # Check if we're at the end of the alignment.
                if gappos != len(self.seqindexes[seqnum]):
                    sindex = self.seqindexes[seqnum][gappos]
                    if sindex > 0:
                        sindex = (sindex * -1) - 1
                else:
                    # No entry exists at gappos yet, so derive the new
                    # value from the last existing entry instead.
                    sindex = self.seqindexes[seqnum][gappos - 1]
                    if sindex > 0:
                        sindex = ((sindex + 1) * -1) - 1
                self.seqindexes[seqnum].insert(gappos, sindex)

        # Save the primer and end gap alignments.
        self.pr_alignments = [
            [fwdaligned, lendaligned], [revaligned, rendaligned]]

        # Construct a full-length sequence to contain the primer alignments.
        self.alignedprimers = (
            fwdaligned + ' ' * (rgapstart - lgapstart + 1) + revaligned)
Example #6
0
    def alignPrimersToAlignment(self):
        """
        Aligns the forward and reverse primer sequences to the forward/reverse
        sequencing trace alignment.  Searching is limited to the non-overlapping
        end regions of the alignment because the primers can only be in these
        regions.  Also adjusts the alignment and alignment indices to
        accommodate the alignment with the primers, if extra gaps are needed.
        An alignment-length string containing the aligned primers in the
        appropriate locations is saved as self.alignedprimers.  The aligned
        primers and end gap regions of the trace alignment are also saved in a
        list as self.pr_alignments =
        [[fwd_align, left_end], [rev_align, right_end]].

        Returns without modifying anything if self.numseqs is not 2, or if
        the forward aligned sequence or the reverse aligned sequence is
        empty or consists entirely of gap characters.
        """
        if self.numseqs != 2:
            return

        # Figure out which trace is the reverse trace.
        if self.seqtraces[0].isReverseComplemented():
            rev, fwd = 0, 1
        else:
            rev, fwd = 1, 0

        fwdseq = self.alignedseqs[fwd]
        revseq = self.alignedseqs[rev]

        # Get the portions of the trace sequences that are in the end gaps, as
        # these regions are where the primers will be located.  We don't use
        # the getEndGapStart() methods because we want to be sure that we're
        # operating on the correct sequence.
        #
        # lgapstart is the number of leading gap characters in the forward
        # sequence; rgapstart is the position of the last non-gap character
        # in the reverse sequence (-1 if there is none).  Computing these
        # from stripped lengths avoids indexing past the end of an all-gap
        # or empty sequence.
        lgapstart = len(fwdseq) - len(fwdseq.lstrip('-'))
        rgapstart = len(revseq.rstrip('-')) - 1

        # Check if either sequence is empty.  If so, we can't proceed.
        if lgapstart == len(fwdseq) or rgapstart < 0:
            return

        # Get the left and right ends that should contain the primers.
        leftend = revseq[0:lgapstart]
        rightend = fwdseq[rgapstart + 1:]

        # Align the forward primer sequence to the left end gap sequence.  Using
        # a harsher gap penalty (-9 instead of -6) seems to generally produce
        # more useful alignments.
        forward = self.settings.getForwardPrimer()
        align = PairwiseAlignment()
        align.setGapPenalty(-9)
        align.setSequences(forward, leftend)
        align.doAlignment()
        fwdaligned, lendaligned = align.getAlignedSequences()

        # Align the reverse complemented reverse primer sequence to the
        # right end gap sequence.
        reverse = sequencetrace.reverseCompSequence(
            self.settings.getReversePrimer())
        align.setSequences(reverse, rightend)
        align.doAlignment()
        revaligned, rendaligned = align.getAlignedSequences()

        # Replace starting and ending gap characters in the aligned primer
        # sequences with spaces.
        fwdaligned = self.trimAlignedPrimerEnds(fwdaligned)
        revaligned = self.trimAlignedPrimerEnds(revaligned)

        # If the primer alignment introduced gaps into the end gap region of
        # the trace alignment, update the alignment and alignment indices to
        # include the extra gaps.
        for index, char in enumerate(lendaligned):
            if char != '-':
                continue

            # Update the starts of the end gaps to reflect the new gaps
            # that are added to the left side of the alignment.
            lgapstart += 1
            rgapstart += 1

            # Update both aligned sequences and sequence indexes.
            for seqnum in range(2):
                seq = self.alignedseqs[seqnum]
                self.alignedseqs[seqnum] = seq[0:index] + '-' + seq[index:]
                # NOTE(review): an index of exactly 0 is left unchanged by
                # the "> 0" test below; confirm this matches the gap index
                # encoding produced by getAlignedSeqIndexes().
                sindex = self.seqindexes[seqnum][index]
                if sindex > 0:
                    sindex = (sindex * -1) - 1
                self.seqindexes[seqnum].insert(index, sindex)

        # Do the same for the right end region.
        for index, char in enumerate(rendaligned):
            if char != '-':
                continue

            gappos = rgapstart + index + 1

            # Update both aligned sequences and sequence indexes.
            for seqnum in range(2):
                seq = self.alignedseqs[seqnum]
                self.alignedseqs[seqnum] = seq[0:gappos] + '-' + seq[gappos:]
                # Check if we're at the end of the alignment.
                if gappos != len(self.seqindexes[seqnum]):
                    sindex = self.seqindexes[seqnum][gappos]
                    if sindex > 0:
                        sindex = (sindex * -1) - 1
                else:
                    # No entry exists at gappos yet, so derive the new
                    # value from the last existing entry instead.
                    sindex = self.seqindexes[seqnum][gappos - 1]
                    if sindex > 0:
                        sindex = ((sindex + 1) * -1) - 1
                self.seqindexes[seqnum].insert(gappos, sindex)

        # Save the primer and end gap alignments.
        self.pr_alignments = [
            [fwdaligned, lendaligned], [revaligned, rendaligned]]

        # Construct a full-length sequence to contain the primer alignments.
        self.alignedprimers = (
            fwdaligned + ' ' * (rgapstart - lgapstart + 1) + revaligned)