Esempio n. 1
0
def FilterAlignedPairForPositions(seq1, seq2, method):
    """Reduce a pair of aligned coding sequences to selected positions.

    Available filters:
    all:        do nothing.
    codon1,codon2,codon3: return 1st, 2nd, 3rd codon positions only.
    d4: third codon positions of those codon pairs for which
        ``Genomics.GetDegeneracy`` reports the same first value
        (presumably the amino acid) for both codons and a degeneracy
        of 4 at the third position of both codons.

    Arguments
    ---------
    seq1, seq2 : string
        The two sequences. Codons are read in steps of three starting at
        position 0. For ``d4`` the codon windows are driven by the length
        of ``seq1``.
    method : string
        Name of the filter to apply.

    Returns
    -------
    A tuple ``(filtered_seq1, filtered_seq2)``. With ``all`` the inputs
    are returned unchanged. An unrecognised ``method`` yields ``None``.
    """

    if method == "all":
        return seq1, seq2

    # Offset of the requested position within each codon.
    codon_offsets = {"codon1": 0, "codon2": 1, "codon3": 2}
    if method in codon_offsets:
        offset = codon_offsets[method]
        return "".join(seq1[offset::3]), "".join(seq2[offset::3])

    if method == "d4":
        kept1 = []
        kept2 = []
        for start in range(0, len(seq1), 3):
            codon1 = seq1[start:start + 3]
            codon2 = seq2[start:start + 3]
            try:
                # Only the first value and the third-position degeneracy
                # are needed; the other two are discarded.
                aa1, _, _, deg13 = Genomics.GetDegeneracy(codon1)
                aa2, _, _, deg23 = Genomics.GetDegeneracy(codon2)
            except KeyError:
                # Codons unknown to GetDegeneracy are skipped.
                continue
            if aa1 == aa2 and deg13 == 4 and deg23 == 4:
                kept1.append(codon1[2])
                kept2.append(codon2[2])
        return "".join(kept1), "".join(kept2)

    # Unknown filter names fall through, as before.
    return None
Esempio n. 2
0
    def loadSequence(self, sequence, seqtype="na"):
        """Load per-codon-position nucleotide counts from a sequence.

        The parent ``SequencePropertiesLength.loadSequence`` is called
        first. The sequence is then upper-cased and read codon by codon
        to fill:

        * ``self.mNStopCodons`` - number of codons for which
          ``Genomics.IsStopCodon`` is true,
        * ``self.mCounts`` - one dict per codon position with counts for
          the letters A, C, G, T, X and N,
        * ``self.mCountsDegeneracy`` - indexed as
          ``[position][degeneracy][letter]`` with degeneracy in 0..4,
          filled from ``Genomics.GetDegeneracy`` for non-stop codons.

        Raises
        ------
        ValueError
            If the sequence length is not a multiple of 3.
        KeyError
            If the sequence contains a letter other than A, C, G, T, X
            or N (the lookup in ``self.mCounts`` is not guarded).
        """

        SequencePropertiesLength.loadSequence(self, sequence, seqtype)
        if len(sequence) % 3:
            raise ValueError(
                '''sequence length is not a multiple of 3 (length=%i)''' %
                (len(sequence)))

        # uppercase all letters
        sequence = sequence.upper()

        self.mNStopCodons = 0

        # Nucleotide counts for each codon position. These are not the sum
        # of the per-degeneracy counts below, because a codon may be
        # ambiguous (e.g. GNN) and then has no degeneracy entry.
        self.mCounts = [dict.fromkeys("ACGTXN", 0) for _ in range(3)]

        # Nucleotide counts for each codon position per degeneracy (0..4).
        dna_letters = Bio.Alphabet.IUPAC.extended_dna.letters
        self.mCountsDegeneracy = [
            [dict.fromkeys(dna_letters, 0) for _ in range(5)]
            for _ in range(3)]

        # Slice codons one at a time instead of building a list of them.
        for start in range(0, len(sequence), 3):
            codon = sequence[start:start + 3]

            for pos in (0, 1, 2):
                self.mCounts[pos][codon[pos]] += 1

            if Genomics.IsStopCodon(codon):
                self.mNStopCodons += 1
                continue

            try:
                aa, deg1, deg2, deg3 = Genomics.GetDegeneracy(codon)
                for pos, degeneracy in enumerate((deg1, deg2, deg3)):
                    self.mCountsDegeneracy[pos][degeneracy][codon[pos]] += 1
            except KeyError:
                # Codons without a degeneracy entry, or with letters that
                # are missing from the degeneracy tables, are skipped.
                pass
Esempio n. 3
0
def countSites(model):
    """Count expected synonymous/non-synonymous sites in a grammar.

    Codon frequencies are read from
    ``model.evaluateTerminalFrequencies()[('COD0', 'COD1', 'COD2')]``.
    Each key is joined into a single upper-case codon string. For every
    codon known to ``Genomics.GetDegeneracy`` and each of its three
    positions, the fraction ``(degeneracy - 1) / 3`` of the codon's
    frequency is added to the synonymous total and the remainder to the
    non-synonymous total. Codons for which ``GetDegeneracy`` raises
    ``KeyError`` are ignored.

    Returns
    -------
    A tuple ``(n, s)`` with the number of non-synonymous and synonymous
    sites.

    Raises
    ------
    AssertionError
        If ``n + s``, rounded to two decimals, is not 3.0.
    """

    xpi = model.evaluateTerminalFrequencies()[('COD0', 'COD1', 'COD2')]

    # Translate the frequency keys into codon strings. This is done in a
    # single pass into a new dictionary; rewriting keys of a dictionary
    # while iterating over it is not safe.
    pi = {"".join(codon).upper(): f for codon, f in xpi.items()}

    # number of non-synonymous/synonymous sites
    n, s = 0.0, 0.0

    for codon, freq in pi.items():

        try:
            degeneracy = Genomics.GetDegeneracy(codon)
        except KeyError:
            continue

        # degeneracy[0] is skipped; entries 1..3 are read per codon position.
        for x in range(1, 4):
            d = (degeneracy[x] - 1.0) / 3.0
            s += freq * d
            n += freq * (1.0 - d)

    # Sanity check: the three codon positions must be fully accounted for.
    assert (float("%5.2f" % (n + s)) == 3.0)

    return n, s
Esempio n. 4
0
    def Load(self, in_sequence):
        """Load codon and nucleotide counts from a sequence.

        The sequence is upper-cased and read in steps of three. The
        method sets:

        * ``self.mLength`` - length of the sequence,
        * ``self.mNCodons`` - number of complete codons (length // 3),
        * ``self.mNStopCodons`` - number of codons for which
          ``Genomics.IsStopCodon`` is true,
        * ``self.mCountsAA`` - counts keyed by the letters of
          ``Bio.Alphabet.IUPAC.extended_protein``,
        * ``self.mCounts`` - one dict per codon position with counts for
          the letters A, C, G, T, X and N,
        * ``self.mCountsDegeneracy`` - indexed as
          ``[position][degeneracy][letter]`` with degeneracy in 0..4.

        ``self.Update()`` is called once all counts are collected.

        Raises
        ------
        IndexError
            If a string whose length is not a multiple of three is given
            (the trailing partial codon is indexed at all three positions).
        KeyError
            If the sequence contains a letter other than A, C, G, T, X
            or N (the lookup in ``self.mCounts`` is not guarded).
        """

        # uppercase all letters
        sequence = in_sequence.upper()

        self.mLength = len(sequence)
        # Floor division keeps the codon count an integer.
        self.mNCodons = len(sequence) // 3
        self.mNStopCodons = 0

        # counts of amino acids
        self.mCountsAA = dict.fromkeys(
            Bio.Alphabet.IUPAC.extended_protein.letters, 0)

        # Nucleotide counts for each codon position. These are not the sum
        # of the per-degeneracy counts below, because a codon may be
        # ambiguous (e.g. GNN) and then has no degeneracy entry.
        self.mCounts = [dict.fromkeys("ACGTXN", 0) for _ in range(3)]

        # Nucleotide counts for each codon position per degeneracy (0..4).
        dna_letters = Bio.Alphabet.IUPAC.extended_dna.letters
        self.mCountsDegeneracy = [
            [dict.fromkeys(dna_letters, 0) for _ in range(5)]
            for _ in range(3)]

        for start in range(0, len(sequence), 3):
            codon = sequence[start:start + 3]

            for pos in (0, 1, 2):
                self.mCounts[pos][codon[pos]] += 1

            if Genomics.IsStopCodon(codon):
                self.mNStopCodons += 1
                continue

            try:
                aa, deg1, deg2, deg3 = Genomics.GetDegeneracy(codon)
                for pos, degeneracy in enumerate((deg1, deg2, deg3)):
                    self.mCountsDegeneracy[pos][degeneracy][codon[pos]] += 1
                self.mCountsAA[aa] += 1
            except KeyError:
                # Codons without a degeneracy entry, or with letters that
                # are missing from the count tables, are skipped.
                pass

        self.Update()