Exemple #1
0
    def __formatList(aminoAcids):
        """
        Normalizes 'aminoAcids' into a new list of AminoAcid objects.

        Accepted inputs:
        -None gives an empty list
        -an AminoAcid gives a one-item list built from it through the AminoAcid constructor
        -a string for which str.isupper() holds gives one AminoAcid per character
        -any other string gives a single AminoAcid built from the whole string
        -a list gives one AminoAcid per item
        Anything else raises TypeError.
        """
        if aminoAcids is None:
            return []

        if isinstance(aminoAcids, AminoAcid):
            return [AminoAcid(aminoAcids)]

        # Strings must be tested before lists: the two cases split differently
        if isinstance(aminoAcids, str):
            # Upper case means a run of short names, otherwise it is one name
            names = aminoAcids if aminoAcids.isupper() else [aminoAcids]
            return [AminoAcid(name) for name in names]

        if isinstance(aminoAcids, list):
            return [AminoAcid(item) for item in aminoAcids]

        raise TypeError(
            "aminoAcids must be a Sequence, list, AminoAcid object, string or None"
        )
Exemple #2
0
def getFrequencies(groupValues, groupSizes):
    """
    Evaluates the frequencies of AminoAcids within columns of groups in 'groupValues'.
    Frequencies are weighted according to group sizes in 'groupSizes'.

    'groupValues' is indexed as groupValues[group][column] and each entry must
    offer items() yielding (AminoAcid, count) pairs. It must hold at least one
    group, since the column count is read from groupValues[0].
    'groupSizes' holds, for each group, the number its counts are divided by.

    Every pair of distinct groups is compared column by column. For each
    combination of one AminoAcid from each group, the product of their
    in-group frequencies is added to the results.

    Returns two dictionaries and a number:
    -'freqPairs' maps pairs of AminoAcids to their frequencies
    -'freqSingle' maps single AminoAcids to their frequencies
    -'freqSum' is the sum of all frequencies
    """
    seqSize = len(groupValues[0])  # Size of the Sequences
    groupCount = len(groupSizes)  # Number of groups

    freqPairs = {}  # Frequencies of amino acid pairs (fAB)

    # Frequencies of single amino acids (fA), every known name starts at 0
    freqSingle = {AminoAcid(aa): 0 for aa in AminoAcid.getAllNames()}

    freqSum = 0  # Sum of frequencies  sum(fAB)

    for col in range(seqSize):  # Each column
        for groupAIndex in range(groupCount - 1):  # Each groupA
            groupA = groupValues[groupAIndex]
            groupASize = groupSizes[groupAIndex]

            # Each further groupB, so that a pair of groups is visited once
            for groupBIndex in range(groupAIndex + 1, groupCount):
                groupB = groupValues[groupBIndex]
                groupBSize = groupSizes[groupBIndex]

                for aaA, aaACount in groupA[col].items():  # Each AA from groupA
                    aaAFreq = aaACount / groupASize  # Its frequency within groupA

                    for aaB, aaBCount in groupB[col].items():  # Each AA from groupB
                        aaBFreq = aaBCount / groupBSize  # Its frequency within groupB

                        aaPairFreq = aaAFreq * aaBFreq  # Pair frequency
                        freqSum += aaPairFreq  # Sum of all frequencies

                        # Each member of the pair receives half of the pair frequency
                        freqSingle[aaA] += aaPairFreq / 2
                        freqSingle[aaB] += aaPairFreq / 2

                        # Larger AminoAcid first, so (A, B) and (B, A) share one key
                        pairIndex = (aaA, aaB) if aaA > aaB else (aaB, aaA)

                        # A default of 0 covers the first occurrence without
                        # resorting to a catch-all exception handler
                        freqPairs[pairIndex] = freqPairs.get(pairIndex, 0) + aaPairFreq

    return freqPairs, freqSingle, freqSum
Exemple #3
0
    def __init__(self, path="", description="", ignore=None):
        """
        Creates a Score object.
        If 'path' is provided, loads the Score values from an iij file.
        Otherwise, creates a Score for all possible AminoAcids with values 0.

        'description' is stored as given.
        'ignore' is converted to a Sequence; its AminoAcids are left out of the
        zero-initialized matrix. It is not applied to values loaded from a file.

        In an iij file, lines starting with "#" and blank lines are skipped.
        The first remaining line lists the amino acids in matrix order, and
        every following line is one row of integer values.
        """
        self._description = description
        self._ignore = Sequence(ignore)
        self._matrix = []
        self._aaOrder = {}
        self._aaSequence = Sequence()

        # If path is provided, load directly from iij file
        if path != "":
            with open(path, 'r') as iijFile:
                foundAAOrder = False  # Has the amino acid header line been read yet?
                for line in iijFile:
                    # Comments carry no data. Blank lines must be skipped too:
                    # one would otherwise be taken for an empty header or
                    # add an empty row to the matrix.
                    if line.startswith("#") or not line.strip():
                        continue

                    if not foundAAOrder:  # Read aa values and order
                        for aa in line.split():
                            self._aaSequence.extend(aa)
                        self._aaOrder = {
                            aa: index
                            for index, aa in enumerate(self._aaSequence)
                        }
                        foundAAOrder = True
                    else:  # Read matrix values
                        self._matrix.append([int(v) for v in line.split()])

        # Otherwise initialize matrix with 0
        else:
            for aa in AminoAcid.getAllNames():
                if AminoAcid(aa) not in self._ignore:
                    self._aaSequence.extend(aa)
                    # The next row index equals the number of rows so far
                    rowIndex = len(self._matrix)
                    self._aaOrder[self._aaSequence[-1]] = rowIndex
                    # Row n holds n + 1 values, giving a lower triangular matrix
                    self._matrix.append([0] * (rowIndex + 1))
Exemple #4
0
    def __init__(self):
        """
        Creates the object with all of its statistics and counters empty.

        Structures are the characters of "HETC". Every counter dictionary is
        filled in advance with each key it can hold, set to 0 (or to an empty
        list for 'scores').
        """
        self.structures = "HETC"

        # One counter per structure and per outcome
        outcomes = ("TP", "TN", "FP", "FN")
        self.roc = {
            (struct, outcome): 0
            for struct in self.structures
            for outcome in outcomes
        }

        self.minScore = 0
        self.maxScore = 0

        # One list per (classifier structure, real structure) combination
        self.scores = {}
        for classifier in self.structures:
            for realS in self.structures:
                self.scores[(classifier, realS)] = []

        self.correctPred = 0
        self.totalPred = 0
        self.neighbourOffset = 8

        self.trainings = 0  # Number of trainings (one per AA)

        self.strucCount = {}
        for struct in self.structures:
            self.strucCount[struct] = 0

        # Counters for (structure, AA)
        self.pairCount = {}
        for struct in self.structures:
            for name in AminoAcid.getAllNames():
                self.pairCount[(struct, name)] = 0

        # Counters for (structure, AA, neighbour AA)
        self.tripletCount = {
            (struct, name, neighbour): 0
            for struct in self.structures
            for name in AminoAcid.getAllNames()
            for neighbour in AminoAcid.getAllNames()
        }
Exemple #5
0
 def test_sliceSingleItemIsAminoAcid(self):
     """Indexing a Sequence with an integer must give an AminoAcid."""
     prot = Sequence("ABC")
     aa = prot[0]
     # assertEqual replaces the assertEquals alias, removed in Python 3.12
     self.assertEqual(aa, AminoAcid("A"))
Exemple #6
0
 def test_count(self):
     """Counting an AminoAcid must report each of its occurrences."""
     prot = Sequence("ABCAX")
     # assertEqual replaces the assertEquals alias, removed in Python 3.12
     self.assertEqual(prot.count(AminoAcid("A")), 2)
Exemple #7
0
 def test_remove(self):
     """Removing an AminoAcid must leave the other items in their order."""
     prot = Sequence("TRANKIL")
     prot.remove(AminoAcid("A"))
     # assertEqual replaces the assertEquals alias, removed in Python 3.12
     self.assertEqual(prot, Sequence("TRNKIL"))
Exemple #8
0
 def __setitem__(self, key, value):
     """
     Sets value for an index or a slice of the sequence.

     With an integer 'key', 'value' is converted to a single AminoAcid.
     With a slice 'key', 'value' must be iterable and each of its items is
     converted to an AminoAcid on its own.
     """
     if isinstance(key, slice):
         # list slice assignment needs an iterable, so wrapping 'value' in a
         # single AminoAcid cannot work here
         list.__setitem__(self, key, [AminoAcid(item) for item in value])
     else:
         list.__setitem__(self, key, AminoAcid(value))
Exemple #9
0
 def canCreateAminoAcid(self, name):
     """Fails the running test if AminoAcid raises ValueError for 'name'."""
     try:
         AminoAcid(name)  # Only the construction matters, the object is unused
     except ValueError:
         # Name the rejected value so that the failure can be diagnosed
         self.fail("AminoAcid({!r}) raised ValueError".format(name))
Exemple #10
0
 def test_hashValuesAreEqual(self):
     """AminoAcids built from "ala" and from "A" must hash identically."""
     aa1 = AminoAcid("ala")
     aa2 = AminoAcid("A")
     # assertEqual replaces the assertEquals alias, removed in Python 3.12
     self.assertEqual(hash(aa1), hash(aa2))
Exemple #11
0
 def test_isTermination(self):
     """An AminoAcid built from "term" must report being a termination."""
     self.assertTrue(AminoAcid("term").isTermination())
Exemple #12
0
 def test_isGap(self):
     """An AminoAcid built from "gap" must report being a gap."""
     self.assertTrue(AminoAcid("gap").isGap())
Exemple #13
0
 def test_getName(self):
     """getName() on an AminoAcid built from "ala" must give "A"."""
     aa = AminoAcid("ala")
     # assertEqual replaces the assertEquals alias, removed in Python 3.12
     self.assertEqual("A", aa.getName())
Exemple #14
0
 def test_getAllNames(self):
     """Every name listed by getAllNames, in each name mode, must build an AminoAcid."""
     modes = ("short", "medium", "long")
     for mode in modes:
         listedNames = AminoAcid.getAllNames(mode)
         for listedName in listedNames:
             self.canCreateAminoAcid(listedName)
Exemple #15
0
    def getScore(self, aa1, aa2):
        """
        Get the score assigned to AminoAcids 'aa1', 'aa2'.
        The order of the two arguments does not change the result.
        """
        first = self._aaOrder[aa1]
        second = self._aaOrder[aa2]

        # The larger index always selects the row, the smaller one the column
        row, col = (first, second) if first > second else (second, first)
        return self._matrix[row][col]


# AA frequencies for complete UniProt database
# from http://web.expasy.org/docs/relnotes/relstat.html, "AMINO ACID COMPOSITION"
uniprob = {
    AminoAcid("Ala"): .0826,
    AminoAcid("Gln"): .0393,
    AminoAcid("Leu"): .0965,
    AminoAcid("Ser"): .0660,
    AminoAcid("Arg"): .0553,
    AminoAcid("Glu"): .0674,
    AminoAcid("Lys"): .0582,
    AminoAcid("Thr"): .0535,
    AminoAcid("Asn"): .0406,
    AminoAcid("Gly"): .0708,
    AminoAcid("Met"): .0241,
    AminoAcid("Trp"): .0109,
    AminoAcid("Asp"): .0546,
    AminoAcid("His"): .0227,
    AminoAcid("Phe"): .0386,
    AminoAcid("Tyr"): .0292,