Esempio n. 1
0
    def testOneByTwoReturnAnalysis(self):
        """
        When affinityMatrix is given one read and two subjects and
        returnAnalysis is True, the entry for the subject the query matches
        must be an analysis dict holding the keys of a full analysis (with
        an overall score of 1.0), and the entry for the subject the query
        does not match must be None.
        """
        sequence = 'FRRRFRRRFAAAFRRRFRRRF'
        expectedKeys = {
            'bestBinScore',
            'histogram',
            'overallScore',
            'overallScoreAnalysis',
            'significanceAnalysis',
            'significantBins',
        }

        queries = Reads([AARead('id1', sequence)])
        targets = Reads([AARead('id2', sequence), AARead('id3', 'FFF')])
        result = affinityMatrix(queries, landmarks=['AlphaHelix'],
                                subjects=targets, computeDiagonal=True,
                                returnAnalysis=True)

        # First subject: identical to the query, so a full analysis is
        # expected.
        matched = result[0][0]
        self.assertEqual(expectedKeys, set(matched))
        self.assertEqual(1.0, matched['overallScore'])

        # Second subject: the query doesn't match it.
        self.assertIs(None, result[0][1])
Esempio n. 2
0
    def fromSequences(cls, labels, sequences, findParams=None, **kwargs):
        """
        Build an NJTree instance from a set of sequences.

        @param cls: Our class.
        @param labels: An iterable producing C{str} labels for the sequences.
        @param sequences: Either a C{str} filename of sequences to consider
            or a C{light.reads.Reads} instance.
        @param findParams: An instance of C{FindParameters}. When not given
            (or otherwise false), a default C{FindParameters()} is used.
        @param kwargs: See
            C{database.DatabaseSpecifier.getDatabaseFromKeywords} for
            additional keywords, all of which are optional.
        @return: An C{NJTree} instance.
        """
        # A string is taken to be the name of a file to read sequences from.
        if isinstance(sequences, str):
            sequences = FastaReads(
                sequences, readClass=AAReadWithX, upperCase=True)

        instance = cls()
        instance.sequences = list(sequences)
        instance.labels = labels

        if not findParams:
            findParams = FindParameters()

        scores = affinityMatrix(
            instance.sequences, findParams=findParams, **kwargs)
        affinity = np.array(scores)

        # The distance matrix is the element-wise complement of the
        # affinity matrix (1 - affinity).
        instance.distance = np.ones(affinity.shape) - affinity
        instance.tree = nj(DistanceMatrix(instance.distance, labels))
        return instance
Esempio n. 3
0
    def _checkSymmetry(self, sequences, findParams, symmetric=False, **kwargs):
        """
        Compute the affinity matrix for some sequences and assert that it
        has a diagonal of 1.0 and is symmetric.

        @param sequences: A C{list} of C{AARead} instances.
        @param findParams: A C{light.parameters.FindParameters} instance.
        @param symmetric: If C{True}, pass symmetric=True to the
            affinityMatrix function, allowing it to speed up the calculation
            by assuming scores are symmetric. The result is still checked
            for actual symmetry.
        @param kwargs: See
            C{database.DatabaseSpecifier.getDatabaseFromKeywords} for
            additional keywords, all of which are optional.
        """
        scores = affinityMatrix(sequences, findParams, symmetric=symmetric,
                                **kwargs)
        count = len(sequences)

        for row in range(count):
            # Each sequence scored against itself must give exactly 1.0.
            own = scores[row][row]
            self.assertEqual(
                1.0, own,
                'Diagonal entry (%d, %d) for %s against itself has non-1.0 '
                'score of %f.' % (row, row, sequences[row].id, own))

            # Only the upper triangle needs visiting: each entry is
            # compared with its mirror image below the diagonal.
            for col in range(row + 1, count):
                upper = scores[row][col]
                lower = scores[col][row]
                self.assertEqual(
                    upper, lower,
                    'Off-diagonal entries (%d, %d) and (%d, %d) for %s '
                    'against %s have unequal scores %f and %f.' %
                    (row, col, col, row, sequences[row].id,
                     sequences[col].id, upper, lower))
Esempio n. 4
0
    def testTwoByTwoAsDict(self):
        """
        With two reads, two subjects and returnDict=True, affinityMatrix
        must return a dict keyed by query id whose values are dicts keyed
        by subject id, holding the scores.
        """
        sequence = 'FRRRFRRRFAAAFRRRFRRRF'

        queries = Reads()
        for readId in ('id1', 'id2'):
            queries.add(AARead(readId, sequence))

        targets = Reads()
        for readId in ('id3', 'id4'):
            targets.add(AARead(readId, sequence))

        expected = {
            'id1': {'id3': 1.0, 'id4': 1.0},
            'id2': {'id3': 1.0, 'id4': 1.0},
        }
        result = affinityMatrix(queries, landmarks=['AlphaHelix'],
                                subjects=targets, computeDiagonal=True,
                                returnDict=True)
        self.assertEqual(expected, result)
Esempio n. 5
0
def ultrametric(sequenceFileOrMatrix, findParams=None, **kwargs):
    """
    Yield the index triplets of a matrix that fail an ultrametricity check.

    Every ordered triplet (a, b, c) of distinct indices into the matrix is
    examined, and the triplet is yielded when
    matrix[a][c] < max(matrix[a][b], matrix[b][c]).

    NOTE(review): the usual statement of ultrametricity for distances is
    d(A, C) <= max(d(A, B), d(B, C)), whose violations would have d(A, C)
    greater than the maximum. The comparison made here runs in the other
    direction, and a matrix that is not passed in is computed by
    C{affinity.affinityMatrix}. Confirm the intended direction.

    @param sequenceFileOrMatrix: Either a C{np.ndarray} matrix, which is
        used as is, or anything else (e.g., a C{str} file name of a file
        containing sequences), which is passed to
        C{affinity.affinityMatrix} to compute the matrix.
    @param findParams: A C{light.parameters.FindParameters} instance. Only
        used when the matrix has to be computed.
    @param kwargs: See
        C{database.DatabaseSpecifier.getDatabaseFromKeywords} for
        additional keywords, all of which are optional. Only used when the
        matrix has to be computed.

    @return: A generator that yields C{(a, b, c)} index tuples for the
        triplets failing the check.
    """
    if not isinstance(sequenceFileOrMatrix, np.ndarray):
        scores = affinity.affinityMatrix(sequenceFileOrMatrix, findParams,
                                         **kwargs)
    else:
        scores = sequenceFileOrMatrix

    indices = range(len(scores))
    for triplet in permutations(indices, 3):
        a, b, c = triplet
        largest = max(scores[a][b], scores[b][c])
        if scores[a][c] < largest:
            yield a, b, c
Esempio n. 6
0
 def testNoReads(self):
     """
     Calling affinityMatrix with an empty set of reads (and no subjects)
     must produce an empty score matrix.
     """
     expected = []
     result = affinityMatrix(Reads(), landmarks=['AlphaHelix'])
     self.assertEqual(expected, result)
Esempio n. 7
0
 def testOneSequenceSpecificDiagonalValue(self):
     """
     When affinityMatrix is given a single read together with an explicit
     diagonal value, the 1x1 result must contain that diagonal value.
     """
     diagonal = 2.0
     queries = Reads()
     queries.add(AARead('id1', 'AAA'))
     result = affinityMatrix(queries, landmarks=['AlphaHelix'],
                             diagonalValue=diagonal)
     self.assertEqual([[diagonal]], result)
Esempio n. 8
0
 def testOneByZero(self):
     """
     If affinityMatrix is called with one read and an empty set of
     subjects, the resulting matrix must be 1x0 (a single empty row).
     """
     queries = Reads()
     queries.add(AARead('id1', 'FRRRFRRRFAAAFRRRFRRRF'))
     noSubjects = Reads()
     result = affinityMatrix(queries, landmarks=['AlphaHelix'],
                             subjects=noSubjects, computeDiagonal=True)
     self.assertEqual([[]], result)
Esempio n. 9
0
 def testSequenceWithFeaturesAgainstItself(self):
     """
     When the only read passed to affinityMatrix is also the only subject
     in the database, and that read has features, the result must be a
     matrix holding a single 1.0.
     """
     queries = Reads()
     queries.add(AARead('id1', 'FRRRFRRRFAAAFRRRFRRRF'))
     result = affinityMatrix(queries, computeDiagonal=True,
                             landmarks=['AlphaHelix'])
     self.assertEqual([[1.0]], result)
Esempio n. 10
0
 def testZeroByThree(self):
     """
     With no reads at all, affinityMatrix must return an empty matrix even
     though three subjects are supplied.
     """
     sequence = 'FRRRFRRRFAAAFRRRFRRRF'
     noQueries = Reads()
     targets = Reads()
     for readId in ('id2', 'id3', 'id4'):
         targets.add(AARead(readId, sequence))
     result = affinityMatrix(noQueries, landmarks=['AlphaHelix'],
                             subjects=targets, computeDiagonal=True)
     self.assertEqual([], result)
Esempio n. 11
0
 def testOneByTwo(self):
     """
     With one query and two subjects, where the query matches only the
     first subject, getScore must return 1.0 for the first subject and 0.0
     for the second when given a matrix produced with returnAnalysis=True.
     """
     sequence = 'FRRRFRRRFAAAFRRRFRRRF'
     queries = Reads([AARead('id1', sequence)])
     targets = Reads([AARead('id2', sequence), AARead('id3', 'FFF')])
     result = affinityMatrix(queries, landmarks=['AlphaHelix'],
                             subjects=targets, computeDiagonal=True,
                             returnAnalysis=True)
     # Column 0 is the matching subject, column 1 the non-matching one.
     for column, expected in enumerate((1.0, 0.0)):
         self.assertEqual(expected, getScore(result, 0, column))
Esempio n. 12
0
    def testTwoByTwoWithProgressFunction(self):
        """
        When a progress function is passed to affinityMatrix along with two
        reads and two subjects, it must be called once per read, receiving
        the read's index and the read itself, in order.
        """
        sequence = 'FRRRFRRRFAAAFRRRFRRRF'

        queries = Reads()
        for readId in ('id1', 'id2'):
            queries.add(AARead(readId, sequence))

        targets = Reads()
        for readId in ('id3', 'id4'):
            targets.add(AARead(readId, sequence))

        calls = []

        def recordProgress(i, query):
            # Keep just the index and the id so the calls can be compared.
            calls.append((i, query.id))

        affinityMatrix(queries, landmarks=['AlphaHelix'],
                       subjects=targets, computeDiagonal=True,
                       progressFunc=recordProgress)

        self.assertEqual([(0, 'id1'), (1, 'id2')], calls)
Esempio n. 13
0
 def testOneByThree(self):
     """
     One read against a database of three subjects must give a 1x3
     matrix.
     """
     sequence = 'FRRRFRRRFAAAFRRRFRRRF'
     queries = Reads()
     queries.add(AARead('id1', sequence))
     targets = Reads()
     for readId in ('id2', 'id3', 'id4'):
         targets.add(AARead(readId, sequence))
     result = affinityMatrix(queries, landmarks=['AlphaHelix'],
                             subjects=targets, computeDiagonal=True)
     self.assertEqual([[1.0, 1.0, 1.0]], result)
Esempio n. 14
0
 def testTwoByThreeWithRepeatedQueryAndSubjectIds(self):
     """
     Two reads against three subjects must give a 2x3 matrix. Repeated
     ids among the queries and among the subjects must not cause a problem
     (as they would if affinityMatrix were called with returnDict=True).
     """
     sequence = 'FRRRFRRRFAAAFRRRFRRRF'

     # Both queries deliberately share an id.
     queries = Reads()
     for readId in ('id1', 'id1'):
         queries.add(AARead(readId, sequence))

     # The last two subjects deliberately share an id.
     targets = Reads()
     for readId in ('id2', 'id3', 'id3'):
         targets.add(AARead(readId, sequence))

     expected = [
         [1.0, 1.0, 1.0],
         [1.0, 1.0, 1.0],
     ]
     result = affinityMatrix(queries, landmarks=['AlphaHelix'],
                             subjects=targets, computeDiagonal=True)
     self.assertEqual(expected, result)
Esempio n. 15
0
    def getCorrelation(self):
        """
        Compute the correlation between light matter scores for the perfect
        PDB finders and for the finders in the subset that are being
        evaluated.

        For each dataset, two affinity matrices are computed for the same
        queries and subjects: one using the C{'PDB ' + self.structureType}
        landmark finder and one using the C{'AC ' + self.structureType}
        landmark finder. The scores of all (query, subject) pairs whose ids
        differ are collected, in the same order for both finders, and a
        linear regression of the AC scores on the PDB scores is computed.

        @return: A C{dict} whose keys are the C{str} dataset names ('2HLA',
            '4MTP', 'Polymerase', 'HA') and whose values are the r-values
            of the corresponding regressions.
        """
        datasets = {
            '2HLA': {
                'queries': HLA_Q,
                'subjects': HLA_S,
            },
            '4MTP': {
                'queries': MTP_Q,
                'subjects': MTP_S,
            },
            'Polymerase': {
                'queries': POLY_Q,
                'subjects': POLY_S,
            },
            'HA': {
                'queries': HA_Q,
                'subjects': HA_S,
            },
        }

        def pairScores(queries, subjects, finderPrefix):
            # Score all queries against all subjects with one landmark
            # finder and return the scores of the pairs with differing ids.
            matrix = affinityMatrix(
                queries,
                subjects=subjects,
                symmetric=False,
                computeDiagonal=True,
                returnDict=True,
                findParams=self.findParams,
                landmarks=[finderPrefix + self.structureType],
                trigPoints=[],
                acAlphaHelixFilename=self.acAlphaHelixFilename,
                acAlphaHelix310Filename=self.acAlphaHelix310Filename,
                acAlphaHelixCombinedFilename=self.acAlphaHelixCombinedFilename,
                acAlphaHelixPiFilename=self.acAlphaHelixPiFilename,
                acExtendedStrandFilename=self.acExtendedStrandFilename)

            return [matrix[query.id][subject.id]
                    for query in queries
                    for subject in subjects
                    if query.id != subject.id]

        result = {}

        for name, dataset in datasets.items():
            queries = dataset['queries']
            subjects = dataset['subjects']

            pdbScores = pairScores(queries, subjects, 'PDB ')
            evaluateScores = pairScores(queries, subjects, 'AC ')

            # Only the r-value of the regression is of interest; the slope,
            # intercept, p-value and standard error are discarded.
            _, _, rValue, _, _ = stats.linregress(pdbScores, evaluateScores)

            result[name] = rValue

        return result