예제 #1
0
    def testFindWithIdenticalNonMatchingHashes(self):
        """
        When True is passed as the second argument of find() (the
        storeFullAnalysis flag), a query hash that occurs twice and matches
        nothing in the backend must be reported once in the non-matching
        hashes, with both of its occurrences listed.
        """
        target = AARead('subject', 'F')
        probe = AARead('query', 'AFRRRFRRRFASAAAAAAAAAAAFRRRFRRRFASA')
        params = DatabaseParameters(landmarks=[AlphaHelix, BetaStrand],
                                    trigPoints=[Peaks], maxDistance=10)
        backend = Backend()
        backend.configure(params)
        backend.addSubject(target, '0')
        found, total, unmatched = backend.find(probe, True)

        self.assertEqual({}, found)
        self.assertEqual(2, total)

        # The same helix/peak arrangement appears at two places in the
        # query, so both pairs are expected under a single hash key.
        firstPair = [
            Landmark(AlphaHelix.NAME, AlphaHelix.SYMBOL, 1, 9, 2),
            TrigPoint(Peaks.NAME, Peaks.SYMBOL, 11),
        ]
        secondPair = [
            Landmark(AlphaHelix.NAME, AlphaHelix.SYMBOL, 23, 9, 2),
            TrigPoint(Peaks.NAME, Peaks.SYMBOL, 33),
        ]
        self.assertEqual({'A2:P:10': [firstPair, secondPair]}, unmatched)
예제 #2
0
    def testFindOneMatchingHashInTwoLocations(self):
        """
        Searching for a query that shares a helix with the only subject
        must report that subject with two landmark/trig point pairings,
        a hash count of 14, and no non-matching hashes.
        """
        target = AARead('subject', 'AFRRRFRRRFASAASAVVVVVVASAVVVASA')
        probe = AARead('query', 'FRRRFRRRFASAASAFRRRFRRRFFRRRFRRRFFRRRFRRRF')
        params = DatabaseParameters(landmarks=[AlphaHelix, BetaStrand],
                                    trigPoints=[Peaks])
        backend = Backend()
        backend.configure(params)
        backend.addSubject(target, '0')
        found, total, unmatched = backend.find(probe)

        # The subject features sit one residue further along than the
        # corresponding query features.
        firstPairing = {
            'queryLandmark': Landmark('AlphaHelix', 'A', 0, 9, 2),
            'queryTrigPoint': TrigPoint('Peaks', 'P', 10),
            'subjectLandmark': Landmark('AlphaHelix', 'A', 1, 9, 2),
            'subjectTrigPoint': TrigPoint('Peaks', 'P', 11),
        }
        secondPairing = {
            'queryLandmark': Landmark('AlphaHelix', 'A', 0, 9, 2),
            'queryTrigPoint': TrigPoint('Peaks', 'P', 13),
            'subjectLandmark': Landmark('AlphaHelix', 'A', 1, 9, 2),
            'subjectTrigPoint': TrigPoint('Peaks', 'P', 14),
        }
        self.assertEqual({'0': [firstPairing, secondPairing]}, found)

        self.assertEqual(14, total)
        self.assertEqual({}, unmatched)
예제 #3
0
    def testFindOneMatchingHashInOneLocation(self):
        """
        Searching a backend for a query identical to its only subject must
        report a single landmark/trig point pairing for that subject, a
        hash count of one, and no non-matching hashes.
        """
        residues = 'AFRRRFRRRFASAASA'
        target = AARead('subject', residues)
        probe = AARead('query', residues)
        params = DatabaseParameters(landmarks=[AlphaHelix],
                                    trigPoints=[Peaks], maxDistance=11)
        backend = Backend()
        backend.configure(params)
        backend.addSubject(target, '0')
        found, total, unmatched = backend.find(probe)

        # Query and subject are the same sequence, so both sides of the
        # pairing carry identical offsets.
        pairing = {
            'queryLandmark': Landmark('AlphaHelix', 'A', 1, 9, 2),
            'queryTrigPoint': TrigPoint('Peaks', 'P', 11),
            'subjectLandmark': Landmark('AlphaHelix', 'A', 1, 9, 2),
            'subjectTrigPoint': TrigPoint('Peaks', 'P', 11),
        }
        self.assertEqual({'0': [pairing]}, found)
        self.assertEqual(1, total)
        self.assertEqual({}, unmatched)
예제 #4
0
 def testCollectReadHashes(self):
     """
     Passing a scanned query to getHashes must produce a dict keyed by hash
     string, in which each value lists the [landmark, feature] pairs that
     gave rise to that hash.
     """
     params = DatabaseParameters(landmarks=[AlphaHelix],
                                 trigPoints=[Peaks], distanceBase=1.0)
     backend = Backend()
     backend.configure(params)
     probe = AARead('query', 'FRRRFRRRFASAASAFRRRFRRRFASAASA')
     scanned = backend.scan(probe)
     hashes = backend.getHashes(scanned)

     # Expected features, indexed by their offset in the query.
     helices = {
         offset: Landmark(AlphaHelix.NAME, AlphaHelix.SYMBOL, offset, 9, 2)
         for offset in (0, 15)
     }
     peaks = {
         offset: TrigPoint(Peaks.NAME, Peaks.SYMBOL, offset)
         for offset in (10, 13, 25, 28)
     }

     # In this data the number ending each key is the second feature's
     # offset minus the landmark's offset.
     expected = {
         'A2:A2:15': [[helices[0], helices[15]]],
         'A2:P:-5': [[helices[15], peaks[10]]],
         'A2:P:-2': [[helices[15], peaks[13]]],
         'A2:P:10': [[helices[0], peaks[10]], [helices[15], peaks[25]]],
         'A2:P:13': [[helices[0], peaks[13]], [helices[15], peaks[28]]],
         'A2:P:25': [[helices[0], peaks[25]]],
         'A2:P:28': [[helices[0], peaks[28]]],
     }
     self.assertEqual(expected, hashes)
예제 #5
0
    def testFindTwoMatchingInSameSubject(self):
        """
        Searching a backend for a query identical to its only subject must
        report two landmark/trig point pairings for that subject, a hash
        count of two, and no non-matching hashes.
        """
        residues = 'FRRRFRRRFASAASA'
        target = AARead('subject', residues)
        probe = AARead('query', residues)
        params = DatabaseParameters(landmarks=[AlphaHelix],
                                    trigPoints=[Peaks])
        backend = Backend()
        backend.configure(params)
        backend.addSubject(target, '0')
        found, total, unmatched = backend.find(probe)

        # One helix paired with each of two peaks; query and subject
        # offsets coincide because the sequences are the same.
        pairingAt10 = {
            'queryLandmark': Landmark('AlphaHelix', 'A', 0, 9, 2),
            'queryTrigPoint': TrigPoint('Peaks', 'P', 10),
            'subjectLandmark': Landmark('AlphaHelix', 'A', 0, 9, 2),
            'subjectTrigPoint': TrigPoint('Peaks', 'P', 10),
        }
        pairingAt13 = {
            'queryLandmark': Landmark('AlphaHelix', 'A', 0, 9, 2),
            'queryTrigPoint': TrigPoint('Peaks', 'P', 13),
            'subjectLandmark': Landmark('AlphaHelix', 'A', 0, 9, 2),
            'subjectTrigPoint': TrigPoint('Peaks', 'P', 13),
        }
        self.assertEqual({'0': [pairingAt10, pairingAt13]}, found)
        self.assertEqual(2, total)
        self.assertEqual({}, unmatched)
예제 #6
0
 def testHashkeyNoSymbolDetail(self):
     """
     A landmark created without a symbol detail argument must give a
     hashkey equal to its bare symbol.
     """
     key = Landmark('name', 'L', 0, 1).hashkey()
     self.assertEqual('L', key)
예제 #7
0
 def testApoamicyaninFiveCoilsWithNonDefaultFeatureLengthBase(self):
     """
     The GOR4Coil landmark finder must find the five expected landmarks
     in a fragment of the APOAMICYANIN sequence from the GOR IV reference
     database. The symbol detail of every landmark must be that landmark's
     own length, scaled using the non-default featureLengthBase.
     """
     seq = 'DKATIPSESPFAAAEVADGAIVVDIAKMKYETPELHVKVGDTVTWINREA'
     read = AARead('id', seq)
     featureLengthBase = 1.5
     dbParams = DatabaseParameters(featureLengthBase=featureLengthBase)
     landmark = GOR4Coil(dbParams)
     result = list(landmark.find(read))
     # (offset, length) of each expected coil.
     #
     # NOTE(review): this test previously quoted the GOR IV secondary
     # structure prediction as
     # 'CCCCCCCCCCHHHHHHHCCHHHHHHHHHHHCCCCEEEEECCEEEEEEEEC'.
     # The coil runs in that string do not all appear to line up with the
     # offsets and lengths below - confirm against the current predictor.
     expectedCoils = ((0, 10), (17, 2), (28, 6), (39, 3), (45, 5))
     # Scale each landmark's own length so that the expected symbol detail
     # can never disagree with the expected length (the final landmark has
     # length 5 and was previously given the scaled value for length 4).
     self.assertEqual(
         [Landmark('GOR4Coil', 'GC', offset, length,
                   scaleLog(length, featureLengthBase))
          for offset, length in expectedCoils],
         result)
예제 #8
0
    def find(self, read):
        """
        Locate candidate coils in a read via GOR IV. Each maximal run of
        'C' characters in the prediction for the read's sequence becomes
        one landmark whose symbol detail is the run length scaled by the
        database's featureLengthBase.

        @param read: An instance of C{dark.reads.AARead}.
        @return: A generator that yields C{Landmark} instances.
        """
        structure = predictions(read.sequence)
        base = self._dbParams.featureLengthBase
        # Offset at which the current run of C's began, or None when the
        # scan is not inside a run.
        runStart = None
        position = -1
        for position, state in enumerate(structure):
            inCoil = state == 'C'
            if inCoil and runStart is None:
                # First C of a new run.
                runStart = position
            elif not inCoil and runStart is not None:
                # The run ended on the previous position.
                runLength = position - runStart
                yield Landmark(self.NAME, self.SYMBOL, runStart, runLength,
                               scaleLog(runLength, base))
                runStart = None

        if runStart is not None:
            # The prediction finished while still inside a coil.
            runLength = position + 1 - runStart
            yield Landmark(self.NAME, self.SYMBOL, runStart, runLength,
                           scaleLog(runLength, base))
예제 #9
0
 def testHashkeyWithSymbolDetail(self):
     """
     A landmark created with a symbol detail must give a hashkey made of
     its symbol followed by that detail.
     """
     key = Landmark('name', 'L', 0, 1, 2).hashkey()
     self.assertEqual('L2', key)
예제 #10
0
 def testFindOneMatchingInsignificant(self):
     """
     A query that shares two landmark/trig point pairings with the only
     subject must have that subject in the result's matches, but when
     find is called without find parameters the subject must not be
     reported as significant.
     """
     target = AARead('subject', 'AFRRRFRRRFASAASAVVVVVVASAVVVASA')
     probe = AARead('query', 'FRRRFRRRFASAASAFRRRFRRRFFRRRFRRRFFRRRFRRRF')
     params = DatabaseParameters(landmarks=[AlphaHelix, BetaStrand],
                                 trigPoints=[Peaks])
     database = Database(params)
     database.addSubject(target)
     outcome = database.find(probe)

     firstPairing = {
         'queryLandmark': Landmark('AlphaHelix', 'A', 0, 9, 2),
         'queryTrigPoint': TrigPoint('Peaks', 'P', 10),
         'subjectLandmark': Landmark('AlphaHelix', 'A', 1, 9, 2),
         'subjectTrigPoint': TrigPoint('Peaks', 'P', 11),
     }
     secondPairing = {
         'queryLandmark': Landmark('AlphaHelix', 'A', 0, 9, 2),
         'queryTrigPoint': TrigPoint('Peaks', 'P', 13),
         'subjectLandmark': Landmark('AlphaHelix', 'A', 1, 9, 2),
         'subjectTrigPoint': TrigPoint('Peaks', 'P', 14),
     }
     self.assertEqual({'0': [firstPairing, secondPairing]},
                      outcome.matches)

     significant = list(outcome.significantSubjects())
     self.assertEqual(0, len(significant))
예제 #11
0
    def testFindTwoMatchingInSameSubject(self):
        """
        Searching a database for a query identical to its only subject
        must put two landmark/trig point pairings for that subject in the
        result's matches.
        """
        residues = 'FRRRFRRRFASAASA'
        target = AARead('subject', residues)
        probe = AARead('query', residues)
        params = DatabaseParameters(landmarks=[AlphaHelix],
                                    trigPoints=[Peaks])
        database = Database(params)
        database.addSubject(target)
        outcome = database.find(probe)

        # One helix paired with each of two peaks; query and subject
        # offsets coincide because the sequences are the same.
        pairingAt10 = {
            'queryLandmark': Landmark('AlphaHelix', 'A', 0, 9, 2),
            'queryTrigPoint': TrigPoint('Peaks', 'P', 10),
            'subjectLandmark': Landmark('AlphaHelix', 'A', 0, 9, 2),
            'subjectTrigPoint': TrigPoint('Peaks', 'P', 10),
        }
        pairingAt13 = {
            'queryLandmark': Landmark('AlphaHelix', 'A', 0, 9, 2),
            'queryTrigPoint': TrigPoint('Peaks', 'P', 13),
            'subjectLandmark': Landmark('AlphaHelix', 'A', 0, 9, 2),
            'subjectTrigPoint': TrigPoint('Peaks', 'P', 13),
        }
        self.assertEqual({'0': [pairingAt10, pairingAt13]},
                         outcome.matches)
예제 #12
0
 def testFindOneMatchingSignificant(self):
     """
     With significanceFraction set to 0.0, searching for a query identical
     to the only subject must put a single landmark/trig point pairing for
     that subject in the result's matches.
     """
     residues = 'AFRRRFRRRFASAASA'
     target = AARead('subject', residues)
     probe = AARead('query', residues)
     params = DatabaseParameters(landmarks=[AlphaHelix],
                                 trigPoints=[Peaks],
                                 maxDistance=11)
     database = Database(params)
     database.addSubject(target)
     searchParams = FindParameters(significanceFraction=0.0)
     outcome = database.find(probe, searchParams)

     pairing = {
         'queryLandmark': Landmark('AlphaHelix', 'A', 1, 9, 2),
         'queryTrigPoint': TrigPoint('Peaks', 'P', 11),
         'subjectLandmark': Landmark('AlphaHelix', 'A', 1, 9, 2),
         'subjectTrigPoint': TrigPoint('Peaks', 'P', 11),
     }
     self.assertEqual({'0': [pairing]}, outcome.matches)
예제 #13
0
 def testDifferingSymbolDetailsNonEqual(self):
     """
     Two landmarks that differ only in their symbol detail must be unequal.
     """
     withDetail0 = Landmark('name', 'L', 0, 1, 0)
     withDetail1 = Landmark('name', 'L', 0, 1, 1)
     self.assertNotEqual(withDetail0, withDetail1)
예제 #14
0
 def testDifferingOffsetsNonEqual(self):
     """
     Two landmarks that differ only in their offset must be unequal.
     """
     atOffset0 = Landmark('name', 'L', 0, 1)
     atOffset1 = Landmark('name', 'L', 1, 1)
     self.assertNotEqual(atOffset0, atOffset1)
예제 #15
0
 def testDifferingLengthsNonEqual(self):
     """
     Two landmarks that differ only in their length must be unequal.
     """
     shorter = Landmark('name', 'L', 0, 1)
     longer = Landmark('name', 'L', 0, 2)
     self.assertNotEqual(shorter, longer)
예제 #16
0
 def testDifferingNamesNonEqual(self):
     """
     Two landmarks that differ only in their name must be unequal.
     """
     first = Landmark('name1', 'L', 0, 1)
     second = Landmark('name2', 'L', 0, 1)
     self.assertNotEqual(first, second)
예제 #17
0
 def testEqual(self):
     """
     Two landmarks built from the same arguments must be equal.
     """
     original = Landmark('name', 'L', 0, 1)
     duplicate = Landmark('name', 'L', 0, 1)
     self.assertEqual(original, duplicate)
예제 #18
0
 def testFindMatchAfterSaveRestore(self):
     """
     The matches found for a query must be the same before a database is
     saved and after it has been restored from what was saved.
     """
     target = AARead('subject', 'AFRRRFRRRFASAASAVVVVVVASAVVVASA')
     probe = AARead('query', 'FRRRFRRRFASAASAFRRRFRRRFFRRRFRRRFFRRRFRRRF')
     params = DatabaseParameters(landmarks=[AlphaHelix, BetaStrand],
                                 trigPoints=[Peaks])
     original = Database(params)
     original.addSubject(target)
     outcome = original.find(probe)

     firstPairing = {
         'queryLandmark': Landmark('AlphaHelix', 'A', 0, 9, 2),
         'queryTrigPoint': TrigPoint('Peaks', 'P', 10),
         'subjectLandmark': Landmark('AlphaHelix', 'A', 1, 9, 2),
         'subjectTrigPoint': TrigPoint('Peaks', 'P', 11),
     }
     secondPairing = {
         'queryLandmark': Landmark('AlphaHelix', 'A', 0, 9, 2),
         'queryTrigPoint': TrigPoint('Peaks', 'P', 13),
         'subjectLandmark': Landmark('AlphaHelix', 'A', 1, 9, 2),
         'subjectTrigPoint': TrigPoint('Peaks', 'P', 14),
     }
     wanted = {'0': [firstPairing, secondPairing]}
     self.assertEqual(wanted, outcome.matches)

     # Save to an in-memory file, rewind it, and restore a second
     # database from it.
     buf = StringIO()
     original.save(buf)
     buf.seek(0)
     restored = Database.restore(buf)
     outcome = restored.find(probe)
     self.assertEqual(wanted, outcome.matches)
예제 #19
0
 def testFindOneMatchingSignificantWithSubjectIndicesIncludingIt(self):
     """
     The matching subject must still be found when find is given a
     subjectIndices set that contains the subject's index ('0') along with
     other indices ('x' and 'y') that match nothing.
     """
     residues = 'AFRRRFRRRFASAASA'
     target = AARead('subject', residues)
     probe = AARead('query', residues)
     params = DatabaseParameters(landmarks=[AlphaHelix],
                                 trigPoints=[Peaks],
                                 maxDistance=11)
     database = Database(params)
     database.addSubject(target)
     searchParams = FindParameters(significanceFraction=0.0)
     outcome = database.find(probe, searchParams,
                             subjectIndices={'0', 'x', 'y'})

     pairing = {
         'queryLandmark': Landmark('AlphaHelix', 'A', 1, 9, 2),
         'queryTrigPoint': TrigPoint('Peaks', 'P', 11),
         'subjectLandmark': Landmark('AlphaHelix', 'A', 1, 9, 2),
         'subjectTrigPoint': TrigPoint('Peaks', 'P', 11),
     }
     self.assertEqual({'0': [pairing]}, outcome.matches)
예제 #20
0
 def testTwoHelices(self):
     """
     The AlphaHelix_3_10 finder must report both helices present in a read.
     """
     sample = AARead('id', 'FRRFRRFRFRFRFRFRFRFRFRFRFRFRFFRRFRRFRRF')
     finder = AlphaHelix_3_10()
     found = list(finder.find(sample))
     expected = [
         Landmark('AlphaHelix_3_10', 'B', 0, 7, 2),
         Landmark('AlphaHelix_3_10', 'B', 29, 10, 3),
     ]
     self.assertEqual(expected, found)
예제 #21
0
 def testNothingInRange(self):
     """
     The nearest method of a combined feature list must generate nothing
     when no landmark or trig point lies within maxDistance of the offset
     it is given.
     """
     features = CombinedFeatureList(
         [Landmark('name1', 'L1', 0, 1), Landmark('name2', 'L2', 10, 1)], [])
     nearby = list(features.nearest(5, maxDistance=2))
     self.assertEqual([], nearby)
예제 #22
0
 def testTwoHelices(self):
     """
     The AlphaHelix finder must report both helices present in a read.
     """
     sample = AARead('id', 'FRRRFRRRFRFRFRFRFRFRFRFRFRFRFRFFRRRFRRRFRRRF')
     finder = AlphaHelix()
     found = list(finder.find(sample))
     expected = [
         Landmark('AlphaHelix', 'A', 0, 9, 2),
         Landmark('AlphaHelix', 'A', 31, 13, 3),
     ]
     self.assertEqual(expected, found)
예제 #23
0
 def testFindPartiallyOverlappingMatches(self):
     """
     Helices whose matches partly overlap in the read must both be
     returned by the find method.
     """
     helixFinder = setAlphaHelices_3_10(['FFFFR', 'FRMMM'])
     sample = AARead('id', 'FFFFRMMM')
     hits = list(helixFinder.find(sample))
     # 'FFFFR' matches at offset 0 and 'FRMMM' at offset 3, sharing 'FR'.
     expected = [
         Landmark('AC AlphaHelix_3_10', 'ACAH310', 0, 5),
         Landmark('AC AlphaHelix_3_10', 'ACAH310', 3, 5),
     ]
     self.assertEqual(expected, sorted(hits))
예제 #24
0
 def testFindSeparatedMatches(self):
     """
     Strands whose matches are separated by other residues in the read
     must both be returned by the find method.
     """
     strandFinder = setExtendedStrands(['RRRRR', 'FFF'])
     sample = AARead('id', 'FFFMMRRRRR')
     hits = list(strandFinder.find(sample))
     expected = [
         Landmark('AC ExtendedStrand', 'ACES', 0, 3),
         Landmark('AC ExtendedStrand', 'ACES', 5, 5),
     ]
     self.assertEqual(expected, sorted(hits))
예제 #25
0
 def testFindContiguousMatches(self):
     """
     Strands whose matches sit directly next to each other in the read
     must both be returned by the find method.
     """
     strandFinder = setExtendedStrands(['RRR', 'FFF'])
     sample = AARead('id', 'FFFRRR')
     hits = list(strandFinder.find(sample))
     expected = [
         Landmark('AC ExtendedStrand', 'ACES', 0, 3),
         Landmark('AC ExtendedStrand', 'ACES', 3, 3),
     ]
     self.assertEqual(expected, sorted(hits))
예제 #26
0
 def testCoverageOfTwoDifferentOverlappingLandmarkPoints(self):
     """
     For a scanned read holding two overlapping landmarks with different
     symbols, coveredIndices must return every index spanned by either
     landmark.
     """
     scanned = ScannedRead(AARead('id', 'AAAAA'))
     # The first landmark spans indices 0-4 and the second spans 4-5.
     for symbol, offset, length in (('symbol1', 0, 5), ('symbol2', 4, 2)):
         scanned.landmarks.append(Landmark('name', symbol, offset, length))
     self.assertEqual(set(range(6)), scanned.coveredIndices())
예제 #27
0
 def testFindPartiallyOverlappingMatches(self):
     """
     Strands whose matches partly overlap in the read must both be
     returned by the find method.
     """
     strandFinder = setExtendedStrands(['FFFFR', 'FRMMM'])
     sample = AARead('id', 'FFFFRMMM')
     hits = list(strandFinder.find(sample))
     # 'FFFFR' matches at offset 0 and 'FRMMM' at offset 3, sharing 'FR'.
     expected = [
         Landmark('AC ExtendedStrand', 'ACES', 0, 5),
         Landmark('AC ExtendedStrand', 'ACES', 3, 5),
     ]
     self.assertEqual(expected, sorted(hits))
예제 #28
0
 def testFindContiguousMatches(self):
     """
     Helices whose matches sit directly next to each other in the read
     must both be returned by the find method.
     """
     helixFinder = setAlphaHelices_3_10(['RRR', 'FFF'])
     sample = AARead('id', 'FFFRRR')
     hits = list(helixFinder.find(sample))
     expected = [
         Landmark('AC AlphaHelix_3_10', 'ACAH310', 0, 3),
         Landmark('AC AlphaHelix_3_10', 'ACAH310', 3, 3),
     ]
     self.assertEqual(expected, sorted(hits))
예제 #29
0
 def testMultipleMatches(self):
     """
     The BetaTurn finder must report each of the beta turns present in a
     single read.
     """
     sample = AARead('id', 'NPNWAACSDYAADKAY')
     finder = BetaTurn()
     found = list(finder.find(sample))
     expected = [
         Landmark('BetaTurn', 'BT', 0, 4),
         Landmark('BetaTurn', 'BT', 6, 4),
     ]
     self.assertEqual(expected, found)
예제 #30
0
 def testFindCompletelyOverlappingMatches(self):
     """
     Every helix match must be returned by the find method, even when one
     match lies entirely inside another.
     """
     helixFinder = setAlphaHelices_3_10(['FF', 'FFF'])
     sample = AARead('id', 'FFF')
     hits = list(helixFinder.find(sample))
     # 'FF' matches at offsets 0 and 1, both inside the 'FFF' match at 0.
     expected = [
         Landmark('AC AlphaHelix_3_10', 'ACAH310', 0, 2),
         Landmark('AC AlphaHelix_3_10', 'ACAH310', 0, 3),
         Landmark('AC AlphaHelix_3_10', 'ACAH310', 1, 2),
     ]
     self.assertEqual(expected, sorted(hits))