Esempio n. 1
0
    def testFindOneMatchingHashInOneLocation(self):
        """
        A single subject that shares exactly one hash with the query (and
        that hash occurs at just one location) must be reported by find().
        """
        aa = 'AFRRRFRRRFASAASA'
        params = DatabaseParameters(landmarks=[AlphaHelix],
                                    trigPoints=[Peaks], maxDistance=11)
        backend = Backend()
        backend.configure(params)
        backend.addSubject(AARead('subject', aa), '0')
        matches, hashCount, nonMatchingHashes = backend.find(
            AARead('query', aa))

        # The query and the subject are the same sequence, so the expected
        # offsets are identical on both sides of the match.
        expectedMatches = {
            '0': [{
                'queryLandmark': Landmark('AlphaHelix', 'A', 1, 9, 2),
                'queryTrigPoint': TrigPoint('Peaks', 'P', 11),
                'subjectLandmark': Landmark('AlphaHelix', 'A', 1, 9, 2),
                'subjectTrigPoint': TrigPoint('Peaks', 'P', 11),
            }],
        }
        self.assertEqual(expectedMatches, matches)
        self.assertEqual(1, hashCount)
        self.assertEqual({}, nonMatchingHashes)
Esempio n. 2
0
    def testTwoReadsTwoLandmarksSameOffsets(self):
        """
        Adding two identical reads, each with two landmarks at the same
        offsets, must result in a single backend key whose value lists both
        reads.

        Note that A3:A2:-23 is not added to the backend since that would be
        redundant (it's the same two landmarks, with the same separation,
        just with the sign changed).
        """
        aa = 'FRRRFRRRFAAAAAAAAAAAAAAFRRRFRRRFRRRF'
        params = DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[])
        backend = Backend()
        backend.configure(params)
        # The same sequence is stored twice, under different ids and indices.
        for readId, subjectIndex in (('id1', '0'), ('id2', '1')):
            backend.addSubject(AARead(readId, aa), subjectIndex)
        key = 'A2:A3:' + str(scaleLog(23, _DEFAULT_DISTANCE_BASE))
        expected = {
            key: {
                '0': [[0, 9, 23, 13]],
                '1': [[0, 9, 23, 13]],
            },
        }
        self.assertEqual(expected, backend.d)
Esempio n. 3
0
    def testFindWithIdenticalNonMatchingHashes(self):
        """
        When True is passed as the second argument to find() (to store the
        full analysis), a non-matching hash that occurs more than once in
        the query must be reported with every one of its occurrences.
        """
        params = DatabaseParameters(landmarks=[AlphaHelix, BetaStrand],
                                    trigPoints=[Peaks], maxDistance=10)
        backend = Backend()
        backend.configure(params)
        backend.addSubject(AARead('subject', 'F'), '0')
        queryRead = AARead('query', 'AFRRRFRRRFASAAAAAAAAAAAFRRRFRRRFASA')
        matches, hashCount, nonMatchingHashes = backend.find(queryRead, True)

        self.assertEqual({}, matches)
        self.assertEqual(2, hashCount)

        # Both occurrences share the same hash key and are listed under it.
        firstOccurrence = [
            Landmark(AlphaHelix.NAME, AlphaHelix.SYMBOL, 1, 9, 2),
            TrigPoint(Peaks.NAME, Peaks.SYMBOL, 11),
        ]
        secondOccurrence = [
            Landmark(AlphaHelix.NAME, AlphaHelix.SYMBOL, 23, 9, 2),
            TrigPoint(Peaks.NAME, Peaks.SYMBOL, 33),
        ]
        self.assertEqual({'A2:P:10': [firstOccurrence, secondOccurrence]},
                         nonMatchingHashes)
Esempio n. 4
0
    def testFindOneMatchingHashInTwoLocations(self):
        """
        A single matching subject whose matching hash occurs in two
        locations must have both locations reported by find().
        """
        params = DatabaseParameters(landmarks=[AlphaHelix, BetaStrand],
                                    trigPoints=[Peaks])
        backend = Backend()
        backend.configure(params)
        backend.addSubject(
            AARead('subject', 'AFRRRFRRRFASAASAVVVVVVASAVVVASA'), '0')
        queryRead = AARead(
            'query', 'FRRRFRRRFASAASAFRRRFRRRFFRRRFRRRFFRRRFRRRF')
        matches, hashCount, nonMatchingHashes = backend.find(queryRead)

        firstLocation = {
            'queryLandmark': Landmark('AlphaHelix', 'A', 0, 9, 2),
            'queryTrigPoint': TrigPoint('Peaks', 'P', 10),
            'subjectLandmark': Landmark('AlphaHelix', 'A', 1, 9, 2),
            'subjectTrigPoint': TrigPoint('Peaks', 'P', 11),
        }
        secondLocation = {
            'queryLandmark': Landmark('AlphaHelix', 'A', 0, 9, 2),
            'queryTrigPoint': TrigPoint('Peaks', 'P', 13),
            'subjectLandmark': Landmark('AlphaHelix', 'A', 1, 9, 2),
            'subjectTrigPoint': TrigPoint('Peaks', 'P', 14),
        }
        self.assertEqual({'0': [firstLocation, secondLocation]}, matches)

        self.assertEqual(14, hashCount)
        self.assertEqual({}, nonMatchingHashes)
Esempio n. 5
0
    def testFindTwoMatchingInSameSubject(self):
        """
        When the query shares two hashes with one subject, both matches
        must be returned under that subject's index.
        """
        aa = 'FRRRFRRRFASAASA'
        params = DatabaseParameters(landmarks=[AlphaHelix],
                                    trigPoints=[Peaks])
        backend = Backend()
        backend.configure(params)
        backend.addSubject(AARead('subject', aa), '0')
        matches, hashCount, nonMatchingHashes = backend.find(
            AARead('query', aa))

        # Query and subject are identical, so offsets agree on both sides.
        matchAtTen = {
            'queryLandmark': Landmark('AlphaHelix', 'A', 0, 9, 2),
            'queryTrigPoint': TrigPoint('Peaks', 'P', 10),
            'subjectLandmark': Landmark('AlphaHelix', 'A', 0, 9, 2),
            'subjectTrigPoint': TrigPoint('Peaks', 'P', 10),
        }
        matchAtThirteen = {
            'queryLandmark': Landmark('AlphaHelix', 'A', 0, 9, 2),
            'queryTrigPoint': TrigPoint('Peaks', 'P', 13),
            'subjectLandmark': Landmark('AlphaHelix', 'A', 0, 9, 2),
            'subjectTrigPoint': TrigPoint('Peaks', 'P', 13),
        }
        self.assertEqual({'0': [matchAtTen, matchAtThirteen]}, matches)
        self.assertEqual(2, hashCount)
        self.assertEqual({}, nonMatchingHashes)
Esempio n. 6
0
 def testPrint(self):
     """
     print_() must return the expected multi-line description of a backend
     that holds one subject.
     """
     params = DatabaseParameters(landmarks=[AlphaHelix, BetaStrand],
                                 trigPoints=[Peaks, Troughs],
                                 limitPerLandmark=16, maxDistance=10,
                                 minDistance=0, distanceBase=1)
     backend = Backend()
     backend.configure(params)
     backend.addSubject(AARead('subject-id', 'FRRRFRRRFASAASA'), '0')
     # One entry per output line; there is no trailing newline.
     expectedLines = [
         'Name: backend',
         'Hash count: 3',
         'Checksum: 2751160351',
         'Subjects (with offsets) by hash:',
         '  A2:P:10',
         '    0 [[0, 9, 10, 1]]',
         '  A2:T:4',
         '    0 [[0, 9, 4, 1]]',
         '  A2:T:8',
         '    0 [[0, 9, 8, 1]]',
         'Landmark symbol counts:',
         '  AlphaHelix (A2): 3',
         'Trig point symbol counts:',
         '  Peaks (P): 1',
         '  Troughs (T): 2',
     ]
     self.assertEqual('\n'.join(expectedLines), backend.print_())
Esempio n. 7
0
 def testOneReadOneLandmark(self):
     """
     A subject that has only a single landmark must leave the backend
     dictionary empty.
     """
     params = DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[])
     backend = Backend()
     backend.configure(params)
     read = AARead('id', 'FRRRFRRRF')
     backend.addSubject(read, '0')
     self.assertEqual({}, backend.d)
Esempio n. 8
0
 def testOneReadOneLandmarkOnePeakNoTrigFinders(self):
     """
     A subject with one landmark and one peak must add nothing to the
     backend when no trig point finders are configured.
     """
     params = DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[])
     backend = Backend()
     backend.configure(params)
     read = AARead('id', 'FRRRFRRRFASA')
     backend.addSubject(read, '0')
     self.assertEqual({}, backend.d)
Esempio n. 9
0
 def testAddSameSubjectIncreasesBackendSize(self):
     """
     Adding an identical subject more than once must not increase the
     backend's subject count: it is 1 after the first add and still 1
     after the second.

     NOTE(review): the method name says the size increases, which is the
     opposite of what is asserted here. The name is kept unchanged.
     """
     params = DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[])
     backend = Backend()
     backend.configure(params)
     # Add the same (id, sequence) twice, checking the count each time.
     for _ in range(2):
         backend.addSubject(AARead('id', 'FRRRFRRRF'), '0')
         self.assertEqual(1, backend.subjectCount())
Esempio n. 10
0
 def testOneReadOneLandmarkTwoPeaksSevereMinDistance(self):
     """
     A subject with one landmark and two peaks must add no keys to the
     backend when a severe minimum distance is in force.
     """
     params = DatabaseParameters(landmarks=[AlphaHelix],
                                 trigPoints=[Peaks], minDistance=100)
     backend = Backend()
     backend.configure(params)
     read = AARead('id', 'FRRRFRRRFASAASA')
     backend.addSubject(read, '0')
     self.assertEqual({}, backend.d)
Esempio n. 11
0
 def testScan(self):
     """
     scan() must return a ScannedRead instance.
     """
     params = DatabaseParameters(landmarks=[AlphaHelix],
                                 trigPoints=[Peaks])
     backend = Backend()
     backend.configure(params)
     read = AARead('subject', 'FRRRFRRRFASAASA')
     backend.addSubject(read, '0')
     self.assertIsInstance(backend.scan(read), ScannedRead)
Esempio n. 12
0
 def testTwoReadsTwoLandmarksLimitZeroPairsPerLandmark(self):
     """
     With limitPerLandmark set to zero, adding two identical reads (each
     with two landmarks) must not add any keys to the backend dictionary.
     """
     aa = 'FRRRFRRRFAAAAAAAAAAAAAAFRRRFRRRFRRRF'
     params = DatabaseParameters(landmarks=[AlphaHelix],
                                 trigPoints=[], limitPerLandmark=0)
     backend = Backend()
     backend.configure(params)
     for readId, subjectIndex in (('id1', '0'), ('id2', '1')):
         backend.addSubject(AARead(readId, aa), subjectIndex)
     self.assertEqual({}, backend.d)
Esempio n. 13
0
 def testOneReadOneLandmarkOnePeak(self):
     """
     A subject with one landmark and one peak must add exactly one pair
     to the backend.
     """
     params = DatabaseParameters(landmarks=[AlphaHelix],
                                 trigPoints=[Peaks])
     backend = Backend()
     backend.configure(params)
     backend.addSubject(AARead('id', 'FRRRFRRRFASA'), '0')
     # The key's distance component is scaled with the default base.
     key = 'A2:P:' + str(scaleLog(10, _DEFAULT_DISTANCE_BASE))
     expected = {key: {'0': [[0, 9, 10, 1]]}}
     self.assertEqual(expected, backend.d)
Esempio n. 14
0
 def testOneReadTwoLandmarks(self):
     """
     A subject with two landmarks must add exactly one key to the backend.
     """
     params = DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[])
     backend = Backend()
     backend.configure(params)
     read = AARead('id', 'FRRRFRRRFAAAAAAAAAAAAAAFRRRFRRRFRRRF')
     backend.addSubject(read, '0')
     key = 'A2:A3:' + str(scaleLog(23, _DEFAULT_DISTANCE_BASE))
     expected = {key: {'0': [[0, 9, 23, 13]]}}
     self.assertEqual(expected, backend.d)
Esempio n. 15
0
    def testFindNoMatch(self):
        """
        A query that shares nothing with the stored subject must produce no
        matches, a hash count of zero and no non-matching hashes.

        Note that the backend is not empty here: one subject is added
        before find() is called.
        """
        params = DatabaseParameters(landmarks=[AlphaHelix],
                                    trigPoints=[Peaks])
        backend = Backend()
        backend.configure(params)
        backend.addSubject(AARead('subject', 'FRRRFRRRFASAASA'), '0')
        matches, hashCount, nonMatchingHashes = backend.find(
            AARead('query', 'FRRR'))

        self.assertEqual({}, matches)
        self.assertEqual(0, hashCount)
        self.assertEqual({}, nonMatchingHashes)
Esempio n. 16
0
 def testOneReadOneLandmarkTwoPeaksIntermediateMinDistance(self):
     """
     With an intermediate minimum distance, a subject with one landmark
     and two peaks must only add the key for the pair whose separation
     exceeds that minimum.
     """
     params = DatabaseParameters(landmarks=[AlphaHelix],
                                 trigPoints=[Peaks], minDistance=11)
     backend = Backend()
     backend.configure(params)
     backend.addSubject(AARead('id', 'FRRRFRRRFASAASA'), '0')
     # Only the pair separated by 13 is expected to be stored.
     key = 'A2:P:' + str(scaleLog(13, _DEFAULT_DISTANCE_BASE))
     expected = {key: {'0': [[0, 9, 13, 1]]}}
     self.assertEqual(expected, backend.d)
Esempio n. 17
0
 def testOneReadOneLandmarkTwoPeaksLimitOnePairPerLandmark(self):
     """
     With a limit of one pair per landmark, a subject with one landmark
     and two peaks must add only one key to the backend.
     """
     params = DatabaseParameters(landmarks=[AlphaHelix],
                                 trigPoints=[Peaks], limitPerLandmark=1)
     backend = Backend()
     backend.configure(params)
     backend.addSubject(AARead('id', 'FRRRFRRRFASAASA'), '0')
     # The pair separated by 10 is the one expected to be kept.
     key = 'A2:P:' + str(scaleLog(10, _DEFAULT_DISTANCE_BASE))
     expected = {key: {'0': [[0, 9, 10, 1]]}}
     self.assertEqual(expected, backend.d)
Esempio n. 18
0
    def testFindNoneMatchingTooSmallDistance(self):
        """
        If the maximum distance is too small, find() must return no
        matches, even for a query identical to the subject.
        """
        aa = 'AFRRRFRRRFASAASA'
        params = DatabaseParameters(landmarks=[AlphaHelix],
                                    trigPoints=[Peaks], maxDistance=1)
        backend = Backend()
        backend.configure(params)
        backend.addSubject(AARead('subject', aa), '0')
        matches, hashCount, nonMatchingHashes = backend.find(
            AARead('query', aa))

        self.assertEqual({}, matches)
        self.assertEqual(0, hashCount)
        self.assertEqual({}, nonMatchingHashes)
Esempio n. 19
0
    def testChecksumAfterSubjectAdded(self):
        """
        After a subject is added, the backend checksum must equal that of a
        Checksum (started from the same initial value) updated with the
        subject's id and sequence.
        """
        initialChecksum = 10
        aa = 'AFRRRFRRRFASAASA'
        params = DatabaseParameters(landmarks=[], trigPoints=[])
        backend = Backend()
        backend.configure(params, 'backend', initialChecksum)
        backend.addSubject(AARead('id', aa), '0')

        expected = Checksum(initialChecksum).update(['id', aa])
        self.assertEqual(expected.value, backend.checksum())
Esempio n. 20
0
    def testFindNoneMatchingNoTrigPoint(self):
        """
        With only one landmark and no trig point finders, find() must
        return no matches.
        """
        aa = 'AFRRRFRRRFASAASA'
        params = DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[])
        backend = Backend()
        backend.configure(params)
        backend.addSubject(AARead('subject', aa), '0')
        matches, hashCount, nonMatchingHashes = backend.find(
            AARead('query', aa))

        self.assertEqual({}, matches)
        self.assertEqual(0, hashCount)
        self.assertEqual({}, nonMatchingHashes)
Esempio n. 21
0
 def testOneReadOneLandmarkTwoPeaksIntermediateMaxDistance(self):
     """
     With a maximum distance that puts one of the two peaks out of range,
     a subject with one landmark and two peaks must add only one key to
     the backend.
     """
     params = DatabaseParameters(landmarks=[AlphaHelix],
                                 trigPoints=[Peaks], maxDistance=11)
     backend = Backend()
     backend.configure(params)
     backend.addSubject(AARead('id', 'FRRRFRRRFASAASA'), '0')
     # Only the pair separated by 10 is expected to be stored.
     key = 'A2:P:' + str(scaleLog(10, _DEFAULT_DISTANCE_BASE))
     expected = {key: {'0': [[0, 9, 10, 1]]}}
     self.assertEqual(expected, backend.d)
Esempio n. 22
0
 def testSaveRestoreNonEmpty(self):
     """
     Saving a non-empty backend and restoring it must produce a backend
     with the same subject count, hash dictionary, checksum and database
     parameters.
     """
     params = DatabaseParameters(landmarks=[AlphaHelix, BetaStrand],
                                 trigPoints=[Peaks, Troughs])
     original = Backend()
     original.configure(params)
     original.addSubject(AARead('id', 'FRRRFRRRFASAASA'), '0')

     # Round-trip through an in-memory file, rewinding before the read.
     buf = StringIO()
     original.save(buf)
     buf.seek(0)
     restored = Backend.restore(buf)

     self.assertEqual(original.subjectCount(), restored.subjectCount())
     self.assertEqual(original.d, restored.d)
     self.assertEqual(original.checksum(), restored.checksum())
     # The test expects compare() to return None for these parameters.
     self.assertIs(None, original.dbParams.compare(restored.dbParams))
Esempio n. 23
0
 def testOneReadOneLandmarkTwoPeaksLargeMaxDistance(self):
     """
     With a maximum distance greater than the distance to both peaks, a
     subject with one landmark and two peaks must add two keys to the
     backend.
     """
     params = DatabaseParameters(landmarks=[AlphaHelix],
                                 trigPoints=[Peaks], maxDistance=15)
     backend = Backend()
     backend.configure(params)
     backend.addSubject(AARead('id', 'FRRRFRRRFASAASA'), '0')
     keyFor13 = 'A2:P:' + str(scaleLog(13, _DEFAULT_DISTANCE_BASE))
     keyFor10 = 'A2:P:' + str(scaleLog(10, _DEFAULT_DISTANCE_BASE))
     expected = {
         keyFor13: {'0': [[0, 9, 13, 1]]},
         keyFor10: {'0': [[0, 9, 10, 1]]},
     }
     self.assertEqual(expected, backend.d)
Esempio n. 24
0
    def testFindOneMatchingHashButSubjectExcluded(self):
        """
        A subject that would otherwise match on one hash (at one location)
        must not be returned when find() is given a subjectIndices argument
        that excludes it. The hash count is still 1.
        """
        aa = 'AFRRRFRRRFASAASA'
        params = DatabaseParameters(landmarks=[AlphaHelix],
                                    trigPoints=[Peaks], maxDistance=11)
        backend = Backend()
        backend.configure(params)
        backend.addSubject(AARead('subject', aa), '0')
        # An empty set of subject indices excludes the only stored subject.
        result = backend.find(AARead('query', aa), subjectIndices=set())
        matches, hashCount, nonMatchingHashes = result

        self.assertEqual({}, matches)
        self.assertEqual(1, hashCount)
        self.assertEqual({}, nonMatchingHashes)
Esempio n. 25
0
    def testChecksumAfterSaveRestore(self):
        """
        A backend that has one sequence added, is saved and restored, and
        then has a second sequence added must have the same checksum as a
        backend that has the two sequences added without interruption.
        """
        firstSeq = 'FRRRFRRRFASAASA'
        secondSeq = 'MMMMMMMMMFRRRFR'

        # Backend with a save/restore between the two additions.
        interruptedParams = DatabaseParameters(
            landmarks=[AlphaHelix, BetaStrand], trigPoints=[Peaks, Troughs])
        interrupted = Backend()
        interrupted.configure(interruptedParams, 'name1', 0)
        interrupted.addSubject(AARead('id1', firstSeq), '0')
        buf = StringIO()
        interrupted.save(buf)
        buf.seek(0)
        interrupted = Backend.restore(buf)
        interrupted.addSubject(AARead('id2', secondSeq), '1')

        # Backend that receives both subjects back to back. It is given a
        # different name ('name2') from the first backend.
        directParams = DatabaseParameters(
            landmarks=[AlphaHelix, BetaStrand], trigPoints=[Peaks, Troughs])
        direct = Backend()
        direct.configure(directParams, 'name2', 0)
        direct.addSubject(AARead('id1', firstSeq), '0')
        direct.addSubject(AARead('id2', secondSeq), '1')

        self.assertEqual(interrupted.checksum(), direct.checksum())
Esempio n. 26
0
 def testOneReadOneLandmarkOnePeakDistanceBase(self):
     """
     When a non-default distanceBase of 2.0 is used, the stored key must
     carry the distance as scaled with that base. The offsets here are 10
     AA apart and the expected key component is computed with scaleLog
     rather than hard-coded, so this test does not need to change if the
     scale function ever does (test_distance.py covers the scaled values
     themselves).
     """
     base = 2.0
     params = DatabaseParameters(landmarks=[AlphaHelix],
                                 trigPoints=[Peaks],
                                 distanceBase=base)
     backend = Backend()
     backend.configure(params)
     backend.addSubject(AARead('id', 'FRRRFRRRFASA'), '0')
     key = 'A2:P:' + str(scaleLog(10, base))
     expected = {key: {'0': [[0, 9, 10, 1]]}}
     self.assertEqual(expected, backend.d)
Esempio n. 27
0
 def testAddSubjectReturnsCorrectResult(self):
     """
     When one subject is added, addSubject must return three values:
     whether the subject already existed, the index of the added subject
     ('0' in this case), and a hash count (0 for this subject).
     """
     params = DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[])
     backend = Backend()
     backend.configure(params)
     result = backend.addSubject(AARead('id', 'FRRRFRRRF'), '0')
     preExisting, subjectIndex, hashCount = result
     self.assertFalse(preExisting)
     self.assertEqual('0', subjectIndex)
     self.assertEqual(0, hashCount)
Esempio n. 28
0
 def testMultipleSubjectOffsets(self):
     """
     If a subject has a landmark and a peak separated by 10 and then,
     later in the subject, the same pair with the same separation, one
     key must be added to the backend and it must have two subject
     offsets. minDistance and maxDistance are used to discard the longer
     and shorter distance pairs that appear in the subject only once.
     """
     unit = 'FRRRFRRRFASA'
     params = DatabaseParameters(landmarks=[AlphaHelix],
                                 trigPoints=[Peaks], minDistance=5,
                                 maxDistance=10)
     backend = Backend()
     backend.configure(params)
     # The subject is the same unit repeated twice.
     backend.addSubject(AARead('id', unit + unit), '0')
     key = 'A2:P:' + str(scaleLog(10, _DEFAULT_DISTANCE_BASE))
     expected = {
         key: {'0': [[0, 9, 10, 1], [12, 9, 22, 1]]},
     }
     self.assertEqual(expected, backend.d)
Esempio n. 29
0
 def testGetScannedPairs(self):
     """
     The getScannedPairs method must return (landmark, trigPoint) pairs
     for a scanned subject.
     """
     read = AARead('subject', 'FRRRFRRRFASAASA')
     params = DatabaseParameters(landmarks=[AlphaHelix],
                                 trigPoints=[Peaks], distanceBase=1.0)
     backend = Backend()
     backend.configure(params)
     backend.addSubject(read, '0')
     pairs = list(backend.getScannedPairs(backend.scan(read)))
     # Both pairs have the same landmark; only the peak offset differs
     # (10 for the first pair, 13 for the second).
     for position, peakOffset in enumerate((10, 13)):
         landmark, trigPoint = pairs[position]
         self.assertEqual(Landmark(AlphaHelix.NAME, AlphaHelix.SYMBOL,
                                   0, 9, 2), landmark)
         self.assertEqual(TrigPoint(Peaks.NAME, Peaks.SYMBOL, peakOffset),
                          trigPoint)
     self.assertEqual(2, len(pairs))