Beispiel #1
0
    def testSymmetricFindScoresSameSubjectAndQuery(self):
        """
        Looking up read A in a database holding read B must give the same
        best bin score as looking up B in a database holding A. When the
        two reads have the same sequence, that score must be 1.0.
        """
        residues = 'AFRRRFRRRFASAASAFRRRFRRRF'
        subject = AARead('subject', residues)
        query = AARead('query', residues)
        findParams = FindParameters(significanceFraction=0.0)

        # Run the lookup in both directions, each time against a fresh
        # database that holds only the other read.
        scores = []
        for stored, lookedUp in ((subject, query), (query, subject)):
            db = Database(DatabaseParameters(
                landmarks=[AlphaHelix, BetaStrand], trigPoints=[Peaks]))
            db.addSubject(stored)
            analysis = db.find(lookedUp, findParams).analysis
            scores.append(analysis['0']['bestBinScore'])

        forward, backward = scores
        self.assertEqual(forward, backward)
        self.assertEqual(1.0, forward)
Beispiel #2
0
    def testSymmetricFindScoresDifferingSubjectAndQuery(self):
        """
        Looking up read A in a database holding read B must give the same
        best bin score as looking up B in a database holding A, also when
        the two reads have different hash counts and the score is not 1.0.
        """
        subject = AARead('subject', 'AFRRRFRRRFASAASAFRRRFRRRF')
        query = AARead('query', 'FRRRFRRRFASAVVVVVV')
        findParams = FindParameters(significanceFraction=0.0)

        # For each direction, record the hash count of the stored read and
        # the best bin score obtained when looking up the other read.
        hashCounts = []
        scores = []
        for stored, lookedUp in ((subject, query), (query, subject)):
            db = Database(DatabaseParameters(
                landmarks=[AlphaHelix, BetaStrand], trigPoints=[Peaks]))
            _, index, _ = db.addSubject(stored)
            hashCounts.append(db.getSubjectByIndex(index).hashCount)
            result = db.find(lookedUp, findParams)
            scores.append(result.analysis['0']['bestBinScore'])

        self.assertNotEqual(hashCounts[0], hashCounts[1])
        self.assertEqual(scores[0], scores[1])
        self.assertNotEqual(1.0, scores[0])
Beispiel #3
0
 def testFindOneMatchingSignificant(self):
     """
     With significanceFraction set to 0.0, a query with the same sequence
     as the only subject must give exactly one match for that subject.
     """
     residues = 'AFRRRFRRRFASAASA'
     db = Database(DatabaseParameters(landmarks=[AlphaHelix],
                                      trigPoints=[Peaks],
                                      maxDistance=11))
     db.addSubject(AARead('subject', residues))
     result = db.find(AARead('query', residues),
                      FindParameters(significanceFraction=0.0))

     # Query and subject share one sequence, so the expected features are
     # equal on both sides of the match.
     helix = Landmark('AlphaHelix', 'A', 1, 9, 2)
     peak = TrigPoint('Peaks', 'P', 11)
     expected = {
         '0': [{
             'queryLandmark': helix,
             'queryTrigPoint': peak,
             'subjectLandmark': helix,
             'subjectTrigPoint': peak,
         }],
     }
     self.assertEqual(expected, result.matches)
Beispiel #4
0
 def testFindBug493Minimal(self):
     """
     Minimal reproduction of the failure reported in
     https://github.com/acorg/light-matter/issues/493

     There are no assertions: the test only requires that the find and
     the normalizeBin call on each significant bin run to completion.
     """
     querySequence = (
         'ALKEDLRSWTAADMAAQTTKHKWEAAHVAEQWRAYLEGTCVEWLRRYLENGKETLQRTDAPK'
         'THMTHHAVSDHEATLRCWALSFYPAEITLTWQRDGEDQTQDTELVETRPAGDGTFQKWVAVV')
     queryStructure = (
         'EE-TTSS-EEESSHHHHHHHHHHHHTTTHHHHHHHHHTHHHHHHHHHHHHHHHHHT--B--E'
         'EEEEEEE-SSSEEEEEEEEEEEBSS-EEEEEEETTEEE-TTEEE---EE-SSS-EEEEEEEE')
     query = SSAARead('2HLA:A', querySequence, queryStructure)

     subjectSequence = (
         'HVLRYGYTGIFDDTSHMTLTVVGIFDGQHFFTYHVQSSDKASSRANGTISWMANVSAAYPTY'
         'PVTHPVVKGGVRNQNDNRAEAFCTSYGFFPGEIQITFIHYGDKVPEDSEPQCNPLLPTLDGT')
     subjectStructure = (
         '-EEEEEEEEEESSSS-EEEEEEEEETTEEEEEEEEESS-SSS-EEEE-STHHHHHHHHSTTH'
         '--B--EEEEEEEEEETTEEEEEEEEEEEBSS--EEEEEEESS---TT---EE---EE-TTS-')
     subject = SSAARead('3D2U:A', subjectSequence, subjectStructure)

     db = Database(DatabaseParameters(landmarks=['PDB ExtendedStrand'],
                                      trigPoints=[],
                                      limitPerLandmark=50,
                                      distanceBase=1.1))
     _, index, _ = db.addSubject(subject)
     result = db.find(query, FindParameters(significanceFraction=0.01),
                      storeFullAnalysis=True)

     # Normalizing every significant bin must not raise.
     for significantBin in result.analysis[index]['significantBins']:
         normalizeBin(significantBin['bin'], len(query))
Beispiel #5
0
    def testFindTwoMatchingInSameSubject(self):
        """
        A query with the same sequence as the subject must give two
        matches for that subject: one landmark paired with each of two
        trig points.
        """
        residues = 'FRRRFRRRFASAASA'
        db = Database(DatabaseParameters(landmarks=[AlphaHelix],
                                         trigPoints=[Peaks]))
        db.addSubject(AARead('subject', residues))
        result = db.find(AARead('query', residues))

        # Both matches use the same landmark; only the trig point offset
        # differs between them.
        helix = Landmark('AlphaHelix', 'A', 0, 9, 2)
        expected = {
            '0': [
                {
                    'queryLandmark': helix,
                    'queryTrigPoint': TrigPoint('Peaks', 'P', offset),
                    'subjectLandmark': helix,
                    'subjectTrigPoint': TrigPoint('Peaks', 'P', offset),
                }
                for offset in (10, 13)
            ],
        }
        self.assertEqual(expected, result.matches)
Beispiel #6
0
 def testFindOneMatchingSignificantWithSubjectIndicesIncludingIt(self):
     """
     The single matching subject must still be found when find is given a
     non-empty subjectIndices set that contains its index ('0') together
     with other indices ('x' and 'y') that match nothing.
     """
     residues = 'AFRRRFRRRFASAASA'
     db = Database(DatabaseParameters(landmarks=[AlphaHelix],
                                      trigPoints=[Peaks],
                                      maxDistance=11))
     db.addSubject(AARead('subject', residues))
     result = db.find(AARead('query', residues),
                      FindParameters(significanceFraction=0.0),
                      subjectIndices={'0', 'x', 'y'})

     # Query and subject share one sequence, so the expected features are
     # equal on both sides of the match.
     helix = Landmark('AlphaHelix', 'A', 1, 9, 2)
     peak = TrigPoint('Peaks', 'P', 11)
     expected = {
         '0': [{
             'queryLandmark': helix,
             'queryTrigPoint': peak,
             'subjectLandmark': helix,
             'subjectTrigPoint': peak,
         }],
     }
     self.assertEqual(expected, result.matches)
Beispiel #7
0
 def testFindOneMatchingInsignificant(self):
     """
     The subject must appear in the matches, but with the default find
     parameters it must not be reported as a significant subject.
     """
     db = Database(DatabaseParameters(landmarks=[AlphaHelix, BetaStrand],
                                      trigPoints=[Peaks]))
     db.addSubject(AARead('subject', 'AFRRRFRRRFASAASAVVVVVVASAVVVASA'))
     result = db.find(
         AARead('query', 'FRRRFRRRFASAASAFRRRFRRRFFRRRFRRRFFRRRFRRRF'))

     # Each expected match pairs the query helix at offset 0 with the
     # subject helix at offset 1; the trig point offsets differ likewise.
     queryHelix = Landmark('AlphaHelix', 'A', 0, 9, 2)
     subjectHelix = Landmark('AlphaHelix', 'A', 1, 9, 2)
     expected = {
         '0': [
             {
                 'queryLandmark': queryHelix,
                 'queryTrigPoint': TrigPoint('Peaks', 'P', queryOffset),
                 'subjectLandmark': subjectHelix,
                 'subjectTrigPoint': TrigPoint('Peaks', 'P',
                                               subjectOffset),
             }
             for queryOffset, subjectOffset in ((10, 11), (13, 14))
         ],
     }
     self.assertEqual(expected, result.matches)

     significant = list(result.significantSubjects())
     self.assertEqual(0, len(significant))
Beispiel #8
0
 def testFindMatchAfterSaveRestore(self):
     """
     The matches found in a database must be found again, unchanged, in
     a database restored from a saved copy of it.
     """
     query = AARead('query', 'FRRRFRRRFASAASAFRRRFRRRFFRRRFRRRFFRRRFRRRF')

     # The same two matches are expected before and after the round trip.
     queryHelix = Landmark('AlphaHelix', 'A', 0, 9, 2)
     subjectHelix = Landmark('AlphaHelix', 'A', 1, 9, 2)
     expected = {
         '0': [
             {
                 'queryLandmark': queryHelix,
                 'queryTrigPoint': TrigPoint('Peaks', 'P', queryOffset),
                 'subjectLandmark': subjectHelix,
                 'subjectTrigPoint': TrigPoint('Peaks', 'P',
                                               subjectOffset),
             }
             for queryOffset, subjectOffset in ((10, 11), (13, 14))
         ],
     }

     original = Database(
         DatabaseParameters(landmarks=[AlphaHelix, BetaStrand],
                            trigPoints=[Peaks]))
     original.addSubject(
         AARead('subject', 'AFRRRFRRRFASAASAVVVVVVASAVVVASA'))
     self.assertEqual(expected, original.find(query).matches)

     # Save to an in-memory file, rewind it, and restore from it.
     buffer = StringIO()
     original.save(buffer)
     buffer.seek(0)
     restored = Database.restore(buffer)
     self.assertEqual(expected, restored.find(query).matches)
Beispiel #9
0
 def testFindNoMatching(self):
     """
     Looking up the short query 'FRRR' in a database holding a longer
     subject must give an empty matches dictionary.
     """
     db = Database(DatabaseParameters(landmarks=[AlphaHelix],
                                      trigPoints=[Peaks]))
     db.addSubject(AARead('subject', 'FRRRFRRRFASAASA'))
     matches = db.find(AARead('query', 'FRRR')).matches
     self.assertEqual({}, matches)
Beispiel #10
0
 def testFindNoneMatchingNoTrigPoint(self):
     """
     With a single landmark finder and an empty list of trig point
     finders, a query with the same sequence as the subject must still
     give no matches.
     """
     residues = 'AFRRRFRRRFASAASA'
     db = Database(DatabaseParameters(landmarks=[AlphaHelix],
                                      trigPoints=[]))
     db.addSubject(AARead('subject', residues))
     matches = db.find(AARead('query', residues)).matches
     self.assertEqual({}, matches)
Beispiel #11
0
 def testFindNoneMatchingTooSmallDistance(self):
     """
     With maxDistance set to 1, a query with the same sequence as the
     subject must give no matches.
     """
     residues = 'AFRRRFRRRFASAASA'
     db = Database(DatabaseParameters(landmarks=[AlphaHelix],
                                      trigPoints=[Peaks],
                                      maxDistance=1))
     db.addSubject(AARead('subject', residues))
     matches = db.find(AARead('query', residues)).matches
     self.assertEqual({}, matches)
Beispiel #12
0
 def testFindBug493(self):
     """
     Reproduction, using longer reads and several finders, of the failure
     reported in https://github.com/acorg/light-matter/issues/493

     There are no assertions: the test only requires that the find and
     the normalizeBin call on each significant bin run to completion.
     """
     querySequence = (
         'GSHSMRYFYTSVSRPGRGEPRFIAVGYVDDTQFVRFDSDAASQRMEPRAPWIEQEGPEYWDR'
         'NTRNVKAQSQTDRVDLGTLRGYYNQSEAGSHTIQMMYGCDVGSDGRFLRGYRQDAYDGKDYI'
         'ALKEDLRSWTAADMAAQTTKHKWEAAHVAEQWRAYLEGTCVEWLRRYLENGKETLQRTDAPK'
         'THMTHHAVSDHEATLRCWALSFYPAEITLTWQRDGEDQTQDTELVETRPAGDGTFQKWVAVV'
         'VPSGQEQRYTCHVQHEGLPKPL')
     queryStructure = (
         '--EEEEEEEEEE--TTSS--EEEEEEEETTEEEEEEETTSTT-S-EE-SHHHHTS-HHHHHH'
         'HHHHHHHHHHHHHHHHHHHHHHTT--TTS--EEEEEEEEEE-TTS-EEEEEEEEEETTEEEE'
         'EE-TTSS-EEESSHHHHHHHHHHHHTTTHHHHHHHHHTHHHHHHHHHHHHHHHHHT--B--E'
         'EEEEEEE-SSSEEEEEEEEEEEBSS-EEEEEEETTEEE-TTEEE---EE-SSS-EEEEEEEE'
         'EETT-GGGEEEEEEETTB-S--')
     query = SSAARead('2HLA:A', querySequence, queryStructure)

     subjectSequence = (
         'HVLRYGYTGIFDDTSHMTLTVVGIFDGQHFFTYHVQSSDKASSRANGTISWMANVSAAYPTY'
         'LDGERAKGDLIFNQTEQNLLELEIALGYRSQSVLTWTHECNTTENGSFVAGYEGFGWDGETL'
         'MELKDNLTLWTGPNYEISWLKQQKTYIDGKIKNISEGDTTIQRNYLKGNCTQWSVIYSGFQP'
         'PVTHPVVKGGVRNQNDNRAEAFCTSYGFFPGEIQITFIHYGDKVPEDSEPQCNPLLPTLDGT'
         'FHQGCYVAIFSNQNYTCRVTHGNWTVEIPISVT')
     subjectStructure = (
         '-EEEEEEEEEESSSS-EEEEEEEEETTEEEEEEEEESS-SSS-EEEE-STHHHHHHHHSTTH'
         'HHHHHHHHHHHHHHHHHHHHHHHHHH--SS--EEEEEEEEEE-TT--EEEEEEEEEETTEEE'
         'EEE-TTS---B---TTT-GGGGGHHHHHHHHHT--SHHHHHHHHHHHTHHHHHHHHHHHHS-'
         '--B--EEEEEEEEEETTEEEEEEEEEEEBSS--EEEEEEESS---TT---EE---EE-TTS-'
         'EEEEEEEEEETTSEEEEEEE-SS-EEEEEEE--')
     subject = SSAARead('3D2U:A', subjectSequence, subjectStructure)

     landmarkNames = [
         'PDB AlphaHelix',
         'PDB AlphaHelix_3_10',
         'PDB AlphaHelix_pi',
         'PDB ExtendedStrand',
         'AminoAcidsLm',
     ]
     trigPointNames = [
         'AminoAcids',
         'Peaks',
         'Troughs',
         'IndividualPeaks',
         'IndividualTroughs',
     ]
     db = Database(DatabaseParameters(landmarks=landmarkNames,
                                      trigPoints=trigPointNames,
                                      featureLengthBase=1.01,
                                      maxDistance=10000,
                                      limitPerLandmark=50,
                                      distanceBase=1.1))
     _, index, _ = db.addSubject(subject)
     result = db.find(query, FindParameters(significanceFraction=0.01),
                      storeFullAnalysis=True)

     # Normalizing every significant bin must not raise.
     for significantBin in result.analysis[index]['significantBins']:
         normalizeBin(significantBin['bin'], len(query))
Beispiel #13
0
 def testFindOneMatchingButSubjectExcluded(self):
     """
     Even though the query has the same sequence as the only subject and
     significanceFraction is 0.0, find must give no matches when it is
     passed an empty subjectIndices set.
     """
     residues = 'AFRRRFRRRFASAASA'
     db = Database(DatabaseParameters(landmarks=[AlphaHelix],
                                      trigPoints=[Peaks],
                                      maxDistance=11))
     db.addSubject(AARead('subject', residues))
     result = db.find(AARead('query', residues),
                      FindParameters(significanceFraction=0.0),
                      subjectIndices=set())
     self.assertEqual({}, result.matches)
Beispiel #14
0
 def __init__(self, histogram, query, database):
     # Sets self.significanceCutoff to the size of the second-largest
     # histogram bin obtained when the query is matched against itself,
     # and records that cutoff in self.analysis.
     self._histogram = histogram

     # Imported here because a top-level import of Database would be
     # circular.
     from light.database import Database
     from light.parameters import FindParameters

     # Match the query against itself in a new database that is built
     # with the parameters of the passed database.
     selfDb = Database(database.dbParams)
     _, queryIndex, _ = selfDb.addSubject(query)
     selfResult = selfDb.find(
         query, FindParameters(significanceMethod='Always'),
         storeFullAnalysis=True)
     selfBins = selfResult.analysis[queryIndex]['histogram'].bins

     # The largest bin is ignored, so the cutoff is the second-largest
     # bin size. An IndexError is raised if there are fewer than two bins.
     ascendingSizes = sorted(len(contents) for contents in selfBins)
     self.significanceCutoff = ascendingSizes[-2]

     self.analysis = {
         'significanceMethod': self.__class__.__name__,
         'significanceCutoff': self.significanceCutoff,
     }
Beispiel #15
0
 def testFindSelf(self):
     """
     Check whether a read is found when it is looked up in a database that
     holds only that read, using the finders and distance settings taken
     from attributes of this instance. The outcome is stored in
     self.details, along with the best bin score when the read is found.
     """
     database = Database(dbParams=DatabaseParameters(
         landmarks=self.LANDMARKS,
         trigPoints=self.TRIG_POINTS,
         limitPerLandmark=self.LIMIT_PER_LANDMARK,
         maxDistance=self.MAX_DISTANCE,
         minDistance=self.MIN_DISTANCE))
     read = AARead(self.ID, self.SEQUENCE)
     _, subjectIndex, _ = database.addSubject(read)
     analysis = database.find(read).analysis

     # 'score' is only present when the read matched itself.
     found = subjectIndex in analysis
     details = {'result': found}
     if found:
         details['score'] = analysis[subjectIndex]['bestBinScore']
     self.details = details
Beispiel #16
0
# Module-level fixtures: subject reads, their indices in DB, and the saved
# results of running find on two reads. The statements are order-dependent:
# _findParams and _result are rebound for each read.
MUMMYPOX = AARead('Mummypox',
                  _BETA + _ALPHA + _ALPHA + _ALPHA + _ALPHA + _TRYPTOPHAN)

SQUIRRELPOX = AARead('Squirrelpox', _ALPHA + _BETA + _BETA + _ALPHA + _BETA)

# Add the subjects to DB, keeping the middle element of each returned
# triple (the subject index, going by the names it is bound to).
_, _CATPOX_INDEX, _ = DB.addSubject(CATPOX)
_, _COWPOX_INDEX, _ = DB.addSubject(COWPOX)
_, _MONKEYPOX_INDEX, _ = DB.addSubject(MONKEYPOX)
_, _MUMMYPOX_INDEX, _ = DB.addSubject(MUMMYPOX)
_, _SQUIRRELPOX_INDEX, _ = DB.addSubject(SQUIRRELPOX)

# Run find on a read that matches squirrelpox and catpox.
# READ0 is built from the same sequence expression as SQUIRRELPOX.
READ0 = AARead('read0', _ALPHA + _BETA + _BETA + _ALPHA + _BETA)
_findParams = FindParameters(significanceFraction=0.2)
_result = DB.find(READ0, _findParams, storeFullAnalysis=True)
READ0_SQUIRRELPOX_SCORE = _result.analysis[_SQUIRRELPOX_INDEX]['bestBinScore']
READ0_CATPOX_SCORE = _result.analysis[_CATPOX_INDEX]['bestBinScore']
# The result is saved to an in-memory file and kept as a string.
# NOTE(review): this relies on save() returning an object with getvalue(),
# presumably the StringIO it was passed - confirm.
RECORD0 = _result.save(StringIO()).getvalue()

# Run find on a read that matches both monkeypox and mummypox.
# READ1 differs from MUMMYPOX only in its fifth component (_BETA instead
# of _ALPHA).
READ1 = AARead('read1', _BETA + _ALPHA + _ALPHA + _ALPHA + _BETA + _TRYPTOPHAN)
_findParams = FindParameters(significanceFraction=0.25)
_result = DB.find(READ1, _findParams, storeFullAnalysis=True)
READ1_MONKEYPOX_SCORE = _result.analysis[_MONKEYPOX_INDEX]['bestBinScore']
# The score of the second entry in monkeypox's list of significant bins.
READ1_MONKEYPOX_HSP2_SCORE = _result.analysis[_MONKEYPOX_INDEX][
    'significantBins'][1]['score']
READ1_MUMMYPOX_SCORE = _result.analysis[_MUMMYPOX_INDEX]['bestBinScore']
RECORD1 = _result.save(StringIO()).getvalue()

# Run find on a read that matches only cowpox.