Example #1
0
    def testSandraSymmetry_259(self):
        """
        The affinity matrix must be symmetric for two of the sequences
        received from Sandra Junglen on March 13, 2015.

        These two reads are the ones that triggered the non-symmetric
        scores issue in https://github.com/acorg/light-matter/issues/259
        """
        # Read index 8.
        rgsv = AARead(
            'RGSV',
            'HVCIFRKNQHGGLREIYVLNIYERIVQKCVEDLARAILSVVPSETMTHPKNKF'
            'QIPNKHNIAARKEFGDSYFTVCTSDDASKWNQGHHVSKFITILVRILPKFWHG'
            'FIVRALQLWFHKRLFLGDDLLRLFCANDVLNTTDEKVKKVHEVFKGREVAPWM'
            'TRGMTYIETESGFMQGILHYISSLFHAIFLEDLAERQKKQLPQMARIIQPDNE'
            'SNVIIDCMESSDDSSMMISFSTKSMNDRQTFAMLLLVDRAFSLKEYYGDMLGI'
            'YKSIKSTTGTIFMMEFNIEFFFAGDTHRPTIRWVNAALN')

        # Read index 47.
        influenzaC = AARead(
            'InfluenzaC',
            'TINTMAKDGERGKLQRRAIATPGMIVRPFSKIVETVAQKICEKLKESGLPVGG'
            'NEKKAKLKTTVTSLNARMNSDQFAVNITGDNSKWNECQQPEAYLALLAYITKD'
            'SSDLMKDLCSVAPVLFCNKFVKLGQGIRLSNKRKTKEVIIKAEKMGKYKNLMR'
            'EEYKNLFEPLEKYIQKDVCFLPGGMLMGMFNMLSTVLGVSTLCYMDEELKAKG'
            'CFWTGLQSSDDFVLFAVASNWSNIHWTIRRFNAVCKLIGINMSLEKSYGSLPE'
            'LFEFTSMFFDGEFVSNLAMELPAFT')

        # The symmetry check is run with symmetric=False passed explicitly.
        self._checkSymmetry(
            [rgsv, influenzaC],
            FindParameters(significanceFraction=0.01),
            distanceBase=1.025,
            landmarks=['GOR4AlphaHelix', 'GOR4Coil'],
            trigPoints=['Peaks', 'Troughs'],
            limitPerLandmark=50,
            minDistance=1,
            maxDistance=100,
            symmetric=False)
Example #2
0
 def testFindOneMatchingSignificant(self):
     """
     With a significanceFraction of zero, looking up a query whose
     sequence equals that of the only subject must yield exactly one
     match, stored under subject index '0'.
     """
     residues = 'AFRRRFRRRFASAASA'
     subjectRead = AARead('subject', residues)
     queryRead = AARead('query', residues)

     database = Database(DatabaseParameters(
         landmarks=[AlphaHelix], trigPoints=[Peaks], maxDistance=11))
     database.addSubject(subjectRead)

     found = database.find(queryRead,
                           FindParameters(significanceFraction=0.0))

     # Query and subject share one sequence, so both halves of the
     # expected match carry the same landmark and trig point.
     expectedMatch = {
         'queryLandmark': Landmark('AlphaHelix', 'A', 1, 9, 2),
         'queryTrigPoint': TrigPoint('Peaks', 'P', 11),
         'subjectLandmark': Landmark('AlphaHelix', 'A', 1, 9, 2),
         'subjectTrigPoint': TrigPoint('Peaks', 'P', 11),
     }
     self.assertEqual({'0': [expectedMatch]}, found.matches)
Example #3
0
 def testFindBug493Minimal(self):
     """
     Minimal reproduction of
     https://github.com/acorg/light-matter/issues/493

     The test passes if normalizeBin can be called on every significant
     bin without raising; nothing else is asserted.
     """
     querySequence = (
         'ALKEDLRSWTAADMAAQTTKHKWEAAHVAEQWRAYLEGTCVEWLRRYLENGKETLQRTDAPK'
         'THMTHHAVSDHEATLRCWALSFYPAEITLTWQRDGEDQTQDTELVETRPAGDGTFQKWVAVV')
     queryStructure = (
         'EE-TTSS-EEESSHHHHHHHHHHHHTTTHHHHHHHHHTHHHHHHHHHHHHHHHHHT--B--E'
         'EEEEEEE-SSSEEEEEEEEEEEBSS-EEEEEEETTEEE-TTEEE---EE-SSS-EEEEEEEE')
     query = SSAARead('2HLA:A', querySequence, queryStructure)

     subjectSequence = (
         'HVLRYGYTGIFDDTSHMTLTVVGIFDGQHFFTYHVQSSDKASSRANGTISWMANVSAAYPTY'
         'PVTHPVVKGGVRNQNDNRAEAFCTSYGFFPGEIQITFIHYGDKVPEDSEPQCNPLLPTLDGT')
     subjectStructure = (
         '-EEEEEEEEEESSSS-EEEEEEEEETTEEEEEEEEESS-SSS-EEEE-STHHHHHHHHSTTH'
         '--B--EEEEEEEEEETTEEEEEEEEEEEBSS--EEEEEEESS---TT---EE---EE-TTS-')
     subject = SSAARead('3D2U:A', subjectSequence, subjectStructure)

     database = Database(DatabaseParameters(
         landmarks=['PDB ExtendedStrand'], trigPoints=[],
         limitPerLandmark=50, distanceBase=1.1))
     _, addedIndex, _ = database.addSubject(subject)

     # The full analysis is needed to get at the significant bins.
     found = database.find(query,
                           FindParameters(significanceFraction=0.01),
                           storeFullAnalysis=True)
     for significantBin in found.analysis[addedIndex]['significantBins']:
         normalizeBin(significantBin['bin'], len(query))
Example #4
0
 def testFindOneMatchingSignificantWithSubjectIndicesIncludingIt(self):
     """
     The matching subject must still be found when find is given a
     non-empty subjectIndices set that contains its index alongside
     other indices that match nothing.
     """
     residues = 'AFRRRFRRRFASAASA'
     subjectRead = AARead('subject', residues)
     queryRead = AARead('query', residues)

     database = Database(DatabaseParameters(
         landmarks=[AlphaHelix], trigPoints=[Peaks], maxDistance=11))
     database.addSubject(subjectRead)

     # '0' is the index the expected match is stored under; 'x' and 'y'
     # are extra indices that do not appear in the expected result.
     found = database.find(queryRead,
                           FindParameters(significanceFraction=0.0),
                           subjectIndices={'0', 'x', 'y'})

     expectedMatch = {
         'queryLandmark': Landmark('AlphaHelix', 'A', 1, 9, 2),
         'queryTrigPoint': TrigPoint('Peaks', 'P', 11),
         'subjectLandmark': Landmark('AlphaHelix', 'A', 1, 9, 2),
         'subjectTrigPoint': TrigPoint('Peaks', 'P', 11),
     }
     self.assertEqual({'0': [expectedMatch]}, found.matches)
Example #5
0
 def testPlotHistogramMustRun(self):
     """
     Calling plotHistogram must complete without raising. No output is
     checked.
     """
     plotHistogram(
         GOLV, AKAV,
         findParams=FindParameters(significanceMethod='HashFraction',
                                   binScoreMethod='FeatureAAScore'))
Example #6
0
    def testSymmetricFindScoresSameSubjectAndQuery(self):
        """
        Matching A against B must score the same as matching B against A,
        and when A and B have the same sequence that score must be 1.0.
        """
        residues = 'AFRRRFRRRFASAASAFRRRFRRRF'
        subject = AARead('subject', residues)
        query = AARead('query', residues)
        findParams = FindParameters(significanceFraction=0.0)

        def bestBinScore(stored, lookedUp):
            # Put 'stored' alone in a fresh database, look up 'lookedUp',
            # and return the best bin score reported for subject '0'.
            database = Database(DatabaseParameters(
                landmarks=[AlphaHelix, BetaStrand], trigPoints=[Peaks]))
            database.addSubject(stored)
            found = database.find(lookedUp, findParams)
            return found.analysis['0']['bestBinScore']

        forwardScore = bestBinScore(subject, query)
        reverseScore = bestBinScore(query, subject)

        self.assertEqual(forwardScore, reverseScore)
        self.assertEqual(1.0, forwardScore)
Example #7
0
    def testShowBestBinOnlyIssuesWarning(self):
        """
        When more than one bin has the best score, using showBestBinOnly
        must result in exactly one RuntimeWarning that says so.
        """
        repeat = 'FRRRFRRRFXXXXXX'
        variant = 'FRRRRFRRRRFXXXXXX'

        subject = AARead('subject', 5 * repeat + variant + 5 * repeat)
        query = AARead('query', 5 * repeat)

        findParams = FindParameters(significanceFraction=0.01,
                                    binScoreMethod='FeatureAAScore')

        expectedText = ('Multiple bins share the best score (1.000000). '
                        'Displaying just one of them.')

        with warnings.catch_warnings(record=True) as caught:
            # Record every warning, whatever filters were set earlier.
            warnings.simplefilter('always')
            PlotHashesInSubjectAndRead(
                query, subject, findParams,
                landmarks=['AlphaHelix', 'AlphaHelix_pi'],
                trigPoints=[],
                distanceBase=1.025,
                limitPerLandmark=50,
                minDistance=1,
                maxDistance=100,
                showInsignificant=False,
                showBestBinOnly=True)

            self.assertEqual(1, len(caught))
            onlyWarning = caught[0]
            self.assertTrue(
                issubclass(onlyWarning.category, RuntimeWarning))
            self.assertIn(expectedText, str(onlyWarning.message))
Example #8
0
    def testSymmetricFindScoresDifferingSubjectAndQuery(self):
        """
        Matching A against B must score the same as matching B against A
        even when the two sequences have different hash counts and the
        score is not 1.0.
        """
        subject = AARead('subject', 'AFRRRFRRRFASAASAFRRRFRRRF')
        query = AARead('query', 'FRRRFRRRFASAVVVVVV')
        findParams = FindParameters(significanceFraction=0.0)

        def hashCountAndScore(stored, lookedUp):
            # Put 'stored' alone in a fresh database, note its hash count,
            # then look up 'lookedUp' and take the best bin score reported
            # for subject '0'.
            database = Database(DatabaseParameters(
                landmarks=[AlphaHelix, BetaStrand], trigPoints=[Peaks]))
            _, storedIndex, _ = database.addSubject(stored)
            count = database.getSubjectByIndex(storedIndex).hashCount
            found = database.find(lookedUp, findParams)
            return count, found.analysis['0']['bestBinScore']

        forwardCount, forwardScore = hashCountAndScore(subject, query)
        reverseCount, reverseScore = hashCountAndScore(query, subject)

        self.assertNotEqual(forwardCount, reverseCount)
        self.assertEqual(forwardScore, reverseScore)
        self.assertNotEqual(1.0, forwardScore)
Example #9
0
    def fromSequences(cls, labels, sequences, findParams=None, **kwargs):
        """
        Build an instance from a set of sequences, via their affinity matrix.

        @param cls: Our class.
        @param labels: An iterable producing C{str} labels for the sequences.
        @param sequences: Either a C{str} filename of sequences to consider
            or a C{light.reads.Reads} instance.
        @param findParams: An instance of C{FindParameters}. If not given
            (or false), a default C{FindParameters()} is used.
        @param kwargs: See
            C{database.DatabaseSpecifier.getDatabaseFromKeywords} for
            additional keywords, all of which are optional.
        @return: The new instance of C{cls}, with its C{sequences},
            C{labels}, C{distance} and C{tree} attributes set.
        """
        # A string is treated as the name of a FASTA file to be read.
        if isinstance(sequences, str):
            sequences = FastaReads(
                sequences, readClass=AAReadWithX, upperCase=True)

        if not findParams:
            findParams = FindParameters()

        instance = cls()
        instance.sequences = list(sequences)
        instance.labels = labels

        scores = affinityMatrix(
            instance.sequences, findParams=findParams, **kwargs)
        affinity = np.array(scores)

        # Distance is one minus affinity, element by element.
        instance.distance = np.ones(affinity.shape) - affinity
        instance.tree = nj(DistanceMatrix(instance.distance, labels))
        return instance
Example #10
0
    def __init__(self, fileToEvaluate, fileToEvaluateTpFp, pdbFile,
                 structureFile, structureType):
        """
        Store the evaluation inputs and record, according to
        C{structureType}, which per-structure filename attribute
        C{fileToEvaluate} belongs to.

        @param fileToEvaluate: The file to evaluate. As well as being stored
            as given, it is stored under the one C{ac...Filename} attribute
            that corresponds to C{structureType}; the others stay C{None}.
        @param fileToEvaluateTpFp: Stored unchanged on the instance.
        @param pdbFile: Stored unchanged on the instance.
        @param structureFile: Stored unchanged on the instance.
        @param structureType: A C{str}, one of 'AlphaHelix',
            'AlphaHelix_3_10', 'AlphaHelix_combined', 'AlphaHelix_pi', or
            'ExtendedStrand'.
        @raise ValueError: If C{structureType} is not one of the values
            listed above.
        """
        self.fileToEvaluate = fileToEvaluate
        self.fileToEvaluateTpFp = fileToEvaluateTpFp
        self.pdbFile = pdbFile
        self.structureFile = structureFile
        self.structureType = structureType
        self.findParams = FindParameters(significanceFraction=0.01,
                                         binScoreMethod='FeatureAAScore')

        # Only the filename matching structureType is set below.
        self.acAlphaHelixFilename = None
        self.acAlphaHelix310Filename = None
        self.acAlphaHelixCombinedFilename = None
        self.acAlphaHelixPiFilename = None
        self.acExtendedStrandFilename = None

        if self.structureType == 'AlphaHelix':
            self.acAlphaHelixFilename = self.fileToEvaluate
        elif self.structureType == 'AlphaHelix_3_10':
            self.acAlphaHelix310Filename = self.fileToEvaluate
        elif self.structureType == 'AlphaHelix_combined':
            self.acAlphaHelixCombinedFilename = self.fileToEvaluate
        elif self.structureType == 'AlphaHelix_pi':
            self.acAlphaHelixPiFilename = self.fileToEvaluate
        elif self.structureType == 'ExtendedStrand':
            self.acExtendedStrandFilename = self.fileToEvaluate
        else:
            # The message used to be built as a bare expression and thrown
            # away, so an unknown structure type was silently accepted.
            # The 1-tuple keeps the formatting correct even if
            # structureType is itself a tuple.
            raise ValueError(
                'structureType %s must be one of "AlphaHelix", '
                '"AlphaHelix_3_10", "AlphaHelix_combined", "AlphaHelix_pi", '
                '"ExtendedStrand"' % (structureType,))
Example #11
0
 def testResultIsAnAffinityMatrix(self):
     """
     Indexing an AffinityMatrices instance by parameter set name must
     give the affinity matrix for that parameter set.
     """
     settings = {
         'dbParams': DatabaseParameters(landmarks=['AlphaHelix']),
         'findParams': FindParameters(),
     }
     residues = 'FRRRFRRRFAAAFRRRFRRRF'
     queryReads = Reads([AARead('query1', residues)])
     subjectReads = Reads([AARead('subject1', residues),
                           AARead('subject2', residues)])

     matrices = AffinityMatrices(queryReads, subjects=subjectReads,
                                 parameterSets={'test': settings},
                                 returnDict=True)

     # The query and both subjects share one sequence, so every affinity
     # is expected to be 1.0.
     expected = {
         'query1': {
             'subject1': 1.0,
             'subject2': 1.0,
         },
     }
     self.assertEqual(expected, matrices['test'])
Example #12
0
 def testAlignmentGraphWithHistogramMustRun(self):
     """
     Calling alignmentGraph with the histogram shown (and the horizontal
     display off) must complete without raising. No output is checked.
     """
     alignmentGraph(
         GOLV, AKAV,
         showHistogram=True,
         showHorizontal=False,
         findParams=FindParameters(significanceMethod='HashFraction',
                                   binScoreMethod='FeatureAAScore'),
         showFigure=False)
Example #13
0
 def testAlignmentPanel(self):
     """
     Calling alignmentPanel with the same reads as both of its read
     arguments must complete without raising. No output is checked.
     """
     findParams = FindParameters(significanceMethod='HashFraction',
                                 binScoreMethod='FeatureAAScore')
     bothReads = Reads()
     for read in (GOLV, AKAV):
         bothReads.add(read)
     alignmentPanel(bothReads, bothReads, findParams=findParams,
                    showFigure=False)
 def testPassUnknownBinScoreMethod(self):
     """
     Template.calculateScore must raise a ValueError if it is passed a
     FindParameters instance whose bin score method is unknown.
     """
     unknownParams = FindParameters(binScoreMethod='unknown')
     template = Template(SAMPLE_TEMPLATE)
     six.assertRaisesRegex(
         self, ValueError, "^Unknown bin score method 'unknown'$",
         template.calculateScore, findParams=unknownParams)
Example #15
0
 def testAlignmentGraphMultipleQueries(self):
     """
     Calling alignmentGraphMultipleQueries with two reads and BUNV must
     complete without raising. No output is checked.
     """
     findParams = FindParameters(significanceMethod='HashFraction',
                                 binScoreMethod='FeatureAAScore')
     queryReads = Reads()
     for read in (GOLV, AKAV):
         queryReads.add(read)
     alignmentGraphMultipleQueries(queryReads, BUNV,
                                   findParams=findParams,
                                   showFigure=False)
Example #16
0
 def testToDict(self):
     """
     toDict must return the values passed to the constructor together
     with the remaining (unpassed) settings.
     """
     passed = {
         'significanceMethod': 'yyy',
         'significanceFraction': 0.5,
         'binScoreMethod': 'xxx',
         'featureMatchScore': 3.4,
         'featureMismatchScore': 9.3,
     }
     # Settings that were not passed are expected at these values.
     expected = dict(passed,
                     deltaScale=1.0,
                     overallScoreMethod='BestBinScore',
                     weights=FindParameters.DEFAULT_WEIGHTS)
     self.assertEqual(expected, FindParameters(**passed).toDict())
Example #17
0
 def testPlotHistogramLinesMustRun(self):
     """
     Calling plotHistogramLines on three reads must complete without
     raising. No output is checked.
     """
     findParams = FindParameters(significanceMethod='HashFraction',
                                 binScoreMethod='FeatureAAScore')
     allReads = Reads()
     for read in (GOLV, AKAV, BUNV):
         allReads.add(read)
     plotHistogramLines(allReads, findParams=findParams)
Example #18
0
    def testArgs(self):
        """
        Command line arguments added by FindParameters.addArgsToParser must
        parse to the expected values, and FindParameters.fromArgs must turn
        the parsed result into an instance carrying the same values.
        """
        commandLine = [
            '--significanceMethod', 'Always',
            '--significanceFraction', '0.4',
            '--binScoreMethod', 'MinHashesScore',
            '--featureMatchScore', '5',
            '--featureMismatchScore', '6',
            '--weights', 'AlphaHelix 2',
            '--deltaScale', '0.2',
        ]
        # Attribute name and expected value, in the order they are checked.
        # Note that --weights is given above but is not checked.
        expected = (
            ('significanceMethod', 'Always'),
            ('significanceFraction', 0.4),
            ('binScoreMethod', 'MinHashesScore'),
            ('featureMatchScore', 5),
            ('featureMismatchScore', 6),
            ('deltaScale', 0.2),
        )

        parser = argparse.ArgumentParser()
        FindParameters.addArgsToParser(parser)
        args = parser.parse_args(commandLine)

        # Parsing must do the expected thing.
        for name, value in expected:
            self.assertEqual(value, getattr(args, name))

        # We must be able to make an instance from the parsed args.
        findParams = FindParameters.fromArgs(args)
        for name, value in expected:
            self.assertEqual(value, getattr(findParams, name))
Example #19
0
 def testNoQueriesOrSubjects(self):
     """
     With empty reads and no subjects given, the matrix for a parameter
     set must be an empty list.
     """
     settings = {
         'dbParams': DatabaseParameters(),
         'findParams': FindParameters(),
     }
     matrices = AffinityMatrices(Reads(),
                                 parameterSets={'test': settings})
     self.assertEqual([], matrices['test'])
Example #20
0
 def testNotDefaults(self):
     """
     Values passed to the constructor must be readable back from the
     attributes of the same names.
     """
     # Attribute name and value, in the order they are checked.
     passed = (
         ('significanceMethod', 'yyy'),
         ('significanceFraction', 0.5),
         ('binScoreMethod', 'xxx'),
         ('featureMatchScore', 3.4),
         ('featureMismatchScore', 9.3),
     )
     findParams = FindParameters(**dict(passed))
     for name, value in passed:
         self.assertEqual(value, getattr(findParams, name))
Example #21
0
 def testIdenticalMatrixIsReturnedOnRepeatedRequest(self):
     """
     Asking an AffinityMatrices instance for the same parameter set
     twice must give back the very same object both times.
     """
     settings = {
         'dbParams': DatabaseParameters(),
         'findParams': FindParameters(),
     }
     matrices = AffinityMatrices(Reads(),
                                 parameterSets={'test': settings},
                                 returnDict=True)
     firstRequest = matrices['test']
     secondRequest = matrices['test']
     self.assertIs(firstRequest, secondRequest)
Example #22
0
 def __init__(self, histogram, query, subject, dbParams, findParams=None):
     """
     Store the histogram and sequence lengths, and scan both the query
     and the subject to collect all of their features.

     @param histogram: Stored unchanged on the instance.
     @param query: The query; its length is stored and it is scanned.
     @param subject: The subject; its length is stored and its C{read}
         attribute is scanned.
     @param dbParams: Used to configure the C{Backend} that does the
         scanning.
     @param findParams: An instance of C{FindParameters}. If not given
         (or false), a default C{FindParameters()} is used.
     """
     # NOTE(review): these imports are local rather than at module level,
     # presumably to avoid circular imports - confirm.
     from light.parameters import FindParameters
     from light.backend import Backend

     self._histogram = histogram
     self._queryLen = len(query)
     self._subjectLen = len(subject)

     if not findParams:
         findParams = FindParameters()
     self._findParams = findParams

     scanner = Backend()
     scanner.configure(dbParams)

     # All landmarks and trig points found in the query.
     queryScan = scanner.scan(query)
     queryFeatures = queryScan.landmarks + queryScan.trigPoints
     self._allQueryFeatures = set(queryFeatures)

     # The same for the subject, scanning its underlying read.
     subjectScan = scanner.scan(subject.read)
     subjectFeatures = subjectScan.landmarks + subjectScan.trigPoints
     self._allSubjectFeatures = set(subjectFeatures)
Example #23
0
 def testNoQueriesOrSubjectsWithResultAsDict(self):
     """
     With empty reads, no subjects given and returnDict set to True, the
     matrix for a parameter set must be an empty dictionary.
     """
     settings = {
         'dbParams': DatabaseParameters(),
         'findParams': FindParameters(),
     }
     matrices = AffinityMatrices(Reads(),
                                 parameterSets={'test': settings},
                                 returnDict=True)
     self.assertEqual({}, matrices['test'])
Example #24
0
 def testFindBug493(self):
     """
     Reproduction of https://github.com/acorg/light-matter/issues/493
     using the full-length sequences.

     The test passes if normalizeBin can be called on every significant
     bin without raising; nothing else is asserted.
     """
     querySequence = (
         'GSHSMRYFYTSVSRPGRGEPRFIAVGYVDDTQFVRFDSDAASQRMEPRAPWIEQEGPEYWDR'
         'NTRNVKAQSQTDRVDLGTLRGYYNQSEAGSHTIQMMYGCDVGSDGRFLRGYRQDAYDGKDYI'
         'ALKEDLRSWTAADMAAQTTKHKWEAAHVAEQWRAYLEGTCVEWLRRYLENGKETLQRTDAPK'
         'THMTHHAVSDHEATLRCWALSFYPAEITLTWQRDGEDQTQDTELVETRPAGDGTFQKWVAVV'
         'VPSGQEQRYTCHVQHEGLPKPL')
     queryStructure = (
         '--EEEEEEEEEE--TTSS--EEEEEEEETTEEEEEEETTSTT-S-EE-SHHHHTS-HHHHHH'
         'HHHHHHHHHHHHHHHHHHHHHHTT--TTS--EEEEEEEEEE-TTS-EEEEEEEEEETTEEEE'
         'EE-TTSS-EEESSHHHHHHHHHHHHTTTHHHHHHHHHTHHHHHHHHHHHHHHHHHT--B--E'
         'EEEEEEE-SSSEEEEEEEEEEEBSS-EEEEEEETTEEE-TTEEE---EE-SSS-EEEEEEEE'
         'EETT-GGGEEEEEEETTB-S--')
     query = SSAARead('2HLA:A', querySequence, queryStructure)

     subjectSequence = (
         'HVLRYGYTGIFDDTSHMTLTVVGIFDGQHFFTYHVQSSDKASSRANGTISWMANVSAAYPTY'
         'LDGERAKGDLIFNQTEQNLLELEIALGYRSQSVLTWTHECNTTENGSFVAGYEGFGWDGETL'
         'MELKDNLTLWTGPNYEISWLKQQKTYIDGKIKNISEGDTTIQRNYLKGNCTQWSVIYSGFQP'
         'PVTHPVVKGGVRNQNDNRAEAFCTSYGFFPGEIQITFIHYGDKVPEDSEPQCNPLLPTLDGT'
         'FHQGCYVAIFSNQNYTCRVTHGNWTVEIPISVT')
     subjectStructure = (
         '-EEEEEEEEEESSSS-EEEEEEEEETTEEEEEEEEESS-SSS-EEEE-STHHHHHHHHSTTH'
         'HHHHHHHHHHHHHHHHHHHHHHHHHH--SS--EEEEEEEEEE-TT--EEEEEEEEEETTEEE'
         'EEE-TTS---B---TTT-GGGGGHHHHHHHHHT--SHHHHHHHHHHHTHHHHHHHHHHHHS-'
         '--B--EEEEEEEEEETTEEEEEEEEEEEBSS--EEEEEEESS---TT---EE---EE-TTS-'
         'EEEEEEEEEETTSEEEEEEE-SS-EEEEEEE--')
     subject = SSAARead('3D2U:A', subjectSequence, subjectStructure)

     landmarkNames = [
         'PDB AlphaHelix', 'PDB AlphaHelix_3_10', 'PDB AlphaHelix_pi',
         'PDB ExtendedStrand', 'AminoAcidsLm',
     ]
     trigPointNames = [
         'AminoAcids', 'Peaks', 'Troughs', 'IndividualPeaks',
         'IndividualTroughs',
     ]
     database = Database(DatabaseParameters(
         landmarks=landmarkNames,
         trigPoints=trigPointNames,
         featureLengthBase=1.01,
         maxDistance=10000,
         limitPerLandmark=50,
         distanceBase=1.1))
     _, addedIndex, _ = database.addSubject(subject)

     # The full analysis is needed to get at the significant bins.
     found = database.find(query,
                           FindParameters(significanceFraction=0.01),
                           storeFullAnalysis=True)
     for significantBin in found.analysis[addedIndex]['significantBins']:
         normalizeBin(significantBin['bin'], len(query))
Example #25
0
 def __init__(self, histogram, query, database):
     """
     Compute a significance cutoff by matching the query against itself.

     The query is added as the only subject of a new database built from
     C{database.dbParams} and then looked up in it. The cutoff is the
     size of the second-largest bin of the resulting histogram.

     @param histogram: Stored unchanged on the instance.
     @param query: The query to match against itself.
     @param database: Only its C{dbParams} attribute is read.
     @raise IndexError: If the self-match histogram has fewer than two
         bins.
     """
     # A top-level import of Database would be circular.
     from light.database import Database
     from light.parameters import FindParameters

     self._histogram = histogram

     selfDatabase = Database(database.dbParams)
     _, queryIndex, _ = selfDatabase.addSubject(query)
     selfMatch = selfDatabase.find(
         query, FindParameters(significanceMethod='Always'),
         storeFullAnalysis=True)
     selfBins = selfMatch.analysis[queryIndex]['histogram'].bins

     # The highest-scoring bin is ignored, so take the second largest.
     descendingHeights = sorted(map(len, selfBins), reverse=True)
     self.significanceCutoff = descendingHeights[1]

     self.analysis = {
         'significanceMethod': self.__class__.__name__,
         'significanceCutoff': self.significanceCutoff,
     }
Example #26
0
 def testFindOneMatchingButSubjectExcluded(self):
     """
     Even though the database holds a matching, significant subject,
     find must return no matches when it is passed a subjectIndices
     argument that excludes that subject.
     """
     residues = 'AFRRRFRRRFASAASA'
     subjectRead = AARead('subject', residues)
     queryRead = AARead('query', residues)

     database = Database(DatabaseParameters(
         landmarks=[AlphaHelix], trigPoints=[Peaks], maxDistance=11))
     database.addSubject(subjectRead)

     # An empty set of subject indices excludes every subject.
     found = database.find(queryRead,
                           FindParameters(significanceFraction=0.0),
                           subjectIndices=set())
     self.assertEqual({}, found.matches)
Example #27
0
    def testSandraSymmetryWithSymmetricSpeedup_235(self):
        """
        The affinity matrix must be symmetric for a few of the sequences
        received from Sandra Junglen on March 13, 2015 when no value is
        passed for 'symmetric' (which, per the original author's note,
        defaults to True in affinityMatrix).

        These reads are the ones that triggered the non-symmetric scores
        issue in https://github.com/acorg/light-matter/issues/235
        """
        # Read index 3.
        bunv = AARead(
            'BUNV',
            'SFTFFNKGQKTAKDREIFVGEFEAKMCMYVVERISKERCKLNTDE'
            'MISEPGDSKLKILEKKAEEEIRYIVERTKDSIIKGDPSKALKLEI'
            'NADMSKWSAQDVFYKYFWLIAMDPILYPAEKTRILYFMCNYMQKL'
            'LILPDDLIANILDQKRPYNDDLILEMTNGLNYNYVQIKRNWLQGN'
            'FNYISSYVHSCAMLVYKDILKECMKLLDGDCLINSMVHSDDNQTS'
            'LAIIQNKVSDQIVIQYAANTFESVCLTFGCQANMKKTYITHTCKE'
            'FVSLFNLHGEPLSVFGRFLLPSVG')

        # Read index 24.
        lacv = AARead(
            'LACV',
            'YFTFFNKGQKTSKDREIFVGEYEAKMCMYAVERIAKERCKLNPDE'
            'MISEPGDGKLKVLEQKSEQEIRFLVETTRQKNREIDEAIEALAAE'
            'GYESNLEKIEKLSLGKAKGLKMEINADMSKWSAQDVFYKYFWLIA'
            'LDPILYPQEKERILYFMCNYMDKELILPDELLFNLLDQKVAYQND'
            'IIATMTNQLNSNTVLIKRNWLQGNFNYTSSYVHSCAMSVYKEILK'
            'EAITLLDGSILVNSLVHSDDNQTSITIVQDKMENDKIIDFAMKEF'
            'ERACLTFGCQANMKKTYVTNCIKEFVSLFNLYGEPFSIYGRFLLT'
            'SVG')

        # Read index 48.
        wyov = AARead(
            'WYOV',
            'TFTFFNKGQKTAKDREIFVGEFEAKMCMYVVERIAKERCKLNSDE'
            'MISEPGDAKLKILEQKAEQELRFIVERTKDKFLKGDPCKALKMEI'
            'NADMSKWSAQDVFYKYFWLIAMDPILYPKEKYRILFFMCNYLQKV'
            'LVLPDELIGNILDQKKTYNNDIILEGTDFLHQNYVNIRRNWLQGN'
            'FNYLSSYIHTCAMSVFKDILKEVSYLLDGDVLVNSMVHSDDNQTS'
            'ITYVQNKIEESVLINHGLKTFETVCLTFGCQANMKKTYLTHNIKE'
            'FVSLFNIHGEPMSVYGRFLLPSVG')

        self._checkSymmetry(
            [bunv, lacv, wyov],
            FindParameters(significanceFraction=0.05),
            distanceBase=1.0,
            landmarks=ALL_LANDMARK_CLASSES,
            trigPoints=ALL_TRIG_CLASSES,
            limitPerLandmark=50,
            minDistance=1,
            maxDistance=100)
Example #28
0
 def testDefaults(self):
     """
     A FindParameters made with no arguments must have every checked
     attribute equal to the corresponding class-level DEFAULT_ constant.
     """
     # Class constant name and instance attribute name, in the order
     # they are checked.
     pairs = (
         ('DEFAULT_SIGNIFICANCE_METHOD', 'significanceMethod'),
         ('DEFAULT_BIN_SCORE_METHOD', 'binScoreMethod'),
         ('DEFAULT_OVERALL_SCORE_METHOD', 'overallScoreMethod'),
         ('DEFAULT_FEATURE_MATCH_SCORE', 'featureMatchScore'),
         ('DEFAULT_FEATURE_MISMATCH_SCORE', 'featureMismatchScore'),
         ('DEFAULT_WEIGHTS', 'weights'),
         ('DEFAULT_DELTA_SCALE', 'deltaScale'),
     )
     findParams = FindParameters()
     for constantName, attributeName in pairs:
         self.assertEqual(getattr(FindParameters, constantName),
                          getattr(findParams, attributeName))
Example #29
0
 def testFindOneMatchingSignificant(self):
     """
     queryDatabase must report the one matching, significant subject for
     the query when the significanceFraction is low enough.
     """
     database = Database(DatabaseParameters(
         landmarks=[AlphaHelix], trigPoints=[Peaks], maxDistance=11))

     subjectReads = Reads()
     subjectReads.add(AARead('subject', 'AFRRRFRRRFASAASA'))

     # The query is the subject sequence without its first residue.
     queryReads = Reads()
     queryReads.add(AARead('query', 'FRRRFRRRFASAASA'))

     outcome = queryDatabase(subjectReads, queryReads, database,
                             FindParameters(significanceFraction=0.1))

     expected = {
         'query': {
             'subject': 1,
         },
     }
     self.assertEqual(expected, outcome)
Example #30
0
    def testShowBestBinOnly(self):
        """
        With showBestBinOnly the 'bins' attribute must hold just one bin,
        and the best score must be the same as without the option.
        """
        repeat = 'FRRRFRRRFXXXXXX'
        variant = 'FRRRRFRRRRFXXXXXX'

        subject = AARead('subject', 5 * repeat + variant + 2 * repeat)
        query = AARead('query', 5 * repeat)

        findParams = FindParameters(significanceFraction=0.01)

        def plot(**extra):
            # Every call uses the same inputs and settings; 'extra' holds
            # any additional options for one particular call.
            return PlotHashesInSubjectAndRead(
                query, subject, findParams,
                landmarks=['AlphaHelix', 'AlphaHelix_pi'],
                trigPoints=[],
                distanceBase=1.025,
                limitPerLandmark=50,
                minDistance=1,
                maxDistance=100,
                showInsignificant=False,
                **extra)

        # Without the option there are 11 significant bins.
        allBins = plot()
        self.assertEqual(11, len(allBins.bins))
        scoreWithAllBins = allBins.result.analysis['bestScore']

        # Same input, but restricted to the single best bin.
        bestOnly = plot(showBestBinOnly=True)
        self.assertEqual(1, len(bestOnly.bins))

        # The best score must not depend on whether showBestBinOnly is
        # used.
        self.assertEqual(scoreWithAllBins,
                         bestOnly.result.analysis['bestScore'])