def testSandraSymmetry_259(self):
    """
    Two of the sequences received from Sandra Junglen on March 13, 2015
    must give a symmetric affinity matrix. These are the sequences that
    caused the non-symmetric scores issue in
    https://github.com/acorg/light-matter/issues/259
    """
    # Read index 8.
    rgsv = AARead(
        'RGSV',
        ('HVCIFRKNQHGGLREIYVLNIYERIVQKCVEDLARAILSVVPSETMTHPKNKF'
         'QIPNKHNIAARKEFGDSYFTVCTSDDASKWNQGHHVSKFITILVRILPKFWHG'
         'FIVRALQLWFHKRLFLGDDLLRLFCANDVLNTTDEKVKKVHEVFKGREVAPWM'
         'TRGMTYIETESGFMQGILHYISSLFHAIFLEDLAERQKKQLPQMARIIQPDNE'
         'SNVIIDCMESSDDSSMMISFSTKSMNDRQTFAMLLLVDRAFSLKEYYGDMLGI'
         'YKSIKSTTGTIFMMEFNIEFFFAGDTHRPTIRWVNAALN'))

    # Read index 47.
    influenzaC = AARead(
        'InfluenzaC',
        ('TINTMAKDGERGKLQRRAIATPGMIVRPFSKIVETVAQKICEKLKESGLPVGG'
         'NEKKAKLKTTVTSLNARMNSDQFAVNITGDNSKWNECQQPEAYLALLAYITKD'
         'SSDLMKDLCSVAPVLFCNKFVKLGQGIRLSNKRKTKEVIIKAEKMGKYKNLMR'
         'EEYKNLFEPLEKYIQKDVCFLPGGMLMGMFNMLSTVLGVSTLCYMDEELKAKG'
         'CFWTGLQSSDDFVLFAVASNWSNIHWTIRRFNAVCKLIGINMSLEKSYGSLPE'
         'LFEFTSMFFDGEFVSNLAMELPAFT'))

    findParams = FindParameters(significanceFraction=0.01)

    # Keyword arguments handed on to the symmetry checker.
    checkKeywords = {
        'distanceBase': 1.025,
        'landmarks': ['GOR4AlphaHelix', 'GOR4Coil'],
        'trigPoints': ['Peaks', 'Troughs'],
        'limitPerLandmark': 50,
        'minDistance': 1,
        'maxDistance': 100,
        'symmetric': False,
    }
    self._checkSymmetry([rgsv, influenzaC], findParams, **checkKeywords)
def testFindOneMatchingSignificant(self):
    """
    When the significanceFraction is low enough, the one matching and
    significant subject must be found.
    """
    aa = 'AFRRRFRRRFASAASA'
    subject = AARead('subject', aa)
    query = AARead('query', aa)

    db = Database(DatabaseParameters(landmarks=[AlphaHelix],
                                     trigPoints=[Peaks], maxDistance=11))
    db.addSubject(subject)

    result = db.find(query, FindParameters(significanceFraction=0.0))

    # The query and subject are identical, so both sides of the match
    # carry the same landmark and trig point.
    expectedMatch = {
        'queryLandmark': Landmark('AlphaHelix', 'A', 1, 9, 2),
        'queryTrigPoint': TrigPoint('Peaks', 'P', 11),
        'subjectLandmark': Landmark('AlphaHelix', 'A', 1, 9, 2),
        'subjectTrigPoint': TrigPoint('Peaks', 'P', 11),
    }
    self.assertEqual({'0': [expectedMatch]}, result.matches)
def testFindBug493Minimal(self):
    """
    A minimal failing test case for
    https://github.com/acorg/light-matter/issues/493
    """
    querySequence = (
        'ALKEDLRSWTAADMAAQTTKHKWEAAHVAEQWRAYLEGTCVEWLRRYLENGKETLQRTDAPK'
        'THMTHHAVSDHEATLRCWALSFYPAEITLTWQRDGEDQTQDTELVETRPAGDGTFQKWVAVV')
    queryStructure = (
        'EE-TTSS-EEESSHHHHHHHHHHHHTTTHHHHHHHHHTHHHHHHHHHHHHHHHHHT--B--E'
        'EEEEEEE-SSSEEEEEEEEEEEBSS-EEEEEEETTEEE-TTEEE---EE-SSS-EEEEEEEE')
    query = SSAARead('2HLA:A', querySequence, queryStructure)

    subjectSequence = (
        'HVLRYGYTGIFDDTSHMTLTVVGIFDGQHFFTYHVQSSDKASSRANGTISWMANVSAAYPTY'
        'PVTHPVVKGGVRNQNDNRAEAFCTSYGFFPGEIQITFIHYGDKVPEDSEPQCNPLLPTLDGT')
    subjectStructure = (
        '-EEEEEEEEEESSSS-EEEEEEEEETTEEEEEEEEESS-SSS-EEEE-STHHHHHHHHSTTH'
        '--B--EEEEEEEEEETTEEEEEEEEEEEBSS--EEEEEEESS---TT---EE---EE-TTS-')
    subject = SSAARead('3D2U:A', subjectSequence, subjectStructure)

    db = Database(DatabaseParameters(landmarks=['PDB ExtendedStrand'],
                                     trigPoints=[], limitPerLandmark=50,
                                     distanceBase=1.1))
    _, subjectIndex, _ = db.addSubject(subject)

    result = db.find(query, FindParameters(significanceFraction=0.01),
                     storeFullAnalysis=True)

    # The test passes as long as normalizing every significant bin runs
    # without raising.
    for significantBin in result.analysis[subjectIndex]['significantBins']:
        normalizeBin(significantBin['bin'], len(query))
def testFindOneMatchingSignificantWithSubjectIndicesIncludingIt(self):
    """
    The one matching and significant subject must still be found when a
    non-empty subjectIndices is passed that contains its index alongside
    other indices that match nothing.
    """
    aa = 'AFRRRFRRRFASAASA'
    subject = AARead('subject', aa)
    query = AARead('query', aa)

    db = Database(DatabaseParameters(landmarks=[AlphaHelix],
                                     trigPoints=[Peaks], maxDistance=11))
    db.addSubject(subject)

    # '0' is the index the expected match is reported under; 'x' and 'y'
    # are extra indices.
    allowedIndices = {'0', 'x', 'y'}
    result = db.find(query, FindParameters(significanceFraction=0.0),
                     subjectIndices=allowedIndices)

    expectedMatch = {
        'queryLandmark': Landmark('AlphaHelix', 'A', 1, 9, 2),
        'queryTrigPoint': TrigPoint('Peaks', 'P', 11),
        'subjectLandmark': Landmark('AlphaHelix', 'A', 1, 9, 2),
        'subjectTrigPoint': TrigPoint('Peaks', 'P', 11),
    }
    self.assertEqual({'0': [expectedMatch]}, result.matches)
def testPlotHistogramMustRun(self):
    """
    Calling plotHistogram must complete without raising.
    """
    plotHistogram(
        GOLV, AKAV,
        findParams=FindParameters(significanceMethod='HashFraction',
                                  binScoreMethod='FeatureAAScore'))
def testSymmetricFindScoresSameSubjectAndQuery(self):
    """
    Matching A against B must score the same as matching B against A,
    and when the subject and query are identical that score must be 1.0.
    """
    aa = 'AFRRRFRRRFASAASAFRRRFRRRF'
    subject = AARead('subject', aa)
    query = AARead('query', aa)
    findParams = FindParameters(significanceFraction=0.0)

    # Forward direction: the subject is in the database.
    forwardDb = Database(
        DatabaseParameters(landmarks=[AlphaHelix, BetaStrand],
                           trigPoints=[Peaks]))
    forwardDb.addSubject(subject)
    forwardResult = forwardDb.find(query, findParams)
    forwardScore = forwardResult.analysis['0']['bestBinScore']

    # Reverse direction: a fresh database holding the query instead.
    reverseDb = Database(
        DatabaseParameters(landmarks=[AlphaHelix, BetaStrand],
                           trigPoints=[Peaks]))
    reverseDb.addSubject(query)
    reverseResult = reverseDb.find(subject, findParams)
    reverseScore = reverseResult.analysis['0']['bestBinScore']

    self.assertEqual(forwardScore, reverseScore)
    self.assertEqual(1.0, forwardScore)
def testShowBestBinOnlyIssuesWarning(self):
    """
    A warning must be issued by the showBestBinOnly option when the best
    score is shared by more than one bin.
    """
    A = 'FRRRFRRRFXXXXXX'
    C = 'FRRRRFRRRRFXXXXXX'
    query = AARead('query', 5 * A)
    subject = AARead('subject', 5 * A + C + 5 * A)
    findParams = FindParameters(significanceFraction=0.01,
                                binScoreMethod='FeatureAAScore')
    expectedMessage = ('Multiple bins share the best score (1.000000). '
                       'Displaying just one of them.')

    with warnings.catch_warnings(record=True) as caught:
        # Record every warning, including ones that would otherwise be
        # filtered out.
        warnings.simplefilter('always')
        PlotHashesInSubjectAndRead(
            query, subject, findParams,
            landmarks=['AlphaHelix', 'AlphaHelix_pi'], trigPoints=[],
            distanceBase=1.025, limitPerLandmark=50, minDistance=1,
            maxDistance=100, showInsignificant=False,
            showBestBinOnly=True)

        self.assertEqual(1, len(caught))
        warning = caught[0]
        self.assertTrue(issubclass(warning.category, RuntimeWarning))
        self.assertIn(expectedMessage, str(warning.message))
def testSymmetricFindScoresDifferingSubjectAndQuery(self):
    """
    Matching A against B must score the same as matching B against A,
    even when the two have different hash counts and the score is not
    1.0.
    """
    subject = AARead('subject', 'AFRRRFRRRFASAASAFRRRFRRRF')
    query = AARead('query', 'FRRRFRRRFASAVVVVVV')
    findParams = FindParameters(significanceFraction=0.0)

    # Forward direction: the subject is in the database.
    forwardDb = Database(
        DatabaseParameters(landmarks=[AlphaHelix, BetaStrand],
                           trigPoints=[Peaks]))
    _, forwardIndex, _ = forwardDb.addSubject(subject)
    subjectHashCount = forwardDb.getSubjectByIndex(forwardIndex).hashCount
    forwardResult = forwardDb.find(query, findParams)
    forwardScore = forwardResult.analysis['0']['bestBinScore']

    # Reverse direction: a fresh database holding the query instead.
    reverseDb = Database(
        DatabaseParameters(landmarks=[AlphaHelix, BetaStrand],
                           trigPoints=[Peaks]))
    _, reverseIndex, _ = reverseDb.addSubject(query)
    queryHashCount = reverseDb.getSubjectByIndex(reverseIndex).hashCount
    reverseResult = reverseDb.find(subject, findParams)
    reverseScore = reverseResult.analysis['0']['bestBinScore']

    self.assertNotEqual(subjectHashCount, queryHashCount)
    self.assertEqual(forwardScore, reverseScore)
    self.assertNotEqual(1.0, forwardScore)
def fromSequences(cls, labels, sequences, findParams=None, **kwargs):
    """
    Construct an NJTree instance from some sequences.

    @param cls: Our class.
    @param labels: An iterable producing C{str} labels for the sequences.
        It is read exactly once and stored as a C{list}, so a one-shot
        iterable (e.g., a generator) may be passed.
    @param sequences: Either a C{str} filename of sequences to consider or
        a C{light.reads.Reads} instance.
    @param findParams: An instance of C{FindParameters}. If not given (or
        otherwise false), a default C{FindParameters} instance is used.
    @param kwargs: See
        C{database.DatabaseSpecifier.getDatabaseFromKeywords} for
        additional keywords, all of which are optional.
    @return: An C{NJTree} instance.
    """
    if isinstance(sequences, str):
        sequences = FastaReads(sequences, readClass=AAReadWithX,
                               upperCase=True)

    new = cls()
    new.sequences = list(sequences)
    # Materialize the labels once. Storing the raw iterable and then
    # handing it to DistanceMatrix would leave new.labels exhausted if
    # the caller passed a generator.
    new.labels = list(labels)
    findParams = findParams or FindParameters()
    affinity = np.array(
        affinityMatrix(new.sequences, findParams=findParams, **kwargs))
    # Distance is one minus affinity, element-wise.
    new.distance = np.ones(affinity.shape) - affinity
    new.tree = nj(DistanceMatrix(new.distance, new.labels))
    return new
def __init__(self, fileToEvaluate, fileToEvaluateTpFp, pdbFile,
             structureFile, structureType):
    """
    Store the given file names and structure type, and record
    C{fileToEvaluate} under the attribute that corresponds to
    C{structureType}.

    @param fileToEvaluate: Stored as C{self.fileToEvaluate} and also under
        the C{ac...Filename} attribute selected by C{structureType}.
    @param fileToEvaluateTpFp: Stored as C{self.fileToEvaluateTpFp}.
    @param pdbFile: Stored as C{self.pdbFile}.
    @param structureFile: Stored as C{self.structureFile}.
    @param structureType: A C{str}, one of 'AlphaHelix',
        'AlphaHelix_3_10', 'AlphaHelix_combined', 'AlphaHelix_pi', or
        'ExtendedStrand'.
    @raise ValueError: If C{structureType} is not one of the above.
    """
    self.fileToEvaluate = fileToEvaluate
    self.fileToEvaluateTpFp = fileToEvaluateTpFp
    self.pdbFile = pdbFile
    self.structureFile = structureFile
    self.structureType = structureType
    self.findParams = FindParameters(significanceFraction=0.01,
                                     binScoreMethod='FeatureAAScore')

    # All per-structure file names start unset; exactly one is filled in
    # below according to structureType.
    self.acAlphaHelixFilename = None
    self.acAlphaHelix310Filename = None
    self.acAlphaHelixCombinedFilename = None
    self.acAlphaHelixPiFilename = None
    self.acExtendedStrandFilename = None

    if self.structureType == 'AlphaHelix':
        self.acAlphaHelixFilename = self.fileToEvaluate
    elif self.structureType == 'AlphaHelix_3_10':
        self.acAlphaHelix310Filename = self.fileToEvaluate
    elif self.structureType == 'AlphaHelix_combined':
        self.acAlphaHelixCombinedFilename = self.fileToEvaluate
    elif self.structureType == 'AlphaHelix_pi':
        self.acAlphaHelixPiFilename = self.fileToEvaluate
    elif self.structureType == 'ExtendedStrand':
        self.acExtendedStrandFilename = self.fileToEvaluate
    else:
        # Previously this message was built and silently discarded, so an
        # unknown structure type went unnoticed.
        raise ValueError(
            'structureType %s must be one of "AlphaHelix", '
            '"AlphaHelix_3_10", "AlphaHelix_combined", "AlphaHelix_pi", '
            '"ExtendedStrand"' % structureType)
def testResultIsAnAffinityMatrix(self):
    """
    Indexing an AffinityMatrices instance must give back an affinity
    matrix.
    """
    aa = 'FRRRFRRRFAAAFRRRFRRRF'
    queries = Reads([AARead('query1', aa)])
    subjects = Reads([AARead('subject1', aa), AARead('subject2', aa)])

    testParameters = {
        'dbParams': DatabaseParameters(landmarks=['AlphaHelix']),
        'findParams': FindParameters(),
    }
    matrices = AffinityMatrices(queries, subjects=subjects,
                                parameterSets={'test': testParameters},
                                returnDict=True)

    # Every read has the same sequence, so each query/subject pair must
    # score 1.0.
    expected = {
        'query1': {
            'subject1': 1.0,
            'subject2': 1.0,
        },
    }
    self.assertEqual(expected, matrices['test'])
def testAlignmentGraphWithHistogramMustRun(self):
    """
    Calling alignmentGraph with the histogram shown must complete without
    raising.
    """
    alignmentGraph(
        GOLV, AKAV, showHistogram=True, showHorizontal=False,
        findParams=FindParameters(significanceMethod='HashFraction',
                                  binScoreMethod='FeatureAAScore'),
        showFigure=False)
def testAlignmentPanel(self):
    """
    Calling alignmentPanel must complete without raising.
    """
    reads = Reads()
    for read in (GOLV, AKAV):
        reads.add(read)

    params = FindParameters(significanceMethod='HashFraction',
                            binScoreMethod='FeatureAAScore')
    # The same reads serve as both positional arguments.
    alignmentPanel(reads, reads, findParams=params, showFigure=False)
def testPassUnknownBinScoreMethod(self):
    """
    The Template calculateScore method must raise a ValueError when it is
    given a FindParameters whose bin score method is unknown.
    """
    template = Template(SAMPLE_TEMPLATE)
    badParams = FindParameters(binScoreMethod='unknown')
    expectedError = "^Unknown bin score method 'unknown'$"
    six.assertRaisesRegex(self, ValueError, expectedError,
                          template.calculateScore, findParams=badParams)
def testAlignmentGraphMultipleQueries(self):
    """
    Calling alignmentGraphMultipleQueries must complete without raising.
    """
    queries = Reads()
    for read in (GOLV, AKAV):
        queries.add(read)

    params = FindParameters(significanceMethod='HashFraction',
                            binScoreMethod='FeatureAAScore')
    alignmentGraphMultipleQueries(queries, BUNV, findParams=params,
                                  showFigure=False)
def testToDict(self):
    """
    toDict must return a dict holding both the values that were passed
    and the remaining values.
    """
    passed = {
        'significanceMethod': 'yyy',
        'significanceFraction': 0.5,
        'binScoreMethod': 'xxx',
        'featureMatchScore': 3.4,
        'featureMismatchScore': 9.3,
    }
    findParams = FindParameters(**passed)

    # The remaining keys were not passed to the constructor.
    expected = {
        'binScoreMethod': 'xxx',
        'deltaScale': 1.0,
        'featureMatchScore': 3.4,
        'featureMismatchScore': 9.3,
        'overallScoreMethod': 'BestBinScore',
        'significanceFraction': 0.5,
        'significanceMethod': 'yyy',
        'weights': FindParameters.DEFAULT_WEIGHTS,
    }
    self.assertEqual(expected, findParams.toDict())
def testPlotHistogramLinesMustRun(self):
    """
    Calling plotHistogramLines must complete without raising.
    """
    reads = Reads()
    for read in (GOLV, AKAV, BUNV):
        reads.add(read)

    params = FindParameters(significanceMethod='HashFraction',
                            binScoreMethod='FeatureAAScore')
    plotHistogramLines(reads, findParams=params)
def testArgs(self):
    """
    Command line arguments must be parseable into values from which a new
    FindParameters instance can be created.
    """
    parser = argparse.ArgumentParser()
    FindParameters.addArgsToParser(parser)
    commandLine = [
        '--significanceMethod', 'Always',
        '--significanceFraction', '0.4',
        '--binScoreMethod', 'MinHashesScore',
        '--featureMatchScore', '5',
        '--featureMismatchScore', '6',
        '--weights', 'AlphaHelix 2',
        '--deltaScale', '0.2',
    ]
    args = parser.parse_args(commandLine)

    # The values checked, in order, on both the parsed arguments and the
    # instance built from them.
    expected = (
        ('significanceMethod', 'Always'),
        ('significanceFraction', 0.4),
        ('binScoreMethod', 'MinHashesScore'),
        ('featureMatchScore', 5),
        ('featureMismatchScore', 6),
        ('deltaScale', 0.2),
    )

    # Parsing must do the expected thing.
    for name, value in expected:
        self.assertEqual(value, getattr(args, name))

    # We must be able to make an instance from the parsed args.
    findParams = FindParameters.fromArgs(args)
    for name, value in expected:
        self.assertEqual(value, getattr(findParams, name))
def testNoQueriesOrSubjects(self):
    """
    With no queries or subjects, an AffinityMatrices instance must give
    back an empty matrix.
    """
    testParameters = {
        'dbParams': DatabaseParameters(),
        'findParams': FindParameters(),
    }
    matrices = AffinityMatrices(Reads(),
                                parameterSets={'test': testParameters})
    self.assertEqual([], matrices['test'])
def testNotDefaults(self):
    """
    Parameter values that are passed explicitly must be the ones that are
    set on the instance.
    """
    passed = (
        ('significanceMethod', 'yyy'),
        ('significanceFraction', 0.5),
        ('binScoreMethod', 'xxx'),
        ('featureMatchScore', 3.4),
        ('featureMismatchScore', 9.3),
    )
    findParams = FindParameters(**dict(passed))
    for name, value in passed:
        self.assertEqual(value, getattr(findParams, name))
def testIdenticalMatrixIsReturnedOnRepeatedRequest(self):
    """
    Asking an AffinityMatrices instance for the same matrix twice must
    give back the very same object both times.
    """
    testParameters = {
        'dbParams': DatabaseParameters(),
        'findParams': FindParameters(),
    }
    matrices = AffinityMatrices(Reads(),
                                parameterSets={'test': testParameters},
                                returnDict=True)
    firstRequest = matrices['test']
    secondRequest = matrices['test']
    self.assertIs(firstRequest, secondRequest)
def __init__(self, histogram, query, subject, dbParams, findParams=None):
    """
    Record the histogram, the query and subject lengths, the find
    parameters, and the set of all features found by scanning the query
    and the subject's read.

    @param histogram: Stored as C{self._histogram}.
    @param query: The query; its length is stored and it is scanned for
        features.
    @param subject: The subject; its length is stored and its C{read}
        attribute is scanned for features.
    @param dbParams: Passed to C{Backend.configure} before scanning.
    @param findParams: Find parameters. If not given (or otherwise
        false), a default C{FindParameters} instance is used.
    """
    # Imported at function level rather than at module level.
    # NOTE(review): presumably to avoid circular imports - confirm.
    from light.parameters import FindParameters
    from light.backend import Backend

    self._histogram = histogram
    self._queryLen = len(query)
    self._subjectLen = len(subject)
    self._findParams = findParams or FindParameters()

    scanner = Backend()
    scanner.configure(dbParams)

    queryScan = scanner.scan(query)
    queryFeatures = queryScan.landmarks + queryScan.trigPoints
    self._allQueryFeatures = set(queryFeatures)

    subjectScan = scanner.scan(subject.read)
    subjectFeatures = subjectScan.landmarks + subjectScan.trigPoints
    self._allSubjectFeatures = set(subjectFeatures)
def testNoQueriesOrSubjectsWithResultAsDict(self):
    """
    With no queries or subjects and returnDict set to True, an
    AffinityMatrices instance must give back an empty dictionary.
    """
    testParameters = {
        'dbParams': DatabaseParameters(),
        'findParams': FindParameters(),
    }
    matrices = AffinityMatrices(Reads(),
                                parameterSets={'test': testParameters},
                                returnDict=True)
    self.assertEqual({}, matrices['test'])
def testFindBug493(self):
    """
    Failing test case for
    https://github.com/acorg/light-matter/issues/493
    """
    querySequence = (
        'GSHSMRYFYTSVSRPGRGEPRFIAVGYVDDTQFVRFDSDAASQRMEPRAPWIEQEGPEYWDR'
        'NTRNVKAQSQTDRVDLGTLRGYYNQSEAGSHTIQMMYGCDVGSDGRFLRGYRQDAYDGKDYI'
        'ALKEDLRSWTAADMAAQTTKHKWEAAHVAEQWRAYLEGTCVEWLRRYLENGKETLQRTDAPK'
        'THMTHHAVSDHEATLRCWALSFYPAEITLTWQRDGEDQTQDTELVETRPAGDGTFQKWVAVV'
        'VPSGQEQRYTCHVQHEGLPKPL')
    queryStructure = (
        '--EEEEEEEEEE--TTSS--EEEEEEEETTEEEEEEETTSTT-S-EE-SHHHHTS-HHHHHH'
        'HHHHHHHHHHHHHHHHHHHHHHTT--TTS--EEEEEEEEEE-TTS-EEEEEEEEEETTEEEE'
        'EE-TTSS-EEESSHHHHHHHHHHHHTTTHHHHHHHHHTHHHHHHHHHHHHHHHHHT--B--E'
        'EEEEEEE-SSSEEEEEEEEEEEBSS-EEEEEEETTEEE-TTEEE---EE-SSS-EEEEEEEE'
        'EETT-GGGEEEEEEETTB-S--')
    query = SSAARead('2HLA:A', querySequence, queryStructure)

    subjectSequence = (
        'HVLRYGYTGIFDDTSHMTLTVVGIFDGQHFFTYHVQSSDKASSRANGTISWMANVSAAYPTY'
        'LDGERAKGDLIFNQTEQNLLELEIALGYRSQSVLTWTHECNTTENGSFVAGYEGFGWDGETL'
        'MELKDNLTLWTGPNYEISWLKQQKTYIDGKIKNISEGDTTIQRNYLKGNCTQWSVIYSGFQP'
        'PVTHPVVKGGVRNQNDNRAEAFCTSYGFFPGEIQITFIHYGDKVPEDSEPQCNPLLPTLDGT'
        'FHQGCYVAIFSNQNYTCRVTHGNWTVEIPISVT')
    subjectStructure = (
        '-EEEEEEEEEESSSS-EEEEEEEEETTEEEEEEEEESS-SSS-EEEE-STHHHHHHHHSTTH'
        'HHHHHHHHHHHHHHHHHHHHHHHHHH--SS--EEEEEEEEEE-TT--EEEEEEEEEETTEEE'
        'EEE-TTS---B---TTT-GGGGGHHHHHHHHHT--SHHHHHHHHHHHTHHHHHHHHHHHHS-'
        '--B--EEEEEEEEEETTEEEEEEEEEEEBSS--EEEEEEESS---TT---EE---EE-TTS-'
        'EEEEEEEEEETTSEEEEEEE-SS-EEEEEEE--')
    subject = SSAARead('3D2U:A', subjectSequence, subjectStructure)

    landmarkNames = [
        'PDB AlphaHelix', 'PDB AlphaHelix_3_10', 'PDB AlphaHelix_pi',
        'PDB ExtendedStrand', 'AminoAcidsLm',
    ]
    trigPointNames = [
        'AminoAcids', 'Peaks', 'Troughs', 'IndividualPeaks',
        'IndividualTroughs',
    ]
    db = Database(DatabaseParameters(
        landmarks=landmarkNames, trigPoints=trigPointNames,
        featureLengthBase=1.01, maxDistance=10000, limitPerLandmark=50,
        distanceBase=1.1))
    _, subjectIndex, _ = db.addSubject(subject)

    result = db.find(query, FindParameters(significanceFraction=0.01),
                     storeFullAnalysis=True)

    # The test passes as long as normalizing every significant bin runs
    # without raising.
    for significantBin in result.analysis[subjectIndex]['significantBins']:
        normalizeBin(significantBin['bin'], len(query))
def __init__(self, histogram, query, database):
    """
    Compute a significance cutoff by matching the query against itself in
    a fresh database that uses the parameters of the passed database.

    @param histogram: Stored as C{self._histogram}.
    @param query: The query; it is added as the only subject of the fresh
        database and then looked up in it.
    @param database: A database whose C{dbParams} attribute is used to
        build the fresh database.
    """
    # A top-level import of Database would be circular.
    from light.database import Database
    from light.parameters import FindParameters

    self._histogram = histogram

    selfMatchDb = Database(database.dbParams)
    _, subjectIndex, _ = selfMatchDb.addSubject(query)

    alwaysSignificant = FindParameters(significanceMethod='Always')
    result = selfMatchDb.find(query, alwaysSignificant,
                              storeFullAnalysis=True)
    selfMatchBins = result.analysis[subjectIndex]['histogram'].bins

    # Order the bin sizes from largest to smallest. The largest bin (at
    # index 0) is ignored, so the cutoff is the size of the second
    # largest.
    binHeights = [len(oneBin) for oneBin in selfMatchBins]
    binHeights.sort(reverse=True)
    self.significanceCutoff = binHeights[1]

    self.analysis = {
        'significanceMethod': self.__class__.__name__,
        'significanceCutoff': self.significanceCutoff,
    }
def testFindOneMatchingButSubjectExcluded(self):
    """
    Even though one subject matches significantly, find must return no
    matches when it is passed a subjectIndices argument that excludes
    that subject.
    """
    aa = 'AFRRRFRRRFASAASA'
    subject = AARead('subject', aa)
    query = AARead('query', aa)

    db = Database(DatabaseParameters(landmarks=[AlphaHelix],
                                     trigPoints=[Peaks], maxDistance=11))
    db.addSubject(subject)

    # An empty set of indices excludes every subject.
    noIndices = set()
    result = db.find(query, FindParameters(significanceFraction=0.0),
                     subjectIndices=noIndices)
    self.assertEqual({}, result.matches)
def testSandraSymmetryWithSymmetricSpeedup_235(self):
    """
    A few of the sequences received from Sandra Junglen on March 13, 2015
    must give a symmetric affinity matrix when no value for 'symmetric'
    (which defaults to True) is passed to affinityMatrix. These are the
    sequences that caused the non-symmetric scores issue in
    https://github.com/acorg/light-matter/issues/235
    """
    # Read index 3.
    bunv = AARead(
        'BUNV',
        ('SFTFFNKGQKTAKDREIFVGEFEAKMCMYVVERISKERCKLNTDE'
         'MISEPGDSKLKILEKKAEEEIRYIVERTKDSIIKGDPSKALKLEI'
         'NADMSKWSAQDVFYKYFWLIAMDPILYPAEKTRILYFMCNYMQKL'
         'LILPDDLIANILDQKRPYNDDLILEMTNGLNYNYVQIKRNWLQGN'
         'FNYISSYVHSCAMLVYKDILKECMKLLDGDCLINSMVHSDDNQTS'
         'LAIIQNKVSDQIVIQYAANTFESVCLTFGCQANMKKTYITHTCKE'
         'FVSLFNLHGEPLSVFGRFLLPSVG'))

    # Read index 24.
    lacv = AARead(
        'LACV',
        ('YFTFFNKGQKTSKDREIFVGEYEAKMCMYAVERIAKERCKLNPDE'
         'MISEPGDGKLKVLEQKSEQEIRFLVETTRQKNREIDEAIEALAAE'
         'GYESNLEKIEKLSLGKAKGLKMEINADMSKWSAQDVFYKYFWLIA'
         'LDPILYPQEKERILYFMCNYMDKELILPDELLFNLLDQKVAYQND'
         'IIATMTNQLNSNTVLIKRNWLQGNFNYTSSYVHSCAMSVYKEILK'
         'EAITLLDGSILVNSLVHSDDNQTSITIVQDKMENDKIIDFAMKEF'
         'ERACLTFGCQANMKKTYVTNCIKEFVSLFNLYGEPFSIYGRFLLT'
         'SVG'))

    # Read index 48.
    wyov = AARead(
        'WYOV',
        ('TFTFFNKGQKTAKDREIFVGEFEAKMCMYVVERIAKERCKLNSDE'
         'MISEPGDAKLKILEQKAEQELRFIVERTKDKFLKGDPCKALKMEI'
         'NADMSKWSAQDVFYKYFWLIAMDPILYPKEKYRILFFMCNYLQKV'
         'LVLPDELIGNILDQKKTYNNDIILEGTDFLHQNYVNIRRNWLQGN'
         'FNYLSSYIHTCAMSVFKDILKEVSYLLDGDVLVNSMVHSDDNQTS'
         'ITYVQNKIEESVLINHGLKTFETVCLTFGCQANMKKTYLTHNIKE'
         'FVSLFNIHGEPMSVYGRFLLPSVG'))

    findParams = FindParameters(significanceFraction=0.05)

    # Keyword arguments handed on to the symmetry checker. No 'symmetric'
    # value is given, on purpose.
    checkKeywords = {
        'distanceBase': 1.0,
        'landmarks': ALL_LANDMARK_CLASSES,
        'trigPoints': ALL_TRIG_CLASSES,
        'limitPerLandmark': 50,
        'minDistance': 1,
        'maxDistance': 100,
    }
    self._checkSymmetry([bunv, lacv, wyov], findParams, **checkKeywords)
def testDefaults(self):
    """
    A FindParameters instance made with no arguments must have its
    attributes set to the class defaults.
    """
    findParams = FindParameters()

    # (Expected class default, attribute value) pairs, checked in order.
    checks = (
        (FindParameters.DEFAULT_SIGNIFICANCE_METHOD,
         findParams.significanceMethod),
        (FindParameters.DEFAULT_BIN_SCORE_METHOD,
         findParams.binScoreMethod),
        (FindParameters.DEFAULT_OVERALL_SCORE_METHOD,
         findParams.overallScoreMethod),
        (FindParameters.DEFAULT_FEATURE_MATCH_SCORE,
         findParams.featureMatchScore),
        (FindParameters.DEFAULT_FEATURE_MISMATCH_SCORE,
         findParams.featureMismatchScore),
        (FindParameters.DEFAULT_WEIGHTS, findParams.weights),
        (FindParameters.DEFAULT_DELTA_SCALE, findParams.deltaScale),
    )
    for default, actual in checks:
        self.assertEqual(default, actual)
def testFindOneMatchingSignificant(self):
    """
    When the significanceFraction is low enough, the one matching and
    significant subject must be found.
    """
    database = Database(DatabaseParameters(landmarks=[AlphaHelix],
                                           trigPoints=[Peaks],
                                           maxDistance=11))

    subjects = Reads()
    subjects.add(AARead('subject', 'AFRRRFRRRFASAASA'))

    queries = Reads()
    queries.add(AARead('query', 'FRRRFRRRFASAASA'))

    findParams = FindParameters(significanceFraction=0.1)
    result = queryDatabase(subjects, queries, database, findParams)

    expected = {
        'query': {
            'subject': 1,
        },
    }
    self.assertEqual(expected, result)
def testShowBestBinOnly(self):
    """
    With showBestBinOnly, the 'bins' attribute must hold just one bin,
    and the best score must be the same as without the option.
    """
    A = 'FRRRFRRRFXXXXXX'
    C = 'FRRRRFRRRRFXXXXXX'
    subject = AARead('subject', 5 * A + C + 2 * A)
    query = AARead('query', 5 * A)
    findParams = FindParameters(significanceFraction=0.01)

    def plot(**extra):
        # Both runs share these settings; only showBestBinOnly differs.
        return PlotHashesInSubjectAndRead(
            query, subject, findParams,
            landmarks=['AlphaHelix', 'AlphaHelix_pi'], trigPoints=[],
            distanceBase=1.025, limitPerLandmark=50, minDistance=1,
            maxDistance=100, showInsignificant=False, **extra)

    # There are 11 significant bins.
    allBins = plot()
    self.assertEqual(11, len(allBins.bins))
    bestScore = allBins.result.analysis['bestScore']

    # Same input, but restricting ourselves to only the single most
    # significant bin:
    bestOnly = plot(showBestBinOnly=True)
    self.assertEqual(1, len(bestOnly.bins))

    # The best score must not depend on whether showBestBinOnly is used.
    self.assertEqual(bestScore, bestOnly.result.analysis['bestScore'])