def testTwoFiles(self):
    """
    Reading from a list of two FASTA files must yield the reads found in
    both files, in the order the files were given.
    """
    class SideEffect(object):
        """
        Stand-in for open(): checks which file name is requested on each
        call and serves canned lines for it.
        """
        # (expected file name, lines to serve), in expected call order.
        expectedCalls = (
            ('file1.fasta', ('>id1\n', 'ACTG\n', '>id1\n', 'hhhh\n')),
            ('file2.fasta', ('>id2\n', 'CAGT\n', '>id2\n', 'eeee\n')),
        )

        def __init__(self, test):
            self.test = test
            self.count = 0

        def sideEffect(self, filename, **kwargs):
            if self.count < len(self.expectedCalls):
                expectedName, lines = self.expectedCalls[self.count]
                self.test.assertEqual(expectedName, filename)
                # Only count the call once the file name has been checked.
                self.count += 1
                return File(list(lines))
            else:
                self.test.fail('We are only supposed to be called twice!')

    fakeOpen = SideEffect(self)
    expectedReads = [
        SSAARead('id1', 'ACTG', 'hhhh'),
        SSAARead('id2', 'CAGT', 'eeee'),
    ]
    with patch.object(builtins, 'open') as mockMethod:
        mockMethod.side_effect = fakeOpen.sideEffect
        reads = SSFastaReads(['file1.fasta', 'file2.fasta'])
        # Consume the reads while open() is still patched.
        self.assertEqual(expectedReads, list(reads))
def testFindBug493Minimal(self):
    """
    A minimal failing test case for
    https://github.com/acorg/light-matter/issues/493

    There are no explicit assertions: the test passes if normalizeBin can
    be called on every significant bin without raising.
    """
    querySequence = (
        'ALKEDLRSWTAADMAAQTTKHKWEAAHVAEQWRAYLEGTCVEWLRRYLENGKETLQRTDAPK'
        'THMTHHAVSDHEATLRCWALSFYPAEITLTWQRDGEDQTQDTELVETRPAGDGTFQKWVAVV')
    queryStructure = (
        'EE-TTSS-EEESSHHHHHHHHHHHHTTTHHHHHHHHHTHHHHHHHHHHHHHHHHHT--B--E'
        'EEEEEEE-SSSEEEEEEEEEEEBSS-EEEEEEETTEEE-TTEEE---EE-SSS-EEEEEEEE')
    query = SSAARead('2HLA:A', querySequence, queryStructure)

    subjectSequence = (
        'HVLRYGYTGIFDDTSHMTLTVVGIFDGQHFFTYHVQSSDKASSRANGTISWMANVSAAYPTY'
        'PVTHPVVKGGVRNQNDNRAEAFCTSYGFFPGEIQITFIHYGDKVPEDSEPQCNPLLPTLDGT')
    subjectStructure = (
        '-EEEEEEEEEESSSS-EEEEEEEEETTEEEEEEEEESS-SSS-EEEE-STHHHHHHHHSTTH'
        '--B--EEEEEEEEEETTEEEEEEEEEEEBSS--EEEEEEESS---TT---EE---EE-TTS-')
    subject = SSAARead('3D2U:A', subjectSequence, subjectStructure)

    database = Database(
        DatabaseParameters(
            landmarks=['PDB ExtendedStrand'],
            trigPoints=[],
            limitPerLandmark=50,
            distanceBase=1.1))
    _, subjectIndex, _ = database.addSubject(subject)

    searchParams = FindParameters(significanceFraction=0.01)
    found = database.find(query, searchParams, storeFullAnalysis=True)

    # Normalizing each significant bin is the operation under test.
    for significantBin in found.analysis[subjectIndex]['significantBins']:
        normalizeBin(significantBin['bin'], len(query))
def testTwoReads(self):
    """
    A PDB FASTA file holding two reads must be parsed into two reads,
    returned in the same order as they appear in the file.
    """
    lines = ('>seq1', 'REDD', '>str1', 'HH--',
             '>seq2', 'REAA', '>str2', 'HHEE')
    data = '\n'.join(lines)
    expected = [
        SSAARead('seq1', 'REDD', 'HH--'),
        SSAARead('seq2', 'REAA', 'HHEE'),
    ]
    with patch.object(builtins, 'open', mockOpen(read_data=data)):
        # open() is patched, so the value passed as the file name is
        # irrelevant; the mock always serves 'data'.
        reads = list(SSFastaReads(data))
        self.assertEqual(2, len(reads))
        self.assertEqual(expected, reads)
def testFindBug493(self):
    """
    Failing test case for
    https://github.com/acorg/light-matter/issues/493

    There are no explicit assertions: the test passes if normalizeBin can
    be called on every significant bin without raising.
    """
    querySequence = (
        'GSHSMRYFYTSVSRPGRGEPRFIAVGYVDDTQFVRFDSDAASQRMEPRAPWIEQEGPEYWDR'
        'NTRNVKAQSQTDRVDLGTLRGYYNQSEAGSHTIQMMYGCDVGSDGRFLRGYRQDAYDGKDYI'
        'ALKEDLRSWTAADMAAQTTKHKWEAAHVAEQWRAYLEGTCVEWLRRYLENGKETLQRTDAPK'
        'THMTHHAVSDHEATLRCWALSFYPAEITLTWQRDGEDQTQDTELVETRPAGDGTFQKWVAVV'
        'VPSGQEQRYTCHVQHEGLPKPL')
    queryStructure = (
        '--EEEEEEEEEE--TTSS--EEEEEEEETTEEEEEEETTSTT-S-EE-SHHHHTS-HHHHHH'
        'HHHHHHHHHHHHHHHHHHHHHHTT--TTS--EEEEEEEEEE-TTS-EEEEEEEEEETTEEEE'
        'EE-TTSS-EEESSHHHHHHHHHHHHTTTHHHHHHHHHTHHHHHHHHHHHHHHHHHT--B--E'
        'EEEEEEE-SSSEEEEEEEEEEEBSS-EEEEEEETTEEE-TTEEE---EE-SSS-EEEEEEEE'
        'EETT-GGGEEEEEEETTB-S--')
    query = SSAARead('2HLA:A', querySequence, queryStructure)

    subjectSequence = (
        'HVLRYGYTGIFDDTSHMTLTVVGIFDGQHFFTYHVQSSDKASSRANGTISWMANVSAAYPTY'
        'LDGERAKGDLIFNQTEQNLLELEIALGYRSQSVLTWTHECNTTENGSFVAGYEGFGWDGETL'
        'MELKDNLTLWTGPNYEISWLKQQKTYIDGKIKNISEGDTTIQRNYLKGNCTQWSVIYSGFQP'
        'PVTHPVVKGGVRNQNDNRAEAFCTSYGFFPGEIQITFIHYGDKVPEDSEPQCNPLLPTLDGT'
        'FHQGCYVAIFSNQNYTCRVTHGNWTVEIPISVT')
    subjectStructure = (
        '-EEEEEEEEEESSSS-EEEEEEEEETTEEEEEEEEESS-SSS-EEEE-STHHHHHHHHSTTH'
        'HHHHHHHHHHHHHHHHHHHHHHHHHH--SS--EEEEEEEEEE-TT--EEEEEEEEEETTEEE'
        'EEE-TTS---B---TTT-GGGGGHHHHHHHHHT--SHHHHHHHHHHHTHHHHHHHHHHHHS-'
        '--B--EEEEEEEEEETTEEEEEEEEEEEBSS--EEEEEEESS---TT---EE---EE-TTS-'
        'EEEEEEEEEETTSEEEEEEE-SS-EEEEEEE--')
    subject = SSAARead('3D2U:A', subjectSequence, subjectStructure)

    landmarkNames = [
        'PDB AlphaHelix',
        'PDB AlphaHelix_3_10',
        'PDB AlphaHelix_pi',
        'PDB ExtendedStrand',
        'AminoAcidsLm',
    ]
    trigPointNames = [
        'AminoAcids',
        'Peaks',
        'Troughs',
        'IndividualPeaks',
        'IndividualTroughs',
    ]
    database = Database(
        DatabaseParameters(
            landmarks=landmarkNames,
            trigPoints=trigPointNames,
            featureLengthBase=1.01,
            maxDistance=10000,
            limitPerLandmark=50,
            distanceBase=1.1))
    _, subjectIndex, _ = database.addSubject(subject)

    searchParams = FindParameters(significanceFraction=0.01)
    found = database.find(query, searchParams, storeFullAnalysis=True)

    # Normalizing each significant bin is the operation under test.
    for significantBin in found.analysis[subjectIndex]['significantBins']:
        normalizeBin(significantBin['bin'], len(query))
def testSubjectsOfIncompatibleReadTypesCompareUnequal(self):
    """
    Subject instances wrapping reads of types that cannot be compared with
    one another (here an AARead and an SSAARead) must not compare equal.
    """
    plainSubject = Subject(AARead('id', 'AA'))
    structuredSubject = Subject(SSAARead('id', 'AA', 'HH'))
    self.assertNotEqual(plainSubject, structuredSubject)
def testOneRead(self):
    """
    A PDB FASTA file holding a single read must be parsed into exactly
    that read.
    """
    lines = ('>seq1', 'REDD', '>str1', 'HH--')
    data = '\n'.join(lines)
    fakeOpen = mock_open(read_data=data)
    with patch.object(builtins, 'open', fakeOpen):
        # open() is patched, so the value passed as the file name is
        # irrelevant; the mock always serves 'data'.
        reads = list(SSFastaReads(data))
        self.assertEqual([SSAARead('seq1', 'REDD', 'HH--')], reads)
def testDontConvertLowerToUpperCaseIfNotSpecified(self):
    """
    When upper-casing is not requested, the case of a read's sequence and
    structure must be left exactly as found in the file.
    """
    lines = ('>seq1', 'rrFF', '>str1', 'HHee')
    data = '\n'.join(lines)
    fakeOpen = mock_open(read_data=data)
    with patch.object(builtins, 'open', fakeOpen):
        # open() is patched, so the value passed as the file name is
        # irrelevant; the mock always serves 'data'.
        reads = list(SSFastaReads(data))
        self.assertEqual([SSAARead('seq1', 'rrFF', 'HHee')], reads)
def testConvertLowerToUpperCaseIfSpecified(self):
    """
    When upperCase=True is passed, both the sequence and the structure of
    a read must be converted to upper case.
    """
    lines = ('>seq1', 'rrrff', '>str1', 'hheeh')
    data = '\n'.join(lines)
    fakeOpen = mock_open(read_data=data)
    with patch.object(builtins, 'open', fakeOpen):
        # open() is patched, so the value passed as the file name is
        # irrelevant; the mock always serves 'data'.
        reads = list(SSFastaReads(data, upperCase=True))
        self.assertEqual([SSAARead('seq1', 'RRRFF', 'HHEEH')], reads)
def testGetFeatures(self):
    """
    getFeatures, called on an SSAARead, must return the expected offsets
    for every feature, along with the pairwise intersections and unions.
    """
    def span(first, last):
        # The set of all offsets from first to last, both inclusive.
        return set(range(first, last + 1))

    sequence = 'SMEQVAMELRLTELTRLLRSVLDQLQDKDPARIFAQPVSLKEVPDYLDHIKHPMD'
    structure = '-HHHHHHHHHHHHHHHHHHHHHHHHHHT-TT-TTSS---TTT-TTHHHH-SS---'
    read = SSAARead('5AMF', sequence, structure)

    features, intersection, union = CalculateOverlap().getFeatures(read)

    troughs = {1, 4, 6, 8, 10, 13, 21, 24, 30, 33, 37, 39, 42, 46, 49, 53}
    individualTroughs = {10, 49}

    expected = {
        'AC AlphaHelix': span(8, 18) | span(31, 35),
        'AC AlphaHelix_3_10': set(),
        'AC AlphaHelix_combined': span(8, 18) | span(31, 35),
        'AC AlphaHelix_pi': set(),
        'AC ExtendedStrand': span(18, 22),
        'AlphaHelix': set(),
        'AlphaHelix_3_10': span(21, 30),
        'AlphaHelix_pi': set(),
        'AminoAcids': set(),
        'AminoAcidsLm': set(),
        'BetaStrand': set(),
        'BetaTurn': set(),
        'ClusterAlphaHelix': span(10, 17) | span(33, 36),
        'EukaryoticLinearMotif': (
            span(9, 17) | span(35, 41) | span(45, 48)),
        'GOR4AlphaHelix': span(7, 24) | span(30, 33),
        'GOR4BetaStrand': set(),
        'GOR4Coil': span(0, 6) | span(25, 29) | span(34, 54),
        'IndividualPeaks': {9, 31},
        'IndividualTroughs': set(individualTroughs),
        'PDB AlphaHelix': span(1, 26) | span(45, 48),
        'PDB AlphaHelix_3_10': set(),
        'PDB AlphaHelix_combined': span(1, 26) | span(45, 48),
        'PDB AlphaHelix_pi': set(),
        'PDB ExtendedStrand': set(),
        'Peaks': {
            3, 5, 7, 9, 12, 15, 18, 22, 27, 31, 35, 38, 40, 44, 47, 50,
        },
        'Prosite': (
            span(14, 16) | span(19, 22) | span(30, 32) | span(38, 41)),
        'THAlphaHelix': span(1, 54),
        'Troughs': set(troughs),
    }
    self.assertEqual(expected, features)

    troughPair = frozenset(('IndividualTroughs', 'Troughs'))
    self.assertEqual(individualTroughs, intersection[troughPair])
    self.assertEqual(troughs, union[troughPair])

    # Note that the following don't test much. There are 28 features
    # examined by default by CalculateOverlap, so there are
    # 28 * 27 / 2 = 378 pairs of features. These two checks only confirm
    # that every pair of features is present in the returned dicts.
    featureCount = len(expected)
    pairCount = featureCount * (featureCount - 1) // 2
    self.assertEqual(pairCount, len(intersection))
    self.assertEqual(pairCount, len(union))