Ejemplo n.º 1
0
    def testTwoFiles(self):
        """
        Reading from two FASTA files must yield the reads of both files,
        in the order the files were given.
        """
        # One entry per expected call to open: the file name the call must
        # receive and the lines of the fake file handed back for it.
        expectedOpens = (
            ('file1.fasta', ('>id1\n', 'ACTG\n', '>id1\n', 'hhhh\n')),
            ('file2.fasta', ('>id2\n', 'CAGT\n', '>id2\n', 'eeee\n')),
        )
        openedSoFar = []

        def fakeOpen(filename, **kwargs):
            # Stand-in for open: checks the name it is given against the
            # table above and fails the test on any call beyond the table.
            callIndex = len(openedSoFar)
            if callIndex >= len(expectedOpens):
                self.fail('We are only supposed to be called twice!')
            expectedName, lines = expectedOpens[callIndex]
            self.assertEqual(expectedName, filename)
            openedSoFar.append(filename)
            return File(list(lines))

        with patch.object(builtins, 'open') as mockMethod:
            mockMethod.side_effect = fakeOpen
            reads = SSFastaReads(['file1.fasta', 'file2.fasta'])
            found = list(reads)

        self.assertEqual(
            [SSAARead('id1', 'ACTG', 'hhhh'),
             SSAARead('id2', 'CAGT', 'eeee')],
            found)
Ejemplo n.º 2
0
 def testFindBug493Minimal(self):
     """
     Reduced reproduction of
     https://github.com/acorg/light-matter/issues/493

     There are no assertions here: the test only requires that finding
     the query in the database and normalizing every significant bin
     runs without raising.
     """
     querySequence = (
         'ALKEDLRSWTAADMAAQTTKHKWEAAHVAEQWRAYLEGTCVEWLRRYLENGKETLQRTDAPK'
         'THMTHHAVSDHEATLRCWALSFYPAEITLTWQRDGEDQTQDTELVETRPAGDGTFQKWVAVV')
     queryStructure = (
         'EE-TTSS-EEESSHHHHHHHHHHHHTTTHHHHHHHHHTHHHHHHHHHHHHHHHHHT--B--E'
         'EEEEEEE-SSSEEEEEEEEEEEBSS-EEEEEEETTEEE-TTEEE---EE-SSS-EEEEEEEE')
     query = SSAARead('2HLA:A', querySequence, queryStructure)

     subjectSequence = (
         'HVLRYGYTGIFDDTSHMTLTVVGIFDGQHFFTYHVQSSDKASSRANGTISWMANVSAAYPTY'
         'PVTHPVVKGGVRNQNDNRAEAFCTSYGFFPGEIQITFIHYGDKVPEDSEPQCNPLLPTLDGT')
     subjectStructure = (
         '-EEEEEEEEEESSSS-EEEEEEEEETTEEEEEEEEESS-SSS-EEEE-STHHHHHHHHSTTH'
         '--B--EEEEEEEEEETTEEEEEEEEEEEBSS--EEEEEEESS---TT---EE---EE-TTS-')
     subject = SSAARead('3D2U:A', subjectSequence, subjectStructure)

     dbParams = DatabaseParameters(
         landmarks=['PDB ExtendedStrand'],
         trigPoints=[],
         limitPerLandmark=50,
         distanceBase=1.1,
     )
     db = Database(dbParams)
     # Only the middle element of what addSubject returns is needed here.
     _, subjectIndex, _ = db.addSubject(subject)

     result = db.find(query, FindParameters(significanceFraction=0.01),
                      storeFullAnalysis=True)
     subjectAnalysis = result.analysis[subjectIndex]
     for significantBin in subjectAnalysis['significantBins']:
         normalizeBin(significantBin['bin'], len(query))
Ejemplo n.º 3
0
 def testTwoReads(self):
     """
     Both reads of a two-read PDB FASTA file must be returned, in the
     same order as they appear in the file.
     """
     fastaLines = ['>seq1', 'REDD', '>str1', 'HH--',
                   '>seq2', 'REAA', '>str2', 'HHEE']
     data = '\n'.join(fastaLines)

     # NOTE(review): the FASTA text is also passed as the file argument;
     # with open patched, the content presumably comes from the mock's
     # read_data.
     with patch.object(builtins, 'open', mockOpen(read_data=data)):
         reads = list(SSFastaReads(data))

     self.assertEqual(2, len(reads))
     expected = [
         SSAARead('seq1', 'REDD', 'HH--'),
         SSAARead('seq2', 'REAA', 'HHEE'),
     ]
     self.assertEqual(expected, reads)
Ejemplo n.º 4
0
 def testFindBug493(self):
     """
     Full-length reproduction of
     https://github.com/acorg/light-matter/issues/493

     There are no assertions here: the test only requires that finding
     the query in the database and normalizing every significant bin
     runs without raising.
     """
     querySequence = (
         'GSHSMRYFYTSVSRPGRGEPRFIAVGYVDDTQFVRFDSDAASQRMEPRAPWIEQEGPEYWDR'
         'NTRNVKAQSQTDRVDLGTLRGYYNQSEAGSHTIQMMYGCDVGSDGRFLRGYRQDAYDGKDYI'
         'ALKEDLRSWTAADMAAQTTKHKWEAAHVAEQWRAYLEGTCVEWLRRYLENGKETLQRTDAPK'
         'THMTHHAVSDHEATLRCWALSFYPAEITLTWQRDGEDQTQDTELVETRPAGDGTFQKWVAVV'
         'VPSGQEQRYTCHVQHEGLPKPL')
     queryStructure = (
         '--EEEEEEEEEE--TTSS--EEEEEEEETTEEEEEEETTSTT-S-EE-SHHHHTS-HHHHHH'
         'HHHHHHHHHHHHHHHHHHHHHHTT--TTS--EEEEEEEEEE-TTS-EEEEEEEEEETTEEEE'
         'EE-TTSS-EEESSHHHHHHHHHHHHTTTHHHHHHHHHTHHHHHHHHHHHHHHHHHT--B--E'
         'EEEEEEE-SSSEEEEEEEEEEEBSS-EEEEEEETTEEE-TTEEE---EE-SSS-EEEEEEEE'
         'EETT-GGGEEEEEEETTB-S--')
     query = SSAARead('2HLA:A', querySequence, queryStructure)

     subjectSequence = (
         'HVLRYGYTGIFDDTSHMTLTVVGIFDGQHFFTYHVQSSDKASSRANGTISWMANVSAAYPTY'
         'LDGERAKGDLIFNQTEQNLLELEIALGYRSQSVLTWTHECNTTENGSFVAGYEGFGWDGETL'
         'MELKDNLTLWTGPNYEISWLKQQKTYIDGKIKNISEGDTTIQRNYLKGNCTQWSVIYSGFQP'
         'PVTHPVVKGGVRNQNDNRAEAFCTSYGFFPGEIQITFIHYGDKVPEDSEPQCNPLLPTLDGT'
         'FHQGCYVAIFSNQNYTCRVTHGNWTVEIPISVT')
     subjectStructure = (
         '-EEEEEEEEEESSSS-EEEEEEEEETTEEEEEEEEESS-SSS-EEEE-STHHHHHHHHSTTH'
         'HHHHHHHHHHHHHHHHHHHHHHHHHH--SS--EEEEEEEEEE-TT--EEEEEEEEEETTEEE'
         'EEE-TTS---B---TTT-GGGGGHHHHHHHHHT--SHHHHHHHHHHHTHHHHHHHHHHHHS-'
         '--B--EEEEEEEEEETTEEEEEEEEEEEBSS--EEEEEEESS---TT---EE---EE-TTS-'
         'EEEEEEEEEETTSEEEEEEE-SS-EEEEEEE--')
     subject = SSAARead('3D2U:A', subjectSequence, subjectStructure)

     landmarkNames = [
         'PDB AlphaHelix', 'PDB AlphaHelix_3_10', 'PDB AlphaHelix_pi',
         'PDB ExtendedStrand', 'AminoAcidsLm',
     ]
     trigPointNames = [
         'AminoAcids', 'Peaks', 'Troughs',
         'IndividualPeaks',
         'IndividualTroughs',
     ]
     dbParams = DatabaseParameters(
         landmarks=landmarkNames,
         trigPoints=trigPointNames,
         featureLengthBase=1.01,
         maxDistance=10000,
         limitPerLandmark=50,
         distanceBase=1.1,
     )
     db = Database(dbParams)
     # Only the middle element of what addSubject returns is needed here.
     _, subjectIndex, _ = db.addSubject(subject)

     result = db.find(query, FindParameters(significanceFraction=0.01),
                      storeFullAnalysis=True)
     subjectAnalysis = result.analysis[subjectIndex]
     for significantBin in subjectAnalysis['significantBins']:
         normalizeBin(significantBin['bin'], len(query))
Ejemplo n.º 5
0
 def testSubjectsOfIncompatibleReadTypesCompareUnequal(self):
     """
     Subject instances wrapping reads of types that cannot be compared
     with each other must not compare equal.
     """
     aaRead = AARead('id', 'AA')
     ssRead = SSAARead('id', 'AA', 'HH')
     aaSubject = Subject(aaRead)
     ssSubject = Subject(ssRead)
     self.assertNotEqual(aaSubject, ssSubject)
Ejemplo n.º 6
0
 def testOneRead(self):
     """
     A single read in a PDB FASTA file must be parsed correctly.
     """
     fastaLines = ['>seq1', 'REDD', '>str1', 'HH--']
     data = '\n'.join(fastaLines)

     # NOTE(review): the FASTA text is also passed as the file argument;
     # with open patched, the content presumably comes from the mock's
     # read_data.
     fakeOpen = mock_open(read_data=data)
     with patch.object(builtins, 'open', fakeOpen):
         reads = list(SSFastaReads(data))

     expected = [SSAARead('seq1', 'REDD', 'HH--')]
     self.assertEqual(expected, reads)
Ejemplo n.º 7
0
 def testDontConvertLowerToUpperCaseIfNotSpecified(self):
     """
     When upper-casing is not requested, the case of a read's sequence
     and structure must be left exactly as it is in the file.
     """
     fastaLines = ['>seq1', 'rrFF', '>str1', 'HHee']
     data = '\n'.join(fastaLines)

     # NOTE(review): the FASTA text is also passed as the file argument;
     # with open patched, the content presumably comes from the mock's
     # read_data.
     fakeOpen = mock_open(read_data=data)
     with patch.object(builtins, 'open', fakeOpen):
         reads = list(SSFastaReads(data))

     expected = [SSAARead('seq1', 'rrFF', 'HHee')]
     self.assertEqual(expected, reads)
Ejemplo n.º 8
0
 def testConvertLowerToUpperCaseIfSpecified(self):
     """
     When upperCase=True is passed, a read's lower case sequence and
     structure must both come back in upper case.
     """
     fastaLines = ['>seq1', 'rrrff', '>str1', 'hheeh']
     data = '\n'.join(fastaLines)

     # NOTE(review): the FASTA text is also passed as the file argument;
     # with open patched, the content presumably comes from the mock's
     # read_data.
     fakeOpen = mock_open(read_data=data)
     with patch.object(builtins, 'open', fakeOpen):
         reads = list(SSFastaReads(data, upperCase=True))

     expected = [SSAARead('seq1', 'RRRFF', 'HHEEH')]
     self.assertEqual(expected, reads)
Ejemplo n.º 9
0
    def testGetFeatures(self):
        """
        getFeatures, called with an SSAARead, must return the expected
        offsets for every feature, together with intersection and union
        dicts that hold one entry per pair of features.
        """
        def span(first, last):
            # The set of all integers from first to last, ends included.
            return set(range(first, last + 1))

        ssAARead = SSAARead(
            '5AMF',
            'SMEQVAMELRLTELTRLLRSVLDQLQDKDPARIFAQPVSLKEVPDYLDHIKHPMD',
            '-HHHHHHHHHHHHHHHHHHHHHHHHHHT-TT-TTSS---TTT-TTHHHH-SS---')
        features, intersection, union = CalculateOverlap().getFeatures(
            ssAARead)

        # Offset sets that are expected for more than one key or that are
        # needed again in the intersection / union checks below.
        individualTroughs = {10, 49}
        troughs = {1, 4, 6, 8, 10, 13, 21, 24, 30, 33, 37, 39, 42, 46, 49, 53}
        acAlphaHelix = span(8, 18) | span(31, 35)
        pdbAlphaHelix = span(1, 26) | span(45, 48)

        # Features for which no offsets are expected.
        expected = {
            name: set() for name in (
                'AC AlphaHelix_pi',
                'AlphaHelix',
                'BetaTurn',
                'AC AlphaHelix_3_10',
                'AminoAcidsLm',
                'AminoAcids',
                'GOR4BetaStrand',
                'PDB AlphaHelix_3_10',
                'BetaStrand',
                'AlphaHelix_pi',
                'PDB AlphaHelix_pi',
                'PDB ExtendedStrand',
            )
        }
        # Features for which offsets are expected.
        expected.update({
            'GOR4Coil': span(0, 6) | span(25, 29) | span(34, 54),
            'IndividualPeaks': {9, 31},
            'PDB AlphaHelix_combined': pdbAlphaHelix,
            'IndividualTroughs': individualTroughs,
            'ClusterAlphaHelix': span(10, 17) | span(33, 36),
            'Troughs': troughs,
            'Peaks': {
                3, 5, 7, 9, 12, 15, 18, 22, 27, 31, 35, 38, 40, 44, 47, 50,
            },
            'AC AlphaHelix': acAlphaHelix,
            'AC AlphaHelix_combined': acAlphaHelix,
            'EukaryoticLinearMotif': (
                span(9, 17) | span(35, 41) | span(45, 48)),
            'AC ExtendedStrand': span(18, 22),
            'THAlphaHelix': span(1, 54),
            'GOR4AlphaHelix': span(7, 24) | span(30, 33),
            'PDB AlphaHelix': pdbAlphaHelix,
            'Prosite': {
                14, 15, 16, 19, 20, 21, 22, 30, 31, 32, 38, 39, 40, 41,
            },
            'AlphaHelix_3_10': span(21, 30),
        })

        self.assertEqual(expected, features)

        # Both result dicts are keyed by the frozenset of two feature names.
        troughPair = frozenset(('IndividualTroughs', 'Troughs'))
        self.assertEqual(individualTroughs & troughs,
                         intersection[troughPair])
        self.assertEqual(individualTroughs | troughs, union[troughPair])

        # These last two checks are weak. CalculateOverlap examines 28
        # features by default, which makes 28 * 27 / 2 = 378 feature
        # pairs, so all they confirm is that every pair has an entry in
        # each of the returned dicts.
        pairCount = 28 * 27 // 2
        self.assertEqual(pairCount, len(intersection))
        self.assertEqual(pairCount, len(union))