예제 #1
0
    def testSandraSymmetry_235(self):
        """
        Regression test for the non-symmetric scores problem described in
        https://github.com/acorg/light-matter/issues/235

        A few of the sequences received from Sandra Junglen on March 13,
        2015 (the ones that triggered the issue) are handed to
        self._checkSymmetry to make sure a symmetric affinity matrix is
        obtained for them.
        """
        # Read index 3.
        bunv = AARead('BUNV', (
            'SFTFFNKGQKTAKDREIFVGEFEAKMCMYVVERISKERCKLNTDE'
            'MISEPGDSKLKILEKKAEEEIRYIVERTKDSIIKGDPSKALKLEI'
            'NADMSKWSAQDVFYKYFWLIAMDPILYPAEKTRILYFMCNYMQKL'
            'LILPDDLIANILDQKRPYNDDLILEMTNGLNYNYVQIKRNWLQGN'
            'FNYISSYVHSCAMLVYKDILKECMKLLDGDCLINSMVHSDDNQTS'
            'LAIIQNKVSDQIVIQYAANTFESVCLTFGCQANMKKTYITHTCKE'
            'FVSLFNLHGEPLSVFGRFLLPSVG'))

        # Read index 24.
        lacv = AARead('LACV', (
            'YFTFFNKGQKTSKDREIFVGEYEAKMCMYAVERIAKERCKLNPDE'
            'MISEPGDGKLKVLEQKSEQEIRFLVETTRQKNREIDEAIEALAAE'
            'GYESNLEKIEKLSLGKAKGLKMEINADMSKWSAQDVFYKYFWLIA'
            'LDPILYPQEKERILYFMCNYMDKELILPDELLFNLLDQKVAYQND'
            'IIATMTNQLNSNTVLIKRNWLQGNFNYTSSYVHSCAMSVYKEILK'
            'EAITLLDGSILVNSLVHSDDNQTSITIVQDKMENDKIIDFAMKEF'
            'ERACLTFGCQANMKKTYVTNCIKEFVSLFNLYGEPFSIYGRFLLT'
            'SVG'))

        # Read index 48.
        wyov = AARead('WYOV', (
            'TFTFFNKGQKTAKDREIFVGEFEAKMCMYVVERIAKERCKLNSDE'
            'MISEPGDAKLKILEQKAEQELRFIVERTKDKFLKGDPCKALKMEI'
            'NADMSKWSAQDVFYKYFWLIAMDPILYPKEKYRILFFMCNYLQKV'
            'LVLPDELIGNILDQKKTYNNDIILEGTDFLHQNYVNIRRNWLQGN'
            'FNYLSSYIHTCAMSVFKDILKEVSYLLDGDVLVNSMVHSDDNQTS'
            'ITYVQNKIEESVLINHGLKTFETVCLTFGCQANMKKTYLTHNIKE'
            'FVSLFNIHGEPMSVYGRFLLPSVG'))

        reads = [bunv, lacv, wyov]
        params = FindParameters(significanceFraction=0.05)

        # Keyword options forwarded unchanged to the symmetry checker.
        options = {
            'distanceBase': 1.0,
            'landmarks': ALL_LANDMARK_CLASSES,
            'trigPoints': ALL_TRIG_CLASSES,
            'limitPerLandmark': 50,
            'minDistance': 1,
            'maxDistance': 100,
            'symmetric': False,
        }
        self._checkSymmetry(reads, params, **options)
예제 #2
0
 def testAddSubjectWithIndex(self):
     """
     When a subject is added to an empty SubjectStore together with an
     explicit subject index, the store must report that the subject was
     not already present and must hand back the index that was given.
     """
     newSubject = Subject(AARead('id', 'AA'))
     outcome = SubjectStore().add(newSubject, '3')
     alreadyPresent, index = outcome
     self.assertFalse(alreadyPresent)
     self.assertEqual('3', index)
예제 #3
0
 def testCoverageOfOneLandmarkPoint(self):
     """
     The coveredIndices method of a scanned read holding a single
     landmark must return exactly the indices of that landmark.
     """
     scanned = ScannedRead(AARead('id', 'AAA'))
     scanned.landmarks.append(Landmark('name', 'symbol', 0, 2))
     covered = scanned.coveredIndices()
     self.assertEqual({0, 1}, covered)
예제 #4
0
 def testGetPairsOneLandmark(self):
     """
     With only one landmark on a scanned read there is nothing to pair
     it with, so getPairs should not generate any pairs.
     """
     scanned = ScannedRead(AARead('id', 'AAAAA'))
     scanned.landmarks.append(Landmark('name', 'symbol', 0, 2))
     pairs = list(scanned.getPairs())
     self.assertEqual([], pairs)
예제 #5
0
 def testDontConvertLowerToUpperCaseIfNotSpecified(self):
     """
     Unless upper-casing is asked for, a lower case sequence read from
     a FASTA file must be left in lower case.
     """
     fasta = '\n'.join(('>id1', 'actgs'))
     opener = mockOpen(read_data=fasta)
     with patch.object(builtins, 'open', opener):
         # Consume the reads while 'open' is still patched.
         found = list(FastaReads('filename.fasta', readClass=AARead))
     expected = [AARead('id1', 'actgs')]
     self.assertEqual(expected, found)
예제 #6
0
 def testOneProperty(self):
     """
     Asking for a single property must produce a dict keyed by that
     property, holding the expected cluster number for each amino acid.
     """
     read = AARead('id', 'AI')
     expected = {'hydropathy': [3, 4]}
     clusters = clustersForSequence(read, ['hydropathy'])
     self.assertEqual(expected, clusters)
예제 #7
0
 def testOneProperty(self):
     """
     Asking for a single property must produce a dict keyed by that
     property, holding the expected property value for each amino acid.
     """
     read = AARead('id', 'AI')
     expected = {'hydropathy': [0.4, 1.0]}
     properties = propertiesForSequence(read, ['hydropathy'])
     self.assertEqual(expected, properties)
예제 #8
0
 def testFindOneClusterEnd(self):
     """
     A cluster located at the end of a sequence must be found by the
     find method.
     """
     read = AARead('id', 'AAAAAAAAKKAH')
     found = list(ClusterAlphaHelix().find(read))
     expected = [Landmark('ClusterAlphaHelix', 'CAH', 8, 4, '1131')]
     self.assertEqual(expected, found)
예제 #9
0
 def testNoBetaStrand(self):
     """
     When the sequence has no beta strand, the generator returned by
     the find method must be empty.
     """
     read = AARead('id', 'PAPAPA')
     found = list(BetaStrand().find(read))
     self.assertEqual([], found)
예제 #10
0
 def testHelixTwoRepeatsWithNonZeroOffset(self):
     """
     A helix with a repeat count of two that does not begin at the
     start of the sequence must be found by the find method.
     """
     read = AARead('id', 'AAAFRRRFRRRF')
     found = list(AlphaHelix().find(read))
     expected = [Landmark('AlphaHelix', 'A', 3, 9, 2)]
     self.assertEqual(expected, found)
예제 #11
0
 def testFindWithOneRepeat(self):
     """
     A single instance of the OIII pattern is not enough: the generator
     returned by the find method must be empty.
     """
     read = AARead('id', 'FRRR')
     found = list(AlphaHelix().find(read))
     self.assertEqual([], found)
예제 #12
0
 def testFindWithoutHelix(self):
     """
     When the sequence contains no helix, the generator returned by the
     find method must be empty.
     """
     read = AARead('id', 'FRFRFRFRFRFRFRFRFRFF')
     found = list(AlphaHelix().find(read))
     self.assertEqual([], found)
예제 #13
0
 def testNoIndividualTroughAtEnd(self):
     """
     An individual trough must not be reported at the end of the
     sequence by the find method.
     """
     read = AARead('id', 'AAAAAAV')
     found = list(IndividualTroughs().find(read))
     self.assertEqual([], found)
예제 #14
0
 def testFindWithoutIndividualTrough(self):
     """
     When the sequence has no individual trough, the generator returned
     by the find method must be empty.
     """
     read = AARead('id', 'RRRRRRRRRRRRRRR')
     found = list(IndividualTroughs().find(read))
     self.assertEqual([], found)
예제 #15
0
 def testFindWithoutCluster(self):
     """
     When the sequence has no alpha helix cluster, the generator
     returned by the find method must be empty.
     """
     read = AARead('id', 'FFFFFFFFFFFFFFFFFFFFFFFFFFF')
     found = list(ClusterAlphaHelix().find(read))
     self.assertEqual([], found)
예제 #16
0
 def testBelowMinimumLength(self):
     """
     A run of beta strand amino acids that is too short (the sequence
     used here is 5 AA long) must not be found by the find method.
     """
     read = AARead('id', 'VIVIV')
     found = list(BetaStrand().find(read))
     self.assertEqual([], found)
예제 #17
0
 def testFindOneClusterBeginning(self):
     """
     A single cluster located at the beginning of a sequence must be
     found by the find method.
     """
     read = AARead('id', 'KKAHFFFFFFFFF')
     found = list(ClusterAlphaHelix().find(read))
     expected = [Landmark('ClusterAlphaHelix', 'CAH', 0, 4, '1131')]
     self.assertEqual(expected, found)
예제 #18
0
 def testMinimumLength(self):
     """
     A run of beta strand amino acids of the minimal length (6) must be
     found by the find method.
     """
     read = AARead('id', 'VICVIC')
     found = list(BetaStrand().find(read))
     expected = [Landmark('BetaStrand', 'S', 0, 6, 6)]
     self.assertEqual(expected, found)
예제 #19
0
 def testFindWithoutPeak(self):
     """
     When the sequence has no peak, the generator returned by the find
     method must be empty.
     """
     read = AARead('id', 'RRRRRRRRRRRRRRR')
     found = list(Peaks().find(read))
     self.assertEqual([], found)
예제 #20
0
 def testAtStartOfSequence(self):
     """
     A run of beta strand amino acids beginning at the very start of
     the sequence must be found by the find method.
     """
     read = AARead('id', 'VICVICV')
     found = list(BetaStrand().find(read))
     expected = [Landmark('BetaStrand', 'S', 0, 7, 7)]
     self.assertEqual(expected, found)
예제 #21
0
 def testOnePropertyEmptySequence(self):
     """
     Asking for a single property of an empty sequence must produce a
     dict keyed by that property whose value is an empty list.
     """
     read = AARead('id', '')
     expected = {'hydropathy': []}
     clusters = clustersForSequence(read, ['hydropathy'])
     self.assertEqual(expected, clusters)
예제 #22
0
 def testAtEndOfSequence(self):
     """
     A run of beta strand amino acids that finishes at the end of the
     sequence must be found by the find method.
     """
     read = AARead('id', 'PVICVICV')
     found = list(BetaStrand().find(read))
     expected = [Landmark('BetaStrand', 'S', 1, 7, 7)]
     self.assertEqual(expected, found)
예제 #23
0
 def testConvertLowerToUpperCaseIfSpecifiedDNARead(self):
     """
     When upper-casing is requested, a lower case sequence read from a
     FASTA file must be converted to upper case.
     """
     fasta = '\n'.join(('>id1', 'actg'))
     opener = mockOpen(read_data=fasta)
     with patch.object(builtins, 'open', opener):
         # Consume the reads while 'open' is still patched.
         found = list(FastaReads('filename.fasta', upperCase=True))
     # NOTE(review): the test name mentions DNARead but the expected read
     # is built with AARead - confirm that this is intended.
     expected = [AARead('id1', 'ACTG')]
     self.assertEqual(expected, found)
예제 #24
0
 def testInMiddleOfSequence(self):
     """
     A run of beta strand amino acids that starts in the middle of the
     sequence must be found by the find method.
     """
     read = AARead('id', 'PAVICVCFYPA')
     found = list(BetaStrand().find(read))
     expected = [Landmark('BetaStrand', 'S', 2, 7, 7)]
     self.assertEqual(expected, found)
예제 #25
0
 def testNoAlphaHelices(self):
     """
     When a sequence has no alpha helices, the GOR4AlphaHelix landmark
     finder must not report any.
     """
     read = AARead('id', 'EA')
     found = list(GOR4AlphaHelix().find(read))
     self.assertEqual([], found)
예제 #26
0
 def testFindNothing(self):
     """
     When none of the configured strands is present in the read, the
     generator returned by the find method must be empty.
     """
     strandFinder = setExtendedStrands(['XXX', 'YYY'])
     found = list(strandFinder.find(AARead('id', 'FRFRFRFRFRFRFRFRFRFF')))
     self.assertEqual([], found)
예제 #27
0
 def testCoverageOfOneTrigPoint(self):
     """
     The coveredIndices method of a scanned read holding a single trig
     point must return just the index of that trig point.
     """
     scanned = ScannedRead(AARead('id', 'AAA'))
     scanned.trigPoints.append(TrigPoint('name', 'symbol', 0))
     covered = scanned.coveredIndices()
     self.assertEqual({0}, covered)
예제 #28
0
 def testFullMatch(self):
     """
     When the read sequence fully matches an extended strand, the find
     method must return a landmark covering the full read sequence.
     """
     strandFinder = setExtendedStrands(['FFFF'])
     found = list(strandFinder.find(AARead('id', 'FFFF')))
     expected = [Landmark('AC ExtendedStrand', 'ACES', 0, 4)]
     self.assertEqual(expected, found)
예제 #29
0
 def testFindNothing(self):
     """
     When none of the configured helices is present in the read, the
     generator returned by the find method must be empty.
     """
     helixFinder = setAlphaHelices_3_10(['XXX', 'YYY'])
     found = list(helixFinder.find(AARead('id', 'FRFRFRFRFRFRFRFRFRFF')))
     self.assertEqual([], found)
예제 #30
0
 def testLengthIsOneAfterAddSubject(self):
     """
     A new SubjectStore must have a length of one once a single subject
     has been added to it.
     """
     store = SubjectStore()
     store.add(Subject(AARead('id', 'AA')))
     storeSize = len(store)
     self.assertEqual(1, storeSize)