def testSandraSymmetry_235(self):
    """
    Run the symmetry check on three of the sequences received from Sandra
    Junglen on March 13, 2015.

    These are the reads that triggered the non-symmetric scores problem
    reported in https://github.com/acorg/light-matter/issues/235
    """
    # Read index 3.
    bunv = AARead('BUNV', ('SFTFFNKGQKTAKDREIFVGEFEAKMCMYVVERISKERCKLNTDE'
                           'MISEPGDSKLKILEKKAEEEIRYIVERTKDSIIKGDPSKALKLEI'
                           'NADMSKWSAQDVFYKYFWLIAMDPILYPAEKTRILYFMCNYMQKL'
                           'LILPDDLIANILDQKRPYNDDLILEMTNGLNYNYVQIKRNWLQGN'
                           'FNYISSYVHSCAMLVYKDILKECMKLLDGDCLINSMVHSDDNQTS'
                           'LAIIQNKVSDQIVIQYAANTFESVCLTFGCQANMKKTYITHTCKE'
                           'FVSLFNLHGEPLSVFGRFLLPSVG'))

    # Read index 24.
    lacv = AARead('LACV', ('YFTFFNKGQKTSKDREIFVGEYEAKMCMYAVERIAKERCKLNPDE'
                           'MISEPGDGKLKVLEQKSEQEIRFLVETTRQKNREIDEAIEALAAE'
                           'GYESNLEKIEKLSLGKAKGLKMEINADMSKWSAQDVFYKYFWLIA'
                           'LDPILYPQEKERILYFMCNYMDKELILPDELLFNLLDQKVAYQND'
                           'IIATMTNQLNSNTVLIKRNWLQGNFNYTSSYVHSCAMSVYKEILK'
                           'EAITLLDGSILVNSLVHSDDNQTSITIVQDKMENDKIIDFAMKEF'
                           'ERACLTFGCQANMKKTYVTNCIKEFVSLFNLYGEPFSIYGRFLLT'
                           'SVG'))

    # Read index 48.
    wyov = AARead('WYOV', ('TFTFFNKGQKTAKDREIFVGEFEAKMCMYVVERIAKERCKLNSDE'
                           'MISEPGDAKLKILEQKAEQELRFIVERTKDKFLKGDPCKALKMEI'
                           'NADMSKWSAQDVFYKYFWLIAMDPILYPKEKYRILFFMCNYLQKV'
                           'LVLPDELIGNILDQKKTYNNDIILEGTDFLHQNYVNIRRNWLQGN'
                           'FNYLSSYIHTCAMSVFKDILKEVSYLLDGDVLVNSMVHSDDNQTS'
                           'ITYVQNKIEESVLINHGLKTFETVCLTFGCQANMKKTYLTHNIKE'
                           'FVSLFNIHGEPMSVYGRFLLPSVG'))

    reads = [bunv, lacv, wyov]
    params = FindParameters(significanceFraction=0.05)

    # NOTE(review): symmetric=False is handed to the helper even though
    # the aim is a symmetric result; presumably it forces every pair to
    # be computed independently - confirm against _checkSymmetry.
    self._checkSymmetry(
        reads,
        params,
        distanceBase=1.0,
        landmarks=ALL_LANDMARK_CLASSES,
        trigPoints=ALL_TRIG_CLASSES,
        limitPerLandmark=50,
        minDistance=1,
        maxDistance=100,
        symmetric=False)
def testAddSubjectWithIndex(self):
    """
    When a subject is added to a newly created SubjectStore along with an
    explicit subject index, add() must report the subject as not
    pre-existing and must return the index that was passed.
    """
    store = SubjectStore()
    outcome = store.add(Subject(AARead('id', 'AA')), '3')
    alreadyThere, index = outcome
    self.assertFalse(alreadyThere)
    self.assertEqual('3', index)
def testCoverageOfOneLandmarkPoint(self):
    """
    The coveredIndices method of a scanned read that holds a single
    landmark must return exactly the indices of that landmark.
    """
    scanned = ScannedRead(AARead('id', 'AAA'))
    scanned.landmarks.append(Landmark('name', 'symbol', 0, 2))
    covered = scanned.coveredIndices()
    self.assertEqual({0, 1}, covered)
def testGetPairsOneLandmark(self):
    """
    The getPairs method of a scanned read that holds a single landmark
    must not generate any pairs, as the landmark has no partner.
    """
    scanned = ScannedRead(AARead('id', 'AAAAA'))
    scanned.landmarks.append(Landmark('name', 'symbol', 0, 2))
    pairs = list(scanned.getPairs())
    self.assertEqual([], pairs)
def testDontConvertLowerToUpperCaseIfNotSpecified(self):
    """
    Unless upper-casing is requested, a lower case sequence read from
    FASTA must be returned in lower case.
    """
    fasta = '\n'.join(('>id1', 'actgs'))
    opener = mockOpen(read_data=fasta)
    # Replace the built-in open so no real file is needed.
    with patch.object(builtins, 'open', opener):
        found = list(FastaReads('filename.fasta', readClass=AARead))
        expected = [AARead('id1', 'actgs')]
        self.assertEqual(expected, found)
def testOneProperty(self):
    """
    Asking clustersForSequence for a single property must produce a dict
    keyed by that property whose value lists the expected cluster
    numbers.
    """
    sequence = AARead('id', 'AI')
    expected = {'hydropathy': [3, 4]}
    clusters = clustersForSequence(sequence, ['hydropathy'])
    self.assertEqual(expected, clusters)
def testOneProperty(self):
    """
    Asking propertiesForSequence for a single property must produce a
    dict keyed by that property whose value lists the expected property
    values.
    """
    sequence = AARead('id', 'AI')
    expected = {'hydropathy': [0.4, 1.0]}
    properties = propertiesForSequence(sequence, ['hydropathy'])
    self.assertEqual(expected, properties)
def testFindOneClusterEnd(self):
    """
    A cluster located at the end of a sequence must be found by the find
    method.
    """
    sequence = AARead('id', 'AAAAAAAAKKAH')
    found = list(ClusterAlphaHelix().find(sequence))
    expected = [Landmark('ClusterAlphaHelix', 'CAH', 8, 4, '1131')]
    self.assertEqual(expected, found)
def testNoBetaStrand(self):
    """
    When a sequence has no beta strand, the find method must yield
    nothing.
    """
    sequence = AARead('id', 'PAPAPA')
    found = list(BetaStrand().find(sequence))
    self.assertEqual([], found)
def testHelixTwoRepeatsWithNonZeroOffset(self):
    """
    A helix with a repeat count of two that does not begin at the start
    of the sequence must be found by the find method.
    """
    sequence = AARead('id', 'AAAFRRRFRRRF')
    found = list(AlphaHelix().find(sequence))
    expected = [Landmark('AlphaHelix', 'A', 3, 9, 2)]
    self.assertEqual(expected, found)
def testFindWithOneRepeat(self):
    """
    A sequence containing just a single instance of the OIII pattern must
    cause the find method to yield nothing.
    """
    sequence = AARead('id', 'FRRR')
    found = list(AlphaHelix().find(sequence))
    self.assertEqual([], found)
def testFindWithoutHelix(self):
    """
    When a sequence has no helix, the find method must yield nothing.
    """
    sequence = AARead('id', 'FRFRFRFRFRFRFRFRFRFF')
    found = list(AlphaHelix().find(sequence))
    self.assertEqual([], found)
def testNoIndividualTroughAtEnd(self):
    """
    An individual trough must not be reported at the very end of a
    sequence.
    """
    sequence = AARead('id', 'AAAAAAV')
    found = list(IndividualTroughs().find(sequence))
    self.assertEqual([], found)
def testFindWithoutIndividualTrough(self):
    """
    When a sequence has no individual trough, the find method must yield
    nothing.
    """
    sequence = AARead('id', 'RRRRRRRRRRRRRRR')
    found = list(IndividualTroughs().find(sequence))
    self.assertEqual([], found)
def testFindWithoutCluster(self):
    """
    When a sequence has no alpha helix cluster, the find method must
    yield nothing.
    """
    sequence = AARead('id', 'FFFFFFFFFFFFFFFFFFFFFFFFFFF')
    found = list(ClusterAlphaHelix().find(sequence))
    self.assertEqual([], found)
def testBelowMinimumLength(self):
    """
    A run of beta strand amino acids that is too short to qualify (the
    five residues used here) must not be reported by the find method.
    """
    sequence = AARead('id', 'VIVIV')
    found = list(BetaStrand().find(sequence))
    self.assertEqual([], found)
def testFindOneClusterBeginning(self):
    """
    A single cluster located at the beginning of a sequence must be found
    by the find method.
    """
    sequence = AARead('id', 'KKAHFFFFFFFFF')
    found = list(ClusterAlphaHelix().find(sequence))
    expected = [Landmark('ClusterAlphaHelix', 'CAH', 0, 4, '1131')]
    self.assertEqual(expected, found)
def testMinimumLength(self):
    """
    A run of beta strand amino acids of the minimal length (6) must be
    found by the find method.
    """
    sequence = AARead('id', 'VICVIC')
    found = list(BetaStrand().find(sequence))
    expected = [Landmark('BetaStrand', 'S', 0, 6, 6)]
    self.assertEqual(expected, found)
def testFindWithoutPeak(self):
    """
    When a sequence has no peak, the find method must yield nothing.
    """
    sequence = AARead('id', 'RRRRRRRRRRRRRRR')
    found = list(Peaks().find(sequence))
    self.assertEqual([], found)
def testAtStartOfSequence(self):
    """
    A run of beta strand amino acids that opens the sequence must be
    found by the find method.
    """
    sequence = AARead('id', 'VICVICV')
    found = list(BetaStrand().find(sequence))
    expected = [Landmark('BetaStrand', 'S', 0, 7, 7)]
    self.assertEqual(expected, found)
def testOnePropertyEmptySequence(self):
    """
    Asking clustersForSequence for a single property of an empty sequence
    must produce a dict keyed by that property with an empty list as its
    value.
    """
    emptyRead = AARead('id', '')
    expected = {'hydropathy': []}
    clusters = clustersForSequence(emptyRead, ['hydropathy'])
    self.assertEqual(expected, clusters)
def testAtEndOfSequence(self):
    """
    A run of beta strand amino acids that closes the sequence must be
    found by the find method.
    """
    sequence = AARead('id', 'PVICVICV')
    found = list(BetaStrand().find(sequence))
    expected = [Landmark('BetaStrand', 'S', 1, 7, 7)]
    self.assertEqual(expected, found)
def testConvertLowerToUpperCaseIfSpecifiedDNARead(self):
    """
    When upperCase=True is passed, a lower case sequence read from FASTA
    must be returned in upper case.
    """
    fasta = '\n'.join(('>id1', 'actg'))
    opener = mockOpen(read_data=fasta)
    # Replace the built-in open so no real file is needed.
    with patch.object(builtins, 'open', opener):
        found = list(FastaReads('filename.fasta', upperCase=True))
        # NOTE(review): no readClass is given, yet the expected read is
        # an AARead; this presumably relies on read equality ignoring
        # the read class - confirm.
        expected = [AARead('id1', 'ACTG')]
        self.assertEqual(expected, found)
def testInMiddleOfSequence(self):
    """
    A run of beta strand amino acids that begins part-way through the
    sequence must be found by the find method.
    """
    sequence = AARead('id', 'PAVICVCFYPA')
    found = list(BetaStrand().find(sequence))
    expected = [Landmark('BetaStrand', 'S', 2, 7, 7)]
    self.assertEqual(expected, found)
def testNoAlphaHelices(self):
    """
    When a sequence has no alpha helices, the GOR4AlphaHelix landmark
    finder must yield nothing.
    """
    sequence = AARead('id', 'EA')
    found = list(GOR4AlphaHelix().find(sequence))
    self.assertEqual([], found)
def testFindNothing(self):
    """
    When no strand is present in the read, the find method of the finder
    obtained from setExtendedStrands(['XXX', 'YYY']) must yield nothing.
    """
    strandFinder = setExtendedStrands(['XXX', 'YYY'])
    sequence = AARead('id', 'FRFRFRFRFRFRFRFRFRFF')
    found = list(strandFinder.find(sequence))
    self.assertEqual([], found)
def testCoverageOfOneTrigPoint(self):
    """
    The coveredIndices method of a scanned read that holds a single trig
    point must return just the index of that trig point.
    """
    scanned = ScannedRead(AARead('id', 'AAA'))
    scanned.trigPoints.append(TrigPoint('name', 'symbol', 0))
    covered = scanned.coveredIndices()
    self.assertEqual({0}, covered)
def testFullMatch(self):
    """
    When the whole read sequence matches an extended strand, the find
    method must return a landmark spanning the full read.
    """
    strandFinder = setExtendedStrands(['FFFF'])
    sequence = AARead('id', 'FFFF')
    found = list(strandFinder.find(sequence))
    expected = [Landmark('AC ExtendedStrand', 'ACES', 0, 4)]
    self.assertEqual(expected, found)
def testFindNothing(self):
    """
    When no helix is present in the read, the find method of the finder
    obtained from setAlphaHelices_3_10(['XXX', 'YYY']) must yield
    nothing.
    """
    helixFinder = setAlphaHelices_3_10(['XXX', 'YYY'])
    sequence = AARead('id', 'FRFRFRFRFRFRFRFRFRFF')
    found = list(helixFinder.find(sequence))
    self.assertEqual([], found)
def testLengthIsOneAfterAddSubject(self):
    """
    A newly created SubjectStore must have length one once a single
    subject has been added to it.
    """
    store = SubjectStore()
    store.add(Subject(AARead('id', 'AA')))
    self.assertEqual(1, len(store))