def test_subset_seqs_Alignment(self):
    """Alignment.takeSeqs should pick seqs by name and honour the given order"""
    seq_a = RnaSequence('UCG', Name='rna1')
    seq_b = RnaSequence('YCG', Name='rna2')
    seq_c = RnaSequence('CAR', Name='rna3')
    expected = Alignment([seq_b, seq_c], MolType=RNA)
    full_aln = Alignment([seq_a, seq_b, seq_c], MolType=RNA)

    observed = full_aln.takeSeqs(['rna2', 'rna3'])
    # Both the objects and their printed forms must agree.
    self.assertEqual(observed, expected)
    self.assertEqual(str(observed), str(expected))

    # Requesting the same names in the opposite order has to yield a
    # different printout, i.e. the requested order is what gets emitted.
    reversed_pick = self.aln.takeSeqs(['rna3', 'rna2'])
    forward_pick = self.aln.takeSeqs(['rna2', 'rna3'])
    self.assertNotEqual(str(reversed_pick), str(forward_pick))
def test_subset_seqs_Alignment(self):
    """Alignment.takeSeqs should select seqs by name, in the order requested"""
    rna1 = RnaSequence('UCG', Name='rna1')
    rna2 = RnaSequence('YCG', Name='rna2')
    rna3 = RnaSequence('CAR', Name='rna3')
    sub_aln = Alignment([rna2, rna3], MolType=RNA)
    aln = Alignment([rna1, rna2, rna3], MolType=RNA)
    obs_sub_aln = aln.takeSeqs(['rna2', 'rna3'])
    self.assertEqual(obs_sub_aln, sub_aln)
    self.assertEqual(str(obs_sub_aln), str(sub_aln))
    # Selected sequences should come back in the specified order, so the
    # two printouts below must differ.
    obs_sub_aln_1 = self.aln.takeSeqs(['rna3', 'rna2'])
    obs_sub_aln_2 = self.aln.takeSeqs(['rna2', 'rna3'])
    # assertNotEqual is the supported spelling; the failIfEqual alias is
    # deprecated in unittest and absent from recent Python releases.
    self.assertNotEqual(str(obs_sub_aln_1), str(obs_sub_aln_2))
class AllTests(TestCase):
    """Checks that Alignment and DenseAlignment behave consistently.

    Each test exercises the same operation (printing, naming, frequency
    counting, subsetting, equality) on an Alignment built from RnaSequence
    objects and on a DenseAlignment built from ModelSequence objects.
    """

    def setUp(self):
        """setUp method for all tests"""
        # named sequences
        self.rna1 = RnaSequence('UCAGGG', Name='rna1')
        self.rna2 = RnaSequence('YCU-RG', Name='rna2')
        self.rna3 = RnaSequence('CAA-NR', Name='rna3')
        self.model1 = ModelSequence('UCAGGG', Name='rna1',
            Alphabet=RNA.Alphabets.DegenGapped)
        self.model2 = ModelSequence('YCU-RG', Name='rna2',
            Alphabet=RNA.Alphabets.DegenGapped)
        self.model3 = ModelSequence('CAA-NR', Name='rna3',
            Alphabet=RNA.Alphabets.DegenGapped)

        self.aln = Alignment([self.rna1, self.rna2, self.rna3], MolType=RNA)
        self.da = DenseAlignment([self.model1, self.model2, self.model3],
            MolType=RNA, Alphabet=RNA.Alphabets.DegenGapped)

        # seqs no name
        self.nn_rna1 = RnaSequence('UCAGGG')
        self.nn_rna2 = RnaSequence('YCU-RG')
        self.nn_rna3 = RnaSequence('CAA-NR')
        self.nn_model1 = ModelSequence('UCAGGG',
            Alphabet=RNA.Alphabets.DegenGapped)
        self.nn_model2 = ModelSequence('YCU-RG',
            Alphabet=RNA.Alphabets.DegenGapped)
        self.nn_model3 = ModelSequence('CAA-NR',
            Alphabet=RNA.Alphabets.DegenGapped)

        self.nn_aln = Alignment([self.nn_rna1, self.nn_rna2, self.nn_rna3],
            MolType=RNA)
        self.nn_da = DenseAlignment([self.nn_model1, self.nn_model2,
            self.nn_model3], MolType=RNA, Alphabet=RNA.Alphabets.DegenGapped)

    def test_printing_named_seqs(self):
        """Printing named seqs should work the same on Aln and DenseAln"""
        # Note: the newline trailing each sequence is intentional, because
        # we want each FASTA-format record to be separated.
        exp_lines_general = [
            '>rna1', 'UCAGGG', '>rna2', 'YCU-RG', '>rna3', 'CAA-NR'
        ]
        self.assertEqual(str(self.aln), '\n'.join(exp_lines_general) + '\n')
        self.assertEqual(str(self.da), '\n'.join(exp_lines_general) + '\n')

    def test_printing_unnamed_seqs(self):
        """Printing unnamed sequences should work the same on Aln and DenseAln
        """
        # The trailing newline is carried by the last expected element here.
        exp_lines_gen = [
            '>seq_0', 'UCAGGG', '>seq_1', 'YCU-RG', '>seq_2', 'CAA-NR\n'
        ]
        self.assertEqual(str(self.nn_aln), '\n'.join(exp_lines_gen))
        self.assertEqual(str(self.nn_da), '\n'.join(exp_lines_gen))

    def test_DenseAlignment_without_moltype(self):
        """Expect MolType to be picked up from the sequences."""
        m1 = ModelSequence('UCAG', Alphabet=RNA.Alphabets.DegenGapped,
            Name='rna1')
        m2 = ModelSequence('CCCR', Alphabet=RNA.Alphabets.DegenGapped,
            Name='rna2')
        da = DenseAlignment([m1, m2])
        exp_lines = ['>rna1', 'UCAG', '>rna2', 'CCCR']
        self.assertEqual(str(da), '\n'.join(exp_lines) + '\n')

    def test_names(self):
        """Names should be reported identically by Alignment and DenseAlignment"""
        # Should both alignments handle names the same way?
        self.assertEqual(self.aln.Names, ['rna1', 'rna2', 'rna3'])
        self.assertEqual(self.da.Names, ['rna1', 'rna2', 'rna3'])
        # On unnamed sequences the behavior is now the same.
        self.assertEqual(self.nn_aln.Names, ['seq_0', 'seq_1', 'seq_2'])
        self.assertEqual(self.nn_da.Names, ['seq_0', 'seq_1', 'seq_2'])

    def test_seqFreqs(self):
        """seqFreqs should work the same on Alignment and DenseAlignment"""
        # Used alphabet: ('U', 'C', 'A', 'G', '-', 'B', 'D', 'H',
        #                 'K', 'M', 'N', 'S', 'R', 'W', 'V', 'Y')
        exp = [[1,1,1,3,0,0,0,0,0,0,0,0,0,0,0,0,0],
               [1,1,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0],
               [0,1,2,0,1,0,0,0,0,0,1,0,1,0,0,0,0]]
        # This works
        self.assertEqual(self.da.getSeqFreqs().Data, exp)
        # This used to raise an error, but now works
        self.assertEqual(self.aln.getSeqFreqs().Data, exp)

    def test_subset_positions_DenseAlignment(self):
        """takePositions and getSubAlignment(pos=...) should agree on DenseAlignment"""
        model1 = ModelSequence('UCG', Name='rna1',
            Alphabet=RNA.Alphabets.DegenGapped)
        model2 = ModelSequence('YCG', Name='rna2',
            Alphabet=RNA.Alphabets.DegenGapped)
        model3 = ModelSequence('CAR', Name='rna3',
            Alphabet=RNA.Alphabets.DegenGapped)
        sub_da = DenseAlignment([model1, model2, model3],
            MolType=RNA, Alphabet=RNA.Alphabets.DegenGapped)

        full_data = array([[0, 1, 2, 3, 3, 3],
                           [15, 1, 0, 4, 12, 3],
                           [1, 2, 2, 4, 10, 12]])
        sub_data = array([[0, 1, 3],
                          [15, 1, 3],
                          [1, 2, 12]])

        # First check some data
        self.assertEqual(self.da.ArraySeqs, full_data)
        self.assertEqual(self.da.ArrayPositions, transpose(full_data))
        self.assertEqual(sub_da.ArraySeqs, sub_data)
        self.assertEqual(sub_da.ArrayPositions, transpose(sub_data))

        obs_sub_da_TP = self.da.takePositions([0, 1, 5])
        obs_sub_da_SA = self.da.getSubAlignment(pos=[0, 1, 5])

        # When using the getSubAlignment method the data is right
        self.assertEqual(obs_sub_da_SA, sub_da)
        self.assertNotEqual(obs_sub_da_SA, self.da)
        self.assertEqual(obs_sub_da_SA.ArraySeqs, sub_data)
        self.assertEqual(obs_sub_da_SA.ArrayPositions, transpose(sub_data))

        # For the takePositions method: the objects compare equal, and the
        # underlying arrays are checked explicitly as well.
        self.assertEqual(obs_sub_da_TP, sub_da)
        self.assertNotEqual(obs_sub_da_TP, self.da)
        self.assertEqual(obs_sub_da_TP.ArraySeqs, sub_data)
        self.assertEqual(obs_sub_da_TP.ArrayPositions, transpose(sub_data))
        # Open question from the original author:
        # shouldn't the __eq__ method check the data at least?

    def test_subset_positions_Alignment(self):
        """Alignment.takePositions should return the selected columns only"""
        rna1 = RnaSequence('UCG', Name='rna1')
        rna2 = RnaSequence('YCG', Name='rna2')
        rna3 = RnaSequence('CAR', Name='rna3')
        sub_aln = Alignment([rna1, rna2, rna3], MolType=RNA)

        obs_sub_aln = self.aln.takePositions([0, 1, 5])
        self.assertEqual(obs_sub_aln, sub_aln)
        self.assertNotEqual(obs_sub_aln, self.aln)
        # String representations should be the same; this depends on the
        # sequence order being maintained (see the separate order test).
        self.assertEqual(str(obs_sub_aln), str(sub_aln))

    def test_takePositions_sequence_order(self):
        """Alignment takePositions should maintain seq order"""
        # DenseAlignment
        self.assertEqual(self.da.Names, ['rna1', 'rna2', 'rna3'])
        sub_da = self.da.getSubAlignment(pos=[0, 1, 5])
        self.assertEqual(sub_da.Names, ['rna1', 'rna2', 'rna3'])
        # Alignment: order must survive takePositions too
        self.assertEqual(self.aln.Names, ['rna1', 'rna2', 'rna3'])
        sub_aln = self.aln.takePositions([0, 1, 5])
        self.assertEqual(sub_aln.Names, ['rna1', 'rna2', 'rna3'])

    def test_subset_seqs_Alignment(self):
        """Alignment.takeSeqs should select seqs by name, in the order requested"""
        rna1 = RnaSequence('UCG', Name='rna1')
        rna2 = RnaSequence('YCG', Name='rna2')
        rna3 = RnaSequence('CAR', Name='rna3')
        sub_aln = Alignment([rna2, rna3], MolType=RNA)
        aln = Alignment([rna1, rna2, rna3], MolType=RNA)
        obs_sub_aln = aln.takeSeqs(['rna2', 'rna3'])
        self.assertEqual(obs_sub_aln, sub_aln)
        self.assertEqual(str(obs_sub_aln), str(sub_aln))

        # Selected sequences should be in specified order?
        obs_sub_aln_1 = self.aln.takeSeqs(['rna3', 'rna2'])
        obs_sub_aln_2 = self.aln.takeSeqs(['rna2', 'rna3'])
        self.assertNotEqual(str(obs_sub_aln_1), str(obs_sub_aln_2))

    def test_subset_seqs_DenseAlignment(self):
        """takeSeqs by name should match getSubAlignment by seq index"""
        obs_sub_da_TS = self.da.takeSeqs(['rna1'])
        obs_sub_da_SA = self.da.getSubAlignment(seqs=[0])

        # These two are now the same. Fixed mapping of key to char array.
        self.assertEqual(obs_sub_da_TS, obs_sub_da_SA)
        self.assertEqual(str(obs_sub_da_TS), str(obs_sub_da_SA))

    def test_aln_equality(self):
        """DenseAlignment equality: which differences matter and which do not"""
        # When does something compare equal?
        self.assertEqual(self.da == self.da, True)
        # one sequence less
        other_da1 = DenseAlignment([self.model1, self.model2],
            MolType=RNA, Alphabet=RNA.Alphabets.DegenGapped)
        self.assertEqual(self.da == other_da1, False)
        # seqs in different order -- doesn't matter
        other_da2 = DenseAlignment([self.model1, self.model3, self.model2],
            MolType=RNA, Alphabet=RNA.Alphabets.DegenGapped)
        self.assertEqual(self.da == other_da2, True)
        # seqs in different encoding -- doesn't matter, only looks at data
        other_da3 = DenseAlignment([self.model1, self.model2, self.model3])
        # Should this compare False even though the data is exactly the same?
        # The MolType is different...
        self.assertEqual(self.da == other_da3, True)
        # Use a TestCase assertion rather than a bare assert so the check
        # is not dropped when Python runs with -O.
        self.assertTrue(alltrue(
            list(map(alltrue, self.da.ArraySeqs == other_da3.ArraySeqs))))

    def test_seq_equality(self):
        """ModelSequences built from identical arguments should compare equal"""
        model1 = ModelSequence('UCG', Name='rna1',
            Alphabet=RNA.Alphabets.DegenGapped)
        model2 = ModelSequence('UCG', Name='rna1',
            Alphabet=RNA.Alphabets.DegenGapped)
        # Shouldn't the above two sequences be equal?
        self.assertEqual(model1, model2)
        # string comparison is True
        self.assertEqual(str(model1), str(model2))

    def test_seq_ungapping(self):
        """degap should strip gaps from RnaSequence and ModelSequence alike"""
        rna1 = RnaSequence('U-C-A-G-', Name='rna1')
        model1 = ModelSequence('U-C-A-G-', Name='rna1',
            Alphabet=RNA.Alphabets.DegenGapped)

        self.assertEqual(rna1, 'U-C-A-G-')
        self.assertEqual(rna1.degap(), 'UCAG')

        # check it produces the right string from the beginning
        self.assertEqual(str(model1), 'U-C-A-G-')
        self.assertEqual(model1._data, [0, 4, 1, 4, 2, 4, 3, 4])
        # ModelSequence should maybe have the same degap method as normal Seq
        self.assertEqual(str(model1.degap()), 'UCAG')

    def test_the_rest_of_ModelSequence(self):
        """The class ModelSequence has 14 methods, but only 2 unittests.
        You might want to add some tests there..."""
        # note: mostly these are tested in derived classes, for convenience.
        pass
class AllTests(TestCase):
    """Checks that Alignment and DenseAlignment behave consistently.

    Each test exercises the same operation (printing, naming, frequency
    counting, subsetting, equality) on an Alignment built from RnaSequence
    objects and on a DenseAlignment built from ModelSequence objects.
    """

    def setUp(self):
        """setUp method for all tests"""
        # named sequences
        self.rna1 = RnaSequence('UCAGGG', Name='rna1')
        self.rna2 = RnaSequence('YCU-RG', Name='rna2')
        self.rna3 = RnaSequence('CAA-NR', Name='rna3')
        self.model1 = ModelSequence('UCAGGG', Name='rna1',
            Alphabet=RNA.Alphabets.DegenGapped)
        self.model2 = ModelSequence('YCU-RG', Name='rna2',
            Alphabet=RNA.Alphabets.DegenGapped)
        self.model3 = ModelSequence('CAA-NR', Name='rna3',
            Alphabet=RNA.Alphabets.DegenGapped)

        self.aln = Alignment([self.rna1, self.rna2, self.rna3], MolType=RNA)
        self.da = DenseAlignment([self.model1, self.model2, self.model3],
            MolType=RNA, Alphabet=RNA.Alphabets.DegenGapped)

        # seqs no name
        self.nn_rna1 = RnaSequence('UCAGGG')
        self.nn_rna2 = RnaSequence('YCU-RG')
        self.nn_rna3 = RnaSequence('CAA-NR')
        self.nn_model1 = ModelSequence('UCAGGG',
            Alphabet=RNA.Alphabets.DegenGapped)
        self.nn_model2 = ModelSequence('YCU-RG',
            Alphabet=RNA.Alphabets.DegenGapped)
        self.nn_model3 = ModelSequence('CAA-NR',
            Alphabet=RNA.Alphabets.DegenGapped)

        self.nn_aln = Alignment([self.nn_rna1, self.nn_rna2, self.nn_rna3],
            MolType=RNA)
        self.nn_da = DenseAlignment([self.nn_model1, self.nn_model2,
            self.nn_model3], MolType=RNA, Alphabet=RNA.Alphabets.DegenGapped)

    def test_printing_named_seqs(self):
        """Printing named seqs should work the same on Aln and DenseAln"""
        # Note: the newline trailing each sequence is intentional, because
        # we want each FASTA-format record to be separated.
        exp_lines_general = ['>rna1','UCAGGG','>rna2','YCU-RG','>rna3','CAA-NR']
        self.assertEqual(str(self.aln), '\n'.join(exp_lines_general) + '\n')
        self.assertEqual(str(self.da), '\n'.join(exp_lines_general) + '\n')

    def test_printing_unnamed_seqs(self):
        """Printing unnamed sequences should work the same on Aln and DenseAln
        """
        # The trailing newline is carried by the last expected element here.
        exp_lines_gen = ['>seq_0','UCAGGG','>seq_1','YCU-RG','>seq_2','CAA-NR\n']
        self.assertEqual(str(self.nn_aln), '\n'.join(exp_lines_gen))
        self.assertEqual(str(self.nn_da), '\n'.join(exp_lines_gen))

    def test_DenseAlignment_without_moltype(self):
        """Expect MolType to be picked up from the sequences."""
        m1 = ModelSequence('UCAG', Alphabet=RNA.Alphabets.DegenGapped,
            Name='rna1')
        m2 = ModelSequence('CCCR', Alphabet=RNA.Alphabets.DegenGapped,
            Name='rna2')
        da = DenseAlignment([m1, m2])
        exp_lines = ['>rna1','UCAG','>rna2','CCCR']
        self.assertEqual(str(da), '\n'.join(exp_lines) + '\n')

    def test_names(self):
        """Names should be reported identically by Alignment and DenseAlignment"""
        # Should both alignments handle names the same way?
        self.assertEqual(self.aln.Names, ['rna1','rna2','rna3'])
        self.assertEqual(self.da.Names, ['rna1','rna2','rna3'])
        # On unnamed sequences the behavior is now the same.
        self.assertEqual(self.nn_aln.Names, ['seq_0','seq_1','seq_2'])
        self.assertEqual(self.nn_da.Names, ['seq_0','seq_1','seq_2'])

    def test_seqFreqs(self):
        """seqFreqs should work the same on Alignment and DenseAlignment"""
        # Used alphabet: ('U', 'C', 'A', 'G', '-', 'B', 'D', 'H',
        #                 'K', 'M', 'N', 'S', 'R', 'W', 'V', 'Y')
        exp = [[1,1,1,3,0,0,0,0,0,0,0,0,0,0,0,0,0],
               [1,1,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0],
               [0,1,2,0,1,0,0,0,0,0,1,0,1,0,0,0,0]]
        # This works
        self.assertEqual(self.da.getSeqFreqs().Data, exp)
        # This used to raise an error, but now works
        self.assertEqual(self.aln.getSeqFreqs().Data, exp)

    def test_subset_positions_DenseAlignment(self):
        """takePositions and getSubAlignment(pos=...) should agree on DenseAlignment"""
        model1 = ModelSequence('UCG', Name='rna1',
            Alphabet=RNA.Alphabets.DegenGapped)
        model2 = ModelSequence('YCG', Name='rna2',
            Alphabet=RNA.Alphabets.DegenGapped)
        model3 = ModelSequence('CAR', Name='rna3',
            Alphabet=RNA.Alphabets.DegenGapped)
        sub_da = DenseAlignment([model1, model2, model3],
            MolType=RNA, Alphabet=RNA.Alphabets.DegenGapped)

        full_data = array([[0,1,2,3,3,3],[15,1,0,4,12,3],[1,2,2,4,10,12]])
        sub_data = array([[0,1,3],[15,1,3],[1,2,12]])

        # First check some data
        self.assertEqual(self.da.ArraySeqs, full_data)
        self.assertEqual(self.da.ArrayPositions, transpose(full_data))
        self.assertEqual(sub_da.ArraySeqs, sub_data)
        self.assertEqual(sub_da.ArrayPositions, transpose(sub_data))

        obs_sub_da_TP = self.da.takePositions([0,1,5])
        obs_sub_da_SA = self.da.getSubAlignment(pos=[0,1,5])

        # When using the getSubAlignment method the data is right
        self.assertEqual(obs_sub_da_SA, sub_da)
        self.assertNotEqual(obs_sub_da_SA, self.da)
        self.assertEqual(obs_sub_da_SA.ArraySeqs, sub_data)
        self.assertEqual(obs_sub_da_SA.ArrayPositions, transpose(sub_data))

        # For the takePositions method: the objects compare equal, and the
        # underlying arrays are checked explicitly as well.
        self.assertEqual(obs_sub_da_TP, sub_da)
        self.assertNotEqual(obs_sub_da_TP, self.da)
        self.assertEqual(obs_sub_da_TP.ArraySeqs, sub_data)
        self.assertEqual(obs_sub_da_TP.ArrayPositions, transpose(sub_data))
        # Open question from the original author:
        # shouldn't the __eq__ method check the data at least?

    def test_subset_positions_Alignment(self):
        """Alignment.takePositions should return the selected columns only"""
        rna1 = RnaSequence('UCG', Name='rna1')
        rna2 = RnaSequence('YCG', Name='rna2')
        rna3 = RnaSequence('CAR', Name='rna3')
        sub_aln = Alignment([rna1, rna2, rna3], MolType=RNA)

        obs_sub_aln = self.aln.takePositions([0,1,5])
        self.assertEqual(obs_sub_aln, sub_aln)
        self.assertNotEqual(obs_sub_aln, self.aln)
        # String representations should be the same; this depends on the
        # sequence order being maintained (see the separate order test).
        self.assertEqual(str(obs_sub_aln), str(sub_aln))

    def test_takePositions_sequence_order(self):
        """Alignment takePositions should maintain seq order"""
        # DenseAlignment
        self.assertEqual(self.da.Names, ['rna1','rna2','rna3'])
        sub_da = self.da.getSubAlignment(pos=[0,1,5])
        self.assertEqual(sub_da.Names, ['rna1','rna2','rna3'])
        # Alignment: order must survive takePositions too
        self.assertEqual(self.aln.Names, ['rna1','rna2','rna3'])
        sub_aln = self.aln.takePositions([0,1,5])
        self.assertEqual(sub_aln.Names, ['rna1','rna2','rna3'])

    def test_subset_seqs_Alignment(self):
        """Alignment.takeSeqs should select seqs by name, in the order requested"""
        rna1 = RnaSequence('UCG', Name='rna1')
        rna2 = RnaSequence('YCG', Name='rna2')
        rna3 = RnaSequence('CAR', Name='rna3')
        sub_aln = Alignment([rna2, rna3], MolType=RNA)
        aln = Alignment([rna1, rna2, rna3], MolType=RNA)
        obs_sub_aln = aln.takeSeqs(['rna2','rna3'])
        self.assertEqual(obs_sub_aln, sub_aln)
        self.assertEqual(str(obs_sub_aln), str(sub_aln))

        # Selected sequences should be in specified order?
        obs_sub_aln_1 = self.aln.takeSeqs(['rna3','rna2'])
        obs_sub_aln_2 = self.aln.takeSeqs(['rna2','rna3'])
        self.assertNotEqual(str(obs_sub_aln_1), str(obs_sub_aln_2))

    def test_subset_seqs_DenseAlignment(self):
        """takeSeqs by name should match getSubAlignment by seq index"""
        obs_sub_da_TS = self.da.takeSeqs(['rna1'])
        obs_sub_da_SA = self.da.getSubAlignment(seqs=[0])

        # These two are now the same. Fixed mapping of key to char array.
        self.assertEqual(obs_sub_da_TS, obs_sub_da_SA)
        self.assertEqual(str(obs_sub_da_TS), str(obs_sub_da_SA))

    def test_aln_equality(self):
        """DenseAlignment equality: which differences matter and which do not"""
        # When does something compare equal?
        self.assertEqual(self.da == self.da, True)
        # one sequence less
        other_da1 = DenseAlignment([self.model1, self.model2],
            MolType=RNA, Alphabet=RNA.Alphabets.DegenGapped)
        self.assertEqual(self.da == other_da1, False)
        # seqs in different order -- doesn't matter
        other_da2 = DenseAlignment([self.model1, self.model3, self.model2],
            MolType=RNA, Alphabet=RNA.Alphabets.DegenGapped)
        self.assertEqual(self.da == other_da2, True)
        # seqs in different encoding -- doesn't matter, only looks at data
        other_da3 = DenseAlignment([self.model1, self.model2, self.model3])
        # Should this compare False even though the data is exactly the same?
        # The MolType is different...
        self.assertEqual(self.da == other_da3, True)
        # map() is lazy on Python 3, so materialise it before handing it to
        # alltrue; otherwise the per-row results are never inspected. A
        # TestCase assertion is used so the check survives python -O.
        self.assertTrue(alltrue(
            list(map(alltrue, self.da.ArraySeqs == other_da3.ArraySeqs))))

    def test_seq_equality(self):
        """ModelSequences built from identical arguments should compare equal"""
        model1 = ModelSequence('UCG', Name='rna1',
            Alphabet=RNA.Alphabets.DegenGapped)
        model2 = ModelSequence('UCG', Name='rna1',
            Alphabet=RNA.Alphabets.DegenGapped)
        # Shouldn't the above two sequences be equal?
        self.assertEqual(model1, model2)
        # string comparison is True
        self.assertEqual(str(model1), str(model2))

    def test_seq_ungapping(self):
        """degap should strip gaps from RnaSequence and ModelSequence alike"""
        rna1 = RnaSequence('U-C-A-G-', Name='rna1')
        model1 = ModelSequence('U-C-A-G-', Name='rna1',
            Alphabet=RNA.Alphabets.DegenGapped)

        self.assertEqual(rna1, 'U-C-A-G-')
        self.assertEqual(rna1.degap(), 'UCAG')

        # check it produces the right string from the beginning
        self.assertEqual(str(model1), 'U-C-A-G-')
        self.assertEqual(model1._data, [0,4,1,4,2,4,3,4])
        # ModelSequence should maybe have the same degap method as normal Seq
        self.assertEqual(str(model1.degap()), 'UCAG')

    def test_the_rest_of_ModelSequence(self):
        """The class ModelSequence has 14 methods, but only 2 unittests.
        You might want to add some tests there..."""
        # note: mostly these are tested in derived classes, for convenience.
        pass