class UtilTests(TestCase): def setUp(self): """Set up for Voronoi tests""" self.aln1 = Alignment(['ABC','BCC','BAC']) self.aln2 = Alignment({'seq1':'GYVGS','seq2':'GFDGF','seq3':'GYDGF',\ 'seq4':'GYQGG'},RowOrder=['seq1','seq2','seq3','seq4']) self.aln3 = Alignment({'seq1':'AA', 'seq2':'AA', 'seq3':'BB'},\ RowOrder=['seq1','seq2','seq3']) self.aln4 = Alignment({'seq1':'AA', 'seq2':'AA', 'seq3':'BB',\ 'seq4':'BB','seq5':'CC'},RowOrder=['seq1','seq2','seq3','seq4','seq5']) self.aln5 = Alignment(['ABBA','ABCA','CBCB']) def test_number_of_pseudo_seqs(self): """number_of_pseudo_seqs: should return # of pseudo seqs""" self.assertEqual(number_of_pseudo_seqs(self.aln1),6) self.assertEqual(number_of_pseudo_seqs(self.aln2),18) self.assertEqual(number_of_pseudo_seqs(self.aln3),4) self.assertEqual(number_of_pseudo_seqs(self.aln4),9) def test_pseudo_seqs_exact(self): """pseudo_seqs_exact: should generate expected pseudo sequences""" self.assertEqualItems(pseudo_seqs_exact(self.aln1),\ ['AAC','ABC','ACC','BAC','BBC','BCC']) self.assertEqualItems(pseudo_seqs_exact(self.aln3),\ ['AA','AB','BA','BB']) self.assertEqual(len(pseudo_seqs_exact(self.aln2)), 18) def test_pseudo_seqs_monte_carlo(self): """pseudo_seqs_monte_carlo: random sample from all possible pseudo seqs """ self.assertEqual(len(list(pseudo_seqs_monte_carlo(self.aln1,n=100))),\ 100) for i in pseudo_seqs_monte_carlo(self.aln3,n=100): assert i in ['AA','AB','BA','BB'] def test_row_to_vote(self): """row_to_vote: should return correct votes for int and float distances """ self.assertEqual(row_to_vote(array([2,3,4,5])),array([1,0,0,0])) self.assertEqual(row_to_vote(array([2,3,2,5])),array([.5,0,0.5,0])) self.assertEqual(row_to_vote(array([2.3,3.5,2.1,5.8]))\ ,array([0,0,1,0])) def test_distance_matrix(self): """distance_matrix should obey RowOrder of alignment""" #RowOrder=None aln1_exp = array([[0,2,2],[2,0,1],[2,1,0]]) self.assertEqual(distance_matrix(self.aln1),aln1_exp) a = Alignment(self.aln1.copy()) a.RowOrder=[1,2,0] a_exp = array([[0,1,2],[1,0,2],[2,2,0]]) self.assertEqual(distance_matrix(a),a_exp) def test_eigenvector_for_largest_eigenvalue(self): """eigenvector_for_largest_eigenvalue: No idea how to test this""" pass def test_distance_to_closest(self): """distance_to_closest: should return closest distances""" self.assertEqual(distance_to_closest(self.aln1),[2,1,1]) self.assertEqual(distance_to_closest(self.aln2),[2,1,1,2]) def test_SeqToProfile(self): """SequenceToProfile: should work with different parameter settings """ seq = DnaSequence("ATCGRYN-") #Only non-degenerate bases in the char order, all other #characters are ignored. In a sequence this means that #several positions will contain only zeros in the profile. exp = zeros([len(seq),4],Float64) for x,y in zip(range(len(seq)),[2,0,1,3]): exp[x,y] = 1 self.assertEqual(SeqToProfile(seq,char_order="TCAG",\ split_degenerates=False).Data.tolist(),exp.tolist()) #Same thing should work as well when the char order is not passed in exp = zeros([len(seq),4],Float64) for x,y in zip(range(len(seq)),[2,0,1,3]): exp[x,y] = 1 self.assertEqual(SeqToProfile(seq, split_degenerates=False)\ .Data.tolist(),exp.tolist()) #All symbols in the sequence are in the char order, no row #should contain only zeros. Degenerate symbols are not split. exp = zeros([len(seq),8],Float64) for x,y in zip(range(len(seq)),[2,0,1,3,4,5,6,7]): exp[x,y] = 1 self.assertEqual(SeqToProfile(seq,char_order="TCAGRYN-",\ split_degenerates=False).Data.tolist(), exp.tolist()) #splitting all degenerate symbols, having only non-degenerate symbols #in the character order (and -) exp = array([[0,0,1,0,0],[1,0,0,0,0],[0,1,0,0,0],[0,0,0,1,0], [0,0,.5,.5,0],[.5,.5,0,0,0],[.25,.25,.25,.25,0],[0,0,0,0,1]]) self.assertEqual(SeqToProfile(seq,char_order="TCAG-",\ split_degenerates=True).Data.tolist(),exp.tolist()) #splitting degenerates, but having one of the degenerate #symbols in the character order. In that case the degenerate symbol #is not split. exp = array([[0,0,1,0,0,0],[1,0,0,0,0,0],[0,1,0,0,0,0],[0,0,0,1,0,0], [0,0,.5,.5,0,0],[.5,.5,0,0,0,0],[0,0,0,0,1,0],[0,0,0,0,0,1]]) self.assertEqual(SeqToProfile(seq,char_order="TCAGN-",\ split_degenerates=True).Data.tolist(),exp.tolist()) def test_AlignmentToProfile_basic(self): """AlignmentToProfile: should work under basic conditions """ #sequences in the alignment are unweighted #Alphabet is the alphabet of the sequences (RnaAlphabet) #CharOrder is set explicitly #Degenerate bases are split up #Gaps are ignored #In all of the columns at least one character is in the CharOrder a = Alignment({'a':RnaSequence('UCAGRYN-'),'b':RnaSequence('ACUGAAAA')}) exp =\ array([[.5,0,.5,0], [0,1,0,0], [.5,0,.5,0], [0,0,0,1], [0,0,.75,.25], [.25,.25,.5,0], [.125,.125,.625,.125], [0,0,1,0]]) self.assertEqual(AlnToProfile(a,alphabet=RnaAlphabet,\ split_degenerates=True).Data.tolist(),exp.tolist()) def test_AlignmentToProfile_ignore(self): """AlignmentToProfile: should raise an error if too many chars ignored """ #Same conditions as previous function, but in the last column #there are only gaps, so normalization will fail at that position a = Alignment({'a':RnaSequence('UCAGRYN-'),'b':RnaSequence('ACUGAAA-')}) exp =\ array([[.5,0,.5,0], [0,1,0,0], [.5,0,.5,0], [0,0,0,1], [0,0,.75,.25], [.25,.25,.5,0], [.125,.125,.625,.125], [0,0,1,0]]) self.assertRaises(ValueError,AlnToProfile,a,alphabet=RnaAlphabet,\ split_degenerates=True) def test_AlignmentToProfile_weighted(self): """AlignmentToProfile: should work when sequences are weighted """ #Alignment: sequences are just strings and don't have an alphabet #Weights: a normal dictionary (could be a real Weights object as well) a = Alignment({'seq1':'TCAG','seq2':'TAR-','seq3':'YAG-'},\ RowOrder=['seq1','seq2','seq3']) w = {'seq1':0.5,'seq2':.25,'seq3':.25} #Error will be raised when no Alphabet is given, since the seqs #in the alignment are just strings self.assertRaises(AttributeError,AlnToProfile,a) #Basic situation in which all letters in the sequences occur in the #CharOrder, None have to be ignored. In that case it doesn't matter #whether we set split_degenerates to True or False, because if it's #True it's overwritten by the fact that the char is in the CharOrder. exp = array([[0.75,0,0,0,0,.25,0], [0,0.5,0.5,0,0,0,0], [0,0.5,0,0.25,0.25,0,0], [0,0,0,0.5,0,0,0.5]]) #split_degenerates = False self.assertEqual(AlnToProfile(a,DnaAlphabet, char_order="TACGRY-",\ weights=w, split_degenerates=False).Data.tolist(),exp.tolist()) #split_degenerates = True self.assertEqual(AlnToProfile(a,DnaAlphabet, char_order="TACGRY-",\ weights=w, split_degenerates=True).Data.tolist(),exp.tolist()) #Only non-degenerate symbols in the CharOrder. Degenerates are split. #Gaps are ignored exp = array([[0.875,0,0.125,0], [0,0.5,0.5,0], [0,0.625,0,0.375], [0,0,0,1]]) self.assertEqual(AlnToProfile(a,DnaAlphabet, char_order="TACG",\ weights=w, split_degenerates=True).Data.tolist(),exp.tolist()) #An Error is raised if all chars in an alignment column are ignored #CharOrder=AT, degenerates are not split. self.assertRaises(ValueError,AlnToProfile,a,DnaAlphabet,\ char_order="AT",weights=w, split_degenerates=True)