Esempio n. 1
0
 def test_getIntMap(self):
     """Alignment.getIntMap should return correct mapping."""
     aln = Alignment({'seq1':'ACGU','seq2':'CGUA','seq3':'CCGU'})
     int_keys = {'seq_0':'seq1','seq_1':'seq2','seq_2':'seq3'}
     int_map = {'seq_0':'ACGU','seq_1':'CGUA','seq_2':'CCGU'}
     im,ik = aln.getIntMap()
     self.assertEqual(ik,int_keys)
     self.assertEqual(im,int_map)
Esempio n. 2
0
 def test_distance_matrix(self):
     """distance_matrix should obey RowOrder of alignment"""
     #RowOrder=None
     aln1_exp = array([[0,2,2],[2,0,1],[2,1,0]])
     self.assertEqual(distance_matrix(self.aln1),aln1_exp)
     
     a = Alignment(self.aln1.copy())
     a.RowOrder=[1,2,0]
     a_exp = array([[0,1,2],[1,0,2],[2,2,0]])
     self.assertEqual(distance_matrix(a),a_exp)
Esempio n. 3
0
 def test_columnProbs(self):
     """Alignment.columnProbs should find Pr(symbol) in each column"""
     #make an alignment with 4 rows (easy to calculate probabilities)
     align = Alignment(["AAA", "ACA", "GGG", "GUC"])
     cp = align.columnProbs()
     #check that the column probs match the counts we expect
     self.assertEqual(cp, map(Freqs, [   
         {'A':0.5, 'G':0.5},
         {'A':0.25, 'C':0.25, 'G':0.25, 'U':0.25},
         {'A':0.5, 'G':0.25, 'C':0.25},
         ]))
Esempio n. 4
0
def ClustalParser(record, seq_constructor=Sequence, strict=True):
    """Returns contents of record as Alignment object."""
    seqs, label_order = MinimalClustalParser(record, strict)
    result = Alignment()
    for label, seq in seqs.items():
        new_seq = seq_constructor(''.join(map(strip, seq)))
        try:
            new_seq.Label = label
        except AttributeError:
            pass
        result[label] = new_seq
    result.RowOrder = label_order
    return result
Esempio n. 5
0
    def test_toPhylip(self):
        """Alignment should return PHYLIP string format correctly"""
        align_norm = Alignment( ['ACDEFGHIKLMNPQRSTUVWY-',
                                     'ACDEFGHIKLMNPQRSUUVWF-',
                                     'ACDEFGHIKLMNPERSKUVWC-',
                                     'ACNEFGHIKLMNPQRS-UVWP-',                                     
                                     ])

        phylip_str, id_map =  align_norm.toPhylip()

        self.assertEqual(phylip_str, """4 22\nseq0000001 ACDEFGHIKLMNPQRSTUVWY-\nseq0000002 ACDEFGHIKLMNPQRSUUVWF-\nseq0000003 ACDEFGHIKLMNPERSKUVWC-\nseq0000004 ACNEFGHIKLMNPQRS-UVWP-""")
        self.assertEqual(id_map, {'seq0000004': 3, 'seq0000001': 0, 'seq0000003': 2, 'seq0000002': 1})

        align_rag = Alignment( ['ACDEFGHIKLMNPQRSTUVWY-',
                                     'ACDEFGHIKLMNPQRSUUVWF-',
                                     'ACDEFGHIKLMNPERSKUVWC-',
                                     'ACNEFGHIKLMNUVWP-',                                     
                                     ])


        self.assertRaises(ValueError,  align_rag.toPhylip)
Esempio n. 6
0
    def setUp(self):
        """Set up for Voronoi tests"""
        self.aln1 = Alignment(['ABC','BCC','BAC'])
        
        self.aln2 = Alignment({'seq1':'GYVGS','seq2':'GFDGF','seq3':'GYDGF',\
            'seq4':'GYQGG'},RowOrder=['seq1','seq2','seq3','seq4'])

        self.aln3 = Alignment({'seq1':'AA', 'seq2':'AA', 'seq3':'BB'},\
            RowOrder=['seq1','seq2','seq3'])

        self.aln4 = Alignment({'seq1':'AA', 'seq2':'AA', 'seq3':'BB',\
        'seq4':'BB','seq5':'CC'},RowOrder=['seq1','seq2','seq3','seq4','seq5'])

        self.aln5 = Alignment(['ABBA','ABCA','CBCB'])
Esempio n. 7
0
 def test_uncertainties(self):
     """Alignment.uncertainties should match hand-calculated values"""
     aln = Alignment(['abc', 'axc'])
     obs = aln.uncertainties()
     self.assertFloatEqual(obs, [0, 1, 0])
     #check what happens with only one input sequence
     aln = Alignment(['abc'])
     obs = aln.uncertainties()
     self.assertFloatEqual(obs, [0, 0, 0])
     #check that we can screen out bad items OK
     aln = Alignment(['abc', 'def', 'ghi', 'jkl', 'GHI'])
     obs = aln.uncertainties('abcdefghijklmnop')
     self.assertFloatEqual(obs, [2.0] * 3)
Esempio n. 8
0
class UtilTests(TestCase):
    
    def setUp(self):
        """Set up for Voronoi tests"""
        self.aln1 = Alignment(['ABC','BCC','BAC'])
        
        self.aln2 = Alignment({'seq1':'GYVGS','seq2':'GFDGF','seq3':'GYDGF',\
            'seq4':'GYQGG'},RowOrder=['seq1','seq2','seq3','seq4'])

        self.aln3 = Alignment({'seq1':'AA', 'seq2':'AA', 'seq3':'BB'},\
            RowOrder=['seq1','seq2','seq3'])

        self.aln4 = Alignment({'seq1':'AA', 'seq2':'AA', 'seq3':'BB',\
        'seq4':'BB','seq5':'CC'},RowOrder=['seq1','seq2','seq3','seq4','seq5'])

        self.aln5 = Alignment(['ABBA','ABCA','CBCB'])

    
    def test_number_of_pseudo_seqs(self):
        """number_of_pseudo_seqs: should return # of pseudo seqs"""
        self.assertEqual(number_of_pseudo_seqs(self.aln1),6)
        self.assertEqual(number_of_pseudo_seqs(self.aln2),18)
        self.assertEqual(number_of_pseudo_seqs(self.aln3),4)
        self.assertEqual(number_of_pseudo_seqs(self.aln4),9)
    
    def test_pseudo_seqs_exact(self):
        """pseudo_seqs_exact: should generate expected pseudo sequences"""
        self.assertEqualItems(pseudo_seqs_exact(self.aln1),\
            ['AAC','ABC','ACC','BAC','BBC','BCC']) 
        self.assertEqualItems(pseudo_seqs_exact(self.aln3),\
            ['AA','AB','BA','BB'])
        self.assertEqual(len(pseudo_seqs_exact(self.aln2)), 18)

    def test_pseudo_seqs_monte_carlo(self):
        """pseudo_seqs_monte_carlo: random sample from all possible pseudo seqs
        """
        self.assertEqual(len(list(pseudo_seqs_monte_carlo(self.aln1,n=100))),\
            100)
        for i in pseudo_seqs_monte_carlo(self.aln3,n=100):
            assert i in ['AA','AB','BA','BB']

    def test_row_to_vote(self):
        """row_to_vote: should return correct votes for int and float distances
        """
        self.assertEqual(row_to_vote(array([2,3,4,5])),array([1,0,0,0]))
        self.assertEqual(row_to_vote(array([2,3,2,5])),array([.5,0,0.5,0]))
        self.assertEqual(row_to_vote(array([2.3,3.5,2.1,5.8]))\
            ,array([0,0,1,0]))

    def test_distance_matrix(self):
        """distance_matrix should obey RowOrder of alignment"""
        #RowOrder=None
        aln1_exp = array([[0,2,2],[2,0,1],[2,1,0]])
        self.assertEqual(distance_matrix(self.aln1),aln1_exp)
        
        a = Alignment(self.aln1.copy())
        a.RowOrder=[1,2,0]
        a_exp = array([[0,1,2],[1,0,2],[2,2,0]])
        self.assertEqual(distance_matrix(a),a_exp)

    def test_eigenvector_for_largest_eigenvalue(self):
        """eigenvector_for_largest_eigenvalue: No idea how to test this"""
        pass

    def test_distance_to_closest(self):
        """distance_to_closest: should return closest distances"""
        self.assertEqual(distance_to_closest(self.aln1),[2,1,1])
        self.assertEqual(distance_to_closest(self.aln2),[2,1,1,2])

    def test_SeqToProfile(self):
        """SequenceToProfile: should work with different parameter settings
        """
        seq = DnaSequence("ATCGRYN-")

        #Only non-degenerate bases in the char order, all other
        #characters are ignored. In a sequence this means that 
        #several positions will contain only zeros in the profile.
        exp = zeros([len(seq),4],Float64)
        for x,y in zip(range(len(seq)),[2,0,1,3]):
            exp[x,y] = 1
        self.assertEqual(SeqToProfile(seq,char_order="TCAG",\
            split_degenerates=False).Data.tolist(),exp.tolist()) 
       
        #Same thing should work as well when the char order is not passed in
        exp = zeros([len(seq),4],Float64)
        for x,y in zip(range(len(seq)),[2,0,1,3]):
            exp[x,y] = 1
        self.assertEqual(SeqToProfile(seq, split_degenerates=False)\
            .Data.tolist(),exp.tolist()) 

       
        #All symbols in the sequence are in the char order, no row
        #should contain only zeros. Degenerate symbols are not split.
        exp = zeros([len(seq),8],Float64)
        for x,y in zip(range(len(seq)),[2,0,1,3,4,5,6,7]):
            exp[x,y] = 1
        self.assertEqual(SeqToProfile(seq,char_order="TCAGRYN-",\
            split_degenerates=False).Data.tolist(), exp.tolist())
        
        #splitting all degenerate symbols, having only non-degenerate symbols
        #in the character order (and -)
        exp = array([[0,0,1,0,0],[1,0,0,0,0],[0,1,0,0,0],[0,0,0,1,0],
            [0,0,.5,.5,0],[.5,.5,0,0,0],[.25,.25,.25,.25,0],[0,0,0,0,1]])
        self.assertEqual(SeqToProfile(seq,char_order="TCAG-",\
            split_degenerates=True).Data.tolist(),exp.tolist())
        
        #splitting degenerates, but having one of the degenerate
        #symbols in the character order. In that case the degenerate symbol
        #is not split. 
        exp = array([[0,0,1,0,0,0],[1,0,0,0,0,0],[0,1,0,0,0,0],[0,0,0,1,0,0],
            [0,0,.5,.5,0,0],[.5,.5,0,0,0,0],[0,0,0,0,1,0],[0,0,0,0,0,1]])
        self.assertEqual(SeqToProfile(seq,char_order="TCAGN-",\
            split_degenerates=True).Data.tolist(),exp.tolist())

    def test_AlignmentToProfile_basic(self):
        """AlignmentToProfile: should work under basic conditions
        """
        #sequences in the alignment are unweighted
        #Alphabet is the alphabet of the sequences (RnaAlphabet)
        #CharOrder is set explicitly
        #Degenerate bases are split up
        #Gaps are ignored
        #In all of the columns at least one character is in the CharOrder
        a = Alignment({'a':RnaSequence('UCAGRYN-'),'b':RnaSequence('ACUGAAAA')})
        exp =\
        array([[.5,0,.5,0],
         [0,1,0,0],
         [.5,0,.5,0],
         [0,0,0,1],
         [0,0,.75,.25],
         [.25,.25,.5,0],
         [.125,.125,.625,.125],
         [0,0,1,0]])
        self.assertEqual(AlnToProfile(a,alphabet=RnaAlphabet,\
            split_degenerates=True).Data.tolist(),exp.tolist())

    def test_AlignmentToProfile_ignore(self):
        """AlignmentToProfile: should raise an error if too many chars ignored
        """
        #Same conditions as previous function, but in the last column 
        #there are only gaps, so normalization will fail at that position
        a = Alignment({'a':RnaSequence('UCAGRYN-'),'b':RnaSequence('ACUGAAA-')})
        exp =\
        array([[.5,0,.5,0],
         [0,1,0,0],
         [.5,0,.5,0],
         [0,0,0,1],
         [0,0,.75,.25],
         [.25,.25,.5,0],
         [.125,.125,.625,.125],
         [0,0,1,0]])
        self.assertRaises(ValueError,AlnToProfile,a,alphabet=RnaAlphabet,\
            split_degenerates=True)


    def test_AlignmentToProfile_weighted(self):
        """AlignmentToProfile: should work when sequences are weighted
        """
        #Alignment: sequences are just strings and don't have an alphabet
        #Weights: a normal dictionary (could be a real Weights object as well)
        a = Alignment({'seq1':'TCAG','seq2':'TAR-','seq3':'YAG-'},\
        RowOrder=['seq1','seq2','seq3'])
        w = {'seq1':0.5,'seq2':.25,'seq3':.25}
        
        #Error will be raised when no Alphabet is given, since the seqs
        #in the alignment are just strings
        self.assertRaises(AttributeError,AlnToProfile,a)
        
        #Basic situation in which all letters in the sequences occur in the
        #CharOrder, None have to be ignored. In that case it doesn't matter
        #whether we set split_degenerates to True or False, because if it's 
        #True it's overwritten by the fact that the char is in the CharOrder.
        exp = array([[0.75,0,0,0,0,.25,0],
            [0,0.5,0.5,0,0,0,0],
            [0,0.5,0,0.25,0.25,0,0],
            [0,0,0,0.5,0,0,0.5]])
        #split_degenerates = False
        self.assertEqual(AlnToProfile(a,DnaAlphabet, char_order="TACGRY-",\
            weights=w, split_degenerates=False).Data.tolist(),exp.tolist())
        #split_degenerates = True
        self.assertEqual(AlnToProfile(a,DnaAlphabet, char_order="TACGRY-",\
            weights=w, split_degenerates=True).Data.tolist(),exp.tolist())

        #Only non-degenerate symbols in the CharOrder. Degenerates are split.
        #Gaps are ignored
        exp = array([[0.875,0,0.125,0],
            [0,0.5,0.5,0],
            [0,0.625,0,0.375],
            [0,0,0,1]])
        self.assertEqual(AlnToProfile(a,DnaAlphabet, char_order="TACG",\
            weights=w, split_degenerates=True).Data.tolist(),exp.tolist())
        
        #An Error is raised if all chars in an alignment column are ignored
        #CharOrder=AT, degenerates are not split.
        self.assertRaises(ValueError,AlnToProfile,a,DnaAlphabet,\
            char_order="AT",weights=w, split_degenerates=True)
Esempio n. 9
0
 def test_init_dict(self):
     """Alignment init from dict should work as expected"""
     d = {'a':'aaaaa', 'b':'bbbbb'}
     a = Alignment(d)
     self.assertEqual(a, d)
     self.assertEqual(a.items(), d.items())
Esempio n. 10
0
    def test_getSimilar(self):
        """Alignment getSimilar should get all sequences close to target seq"""
        aln = self.many
        x = Rna('gggggggggg')
        y = Rna('----------')
        #test min and max similarity ranges
        result = aln.getSimilar(aln['a'],min_similarity=0.4,max_similarity=0.7)
        for seq in 'cefg':
            assert seq in result
            assert result[seq] == aln[seq]
        self.assertEqual(len(result), 4)
        
        result = aln.getSimilar(aln['a'],min_similarity=0.95,max_similarity=1)
        for seq in 'a':
            assert seq in result
            assert result[seq] == aln[seq]
        self.assertEqual(len(result), 1)

        result = aln.getSimilar(aln['a'], min_similarity=0.75, \
            max_similarity=0.85)
        for seq in 'bd':
            assert seq in result
            assert result[seq] == aln[seq]
        self.assertEqual(len(result), 2)

        result = aln.getSimilar(aln['a'],min_similarity=0,max_similarity=0.2)
        self.assertEqual(len(result), 0)

        #test some sequence transformations
        transform = lambda s: s[1:4]
        result = aln.getSimilar(aln['a'], min_similarity=0.5, \
            transform=transform)
        for seq in 'abdfg':
            assert seq in result
            assert result[seq] == aln[seq]
        self.assertEqual(len(result), 5)

        transform = lambda s: s[-3:]
        result = aln.getSimilar(aln['a'], min_similarity=0.5, \
            transform=transform)
        for seq in 'abcde':
            assert seq in result
            assert result[seq] == aln[seq]
        self.assertEqual(len(result), 5)

        #test a different distance metric
        metric = lambda x, y: x.count('g') + y.count('g')
        result = aln.getSimilar(aln['a'], min_similarity=5, max_similarity=10, \
            metric=metric)
        for seq in 'ef':
            assert seq in result
            assert result[seq] == aln[seq]
        self.assertEqual(len(result), 2)

        #test the combination of a transform and a distance metric
        aln = Alignment(dict(enumerate(map(Rna, ['aA-ac','A-aAC','aa-aa']))))
        transform = lambda s: Rna(s.upper())
        metric = RnaSequence.fracSameNonGaps
        #first, do it without the transformation
        result = aln.getSimilar(aln[0], min_similarity=0.5, metric=metric)
        for seq in [0,2]:
            assert seq in result
            assert result[seq] == aln[seq]
        self.assertEqual(len(result), 2)
        
        result = aln.getSimilar(aln[0], min_similarity=0.8, metric=metric)
        for seq in [0]:
            assert seq in result
            assert result[seq] == aln[seq]
        self.assertEqual(len(result), 1)
        #then, verify that the transform changes the results         
        result = aln.getSimilar(aln[0], min_similarity=0.5, metric=metric, \
            transform=transform)
        for seq in [0,1,2]:
            assert seq in result
            assert result[seq] == aln[seq]
        self.assertEqual(len(result), 3)
        
        result = aln.getSimilar(aln[0], min_similarity=0.8, metric=metric, \
            transform=transform)
        for seq in [0,1]:
            assert seq in result
            assert result[seq] == aln[seq]
        self.assertEqual(len(result), 2)
Esempio n. 11
0
 def setUp(self):
     """Define some standard Alignment objects."""
     self.empty = Alignment({})
     self.one_seq = Alignment({'a':'aaaaa'})
     self.ragged = Alignment({'a':'aaaaaa', 'b':'aaa', 'c':'aaaa'})
     self.identical = Alignment({'a':'aaaa','b':'aaaa'})
     self.gaps = Alignment({'a':'aaaaaaa','b':'a--a-aa', 'c':'aa-----'})
     self.gaps_rna = Alignment({'a':Rna('aaaaaaa'), 'b':Rna('a--a-aa'), \
         'c':Rna('aa-----')})
     self.unordered = Alignment({'a':'aaaaa','b':'bbbbb'})
     self.ordered1 = Alignment({'a':'aaaaa','b':'bbbbb'}, RowOrder=['a','b'])
     self.ordered2 = Alignment({'a':'aaaaa','b':'bbbbb'}, RowOrder=['b','a'])
     self.mixed = Alignment({'a':'abcde', 'b':'lmnop'})
     self.end_gaps = Alignment({'a':'--a-bc-', 'b':'-cb-a--', 'c':'--d-ef-'})
     self.many = Alignment({
         'a': Rna('ucagucaguu'),
         'b': Rna('uccgucaauu'),
         'c': Rna('accaucaguc'),
         'd': Rna('ucaaucgguu'),
         'e': Rna('uugguugggu'),
         'f': Rna('ccgggcggcc'),
         'g': Rna('ucaaccggaa'),
         })
     #Additional Alignments for tests added 6/4/04 by Jeremy Widmann
     self.integers = Alignment([[1,2,3,4,5],[1,1,1,1,1],[5,4,3,2,1]])
     self.sequences = Alignment(map(RnaSequence, ['UCAG', 'UCAG', 'UCAG']))
     self.structures = Alignment(map(ViennaStructure, 
                             ['(())..', '......', '(....)']), None, str)
     self.labeled = Alignment(['abc', 'def'], ['1st', '2nd'])
     #Additional Alignment for tests added 1/30/06 by Cathy Lozupone
     self.omitRowsTemplate_aln = Alignment({
         's1':Rna('UC-----CU---C'),
         's2':Rna('UC------U---C'),
         's3':Rna('UUCCUUCUU-UUC'),
         's4':Rna('UU-UUUU-UUUUC'),
         's5':Rna('-------------')
         })
Esempio n. 12
0
class AlignmentTests(TestCase):
    """Tests of the Alignment object."""
    def setUp(self):
        """Define some standard Alignment objects."""
        self.empty = Alignment({})
        self.one_seq = Alignment({'a':'aaaaa'})
        self.ragged = Alignment({'a':'aaaaaa', 'b':'aaa', 'c':'aaaa'})
        self.identical = Alignment({'a':'aaaa','b':'aaaa'})
        self.gaps = Alignment({'a':'aaaaaaa','b':'a--a-aa', 'c':'aa-----'})
        self.gaps_rna = Alignment({'a':Rna('aaaaaaa'), 'b':Rna('a--a-aa'), \
            'c':Rna('aa-----')})
        self.unordered = Alignment({'a':'aaaaa','b':'bbbbb'})
        self.ordered1 = Alignment({'a':'aaaaa','b':'bbbbb'}, RowOrder=['a','b'])
        self.ordered2 = Alignment({'a':'aaaaa','b':'bbbbb'}, RowOrder=['b','a'])
        self.mixed = Alignment({'a':'abcde', 'b':'lmnop'})
        self.end_gaps = Alignment({'a':'--a-bc-', 'b':'-cb-a--', 'c':'--d-ef-'})
        self.many = Alignment({
            'a': Rna('ucagucaguu'),
            'b': Rna('uccgucaauu'),
            'c': Rna('accaucaguc'),
            'd': Rna('ucaaucgguu'),
            'e': Rna('uugguugggu'),
            'f': Rna('ccgggcggcc'),
            'g': Rna('ucaaccggaa'),
            })
        #Additional Alignments for tests added 6/4/04 by Jeremy Widmann
        self.integers = Alignment([[1,2,3,4,5],[1,1,1,1,1],[5,4,3,2,1]])
        self.sequences = Alignment(map(RnaSequence, ['UCAG', 'UCAG', 'UCAG']))
        self.structures = Alignment(map(ViennaStructure, 
                                ['(())..', '......', '(....)']), None, str)
        self.labeled = Alignment(['abc', 'def'], ['1st', '2nd'])
        #Additional Alignment for tests added 1/30/06 by Cathy Lozupone
        self.omitRowsTemplate_aln = Alignment({
            's1':Rna('UC-----CU---C'),
            's2':Rna('UC------U---C'),
            's3':Rna('UUCCUUCUU-UUC'),
            's4':Rna('UU-UUUU-UUUUC'),
            's5':Rna('-------------')
            })
    
    def test_init_dict(self):
        """Alignment init from dict should work as expected"""
        d = {'a':'aaaaa', 'b':'bbbbb'}
        a = Alignment(d)
        self.assertEqual(a, d)
        self.assertEqual(a.items(), d.items())

    def test_init_seq(self):
        """Alignment init from list of sequences should use indices as keys"""
        seqs = ['aaaaa', 'bbbbb', 'ccccc']
        a = Alignment(seqs)
        self.assertEqual(len(a), 3)
        self.assertEqual(a[0], 'aaaaa')
        self.assertEqual(a[1], 'bbbbb')
        self.assertEqual(a[2], 'ccccc')
        self.assertEqual(a.RowOrder, [0,1,2])
        self.assertEqual(list(a.Rows), ['aaaaa','bbbbb','ccccc'])

    def test_init_pairs(self):
        """Alignment init from list of (key,val) pairs should work correctly"""
        seqs = [['x', 'xxx'], ['b','bbb'], ['c','ccc']]
        a = Alignment(seqs)
        self.assertEqual(len(a), 3)
        self.assertEqual(a['x'], 'xxx')
        self.assertEqual(a['b'], 'bbb')
        self.assertEqual(a['c'], 'ccc')
        self.assertEqual(a.RowOrder, ['x','b','c'])
        self.assertEqual(list(a.Rows), ['xxx','bbb','ccc'])

    def test_init_ordered(self):
        """Alignment should iterate over rows correctly even if ordered"""
        first = self.ordered1
        sec = self.ordered2
        un = self.unordered

        self.assertEqual(first.RowOrder, ['a','b'])
        self.assertEqual(sec.RowOrder, ['b', 'a'])
        self.assertEqual(un.RowOrder, un.keys())

        first_list = list(first.Rows)
        sec_list = list(sec.Rows)
        un_list = list(un.Rows)

        self.assertEqual(first_list, ['aaaaa','bbbbb'])
        self.assertEqual(sec_list, ['bbbbb','aaaaa'])
    
        #check that the unordered seq matches one of the lists
        assert (un_list == first_list) or (un_list == sec_list)
        self.assertNotEqual(first_list, sec_list)

    def test_SeqLen_get(self):
        """Alignment SeqLen should return length of longest seq"""
        self.assertEqual(self.empty.SeqLen, 0)
        self.assertEqual(self.one_seq.SeqLen, 5)
        self.assertEqual(self.ragged.SeqLen, 6)
        self.assertEqual(self.identical.SeqLen, 4)
        self.assertEqual(self.gaps.SeqLen, 7)

    def test_SeqLen_set(self):
        """Alignment SeqLen assignment should pad or truncate sequences"""
        #if there are no sequences, SeqLen will always be 0
        a = self.empty
        a.SeqLen = 5
        self.assertEqual(a.SeqLen, 0)

        a = self.one_seq
        a.SeqLen = 10
        self.assertEqual(a['a'], 'aaaaa-----')
        a.SeqLen = 2
        self.assertEqual(a['a'], 'aa')

        a = self.ragged
        a.SeqLen = None
        self.assertEqual(a, {'a':'aaaaaa', 'b':'aaa---', 'c':'aaaa--'})
        a.SeqLen = 8
        self.assertEqual(a, {'a':'aaaaaa--','b':'aaa-----','c':'aaaa----'})
        a.SeqLen = 2
        self.assertEqual(a, {'a':'aa','b':'aa','c':'aa'})

        a.DefaultGap = '~'
        a.SeqLen = 4
        self.assertEqual(a, {'a':'aa~~','b':'aa~~','c':'aa~~'})

    def test_Rows(self):
        """Alignment Rows property should return rows in correct order."""
        first = self.ordered1
        sec = self.ordered2
        un = self.unordered

        first_list = list(first.Rows)
        sec_list = list(sec.Rows)
        un_list = list(un.Rows)

        self.assertEqual(first_list, ['aaaaa','bbbbb'])
        self.assertEqual(sec_list, ['bbbbb','aaaaa'])
    
        #check that the unordered seq matches one of the lists
        assert (un_list == first_list) or (un_list == sec_list)
        self.assertNotEqual(first_list, sec_list)

        #should work on empty alignment
        self.assertEqual(list(self.empty.Rows), [])

        #should work on ragged alignment
        self.ragged.RowOrder = 'bac'
        self.assertEqual(list(self.ragged.Rows), ['aaa', 'aaaaaa', 'aaaa'])
        
    def test_iterRows(self):
        """Alignment iterRows() method should support reordering of rows"""
        self.assertEqual(list(self.empty.iterRows()), [])

        self.ragged.RowOrder = ['a','b','c']
        rows = list(self.ragged.iterRows())
        self.assertEqual(rows, ['aaaaaa', 'aaa', 'aaaa'])
        rows = list(self.ragged.iterRows(row_order=['b','a','a']))
        self.assertEqual(rows, ['aaa', 'aaaaaa', 'aaaaaa'])
        assert rows[1] is rows[2]
        assert rows[0] is self.ragged['b']
        
    def test_Cols(self):
        """Alignment Cols property should iterate over columns, using self.RowOrder"""
        r = self.ragged
        self.assertRaises(IndexError, list, r.Cols)
        r.SeqLen = None
        r.RowOrder = ['a','b','c']
        self.assertEqual(list(r.Cols), map(list, \
            ['aaa','aaa','aaa', 'a-a', 'a--', 'a--']))
        #should work on empty alignment
        self.assertEqual(list(self.empty.Cols), [])
        
    def test_iterCols(self):
        """Alignment iterCols() method should support reordering of cols"""
        r = self.ragged
        self.assertRaises(IndexError, list, r.iterCols(col_order=[5,1,3]))
        r.RowOrder = 'cb'
        r.SeqLen = None
        self.assertEqual(list(r.iterCols(col_order=[5,1,3])),\
            map(list,['--','aa','a-']))

        r.RowOrder = ['a','b','c']
        cols = list(self.ragged.iterCols())
        self.assertEqual(cols, map(list, ['aaa','aaa','aaa','a-a','a--','a--']))
        
    def test_Items(self):
        """Alignment Items should iterate over items in specified order."""
        #should work on empty alignment
        self.assertEqual(list(self.empty.Items), [])
        #should work if one row
        self.assertEqual(list(self.one_seq.Items), ['a']*5)
        #should work on ragged alignment
        self.assertEqual(list(self.ragged.Items), ['a']*13)
        #should take order into account
        self.assertEqual(list(self.ordered1.Items), ['a']*5 + ['b']*5)
        self.assertEqual(list(self.ordered2.Items), ['b']*5 + ['a']*5)
        
    def test_iterItems(self):
        """Alignment iterItems() should iterate over items in correct order"""
        #should work on empty alignment
        self.assertEqual(list(self.empty.iterItems()), [])
        #should work if one row
        self.assertEqual(list(self.one_seq.iterItems()), ['a']*5)
        #should work on ragged alignment
        self.assertEqual(list(self.ragged.iterItems()), ['a']*13)
        #should take order into account
        self.assertEqual(list(self.ordered1.iterItems()), ['a']*5 + ['b']*5)
        self.assertEqual(list(self.ordered2.iterItems()), ['b']*5 + ['a']*5)
        #should allow row and/or col specification
        self.assertRaises(IndexError, list, self.ragged.iterItems(\
            row_order=['c','b'], col_order=[5,1,3]))
        self.ragged.SeqLen = None
        self.assertEqual(list(self.ragged.iterItems(row_order=['c','b'], \
            col_order=[5,1,3])), list('-aa-a-'))
        #should not interfere with superclass iteritems()
        i = list(self.ragged.iteritems()) #remember, we padded self.ragged above
        i.sort()
        self.assertEqual(i, [('a','aaaaaa'),('b','aaa---'),('c','aaaa--')])
    
    def test_getRows(self):
        """Alignment getRows should return new Alignment with selected rows."""
        self.assertRaises(KeyError, self.empty.getRows, ['a'])
        a = self.ragged.getRows('bc')
        assert isinstance(a, Alignment)
        self.assertEqual(a, {'b':'aaa','c':'aaaa'})
        #should be able to negate
        a = self.ragged.getRows('bc', negate=True)
        self.assertEqual(a, {'a':'aaaaaa'})

    def test_getRowIndices(self):
        """Alignment getRowIndices should return names of rows where f(row) is True"""
        is_long = lambda x: len(x) > 10
        is_med = lambda x: len(x) > 3
        is_any = lambda x: len(x) > 0
        self.assertEqual(self.ragged.getRowIndices(is_long), [])
        self.ragged.RowOrder = 'cba'
        self.assertEqual(self.ragged.getRowIndices(is_med), ['c','a'])
        self.ragged.RowOrder = 'abc'
        self.assertEqual(self.ragged.getRowIndices(is_med), ['a','c'])
        self.assertEqual(self.ragged.getRowIndices(is_any), ['a','b','c'])
        #should be able to negate
        self.assertEqual(self.ragged.getRowIndices(is_med, negate=True), ['b'])
        self.assertEqual(self.ragged.getRowIndices(is_any, negate=True), [])

    def test_getRowsIf(self):
        """Alignment getRowsIf should return rows where f(row) is True"""
        is_long = lambda x: len(x) > 10
        is_med = lambda x: len(x) > 3
        is_any = lambda x: len(x) > 0
        self.assertEqual(self.ragged.getRowsIf(is_long), {})
        self.ragged.RowOrder = 'cba'
        self.assertEqual(self.ragged.getRowsIf(is_med), \
            {'c':'aaaa','a':'aaaaaa'})
        self.ragged.RowOrder = None
        self.assertEqual(self.ragged.getRowsIf(is_med), \
            {'c':'aaaa','a':'aaaaaa'})
        self.assertEqual(self.ragged.getRowsIf(is_any), self.ragged)
        assert isinstance(self.ragged.getRowsIf(is_med), Alignment)
        #should be able to negate
        self.assertEqual(self.ragged.getRowsIf(is_med, negate=True), \
            {'b':'aaa'})

    def test_getCols(self):
        """Alignment getCols should return new alignment w/ specified columns"""
        self.assertEqual(self.empty.getCols([0]), {})
        
        self.assertEqual(self.gaps.getCols([5,4,0], row_constructor=''.join), \
            {'a':'aaa','b':'a-a','c':'--a'})
        assert isinstance(self.gaps.getCols([0]), Alignment)
        #should be able to negate
        self.assertEqual(self.gaps.getCols([5,4,0], negate=True, \
            row_constructor=''.join),
            {'a':'aaaa','b':'--aa','c':'a---'})

    def test_getColIndices(self):
        """Alignment getColIndices should return names of cols where f(col) is True"""
        gap_1st = lambda x: x[0] == '-'
        gap_2nd = lambda x: x[1] == '-'
        gap_3rd = lambda x: x[2] == '-'
        is_list =  lambda x: isinstance(x, list)
        self.gaps.RowOrder = 'abc'

        self.assertEqual(self.gaps.getColIndices(gap_1st), [])
        self.assertEqual(self.gaps.getColIndices(gap_2nd), [1,2,4])
        self.assertEqual(self.gaps.getColIndices(gap_3rd), [2,3,4,5,6])
        self.assertEqual(self.gaps.getColIndices(is_list), [0,1,2,3,4,5,6])
        #should be able to negate
        self.assertEqual(self.gaps.getColIndices(gap_2nd, negate=True), \
            [0,3,5,6])
        self.assertEqual(self.gaps.getColIndices(gap_1st, negate=True), \
            [0,1,2,3,4,5,6])
        self.assertEqual(self.gaps.getColIndices(is_list, negate=True), [])

    def test_getColsIf(self):
        """Alignment getColsIf should return cols where f(col) is True"""
        gap_1st = lambda x: x[0] == '-'
        gap_2nd = lambda x: x[1] == '-'
        gap_3rd = lambda x: x[2] == '-'
        is_list =  lambda x: isinstance(x, list)
        self.gaps.RowOrder = 'abc'

        self.assertEqual(self.gaps.getColsIf(gap_1st,row_constructor=''.join),\
            {'a':'', 'b':'', 'c':''})
        self.assertEqual(self.gaps.getColsIf(gap_2nd,row_constructor=''.join),\
            {'a':'aaa', 'b':'---', 'c':'a--'})
        self.assertEqual(self.gaps.getColsIf(gap_3rd,row_constructor=''.join),\
            {'a':'aaaaa', 'b':'-a-aa', 'c':'-----'})
        self.assertEqual(self.gaps.getColsIf(is_list,row_constructor=''.join),\
            self.gaps)

        assert isinstance(self.gaps.getColsIf(gap_1st), Alignment)
        #should be able to negate
        self.assertEqual(self.gaps.getColsIf(gap_1st, row_constructor=''.join,\
            negate=True), self.gaps)
        self.assertEqual(self.gaps.getColsIf(gap_2nd, row_constructor=''.join,\
            negate=True), {'a':'aaaa','b':'aaaa','c':'a---'})
        self.assertEqual(self.gaps.getColsIf(gap_3rd, row_constructor=''.join,\
            negate=True), {'a':'aa','b':'a-','c':'aa'})

    def test_getItems(self):
        """Alignment getItems should return list of items from k,v pairs"""
        self.assertEqual(self.mixed.getItems([('a',3),('b',4),('a',0)]), \
            ['d','p','a'])
        self.assertRaises(KeyError, self.mixed.getItems, [('x','y')])
        self.assertRaises(IndexError, self.mixed.getItems, [('a',1000)])
        #should be able to negate -- note that results will have rows in
        #arbitrary order
        self.assertEqualItems(self.mixed.getItems([('a',3),('b',4),('a',0)], \
            negate=True), ['b','c','e','l','m','n','o'])

    def test_getItemIndices(self):
        """Alignment getItemIndices should return coordinates of matching items"""
        is_vowel = lambda x: x in 'aeiou'
        self.mixed.RowOrder = 'ba'     #specify reverse order
        self.assertEqual(self.mixed.getItemIndices(is_vowel), \
            [('b',3),('a',0),('a',4)])
        not_vowel = lambda x: not is_vowel(x)
        self.assertEqual(self.ragged.getItemIndices(not_vowel), [])
        #should be able to negate
        self.assertEqualItems(self.mixed.getItemIndices(is_vowel, negate=True),\
            [('a',1),('a',2),('a',3),('b',0),('b',1),('b',2),('b',4)])

    def test_getItemsIf(self):
        """Alignment getItemsIf should return matching items"""
        is_vowel = lambda x: x in 'aeiou'
        self.mixed.RowOrder = 'ba'
        self.assertEqual(self.mixed.getItemsIf(is_vowel), ['o','a','e'])
        self.assertEqual(self.empty.getItemsIf(is_vowel), [])
        self.assertEqual(self.one_seq.getItemsIf(is_vowel), list('aaaaa'))
        #should be able to negate
        self.assertEqualItems(self.mixed.getItemsIf(is_vowel, negate=True), \
            list('bcdlmnp'))

    def test_getSimilar(self):
        """Alignment getSimilar should get all sequences close to target seq"""
        aln = self.many
        x = Rna('gggggggggg')
        y = Rna('----------')
        #test min and max similarity ranges
        result = aln.getSimilar(aln['a'],min_similarity=0.4,max_similarity=0.7)
        for seq in 'cefg':
            assert seq in result
            assert result[seq] == aln[seq]
        self.assertEqual(len(result), 4)
        
        result = aln.getSimilar(aln['a'],min_similarity=0.95,max_similarity=1)
        for seq in 'a':
            assert seq in result
            assert result[seq] == aln[seq]
        self.assertEqual(len(result), 1)

        result = aln.getSimilar(aln['a'], min_similarity=0.75, \
            max_similarity=0.85)
        for seq in 'bd':
            assert seq in result
            assert result[seq] == aln[seq]
        self.assertEqual(len(result), 2)

        result = aln.getSimilar(aln['a'],min_similarity=0,max_similarity=0.2)
        self.assertEqual(len(result), 0)

        #test some sequence transformations
        transform = lambda s: s[1:4]
        result = aln.getSimilar(aln['a'], min_similarity=0.5, \
            transform=transform)
        for seq in 'abdfg':
            assert seq in result
            assert result[seq] == aln[seq]
        self.assertEqual(len(result), 5)

        transform = lambda s: s[-3:]
        result = aln.getSimilar(aln['a'], min_similarity=0.5, \
            transform=transform)
        for seq in 'abcde':
            assert seq in result
            assert result[seq] == aln[seq]
        self.assertEqual(len(result), 5)

        #test a different distance metric
        metric = lambda x, y: x.count('g') + y.count('g')
        result = aln.getSimilar(aln['a'], min_similarity=5, max_similarity=10, \
            metric=metric)
        for seq in 'ef':
            assert seq in result
            assert result[seq] == aln[seq]
        self.assertEqual(len(result), 2)

        #test the combination of a transform and a distance metric
        aln = Alignment(dict(enumerate(map(Rna, ['aA-ac','A-aAC','aa-aa']))))
        transform = lambda s: Rna(s.upper())
        metric = RnaSequence.fracSameNonGaps
        #first, do it without the transformation
        result = aln.getSimilar(aln[0], min_similarity=0.5, metric=metric)
        for seq in [0,2]:
            assert seq in result
            assert result[seq] == aln[seq]
        self.assertEqual(len(result), 2)
        
        result = aln.getSimilar(aln[0], min_similarity=0.8, metric=metric)
        for seq in [0]:
            assert seq in result
            assert result[seq] == aln[seq]
        self.assertEqual(len(result), 1)
        #then, verify that the transform changes the results         
        result = aln.getSimilar(aln[0], min_similarity=0.5, metric=metric, \
            transform=transform)
        for seq in [0,1,2]:
            assert seq in result
            assert result[seq] == aln[seq]
        self.assertEqual(len(result), 3)
        
        result = aln.getSimilar(aln[0], min_similarity=0.8, metric=metric, \
            transform=transform)
        for seq in [0,1]:
            assert seq in result
            assert result[seq] == aln[seq]
        self.assertEqual(len(result), 2)
         
    def test_omitGapCols(self):
        """Alignment omitGapCols should return alignment w/o columns of gaps"""
        aln = self.end_gaps
        
        #first, check behavior when we're just acting on the cols (and not
        #trying to delete the naughty rows).
        
        #default should strip out cols that are 100% gaps
        self.assertEqual(aln.omitGapCols(row_constructor=''.join), \
            {'a':'-abc', 'b':'cba-', 'c':'-def'})
        #if allowed_gap_frac is 1, shouldn't delete anything
        self.assertEqual(aln.omitGapCols(1, row_constructor=''.join), \
            {'a':'--a-bc-', 'b':'-cb-a--', 'c':'--d-ef-'})
        #if allowed_gap_frac is 0, should strip out any cols containing gaps
        self.assertEqual(aln.omitGapCols(0, row_constructor=''.join), \
            {'a':'ab', 'b':'ba', 'c':'de'})
        #intermediate numbers should work as expected
        self.assertEqual(aln.omitGapCols(0.4, row_constructor=''.join), \
            {'a':'abc', 'b':'ba-', 'c':'def'})
        self.assertEqual(aln.omitGapCols(0.7, row_constructor=''.join), \
            {'a':'-abc', 'b':'cba-', 'c':'-def'})
        #check that it doesn't fail on an empty alignment
        self.assertEqual(self.empty.omitGapCols(), {})

        #second, need to check behavior when the naughty rows should be
        #deleted as well.

        #default should strip out cols that are 100% gaps
        self.assertEqual(aln.omitGapCols(row_constructor=''.join, \
            del_rows=True), {'a':'-abc', 'b':'cba-', 'c':'-def'})
        #if allowed_gap_frac is 1, shouldn't delete anything
        self.assertEqual(aln.omitGapCols(1, row_constructor=''.join, \
            del_rows=True), {'a':'--a-bc-', 'b':'-cb-a--', 'c':'--d-ef-'})
        #if allowed_gap_frac is 0, should strip out any cols containing gaps
        self.assertEqual(aln.omitGapCols(0, row_constructor=''.join, \
            del_rows=True), {}) #everything has at least one naughty non-gap
        #intermediate numbers should work as expected
        self.assertEqual(aln.omitGapCols(0.4, row_constructor=''.join,
            del_rows=True), {'a':'abc', 'c':'def'}) #b has a naughty non-gap
        #check that does not delete b if allowed_frac_bad_calls higher than 0.14
        self.assertEqual(aln.omitGapCols(0.4, row_constructor=''.join,
            del_rows=True, allowed_frac_bad_cols=0.2), \
                    {'a':'abc', 'b':'ba-','c':'def'})
        self.assertEqual(aln.omitGapCols(0.4, row_constructor=''.join,
            del_rows=True), {'a':'abc', 'c':'def'}) #b has a naughty non-gap
        
        self.assertEqual(aln.omitGapCols(0.7, row_constructor=''.join,
            del_rows=True), {'a':'-abc', 'b':'cba-', 'c':'-def'}) #all ok
        #check that it doesn't fail on an empty alignment
        self.assertEqual(self.empty.omitGapCols(del_rows=True), {})

        #when we increase the number of sequences to 6, more differences
        #start to appear.
        aln['d'] = '-------'
        aln['e'] = 'xyzxyzx'
        aln['f'] = 'ab-cdef'
        #if no gaps are allowed, everything is deleted...
        self.assertEqual(aln.omitGapCols(0, del_rows=False), \
            {'a':[], 'b':[], 'c':[], 'd':[], 'e':[], 'f':[]})
        #...though not a sequence that's all gaps, since it has no positions
        #that are not gaps. This 'feature' should possibly be considered a bug.
        self.assertEqual(aln.omitGapCols(0, del_rows=True), {'d':[]})
        #if we're deleting only full columns of gaps, del_rows does nothing.
        self.assertEqual(aln.omitGapCols(del_rows=True, \
            row_constructor=''.join), aln)
        #at 50%, should delete a bunch of minority sequences
        self.assertEqual(aln.omitGapCols(0.5, del_rows=True, \
            row_constructor=''.join), \
            {'a':'-abc','b':'cba-','c':'-def','d':'----'})
        #shouldn't depend on order of rows
        aln.RowOrder = 'fadbec'
        self.assertEqual(aln.omitGapCols(0.5, del_rows=True, \
            row_constructor=''.join), \
            {'a':'-abc','b':'cba-','c':'-def','d':'----'})
        

    def test_omitGapRows(self):
        """Alignment omitGapRows should return alignment w/o rows with gaps"""
        #check default params
        self.assertEqual(self.gaps.omitGapRows(), self.gaps.omitGapRows(0))
        #check for boundary effects
        self.assertEqual(self.gaps.omitGapRows(-1), {})
        self.assertEqual(self.gaps.omitGapRows(0), {'a':'aaaaaaa'})
        self.assertEqual(self.gaps.omitGapRows(0.1), {'a':'aaaaaaa'})
        self.assertEqual(self.gaps.omitGapRows(3.0/7 - 0.01), {'a':'aaaaaaa'})
        self.assertEqual(self.gaps.omitGapRows(3.0/7), \
            {'a':'aaaaaaa','b':'a--a-aa'})
        self.assertEqual(self.gaps.omitGapRows(3.0/7 + 0.01), \
            {'a':'aaaaaaa','b':'a--a-aa'})
        self.assertEqual(self.gaps.omitGapRows(5.0/7 - 0.01), \
            {'a':'aaaaaaa','b':'a--a-aa'})
        self.assertEqual(self.gaps.omitGapRows(5.0/7 + 0.01), self.gaps)
        self.assertEqual(self.gaps.omitGapRows(0.99), self.gaps)
        #check new object creation
        assert self.gaps.omitGapRows(0.99) is not self.gaps
        assert isinstance(self.gaps.omitGapRows(3.0/7), Alignment)
        #repeat tests for object that supplies its own gaps
        self.assertEqual(self.gaps_rna.omitGapRows(-1), {})
        self.assertEqual(self.gaps_rna.omitGapRows(0), {'a':'aaaaaaa'})
        self.assertEqual(self.gaps_rna.omitGapRows(0.1), {'a':'aaaaaaa'})
        self.assertEqual(self.gaps_rna.omitGapRows(3.0/7 - 0.01), \
            {'a':'aaaaaaa'})
        self.assertEqual(self.gaps_rna.omitGapRows(3.0/7), \
            {'a':'aaaaaaa','b':'a--a-aa'})
        self.assertEqual(self.gaps_rna.omitGapRows(3.0/7 + 0.01), \
            {'a':'aaaaaaa','b':'a--a-aa'})
        self.assertEqual(self.gaps_rna.omitGapRows(5.0/7 - 0.01), \
            {'a':'aaaaaaa','b':'a--a-aa'})
        self.assertEqual(self.gaps_rna.omitGapRows(5.0/7 + 0.01), self.gaps_rna)
        self.assertEqual(self.gaps_rna.omitGapRows(0.99), self.gaps_rna)
        assert self.gaps_rna.omitGapRows(0.99) is not self.gaps_rna
        assert isinstance(self.gaps_rna.omitGapRows(3.0/7), Alignment)

    def test_omitGapRuns(self):
        """Alignment omitGapRuns should return alignment w/o runs of gaps"""
        #negative value will still let through ungapped sequences
        self.assertEqual(self.gaps.omitGapRuns(-5), {'a':'aaaaaaa'})
        #test edge effects
        self.assertEqual(self.gaps.omitGapRuns(0), {'a':'aaaaaaa'})
        self.assertEqual(self.gaps.omitGapRuns(1), {'a':'aaaaaaa'})
        self.assertEqual(self.gaps.omitGapRuns(2),{'a':'aaaaaaa','b':'a--a-aa'})
        self.assertEqual(self.gaps.omitGapRuns(3),{'a':'aaaaaaa','b':'a--a-aa'})
        self.assertEqual(self.gaps.omitGapRuns(4),{'a':'aaaaaaa','b':'a--a-aa'})
        self.assertEqual(self.gaps.omitGapRuns(5), self.gaps)
        self.assertEqual(self.gaps.omitGapRuns(6), self.gaps)
        self.assertEqual(self.gaps.omitGapRuns(1000), self.gaps)
        #test new object creation
        assert self.gaps.omitGapRuns(6) is not self.gaps
        assert isinstance(self.gaps.omitGapRuns(6), Alignment)

    def test_distanceMatrix(self):
        """Alignment distanceMatrix should produce correct scores"""
        self.assertEqual(self.empty.distanceMatrix(frac_same), {})
        self.assertEqual(self.one_seq.distanceMatrix(frac_same), {'a':{'a':1}})
        self.assertEqual(self.gaps.distanceMatrix(frac_same), 
            {   'a':{'a':7/7.0,'b':4/7.0,'c':2/7.0},
                'b':{'a':4/7.0,'b':7/7.0,'c':3/7.0},
                'c':{'a':2/7.0,'b':3/7.0,'c':7/7.0},
            })

    def test_IUPACConsensus_RNA(self):
        """Alignment IUPACConsensus should use RNA IUPAC symbols correctly"""
        alignmentUpper = Alignment( ['UCAGN-UCAGN-UCAGN-UCAGAGCAUN-',
                                     'UUCCAAGGNN--UUCCAAGGNNAGCAG--',
                                     'UUCCAAGGNN--UUCCAAGGNNAGCUA--',
                                     'UUUUCCCCAAAAGGGGNNNN--AGCUA--',
                                     'UUUUCCCCAAAAGGGGNNNN--AGCUA--',
                                     ])
        alignmentLower = Alignment( ['ucagn-ucagn-ucagn-ucagagcaun-',
                                     'uuccaaggnn--uuccaaggnnagcag--',
                                     'uuccaaggnn--uuccaaggnnagcua--',
                                     'uuuuccccaaaaggggnnnn--agcua--',
                                     'uuuuccccaaaaggggnnnn--agcua--',
                                     ])
        alignmentMixed = Alignment( ['ucagn-ucagn-ucagn-ucagagcaun-',
                                     'UUCCAAGGNN--UUCCAAGGNNAGCAG--',
                                     'uuccaaggnn--uuccaaggnnagcua--',
                                     'UUUUCCCCAAAAGGGGNNNN--AGCUA--',
                                     'uuuuccccaaaaggggnnnn--agcua--',
                                     ])
                                    
        #following IUPAC consensus calculated by hand
        #Test all uppper
        self.assertEqual(alignmentUpper.IUPACConsensus(),
                         'UYHBN?BSNN??KBVSN?NN??AGCWD?-')
        #Test all lower
        self.assertEqual(alignmentLower.IUPACConsensus(),
                         'uyhbn?bsnn??kbvsn?nn??agcwd?-')
        #Test mixed case
        self.assertEqual(alignmentMixed.IUPACConsensus(),
                         'UYHBN?BSNN??KBVSN?NN??AGCWD?-')
        
    def test_IUPACConsensus_DNA(self):
        """Alignment IUPACConsensus should use DNA IUPAC symbols correctly"""
        alignmentUpper = Alignment( ['TCAGN-TCAGN-TCAGN-TCAGAGCATN-',
                                     'TTCCAAGGNN--TTCCAAGGNNAGCAG--',
                                     'TTCCAAGGNN--TTCCAAGGNNAGCTA--',
                                     'TTTTCCCCAAAAGGGGNNNN--AGCTA--',
                                     'TTTTCCCCAAAAGGGGNNNN--AGCTA--',
                                     ])
        alignmentLower = Alignment( ['tcagn-tcagn-tcagn-tcagagcatn-',
                                     'ttccaaggnn--ttccaaggnnagcag--',
                                     'ttccaaggnn--ttccaaggnnagcta--',
                                     'ttttccccaaaaggggnnnn--agcta--',
                                     'ttttccccaaaaggggnnnn--agcta--',
                                     ])
        alignmentMixed = Alignment( ['tcagn-tcagn-tcagn-tcagagcatn-',
                                     'TTCCAAGGNN--TTCCAAGGNNAGCAG--',
                                     'ttccaaggnn--ttccaaggnnagcta--',
                                     'TTTTCCCCAAAAGGGGNNNN--AGCTA--',
                                     'ttttccccaaaaggggnnnn--agcta--',
                                     ])
                                    
        #following IUPAC consensus calculated by hand
        #Test all uppper
        self.assertEqual(alignmentUpper.IUPACConsensus(DnaAlphabet),
                         'TYHBN?BSNN??KBVSN?NN??AGCWD?-')
        #Test all lower
        self.assertEqual(alignmentLower.IUPACConsensus(DnaAlphabet),
                         'tyhbn?bsnn??kbvsn?nn??agcwd?-')
        #Test mixed case
        self.assertEqual(alignmentMixed.IUPACConsensus(DnaAlphabet),
                         'TYHBN?BSNN??KBVSN?NN??AGCWD?-')

    def test_IUPACConsensus_Protein(self):
        """Alignment IUPACConsensus should use protein IUPAC symbols correctly"""
        alignmentUpper = Alignment( ['ACDEFGHIKLMNPQRSTUVWY-',
                                     'ACDEFGHIKLMNPQRSUUVWF-',
                                     'ACDEFGHIKLMNPERSKUVWC-',
                                     'ACNEFGHIKLMNPQRS-UVWP-',                                     
                                     ])
        alignmentLower = Alignment( ['acdefghiklmnpqrstuvwy-',
                                     'acdefghiklmnpqrsuuvwf-',
                                     'acdefghiklmnperskuvwc-',
                                     'acnefghiklmnpqrs-uvwp-',
                                     ])
        alignmentMixed = Alignment( ['acdefghiklmnpqrstuvwy-',
                                     'ACDEFGHIKLMNPQRSUUVWF-',
                                     'acdefghiklmnperskuvwc-',
                                     'ACNEFGHIKLMNPQRS-UVWP-',
                                     ])
                                    
        #following IUPAC consensus calculated by hand
        #Test all uppper
        self.assertEqual(alignmentUpper.IUPACConsensus(ProteinAlphabet),
                         'ACBEFGHIKLMNPZRS?UVWx-')
        #Test all lower
        self.assertEqual(alignmentLower.IUPACConsensus(ProteinAlphabet),
                         'acbefghiklmnpzrs?uvwx-')
        #Test mixed case
        self.assertEqual(alignmentMixed.IUPACConsensus(ProteinAlphabet),
                         'ACBEFGHIKLMNPZRS?UVWx-')

    def test_toPhylip(self):
        """Alignment should return PHYLIP string format correctly"""
        align_norm = Alignment( ['ACDEFGHIKLMNPQRSTUVWY-',
                                     'ACDEFGHIKLMNPQRSUUVWF-',
                                     'ACDEFGHIKLMNPERSKUVWC-',
                                     'ACNEFGHIKLMNPQRS-UVWP-',                                     
                                     ])

        phylip_str, id_map =  align_norm.toPhylip()

        self.assertEqual(phylip_str, """4 22\nseq0000001 ACDEFGHIKLMNPQRSTUVWY-\nseq0000002 ACDEFGHIKLMNPQRSUUVWF-\nseq0000003 ACDEFGHIKLMNPERSKUVWC-\nseq0000004 ACNEFGHIKLMNPQRS-UVWP-""")
        self.assertEqual(id_map, {'seq0000004': 3, 'seq0000001': 0, 'seq0000003': 2, 'seq0000002': 1})

        align_rag = Alignment( ['ACDEFGHIKLMNPQRSTUVWY-',
                                     'ACDEFGHIKLMNPQRSUUVWF-',
                                     'ACDEFGHIKLMNPERSKUVWC-',
                                     'ACNEFGHIKLMNUVWP-',                                     
                                     ])


        self.assertRaises(ValueError,  align_rag.toPhylip)

    def test_isRagged(self):
        """Alignment isRagged should return true if ragged alignment"""
        assert(self.ragged.isRagged())
        assert(not self.identical.isRagged())
        assert(not self.gaps.isRagged())

    def test_scoreMatrix(self):
        """Alignment scoreMatrix should produce position specific score matrix."""
        scoreMatrix = {
            0:{'a':1.0,'c':1.0,'u':5.0},
            1:{'c':6.0,'u':1.0},
            2:{'a':3.0,'c':2.0,'g':2.0},
            3:{'a':3.0,'g':4.0},
            4:{'c':1.0,'g':1.0,'u':5.0},
            5:{'c':6.0,'u':1.0},
            6:{'a':3.0,'g':4.0},
            7:{'a':1.0,'g':6.0},
            8:{'a':1.0,'c':1.0,'g':1.0,'u':4.0},
            9:{'a':1.0,'c':2.0,'u':4.0},
            }
        self.assertEqual(self.many.scoreMatrix(), scoreMatrix)

    def test_columnFrequencies(self):
        """Alignment.columnFrequencies should count symbols in each column"""
        #calculate by hand what the first and last columns should look like in
        #each case
        firstvalues = [ [self.integers, Freqs([1,1,5])],
                        [self.sequences, Freqs('UUU')],
                        [self.structures, Freqs('(.(')],
                    ]
        
        lastvalues = [ [self.integers, Freqs([5,1,1])],
                        [self.sequences, Freqs('GGG')],
                        [self.structures, Freqs('..)')],
                    ]
        #check that the first columns are what we expected
        for obj, result in firstvalues:
            freqs = obj.columnFrequencies()
            self.assertEqual(str(freqs[0]), str(result))
        #check that the last columns are what we expected
        for obj, result in lastvalues:
            freqs = obj.columnFrequencies()
            self.assertEqual(str(freqs[-1]), str(result))
        #check that it works for the empty alignment
        freqs = self.empty.columnFrequencies()
        self.assertEqual(str(freqs), str([]))

    def test_columnProbs(self):
        """Alignment.columnProbs should find Pr(symbol) in each column"""
        #make an alignment with 4 rows (easy to calculate probabilities)
        align = Alignment(["AAA", "ACA", "GGG", "GUC"])
        cp = align.columnProbs()
        #check that the column probs match the counts we expect
        self.assertEqual(cp, map(Freqs, [   
            {'A':0.5, 'G':0.5},
            {'A':0.25, 'C':0.25, 'G':0.25, 'U':0.25},
            {'A':0.5, 'G':0.25, 'C':0.25},
            ]))

    def test_majorityConsensus(self):
        """Alignment.majorityConsensus should return commonest symbol per column"""
        #Check the majority consensus in detail for the integer alignment
        obs = self.integers.majorityConsensus()
        self.assertEqual(obs[0], 1)
        assert(obs[1] in [2, 1, 4])
        self.assertEqual(obs[2], 3)
        assert(obs[3] in [2, 1, 4])
        self.assertEqual(obs[4], 1)
        #Check the exact strings expected from string transform
        self.assertEqual(self.sequences.majorityConsensus(str), 'UCAG')
        self.assertEqual(self.structures.majorityConsensus(str), '(.....')
        self.assertEqual(str(self.empty.majorityConsensus()), str([]))

    
    def test_uncertainties(self):
        """Alignment.uncertainties should match hand-calculated values"""
        aln = Alignment(['abc', 'axc'])
        obs = aln.uncertainties()
        self.assertFloatEqual(obs, [0, 1, 0])
        #check what happens with only one input sequence
        aln = Alignment(['abc'])
        obs = aln.uncertainties()
        self.assertFloatEqual(obs, [0, 0, 0])
        #check that we can screen out bad items OK
        aln = Alignment(['abc', 'def', 'ghi', 'jkl', 'GHI'])
        obs = aln.uncertainties('abcdefghijklmnop')
        self.assertFloatEqual(obs, [2.0] * 3)

    def test_omitRowsTemplate(self):
        """Alignment.omitRowsTemplate returns new aln with well-aln to temp"""
        aln = self.omitRowsTemplate_aln
        result = aln.omitRowsTemplate('s3', 0.9, 5)
        self.assertEqual(result, {'s3': 'UUCCUUCUU-UUC', \
                's4': 'UU-UUUU-UUUUC'})
        result2 = aln.omitRowsTemplate('s4', 0.9, 4)
        self.assertEqual(result2, {'s3': 'UUCCUUCUU-UUC', \
                's4': 'UU-UUUU-UUUUC'})
        result3 = aln.omitRowsTemplate('s1', 0.9, 4)
        self.assertEqual(result3, {'s2': 'UC------U---C', \
                's1': 'UC-----CU---C', 's5': '-------------'})
        result4 = aln.omitRowsTemplate('s3', 0.5, 13)
        self.assertEqual(result4, {'s3': 'UUCCUUCUU-UUC', \
                's4': 'UU-UUUU-UUUUC'})
        
    def test_toFasta(self):
        """toFasta returns a fasta string"""

        aln = self.end_gaps 
        result = aln.toFasta()
        self.assertEqual(result, """>a
--a-bc-
>c
--d-ef-
>b
-cb-a--""")

    def test_make_gap_filter(self):
        """make_gap_filter returns f(seq) -> True if aligned ok w/ query"""
        s1 = Rna('UC-----CU---C')
        s3 = Rna('UUCCUUCUU-UUC')
        s4 = Rna('UU-UUUU-UUUUC')
        #check that the behavior is ok for gap runs
        f1 = make_gap_filter(s1, 0.9, 5)
        f3 = make_gap_filter(s3, 0.9, 5)
        #Should return False since s1 has gap run >= 5 with respect to s3
        self.assertEqual(f3(s1), False)
        #Should return False since s3 has an insertion run >= 5 to s1
        self.assertEqual(f1(s3), False)
        #Should retun True since s4 does not have a long enough gap or ins run
        self.assertEqual(f3(s4), True)
        f3 = make_gap_filter(s3, 0.9, 6)
        self.assertEqual(f3(s1), True)
        
        #Check that behavior is ok for gap_fractions
        f1 = make_gap_filter(s1, 0.5, 6)
        f3 = make_gap_filter(s3, 0.5, 6)
        #Should return False since 0.53% of positions are diff for gaps
        self.assertEqual(f3(s1), False)
        self.assertEqual(f1(s3), False)
        self.assertEqual(f3(s4), True)

    def test_getIntMap(self):
        """Alignment.getIntMap should return correct mapping."""
        aln = Alignment({'seq1':'ACGU','seq2':'CGUA','seq3':'CCGU'})
        int_keys = {'seq_0':'seq1','seq_1':'seq2','seq_2':'seq3'}
        int_map = {'seq_0':'ACGU','seq_1':'CGUA','seq_2':'CCGU'}
        im,ik = aln.getIntMap()
        self.assertEqual(ik,int_keys)
        self.assertEqual(im,int_map)