예제 #1
0
    def test_getSimilar(self):
        """Alignment getSimilar should get all sequences close to target seq

        Runs getSimilar on the ``self.many`` fixture with several
        min/max similarity windows, with sequence transforms, and with a
        custom metric. It then builds a small mixed-case alignment to check
        that a transform and a metric can be combined.
        """
        def check_hits(source, result, expected):
            """Check result has every expected key, the matching value from
            source for each, and no additional entries."""
            for name in expected:
                # TestCase assertions are used instead of bare ``assert`` so
                # the checks are not stripped when running under ``python -O``.
                self.assertTrue(name in result,
                    '%r missing from getSimilar result' % (name,))
                self.assertEqual(result[name], source[name])
            self.assertEqual(len(result), len(expected))

        aln = self.many
        target = aln['a']

        #test min and max similarity ranges
        result = aln.getSimilar(target, min_similarity=0.4, max_similarity=0.7)
        check_hits(aln, result, 'cefg')

        result = aln.getSimilar(target, min_similarity=0.95, max_similarity=1)
        check_hits(aln, result, 'a')

        result = aln.getSimilar(target, min_similarity=0.75,
            max_similarity=0.85)
        check_hits(aln, result, 'bd')

        # an empty expectation reduces to the length check only
        result = aln.getSimilar(target, min_similarity=0, max_similarity=0.2)
        check_hits(aln, result, '')

        #test some sequence transformations
        result = aln.getSimilar(target, min_similarity=0.5,
            transform=lambda s: s[1:4])
        check_hits(aln, result, 'abdfg')

        result = aln.getSimilar(target, min_similarity=0.5,
            transform=lambda s: s[-3:])
        check_hits(aln, result, 'abcde')

        #test a different distance metric
        # the metric is the total number of 'g' characters in both sequences,
        # so the similarity bounds here are counts rather than fractions
        result = aln.getSimilar(target, min_similarity=5, max_similarity=10,
            metric=lambda x, y: x.count('g') + y.count('g'))
        check_hits(aln, result, 'ef')

        #test the combination of a transform and a distance metric
        aln = Alignment(dict(enumerate(map(Rna, ['aA-ac','A-aAC','aa-aa']))))
        target = aln[0]
        # upper-casing removes the case differences between the sequences
        transform = lambda s: Rna(s.upper())
        metric = RnaSequence.fracSameNonGaps

        #first, do it without the transformation
        result = aln.getSimilar(target, min_similarity=0.5, metric=metric)
        check_hits(aln, result, [0, 2])

        result = aln.getSimilar(target, min_similarity=0.8, metric=metric)
        check_hits(aln, result, [0])

        #then, verify that the transform changes the results
        result = aln.getSimilar(target, min_similarity=0.5, metric=metric,
            transform=transform)
        check_hits(aln, result, [0, 1, 2])

        result = aln.getSimilar(target, min_similarity=0.8, metric=metric,
            transform=transform)
        check_hits(aln, result, [0, 1])