def test_getSimilar(self):
    """Alignment getSimilar should get all sequences close to target seq

    Calls getSimilar on the self.many fixture with several
    min_similarity/max_similarity ranges, with sequence transforms and
    with a custom metric, then on a small mixed-case alignment that
    combines a transform with RnaSequence.fracSameNonGaps.
    """

    def check(source, result, expected):
        """Check result against the expected keys.

        Every key in expected must be in result and map to a sequence
        equal to source[key]; result must hold len(expected) entries.
        Uses TestCase assertions rather than bare ``assert`` so the
        checks still run under ``python -O``.
        """
        for key in expected:
            self.assertTrue(key in result,
                            '%r missing from result' % (key,))
            self.assertTrue(result[key] == source[key],
                            'sequence %r differs from the alignment'
                            % (key,))
        self.assertEqual(len(result), len(expected))

    aln = self.many
    target = aln['a']

    #test min and max similarity ranges
    result = aln.getSimilar(target, min_similarity=0.4, max_similarity=0.7)
    check(aln, result, 'cefg')

    result = aln.getSimilar(target, min_similarity=0.95, max_similarity=1)
    check(aln, result, 'a')

    result = aln.getSimilar(target, min_similarity=0.75,
                            max_similarity=0.85)
    check(aln, result, 'bd')

    # no sequence is expected in the lowest similarity range
    result = aln.getSimilar(target, min_similarity=0, max_similarity=0.2)
    check(aln, result, '')

    #test some sequence transformations
    # compare only positions 1..3 of each sequence
    result = aln.getSimilar(target, min_similarity=0.5,
                            transform=lambda s: s[1:4])
    check(aln, result, 'abdfg')

    # compare only the last three positions of each sequence
    result = aln.getSimilar(target, min_similarity=0.5,
                            transform=lambda s: s[-3:])
    check(aln, result, 'abcde')

    #test a different distance metric
    # the metric is the combined count of 'g' in both sequences, so the
    # thresholds here are counts rather than fractions
    result = aln.getSimilar(target, min_similarity=5, max_similarity=10,
                            metric=lambda x, y: x.count('g') + y.count('g'))
    check(aln, result, 'ef')

    #test the combination of a transform and a distance metric
    aln = Alignment(dict(enumerate(map(Rna, ['aA-ac', 'A-aAC', 'aa-aa']))))
    target = aln[0]
    metric = RnaSequence.fracSameNonGaps

    def to_upper(s):
        """Return s as an upper-cased Rna sequence."""
        return Rna(s.upper())

    #first, do it without the transformation
    result = aln.getSimilar(target, min_similarity=0.5, metric=metric)
    check(aln, result, [0, 2])

    result = aln.getSimilar(target, min_similarity=0.8, metric=metric)
    check(aln, result, [0])

    #then, verify that the transform changes the results
    result = aln.getSimilar(target, min_similarity=0.5, metric=metric,
                            transform=to_upper)
    check(aln, result, [0, 1, 2])

    result = aln.getSimilar(target, min_similarity=0.8, metric=metric,
                            transform=to_upper)
    check(aln, result, [0, 1])