def test_small_seqs(self):
    """Check handling of an input sequence shorter than the probe length.

    The second input sequence has only 4 bases while probe_length is 6.
    The first three calls are expected to raise ValueError; the final
    call is expected to succeed and to yield the short sequence itself
    alongside the probes from the 12-base sequence.
    """
    make_probes = candidate_probes.make_candidate_probes_from_sequences
    shared_args = {
        'probe_length': 6,
        'probe_stride': 3,
        'min_n_string_length': 2,
    }

    # allow_small_seqs is not given at all
    with self.assertRaises(ValueError):
        make_probes(['ATCGATCGATCG', 'CCGG'], **shared_args)

    # allow_small_seqs=5 is larger than the 4-base sequence
    with self.assertRaises(ValueError):
        make_probes(['ATCGATCGATCG', 'CCGG'],
                    allow_small_seqs=5, **shared_args)

    # allow_small_seqs=4 matches the length, but the short sequence
    # 'CNNN' contains a run of N bases
    with self.assertRaises(ValueError):
        make_probes(['ATCGATCGATCG', 'CNNN'],
                    allow_small_seqs=4, **shared_args)

    # allow_small_seqs=4 with a short sequence free of N bases
    probes = make_probes(['ATCGATCGATCG', 'CCGG'],
                         allow_small_seqs=4, **shared_args)
    probe_strs = [''.join(probe.seq) for probe in probes]
    expected = ['ATCGAT', 'GATCGA', 'CGATCG', 'CCGG']
    self.assertCountEqual(probe_strs, expected)
def setUp(self):
    """Build candidate probes from the dataset's genomes.

    The first sequence of each genome is taken, and only the first 100
    of those are kept to avoid using too much memory with the candidate
    probes. Two probe sets are stored on the test instance:

      - self.probes_100: probe_length=100, probe_stride=50
      - self.probes_75: probe_length=75, probe_stride=25
    """
    # Suppress log messages at WARNING severity and below
    logging.disable(logging.WARNING)

    genomes = seq_io.read_dataset_genomes(zaire_ebolavirus)
    first_seqs = [genome.seqs[0] for genome in genomes][:100]

    make_probes = candidate_probes.make_candidate_probes_from_sequences
    self.probes_100 = make_probes(
        first_seqs,
        probe_length=100,
        probe_stride=50,
        min_n_string_length=2,
    )
    self.probes_75 = make_probes(
        first_seqs,
        probe_length=75,
        probe_stride=25,
        min_n_string_length=2,
    )
def test_multiple_seqs(self):
    """Check the probes produced from two input sequences at once.

    Both sequences contain single N bases and an 'NN' run, with
    min_n_string_length set to 2. The comparison is order-insensitive
    but counts duplicates, so 'TCGATA' must appear exactly twice among
    the probes.
    """
    sequences = ['ATCGNCGNNTCG', 'ATCGNCGNNTCGATAT']
    probes = candidate_probes.make_candidate_probes_from_sequences(
        sequences,
        probe_length=6,
        probe_stride=3,
        min_n_string_length=2,
    )
    probe_strs = [''.join(probe.seq) for probe in probes]

    # Expected probes, grouped by the input sequence they are listed for
    expected_first = ['ATCGNC', 'TCGNCG']
    expected_second = ['ATCGNC', 'TCGNCG', 'TCGATA', 'TCGATA', 'CGATAT']
    self.assertCountEqual(probe_strs, expected_first + expected_second)
def test_one_genome(self):
    """Run the filter on candidate probes from one 26-letter genome.

    Candidate probes are generated from every sequence of every target
    genome, passed through get_filter_and_output, and the result is
    compared (ignoring order) with the probes built by
    make_probes_with_adapters from the two expected lists of probe
    strings.
    """
    target_genomes = self.convert_target_genomes(
        [['ABCDEFGHIJKLMNOPQRSTUVWXYZ']])

    # Create probes of length 6 bp with a stride of 3 bp
    candidates = []
    for group in target_genomes:
        for genome in group:
            candidates.extend(
                cp.make_candidate_probes_from_sequences(
                    genome.seqs, probe_length=6, probe_stride=3))

    # Only the output is checked here; the filter object itself is unused
    _, output = self.get_filter_and_output(
        6, 0, target_genomes, candidates, 3, 10)

    expected = self.make_probes_with_adapters(
        ['ABCDEF', 'GHIJKL', 'MNOPQR', 'STUVWX'],
        ['DEFGHI', 'JKLMNO', 'PQRSTU', 'UVWXYZ'],
    )
    self.assertCountEqual(output, expected)