Exemple #1
0
    def test_small_seqs(self):
        """Check handling of input sequences shorter than the probe length.

        With probe_length=6, three calls are expected to raise ValueError:
        'CCGG' without allow_small_seqs, 'CCGG' with allow_small_seqs=5,
        and 'CNNN' with allow_small_seqs=4. A final call with 'CCGG' and
        allow_small_seqs=4 is expected to succeed and to include 'CCGG'
        itself among the returned probes.
        """
        make_probes = candidate_probes.make_candidate_probes_from_sequences
        # Keyword arguments common to every call in this test
        shared = {
            'probe_length': 6,
            'probe_stride': 3,
            'min_n_string_length': 2,
        }

        # (sequences, extra keyword arguments) for calls that must fail
        failing_cases = [
            (['ATCGATCGATCG', 'CCGG'], {}),
            (['ATCGATCGATCG', 'CCGG'], {'allow_small_seqs': 5}),
            (['ATCGATCGATCG', 'CNNN'], {'allow_small_seqs': 4}),
        ]
        for seqs, extra in failing_cases:
            kwargs = dict(shared, **extra)
            with self.assertRaises(ValueError):
                make_probes(seqs, **kwargs)

        # Call that must succeed: the small sequence is kept as a probe
        probes = make_probes(['ATCGATCGATCG', 'CCGG'],
                             allow_small_seqs=4,
                             **shared)
        observed = [''.join(probe.seq) for probe in probes]
        expected = ['ATCGAT', 'GATCGA', 'CGATCG', 'CCGG']
        self.assertCountEqual(observed, expected)
    def setUp(self):
        """Build candidate probes from the dataset's genomes.

        Takes the first sequence of every genome read from the
        zaire_ebolavirus dataset, keeps only the first 100 of those
        sequences (to limit the memory used by the candidate probes), and
        stores two probe collections on the test instance:
        self.probes_100 (length 100, stride 50) and self.probes_75
        (length 75, stride 25).
        """
        # Suppress log messages at WARNING level and below
        logging.disable(logging.WARNING)

        genomes = seq_io.read_dataset_genomes(zaire_ebolavirus)
        first_seqs = [genome.seqs[0] for genome in genomes][:100]

        def build_probes(length, stride):
            # Generate candidate probes over the selected sequences
            return candidate_probes.make_candidate_probes_from_sequences(
                first_seqs,
                probe_length=length,
                probe_stride=stride,
                min_n_string_length=2)

        self.probes_100 = build_probes(100, 50)
        self.probes_75 = build_probes(75, 25)
Exemple #3
0
 def test_multiple_seqs(self):
     """Check probe generation across two input sequences containing N's.

     Generates probes of length 6 with stride 3 (min_n_string_length=2)
     from two sequences and compares the resulting probe strings, as an
     unordered collection with multiplicities, against the expected ones.
     """
     input_seqs = ['ATCGNCGNNTCG', 'ATCGNCGNNTCGATAT']
     probes = candidate_probes.make_candidate_probes_from_sequences(
         input_seqs,
         probe_length=6,
         probe_stride=3,
         min_n_string_length=2)
     observed = [''.join(probe.seq) for probe in probes]

     # Expected probes in two groups (presumably one group per input
     # sequence). 'TCGATA' is expected twice; assertCountEqual compares
     # element counts, so the duplicate matters.
     expected_first = ['ATCGNC', 'TCGNCG']
     expected_second = ['ATCGNC', 'TCGNCG', 'TCGATA', 'TCGATA', 'CGATAT']
     self.assertCountEqual(observed, expected_first + expected_second)
Exemple #4
0
    def test_one_genome(self):
        """Check the filter output for a single 26-character target genome.

        Candidate probes of length 6 with stride 3 are generated from the
        genome, passed through self.get_filter_and_output, and the output
        is compared (order-insensitively) with the probes built by
        self.make_probes_with_adapters.
        """
        target_genomes = [['ABCDEFGHIJKLMNOPQRSTUVWXYZ']]
        target_genomes = self.convert_target_genomes(target_genomes)

        # Create probes of length 6 bp with a stride of 3 bp. The list is
        # named `candidates` rather than `input` so the builtin input()
        # is not shadowed.
        candidates = []
        for genomes_from_group in target_genomes:
            for g in genomes_from_group:
                candidates.extend(
                    cp.make_candidate_probes_from_sequences(
                        g.seqs, probe_length=6, probe_stride=3))

        # Only the output is checked here; the returned filter object is
        # not used by this test.
        # NOTE(review): the meaning of the positional arguments (6, 0, ...,
        # 3, 10) is defined by get_filter_and_output, which is not visible
        # here -- confirm against that helper.
        _, output = self.get_filter_and_output(6, 0, target_genomes,
                                               candidates, 3, 10)

        # NOTE(review): presumably the two lists are the probes expected to
        # receive each of two adapters -- confirm against
        # make_probes_with_adapters.
        desired_output = self.make_probes_with_adapters(
            ['ABCDEF', 'GHIJKL', 'MNOPQR', 'STUVWX'],
            ['DEFGHI', 'JKLMNO', 'PQRSTU', 'UVWXYZ'])
        self.assertCountEqual(output, desired_output)