def default_seqs():
    """Provide the shared sequence data for tests that need sequences.

    Tests in the TestSeqIO class are the exception: they read their own
    input instead of relying on this default.
    """
    return read_seqs(data_dir, filename_end='trimmed.fq', cutoff=5.8)
    def test_data_random_sampling(self):
        """Check that p_discard=0.5 keeps about half the reads per condition."""
        sampled = read_seqs(data_dir, filename_end='trimmed.fq', p_discard=0.5)

        for cond_name in sampled:
            n_kept = len(sampled[cond_name])
            # Starting from 1000 seqs, ending up with fewer than 400 or more
            # than 600 by chance is unlikely (at least six standard
            # deviations away from the mean).
            assert 400 < n_kept < 600
    def test_exact_length(self):
        """Check that seq_len_req=10 yields only sequences of length 10."""
        required_len = 10
        reads_by_cond = read_seqs(data_dir,
                                  filename_end='trimmed.fq',
                                  seq_len_req=required_len)

        for cond_name in reads_by_cond:
            for read in reads_by_cond[cond_name]:
                assert len(read) == required_len
    def test_get_seqs_for_one_specific_condition(self):
        """Check that a single cond_text string selects exactly one condition."""
        expected_name = 'test_C0_rep1_reads'

        selected = read_seqs(data_dir,
                             filename_end='trimmed.fq',
                             cond_text='C0_rep1')

        # Only the requested condition may be present in the result.
        assert len(selected) == 1
        assert expected_name in selected
    def test_get_seqs_for_multiple_specific_conditions(self):
        """Check that a list of cond_text strings selects each named condition."""
        expected_names = ('test_C0_rep1_reads', 'test_C1_rep1_reads')

        selected = read_seqs(data_dir,
                             filename_end='trimmed.fq',
                             cond_text=['C0_rep1', 'C1_rep1'])

        # Exactly the two requested conditions, and nothing else.
        assert len(selected) == len(expected_names)
        for name in expected_names:
            assert name in selected
    def test_degen(self):
        """Check the first read of every condition when degen=5 is passed.

        Expected strings are paired with conditions by iteration order and
        compared as lists of single characters.
        """
        expected_firsts = [
            'CAC', 'AGAATGAG', 'AAAATAGCTGGAGGATC', 'ACGGGGGATGCAGAGGGGTTGTCC',
            'AGTGAAAGGATAGGAAGGTCA', 'GAGTCGAGACGAGAAGGATA'
        ]

        reads_by_cond = read_seqs(data_dir,
                                  filename_end='trimmed.fq',
                                  degen=5)

        for idx, cond_name in enumerate(reads_by_cond):
            expected_chars = list(expected_firsts[idx])
            assert reads_by_cond[cond_name][0] == expected_chars
    def test_cutoff_int(self):
        """Check the first read of every condition when cutoff=5.0 is passed.

        Expected strings are paired with conditions by iteration order and
        compared as lists of single characters.

        NOTE(review): the name says "int" but the value passed is the float
        5.0 -- presumably an integer-valued cutoff is what is being
        exercised; confirm.
        """
        expected_firsts = [
            'CTA', 'GTAGGAGA', 'GAGGTAAAATAGCTGGA', 'AAGTGACGGGGGATGCAGAGGGGT',
            'GATCGAGTGAAAGGATAGGAA', 'CGGGTGAGTCGAGACGAGAA'
        ]

        reads_by_cond = read_seqs(data_dir,
                                  filename_end='trimmed.fq',
                                  cutoff=5.0)

        for idx, cond_name in enumerate(reads_by_cond):
            expected_chars = list(expected_firsts[idx])
            assert reads_by_cond[cond_name][0] == expected_chars
    def test_degen_cutoff(self):
        """Check the first read per condition with degen=5 and cutoff=5.8.

        For every condition the first read must equal one of the candidate
        sequences listed for it (compared as lists of single characters).
        Candidate tuples are paired with conditions by iteration order.
        """
        seqs_degen_cutoff = read_seqs(data_dir,
                                      filename_end='trimmed.fq',
                                      degen=5,
                                      cutoff=5.8)

        first_seqs = [
            ('G', 'GGAAACGGGAAAGCTAAATCAAGAGA',
             'GGAAACGGGAAAGCTAAATCAAGAG'),  # first seq too short
            ('AGA', 'AG'),
            ('AAAATAGCTGGA', 'AAAATAGCTGG'),
            ('ACGGGGGATGCAGAGGGGT', 'ACGGGGGATGCAGAGGGG'),
            ('AGTGAAAGGATAGGAA', 'AGTGAAAGGATAGGA'),
            ('GAGTCGAGACGAGAA', 'GAGTCGAGACGAGA'),
        ]

        for i, condition in enumerate(seqs_degen_cutoff):
            # Build a list, not a generator: a generator is consumed by the
            # membership test and appears as an opaque object in the
            # assertion failure output, hiding the accepted candidates.
            first_seq_possibilities = [list(seq) for seq in first_seqs[i]]
            assert seqs_degen_cutoff[condition][0] in first_seq_possibilities
 def test_p_discard_greater_than_1_raises_ValueError(self):
     """Check that read_seqs raises ValueError for a p_discard above 1."""
     too_large = 1.5
     with pytest.raises(ValueError):
         read_seqs(data_dir, filename_end='trimmed.fq', p_discard=too_large)
Example #10
0
 def test_neg_p_discard_raises_ValueError(self):
     """Check that read_seqs raises ValueError for a negative p_discard."""
     negative_p = -0.5
     with pytest.raises(ValueError):
         read_seqs(data_dir, filename_end='trimmed.fq', p_discard=negative_p)
Example #11
0
 def test_neg_cutoff_raises_ValueError(self):
     """Check that read_seqs raises ValueError for a negative cutoff."""
     negative_cutoff = -1
     with pytest.raises(ValueError):
         read_seqs(data_dir, filename_end='trimmed.fq', cutoff=negative_cutoff)
Example #12
0
 def seqs(self):
     """Call read_seqs on data_dir with only filename_end='trimmed.fq' set.

     NOTE(review): presumably a pytest fixture whose decorator lies outside
     this view -- confirm.
     """
     default_reads = read_seqs(data_dir, filename_end='trimmed.fq')
     return default_reads