def test_calculate_stats():
        """Check the text reports returned by calculate_sequence_stats.

        Three scenarios are exercised:

        * the five ``pairend{1..5}.sfastq`` files read as SEQRECORD with
          ``nxs=[50]``: length, quality, boxplot, nucleotide frequency and
          (empty) kmer reports;
        * the ``arabidopsis_genes`` file with and without ``kmer_size``;
        * the same sequences with and without ``do_dust_stats``.
        """
        in_fhands = []
        try:
            for val in range(1, 6):
                path = join(TEST_DATA_DIR, 'pairend{0}.sfastq'.format(val))
                in_fhands.append(open(path))
            seqs = read_seqs(in_fhands, prefered_seq_classes=[SEQRECORD])
            results = calculate_sequence_stats(seqs, nxs=[50])
        finally:
            # Closed only after the stats call: read_seqs may well read the
            # files lazily (not visible from here), so they must stay open
            # until the sequences have been consumed.
            for fhand in in_fhands:
                fhand.close()

        assert 'maximum: 4' in results['length']
        assert 'N50' in results['length']
        assert '1:30.0,30.0,30.0,30.0,30.0 <[|]>' in results['qual_boxplot']
        assert '[30 , 31[ (96): **********' in results['quality']
        assert 'Q30: 100.0' in results['quality']
        assert '0 (A: 1.00, C: 0.00, G: 0.00, T: 0.00' in results['nucl_freq']
        assert results['kmer'] == ''

        with open(join(TEST_DATA_DIR, 'arabidopsis_genes')) as genes_fhand:
            # list() pulls every sequence while the file is still open, so
            # the list can be reused by all the calls below.
            seqs = list(read_seqs([genes_fhand],
                                  prefered_seq_classes=[SEQRECORD]))

        # Without kmer_size no kmer report is expected.
        kmers = calculate_sequence_stats(seqs)['kmer']
        assert 'Kmer distribution' not in kmers

        kmers = calculate_sequence_stats(seqs, kmer_size=3)['kmer']
        assert 'Kmer distribution' in kmers
        assert 'TCT: 167' in kmers

        # dust: the report is empty unless do_dust_stats is requested
        dust = calculate_sequence_stats(seqs)['dustscore']
        assert not dust
        dust = calculate_sequence_stats(seqs, do_dust_stats=True)['dustscore']
        assert 'average: 1.83\nvariance: 0.14\nnum. seqs.: 6\n' in dust
        assert '% above 7 (low complexity): 0.00' in dust
    def test_calculate_stats():
        """Check the text reports returned by calculate_sequence_stats.

        In this version the function returns a 5-item sequence that is
        unpacked as (lengths, quality, nucleotide freq, quality boxplot,
        kmers). The five ``pairend{1..5}.sfastq`` files are checked first,
        then the ``arabidopsis_genes`` fasta file with and without
        ``kmer_size``.
        """
        in_fhands = []
        try:
            for val in range(1, 6):
                path = join(TEST_DATA_DIR, 'pairend{0}.sfastq'.format(val))
                in_fhands.append(open(path))
            seqs = read_seqrecords(in_fhands, file_format='fastq')
            (lengths_srt, qual_str, freq_str,
             qual_boxplot, kmers) = calculate_sequence_stats(seqs)
        finally:
            # Closed only after the stats call: read_seqrecords may read the
            # files lazily (not visible from here).
            for fhand in in_fhands:
                fhand.close()

        assert 'maximum: 4' in lengths_srt
        assert 'Q30: 100.0' in qual_str
        assert '1:30.0,30.0,30.0,30.0,30.0 <[|]>' in qual_boxplot
        assert '[30 , 31[ (96): **********' in qual_str
        assert '0 (A: 1.00, C: 0.00, G: 0.00, T: 0.00, N: 0.00) |' in freq_str
        assert kmers == ''

        genes_path = join(TEST_DATA_DIR, 'arabidopsis_genes')

        # The file is opened afresh for each run, as in the original test;
        # the stats call stays inside the ``with`` so the reader can still
        # pull data from the open file.
        with open(genes_path) as genes_fhand:
            seqs = read_seqrecords([genes_fhand], file_format='fasta')
            kmers = calculate_sequence_stats(seqs)[-1]
        assert 'Kmer distribution' not in kmers

        with open(genes_path) as genes_fhand:
            seqs = read_seqrecords([genes_fhand], file_format='fasta')
            kmers = calculate_sequence_stats(seqs, kmer_size=3)[-1]
        assert 'Kmer distribution' in kmers
        assert 'TCT: 167' in kmers
Example #3
0
    def test_calculate_stats():
        """Check the text reports returned by calculate_sequence_stats.

        Three scenarios are exercised:

        * the five ``pairend{1..5}.sfastq`` files read as SEQRECORD with
          ``nxs=[50]``: length, quality, boxplot, nucleotide frequency and
          (empty) kmer reports;
        * the ``arabidopsis_genes`` file with and without ``kmer_size``;
        * the same sequences with and without ``do_dust_stats``.
        """
        in_fhands = []
        try:
            for val in range(1, 6):
                path = join(TEST_DATA_DIR, 'pairend{0}.sfastq'.format(val))
                in_fhands.append(open(path))
            seqs = read_seqs(in_fhands, prefered_seq_classes=[SEQRECORD])
            results = calculate_sequence_stats(seqs, nxs=[50])
        finally:
            # Closed only after the stats call: read_seqs may well read the
            # files lazily (not visible from here), so they must stay open
            # until the sequences have been consumed.
            for fhand in in_fhands:
                fhand.close()

        assert 'maximum: 4' in results['length']
        assert 'N50' in results['length']
        assert '1:30.0,30.0,30.0,30.0,30.0 <[|]>' in results['qual_boxplot']
        assert '[30 , 31[ (96): **********' in results['quality']
        assert 'Q30: 100.0' in results['quality']
        assert '0 (A: 1.00, C: 0.00, G: 0.00, T: 0.00' in results['nucl_freq']
        assert results['kmer'] == ''

        with open(join(TEST_DATA_DIR, 'arabidopsis_genes')) as genes_fhand:
            # list() pulls every sequence while the file is still open, so
            # the list can be reused by all the calls below.
            seqs = list(read_seqs([genes_fhand],
                                  prefered_seq_classes=[SEQRECORD]))

        # Without kmer_size no kmer report is expected.
        kmers = calculate_sequence_stats(seqs)['kmer']
        assert 'Kmer distribution' not in kmers

        kmers = calculate_sequence_stats(seqs, kmer_size=3)['kmer']
        assert 'Kmer distribution' in kmers
        assert 'TCT: 167' in kmers

        # dust: the report is empty unless do_dust_stats is requested
        dust = calculate_sequence_stats(seqs)['dustscore']
        assert not dust
        dust = calculate_sequence_stats(seqs, do_dust_stats=True)['dustscore']
        assert 'average: 1.83\nvariance: 0.14\nnum. seqs.: 6\n' in dust
        assert '% above 7 (low complexity): 0.00' in dust
 def test_calculate_stats_seqitems():
     """Check calculate_sequence_stats on sequences read as SEQITEM.

     The five ``pairend{1..5}.sfastq`` files are read with
     ``prefered_seq_classes=[SEQITEM]`` and the length, quality, boxplot,
     nucleotide frequency and (empty) kmer reports are checked.
     """
     in_fhands = []
     try:
         for val in range(1, 6):
             path = join(TEST_DATA_DIR, 'pairend{0}.sfastq'.format(val))
             in_fhands.append(open(path))
         seqs = read_seqs(in_fhands, prefered_seq_classes=[SEQITEM])
         results = calculate_sequence_stats(seqs, nxs=[50])
     finally:
         # Closed only after the stats call: read_seqs may read the files
         # lazily (not visible from here).
         for fhand in in_fhands:
             fhand.close()

     assert 'maximum: 4' in results['length']
     assert 'N50' in results['length']
     assert '1:30.0,30.0,30.0,30.0,30.0 <[|]>' in results['qual_boxplot']
     assert '[30 , 31[ (96): **********' in results['quality']
     assert 'Q30: 100.0' in results['quality']
     assert '0 (A: 1.00, C: 0.00, G: 0.00, T: 0.00' in results['nucl_freq']
     assert results['kmer'] == ''
Example #5
0
 def test_calculate_stats_seqitems():
     """Check calculate_sequence_stats on sequences read as SEQITEM.

     The five ``pairend{1..5}.sfastq`` files are read with
     ``prefered_seq_classes=[SEQITEM]`` and the length, quality, boxplot,
     nucleotide frequency and (empty) kmer reports are checked.
     """
     in_fhands = []
     try:
         for val in range(1, 6):
             path = join(TEST_DATA_DIR, 'pairend{0}.sfastq'.format(val))
             in_fhands.append(open(path))
         seqs = read_seqs(in_fhands, prefered_seq_classes=[SEQITEM])
         results = calculate_sequence_stats(seqs, nxs=[50])
     finally:
         # Closed only after the stats call: read_seqs may read the files
         # lazily (not visible from here).
         for fhand in in_fhands:
             fhand.close()

     assert 'maximum: 4' in results['length']
     assert 'N50' in results['length']
     assert '1:30.0,30.0,30.0,30.0,30.0 <[|]>' in results['qual_boxplot']
     assert '[30 , 31[ (96): **********' in results['quality']
     assert 'Q30: 100.0' in results['quality']
     assert '0 (A: 1.00, C: 0.00, G: 0.00, T: 0.00' in results['nucl_freq']
     assert results['kmer'] == ''