def test_calculate_stats():
    """Check the text reports built by calculate_sequence_stats.

    First the five ``pairend{1..5}.sfastq`` fixtures are read as SEQRECORD
    objects and the length, quality, quality-boxplot, nucleotide-frequency
    and kmer reports are checked. Then ``arabidopsis_genes`` is used to check
    the optional kmer and dust reports.
    """
    # NOTE(review): a later definition in this file reuses this function
    # name, which would replace this test at import time -- confirm which
    # one is meant to run.
    in_fhands = []
    try:
        for val in range(1, 6):
            fname = 'pairend{0}.sfastq'.format(val)
            in_fhands.append(open(join(TEST_DATA_DIR, fname)))

        seqs = read_seqs(in_fhands, prefered_seq_classes=[SEQRECORD])
        results = calculate_sequence_stats(seqs, nxs=[50])

        assert 'maximum: 4' in results['length']
        assert 'N50' in results['length']
        assert '1:30.0,30.0,30.0,30.0,30.0 <[|]>' in results['qual_boxplot']
        assert '[30 , 31[ (96): **********' in results['quality']
        assert 'Q30: 100.0' in results['quality']
        assert '0 (A: 1.00, C: 0.00, G: 0.00, T: 0.00' in results['nucl_freq']
        assert results['kmer'] == ''
    finally:
        # The handles were previously left open; always release them, even
        # when an assertion fails.
        for fhand in in_fhands:
            fhand.close()

    infhands = []
    try:
        infhands.append(open(join(TEST_DATA_DIR, 'arabidopsis_genes')))
        # Materialised as a list because the same sequences feed several
        # calculate_sequence_stats calls below.
        seqs = list(read_seqs(infhands, prefered_seq_classes=[SEQRECORD]))

        # kmer report is only produced when a kmer_size is requested
        kmers = calculate_sequence_stats(seqs)['kmer']
        assert 'Kmer distribution' not in kmers

        kmers = calculate_sequence_stats(seqs, kmer_size=3)['kmer']
        assert 'Kmer distribution' in kmers
        assert 'TCT: 167' in kmers

        # dust report is only produced when do_dust_stats is set
        dust = calculate_sequence_stats(seqs)['dustscore']
        assert not dust

        dust = calculate_sequence_stats(seqs, do_dust_stats=True)['dustscore']
        assert 'average: 1.83\nvariance: 0.14\nnum. seqs.: 6\n' in dust
        assert '% above 7 (low complexity): 0.00' in dust
    finally:
        for fhand in infhands:
            fhand.close()
def test_calculate_stats():
    """Check the text reports returned by calculate_sequence_stats.

    The five ``pairend{1..5}.sfastq`` fixtures are read as fastq and the
    returned 5-tuple (lengths, quality, nucleotide frequency, quality
    boxplot, kmers) is checked. Then ``arabidopsis_genes`` is read as fasta
    twice to check the kmer report with and without ``kmer_size``.
    """
    # NOTE(review): an earlier definition in this file uses the same
    # function name, so only one of the two is visible after import --
    # confirm which one is meant to run.
    in_fhands = []
    try:
        for val in range(1, 6):
            fname = 'pairend{0}.sfastq'.format(val)
            in_fhands.append(open(join(TEST_DATA_DIR, fname)))

        seqs = read_seqrecords(in_fhands, file_format='fastq')
        (lengths_srt, qual_str, freq_str, qual_boxplot,
         kmers) = calculate_sequence_stats(seqs)

        assert 'maximum: 4' in lengths_srt
        assert 'Q30: 100.0' in qual_str
        assert '1:30.0,30.0,30.0,30.0,30.0 <[|]>' in qual_boxplot
        assert '[30 , 31[ (96): **********' in qual_str
        assert '0 (A: 1.00, C: 0.00, G: 0.00, T: 0.00, N: 0.00) |' in freq_str
        assert kmers == ''
    finally:
        # The handles were previously left open; always release them, even
        # when an assertion fails.
        for fhand in in_fhands:
            fhand.close()

    # Without kmer_size no kmer report is expected. The kmer report is the
    # last element of the returned tuple.
    fhand = open(join(TEST_DATA_DIR, 'arabidopsis_genes'))
    try:
        seqs = read_seqrecords([fhand], file_format='fasta')
        kmers = calculate_sequence_stats(seqs)[-1]
        assert 'Kmer distribution' not in kmers
    finally:
        fhand.close()

    # The file is reopened, as in the original test, so the second run
    # reads the sequences from the start.
    fhand = open(join(TEST_DATA_DIR, 'arabidopsis_genes'))
    try:
        seqs = read_seqrecords([fhand], file_format='fasta')
        kmers = calculate_sequence_stats(seqs, kmer_size=3)[-1]
        assert 'Kmer distribution' in kmers
        assert 'TCT: 167' in kmers
    finally:
        fhand.close()
def test_calculate_stats_seqitems():
    """Check calculate_sequence_stats when the sequences are read as SEQITEM.

    The five ``pairend{1..5}.sfastq`` fixtures are read with
    ``prefered_seq_classes=[SEQITEM]`` and the length, quality,
    quality-boxplot, nucleotide-frequency and kmer reports are checked.
    """
    in_fhands = []
    try:
        for val in range(1, 6):
            fname = 'pairend{0}.sfastq'.format(val)
            in_fhands.append(open(join(TEST_DATA_DIR, fname)))

        seqs = read_seqs(in_fhands, prefered_seq_classes=[SEQITEM])
        results = calculate_sequence_stats(seqs, nxs=[50])

        assert 'maximum: 4' in results['length']
        assert 'N50' in results['length']
        assert '1:30.0,30.0,30.0,30.0,30.0 <[|]>' in results['qual_boxplot']
        assert '[30 , 31[ (96): **********' in results['quality']
        assert 'Q30: 100.0' in results['quality']
        assert '0 (A: 1.00, C: 0.00, G: 0.00, T: 0.00' in results['nucl_freq']
        # no kmer_size was requested, so the kmer report is empty
        assert results['kmer'] == ''
    finally:
        # The handles were previously left open; always release them, even
        # when an assertion fails.
        for fhand in in_fhands:
            fhand.close()