def test_split_paired_reads_1_fa():
    """Split 'paired.fa' with default options and check both outputs.

    The temp directory is handed to ``utils.runscript`` as its third
    argument (presumably the working directory -- confirm in utils), and
    the results are expected there as 'paired.fa.1' and 'paired.fa.2'.
    """
    # test input file and the reference results
    infile = utils.get_test_data('paired.fa')
    expected_left = utils.get_test_data('paired.fa.1')
    expected_right = utils.get_test_data('paired.fa.2')

    # actual output files, both in the same temp directory
    outfile1 = utils.get_temp_filename('paired.fa.1')
    in_dir = os.path.dirname(outfile1)
    outfile2 = utils.get_temp_filename('paired.fa.2', in_dir)

    utils.runscript('split-paired-reads.py', [infile], in_dir)

    assert os.path.exists(outfile1), outfile1
    assert os.path.exists(outfile2), outfile2

    # zip() stops at the shorter file, so only the common prefix of records
    # is compared; the counter guards against comparing nothing at all.
    comparisons = ((expected_left, outfile1), (expected_right, outfile2))
    for expected, actual in comparisons:
        count = 0
        pairs = zip(screed.open(expected), screed.open(actual))
        for count, (want, got) in enumerate(pairs, start=1):
            assert want.name == got.name
            assert want.sequence == got.sequence
        assert count > 0
def test_split_paired_reads_3_output_files_right():
    """Split 'paired.fq' passing '-2 <file>' together with '-d <dir>'.

    The first output is expected under its default name 'paired.fq.1' in
    the directory given with '-d'; the second is expected at the path given
    with '-2'.  Names, sequences and qualities are compared.
    """
    # test input file and the reference results
    infile = utils.get_test_data('paired.fq')
    want_left = utils.get_test_data('paired.fq.1')
    want_right = utils.get_test_data('paired.fq.2')

    # actual output files; the second one gets a non-default name
    outfile1 = utils.get_temp_filename('paired.fq.1')
    output_dir = os.path.dirname(outfile1)
    outfile2 = utils.get_temp_filename('yyy', output_dir)

    cmdline = ['-2', outfile2, '-d', output_dir, infile]
    utils.runscript('split-paired-reads.py', cmdline)

    assert os.path.exists(outfile1), outfile1
    assert os.path.exists(outfile2), outfile2

    # zip() truncates to the shorter file; the counter only proves that at
    # least one record pair was compared.
    for reference, produced in ((want_left, outfile1),
                                (want_right, outfile2)):
        seen = 0
        records = zip(screed.open(reference), screed.open(produced))
        for seen, (want, got) in enumerate(records, start=1):
            assert want.name == got.name
            assert want.sequence == got.sequence
            assert want.quality == got.quality
        assert seen > 0
def test_extract_paired_reads_3_output_dir():
    """Run extract-paired-reads.py with '-d' and check the files it creates.

    The '.pe' and '.se' outputs are expected inside the directory passed
    with '-d', named after the input file.
    """
    # test input file and the reference results
    infile = utils.get_test_data('paired-mixed.fa')
    want_pe = utils.get_test_data('paired-mixed.fa.pe')
    want_se = utils.get_test_data('paired-mixed.fa.se')

    # output directory
    out_dir = utils.get_temp_filename('output')

    utils.runscript('extract-paired-reads.py', [infile, '-d', out_dir])

    got_pe = os.path.join(out_dir, 'paired-mixed.fa.pe')
    got_se = os.path.join(out_dir, 'paired-mixed.fa.se')
    assert os.path.exists(got_pe), got_pe
    assert os.path.exists(got_se), got_se

    # zip() ends with the shorter file, so extra trailing records on either
    # side go unnoticed; the counter ensures something was compared.
    for reference, produced in ((want_pe, got_pe), (want_se, got_se)):
        compared = 0
        records = zip(screed.open(reference), screed.open(produced))
        for compared, (want, got) in enumerate(records, start=1):
            assert want.name == got.name
            assert want.sequence == got.sequence
        assert compared > 0
def test_extract_paired_reads_2_fq():
    """Run extract-paired-reads.py on 'paired-mixed.fq' with default names.

    The temp directory is passed as the third ``runscript`` argument
    (presumably the working directory -- confirm in utils); the '.pe' and
    '.se' outputs are expected there and are compared on name, sequence
    and quality.
    """
    # test input file and the reference results
    infile = utils.get_test_data('paired-mixed.fq')
    want_pe = utils.get_test_data('paired-mixed.fq.pe')
    want_se = utils.get_test_data('paired-mixed.fq.se')

    # actual output files, both in the same temp directory
    outfile1 = utils.get_temp_filename('paired-mixed.fq.pe')
    in_dir = os.path.dirname(outfile1)
    outfile2 = utils.get_temp_filename('paired-mixed.fq.se', in_dir)

    utils.runscript('extract-paired-reads.py', [infile], in_dir)

    assert os.path.exists(outfile1), outfile1
    assert os.path.exists(outfile2), outfile2

    # Paired-end output; the failure message carries the record position.
    # (zip() stops at the shorter file in both loops below.)
    position = 0
    pe_records = zip(screed.open(want_pe), screed.open(outfile1))
    for position, (want, got) in enumerate(pe_records, start=1):
        assert want.name == got.name, (want.name, got.name, position)
        assert want.sequence == got.sequence
        assert want.quality == got.quality
    assert position > 0

    # Single-end (orphan) output.
    position = 0
    se_records = zip(screed.open(want_se), screed.open(outfile2))
    for position, (want, got) in enumerate(se_records, start=1):
        assert want.name == got.name
        assert want.sequence == got.sequence
        assert want.quality == got.quality
    assert position > 0
def test_extract_paired_reads_4_output_files():
    """Run extract-paired-reads.py with explicit '-p' and '-s' output paths.

    Both requested files must exist afterwards and match the reference
    '.pe' / '.se' data on record names and sequences.
    """
    # test input file and the reference results
    infile = utils.get_test_data('paired-mixed.fa')
    want_pe = utils.get_test_data('paired-mixed.fa.pe')
    want_se = utils.get_test_data('paired-mixed.fa.se')

    # actual output files, named explicitly on the command line
    got_pe = utils.get_temp_filename('out_pe')
    got_se = utils.get_temp_filename('out_se')

    cmdline = [infile, '-p', got_pe, '-s', got_se]
    utils.runscript('extract-paired-reads.py', cmdline)

    assert os.path.exists(got_pe), got_pe
    assert os.path.exists(got_se), got_se

    # zip() compares only as many records as the shorter file holds; the
    # counter makes sure the comparison was not empty.
    for reference, produced in ((want_pe, got_pe), (want_se, got_se)):
        matched = 0
        records = zip(screed.open(reference), screed.open(produced))
        for matched, (want, got) in enumerate(records, start=1):
            assert want.name == got.name
            assert want.sequence == got.sequence
        assert matched > 0
def test_split_paired_reads_2_mixed_fq_orphans_to_file():
    """Split a mixed FASTQ file, sending orphans to a file via '-0'.

    Checks the exit status, the summary line on stderr, the record counts
    of the orphan/left/right outputs, and that none of the outputs is
    gzip-compressed (no '--gzip' was requested).
    """
    # test input file (copied, so the '.1'/'.2' outputs land next to it)
    infile = utils.copy_test_data('paired-mixed-2.fq')
    in_dir = os.path.dirname(infile)
    outfile = utils.get_temp_filename('out.fq')

    script = 'split-paired-reads.py'
    args = ['-0', outfile, infile]

    status, out, err = utils.runscript(script, args, in_dir)
    assert status == 0
    assert "split 6 sequences (3 left, 3 right, 5 orphans)" in err, err

    n_orphans = sum(1 for _ in screed.open(outfile))
    assert n_orphans == 5
    n_left = sum(1 for _ in screed.open(infile + '.1'))
    assert n_left == 3
    n_right = sum(1 for _ in screed.open(infile + '.2'))
    assert n_right == 3

    for filename in [outfile, infile + '.1', infile + '.2']:
        # The context manager closes the handle even when an assertion
        # below fails.
        with gzip.open(filename) as fp:
            try:
                fp.read()
            except IOError as e:
                assert "Not a gzipped file" in str(e), str(e)
            else:
                # A successful read means the file *is* gzip data; without
                # this branch the check could never fail.
                raise AssertionError(
                    filename + " is gzip-compressed but --gzip was not given")
def test_split_paired_reads_2_stdin_no_out():
    """split-paired-reads.py given only '-' must exit 1 with an error.

    The error text on stderr must mention that output filenames are
    needed when input is accepted from stdin.
    """
    status, _out, err = utils.runscript(
        'split-paired-reads.py', ['-'], fail_ok=True)

    assert status == 1
    assert "Accepting input from stdin; output filenames must " in err
def test_extract_paired_reads_5_stdin_error():
    """extract-paired-reads.py with '-f /dev/stdin' must exit with status 1.

    stderr must state that output filenames have to be provided.
    """
    status, _out, err = utils.runscript(
        'extract-paired-reads.py', ['-f', '/dev/stdin'], fail_ok=True)

    assert status == 1
    assert "output filenames must be provided." in err
def test_interleave_read_stdout():
    """Interleave two FASTQ files without '-o' and check captured stdout.

    The stdout text returned by ``utils.runscript`` is written to a temp
    file so that it can be parsed with screed and compared with the
    reference interleaved file on record names and sequences.
    """
    # input files
    left = utils.get_test_data('paired-slash1.fq.1')
    right = utils.get_test_data('paired-slash1.fq.2')

    # correct output
    reference = utils.get_test_data('paired-slash1.fq')

    # file that will hold the captured stdout
    outfile = utils.get_temp_filename('out.fq')

    _status, out, _err = utils.runscript(
        'interleave-reads.py', [left, right])

    with open(outfile, 'w') as sink:
        sink.write(out)

    # zip() stops at the shorter input; the counter proves that at least
    # one record pair was compared.
    compared = 0
    records = zip(screed.open(reference), screed.open(outfile))
    for compared, (want, got) in enumerate(records, start=1):
        assert want.name == got.name
        assert want.sequence == got.sequence
    assert compared > 0
def test_interleave_reads_no_reformat():
    """Interleave with '--no-reformat' and compare the output verbatim.

    The produced file must be character-for-character identical to the
    reference file 'paired.malformat.fq'.
    """
    infile1 = utils.get_test_data('paired.fq.1')
    infile2 = utils.get_test_data('paired.malformat.fq.2')

    ex_outfile = utils.get_test_data('paired.malformat.fq')
    outfile = utils.get_temp_filename('out.fq')

    script = 'interleave-reads.py'
    args = [infile1, infile2, '--no-reformat', '-o', outfile]

    utils.runscript(script, args)

    # Read through context managers so both handles are closed promptly
    # instead of being left to the garbage collector.
    with open(ex_outfile) as expected_fp:
        r = expected_fp.read()
    with open(outfile) as actual_fp:
        q = actual_fp.read()

    assert r == q, (r, q)
def test_interleave_reads_1_fq():
    """Interleave 'paired.fq.1' and 'paired.fq.2' and compare verbatim.

    The file written via '-o' must be character-for-character identical
    to the reference file 'paired.fq'.
    """
    # test input files
    infile1 = utils.get_test_data('paired.fq.1')
    infile2 = utils.get_test_data('paired.fq.2')

    # correct output
    ex_outfile = utils.get_test_data('paired.fq')

    # actual output file
    outfile = utils.get_temp_filename('out.fq')

    script = 'interleave-reads.py'
    args = [infile1, infile2, '-o', outfile]

    utils.runscript(script, args)

    # Read through context managers so both handles are closed promptly
    # instead of being left to the garbage collector.
    with open(ex_outfile) as expected_fp:
        r = expected_fp.read()
    with open(outfile) as actual_fp:
        q = actual_fp.read()

    assert r == q, (r, q)
def test_split_paired_reads_2_mixed_fq_require_pair():
    """Splitting 'paired-mixed.fq' without '-0' must fail with status 1.

    stderr must report that unpaired reads were found.
    """
    # work on a copy of the test input, run from the copy's directory
    # (third runscript argument; presumably the working directory)
    infile = utils.copy_test_data('paired-mixed.fq')
    workdir = os.path.dirname(infile)

    status, _out, err = utils.runscript(
        'split-paired-reads.py', [infile], workdir, fail_ok=True)

    assert status == 1, status
    assert "Unpaired reads found" in err
def test_split_paired_reads_2_mixed_fq():
    """Split 'paired-mixed-2.fq' with '-0 /dev/null' and check the summary.

    The script must exit with status 0 and report 6 split sequences
    (3 left, 3 right) plus 5 orphans on stderr.
    """
    # work on a copy of the test input, run from the copy's directory
    # (third runscript argument; presumably the working directory)
    infile = utils.copy_test_data('paired-mixed-2.fq')
    workdir = os.path.dirname(infile)

    cmdline = ['-0', '/dev/null', infile]
    status, _out, err = utils.runscript(
        'split-paired-reads.py', cmdline, workdir)

    assert status == 0
    assert "split 6 sequences (3 left, 3 right, 5 orphans)" in err, err
def test_split_paired_reads_2_mixed_fq_broken_pairing_format():
    """Splitting 'paired-mixed-broken.fq' must fail and name the bad read.

    The script must exit with status 1 and stderr must say where the
    unpaired reads start (read 895:1:37:17593:9954).
    """
    # work on a copy of the test input, run from the copy's directory
    # (third runscript argument; presumably the working directory)
    infile = utils.copy_test_data('paired-mixed-broken.fq')
    workdir = os.path.dirname(infile)

    status, _out, err = utils.runscript(
        'split-paired-reads.py', [infile], workdir, fail_ok=True)

    assert status == 1
    assert "Unpaired reads found starting at 895:1:37:17593:9954" in err, err
def test_interleave_read_badleft_badright():
    """Interleave the 'badleft'/'badright' inputs and check the result.

    The file written via '-o' is compared with the reference
    'paired-broken.fq.paired_bad' on record names and sequences.
    """
    # input files
    left = utils.get_test_data('paired-broken.fq.badleft')
    right = utils.get_test_data('paired-broken.fq.badright')

    # correct output
    reference = utils.get_test_data('paired-broken.fq.paired_bad')

    # actual output file
    outfile = utils.get_temp_filename('out.fq')

    utils.runscript('interleave-reads.py', [left, right, '-o', outfile])

    # zip() stops at the shorter file; the counter proves that at least
    # one record pair was compared.
    compared = 0
    records = zip(screed.open(reference), screed.open(outfile))
    for compared, (want, got) in enumerate(records, start=1):
        assert want.name == got.name
        assert want.sequence == got.sequence
    assert compared > 0
def test_interleave_read_seq1_fq():
    """Interleave the 'paired-slash1' FASTQ pair into a file via '-o'.

    The output is compared with the reference 'paired-slash1.fq' on
    record names and sequences.
    """
    # input files
    left = utils.get_test_data('paired-slash1.fq.1')
    right = utils.get_test_data('paired-slash1.fq.2')

    # correct output
    reference = utils.get_test_data('paired-slash1.fq')

    # actual output file
    outfile = utils.get_temp_filename('out.fq')

    utils.runscript('interleave-reads.py', [left, right, '-o', outfile])

    # zip() stops at the shorter file; the counter proves that at least
    # one record pair was compared.
    compared = 0
    records = zip(screed.open(reference), screed.open(outfile))
    for compared, (want, got) in enumerate(records, start=1):
        assert want.name == got.name
        assert want.sequence == got.sequence
    assert compared > 0
def test_interleave_reads_2_fa():
    """Interleave 'paired.fa.1' and 'paired.fa.2' into a file via '-o'.

    The output is compared with the reference 'paired.fa' on record names
    and sequences.
    """
    # test input files
    left = utils.get_test_data('paired.fa.1')
    right = utils.get_test_data('paired.fa.2')

    # correct output
    reference = utils.get_test_data('paired.fa')

    # actual output file
    outfile = utils.get_temp_filename('out.fa')

    utils.runscript('interleave-reads.py', [left, right, '-o', outfile])

    # zip() stops at the shorter file; the counter proves that at least
    # one record pair was compared.
    compared = 0
    records = zip(screed.open(reference), screed.open(outfile))
    for compared, (want, got) in enumerate(records, start=1):
        assert want.name == got.name
        assert want.sequence == got.sequence
    assert compared > 0
def test_interleave_reads_broken_fq():
    """Interleaving the 'paired-broken.fq' pair must exit with status 1.

    stderr must report that the input files contain a different number of
    records.
    """
    # test input files
    left = utils.get_test_data('paired-broken.fq.1')
    right = utils.get_test_data('paired-broken.fq.2')

    # actual output file
    outfile = utils.get_temp_filename('out.fq')

    status, _out, err = utils.runscript(
        'interleave-reads.py', [left, right, '-o', outfile], fail_ok=True)

    assert status == 1
    assert 'ERROR: Input files contain different number of records.' in err
def test_interleave_reads_broken_fq_5():
    """Interleaving the 'paired-broken4.fq' pair must exit with status 1.

    stderr must state that the input does not look like paired data.
    """
    # test input files
    left = utils.get_test_data('paired-broken4.fq.1')
    right = utils.get_test_data('paired-broken4.fq.2')

    # actual output file
    outfile = utils.get_temp_filename('out.fq')

    status, _out, err = utils.runscript(
        'interleave-reads.py', [left, right, '-o', outfile], fail_ok=True)

    assert status == 1
    assert "ERROR: This doesn't look like paired data!" in err
def test_extract_paired_reads_unpaired():
    """extract-paired-reads.py on 'random-20-a.fa' must report no pairs.

    Only the stderr message is checked; the exit status is not inspected
    (the script is run with ``fail_ok=True``).
    """
    # test input file
    infile = utils.get_test_data('random-20-a.fa')

    # Temp output names.  Only the directory of the first is used below;
    # the second call is kept in case get_temp_filename has side effects
    # (not visible from this test).
    pe_path = utils.get_temp_filename('unpaired.pe.fa')
    in_dir = os.path.dirname(pe_path)
    utils.get_temp_filename('unpaired.se.fa', in_dir)

    _status, _out, err = utils.runscript(
        'extract-paired-reads.py', [infile], in_dir, fail_ok=True)

    assert 'no paired reads!? check file formats...' in err, err
def test_split_paired_reads_2_mixed_fq_gzfile():
    """Split a mixed FASTQ file with '--gzip' and orphans sent to a file.

    Checks the exit status, the summary line on stderr, the record counts
    of the orphan/left/right outputs, and that every output can be read
    as gzip data.
    """
    # test input file (copied, so the '.1'/'.2' outputs land next to it)
    infile = utils.copy_test_data('paired-mixed-2.fq')
    in_dir = os.path.dirname(infile)
    outfile = utils.get_temp_filename('out.fq')

    script = 'split-paired-reads.py'
    args = ['-0', outfile, '--gzip', infile]

    status, out, err = utils.runscript(script, args, in_dir)
    assert status == 0
    assert "split 6 sequences (3 left, 3 right, 5 orphans)" in err, err

    n_orphans = sum(1 for _ in screed.open(outfile))
    assert n_orphans == 5
    n_left = sum(1 for _ in screed.open(infile + '.1'))
    assert n_left == 3
    n_right = sum(1 for _ in screed.open(infile + '.2'))
    assert n_right == 3

    for filename in [outfile, infile + '.1', infile + '.2']:
        # The context manager closes the handle even when read() raises.
        with gzip.open(filename) as fp:
            fp.read()       # this will fail if not gzip file.