def run_interleaved(params, inpath1, inpath2=None, expected1=None, expected2=None, cores=1):
    """
    Interleaved input or output (or both)

    Runs main() with --interleaved added to the given options and compares
    the output file(s) with reference files.

    params -- options for main(): a list of arguments or a single string
        that is split on whitespace. A list passed in is not modified.
    inpath1, inpath2 -- input file names; looked up via datapath()
    expected1, expected2 -- reference file names; looked up via cutpath().
        expected1 must be a string despite its default of None because the
        temporary output name is derived from it.
    cores -- value passed on to --cores
    """
    assert not (inpath1 and inpath2 and expected1 and expected2)
    assert not (expected2 and not expected1)
    assert not (inpath2 and not inpath1)
    if isinstance(params, str):
        params = params.split()
    else:
        # Work on a copy: the "+=" below would otherwise extend the
        # caller's list in place.
        params = list(params)
    params += ['--interleaved', '--cores', str(cores), '--buffer-size=512']
    with temporary_path('tmp1-' + expected1) as tmp1:
        params += ['-o', tmp1]
        paths = [datapath(inpath1)]
        if inpath2:
            paths += [datapath(inpath2)]
        if expected2:
            with temporary_path('tmp2-' + expected2) as tmp2:
                params += ['-p', tmp2]
                assert main(params + paths) is None
                assert_files_equal(cutpath(expected2), tmp2)
        else:
            assert main(params + paths) is None
        assert_files_equal(cutpath(expected1), tmp1)
def test_no_trimming_legacy():
    """Paired-end run where only an R1 adapter (-a) is given."""
    # make sure that this doesn't divide by zero
    options = ["-a", "XXXXX", "-o", os.devnull, "-p", os.devnull]
    inputs = [datapath("paired.1.fastq"), datapath("paired.2.fastq")]
    main(options + inputs)
def test_write_compressed_fastq(cores, tmpdir):
    """Run main() with an output path that ends in .fastq.gz."""
    compressed_out = str(tmpdir.join('out.fastq.gz'))
    argv = ['--cores', str(cores), '-o', compressed_out]
    argv.append(datapath('small.fastq'))
    main(argv)
def test_no_trimming():
    """Paired-end run with the adapter XXXXX given for both reads."""
    # make sure that this doesn't divide by zero
    options = ['-a', 'XXXXX', '-A', 'XXXXX', '-o', '/dev/null', '-p', '/dev/null']
    inputs = [datapath('paired.1.fastq'), datapath('paired.2.fastq')]
    main(options + inputs)
def test_missing_file(tmpdir):
    """--paired-output combined with a single input file must raise SystemExit."""
    with pytest.raises(SystemExit):
        argv = ["--paired-output", str(tmpdir.join("out.fastq"))]
        argv.append(datapath("paired.1.fastq"))
        main(argv)
def test_combinatorial_demultiplexing(tmpdir, discarduntrimmed, cores):
    """
    Demultiplex paired-end reads with {name1}/{name2} in the output names.

    Output files for every R1/R2 adapter combination must exist. Files
    involving "unknown" must exist only when --discard-untrimmed is not used,
    and must be absent when it is.
    """
    params = [
        "-g", "A=^AAAAAAAAAA",
        "-g", "C=^CCCCCCCCCC",
        "-G", "G=^GGGGGGGGGG",
        "-G", "T=^TTTTTTTTTT",
        "-o", str(tmpdir.join("combinatorial.{name1}_{name2}.1.fastq")),
        "-p", str(tmpdir.join("combinatorial.{name1}_{name2}.2.fastq")),
        "--cores", str(cores),
        datapath("combinatorial.1.fastq"),
        datapath("combinatorial.2.fastq"),
    ]
    if discarduntrimmed:
        params.append("--discard-untrimmed")

    # Each entry is (name1, name2, whether the file must exist)
    expectations = [(first, second, True) for first, second in product("AC", "GT")]
    keep_untrimmed = not discarduntrimmed
    expectations.append(("unknown", "unknown", keep_untrimmed))
    expectations.extend((first, "unknown", keep_untrimmed) for first in "AC")
    expectations.extend(("unknown", second, keep_untrimmed) for second in "GT")

    main(params)

    for (name1, name2, should_exist), i in product(expectations, (1, 2)):
        name = "combinatorial.{name1}_{name2}.{i}.fastq".format(name1=name1, name2=name2, i=i)
        path = cutpath(os.path.join("combinatorial", name))
        produced = tmpdir.join(name)
        if not should_exist:
            assert not produced.check(), ("Output file should not exist", name)
            continue
        assert produced.check(), ("Output file missing", name)
        # Contents are compared only if a reference file is available
        if os.path.exists(path):
            assert_files_equal(path, str(produced))
def test_rename_cannot_be_combined_with_other_renaming_options(opt):
    """Giving the option in opt together with --rename must raise SystemExit."""
    with pytest.raises(SystemExit):
        argv = [opt, "something"]
        argv += ["--rename='{id} {comment} extrainfo'", "-o", "/dev/null"]
        argv.append(datapath("empty.fastq"))
        main(argv)
def test_missing_file():
    """--paired-output combined with a single input file must raise SystemExit."""
    with pytest.raises(SystemExit), redirect_stderr():
        argv = ['-a', 'XX', '--paired-output', 'out.fastq']
        argv.append(datapath('paired.1.fastq'))
        main(argv)
def test_no_trimming():
    """Paired-end run with the adapter XXXXX given for both reads."""
    # make sure that this doesn't divide by zero
    options = ["-a", "XXXXX", "-A", "XXXXX", "-o", "/dev/null", "-p", "/dev/null"]
    inputs = [datapath("paired.1.fastq"), datapath("paired.2.fastq")]
    main(options + inputs)
def test_separate_minmaxlength(tmpdir, name_op, l1, l2, m):
    """
    Separate length limits for R1 and R2

    Writes one interleaved pair with read lengths l1 and l2, runs main() with
    the option "-<name> <m1>:<m2>" and expects the pair in the output only if
    func(length, limit) holds for every limit that is not None.
    """
    name, func = name_op
    m1, m2 = m

    def tmp(filename):
        return str(tmpdir.join(filename))

    inpath = tmp('separate_minlength.fasta')
    expected = tmp('separate_minlength_expected.fasta')
    outpath = tmp('out.fasta')

    record = '>r{}:{}\n{}\n'.format(l1, l2, 'A' * l1)
    record += '>r{}:{}\n{}'.format(l1, l2, 'A' * l2)
    with open(inpath, 'w') as infile:
        print(record, file=infile)
    # The expected file is always created; it stays empty if the pair is filtered
    with open(expected, 'w') as reference:
        filtered = (m1 is not None and not func(l1, m1)) or (m2 is not None and not func(l2, m2))
        if not filtered:
            print(record, file=reference)
    assert os.path.exists(inpath)
    assert os.path.exists(expected)

    # A limit of None is passed as an empty field
    limits = '{}:{}'.format('' if m1 is None else m1, '' if m2 is None else m2)
    main(['--interleaved', '-o', outpath, '-' + name, limits, inpath])
    assert_files_equal(expected, outpath)
def test_p_without_o(cores):
    """Option -p given but -o missing"""
    with pytest.raises(SystemExit):
        argv = ["-a", "XX", "-p", os.devnull, "--cores", str(cores)]
        argv.extend([datapath("paired.1.fastq"), datapath("paired.2.fastq")])
        main(argv)
def _run(params, inpath1, inpath2=None, expected1=None, expected2=None, cores=1):
    """
    Run main() with --interleaved and compare the output with reference files.

    params -- options for main(): a list of arguments or a single string
        that is split on whitespace. A list passed in is not modified.
    inpath1, inpath2 -- input file names; looked up via datapath()
    expected1, expected2 -- reference file names; looked up via cutpath().
        expected1 must be a string despite its default of None because the
        output file name is derived from it.
    cores -- value passed on to --cores

    NOTE(review): tmpdir is not a parameter; presumably it is provided by an
    enclosing fixture that is not visible here -- confirm.
    """
    assert not (inpath1 and inpath2 and expected1 and expected2)
    assert not (expected2 and not expected1)
    assert not (inpath2 and not inpath1)
    if isinstance(params, str):
        params = params.split()
    else:
        # Work on a copy: the "+=" below would otherwise extend the
        # caller's list in place.
        params = list(params)
    params += ["--interleaved", "--cores", str(cores), "--buffer-size=512"]
    tmp1 = str(tmpdir.join("out1-" + expected1))
    params += ["-o", tmp1]
    paths = [datapath(inpath1)]
    if inpath2:
        paths += [datapath(inpath2)]
    if expected2:
        tmp2 = str(tmpdir.join("out2-" + expected2))
        params += ["-p", tmp2]
        assert main(params + paths) is None
        assert_files_equal(cutpath(expected2), tmp2)
    else:
        assert main(params + paths) is None
    assert_files_equal(cutpath(expected1), tmp1)
def test_separate_minmaxlength(tmpdir, name_op, l1, l2, m):
    """
    Separate length limits for R1 and R2

    Writes one interleaved pair with read lengths l1 and l2, runs main() with
    the option "-<name> <m1>:<m2>" and expects the pair in the output only if
    func(length, limit) holds for every limit that is not None.
    """
    name, func = name_op
    m1, m2 = m

    def tmp(filename):
        return str(tmpdir.join(filename))

    inpath = tmp("separate_minlength.fasta")
    expected = tmp("separate_minlength_expected.fasta")
    outpath = tmp("out.fasta")

    record = ">r{}:{}\n{}\n".format(l1, l2, "A" * l1)
    record += ">r{}:{}\n{}".format(l1, l2, "A" * l2)
    with open(inpath, "w") as infile:
        print(record, file=infile)
    # The expected file is always created; it stays empty if the pair is filtered
    with open(expected, "w") as reference:
        filtered = (m1 is not None and not func(l1, m1)) or (m2 is not None and not func(l2, m2))
        if not filtered:
            print(record, file=reference)
    assert os.path.exists(inpath)
    assert os.path.exists(expected)

    # A limit of None is passed as an empty field
    limits = "{}:{}".format("" if m1 is None else m1, "" if m2 is None else m2)
    main(["--interleaved", "-o", outpath, "-" + name, limits, inpath])
    assert_files_equal(expected, outpath)
def test_missing_file(tmpdir):
    """--paired-output combined with a single input file must raise SystemExit."""
    with redirect_stderr(), pytest.raises(SystemExit):
        argv = ['--paired-output', str(tmpdir.join('out.fastq'))]
        argv.append(datapath('paired.1.fastq'))
        main(argv)
def test_interleaved_neither_nor(tmpdir):
    """Option --interleaved used, but pairs of files given for input and output"""
    out1 = str(tmpdir.join("temp-paired.1.fastq"))
    out2 = str(tmpdir.join("temp-paired.2.fastq"))
    argv = ["-a", "XX", "--interleaved"]
    argv.extend(["-o", out1, "-p1", out2, "paired.1.fastq", "paired.2.fastq"])
    with pytest.raises(SystemExit):
        main(argv)
def test_compressed_output(tmp_path, cores, extension):
    """Run main() with an output file named small.fastq.<extension>."""
    target = tmp_path / ("small.fastq." + extension)
    argv = ["--cores", str(cores)]
    argv += ["-a", "TTAGACATATCTCCGTCG"]
    argv += ["-o", str(target), datapath("small.fastq")]
    main(argv)
def test_interleaved_neither_nor():
    """Option --interleaved used, but pairs of files given for input and output"""
    with temporary_path("temp-paired.1.fastq") as out1, \
            temporary_path("temp-paired.2.fastq") as out2:
        argv = ['-a', 'XX', '--interleaved']
        argv.extend(['-o', out1, '-p1', out2, 'paired.1.fastq', 'paired.2.fastq'])
        with redirect_stderr(), pytest.raises(SystemExit):
            main(argv)
def test_interleaved_neither_nor(tmpdir):
    """Option --interleaved used, but pairs of files given for input and output"""
    out1 = str(tmpdir.join("temp-paired.1.fastq"))
    out2 = str(tmpdir.join("temp-paired.2.fastq"))
    argv = ["-a", "XX", "--interleaved"]
    argv.extend(["-o", out1, "-p1", out2, "paired.1.fastq", "paired.2.fastq"])
    with redirect_stderr(), pytest.raises(SystemExit):
        main(argv)
def test_linked_info_file(tmpdir):
    """Compare the --info-file output for a linked adapter with linked-info.txt."""
    info_path = str(tmpdir.join('info.txt'))
    out_path = str(tmpdir.join('out.fasta'))
    argv = ['-a linkedadapter=^AAAAAAAAAA...TTTTTTTTTT']
    argv += ['--info-file', info_path]
    argv += ['-o', out_path, datapath('linked.fasta')]
    main(argv)
    assert_files_equal(cutpath('linked-info.txt'), info_path)
def test_issue_296(tmpdir):
    """Hang when using both --no-trim and --info-file together"""
    info_path, reads_path, out_path = (
        str(tmpdir.join(filename))
        for filename in ('info.txt', 'reads.fasta', 'out.fasta')
    )
    with open(reads_path, 'w') as reads_file:
        reads_file.write('>read\nCACAAA\n')
    argv = ['--info-file', info_path, '--no-trim', '-g', 'TTTCAC']
    argv += ['-o', out_path, reads_path]
    main(argv)
    # Output should be unchanged because of --no-trim
    assert_files_equal(reads_path, out_path)
def test_info_file_revcomp(run, tmp_path):
    """Compare the --info-file output of a --revcomp run with info-rc.txt."""
    info_path = tmp_path / "info-rc.txt"
    out_path = tmp_path / "out.fasta"
    argv = ["--info-file", str(info_path)]
    argv += ["-a", "adapt=GAGTCG", "--revcomp", "--rename={header}"]
    argv += ["-o", str(out_path), datapath("info-rc.fasta")]
    main(argv)
    assert_files_equal(cutpath("info-rc.txt"), info_path)
def test_info_file(tmpdir):
    """Run main() on paired-end input with --info-file given."""
    info_path = str(tmpdir.join("info.txt"))
    argv = ["--info-file", info_path]
    argv += ["-o", str(tmpdir.join("out.1.fastq"))]
    argv += ["-p", str(tmpdir.join("out.2.fastq"))]
    argv += [datapath("paired.1.fastq"), datapath("paired.2.fastq")]
    main(argv)
def test_interleaved_untrimmed_output(tmpdir):
    """The --untrimmed-output file must equal the interleaved input file."""
    def tmp(filename):
        return str(tmpdir.join(filename))

    o1 = tmp("out.1.fastq")
    o2 = tmp("out.2.fastq")
    untrimmed = tmp("untrimmed.interleaved.fastq")
    argv = ["--interleaved", "-a", "XXXX", "-o", o1, "-p", o2]
    argv += ["--untrimmed-output", untrimmed, datapath("interleaved.fastq")]
    main(argv)
    assert_files_equal(datapath("interleaved.fastq"), untrimmed)
def test_pair_adapters_unequal_length(tmpdir):
    """--paired-adapters with two -a adapters but one -A adapter must raise SystemExit."""
    with pytest.raises(SystemExit):
        argv = ["--paired-adapters"]
        # Two R1 adapters
        argv += ["-a", "GTCTCCAGCT", "-a", "ACGTACGT"]
        # But only one R2 adapter
        argv += ["-A", "TGCA"]
        argv += ["-o", str(tmpdir.join("out.1.fastq"))]
        argv += ["-p", str(tmpdir.join("out.2.fastq"))]
        argv += [datapath("paired.1.fastq"), datapath("paired.2.fastq")]
        main(argv)
def test_linked_info_file(tmp_path):
    """Compare the --info-file output for a linked adapter with linked-info.txt."""
    info_path = tmp_path / 'info.txt'
    out_path = tmp_path / 'out.fasta'
    argv = ['-a linkedadapter=^AAAAAAAAAA...TTTTTTTTTT']
    argv += ['--info-file', str(info_path)]
    argv += ['-o', str(out_path), datapath('linked.fasta')]
    main(argv)
    assert_files_equal(cutpath('linked-info.txt'), info_path, ignore_trailing_space=True)
def test_second_too_short(cores):
    """A second input file that lacks the last read must raise SystemExit."""
    with pytest.raises(SystemExit), temporary_path("truncated.2.fastq") as trunc2:
        # Create a truncated file in which the last read is missing
        with open(datapath('paired.2.fastq')) as complete:
            kept = complete.readlines()[:-4]
        with open(trunc2, 'w') as truncated:
            truncated.writelines(kept)
        with redirect_stderr():
            argv = ['-a', 'XX', '-o', '/dev/null', '--paired-output', 'out.fastq']
            argv += ['--cores', str(cores)]
            argv += [datapath('paired.1.fastq'), trunc2]
            main(argv)
def test_reverse_complement_no_rc_suffix(run, tmp_path):
    """With --revcomp and --rename {header}, the second read is named read2/1."""
    out_path = tmp_path / "out.fastq"
    argv = ["-o", str(out_path)]
    argv += ["--revcomp", "--no-index", "--rename", "{header}"]
    argv += ["-g", "^TTATTTGTCT", "-g", "^TCCGCACTGG"]
    argv.append(datapath("revcomp.1.fastq"))
    main(argv)
    with dnaio.open(out_path) as reader:
        reads = [record for record in reader]
    assert len(reads) == 6
    second = reads[1]
    assert second.name == "read2/1"
    assert second.sequence == "ACCATCCGATATGTCTAATGTGGCCTGTTG"
def test_unmatched_read_names(cores):
    """An R1 file whose second and third records are swapped must raise SystemExit."""
    with pytest.raises(SystemExit), temporary_path("swapped.1.fastq") as swapped:
        # Create a file in which reads 2 and 1 are swapped
        with open(datapath('paired.1.fastq')) as original:
            lines = original.readlines()
        reordered = lines[0:4] + lines[8:12] + lines[4:8] + lines[12:]
        with open(swapped, 'w') as target:
            target.writelines(reordered)
        with redirect_stderr():
            argv = ['-a', 'XX', '-o', 'out1.fastq', '--paired-output', 'out2.fastq']
            argv += ['--cores', str(cores)]
            argv += [swapped, datapath('paired.2.fastq')]
            main(argv)
def test_overlap_a(tmpdir, length):
    """-O/--overlap with -a"""
    adapter = "catatctccg"
    record = ">read\nGAGACCATTCCAATG" + adapter[:length] + '\n'
    reads_file = tmpdir.join("overlap.fasta")
    reads_file.write(record)
    trimmed_file = tmpdir.join("overlap-trimmed.fasta")

    # With -O 7, an adapter prefix shorter than 7 is expected to be left in place
    expected = record if length < 7 else '>read\nGAGACCATTCCAATG\n'

    argv = ["-O", "7", "-e", "0", "-a", adapter]
    argv += ["-o", str(trimmed_file), str(reads_file)]
    main(argv)
    assert expected == trimmed_file.read()
def test_quiet_is_quiet():
    """With --quiet, nothing is written to sys.stdout or sys.stderr."""
    out_buffer, err_buffer = StringIO(), StringIO()
    saved_streams = (sys.stdout, sys.stderr)
    try:
        sys.stdout, sys.stderr = out_buffer, err_buffer
        main(['-o', '/dev/null', '--quiet', '-a', 'XXXX', datapath('illumina.fastq.gz')])
    finally:
        # Always put the real streams back, even if main() fails
        sys.stdout, sys.stderr = saved_streams
    assert out_buffer.getvalue() == ''
    assert err_buffer.getvalue() == ''
def test_second_too_short(tmpdir, cores):
    """A second input file that lacks the last read must raise SystemExit."""
    # Create a truncated file in which the last read is missing
    with open(datapath("paired.2.fastq")) as complete:
        kept = complete.readlines()[:-4]
    trunc2 = tmpdir.join("truncated.2.fastq")
    trunc2.write("".join(kept))

    with pytest.raises(SystemExit):
        argv = ["-o", "/dev/null"]
        argv += ["--paired-output", str(tmpdir.join("out.fastq"))]
        argv += ["--cores", str(cores)]
        argv += [datapath("paired.1.fastq"), str(trunc2)]
        main(argv)
def test_unmatched_read_names(tmpdir, cores):
    """An R1 file whose second and third records are swapped must raise SystemExit."""
    # Create a file in which reads 2 and 1 are swapped
    with open(datapath("paired.1.fastq")) as original:
        lines = original.readlines()
    reordered = lines[0:4] + lines[8:12] + lines[4:8] + lines[12:]
    swapped = tmpdir.join("swapped.1.fastq")
    swapped.write("".join(reordered))

    with pytest.raises(SystemExit):
        argv = ["-o", str(tmpdir.join("out1.fastq"))]
        argv += ["--paired-output", str(tmpdir.join("out2.fastq"))]
        argv += ["--cores", str(cores)]
        argv += [str(swapped), datapath("paired.2.fastq")]
        main(argv)
def test_second_too_short(tmpdir, cores):
    """A second input file that lacks the last read must raise SystemExit."""
    # Create a truncated file in which the last read is missing
    with open(datapath("paired.2.fastq")) as complete:
        kept = complete.readlines()[:-4]
    trunc2 = tmpdir.join("truncated.2.fastq")
    trunc2.write("".join(kept))

    with redirect_stderr(), pytest.raises(SystemExit):
        argv = ["-o", "/dev/null"]
        argv += ["--paired-output", str(tmpdir.join("out.fastq"))]
        argv += ["--cores", str(cores)]
        argv += [datapath("paired.1.fastq"), str(trunc2)]
        main(argv)
def test_demultiplex():
    """
    Demultiplex single-end reads into one file per adapter name.

    The {name} placeholder in the output path must yield separate files for
    "first", "second" and "unknown", each matching its reference file.
    """
    tempdir = tempfile.mkdtemp(prefix='cutadapt-tests.')
    try:
        multiout = os.path.join(tempdir, 'tmp-demulti.{name}.fasta')
        params = [
            '-a', 'first=AATTTCAGGAATT',
            '-a', 'second=GTTCTCTAGTTCT',
            '-o', multiout,
            datapath('twoadapters.fasta'),
        ]
        assert main(params) is None
        assert_files_equal(cutpath('twoadapters.first.fasta'), multiout.format(name='first'))
        assert_files_equal(cutpath('twoadapters.second.fasta'), multiout.format(name='second'))
        assert_files_equal(cutpath('twoadapters.unknown.fasta'), multiout.format(name='unknown'))
    finally:
        # Remove the directory even if main() or a comparison fails, so that
        # failing runs do not leave temporary directories behind.
        shutil.rmtree(tempdir)
def _run(params, in1, in2, expected1, expected2, cores):
    """
    Run main() on a pair of input files and compare both outputs with references.

    params -- options for main(): a list of arguments or a single string
        that is split on whitespace. A list passed in is not modified.
    in1, in2 -- input file names; looked up via datapath()
    expected1, expected2 -- reference file names; looked up via cutpath().
        The same names are used for the output files written below tmpdir.
    cores -- value passed on to --cores

    NOTE(review): tmpdir is not a parameter; presumably it is provided by an
    enclosing fixture that is not visible here -- confirm.
    """
    if isinstance(params, str):
        params = params.split()
    else:
        # Work on a copy: the "+=" below would otherwise extend the
        # caller's list in place.
        params = list(params)
    params += ["--cores", str(cores), "--buffer-size=512"]
    path1 = str(tmpdir.join(expected1))
    path2 = str(tmpdir.join(expected2))
    params += ["-o", path1, "-p", path2]
    params += [datapath(in1), datapath(in2)]
    assert main(params) is None
    assert_files_equal(cutpath(expected1), path1)
    assert_files_equal(cutpath(expected2), path2)
def test_separate_minmaxlength(tmpdir, name_op, l1, l2, m):
    """
    Separate length limits for R1 and R2

    Writes one interleaved pair with read lengths l1 and l2, runs main() with
    the option "-<name> <m1>:<m2>" and expects the pair in the output only if
    func(length, limit) holds for every limit that is not None.
    """
    name, func = name_op
    m1, m2 = m

    def tmp(filename):
        return str(tmpdir.join(filename))

    inpath = tmp("separate_minlength.fasta")
    expected = tmp("separate_minlength_expected.fasta")
    outpath = tmp("out.fasta")

    record = ">r{}:{}\n{}\n".format(l1, l2, "A" * l1)
    record += ">r{}:{}\n{}".format(l1, l2, "A" * l2)
    with open(inpath, "w") as infile:
        print(record, file=infile)
    # The expected file is always created; it stays empty if the pair is filtered
    with open(expected, "w") as reference:
        filtered = (m1 is not None and not func(l1, m1)) or (m2 is not None and not func(l2, m2))
        if not filtered:
            print(record, file=reference)
    assert os.path.exists(inpath)
    assert os.path.exists(expected)

    # A limit of None is passed as an empty field
    limits = "{}:{}".format("" if m1 is None else m1, "" if m2 is None else m2)
    main(["--interleaved", "-o", outpath, "-" + name, limits, inpath])
    assert_files_equal(expected, outpath)
def test_paired_demultiplex(tmpdir):
    """
    Demultiplex paired-end reads using {name} in both output paths.

    For each of "first", "second" and "unknown", the R1 and R2 output files
    must match their reference files.
    """
    multiout1 = str(tmpdir.join("demultiplexed.{name}.1.fastq"))
    multiout2 = str(tmpdir.join("demultiplexed.{name}.2.fastq"))
    params = ["-a", "first=AACATTAGACA", "-a", "second=CATTAGACATATCGG"]
    params += ["-A", "ignored=CAGTGGAGTA", "-A", "alsoignored=AATAACAGTGGAGTA"]
    params += ["-o", multiout1, "-p", multiout2]
    params += [datapath("paired.1.fastq"), datapath("paired.2.fastq")]
    assert main(params) is None

    # All R1 files are checked first, then all R2 files
    for read_number, template in ((1, multiout1), (2, multiout2)):
        for name in ("first", "second", "unknown"):
            reference = "demultiplexed.{}.{}.fastq".format(name, read_number)
            assert_files_equal(cutpath(reference), template.format(name=name))
def test_pair_adapters_demultiplexing(tmpdir):
    """
    Demultiplex with --pair-adapters and {name} in both output paths.

    Files for i1, i2 and unknown must exist for both reads and match their
    reference files.
    """
    params = [
        "-g", "i1=AAAA",
        "-G", "i1=GGGG",
        "-g", "i2=CCCC",
        "-G", "i2=TTTT",
        "--pair-adapters",
        "-o", str(tmpdir.join("dual-{name}.1.fastq")),
        "-p", str(tmpdir.join("dual-{name}.2.fastq")),
        datapath("dual-index.1.fastq"),
        datapath("dual-index.2.fastq"),
    ]
    assert main(params) is None
    for adapter_name in ("i1", "i2", "unknown"):
        for read_number in (1, 2):
            name = "dual-{}.{}.fastq".format(adapter_name, read_number)
            produced = tmpdir.join(name)
            assert produced.check()
            assert_files_equal(cutpath(name), str(produced))
def test_no_trimming():
    """Paired-end run with the adapter XXXXX given for both reads."""
    # make sure that this doesn't divide by zero
    options = ["-a", "XXXXX", "-A", "XXXXX", "-o", "/dev/null", "-p", "/dev/null"]
    inputs = [datapath("paired.1.fastq"), datapath("paired.2.fastq")]
    main(options + inputs)
def test_linked_anywhere():
    """Passing the linked adapter AAA...TTT to -b must raise SystemExit."""
    with pytest.raises(SystemExit), redirect_stderr():
        argv = ['-b', 'AAA...TTT']
        argv.append(datapath('linked.fasta'))
        main(argv)
def test_x_brace_notation():
    """Run main() with the adapter given as X{5}."""
    argv = ['-o', '/dev/null', '--quiet', '-a', 'X{5}']
    argv.append(datapath('small.fastq'))
    main(argv)
def test_missing_file(tmpdir):
    """--paired-output combined with a single input file must raise SystemExit."""
    with redirect_stderr(), pytest.raises(SystemExit):
        argv = ["--paired-output", str(tmpdir.join("out.fastq"))]
        argv.append(datapath("paired.1.fastq"))
        main(argv)
def test_non_iupac_characters(run):
    """Passing the adapter ZACGT must raise SystemExit."""
    with pytest.raises(SystemExit), redirect_stderr():
        argv = ['-a', 'ZACGT']
        argv.append(datapath('small.fastq'))
        main(argv)
def test_write_compressed_fastq(cores, tmpdir):
    """Run main() with an output path that ends in .fastq.gz."""
    compressed_out = str(tmpdir.join('out.fastq.gz'))
    argv = ['--cores', str(cores), '-o', compressed_out]
    argv.append(datapath('small.fastq'))
    main(argv)
def test_p_without_o(cores):
    """Option -p given but -o missing"""
    with pytest.raises(SystemExit):
        argv = ["-a", "XX", "-p", "/dev/null", "--cores", str(cores)]
        argv.extend([datapath("paired.1.fastq"), datapath("paired.2.fastq")])
        main(argv)
def test_paired_but_only_one_input_file(cores):
    """Option -p given but only one input file"""
    with pytest.raises(SystemExit):
        argv = ["-a", "XX", "-o", "/dev/null", "-p", "/dev/null"]
        argv.extend(["--cores", str(cores)])
        argv.append(datapath("paired.1.fastq"))
        main(argv)
def test_separate_minlength_single():
    """Using separate minlengths for single-end data"""
    with pytest.raises(SystemExit):
        argv = ["-m", "5:7"]
        argv.append(datapath("small.fastq"))
        main(argv)
def test_no_args():
    """Calling main() with an empty argument list must raise SystemExit."""
    no_arguments = []
    with pytest.raises(SystemExit), redirect_stderr():
        main(no_arguments)
def test_anywhere_anchored_3p():
    """Passing the adapter TTT$ to -b must raise SystemExit."""
    with pytest.raises(SystemExit), redirect_stderr():
        argv = ['-b', 'TTT$']
        argv.append(datapath('small.fastq'))
        main(argv)
def test_two_fastqs():
    """Giving two input files and no other options must raise SystemExit."""
    with pytest.raises(SystemExit), redirect_stderr():
        inputs = [datapath('paired.1.fastq'), datapath('paired.2.fastq')]
        main(inputs)