def test_paired_demultiplex():
    """Demultiplex paired-end input into per-adapter output file pairs.

    The ``-o``/``-p`` paths contain a ``{name}`` placeholder. The test expects
    one file per read for each of the names ``first``, ``second`` and
    ``unknown`` and compares each with its reference under ``cutpath``.
    """
    tempdir = tempfile.mkdtemp(prefix='cutadapt-tests.')
    # try/finally: the scratch directory must be removed even when the run or
    # one of the comparisons below fails.
    try:
        multiout1 = os.path.join(tempdir, 'demultiplexed.{name}.1.fastq')
        multiout2 = os.path.join(tempdir, 'demultiplexed.{name}.2.fastq')
        params = [
            '-a', 'first=AACATTAGACA',
            '-a', 'second=CATTAGACATATCGG',
            '-A', 'ignored=CAGTGGAGTA',
            '-A', 'alsoignored=AATAACAGTGGAGTA',
            '-o', multiout1,
            '-p', multiout2,
            datapath('paired.1.fastq'), datapath('paired.2.fastq'),
        ]
        assert main(params) is None
        assert_files_equal(cutpath('demultiplexed.first.1.fastq'), multiout1.format(name='first'))
        assert_files_equal(cutpath('demultiplexed.second.1.fastq'), multiout1.format(name='second'))
        assert_files_equal(cutpath('demultiplexed.unknown.1.fastq'), multiout1.format(name='unknown'))
        assert_files_equal(cutpath('demultiplexed.first.2.fastq'), multiout2.format(name='first'))
        assert_files_equal(cutpath('demultiplexed.second.2.fastq'), multiout2.format(name='second'))
        assert_files_equal(cutpath('demultiplexed.unknown.2.fastq'), multiout2.format(name='unknown'))
    finally:
        shutil.rmtree(tempdir)
def test_no_trimming_legacy():
    """Run paired-end input with only ``-a`` given and both outputs discarded.

    Guards against a division by zero during the run.
    """
    options = ["-a", "XXXXX", "-o", os.devnull, "-p", os.devnull]
    main(options + [datapath("paired.1.fastq"), datapath("paired.2.fastq")])
def test_unmatched_read_names():
    """Run on paired input whose first file has two reads swapped.

    ``paired-swapped.1.fastq`` is ``paired.1.fastq`` with reads 2 and 3
    swapped. Standard error is redirected for the duration of the run.
    """
    with redirect_stderr():
        arguments = '-a XX --paired-output out.fastq'.split()
        arguments.append(datapath('paired-swapped.1.fastq'))
        arguments.append(datapath('paired.2.fastq'))
        cutadapt.main(arguments)
def test_combinatorial_demultiplexing(tmpdir, discarduntrimmed, cores):
    """Demultiplex with ``{name1}``/``{name2}`` placeholders in both output paths.

    Output files for every combination of A/C with G/T must exist. Files whose
    name contains ``unknown`` must exist unless ``discarduntrimmed`` is set, in
    which case ``--discard-untrimmed`` is passed and they must be absent.
    Existing outputs are compared with a reference file when one is present.
    """
    name_template = "combinatorial.{name1}_{name2}.{i}.fastq"

    params = "-g A=^AAAAAAAAAA -g C=^CCCCCCCCCC -G G=^GGGGGGGGGG -G T=^TTTTTTTTTT".split()
    params.extend(["-o", str(tmpdir.join("combinatorial.{name1}_{name2}.1.fastq"))])
    params.extend(["-p", str(tmpdir.join("combinatorial.{name1}_{name2}.2.fastq"))])
    params.extend(["--cores", str(cores)])
    params.extend([datapath("combinatorial.1.fastq"), datapath("combinatorial.2.fastq")])

    # Name pairs in which at least one side is "unknown"
    with_unknown = [("unknown", "unknown")]
    with_unknown.extend((first, "unknown") for first in "AC")
    with_unknown.extend(("unknown", second) for second in "GT")

    # Each entry is (name1, name2, whether the output file must exist)
    expectations = [(first, second, True) for first, second in product("AC", "GT")]
    expectations += [
        (first, second, not discarduntrimmed) for first, second in with_unknown
    ]
    if discarduntrimmed:
        params.append("--discard-untrimmed")

    main(params)

    for name1, name2, should_exist in expectations:
        for i in (1, 2):
            name = name_template.format(name1=name1, name2=name2, i=i)
            reference = cutpath(os.path.join("combinatorial", name))
            produced = tmpdir.join(name)
            if not should_exist:
                assert not produced.check(), ("Output file should not exist", name)
                continue
            assert produced.check(), ("Output file missing", name)
            # Content is only checked when a reference file is available
            if os.path.exists(reference):
                assert_files_equal(reference, str(produced))
def run_interleaved(params, inpath1, inpath2=None, expected1=None, expected2=None, cores=1):
    """
    Interleaved input or output (or both)

    Run ``main`` with ``--interleaved`` and compare the output with references.

    params -- extra command-line options, as a whitespace-separated string or
        as a sequence of strings (the sequence is copied, never modified)
    inpath1, inpath2 -- input file names, resolved with ``datapath``
    expected1, expected2 -- reference file names, resolved with ``cutpath``;
        ``expected1`` is needed to build the name of the first output file
    cores -- value passed to ``--cores``
    """
    assert not (inpath1 and inpath2 and expected1 and expected2)
    assert not (expected2 and not expected1)
    assert not (inpath2 and not inpath1)
    # Always work on a private list so the caller's argument list is untouched
    params = params.split() if isinstance(params, str) else list(params)
    params += ['--interleaved', '--cores', str(cores), '--buffer-size=512']
    with temporary_path('tmp1-' + expected1) as tmp1:
        params += ['-o', tmp1]
        paths = [datapath(inpath1)]
        if inpath2:
            paths += [datapath(inpath2)]
        if expected2:
            with temporary_path('tmp2-' + expected2) as tmp2:
                params += ['-p', tmp2]
                assert main(params + paths) is None
                assert_files_equal(cutpath(expected2), tmp2)
        else:
            assert main(params + paths) is None
        assert_files_equal(cutpath(expected1), tmp1)
def test_no_trimming():
    """Run paired-end input with ``-a`` and ``-A`` given and outputs discarded.

    Guards against a division by zero during the run.
    """
    # os.devnull instead of a hard-coded "/dev/null" keeps the test portable
    main([
        "-a", "XXXXX",
        "-A", "XXXXX",
        "-o", os.devnull,
        "-p", os.devnull,
        datapath("paired.1.fastq"), datapath("paired.2.fastq"),
    ])
def _run(params, inpath1, inpath2=None, expected1=None, expected2=None, cores=1):
    """Run ``main`` with ``--interleaved`` and compare output with references.

    ``tmpdir`` is not a parameter; it is taken from the enclosing scope, which
    is outside this block.

    params -- extra command-line options, as a whitespace-separated string or
        as a sequence of strings (the sequence is copied, never modified)
    inpath1, inpath2 -- input file names, resolved with ``datapath``
    expected1, expected2 -- reference file names, resolved with ``cutpath``;
        ``expected1`` is needed to build the name of the first output file
    cores -- value passed to ``--cores``
    """
    assert not (inpath1 and inpath2 and expected1 and expected2)
    assert not (expected2 and not expected1)
    assert not (inpath2 and not inpath1)
    # Always work on a private list so the caller's argument list is untouched
    params = params.split() if isinstance(params, str) else list(params)
    params += ["--interleaved", "--cores", str(cores), "--buffer-size=512"]
    tmp1 = str(tmpdir.join("out1-" + expected1))
    params += ["-o", tmp1]
    paths = [datapath(inpath1)]
    if inpath2:
        paths += [datapath(inpath2)]
    if expected2:
        tmp2 = str(tmpdir.join("out2-" + expected2))
        params += ["-p", tmp2]
        assert main(params + paths) is None
        assert_files_equal(cutpath(expected2), tmp2)
    else:
        assert main(params + paths) is None
    assert_files_equal(cutpath(expected1), tmp1)
def test_p_without_o(cores):
    """Option -p given but -o missing"""
    with pytest.raises(SystemExit):
        arguments = ["-a", "XX", "-p", os.devnull]
        arguments.extend(["--cores", str(cores)])
        arguments.extend([datapath("paired.1.fastq"), datapath("paired.2.fastq")])
        main(arguments)
def test_no_trimming():
    """Run paired-end input with ``-a`` and ``-A`` given and outputs discarded.

    Guards against a division by zero during the run.
    """
    # os.devnull instead of a hard-coded '/dev/null' keeps the test portable
    main([
        '-a', 'XXXXX',
        '-A', 'XXXXX',
        '-o', os.devnull,
        '-p', os.devnull,
        datapath('paired.1.fastq'), datapath('paired.2.fastq'),
    ])
def test_second_too_short():
    """Run on paired input whose second file is one read short.

    ``paired-truncated.2.fastq`` is ``paired.2.fastq`` without the last read.
    Standard error is redirected for the duration of the run.
    """
    with redirect_stderr():
        arguments = '-a XX --paired-output out.fastq'.split()
        arguments.append(datapath('paired.1.fastq'))
        arguments.append(datapath('paired-truncated.2.fastq'))
        cutadapt.main(arguments)
def test_interleaved_untrimmed_output(tmpdir):
    """Check that ``--untrimmed-output`` reproduces the interleaved input.

    Runs with ``--interleaved`` and adapter ``XXXX`` and expects the untrimmed
    output file to equal ``interleaved.fastq``.
    """
    out1, out2, untrimmed_path = (
        str(tmpdir.join(filename))
        for filename in ("out.1.fastq", "out.2.fastq", "untrimmed.interleaved.fastq")
    )
    arguments = ["--interleaved", "-a", "XXXX", "-o", out1, "-p", out2]
    arguments += ["--untrimmed-output", untrimmed_path]
    arguments.append(datapath("interleaved.fastq"))
    main(arguments)
    assert_files_equal(datapath("interleaved.fastq"), untrimmed_path)
def test_info_file(tmpdir):
    """Run paired-end input with ``--info-file`` and expect ``main`` to return None."""
    arguments = ["--info-file", str(tmpdir.join("info.txt"))]
    arguments += ["-o", str(tmpdir.join("out.1.fastq"))]
    arguments += ["-p", str(tmpdir.join("out.2.fastq"))]
    arguments += [datapath("paired.1.fastq"), datapath("paired.2.fastq")]
    result = main(arguments)
    assert result is None
def run_paired(params, in1, in2, expected1, expected2, cores):
    """Run ``main`` on paired-end input and compare both outputs with references.

    params -- extra command-line options, as a whitespace-separated string or
        as a sequence of strings (the sequence is copied, never modified)
    in1, in2 -- input file names, resolved with ``datapath``
    expected1, expected2 -- reference file names, resolved with ``cutpath``
    cores -- value passed to ``--cores``
    """
    # Always work on a private list so the caller's argument list is untouched
    params = params.split() if isinstance(params, str) else list(params)
    params += ['--cores', str(cores), '--buffer-size=512']
    with temporary_path('tmp1-' + expected1) as p1:
        with temporary_path('tmp2-' + expected2) as p2:
            params += ['-o', p1, '-p', p2]
            params += [datapath(in1), datapath(in2)]
            assert main(params) is None
            assert_files_equal(cutpath(expected1), p1)
            assert_files_equal(cutpath(expected2), p2)
def _run(params, expected, inpath, inpath2=None):
    """Run ``main`` and compare its ``-o`` output with a reference file.

    ``tmpdir`` is not a parameter; it is taken from the enclosing scope, which
    is outside this block.

    params -- command-line options, as a whitespace-separated string or as a
        sequence of strings (the sequence is copied, never modified)
    expected -- reference file name, resolved with ``cutpath``; also used as
        the output file name inside ``tmpdir``
    inpath, inpath2 -- input file names, resolved with ``datapath``
    """
    # Always work on a private list so the caller's argument list is untouched
    params = params.split() if isinstance(params, str) else list(params)
    tmp_fastaq = str(tmpdir.join(expected))
    params += ['-o', tmp_fastaq]
    params += [datapath(inpath)]
    if inpath2:
        params += [datapath(inpath2)]
    assert main(params) is None
    # TODO redirect standard output
    assert_files_equal(cutpath(expected), tmp_fastaq)
def test_pair_adapters_unequal_length(tmpdir):
    """Expect ``SystemExit`` when paired adapters are given in unequal numbers.

    Two ``-a`` adapters are combined with a single ``-A`` adapter.
    """
    with pytest.raises(SystemExit):
        main([
            # Spelled like the option used by test_pair_adapters_demultiplexing.
            # A misspelled option would make the run exit while parsing
            # arguments, so the unequal-count check would never be reached.
            "--pair-adapters",
            "-a", "GTCTCCAGCT", "-a", "ACGTACGT",  # Two R1 adapters
            "-A", "TGCA",  # But only one R2 adapter
            "-o", str(tmpdir.join("out.1.fastq")),
            "-p", str(tmpdir.join("out.2.fastq")),
            datapath("paired.1.fastq"), datapath("paired.2.fastq"),
        ])
def _run(params, in1, in2, expected1, expected2, cores):
    """Run ``main`` on paired-end input and compare both outputs with references.

    ``tmpdir`` is not a parameter; it is taken from the enclosing scope, which
    is outside this block.

    params -- extra command-line options, as a whitespace-separated string or
        as a sequence of strings (the sequence is copied, never modified)
    in1, in2 -- input file names, resolved with ``datapath``
    expected1, expected2 -- reference file names, resolved with ``cutpath``;
        also used as the output file names inside ``tmpdir``
    cores -- value passed to ``--cores``
    """
    # Always work on a private list so the caller's argument list is untouched
    params = params.split() if isinstance(params, str) else list(params)
    params += ["--cores", str(cores), "--buffer-size=512"]
    path1 = str(tmpdir.join(expected1))
    path2 = str(tmpdir.join(expected2))
    params += ["-o", path1, "-p", path2]
    params += [datapath(in1), datapath(in2)]
    assert main(params) is None
    assert_files_equal(cutpath(expected1), path1)
    assert_files_equal(cutpath(expected2), path2)
def test_explicit_format_with_paired():
    """Use --format=fastq with input files whose extension is .txt"""
    with temporary_path("paired.1.txt") as txt1, temporary_path("paired.2.txt") as txt2:
        # Copy the FASTQ inputs to paths ending in .txt
        for original, renamed in (("paired.1.fastq", txt1), ("paired.2.fastq", txt2)):
            shutil.copyfile(datapath(original), renamed)
        run_paired(
            '--format=fastq -a TTAGACATAT -m 14',
            in1=txt1,
            in2=txt2,
            expected1='paired.m14.1.fastq',
            expected2='paired.m14.2.fastq',
            cores=1,
        )
def test_explicit_format_with_paired(tmpdir, run_paired):
    """Use --format=fastq with input files whose extension is .txt"""
    renamed1 = str(tmpdir.join("paired.1.txt"))
    renamed2 = str(tmpdir.join("paired.2.txt"))
    # Copy the FASTQ inputs to paths ending in .txt
    for original, renamed in (("paired.1.fastq", renamed1), ("paired.2.fastq", renamed2)):
        shutil.copyfile(datapath(original), renamed)
    run_paired(
        "--format=fastq -a TTAGACATAT -m 14 -q 10",
        in1=renamed1,
        in2=renamed2,
        expected1="paired.m14.1.fastq",
        expected2="paired.m14.2.fastq",
        cores=1,
    )
def test_explicit_format_with_paired(tmpdir, run_paired):
    """Use --format=fastq with input files whose extension is .txt"""
    text_inputs = {
        "in1": str(tmpdir.join("paired.1.txt")),
        "in2": str(tmpdir.join("paired.2.txt")),
    }
    # Copy the FASTQ inputs to paths ending in .txt
    shutil.copyfile(datapath("paired.1.fastq"), text_inputs["in1"])
    shutil.copyfile(datapath("paired.2.fastq"), text_inputs["in2"])
    run_paired(
        "--format=fastq -a TTAGACATAT -m 14 -q 10",
        expected1="paired.m14.1.fastq",
        expected2="paired.m14.2.fastq",
        cores=1,
        **text_inputs
    )
def test_unmatched_read_names(cores):
    """Expect ``SystemExit`` when the two input files list reads in different order.

    A copy of ``paired.1.fastq`` is written in which the second and third
    four-line blocks are exchanged, and is used as the first input.
    """
    with pytest.raises(SystemExit), temporary_path("swapped.1.fastq") as swapped:
        with open(datapath('paired.1.fastq')) as source:
            original = source.readlines()
        # Exchange lines 4-7 with lines 8-11; everything else keeps its place
        head, block_a, block_b, tail = (
            original[0:4], original[4:8], original[8:12], original[12:]
        )
        reordered = head + block_b + block_a + tail
        with open(swapped, 'w') as target:
            target.writelines(reordered)
        with redirect_stderr():
            arguments = '-a XX -o out1.fastq --paired-output out2.fastq'.split()
            arguments += ['--cores', str(cores)]
            arguments += [swapped, datapath('paired.2.fastq')]
            main(arguments)
def test_second_too_short(cores):
    """Expect ``SystemExit`` when the second input file is one read short.

    A copy of ``paired.2.fastq`` without its last four lines is written and
    used as the second input.
    """
    with pytest.raises(SystemExit):
        with temporary_path("truncated.2.fastq") as trunc2:
            # Create a truncated file in which the last read is missing
            with open(datapath('paired.2.fastq')) as f:
                lines = f.readlines()
                lines = lines[:-4]
            with open(trunc2, 'w') as f:
                f.writelines(lines)
            with redirect_stderr():
                # os.devnull instead of a hard-coded '/dev/null' keeps the
                # test portable
                main(
                    ['-a', 'XX', '-o', os.devnull, '--paired-output', 'out.fastq']
                    + ['--cores', str(cores)]
                    + [datapath('paired.1.fastq'), trunc2]
                )
def test_paired_demultiplex(tmpdir):
    """Demultiplex paired-end input into per-adapter output file pairs.

    The ``-o``/``-p`` paths contain a ``{name}`` placeholder. The test expects
    one file per read for each of the names ``first``, ``second`` and
    ``unknown`` and compares each with its reference under ``cutpath``.
    """
    templates = {
        1: str(tmpdir.join("demultiplexed.{name}.1.fastq")),
        2: str(tmpdir.join("demultiplexed.{name}.2.fastq")),
    }
    arguments = [
        "-a", "first=AACATTAGACA",
        "-a", "second=CATTAGACATATCGG",
        "-A", "ignored=CAGTGGAGTA",
        "-A", "alsoignored=AATAACAGTGGAGTA",
        "-o", templates[1],
        "-p", templates[2],
    ]
    arguments += [datapath("paired.1.fastq"), datapath("paired.2.fastq")]
    assert main(arguments) is None
    # All read-1 files are checked first, then all read-2 files
    for read in (1, 2):
        for adapter_name in ("first", "second", "unknown"):
            reference = "demultiplexed.{}.{}.fastq".format(adapter_name, read)
            assert_files_equal(cutpath(reference), templates[read].format(name=adapter_name))
def test_second_too_short(tmpdir, cores):
    """Expect ``SystemExit`` when the second input file is one read short.

    A copy of ``paired.2.fastq`` without its last four lines is written into
    ``tmpdir`` and used as the second input.
    """
    # Create a truncated file in which the last read is missing
    trunc2 = tmpdir.join("truncated.2.fastq")
    with open(datapath("paired.2.fastq")) as f:
        lines = f.readlines()
        lines = lines[:-4]
    trunc2.write("".join(lines))
    with pytest.raises(SystemExit):
        main([
            # os.devnull instead of a hard-coded "/dev/null" keeps the test portable
            "-o", os.devnull,
            "--paired-output", str(tmpdir.join("out.fastq")),
            "--cores", str(cores),
            datapath("paired.1.fastq"), str(trunc2),
        ])
def test_second_too_short(tmpdir, cores):
    """Expect ``SystemExit`` when the second input file is one read short.

    A copy of ``paired.2.fastq`` without its last four lines is written into
    ``tmpdir`` and used as the second input. Standard error is redirected
    while the run takes place.
    """
    # Create a truncated file in which the last read is missing
    trunc2 = tmpdir.join("truncated.2.fastq")
    with open(datapath("paired.2.fastq")) as f:
        lines = f.readlines()
        lines = lines[:-4]
    trunc2.write("".join(lines))
    with redirect_stderr():
        with pytest.raises(SystemExit):
            main([
                # os.devnull instead of a hard-coded "/dev/null" keeps the
                # test portable
                "-o", os.devnull,
                "--paired-output", str(tmpdir.join("out.fastq")),
                "--cores", str(cores),
                datapath("paired.1.fastq"), str(trunc2),
            ])
def test_unmatched_read_names(tmpdir, cores):
    """Expect ``SystemExit`` when the two input files list reads in different order.

    A copy of ``paired.1.fastq`` is written into ``tmpdir`` in which the second
    and third four-line blocks are exchanged, and is used as the first input.
    """
    with open(datapath("paired.1.fastq")) as source:
        original = source.readlines()
    # Exchange lines 4-7 with lines 8-11; everything else keeps its place
    reordered = original[0:4] + original[8:12] + original[4:8] + original[12:]
    swapped = tmpdir.join("swapped.1.fastq")
    swapped.write("".join(reordered))

    with pytest.raises(SystemExit):
        arguments = ["-o", str(tmpdir.join("out1.fastq"))]
        arguments += ["--paired-output", str(tmpdir.join("out2.fastq"))]
        arguments += ["--cores", str(cores)]
        arguments += [str(swapped), datapath("paired.2.fastq")]
        main(arguments)
def test_rename_cannot_be_combined_with_other_renaming_options(opt):
    """Expect ``SystemExit`` when ``--rename`` is given together with ``opt``.

    ``opt`` is another command-line option; it is passed with the value
    ``"something"``.
    """
    with pytest.raises(SystemExit):
        main([
            opt, "something",
            "--rename='{id} {comment} extrainfo'",
            # os.devnull instead of a hard-coded "/dev/null" keeps the test portable
            "-o", os.devnull,
            datapath("empty.fastq"),
        ])
def test_missing_file():
    """Expect ``SystemExit`` when ``--paired-output`` is used with one input file."""
    with pytest.raises(SystemExit), redirect_stderr():
        arguments = ['-a', 'XX', '--paired-output', 'out.fastq']
        arguments.append(datapath('paired.1.fastq'))
        main(arguments)
def test_too_short_no_primer():
    '''--too-short-output and --trim-primer'''
    too_short_path = 'tooshort.tmp.fa'
    # try/finally: the file written into the working directory must not be
    # left behind when the run or the comparison fails.
    try:
        run(
            "-c -m 5 -a 330201030313112312 --trim-primer --too-short-output tooshort.tmp.fa",
            "minlen.noprimer.fa", "lengths.fa")
        assert files_equal(datapath('tooshort.noprimer.fa'), too_short_path)
    finally:
        # The file may be missing if the run failed before creating it; do not
        # let the cleanup hide that original failure.
        if os.path.exists(too_short_path):
            os.remove(too_short_path)
def test_missing_file(tmpdir):
    """Expect ``SystemExit`` when ``--paired-output`` is used with one input file."""
    with pytest.raises(SystemExit):
        arguments = ["--paired-output", str(tmpdir.join("out.fastq"))]
        arguments.append(datapath("paired.1.fastq"))
        main(arguments)
def test_write_compressed_fastq(cores, tmpdir):
    """Run with an output path ending in ``.fastq.gz``; only completion is checked."""
    gz_path = str(tmpdir.join('out.fastq.gz'))
    arguments = ['--cores', str(cores), '-o', gz_path]
    arguments.append(datapath('small.fastq'))
    main(arguments)
def test_compressed_output(tmp_path, cores, extension):
    """Run with an output name of ``small.fastq.<extension>``; ``main`` must return None."""
    target = tmp_path / ("small.fastq." + extension)
    arguments = ["--cores", str(cores)]
    arguments += ["-a", "TTAGACATATCTCCGTCG"]
    arguments += ["-o", str(target)]
    arguments.append(datapath("small.fastq"))
    result = main(arguments)
    assert result is None
def test_pair_adapters_demultiplexing(tmpdir):
    """Demultiplex with ``--pair-adapters`` and a ``{name}`` output placeholder.

    Output files for the names ``i1``, ``i2`` and ``unknown`` must exist for
    both reads and equal their references under ``cutpath``.
    """
    arguments = "-g i1=AAAA -G i1=GGGG -g i2=CCCC -G i2=TTTT".split()
    arguments.append("--pair-adapters")
    arguments.extend(["-o", str(tmpdir.join("dual-{name}.1.fastq"))])
    arguments.extend(["-p", str(tmpdir.join("dual-{name}.2.fastq"))])
    arguments.extend([datapath("dual-index.1.fastq"), datapath("dual-index.2.fastq")])
    assert main(arguments) is None

    expected_names = [
        "dual-{}.{}.fastq".format(label, read)
        for label in ("i1", "i2", "unknown")
        for read in (1, 2)
    ]
    for filename in expected_names:
        produced = tmpdir.join(filename)
        assert produced.check()
        assert_files_equal(cutpath(filename), str(produced))
def test_untrimmed_paired_output():
    """Check ``--untrimmed-output`` together with ``--untrimmed-paired-output``.

    The two untrimmed files and the two regular output files are each compared
    with their reference under ``cutpath``.
    """
    with temporary_path("tmp-paired.1.fastq") as tmp1, \
            temporary_path("tmp-paired.2.fastq") as tmp2, \
            temporary_path("tmp-untrimmed.1.fastq") as untrimmed1, \
            temporary_path("tmp-untrimmed.2.fastq") as untrimmed2:
        arguments = ['-a', 'TTAGACATAT']
        arguments += ['-o', tmp1, '-p', tmp2]
        arguments += ['--untrimmed-output', untrimmed1]
        arguments += ['--untrimmed-paired-output', untrimmed2]
        arguments += [datapath('paired.1.fastq'), datapath('paired.2.fastq')]
        assert cutadapt.main(arguments) is None
        # Untrimmed files are checked before the trimmed ones
        comparisons = (
            ('paired-untrimmed.1.fastq', untrimmed1),
            ('paired-untrimmed.2.fastq', untrimmed2),
            ('paired-trimmed.1.fastq', tmp1),
            ('paired-trimmed.2.fastq', tmp2),
        )
        for reference, produced in comparisons:
            assert files_equal(cutpath(reference), produced)
def test_explicit_standard_output(tmpdir, cores):
    """Write FASTQ to standard output (using "-o -")"""
    import subprocess

    captured_path = str(tmpdir.join("out.fastq"))
    # The child's standard output is sent straight into the capture file
    with open(captured_path, "w") as capture:
        command = [sys.executable, "-m", "cutadapt"]
        command += ["-o", "-"]
        command += ["--cores", str(cores)]
        command += ["-a", "TTAGACATATCTCCGTCG", datapath("small.fastq")]
        child = subprocess.Popen(command, stdout=capture)
        child.communicate()
    assert_files_equal(cutpath("small.fastq"), captured_path)
def test_quiet_is_quiet():
    """Check that a ``--quiet`` run writes nothing to stdout or stderr.

    ``sys.stdout`` and ``sys.stderr`` are replaced by ``StringIO`` objects for
    the duration of the run and restored afterwards, even if the run fails.
    """
    captured_standard_output = StringIO()
    captured_standard_error = StringIO()
    old_stdout = sys.stdout
    old_stderr = sys.stderr
    try:
        sys.stdout = captured_standard_output
        sys.stderr = captured_standard_error
        # os.devnull instead of a hard-coded '/dev/null' keeps the test portable
        main(['-o', os.devnull, '--quiet', '-a', 'XXXX', datapath('illumina.fastq.gz')])
    finally:
        sys.stdout = old_stdout
        sys.stderr = old_stderr
    assert captured_standard_output.getvalue() == ''
    assert captured_standard_error.getvalue() == ''
def test_standard_input_pipe(tmpdir, cores):
    """Read FASTQ from standard input"""
    import subprocess
    out_path = str(tmpdir.join("out.fastq"))
    in_path = datapath("small.fastq")
    # Use 'cat' to simulate that no file name is available for stdin
    cat = subprocess.Popen(["cat", in_path], stdout=subprocess.PIPE)
    py = subprocess.Popen([
        sys.executable, "-m", "cutadapt", "--cores", str(cores),
        "-a", "TTAGACATATCTCCGTCG", "-o", out_path, "-"],
        stdin=cat.stdout)
    # The child holds its own copy of the pipe; closing ours lets 'cat' get
    # SIGPIPE if the child exits early.
    cat.stdout.close()
    # One communicate() call is enough to wait for the child to finish
    py.communicate()
    # Reap 'cat' so no unwaited child process is left behind
    cat.wait()
    assert_files_equal(cutpath("small.fastq"), out_path)
def test_non_iupac_characters(run):
    """Expect ``SystemExit`` for the adapter sequence ``ZACGT``."""
    with pytest.raises(SystemExit), redirect_stderr():
        arguments = ['-a', 'ZACGT']
        arguments.append(datapath('small.fastq'))
        main(arguments)
def test_adapter_file(run):
    """Pass ``-a file:<path of adapter.fasta>`` to the ``run`` fixture."""
    adapter_option = '-a file:' + datapath('adapter.fasta')
    run(adapter_option, 'illumina.fastq', 'illumina.fastq.gz')
def test_adapter_file_5p_anchored(run):
    """Pass ``-N -g file:<path of prefix-adapter.fasta>`` to the ``run`` fixture."""
    adapter_option = '-N -g file:' + datapath('prefix-adapter.fasta')
    run(adapter_option, 'anchored.fasta', 'anchored.fasta')
def test_adapter_file_3p_anchored(run):
    """Pass ``-N -a file:<path of suffix-adapter.fasta>`` to the ``run`` fixture."""
    adapter_option = '-N -a file:' + datapath('suffix-adapter.fasta')
    run(adapter_option, 'anchored-back.fasta', 'anchored-back.fasta')
def test_adapter_file_5p_anchored_no_indels(run):
    """Pass ``-N --no-indels -g file:<path of prefix-adapter.fasta>`` to ``run``."""
    adapter_option = '-N --no-indels -g file:' + datapath('prefix-adapter.fasta')
    run(adapter_option, 'anchored.fasta', 'anchored.fasta')
def test_write_compressed_fastq(cores, tmpdir):
    """Run with an output path ending in ``.fastq.gz``; only completion is checked."""
    arguments = ['--cores', str(cores)]
    arguments += ['-o', str(tmpdir.join('out.fastq.gz'))]
    arguments += [datapath('small.fastq')]
    main(arguments)
def test_separate_minlength_single():
    """Using separate minlengths for single-end data"""
    with pytest.raises(SystemExit):
        arguments = ["-m", "5:7"]
        arguments.append(datapath("small.fastq"))
        main(arguments)
def test_demultiplex():
    """Demultiplex single-end input into one output file per adapter name.

    The ``-o`` path contains a ``{name}`` placeholder. The test expects files
    for the names ``first``, ``second`` and ``unknown`` and compares each with
    its reference under ``cutpath``.
    """
    tempdir = tempfile.mkdtemp(prefix='cutadapt-tests.')
    # try/finally: the scratch directory must be removed even when the run or
    # one of the comparisons below fails.
    try:
        multiout = os.path.join(tempdir, 'tmp-demulti.{name}.fasta')
        params = [
            '-a', 'first=AATTTCAGGAATT',
            '-a', 'second=GTTCTCTAGTTCT',
            '-o', multiout,
            datapath('twoadapters.fasta'),
        ]
        assert main(params) is None
        assert_files_equal(cutpath('twoadapters.first.fasta'), multiout.format(name='first'))
        assert_files_equal(cutpath('twoadapters.second.fasta'), multiout.format(name='second'))
        assert_files_equal(cutpath('twoadapters.unknown.fasta'), multiout.format(name='unknown'))
    finally:
        shutil.rmtree(tempdir)
def test_rest(run, tmpdir):
    """-r/--rest-file"""
    rest_path = str(tmpdir.join("rest.tmp"))
    options = ['-b', 'ADAPTER', '-N']
    options += ['-r', rest_path]
    run(options, "rest.fa", "rest.fa")
    # The rest file must equal the stored reference
    assert_files_equal(datapath('rest.txt'), rest_path)
def test_x_brace_notation():
    """Run with the adapter ``X{5}``; only successful completion is checked."""
    # os.devnull instead of a hard-coded '/dev/null' keeps the test portable
    main(['-o', os.devnull, '--quiet', '-a', 'X{5}', datapath('small.fastq')])
def test_linked_anywhere():
    """Expect ``SystemExit`` for ``-b`` with the adapter ``AAA...TTT``."""
    with pytest.raises(SystemExit), redirect_stderr():
        arguments = ['-b', 'AAA...TTT']
        arguments.append(datapath('linked.fasta'))
        main(arguments)
def test_restfront(run, tmpdir):
    """Check the ``-r`` rest file produced with a ``-g`` adapter."""
    rest_path = str(tmpdir.join("rest.txt"))
    options = ['-g', 'ADAPTER', '-N']
    options += ['-r', rest_path]
    run(options, "restfront.fa", "rest.fa")
    # The rest file must equal the stored reference
    assert_files_equal(datapath('restfront.txt'), rest_path)
def test_anywhere_anchored_3p():
    """Expect ``SystemExit`` for ``-b`` with the adapter ``TTT$``."""
    with pytest.raises(SystemExit), redirect_stderr():
        arguments = ['-b', 'TTT$']
        arguments.append(datapath('small.fastq'))
        main(arguments)
def test_two_fastqs():
    """Expect ``SystemExit`` when two input files are given without any options."""
    with pytest.raises(SystemExit), redirect_stderr():
        inputs = [datapath('paired.1.fastq'), datapath('paired.2.fastq')]
        main(inputs)
def test_too_long(run, tmpdir):
    """--too-long-output"""
    overflow_path = str(tmpdir.join('toolong.fa'))
    options = "-M 5 -a TTAGACATATCTCCGTCG --too-long-output " + overflow_path
    run(options, "maxlen.fa", "lengths.fa")
    # The too-long file must equal the stored reference
    assert_files_equal(datapath('toolong.fa'), overflow_path)
def test_adapter_file_empty_name(run):
    """Pass ``-N -a file:<path of adapter-empty-name.fasta>`` to the ``run`` fixture."""
    adapter_option = '-N -a file:' + datapath('adapter-empty-name.fasta')
    run(adapter_option, 'illumina.fastq', 'illumina.fastq.gz')
def test_adapter_file_3p_anchored_no_indels(run):
    """Pass ``-N --no-indels -a file:<path of suffix-adapter.fasta>`` to ``run``."""
    adapter_option = '-N --no-indels -a file:' + datapath('suffix-adapter.fasta')
    run(adapter_option, 'anchored-back.fasta', 'anchored-back.fasta')
def test_too_short(run, tmpdir):
    """--too-short-output"""
    underflow_path = str(tmpdir.join('tooshort.fa'))
    options = "-m 5 -a TTAGACATATCTCCGTCG --too-short-output " + underflow_path
    run(options, "minlen.fa", "lengths.fa")
    # The too-short file must equal the stored reference
    assert_files_equal(datapath('tooshort.fa'), underflow_path)