def _run(params, inpath1, inpath2=None, expected1=None, expected2=None, cores=1):
    """Run ``main`` with ``--interleaved`` and compare the output files.

    params: option string (split on whitespace) or sequence of arguments.
        The caller's object is never modified.
    inpath1, inpath2: input file names, passed through ``datapath``;
        ``inpath2`` is optional.
    expected1, expected2: reference file names, passed through ``cutpath``.
        ``expected1`` is needed to build the ``-o`` file name; ``expected2``
        is optional and adds a ``-p`` output.
    cores: value given to ``--cores``.

    ``tmpdir`` is taken from the enclosing scope (not visible in this block).
    """
    assert not (inpath1 and inpath2 and expected1 and expected2)
    assert not (expected2 and not expected1)
    assert not (inpath2 and not inpath1)
    # Work on a private copy: "+=" on the caller's list would extend it in
    # place and leak options into whatever the caller does with it next.
    args = params.split() if isinstance(params, str) else list(params)
    args += ["--interleaved", "--cores", str(cores), "--buffer-size=512"]
    tmp1 = str(tmpdir.join("out1-" + expected1))
    args += ["-o", tmp1]
    paths = [datapath(inpath1)]
    if inpath2:
        paths.append(datapath(inpath2))
    tmp2 = None
    if expected2:
        tmp2 = str(tmpdir.join("out2-" + expected2))
        args += ["-p", tmp2]
    assert main(args + paths) is None
    if tmp2 is not None:
        assert_files_equal(cutpath(expected2), tmp2)
    assert_files_equal(cutpath(expected1), tmp1)
def run_interleaved(params, inpath1, inpath2=None, expected1=None, expected2=None, cores=1):
    """
    Interleaved input or output (or both)

    params: option string (split on whitespace) or sequence of arguments.
        The caller's object is never modified.
    inpath1, inpath2: input file names, passed through ``datapath``;
        ``inpath2`` is optional.
    expected1, expected2: reference file names, passed through ``cutpath``.
        ``expected1`` is needed to name the ``-o`` temporary file;
        ``expected2`` is optional and adds a ``-p`` output.
    cores: value given to ``--cores``.
    """
    assert not (inpath1 and inpath2 and expected1 and expected2)
    assert not (expected2 and not expected1)
    assert not (inpath2 and not inpath1)
    # Copy instead of extending the caller's list in place with "+="
    args = params.split() if isinstance(params, str) else list(params)
    args += ['--interleaved', '--cores', str(cores), '--buffer-size=512']
    with temporary_path('tmp1-' + expected1) as tmp1:
        args += ['-o', tmp1]
        paths = [datapath(inpath1)]
        if inpath2:
            paths.append(datapath(inpath2))
        if expected2:
            with temporary_path('tmp2-' + expected2) as tmp2:
                args += ['-p', tmp2]
                assert main(args + paths) is None
                assert_files_equal(cutpath(expected2), tmp2)
        else:
            assert main(args + paths) is None
        assert_files_equal(cutpath(expected1), tmp1)
def test_demultiplex():
    """Write one output file per adapter name via a ``{name}`` template.

    The outputs go to a fresh temporary directory, which is removed even
    when one of the assertions fails.
    """
    tempdir = tempfile.mkdtemp(prefix='cutadapt-tests.')
    try:
        multiout = os.path.join(tempdir, 'tmp-demulti.{name}.fasta')
        params = [
            '-a', 'first=AATTTCAGGAATT',
            '-a', 'second=GTTCTCTAGTTCT',
            '-o', multiout,
            datapath('twoadapters.fasta'),
        ]
        assert main(params) is None
        assert_files_equal(cutpath('twoadapters.first.fasta'), multiout.format(name='first'))
        assert_files_equal(cutpath('twoadapters.second.fasta'), multiout.format(name='second'))
        assert_files_equal(cutpath('twoadapters.unknown.fasta'), multiout.format(name='unknown'))
    finally:
        # Previously skipped on failure, which leaked the directory
        shutil.rmtree(tempdir)
def test_demultiplex():
    """Write one output file per adapter name via a ``{name}`` template.

    The output files are created next to the test data and are removed
    again even when one of the assertions fails.
    """
    multiout = os.path.join(os.path.dirname(__file__), 'data', 'tmp-demulti.{name}.fasta')
    params = ['-a', 'first=AATTTCAGGAATT', '-a', 'second=GTTCTCTAGTTCT', '-o', multiout, datapath('twoadapters.fasta')]
    try:
        assert cutadapt.main(params) is None
        assert files_equal(cutpath('twoadapters.first.fasta'), multiout.format(name='first'))
        assert files_equal(cutpath('twoadapters.second.fasta'), multiout.format(name='second'))
        assert files_equal(cutpath('twoadapters.unknown.fasta'), multiout.format(name='unknown'))
    finally:
        # Clean up unconditionally so a failing run does not leave files in
        # the data directory; skip files that were never created so that the
        # original failure is not masked by a removal error.
        for name in ('first', 'second', 'unknown'):
            leftover = multiout.format(name=name)
            if os.path.exists(leftover):
                os.remove(leftover)
def _run(params, in1, in2, expected1, expected2, cores):
    """Run ``main`` on a pair of input files and compare both outputs.

    params: option string (split on whitespace) or sequence of arguments.
        The caller's object is never modified.
    in1, in2: input file names, passed through ``datapath``.
    expected1, expected2: reference file names, passed through ``cutpath``;
        the same names are used for the output files below ``tmpdir``.
    cores: value given to ``--cores``.

    ``tmpdir`` is taken from the enclosing scope (not visible in this block).
    """
    # Copy instead of extending the caller's list in place with "+="
    args = params.split() if isinstance(params, str) else list(params)
    path1 = str(tmpdir.join(expected1))
    path2 = str(tmpdir.join(expected2))
    args += ["--cores", str(cores), "--buffer-size=512"]
    args += ["-o", path1, "-p", path2]
    args += [datapath(in1), datapath(in2)]
    assert main(args) is None
    assert_files_equal(cutpath(expected1), path1)
    assert_files_equal(cutpath(expected2), path2)
def run_paired(params, in1, in2, expected1, expected2, cores):
    """Run ``main`` on a pair of input files and compare both outputs.

    params: option string (split on whitespace) or sequence of arguments.
        The caller's object is never modified.
    in1, in2: input file names, passed through ``datapath``.
    expected1, expected2: reference file names, passed through ``cutpath``.
    cores: value given to ``--cores``.
    """
    # Copy instead of extending the caller's list in place with "+="
    args = params.split() if isinstance(params, str) else list(params)
    args += ['--cores', str(cores), '--buffer-size=512']
    with temporary_path('tmp1-' + expected1) as p1:
        with temporary_path('tmp2-' + expected2) as p2:
            args += ['-o', p1, '-p', p2]
            args += [datapath(in1), datapath(in2)]
            assert main(args) is None
            assert_files_equal(cutpath(expected1), p1)
            assert_files_equal(cutpath(expected2), p2)
def test_too_long_output(run_paired, tmpdir):
    """Check the ``--too-long-output``/``--too-long-paired-output`` files of a paired run with ``-M 14``."""
    too_long_r1 = str(tmpdir.join("too-long.1.fastq"))
    too_long_r2 = str(tmpdir.join("too-long.2.fastq"))
    options = (
        "-a TTAGACATAT -A CAGTGGAGTA -M 14 --too-long-output "
        "{0} --too-long-paired-output {1}"
    ).format(too_long_r1, too_long_r2)
    run_paired(
        options,
        in1="paired.1.fastq",
        in2="paired.2.fastq",
        expected1="paired-too-short.1.fastq",
        expected2="paired-too-short.2.fastq",
        cores=1,
    )
    # Both too-long files are compared against the reference files of the same name
    for reference, produced in (("paired.1.fastq", too_long_r1), ("paired.2.fastq", too_long_r2)):
        assert_files_equal(cutpath(reference), produced)
def test_too_long_output():
    """Check the ``--too-long-output``/``--too-long-paired-output`` files of a paired run with ``-M 14``."""
    with temporary_path('temp-too-long.1.fastq') as too_long_r1, \
            temporary_path('temp-too-long.2.fastq') as too_long_r2:
        options = (
            '-a TTAGACATAT -A CAGTGGAGTA -M 14 --too-long-output '
            '{0} --too-long-paired-output {1}'
        ).format(too_long_r1, too_long_r2)
        run_paired(
            options,
            in1='paired.1.fastq',
            in2='paired.2.fastq',
            expected1='paired-too-short.1.fastq',
            expected2='paired-too-short.2.fastq',
            cores=1,
        )
        for reference, produced in (('paired.1.fastq', too_long_r1), ('paired.2.fastq', too_long_r2)):
            assert_files_equal(cutpath(reference), produced)
def test_untrimmed_paired_output(tmpdir, run_paired):
    """Paired run with ``--pair-filter=first``: check trimmed and untrimmed outputs of both reads."""
    r1_untrimmed = str(tmpdir.join("untrimmed.1.fastq"))
    r2_untrimmed = str(tmpdir.join("untrimmed.2.fastq"))
    options = ["-a", "TTAGACATAT", "--pair-filter=first"]
    options += ["--untrimmed-output", r1_untrimmed]
    options += ["--untrimmed-paired-output", r2_untrimmed]
    run_paired(
        options,
        in1="paired.1.fastq",
        in2="paired.2.fastq",
        expected1="paired-trimmed.1.fastq",
        expected2="paired-trimmed.2.fastq",
        cores=1,
    )
    for reference, produced in (
        ("paired-untrimmed.1.fastq", r1_untrimmed),
        ("paired-untrimmed.2.fastq", r2_untrimmed),
    ):
        assert_files_equal(cutpath(reference), produced)
def test_untrimmed_paired_output():
    """Paired run with untrimmed outputs: check trimmed and untrimmed files of both reads."""
    with temporary_path("tmp-untrimmed.1.fastq") as r1_untrimmed, \
            temporary_path("tmp-untrimmed.2.fastq") as r2_untrimmed:
        options = ['-a', 'TTAGACATAT']
        options += ['--untrimmed-output', r1_untrimmed]
        options += ['--untrimmed-paired-output', r2_untrimmed]
        run_paired(
            options,
            in1='paired.1.fastq',
            in2='paired.2.fastq',
            expected1='paired-trimmed.1.fastq',
            expected2='paired-trimmed.2.fastq',
            cores=1,
        )
        for reference, produced in (
            ('paired-untrimmed.1.fastq', r1_untrimmed),
            ('paired-untrimmed.2.fastq', r2_untrimmed),
        ):
            assert_files_equal(cutpath(reference), produced)
def test_too_long_output(run_paired, tmpdir, cores):
    """Check the ``--too-long-output``/``--too-long-paired-output`` files of a paired run with ``-M 14``."""
    too_long_r1 = str(tmpdir.join("too-long.1.fastq"))
    too_long_r2 = str(tmpdir.join("too-long.2.fastq"))
    # The leading space of the option string is part of the original call
    template = (
        " -a TTAGACATAT -A CAGTGGAGTA -M 14"
        " --too-long-output {}"
        " --too-long-paired-output {}"
    )
    run_paired(
        template.format(too_long_r1, too_long_r2),
        in1="paired.1.fastq",
        in2="paired.2.fastq",
        expected1="paired-too-short.1.fastq",
        expected2="paired-too-short.2.fastq",
        cores=cores,
    )
    for reference, produced in (("paired.1.fastq", too_long_r1), ("paired.2.fastq", too_long_r2)):
        assert_files_equal(cutpath(reference), produced)
def test_combinatorial_demultiplexing(tmpdir, discarduntrimmed, cores):
    """Demultiplex with ``{name1}_{name2}`` output templates and check which files appear.

    Files for every R1/R2 adapter combination must exist. Files involving
    "unknown" must exist unless ``discarduntrimmed`` is set, in which case
    ``--discard-untrimmed`` is passed and they must be absent. An existing
    output is compared with its reference only if the reference file exists.
    """
    args = "-g A=^AAAAAAAAAA -g C=^CCCCCCCCCC -G G=^GGGGGGGGGG -G T=^TTTTTTTTTT".split()
    args.extend(["-o", str(tmpdir.join("combinatorial.{name1}_{name2}.1.fastq"))])
    args.extend(["-p", str(tmpdir.join("combinatorial.{name1}_{name2}.2.fastq"))])
    args.extend(["--cores", str(cores)])
    args.extend([datapath("combinatorial.1.fastq"), datapath("combinatorial.2.fastq")])

    # Name pairs in which at least one side is "unknown"
    untrimmed_pairs = [("unknown", "unknown")]
    untrimmed_pairs.extend((r1, "unknown") for r1 in "AC")
    untrimmed_pairs.extend(("unknown", r2) for r2 in "GT")

    # Each entry is (name1, name2, must_exist)
    expectations = [(r1, r2, True) for r1, r2 in product("AC", "GT")]
    expectations += [(r1, r2, not discarduntrimmed) for r1, r2 in untrimmed_pairs]
    if discarduntrimmed:
        args.append("--discard-untrimmed")

    main(args)

    for (name1, name2, must_exist), read in product(expectations, (1, 2)):
        name = "combinatorial.{name1}_{name2}.{i}.fastq".format(name1=name1, name2=name2, i=read)
        reference = cutpath(os.path.join("combinatorial", name))
        produced = tmpdir.join(name)
        if not must_exist:
            assert not produced.check(), ("Output file should not exist", name)
            continue
        assert produced.check(), ("Output file missing", name)
        if os.path.exists(reference):
            assert_files_equal(reference, str(produced))
def test_info_file_times(run, tmpdir):
    """Check the ``--info-file`` written when ``--times 2`` is used with two adapters."""
    info_path = str(tmpdir.join("info.txt"))
    options = ["--info-file", info_path, "--times", "2"]
    options += ["-a", "adapt=GCCGAACTTCTTA"]
    options += ["-a", "adapt2=GACTGCCTTAAGGACGT"]
    run(options, "illumina5.fastq", "illumina5.fastq")
    assert_files_equal(cutpath('illumina5.info.txt'), info_path)
def test_info_file(run, tmpdir):
    """Check the contents of the file written by ``--info-file``."""
    # The adapter given below differs in its fourth base from the true
    # adapter sequence in the illumina.fastq.gz data set, which is
    # GCCTAACTTCTTAGACTGCCTTAAGGACGT
    adapter = "adapt=GCCGAACTTCTTAGACTGCCTTAAGGACGT"
    info_path = str(tmpdir.join("info.txt"))
    options = ["--info-file", info_path, "-a", adapter]
    run(options, "illumina.fastq", "illumina.fastq.gz")
    assert_files_equal(cutpath("illumina.info.txt"), info_path)
def test_info_file_times():
    """Check the ``--info-file`` written when ``--times 2`` is used with two adapters."""
    with temporary_path("infotmp.txt") as info_path:
        options = ["--info-file", info_path, '--times', '2']
        options += ['-a', 'adapt=GCCGAACTTCTTA']
        options += ['-a', 'adapt2=GACTGCCTTAAGGACGT']
        run(options, "illumina5.fastq", "illumina5.fastq")
        assert_files_equal(cutpath('illumina5.info.txt'), info_path)
def test_paired_end_legacy():
    """--paired-output, no -A/-B/-G"""
    with temporary_path("paired-tmp.fastq") as second_output:
        # -m 14 removes one read from the first output; the same read
        # should then be missing from the second output file as well
        options = ['-a', 'TTAGACATAT', '-m', '14']
        options += ['--paired-output', second_output]
        run(options, 'paired.m14.1.fastq', 'paired.1.fastq', 'paired.2.fastq')
        reference = cutpath('paired.m14.2.fastq')
        assert files_equal(reference, second_output)
def test_untrimmed_output(run, cores, tmpdir):
    """Check both the regular output and the file written by ``--untrimmed-output``."""
    untrimmed_path = str(tmpdir.join("untrimmed.fastq"))
    options = ["--cores", str(cores)]
    options += ["-a", "TTAGACATATCTCCGTCG"]
    options += ["--untrimmed-output", untrimmed_path]
    run(options, "small.trimmed.fastq", "small.fastq")
    assert_files_equal(cutpath("small.untrimmed.fastq"), untrimmed_path)
def test_info_file():
    """Check the contents of the file written by ``--info-file``."""
    with temporary_path("infotmp.txt") as info_path:
        adapter = 'adapt=GCCGAACTTCTTAGACTGCCTTAAGGACGT'
        options = ["--info-file", info_path, '-a', adapter]
        run(options, "illumina.fastq", "illumina.fastq.gz")
        reference = cutpath('illumina.info.txt')
        assert files_equal(reference, info_path)
def test_untrimmed_paired_output_automatic_pair_filter(tmpdir, run_paired):
    """Untrimmed paired outputs when only an R1 adapter is given and no ``--pair-filter``."""
    # Without R2 adapters, --pair-filter should be ignored for
    # --discard-untrimmed, --untrimmed-output and --untrimmed-paired-output
    # and always behave as "both" (with --pair-filter=any, all pairs would be
    # considered untrimmed because the R1 read is always untrimmed)
    r1_untrimmed = str(tmpdir.join("untrimmed.1.fastq"))
    r2_untrimmed = str(tmpdir.join("untrimmed.2.fastq"))
    options = ["-a", "TTAGACATAT"]
    options += ["--untrimmed-output", r1_untrimmed]
    options += ["--untrimmed-paired-output", r2_untrimmed]
    run_paired(
        options,
        in1="paired.1.fastq",
        in2="paired.2.fastq",
        expected1="paired-trimmed.1.fastq",
        expected2="paired-trimmed.2.fastq",
        cores=1,
    )
    for reference, produced in (
        ("paired-untrimmed.1.fastq", r1_untrimmed),
        ("paired-untrimmed.2.fastq", r2_untrimmed),
    ):
        assert_files_equal(cutpath(reference), produced)
def test_untrimmed_paired_output():
    """Paired run writing trimmed and untrimmed reads to four separate temporary files."""
    with temporary_path("tmp-paired.1.fastq") as tmp1, \
            temporary_path("tmp-paired.2.fastq") as tmp2, \
            temporary_path("tmp-untrimmed.1.fastq") as untrimmed1, \
            temporary_path("tmp-untrimmed.2.fastq") as untrimmed2:
        args = ['-a', 'TTAGACATAT', '-o', tmp1, '-p', tmp2]
        args += ['--untrimmed-output', untrimmed1]
        args += ['--untrimmed-paired-output', untrimmed2]
        args += [datapath('paired.1.fastq'), datapath('paired.2.fastq')]
        assert cutadapt.main(args) is None
        # Same comparison order as before: untrimmed files first, then trimmed
        comparisons = (
            ('paired-untrimmed.1.fastq', untrimmed1),
            ('paired-untrimmed.2.fastq', untrimmed2),
            ('paired-trimmed.1.fastq', tmp1),
            ('paired-trimmed.2.fastq', tmp2),
        )
        for reference, produced in comparisons:
            assert files_equal(cutpath(reference), produced)
def test_linked_info_file(tmpdir):
    """Check the ``--info-file`` written for a linked adapter."""
    info_path = str(tmpdir.join('info.txt'))
    out_path = str(tmpdir.join('out.fasta'))
    # Option and value of the adapter are deliberately one single argument
    args = ['-a linkedadapter=^AAAAAAAAAA...TTTTTTTTTT']
    args += ['--info-file', info_path]
    args += ['-o', out_path]
    args.append(datapath('linked.fasta'))
    main(args)
    assert_files_equal(cutpath('linked-info.txt'), info_path)
def test_demultiplex():
    """Write one output file per adapter name via a ``{name}`` template.

    The output files are created next to the test data and are removed
    again even when one of the assertions fails.
    """
    multiout = os.path.join(os.path.dirname(__file__), 'data', 'tmp-demulti.{name}.fasta')
    params = [
        '-a', 'first=AATTTCAGGAATT',
        '-a', 'second=GTTCTCTAGTTCT',
        '-o', multiout,
        datapath('twoadapters.fasta')
    ]
    outputs = [multiout.format(name=name) for name in ('first', 'second', 'unknown')]
    try:
        assert cutadapt.main(params) is None
        assert files_equal(cutpath('twoadapters.first.fasta'), outputs[0])
        assert files_equal(cutpath('twoadapters.second.fasta'), outputs[1])
        assert files_equal(cutpath('twoadapters.unknown.fasta'), outputs[2])
    finally:
        # Runs on failure too, so no stray files stay in the data directory.
        # Files that were never written are skipped so that a removal error
        # cannot hide the original assertion failure.
        for path in outputs:
            if os.path.exists(path):
                os.remove(path)
def _run(params, expected, inpath):
    """Run ``main`` on one input file and compare the output with a reference.

    params: option string (split on whitespace) or sequence of arguments.
        The caller's object is never modified.
    expected: reference file name, passed through ``cutpath``; also used
        as the name of the output file below ``tmpdir``.
    inpath: input file name, passed through ``datapath``.

    ``tmpdir`` is taken from the enclosing scope (not visible in this block).
    """
    # Copy instead of extending the caller's list in place with "+="
    args = params.split() if isinstance(params, str) else list(params)
    tmp_fastaq = str(tmpdir.join(expected))
    args += ['-o', tmp_fastaq]
    args += [datapath(inpath)]
    assert main(args) is None
    # TODO redirect standard output
    assert_files_equal(cutpath(expected), tmp_fastaq)
def test_info_file_times(run, tmp_path, cores):
    """Check the ``--info-file`` written when ``--times 2`` is used with two adapters."""
    info_path = tmp_path / "info.txt"
    options = ["--cores", str(cores)]
    # info_path is handed over as a path object, exactly as before
    options += ["--info-file", info_path]
    options += ["--times", "2"]
    options += ["-a", "adapt=GCCGAACTTCTTA", "-a", "adapt2=GACTGCCTTAAGGACGT"]
    run(options, "illumina5.fastq", "illumina5.fastq")
    reference = cutpath('illumina5.info.txt')
    assert_files_equal(reference, info_path, ignore_trailing_space=True)
def test_info_file_revcomp(run, tmp_path):
    """Check the ``--info-file`` written when ``--revcomp`` is used."""
    info_path = tmp_path / "info-rc.txt"
    out_path = tmp_path / "out.fasta"
    args = ["--info-file", str(info_path)]
    args += ["-a", "adapt=GAGTCG"]
    args += ["--revcomp", "--rename={header}"]
    args += ["-o", str(out_path)]
    args.append(datapath("info-rc.fasta"))
    main(args)
    assert_files_equal(cutpath("info-rc.txt"), info_path)
def test_standard_output(tmpdir, cores):
    """Write FASTQ to standard output (not using --output/-o option)"""
    out_path = str(tmpdir.join("out.fastq"))
    command = [
        sys.executable, "-m", "cutadapt",
        "--cores", str(cores),
        "-a", "TTAGACATATCTCCGTCG",
        datapath("small.fastq"),
    ]
    with open(out_path, "w") as out_file:
        py = subprocess.Popen(command, stdout=out_file)
        py.communicate()
    # Report a failed child process as such instead of only noticing it
    # indirectly through a mismatching output file
    assert py.returncode == 0
    assert_files_equal(cutpath("small.fastq"), out_path)
def _run(params, expected, inpath) -> Statistics:
    """Run ``main`` on one input file, compare the output and return the statistics.

    params: option string (split on whitespace) or sequence of arguments.
        The caller's object is never modified.
    expected: reference file name, passed through ``cutpath``; also used
        as the name of the output file below ``tmpdir``.
    inpath: input file name, passed through ``datapath``.

    Returns whatever ``main`` returned.

    ``tmpdir`` is taken from the enclosing scope (not visible in this block).
    """
    # Copy instead of extending the caller's list in place with "+="
    args = params.split() if isinstance(params, str) else list(params)
    tmp_fastaq = str(tmpdir.join(expected))
    args += ['-o', tmp_fastaq]
    args += [datapath(inpath)]
    stats = main(args)
    # TODO redirect standard output
    assert_files_equal(cutpath(expected), tmp_fastaq)
    return stats
def test_untrimmed_output(run, cores, tmp_path):
    """Check the ``--untrimmed-output`` file and a few fields of the returned statistics."""
    untrimmed_path = tmp_path / "untrimmed.fastq"
    options = ["--cores", str(cores)]
    options += ["-a", "TTAGACATATCTCCGTCG"]
    # The path object itself is passed, exactly as before
    options += ["--untrimmed-output", untrimmed_path]
    stats = run(options, "small.trimmed.fastq", "small.fastq")
    assert_files_equal(cutpath("small.untrimmed.fastq"), untrimmed_path)
    assert stats.with_adapters[0] == 2
    assert stats.written == 2
    assert stats.written_bp[0] == 46
def test_info_file():
    """Check the contents of the file written by ``--info-file``."""
    # The adapter given below differs in its fourth base from the true
    # adapter sequence in the illumina.fastq.gz data set, which is
    # GCCTAACTTCTTAGACTGCCTTAAGGACGT
    adapter = 'adapt=GCCGAACTTCTTAGACTGCCTTAAGGACGT'
    with temporary_path("infotmp.txt") as info_path:
        options = ["--info-file", info_path, '-a', adapter]
        run(options, "illumina.fastq", "illumina.fastq.gz")
        assert_files_equal(cutpath('illumina.info.txt'), info_path)
def test_force_fasta_output(tmpdir, cores):
    """Write FASTA to standard output even on FASTQ input"""
    out_path = str(tmpdir.join("out.fasta"))
    command = [
        sys.executable, "-m", "cutadapt",
        "--fasta",
        "-o", "-",
        "--cores", str(cores),
        "-a", "TTAGACATATCTCCGTCG",
        datapath("small.fastq"),
    ]
    with open(out_path, "w") as out_file:
        py = subprocess.Popen(command, stdout=out_file)
        py.communicate()
    # Report a failed child process as such instead of only noticing it
    # indirectly through a mismatching output file
    assert py.returncode == 0
    assert_files_equal(cutpath("small.fasta"), out_path)
def test_linked_info_file(tmp_path):
    """Check the ``--info-file`` written for a linked adapter."""
    info_path = tmp_path / 'info.txt'
    out_path = tmp_path / 'out.fasta'
    # Option and value of the adapter are deliberately one single argument
    args = ['-a linkedadapter=^AAAAAAAAAA...TTTTTTTTTT']
    args += ['--info-file', str(info_path)]
    args += ['-o', str(out_path)]
    args.append(datapath('linked.fasta'))
    main(args)
    reference = cutpath('linked-info.txt')
    assert_files_equal(reference, info_path, ignore_trailing_space=True)
def test_info_file(run, tmp_path, cores):
    """Check the contents of the file written by ``--info-file``."""
    # The adapter given below differs in its fourth base from the true
    # adapter sequence in the illumina.fastq.gz data set, which is
    # GCCTAACTTCTTAGACTGCCTTAAGGACGT
    adapter = "adapt=GCCGAACTTCTTAGACTGCCTTAAGGACGT"
    info_path = tmp_path / "info.txt"
    options = ["--cores", str(cores)]
    # The path object itself is passed, exactly as before
    options += ["--info-file", info_path]
    options += ["-a", adapter]
    run(options, "illumina.fastq", "illumina.fastq.gz")
    reference = cutpath("illumina.info.txt")
    assert_files_equal(reference, info_path, ignore_trailing_space=True)
def test_explicit_standard_output(tmpdir, cores):
    """Write FASTQ to standard output (using "-o -")"""
    import subprocess

    out_path = str(tmpdir.join("out.fastq"))
    command = [
        sys.executable, "-m", "cutadapt",
        "-o", "-",
        "--cores", str(cores),
        "-a", "TTAGACATATCTCCGTCG",
        datapath("small.fastq"),
    ]
    with open(out_path, "w") as out_file:
        py = subprocess.Popen(command, stdout=out_file)
        py.communicate()
    # Report a failed child process as such instead of only noticing it
    # indirectly through a mismatching output file
    assert py.returncode == 0
    assert_files_equal(cutpath("small.fastq"), out_path)
def test_standard_input_pipe(tmpdir, cores):
    """Read FASTQ from standard input"""
    import subprocess

    out_path = str(tmpdir.join("out.fastq"))
    in_path = datapath("small.fastq")
    # Use 'cat' to simulate that no file name is available for stdin
    cat = subprocess.Popen(["cat", in_path], stdout=subprocess.PIPE)
    py = subprocess.Popen([
        sys.executable, "-m", "cutadapt", "--cores", str(cores),
        "-a", "TTAGACATATCTCCGTCG", "-o", out_path, "-"],
        stdin=cat.stdout)
    # Close our copy of the pipe's read end as soon as the consumer has
    # started, so that 'cat' receives SIGPIPE if the consumer exits early
    cat.stdout.close()
    # One communicate() is enough: it waits for the process to terminate
    py.communicate()
    # Reap the producer as well so that it is not left behind as a zombie
    cat.wait()
    assert py.returncode == 0
    assert_files_equal(cutpath("small.fastq"), out_path)
def test_pair_adapters_demultiplexing(tmpdir):
    """Demultiplex with ``--pair-adapters`` and compare all six output files."""
    args = "-g i1=AAAA -G i1=GGGG -g i2=CCCC -G i2=TTTT".split()
    args.append("--pair-adapters")
    args.extend(["-o", str(tmpdir.join("dual-{name}.1.fastq"))])
    args.extend(["-p", str(tmpdir.join("dual-{name}.2.fastq"))])
    args.extend([datapath("dual-index.1.fastq"), datapath("dual-index.2.fastq")])
    assert main(args) is None
    expected_names = (
        "dual-i1.1.fastq",
        "dual-i1.2.fastq",
        "dual-i2.1.fastq",
        "dual-i2.2.fastq",
        "dual-unknown.1.fastq",
        "dual-unknown.2.fastq",
    )
    for name in expected_names:
        produced = tmpdir.join(name)
        assert produced.check()
        assert_files_equal(cutpath(name), str(produced))
def test_paired_demultiplex(tmpdir):
    """Demultiplex paired-end reads into ``{name}`` templates for both reads and compare all outputs."""
    multiout1 = str(tmpdir.join("demultiplexed.{name}.1.fastq"))
    multiout2 = str(tmpdir.join("demultiplexed.{name}.2.fastq"))
    args = ["-a", "first=AACATTAGACA", "-a", "second=CATTAGACATATCGG"]
    args += ["-A", "ignored=CAGTGGAGTA", "-A", "alsoignored=AATAACAGTGGAGTA"]
    args += ["-o", multiout1, "-p", multiout2]
    args += [datapath("paired.1.fastq"), datapath("paired.2.fastq")]
    assert main(args) is None
    # (reference file, output template, adapter name), in the original order
    comparisons = (
        ("demultiplexed.first.1.fastq", multiout1, "first"),
        ("demultiplexed.second.1.fastq", multiout1, "second"),
        ("demultiplexed.unknown.1.fastq", multiout1, "unknown"),
        ("demultiplexed.first.2.fastq", multiout2, "first"),
        ("demultiplexed.second.2.fastq", multiout2, "second"),
        ("demultiplexed.unknown.2.fastq", multiout2, "unknown"),
    )
    for reference, template, adapter in comparisons:
        assert_files_equal(cutpath(reference), template.format(name=adapter))
def test_explicit_format_with_paired():
    """Paired run on ``.txt`` inputs with ``--format=fastq`` given explicitly."""
    with temporary_path("paired-tmp.fastq") as second_output:
        options = ['--format=fastq', '-a', 'TTAGACATAT', '-m', '14']
        options += ['-p', second_output]
        run(options, 'paired.m14.1.fastq', 'paired.1.txt', 'paired.2.txt')
        reference = cutpath('paired.m14.2.fastq')
        assert files_equal(reference, second_output)
def test_info_file_times(run, tmpdir):
    """Check the ``--info-file`` written when ``--times 2`` is used with two adapters."""
    info_path = str(tmpdir.join("info.txt"))
    adapters = ["-a", "adapt=GCCGAACTTCTTA", "-a", "adapt2=GACTGCCTTAAGGACGT"]
    options = ["--info-file", info_path, "--times", "2"] + adapters
    run(options, "illumina5.fastq", "illumina5.fastq")
    reference = cutpath('illumina5.info.txt')
    assert_files_equal(reference, info_path)
def test_untrimmed_output(run, tmpdir):
    """Check both the regular output and the file written by ``--untrimmed-output``."""
    untrimmed_path = str(tmpdir.join("untrimmed.fastq"))
    options = ["-a", "TTAGACATATCTCCGTCG"]
    options += ["--untrimmed-output", untrimmed_path]
    run(options, "small.trimmed.fastq", "small.fastq")
    reference = cutpath("small.untrimmed.fastq")
    assert_files_equal(reference, untrimmed_path)
def test_untrimmed_output():
    """Check both the regular output and the file written by ``--untrimmed-output``."""
    with temporary_path('untrimmed.tmp.fastq') as untrimmed_path:
        options = ['-a', 'TTAGACATATCTCCGTCG']
        options += ['--untrimmed-output', untrimmed_path]
        run(options, 'small.trimmed.fastq', 'small.fastq')
        reference = cutpath('small.untrimmed.fastq')
        assert files_equal(reference, untrimmed_path)
def test_info_file():
    """Check the contents of the file written by ``--info-file``."""
    adapter = 'adapt=GCCGAACTTCTTAGACTGCCTTAAGGACGT'
    with temporary_path("infotmp.txt") as info_path:
        options = ["--info-file", info_path]
        options += ['-a', adapter]
        run(options, "illumina.fastq", "illumina.fastq.gz")
        reference = cutpath('illumina.info.txt')
        assert files_equal(reference, info_path)