Beispiel #1
0
 def _run(params,
          inpath1,
          inpath2=None,
          expected1=None,
          expected2=None,
          cores=1):
     """Run main() with --interleaved and compare its output to reference files.

     params: command-line options, given as a list or as a single string
         that is split on whitespace.
     inpath1, inpath2: input file names, resolved through datapath().
     expected1, expected2: reference file names, resolved through cutpath().
         expected1 is concatenated into the name of the first output file,
         so it has to be a string even though its default is None.
     cores: value passed to --cores.
     """
     assert not (inpath1 and inpath2 and expected1 and expected2)
     assert not (expected2 and not expected1)
     assert not (inpath2 and not inpath1)
     # Work on a private copy: "+=" on a list extends it in place, which would
     # otherwise modify the list object owned by the caller.
     if isinstance(params, str):
         params = params.split()
     else:
         params = list(params)
     params += ["--interleaved", "--cores", str(cores), "--buffer-size=512"]
     tmp1 = str(tmpdir.join("out1-" + expected1))
     params += ["-o", tmp1]
     paths = [datapath(inpath1)]
     if inpath2:
         paths += [datapath(inpath2)]
     if expected2:
         tmp2 = str(tmpdir.join("out2-" + expected2))
         params += ["-p", tmp2]
         assert main(params + paths) is None
         assert_files_equal(cutpath(expected2), tmp2)
     else:
         assert main(params + paths) is None
     assert_files_equal(cutpath(expected1), tmp1)
Beispiel #2
0
def run_interleaved(params,
                    inpath1,
                    inpath2=None,
                    expected1=None,
                    expected2=None,
                    cores=1):
    """
    Interleaved input or output (or both)

    params: command-line options, given as a list or as a single string
        that is split on whitespace.
    inpath1, inpath2: input file names, resolved through datapath().
    expected1, expected2: reference file names, resolved through cutpath().
        expected1 is concatenated into the temporary output name, so it has
        to be a string even though its default is None.
    cores: value passed to --cores.
    """
    assert not (inpath1 and inpath2 and expected1 and expected2)
    assert not (expected2 and not expected1)
    assert not (inpath2 and not inpath1)
    # Work on a private copy: "+=" on a list extends it in place, which would
    # otherwise modify the list object owned by the caller.
    if isinstance(params, str):
        params = params.split()
    else:
        params = list(params)
    params += ['--interleaved', '--cores', str(cores), '--buffer-size=512']
    with temporary_path('tmp1-' + expected1) as tmp1:
        params += ['-o', tmp1]
        paths = [datapath(inpath1)]
        if inpath2:
            paths += [datapath(inpath2)]
        if expected2:
            with temporary_path('tmp2-' + expected2) as tmp2:
                params += ['-p', tmp2]
                assert main(params + paths) is None
                assert_files_equal(cutpath(expected2), tmp2)
        else:
            assert main(params + paths) is None
        assert_files_equal(cutpath(expected1), tmp1)
Beispiel #3
0
def test_demultiplex():
    """Run main() with a {name} output template and check the first/second/unknown files."""
    tempdir = tempfile.mkdtemp(prefix='cutadapt-tests.')
    # try/finally so the temporary directory is removed even when an
    # assertion below fails.
    try:
        multiout = os.path.join(tempdir, 'tmp-demulti.{name}.fasta')
        params = ['-a', 'first=AATTTCAGGAATT', '-a', 'second=GTTCTCTAGTTCT', '-o', multiout, datapath('twoadapters.fasta')]
        assert main(params) is None
        for name in ('first', 'second', 'unknown'):
            assert_files_equal(cutpath('twoadapters.{}.fasta'.format(name)), multiout.format(name=name))
    finally:
        shutil.rmtree(tempdir)
Beispiel #4
0
def test_demultiplex():
    """Run main() with a {name} output template and check the first/second/unknown files."""
    tempdir = tempfile.mkdtemp(prefix='cutadapt-tests.')
    # The directory must be deleted on every exit path, including a failing
    # assertion, hence the try/finally.
    try:
        multiout = os.path.join(tempdir, 'tmp-demulti.{name}.fasta')
        params = ['-a', 'first=AATTTCAGGAATT', '-a', 'second=GTTCTCTAGTTCT', '-o', multiout, datapath('twoadapters.fasta')]
        assert main(params) is None
        assert_files_equal(cutpath('twoadapters.first.fasta'), multiout.format(name='first'))
        assert_files_equal(cutpath('twoadapters.second.fasta'), multiout.format(name='second'))
        assert_files_equal(cutpath('twoadapters.unknown.fasta'), multiout.format(name='unknown'))
    finally:
        shutil.rmtree(tempdir)
def test_demultiplex():
    """Run cutadapt.main() with a {name} output template and check the three resulting files."""
    multiout = os.path.join(os.path.dirname(__file__), 'data', 'tmp-demulti.{name}.fasta')
    names = ('first', 'second', 'unknown')
    params = ['-a', 'first=AATTTCAGGAATT', '-a', 'second=GTTCTCTAGTTCT', '-o', multiout, datapath('twoadapters.fasta')]
    try:
        assert cutadapt.main(params) is None
        for name in names:
            assert files_equal(cutpath('twoadapters.{}.fasta'.format(name)), multiout.format(name=name))
    finally:
        # The outputs are written into the 'data' directory beside this file,
        # so remove them even when an assertion above fails. A file may be
        # missing if the run itself failed, hence the existence check.
        for name in names:
            path = multiout.format(name=name)
            if os.path.exists(path):
                os.remove(path)
Beispiel #6
0
 def _run(params, in1, in2, expected1, expected2, cores):
     """Run main() on a pair of input files and compare both outputs to references.

     params: command-line options, given as a list or as a single string
         that is split on whitespace.
     in1, in2: input file names, resolved through datapath().
     expected1, expected2: reference file names, resolved through cutpath();
         the same names are used for the output files inside tmpdir.
     cores: value passed to --cores.
     """
     # Work on a private copy: "+=" on a list extends it in place, which would
     # otherwise modify the list object owned by the caller.
     if isinstance(params, str):
         params = params.split()
     else:
         params = list(params)
     params += ["--cores", str(cores), "--buffer-size=512"]
     path1 = str(tmpdir.join(expected1))
     path2 = str(tmpdir.join(expected2))
     params += ["-o", path1, "-p", path2]
     params += [datapath(in1), datapath(in2)]
     assert main(params) is None
     assert_files_equal(cutpath(expected1), path1)
     assert_files_equal(cutpath(expected2), path2)
Beispiel #7
0
 def _run(params, in1, in2, expected1, expected2, cores):
     """Run main() on paired input and check both output files against references.

     params: options as a list, or as one string that is split on whitespace.
     in1, in2: input file names passed through datapath().
     expected1, expected2: reference names passed through cutpath(); they
         also name the output files created inside tmpdir.
     cores: value for the --cores option.
     """
     # Copy list input so that the in-place "+=" below does not alter the
     # caller's list.
     params = params.split() if isinstance(params, str) else list(params)
     params += ["--cores", str(cores), "--buffer-size=512"]
     path1 = str(tmpdir.join(expected1))
     path2 = str(tmpdir.join(expected2))
     params += ["-o", path1, "-p", path2]
     params += [datapath(in1), datapath(in2)]
     assert main(params) is None
     assert_files_equal(cutpath(expected1), path1)
     assert_files_equal(cutpath(expected2), path2)
Beispiel #8
0
def run_paired(params, in1, in2, expected1, expected2, cores):
    """Run main() on paired input and compare both outputs to reference files.

    params: options as a list, or as one string that is split on whitespace.
    in1, in2: input file names passed through datapath().
    expected1, expected2: reference names passed through cutpath(); they are
        also used to build the temporary output names.
    cores: value for the --cores option.
    """
    # Copy list input so that the in-place "+=" below does not alter the
    # caller's list.
    if isinstance(params, str):
        params = params.split()
    else:
        params = list(params)
    params += ['--cores', str(cores), '--buffer-size=512']
    with temporary_path('tmp1-' + expected1) as p1, \
            temporary_path('tmp2-' + expected2) as p2:
        params += ['-o', p1, '-p', p2]
        params += [datapath(in1), datapath(in2)]
        assert main(params) is None
        assert_files_equal(cutpath(expected1), p1)
        assert_files_equal(cutpath(expected2), p2)
Beispiel #9
0
def test_too_long_output(run_paired, tmpdir):
    """Check the files written via --too-long-output and --too-long-paired-output (-M 14)."""
    too_long_1 = str(tmpdir.join("too-long.1.fastq"))
    too_long_2 = str(tmpdir.join("too-long.2.fastq"))
    options = (
        "-a TTAGACATAT -A CAGTGGAGTA -M 14 --too-long-output "
        "{0} --too-long-paired-output {1}"
    ).format(too_long_1, too_long_2)
    run_paired(
        options,
        in1="paired.1.fastq",
        in2="paired.2.fastq",
        expected1="paired-too-short.1.fastq",
        expected2="paired-too-short.2.fastq",
        cores=1,
    )
    # The too-long outputs are compared against the reference copies of the inputs.
    for reference, produced in (("paired.1.fastq", too_long_1), ("paired.2.fastq", too_long_2)):
        assert_files_equal(cutpath(reference), produced)
Beispiel #10
0
def test_too_long_output():
    """Check the files written via --too-long-output and --too-long-paired-output (-M 14)."""
    with temporary_path('temp-too-long.1.fastq') as long1, \
            temporary_path('temp-too-long.2.fastq') as long2:
        options = (
            '-a TTAGACATAT -A CAGTGGAGTA -M 14 --too-long-output '
            '{0} --too-long-paired-output {1}'
        ).format(long1, long2)
        run_paired(
            options,
            in1='paired.1.fastq',
            in2='paired.2.fastq',
            expected1='paired-too-short.1.fastq',
            expected2='paired-too-short.2.fastq',
            cores=1,
        )
        # The too-long outputs are compared against the reference copies of the inputs.
        assert_files_equal(cutpath('paired.1.fastq'), long1)
        assert_files_equal(cutpath('paired.2.fastq'), long2)
Beispiel #11
0
def test_untrimmed_paired_output(tmpdir, run_paired):
    """Check --untrimmed-output and --untrimmed-paired-output with --pair-filter=first."""
    out_r1 = str(tmpdir.join("untrimmed.1.fastq"))
    out_r2 = str(tmpdir.join("untrimmed.2.fastq"))
    options = [
        "-a", "TTAGACATAT",
        "--pair-filter=first",
        "--untrimmed-output", out_r1,
        "--untrimmed-paired-output", out_r2,
    ]
    run_paired(
        options,
        in1="paired.1.fastq",
        in2="paired.2.fastq",
        expected1="paired-trimmed.1.fastq",
        expected2="paired-trimmed.2.fastq",
        cores=1,
    )
    for reference, produced in (
        ("paired-untrimmed.1.fastq", out_r1),
        ("paired-untrimmed.2.fastq", out_r2),
    ):
        assert_files_equal(cutpath(reference), produced)
Beispiel #12
0
def test_untrimmed_paired_output():
    """Check the files written via --untrimmed-output and --untrimmed-paired-output."""
    with temporary_path("tmp-untrimmed.1.fastq") as out_r1, \
            temporary_path("tmp-untrimmed.2.fastq") as out_r2:
        options = [
            '-a', 'TTAGACATAT',
            '--untrimmed-output', out_r1,
            '--untrimmed-paired-output', out_r2,
        ]
        run_paired(
            options,
            in1='paired.1.fastq',
            in2='paired.2.fastq',
            expected1='paired-trimmed.1.fastq',
            expected2='paired-trimmed.2.fastq',
            cores=1,
        )
        assert_files_equal(cutpath('paired-untrimmed.1.fastq'), out_r1)
        assert_files_equal(cutpath('paired-untrimmed.2.fastq'), out_r2)
Beispiel #13
0
def test_too_long_output(run_paired, tmpdir, cores):
    """Check the files written via --too-long-output and --too-long-paired-output (-M 14)."""
    too_long_1 = str(tmpdir.join("too-long.1.fastq"))
    too_long_2 = str(tmpdir.join("too-long.2.fastq"))
    # Adjacent literals are joined before .format() is applied to the whole string.
    options = (
        " -a TTAGACATAT -A CAGTGGAGTA -M 14"
        " --too-long-output {}"
        " --too-long-paired-output {}"
    ).format(too_long_1, too_long_2)
    run_paired(
        options,
        in1="paired.1.fastq",
        in2="paired.2.fastq",
        expected1="paired-too-short.1.fastq",
        expected2="paired-too-short.2.fastq",
        cores=cores,
    )
    for reference, produced in (("paired.1.fastq", too_long_1), ("paired.2.fastq", too_long_2)):
        assert_files_equal(cutpath(reference), produced)
Beispiel #14
0
def test_untrimmed_paired_output(tmpdir, run_paired):
    """Check --untrimmed-output and --untrimmed-paired-output with --pair-filter=first."""
    first_untrimmed = str(tmpdir.join("untrimmed.1.fastq"))
    second_untrimmed = str(tmpdir.join("untrimmed.2.fastq"))
    options = ["-a", "TTAGACATAT", "--pair-filter=first"]
    options += ["--untrimmed-output", first_untrimmed]
    options += ["--untrimmed-paired-output", second_untrimmed]
    run_paired(
        options,
        in1="paired.1.fastq",
        in2="paired.2.fastq",
        expected1="paired-trimmed.1.fastq",
        expected2="paired-trimmed.2.fastq",
        cores=1,
    )
    assert_files_equal(cutpath("paired-untrimmed.1.fastq"), first_untrimmed)
    assert_files_equal(cutpath("paired-untrimmed.2.fastq"), second_untrimmed)
Beispiel #15
0
def test_combinatorial_demultiplexing(tmpdir, discarduntrimmed, cores):
    """Check which {name1}_{name2} output files exist after a paired run.

    Files for the A/C x G/T combinations must always exist. Files involving
    "unknown" must exist unless discarduntrimmed is set, in which case
    --discard-untrimmed is passed and they must be absent. Contents are
    compared only when a reference file exists.
    """
    argv = [
        "-g", "A=^AAAAAAAAAA", "-g", "C=^CCCCCCCCCC",
        "-G", "G=^GGGGGGGGGG", "-G", "T=^TTTTTTTTTT",
        "-o", str(tmpdir.join("combinatorial.{name1}_{name2}.1.fastq")),
        "-p", str(tmpdir.join("combinatorial.{name1}_{name2}.2.fastq")),
        "--cores", str(cores),
        datapath("combinatorial.1.fastq"), datapath("combinatorial.2.fastq"),
    ]
    if discarduntrimmed:
        argv.append("--discard-untrimmed")

    # Pairs in which at least one of the two names is "unknown"
    with_unknown = [("unknown", "unknown")]
    with_unknown.extend((first, "unknown") for first in "AC")
    with_unknown.extend(("unknown", second) for second in "GT")

    # Each entry is (name1, name2, must_exist)
    expectations = [(first, second, True) for first, second in product("AC", "GT")]
    expectations += [(first, second, not discarduntrimmed) for first, second in with_unknown]

    main(argv)
    for first, second, must_exist in expectations:
        for i in (1, 2):
            name = "combinatorial.{name1}_{name2}.{i}.fastq".format(name1=first, name2=second, i=i)
            path = cutpath(os.path.join("combinatorial", name))
            produced = tmpdir.join(name)
            if not must_exist:
                assert not produced.check(), ("Output file should not exist", name)
                continue
            assert produced.check(), ("Output file missing", name)
            if os.path.exists(path):
                assert_files_equal(path, str(produced))
Beispiel #16
0
def test_info_file_times(run, tmpdir):
    """Compare the --info-file output of a run with --times 2 and two adapters to the reference."""
    info_file = str(tmpdir.join("info.txt"))
    adapters = ["-a", "adapt=GCCGAACTTCTTA", "-a", "adapt2=GACTGCCTTAAGGACGT"]
    options = ["--info-file", info_file, "--times", "2"] + adapters
    run(options, "illumina5.fastq", "illumina5.fastq")
    assert_files_equal(cutpath('illumina5.info.txt'), info_file)
Beispiel #17
0
def test_info_file(run, tmpdir):
    """Compare the --info-file output for illumina.fastq.gz to the reference file."""
    # The adapter given below differs in its fourth base from the true adapter
    # sequence of the illumina.fastq.gz data set, which is
    # GCCTAACTTCTTAGACTGCCTTAAGGACGT.
    info_file = str(tmpdir.join("info.txt"))
    options = ["--info-file", info_file]
    options += ["-a", "adapt=GCCGAACTTCTTAGACTGCCTTAAGGACGT"]
    run(options, "illumina.fastq", "illumina.fastq.gz")
    assert_files_equal(cutpath("illumina.info.txt"), info_file)
Beispiel #18
0
def test_info_file_times():
    """Compare the --info-file output of a run with --times 2 and two adapters to the reference."""
    with temporary_path("infotmp.txt") as info_file:
        options = ["--info-file", info_file, '--times', '2']
        options += ['-a', 'adapt=GCCGAACTTCTTA']
        options += ['-a', 'adapt2=GACTGCCTTAAGGACGT']
        run(options, "illumina5.fastq", "illumina5.fastq")
        assert_files_equal(cutpath('illumina5.info.txt'), info_file)
Beispiel #19
0
def test_paired_end_legacy():
    """--paired-output, no -A/-B/-G"""
    with temporary_path("paired-tmp.fastq") as second_output:
        # -m 14 removes one read; that read is expected to be missing from the
        # second output file as well.
        options = ['-a', 'TTAGACATAT', '-m', '14']
        options += ['--paired-output', second_output]
        run(options, 'paired.m14.1.fastq', 'paired.1.fastq', 'paired.2.fastq')
        assert files_equal(cutpath('paired.m14.2.fastq'), second_output)
Beispiel #20
0
def test_info_file(run, tmpdir):
    """Compare the --info-file output for illumina.fastq.gz to the reference file."""
    # The true adapter sequence in the illumina.fastq.gz data set is
    # GCCTAACTTCTTAGACTGCCTTAAGGACGT; the one passed here differs in the
    # fourth base.
    info_file = str(tmpdir.join("info.txt"))
    adapter = "adapt=GCCGAACTTCTTAGACTGCCTTAAGGACGT"
    run(
        ["--info-file", info_file, "-a", adapter],
        "illumina.fastq",
        "illumina.fastq.gz",
    )
    assert_files_equal(cutpath("illumina.info.txt"), info_file)
Beispiel #21
0
def test_untrimmed_output(run, cores, tmpdir):
    """Compare the file written via --untrimmed-output to the reference file."""
    untrimmed = str(tmpdir.join("untrimmed.fastq"))
    options = ["--cores", str(cores)]
    options += ["-a", "TTAGACATATCTCCGTCG"]
    options += ["--untrimmed-output", untrimmed]
    run(options, "small.trimmed.fastq", "small.fastq")
    assert_files_equal(cutpath("small.untrimmed.fastq"), untrimmed)
Beispiel #22
0
def test_info_file():
    """Compare the --info-file output for illumina.fastq.gz to the reference file."""
    with temporary_path("infotmp.txt") as info_file:
        adapter = 'adapt=GCCGAACTTCTTAGACTGCCTTAAGGACGT'
        options = ["--info-file", info_file, '-a', adapter]
        run(options, "illumina.fastq", "illumina.fastq.gz")
        assert files_equal(cutpath('illumina.info.txt'), info_file)
Beispiel #23
0
def test_untrimmed_paired_output_automatic_pair_filter(tmpdir, run_paired):
    """Check the untrimmed outputs when no --pair-filter and no R2 adapter is given."""
    # Without R2 adapters, --pair-filter should be ignored for
    # --discard-untrimmed, --untrimmed-output and --untrimmed-paired-output
    # and should always be "both" (with --pair-filter=any, all pairs would be
    # considered untrimmed because the R1 read is always untrimmed).
    out_r1 = str(tmpdir.join("untrimmed.1.fastq"))
    out_r2 = str(tmpdir.join("untrimmed.2.fastq"))
    options = [
        "-a", "TTAGACATAT",
        "--untrimmed-output", out_r1,
        "--untrimmed-paired-output", out_r2,
    ]
    run_paired(
        options,
        in1="paired.1.fastq",
        in2="paired.2.fastq",
        expected1="paired-trimmed.1.fastq",
        expected2="paired-trimmed.2.fastq",
        cores=1,
    )
    for reference, produced in (
        ("paired-untrimmed.1.fastq", out_r1),
        ("paired-untrimmed.2.fastq", out_r2),
    ):
        assert_files_equal(cutpath(reference), produced)
Beispiel #24
0
def test_untrimmed_paired_output_automatic_pair_filter(tmpdir, run_paired):
    """Check the untrimmed outputs when no --pair-filter and no R2 adapter is given."""
    # If no R2 adapters are given, --pair-filter should be ignored for
    # --discard-untrimmed, --untrimmed-output, --untrimmed-paired-output
    # and always be "both" (with --pair-filter=any, all pairs would be
    # considered untrimmed because the R1 read is always untrimmed).
    first_untrimmed = str(tmpdir.join("untrimmed.1.fastq"))
    second_untrimmed = str(tmpdir.join("untrimmed.2.fastq"))
    options = ["-a", "TTAGACATAT"]
    options += ["--untrimmed-output", first_untrimmed]
    options += ["--untrimmed-paired-output", second_untrimmed]
    run_paired(
        options,
        in1="paired.1.fastq",
        in2="paired.2.fastq",
        expected1="paired-trimmed.1.fastq",
        expected2="paired-trimmed.2.fastq",
        cores=1,
    )
    assert_files_equal(cutpath("paired-untrimmed.1.fastq"), first_untrimmed)
    assert_files_equal(cutpath("paired-untrimmed.2.fastq"), second_untrimmed)
def test_untrimmed_paired_output():
    """Check the trimmed and untrimmed outputs of a paired run of cutadapt.main()."""
    with temporary_path("tmp-paired.1.fastq") as trimmed_r1, \
            temporary_path("tmp-paired.2.fastq") as trimmed_r2, \
            temporary_path("tmp-untrimmed.1.fastq") as untrimmed_r1, \
            temporary_path("tmp-untrimmed.2.fastq") as untrimmed_r2:
        argv = ['-a', 'TTAGACATAT']
        argv += ['-o', trimmed_r1, '-p', trimmed_r2]
        argv += ['--untrimmed-output', untrimmed_r1]
        argv += ['--untrimmed-paired-output', untrimmed_r2]
        argv += [datapath('paired.1.fastq'), datapath('paired.2.fastq')]
        assert cutadapt.main(argv) is None
        assert files_equal(cutpath('paired-untrimmed.1.fastq'), untrimmed_r1)
        assert files_equal(cutpath('paired-untrimmed.2.fastq'), untrimmed_r2)
        assert files_equal(cutpath('paired-trimmed.1.fastq'), trimmed_r1)
        assert files_equal(cutpath('paired-trimmed.2.fastq'), trimmed_r2)
Beispiel #26
0
def test_linked_info_file(tmpdir):
    """Compare the --info-file output of a run on linked.fasta to the reference file."""
    info_file = str(tmpdir.join('info.txt'))
    fasta_out = str(tmpdir.join('out.fasta'))
    # The first element deliberately stays a single argument (option and
    # value in one string), exactly as the call has always passed it.
    argv = ['-a linkedadapter=^AAAAAAAAAA...TTTTTTTTTT']
    argv += ['--info-file', info_file]
    argv += ['-o', fasta_out]
    argv.append(datapath('linked.fasta'))
    main(argv)
    assert_files_equal(cutpath('linked-info.txt'), info_file)
Beispiel #27
0
def test_demultiplex():
    """Run cutadapt.main() with a {name} output template and check the three resulting files."""
    multiout = os.path.join(os.path.dirname(__file__), 'data',
                            'tmp-demulti.{name}.fasta')
    names = ('first', 'second', 'unknown')
    params = [
        '-a', 'first=AATTTCAGGAATT', '-a', 'second=GTTCTCTAGTTCT', '-o',
        multiout,
        datapath('twoadapters.fasta')
    ]
    try:
        assert cutadapt.main(params) is None
        for name in names:
            assert files_equal(cutpath('twoadapters.{}.fasta'.format(name)),
                               multiout.format(name=name))
    finally:
        # The outputs are written into the 'data' directory beside this file,
        # so remove them even when an assertion above fails. A file may be
        # missing if the run itself failed, hence the existence check.
        for name in names:
            path = multiout.format(name=name)
            if os.path.exists(path):
                os.remove(path)
Beispiel #28
0
 def _run(params, expected, inpath):
     """Run main() on one input file and compare the output with a reference.

     params: options as a list, or as one string that is split on whitespace.
     expected: reference file name, resolved through cutpath(); the same name
         is used for the output file inside tmpdir.
     inpath: input file name, resolved through datapath().
     """
     # Copy list input so that the in-place "+=" below does not alter the
     # caller's list.
     if isinstance(params, str):
         params = params.split()
     else:
         params = list(params)
     tmp_fastaq = str(tmpdir.join(expected))
     params += ['-o', tmp_fastaq]
     params += [datapath(inpath)]
     assert main(params) is None
     # TODO redirect standard output
     assert_files_equal(cutpath(expected), tmp_fastaq)
Beispiel #29
0
def test_info_file_times(run, tmp_path, cores):
    """Compare the --info-file output of a run with --times 2 and two adapters to the reference."""
    info_path = tmp_path / "info.txt"
    run([
        "--cores",
        str(cores),
        # Command-line arguments should be plain strings, so the Path is
        # converted explicitly, as the other tmp_path-based tests do.
        "--info-file", str(info_path), "--times", "2", "-a",
        "adapt=GCCGAACTTCTTA", "-a", "adapt2=GACTGCCTTAAGGACGT"
    ], "illumina5.fastq", "illumina5.fastq")
    assert_files_equal(cutpath('illumina5.info.txt'),
                       info_path,
                       ignore_trailing_space=True)
Beispiel #30
0
def test_untrimmed_paired_output():
    """Check the trimmed and untrimmed outputs of a paired run of cutadapt.main()."""
    with temporary_path("tmp-paired.1.fastq") as trimmed_r1, \
            temporary_path("tmp-paired.2.fastq") as trimmed_r2, \
            temporary_path("tmp-untrimmed.1.fastq") as untrimmed_r1, \
            temporary_path("tmp-untrimmed.2.fastq") as untrimmed_r2:
        argv = ['-a', 'TTAGACATAT', '-o', trimmed_r1, '-p', trimmed_r2]
        argv += ['--untrimmed-output', untrimmed_r1]
        argv += ['--untrimmed-paired-output', untrimmed_r2]
        argv += [datapath('paired.1.fastq'), datapath('paired.2.fastq')]
        assert cutadapt.main(argv) is None
        # Untrimmed files are checked first, then the regular outputs.
        assert files_equal(cutpath('paired-untrimmed.1.fastq'), untrimmed_r1)
        assert files_equal(cutpath('paired-untrimmed.2.fastq'), untrimmed_r2)
        assert files_equal(cutpath('paired-trimmed.1.fastq'), trimmed_r1)
        assert files_equal(cutpath('paired-trimmed.2.fastq'), trimmed_r2)
Beispiel #31
0
def test_info_file_revcomp(run, tmp_path):
    """Compare the --info-file output of a --revcomp run on info-rc.fasta to the reference."""
    info_file = tmp_path / "info-rc.txt"
    argv = ["--info-file", str(info_file)]
    argv += ["-a", "adapt=GAGTCG", "--revcomp", "--rename={header}"]
    argv += ["-o", str(tmp_path / "out.fasta")]
    argv.append(datapath("info-rc.fasta"))
    main(argv)
    assert_files_equal(cutpath("info-rc.txt"), info_file)
Beispiel #32
0
def test_standard_output(tmpdir, cores):
    """Write FASTQ to standard output (not using --output/-o option)"""
    captured = str(tmpdir.join("out.fastq"))
    command = [sys.executable, "-m", "cutadapt"]
    command += ["--cores", str(cores)]
    command += ["-a", "TTAGACATATCTCCGTCG", datapath("small.fastq")]
    # The child's stdout is redirected into the file that is compared afterwards.
    with open(captured, "w") as sink:
        child = subprocess.Popen(command, stdout=sink)
        child.communicate()
    assert_files_equal(cutpath("small.fastq"), captured)
Beispiel #33
0
 def _run(params, expected, inpath) -> Statistics:
     """Run main() on one input file, compare the output, and return main()'s result.

     params: options as a list, or as one string that is split on whitespace.
     expected: reference file name, resolved through cutpath(); the same name
         is used for the output file inside tmpdir.
     inpath: input file name, resolved through datapath().
     """
     # Copy list input so that the in-place "+=" below does not alter the
     # caller's list.
     if isinstance(params, str):
         params = params.split()
     else:
         params = list(params)
     tmp_fastaq = str(tmpdir.join(expected))
     params += ['-o', tmp_fastaq]
     params += [datapath(inpath)]
     stats = main(params)
     # TODO redirect standard output
     assert_files_equal(cutpath(expected), tmp_fastaq)
     return stats
Beispiel #34
0
def test_untrimmed_output(run, cores, tmp_path):
    """Check the --untrimmed-output file and three values of the returned statistics."""
    path = tmp_path / "untrimmed.fastq"
    stats = run([
        "--cores",
        str(cores), "-a", "TTAGACATATCTCCGTCG",
        # Command-line arguments should be plain strings, so the Path is
        # converted explicitly.
        "--untrimmed-output", str(path)
    ], "small.trimmed.fastq", "small.fastq")
    assert_files_equal(cutpath("small.untrimmed.fastq"), path)
    assert stats.with_adapters[0] == 2
    assert stats.written == 2
    assert stats.written_bp[0] == 46
Beispiel #35
0
def test_info_file():
    """Compare the --info-file output for illumina.fastq.gz to the reference file."""
    # The adapter used here differs in its fourth base from the true adapter
    # sequence of the illumina.fastq.gz data set
    # (GCCTAACTTCTTAGACTGCCTTAAGGACGT).
    with temporary_path("infotmp.txt") as info_file:
        adapter = 'adapt=GCCGAACTTCTTAGACTGCCTTAAGGACGT'
        options = ["--info-file", info_file, '-a', adapter]
        run(options, "illumina.fastq", "illumina.fastq.gz")
        assert_files_equal(cutpath('illumina.info.txt'), info_file)
Beispiel #36
0
 def _run(params, inpath1, inpath2=None, expected1=None, expected2=None, cores=1):
     """Run main() with --interleaved and compare its output to reference files.

     params: options as a list, or as one string that is split on whitespace.
     inpath1, inpath2: input file names, resolved through datapath().
     expected1, expected2: reference file names, resolved through cutpath().
         expected1 is concatenated into the name of the first output file,
         so it has to be a string even though its default is None.
     cores: value passed to --cores.
     """
     assert not (inpath1 and inpath2 and expected1 and expected2)
     assert not (expected2 and not expected1)
     assert not (inpath2 and not inpath1)
     # Copy list input so that the in-place "+=" below does not alter the
     # caller's list.
     if isinstance(params, str):
         params = params.split()
     else:
         params = list(params)
     params += ["--interleaved", "--cores", str(cores), "--buffer-size=512"]
     tmp1 = str(tmpdir.join("out1-" + expected1))
     params += ["-o", tmp1]
     paths = [datapath(inpath1)]
     if inpath2:
         paths += [datapath(inpath2)]
     if expected2:
         tmp2 = str(tmpdir.join("out2-" + expected2))
         params += ["-p", tmp2]
         assert main(params + paths) is None
         assert_files_equal(cutpath(expected2), tmp2)
     else:
         assert main(params + paths) is None
     assert_files_equal(cutpath(expected1), tmp1)
Beispiel #37
0
def test_force_fasta_output(tmpdir, cores):
    """Write FASTA to standard output even on FASTQ input"""
    captured = str(tmpdir.join("out.fasta"))
    command = [sys.executable, "-m", "cutadapt", "--fasta", "-o", "-"]
    command += ["--cores", str(cores)]
    command += ["-a", "TTAGACATATCTCCGTCG", datapath("small.fastq")]
    # The child's stdout is redirected into the file that is compared afterwards.
    with open(captured, "w") as sink:
        child = subprocess.Popen(command, stdout=sink)
        child.communicate()
    assert_files_equal(cutpath("small.fasta"), captured)
Beispiel #38
0
def test_linked_info_file(tmp_path):
    """Compare the --info-file output of a run on linked.fasta to the reference file."""
    info_file = tmp_path / 'info.txt'
    # The first element deliberately stays a single argument (option and
    # value in one string), exactly as the call has always passed it.
    argv = ['-a linkedadapter=^AAAAAAAAAA...TTTTTTTTTT']
    argv += ['--info-file', str(info_file)]
    argv += ['-o', str(tmp_path / 'out.fasta')]
    argv.append(datapath('linked.fasta'))
    main(argv)
    assert_files_equal(
        cutpath('linked-info.txt'), info_file, ignore_trailing_space=True
    )
Beispiel #39
0
def test_info_file(run, tmp_path, cores):
    """Compare the --info-file output for illumina.fastq.gz to the reference file."""
    # The true adapter sequence in the illumina.fastq.gz data set is
    # GCCTAACTTCTTAGACTGCCTTAAGGACGT (fourth base is different from the sequence shown here)
    info_path = tmp_path / "info.txt"
    run([
        "--cores",
        str(cores),
        # Command-line arguments should be plain strings, so the Path is
        # converted explicitly, as the other tmp_path-based tests do.
        "--info-file", str(info_path), "-a",
        "adapt=GCCGAACTTCTTAGACTGCCTTAAGGACGT"
    ], "illumina.fastq", "illumina.fastq.gz")
    assert_files_equal(cutpath("illumina.info.txt"),
                       info_path,
                       ignore_trailing_space=True)
Beispiel #40
0
def test_explicit_standard_output(tmpdir, cores):
    """Write FASTQ to standard output (using "-o -")"""

    import subprocess
    captured = str(tmpdir.join("out.fastq"))
    command = [sys.executable, "-m", "cutadapt", "-o", "-"]
    command += ["--cores", str(cores)]
    command += ["-a", "TTAGACATATCTCCGTCG", datapath("small.fastq")]
    # The child's stdout is redirected into the file that is compared afterwards.
    with open(captured, "w") as sink:
        child = subprocess.Popen(command, stdout=sink)
        child.communicate()
    assert_files_equal(cutpath("small.fastq"), captured)
Beispiel #41
0
def test_standard_input_pipe(tmpdir, cores):
    """Read FASTQ from standard input"""

    import subprocess
    out_path = str(tmpdir.join("out.fastq"))
    in_path = datapath("small.fastq")
    # Use 'cat' to simulate that no file name is available for stdin
    cat = subprocess.Popen(["cat", in_path], stdout=subprocess.PIPE)
    py = subprocess.Popen([
        sys.executable, "-m", "cutadapt", "--cores", str(cores),
        "-a", "TTAGACATATCTCCGTCG", "-o", out_path, "-"],
        stdin=cat.stdout)
    # Close the parent's copy of the pipe as soon as the consumer has been
    # started, so that 'cat' receives SIGPIPE if the consumer exits early
    # (the pipeline pattern from the subprocess documentation).
    cat.stdout.close()
    # Waiting once is enough; the earlier second communicate() call was redundant.
    py.communicate()
    # Reap the producer so that no child process is left behind.
    cat.wait()
    assert_files_equal(cutpath("small.fastq"), out_path)
Beispiel #42
0
def test_pair_adapters_demultiplexing(tmpdir):
    """Check the i1/i2/unknown output files of a --pair-adapters run with {name} templates."""
    argv = ["-g", "i1=AAAA", "-G", "i1=GGGG", "-g", "i2=CCCC", "-G", "i2=TTTT"]
    argv.append("--pair-adapters")
    argv += ["-o", str(tmpdir.join("dual-{name}.1.fastq"))]
    argv += ["-p", str(tmpdir.join("dual-{name}.2.fastq"))]
    argv += [datapath("dual-index.1.fastq"), datapath("dual-index.2.fastq")]
    assert main(argv) is None
    # Same six file names, in the same order, as a spelled-out list
    expected_names = [
        "dual-{}.{}.fastq".format(label, read)
        for label in ("i1", "i2", "unknown")
        for read in (1, 2)
    ]
    for name in expected_names:
        produced = tmpdir.join(name)
        assert produced.check()
        assert_files_equal(cutpath(name), str(produced))
Beispiel #43
0
def test_paired_demultiplex(tmpdir):
    """Check the first/second/unknown files for both reads of a paired run with {name} templates."""
    template_r1 = str(tmpdir.join("demultiplexed.{name}.1.fastq"))
    template_r2 = str(tmpdir.join("demultiplexed.{name}.2.fastq"))
    argv = ["-a", "first=AACATTAGACA", "-a", "second=CATTAGACATATCGG"]
    argv += ["-A", "ignored=CAGTGGAGTA", "-A", "alsoignored=AATAACAGTGGAGTA"]
    argv += ["-o", template_r1, "-p", template_r2]
    argv += [datapath("paired.1.fastq"), datapath("paired.2.fastq")]
    assert main(argv) is None
    # All read-1 files are checked before the read-2 files.
    for read, template in ((1, template_r1), (2, template_r2)):
        for name in ("first", "second", "unknown"):
            reference = cutpath("demultiplexed.{}.{}.fastq".format(name, read))
            assert_files_equal(reference, template.format(name=name))
def test_explicit_format_with_paired():
    """Run with --format=fastq on the paired .txt inputs and check the -p output file."""
    with temporary_path("paired-tmp.fastq") as second_output:
        options = ['--format=fastq', '-a', 'TTAGACATAT', '-m', '14']
        options += ['-p', second_output]
        run(options, 'paired.m14.1.fastq', 'paired.1.txt', 'paired.2.txt')
        assert files_equal(cutpath('paired.m14.2.fastq'), second_output)
Beispiel #45
0
def test_info_file_times(run, tmpdir):
    """Compare the --info-file output of a run with --times 2 and two adapters to the reference."""
    info_file = str(tmpdir.join("info.txt"))
    options = ["--info-file", info_file, "--times", "2"]
    for adapter in ("adapt=GCCGAACTTCTTA", "adapt2=GACTGCCTTAAGGACGT"):
        options += ["-a", adapter]
    run(options, "illumina5.fastq", "illumina5.fastq")
    assert_files_equal(cutpath('illumina5.info.txt'), info_file)
Beispiel #46
0
def test_untrimmed_output(run, tmpdir):
    """Compare the file written via --untrimmed-output to the reference file."""
    untrimmed = str(tmpdir.join("untrimmed.fastq"))
    options = ["-a", "TTAGACATATCTCCGTCG"]
    options += ["--untrimmed-output", untrimmed]
    run(options, "small.trimmed.fastq", "small.fastq")
    assert_files_equal(cutpath("small.untrimmed.fastq"), untrimmed)
def test_untrimmed_output():
    """Compare the file written via --untrimmed-output to the reference file."""
    with temporary_path('untrimmed.tmp.fastq') as untrimmed:
        options = ['-a', 'TTAGACATATCTCCGTCG']
        options += ['--untrimmed-output', untrimmed]
        run(options, 'small.trimmed.fastq', 'small.fastq')
        assert files_equal(cutpath('small.untrimmed.fastq'), untrimmed)
def test_paired_end_legacy():
    """--paired-output, no -A/-B/-G"""
    with temporary_path("paired-tmp.fastq") as second_output:
        # -m 14 removes one read; that read is expected to be missing from the
        # second output file as well.
        options = ['-a', 'TTAGACATAT', '-m', '14']
        options += ['--paired-output', second_output]
        run(options, 'paired.m14.1.fastq', 'paired.1.fastq', 'paired.2.fastq')
        assert files_equal(cutpath('paired.m14.2.fastq'), second_output)
def test_info_file():
    """Compare the --info-file output for illumina.fastq.gz to the reference file."""
    with temporary_path("infotmp.txt") as info_file:
        adapter = 'adapt=GCCGAACTTCTTAGACTGCCTTAAGGACGT'
        options = ["--info-file", info_file, '-a', adapter]
        run(options, "illumina.fastq", "illumina.fastq.gz")
        assert files_equal(cutpath('illumina.info.txt'), info_file)