コード例 #1
0
def test_run_extractor_zero_reads_rsv(request):
    """Run the RSV extraction workflow with the zero-reads sample manifest.

    The workflow is launched through ``MICGENT.gene_extractor
    run-extraction-wf`` with the Toil runner. Afterwards the test requires
    that ``out/manifest_out.tsv`` consists of a single line (the header) and
    that ``out/web/manifest_out_all.tsv`` holds exactly one row whose
    ``Asm_Msg`` value contains ``WARNING:``.
    """
    get_opt = request.config.getoption
    test_data = os.path.abspath(globals()["test_data"])
    micgent_data = get_opt('--micgent-data')
    conf_dir = pjoin(test_data, "gene_extractor/rsv")
    data_dir = pjoin(get_opt('--large-test-data'), "rsv/SA2018_sub")
    conda_env = get_opt('--conda-env-ngs-mstb')
    cmd_template = " ".join([
        "python -m MICGENT.gene_extractor run-extraction-wf --micgent-data {micgent_data}",
        "--datadir {data_dir} --prepareref-tgz {conf_dir}/ref_AB_single.trimmed.arbref.tgz",
        "--manifest {conf_dir}/sample_manifest_zero_reads.txt --ref-common {conf_dir}/ref_AB_single.fasta",
        "--cwl-runner-name toil --cwl-runner-config cwl_runner.yaml",
        "--cwl-inputs {conf_dir}/cwl_inputs.yaml",
        "--outdir out",
        "--web-dir-out out/web",
    ])
    with helpers.mkchdir("extractor_zero_reads_rsv"):
        # The return value is not used; presumably this call writes the
        # cwl_runner.yaml named on the command line -- TODO confirm.
        make_extractor_runner_conf(conda_env, runner_no_retry=True, leave_workdir=True)
        cmd = cmd_template.format(
            micgent_data=micgent_data,
            data_dir=data_dir,
            conf_dir=conf_dir,
        )
        check_call(shlex.split(cmd))

        manifest_out = "out/manifest_out.tsv"
        assert os.path.exists(manifest_out)
        with open(manifest_out, "r") as manifest_fh:
            n_lines = len(manifest_fh.readlines())
        assert n_lines == 1, "Expected only header line in output manifest"

        manifest_out_all = "out/web/manifest_out_all.tsv"
        assert os.path.exists(manifest_out_all)
        all_rows = pd.read_table(manifest_out_all)
        assert len(all_rows) == 1
        assert 'WARNING:' in str(all_rows["Asm_Msg"][0])
コード例 #2
0
def test_ariba_stop_codon_sa(request):
    """Run the ``ariba_run.cwl`` workflow under Toil for sample SMA807030.

    Workflow inputs are written to ``wf_inp.yaml`` inside the run directory
    and the workflow is started through ``MICGENT.cwl_runner run-toil``.
    The test passes when that command exits with status zero.
    """
    test_data = os.path.abspath(globals()["test_data"])
    conf_dir = pjoin(test_data, "gene_extractor/sa")
    data_dir = pjoin(request.config.getoption('--large-test-data'), "sa/sast300")
    cwl_wf = pjoin(pjoin(resources.get_pkg_data_dir("MICGENT"), "cwl"), "ariba_run.cwl")
    conda_env = request.config.getoption('--conda-env-ngs-mstb')
    with helpers.mkchdir("ariba_stop_codon_sa"):
        # The return value is not used; presumably this call writes the
        # cwl_runner.yaml passed via --config below -- TODO confirm.
        make_extractor_runner_conf(conda_env, runner_no_retry=True, leave_workdir=True)
        # Forward (R1) and reverse (R2) read files, in that order.
        fwd_reads, rev_reads = (
            path_to_cwl_file(
                os.path.join(data_dir, "SMA807030_S1_L001_R{}_001.fastq.gz".format(mate)))
            for mate in (1, 2)
        )
        # Resolved after the chdir so the path points into the run directory.
        wf_inp = os.path.abspath("wf_inp.yaml")
        wf_params = {
            "reads_1": fwd_reads,
            "reads_2": rev_reads,
            "threads": 8,
            "serial": True,
            "debug": True,
            "SampleID": "SMA807030",
            "prepareref_tgz": path_to_cwl_file(pjoin(conf_dir, "ref_HA.arbref.tar")),
        }
        yaml_util.dump_yaml(wf_params, wf_inp)
        cmd = " ".join([
            "python -m MICGENT.cwl_runner --config cwl_runner.yaml",
            "run-toil --logLevel DEBUG",
            "{cwl_wf} {wf_inp}",
        ]).format(cwl_wf=cwl_wf, wf_inp=wf_inp)
        check_call(shlex.split(cmd))
コード例 #3
0
def test_ariba_skewed_cov_filter_rsv(request):
    """Run ``ariba_run.cwl`` for sample SMA589764 with the plugin assembler.

    The workflow inputs select ``assembler = "plugin"`` and pass the
    ``asm-for-skewed-coverage`` plugin command line. The inputs are written
    to ``wf_inp.yaml`` and the workflow is started through
    ``MICGENT.cwl_runner run-toil``; the test passes when that command exits
    with status zero.
    """
    test_data = os.path.abspath(globals()["test_data"])
    conf_dir = pjoin(test_data, "gene_extractor/rsv")
    data_dir = pjoin(request.config.getoption('--large-test-data'), "rsv/SA2018")
    cwl_wf = pjoin(pjoin(resources.get_pkg_data_dir("MICGENT"), "cwl"), "ariba_run.cwl")
    conda_env = request.config.getoption('--conda-env-ngs-mstb')
    with helpers.mkchdir("ariba_run_skewed_cov_filter_rsv"):
        # The return value is not used; presumably this call writes the
        # cwl_runner.yaml passed via --config below -- TODO confirm.
        make_extractor_runner_conf(conda_env, runner_no_retry=True, leave_workdir=True)
        # Forward (R1) and reverse (R2) read files, in that order.
        fwd_reads, rev_reads = (
            path_to_cwl_file(
                os.path.join(data_dir, "SMA589764_S1_L001_R{}_001.fastq.gz".format(mate)))
            for mate in (1, 2)
        )
        # Resolved after the chdir so the path points into the run directory.
        wf_inp = os.path.abspath("wf_inp.yaml")
        wf_params = {
            "reads_1": fwd_reads,
            "reads_2": rev_reads,
            "assembly_cov": 1000000,
            "assembly_cov_min": 10,
            "assembler": "plugin",
            "plugin_asm_options": 'python -m MICGENT.ariba_asm_plugin asm-for-skewed-coverage --extra-args "{deterministic: true}"',
            "threads": 8,
            "serial": True,
            "debug": True,
            "SampleID": "SMA589764",
            "prepareref_tgz": path_to_cwl_file(
                pjoin(conf_dir, "ref_AB_single.trimmed.arbref.tgz")),
        }
        yaml_util.dump_yaml(wf_params, wf_inp)
        cmd = " ".join([
            "python -m MICGENT.cwl_runner --config cwl_runner.yaml",
            "run-toil --logLevel DEBUG",
            "{cwl_wf} {wf_inp}",
        ]).format(cwl_wf=cwl_wf, wf_inp=wf_inp)
        check_call(shlex.split(cmd))
コード例 #4
0
ファイル: test_post_ariba.py プロジェクト: ngs-mstb/micgent
def test_extract_contigs_stop_codon_revcomp(request):
    """Check ``extract-contigs`` on a gene that contains a stop codon.

    The gene overlaps the assembled match only partially and is found either
    on the positive or on the negative strand (one input directory per
    case). For several padding combinations the test verifies that the
    correspondence between the gene and the match is recognised: only one
    record is reported, it has ``SeqStatus == "gene"`` (the match itself is
    not output), and the single output sequence contains the input gene
    sequence as a substring (i.e. it is on the same strand).
    """
    test_data = os.path.abspath(globals()["test_data"])
    # (--pad-assembled, --pad-gene) combinations to exercise.
    paddings = ((0, 0), (1, 1), (200, 200), (200, 0), (0, 200))
    input_names = (
        "report.tsv",
        "assembled_genes.fa.gz",
        "assembled_seqs.fa.gz",
        "assemblies.fa.gz",
        "seq_map.tsv",
        "stdout.log",
        "stderr.log",
    )
    for case_dir in ("extract_contigs_stop_codon", "extract_contigs_stop_codon_revcomp"):
        inp_dir = pjoin(test_data, "gene_extractor", case_dir)
        input_args = " ".join("{}/{}".format(inp_dir, name) for name in input_names)
        with helpers.mkchdir(case_dir):
            for pad_asm, pad_gene in paddings:
                cmd = " ".join([
                    "python -m MICGENT.gene_extractor extract-contigs",
                    "--cut-to-ref --pad-assembled {} --pad-gene {}".format(pad_asm, pad_gene),
                    "SMA807030",
                    input_args,
                    "report_out.tsv seq_out.fasta status.tsv",
                ])
                print(cmd)
                check_call(shlex.split(cmd))

                report = pd.read_table("report_out.tsv")
                assert len(report) == 1
                assert report.loc[0, "SeqStatus"] == "gene"

                extracted = seq_util.fasta_to_df("seq_out.fasta", seq_format_out="str")
                gene = seq_util.fasta_to_df(
                    "{}/assembled_genes.fa.gz".format(inp_dir), seq_format_out="str")
                assert len(extracted) == 1
                assert str(gene.seq[0]) in str(extracted.seq[0])
コード例 #5
0
ファイル: test_post_ariba.py プロジェクト: ngs-mstb/micgent
def test_filter_assemblies(descr,filter_yaml,manifests):
    """Run ``post_ariba filter-assemblies`` and compare its output tables.

    :param descr: description forwarded to ``_compare_tables``.
    :param filter_yaml: value placed (single-quoted) after ``--args`` on the
        command line; also forwarded to ``_compare_tables``.
    :param manifests: iterable of ``(returned_manifest, expected_json)``
        pairs; each pair is checked with ``_compare_tables``.
    """
    test_data = os.path.abspath(globals()["test_data"])
    filter_inp_dir = pjoin(test_data, "gene_extractor/post_ariba/filter")
    cmd = " ".join([
        "python -m MICGENT.post_ariba filter-assemblies",
        "--args '{args}'",
        "--manifest {inp}/manifest_out.tsv",
        "--contigs {inp}/seq_out.fasta",
    ]).format(args=filter_yaml, inp=filter_inp_dir)
    with helpers.mkchdir("filter_assemblies"):
        print(cmd)
        check_call(shlex.split(cmd))
        for pair in manifests:
            returned_manifest, expected_json = pair
            _compare_tables(expected_json, returned_manifest, filter_yaml, descr)
コード例 #6
0
def test_run_extractor_config_gen_rsv(request):
    """Check workflow-input generation of ``run-extraction-wf``.

    The command is run three times with ``--only-configure-inputs`` and the
    resulting ``out/gene_extractor.yaml`` is inspected after each run:

    1. without pre-existing workflow inputs,
    2. with ``--cwl-inputs`` pointing at the RSV ``cwl_inputs.yaml``,
    3. without pre-existing inputs but with the ``wgs_spades`` assembly
       policy and an explicit default minimum contig length.
    """
    test_data = os.path.abspath(globals()["test_data"])
    micgent_data = request.config.getoption('--micgent-data')
    conf_dir = pjoin(test_data, "gene_extractor/rsv")
    data_dir = pjoin(request.config.getoption('--large-test-data'), "rsv/SA2017")

    def load_wf_inputs():
        """Assert the generated inputs file exists, then load, print and return it."""
        wf_inp_path = "out/gene_extractor.yaml"
        assert os.path.exists(wf_inp_path)
        loaded = yaml_util.load_yaml(wf_inp_path)
        print(loaded)
        return loaded

    def ctg_len_min(wf_inputs):
        """Return ``ctg_len_min`` from the YAML-encoded ``filter_asm_args``."""
        return yaml_util.get_arg_as_yaml(wf_inputs["filter_asm_args"])["ctg_len_min"]

    with helpers.mkchdir("extractor_config_gen_rsv"):
        cmd_base = " ".join([
            "python -m MICGENT.gene_extractor run-extraction-wf --micgent-data {micgent_data}",
            "--datadir {data_dir} --prepareref-tgz {conf_dir}/ref_AB_single.trimmed.arbref.tgz",
            "--manifest {conf_dir}/sample_manifest_two.txt --ref-common {conf_dir}/ref_AB_single.fasta",
            "--only-configure-inputs",
        ]).format(micgent_data=micgent_data, data_dir=data_dir, conf_dir=conf_dir)

        ## Run w/o existing workflow inputs
        check_call(shlex.split(cmd_base))
        assert os.path.exists("micgentjs.tgz")
        first = load_wf_inputs()
        assert os.path.basename(first["spikes_file"]["path"]) == "phiX.fa"
        assert "primer_literals" not in first
        assert "assembler" not in first
        assert ctg_len_min(first) == 0
        assert os.path.basename(first["ref_common"]["path"]) == "ref_AB_single.fasta"
        assert os.path.basename(first["micgentjs_tgz"]["path"]) == "micgentjs.tgz"

        ## Run again with existing workflow inputs
        with_inputs = cmd_base + " --cwl-inputs {}/cwl_inputs.yaml".format(conf_dir)
        check_call(shlex.split(with_inputs))
        second = load_wf_inputs()
        assert second["primer_literals"][1] == 'YTACCATTCAAGCAATGACCTC'
        assert ctg_len_min(second) == 2300

        ## Run w/o existing workflow inputs and with asm_policy spades
        with_spades = "".join([
            cmd_base,
            " --assembly-policy wgs_spades ",
            "--filter-asm-default-ctg-len-min 800",
        ])
        check_call(shlex.split(with_spades))
        third = load_wf_inputs()
        assert third["assembler"] == 'spades'
        assert ctg_len_min(third) == 800
コード例 #7
0
def test_run_extractor_skewed_cov_filter_rsv(request):
    """Test that median coverage filter drops sample with very low median base coverage.

    Runs the extraction workflow on the skewed-coverage manifest and requires
    that ``out/manifest_out.tsv`` exists and contains no data rows.
    """
    get_opt = request.config.getoption
    test_data = os.path.abspath(globals()["test_data"])
    micgent_data = get_opt('--micgent-data')
    conf_dir = pjoin(test_data, "gene_extractor/rsv")
    data_dir = pjoin(get_opt('--large-test-data'), "rsv/SA2018")
    conda_env = get_opt('--conda-env-ngs-mstb')
    cmd_template = " ".join([
        "python -m MICGENT.gene_extractor run-extraction-wf --micgent-data {micgent_data}",
        "--datadir {data_dir} --prepareref-tgz {conf_dir}/ref_AB_single.trimmed.arbref.tgz",
        "--manifest {conf_dir}/sample_manifest_skewed_cov_filter.txt --ref-common {conf_dir}/ref_AB_single.fasta",
        "--cwl-runner-name toil --cwl-runner-config cwl_runner.yaml",
        "--cwl-inputs {conf_dir}/cwl_inputs.yaml",
        "--outdir out",
        "--web-dir-out out/web",
    ])
    with helpers.mkchdir("extractor_skewed_cov_filter_rsv"):
        # The return value is not used; presumably this call writes the
        # cwl_runner.yaml named on the command line -- TODO confirm.
        make_extractor_runner_conf(conda_env, batchSystem="singleMachine", runner_no_retry=True)
        cmd = cmd_template.format(
            micgent_data=micgent_data,
            data_dir=data_dir,
            conf_dir=conf_dir,
        )
        check_call(shlex.split(cmd))
        manifest_out = "out/manifest_out.tsv"
        assert os.path.exists(manifest_out)
        surviving = pd.read_table(manifest_out)
        assert len(surviving) == 0
コード例 #8
0
ファイル: test_post_ariba.py プロジェクト: ngs-mstb/micgent
def test_extract_contigs(request):
    """Run ``extract-contigs`` for sample SMA1828869 and check the report.

    The output report must contain exactly two rows: one with
    ``SeqStatus == "gene"`` and one with ``SeqStatus == "match"``.
    """
    test_data = os.path.abspath(globals()["test_data"])
    inp_dir = pjoin(test_data, "gene_extractor/extract_contigs")
    input_names = (
        "report.tsv",
        "assembled_genes.fa.gz",
        "assembled_seqs.fa.gz",
        "assemblies.fa.gz",
        "seq_map.tsv",
        "stdout.log",
        "stderr.log",
    )
    cmd = " ".join([
        "python -m MICGENT.gene_extractor extract-contigs",
        "--cut-to-ref --pad-assembled 200 --sig-inp 123,123",
        "SMA1828869",
        " ".join("{}/{}".format(inp_dir, name) for name in input_names),
        "report_out.tsv seq_out.fasta status.tsv",
    ])
    with helpers.mkchdir("extract_contigs"):
        print(cmd)
        check_call(shlex.split(cmd))
        report = pd.read_table("report_out.tsv")
        assert len(report) == 2
        for status in ("gene", "match"):
            assert len(report[report.SeqStatus == status]) == 1
コード例 #9
0
def test_run_extractor_small_rsv(request):
    """Run the extraction workflow on the two-sample RSV manifest.

    Requires that ``out/manifest_out.tsv`` has three lines (header plus one
    contig per sample) and hands ``out/web/multiqc.html`` to
    ``check_multiqc_cwl_tool_output`` for validation.
    """
    get_opt = request.config.getoption
    test_data = os.path.abspath(globals()["test_data"])
    micgent_data = get_opt('--micgent-data')
    conf_dir = pjoin(test_data, "gene_extractor/rsv")
    data_dir = pjoin(get_opt('--large-test-data'), "rsv/SA2017_sub")
    conda_env = get_opt('--conda-env-ngs-mstb')
    cmd_template = " ".join([
        "python -m MICGENT.gene_extractor run-extraction-wf --micgent-data {micgent_data}",
        "--datadir {data_dir} --prepareref-tgz {conf_dir}/ref_AB_single.trimmed.arbref.tgz",
        "--manifest {conf_dir}/sample_manifest_two.txt --ref-common {conf_dir}/ref_AB_single.fasta",
        "--cwl-runner-name toil --cwl-runner-config cwl_runner.yaml",
        "--cwl-inputs {conf_dir}/cwl_inputs.yaml",
        "--outdir out",
        "--web-dir-out out/web",
    ])
    with helpers.mkchdir("extractor_small_rsv"):
        # The return value is not used; presumably this call writes the
        # cwl_runner.yaml named on the command line -- TODO confirm.
        make_extractor_runner_conf(
            conda_env,
            runner_no_retry=True,
            batchSystem="singleMachine",
            clean_jobstore=True,
            leave_workdir=True,
        )
        cmd = cmd_template.format(
            micgent_data=micgent_data,
            data_dir=data_dir,
            conf_dir=conf_dir,
        )
        check_call(shlex.split(cmd))
        manifest_out = "out/manifest_out.tsv"
        assert os.path.exists(manifest_out)
        with open(manifest_out, "r") as manifest_fh:
            n_lines = len(manifest_fh.readlines())
        assert n_lines == 3, "Expected three lines in output manifest (header and one contig per sample)"
        check_multiqc_cwl_tool_output("out/web/multiqc.html")
コード例 #10
0
ファイル: test_post_ariba.py プロジェクト: ngs-mstb/micgent
def test_extract_contigs_skewed_cov_filter_rsv(request):
    """Run ``extract-contigs`` with base-coverage inputs for sample SMA589764.

    In addition to the usual ARIBA outputs, the command receives
    ``basecov_asm.txt`` and ``basecov_ref.txt``. The output report must
    contain exactly one row and every ``Asm_ctg_cov`` value must equal 10.
    """
    test_data = os.path.abspath(globals()["test_data"])
    inp_dir = pjoin(test_data,"gene_extractor/extract_contigs_skewed_cov_filter_rsv")
    # Use a working directory of its own. The previous name ("extract_contigs")
    # was shared with test_extract_contigs, so both tests wrote
    # report_out.tsv / seq_out.fasta / status.tsv into the same place and
    # clobbered each other's outputs.
    with helpers.mkchdir("extract_contigs_skewed_cov_filter_rsv"):
        cmd = ("python -m MICGENT.gene_extractor extract-contigs "
            "--cut-to-ref --pad-assembled 200 --sig-inp 123,123 "
            "SMA589764 "
            "{inp_dir}/report.tsv "
            "{inp_dir}/assembled_genes.fa.gz "
            "{inp_dir}/assembled_seqs.fa.gz "
            "{inp_dir}/assemblies.fa.gz "
            "{inp_dir}/seq_map.tsv "
            "{inp_dir}/stdout.log "
            "{inp_dir}/stderr.log "
            "{inp_dir}/basecov_asm.txt "
            "{inp_dir}/basecov_ref.txt "
            "report_out.tsv seq_out.fasta status.tsv").format(inp_dir=inp_dir)
        print(cmd)
        check_call(shlex.split(cmd))
        man = pd.read_table("report_out.tsv")
        assert man.shape[0] == 1
        assert (man.Asm_ctg_cov == 10).all()
コード例 #11
0
def test_run_extractor_stop_codon_sa(request):
    """Run the extraction workflow on the sast300 stop-codon manifest.

    The workflow runs with the ``wgs_spades`` assembly policy and ``--debug``.
    ``out/manifest_out.tsv`` must contain two lines: the header and one
    contig for the single sample.
    """
    get_opt = request.config.getoption
    test_data = os.path.abspath(globals()["test_data"])
    micgent_data = get_opt('--micgent-data')
    conf_dir = pjoin(test_data, "gene_extractor/sa")
    data_dir = pjoin(get_opt('--large-test-data'), "sa/sast300")
    conda_env = get_opt('--conda-env-ngs-mstb')
    cmd_template = " ".join([
        "python -m MICGENT.gene_extractor run-extraction-wf --micgent-data {micgent_data}",
        "--datadir {data_dir} --prepareref-tgz {conf_dir}/ref_HA.arbref.tar",
        "--manifest {conf_dir}/sample_manifest_sast300_stop_codon.txt --ref-common {conf_dir}/ref_HA.fasta",
        "--cwl-runner-name toil --cwl-runner-config cwl_runner.yaml",
        "--outdir out",
        "--debug",
        "--web-dir-out out/web",
        "--assembly-policy wgs_spades",
    ])
    with helpers.mkchdir("extractor_stop_codon_sa"):
        # The return value is not used; presumably this call writes the
        # cwl_runner.yaml named on the command line -- TODO confirm.
        make_extractor_runner_conf(
            conda_env,
            batchSystem="singleMachine",
            leave_workdir=True,
            runner_no_retry=True,
        )
        cmd = cmd_template.format(
            micgent_data=micgent_data,
            data_dir=data_dir,
            conf_dir=conf_dir,
        )
        check_call(shlex.split(cmd))
        manifest_out = "out/manifest_out.tsv"
        assert os.path.exists(manifest_out)
        with open(manifest_out, "r") as manifest_fh:
            n_lines = len(manifest_fh.readlines())
        assert n_lines == 2, "Expected two lines in output manifest (header and one contig for one sample)"
コード例 #12
0
def test_clean_reads_small_sa(request):
    """Run the ``clean_reads_qc.cwl`` workflow under Toil on one SA sample.

    Workflow inputs are written to ``wf_inp.yaml`` inside the run directory
    and the workflow is started through ``MICGENT.cwl_runner run-toil``.
    The test passes when that command exits with status zero.
    """
    # Value is not used below; the lookup is kept so that a missing
    # module-level ``test_data`` still fails here.
    test_data = os.path.abspath(globals()["test_data"])
    data_dir = pjoin(request.config.getoption('--large-test-data'), "sa/sast2")
    cwl_wf = pjoin(pjoin(resources.get_pkg_data_dir("MICGENT"), "cwl"), "clean_reads_qc.cwl")
    conda_env = request.config.getoption('--conda-env-ngs-mstb')
    with helpers.mkchdir("clean_reads_small_sa"):
        # The return value is not used; presumably this call writes the
        # cwl_runner.yaml passed via --config below -- TODO confirm.
        make_extractor_runner_conf(conda_env, runner_no_retry=True, batchSystem="singleMachine")
        # Forward (R1) and reverse (R2) read files, in that order.
        fwd_reads, rev_reads = (
            path_to_cwl_file(
                os.path.join(data_dir, "SMA1828869_S1_L001_R{}_001.fastq.gz".format(mate)))
            for mate in (1, 2)
        )
        # Resolved after the chdir so the path points into the run directory.
        wf_inp = os.path.abspath("wf_inp.yaml")
        wf_params = {
            "SampleID": "SASMA1828869",
            "inp_seq1": fwd_reads,
            "inp_seq2": rev_reads,
            "threads": 8,
        }
        yaml_util.dump_yaml(wf_params, wf_inp)
        cmd = " ".join([
            "python -m MICGENT.cwl_runner --config cwl_runner.yaml",
            "run-toil --logLevel DEBUG",
            "{cwl_wf} {wf_inp}",
        ]).format(cwl_wf=cwl_wf, wf_inp=wf_inp)
        check_call(shlex.split(cmd))
コード例 #13
0
def run_extractor_from_config(request,
    conf_dir,
    data_dir,
    run_dir,
    man_inp,
    prepareref_tgz,
    ref_common,
    data_root=None,
    man_n_inp=0,
    man_n_repl=10,
    expect_min_rows_manifest_out=0,
    runner_no_retry=False,
    cwl_inputs=None,
    logLevel="INFO",
    leave_workdir=None):
    """Generic test function for config-driven deterministic test.

    :param request: pytest ``request`` object; command-line options are read
        from ``request.config``.
    :param conf_dir: configuration directory, relative to the
        ``gene_extractor`` directory under the test data.
    :param data_dir: reads directory, relative to ``data_root``.
    :param run_dir: relative to the callers cwd. Output will be in run_dir/out.
    :param man_inp: input sample manifest, relative to ``conf_dir``.
    :param prepareref_tgz: prepared reference archive, relative to ``conf_dir``.
    :param ref_common: common reference file, relative to ``conf_dir``.
    :param data_root: root of the reads data; when ``None`` the
        ``--large-test-data`` option is used.
    :param man_n_inp: forwarded as ``n_inp`` to
        ``ge_bench.replicate_sample_man`` (presumably the number of input
        manifest rows to use -- TODO confirm).
    :param man_n_repl: forwarded as ``n_repl`` to
        ``ge_bench.replicate_sample_man``; also the multiplier in the final
        row-count check.
    :param expect_min_rows_manifest_out: Check that at least that many rows
        are returned per non-replicated output manifest.
    :param runner_no_retry: forwarded to ``make_extractor_runner_conf``.
    :param cwl_inputs: workflow inputs file, relative to ``conf_dir``;
        defaults to ``cwl_inputs.yaml`` when empty.
    :param logLevel: when truthy, set as ``logLevel`` in the runner keywords.
    :param leave_workdir: when not ``None``, set as ``leave_workdir`` in the
        runner keywords.

    All other input paths should be relative to respective top data directories of gene_extractor tests.
    """
    get_opt = request.config.getoption
    test_data = os.path.abspath(globals()["test_data"])
    micgent_data = get_opt('--micgent-data')
    extra_config_path = get_opt('--extra-config')
    extra_config = yaml_util.load_yaml(extra_config_path) if extra_config_path else {}
    conf_path = pjoin(test_data, "gene_extractor", conf_dir)
    cwl_inputs_path = pjoin(conf_path, cwl_inputs or "cwl_inputs.yaml")
    if data_root is None:
        data_root = get_opt('--large-test-data')
    reads_dir = pjoin(data_root, data_dir)
    conda_env = get_opt('--conda-env-ngs-mstb')
    cmd_template = " ".join([
        "python -m MICGENT.gene_extractor run-extraction-wf --micgent-data {micgent_data}",
        "--datadir {data_dir} --prepareref-tgz {conf_dir}/{prepareref_tgz}",
        "--manifest {man_rep} --ref-common {conf_dir}/{ref_common}",
        "--cwl-runner-name toil --cwl-runner-config cwl_runner.yaml",
        "--cwl-inputs {cwl_inputs}",
        "--deterministic",
        "--outdir out",
        "--web-dir-out out/web",
    ])
    with helpers.mkchdir(run_dir):
        # Write the replicated manifest into the run directory.
        man_src = "{}/{}".format(conf_path, man_inp)
        man_rep_df = ge_bench.replicate_sample_man(
            man_src, n_inp=man_n_inp, n_repl=man_n_repl)
        man_rep = os.path.join(os.getcwd(), "sample_manifest_rep.txt")
        man_rep_df.to_csv(man_rep, index=False, sep="\t")

        # Runner keywords start from the optional extra config and are then
        # overridden by the explicit arguments of this function.
        runner_kw = extra_config.get("cwl_runner", {}).get("run_toil", {})
        if logLevel:
            runner_kw["logLevel"] = logLevel
        if leave_workdir is not None:
            runner_kw["leave_workdir"] = leave_workdir
        # The return value is not used; presumably this call writes the
        # cwl_runner.yaml named on the command line -- TODO confirm.
        make_extractor_runner_conf(
            conda_env, runner_no_retry=runner_no_retry, **runner_kw)

        cmd = cmd_template.format(
            micgent_data=micgent_data,
            data_dir=reads_dir,
            conf_dir=conf_path,
            prepareref_tgz=prepareref_tgz,
            man_rep=man_rep,
            ref_common=ref_common,
            cwl_inputs=cwl_inputs_path,
        )
        check_call(shlex.split(cmd))

        # The MultiQC report is only checked when the manifest has samples.
        if len(man_rep_df) > 0:
            check_multiqc_cwl_tool_output("out/web/multiqc.html")
        manifest_out = "out/manifest_out.tsv"
        assert os.path.exists(manifest_out)
        man_out = pd.read_table(manifest_out)
        assert man_out.shape[0] >= expect_min_rows_manifest_out * man_n_repl
コード例 #14
0
ファイル: test_pysteps.py プロジェクト: ngs-mstb/micgent
def clean_reads(request, target="rsv", full_size=False):
    """Run ``pysteps clean-reads`` twice per option set and compare the outputs.

    For each combination of extra command-line steps the command is executed
    two times with ``--deterministic``; the signatures of the cleaned read
    files from the first run must match the files from the second run, and
    the outputs must be non-empty. When QC read outputs are requested, all
    four ``qc.*.[12].fq`` files must exist and be non-empty.

    :param request: pytest ``request`` object; ``--large-test-data`` is read
        from ``request.config``.
    :param target: data set to use, either ``"rsv"`` or ``"sa"``.
    :param full_size: for ``target="rsv"`` only, use the ``SA2017`` directory
        instead of ``SA2017_sub``.
    :raises ValueError: if ``target`` is not one of the supported values.
    """
    if target == "rsv":
        data_subdir = ("rsv", "SA2017" if full_size else "SA2017_sub")
        sample_id = "SMA1646701"
    elif target == "sa":
        data_subdir = ("sa", "sast2")
        sample_id = "SMA1828869"
    else:
        # Previously an unknown target fell through and failed later with an
        # UnboundLocalError on ``inp_reads``; fail early with a clear message.
        raise ValueError(
            "Unknown target {!r}; expected 'rsv' or 'sa'".format(target))
    data_dir = pjoin(request.config.getoption('--large-test-data'), *data_subdir)
    inp_reads1, inp_reads2 = [
        os.path.join(data_dir,
                     "{}_S1_L001_R{}_001.fastq.gz".format(sample_id, i_read))
        for i_read in (1, 2)
    ]
    # NOTE(review): the run directory is named "clean_reads_rsv" for every
    # target; kept unchanged to preserve the existing output location.
    with helpers.mkchdir("clean_reads_rsv"):
        out_reads1 = "cleaned.1.fq"
        out_reads2 = "cleaned.2.fq"
        out_qc_reads_cmd = (
            " --out-qc-before-reads qc.before.1.fq --out-qc-before-reads2 qc.before.2.fq "
            "--out-qc-after-reads qc.after.1.fq --out-qc-after-reads2 qc.after.2.fq "
        )
        # The part of the command that does not depend on the extra steps is
        # formatted once, outside the loop.
        cmd_base = (
            "python -m MICGENT.pysteps clean-reads --inp-reads {inp_reads1} --inp-reads2 {inp_reads2} "
            "--out-reads {out_reads1} --out-reads2 {out_reads2} "
            "--threads 4 --out-stats stats.log --deterministic ").format(
                inp_reads1=inp_reads1,
                inp_reads2=inp_reads2,
                out_reads1=out_reads1,
                out_reads2=out_reads2)
        for extra_steps in ("",
                out_qc_reads_cmd,
                "--primer-literals AGTGTTCAAYTTYGTWCCYTG,YTACCATTCAAGCAATGACCTC",
                "--clumpify --filter-spikes --primer-literals AGTGTTCAAYTTYGTWCCYTG,YTACCATTCAAGCAATGACCTC",
                out_qc_reads_cmd + \
                "--clumpify --filter-spikes --primer-literals AGTGTTCAAYTTYGTWCCYTG,YTACCATTCAAGCAATGACCTC"
                            ):
            cmd = cmd_base + extra_steps
            # First replicate: record the signatures of both output files.
            check_call(shlex.split(cmd))
            s1 = sig.file_sig(out_reads1)
            s2 = sig.file_sig(out_reads2)
            ## save first replicate in case test fails and we need to debug
            os.rename(out_reads1, out_reads1 + ".r1")
            os.rename(out_reads2, out_reads2 + ".r1")
            # Second replicate: must reproduce the first one exactly.
            check_call(shlex.split(cmd))
            for out_reads in (out_reads1, out_reads2):
                assert os.path.getsize(
                    out_reads) > 0, "Output file {} has zero size".format(
                        out_reads)
            assert sig.file_sig_cmp(
                s1, out_reads1
            ), "Output files differ between repeated runs: {}".format(cmd)
            assert sig.file_sig_cmp(
                s2, out_reads2
            ), "Output files differ between repeated runs: {}".format(cmd)
            if "--out-qc" in cmd:
                out_qc_reads_file_sizes = [
                    os.path.getsize(f) for f in glob.glob("qc.*.[12].fq")
                ]
                assert all(size > 0 for size in out_qc_reads_file_sizes
                           ), "Some subsampled read file have zero size"
                assert len(out_qc_reads_file_sizes
                           ) == 4, "Some subsampled read files are missing"
            # Remove this iteration's *.fq outputs; the renamed *.fq.r1
            # copies do not match the pattern and are left in place.
            for fq in glob.glob("*.fq"):
                os.remove(fq)