def test_complete_assembly_with_snps_and_indels(test_data):
    """Run the illumina pipeline against a reference that differs from the reads.

    Paired reads are requested for every amplicon named in
    ``test_data["amplicons"]``. The reference written for the run is a copy
    of ``test_data["ref_seq"]`` with three edits: a substitution at index 500,
    an "A" inserted at index 1100, and the element at index 1200 removed.

    The test currently only checks that the pipeline call does not raise;
    see the TODO below for the output that should eventually be asserted.
    """
    assert os.path.exists(test_data["dirname"])
    prefix = "tmp.complete_assembly_with_snps_and_indels"
    cleanup_cmd = f"rm -rf {prefix}*"
    # Remove anything left behind by a previous (possibly failed) run.
    subprocess.check_output(cleanup_cmd, shell=True)

    fq1 = f"{prefix}.1.fq"
    fq2 = f"{prefix}.2.fq"
    amplicon_names = {amp[0] for amp in test_data["amplicons"]}
    make_catted_paired_reads_for_amplicon_set(test_data, amplicon_names, fq1, fq2)

    # Work on a copy so the shared fixture sequence is not modified.
    mutated_ref = copy.copy(test_data["ref_seq"])
    # Substitution: force position 500 to differ from its current base.
    if mutated_ref[500] != "A":
        mutated_ref[500] = "A"
    else:
        mutated_ref[500] = "G"
    # Extra base in the reference, then one base removed further along.
    mutated_ref.insert(1100, "A")
    mutated_ref.pop(1200)
    ref_fasta = f"{prefix}.ref.fa"
    nucleotides_list_to_fasta_file(mutated_ref, "ref", ref_fasta)

    one_sample_pipeline.run_one_sample(
        "illumina",
        f"{prefix}.out",
        ref_fasta,
        fq1,
        fq2=fq2,
        tsv_of_amp_schemes=test_data["schemes_tsv"],
        keep_intermediate=True,
    )
    # TODO: check that we got the expected output
    # Should be something like this in the VCF (which doesn't yet exist):
    # CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	sample
    # ref	501	0	A	C	.	PASS	.	GT	1/1
    # ref	1099	1	GA	G	.	PASS	.	GT	1/1
    # ref	1199	2	C	CG	.	PASS	.	GT	1/1
    subprocess.check_output(cleanup_cmd, shell=True)
def _test_not_expected_amplicons(test_data):
    """Run the illumina pipeline with an amplicon scheme written by this test.

    Paired reads are requested for every amplicon named in
    ``test_data["amplicons"]``, but the pipeline is given a JSON file built
    by ``make_amplicons`` from the three amplicons defined below.

    The leading underscore in the name keeps pytest's default ``test_*``
    collection from picking this function up, so it is effectively disabled.
    A failure of the pipeline call is tolerated; nothing about the output is
    asserted yet (see TODO).
    """
    assert os.path.exists(test_data["dirname"])
    pre_out = "tmp.not_expected_amplicons"
    # Remove anything left behind by a previous (possibly failed) run.
    subprocess.check_output(f"rm -rf {pre_out}*", shell=True)
    fq1 = f"{pre_out}.1.fq"
    fq2 = f"{pre_out}.2.fq"
    all_amplicon_names = {x[0] for x in test_data["amplicons"]}
    make_catted_paired_reads_for_amplicon_set(test_data, all_amplicon_names, fq1, fq2)
    amplicons = [
        ("amplicon1", 50, 900),
        ("amplicon2", 850, 1200),
        ("amplicon3", 1165, 1700),
    ]
    amplicons_json = f"{pre_out}.amplicons.json"
    make_amplicons(amplicons_json, amplicons=amplicons)
    outdir = f"{pre_out}.out"
    try:
        one_sample_pipeline.run_one_sample(
            "illumina",
            outdir,
            test_data["ref_fasta"],
            fq1,
            fq2=fq2,
            amplicon_json=amplicons_json,
            keep_intermediate=True,
        )
    except Exception:
        # Best-effort run: pipeline errors are ignored for now. Catching
        # Exception (not a bare except) lets KeyboardInterrupt and SystemExit
        # still stop the test session.
        pass
    # TODO: check that we got the expected output
    subprocess.check_output(f"rm -rf {pre_out}*", shell=True)
def _test_complete_assembly_no_reads_map(test_data):
    """Run the illumina pipeline on reads that are not expected to map.

    One all-"A" read pair is written to the two FASTQ files. If the pipeline
    raises ``utils.OutputFileError``, the error is accepted only when its
    message equals the absolute path of the viridian consensus file under the
    output directory; any other message is re-raised. If no exception is
    raised at all the function also completes normally.

    The leading underscore in the name keeps pytest's default ``test_*``
    collection from picking this function up.
    """
    assert os.path.exists(test_data["dirname"])
    prefix = "tmp.no_reads_map"
    cleanup_cmd = f"rm -rf {prefix}*"
    # Remove anything left behind by a previous (possibly failed) run.
    subprocess.check_output(cleanup_cmd, shell=True)

    fq1 = f"{prefix}.1.fq"
    fq2 = f"{prefix}.2.fq"
    # make some garbage reads so they don't map
    bases = "A" * 100
    quals = "I" * 100
    with open(fq1, "w") as f1, open(fq2, "w") as f2:
        for mate, handle in enumerate((f1, f2), start=1):
            print(f"@read1/{mate}", bases, "+", quals, sep="\n", file=handle)

    outdir = f"{prefix}.out"
    try:
        one_sample_pipeline.run_one_sample(
            "illumina",
            outdir,
            test_data["ref_fasta"],
            fq1,
            fq2=fq2,
            amplicon_json=test_data["amplicons_tsv"],
        )
        # This test should fail on viridian, producing no consensus
        # TODO specify that it was the consensus file that's missing
    except utils.OutputFileError as error:
        expected_missing = os.path.abspath(
            os.path.join(outdir, "Processing/viridian/consensus.final_assembly.fa")
        )
        # Only the missing consensus file is an acceptable failure.
        if str(error) != str(expected_missing):
            raise error

    subprocess.check_output(cleanup_cmd, shell=True)
# Example #4
# 0
def run(options):
    """Run the one-sample pipeline from parsed command-line options.

    The reads file(s) are obtained from
    ``utils.check_tech_and_reads_opts_and_get_reads(options)``. When
    ``options.force`` is set, ``options.outdir`` is deleted first. All other
    settings are forwarded from ``options`` to
    ``one_sample_pipeline.run_one_sample``.

    Args:
        options: object exposing the attributes read below (``force``,
            ``outdir``, ``tech``, ``ref_fasta``, ...). The whole object is
            also passed on as ``command_line_args``.

    Raises:
        subprocess.CalledProcessError: if removing the output directory fails.
    """
    fq1, fq2 = utils.check_tech_and_reads_opts_and_get_reads(options)

    if options.force:
        logging.info(f"--force option used, so deleting {options.outdir} if it exists")
        # Pass an argv list rather than a shell string so that an output path
        # containing spaces or shell metacharacters is treated as one literal
        # path. "--" stops a path starting with "-" being read as an option.
        subprocess.check_output(["rm", "-rf", "--", str(options.outdir)])

    one_sample_pipeline.run_one_sample(
        options.tech,
        options.outdir,
        options.ref_fasta,
        fq1,
        fq2=fq2,
        built_in_amp_schemes=options.built_in_amp_schemes,
        tsv_of_amp_schemes=options.amp_schemes_tsv,
        force_amp_scheme=options.force_amp_scheme,
        keep_intermediate=options.debug,
        keep_bam=options.keep_bam,
        target_sample_depth=options.target_sample_depth,
        sample_name=options.sample_name,
        min_sample_depth=options.min_sample_depth,
        max_percent_amps_fail=options.max_percent_amps_fail,
        viridian_cons_max_n_percent=options.max_cons_n_percent,
        frs_threshold=options.frs_threshold,
        self_qc_depth=options.self_qc_depth,
        log_liftover=options.log_liftover,
        trim_5prime=options.trim_5prime,
        command_line_args=options,
    )
def test_complete_assembly_from_all_good_amplicons_unpaired(test_data):
    """Run the "ont" pipeline on one unpaired FASTQ covering every amplicon.

    Reads are requested for all amplicon names in ``test_data["amplicons"]``
    and the pipeline is run against ``test_data["ref_fasta"]``. The test
    currently only checks that the pipeline call does not raise.
    """
    assert os.path.exists(test_data["dirname"])
    prefix = "tmp.complete_assembly_from_all_good_amplicons_unpaired"
    cleanup_cmd = f"rm -rf {prefix}*"
    # Remove anything left behind by a previous (possibly failed) run.
    subprocess.check_output(cleanup_cmd, shell=True)

    reads_fq = f"{prefix}.fq"
    amplicon_names = {amp[0] for amp in test_data["amplicons"]}
    make_catted_unpaired_reads_for_amplicon_set(test_data, amplicon_names, reads_fq)

    one_sample_pipeline.run_one_sample(
        "ont",
        f"{prefix}.out",
        test_data["ref_fasta"],
        reads_fq,
        tsv_of_amp_schemes=test_data["schemes_tsv"],
        keep_intermediate=True,
    )
    # TODO: check that we got the expected output
    subprocess.check_output(cleanup_cmd, shell=True)
def test_reads_are_wgs_not_amplicon(test_data):
    """Run the illumina pipeline on reads tiled along the reference.

    The paired reads come from ``tiling_reads`` applied to
    ``test_data["ref_seq"]`` instead of from the per-amplicon read helpers.
    The test currently only checks that the pipeline call does not raise.
    """
    assert os.path.exists(test_data["dirname"])
    prefix = "tmp.reads_are_wgs_not_amplicon"
    cleanup_cmd = f"rm -rf {prefix}*"
    # Remove anything left behind by a previous (possibly failed) run.
    subprocess.check_output(cleanup_cmd, shell=True)

    fq1 = f"{prefix}.1.fq"
    fq2 = f"{prefix}.2.fq"
    # NOTE(review): 150 and 350 are presumably read and fragment lengths;
    # confirm against tiling_reads.
    tiling_reads(test_data["ref_seq"], 150, 350, fq1, fq2, step=2)

    one_sample_pipeline.run_one_sample(
        "illumina",
        f"{prefix}.out",
        test_data["ref_fasta"],
        fq1,
        fq2=fq2,
        tsv_of_amp_schemes=test_data["schemes_tsv"],
        keep_intermediate=True,
    )
    # TODO: check that we got the expected output
    subprocess.check_output(cleanup_cmd, shell=True)
def test_assembly_amplicon_3_no_reads(test_data):
    """Run the illumina pipeline with no reads requested for amplicon3.

    Paired reads are requested only for amplicons 1, 2, 4 and 5. The test
    currently only checks that the pipeline call does not raise.
    """
    assert os.path.exists(test_data["dirname"])
    prefix = "tmp.assembly_amplicon_3_no_reads"
    cleanup_cmd = f"rm -rf {prefix}*"
    # Remove anything left behind by a previous (possibly failed) run.
    subprocess.check_output(cleanup_cmd, shell=True)

    fq1 = f"{prefix}.1.fq"
    fq2 = f"{prefix}.2.fq"
    # Every amplicon except number 3.
    amplicons_with_reads = {f"amplicon{i}" for i in (1, 2, 4, 5)}
    make_catted_paired_reads_for_amplicon_set(
        test_data, amplicons_with_reads, fq1, fq2
    )

    one_sample_pipeline.run_one_sample(
        "illumina",
        f"{prefix}.out",
        test_data["ref_fasta"],
        fq1,
        fq2=fq2,
        tsv_of_amp_schemes=test_data["schemes_tsv"],
        keep_intermediate=True,
    )
    # TODO: check that we got the expected output
    subprocess.check_output(cleanup_cmd, shell=True)