def test_complete_assembly_with_snps_and_indels(test_data):
    """Run the illumina pipeline against a reference edited with a SNP and two indels.

    Paired reads are requested for every amplicon name in
    ``test_data["amplicons"]``. The reference handed to the pipeline is a copy
    of ``test_data["ref_seq"]`` with one position substituted, one base
    inserted and one base removed. Nothing in the output is checked yet (see
    the TODO below); the test only requires the pipeline to run without
    raising.
    """
    assert os.path.exists(test_data["dirname"])
    prefix = "tmp.complete_assembly_with_snps_and_indels"
    cleanup_cmd = f"rm -rf {prefix}*"
    # Remove anything left behind by an earlier run.
    subprocess.check_output(cleanup_cmd, shell=True)

    reads1 = f"{prefix}.1.fq"
    reads2 = f"{prefix}.2.fq"
    amplicon_names = {amplicon[0] for amplicon in test_data["amplicons"]}
    make_catted_paired_reads_for_amplicon_set(test_data, amplicon_names, reads1, reads2)

    # Work on a copy so the shared test_data reference is left untouched.
    mutated_ref = copy.copy(test_data["ref_seq"])
    # Substitution at index 500: always swap to a different base.
    if mutated_ref[500] == "A":
        mutated_ref[500] = "G"
    else:
        mutated_ref[500] = "A"
    # One extra base in the reference, then one base dropped from it.
    mutated_ref.insert(1100, "A")
    mutated_ref.pop(1200)
    ref_fasta = f"{prefix}.ref.fa"
    nucleotides_list_to_fasta_file(mutated_ref, "ref", ref_fasta)

    one_sample_pipeline.run_one_sample(
        "illumina",
        f"{prefix}.out",
        ref_fasta,
        reads1,
        fq2=reads2,
        tsv_of_amp_schemes=test_data["schemes_tsv"],
        keep_intermediate=True,
    )
    # TODO: check that we got the expected output
    # Should be something like this in the VCF (which doesn't yet exist):
    # CHROM  POS   ID  REF  ALT  QUAL  FILTER  INFO  FORMAT  sample
    # ref    501   0   A    C    .     PASS    .     GT      1/1
    # ref    1099  1   GA   G    .     PASS    .     GT      1/1
    # ref    1199  2   C    CG   .     PASS    .     GT      1/1
    subprocess.check_output(cleanup_cmd, shell=True)
def _test_not_expected_amplicons(test_data):
    """Run the illumina pipeline with a hand-written amplicon scheme.

    Paired reads are requested for every amplicon name in
    ``test_data["amplicons"]``, while the pipeline is pointed at a JSON file
    built from three locally defined amplicons. Any ordinary exception from
    the pipeline is tolerated; no output is checked yet.

    NOTE(review): the leading underscore presumably keeps pytest from
    collecting this function, i.e. the test is currently disabled - confirm.
    """
    assert os.path.exists(test_data["dirname"])
    pre_out = "tmp.not_expected_amplicons"
    # Remove anything left behind by an earlier run.
    subprocess.check_output(f"rm -rf {pre_out}*", shell=True)
    fq1 = f"{pre_out}.1.fq"
    fq2 = f"{pre_out}.2.fq"
    all_amplicon_names = {x[0] for x in test_data["amplicons"]}
    make_catted_paired_reads_for_amplicon_set(test_data, all_amplicon_names, fq1, fq2)

    # (name, start, end) tuples handed to make_amplicons.
    # NOTE(review): coordinate convention (0/1-based, inclusive?) is defined
    # by make_amplicons - confirm there.
    amplicons = [
        ("amplicon1", 50, 900),
        ("amplicon2", 850, 1200),
        ("amplicon3", 1165, 1700),
    ]
    amplicons_json = f"{pre_out}.amplicons.json"
    make_amplicons(amplicons_json, amplicons=amplicons)
    outdir = f"{pre_out}.out"

    try:
        one_sample_pipeline.run_one_sample(
            "illumina",
            outdir,
            test_data["ref_fasta"],
            fq1,
            fq2=fq2,
            amplicon_json=amplicons_json,
            keep_intermediate=True,
        )
    except Exception:
        # Best effort: a pipeline failure is tolerated here. Catching
        # Exception (not a bare except) lets KeyboardInterrupt and SystemExit
        # propagate so the run can still be interrupted.
        pass
    # TODO: check that we got the expected output
    subprocess.check_output(f"rm -rf {pre_out}*", shell=True)
def _test_complete_assembly_no_reads_map(test_data):
    """Run the illumina pipeline on reads that are not expected to map.

    A single poly-A read pair is written to disk and passed to the pipeline.
    A ``utils.OutputFileError`` is accepted only when its message equals the
    absolute path of the final consensus FASTA under ``outdir``; any other
    ``OutputFileError`` is re-raised. If the pipeline raises nothing, the
    function still finishes normally.

    NOTE(review): the leading underscore presumably keeps pytest from
    collecting this function, i.e. the test is currently disabled - confirm.
    """
    assert os.path.exists(test_data["dirname"])
    prefix = "tmp.no_reads_map"
    cleanup_cmd = f"rm -rf {prefix}*"
    # Remove anything left behind by an earlier run.
    subprocess.check_output(cleanup_cmd, shell=True)

    reads1 = f"{prefix}.1.fq"
    reads2 = f"{prefix}.2.fq"
    # Make some garbage reads so they don't map: one FASTQ record per mate.
    bases = "A" * 100
    quals = "I" * 100
    with open(reads1, "w") as f1, open(reads2, "w") as f2:
        for mate, handle in ((1, f1), (2, f2)):
            print(f"@read1/{mate}", bases, "+", quals, sep="\n", file=handle)

    outdir = f"{prefix}.out"
    try:
        # NOTE(review): the keyword is amplicon_json but the value is the
        # "amplicons_tsv" entry of test_data - confirm this is intended.
        one_sample_pipeline.run_one_sample(
            "illumina",
            outdir,
            test_data["ref_fasta"],
            reads1,
            fq2=reads2,
            amplicon_json=test_data["amplicons_tsv"],
        )
    # This test should fail on viridian, producing no consensus
    # TODO specify that it was the consensus file that's missing
    except utils.OutputFileError as error:
        consensus_path = os.path.join(
            outdir, "Processing/viridian/consensus.final_assembly.fa"
        )
        expected_message = str(os.path.abspath(consensus_path))
        if str(error) != expected_message:
            raise error
    subprocess.check_output(cleanup_cmd, shell=True)
def run(options):
    """Run the one-sample pipeline from parsed command-line options.

    Args:
        options: object exposing the attributes read below (``tech``,
            ``outdir``, ``ref_fasta``, ``force``, ``debug``, ...). It is also
            forwarded unchanged as ``command_line_args``.
            NOTE(review): presumably an ``argparse.Namespace`` - confirm
            against the caller.

    If ``options.force`` is true, ``options.outdir`` is removed recursively
    before the pipeline starts.
    """
    fq1, fq2 = utils.check_tech_and_reads_opts_and_get_reads(options)

    if options.force:
        logging.info(f"--force option used, so deleting {options.outdir} if it exists")
        # Pass an argument list and skip the shell: interpolating the path
        # into a shell string would split it on spaces and interpret shell
        # metacharacters, deleting the wrong targets. "--" stops option
        # parsing so a path starting with "-" is treated as a path.
        subprocess.check_output(["rm", "-rf", "--", options.outdir])

    one_sample_pipeline.run_one_sample(
        options.tech,
        options.outdir,
        options.ref_fasta,
        fq1,
        fq2=fq2,
        built_in_amp_schemes=options.built_in_amp_schemes,
        tsv_of_amp_schemes=options.amp_schemes_tsv,
        force_amp_scheme=options.force_amp_scheme,
        # --debug doubles as the switch for keeping intermediate files.
        keep_intermediate=options.debug,
        keep_bam=options.keep_bam,
        target_sample_depth=options.target_sample_depth,
        sample_name=options.sample_name,
        min_sample_depth=options.min_sample_depth,
        max_percent_amps_fail=options.max_percent_amps_fail,
        viridian_cons_max_n_percent=options.max_cons_n_percent,
        frs_threshold=options.frs_threshold,
        self_qc_depth=options.self_qc_depth,
        log_liftover=options.log_liftover,
        trim_5prime=options.trim_5prime,
        command_line_args=options,
    )
def test_complete_assembly_from_all_good_amplicons_unpaired(test_data):
    """Run the pipeline in "ont" mode on one unpaired reads file.

    Reads are requested for every amplicon name in ``test_data["amplicons"]``
    and written to a single FASTQ. Nothing in the output is checked yet; the
    test only requires the pipeline to run without raising.
    """
    assert os.path.exists(test_data["dirname"])
    prefix = "tmp.complete_assembly_from_all_good_amplicons_unpaired"
    cleanup_cmd = f"rm -rf {prefix}*"
    # Remove anything left behind by an earlier run.
    subprocess.check_output(cleanup_cmd, shell=True)

    reads = f"{prefix}.fq"
    amplicon_names = {amplicon[0] for amplicon in test_data["amplicons"]}
    make_catted_unpaired_reads_for_amplicon_set(test_data, amplicon_names, reads)

    one_sample_pipeline.run_one_sample(
        "ont",
        f"{prefix}.out",
        test_data["ref_fasta"],
        reads,
        tsv_of_amp_schemes=test_data["schemes_tsv"],
        keep_intermediate=True,
    )
    # TODO: check that we got the expected output
    subprocess.check_output(cleanup_cmd, shell=True)
def test_reads_are_wgs_not_amplicon(test_data):
    """Run the illumina pipeline on reads produced by ``tiling_reads``.

    The read pairs are generated from ``test_data["ref_seq"]`` rather than
    from the amplicon set. Nothing in the output is checked yet; the test
    only requires the pipeline to run without raising.
    """
    assert os.path.exists(test_data["dirname"])
    prefix = "tmp.reads_are_wgs_not_amplicon"
    cleanup_cmd = f"rm -rf {prefix}*"
    # Remove anything left behind by an earlier run.
    subprocess.check_output(cleanup_cmd, shell=True)

    reads1 = f"{prefix}.1.fq"
    reads2 = f"{prefix}.2.fq"
    # NOTE(review): 150 and 350 are presumably read length and fragment
    # length - confirm against tiling_reads.
    tiling_reads(test_data["ref_seq"], 150, 350, reads1, reads2, step=2)

    one_sample_pipeline.run_one_sample(
        "illumina",
        f"{prefix}.out",
        test_data["ref_fasta"],
        reads1,
        fq2=reads2,
        tsv_of_amp_schemes=test_data["schemes_tsv"],
        keep_intermediate=True,
    )
    # TODO: check that we got the expected output
    subprocess.check_output(cleanup_cmd, shell=True)
def test_assembly_amplicon_3_no_reads(test_data):
    """Run the illumina pipeline with no reads requested for amplicon3.

    Paired reads are requested for amplicons 1, 2, 4 and 5 only. Nothing in
    the output is checked yet; the test only requires the pipeline to run
    without raising.
    """
    assert os.path.exists(test_data["dirname"])
    prefix = "tmp.assembly_amplicon_3_no_reads"
    cleanup_cmd = f"rm -rf {prefix}*"
    # Remove anything left behind by an earlier run.
    subprocess.check_output(cleanup_cmd, shell=True)

    reads1 = f"{prefix}.1.fq"
    reads2 = f"{prefix}.2.fq"
    # "amplicon3" is deliberately left out of the set.
    covered_amplicons = {
        "amplicon1",
        "amplicon2",
        "amplicon4",
        "amplicon5",
    }
    make_catted_paired_reads_for_amplicon_set(
        test_data, covered_amplicons, reads1, reads2
    )

    one_sample_pipeline.run_one_sample(
        "illumina",
        f"{prefix}.out",
        test_data["ref_fasta"],
        reads1,
        fq2=reads2,
        tsv_of_amp_schemes=test_data["schemes_tsv"],
        keep_intermediate=True,
    )
    # TODO: check that we got the expected output
    subprocess.check_output(cleanup_cmd, shell=True)