def test_prepare_nextflow_input_files(self):
    """test _prepare_nextflow_input_files

    Builds empty reference/VCF/reads fixtures plus a one-line data TSV that
    points at the VCF and reads files, then checks that the output directory
    and the pipeline's ``nextflow_input_tsv`` exist after calling
    ``_make_output_dir`` and ``_prepare_nextflow_input_files``.
    """
    # Contents of the files is checked elsewhere.
    # We'll just check that the files exist
    ref_fasta = "tmp.prepare_nextflow_input_files.in.ref.fa"
    outdir = "tmp.prepare_nextflow_input_files.outdir"
    data_tsv = "tmp.prepare_nextflow_input_files.in.tsv"
    vcf_file = "tmp.prepare_nextflow_input_files.in.vcf"
    reads_file = "tmp.prepare_nextflow_input_files.in.reads"
    input_files = (ref_fasta, data_tsv, vcf_file, reads_file)

    try:
        # The reference only needs to exist; it is deliberately left empty.
        with open(ref_fasta, "w"):
            pass

        with open(data_tsv, "w") as f:
            print(vcf_file, reads_file, sep="\t", file=f)

        with open(vcf_file, "w"), open(reads_file, "w"):
            pass

        # Start from a clean slate in case an earlier run left output behind.
        if os.path.exists(outdir):
            shutil.rmtree(outdir)

        pipeline = multi_sample_pipeline.MultiSamplePipeline(
            ref_fasta, data_tsv, outdir
        )
        pipeline._make_output_dir()
        pipeline._prepare_nextflow_input_files()
        self.assertTrue(os.path.exists(outdir))
        self.assertTrue(os.path.exists(pipeline.nextflow_input_tsv))
    finally:
        # Clean up even when an assertion or the pipeline itself fails, so a
        # failing test does not leave tmp.* files in the working directory.
        # Existence is checked first because a failure may have happened
        # before a given file or directory was created.
        if os.path.exists(outdir):
            shutil.rmtree(outdir)
        for filename in input_files:
            if os.path.exists(filename):
                os.unlink(filename)
def run(options):
    """Construct a MultiSamplePipeline from ``options`` and run it.

    ``options`` is any object exposing the attributes read below
    (presumably the parsed command-line arguments -- confirm with caller).
    Most attributes are forwarded under the same keyword name; the two
    exceptions are ``options.max_read_length``, which is passed as
    ``gramtools_max_read_length``, and ``options.no_clean``, which is
    negated and passed as ``clean``.

    The value returned by ``pipeline.run()`` is discarded.
    """
    # Positional arguments: reference FASTA, data TSV and output directory.
    positional_args = (options.ref_fasta, options.data_tsv, options.outdir)

    pipeline_settings = {
        "max_alleles_per_cluster": options.max_alleles_per_cluster,
        "min_large_ref_length": options.min_large_ref_length,
        # The option is called max_read_length; the pipeline keyword is not.
        "gramtools_max_read_length": options.max_read_length,
        "gramtools_kmer_size": options.gramtools_kmer_size,
        "gramtools_build_threads": options.gramtools_build_threads,
        "nextflow_config_file": options.nextflow_config_file,
        "nextflow_work_dir": options.nextflow_work_dir,
        "force": options.force,
        "no_run": options.no_run,
        # The option is a "no clean" flag, so it is inverted here.
        "clean": not options.no_clean,
        "variants_per_split": options.variants_per_split,
        "alleles_per_split": options.alleles_per_split,
        "total_splits": options.total_splits,
        "nf_ram_cluster_small_vars": options.nf_ram_cluster_small_vars,
        "nf_ram_gramtools_build_small": options.nf_ram_gramtools_build_small,
        "nf_ram_minos_small_vars": options.nf_ram_minos_small_vars,
        "nf_ram_merge_small_vars": options.nf_ram_merge_small_vars,
        "testing": options.testing,
        "use_unmapped_reads": options.use_unmapped_reads,
    }

    pipeline = multi_sample_pipeline.MultiSamplePipeline(
        *positional_args, **pipeline_settings
    )
    pipeline.run()
def test_prepare_nextflow_input_files(self):
    '''test _prepare_nextflow_input_files

    Creates empty reference, VCF and reads files and a data TSV with one
    line naming the VCF and reads files, runs ``_make_output_dir`` and
    ``_prepare_nextflow_input_files``, and asserts that the output
    directory and ``pipeline.nextflow_input_tsv`` exist afterwards.
    '''
    # Contents of the files is checked elsewhere.
    # We'll just check that the files exist
    ref_fasta = 'tmp.prepare_nextflow_input_files.in.ref.fa'
    outdir = 'tmp.prepare_nextflow_input_files.outdir'
    data_tsv = 'tmp.prepare_nextflow_input_files.in.tsv'
    vcf_file = 'tmp.prepare_nextflow_input_files.in.vcf'
    reads_file = 'tmp.prepare_nextflow_input_files.in.reads'

    try:
        # Only existence matters for the reference, so it is left empty.
        with open(ref_fasta, 'w'):
            pass

        with open(data_tsv, 'w') as f:
            print(vcf_file, reads_file, sep='\t', file=f)

        with open(vcf_file, 'w'), open(reads_file, 'w'):
            pass

        # Remove output left behind by a previous run before starting.
        if os.path.exists(outdir):
            shutil.rmtree(outdir)

        pipeline = multi_sample_pipeline.MultiSamplePipeline(
            ref_fasta, data_tsv, outdir)
        pipeline._make_output_dir()
        pipeline._prepare_nextflow_input_files()
        self.assertTrue(os.path.exists(outdir))
        self.assertTrue(os.path.exists(pipeline.nextflow_input_tsv))
    finally:
        # Always tidy up, including when the test fails part-way through;
        # anything that was never created is skipped.
        if os.path.exists(outdir):
            shutil.rmtree(outdir)
        for tmp_path in (ref_fasta, data_tsv, vcf_file, reads_file):
            if os.path.exists(tmp_path):
                os.unlink(tmp_path)
def test_run_with_small_var_vcf_chunking_total_splits(self):
    '''test run with chunking small variant VCF file using total_splits option

    Writes a two-sample input TSV (VCF path, reads path), runs the whole
    pipeline with total_splits=3, and compares combined_calls.vcf in the
    output directory against the expected run.out.vcf from data_dir.
    '''
    ref_fasta = os.path.join(data_dir, 'run.ref.0.fa')
    input_tsv = 'tmp.multi_sample_pipeline.run.in.tsv'
    outdir = 'tmp.multi_sample_pipeline.run.out'

    # Header lines starting with any of these are dropped before comparing:
    # the date, minos version, and bcftools versions might not match.
    volatile_prefixes = (
        '##fileDate',
        '##source=minos',
        '##bcftools_mergeVersion',
    )

    def comparable(header_lines):
        return [
            line for line in header_lines
            if not line.startswith(volatile_prefixes)
        ]

    with open(input_tsv, 'w') as tsv_handle:
        for sample in ('1', '2'):
            reads = os.path.join(data_dir, 'run.reads.' + sample + '.sorted.bam')
            vcf = os.path.join(data_dir, 'run.calls.' + sample + '.vcf')
            print(vcf, reads, sep='\t', file=tsv_handle)

    if os.path.exists(outdir):
        shutil.rmtree(outdir)

    pipeline = multi_sample_pipeline.MultiSamplePipeline(
        ref_fasta,
        input_tsv,
        outdir,
        total_splits=3,
        min_large_ref_length=10,
        testing=True,
        clean=False,
    )
    pipeline.run()

    expected_header, expected_lines = vcf_file_read.vcf_file_to_list(
        os.path.join(data_dir, 'run.out.vcf'))

    got_vcf = os.path.join(outdir, 'combined_calls.vcf')
    self.assertTrue(os.path.exists(got_vcf))
    got_header, got_lines = vcf_file_read.vcf_file_to_list(got_vcf)

    self.assertEqual(comparable(expected_header), comparable(got_header))
    self.assertEqual(expected_lines, got_lines)

    shutil.rmtree(outdir)
    os.unlink(input_tsv)
def _test_run_with_small_var_vcf_chunking_vars_per_split(self):
    """test run with chunking small variant VCF file using variants_per_split option

    Writes a two-sample input TSV with three columns (VCF path followed by
    the same reads path twice), runs the whole pipeline with
    variants_per_split=3, and compares combined_calls.vcf in the output
    directory against the expected run.out.vcf from data_dir.

    NOTE(review): the leading underscore means name-based test discovery
    (``test*``) will not collect this method; presumably it is disabled on
    purpose -- confirm before renaming.
    """
    ref_fasta = os.path.join(data_dir, "run.ref.0.fa")
    input_tsv = "tmp.multi_sample_pipeline.run.in.tsv"
    outdir = "tmp.multi_sample_pipeline.run.out"

    # Header lines starting with any of these are dropped before comparing:
    # the date, minos version, and bcftools versions might not match.
    volatile_prefixes = (
        "##fileDate",
        "##source=minos",
        "##bcftools_mergeVersion",
    )

    def comparable(header_lines):
        return [
            line for line in header_lines
            if not line.startswith(volatile_prefixes)
        ]

    with open(input_tsv, "w") as tsv_handle:
        for sample in ("1", "2"):
            # The same BAM path is written to both reads columns.
            reads = os.path.join(data_dir, "run.reads." + sample + ".sorted.bam")
            vcf = os.path.join(data_dir, "run.calls." + sample + ".vcf")
            print(vcf, reads, reads, sep="\t", file=tsv_handle)

    if os.path.exists(outdir):
        shutil.rmtree(outdir)

    pipeline = multi_sample_pipeline.MultiSamplePipeline(
        ref_fasta,
        input_tsv,
        outdir,
        variants_per_split=3,
        min_large_ref_length=10,
        testing=True,
        clean=False,
    )
    pipeline.run()

    expected_header, expected_lines = vcf_file_read.vcf_file_to_list(
        os.path.join(data_dir, "run.out.vcf"))

    got_vcf = os.path.join(outdir, "combined_calls.vcf")
    self.assertTrue(os.path.exists(got_vcf))
    got_header, got_lines = vcf_file_read.vcf_file_to_list(got_vcf)

    self.assertEqual(comparable(expected_header), comparable(got_header))
    self.assertEqual(expected_lines, got_lines)

    shutil.rmtree(outdir)
    os.unlink(input_tsv)