def test_run_empty_vcf_input_files(self):
    """Run on VCF inputs that contain no variants.

    run() is expected to raise adjudicator.Error. The output directory,
    log file and clustered VCF must exist afterwards, while the final VCF,
    the gramtools directories and the plot files must not.
    """
    work_dir = "tmp.adjudicator.out"
    # Start from a clean slate in case an earlier run left the directory.
    if os.path.exists(work_dir):
        shutil.rmtree(work_dir)

    reference = os.path.join(data_dir, "run.ref.fa")
    reads = os.path.join(data_dir, "run.bwa.bam")
    empty_vcfs = [
        os.path.join(data_dir, "run.calls.empty.1.vcf"),
        os.path.join(data_dir, "run.calls.empty.2.vcf"),
    ]

    adj = adjudicator.Adjudicator(
        work_dir,
        reference,
        [reads],
        empty_vcfs,
        clean=False,
        gramtools_kmer_size=5,
        genotype_simulation_iterations=1000,
    )
    with self.assertRaises(adjudicator.Error):
        adj.run()

    self.assertTrue(os.path.exists(work_dir))
    self.assertTrue(os.path.exists(adj.log_file))

    # None of these should have been produced by the failed run.
    must_be_absent = (
        adj.final_vcf,
        adj.gramtools_build_dir,
        adj.gramtools_quasimap_dir,
        adj.plots_prefix + ".data.tsv",
        adj.plots_prefix + ".dp_hist.pdf",
        adj.plots_prefix + ".gt_conf_dp_scatter.pdf",
        adj.plots_prefix + ".gt_conf_hist.pdf",
    )
    for path in must_be_absent:
        self.assertFalse(os.path.exists(path))

    self.assertTrue(os.path.exists(adj.clustered_vcf))
    shutil.rmtree(work_dir)
def test_0MeanDepth_stillRuns(self):
    """
    Check that the adjudicator still runs when the mean depth is 0.

    When mean depth is 0, we can get math errors: math.log(0) in genotype
    likelihood computation, and division by 0 in genotype confidence
    simulation.

    Note the former only actually occurs if there is a variant site with
    non-zero coverage; in this case, mean depth can get set to 0 due to
    rounding imprecision. This is tested in genotyper unit tests.

    The test passes if run() raises nothing and the first mean depth is 0.
    """
    outdir = "tmp.adjudicator.out"
    # Remove leftovers from any earlier run before starting.
    if os.path.exists(outdir):
        shutil.rmtree(outdir)

    ref_fasta = os.path.join(data_dir, "run.ref.fa")
    # NOTE(review): presumably reads that do not map to the reference,
    # which is what drives the depth to 0 - confirm against the test data.
    reads_file = os.path.join(data_dir, "no_map_reads.fastq")
    vcf_files = [os.path.join(data_dir, "run.calls.1.vcf")]

    adj = adjudicator.Adjudicator(
        outdir,
        ref_fasta,
        [reads_file],
        vcf_files,
        clean=False,
        gramtools_kmer_size=5,
    )
    adj.run()

    # Make sure the coverage is 0
    self.assertEqual(adj.mean_depths[0], 0)

    # Clean up so the test does not leave its output directory on disk.
    shutil.rmtree(outdir)
def test_run_clean_is_true(self):
    """Run with clean=True and check that the final VCF is still written."""
    # Smoke test only: confirms that run() completes and that the expected
    # outputs exist. Their contents are not inspected.
    work_dir = "tmp.adjudicator.clean.out"
    if os.path.exists(work_dir):
        shutil.rmtree(work_dir)

    vcf_names = ("run.calls.1.vcf", "run.calls.2.vcf")
    adj = adjudicator.Adjudicator(
        work_dir,
        os.path.join(data_dir, "run.ref.fa"),
        [os.path.join(data_dir, "run.bwa.bam")],
        [os.path.join(data_dir, name) for name in vcf_names],
        clean=True,
        gramtools_kmer_size=5,
        genotype_simulation_iterations=1000,
    )
    adj.run()

    for expected in (work_dir, os.path.join(work_dir, "final.vcf")):
        self.assertTrue(os.path.exists(expected))

    shutil.rmtree(work_dir)
def test_run_empty_vcf_input_files():
    """Run on VCF inputs that contain no variants.

    run() is expected to raise. The output directory, log file and
    clustered VCF must exist afterwards, while the final VCF and the
    gramtools directories must not.
    """
    work_dir = "tmp.adjudicator.out"
    # Start from a clean slate in case an earlier run left the directory.
    if os.path.exists(work_dir):
        shutil.rmtree(work_dir)

    reference = os.path.join(data_dir, "run.ref.fa")
    reads = os.path.join(data_dir, "run.bwa.bam")
    empty_vcfs = [
        os.path.join(data_dir, "run.calls.empty.1.vcf"),
        os.path.join(data_dir, "run.calls.empty.2.vcf"),
    ]

    adj = adjudicator.Adjudicator(
        work_dir,
        reference,
        [reads],
        empty_vcfs,
        clean=False,
        gramtools_kmer_size=5,
        genotype_simulation_iterations=1000,
    )
    with pytest.raises(Exception):
        adj.run()

    assert os.path.exists(work_dir)
    assert os.path.exists(adj.log_file)

    # None of these should have been produced by the failed run.
    must_be_absent = (
        adj.final_vcf,
        adj.gramtools_build_dir,
        adj.gramtools_quasimap_dir,
    )
    for path in must_be_absent:
        assert not os.path.exists(path)

    assert os.path.exists(adj.clustered_vcf)
    shutil.rmtree(work_dir)
def run(options):
    """Construct an Adjudicator from the attributes of ``options`` and run it.

    ``options`` must expose every attribute read below (presumably an
    argparse namespace - confirm against the caller). Intermediate files are
    cleaned up unless ``options.debug`` is set.
    """
    positional = (
        options.outdir,
        options.ref_fasta,
        options.reads,
        options.vcf_files,
    )
    settings = {
        "max_read_length": options.max_read_length,
        "read_error_rate": options.read_error_rate,
        "overwrite_outdir": options.force,
        "max_alleles_per_cluster": options.max_alleles_per_cluster,
        "gramtools_build_dir": options.gramtools_build_dir,
        "sample_name": options.sample_name,
        "variants_per_split": options.variants_per_split,
        "alleles_per_split": options.alleles_per_split,
        "total_splits": options.total_splits,
        # Cleaning is simply the inverse of the debug flag.
        "clean": not options.debug,
        "gramtools_kmer_size": options.gramtools_kmer_size,
        "use_unmapped_reads": options.use_unmapped_reads,
    }
    adjudicator.Adjudicator(*positional, **settings).run()
def run(options):
    """Construct an Adjudicator from the attributes of ``options`` and run it.

    ``options`` must expose every attribute read below (presumably an
    argparse namespace - confirm against the caller). Intermediate files are
    cleaned up unless ``options.debug`` is set; the debug flag itself is also
    forwarded to the Adjudicator.
    """
    positional = (
        options.outdir,
        options.ref_fasta,
        options.reads,
        options.vcf_files,
    )
    settings = {
        "read_error_rate": options.read_error_rate,
        "overwrite_outdir": options.force,
        "max_alleles_per_cluster": options.max_alleles_per_cluster,
        "gramtools_build_dir": options.gramtools_build_dir,
        "sample_name": options.sample_name,
        "variants_per_split": options.variants_per_split,
        "alleles_per_split": options.alleles_per_split,
        "total_splits": options.total_splits,
        # Cleaning is simply the inverse of the debug flag.
        "clean": not options.debug,
        "gramtools_kmer_size": options.gramtools_kmer_size,
        "use_unmapped_reads": options.use_unmapped_reads,
        "filter_min_dp": options.filter_min_dp,
        "filter_min_gcp": options.filter_min_gcp,
        "filter_min_frs": options.filter_min_frs,
        "call_hets": options.include_het_calls,
        "debug": options.debug,
    }
    adjudicator.Adjudicator(*positional, **settings).run()
def test_run(self):
    """Smoke-test Adjudicator.run() in three configurations.

    1. With the VCF split (variants_per_split=3).
    2. Without splitting.
    3. Re-using the gramtools build directory produced by stage 2.

    Only checks that nothing crashes and that the output files exist;
    their contents are not examined.
    """
    plot_suffixes = (
        ".data.tsv",
        ".dp_hist.pdf",
        ".gt_conf_dp_scatter.pdf",
        ".gt_conf_hist.pdf",
    )

    def check_exists(*paths):
        for path in paths:
            self.assertTrue(os.path.exists(path))

    def check_full_outputs(run_dir, adj_obj):
        # Outputs expected from a complete, unsplit, uncleaned run.
        check_exists(
            run_dir,
            adj_obj.log_file,
            adj_obj.final_vcf,
            adj_obj.gramtools_build_dir,
            adj_obj.gramtools_quasimap_dir,
            adj_obj.clustered_vcf,
        )
        check_exists(*[adj_obj.plots_prefix + s for s in plot_suffixes])

    first_dir = "tmp.adjudicator.out"
    if os.path.exists(first_dir):
        shutil.rmtree(first_dir)

    reference = os.path.join(data_dir, "run.ref.fa")
    reads = os.path.join(data_dir, "run.bwa.bam")
    input_vcfs = [
        os.path.join(data_dir, name)
        for name in ("run.calls.1.vcf", "run.calls.2.vcf")
    ]

    # Stage 1: run with the VCF split into chunks.
    adj = adjudicator.Adjudicator(
        first_dir,
        reference,
        [reads],
        input_vcfs,
        variants_per_split=3,
        clean=False,
        gramtools_kmer_size=5,
        genotype_simulation_iterations=1000,
    )
    adj.run()
    check_exists(first_dir, adj.log_file, adj.final_vcf, adj.clustered_vcf)

    # Stage 2: wipe the directory and run again without splitting.
    shutil.rmtree(first_dir)
    adj = adjudicator.Adjudicator(
        first_dir,
        reference,
        [reads],
        input_vcfs,
        clean=False,
        gramtools_kmer_size=5,
        genotype_simulation_iterations=1000,
    )
    adj.run()
    check_full_outputs(first_dir, adj)

    # Stage 3: stage 2 left a gramtools build directory behind. Run once
    # more pointing at it, to exercise the gramtools_build_dir option.
    second_dir = "tmp.adjudicator.out.2"
    existing_build_dir = adj.gramtools_build_dir
    if os.path.exists(second_dir):
        shutil.rmtree(second_dir)

    # When a gramtools build dir is supplied, the Adjudicator assumes one
    # clustered VCF file that matches the gramtools build run. That is the
    # clustered VCF written by the previous Adjudicator, so pass it in
    # place of the original list of VCF files.
    clustered_only = [adj.clustered_vcf]
    adj = adjudicator.Adjudicator(
        second_dir,
        reference,
        [reads],
        clustered_only,
        gramtools_build_dir=existing_build_dir,
        clean=False,
        gramtools_kmer_size=5,
        genotype_simulation_iterations=1000,
    )
    adj.run()
    check_full_outputs(second_dir, adj)
    # No new build directory should appear inside the second output dir.
    self.assertFalse(
        os.path.exists(os.path.join(second_dir, "gramtools.build"))
    )

    shutil.rmtree(first_dir)
    shutil.rmtree(second_dir)
def test_run_clean_is_false():
    """Smoke-test Adjudicator.run() with clean=False in three configurations.

    1. With the VCF split (variants_per_split=3).
    2. Without splitting.
    3. Re-using the gramtools build directory produced by stage 2.

    Only checks that nothing crashes and that the output files exist;
    their contents are not examined.
    """

    def check_exists(*paths):
        for path in paths:
            assert os.path.exists(path)

    def check_full_outputs(run_dir, adj_obj):
        # Outputs expected from a complete, unsplit, uncleaned run.
        check_exists(
            run_dir,
            adj_obj.log_file,
            adj_obj.final_vcf,
            adj_obj.gramtools_build_dir,
            adj_obj.gramtools_quasimap_dir,
            adj_obj.clustered_vcf,
        )

    first_dir = "tmp.adjudicator.noclean.out"
    if os.path.exists(first_dir):
        shutil.rmtree(first_dir)

    reference = os.path.join(data_dir, "run.ref.fa")
    reads = os.path.join(data_dir, "run.bwa.bam")
    input_vcfs = [
        os.path.join(data_dir, name)
        for name in ("run.calls.1.vcf", "run.calls.2.vcf")
    ]

    # Stage 1: run with the VCF split into chunks.
    adj = adjudicator.Adjudicator(
        first_dir,
        reference,
        [reads],
        input_vcfs,
        variants_per_split=3,
        clean=False,
        gramtools_kmer_size=5,
        genotype_simulation_iterations=1000,
    )
    adj.run()
    check_exists(first_dir, adj.log_file, adj.final_vcf, adj.clustered_vcf)

    # Stage 2: wipe the directory and run again without splitting.
    shutil.rmtree(first_dir)
    adj = adjudicator.Adjudicator(
        first_dir,
        reference,
        [reads],
        input_vcfs,
        clean=False,
        gramtools_kmer_size=5,
        genotype_simulation_iterations=1000,
    )
    adj.run()
    check_full_outputs(first_dir, adj)

    # Stage 3: stage 2 left a gramtools build directory behind. Run once
    # more pointing at it, to exercise the gramtools_build_dir option.
    second_dir = "tmp.adjudicator.out.2"
    existing_build_dir = adj.gramtools_build_dir
    if os.path.exists(second_dir):
        shutil.rmtree(second_dir)

    # When a gramtools build dir is supplied, the Adjudicator assumes one
    # clustered VCF file that matches the gramtools build run. That is the
    # clustered VCF written by the previous Adjudicator, so pass it in
    # place of the original list of VCF files.
    clustered_only = [adj.clustered_vcf]
    adj = adjudicator.Adjudicator(
        second_dir,
        reference,
        [reads],
        clustered_only,
        gramtools_build_dir=existing_build_dir,
        clean=False,
        gramtools_kmer_size=5,
        genotype_simulation_iterations=1000,
    )
    adj.run()
    check_full_outputs(second_dir, adj)
    # No new build directory should appear inside the second output dir.
    assert not os.path.exists(os.path.join(second_dir, "gramtools.build"))

    shutil.rmtree(first_dir)
    shutil.rmtree(second_dir)