Beispiel #1
0
    def test_run_empty_vcf_input_files(self):
        '''Check run() when the input VCF files contain no variants.

        The run is expected to raise adjudicator.Error. Afterwards the
        output directory, log file and clustered VCF must exist, while the
        final VCF, the gramtools directories and the plot files must not.
        '''
        outdir = 'tmp.adjudicator.out'
        # Start from a clean slate in case an earlier run left output behind.
        if os.path.exists(outdir):
            shutil.rmtree(outdir)

        empty_vcfs = []
        for name in ('run.calls.empty.1.vcf', 'run.calls.empty.2.vcf'):
            empty_vcfs.append(os.path.join(data_dir, name))

        adj = adjudicator.Adjudicator(
            outdir,
            os.path.join(data_dir, 'run.ref.fa'),
            [os.path.join(data_dir, 'run.bwa.bam')],
            empty_vcfs,
            clean=False,
            gramtools_kmer_size=5,
            genotype_simulation_iterations=1000,
        )
        self.assertRaises(adjudicator.Error, adj.run)

        # Paths that must be present after the failed run.
        for expected in (outdir, adj.log_file):
            self.assertTrue(os.path.exists(expected))

        # Paths that must be absent after the failed run.
        plot_suffixes = (
            '.data.tsv',
            '.dp_hist.pdf',
            '.gt_conf_dp_scatter.pdf',
            '.gt_conf_hist.pdf',
        )
        absent = [
            adj.final_vcf,
            adj.gramtools_build_dir,
            adj.gramtools_quasimap_dir,
        ]
        absent.extend(adj.plots_prefix + suffix for suffix in plot_suffixes)
        for unexpected in absent:
            self.assertFalse(os.path.exists(unexpected))

        self.assertTrue(os.path.exists(adj.clustered_vcf))
        shutil.rmtree(outdir)
Beispiel #2
0
    def test_0MeanDepth_stillRuns(self):
        """
        Check that run() completes without error when the mean depth is 0.

        When mean depth is 0, we can get math errors: math.log(0) in genotype likelihood computation,
        and division by 0 in genotype confidence simulation.

        Note the former only actually occurs if there is a variant site with non-zero coverage;
        in this case, mean depth can get set to 0 due to rounding imprecision. This is tested in genotyper unit tests.
        """

        outdir = "tmp.adjudicator.out"
        # Start from a clean slate in case an earlier run left output behind.
        if os.path.exists(outdir):
            shutil.rmtree(outdir)
        ref_fasta = os.path.join(data_dir, "run.ref.fa")
        reads_file = os.path.join(data_dir, "no_map_reads.fastq")
        vcf_files = [os.path.join(data_dir, "run.calls.1.vcf")]

        adj = adjudicator.Adjudicator(
            outdir,
            ref_fasta,
            [reads_file],
            vcf_files,
            clean=False,
            gramtools_kmer_size=5,
        )
        # The run itself must not raise.
        adj.run()
        # Make sure the coverage is 0
        self.assertEqual(adj.mean_depths[0], 0)
        # Remove the output directory so the test does not leave files behind.
        shutil.rmtree(outdir)
Beispiel #3
0
 def test_run_clean_is_true(self):
     """Run the adjudicator with clean=True and check the final VCF exists.

     This only checks that the run does not crash and that the output
     directory and final.vcf are present; file contents are not inspected.
     """
     outdir = "tmp.adjudicator.clean.out"
     # Start from a clean slate in case an earlier run left output behind.
     if os.path.exists(outdir):
         shutil.rmtree(outdir)

     input_vcfs = []
     for name in ("run.calls.1.vcf", "run.calls.2.vcf"):
         input_vcfs.append(os.path.join(data_dir, name))

     adj = adjudicator.Adjudicator(
         outdir,
         os.path.join(data_dir, "run.ref.fa"),
         [os.path.join(data_dir, "run.bwa.bam")],
         input_vcfs,
         clean=True,
         gramtools_kmer_size=5,
         genotype_simulation_iterations=1000,
     )
     adj.run()

     for expected in (outdir, os.path.join(outdir, "final.vcf")):
         self.assertTrue(os.path.exists(expected))
     shutil.rmtree(outdir)
def test_run_empty_vcf_input_files():
    """Check run() when the input VCF files contain no variants.

    The run is expected to raise. Afterwards the output directory, log file
    and clustered VCF must exist, while the final VCF and the gramtools
    directories must not.
    """
    outdir = "tmp.adjudicator.out"
    # Start from a clean slate in case an earlier run left output behind.
    if os.path.exists(outdir):
        shutil.rmtree(outdir)

    vcf_names = ("run.calls.empty.1.vcf", "run.calls.empty.2.vcf")
    adj = adjudicator.Adjudicator(
        outdir,
        os.path.join(data_dir, "run.ref.fa"),
        [os.path.join(data_dir, "run.bwa.bam")],
        [os.path.join(data_dir, name) for name in vcf_names],
        clean=False,
        gramtools_kmer_size=5,
        genotype_simulation_iterations=1000,
    )
    with pytest.raises(Exception):
        adj.run()

    # Paths that must be present after the failed run.
    for expected in (outdir, adj.log_file):
        assert os.path.exists(expected)

    # Paths that must be absent after the failed run.
    for unexpected in (
        adj.final_vcf,
        adj.gramtools_build_dir,
        adj.gramtools_quasimap_dir,
    ):
        assert not os.path.exists(unexpected)

    assert os.path.exists(adj.clustered_vcf)
    shutil.rmtree(outdir)
Beispiel #5
0
def run(options):
    """Build an Adjudicator from parsed options and run it.

    The output directory, reference FASTA, reads and VCF files are passed
    positionally; the remaining options are forwarded as keyword arguments.
    `overwrite_outdir` is taken from `options.force` and `clean` is the
    negation of `options.debug`.
    """
    positional = (
        options.outdir,
        options.ref_fasta,
        options.reads,
        options.vcf_files,
    )
    settings = {
        "max_read_length": options.max_read_length,
        "read_error_rate": options.read_error_rate,
        "overwrite_outdir": options.force,
        "max_alleles_per_cluster": options.max_alleles_per_cluster,
        "gramtools_build_dir": options.gramtools_build_dir,
        "sample_name": options.sample_name,
        "variants_per_split": options.variants_per_split,
        "alleles_per_split": options.alleles_per_split,
        "total_splits": options.total_splits,
        "clean": not options.debug,
        "gramtools_kmer_size": options.gramtools_kmer_size,
        "use_unmapped_reads": options.use_unmapped_reads,
    }
    adjudicator.Adjudicator(*positional, **settings).run()
Beispiel #6
0
def run(options):
    """Build an Adjudicator from parsed options and run it.

    The output directory, reference FASTA, reads and VCF files are passed
    positionally; the remaining options are forwarded as keyword arguments.
    `overwrite_outdir` is taken from `options.force`, `call_hets` from
    `options.include_het_calls`, and `clean` is the negation of
    `options.debug` (which is also forwarded unchanged as `debug`).
    """
    positional = (
        options.outdir,
        options.ref_fasta,
        options.reads,
        options.vcf_files,
    )
    settings = {
        "read_error_rate": options.read_error_rate,
        "overwrite_outdir": options.force,
        "max_alleles_per_cluster": options.max_alleles_per_cluster,
        "gramtools_build_dir": options.gramtools_build_dir,
        "sample_name": options.sample_name,
        "variants_per_split": options.variants_per_split,
        "alleles_per_split": options.alleles_per_split,
        "total_splits": options.total_splits,
        "clean": not options.debug,
        "gramtools_kmer_size": options.gramtools_kmer_size,
        "use_unmapped_reads": options.use_unmapped_reads,
        "filter_min_dp": options.filter_min_dp,
        "filter_min_gcp": options.filter_min_gcp,
        "filter_min_frs": options.filter_min_frs,
        "call_hets": options.include_het_calls,
        "debug": options.debug,
    }
    adjudicator.Adjudicator(*positional, **settings).run()
Beispiel #7
0
    def test_run(self):
        '''Smoke-test run() in three configurations.

        Only checks that nothing crashes and that the expected output files
        exist, not their contents. The runs are:
          1. with the VCF split (variants_per_split=3);
          2. without splitting;
          3. reusing the gramtools build directory made by run 2.
        '''
        outdir = 'tmp.adjudicator.out'
        if os.path.exists(outdir):
            shutil.rmtree(outdir)
        ref_fasta = os.path.join(data_dir, 'run.ref.fa')
        reads_file = os.path.join(data_dir, 'run.bwa.bam')
        original_vcfs = []
        for name in ('run.calls.1.vcf', 'run.calls.2.vcf'):
            original_vcfs.append(os.path.join(data_dir, name))
        plot_suffixes = (
            '.data.tsv',
            '.dp_hist.pdf',
            '.gt_conf_dp_scatter.pdf',
            '.gt_conf_hist.pdf',
        )

        def build(out, vcfs, **extra):
            # Options shared by all three runs; `extra` holds per-run ones.
            return adjudicator.Adjudicator(
                out,
                ref_fasta,
                [reads_file],
                vcfs,
                clean=False,
                gramtools_kmer_size=5,
                genotype_simulation_iterations=1000,
                **extra
            )

        def check_exist(paths):
            for path in paths:
                self.assertTrue(os.path.exists(path))

        def full_outputs(out, adj_obj):
            # Everything expected after an unsplit run, plots included.
            paths = [
                out,
                adj_obj.log_file,
                adj_obj.final_vcf,
                adj_obj.gramtools_build_dir,
                adj_obj.gramtools_quasimap_dir,
                adj_obj.clustered_vcf,
            ]
            paths.extend(adj_obj.plots_prefix + s for s in plot_suffixes)
            return paths

        # Run 1: split the VCF.
        adj = build(outdir, original_vcfs, variants_per_split=3)
        adj.run()
        check_exist([outdir, adj.log_file, adj.final_vcf, adj.clustered_vcf])

        # Run 2: clean up and then run without splitting.
        shutil.rmtree(outdir)
        adj = build(outdir, original_vcfs)
        adj.run()
        check_exist(full_outputs(outdir, adj))

        # Run 3: now we've run the adjudicator, we have a gramtools
        # build directory. Rerun, but this time use the build
        # directory, so we test the gramtools_build_dir option.
        outdir2 = 'tmp.adjudicator.out.2'
        existing_build_dir = adj.gramtools_build_dir
        if os.path.exists(outdir2):
            shutil.rmtree(outdir2)
        # When gramtools build dir supplied, the Adjudicator assumes
        # one clustered VCF file that matches the gramtools build run.
        # This is the clustered VCF made by the Adjudicator, so we
        # use that instead of the list of original VCF files.
        adj = build(
            outdir2,
            [adj.clustered_vcf],
            gramtools_build_dir=existing_build_dir,
        )
        adj.run()
        check_exist(full_outputs(outdir2, adj))
        # No new gramtools build directory may appear in the second outdir.
        self.assertFalse(
            os.path.exists(os.path.join(outdir2, 'gramtools.build')))

        for leftover in (outdir, outdir2):
            shutil.rmtree(leftover)
def test_run_clean_is_false():
    """Smoke-test run() with clean=False in three configurations.

    Only checks that nothing crashes and that the expected output files
    exist, not their contents. The runs are:
      1. with the VCF split (variants_per_split=3);
      2. without splitting;
      3. reusing the gramtools build directory made by run 2.
    """
    outdir = "tmp.adjudicator.noclean.out"
    if os.path.exists(outdir):
        shutil.rmtree(outdir)
    ref_fasta = os.path.join(data_dir, "run.ref.fa")
    reads_file = os.path.join(data_dir, "run.bwa.bam")
    original_vcfs = []
    for name in ("run.calls.1.vcf", "run.calls.2.vcf"):
        original_vcfs.append(os.path.join(data_dir, name))

    def build(out, vcfs, **extra):
        # Options shared by all three runs; `extra` holds per-run ones.
        return adjudicator.Adjudicator(
            out,
            ref_fasta,
            [reads_file],
            vcfs,
            clean=False,
            gramtools_kmer_size=5,
            genotype_simulation_iterations=1000,
            **extra
        )

    def full_outputs(out, adj_obj):
        # Everything expected after an unsplit run.
        return [
            out,
            adj_obj.log_file,
            adj_obj.final_vcf,
            adj_obj.gramtools_build_dir,
            adj_obj.gramtools_quasimap_dir,
            adj_obj.clustered_vcf,
        ]

    # Run 1: split the VCF.
    adj = build(outdir, original_vcfs, variants_per_split=3)
    adj.run()
    for path in (outdir, adj.log_file, adj.final_vcf, adj.clustered_vcf):
        assert os.path.exists(path)

    # Run 2: clean up and then run without splitting.
    shutil.rmtree(outdir)
    adj = build(outdir, original_vcfs)
    adj.run()
    for path in full_outputs(outdir, adj):
        assert os.path.exists(path)

    # Run 3: now we've run the adjudicator, we have a gramtools
    # build directory. Rerun, but this time use the build
    # directory, so we test the gramtools_build_dir option.
    outdir2 = "tmp.adjudicator.out.2"
    existing_build_dir = adj.gramtools_build_dir
    if os.path.exists(outdir2):
        shutil.rmtree(outdir2)
    # When gramtools build dir supplied, the Adjudicator assumes
    # one clustered VCF file that matches the gramtools build run.
    # This is the clustered VCF made by the Adjudicator, so we
    # use that instead of the list of original VCF files.
    adj = build(
        outdir2,
        [adj.clustered_vcf],
        gramtools_build_dir=existing_build_dir,
    )
    adj.run()
    for path in full_outputs(outdir2, adj):
        assert os.path.exists(path)
    # No new gramtools build directory may appear in the second outdir.
    assert not os.path.exists(os.path.join(outdir2, "gramtools.build"))

    for leftover in (outdir, outdir2):
        shutil.rmtree(leftover)