Beispiel #1
0
    def test_downsample_to_too_large_target_count(self):
        """Downsampling two BAMs to a read count of 20000 must raise ValueError.

        The target is presumably larger than the number of reads available in
        the fixture BAMs (going by the test name) -- confirm against fixtures.
        """
        import shutil  # local import: only needed for temp-dir cleanup

        temp_dir = tempfile.mkdtemp()
        # mkdtemp() never deletes what it creates; remove the directory once
        # the test finishes, whether it passes or fails. ignore_errors keeps
        # the cleanup harmless if the directory is already gone by then.
        self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)

        target_count = 20000

        with self.assertRaises(ValueError):
            read_utils.main_downsample_bams(
                [self.larger_bam, self.smaller_bam],
                temp_dir,
                specified_read_count=target_count,
                JVMmemory="1g"
            )
Beispiel #2
0
    def test_downsample_to_target_count_without_subdir(self):
        """Downsample one BAM to 4000 reads with ``out_path=None``.

        Output is looked up in the directory of the input BAM (files matching
        ``*downsampled-*.bam``). Every match must contain the target number of
        reads, within a tolerance of 10. Files that appear in that directory
        during the test are deleted afterwards.
        """
        target_count = 4000
        out_pattern = os.path.join(os.path.dirname(self.larger_bam), '*downsampled-*.bam')

        # Remember what already matched so that cleanup only touches files
        # that appeared while this test was running.
        preexisting = set(glob.glob(out_pattern))

        try:
            read_utils.main_downsample_bams([self.larger_bam], out_path=None, specified_read_count=target_count, JVMmemory="1g")

            # glob.glob() already returns a list; no extra list() needed.
            output_bams = glob.glob(out_pattern)

            # Kept from the original: echo the matched paths to stdout.
            print(output_bams)
            self.assertGreater(len(output_bams), 0, msg="No output files matching *downsampled-*.bam found")
            for out_bam in output_bams:
                self.assertAlmostEqual(self.samtools.count(out_bam), target_count, delta=10, msg="{} not downsampled to the target size: {}".format(os.path.basename(out_bam),target_count))
        finally:
            # Outputs land next to the input, so nothing else removes them.
            for created in set(glob.glob(out_pattern)) - preexisting:
                try:
                    os.remove(created)
                except OSError:
                    # Best-effort cleanup: never mask the real test outcome.
                    pass
Beispiel #3
0
    def test_downsample_with_dedup_before(self):
        """Downsample to 1500 reads with ``deduplicate_before=True``.

        Also tests output into a separate directory: the results are looked up
        as ``*.bam`` files in a fresh temporary directory passed as the output
        path. Every output must contain the target number of reads, within a
        tolerance of 10.
        """
        import shutil  # local import: only needed for temp-dir cleanup

        temp_dir = tempfile.mkdtemp()
        # mkdtemp() never deletes what it creates; remove the directory once
        # the test finishes, whether it passes or fails. ignore_errors keeps
        # the cleanup harmless if the directory is already gone by then.
        self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)

        target_count = 1500
        read_utils.main_downsample_bams([self.with_dups], temp_dir, deduplicate_before=True, specified_read_count=target_count, JVMmemory="1g")

        # glob.glob() already returns a list; no extra list() needed.
        output_bams = glob.glob(os.path.join(temp_dir, '*.bam'))

        self.assertGreater(len(output_bams), 0, msg="No output found")
        for out_bam in output_bams:
            self.assertAlmostEqual(self.samtools.count(out_bam), target_count, delta=10, msg="{} not downsampled to the target size: {}".format(os.path.basename(out_bam),target_count))
Beispiel #4
0
    def test_normalization_to_lowest_cardinality(self):
        """Without an explicit target, outputs should match the smaller input.

        No ``specified_read_count`` is passed; the test expects every output
        BAM to contain as many reads as ``self.smaller_bam``, within a
        tolerance of 10. Also tests output into a separate directory.
        """
        import shutil  # local import: only needed for temp-dir cleanup

        temp_dir = tempfile.mkdtemp()
        # mkdtemp() never deletes what it creates; remove the directory once
        # the test finishes, whether it passes or fails. ignore_errors keeps
        # the cleanup harmless if the directory is already gone by then.
        self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)

        target_count = self.samtools.count(self.smaller_bam)
        # target count not passed in since we are checking that the count of the smaller file is used
        read_utils.main_downsample_bams([self.larger_bam, self.smaller_bam], temp_dir, JVMmemory="1g")

        # glob.glob() already returns a list; no extra list() needed.
        output_bams = glob.glob(os.path.join(temp_dir, '*.bam'))

        self.assertGreater(len(output_bams), 0, msg="No output found")
        for out_bam in output_bams:
            self.assertAlmostEqual(self.samtools.count(out_bam), target_count, delta=10, msg="{} not downsampled to the target size: {}".format(os.path.basename(out_bam),target_count))