def testBin(self):
    """Check that appending runs give the same counts as a single run.

    First liquidates everything at once into a 'together' directory, then
    liquidates in two runs where the second run appends to the counts file
    written by the first, and finally verifies that every compared table
    matches between the two counts files.
    """
    bin_size = len(self.sequence1)

    # Reference run: self.dir_path is handed over as the bam path
    # (presumably a directory holding both bam files -- confirm in setUp).
    together_dir_path = os.path.join(self.dir_path, 'together')
    run = blb.BinLiquidator(bin_size=bin_size,
                            output_directory=together_dir_path,
                            bam_file_path=self.dir_path)

    # First appending run: only the first bam file.
    appending_dir = os.path.join(self.dir_path, 'appending')
    run = blb.BinLiquidator(bin_size=bin_size,
                            output_directory=appending_dir,
                            bam_file_path=self.bam1_file_path)

    # Second appending run: the second bam file, pointed at the counts file
    # of the first run while using a separate output directory.
    appending_h5_path = os.path.join(appending_dir, 'counts.h5')
    extra_output_dir = os.path.join(self.dir_path,
                                    'appending_extra_without_h5_file')
    run = blb.BinLiquidator(bin_size=bin_size,
                            output_directory=extra_output_dir,
                            bam_file_path=self.bam2_file_path,
                            counts_file_path=appending_h5_path)

    together_h5_path = os.path.join(together_dir_path, 'counts.h5')
    compared_tables = ('bin_counts', 'normalized_counts',
                       'summary', 'sorted_summary')
    with tables.open_file(together_h5_path) as together_h5, \
            tables.open_file(appending_h5_path) as appending_h5:
        for table_name in compared_tables:
            together_rows = getattr(together_h5.root, table_name)[:]
            appending_rows = getattr(appending_h5.root, table_name)[:]
            # Tables are compared through their string representation.
            self.assertEqual(str(together_rows), str(appending_rows))
def test_bin_liquidation(self):
    """Liquidate one bam file with a bin as wide as the sequence and
    verify the file record and the single bin count record."""
    sequence_length = len(self.sequence)
    output_dir = os.path.join(self.dir_path, 'output')
    liquidator = blb.BinLiquidator(bin_size=sequence_length,
                                   output_directory=output_dir,
                                   bam_file_path=self.bam_file_path)
    liquidator.flatten()

    with tables.open_file(liquidator.counts_file_path) as counts:
        files = counts.root.files
        # Only a single bam file was liquidated.
        self.assertEqual(1, len(files))

        file_record = files[0]
        # Only a single read.
        self.assertEqual(1, file_record['length'])
        self.assertEqual(1, file_record['key'])

        bin_counts = counts.root.bin_counts
        # One bin accommodates the full sequence.
        self.assertEqual(1, len(bin_counts))

        record = bin_counts[0]
        self.assertEqual(0, record['bin_number'])
        self.assertEqual(self.chromosome, record['chromosome'].decode())
        # The count represents how many base pairs were read.
        self.assertEqual(sequence_length, record['count'])
def test_liquidation_in_long_directory(self):
    """Liquidate a bam file that lives in a directory with a long name and
    verify the recorded cell type is the first 15 characters of that name."""
    # NOTE(review): this replaces self.dir_path, so whatever directory it
    # referred to before is no longer tracked by this test -- confirm that
    # tearDown still cleans everything up.
    self.dir_path = tempfile.mkdtemp(prefix='blt_' + 'a' * 16)
    directory_name = os.path.basename(self.dir_path)
    truncated_cell_type = directory_name[:15]

    self.bam_file_path = create_bam(self.dir_path, [self.chromosome],
                                    self.sequence)

    sequence_length = len(self.sequence)
    output_dir = os.path.join(self.dir_path, 'output')
    liquidator = blb.BinLiquidator(bin_size=sequence_length,
                                   output_directory=output_dir,
                                   bam_file_path=self.bam_file_path)

    with tables.open_file(liquidator.counts_file_path) as counts:
        files = counts.root.files
        # Only a single bam file was liquidated.
        self.assertEqual(1, len(files))

        file_record = files[0]
        # Only a single read.
        self.assertEqual(1, file_record['length'])
        self.assertEqual(1, file_record['key'])

        bin_counts = counts.root.bin_counts
        # One bin accommodates the full sequence.
        self.assertEqual(1, len(bin_counts))

        record = bin_counts[0]
        self.assertEqual(0, record['bin_number'])
        self.assertEqual(truncated_cell_type, record['cell_type'].decode())
        self.assertEqual(self.chromosome, record['chromosome'].decode())
        # The count represents how many base pairs were read.
        self.assertEqual(sequence_length, record['count'])
def testOverridingBlackList(self):
    """Liquidate a three-chromosome bam file with an empty blacklist and
    verify that a bin count record exists for every chromosome."""
    chromosomes = ['chr1', 'chr2', 'chr2_random']
    sequence = 'ATTTAAAAATTAATTTAATGCTTGGCTAAATCTTAATTACATATATAATT'
    sequence_length = len(sequence)

    bam_file_path = create_bam(self.dir_path, chromosomes, sequence,
                               file_name='multiple.bam')

    output_dir = os.path.join(self.dir_path, 'output')
    liquidator = blb.BinLiquidator(bin_size=sequence_length,
                                   output_directory=output_dir,
                                   bam_file_path=bam_file_path,
                                   blacklist=[])

    with tables.open_file(liquidator.counts_file_path) as counts:
        files = counts.root.files
        # Only a single bam file was liquidated.
        self.assertEqual(1, len(files))

        file_record = files[0]
        # One read for each chromosome.
        self.assertEqual(3, file_record['length'])
        self.assertEqual(1, file_record['key'])

        bin_counts = counts.root.bin_counts
        # One record for each chromosome that should be liquidated.
        self.assertEqual(3, len(bin_counts))

        # Records are expected in the same order as the chromosomes list.
        for position, expected_chromosome in enumerate(chromosomes):
            record = bin_counts[position]
            self.assertEqual(0, record['bin_number'])
            self.assertEqual(expected_chromosome,
                             record['chromosome'].decode())
            # The count represents how many base pairs were read.
            self.assertEqual(sequence_length, record['count'])
def test_bin_liquidation_zero_bin_size(self):
    """Expect an exception when liquidating with a bin size of zero."""

    def liquidate_with_zero_bin_size():
        # Both construction and the explicit batch call sit inside the
        # checked callable; an exception from either satisfies the test.
        output_dir = os.path.join(self.dir_path, 'output')
        zero_bin_liquidator = blb.BinLiquidator(
            bin_size=0,
            output_directory=output_dir,
            bam_file_path=self.bam_file_path)
        zero_bin_liquidator.batch(extension=0, sense='.')

    # NOTE(review): Exception is very broad, so unrelated errors would also
    # make this pass -- consider narrowing once the raised type is confirmed.
    self.assertRaises(Exception, liquidate_with_zero_bin_size)
def test_bin_long_bam_file_name(self):
    """Liquidate a copy of the bam file that has a 65-character file name.

    There are no explicit assertions; the test passes as long as copying
    the files and constructing the liquidator do not raise.
    """
    # 65 characters; presumably one more than the width of the column that
    # stores the file name -- TODO confirm against the table definition.
    long_file_name = 'x' * 65
    long_file_path = os.path.join(self.dir_path, long_file_name)

    # Copy the bam file first, then its '.bai' companion, under the long name.
    for suffix in ('', '.bai'):
        shutil.copyfile(self.bam_file_path + suffix, long_file_path + suffix)

    blb.BinLiquidator(bin_size=len(self.sequence),
                      output_directory=os.path.join(self.dir_path,
                                                    'bin_output'),
                      bam_file_path=long_file_path)