def test_fetch_single_read(self):
    """Fetch one read from a one-base window and check its query name.

    Opens the mini mock BAM through ``BamCache`` and calls
    ``fetch_from_bins`` on ``reference3`` 1382-1383 with ``read_limit=1``
    and ``sample_bins=1``.
    """
    b = BamCache(get_data('mini_mock_reads_for_events.sorted.bam'))
    try:
        s = b.fetch_from_bins('reference3', 1382, 1383, read_limit=1, sample_bins=1)
        self.assertEqual(1, len(s))
        r = list(s)[0]
        self.assertEqual('HISEQX1_11:4:2122:14275:37717:split', r.qname)
    finally:
        # close in ``finally`` so a failing assertion does not leak the handle
        b.close()
def test_add_read(self):
    """After one ``add_read`` call the cache holds one entry, keyed 'name'."""
    cache = BamCache(MockBamFileHandle())
    read = MockRead('name')

    cache.add_read(read)

    self.assertEqual(1, len(cache.cache.values()))
    self.assertEqual({read}, cache.cache['name'])
def setUpModule():
    """Populate the module-level ``REFERENCE_GENOME`` and ``BAM_CACHE``.

    Raises:
        AssertionError: if the first 50 bases of the ``fake`` sequence do
            not match the expected prefix.
    """
    global REFERENCE_GENOME, BAM_CACHE

    REFERENCE_GENOME = load_reference_genome(get_data('mock_reference_genome.fa'))

    # sanity check that the expected mock genome file was loaded
    expected_prefix = 'CTCCAAAGAAATTGTAGTTTTCTTCTGGCTTAGAGGTAGATCATCTTGGT'
    observed_prefix = REFERENCE_GENOME['fake'].seq[0:50].upper()
    if expected_prefix != observed_prefix:
        raise AssertionError('fake genome file does not have the expected contents')

    BAM_CACHE = BamCache(get_data('mini_mock_reads_for_events.sorted.bam'))
def setUp(self):
    """Build ``self.cache`` from a mock BAM handle with three references."""
    chromosomes = {'Y': 23, 'fake': 0, 'reference3': 3}
    mock_handle = MockBamFileHandle(chromosomes)
    self.cache = BamCache(mock_handle)
def test_add_invalid_read(self, log_patcher):
    """A mapped read with equal start and end is not cached, and is logged.

    Args:
        log_patcher: mock standing in for the logging callable; it is
            checked for the 'ignoring invalid read' call at DEBUG level.
    """
    read_attributes = {
        'is_unmapped': False,
        'reference_start': 0,
        'reference_end': 0,
        'query_name': 'BAD_READ',
    }
    invalid_read = mock.Mock(**read_attributes)
    bam_cache = BamCache(MockBamFileHandle())

    bam_cache.add_read(invalid_read)

    self.assertEqual(0, len(bam_cache.cache))
    log_patcher.assert_called_with('ignoring invalid read', 'BAD_READ', level=logging.DEBUG)
def test_bin_fetch_invalid_read(self, log_patcher):
    """``fetch_from_bins`` does not cache a read with equal start and end.

    The file handle is a mock whose ``fetch`` returns only the bad read.

    Args:
        log_patcher: mock standing in for the logging callable; it is
            checked for the 'ignoring invalid read' call at DEBUG level.
    """
    invalid_read = mock.Mock(
        is_unmapped=False,
        reference_start=0,
        reference_end=0,
        query_name='BAD_READ',
    )
    handle = mock.Mock(references=['chr'], spec=['references', 'fetch'])
    handle.fetch.return_value = [invalid_read]
    bam_cache = BamCache(handle)

    bam_cache.fetch_from_bins('chr', 1, 10)

    self.assertEqual(0, len(bam_cache.cache))
    log_patcher.assert_called_with('ignoring invalid read', 'BAD_READ', level=logging.DEBUG)
def test_genome_bam_stats(self):
    """Check the genome BAM stats computed from the full mock BAM.

    The median fragment size must be within 50 of 420 and the read
    length must be exactly 150.
    """
    bamfh = BamCache(get_data('mock_reads_for_events.sorted.bam'))
    try:
        stats = compute_genome_bam_stats(
            bamfh,
            1000,
            100,
            min_mapping_quality=1,
            sample_cap=10000,
            distribution_fraction=0.99,
        )
        self.assertGreaterEqual(50, abs(stats.median_fragment_size - 420))
        self.assertEqual(150, stats.read_length)
    finally:
        # close in ``finally`` so a failing assertion does not leak the handle
        bamfh.close()
def test_add_invalid_read(self, log_patcher):
    """A mapped read with equal start and end is not cached, and is logged.

    Args:
        log_patcher: mock standing in for the logger; its first recorded
            method call must carry the 'ignoring invalid read' message.
    """
    bad_read = mock.Mock(
        is_unmapped=False, reference_start=0, reference_end=0, query_name='BAD_READ'
    )
    cache = BamCache(MockBamFileHandle())
    cache.add_read(bad_read)
    assert len(cache.cache) == 0
    # Entries of ``method_calls`` are ``call`` records shaped
    # (name, args, kwargs), not mocks. Invoking ``.assert_called_with`` on
    # one only builds another ``call`` record and can never fail, so the
    # recorded arguments are compared explicitly instead.
    _name, args, kwargs = log_patcher.method_calls[0]
    assert (args, kwargs) == (('ignoring invalid read: BAD_READ',), {})
def cache():
    """Return a ``BamCache`` over a mock BAM handle with four references."""
    chromosomes = {'Y': 23, 'fake': 0, 'reference3': 3, '14': 13}
    mock_handle = MockBamFileHandle(chromosomes)
    return BamCache(mock_handle)
def setUpModule():
    """Populate the module-level ``REFERENCE_GENOME`` and ``BAM_CACHE``.

    Raises:
        AssertionError: if the first 50 bases of the ``fake`` sequence do
            not match the expected prefix.
    """
    global REFERENCE_GENOME, BAM_CACHE

    REFERENCE_GENOME = load_reference_genome(REFERENCE_GENOME_FILE)

    # sanity check that the expected mock genome file was loaded
    expected_prefix = 'CTCCAAAGAAATTGTAGTTTTCTTCTGGCTTAGAGGTAGATCATCTTGGT'
    observed_prefix = REFERENCE_GENOME['fake'].seq[0:50].upper()
    if expected_prefix != observed_prefix:
        raise AssertionError(
            'fake genome file does not have the expected contents')

    BAM_CACHE = BamCache(BAM_INPUT)
def test_get_mate(self):
    """Fetch one read, then look up its mate with file access allowed.

    Both the fetched read and the single returned mate must have the
    same expected query name.
    """
    # dependent on fetch working
    b = BamCache(get_data('mini_mock_reads_for_events.sorted.bam'))
    try:
        s = b.fetch_from_bins('reference3', 1382, 1383, read_limit=1, sample_bins=1)
        self.assertEqual(1, len(s))
        r = list(s)[0]
        self.assertEqual('HISEQX1_11:4:2122:14275:37717:split', r.qname)
        o = b.get_mate(r, allow_file_access=True)
        self.assertEqual(1, len(o))
        self.assertEqual('HISEQX1_11:4:2122:14275:37717:split', o[0].qname)
    finally:
        # the original never closed the cache; release the handle even
        # when an assertion fails
        b.close()
def test_fetch_invalid_read(self, log_patcher):
    """``fetch`` does not cache a read with equal start and end.

    The file handle is a mock whose ``fetch`` returns only the bad read.

    Args:
        log_patcher: mock standing in for the logger; its first recorded
            method call must carry the 'ignoring invalid read' message.
    """
    bad_read = mock.Mock(
        is_unmapped=False, reference_start=0, reference_end=0, query_name='BAD_READ'
    )
    fh = mock.Mock(references=['chr'], spec=['references', 'fetch'])
    fh.configure_mock(**{'fetch.return_value': [bad_read]})
    cache = BamCache(fh)
    cache.fetch('chr', 1, 10)
    assert len(cache.cache) == 0
    # Entries of ``method_calls`` are ``call`` records shaped
    # (name, args, kwargs), not mocks. Invoking ``.assert_called_with`` on
    # one only builds another ``call`` record and can never fail, so the
    # recorded arguments are compared explicitly instead.
    _name, args, kwargs = log_patcher.method_calls[0]
    assert (args, kwargs) == (('ignoring invalid read: BAD_READ',), {})
def test_add_read(self):
    """Adding the same read object repeatedly leaves one cache entry.

    The read is added three times, the last time after its
    ``reference_start`` has been set to 0.
    """
    bam_cache = BamCache(MockBamFileHandle())
    read = mock.MagicMock(query_name='name', query_sequence='')

    # first insertion creates the entry
    bam_cache.add_read(read)
    self.assertEqual(1, len(bam_cache.cache.values()))

    # inserting the identical read again must not add an entry
    bam_cache.add_read(read)
    self.assertEqual(1, len(bam_cache.cache.values()))

    # nor after changing the read's start position
    read.reference_start = 0
    bam_cache.add_read(read)
    self.assertEqual(1, len(bam_cache.cache.values()))
def setUpModule():
    """Populate the module-level genome, BAM caches and ``READS`` lookup.

    ``READS`` maps each query name fetched from ``reference3`` 1-8000 to a
    two-item list: slot 0 holds the read flagged ``is_read1`` and slot 1
    the other read. Supplementary reads create an entry but fill no slot.

    Raises:
        AssertionError: if the first 50 bases of the ``fake`` sequence do
            not match the expected prefix.
    """
    global REFERENCE_GENOME, BAM_CACHE, FULL_BAM_CACHE, READS

    REFERENCE_GENOME = load_reference_genome(get_data('mock_reference_genome.fa'))

    # sanity check that the expected mock genome file was loaded
    expected_prefix = 'CTCCAAAGAAATTGTAGTTTTCTTCTGGCTTAGAGGTAGATCATCTTGGT'
    observed_prefix = REFERENCE_GENOME['fake'].seq[0:50].upper()
    if expected_prefix != observed_prefix:
        raise AssertionError('fake genome file does not have the expected contents')

    BAM_CACHE = BamCache(get_data('mini_mock_reads_for_events.sorted.bam'))
    FULL_BAM_CACHE = BamCache(get_data('mock_reads_for_events.sorted.bam'))

    READS = {}
    for read in BAM_CACHE.fetch('reference3', 1, 8000):
        # the entry is created even when the read turns out to be supplementary
        pair = READS.setdefault(read.qname, [None, None])
        if read.is_supplementary:
            continue
        slot = 0 if read.is_read1 else 1
        pair[slot] = read
def test_genome_bam_stats(self):
    """Check the genome BAM stats computed from ``FULL_BAM_INPUT``.

    The median fragment size must be within 50 of 420 and the read
    length must be exactly 150.
    """
    bamfh = BamCache(FULL_BAM_INPUT)
    try:
        stats = compute_genome_bam_stats(
            bamfh,
            1000,
            100,
            min_mapping_quality=1,
            sample_cap=10000,
            distribution_fraction=0.99,
        )
        self.assertGreaterEqual(50, abs(stats.median_fragment_size - 420))
        self.assertEqual(150, stats.read_length)
    finally:
        # close in ``finally`` so a failing assertion does not leak the handle
        bamfh.close()
def test_fetch_single_read(self):
    """Fetch one read from a one-base window and check its query name.

    Opens ``BAM_INPUT`` through ``BamCache`` and calls ``fetch_from_bins``
    on ``reference3`` 1382-1383 with ``read_limit=1`` and
    ``sample_bins=1``.
    """
    b = BamCache(BAM_INPUT)
    try:
        s = b.fetch_from_bins('reference3', 1382, 1383, read_limit=1, sample_bins=1)
        self.assertEqual(1, len(s))
        r = list(s)[0]
        self.assertEqual('HISEQX1_11:4:2122:14275:37717:split', r.qname)
    finally:
        # close in ``finally`` so a failing assertion does not leak the handle
        b.close()
def setUp(self):
    """Build ``self.ge``, a ``GenomeEvidence`` over a mock BAM handle.

    The evidence spans two breakpoints on chromosome '1' (1500 left,
    6001 right) and is created without a reference genome.
    """
    first_breakpoint = Breakpoint('1', 1500, orient=ORIENT.LEFT)
    second_breakpoint = Breakpoint('1', 6001, orient=ORIENT.RIGHT)
    bam_cache = BamCache(MockBamFileHandle({'1': 0}))
    reference_genome = None

    self.ge = GenomeEvidence(
        first_breakpoint,
        second_breakpoint,
        bam_cache,
        reference_genome,
        opposing_strands=False,
        read_length=150,
        stdev_fragment_size=500,
        median_fragment_size=100,
        call_error=0,
        stdev_count_abnormal=1,
    )
def flanking_ge(read_length):
    """Return a ``GenomeEvidence`` over a mock BAM handle.

    The evidence spans two breakpoints on chromosome '1' (1500 left,
    6001 right) and is created without a reference genome.

    Args:
        read_length: accepted but not used in the body; the evidence is
            always built with ``read_length=150``.
            NOTE(review): confirm whether the argument was meant to be
            forwarded.
    """
    first_breakpoint = Breakpoint('1', 1500, orient=ORIENT.LEFT)
    second_breakpoint = Breakpoint('1', 6001, orient=ORIENT.RIGHT)
    bam_cache = BamCache(MockBamFileHandle({'1': 0}))
    reference_genome = None
    validation_config = {'validate.stdev_count_abnormal': 1, 'validate.call_error': 0}

    return GenomeEvidence(
        first_breakpoint,
        second_breakpoint,
        bam_cache,
        reference_genome,
        opposing_strands=False,
        read_length=150,
        stdev_fragment_size=500,
        median_fragment_size=100,
        config=validation_config,
    )
def test_trans_bam_stats(self):
    """Check the transcriptome BAM stats from ``TRANSCRIPTOME_BAM_INPUT``.

    The median fragment size must be within 5 of 185, the read length
    exactly 75 and the fragment size standard deviation below 50.
    """
    bamfh = BamCache(TRANSCRIPTOME_BAM_INPUT)
    try:
        annotations = load_reference_genes(
            FULL_REFERENCE_ANNOTATIONS_FILE_JSON)
        stats = compute_transcriptome_bam_stats(
            bamfh,
            annotations,
            100,
            min_mapping_quality=1,
            stranded=True,
            sample_cap=10000,
            distribution_fraction=0.99,
        )
        # assertLess reports both operands on failure, unlike assertTrue(a < b)
        self.assertLess(abs(stats.median_fragment_size - 185), 5)
        self.assertEqual(75, stats.read_length)
        self.assertLess(stats.stdev_fragment_size, 50)
    finally:
        # close in ``finally`` so a failure above does not leak the handle
        bamfh.close()
def test_trans_bam_stats(self):
    """Check the transcriptome BAM stats from the mock transcriptome BAM.

    The median fragment size must be within 5 of 185, the read length
    exactly 75 and the fragment size standard deviation below 50.
    """
    bamfh = BamCache(get_data('mock_trans_reads_for_events.sorted.bam'))
    try:
        annotations = load_reference_genes(get_data('mock_annotations.json'))
        stats = compute_transcriptome_bam_stats(
            bamfh,
            annotations,
            100,
            min_mapping_quality=1,
            stranded=True,
            sample_cap=10000,
            distribution_fraction=0.99,
        )
        # assertLess reports both operands on failure, unlike assertTrue(a < b)
        self.assertLess(abs(stats.median_fragment_size - 185), 5)
        self.assertEqual(75, stats.read_length)
        self.assertLess(stats.stdev_fragment_size, 50)
    finally:
        # close in ``finally`` so a failure above does not leak the handle
        bamfh.close()
def test_reference_id(self):
    """``reference_id`` returns 0 for '1' and raises ``KeyError`` for '2'."""
    cache = BamCache(MockBamFileHandle({'1': 0}))

    self.assertEqual(0, cache.reference_id('1'))
    with self.assertRaises(KeyError):
        cache.reference_id('2')
def test___init__(self):
    """The constructor exposes the handle it was given as ``fh``."""
    handle = MockBamFileHandle()
    cache = BamCache(handle)

    assert cache.fh == handle
def test___init__(self):
    """The constructor exposes the handle it was given as ``fh``."""
    handle = MockBamFileHandle()
    cache = BamCache(handle)

    self.assertEqual(handle, cache.fh)
def test_reference_id(self):
    """``reference_id`` returns 0 for '1' and raises ``KeyError`` for '2'."""
    cache = BamCache(MockBamFileHandle({'1': 0}))

    assert cache.reference_id('1') == 0
    with pytest.raises(KeyError):
        cache.reference_id('2')
def test_get_read_reference_name(self):
    """``MockRead('name', 0)`` resolves to reference name '1'.

    The mock handle is built from the mapping ``{'1': 0}``.
    """
    cache = BamCache(MockBamFileHandle({'1': 0}))
    read = MockRead('name', 0)

    assert cache.get_read_reference_name(read) == '1'
def test_get_read_reference_name(self):
    """``MockRead('name', 0)`` resolves to reference name '1'.

    The mock handle is built from the mapping ``{'1': 0}``.
    """
    cache = BamCache(MockBamFileHandle({'1': 0}))
    read = MockRead('name', 0)

    self.assertEqual('1', cache.get_read_reference_name(read))