Exemple #1
0
 def test_fetch_single_read(self):
     """Fetch from a single bin with ``read_limit=1`` and check the one read returned.

     The cache is closed in a ``finally`` clause so that a failing assertion
     does not leave the BAM-backed cache open.
     """
     b = BamCache(get_data('mini_mock_reads_for_events.sorted.bam'))
     try:
         s = b.fetch_from_bins('reference3', 1382, 1383, read_limit=1, sample_bins=1)
         self.assertEqual(1, len(s))
         r = list(s)[0]
         self.assertEqual('HISEQX1_11:4:2122:14275:37717:split', r.qname)
     finally:
         # runs whether or not the assertions above passed
         b.close()
Exemple #2
0
 def test_add_read(self):
     """Adding one read stores it in the cache under its name."""
     bam_cache = BamCache(MockBamFileHandle())
     read = MockRead('name')
     bam_cache.add_read(read)
     # exactly one entry, and it is the set holding the read just added
     self.assertEqual(1, len(bam_cache.cache.values()))
     self.assertEqual({read}, bam_cache.cache['name'])
Exemple #3
0
def setUpModule():
    """Load the module-level reference genome and BAM cache fixtures.

    Raises:
        AssertionError: if the first 50 bases of the 'fake' sequence are not
            the expected ones.
    """
    global REFERENCE_GENOME, BAM_CACHE

    REFERENCE_GENOME = load_reference_genome(get_data('mock_reference_genome.fa'))
    expected_start = 'CTCCAAAGAAATTGTAGTTTTCTTCTGGCTTAGAGGTAGATCATCTTGGT'
    # sanity check on the fixture file before any test relies on it
    if expected_start != REFERENCE_GENOME['fake'].seq[0:50].upper():
        raise AssertionError('fake genome file does not have the expected contents')

    BAM_CACHE = BamCache(get_data('mini_mock_reads_for_events.sorted.bam'))
Exemple #4
0
 def setUp(self):
     """Give each test a BamCache backed by a mock handle with three references."""
     # NOTE(review): presumably maps reference name -> reference id; confirm
     # against MockBamFileHandle
     references = {'Y': 23, 'fake': 0, 'reference3': 3}
     self.cache = BamCache(MockBamFileHandle(references))
Exemple #5
0
 def test_add_invalid_read(self, log_patcher):
     """A mapped read whose reference start equals its end must not be cached.

     Also checks that ``log_patcher`` was called with the
     'ignoring invalid read' message at DEBUG level.
     """
     cache = BamCache(MockBamFileHandle())
     zero_length_read = mock.Mock(
         query_name='BAD_READ',
         is_unmapped=False,
         reference_start=0,
         reference_end=0,
     )
     cache.add_read(zero_length_read)
     self.assertEqual(0, len(cache.cache))
     log_patcher.assert_called_with('ignoring invalid read', 'BAD_READ', level=logging.DEBUG)
Exemple #6
0
 def test_bin_fetch_invalid_read(self, log_patcher):
     """An invalid read returned by the handle's ``fetch`` is ignored by ``fetch_from_bins``."""
     zero_length_read = mock.Mock(
         query_name='BAD_READ',
         is_unmapped=False,
         reference_start=0,
         reference_end=0,
     )
     # stand-in file handle exposing only ``references`` and ``fetch``
     handle = mock.Mock(references=['chr'], spec=['references', 'fetch'])
     handle.fetch.return_value = [zero_length_read]
     cache = BamCache(handle)
     cache.fetch_from_bins('chr', 1, 10)
     self.assertEqual(0, len(cache.cache))
     log_patcher.assert_called_with('ignoring invalid read', 'BAD_READ', level=logging.DEBUG)
Exemple #7
0
 def test_genome_bam_stats(self):
     """Check the median fragment size and read length computed from the genome BAM.

     The cache is closed in a ``finally`` clause so that a failing assertion
     (or an error while computing the stats) does not leave it open.
     """
     bamfh = BamCache(get_data('mock_reads_for_events.sorted.bam'))
     try:
         stats = compute_genome_bam_stats(
             bamfh, 1000, 100, min_mapping_quality=1, sample_cap=10000, distribution_fraction=0.99
         )
         # median fragment size must be within 50 of 420
         self.assertGreaterEqual(50, abs(stats.median_fragment_size - 420))
         self.assertEqual(150, stats.read_length)
     finally:
         bamfh.close()
Exemple #8
0
 def test_add_invalid_read(self, log_patcher):
     """A mapped read whose reference start equals its end must not be cached.

     Also verifies the message passed to the first method called on
     ``log_patcher``.
     """
     bad_read = mock.Mock(
         is_unmapped=False, reference_start=0, reference_end=0, query_name='BAD_READ'
     )
     cache = BamCache(MockBamFileHandle())
     cache.add_read(bad_read)
     assert len(cache.cache) == 0
     # Entries of ``method_calls`` are ``call`` records, not mocks: looking up
     # ``assert_called_with`` on one only builds another ``call`` and never
     # asserts anything. Compare the recorded positional arguments instead.
     _, logged_args, _ = log_patcher.method_calls[0]
     assert logged_args == ('ignoring invalid read: BAD_READ',)
Exemple #9
0
def cache():
    """Return a BamCache backed by a mock file handle with four references."""
    # NOTE(review): presumably maps reference name -> reference id; confirm
    # against MockBamFileHandle
    references = {'Y': 23, 'fake': 0, 'reference3': 3, '14': 13}
    return BamCache(MockBamFileHandle(references))
Exemple #10
0
def setUpModule():
    """Load the module-level reference genome and BAM cache fixtures.

    Raises:
        AssertionError: if the first 50 bases of the 'fake' sequence are not
            the expected ones.
    """
    global REFERENCE_GENOME, BAM_CACHE

    REFERENCE_GENOME = load_reference_genome(REFERENCE_GENOME_FILE)
    expected_start = 'CTCCAAAGAAATTGTAGTTTTCTTCTGGCTTAGAGGTAGATCATCTTGGT'
    # sanity check on the fixture file before any test relies on it
    if expected_start != REFERENCE_GENOME['fake'].seq[0:50].upper():
        raise AssertionError('fake genome file does not have the expected contents')

    BAM_CACHE = BamCache(BAM_INPUT)
Exemple #11
0
 def test_get_mate(self):
     """Fetch a single read, then look up its mate with file access allowed.

     Depends on ``fetch_from_bins`` working. The cache is closed in a
     ``finally`` clause, matching the other tests that open this BAM file.
     """
     b = BamCache(get_data('mini_mock_reads_for_events.sorted.bam'))
     try:
         s = b.fetch_from_bins('reference3', 1382, 1383, read_limit=1, sample_bins=1)
         self.assertEqual(1, len(s))
         r = list(s)[0]
         self.assertEqual('HISEQX1_11:4:2122:14275:37717:split', r.qname)
         o = b.get_mate(r, allow_file_access=True)
         self.assertEqual(1, len(o))
         self.assertEqual('HISEQX1_11:4:2122:14275:37717:split', o[0].qname)
     finally:
         # the original never closed the cache
         b.close()
Exemple #12
0
 def test_fetch_invalid_read(self, log_patcher):
     """An invalid read returned by the handle's ``fetch`` must not be cached.

     Also verifies the message passed to the first method called on
     ``log_patcher``.
     """
     bad_read = mock.Mock(
         is_unmapped=False, reference_start=0, reference_end=0, query_name='BAD_READ'
     )
     # stand-in file handle exposing only ``references`` and ``fetch``
     fh = mock.Mock(references=['chr'], spec=['references', 'fetch'])
     fh.configure_mock(**{'fetch.return_value': [bad_read]})
     cache = BamCache(fh)
     cache.fetch('chr', 1, 10)
     assert len(cache.cache) == 0
     # Entries of ``method_calls`` are ``call`` records, not mocks: looking up
     # ``assert_called_with`` on one only builds another ``call`` and never
     # asserts anything. Compare the recorded positional arguments instead.
     _, logged_args, _ = log_patcher.method_calls[0]
     assert logged_args == ('ignoring invalid read: BAD_READ',)
Exemple #13
0
 def test_add_read(self):
     """Adding the same read repeatedly leaves a single entry in the cache."""
     bam_cache = BamCache(MockBamFileHandle())
     read = mock.MagicMock(query_name='name', query_sequence='')

     bam_cache.add_read(read)
     self.assertEqual(1, len(bam_cache.cache.values()))

     # a second add of the same read must not create another entry
     bam_cache.add_read(read)
     self.assertEqual(1, len(bam_cache.cache.values()))

     # still a single entry after reference_start is set on the read
     read.reference_start = 0
     bam_cache.add_read(read)
     self.assertEqual(1, len(bam_cache.cache.values()))
Exemple #14
0
def setUpModule():
    """Load the module-level genome, BAM caches and the READS lookup.

    READS maps each read name fetched from 'reference3' (1-8000) to a
    two-slot list: slot 0 holds the read flagged ``is_read1``, slot 1 the
    other one. Supplementary reads get an entry but never fill a slot.

    Raises:
        AssertionError: if the first 50 bases of the 'fake' sequence are not
            the expected ones.
    """
    global REFERENCE_GENOME, BAM_CACHE, FULL_BAM_CACHE, READS

    REFERENCE_GENOME = load_reference_genome(get_data('mock_reference_genome.fa'))
    expected_start = 'CTCCAAAGAAATTGTAGTTTTCTTCTGGCTTAGAGGTAGATCATCTTGGT'
    # sanity check on the fixture file before any test relies on it
    if expected_start != REFERENCE_GENOME['fake'].seq[0:50].upper():
        raise AssertionError('fake genome file does not have the expected contents')

    BAM_CACHE = BamCache(get_data('mini_mock_reads_for_events.sorted.bam'))
    FULL_BAM_CACHE = BamCache(get_data('mock_reads_for_events.sorted.bam'))

    READS = {}
    for read in BAM_CACHE.fetch('reference3', 1, 8000):
        # the entry is created even when the read turns out to be supplementary
        pair = READS.setdefault(read.qname, [None, None])
        if not read.is_supplementary:
            pair[0 if read.is_read1 else 1] = read
Exemple #15
0
 def test_genome_bam_stats(self):
     """Check the median fragment size and read length computed from the genome BAM.

     The cache is closed in a ``finally`` clause so that a failing assertion
     (or an error while computing the stats) does not leave it open.
     """
     bamfh = BamCache(FULL_BAM_INPUT)
     try:
         stats = compute_genome_bam_stats(bamfh,
                                          1000,
                                          100,
                                          min_mapping_quality=1,
                                          sample_cap=10000,
                                          distribution_fraction=0.99)
         # median fragment size must be within 50 of 420
         self.assertGreaterEqual(50, abs(stats.median_fragment_size - 420))
         self.assertEqual(150, stats.read_length)
     finally:
         bamfh.close()
Exemple #16
0
 def test_fetch_single_read(self):
     """Fetch from a single bin with ``read_limit=1`` and check the one read returned.

     The cache is closed in a ``finally`` clause so that a failing assertion
     does not leave the BAM-backed cache open.
     """
     b = BamCache(BAM_INPUT)
     try:
         s = b.fetch_from_bins('reference3',
                               1382,
                               1383,
                               read_limit=1,
                               sample_bins=1)
         self.assertEqual(1, len(s))
         r = list(s)[0]
         self.assertEqual('HISEQX1_11:4:2122:14275:37717:split', r.qname)
     finally:
         # runs whether or not the assertions above passed
         b.close()
 def setUp(self):
     """Build the GenomeEvidence instance stored as ``self.ge`` for each test."""
     first_break = Breakpoint('1', 1500, orient=ORIENT.LEFT)
     second_break = Breakpoint('1', 6001, orient=ORIENT.RIGHT)
     bam_cache = BamCache(MockBamFileHandle({'1': 0}))
     self.ge = GenomeEvidence(
         first_break,
         second_break,
         bam_cache,
         None,  # reference_genome
         opposing_strands=False,
         read_length=150,
         stdev_fragment_size=500,
         median_fragment_size=100,
         call_error=0,
         stdev_count_abnormal=1,
     )
Exemple #18
0
def flanking_ge(read_length):
    """Return a GenomeEvidence spanning two breakpoints on reference '1'.

    NOTE(review): the ``read_length`` argument is not used in the body; the
    evidence is always built with ``read_length=150`` — confirm this is
    intended.
    """
    first_break = Breakpoint('1', 1500, orient=ORIENT.LEFT)
    second_break = Breakpoint('1', 6001, orient=ORIENT.RIGHT)
    bam_cache = BamCache(MockBamFileHandle({'1': 0}))
    settings = {'validate.stdev_count_abnormal': 1, 'validate.call_error': 0}
    return GenomeEvidence(
        first_break,
        second_break,
        bam_cache,
        None,  # reference_genome
        opposing_strands=False,
        read_length=150,
        stdev_fragment_size=500,
        median_fragment_size=100,
        config=settings,
    )
Exemple #19
0
 def test_trans_bam_stats(self):
     """Check fragment-size and read-length stats computed from the transcriptome BAM.

     The cache is closed in a ``finally`` clause so that a failing assertion
     (or an error while loading annotations or computing stats) does not
     leave it open.
     """
     bamfh = BamCache(TRANSCRIPTOME_BAM_INPUT)
     try:
         annotations = load_reference_genes(
             FULL_REFERENCE_ANNOTATIONS_FILE_JSON)
         stats = compute_transcriptome_bam_stats(bamfh,
                                                 annotations,
                                                 100,
                                                 min_mapping_quality=1,
                                                 stranded=True,
                                                 sample_cap=10000,
                                                 distribution_fraction=0.99)
         # assertLess reports both operands on failure, unlike assertTrue(a < b)
         self.assertLess(abs(stats.median_fragment_size - 185), 5)
         self.assertEqual(75, stats.read_length)
         self.assertLess(stats.stdev_fragment_size, 50)
     finally:
         bamfh.close()
Exemple #20
0
 def test_trans_bam_stats(self):
     """Check fragment-size and read-length stats computed from the transcriptome BAM.

     The cache is closed in a ``finally`` clause so that a failing assertion
     (or an error while loading annotations or computing stats) does not
     leave it open.
     """
     bamfh = BamCache(get_data('mock_trans_reads_for_events.sorted.bam'))
     try:
         annotations = load_reference_genes(get_data('mock_annotations.json'))
         stats = compute_transcriptome_bam_stats(
             bamfh,
             annotations,
             100,
             min_mapping_quality=1,
             stranded=True,
             sample_cap=10000,
             distribution_fraction=0.99,
         )
         # assertLess reports both operands on failure, unlike assertTrue(a < b)
         self.assertLess(abs(stats.median_fragment_size - 185), 5)
         self.assertEqual(75, stats.read_length)
         self.assertLess(stats.stdev_fragment_size, 50)
     finally:
         bamfh.close()
Exemple #21
0
 def test_reference_id(self):
     """``reference_id`` returns 0 for '1' and raises KeyError for an unknown name."""
     bam_cache = BamCache(MockBamFileHandle({'1': 0}))
     self.assertEqual(0, bam_cache.reference_id('1'))
     # '2' is not among the handle's references
     with self.assertRaises(KeyError):
         bam_cache.reference_id('2')
Exemple #22
0
 def test___init__(self):
     """The handle given to the constructor is exposed as the ``fh`` attribute."""
     handle = MockBamFileHandle()
     bam_cache = BamCache(handle)
     assert bam_cache.fh == handle
Exemple #23
0
 def test___init__(self):
     """The handle given to the constructor is exposed as the ``fh`` attribute."""
     handle = MockBamFileHandle()
     bam_cache = BamCache(handle)
     self.assertEqual(handle, bam_cache.fh)
Exemple #24
0
 def test_reference_id(self):
     """``reference_id`` returns 0 for '1' and raises KeyError for an unknown name."""
     bam_cache = BamCache(MockBamFileHandle({'1': 0}))
     assert bam_cache.reference_id('1') == 0
     # '2' is not among the handle's references
     with pytest.raises(KeyError):
         bam_cache.reference_id('2')
Exemple #25
0
 def test_get_read_reference_name(self):
     """A read built with reference id 0 resolves to the reference named '1'."""
     bam_cache = BamCache(MockBamFileHandle({'1': 0}))
     read = MockRead('name', 0)
     assert bam_cache.get_read_reference_name(read) == '1'
Exemple #26
0
 def test_get_read_reference_name(self):
     """A read built with reference id 0 resolves to the reference named '1'."""
     bam_cache = BamCache(MockBamFileHandle({'1': 0}))
     read = MockRead('name', 0)
     self.assertEqual('1', bam_cache.get_read_reference_name(read))