def test_process_interval_group():
    """Check the (interval, read depths) pairs produced for a group."""
    alignment = BamFile('tests/fixtures/alignment.bam')
    interval_group = [BaseInterval('chr1', 1, 5), BaseInterval('chr1', 10, 13)]
    coverage_pairs = list(process_interval_group(alignment, interval_group))

    # expected read depths, listed in the same order as ``interval_group``
    expected_depths = ([2, 4, 5, 5, 5], [7, 7, 7, 7])

    for position, depths in enumerate(expected_depths):
        pair = coverage_pairs[position]
        returned_interval = pair[0]
        # materialize the depths so they can be compared to a plain list
        returned_depths = list(pair[1])

        assert returned_interval == interval_group[position]
        assert returned_depths == depths
def test_rename_sex_interval():
    """Check renaming of IDs for sex, non-sex and alternate-contig intervals."""
    sex_interval = BaseInterval('X', 11, 111, block_ids=['block1', 'block2'])
    autosomal_interval = BaseInterval('chr22', 4, 55,
                                      superblock_ids=['sblock1'])
    prefixed_sex_interval = BaseInterval('chrY', 99, 921,
                                         superblock_ids=['sblock2'])

    # sex interval: block IDs are expected to carry the contig as prefix
    renamed = rename_sex_interval(sex_interval)
    assert renamed.block_ids == ['X-block1', 'X-block2']

    # non-sex interval: expected to compare equal to the input
    untouched = rename_sex_interval(autosomal_interval)
    assert untouched == autosomal_interval

    # sex interval when the sex contigs use alternate names
    renamed = rename_sex_interval(prefixed_sex_interval,
                                  sex_contigs=['chrX', 'chrY'])
    assert renamed.superblock_ids == ['chrY-sblock2']
def test_extend_interval():
    """Check that an interval can be widened at both ends."""
    base = BaseInterval('chr1', 10, 100)

    # without an explicit extension the result should equal the input
    unchanged = extend_interval(base)
    assert unchanged == base

    # extending by 5 moves start 10 -> 5 and end 100 -> 105
    widened = extend_interval(base, extension=5)
    assert widened.start == 5
    assert widened.end == 105
def test_extract_intervals():
    """Check the first interval extracted from a single record."""
    record = [
        '1',
        'NC_000001.10',
        'SAMD11',
        '148398',
        'CCDS2.2',
        'Public',
        '+',
        '11',
        '35',
        '[11-18, 25-30, 32-35]',
        'Identical',
    ]
    extracted = list(extract_intervals(record))

    # only the first extracted interval is compared
    expected_first = BaseInterval(
        '1', 11, 18, '1-11-18', 0, '+', ['CCDS2.2'], ['SAMD11']
    )
    assert extracted[0] == expected_first
def test_BaseInterval():
    """Check an interval built from only its first four fields."""
    raw_fields = ('chr1', 10, 20, 'int1')
    built = BaseInterval(*raw_fields)

    # the built interval must not compare equal to the bare 4-tuple
    assert built != raw_fields

    # fields that were passed in
    assert built.start == 10
    assert built.contig == 'chr1'

    # 'score' was not passed in and is expected to be an empty string
    assert built.score == ''
    assert built.name == 'int1'

    # 'block_ids' was not passed in and is expected to be an empty list
    assert built.block_ids == []
def test_calculate_metrics():
    """Check coverage and completeness computed from read depths."""
    # read depths are passed as a numpy array; the 8 values sum to 32,
    # so the expected coverage is 4
    depths = np.array([0, 5, 3, 3, 4, 5, 5, 7])
    source_interval = BaseInterval('chr1', 10, 17)
    payload = (source_interval, depths)

    annotated = calculate_metrics(payload, threshold=5)

    # the first three fields must carry over from the source interval
    assert annotated[:3] == source_interval[:3]
    assert annotated.coverage == 4
    # 4 of the 8 depths reach the threshold of 5
    assert annotated.completeness == 0.5
def test_serialize_interval():
    """Check the string serialization of BaseInterval instances."""
    # minimal interval: trailing empty fields are expected to be dropped
    minimal = BaseInterval('chr1', 10, 20)
    assert serialize_interval(minimal) == 'chr1\t10\t20'

    # BED conversion: the expected start is one lower (123 -> 122)
    named = BaseInterval('chr1', 123, 123, 'int1')
    assert serialize_interval(named, bed=True) == 'chr1\t122\t123\tint1'

    # block IDs present: empty fields in between must be kept
    with_blocks = BaseInterval('chr22', 101, 200,
                               block_ids=['block11', 'block12'])
    expected_default = 'chr22\t101\t200\t\t\t\tblock11,block12'
    assert serialize_interval(with_blocks) == expected_default

    # calling with only keyword options yields a reusable serializer
    pipe_serializer = serialize_interval(delimiter='|', subdelimiter=';')
    expected_piped = 'chr22|101|200||||block11;block12'
    assert pipe_serializer(with_blocks) == expected_piped
def test_merge_related_elements():
    """Check merging of block/superblock IDs across identical intervals."""
    # (block ID, superblock ID) for three intervals sharing coordinates
    id_pairs = [
        ('block1', 'sblock1'),
        ('block2', 'sblock1'),
        ('block3', 'sblock2'),
    ]
    interval_group = [
        BaseInterval('X', 10, 100, block_ids=[block_id],
                     superblock_ids=[superblock_id])
        for block_id, superblock_id in id_pairs
    ]

    merged = merge_related_elements(interval_group)

    assert merged[:3] == ('X', 10, 100)
    assert merged.block_ids == ['block1', 'block2', 'block3']
    # the repeated superblock ID is expected to appear twice
    assert merged.superblock_ids == ['sblock1', 'sblock1', 'sblock2']
def test_annotator_pipeline():
    """Run a small BED stream through annotate_bed_stream and check results."""
    alignment_path = 'tests/fixtures/alignment.bam'

    # a list of lines stands in for a BED file: one header, two intervals
    bed_lines = ['#chrom\tstart\tend', '1\t0\t5', '1\t9\t20']

    # read depths the expected coverage values are derived from
    first_depths = [2, 4, 5, 5, 5]
    second_depths = [7] * 11

    annotated = annotate_bed_stream(
        bed_lines,
        alignment_path,
        cutoff=5,
        contig_prefix='chr',
        bp_threshold=100,
    )
    first = next(annotated)
    second = next(annotated)

    assert first[:3] == BaseInterval('chr1', 1, 5)[:3]
    assert first.coverage == sum(first_depths) / len(first_depths)
    assert first.completeness == 3 / 5

    assert second[:3] == BaseInterval('chr1', 10, 20)[:3]
    assert second.coverage == sum(second_depths) / len(second_depths)
    assert second.completeness == 1.
def test_aggregate():
    """Check the groups aggregate builds from 'block1'/'block2' payloads."""
    payloads = [
        ('block1', BaseInterval('X', 10, 100)),
        ('block1', BaseInterval('X', 50, 130)),
        ('block2', BaseInterval('Y', 111, 222)),
    ]

    # the expected groups use fresh instances, not slices of ``payloads``
    block1_group = [
        ('block1', BaseInterval('X', 10, 100)),
        ('block1', BaseInterval('X', 50, 130)),
    ]
    block2_group = [('block2', BaseInterval('Y', 111, 222))]
    expected_groups = [block1_group, block2_group]

    assert expected_groups == list(aggregate(payloads))
def setup(self):
    """Create a Store for ':memory:', set it up and build interval fixtures."""
    store = Store(':memory:')
    self.store = store
    store.set_up()

    interval = BaseInterval(
        '1', 10, 100,
        name='int1',
        block_ids=['block1', 'block2'],
        superblock_ids=['sblock1', 'sblock1'],
    )
    self.interval = interval

    # store-side counterpart built from the fields of the interval above
    self.db_interval = StoreInterval(
        interval_id=interval.name,
        contig=interval.contig,
        start=interval.start,
        end=interval.end,
        strand=interval.strand,
    )

    # a single ('block2', db interval, 'sblock1') entry
    self.interval_group = [('block2', self.db_interval, 'sblock1')]
def test_merge_intervals():
    """Check the (start, end) pair returned when merging intervals."""
    # all intervals share a contig; merging is only meaningful in that case
    spread_out = [
        BaseInterval('1', 10, 100),
        BaseInterval('1', 90, 150),
        BaseInterval('1', 200, 250),
    ]
    assert merge_intervals(spread_out) == (10, 250)

    # trickier overlaps: the first interval contains the other two
    nested = [
        BaseInterval('1', 505, 585),
        BaseInterval('1', 520, 550),
        BaseInterval('1', 545, 580),
    ]
    assert merge_intervals(nested) == (505, 585)

    # a single interval
    lone_interval = BaseInterval('1', 10, 10)
    assert merge_intervals([lone_interval]) == (10, 10)

    # no intervals at all must raise ValueError
    with pytest.raises(ValueError):
        merge_intervals([])
def test_group_intervals():
    """Check how intervals are split into groups."""
    # scenario 1: every interval is on the same contig
    same_contig = [
        BaseInterval('1', 10, 100),
        BaseInterval('1', 90, 150),
        BaseInterval('1', 200, 250),
    ]
    expected_same_contig = [same_contig[:2], same_contig[2:]]
    produced = group_intervals(same_contig, bp_threshold=150)
    assert list(produced) == expected_same_contig

    # scenario 2: intervals on more than one contig
    mixed_contigs = [
        BaseInterval('X', 10, 100),
        BaseInterval('Y', 90, 150),
        BaseInterval('Y', 200, 250),
    ]
    expected_mixed = [mixed_contigs[:1], mixed_contigs[1:]]
    produced = group_intervals(mixed_contigs, bp_threshold=1000)
    assert list(produced) == expected_mixed