Esempio n. 1
0
def test_process_interval_group():
    """Check the read depths reported for every interval in a group."""
    alignment = BamFile('tests/fixtures/alignment.bam')
    group = [BaseInterval('chr1', 1, 5), BaseInterval('chr1', 10, 13)]
    expected_depths = [[2, 4, 5, 5, 5], [7, 7, 7, 7]]

    results = list(process_interval_group(alignment, group))

    # each result holds the interval at index 0 and its read depths at
    # index 1; results are expected in the same order as the input group
    for position, depths in enumerate(expected_depths):
        result = results[position]
        assert result[0] == group[position]
        assert list(result[1]) == depths
def test_rename_sex_interval():
    """Check that ids get a contig prefix only on sex contig intervals."""
    on_x = BaseInterval('X', 11, 111, block_ids=['block1', 'block2'])
    on_chr22 = BaseInterval('chr22', 4, 55, superblock_ids=['sblock1'])
    on_chr_y = BaseInterval('chrY', 99, 921, superblock_ids=['sblock2'])

    # sex interval: every block id is prefixed with the contig name
    assert rename_sex_interval(on_x).block_ids == ['X-block1', 'X-block2']

    # non-sex interval: the result compares equal to the input
    assert rename_sex_interval(on_chr22) == on_chr22

    # alternate contig names are supplied through ``sex_contigs``
    alt_contigs = ['chrX', 'chrY']
    renamed = rename_sex_interval(on_chr_y, sex_contigs=alt_contigs)
    assert renamed.superblock_ids == ['chrY-sblock2']
Esempio n. 3
0
def test_extend_interval():
    """Check that an interval can be widened at both of its ends."""
    original = BaseInterval('chr1', 10, 100)

    # with no extension given, the result equals the input interval
    assert extend_interval(original) == original

    # extension=5 moves the start from 10 to 5 and the end from 100 to 105
    widened = extend_interval(original, extension=5)
    assert widened.start == 5
    assert widened.end == 105
def test_extract_intervals():
    """Check the first interval extracted from a record with three ranges."""
    row = [
        '1', 'NC_000001.10', 'SAMD11', '148398', 'CCDS2.2', 'Public', '+',
        '11', '35', '[11-18, 25-30, 32-35]', 'Identical'
    ]

    extracted = list(extract_intervals(row))

    # only the first range (11-18) is verified here
    expected_first = BaseInterval('1', 11, 18, '1-11-18', 0, '+',
                                  ['CCDS2.2'], ['SAMD11'])
    assert extracted[0] == expected_first
Esempio n. 5
0
def test_BaseInterval():
    """Check the defaults of an interval built from four fields only."""
    fields = ('chr1', 10, 20, 'int1')
    built = BaseInterval(*fields)

    # the instance must not compare equal to the plain 4-tuple
    assert built != fields

    # fields that were supplied
    assert built.start == 10
    assert built.contig == 'chr1'

    # ``score`` was not supplied and falls back to an empty string
    assert built.score == ''
    assert built.name == 'int1'

    # ``block_ids`` was not supplied and falls back to an empty list
    assert built.block_ids == []
Esempio n. 6
0
def test_calculate_metrics():
    """Check coverage and completeness calculated from read depths."""
    # at this point, completeness can only be calculated from a numpy
    # array of read depths; these 8 values are expected to give a
    # coverage of 4
    depths = np.array([0, 5, 3, 3, 4, 5, 5, 7])
    placeholder = BaseInterval('chr1', 10, 17)

    annotated = calculate_metrics((placeholder, depths), threshold=5)

    # the first three fields are carried over from the input interval
    assert annotated[:3] == placeholder[:3]
    assert annotated.coverage == 4
    assert annotated.completeness == .5
Esempio n. 7
0
def test_serialize_interval():
    """Check the string serialization of BaseInterval instances."""
    # simple case: empty fields to the right are removed
    minimal = BaseInterval('chr1', 10, 20)
    assert serialize_interval(minimal) == 'chr1\t10\t20'

    # conversion to BED format: the start is written as 122, not 123
    named = BaseInterval('chr1', 123, 123, 'int1')
    assert serialize_interval(named, bed=True) == 'chr1\t122\t123\tint1'

    # with block ids: empty intermediate fields must be maintained!
    with_blocks = BaseInterval('chr22', 101, 200,
                               block_ids=['block11', 'block12'])
    expected_tabbed = 'chr22\t101\t200\t\t\t\tblock11,block12'
    assert serialize_interval(with_blocks) == expected_tabbed

    # curried use: calling with only keyword options returns a serializer
    pipe_serializer = serialize_interval(delimiter='|', subdelimiter=';')
    expected_piped = 'chr22|101|200||||block11;block12'
    assert pipe_serializer(with_blocks) == expected_piped
def test_merge_related_elements():
    """Check that ids of intervals with equal coordinates are combined."""
    id_pairs = [
        ('block1', 'sblock1'),
        ('block2', 'sblock1'),
        ('block3', 'sblock2'),
    ]
    # three intervals with the same coordinates, differing only in ids
    related = [
        BaseInterval('X', 10, 100,
                     block_ids=[block_id],
                     superblock_ids=[superblock_id])
        for block_id, superblock_id in id_pairs
    ]

    merged = merge_related_elements(related)

    assert merged[:3] == ('X', 10, 100)
    assert merged.block_ids == ['block1', 'block2', 'block3']
    # superblock ids are expected in input order, duplicates included
    assert merged.superblock_ids == ['sblock1', 'sblock1', 'sblock2']
def test_annotator_pipeline():
    """Check the annotator pipeline end to end on a small BED stream."""
    alignment_path = 'tests/fixtures/alignment.bam'

    # minimal BED "file": a header line followed by two intervals
    bed_lines = ['#chrom\tstart\tend', '1\t0\t5', '1\t9\t20']
    depths_first = [2, 4, 5, 5, 5]
    depths_second = [7] * 11
    annotated = annotate_bed_stream(bed_lines,
                                    alignment_path,
                                    cutoff=5,
                                    contig_prefix='chr',
                                    bp_threshold=100)

    first = next(annotated)
    second = next(annotated)

    # results carry the prefixed contig and a start one above the BED start
    assert first[:3] == BaseInterval('chr1', 1, 5)[:3]
    assert first.coverage == sum(depths_first) / len(depths_first)
    assert first.completeness == 3 / 5

    assert second[:3] == BaseInterval('chr1', 10, 20)[:3]
    assert second.coverage == sum(depths_second) / len(depths_second)
    assert second.completeness == 1.
Esempio n. 10
0
def test_aggregate():
    """Check that payloads are split into groups matching their block id."""
    def build_payloads():
        # fresh instances on every call so that the expected groups never
        # share objects with the input handed to ``aggregate``
        return [('block1', BaseInterval('X', 10, 100)),
                ('block1', BaseInterval('X', 50, 130)),
                ('block2', BaseInterval('Y', 111, 222))]

    stream = build_payloads()

    expected = build_payloads()
    expected_groups = [expected[:2], expected[2:]]

    assert expected_groups == list(aggregate(stream))
Esempio n. 11
0
    def setup(self):
        """Create the store and the interval fixtures used by the tests."""
        self.store = Store(':memory:')
        self.store.set_up()

        self.interval = BaseInterval(
            '1', 10, 100,
            name='int1',
            block_ids=['block1', 'block2'],
            superblock_ids=['sblock1', 'sblock1'],
        )

        # store-side counterpart built from the interval's own fields
        source = self.interval
        self.db_interval = StoreInterval(
            interval_id=source.name,
            contig=source.contig,
            start=source.start,
            end=source.end,
            strand=source.strand,
        )
        self.interval_group = [('block2', self.db_interval, 'sblock1')]
Esempio n. 12
0
def test_merge_intervals():
    """Check the (start, end) pair produced by merging intervals."""
    # merging only makes sense for intervals on the same contig
    separated = [
        BaseInterval('1', start, end)
        for start, end in [(10, 100), (90, 150), (200, 250)]
    ]
    assert merge_intervals(separated) == (10, 250)

    # tricky overlaps: the later intervals lie within the first one
    nested = [
        BaseInterval('1', start, end)
        for start, end in [(505, 585), (520, 550), (545, 580)]
    ]
    assert merge_intervals(nested) == (505, 585)

    # a single interval
    lone = BaseInterval('1', 10, 10)
    assert merge_intervals([lone]) == (10, 10)

    # no intervals at all must raise
    with pytest.raises(ValueError):
        merge_intervals([])
Esempio n. 13
0
def test_group_intervals():
    """Check how a list of intervals is split into groups."""
    # all intervals on one contig: first two together, the last on its own
    same_contig = [
        BaseInterval('1', start, end)
        for start, end in [(10, 100), (90, 150), (200, 250)]
    ]
    expected = [same_contig[:2], same_contig[2:]]
    assert list(group_intervals(same_contig, bp_threshold=150)) == expected

    # several contigs: the 'X' interval is separated from the 'Y' ones
    mixed_contigs = [
        BaseInterval('X', 10, 100),
        BaseInterval('Y', 90, 150),
        BaseInterval('Y', 200, 250)
    ]
    expected = [mixed_contigs[:1], mixed_contigs[1:]]
    assert list(group_intervals(mixed_contigs, bp_threshold=1000)) == expected