Esempio n. 1
0
def test_mappable_bin_from_region(chr1_bin, chr1_params):
    """Check the initial state of a bin built by ``MappableBin.from_start``.

    A bin started at position 0 with the ``chr1_params`` fixture is expected
    to report chromosome ``chr1``, have both boundaries at 0, a
    ``current_size`` of 0, and the same ``bin_size`` as the parameters.
    """
    fresh_bin = MappableBin.from_start(chr1_params, 0)

    assert fresh_bin.chrom == 'chr1'
    # Both boundaries are still at the starting position.
    assert (fresh_bin.start_pos, fresh_bin.end_pos) == (0, 0)
    assert fresh_bin.current_size == 0
    assert fresh_bin.bin_size == chr1_params.bin_size
Esempio n. 2
0
def test_check_extend(chr1_bin, chr1_params):
    """Check that ``check_extend`` accepts a region and grows the bin.

    After offering the region 10..20 to a bin started at 0, the call is
    expected to return a truthy value, ``end_pos`` to move to 20,
    ``current_size`` to become 10 and ``start_pos`` to stay at 0.
    """
    region = dict(start_pos=10, end_pos=20)
    target_bin = MappableBin.from_start(chr1_params, 0)

    accepted = target_bin.check_extend(region)

    assert accepted
    assert (target_bin.start_pos, target_bin.end_pos) == (0, 20)
    assert target_bin.current_size == 10
Esempio n. 3
0
def test_check_extend_overflow(chr1_params):
    """Check that ``check_extend`` rejects a region larger than the bin.

    With ``bin_size`` set to 10000, the region 10..10011 is expected to be
    refused (falsy return value) and the bin to be left untouched.
    """
    chr1_params.bin_size = 10000
    target_bin = MappableBin.from_start(chr1_params, 0)
    oversized_region = dict(start_pos=10, end_pos=10011)

    accepted = target_bin.check_extend(oversized_region)
    assert not accepted

    # The rejected region must not have modified the bin.
    assert (target_bin.start_pos, target_bin.end_pos) == (0, 0)
    assert target_bin.current_size == 0
    assert target_bin.bin_size == 10000
Esempio n. 4
0
def test_split_extend_overfill(chr1_params):
    """Check the fill flags after ``split_extend`` with a very large region.

    With ``bin_size`` set to 10000 and the region 10..20011, the original
    bin is expected to report full but not overfilled, while the returned
    bin is expected to report overfilled but not full.
    """
    chr1_params.bin_size = 10000
    first_bin = MappableBin.from_start(chr1_params, 0)
    huge_region = dict(start_pos=10, end_pos=20011)

    remainder_bin = first_bin.split_extend(huge_region)

    # The bin that was split.
    assert first_bin.is_full()
    assert not first_bin.is_overfill()

    # The bin returned by the split.
    assert not remainder_bin.is_full()
    assert remainder_bin.is_overfill()
Esempio n. 5
0
    def bins_boundaries_generator(self, chroms, mappable_regions_df):
        """Yield ``MappableBin`` objects built from mappable regions.

        For every chromosome in ``chroms`` the matching rows of
        ``mappable_regions_df`` are sorted and fed, one by one, into the
        current bin.  Whenever a row does not fit (``check_extend`` returns a
        falsy value) the bin is split with ``split_extend``, the completed
        bin is yielded, and the remainder becomes the new current bin (after
        being split further if it is itself overfilled).

        Args:
            chroms: iterable of chromosome names; each is used as a key into
                the results of ``self.hg.chrom_sizes()`` and
                ``self.hg.calc_chrom_bins()``.
            mappable_regions_df: table with a ``chrom`` attribute/column that
                supports boolean-mask indexing, ``sort_values`` by
                ``chrom``/``start_pos``/``end_pos`` and ``iterrows()``
                (presumably a pandas DataFrame -- confirm against callers).

        Yields:
            Completed ``MappableBin`` instances, in the order they are
            closed.

        NOTE(review): the bin still being filled when a chromosome's rows
        run out is not yielded by this code -- confirm that this is intended.
        """
        chrom_sizes = self.hg.chrom_sizes()
        chrom_bins = self.hg.calc_chrom_bins()

        # if mappable_regions_df is None:
        #     mappable_regions_df = self.load_mappable_regions()

        for chrom in chroms:
            # Restrict to this chromosome and process its regions in
            # sorted order.
            chrom_df = mappable_regions_df[mappable_regions_df.chrom == chrom]
            chrom_df = chrom_df.sort_values(
                by=['chrom', 'start_pos', 'end_pos'])

            params = BinParams.build(
                chrom_size=chrom_sizes[chrom],
                chrom_bin=chrom_bins[chrom])
            # Per-chromosome state: the bin being filled, the excess value
            # threaded through adapt_excess/overfill_split, and the number
            # of bins still to be emitted.
            mappable_bin = None
            current_excess = 0
            bins_count = params.bins_count

            for _index, row in chrom_df.iterrows():
                if mappable_bin is None:
                    # First row of the chromosome: open the initial bin at
                    # position 0.
                    mappable_bin = MappableBin.from_start(params, start_pos=0)
                    current_excess = mappable_bin.adapt_excess(current_excess)
                if not mappable_bin.check_extend(row):
                    # The row was not accepted by the current bin: split it
                    # and keep the remainder as ``next_bin``.
                    next_bin = mappable_bin.split_extend(row)

                    bins_count -= 1
                    if bins_count == 0:
                        # Last bin of the chromosome: its end is set to the
                        # chromosome size.
                        mappable_bin.end_pos = chrom_sizes[chrom].size
                    yield mappable_bin
                    if next_bin.is_overfill():
                        # The remainder is overfilled: split it into a list
                        # of bins and get the updated excess value back.
                        current_excess, mappable_bins = \
                            next_bin.overfill_split(current_excess)

                        assert len(mappable_bins) > 1
                        # Every bin but the last is yielded; the last one
                        # becomes the bin that keeps being filled.
                        # NOTE(review): bins yielded here do not get the
                        # "last bin" end_pos adjustment applied above even
                        # if bins_count reaches 0 -- confirm.
                        for mb in mappable_bins[:-1]:
                            bins_count -= 1
                            yield mb
                        mappable_bin = mappable_bins[-1]
                    else:
                        mappable_bin = next_bin
                        current_excess = \
                            mappable_bin.adapt_excess(current_excess)

            # Drop the unfinished bin; the next chromosome starts fresh
            # (the same reset also happens at the top of the loop body).
            mappable_bin = None
Esempio n. 6
0
def test_split_extend(chr1_params):
    """Check the boundaries produced by ``split_extend``.

    With ``bin_size`` set to 10000 and the region 10..10011, the original
    bin is expected to end at 10010 with ``current_size`` 10000, and the
    returned bin to cover 10010..10011 with ``current_size`` 1.
    """
    chr1_params.bin_size = 10000
    first_bin = MappableBin.from_start(chr1_params, 0)
    region = dict(start_pos=10, end_pos=10011)

    remainder_bin = first_bin.split_extend(region)

    # The bin that was split.
    assert first_bin.bin_size == 10000
    assert (first_bin.start_pos, first_bin.end_pos) == (0, 10010)
    assert first_bin.current_size == 10000

    # The bin returned by the split starts where the first one ends.
    assert remainder_bin.bin_size == 10000
    assert (remainder_bin.start_pos, remainder_bin.end_pos) == (10010, 10011)
    assert remainder_bin.current_size == 1
Esempio n. 7
0
def test_overfill_split_adapt_excess_overfill(chr1_params):
    """Check ``overfill_split`` when the incoming excess is 0.9.

    With ``bin_size`` 10000, ``bin_size_excess`` 0.2 and a bin manually set
    to size 10001, the split is expected to return two bins and an excess
    of about 0.3: a full first bin of size 10001 followed by an empty bin
    starting and ending at 10001.
    """
    chr1_params.bin_size = 10000
    chr1_params.bin_size_excess = 0.2

    source_bin = MappableBin.from_start(chr1_params, 0)
    source_bin.end_pos = 10001
    source_bin.current_size = 10001

    remaining_excess, pieces = source_bin.overfill_split(0.9)

    assert len(pieces) == 2
    assert pytest.approx(remaining_excess) == 0.3

    # Everything before the trailing bin must be full.
    leading = pieces[0:1]
    assert all(piece.is_full() for piece in leading)
    assert all(piece.current_size == 10001 for piece in leading)

    # The trailing bin is the one left open.
    trailing = pieces[-1]
    assert trailing.current_size == 0
    assert trailing.bin_size == 10000
    assert (trailing.start_pos, trailing.end_pos) == (10001, 10001)
Esempio n. 8
0
def test_overfill_split(chr1_params):
    """Check ``overfill_split`` on a bin holding two bin sizes plus one.

    With ``bin_size`` 10000, ``bin_size_excess`` 0.1 and a bin manually set
    to size 20001, the split is expected to return three bins and an excess
    of 0.4: two full bins of size 10000 followed by a bin covering
    20000..20001 with ``current_size`` 1.
    """
    # Local stdlib import so the test does not rely on module-level imports.
    import math

    chr1_params.bin_size = 10000
    chr1_params.bin_size_excess = 0.1

    mb = MappableBin.from_start(chr1_params, 0)
    mb.end_pos = 20001
    mb.current_size = 20001

    current_excess = 0.1

    current_excess, mappable_bins = \
        mb.overfill_split(current_excess)

    assert len(mappable_bins) == 3
    # The excess is an accumulated float, so compare with a tolerance
    # instead of exact equality to avoid spurious rounding failures.
    assert math.isclose(current_excess, 0.4)

    full_bins = mappable_bins[0:2]
    assert all(full_bin.is_full() for full_bin in full_bins)
    assert all(full_bin.current_size == 10000 for full_bin in full_bins)

    last_mb = mappable_bins[-1]
    assert last_mb.current_size == 1
    assert last_mb.bin_size == 10000
    assert last_mb.start_pos == 20000
    assert last_mb.end_pos == 20001