def _aggregate(self, span):
    """Re-bin one slice of the source pixel table onto the target bins.

    Parameters
    ----------
    span : pair
        ``(lo, hi)`` bounds; the source pixel table is sliced as
        ``table[lo:hi]``.

    Returns
    -------
    The slice grouped by ``['bin1_id', 'bin2_id']`` (``sort=False``) with
    ``count`` summed per group, after ``reset_index()``.

    Notes
    -----
    The start coordinate of each source pixel is the anchor used to pick
    its target bin. When ``self.gs.binsize`` is None the target bin is
    looked up with ``np.searchsorted`` on ``self.gs.start_abspos``;
    otherwise it is ``chrom_binoffset[chrom] + floor(start / binsize)``.
    """
    from cooler.api import Cooler

    lo, hi = span

    # convert_enum=False returns chroms as raw ints
    pixels = Cooler(self.source_uri).pixels(join=True, convert_enum=False)
    chunk = pixels[lo:hi]
    # logger.info('{} {}'.format(lo, hi))
    print('{} {}'.format(lo, hi))

    # use the "start" point as anchor for re-binning
    # XXX - alternatives: midpoint anchor, proportional re-binning
    gs = self.gs
    binsize = gs.binsize
    chrom_binoffset = gs.chrom_binoffset
    chrom_abspos = gs.chrom_abspos
    start_abspos = gs.start_abspos

    # Read the raw arrays for both sides up front, before any column is
    # added to the chunk.
    chrom_ids = [chunk[col].values for col in ('chrom1', 'chrom2')]
    starts = [chunk[col].values for col in ('start1', 'start2')]
    targets = ('bin1_id', 'bin2_id')

    if binsize is None:
        # Genome-wide absolute start, then the last target bin whose
        # absolute start is <= that position.
        abs_starts = [
            chrom_abspos[chrom_id] + start
            for chrom_id, start in zip(chrom_ids, starts)
        ]
        for target, abs_start in zip(targets, abs_starts):
            chunk[target] = np.searchsorted(
                start_abspos, abs_start, side='right') - 1
    else:
        # Bin index within the chromosome, shifted by the chromosome's
        # offset into the target bin table.
        rel_bins = [np.floor(start / binsize).astype(int) for start in starts]
        for target, chrom_id, rel_bin in zip(targets, chrom_ids, rel_bins):
            chunk[target] = chrom_binoffset[chrom_id] + rel_bin

    summed = chunk.groupby(['bin1_id', 'bin2_id'], sort=False)['count'].sum()
    return summed.reset_index()
# Debugging script: replay by hand the first steps of the aggregator's
# `_aggregate` on a hand-picked pixel range.
# NOTE(review): `c`, `input_uri`, `binnify`, `CoolerAggregator` and `Cooler`
# are not defined in this fragment; they must already be in scope.

# Target bin table: same chromsizes as `c`, twice its bin size.
new_bins = binnify(c.chromsizes, 2 * c.binsize)

# map=map passes the builtin `map` (presumably serial execution -- confirm).
iterator = CoolerAggregator(input_uri, new_bins, 1000000, batchsize=1, map=map)

# Running the iterator directly is what fails; kept for reference.
# # last message before it fails ...
# # INFO:cooler:17868809 17872380
# for ii in iterator:
#     print(ii)

# from cooler.api import Cooler
# Hand-picked sub-range inside the failing span logged above
# (17868809 <= lo, hi <= 17872380).
lo, hi = 17869999, 17872300
# The full failing span, for reference:
# lo, hi = 17868809, 17872380

clr = Cooler(input_uri)
# convert_enum=False returns chroms as raw ints
table = clr.pixels(join=True, convert_enum=False)
chunk = table[lo:hi]
# logger.info('{} {}'.format(lo, hi))
print('{} {}'.format(lo, hi))

# use the "start" point as anchor for re-binning
# XXX - alternatives: midpoint anchor, proportional re-binning
# The lookup tables are taken from the aggregator's `gs` attribute.
binsize = iterator.gs.binsize
chrom_binoffset = iterator.gs.chrom_binoffset
chrom_abspos = iterator.gs.chrom_abspos
start_abspos = iterator.gs.start_abspos

# Raw per-pixel chromosome ids and start coordinates for both sides.
# The fragment stops here; nothing below uses these arrays.
chrom_id1 = chunk['chrom1'].values
chrom_id2 = chunk['chrom2'].values
start1 = chunk['start1'].values
start2 = chunk['start2'].values
Example #3
0
from cooler import Cooler

# Inspect a cooler file for a pixel whose second bin cannot be resolved.
# input_uri = "unzoomifiable_5kb.cool"
input_uri = "missing_bin_cooler_40kb.cool"
clr = Cooler(input_uri)

# The single pixel that turns out to be bad (see the output below).
print(clr.pixels(join=True)[17872226])

# Scan the tail of the joined pixel table and keep only the rows whose
# chrom2 is null.
table = clr.pixels(join=True)[17800000:]
missing_chrom2 = table['chrom2'].isnull()
print(table[missing_chrom2])
# Output:
#          chrom1   start1     end1 chrom2  start2  end2  count
# 17872226  chr88  2640000  2680000    NaN     NaN   NaN      1


########################
# TODO
#########################
# Now I'm on the same page as Nezar:
# the bad pixel has been found.
# Still to find out: HOW did that happen?
#
# Processes that might be involved:
# 
#     pairix ${pairs_lib}
# 
#     cooler cload pairix \
#        --nproc ${task.cpus} \
#        --assembly ${params.input.genome.assembly} \