def _aggregate(self, span): from cooler.api import Cooler lo, hi = span clr = Cooler(self.source_uri) # convert_enum=False returns chroms as raw ints table = clr.pixels(join=True, convert_enum=False) chunk = table[lo:hi] # logger.info('{} {}'.format(lo, hi)) print('{} {}'.format(lo, hi)) # use the "start" point as anchor for re-binning # XXX - alternatives: midpoint anchor, proportional re-binning binsize = self.gs.binsize chrom_binoffset = self.gs.chrom_binoffset chrom_abspos = self.gs.chrom_abspos start_abspos = self.gs.start_abspos chrom_id1 = chunk['chrom1'].values chrom_id2 = chunk['chrom2'].values start1 = chunk['start1'].values start2 = chunk['start2'].values if binsize is None: abs_start1 = chrom_abspos[chrom_id1] + start1 abs_start2 = chrom_abspos[chrom_id2] + start2 chunk['bin1_id'] = np.searchsorted( start_abspos, abs_start1, side='right') - 1 chunk['bin2_id'] = np.searchsorted( start_abspos, abs_start2, side='right') - 1 else: rel_bin1 = np.floor(start1 / binsize).astype(int) rel_bin2 = np.floor(start2 / binsize).astype(int) chunk['bin1_id'] = chrom_binoffset[chrom_id1] + rel_bin1 chunk['bin2_id'] = chrom_binoffset[chrom_id2] + rel_bin2 grouped = chunk.groupby(['bin1_id', 'bin2_id'], sort=False) return grouped['count'].sum().reset_index()
# Scratch cell: replay the first half of `_aggregate` at module level for one
# hand-picked pixel range, so the intermediate arrays can be inspected.
# `c`, `input_uri`, `binnify` and `CoolerAggregator` are not defined in this
# cell; they must already be bound when it runs.

# Bin table requested at twice the bin size of `c`.
new_bins = binnify(c.chromsizes, 2 * c.binsize)
# The aggregator is only built here to get at its `gs` attribute below;
# iterating it is what fails (see the commented-out loop).
iterator = CoolerAggregator(input_uri, new_bins, 1000000, batchsize=1, map=map)

#
# last message before it fails ...
#
# INFO:cooler:17868809 17872380
# for ii in iterator:
#     print(ii)
#
from cooler.api import Cooler

# Narrowed pixel range inside the failing span reported above.
lo, hi = 17869999, 17872300
# lo, hi = 17868809, 17872380
clr = Cooler(input_uri)
# convert_enum=False returns chroms as raw ints
table = clr.pixels(join=True, convert_enum=False)
chunk = table[lo:hi]
# logger.info('{} {}'.format(lo, hi))
print('{} {}'.format(lo, hi))

# use the "start" point as anchor for re-binning
# XXX - alternatives: midpoint anchor, proportional re-binning
# Same lookups `_aggregate` takes from self.gs, read off the aggregator.
binsize = iterator.gs.binsize
chrom_binoffset = iterator.gs.chrom_binoffset
chrom_abspos = iterator.gs.chrom_abspos
start_abspos = iterator.gs.start_abspos

# Raw column arrays left as module globals for interactive inspection.
chrom_id1 = chunk['chrom1'].values
chrom_id2 = chunk['chrom2'].values
start1 = chunk['start1'].values
start2 = chunk['start2'].values
from cooler import Cooler # input_uri = "unzoomifiable_5kb.cool" input_uri = "missing_bin_cooler_40kb.cool" clr = Cooler(input_uri) print(clr.pixels(join=True)[17872226]) table = clr.pixels(join=True)[17800000:] print(table[table['chrom2'].isnull()]) # chrom1 start1 end1 chrom2 start2 end2 count # 17872226 chr88 2640000 2680000 NaN NaN NaN 1 ######################## # TODO ######################### # Now i'm on the same page with Nezar # bad pixel is found ... # FIND OUT - HOW that happened ?!?!?!?!?! ... # # processes that might be involved: # # pairix ${pairs_lib} # # cooler cload pairix \ # --nproc ${task.cpus} \ # --assembly ${params.input.genome.assembly} \