def add_values_to_data_buffers(buffers_to_add, nan_buffers_to_add):
    '''
    Queue new values at the finest level and flush full chunks.

    The values are appended to the level-0 buffers, a progress line is
    printed, and then, starting at level 0, every level whose buffer
    holds at least ``chunk_size`` entries has one chunk written to its
    dataset and its aggregate pushed into the next level's buffer.

    This function reads and mutates names it does not define itself
    (``data_buffers``, ``nan_data_buffers``, ``positions``, ``dsets``,
    ``nan_dsets``) and reads ``chunk_size``, ``zoom_step``, ``max_zoom``,
    ``assembly_size`` and ``t1`` from the surrounding scope.

    :param buffers_to_add: list of values to append to ``data_buffers[0]``
    :param nan_buffers_to_add: list of per-position NaN counts to append
        to ``nan_data_buffers[0]``
    '''
    level = 0
    data_buffers[0] += buffers_to_add
    nan_data_buffers[0] += nan_buffers_to_add

    # progress report based on how far the level-0 dataset has been filled
    elapsed = time.time() - t1
    fraction_done = (positions[level] + 1) / float(assembly_size)
    message = "position: {} progress: {:.2f} elapsed: {:.2f} remaining: {:.2f}"
    print(message.format(positions[level] + 1,
                         fraction_done,
                         elapsed,
                         elapsed / (fraction_done) - elapsed))

    while len(data_buffers[level]) >= chunk_size:
        print("len(data_buffers[curr_zoom])", len(data_buffers[level]))

        # take one chunk off the front of both buffers
        value_chunk = np.array(data_buffers[level][:chunk_size])
        nan_chunk = np.array(nan_data_buffers[level][:chunk_size])

        print("positions[curr_zoom]:", positions[level])

        # store the chunk in this level's datasets
        first = positions[level]
        last = first + chunk_size
        dsets[level][first:last] = value_chunk
        nan_dsets[level][first:last] = nan_chunk

        # the aggregated chunk becomes input for the next stored level
        factor = 2 ** zoom_step
        data_buffers[level + 1] += list(ct.aggregate(value_chunk, factor))
        nan_data_buffers[level + 1] += list(ct.aggregate(nan_chunk, factor))

        # drop what was just written
        data_buffers[level] = data_buffers[level][chunk_size:]
        nan_data_buffers[level] = nan_data_buffers[level][chunk_size:]

        positions[level] += chunk_size
        level += 1

        if level * zoom_step >= max_zoom:
            break
def _bedgraph(filepath, output_file, assembly, chrom_col, from_pos_col,
              to_pos_col, value_col, has_header, chromosome, tile_size,
              chunk_size, method, nan_value, transform, count_nan,
              closed_interval, chromsizes_filename, zoom_step):
    '''
    Convert a whitespace-delimited bedGraph-like file into a hitile
    (HDF5) file holding values at several stored zoom levels.

    For every stored level ``z`` (0, zoom_step, 2 * zoom_step, ...) a
    ``values_<z>`` and a ``nan_values_<z>`` dataset is created, plus a
    ``meta`` dataset whose attributes describe the file.

    :param filepath: input path; ``'-'`` reads from stdin, a path ending
        in ``.gz`` is opened with gzip in text mode
    :param output_file: output path; when None it is derived from
        ``filepath`` by swapping the extension for ``.hitile``. An
        existing file is removed first.
    :param assembly: assembly name, stored in the metadata and used to
        look up chromosome info when ``chromsizes_filename`` is None
    :param chrom_col: 1-based column index of the chromosome name
    :param from_pos_col: 1-based column index of the start position
    :param to_pos_col: 1-based column index of the end position
    :param value_col: 1-based column index of the value
    :param has_header: when true, the first input line is skipped
    :param chromosome: accepted but not referenced in this function
    :param tile_size: number of values per tile
    :param chunk_size: exponent; ``tile_size * 2**chunk_size`` values are
        buffered before being written
    :param method: accepted but not referenced in this function
    :param nan_value: string that marks a missing value in the value
        column
    :param transform: ``'exp2'`` stores ``2**value``; anything else
        stores the value unchanged
    :param count_nan: accepted but not referenced in this function
    :param closed_interval: when true, one extra value is emitted per
        line (the end position is included)
    :param chromsizes_filename: optional chromsizes file used instead of
        the named assembly
    :param zoom_step: number of zoom levels between stored datasets
    '''
    if output_file is None:
        output_file = op.splitext(filepath)[0] + '.hitile'

    print("output file:", output_file)

    # Overwrite the output file if it exists
    if op.exists(output_file):
        os.remove(output_file)

    # The HDF5 handle and the input stream get distinct names so that
    # both can be closed; the HDF5 file is closed even if parsing fails.
    hdf_file = h5py.File(output_file, 'w')
    in_file = None

    try:
        # get the information about the chromosomes in this assembly
        if chromsizes_filename is not None:
            chrom_info = nc.get_chrominfo_from_file(chromsizes_filename)
            chrom_order = [
                a.encode('utf-8')
                for a in nc.get_chromorder_from_file(chromsizes_filename)
            ]
            chrom_sizes = nc.get_chromsizes_from_file(chromsizes_filename)
        else:
            chrom_info = nc.get_chrominfo(assembly)
            chrom_order = [
                a.encode('utf-8') for a in nc.get_chromorder(assembly)
            ]
            chrom_sizes = nc.get_chromsizes(assembly)

        assembly_size = chrom_info.total_length
        print('assembly_size:', assembly_size)

        # how many values to read in at once while tiling
        chunk_size = tile_size * 2**chunk_size
        agg_factor = 2**zoom_step

        dsets = []  # value datasets, one per stored zoom level
        nan_dsets = []  # NaN-count datasets, one per stored zoom level
        positions = []  # next write offset in each dataset
        # one buffer per stored level plus one that receives the
        # aggregate of the last level
        data_buffers = [[]]
        nan_data_buffers = [[]]

        z = 0
        while assembly_size / 2**z > tile_size:
            dset_length = math.ceil(assembly_size / 2**z)
            dsets.append(
                hdf_file.create_dataset('values_' + str(z), (dset_length, ),
                                        dtype='f', compression='gzip'))
            nan_dsets.append(
                hdf_file.create_dataset('nan_values_' + str(z),
                                        (dset_length, ),
                                        dtype='f', compression='gzip'))
            data_buffers.append([])
            nan_data_buffers.append([])
            positions.append(0)
            z += zoom_step

        # store some meta data
        d = hdf_file.create_dataset('meta', (1, ), dtype='f')

        print("assembly:", assembly)

        d.attrs['zoom-step'] = zoom_step
        d.attrs['max-length'] = assembly_size
        d.attrs['assembly'] = assembly
        d.attrs['chrom-names'] = chrom_order
        d.attrs['chrom-sizes'] = chrom_sizes
        d.attrs['chrom-order'] = chrom_order
        d.attrs['tile-size'] = tile_size
        d.attrs['max-zoom'] = max_zoom = math.ceil(
            math.log(d.attrs['max-length'] / tile_size) / math.log(2))
        d.attrs['max-width'] = tile_size * 2**max_zoom
        d.attrs['max-position'] = 0

        print("assembly size (max-length)", d.attrs['max-length'])
        print("max-width", d.attrs['max-width'])
        print("max_zoom:", d.attrs['max-zoom'])
        print("chunk-size:", chunk_size)
        print("chrom-order", d.attrs['chrom-order'])

        t1 = time.time()

        # are we reading the input from stdin or from a file?
        if filepath == '-':
            in_file = sys.stdin
        elif filepath.endswith('.gz'):
            import gzip
            in_file = gzip.open(filepath, 'rt')
        else:
            in_file = open(filepath, 'r')

        def add_values_to_data_buffers(buffers_to_add, nan_buffers_to_add):
            '''
            Append values to the level-0 buffers, print progress, then
            write one full chunk per level (while the level's buffer is
            full) and push its aggregate to the next level.
            '''
            curr_zoom = 0

            data_buffers[0] += buffers_to_add
            nan_data_buffers[0] += nan_buffers_to_add

            curr_time = time.time() - t1
            percent_progress = (positions[curr_zoom] + 1) / float(
                assembly_size)
            print(
                "position: {} progress: {:.2f} elapsed: {:.2f} "
                "remaining: {:.2f}".format(
                    positions[curr_zoom] + 1, percent_progress, curr_time,
                    curr_time / (percent_progress) - curr_time))

            while len(data_buffers[curr_zoom]) >= chunk_size:
                print("len(data_buffers[curr_zoom])",
                      len(data_buffers[curr_zoom]))
                curr_chunk = np.array(data_buffers[curr_zoom][:chunk_size])
                nan_curr_chunk = np.array(
                    nan_data_buffers[curr_zoom][:chunk_size])

                print("positions[curr_zoom]:", positions[curr_zoom])

                curr_pos = positions[curr_zoom]
                dsets[curr_zoom][curr_pos:curr_pos + chunk_size] = curr_chunk
                nan_dsets[curr_zoom][curr_pos:curr_pos +
                                     chunk_size] = nan_curr_chunk

                # aggregate and store aggregated values in the next
                # zoom level's buffers
                data_buffers[curr_zoom + 1] += list(
                    ct.aggregate(curr_chunk, agg_factor))
                nan_data_buffers[curr_zoom + 1] += list(
                    ct.aggregate(nan_curr_chunk, agg_factor))

                data_buffers[curr_zoom] = data_buffers[curr_zoom][chunk_size:]
                nan_data_buffers[curr_zoom] = nan_data_buffers[curr_zoom][
                    chunk_size:]

                positions[curr_zoom] += chunk_size
                curr_zoom += 1

                if curr_zoom * zoom_step >= max_zoom:
                    break

        values = []
        nan_values = []

        if has_header:
            in_file.readline()

        # the genome position up to which we've filled in values
        curr_genome_pos = 0
        # written to the metadata once, after the loop, instead of once
        # per input line
        max_position = 0

        for line in in_file:
            # each line should indicate a chromosome, start position and
            # end position
            parts = line.strip().split()
            if not parts:
                # tolerate blank lines rather than failing on parts[...]
                continue

            start_text = parts[from_pos_col - 1]
            raw_value = parts[value_col - 1]

            start_genome_pos = chrom_info.cum_chrom_lengths[parts[
                chrom_col - 1]] + int(start_text)

            # pad the stretch between the previous line and this one
            # NOTE(review): a difference of g pads g - 1 entries, which
            # matches 1-based start coordinates -- confirm this is the
            # intended convention for 0-based input.
            gap = start_genome_pos - curr_genome_pos - 1
            if gap > 0:
                values += [np.nan] * gap
                nan_values += [1] * gap
                curr_genome_pos += gap

            # count how many nan values there are in the dataset
            is_missing = raw_value == nan_value
            nan_count = 1 if is_missing else 0

            if is_missing:
                value = np.nan
            elif transform == 'exp2':
                # the provided values are log2 transformed: undo that
                value = 2**float(raw_value)
            else:
                value = float(raw_value)

            # add as many values as the line's interval spans
            span = int(parts[to_pos_col - 1]) - int(start_text)
            values_to_add = [value] * span
            nan_counts_to_add = [nan_count] * span

            if closed_interval:
                values_to_add += [value]
                nan_counts_to_add += [nan_count]

            values += values_to_add
            nan_values += nan_counts_to_add

            max_position = start_genome_pos + len(values_to_add)
            curr_genome_pos += len(values_to_add)

            while len(values) > chunk_size:
                print("len(values):", len(values), chunk_size)
                print("line:", line)
                add_values_to_data_buffers(values[:chunk_size],
                                           nan_values[:chunk_size])
                values = values[chunk_size:]
                nan_values = nan_values[chunk_size:]

        d.attrs['max-position'] = max_position

        add_values_to_data_buffers(values, nan_values)

        # store the remaining data: write whatever is left in each
        # level's buffer and cascade its aggregate upwards
        curr_zoom = 0
        while True:
            num_left = len(data_buffers[curr_zoom])
            curr_chunk = np.array(data_buffers[curr_zoom][:num_left])
            nan_curr_chunk = np.array(nan_data_buffers[curr_zoom][:num_left])

            curr_pos = positions[curr_zoom]
            dsets[curr_zoom][curr_pos:curr_pos + num_left] = curr_chunk
            nan_dsets[curr_zoom][curr_pos:curr_pos + num_left] = nan_curr_chunk

            data_buffers[curr_zoom + 1] += list(
                ct.aggregate(curr_chunk, agg_factor))
            nan_data_buffers[curr_zoom + 1] += list(
                ct.aggregate(nan_curr_chunk, agg_factor))

            data_buffers[curr_zoom] = data_buffers[curr_zoom][num_left:]
            nan_data_buffers[curr_zoom] = nan_data_buffers[curr_zoom][
                num_left:]

            positions[curr_zoom] += num_left
            curr_zoom += 1

            # we've created enough tile levels to cover the entire
            # maximum width
            if curr_zoom * zoom_step >= max_zoom:
                break
    finally:
        # never close stdin; it is not ours
        if in_file is not None and in_file is not sys.stdin:
            in_file.close()
        hdf_file.close()
def _bigwig(filepath, chunk_size=14, zoom_step=8, tile_size=1024,
            output_file=None, assembly='hg19', chromsizes_filename=None,
            chromosome=None):
    '''
    Convert a bigWig file into a hitile (HDF5) file holding values at
    several stored zoom levels.

    For every stored level ``z`` (0, zoom_step, 2 * zoom_step, ...) a
    ``values_<z>`` and a ``nan_values_<z>`` dataset is created, plus a
    ``meta`` dataset whose attributes describe the file.

    :param filepath: path of the bigWig file to read
    :param chunk_size: exponent; ``tile_size * 2**chunk_size`` values are
        read and buffered at a time
    :param zoom_step: number of zoom levels between stored datasets
    :param tile_size: number of values per tile
    :param output_file: output path; when None it is derived from
        ``filepath`` (``<base>.hitile`` or ``<base>.<chromosome>.hitile``).
        An existing file is removed first.
    :param assembly: assembly name, stored in the metadata and used to
        look up chromosome info when ``chromsizes_filename`` is None
    :param chromsizes_filename: optional chromsizes file used instead of
        the named assembly
    :param chromosome: when given, only this chromosome is converted
    '''
    if output_file is None:
        base = op.splitext(filepath)[0]
        if chromosome is None:
            output_file = base + '.hitile'
        else:
            output_file = base + '.' + chromosome + '.hitile'

    # Overwrite the output file if it exists
    if op.exists(output_file):
        os.remove(output_file)

    hdf_file = h5py.File(output_file, 'w')
    bwf = None

    try:
        if chromsizes_filename is not None:
            chrom_info = nc.get_chrominfo_from_file(chromsizes_filename)
            chrom_order = list(
                nc.get_chromorder_from_file(chromsizes_filename))
            chrom_sizes = nc.get_chromsizes_from_file(chromsizes_filename)
        else:
            print("there")
            chrom_info = nc.get_chrominfo(assembly)
            chrom_order = list(nc.get_chromorder(assembly))
            chrom_sizes = nc.get_chromsizes(assembly)

        print("chrom_order:", chrom_order)
        assembly_size = chrom_info.total_length

        # how many values to read in at once while tiling
        chunk_size = tile_size * 2**chunk_size
        agg_factor = 2**zoom_step

        dsets = []  # value datasets, one per stored zoom level
        nan_dsets = []  # NaN-count datasets, one per stored zoom level
        positions = []  # next write offset in each dataset
        # one buffer per stored level plus one that receives the
        # aggregate of the last level
        data_buffers = [[]]
        nan_data_buffers = [[]]

        z = 0
        while assembly_size / 2**z > tile_size:
            dset_length = math.ceil(assembly_size / 2**z)
            dsets.append(
                hdf_file.create_dataset('values_' + str(z), (dset_length, ),
                                        dtype='f', compression='gzip'))
            nan_dsets.append(
                hdf_file.create_dataset('nan_values_' + str(z),
                                        (dset_length, ),
                                        dtype='f', compression='gzip'))
            data_buffers.append([])
            nan_data_buffers.append([])
            positions.append(0)
            z += zoom_step

        # load the bigWig file
        bwf = pbw.open(filepath)
        # queried once instead of once per chunk
        bw_chroms = bwf.chroms()

        # store some meta data
        d = hdf_file.create_dataset('meta', (1, ), dtype='f')

        if chromosome is not None:
            d.attrs['min-pos'] = chrom_info.cum_chrom_lengths[chromosome]
            d.attrs['max-pos'] = chrom_info.cum_chrom_lengths[
                chromosome] + bw_chroms[chromosome]
        else:
            d.attrs['min-pos'] = 0
            d.attrs['max-pos'] = assembly_size

        encoded_order = [a.encode('utf-8') for a in chrom_order]

        d.attrs['zoom-step'] = zoom_step
        d.attrs['max-length'] = assembly_size
        d.attrs['assembly'] = assembly
        d.attrs['chrom-names'] = encoded_order
        d.attrs['chrom-sizes'] = chrom_sizes
        d.attrs['chrom-order'] = encoded_order
        d.attrs['tile-size'] = tile_size
        d.attrs['max-zoom'] = max_zoom = math.ceil(
            math.log(d.attrs['max-length'] / tile_size) / math.log(2))
        d.attrs['max-width'] = tile_size * 2**max_zoom
        d.attrs['max-position'] = 0

        print("assembly size (max-length)", d.attrs['max-length'])
        print("max-width", d.attrs['max-width'])
        print("max_zoom:", d.attrs['max-zoom'])
        print("chunk-size:", chunk_size)
        print("chrom-order", d.attrs['chrom-order'])

        t1 = time.time()

        def add_values_to_data_buffers(buffers_to_add, nan_buffers_to_add):
            '''
            Append values to the level-0 buffers, print progress, then
            write one full chunk per level (while the level's buffer is
            full) and push its aggregate to the next level.
            '''
            curr_zoom = 0

            data_buffers[0] += buffers_to_add
            nan_data_buffers[0] += nan_buffers_to_add

            curr_time = time.time() - t1
            percent_progress = (positions[curr_zoom] + 1) / float(
                assembly_size)
            print(
                "position: {} progress: {:.2f} elapsed: {:.2f} "
                "remaining: {:.2f}".format(
                    positions[curr_zoom] + 1, percent_progress, curr_time,
                    curr_time / (percent_progress) - curr_time))

            while len(data_buffers[curr_zoom]) >= chunk_size:
                print("len(data_buffers[curr_zoom])",
                      len(data_buffers[curr_zoom]))
                curr_chunk = np.array(data_buffers[curr_zoom][:chunk_size])
                nan_curr_chunk = np.array(
                    nan_data_buffers[curr_zoom][:chunk_size])

                print("positions[curr_zoom]:", positions[curr_zoom])

                curr_pos = positions[curr_zoom]
                dsets[curr_zoom][curr_pos:curr_pos + chunk_size] = curr_chunk
                nan_dsets[curr_zoom][curr_pos:curr_pos +
                                     chunk_size] = nan_curr_chunk

                # aggregate and store aggregated values in the next
                # zoom level's buffers
                data_buffers[curr_zoom + 1] += list(
                    ct.aggregate(curr_chunk, agg_factor))
                nan_data_buffers[curr_zoom + 1] += list(
                    ct.aggregate(nan_curr_chunk, agg_factor))

                data_buffers[curr_zoom] = data_buffers[curr_zoom][chunk_size:]
                nan_data_buffers[curr_zoom] = nan_data_buffers[curr_zoom][
                    chunk_size:]

                positions[curr_zoom] += chunk_size
                curr_zoom += 1

                if curr_zoom * zoom_step >= max_zoom:
                    break

        # Do we only want values from a single chromosome?
        if chromosome is not None:
            chroms_to_use = [chromosome]
        else:
            chroms_to_use = chrom_order

        for chrom in chroms_to_use:
            print("chrom:", chrom)

            # the size comes from the chromosome info, not the bigWig
            chrom_size = chrom_info.chrom_lengths[chrom]
            in_bigwig = chrom in bw_chroms

            d.attrs['max-position'] += chrom_size

            counter = 0
            while counter < chrom_size:
                remaining = min(chunk_size, chrom_size - counter)

                if in_bigwig:
                    values = bwf.values(chrom, counter, counter + remaining)
                    nan_values = np.isnan(values).astype('i4')
                else:
                    # chromosome absent from the bigWig: all missing
                    values = [np.nan] * remaining
                    nan_values = [1] * remaining

                counter += remaining
                add_values_to_data_buffers(list(values), list(nan_values))

        # store the remaining data: write whatever is left in each
        # level's buffer and cascade its aggregate upwards
        curr_zoom = 0
        while True:
            num_left = len(data_buffers[curr_zoom])
            curr_chunk = np.array(data_buffers[curr_zoom][:num_left])
            nan_curr_chunk = np.array(nan_data_buffers[curr_zoom][:num_left])

            curr_pos = positions[curr_zoom]
            dsets[curr_zoom][curr_pos:curr_pos + num_left] = curr_chunk
            nan_dsets[curr_zoom][curr_pos:curr_pos + num_left] = nan_curr_chunk

            data_buffers[curr_zoom + 1] += list(
                ct.aggregate(curr_chunk, agg_factor))
            nan_data_buffers[curr_zoom + 1] += list(
                ct.aggregate(nan_curr_chunk, agg_factor))

            data_buffers[curr_zoom] = data_buffers[curr_zoom][num_left:]
            nan_data_buffers[curr_zoom] = nan_data_buffers[curr_zoom][
                num_left:]

            positions[curr_zoom] += num_left
            curr_zoom += 1

            # we've created enough tile levels to cover the entire
            # maximum width
            if curr_zoom * zoom_step >= max_zoom:
                break
    finally:
        if bwf is not None:
            bwf.close()
        hdf_file.close()
def get_data(hdf_file, z, x):
    '''
    Return a tile from an hdf_file.

    :param hdf_file: A file handle for an HDF5 file (h5py.File('...'))
    :param z: The zoom level
    :param x: The x position of the tile
    :return: an empty list when ``x`` lies outside ``0 .. 2**z - 1``;
        otherwise the array produced by ``ct.aggregate`` for the tile.
        When the file contains a matching ``nan_values_*`` dataset the
        aggregated values are divided element-wise by the number of
        non-NaN inputs behind each entry.
    '''
    # is the tile within the range of possible tiles? At zoom level z
    # the valid tile indices are 0 .. 2**z - 1.
    if x >= 2**z:
        print("OUT OF RIGHT RANGE")
        return []
    if x < 0:
        print("OUT OF LEFT RANGE")
        return []

    d = hdf_file['meta']

    tile_size = int(d.attrs['tile-size'])
    zoom_step = int(d.attrs['zoom-step'])
    max_zoom = int(d.attrs['max-zoom'])

    max_width = tile_size * 2**max_zoom

    if 'max-position' in d.attrs:
        max_position = int(d.attrs['max-position'])
    else:
        max_position = max_width

    rz = max_zoom - z

    # because we only store a subsection of the zoom levels, read from
    # the nearest stored level at or below rz and aggregate the rest
    next_stored_zoom = zoom_step * (rz // zoom_step)
    zoom_offset = rz - next_stored_zoom

    # the number of entries to aggregate for each new value
    num_to_agg = int(2**zoom_offset)

    # which positions we need to retrieve in order to dynamically
    # aggregate
    start_pos = int(x * 2**zoom_offset * tile_size)
    end_pos = int(start_pos + tile_size * num_to_agg)

    # express max_position in the coordinates of the stored level
    max_position = int(max_position / 2**next_stored_zoom)

    f = hdf_file['values_' + str(int(next_stored_zoom))]

    if start_pos > max_position:
        # we want a tile that's after the last bit of data
        a = np.full(end_pos - start_pos, np.nan)
        ret_array = ct.aggregate(a, num_to_agg)
    elif max_position < end_pos:
        # the data ends inside this tile: blank everything after it.
        # `a` starts at start_pos, so the mask index must be relative to
        # the slice rather than an absolute dataset position.
        a = np.array(f[start_pos:end_pos])
        a[max_position + 1 - start_pos:] = np.nan
        ret_array = ct.aggregate(a, num_to_agg)
    else:
        ret_array = ct.aggregate(f[start_pos:end_pos], num_to_agg)

    # check to see if we counted the number of NaN values in the given
    # interval
    nan_key = 'nan_values_' + str(int(next_stored_zoom))
    if nan_key in hdf_file:
        f_nan = hdf_file[nan_key]
        nan_array = ct.aggregate(f_nan[start_pos:end_pos], num_to_agg)

        # number of base-level values behind each returned entry
        num_aggregated = 2**(max_zoom - z)
        num_vals_array = np.full(len(nan_array), num_aggregated, dtype=float)
        num_summed_array = num_vals_array - nan_array

        # entries made up solely of NaNs divide by zero; the result is
        # unchanged, only the runtime warnings are silenced
        with np.errstate(divide='ignore', invalid='ignore'):
            averages_array = ret_array / num_summed_array

        return averages_array

    return ret_array
def main(): parser = argparse.ArgumentParser(description=""" python main.py """) parser.add_argument('-f', '--filepath', default=None) parser.add_argument('-c', '--chunk-size', default=14, type=int) parser.add_argument('-z', '--zoom-step', default=8, type=int) parser.add_argument('-t', '--tile-size', default=1024, type=int) parser.add_argument('-o', '--output-file', default='/tmp/tmp.hdf5') #parser.add_argument('-o', '--options', default='yo', # help="Some option", type='str') #parser.add_argument('-u', '--useless', action='store_true', # help='Another useless option') args = parser.parse_args() last_end = 0 data = [] max_zoom = 24 if op.exists(args.output_file): os.remove(args.output_file) f = h5py.File(args.output_file, 'w') hum_size = 3137161264 tile_size = args.tile_size chunk_size = tile_size * 2**args.chunk_size dsets = [] # initialize the datasets z = 0 positions = [] # store where we are at the current dataset data_buffers = [[]] while hum_size / 2**z > tile_size: dsets += [ f.create_dataset('values_' + str(z), (hum_size / 2**z, ), dtype='f', compression='gzip') ] data_buffers += [[]] positions += [0] z += args.zoom_step d = f.create_dataset('meta', (1, ), dtype='f') d.attrs['zoom-step'] = args.zoom_step d.attrs['max-length'] = hum_size d.attrs['assembly'] = 'hg19' d.attrs['tile-size'] = tile_size d.attrs['max-zoom'] = math.ceil( math.log(d.attrs['max-length'] / tile_size) / math.log(2)) print("max_zoom:", d.attrs['max-zoom']) if args.filepath is None: print("Waiting for input...") for line in sys.stdin: parts = line.split() start = int(parts[0], 10) end = int(parts[1], 10) val = float(parts[2]) if start > last_end: # in case there's skipped values in the bed file data_buffers[0] += [0] * (last_end - start) data_buffers[0] += [float(parts[2])] * (end - start) curr_zoom = 0 while len(data_buffers[curr_zoom]) > chunk_size: # get the current chunk and store it print("curr_zoom:", curr_zoom) curr_chunk = np.array(data_buffers[curr_zoom][:chunk_size]) 
dsets[curr_zoom][positions[curr_zoom]:positions[curr_zoom] + chunk_size] = curr_chunk # aggregate and store aggregated values in the next zoom_level's data data_buffers[curr_zoom + 1] += list( ct.aggregate(curr_chunk, 2**args.zoom_step)) data_buffers[curr_zoom] = data_buffers[curr_zoom][chunk_size:] positions[curr_zoom] += chunk_size data = data_buffers[curr_zoom + 1] curr_zoom += 1 # store the remaining data print("tile_size:", tile_size, positions[0]) while True: # get the current chunk and store it chunk_size = len(data_buffers[curr_zoom]) curr_chunk = np.array(data_buffers[curr_zoom][:chunk_size]) dsets[curr_zoom][positions[curr_zoom]:positions[curr_zoom] + chunk_size] = curr_chunk print("curr_zoom:", curr_zoom, "position:", positions[curr_zoom] + len(curr_chunk)) print("len:", [len(d) for d in data_buffers]) # aggregate and store aggregated values in the next zoom_level's data data_buffers[curr_zoom + 1] += list( ct.aggregate(curr_chunk, 2**args.zoom_step)) data_buffers[curr_zoom] = data_buffers[curr_zoom][chunk_size:] positions[curr_zoom] += chunk_size data = data_buffers[curr_zoom + 1] curr_zoom += 1 # we've created enough tile levels to cover the entire maximum width if curr_zoom * args.zoom_step >= max_zoom: break # still need to take care of the last chunk data = np.array(data) t1 = time.time() '''