Example #1
    def add_values_to_data_buffers(buffers_to_add, nan_buffers_to_add):
        curr_zoom = 0

        data_buffers[0] += buffers_to_add
        nan_data_buffers[0] += nan_buffers_to_add

        curr_time = time.time() - t1
        percent_progress = (positions[curr_zoom] + 1) / float(assembly_size)
        print(
            "position: {} progress: {:.2f} elapsed: {:.2f} "
            "remaining: {:.2f}".format(
                positions[curr_zoom] + 1,
                percent_progress,
                curr_time, curr_time / (percent_progress) - curr_time
            )
        )

        while len(data_buffers[curr_zoom]) >= chunk_size:
            # get the current chunk and store it, converting nans to 0
            print("len(data_buffers[curr_zoom])", len(data_buffers[curr_zoom]))
            curr_chunk = np.array(data_buffers[curr_zoom][:chunk_size])
            nan_curr_chunk = np.array(nan_data_buffers[curr_zoom][:chunk_size])

            print("positions[curr_zoom]:", positions[curr_zoom])

            curr_pos = positions[curr_zoom]
            dsets[curr_zoom][curr_pos:curr_pos+chunk_size] = curr_chunk
            nan_dsets[curr_zoom][curr_pos:curr_pos+chunk_size] = nan_curr_chunk

            # aggregate and store aggregated values in the next zoom_level's
            # data
            data_buffers[curr_zoom+1] += list(
                ct.aggregate(curr_chunk, 2 ** zoom_step)
            )
            nan_data_buffers[curr_zoom+1] += list(
                ct.aggregate(nan_curr_chunk, 2 ** zoom_step)
            )

            data_buffers[curr_zoom] = data_buffers[curr_zoom][chunk_size:]
            nan_data_buffers[curr_zoom] =\
                nan_data_buffers[curr_zoom][chunk_size:]

            positions[curr_zoom] += chunk_size
            curr_zoom += 1

            if curr_zoom * zoom_step >= max_zoom:
                break
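
A minimal, self-contained sketch of the cascading-buffer pattern used above. It assumes, as the averaging logic in `get_data` (Example #4) suggests, that `ct.aggregate(arr, n)` sums each run of `n` adjacent values; `aggregate` here is a stand-in for it, and all names and sizes are illustrative:

import numpy as np

def aggregate(arr, n):
    # stand-in for ct.aggregate: sum every n adjacent values
    return arr.reshape(-1, n).sum(axis=1)

zoom_step = 1
chunk_size = 8                    # must be a multiple of 2**zoom_step
buffers = [[], []]                # one buffer per stored zoom level
stored = [[], []]                 # stand-in for the hdf5 datasets

buffers[0] += list(np.arange(16, dtype=float))

zoom = 0
while zoom + 1 < len(buffers) and len(buffers[zoom]) >= chunk_size:
    chunk = np.array(buffers[zoom][:chunk_size])
    stored[zoom] += list(chunk)                                # flush
    buffers[zoom + 1] += list(aggregate(chunk, 2**zoom_step))  # cascade up
    buffers[zoom] = buffers[zoom][chunk_size:]
    zoom += 1

print(stored[0])    # the 8 values flushed at zoom 0
print(buffers[1])   # their 4 pairwise sums, waiting at the next level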
Example #2
def _bedgraph(filepath, output_file, assembly, chrom_col, from_pos_col,
              to_pos_col, value_col, has_header, chromosome, tile_size,
              chunk_size, method, nan_value, transform, count_nan,
              closed_interval, chromsizes_filename, zoom_step):

    if output_file is None:
        output_file = op.splitext(filepath)[0] + '.hitile'

    print("output file:", output_file)

    # Overwrite the output file if it exists
    if op.exists(output_file):
        os.remove(output_file)
    f = h5py.File(output_file, 'w')

    # get the information about the chromosomes in this assembly
    if chromsizes_filename is not None:
        chrom_info = nc.get_chrominfo_from_file(chromsizes_filename)
        chrom_order = [
            a.encode('utf-8')
            for a in nc.get_chromorder_from_file(chromsizes_filename)
        ]
        chrom_sizes = nc.get_chromsizes_from_file(chromsizes_filename)
    else:
        chrom_info = nc.get_chrominfo(assembly)
        chrom_order = [a.encode('utf-8') for a in nc.get_chromorder(assembly)]
        chrom_sizes = nc.get_chromsizes(assembly)

    assembly_size = chrom_info.total_length
    print('assembly_size:', assembly_size)

    chunk_size = tile_size * 2**chunk_size  # number of values to read at a time while tiling

    dsets = []  # data sets at each zoom level
    nan_dsets = []  # store nan values

    # initialize the arrays which will store the values at each stored zoom level
    z = 0
    positions = []  # store where we are at the current dataset
    data_buffers = [[]]
    nan_data_buffers = [[]]

    while assembly_size / 2**z > tile_size:
        dset_length = math.ceil(assembly_size / 2**z)
        dsets += [
            f.create_dataset('values_' + str(z), (dset_length, ),
                             dtype='f',
                             compression='gzip')
        ]
        nan_dsets += [
            f.create_dataset('nan_values_' + str(z), (dset_length, ),
                             dtype='f',
                             compression='gzip')
        ]

        data_buffers += [[]]
        nan_data_buffers += [[]]

        positions += [0]
        z += zoom_step

    #print("dsets[0][-10:]", dsets[0][-10:])

    # load the bigWig file
    #print("filepath:", filepath)

    # store some meta data
    d = f.create_dataset('meta', (1, ), dtype='f')

    print("assembly:", assembly)
    #print("chrom_info:", nc.get_chromorder(assembly))

    d.attrs['zoom-step'] = zoom_step
    d.attrs['max-length'] = assembly_size
    d.attrs['assembly'] = assembly
    d.attrs['chrom-names'] = chrom_order
    d.attrs['chrom-sizes'] = chrom_sizes
    d.attrs['chrom-order'] = chrom_order
    d.attrs['tile-size'] = tile_size
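    # e.g. for hg19 (max-length 3137161264) and tile_size 1024:
    #   max-zoom = ceil(log2(3137161264 / 1024)) = ceil(~21.55) = 22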
    d.attrs['max-zoom'] = max_zoom = math.ceil(
        math.log(d.attrs['max-length'] / tile_size) / math.log(2))
    d.attrs['max-width'] = tile_size * 2**max_zoom
    d.attrs['max-position'] = 0

    print("assembly size (max-length)", d.attrs['max-length'])
    print("max-width", d.attrs['max-width'])
    print("max_zoom:", d.attrs['max-zoom'])
    print("chunk-size:", chunk_size)
    print("chrom-order", d.attrs['chrom-order'])

    t1 = time.time()

    # are we reading the input from stdin or from a file? (use a separate
    # name so the h5py handle `f` isn't shadowed)

    if filepath == '-':
        infile = sys.stdin
    else:
        if filepath.endswith('.gz'):
            import gzip
            infile = gzip.open(filepath, 'rt')
        else:
            infile = open(filepath, 'r')

    curr_zoom = 0

    def add_values_to_data_buffers(buffers_to_add, nan_buffers_to_add):
        curr_zoom = 0

        data_buffers[0] += buffers_to_add
        nan_data_buffers[0] += nan_buffers_to_add

        curr_time = time.time() - t1
        percent_progress = (positions[curr_zoom] + 1) / float(assembly_size)
        print(
            "position: {} progress: {:.2f} elapsed: {:.2f} remaining: {:.2f}".
            format(positions[curr_zoom] + 1, percent_progress, curr_time,
                   curr_time / (percent_progress) - curr_time))

        while len(data_buffers[curr_zoom]) >= chunk_size:
            # get the current chunk and store it, converting nans to 0
            print("len(data_buffers[curr_zoom])", len(data_buffers[curr_zoom]))
            curr_chunk = np.array(data_buffers[curr_zoom][:chunk_size])
            nan_curr_chunk = np.array(nan_data_buffers[curr_zoom][:chunk_size])
            print("positions[curr_zoom]:", positions[curr_zoom])

            curr_pos = positions[curr_zoom]
            dsets[curr_zoom][curr_pos:curr_pos+chunk_size] = curr_chunk
            nan_dsets[curr_zoom][curr_pos:curr_pos+chunk_size] = nan_curr_chunk

            # aggregate and store aggregated values in the next zoom_level's data
            data_buffers[curr_zoom + 1] += list(
                ct.aggregate(curr_chunk, 2**zoom_step))
            nan_data_buffers[curr_zoom + 1] += list(
                ct.aggregate(nan_curr_chunk, 2**zoom_step))

            data_buffers[curr_zoom] = data_buffers[curr_zoom][chunk_size:]
            nan_data_buffers[curr_zoom] = nan_data_buffers[curr_zoom][
                chunk_size:]

            positions[curr_zoom] += chunk_size
            curr_zoom += 1

            if curr_zoom * zoom_step >= max_zoom:
                break

    values = []
    nan_values = []

    if has_header:
        infile.readline()

    # the genome position up to which we've filled in values
    curr_genome_pos = 0

    for line in infile:
        # each line gives a chromosome, a start position and an end position
        parts = line.strip().split()

        chrom = parts[chrom_col - 1]
        start_genome_pos = (chrom_info.cum_chrom_lengths[chrom]
                            + int(parts[from_pos_col - 1]))

        if start_genome_pos - curr_genome_pos > 1:
            values += [np.nan] * (start_genome_pos - curr_genome_pos - 1)
            nan_values += [1] * (start_genome_pos - curr_genome_pos - 1)

            curr_genome_pos += (start_genome_pos - curr_genome_pos - 1)

        raw_value = parts[value_col - 1]

        # count how many nan values there are in the dataset
        nan_count = 1 if raw_value == nan_value else 0

        # if the provided values are log2-transformed, we have to un-transform them
        if raw_value == nan_value:
            value = np.nan
        elif transform == 'exp2':
            value = 2**float(raw_value)
        else:
            value = float(raw_value)

        # print("pos:", int(parts[to_pos_col-1]) - int(parts[from_pos_col-1]))
        # we're going to add as many values are as specified in the bedfile line
        values_to_add = [value] * (int(parts[to_pos_col - 1]) -
                                   int(parts[from_pos_col - 1]))
        nan_counts_to_add = [nan_count] * (int(parts[to_pos_col - 1]) -
                                           int(parts[from_pos_col - 1]))

        if closed_interval:
            values_to_add += [value]
            nan_counts_to_add += [nan_count]

        # print("values_to_add", values_to_add)

        values += values_to_add
        nan_values += nan_counts_to_add

        d.attrs['max-position'] = start_genome_pos + len(values_to_add)

        #print("values:", values[:30])

        curr_genome_pos += len(values_to_add)

        while len(values) > chunk_size:
            print("len(values):", len(values), chunk_size)
            print("line:", line)
            add_values_to_data_buffers(values[:chunk_size],
                                       nan_values[:chunk_size])
            values = values[chunk_size:]
            nan_values = nan_values[chunk_size:]

    add_values_to_data_buffers(values, nan_values)

    # store the remaining data
    while True:
        # get the current chunk and store it
        chunk_size = len(data_buffers[curr_zoom])
        curr_chunk = np.array(data_buffers[curr_zoom][:chunk_size])
        nan_curr_chunk = np.array(nan_data_buffers[curr_zoom][:chunk_size])

        curr_pos = positions[curr_zoom]
        dsets[curr_zoom][curr_pos:curr_pos+chunk_size] = curr_chunk
        nan_dsets[curr_zoom][curr_pos:curr_pos+chunk_size] = nan_curr_chunk

        #print("chunk_size:", chunk_size, "len(curr_chunk):", len(curr_chunk), "len(nan_curr_chunk)", len(nan_curr_chunk))

        # aggregate and store aggregated values in the next zoom_level's data
        data_buffers[curr_zoom + 1] += list(
            ct.aggregate(curr_chunk, 2**zoom_step))
        nan_data_buffers[curr_zoom + 1] += list(
            ct.aggregate(nan_curr_chunk, 2**zoom_step))

        data_buffers[curr_zoom] = data_buffers[curr_zoom][chunk_size:]
        nan_data_buffers[curr_zoom] = nan_data_buffers[curr_zoom][chunk_size:]

        positions[curr_zoom] += chunk_size
        curr_zoom += 1

        # we've created enough tile levels to cover the entire maximum width
        if curr_zoom * zoom_step >= max_zoom:
            break
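
A hypothetical invocation of `_bedgraph` for a four-column bedGraph file (all argument values are illustrative; the column indices are 1-based, and `method` and `count_nan` are accepted but unused in the body shown):

_bedgraph('input.bedgraph', output_file=None, assembly='hg19',
          chrom_col=1, from_pos_col=2, to_pos_col=3, value_col=4,
          has_header=False, chromosome=None, tile_size=1024,
          chunk_size=14, method='sum', nan_value='NA',
          transform=None, count_nan=True, closed_interval=False,
          chromsizes_filename=None, zoom_step=8)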
Example #3
def _bigwig(filepath,
            chunk_size=14,
            zoom_step=8,
            tile_size=1024,
            output_file=None,
            assembly='hg19',
            chromsizes_filename=None,
            chromosome=None):

    if output_file is None:
        if chromosome is None:
            output_file = op.splitext(filepath)[0] + '.hitile'
        else:
            output_file = op.splitext(
                filepath)[0] + '.' + chromosome + '.hitile'

    # Overwrite the output file if it exists
    if op.exists(output_file):
        os.remove(output_file)
    f = h5py.File(output_file, 'w')

    if chromsizes_filename is not None:
        chrom_info = nc.get_chrominfo_from_file(chromsizes_filename)
        chrom_order = list(nc.get_chromorder_from_file(chromsizes_filename))
        chrom_sizes = nc.get_chromsizes_from_file(chromsizes_filename)
    else:
        print("there")
        chrom_info = nc.get_chrominfo(assembly)
        chrom_order = list(nc.get_chromorder(assembly))
        chrom_sizes = nc.get_chromsizes(assembly)

    print("chrom_order:", chrom_order)
    assembly_size = chrom_info.total_length

    chunk_size = tile_size * 2**chunk_size  # number of values to read at a time while tiling

    dsets = []  # data sets at each zoom level
    nan_dsets = []

    # initialize the arrays which will store the values at each stored zoom level
    z = 0
    positions = []  # store where we are at the current dataset
    data_buffers = [[]]
    nan_data_buffers = [[]]

    while assembly_size / 2**z > tile_size:
        dset_length = math.ceil(assembly_size / 2**z)
        dsets += [
            f.create_dataset('values_' + str(z), (dset_length, ),
                             dtype='f',
                             compression='gzip')
        ]
        nan_dsets += [
            f.create_dataset('nan_values_' + str(z), (dset_length, ),
                             dtype='f',
                             compression='gzip')
        ]

        data_buffers += [[]]
        nan_data_buffers += [[]]

        positions += [0]
        z += zoom_step

    # load the bigWig file
    bwf = pbw.open(filepath)

    # store some meta data
    d = f.create_dataset('meta', (1, ), dtype='f')

    if chromosome is not None:
        d.attrs['min-pos'] = chrom_info.cum_chrom_lengths[chromosome]
        d.attrs['max-pos'] = chrom_info.cum_chrom_lengths[
            chromosome] + bwf.chroms()[chromosome]
    else:
        d.attrs['min-pos'] = 0
        d.attrs['max-pos'] = assembly_size

    d.attrs['zoom-step'] = zoom_step
    d.attrs['max-length'] = assembly_size
    d.attrs['assembly'] = assembly
    d.attrs['chrom-names'] = [a.encode('utf-8') for a in chrom_order]
    d.attrs['chrom-sizes'] = chrom_sizes
    d.attrs['chrom-order'] = [a.encode('utf-8') for a in chrom_order]
    d.attrs['tile-size'] = tile_size
    d.attrs['max-zoom'] = max_zoom = math.ceil(
        math.log(d.attrs['max-length'] / tile_size) / math.log(2))
    d.attrs['max-width'] = tile_size * 2**max_zoom
    d.attrs['max-position'] = 0

    print("assembly size (max-length)", d.attrs['max-length'])
    print("max-width", d.attrs['max-width'])
    print("max_zoom:", d.attrs['max-zoom'])
    print("chunk-size:", chunk_size)
    print("chrom-order", d.attrs['chrom-order'])

    t1 = time.time()

    curr_zoom = 0

    def add_values_to_data_buffers(buffers_to_add, nan_buffers_to_add):
        curr_zoom = 0

        data_buffers[0] += buffers_to_add
        nan_data_buffers[0] += nan_buffers_to_add

        curr_time = time.time() - t1
        percent_progress = (positions[curr_zoom] + 1) / float(assembly_size)
        print(
            "position: {} progress: {:.2f} elapsed: {:.2f} remaining: {:.2f}".
            format(positions[curr_zoom] + 1, percent_progress, curr_time,
                   curr_time / (percent_progress) - curr_time))

        while len(data_buffers[curr_zoom]) >= chunk_size:
            # get the current chunk and store it, converting nans to 0
            print("len(data_buffers[curr_zoom])", len(data_buffers[curr_zoom]))
            curr_chunk = np.array(data_buffers[curr_zoom][:chunk_size])
            nan_curr_chunk = np.array(nan_data_buffers[curr_zoom][:chunk_size])
            print("positions[curr_zoom]:", positions[curr_zoom])

            curr_pos = positions[curr_zoom]
            dsets[curr_zoom][curr_pos:curr_pos+chunk_size] = curr_chunk
            nan_dsets[curr_zoom][curr_pos:curr_pos+chunk_size] = nan_curr_chunk

            # aggregate and store aggregated values in the next zoom_level's data
            data_buffers[curr_zoom + 1] += list(
                ct.aggregate(curr_chunk, 2**zoom_step))
            nan_data_buffers[curr_zoom + 1] += list(
                ct.aggregate(nan_curr_chunk, 2**zoom_step))

            data_buffers[curr_zoom] = data_buffers[curr_zoom][chunk_size:]
            nan_data_buffers[curr_zoom] = nan_data_buffers[curr_zoom][
                chunk_size:]

            positions[curr_zoom] += chunk_size
            curr_zoom += 1

            if curr_zoom * zoom_step >= max_zoom:
                break

    # Do we only want values from a single chromosome?
    if chromosome is not None:
        chroms_to_use = [chromosome]
    else:
        chroms_to_use = chrom_order

    for chrom in chroms_to_use:
        print("chrom:", chrom)

        counter = 0
        chrom_size = chrom_info.chrom_lengths[chrom]

        # print("chrom_size:", chrom_size, bwf.chroms()[chrom])
        d.attrs['max-position'] += chrom_size

        while counter < chrom_size:
            remaining = min(chunk_size, chrom_size - counter)

            if chrom not in bwf.chroms():
                values = [np.nan] * remaining
                nan_values = [1] * remaining
            else:
                values = bwf.values(chrom, counter, counter + remaining)
                nan_values = np.isnan(values).astype('i4')

            # print("counter:", counter, "remaining:", remaining,
            # "counter + remaining:", counter + remaining)
            counter += remaining
            curr_zoom = 0

            add_values_to_data_buffers(list(values), list(nan_values))

    while True:
        # get the current chunk and store it
        chunk_size = len(data_buffers[curr_zoom])
        curr_chunk = np.array(data_buffers[curr_zoom][:chunk_size])
        nan_curr_chunk = np.array(nan_data_buffers[curr_zoom][:chunk_size])

        curr_pos = positions[curr_zoom]
        dsets[curr_zoom][curr_pos:curr_pos+chunk_size] = curr_chunk
        nan_dsets[curr_zoom][curr_pos:curr_pos+chunk_size] = nan_curr_chunk

        # aggregate and store aggregated values in the next zoom_level's data
        data_buffers[curr_zoom + 1] += list(
            ct.aggregate(curr_chunk, 2**zoom_step))
        nan_data_buffers[curr_zoom + 1] += list(
            ct.aggregate(nan_curr_chunk, 2**zoom_step))

        data_buffers[curr_zoom] = data_buffers[curr_zoom][chunk_size:]
        nan_data_buffers[curr_zoom] = nan_data_buffers[curr_zoom][chunk_size:]

        positions[curr_zoom] += chunk_size
        curr_zoom += 1

        # we've created enough tile levels to cover the entire maximum width
        if curr_zoom * zoom_step >= max_zoom:
            break

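A hypothetical call, with placeholder file names (per the defaults above, the output lands next to the input):

_bigwig('signal.bigWig')                     # writes signal.hitile
_bigwig('signal.bigWig', chromosome='chr1')  # writes signal.chr1.hitile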
Example #4
def get_data(hdf_file, z, x):
    '''
    Return a tile from an hdf_file.

    :param hdf_file: A file handle for an HDF5 file (h5py.File('...'))
    :param z: The zoom level
    :param x: The x position of the tile
    '''

    # is the tile within the range of possible tiles (0 .. 2**z - 1)?
    if x >= 2**z:
        print("OUT OF RIGHT RANGE")
        return []
    if x < 0:
        print("OUT OF LEFT RANGE")
        return []

    d = hdf_file['meta']

    tile_size = int(d.attrs['tile-size'])
    zoom_step = int(d.attrs['zoom-step'])
    max_length = int(d.attrs['max-length'])
    max_zoom = int(d.attrs['max-zoom'])

    if 'min-pos' in d.attrs:
        min_pos = d.attrs['min-pos']
    else:
        min_pos = 0

    max_width = tile_size * 2**max_zoom

    if 'max-position' in d.attrs:
        max_position = int(d.attrs['max-position'])
    else:
        max_position = max_width

    rz = max_zoom - z
    tile_width = max_width / 2**z

    # because we only store a subset of the zoom levels
    next_stored_zoom = zoom_step * math.floor(rz / zoom_step)
    zoom_offset = rz - next_stored_zoom

    # the number of entries to aggregate for each new value
    num_to_agg = 2**zoom_offset
    total_in_length = tile_size * num_to_agg
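    # e.g. with tile_size=1024, zoom_step=8, max_zoom=22 and z=3:
    #   rz = 19, next_stored_zoom = 16, zoom_offset = 3,
    #   num_to_agg = 8 and total_in_length = 8192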

    # which positions we need to retrieve in order to dynamically aggregate
    start_pos = int(x * 2**zoom_offset * tile_size)
    end_pos = int(start_pos + total_in_length)

    #print("max_position:", max_position)
    max_position = int(max_position / 2**next_stored_zoom)
    #print("new max_position:", max_position)

    f = hdf_file['values_' + str(int(next_stored_zoom))]

    if start_pos > max_position:
        # the tile lies entirely past the last bit of data
        a = np.full(end_pos - start_pos, np.nan)
        ret_array = ct.aggregate(a, int(num_to_agg))
    elif start_pos < max_position and max_position < end_pos:
        # the tile straddles the end of the data; mask everything past it
        # (index relative to the start of the slice, not the dataset)
        a = f[start_pos:end_pos][:]
        a[max_position + 1 - start_pos:] = np.nan
        ret_array = ct.aggregate(a, int(num_to_agg))
    else:
        ret_array = ct.aggregate(f[start_pos:end_pos], int(num_to_agg))

    # check to see if we counted the number of NaN values in the given
    # interval

    f_nan = None
    if "nan_values_" + str(int(next_stored_zoom)) in hdf_file:
        f_nan = hdf_file['nan_values_' + str(int(next_stored_zoom))]
        nan_array = ct.aggregate(f_nan[start_pos:end_pos], int(num_to_agg))
        num_aggregated = 2**(max_zoom - z)

        num_vals_array = np.full(len(nan_array), num_aggregated)
        num_summed_array = num_vals_array - nan_array

        averages_array = ret_array / num_summed_array

        return averages_array

    return ret_array
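
A short usage sketch, assuming `signal.hitile` was produced by one of the aggregators above:

import h5py

with h5py.File('signal.hitile', 'r') as hdf_file:
    tile = get_data(hdf_file, 0, 0)  # the single tile at the top zoom level
    print(len(tile))                 # tile-size values (1024 by default)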
Example #5
def main():
    parser = argparse.ArgumentParser(
        description="Read (start, end, value) lines from stdin and store "
                    "multi-zoom aggregates in an HDF5 file.")

    parser.add_argument('-f', '--filepath', default=None)
    parser.add_argument('-c', '--chunk-size', default=14, type=int)
    parser.add_argument('-z', '--zoom-step', default=8, type=int)
    parser.add_argument('-t', '--tile-size', default=1024, type=int)
    parser.add_argument('-o', '--output-file', default='/tmp/tmp.hdf5')
    args = parser.parse_args()
    last_end = 0

    max_zoom = 24
    if op.exists(args.output_file):
        os.remove(args.output_file)
    f = h5py.File(args.output_file, 'w')

    hum_size = 3137161264  # total assembly length used here for hg19
    tile_size = args.tile_size

    chunk_size = tile_size * 2**args.chunk_size

    dsets = []

    # initialize the datasets
    z = 0
    positions = []  # store where we are at the current dataset
    data_buffers = [[]]
    while hum_size / 2**z > tile_size:
        dset_length = math.ceil(hum_size / 2**z)
        dsets += [
            f.create_dataset('values_' + str(z), (dset_length, ),
                             dtype='f',
                             compression='gzip')
        ]
        data_buffers += [[]]
        positions += [0]
        z += args.zoom_step
    d = f.create_dataset('meta', (1, ), dtype='f')

    d.attrs['zoom-step'] = args.zoom_step
    d.attrs['max-length'] = hum_size
    d.attrs['assembly'] = 'hg19'
    d.attrs['tile-size'] = tile_size
    d.attrs['max-zoom'] = math.ceil(
        math.log(d.attrs['max-length'] / tile_size) / math.log(2))

    print("max_zoom:", d.attrs['max-zoom'])

    if args.filepath is None:
        print("Waiting for input...")
        for line in sys.stdin:
            parts = line.split()
            start = int(parts[0], 10)
            end = int(parts[1], 10)
            val = float(parts[2])

            if start > last_end:
                # fill in any positions skipped over in the input
                data_buffers[0] += [0] * (start - last_end)

            data_buffers[0] += [val] * (end - start)
            last_end = end
            curr_zoom = 0

            while len(data_buffers[curr_zoom]) > chunk_size:
                # get the current chunk and store it
                print("curr_zoom:", curr_zoom)
                curr_chunk = np.array(data_buffers[curr_zoom][:chunk_size])
                curr_pos = positions[curr_zoom]
                dsets[curr_zoom][curr_pos:curr_pos+chunk_size] = curr_chunk

                # aggregate and store aggregated values in the next zoom_level's data
                data_buffers[curr_zoom + 1] += list(
                    ct.aggregate(curr_chunk, 2**args.zoom_step))
                data_buffers[curr_zoom] = data_buffers[curr_zoom][chunk_size:]
                positions[curr_zoom] += chunk_size
                curr_zoom += 1

        # store the remaining data
        print("tile_size:", tile_size, positions[0])

        while True:
            # get the current chunk and store it
            chunk_size = len(data_buffers[curr_zoom])
            curr_chunk = np.array(data_buffers[curr_zoom][:chunk_size])
            curr_pos = positions[curr_zoom]
            dsets[curr_zoom][curr_pos:curr_pos+chunk_size] = curr_chunk

            print("curr_zoom:", curr_zoom, "position:",
                  positions[curr_zoom] + len(curr_chunk))
            print("len:", [len(d) for d in data_buffers])

            # aggregate and store aggregated values in the next zoom_level's data
            data_buffers[curr_zoom + 1] += list(
                ct.aggregate(curr_chunk, 2**args.zoom_step))
            data_buffers[curr_zoom] = data_buffers[curr_zoom][chunk_size:]
            positions[curr_zoom] += chunk_size
            curr_zoom += 1

            # we've created enough tile levels to cover the entire maximum width
            if curr_zoom * args.zoom_step >= max_zoom:
                break

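
A hypothetical stdin-driven run; the script expects whitespace-separated start/end/value lines:

printf '0 1000 1.5\n1000 2000 2.0\n' | python main.py -o /tmp/tmp.hdf5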