Python chunks Examples, ubelt.chunks Python Examples

Example #1

0

Show file

File: test_list.py Project: Kulbear/ubelt

def test_chunk_errors():
    """
    Invalid ``ub.chunks`` configurations must raise ``ValueError``.
    """
    items = range(9)

    # Neither ``chunksize`` nor ``nchunks`` is specified.
    with pytest.raises(ValueError):
        ub.chunks(items)

    # Both ``chunksize`` and ``nchunks`` are specified at once.
    conflicting = dict(chunksize=2, nchunks=2)
    with pytest.raises(ValueError):
        ub.chunks(items, **conflicting)

    # ``nchunks`` over a generator (presumably rejected because a generator
    # has no known length -- confirm against the ubelt implementation).
    unsized = (_ for _ in range(2))
    with pytest.raises(ValueError):
        len(ub.chunks(unsized, nchunks=2))

Example #2

0

Show file

File: output_shape_for.py Project: Kitware/netharn

 def pad(x, pad, mode='constant', value=0):
     """
     Compute the shape produced by padding an input whose shape is `x`.

     `pad` is consumed two values at a time as (left, right) amounts. The
     first pair is applied to the last dimension, the second pair to the
     second-to-last dimension, and so on. `mode` and `value` are accepted
     but do not take part in the shape computation.

     Example:
         >>> t4d = x = (3, 3, 4, 2)
         >>> pad = p1d = (1, 1)
         >>> out = OutputShapeFor(F.pad)(x, pad)
         >>> print(out)
         (3, 3, 4, 4)
         >>> p2d = (1, 1, 2, 2) # pad last dim by (1, 1) and 2nd to last by (2, 2)
         >>> out = OutputShapeFor.pad(t4d, p2d, "constant", 0)
         >>> print(out)
         (3, 3, 8, 4)
         >>> t4d = (3, 3, 4, 2)
         >>> p3d = (0, 1, 2, 1, 3, 3) # pad by (0, 1), (2, 1), and (3, 3)
         >>> out = OutputShapeFor.pad(t4d, p3d, "constant", 0)
         >>> print(out)
         (3, 9, 7, 3)
     """
     padded_shape = list(x)
     ndims = len(padded_shape)
     pad_pairs = ub.chunks(pad, 2)
     # Walk the dimensions from last to first, one (left, right) pair each.
     for offset, (lpad, rpad) in enumerate(pad_pairs, start=1):
         axis = ndims - offset
         padded_shape[axis] = x[axis] + lpad + rpad
     return SHAPE_CLS(padded_shape)

Example #3

0

Show file

File: simple_benchmark.py Project: mmulich/wbia-tpl-pyflann

def run_benchmark():
    """
    Benchmark FLANN kd-tree construction and chunked nearest-neighbor search.

    Generates a random dataset and a random query set with ``rand_vecs``,
    times the construction of a kd-tree index over the dataset, then issues
    the queries in chunks of 1000 vectors, timing each chunk separately.
    The index parameters and the mean per-chunk query time are printed.
    """
    import ubelt as ub

    data_dim = 128
    num_dpts = 1000000
    num_qpts = 25000
    num_neighbs = 5
    random_seed = 42
    rng = np.random.RandomState(0)

    dataset = rand_vecs(num_dpts, data_dim, rng)
    testset = rand_vecs(num_qpts, data_dim, rng)
    # Build deterministic flann object
    flann = pyflann.FLANN()
    print('building datset for %d vecs' % (len(dataset)))

    with ub.Timer(label='building kdtrees', verbose=True) as t:
        params = flann.build_index(
            dataset,
            algorithm='kdtree',
            trees=6,
            random_seed=random_seed,
            cores=6,
        )

    print(params)

    qvec_chunks = list(ub.chunks(testset, 1000))
    times = []
    for qvecs in ub.ProgIter(qvec_chunks, label='find nn'):
        # ub.chunks yields plain lists of rows; rebuild a 2D array outside
        # of the timed region so the conversion cost is not measured.
        qvecs = np.asarray(qvecs)
        with ub.Timer(verbose=0) as t:
            # Query only the current chunk. Previously the whole `testset`
            # was queried on every iteration and the chunk went unused.
            _ = flann.nn_index(qvecs, num_neighbs)  # NOQA
        # NOTE(review): newer ubelt releases may spell this attribute
        # `elapsed` -- confirm against the installed version.
        times.append(t.ellapsed)
    print(np.mean(times))

Example #4

0

Show file

    def neg_redun_gen(infr):
        """
        Subiterator for phase3 of the main algorithm.

        Searches for decisions that would complete negative redundancy.

        The priority queue is cleared first. Candidate edges are then added
        in chunks of 500, and after each chunk everything produced by the
        inner priority generator is yielded.
        """
        for banner in ('===========================',
                       '--- NEGATIVE REDUN LOOP ---'):
            infr.print(banner, color='white')

        infr.queue.clear()

        only_auto = infr.params['redun.neg.only_auto']

        # TODO: outer loop that re-iterates until negative redundancy is
        # accomplished.
        candidates = infr.find_neg_redun_candidate_edges()
        # Feed the candidates in a little at a time for faster response time
        for edge_batch in ub.chunks(candidates, 500):
            infr.print('another neg redun chunk')
            infr.add_candidate_edges(edge_batch)
            inner = infr._inner_priority_gen(use_refresh=False,
                                             only_auto=only_auto)
            for item in inner:
                yield item

Example #5

0

Show file

    def _dump_chosen_indices(harn):
        """
        Dump a visualization of the validation images to disk

        The chosen indices for the current tag are processed in batches of
        16. Each batch is run through the model and post-processed, then one
        PNG per item is written under ``<train_dpath>/dump/<tag>``.
        """
        tag = harn.current_tag
        harn.debug('DUMP CHOSEN INDICES')

        # Lazily choose indices the first time this tag is seen
        if tag not in harn.chosen_indices:
            harn._choose_indices()

        nh.util.mplutil.aggensure()

        dataset = harn.loaders[tag].dataset
        for index_batch in ub.chunks(harn.chosen_indices[tag], 16):
            harn.debug('PREDICTING CHUNK')
            items = [dataset[index] for index in index_batch]
            collated = nh.data.collate.padded_collate(items)
            batch = harn.prepare_batch(collated)
            outputs, loss = harn.run_batch(batch)
            postout = harn.model.module.postprocess(outputs, nms_mode=4)

            for batch_pos, index in enumerate(index_batch):
                viz_kwargs = dict(
                    idx=batch_pos,
                    thresh=0.1,
                    orig_img=dataset._load_image(index),
                )
                fig = harn.visualize_prediction(batch, outputs, postout,
                                                **viz_kwargs)
                rendered = nh.util.mplutil.render_figure_to_image(fig)
                out_dpath = ub.ensuredir((harn.train_dpath, 'dump', tag))
                out_fname = 'pred_{:04d}_{:08d}.png'.format(index, harn.epoch)
                out_fpath = os.path.join(out_dpath, out_fname)
                harn.debug('dump viz fpath = {}'.format(out_fpath))
                nh.util.imwrite(out_fpath, rendered)

Example #6

0

Show file

 def leave_k_out_xval(k=2):
     """
     Generate leave-k-out cross-validation splits over ``task.scene_ids``.

     The scene ids are split into consecutive chunks of size `k`. Each chunk
     serves once as the test set while every other scene is used for
     training. Both sets are printed before they are yielded.

     Yields:
         Tuple[list, list]: (train_scenes, test_scenes)
     """
     for held_out in ub.chunks(task.scene_ids, chunksize=k):
         # Start from every scene and drop the ones held out for testing
         remaining = list(task.scene_ids)
         for scene in held_out:
             remaining.remove(scene)
         print('test_scenes = {!r}'.format(held_out))
         print('train_scenes = {!r}'.format(remaining))
         yield remaining, held_out

Example #7

0

Show file

File: mixin_loops.py Project: Hrmirzadeh/wildbook-ia

 def serial_gen():
     """
     Yield batches of positive-redundancy candidate edges.

     Each non-empty batch is registered with ``infr.add_candidate_edges``
     before it is yielded. With the hard-coded flag below, all candidates
     are emitted as one batch; the alternative path emits chunks of 100.
     """
     # use this if threading does bad things
     single_batch = True
     if single_batch:
         batches = [list(infr.find_pos_redun_candidate_edges())]
     else:
         batches = ub.chunks(infr.find_pos_redun_candidate_edges(), 100)
     for edge_batch in batches:
         if len(edge_batch) > 0:
             infr.add_candidate_edges(edge_batch)
             yield edge_batch

Example #8

0

Show file

File: misc.py Project: Erotemic/clab

 def protected_print(msg):
     """
     Print `msg` without clobbering any live tqdm progress bar.

     When tqdm has live instances and is not paused, the message is written
     line by line through ``tqdm.tqdm.write``. Lines longer than the first
     bar's ``ncols`` are split into pieces of that width. Otherwise the
     message goes through the regular ``print``.
     """
     # Check if any progress bars are alive
     paused = getattr(tqdm.tqdm, '_paused', False)
     progs = getattr(tqdm.tqdm, '_instances', [])
     if paused or len(progs) == 0:
         # otherwise use the print / logger
         # (ensure logger has custom logic to exclude logging at this exact
         # place)
         print(msg)
         return

     first_bar = list(progs)[0]
     # Specify file in case we are capturing stdout
     for line in str(msg).split('\n'):
         width = first_bar.ncols
         if width is None or len(line) <= width:
             tqdm.tqdm.write(line, file=sys.stdout)
             continue
         for piece in ub.chunks(line, width):
             tqdm.tqdm.write(''.join(piece), file=sys.stdout)

Example #9

0

Show file

    def mean(self):
        """
        The mean of the best results of each trial.

        ``self.times`` is split into chunks of ``self.bestof`` measurements.
        The minimum of each chunk is taken and those minima are averaged.

        Note:
            This is typically less informative than simply looking at the min

        Example:
            >>> import ubelt as ub
            >>> self = Timerit(num=10, verbose=0)
            >>> self.call(ub.find_nth_prime, 50)
            >>> assert self.mean() > 0
        """
        import ubelt as ub
        best_times = [
            min(trial) for trial in ub.chunks(self.times, self.bestof)
        ]
        return sum(best_times) / len(best_times)

Example #10

0

Show file

    def std(self):
        """
        The standard deviation of the best results of each trial.

        ``self.times`` is split into chunks of ``self.bestof`` measurements
        and the minimum of each chunk is taken. The result is the population
        standard deviation of those minima (the divisor is their count).

        Note:
            As mentioned in the timeit source code, the standard deviation is
            not often useful. Typically the minimum value is most informative.

        Example:
            >>> import ubelt as ub
            >>> self = Timerit(num=10, verbose=1)
            >>> self.call(ub.find_nth_prime, 50)
            >>> assert self.std() > 0
        """
        import ubelt as ub
        best_times = [
            min(trial) for trial in ub.chunks(self.times, self.bestof)
        ]
        count = len(best_times)
        center = sum(best_times) / count
        sum_sq_dev = sum((t - center)**2 for t in best_times)
        return math.sqrt(sum_sq_dev / count)

Example #11

0

Show file

def stack_images_grid(images,
                      chunksize=None,
                      axis=0,
                      overlap=0,
                      return_info=False,
                      bg_value=None):
    """
    Stacks images in a grid. Optionally return transforms of original image
    positions in the output image.

    The images are split into chunks of `chunksize`, which defaults to
    ``int(sqrt(len(images)))``. Each chunk is stacked along ``1 - axis`` and
    the resulting strips are then stacked along `axis`.
    """
    if chunksize is None:
        chunksize = int(len(images)**.5)
    strips = []
    strip_tfs = []
    assert axis in [0, 1]
    inner_axis = 1 - axis
    # First pass: stack each chunk of images into a single strip
    for chunk in ub.chunks(images, chunksize, bordermode='none'):
        strip, chunk_tfs = stack_images(chunk, overlap=overlap,
                                        return_info=True, bg_value=bg_value,
                                        resize=None, axis=inner_axis)
        strip_tfs.append(chunk_tfs)
        strips.append(strip)

    # Second pass: stack the strips along the requested axis
    img_grid, grid_tfs = stack_images(strips, overlap=overlap,
                                      bg_value=bg_value, return_info=True,
                                      axis=axis)

    # Combine each image's within-strip transform with its strip's transform
    transforms_ = []
    for chunk_tfs, strip_tf in zip(strip_tfs, grid_tfs):
        for image_tf in chunk_tfs:
            transforms_.append(image_tf + strip_tf)

    if not return_info:
        return img_grid
    return img_grid, transforms_

Example #12

0

Show file

def stack_images_grid(images,
                      chunksize=None,
                      axis=0,
                      overlap=0,
                      return_info=False,
                      bg_value=None):
    """
    Stacks images in a grid. Optionally return transforms of original image
    positions in the output image.

    Args:
        images (Iterable[ndarray[ndim=2]]):  image data
        chunksize (int, default=None): number of rows per column or columns per
            row depending on the value of `axis`.
            If unspecified, computes this as `int(sqrt(len(images)))`.
        axis (int, default=0):
            If 0, chunksize is columns per row.
            If 1, chunksize is rows per column.
        overlap (int): number of pixels to overlap. Using a negative
            number results in a border.
        return_info (bool): if True, returns transforms (scales and
            translations) to map from original image to its new location.
        bg_value: forwarded unchanged to `stack_images` (presumably the
            background fill value -- confirm against `stack_images`).

    Returns:
        ndarray: an image of stacked images in a grid pattern

        OR

        Tuple[ndarray, List]: where the first item is the aforementioned
            stacked image and the second item is a list of transformations
            for each input image mapping it to its location in the returned
            image.
    """
    import ubelt as ub
    if chunksize is None:
        chunksize = int(len(images)**.5)
    strips = []
    strip_tfs = []
    assert axis in [0, 1]
    inner_axis = 1 - axis
    # First pass: stack each chunk of images into a single strip
    for chunk in ub.chunks(images, chunksize, bordermode='none'):
        strip, chunk_tfs = stack_images(chunk, overlap=overlap,
                                        return_info=True, bg_value=bg_value,
                                        resize=None, axis=inner_axis)
        strip_tfs.append(chunk_tfs)
        strips.append(strip)

    # Second pass: stack the strips along the requested axis
    img_grid, grid_tfs = stack_images(strips, overlap=overlap,
                                      bg_value=bg_value, return_info=True,
                                      axis=axis)

    # Combine each image's within-strip transform with its strip's transform
    transforms_ = []
    for chunk_tfs, strip_tf in zip(strip_tfs, grid_tfs):
        for image_tf in chunk_tfs:
            transforms_.append(image_tf + strip_tf)

    if not return_info:
        return img_grid
    return img_grid, transforms_

Example #13

0

Show file

File: test_list.py Project: Kulbear/ubelt

def test_chunk_len():
    """
    Six items split with ``chunksize=3`` must report a length of two chunks.
    """
    items = [1, 1, 1, 1, 1, 1]
    chunker = ub.chunks(items, chunksize=3)
    num_chunks = len(chunker)
    assert num_chunks == 2

Example #14

0

Show file

File: test_list.py Project: Kulbear/ubelt

def test_chunk_total_chunksize():
    """
    With an explicit ``total=10`` and ``chunksize=4`` the reported length is
    3, even though the item list itself is empty.
    """
    chunker = ub.chunks([], total=10, chunksize=4)
    num_chunks = len(chunker)
    assert num_chunks == 3

Example #15

0

Show file

File: bench_template.py Project: Erotemic/misc

def benchmark_template():
    """
    Template for benchmarking several methods over a grid of parameters.

    Every combination of the values in ``basis`` is timed with ``timerit``.
    The timings are collected into a long-form pandas DataFrame, speedups
    relative to the slowest variant are computed per parameter setting, the
    methods are optionally ranked with openskill, and the results are
    plotted with seaborn (obtained through kwplot).

    Requires the third-party packages ubelt, pandas, timerit, openskill and
    kwplot, all of which are imported inside this function.
    """
    import ubelt as ub
    import pandas as pd
    import timerit

    def method1(x, y, z):
        ret = []
        for i in range((x + y) * z):
            ret.append(i)
        return ret

    def method2(x, y, z):
        ret = [i for i in range((x + y) * z)]
        return ret

    # Maps local names (including method1 / method2) to their objects so the
    # method names listed in `basis` can be looked up below.
    method_lut = locals()  # can populate this some other way

    # Change params here to modify number of trials
    ti = timerit.Timerit(100, bestof=10, verbose=1)

    # if True, record every trial run and show variance in seaborn
    # if False, use the standard timerit min/mean measures
    RECORD_ALL = True

    # These are the parameters that we benchmark over
    basis = {
        'method': ['method1', 'method2'],
        'x': list(range(7)),
        'y': [0, 100],
        'z': [2, 3]
        # 'param_name': [param values],
    }
    xlabel = 'x'
    # Set these to param labels that directly transfer to method kwargs
    kw_labels = ['x', 'y', 'z']
    # Set these to empty lists if they are not used
    group_labels = {
        'style': ['y'],
        'size': ['z'],
    }
    # The hue group receives every basis key that is neither the x-axis
    # variable nor already assigned to another group.
    group_labels['hue'] = list((ub.oset(basis) - {xlabel}) -
                               set.union(*map(set, group_labels.values())))
    grid_iter = list(ub.named_product(basis))

    # For each variation of your experiment, create a row.
    rows = []
    for params in grid_iter:
        group_keys = {}
        for gname, labels in group_labels.items():
            group_keys[gname + '_key'] = ub.repr2(ub.dict_isect(
                params, labels),
                                                  compact=1,
                                                  si=1)
        key = ub.repr2(params, compact=1, si=1)
        # Make any modifications you need to compute input kwargs for each
        # method here.
        kwargs = ub.dict_isect(params.copy(), kw_labels)
        method = method_lut[params['method']]
        # Timerit will run some user-specified number of loops.
        # and compute time stats with similar methodology to timeit
        for timer in ti.reset(key):
            # Put any setup logic you dont want to time here.
            # ...
            with timer:
                # Put the logic you want to time here
                method(**kwargs)

        if RECORD_ALL:
            # Seaborn will show the variance if this is enabled, otherwise
            # use the robust timerit mean / min times
            # One row per trial: the minimum of each group of `bestof` runs.
            chunk_iter = ub.chunks(ti.times, ti.bestof)
            times = list(map(min, chunk_iter))  # TODO: timerit method for this
            for time in times:
                row = {
                    # 'mean': ti.mean(),
                    'time': time,
                    'key': key,
                    **group_keys,
                    **params,
                }
                rows.append(row)
        else:
            row = {
                'mean': ti.mean(),
                'min': ti.min(),
                'key': key,
                **group_keys,
                **params,
            }
            rows.append(row)

    time_key = 'time' if RECORD_ALL else 'min'

    # The rows define a long-form pandas data array.
    # Data in long-form makes it very easy to use seaborn.
    data = pd.DataFrame(rows)
    data = data.sort_values(time_key)

    if RECORD_ALL:
        # Show the min / mean if we record all
        min_times = data.groupby('key').min().rename({'time': 'min'}, axis=1)
        mean_times = data.groupby('key')[['time'
                                          ]].mean().rename({'time': 'mean'},
                                                           axis=1)
        stats_data = pd.concat([min_times, mean_times], axis=1)
        stats_data = stats_data.sort_values('min')
    else:
        stats_data = data

    USE_OPENSKILL = 1
    if USE_OPENSKILL:
        # Lets try a real ranking method
        # https://github.com/OpenDebates/openskill.py
        # NOTE(review): the Rating / rate / predict_win calls below assume a
        # particular openskill API -- confirm against the installed version.
        import openskill
        method_ratings = {m: openskill.Rating() for m in basis['method']}

    # Group by every remaining column (everything except the method, the
    # statistics and the derived key columns), so that each group compares
    # the methods under one parameter setting.
    other_keys = sorted(
        set(stats_data.columns) -
        {'key', 'method', 'min', 'mean', 'hue_key', 'size_key', 'style_key'})
    for params, variants in stats_data.groupby(other_keys):
        variants = variants.sort_values('mean')
        ranking = variants['method'].reset_index(drop=True)

        # Speedup of each variant relative to the slowest one in the group
        mean_speedup = variants['mean'].max() / variants['mean']
        stats_data.loc[mean_speedup.index, 'mean_speedup'] = mean_speedup
        min_speedup = variants['min'].max() / variants['min']
        stats_data.loc[min_speedup.index, 'min_speedup'] = min_speedup

        if USE_OPENSKILL:
            # The idea is that each setting of parameters is a game, and each
            # "method" is a player. We rank the players by which is fastest,
            # and update their ranking according to the Weng-Lin Bayes ranking
            # model. This does not take the fact that some "games" (i.e.
            # parameter settings) are more important than others, but it should
            # be fairly robust on average.
            old_ratings = [[r] for r in ub.take(method_ratings, ranking)]
            new_values = openskill.rate(old_ratings)  # Not inplace
            new_ratings = [openskill.Rating(*new[0]) for new in new_values]
            method_ratings.update(ub.dzip(ranking, new_ratings))

    print('Statistics:')
    print(stats_data)

    if USE_OPENSKILL:
        from openskill import predict_win
        win_prob = predict_win([[r] for r in method_ratings.values()])
        skill_agg = pd.Series(ub.dzip(method_ratings.keys(),
                                      win_prob)).sort_values(ascending=False)
        print('Aggregated Rankings =\n{}'.format(skill_agg))

    plot = True
    if plot:
        # import seaborn as sns
        # kwplot autosns works well for IPython and script execution.
        # not sure about notebooks.
        import kwplot
        sns = kwplot.autosns()
        plt = kwplot.autoplt()

        # Only pass seaborn grouping keywords for groups that have labels
        plotkw = {}
        for gname, labels in group_labels.items():
            if labels:
                plotkw[gname] = gname + '_key'

        # Your variables may change
        ax = kwplot.figure(fnum=1, doclf=True).gca()
        sns.lineplot(data=data,
                     x=xlabel,
                     y=time_key,
                     marker='o',
                     ax=ax,
                     **plotkw)
        ax.set_title('Benchmark Name')
        ax.set_xlabel('Size (todo: A better x-variable description)')
        ax.set_ylabel('Time (todo: A better y-variable description)')
        # ax.set_xscale('log')
        # ax.set_yscale('log')

        # Only call plt.show() when the IPython marker name is undefined,
        # i.e. when not running inside IPython.
        try:
            __IPYTHON__
        except NameError:
            plt.show()

Example #16

0

Show file

File: test_list.py Project: Kulbear/ubelt

def test_chunk_total_nchunks():
    """
    With an explicit ``total=10`` and ``nchunks=4`` the reported length is 4,
    even though the item list itself is empty.
    """
    chunker = ub.chunks([], total=10, nchunks=4)
    num_chunks = len(chunker)
    assert num_chunks == 4

Example #17

0

Show file

File: bench_mul_pow.py Project: Erotemic/misc

def benchmark_mul_vs_pow():
    """
    Benchmark exponentiation via repeated multiplication against ``**``.

    Every combination of the values in ``basis`` (method, exponent ``n`` and
    kind of base value ``v``) is timed with ``timerit``. The best time of
    each trial is collected into a long-form pandas DataFrame, which is
    printed and then plotted on a log-scaled y-axis with seaborn (obtained
    through kwplot).

    Requires the third-party packages ubelt, pandas, timerit and kwplot, all
    of which are imported inside this function.
    """
    import ubelt as ub
    import pandas as pd
    import timerit

    from functools import reduce
    import operator as op
    import itertools as it

    def method_pow_via_mul_raw(n):
        """ Construct a function that does multiplication of a value n times """
        # eval is applied to a string built only from 'v' and ' * ', not to
        # external input.
        return eval('lambda v: ' + ' * '.join(['v'] * n))

    def method_pow_via_mul_for(v, n):
        ret = v
        for _ in range(1, n):
            ret = ret * v
        return ret

    def method_pow_via_mul_reduce(v, n):
        """ Alternative way to multiply a value n times """
        return reduce(op.mul, it.repeat(v, n))

    def method_pow_via_pow(v, n):
        return v ** n

    # Maps local names (including the method_* functions above) to their
    # objects so the method names listed in `basis` can be looked up below.
    method_lut = locals()  # can populate this some other way

    ti = timerit.Timerit(500000, bestof=1000, verbose=2)

    # Only two of the four methods defined above are benchmarked here.
    basis = {
        'method': ['method_pow_via_mul_raw', 'method_pow_via_pow'],
        'n': list(range(1, 20)),
        'v': ['random-int', 'random-float'],
        # 'param_name': [param values],
    }
    xlabel = 'n'
    kw_labels = ['v', 'n']
    group_labels = {
        'style': ['v'],
        'size': [],
    }
    # The hue group receives every basis key that is neither the x-axis
    # variable nor already assigned to another group.
    group_labels['hue'] = list(
        (ub.oset(basis) - {xlabel}) - set.union(*map(set, group_labels.values())))
    grid_iter = list(ub.named_product(basis))

    # For each variation of your experiment, create a row.
    rows = []
    for params in grid_iter:
        group_keys = {}
        for gname, labels in group_labels.items():
            group_keys[gname + '_key'] = ub.repr2(
                ub.dict_isect(params, labels), compact=1, si=1)
        key = ub.repr2(params, compact=1, si=1)
        kwargs = ub.dict_isect(params.copy(),  kw_labels)
        method = method_lut[params['method']]
        # Timerit will run some user-specified number of loops.
        # and compute time stats with similar methodology to timeit

        # The "raw" method is a factory: call it once with n to get the
        # function to time, and drop n from kwargs so only v is passed.
        if params['method'] == 'method_pow_via_mul_raw':
            method = method(kwargs.pop('n'))

        for timer in ti.reset(key):
            # Put any setup logic you dont want to time here.
            # ...
            # NOTE(review): kwargs['v'] is overwritten with a number on the
            # first loop, so none of these branches match on later loops and
            # the same value is reused for the remaining timings. Confirm
            # whether a fresh random value per loop was intended.
            import random
            if kwargs['v'] == 'random':
                kwargs['v'] = random.randint(1, 31000) if random.random() > 0.5 else random.random()
            elif kwargs['v'] == 'random-int':
                kwargs['v'] = random.randint(1, 31000)
            elif kwargs['v'] == 'random-float':
                kwargs['v'] = random.random()
            with timer:
                # Put the logic you want to time here
                method(**kwargs)
        # One row per trial: the minimum of each group of `bestof` runs.
        for time in map(min, ub.chunks(ti.times, ti.bestof)):
            row = {
                # 'mean': ti.mean(),
                'time': time,
                'key': key,
                **group_keys,
                **params,
            }
            rows.append(row)

    # The rows define a long-form pandas data array.
    # Data in long-form makes it very easy to use seaborn.
    data = pd.DataFrame(rows)
    # data = data.sort_values('time')
    print(data)

    plot = True
    if plot:
        # import seaborn as sns
        # kwplot autosns works well for IPython and script execution.
        # not sure about notebooks.
        import kwplot
        sns = kwplot.autosns()
        plt = kwplot.autoplt()

        # Only pass seaborn grouping keywords for groups that have labels
        plotkw = {}
        for gname, labels in group_labels.items():
            if labels:
                plotkw[gname] = gname + '_key'

        # Your variables may change
        ax = kwplot.figure(fnum=1, doclf=True).gca()
        sns.lineplot(data=data, x=xlabel, y='time', marker='o', ax=ax, **plotkw)
        ax.set_title('Benchmark')
        ax.set_xlabel('N')
        ax.set_ylabel('Time')
        ax.set_yscale('log')

        plt.show()