def test_chunk_errors():
    """
    Each of the listed ``ub.chunks`` invocations must raise ``ValueError``.
    """
    failing_calls = [
        # neither chunksize nor nchunks is given
        lambda: ub.chunks(range(9)),
        # both chunksize and nchunks are given
        lambda: ub.chunks(range(9), chunksize=2, nchunks=2),
        # length is requested for a generator input chunked with nchunks
        lambda: len(ub.chunks((_ for _ in range(2)), nchunks=2)),
    ]
    for failing_call in failing_calls:
        with pytest.raises(ValueError):
            failing_call()
def pad(x, pad, mode='constant', value=0):
    """
    Compute the shape that results from padding an input of shape ``x``.

    Consecutive pairs in ``pad`` are read as (left, right) padding amounts.
    The first pair applies to the last dimension, the second pair to the
    second-to-last dimension, and so on.

    Args:
        x (Sequence[int]): input shape
        pad (Sequence[int]): flat sequence of (left, right) padding pairs
        mode (str): unused by this shape computation
        value: unused by this shape computation

    Returns:
        the padded shape wrapped in ``SHAPE_CLS``

    Example:
        >>> t4d = x = (3, 3, 4, 2)
        >>> pad = p1d = (1, 1)
        >>> out = OutputShapeFor(F.pad)(x, pad)
        >>> print(out)
        (3, 3, 4, 4)
        >>> p2d = (1, 1, 2, 2)  # pad last dim by (1, 1) and 2nd to last by (2, 2)
        >>> out = OutputShapeFor.pad(t4d, p2d, "constant", 0)
        >>> print(out)
        (3, 3, 8, 4)
        >>> t4d = (3, 3, 4, 2)
        >>> p3d = (0, 1, 2, 1, 3, 3)  # pad by (0, 1), (2, 1), and (3, 3)
        >>> out = OutputShapeFor.pad(t4d, p3d, "constant", 0)
        >>> print(out)
        (3, 9, 7, 3)
    """
    padded_shape = list(x)
    ndim = len(padded_shape)
    # Walk the padding pairs from the last dimension backwards
    for offset, (lpad, rpad) in enumerate(ub.chunks(pad, 2), start=1):
        axis = ndim - offset
        padded_shape[axis] = x[axis] + lpad + rpad
    return SHAPE_CLS(padded_shape)
def run_benchmark():
    """
    Benchmark FLANN kd-tree nearest-neighbor queries on random vectors.

    Generates a random dataset and a random query set, builds a kd-tree
    index over the dataset, then times nearest-neighbor lookups one chunk
    of 1000 query vectors at a time and prints the mean time per chunk.
    """
    import ubelt as ub
    data_dim = 128
    num_dpts = 1000000
    num_qpts = 25000
    num_neighbs = 5
    random_seed = 42
    rng = np.random.RandomState(0)

    dataset = rand_vecs(num_dpts, data_dim, rng)
    testset = rand_vecs(num_qpts, data_dim, rng)

    # Build deterministic flann object
    flann = pyflann.FLANN()
    print('building datset for %d vecs' % (len(dataset)))

    with ub.Timer(label='building kdtrees', verbose=True):
        params = flann.build_index(
            dataset,
            algorithm='kdtree',
            trees=6,
            random_seed=random_seed,
            cores=6,
        )
    print(params)

    qvec_chunks = list(ub.chunks(testset, 1000))
    times = []
    for qvecs in ub.ProgIter(qvec_chunks, label='find nn'):
        # Each chunk is a sequence of row vectors; stack it back into a
        # single array outside of the timed region.
        qvecs = np.asarray(qvecs)
        with ub.Timer(verbose=0) as t:
            # Query only the current chunk. Previously the entire testset
            # was queried on every iteration, so the chunking had no
            # effect on what was being timed.
            _ = flann.nn_index(qvecs, num_neighbs)  # NOQA
        # NOTE(review): the `ellapsed` spelling is kept from the original;
        # confirm it matches the attribute exposed by the installed
        # ub.Timer.
        times.append(t.ellapsed)
    print(np.mean(times))
def neg_redun_gen(infr):
    """
    Subiterator for phase3 of the main algorithm.

    Searches for decisions that would complete negative redundancy. The
    candidate edges are added in chunks, and after each chunk is added the
    values produced by the inner priority generator are re-yielded.
    """
    banner_color = 'white'
    infr.print('===========================', color=banner_color)
    infr.print('--- NEGATIVE REDUN LOOP ---', color=banner_color)

    infr.queue.clear()

    only_auto = infr.params['redun.neg.only_auto']

    # TODO: outer loop that re-iterates until negative redundancy is
    # accomplished.
    candidate_edges = infr.find_neg_redun_candidate_edges()
    for edge_batch in ub.chunks(candidate_edges, 500):
        infr.print('another neg redun chunk')
        # Add chunks in a little at a time for faster response time
        infr.add_candidate_edges(edge_batch)
        inner_gen = infr._inner_priority_gen(use_refresh=False,
                                             only_auto=only_auto)
        for item in inner_gen:
            yield item
def _dump_chosen_indices(harn):
    """
    Write prediction visualizations for the chosen indices to disk.

    The chosen indices of the current tag are processed in chunks of 16.
    Each chunk is collated, run through the harness, and postprocessed,
    and then one image per index is written beneath
    ``<train_dpath>/dump/<tag>``.
    """
    tag = harn.current_tag
    harn.debug('DUMP CHOSEN INDICES')

    # Choose indices first if none were recorded for this tag
    if tag not in harn.chosen_indices:
        harn._choose_indices()

    nh.util.mplutil.aggensure()

    dset = harn.loaders[tag].dataset
    for index_chunk in ub.chunks(harn.chosen_indices[tag], 16):
        harn.debug('PREDICTING CHUNK')

        items = [dset[index] for index in index_chunk]
        collated = nh.data.collate.padded_collate(items)
        batch = harn.prepare_batch(collated)
        outputs, _ = harn.run_batch(batch)
        postout = harn.model.module.postprocess(outputs, nms_mode=4)

        for batch_pos, index in enumerate(index_chunk):
            orig_img = dset._load_image(index)
            fig = harn.visualize_prediction(
                batch, outputs, postout, idx=batch_pos, thresh=0.1,
                orig_img=orig_img)
            img = nh.util.mplutil.render_figure_to_image(fig)

            dump_dpath = ub.ensuredir((harn.train_dpath, 'dump', tag))
            fname = 'pred_{:04d}_{:08d}.png'.format(index, harn.epoch)
            fpath = os.path.join(dump_dpath, fname)
            harn.debug('dump viz fpath = {}'.format(fpath))
            nh.util.imwrite(fpath, img)
def leave_k_out_xval(k=2):
    """
    Generate train / test splits that hold out ``k`` scenes at a time.

    Args:
        k (int): chunk size used to group ``task.scene_ids`` into test sets

    Yields:
        tuple: ``(train_scenes, test_scenes)`` where ``test_scenes`` is one
            chunk of the scene ids and ``train_scenes`` is a list of the
            scene ids with each test scene removed.
    """
    for held_out in ub.chunks(task.scene_ids, chunksize=k):
        # Start from every scene and drop the held out ones
        remaining = list(task.scene_ids)
        for scene in held_out:
            remaining.remove(scene)
        print('test_scenes = {!r}'.format(held_out))
        print('train_scenes = {!r}'.format(remaining))
        yield remaining, held_out
def serial_gen():
    """
    Add positive-redundancy candidate edges to ``infr`` and yield them.

    The hard-coded toggle selects between adding every candidate in one
    batch (the active branch) and adding them in chunks of 100.
    """
    # use this if threading does bad things
    if True:
        all_edges = list(infr.find_pos_redun_candidate_edges())
        if all_edges:
            infr.add_candidate_edges(all_edges)
            yield all_edges
    else:
        candidates = infr.find_pos_redun_candidate_edges()
        for edge_chunk in ub.chunks(candidates, 100):
            if len(edge_chunk) == 0:
                continue
            infr.add_candidate_edges(edge_chunk)
            yield edge_chunk
def protected_print(msg):
    """
    Print ``msg``, routing it through ``tqdm.tqdm.write`` when tqdm has a
    registered progress bar instance and is not flagged as paused.

    Lines longer than the first progress bar's ``ncols`` are split into
    pieces of at most ``ncols`` characters before being written.
    """
    # Check if any progress bars are alive
    paused = getattr(tqdm.tqdm, '_paused', False)
    progs = getattr(tqdm.tqdm, '_instances', [])

    if paused or len(progs) == 0:
        # otherwise use the print / logger
        # (ensure logger has custom logic to exclude logging at this exact
        # place)
        print(msg)
        return

    prog = list(progs)[0]
    # Specify file in case we are capturing stdout
    for line in str(msg).split('\n'):
        too_wide = prog.ncols is not None and len(line) > prog.ncols
        if not too_wide:
            tqdm.tqdm.write(line, file=sys.stdout)
            continue
        for piece in ub.chunks(line, prog.ncols):
            tqdm.tqdm.write(''.join(piece), file=sys.stdout)
def mean(self):
    """
    The mean of the best results of each trial.

    The recorded times are grouped into chunks of ``self.bestof`` entries,
    the minimum of each chunk is taken, and those minimums are averaged.

    Note:
        This is typically less informative than simply looking at the min

    Example:
        >>> import ubelt as ub
        >>> self = Timerit(num=10, verbose=0)
        >>> self.call(ub.find_nth_prime, 50)
        >>> assert self.mean() > 0
    """
    import ubelt as ub
    best_times = [min(trial) for trial in ub.chunks(self.times, self.bestof)]
    return sum(best_times) / len(best_times)
def std(self):
    """
    The standard deviation of the best results of each trial.

    The recorded times are grouped into chunks of ``self.bestof`` entries
    and the minimum of each chunk is taken. The result is the square root
    of the mean squared deviation of those minimums from their mean.

    Note:
        As mentioned in the timeit source code, the standard deviation is
        not often useful. Typically the minimum value is most informative.

    Example:
        >>> import ubelt as ub
        >>> self = Timerit(num=10, verbose=1)
        >>> self.call(ub.find_nth_prime, 50)
        >>> assert self.std() > 0
    """
    import ubelt as ub
    best_times = [min(trial) for trial in ub.chunks(self.times, self.bestof)]
    center = sum(best_times) / len(best_times)
    sq_deviation = sum((t - center)**2 for t in best_times)
    return math.sqrt(sq_deviation / len(best_times))
def stack_images_grid(images, chunksize=None, axis=0, overlap=0,
                      return_info=False, bg_value=None):
    """
    Stacks images in a grid.

    The images are split into chunks of ``chunksize``. Each chunk is
    stacked along ``1 - axis``, and the stacked chunks are then stacked
    along ``axis``. Optionally returns the combined transforms of the
    original image positions in the output image.
    """
    if chunksize is None:
        chunksize = int(len(images)**.5)

    strips = []
    strip_tfs = []
    assert axis in [0, 1]
    cross_axis = 1 - axis
    for batch in ub.chunks(images, chunksize, bordermode='none'):
        strip, tfs = stack_images(batch, overlap=overlap, return_info=True,
                                  bg_value=bg_value, resize=None,
                                  axis=cross_axis)
        strip_tfs.append(tfs)
        strips.append(strip)

    img_grid, grid_tfs = stack_images(strips, overlap=overlap,
                                      bg_value=bg_value, return_info=True,
                                      axis=axis)

    # Combine each within-chunk transform with the transform of its chunk
    transforms_ = []
    for tfs, grid_tf in zip(strip_tfs, grid_tfs):
        for tf in tfs:
            transforms_.append(tf + grid_tf)

    if return_info:
        return img_grid, transforms_
    return img_grid
def stack_images_grid(images, chunksize=None, axis=0, overlap=0,
                      return_info=False, bg_value=None):
    """
    Stacks images in a grid.

    Optionally return transforms of original image positions in the output
    image.

    Args:
        images (Iterable[ndarray[ndim=2]]): image data

        chunksize (int, default=None): number of rows per column or columns
            per row depending on the value of `axis`.
            If unspecified, computes this as `int(sqrt(len(images)))`.

        axis (int, default=0):
            If 0, chunksize is columns per row.
            If 1, chunksize is rows per column.

        overlap (int): number of pixels to overlap. Using a negative
            number results in a border.

        return_info (bool): if True, returns transforms (scales and
            translations) to map from original image to its new location.

        bg_value: passed through to `stack_images`.

    Returns:
        ndarray: an image of stacked images in a grid pattern

        OR

        Tuple[ndarray, List]: where the first item is the aforementioned
            stacked image and the second item is a list of
            transformations for each input image mapping it to its
            location in the returned image.
    """
    import ubelt as ub
    if chunksize is None:
        chunksize = int(len(images)**.5)

    assert axis in [0, 1]
    inner_axis = 1 - axis

    # First pass: stack every chunk of images along the other axis
    partials = [
        stack_images(batch, overlap=overlap, return_info=True,
                     bg_value=bg_value, resize=None, axis=inner_axis)
        for batch in ub.chunks(images, chunksize, bordermode='none')
    ]
    stacked_chunks = [stacked for stacked, _ in partials]
    chunk_tfs = [tfs for _, tfs in partials]

    # Second pass: stack the per-chunk results along the requested axis
    img_grid, outer_tfs = stack_images(stacked_chunks, overlap=overlap,
                                       bg_value=bg_value, return_info=True,
                                       axis=axis)

    transforms_ = [
        inner_tf + outer_tf
        for inner_tfs, outer_tf in zip(chunk_tfs, outer_tfs)
        for inner_tf in inner_tfs
    ]

    if not return_info:
        return img_grid
    return img_grid, transforms_
def test_chunk_len():
    """
    Six items chunked with ``chunksize=3`` must report a length of 2.
    """
    items = [1] * 6
    assert len(ub.chunks(items, chunksize=3)) == 2
def test_chunk_total_chunksize():
    """
    With ``total=10`` and ``chunksize=4`` the reported length must be 3,
    even though the given item list is empty.
    """
    chunked = ub.chunks([], total=10, chunksize=4)
    n_chunks = len(chunked)
    assert n_chunks == 3
def benchmark_template():
    """
    Template for benchmarking several methods over a grid of parameters.

    Steps performed:
        1. Time every combination in ``basis`` with timerit and collect one
           row per measurement (or per combination when ``RECORD_ALL`` is
           False).
        2. Build a long-form pandas DataFrame and derive min / mean stats.
        3. For each group of non-method parameters, compute the ratio of
           the largest mean / min time in the group to each variant's time
           and, when ``USE_OPENSKILL`` is enabled, update per-method
           openskill ratings based on the mean-time ordering.
        4. Print the statistics and draw a seaborn line plot via kwplot.

    Requires ubelt, pandas, timerit, openskill, and kwplot, which are
    imported inside this function.
    """
    import ubelt as ub
    import pandas as pd
    import timerit

    def method1(x, y, z):
        ret = []
        for i in range((x + y) * z):
            ret.append(i)
        return ret

    def method2(x, y, z):
        ret = [i for i in range((x + y) * z)]
        return ret

    # Maps a method name from `basis['method']` to the callable above
    method_lut = locals()  # can populate this some other way

    # Change params here to modify number of trials
    ti = timerit.Timerit(100, bestof=10, verbose=1)

    # if True, record every trial run and show variance in seaborn
    # if False, use the standard timerit min/mean measures
    RECORD_ALL = True

    # These are the parameters that we benchmark over
    basis = {
        'method': ['method1', 'method2'],
        'x': list(range(7)),
        'y': [0, 100],
        'z': [2, 3]
        # 'param_name': [param values],
    }
    xlabel = 'x'
    # Set these to param labels that directly transfer to method kwargs
    kw_labels = ['x', 'y', 'z']
    # Set these to empty lists if they are not used
    group_labels = {
        'style': ['y'],
        'size': ['z'],
    }
    # The hue group gets every basis key that is neither the x-axis label
    # nor already assigned to another group.
    group_labels['hue'] = list((ub.oset(basis) - {xlabel}) - set.union(*map(set, group_labels.values())))
    grid_iter = list(ub.named_product(basis))

    # For each variation of your experiment, create a row.
    rows = []
    for params in grid_iter:
        group_keys = {}
        for gname, labels in group_labels.items():
            group_keys[gname + '_key'] = ub.repr2(ub.dict_isect(
                params, labels), compact=1, si=1)
        key = ub.repr2(params, compact=1, si=1)
        # Make any modifications you need to compute input kwargs for each
        # method here.
        kwargs = ub.dict_isect(params.copy(), kw_labels)
        method = method_lut[params['method']]
        # Timerit will run some user-specified number of loops.
        # and compute time stats with similar methodology to timeit
        for timer in ti.reset(key):
            # Put any setup logic you dont want to time here.
            # ...
            with timer:
                # Put the logic you want to time here
                method(**kwargs)

        if RECORD_ALL:
            # Seaborn will show the variance if this is enabled, otherwise
            # use the robust timerit mean / min times
            chunk_iter = ub.chunks(ti.times, ti.bestof)
            times = list(map(min, chunk_iter))  # TODO: timerit method for this
            for time in times:
                row = {
                    # 'mean': ti.mean(),
                    'time': time,
                    'key': key,
                    **group_keys,
                    **params,
                }
                rows.append(row)
        else:
            row = {
                'mean': ti.mean(),
                'min': ti.min(),
                'key': key,
                **group_keys,
                **params,
            }
            rows.append(row)

    # Column that holds the measurement used for sorting and plotting
    time_key = 'time' if RECORD_ALL else 'min'

    # The rows define a long-form pandas data array.
    # Data in long-form makes it very easy to use seaborn.
    data = pd.DataFrame(rows)
    data = data.sort_values(time_key)

    if RECORD_ALL:
        # Show the min / mean if we record all
        min_times = data.groupby('key').min().rename({'time': 'min'}, axis=1)
        mean_times = data.groupby('key')[['time']].mean().rename({'time': 'mean'}, axis=1)
        stats_data = pd.concat([min_times, mean_times], axis=1)
        stats_data = stats_data.sort_values('min')
    else:
        stats_data = data

    USE_OPENSKILL = 1
    if USE_OPENSKILL:
        # Lets try a real ranking method
        # https://github.com/OpenDebates/openskill.py
        import openskill
        method_ratings = {m: openskill.Rating() for m in basis['method']}

    # Group by every remaining parameter column so that each group compares
    # the methods under identical settings.
    other_keys = sorted(
        set(stats_data.columns) - {'key', 'method', 'min', 'mean', 'hue_key', 'size_key', 'style_key'})
    for params, variants in stats_data.groupby(other_keys):
        variants = variants.sort_values('mean')
        ranking = variants['method'].reset_index(drop=True)

        # Ratio of the largest time in the group to each variant's time
        mean_speedup = variants['mean'].max() / variants['mean']
        stats_data.loc[mean_speedup.index, 'mean_speedup'] = mean_speedup
        min_speedup = variants['min'].max() / variants['min']
        stats_data.loc[min_speedup.index, 'min_speedup'] = min_speedup

        if USE_OPENSKILL:
            # The idea is that each setting of parameters is a game, and each
            # "method" is a player. We rank the players by which is fastest,
            # and update their ranking according to the Weng-Lin Bayes ranking
            # model. This does not take the fact that some "games" (i.e.
            # parameter settings) are more important than others, but it should
            # be fairly robust on average.
            old_ratings = [[r] for r in ub.take(method_ratings, ranking)]
            new_values = openskill.rate(old_ratings)  # Not inplace
            new_ratings = [openskill.Rating(*new[0]) for new in new_values]
            method_ratings.update(ub.dzip(ranking, new_ratings))

    print('Statistics:')
    print(stats_data)

    if USE_OPENSKILL:
        from openskill import predict_win
        win_prob = predict_win([[r] for r in method_ratings.values()])
        skill_agg = pd.Series(ub.dzip(method_ratings.keys(), win_prob)).sort_values(ascending=False)
        print('Aggregated Rankings =\n{}'.format(skill_agg))

    plot = True
    if plot:
        # import seaborn as sns
        # kwplot autosns works well for IPython and script execution.
        # not sure about notebooks.
        import kwplot
        sns = kwplot.autosns()
        plt = kwplot.autoplt()

        # Only pass grouping keywords for groups that have labels
        plotkw = {}
        for gname, labels in group_labels.items():
            if labels:
                plotkw[gname] = gname + '_key'

        # Your variables may change
        ax = kwplot.figure(fnum=1, doclf=True).gca()
        sns.lineplot(data=data, x=xlabel, y=time_key, marker='o', ax=ax, **plotkw)
        ax.set_title('Benchmark Name')
        ax.set_xlabel('Size (todo: A better x-variable description)')
        ax.set_ylabel('Time (todo: A better y-variable description)')
        # ax.set_xscale('log')
        # ax.set_yscale('log')

        # Only call plt.show() when the __IPYTHON__ name is not defined
        try:
            __IPYTHON__
        except NameError:
            plt.show()
def test_chunk_total_nchunks():
    """
    With ``total=10`` and ``nchunks=4`` the reported length must be 4,
    even though the given item list is empty.
    """
    chunked = ub.chunks([], total=10, nchunks=4)
    n_chunks = len(chunked)
    assert n_chunks == 4
def benchmark_mul_vs_pow():
    """
    Benchmark repeated multiplication against the ``**`` operator.

    Times the methods listed in ``basis['method']`` for each exponent ``n``
    and each kind of base value ``v``, collects the per-chunk minimum times
    into a long-form pandas DataFrame, prints it, and draws a log-scale
    seaborn line plot via kwplot.

    Requires ubelt, pandas, timerit, and kwplot, which are imported inside
    this function.
    """
    import ubelt as ub
    import pandas as pd
    import timerit
    from functools import reduce
    import operator as op
    import itertools as it

    def method_pow_via_mul_raw(n):
        """
        Construct a function that does multiplication of a value n times
        """
        # The evaluated source is built only from the literal 'v' repeated
        # n times, e.g. 'lambda v: v * v * v' for n=3.
        return eval('lambda v: ' + ' * '.join(['v'] * n))

    def method_pow_via_mul_for(v, n):
        ret = v
        for _ in range(1, n):
            ret = ret * v
        return ret

    def method_pow_via_mul_reduce(v, n):
        """
        Alternative way to multiply a value n times
        """
        return reduce(op.mul, it.repeat(v, n))

    def method_pow_via_pow(v, n):
        return v ** n

    # Maps a method name from `basis['method']` to the callable above
    method_lut = locals()  # can populate this some other way

    ti = timerit.Timerit(500000, bestof=1000, verbose=2)

    basis = {
        'method': ['method_pow_via_mul_raw', 'method_pow_via_pow'],
        'n': list(range(1, 20)),
        'v': ['random-int', 'random-float'],
        # 'param_name': [param values],
    }
    xlabel = 'n'
    kw_labels = ['v', 'n']
    group_labels = {
        'style': ['v'],
        'size': [],
    }
    # The hue group gets every basis key that is neither the x-axis label
    # nor already assigned to another group.
    group_labels['hue'] = list(
        (ub.oset(basis) - {xlabel}) - set.union(*map(set, group_labels.values())))
    grid_iter = list(ub.named_product(basis))

    # For each variation of your experiment, create a row.
    rows = []
    for params in grid_iter:
        group_keys = {}
        for gname, labels in group_labels.items():
            group_keys[gname + '_key'] = ub.repr2(
                ub.dict_isect(params, labels), compact=1, si=1)
        key = ub.repr2(params, compact=1, si=1)
        kwargs = ub.dict_isect(params.copy(), kw_labels)
        method = method_lut[params['method']]
        # Timerit will run some user-specified number of loops.
        # and compute time stats with similar methodology to timeit
        if params['method'] == 'method_pow_via_mul_raw':
            # Build the specialized lambda up front; 'n' is consumed here so
            # the timed call only receives 'v'.
            method = method(kwargs.pop('n'))
        for timer in ti.reset(key):
            # Put any setup logic you dont want to time here.
            # ...
            import random
            # NOTE(review): kwargs['v'] is overwritten with a number on the
            # first pass, so on later iterations none of these string
            # comparisons match and the same value is reused -- confirm
            # this is intended.
            if kwargs['v'] == 'random':
                kwargs['v'] = random.randint(1, 31000) if random.random() > 0.5 else random.random()
            elif kwargs['v'] == 'random-int':
                kwargs['v'] = random.randint(1, 31000)
            elif kwargs['v'] == 'random-float':
                kwargs['v'] = random.random()
            with timer:
                # Put the logic you want to time here
                method(**kwargs)

        # One row per chunk of `ti.bestof` recorded times, keeping the
        # minimum time of each chunk.
        for time in map(min, ub.chunks(ti.times, ti.bestof)):
            row = {
                # 'mean': ti.mean(),
                'time': time,
                'key': key,
                **group_keys,
                **params,
            }
            rows.append(row)

    # The rows define a long-form pandas data array.
    # Data in long-form makes it very easy to use seaborn.
    data = pd.DataFrame(rows)
    # data = data.sort_values('time')
    print(data)

    plot = True
    if plot:
        # import seaborn as sns
        # kwplot autosns works well for IPython and script execution.
        # not sure about notebooks.
        import kwplot
        sns = kwplot.autosns()
        plt = kwplot.autoplt()

        # Only pass grouping keywords for groups that have labels
        plotkw = {}
        for gname, labels in group_labels.items():
            if labels:
                plotkw[gname] = gname + '_key'

        # Your variables may change
        ax = kwplot.figure(fnum=1, doclf=True).gca()
        sns.lineplot(data=data, x=xlabel, y='time', marker='o', ax=ax, **plotkw)
        ax.set_title('Benchmark')
        ax.set_xlabel('N')
        ax.set_ylabel('Time')
        ax.set_yscale('log')

        plt.show()