def benchmark_hash_data():
    """
    Benchmark ``ub.hash_data`` over several hashers and input sizes.

    For every scale ``s`` a list holding ``2 ** s`` references to a small
    nested item is hashed with each hasher. The mean time reported by
    ``ub.Timerit`` is printed per scale, then summarized in pandas tables
    and optionally plotted.

    Command line options (read with ``ub.argval`` / ``ub.argflag``):
        --convert=<str> : forwarded as ``convert=`` to ``ub.hash_data``.
            The value is lower-cased and compared against ``'true'``.
            Defaults to True.
        --show : plot the timings.

    CommandLine:
        python ~/code/ubelt/dev/bench_hash.py --convert=True --show
        python ~/code/ubelt/dev/bench_hash.py --convert=False --show
    """
    import ubelt as ub
    #ITEM = 'JUST A STRING' * 100
    ITEM = [0, 1, 'a', 'b', ['JUST A STRING'] * 4]
    HASHERS = ['sha1', 'sha512', 'xxh32', 'xxh64', 'blake3']
    scales = list(range(5, 13))
    results = ub.AutoDict()
    # Using json is faster or at least as fast in most cases
    # xxhash is also significantly faster than sha512

    # The value is lower-cased, so it has to be compared with a lower-case
    # literal. Comparing with 'True' made this flag unconditionally False.
    convert = ub.argval('--convert', default='True').lower() == 'true'
    print('convert = {!r}'.format(convert))

    ti = ub.Timerit(9, bestof=3, verbose=1, unit='ms')
    for s in ub.ProgIter(scales, desc='benchmark', verbose=3):
        N = 2 ** s
        print(' --- s={s}, N={N} --- '.format(s=s, N=N))
        data = [ITEM] * N
        for hasher in HASHERS:
            for timer in ti.reset(hasher):
                ub.hash_data(data, hasher=hasher, convert=convert)
            results[hasher].update({N: ti.mean()})

        # Rank the hashers for this N by mean time
        col = {h: results[h][N] for h in HASHERS}
        sortx = ub.argsort(col)
        ranking = ub.dict_subset(col, sortx)
        print('walltime: ' + ub.repr2(ranking, precision=9, nl=0))

        # Express every hasher relative to the first entry of the ranking
        best = next(iter(ranking))
        #pairs = list(ub.iter_window( 2))
        pairs = [(k, best) for k in ranking]
        ratios = [ranking[k1] / ranking[k2] for k1, k2 in pairs]
        nicekeys = ['{}/{}'.format(k1, k2) for k1, k2 in pairs]
        relratios = ub.odict(zip(nicekeys, ratios))
        print('speedup: ' + ub.repr2(relratios, precision=4, nl=0))

    # xdoc +REQUIRES(--show)
    # import pytest
    # pytest.skip()
    import pandas as pd
    df = pd.DataFrame.from_dict(results)
    df.columns.name = 'hasher'
    df.index.name = 'N'

    # Start from an empty frame that shares the index of ``df``
    ratios = df.copy().drop(columns=df.columns)
    for k1, k2 in [('sha512', 'xxh32'), ('sha1', 'xxh32'), ('xxh64', 'xxh32')]:
        ratios['{}/{}'.format(k1, k2)] = df[k1] / df[k2]

    print()
    print('Seconds per iteration')
    print(df.to_string(float_format='%.9f'))
    print()
    print('Ratios of seconds')
    print(ratios.to_string(float_format='%.2f'))
    print()
    print('Average Ratio (over all N)')
    print('convert = {!r}'.format(convert))
    print(ratios.mean().sort_values())

    if ub.argflag('--show'):
        import netharn.util as kwel
        kwel.autompl()
        xdata = sorted(ub.peek(results.values()).keys())
        ydata = ub.map_vals(lambda d: [d[x] for x in xdata], results)
        kwel.multi_plot(xdata, ydata, xlabel='N', ylabel='seconds',
                        title='convert = {}'.format(convert))
        kwel.show_if_requested()
def run_benchmark_renormalization():
    """
    See if we can renormalize probabilities after update with a faster method
    that maintains memory a bit better.

    Compiles ``~/misc/tests/python/renormalize_cython.pyx`` through xdev,
    profiles the pure-python demo variants once, times the selected
    implementations over a grid of ``T`` / ``D`` arguments, then plots the
    minimum times and prints the mean time per method.

    Example:
        >>> import sys, ubelt
        >>> sys.path.append(ubelt.expandpath('~/misc/tests/python'))
        >>> from bench_renormalization import *  # NOQA
        >>> run_benchmark_renormalization()
    """
    import ubelt as ub
    import xdev
    import pathlib
    import timerit

    fpath = pathlib.Path('~/misc/tests/python/renormalize_cython.pyx').expanduser()
    renormalize_cython = xdev.import_module_from_pyx(
        fpath, annotate=True, verbose=3, recompile=True)

    xdev.profile_now(renormalize_demo_v1)(1000, 100)
    xdev.profile_now(renormalize_demo_v2)(1000, 100)
    xdev.profile_now(renormalize_demo_v3)(1000, 100)
    xdev.profile_now(renormalize_demo_v4)(1000, 100)

    func_list = [
        # renormalize_demo_v1,
        renormalize_demo_v2,
        # renormalize_demo_v3,
        # renormalize_demo_v4,
        renormalize_cython.renormalize_demo_cython_v1,
        renormalize_cython.renormalize_demo_cython_v2,
        renormalize_cython.renormalize_demo_cython_v3,
    ]
    methods = {f.__name__: f for f in func_list}

    # Single-shot timing of each method
    for key, method in methods.items():
        with timerit.Timer(label=key, verbose=0) as t:
            method(1000, 100)
        print(f'{key:<30} {t.toc():0.6f}')

    arg_basis = {
        'T': [10, 20, 30, 50],
        'D': [10, 50, 100, 300],
    }
    args_grid = []
    for argkw in list(ub.named_product(arg_basis)):
        if argkw['T'] <= argkw['D']:
            # Store the problem size on the grid item itself. Writing it to
            # ``arg_basis`` only overwrote one scalar on the basis dict and
            # never reached the result rows.
            argkw['size'] = argkw['T'] * argkw['D']
            args_grid.append(argkw)

    ti = timerit.Timerit(100, bestof=10, verbose=2)

    measures = []
    for method_name, method in methods.items():
        for argkw in args_grid:
            row = ub.dict_union({'method': method_name}, argkw)
            key = ub.repr2(row, compact=1)
            argkey = ub.repr2(argkw, compact=1)
            # Only T and D are real arguments of the benchmarked functions
            kwargs = ub.dict_subset(argkw, ['T', 'D'])
            for timer in ti.reset('time'):
                with timer:
                    method(**kwargs)
            row['mean'] = ti.mean()
            row['min'] = ti.min()
            row['key'] = key
            row['argkey'] = argkey
            measures.append(row)

    import pandas as pd
    df = pd.DataFrame(measures)
    import kwplot
    sns = kwplot.autosns()

    kwplot.figure(fnum=1, pnum=(1, 2, 1), docla=True)
    sns.lineplot(data=df, x='D', y='min', hue='method', style='method')
    kwplot.figure(fnum=1, pnum=(1, 2, 2), docla=True)
    sns.lineplot(data=df, x='T', y='min', hue='method', style='method')

    # DataFrame.pivot takes keyword-only arguments in pandas >= 2.0
    p = df.pivot(index=['method'], columns=['argkey'], values=['mean'])
    print(p.mean(axis=1).sort_values())
def _precompute_class_weights(dset, mode='median-idf'):
    """
    Compute one weight per class from the class frequencies of ``dset``.

    The frequencies come from ``_cached_class_frequency(dset)``. A "middle"
    frequency is taken as the median of the frequencies lying between the
    5th and 95th percentile (or of all frequencies if none do). Zero
    frequencies are replaced by half of the smallest non-zero frequency, and
    each weight is ``middle / frequency``. In ``'log-median-idf'`` mode the
    weights are additionally passed through a natural logarithm and clipped
    to ``[0.1, 10]``. Weights are rounded to 2 decimals and printed.

    Args:
        dset: dataset accepted by ``_cached_class_frequency``; its
            ``classes`` attribute is zipped with the weights for display.
        mode (str): either ``'median-idf'`` or ``'log-median-idf'``.

    Returns:
        ndarray: the rounded weights, in the order of the frequency array.

    Raises:
        AssertionError: if ``mode`` is not one of the supported values.

    Example:
        >>> # xdoctest: +REQUIRES(--download)
        >>> import sys, ubelt
        >>> sys.path.append(ubelt.expandpath('~/code/netharn/examples'))
        >>> from sseg_camvid import *  # NOQA
        >>> harn = setup_harn(0, workers=0, xpu='cpu').initialize()
        >>> dset = harn.datasets['train']
    """
    assert mode in ['median-idf', 'log-median-idf']

    # Work on a private floating point copy. The array is modified below, so
    # this keeps the helper's (possibly cached) result untouched and stops
    # ``min / 2`` from being truncated if the frequencies are integer typed.
    total_freq = np.array(_cached_class_frequency(dset), dtype=float)

    def logb(arr, base):
        # Logarithm of ``arr`` in an arbitrary base
        if base == 'e':
            return np.log(arr)
        elif base == 2:
            return np.log2(arr)
        elif base == 10:
            return np.log10(arr)
        else:
            out = np.log(arr)
            out /= np.log(base)
            return out

    _min, _max = np.percentile(total_freq, [5, 95])
    is_valid = (_min <= total_freq) & (total_freq <= _max)
    if np.any(is_valid):
        middle_value = np.median(total_freq[is_valid])
    else:
        middle_value = np.median(total_freq)

    # variant of median-inverse-frequency
    nonzero_freq = total_freq[total_freq != 0]
    if len(nonzero_freq):
        total_freq[total_freq == 0] = nonzero_freq.min() / 2

    if mode not in ('median-idf', 'log-median-idf'):
        # Only reachable when asserts are disabled (python -O)
        raise KeyError('mode = {!r}'.format(mode))

    weights = (middle_value / total_freq)
    # Division by a remaining zero gives inf / nan; fall back to 1.0
    weights[~np.isfinite(weights)] = 1.0
    if mode == 'log-median-idf':
        base = np.exp(1)
        weights = logb(weights + (base - 1), base)
        weights = np.maximum(weights, .1)
        weights = np.minimum(weights, 10)

    weights = np.round(weights, 2)
    cname_to_weight = ub.dzip(dset.classes, weights)
    print('weights: ' + ub.repr2(cname_to_weight))

    if False:
        # Inspect the weights
        import kwplot
        kwplot.autoplt()

        cname_to_weight = ub.dzip(dset.classes, weights)
        cname_to_weight = ub.dict_subset(cname_to_weight, ub.argsort(cname_to_weight))
        kwplot.multi_plot(
            ydata=list(cname_to_weight.values()),
            kind='bar',
            xticklabels=list(cname_to_weight.keys()),
            xtick_rotation=90,
            fnum=2, doclf=True)

    return weights
def benchmark_hash_file():
    """
    Benchmark ``ub.hash_file`` over several hashers and file sizes.

    For each scale a file is produced by ``_write_random_file`` from a pool
    of random chunks, its size is reported in megabytes, and every hasher
    is timed on it. Per-scale rankings are printed, followed by pandas
    summaries. Pass ``--show`` to plot the timings.

    CommandLine:
        python ~/code/ubelt/dev/bench_hash.py --show
    """
    import os
    import random
    import ubelt as ub

    hasher_names = ['sha1', 'sha512', 'xxh32', 'xxh64', 'blake3']
    exponents = list(range(5, 10))

    # out_dpath = ub.ensuredir(ub.expandpath('$HOME/raid/data/tmp'))
    out_dpath = ub.ensuredir(ub.expandpath('$HOME/tmp'))

    rng = random.Random(0)
    # Create a pool of random chunks of data
    chunk_nbytes = int(2 ** 20)
    num_chunks = 8
    part_pool = [_random_data(rng, chunk_nbytes) for _ in range(num_chunks)]

    results = ub.AutoDict()
    ti = ub.Timerit(9, bestof=3, verbose=1, unit='ms')

    for exponent in ub.ProgIter(exponents, desc='benchmark', verbose=3):
        num = 2 ** exponent
        print(' --- s={s}, N={N} --- '.format(s=exponent, N=num))

        # Write a big file
        fpath = _write_random_file(out_dpath, part_pool, [num], rng)
        file_mb = os.stat(fpath).st_size / (2 ** 20)
        print('megabytes = {!r}'.format(file_mb))

        for name in hasher_names:
            for _timer in ti.reset(name):
                ub.hash_file(fpath, hasher=name)
            results[name].update({num: ti.mean()})

        # Rank hashers by mean time for this file size
        walltimes = {name: results[name][num] for name in hasher_names}
        ranking = ub.dict_subset(walltimes, ub.argsort(walltimes))
        print('walltime: ' + ub.repr2(ranking, precision=9, nl=0))

        # Ratio of every hasher to the first entry of the ranking
        fastest = next(iter(ranking))
        relratios = ub.odict(
            ('{}/{}'.format(name, fastest), ranking[name] / ranking[fastest])
            for name in ranking
        )
        print('speedup: ' + ub.repr2(relratios, precision=4, nl=0))

    # xdoc +REQUIRES(--show)
    import pandas as pd
    df = pd.DataFrame.from_dict(results)
    df.columns.name = 'hasher'
    df.index.name = 'N'

    # Empty frame sharing the index of ``df``; filled with ratio columns
    ratios = df.copy().drop(columns=df.columns)
    baseline = 'xxh64'
    for numer in ['sha512', 'sha1', 'xxh32', 'blake3']:
        ratios['{}/{}'.format(numer, baseline)] = df[numer] / df[baseline]

    report = [
        ('Seconds per iteration', df.to_string(float_format='%.9f')),
        ('Ratios of seconds', ratios.to_string(float_format='%.2f')),
        ('Average Ratio (over all N)', ratios.mean().sort_values()),
    ]
    for heading, body in report:
        print()
        print(heading)
        print(body)

    if ub.argflag('--show'):
        import kwplot
        kwplot.autompl()
        first_series = ub.peek(results.values())
        xdata = sorted(first_series.keys())
        ydata = ub.map_vals(lambda d: [d[x] for x in xdata], results)
        kwplot.multi_plot(xdata, ydata, xlabel='N', ylabel='seconds')
        kwplot.show_if_requested()
def test_dict_subset_iterable():
    """
    Regression test: ``ub.dict_subset`` must accept a one-shot iterator of
    keys and still return every requested item.
    """
    # There was a bug in 0.7.0 where iterable keys would be exhausted too soon
    expected = {k: k for k in range(10)}
    one_shot_keys = iter(list(expected))
    subset = ub.dict_subset(expected, one_shot_keys)
    assert dict(subset) == expected
def att_faces_datasets(dim=224):
    """
    Download the AT&T faces data and build train / vali / test pair datasets.

    The zip archive is fetched with ``ub.grabdata`` and extracted next to
    itself on first use. Images are grouped by class with torchvision's
    ``ImageFolder``, the class names are shuffled with a fixed seed, and
    split into 35 train, 5 validation and the remaining test classes. Each
    split is passed through ``pair_sampler`` into a ``LabeledPairDataset``.

    Args:
        dim (int): forwarded as ``dim=`` to ``LabeledPairDataset``.

    Returns:
        tuple: ``(train_dataset, vali_dataset, test_dataset)``

    References:
        https://github.com/harveyslash/Facial-Similarity-with-Siamese-Networks-in-Pytorch

    Example:
        >>> from clab.live.siam_train import *
        >>> train_dataset, vali_dataset, test_dataset = att_faces_datasets()
        >>> # train_dataset[0][0].shape
        >>> # fpath = train_dataset.img1_fpaths[0]
    """
    def ensure_att_dataset():
        def unzip(zip_fpath, dpath=None, verbose=1):
            """
            Extracts all members of a zipfile.

            Args:
                zip_fpath (str): path of zip file to unzip.
                dpath (str): directory to unzip to. If not specified, it
                    defaults to a folder parallel to the zip file (excluding
                    the extension).
                verbose (int): verbosity level
            """
            import zipfile
            from os.path import splitext
            from ubelt import progiter
            if dpath is None:
                dpath = splitext(zip_fpath)[0]
            with zipfile.ZipFile(zip_fpath, 'r') as zf:
                members = zf.namelist()
                prog = progiter.ProgIter(members, verbose=verbose,
                                         label='unzipping')
                for zipinfo in prog:
                    zf.extract(zipinfo, path=dpath, pwd=None)
            return dpath

        faces_zip_fpath = ub.grabdata(
            'http://www.cl.cam.ac.uk/Research/DTG/attarchive/pub/data/att_faces.zip'
        )
        from os.path import splitext
        dpath = splitext(faces_zip_fpath)[0]
        # Only extract when the target folder is not there yet
        if not os.path.exists(dpath):
            dpath = unzip(faces_zip_fpath, dpath=dpath)
        return dpath

    # Download the data if you dont have it
    dpath = ensure_att_dataset()

    import torchvision.datasets
    folder_mod = torchvision.datasets.folder
    # Register '.pgm' at most once. IMG_EXTENSIONS is a list in old
    # torchvision releases and a tuple in newer ones; ``tuple += list``
    # raises TypeError, and an unconditional append grows the list on every
    # call.
    if '.pgm' not in folder_mod.IMG_EXTENSIONS:
        if isinstance(folder_mod.IMG_EXTENSIONS, list):
            folder_mod.IMG_EXTENSIONS += ['.pgm']
        else:
            folder_mod.IMG_EXTENSIONS = (
                tuple(folder_mod.IMG_EXTENSIONS) + ('.pgm',))

    im_dset = torchvision.datasets.ImageFolder(root=dpath)
    # ``imgs`` is unzipped into two parallel sequences that are handed to
    # ub.group_items as (items, groupids).
    class_to_id = ub.group_items(*zip(*im_dset.imgs))

    import utool as ut
    names = sorted(list(class_to_id.keys()))
    names = ut.shuffle(names, rng=10)
    learn, test = names[:40], names[40:]
    train, vali = learn[:35], learn[35:]
    print('train = {!r}'.format(len(train)))
    print('vali = {!r}'.format(len(vali)))
    print('test = {!r}'.format(len(test)))

    train_dataset = LabeledPairDataset(*pair_sampler(
        ub.dict_subset(class_to_id, train)), dim=dim)
    vali_dataset = LabeledPairDataset(*pair_sampler(
        ub.dict_subset(class_to_id, vali)), dim=dim)
    test_dataset = LabeledPairDataset(*pair_sampler(
        ub.dict_subset(class_to_id, test)), dim=dim)
    print('train_dataset = {!r}'.format(len(train_dataset)))
    print('vali_dataset = {!r}'.format(len(vali_dataset)))
    print('test_dataset = {!r}'.format(len(test_dataset)))
    return train_dataset, vali_dataset, test_dataset
def sort_gids_by_nannots(gids):
    """
    Order image ids by their number of annotations.

    Annotation ids are looked up in ``merged.gid_to_aids``; ids without an
    entry fall back to an empty list and therefore count as zero.

    Args:
        gids: image ids to rank.

    Returns:
        list: result of ``ub.argsort`` on the per-image counts, reversed.
    """
    gid_to_aids = ub.dict_subset(merged.gid_to_aids, gids, default=[])
    gid_to_count = ub.map_vals(len, gid_to_aids)
    ascending = ub.argsort(gid_to_count)
    return ascending[::-1]
def 字典_取子集(数组, key, default=ub.util_const.NoParam, cls=OrderedDict):
    """
    Thin wrapper that forwards its arguments, positionally and unchanged, to
    ``ub.dict_subset``.

    Args:
        数组: the mapping to select from.
        key: the keys to select.
        default: passed through as the third argument; defaults to
            ``ub.util_const.NoParam``.
        cls: passed through as the fourth argument; defaults to
            ``OrderedDict``.

    Returns:
        whatever ``ub.dict_subset`` returns for these arguments.
    """
    return ub.dict_subset(数组, key, default, cls)
def _training_sample_weights(self):
    """
    Assign a weight to each image to influence its sample probability.

    We want to see very frequent categories less often, but we also don't
    really care about the rarest classes to the point where we should
    sample them more than uncommon classes. We also don't want to sample
    images without any or with too many annotations very often.

    Returns:
        ndarray: one weight per entry of ``self.dset.dataset['images']``,
            in that order.
    """
    dset = self.dset
    index_to_gid = [img['id'] for img in dset.dataset['images']]
    # Category ids of the annotations in every image
    index_to_cids = [
        [dset.anns[aid]['category_id'] for aid in aids]
        for aids in ub.take(dset.gid_to_aids, index_to_gid)
    ]

    catname_to_cid = {}
    for cid, cat in dset.cats.items():
        catname_to_cid[cat['name']] = cid

    # median frequency weighting with minimum threshold
    min_examples = 20
    cat_freq = pd.Series(dset.category_annotation_frequency())

    valid_freq = cat_freq[cat_freq > min_examples]
    normal_mfw = valid_freq.median() / valid_freq

    # Draw anything under the threshold with probability equal to the median
    is_rare = (cat_freq <= min_examples) & (cat_freq > 0)
    too_few = cat_freq[is_rare]
    too_few[:] = 1.0

    category_mfw = pd.concat([normal_mfw, too_few])
    cid_to_mfw = category_mfw.rename(catname_to_cid)
    cid_to_mfw_dict = cid_to_mfw.to_dict()

    index_to_weights = []
    for cids in index_to_cids:
        index_to_weights.append(list(ub.take(cid_to_mfw_dict, cids)))
    index_to_nannots = np.array([len(ws) for ws in index_to_weights])

    # Each image becomes represented by the category with maximum median
    # frequency weight. This allows us to assign each image a proxy class
    # We make another proxy class to represent images without anything in
    # them.
    EMPTY_PROXY_CID = -1

    def _proxy_class(cids):
        if not len(cids):
            return EMPTY_PROXY_CID
        # cid_to_mfw.loc[cids].idxmax()
        return ub.argmax(ub.dict_subset(cid_to_mfw_dict, cids))

    index_to_proxyid = [_proxy_class(cids) for cids in index_to_cids]

    proxy_freq = pd.Series(ub.dict_hist(index_to_proxyid))
    power = 0.878
    proxy_root_mfw = (proxy_freq.median() / proxy_freq) ** power

    # We now have a weight for each item in our dataset
    proxy_to_weight = proxy_root_mfw.to_dict()
    index_to_weight = np.array(list(ub.take(proxy_to_weight, index_to_proxyid)))

    if False:
        # Figure out how the likelihoods of each class change
        xy = {}
        for power in [0, .5, .878, 1]:
            proxy_root_mfw = proxy_freq.median() / proxy_freq
            # dont let weights get too high
            # proxy_root_mfw = np.sqrt(proxy_root_mfw)
            # power = .88
            proxy_root_mfw = proxy_root_mfw ** power
            # proxy_root_mfw = np.clip(proxy_root_mfw, a_min=None, a_max=3)

            index_to_weight = list(ub.take(proxy_root_mfw.to_dict(), index_to_proxyid))

            if 1:
                # what is the probability we draw an empty image?
                df = pd.DataFrame({
                    'nannots': index_to_nannots,
                    'weight': index_to_weight,
                })
                df['prob'] = df.weight / df.weight.sum()

                prob_empty = df.prob[df.nannots == 0].sum()

                probs = {'empty': prob_empty}
                for cid in cid_to_mfw.index:
                    flags = [cid in cids for cids in index_to_cids]
                    catname = self.dset.cats[cid]['name']
                    p = df[flags].prob.sum()
                    probs[catname] = p
                xy['p{}'.format(power)] = pd.Series(probs)
        xy['freq'] = {}
        for cid in cid_to_mfw.index:
            catname = self.dset.cats[cid]['name']
            xy['freq'][catname] = proxy_freq[cid]
        print(pd.DataFrame(xy))

    # index_to_prob = index_to_weight / index_to_weight.sum()
    return index_to_weight
def __init__(self, index_to_labels, batch_size=1, num_batches='auto',
             label_to_weight=None, shuffle=False, rng=None):
    """
    Build per-item sampling probabilities from a tf-idf style label score.

    For every distinct label, the term frequency of an item is the number
    of times the label occurs in that item, and the idf is
    ``#items / #items-containing-label`` (optionally scaled by
    ``label_to_weight[label]``). The per-label score of an item is
    ``max(tf * idf, 1)``; scores are summed over labels and normalized to
    sum to one.

    Args:
        index_to_labels: sequence whose i-th entry holds the labels of
            item i.
        batch_size (int): stored on the instance.
        num_batches (int | str): stored on the instance; ``'auto'``
            delegates to ``self._auto_num_batches()``.
        label_to_weight (dict | None): optional multiplier per label,
            applied to the idf.
        shuffle (bool): stored on the instance.
        rng: seed or random state, coerced with ``kwarray.ensure_rng``.
    """
    import kwarray
    # Kept as a two step coercion (python api first, numpy api below)
    rng = kwarray.ensure_rng(rng, api='python')

    flat_labels = np.hstack(index_to_labels)
    label_to_freq = ub.dict_hist(flat_labels)

    # Use tf-idf based scheme to compute sample probabilities
    num_items = len(index_to_labels)
    label_to_tfidf = {}
    for label in sorted(set(flat_labels)):
        # tf for each img, is the number of times the label appears
        # (asarray keeps the comparison elementwise for plain lists too)
        index_to_tf = np.array([
            (label == np.asarray(item_labels)).sum()
            for item_labels in index_to_labels
        ], dtype=float).reshape(num_items)
        # idf is the #imgs / #imgs-with-label
        idf = len(index_to_tf) / (index_to_tf > 0).sum()
        if label_to_weight:
            idf = idf * label_to_weight[label]
        label_to_tfidf[label] = np.maximum(index_to_tf * idf, 1)
    index_to_weight = sum(label_to_tfidf.values())
    index_to_prob = index_to_weight / index_to_weight.sum()

    if 0:
        # Debugging aid: compare weighted and unweighted label odds
        index_to_unique_labels = list(map(set, index_to_labels))
        unique_freq = ub.dict_hist(ub.flatten(index_to_unique_labels))
        tot = sum(unique_freq.values())
        unweighted_odds = ub.map_vals(lambda x: x / tot, unique_freq)

        label_to_indices = ub.ddict(set)
        for index, item_labels in enumerate(index_to_labels):
            for label in item_labels:
                label_to_indices[label].add(index)
        ub.map_vals(len, label_to_indices)

        label_to_odds = ub.ddict(lambda: 0)
        for label, indices in label_to_indices.items():
            for idx in indices:
                label_to_odds[label] += index_to_prob[idx]

        coi = {x for x, w in label_to_weight.items() if w > 0}
        coi_weighted = ub.dict_subset(label_to_odds, coi)
        coi_unweighted = ub.dict_subset(unweighted_odds, coi)
        print('coi_weighted = {}'.format(ub.repr2(coi_weighted, nl=1)))
        print('coi_unweighted = {}'.format(ub.repr2(coi_unweighted, nl=1)))

    # NOTE(review): _auto_num_batches is defined outside this block. Every
    # plain attribute is assigned first so that the helper can rely on any
    # of them (e.g. batch_size), which was not the case before.
    self.index_to_prob = index_to_prob
    self.indices = np.arange(len(index_to_prob))
    self.label_to_freq = label_to_freq
    self.index_to_labels = index_to_labels
    self.batch_size = batch_size
    self.shuffle = shuffle
    self.rng = kwarray.ensure_rng(rng, api='numpy')

    if num_batches == 'auto':
        self.num_batches = self._auto_num_batches()
    else:
        self.num_batches = num_batches
def viz_overlay_layers(task):
    """
    Render the annotated class polygons of every scene over a frame image.

    For each scene in ``task.scene_ids`` the ``static.json`` annotations are
    parsed with ``task.parse_scene_elements``. For every annotated frame id
    the polygons are drawn as semi-transparent filled layers plus outline
    layers, alpha-blended over a grayscale version of the scene's first
    (sorted) png, shown next to a legend, and written to
    ``scene_plots/scene_<scene>_<frame_id>.png``.

    Args:
        task: object providing ``scene_ids``, ``scene_base``,
            ``class_colors`` and ``parse_scene_elements``.

    Example:
        >>> from pysseg.tasks import *
        >>> task = DivaV1(clean=0)
    """
    for scene in ub.ProgIter(task.scene_ids, label='scene', verbose=3):
        scene_path = join(task.scene_base, scene, 'static')
        frame_image_fpaths = sorted(glob.glob(join(scene_path, '*.png')))
        scene_json_fpath = join(scene_path, 'static.json')

        frame_to_class_coords = task.parse_scene_elements(scene_json_fpath)
        from pysseg.util import imutil

        def new_layer(shape, classname, poly_coords):
            # Yields two RGBA layers: the filled polygon, then its outline.
            # ``np.int`` was removed in NumPy 1.24; it was an alias of the
            # builtin ``int``, so this conversion is unchanged.
            coords = np.round(np.array([poly_coords])).astype(int)

            alpha = int(.5 * 255)
            color = list(task.class_colors[classname]) + [alpha]

            # Initialize groundtruth image
            layer = np.full((shape[0], shape[1], 4), fill_value=0, dtype=np.uint8)
            layer = cv2.fillPoly(layer, coords, color)
            layer = imutil.ensure_float01(layer)
            yield layer

            # outline to see more clearly
            alpha = int(.95 * 255)
            color = list(task.class_colors[classname]) + [alpha]
            layer = np.full((shape[0], shape[1], 4), fill_value=0, dtype=np.uint8)
            layer = cv2.drawContours(layer, [coords], -1, color, 3)
            layer = imutil.ensure_float01(layer)
            yield layer

        priority = [
            'Crosswalk', 'Intersection', 'Trees', 'Grass', 'Parking_Lot'
        ]

        for frame_id, class_coords in frame_to_class_coords.items():
            # The first image of the scene is used for every frame id
            frame_fpath = frame_image_fpaths[0]
            frame = cv2.imread(frame_fpath)
            shape = frame.shape[:2]
            # {c[0] for c in class_coords}
            layers = []
            boarder_layers = []
            # Classes named in ``priority`` sort first, everything else last
            class_coords = sorted(
                class_coords,
                key=lambda t: 900 if t[0] not in priority else priority.index(t[0]))
            classnames = {p[0] for p in class_coords}
            for classname, poly_coords in reversed(class_coords):
                layer, layer_border = list(
                    new_layer(shape, classname, poly_coords))
                layers.append(layer)
                boarder_layers.append(layer_border)

            layers = boarder_layers + layers

            topdown = layers[0]
            for layer in ub.ProgIter(layers[1:], label='blending'):
                topdown = imutil.overlay_alpha_images(topdown, layer)

            blend = imutil.overlay_alpha_images(topdown, imutil.ensure_grayscale(frame))

            import plottool as pt
            import matplotlib.patches as patches
            import matplotlib.pyplot as plt
            import matplotlib as mpl

            mpl.rcParams['legend.fontsize'] = 20
            mpl.rcParams['legend.loc'] = 'center'
            mpl.rcParams['axes.titlesize'] = 20
            mpl.rcParams['figure.titlesize'] = 20

            # Colors are stored as BGR; matplotlib wants RGB in [0, 1]
            handles = [
                patches.Patch(color=np.array(bgr[::-1]) / 255, label=classname)
                for classname, bgr in ub.dict_subset(task.class_colors, classnames).items()
            ]
            n_cols = 5
            n = 1
            pt.imshow(blend, pnum=(1, n_cols, slice(0, n_cols - n)), fnum=1)
            ax = pt.gca()
            ax.set_title('Scene {}, frame {}'.format(scene, frame_id))

            # The last ``n`` columns hold the legend only
            pt.figure(fnum=1, pnum=(1, n_cols, slice(n_cols - n, n_cols)))
            ax = pt.gca()
            ax.grid(False)
            ax.set_xticks([])
            ax.set_yticks([])
            plt.legend(handles=handles)
            mplutil.adjust_subplots(top=.9, bottom=0, left=0, right=1, wspace=.01)

            fig = pt.gcf()
            inches = np.array(blend.shape[:2][::-1]) / fig.dpi
            fig.set_size_inches(*inches)

            ub.ensuredir('scene_plots')
            cv2.imwrite(
                'scene_plots/scene_{}_{}.png'.format(scene, frame_id),
                mplutil.render_figure_to_image(fig, dpi=100, transparent=True))
def sort_gids_by_nannots(sampler, gids):
    """
    Order image ids by their number of annotations.

    Args:
        sampler: object whose ``dset.gid_to_aids`` maps an image id to its
            annotation ids.
        gids: image ids to rank; ids without an entry count as zero
            because they fall back to an empty list.

    Returns:
        list: result of ``ub.argsort`` on the per-image counts, reversed.
    """
    gid_to_aids = sampler.dset.gid_to_aids
    counts = ub.map_vals(len, ub.dict_subset(gid_to_aids, gids, default=[]))
    ranked = ub.argsort(counts)
    return ranked[::-1]
def benchamrk_det_nms():
    """
    Benchmarks different implementations of non-max-supression on the CPU, GPU,
    and using cython / numpy / torch.

    For every box count in ``xdata`` random detections are generated,
    duplicated with a small translation, and passed to
    ``Detections.non_max_supression`` for each (type, impl) combination.
    The minimum time per combination is recorded, the agreement between
    the kept indices of all combinations is printed, and the results are
    plotted.

    Command line flags (read with ``ub.argflag`` / ``ub.argval``):
        --small, --medium, --large, --extra-large : choose the box counts
        --small-boxes : clamp box width / height to 100
        --thresh=<float> : NMS threshold (default 0.4)
        --daq : additionally vary the ``daq`` option

    CommandLine:
        xdoctest -m ~/code/kwimage/dev/bench_nms.py benchamrk_det_nms --show

    SeeAlso:
        PJR Darknet NonMax supression
        https://github.com/pjreddie/darknet/blob/master/src/box.c

        Lightnet NMS
        https://gitlab.com/EAVISE/lightnet/blob/master/lightnet/data/transform/_postprocess.py#L116
    """
    # xdata = [10, 20, 40, 80, 100, 200, 300, 400, 500, 600, 700, 1000, 1500, 2000]

    # max number of boxes yolo will spit out at a time
    max_boxes = 19 * 19 * 5

    xdata = [
        10, 20, 40, 80, 100, 200, 300, 400, 500, 600, 700, 1000, 1500,
        max_boxes
    ]
    # xdata = [10, 20, 40, 80, 100, 200, 300, 400, 500]

    # Demo values
    xdata = [0, 1, 2, 3, 10, 100, 200, 300, 500]

    # Later flags take precedence over earlier ones
    if ub.argflag('--small'):
        xdata = [10, 100, 500, 1000, 1500, 2000, 5000, 10000]

    if ub.argflag('--medium'):
        xdata = [
            1000, 5000, 10000, 20000, 50000,
        ]

    if ub.argflag('--large'):
        xdata = [
            1000, 5000, 10000, 20000, 50000, 100000,
        ]

    if ub.argflag('--extra-large'):
        xdata = [
            1000, 2000, 10000, 20000, 40000, 100000, 200000,
        ]

    title_parts = []

    SMALL_BOXES = ub.argflag('--small-boxes')
    if SMALL_BOXES:
        title_parts.append('small boxes')
    else:
        title_parts.append('large boxes')

    # NOTE: for large images we may have up to 21,850,753 detections!

    thresh = float(ub.argval('--thresh', default=0.4))
    title_parts.append('thresh={:.2f}'.format(thresh))

    from kwimage.algo.algo_nms import available_nms_impls
    valid_impls = available_nms_impls()
    print('valid_impls = {!r}'.format(valid_impls))

    basis = {
        'type': ['ndarray', 'tensor', 'tensor0'],
        # 'daq': [True, False],
        # 'daq': [False],
        # 'device': [None],
        # 'impl': valid_impls,
        'impl': valid_impls + ['auto'],
    }

    if ub.argflag('--daq'):
        basis['daq'] = [True, False]

    # if torch.cuda.is_available():
    #     basis['device'].append(0)

    combos = [
        ub.dzip(basis.keys(), vals) for vals in it.product(*basis.values())
    ]

    def is_valid_combo(combo):
        # Reject combinations listed in ``known_bad`` (a combo is rejected
        # when it matches every key / value pair of an entry).
        # if combo['impl'] in {'py', 'cython_cpu'} and combo['device'] is not None:
        #     return False
        # if combo['type'] == 'ndarray' and combo['impl'] == 'cython_gpu':
        #     if combo['device'] is None:
        #         return False
        # if combo['type'] == 'ndarray' and combo['impl'] != 'cython_gpu':
        #     if combo['device'] is not None:
        #         return False

        # if combo['type'].endswith('0'):
        #     if combo['impl'] in {'numpy', 'cython_gpu', 'cython_cpu'}:
        #         return False

        # if combo['type'] == 'ndarray':
        #     if combo['impl'] in {'torch'}:
        #         return False

        REMOVE_SLOW = True
        if REMOVE_SLOW:
            known_bad = [
                {'impl': 'torch', 'type': 'tensor'},
                {'impl': 'numpy', 'type': 'tensor'},
                # {'impl': 'cython_gpu', 'type': 'tensor'},
                {'impl': 'cython_cpu', 'type': 'tensor'},

                # {'impl': 'torch', 'type': 'tensor0'},
                {'impl': 'numpy', 'type': 'tensor0'},
                # {'impl': 'cython_gpu', 'type': 'tensor0'},
                # {'impl': 'cython_cpu', 'type': 'tensor0'},

                {'impl': 'torchvision', 'type': 'ndarray'},
            ]
            for known in known_bad:
                if all(combo[key] == val for key, val in known.items()):
                    return False

        return True

    combos = list(filter(is_valid_combo, combos))

    times = ub.ddict(list)
    for num in xdata:

        # Pick the timing budget from the number of boxes. This must be a
        # single elif chain: with independent ``if`` statements the later
        # branches overwrote the budgets meant for the largest inputs.
        if num > 10000:
            N = 1
            bestof = 1
        elif num > 1000:
            N = 3
            bestof = 1
        elif num > 100:
            N = 10
            bestof = 3
        elif num > 10:
            N = 100
            bestof = 10
        else:
            N = 1000
            bestof = 10
        print('\n\n---- number of boxes = {} ----\n'.format(num))

        outputs = {}

        ti = ub.Timerit(N, bestof=bestof, verbose=1)

        # Build random test boxes and scores
        np_dets1 = kwimage.Detections.random(num // 2, scale=1000.0, rng=0)
        np_dets1.data['boxes'] = np_dets1.boxes.to_xywh()

        if SMALL_BOXES:
            max_dim = 100
            np_dets1.boxes.data[..., 2] = np.minimum(np_dets1.boxes.width, max_dim).ravel()
            np_dets1.boxes.data[..., 3] = np.minimum(np_dets1.boxes.height, max_dim).ravel()

        np_dets2 = copy.deepcopy(np_dets1)
        np_dets2.boxes.translate(10, inplace=True)
        # add boxes that will definately be removed
        np_dets = kwimage.Detections.concatenate([np_dets1, np_dets2])

        # make all scores unique to ensure comparability
        np_dets.scores[:] = np.linspace(0, 1, np_dets.num_boxes())

        np_dets.data['scores'] = np_dets.scores.astype(np.float32)
        np_dets.boxes.data = np_dets.boxes.data.astype(np.float32)

        # Cache of the detections converted to each container type
        typed_data = {}
        # ----------------------------------

        import netharn as nh
        for combo in combos:
            print('combo = {}'.format(ub.repr2(combo, nl=0)))

            label = nh.util.make_idstr(combo)
            mode = combo.copy()

            # if mode['impl'] == 'cython_gpu':
            #     mode['device_id'] = mode['device']

            mode_type = mode.pop('type')

            if mode_type in typed_data:
                dets = typed_data[mode_type]
            else:
                if mode_type == 'ndarray':
                    dets = np_dets.numpy()
                elif mode_type == 'tensor':
                    dets = np_dets.tensor(None)
                elif mode_type == 'tensor0':
                    dets = np_dets.tensor(0)
                else:
                    raise KeyError
                typed_data[mode_type] = dets

            for timer in ti.reset(label):
                with timer:
                    keep = dets.non_max_supression(thresh=thresh, **mode)
                    torch.cuda.synchronize()
            times[ti.label].append(ti.min())
            outputs[ti.label] = ensure_numpy_indices(keep)

        # ----------------------------------

        # Check that all kept boxes do not have more than `threshold` ious
        if 0:
            for key, keep_idxs in outputs.items():
                kept = np_dets.take(keep_idxs).boxes
                ious = kept.ious(kept)
                max_iou = (np.tril(ious) - np.eye(len(ious))).max()
                if max_iou > thresh:
                    print('{} produced a bad result with max_iou={}'.format(
                        key, max_iou))

        # Check result consistency:
        print('\nResult stats:')
        for key in sorted(outputs.keys()):
            print(' * {:<20}: num={}'.format(key, len(outputs[key])))

        print('\nResult overlaps (method1, method2: jaccard):')
        datas = []
        for k1, k2 in it.combinations(sorted(outputs.keys()), 2):
            idxs1 = set(outputs[k1])
            idxs2 = set(outputs[k2])
            jaccard = len(idxs1 & idxs2) / max(len(idxs1 | idxs2), 1)
            datas.append((k1, k2, jaccard))
        datas = sorted(datas, key=lambda x: -x[2])
        for k1, k2, jaccard in datas:
            print(' * {:<20}, {:<20}: {:0.4f}'.format(k1, k2, jaccard))

    if True:
        ydata = {key: 1.0 / np.array(vals) for key, vals in times.items()}
        ylabel = 'Hz'
        reverse = True
        yscale = 'symlog'
    else:
        ydata = {key: np.array(vals) for key, vals in times.items()}
        ylabel = 'seconds'
        reverse = False
        yscale = 'linear'
    # Order the series by their value at the largest box count
    scores = {key: vals[-1] for key, vals in ydata.items()}
    ydata = ub.dict_subset(ydata, ub.argsort(scores, reverse=reverse))

    ###
    # Emit a python snippet that lists, per box count and container type,
    # the implementations ordered by measured score.
    # times_of_interest = [0, 10, 100, 200, 1000]
    times_of_interest = xdata

    lines = []
    record = lines.append
    record('### times_of_interest = {!r}'.format(times_of_interest))
    for x in times_of_interest:

        if times_of_interest[-1] == x:
            record('else:')
        elif times_of_interest[0] == x:
            record('if num <= {}:'.format(x))
        else:
            record('elif num <= {}:'.format(x))

        if x in xdata:
            pos = xdata.index(x)
            score_wrt_x = {}
            for key, vals in ydata.items():
                score_wrt_x[key] = vals[pos]

            typekeys = ['tensor0', 'tensor', 'ndarray']
            type_groups = {
                b: ub.group_items(score_wrt_x, lambda y: y.endswith(b))[True]
                for b in typekeys
            }
            # print('\n=========')
            # print('x = {!r}'.format(x))
            record(' if code not in {!r}:'.format(set(typekeys)))
            record(' raise KeyError(code)')
            for typekey, group in type_groups.items():
                # print('-------')
                record(' if code == {!r}:'.format(typekey))
                # print('typekey = {!r}'.format(typekey))
                # print('group = {!r}'.format(group))
                group_x = ub.dict_isect(score_wrt_x, group)
                valid_keys = ub.argsort(group_x, reverse=True)
                valid_x = ub.dict_subset(group_x, valid_keys)
                # parts = [','.split(k) for k in valid_keys]
                ordered_impls = []
                ordered_impls2 = ub.odict()
                for k in valid_keys:
                    vals = valid_x[k]
                    # Labels are parsed as comma separated key=value pairs
                    p = k.split(',')
                    d = dict(i.split('=') for i in p)
                    ordered_impls2[d['impl']] = vals
                    ordered_impls.append(d['impl'])

                ordered_impls = list(ub.oset(ordered_impls) - {'auto'})
                # Tolerate groups that have no 'auto' entry
                ordered_impls2.pop('auto', None)
                record(' # {}'.format(
                    ub.repr2(ordered_impls2, precision=1, nl=0, explicit=True)))
                record(' preference = {}'.format(
                    ub.repr2(ordered_impls, nl=0)))
    record('### end times of interest ')
    print(ub.indent('\n'.join(lines), ' ' * 8))
    ###

    markers = {
        key: 'o' if 'auto' in key else ''
        for key, score in scores.items()
    }
    if ub.argflag('--daq'):
        markers = {
            key: '+' if 'daq=True' in key else ''
            for key, score in scores.items()
        }

    labels = {
        key: '{:.2f} {} - {}'.format(score, ylabel[0:3], key)
        for key, score in scores.items()
    }

    title = 'NSM-impl speed: ' + ', '.join(title_parts)

    import kwplot
    kwplot.autompl()
    kwplot.multi_plot(
        xdata, ydata, xlabel='num boxes', ylabel=ylabel, label=labels,
        yscale=yscale,
        title=title,
        marker=markers,
        # xscale='symlog',
    )

    kwplot.show_if_requested()