def __init__(self, components, dsize=None):
    """
    Store the components and derive the shared size and total band count.

    Args:
        components: iterable of objects exposing ``dsize`` and
            ``num_bands`` attributes. It is iterated more than once.
        dsize: size to record on this object. When ``None`` it is taken
            from the components, which must then all report an equal
            ``dsize``.

    Raises:
        ValueError: if ``dsize`` is ``None`` and the components disagree
            on their ``dsize``.
    """
    self.components = components
    if dsize is None:
        candidate_sizes = [part.dsize for part in self.components]
        if ub.allsame(candidate_sizes):
            dsize = candidate_sizes[0]
        else:
            raise ValueError(
                'components must all have the same delayed size')
    self.dsize = dsize
    # Bands are stacked, so the total is the sum over all components.
    self.num_bands = sum(part.num_bands for part in self.components)
    self.meta = dict(shape=self.shape, num_bands=self.num_bands)
def __normalize__(self):
    """
    Populate ``self._data`` from whatever is stored in ``self._raw``.

    Supported raw inputs:
        * ``None``: ``_data`` becomes an empty dict.
        * ``dict``: used directly (no copy). When ``__debug__`` is on,
          every value must be a ``list`` or ``np.ndarray`` and all values
          must have the same length.
        * ``DataFrameLight``: a shallow copy of its ``_data`` is taken.
        * ``pd.DataFrame``: converted with ``to_dict(orient='list')``.

    Raises:
        TypeError: for an unsupported ``_raw`` type, or (debug mode only)
            for a dict value that is neither a list nor an ndarray.
    """
    raw = self._raw

    if raw is None:
        self._data = {}
        return

    if isinstance(raw, dict):
        self._data = raw
        if __debug__:
            # Sanity check: every column is list-like and equally long.
            column_sizes = []
            for column in self._data.values():
                if not isinstance(column, (list, np.ndarray)):
                    raise TypeError(type(column))
                column_sizes.append(len(column))
            assert ub.allsame(column_sizes)
        return

    if isinstance(raw, DataFrameLight):
        self._data = copy.copy(raw._data)
        return

    if isinstance(raw, pd.DataFrame):
        self._data = raw.to_dict(orient='list')
        return

    raise TypeError('Unknown _raw type')
def 数组_是否全部相同(iterable, eq=operator.eq):
    """
    Report whether all items of ``iterable`` compare equal to one another.

    This is a thin wrapper that forwards both arguments to ``ub.allsame``.

    Args:
        iterable: the items to compare.
        eq: two-argument callable used as the equality test
            (defaults to ``operator.eq``).

    Returns:
        Whatever ``ub.allsame(iterable, eq)`` returns.

    Illustrative results (from the original notes, not executed here):
        # allsame([1, 1, 1, 1])                      -> True
        # allsame([])                                -> True
        # allsame([0, 1])                            -> False
        # iterable = iter([0, 1, 1, 1]); next(iterable)
        # allsame(iterable)                          -> True
        # allsame(range(10))                         -> False
        # allsame(range(10), lambda a, b: True)      -> True
    """
    all_equal = ub.allsame(iterable, eq)
    return all_equal
def __init__(self, frames, dsize=None):
    """
    Store the frames and derive the common size and band count.

    Args:
        frames: sequence of objects exposing ``dsize`` and ``num_bands``
            attributes.
        dsize: size to record on this object. When ``None`` it is computed
            by ``_largest_shape`` from the ``dsize`` of every frame.

    Raises:
        ValueError: if every frame reports a known (non-None) band count
            and those counts are not all equal.

    Notes:
        ``num_bands`` is ``None`` when any frame has an unknown band count
        (or when there are no frames). Previously the "unknown" branch was
        immediately overwritten by the equality check, so a mix of known
        and unknown band counts raised instead of yielding ``None``.
    """
    self.frames = frames
    if dsize is None:
        dsize_cands = [frame.dsize for frame in self.frames]
        dsize = _largest_shape(dsize_cands)
    self.dsize = dsize

    nband_cands = [frame.num_bands for frame in self.frames]
    if not nband_cands or any(c is None for c in nband_cands):
        # Unknown for at least one frame (or nothing to inspect).
        num_bands = None
    elif ub.allsame(nband_cands):
        num_bands = nband_cands[0]
    else:
        raise ValueError(
            'frames must all have the same number of bands, '
            'got {!r}'.format(nband_cands))
    self.num_bands = num_bands
    self.num_frames = len(self.frames)
    # self.shape is read last so it sees the attributes assigned above.
    self.meta = {
        'num_bands': self.num_bands,
        'num_frames': self.num_frames,
        'shape': self.shape,
    }
def _fix_keys(model_state_dict):
    """
    Hack around DataParallel wrapper.

    If there is nothing in common between the two models check to see if
    prepending 'module.' to other keys fixes it.

    Renames the keys of ``model_state_dict`` so they line up with the keys
    of ``self_state``, using the strategy selected by ``association``.

    Args:
        model_state_dict (dict): mapping whose keys are dotted path
            strings (they are split on '.').

    Returns:
        dict: the input mapping itself when its keys are already a subset
        of the target keys (or ``association == 'strict'``), otherwise a
        re-keyed mapping.

    Raises:
        KeyError: if ``association`` is not one of the handled values.

    Notes:
        ``self_state``, ``association`` and ``verbose`` are free variables:
        they are not defined in this block and must come from an enclosing
        scope that is not visible here.
    """
    other_keys = set(model_state_dict)
    self_keys = set(self_state)

    if 0:
        # Disabled experiment, never executed.
        # Automatic way to reduce nodes in the trees?
        # If node b always follows node a, can we contract it?
        nodes1 = [n for p in other_keys for n in p.split('.')]
        nodes2 = [n for p in self_keys for n in p.split('.')]
        tups1 = list(tup for key in other_keys
                     for tup in ub.iter_window(key.split('.'), 2))
        tups2 = list(tup for key in self_keys
                     for tup in ub.iter_window(key.split('.'), 2))
        x = ub.ddict(list)
        for a, b in tups1:
            x[a].append(b)
        for a, b in tups2:
            x[a].append(b)

        nodehist = ub.dict_hist(nodes1 + nodes2)

        for k, v in x.items():
            print('----')
            print(k)
            print(nodehist[k])
            follow_hist = ub.dict_hist(v)
            print(follow_hist)
            total = sum(follow_hist.values())
            if ub.allsame(follow_hist.values()) and total == nodehist[k]:
                print('CONTRACT')

        # pair_freq = ub.dict_hist(ub.flatten([tups1, tups2]))
        # print(forest_str(paths_to_otree(other_keys, '.')))

    # common_keys = other_keys.intersection(self_keys)
    # if not common_keys:
    # Only attempt a remap when at least one incoming key is unknown.
    if not other_keys.issubset(self_keys):
        if association == 'strict':
            # Strict mode: never rename anything.
            pass
        elif association == 'module-hack':
            # If there are no common keys try a hack
            prefix = 'module.'

            def smap(f, ss):
                return set(map(f, ss))

            def fix1(k):
                return prefix + k

            def fix2(k):
                # NOTE(review): implicitly returns None for keys that do
                # not start with the prefix, so such keys would be
                # re-keyed to None by map_keys -- confirm this is intended.
                if k.startswith(prefix):
                    return k[len(prefix):]

            # Try adding the prefix first, then removing it; the first
            # transform that produces any overlap with self_keys is used.
            if smap(fix1, other_keys).intersection(self_keys):
                model_state_dict = ub.map_keys(fix1, model_state_dict)
            elif smap(fix2, other_keys).intersection(self_keys):
                model_state_dict = ub.map_keys(fix2, model_state_dict)
        elif association == 'prefix-hack':
            import functools

            def add_prefix(k, prefix):
                return prefix + k

            def remove_prefix(k, prefix):
                # NOTE(review): like fix2 above, returns None when the
                # key does not carry the prefix.
                if k.startswith(prefix):
                    return k[len(prefix):]
            # set1 = other_keys
            # target_set2 = self_keys
            found = _best_prefix_transform(other_keys, self_keys)
            if found is not None:
                # Apply each (action, prefix) step of the transform in order.
                for action, prefix in found['transform']:
                    if action == 'add':
                        func = functools.partial(add_prefix, prefix=prefix)
                    elif action == 'remove':
                        func = functools.partial(remove_prefix, prefix=prefix)
                    else:
                        raise AssertionError
                    model_state_dict = ub.map_keys(func, model_state_dict)
        elif association in {'embedding', 'isomorphism'}:
            if verbose > 1:
                print('Using subpath {} association, may take some time'.format(association))
            # I believe this is the correct way to solve the problem
            paths1 = sorted(other_keys)
            paths2 = sorted(self_state)

            if 1:
                # hack to filter to reduce tree size in embedding problem
                def shrink_paths(paths):
                    # Replace selected '.' separators with ':' so those
                    # components are no longer split on sep='.'.
                    new_paths = []
                    for p in paths:
                        p = p.replace('.0', ':0')
                        p = p.replace('.1', ':1')
                        p = p.replace('.2', ':2')
                        p = p.replace('.3', ':3')
                        p = p.replace('.4', ':4')
                        p = p.replace('.5', ':5')
                        p = p.replace('.6', ':6')
                        p = p.replace('.7', ':7')
                        p = p.replace('.8', ':8')
                        p = p.replace('.9', ':9')
                        p = p.replace('.weight', ':weight')
                        p = p.replace('.bias', ':bias')
                        p = p.replace('.num_batches_tracked', ':num_batches_tracked')
                        p = p.replace('.running_mean', ':running_mean')
                        p = p.replace('.running_var', ':running_var')
                        # p = p.replace('.conv1', ':conv1')
                        # p = p.replace('.conv2', ':conv2')
                        # p = p.replace('.conv3', ':conv3')
                        # p = p.replace('.bn1', ':bn1')
                        # p = p.replace('.bn2', ':bn2')
                        # p = p.replace('.bn3', ':bn3')
                        new_paths.append(p)
                    return new_paths

                # Reducing the depth saves a lot of time
                paths1_ = shrink_paths(paths1)
                paths2_ = shrink_paths(paths2)

            subpaths1, subpaths2 = maximum_common_ordered_subpaths(
                paths1_, paths2_, sep='.', mode=association)

            # Undo the ':' substitution made by shrink_paths.
            subpaths1 = [p.replace(':', '.') for p in subpaths1]
            subpaths2 = [p.replace(':', '.') for p in subpaths2]
            mapping = ub.dzip(subpaths1, subpaths2)
            if verbose > 1:
                other_unmapped = sorted(other_keys - set(mapping.keys()))
                self_unmapped = sorted(self_keys - set(mapping.values()))
                print('-- embed association (other -> self) --')
                print('mapping = {}'.format(ub.repr2(mapping, nl=1)))
                print('self_unmapped = {}'.format(
                    ub.repr2(self_unmapped, nl=1)))
                print('other_unmapped = {}'.format(
                    ub.repr2(other_unmapped, nl=1)))
                print('len(mapping) = {}'.format(
                    ub.repr2(len(mapping), nl=1)))
                print('len(self_unmapped) = {}'.format(
                    ub.repr2(len(self_unmapped), nl=1)))
                print('len(other_unmapped) = {}'.format(
                    ub.repr2(len(other_unmapped), nl=1)))
                print('-- end embed association --')

            # HACK: something might be wrong, there was an instance with
            # HRNet_w32 where multiple keys mapped to the same key
            # bad keys were incre_modules.3.0.conv1.weight and conv1.weight
            #
            # This will not error, but may produce bad output
            try:
                model_state_dict = ub.map_keys(lambda k: mapping.get(k, k),
                                               model_state_dict)
            except Exception as ex:
                HACK = 1
                if HACK:
                    # Fall back to a plain loop in which later keys
                    # overwrite earlier ones that map to the same name,
                    # and surface the original error as a warning.
                    new_state_dict_ = {}
                    for k, v in model_state_dict.items():
                        new_state_dict_[mapping.get(k, k)] = v
                    model_state_dict = new_state_dict_
                    warnings.warn('ex = {!r}'.format(ex))
                else:
                    raise
        else:
            raise KeyError(association)
    return model_state_dict
def run_demo():
    """
    Run a simulated review loop on a small demo graph, optionally saving a
    figure of the graph after each step.

    The function builds demo data with ``demo.demodata_infr``, clears the
    feedback, then iterates over ``infr._generate_reviews`` asking a
    simulated oracle for a decision on each edge and adding that feedback.
    The first edge whose endpoints share a positive-graph label is reviewed
    with ``accuracy=0`` to force an early oracle mistake.

    Command line flags read via ``ub.argval`` / ``ub.argflag``:
        --viz: enable drawing; figures are saved under ``~/Desktop/demo``.
        --labelsize: matplotlib axes label size (default 8).
        --target: review number at which to stop.
        --start: review number at which drawing starts.
        --end: review number at which to stop.

    CommandLine:
        python -m graphid.demo.demo_script run_demo --viz
        python -m graphid.demo.demo_script run_demo

    Example:
        >>> run_demo()
    """
    from graphid import demo
    import matplotlib as mpl
    TMP_RC = {
        'axes.titlesize': 12,
        'axes.labelsize': int(ub.argval('--labelsize', default=8)),
        'font.family': 'sans-serif',
        'font.serif': 'CMU Serif',
        'font.sans-serif': 'CMU Sans Serif',
        'font.monospace': 'CMU Typewriter Text',
        'xtick.labelsize': 12,
        'ytick.labelsize': 12,
        # 'legend.alpha': .8,
        'legend.fontsize': 12,
        'legend.facecolor': 'w',
    }
    # NOTE: this mutates the global matplotlib rcParams.
    mpl.rcParams.update(TMP_RC)
    # ---- Synthetic data params
    params = {
        'redun.pos': 2,
        'redun.neg': 2,
    }
    # oracle_accuracy = .98
    # oracle_accuracy = .90
    # oracle_accuracy = (.8, 1.0)
    oracle_accuracy = (.85, 1.0)
    # oracle_accuracy = 1.0

    # --- draw params

    VISUALIZE = ub.argflag('--viz')
    # QUIT_OR_EMEBED = 'embed'
    QUIT_OR_EMEBED = 'quit'

    def asint(p):
        # Pass None through, otherwise coerce the CLI string to int.
        return p if p is None else int(p)

    TARGET_REVIEW = asint(ub.argval('--target', default=None))
    START = asint(ub.argval('--start', default=None))
    END = asint(ub.argval('--end', default=None))

    # ------------------

    # rng = np.random.RandomState(42)
    # infr = demo.demodata_infr(num_pccs=4, size=3, size_std=1, p_incon=0)
    # infr = demo.demodata_infr(num_pccs=6, size=7, size_std=1, p_incon=0)
    # infr = demo.demodata_infr(num_pccs=3, size=5, size_std=.2, p_incon=0)
    infr = demo.demodata_infr(pcc_sizes=[5, 2, 4])
    infr.verbose = 100
    infr.ensure_cliques()
    infr.ensure_full()
    # Dummy scoring

    infr.init_simulation(oracle_accuracy=oracle_accuracy, name='run_demo')
    # infr_gt = infr.copy()
    dpath = ub.ensuredir(ub.truepath('~/Desktop/demo'))
    if 0:
        ub.delete(dpath)
    ub.ensuredir(dpath)

    # Monotonic counter used to number the saved figure files.
    fig_counter = it.count(0)

    def show_graph(infr, title, final=False, selected_edges=None):
        # Draw the current graph with the latest log lines as the x-label
        # and save it as the next numbered PNG in dpath.
        # Does nothing unless --viz was given.
        from matplotlib import pyplot as plt
        if not VISUALIZE:
            return
        # TODO: rich colored text?
        latest = '\n'.join(infr.latest_logs())
        showkw = dict(
            # fontsize=infr.graph.graph['fontsize'],
            # fontname=infr.graph.graph['fontname'],
            show_unreviewed_edges=True,
            show_inferred_same=False,
            show_inferred_diff=False,
            outof=(len(infr.aids)),
            # show_inferred_same=True,
            # show_inferred_diff=True,
            selected_edges=selected_edges,
            show_labels=True,
            simple_labels=True,
            # show_recent_review=not final,
            show_recent_review=False,
            # splines=infr.graph.graph['splines'],
            reposition=False,
            # with_colorbar=True
        )
        verbose = infr.verbose
        infr.verbose = 0
        # NOTE(review): the copy is immediately discarded by the next
        # assignment, so the original infr is what gets shown.
        infr_ = infr.copy()
        infr_ = infr
        infr_.verbose = verbose
        infr_.show(pickable=True, verbose=0, **showkw)
        infr.verbose = verbose
        # print('status ' + ub.repr2(infr_.status()))
        # infr.show(**showkw)
        ax = plt.gca()
        ax.set_title(title, fontsize=20)
        fig = plt.gcf()
        # fontsize = 22
        fontsize = 12
        if True:
            # postprocess xlabel
            lines = []
            for line in latest.split('\n'):
                # The `False and` makes the first branch unreachable.
                if False and line.startswith('ORACLE ERROR'):
                    lines += ['ORACLE ERROR']
                else:
                    lines += [line]
            latest = '\n'.join(lines)
            # Shrink the font as the number of log lines grows; the last
            # matching threshold wins.
            if len(lines) > 10:
                fontsize = 16
            if len(lines) > 12:
                fontsize = 14
            if len(lines) > 14:
                fontsize = 12
            if len(lines) > 18:
                fontsize = 10
            if len(lines) > 23:
                fontsize = 8

        if True:
            util.mplutil.adjust_subplots(top=.95, left=0, right=1, bottom=.45, fig=fig)
            ax.set_xlabel('\n' + latest)
            xlabel = ax.get_xaxis().get_label()
            xlabel.set_horizontalalignment('left')
            # xlabel.set_x(.025)
            # xlabel.set_x(-.6)
            xlabel.set_x(-2.0)
            # xlabel.set_fontname('CMU Typewriter Text')
            xlabel.set_fontname('Inconsolata')
            xlabel.set_fontsize(fontsize)
        ax.set_aspect('equal')

        # ax.xaxis.label.set_color('red')
        fpath = join(dpath, 'demo_{:04d}.png'.format(next(fig_counter)))
        fig.savefig(
            fpath,
            dpi=300,
            # transparent=True,
            edgecolor='none')

        # pt.save_figure(dpath=dpath, dpi=300)
        # Return value unused; presumably called to flush the log buffer
        # -- TODO confirm.
        infr.latest_logs()

    if VISUALIZE:
        infr.update_visual_attrs(groupby='name_label')
        infr.set_node_attrs('pin', 'true')
        node_dict = infr.graph.nodes
        print(ub.repr2(node_dict[1]))

    if VISUALIZE:
        infr.latest_logs()
        # Pin Nodes into the target groundtruth position
        show_graph(infr, 'target-gt')

    # Wipe everything so the review loop starts from an empty state.
    print(ub.repr2(infr.status()))
    infr.clear_feedback()
    infr.clear_name_labels()
    infr.clear_edges()
    print(ub.repr2(infr.status()))
    infr.latest_logs()

    if VISUALIZE:
        infr.update_visual_attrs()

    infr.prioritize('prob_match')
    if VISUALIZE or TARGET_REVIEW is None or TARGET_REVIEW == 0:
        show_graph(infr, 'initial state')

    def on_new_candidate_edges(infr, edges):
        # hack updateing visual attrs as a callback
        if VISUALIZE:
            infr.update_visual_attrs()

    infr.on_new_candidate_edges = on_new_candidate_edges

    infr.params.update(**params)
    infr.refresh_candidate_edges()

    VIZ_ALL = (VISUALIZE and TARGET_REVIEW is None and START is None)
    print('VIZ_ALL = %r' % (VIZ_ALL, ))

    if VIZ_ALL or TARGET_REVIEW == 0:
        show_graph(infr, 'find-candidates')

    # _iter2 = enumerate(infr.generate_reviews(**params))
    # _iter2 = list(_iter2)
    # assert len(_iter2) > 0

    # prog = ub.ProgIter(_iter2, label='run_demo', bs=False, adjust=False,
    #                    enabled=False)
    count = 1
    # Remaining number of forced oracle mistakes.
    first = 1
    for edge, priority in infr._generate_reviews(data=True):
        msg = 'review #%d, priority=%.3f' % (count, priority)
        print('\n----------')
        infr.print('pop edge {} with priority={:.3f}'.format(edge, priority))
        # print('remaining_reviews = %r' % (infr.remaining_reviews()),)
        # Make the next review

        if START is not None:
            VIZ_ALL = count >= START

        if END is not None and count >= END:
            break

        infr.print(msg)
        if ub.allsame(infr.pos_graph.node_labels(*edge)) and first:
            # Have oracle make a mistake early
            feedback = infr.request_oracle_review(edge, accuracy=0)
            first -= 1
        else:
            feedback = infr.request_oracle_review(edge)

        # True from the review just before the target onward.
        AT_TARGET = TARGET_REVIEW is not None and count >= TARGET_REVIEW - 1

        SHOW_CANDIATE_POP = True
        if SHOW_CANDIATE_POP and (VIZ_ALL or AT_TARGET):
            infr.print(
                ub.repr2(infr.task_probs['match_state'][edge], precision=4, si=True))
            infr.print('len(queue) = %r' % (len(infr.queue)))
            # Show edge selection
            infr.print('Oracle will predict: ' + feedback['evidence_decision'])
            show_graph(infr, 'pre' + msg, selected_edges=[edge])

        if count == TARGET_REVIEW:
            infr.EMBEDME = QUIT_OR_EMEBED == 'embed'
        infr.add_feedback(edge, **feedback)
        infr.print('len(queue) = %r' % (len(infr.queue)))
        # infr.apply_nondynamic_update()
        # Show the result
        if VIZ_ALL or AT_TARGET:
            show_graph(infr, msg)
            # import sys
            # sys.exit(1)
        if count == TARGET_REVIEW:
            break
        count += 1

    infr.print('status = ' + ub.repr2(infr.status(extended=False)))
    show_graph(infr, 'post-review (#reviews={})'.format(count), final=True)

    if VISUALIZE:
        if not getattr(infr, 'EMBEDME', False):
            # import plottool as pt
            # util.mplutil.all_figures_tile()
            util.mplutil.show_if_requested()
def padded_collate(inbatch, fill_value=-1):
    """
    Collate a batch whose tensors may differ in length along dimension 0.

    Used for detection datasets with boxes. Tensors that are shorter than
    the longest one in the batch are padded at the end with ``fill_value``
    and the result is stacked with ``default_collate``. Non-tensor items
    (tuples / lists) are transposed and collated recursively, position by
    position.

    Args:
        inbatch (list): batch items; either tensors, or (nested) sequences
            of tensors that all share the same structure.
        fill_value (int | float): value used for the padding. It is
            forwarded to the recursive calls so nested tensors are padded
            with the same value as top-level ones.

    Returns:
        Tensor | list: a stacked tensor when the items are tensors
        (an empty ``torch.FloatTensor()`` when every tensor has length 0),
        otherwise a list with one collated entry per item position.

    Raises:
        Exception: any error raised while collating is re-raised after the
        offending batch has been printed.

    Example:
        >>> from netharn.data.collate import *
        >>> import torch
        >>> rng = np.random.RandomState(0)
        >>> inbatch = []
        >>> bsize = 7
        >>> for i in range(bsize):
        >>>     # add an image and some dummy bboxes to the batch
        >>>     img = torch.rand(3, 8, 8)  # dummy 8x8 image
        >>>     n = 11 if i == 3 else rng.randint(0, 11)
        >>>     boxes = torch.rand(n, 4)
        >>>     item = (img, boxes)
        >>>     inbatch.append(item)
        >>> out_batch = padded_collate(inbatch)
        >>> assert len(out_batch) == 2
        >>> assert list(out_batch[0].shape) == [bsize, 3, 8, 8]
        >>> assert list(out_batch[1].shape) == [bsize, 11, 4]

    Example:
        >>> import torch
        >>> rng = np.random.RandomState(0)
        >>> inbatch = []
        >>> bsize = 4
        >>> for _ in range(bsize):
        >>>     # add an image and some dummy bboxes to the batch
        >>>     img = torch.rand(3, 8, 8)  # dummy 8x8 image
        >>>     #boxes = torch.empty(0, 4)
        >>>     boxes = torch.FloatTensor()
        >>>     item = (img, [boxes])
        >>>     inbatch.append(item)
        >>> out_batch = padded_collate(inbatch)
        >>> assert len(out_batch) == 2
        >>> assert list(out_batch[0].shape) == [bsize, 3, 8, 8]
        >>> #assert list(out_batch[1][0].shape) == [bsize, 0, 4]
        >>> assert list(out_batch[1][0].shape) in [[0], []]  # torch .3 a .4

    Example:
        >>> inbatch = [torch.rand(4, 4), torch.rand(8, 4),
        >>>            torch.rand(0, 4), torch.rand(3, 4),
        >>>            torch.rand(0, 4), torch.rand(1, 4)]
        >>> out_batch = padded_collate(inbatch)
        >>> assert list(out_batch.shape) == [6, 8, 4]

    Example:
        >>> # fill_value also applies to nested tensors
        >>> inbatch = [(torch.zeros(1), torch.ones(2, 4)),
        >>>            (torch.zeros(1), torch.ones(1, 4))]
        >>> out_batch = padded_collate(inbatch, fill_value=0)
        >>> assert out_batch[1][1, 1].tolist() == [0, 0, 0, 0]
    """
    try:
        if torch.is_tensor(inbatch[0]):
            num_items = [len(item) for item in inbatch]
            if len(set(num_items)) <= 1:
                # Every tensor has the same length: no padding needed.
                if num_items[0] == 0:
                    # batch = torch.empty(0)
                    batch = torch.FloatTensor()
                else:
                    batch = default_collate(inbatch)
            else:
                max_size = max(num_items)
                # Trailing dims are taken from the non-empty items because
                # an empty tensor may not carry them.
                real_tail_shape = None
                for item in inbatch:
                    if item.numel():
                        tail_shape = item.shape[1:]
                        if real_tail_shape is not None:
                            assert real_tail_shape == tail_shape
                        real_tail_shape = tail_shape
                if real_tail_shape is None:
                    # All items are empty (some trailing dim is 0); use the
                    # declared trailing dims instead of failing on None.
                    real_tail_shape = inbatch[0].shape[1:]

                padded_inbatch = []
                for item in inbatch:
                    n_extra = max_size - len(item)
                    if n_extra > 0:
                        shape = (n_extra, ) + tuple(real_tail_shape)
                        if torch.__version__.startswith('0.3'):
                            extra = torch.Tensor(
                                np.full(shape, fill_value=fill_value))
                        else:
                            # Match dtype and device so torch.cat works.
                            extra = torch.full(shape, fill_value=fill_value,
                                               dtype=item.dtype,
                                               device=item.device)
                        padded_item = torch.cat([item, extra], dim=0)
                        padded_inbatch.append(padded_item)
                    else:
                        padded_inbatch.append(item)
                batch = default_collate(padded_inbatch)
        else:
            # Transpose [(a0, b0), (a1, b1)] -> [[a0, a1], [b0, b1]] and
            # collate each position with the same fill value.
            batch = [
                padded_collate(item, fill_value=fill_value)
                for item in list(map(list, zip(*inbatch)))
            ]
    except Exception:
        print('Failed to collate inbatch={}'.format(inbatch))
        raise
    return batch
def bench_find_optimal_blocksize():
    r"""
    This function can help find the optimal blocksize for your use case.

    It writes a random file of roughly ``--size`` megabytes into ``--dpath``,
    hashes it with ``ub.hash_file`` once per candidate blocksize
    (2 ** 16 through 2 ** 23) under a timer, prints the timing rankings and
    asserts that every blocksize produced the same hash.

    Command line options (read with ``ub.argval``):
        --dpath: directory to write the test file in; defaults to the
            ubelt app cache dir ``ubelt/hash_test``.
        --size: target file size in MB (default 600).
        --hash_algo: hasher name passed to ``ub.hash_file``
            (default 'xx64').

    Notes:

        # Usage
        cd ~/code/ubelt/dev
        xdoctest bench_hash_file.py bench_find_optimal_blocksize \
            --dpath <PATH-TO-HDD-OR-SSD> \
            --size <INT-IN-MB> \
            --hash_algo <ALGO_NAME> \

        # Benchmark on an HDD
        xdoctest bench_hash_file.py bench_find_optimal_blocksize \
            --size 500 \
            --dpath $HOME/raid/data/tmp \
            --hash_algo xx64

        # Benchmark on an SSD
        xdoctest bench_hash_file.py bench_find_optimal_blocksize \
            --size 500 \
            --dpath $HOME/.cache/ubelt/tmp \
            --hash_algo xx64

        # Test a small file
        xdoctest bench_hash_file.py bench_find_optimal_blocksize \
            --size 1 \
            --dpath $HOME/.cache/ubelt/tmp \
            --hash_algo xx64

        Throughout our tests on SSDs / HDDs with small and large files
        we are finding a chunksize of 2 ** 20 consistently working best with
        xx64.

        # Test with a slower hash algo
        xdoctest bench_hash_file.py bench_find_optimal_blocksize \
            --size 500 \
            --dpath $HOME/raid/data/tmp \
            --hash_algo sha1

        Even that shows 2 ** 20 working well.
    """
    import os
    import numpy as np
    import timerit

    dpath = ub.argval('--dpath', default=None)

    if dpath is None:
        # dpath = ub.ensuredir(ub.expandpath('$HOME/raid/data/tmp'))
        dpath = ub.ensure_app_cache_dir('ubelt/hash_test')
    else:
        ub.ensuredir(dpath)

    print('dpath = {!r}'.format(dpath))

    target_size = int(ub.argval('--size', default=600))
    hash_algo = ub.argval('--hash_algo', default='xx64')

    print('hash_algo = {!r}'.format(hash_algo))
    print('target_size = {!r}'.format(target_size))

    # Write a big file (~600 MB)
    MB = int(2 ** 20)
    size_pool = [target_size]
    # Fixed seed so the generated data is reproducible.
    rng = random.Random(0)
    # pool_size = max(target_size // 2, 1)
    # pool_size = max(1, target_size // 10)
    pool_size = 8
    # Pool of 1 MB parts handed to _write_random_file to build the file.
    part_pool = [_random_data(rng, MB) for _ in range(pool_size)]
    fpath = _write_random_file(dpath, part_pool, size_pool, rng)
    print('fpath = {!r}'.format(fpath))

    size_mb = os.stat(fpath).st_size / MB
    print('file size = {!r} MB'.format(size_mb))

    ti = timerit.Timerit(4, bestof=2, verbose=2)

    results = []

    # Find an optimal constant blocksize
    min_power = 16
    max_power = 24
    # range() excludes max_power, so the largest candidate is 2 ** 23.
    blocksize_candiates = [int(2 ** e) for e in range(min_power, max_power)]
    for blocksize in blocksize_candiates:
        for timer in ti.reset('constant blocksize=2 ** {} = {}'.format(np.log2(float(blocksize)), blocksize)):
            result = ub.hash_file(fpath, blocksize=blocksize, hasher=hash_algo)
            results.append(result)

    print('ti.rankings = {}'.format(ub.repr2(ti.rankings, nl=2, align=':')))
    # The blocksize must not influence the resulting hash.
    assert ub.allsame(results)
def triu_condense_multi_index(multi_index, dims, symetric=False):
    r"""
    Like np.ravel_multi_index but returns positions in an upper triangular
    condensed square matrix

    Args:
        multi_index (Tuple[ArrayLike]):
            indexes for each dimension into the square matrix. Anything
            accepted by ``np.asarray`` works (ndarrays, lists, tuples).

        dims (Tuple[int]):
            shape of each dimension in the square matrix (should all be the
            same)

        symetric (bool):
            if True, converts lower triangular indices to their upper
            triangular location. This may cause a copy to occur.

    Returns:
        ndarray: for every (row, col) pair, its position in the condensed
        (strict upper triangle, row-major) vector. The inputs are never
        modified.

    Raises:
        NotImplementedError: if ``dims`` is not 2-dimensional, is not
            square, or if ``multi_index`` contains diagonal elements.
        ValueError: if ``multi_index`` contains lower triangular elements
            and ``symetric`` is False.

    References:
        https://stackoverflow.com/a/36867493/887074
        https://numpy.org/doc/stable/reference/generated/numpy.ravel_multi_index.html#numpy.ravel_multi_index

    Examples:
        >>> dims = (3, 3)
        >>> symetric = True
        >>> multi_index = (np.array([0, 0, 1]), np.array([1, 2, 2]))
        >>> condensed_idxs = triu_condense_multi_index(multi_index, dims, symetric=symetric)
        >>> assert condensed_idxs.tolist() == [0, 1, 2]

        >>> n = 7
        >>> symetric = True
        >>> multi_index = np.triu_indices(n=n, k=1)
        >>> condensed_idxs = triu_condense_multi_index(multi_index, [n] * 2, symetric=symetric)
        >>> assert condensed_idxs.tolist() == list(range(n * (n - 1) // 2))
        >>> from scipy.spatial.distance import pdist, squareform
        >>> square_mat = np.zeros((n, n))
        >>> conden_mat = squareform(square_mat)
        >>> conden_mat[condensed_idxs] = np.arange(len(condensed_idxs)) + 1
        >>> square_mat = squareform(conden_mat)
        >>> print('square_mat =\n{}'.format(ub.repr2(square_mat, nl=1)))

        >>> n = 7
        >>> symetric = True
        >>> multi_index = np.tril_indices(n=n, k=-1)
        >>> condensed_idxs = triu_condense_multi_index(multi_index, [n] * 2, symetric=symetric)
        >>> assert sorted(condensed_idxs.tolist()) == list(range(n * (n - 1) // 2))
        >>> from scipy.spatial.distance import pdist, squareform
        >>> square_mat = np.zeros((n, n))
        >>> conden_mat = squareform(square_mat, checks=False)
        >>> conden_mat[condensed_idxs] = np.arange(len(condensed_idxs)) + 1
        >>> square_mat = squareform(conden_mat)
        >>> print('square_mat =\n{}'.format(ub.repr2(square_mat, nl=1)))

    Ignore:
        >>> import xdev
        >>> n = 30
        >>> symetric = True
        >>> multi_index = np.triu_indices(n=n, k=1)
        >>> condensed_idxs = xdev.profile_now(triu_condense_multi_index)(multi_index, [n] * 2)

    Ignore:
        # Experiment with numba
        from numba import jit
        triu_condense_multi_index2 = jit(nopython=True)(triu_condense_multi_index)
        triu_condense_multi_index2 = jit()(triu_condense_multi_index)
        triu_condense_multi_index2(multi_index, [n] * 2)
        %timeit triu_condense_multi_index(multi_index, [n] * 2)
        %timeit triu_condense_multi_index2(multi_index, [n] * 2)
    """
    if len(dims) != 2:
        raise NotImplementedError('only 2d matrices for now')
    n = dims[0]
    if dims[1] != n:
        raise NotImplementedError('only square matrices for now')

    rxs, cxs = multi_index
    # No copy is made when the inputs already are ndarrays.
    rxs = np.asarray(rxs)
    cxs = np.asarray(cxs)

    triu_flags = rxs < cxs
    if not np.all(triu_flags):
        if np.any(rxs == cxs):
            raise NotImplementedError(
                'multi_index contains diagonal elements, which are not '
                'allowed in a condensed matrix')
        if not symetric:
            raise ValueError('multi_index cannot contain inputs from '
                             'lower triangle unless symetric=True')
        # Reflect lower triangular entries across the diagonal. minimum /
        # maximum allocate new arrays, so the caller's data is untouched.
        rxs, cxs = np.minimum(rxs, cxs), np.maximum(rxs, cxs)

    # Let i = rxs
    # Let j = cxs
    # with i*n + j you go to the position in the square-formed matrix;
    # with - i*(i+1)/2 you remove lower triangle (including diagonal) in all lines before i;
    # with - i you remove positions in line i before the diagonal;
    # with - 1 you remove positions in line i on the diagonal.
    #
    # The unsimplified form is:
    #   (n * rxs + cxs) - (rxs * (rxs + 1) // 2) - rxs - 1
    # which simplifies (e.g. via sympy.simplify) to the expression below.
    condensed_indices = cxs + (n - 1) * rxs - (rxs * (rxs + 1) // 2) - 1
    return condensed_indices
def are_nodes_connected(self, u, v):
    """
    Report whether nodes ``u`` and ``v`` carry the same label.

    Args:
        u: first node.
        v: second node.

    Returns:
        The result of ``ub.allsame`` applied to the labels that
        ``self.node_labels(u, v)`` returns for the two nodes.
    """
    labels = self.node_labels(u, v)
    same_label = ub.allsame(labels)
    return same_label
def run_pvpoke_ultra_experiment():
    """
    Drive the pvpoke battle-matrix web page with selenium and analyse the
    downloaded result table.

    Steps performed by the code below: open Chrome on the matrix page,
    select the league, add every pokemon in ``to_check_mons`` through the
    page's form, quick-fill the opposing side with the league meta, set
    shields, start the battle, download the CSV into ``$HOME/Downloads``
    and print which matchups flip across the 500 threshold.

    NOTE(review): the ``find_element(s)_by_*`` helpers used here are from
    the older selenium API -- confirm the installed selenium version still
    provides them.

    https://pvpoke.com/battle/matrix/

    !pip install selenium
    """
    """
    Relevant page items:

    <button class="add-poke-btn button">+ Add Pokemon</button>
    '//*[@id="main"]/div[3]/div[3]/div/div[1]/button[1]'
    '/html/body/div[1]/div/div[3]/div[3]/div/div[1]/button[1]'

    <input class="poke-search" type="text" placeholder="Search name">
    /html/body/div[5]/div/div[3]/div[1]/input

    /html/body/div[5]/div/div[3]/div[1]/div[2]/div[9]/a/span[1]

    Level Cap
    /html/body/div[5]/div/div[3]/div[1]/div[2]/div[9]/div/div[2]/div[2]/div[5]

    # IV GROUP
    ivs-group

    save-poke

    import sys, ubelt
    sys.path.append(ubelt.expandpath('~/code/pypogo'))
    from pypogo.pvpoke_experiment import *  # NOQA
    from pypogo.pvpoke_experiment import _oldstuff
    """
    from selenium import webdriver
    from selenium.webdriver.common.keys import Keys
    from selenium.webdriver.support.ui import Select
    import ubelt as ub
    import os
    import pathlib
    import time
    import pandas as pd
    import pypogo

    # Requires the driver be in the PATH
    fpath = ensure_selenium_chromedriver()
    # Append the driver's directory to PATH (oset keeps entries unique).
    os.environ['PATH'] = os.pathsep.join(
        ub.oset(os.environ['PATH'].split(os.pathsep)) |
        ub.oset([str(fpath.parent)]))

    url = 'https://pvpoke.com/battle/matrix/'
    # chrome_exe = ub.find_exe("google-chrome")
    driver = webdriver.Chrome()
    driver.get(url)

    league = 'Great'
    # league = 'Master40'
    if league == 'Great':
        league_box_target = 'Great League (CP 1500)'

        # One IV triple per line; duplicates are dropped by oset.
        have_ivs = list(
            ub.oset([
                tuple([int(x) for x in p.strip().split(',') if x])
                for p in ub.codeblock('''
                    10, 10, 12,
                    10, 12, 14,
                    10, 12, 14,
                    10, 13, 10,
                    10, 13, 12,
                    10, 14, 14,
                    11, 12, 14,
                    11, 14, 12,
                    11, 14, 15,
                    11, 15, 11,
                    11, 15, 11,
                    11, 15, 12,
                    11, 15, 12,
                    12, 10, 12,
                    12, 11, 12,
                    12, 12, 15,
                    12, 14, 11,
                    12, 14, 15,
                    12, 15, 11,
                    12, 15, 12
                    12, 15, 12,
                    13, 11, 13
                    13, 12, 10
                    13, 12, 13,
                    13, 13, 10,
                    13, 13, 11,
                    13, 15, 10,
                    13, 15, 11,
                    13, 15, 11,
                    14, 10, 12,
                    14, 11, 10,
                    14, 11, 10,
                    14, 13, 11
                    14, 13, 14,
                    15, 10, 12
                    15, 11, 10,
                    15, 11, 11,
                    15, 12, 11
                    ''').split('\n')
            ]))
        to_check_mons = [
            pypogo.Pokemon('Deoxys', form='defense', ivs=ivs,
                           moves=['Counter', 'Rock Slide',
                                  'Psycho Boost']).maximize(1500)
            for ivs in have_ivs
        ]
        meta_text = 'Great League Meta'
    elif league == 'Master40':
        league_box_target = 'Master League (Level 40)'
        meta_text = 'Master League Meta'
        # Test the effect of best buddies vs the master league
        to_check_mons = [
            pypogo.Pokemon('Mewtwo', ivs=[15, 15, 15], level=40),
            pypogo.Pokemon('Mewtwo', ivs=[15, 15, 15], level=41),
            pypogo.Pokemon('Garchomp', ivs=[15, 15, 15], level=40),
            pypogo.Pokemon('Garchomp', ivs=[15, 15, 15], level=41),
            pypogo.Pokemon('Dragonite', ivs=[15, 14, 15], level=40),
            pypogo.Pokemon('Dragonite', ivs=[15, 14, 15], level=41),
            pypogo.Pokemon('Giratina', form='origin', ivs=[15, 14, 15], level=40),
            pypogo.Pokemon('Giratina', form='origin', ivs=[15, 14, 15], level=41),
            pypogo.Pokemon('Kyogre', ivs=[15, 15, 14], level=40),
            pypogo.Pokemon('Kyogre', ivs=[15, 15, 14], level=41),
            pypogo.Pokemon('Groudon', ivs=[14, 14, 13], level=40),
            pypogo.Pokemon('Groudon', ivs=[14, 14, 13], level=41),
            pypogo.Pokemon('Togekiss', ivs=[15, 15, 14], level=40),
            pypogo.Pokemon('Togekiss', ivs=[15, 15, 14], level=41),
        ]
        for mon in to_check_mons:
            mon.populate_all()
    else:
        # NOTE(review): any other league leaves league_box_target,
        # meta_text and to_check_mons undefined for the code below.
        pass

    leage_select = driver.find_elements_by_class_name('league-select')[0]
    leage_select.click()
    leage_select.send_keys(league_box_target)
    leage_select.click()

    leage_select.text.split('\n')
    leage_select.send_keys('\n')
    leage_select.send_keys('\n')

    def add_pokemon(mon):
        # Fill in the page's "add pokemon" form for one pokemon
        # (name, level cap, level, IVs, optionally moves) and save it.
        add_poke1_button = driver.find_elements_by_class_name(
            'add-poke-btn')[0]
        add_poke1_button.click()

        select_drop = driver.find_element_by_xpath(
            '/html/body/div[5]/div/div[3]/div[1]/select')

        if 1:
            # Pick the dropdown entry closest (by edit distance) to the
            # pokemon's display name.
            import xdev
            all_names = select_drop.text.split('\n')
            distances = xdev.edit_distance(mon.display_name(), all_names)
            chosen_name = all_names[ub.argmin(distances)]
        else:
            chosen_name = mon.name

        search_box = driver.find_element_by_xpath(
            '/html/body/div[5]/div/div[3]/div[1]/input')
        search_box.send_keys(chosen_name)

        advanced_ivs_arrow = driver.find_element_by_xpath(
            '/html/body/div[5]/div/div[3]/div[1]/div[2]/div[9]/a/span[1]')
        advanced_ivs_arrow.click()

        level40_cap = driver.find_element_by_xpath(
            '/html/body/div[5]/div/div[3]/div[1]/div[2]/div[9]/div/div[2]/div[2]/div[2]'
        )
        level41_cap = driver.find_element_by_xpath(
            '/html/body/div[5]/div/div[3]/div[1]/div[2]/div[9]/div/div[2]/div[2]/div[3]'
        )
        level50_cap = driver.find_element_by_xpath(
            '/html/body/div[5]/div/div[3]/div[1]/div[2]/div[9]/div/div[2]/div[2]/div[4]'
        )
        level51_cap = driver.find_element_by_xpath(
            '/html/body/div[5]/div/div[3]/div[1]/div[2]/div[9]/div/div[2]/div[2]/div[5]'
        )

        # Click the highest level cap the pokemon's level reaches;
        # below level 40 no cap is clicked.
        if mon.level >= 51:
            level51_cap.click()
        elif mon.level >= 50:
            level50_cap.click()
        elif mon.level >= 41:
            level41_cap.click()
        elif mon.level >= 40:
            level40_cap.click()

        level_box = driver.find_element_by_xpath(
            '/html/body/div[5]/div/div[3]/div[1]/div[2]/div[9]/div/div[1]/input'
        )
        level_box.click()
        level_box.clear()
        level_box.clear()
        level_box.send_keys(str(mon.level))

        iv_a = driver.find_element_by_xpath(
            '/html/body/div[5]/div/div[3]/div[1]/div[2]/div[9]/div/div[1]/div/input[1]'
        )
        iv_d = driver.find_element_by_xpath(
            '/html/body/div[5]/div/div[3]/div[1]/div[2]/div[9]/div/div[1]/div/input[2]'
        )
        iv_s = driver.find_element_by_xpath(
            '/html/body/div[5]/div/div[3]/div[1]/div[2]/div[9]/div/div[1]/div/input[3]'
        )

        # TODO
        # driver.find_elements_by_class_name('move-select')

        iv_a.clear()
        iv_a.send_keys(str(mon.ivs[0]))

        iv_d.clear()
        iv_d.send_keys(str(mon.ivs[1]))

        iv_s.clear()
        iv_s.send_keys(str(mon.ivs[2]))

        # USE_MOVES = 1
        if mon.moves is not None:
            # mon.populate_all()

            fast_select = driver.find_element_by_xpath(
                '/html/body/div[5]/div/div[3]/div[1]/div[2]/div[10]/select[1]')
            fast_select.click()
            fast_select.send_keys(mon.pvp_fast_move['name'])
            fast_select.send_keys(Keys.ENTER)

            charge1_select = driver.find_element_by_xpath(
                '/html/body/div[5]/div/div[3]/div[1]/div[2]/div[10]/select[2]')
            charge1_select.click()
            charge1_select.send_keys(mon.pvp_charge_moves[0]['name'])
            charge1_select.send_keys(Keys.ENTER)

            charge2_select = driver.find_element_by_xpath(
                '/html/body/div[5]/div/div[3]/div[1]/div[2]/div[10]/select[3]')
            charge2_select.click()
            charge2_select.send_keys(mon.pvp_charge_moves[1]['name'])
            charge2_select.send_keys(Keys.ENTER)

        save_button = driver.find_elements_by_class_name('save-poke')[0]
        save_button.click()

    # The second quick-fill box is filled with the league meta.
    quickfills = driver.find_elements_by_class_name('quick-fill-select')
    quickfill = quickfills[1]
    quickfill.text.split('\n')
    quickfill.click()
    quickfill.send_keys(meta_text)
    quickfill.click()

    import pypogo
    # mon1 = pypogo.Pokemon('Mewtwo', ivs=[15, 15, 15], level=40)
    # mon2 = pypogo.Pokemon('Mewtwo', ivs=[15, 15, 15], level=41)

    if 1:
        for mon in to_check_mons:
            pass
            add_pokemon(mon)

    shield_selectors = driver.find_elements_by_class_name('shield-select')
    shield_selectors[2].click()
    shield_selectors[2].send_keys('No shields')
    shield_selectors[2].send_keys(Keys.ENTER)

    shield_selectors[3].click()
    shield_selectors[3].send_keys('No shields')
    shield_selectors[3].send_keys(Keys.ENTER)

    shield_selectors[0].click()

    battle_btn = driver.find_elements_by_class_name('battle-btn')[0]
    battle_btn.click()

    # Clear previous downloaded files
    dlfolder = pathlib.Path(ub.expandpath('$HOME/Downloads'))
    for old_fpath in list(dlfolder.glob('_vs*.csv')):
        old_fpath.unlink()

    time.sleep(2.0)

    # Download new data
    dl_btn = driver.find_element_by_xpath(
        '//*[@id="main"]/div[4]/div[9]/div/a')
    dl_btn.click()

    # Busy-wait (no sleep, no timeout) until the download shows up.
    while len(list(dlfolder.glob('_vs*.csv'))) < 1:
        pass

    new_fpaths = list(dlfolder.glob('_vs*.csv'))
    assert len(new_fpaths) == 1
    fpath = new_fpaths[0]

    data = pd.read_csv(fpath, header=0, index_col=0)

    if 1:
        # GROUP ANALYSIS
        # The bare expressions below have no effect when run as a script.
        data.sum(axis=1).sort_values()
        (data > 500).sum(axis=1).sort_values()

        # Collect columns where the "> 500" outcome is not the same for
        # every row.
        flipped = []
        for key, col in data.T.iterrows():
            if not ub.allsame(col > 500):
                flipped.append(key)

        flip_df = data.loc[:, flipped]

        def color(x):
            # Green above the 500 threshold, red otherwise.
            if x > 500:
                return ub.color_text(str(x), 'green')
            else:
                return ub.color_text(str(x), 'red')

        print(flip_df.applymap(color))
        print(flip_df.columns.tolist())

        (data > 500)
    else:
        # PAIR ANALYSIS
        # Disabled branch: compares consecutive pairs of rows.
        pairs = list(ub.iter_window(range(len(data)), step=2))
        for i, j in pairs:
            print('-----')
            matchup0 = data.iloc[i]
            matchup1 = data.iloc[j]
            delta = matchup1 - matchup0
            print(delta[delta != 0])

            wins0 = matchup0 > 500
            wins1 = matchup1 > 500
            flips = (wins0 != wins1)
            flipped_vs = matchup0.index[flips]
            num_flips = sum(flips)
            print('flipped_vs = {!r}'.format(flipped_vs))
            print('num_flips = {!r}'.format(num_flips))
            print(matchup0.mean())
            print(matchup1.mean())
            print(matchup1.mean() / matchup0.mean())
def TUPLE(*args, **kw):
    """
    Build an ``ARRAY`` describing a fixed-length sequence of ``args``.

    Args:
        *args: the item types, one per position.
        **kw: extra keyword arguments forwarded unchanged to ``ARRAY``.

    Returns:
        ``ARRAY(TYPE=..., numItems=len(args), **kw)`` where ``TYPE`` is
        the common item type when ``args`` is non-empty and all of its
        entries are the same, and ``ANY`` otherwise.
    """
    homogeneous = bool(args) and ub.allsame(args)
    item_type = ub.peek(args) if homogeneous else ANY
    return ARRAY(TYPE=item_type, numItems=len(args), **kw)
def bench_bbox_iou_method():
    """
    Time each available box-IoU implementation over a sweep of box counts.

    For every size in the sweep, two random ``ltrb`` box sets (of ``n`` and
    ``n + 1`` boxes) are generated and their pairwise IoU is computed with:

        * ``_box_ious_torch`` on the tensors as returned by ``.tensor()``
          (recorded under the label ``'iou-torch-cpu'``),
        * ``_box_ious_torch`` after moving the tensors to ``torch.device(0)``
          (``'iou-torch-gpu'``),
        * ``_box_ious_py`` on numpy data (``'iou-numpy'``),
        * ``_bbox_ious_c`` on float32 numpy data (``'iou-cython'``), only when
          that symbol is truthy.

    The outputs of all implementations are compared with ``np.allclose``
    (``atol=1e-07``); mean timings are accumulated per label and plotted with
    ``kwplot`` once the sweep finishes.

    Note from the original author: on their system the torch implementation
    was fastest (when the data was on the GPU).

    Raises:
        AssertionError: if, for some size, the implementations do not all
            produce matching results. A pairwise pass / FAIL report is
            printed before raising.
    """
    from kwimage.structs.boxes import _box_ious_torch, _box_ious_py, _bbox_ious_c

    # label -> list of mean times, one entry per size in ``xdata``
    ydata = ub.ddict(list)
    xdata = [
        10, 20, 40, 80, 100, 200, 300, 400, 500, 600, 700, 1000, 2000, 4000
    ]
    bias = 0

    # Comparator used for every agreement check; it does not depend on the
    # size being benchmarked, so it is built once.
    eq = partial(np.allclose, atol=1e-07)

    if _bbox_ious_c is None:
        print('CYTHON IMPLEMENATION IS NOT AVAILABLE')

    for num_boxes in xdata:
        # label -> IoU matrix (numpy) produced by that implementation
        results = {}

        # Use more timing loops for small inputs, but never fewer than 20
        num_loops = max(20, int(1000 / num_boxes))
        ti = ub.Timerit(num_loops, bestof=10)

        # Fixed seeds so every implementation sees the same boxes
        boxes1 = kwimage.Boxes.random(
            num_boxes, scale=10.0, rng=0, format='ltrb')
        boxes2 = kwimage.Boxes.random(
            num_boxes + 1, scale=10.0, rng=1, format='ltrb')

        # --- torch, tensors left where ``.tensor()`` put them ---
        ltrb1 = boxes1.tensor().data
        ltrb2 = boxes2.tensor().data
        for timer in ti.reset('iou-torch-cpu'):
            with timer:
                ious = _box_ious_torch(ltrb1, ltrb2, bias)
        results[ti.label] = ious.data.cpu().numpy()
        ydata[ti.label].append(ti.mean())

        # --- torch, tensors moved to device 0 ---
        gpu = torch.device(0)
        ltrb1 = boxes1.tensor().data.to(gpu)
        ltrb2 = boxes2.tensor().data.to(gpu)
        for timer in ti.reset('iou-torch-gpu'):
            with timer:
                ious = _box_ious_torch(ltrb1, ltrb2, bias)
                # The synchronize call is part of the timed region
                torch.cuda.synchronize()
        results[ti.label] = ious.data.cpu().numpy()
        ydata[ti.label].append(ti.mean())

        # --- numpy / pure python ---
        ltrb1 = boxes1.numpy().data
        ltrb2 = boxes2.numpy().data
        for timer in ti.reset('iou-numpy'):
            with timer:
                ious = _box_ious_py(ltrb1, ltrb2, bias)
        results[ti.label] = ious
        ydata[ti.label].append(ti.mean())

        # --- cython (optional) ---
        if _bbox_ious_c:
            ltrb1 = boxes1.numpy().data.astype(np.float32)
            ltrb2 = boxes2.numpy().data.astype(np.float32)
            for timer in ti.reset('iou-cython'):
                with timer:
                    ious = _bbox_ious_c(ltrb1, ltrb2, bias)
            results[ti.label] = ious
            ydata[ti.label].append(ti.mean())

        # --- agreement check across implementations ---
        if not ub.allsame(results.values(), eq):
            # Report every pair so the disagreeing implementation is visible
            for k1, k2 in it.combinations(results.keys(), 2):
                v1 = results[k1]
                v2 = results[k2]
                if eq(v1, v2):
                    print('pass: {} == {}'.format(k1, k2))
                else:
                    diff = np.abs(v1 - v2)
                    print(
                        'FAIL: {} != {}: diff(max={}, mean={}, sum={})'.format(
                            k1, k2, diff.max(), diff.mean(), diff.sum()))
            raise AssertionError('different methods report different results')

        print(
            'All methods produced the same answer for num={}'.format(
                num_boxes))

        print('num = {!r}'.format(num_boxes))
        sorted_measures = ub.map_vals(ub.sorted_vals, ti.measures)
        print('ti.measures = {}'.format(
            ub.repr2(sorted_measures, align=':', nl=2, precision=6)))

    import kwplot
    kwplot.autoplt()
    kwplot.multi_plot(xdata, ydata, xlabel='num boxes', ylabel='seconds')
    kwplot.show_if_requested()
def list_collate(inbatch):
    """
    Collate a batch whose items may have non-uniform sizes.

    Intended for detection-style datasets where each item carries a variable
    number of boxes.

    If the first element of ``inbatch`` is a tensor, the batch is handed to
    ``default_collate`` only when every element has the same non-zero
    length; otherwise the batch is returned as-is. If the first element is
    not a tensor, the work is delegated to ``_collate_else`` with this
    function as the recursive collate callback.

    Args:
        inbatch: a list of items returned by __getitem__ for each item in
            the batch

    Returns:
        the collated batch (or ``inbatch`` itself when it cannot be stacked).

    Raises:
        CollateException: if anything fails while collating. Failures that
            are not already a ``CollateException`` are wrapped in one.

    Example:
        >>> from netharn.data.collate import *
        >>> import torch
        >>> rng = np.random.RandomState(0)
        >>> inbatch = []
        >>> bsize = 4
        >>> for i in range(bsize):
        >>>     # add an image and some dummy bboxes to the batch
        >>>     img = torch.rand(3, 4, 4)  # dummy 4x4 image
        >>>     boxes = torch.LongTensor([[0, 0, 1, 1]] * i)
        >>>     item = (img, boxes)
        >>>     inbatch.append(item)
        >>> out_batch = list_collate(inbatch)
        >>> assert len(out_batch) == 2
        >>> batch_img, batch_boxes = out_batch
        >>> assert list(out_batch[0].shape) == [bsize, 3, 4, 4]
        >>> assert len(out_batch[1]) == bsize
        >>> assert len(out_batch[1][0]) == 0
        >>> assert len(out_batch[1][1]) == 1
        >>> assert len(out_batch[1][2]) == 2

    Example:
        >>> import torch
        >>> rng = np.random.RandomState(0)
        >>> inbatch = []
        >>> bsize = 4
        >>> for _ in range(bsize):
        >>>     # add an image and some dummy bboxes to the batch
        >>>     img = torch.rand(3, 8, 8)  # dummy 8x8 image
        >>>     boxes = torch.FloatTensor()
        >>>     item = (img, [boxes])
        >>>     inbatch.append(item)
        >>> out_batch = list_collate(inbatch)
        >>> assert len(out_batch) == 2
        >>> assert list(out_batch[0].shape) == [bsize, 3, 8, 8]
        >>> assert len(out_batch[1][0]) == bsize
    """
    try:
        if not torch.is_tensor(inbatch[0]):
            # Containers / other types are handled recursively
            batch = _collate_else(inbatch, list_collate)
        else:
            sizes = [len(item) for item in inbatch]
            # Only stack when every tensor has the same, non-zero length
            stackable = (
                ub.allsame(sizes) and
                not (len(sizes) == 0 or sizes[0] == 0)
            )
            batch = default_collate(inbatch) if stackable else inbatch
    except CollateException:
        # Already carries collate context; propagate untouched
        raise
    except Exception as ex:
        raise CollateException(
            'Failed to collate inbatch={}. Reason: {!r}'.format(inbatch, ex))
    return batch
def benchmark_reversed_range():
    """
    Benchmark different ways of materializing a range in reverse order.

    Each registered method takes a length ``x`` and returns the list
    ``[start + x - 1, ..., start]`` for a fixed ``start`` of 10. The methods
    are first checked against each other on ``x=10``, then timed with
    ``timerit`` over ``x = 2 ** 0 ... 2 ** 13``. The timings are collected
    into a long-form ``pandas.DataFrame``, printed, and plotted (minimum time
    versus ``x``, one line per method) via ``kwplot`` / seaborn.

    Raises:
        AssertionError: if the registered methods do not all return the same
            result for ``x=10``.
    """
    import ubelt as ub
    import pandas as pd
    import timerit
    import itertools as it

    # Functions decorated with ``@methods.append`` are registered here. The
    # decorator returns None, so they are only reachable through this list.
    methods = []

    def custom_reversed_range_v1(start, stop):
        last = stop - 1
        for offset in range(stop - start):
            yield last - offset

    def custom_reversed_range_v2(start, stop):
        countdown = it.count(stop - 1, step=-1)
        yield from it.islice(countdown, stop - start)

    @methods.append
    def reversed_builtin(x):
        start = 10
        stop = x + start
        return list(reversed(range(start, stop)))

    @methods.append
    def negative_range(x):
        start = 10
        stop = x + start
        return list(range(stop - 1, start - 1, -1))

    # @methods.append
    # def custom_v1(x):
    #     start = 10
    #     stop = x + start
    #     ret = list(custom_reversed_range_v1(start, stop))
    #     return ret

    # @methods.append
    # def custom_v2(x):
    #     start = 10
    #     stop = x + start
    #     ret = list(custom_reversed_range_v2(start, stop))
    #     return ret

    # Function names double as the method labels in the results table
    method_lut = {func.__name__: func for func in methods}

    # Sanity check: every method must agree on a small input
    results = {name: func(10) for name, func in method_lut.items()}
    print('results = {}'.format(ub.repr2(results, nl=1, align=':')))
    if not ub.allsame(results.values()):
        raise AssertionError('Failed consistency check')

    ti = timerit.Timerit(1000, bestof=10, verbose=2)

    basis = {
        'method': list(method_lut),
        'x': [2 ** i for i in range(14)],
    }

    # One row is produced per (method, x) combination.
    rows = []
    for params in ub.named_product(basis):
        key = ub.repr2(params, compact=1, si=1)
        method = method_lut[params['method']]
        # Everything except the method name is passed to the method itself
        kwargs = {k: v for k, v in params.items() if k != 'method'}
        # Timerit runs a user-specified number of loops and computes time
        # stats with a methodology similar to timeit.
        for timer in ti.reset(key):
            # Untimed setup logic would go here.
            with timer:
                # Only this call is timed
                method(**kwargs)
        rows.append({
            'mean': ti.mean(),
            'min': ti.min(),
            'key': key,
            **params,
        })

    # The rows form a long-form table, which seaborn consumes directly.
    data = pd.DataFrame(rows)
    print(data)

    plot = True
    if plot:
        # import seaborn as sns
        # kwplot autosns works well for IPython and script execution.
        # not sure about notebooks.
        import kwplot
        sns = kwplot.autosns()

        # Your variables may change
        fig = kwplot.figure(fnum=1, doclf=True)
        ax = fig.gca()
        sns.lineplot(data=data, x='x', y='min', hue='method', marker='o',
                     ax=ax)
        # ax.set_xscale('log')
        ax.set_title('Benchmark Reveral Methods ')
        ax.set_xlabel('A better x-variable description')
        ax.set_ylabel('A better y-variable description')