def get_tokens(self, types=None):
    r"""
    Args:
        types (None): (default = None)

    Returns:
        list: tokens

    CommandLine:
        python -m mtgmonte.mtgobjs --exec-get_tokens --show

    Example:
        >>> # ENABLE_DOCTEST
        >>> from mtgmonte.mtgobjs import *  # NOQA
        >>> tokens = tokenize_manacost('12WWUBRG(W/B)(2/G)(U/P)(U/P)')
        >>> self = ManaCost(tokens)
        >>> types = ['hybrid']
        >>> tokens = self.get_tokens(types)
        >>> result = ('tokens = %s' % (ut.repr2(tokens),))
        >>> print(result)
        tokens = [('(W/B)', 'hybrid'), ('(2/G)', 'hybrid')]
    """
    if types is None:
        tokens = [(color, type_)
                  for type_, color_list in self.type2_manas.items()
                  for color in color_list]
    else:
        tokens = [(color, type_)
                  for type_, color_list in ut.dict_subset(self.type2_manas, types, []).items()
                  for color in color_list]
    return tokens
def on_pick(event, infr=None):
    import wbia.plottool as pt
    logger.info('ON PICK: %r' % (event,))
    artist = event.artist
    plotdat = pt.get_plotdat_dict(artist)
    if plotdat:
        if 'node' in plotdat:
            all_node_data = ut.sort_dict(plotdat['node_data'].copy())
            visual_node_data = ut.dict_subset(all_node_data, infr.visual_node_attrs, None)
            node_data = ut.delete_dict_keys(all_node_data, infr.visual_node_attrs)
            node = plotdat['node']
            node_data['degree'] = infr.graph.degree(node)
            node_label = infr.pos_graph.node_label(node)
            logger.info('visual_node_data: ' + ut.repr2(visual_node_data, nl=1))
            logger.info('node_data: ' + ut.repr2(node_data, nl=1))
            ut.cprint('node: ' + ut.repr2(plotdat['node']), 'blue')
            logger.info('(pcc) node_label = %r' % (node_label,))
            logger.info('artist = %r' % (artist,))
        elif 'edge' in plotdat:
            all_edge_data = ut.sort_dict(plotdat['edge_data'].copy())
            logger.info(infr.repr_edge_data(all_edge_data))
            ut.cprint('edge: ' + ut.repr2(plotdat['edge']), 'blue')
            logger.info('artist = %r' % (artist,))
        else:
            logger.info('???: ' + ut.repr2(plotdat))
    logger.info(ut.get_timestamp())
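# A minimal sketch (not the wbia API) of the attribute-splitting pattern used in
# on_pick above: one dict subset keeps the purely visual attributes for display,
# and the remainder is treated as the data payload. The attribute names below are
# hypothetical placeholders.
def _split_visual_attrs(all_node_data, visual_attrs):
    visual = {k: all_node_data.get(k, None) for k in visual_attrs}
    data = {k: v for k, v in all_node_data.items() if k not in visual_attrs}
    return visual, data

_example_node_data = {'color': 'red', 'size': 10, 'name_label': 7, 'timestamp': 1000}
_visual, _data = _split_visual_attrs(_example_node_data, ['color', 'size'])
assert _visual == {'color': 'red', 'size': 10}
assert _data == {'name_label': 7, 'timestamp': 1000}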
def __init__(self, fname, ext='.cPkl'):
    relevant_params = relevance[fname]
    relevant_cfg = ut.dict_subset(config, relevant_params)
    cfgstr = ut.get_cfg_lbl(relevant_cfg)
    dbdir = ut.truepath('/raid/work/Oxford/')
    super(SMKCacher, self).__init__(fname, cfgstr, cache_dir=dbdir, ext=ext)
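# A minimal sketch of why SMKCacher subsets the config before building its cache
# label: the label only changes when a *relevant* parameter changes, so edits to
# unrelated settings do not invalidate the cache. The parameter names and label
# format here are hypothetical, not the utool implementation.
def _cfg_label(config, relevant_params):
    relevant_cfg = {k: config[k] for k in relevant_params}
    return ','.join('%s=%r' % (k, relevant_cfg[k]) for k in sorted(relevant_cfg))

_config_a = {'nAssign': 2, 'dim_size': 700, 'unrelated_flag': True}
_config_b = {'nAssign': 2, 'dim_size': 700, 'unrelated_flag': False}
assert (_cfg_label(_config_a, ['nAssign', 'dim_size']) ==
        _cfg_label(_config_b, ['nAssign', 'dim_size']))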
def basic_infodict(card):
    _basic_keys = [
        'name', 'mana_cost', 'color_indicator', 'types', 'subtypes',
        'power', 'toughness', 'loyalty',
        #'rules_text'
    ]
    basic_keys = [key for key in _basic_keys if card.__dict__[key]]
    basic_infodict = ut.dict_subset(card.__dict__, basic_keys)
    return basic_infodict
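# A small sketch of the pattern in basic_infodict: drop keys whose values are
# falsy, then take the ordered subset. Plain dict comprehensions stand in for
# ut.dict_subset; the example card dict is hypothetical.
_example_card = {'name': 'Grizzly Bears', 'mana_cost': '1G', 'power': '2',
                 'toughness': '2', 'loyalty': None, 'color_indicator': ''}
_wanted = ['name', 'mana_cost', 'color_indicator', 'power', 'toughness', 'loyalty']
_present = [key for key in _wanted if _example_card[key]]
_infodict = {key: _example_card[key] for key in _present}
assert _infodict == {'name': 'Grizzly Bears', 'mana_cost': '1G',
                     'power': '2', 'toughness': '2'}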
def main():
    tests_ = tests
    subset = ['consistent_info', 'inconsistent_info']
    subset = ['chain1', 'chain2', 'chain3']
    subset += ['triangle1', 'triangle2', 'triangle3']
    # subset = ['inconsistent_info']
    tests_ = ut.dict_subset(tests, subset)
    for name, func in tests_.items():
        logger.info('\n==============')
        ut.cprint('name = %r' % (name,), 'yellow')
        uvw_list, pass_values, fail_values = func()
        G = build_graph(uvw_list)
        nodes = sorted(G.nodes())
        edges = [tuple(sorted(e)) for e in G.edges()]
        edges = ut.sortedby2(edges, edges)
        n_annots = len(nodes)
        n_names = n_annots
        annot_idxs = list(range(n_annots))
        lookup_annot_idx = ut.dzip(nodes, annot_idxs)
        nx.set_node_attributes(G, name='annot_idx', values=lookup_annot_idx)
        edge_probs = np.array([
            get_edge_id_probs(G, aid1, aid2, n_names) for aid1, aid2 in edges
        ])
        logger.info('nodes = %r' % (nodes,))
        # logger.info('edges = %r' % (edges,))
        logger.info('Noisy Observations')
        logger.info(
            pd.DataFrame(edge_probs, columns=['same', 'diff'], index=pd.Series(edges)))
        edge_probs = None
        cut_step(
            G,
            nodes,
            edges,
            n_annots,
            n_names,
            lookup_annot_idx,
            edge_probs,
            pass_values,
            fail_values,
        )
        edge_probs = bp_step(G, nodes, edges, n_annots, n_names, lookup_annot_idx)
def get_pipecfg_list(test_cfg_name_list, ibs=None, verbose=None): r""" Builds a list of varied query configurations. Only custom configs depend on an ibs object. The order of the output is not gaurenteed to aggree with input order. FIXME: This breaks if you proot=BC_DTW and ibs is None Args: test_cfg_name_list (list): list of strs ibs (wbia.IBEISController): wbia controller object (optional) Returns: tuple: (cfg_list, cfgx2_lbl) - cfg_list (list): list of config objects cfgx2_lbl (list): denotes which parameters are being varied. If there is just one config then nothing is varied CommandLine: python -m wbia get_pipecfg_list:0 python -m wbia get_pipecfg_list:1 --db humpbacks python -m wbia get_pipecfg_list:2 Example: >>> # ENABLE_DOCTEST >>> from wbia.expt.experiment_helpers import * # NOQA >>> import wbia >>> ibs = wbia.opendb(defaultdb='testdb1') >>> #test_cfg_name_list = ['best', 'custom', 'custom:sv_on=False'] >>> #test_cfg_name_list = ['default', 'default:sv_on=False', 'best'] >>> test_cfg_name_list = ['default', 'default:sv_on=False', 'best'] >>> # execute function >>> (pcfgdict_list, pipecfg_list) = get_pipecfg_list(test_cfg_name_list, ibs) >>> # verify results >>> assert pipecfg_list[0].sv_cfg.sv_on is True >>> assert pipecfg_list[1].sv_cfg.sv_on is False >>> pipecfg_lbls = get_varied_pipecfg_lbls(pcfgdict_list) >>> result = ('pipecfg_lbls = '+ ut.repr2(pipecfg_lbls)) >>> print(result) pipecfg_lbls = ['default:', 'default:sv_on=False'] Example1: >>> # DISABLE_DOCTEST >>> import wbia_flukematch.plugin >>> from wbia.expt.experiment_helpers import * # NOQA >>> import wbia >>> ibs = wbia.opendb(defaultdb='humpbacks') >>> test_cfg_name_list = ['default:pipeline_root=BC_DTW,decision=average,crop_dim_size=[960,500]', 'default:K=[1,4]'] >>> (pcfgdict_list, pipecfg_list) = get_pipecfg_list(test_cfg_name_list, ibs) >>> pipecfg_lbls = get_varied_pipecfg_lbls(pcfgdict_list) >>> result = ('pipecfg_lbls = '+ ut.repr2(pipecfg_lbls)) >>> print(result) >>> print_pipe_configs(pcfgdict_list, pipecfg_list) """ if verbose is None: verbose = ut.VERBOSE if ut.VERBOSE: logger.info( '[expt_help.get_pipecfg_list] building pipecfg_list using: %s' % test_cfg_name_list ) if isinstance(test_cfg_name_list, six.string_types): test_cfg_name_list = [test_cfg_name_list] _standard_cfg_names = [] _pcfgdict_list = [] # HACK: Parse out custom configs first for test_cfg_name in test_cfg_name_list: if test_cfg_name.startswith('custom:') or test_cfg_name == 'custom': logger.info('[expthelpers] Parsing nonstandard custom config') assert False, 'custom is no longer supported' # if test_cfg_name.startswith('custom:'): # # parse out modifications to custom # cfgstr_list = ':'.join(test_cfg_name.split(':')[1:]).split(',') # augcfgdict = ut.parse_cfgstr_list(cfgstr_list, smartcast=True) # else: # augcfgdict = {} # # Take the configuration from the wbia object # pipe_cfg = ibs.--cfg.query_cfg.deepcopy() # # Update with augmented params # pipe_cfg.update_query_cfg(**augcfgdict) # # Parse out a standard cfgdict # cfgdict = dict(pipe_cfg.parse_items()) # cfgdict['_cfgname'] = 'custom' # cfgdict['_cfgstr'] = test_cfg_name # _pcfgdict_list.append(cfgdict) else: _standard_cfg_names.append(test_cfg_name) # Handle stanndard configs next if len(_standard_cfg_names) > 0: # Get parsing information # cfg_default_dict = dict(Config.QueryConfig().parse_items()) # valid_keys = list(cfg_default_dict.keys()) cfgstr_list = _standard_cfg_names named_defaults_dict = ut.dict_subset( experiment_configs.__dict__, experiment_configs.TEST_NAMES ) 
alias_keys = experiment_configs.ALIAS_KEYS # Parse standard pipeline cfgstrings metadata = {'ibs': ibs} dict_comb_list = cfghelpers.parse_cfgstr_list2( cfgstr_list, named_defaults_dict, cfgtype=None, alias_keys=alias_keys, # Hack out valid keys for humpbacks # valid_keys=valid_keys, strict=False, metadata=metadata, ) # Get varied params (there may be duplicates) _pcfgdict_list.extend(ut.flatten(dict_comb_list)) # Expand cfgdicts into PipelineConfig config objects # TODO: respsect different algorithm parameters like flukes if ibs is None: configclass_list = [Config.QueryConfig] * len(_pcfgdict_list) else: root_to_config = ibs.depc_annot.configclass_dict.copy() from wbia.algo.smk import smk_pipeline root_to_config['smk'] = smk_pipeline.SMKRequestConfig configclass_list = [ root_to_config.get( _cfgdict.get('pipeline_root', _cfgdict.get('proot', 'vsmany')), Config.QueryConfig, ) for _cfgdict in _pcfgdict_list ] _pipecfg_list = [ cls(**_cfgdict) for cls, _cfgdict in zip(configclass_list, _pcfgdict_list) ] # Enforce rule that removes duplicate configs # by using feasiblity from wbia.algo.Config # TODO: Move this unique finding code to its own function # and then move it up one function level so even the custom # configs can be uniquified _flag_list = ut.flag_unique_items(_pipecfg_list) cfgdict_list = ut.compress(_pcfgdict_list, _flag_list) pipecfg_list = ut.compress(_pipecfg_list, _flag_list) if verbose: # for cfg in _pipecfg_list: # logger.info(cfg.get_cfgstr()) # logger.info(cfg) logger.info( '[harn.help] return %d / %d unique pipeline configs from: %r' % (len(cfgdict_list), len(_pcfgdict_list), test_cfg_name_list) ) if ut.get_argflag(('--pcfginfo', '--pinfo', '--pipecfginfo')): ut.colorprint('Requested PcfgInfo for tests... ', 'red') print_pipe_configs(cfgdict_list, pipecfg_list) ut.colorprint('Finished Reporting PcfgInfo. Exiting', 'red') sys.exit(0) return (cfgdict_list, pipecfg_list)
def tune_flann(dpts, target_precision=.90, build_weight=0.50,
               memory_weight=0.00, sample_fraction=0.01):
    r"""
    References:
        http://www.cs.ubc.ca/research/flann/uploads/FLANN/flann_pami2014.pdf
        http://www.cs.ubc.ca/research/flann/uploads/FLANN/flann_manual-1.8.4.pdf
        http://docs.opencv.org/trunk/modules/flann/doc/flann_fast_approximate_nearest_neighbor_search.html

    Math::
        cost of an algorithm is:

        LaTeX:
            \cost = \frac{\search + build_weight * \build}
                         {\minoverparams(\search + build_weight * \build)}
                    + memory_weight * \memory

    Args:
        dpts (ndarray):

        target_precision (float): number between 0 and 1 representing desired
            accuracy. Higher values are more accurate.

        build_weight (float): importance weight given to minimizing build time
            relative to search time. This number can range from 0 to infinity.
            Typically, because building is a more complex computation, you want
            to keep the number relatively low (less than 1); otherwise you'll
            end up getting a linear search (no build time).

        memory_weight (float): importance of memory relative to total speed.
            A value less than 1 gives more importance to the time spent and a
            value greater than 1 gives more importance to the memory usage.

        sample_fraction (float): number between 0 and 1 representing the
            fraction of the input data to use in the optimization. A higher
            number uses more data.

    Returns:
        dict: tuned_params

    CommandLine:
        python -m vtool.nearest_neighbors --test-tune_flann
    """
    with ut.Timer('tuning flann'):
        print('Autotuning flann with %d %dD vectors' % (dpts.shape[0], dpts.shape[1]))
        print('a sample of %d vectors will be used' % (int(dpts.shape[0] * sample_fraction)))
        flann = pyflann.FLANN()
        #num_data = len(dpts)
        flann_atkwargs = dict(algorithm='autotuned',
                              target_precision=target_precision,
                              build_weight=build_weight,
                              memory_weight=memory_weight,
                              sample_fraction=sample_fraction)
        suffix = repr(flann_atkwargs)
        badchar_list = ',{}\': '
        for badchar in badchar_list:
            suffix = suffix.replace(badchar, '')
        print('flann_atkwargs:')
        print(utool.dict_str(flann_atkwargs))
        print('starting optimization')
        tuned_params = flann.build_index(dpts, **flann_atkwargs)
        print('finished optimization')
        # The algorithm is sometimes returned as 'default', which is not very
        # useful because the default name is embedded in the pyflann module
        # where most would not care to look. This finds the default name for you.
        for key in ['algorithm', 'centers_init', 'log_level']:
            val = tuned_params.get(key, None)
            if val == 'default':
                dict_ = pyflann.FLANNParameters._translation_[key]
                other_algs = ut.dict_find_other_sameval_keys(dict_, 'default')
                assert len(other_algs) == 1, 'more than 1 default for key=%r' % (key,)
                tuned_params[key] = other_algs[0]
        common_params = [
            'algorithm',
            'checks',
        ]
        relevant_params_dict = dict(
            linear=['algorithm'],
            #---
            kdtree=['trees'],
            #---
            kmeans=[
                'branching',
                'iterations',
                'centers_init',
                'cb_index',
            ],
            #---
            lsh=[
                'table_number',
                'key_size',
                'multi_probe_level',
            ],
        )
        relevant_params_dict['composite'] = (
            relevant_params_dict['kmeans'] +
            relevant_params_dict['kdtree'] +
            common_params)
        relevant_params_dict['kmeans'] += common_params
        relevant_params_dict['kdtree'] += common_params
        relevant_params_dict['lsh'] += common_params
        #kdtree_single_params = [
        #    'leaf_max_size',
        #]
        #other_params = [
        #    'build_weight',
        #    'sorted',
        #]
        out_file = 'flann_tuned' + suffix
        utool.write_to(out_file, ut.dict_str(tuned_params, sorted_=True, newlines=True))
        flann.delete_index()
        if tuned_params['algorithm'] in relevant_params_dict:
            print('relevant_params=')
            relevant_params = relevant_params_dict[tuned_params['algorithm']]
            print(ut.dict_str(ut.dict_subset(tuned_params, relevant_params),
                              sorted_=True, newlines=True))
            print('irrelevant_params=')
            print(ut.dict_str(ut.dict_setdiff(tuned_params, relevant_params),
                              sorted_=True, newlines=True))
        else:
            print('unknown tuned algorithm=%r' % (tuned_params['algorithm'],))
            print('all_tuned_params=')
            print(ut.dict_str(tuned_params, sorted_=True, newlines=True))
    return tuned_params
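# A minimal sketch of the relevant/irrelevant split done at the end of tune_flann,
# using plain dict comprehensions in place of ut.dict_subset / ut.dict_setdiff.
# The tuned parameter values below are made up for illustration.
_tuned = {'algorithm': 'kmeans', 'branching': 32, 'iterations': 5,
          'checks': 128, 'build_weight': 0.5, 'sorted': 1}
_relevant_keys = ['branching', 'iterations', 'checks', 'algorithm']
_relevant = {k: _tuned[k] for k in _relevant_keys if k in _tuned}
_irrelevant = {k: v for k, v in _tuned.items() if k not in _relevant_keys}
assert set(_irrelevant) == {'build_weight', 'sorted'}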
def new_cpd(self, parents=None, pmf_func=None): """ Makes a new random variable that is an instance of this tempalte parents : only used to define the name of this node. """ if pmf_func is None: pmf_func = self.pmf_func # --- MAKE VARIABLE ID def _getid(obj): if isinstance(obj, int): return str(obj) elif isinstance(obj, six.string_types): return obj else: return obj._template_id if not ut.isiterable(parents): parents = [parents] template_ids = [_getid(cpd) for cpd in parents] HACK_SAME_IDS = True # TODO: keep track of parent index inheritence # then rectify uniqueness based on that if HACK_SAME_IDS and ut.allsame(template_ids): _id = template_ids[0] else: _id = ''.join(template_ids) variable = ''.join([self.varpref, _id]) # variable = '_'.join([self.varpref, '{' + _id + '}']) # variable = '$%s$' % (variable,) evidence_cpds = [cpd for cpd in parents if hasattr(cpd, 'ttype')] if len(evidence_cpds) == 0: evidence_cpds = None variable_card = len(self.basis) statename_dict = { variable: self.basis, } if self.evidence_ttypes is not None: if any(cpd.ttype != tcpd.ttype for cpd, tcpd in zip(evidence_cpds, evidence_cpds)): raise ValueError('Evidence is not of appropriate type') evidence_bases = [cpd.variable_statenames for cpd in evidence_cpds] evidence_card = list(map(len, evidence_bases)) evidence_states = list(ut.iprod(*evidence_bases)) for cpd in evidence_cpds: _dict = ut.dict_subset(cpd.statename_dict, [cpd.variable]) statename_dict.update(_dict) evidence = [cpd.variable for cpd in evidence_cpds] else: if evidence_cpds is not None: raise ValueError('Gave evidence for evidence-less template') evidence = None evidence_card = None # --- MAKE TABLE VALUES if pmf_func is not None: if isinstance(pmf_func, list): values = np.array(pmf_func) else: values = np.array([[ pmf_func(vstate, *estates) for estates in evidence_states ] for vstate in self.basis]) ensure_normalized = True if ensure_normalized: values = values / values.sum(axis=0) else: # assume uniform fill_value = 1.0 / variable_card if evidence_card is None: values = np.full((1, variable_card), fill_value) else: values = np.full([variable_card] + list(evidence_card), fill_value) try: cpd = pgmpy.factors.TabularCPD( variable=variable, variable_card=variable_card, values=values, evidence=evidence, evidence_card=evidence_card, # statename_dict=statename_dict, state_names=statename_dict, ) except Exception as ex: ut.printex( ex, 'Failed to create TabularCPD', keys=[ 'variable', 'variable_card', 'statename_dict', 'evidence_card', 'evidence', 'values.shape', ], ) ut.embed() raise cpd.ttype = self.ttype cpd._template_ = self cpd._template_id = _id return cpd
def estimate_twoday_count(ibs, day1, day2, filter_kw):
    #gid_list = ibs.get_valid_gids()
    all_images = ibs.images()
    dates = [dt.date() for dt in all_images.datetime]
    date_to_images = all_images.group_items(dates)
    date_to_images = ut.sort_dict(date_to_images)
    #date_hist = ut.map_dict_vals(len, date2_gids)
    #print('date_hist = %s' % (ut.repr2(date_hist, nl=2),))
    verbose = 0

    visit_dates = [day1, day2]
    visit_info_list_ = []
    for day in visit_dates:
        images = date_to_images[day]
        aids = ut.flatten(images.aids)
        aids = ibs.filter_annots_general(aids, filter_kw=filter_kw, verbose=verbose)
        nids = ibs.get_annot_name_rowids(aids)
        grouped_aids = ut.group_items(aids, nids)
        unique_nids = ut.unique(list(grouped_aids.keys()))

        if False:
            aids_list = ut.take(grouped_aids, unique_nids)
            for aids in aids_list:
                if len(aids) > 30:
                    break
            timedeltas_list = ibs.get_unflat_annots_timedelta_list(aids_list)
            # Do the five second rule
            marked_thresh = 5
            flags = []
            for nid, timedeltas in zip(unique_nids, timedeltas_list):
                flags.append(timedeltas.max() > marked_thresh)
            print('Unmarking %d names' % (len(flags) - sum(flags)))
            unique_nids = ut.compress(unique_nids, flags)
            grouped_aids = ut.dict_subset(grouped_aids, unique_nids)

        unique_aids = ut.flatten(list(grouped_aids.values()))
        info = {
            'unique_nids': unique_nids,
            'grouped_aids': grouped_aids,
            'unique_aids': unique_aids,
        }
        visit_info_list_.append(info)

    # Estimate statistics
    from ibeis.other import dbinfo
    aids_day1, aids_day2 = ut.take_column(visit_info_list_, 'unique_aids')
    nids_day1, nids_day2 = ut.take_column(visit_info_list_, 'unique_nids')
    resight_nids = ut.isect(nids_day1, nids_day2)
    nsight1 = len(nids_day1)
    nsight2 = len(nids_day2)
    resight = len(resight_nids)
    lp_index, lp_error = dbinfo.sight_resight_count(nsight1, nsight2, resight)

    if False:
        from ibeis.other import dbinfo
        print('DAY 1 STATS:')
        _ = dbinfo.get_dbinfo(ibs, aid_list=aids_day1)  # NOQA
        print('DAY 2 STATS:')
        _ = dbinfo.get_dbinfo(ibs, aid_list=aids_day2)  # NOQA
        print('COMBINED STATS:')
        _ = dbinfo.get_dbinfo(ibs, aid_list=aids_day1 + aids_day2)  # NOQA

    print('%d annots on day 1' % (len(aids_day1),))
    print('%d annots on day 2' % (len(aids_day2),))
    print('%d names on day 1' % (nsight1,))
    print('%d names on day 2' % (nsight2,))
    print('resight = %r' % (resight,))
    print('lp_index = %r ± %r' % (lp_index, lp_error))
    return nsight1, nsight2, resight, lp_index, lp_error
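# estimate_twoday_count relies on a sight-resight estimate. A minimal sketch of
# the classical Lincoln-Petersen index (names seen on day 1 times names seen on
# day 2, divided by the number resighted on both days) is shown below; the exact
# error term returned by dbinfo.sight_resight_count is not reproduced here.
def _lincoln_petersen_index(nsight1, nsight2, resight):
    if resight == 0:
        return float('inf')
    return (nsight1 * nsight2) / resight

assert _lincoln_petersen_index(50, 40, 20) == 100.0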
def _test_buffered_generator_general2(bgfunc, bgargs, fgfunc, target_looptime=1.0, serial_cheat=1, buffer_size=2, show_serial=True): """ # We are going to generate output of bgfunc in the background while # fgfunc is running in the foreground. fgfunc takes results of bffunc as # args. # --- Hyperparams target_looptime = 1.5 # maximum time to run all loops """ import utool as ut with ut.Timer('One* call to bgfunc') as t_bgfunc: results = [bgfunc(arg) for arg in bgargs] bgfunctime = t_bgfunc.ellapsed / len(bgargs) #fgfunc = ut.is_prime with ut.Timer('One* call to fgfunc') as t_fgfunc: [fgfunc(x) for x in results] fgfunctime = t_fgfunc.ellapsed / len(bgargs) # compute amount of loops to run est_looptime = (bgfunctime + fgfunctime) _num_loops = round(target_looptime // est_looptime) num_data = int(_num_loops // len(bgargs)) num_loops = int(num_data * len(bgargs)) serial_cheat = min(serial_cheat, num_data) data = ut.flatten([bgargs] * num_data) est_tfg = fgfunctime * num_loops est_tbg = bgfunctime * num_loops est_needed_buffers = fgfunctime / bgfunctime print('Estimated stats' + ut.repr4(ut.dict_subset(locals(), [ 'num_loops', 'bgfunctime', 'fgfunctime', 'est_tfg', 'est_tbg', 'serial_cheat', 'buffer_size', 'est_needed_buffers', ]))) if show_serial: with ut.Timer('serial') as t1: # cheat for serial to make it go faster for x in map(bgfunc, data[:len(data) // serial_cheat]): fgfunc(x) t_serial = serial_cheat * t1.ellapsed print('...toc(\'adjusted_serial\') = %r' % (t_serial)) with ut.Timer('ut.buffered_generator') as t2: gen_ = ut.buffered_generator(map(bgfunc, data), buffer_size=buffer_size) for x in gen_: fgfunc(x) with ut.Timer('ut.generate') as t3: gen_ = ut.generate(bgfunc, data, chunksize=buffer_size, quiet=1, verbose=0) for x in gen_: fgfunc(x) # Compare theoretical vs practical efficiency print('\n Theoretical Results') def parallel_efficiency(ellapsed, est_tfg, est_tbg): return (1 - ((ellapsed - est_tfg) / est_tbg)) * 100 if show_serial: print('Theoretical gain (serial) = %.3f%%' % ( parallel_efficiency(t_serial, est_tfg, est_tbg),)) print('Theoretical gain (ut.buffered_generator) = %.3f%%' % ( parallel_efficiency(t2.ellapsed, est_tfg, est_tbg),)) print('Theoretical gain (ut.generate) = %.2f%%' % ( parallel_efficiency(t3.ellapsed, est_tfg, est_tbg),)) if show_serial: prac_tbg = t_serial - est_tfg print('\n Practical Results') print('Practical gain (serial) = %.3f%%' % ( parallel_efficiency(t1.ellapsed, est_tfg, prac_tbg),)) print('Practical gain (ut.buffered_generator) = %.3f%%' % ( parallel_efficiency(t2.ellapsed, est_tfg, prac_tbg),)) print('Practical gain (ut.generate) = %.2f%%' % ( parallel_efficiency(t3.ellapsed, est_tfg, prac_tbg),))
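# A toy check of the parallel_efficiency formula used in the generator benchmarks
# above: if the measured loop time equals the foreground time alone (perfect
# overlap of background work), the gain is 100%; if it equals foreground plus
# background time (no overlap), the gain is 0%. The timings are made up.
def _parallel_efficiency(ellapsed, est_tfg, est_tbg):
    return (1 - ((ellapsed - est_tfg) / est_tbg)) * 100

_est_tfg, _est_tbg = 2.0, 4.0
assert _parallel_efficiency(_est_tfg, _est_tfg, _est_tbg) == 100.0
assert _parallel_efficiency(_est_tfg + _est_tbg, _est_tfg, _est_tbg) == 0.0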
def get_hesaff_params(feat_cfg):
    import pyhesaff
    default_keys = list(pyhesaff.get_hesaff_default_params().keys())
    hesaff_param_dict = ut.dict_subset(feat_cfg, default_keys)
    return hesaff_param_dict
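# A minimal sketch of the intersection pattern in get_hesaff_params: keep only
# the config entries that the detector actually recognizes. Plain dicts stand in
# for the feature config and for pyhesaff's defaults; the key names here are
# hypothetical.
_feat_cfg = {'affine_invariance': True, 'scale_max': 9001, 'unrelated_option': 3}
_default_keys = ['affine_invariance', 'scale_max', 'scale_min']
_hesaff_params = {k: _feat_cfg[k] for k in _default_keys if k in _feat_cfg}
assert _hesaff_params == {'affine_invariance': True, 'scale_max': 9001}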
def get_annotmatch_subgraph(ibs):
    r"""
    http://bokeh.pydata.org/en/latest/
    https://github.com/jsexauer/networkx_viewer

    TODO: Need a special visualization
        In the web I need:
            * graph of annotations matches.
            * can move them around.
            * edit lines between them.
            * http://stackoverflow.com/questions/15373530/web-graph-visualization-tool
        This should share functionality with a name view.

    Args:
        ibs (IBEISController): ibeis controller object

    CommandLine:
        python -m ibeis.annotmatch_funcs --exec-get_annotmatch_subgraph --show

        # Networkx example
        python -m ibeis.viz.viz_graph --test-show_chipmatch_graph:0 --show

    Ignore:
        from ibeis import viz

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.annotmatch_funcs import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='PZ_MTEST')
        >>> result = get_annotmatch_subgraph(ibs)
        >>> ut.show_if_requested()
    """
    #import ibeis
    #ibs = ibeis.opendb(db='PZ_MTEST')
    #rowids = ibs._get_all_annotmatch_rowids()
    #aids1 = ibs.get_annotmatch_aid1(rowids)
    #aids2 = ibs.get_annotmatch_aid2(rowids)
    #
    #
    nids = ibs.get_valid_nids()
    nids = nids[0:5]
    aids_list = ibs.get_name_aids(nids)
    import itertools
    unflat_edges = (list(itertools.product(aids, aids)) for aids in aids_list)
    aid_pairs = [tup for tup in ut.iflatten(unflat_edges) if tup[0] != tup[1]]
    aids1 = ut.get_list_column(aid_pairs, 0)
    aids2 = ut.get_list_column(aid_pairs, 1)
    # Enumerate annotmatch properties
    rng = np.random.RandomState(0)
    edge_props = {
        'weight': rng.rand(len(aids1)),
        'reviewer_confidence': rng.rand(len(aids1)),
        'algo_confidence': rng.rand(len(aids1)),
    }
    # Remove data that does not need to be visualized
    # (dont show all the aids if you dont have to)
    thresh = .5
    flags = edge_props['weight'] > thresh
    aids1_ = ut.compress(aids1, flags)
    aids2_ = ut.compress(aids2, flags)
    chosen_props = ut.dict_subset(edge_props, ['weight'])
    edge_props = ut.map_dict_vals(ut.partial(ut.compress, flag_list=flags), chosen_props)
    edge_keys = list(edge_props.keys())
    edge_vals = ut.dict_take(edge_props, edge_keys)
    edge_attr_list = [dict(zip(edge_keys, vals_)) for vals_ in zip(*edge_vals)]
    unique_aids = list(set(aids1_ + aids2_))
    # Make a graph between the chips
    nodes = unique_aids
    edges = list(zip(aids1_, aids2_, edge_attr_list))
    import networkx as nx
    graph = nx.DiGraph()
    graph.add_nodes_from(nodes)
    graph.add_edges_from(edges)
    from ibeis.viz import viz_graph
    fnum = None
    #zoom = kwargs.get('zoom', .4)
    viz_graph.viz_netx_chipgraph(ibs, graph, fnum=fnum, with_images=True,
                                 augment_graph=False)
def _test_buffered_generator_general2(bgfunc, bgargs, fgfunc, target_looptime=1.0, serial_cheat=1, buffer_size=2, show_serial=True): """ # We are going to generate output of bgfunc in the background while # fgfunc is running in the foreground. fgfunc takes results of bffunc as # args. # --- Hyperparams target_looptime = 1.5 # maximum time to run all loops """ import utool as ut with ut.Timer('One* call to bgfunc') as t_bgfunc: results = [bgfunc(arg) for arg in bgargs] bgfunctime = t_bgfunc.ellapsed / len(bgargs) #fgfunc = ut.is_prime with ut.Timer('One* call to fgfunc') as t_fgfunc: [fgfunc(x) for x in results] fgfunctime = t_fgfunc.ellapsed / len(bgargs) # compute amount of loops to run est_looptime = (bgfunctime + fgfunctime) _num_loops = round(target_looptime // est_looptime) num_data = int(_num_loops // len(bgargs)) num_loops = int(num_data * len(bgargs)) serial_cheat = min(serial_cheat, num_data) data = ut.flatten([bgargs] * num_data) est_tfg = fgfunctime * num_loops est_tbg = bgfunctime * num_loops est_needed_buffers = fgfunctime / bgfunctime print('Estimated stats' + ut.dict_str( ut.dict_subset(locals(), [ 'num_loops', 'bgfunctime', 'fgfunctime', 'est_tfg', 'est_tbg', 'serial_cheat', 'buffer_size', 'est_needed_buffers', ]))) if show_serial: with ut.Timer('serial') as t1: # cheat for serial to make it go faster for x in map(bgfunc, data[:len(data) // serial_cheat]): fgfunc(x) t_serial = serial_cheat * t1.ellapsed print('...toc(\'adjusted_serial\') = %r' % (t_serial)) with ut.Timer('ut.buffered_generator') as t2: gen_ = ut.buffered_generator(map(bgfunc, data), buffer_size=buffer_size) for x in gen_: fgfunc(x) with ut.Timer('ut.generate') as t3: gen_ = ut.generate(bgfunc, data, chunksize=buffer_size, quiet=1, verbose=0) for x in gen_: fgfunc(x) # Compare theoretical vs practical efficiency print('\n Theoretical Results') def parallel_efficiency(ellapsed, est_tfg, est_tbg): return (1 - ((ellapsed - est_tfg) / est_tbg)) * 100 if show_serial: print('Theoretical gain (serial) = %.3f%%' % (parallel_efficiency(t_serial, est_tfg, est_tbg), )) print('Theoretical gain (ut.buffered_generator) = %.3f%%' % (parallel_efficiency(t2.ellapsed, est_tfg, est_tbg), )) print('Theoretical gain (ut.generate) = %.2f%%' % (parallel_efficiency(t3.ellapsed, est_tfg, est_tbg), )) if show_serial: prac_tbg = t_serial - est_tfg print('\n Practical Results') print('Practical gain (serial) = %.3f%%' % (parallel_efficiency(t1.ellapsed, est_tfg, prac_tbg), )) print('Practical gain (ut.buffered_generator) = %.3f%%' % (parallel_efficiency(t2.ellapsed, est_tfg, prac_tbg), )) print('Practical gain (ut.generate) = %.2f%%' % (parallel_efficiency(t3.ellapsed, est_tfg, prac_tbg), ))
def _test_buffered_generator_general(func, args, sleepfunc, target_looptime=1.0, serial_cheat=1, argmode=False, buffer_size=2): """ # We are going to generate output of func in the background while sleep # func is running in the foreground # --- Hyperparams target_looptime = 1.5 # maximum time to run all loops """ import utool as ut #serial_cheat = 1 # approx division factor to run serial less times show_serial = True # target_looptime < 10. # 3.0 with ut.Timer('One* call to func') as t_fgfunc: results = [func(arg) for arg in args] functime = t_fgfunc.ellapsed / len(args) #sleepfunc = ut.is_prime with ut.Timer('One* call to sleep func') as t_sleep: if argmode: [sleepfunc(x) for x in results] else: [sleepfunc() for x in results] sleeptime = t_sleep.ellapsed / len(args) # compute amount of loops to run _num_loops = round(target_looptime // (functime + sleeptime)) num_data = int(_num_loops // len(args)) num_loops = int(num_data * len(args)) serial_cheat = min(serial_cheat, num_data) data = ut.flatten([args] * num_data) est_tsleep = sleeptime * num_loops est_tfunc = functime * num_loops est_needed_buffers = sleeptime / functime print('Estimated stats' + ut.dict_str( ut.dict_subset(locals(), [ 'num_loops', 'functime', 'sleeptime', 'est_tsleep', 'est_tfunc', 'serial_cheat', 'buffer_size', 'est_needed_buffers', ]))) if show_serial: with ut.Timer('serial') as t1: # cheat for serial to make it go faster for x in map(func, data[:len(data) // serial_cheat]): if argmode: sleepfunc(x) else: sleepfunc() t_serial = serial_cheat * t1.ellapsed print('...toc(\'adjusted_serial\') = %r' % (t_serial)) with ut.Timer('ut.buffered_generator') as t2: gen_ = ut.buffered_generator(map(func, data), buffer_size=buffer_size) for x in gen_: if argmode: sleepfunc(x) else: sleepfunc() with ut.Timer('ut.generate') as t3: gen_ = ut.generate(func, data, chunksize=buffer_size, quiet=1, verbose=0) for x in gen_: if argmode: sleepfunc(x) else: sleepfunc() # Compare theoretical vs practical efficiency print('\n Theoretical Results') def parallel_efficiency(ellapsed, est_tsleep, est_tfunc): return (1 - ((ellapsed - est_tsleep) / est_tfunc)) * 100 if show_serial: print('Theoretical gain (serial) = %.3f%%' % (parallel_efficiency(t_serial, est_tsleep, est_tfunc), )) print('Theoretical gain (ut.buffered_generator) = %.3f%%' % (parallel_efficiency(t2.ellapsed, est_tsleep, est_tfunc), )) print('Theoretical gain (ut.generate) = %.2f%%' % (parallel_efficiency(t3.ellapsed, est_tsleep, est_tfunc), )) if show_serial: prac_tfunc = t_serial - est_tsleep print('\n Practical Results') print('Practical gain (serial) = %.3f%%' % (parallel_efficiency(t1.ellapsed, est_tsleep, prac_tfunc), )) print('Practical gain (ut.buffered_generator) = %.3f%%' % (parallel_efficiency(t2.ellapsed, est_tsleep, prac_tfunc), )) print('Practical gain (ut.generate) = %.2f%%' % (parallel_efficiency(t3.ellapsed, est_tsleep, prac_tfunc), ))
def heuristic_infodict(card):
    infodict = ut.dict_subset(card.__dict__, ['name', 'mana_cost'])
    heuristic_types, heuristic_subtypes = card.get_heuristic_info()
    infodict['heuristic_types'] = heuristic_types
    return infodict
def get_pipecfg_list(test_cfg_name_list, ibs=None): r""" Builds a list of varied query configurations. Only custom configs depend on an ibs object. The order of the output is not gaurenteed to aggree with input order. Args: test_cfg_name_list (list): list of strs ibs (IBEISController): ibeis controller object (optional) Returns: tuple: (cfg_list, cfgx2_lbl) - cfg_list (list): list of config objects cfgx2_lbl (list): denotes which parameters are being varied. If there is just one config then nothing is varied CommandLine: python -m ibeis.expt.experiment_helpers --exec-get_pipecfg_list:0 python -m ibeis.expt.experiment_helpers --exec-get_pipecfg_list:1 --db humpbacks Example: >>> # ENABLE_DOCTEST >>> from ibeis.expt.experiment_helpers import * # NOQA >>> import ibeis >>> ibs = ibeis.opendb(defaultdb='testdb1') >>> #test_cfg_name_list = ['best', 'custom', 'custom:sv_on=False'] >>> #test_cfg_name_list = ['default', 'default:sv_on=False', 'best'] >>> test_cfg_name_list = ['default', 'default:sv_on=False', 'best'] >>> # execute function >>> (pcfgdict_list, pipecfg_list) = get_pipecfg_list(test_cfg_name_list, ibs) >>> # verify results >>> assert pipecfg_list[0].sv_cfg.sv_on is True >>> assert pipecfg_list[1].sv_cfg.sv_on is False >>> pipecfg_lbls = get_varied_pipecfg_lbls(pcfgdict_list) >>> result = ('pipecfg_lbls = '+ ut.list_str(pipecfg_lbls)) >>> print(result) pipecfg_lbls = [ 'default:', 'default:sv_on=False', ] Example1: >>> # DISABLE_DOCTEST >>> import ibeis_flukematch.plugin >>> from ibeis.expt.experiment_helpers import * # NOQA >>> import ibeis >>> ibs = ibeis.opendb(defaultdb='humpbacks') >>> test_cfg_name_list = ['default:pipeline_root=BC_DTW,decision=average', 'default:K=[1,4]'] >>> (pcfgdict_list, pipecfg_list) = get_pipecfg_list(test_cfg_name_list, ibs) >>> pipecfg_lbls = get_varied_pipecfg_lbls(pcfgdict_list) >>> result = ('pipecfg_lbls = '+ ut.list_str(pipecfg_lbls)) >>> print(result) >>> print_pipe_configs(pcfgdict_list, pipecfg_list) """ if ut.VERBOSE: print('[expt_help.get_pipecfg_list] building pipecfg_list using: %s' % test_cfg_name_list) if isinstance(test_cfg_name_list, six.string_types): test_cfg_name_list = [test_cfg_name_list] _standard_cfg_names = [] _pcfgdict_list = [] # HACK: Parse out custom configs first for test_cfg_name in test_cfg_name_list: if test_cfg_name.startswith('custom:') or test_cfg_name == 'custom': print('[expthelpers] Parsing nonstandard custom config') if test_cfg_name.startswith('custom:'): # parse out modifications to custom cfgstr_list = ':'.join(test_cfg_name.split(':')[1:]).split(',') augcfgdict = ut.parse_cfgstr_list(cfgstr_list, smartcast=True) else: augcfgdict = {} # Take the configuration from the ibeis object pipe_cfg = ibs.cfg.query_cfg.deepcopy() # Update with augmented params pipe_cfg.update_query_cfg(**augcfgdict) # Parse out a standard cfgdict cfgdict = dict(pipe_cfg.parse_items()) cfgdict['_cfgname'] = 'custom' cfgdict['_cfgstr'] = test_cfg_name _pcfgdict_list.append(cfgdict) else: _standard_cfg_names.append(test_cfg_name) # Handle stanndard configs next if len(_standard_cfg_names) > 0: # Get parsing information #cfg_default_dict = dict(Config.QueryConfig().parse_items()) #valid_keys = list(cfg_default_dict.keys()) cfgstr_list = _standard_cfg_names named_defaults_dict = ut.dict_subset( experiment_configs.__dict__, experiment_configs.TEST_NAMES) alias_keys = experiment_configs.ALIAS_KEYS # Parse standard pipeline cfgstrings metadata = {'ibs': ibs} dict_comb_list = cfghelpers.parse_cfgstr_list2( cfgstr_list, named_defaults_dict, 
cfgtype=None, alias_keys=alias_keys, # Hack out valid keys for humpbacks #valid_keys=valid_keys, strict=False, metadata=metadata ) # Get varied params (there may be duplicates) _pcfgdict_list.extend(ut.flatten(dict_comb_list)) # TODO: respsect different algorithm parameters # like flukes # Expand cfgdicts into PipelineConfig config objects if ibs is None: configclass_list = [Config.QueryConfig] * len(_pcfgdict_list) else: root_to_config = ibs.depc.configclass_dict configclass_list = [ root_to_config.get(_cfgdict.get('pipeline_root', 'vsmany'), Config.QueryConfig) for _cfgdict in _pcfgdict_list] _pipecfg_list = [cls(**_cfgdict) for cls, _cfgdict in zip(configclass_list, _pcfgdict_list)] # Enforce rule that removes duplicate configs # by using feasiblity from ibeis.algo.Config # TODO: Move this unique finding code to its own function # and then move it up one function level so even the custom # configs can be uniquified _flag_list = ut.flag_unique_items(_pipecfg_list) cfgdict_list = ut.compress(_pcfgdict_list, _flag_list) pipecfg_list = ut.compress(_pipecfg_list, _flag_list) if ut.NOT_QUIET: print('[harn.help] return %d / %d unique pipeline configs from: %r' % (len(cfgdict_list), len(_pcfgdict_list), test_cfg_name_list)) if ut.get_argflag(('--pcfginfo', '--pinfo', '--pipecfginfo')): import sys ut.colorprint('Requested PcfgInfo for tests... ', 'red') print_pipe_configs(cfgdict_list, pipecfg_list) ut.colorprint('Finished Reporting PcfgInfo. Exiting', 'red') sys.exit(1) return (cfgdict_list, pipecfg_list)
def cut(self, event):
    keys = ['min_labels', 'max_labels']
    infrkw = ut.dict_subset(self.config, keys)
    self.infr.relabel_using_inference(**infrkw)
    self.show_page()
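# A minimal sketch of the keyword-forwarding pattern in cut(): pull just the
# options a callee understands out of a larger config dict and pass them through
# as **kwargs. The config keys and the callee below are hypothetical.
def _relabel(min_labels=1, max_labels=5):
    return (min_labels, max_labels)

_config = {'min_labels': 1, 'max_labels': 3, 'other_option': 'ignored'}
_infrkw = {k: _config[k] for k in ['min_labels', 'max_labels']}
assert _relabel(**_infrkw) == (1, 3)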
print("Took %0.2f seconds" % active_toc) # give the list of active anchors (i.e. the keys in active_triplets) as the dataset # 'y' key is a hack full_triplet_sets = {k:full_triplets(active_triplets[k]) for k in active_triplets} dataset = full_triplet_sets #anchor_sets = {k:{'y':np.random.permutation(np.array(active_triplets[k].keys()))} for k in active_triplets} print("Active anchors left: %r" % ({k:len(v) for k, v in active_triplets.items()})) print("Active triplets left: %r" % ({k:v['y'].shape[0] for k, v in full_triplet_sets.items()})) # build the batch loader as a partial function on the dataset and the actives #batch_maker = {k:partial(make_batch, dataset=dset[k], actives=active_triplets[k]) for k in ['train','valid']} else: print("Number of pairs: %r" % ({k:len(dset[k]['y']) for k in dset})) train_pairs_shuffled = {k:shuffle_dataset(dict(ut.dict_subset(dset[k], ['pairs','y']))) for k in ['train','valid']} dataset = train_pairs_shuffled batch_maker = {k:partial(FUNCTIONS[options.loss_type]['bm'], dataset=dset[k]) for k in ['train','valid']} batch_loader = partial(FUNCTIONS[options.loss_type]['nonaug_bl'], batch_maker=batch_maker) # so we're going to just give dset as loss = train_epoch(iter_funcs, dataset, batch_size, batch_loader, layer_names=layer_names) epoch_losses.append(loss['train_loss']) batch_losses.append(loss['all_train_loss']) toc = time.time() - tic print("Learning rate: %0.5f" % momentum_params['l_r'].get_value()) print("Train loss (reg): %0.3f\nTrain loss: %0.3f\nValid loss: %0.3f" % (loss['train_reg_loss'],loss['train_loss'],loss['valid_loss'])) print("Train %s failed: %s\nValid %s failed: %s" % (options.loss_type, loss['train_acc'],
def test_sver_wrapper():
    """
    Test to ensure cpp and python agree and that cpp is faster

    CommandLine:
        python -m vtool.sver_c_wrapper --test-test_sver_wrapper
        python -m vtool.sver_c_wrapper --test-test_sver_wrapper --rebuild-sver
        python -m vtool.sver_c_wrapper --test-test_sver_wrapper --show
        python -m vtool.sver_c_wrapper --test-test_sver_wrapper --show --dummy
        python -m vtool.sver_c_wrapper --test-test_sver_wrapper --show --fname1=easy1.png --fname2=easy2.png
        python -m vtool.sver_c_wrapper --test-test_sver_wrapper --show --fname1=easy1.png --fname2=hard3.png
        python -m vtool.sver_c_wrapper --test-test_sver_wrapper --show --fname1=carl.jpg --fname2=hard3.png

    Example:
        >>> # ENABLE_DOCTEST
        >>> from vtool.sver_c_wrapper import *  # NOQA
        >>> test_sver_wrapper()

    Ignore:
        %timeit call_python_version(*args)
        %timeit get_affine_inliers_cpp(*args)
    """
    import vtool.spatial_verification as sver
    import vtool.tests.dummy as dummy
    xy_thresh_sqrd = ktool.KPTS_DTYPE(.4)
    scale_thresh_sqrd = ktool.KPTS_DTYPE(2.0)
    ori_thresh = ktool.KPTS_DTYPE(TAU / 4.0)
    keys = 'xy_thresh_sqrd, scale_thresh_sqrd, ori_thresh'.split(', ')
    print(ut.dict_str(ut.dict_subset(locals(), keys)))

    def report_errors():
        pass

    if ut.get_argflag('--dummy'):
        testtup = dummy.testdata_dummy_matches()
        (kpts1, kpts2, fm_input, fs_input, rchip1, rchip2) = testtup
        fm_input = fm_input.astype(fm_dtype)
        #fm_input = fm_input[0:10].astype(fm_dtype)
        #fs_input = fs_input[0:10].astype(np.float32)
    else:
        fname1 = ut.get_argval('--fname1', type_=str, default='easy1.png')
        fname2 = ut.get_argval('--fname2', type_=str, default='easy2.png')
        testtup = dummy.testdata_ratio_matches(fname1, fname2)
        (kpts1, kpts2, fm_input, fs_input, rchip1, rchip2) = testtup

    # pack up call to aff hypothesis
    import vtool as vt
    import scipy.stats.mstats
    scales1 = vt.get_scales(kpts1.take(fm_input.T[0], axis=0))
    scales2 = vt.get_scales(kpts2.take(fm_input.T[1], axis=0))
    #fs_input = 1 / scipy.stats.mstats.gmean(np.vstack((scales1, scales2)))
    fs_input = scipy.stats.mstats.gmean(np.vstack((scales1, scales2)))
    print('fs_input = ' + ut.numpy_str(fs_input))
    #fs_input[0:-9] = 0
    #fs_input = np.ones(len(fm_input), dtype=fs_dtype)
    #ut.embed()
    #fs_input = scales1 * scales2
    args = (kpts1, kpts2, fm_input, fs_input, xy_thresh_sqrd, scale_thresh_sqrd, ori_thresh)

    ex_list = []

    try:
        with ut.Indenter('[TEST1] '):
            inlier_tup = vt.compare_implementations(
                sver.get_affine_inliers,
                get_affine_inliers_cpp,
                args, lbl1='py', lbl2='c',
                output_lbl=('aff_inliers_list', 'aff_errors_list', 'Aff_mats'))
            out_inliers, out_errors, out_mats = inlier_tup
    except AssertionError as ex:
        ex_list.append(ex)
        raise

    try:
        import functools
        with ut.Indenter('[TEST2] '):
            bestinlier_tup = vt.compare_implementations(
                functools.partial(sver.get_best_affine_inliers, forcepy=True),
                get_best_affine_inliers_cpp,
                args, show_output=True, lbl1='py', lbl2='c',
                output_lbl=('bestinliers', 'besterror', 'bestmat'))
            bestinliers, besterror, bestmat = bestinlier_tup
    except AssertionError as ex:
        ex_list.append(ex)
        raise

    if len(ex_list) > 0:
        raise AssertionError('some tests failed. see previous stdout')

    #num_inliers_list = np.array(map(len, out_inliers_c))
    #best_argx = num_inliers_list.argmax()
    ##best_inliers_py = out_inliers_py[best_argx]
    #best_inliers_c = out_inliers_c[best_argx]
    if ut.show_was_requested():
        import plottool as pt
        fm_output = fm_input.take(bestinliers, axis=0)
        fnum = pt.next_fnum()
        pt.figure(fnum=fnum, doclf=True, docla=True)
        pt.show_chipmatch2(rchip1, rchip2, kpts1, kpts2, fm_input,
                           ell_linewidth=5, fnum=fnum, pnum=(2, 1, 1))
        pt.show_chipmatch2(rchip1, rchip2, kpts1, kpts2, fm_output,
                           ell_linewidth=5, fnum=fnum, pnum=(2, 1, 2))
        pt.show_if_requested()
def show_arch_nx_graph(layers, fnum=None, fullinfo=True): r""" CommandLine: python -m ibeis_cnn.draw_net show_arch_nx_graph:0 --show python -m ibeis_cnn.draw_net show_arch_nx_graph:1 --show Example0: >>> # ENABLE_DOCTEST >>> from ibeis_cnn.draw_net import * # NOQA >>> from ibeis_cnn import models >>> model = models.mnist.MNISTModel(batch_size=128, output_dims=10, >>> data_shape=(24, 24, 3)) >>> model.init_arch() >>> layers = model.get_all_layers() >>> show_arch_nx_graph(layers) >>> ut.quit_if_noshow() >>> import plottool as pt >>> ut.show_if_requested() Example1: >>> # ENABLE_DOCTEST >>> from ibeis_cnn.draw_net import * # NOQA >>> from ibeis_cnn import models >>> model = models.SiameseCenterSurroundModel(autoinit=True) >>> layers = model.get_all_layers() >>> show_arch_nx_graph(layers) >>> ut.quit_if_noshow() >>> import plottool as pt >>> ut.show_if_requested() """ import networkx as nx import plottool as pt import ibeis_cnn.__LASAGNE__ as lasange #from matplotlib import offsetbox #import matplotlib as mpl REMOVE_BATCH_SIZE = True from ibeis_cnn import net_strs def get_hex_color(layer_type): if 'Input' in layer_type: return '#A2CECE' if 'Conv2D' in layer_type: return '#7C9ABB' if 'Dense' in layer_type: return '#6CCF8D' if 'Pool' in layer_type: return '#9D9DD2' if 'SoftMax' in layer_type: return '#7E9FD9' else: return '#{0:x}'.format(hash(layer_type + 'salt') % 2 ** 24) node_dict = {} edge_list = [] edge_attrs = ut.ddict(dict) # Make layer ids (ensure no duplicates) layer_to_id = { l: repr(l) if l.name is None else l.name for l in set(layers) } keys_ = layer_to_id.keys() dups = ut.find_duplicate_items(layer_to_id.values()) for dupval, dupidxs in dups.items(): newval_fmt = dupval + '_%d' for layer in ut.take(keys_, dupidxs): newid = ut.get_nonconflicting_string(newval_fmt, layer_to_id.values()) layer_to_id[layer] = newid def layerid(layer): return layer_to_id[layer] main_nodes = [] for i, layer in enumerate(layers): layer_info = net_strs.get_layer_info(layer) layer_type = layer_info['classalias'] key = layerid(layer) color = get_hex_color(layer_info['classalias']) # Make label lines = [] if layer_info['name'] is not None: lines.append(layer_info['name']) if fullinfo: lines.append(layer_info['classalias']) for attr, val in layer_info['layer_attrs'].items(): if attr == 'shape' and REMOVE_BATCH_SIZE: val = val[1:] if attr == 'output_shape' and REMOVE_BATCH_SIZE: val = val[1:] lines.append('{0}: {1}'.format(attr, val)) nonlinearity = layer_info.get('nonlinearity') if nonlinearity is not None: alias_map = { 'LeakyRectify': 'LReLU', } val = layer_info['nonlinearity']['type'] val = alias_map.get(val, val) lines.append('nonlinearity:\n{0}'.format(val)) label = '\n'.join(lines) # append node is_main_layer = len(layer.params) > 0 #is_main_layer = len(lasange.layers.get_all_params(layer, trainable=True)) > 0 if layer_info['classname'] in lasange.layers.normalization.__all__: is_main_layer = False if layer_info['classname'] in lasange.layers.special.__all__: is_main_layer = False if layer_info['classname'].startswith('BatchNorm'): is_main_layer = False if layer_info['classname'].startswith('ElemwiseSum'): is_main_layer = True if layer_type == 'Input': is_main_layer = True if hasattr(layer, '_is_main_layer'): is_main_layer = layer._is_main_layer #if getattr(layer, 'name', '') is not None and getattr(layer, 'name', '') .endswith('/sum'): # is_main_layer = True node_attr = dict(name=key, label=label, color=color, fillcolor=color, style='filled', is_main_layer=is_main_layer) node_attr['is_main_layer'] = 
is_main_layer if is_main_layer: main_nodes.append(key) node_attr['classalias'] = layer_info['classalias'] if is_main_layer or node_attr['classalias'].startswith('Conv'): if hasattr(layer, 'shape'): if len(layer.shape) == 3: node_attr['out_size'] = (layer.shape[2], layer.shape[1]) node_attr['depth'] = layer.output_shape[0] if hasattr(layer, 'output_shape'): if len(layer.output_shape) == 4: depth = layer.output_shape[1] width, height = (layer.output_shape[3], layer.output_shape[2]) xshift = -width * (.1 / (depth ** (1 / 3))) / 3 yshift = height * (.1 / (depth ** (1 / 3))) / 2 node_attr['depth'] = depth node_attr['xshift'] = xshift node_attr['yshift'] = yshift node_attr['out_size'] = (width, height) if len(layer.output_shape) == 2: node_attr['out_size'] = (1, layer.output_shape[1]) node_dict[key] = node_attr _input_layers = [] if hasattr(layer, 'input_layers'): _input_layers += layer.input_layers if hasattr(layer, 'input_layer'): _input_layers += [layer.input_layer] for input_layer in _input_layers: parent_key = layerid(input_layer) edge = (parent_key, key) edge_list.append(edge) main_size_ = np.array((100, 100)) * 4 sub_size = np.array((75, 50)) * 4 # Setup scaled width and heights out_size_list = [v['out_size'] for v in node_dict.values() if 'out_size' in v] out_size_list = np.array(out_size_list) #out_size_list = out_size_list[out_size_list.T[0] > 1] area_arr = np.prod(out_size_list, axis=1) main_outsize = np.array(out_size_list[area_arr.argmax()]) #main_outsize = np.array(out_size_list[area_arr.argmin()]) scale = main_size_ / main_outsize scale_dense_max = .25 scale_dense_min = 8 for k, v in node_dict.items(): if v['is_main_layer'] or v['classalias'].startswith('Conv'): if 'out_size' in v: # Make dense layers more visible if v['classalias'] == 'Dense': v['shape'] = 'rect' v['width'] = scale_dense_min if v['out_size'][1] > main_outsize[1]: v['height'] = v['out_size'][1] * scale[1] * scale_dense_max elif v['out_size'][1] < scale_dense_min: v['height'] = scale_dense_min * v['out_size'][1] else: v['height'] = v['out_size'][1] elif v['classalias'].startswith('Conv'): v['shape'] = 'stack' #v['shape'] = 'rect' v['width'] = v['out_size'][0] * scale[0] v['height'] = v['out_size'][1] * scale[1] else: v['shape'] = 'rect' v['width'] = v['out_size'][0] * scale[0] v['height'] = v['out_size'][1] * scale[1] else: v['shape'] = 'rect' v['width'] = main_size_[0] v['height'] = main_size_[1] else: #v['shape'] = 'ellipse' v['shape'] = 'rect' v['style'] = 'rounded' v['width'] = sub_size[0] v['height'] = sub_size[1] key_order = ut.take(layer_to_id, layers) node_dict = ut.dict_subset(node_dict, key_order) #print('node_dict = ' + ut.repr3(node_dict)) # Create the networkx graph structure G = nx.DiGraph() G.add_nodes_from(node_dict.items()) G.add_edges_from(edge_list) for key, val in edge_attrs.items(): nx.set_edge_attributes(G, key, val) # Add invisible structure #main_nodes = [key for key, val in # nx.get_node_attributes(G, 'is_main_layer').items() if val] main_children = ut.odict() #for n1, n2 in ut.itertwo(main_nodes): # print('n1, n2 = %r %r' % (n1, n2)) # import utool # utool.embed() # children = ut.nx_all_nodes_between(G, n1, n2) # if n1 in children: # children.remove(n1) # if n2 in children: # children.remove(n2) # main_children[n1] = children # #pass #main_children[main_nodes[-1]] = [] for n1 in main_nodes: main_children[n1] = [] # Main nodes only place constraints on nodes in the next main group. 
# Not their own next_main = None G.node[n1]['group'] = n1 for (_, n2) in nx.bfs_edges(G, n1): if next_main is None: if n2 in main_nodes: next_main = n2 else: G.node[n2]['group'] = n1 main_children[n1].append(n2) else: if n2 not in list(nx.descendants(G, next_main)): G.node[n2]['group'] = n1 main_children[n1].append(n2) # Custom positioning x = 0 y = 1000 #print('main_children = %s' % (ut.repr3(main_children),)) #main_nodes = ut.isect(list(nx.topological_sort(G)), main_nodes) xpad = main_size_[0] * .3 ypad = main_size_[1] * .3 # Draw each main node, and then put its children under it # Then move to the left and draw the next main node. cumwidth = 0 for n1 in main_nodes: cumheight = 0 maxwidth = G.node[n1]['width'] for n2 in main_children[n1]: maxwidth = max(maxwidth, G.node[n2]['width']) cumwidth += xpad cumwidth += maxwidth / 2 pos = np.array([x + cumwidth, y - cumheight]) G.node[n1]['pos'] = pos G.node[n1]['pin'] = 'true' height = G.node[n1]['height'] cumheight += height / 2 for n2 in main_children[n1]: height = G.node[n2]['height'] cumheight += ypad cumheight += height / 2 pos = np.array([x + cumwidth, y - cumheight]) G.node[n2]['pos'] = pos G.node[n2]['pin'] = 'true' cumheight += height / 2 cumwidth += maxwidth / 2 # Pin everybody nx.set_node_attributes(G, 'pin', 'true') layoutkw = dict(prog='neato', splines='line') #layoutkw = dict(prog='neato', splines='spline') layoutkw = dict(prog='neato', splines='ortho') G_ = G.copy() # delete lables for positioning _labels = nx.get_node_attributes(G_, 'label') ut.nx_delete_node_attr(G_, 'label') nx.set_node_attributes(G_, 'label', '') nolayout = False if nolayout: G_.remove_edges_from(list(G_.edges())) else: layout_info = pt.nx_agraph_layout(G_, inplace=True, **layoutkw) # NOQA # reset labels if not nolayout: nx.set_node_attributes(G_, 'label', _labels) _ = pt.show_nx(G_, fontsize=8, arrow_width=.3, layout='custom', fnum=fnum) # NOQA #pt.adjust_subplots(top=1, bot=0, left=0, right=1) pt.plt.tight_layout()
def new_cpd(self, parents=None, pmf_func=None): """ Makes a new random variable that is an instance of this tempalte parents : only used to define the name of this node. """ if pmf_func is None: pmf_func = self.pmf_func # --- MAKE VARIABLE ID def _getid(obj): if isinstance(obj, int): return str(obj) elif isinstance(obj, six.string_types): return obj else: return obj._template_id if not ut.isiterable(parents): parents = [parents] template_ids = [_getid(cpd) for cpd in parents] HACK_SAME_IDS = True # TODO: keep track of parent index inheritence # then rectify uniqueness based on that if HACK_SAME_IDS and ut.list_allsame(template_ids): _id = template_ids[0] else: _id = ''.join(template_ids) variable = ''.join([self.varpref, _id]) #variable = '_'.join([self.varpref, '{' + _id + '}']) #variable = '$%s$' % (variable,) evidence_cpds = [cpd for cpd in parents if hasattr(cpd, 'ttype')] if len(evidence_cpds) == 0: evidence_cpds = None variable_card = len(self.basis) statename_dict = { variable: self.basis, } if self.evidence_ttypes is not None: if any(cpd.ttype != tcpd.ttype for cpd, tcpd in zip(evidence_cpds, evidence_cpds)): raise ValueError('Evidence is not of appropriate type') evidence_bases = [cpd.variable_statenames for cpd in evidence_cpds] evidence_card = list(map(len, evidence_bases)) evidence_states = list(ut.iprod(*evidence_bases)) for cpd in evidence_cpds: _dict = ut.dict_subset(cpd.statename_dict, [cpd.variable]) statename_dict.update(_dict) evidence = [cpd.variable for cpd in evidence_cpds] else: if evidence_cpds is not None: raise ValueError('Gave evidence for evidence-less template') evidence = None evidence_card = None # --- MAKE TABLE VALUES if pmf_func is not None: if isinstance(pmf_func, list): values = np.array(pmf_func) else: values = np.array([ [pmf_func(vstate, *estates) for estates in evidence_states] for vstate in self.basis ]) ensure_normalized = True if ensure_normalized: values = values / values.sum(axis=0) else: # assume uniform fill_value = 1.0 / variable_card if evidence_card is None: values = np.full((1, variable_card), fill_value) else: values = np.full([variable_card] + list(evidence_card), fill_value) try: cpd = pgmpy.factors.TabularCPD( variable=variable, variable_card=variable_card, values=values, evidence=evidence, evidence_card=evidence_card, statename_dict=statename_dict, ) except Exception as ex: ut.printex(ex, 'Failed to create TabularCPD', keys=[ 'variable', 'variable_card', 'statename_dict', 'evidence_card', 'evidence', 'values.shape', ]) raise cpd.ttype = self.ttype cpd._template_ = self cpd._template_id = _id return cpd
def _test_buffered_generator_general(func, args, sleepfunc, target_looptime=1.0, serial_cheat=1, argmode=False, buffer_size=2): """ # We are going to generate output of func in the background while sleep # func is running in the foreground # --- Hyperparams target_looptime = 1.5 # maximum time to run all loops """ import utool as ut #serial_cheat = 1 # approx division factor to run serial less times show_serial = True # target_looptime < 10. # 3.0 with ut.Timer('One* call to func') as t_fgfunc: results = [func(arg) for arg in args] functime = t_fgfunc.ellapsed / len(args) #sleepfunc = ut.is_prime with ut.Timer('One* call to sleep func') as t_sleep: if argmode: [sleepfunc(x) for x in results] else: [sleepfunc() for x in results] sleeptime = t_sleep.ellapsed / len(args) # compute amount of loops to run _num_loops = round(target_looptime // (functime + sleeptime)) num_data = int(_num_loops // len(args)) num_loops = int(num_data * len(args)) serial_cheat = min(serial_cheat, num_data) data = ut.flatten([args] * num_data) est_tsleep = sleeptime * num_loops est_tfunc = functime * num_loops est_needed_buffers = sleeptime / functime print('Estimated stats' + ut.repr4(ut.dict_subset(locals(), [ 'num_loops', 'functime', 'sleeptime', 'est_tsleep', 'est_tfunc', 'serial_cheat', 'buffer_size', 'est_needed_buffers', ]))) if show_serial: with ut.Timer('serial') as t1: # cheat for serial to make it go faster for x in map(func, data[:len(data) // serial_cheat]): if argmode: sleepfunc(x) else: sleepfunc() t_serial = serial_cheat * t1.ellapsed print('...toc(\'adjusted_serial\') = %r' % (t_serial)) with ut.Timer('ut.buffered_generator') as t2: gen_ = ut.buffered_generator(map(func, data), buffer_size=buffer_size) for x in gen_: if argmode: sleepfunc(x) else: sleepfunc() with ut.Timer('ut.generate') as t3: gen_ = ut.generate(func, data, chunksize=buffer_size, quiet=1, verbose=0) for x in gen_: if argmode: sleepfunc(x) else: sleepfunc( ) # Compare theoretical vs practical efficiency print('\n Theoretical Results') def parallel_efficiency(ellapsed, est_tsleep, est_tfunc): return (1 - ((ellapsed - est_tsleep) / est_tfunc)) * 100 if show_serial: print('Theoretical gain (serial) = %.3f%%' % ( parallel_efficiency(t_serial, est_tsleep, est_tfunc),)) print('Theoretical gain (ut.buffered_generator) = %.3f%%' % ( parallel_efficiency(t2.ellapsed, est_tsleep, est_tfunc),)) print('Theoretical gain (ut.generate) = %.2f%%' % ( parallel_efficiency(t3.ellapsed, est_tsleep, est_tfunc),)) if show_serial: prac_tfunc = t_serial - est_tsleep print('\n Practical Results') print('Practical gain (serial) = %.3f%%' % ( parallel_efficiency(t1.ellapsed, est_tsleep, prac_tfunc),)) print('Practical gain (ut.buffered_generator) = %.3f%%' % ( parallel_efficiency(t2.ellapsed, est_tsleep, prac_tfunc),)) print('Practical gain (ut.generate) = %.2f%%' % ( parallel_efficiency(t3.ellapsed, est_tsleep, prac_tfunc),))
def get_injured_sharks(): """ >>> from wbia.scripts.getshark import * # NOQA """ import requests url = 'http://www.whaleshark.org/getKeywordImages.jsp' resp = requests.get(url) assert resp.status_code == 200 keywords = resp.json()['keywords'] key_list = ut.take_column(keywords, 'indexName') key_to_nice = {k['indexName']: k['readableName'] for k in keywords} injury_patterns = [ 'injury', 'net', 'hook', 'trunc', 'damage', 'scar', 'nicks', 'bite', ] injury_keys = [ key for key in key_list if any([pat in key for pat in injury_patterns]) ] noninjury_keys = ut.setdiff(key_list, injury_keys) injury_nice = ut.lmap(lambda k: key_to_nice[k], injury_keys) # NOQA noninjury_nice = ut.lmap(lambda k: key_to_nice[k], noninjury_keys) # NOQA key_list = injury_keys keyed_images = {} for key in ut.ProgIter(key_list, lbl='reading index', bs=True): key_url = url + '?indexName={indexName}'.format(indexName=key) key_resp = requests.get(key_url) assert key_resp.status_code == 200 key_imgs = key_resp.json()['images'] keyed_images[key] = key_imgs key_hist = {key: len(imgs) for key, imgs in keyed_images.items()} key_hist = ut.sort_dict(key_hist, 'vals') logger.info(ut.repr3(key_hist)) nice_key_hist = ut.map_dict_keys(lambda k: key_to_nice[k], key_hist) nice_key_hist = ut.sort_dict(nice_key_hist, 'vals') logger.info(ut.repr3(nice_key_hist)) key_to_urls = { key: ut.take_column(vals, 'url') for key, vals in keyed_images.items() } overlaps = {} import itertools overlap_img_list = [] for k1, k2 in itertools.combinations(key_to_urls.keys(), 2): overlap_imgs = ut.isect(key_to_urls[k1], key_to_urls[k2]) num_overlap = len(overlap_imgs) overlaps[(k1, k2)] = num_overlap overlaps[(k1, k1)] = len(key_to_urls[k1]) if num_overlap > 0: # logger.info('[%s][%s], overlap=%r' % (k1, k2, num_overlap)) overlap_img_list.extend(overlap_imgs) all_img_urls = list(set(ut.flatten(key_to_urls.values()))) num_all = len(all_img_urls) # NOQA logger.info('num_all = %r' % (num_all, )) # Determine super-categories categories = ['nicks', 'scar', 'trunc'] # Force these keys into these categories key_to_cat = {'scarbite': 'other_injury'} cat_to_keys = ut.ddict(list) for key in key_to_urls.keys(): flag = 1 if key in key_to_cat: cat = key_to_cat[key] cat_to_keys[cat].append(key) continue for cat in categories: if cat in key: cat_to_keys[cat].append(key) flag = 0 if flag: cat = 'other_injury' cat_to_keys[cat].append(key) cat_urls = ut.ddict(list) for cat, keys in cat_to_keys.items(): for key in keys: cat_urls[cat].extend(key_to_urls[key]) cat_hist = {} for cat in list(cat_urls.keys()): cat_urls[cat] = list(set(cat_urls[cat])) cat_hist[cat] = len(cat_urls[cat]) logger.info(ut.repr3(cat_to_keys)) logger.info(ut.repr3(cat_hist)) key_to_cat = dict([(val, key) for key, vals in cat_to_keys.items() for val in vals]) # ingestset = { # '__class__': 'ImageSet', # 'images': ut.ddict(dict) # } # for key, key_imgs in keyed_images.items(): # for imgdict in key_imgs: # url = imgdict['url'] # encid = imgdict['correspondingEncounterNumber'] # # Make structure # encdict = encounters[encid] # encdict['__class__'] = 'Encounter' # imgdict = ut.delete_keys(imgdict.copy(), ['correspondingEncounterNumber']) # imgdict['__class__'] = 'Image' # cat = key_to_cat[key] # annotdict = {'relative_bbox': [.01, .01, .98, .98], 'tags': [cat, key]} # annotdict['__class__'] = 'Annotation' # # Ensure structures exist # encdict['images'] = encdict.get('images', []) # imgdict['annots'] = imgdict.get('annots', []) # # Add an image to this encounter # encdict['images'].append(imgdict) # # Add an 
annotation to this image # imgdict['annots'].append(annotdict) # # http://springbreak.wildbook.org/rest/org.ecocean.Encounter/1111 # get_enc_url = 'http://www.whaleshark.org/rest/org.ecocean.Encounter/%s' % (encid,) # resp = requests.get(get_enc_url) # logger.info(ut.repr3(encdict)) # logger.info(ut.repr3(encounters)) # Download the files to the local disk # fpath_list = all_urls = ut.unique( ut.take_column( ut.flatten( ut.dict_subset(keyed_images, ut.flatten(cat_to_keys.values())).values()), 'url', )) dldir = ut.truepath('~/tmpsharks') from os.path import commonprefix, basename # NOQA prefix = commonprefix(all_urls) suffix_list = [url_[len(prefix):] for url_ in all_urls] fname_list = [suffix.replace('/', '--') for suffix in suffix_list] fpath_list = [] for url, fname in ut.ProgIter(zip(all_urls, fname_list), lbl='downloading imgs', freq=1): fpath = ut.grab_file_url(url, download_dir=dldir, fname=fname, verbose=False) fpath_list.append(fpath) # Make sure we keep orig info # url_to_keys = ut.ddict(list) url_to_info = ut.ddict(dict) for key, imgdict_list in keyed_images.items(): for imgdict in imgdict_list: url = imgdict['url'] info = url_to_info[url] for k, v in imgdict.items(): info[k] = info.get(k, []) info[k].append(v) info['keys'] = info.get('keys', []) info['keys'].append(key) # url_to_keys[url].append(key) info_list = ut.take(url_to_info, all_urls) for info in info_list: if len(set(info['correspondingEncounterNumber'])) > 1: assert False, 'url with two different encounter nums' # Combine duplicate tags hashid_list = [ ut.get_file_uuid(fpath_, stride=8) for fpath_ in ut.ProgIter(fpath_list, bs=True) ] groupxs = ut.group_indices(hashid_list)[1] # Group properties by duplicate images # groupxs = [g for g in groupxs if len(g) > 1] fpath_list_ = ut.take_column(ut.apply_grouping(fpath_list, groupxs), 0) url_list_ = ut.take_column(ut.apply_grouping(all_urls, groupxs), 0) info_list_ = [ ut.map_dict_vals(ut.flatten, ut.dict_accum(*info_)) for info_ in ut.apply_grouping(info_list, groupxs) ] encid_list_ = [ ut.unique(info_['correspondingEncounterNumber'])[0] for info_ in info_list_ ] keys_list_ = [ut.unique(info_['keys']) for info_ in info_list_] cats_list_ = [ut.unique(ut.take(key_to_cat, keys)) for keys in keys_list_] clist = ut.ColumnLists({ 'gpath': fpath_list_, 'url': url_list_, 'encid': encid_list_, 'key': keys_list_, 'cat': cats_list_, }) # for info_ in ut.apply_grouping(info_list, groupxs): # info = ut.dict_accum(*info_) # info = ut.map_dict_vals(ut.flatten, info) # x = ut.unique(ut.flatten(ut.dict_accum(*info_)['correspondingEncounterNumber'])) # if len(x) > 1: # info = info.copy() # del info['keys'] # logger.info(ut.repr3(info)) flags = ut.lmap(ut.fpath_has_imgext, clist['gpath']) clist = clist.compress(flags) import wbia ibs = wbia.opendb('WS_Injury', allow_newdir=True) gid_list = ibs.add_images(clist['gpath']) clist['gid'] = gid_list failed_flags = ut.flag_None_items(clist['gid']) logger.info('# failed %s' % (sum(failed_flags), )) passed_flags = ut.not_list(failed_flags) clist = clist.compress(passed_flags) ut.assert_all_not_None(clist['gid']) # ibs.get_image_uris_original(clist['gid']) ibs.set_image_uris_original(clist['gid'], clist['url'], overwrite=True) # ut.zipflat(clist['cat'], clist['key']) if False: # Can run detection instead clist['tags'] = ut.zipflat(clist['cat']) aid_list = ibs.use_images_as_annotations(clist['gid'], adjust_percent=0.01, tags_list=clist['tags']) aid_list import wbia.plottool as pt from wbia import core_annots pt.qt4ensure() # annots = ibs.annots() # aids 
= [1, 2] # ibs.depc_annot.get('hog', aids , 'hog') # ibs.depc_annot.get('chip', aids, 'img') for aid in ut.InteractiveIter(ibs.get_valid_aids()): hogs = ibs.depc_annot.d.get_hog_hog([aid]) chips = ibs.depc_annot.d.get_chips_img([aid]) chip = chips[0] hogimg = core_annots.make_hog_block_image(hogs[0]) pt.clf() pt.imshow(hogimg, pnum=(1, 2, 1)) pt.imshow(chip, pnum=(1, 2, 2)) fig = pt.gcf() fig.show() fig.canvas.draw() # logger.info(len(groupxs)) # if False: # groupxs = ut.find_duplicate_items(ut.lmap(basename, suffix_list)).values() # logger.info(ut.repr3(ut.apply_grouping(all_urls, groupxs))) # # FIX # for fpath, fname in zip(fpath_list, fname_list): # if ut.checkpath(fpath): # ut.move(fpath, join(dirname(fpath), fname)) # logger.info('fpath = %r' % (fpath,)) # import wbia # from wbia.dbio import ingest_dataset # dbdir = wbia.sysres.lookup_dbdir('WS_ALL') # self = ingest_dataset.Ingestable2(dbdir) if False: # Show overlap matrix import wbia.plottool as pt import pandas as pd import numpy as np dict_ = overlaps s = pd.Series(dict_, index=pd.MultiIndex.from_tuples(overlaps)) df = s.unstack() lhs, rhs = df.align(df.T) df = lhs.add(rhs, fill_value=0).fillna(0) label_texts = df.columns.values def label_ticks(label_texts): import wbia.plottool as pt truncated_labels = [repr(lbl[0:100]) for lbl in label_texts] ax = pt.gca() ax.set_xticks(list(range(len(label_texts)))) ax.set_xticklabels(truncated_labels) [lbl.set_rotation(-55) for lbl in ax.get_xticklabels()] [ lbl.set_horizontalalignment('left') for lbl in ax.get_xticklabels() ] # xgrid, ygrid = np.meshgrid(range(len(label_texts)), range(len(label_texts))) # pt.plot_surface3d(xgrid, ygrid, disjoint_mat) ax.set_yticks(list(range(len(label_texts)))) ax.set_yticklabels(truncated_labels) [ lbl.set_horizontalalignment('right') for lbl in ax.get_yticklabels() ] [ lbl.set_verticalalignment('center') for lbl in ax.get_yticklabels() ] # [lbl.set_rotation(20) for lbl in ax.get_yticklabels()] # df = df.sort(axis=0) # df = df.sort(axis=1) sortx = np.argsort(df.sum(axis=1).values)[::-1] df = df.take(sortx, axis=0) df = df.take(sortx, axis=1) fig = pt.figure(fnum=1) fig.clf() mat = df.values.astype(np.int32) mat[np.diag_indices(len(mat))] = 0 vmax = mat[(1 - np.eye(len(mat))).astype(np.bool)].max() import matplotlib.colors norm = matplotlib.colors.Normalize(vmin=0, vmax=vmax, clip=True) pt.plt.imshow(mat, cmap='hot', norm=norm, interpolation='none') pt.plt.colorbar() pt.plt.grid('off') label_ticks(label_texts) fig.tight_layout() # overlap_df = pd.DataFrame.from_dict(overlap_img_list) class TmpImage(ut.NiceRepr): pass from skimage.feature import hog from skimage import data, color, exposure import wbia.plottool as pt image2 = color.rgb2gray(data.astronaut()) # NOQA fpath = './GOPR1120.JPG' import vtool as vt for fpath in [fpath]: """ http://scikit-image.org/docs/dev/auto_examples/plot_hog.html """ image = vt.imread(fpath, grayscale=True) image = pt.color_funcs.to_base01(image) fig = pt.figure(fnum=2) fd, hog_image = hog( image, orientations=8, pixels_per_cell=(16, 16), cells_per_block=(1, 1), visualise=True, ) fig, (ax1, ax2) = pt.plt.subplots(1, 2, figsize=(8, 4), sharex=True, sharey=True) ax1.axis('off') ax1.imshow(image, cmap=pt.plt.cm.gray) ax1.set_title('Input image') ax1.set_adjustable('box-forced') # Rescale histogram for better display hog_image_rescaled = exposure.rescale_intensity(hog_image, in_range=(0, 0.02)) ax2.axis('off') ax2.imshow(hog_image_rescaled, cmap=pt.plt.cm.gray) ax2.set_title('Histogram of Oriented Gradients') 
ax1.set_adjustable('box-forced') pt.plt.show()
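# get_injured_sharks deduplicates downloaded images by content hash and merges the
# keyword metadata of each duplicate group. A minimal stand-alone sketch of that
# grouping step; the hash ids, paths, and tag dicts below are hypothetical stand-ins
# for the lists built in the function above.
from collections import defaultdict

def _group_duplicates_sketch():
    hashid_list = ['aaa', 'bbb', 'aaa']
    fpath_list = ['img0.jpg', 'img1.jpg', 'img2.jpg']
    info_list = [{'keys': ['hook']}, {'keys': ['scar']}, {'keys': ['net']}]

    groups = defaultdict(list)
    for idx, hashid in enumerate(hashid_list):
        groups[hashid].append(idx)

    # one representative path per duplicate group; union the tag lists
    fpath_list_ = [fpath_list[idxs[0]] for idxs in groups.values()]
    keys_list_ = [sorted({k for i in idxs for k in info_list[i]['keys']})
                  for idxs in groups.values()]
    return list(zip(fpath_list_, keys_list_))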
def asdict(dataset): # save all args passed into constructor as a dict key_list = ut.get_func_argspec(dataset.__init__).args[1:] data_dict = ut.dict_subset(dataset.__dict__, key_list) return data_dict
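# asdict relies on introspecting the constructor signature to decide which attributes
# to serialize. A minimal sketch of the same pattern using the standard library's
# inspect module in place of ut.get_func_argspec; the Dataset-like class here is
# hypothetical and only illustrates the idea.
import inspect

class _DatasetSketch(object):
    def __init__(self, name, dim, alias=None):
        self.name = name
        self.dim = dim
        self.alias = alias
        self.cache = {}   # derived state, intentionally excluded from asdict

    def asdict(self):
        key_list = list(inspect.signature(self.__init__).parameters)  # excludes 'self'
        return {key: self.__dict__[key] for key in key_list}

# _DatasetSketch('toy', 8).asdict() -> {'name': 'toy', 'dim': 8, 'alias': None}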
def make_annot_inference_dict(infr, internal=False): #import uuid def convert_to_name_uuid(nid): #try: text = ibs.get_name_texts(nid, apply_fix=False) if text is None: text = 'NEWNAME_%s' % (str(nid), ) #uuid_ = uuid.UUID(text) #except ValueError: # text = 'NEWNAME_%s' % (str(nid),) # #uuid_ = nid return text ibs = infr.qreq_.ibs if internal: get_annot_uuids = ut.identity else: get_annot_uuids = ibs.get_annot_uuids #return uuid_ # Compile the cluster_dict col_list = [ 'aid_list', 'orig_nid_list', 'new_nid_list', 'exemplar_flag_list', 'error_flag_list' ] cluster_dict = dict(zip(col_list, ut.listT(infr.cluster_tuples))) cluster_dict['annot_uuid_list'] = get_annot_uuids( cluster_dict['aid_list']) # We store the name's UUID as the name's text #cluster_dict['orig_name_uuid_list'] = [convert_to_name_uuid(nid) # for nid in cluster_dict['orig_nid_list']] #cluster_dict['new_name_uuid_list'] = [convert_to_name_uuid(nid) # for nid in cluster_dict['new_nid_list']] cluster_dict['orig_name_list'] = [ convert_to_name_uuid(nid) for nid in cluster_dict['orig_nid_list'] ] cluster_dict['new_name_list'] = [ convert_to_name_uuid(nid) for nid in cluster_dict['new_nid_list'] ] # Filter out only the keys we want to send back in the dictionary #key_list = ['annot_uuid_list', 'orig_name_uuid_list', # 'new_name_uuid_list', 'exemplar_flag_list', # 'error_flag_list'] key_list = [ 'annot_uuid_list', 'orig_name_list', 'new_name_list', 'exemplar_flag_list', 'error_flag_list' ] cluster_dict = ut.dict_subset(cluster_dict, key_list) # Compile the annot_pair_dict col_list = [ 'aid_1_list', 'aid_2_list', 'p_same_list', 'confidence_list', 'raw_score_list' ] annot_pair_dict = dict(zip(col_list, ut.listT(infr.needs_review_list))) annot_pair_dict['annot_uuid_1_list'] = get_annot_uuids( annot_pair_dict['aid_1_list']) annot_pair_dict['annot_uuid_2_list'] = get_annot_uuids( annot_pair_dict['aid_2_list']) zipped = zip(annot_pair_dict['annot_uuid_1_list'], annot_pair_dict['annot_uuid_2_list'], annot_pair_dict['p_same_list']) annot_pair_dict['review_pair_list'] = [{ 'annot_uuid_key': annot_uuid_1, 'annot_uuid_1': annot_uuid_1, 'annot_uuid_2': annot_uuid_2, 'prior_matching_state': { 'p_match': p_same, 'p_nomatch': 1.0 - p_same, 'p_notcomp': 0.0, } } for (annot_uuid_1, annot_uuid_2, p_same) in zipped] # Filter out only the keys we want to send back in the dictionary key_list = ['review_pair_list', 'confidence_list'] annot_pair_dict = ut.dict_subset(annot_pair_dict, key_list) # Compile the inference dict inference_dict = ut.odict([ ('cluster_dict', cluster_dict), ('annot_pair_dict', annot_pair_dict), ('_internal_state', None), ]) return inference_dict
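# Each entry of review_pair_list pairs two annotation uuids with a prior over the
# three review outcomes, so the probabilities must sum to one. A minimal sketch of
# that construction with hypothetical uuids and p_same values:
def _review_pair_sketch():
    zipped = [('uuid-a', 'uuid-b', 0.9), ('uuid-c', 'uuid-d', 0.2)]
    review_pair_list = [{
        'annot_uuid_key': annot_uuid_1,
        'annot_uuid_1': annot_uuid_1,
        'annot_uuid_2': annot_uuid_2,
        'prior_matching_state': {
            'p_match': p_same,
            'p_nomatch': 1.0 - p_same,
            'p_notcomp': 0.0,
        },
    } for (annot_uuid_1, annot_uuid_2, p_same) in zipped]
    assert all(abs(sum(d['prior_matching_state'].values()) - 1.0) < 1e-9
               for d in review_pair_list)
    return review_pair_list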
def draw_twoday_count(ibs, visit_info_list_): import copy visit_info_list = copy.deepcopy(visit_info_list_) aids_day1, aids_day2 = ut.take_column(visit_info_list_, 'aids') nids_day1, nids_day2 = ut.take_column(visit_info_list_, 'unique_nids') resight_nids = ut.isect(nids_day1, nids_day2) if False: # HACK REMOVE DATA TO MAKE THIS FASTER num = 20 for info in visit_info_list: non_resight_nids = list(set(info['unique_nids']) - set(resight_nids)) sample_nids2 = non_resight_nids[0:num] + resight_nids[:num] info['grouped_aids'] = ut.dict_subset(info['grouped_aids'], sample_nids2) info['unique_nids'] = sample_nids2 # Build a graph of matches if False: debug = False for info in visit_info_list: edges = [] grouped_aids = info['grouped_aids'] aids_list = list(grouped_aids.values()) ams_list = ibs.get_annotmatch_rowids_in_cliques(aids_list) aids1_list = ibs.unflat_map(ibs.get_annotmatch_aid1, ams_list) aids2_list = ibs.unflat_map(ibs.get_annotmatch_aid2, ams_list) for ams, aids, aids1, aids2 in zip(ams_list, aids_list, aids1_list, aids2_list): edge_nodes = set(aids1 + aids2) ##if len(edge_nodes) != len(set(aids)): # #print('--') # #print('aids = %r' % (aids,)) # #print('edge_nodes = %r' % (edge_nodes,)) bad_aids = edge_nodes - set(aids) if len(bad_aids) > 0: print('bad_aids = %r' % (bad_aids,)) unlinked_aids = set(aids) - edge_nodes mst_links = list(ut.itertwo(list(unlinked_aids) + list(edge_nodes)[:1])) bad_aids.add(None) user_links = [(u, v) for (u, v) in zip(aids1, aids2) if u not in bad_aids and v not in bad_aids] new_edges = mst_links + user_links new_edges = [(int(u), int(v)) for u, v in new_edges if u not in bad_aids and v not in bad_aids] edges += new_edges info['edges'] = edges # Add edges between days grouped_aids1, grouped_aids2 = ut.take_column(visit_info_list, 'grouped_aids') nids_day1, nids_day2 = ut.take_column(visit_info_list, 'unique_nids') resight_nids = ut.isect(nids_day1, nids_day2) resight_aids1 = ut.take(grouped_aids1, resight_nids) resight_aids2 = ut.take(grouped_aids2, resight_nids) #resight_aids3 = [list(aids1) + list(aids2) for aids1, aids2 in zip(resight_aids1, resight_aids2)] ams_list = ibs.get_annotmatch_rowids_between_groups(resight_aids1, resight_aids2) aids1_list = ibs.unflat_map(ibs.get_annotmatch_aid1, ams_list) aids2_list = ibs.unflat_map(ibs.get_annotmatch_aid2, ams_list) between_edges = [] for ams, aids1, aids2, rawaids1, rawaids2 in zip(ams_list, aids1_list, aids2_list, resight_aids1, resight_aids2): link_aids = aids1 + aids2 rawaids3 = rawaids1 + rawaids2 badaids = ut.setdiff(link_aids, rawaids3) assert not badaids user_links = [(int(u), int(v)) for (u, v) in zip(aids1, aids2) if u is not None and v is not None] # HACK THIS OFF user_links = [] if len(user_links) == 0: # Hack in an edge between_edges += [(rawaids1[0], rawaids2[0])] else: between_edges += user_links assert np.all(0 == np.diff(np.array(ibs.unflat_map(ibs.get_annot_nids, between_edges)), axis=1)) import plottool_ibeis as pt import networkx as nx #pt.qt4ensure() #len(list(nx.connected_components(graph1))) #print(ut.graph_info(graph1)) # Layout graph layoutkw = dict( prog='neato', draw_implicit=False, splines='line', #splines='curved', #splines='spline', #sep=10 / 72, #prog='dot', rankdir='TB', ) def translate_graph_to_origin(graph): x, y, w, h = ut.get_graph_bounding_box(graph) ut.translate_graph(graph, (-x, -y)) def stack_graphs(graph_list, vert=False, pad=None): graph_list_ = [g.copy() for g in graph_list] for g in graph_list_: translate_graph_to_origin(g) bbox_list = [ut.get_graph_bounding_box(g) 
for g in graph_list_] if vert: dim1 = 3 dim2 = 2 else: dim1 = 2 dim2 = 3 dim1_list = np.array([bbox[dim1] for bbox in bbox_list]) dim2_list = np.array([bbox[dim2] for bbox in bbox_list]) if pad is None: pad = np.mean(dim1_list) / 2 offset1_list = ut.cumsum([0] + [d + pad for d in dim1_list[:-1]]) max_dim2 = max(dim2_list) offset2_list = [(max_dim2 - d2) / 2 for d2 in dim2_list] if vert: t_xy_list = [(d2, d1) for d1, d2 in zip(offset1_list, offset2_list)] else: t_xy_list = [(d1, d2) for d1, d2 in zip(offset1_list, offset2_list)] for g, t_xy in zip(graph_list_, t_xy_list): ut.translate_graph(g, t_xy) nx.set_node_attributes(g, name='pin', values='true') new_graph = nx.compose_all(graph_list_) #pt.show_nx(new_graph, layout='custom', node_labels=False, as_directed=False) # NOQA return new_graph # Construct graph for count, info in enumerate(visit_info_list): graph = nx.Graph() edges = [(int(u), int(v)) for u, v in info['edges'] if u is not None and v is not None] graph.add_edges_from(edges, attr_dict={'zorder': 10}) nx.set_node_attributes(graph, name='zorder', values=20) # Layout in neato _ = pt.nx_agraph_layout(graph, inplace=True, **layoutkw) # NOQA # Extract components and then flatten in nid ordering ccs = list(nx.connected_components(graph)) root_aids = [] cc_graphs = [] for cc_nodes in ccs: cc = graph.subgraph(cc_nodes) try: root_aids.append(list(ut.nx_source_nodes(cc.to_directed()))[0]) except nx.NetworkXUnfeasible: root_aids.append(list(cc.nodes())[0]) cc_graphs.append(cc) root_nids = ibs.get_annot_nids(root_aids) nid2_graph = dict(zip(root_nids, cc_graphs)) resight_nids_ = set(resight_nids).intersection(set(root_nids)) noresight_nids_ = set(root_nids) - resight_nids_ n_graph_list = ut.take(nid2_graph, sorted(noresight_nids_)) r_graph_list = ut.take(nid2_graph, sorted(resight_nids_)) if len(n_graph_list) > 0: n_graph = nx.compose_all(n_graph_list) _ = pt.nx_agraph_layout(n_graph, inplace=True, **layoutkw) # NOQA n_graphs = [n_graph] else: n_graphs = [] r_graphs = [stack_graphs(chunk) for chunk in ut.ichunks(r_graph_list, 100)] if count == 0: new_graph = stack_graphs(n_graphs + r_graphs, vert=True) else: new_graph = stack_graphs(r_graphs[::-1] + n_graphs, vert=True) #pt.show_nx(new_graph, layout='custom', node_labels=False, as_directed=False) # NOQA info['graph'] = new_graph graph1_, graph2_ = ut.take_column(visit_info_list, 'graph') if False: _ = pt.show_nx(graph1_, layout='custom', node_labels=False, as_directed=False) # NOQA _ = pt.show_nx(graph2_, layout='custom', node_labels=False, as_directed=False) # NOQA graph_list = [graph1_, graph2_] twoday_graph = stack_graphs(graph_list, vert=True, pad=None) nx.set_node_attributes(twoday_graph, name='pin', values='true') if debug: ut.nx_delete_None_edge_attr(twoday_graph) ut.nx_delete_None_node_attr(twoday_graph) print('twoday_graph(pre) info' + ut.repr3(ut.graph_info(twoday_graph), nl=2)) # Hack, no idea why there are nodes that dont exist here between_edges_ = [edge for edge in between_edges if twoday_graph.has_node(edge[0]) and twoday_graph.has_node(edge[1])] twoday_graph.add_edges_from(between_edges_, attr_dict={'alpha': .2, 'zorder': 0}) ut.nx_ensure_agraph_color(twoday_graph) layoutkw['splines'] = 'line' layoutkw['prog'] = 'neato' agraph = pt.nx_agraph_layout(twoday_graph, inplace=True, return_agraph=True, **layoutkw)[-1] # NOQA if False: fpath = ut.truepath('~/ggr_graph.png') agraph.draw(fpath) ut.startfile(fpath) if debug: print('twoday_graph(post) info' + ut.repr3(ut.graph_info(twoday_graph))) _ = pt.show_nx(twoday_graph, 
layout='custom', node_labels=False, as_directed=False) # NOQA
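# stack_graphs above shifts each laid-out component so the bounding boxes do not
# overlap before composing everything into one graph. A minimal, self-contained
# sketch of that idea with plain networkx; the helper name and toy graphs are
# hypothetical, and only horizontal stacking is shown.
import networkx as nx

def _stack_positions_sketch(graph_list, pad=1.0):
    offset = 0.0
    stacked = []
    for g in graph_list:
        g = g.copy()
        pos = nx.get_node_attributes(g, 'pos')
        xs = [x for x, _ in pos.values()]
        width = (max(xs) - min(xs)) if xs else 0.0
        shift = offset - (min(xs) if xs else 0.0)
        for node, (x, y) in pos.items():
            g.nodes[node]['pos'] = (x + shift, y)
        offset += width + pad
        stacked.append(g)
    return nx.compose_all(stacked)

# g1 = nx.path_graph([1, 2]); nx.set_node_attributes(g1, {1: (0, 0), 2: (1, 0)}, 'pos')
# g2 = nx.path_graph([3, 4]); nx.set_node_attributes(g2, {3: (0, 0), 4: (1, 0)}, 'pos')
# _stack_positions_sketch([g1, g2])  # nodes 3 and 4 end up at x = 2.0 and 3.0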
def test_sver_wrapper(): """ Test to ensure cpp and python agree and that cpp is faster CommandLine: python -m vtool.sver_c_wrapper --test-test_sver_wrapper python -m vtool.sver_c_wrapper --test-test_sver_wrapper --rebuild-sver python -m vtool.sver_c_wrapper --test-test_sver_wrapper --show python -m vtool.sver_c_wrapper --test-test_sver_wrapper --show --dummy python -m vtool.sver_c_wrapper --test-test_sver_wrapper --show --fname1=easy1.png --fname2=easy2.png python -m vtool.sver_c_wrapper --test-test_sver_wrapper --show --fname1=easy1.png --fname2=hard3.png python -m vtool.sver_c_wrapper --test-test_sver_wrapper --show --fname1=carl.jpg --fname2=hard3.png Example: >>> # ENABLE_DOCTEST >>> from vtool.sver_c_wrapper import * # NOQA >>> test_sver_wrapper() Ignore: %timeit call_python_version(*args) %timeit get_affine_inliers_cpp(*args) """ import vtool.spatial_verification as sver import vtool.tests.dummy as dummy xy_thresh_sqrd = ktool.KPTS_DTYPE(.4) scale_thresh_sqrd = ktool.KPTS_DTYPE(2.0) ori_thresh = ktool.KPTS_DTYPE(TAU / 4.0) keys = 'xy_thresh_sqrd, scale_thresh_sqrd, ori_thresh'.split(', ') print(ut.dict_str(ut.dict_subset(locals(), keys))) def report_errors(): pass if ut.get_argflag('--dummy'): testtup = dummy.testdata_dummy_matches() (kpts1, kpts2, fm_input, fs_input, rchip1, rchip2) = testtup fm_input = fm_input.astype(fm_dtype) #fm_input = fm_input[0:10].astype(fm_dtype) #fs_input = fs_input[0:10].astype(np.float32) else: fname1 = ut.get_argval('--fname1', type_=str, default='easy1.png') fname2 = ut.get_argval('--fname2', type_=str, default='easy2.png') testtup = dummy.testdata_ratio_matches(fname1, fname2) (kpts1, kpts2, fm_input, fs_input, rchip1, rchip2) = testtup # pack up call to aff hypothesis import vtool as vt import scipy.stats.mstats scales1 = vt.get_scales(kpts1.take(fm_input.T[0], axis=0)) scales2 = vt.get_scales(kpts2.take(fm_input.T[1], axis=0)) #fs_input = 1 / scipy.stats.mstats.gmean(np.vstack((scales1, scales2))) fs_input = scipy.stats.mstats.gmean(np.vstack((scales1, scales2))) print('fs_input = ' + ut.numpy_str(fs_input)) #fs_input[0:-9] = 0 #fs_input = np.ones(len(fm_input), dtype=fs_dtype) #ut.embed() #fs_input = scales1 * scales2 args = (kpts1, kpts2, fm_input, fs_input, xy_thresh_sqrd, scale_thresh_sqrd, ori_thresh) ex_list = [] try: with ut.Indenter('[TEST1] '): inlier_tup = vt.compare_implementations( sver.get_affine_inliers, get_affine_inliers_cpp, args, lbl1='py', lbl2='c', output_lbl=('aff_inliers_list', 'aff_errors_list', 'Aff_mats') ) out_inliers, out_errors, out_mats = inlier_tup except AssertionError as ex: ex_list.append(ex) raise try: import functools with ut.Indenter('[TEST2] '): bestinlier_tup = vt.compare_implementations( functools.partial(sver.get_best_affine_inliers, forcepy=True), get_best_affine_inliers_cpp, args, show_output=True, lbl1='py', lbl2='c', output_lbl=('bestinliers', 'besterror', 'bestmat') ) bestinliers, besterror, bestmat = bestinlier_tup except AssertionError as ex: ex_list.append(ex) raise if len(ex_list) > 0: raise AssertionError('some tests failed. 
see previous stdout') #num_inliers_list = np.array(map(len, out_inliers_c)) #best_argx = num_inliers_list.argmax() ##best_inliers_py = out_inliers_py[best_argx] #best_inliers_c = out_inliers_c[best_argx] if ut.show_was_requested(): import plottool as pt fm_output = fm_input.take(bestinliers, axis=0) fnum = pt.next_fnum() pt.figure(fnum=fnum, doclf=True, docla=True) pt.show_chipmatch2(rchip1, rchip2, kpts1, kpts2, fm_input, ell_linewidth=5, fnum=fnum, pnum=(2, 1, 1)) pt.show_chipmatch2(rchip1, rchip2, kpts1, kpts2, fm_output, ell_linewidth=5, fnum=fnum, pnum=(2, 1, 2)) pt.show_if_requested()
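# test_sver_wrapper checks that the Python and C spatial-verification paths agree on
# the same inputs and times both. A minimal sketch of that compare-then-time pattern
# with two hypothetical implementations standing in for the py/c pair:
import time
import numpy as np

def _compare_implementations_sketch():
    def estimate_py(pts):
        return pts.mean(axis=0)

    def estimate_fast(pts):
        return np.sum(pts, axis=0) / len(pts)

    pts = np.random.RandomState(0).rand(10000, 2)
    timings = {}
    for label, func in [('py', estimate_py), ('fast', estimate_fast)]:
        tic = time.perf_counter()
        out = func(pts)
        timings[label] = (time.perf_counter() - tic, out)
    assert np.allclose(timings['py'][1], timings['fast'][1]), 'implementations disagree'
    return timings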