def cheetah_stats(ibs):
    filters = [
        dict(view=['right', 'frontright', 'backright'], minqual='good'),
        dict(view=['right', 'frontright', 'backright']),
    ]
    for filtkw in filters:
        annots = ibs.annots(ibs.filter_annots_general(**filtkw))
        unique_nids, grouped_annots = annots.group(annots.nids)
        annots_per_name = ut.lmap(len, grouped_annots)
        annots_per_name_freq = ut.dict_hist(annots_per_name)

        def bin_mapper(num):
            if num < 5:
                return (num, num + 1)
            else:
                for bin, mod in [(20, 5), (50, 10)]:
                    if num < bin:
                        low = (num // mod) * mod
                        high = low + mod
                        return (low, high)
                if num >= bin:
                    return (bin, None)
                else:
                    assert False, str(num)

        hist = ut.ddict(lambda: 0)
        for num in annots_per_name:
            hist[bin_mapper(num)] += 1
        hist = ut.sort_dict(hist)

        print('------------')
        print('filters = %s' % ut.repr4(filtkw))
        print('num_annots = %r' % (len(annots)))
        print('num_names = %r' % (len(unique_nids)))
        print('annots_per_name_freq = %s' % (ut.repr4(annots_per_name_freq)))
        print('annots_per_name_freq (ranges) = %s' % (ut.repr4(hist)))
        assert sum(hist.values()) == len(unique_nids)
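# A minimal, self-contained sketch of the same range-binning idea used by
# bin_mapper above, runnable without an ibs controller or utool. The bin
# edges (5, 20, 50) mirror the ones above; the helper name and sample data
# are illustrative only.
from collections import Counter

def bin_count(num, bins=((20, 5), (50, 10))):
    """Map a count to a (low, high) bin; counts under 5 get unit-width bins."""
    if num < 5:
        return (num, num + 1)
    for top, step in bins:
        if num < top:
            low = (num // step) * step
            return (low, low + step)
    return (bins[-1][0], None)

annots_per_name = [1, 2, 2, 3, 7, 12, 26, 61]
hist = Counter(bin_count(n) for n in annots_per_name)
assert sum(hist.values()) == len(annots_per_name)
print(dict(hist))
# {(1, 2): 1, (2, 3): 2, (3, 4): 1, (5, 10): 1, (10, 15): 1, (20, 30): 1, (50, None): 1}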
def print_database_structure(cur):
    import utool as ut
    tablename_list = ut.get_tablenames(cur)
    colinfos_list = [ut.get_table_columninfo_list(cur, tablename)
                     for tablename in tablename_list]
    numrows_list = [ut.get_table_num_rows(cur, tablename)
                    for tablename in tablename_list]
    for tablename, colinfo_list, num_rows in ut.sortedby(
            list(zip(tablename_list, colinfos_list, numrows_list)),
            numrows_list):
        print('+-------------')
        print('tablename = %r' % (tablename,))
        print('num_rows = %r' % (num_rows,))
        # print(ut.repr4(colinfo_list))
        print(ut.repr4(ut.get_primary_columninfo(cur, tablename)))
        print(ut.repr4(ut.get_nonprimary_columninfo(cur, tablename)))
        print('+-------------')
def _postprocess_feats(extr, feats):
    # Take the filtered subset of columns
    if extr.feat_dims is not None:
        missing = set(extr.feat_dims).difference(feats.columns)
        if any(missing):
            # print('We have: ' + ut.repr4(feats.columns))
            alt = feats.columns.difference(extr.feat_dims)
            mis_msg = 'Missing feature dims: ' + ut.repr4(missing)
            alt_msg = 'Did you mean? ' + ut.repr4(alt)
            print(mis_msg)
            print(alt_msg)
            raise KeyError(mis_msg)
        feats = feats[extr.feat_dims]
    return feats
def isect_info(self, other):
    set1 = set(self.rel_fpath_list)
    set2 = set(other.rel_fpath_list)
    set_comparisons = ut.odict([
        ('s1', set1),
        ('s2', set2),
        ('union', set1.union(set2)),
        ('isect', set1.intersection(set2)),
        ('s1 - s2', set1.difference(set2)),
        ('s2 - s1', set2.difference(set1)),
    ])
    stat_stats = ut.map_vals(len, set_comparisons)
    print(ut.repr4(stat_stats))
    return set_comparisons

    if False:
        idx_lookup1 = ut.make_index_lookup(self.rel_fpath_list)
        idx_lookup2 = ut.make_index_lookup(other.rel_fpath_list)
        uuids1 = ut.take(self.uuids,
                         ut.take(idx_lookup1, set_comparisons['union']))
        uuids2 = ut.take(other.uuids,
                         ut.take(idx_lookup2, set_comparisons['union']))
        uuids1 == uuids2
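# A self-contained usage sketch of the same set-comparison report using plain
# dict/set instead of ut.odict/ut.map_vals. The two path lists are made up
# for illustration.
def compare_path_sets(paths1, paths2):
    set1, set2 = set(paths1), set(paths2)
    comparisons = {
        's1': set1,
        's2': set2,
        'union': set1 | set2,
        'isect': set1 & set2,
        's1 - s2': set1 - set2,
        's2 - s1': set2 - set1,
    }
    return {key: len(val) for key, val in comparisons.items()}

print(compare_path_sets(['a.jpg', 'b.jpg'], ['b.jpg', 'c.jpg']))
# {'s1': 2, 's2': 2, 'union': 3, 'isect': 1, 's1 - s2': 1, 's2 - s1': 1}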
def _print_previous_loop_statistics(infr, count):
    # Print stats about what happened in this loop
    history = infr.metrics_list[-count:]
    recover_blocks = ut.group_items([
        (k, sum(1 for i in g))
        for k, g in it.groupby(ut.take_column(history, 'recovering'))
    ]).get(True, [])
    infr.print((
        'Recovery mode entered {} times, '
        'made {} recovery decisions.').format(
            len(recover_blocks), sum(recover_blocks)), color='green')
    testaction_hist = ut.dict_hist(ut.take_column(history, 'test_action'))
    infr.print(
        'Test Action Histogram: {}'.format(
            ut.repr4(testaction_hist, si=True)), color='yellow')
    if infr.params['inference.enabled']:
        action_hist = ut.dict_hist(
            ut.emap(frozenset, ut.take_column(history, 'action')))
        infr.print(
            'Inference Action Histogram: {}'.format(
                ub.repr2(action_hist, si=True)), color='yellow')
    infr.print(
        'Decision Histogram: {}'.format(ut.repr2(ut.dict_hist(
            ut.take_column(history, 'pred_decision')
        ), si=True)), color='yellow')
    infr.print(
        'User Histogram: {}'.format(ut.repr2(ut.dict_hist(
            ut.take_column(history, 'user_id')
        ), si=True)), color='yellow')
def testdata_showchip():
    import wbia
    ibs = wbia.opendb(defaultdb='PZ_MTEST')
    aid_list = ut.get_argval(('--aids', '--aid'), type_=list, default=None)
    if aid_list is None:
        aid_list = ibs.get_valid_aids()[0:4]
    weight_label = ut.get_argval('--weight_label', type_=str,
                                 default='fg_weights')
    annote = not ut.get_argflag('--no-annote')
    kwargs = dict(ori=ut.get_argflag('--ori'), weight_label=weight_label,
                  annote=annote)
    kwargs['notitle'] = ut.get_argflag('--notitle')
    kwargs['pts'] = ut.get_argflag('--drawpts')
    kwargs['ell'] = True or ut.get_argflag('--drawell')
    kwargs['ell_alpha'] = ut.get_argval('--ellalpha', default=0.4)
    kwargs['ell_linewidth'] = ut.get_argval('--ell_linewidth', default=2)
    kwargs['draw_lbls'] = ut.get_argval('--draw_lbls', default=True)
    logger.info('kwargs = ' + ut.repr4(kwargs, nl=True))
    default_config = dict(wbia.algo.Config.FeatureWeightConfig().parse_items())
    cfgdict = ut.argparse_dict(default_config)
    logger.info('[viz_chip.testdata] cfgdict = %r' % (cfgdict,))
    config2_ = cfgdict
    logger.info('[viz_chip.testdata] aid_list = %r' % (aid_list,))
    return ibs, aid_list, kwargs, config2_
def update(client, data_list):
    client.review_vip = None
    if data_list is None:
        print('GRAPH CLIENT GOT NONE UPDATE')
        client.review_dict = None
    else:
        data_list = list(data_list)
        num_samples = 5
        num_items = len(data_list)
        num_samples = min(num_samples, num_items)
        first = list(data_list[:num_samples])
        print('UPDATING GRAPH CLIENT WITH {} ITEM(S):'.format(num_items))
        print('First few are: ' + ut.repr4(first, si=2, precision=4))
        client.review_dict = {}
        for (edge, priority, edge_data_dict) in data_list:
            aid1, aid2 = edge
            if aid2 < aid1:
                aid1, aid2 = aid2, aid1
            edge = (aid1, aid2,)
            if client.review_vip is None:
                # Hack around the double review problem
                if edge != client.prev_vip:
                    client.review_vip = edge
            client.review_dict[edge] = (priority, edge_data_dict,)
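# A small sketch of the edge-normalization step above: review edges are keyed
# by an (aid1, aid2) tuple with the smaller id first, so (3, 1) and (1, 3)
# refer to the same pair. The names and sample priorities are illustrative.
def normalize_edge(edge):
    aid1, aid2 = edge
    return (aid2, aid1) if aid2 < aid1 else (aid1, aid2)

review_dict = {}
for edge, priority in [((3, 1), 0.9), ((1, 3), 0.7), ((2, 5), 0.4)]:
    review_dict[normalize_edge(edge)] = priority

print(review_dict)  # {(1, 3): 0.7, (2, 5): 0.4} -- the later (1, 3) entry wins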
def assert_unique(item_list, ignore=[], name='list', verbose=None):
    import utool as ut
    dups = ut.find_duplicate_items(item_list)
    ut.delete_dict_keys(dups, ignore)
    if len(dups) > 0:
        raise AssertionError('Found duplicate items in %s: %s' % (
            name, ut.repr4(dups)))
    if verbose:
        print('No duplicates found in %s' % (name,))
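# A stdlib-only sketch of the same duplicate check, assuming
# ut.find_duplicate_items returns a mapping of repeated items; Counter gives
# an equivalent count-based view. The helper name is illustrative.
from collections import Counter

def assert_unique_simple(items, ignore=(), name='list'):
    dups = {k: v for k, v in Counter(items).items()
            if v > 1 and k not in ignore}
    if dups:
        raise AssertionError('Found duplicate items in %s: %r' % (name, dups))

assert_unique_simple([1, 2, 3])              # passes silently
assert_unique_simple([1, 2, 2], ignore=[2])  # passes because 2 is ignored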
def hardcase_review_gen(infr):
    """
    Subiterator for hardcase review

    Re-review non-confident edges that vsone did not classify correctly
    """
    infr.print('==============================', color='white')
    infr.print('--- HARDCASE PRIORITY LOOP ---', color='white')
    verifiers = infr.learn_evaluation_verifiers()
    verif = verifiers['match_state']

    edges_ = list(infr.edges())
    real_ = list(infr.edge_decision_from(edges_))
    flags_ = [r in {POSTV, NEGTV, INCMP} for r in real_]
    real = ut.compress(real_, flags_)
    edges = ut.compress(edges_, flags_)

    hardness = 1 - verif.easiness(edges, real)

    if True:
        df = pd.DataFrame({'edges': edges, 'real': real})
        df['hardness'] = hardness
        pred = verif.predict(edges)
        df['pred'] = pred.values
        df.sort_values('hardness', ascending=False)
        infr.print('hardness analysis')
        infr.print(str(df))

    infr.print('infr status: ' + ut.repr4(infr.status()))

    # Don't re-review anything that was confidently reviewed
    # CONFIDENCE = const.CONFIDENCE
    # CODE_TO_INT = CONFIDENCE.CODE_TO_INT.copy()
    # CODE_TO_INT[CONFIDENCE.CODE.UNKNOWN] = 0
    # conf = ut.take(CODE_TO_INT, infr.gen_edge_values(
    #     'confidence', edges, on_missing='default',
    #     default=CONFIDENCE.CODE.UNKNOWN))

    # This should only be run with certain params
    assert not infr.params['autoreview.enabled']
    assert not infr.params['redun.enabled']
    assert not infr.params['ranking.enabled']
    assert infr.params['inference.enabled']

    # const.CONFIDENCE.CODE.PRETTY_SURE
    if infr.params['queue.conf.thresh'] is None:
        # != 'pretty_sure':
        infr.print('WARNING: should queue.conf.thresh = "pretty_sure"?')

    # work around add_candidate_edges
    infr.prioritize(metric='hardness', edges=edges, scores=hardness)
    infr.set_edge_attrs('hardness', ut.dzip(edges, hardness))
    for _ in infr._inner_priority_gen(use_refresh=False):
        yield _
def sed_projects(regexpr, repl, force=False, recursive=True,
                 user_profile=None, **kwargs):
    r"""
    Args:
        regexpr (?):
        repl (?):
        force (bool): (default = False)
        recursive (bool): (default = True)
        user_profile (None): (default = None)

    CommandLine:
        python -m utool.util_project --exec-sed_projects

    Example:
        >>> # DISABLE_DOCTEST
        >>> from utool.util_project import *  # NOQA
        >>> regexpr = ut.get_argval('--find', type_=str, default=sys.argv[-1])
        >>> repl = ut.get_argval('--repl', type_=str, default=sys.argv[-2])
        >>> force = False
        >>> recursive = True
        >>> user_profile = None
        >>> result = sed_projects(regexpr, repl, force, recursive, user_profile)
        >>> print(result)

    Ignore:
        regexpr = 'annotation match_scores'
        repl = 'draw_annot_scoresep'
    """
    # FIXME: finishme
    import utool as ut
    user_profile = ensure_user_profile(user_profile)

    sedkw = {}
    sedkw['exclude_dirs'] = user_profile.project_exclude_dirs
    sedkw['dpath_list'] = user_profile.project_dpaths
    sedkw['include_patterns'] = user_profile.project_include_patterns
    sedkw.update(kwargs)

    msg_list1 = []
    # msg_list2 = []
    print_ = msg_list1.append
    print_('Seding Projects')
    print(' * regular expression : %r' % (regexpr,))
    print(' * replacement : %r' % (repl,))
    print_('sedkw = %s' % ut.repr4(sedkw, nl=True))
    print(' * recursive: %r' % (recursive,))
    print(' * force: %r' % (force,))

    # Walk through each directory recursively
    for fpath in ut.matching_fpaths(sedkw['dpath_list'],
                                    sedkw['include_patterns'],
                                    sedkw['exclude_dirs'],
                                    recursive=recursive):
        ut.sedfile(fpath, regexpr, repl, force)
def get_timestats_str(unixtime_list, newlines=1, full=True, isutc=False): r""" Args: unixtime_list (list): newlines (bool): Returns: str: timestat_str CommandLine: python -m utool.util_time --test-get_timestats_str Example: >>> # ENABLE_DOCTEST >>> from utool.util_time import * # NOQA >>> import utool as ut >>> unixtime_list = [0, 0 + 60 * 60 * 5 , 10 + 60 * 60 * 5, 100 + 60 * 60 * 5, 1000 + 60 * 60 * 5] >>> newlines = 1 >>> full = False >>> timestat_str = get_timestats_str(unixtime_list, newlines, full=full, isutc=True) >>> result = ut.align(str(timestat_str), ':') >>> print(result) { 'max' : '1970/01/01 05:16:40', 'mean' : '1970/01/01 04:03:42', 'min' : '1970/01/01 00:00:00', 'range': '5:16:40', 'std' : '2:02:01', } Example2: >>> # ENABLE_DOCTEST >>> from utool.util_time import * # NOQA >>> import utool as ut >>> unixtime_list = [0, 0 + 60 * 60 * 5 , 10 + 60 * 60 * 5, 100 + 60 * 60 * 5, 1000 + 60 * 60 * 5, float('nan'), 0] >>> newlines = 1 >>> timestat_str = get_timestats_str(unixtime_list, newlines, isutc=True) >>> result = ut.align(str(timestat_str), ':') >>> print(result) { 'max' : '1970/01/01 05:16:40', 'mean' : '1970/01/01 03:23:05', 'min' : '1970/01/01 00:00:00', 'nMax' : 1, 'nMin' : 2, 'num_nan': 1, 'range' : '5:16:40', 'shape' : (7,), 'std' : '2:23:43', } """ import utool as ut datetime_stats = get_timestats_dict(unixtime_list, full=full, isutc=isutc) timestat_str = ut.repr4(datetime_stats, newlines=newlines) return timestat_str
def parse_window_type_and_flags(self):
    # type_ = self.windowType()
    for key, val in WindowTypes.items():
        if bin(val).count('1') == 1:
            pass
            # logger.info('{:<16s}: 0x{:08b}'.format(key, val))
        logger.info('{:<16s}: 0x{:08x}'.format(key, val))
    has = []
    missing = []
    flags = int(self.windowFlags())
    for key, val in WindowFlags.items():
        if flags & val == val:
            has.append(key)
        else:
            missing.append(key)
    logger.info('has = %s' % (ut.repr4(has),))
    logger.info('missing = %s' % (ut.repr4(missing),))
    pass
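# A self-contained sketch of the flag-membership test used above: a flag is
# "set" when masking the combined value with it gives the flag back. The flag
# names and values here are made up for illustration and are not real Qt
# WindowFlags constants.
WINDOW_FLAGS = {'FramelessWindowHint': 0x800, 'WindowStaysOnTopHint': 0x40000}

def split_flags(flags, flag_table):
    has = [k for k, v in flag_table.items() if flags & v == v]
    missing = [k for k, v in flag_table.items() if flags & v != v]
    return has, missing

has, missing = split_flags(0x800, WINDOW_FLAGS)
print(has)      # ['FramelessWindowHint']
print(missing)  # ['WindowStaysOnTopHint']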
def autogen_argparse_block(extra_args=[]): """ SHOULD TURN ANY REGISTERED ARGS INTO A A NEW PARSING CONFIG FILE FOR BETTER --help COMMANDS import utool as ut __REGISTERED_ARGS__ = ut.util_arg.__REGISTERED_ARGS__ Args: extra_args (list): (default = []) CommandLine: python -m utool.util_arg --test-autogen_argparse_block Example: >>> # DISABLE_DOCTEST >>> import utool as ut >>> extra_args = [] >>> result = ut.autogen_argparse_block(extra_args) >>> print(result) """ #import utool as ut # NOQA #__REGISTERED_ARGS__ # TODO FINISHME grouped_args = [] # Group similar a args for argtup in __REGISTERED_ARGS__: argstr_list, type_, default, help_ = argtup argstr_set = set(argstr_list) # <MULTIKEY_SETATTR> # hack in multikey setattr n**2 yuck found = False for index, (keyset, vals) in enumerate(grouped_args): if len(keyset.intersection(argstr_set)) > 0: # update keyset.update(argstr_set) vals.append(argtup) found = True break if not found: new_keyset = argstr_set new_vals = [argtup] grouped_args.append((new_keyset, new_vals)) # </MULTIKEY_SETATTR> # DEBUG multi_groups = [] for keyset, vals in grouped_args: if len(vals) > 1: multi_groups.append(vals) if len(multi_groups) > 0: import utool as ut print('Following arg was specified multiple times') print(ut.repr4(multi_groups, newlines=2))
def find_duplicates(index):
    # fpaths = list(index.files.keys())
    files = list(index.files.values())
    print('Grouping {} files'.format(len(files)))
    grouped = ut.group_items(files, [f.nbytes for f in files])
    print('Found {} groups'.format(len(grouped)))
    potential_dups = {k: v for k, v in grouped.items() if len(v) > 1}
    print('Found {} potential dups by nbytes'.format(len(potential_dups)))

    GB = 2**30  # NOQA
    MB = 2**20  # NOQA
    max_bytes = 10 * MB
    min_bytes = 64 * MB

    duplicates = []
    for k, fs in ut.ProgIter(potential_dups.items(), freq=1):
        names = [f.n for f in fs]
        if ut.allsame(names):
            # Don't do big files yet
            if k < max_bytes and k > min_bytes:
                if ut.allsame([f.hashid for f in fs]):
                    duplicates.extend(fs)
                    for f1, f2 in ut.combinations(fs, 2):
                        f1.duplicates.add(f2)
                        f2.duplicates.add(f1)

    def dpath_similarity(index, dpath1, dpath2):
        d1 = index[dpath1]
        d2 = index[dpath2]
        set1 = {f.hashid for f in ut.ProgIter(d1.files)}
        set2 = {f.hashid for f in ut.ProgIter(d2.files)}
        # n_isect = len(set1.intersection(set2))
        size1, size2 = map(len, (set1, set2))
        # minsize = min(size1, size2)
        # sim_measures = (n_isect, n_isect / minsize)
        return ut.set_overlaps(set1, set2)
        # return sim_measures

    similarities = {}
    r_to_dup = ut.group_items(duplicates, [p.r for p in duplicates])
    for dpath, dups in r_to_dup.items():
        # Check to see if the duplicates all point to the same dir
        f = dups[0]  # NOQA
        common_dpath = set.intersection(*[
            {_.r for _ in f.duplicates} for f in dups])
        for other in common_dpath:
            sim_measures = dpath_similarity(index, dpath, other)
            similarities[(dpath, other)] = sim_measures

    print(ut.repr4(similarities, si=True, nl=2))
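# A stdlib-only sketch of the same two-stage idea used above: group candidate
# duplicates by file size first (cheap), then confirm with a content hash
# (expensive). The function name, paths, and choice of sha1 are illustrative.
import hashlib
import os
from collections import defaultdict

def find_duplicate_files(fpaths):
    by_size = defaultdict(list)
    for fpath in fpaths:
        by_size[os.path.getsize(fpath)].append(fpath)
    dups = defaultdict(list)
    for size, group in by_size.items():
        if len(group) > 1:
            for fpath in group:
                with open(fpath, 'rb') as file:
                    dups[hashlib.sha1(file.read()).hexdigest()].append(fpath)
    return {h: g for h, g in dups.items() if len(g) > 1}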
def assert_union_invariant(infr, msg=''):
    edge_sets = {
        key: set(it.starmap(e_, graph.edges()))
        for key, graph in infr.review_graphs.items()
    }
    edge_union = set.union(*edge_sets.values())
    all_edges = set(it.starmap(e_, infr.graph.edges()))
    if edge_union != all_edges:
        logger.info('ERROR STATUS DUMP:')
        logger.info(ut.repr4(infr.status()))
        raise AssertionError(
            'edge sets must have full union. Found union=%d vs all=%d' % (
                len(edge_union), len(all_edges)))
def build_sphinx_apidoc_cmdstr(): print('') print('if this fails try: sudo pip install sphinx') print('') apidoc = 'sphinx-apidoc' if ut.WIN32: winprefix = 'C:/Python27/Scripts/' sphinx_apidoc_exe = winprefix + apidoc + '.exe' else: sphinx_apidoc_exe = apidoc apidoc_argfmt_list = [ sphinx_apidoc_exe, '--force', '--full', '--maxdepth="{maxdepth}"', '--doc-author="{author}"', '--doc-version="{doc_version}"', '--doc-release="{doc_release}"', '--output-dir="_doc"', #'--separate', # Put documentation for each module on its own page '--private', # Include "_private" modules '{pkgdir}', ] outputdir = '_doc' author = ut.parse_author() packages = ut.find_packages(maxdepth=1) assert len(packages) != 0, 'directory must contain at least one package' if len(packages) > 1: assert len(packages) == 1,\ ('FIXME I dont know what to do with more than one root package: %r' % (packages,)) pkgdir = packages[0] version = ut.parse_package_for_version(pkgdir) modpath = dirname(ut.truepath(pkgdir)) apidoc_fmtdict = { 'author': author, 'maxdepth': '8', 'pkgdir': pkgdir, 'doc_version': version, 'doc_release': version, 'outputdir': outputdir, } ut.assert_exists('setup.py') ut.ensuredir('_doc') apidoc_fmtstr = ' '.join(apidoc_argfmt_list) apidoc_cmdstr = apidoc_fmtstr.format(**apidoc_fmtdict) print('[util_setup] autogenerate sphinx docs for %r' % (pkgdir,)) if ut.VERBOSE: print(ut.repr4(apidoc_fmtdict)) return apidoc_cmdstr, modpath, outputdir
def __init__(
    extr,
    ibs=None,
    config={},
    use_cache=True,
    verbose=1,
    # Nested config props
    match_config=None,
    pairfeat_cfg=None,
    global_keys=None,
    need_lnbnn=None,
    feat_dims=None,
):
    extr.verbose = verbose
    extr.use_cache = use_cache
    extr.ibs = ibs
    # Configs for this are a bit foobar. Allow config to be a catch-all. It
    # can either store params in nested or flat form
    config = config.copy()
    vars_ = vars()

    def _popconfig(key, default):
        """ ensures param is either specified in func args xor config """
        if key in config:
            if vars_.get(key, None) is not None:
                raise ValueError('{} specified twice'.format(key))
            value = config.pop(key)
        else:
            # See if the local namespace has it
            value = vars_.get(key, None)
            if value is None:
                value = default
        return value

    # These also sort-of belong to pair-feat config
    extr.global_keys = _popconfig('global_keys', [])
    extr.need_lnbnn = _popconfig('need_lnbnn', False)
    extr.feat_dims = _popconfig('feat_dims', None)
    extr.match_config = MatchConfig(**_popconfig('match_config', {}))
    extr.pairfeat_cfg = PairFeatureConfig(**_popconfig('pairfeat_cfg', {}))
    # Allow config to store flat versions of these params
    extr.match_config.pop_update(config)
    extr.pairfeat_cfg.pop_update(config)
    if len(config) > 0:
        raise ValueError('Unused config items: ' + ut.repr4(config))
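# A sketch of the "specified exactly once" pattern that _popconfig above
# implements, shown outside of any class: a value may come from a keyword
# argument or from a catch-all config dict, but not both. The helper name and
# sample keys are illustrative.
def pop_once(config, key, kwarg_value, default):
    if key in config:
        if kwarg_value is not None:
            raise ValueError('{} specified twice'.format(key))
        return config.pop(key)
    return kwarg_value if kwarg_value is not None else default

cfg = {'feat_dims': ['ratio', 'sum(ratio)']}
print(pop_once(cfg, 'feat_dims', None, default=None))  # ['ratio', 'sum(ratio)']
print(cfg)  # {} -- consumed; any leftovers would indicate unused config items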
def auto_docstr(modname, funcname, verbose=True, moddir=None, **kwargs): r""" called from vim. Uses strings of filename and modnames to build docstr Args: modname (str): name of a python module funcname (str): name of a function in the module Returns: str: docstr CommandLine: python -m utool.util_autogen auto_docstr python -m utool --tf auto_docstr Example: >>> import utool as ut >>> from utool.util_autogen import * # NOQA >>> ut.util_autogen.rrr(verbose=False) >>> #docstr = ut.auto_docstr('ibeis.algo.hots.smk.smk_index', 'compute_negentropy_names') >>> modname = ut.get_argval('--modname', default='utool.util_autogen') >>> funcname = ut.get_argval('--funcname', default='auto_docstr') >>> moddir = ut.get_argval('--moddir', type_=str, default=None) >>> docstr = ut.util_autogen.auto_docstr(modname, funcname) >>> print(docstr) """ #import utool as ut func, module, error_str = load_func_from_module( modname, funcname, verbose=verbose, moddir=moddir) if error_str is None: try: docstr = make_default_docstr(func, **kwargs) except Exception as ex: import utool as ut error_str = ut.formatex(ex, 'Caught Error in parsing docstr', tb=True) #ut.printex(ex) error_str += ( '\n\nReplicateCommand:\n ' 'python -m utool --tf auto_docstr ' '--modname={modname} --funcname={funcname} --moddir={moddir}').format( modname=modname, funcname=funcname, moddir=moddir) error_str += '\n kwargs=' + ut.repr4(kwargs) return error_str else: docstr = error_str return docstr
def _test_pos_neg():
    infr = demo.demodata_infr(num_pccs=0)
    # Make 3 inconsistent CCs
    infr.add_feedback((1, 2), POSTV)
    infr.add_feedback((2, 3), POSTV)
    infr.add_feedback((3, 4), POSTV)
    infr.add_feedback((4, 1), POSTV)
    infr.add_feedback((1, 3), NEGTV)
    # -----
    infr.add_feedback((11, 12), POSTV)
    infr.add_feedback((12, 13), POSTV)
    infr.add_feedback((13, 11), NEGTV)
    # -----
    infr.add_feedback((21, 22), POSTV)
    infr.add_feedback((22, 23), POSTV)
    infr.add_feedback((23, 21), NEGTV)
    # -----
    # Fix inconsistency
    infr.add_feedback((23, 21), POSTV)
    # Merge inconsistent CCS
    infr.add_feedback((1, 11), POSTV)
    # Negative edge within an inconsistent CC
    infr.add_feedback((2, 13), NEGTV)
    # Negative edge external to an inconsistent CC
    infr.add_feedback((12, 21), NEGTV)
    # -----
    # Make inconsistency from positive
    infr.add_feedback((31, 32), POSTV)
    infr.add_feedback((33, 34), POSTV)
    infr.add_feedback((31, 33), NEGTV)
    infr.add_feedback((32, 34), NEGTV)
    infr.add_feedback((31, 34), POSTV)
    # Fix everything
    infr.add_feedback((1, 3), POSTV)
    infr.add_feedback((2, 4), POSTV)
    infr.add_feedback((32, 34), POSTV)
    infr.add_feedback((31, 33), POSTV)
    infr.add_feedback((13, 11), POSTV)
    infr.add_feedback((23, 21), POSTV)
    infr.add_feedback((1, 11), NEGTV)
    logger.info('Final state:')
    logger.info(ut.repr4(sorted(infr.gen_edge_attrs('decision'))))
def start(actor, dbdir, aids='all', config={}, **kwargs):
    import wbia
    assert dbdir is not None, 'must specify dbdir'
    assert actor.infr is None, 'AnnotInference already running'
    ibs = wbia.opendb(dbdir=dbdir, use_cache=False, web=False,
                      force_serial=True)

    # Create the AnnotInference
    log.info('starting via actor with ibs = %r' % (ibs,))
    actor.infr = wbia.AnnotInference(ibs=ibs, aids=aids, autoinit=True)
    actor.infr.print('started via actor')
    actor.infr.print('config = {}'.format(ut.repr3(config)))
    # Configure query_annot_infr
    for key in config:
        actor.infr.params[key] = config[key]
    # Initialize
    # TODO: Initialize state from staging reviews after annotmatch
    # timestamps (in case of crash)
    actor.infr.print('Initializing infr tables')
    table = kwargs.get('init', 'staging')
    actor.infr.reset_feedback(table, apply=True)
    actor.infr.ensure_mst()
    actor.infr.apply_nondynamic_update()
    actor.infr.print('infr.status() = {}'.format(
        ut.repr4(actor.infr.status())))

    # Load random forests (TODO: should this be config specifiable?)
    actor.infr.print('loading published models')
    try:
        actor.infr.load_published()
    except Exception:
        pass

    # Start actor.infr Main Loop
    actor.infr.print('start id review')
    actor.infr.start_id_review()
    return 'initialized'
def __debug_win_msvcr():
    import utool as ut
    fname = 'msvcr*.dll'
    key_list = ['PATH']
    found = ut.search_env_paths(fname, key_list)
    fpaths = ut.unique(ut.flatten(found.values()))
    fpaths = ut.lmap(ut.ensure_unixslash, fpaths)
    from os.path import basename
    dllnames = [basename(x) for x in fpaths]
    grouped = dict(ut.group_items(fpaths, dllnames))
    print(ut.repr4(grouped, nl=4))

    keytoid = {}
    for key, vals in grouped.items():
        infos = ut.lmap(ut.get_file_nBytes, vals)
        # infos = ut.lmap(ut.get_file_uuid, vals)
        # uuids = [ut.get_file_uuid(val) for val in vals]
        keytoid[key] = list(zip(infos, vals))
    ut.print_dict(keytoid, nl=2)
def _test_unrev_inference(): infr = demo.demodata_infr(num_pccs=0) # Make 2 consistent and 2 inconsistent CCs infr.add_feedback((1, 2), POSTV) infr.add_feedback((2, 3), POSTV) infr.add_feedback((3, 4), POSTV) infr.add_feedback((4, 1), POSTV) # ----- infr.add_feedback((11, 12), POSTV) infr.add_feedback((12, 13), POSTV) infr.add_feedback((13, 14), POSTV) infr.add_feedback((14, 11), POSTV) infr.add_feedback((12, 14), NEGTV) # ----- infr.add_feedback((21, 22), POSTV) infr.add_feedback((22, 23), POSTV) infr.add_feedback((23, 21), NEGTV) # ----- infr.add_feedback((31, 32), POSTV) infr.add_feedback((32, 33), POSTV) infr.add_feedback((33, 31), POSTV) infr.add_feedback((2, 32), NEGTV) infr.add_feedback((3, 33), NEGTV) infr.add_feedback((12, 21), NEGTV) # ----- # Incomparable within CCs logger.info('==========================') infr.add_feedback((1, 3), UNREV) infr.add_feedback((1, 4), UNREV) infr.add_feedback((1, 2), UNREV) infr.add_feedback((11, 13), UNREV) infr.add_feedback((11, 14), UNREV) infr.add_feedback((11, 12), UNREV) infr.add_feedback((1, 31), UNREV) infr.add_feedback((2, 32), UNREV) infr.add_feedback((12, 21), UNREV) infr.add_feedback((23, 21), UNREV) infr.add_feedback((12, 14), UNREV) logger.info('Final state:') logger.info(ut.repr4(sorted(infr.gen_edge_attrs('decision'))))
def refresh_candidate_edges(infr):
    """
    Search for candidate edges.
    Assign each edge a priority and add to queue.
    """
    infr.print('refresh_candidate_edges', 1)
    infr.assert_consistency_invariant()

    if infr.ibs is not None:
        candidate_edges = infr.find_lnbnn_candidate_edges()
    elif hasattr(infr, 'dummy_verif'):
        infr.print('Searching for dummy candidates')
        infr.print(
            'dummy vsone params =' +
            ut.repr4(infr.dummy_verif.dummy_params, nl=1, si=True))
        ranks_top = infr.params['ranking.ntop']
        candidate_edges = infr.dummy_verif.find_candidate_edges(K=ranks_top)
    else:
        raise Exception('No method available to search for candidate edges')

    infr.add_candidate_edges(candidate_edges)
    infr.assert_consistency_invariant()
def print_system_users():
    r"""
    prints users on the system

    On unix looks for /bin/bash users in /etc/passwd

    CommandLine:
        python -m utool.util_cplat --test-print_system_users

    Example:
        >>> # SCRIPT
        >>> from utool.util_cplat import *  # NOQA
        >>> result = print_system_users()
        >>> print(result)
    """
    import utool as ut
    text = ut.read_from('/etc/passwd')
    userinfo_text_list = text.splitlines()
    userinfo_list = [uitext.split(':') for uitext in userinfo_text_list]
    # print(ut.repr4(sorted(userinfo_list)))
    bash_users = [tup for tup in userinfo_list if tup[-1] == '/bin/bash']
    print(ut.repr4(sorted(bash_users)))
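# A stdlib-only sketch of the same /etc/passwd scan without utool, assuming a
# unix system where the file exists and the last colon-separated field is the
# login shell. The function name is illustrative.
def list_bash_users(passwd_path='/etc/passwd'):
    with open(passwd_path) as file:
        rows = [line.strip().split(':') for line in file if line.strip()]
    return sorted(row[0] for row in rows if row[-1] == '/bin/bash')

print(list_bash_users())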
def print_module_info(modname):
    print('Checking modname = %r' % (modname,))
    # Handles special cases for certain modules
    if modname.lower() == 'pillow':
        from PIL import Image
        import PIL
        pil_path = PIL.__path__
        infodict = module_stdinfo_dict(Image, versionattr='PILLOW_VERSION',
                                       image_version=Image.VERSION,
                                       pil_path=pil_path)
    elif modname.lower() == 'pyqt4':
        from PyQt4 import QtCore
        infodict = module_stdinfo_dict(QtCore, 'PYQT_VERSION_STR')
    elif modname.lower() == 'pyqt5':
        from PyQt5 import QtCore
        infodict = module_stdinfo_dict(QtCore, 'PYQT_VERSION_STR')
    else:
        # Handle normal modules
        module = ut.import_modname(modname)
        infodict = module_stdinfo_dict(module)
    if any([infodict['__file__'].endswith(ext) for ext in ut.LIB_EXT_LIST]):
        infodict['libdep'] = ut.get_dynlib_dependencies(infodict['__file__'])
    return print(ut.repr4(infodict, strvals=True))
def demo2(): """ CommandLine: python -m wbia.algo.graph.demo demo2 --viz python -m wbia.algo.graph.demo demo2 Example: >>> # DISABLE_DOCTEST >>> from wbia.algo.graph.demo import * # NOQA >>> result = demo2() >>> print(result) """ import wbia.plottool as pt from wbia.scripts.thesis import TMP_RC import matplotlib as mpl mpl.rcParams.update(TMP_RC) # ---- Synthetic data params params = { 'redun.pos': 2, 'redun.neg': 2, } # oracle_accuracy = .98 # oracle_accuracy = .90 # oracle_accuracy = (.8, 1.0) oracle_accuracy = (0.85, 1.0) # oracle_accuracy = 1.0 # --- draw params VISUALIZE = ut.get_argflag('--viz') # QUIT_OR_EMEBED = 'embed' QUIT_OR_EMEBED = 'quit' TARGET_REVIEW = ut.get_argval('--target', type_=int, default=None) START = ut.get_argval('--start', type_=int, default=None) END = ut.get_argval('--end', type_=int, default=None) # ------------------ # rng = np.random.RandomState(42) # infr = demodata_infr(num_pccs=4, size=3, size_std=1, p_incon=0) # infr = demodata_infr(num_pccs=6, size=7, size_std=1, p_incon=0) # infr = demodata_infr(num_pccs=3, size=5, size_std=.2, p_incon=0) infr = demodata_infr(pcc_sizes=[5, 2, 4]) infr.verbose = 100 # apply_dummy_viewpoints(infr) # infr.ensure_cliques() infr.ensure_cliques() infr.ensure_full() # infr.apply_edge_truth() # Dummy scoring infr.init_simulation(oracle_accuracy=oracle_accuracy, name='demo2') # infr_gt = infr.copy() dpath = ut.ensuredir(ut.truepath('~/Desktop/demo')) ut.remove_files_in_dir(dpath) fig_counter = it.count(0) def show_graph(infr, title, final=False, selected_edges=None): if not VISUALIZE: return # TODO: rich colored text? latest = '\n'.join(infr.latest_logs()) showkw = dict( # fontsize=infr.graph.graph['fontsize'], # fontname=infr.graph.graph['fontname'], show_unreviewed_edges=True, show_inferred_same=False, show_inferred_diff=False, outof=(len(infr.aids)), # show_inferred_same=True, # show_inferred_diff=True, selected_edges=selected_edges, show_labels=True, simple_labels=True, # show_recent_review=not final, show_recent_review=False, # splines=infr.graph.graph['splines'], reposition=False, # with_colorbar=True ) verbose = infr.verbose infr.verbose = 0 infr_ = infr.copy() infr_ = infr infr_.verbose = verbose infr_.show(pickable=True, verbose=0, **showkw) infr.verbose = verbose # logger.info('status ' + ut.repr4(infr_.status())) # infr.show(**showkw) ax = pt.gca() pt.set_title(title, fontsize=20) fig = pt.gcf() fontsize = 22 if True: # postprocess xlabel lines = [] for line in latest.split('\n'): if False and line.startswith('ORACLE ERROR'): lines += ['ORACLE ERROR'] else: lines += [line] latest = '\n'.join(lines) if len(lines) > 10: fontsize = 16 if len(lines) > 12: fontsize = 14 if len(lines) > 14: fontsize = 12 if len(lines) > 18: fontsize = 10 if len(lines) > 23: fontsize = 8 if True: pt.adjust_subplots(top=0.95, left=0, right=1, bottom=0.45, fig=fig) ax.set_xlabel('\n' + latest) xlabel = ax.get_xaxis().get_label() xlabel.set_horizontalalignment('left') # xlabel.set_x(.025) xlabel.set_x(-0.6) # xlabel.set_fontname('CMU Typewriter Text') xlabel.set_fontname('Inconsolata') xlabel.set_fontsize(fontsize) ax.set_aspect('equal') # ax.xaxis.label.set_color('red') from os.path import join fpath = join(dpath, 'demo_{:04d}.png'.format(next(fig_counter))) fig.savefig( fpath, dpi=300, # transparent=True, edgecolor='none', ) # pt.save_figure(dpath=dpath, dpi=300) infr.latest_logs() if VISUALIZE: infr.update_visual_attrs(groupby='name_label') infr.set_node_attrs('pin', 'true') node_dict = ut.nx_node_dict(infr.graph) 
logger.info(ut.repr4(node_dict[1])) if VISUALIZE: infr.latest_logs() # Pin Nodes into the target groundtruth position show_graph(infr, 'target-gt') logger.info(ut.repr4(infr.status())) infr.clear_feedback() infr.clear_name_labels() infr.clear_edges() logger.info(ut.repr4(infr.status())) infr.latest_logs() if VISUALIZE: infr.update_visual_attrs() infr.prioritize('prob_match') if VISUALIZE or TARGET_REVIEW is None or TARGET_REVIEW == 0: show_graph(infr, 'initial state') def on_new_candidate_edges(infr, edges): # hack updateing visual attrs as a callback infr.update_visual_attrs() infr.on_new_candidate_edges = on_new_candidate_edges infr.params.update(**params) infr.refresh_candidate_edges() VIZ_ALL = VISUALIZE and TARGET_REVIEW is None and START is None logger.info('VIZ_ALL = %r' % (VIZ_ALL, )) if VIZ_ALL or TARGET_REVIEW == 0: show_graph(infr, 'find-candidates') # _iter2 = enumerate(infr.generate_reviews(**params)) # _iter2 = list(_iter2) # assert len(_iter2) > 0 # prog = ut.ProgIter(_iter2, label='demo2', bs=False, adjust=False, # enabled=False) count = 1 first = 1 for edge, priority in infr._generate_reviews(data=True): msg = 'review #%d, priority=%.3f' % (count, priority) logger.info('\n----------') infr.print('pop edge {} with priority={:.3f}'.format(edge, priority)) # logger.info('remaining_reviews = %r' % (infr.remaining_reviews()),) # Make the next review if START is not None: VIZ_ALL = count >= START if END is not None and count >= END: break infr.print(msg) if ut.allsame(infr.pos_graph.node_labels(*edge)) and first: # Have oracle make a mistake early feedback = infr.request_oracle_review(edge, accuracy=0) first -= 1 else: feedback = infr.request_oracle_review(edge) AT_TARGET = TARGET_REVIEW is not None and count >= TARGET_REVIEW - 1 SHOW_CANDIATE_POP = True if SHOW_CANDIATE_POP and (VIZ_ALL or AT_TARGET): # import utool # utool.embed() infr.print( ut.repr2(infr.task_probs['match_state'][edge], precision=4, si=True)) infr.print('len(queue) = %r' % (len(infr.queue))) # Show edge selection infr.print('Oracle will predict: ' + feedback['evidence_decision']) show_graph(infr, 'pre' + msg, selected_edges=[edge]) if count == TARGET_REVIEW: infr.EMBEDME = QUIT_OR_EMEBED == 'embed' infr.add_feedback(edge, **feedback) infr.print('len(queue) = %r' % (len(infr.queue))) # infr.apply_nondynamic_update() # Show the result if VIZ_ALL or AT_TARGET: show_graph(infr, msg) # import sys # sys.exit(1) if count == TARGET_REVIEW: break count += 1 infr.print('status = ' + ut.repr4(infr.status(extended=False))) show_graph(infr, 'post-review (#reviews={})'.format(count), final=True) # ROUND 2 FIGHT # if TARGET_REVIEW is None and round2_params is not None: # # HACK TO GET NEW THINGS IN QUEUE # infr.params = round2_params # _iter2 = enumerate(infr.generate_reviews(**params)) # prog = ut.ProgIter(_iter2, label='round2', bs=False, adjust=False, # enabled=False) # for count, (aid1, aid2) in prog: # msg = 'reviewII #%d' % (count) # logger.info('\n----------') # logger.info(msg) # logger.info('remaining_reviews = %r' % (infr.remaining_reviews()),) # # Make the next review evidence_decision # feedback = infr.request_oracle_review(edge) # if count == TARGET_REVIEW: # infr.EMBEDME = QUIT_OR_EMEBED == 'embed' # infr.add_feedback(edge, **feedback) # # Show the result # if PRESHOW or TARGET_REVIEW is None or count >= TARGET_REVIEW - 1: # show_graph(infr, msg) # if count == TARGET_REVIEW: # break # show_graph(infr, 'post-re-review', final=True) if not getattr(infr, 'EMBEDME', False): if ut.get_computer_name().lower() in 
['hyrule', 'ooo']: pt.all_figures_tile(monitor_num=0, percent_w=0.5) else: pt.all_figures_tile() ut.show_if_requested()
def _test_buffered_generator_general2(bgfunc, bgargs, fgfunc, target_looptime=1.0, serial_cheat=1, buffer_size=2, show_serial=True): """ # We are going to generate output of bgfunc in the background while # fgfunc is running in the foreground. fgfunc takes results of bffunc as # args. # --- Hyperparams target_looptime = 1.5 # maximum time to run all loops """ import utool as ut with ut.Timer('One* call to bgfunc') as t_bgfunc: results = [bgfunc(arg) for arg in bgargs] bgfunctime = t_bgfunc.ellapsed / len(bgargs) #fgfunc = ut.is_prime with ut.Timer('One* call to fgfunc') as t_fgfunc: [fgfunc(x) for x in results] fgfunctime = t_fgfunc.ellapsed / len(bgargs) # compute amount of loops to run est_looptime = (bgfunctime + fgfunctime) _num_loops = round(target_looptime // est_looptime) num_data = int(_num_loops // len(bgargs)) num_loops = int(num_data * len(bgargs)) serial_cheat = min(serial_cheat, num_data) data = ut.flatten([bgargs] * num_data) est_tfg = fgfunctime * num_loops est_tbg = bgfunctime * num_loops est_needed_buffers = fgfunctime / bgfunctime print('Estimated stats' + ut.repr4(ut.dict_subset(locals(), [ 'num_loops', 'bgfunctime', 'fgfunctime', 'est_tfg', 'est_tbg', 'serial_cheat', 'buffer_size', 'est_needed_buffers', ]))) if show_serial: with ut.Timer('serial') as t1: # cheat for serial to make it go faster for x in map(bgfunc, data[:len(data) // serial_cheat]): fgfunc(x) t_serial = serial_cheat * t1.ellapsed print('...toc(\'adjusted_serial\') = %r' % (t_serial)) with ut.Timer('ut.buffered_generator') as t2: gen_ = ut.buffered_generator(map(bgfunc, data), buffer_size=buffer_size) for x in gen_: fgfunc(x) with ut.Timer('ut.generate') as t3: gen_ = ut.generate(bgfunc, data, chunksize=buffer_size, quiet=1, verbose=0) for x in gen_: fgfunc(x) # Compare theoretical vs practical efficiency print('\n Theoretical Results') def parallel_efficiency(ellapsed, est_tfg, est_tbg): return (1 - ((ellapsed - est_tfg) / est_tbg)) * 100 if show_serial: print('Theoretical gain (serial) = %.3f%%' % ( parallel_efficiency(t_serial, est_tfg, est_tbg),)) print('Theoretical gain (ut.buffered_generator) = %.3f%%' % ( parallel_efficiency(t2.ellapsed, est_tfg, est_tbg),)) print('Theoretical gain (ut.generate) = %.2f%%' % ( parallel_efficiency(t3.ellapsed, est_tfg, est_tbg),)) if show_serial: prac_tbg = t_serial - est_tfg print('\n Practical Results') print('Practical gain (serial) = %.3f%%' % ( parallel_efficiency(t1.ellapsed, est_tfg, prac_tbg),)) print('Practical gain (ut.buffered_generator) = %.3f%%' % ( parallel_efficiency(t2.ellapsed, est_tfg, prac_tbg),)) print('Practical gain (ut.generate) = %.2f%%' % ( parallel_efficiency(t3.ellapsed, est_tfg, prac_tbg),))
pd.options.display.max_rows = 20
pd.options.display.max_columns = 40
pd.options.display.width = 160
pd.options.display.float_format = lambda x: '%.4f' % (x,)

# PARSE DATABASE
# full_bibman = fix_bib.BibMan('FULL.bib', doc='thesis')
bibman = fix_bib.BibMan('final-bib.bib', doc='thesis')
bibman.sort_entries()
bibman.write_testfile()
bibman.printdiff()
bibman.save()

print('bibman.unregistered_pubs = {}'.format(ut.repr4(
    bibman.unregistered_pubs)))
for pub in bibman.unregistered_pubs:
    if 'None' in str(pub):
        print(ut.repr4(pub.entry))

df = pd.DataFrame.from_dict(bibman.cleaned, orient='index')
del df['abstract']

# want = text.count('@')
want = len(df)
# paged_items = df[~pd.isnull(df['pub_abbrev'])]
# has_pages = ~pd.isnull(paged_items['pages'])
# print('have pages {} / {}'.format(has_pages.sum(), len(has_pages)))
# print(ut.repr4(paged_items[~has_pages]['title'].values.tolist()))
def _test_buffered_generator_general(func, args, sleepfunc, target_looptime=1.0, serial_cheat=1, argmode=False, buffer_size=2): """ # We are going to generate output of func in the background while sleep # func is running in the foreground # --- Hyperparams target_looptime = 1.5 # maximum time to run all loops """ import utool as ut #serial_cheat = 1 # approx division factor to run serial less times show_serial = True # target_looptime < 10. # 3.0 with ut.Timer('One* call to func') as t_fgfunc: results = [func(arg) for arg in args] functime = t_fgfunc.ellapsed / len(args) #sleepfunc = ut.is_prime with ut.Timer('One* call to sleep func') as t_sleep: if argmode: [sleepfunc(x) for x in results] else: [sleepfunc() for x in results] sleeptime = t_sleep.ellapsed / len(args) # compute amount of loops to run _num_loops = round(target_looptime // (functime + sleeptime)) num_data = int(_num_loops // len(args)) num_loops = int(num_data * len(args)) serial_cheat = min(serial_cheat, num_data) data = ut.flatten([args] * num_data) est_tsleep = sleeptime * num_loops est_tfunc = functime * num_loops est_needed_buffers = sleeptime / functime print('Estimated stats' + ut.repr4(ut.dict_subset(locals(), [ 'num_loops', 'functime', 'sleeptime', 'est_tsleep', 'est_tfunc', 'serial_cheat', 'buffer_size', 'est_needed_buffers', ]))) if show_serial: with ut.Timer('serial') as t1: # cheat for serial to make it go faster for x in map(func, data[:len(data) // serial_cheat]): if argmode: sleepfunc(x) else: sleepfunc() t_serial = serial_cheat * t1.ellapsed print('...toc(\'adjusted_serial\') = %r' % (t_serial)) with ut.Timer('ut.buffered_generator') as t2: gen_ = ut.buffered_generator(map(func, data), buffer_size=buffer_size) for x in gen_: if argmode: sleepfunc(x) else: sleepfunc() with ut.Timer('ut.generate') as t3: gen_ = ut.generate(func, data, chunksize=buffer_size, quiet=1, verbose=0) for x in gen_: if argmode: sleepfunc(x) else: sleepfunc( ) # Compare theoretical vs practical efficiency print('\n Theoretical Results') def parallel_efficiency(ellapsed, est_tsleep, est_tfunc): return (1 - ((ellapsed - est_tsleep) / est_tfunc)) * 100 if show_serial: print('Theoretical gain (serial) = %.3f%%' % ( parallel_efficiency(t_serial, est_tsleep, est_tfunc),)) print('Theoretical gain (ut.buffered_generator) = %.3f%%' % ( parallel_efficiency(t2.ellapsed, est_tsleep, est_tfunc),)) print('Theoretical gain (ut.generate) = %.2f%%' % ( parallel_efficiency(t3.ellapsed, est_tsleep, est_tfunc),)) if show_serial: prac_tfunc = t_serial - est_tsleep print('\n Practical Results') print('Practical gain (serial) = %.3f%%' % ( parallel_efficiency(t1.ellapsed, est_tsleep, prac_tfunc),)) print('Practical gain (ut.buffered_generator) = %.3f%%' % ( parallel_efficiency(t2.ellapsed, est_tsleep, prac_tfunc),)) print('Practical gain (ut.generate) = %.2f%%' % ( parallel_efficiency(t3.ellapsed, est_tsleep, prac_tfunc),))
def compute_vocab(depc, fid_list, config): r""" Depcache method for computing a new visual vocab CommandLine: python -m wbia.core_annots --exec-compute_neighbor_index --show python -m wbia show_depc_annot_table_input --show --tablename=neighbor_index python -m wbia.algo.smk.vocab_indexer --exec-compute_vocab:0 python -m wbia.algo.smk.vocab_indexer --exec-compute_vocab:1 # FIXME make util_tests register python -m wbia.algo.smk.vocab_indexer compute_vocab:0 Ignore: >>> # Lev Oxford Debug Example >>> import wbia >>> ibs = wbia.opendb('Oxford') >>> depc = ibs.depc >>> table = depc['vocab'] >>> # Check what currently exists in vocab table >>> table.print_configs() >>> table.print_table() >>> table.print_internal_info() >>> # Grab aids used to compute vocab >>> from wbia.expt.experiment_helpers import get_annotcfg_list >>> expanded_aids_list = get_annotcfg_list(ibs, ['oxford'])[1] >>> qaids, daids = expanded_aids_list[0] >>> vocab_aids = daids >>> config = {'num_words': 64000} >>> exists = depc.check_rowids('vocab', [vocab_aids], config=config) >>> print('exists = %r' % (exists,)) >>> vocab_rowid = depc.get_rowids('vocab', [vocab_aids], config=config)[0] >>> print('vocab_rowid = %r' % (vocab_rowid,)) >>> vocab = table.get_row_data([vocab_rowid], 'words')[0] >>> print('vocab = %r' % (vocab,)) Example: >>> # DISABLE_DOCTEST >>> from wbia.algo.smk.vocab_indexer import * # NOQA >>> # Test depcache access >>> import wbia >>> ibs, aid_list = wbia.testdata_aids('testdb1') >>> depc = ibs.depc_annot >>> input_tuple = [aid_list] >>> rowid_kw = {} >>> tablename = 'vocab' >>> vocabid_list = depc.get_rowids(tablename, input_tuple, **rowid_kw) >>> vocab = depc.get(tablename, input_tuple, 'words')[0] >>> assert vocab.wordflann is not None >>> assert vocab.wordflann._FLANN__curindex_data is not None >>> assert vocab.wordflann._FLANN__curindex_data is vocab.wx_to_word Example: >>> # DISABLE_DOCTEST >>> from wbia.algo.smk.vocab_indexer import * # NOQA >>> import wbia >>> ibs, aid_list = wbia.testdata_aids('testdb1') >>> depc = ibs.depc_annot >>> fid_list = depc.get_rowids('feat', aid_list) >>> config = VocabConfig() >>> vocab, train_vecs = ut.exec_func_src(compute_vocab, keys=['vocab', 'train_vecs']) >>> idx_to_vec = depc.d.get_feat_vecs(aid_list)[0] >>> self = vocab >>> ut.quit_if_noshow() >>> data = train_vecs >>> centroids = vocab.wx_to_word >>> import wbia.plottool as pt >>> vt.plot_centroids(data, centroids, num_pca_dims=2) >>> ut.show_if_requested() >>> #config = ibs.depc_annot['vocab'].configclass() """ logger.info('[IBEIS] COMPUTE_VOCAB:') vecs_list = depc.get_native('feat', fid_list, 'vecs') train_vecs = np.vstack(vecs_list).astype(np.float32) num_words = config['num_words'] logger.info( '[smk_index] Train Vocab(nWords=%d) using %d annots and %d descriptors' % (num_words, len(fid_list), len(train_vecs))) if config['algorithm'] == 'kdtree': flann_params = vt.get_flann_params(random_seed=42) kwds = dict(max_iters=20, flann_params=flann_params) words = vt.akmeans(train_vecs, num_words, **kwds) elif config['algorithm'] == 'minibatch': logger.info('Using minibatch kmeans') import sklearn.cluster rng = np.random.RandomState(config['random_seed']) n_init = config['n_init'] with warnings.catch_warnings(): warnings.simplefilter('ignore') init_size = int(num_words * 4) batch_size = 1000 n_batches = ut.get_num_chunks(train_vecs.shape[0], batch_size) minibatch_params = dict( n_clusters=num_words, init='k-means++', init_size=init_size, n_init=n_init, max_iter=30000 // n_batches, batch_size=batch_size, tol=0.0, 
max_no_improvement=10, reassignment_ratio=0.01, ) logger.info('minibatch_params = %s' % (ut.repr4(minibatch_params), )) clusterer = sklearn.cluster.MiniBatchKMeans(compute_labels=False, random_state=rng, verbose=2, **minibatch_params) try: clusterer.fit(train_vecs) except (Exception, KeyboardInterrupt) as ex: ut.printex(ex, tb=True) if ut.is_developer(): ut.embed() else: raise words = clusterer.cluster_centers_ logger.info('Finished clustering') # if False: # flann_params['checks'] = 64 # flann_params['trees'] = 4 # num_words = 128 # centroids = vt.initialize_centroids(num_words, train_vecs, 'akmeans++') # words, hist = vt.akmeans_iterations( # train_vecs, centroids, max_iters=1000, monitor=True, # flann_params=flann_params) logger.info('Constructing vocab') vocab = VisualVocab(words) logger.info('Building vocab index') vocab.build() logger.info('Returning vocab') return (vocab, )
def inject_instance(self, classkey=None, allow_override=False, verbose=VERBOSE_CLASS, strict=True): """ Injects an instance (self) of type (classkey) with all functions registered to (classkey) call this in the __init__ class function Args: self: the class instance classkey: key for a class, preferably the class type itself, but it doesnt have to be SeeAlso: make_class_method_decorator Example: >>> # DOCTEST_DISABLE >>> utool.make_class_method_decorator(InvertedIndex)(smk_debug.invindex_dbgstr) >>> utool.inject_instance(invindex) """ import utool as ut if verbose: print('[util_class] begin inject_instance') try: if classkey is None: # Probably should depricate this block of code # It tries to do too much classkey = self.__class__ if classkey == 'ibeis.gui.models_and_views.IBEISTableView': # HACK HACK HACK # from guitool.__PYQT__ import QtGui # NOQA from guitool.__PYQT__ import QtWidgets # NOQA classkey = QtWidgets.QAbstractItemView if len(__CLASSTYPE_ATTRIBUTES__[classkey]) == 0: print('[utool] Warning: no classes of type %r are registered' % (classkey,)) print('[utool] type(self)=%r, self=%r' % (type(self), self)), print('[utool] Checking to see if anybody else was registered...') print('[utool] __CLASSTYPE_ATTRIBUTES__ = ' + ut.repr4(__CLASSTYPE_ATTRIBUTES__.keys())) for classtype_, _ in six.iteritems(__CLASSTYPE_ATTRIBUTES__): isinstance(self, classtype_) classkey = classtype_ print('[utool] Warning: using subclass=%r' % (classtype_,)) break func_list = __CLASSTYPE_ATTRIBUTES__[classkey] if verbose: print('[util_class] injecting %d methods\n with classkey=%r\n into %r' % (len(func_list), classkey, self,)) for func in func_list: if VERBOSE_CLASS: print('[util_class] * injecting %r' % (func,)) method_name = None # Allow user to register tuples for aliases if isinstance(func, tuple): func, method_name = func inject_func_as_method(self, func, method_name=method_name, allow_override=allow_override, verbose=verbose) except Exception as ex: ut.printex(ex, 'ISSUE WHEN INJECTING %r' % (classkey,), iswarning=not strict) if strict: raise
if fpaths:
    cmd_to_fpaths[cmd].extend(fpaths)

for key in cmd_to_fpaths.keys():
    cmd = key.lstrip('\\')
    if not root.find_descendant_type(cmd):
        print(key)

from os.path import abspath, dirname
used_fpaths = ut.flatten(cmd_to_fpaths.values())
used_fpaths = set(ut.emap(abspath, used_fpaths))
all_fpaths = set(ut.emap(abspath,
                         ut.glob('.', ['*.png', '*.jpg'], recursive=True)))

unused = list(all_fpaths - used_fpaths)
unuse_dirs = ut.group_items(unused, ut.emap(dirname, unused))

semi_used = {}
for dpath, fpaths in unuse_dirs.items():
    used_in_dpath = set(ut.ls(dpath)) - set(fpaths)
    if len(used_in_dpath) == 0:
        # completely unused directories
        print(dpath)
    else:
        semi_used[dpath] = fpaths

print(ut.repr4(list(semi_used.keys())))
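# A stdlib-only sketch of the unused-asset check above: collect every image
# on disk, subtract the set referenced by the document, and group what is
# left by directory. The function name and extensions are illustrative.
import glob
import os
from collections import defaultdict

def find_unused_images(used_fpaths, root='.'):
    all_fpaths = {
        os.path.abspath(p)
        for ext in ('*.png', '*.jpg')
        for p in glob.glob(os.path.join(root, '**', ext), recursive=True)
    }
    unused = all_fpaths - {os.path.abspath(p) for p in used_fpaths}
    by_dir = defaultdict(list)
    for fpath in unused:
        by_dir[os.path.dirname(fpath)].append(fpath)
    return dict(by_dir)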
def makeinit(mod_dpath, exclude_modnames=[], use_star=False): r""" Args: mod_dpath (str): exclude_modnames (list): (Defaults to []) use_star (bool): (Defaults to False) Returns: str: init_codeblock CommandLine: python -m utool.util_autogen makeinit --modname=ibeis.algo Example: >>> # SCRIPT >>> from utool.util_autogen import * # NOQA >>> import utool as ut >>> modname = ut.get_argval('--modname', str, default=None) >>> mod_dpath = (os.getcwd() if modname is None else >>> ut.get_modpath(modname, prefer_pkg=True)) >>> mod_dpath = ut.unixpath(mod_dpath) >>> mod_fpath = join(mod_dpath, '__init__.py') >>> exclude_modnames = ut.get_argval(('--exclude', '-x'), list, default=[]) >>> use_star = ut.get_argflag('--star') >>> init_codeblock = makeinit(mod_dpath, exclude_modnames, use_star) >>> ut.dump_autogen_code(mod_fpath, init_codeblock) """ from utool._internal import util_importer import utool as ut module_name = ut.get_modname_from_modpath(mod_dpath) IMPORT_TUPLES = util_importer.make_import_tuples(mod_dpath, exclude_modnames=exclude_modnames) initstr = util_importer.make_initstr(module_name, IMPORT_TUPLES) regen_command = 'cd %s\n' % (mod_dpath) regen_command += ' makeinit.py' regen_command += ' --modname={modname}'.format(modname=module_name) if use_star: regen_command += ' --star' if len(exclude_modnames ) > 0: regen_command += ' -x ' + ' '.join(exclude_modnames) regen_block = (ut.codeblock(''' """ Regen Command: {regen_command} """ ''').format(regen_command=regen_command)) importstar_codeblock = ut.codeblock( ''' """ python -c "import {module_name}" --dump-{module_name}-init python -c "import {module_name}" --update-{module_name}-init """ __DYNAMIC__ = True if __DYNAMIC__: # TODO: import all utool external prereqs. Then the imports will not import # anything that has already in a toplevel namespace # COMMENTED OUT FOR FROZEN __INIT__ # Dynamically import listed util libraries and their members. from utool._internal import util_importer # FIXME: this might actually work with rrrr, but things arent being # reimported because they are already in the modules list import_execstr = util_importer.dynamic_import(__name__, IMPORT_TUPLES) exec(import_execstr) DOELSE = False else: # Do the nonexec import (can force it to happen no matter what if alwyas set # to True) DOELSE = True if DOELSE: # <AUTOGEN_INIT> pass # </AUTOGEN_INIT> '''.format(module_name=module_name) ) ts_line = '# Autogenerated on {ts}'.format(ts=ut.get_timestamp('printable')) init_codeblock_list = ['# -*- coding: utf-8 -*-', ts_line] init_codeblock_list.append(initstr) init_codeblock_list.append('\nIMPORT_TUPLES = ' + ut.repr4(IMPORT_TUPLES)) if use_star: init_codeblock_list.append(importstar_codeblock) init_codeblock_list.append(regen_block) init_codeblock = '\n'.join(init_codeblock_list) return init_codeblock
def do(*cmd_list, **kwargs): import utool as ut import time import six import sys verbose = kwargs.get('verbose', False) orig_print = globals()['print'] print = ut.partial(orig_print, file=kwargs.get('file', sys.stdout)) # print('Running xctrl.do script') if verbose: print('Executing x do: %s' % (ut.repr4(cmd_list),)) debug = False cmdkw = dict(verbose=False, quiet=True, silence=True) # http://askubuntu.com/questions/455762/xbindkeys-wont-work-properly # Make things work even if other keys are pressed defaultsleep = 0.0 sleeptime = kwargs.get('sleeptime', defaultsleep) time.sleep(.05) out, err, ret = ut.cmd('xset r off', **cmdkw) if debug: print('----------') print('xset r off') print('ret = %r' % (ret,)) print('err = %r' % (err,)) print('out = %r' % (out,)) memory = {} tmpverbose = 0 for count, item in enumerate(cmd_list): # print('item = %r' % (item,)) sleeptime = kwargs.get('sleeptime', defaultsleep) if tmpverbose: print('moving on') tmpverbose = 0 nocommand = 0 assert isinstance(item, tuple) assert len(item) >= 2 xcmd, key_ = item[0:2] if len(item) >= 3: if isinstance(item[2], six.string_types) and item[2].endswith('?'): sleeptime = float(item[2][:-1]) tmpverbose = 1 print('special command sleep') print('sleeptime = %r' % (sleeptime,)) else: sleeptime = float(item[2]) if xcmd == 'focus': key_ = str(key_) if key_.startswith('$'): key_ = memory[key_[1:]] pattern = key_ win_id = XCtrl.find_window_id(pattern, method='mru') if win_id is None: args = ['wmctrl', '-xa', pattern] else: args = ['wmctrl', '-ia', hex(win_id)] elif xcmd == 'focus_id': key_ = str(key_) if key_.startswith('$'): key_ = memory[key_[1:]] args = ['wmctrl', '-ia', hex(key_)] elif xcmd == 'remember_window_id': out, err, ret = ut.cmd('xdotool getwindowfocus', **cmdkw) memory[key_] = int(out.strip()) nocommand = True args = [] elif xcmd == 'remember_window_name': out, err, ret = ut.cmd('xdotool getwindowfocus getwindowname', **cmdkw) import pipes memory[key_] = pipes.quote(out.strip()) nocommand = True args = [] elif xcmd == 'type': args = [ 'xdotool', 'keyup', '--window', '0', '7', 'type', '--clearmodifiers', '--window', '0', str(key_) ] elif xcmd == 'type2': import pipes args = [ 'xdotool', 'type', pipes.quote(str(key_)) ] elif xcmd == 'xset-r-on': args = ['xset', 'r', 'on'] elif xcmd == 'xset-r-off': args = ['xset', 'r', 'off'] else: args = ['xdotool', str(xcmd), str(key_)] if verbose or tmpverbose: print('\n\n# Step %d' % (count,)) print(args, ' '.join(args)) if nocommand: continue # print('args = %r -> %s' % (args, ' '.join(args),)) # print('args = %r' % (args,)) out, err, ret = ut.cmd(*args, **cmdkw) if debug: print('---- ' + xcmd + ' ------') print(' '.join(args)) print('ret = %r' % (ret,)) print('err = %r' % (err,)) print('out = %r' % (out,)) if sleeptime > 0: time.sleep(sleeptime) out, err, ret = ut.cmd('xset r on', verbose=False, quiet=True, silence=True) if debug: print('----------') print('xset r on') print('ret = %r' % (ret,)) print('err = %r' % (err,)) print('out = %r' % (out,))
def get_dbinfo( ibs, verbose=True, with_imgsize=False, with_bytes=False, with_contrib=False, with_agesex=False, with_header=True, short=False, tag='dbinfo', aid_list=None, aids=None, ): """ Returns dictionary of digestable database information Infostr is a string summary of all the stats. Prints infostr in addition to returning locals Args: ibs (IBEISController): verbose (bool): with_imgsize (bool): with_bytes (bool): Returns: dict: SeeAlso: python -m wbia.other.ibsfuncs --exec-get_annot_stats_dict --db PZ_PB_RF_TRAIN --use-hist=True --old=False --per_name_vpedge=False python -m wbia.other.ibsfuncs --exec-get_annot_stats_dict --db PZ_PB_RF_TRAIN --all CommandLine: python -m wbia.other.dbinfo --exec-get_dbinfo:0 python -m wbia.other.dbinfo --test-get_dbinfo:1 python -m wbia.other.dbinfo --test-get_dbinfo:0 --db NNP_Master3 python -m wbia.other.dbinfo --test-get_dbinfo:0 --db PZ_Master1 python -m wbia.other.dbinfo --test-get_dbinfo:0 --db GZ_ALL python -m wbia.other.dbinfo --exec-get_dbinfo:0 --db PZ_ViewPoints python -m wbia.other.dbinfo --exec-get_dbinfo:0 --db GZ_Master1 python -m wbia.other.dbinfo --exec-get_dbinfo:0 --db LF_Bajo_bonito -a default python -m wbia.other.dbinfo --exec-get_dbinfo:0 --db DETECT_SEATURTLES -a default --readonly python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a ctrl python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a default:minqual=ok,require_timestamp=True --dbdir ~/lev/media/danger/LEWA python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a default:minqual=ok,require_timestamp=True --dbdir ~/lev/media/danger/LEWA --loadbackup=0 python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a default: --dbdir ~/lev/media/danger/LEWA python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a default: --dbdir ~/lev/media/danger/LEWA --loadbackup=0 Example1: >>> # SCRIPT >>> from wbia.other.dbinfo import * # NOQA >>> import wbia >>> defaultdb = 'testdb1' >>> ibs, aid_list = wbia.testdata_aids(defaultdb, a='default:minqual=ok,view=primary,view_ext1=1') >>> kwargs = ut.get_kwdefaults(get_dbinfo) >>> kwargs['verbose'] = False >>> kwargs['aid_list'] = aid_list >>> kwargs = ut.parse_dict_from_argv(kwargs) >>> output = get_dbinfo(ibs, **kwargs) >>> result = (output['info_str']) >>> print(result) >>> #ibs = wbia.opendb(defaultdb='testdb1') >>> # <HACK FOR FILTERING> >>> #from wbia.expt import cfghelpers >>> #from wbia.expt import annotation_configs >>> #from wbia.init import filter_annots >>> #named_defaults_dict = ut.dict_take(annotation_configs.__dict__, >>> # annotation_configs.TEST_NAMES) >>> #named_qcfg_defaults = dict(zip(annotation_configs.TEST_NAMES, >>> # ut.get_list_column(named_defaults_dict, 'qcfg'))) >>> #acfg = cfghelpers.parse_argv_cfg(('--annot-filter', '-a'), named_defaults_dict=named_qcfg_defaults, default=None)[0] >>> #aid_list = ibs.get_valid_aids() >>> # </HACK FOR FILTERING> Example1: >>> # ENABLE_DOCTEST >>> from wbia.other.dbinfo import * # NOQA >>> import wbia >>> verbose = True >>> short = True >>> #ibs = wbia.opendb(db='GZ_ALL') >>> #ibs = wbia.opendb(db='PZ_Master0') >>> ibs = wbia.opendb('testdb1') >>> assert ibs.get_dbname() == 'testdb1', 'DO NOT DELETE CONTRIBUTORS OF OTHER DBS' >>> ibs.delete_contributors(ibs.get_valid_contributor_rowids()) >>> ibs.delete_empty_nids() >>> #ibs = wbia.opendb(db='PZ_MTEST') >>> output = get_dbinfo(ibs, with_contrib=False, verbose=False, short=True) >>> result = (output['info_str']) >>> print(result) +============================ DB Info: testdb1 DB Notes: None DB NumContrib: 0 ---------- # Names = 7 # Names (unassociated) = 0 # 
Names (singleton) = 5 # Names (multiton) = 2 ---------- # Annots = 13 # Annots (unknown) = 4 # Annots (singleton) = 5 # Annots (multiton) = 4 ---------- # Img = 13 L============================ """ # TODO Database size in bytes # TODO: occurrence, contributors, etc... if aids is not None: aid_list = aids # Basic variables request_annot_subset = False _input_aid_list = aid_list # NOQA if aid_list is None: valid_aids = ibs.get_valid_aids() valid_nids = ibs.get_valid_nids() valid_gids = ibs.get_valid_gids() else: if isinstance(aid_list, str): # Hack to get experiment stats on aids acfg_name_list = [aid_list] logger.info('Specified custom aids via acfgname %s' % (acfg_name_list,)) from wbia.expt import experiment_helpers acfg_list, expanded_aids_list = experiment_helpers.get_annotcfg_list( ibs, acfg_name_list ) aid_list = sorted(list(set(ut.flatten(ut.flatten(expanded_aids_list))))) # aid_list = if verbose: logger.info('Specified %d custom aids' % (len(aid_list,))) request_annot_subset = True valid_aids = aid_list valid_nids = list( set(ibs.get_annot_nids(aid_list, distinguish_unknowns=False)) - {const.UNKNOWN_NAME_ROWID} ) valid_gids = list(set(ibs.get_annot_gids(aid_list))) # associated_nids = ibs.get_valid_nids(filter_empty=True) # nids with at least one annotation valid_images = ibs.images(valid_gids) valid_annots = ibs.annots(valid_aids) # Image info if verbose: logger.info('Checking Image Info') gx2_aids = valid_images.aids if request_annot_subset: # remove annots not in this subset valid_aids_set = set(valid_aids) gx2_aids = [list(set(aids_).intersection(valid_aids_set)) for aids_ in gx2_aids] gx2_nAnnots = np.array(list(map(len, gx2_aids))) image_without_annots = len(np.where(gx2_nAnnots == 0)[0]) gx2_nAnnots_stats = ut.repr4( ut.get_stats(gx2_nAnnots, use_median=True), nl=0, precision=2, si=True ) image_reviewed_list = ibs.get_image_reviewed(valid_gids) # Name stats if verbose: logger.info('Checking Name Info') nx2_aids = ibs.get_name_aids(valid_nids) if request_annot_subset: # remove annots not in this subset valid_aids_set = set(valid_aids) nx2_aids = [list(set(aids_).intersection(valid_aids_set)) for aids_ in nx2_aids] associated_nids = ut.compress(valid_nids, list(map(len, nx2_aids))) ibs.check_name_mapping_consistency(nx2_aids) if False: # Occurrence Info def compute_annot_occurrence_ids(ibs, aid_list): from wbia.algo.preproc import preproc_occurrence gid_list = ibs.get_annot_gids(aid_list) gid2_aids = ut.group_items(aid_list, gid_list) config = {'seconds_thresh': 4 * 60 * 60} flat_imgsetids, flat_gids = preproc_occurrence.wbia_compute_occurrences( ibs, gid_list, config=config, verbose=False ) occurid2_gids = ut.group_items(flat_gids, flat_imgsetids) occurid2_aids = { oid: ut.flatten(ut.take(gid2_aids, gids)) for oid, gids in occurid2_gids.items() } return occurid2_aids import utool with utool.embed_on_exception_context: occurid2_aids = compute_annot_occurrence_ids(ibs, valid_aids) occur_nids = ibs.unflat_map(ibs.get_annot_nids, occurid2_aids.values()) occur_unique_nids = [ut.unique(nids) for nids in occur_nids] nid2_occurxs = ut.ddict(list) for occurx, nids in enumerate(occur_unique_nids): for nid in nids: nid2_occurxs[nid].append(occurx) nid2_occurx_single = { nid: occurxs for nid, occurxs in nid2_occurxs.items() if len(occurxs) <= 1 } nid2_occurx_resight = { nid: occurxs for nid, occurxs in nid2_occurxs.items() if len(occurxs) > 1 } singlesight_encounters = ibs.get_name_aids(nid2_occurx_single.keys()) singlesight_annot_stats = ut.get_stats( list(map(len, 
singlesight_encounters)), use_median=True, use_sum=True ) resight_name_stats = ut.get_stats( list(map(len, nid2_occurx_resight.values())), use_median=True, use_sum=True ) # Encounter Info def break_annots_into_encounters(aids): from wbia.algo.preproc import occurrence_blackbox import datetime thresh_sec = datetime.timedelta(minutes=30).seconds posixtimes = np.array(ibs.get_annot_image_unixtimes_asfloat(aids)) # latlons = ibs.get_annot_image_gps(aids) labels = occurrence_blackbox.cluster_timespace2( posixtimes, None, thresh_sec=thresh_sec ) return labels # ave_enc_time = [np.mean(times) for lbl, times in ut.group_items(posixtimes, labels).items()] # ut.square_pdist(ave_enc_time) try: am_rowids = ibs.get_annotmatch_rowids_between_groups([valid_aids], [valid_aids])[ 0 ] aid_pairs = ibs.filter_aidpairs_by_tags(min_num=0, am_rowids=am_rowids) undirected_tags = ibs.get_aidpair_tags( aid_pairs.T[0], aid_pairs.T[1], directed=False ) tagged_pairs = list(zip(aid_pairs.tolist(), undirected_tags)) tag_dict = ut.groupby_tags(tagged_pairs, undirected_tags) pair_tag_info = ut.map_dict_vals(len, tag_dict) except Exception: pair_tag_info = {} # logger.info(ut.repr2(pair_tag_info)) # Annot Stats # TODO: number of images where chips cover entire image # TODO: total image coverage of annotation # TODO: total annotation overlap """ ax2_unknown = ibs.is_aid_unknown(valid_aids) ax2_nid = ibs.get_annot_name_rowids(valid_aids) assert all([nid < 0 if unknown else nid > 0 for nid, unknown in zip(ax2_nid, ax2_unknown)]), 'bad annot nid' """ # if verbose: logger.info('Checking Annot Species') unknown_annots = valid_annots.compress(ibs.is_aid_unknown(valid_annots)) species_list = valid_annots.species_texts species2_annots = valid_annots.group_items(valid_annots.species_texts) species2_nAids = {key: len(val) for key, val in species2_annots.items()} if verbose: logger.info('Checking Multiton/Singleton Species') nx2_nAnnots = np.array(list(map(len, nx2_aids))) # Separate singleton / multitons multiton_nxs = np.where(nx2_nAnnots > 1)[0] singleton_nxs = np.where(nx2_nAnnots == 1)[0] unassociated_nxs = np.where(nx2_nAnnots == 0)[0] assert len(np.intersect1d(singleton_nxs, multiton_nxs)) == 0, 'intersecting names' valid_nxs = np.hstack([multiton_nxs, singleton_nxs]) num_names_with_gt = len(multiton_nxs) # Annot Info if verbose: logger.info('Checking Annot Info') multiton_aids_list = ut.take(nx2_aids, multiton_nxs) assert len(set(multiton_nxs)) == len(multiton_nxs) if len(multiton_aids_list) == 0: multiton_aids = np.array([], dtype=int) else: multiton_aids = np.hstack(multiton_aids_list) assert len(set(multiton_aids)) == len(multiton_aids), 'duplicate annot' singleton_aids = ut.take(nx2_aids, singleton_nxs) multiton_nid2_nannots = list(map(len, multiton_aids_list)) # Image size stats if with_imgsize: if verbose: logger.info('Checking ImageSize Info') gpath_list = ibs.get_image_paths(valid_gids) def wh_print_stats(wh_list): if len(wh_list) == 0: return '{empty}' wh_list = np.asarray(wh_list) stat_dict = collections.OrderedDict( [ ('max', wh_list.max(0)), ('min', wh_list.min(0)), ('mean', wh_list.mean(0)), ('std', wh_list.std(0)), ] ) def arr2str(var): return '[' + (', '.join(list(map(lambda x: '%.1f' % x, var)))) + ']' ret = ',\n '.join( ['%s:%s' % (key, arr2str(val)) for key, val in stat_dict.items()] ) return '{\n ' + ret + '\n}' logger.info('reading image sizes') # Image size stats img_size_list = ibs.get_image_sizes(valid_gids) img_size_stats = wh_print_stats(img_size_list) # Chip size stats annotation_bbox_list =
ibs.get_annot_bboxes(valid_aids) annotation_bbox_arr = np.array(annotation_bbox_list) if len(annotation_bbox_arr) == 0: annotation_size_list = [] else: annotation_size_list = annotation_bbox_arr[:, 2:4] chip_size_stats = wh_print_stats(annotation_size_list) imgsize_stat_lines = [ (' # Img in dir = %d' % len(gpath_list)), (' Image Size Stats = %s' % (img_size_stats,)), (' * Chip Size Stats = %s' % (chip_size_stats,)), ] else: imgsize_stat_lines = [] if verbose: logger.info('Building Stats String') multiton_stats = ut.repr3( ut.get_stats(multiton_nid2_nannots, use_median=True), nl=0, precision=2, si=True ) # Time stats unixtime_list = valid_images.unixtime2 # valid_unixtime_list = [time for time in unixtime_list if time != -1] # unixtime_statstr = ibs.get_image_time_statstr(valid_gids) if ut.get_argflag('--hackshow-unixtime'): show_time_distributions(ibs, unixtime_list) ut.show_if_requested() unixtime_statstr = ut.repr3(ut.get_timestats_dict(unixtime_list, full=True), si=True) # GPS stats gps_list_ = ibs.get_image_gps(valid_gids) gpsvalid_list = [gps != (-1, -1) for gps in gps_list_] gps_list = ut.compress(gps_list_, gpsvalid_list) def get_annot_age_stats(aid_list): annot_age_months_est_min = ibs.get_annot_age_months_est_min(aid_list) annot_age_months_est_max = ibs.get_annot_age_months_est_max(aid_list) age_dict = ut.ddict((lambda: 0)) for min_age, max_age in zip(annot_age_months_est_min, annot_age_months_est_max): if max_age is None: max_age = min_age if min_age is None: min_age = max_age if max_age is None and min_age is None: logger.info('Found UNKNOWN Age: %r, %r' % (min_age, max_age,)) age_dict['UNKNOWN'] += 1 elif (min_age is None or min_age < 12) and max_age < 12: age_dict['Infant'] += 1 elif 12 <= min_age and min_age < 36 and 12 <= max_age and max_age < 36: age_dict['Juvenile'] += 1 elif 36 <= min_age and (max_age is None or 36 <= max_age): age_dict['Adult'] += 1 return age_dict def get_annot_sex_stats(aid_list): annot_sextext_list = ibs.get_annot_sex_texts(aid_list) sextext2_aids = ut.group_items(aid_list, annot_sextext_list) sex_keys = list(ibs.const.SEX_TEXT_TO_INT.keys()) assert set(sex_keys) >= set(annot_sextext_list), 'bad keys: ' + str( set(annot_sextext_list) - set(sex_keys) ) sextext2_nAnnots = ut.odict( [(key, len(sextext2_aids.get(key, []))) for key in sex_keys] ) # Filter 0's sextext2_nAnnots = { key: val for key, val in six.iteritems(sextext2_nAnnots) if val != 0 } return sextext2_nAnnots def get_annot_qual_stats(ibs, aid_list): annots = ibs.annots(aid_list) qualtext2_nAnnots = ut.order_dict_by( ut.map_vals(len, annots.group_items(annots.quality_texts)), list(ibs.const.QUALITY_TEXT_TO_INT.keys()), ) return qualtext2_nAnnots def get_annot_viewpoint_stats(ibs, aid_list): annots = ibs.annots(aid_list) viewcode2_nAnnots = ut.order_dict_by( ut.map_vals(len, annots.group_items(annots.viewpoint_code)), list(ibs.const.VIEW.CODE_TO_INT.keys()) + [None], ) return viewcode2_nAnnots if verbose: logger.info('Checking Other Annot Stats') qualtext2_nAnnots = get_annot_qual_stats(ibs, valid_aids) viewcode2_nAnnots = get_annot_viewpoint_stats(ibs, valid_aids) agetext2_nAnnots = get_annot_age_stats(valid_aids) sextext2_nAnnots = get_annot_sex_stats(valid_aids) if verbose: logger.info('Checking Contrib Stats') # Contributor Statistics # hack remove colon for image alignment def fix_tag_list(tag_list): return [None if tag is None else tag.replace(':', ';') for tag in tag_list] image_contributor_tags = fix_tag_list(ibs.get_image_contributor_tag(valid_gids)) annot_contributor_tags = 
fix_tag_list(ibs.get_annot_image_contributor_tag(valid_aids)) contributor_tag_to_gids = ut.group_items(valid_gids, image_contributor_tags) contributor_tag_to_aids = ut.group_items(valid_aids, annot_contributor_tags) contributor_tag_to_qualstats = { key: get_annot_qual_stats(ibs, aids) for key, aids in six.iteritems(contributor_tag_to_aids) } contributor_tag_to_viewstats = { key: get_annot_viewpoint_stats(ibs, aids) for key, aids in six.iteritems(contributor_tag_to_aids) } contributor_tag_to_nImages = { key: len(val) for key, val in six.iteritems(contributor_tag_to_gids) } contributor_tag_to_nAnnots = { key: len(val) for key, val in six.iteritems(contributor_tag_to_aids) } if verbose: logger.info('Summarizing') # Summarize stats num_names = len(valid_nids) num_names_unassociated = len(valid_nids) - len(associated_nids) num_names_singleton = len(singleton_nxs) num_names_multiton = len(multiton_nxs) num_singleton_annots = len(singleton_aids) num_multiton_annots = len(multiton_aids) num_unknown_annots = len(unknown_annots) num_annots = len(valid_aids) if with_bytes: if verbose: logger.info('Checking Disk Space') ibsdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_ibsdir())) dbdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_dbdir())) imgdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_imgdir())) cachedir_space = ut.byte_str2(ut.get_disk_space(ibs.get_cachedir())) if True: if verbose: logger.info('Check asserts') try: bad_aids = np.intersect1d(multiton_aids, unknown_annots) _num_names_total_check = ( num_names_singleton + num_names_unassociated + num_names_multiton ) _num_annots_total_check = ( num_unknown_annots + num_singleton_annots + num_multiton_annots ) assert len(bad_aids) == 0, 'intersecting multiton aids and unknown aids' assert _num_names_total_check == num_names, 'inconsistent num names' # if not request_annot_subset: # dont check this if you have an annot subset assert _num_annots_total_check == num_annots, 'inconsistent num annots' except Exception as ex: ut.printex( ex, keys=[ '_num_names_total_check', 'num_names', '_num_annots_total_check', 'num_annots', 'num_names_singleton', 'num_names_multiton', 'num_unknown_annots', 'num_multiton_annots', 'num_singleton_annots', ], ) raise # Get contributor statistics contributor_rowids = ibs.get_valid_contributor_rowids() num_contributors = len(contributor_rowids) # print num_tabs = 5 def align2(str_): return ut.align(str_, ':', ' :') def align_dict2(dict_): str_ = ut.repr2(dict_, si=True) return align2(str_) header_block_lines = [('+============================')] + ( [ ('+ singleton := single sighting'), ('+ multiton := multiple sightings'), ('--' * num_tabs), ] if not short and with_header else [] ) source_block_lines = [ ('DB Info: ' + ibs.get_dbname()), ('DB Notes: ' + ibs.get_dbnotes()), ('DB NumContrib: %d' % num_contributors), ] bytes_block_lines = ( [ ('--' * num_tabs), ('DB Bytes: '), (' +- dbdir nBytes: ' + dbdir_space), (' | +- _ibsdb nBytes: ' + ibsdir_space), (' | | +-imgdir nBytes: ' + imgdir_space), (' | | +-cachedir nBytes: ' + cachedir_space), ] if with_bytes else [] ) name_block_lines = [ ('--' * num_tabs), ('# Names = %d' % num_names), ('# Names (unassociated) = %d' % num_names_unassociated), ('# Names (singleton) = %d' % num_names_singleton), ('# Names (multiton) = %d' % num_names_multiton), ] subset_str = ' ' if not request_annot_subset else '(SUBSET)' annot_block_lines = [ ('--' * num_tabs), ('# Annots %s = %d' % (subset_str, num_annots,)), ('# Annots (unknown) = %d' % num_unknown_annots), ('# Annots (singleton) 
= %d' % num_singleton_annots), ('# Annots (multiton) = %d' % num_multiton_annots), ] annot_per_basic_block_lines = ( [ ('--' * num_tabs), ('# Annots per Name (multiton) = %s' % (align2(multiton_stats),)), ('# Annots per Image = %s' % (align2(gx2_nAnnots_stats),)), ('# Annots per Species = %s' % (align_dict2(species2_nAids),)), ] if not short else [] ) occurrence_block_lines = ( [ ('--' * num_tabs), # ('# Occurrence Per Name (Resights) = %s' % (align_dict2(resight_name_stats),)), # ('# Annots per Encounter (Singlesights) = %s' % (align_dict2(singlesight_annot_stats),)), ('# Pair Tag Info (annots) = %s' % (align_dict2(pair_tag_info),)), ] if not short else [] ) annot_per_qualview_block_lines = [ None if short else '# Annots per Viewpoint = %s' % align_dict2(viewcode2_nAnnots), None if short else '# Annots per Quality = %s' % align_dict2(qualtext2_nAnnots), ] annot_per_agesex_block_lines = ( [ '# Annots per Age = %s' % align_dict2(agetext2_nAnnots), '# Annots per Sex = %s' % align_dict2(sextext2_nAnnots), ] if not short and with_agesex else [] ) contributor_block_lines = ( [ '# Images per contributor = ' + align_dict2(contributor_tag_to_nImages), '# Annots per contributor = ' + align_dict2(contributor_tag_to_nAnnots), '# Quality per contributor = ' + ut.repr2(contributor_tag_to_qualstats, sorted_=True), '# Viewpoint per contributor = ' + ut.repr2(contributor_tag_to_viewstats, sorted_=True), ] if with_contrib else [] ) img_block_lines = [ ('--' * num_tabs), ('# Img = %d' % len(valid_gids)), None if short else ('# Img reviewed = %d' % sum(image_reviewed_list)), None if short else ('# Img with gps = %d' % len(gps_list)), # ('# Img with timestamp = %d' % len(valid_unixtime_list)), None if short else ('Img Time Stats = %s' % (align2(unixtime_statstr),)), ] info_str_lines = ( header_block_lines + bytes_block_lines + source_block_lines + name_block_lines + annot_block_lines + annot_per_basic_block_lines + occurrence_block_lines + annot_per_qualview_block_lines + annot_per_agesex_block_lines + img_block_lines + contributor_block_lines + imgsize_stat_lines + [('L============================')] ) info_str = '\n'.join(ut.filter_Nones(info_str_lines)) info_str2 = ut.indent(info_str, '[{tag}]'.format(tag=tag)) if verbose: logger.info(info_str2) locals_ = locals() return locals_
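# Hedged usage note (not from the original source): get_dbinfo returns its locals(), so
# callers usually pull the formatted report out of the returned dict; 'testdb1' is just the
# doctest database referenced in the docstring above.
#
#     import wbia
#     ibs = wbia.opendb('testdb1')
#     output = get_dbinfo(ibs, verbose=False, short=True)
#     print(output['info_str'])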
def grep_projects(tofind_list, user_profile=None, verbose=True, new=False, **kwargs): r""" Greps the projects defined in the current UserProfile Args: tofind_list (list): user_profile (None): (default = None) Kwargs: user_profile CommandLine: python -m utool --tf grep_projects grep_projects Example: >>> # DISABLE_DOCTEST >>> from utool.util_project import * # NOQA >>> import utool as ut >>> import sys >>> tofind_list = ut.get_argval('--find', type_=list, >>> default=[sys.argv[-1]]) >>> grep_projects(tofind_list) """ import utool as ut user_profile = ensure_user_profile(user_profile) kwargs = kwargs.copy() colored = kwargs.pop('colored', True) grepkw = {} grepkw['greater_exclude_dirs'] = user_profile.project_exclude_dirs grepkw['exclude_dirs'] = user_profile.project_exclude_dirs grepkw['dpath_list'] = user_profile.project_dpaths grepkw['include_patterns'] = user_profile.project_include_patterns grepkw['exclude_patterns'] = user_profile.project_exclude_patterns grepkw.update(kwargs) msg_list1 = [] msg_list2 = [] print_ = msg_list1.append print_('Greping Projects') print_('tofind_list = %s' % (ut.repr4(tofind_list, nl=True),)) #print_('grepkw = %s' % ut.repr4(grepkw, nl=True)) if verbose: print('\n'.join(msg_list1)) #with ut.Timer('greping', verbose=True): grep_result = ut.grep(tofind_list, **grepkw) found_fpath_list, found_lines_list, found_lxs_list = grep_result # HACK, duplicate behavior. TODO: write grep print result function reflags = grepkw.get('reflags', 0) _exprs_flags = [ut.extend_regex2(expr, reflags) for expr in tofind_list] extended_regex_list = ut.take_column(_exprs_flags, 0) reflags_list = ut.take_column(_exprs_flags, 1) # HACK # pat = ut.util_regex.regex_or(extended_regex_list) reflags = reflags_list[0] # from utool import util_regex resultstr = ut.make_grep_resultstr(grep_result, extended_regex_list, reflags, colored=colored) msg_list2.append(resultstr) print_ = msg_list2.append #for fpath, lines, lxs in zip(found_fpath_list, found_lines_list, # found_lxs_list): # print_('----------------------') # print_('found %d line(s) in %r: ' % (len(lines), fpath)) # name = split(fpath)[1] # max_line = len(lines) # ndigits = str(len(str(max_line))) # for (lx, line) in zip(lxs, lines): # line = line.replace('\n', '') # print_(('%s : %' + ndigits + 'd |%s') % (name, lx, line)) # iter_ = zip(found_fpath_list, found_lines_list, found_lxs_list) # for fpath, lines, lxs in iter_: # print_('----------------------') # print_('found %d line(s) in %r: ' % (len(lines), fpath)) # name = split(fpath)[1] # max_line = len(lines) # ndigits = str(len(str(max_line))) # for (lx, line) in zip(lxs, lines): # line = line.replace('\n', '') # colored_line = ut.highlight_regex( # line.rstrip('\n'), pat, reflags=reflags) # print_(('%s : %' + ndigits + 'd |%s') % (name, lx, colored_line)) print_('====================') print_('found_fpath_list = ' + ut.repr4(found_fpath_list)) print_('') #print_('gvim -o ' + ' '.join(found_fpath_list)) if verbose: print('\n'.join(msg_list2)) msg_list = msg_list1 + msg_list2 if new: return GrepResult(found_fpath_list, found_lines_list, found_lxs_list, extended_regex_list, reflags) else: return msg_list
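# Hedged usage sketch (assumes a configured utool UserProfile, as ensure_user_profile
# implies): with new=False the accumulated message lines are returned, with new=True a
# GrepResult object is returned instead. The search pattern here is hypothetical.
#
#     import utool as ut
#     msg_list = grep_projects(['get_dbinfo'], verbose=False)
#     print('\n'.join(msg_list))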
def run_asmk_script(): with ut.embed_on_exception_context: # NOQA """ >>> from wbia.algo.smk.script_smk import * """ # NOQA # ============================================== # PREPROCESSING CONFIGURATION # ============================================== config = { # 'data_year': 2013, 'data_year': None, 'dtype': 'float32', # 'root_sift': True, 'root_sift': False, # 'centering': True, 'centering': False, 'num_words': 2**16, # 'num_words': 1E6 # 'num_words': 8000, 'kmeans_impl': 'sklearn.mini', 'extern_words': False, 'extern_assign': False, 'assign_algo': 'kdtree', 'checks': 1024, 'int_rvec': True, 'only_xy': False, } # Define which params are relevant for which operations relevance = {} relevance['feats'] = ['dtype', 'root_sift', 'centering', 'data_year'] relevance['words'] = relevance['feats'] + [ 'num_words', 'extern_words', 'kmeans_impl', ] relevance['assign'] = relevance['words'] + [ 'checks', 'extern_assign', 'assign_algo', ] # relevance['ydata'] = relevance['assign'] + ['int_rvec'] # relevance['xdata'] = relevance['assign'] + ['only_xy', 'int_rvec'] nAssign = 1 class SMKCacher(ut.Cacher): def __init__(self, fname, ext='.cPkl'): relevant_params = relevance[fname] relevant_cfg = ut.dict_subset(config, relevant_params) cfgstr = ut.get_cfg_lbl(relevant_cfg) dbdir = ut.truepath('/raid/work/Oxford/') super(SMKCacher, self).__init__(fname, cfgstr, cache_dir=dbdir, ext=ext) # ============================================== # LOAD DATASET, EXTRACT AND POSTPROCESS FEATURES # ============================================== if config['data_year'] == 2007: data = load_oxford_2007() elif config['data_year'] == 2013: data = load_oxford_2013() elif config['data_year'] is None: data = load_oxford_wbia() offset_list = data['offset_list'] all_kpts = data['all_kpts'] raw_vecs = data['all_vecs'] query_uri_order = data['query_uri_order'] data_uri_order = data['data_uri_order'] # del data # ================ # PRE-PROCESS # ================ import vtool as vt # Alias names to avoid errors in interactive sessions proc_vecs = raw_vecs del raw_vecs feats_cacher = SMKCacher('feats', ext='.npy') all_vecs = feats_cacher.tryload() if all_vecs is None: if config['dtype'] == 'float32': logger.info('Converting vecs to float32') proc_vecs = proc_vecs.astype(np.float32) else: proc_vecs = proc_vecs raise NotImplementedError('other dtype') if config['root_sift']: with ut.Timer('Apply root sift'): np.sqrt(proc_vecs, out=proc_vecs) vt.normalize(proc_vecs, ord=2, axis=1, out=proc_vecs) if config['centering']: with ut.Timer('Apply centering'): mean_vec = np.mean(proc_vecs, axis=0) # Center and then re-normalize np.subtract(proc_vecs, mean_vec[None, :], out=proc_vecs) vt.normalize(proc_vecs, ord=2, axis=1, out=proc_vecs) if config['dtype'] == 'int8': smk_funcs all_vecs = proc_vecs feats_cacher.save(all_vecs) del proc_vecs # ===================================== # BUILD VISUAL VOCABULARY # ===================================== if config['extern_words']: words = data['words'] assert config['num_words'] is None or len( words) == config['num_words'] else: word_cacher = SMKCacher('words') words = word_cacher.tryload() if words is None: with ut.embed_on_exception_context: if config['kmeans_impl'] == 'sklearn.mini': import sklearn.cluster rng = np.random.RandomState(13421421) # init_size = int(config['num_words'] * 8) init_size = int(config['num_words'] * 4) # converged after 26043 iterations clusterer = sklearn.cluster.MiniBatchKMeans( config['num_words'], init_size=init_size, batch_size=1000, compute_labels=False, max_iter=20, 
random_state=rng, n_init=1, verbose=1, ) clusterer.fit(all_vecs) words = clusterer.cluster_centers_ elif config['kmeans_impl'] == 'yael': from yael import ynumpy centroids, qerr, dis, assign, nassign = ynumpy.kmeans( all_vecs, config['num_words'], init='kmeans++', verbose=True, output='all', ) words = centroids word_cacher.save(words) # ===================================== # ASSIGN EACH VECTOR TO ITS NEAREST WORD # ===================================== if config['extern_assign']: assert config[ 'extern_words'], 'need extern cluster to extern assign' idx_to_wxs = vt.atleast_nd(data['idx_to_wx'], 2) idx_to_maws = np.ones(idx_to_wxs.shape, dtype=np.float32) idx_to_wxs = np.ma.array(idx_to_wxs) idx_to_maws = np.ma.array(idx_to_maws) else: from wbia.algo.smk import vocab_indexer vocab = vocab_indexer.VisualVocab(words) dassign_cacher = SMKCacher('assign') assign_tup = dassign_cacher.tryload() if assign_tup is None: vocab.flann_params['algorithm'] = config['assign_algo'] vocab.build() # Takes 12 minutes to assign jegous vecs to 2**16 vocab with ut.Timer('assign vocab neighbors'): _idx_to_wx, _idx_to_wdist = vocab.nn_index( all_vecs, nAssign, checks=config['checks']) if nAssign > 1: idx_to_wxs, idx_to_maws = smk_funcs.weight_multi_assigns( _idx_to_wx, _idx_to_wdist, massign_alpha=1.2, massign_sigma=80.0, massign_equal_weights=True, ) else: idx_to_wxs = np.ma.masked_array(_idx_to_wx, fill_value=-1) idx_to_maws = np.ma.ones(idx_to_wxs.shape, fill_value=-1, dtype=np.float32) idx_to_maws.mask = idx_to_wxs.mask assign_tup = (idx_to_wxs, idx_to_maws) dassign_cacher.save(assign_tup) idx_to_wxs, idx_to_maws = assign_tup # Breakup vectors, keypoints, and word assignments by annotation wx_lists = [ idx_to_wxs[left:right] for left, right in ut.itertwo(offset_list) ] maw_lists = [ idx_to_maws[left:right] for left, right in ut.itertwo(offset_list) ] vecs_list = [ all_vecs[left:right] for left, right in ut.itertwo(offset_list) ] kpts_list = [ all_kpts[left:right] for left, right in ut.itertwo(offset_list) ] # ======================= # FIND QUERY SUBREGIONS # ======================= ibs, query_annots, data_annots, qx_to_dx = load_ordered_annots( data_uri_order, query_uri_order) daids = data_annots.aids qaids = query_annots.aids query_super_kpts = ut.take(kpts_list, qx_to_dx) query_super_vecs = ut.take(vecs_list, qx_to_dx) query_super_wxs = ut.take(wx_lists, qx_to_dx) query_super_maws = ut.take(maw_lists, qx_to_dx) # Mark which keypoints are within the bbox of the query query_flags_list = [] only_xy = config['only_xy'] for kpts_, bbox in zip(query_super_kpts, query_annots.bboxes): flags = kpts_inside_bbox(kpts_, bbox, only_xy=only_xy) query_flags_list.append(flags) logger.info('Queries are crops of existing database images.') logger.info('Looking at average percents') percent_list = [ flags_.sum() / flags_.shape[0] for flags_ in query_flags_list ] percent_stats = ut.get_stats(percent_list) logger.info('percent_stats = %s' % (ut.repr4(percent_stats), )) import vtool as vt query_kpts = vt.zipcompress(query_super_kpts, query_flags_list, axis=0) query_vecs = vt.zipcompress(query_super_vecs, query_flags_list, axis=0) query_wxs = vt.zipcompress(query_super_wxs, query_flags_list, axis=0) query_maws = vt.zipcompress(query_super_maws, query_flags_list, axis=0) # ======================= # CONSTRUCT QUERY / DATABASE REPR # ======================= # int_rvec = not config['dtype'].startswith('float') int_rvec = config['int_rvec'] X_list = [] _prog = ut.ProgPartial(length=len(qaids), label='new X', bs=True, adjust=True) for 
aid, fx_to_wxs, fx_to_maws in _prog( zip(qaids, query_wxs, query_maws)): X = new_external_annot(aid, fx_to_wxs, fx_to_maws, int_rvec) X_list.append(X) # ydata_cacher = SMKCacher('ydata') # Y_list = ydata_cacher.tryload() # if Y_list is None: Y_list = [] _prog = ut.ProgPartial(length=len(daids), label='new Y', bs=True, adjust=True) for aid, fx_to_wxs, fx_to_maws in _prog(zip(daids, wx_lists, maw_lists)): Y = new_external_annot(aid, fx_to_wxs, fx_to_maws, int_rvec) Y_list.append(Y) # ydata_cacher.save(Y_list) # ====================== # Add in some groundtruth logger.info('Add in some groundtruth') for Y, nid in zip(Y_list, ibs.get_annot_nids(daids)): Y.nid = nid for X, nid in zip(X_list, ibs.get_annot_nids(qaids)): X.nid = nid for Y, qual in zip(Y_list, ibs.get_annot_quality_texts(daids)): Y.qual = qual # ====================== # Add in other properties for Y, vecs, kpts in zip(Y_list, vecs_list, kpts_list): Y.vecs = vecs Y.kpts = kpts imgdir = ut.truepath('/raid/work/Oxford/oxbuild_images') for Y, imgid in zip(Y_list, data_uri_order): gpath = ut.unixjoin(imgdir, imgid + '.jpg') Y.gpath = gpath for X, vecs, kpts in zip(X_list, query_vecs, query_kpts): X.kpts = kpts X.vecs = vecs # ====================== logger.info('Building inverted list') daids = [Y.aid for Y in Y_list] # wx_list = sorted(ut.list_union(*[Y.wx_list for Y in Y_list])) wx_list = sorted(set.union(*[Y.wx_set for Y in Y_list])) assert daids == data_annots.aids assert len(wx_list) <= config['num_words'] wx_to_aids = smk_funcs.invert_lists(daids, [Y.wx_list for Y in Y_list], all_wxs=wx_list) # Compute IDF weights logger.info('Compute IDF weights') ndocs_total = len(daids) # Use only the unique number of words ndocs_per_word = np.array([len(set(wx_to_aids[wx])) for wx in wx_list]) logger.info('ndocs_perword stats: ' + ut.repr4(ut.get_stats(ndocs_per_word))) idf_per_word = smk_funcs.inv_doc_freq(ndocs_total, ndocs_per_word) wx_to_weight = dict(zip(wx_list, idf_per_word)) logger.info('idf stats: ' + ut.repr4(ut.get_stats(wx_to_weight.values()))) # Filter junk Y_list_ = [Y for Y in Y_list if Y.qual != 'junk'] # ======================= # CHOOSE QUERY KERNEL # ======================= params = { 'asmk': dict(alpha=3.0, thresh=0.0), 'bow': dict(), 'bow2': dict(), } # method = 'bow' method = 'bow2' method = 'asmk' smk = SMK(wx_to_weight, method=method, **params[method]) # Specific info for the type of query if method == 'asmk': # Make residual vectors if True: # The stacked way is 50x faster # TODO: extend for multi-assignment and record fxs flat_query_vecs = np.vstack(query_vecs) flat_query_wxs = np.vstack(query_wxs) flat_query_offsets = np.array( [0] + ut.cumsum(ut.lmap(len, query_wxs))) flat_wxs_assign = flat_query_wxs flat_offsets = flat_query_offsets flat_vecs = flat_query_vecs tup = smk_funcs.compute_stacked_agg_rvecs( words, flat_wxs_assign, flat_vecs, flat_offsets) all_agg_vecs, all_error_flags, agg_offset_list = tup if int_rvec: all_agg_vecs = smk_funcs.cast_residual_integer( all_agg_vecs) agg_rvecs_list = [ all_agg_vecs[left:right] for left, right in ut.itertwo(agg_offset_list) ] agg_flags_list = [ all_error_flags[left:right] for left, right in ut.itertwo(agg_offset_list) ] for X, agg_rvecs, agg_flags in zip(X_list, agg_rvecs_list, agg_flags_list): X.agg_rvecs = agg_rvecs X.agg_flags = agg_flags[:, None] flat_wxs_assign = idx_to_wxs flat_offsets = offset_list flat_vecs = all_vecs tup = smk_funcs.compute_stacked_agg_rvecs( words, flat_wxs_assign, flat_vecs, flat_offsets) all_agg_vecs, all_error_flags, agg_offset_list = tup if 
int_rvec: all_agg_vecs = smk_funcs.cast_residual_integer( all_agg_vecs) agg_rvecs_list = [ all_agg_vecs[left:right] for left, right in ut.itertwo(agg_offset_list) ] agg_flags_list = [ all_error_flags[left:right] for left, right in ut.itertwo(agg_offset_list) ] for Y, agg_rvecs, agg_flags in zip(Y_list, agg_rvecs_list, agg_flags_list): Y.agg_rvecs = agg_rvecs Y.agg_flags = agg_flags[:, None] else: # This non-stacked way is about 500x slower _prog = ut.ProgPartial(label='agg Y rvecs', bs=True, adjust=True) for Y in _prog(Y_list_): make_agg_vecs(Y, words, Y.vecs) _prog = ut.ProgPartial(label='agg X rvecs', bs=True, adjust=True) for X in _prog(X_list): make_agg_vecs(X, words, X.vecs) elif method == 'bow2': # Hack for orig tf-idf bow vector nwords = len(words) for X in ut.ProgIter(X_list, label='make bow vector'): ensure_tf(X) bow_vector(X, wx_to_weight, nwords) for Y in ut.ProgIter(Y_list_, label='make bow vector'): ensure_tf(Y) bow_vector(Y, wx_to_weight, nwords) if method != 'bow2': for X in ut.ProgIter(X_list, 'compute X gamma'): X.gamma = smk.gamma(X) for Y in ut.ProgIter(Y_list_, 'compute Y gamma'): Y.gamma = smk.gamma(Y) # Execute matches (could go faster by enumerating candidates) scores_list = [] for X in ut.ProgIter(X_list, label='query %s' % (smk, )): scores = [smk.kernel(X, Y) for Y in Y_list_] scores = np.array(scores) scores = np.nan_to_num(scores) scores_list.append(scores) import sklearn.metrics avep_list = [] _iter = list(zip(scores_list, X_list)) _iter = ut.ProgIter(_iter, label='evaluate %s' % (smk, )) for scores, X in _iter: truth = [X.nid == Y.nid for Y in Y_list_] avep = sklearn.metrics.average_precision_score(truth, scores) avep_list.append(avep) avep_list = np.array(avep_list) mAP = np.mean(avep_list) logger.info('mAP = %r' % (mAP, ))
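# Hedged summary sketch (not in the original script): the pipeline above is driven by the
# `config` dict; 'data_year' picks the Oxford loader (2007 / 2013 / wbia), the SMKCacher
# instances persist features, words, and assignments under /raid/work/Oxford, and `method`
# selects the scoring path ('asmk' vs the tf-idf 'bow2' baseline) before mAP is reported.
# A minimal interactive invocation, assuming those paths exist:
#
#     from wbia.algo.smk.script_smk import run_asmk_script
#     run_asmk_script()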
def argparse_dict(default_dict_, lbl=None, verbose=None, only_specified=False, force_keys={}, type_hint=None, alias_dict={}): r""" Gets values for a dict based on the command line Args: default_dict_ (?): only_specified (bool): if True only returns keys that are specified on commandline. no defaults. Returns: dict_: dict_ - a dictionary CommandLine: python -m utool.util_arg --test-argparse_dict python -m utool.util_arg --test-argparse_dict --foo=3 python -m utool.util_arg --test-argparse_dict --flag1 python -m utool.util_arg --test-argparse_dict --flag2 python -m utool.util_arg --test-argparse_dict --noflag2 python -m utool.util_arg --test-argparse_dict --thresh=43 python -m utool.util_arg --test-argparse_dict --bins=-10 python -m utool.util_arg --test-argparse_dict --bins=-10 --only-specified --helpx Example: >>> # DISABLE_DOCTEST >>> from utool.util_arg import * # NOQA >>> import utool as ut >>> # build test data >>> default_dict_ = { ... 'bins': 8, ... 'foo': None, ... 'flag1': False, ... 'flag2': True, ... 'max': 0.2, ... 'neg': -5, ... 'thresh': -5.333, ... } >>> # execute function >>> only_specified = ut.get_argflag('--only-specified') >>> dict_ = argparse_dict(default_dict_, only_specified=only_specified) >>> # verify results >>> result = ut.repr4(dict_, sorted_=True) >>> print(result) """ if verbose is None: verbose = VERBOSE_ARGPARSE def make_argstrs(key, prefix_list): for prefix in prefix_list: yield prefix + key yield prefix + key.replace('-', '_') yield prefix + key.replace('_', '-') def get_dictkey_cmdline_val(key, default, type_hint): # see if the user gave a commandline value for this dict key defaulttype_ = None if default is None else type(default) if type_hint is None: type_ = defaulttype_ elif isinstance(type_hint, dict): type_ = type_hint.get(key, defaulttype_) elif isinstance(type_hint, type): type_ = type_hint else: raise NotImplementedError('Unknown type of type_hint=%r' % (type_hint,)) was_specified = False if isinstance(default, bool): val = default if default is True: falsekeys = list(set(make_argstrs(key, ['--no', '--no-']))) notval, was_specified = get_argflag(falsekeys, return_specified=True) val = not notval if not was_specified: truekeys = list(set(make_argstrs(key, ['--']))) val_, was_specified = get_argflag(truekeys, return_specified=True) if was_specified: val = val_ elif default is False: truekeys = list(set(make_argstrs(key, ['--']))) val, was_specified = get_argflag(truekeys, return_specified=True) else: argtup = list(set(make_argstrs(key, ['--']))) #if key == 'species': # import utool as ut # ut.embed() val, was_specified = get_argval(argtup, type_=type_, default=default, return_specified=True) return val, was_specified dict_ = {} num_specified = 0 for key, default in six.iteritems(default_dict_): val, was_specified = get_dictkey_cmdline_val(key, default, type_hint) if not was_specified: alias_keys = meta_util_iter.ensure_iterable(alias_dict.get(key, [])) for alias_key in alias_keys: val, was_specified = get_dictkey_cmdline_val(alias_key, default, type_hint) if was_specified: break if VERBOSE_ARGPARSE: if was_specified: num_specified += 1 print('[argparse_dict] Specified key=%r, val=%r' % (key, val)) #if key == 'foo': # import utool as ut # ut.embed() if not only_specified or was_specified or key in force_keys: dict_[key] = val if VERBOSE_ARGPARSE: print('[argparse_dict] num_specified = %r' % (num_specified,)) print('[argparse_dict] force_keys = %r' % (force_keys,)) #dict_ = {key: get_dictkey_cmdline_val(key, default) for key, default in 
#six.iteritems(default_dict_)} if verbose: for key in dict_: if dict_[key] != default_dict_[key]: print('[argparse_dict] GOT ARGUMENT: cfgdict[%r] = %r' % (key, dict_[key])) do_helpx = get_argflag('--helpx', help_='Specifies that argparse_dict should print help and quit') if get_argflag(('--help', '--help2')) or do_helpx: import utool as ut print('COMMAND LINE IS ACCEPTING THESE PARAMS WITH DEFAULTS:') if lbl is not None: print(lbl) #print(ut.align(ut.repr4(dict_, sorted_=True), ':')) print(ut.align(ut.repr4(default_dict_, sorted_=True), ':')) if do_helpx: sys.exit(1) return dict_
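# Hedged usage sketch (mirrors the doctest above): each default is overridden by a matching
# --key=value argument, and boolean defaults get --key / --no-key toggles.
#
#     defaults = {'bins': 8, 'flag1': False, 'thresh': -5.333}
#     cfg = argparse_dict(defaults)
#     # e.g. `python script.py --bins=16 --flag1` would yield
#     # {'bins': 16, 'flag1': True, 'thresh': -5.333}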