def compute_word_metrics(invindex):
    invindex.idx2_wxs = np.array(invindex.idx2_wxs)
    wx2_idxs = invindex.wx2_idxs
    idx2_dvec = invindex.idx2_dvec
    words = invindex.words
    wx2_pdist = {}
    wx2_wdist = {}
    wx2_nMembers = {}
    wx2_pdist_stats = {}
    wx2_wdist_stats = {}
    wordidx_iter = ut.progiter(six.iteritems(wx2_idxs), lbl='Word Dists: ',
                               num=len(wx2_idxs), freq=200)
    for _item in wordidx_iter:
        wx, idxs = _item
        dvecs = idx2_dvec.take(idxs, axis=0)
        word = words[wx:wx + 1]
        wx2_pdist[wx] = spdist.pdist(dvecs)  # pairwise dists between member descriptors
        wx2_wdist[wx] = ut.euclidean_dist(dvecs, word)  # dists to the word center
        wx2_nMembers[wx] = len(idxs)
    for wx, pdist in ut.progiter(six.iteritems(wx2_pdist), lbl='Word pdist Stats: ',
                                 num=len(wx2_idxs), freq=2000):
        wx2_pdist_stats[wx] = ut.get_stats(pdist)
    for wx, wdist in ut.progiter(six.iteritems(wx2_wdist), lbl='Word wdist Stats: ',
                                 num=len(wx2_idxs), freq=2000):
        wx2_wdist_stats[wx] = ut.get_stats(wdist)
    ut.print_stats(wx2_nMembers.values(), 'word members')
    metrics = Metrics(wx2_nMembers, wx2_pdist_stats, wx2_wdist_stats)
    return metrics
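# Minimal sketch of the per-word distance computation above, using only numpy
# and scipy; `dvecs` and `word` are hypothetical stand-ins for one word's
# member descriptors and its center (not taken from an actual inverted index).
import numpy as np
import scipy.spatial.distance as spdist

rng = np.random.RandomState(0)
dvecs = rng.rand(50, 128)                  # descriptors assigned to one visual word
word = dvecs.mean(axis=0, keepdims=True)   # the word center

pdist = spdist.pdist(dvecs)                    # pairwise member-to-member distances
wdist = np.linalg.norm(dvecs - word, axis=1)   # member-to-center distances
print(pdist.mean(), wdist.mean())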
def dictinfo(dict_):
    if not isinstance(dict_, dict):
        return 'expected dict got %r' % type(dict_)
    keys = list(dict_.keys())
    vals = list(dict_.values())
    num_keys = len(keys)
    key_types = list(set(map(type, keys)))
    val_types = list(set(map(type, vals)))
    fmtstr_ = '\n' + ut.unindent('''
    * num_keys = {num_keys}
    * key_types = {key_types}
    * val_types = {val_types}
    '''.strip('\n'))
    if len(val_types) == 1:
        if val_types[0] == np.ndarray:
            # each key holds an ndarray
            val_shape_stats = ut.get_stats(set(map(np.shape, vals)), axis=0)
            val_shape_stats_str = ut.dict_str(val_shape_stats, strvals=True, newlines=False)
            val_dtypes = list(set([val.dtype for val in vals]))
            fmtstr_ += ut.unindent('''
            * val_shape_stats = {val_shape_stats_str}
            * val_dtypes = {val_dtypes}
            '''.strip('\n'))
        elif val_types[0] == list:
            # each key holds a list
            val_len_stats = ut.get_stats(set(map(len, vals)))
            val_len_stats_str = ut.dict_str(val_len_stats, strvals=True, newlines=False)
            depth = ut.list_depth(vals)
            deep_val_types = list(set(ut.list_deep_types(vals)))
            fmtstr_ += ut.unindent('''
            * list_depth = {depth}
            * val_len_stats = {val_len_stats_str}
            * deep_types = {deep_val_types}
            '''.strip('\n'))
            if len(deep_val_types) == 1:
                if deep_val_types[0] == np.ndarray:
                    deep_val_dtypes = list(set([val.dtype for val in vals]))
                    fmtstr_ += ut.unindent('''
                    * deep_val_dtypes = {deep_val_dtypes}
                    ''').strip('\n')
        elif val_types[0] in [np.uint8, np.int8, np.int32, np.int64,
                              np.float16, np.float32, np.float64]:
            # each key holds a scalar
            val_stats = ut.get_stats(vals)
            fmtstr_ += ut.unindent('''
            * val_stats = {val_stats}
            ''').strip('\n')
    fmtstr = fmtstr_.format(**locals())
    return ut.indent(fmtstr)
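# Hedged usage sketch for dictinfo above, exercising the scalar-value branch;
# assumes numpy and utool are importable and dictinfo is in scope.
import numpy as np
import utool as ut  # NOQA

scores = {'a': np.float64(0.1), 'b': np.float64(0.7), 'c': np.float64(0.4)}
print(dictinfo(scores))    # reports num_keys, key/val types, and value stats
print(dictinfo('oops'))    # non-dict input returns an error string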
def print_feat_stats(kpts, vecs, prefix=''):
    # `prefix` was referenced but never defined in the original snippet; it is
    # exposed here as an optional label argument.
    assert len(vecs) == len(kpts), 'disagreement'
    print('keypoints and vecs agree')
    flat_kpts = np.vstack(kpts)
    num_kpts = list(map(len, kpts))
    kpt_scale = vt.get_scales(flat_kpts)
    num_kpts_stats = ut.get_stats(num_kpts)
    scale_kpts_stats = ut.get_stats(kpt_scale)
    print('Number of ' + prefix + ' keypoints: ' + ut.repr3(num_kpts_stats, nl=0, precision=2))
    print('Scale of ' + prefix + ' keypoints: ' + ut.repr3(scale_kpts_stats, nl=0, precision=2))
def vector_normal_stats(vectors):
    import numpy.linalg as npl
    norm_list = npl.norm(vectors, axis=1)
    #norm_list2 = np.sqrt((vectors ** 2).sum(axis=1))
    #assert np.all(norm_list == norm_list2)
    norm_stats = ut.get_stats(norm_list)
    print('normal_stats:' + ut.dict_str(norm_stats, newlines=False))
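# Hedged usage sketch: report L2-norm statistics for a random descriptor
# matrix, assuming numpy and utool are available and vector_normal_stats
# (above) is in scope.
import numpy as np

rng = np.random.RandomState(42)
vectors = rng.rand(1000, 128).astype(np.float32)
vector_normal_stats(vectors)   # prints mean/std/min/max of the row norms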
def __init__(invassign, fstack, vocab, wx2_idxs, wx2_maws, wx2_fxs, wx2_axs):
    invassign.fstack = fstack
    invassign.vocab = vocab
    invassign.wx2_idxs = wx2_idxs
    invassign.wx2_maws = wx2_maws
    invassign.wx2_fxs = wx2_fxs
    invassign.wx2_axs = wx2_axs
    invassign.wx2_num = ut.map_dict_vals(len, invassign.wx2_axs)
    invassign.wx_list = sorted(invassign.wx2_num.keys())
    invassign.num_list = ut.take(invassign.wx2_num, invassign.wx_list)
    invassign.perword_stats = ut.get_stats(invassign.num_list)
def print_dataset_info(data, labels, key):
    labelhist = {key: len(val) for key, val in ut.group_items(labels, labels).items()}
    stats_dict = ut.get_stats(data.ravel())
    ut.delete_keys(stats_dict, ['shape', 'nMax', 'nMin'])
    print('[dataset] Dataset Info: ')
    print('[dataset] * Data:')
    print('[dataset] %s_data(shape=%r, dtype=%r)' % (key, data.shape, data.dtype))
    print('[dataset] %s_memory(data) = %r' % (key, ut.get_object_size_str(data),))
    print('[dataset] %s_stats(data) = %s' % (key, ut.repr2(stats_dict, precision=2),))
    print('[dataset] * Labels:')
    print('[dataset] %s_labels(shape=%r, dtype=%r)' % (key, labels.shape, labels.dtype))
    print('[dataset] %s_label histogram = %s' % (key, ut.repr2(labelhist)))
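# Hedged usage sketch: summarize a small random dataset split, assuming numpy
# and utool are available and print_dataset_info (above) is in scope.
import numpy as np

rng = np.random.RandomState(0)
data = rng.rand(100, 32, 32).astype(np.float32)
labels = rng.randint(0, 3, size=100)
print_dataset_info(data, labels, 'train')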
def inspect_deck(deck):
    def get_card_tags(card, deck):
        tags = []
        stats = card.mana_source_stats(deck)
        if stats is not None:
            tags.append("land")
            if len(stats[1]) > 0:
                tags.append("tapland")
            else:
                tags.append("untapland")
        return tags
    # ------------
    print("len(deck) = %r" % (len(deck),))
    tags_list = [get_card_tags(card, deck) for card in deck.card_list]
    print("Deck Counts:")
    print(ut.repr2(ut.dict_hist(ut.flatten(tags_list)), nl=True))
    hand = deck.sample_hand()
    manastats_list = [card.mana_source_stats(deck) for card in hand]
    print(ut.list_str([card.name + ": " + text_type(stats)
                       for card, stats in zip(hand, manastats_list)]))
    tags_list = [get_card_tags(card, deck) for card in hand]
    print("Hand Counts")
    print(ut.repr2(ut.dict_hist(ut.flatten(tags_list)), nl=True))
    valid_tags = ["land", "tapland", "untapland"]
    x = {tag: [] for tag in valid_tags}
    for _ in range(500):
        hand = deck.sample_hand()
        tags_list = [get_card_tags(card, deck) for card in hand]
        taghist = ut.dict_hist(ut.flatten(tags_list))
        for key, val in x.items():
            val.append(taghist.get(key, 0))
    print("Monte Stats:")
    for key, val in list(x.items()):
        print("%15s: %s" % (key, ut.repr2(ut.get_stats(val), precision=2)))

    def hand_stats():
        # leftover exploratory snippets; the bare `card.types` access is kept
        # commented out because `card` is undefined at this scope
        # [card.types for card in hand]
        # [card.rrr() for card in hand]
        [card.mana_source_stats(deck) for card in hand]
        # card.types
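# A minimal, self-contained sketch of the Monte Carlo tally pattern used above,
# with a hypothetical draw_hand() standing in for deck.sample_hand(); only the
# standard library is used.
import random
from collections import Counter

def draw_hand(rng, n_land=24, n_spell=36, hand_size=7):
    """Draw a hand from a hypothetical 60-card deck of 'land'/'spell' cards."""
    deck = ['land'] * n_land + ['spell'] * n_spell
    return rng.sample(deck, hand_size)

rng = random.Random(0)
tallies = {'land': [], 'spell': []}
for _ in range(500):
    hist = Counter(draw_hand(rng))
    for tag, counts in tallies.items():
        counts.append(hist.get(tag, 0))
for tag, counts in tallies.items():
    print('%8s: mean=%.2f' % (tag, sum(counts) / len(counts)))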
def get_timestats_dict(unixtime_list, full=True, isutc=False):
    import utool as ut
    unixtime_stats = ut.get_stats(unixtime_list, use_nan=True)
    datetime_stats = {}
    if unixtime_stats.get('empty_list', False):
        datetime_stats = unixtime_stats
        return datetime_stats
    for key in ['max', 'min', 'mean']:
        try:
            datetime_stats[key] = ut.unixtime_to_datetimestr(
                unixtime_stats[key], isutc=isutc)
        except KeyError:
            pass
        except ValueError as ex:
            datetime_stats[key] = 'NA'
        except Exception as ex:
            ut.printex(ex, keys=['key', 'unixtime_stats'])
            raise
    for key in ['std']:
        try:
            datetime_stats[key] = str(
                ut.get_unix_timedelta(int(round(unixtime_stats[key]))))
        except KeyError:
            pass
        except ValueError as ex:
            datetime_stats[key] = 'NA'
    try:
        datetime_stats['range'] = str(
            ut.get_unix_timedelta(
                int(round(unixtime_stats['max'] - unixtime_stats['min']))))
    except KeyError:
        pass
    except ValueError as ex:
        datetime_stats['range'] = 'NA'
    if full:
        #unused_keys = (set(unixtime_stats.keys()) - set(datetime_stats.keys())
        for key in ['nMax', 'num_nan', 'shape', 'nMin']:
            datetime_stats[key] = unixtime_stats[key]
    #print('Unused keys = %r' % (set(unixtime_stats.keys()) - set(datetime_stats.keys()),))
    return datetime_stats
def get_timestats_dict(unixtime_list, full=True, isutc=False):
    import utool as ut
    unixtime_stats = ut.get_stats(unixtime_list, use_nan=True)
    datetime_stats = {}
    if unixtime_stats.get('empty_list', False):
        datetime_stats = unixtime_stats
        return datetime_stats
    # elif unixtime_stats.get('num_nan', 0) == unixtime_stats.get('shape')[0]:
    #     datetime_stats = unixtime_stats
    #     return datetime_stats
    for key in ['max', 'min', 'mean']:
        try:
            datetime_stats[key] = ut.unixtime_to_datetimestr(unixtime_stats[key], isutc=isutc)
        except KeyError:
            pass
        except (ValueError, OSError) as ex:
            datetime_stats[key] = 'NA'
        except Exception as ex:
            ut.printex(ex, keys=['key', 'unixtime_stats'])
            raise
    for key in ['std']:
        try:
            datetime_stats[key] = str(ut.get_unix_timedelta(int(round(unixtime_stats[key]))))
        except KeyError:
            pass
        except (ValueError, OSError) as ex:
            datetime_stats[key] = 'NA'
    try:
        datetime_stats['range'] = str(ut.get_unix_timedelta(int(round(unixtime_stats['max'] - unixtime_stats['min']))))
    except KeyError:
        pass
    except (ValueError, OSError) as ex:
        datetime_stats['range'] = 'NA'
    if full:
        #unused_keys = (set(unixtime_stats.keys()) - set(datetime_stats.keys())
        for key in ['nMax', 'num_nan', 'shape', 'nMin']:
            datetime_stats[key] = unixtime_stats[key]
    #print('Unused keys = %r' % (set(unixtime_stats.keys()) - set(datetime_stats.keys()),))
    return datetime_stats
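# Hedged usage sketch: summarize a list of unix timestamps with the variant
# above; assumes utool is installed and that its unixtime_to_datetimestr
# accepts the isutc keyword used here.
import time

now = time.time()
unixtime_list = [now - 3600, now - 60, now, float('nan')]
stats = get_timestats_dict(unixtime_list, full=True, isutc=True)
for key, val in stats.items():
    print('%8s: %s' % (key, val))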
def latex_get_stats(lbl, data, mode=0):
    import utool as ut
    stats_ = ut.get_stats(data)
    if stats_.get('empty_list', False):
        return '% NA: latex_get_stats, data=[]'
    try:
        max_ = stats_['max']
        min_ = stats_['min']
        mean = stats_['mean']
        std = stats_['std']
        shape = stats_['shape']
    except KeyError as ex:
        stat_keys = stats_.keys()  # NOQA
        ut.printex(ex, key_list=['stat_keys', 'stats_', 'data'])
        raise

    #int_fmt = lambda num: util.num_fmt(int(num))
    def float_fmt(num):
        return util_num.num_fmt(float(num))

    def tup_fmt(tup):
        return str(tup)
    fmttup = (float_fmt(min_), float_fmt(max_), float_fmt(mean), float_fmt(std),
              tup_fmt(shape))
    lll = ' ' * len(lbl)
    if mode == 0:
        prefmtstr = r'''
        {label} stats & min ; max = %s ; %s\\
        {space} & mean; std = %s ; %s\\
        {space} & shape = %s \\'''
    if mode == 1:
        prefmtstr = r'''
        {label} stats & min = $%s$\\
        {space} & max = $%s$\\
        {space} & mean = $%s$\\
        {space} & std = $%s$\\
        {space} & shape = $%s$\\'''
    fmtstr = prefmtstr.format(label=lbl, space=lll)
    latex_str = textwrap.dedent(fmtstr % fmttup).strip('\n') + '\n'
    return latex_str
def wx_len_stats(wx2_xxx):
    """
    Example:
        >>> from ibeis.algo.hots.smk.smk_debug import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> from ibeis.algo.hots.smk import smk_repr
        >>> ibs, annots_df, taids, daids, qaids, qreq_, nWords = smk_debug.testdata_dataframe()
        >>> qreq_ = query_request.new_ibeis_query_request(ibs, qaids, daids)
        >>> qparams = qreq_.qparams
        >>> invindex = smk_repr.index_data_annots(annots_df, daids, words)
        >>> qaid = qaids[0]
        >>> wx2_qrvecs, wx2_qaids, wx2_qfxs, query_sccw = smk_repr.new_qindex(annots_df, qaid, invindex, qparams)
        >>> print(ut.dict_str(wx2_rvecs_stats(wx2_qrvecs)))
    """
    import utool as ut
    if wx2_xxx is None:
        return 'None'
    if isinstance(wx2_xxx, dict):
        #len_list = [len(xxx) for xxx in ]
        val_list = wx2_xxx.values()
    else:
        val_list = wx2_xxx
    try:
        len_list = [len(xxx) for xxx in val_list]
        statdict = ut.get_stats(len_list)
        return ut.dict_str(statdict, strvals=True, newlines=False)
    except Exception as ex:
        ut.printex(ex)
        # enumerate the values to report which entry failed
        for count, xxx in enumerate(val_list):
            try:
                len(xxx)
            except Exception:
                print('failed on count=%r' % (count,))
                print('failed on xxx=%r' % (xxx,))
                pass
        raise
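# Hedged usage sketch: length statistics over a toy mapping from word index to
# assigned features, assuming utool is installed and wx_len_stats (above) is
# in scope.
wx2_fxs = {0: [1, 2, 3], 1: [4], 2: [5, 6], 3: []}
print(wx_len_stats(wx2_fxs))   # stats over the lengths [3, 1, 2, 0]
print(wx_len_stats(None))      # 'None'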
def get_timestats_dict(unixtime_list, full=True):
    import utool as ut
    unixtime_stats = ut.get_stats(unixtime_list, use_nan=True)
    datetime_stats = {}
    for key in ['max', 'min', 'mean']:
        try:
            datetime_stats[key] = ut.unixtime_to_datetime(unixtime_stats[key])
        except KeyError:
            pass
    for key in ['std']:
        try:
            datetime_stats[key] = str(ut.get_unix_timedelta(int(round(unixtime_stats[key]))))
        except KeyError:
            pass
    try:
        datetime_stats['range'] = str(ut.get_unix_timedelta(int(round(unixtime_stats['max'] - unixtime_stats['min']))))
    except KeyError:
        pass
    if full:
        for key in ['nMax', 'num_nan', 'shape', 'nMin']:
            datetime_stats[key] = unixtime_stats[key]
    #print('Unused keys = %r' % (set(unixtime_stats.keys()) - set(datetime_stats.keys()),))
    return datetime_stats
def split_analysis(ibs): """ CommandLine: python -m ibeis.other.dbinfo split_analysis --show python -m ibeis split_analysis --show python -m ibeis split_analysis --show --good Ignore: # mount sshfs -o idmap=user lev:/ ~/lev # unmount fusermount -u ~/lev Example: >>> # DISABLE_DOCTEST GGR >>> from ibeis.other.dbinfo import * # NOQA >>> import ibeis >>> dbdir = '/media/danger/GGR/GGR-IBEIS' >>> dbdir = dbdir if ut.checkpath(dbdir) else ut.truepath('~/lev/media/danger/GGR/GGR-IBEIS') >>> ibs = ibeis.opendb(dbdir=dbdir, allow_newdir=False) >>> import guitool_ibeis as gt >>> gt.ensure_qtapp() >>> win = split_analysis(ibs) >>> ut.quit_if_noshow() >>> import plottool_ibeis as pt >>> gt.qtapp_loop(qwin=win) >>> #ut.show_if_requested() """ #nid_list = ibs.get_valid_nids(filter_empty=True) import datetime day1 = datetime.date(2016, 1, 30) day2 = datetime.date(2016, 1, 31) filter_kw = { 'multiple': None, #'view': ['right'], #'minqual': 'good', 'is_known': True, 'min_pername': 1, } aids1 = ibs.filter_annots_general(filter_kw=ut.dict_union( filter_kw, { 'min_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day1, 0.0)), 'max_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day1, 1.0)), }) ) aids2 = ibs.filter_annots_general(filter_kw=ut.dict_union( filter_kw, { 'min_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day2, 0.0)), 'max_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day2, 1.0)), }) ) all_aids = aids1 + aids2 all_annots = ibs.annots(all_aids) print('%d annots on day 1' % (len(aids1)) ) print('%d annots on day 2' % (len(aids2)) ) print('%d annots overall' % (len(all_annots)) ) print('%d names overall' % (len(ut.unique(all_annots.nids))) ) nid_list, annots_list = all_annots.group(all_annots.nids) REVIEWED_EDGES = True if REVIEWED_EDGES: aids_list = [annots.aids for annots in annots_list] #aid_pairs = [annots.get_am_aidpairs() for annots in annots_list] # Slower aid_pairs = ibs.get_unflat_am_aidpairs(aids_list) # Faster else: # ALL EDGES aid_pairs = [annots.get_aidpairs() for annots in annots_list] speeds_list = ibs.unflat_map(ibs.get_annotpair_speeds, aid_pairs) import vtool_ibeis as vt max_speeds = np.array([vt.safe_max(s, nans=False) for s in speeds_list]) nan_idx = np.where(np.isnan(max_speeds))[0] inf_idx = np.where(np.isinf(max_speeds))[0] bad_idx = sorted(ut.unique(ut.flatten([inf_idx, nan_idx]))) ok_idx = ut.index_complement(bad_idx, len(max_speeds)) print('#nan_idx = %r' % (len(nan_idx),)) print('#inf_idx = %r' % (len(inf_idx),)) print('#ok_idx = %r' % (len(ok_idx),)) ok_speeds = max_speeds[ok_idx] ok_nids = ut.take(nid_list, ok_idx) ok_annots = ut.take(annots_list, ok_idx) sortx = np.argsort(ok_speeds)[::-1] sorted_speeds = np.array(ut.take(ok_speeds, sortx)) sorted_annots = np.array(ut.take(ok_annots, sortx)) sorted_nids = np.array(ut.take(ok_nids, sortx)) # NOQA sorted_speeds = np.clip(sorted_speeds, 0, 100) #idx = vt.find_elbow_point(sorted_speeds) #EXCESSIVE_SPEED = sorted_speeds[idx] # http://www.infoplease.com/ipa/A0004737.html # http://www.speedofanimals.com/animals/zebra #ZEBRA_SPEED_MAX = 64 # km/h #ZEBRA_SPEED_RUN = 50 # km/h ZEBRA_SPEED_SLOW_RUN = 20 # km/h #ZEBRA_SPEED_FAST_WALK = 10 # km/h #ZEBRA_SPEED_WALK = 7 # km/h MAX_SPEED = ZEBRA_SPEED_SLOW_RUN #MAX_SPEED = ZEBRA_SPEED_WALK #MAX_SPEED = EXCESSIVE_SPEED flags = sorted_speeds > MAX_SPEED flagged_ok_annots = ut.compress(sorted_annots, flags) inf_annots = ut.take(annots_list, inf_idx) flagged_annots = inf_annots + flagged_ok_annots print('MAX_SPEED = %r km/h' % (MAX_SPEED,)) print('%d annots 
with infinite speed' % (len(inf_annots),)) print('%d annots with large speed' % (len(flagged_ok_annots),)) print('Marking all pairs of annots above the threshold as non-matching') from ibeis.algo.graph import graph_iden import networkx as nx progkw = dict(freq=1, bs=True, est_window=len(flagged_annots)) bad_edges_list = [] good_edges_list = [] for annots in ut.ProgIter(flagged_annots, lbl='flag speeding names', **progkw): edge_to_speeds = annots.get_speeds() bad_edges = [edge for edge, speed in edge_to_speeds.items() if speed > MAX_SPEED] good_edges = [edge for edge, speed in edge_to_speeds.items() if speed <= MAX_SPEED] bad_edges_list.append(bad_edges) good_edges_list.append(good_edges) all_bad_edges = ut.flatten(bad_edges_list) good_edges_list = ut.flatten(good_edges_list) print('num_bad_edges = %r' % (len(ut.flatten(bad_edges_list)),)) print('num_bad_edges = %r' % (len(ut.flatten(good_edges_list)),)) if 1: from ibeis.viz import viz_graph2 import guitool_ibeis as gt gt.ensure_qtapp() if ut.get_argflag('--good'): print('Looking at GOOD (no speed problems) edges') aid_pairs = good_edges_list else: print('Looking at BAD (speed problems) edges') aid_pairs = all_bad_edges aids = sorted(list(set(ut.flatten(aid_pairs)))) infr = graph_iden.AnnotInference(ibs, aids, verbose=False) infr.initialize_graph() # Use random scores to randomize sort order rng = np.random.RandomState(0) scores = (-rng.rand(len(aid_pairs)) * 10).tolist() infr.graph.add_edges_from(aid_pairs) if True: edge_sample_size = 250 pop_nids = ut.unique(ibs.get_annot_nids(ut.unique(ut.flatten(aid_pairs)))) sorted_pairs = ut.sortedby(aid_pairs, scores)[::-1][0:edge_sample_size] sorted_nids = ibs.get_annot_nids(ut.take_column(sorted_pairs, 0)) sample_size = len(ut.unique(sorted_nids)) am_rowids = ibs.get_annotmatch_rowid_from_undirected_superkey(*zip(*sorted_pairs)) flags = ut.not_list(ut.flag_None_items(am_rowids)) #am_rowids = ut.compress(am_rowids, flags) positive_tags = ['SplitCase', 'Photobomb'] flags_list = [ut.replace_nones(ibs.get_annotmatch_prop(tag, am_rowids), 0) for tag in positive_tags] print('edge_case_hist: ' + ut.repr3( ['%s %s' % (txt, sum(flags_)) for flags_, txt in zip(flags_list, positive_tags)])) is_positive = ut.or_lists(*flags_list) num_positive = sum(ut.lmap(any, ut.group_items(is_positive, sorted_nids).values())) pop = len(pop_nids) print('A positive is any edge flagged as a %s' % (ut.conj_phrase(positive_tags, 'or'),)) print('--- Sampling wrt edges ---') print('edge_sample_size = %r' % (edge_sample_size,)) print('edge_population_size = %r' % (len(aid_pairs),)) print('num_positive_edges = %r' % (sum(is_positive))) print('--- Sampling wrt names ---') print('name_population_size = %r' % (pop,)) vt.calc_error_bars_from_sample(sample_size, num_positive, pop, conf_level=.95) nx.set_edge_attributes(infr.graph, name='score', values=dict(zip(aid_pairs, scores))) win = viz_graph2.AnnotGraphWidget(infr=infr, use_image=False, init_mode=None) win.populate_edge_model() win.show() return win # Make review interface for only bad edges infr_list = [] iter_ = list(zip(flagged_annots, bad_edges_list)) for annots, bad_edges in ut.ProgIter(iter_, lbl='creating inference', **progkw): aids = annots.aids nids = [1] * len(aids) infr = graph_iden.AnnotInference(ibs, aids, nids, verbose=False) infr.initialize_graph() infr.reset_feedback() infr_list.append(infr) # Check which ones are user defined as incorrect #num_positive = 0 #for infr in infr_list: # flag = np.any(infr.get_feedback_probs()[0] == 0) # num_positive += flag 
#print('num_positive = %r' % (num_positive,)) #pop = len(infr_list) #print('pop = %r' % (pop,)) iter_ = list(zip(infr_list, bad_edges_list)) for infr, bad_edges in ut.ProgIter(iter_, lbl='adding speed edges', **progkw): flipped_edges = [] for aid1, aid2 in bad_edges: if infr.graph.has_edge(aid1, aid2): flipped_edges.append((aid1, aid2)) infr.add_feedback((aid1, aid2), NEGTV) nx.set_edge_attributes(infr.graph, name='_speed_split', values='orig') nx.set_edge_attributes(infr.graph, name='_speed_split', values={edge: 'new' for edge in bad_edges}) nx.set_edge_attributes(infr.graph, name='_speed_split', values={edge: 'flip' for edge in flipped_edges}) #for infr in ut.ProgIter(infr_list, lbl='flagging speeding edges', **progkw): # annots = ibs.annots(infr.aids) # edge_to_speeds = annots.get_speeds() # bad_edges = [edge for edge, speed in edge_to_speeds.items() if speed > MAX_SPEED] def inference_stats(infr_list_): relabel_stats = [] for infr in infr_list_: num_ccs, num_inconsistent = infr.relabel_using_reviews() state_hist = ut.dict_hist(nx.get_edge_attributes(infr.graph, 'decision').values()) if POSTV not in state_hist: state_hist[POSTV] = 0 hist = ut.dict_hist(nx.get_edge_attributes(infr.graph, '_speed_split').values()) subgraphs = infr.positive_connected_compoments() subgraph_sizes = [len(g) for g in subgraphs] info = ut.odict([ ('num_nonmatch_edges', state_hist[NEGTV]), ('num_match_edges', state_hist[POSTV]), ('frac_nonmatch_edges', state_hist[NEGTV] / (state_hist[POSTV] + state_hist[NEGTV])), ('num_inconsistent', num_inconsistent), ('num_ccs', num_ccs), ('edges_flipped', hist.get('flip', 0)), ('edges_unchanged', hist.get('orig', 0)), ('bad_unreviewed_edges', hist.get('new', 0)), ('orig_size', len(infr.graph)), ('new_sizes', subgraph_sizes), ]) relabel_stats.append(info) return relabel_stats relabel_stats = inference_stats(infr_list) print('\nAll Split Info:') lines = [] for key in relabel_stats[0].keys(): data = ut.take_column(relabel_stats, key) if key == 'new_sizes': data = ut.flatten(data) lines.append('stats(%s) = %s' % (key, ut.repr2(ut.get_stats(data, use_median=True), precision=2))) print('\n'.join(ut.align_lines(lines, '='))) num_incon_list = np.array(ut.take_column(relabel_stats, 'num_inconsistent')) can_split_flags = num_incon_list == 0 print('Can trivially split %d / %d' % (sum(can_split_flags), len(can_split_flags))) splittable_infrs = ut.compress(infr_list, can_split_flags) relabel_stats = inference_stats(splittable_infrs) print('\nTrival Split Info:') lines = [] for key in relabel_stats[0].keys(): if key in ['num_inconsistent']: continue data = ut.take_column(relabel_stats, key) if key == 'new_sizes': data = ut.flatten(data) lines.append('stats(%s) = %s' % ( key, ut.repr2(ut.get_stats(data, use_median=True), precision=2))) print('\n'.join(ut.align_lines(lines, '='))) num_match_edges = np.array(ut.take_column(relabel_stats, 'num_match_edges')) num_nonmatch_edges = np.array(ut.take_column(relabel_stats, 'num_nonmatch_edges')) flags1 = np.logical_and(num_match_edges > num_nonmatch_edges, num_nonmatch_edges < 3) reasonable_infr = ut.compress(splittable_infrs, flags1) new_sizes_list = ut.take_column(relabel_stats, 'new_sizes') flags2 = [len(sizes) == 2 and sum(sizes) > 4 and (min(sizes) / max(sizes)) > .3 for sizes in new_sizes_list] reasonable_infr = ut.compress(splittable_infrs, flags2) print('#reasonable_infr = %r' % (len(reasonable_infr),)) for infr in ut.InteractiveIter(reasonable_infr): annots = ibs.annots(infr.aids) edge_to_speeds = annots.get_speeds() print('max_speed = %r' 
% (max(edge_to_speeds.values())),) infr.initialize_visual_node_attrs() infr.show_graph(use_image=True, only_reviewed=True) rest = ~np.logical_or(flags1, flags2) nonreasonable_infr = ut.compress(splittable_infrs, rest) rng = np.random.RandomState(0) random_idx = ut.random_indexes(len(nonreasonable_infr) - 1, 15, rng=rng) random_infr = ut.take(nonreasonable_infr, random_idx) for infr in ut.InteractiveIter(random_infr): annots = ibs.annots(infr.aids) edge_to_speeds = annots.get_speeds() print('max_speed = %r' % (max(edge_to_speeds.values())),) infr.initialize_visual_node_attrs() infr.show_graph(use_image=True, only_reviewed=True) #import scipy.stats as st #conf_interval = .95 #st.norm.cdf(conf_interval) # view-source:http://www.surveysystem.com/sscalc.htm #zval = 1.96 # 95 percent confidence #zValC = 3.8416 # #zValC = 6.6564 #import statsmodels.stats.api as sms #es = sms.proportion_effectsize(0.5, 0.75) #sms.NormalIndPower().solve_power(es, power=0.9, alpha=0.05, ratio=1) pop = 279 num_positive = 3 sample_size = 15 conf_level = .95 #conf_level = .99 vt.calc_error_bars_from_sample(sample_size, num_positive, pop, conf_level) print('---') vt.calc_error_bars_from_sample(sample_size + 38, num_positive, pop, conf_level) print('---') vt.calc_error_bars_from_sample(sample_size + 38 / 3, num_positive, pop, conf_level) print('---') vt.calc_error_bars_from_sample(15 + 38, num_positive=3, pop=675, conf_level=.95) vt.calc_error_bars_from_sample(15, num_positive=3, pop=675, conf_level=.95) pop = 279 #err_frac = .05 # 5% err_frac = .10 # 10% conf_level = .95 vt.calc_sample_from_error_bars(err_frac, pop, conf_level) pop = 675 vt.calc_sample_from_error_bars(err_frac, pop, conf_level) vt.calc_sample_from_error_bars(.05, pop, conf_level=.95, prior=.1) vt.calc_sample_from_error_bars(.05, pop, conf_level=.68, prior=.2) vt.calc_sample_from_error_bars(.10, pop, conf_level=.68) vt.calc_error_bars_from_sample(100, num_positive=5, pop=675, conf_level=.95) vt.calc_error_bars_from_sample(100, num_positive=5, pop=675, conf_level=.68)
def compute_occurrence_groups(ibs, gid_list, cluster_algo, cfgdict={}, use_gps=False,
                              verbose=None):
    r"""
    Args:
        ibs (IBEISController): ibeis controller object
        gid_list (list):

    Returns:
        tuple: (occur_labels, occur_gids)

    CommandLine:
        python -m ibeis --tf compute_occurrence_groups
        python -m ibeis --tf compute_occurrence_groups --show --zoom=.3

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.preproc.preproc_occurrence import *  # NOQA
        >>> import ibeis
        >>> import vtool as vt
        >>> #ibs = ibeis.opendb(defaultdb='testdb1')
        >>> ibs = ibeis.opendb(defaultdb='PZ_Master1')
        >>> gid_list = ibs.get_valid_gids(require_unixtime=True, require_gps=True)
        >>> use_gps = True
        >>> cluster_algo = 'meanshift'
        >>> cfgdict = dict(quantile=.005, min_imgs_per_occurence=2)
        >>> (occur_labels, occur_gids) = compute_occurrence_groups(ibs, gid_list, cluster_algo, cfgdict, use_gps=use_gps)
        >>> aidsgroups_list = ibs.unflat_map(ibs.get_image_aids, occur_gids)
        >>> aids_list = list(map(ut.flatten, aidsgroups_list))
        >>> nids_list = list(map(np.array, ibs.unflat_map(ibs.get_annot_name_rowids, aids_list)))
        >>> metric = [len(np.unique(nids[nids > -1])) for nids in nids_list]
        >>> metric = [vt.safe_max(np.array(ut.dict_hist(nids).values())) for nids in nids_list]
        >>> #metric = list(map(len, aids_list))
        >>> sortx = ut.list_argsort(metric)[::-1]
        >>> index = sortx[20]
        >>> #gids = occur_gids[index]
        >>> aids = aids_list[index]
        >>> aids = ibs.filter_annots_general(aids, min_qual='ok', is_known=True)
        >>> gids = list(set(ibs.get_annot_gids(aids)))
        >>> print('len(aids) = %r' % (len(aids),))
        >>> img_list = ibs.get_images(gids)
        >>> ut.quit_if_noshow()
        >>> from ibeis.viz import viz_graph
        >>> import plottool as pt
        >>> #pt.imshow(bigimg)
        >>> #aids = ibs.group_annots_by_name(aids)[0][0]
        >>> self = viz_graph.make_name_graph_interaction(ibs, aids=aids,
        >>>                                              with_all=False,
        >>>                                              prog='neato')
        >>> ut.show_if_requested()

        ibs.unflat_map(ibs.get_annot_case_tags, aids_list)
        ibs.filter_aidpairs_by_tags(has_any='photobomb')

        photobomb_aids = ibs.filter_aidpairs_by_tags(has_any='photobomb')
        aids = photobomb_aids[0:10].flatten()
        _gt_aids = ibs.get_annot_groundtruth(aids)
        gt_aids = ut.get_list_column_slice(_gt_aids, slice(0, 3))
        aid_set = np.unique(np.append(aids.flatten(), ut.flatten(gt_aids)))
        aid_set = ibs.filter_annots_general(aid_set, minqual='ok')

        # This is the set of annotations used for testing intraoccurrence photobombs
        #print(ut.repr3(ibeis.other.dbinfo.get_dbinfo(ibs, aid_list=aid_set), strvals=True, nl=1))
        print(ut.repr3(ibs.get_annot_stats_dict(aid_set, forceall=True), strvals=True, nl=1))
    """
    if verbose is None:
        verbose = ut.NOT_QUIET
    # Config info
    gid_list = np.unique(gid_list)
    if verbose:
        print("[occur] Computing %r occurrences on %r images." % (cluster_algo, len(gid_list)))
    if len(gid_list) == 0:
        print("[occur] WARNING: len(gid_list) == 0. "
              "No images to compute occurrences with")
        occur_labels, occur_gids = [], []
    else:
        if len(gid_list) == 1:
            print("[occur] WARNING: clustering 1 image into its own occurrence")
            gid_arr = np.array(gid_list)
            label_arr = np.zeros(gid_arr.shape)
        else:
            X_data, gid_arr = prepare_X_data(ibs, gid_list, use_gps=use_gps)
            # Agglomerative clustering of unixtimes
            if cluster_algo == "agglomerative":
                seconds_thresh = cfgdict.get("seconds_thresh", 60.0)
                label_arr = agglomerative_cluster_occurrences(X_data, seconds_thresh)
            elif cluster_algo == "meanshift":
                quantile = cfgdict.get("quantile", 0.01)
                label_arr = meanshift_cluster_occurrences(X_data, quantile)
            else:
                raise AssertionError("[occurrence] Unknown clustering algorithm: %r" % cluster_algo)
        # Group images by unique label
        labels, label_gids = group_images_by_label(label_arr, gid_arr)
        # Remove occurrences less than the threshold
        occur_labels = labels
        occur_gids = label_gids
        occur_unixtimes = compute_occurrence_unixtime(ibs, occur_gids)
        min_imgs_per_occurence = cfgdict.get("min_imgs_per_occurence", 1)
        occur_labels, occur_gids = filter_and_relabel(labels, label_gids,
                                                      min_imgs_per_occurence,
                                                      occur_unixtimes)
    if verbose:
        print("[occur] Found %d clusters." % len(occur_labels))
    if len(label_gids) > 0 and verbose:
        print("[occur] Cluster image size stats:")
        ut.print_dict(
            ut.get_stats(list(map(len, occur_gids)), use_median=True, use_sum=True),
            "occur image stats")
    return occur_labels, occur_gids
def find_consistent_labeling(grouped_oldnames, extra_prefix='_extra_name', verbose=False): r""" Solves a a maximum bipirtite matching problem to find a consistent name assignment that minimizes the number of annotations with different names. For each new grouping of annotations we assign For each group of annotations we must assign them all the same name, either from To reduce the running time Args: gropued_oldnames (list): A group of old names where the grouping is based on new names. For instance: Given: aids = [1, 2, 3, 4, 5] old_names = [0, 1, 1, 1, 0] new_names = [0, 0, 1, 1, 0] The grouping is [[0, 1, 0], [1, 1]] This lets us keep the old names in a split case and re-use exising names and make minimal changes to current annotation names while still being consistent with the new and improved grouping. The output will be: [0, 1] Meaning that all annots in the first group are assigned the name 0 and all annots in the second group are assigned the name 1. References: http://stackoverflow.com/questions/1398822/assignment-problem-numpy CommandLine: python -m ibeis.scripts.name_recitifer find_consistent_labeling Example: >>> # ENABLE_DOCTEST >>> from ibeis.scripts.name_recitifer import * # NOQA >>> grouped_oldnames = testdata_oldnames(25, 15, 5, n_per_incon=5) >>> new_names = find_consistent_labeling(grouped_oldnames, verbose=1) >>> grouped_oldnames = testdata_oldnames(0, 15, 5, n_per_incon=1) >>> new_names = find_consistent_labeling(grouped_oldnames, verbose=1) >>> grouped_oldnames = testdata_oldnames(0, 0, 0, n_per_incon=1) >>> new_names = find_consistent_labeling(grouped_oldnames, verbose=1) Example: >>> # ENABLE_DOCTEST >>> from ibeis.scripts.name_recitifer import * # NOQA >>> ydata = [] >>> xdata = list(range(10, 150, 50)) >>> for x in xdata: >>> print('x = %r' % (x,)) >>> grouped_oldnames = testdata_oldnames(x, 15, 5, n_per_incon=5) >>> t = ut.Timerit(3, verbose=1) >>> for timer in t: >>> with timer: >>> new_names = find_consistent_labeling(grouped_oldnames) >>> ydata.append(t.ave_secs) >>> ut.quit_if_noshow() >>> import plottool_ibeis as pt >>> pt.qtensure() >>> pt.multi_plot(xdata, [ydata]) >>> ut.show_if_requested() Example: >>> # ENABLE_DOCTEST >>> from ibeis.scripts.name_recitifer import * # NOQA >>> grouped_oldnames = [['a', 'b', 'c'], ['b', 'c'], ['c', 'e', 'e']] >>> new_names = find_consistent_labeling(grouped_oldnames, verbose=1) >>> result = ut.repr2(new_names) >>> print(new_names) ['a', 'b', 'e'] Example: >>> # ENABLE_DOCTEST >>> from ibeis.scripts.name_recitifer import * # NOQA >>> grouped_oldnames = [['a', 'b'], ['a', 'a', 'b'], ['a']] >>> new_names = find_consistent_labeling(grouped_oldnames) >>> result = ut.repr2(new_names) >>> print(new_names) ['b', 'a', '_extra_name0'] Example: >>> # ENABLE_DOCTEST >>> from ibeis.scripts.name_recitifer import * # NOQA >>> grouped_oldnames = [['a', 'b'], ['e'], ['a', 'a', 'b'], [], ['a'], ['d']] >>> new_names = find_consistent_labeling(grouped_oldnames) >>> result = ut.repr2(new_names) >>> print(new_names) ['b', 'e', 'a', '_extra_name0', '_extra_name1', 'd'] Example: >>> # ENABLE_DOCTEST >>> from ibeis.scripts.name_recitifer import * # NOQA >>> grouped_oldnames = [[], ['a', 'a'], [], >>> ['a', 'a', 'a', 'a', 'a', 'a', 'a', 'b'], ['a']] >>> new_names = find_consistent_labeling(grouped_oldnames) >>> result = ut.repr2(new_names) >>> print(new_names) ['_extra_name0', 'a', '_extra_name1', 'b', '_extra_name2'] """ unique_old_names = ut.unique(ut.flatten(grouped_oldnames)) n_old_names = len(unique_old_names) n_new_names = 
len(grouped_oldnames) # Initialize assignment to all Nones assignment = [None for _ in range(n_new_names)] if verbose: print('finding maximally consistent labeling') print('n_old_names = %r' % (n_old_names, )) print('n_new_names = %r' % (n_new_names, )) # For each old_name, determine now many new_names use it. oldname_sets = list(map(set, grouped_oldnames)) oldname_usage = ut.dict_hist(ut.flatten(oldname_sets)) # Any name used more than once is a conflict and must be resolved conflict_oldnames = {k for k, v in oldname_usage.items() if v > 1} # Partition into trivial and non-trivial cases nontrivial_oldnames = [] nontrivial_new_idxs = [] trivial_oldnames = [] trivial_new_idxs = [] for new_idx, group in enumerate(grouped_oldnames): if set(group).intersection(conflict_oldnames): nontrivial_oldnames.append(group) nontrivial_new_idxs.append(new_idx) else: trivial_oldnames.append(group) trivial_new_idxs.append(new_idx) # Rectify trivial cases # Any new-name that does not share any of its old-names with other # new-names can be resolved trivially n_trivial_unchanged = 0 n_trivial_ignored = 0 n_trivial_merges = 0 for group, new_idx in zip(trivial_oldnames, trivial_new_idxs): if len(group) > 0: # new-names that use more than one old-name are simple merges h = ut.dict_hist(group) if len(h) > 1: n_trivial_merges += 1 else: n_trivial_unchanged += 1 hitems = list(h.items()) hvals = [i[1] for i in hitems] maxval = max(hvals) g = min([k for k, v in hitems if v == maxval]) assignment[new_idx] = g else: # new-names that use no old-names can be ignored n_trivial_ignored += 1 if verbose: n_trivial = len(trivial_oldnames) n_nontrivial = len(nontrivial_oldnames) print('rectify %d trivial groups' % (n_trivial, )) print(' * n_trivial_unchanged = %r' % (n_trivial_unchanged, )) print(' * n_trivial_merges = %r' % (n_trivial_merges, )) print(' * n_trivial_ignored = %r' % (n_trivial_ignored, )) print('rectify %d non-trivial groups' % (n_nontrivial, )) # Partition nontrivial_oldnames into smaller disjoint sets nontrivial_oldnames_sets = list(map(set, nontrivial_oldnames)) import networkx as nx g = nx.Graph() g.add_nodes_from(range(len(nontrivial_oldnames_sets))) for u, group1 in enumerate(nontrivial_oldnames_sets): rest = nontrivial_oldnames_sets[u + 1:] for v, group2 in enumerate(rest, start=u + 1): if group1.intersection(group2): g.add_edge(u, v) nontrivial_partition = list(nx.connected_components(g)) if verbose: print(' * partitioned non-trivial into %d subgroups' % (len(nontrivial_partition))) part_size_stats = ut.get_stats(map(len, nontrivial_partition)) stats_str = ut.repr2(part_size_stats, precision=2, strkeys=True) print(' * partition size stats = %s' % (stats_str, )) # Rectify nontrivial cases for part_idxs in ut.ProgIter(nontrivial_partition, labels='rectify parts', enabled=verbose): part_oldnames = ut.take(nontrivial_oldnames, part_idxs) part_newidxs = ut.take(nontrivial_new_idxs, part_idxs) # Rectify this part assignment_ = simple_munkres(part_oldnames) for new_idx, new_name in zip(part_newidxs, assignment_): assignment[new_idx] = new_name # Any unassigned name is now given a new unique label with a prefix if extra_prefix is not None: num_extra = 0 for idx, val in enumerate(assignment): if val is None: assignment[idx] = '%s%d' % ( extra_prefix, num_extra, ) num_extra += 1 return assignment
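# The non-trivial case in find_consistent_labeling above reduces to an
# assignment problem. A minimal, self-contained sketch with scipy (the
# simple_munkres helper itself is not shown in this snippet): rows are new
# groups, columns are candidate old names, and the cost is the negated number
# of votes each old name receives in each group.
import numpy as np
from scipy.optimize import linear_sum_assignment

grouped_oldnames = [['a', 'b', 'c'], ['b', 'c'], ['c', 'e', 'e']]
unique_names = sorted(set(n for group in grouped_oldnames for n in group))
cost = np.zeros((len(grouped_oldnames), len(unique_names)))
for i, group in enumerate(grouped_oldnames):
    for j, name in enumerate(unique_names):
        cost[i, j] = -group.count(name)   # more votes -> lower cost

row_ind, col_ind = linear_sum_assignment(cost)
assignment = [unique_names[j] for j in col_ind]
print(assignment)  # one old name per new group, e.g. ['a', 'b', 'e']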
def get_dbinfo(ibs, verbose=True, with_imgsize=False, with_bytes=False, with_contrib=False, with_agesex=False, with_header=True, short=False, tag='dbinfo', aid_list=None): """ Returns dictionary of digestable database information Infostr is a string summary of all the stats. Prints infostr in addition to returning locals Args: ibs (IBEISController): verbose (bool): with_imgsize (bool): with_bytes (bool): Returns: dict: CommandLine: python -m ibeis.other.dbinfo --exec-get_dbinfo:0 python -m ibeis.other.dbinfo --test-get_dbinfo:1 python -m ibeis.other.dbinfo --test-get_dbinfo:0 --db NNP_Master3 python -m ibeis.other.dbinfo --test-get_dbinfo:0 --db PZ_Master1 python -m ibeis.other.dbinfo --test-get_dbinfo:0 --db GZ_ALL python -m ibeis.other.dbinfo --exec-get_dbinfo:0 --db PZ_ViewPoints python -m ibeis.other.dbinfo --exec-get_dbinfo:0 --db GZ_Master1 python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a ctrl python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default:minqual=ok,require_timestamp=True --dbdir ~/lev/media/danger/LEWA python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default:minqual=ok,require_timestamp=True --dbdir ~/lev/media/danger/LEWA --loadbackup=0 python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default: --dbdir ~/lev/media/danger/LEWA python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default: --dbdir ~/lev/media/danger/LEWA --loadbackup=0 Example1: >>> # SCRIPT >>> from ibeis.other.dbinfo import * # NOQA >>> import ibeis >>> defaultdb = 'testdb1' >>> ibs, aid_list = ibeis.testdata_aids(defaultdb, a='default:minqual=ok,view=primary,view_ext1=1') >>> kwargs = ut.get_kwdefaults(get_dbinfo) >>> kwargs['verbose'] = False >>> kwargs['aid_list'] = aid_list >>> kwargs = ut.parse_dict_from_argv(kwargs) >>> output = get_dbinfo(ibs, **kwargs) >>> result = (output['info_str']) >>> print(result) >>> #ibs = ibeis.opendb(defaultdb='testdb1') >>> # <HACK FOR FILTERING> >>> #from ibeis.expt import cfghelpers >>> #from ibeis.expt import annotation_configs >>> #from ibeis.init import filter_annots >>> #named_defaults_dict = ut.dict_take(annotation_configs.__dict__, >>> # annotation_configs.TEST_NAMES) >>> #named_qcfg_defaults = dict(zip(annotation_configs.TEST_NAMES, >>> # ut.get_list_column(named_defaults_dict, 'qcfg'))) >>> #acfg = cfghelpers.parse_argv_cfg(('--annot-filter', '-a'), named_defaults_dict=named_qcfg_defaults, default=None)[0] >>> #aid_list = ibs.get_valid_aids() >>> # </HACK FOR FILTERING> Example1: >>> # ENABLE_DOCTEST >>> from ibeis.other.dbinfo import * # NOQA >>> import ibeis >>> verbose = True >>> short = True >>> #ibs = ibeis.opendb(db='GZ_ALL') >>> #ibs = ibeis.opendb(db='PZ_Master0') >>> ibs = ibeis.opendb('testdb1') >>> assert ibs.get_dbname() == 'testdb1', 'DO NOT DELETE CONTRIBUTORS OF OTHER DBS' >>> ibs.delete_contributors(ibs.get_valid_contrib_rowids()) >>> ibs.delete_empty_nids() >>> #ibs = ibeis.opendb(db='PZ_MTEST') >>> output = get_dbinfo(ibs, with_contrib=False, verbose=False, short=True) >>> result = (output['info_str']) >>> print(result) +============================ DB Info: testdb1 DB Notes: None DB NumContrib: 0 ---------- # Names = 7 # Names (unassociated) = 0 # Names (singleton) = 5 # Names (multiton) = 2 ---------- # Annots = 13 # Annots (unknown) = 4 # Annots (singleton) = 5 # Annots (multiton) = 4 ---------- # Img = 13 L============================ """ # TODO Database size in bytes # TODO: occurrence, contributors, etc... 
# Basic variables request_annot_subset = False _input_aid_list = aid_list # NOQA if aid_list is None: valid_aids = ibs.get_valid_aids() valid_nids = ibs.get_valid_nids() valid_gids = ibs.get_valid_gids() else: if isinstance(aid_list, str): # Hack to get experiment stats on aids acfg_name_list = [aid_list] print('Specified custom aids via acfgname %s' % (acfg_name_list,)) from ibeis.expt import experiment_helpers acfg_list, expanded_aids_list = experiment_helpers.get_annotcfg_list( ibs, acfg_name_list) aid_list = sorted(list(set(ut.flatten(ut.flatten(expanded_aids_list))))) #aid_list = if verbose: print('Specified %d custom aids' % (len(aid_list,))) request_annot_subset = True valid_aids = aid_list valid_nids = list( set(ibs.get_annot_nids(aid_list, distinguish_unknowns=False)) - {const.UNKNOWN_NAME_ROWID} ) valid_gids = list(set(ibs.get_annot_gids(aid_list))) #associated_nids = ibs.get_valid_nids(filter_empty=True) # nids with at least one annotation FILTER_HACK = True if FILTER_HACK: # HUGE HACK - get only images and names with filtered aids valid_aids_ = ibs.filter_aids_custom(valid_aids) valid_nids_ = ibs.filter_nids_custom(valid_nids) valid_gids_ = ibs.filter_gids_custom(valid_gids) if verbose: print('Filtered %d names' % (len(valid_nids) - len(valid_nids_))) print('Filtered %d images' % (len(valid_gids) - len(valid_gids_))) print('Filtered %d annots' % (len(valid_aids) - len(valid_aids_))) valid_gids = valid_gids_ valid_nids = valid_nids_ valid_aids = valid_aids_ #associated_nids = ut.compress(associated_nids, map(any, #ibs.unflat_map(ibs.get_annot_custom_filterflags, # ibs.get_name_aids(associated_nids)))) # Image info if verbose: print('Checking Image Info') gx2_aids = ibs.get_image_aids(valid_gids) if FILTER_HACK: gx2_aids = [ibs.filter_aids_custom(aids) for aids in gx2_aids] # HACK FOR FILTER if request_annot_subset: # remove annots not in this subset valid_aids_set = set(valid_aids) gx2_aids = [list(set(aids).intersection(valid_aids_set)) for aids in gx2_aids] gx2_nAnnots = np.array(list(map(len, gx2_aids))) image_without_annots = len(np.where(gx2_nAnnots == 0)[0]) gx2_nAnnots_stats = ut.get_stats_str(gx2_nAnnots, newlines=True, use_median=True) image_reviewed_list = ibs.get_image_reviewed(valid_gids) # Name stats if verbose: print('Checking Name Info') nx2_aids = ibs.get_name_aids(valid_nids) if FILTER_HACK: nx2_aids = [ibs.filter_aids_custom(aids) for aids in nx2_aids] # HACK FOR FILTER if request_annot_subset: # remove annots not in this subset valid_aids_set = set(valid_aids) nx2_aids = [list(set(aids).intersection(valid_aids_set)) for aids in nx2_aids] associated_nids = ut.compress(valid_nids, list(map(len, nx2_aids))) ibs.check_name_mapping_consistency(nx2_aids) # Occurrence Info def compute_annot_occurrence_ids(ibs, aid_list): from ibeis.algo.preproc import preproc_occurrence gid_list = ibs.get_annot_gids(aid_list) gid2_aids = ut.group_items(aid_list, gid_list) flat_imgsetids, flat_gids = preproc_occurrence.ibeis_compute_occurrences(ibs, gid_list, seconds_thresh=4 * 60 * 60, verbose=False) occurid2_gids = ut.group_items(flat_gids, flat_imgsetids) occurid2_aids = {oid: ut.flatten(ut.take(gid2_aids, gids)) for oid, gids in occurid2_gids.items()} return occurid2_aids import utool with utool.embed_on_exception_context: occurid2_aids = compute_annot_occurrence_ids(ibs, valid_aids) occur_nids = ibs.unflat_map(ibs.get_annot_nids, occurid2_aids.values()) occur_unique_nids = [ut.unique(nids) for nids in occur_nids] nid2_occurxs = ut.ddict(list) for occurx, nids in 
enumerate(occur_unique_nids): for nid in nids: nid2_occurxs[nid].append(occurx) nid2_occurx_single = {nid: occurxs for nid, occurxs in nid2_occurxs.items() if len(occurxs) <= 1} nid2_occurx_resight = {nid: occurxs for nid, occurxs in nid2_occurxs.items() if len(occurxs) > 1} singlesight_encounters = ibs.get_name_aids(nid2_occurx_single.keys()) singlesight_annot_stats = ut.get_stats(list(map(len, singlesight_encounters)), use_median=True, use_sum=True) resight_name_stats = ut.get_stats(list(map(len, nid2_occurx_resight.values())), use_median=True, use_sum=True) try: aid_pairs = ibs.filter_aidpairs_by_tags(min_num=0) undirected_tags = ibs.get_aidpair_tags(aid_pairs.T[0], aid_pairs.T[1], directed=False) tagged_pairs = list(zip(aid_pairs.tolist(), undirected_tags)) tag_dict = ut.groupby_tags(tagged_pairs, undirected_tags) pair_tag_info = ut.map_dict_vals(len, tag_dict) num_reviewed_pairs = sum(ibs.get_annot_pair_is_reviewed(aid_pairs.T[0], aid_pairs.T[1])) pair_tag_info['num_reviewed'] = num_reviewed_pairs except Exception: pair_tag_info = {} #print(ut.dict_str(pair_tag_info)) # Annot Stats # TODO: number of images where chips cover entire image # TODO: total image coverage of annotation # TODO: total annotation overlap """ ax2_unknown = ibs.is_aid_unknown(valid_aids) ax2_nid = ibs.get_annot_name_rowids(valid_aids) assert all([nid < 0 if unknown else nid > 0 for nid, unknown in zip(ax2_nid, ax2_unknown)]), 'bad annot nid' """ # if verbose: print('Checking Annot Species') unknown_aids = ut.compress(valid_aids, ibs.is_aid_unknown(valid_aids)) species_list = ibs.get_annot_species_texts(valid_aids) species2_aids = ut.group_items(valid_aids, species_list) species2_nAids = {key: len(val) for key, val in species2_aids.items()} if verbose: print('Checking Multiton/Singleton Species') nx2_nAnnots = np.array(list(map(len, nx2_aids))) # Seperate singleton / multitons multiton_nxs = np.where(nx2_nAnnots > 1)[0] singleton_nxs = np.where(nx2_nAnnots == 1)[0] unassociated_nxs = np.where(nx2_nAnnots == 0)[0] assert len(np.intersect1d(singleton_nxs, multiton_nxs)) == 0, 'intersecting names' valid_nxs = np.hstack([multiton_nxs, singleton_nxs]) num_names_with_gt = len(multiton_nxs) # Annot Info if verbose: print('Checking Annot Info') multiton_aids_list = ut.take(nx2_aids, multiton_nxs) assert len(set(multiton_nxs)) == len(multiton_nxs) if len(multiton_aids_list) == 0: multiton_aids = np.array([], dtype=np.int) else: multiton_aids = np.hstack(multiton_aids_list) assert len(set(multiton_aids)) == len(multiton_aids), 'duplicate annot' singleton_aids = ut.take(nx2_aids, singleton_nxs) multiton_nid2_nannots = list(map(len, multiton_aids_list)) # Image size stats if with_imgsize: if verbose: print('Checking ImageSize Info') gpath_list = ibs.get_image_paths(valid_gids) def wh_print_stats(wh_list): if len(wh_list) == 0: return '{empty}' wh_list = np.asarray(wh_list) stat_dict = OrderedDict( [( 'max', wh_list.max(0)), ( 'min', wh_list.min(0)), ('mean', wh_list.mean(0)), ( 'std', wh_list.std(0))]) def arr2str(var): return ('[' + ( ', '.join(list(map(lambda x: '%.1f' % x, var))) ) + ']') ret = (',\n '.join([ '%s:%s' % (key, arr2str(val)) for key, val in stat_dict.items() ])) return '{\n ' + ret + '\n}' print('reading image sizes') # Image size stats img_size_list = ibs.get_image_sizes(valid_gids) img_size_stats = wh_print_stats(img_size_list) # Chip size stats annotation_bbox_list = ibs.get_annot_bboxes(valid_aids) annotation_bbox_arr = np.array(annotation_bbox_list) if len(annotation_bbox_arr) == 0: annotation_size_list 
= [] else: annotation_size_list = annotation_bbox_arr[:, 2:4] chip_size_stats = wh_print_stats(annotation_size_list) imgsize_stat_lines = [ (' # Img in dir = %d' % len(gpath_list)), (' Image Size Stats = %s' % (img_size_stats,)), (' * Chip Size Stats = %s' % (chip_size_stats,)), ] else: imgsize_stat_lines = [] if verbose: print('Building Stats String') multiton_stats = ut.get_stats_str(multiton_nid2_nannots, newlines=True, use_median=True) # Time stats unixtime_list = ibs.get_image_unixtime(valid_gids) unixtime_list = ut.list_replace(unixtime_list, -1, float('nan')) #valid_unixtime_list = [time for time in unixtime_list if time != -1] #unixtime_statstr = ibs.get_image_time_statstr(valid_gids) if ut.get_argflag('--hackshow-unixtime'): show_time_distributions(ibs, unixtime_list) ut.show_if_requested() unixtime_statstr = ut.get_timestats_str(unixtime_list, newlines=True, full=True) # GPS stats gps_list_ = ibs.get_image_gps(valid_gids) gpsvalid_list = [gps != (-1, -1) for gps in gps_list_] gps_list = ut.compress(gps_list_, gpsvalid_list) def get_annot_age_stats(aid_list): annot_age_months_est_min = ibs.get_annot_age_months_est_min(aid_list) annot_age_months_est_max = ibs.get_annot_age_months_est_max(aid_list) age_dict = ut.ddict((lambda : 0)) for min_age, max_age in zip(annot_age_months_est_min, annot_age_months_est_max): if (min_age is None or min_age < 12) and max_age < 12: age_dict['Infant'] += 1 elif 12 <= min_age and min_age < 36 and 12 <= max_age and max_age < 36: age_dict['Juvenile'] += 1 elif 36 <= min_age and (36 <= max_age or max_age is None): age_dict['Adult'] += 1 else: print('Found UNKNOWN Age: %r, %r' % (min_age, max_age, )) age_dict['UNKNOWN'] += 1 return age_dict def get_annot_sex_stats(aid_list): annot_sextext_list = ibs.get_annot_sex_texts(aid_list) sextext2_aids = ut.group_items(aid_list, annot_sextext_list) sex_keys = list(ibs.const.SEX_TEXT_TO_INT.keys()) assert set(sex_keys) >= set(annot_sextext_list), 'bad keys: ' + str(set(annot_sextext_list) - set(sex_keys)) sextext2_nAnnots = ut.odict([(key, len(sextext2_aids.get(key, []))) for key in sex_keys]) # Filter 0's sextext2_nAnnots = {key: val for key, val in six.iteritems(sextext2_nAnnots) if val != 0} return sextext2_nAnnots if verbose: print('Checking Other Annot Stats') qualtext2_nAnnots = ibs.get_annot_qual_stats(valid_aids) yawtext2_nAnnots = ibs.get_annot_yaw_stats(valid_aids) agetext2_nAnnots = get_annot_age_stats(valid_aids) sextext2_nAnnots = get_annot_sex_stats(valid_aids) if verbose: print('Checking Contrib Stats') # Contributor Statistics # hack remove colon for image alignment def fix_tag_list(tag_list): return [None if tag is None else tag.replace(':', ';') for tag in tag_list] image_contrib_tags = fix_tag_list(ibs.get_image_contributor_tag(valid_gids)) annot_contrib_tags = fix_tag_list(ibs.get_annot_image_contributor_tag(valid_aids)) contrib_tag_to_gids = ut.group_items(valid_gids, image_contrib_tags) contrib_tag_to_aids = ut.group_items(valid_aids, annot_contrib_tags) contrib_tag_to_qualstats = {key: ibs.get_annot_qual_stats(aids) for key, aids in six.iteritems(contrib_tag_to_aids)} contrib_tag_to_viewstats = {key: ibs.get_annot_yaw_stats(aids) for key, aids in six.iteritems(contrib_tag_to_aids)} contrib_tag_to_nImages = {key: len(val) for key, val in six.iteritems(contrib_tag_to_gids)} contrib_tag_to_nAnnots = {key: len(val) for key, val in six.iteritems(contrib_tag_to_aids)} if verbose: print('Summarizing') # Summarize stats num_names = len(valid_nids) num_names_unassociated = len(valid_nids) - 
len(associated_nids) num_names_singleton = len(singleton_nxs) num_names_multiton = len(multiton_nxs) num_singleton_annots = len(singleton_aids) num_multiton_annots = len(multiton_aids) num_unknown_annots = len(unknown_aids) num_annots = len(valid_aids) if with_bytes: if verbose: print('Checking Disk Space') ibsdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_ibsdir())) dbdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_dbdir())) imgdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_imgdir())) cachedir_space = ut.byte_str2(ut.get_disk_space(ibs.get_cachedir())) if True: if verbose: print('Check asserts') try: bad_aids = np.intersect1d(multiton_aids, unknown_aids) _num_names_total_check = num_names_singleton + num_names_unassociated + num_names_multiton _num_annots_total_check = num_unknown_annots + num_singleton_annots + num_multiton_annots assert len(bad_aids) == 0, 'intersecting multiton aids and unknown aids' assert _num_names_total_check == num_names, 'inconsistent num names' #if not request_annot_subset: # dont check this if you have an annot subset assert _num_annots_total_check == num_annots, 'inconsistent num annots' except Exception as ex: ut.printex(ex, keys=[ '_num_names_total_check', 'num_names', '_num_annots_total_check', 'num_annots', 'num_names_singleton', 'num_names_multiton', 'num_unknown_annots', 'num_multiton_annots', 'num_singleton_annots', ]) raise # Get contributor statistics contrib_rowids = ibs.get_valid_contrib_rowids() num_contributors = len(contrib_rowids) # print num_tabs = 5 def align2(str_): return ut.align(str_, ':', ' :') def align_dict2(dict_): str_ = ut.dict_str(dict_) return align2(str_) header_block_lines = ( [('+============================'), ] + ( [ ('+ singleton := single sighting'), ('+ multiton := multiple sightings'), ('--' * num_tabs), ] if not short and with_header else [] ) ) source_block_lines = [ ('DB Info: ' + ibs.get_dbname()), ('DB Notes: ' + ibs.get_dbnotes()), ('DB NumContrib: %d' % num_contributors), ] bytes_block_lines = [ ('--' * num_tabs), ('DB Bytes: '), (' +- dbdir nBytes: ' + dbdir_space), (' | +- _ibsdb nBytes: ' + ibsdir_space), (' | | +-imgdir nBytes: ' + imgdir_space), (' | | +-cachedir nBytes: ' + cachedir_space), ] if with_bytes else [] name_block_lines = [ ('--' * num_tabs), ('# Names = %d' % num_names), ('# Names (unassociated) = %d' % num_names_unassociated), ('# Names (singleton) = %d' % num_names_singleton), ('# Names (multiton) = %d' % num_names_multiton), ] subset_str = ' ' if not request_annot_subset else '(SUBSET)' annot_block_lines = [ ('--' * num_tabs), ('# Annots %s = %d' % (subset_str, num_annots,)), ('# Annots (unknown) = %d' % num_unknown_annots), ('# Annots (singleton) = %d' % num_singleton_annots), ('# Annots (multiton) = %d' % num_multiton_annots), ] annot_per_basic_block_lines = [ ('--' * num_tabs), ('# Annots per Name (multiton) = %s' % (align2(multiton_stats),)), ('# Annots per Image = %s' % (align2(gx2_nAnnots_stats),)), ('# Annots per Species = %s' % (align_dict2(species2_nAids),)), ] if not short else [] occurrence_block_lines = [ ('--' * num_tabs), ('# Occurrence Per Name (Resights) = %s' % (align_dict2(resight_name_stats),)), ('# Annots per Encounter (Singlesights) = %s' % (align_dict2(singlesight_annot_stats),)), ('# Pair Tag Info (annots) = %s' % (align_dict2(pair_tag_info),)), ] if not short else [] annot_per_qualview_block_lines = [ None if short else '# Annots per Viewpoint = %s' % align_dict2(yawtext2_nAnnots), None if short else '# Annots per Quality = %s' % align_dict2(qualtext2_nAnnots), 
] annot_per_agesex_block_lines = [ '# Annots per Age = %s' % align_dict2(agetext2_nAnnots), '# Annots per Sex = %s' % align_dict2(sextext2_nAnnots), ] if not short and with_agesex else [] contrib_block_lines = [ '# Images per contributor = ' + align_dict2(contrib_tag_to_nImages), '# Annots per contributor = ' + align_dict2(contrib_tag_to_nAnnots), '# Quality per contributor = ' + ut.dict_str(contrib_tag_to_qualstats, sorted_=True), '# Viewpoint per contributor = ' + ut.dict_str(contrib_tag_to_viewstats, sorted_=True), ] if with_contrib else [] img_block_lines = [ ('--' * num_tabs), ('# Img = %d' % len(valid_gids)), None if short else ('# Img reviewed = %d' % sum(image_reviewed_list)), None if short else ('# Img with gps = %d' % len(gps_list)), #('# Img with timestamp = %d' % len(valid_unixtime_list)), None if short else ('Img Time Stats = %s' % (align2(unixtime_statstr),)), ] info_str_lines = ( header_block_lines + bytes_block_lines + source_block_lines + name_block_lines + annot_block_lines + annot_per_basic_block_lines + occurrence_block_lines + annot_per_qualview_block_lines + annot_per_agesex_block_lines + img_block_lines + contrib_block_lines + imgsize_stat_lines + [('L============================'), ] ) info_str = '\n'.join(ut.filter_Nones(info_str_lines)) info_str2 = ut.indent(info_str, '[{tag}]'.format(tag=tag)) if verbose: print(info_str2) locals_ = locals() return locals_
def run_asmk_script(): with ut.embed_on_exception_context: # NOQA """ >>> from wbia.algo.smk.script_smk import * """ # NOQA # ============================================== # PREPROCESSING CONFIGURATION # ============================================== config = { # 'data_year': 2013, 'data_year': None, 'dtype': 'float32', # 'root_sift': True, 'root_sift': False, # 'centering': True, 'centering': False, 'num_words': 2**16, # 'num_words': 1E6 # 'num_words': 8000, 'kmeans_impl': 'sklearn.mini', 'extern_words': False, 'extern_assign': False, 'assign_algo': 'kdtree', 'checks': 1024, 'int_rvec': True, 'only_xy': False, } # Define which params are relevant for which operations relevance = {} relevance['feats'] = ['dtype', 'root_sift', 'centering', 'data_year'] relevance['words'] = relevance['feats'] + [ 'num_words', 'extern_words', 'kmeans_impl', ] relevance['assign'] = relevance['words'] + [ 'checks', 'extern_assign', 'assign_algo', ] # relevance['ydata'] = relevance['assign'] + ['int_rvec'] # relevance['xdata'] = relevance['assign'] + ['only_xy', 'int_rvec'] nAssign = 1 class SMKCacher(ut.Cacher): def __init__(self, fname, ext='.cPkl'): relevant_params = relevance[fname] relevant_cfg = ut.dict_subset(config, relevant_params) cfgstr = ut.get_cfg_lbl(relevant_cfg) dbdir = ut.truepath('/raid/work/Oxford/') super(SMKCacher, self).__init__(fname, cfgstr, cache_dir=dbdir, ext=ext) # ============================================== # LOAD DATASET, EXTRACT AND POSTPROCESS FEATURES # ============================================== if config['data_year'] == 2007: data = load_oxford_2007() elif config['data_year'] == 2013: data = load_oxford_2013() elif config['data_year'] is None: data = load_oxford_wbia() offset_list = data['offset_list'] all_kpts = data['all_kpts'] raw_vecs = data['all_vecs'] query_uri_order = data['query_uri_order'] data_uri_order = data['data_uri_order'] # del data # ================ # PRE-PROCESS # ================ import vtool as vt # Alias names to avoid errors in interactive sessions proc_vecs = raw_vecs del raw_vecs feats_cacher = SMKCacher('feats', ext='.npy') all_vecs = feats_cacher.tryload() if all_vecs is None: if config['dtype'] == 'float32': logger.info('Converting vecs to float32') proc_vecs = proc_vecs.astype(np.float32) else: proc_vecs = proc_vecs raise NotImplementedError('other dtype') if config['root_sift']: with ut.Timer('Apply root sift'): np.sqrt(proc_vecs, out=proc_vecs) vt.normalize(proc_vecs, ord=2, axis=1, out=proc_vecs) if config['centering']: with ut.Timer('Apply centering'): mean_vec = np.mean(proc_vecs, axis=0) # Center and then re-normalize np.subtract(proc_vecs, mean_vec[None, :], out=proc_vecs) vt.normalize(proc_vecs, ord=2, axis=1, out=proc_vecs) if config['dtype'] == 'int8': smk_funcs all_vecs = proc_vecs feats_cacher.save(all_vecs) del proc_vecs # ===================================== # BUILD VISUAL VOCABULARY # ===================================== if config['extern_words']: words = data['words'] assert config['num_words'] is None or len( words) == config['num_words'] else: word_cacher = SMKCacher('words') words = word_cacher.tryload() if words is None: with ut.embed_on_exception_context: if config['kmeans_impl'] == 'sklearn.mini': import sklearn.cluster rng = np.random.RandomState(13421421) # init_size = int(config['num_words'] * 8) init_size = int(config['num_words'] * 4) # converged after 26043 iterations clusterer = sklearn.cluster.MiniBatchKMeans( config['num_words'], init_size=init_size, batch_size=1000, compute_labels=False, max_iter=20, 
random_state=rng, n_init=1, verbose=1, ) clusterer.fit(all_vecs) words = clusterer.cluster_centers_ elif config['kmeans_impl'] == 'yael': from yael import ynumpy centroids, qerr, dis, assign, nassign = ynumpy.kmeans( all_vecs, config['num_words'], init='kmeans++', verbose=True, output='all', ) words = centroids word_cacher.save(words) # ===================================== # ASSIGN EACH VECTOR TO ITS NEAREST WORD # ===================================== if config['extern_assign']: assert config[ 'extern_words'], 'need extern cluster to extern assign' idx_to_wxs = vt.atleast_nd(data['idx_to_wx'], 2) idx_to_maws = np.ones(idx_to_wxs.shape, dtype=np.float32) idx_to_wxs = np.ma.array(idx_to_wxs) idx_to_maws = np.ma.array(idx_to_maws) else: from wbia.algo.smk import vocab_indexer vocab = vocab_indexer.VisualVocab(words) dassign_cacher = SMKCacher('assign') assign_tup = dassign_cacher.tryload() if assign_tup is None: vocab.flann_params['algorithm'] = config['assign_algo'] vocab.build() # Takes 12 minutes to assign jegous vecs to 2**16 vocab with ut.Timer('assign vocab neighbors'): _idx_to_wx, _idx_to_wdist = vocab.nn_index( all_vecs, nAssign, checks=config['checks']) if nAssign > 1: idx_to_wxs, idx_to_maws = smk_funcs.weight_multi_assigns( _idx_to_wx, _idx_to_wdist, massign_alpha=1.2, massign_sigma=80.0, massign_equal_weights=True, ) else: idx_to_wxs = np.ma.masked_array(_idx_to_wx, fill_value=-1) idx_to_maws = np.ma.ones(idx_to_wxs.shape, fill_value=-1, dtype=np.float32) idx_to_maws.mask = idx_to_wxs.mask assign_tup = (idx_to_wxs, idx_to_maws) dassign_cacher.save(assign_tup) idx_to_wxs, idx_to_maws = assign_tup # Breakup vectors, keypoints, and word assignments by annotation wx_lists = [ idx_to_wxs[left:right] for left, right in ut.itertwo(offset_list) ] maw_lists = [ idx_to_maws[left:right] for left, right in ut.itertwo(offset_list) ] vecs_list = [ all_vecs[left:right] for left, right in ut.itertwo(offset_list) ] kpts_list = [ all_kpts[left:right] for left, right in ut.itertwo(offset_list) ] # ======================= # FIND QUERY SUBREGIONS # ======================= ibs, query_annots, data_annots, qx_to_dx = load_ordered_annots( data_uri_order, query_uri_order) daids = data_annots.aids qaids = query_annots.aids query_super_kpts = ut.take(kpts_list, qx_to_dx) query_super_vecs = ut.take(vecs_list, qx_to_dx) query_super_wxs = ut.take(wx_lists, qx_to_dx) query_super_maws = ut.take(maw_lists, qx_to_dx) # Mark which keypoints are within the bbox of the query query_flags_list = [] only_xy = config['only_xy'] for kpts_, bbox in zip(query_super_kpts, query_annots.bboxes): flags = kpts_inside_bbox(kpts_, bbox, only_xy=only_xy) query_flags_list.append(flags) logger.info('Queries are crops of existing database images.') logger.info('Looking at average percents') percent_list = [ flags_.sum() / flags_.shape[0] for flags_ in query_flags_list ] percent_stats = ut.get_stats(percent_list) logger.info('percent_stats = %s' % (ut.repr4(percent_stats), )) import vtool as vt query_kpts = vt.zipcompress(query_super_kpts, query_flags_list, axis=0) query_vecs = vt.zipcompress(query_super_vecs, query_flags_list, axis=0) query_wxs = vt.zipcompress(query_super_wxs, query_flags_list, axis=0) query_maws = vt.zipcompress(query_super_maws, query_flags_list, axis=0) # ======================= # CONSTRUCT QUERY / DATABASE REPR # ======================= # int_rvec = not config['dtype'].startswith('float') int_rvec = config['int_rvec'] X_list = [] _prog = ut.ProgPartial(length=len(qaids), label='new X', bs=True, adjust=True) for 
aid, fx_to_wxs, fx_to_maws in _prog( zip(qaids, query_wxs, query_maws)): X = new_external_annot(aid, fx_to_wxs, fx_to_maws, int_rvec) X_list.append(X) # ydata_cacher = SMKCacher('ydata') # Y_list = ydata_cacher.tryload() # if Y_list is None: Y_list = [] _prog = ut.ProgPartial(length=len(daids), label='new Y', bs=True, adjust=True) for aid, fx_to_wxs, fx_to_maws in _prog(zip(daids, wx_lists, maw_lists)): Y = new_external_annot(aid, fx_to_wxs, fx_to_maws, int_rvec) Y_list.append(Y) # ydata_cacher.save(Y_list) # ====================== # Add in some groundtruth logger.info('Add in some groundtruth') for Y, nid in zip(Y_list, ibs.get_annot_nids(daids)): Y.nid = nid for X, nid in zip(X_list, ibs.get_annot_nids(qaids)): X.nid = nid for Y, qual in zip(Y_list, ibs.get_annot_quality_texts(daids)): Y.qual = qual # ====================== # Add in other properties for Y, vecs, kpts in zip(Y_list, vecs_list, kpts_list): Y.vecs = vecs Y.kpts = kpts imgdir = ut.truepath('/raid/work/Oxford/oxbuild_images') for Y, imgid in zip(Y_list, data_uri_order): gpath = ut.unixjoin(imgdir, imgid + '.jpg') Y.gpath = gpath for X, vecs, kpts in zip(X_list, query_vecs, query_kpts): X.kpts = kpts X.vecs = vecs # ====================== logger.info('Building inverted list') daids = [Y.aid for Y in Y_list] # wx_list = sorted(ut.list_union(*[Y.wx_list for Y in Y_list])) wx_list = sorted(set.union(*[Y.wx_set for Y in Y_list])) assert daids == data_annots.aids assert len(wx_list) <= config['num_words'] wx_to_aids = smk_funcs.invert_lists(daids, [Y.wx_list for Y in Y_list], all_wxs=wx_list) # Compute IDF weights logger.info('Compute IDF weights') ndocs_total = len(daids) # Use only the unique number of words ndocs_per_word = np.array([len(set(wx_to_aids[wx])) for wx in wx_list]) logger.info('ndocs_perword stats: ' + ut.repr4(ut.get_stats(ndocs_per_word))) idf_per_word = smk_funcs.inv_doc_freq(ndocs_total, ndocs_per_word) wx_to_weight = dict(zip(wx_list, idf_per_word)) logger.info('idf stats: ' + ut.repr4(ut.get_stats(wx_to_weight.values()))) # Filter junk Y_list_ = [Y for Y in Y_list if Y.qual != 'junk'] # ======================= # CHOOSE QUERY KERNEL # ======================= params = { 'asmk': dict(alpha=3.0, thresh=0.0), 'bow': dict(), 'bow2': dict(), } # method = 'bow' method = 'bow2' method = 'asmk' smk = SMK(wx_to_weight, method=method, **params[method]) # Specific info for the type of query if method == 'asmk': # Make residual vectors if True: # The stacked way is 50x faster # TODO: extend for multi-assignment and record fxs flat_query_vecs = np.vstack(query_vecs) flat_query_wxs = np.vstack(query_wxs) flat_query_offsets = np.array( [0] + ut.cumsum(ut.lmap(len, query_wxs))) flat_wxs_assign = flat_query_wxs flat_offsets = flat_query_offsets flat_vecs = flat_query_vecs tup = smk_funcs.compute_stacked_agg_rvecs( words, flat_wxs_assign, flat_vecs, flat_offsets) all_agg_vecs, all_error_flags, agg_offset_list = tup if int_rvec: all_agg_vecs = smk_funcs.cast_residual_integer( all_agg_vecs) agg_rvecs_list = [ all_agg_vecs[left:right] for left, right in ut.itertwo(agg_offset_list) ] agg_flags_list = [ all_error_flags[left:right] for left, right in ut.itertwo(agg_offset_list) ] for X, agg_rvecs, agg_flags in zip(X_list, agg_rvecs_list, agg_flags_list): X.agg_rvecs = agg_rvecs X.agg_flags = agg_flags[:, None] flat_wxs_assign = idx_to_wxs flat_offsets = offset_list flat_vecs = all_vecs tup = smk_funcs.compute_stacked_agg_rvecs( words, flat_wxs_assign, flat_vecs, flat_offsets) all_agg_vecs, all_error_flags, agg_offset_list = tup if 
int_rvec: all_agg_vecs = smk_funcs.cast_residual_integer( all_agg_vecs) agg_rvecs_list = [ all_agg_vecs[left:right] for left, right in ut.itertwo(agg_offset_list) ] agg_flags_list = [ all_error_flags[left:right] for left, right in ut.itertwo(agg_offset_list) ] for Y, agg_rvecs, agg_flags in zip(Y_list, agg_rvecs_list, agg_flags_list): Y.agg_rvecs = agg_rvecs Y.agg_flags = agg_flags[:, None] else: # This non-stacked way is about 500x slower _prog = ut.ProgPartial(label='agg Y rvecs', bs=True, adjust=True) for Y in _prog(Y_list_): make_agg_vecs(Y, words, Y.vecs) _prog = ut.ProgPartial(label='agg X rvecs', bs=True, adjust=True) for X in _prog(X_list): make_agg_vecs(X, words, X.vecs) elif method == 'bow2': # Hack for orig tf-idf bow vector nwords = len(words) for X in ut.ProgIter(X_list, label='make bow vector'): ensure_tf(X) bow_vector(X, wx_to_weight, nwords) for Y in ut.ProgIter(Y_list_, label='make bow vector'): ensure_tf(Y) bow_vector(Y, wx_to_weight, nwords) if method != 'bow2': for X in ut.ProgIter(X_list, 'compute X gamma'): X.gamma = smk.gamma(X) for Y in ut.ProgIter(Y_list_, 'compute Y gamma'): Y.gamma = smk.gamma(Y) # Execute matches (could go faster by enumerating candidates) scores_list = [] for X in ut.ProgIter(X_list, label='query %s' % (smk, )): scores = [smk.kernel(X, Y) for Y in Y_list_] scores = np.array(scores) scores = np.nan_to_num(scores) scores_list.append(scores) import sklearn.metrics avep_list = [] _iter = list(zip(scores_list, X_list)) _iter = ut.ProgIter(_iter, label='evaluate %s' % (smk, )) for scores, X in _iter: truth = [X.nid == Y.nid for Y in Y_list_] avep = sklearn.metrics.average_precision_score(truth, scores) avep_list.append(avep) avep_list = np.array(avep_list) mAP = np.mean(avep_list) logger.info('mAP = %r' % (mAP, ))
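# A toy end-to-end sketch of the pipeline that run_asmk_script walks through
# (vocabulary -> word assignment -> aggregated residuals -> IDF -> average
# precision), using only numpy and sklearn. The data sizes, random descriptors,
# and the simple idf-weighted dot-product kernel below are illustrative
# assumptions; they are not the SMK kernel or the Oxford data used by the script.
import numpy as np
import sklearn.cluster
import sklearn.metrics

rng = np.random.RandomState(0)
vecs = rng.rand(2000, 16).astype(np.float32)   # toy descriptors
num_words = 64
km = sklearn.cluster.MiniBatchKMeans(num_words, batch_size=256,
                                     compute_labels=False, n_init=1,
                                     random_state=rng).fit(vecs)
words = km.cluster_centers_


def agg_residuals(desc, words):
    # L2-normalized per-word aggregated residual vectors (the "agg_rvecs" idea)
    wx = km.predict(desc)
    out = {}
    for w in np.unique(wx):
        r = (desc[wx == w] - words[w]).sum(axis=0)
        n = np.linalg.norm(r)
        out[w] = r / n if n > 0 else r
    return out


# IDF over a toy "database" of 20 documents of 100 descriptors each
docs = [agg_residuals(rng.rand(100, 16).astype(np.float32), words) for _ in range(20)]
ndocs_per_word = np.array([sum(w in d for d in docs) for w in range(num_words)])
idf = np.log(len(docs) / np.maximum(ndocs_per_word, 1))


def score(X, Y):
    # Sum of idf-weighted residual dot-products over shared words (simplified kernel)
    return sum(idf[w] * float(X[w].dot(Y[w])) for w in set(X) & set(Y))


query = agg_residuals(rng.rand(100, 16).astype(np.float32), words)
scores = np.array([score(query, Y) for Y in docs])
truth = rng.rand(len(docs)) > 0.5              # placeholder ground-truth labels
print(sklearn.metrics.average_precision_score(truth, scores))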
def latex_dbstats(ibs_list, **kwargs): r""" Args: ibs (IBEISController): wbia controller object CommandLine: python -m wbia.other.dbinfo --exec-latex_dbstats --dblist testdb1 python -m wbia.other.dbinfo --exec-latex_dbstats --dblist testdb1 --show python -m wbia.other.dbinfo --exec-latex_dbstats --dblist PZ_Master0 testdb1 --show python -m wbia.other.dbinfo --exec-latex_dbstats --dblist PZ_Master0 PZ_MTEST GZ_ALL --show python -m wbia.other.dbinfo --test-latex_dbstats --dblist GZ_ALL NNP_MasterGIRM_core --show Example: >>> # DISABLE_DOCTEST >>> from wbia.other.dbinfo import * # NOQA >>> import wbia >>> db_list = ut.get_argval('--dblist', type_=list, default=['testdb1']) >>> ibs_list = [wbia.opendb(db=db) for db in db_list] >>> tabular_str = latex_dbstats(ibs_list) >>> tabular_cmd = ut.latex_newcommand(ut.latex_sanitize_command_name('DatabaseInfo'), tabular_str) >>> ut.copy_text_to_clipboard(tabular_cmd) >>> write_fpath = ut.get_argval('--write', type_=str, default=None) >>> if write_fpath is not None: >>> fpath = ut.truepath(write_fpath) >>> text = ut.readfrom(fpath) >>> new_text = ut.replace_between_tags(text, tabular_cmd, '% <DBINFO>', '% </DBINFO>') >>> ut.writeto(fpath, new_text) >>> ut.print_code(tabular_cmd, 'latex') >>> ut.quit_if_noshow() >>> ut.render_latex_text('\\noindent \n' + tabular_str) """ import wbia # Parse for aids test data aids_list = [wbia.testdata_aids(ibs=ibs) for ibs in ibs_list] # dbinfo_list = [get_dbinfo(ibs, with_contrib=False, verbose=False) for ibs in ibs_list] dbinfo_list = [ get_dbinfo(ibs, with_contrib=False, verbose=False, aid_list=aids) for ibs, aids in zip(ibs_list, aids_list) ] # title = db_name + ' database statistics' title = 'Database statistics' stat_title = '# Annotations per name (multiton)' # col_lbls = [ # 'multiton', # #'singleton', # 'total', # 'multiton', # 'singleton', # 'total', # ] key_to_col_lbls = { 'num_names_multiton': 'multiton', 'num_names_singleton': 'singleton', 'num_names': 'total', 'num_multiton_annots': 'multiton', 'num_singleton_annots': 'singleton', 'num_unknown_annots': 'unknown', 'num_annots': 'total', } # Structure of columns / multicolumns multi_col_keys = [ ( '# Names', ( 'num_names_multiton', # 'num_names_singleton', 'num_names', ), ), ( '# Annots', ( 'num_multiton_annots', 'num_singleton_annots', # 'num_unknown_annots', 'num_annots', ), ), ] # multicol_lbls = [('# Names', 3), ('# Annots', 3)] multicol_lbls = [(mcolname, len(mcols)) for mcolname, mcols in multi_col_keys] # Flatten column labels col_keys = ut.flatten(ut.get_list_column(multi_col_keys, 1)) col_lbls = ut.dict_take(key_to_col_lbls, col_keys) row_lbls = [] row_values = [] # stat_col_lbls = ['max', 'min', 'mean', 'std', 'nMin', 'nMax'] stat_col_lbls = ['max', 'min', 'mean', 'std', 'med'] # stat_row_lbls = ['# Annot per Name (multiton)'] stat_row_lbls = [] stat_row_values = [] SINGLE_TABLE = False EXTRA = True for ibs, dbinfo_locals in zip(ibs_list, dbinfo_list): row_ = ut.dict_take(dbinfo_locals, col_keys) dbname = ibs.get_dbname_alias() row_lbls.append(dbname) multiton_annot_stats = ut.get_stats( dbinfo_locals['multiton_nid2_nannots'], use_median=True, nl=1 ) stat_rows = ut.dict_take(multiton_annot_stats, stat_col_lbls) if SINGLE_TABLE: row_.extend(stat_rows) else: stat_row_lbls.append(dbname) stat_row_values.append(stat_rows) row_values.append(row_) CENTERLINE = False AS_TABLE = True tablekw = dict( astable=AS_TABLE, centerline=CENTERLINE, FORCE_INT=False, precision=2, col_sep='', multicol_sep='|', **kwargs ) if EXTRA: extra_keys = [ # 'species2_nAids', 
'qualtext2_nAnnots', 'viewcode2_nAnnots', ] extra_titles = { 'species2_nAids': 'Annotations per species.', 'qualtext2_nAnnots': 'Annotations per quality.', 'viewcode2_nAnnots': 'Annotations per viewpoint.', } extra_collbls = ut.ddict(list) extra_rowvalues = ut.ddict(list) extra_tables = ut.ddict(list) for ibs, dbinfo_locals in zip(ibs_list, dbinfo_list): for key in extra_keys: extra_collbls[key] = ut.unique_ordered( extra_collbls[key] + list(dbinfo_locals[key].keys()) ) extra_collbls['qualtext2_nAnnots'] = [ 'excellent', 'good', 'ok', 'poor', 'junk', 'UNKNOWN', ] # extra_collbls['viewcode2_nAnnots'] = ['backleft', 'left', 'frontleft', 'front', 'frontright', 'right', 'backright', 'back', None] extra_collbls['viewcode2_nAnnots'] = [ 'BL', 'L', 'FL', 'F', 'FR', 'R', 'BR', 'B', None, ] for ibs, dbinfo_locals in zip(ibs_list, dbinfo_list): for key in extra_keys: extra_rowvalues[key].append( ut.dict_take(dbinfo_locals[key], extra_collbls[key], 0) ) qualalias = {'UNKNOWN': None} extra_collbls['viewcode2_nAnnots'] = [ ibs.const.YAWALIAS.get(val, val) for val in extra_collbls['viewcode2_nAnnots'] ] extra_collbls['qualtext2_nAnnots'] = [ qualalias.get(val, val) for val in extra_collbls['qualtext2_nAnnots'] ] for key in extra_keys: extra_tables[key] = ut.util_latex.make_score_tabular( row_lbls, extra_collbls[key], extra_rowvalues[key], title=extra_titles[key], col_align='r', table_position='[h!]', **tablekw ) # tabular_str = util_latex.tabular_join(tabular_body_list) if SINGLE_TABLE: col_lbls += stat_col_lbls multicol_lbls += [(stat_title, len(stat_col_lbls))] count_tabular_str = ut.util_latex.make_score_tabular( row_lbls, col_lbls, row_values, title=title, multicol_lbls=multicol_lbls, table_position='[ht!]', **tablekw ) # logger.info(row_lbls) if SINGLE_TABLE: tabular_str = count_tabular_str else: stat_tabular_str = ut.util_latex.make_score_tabular( stat_row_lbls, stat_col_lbls, stat_row_values, title=stat_title, col_align='r', table_position='[h!]', **tablekw ) # Make a table of statistics if tablekw['astable']: tablesep = '\n%--\n' else: tablesep = '\\\\\n%--\n' if EXTRA: tabular_str = tablesep.join( [count_tabular_str, stat_tabular_str] + ut.dict_take(extra_tables, extra_keys) ) else: tabular_str = tablesep.join([count_tabular_str, stat_tabular_str]) return tabular_str
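# A minimal sketch of the kind of output the make_score_tabular calls above
# produce: a LaTeX tabular body built from row labels, column labels, and row
# values. This plain-Python generator is illustrative only; the real
# ut.util_latex.make_score_tabular also handles multicolumns, alignment flags,
# and escaping that are omitted here.
def simple_score_tabular(row_lbls, col_lbls, row_values, title=''):
    header = ' & '.join([''] + col_lbls) + r' \\'
    body = [
        ' & '.join([lbl] + ['%.2f' % v for v in vals]) + r' \\'
        for lbl, vals in zip(row_lbls, row_values)
    ]
    lines = [r'\begin{tabular}{l%s}' % ('r' * len(col_lbls)), header] + body + [r'\end{tabular}']
    if title:
        lines.insert(0, '% ' + title)
    return '\n'.join(lines)


# Usage example with made-up statistics for a single database row:
print(simple_score_tabular(['testdb1'], ['max', 'min', 'mean'], [[4.0, 1.0, 2.5]],
                           title='# Annotations per name (multiton)'))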
def get_dbinfo( ibs, verbose=True, with_imgsize=False, with_bytes=False, with_contrib=False, with_agesex=False, with_header=True, short=False, tag='dbinfo', aid_list=None, aids=None, ): """ Returns dictionary of digestable database information Infostr is a string summary of all the stats. Prints infostr in addition to returning locals Args: ibs (IBEISController): verbose (bool): with_imgsize (bool): with_bytes (bool): Returns: dict: SeeAlso: python -m wbia.other.ibsfuncs --exec-get_annot_stats_dict --db PZ_PB_RF_TRAIN --use-hist=True --old=False --per_name_vpedge=False python -m wbia.other.ibsfuncs --exec-get_annot_stats_dict --db PZ_PB_RF_TRAIN --all CommandLine: python -m wbia.other.dbinfo --exec-get_dbinfo:0 python -m wbia.other.dbinfo --test-get_dbinfo:1 python -m wbia.other.dbinfo --test-get_dbinfo:0 --db NNP_Master3 python -m wbia.other.dbinfo --test-get_dbinfo:0 --db PZ_Master1 python -m wbia.other.dbinfo --test-get_dbinfo:0 --db GZ_ALL python -m wbia.other.dbinfo --exec-get_dbinfo:0 --db PZ_ViewPoints python -m wbia.other.dbinfo --exec-get_dbinfo:0 --db GZ_Master1 python -m wbia.other.dbinfo --exec-get_dbinfo:0 --db LF_Bajo_bonito -a default python -m wbia.other.dbinfo --exec-get_dbinfo:0 --db DETECT_SEATURTLES -a default --readonly python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a ctrl python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a default:minqual=ok,require_timestamp=True --dbdir ~/lev/media/danger/LEWA python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a default:minqual=ok,require_timestamp=True --dbdir ~/lev/media/danger/LEWA --loadbackup=0 python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a default: --dbdir ~/lev/media/danger/LEWA python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a default: --dbdir ~/lev/media/danger/LEWA --loadbackup=0 Example1: >>> # SCRIPT >>> from wbia.other.dbinfo import * # NOQA >>> import wbia >>> defaultdb = 'testdb1' >>> ibs, aid_list = wbia.testdata_aids(defaultdb, a='default:minqual=ok,view=primary,view_ext1=1') >>> kwargs = ut.get_kwdefaults(get_dbinfo) >>> kwargs['verbose'] = False >>> kwargs['aid_list'] = aid_list >>> kwargs = ut.parse_dict_from_argv(kwargs) >>> output = get_dbinfo(ibs, **kwargs) >>> result = (output['info_str']) >>> print(result) >>> #ibs = wbia.opendb(defaultdb='testdb1') >>> # <HACK FOR FILTERING> >>> #from wbia.expt import cfghelpers >>> #from wbia.expt import annotation_configs >>> #from wbia.init import filter_annots >>> #named_defaults_dict = ut.dict_take(annotation_configs.__dict__, >>> # annotation_configs.TEST_NAMES) >>> #named_qcfg_defaults = dict(zip(annotation_configs.TEST_NAMES, >>> # ut.get_list_column(named_defaults_dict, 'qcfg'))) >>> #acfg = cfghelpers.parse_argv_cfg(('--annot-filter', '-a'), named_defaults_dict=named_qcfg_defaults, default=None)[0] >>> #aid_list = ibs.get_valid_aids() >>> # </HACK FOR FILTERING> Example1: >>> # ENABLE_DOCTEST >>> from wbia.other.dbinfo import * # NOQA >>> import wbia >>> verbose = True >>> short = True >>> #ibs = wbia.opendb(db='GZ_ALL') >>> #ibs = wbia.opendb(db='PZ_Master0') >>> ibs = wbia.opendb('testdb1') >>> assert ibs.get_dbname() == 'testdb1', 'DO NOT DELETE CONTRIBUTORS OF OTHER DBS' >>> ibs.delete_contributors(ibs.get_valid_contributor_rowids()) >>> ibs.delete_empty_nids() >>> #ibs = wbia.opendb(db='PZ_MTEST') >>> output = get_dbinfo(ibs, with_contrib=False, verbose=False, short=True) >>> result = (output['info_str']) >>> print(result) +============================ DB Info: testdb1 DB Notes: None DB NumContrib: 0 ---------- # Names = 7 # Names (unassociated) = 0 # 
Names (singleton) = 5 # Names (multiton) = 2 ---------- # Annots = 13 # Annots (unknown) = 4 # Annots (singleton) = 5 # Annots (multiton) = 4 ---------- # Img = 13 L============================ """ # TODO Database size in bytes # TODO: occurrence, contributors, etc... if aids is not None: aid_list = aids # Basic variables request_annot_subset = False _input_aid_list = aid_list # NOQA if aid_list is None: valid_aids = ibs.get_valid_aids() valid_nids = ibs.get_valid_nids() valid_gids = ibs.get_valid_gids() else: if isinstance(aid_list, str): # Hack to get experiment stats on aids acfg_name_list = [aid_list] logger.info('Specified custom aids via acfgname %s' % (acfg_name_list,)) from wbia.expt import experiment_helpers acfg_list, expanded_aids_list = experiment_helpers.get_annotcfg_list( ibs, acfg_name_list ) aid_list = sorted(list(set(ut.flatten(ut.flatten(expanded_aids_list))))) # aid_list = if verbose: logger.info('Specified %d custom aids' % (len(aid_list,))) request_annot_subset = True valid_aids = aid_list valid_nids = list( set(ibs.get_annot_nids(aid_list, distinguish_unknowns=False)) - {const.UNKNOWN_NAME_ROWID} ) valid_gids = list(set(ibs.get_annot_gids(aid_list))) # associated_nids = ibs.get_valid_nids(filter_empty=True) # nids with at least one annotation valid_images = ibs.images(valid_gids) valid_annots = ibs.annots(valid_aids) # Image info if verbose: logger.info('Checking Image Info') gx2_aids = valid_images.aids if request_annot_subset: # remove annots not in this subset valid_aids_set = set(valid_aids) gx2_aids = [list(set(aids_).intersection(valid_aids_set)) for aids_ in gx2_aids] gx2_nAnnots = np.array(list(map(len, gx2_aids))) image_without_annots = len(np.where(gx2_nAnnots == 0)[0]) gx2_nAnnots_stats = ut.repr4( ut.get_stats(gx2_nAnnots, use_median=True), nl=0, precision=2, si=True ) image_reviewed_list = ibs.get_image_reviewed(valid_gids) # Name stats if verbose: logger.info('Checking Name Info') nx2_aids = ibs.get_name_aids(valid_nids) if request_annot_subset: # remove annots not in this subset valid_aids_set = set(valid_aids) nx2_aids = [list(set(aids_).intersection(valid_aids_set)) for aids_ in nx2_aids] associated_nids = ut.compress(valid_nids, list(map(len, nx2_aids))) ibs.check_name_mapping_consistency(nx2_aids) if False: # Occurrence Info def compute_annot_occurrence_ids(ibs, aid_list): from wbia.algo.preproc import preproc_occurrence gid_list = ibs.get_annot_gids(aid_list) gid2_aids = ut.group_items(aid_list, gid_list) config = {'seconds_thresh': 4 * 60 * 60} flat_imgsetids, flat_gids = preproc_occurrence.wbia_compute_occurrences( ibs, gid_list, config=config, verbose=False ) occurid2_gids = ut.group_items(flat_gids, flat_imgsetids) occurid2_aids = { oid: ut.flatten(ut.take(gid2_aids, gids)) for oid, gids in occurid2_gids.items() } return occurid2_aids import utool with utool.embed_on_exception_context: occurid2_aids = compute_annot_occurrence_ids(ibs, valid_aids) occur_nids = ibs.unflat_map(ibs.get_annot_nids, occurid2_aids.values()) occur_unique_nids = [ut.unique(nids) for nids in occur_nids] nid2_occurxs = ut.ddict(list) for occurx, nids in enumerate(occur_unique_nids): for nid in nids: nid2_occurxs[nid].append(occurx) nid2_occurx_single = { nid: occurxs for nid, occurxs in nid2_occurxs.items() if len(occurxs) <= 1 } nid2_occurx_resight = { nid: occurxs for nid, occurxs in nid2_occurxs.items() if len(occurxs) > 1 } singlesight_encounters = ibs.get_name_aids(nid2_occurx_single.keys()) singlesight_annot_stats = ut.get_stats( list(map(len, 
singlesight_encounters)), use_median=True, use_sum=True ) resight_name_stats = ut.get_stats( list(map(len, nid2_occurx_resight.values())), use_median=True, use_sum=True ) # Encounter Info def break_annots_into_encounters(aids): from wbia.algo.preproc import occurrence_blackbox import datetime thresh_sec = datetime.timedelta(minutes=30).seconds posixtimes = np.array(ibs.get_annot_image_unixtimes_asfloat(aids)) # latlons = ibs.get_annot_image_gps(aids) labels = occurrence_blackbox.cluster_timespace2( posixtimes, None, thresh_sec=thresh_sec ) return labels # ave_enc_time = [np.mean(times) for lbl, times in ut.group_items(posixtimes, labels).items()] # ut.square_pdist(ave_enc_time) try: am_rowids = ibs.get_annotmatch_rowids_between_groups([valid_aids], [valid_aids])[ 0 ] aid_pairs = ibs.filter_aidpairs_by_tags(min_num=0, am_rowids=am_rowids) undirected_tags = ibs.get_aidpair_tags( aid_pairs.T[0], aid_pairs.T[1], directed=False ) tagged_pairs = list(zip(aid_pairs.tolist(), undirected_tags)) tag_dict = ut.groupby_tags(tagged_pairs, undirected_tags) pair_tag_info = ut.map_dict_vals(len, tag_dict) except Exception: pair_tag_info = {} # logger.info(ut.repr2(pair_tag_info)) # Annot Stats # TODO: number of images where chips cover entire image # TODO: total image coverage of annotation # TODO: total annotation overlap """ ax2_unknown = ibs.is_aid_unknown(valid_aids) ax2_nid = ibs.get_annot_name_rowids(valid_aids) assert all([nid < 0 if unknown else nid > 0 for nid, unknown in zip(ax2_nid, ax2_unknown)]), 'bad annot nid' """ # if verbose: logger.info('Checking Annot Species') unknown_annots = valid_annots.compress(ibs.is_aid_unknown(valid_annots)) species_list = valid_annots.species_texts species2_annots = valid_annots.group_items(valid_annots.species_texts) species2_nAids = {key: len(val) for key, val in species2_annots.items()} if verbose: logger.info('Checking Multiton/Singleton Species') nx2_nAnnots = np.array(list(map(len, nx2_aids))) # Seperate singleton / multitons multiton_nxs = np.where(nx2_nAnnots > 1)[0] singleton_nxs = np.where(nx2_nAnnots == 1)[0] unassociated_nxs = np.where(nx2_nAnnots == 0)[0] assert len(np.intersect1d(singleton_nxs, multiton_nxs)) == 0, 'intersecting names' valid_nxs = np.hstack([multiton_nxs, singleton_nxs]) num_names_with_gt = len(multiton_nxs) # Annot Info if verbose: logger.info('Checking Annot Info') multiton_aids_list = ut.take(nx2_aids, multiton_nxs) assert len(set(multiton_nxs)) == len(multiton_nxs) if len(multiton_aids_list) == 0: multiton_aids = np.array([], dtype=np.int) else: multiton_aids = np.hstack(multiton_aids_list) assert len(set(multiton_aids)) == len(multiton_aids), 'duplicate annot' singleton_aids = ut.take(nx2_aids, singleton_nxs) multiton_nid2_nannots = list(map(len, multiton_aids_list)) # Image size stats if with_imgsize: if verbose: logger.info('Checking ImageSize Info') gpath_list = ibs.get_image_paths(valid_gids) def wh_print_stats(wh_list): if len(wh_list) == 0: return '{empty}' wh_list = np.asarray(wh_list) stat_dict = collections.OrderedDict( [ ('max', wh_list.max(0)), ('min', wh_list.min(0)), ('mean', wh_list.mean(0)), ('std', wh_list.std(0)), ] ) def arr2str(var): return '[' + (', '.join(list(map(lambda x: '%.1f' % x, var)))) + ']' ret = ',\n '.join( ['%s:%s' % (key, arr2str(val)) for key, val in stat_dict.items()] ) return '{\n ' + ret + '\n}' logger.info('reading image sizes') # Image size stats img_size_list = ibs.get_image_sizes(valid_gids) img_size_stats = wh_print_stats(img_size_list) # Chip size stats annotation_bbox_list = 
ibs.get_annot_bboxes(valid_aids) annotation_bbox_arr = np.array(annotation_bbox_list) if len(annotation_bbox_arr) == 0: annotation_size_list = [] else: annotation_size_list = annotation_bbox_arr[:, 2:4] chip_size_stats = wh_print_stats(annotation_size_list) imgsize_stat_lines = [ (' # Img in dir = %d' % len(gpath_list)), (' Image Size Stats = %s' % (img_size_stats,)), (' * Chip Size Stats = %s' % (chip_size_stats,)), ] else: imgsize_stat_lines = [] if verbose: logger.info('Building Stats String') multiton_stats = ut.repr3( ut.get_stats(multiton_nid2_nannots, use_median=True), nl=0, precision=2, si=True ) # Time stats unixtime_list = valid_images.unixtime2 # valid_unixtime_list = [time for time in unixtime_list if time != -1] # unixtime_statstr = ibs.get_image_time_statstr(valid_gids) if ut.get_argflag('--hackshow-unixtime'): show_time_distributions(ibs, unixtime_list) ut.show_if_requested() unixtime_statstr = ut.repr3(ut.get_timestats_dict(unixtime_list, full=True), si=True) # GPS stats gps_list_ = ibs.get_image_gps(valid_gids) gpsvalid_list = [gps != (-1, -1) for gps in gps_list_] gps_list = ut.compress(gps_list_, gpsvalid_list) def get_annot_age_stats(aid_list): annot_age_months_est_min = ibs.get_annot_age_months_est_min(aid_list) annot_age_months_est_max = ibs.get_annot_age_months_est_max(aid_list) age_dict = ut.ddict((lambda: 0)) for min_age, max_age in zip(annot_age_months_est_min, annot_age_months_est_max): if max_age is None: max_age = min_age if min_age is None: min_age = max_age if max_age is None and min_age is None: logger.info('Found UNKNOWN Age: %r, %r' % (min_age, max_age,)) age_dict['UNKNOWN'] += 1 elif (min_age is None or min_age < 12) and max_age < 12: age_dict['Infant'] += 1 elif 12 <= min_age and min_age < 36 and 12 <= max_age and max_age < 36: age_dict['Juvenile'] += 1 elif 36 <= min_age and (max_age is None or 36 <= max_age): age_dict['Adult'] += 1 return age_dict def get_annot_sex_stats(aid_list): annot_sextext_list = ibs.get_annot_sex_texts(aid_list) sextext2_aids = ut.group_items(aid_list, annot_sextext_list) sex_keys = list(ibs.const.SEX_TEXT_TO_INT.keys()) assert set(sex_keys) >= set(annot_sextext_list), 'bad keys: ' + str( set(annot_sextext_list) - set(sex_keys) ) sextext2_nAnnots = ut.odict( [(key, len(sextext2_aids.get(key, []))) for key in sex_keys] ) # Filter 0's sextext2_nAnnots = { key: val for key, val in six.iteritems(sextext2_nAnnots) if val != 0 } return sextext2_nAnnots def get_annot_qual_stats(ibs, aid_list): annots = ibs.annots(aid_list) qualtext2_nAnnots = ut.order_dict_by( ut.map_vals(len, annots.group_items(annots.quality_texts)), list(ibs.const.QUALITY_TEXT_TO_INT.keys()), ) return qualtext2_nAnnots def get_annot_viewpoint_stats(ibs, aid_list): annots = ibs.annots(aid_list) viewcode2_nAnnots = ut.order_dict_by( ut.map_vals(len, annots.group_items(annots.viewpoint_code)), list(ibs.const.VIEW.CODE_TO_INT.keys()) + [None], ) return viewcode2_nAnnots if verbose: logger.info('Checking Other Annot Stats') qualtext2_nAnnots = get_annot_qual_stats(ibs, valid_aids) viewcode2_nAnnots = get_annot_viewpoint_stats(ibs, valid_aids) agetext2_nAnnots = get_annot_age_stats(valid_aids) sextext2_nAnnots = get_annot_sex_stats(valid_aids) if verbose: logger.info('Checking Contrib Stats') # Contributor Statistics # hack remove colon for image alignment def fix_tag_list(tag_list): return [None if tag is None else tag.replace(':', ';') for tag in tag_list] image_contributor_tags = fix_tag_list(ibs.get_image_contributor_tag(valid_gids)) annot_contributor_tags = 
fix_tag_list(ibs.get_annot_image_contributor_tag(valid_aids)) contributor_tag_to_gids = ut.group_items(valid_gids, image_contributor_tags) contributor_tag_to_aids = ut.group_items(valid_aids, annot_contributor_tags) contributor_tag_to_qualstats = { key: get_annot_qual_stats(ibs, aids) for key, aids in six.iteritems(contributor_tag_to_aids) } contributor_tag_to_viewstats = { key: get_annot_viewpoint_stats(ibs, aids) for key, aids in six.iteritems(contributor_tag_to_aids) } contributor_tag_to_nImages = { key: len(val) for key, val in six.iteritems(contributor_tag_to_gids) } contributor_tag_to_nAnnots = { key: len(val) for key, val in six.iteritems(contributor_tag_to_aids) } if verbose: logger.info('Summarizing') # Summarize stats num_names = len(valid_nids) num_names_unassociated = len(valid_nids) - len(associated_nids) num_names_singleton = len(singleton_nxs) num_names_multiton = len(multiton_nxs) num_singleton_annots = len(singleton_aids) num_multiton_annots = len(multiton_aids) num_unknown_annots = len(unknown_annots) num_annots = len(valid_aids) if with_bytes: if verbose: logger.info('Checking Disk Space') ibsdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_ibsdir())) dbdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_dbdir())) imgdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_imgdir())) cachedir_space = ut.byte_str2(ut.get_disk_space(ibs.get_cachedir())) if True: if verbose: logger.info('Check asserts') try: bad_aids = np.intersect1d(multiton_aids, unknown_annots) _num_names_total_check = ( num_names_singleton + num_names_unassociated + num_names_multiton ) _num_annots_total_check = ( num_unknown_annots + num_singleton_annots + num_multiton_annots ) assert len(bad_aids) == 0, 'intersecting multiton aids and unknown aids' assert _num_names_total_check == num_names, 'inconsistent num names' # if not request_annot_subset: # dont check this if you have an annot subset assert _num_annots_total_check == num_annots, 'inconsistent num annots' except Exception as ex: ut.printex( ex, keys=[ '_num_names_total_check', 'num_names', '_num_annots_total_check', 'num_annots', 'num_names_singleton', 'num_names_multiton', 'num_unknown_annots', 'num_multiton_annots', 'num_singleton_annots', ], ) raise # Get contributor statistics contributor_rowids = ibs.get_valid_contributor_rowids() num_contributors = len(contributor_rowids) # print num_tabs = 5 def align2(str_): return ut.align(str_, ':', ' :') def align_dict2(dict_): str_ = ut.repr2(dict_, si=True) return align2(str_) header_block_lines = [('+============================')] + ( [ ('+ singleton := single sighting'), ('+ multiton := multiple sightings'), ('--' * num_tabs), ] if not short and with_header else [] ) source_block_lines = [ ('DB Info: ' + ibs.get_dbname()), ('DB Notes: ' + ibs.get_dbnotes()), ('DB NumContrib: %d' % num_contributors), ] bytes_block_lines = ( [ ('--' * num_tabs), ('DB Bytes: '), (' +- dbdir nBytes: ' + dbdir_space), (' | +- _ibsdb nBytes: ' + ibsdir_space), (' | | +-imgdir nBytes: ' + imgdir_space), (' | | +-cachedir nBytes: ' + cachedir_space), ] if with_bytes else [] ) name_block_lines = [ ('--' * num_tabs), ('# Names = %d' % num_names), ('# Names (unassociated) = %d' % num_names_unassociated), ('# Names (singleton) = %d' % num_names_singleton), ('# Names (multiton) = %d' % num_names_multiton), ] subset_str = ' ' if not request_annot_subset else '(SUBSET)' annot_block_lines = [ ('--' * num_tabs), ('# Annots %s = %d' % (subset_str, num_annots,)), ('# Annots (unknown) = %d' % num_unknown_annots), ('# Annots (singleton) 
= %d' % num_singleton_annots), ('# Annots (multiton) = %d' % num_multiton_annots), ] annot_per_basic_block_lines = ( [ ('--' * num_tabs), ('# Annots per Name (multiton) = %s' % (align2(multiton_stats),)), ('# Annots per Image = %s' % (align2(gx2_nAnnots_stats),)), ('# Annots per Species = %s' % (align_dict2(species2_nAids),)), ] if not short else [] ) occurrence_block_lines = ( [ ('--' * num_tabs), # ('# Occurrence Per Name (Resights) = %s' % (align_dict2(resight_name_stats),)), # ('# Annots per Encounter (Singlesights) = %s' % (align_dict2(singlesight_annot_stats),)), ('# Pair Tag Info (annots) = %s' % (align_dict2(pair_tag_info),)), ] if not short else [] ) annot_per_qualview_block_lines = [ None if short else '# Annots per Viewpoint = %s' % align_dict2(viewcode2_nAnnots), None if short else '# Annots per Quality = %s' % align_dict2(qualtext2_nAnnots), ] annot_per_agesex_block_lines = ( [ '# Annots per Age = %s' % align_dict2(agetext2_nAnnots), '# Annots per Sex = %s' % align_dict2(sextext2_nAnnots), ] if not short and with_agesex else [] ) contributor_block_lines = ( [ '# Images per contributor = ' + align_dict2(contributor_tag_to_nImages), '# Annots per contributor = ' + align_dict2(contributor_tag_to_nAnnots), '# Quality per contributor = ' + ut.repr2(contributor_tag_to_qualstats, sorted_=True), '# Viewpoint per contributor = ' + ut.repr2(contributor_tag_to_viewstats, sorted_=True), ] if with_contrib else [] ) img_block_lines = [ ('--' * num_tabs), ('# Img = %d' % len(valid_gids)), None if short else ('# Img reviewed = %d' % sum(image_reviewed_list)), None if short else ('# Img with gps = %d' % len(gps_list)), # ('# Img with timestamp = %d' % len(valid_unixtime_list)), None if short else ('Img Time Stats = %s' % (align2(unixtime_statstr),)), ] info_str_lines = ( header_block_lines + bytes_block_lines + source_block_lines + name_block_lines + annot_block_lines + annot_per_basic_block_lines + occurrence_block_lines + annot_per_qualview_block_lines + annot_per_agesex_block_lines + img_block_lines + contributor_block_lines + imgsize_stat_lines + [('L============================')] ) info_str = '\n'.join(ut.filter_Nones(info_str_lines)) info_str2 = ut.indent(info_str, '[{tag}]'.format(tag=tag)) if verbose: logger.info(info_str2) locals_ = locals() return locals_
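# A compact sketch of the singleton/multiton bookkeeping that get_dbinfo performs:
# names are partitioned by how many annotations they own, and the consistency
# asserts above reduce to the identities checked here. The toy name-to-annotation
# mapping is made up for illustration.
import numpy as np

nx2_aids_toy = [[1, 2, 3], [4], [], [5, 6], [7]]      # annots grouped per name
nx2_nAnnots_toy = np.array(list(map(len, nx2_aids_toy)))
multiton_nxs_toy = np.where(nx2_nAnnots_toy > 1)[0]
singleton_nxs_toy = np.where(nx2_nAnnots_toy == 1)[0]
unassociated_nxs_toy = np.where(nx2_nAnnots_toy == 0)[0]

num_names_toy = len(nx2_aids_toy)
# every name is exactly one of multiton / singleton / unassociated
assert len(multiton_nxs_toy) + len(singleton_nxs_toy) + len(unassociated_nxs_toy) == num_names_toy

num_multiton_annots_toy = int(nx2_nAnnots_toy[multiton_nxs_toy].sum())
num_singleton_annots_toy = int(nx2_nAnnots_toy[singleton_nxs_toy].sum())
print(num_names_toy, len(multiton_nxs_toy), len(singleton_nxs_toy),
      num_multiton_annots_toy, num_singleton_annots_toy)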
def compute_occurrence_groups(ibs, gid_list, config={}, use_gps=False, verbose=None): r""" Args: ibs (IBEISController): wbia controller object gid_list (list): Returns: tuple: (None, None) CommandLine: python -m wbia compute_occurrence_groups Example: >>> # DISABLE_DOCTEST >>> from wbia.algo.preproc.preproc_occurrence import * # NOQA >>> import wbia >>> ibs = wbia.opendb(defaultdb='testdb1') >>> verbose = True >>> images = ibs.images() >>> gid_list = images.gids >>> config = {} # wbia.algo.Config.OccurrenceConfig().asdict() >>> tup = wbia_compute_occurrences(ibs, gid_list) >>> (flat_imgsetids, flat_gids) >>> aids_list = list(ut.group_items(aid_list_, flat_imgsetids).values()) >>> metric = list(map(len, aids_list)) >>> sortx = ut.list_argsort(metric)[::-1] >>> index = sortx[1] >>> aids = aids_list[index] >>> gids = list(set(ibs.get_annot_gids(aids))) """ if verbose is None: verbose = ut.NOT_QUIET # Config info gid_list = np.unique(gid_list) if verbose: logger.info('[occur] Computing occurrences on %r images.' % (len(gid_list))) logger.info('[occur] config = ' + ut.repr3(config)) use_gps = config['use_gps'] datas = prepare_X_data(ibs, gid_list, use_gps=use_gps) from wbia.algo.preproc import occurrence_blackbox cluster_algo = config.get('cluster_algo', 'agglomerative') km_per_sec = config.get('km_per_sec', occurrence_blackbox.KM_PER_SEC) thresh_sec = config.get('seconds_thresh', 30 * 60.0) min_imgs_per_occurence = config.get('min_imgs_per_occurence', 1) # 30 minutes = 3.6 kilometers # 5 minutes = 0.6 kilometers assert cluster_algo == 'agglomerative', 'only agglomerative is supported' # Group datas with different values separately all_gids = [] all_labels = [] for key in datas.keys(): val = datas[key] gids, latlons, posixtimes = val labels = occurrence_blackbox.cluster_timespace_sec( latlons, posixtimes, thresh_sec, km_per_sec=km_per_sec ) if labels is None: labels = np.zeros(len(gids), dtype=np.int) all_gids.append(gids) all_labels.append(labels) # Combine labels across different groups pads = [vt.safe_max(ys, fill=0) + 1 for ys in all_labels] offsets = np.array([0] + pads[:-1]).cumsum() all_labels_ = [ys + offset for ys, offset in zip(all_labels, offsets)] label_arr = np.array(ut.flatten(all_labels_)) gid_arr = np.array(ut.flatten(all_gids)) # Group images by unique label labels, label_gids = group_images_by_label(label_arr, gid_arr) # Remove occurrences less than the threshold occur_labels = labels occur_gids = label_gids occur_unixtimes = compute_occurrence_unixtime(ibs, occur_gids) occur_labels, occur_gids = filter_and_relabel( labels, label_gids, min_imgs_per_occurence, occur_unixtimes ) if verbose: logger.info('[occur] Found %d clusters.' % len(occur_labels)) if len(label_gids) > 0 and verbose: logger.info('[occur] Cluster image size stats:') ut.print_dict( ut.get_stats(list(map(len, occur_gids)), use_median=True, use_sum=True), 'occur image stats', ) return occur_labels, occur_gids
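# A self-contained sketch of the two core steps in compute_occurrence_groups:
# (1) cluster images into occurrences by time gaps and (2) merge per-group label
# arrays with cumulative offsets so labels stay unique across groups. The
# gap-based clustering below stands in for occurrence_blackbox.cluster_timespace_sec
# and is an assumption, not the library's agglomerative algorithm.
import numpy as np


def cluster_by_time_gap(posixtimes, thresh_sec):
    # Start a new cluster whenever the gap to the previous image exceeds thresh_sec
    order = np.argsort(posixtimes)
    sorted_times = np.asarray(posixtimes)[order]
    new_cluster = np.diff(sorted_times) > thresh_sec
    sorted_labels = np.concatenate([[0], np.cumsum(new_cluster)])
    labels = np.empty(len(posixtimes), dtype=int)
    labels[order] = sorted_labels
    return labels


all_labels_toy = [
    cluster_by_time_gap([0, 100, 5000, 5100], thresh_sec=30 * 60.0),
    cluster_by_time_gap([9999, 10, 20], thresh_sec=30 * 60.0),
]
# Same offset trick as above: shift each group's labels past the previous group's max
pads = [labels.max() + 1 for labels in all_labels_toy]
offsets = np.array([0] + pads[:-1]).cumsum()
combined = np.concatenate([labels + off for labels, off in zip(all_labels_toy, offsets)])
print(combined)   # labels are unique across both groups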
def get_toy_data_1vM(num_annots, num_names=None, **kwargs): r""" Args: num_annots (int): num_names (int): (default = None) Kwargs: initial_aids, initial_nids, nid_sequence, seed Returns: tuple: (pair_list, feat_list) CommandLine: python -m ibeis.algo.hots.demobayes --exec-get_toy_data_1vM --show Example: >>> # DISABLE_DOCTEST >>> from ibeis.algo.hots.demobayes import * # NOQA >>> num_annots = 1000 >>> num_names = 40 >>> get_toy_data_1vM(num_annots, num_names) >>> ut.quit_if_noshow() >>> import plottool as pt >>> ut.show_if_requested() """ import vtool as vt tup_ = get_toy_annots(num_annots, num_names, **kwargs) aids, nids, aids1, nids1, all_aids, all_nids = tup_ rng = vt.ensure_rng(None) # Test a simple SVM classifier nid2_nexemp = ut.dict_hist(nids1) aid2_nid = dict(zip(aids, nids)) ut.fix_embed_globals() #def add_to_globals(globals_, subdict): # globals_.update(subdict) unique_nids = list(nid2_nexemp.keys()) def annot_to_class_feats2(aid, aid2_nid, top=None): pair_list = [] score_list = [] nexemplar_list = [] for nid in unique_nids: label = (aid2_nid[aid] == nid) num_exemplars = nid2_nexemp.get(nid, 0) if num_exemplars == 0: continue params = toy_params[label] mu, sigma = ut.dict_take(params, ['mu', 'sigma']) score_ = rng.normal(mu, sigma, size=num_exemplars).max() score = np.clip(score_, 0, np.inf) pair_list.append((aid, nid)) score_list.append(score) nexemplar_list.append(num_exemplars) rank_list = ut.argsort(score_list, reverse=True) feat_list = np.array([score_list, rank_list, nexemplar_list]).T sortx = np.argsort(rank_list) feat_list = feat_list.take(sortx, axis=0) pair_list = np.array(pair_list).take(sortx, axis=0) if top is not None: feat_list = feat_list[:top] pair_list = pair_list[0:top] return pair_list, feat_list toclass_features = [ annot_to_class_feats2(aid, aid2_nid, top=5) for aid in aids ] aidnid_pairs = np.vstack(ut.get_list_column(toclass_features, 0)) feat_list = np.vstack(ut.get_list_column(toclass_features, 1)) score_list = feat_list.T[0:1].T lbl_list = [aid2_nid[aid] == nid for aid, nid in aidnid_pairs] from sklearn import svm #clf1 = svm.LinearSVC() print('Learning classifiers') clf3 = svm.SVC(probability=True) clf3.fit(feat_list, lbl_list) #prob_true, prob_false = clf3.predict_proba(feat_list).T clf1 = svm.LinearSVC() clf1.fit(score_list, lbl_list) # Score new annots against the training database tup_ = get_toy_annots(num_annots * 2, num_names, initial_aids=all_aids, initial_nids=all_nids) aids, nids, aids1, nids1, all_aids, all_nids = tup_ aid2_nid = dict(zip(aids, nids)) toclass_features = [annot_to_class_feats2(aid, aid2_nid) for aid in aids] aidnid_pairs = np.vstack(ut.get_list_column(toclass_features, 0)) feat_list = np.vstack(ut.get_list_column(toclass_features, 1)) lbl_list = np.array([aid2_nid[aid] == nid for aid, nid in aidnid_pairs]) print('Running tests') score_list = feat_list.T[0:1].T tp_feat_list = feat_list[lbl_list] tn_feat_list = feat_list[~lbl_list] tp_lbls = lbl_list[lbl_list] tn_lbls = lbl_list[~lbl_list] print('num tp: %d' % len(tp_lbls)) print('num fp: %d' % len(tn_lbls)) tp_score_list = score_list[lbl_list] tn_score_list = score_list[~lbl_list] print('tp_feat' + ut.repr3(ut.get_stats(tp_feat_list, axis=0), precision=2)) print('tp_feat' + ut.repr3(ut.get_stats(tn_feat_list, axis=0), precision=2)) print('tp_score' + ut.repr2(ut.get_stats(tp_score_list), precision=2)) print('tp_score' + ut.repr2(ut.get_stats(tn_score_list), precision=2)) tp_pred3 = clf3.predict(tp_feat_list) tn_pred3 = clf3.predict(tn_feat_list) print((tp_pred3.sum(), 
tp_pred3.shape)) print((tn_pred3.sum(), tn_pred3.shape)) tp_score3 = clf3.score(tp_feat_list, tp_lbls) tn_score3 = clf3.score(tn_feat_list, tn_lbls) tp_pred1 = clf1.predict(tp_score_list) tn_pred1 = clf1.predict(tn_score_list) print((tp_pred1.sum(), tp_pred1.shape)) print((tn_pred1.sum(), tn_pred1.shape)) tp_score1 = clf1.score(tp_score_list, tp_lbls) tn_score1 = clf1.score(tn_score_list, tn_lbls) print('tp score with rank = %r' % (tp_score3, )) print('tn score with rank = %r' % (tn_score3, )) print('tp score without rank = %r' % (tp_score1, )) print('tn score without rank = %r' % (tn_score1, )) toy_data = {} return toy_data
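# A distilled sketch of the comparison the toy script above makes: the same labels
# are fit once with only the raw score (LinearSVC) and once with the
# (score, rank, num_exemplars) feature triple (SVC), to check whether the rank and
# exemplar-count features add anything. The synthetic data below is made up; it is
# not the output of get_toy_annots.
import numpy as np
from sklearn import svm

rng = np.random.RandomState(0)
n = 500
labels = rng.rand(n) > 0.5
scores = np.where(labels, rng.normal(2.0, 1.0, n), rng.normal(0.0, 1.0, n))
ranks = rng.randint(0, 5, n)              # placeholder rank feature
nexemplars = rng.randint(1, 10, n)        # placeholder exemplar counts
feats = np.column_stack([scores, ranks, nexemplars])

clf_score_only = svm.LinearSVC().fit(scores[:, None], labels)
clf_full = svm.SVC().fit(feats, labels)
print('score-only acc = %.3f' % clf_score_only.score(scores[:, None], labels))
print('full-feature acc = %.3f' % clf_full.score(feats, labels))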
def select_ith_match(self, mx): """ Selects the ith match and visualizes and prints information concerning features weights, keypoint details, and sift descriptions """ import wbia.plottool as pt from wbia.plottool import viz_featrow from wbia.plottool import interact_helpers as ih fnum = self.fnum same_fig = self.same_fig rchip1 = self.rchip1 rchip2 = self.rchip2 self.mx = mx print('+--- SELECT --- ') print('... selecting mx-th=%r feature match' % mx) fsv = self.fsv fs = self.fs print('score stats:') print(ut.repr2(ut.get_stats(fsv, axis=0), nl=1)) print('fsv[mx] = %r' % (fsv[mx], )) print('fs[mx] = %r' % (fs[mx], )) # ---------------------- # Get info for the select_ith_match plot self.mode = 1 # Get the mx-th feature match fx1, fx2 = self.fm[mx] # Older info fscore2 = self.fs[mx] fk2 = None if self.fk is None else self.fk[mx] kp1, kp2 = self.kpts1[fx1], self.kpts2[fx2] vecs1, vecs2 = self.vecs1[fx1], self.vecs2[fx2] info1 = '\nquery' info2 = '\nk=%r fscore=%r' % (fk2, fscore2) # self.last_fx = fx1 self.last_fx = fx1 # Extracted keypoints to draw extracted_list = [ (rchip1, kp1, vecs1, fx1, 'aid1', info1), (rchip2, kp2, vecs2, fx2, 'aid2', info2), ] # Normalizng Keypoint # if hasattr(cm, 'filt2_meta') and 'lnbnn' in cm.filt2_meta: # qfx2_norm = cm.filt2_meta['lnbnn'] # # Normalizing chip and feature # (aid3, fx3, normk) = qfx2_norm[fx1] # rchip3 = ibs.get_annot_chips(aid3) # kp3 = ibs.get_annot_kpts(aid3)[fx3] # sift3 = ibs.get_annot_vecs(aid3)[fx3] # info3 = '\nnorm %s k=%r' % (vh.get_aidstrs(aid3), normk) # extracted_list.append((rchip3, kp3, sift3, fx3, aid3, info3)) # else: # pass # #print('WARNING: meta doesnt exist') # ---------------------- # Draw the select_ith_match plot nRows, nCols = len(extracted_list) + same_fig, 3 # Draw matching chips and features sel_fm = np.array([(fx1, fx2)]) pnum1 = (nRows, 1, 1) if same_fig else (1, 1, 1) vert = self.vert if self.vert is not None else False self.chipmatch_view( pnum=pnum1, ell_alpha=0.4, ell_linewidth=1.8, colors=pt.BLUE, sel_fm=sel_fm, vert=vert, ) # Draw selected feature matches px = nCols * same_fig # plot offset prevsift = None if not same_fig: # fnum2 = fnum + len(viz.FNUMS) fnum2 = self.fnum2 fig2 = pt.figure(fnum=fnum2, docla=True, doclf=True) else: fnum2 = fnum for (rchip, kp, sift, fx, aid, info) in extracted_list: px = viz_featrow.draw_feat_row( rchip, fx, kp, sift, fnum2, nRows, nCols, px, prevsift=prevsift, aid=aid, info=info, ) prevsift = sift if not same_fig: ih.connect_callback(fig2, 'button_press_event', self.on_click)
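# A minimal, non-interactive sketch of the bookkeeping behind select_ith_match:
# fm holds (query_fx, data_fx) index pairs, fsv holds per-filter score columns,
# and fs holds the combined score, so selecting match mx is plain row indexing.
# The arrays below are toy stand-ins for the real chip-match attributes, and the
# product used for the combined score is illustrative, not the actual scoring rule.
import numpy as np

fm = np.array([[0, 3], [1, 7], [2, 5]])               # feature index pairs (fx1, fx2)
fsv = np.array([[0.9, 0.2], [0.5, 0.4], [0.8, 0.1]])  # per-filter score columns
fs = fsv.prod(axis=1)                                 # combined score (illustrative)

mx = 1
fx1, fx2 = fm[mx]
print('selected match %d: fx1=%d fx2=%d' % (mx, fx1, fx2))
print('fsv[mx] = %r, fs[mx] = %r' % (fsv[mx], fs[mx]))
print('score columns: mean=%r, max=%r' % (fsv.mean(axis=0), fsv.max(axis=0)))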
def ggr_random_name_splits(): """ CommandLine: python -m wbia.viz.viz_graph2 ggr_random_name_splits --show Ignore: sshfs -o idmap=user lev:/ ~/lev Example: >>> # DISABLE_DOCTEST >>> from wbia.viz.viz_graph2 import * # NOQA >>> ggr_random_name_splits() """ import wbia.guitool as gt gt.ensure_qtapp() # nid_list = ibs.get_valid_nids(filter_empty=True) import wbia dbdir = '/media/danger/GGR/GGR-IBEIS' dbdir = (dbdir if ut.checkpath(dbdir) else ut.truepath('~/lev/media/danger/GGR/GGR-IBEIS')) ibs = wbia.opendb(dbdir=dbdir, allow_newdir=False) import datetime day1 = datetime.date(2016, 1, 30) day2 = datetime.date(2016, 1, 31) orig_filter_kw = { 'multiple': None, # 'view': ['right'], # 'minqual': 'good', 'is_known': True, 'min_pername': 2, } orig_aids = ibs.filter_annots_general(filter_kw=ut.dict_union( orig_filter_kw, { 'min_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day1, 0.0)), 'max_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day2, 1.0)), }, )) orig_all_annots = ibs.annots(orig_aids) orig_unique_nids, orig_grouped_annots_ = orig_all_annots.group( orig_all_annots.nids) # Ensure we get everything orig_grouped_annots = [ ibs.annots(aids_) for aids_ in ibs.get_name_aids(orig_unique_nids) ] # pip install quantumrandom if False: import quantumrandom data = quantumrandom.uint16() seed = data.sum() print('seed = %r' % (seed, )) # import Crypto.Random # from Crypto import Random # quantumrandom.get_data() # StrongRandom = Crypto.Random.random.StrongRandom # aes.reseed(3340258) # chars = [str(chr(x)) for x in data.view(np.uint8)] # aes_seed = str('').join(chars) # aes = Crypto.Random.Fortuna.FortunaGenerator.AESGenerator() # aes.reseed(aes_seed) # aes.pseudo_random_data(10) orig_rand_idxs = ut.random_indexes(len(orig_grouped_annots), seed=3340258) orig_sample_size = 75 random_annot_groups = ut.take(orig_grouped_annots, orig_rand_idxs) orig_annot_sample = random_annot_groups[:orig_sample_size] # OOOPS MADE ERROR REDO ---- filter_kw = { 'multiple': None, 'view': ['right'], 'minqual': 'good', 'is_known': True, 'min_pername': 2, } filter_kw_ = ut.dict_union( filter_kw, { 'min_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day1, 0.0)), 'max_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day2, 1.0)), }, ) refiltered_sample = [ ibs.filter_annots_general(annot.aids, filter_kw=filter_kw_) for annot in orig_annot_sample ] is_ok = np.array(ut.lmap(len, refiltered_sample)) >= 2 ok_part_orig_sample = ut.compress(orig_annot_sample, is_ok) ok_part_orig_nids = [x.nids[0] for x in ok_part_orig_sample] # Now compute real sample aids = ibs.filter_annots_general(filter_kw=filter_kw_) all_annots = ibs.annots(aids) unique_nids, grouped_annots_ = all_annots.group(all_annots.nids) grouped_annots = grouped_annots_ # Ensure we get everything # grouped_annots = [ibs.annots(aids_) for aids_ in ibs.get_name_aids(unique_nids)] pop = len(grouped_annots) pername_list = ut.lmap(len, grouped_annots) groups = wbia.annots.AnnotGroups(grouped_annots, ibs) match_tags = [ut.unique(ut.flatten(t)) for t in groups.match_tags] tag_case_hist = ut.dict_hist(ut.flatten(match_tags)) print('name_pop = %r' % (pop, )) print('Annots per Multiton Name' + ut.repr3(ut.get_stats(pername_list, use_median=True))) print('Name Tag Hist ' + ut.repr3(tag_case_hist)) print('Percent Photobomb: %.2f%%' % (tag_case_hist['photobomb'] / pop * 100)) print('Percent Split: %.2f%%' % (tag_case_hist['splitcase'] / pop * 100)) # Remove the ok part from this sample remain_unique_nids = ut.setdiff(unique_nids, ok_part_orig_nids) 
remain_grouped_annots = [ ibs.annots(aids_) for aids_ in ibs.get_name_aids(remain_unique_nids) ] sample_size = 75 import vtool as vt vt.calc_sample_from_error_bars(0.05, pop, conf_level=0.95, prior=0.05) remain_rand_idxs = ut.random_indexes(len(remain_grouped_annots), seed=3340258) remain_sample_size = sample_size - len(ok_part_orig_nids) remain_random_annot_groups = ut.take(remain_grouped_annots, remain_rand_idxs) remain_annot_sample = remain_random_annot_groups[:remain_sample_size] annot_sample_nofilter = ok_part_orig_sample + remain_annot_sample # Filter out all bad parts annot_sample_filter = [ ibs.annots(ibs.filter_annots_general(annot.aids, filter_kw=filter_kw_)) for annot in annot_sample_nofilter ] annot_sample = annot_sample_filter win = None from wbia.viz import viz_graph2 for annots in ut.InteractiveIter(annot_sample): if win is not None: win.close() win = viz_graph2.make_qt_graph_interface(ibs, aids=annots.aids, init_mode='rereview') print(win) sample_groups = wbia.annots.AnnotGroups(annot_sample, ibs) flat_tags = [ut.unique(ut.flatten(t)) for t in sample_groups.match_tags] print('Using Split and Photobomb') is_positive = ['photobomb' in t or 'splitcase' in t for t in flat_tags] num_positive = sum(is_positive) vt.calc_error_bars_from_sample(sample_size, num_positive, pop, conf_level=0.95) print('Only Photobomb') is_positive = ['photobomb' in t for t in flat_tags] num_positive = sum(is_positive) vt.calc_error_bars_from_sample(sample_size, num_positive, pop, conf_level=0.95) print('Only SplitCase') is_positive = ['splitcase' in t for t in flat_tags] num_positive = sum(is_positive) vt.calc_error_bars_from_sample(sample_size, num_positive, pop, conf_level=0.95)
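# A hedged sketch of the error-bar arithmetic the vtool helpers above
# (calc_sample_from_error_bars / calc_error_bars_from_sample) are used for: a
# normal-approximation confidence interval for a sampled proportion with a
# finite-population correction. The formula is the standard one; whether vtool
# uses exactly this estimator is an assumption, and the numbers below are made up.
import math


def proportion_ci(sample_size, num_positive, population, conf_level=0.95):
    z = {0.90: 1.645, 0.95: 1.96, 0.99: 2.576}[conf_level]
    p = num_positive / sample_size
    se = math.sqrt(p * (1 - p) / sample_size)
    fpc = math.sqrt((population - sample_size) / (population - 1))  # finite population correction
    return p, z * se * fpc


p_hat, margin = proportion_ci(sample_size=75, num_positive=9, population=500)
print('estimated rate = %.1f%% +/- %.1f%%' % (p_hat * 100, margin * 100))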

def latex_dbstats(ibs_list, **kwargs):
    r"""
    Args:
        ibs_list (list): list of IBEISController objects

    CommandLine:
        python -m ibeis.other.dbinfo --exec-latex_dbstats --dblist testdb1
        python -m ibeis.other.dbinfo --exec-latex_dbstats --dblist testdb1 --show
        python -m ibeis.other.dbinfo --exec-latex_dbstats --dblist PZ_Master0 testdb1 --show
        python -m ibeis.other.dbinfo --exec-latex_dbstats --dblist PZ_Master0 PZ_MTEST GZ_ALL --show
        python -m ibeis.other.dbinfo --test-latex_dbstats --dblist GZ_ALL NNP_MasterGIRM_core --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.other.dbinfo import *  # NOQA
        >>> import ibeis
        >>> db_list = ut.get_argval('--dblist', type_=list, default=['testdb1'])
        >>> ibs_list = [ibeis.opendb(db=db) for db in db_list]
        >>> tabular_str = latex_dbstats(ibs_list)
        >>> tabular_cmd = ut.latex_newcommand(ut.latex_sanitize_command_name('DatabaseInfo'), tabular_str)
        >>> ut.copy_text_to_clipboard(tabular_cmd)
        >>> write_fpath = ut.get_argval('--write', type_=str, default=None)
        >>> if write_fpath is not None:
        >>>     fpath = ut.truepath(write_fpath)
        >>>     text = ut.readfrom(fpath)
        >>>     new_text = ut.replace_between_tags(text, tabular_cmd, '% <DBINFO>', '% </DBINFO>')
        >>>     ut.writeto(fpath, new_text)
        >>> ut.print_code(tabular_cmd, 'latex')
        >>> ut.quit_if_noshow()
        >>> ut.render_latex_text('\\noindent \n' + tabular_str)
    """
    import ibeis
    # Parse for aids test data
    aids_list = [ibeis.testdata_aids(ibs=ibs) for ibs in ibs_list]

    # dbinfo_list = [get_dbinfo(ibs, with_contrib=False, verbose=False) for ibs in ibs_list]
    dbinfo_list = [
        get_dbinfo(ibs, with_contrib=False, verbose=False, aid_list=aids)
        for ibs, aids in zip(ibs_list, aids_list)
    ]

    # title = db_name + ' database statistics'
    title = 'Database statistics'
    stat_title = '# Annotations per name (multiton)'

    # col_lbls = [
    #     'multiton',
    #     #'singleton',
    #     'total',
    #     'multiton',
    #     'singleton',
    #     'total',
    # ]
    key_to_col_lbls = {
        'num_names_multiton':    'multiton',
        'num_names_singleton':   'singleton',
        'num_names':             'total',

        'num_multiton_annots':   'multiton',
        'num_singleton_annots':  'singleton',
        'num_unknown_annots':    'unknown',
        'num_annots':            'total',
    }
    # Structure of columns / multicolumns
    multi_col_keys = [
        ('# Names', (
            'num_names_multiton',
            # 'num_names_singleton',
            'num_names',
        )),
        ('# Annots', (
            'num_multiton_annots',
            'num_singleton_annots',
            # 'num_unknown_annots',
            'num_annots',
        )),
    ]
    # multicol_lbls = [('# Names', 3), ('# Annots', 3)]
    multicol_lbls = [(mcolname, len(mcols)) for mcolname, mcols in multi_col_keys]

    # Flatten column labels
    col_keys = ut.flatten(ut.get_list_column(multi_col_keys, 1))
    col_lbls = ut.dict_take(key_to_col_lbls, col_keys)

    row_lbls = []
    row_values = []

    # stat_col_lbls = ['max', 'min', 'mean', 'std', 'nMin', 'nMax']
    stat_col_lbls = ['max', 'min', 'mean', 'std', 'med']
    # stat_row_lbls = ['# Annot per Name (multiton)']
    stat_row_lbls = []
    stat_row_values = []

    SINGLE_TABLE = False
    EXTRA = True

    for ibs, dbinfo_locals in zip(ibs_list, dbinfo_list):
        row_ = ut.dict_take(dbinfo_locals, col_keys)
        dbname = ibs.get_dbname_alias()
        row_lbls.append(dbname)
        multiton_annot_stats = ut.get_stats(
            dbinfo_locals['multiton_nid2_nannots'], use_median=True)
        stat_rows = ut.dict_take(multiton_annot_stats, stat_col_lbls)
        if SINGLE_TABLE:
            row_.extend(stat_rows)
        else:
            stat_row_lbls.append(dbname)
            stat_row_values.append(stat_rows)
        row_values.append(row_)

    CENTERLINE = False
    AS_TABLE = True
    tablekw = dict(
        astable=AS_TABLE, centerline=CENTERLINE, FORCE_INT=False, precision=2,
        col_sep='', multicol_sep='|', **kwargs)

    if EXTRA:
        extra_keys = [
            # 'species2_nAids',
            'qualtext2_nAnnots',
            'yawtext2_nAnnots',
        ]
        extra_titles = {
            'species2_nAids': 'Annotations per species.',
            'qualtext2_nAnnots': 'Annotations per quality.',
            'yawtext2_nAnnots': 'Annotations per viewpoint.',
        }
        extra_collbls = ut.ddict(list)
        extra_rowvalues = ut.ddict(list)
        extra_tables = ut.ddict(list)

        for ibs, dbinfo_locals in zip(ibs_list, dbinfo_list):
            for key in extra_keys:
                extra_collbls[key] = ut.unique_ordered(
                    extra_collbls[key] + list(dbinfo_locals[key].keys()))

        extra_collbls['qualtext2_nAnnots'] = [
            'excellent', 'good', 'ok', 'poor', 'junk', 'UNKNOWN']
        # extra_collbls['yawtext2_nAnnots'] = ['backleft', 'left', 'frontleft', 'front', 'frontright', 'right', 'backright', 'back', None]
        extra_collbls['yawtext2_nAnnots'] = [
            'BL', 'L', 'FL', 'F', 'FR', 'R', 'BR', 'B', None]

        for ibs, dbinfo_locals in zip(ibs_list, dbinfo_list):
            for key in extra_keys:
                extra_rowvalues[key].append(
                    ut.dict_take(dbinfo_locals[key], extra_collbls[key], 0))

        qualalias = {'UNKNOWN': None}

        extra_collbls['yawtext2_nAnnots'] = [
            ibs.const.YAWALIAS.get(val, val)
            for val in extra_collbls['yawtext2_nAnnots']]
        extra_collbls['qualtext2_nAnnots'] = [
            qualalias.get(val, val)
            for val in extra_collbls['qualtext2_nAnnots']]

        for key in extra_keys:
            extra_tables[key] = ut.util_latex.make_score_tabular(
                row_lbls, extra_collbls[key], extra_rowvalues[key],
                title=extra_titles[key], col_align='r', table_position='[h!]',
                **tablekw)

    # tabular_str = util_latex.tabular_join(tabular_body_list)
    if SINGLE_TABLE:
        col_lbls += stat_col_lbls
        multicol_lbls += [(stat_title, len(stat_col_lbls))]

    count_tabular_str = ut.util_latex.make_score_tabular(
        row_lbls, col_lbls, row_values, title=title,
        multicol_lbls=multicol_lbls, table_position='[ht!]', **tablekw)
    # print(row_lbls)

    if SINGLE_TABLE:
        tabular_str = count_tabular_str
    else:
        stat_tabular_str = ut.util_latex.make_score_tabular(
            stat_row_lbls, stat_col_lbls, stat_row_values, title=stat_title,
            col_align='r', table_position='[h!]', **tablekw)

        # Make a table of statistics
        if tablekw['astable']:
            tablesep = '\n%--\n'
        else:
            tablesep = '\\\\\n%--\n'

        if EXTRA:
            tabular_str = tablesep.join(
                [count_tabular_str, stat_tabular_str] +
                ut.dict_take(extra_tables, extra_keys))
        else:
            tabular_str = tablesep.join([count_tabular_str, stat_tabular_str])

    return tabular_str
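
# A hedged usage sketch for latex_dbstats, mirroring the doctest above: build
# the statistics tables for a few databases and splice them into a LaTeX
# document between the % <DBINFO> / % </DBINFO> markers. The database names
# and output path are placeholders, not project defaults; the helper itself
# is hypothetical and only illustrative.
def _sketch_write_dbstats_to_paper(db_list=('testdb1',), write_fpath=None):
    import ibeis
    import utool as ut
    ibs_list = [ibeis.opendb(db=db) for db in db_list]
    tabular_str = latex_dbstats(ibs_list)
    # Wrap the tabular in a LaTeX \newcommand so the paper only needs \DatabaseInfo
    tabular_cmd = ut.latex_newcommand(
        ut.latex_sanitize_command_name('DatabaseInfo'), tabular_str)
    if write_fpath is not None:
        # The target .tex file is expected to contain the marker lines:
        #   % <DBINFO>
        #   % </DBINFO>
        fpath = ut.truepath(write_fpath)
        text = ut.readfrom(fpath)
        new_text = ut.replace_between_tags(
            text, tabular_cmd, '% <DBINFO>', '% </DBINFO>')
        ut.writeto(fpath, new_text)
    return tabular_cmd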