def ggr_random_name_splits(): """ CommandLine: python -m wbia.viz.viz_graph2 ggr_random_name_splits --show Ignore: sshfs -o idmap=user lev:/ ~/lev Example: >>> # DISABLE_DOCTEST >>> from wbia.viz.viz_graph2 import * # NOQA >>> ggr_random_name_splits() """ import wbia.guitool as gt gt.ensure_qtapp() # nid_list = ibs.get_valid_nids(filter_empty=True) import wbia dbdir = '/media/danger/GGR/GGR-IBEIS' dbdir = (dbdir if ut.checkpath(dbdir) else ut.truepath('~/lev/media/danger/GGR/GGR-IBEIS')) ibs = wbia.opendb(dbdir=dbdir, allow_newdir=False) import datetime day1 = datetime.date(2016, 1, 30) day2 = datetime.date(2016, 1, 31) orig_filter_kw = { 'multiple': None, # 'view': ['right'], # 'minqual': 'good', 'is_known': True, 'min_pername': 2, } orig_aids = ibs.filter_annots_general(filter_kw=ut.dict_union( orig_filter_kw, { 'min_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day1, 0.0)), 'max_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day2, 1.0)), }, )) orig_all_annots = ibs.annots(orig_aids) orig_unique_nids, orig_grouped_annots_ = orig_all_annots.group( orig_all_annots.nids) # Ensure we get everything orig_grouped_annots = [ ibs.annots(aids_) for aids_ in ibs.get_name_aids(orig_unique_nids) ] # pip install quantumrandom if False: import quantumrandom data = quantumrandom.uint16() seed = data.sum() print('seed = %r' % (seed, )) # import Crypto.Random # from Crypto import Random # quantumrandom.get_data() # StrongRandom = Crypto.Random.random.StrongRandom # aes.reseed(3340258) # chars = [str(chr(x)) for x in data.view(np.uint8)] # aes_seed = str('').join(chars) # aes = Crypto.Random.Fortuna.FortunaGenerator.AESGenerator() # aes.reseed(aes_seed) # aes.pseudo_random_data(10) orig_rand_idxs = ut.random_indexes(len(orig_grouped_annots), seed=3340258) orig_sample_size = 75 random_annot_groups = ut.take(orig_grouped_annots, orig_rand_idxs) orig_annot_sample = random_annot_groups[:orig_sample_size] # OOOPS MADE ERROR REDO ---- filter_kw = { 'multiple': None, 'view': ['right'], 'minqual': 'good', 'is_known': True, 'min_pername': 2, } filter_kw_ = ut.dict_union( filter_kw, { 'min_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day1, 0.0)), 'max_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day2, 1.0)), }, ) refiltered_sample = [ ibs.filter_annots_general(annot.aids, filter_kw=filter_kw_) for annot in orig_annot_sample ] is_ok = np.array(ut.lmap(len, refiltered_sample)) >= 2 ok_part_orig_sample = ut.compress(orig_annot_sample, is_ok) ok_part_orig_nids = [x.nids[0] for x in ok_part_orig_sample] # Now compute real sample aids = ibs.filter_annots_general(filter_kw=filter_kw_) all_annots = ibs.annots(aids) unique_nids, grouped_annots_ = all_annots.group(all_annots.nids) grouped_annots = grouped_annots_ # Ensure we get everything # grouped_annots = [ibs.annots(aids_) for aids_ in ibs.get_name_aids(unique_nids)] pop = len(grouped_annots) pername_list = ut.lmap(len, grouped_annots) groups = wbia.annots.AnnotGroups(grouped_annots, ibs) match_tags = [ut.unique(ut.flatten(t)) for t in groups.match_tags] tag_case_hist = ut.dict_hist(ut.flatten(match_tags)) print('name_pop = %r' % (pop, )) print('Annots per Multiton Name' + ut.repr3(ut.get_stats(pername_list, use_median=True))) print('Name Tag Hist ' + ut.repr3(tag_case_hist)) print('Percent Photobomb: %.2f%%' % (tag_case_hist['photobomb'] / pop * 100)) print('Percent Split: %.2f%%' % (tag_case_hist['splitcase'] / pop * 100)) # Remove the ok part from this sample remain_unique_nids = ut.setdiff(unique_nids, ok_part_orig_nids) remain_grouped_annots = [ ibs.annots(aids_) for aids_ in ibs.get_name_aids(remain_unique_nids) ] sample_size = 75 import vtool as vt vt.calc_sample_from_error_bars(0.05, pop, conf_level=0.95, prior=0.05) remain_rand_idxs = ut.random_indexes(len(remain_grouped_annots), seed=3340258) remain_sample_size = sample_size - len(ok_part_orig_nids) remain_random_annot_groups = ut.take(remain_grouped_annots, remain_rand_idxs) remain_annot_sample = remain_random_annot_groups[:remain_sample_size] annot_sample_nofilter = ok_part_orig_sample + remain_annot_sample # Filter out all bad parts annot_sample_filter = [ ibs.annots(ibs.filter_annots_general(annot.aids, filter_kw=filter_kw_)) for annot in annot_sample_nofilter ] annot_sample = annot_sample_filter win = None from wbia.viz import viz_graph2 for annots in ut.InteractiveIter(annot_sample): if win is not None: win.close() win = viz_graph2.make_qt_graph_interface(ibs, aids=annots.aids, init_mode='rereview') print(win) sample_groups = wbia.annots.AnnotGroups(annot_sample, ibs) flat_tags = [ut.unique(ut.flatten(t)) for t in sample_groups.match_tags] print('Using Split and Photobomb') is_positive = ['photobomb' in t or 'splitcase' in t for t in flat_tags] num_positive = sum(is_positive) vt.calc_error_bars_from_sample(sample_size, num_positive, pop, conf_level=0.95) print('Only Photobomb') is_positive = ['photobomb' in t for t in flat_tags] num_positive = sum(is_positive) vt.calc_error_bars_from_sample(sample_size, num_positive, pop, conf_level=0.95) print('Only SplitCase') is_positive = ['splitcase' in t for t in flat_tags] num_positive = sum(is_positive) vt.calc_error_bars_from_sample(sample_size, num_positive, pop, conf_level=0.95)
def split_analysis(ibs): """ CommandLine: python -m ibeis.other.dbinfo split_analysis --show python -m ibeis split_analysis --show python -m ibeis split_analysis --show --good Ignore: # mount sshfs -o idmap=user lev:/ ~/lev # unmount fusermount -u ~/lev Example: >>> # DISABLE_DOCTEST GGR >>> from ibeis.other.dbinfo import * # NOQA >>> import ibeis >>> dbdir = '/media/danger/GGR/GGR-IBEIS' >>> dbdir = dbdir if ut.checkpath(dbdir) else ut.truepath('~/lev/media/danger/GGR/GGR-IBEIS') >>> ibs = ibeis.opendb(dbdir=dbdir, allow_newdir=False) >>> import guitool_ibeis as gt >>> gt.ensure_qtapp() >>> win = split_analysis(ibs) >>> ut.quit_if_noshow() >>> import plottool_ibeis as pt >>> gt.qtapp_loop(qwin=win) >>> #ut.show_if_requested() """ #nid_list = ibs.get_valid_nids(filter_empty=True) import datetime day1 = datetime.date(2016, 1, 30) day2 = datetime.date(2016, 1, 31) filter_kw = { 'multiple': None, #'view': ['right'], #'minqual': 'good', 'is_known': True, 'min_pername': 1, } aids1 = ibs.filter_annots_general(filter_kw=ut.dict_union( filter_kw, { 'min_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day1, 0.0)), 'max_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day1, 1.0)), }) ) aids2 = ibs.filter_annots_general(filter_kw=ut.dict_union( filter_kw, { 'min_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day2, 0.0)), 'max_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day2, 1.0)), }) ) all_aids = aids1 + aids2 all_annots = ibs.annots(all_aids) print('%d annots on day 1' % (len(aids1)) ) print('%d annots on day 2' % (len(aids2)) ) print('%d annots overall' % (len(all_annots)) ) print('%d names overall' % (len(ut.unique(all_annots.nids))) ) nid_list, annots_list = all_annots.group(all_annots.nids) REVIEWED_EDGES = True if REVIEWED_EDGES: aids_list = [annots.aids for annots in annots_list] #aid_pairs = [annots.get_am_aidpairs() for annots in annots_list] # Slower aid_pairs = ibs.get_unflat_am_aidpairs(aids_list) # Faster else: # ALL EDGES aid_pairs = [annots.get_aidpairs() for annots in annots_list] speeds_list = ibs.unflat_map(ibs.get_annotpair_speeds, aid_pairs) import vtool_ibeis as vt max_speeds = np.array([vt.safe_max(s, nans=False) for s in speeds_list]) nan_idx = np.where(np.isnan(max_speeds))[0] inf_idx = np.where(np.isinf(max_speeds))[0] bad_idx = sorted(ut.unique(ut.flatten([inf_idx, nan_idx]))) ok_idx = ut.index_complement(bad_idx, len(max_speeds)) print('#nan_idx = %r' % (len(nan_idx),)) print('#inf_idx = %r' % (len(inf_idx),)) print('#ok_idx = %r' % (len(ok_idx),)) ok_speeds = max_speeds[ok_idx] ok_nids = ut.take(nid_list, ok_idx) ok_annots = ut.take(annots_list, ok_idx) sortx = np.argsort(ok_speeds)[::-1] sorted_speeds = np.array(ut.take(ok_speeds, sortx)) sorted_annots = np.array(ut.take(ok_annots, sortx)) sorted_nids = np.array(ut.take(ok_nids, sortx)) # NOQA sorted_speeds = np.clip(sorted_speeds, 0, 100) #idx = vt.find_elbow_point(sorted_speeds) #EXCESSIVE_SPEED = sorted_speeds[idx] # http://www.infoplease.com/ipa/A0004737.html # http://www.speedofanimals.com/animals/zebra #ZEBRA_SPEED_MAX = 64 # km/h #ZEBRA_SPEED_RUN = 50 # km/h ZEBRA_SPEED_SLOW_RUN = 20 # km/h #ZEBRA_SPEED_FAST_WALK = 10 # km/h #ZEBRA_SPEED_WALK = 7 # km/h MAX_SPEED = ZEBRA_SPEED_SLOW_RUN #MAX_SPEED = ZEBRA_SPEED_WALK #MAX_SPEED = EXCESSIVE_SPEED flags = sorted_speeds > MAX_SPEED flagged_ok_annots = ut.compress(sorted_annots, flags) inf_annots = ut.take(annots_list, inf_idx) flagged_annots = inf_annots + flagged_ok_annots print('MAX_SPEED = %r km/h' % (MAX_SPEED,)) print('%d annots with infinite speed' % (len(inf_annots),)) print('%d annots with large speed' % (len(flagged_ok_annots),)) print('Marking all pairs of annots above the threshold as non-matching') from ibeis.algo.graph import graph_iden import networkx as nx progkw = dict(freq=1, bs=True, est_window=len(flagged_annots)) bad_edges_list = [] good_edges_list = [] for annots in ut.ProgIter(flagged_annots, lbl='flag speeding names', **progkw): edge_to_speeds = annots.get_speeds() bad_edges = [edge for edge, speed in edge_to_speeds.items() if speed > MAX_SPEED] good_edges = [edge for edge, speed in edge_to_speeds.items() if speed <= MAX_SPEED] bad_edges_list.append(bad_edges) good_edges_list.append(good_edges) all_bad_edges = ut.flatten(bad_edges_list) good_edges_list = ut.flatten(good_edges_list) print('num_bad_edges = %r' % (len(ut.flatten(bad_edges_list)),)) print('num_bad_edges = %r' % (len(ut.flatten(good_edges_list)),)) if 1: from ibeis.viz import viz_graph2 import guitool_ibeis as gt gt.ensure_qtapp() if ut.get_argflag('--good'): print('Looking at GOOD (no speed problems) edges') aid_pairs = good_edges_list else: print('Looking at BAD (speed problems) edges') aid_pairs = all_bad_edges aids = sorted(list(set(ut.flatten(aid_pairs)))) infr = graph_iden.AnnotInference(ibs, aids, verbose=False) infr.initialize_graph() # Use random scores to randomize sort order rng = np.random.RandomState(0) scores = (-rng.rand(len(aid_pairs)) * 10).tolist() infr.graph.add_edges_from(aid_pairs) if True: edge_sample_size = 250 pop_nids = ut.unique(ibs.get_annot_nids(ut.unique(ut.flatten(aid_pairs)))) sorted_pairs = ut.sortedby(aid_pairs, scores)[::-1][0:edge_sample_size] sorted_nids = ibs.get_annot_nids(ut.take_column(sorted_pairs, 0)) sample_size = len(ut.unique(sorted_nids)) am_rowids = ibs.get_annotmatch_rowid_from_undirected_superkey(*zip(*sorted_pairs)) flags = ut.not_list(ut.flag_None_items(am_rowids)) #am_rowids = ut.compress(am_rowids, flags) positive_tags = ['SplitCase', 'Photobomb'] flags_list = [ut.replace_nones(ibs.get_annotmatch_prop(tag, am_rowids), 0) for tag in positive_tags] print('edge_case_hist: ' + ut.repr3( ['%s %s' % (txt, sum(flags_)) for flags_, txt in zip(flags_list, positive_tags)])) is_positive = ut.or_lists(*flags_list) num_positive = sum(ut.lmap(any, ut.group_items(is_positive, sorted_nids).values())) pop = len(pop_nids) print('A positive is any edge flagged as a %s' % (ut.conj_phrase(positive_tags, 'or'),)) print('--- Sampling wrt edges ---') print('edge_sample_size = %r' % (edge_sample_size,)) print('edge_population_size = %r' % (len(aid_pairs),)) print('num_positive_edges = %r' % (sum(is_positive))) print('--- Sampling wrt names ---') print('name_population_size = %r' % (pop,)) vt.calc_error_bars_from_sample(sample_size, num_positive, pop, conf_level=.95) nx.set_edge_attributes(infr.graph, name='score', values=dict(zip(aid_pairs, scores))) win = viz_graph2.AnnotGraphWidget(infr=infr, use_image=False, init_mode=None) win.populate_edge_model() win.show() return win # Make review interface for only bad edges infr_list = [] iter_ = list(zip(flagged_annots, bad_edges_list)) for annots, bad_edges in ut.ProgIter(iter_, lbl='creating inference', **progkw): aids = annots.aids nids = [1] * len(aids) infr = graph_iden.AnnotInference(ibs, aids, nids, verbose=False) infr.initialize_graph() infr.reset_feedback() infr_list.append(infr) # Check which ones are user defined as incorrect #num_positive = 0 #for infr in infr_list: # flag = np.any(infr.get_feedback_probs()[0] == 0) # num_positive += flag #print('num_positive = %r' % (num_positive,)) #pop = len(infr_list) #print('pop = %r' % (pop,)) iter_ = list(zip(infr_list, bad_edges_list)) for infr, bad_edges in ut.ProgIter(iter_, lbl='adding speed edges', **progkw): flipped_edges = [] for aid1, aid2 in bad_edges: if infr.graph.has_edge(aid1, aid2): flipped_edges.append((aid1, aid2)) infr.add_feedback((aid1, aid2), NEGTV) nx.set_edge_attributes(infr.graph, name='_speed_split', values='orig') nx.set_edge_attributes(infr.graph, name='_speed_split', values={edge: 'new' for edge in bad_edges}) nx.set_edge_attributes(infr.graph, name='_speed_split', values={edge: 'flip' for edge in flipped_edges}) #for infr in ut.ProgIter(infr_list, lbl='flagging speeding edges', **progkw): # annots = ibs.annots(infr.aids) # edge_to_speeds = annots.get_speeds() # bad_edges = [edge for edge, speed in edge_to_speeds.items() if speed > MAX_SPEED] def inference_stats(infr_list_): relabel_stats = [] for infr in infr_list_: num_ccs, num_inconsistent = infr.relabel_using_reviews() state_hist = ut.dict_hist(nx.get_edge_attributes(infr.graph, 'decision').values()) if POSTV not in state_hist: state_hist[POSTV] = 0 hist = ut.dict_hist(nx.get_edge_attributes(infr.graph, '_speed_split').values()) subgraphs = infr.positive_connected_compoments() subgraph_sizes = [len(g) for g in subgraphs] info = ut.odict([ ('num_nonmatch_edges', state_hist[NEGTV]), ('num_match_edges', state_hist[POSTV]), ('frac_nonmatch_edges', state_hist[NEGTV] / (state_hist[POSTV] + state_hist[NEGTV])), ('num_inconsistent', num_inconsistent), ('num_ccs', num_ccs), ('edges_flipped', hist.get('flip', 0)), ('edges_unchanged', hist.get('orig', 0)), ('bad_unreviewed_edges', hist.get('new', 0)), ('orig_size', len(infr.graph)), ('new_sizes', subgraph_sizes), ]) relabel_stats.append(info) return relabel_stats relabel_stats = inference_stats(infr_list) print('\nAll Split Info:') lines = [] for key in relabel_stats[0].keys(): data = ut.take_column(relabel_stats, key) if key == 'new_sizes': data = ut.flatten(data) lines.append('stats(%s) = %s' % (key, ut.repr2(ut.get_stats(data, use_median=True), precision=2))) print('\n'.join(ut.align_lines(lines, '='))) num_incon_list = np.array(ut.take_column(relabel_stats, 'num_inconsistent')) can_split_flags = num_incon_list == 0 print('Can trivially split %d / %d' % (sum(can_split_flags), len(can_split_flags))) splittable_infrs = ut.compress(infr_list, can_split_flags) relabel_stats = inference_stats(splittable_infrs) print('\nTrival Split Info:') lines = [] for key in relabel_stats[0].keys(): if key in ['num_inconsistent']: continue data = ut.take_column(relabel_stats, key) if key == 'new_sizes': data = ut.flatten(data) lines.append('stats(%s) = %s' % ( key, ut.repr2(ut.get_stats(data, use_median=True), precision=2))) print('\n'.join(ut.align_lines(lines, '='))) num_match_edges = np.array(ut.take_column(relabel_stats, 'num_match_edges')) num_nonmatch_edges = np.array(ut.take_column(relabel_stats, 'num_nonmatch_edges')) flags1 = np.logical_and(num_match_edges > num_nonmatch_edges, num_nonmatch_edges < 3) reasonable_infr = ut.compress(splittable_infrs, flags1) new_sizes_list = ut.take_column(relabel_stats, 'new_sizes') flags2 = [len(sizes) == 2 and sum(sizes) > 4 and (min(sizes) / max(sizes)) > .3 for sizes in new_sizes_list] reasonable_infr = ut.compress(splittable_infrs, flags2) print('#reasonable_infr = %r' % (len(reasonable_infr),)) for infr in ut.InteractiveIter(reasonable_infr): annots = ibs.annots(infr.aids) edge_to_speeds = annots.get_speeds() print('max_speed = %r' % (max(edge_to_speeds.values())),) infr.initialize_visual_node_attrs() infr.show_graph(use_image=True, only_reviewed=True) rest = ~np.logical_or(flags1, flags2) nonreasonable_infr = ut.compress(splittable_infrs, rest) rng = np.random.RandomState(0) random_idx = ut.random_indexes(len(nonreasonable_infr) - 1, 15, rng=rng) random_infr = ut.take(nonreasonable_infr, random_idx) for infr in ut.InteractiveIter(random_infr): annots = ibs.annots(infr.aids) edge_to_speeds = annots.get_speeds() print('max_speed = %r' % (max(edge_to_speeds.values())),) infr.initialize_visual_node_attrs() infr.show_graph(use_image=True, only_reviewed=True) #import scipy.stats as st #conf_interval = .95 #st.norm.cdf(conf_interval) # view-source:http://www.surveysystem.com/sscalc.htm #zval = 1.96 # 95 percent confidence #zValC = 3.8416 # #zValC = 6.6564 #import statsmodels.stats.api as sms #es = sms.proportion_effectsize(0.5, 0.75) #sms.NormalIndPower().solve_power(es, power=0.9, alpha=0.05, ratio=1) pop = 279 num_positive = 3 sample_size = 15 conf_level = .95 #conf_level = .99 vt.calc_error_bars_from_sample(sample_size, num_positive, pop, conf_level) print('---') vt.calc_error_bars_from_sample(sample_size + 38, num_positive, pop, conf_level) print('---') vt.calc_error_bars_from_sample(sample_size + 38 / 3, num_positive, pop, conf_level) print('---') vt.calc_error_bars_from_sample(15 + 38, num_positive=3, pop=675, conf_level=.95) vt.calc_error_bars_from_sample(15, num_positive=3, pop=675, conf_level=.95) pop = 279 #err_frac = .05 # 5% err_frac = .10 # 10% conf_level = .95 vt.calc_sample_from_error_bars(err_frac, pop, conf_level) pop = 675 vt.calc_sample_from_error_bars(err_frac, pop, conf_level) vt.calc_sample_from_error_bars(.05, pop, conf_level=.95, prior=.1) vt.calc_sample_from_error_bars(.05, pop, conf_level=.68, prior=.2) vt.calc_sample_from_error_bars(.10, pop, conf_level=.68) vt.calc_error_bars_from_sample(100, num_positive=5, pop=675, conf_level=.95) vt.calc_error_bars_from_sample(100, num_positive=5, pop=675, conf_level=.68)
def test_scoremech(): import utool as ut import wbia from wbia.algo.hots import _pipeline_helpers as plh # NOQA base = {'query_rotation_heuristic': False, 'sv_on': False} base = {'query_rotation_heuristic': True, 'sv_on': False, 'K': 1} base = {'query_rotation_heuristic': True, 'sv_on': False, 'K': 1} cfgdict1 = ut.dict_union(base, { 'score_method': 'nsum', 'prescore_method': 'nsum' }) cfgdict2 = ut.dict_union(base, { 'score_method': 'csum', 'prescore_method': 'csum' }) qaids = [13] daids = [ 1, 5, 11, 19, 22, 27, 31, 36, 39, 42, 43, 46, 50, 53, 55, 58, 64, 68, 73, 79, 81, 84, 85, 89, 95, 98, 99, 105, 108, 111, 114, 119, ] ibs = wbia.opendb('PZ_MTEST') qreq1_ = ibs.new_query_request(qaids, daids, cfgdict=cfgdict1) qreq2_ = ibs.new_query_request(qaids, daids, cfgdict=cfgdict2) cm_list1 = qreq1_.execute() cm_list2 = qreq2_.execute() cm1, cm2 = cm_list1[0], cm_list2[0] ai1 = cm1.pandas_annot_info().set_index(['daid', 'dnid'], drop=True) ai2 = cm2.pandas_annot_info().set_index(['daid', 'dnid'], drop=True) ai1 = ai1.rename(columns={c: c + '1' for c in ai2.columns}) ai2 = ai2.rename(columns={c: c + '2' for c in ai2.columns}) # logger.info(ai1.join(ai2)) ni1 = cm1.pandas_name_info().set_index(['dnid'], drop=True) ni2 = cm2.pandas_name_info().set_index(['dnid'], drop=True) ni1 = ni1.rename(columns={c: c + '1' for c in ni2.columns}) ni2 = ni2.rename(columns={c: c + '2' for c in ni2.columns}) logger.info(ni1.join(ni2)) cm1 == cm2 from wbia.algo.hots.chip_match import check_arrs_eq cm1.score_list == cm2.score_list cm1.name_score_list == cm2.name_score_list cm2.annot_score_list == cm2.annot_score_list assert check_arrs_eq(cm1.fm_list, cm2.fm_list) assert check_arrs_eq(cm1.fsv_list, cm2.fsv_list) # assert np.all(cm1.daid_list == cm2.daid_list) cm1.score_list cm2.score_list cm, qreq_ = cm1, qreq1_ cm.evaluate_nsum_name_score(qreq_) cm.evaluate_maxcsum_name_score(qreq_) # fmech = cm.algo_name_scores['nsum'] # amech = cm.algo_name_scores['maxcsum'] if qreq_.qparams.K == 1 and qreq_.qparams.query_rotation_heuristic is False: import numpy as np assert np.all( cm.algo_name_scores['nsum'] == cm.algo_name_scores['maxcsum']) from wbia.algo.hots import name_scoring # name_scoring.compute_fmech_score(cm, qreq_=qreq_, hack_single_ori=False) name_scoring.compute_fmech_score(cm, qreq_=qreq_)