def get_test_daids(ibs, default_daids='all', qaid_list=None, return_annot_info=False, aidcfg=None):
    """
    Gets database annot_rowids based on command line arguments

    DEPRECATE

    The result is built in three stages: an include step (``--controlled`` /
    ``--controlled_daids``, otherwise ``default_daids``), an exclusion step
    (junk annots unless ``--junk``, ``--exclude-query``, ``--species``) and a
    subindexing step (``--dshuffle`` and ``params.args.dindex``).

    CommandLine:
        python dev.py --db PZ_MTEST -t best --exclude-query --qaid 72 -r 0 -c 0 --show --va --vf --dump-extra

    Args:
        ibs (IBEISController):  ibeis controller object
        default_daids (str or list): used only when the command line selects
            no daids. Either 'all', 'gt' (groundtruth of ``qaid_list``) or an
            explicit list of aids (default = 'all')
        qaid_list (list): list of chosen qaids that may affect daids (default = None)
        return_annot_info (bool): if True also return the dict describing how
            the daids were requested (default = False)
        aidcfg (None): not used by this function

    Returns:
        list: available_daids
            (or the tuple ``(available_daids, daid_request_info)`` when
            ``return_annot_info`` is True)

    Raises:
        ValueError: if ``default_daids`` is a string other than 'all' or 'gt'
            and the defaults are needed
        AssertionError: if ``--exclude-query`` is given without ``qaid_list``

    CommandLine:
        python -m ibeis.init.main_helpers --test-get_test_daids
        python -m ibeis.init.main_helpers --test-get_test_daids --db PZ_MTEST --verbmhelp
        python -m ibeis.init.main_helpers --test-get_test_daids --db PZ_MTEST --exclude-query
        python -m ibeis.init.main_helpers --test-get_test_daids --db PZ_MTEST --daid-exclude 2 3 4
        python -m ibeis.init.main_helpers --test-get_test_daids --db PZ_MTEST --species=zebra_grevys
        python -m ibeis.init.main_helpers --test-get_test_daids --db PZ_Master0 --species=zebra_grevys
        python -m ibeis.init.main_helpers --test-get_test_daids --db PZ_Master0 --controlled --verbmhelp
        python -m ibeis.init.main_helpers --exec-get_test_daids --controlled --db PZ_Master0 --exec-mode

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.init.main_helpers import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='testdb1')
        >>> default_daids = 'all'
        >>> qaid_list = [1]
        >>> available_daids = get_test_daids(ibs, default_daids, qaid_list)
        >>> ibeis.other.dbinfo.get_dbinfo(ibs, aid_list=available_daids, with_contrib=False, short=True)
        >>> result = 'available_daids = ' + ut.obj_str(available_daids, truncate=True, nl=False)
        >>> print('len(available_daids) %d' % len(available_daids))
        >>> print(result)
        available_daids = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
    """
    daid_request_info = {}

    if VERB_MAIN_HELPERS:
        print('[get_test_daids] + --- GET_TEST_DAIDS ---')
        print('[get_test_daids] * default_daids = %s' % (ut.obj_str(default_daids, truncate=True, nl=False)))
        print('[get_test_daids] * qaid_list = %s' % (ut.obj_str(qaid_list, truncate=True, nl=False)))

    # ---- INCLUDING STEP
    if VERB_MAIN_HELPERS:
        print('[get_test_daids] * include step')
    available_daids = []

    CONTROLLED_CASES = ut.get_argflag('--controlled') or ut.get_argflag('--controlled_daids')
    DSHUFFLE = ut.get_argval('--dshuffle')
    DINDEX = params.args.dindex
    NO_JUNK = not ut.get_argflag('--junk')
    EXCLUDE_QUERY = ut.get_argflag('--exclude-query')
    #daids_exclude = params.args.daid_exclude
    daids_exclude = None

    if CONTROLLED_CASES:
        print('[get_test_daids] * Including controlled daids')
        from ibeis.other import ibsfuncs
        controlled_daids = ibsfuncs.get_two_annots_per_name_and_singletons(ibs, onlygt=False)
        available_daids.extend(controlled_daids)
        daid_request_info['controlled'] = True
    else:
        daid_request_info['controlled'] = False

    # ---- CHECK_DEFAULTS DATA
    if VERB_MAIN_HELPERS:
        print('[get_test_daids] * len(available_daids) = %r' % (len(available_daids)))

    if len(available_daids) == 0:
        print('[get_test_daids] * ... defaulting, no available daids on command line.')
        if isinstance(default_daids, six.string_types):
            if default_daids == 'all':
                default_daids = ibs.get_valid_aids()
                daid_request_info['default_daids'] = 'all'
            elif default_daids == 'gt':
                default_daids = ut.flatten(ibs.get_annot_groundtruth(qaid_list))
                daid_request_info['default_daids'] = 'gt'
            else:
                # The previous guard asserted on `available_daids` (always a
                # list here), so an unknown keyword was silently used as if
                # the string itself were the list of aids.
                raise ValueError(
                    'unrecognized default_daids=%r, expected \'all\', \'gt\' '
                    'or a list of aids' % (default_daids,))
        #available_qaids = valid_aids[0:1]
        available_daids = default_daids
    else:
        if VERB_MAIN_HELPERS:
            print('[get_test_daids] * ... not defaulting')

    available_daids = ut.unique_ordered(available_daids)

    # ---- EXCLUSION STEP
    if VERB_MAIN_HELPERS:
        print('[get_test_daids] * len(available_daids) = %r' % (len(available_daids)))
        print('[get_test_daids] * exclude step')

    species = ut.get_argval('--species', type_=str, default=None)

    if NO_JUNK:
        if VERB_MAIN_HELPERS:
            print('[get_test_daids] * Filtering junk')
        available_daids = ibs.filter_junk_annotations(available_daids)

    if EXCLUDE_QUERY:
        if VERB_MAIN_HELPERS:
            print('[get_test_daids] * Excluding query qaids')
        assert qaid_list is not None, 'must specify qaids to exclude'
        available_daids = ut.setdiff_ordered(available_daids, qaid_list)

    # daids_exclude is hard-coded to None above, so this branch is currently
    # inactive; it is kept for when the command line option is restored.
    if daids_exclude is not None:
        if VERB_MAIN_HELPERS:
            print('[get_test_daids] * Excluding specified daids')
        available_daids = ut.setdiff_ordered(available_daids, daids_exclude)

    if species is not None:
        if species == 'primary':
            if VERB_MAIN_HELPERS:
                print('[get_test_daids] * Finding primary species')
            #species = ibs.get_primary_database_species(available_daids)
            species = ibs.get_primary_database_species()
        if VERB_MAIN_HELPERS:
            print('[get_test_daids] * Filtering to species=%r' % (species,))
        import numpy as np
        isvalid_list = np.array(ibs.get_annot_species(available_daids)) == species
        available_daids = ut.compress(available_daids, isvalid_list)

    # ---- SUBINDEXING STEP
    if VERB_MAIN_HELPERS:
        print('[get_test_daids] * len(available_daids) = %r' % (len(available_daids)))
        print('[get_test_daids] * subindex step')

    #ut.get_argval('--qshuffle')
    if DSHUFFLE:
        # Deterministic shuffling (fixed seed)
        available_daids = ut.take(available_daids, ut.random_indexes(len(available_daids), seed=43))
        daid_request_info['shuffled'] = True

    if DINDEX is not None:
        dindexes = ensure_flatlistlike(DINDEX)
        # Indexes past the end of the list are dropped rather than raising
        _test_daids = [available_daids[dx] for dx in dindexes if dx < len(available_daids)]
        print('[get_test_daids] Chose subset of size %d/%d' % (len(_test_daids), len(available_daids)))
        available_daids = _test_daids

    if VERB_MAIN_HELPERS:
        print('[get_test_daids] * len(available_daids) = %r' % (len(available_daids)))
        print('[get_test_daids] L ___ GET_TEST_DAIDS ___')

    if return_annot_info:
        return available_daids, daid_request_info
    else:
        return available_daids
def test_siamese_performance(model, data, labels, flat_metadata, dataname=''):
    r"""
    Scores a siamese model on labeled patch data and saves diagnostic figures.

    All figures and the era history text file are written into
    ``model.arch_dpath``. Unless ``--quick`` is given, SIFT scores are also
    computed for comparison and confusion / descriptor example figures are
    drawn.

    Args:
        model: network object; this function reads ``arch_dpath``,
            ``era_history`` and ``data_per_label_output`` and calls
            ``get_history_hashid``, ``show_era_loss``, ``show_weights_image``
            and ``predict2`` on it
        data: patch data; consecutive items are treated as one pair
            (NOTE(review): layout of each item is not visible here - confirm)
        labels: one label per pair, passed to the score normalizers
        flat_metadata: not used by this function
        dataname (str): prefix for figure titles (default = '')

    Raises:
        AssertionError: if the network output has an unsupported shape, or a
            descriptor network does not emit exactly two outputs per label

    CommandLine:
        utprof.py -m ibeis_cnn --tf pz_patchmatch --db liberty --test --weights=liberty:current --arch=siaml2_128 --test
        python -m ibeis_cnn --tf netrun --db liberty --arch=siaml2_128 --test --ensure
        python -m ibeis_cnn --tf netrun --db liberty --arch=siaml2_128 --test --ensure --weights=new
        python -m ibeis_cnn --tf netrun --db liberty --arch=siaml2_128 --train --weights=new
        python -m ibeis_cnn --tf netrun --db pzmtest --weights=liberty:current --arch=siaml2_128 --test  # NOQA
        python -m ibeis_cnn --tf netrun --db pzmtest --weights=liberty:current --arch=siaml2_128
    """
    import vtool as vt
    import plottool as pt

    # TODO: save in model.trainind_dpath/diagnostics/figures
    ut.colorprint('\n[siam_perf] Testing Siamese Performance', 'white')
    #epoch_dpath = model.get_epoch_diagnostic_dpath()
    epoch_dpath = model.arch_dpath
    ut.vd(epoch_dpath)

    dataname += ' ' + model.get_history_hashid() + '\n'

    history_text = ut.list_str(model.era_history, newlines=True)

    ut.write_to(ut.unixjoin(epoch_dpath, 'era_history.txt'), history_text)

    #if True:
    #    import matplotlib as mpl
    #    mpl.rcParams['agg.path.chunksize'] = 100000

    #data = data[::50]
    #labels = labels[::50]
    #from ibeis_cnn import utils
    #data, labels = utils.random_xy_sample(data, labels, 10000, model.data_per_label_input)

    # --quick skips the SIFT comparison and the example figures
    FULL = not ut.get_argflag('--quick')

    # Each call to fnum_gen() hands out the next figure number, so the order
    # of the calls below determines which figure gets which number.
    fnum_gen = pt.make_fnum_nextgen()

    ut.colorprint('[siam_perf] Show era history', 'white')
    fig = model.show_era_loss(fnum=fnum_gen())
    pt.save_figure(fig=fig, dpath=epoch_dpath, dpi=180)

    # hack
    ut.colorprint('[siam_perf] Show weights image', 'white')
    fig = model.show_weights_image(fnum=fnum_gen())
    pt.save_figure(fig=fig, dpath=epoch_dpath, dpi=180)
    #model.draw_all_conv_layer_weights(fnum=fnum_gen())
    #model.imwrite_weights(1)
    #model.imwrite_weights(2)

    # Compute each type of score
    ut.colorprint('[siam_perf] Building Scores', 'white')
    test_outputs = model.predict2(model, data)
    network_output = test_outputs['network_output_determ']
    # hack converting network output to distances for non-descriptor networks
    if len(network_output.shape) == 2 and network_output.shape[1] == 1:
        # Column vector of scores
        cnn_scores = network_output.T[0]
    elif len(network_output.shape) == 1:
        # Already a flat vector of scores
        cnn_scores = network_output
    elif len(network_output.shape) == 2 and network_output.shape[1] > 1:
        # Descriptor network: even rows pair with odd rows, score is L2 distance
        if model.data_per_label_output != 2:
            # Explicit raise so the check survives `python -O`
            raise AssertionError(
                'expected model.data_per_label_output == 2, got %r' % (
                    model.data_per_label_output,))
        vecs1 = network_output[0::2]
        vecs2 = network_output[1::2]
        cnn_scores = vt.L2(vecs1, vecs2)
    else:
        # Previously `assert False`, which is stripped under `python -O` and
        # would have surfaced later as a NameError on cnn_scores.
        raise AssertionError(
            'unsupported network_output shape %r' % (network_output.shape,))
    cnn_scores = cnn_scores.astype(np.float64)

    # Segfaults with the data passed in is large (AND MEMMAPPED apparently)
    # Fixed in hesaff implementation
    SIFT = FULL
    if SIFT:
        sift_scores, sift_list = test_sift_patchmatch_scores(data, labels)
        sift_scores = sift_scores.astype(np.float64)

    ut.colorprint('[siam_perf] Learning Encoders', 'white')
    # Learn encoders
    encoder_kw = {
        #'monotonize': False,
        'monotonize': True,
    }
    cnn_encoder = vt.ScoreNormalizer(**encoder_kw)
    cnn_encoder.fit(cnn_scores, labels)

    if SIFT:
        sift_encoder = vt.ScoreNormalizer(**encoder_kw)
        sift_encoder.fit(sift_scores, labels)

    # Visualize
    ut.colorprint('[siam_perf] Visualize Encoders', 'white')
    viz_kw = dict(
        with_scores=False,
        with_postbayes=False,
        with_prebayes=False,
        target_tpr=.95,
    )
    inter_cnn = cnn_encoder.visualize(
        figtitle=dataname + ' CNN scores. #data=' + str(len(data)),
        fnum=fnum_gen(), **viz_kw)
    if SIFT:
        inter_sift = sift_encoder.visualize(
            figtitle=dataname + ' SIFT scores. #data=' + str(len(data)),
            fnum=fnum_gen(), **viz_kw)

    # Save
    pt.save_figure(fig=inter_cnn.fig, dpath=epoch_dpath)
    if SIFT:
        pt.save_figure(fig=inter_sift.fig, dpath=epoch_dpath)

    # Save out examples of hard errors
    #cnn_fp_label_indicies, cnn_fn_label_indicies =
    #cnn_encoder.get_error_indicies(cnn_scores, labels)
    #sift_fp_label_indicies, sift_fn_label_indicies =
    #sift_encoder.get_error_indicies(sift_scores, labels)

    # NOTE: with_patch_examples, with_patch_desc and SIFT are all set from
    # FULL, so the SIFT variables used below exist whenever these run.
    with_patch_examples = FULL
    if with_patch_examples:
        ut.colorprint('[siam_perf] Visualize Confusion Examples', 'white')
        cnn_indicies = cnn_encoder.get_confusion_indicies(cnn_scores, labels)
        if SIFT:
            sift_indicies = sift_encoder.get_confusion_indicies(sift_scores, labels)

        # Split the interleaved data into the first and second patch of each pair
        warped_patch1_list, warped_patch2_list = list(zip(*ut.ichunks(data, 2)))
        samp_args = (warped_patch1_list, warped_patch2_list, labels)
        _sample = functools.partial(draw_results.get_patch_sample_img, *samp_args)

        cnn_fp_img = _sample({'fs': cnn_scores}, cnn_indicies.fp)[0]
        cnn_fn_img = _sample({'fs': cnn_scores}, cnn_indicies.fn)[0]
        cnn_tp_img = _sample({'fs': cnn_scores}, cnn_indicies.tp)[0]
        cnn_tn_img = _sample({'fs': cnn_scores}, cnn_indicies.tn)[0]

        if SIFT:
            sift_fp_img = _sample({'fs': sift_scores}, sift_indicies.fp)[0]
            sift_fn_img = _sample({'fs': sift_scores}, sift_indicies.fn)[0]
            sift_tp_img = _sample({'fs': sift_scores}, sift_indicies.tp)[0]
            sift_tn_img = _sample({'fs': sift_scores}, sift_indicies.tn)[0]

        #if ut.show_was_requested():
        #def rectify(arr):
        #    return np.flipud(arr)
        SINGLE_FIG = False
        if SINGLE_FIG:
            def dump_img(img_, lbl, fnum):
                fig, ax = pt.imshow(img_, figtitle=dataname + ' ' + lbl, fnum=fnum)
                pt.save_figure(fig=fig, dpath=epoch_dpath, dpi=180)
            dump_img(cnn_fp_img, 'cnn_fp_img', fnum_gen())
            dump_img(cnn_fn_img, 'cnn_fn_img', fnum_gen())
            dump_img(cnn_tp_img, 'cnn_tp_img', fnum_gen())
            dump_img(cnn_tn_img, 'cnn_tn_img', fnum_gen())

            dump_img(sift_fp_img, 'sift_fp_img', fnum_gen())
            dump_img(sift_fn_img, 'sift_fn_img', fnum_gen())
            dump_img(sift_tp_img, 'sift_tp_img', fnum_gen())
            dump_img(sift_tn_img, 'sift_tn_img', fnum_gen())
            #vt.imwrite(dataname + '_' + 'cnn_fp_img.png', (cnn_fp_img))
            #vt.imwrite(dataname + '_' + 'cnn_fn_img.png', (cnn_fn_img))
            #vt.imwrite(dataname + '_' + 'sift_fp_img.png', (sift_fp_img))
            #vt.imwrite(dataname + '_' + 'sift_fn_img.png', (sift_fn_img))
        else:
            # One 4x2 figure: a row per confusion type, CNN left and SIFT right
            print('Drawing TP FP TN FN')
            fnum = fnum_gen()
            pnum_gen = pt.make_pnum_nextgen(4, 2)
            fig = pt.figure(fnum)
            pt.imshow(cnn_fp_img, title='CNN FP', fnum=fnum, pnum=pnum_gen())
            pt.imshow(sift_fp_img, title='SIFT FP', fnum=fnum, pnum=pnum_gen())
            pt.imshow(cnn_fn_img, title='CNN FN', fnum=fnum, pnum=pnum_gen())
            pt.imshow(sift_fn_img, title='SIFT FN', fnum=fnum, pnum=pnum_gen())
            pt.imshow(cnn_tp_img, title='CNN TP', fnum=fnum, pnum=pnum_gen())
            pt.imshow(sift_tp_img, title='SIFT TP', fnum=fnum, pnum=pnum_gen())
            pt.imshow(cnn_tn_img, title='CNN TN', fnum=fnum, pnum=pnum_gen())
            pt.imshow(sift_tn_img, title='SIFT TN', fnum=fnum, pnum=pnum_gen())
            pt.set_figtitle(dataname + ' confusions')
            pt.adjust_subplots(left=0, right=1.0, bottom=0., wspace=.01, hspace=.05)
            pt.save_figure(fig=fig, dpath=epoch_dpath, dpi=180, figsize=(9, 18))

    with_patch_desc = FULL
    if with_patch_desc:
        ut.colorprint('[siam_perf] Visualize Patch Descriptors', 'white')
        fnum = fnum_gen()
        fig = pt.figure(fnum=fnum, pnum=(1, 1, 1))
        num_rows = 7
        pnum_gen = pt.make_pnum_nextgen(num_rows, 3)
        # Compare actual output descriptors
        for index in ut.random_indexes(len(sift_list), num_rows):
            vec_sift = sift_list[index]
            vec_cnn = network_output[index]
            patch = data[index]
            pt.imshow(patch, fnum=fnum, pnum=pnum_gen())
            pt.plot_descriptor_signature(vec_cnn, 'cnn vec', fnum=fnum, pnum=pnum_gen())
            pt.plot_sift_signature(vec_sift, 'sift vec', fnum=fnum, pnum=pnum_gen())
        pt.set_figtitle('Patch Descriptors')
        pt.adjust_subplots(left=0, right=0.95, bottom=0., wspace=.1, hspace=.15)
        pt.save_figure(fig=fig, dpath=epoch_dpath, dpi=180, figsize=(9, 18))
def get_test_qaids(ibs, default_qaids=None, return_annot_info=False, aidcfg=None):
    """
    Gets test annot_rowids based on command line arguments

    DEPRECATE

    The result is built in three stages: an include step (``--qaid``, all /
    hard / groundtruth / controlled cases, otherwise ``default_qaids``), an
    exclusion step (junk annots unless ``--junk``, ``--unreviewed``,
    ``--species``) and a subindexing step (``--qshuffle`` and
    ``params.args.qindex``).

    Args:
        ibs (IBEISController):  ibeis controller object
        default_qaids (None): if list then used only if no other aids are
            available. If None the first valid aid is used. The strings
            'all', 'gt' and 'allgt' select all / groundtruthed annots
            (default = None)
        return_annot_info (bool): if True also return the dict describing how
            the qaids were requested (default = False)
        aidcfg (None): not used by this function

    Returns:
        list: available_qaids
            (or the tuple ``(available_qaids, qaid_request_info)`` when
            ``return_annot_info`` is True)

    Raises:
        ValueError: if ``default_qaids`` is an unrecognized string and the
            defaults are needed

    CommandLine:
        python -m ibeis.init.main_helpers --test-get_test_qaids
        python -m ibeis.init.main_helpers --test-get_test_qaids --controlled --db PZ_Master0
        python -m ibeis.init.main_helpers --test-get_test_qaids --controlled --db PZ_Master0 --qaid 1
        python -m ibeis.init.main_helpers --test-get_test_qaids --allgt --db PZ_MTEST
        python -m ibeis.init.main_helpers --test-get_test_qaids --qaid 4 5 8 --verbmhelp
        python -m ibeis.init.main_helpers --test-get_test_qaids --controlled --db PZ_MTEST
        python -m ibeis.init.main_helpers --test-get_test_qaids --controlled --db PZ_MTEST --qaid 2 --verbmhelp
        python -m ibeis.init.main_helpers --test-get_test_qaids --controlled --db PZ_MTEST --qaid 2
        python -m ibeis.init.main_helpers --test-get_test_qaids --controlled --db PZ_Master0 --qindex 0:10 --verbmhelp
        python -m ibeis.init.main_helpers --exec-get_test_qaids --controlled --db PZ_Master0 --exec-mode
        python -m ibeis.init.main_helpers --exec-get_test_qaids --db testdb1 --allgt --qindex 0:256

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.init.main_helpers import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='testdb1')
        >>> default_qaids = None
        >>> available_qaids = get_test_qaids(ibs, default_qaids)
        >>> ibeis.other.dbinfo.get_dbinfo(ibs, aid_list=available_qaids, with_contrib=False, short=True)
        >>> result = 'available_qaids = ' + ut.obj_str(available_qaids, truncate=True, nl=False)
        >>> print('len(available_qaids) = %d' % len(available_qaids))
        >>> print(result)
        available_qaids = [1]
    """
    qaid_request_info = {}

    # Old version of this function
    if VERB_MAIN_HELPERS:
        print('[get_test_qaids] + --- GET_TEST_QAIDS ---')
        print('[get_test_qaids] * default_qaids = %s' % (ut.obj_str(default_qaids, truncate=True, nl=False)))

    valid_aids = ibs.get_valid_aids()

    if len(valid_aids) == 0:
        print('[get_test_qaids] WARNING no annotations available')

    # ---- INCLUDING STEP
    if VERB_MAIN_HELPERS:
        print('[get_test_qaids] * include step')

    available_qaids = []

    #ut.get_argflag(('--all-cases', '--all'))
    #ut.get_argflag(('--all-gt-cases', '--allgt'))
    #ut.get_argflag(('--all-hard-cases', '--allhard'))
    #ut.get_argflag(('--qaid', '--qaids'))
    #ut.get_argflag('--controlled') or ut.get_argflag('--controlled_qaids')
    #not ut.get_argflag('--junk')

    ALL_CASES = params.args.all_cases or default_qaids == 'all'
    GT_CASES = params.args.all_gt_cases or default_qaids == 'gt'
    HARD_CASES = params.args.all_hard_cases or ut.get_argflag(('--all-hard-cases', '--allhard', '--hard'))
    NO_JUNK = not ut.get_argflag('--junk')
    CONTROLLED_CASES = ut.get_argflag('--controlled') or ut.get_argflag('--controlled_qaids')
    NO_REVIEWED = ut.get_argflag('--unreviewed')
    species = ut.get_argval('--species')
    #QAID = params.args.qaid
    QAID = ut.get_argval('--qaid', type_='fuzzy_subset', default=None)
    QINDEX = params.args.qindex
    QSHUFFLE = ut.get_argval('--qshuffle')

    if QAID is not None:
        if VERB_MAIN_HELPERS:
            print('[get_test_qaids] * Including cmdline specified qaids')
        try:
            args_qaid = ensure_flatlistlike(QAID)
        except Exception:
            # Best effort: fall back to the raw command line value
            args_qaid = QAID
        available_qaids.extend(args_qaid)
        qaid_request_info['custom_commandline'] = args_qaid

    if ALL_CASES:
        if VERB_MAIN_HELPERS:
            print('[get_test_qaids] * Including all qaids')
        available_qaids.extend(valid_aids)
        qaid_request_info['all_cases'] = True

    if HARD_CASES:
        if VERB_MAIN_HELPERS:
            print('[get_test_qaids] * Including hard qaids')
        is_hard_list = ibs.get_annot_is_hard(valid_aids)
        hard_aids = ut.compress(valid_aids, is_hard_list)
        available_qaids.extend(hard_aids)
        qaid_request_info['hard_cases'] = True

    if GT_CASES:
        if VERB_MAIN_HELPERS:
            print('[get_test_qaids] * Including groundtruth qaids')
        has_gt_list = ibs.get_annot_has_groundtruth(valid_aids)
        hasgt_aids = ut.compress(valid_aids, has_gt_list)
        print('[get_test_qaids] Adding all %d/%d ground-truthed test cases' % (len(hasgt_aids), len(valid_aids)))
        available_qaids.extend(hasgt_aids)
        qaid_request_info['gt_cases'] = True

    if CONTROLLED_CASES:
        if VERB_MAIN_HELPERS:
            print('[get_test_qaids] * Including controlled qaids')
        from ibeis.other import ibsfuncs
        # Override all other gts with controlled
        controlled_qaids = ibsfuncs.get_two_annots_per_name_and_singletons(ibs, onlygt=True)
        available_qaids.extend(controlled_qaids)
        qaid_request_info['controlled'] = True
    else:
        qaid_request_info['controlled'] = False

    # ---- CHECK_DEFAULTS QUERY
    if VERB_MAIN_HELPERS:
        print('[get_test_qaids] * len(available_qaids) = %r' % (len(available_qaids)))

    if len(available_qaids) == 0:
        print('[get_test_qaids] * ... defaulting, no available qaids on command line.')
        if default_qaids is None:
            default_qaids = valid_aids[0:1]
            qaid_request_info['default_one'] = True
        elif isinstance(default_qaids, six.string_types):
            if default_qaids == 'gt' or default_qaids == 'allgt':
                default_qaids = ibs.get_valid_aids(hasgt=True)
                qaid_request_info['default_gt'] = True
            elif default_qaids == 'all':
                # 'all' already extended available_qaids with valid_aids in
                # the include step, so landing here means valid_aids is
                # empty. Resolve the keyword instead of letting the string
                # itself become the list of aids.
                default_qaids = list(valid_aids)
            else:
                raise ValueError(
                    'unrecognized default_qaids=%r, expected \'all\', \'gt\', '
                    '\'allgt\', None or a list of aids' % (default_qaids,))
        available_qaids = default_qaids
    else:
        if VERB_MAIN_HELPERS:
            print('[get_test_qaids] * ... not defaulting')

    available_qaids = ut.unique_ordered(available_qaids)

    # ---- EXCLUSION STEP
    if VERB_MAIN_HELPERS:
        print('[get_test_qaids] * len(available_qaids) = %r' % (len(available_qaids)))
        print('[get_test_qaids] * exclude step')

    if NO_JUNK:
        if VERB_MAIN_HELPERS:
            print('[get_test_qaids] * Filtering junk')
        available_qaids = ibs.filter_junk_annotations(available_qaids)
        qaid_request_info['has_junk'] = False

    if NO_REVIEWED:
        if VERB_MAIN_HELPERS:
            print('[get_test_qaids] * Filtering unreviewed')
        isreviewed_list = ibs.get_annot_has_reviewed_matching_aids(available_qaids)
        available_qaids = ut.filterfalse_items(available_qaids, isreviewed_list)
        qaid_request_info['has_unreviewed'] = False

    if species is not None:
        if species == 'primary':
            if VERB_MAIN_HELPERS:
                print('[get_test_qaids] * Finding primary species')
            #species = ibs.get_primary_database_species(available_qaids)
            species = ibs.get_primary_database_species()
            qaid_request_info['primary_species'] = True

        if VERB_MAIN_HELPERS:
            print('[get_test_qaids] * Filtering to species=%r' % (species,))
        # Imported locally, the same way get_test_daids does
        import numpy as np
        isvalid_list = np.array(ibs.get_annot_species(available_qaids)) == species
        available_qaids = ut.compress(available_qaids, isvalid_list)
        qaid_request_info['species_filter'] = species

    if VERB_MAIN_HELPERS:
        print('[get_test_qaids] * len(available_qaids) = %r' % (len(available_qaids)))
        print('[get_test_qaids] * subindex step')

    # ---- INDEX SUBSET
    #ut.get_argval('--qshuffle')
    if QSHUFFLE:
        # Deterministic shuffling (fixed seed)
        available_qaids = ut.take(available_qaids, ut.random_indexes(len(available_qaids), seed=42))
        qaid_request_info['shuffled'] = True

    # Sample a large pool of chosen query qindexes
    if QINDEX is not None:
        # FIXME: should use a slice of the list or a sublist
        qindexes = ensure_flatlistlike(QINDEX)
        # Indexes past the end of the list are dropped rather than raising
        _test_qaids = [available_qaids[qx] for qx in qindexes if qx < len(available_qaids)]
        print('[get_test_qaids] Chose subset of size %d/%d' % (len(_test_qaids), len(available_qaids)))
        available_qaids = _test_qaids
        qaid_request_info['subset'] = qindexes

    if VERB_MAIN_HELPERS:
        print('[get_test_qaids] * len(available_qaids) = %r' % (len(available_qaids)))
        print('[get_test_qaids] L ___ GET_TEST_QAIDS ___')

    if return_annot_info:
        return available_qaids, qaid_request_info
    else:
        return available_qaids
def split_analysis(ibs):
    """
    Finds names whose annots imply an implausible speed and opens a review GUI.

    Known annots from 2016-01-30 and 2016-01-31 are grouped by name, and the
    maximum speed between annot pairs of each name is compared against
    ``MAX_SPEED`` (20 km/h). Edges above the threshold are "bad", the rest
    are "good". A graph widget is then shown for the bad edges, or for the
    good ones when ``--good`` is given.

    Args:
        ibs (IBEISController):  ibeis controller object

    Returns:
        AnnotGraphWidget: win - the review widget that was shown

    CommandLine:
        python -m ibeis.other.dbinfo split_analysis --show
        python -m ibeis split_analysis --show
        python -m ibeis split_analysis --show --good

    Ignore:
        # mount
        sshfs -o idmap=user lev:/ ~/lev

        # unmount
        fusermount -u ~/lev

    Example:
        >>> # DISABLE_DOCTEST GGR
        >>> from ibeis.other.dbinfo import *  # NOQA
        >>> import ibeis
        >>> dbdir = '/media/danger/GGR/GGR-IBEIS'
        >>> dbdir = dbdir if ut.checkpath(dbdir) else ut.truepath('~/lev/media/danger/GGR/GGR-IBEIS')
        >>> ibs = ibeis.opendb(dbdir=dbdir, allow_newdir=False)
        >>> import guitool_ibeis as gt
        >>> gt.ensure_qtapp()
        >>> win = split_analysis(ibs)
        >>> ut.quit_if_noshow()
        >>> import plottool_ibeis as pt
        >>> gt.qtapp_loop(qwin=win)
        >>> #ut.show_if_requested()
    """
    #nid_list = ibs.get_valid_nids(filter_empty=True)
    import datetime
    day1 = datetime.date(2016, 1, 30)
    day2 = datetime.date(2016, 1, 31)

    filter_kw = {
        'multiple': None,
        #'view': ['right'],
        #'minqual': 'good',
        'is_known': True,
        'min_pername': 1,
    }
    # Select the annots of each day separately by restricting the time window
    aids1 = ibs.filter_annots_general(filter_kw=ut.dict_union(
        filter_kw, {
            'min_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day1, 0.0)),
            'max_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day1, 1.0)),
        })
    )
    aids2 = ibs.filter_annots_general(filter_kw=ut.dict_union(
        filter_kw, {
            'min_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day2, 0.0)),
            'max_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day2, 1.0)),
        })
    )
    all_aids = aids1 + aids2
    all_annots = ibs.annots(all_aids)
    print('%d annots on day 1' % (len(aids1)))
    print('%d annots on day 2' % (len(aids2)))
    print('%d annots overall' % (len(all_annots)))
    print('%d names overall' % (len(ut.unique(all_annots.nids))))

    nid_list, annots_list = all_annots.group(all_annots.nids)

    REVIEWED_EDGES = True
    if REVIEWED_EDGES:
        aids_list = [annots.aids for annots in annots_list]
        #aid_pairs = [annots.get_am_aidpairs() for annots in annots_list]  # Slower
        aid_pairs = ibs.get_unflat_am_aidpairs(aids_list)  # Faster
    else:
        # ALL EDGES
        aid_pairs = [annots.get_aidpairs() for annots in annots_list]

    speeds_list = ibs.unflat_map(ibs.get_annotpair_speeds, aid_pairs)
    import vtool_ibeis as vt
    max_speeds = np.array([vt.safe_max(s, nans=False) for s in speeds_list])

    # Separate names whose max speed is nan / inf from those with a usable value
    nan_idx = np.where(np.isnan(max_speeds))[0]
    inf_idx = np.where(np.isinf(max_speeds))[0]
    bad_idx = sorted(ut.unique(ut.flatten([inf_idx, nan_idx])))
    ok_idx = ut.index_complement(bad_idx, len(max_speeds))

    print('#nan_idx = %r' % (len(nan_idx),))
    print('#inf_idx = %r' % (len(inf_idx),))
    print('#ok_idx = %r' % (len(ok_idx),))

    ok_speeds = max_speeds[ok_idx]
    ok_nids = ut.take(nid_list, ok_idx)
    ok_annots = ut.take(annots_list, ok_idx)
    # Sort names by decreasing max speed
    sortx = np.argsort(ok_speeds)[::-1]

    sorted_speeds = np.array(ut.take(ok_speeds, sortx))
    sorted_annots = np.array(ut.take(ok_annots, sortx))
    sorted_nids = np.array(ut.take(ok_nids, sortx))  # NOQA

    sorted_speeds = np.clip(sorted_speeds, 0, 100)

    #idx = vt.find_elbow_point(sorted_speeds)
    #EXCESSIVE_SPEED = sorted_speeds[idx]
    # http://www.infoplease.com/ipa/A0004737.html
    # http://www.speedofanimals.com/animals/zebra
    #ZEBRA_SPEED_MAX = 64  # km/h
    #ZEBRA_SPEED_RUN = 50  # km/h
    ZEBRA_SPEED_SLOW_RUN = 20  # km/h
    #ZEBRA_SPEED_FAST_WALK = 10  # km/h
    #ZEBRA_SPEED_WALK = 7  # km/h

    MAX_SPEED = ZEBRA_SPEED_SLOW_RUN
    #MAX_SPEED = ZEBRA_SPEED_WALK
    #MAX_SPEED = EXCESSIVE_SPEED

    flags = sorted_speeds > MAX_SPEED
    flagged_ok_annots = ut.compress(sorted_annots, flags)
    inf_annots = ut.take(annots_list, inf_idx)
    flagged_annots = inf_annots + flagged_ok_annots

    print('MAX_SPEED = %r km/h' % (MAX_SPEED,))
    print('%d annots with infinite speed' % (len(inf_annots),))
    print('%d annots with large speed' % (len(flagged_ok_annots),))
    print('Marking all pairs of annots above the threshold as non-matching')
    from ibeis.algo.graph import graph_iden
    import networkx as nx
    progkw = dict(freq=1, bs=True, est_window=len(flagged_annots))

    # Per flagged name, split its edges by whether they exceed the threshold
    bad_edges_list = []
    good_edges_list = []
    for annots in ut.ProgIter(flagged_annots, lbl='flag speeding names', **progkw):
        edge_to_speeds = annots.get_speeds()
        bad_edges = [edge for edge, speed in edge_to_speeds.items() if speed > MAX_SPEED]
        good_edges = [edge for edge, speed in edge_to_speeds.items() if speed <= MAX_SPEED]
        bad_edges_list.append(bad_edges)
        good_edges_list.append(good_edges)
    all_bad_edges = ut.flatten(bad_edges_list)
    good_edges_list = ut.flatten(good_edges_list)
    print('num_bad_edges = %r' % (len(all_bad_edges),))
    # good_edges_list is already flat here; flattening it a second time would
    # count aids instead of edges.
    print('num_good_edges = %r' % (len(good_edges_list),))

    if 1:
        from ibeis.viz import viz_graph2
        import guitool_ibeis as gt
        gt.ensure_qtapp()

        if ut.get_argflag('--good'):
            print('Looking at GOOD (no speed problems) edges')
            aid_pairs = good_edges_list
        else:
            print('Looking at BAD (speed problems) edges')
            aid_pairs = all_bad_edges
        aids = sorted(set(ut.flatten(aid_pairs)))
        infr = graph_iden.AnnotInference(ibs, aids, verbose=False)
        infr.initialize_graph()

        # Use random scores to randomize sort order
        rng = np.random.RandomState(0)
        scores = (-rng.rand(len(aid_pairs)) * 10).tolist()
        infr.graph.add_edges_from(aid_pairs)

        if True:
            # Report how many of the top-scored sample edges are already
            # tagged as a split case or photobomb.
            edge_sample_size = 250
            pop_nids = ut.unique(ibs.get_annot_nids(ut.unique(ut.flatten(aid_pairs))))
            sorted_pairs = ut.sortedby(aid_pairs, scores)[::-1][0:edge_sample_size]
            sorted_nids = ibs.get_annot_nids(ut.take_column(sorted_pairs, 0))
            sample_size = len(ut.unique(sorted_nids))
            am_rowids = ibs.get_annotmatch_rowid_from_undirected_superkey(*zip(*sorted_pairs))
            flags = ut.not_list(ut.flag_None_items(am_rowids))
            #am_rowids = ut.compress(am_rowids, flags)
            positive_tags = ['SplitCase', 'Photobomb']
            flags_list = [ut.replace_nones(ibs.get_annotmatch_prop(tag, am_rowids), 0)
                          for tag in positive_tags]
            print('edge_case_hist: ' + ut.repr3(
                ['%s %s' % (txt, sum(flags_)) for flags_, txt in zip(flags_list, positive_tags)]))
            is_positive = ut.or_lists(*flags_list)
            num_positive = sum(ut.lmap(any, ut.group_items(is_positive, sorted_nids).values()))
            pop = len(pop_nids)
            print('A positive is any edge flagged as a %s' % (ut.conj_phrase(positive_tags, 'or'),))
            print('--- Sampling wrt edges ---')
            print('edge_sample_size = %r' % (edge_sample_size,))
            print('edge_population_size = %r' % (len(aid_pairs),))
            print('num_positive_edges = %r' % (sum(is_positive)))
            print('--- Sampling wrt names ---')
            print('name_population_size = %r' % (pop,))
            vt.calc_error_bars_from_sample(sample_size, num_positive, pop, conf_level=.95)

        nx.set_edge_attributes(infr.graph, name='score', values=dict(zip(aid_pairs, scores)))

        win = viz_graph2.AnnotGraphWidget(infr=infr, use_image=False,
                                          init_mode=None)
        win.populate_edge_model()
        win.show()
        return win

    # NOTE: everything below is unreachable while the `if 1:` block above
    # returns. It is kept unchanged as the alternative (non-GUI) analysis.

    # Make review interface for only bad edges
    infr_list = []
    iter_ = list(zip(flagged_annots, bad_edges_list))
    for annots, bad_edges in ut.ProgIter(iter_, lbl='creating inference', **progkw):
        aids = annots.aids
        nids = [1] * len(aids)
        infr = graph_iden.AnnotInference(ibs, aids, nids, verbose=False)
        infr.initialize_graph()
        infr.reset_feedback()
        infr_list.append(infr)

    # Check which ones are user defined as incorrect
    #num_positive = 0
    #for infr in infr_list:
    #    flag = np.any(infr.get_feedback_probs()[0] == 0)
    #    num_positive += flag
    #print('num_positive = %r' % (num_positive,))
    #pop = len(infr_list)
    #print('pop = %r' % (pop,))

    iter_ = list(zip(infr_list, bad_edges_list))
    for infr, bad_edges in ut.ProgIter(iter_, lbl='adding speed edges', **progkw):
        flipped_edges = []
        for aid1, aid2 in bad_edges:
            if infr.graph.has_edge(aid1, aid2):
                flipped_edges.append((aid1, aid2))
            infr.add_feedback((aid1, aid2), NEGTV)
        nx.set_edge_attributes(infr.graph, name='_speed_split', values='orig')
        nx.set_edge_attributes(infr.graph, name='_speed_split', values={edge: 'new' for edge in bad_edges})
        nx.set_edge_attributes(infr.graph, name='_speed_split', values={edge: 'flip' for edge in flipped_edges})

    #for infr in ut.ProgIter(infr_list, lbl='flagging speeding edges', **progkw):
    #    annots = ibs.annots(infr.aids)
    #    edge_to_speeds = annots.get_speeds()
    #    bad_edges = [edge for edge, speed in edge_to_speeds.items() if speed > MAX_SPEED]

    def inference_stats(infr_list_):
        relabel_stats = []
        for infr in infr_list_:
            num_ccs, num_inconsistent = infr.relabel_using_reviews()
            state_hist = ut.dict_hist(nx.get_edge_attributes(infr.graph, 'decision').values())
            if POSTV not in state_hist:
                state_hist[POSTV] = 0
            hist = ut.dict_hist(nx.get_edge_attributes(infr.graph, '_speed_split').values())

            subgraphs = infr.positive_connected_compoments()
            subgraph_sizes = [len(g) for g in subgraphs]

            info = ut.odict([
                ('num_nonmatch_edges', state_hist[NEGTV]),
                ('num_match_edges', state_hist[POSTV]),
                ('frac_nonmatch_edges', state_hist[NEGTV] / (state_hist[POSTV] + state_hist[NEGTV])),
                ('num_inconsistent', num_inconsistent),
                ('num_ccs', num_ccs),
                ('edges_flipped', hist.get('flip', 0)),
                ('edges_unchanged', hist.get('orig', 0)),
                ('bad_unreviewed_edges', hist.get('new', 0)),
                ('orig_size', len(infr.graph)),
                ('new_sizes', subgraph_sizes),
            ])
            relabel_stats.append(info)
        return relabel_stats

    relabel_stats = inference_stats(infr_list)

    print('\nAll Split Info:')
    lines = []
    for key in relabel_stats[0].keys():
        data = ut.take_column(relabel_stats, key)
        if key == 'new_sizes':
            data = ut.flatten(data)
        lines.append('stats(%s) = %s' % (key, ut.repr2(ut.get_stats(data, use_median=True), precision=2)))
    print('\n'.join(ut.align_lines(lines, '=')))

    num_incon_list = np.array(ut.take_column(relabel_stats, 'num_inconsistent'))
    can_split_flags = num_incon_list == 0
    print('Can trivially split %d / %d' % (sum(can_split_flags), len(can_split_flags)))

    splittable_infrs = ut.compress(infr_list, can_split_flags)

    relabel_stats = inference_stats(splittable_infrs)

    print('\nTrival Split Info:')
    lines = []
    for key in relabel_stats[0].keys():
        if key in ['num_inconsistent']:
            continue
        data = ut.take_column(relabel_stats, key)
        if key == 'new_sizes':
            data = ut.flatten(data)
        lines.append('stats(%s) = %s' % (
            key, ut.repr2(ut.get_stats(data, use_median=True), precision=2)))
    print('\n'.join(ut.align_lines(lines, '=')))

    num_match_edges = np.array(ut.take_column(relabel_stats, 'num_match_edges'))
    num_nonmatch_edges = np.array(ut.take_column(relabel_stats, 'num_nonmatch_edges'))
    flags1 = np.logical_and(num_match_edges > num_nonmatch_edges, num_nonmatch_edges < 3)
    reasonable_infr = ut.compress(splittable_infrs, flags1)

    new_sizes_list = ut.take_column(relabel_stats, 'new_sizes')
    flags2 = [len(sizes) == 2 and sum(sizes) > 4 and (min(sizes) / max(sizes)) > .3
              for sizes in new_sizes_list]
    reasonable_infr = ut.compress(splittable_infrs, flags2)
    print('#reasonable_infr = %r' % (len(reasonable_infr),))

    for infr in ut.InteractiveIter(reasonable_infr):
        annots = ibs.annots(infr.aids)
        edge_to_speeds = annots.get_speeds()
        print('max_speed = %r' % (max(edge_to_speeds.values())),)
        infr.initialize_visual_node_attrs()
        infr.show_graph(use_image=True, only_reviewed=True)

    rest = ~np.logical_or(flags1, flags2)
    nonreasonable_infr = ut.compress(splittable_infrs, rest)
    rng = np.random.RandomState(0)
    random_idx = ut.random_indexes(len(nonreasonable_infr) - 1, 15, rng=rng)
    random_infr = ut.take(nonreasonable_infr, random_idx)
    for infr in ut.InteractiveIter(random_infr):
        annots = ibs.annots(infr.aids)
        edge_to_speeds = annots.get_speeds()
        print('max_speed = %r' % (max(edge_to_speeds.values())),)
        infr.initialize_visual_node_attrs()
        infr.show_graph(use_image=True, only_reviewed=True)

    #import scipy.stats as st
    #conf_interval = .95
    #st.norm.cdf(conf_interval)
    # view-source:http://www.surveysystem.com/sscalc.htm
    #zval = 1.96  # 95 percent confidence
    #zValC = 3.8416  #
    #zValC = 6.6564

    #import statsmodels.stats.api as sms
    #es = sms.proportion_effectsize(0.5, 0.75)
    #sms.NormalIndPower().solve_power(es, power=0.9, alpha=0.05, ratio=1)

    pop = 279
    num_positive = 3
    sample_size = 15
    conf_level = .95
    #conf_level = .99
    vt.calc_error_bars_from_sample(sample_size, num_positive, pop, conf_level)
    print('---')
    vt.calc_error_bars_from_sample(sample_size + 38, num_positive, pop, conf_level)
    print('---')
    vt.calc_error_bars_from_sample(sample_size + 38 / 3, num_positive, pop, conf_level)
    print('---')

    vt.calc_error_bars_from_sample(15 + 38, num_positive=3, pop=675, conf_level=.95)
    vt.calc_error_bars_from_sample(15, num_positive=3, pop=675, conf_level=.95)

    pop = 279
    #err_frac = .05  # 5%
    err_frac = .10  # 10%
    conf_level = .95
    vt.calc_sample_from_error_bars(err_frac, pop, conf_level)

    pop = 675
    vt.calc_sample_from_error_bars(err_frac, pop, conf_level)
    vt.calc_sample_from_error_bars(.05, pop, conf_level=.95, prior=.1)
    vt.calc_sample_from_error_bars(.05, pop, conf_level=.68, prior=.2)
    vt.calc_sample_from_error_bars(.10, pop, conf_level=.68)

    vt.calc_error_bars_from_sample(100, num_positive=5, pop=675, conf_level=.95)
    vt.calc_error_bars_from_sample(100, num_positive=5, pop=675, conf_level=.68)
def get_test_daids(ibs, default_daids='all', qaid_list=None, return_annot_info=False, aidcfg=None):
    """
    Gets database annot_rowids based on command line arguments

    DEPRECATED

    The list is built in four steps: include (``--controlled`` /
    ``--controlled_daids``), default (only when nothing was included),
    exclude (junk unless ``--junk``, ``--exclude-query``, ``--species``) and
    subindex (``--dshuffle``, ``params.args.dindex``).

    CommandLine:
        python dev.py --db PZ_MTEST -t best --exclude-query --qaid 72 -r 0 -c 0 --show --va --vf --dump-extra

    Args:
        ibs (IBEISController): ibeis controller object
        default_daids (str or list): used only when no daids were requested
            on the command line. The strings 'all' and 'gt' are expanded;
            anything else must already be a list of aids (default = 'all')
        qaid_list (list): list of chosen qaids that may affect daids; needed
            for ``default_daids='gt'`` and ``--exclude-query``
            (default = None)
        return_annot_info (bool): if True also return the dict describing
            how the daids were chosen (default = False)
        aidcfg (None): unused by this function (default = None)

    Returns:
        list: available_daids, or the tuple
            (available_daids, daid_request_info) if return_annot_info is True

    Raises:
        AssertionError: if ``default_daids`` is needed but is a string other
            than 'all' or 'gt', or if ``--exclude-query`` is given without
            a ``qaid_list``

    CommandLine:
        python -m ibeis.init.main_helpers --test-get_test_daids
        python -m ibeis.init.main_helpers --test-get_test_daids --db PZ_MTEST --verbmhelp
        python -m ibeis.init.main_helpers --test-get_test_daids --db PZ_MTEST --exclude-query
        python -m ibeis.init.main_helpers --test-get_test_daids --db PZ_MTEST --daid-exclude 2 3 4
        python -m ibeis.init.main_helpers --test-get_test_daids --db PZ_MTEST --species=zebra_grevys
        python -m ibeis.init.main_helpers --test-get_test_daids --db PZ_Master0 --species=zebra_grevys
        python -m ibeis.init.main_helpers --test-get_test_daids --db PZ_Master0 --controlled --verbmhelp
        python -m ibeis.init.main_helpers --exec-get_test_daids --controlled --db PZ_Master0 --exec-mode

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.init.main_helpers import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='testdb1')
        >>> default_daids = 'all'
        >>> qaid_list = [1]
        >>> available_daids = get_test_daids(ibs, default_daids, qaid_list)
        >>> ibeis.other.dbinfo.get_dbinfo(ibs, aid_list=available_daids, with_contrib=False, short=True)
        >>> result = 'available_daids = ' + ut.obj_str(available_daids, truncate=True, nl=False)
        >>> print('len(available_daids) %d' % len(available_daids))
        >>> print(result)
        available_daids = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
    """
    daid_request_info = {}

    if VERB_MAIN_HELPERS:
        print('[get_test_daids] + --- GET_TEST_DAIDS ---')
        print('[get_test_daids] * default_daids = %s' % (ut.obj_str(default_daids, truncate=True, nl=False)))
        print('[get_test_daids] * qaid_list = %s' % (ut.obj_str(qaid_list, truncate=True, nl=False)))

    # ---- INCLUDING STEP
    if VERB_MAIN_HELPERS:
        print('[get_test_daids] * include step')
    available_daids = []

    CONTROLLED_CASES = ut.get_argflag('--controlled') or ut.get_argflag('--controlled_daids')
    DSHUFFLE = ut.get_argval('--dshuffle')
    DINDEX = params.args.dindex
    NO_JUNK = not ut.get_argflag('--junk')
    EXCLUDE_QUERY = ut.get_argflag('--exclude-query')
    #daids_exclude = params.args.daid_exclude
    daids_exclude = None

    if CONTROLLED_CASES:
        print('[get_test_daids] * Including controlled daids')
        from ibeis import ibsfuncs
        controlled_daids = ibsfuncs.get_two_annots_per_name_and_singletons(ibs, onlygt=False)
        available_daids.extend(controlled_daids)
        daid_request_info['controlled'] = True
    else:
        daid_request_info['controlled'] = False

    # ---- CHECK_DEFAULTS DATA
    if VERB_MAIN_HELPERS:
        print('[get_test_daids] * len(available_daids) = %r' % (len(available_daids)))

    if len(available_daids) == 0:
        print('[get_test_daids] * ... defaulting, no available daids on command line.')
        if isinstance(default_daids, six.string_types):
            if default_daids == 'all':
                default_daids = ibs.get_valid_aids()
                daid_request_info['default_daids'] = 'all'
            elif default_daids == 'gt':
                default_daids = ut.flatten(ibs.get_annot_groundtruth(qaid_list))
                daid_request_info['default_daids'] = 'gt'
        #available_qaids = valid_aids[0:1]
        # Every recognized string has been expanded into a list above; a
        # remaining string is an unsupported keyword and must not be used as
        # the annotation list.
        assert not isinstance(default_daids, six.string_types), (
            'unknown default_daids=%r' % (default_daids,))
        available_daids = default_daids
    else:
        if VERB_MAIN_HELPERS:
            print('[get_test_daids] * ... not defaulting')

    available_daids = ut.unique_keep_order(available_daids)

    # ---- EXCLUSION STEP
    if VERB_MAIN_HELPERS:
        print('[get_test_daids] * len(available_daids) = %r' % (len(available_daids)))
        print('[get_test_daids] * exclude step')

    species = ut.get_argval('--species', type_=str, default=None)

    if NO_JUNK:
        if VERB_MAIN_HELPERS:
            print('[get_test_daids] * Filtering junk')
        available_daids = ibs.filter_junk_annotations(available_daids)

    if EXCLUDE_QUERY:
        if VERB_MAIN_HELPERS:
            print('[get_test_daids] * Excluding query qaids')
        assert qaid_list is not None, 'must specify qaids to exclude'
        available_daids = ut.setdiff_ordered(available_daids, qaid_list)

    # daids_exclude is currently hard-wired to None (see above), so this
    # branch only runs if the command line option is re-enabled.
    if daids_exclude is not None:
        if VERB_MAIN_HELPERS:
            print('[get_test_daids] * Excluding specified daids')
        available_daids = ut.setdiff_ordered(available_daids, daids_exclude)

    if species is not None:
        if species == 'primary':
            if VERB_MAIN_HELPERS:
                print('[get_test_daids] * Finiding primary species')
            #species = ibs.get_primary_database_species(available_daids)
            species = ibs.get_primary_database_species()
        if VERB_MAIN_HELPERS:
            print('[get_test_daids] * Filtering to species=%r' % (species,))
        import numpy as np
        isvalid_list = np.array(ibs.get_annot_species(available_daids)) == species
        available_daids = ut.compress(available_daids, isvalid_list)

    # ---- SUBINDEXING STEP
    if VERB_MAIN_HELPERS:
        print('[get_test_daids] * len(available_daids) = %r' % (len(available_daids)))
        print('[get_test_daids] * subindex step')

    #ut.get_argval('--qshuffle')
    if DSHUFFLE:
        # Determenistic shuffling
        available_daids = ut.take(available_daids, ut.random_indexes(len(available_daids), seed=43))
        daid_request_info['shuffled'] = True

    if DINDEX is not None:
        dindexes = ensure_flatlistlike(DINDEX)
        # Indexes at or past the end of the list are silently dropped
        _test_daids = [available_daids[dx] for dx in dindexes if dx < len(available_daids)]
        print('[get_test_daids] Chose subset of size %d/%d' % (len(_test_daids), len(available_daids)))
        available_daids = _test_daids

    if VERB_MAIN_HELPERS:
        print('[get_test_daids] * len(available_daids) = %r' % (len(available_daids)))
        print('[get_test_daids] L ___ GET_TEST_DAIDS ___')

    if return_annot_info:
        return available_daids, daid_request_info
    else:
        return available_daids
def get_test_qaids(ibs, default_qaids=None, return_annot_info=False, aidcfg=None):
    """
    Gets test annot_rowids based on command line arguments

    DEPRECATED

    The list is built in four steps: include (``--qaid``, all / hard /
    groundtruth / controlled cases), default (only when nothing was
    included), exclude (junk unless ``--junk``, ``--unreviewed``,
    ``--species``) and subindex (``--qshuffle``, ``params.args.qindex``).

    Args:
        ibs (IBEISController): ibeis controller object
        default_qaids (None): if list then used only if no other aids are
            available (default = [1]). As a string ('all', 'gt', 'allgt')
            it mimics the command line
        return_annot_info (bool): if True also return the dict describing
            how the qaids were chosen (default = False)
        aidcfg (None): unused by this function (default = None)

    Returns:
        list: available_qaids, or the tuple
            (available_qaids, qaid_request_info) if return_annot_info is True

    CommandLine:
        python -m ibeis.init.main_helpers --test-get_test_qaids
        python -m ibeis.init.main_helpers --test-get_test_qaids --controlled --db PZ_Master0
        python -m ibeis.init.main_helpers --test-get_test_qaids --controlled --db PZ_Master0 --qaid 1
        python -m ibeis.init.main_helpers --test-get_test_qaids --allgt --db PZ_MTEST
        python -m ibeis.init.main_helpers --test-get_test_qaids --qaid 4 5 8 --verbmhelp
        python -m ibeis.init.main_helpers --test-get_test_qaids --controlled --db PZ_MTEST
        python -m ibeis.init.main_helpers --test-get_test_qaids --controlled --db PZ_MTEST --qaid 2 --verbmhelp
        python -m ibeis.init.main_helpers --test-get_test_qaids --controlled --db PZ_MTEST --qaid 2
        python -m ibeis.init.main_helpers --test-get_test_qaids --controlled --db PZ_Master0 --qindex 0:10 --verbmhelp
        python -m ibeis.init.main_helpers --exec-get_test_qaids --controlled --db PZ_Master0 --exec-mode
        python -m ibeis.init.main_helpers --exec-get_test_qaids --db testdb1 --allgt --qindex 0:256

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.init.main_helpers import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='testdb1')
        >>> default_qaids = None
        >>> available_qaids = get_test_qaids(ibs, default_qaids)
        >>> ibeis.other.dbinfo.get_dbinfo(ibs, aid_list=available_qaids, with_contrib=False, short=True)
        >>> result = 'available_qaids = ' + ut.obj_str(available_qaids, truncate=True, nl=False)
        >>> print('len(available_qaids) = %d' % len(available_qaids))
        >>> print(result)
        available_qaids = [1]
    """
    qaid_request_info = {}
    # Old version of this function
    if VERB_MAIN_HELPERS:
        print('[get_test_qaids] + --- GET_TEST_QAIDS ---')
        print('[get_test_qaids] * default_qaids = %s' % (ut.obj_str(default_qaids, truncate=True, nl=False)))

    valid_aids = ibs.get_valid_aids()

    if len(valid_aids) == 0:
        print('[get_test_qaids] WARNING no annotations available')

    # ---- INCLUDING STEP
    if VERB_MAIN_HELPERS:
        print('[get_test_qaids] * include step')

    available_qaids = []

    #ut.get_argflag(('--all-cases', '--all'))
    #ut.get_argflag(('--all-gt-cases', '--allgt'))
    #ut.get_argflag(('--all-hard-cases', '--allhard'))
    #ut.get_argflag(('--qaid', '--qaids'))
    #ut.get_argflag('--controlled') or ut.get_argflag('--controlled_qaids')
    #not ut.get_argflag('--junk')

    ALL_CASES = params.args.all_cases or default_qaids == 'all'
    GT_CASES = params.args.all_gt_cases or default_qaids == 'gt'
    HARD_CASES = params.args.all_hard_cases or ut.get_argflag(('--all-hard-cases', '--allhard', '--hard'))
    NO_JUNK = not ut.get_argflag('--junk')
    CONTROLLED_CASES = ut.get_argflag('--controlled') or ut.get_argflag('--controlled_qaids')
    NO_REVIEWED = ut.get_argflag('--unreviewed')
    species = ut.get_argval('--species')
    #QAID = params.args.qaid
    QAID = ut.get_argval('--qaid', type_='fuzzy_subset', default=None)
    QINDEX = params.args.qindex
    QSHUFFLE = ut.get_argval('--qshuffle')

    if QAID is not None:
        if VERB_MAIN_HELPERS:
            print('[get_test_qaids] * Including cmdline specified qaids')
        # Best effort: fall back to the raw parsed value when it cannot be
        # flattened into a list.
        try:
            args_qaid = ensure_flatlistlike(QAID)
        except Exception:
            args_qaid = QAID
        available_qaids.extend(args_qaid)
        qaid_request_info['custom_commandline'] = args_qaid

    if ALL_CASES:
        if VERB_MAIN_HELPERS:
            print('[get_test_qaids] * Including all qaids')
        available_qaids.extend(valid_aids)
        qaid_request_info['all_cases'] = True

    if HARD_CASES:
        if VERB_MAIN_HELPERS:
            print('[get_test_qaids] * Including hard qaids')
        is_hard_list = ibs.get_annot_is_hard(valid_aids)
        hard_aids = ut.compress(valid_aids, is_hard_list)
        available_qaids.extend(hard_aids)
        qaid_request_info['hard_cases'] = True

    if GT_CASES:
        if VERB_MAIN_HELPERS:
            print('[get_test_qaids] * Including groundtruth qaids')
        has_gt_list = ibs.get_annot_has_groundtruth(valid_aids)
        hasgt_aids = ut.compress(valid_aids, has_gt_list)
        print('[get_test_qaids] Adding all %d/%d ground-truthed test cases' % (len(hasgt_aids), len(valid_aids)))
        available_qaids.extend(hasgt_aids)
        qaid_request_info['gt_cases'] = True

    if CONTROLLED_CASES:
        if VERB_MAIN_HELPERS:
            print('[get_test_qaids] * Including controlled qaids')
        from ibeis import ibsfuncs
        # Override all other gts with controlled
        controlled_qaids = ibsfuncs.get_two_annots_per_name_and_singletons(ibs, onlygt=True)
        available_qaids.extend(controlled_qaids)
        qaid_request_info['controlled'] = True
    else:
        qaid_request_info['controlled'] = False

    # ---- CHECK_DEFAULTS QUERY
    if VERB_MAIN_HELPERS:
        print('[get_test_qaids] * len(available_qaids) = %r' % (len(available_qaids)))

    if len(available_qaids) == 0:
        print('[get_test_qaids] * ... defaulting, no available qaids on command line.')
        if default_qaids is None:
            default_qaids = valid_aids[0:1]
            qaid_request_info['default_one'] = True
        elif isinstance(default_qaids, six.string_types):
            if default_qaids == 'gt' or default_qaids == 'allgt':
                default_qaids = ibs.get_valid_aids(hasgt=True)
                qaid_request_info['default_gt'] = True
            elif default_qaids == 'all':
                # 'all' was already expanded by ALL_CASES above and produced
                # nothing; use that (empty) list instead of letting the
                # literal string 'all' become the annotation list.
                default_qaids = list(valid_aids)
        available_qaids = default_qaids
    else:
        if VERB_MAIN_HELPERS:
            print('[get_test_qaids] * ... not defaulting')

    available_qaids = ut.unique_keep_order(available_qaids)

    # ---- EXCLUSION STEP
    if VERB_MAIN_HELPERS:
        print('[get_test_qaids] * len(available_qaids) = %r' % (len(available_qaids)))
        print('[get_test_qaids] * exclude step')

    if NO_JUNK:
        if VERB_MAIN_HELPERS:
            print('[get_test_qaids] * Filtering junk')
        available_qaids = ibs.filter_junk_annotations(available_qaids)
        qaid_request_info['has_junk'] = False

    if NO_REVIEWED:
        if VERB_MAIN_HELPERS:
            print('[get_test_qaids] * Filtering unreviewed')
        isreviewed_list = ibs.get_annot_has_reviewed_matching_aids(available_qaids)
        # Keeps the qaids whose reviewed flag is False
        available_qaids = ut.filterfalse_items(available_qaids, isreviewed_list)
        qaid_request_info['has_unreviewed'] = False

    if species is not None:
        if species == 'primary':
            if VERB_MAIN_HELPERS:
                print('[get_test_qaids] * Finiding primary species')
            #species = ibs.get_primary_database_species(available_qaids)
            species = ibs.get_primary_database_species()
            qaid_request_info['primary_species'] = True

        if VERB_MAIN_HELPERS:
            print('[get_test_qaids] * Filtering to species=%r' % (species,))
        isvalid_list = np.array(ibs.get_annot_species(available_qaids)) == species
        available_qaids = ut.compress(available_qaids, isvalid_list)
        qaid_request_info['species_filter'] = species

    if VERB_MAIN_HELPERS:
        print('[get_test_qaids] * len(available_qaids) = %r' % (len(available_qaids)))
        print('[get_test_qaids] * subindex step')

    # ---- INDEX SUBSET

    #ut.get_argval('--qshuffle')
    if QSHUFFLE:
        # Determenistic shuffling
        available_qaids = ut.take(available_qaids, ut.random_indexes(len(available_qaids), seed=42))
        qaid_request_info['shuffled'] = True

    # Sample a large pool of chosen query qindexes
    if QINDEX is not None:
        # FIXME: should use a slice of the list or a sublist
        qindexes = ensure_flatlistlike(QINDEX)
        # Indexes at or past the end of the list are silently dropped
        _test_qaids = [available_qaids[qx] for qx in qindexes if qx < len(available_qaids)]
        print('[get_test_qaids] Chose subset of size %d/%d' % (len(_test_qaids), len(available_qaids)))
        available_qaids = _test_qaids
        qaid_request_info['subset'] = qindexes

    if VERB_MAIN_HELPERS:
        print('[get_test_qaids] * len(available_qaids) = %r' % (len(available_qaids)))
        print('[get_test_qaids] L ___ GET_TEST_QAIDS ___')

    if return_annot_info:
        return available_qaids, qaid_request_info
    else:
        return available_qaids
def ggr_random_name_splits():
    """
    Interactive script that reviews a seeded random sample of names from the
    GGR database and prints error-bar estimates for how many of them carry
    the 'photobomb' and/or 'splitcase' match tags.

    Takes no arguments and returns nothing. The database location, the date
    window, the random seed (3340258) and the sample size (75) are all
    hard-coded below. Each sampled name is opened in a Qt graph interface,
    one at a time, through ``ut.InteractiveIter``.

    CommandLine:
        python -m wbia.viz.viz_graph2 ggr_random_name_splits --show

    Ignore:
        sshfs -o idmap=user lev:/ ~/lev

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.viz.viz_graph2 import *  # NOQA
        >>> ggr_random_name_splits()
    """
    import wbia.guitool as gt

    gt.ensure_qtapp()
    # nid_list = ibs.get_valid_nids(filter_empty=True)
    import wbia

    # Use the local mount if it exists, otherwise the path under ~/lev
    # (see the sshfs command in the docstring).
    dbdir = '/media/danger/GGR/GGR-IBEIS'
    dbdir = (dbdir if ut.checkpath(dbdir) else ut.truepath('~/lev/media/danger/GGR/GGR-IBEIS'))
    ibs = wbia.opendb(dbdir=dbdir, allow_newdir=False)

    import datetime

    # Date window used for the min/max unixtime filters below
    day1 = datetime.date(2016, 1, 30)
    day2 = datetime.date(2016, 1, 31)

    # First attempt: this filter left out the 'view' and 'minqual'
    # constraints; it is corrected further down ("OOOPS MADE ERROR REDO").
    orig_filter_kw = {
        'multiple': None,
        # 'view': ['right'],
        # 'minqual': 'good',
        'is_known': True,
        'min_pername': 2,
    }
    orig_aids = ibs.filter_annots_general(filter_kw=ut.dict_union(
        orig_filter_kw,
        {
            'min_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day1, 0.0)),
            'max_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day2, 1.0)),
        },
    ))
    orig_all_annots = ibs.annots(orig_aids)
    orig_unique_nids, orig_grouped_annots_ = orig_all_annots.group(orig_all_annots.nids)
    # Ensure we get everything
    # (regroups by name through ibs.get_name_aids instead of using the
    # filtered grouping orig_grouped_annots_, which is left unused)
    orig_grouped_annots = [
        ibs.annots(aids_) for aids_ in ibs.get_name_aids(orig_unique_nids)
    ]

    # pip install quantumrandom
    # Disabled block; the seed 3340258 used below is hard-coded instead.
    if False:
        import quantumrandom

        data = quantumrandom.uint16()
        seed = data.sum()
        print('seed = %r' % (seed, ))
        # import Crypto.Random
        # from Crypto import Random
        # quantumrandom.get_data()
        # StrongRandom = Crypto.Random.random.StrongRandom
        # aes.reseed(3340258)
        # chars = [str(chr(x)) for x in data.view(np.uint8)]
        # aes_seed = str('').join(chars)
        # aes = Crypto.Random.Fortuna.FortunaGenerator.AESGenerator()
        # aes.reseed(aes_seed)
        # aes.pseudo_random_data(10)

    # Seeded random order over the name groups; the first 75 form the
    # original sample.
    orig_rand_idxs = ut.random_indexes(len(orig_grouped_annots), seed=3340258)
    orig_sample_size = 75
    random_annot_groups = ut.take(orig_grouped_annots, orig_rand_idxs)
    orig_annot_sample = random_annot_groups[:orig_sample_size]

    # OOOPS MADE ERROR REDO ----

    # Corrected filter: same as orig_filter_kw plus the 'view' and 'minqual'
    # constraints.
    filter_kw = {
        'multiple': None,
        'view': ['right'],
        'minqual': 'good',
        'is_known': True,
        'min_pername': 2,
    }
    filter_kw_ = ut.dict_union(
        filter_kw,
        {
            'min_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day1, 0.0)),
            'max_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day2, 1.0)),
        },
    )
    # Re-filter each originally sampled name with the corrected filter and
    # keep the names that still have at least two annotations.
    refiltered_sample = [
        ibs.filter_annots_general(annot.aids, filter_kw=filter_kw_)
        for annot in orig_annot_sample
    ]
    is_ok = np.array(ut.lmap(len, refiltered_sample)) >= 2
    ok_part_orig_sample = ut.compress(orig_annot_sample, is_ok)
    ok_part_orig_nids = [x.nids[0] for x in ok_part_orig_sample]

    # Now compute real sample
    aids = ibs.filter_annots_general(filter_kw=filter_kw_)
    all_annots = ibs.annots(aids)
    unique_nids, grouped_annots_ = all_annots.group(all_annots.nids)
    grouped_annots = grouped_annots_
    # Ensure we get everything
    # grouped_annots = [ibs.annots(aids_) for aids_ in ibs.get_name_aids(unique_nids)]

    # Population size = number of name groups passing the corrected filter
    pop = len(grouped_annots)
    pername_list = ut.lmap(len, grouped_annots)
    groups = wbia.annots.AnnotGroups(grouped_annots, ibs)
    match_tags = [ut.unique(ut.flatten(t)) for t in groups.match_tags]
    tag_case_hist = ut.dict_hist(ut.flatten(match_tags))
    print('name_pop = %r' % (pop, ))
    print('Annots per Multiton Name' + ut.repr3(ut.get_stats(pername_list, use_median=True)))
    print('Name Tag Hist ' + ut.repr3(tag_case_hist))
    print('Percent Photobomb: %.2f%%' % (tag_case_hist['photobomb'] / pop * 100))
    print('Percent Split: %.2f%%' % (tag_case_hist['splitcase'] / pop * 100))

    # Remove the ok part from this sample
    remain_unique_nids = ut.setdiff(unique_nids, ok_part_orig_nids)
    remain_grouped_annots = [
        ibs.annots(aids_) for aids_ in ibs.get_name_aids(remain_unique_nids)
    ]

    sample_size = 75
    import vtool as vt

    # Return value is not used here
    # NOTE(review): presumably called for its printed output -- confirm
    vt.calc_sample_from_error_bars(0.05, pop, conf_level=0.95, prior=0.05)

    # Top the sample up to sample_size with names drawn (same seed) from the
    # names that were not already kept from the original sample.
    remain_rand_idxs = ut.random_indexes(len(remain_grouped_annots), seed=3340258)
    remain_sample_size = sample_size - len(ok_part_orig_nids)
    remain_random_annot_groups = ut.take(remain_grouped_annots, remain_rand_idxs)
    remain_annot_sample = remain_random_annot_groups[:remain_sample_size]

    annot_sample_nofilter = ok_part_orig_sample + remain_annot_sample
    # Filter out all bad parts
    annot_sample_filter = [
        ibs.annots(ibs.filter_annots_general(annot.aids, filter_kw=filter_kw_))
        for annot in annot_sample_nofilter
    ]
    annot_sample = annot_sample_filter

    win = None
    from wbia.viz import viz_graph2

    # Show one sampled name at a time, closing the previous window first
    for annots in ut.InteractiveIter(annot_sample):
        if win is not None:
            win.close()
        win = viz_graph2.make_qt_graph_interface(ibs, aids=annots.aids, init_mode='rereview')
        print(win)

    # Count sampled names by tag and compute error bars for three
    # definitions of a "positive" name.
    sample_groups = wbia.annots.AnnotGroups(annot_sample, ibs)
    flat_tags = [ut.unique(ut.flatten(t)) for t in sample_groups.match_tags]

    print('Using Split and Photobomb')
    is_positive = ['photobomb' in t or 'splitcase' in t for t in flat_tags]
    num_positive = sum(is_positive)
    vt.calc_error_bars_from_sample(sample_size, num_positive, pop, conf_level=0.95)

    print('Only Photobomb')
    is_positive = ['photobomb' in t for t in flat_tags]
    num_positive = sum(is_positive)
    vt.calc_error_bars_from_sample(sample_size, num_positive, pop, conf_level=0.95)

    print('Only SplitCase')
    is_positive = ['splitcase' in t for t in flat_tags]
    num_positive = sum(is_positive)
    vt.calc_error_bars_from_sample(sample_size, num_positive, pop, conf_level=0.95)
def random_xy_sample(X, y, size_, data_per_label, seed=0):
    """
    Draw a seeded random subset of labels together with their data rows.

    Args:
        X: data container supporting ``take(indices, axis=0)``
            (presumably a numpy array -- confirm against callers)
        y: label container supporting ``len`` and ``take(indices, axis=0)``
        size_ (int): maximum number of labels to keep
        data_per_label: forwarded unchanged to ``expand_data_indicies``
            together with the chosen label indices
        seed (int): seed passed to ``ut.random_indexes`` (default = 0)

    Returns:
        tuple: (X_subset, y_subset)
    """
    shuffled_label_idxs = ut.random_indexes(len(y), seed=seed)
    # Keep at most ``size_`` of the randomly ordered label positions
    chosen_label_idxs = shuffled_label_idxs[:size_]
    # Map the chosen labels onto the rows of X that belong to them
    chosen_data_idxs = expand_data_indicies(chosen_label_idxs, data_per_label)
    y_subset = y.take(chosen_label_idxs, axis=0)
    X_subset = X.take(chosen_data_idxs, axis=0)
    return X_subset, y_subset