def import_cyth_execstr(pyth_modname):
    """
    Build the source text that binds cyth functions with a python fallback.

    The generated snippet first tries the cyth bindings (guarded by
    ``cyth.WITH_CYTH``) and sets ``CYTHONIZED = True``.  On ``ImportError``
    it falls back to the assignments derived from
    ``import_cyth_default(pyth_modname)`` and sets ``CYTHONIZED = False``.
    When ``cyth_args.CYTH_WRITE`` is set the snippet is also handed to
    ``write_explicit``.

    Args:
        pyth_modname (str): dotted module name, e.g. ``'vtool.trig'``

    Returns:
        str: execstr - the generated source text

    Example:
        >>> from cyth.cyth_importer import *  # NOQA
        >>> from vtool import trig  # NOQA
        >>> pyth_modname = 'vtool.trig'
    """
    dummy_cythonized_funcs = import_cyth_default(pyth_modname)
    pyth_list = [
        funcname + ' = ' + get_funcname(func)
        for funcname, func in dummy_cythonized_funcs.items()
    ]
    pyth_list2 = utool.align_lines(sorted(pyth_list), '=')

    try:
        _pkgname, _fromlist, cyth_modname = pkg_submodule_split(pyth_modname)
        cythonized_funcs = get_cythonized_funcs(pyth_modname)
        cyth_list = [
            funcname + ' = ' + cyth_modname + '.' + func.__name__
            for funcname, func in cythonized_funcs.items()
        ]
        cyth_list2 = (['import ' + cyth_modname] +
                      utool.align_lines(sorted(cyth_list), '='))
    except ImportError:
        cyth_list2 = ['raise ImportError("no cyth")']
    except Exception as ex:
        # Deliberately broad: any failure while collecting the cyth functions
        # is turned into an ImportError raised by the generated code.
        # The message is embedded with %r so that quotes, backslashes or
        # newlines inside it cannot break the generated source.
        errmsg = 'cyth import error: %s' % (ex,)
        cyth_list2 = ['raise ImportError(%r)' % (errmsg,)]

    cyth_block = utool.indentjoin(cyth_list2).strip()
    pyth_block = utool.indentjoin(pyth_list2).strip()
    # Only the two placeholders below are substituted; they are passed
    # explicitly instead of exposing every local through **locals().
    execstr = utool.unindent(
        '''
        try:
            if not cyth.WITH_CYTH:
                raise ImportError('no cyth')
            {cyth_block}
            CYTHONIZED = True
            # print('cyth is on in %s' % (__name__,))
        except ImportError:
            {pyth_block}
            # print('cyth is off in %s' % (__name__,))
            CYTHONIZED = False''').format(
                cyth_block=cyth_block, pyth_block=pyth_block).strip('\n')
    #print(execstr)
    if cyth_args.CYTH_WRITE:
        write_explicit(pyth_modname, execstr)
    return execstr
def print_scores(match):
    """
    Print ``match.key`` followed by an aligned listing of the match scores.

    ``match.lazy_compute()`` is called first; each score is then read from
    ``match.__dict__`` and formatted with two decimals.  The rows are aligned
    on ``=`` with ``ut.align_lines``.

    Args:
        match: object providing ``lazy_compute()``, ``key`` and the score
            attributes listed below in its ``__dict__``.
    """
    match.lazy_compute()
    score_keys = (
        'num_matches',
        'sum_score',
        'ave_score',
        'weight_ave_score',
        'coverage_score',
        'weighted_coverage_score',
    )
    # One " * name = value" row per score, in the order given above
    rows = [' * %s = %6.2f' % (name, match.__dict__[name])
            for name in score_keys]
    report = '\n'.join(ut.align_lines(rows, '='))
    print('key = %r' % (match.key, ))
    print(report)
def print_scores(match):
    """
    Report the scores stored on ``match`` as an aligned table.

    Calls ``match.lazy_compute()`` before reading the score attributes from
    ``match.__dict__``.  Prints a ``key = ...`` header built from
    ``match.key`` and then one row per score, aligned on ``=`` by
    ``ut.align_lines``.

    Args:
        match: object providing ``lazy_compute()``, ``key`` and the score
            attributes named below in its ``__dict__``.
    """
    match.lazy_compute()
    row_fmt = " * %s = %6.2f"
    table = []
    for score_name in (
        "num_matches",
        "sum_score",
        "ave_score",
        "weight_ave_score",
        "coverage_score",
        "weighted_coverage_score",
    ):
        score_value = match.__dict__[score_name]
        table.append(row_fmt % (score_name, score_value))
    aligned_rows = ut.align_lines(table, "=")
    header = "key = %r" % (match.key,)
    body = "\n".join(aligned_rows)
    print(header)
    print(body)
def print_score_diffs(match, match_tn):
    """
    Print a per-score comparison between ``match`` and ``match_tn``.

    For every score key one row is printed with the value read from
    ``match.__dict__`` (``<tp>`` column), the value read from
    ``match_tn.__dict__`` (``<tn>`` column), their difference and their
    ratio.  Rows are aligned on ``=`` with ``ut.align_lines`` and preceded by
    a ``key = ...`` line built from ``match.key``.

    This function does not trigger any computation itself: the score
    attributes must already be present in both ``__dict__`` mappings,
    otherwise a ``KeyError`` is raised.

    Args:
        match: object exposing ``key`` and the score attributes.
        match_tn: object exposing the same score attributes.
    """
    score_keys = [
        'num_matches',
        'sum_score',
        'ave_score',
        'weight_ave_score',
        'coverage_score',
        'weighted_coverage_score'
    ]
    msglist = [' * <key> = <tp>, <tn>, <diff>, <factor>']
    for key in score_keys:
        score = match.__dict__[key]
        score_tn = match_tn.__dict__[key]
        score_diff = score - score_tn
        try:
            score_factor = score / score_tn
        except ZeroDivisionError:
            # A zero comparison score (e.g. no matches) must not abort the
            # report: show a signed infinity, or nan for 0 / 0.
            if score > 0:
                score_factor = float('inf')
            elif score < 0:
                score_factor = float('-inf')
            else:
                score_factor = float('nan')
        msglist.append(' * %s = %6.2f, %6.2f, %6.2f, %6.2f' % (
            key, score, score_tn, score_diff, score_factor))
    msglist_aligned = ut.align_lines(msglist, '=')
    msg = '\n'.join(msglist_aligned)
    print('key = %r' % (match.key, ))
    print(msg)
def make_args_docstr(argname_list, argtype_list, argdesc_list, ismethod):
    r"""
    Builds the argument docstring

    Each argument becomes one ``name (type): description`` line; the lines
    are joined with newlines.

    Args:
        argname_list (list): names
        argtype_list (list): types
        argdesc_list (list): descriptions
        ismethod (bool): if True the first entry of every list is dropped
            before the lines are built

    Returns:
        str: arg_docstr

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_autogen import *  # NOQA
        >>> argname_list = ['argname_list', 'argtype_list', 'argdesc_list']
        >>> argtype_list = ['list', 'list', 'list']
        >>> argdesc_list = ['names', 'types', 'descriptions']
        >>> ismethod = False
        >>> arg_docstr = make_args_docstr(argname_list, argtype_list, argdesc_list, ismethod)
        >>> result = str(arg_docstr)
        >>> print(result)
        argname_list (list): names
        argtype_list (list): types
        argdesc_list (list): descriptions
    """
    if ismethod:
        # Drop the leading entry of each list
        argname_list = argname_list[1:]
        argtype_list = argtype_list[1:]
        argdesc_list = argdesc_list[1:]
    argdoc_list = [arg + ' (%s): %s' % (_type, desc)
                   for arg, _type, desc in zip(argname_list, argtype_list, argdesc_list)]
    # align?
    align_args = False
    if align_args:
        # utool is only needed for alignment, so import it lazily here
        # instead of on every call.
        import utool as ut
        argdoc_aligned_list = ut.align_lines(argdoc_list, character='(')
        arg_docstr = '\n'.join(argdoc_aligned_list)
    else:
        arg_docstr = '\n'.join(argdoc_list)
    return arg_docstr
def print_score_diffs(match, match_tn):
    """
    Print a per-score comparison between ``match`` and ``match_tn``.

    For every score key one row is printed with the value read from
    ``match.__dict__`` (``<tp>`` column), the value read from
    ``match_tn.__dict__`` (``<tn>`` column), their difference and their
    ratio.  Rows are aligned on ``=`` with ``ut.align_lines`` and preceded by
    a ``key = ...`` line built from ``match.key``.

    This function does not trigger any computation itself: the score
    attributes must already be present in both ``__dict__`` mappings,
    otherwise a ``KeyError`` is raised.

    Args:
        match: object exposing ``key`` and the score attributes.
        match_tn: object exposing the same score attributes.
    """
    score_keys = [
        "num_matches",
        "sum_score",
        "ave_score",
        "weight_ave_score",
        "coverage_score",
        "weighted_coverage_score",
    ]
    msglist = [" * <key> = <tp>, <tn>, <diff>, <factor>"]
    for key in score_keys:
        score = match.__dict__[key]
        score_tn = match_tn.__dict__[key]
        score_diff = score - score_tn
        try:
            score_factor = score / score_tn
        except ZeroDivisionError:
            # A zero comparison score (e.g. no matches) must not abort the
            # report: show a signed infinity, or nan for 0 / 0.
            if score > 0:
                score_factor = float("inf")
            elif score < 0:
                score_factor = float("-inf")
            else:
                score_factor = float("nan")
        msglist.append(
            " * %s = %6.2f, %6.2f, %6.2f, %6.2f"
            % (key, score, score_tn, score_diff, score_factor)
        )
    msglist_aligned = ut.align_lines(msglist, "=")
    msg = "\n".join(msglist_aligned)
    print("key = %r" % (match.key,))
    print(msg)
def split_analysis(ibs):
    """
    Flag names whose annotations imply an excessive travel speed and open a
    graph review widget on the corresponding annotation pairs.

    Annotations from 2016-01-30 and 2016-01-31 are selected with
    ``ibs.filter_annots_general`` and grouped by name.  For every name the
    maximum pairwise speed is computed; names whose maximum is infinite or
    above ``MAX_SPEED`` are flagged.  The edges of the flagged names are
    split into "bad" (speed > ``MAX_SPEED``) and "good" edges, and an
    ``AnnotGraphWidget`` is shown for the bad edges (or the good ones when
    the ``--good`` flag is given).

    Args:
        ibs: controller object providing the annotation queries used below
            (presumably an IBEIS controller - confirm against callers).

    Returns:
        the ``viz_graph2.AnnotGraphWidget`` that was shown

    CommandLine:
        python -m ibeis.other.dbinfo split_analysis --show
        python -m ibeis split_analysis --show
        python -m ibeis split_analysis --show --good

    Ignore:
        # mount
        sshfs -o idmap=user lev:/ ~/lev

        # unmount
        fusermount -u ~/lev

    Example:
        >>> # DISABLE_DOCTEST GGR
        >>> from ibeis.other.dbinfo import *  # NOQA
        >>> import ibeis
        >>> dbdir = '/media/danger/GGR/GGR-IBEIS'
        >>> dbdir = dbdir if ut.checkpath(dbdir) else ut.truepath('~/lev/media/danger/GGR/GGR-IBEIS')
        >>> ibs = ibeis.opendb(dbdir=dbdir, allow_newdir=False)
        >>> import guitool_ibeis as gt
        >>> gt.ensure_qtapp()
        >>> win = split_analysis(ibs)
        >>> ut.quit_if_noshow()
        >>> import plottool_ibeis as pt
        >>> gt.qtapp_loop(qwin=win)
        >>> #ut.show_if_requested()
    """
    #nid_list = ibs.get_valid_nids(filter_empty=True)
    import datetime
    day1 = datetime.date(2016, 1, 30)
    day2 = datetime.date(2016, 1, 31)

    filter_kw = {
        'multiple': None,
        #'view': ['right'],
        #'minqual': 'good',
        'is_known': True,
        'min_pername': 1,
    }
    aids1 = ibs.filter_annots_general(filter_kw=ut.dict_union(
        filter_kw, {
            'min_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day1, 0.0)),
            'max_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day1, 1.0)),
        })
    )
    aids2 = ibs.filter_annots_general(filter_kw=ut.dict_union(
        filter_kw, {
            'min_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day2, 0.0)),
            'max_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day2, 1.0)),
        })
    )
    all_aids = aids1 + aids2
    all_annots = ibs.annots(all_aids)
    print('%d annots on day 1' % (len(aids1)) )
    print('%d annots on day 2' % (len(aids2)) )
    print('%d annots overall' % (len(all_annots)) )
    print('%d names overall' % (len(ut.unique(all_annots.nids))) )

    # Group the annotations by name id
    nid_list, annots_list = all_annots.group(all_annots.nids)

    REVIEWED_EDGES = True
    if REVIEWED_EDGES:
        aids_list = [annots.aids for annots in annots_list]
        #aid_pairs = [annots.get_am_aidpairs() for annots in annots_list]  # Slower
        aid_pairs = ibs.get_unflat_am_aidpairs(aids_list)  # Faster
    else:
        # ALL EDGES
        aid_pairs = [annots.get_aidpairs() for annots in annots_list]

    speeds_list = ibs.unflat_map(ibs.get_annotpair_speeds, aid_pairs)
    import vtool_ibeis as vt
    max_speeds = np.array([vt.safe_max(s, nans=False) for s in speeds_list])

    # Separate names whose maximum speed is nan / inf from the usable ones
    nan_idx = np.where(np.isnan(max_speeds))[0]
    inf_idx = np.where(np.isinf(max_speeds))[0]
    bad_idx = sorted(ut.unique(ut.flatten([inf_idx, nan_idx])))
    ok_idx = ut.index_complement(bad_idx, len(max_speeds))

    print('#nan_idx = %r' % (len(nan_idx),))
    print('#inf_idx = %r' % (len(inf_idx),))
    print('#ok_idx = %r' % (len(ok_idx),))

    ok_speeds = max_speeds[ok_idx]
    ok_nids = ut.take(nid_list, ok_idx)
    ok_annots = ut.take(annots_list, ok_idx)
    # Descending order of maximum speed
    sortx = np.argsort(ok_speeds)[::-1]

    sorted_speeds = np.array(ut.take(ok_speeds, sortx))
    sorted_annots = np.array(ut.take(ok_annots, sortx))
    sorted_nids = np.array(ut.take(ok_nids, sortx))  # NOQA

    sorted_speeds = np.clip(sorted_speeds, 0, 100)

    #idx = vt.find_elbow_point(sorted_speeds)
    #EXCESSIVE_SPEED = sorted_speeds[idx]
    # http://www.infoplease.com/ipa/A0004737.html
    # http://www.speedofanimals.com/animals/zebra
    #ZEBRA_SPEED_MAX = 64  # km/h
    #ZEBRA_SPEED_RUN = 50  # km/h
    ZEBRA_SPEED_SLOW_RUN = 20  # km/h
    #ZEBRA_SPEED_FAST_WALK = 10  # km/h
    #ZEBRA_SPEED_WALK = 7  # km/h

    MAX_SPEED = ZEBRA_SPEED_SLOW_RUN
    #MAX_SPEED = ZEBRA_SPEED_WALK
    #MAX_SPEED = EXCESSIVE_SPEED

    flags = sorted_speeds > MAX_SPEED
    flagged_ok_annots = ut.compress(sorted_annots, flags)
    inf_annots = ut.take(annots_list, inf_idx)
    flagged_annots = inf_annots + flagged_ok_annots

    print('MAX_SPEED = %r km/h' % (MAX_SPEED,))
    print('%d annots with infinite speed' % (len(inf_annots),))
    print('%d annots with large speed' % (len(flagged_ok_annots),))
    print('Marking all pairs of annots above the threshold as non-matching')

    from ibeis.algo.graph import graph_iden
    import networkx as nx
    progkw = dict(freq=1, bs=True, est_window=len(flagged_annots))

    bad_edges_list = []
    good_edges_list = []
    for annots in ut.ProgIter(flagged_annots, lbl='flag speeding names', **progkw):
        edge_to_speeds = annots.get_speeds()
        bad_edges = [edge for edge, speed in edge_to_speeds.items() if speed > MAX_SPEED]
        good_edges = [edge for edge, speed in edge_to_speeds.items() if speed <= MAX_SPEED]
        bad_edges_list.append(bad_edges)
        good_edges_list.append(good_edges)
    # bad_edges_list stays grouped per flagged name (it is zipped with
    # flagged_annots further down); good_edges_list becomes a flat edge list.
    all_bad_edges = ut.flatten(bad_edges_list)
    good_edges_list = ut.flatten(good_edges_list)
    # Count the already-flattened edge lists.  The second line previously
    # reused the 'num_bad_edges' label and flattened the good edges a second
    # time, so it did not report the number of good edges.
    print('num_bad_edges = %r' % (len(all_bad_edges),))
    print('num_good_edges = %r' % (len(good_edges_list),))

    if 1:
        from ibeis.viz import viz_graph2
        import guitool_ibeis as gt
        gt.ensure_qtapp()

        if ut.get_argflag('--good'):
            print('Looking at GOOD (no speed problems) edges')
            aid_pairs = good_edges_list
        else:
            print('Looking at BAD (speed problems) edges')
            aid_pairs = all_bad_edges
        aids = sorted(list(set(ut.flatten(aid_pairs))))
        infr = graph_iden.AnnotInference(ibs, aids, verbose=False)
        infr.initialize_graph()

        # Use random scores to randomize sort order
        rng = np.random.RandomState(0)
        scores = (-rng.rand(len(aid_pairs)) * 10).tolist()
        infr.graph.add_edges_from(aid_pairs)

        if True:
            edge_sample_size = 250
            pop_nids = ut.unique(ibs.get_annot_nids(ut.unique(ut.flatten(aid_pairs))))
            sorted_pairs = ut.sortedby(aid_pairs, scores)[::-1][0:edge_sample_size]
            sorted_nids = ibs.get_annot_nids(ut.take_column(sorted_pairs, 0))
            sample_size = len(ut.unique(sorted_nids))
            am_rowids = ibs.get_annotmatch_rowid_from_undirected_superkey(*zip(*sorted_pairs))
            flags = ut.not_list(ut.flag_None_items(am_rowids))
            #am_rowids = ut.compress(am_rowids, flags)
            positive_tags = ['SplitCase', 'Photobomb']
            flags_list = [ut.replace_nones(ibs.get_annotmatch_prop(tag, am_rowids), 0)
                          for tag in positive_tags]
            print('edge_case_hist: ' + ut.repr3(
                ['%s %s' % (txt, sum(flags_)) for flags_, txt in zip(flags_list, positive_tags)]))
            is_positive = ut.or_lists(*flags_list)
            num_positive = sum(ut.lmap(any, ut.group_items(is_positive, sorted_nids).values()))
            pop = len(pop_nids)
            print('A positive is any edge flagged as a %s' % (ut.conj_phrase(positive_tags, 'or'),))
            print('--- Sampling wrt edges ---')
            print('edge_sample_size = %r' % (edge_sample_size,))
            print('edge_population_size = %r' % (len(aid_pairs),))
            print('num_positive_edges = %r' % (sum(is_positive)))
            print('--- Sampling wrt names ---')
            print('name_population_size = %r' % (pop,))
            vt.calc_error_bars_from_sample(sample_size, num_positive, pop, conf_level=.95)

        nx.set_edge_attributes(infr.graph, name='score', values=dict(zip(aid_pairs, scores)))

        win = viz_graph2.AnnotGraphWidget(infr=infr, use_image=False,
                                          init_mode=None)
        win.populate_edge_model()
        win.show()
        return win

    # NOTE(review): everything below is unreachable while the ``if 1:`` block
    # above returns.  It is kept as-is as an alternative analysis path.

    # Make review interface for only bad edges
    infr_list = []
    iter_ = list(zip(flagged_annots, bad_edges_list))
    for annots, bad_edges in ut.ProgIter(iter_, lbl='creating inference', **progkw):
        aids = annots.aids
        nids = [1] * len(aids)
        infr = graph_iden.AnnotInference(ibs, aids, nids, verbose=False)
        infr.initialize_graph()
        infr.reset_feedback()
        infr_list.append(infr)

    # Check which ones are user defined as incorrect
    #num_positive = 0
    #for infr in infr_list:
    #    flag = np.any(infr.get_feedback_probs()[0] == 0)
    #    num_positive += flag
    #print('num_positive = %r' % (num_positive,))
    #pop = len(infr_list)
    #print('pop = %r' % (pop,))

    iter_ = list(zip(infr_list, bad_edges_list))
    for infr, bad_edges in ut.ProgIter(iter_, lbl='adding speed edges', **progkw):
        flipped_edges = []
        for aid1, aid2 in bad_edges:
            if infr.graph.has_edge(aid1, aid2):
                flipped_edges.append((aid1, aid2))
            infr.add_feedback((aid1, aid2), NEGTV)
        nx.set_edge_attributes(infr.graph, name='_speed_split', values='orig')
        nx.set_edge_attributes(infr.graph, name='_speed_split', values={edge: 'new' for edge in bad_edges})
        nx.set_edge_attributes(infr.graph, name='_speed_split', values={edge: 'flip' for edge in flipped_edges})

    #for infr in ut.ProgIter(infr_list, lbl='flagging speeding edges', **progkw):
    #    annots = ibs.annots(infr.aids)
    #    edge_to_speeds = annots.get_speeds()
    #    bad_edges = [edge for edge, speed in edge_to_speeds.items() if speed > MAX_SPEED]

    def inference_stats(infr_list_):
        relabel_stats = []
        for infr in infr_list_:
            num_ccs, num_inconsistent = infr.relabel_using_reviews()
            state_hist = ut.dict_hist(nx.get_edge_attributes(infr.graph, 'decision').values())
            if POSTV not in state_hist:
                state_hist[POSTV] = 0
            hist = ut.dict_hist(nx.get_edge_attributes(infr.graph, '_speed_split').values())

            subgraphs = infr.positive_connected_compoments()
            subgraph_sizes = [len(g) for g in subgraphs]

            info = ut.odict([
                ('num_nonmatch_edges', state_hist[NEGTV]),
                ('num_match_edges', state_hist[POSTV]),
                ('frac_nonmatch_edges', state_hist[NEGTV] / (state_hist[POSTV] + state_hist[NEGTV])),
                ('num_inconsistent', num_inconsistent),
                ('num_ccs', num_ccs),
                ('edges_flipped', hist.get('flip', 0)),
                ('edges_unchanged', hist.get('orig', 0)),
                ('bad_unreviewed_edges', hist.get('new', 0)),
                ('orig_size', len(infr.graph)),
                ('new_sizes', subgraph_sizes),
            ])
            relabel_stats.append(info)
        return relabel_stats

    relabel_stats = inference_stats(infr_list)

    print('\nAll Split Info:')
    lines = []
    for key in relabel_stats[0].keys():
        data = ut.take_column(relabel_stats, key)
        if key == 'new_sizes':
            data = ut.flatten(data)
        lines.append('stats(%s) = %s' % (key, ut.repr2(ut.get_stats(data, use_median=True), precision=2)))
    print('\n'.join(ut.align_lines(lines, '=')))

    num_incon_list = np.array(ut.take_column(relabel_stats, 'num_inconsistent'))
    can_split_flags = num_incon_list == 0
    print('Can trivially split %d / %d' % (sum(can_split_flags), len(can_split_flags)))

    splittable_infrs = ut.compress(infr_list, can_split_flags)

    relabel_stats = inference_stats(splittable_infrs)

    print('\nTrival Split Info:')
    lines = []
    for key in relabel_stats[0].keys():
        if key in ['num_inconsistent']:
            continue
        data = ut.take_column(relabel_stats, key)
        if key == 'new_sizes':
            data = ut.flatten(data)
        lines.append('stats(%s) = %s' % (
            key, ut.repr2(ut.get_stats(data, use_median=True), precision=2)))
    print('\n'.join(ut.align_lines(lines, '=')))

    num_match_edges = np.array(ut.take_column(relabel_stats, 'num_match_edges'))
    num_nonmatch_edges = np.array(ut.take_column(relabel_stats, 'num_nonmatch_edges'))
    flags1 = np.logical_and(num_match_edges > num_nonmatch_edges, num_nonmatch_edges < 3)
    reasonable_infr = ut.compress(splittable_infrs, flags1)

    new_sizes_list = ut.take_column(relabel_stats, 'new_sizes')
    flags2 = [len(sizes) == 2 and sum(sizes) > 4 and (min(sizes) / max(sizes)) > .3
              for sizes in new_sizes_list]
    reasonable_infr = ut.compress(splittable_infrs, flags2)
    print('#reasonable_infr = %r' % (len(reasonable_infr),))

    for infr in ut.InteractiveIter(reasonable_infr):
        annots = ibs.annots(infr.aids)
        edge_to_speeds = annots.get_speeds()
        print('max_speed = %r' % (max(edge_to_speeds.values())),)
        infr.initialize_visual_node_attrs()
        infr.show_graph(use_image=True, only_reviewed=True)

    rest = ~np.logical_or(flags1, flags2)
    nonreasonable_infr = ut.compress(splittable_infrs, rest)
    rng = np.random.RandomState(0)
    random_idx = ut.random_indexes(len(nonreasonable_infr) - 1, 15, rng=rng)
    random_infr = ut.take(nonreasonable_infr, random_idx)
    for infr in ut.InteractiveIter(random_infr):
        annots = ibs.annots(infr.aids)
        edge_to_speeds = annots.get_speeds()
        print('max_speed = %r' % (max(edge_to_speeds.values())),)
        infr.initialize_visual_node_attrs()
        infr.show_graph(use_image=True, only_reviewed=True)

    #import scipy.stats as st
    #conf_interval = .95
    #st.norm.cdf(conf_interval)
    # view-source:http://www.surveysystem.com/sscalc.htm
    #zval = 1.96  # 95 percent confidence
    #zValC = 3.8416  #
    #zValC = 6.6564

    #import statsmodels.stats.api as sms
    #es = sms.proportion_effectsize(0.5, 0.75)
    #sms.NormalIndPower().solve_power(es, power=0.9, alpha=0.05, ratio=1)

    pop = 279
    num_positive = 3
    sample_size = 15
    conf_level = .95
    #conf_level = .99
    vt.calc_error_bars_from_sample(sample_size, num_positive, pop, conf_level)
    print('---')
    vt.calc_error_bars_from_sample(sample_size + 38, num_positive, pop, conf_level)
    print('---')
    vt.calc_error_bars_from_sample(sample_size + 38 / 3, num_positive, pop, conf_level)
    print('---')

    vt.calc_error_bars_from_sample(15 + 38, num_positive=3, pop=675, conf_level=.95)
    vt.calc_error_bars_from_sample(15, num_positive=3, pop=675, conf_level=.95)

    pop = 279
    #err_frac = .05  # 5%
    err_frac = .10  # 10%
    conf_level = .95
    vt.calc_sample_from_error_bars(err_frac, pop, conf_level)

    pop = 675
    vt.calc_sample_from_error_bars(err_frac, pop, conf_level)
    vt.calc_sample_from_error_bars(.05, pop, conf_level=.95, prior=.1)
    vt.calc_sample_from_error_bars(.05, pop, conf_level=.68, prior=.2)
    vt.calc_sample_from_error_bars(.10, pop, conf_level=.68)

    vt.calc_error_bars_from_sample(100, num_positive=5, pop=675, conf_level=.95)
    vt.calc_error_bars_from_sample(100, num_positive=5, pop=675, conf_level=.68)
def make_args_docstr(argname_list, argtype_list, argdesc_list, ismethod,
                     va_name=None, kw_name=None, kw_keys=None):
    r"""
    Builds the argument docstring

    Each argument becomes one ``name (type): description`` line.  When
    given, a ``*va_name:`` line and a ``**kw_name: key, key, ...`` line are
    appended; the kwarg keys are wrapped so that the line stays within 70
    columns, continuation lines being indented to the width of the prefix.

    Args:
        argname_list (list): names
        argtype_list (list): types
        argdesc_list (list): descriptions
        ismethod (bool): if generating docs for a method
        va_name (Optional[str]): varargs name
        kw_name (Optional[str]): kwargs name
        kw_keys (Optional[list]): accepted kwarg keys; None is treated as
            an empty list

    Returns:
        str: arg_docstr

    CommandLine:
        python -m utool.util_autogen make_args_docstr

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_autogen import *  # NOQA
        >>> argname_list = ['argname_list', 'argtype_list', 'argdesc_list']
        >>> argtype_list = ['list', 'list', 'list']
        >>> argdesc_list = ['names', 'types', 'descriptions']
        >>> va_name = 'args'
        >>> kw_name = 'kwargs'
        >>> kw_keys = ['']
        >>> ismethod = False
        >>> arg_docstr = make_args_docstr(argname_list, argtype_list,
        >>>                               argdesc_list, ismethod, va_name,
        >>>                               kw_name, kw_keys)
        >>> result = str(arg_docstr)
        >>> print(result)
        argname_list (list): names
        argtype_list (list): types
        argdesc_list (list): descriptions
        *args:
        **kwargs:
    """
    # A None default avoids sharing one mutable list between calls
    if kw_keys is None:
        kw_keys = []
    if ismethod:
        # Remove self from the list
        argname_list = argname_list[1:]
        argtype_list = argtype_list[1:]
        argdesc_list = argdesc_list[1:]
    argdoc_list = [arg + ' (%s): %s' % (_type, desc)
                   for arg, _type, desc in zip(argname_list, argtype_list, argdesc_list)]
    # Add in varargs and kwargs
    # References:
    # http://www.sphinx-doc.org/en/stable/ext/example_google.html#example-google
    if va_name is not None:
        argdoc_list.append('*' + va_name + ':')
    if kw_name is not None:
        import textwrap
        prefix = '**' + kw_name + ': '
        wrapped_lines = textwrap.wrap(', '.join(kw_keys), width=70 - len(prefix))
        sep = '\n' + (' ' * len(prefix))
        kw_keystr = sep.join(wrapped_lines)
        # strip() removes the trailing space of the prefix when no keys exist
        argdoc_list.append((prefix + kw_keystr).strip())
    # align?
    align_args = False
    if align_args:
        # utool is only needed for alignment, so import it lazily here
        # instead of on every call.
        import utool as ut
        argdoc_aligned_list = ut.align_lines(argdoc_list, character='(')
        arg_docstr = '\n'.join(argdoc_aligned_list)
    else:
        arg_docstr = '\n'.join(argdoc_list)
    return arg_docstr