def color_by_nids(graph, unique_nids=None, ibs=None, nid2_color_=None):
    """ Colors edges and nodes by nid """
    # TODO use ut.color_nodes
    import plottool as pt

    ensure_graph_nid_labels(graph, unique_nids, ibs=ibs)
    node_to_nid = nx.get_node_attributes(graph, 'nid')
    unique_nids = ut.unique(node_to_nid.values())
    ncolors = len(unique_nids)
    if ncolors == 1:
        unique_colors = [pt.UNKNOWN_PURP]
    else:
        if nid2_color_ is not None:
            unique_colors = pt.distinct_colors(ncolors + len(nid2_color_) * 2)
        else:
            unique_colors = pt.distinct_colors(ncolors)
    # Find edges and aids strictly between two nids
    nid_to_color = dict(zip(unique_nids, unique_colors))
    if nid2_color_ is not None:
        # HACK NEED TO ENSURE COLORS ARE NOT REUSED
        nid_to_color.update(nid2_color_)
    edge_aids = list(graph.edges())
    edge_nids = ut.unflat_take(node_to_nid, edge_aids)
    flags = [nids[0] == nids[1] for nids in edge_nids]
    flagged_edge_aids = ut.compress(edge_aids, flags)
    flagged_edge_nids = ut.compress(edge_nids, flags)
    flagged_edge_colors = [nid_to_color[nids[0]] for nids in flagged_edge_nids]
    edge_to_color = dict(zip(flagged_edge_aids, flagged_edge_colors))
    node_to_color = ut.map_dict_vals(ut.partial(ut.take, nid_to_color), node_to_nid)
    # NOTE: (graph, name, values) is the networkx 1.x argument order;
    # networkx 2.x expects set_edge_attributes(graph, values, name)
    nx.set_edge_attributes(graph, 'color', edge_to_color)
    nx.set_node_attributes(graph, 'color', node_to_color)
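
# Usage sketch (hypothetical node ids; shows just the same-nid edge-flagging
# logic above with plain dicts, no plottool/ibeis required):
# >>> node_to_nid = {1: 'a', 2: 'a', 3: 'b'}
# >>> edges = [(1, 2), (2, 3)]
# >>> edge_nids = [(node_to_nid[u], node_to_nid[v]) for u, v in edges]
# >>> flags = [n1 == n2 for n1, n2 in edge_nids]
# >>> flags  # only the (1, 2) edge stays within one name
# [True, False]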
def send_wbia_request(suffix, type_='post', json=True, **kwargs):
    """
    Posts a request to a url suffix
    """
    import requests
    import utool as ut
    if not suffix.endswith('/'):
        raise Exception('YOU PROBABLY WANT A / AT THE END OF YOUR URL')
    payload = ut.map_dict_vals(ut.to_json, kwargs)
    if type_ == 'post':
        resp = requests.post(baseurl + suffix, data=payload)
        # use the public accessor rather than the private resp._content
        content = resp.content
    elif type_ == 'get':
        resp = requests.get(baseurl + suffix, data=payload)
        content = resp.content
    else:
        # guard against `content` being unbound for unknown request types
        raise ValueError('unknown request type_=%r' % (type_,))
    if json:
        try:
            content = ut.from_json(content)
        except ValueError:
            raise Exception('Expected JSON string but got content=%r' % (content,))
        else:
            # logger.info('content = %r' % (content,))
            if content['status']['code'] != 200:
                logger.info(content['status']['message'])
                raise Exception(content['status']['message'])
        content = content['response']
    return content
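
# Usage sketch (hypothetical endpoint and port; assumes the module-level
# ``baseurl`` points at a running wbia web server and that kwargs are
# JSON-encodable):
# >>> baseurl = 'http://localhost:5000'
# >>> aid_list = send_wbia_request('/api/annot/', type_='get')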
def _debug_repr_cpd(cpd):
    import re
    import utool as ut
    code_fmt = ut.codeblock(
        '''
        {variable} = pgmpy.factors.TabularCPD(
            variable={variable_repr},
            variable_card={variable_card_repr},
            values={get_cpd_repr},
            evidence={evidence_repr},
            evidence_card={evidence_card_repr},
        )
        ''')
    keys = ['variable', 'variable_card', 'values', 'evidence', 'evidence_card']
    dict_ = ut.odict(zip(keys, [getattr(cpd, key) for key in keys]))
    # HACK
    dict_['values'] = cpd.get_cpd()
    r = ut.repr2(dict_, explicit=True, nobraces=True, nl=True)
    print(r)
    # Parse props that are needed for this fmtstr
    fmt_keys = [match.groups()[0] for match in re.finditer('{(.*?)}', code_fmt)]
    need_reprs = [key[:-5] for key in fmt_keys if key.endswith('_repr')]
    need_keys = [key for key in fmt_keys if not key.endswith('_repr')]
    # Get corresponding props
    # Call methods if need be
    tmp = [(prop, getattr(cpd, prop)) for prop in need_reprs]
    tmp = [(x, y()) if ut.is_funclike(y) else (x, y) for (x, y) in tmp]
    fmtdict = dict(tmp)
    fmtdict = ut.map_dict_vals(ut.repr2, fmtdict)
    fmtdict = ut.map_dict_keys(lambda x: x + '_repr', fmtdict)
    tmp2 = [(prop, getattr(cpd, prop)) for prop in need_keys]
    fmtdict.update(dict(tmp2))
    code = code_fmt.format(**fmtdict)
    return code
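
# Sketch of the format-key parsing used above (self-contained; the template
# string here is illustrative, not the real one):
# >>> import re
# >>> code_fmt = '{variable} = TabularCPD(values={get_cpd_repr})'
# >>> fmt_keys = [m.groups()[0] for m in re.finditer('{(.*?)}', code_fmt)]
# >>> fmt_keys
# ['variable', 'get_cpd_repr']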
def send_ibeis_request(suffix, type_='post', **kwargs):
    """
    Posts a request to a url suffix
    """
    import requests
    import utool as ut
    if not suffix.endswith('/'):
        raise Exception('YOU PROBABLY WANT A / AT THE END OF YOUR URL')
    payload = ut.map_dict_vals(ut.to_json, kwargs)
    if type_ == 'post':
        resp = requests.post(baseurl + suffix, data=payload)
        # use the public accessor rather than the private resp._content
        json_content = resp.content
    elif type_ == 'get':
        resp = requests.get(baseurl + suffix, data=payload)
        json_content = resp.content
    else:
        # guard against `json_content` being unbound for unknown request types
        raise ValueError('unknown request type_=%r' % (type_,))
    try:
        content = ut.from_json(json_content)
    except ValueError:
        raise Exception('Expected JSON string but got json_content=%r' %
                        (json_content,))
    else:
        # print('content = %r' % (content,))
        if content['status']['code'] != 200:
            print(content['status']['message'])
            raise Exception(content['status']['message'])
    request_response = content['response']
    return request_response
def print_size_info(inva):
    sizes = inva.get_size_info()
    sizes = ut.sort_dict(sizes, 'vals', ut.identity)
    total_nbytes = sum(sizes.values())
    logger.info(
        ut.align(ut.repr3(ut.map_dict_vals(ut.byte_str2, sizes), strvals=True), ':')
    )
    logger.info('total_nbytes = %r' % (ut.byte_str2(total_nbytes),))
def __init__(invassign, fstack, vocab, wx2_idxs, wx2_maws, wx2_fxs, wx2_axs):
    invassign.fstack = fstack
    invassign.vocab = vocab
    invassign.wx2_idxs = wx2_idxs
    invassign.wx2_maws = wx2_maws
    invassign.wx2_fxs = wx2_fxs
    invassign.wx2_axs = wx2_axs
    invassign.wx2_num = ut.map_dict_vals(len, invassign.wx2_axs)
    invassign.wx_list = sorted(invassign.wx2_num.keys())
    invassign.num_list = ut.take(invassign.wx2_num, invassign.wx_list)
    invassign.perword_stats = ut.get_stats(invassign.num_list)
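
# Sketch of the per-word bookkeeping above with plain dicts (hypothetical
# word indices; ut.map_dict_vals maps a function over a dict's values):
# >>> wx2_axs = {0: [1, 2, 3], 5: [4], 7: [5, 6]}
# >>> wx2_num = {wx: len(axs) for wx, axs in wx2_axs.items()}
# >>> wx_list = sorted(wx2_num.keys())
# >>> [wx2_num[wx] for wx in wx_list]
# [3, 1, 2]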
def infohist(group):
    cardnames = [six.text_type(c) for c in group.cards]
    types_ = [c.types[-1] for c in group.cards]
    hgroup = dict(ut.hierarchical_group_items(cardnames, [types_, cardnames]))
    infohist1 = ut.map_dict_vals(ut.dict_hist, hgroup).items()
    # Grouped infohist
    infohist = [(key, [(vals[n], n) for n in vals]) for key, vals in infohist1]
    # list_ = [six.text_type(c) for c in group.cards]
    # dict_ = ut.dict_hist(list_)
    # ulist_ = ut.unique_ordered(list_)
    # infohist = [(dict_[item], item) for item in ulist_]
    # ut.embed()
    return infohist
def send_ibeis_request(suffix, type_='post', **kwargs):
    """
    Posts a request to a url suffix
    """
    import requests
    import utool as ut
    payload = ut.map_dict_vals(ut.to_json, kwargs)
    if type_ == 'post':
        resp = requests.post(baseurl + suffix, data=payload)
        # use the public accessor rather than the private resp._content
        content = ut.from_json(resp.content)
    elif type_ == 'get':
        resp = requests.get(baseurl + suffix, data=payload)
        content = ut.from_json(resp.content)
    response = content['response']
    return response
def apply_hard_soft_evidence(cpd_list, evidence_list):
    # NOTE: ``evidence`` and ``soft_evidence`` are dicts from the enclosing
    # scope; this function mutates them (and the cpds) in place.
    for cpd, ev in zip(cpd_list, evidence_list):
        if isinstance(ev, int):
            # hard internal evidence
            evidence[cpd.variable] = ev
        if isinstance(ev, six.string_types):
            # hard external evidence
            evidence[cpd.variable] = cpd._internal_varindex(cpd.variable, ev)
        if isinstance(ev, dict):
            # soft external evidence
            # HACK THAT MODIFIES CPD IN PLACE
            def rectify_evidence_val(_v, card=cpd.variable_card):
                # rectify hacky string structures
                tmp = 1 / (2 * card ** 2)
                return (1 + tmp) / (card + tmp) if _v == '+eps' else _v
            ev_ = ut.map_dict_vals(rectify_evidence_val, ev)
            fill = (1.0 - sum(ev_.values())) / (cpd.variable_card - len(ev_))
            # HACK fix for float problems
            if len(ev_) == cpd.variable_card - 1:
                fill = 0
            assert fill > -1e7, 'fill=%r' % (fill,)
            row_labels = list(ut.iprod(*cpd.statenames))
            for i, lbl in enumerate(row_labels):
                if lbl in ev_:
                    # external case1
                    cpd.values[i] = ev_[lbl]
                elif len(lbl) == 1 and lbl[0] in ev_:
                    # external case2
                    cpd.values[i] = ev_[lbl[0]]
                elif i in ev_:
                    # internal case
                    cpd.values[i] = ev_[i]
                else:
                    cpd.values[i] = fill
            cpd.normalize()
            soft_evidence[cpd.variable] = True
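
# Numeric sketch of the soft-evidence fill above (hypothetical 3-state
# variable): mass left over by the user-specified states is spread uniformly.
# >>> variable_card = 3
# >>> ev_ = {'state0': 0.7}          # user pins one state at 0.7
# >>> fill = (1.0 - sum(ev_.values())) / (variable_card - len(ev_))
# >>> round(fill, 3)                 # remaining 0.3 split over 2 states
# 0.15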
def _debug_repr_cpd(cpd):
    import re
    import utool as ut

    code_fmt = ut.codeblock("""
        {variable} = pgmpy.factors.TabularCPD(
            variable={variable_repr},
            variable_card={variable_card_repr},
            values={get_cpd_repr},
            evidence={evidence_repr},
            evidence_card={evidence_card_repr},
        )
        """)
    keys = ['variable', 'variable_card', 'values', 'evidence', 'evidence_card']
    dict_ = ut.odict(zip(keys, [getattr(cpd, key) for key in keys]))
    # HACK
    dict_['values'] = cpd.get_cpd()
    r = ut.repr2(dict_, explicit=True, nobraces=True, nl=True)
    logger.info(r)
    # Parse props that are needed for this fmtstr
    fmt_keys = [match.groups()[0] for match in re.finditer('{(.*?)}', code_fmt)]
    need_reprs = [key[:-5] for key in fmt_keys if key.endswith('_repr')]
    need_keys = [key for key in fmt_keys if not key.endswith('_repr')]
    # Get corresponding props
    # Call methods if need be
    tmp = [(prop, getattr(cpd, prop)) for prop in need_reprs]
    tmp = [(x, y()) if ut.is_funclike(y) else (x, y) for (x, y) in tmp]
    fmtdict = dict(tmp)
    fmtdict = ut.map_dict_vals(ut.repr2, fmtdict)
    fmtdict = ut.map_dict_keys(lambda x: x + '_repr', fmtdict)
    tmp2 = [(prop, getattr(cpd, prop)) for prop in need_keys]
    fmtdict.update(dict(tmp2))
    code = code_fmt.format(**fmtdict)
    return code
def nx_dag_node_rank(graph, nodes=None):
    """
    Returns rank of nodes that define the "level" each node is on in a
    topological sort. This is the same as the Graphviz dot rank.

    Ignore:
        simple_graph = ut.simplify_graph(exi_graph)
        adj_dict = ut.nx_to_adj_dict(simple_graph)
        import plottool as pt
        pt.qt4ensure()
        pt.show_nx(graph)

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_graph import *  # NOQA
        >>> import utool as ut
        >>> adj_dict = {0: [5], 1: [5], 2: [1], 3: [4], 4: [0], 5: [], 6: [4], 7: [9], 8: [6], 9: [1]}
        >>> import networkx as nx
        >>> nodes = [2, 1, 5]
        >>> f_graph = ut.nx_from_adj_dict(adj_dict, nx.DiGraph)
        >>> graph = f_graph.reverse()
        >>> #ranks = ut.nx_dag_node_rank(graph, nodes)
        >>> ranks = ut.nx_dag_node_rank(graph, nodes)
        >>> result = ('ranks = %r' % (ranks,))
        >>> print(result)
        ranks = [3, 2, 1]
    """
    import utool as ut
    source = list(ut.nx_source_nodes(graph))[0]
    longest_paths = dict([(target, dag_longest_path(graph, source, target))
                          for target in graph.nodes()])
    node_to_rank = ut.map_dict_vals(len, longest_paths)
    if nodes is None:
        return node_to_rank
    else:
        ranks = ut.dict_take(node_to_rank, nodes)
        return ranks
def add_split(dataset, key, idxs):
    print('[dataset] adding split %r' % (key,))
    # Build subset filenames
    ut.ensuredir(dataset.split_dpath)
    ext = dataset._ext
    fmtdict = dict(key=key, ext=ext, size=len(idxs))
    fmtstr = dataset.get_split_fmtstr(forward=True)
    splitset = {
        type_: join(dataset.split_dpath, fmtstr.format(type_=type_, **fmtdict))
        for type_ in ['data', 'labels', 'metadata']
    }
    # Partition data into the subset
    part_dict = {
        'data': dataset.data.take(idxs, axis=0),
        'labels': dataset.labels.take(idxs, axis=0),
    }
    if dataset.metadata is not None:
        taker = ut.partial(ut.take, index_list=idxs)
        part_dict['metadata'] = ut.map_dict_vals(taker, dataset.metadata)
    # Write splitset data to files
    for type_ in part_dict.keys():
        ut.save_data(splitset[type_], part_dict[type_])
    # Register filenames with dataset
    dataset.fpath_dict[key] = splitset
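
# Sketch of the take-based partitioning above with plain numpy (hypothetical
# toy arrays; the same idxs select rows from data and labels in lockstep):
# >>> import numpy as np
# >>> data = np.arange(10).reshape(5, 2)
# >>> labels = np.array([0, 1, 0, 1, 0])
# >>> idxs = [0, 3, 4]
# >>> part = {'data': data.take(idxs, axis=0), 'labels': labels.take(idxs, axis=0)}
# >>> part['labels'].tolist()
# [0, 1, 0]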
def draw_feat_scoresep(testres, f=None, disttype=None):
    r"""
    SeeAlso:
        ibeis.algo.hots.scorenorm.train_featscore_normalizer

    CommandLine:
        python -m ibeis --tf TestResult.draw_feat_scoresep --show
        python -m ibeis --tf TestResult.draw_feat_scoresep --show -t default:sv_on=[True,False]
        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_Master1
        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_Master1 --disttype=L2_sift,fg
        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_Master1 --disttype=L2_sift
        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_MTEST -t best:lnbnn_on=True --namemode=True
        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_MTEST -t best:lnbnn_on=True --namemode=False
        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_MTEST --disttype=L2_sift
        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_MTEST --disttype=L2_sift -t best:SV=False

        utprof.py -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_Master1
        utprof.py -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_Master1 --fsvx=1:2
        utprof.py -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_Master1 --fsvx=0:1
        utprof.py -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_Master1 -t best:lnbnn_on=False,bar_l2_on=True --fsvx=0:1

        # We want to query the oxford annots tagged query
        # and we want the database to contain
        # K correct images per query, as well as the distractors

        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db Oxford -a default:qhas_any=\(query,\),dpername=1,exclude_reference=True,minqual=ok
        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db Oxford -a default:qhas_any=\(query,\),dpername=1,exclude_reference=True,minqual=good

        python -m ibeis --tf get_annotcfg_list --db PZ_Master1 -a timectrl --acfginfo --verbtd --veryverbtd --nocache-aid

        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_MTEST --disttype=ratio

    Example:
        >>> # SCRIPT
        >>> from ibeis.expt.test_result import *  # NOQA
        >>> from ibeis.init import main_helpers
        >>> disttype = ut.get_argval('--disttype', type_=list, default=None)
        >>> ibs, testres = main_helpers.testdata_expts(
        >>>     defaultdb='PZ_MTEST', a=['timectrl'], t=['best'])
        >>> f = ut.get_argval(('--filt', '-f'), type_=list, default=[''])
        >>> testres.draw_feat_scoresep(f=f)
        >>> ut.show_if_requested()
    """
    print('[testres] draw_feat_scoresep')
    import plottool as pt

    def load_feat_scores(qreq_, qaids):
        import ibeis  # NOQA
        from os.path import dirname, join  # NOQA

        # HACKY CACHE
        cfgstr = qreq_.get_cfgstr(with_input=True)
        cache_dir = join(dirname(dirname(ibeis.__file__)), 'TMP_FEATSCORE_CACHE')
        namemode = ut.get_argval('--namemode', default=True)
        fsvx = ut.get_argval('--fsvx', type_='fuzzy_subset',
                             default=slice(None, None, None))
        threshx = ut.get_argval('--threshx', type_=int, default=None)
        thresh = ut.get_argval('--thresh', type_=float, default=.9)
        num = ut.get_argval('--num', type_=int, default=1)
        cfg_components = [cfgstr, disttype, namemode, fsvx, threshx, thresh, f, num]
        cache_cfgstr = ','.join(ut.lmap(six.text_type, cfg_components))
        cache_hashid = ut.hashstr27(cache_cfgstr + '_v1')
        cache_name = ('get_cfgx_feat_scores_' + cache_hashid)

        @ut.cached_func(cache_name, cache_dir=cache_dir, key_argx=[],
                        use_cache=True)
        def get_cfgx_feat_scores(qreq_, qaids):
            from ibeis.algo.hots import scorenorm
            cm_list = qreq_.execute(qaids)
            # print('Done loading cached chipmatches')
            tup = scorenorm.get_training_featscores(qreq_, cm_list, disttype,
                                                    namemode, fsvx, threshx,
                                                    thresh, num=num)
            # print(ut.depth_profile(tup))
            tp_scores, tn_scores, scorecfg = tup
            return tp_scores, tn_scores, scorecfg

        tp_scores, tn_scores, scorecfg = get_cfgx_feat_scores(qreq_, qaids)
        return tp_scores, tn_scores, scorecfg

    valid_case_pos = testres.case_sample2(filt_cfg=f, return_mask=False)
    cfgx2_valid_qxs = ut.group_items(valid_case_pos.T[0], valid_case_pos.T[1])
    test_qaids = testres.get_test_qaids()
    cfgx2_valid_qaids = ut.map_dict_vals(ut.partial(ut.take, test_qaids),
                                         cfgx2_valid_qxs)

    join_acfgs = True

    # TODO: option to average over pipeline configurations
    if join_acfgs:
        groupxs = testres.get_cfgx_groupxs()
    else:
        groupxs = list(zip(range(len(testres.cfgx2_qreq_))))
    grouped_qreqs = ut.apply_grouping(testres.cfgx2_qreq_, groupxs)

    grouped_scores = []
    for cfgxs, qreq_group in zip(groupxs, grouped_qreqs):
        # testres.print_pcfg_info()
        score_group = []
        for cfgx, qreq_ in zip(cfgxs, testres.cfgx2_qreq_):
            print('Loading cached chipmatches')
            qaids = cfgx2_valid_qaids[cfgx]
            tp_scores, tn_scores, scorecfg = load_feat_scores(qreq_, qaids)
            score_group.append((tp_scores, tn_scores, scorecfg))
        grouped_scores.append(score_group)

    cfgx2_shortlbl = testres.get_short_cfglbls(join_acfgs=join_acfgs)
    for score_group, lbl in zip(grouped_scores, cfgx2_shortlbl):
        tp_scores = np.hstack(ut.take_column(score_group, 0))
        tn_scores = np.hstack(ut.take_column(score_group, 1))
        scorecfg = '+++'.join(ut.unique(ut.take_column(score_group, 2)))
        score_group
        # TODO: learn this score normalizer as a model
        # encoder = vt.ScoreNormalizer(adjust=4, monotonize=False)
        encoder = vt.ScoreNormalizer(adjust=2, monotonize=True)
        encoder.fit_partitioned(tp_scores, tn_scores, verbose=False)
        figtitle = 'Feature Scores: %s, %s' % (scorecfg, lbl)
        fnum = None
        vizkw = {}
        sephack = ut.get_argflag('--sephack')
        if not sephack:
            vizkw['target_tpr'] = .95
            vizkw['score_range'] = (0, 1.0)
        encoder.visualize(
            figtitle=figtitle, fnum=fnum,
            with_scores=False,
            # with_prebayes=True,
            with_prebayes=False,
            with_roc=True,
            with_postbayes=False,
            # with_postbayes=True,
            **vizkw)
        icon = testres.ibs.get_database_icon()
        if icon is not None:
            pt.overlay_icon(icon, coords=(1, 0), bbox_alignment=(1, 0))
        if ut.get_argflag('--contextadjust'):
            pt.adjust_subplots(left=.1, bottom=.25, wspace=.2, hspace=.2)
            pt.adjust_subplots(use_argv=True)
    return encoder
def make_graph(infr, show=False):
    import networkx as nx
    import itertools
    cm_list = infr.cm_list
    unique_nids, prob_names = infr.make_prob_names()
    thresh = infr.choose_thresh()

    # Simply cut any edge with a weight less than a threshold
    qaid_list = [cm.qaid for cm in cm_list]
    postcut = prob_names > thresh
    qxs, nxs = np.where(postcut)
    if False:
        kw = dict(precision=2, max_line_width=140, suppress_small=True)
        print(ut.hz_str('prob_names = ', ut.array2string2((prob_names), **kw)))
        # NOTE: np.int was removed from modern numpy; builtin int is equivalent
        print(ut.hz_str('postcut = ', ut.array2string2((postcut).astype(int), **kw)))
    matching_qaids = ut.take(qaid_list, qxs)
    matched_nids = ut.take(unique_nids, nxs)

    qreq_ = infr.qreq_

    nodes = ut.unique(qreq_.qaids.tolist() + qreq_.daids.tolist())
    if not hasattr(qreq_, 'dnids'):
        qreq_.dnids = qreq_.ibs.get_annot_nids(qreq_.daids)
        qreq_.qnids = qreq_.ibs.get_annot_nids(qreq_.qaids)
    dnid2_daids = ut.group_items(qreq_.daids, qreq_.dnids)
    grouped_aids = dnid2_daids.values()
    matched_daids = ut.take(dnid2_daids, matched_nids)
    name_cliques = [list(itertools.combinations(aids, 2))
                    for aids in grouped_aids]
    aid_matches = [list(ut.product([qaid], daids))
                   for qaid, daids in zip(matching_qaids, matched_daids)]

    graph = nx.Graph()
    graph.add_nodes_from(nodes)
    graph.add_edges_from(ut.flatten(name_cliques))
    graph.add_edges_from(ut.flatten(aid_matches))

    #matchless_quries = ut.take(qaid_list, ut.index_complement(qxs, len(qaid_list)))
    name_nodes = [('nid', l) for l in qreq_.dnids]
    db_aid_nid_edges = list(zip(qreq_.daids, name_nodes))
    #query_aid_nid_edges = list(zip(matching_qaids, [('nid', l) for l in matched_nids]))
    #G = nx.Graph()
    #G.add_nodes_from(matchless_quries)
    #G.add_edges_from(db_aid_nid_edges)
    #G.add_edges_from(query_aid_nid_edges)
    graph.add_edges_from(db_aid_nid_edges)

    if infr.user_feedback is not None:
        user_feedback = ut.map_dict_vals(np.array, infr.user_feedback)
        p_bg = 0.0
        part1 = user_feedback['p_match'] * (1 - user_feedback['p_notcomp'])
        part2 = p_bg * user_feedback['p_notcomp']
        p_same_list = part1 + part2
        for aid1, aid2, p_same in zip(user_feedback['aid1'],
                                      user_feedback['aid2'], p_same_list):
            if p_same > .5:
                if not graph.has_edge(aid1, aid2):
                    graph.add_edge(aid1, aid2)
            else:
                if graph.has_edge(aid1, aid2):
                    graph.remove_edge(aid1, aid2)

    if show:
        import plottool as pt
        # NOTE: (graph, name, values) is the networkx 1.x argument order
        nx.set_node_attributes(graph, 'color',
                               {aid: pt.LIGHT_PINK for aid in qreq_.daids})
        nx.set_node_attributes(graph, 'color',
                               {aid: pt.TRUE_BLUE for aid in qreq_.qaids})
        nx.set_node_attributes(
            graph, 'color',
            {aid: pt.LIGHT_PURPLE
             for aid in np.intersect1d(qreq_.qaids, qreq_.daids)})
        nx.set_node_attributes(
            graph, 'label', {node: 'n%r' % (node[1],) for node in name_nodes})
        nx.set_node_attributes(
            graph, 'color', {node: pt.LIGHT_GREEN for node in name_nodes})

    if show:
        import plottool as pt
        pt.show_nx(graph, layoutkw={'prog': 'neato'}, verbose=False)
    return graph
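
# Numeric sketch of the feedback mixture above (self-contained; values are
# hypothetical): p_same = p_match * (1 - p_notcomp) + p_bg * p_notcomp.
# >>> import numpy as np
# >>> p_match, p_notcomp, p_bg = np.array([0.9]), np.array([0.2]), 0.0
# >>> p_same = p_match * (1 - p_notcomp) + p_bg * p_notcomp
# >>> p_same.round(2)
# array([0.72])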
def show_top_featmatches(qreq_, cm_list):
    """
    Args:
        qreq_ (ibeis.QueryRequest):  query request object with hyper-parameters
        cm_list (list):

    SeeAlso:
        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_MTEST -t best:lnbnn_on=True,lnbnn_normalizer=normlnbnn-test -a default --sephack
        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_Master1 -t best:lnbnn_on=True -a timectrl --sephack
        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_MTEST -t best:lnbnn_on=True -a default:size=30 --sephack
        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_MTEST -t best:K=1,Knorm=5,lnbnn_on=True -a default:size=30 --sephack
        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_MTEST -t best:K=1,Knorm=3,lnbnn_on=True -a default --sephack

    CommandLine:
        python -m ibeis.viz.viz_nearest_descriptors --exec-show_top_featmatches --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.viz.viz_nearest_descriptors import *  # NOQA
        >>> import ibeis
        >>> cm_list, qreq_ = ibeis.testdata_cmlist(defaultdb='PZ_MTEST',
        >>>                                        a=['default:has_none=mother,size=30'])
        >>> show_top_featmatches(qreq_, cm_list)
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> ut.show_if_requested()
    """
    # for cm in cm_list:
    #     cm.score_csum(qreq_)
    import numpy as np
    import vtool as vt
    from functools import partial
    # Stack chipmatches
    ibs = qreq_.ibs
    infos = [cm.get_flat_fm_info() for cm in cm_list]
    flat_metadata = dict([(k, np.concatenate(v))
                          for k, v in ut.dict_stack2(infos).items()])
    fsv_flat = flat_metadata['fsv']
    flat_metadata['fs'] = fsv_flat.prod(axis=1)
    aids1 = flat_metadata['aid1'][:, None]
    aids2 = flat_metadata['aid2'][:, None]
    flat_metadata['aid_pairs'] = np.concatenate([aids1, aids2], axis=1)

    # Take sample of metadata
    sortx = flat_metadata['fs'].argsort()[::-1]
    num = len(cm_list) * 3
    # num = 10
    taker = partial(np.take, indices=sortx[:num], axis=0)
    flat_metadata_top = ut.map_dict_vals(taker, flat_metadata)
    aid1s, aid2s, fms = ut.dict_take(flat_metadata_top, ['aid1', 'aid2', 'fm'])

    annots = {}
    aids = np.unique(np.hstack((aid1s, aid2s)))
    annots = {aid: ibs.get_annot_lazy_dict(aid, config2_=qreq_.qparams)
              for aid in aids}

    label_lists = ibs.get_aidpair_truths(aid1s, aid2s) == ibs.const.TRUTH_MATCH
    patch_size = 64

    def extract_patches(annots, aid, fxs):
        """ custom_func(lazydict, key, subkeys) for multigroup_lookup """
        annot = annots[aid]
        kpts = annot['kpts']
        rchip = annot['rchip']
        kpts_m = kpts.take(fxs, axis=0)
        warped_patches, warped_subkpts = vt.get_warped_patches(
            rchip, kpts_m, patch_size=patch_size)
        return warped_patches

    data_lists = vt.multigroup_lookup(annots, [aid1s, aid2s], fms.T,
                                      extract_patches)

    import plottool as pt
    pt.ensure_pylab_qt4()
    import ibeis_cnn
    inter = ibeis_cnn.draw_results.interact_patches(
        label_lists, data_lists, flat_metadata_top, chunck_sizes=(2, 4),
        ibs=ibs, hack_one_per_aid=False, sortby='fs', qreq_=qreq_)
    inter.show()
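
# Sketch of the top-k selection above (self-contained numpy; toy scores):
# >>> import numpy as np
# >>> fs = np.array([0.1, 0.9, 0.4, 0.7])
# >>> sortx = fs.argsort()[::-1]       # descending by score
# >>> fs.take(sortx[:2]).tolist()      # keep the 2 strongest matches
# [0.9, 0.7]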
def get_dbinfo(
    ibs,
    verbose=True,
    with_imgsize=False,
    with_bytes=False,
    with_contrib=False,
    with_agesex=False,
    with_header=True,
    short=False,
    tag='dbinfo',
    aid_list=None,
    aids=None,
):
    """
    Returns dictionary of digestible database information
    Infostr is a string summary of all the stats. Prints infostr in
    addition to returning locals

    Args:
        ibs (IBEISController):
        verbose (bool):
        with_imgsize (bool):
        with_bytes (bool):

    Returns:
        dict:

    SeeAlso:
        python -m wbia.other.ibsfuncs --exec-get_annot_stats_dict --db PZ_PB_RF_TRAIN --use-hist=True --old=False --per_name_vpedge=False
        python -m wbia.other.ibsfuncs --exec-get_annot_stats_dict --db PZ_PB_RF_TRAIN --all

    CommandLine:
        python -m wbia.other.dbinfo --exec-get_dbinfo:0
        python -m wbia.other.dbinfo --test-get_dbinfo:1
        python -m wbia.other.dbinfo --test-get_dbinfo:0 --db NNP_Master3
        python -m wbia.other.dbinfo --test-get_dbinfo:0 --db PZ_Master1
        python -m wbia.other.dbinfo --test-get_dbinfo:0 --db GZ_ALL
        python -m wbia.other.dbinfo --exec-get_dbinfo:0 --db PZ_ViewPoints
        python -m wbia.other.dbinfo --exec-get_dbinfo:0 --db GZ_Master1
        python -m wbia.other.dbinfo --exec-get_dbinfo:0 --db LF_Bajo_bonito -a default
        python -m wbia.other.dbinfo --exec-get_dbinfo:0 --db DETECT_SEATURTLES -a default --readonly
        python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a ctrl
        python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a default:minqual=ok,require_timestamp=True --dbdir ~/lev/media/danger/LEWA
        python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a default:minqual=ok,require_timestamp=True --dbdir ~/lev/media/danger/LEWA --loadbackup=0
        python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a default: --dbdir ~/lev/media/danger/LEWA
        python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a default: --dbdir ~/lev/media/danger/LEWA --loadbackup=0

    Example1:
        >>> # SCRIPT
        >>> from wbia.other.dbinfo import *  # NOQA
        >>> import wbia
        >>> defaultdb = 'testdb1'
        >>> ibs, aid_list = wbia.testdata_aids(defaultdb, a='default:minqual=ok,view=primary,view_ext1=1')
        >>> kwargs = ut.get_kwdefaults(get_dbinfo)
        >>> kwargs['verbose'] = False
        >>> kwargs['aid_list'] = aid_list
        >>> kwargs = ut.parse_dict_from_argv(kwargs)
        >>> output = get_dbinfo(ibs, **kwargs)
        >>> result = (output['info_str'])
        >>> print(result)
        >>> #ibs = wbia.opendb(defaultdb='testdb1')
        >>> # <HACK FOR FILTERING>
        >>> #from wbia.expt import cfghelpers
        >>> #from wbia.expt import annotation_configs
        >>> #from wbia.init import filter_annots
        >>> #named_defaults_dict = ut.dict_take(annotation_configs.__dict__,
        >>> #                                   annotation_configs.TEST_NAMES)
        >>> #named_qcfg_defaults = dict(zip(annotation_configs.TEST_NAMES,
        >>> #                               ut.get_list_column(named_defaults_dict, 'qcfg')))
        >>> #acfg = cfghelpers.parse_argv_cfg(('--annot-filter', '-a'), named_defaults_dict=named_qcfg_defaults, default=None)[0]
        >>> #aid_list = ibs.get_valid_aids()
        >>> # </HACK FOR FILTERING>

    Example1:
        >>> # ENABLE_DOCTEST
        >>> from wbia.other.dbinfo import *  # NOQA
        >>> import wbia
        >>> verbose = True
        >>> short = True
        >>> #ibs = wbia.opendb(db='GZ_ALL')
        >>> #ibs = wbia.opendb(db='PZ_Master0')
        >>> ibs = wbia.opendb('testdb1')
        >>> assert ibs.get_dbname() == 'testdb1', 'DO NOT DELETE CONTRIBUTORS OF OTHER DBS'
        >>> ibs.delete_contributors(ibs.get_valid_contributor_rowids())
        >>> ibs.delete_empty_nids()
        >>> #ibs = wbia.opendb(db='PZ_MTEST')
        >>> output = get_dbinfo(ibs, with_contrib=False, verbose=False, short=True)
        >>> result = (output['info_str'])
        >>> print(result)
        +============================
        DB Info: testdb1
        DB Notes: None
        DB NumContrib: 0
        ----------
        # Names = 7
        # Names (unassociated) = 0
        # Names (singleton) = 5
        # Names (multiton) = 2
        ----------
        # Annots = 13
        # Annots (unknown) = 4
        # Annots (singleton) = 5
        # Annots (multiton) = 4
        ----------
        # Img = 13
        L============================
    """
    # TODO Database size in bytes
    # TODO: occurrence, contributors, etc...
    if aids is not None:
        aid_list = aids

    # Basic variables
    request_annot_subset = False
    _input_aid_list = aid_list  # NOQA
    if aid_list is None:
        valid_aids = ibs.get_valid_aids()
        valid_nids = ibs.get_valid_nids()
        valid_gids = ibs.get_valid_gids()
    else:
        if isinstance(aid_list, str):
            # Hack to get experiment stats on aids
            acfg_name_list = [aid_list]
            logger.info('Specified custom aids via acfgname %s' % (acfg_name_list,))
            from wbia.expt import experiment_helpers

            acfg_list, expanded_aids_list = experiment_helpers.get_annotcfg_list(
                ibs, acfg_name_list
            )
            aid_list = sorted(list(set(ut.flatten(ut.flatten(expanded_aids_list)))))
            # aid_list =
        if verbose:
            logger.info('Specified %d custom aids' % (len(aid_list),))
        request_annot_subset = True
        valid_aids = aid_list
        valid_nids = list(
            set(ibs.get_annot_nids(aid_list, distinguish_unknowns=False))
            - {const.UNKNOWN_NAME_ROWID}
        )
        valid_gids = list(set(ibs.get_annot_gids(aid_list)))
    # associated_nids = ibs.get_valid_nids(filter_empty=True)  # nids with at least one annotation
    valid_images = ibs.images(valid_gids)
    valid_annots = ibs.annots(valid_aids)

    # Image info
    if verbose:
        logger.info('Checking Image Info')
    gx2_aids = valid_images.aids
    if request_annot_subset:
        # remove annots not in this subset
        valid_aids_set = set(valid_aids)
        gx2_aids = [list(set(aids_).intersection(valid_aids_set)) for aids_ in gx2_aids]

    gx2_nAnnots = np.array(list(map(len, gx2_aids)))
    image_without_annots = len(np.where(gx2_nAnnots == 0)[0])
    gx2_nAnnots_stats = ut.repr4(
        ut.get_stats(gx2_nAnnots, use_median=True), nl=0, precision=2, si=True
    )
    image_reviewed_list = ibs.get_image_reviewed(valid_gids)

    # Name stats
    if verbose:
        logger.info('Checking Name Info')
    nx2_aids = ibs.get_name_aids(valid_nids)
    if request_annot_subset:
        # remove annots not in this subset
        valid_aids_set = set(valid_aids)
        nx2_aids = [list(set(aids_).intersection(valid_aids_set)) for aids_ in nx2_aids]
    associated_nids = ut.compress(valid_nids, list(map(len, nx2_aids)))

    ibs.check_name_mapping_consistency(nx2_aids)

    if False:
        # Occurrence Info
        def compute_annot_occurrence_ids(ibs, aid_list):
            from wbia.algo.preproc import preproc_occurrence

            gid_list = ibs.get_annot_gids(aid_list)
            gid2_aids = ut.group_items(aid_list, gid_list)
            config = {'seconds_thresh': 4 * 60 * 60}
            flat_imgsetids, flat_gids = preproc_occurrence.wbia_compute_occurrences(
                ibs, gid_list, config=config, verbose=False
            )
            occurid2_gids = ut.group_items(flat_gids, flat_imgsetids)
            occurid2_aids = {
                oid: ut.flatten(ut.take(gid2_aids, gids))
                for oid, gids in occurid2_gids.items()
            }
            return occurid2_aids

        import utool

        with utool.embed_on_exception_context:
            occurid2_aids = compute_annot_occurrence_ids(ibs, valid_aids)
            occur_nids = ibs.unflat_map(ibs.get_annot_nids, occurid2_aids.values())
            occur_unique_nids = [ut.unique(nids) for nids in occur_nids]
            nid2_occurxs = ut.ddict(list)
            for occurx, nids in enumerate(occur_unique_nids):
                for nid in nids:
                    nid2_occurxs[nid].append(occurx)

        nid2_occurx_single = {
            nid: occurxs for nid, occurxs in nid2_occurxs.items() if len(occurxs) <= 1
        }
        nid2_occurx_resight = {
            nid: occurxs for nid, occurxs in nid2_occurxs.items() if len(occurxs) > 1
        }
        singlesight_encounters = ibs.get_name_aids(nid2_occurx_single.keys())

        singlesight_annot_stats = ut.get_stats(
            list(map(len, singlesight_encounters)), use_median=True, use_sum=True
        )
        resight_name_stats = ut.get_stats(
            list(map(len, nid2_occurx_resight.values())), use_median=True, use_sum=True
        )

    # Encounter Info
    def break_annots_into_encounters(aids):
        from wbia.algo.preproc import occurrence_blackbox
        import datetime

        thresh_sec = datetime.timedelta(minutes=30).seconds
        posixtimes = np.array(ibs.get_annot_image_unixtimes_asfloat(aids))
        # latlons = ibs.get_annot_image_gps(aids)
        labels = occurrence_blackbox.cluster_timespace2(
            posixtimes, None, thresh_sec=thresh_sec
        )
        return labels
        # ave_enc_time = [np.mean(times) for lbl, times in ut.group_items(posixtimes, labels).items()]
        # ut.square_pdist(ave_enc_time)

    try:
        am_rowids = ibs.get_annotmatch_rowids_between_groups([valid_aids], [valid_aids])[
            0
        ]
        aid_pairs = ibs.filter_aidpairs_by_tags(min_num=0, am_rowids=am_rowids)
        undirected_tags = ibs.get_aidpair_tags(
            aid_pairs.T[0], aid_pairs.T[1], directed=False
        )
        tagged_pairs = list(zip(aid_pairs.tolist(), undirected_tags))
        tag_dict = ut.groupby_tags(tagged_pairs, undirected_tags)
        pair_tag_info = ut.map_dict_vals(len, tag_dict)
    except Exception:
        pair_tag_info = {}
    # logger.info(ut.repr2(pair_tag_info))

    # Annot Stats
    # TODO: number of images where chips cover entire image
    # TODO: total image coverage of annotation
    # TODO: total annotation overlap
    """
    ax2_unknown = ibs.is_aid_unknown(valid_aids)
    ax2_nid = ibs.get_annot_name_rowids(valid_aids)
    assert all([nid < 0 if unknown else nid > 0 for nid, unknown in
                zip(ax2_nid, ax2_unknown)]), 'bad annot nid'
    """
    #
    if verbose:
        logger.info('Checking Annot Species')
    unknown_annots = valid_annots.compress(ibs.is_aid_unknown(valid_annots))
    species_list = valid_annots.species_texts
    species2_annots = valid_annots.group_items(valid_annots.species_texts)
    species2_nAids = {key: len(val) for key, val in species2_annots.items()}

    if verbose:
        logger.info('Checking Multiton/Singleton Species')
    nx2_nAnnots = np.array(list(map(len, nx2_aids)))
    # Separate singleton / multitons
    multiton_nxs = np.where(nx2_nAnnots > 1)[0]
    singleton_nxs = np.where(nx2_nAnnots == 1)[0]
    unassociated_nxs = np.where(nx2_nAnnots == 0)[0]
    assert len(np.intersect1d(singleton_nxs, multiton_nxs)) == 0, 'intersecting names'
    valid_nxs = np.hstack([multiton_nxs, singleton_nxs])
    num_names_with_gt = len(multiton_nxs)

    # Annot Info
    if verbose:
        logger.info('Checking Annot Info')
    multiton_aids_list = ut.take(nx2_aids, multiton_nxs)
    assert len(set(multiton_nxs)) == len(multiton_nxs)
    if len(multiton_aids_list) == 0:
        # NOTE: np.int was removed from modern numpy; builtin int is equivalent
        multiton_aids = np.array([], dtype=int)
    else:
        multiton_aids = np.hstack(multiton_aids_list)
        assert len(set(multiton_aids)) == len(multiton_aids), 'duplicate annot'
    singleton_aids = ut.take(nx2_aids, singleton_nxs)
    multiton_nid2_nannots = list(map(len, multiton_aids_list))

    # Image size stats
    if with_imgsize:
        if verbose:
            logger.info('Checking ImageSize Info')
        gpath_list = ibs.get_image_paths(valid_gids)

        def wh_print_stats(wh_list):
            if len(wh_list) == 0:
                return '{empty}'
            wh_list = np.asarray(wh_list)
            stat_dict = collections.OrderedDict(
                [
                    ('max', wh_list.max(0)),
                    ('min', wh_list.min(0)),
                    ('mean', wh_list.mean(0)),
                    ('std', wh_list.std(0)),
                ]
            )

            def arr2str(var):
                return '[' + (', '.join(list(map(lambda x: '%.1f' % x, var)))) + ']'

            ret = ',\n '.join(
                ['%s:%s' % (key, arr2str(val)) for key, val in stat_dict.items()]
            )
            return '{\n ' + ret + '\n}'

        logger.info('reading image sizes')
        # Image size stats
        img_size_list = ibs.get_image_sizes(valid_gids)
        img_size_stats = wh_print_stats(img_size_list)

        # Chip size stats
        annotation_bbox_list = ibs.get_annot_bboxes(valid_aids)
        annotation_bbox_arr = np.array(annotation_bbox_list)
        if len(annotation_bbox_arr) == 0:
            annotation_size_list = []
        else:
            annotation_size_list = annotation_bbox_arr[:, 2:4]
        chip_size_stats = wh_print_stats(annotation_size_list)
        imgsize_stat_lines = [
            (' # Img in dir = %d' % len(gpath_list)),
            (' Image Size Stats = %s' % (img_size_stats,)),
            (' * Chip Size Stats = %s' % (chip_size_stats,)),
        ]
    else:
        imgsize_stat_lines = []

    if verbose:
        logger.info('Building Stats String')

    multiton_stats = ut.repr3(
        ut.get_stats(multiton_nid2_nannots, use_median=True), nl=0, precision=2, si=True
    )

    # Time stats
    unixtime_list = valid_images.unixtime2
    # valid_unixtime_list = [time for time in unixtime_list if time != -1]
    # unixtime_statstr = ibs.get_image_time_statstr(valid_gids)
    if ut.get_argflag('--hackshow-unixtime'):
        show_time_distributions(ibs, unixtime_list)
        ut.show_if_requested()
    unixtime_statstr = ut.repr3(ut.get_timestats_dict(unixtime_list, full=True), si=True)

    # GPS stats
    gps_list_ = ibs.get_image_gps(valid_gids)
    gpsvalid_list = [gps != (-1, -1) for gps in gps_list_]
    gps_list = ut.compress(gps_list_, gpsvalid_list)

    def get_annot_age_stats(aid_list):
        annot_age_months_est_min = ibs.get_annot_age_months_est_min(aid_list)
        annot_age_months_est_max = ibs.get_annot_age_months_est_max(aid_list)
        age_dict = ut.ddict((lambda: 0))
        for min_age, max_age in zip(annot_age_months_est_min, annot_age_months_est_max):
            if max_age is None:
                max_age = min_age
            if min_age is None:
                min_age = max_age
            if max_age is None and min_age is None:
                logger.info('Found UNKNOWN Age: %r, %r' % (min_age, max_age,))
                age_dict['UNKNOWN'] += 1
            elif (min_age is None or min_age < 12) and max_age < 12:
                age_dict['Infant'] += 1
            elif 12 <= min_age and min_age < 36 and 12 <= max_age and max_age < 36:
                age_dict['Juvenile'] += 1
            elif 36 <= min_age and (max_age is None or 36 <= max_age):
                age_dict['Adult'] += 1
        return age_dict

    def get_annot_sex_stats(aid_list):
        annot_sextext_list = ibs.get_annot_sex_texts(aid_list)
        sextext2_aids = ut.group_items(aid_list, annot_sextext_list)
        sex_keys = list(ibs.const.SEX_TEXT_TO_INT.keys())
        assert set(sex_keys) >= set(annot_sextext_list), 'bad keys: ' + str(
            set(annot_sextext_list) - set(sex_keys)
        )
        sextext2_nAnnots = ut.odict(
            [(key, len(sextext2_aids.get(key, []))) for key in sex_keys]
        )
        # Filter 0's
        sextext2_nAnnots = {
            key: val for key, val in six.iteritems(sextext2_nAnnots) if val != 0
        }
        return sextext2_nAnnots

    def get_annot_qual_stats(ibs, aid_list):
        annots = ibs.annots(aid_list)
        qualtext2_nAnnots = ut.order_dict_by(
            ut.map_vals(len, annots.group_items(annots.quality_texts)),
            list(ibs.const.QUALITY_TEXT_TO_INT.keys()),
        )
        return qualtext2_nAnnots

    def get_annot_viewpoint_stats(ibs, aid_list):
        annots = ibs.annots(aid_list)
        viewcode2_nAnnots = ut.order_dict_by(
            ut.map_vals(len, annots.group_items(annots.viewpoint_code)),
            list(ibs.const.VIEW.CODE_TO_INT.keys()) + [None],
        )
        return viewcode2_nAnnots

    if verbose:
        logger.info('Checking Other Annot Stats')

    qualtext2_nAnnots = get_annot_qual_stats(ibs, valid_aids)
    viewcode2_nAnnots = get_annot_viewpoint_stats(ibs, valid_aids)
    agetext2_nAnnots = get_annot_age_stats(valid_aids)
    sextext2_nAnnots = get_annot_sex_stats(valid_aids)

    if verbose:
        logger.info('Checking Contrib Stats')

    # Contributor Statistics
    # hack remove colon for image alignment
    def fix_tag_list(tag_list):
        return [None if tag is None else tag.replace(':', ';') for tag in tag_list]

    image_contributor_tags = fix_tag_list(ibs.get_image_contributor_tag(valid_gids))
    annot_contributor_tags = fix_tag_list(ibs.get_annot_image_contributor_tag(valid_aids))
    contributor_tag_to_gids = ut.group_items(valid_gids, image_contributor_tags)
    contributor_tag_to_aids = ut.group_items(valid_aids, annot_contributor_tags)

    contributor_tag_to_qualstats = {
        key: get_annot_qual_stats(ibs, aids)
        for key, aids in six.iteritems(contributor_tag_to_aids)
    }
    contributor_tag_to_viewstats = {
        key: get_annot_viewpoint_stats(ibs, aids)
        for key, aids in six.iteritems(contributor_tag_to_aids)
    }

    contributor_tag_to_nImages = {
        key: len(val) for key, val in six.iteritems(contributor_tag_to_gids)
    }
    contributor_tag_to_nAnnots = {
        key: len(val) for key, val in six.iteritems(contributor_tag_to_aids)
    }

    if verbose:
        logger.info('Summarizing')

    # Summarize stats
    num_names = len(valid_nids)
    num_names_unassociated = len(valid_nids) - len(associated_nids)
    num_names_singleton = len(singleton_nxs)
    num_names_multiton = len(multiton_nxs)

    num_singleton_annots = len(singleton_aids)
    num_multiton_annots = len(multiton_aids)
    num_unknown_annots = len(unknown_annots)
    num_annots = len(valid_aids)

    if with_bytes:
        if verbose:
            logger.info('Checking Disk Space')
        ibsdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_ibsdir()))
        dbdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_dbdir()))
        imgdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_imgdir()))
        cachedir_space = ut.byte_str2(ut.get_disk_space(ibs.get_cachedir()))

    if True:
        if verbose:
            logger.info('Check asserts')
        try:
            bad_aids = np.intersect1d(multiton_aids, unknown_annots)
            _num_names_total_check = (
                num_names_singleton + num_names_unassociated + num_names_multiton
            )
            _num_annots_total_check = (
                num_unknown_annots + num_singleton_annots + num_multiton_annots
            )
            assert len(bad_aids) == 0, 'intersecting multiton aids and unknown aids'
            assert _num_names_total_check == num_names, 'inconsistent num names'
            # if not request_annot_subset:
            # dont check this if you have an annot subset
            assert _num_annots_total_check == num_annots, 'inconsistent num annots'
        except Exception as ex:
            ut.printex(
                ex,
                keys=[
                    '_num_names_total_check',
                    'num_names',
                    '_num_annots_total_check',
                    'num_annots',
                    'num_names_singleton',
                    'num_names_multiton',
                    'num_unknown_annots',
                    'num_multiton_annots',
                    'num_singleton_annots',
                ],
            )
            raise

    # Get contributor statistics
    contributor_rowids = ibs.get_valid_contributor_rowids()
    num_contributors = len(contributor_rowids)

    # print
    num_tabs = 5

    def align2(str_):
        return ut.align(str_, ':', ' :')

    def align_dict2(dict_):
        str_ = ut.repr2(dict_, si=True)
        return align2(str_)

    header_block_lines = [('+============================')] + (
        [
            ('+ singleton := single sighting'),
            ('+ multiton := multiple sightings'),
            ('--' * num_tabs),
        ]
        if not short and with_header
        else []
    )

    source_block_lines = [
        ('DB Info: ' + ibs.get_dbname()),
        ('DB Notes: ' + ibs.get_dbnotes()),
        ('DB NumContrib: %d' % num_contributors),
    ]

    bytes_block_lines = (
        [
            ('--' * num_tabs),
            ('DB Bytes: '),
            (' +- dbdir nBytes: ' + dbdir_space),
            (' | +- _ibsdb nBytes: ' + ibsdir_space),
            (' | | +-imgdir nBytes: ' + imgdir_space),
            (' | | +-cachedir nBytes: ' + cachedir_space),
        ]
        if with_bytes
        else []
    )

    name_block_lines = [
        ('--' * num_tabs),
        ('# Names = %d' % num_names),
        ('# Names (unassociated) = %d' % num_names_unassociated),
        ('# Names (singleton) = %d' % num_names_singleton),
        ('# Names (multiton) = %d' % num_names_multiton),
    ]

    subset_str = ' ' if not request_annot_subset else '(SUBSET)'
    annot_block_lines = [
        ('--' * num_tabs),
        ('# Annots %s = %d' % (subset_str, num_annots,)),
        ('# Annots (unknown) = %d' % num_unknown_annots),
        ('# Annots (singleton) = %d' % num_singleton_annots),
        ('# Annots (multiton) = %d' % num_multiton_annots),
    ]

    annot_per_basic_block_lines = (
        [
            ('--' * num_tabs),
            ('# Annots per Name (multiton) = %s' % (align2(multiton_stats),)),
            ('# Annots per Image = %s' % (align2(gx2_nAnnots_stats),)),
            ('# Annots per Species = %s' % (align_dict2(species2_nAids),)),
        ]
        if not short
        else []
    )

    occurrence_block_lines = (
        [
            ('--' * num_tabs),
            # ('# Occurrence Per Name (Resights) = %s' % (align_dict2(resight_name_stats),)),
            # ('# Annots per Encounter (Singlesights) = %s' % (align_dict2(singlesight_annot_stats),)),
            ('# Pair Tag Info (annots) = %s' % (align_dict2(pair_tag_info),)),
        ]
        if not short
        else []
    )

    annot_per_qualview_block_lines = [
        None if short else '# Annots per Viewpoint = %s' % align_dict2(viewcode2_nAnnots),
        None if short else '# Annots per Quality = %s' % align_dict2(qualtext2_nAnnots),
    ]

    annot_per_agesex_block_lines = (
        [
            '# Annots per Age = %s' % align_dict2(agetext2_nAnnots),
            '# Annots per Sex = %s' % align_dict2(sextext2_nAnnots),
        ]
        if not short and with_agesex
        else []
    )

    contributor_block_lines = (
        [
            '# Images per contributor = ' + align_dict2(contributor_tag_to_nImages),
            '# Annots per contributor = ' + align_dict2(contributor_tag_to_nAnnots),
            '# Quality per contributor = '
            + ut.repr2(contributor_tag_to_qualstats, sorted_=True),
            '# Viewpoint per contributor = '
            + ut.repr2(contributor_tag_to_viewstats, sorted_=True),
        ]
        if with_contrib
        else []
    )

    img_block_lines = [
        ('--' * num_tabs),
        ('# Img = %d' % len(valid_gids)),
        None if short else ('# Img reviewed = %d' % sum(image_reviewed_list)),
        None if short else ('# Img with gps = %d' % len(gps_list)),
        # ('# Img with timestamp = %d' % len(valid_unixtime_list)),
        None if short else ('Img Time Stats = %s' % (align2(unixtime_statstr),)),
    ]

    info_str_lines = (
        header_block_lines
        + bytes_block_lines
        + source_block_lines
        + name_block_lines
        + annot_block_lines
        + annot_per_basic_block_lines
        + occurrence_block_lines
        + annot_per_qualview_block_lines
        + annot_per_agesex_block_lines
        + img_block_lines
        + contributor_block_lines
        + imgsize_stat_lines
        + [('L============================')]
    )
    info_str = '\n'.join(ut.filter_Nones(info_str_lines))
    info_str2 = ut.indent(info_str, '[{tag}]'.format(tag=tag))
    if verbose:
        logger.info(info_str2)
    locals_ = locals()
    return locals_
def get_annotmatch_subgraph(ibs):
    r"""
    http://bokeh.pydata.org/en/latest/
    https://github.com/jsexauer/networkx_viewer

    TODO: Need a special visualization
        In the web I need:
            * graph of annotations matches.
            * can move them around.
            * edit lines between them.
            * http://stackoverflow.com/questions/15373530/web-graph-visualization-tool

        This should share functionality with a name view.

    Args:
        ibs (IBEISController):  ibeis controller object

    CommandLine:
        python -m ibeis.annotmatch_funcs --exec-get_annotmatch_subgraph --show

        # Networkx example
        python -m ibeis.viz.viz_graph --test-show_chipmatch_graph:0 --show

    Ignore:
        from ibeis import viz

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.annotmatch_funcs import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='PZ_MTEST')
        >>> result = get_annotmatch_subgraph(ibs)
        >>> ut.show_if_requested()
    """
    #import ibeis
    #ibs = ibeis.opendb(db='PZ_MTEST')
    #rowids = ibs._get_all_annotmatch_rowids()
    #aids1 = ibs.get_annotmatch_aid1(rowids)
    #aids2 = ibs.get_annotmatch_aid2(rowids)
    #
    #
    nids = ibs.get_valid_nids()
    nids = nids[0:5]
    aids_list = ibs.get_name_aids(nids)
    import itertools
    unflat_edges = (list(itertools.product(aids, aids)) for aids in aids_list)
    aid_pairs = [tup for tup in ut.iflatten(unflat_edges) if tup[0] != tup[1]]
    aids1 = ut.get_list_column(aid_pairs, 0)
    aids2 = ut.get_list_column(aid_pairs, 1)
    # Enumerate annotmatch properties
    rng = np.random.RandomState(0)
    edge_props = {
        'weight': rng.rand(len(aids1)),
        'reviewer_confidence': rng.rand(len(aids1)),
        'algo_confidence': rng.rand(len(aids1)),
    }
    # Remove data that does not need to be visualized
    # (dont show all the aids if you dont have to)
    thresh = .5
    flags = edge_props['weight'] > thresh
    aids1_ = ut.compress(aids1, flags)
    aids2_ = ut.compress(aids2, flags)
    chosen_props = ut.dict_subset(edge_props, ['weight'])
    edge_props = ut.map_dict_vals(ut.partial(ut.compress, flag_list=flags), chosen_props)
    edge_keys = list(edge_props.keys())
    edge_vals = ut.dict_take(edge_props, edge_keys)
    edge_attr_list = [dict(zip(edge_keys, vals_)) for vals_ in zip(*edge_vals)]
    unique_aids = list(set(aids1_ + aids2_))
    # Make a graph between the chips
    nodes = unique_aids
    edges = list(zip(aids1_, aids2_, edge_attr_list))
    import networkx as nx
    graph = nx.DiGraph()
    graph.add_nodes_from(nodes)
    graph.add_edges_from(edges)
    from ibeis.viz import viz_graph
    fnum = None
    #zoom = kwargs.get('zoom', .4)
    viz_graph.viz_netx_chipgraph(ibs, graph, fnum=fnum, with_images=True,
                                 augment_graph=False)
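
# Sketch of the per-edge attribute construction above (self-contained
# networkx; toy ids and weights stand in for annotmatch properties):
# >>> import networkx as nx
# >>> edges = list(zip([1, 2], [3, 4], [{'weight': 0.9}, {'weight': 0.6}]))
# >>> graph = nx.DiGraph()
# >>> graph.add_edges_from(edges)
# >>> graph[1][3]['weight']
# 0.9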
def update_bindings():
    r"""
    Returns:
        dict: matchtups

    CommandLine:
        python ~/local/build_scripts/flannscripts/autogen_bindings.py --exec-update_bindings
        utprof.py ~/local/build_scripts/flannscripts/autogen_bindings.py --exec-update_bindings

    Example:
        >>> # DISABLE_DOCTEST
        >>> from autogen_bindings import *  # NOQA
        >>> import sys
        >>> import utool as ut
        >>> sys.path.append(ut.truepath('~/local/build_scripts/flannscripts'))
        >>> matchtups = update_bindings()
        >>> result = ('matchtups = %s' % (ut.repr2(matchtups),))
        >>> print(result)
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> ut.show_if_requested()
    """
    from os.path import basename
    import difflib
    import numpy as np
    import re
    binding_names = [
        'build_index',
        'used_memory',
        'add_points',
        'remove_point',
        'compute_cluster_centers',
        'load_index',
        'save_index',
        'find_nearest_neighbors',
        'radius_search',
        'remove_points',
        'free_index',
        'find_nearest_neighbors_index',
        # 'size',
        # 'veclen',
        # 'get_point',
        # 'flann_get_distance_order',
        # 'flann_get_distance_type',
        # 'flann_log_verbosity',
        # 'clean_removed_points',
    ]
    _places = [
        '~/code/flann/src/cpp/flann/flann.cpp',
        '~/code/flann/src/cpp/flann/flann.h',
        '~/code/flann/src/python/pyflann/flann_ctypes.py',
        '~/code/flann/src/python/pyflann/index.py',
    ]
    eof_sentinals = {
        # 'flann_ctypes.py': '# END DEFINE BINDINGS',
        'flann_ctypes.py': 'def ensure_2d_array(arr',
        # 'flann.h': '// END DEFINE BINDINGS',
        'flann.h': '#ifdef __cplusplus',
        'flann.cpp': None,
        'index.py': None,
    }
    block_sentinals = {
        'flann.h': re.escape('/**'),
        'flann.cpp': 'template *<typename Distance>',
        # 'flann_ctypes.py': '\n',
        'flann_ctypes.py': 'flann\.[a-z_.]* =',
        # 'index.py': '    def .*',
        'index.py': '    [^ ].*',
    }
    places = {basename(fpath): fpath for fpath in ut.lmap(ut.truepath, _places)}
    text_dict = ut.map_dict_vals(ut.readfrom, places)
    lines_dict = {key: val.split('\n') for key, val in text_dict.items()}
    orig_texts = text_dict.copy()  # NOQA

    binding_defs = {}
    named_blocks = {}
    print('binding_names = %r' % (binding_names,))
    for binding_name in binding_names:
        blocks, defs = autogen_parts(binding_name)
        binding_defs[binding_name] = defs
        named_blocks[binding_name] = blocks

    for binding_name in ut.ProgIter(binding_names):
        ut.colorprint('+--- GENERATE BINDING %s -----' % (binding_name,), 'yellow')
        blocks_dict = named_blocks[binding_name]
        for key in places.keys():
            ut.colorprint('---- generating %s for %s -----' %
                          (binding_name, key,), 'yellow')
            # key = 'flann_ctypes.py'
            # print(text_dict[key])
            old_text = text_dict[key]
            line_list = lines_dict[key]
            #text = old_text
            block = blocks_dict[key]

            debug = ut.get_argflag('--debug')
            # debug = True
            # if debug:
            #     print(ut.highlight_code(block, splitext(key)[1]))

            # Find a place in the code that already exists
            searchblock = block
            if key.endswith('.cpp') or key.endswith('.h'):
                searchblock = re.sub(ut.REGEX_C_COMMENT, '', searchblock,
                                     flags=re.MULTILINE | re.DOTALL)
            searchblock = '\n'.join(searchblock.splitlines()[0:3])

            # @ut.cached_func(verbose=False)
            def cached_match(old_text, searchblock):
                def isjunk(x):
                    return False
                    return x in ' \t,*()'

                def isjunk2(x):
                    return x in ' \t,*()'
                # Not sure why the first one just doesnt find it
                # isjunk = None
                sm = difflib.SequenceMatcher(isjunk, old_text, searchblock,
                                             autojunk=False)
                sm0 = difflib.SequenceMatcher(isjunk, old_text, searchblock,
                                              autojunk=True)
                sm1 = difflib.SequenceMatcher(isjunk2, old_text, searchblock,
                                              autojunk=False)
                sm2 = difflib.SequenceMatcher(isjunk2, old_text, searchblock,
                                              autojunk=True)
                matchtups = (sm.get_matching_blocks() +
                             sm0.get_matching_blocks() +
                             sm1.get_matching_blocks() +
                             sm2.get_matching_blocks())
                return matchtups
            matchtups = cached_match(old_text, searchblock)

            # Find a reasonable match in matchtups
            found = False
            if debug:
                # print('searchblock =\n%s' % (searchblock,))
                print('searchblock = %r' % (searchblock,))
            for (a, b, size) in matchtups:
                matchtext = old_text[a:a + size]
                pybind = binding_defs[binding_name]['py_binding_name']
                if re.search(binding_name + '\\b', matchtext) or re.search(pybind + '\\b', matchtext):
                    found = True
                    pos = a + size
                    if debug:
                        print('MATCHING TEXT')
                        print(matchtext)
                    break
                else:
                    if debug and 0:
                        print('Not matching')
                        print('matchtext = %r' % (matchtext,))
                        matchtext2 = old_text[a - 10:a + size + 20]
                        print('matchtext2 = %r' % (matchtext2,))

            if found:
                linelens = np.array(ut.lmap(len, line_list)) + 1
                sumlen = np.cumsum(linelens)
                row = np.where(sumlen < pos)[0][-1] + 1
                #print(line_list[row])
                # Search for extents of the block to overwrite
                block_sentinal = block_sentinals[key]
                row1 = ut.find_block_end(row, line_list, block_sentinal, -1) - 1
                row2 = ut.find_block_end(row + 1, line_list, block_sentinal, +1)
                eof_sentinal = eof_sentinals[key]
                if eof_sentinal is not None:
                    print('eof_sentinal = %r' % (eof_sentinal,))
                    row2 = min([count for count, line in enumerate(line_list)
                                if line.startswith(eof_sentinal)][-1], row2)
                nr = len((block + '\n\n').splitlines())
                new_line_list = ut.insert_block_between_lines(
                    block + '\n', row1, row2, line_list)
                rtext1 = '\n'.join(line_list[row1:row2])
                rtext2 = '\n'.join(new_line_list[row1:row1 + nr])
                if debug:
                    print('-----')
                    ut.colorprint('FOUND AND REPLACING %s' % (binding_name,),
                                  'yellow')
                    print(ut.highlight_code(rtext1))
                if debug:
                    print('-----')
                    ut.colorprint('FOUND AND REPLACED WITH %s' % (binding_name,),
                                  'yellow')
                    print(ut.highlight_code(rtext2))
                if not ut.get_argflag('--diff') and not debug:
                    print(ut.color_diff_text(ut.difftext(
                        rtext1, rtext2, num_context_lines=7,
                        ignore_whitespace=True)))
            else:
                # Append to end of the file
                eof_sentinal = eof_sentinals[key]
                if eof_sentinal is None:
                    row2 = len(line_list) - 1
                else:
                    row2_choice = [count for count, line in enumerate(line_list)
                                   if line.startswith(eof_sentinal)]
                    if len(row2_choice) == 0:
                        row2 = len(line_list) - 1
                        assert False
                    else:
                        row2 = row2_choice[-1] - 1

                # row1 = row2 - 1
                # row2 = row2 - 1
                row1 = row2

                new_line_list = ut.insert_block_between_lines(
                    block + '\n', row1, row2, line_list)
                # block + '\n\n\n', row1, row2, line_list)

                rtext1 = '\n'.join(line_list[row1:row2])
                nr = len((block + '\n\n').splitlines())
                rtext2 = '\n'.join(new_line_list[row1:row1 + nr])

                if debug:
                    print('-----')
                    ut.colorprint('NOT FOUND AND REPLACING %s' % (binding_name,),
                                  'yellow')
                    print(ut.highlight_code(rtext1))
                if debug:
                    print('-----')
                    ut.colorprint('NOT FOUND AND REPLACED WITH %s' % (binding_name,),
                                  'yellow')
                    print(ut.highlight_code(rtext2))

                if not ut.get_argflag('--diff') and not debug:
                    print(ut.color_diff_text(ut.difftext(
                        rtext1, rtext2, num_context_lines=7,
                        ignore_whitespace=True)))
            text_dict[key] = '\n'.join(new_line_list)
            lines_dict[key] = new_line_list
        ut.colorprint('L___ GENERATED BINDING %s ___' % (binding_name,), 'yellow')

    for key in places:
        new_text = '\n'.join(lines_dict[key])
        #ut.writeto(ut.augpath(places[key], '.new'), new_text)
        ut.writeto(ut.augpath(places[key]), new_text)

    for key in places:
        if ut.get_argflag('--diff'):
            difftext = ut.get_textdiff(orig_texts[key], new_text,
                                       num_context_lines=7,
                                       ignore_whitespace=True)
            difftext = ut.color_diff_text(difftext)
            print(difftext)
def make_inference(infr):
    cm_list = infr.cm_list
    unique_nids, prob_names = infr.make_prob_names()
    cluster_tuples = infr.make_clusters()

    # Make pair list for output
    if infr.user_feedback is not None:
        keys = list(zip(infr.user_feedback['aid1'], infr.user_feedback['aid2']))
        feedback_lookup = ut.make_index_lookup(keys)
        user_feedback = infr.user_feedback
        p_bg = 0
        user_feedback = ut.map_dict_vals(np.array, infr.user_feedback)
        part1 = user_feedback['p_match'] * (1 - user_feedback['p_notcomp'])
        part2 = p_bg * user_feedback['p_notcomp']
        p_same_list = part1 + part2
    else:
        feedback_lookup = {}
    infr.user_feedback

    needs_review_list = []
    num_top = 4
    for cm, row in zip(cm_list, prob_names):
        # Find top scoring names for this chip match in the posterior distribution
        idxs = row.argsort()[::-1]
        top_idxs = idxs[:num_top]
        nids = ut.take(unique_nids, top_idxs)
        # Find the matched annotations in the pairwise prior distributions
        nidxs = ut.dict_take(cm.nid2_nidx, nids, None)
        name_groupxs = ut.take(cm.name_groupxs, ut.filter_Nones(nidxs))
        daids_list = ut.take(cm.daid_list, name_groupxs)
        for daids in daids_list:
            ut.take(cm.score_list, ut.take(cm.daid2_idx, daids))
            scores_all = cm.annot_score_list / cm.annot_score_list.sum()
            idxs = ut.take(cm.daid2_idx, daids)
            scores = scores_all.take(idxs)
            raw_scores = cm.score_list.take(idxs)
            scorex = scores.argmax()
            raw_score = raw_scores[scorex]
            daid = daids[scorex]
            import scipy.special
            # SUPER HACK: these are not probabilities
            # TODO: set a and b based on dbsize and param configuration
            # python -m plottool.draw_func2 --exec-plot_func --show --range=0,3 --func="lambda x: scipy.special.expit(2 * x - 2)"
            #a = 2.0
            a = 1.5
            b = 2
            p_same = scipy.special.expit(b * raw_score - a)
            #confidence = scores[scorex]
            #p_diff = 1 - p_same
            #decision = 'same' if confidence > thresh else 'diff'
            #confidence = p_same if confidence > thresh else p_diff
            #tup = (cm.qaid, daid, decision, confidence, raw_score)
            confidence = (2 * np.abs(0.5 - p_same)) ** 2
            #if infr.user_feedback is not None:
            #    import utool
            #    utool.embed(
            key = (cm.qaid, daid)
            fb_idx = feedback_lookup.get(key)
            if fb_idx is not None:
                confidence = p_same_list[fb_idx]
            tup = (cm.qaid, daid, p_same, confidence, raw_score)
            needs_review_list.append(tup)

    # Sort resulting list by confidence
    sortx = ut.argsort(ut.take_column(needs_review_list, 3))
    needs_review_list = ut.take(needs_review_list, sortx)

    infr.needs_review_list = needs_review_list
    infr.cluster_tuples = cluster_tuples
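
# Numeric sketch of the score-to-probability squash above (self-contained;
# a=1.5, b=2 as in the code): p_same = expit(b * raw_score - a), and
# confidence grows with distance from the undecided point 0.5.
# >>> import numpy as np
# >>> import scipy.special
# >>> raw_score = np.array([0.0, 0.75, 3.0])
# >>> p_same = scipy.special.expit(2 * raw_score - 1.5)
# >>> confidence = (2 * np.abs(0.5 - p_same)) ** 2
# >>> p_same.round(2).tolist()
# [0.18, 0.5, 0.99]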
def analyize_multiple_drives(drives):
    """
    CommandLine:
        export PYTHONPATH=$PYTHONPATH:~/local/scripts
        python -m register_files --exec-analyize_multiple_drives --drives ~ E:/ D:/
        python -m register_files --exec-analyize_multiple_drives --drives ~ /media/Store
        python register_files.py --exec-analyize_multiple_drives --drives /media/joncrall/media/ /media/joncrall/store/ /media/joncrall/backup
        cd ~/local/scripts

    Example:
        >>> from register_files import *  # NOQA
        >>> dpaths = ut.get_argval('--drives', type_=list, default=['E://', 'D://'])  # 'D:/', 'E:/', 'F:/'
        >>> drives = [Drive(root_dpath) for root_dpath in dpaths]
        >>> drive = Broadcaster(drives)
        >>> drive.compute_info()
        >>> #drive.build_fpath_hashes()
        >>> drive.check_consistency()
        >>> E = drive = drives[0]
        >>> analyize_multiple_drives(drives)
        >>> #D, E, F = drives
        >>> #drive = D
    """
    # -----
    ## Find the files shared on all disks
    #allhave = reduce(ut.dict_isect_combine, [drive.hash_to_fpaths for drive in drives])
    #print('#allhave = %r' % (len(allhave),))
    #allhave.keys()[0:3]
    #allhave.values()[0:3]
    #ut.embed()
    #for drive in drives:
    #    drive.rrr()
    #    print(drive.root_dpath)
    #    print(len(drive.hash_to_unique_fpaths))
    #    print(len(drive.hash_to_fpaths))
    #    print(len(drive.hash_to_unique_fpaths) / len(drive.hash_to_fpaths))
    # Build dict to map from dpath to file pointers of unique descendants
    #unique_fidxs_list = drive.hash_to_fidxs.values()
    #fidxs = ut.flatten(unique_fidxs_list)
    esc = re.escape

    # Find which files exist on all drives
    hashes_list = [set(drive_.hash_to_fidxs.keys()) for drive_ in drives]
    allhave_hashes = reduce(set.intersection, hashes_list)
    print('Drives %r have %d file hashes in common' % (drives, len(allhave_hashes)))

    lbls = [drive_.root_dpath for drive_ in drives]
    isect_lens = np.zeros((len(drives), len(drives)))
    for idx1, (hashes1, drive1) in enumerate(zip(hashes_list, drives)):
        for idx2, (hashes2, drive2) in enumerate(zip(hashes_list, drives)):
            if drive1 is not drive2:
                common = set.intersection(hashes1, hashes2)
                isect_lens[idx1, idx2] = len(common)
            else:
                isect_lens[idx1, idx2] = len(hashes2)
    import pandas as pd
    print(pd.DataFrame(isect_lens, index=lbls, columns=lbls))

    # for drive in drives
    drive = drives[0]
    print('Finding unique files in drive=%r' % (drive,))

    # Get subset of fidxs on this drive
    unflat_valid_fidxs = ut.take(drive.hash_to_fidxs, allhave_hashes)
    valid_fidxs = sorted(ut.flatten(unflat_valid_fidxs))

    # Filter fpaths by patterns
    ignore_patterns = [esc('Thumbs.db')]
    ignore_paths = ['Spotify']
    patterns = ignore_paths + ignore_patterns
    valid_fpaths = ut.take(drive.fpath_list, valid_fidxs)
    valid_flags = [not any([re.search(p, fpath) for p in patterns])
                   for fpath in valid_fpaths]
    valid_flags = np.array(valid_flags)
    valid_fidxs = ut.compress(valid_fidxs, valid_flags)
    print(ut.filtered_infostr(valid_flags, 'invalid fpaths'))

    fidxs = valid_fidxs
    valid_fpaths = sorted(ut.take(drive.fpath_list, fidxs))
    dpath_to_unique_fidx = build_dpath_to_fidx(valid_fpaths, valid_fidxs,
                                               drive.root_dpath)

    def make_tree_structure(valid_fpaths):
        root = {}

        def dict_getitem_default(dict_, key, type_):
            try:
                val = dict_[key]
            except KeyError:
                val = type_()
                dict_[key] = val
            return val

        for fpath in ut.ProgIter(valid_fpaths, 'building tree', freq=30000):
            path_components = ut.dirsplit(fpath)
            current = root
            for comp in path_components[:-1]:
                current = dict_getitem_default(current, comp, dict)
            contents = dict_getitem_default(current, '.', list)
            contents.append(path_components[-1])
        return root

    root = make_tree_structure(valid_fpaths)

    def print_tree(root, path, dpath_to_unique_fidx=dpath_to_unique_fidx,
                   drive=drive, depth=None):
        print('path = %r' % (path,))
        print(ut.byte_str2(drive.get_total_nbytes(dpath_to_unique_fidx[path])))
        path_components = ut.dirsplit(path)
        # Navigate to correct spot in tree
        current = root
        for c in path_components:
            current = current[c]
        print(ut.repr3(current, truncate=1))

    def get_tree_info(root, path, dpath_to_unique_fidx=dpath_to_unique_fidx,
                      drive=drive, depth=0):
        path_components = ut.dirsplit(path)
        current = root
        for c in path_components:
            current = current[c]
        if isinstance(current, list):
            tree_tmp = []
        else:
            key_list = list(current.keys())
            child_list = [join(path, key) for key in key_list]
            dpath_nbytes_list = [
                drive.get_total_nbytes(dpath_to_unique_fidx.get(child, []))
                for child in child_list
            ]
            nfiles_list = [
                len(dpath_to_unique_fidx.get(child, []))
                for child in child_list
            ]
            tree_tmp = sorted([
                (key, ut.byte_str2(nbytes), nfiles)
                if depth == 0 else
                (key, ut.byte_str2(nbytes), nfiles,
                 get_tree_info(root, path=child,
                               dpath_to_unique_fidx=dpath_to_unique_fidx,
                               drive=drive, depth=depth - 1))
                for key, child, nbytes, nfiles in zip(
                    key_list, child_list, dpath_nbytes_list, nfiles_list)
            ])
        return tree_tmp

    def print_tree_struct(*args, **kwargs):
        tree_str = (ut.indent(ut.repr3(get_tree_info(*args, **kwargs), nl=1)))
        print(tree_str)
        #bytes_str = ut.byte_str2(drive.get_total_nbytes(dpath_to_unique_fidx[path]))
        #print('path = %r, %s' % (path, bytes_str))
        #print(ut.repr3(key_list))
        return tree_str

    dpath_to_fidxs = ut.map_dict_vals(set, drive.dpath_to_fidx)
    complete_unique_dpaths = ut.dict_isect(dpath_to_fidxs, dpath_to_unique_fidx)
    complete_root = make_tree_structure(complete_unique_dpaths.keys())

    globals()['ut'] = ut
    globals()['os'] = os
    globals()['join'] = join

    print(ut.byte_str2(drive.get_total_nbytes(dpath_to_unique_fidx['E:\\'])))
    get_tree_info(root, path='E:\\', depth=0)
    get_tree_info(complete_root, path='E:\\', depth=0)
    get_tree_info(root, path='E:\\', depth=1)
    print(print_tree_struct(root, path='E:\\Clutter', depth=0))
    print_tree(root, path=r'E:\TV')
    print_tree(root, path=r'E:\Movies')
    print_tree(root, path=r'E:\Boot')
    print_tree(root, path='E:\\')
    print_tree(root, path=r'E:\Downloaded')
    print_tree(root, path=r'E:\Recordings')
    print_tree(root, path=r'E:\Clutter')
    print_tree(root, path=r'E:\Audio Books')
    # TODO:
    # * Ignore list
    # * Find and rectify internal duplicates
    # * Update registry with new files and deleted ones
    # * Ensure that all unique files are backed up
    #   Index the C: Drive as well.
    # * Lazy properties of drive
    # * Multiple types of identifiers (hash, fname, ext, fsize)
    #   Drive subsets
    #   Export/Import Drive for analysis on other machines
    ut.embed()
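# A standalone sketch of the make_tree_structure idea above: nested dicts act
# as a directory trie, with the special key '.' holding the filenames at each
# level. Plain dict.setdefault and string splitting stand in for the ut.*
# helpers; the function name is illustrative only.
def _sketch_build_path_trie(fpaths):
    root = {}
    for fpath in fpaths:
        parts = [p for p in fpath.replace('\\', '/').split('/') if p]
        current = root
        for comp in parts[:-1]:
            # descend, creating intermediate directory nodes as needed
            current = current.setdefault(comp, {})
        current.setdefault('.', []).append(parts[-1])
    return root

# Example: _sketch_build_path_trie(['E:/TV/a.mkv', 'E:/TV/b.mkv', 'E:/Boot/x.iso'])
# -> {'E:': {'TV': {'.': ['a.mkv', 'b.mkv']}, 'Boot': {'.': ['x.iso']}}}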
def get_injured_sharks():
    """
    >>> from wbia.scripts.getshark import *  # NOQA
    """
    import requests
    url = 'http://www.whaleshark.org/getKeywordImages.jsp'
    resp = requests.get(url)
    assert resp.status_code == 200
    keywords = resp.json()['keywords']
    key_list = ut.take_column(keywords, 'indexName')
    key_to_nice = {k['indexName']: k['readableName'] for k in keywords}

    injury_patterns = [
        'injury', 'net', 'hook', 'trunc', 'damage', 'scar', 'nicks', 'bite',
    ]
    injury_keys = [key for key in key_list
                   if any([pat in key for pat in injury_patterns])]
    noninjury_keys = ut.setdiff(key_list, injury_keys)
    injury_nice = ut.lmap(lambda k: key_to_nice[k], injury_keys)  # NOQA
    noninjury_nice = ut.lmap(lambda k: key_to_nice[k], noninjury_keys)  # NOQA
    key_list = injury_keys

    keyed_images = {}
    for key in ut.ProgIter(key_list, lbl='reading index', bs=True):
        key_url = url + '?indexName={indexName}'.format(indexName=key)
        key_resp = requests.get(key_url)
        assert key_resp.status_code == 200
        key_imgs = key_resp.json()['images']
        keyed_images[key] = key_imgs

    key_hist = {key: len(imgs) for key, imgs in keyed_images.items()}
    key_hist = ut.sort_dict(key_hist, 'vals')
    logger.info(ut.repr3(key_hist))
    nice_key_hist = ut.map_dict_keys(lambda k: key_to_nice[k], key_hist)
    nice_key_hist = ut.sort_dict(nice_key_hist, 'vals')
    logger.info(ut.repr3(nice_key_hist))

    key_to_urls = {key: ut.take_column(vals, 'url')
                   for key, vals in keyed_images.items()}
    overlaps = {}
    import itertools
    overlap_img_list = []
    for k1, k2 in itertools.combinations(key_to_urls.keys(), 2):
        overlap_imgs = ut.isect(key_to_urls[k1], key_to_urls[k2])
        num_overlap = len(overlap_imgs)
        overlaps[(k1, k2)] = num_overlap
        overlaps[(k1, k1)] = len(key_to_urls[k1])
        if num_overlap > 0:
            # logger.info('[%s][%s], overlap=%r' % (k1, k2, num_overlap))
            overlap_img_list.extend(overlap_imgs)

    all_img_urls = list(set(ut.flatten(key_to_urls.values())))
    num_all = len(all_img_urls)  # NOQA
    logger.info('num_all = %r' % (num_all,))

    # Determine super-categories
    categories = ['nicks', 'scar', 'trunc']

    # Force these keys into these categories
    key_to_cat = {'scarbite': 'other_injury'}

    cat_to_keys = ut.ddict(list)

    for key in key_to_urls.keys():
        flag = 1
        if key in key_to_cat:
            cat = key_to_cat[key]
            cat_to_keys[cat].append(key)
            continue
        for cat in categories:
            if cat in key:
                cat_to_keys[cat].append(key)
                flag = 0
        if flag:
            cat = 'other_injury'
            cat_to_keys[cat].append(key)

    cat_urls = ut.ddict(list)
    for cat, keys in cat_to_keys.items():
        for key in keys:
            cat_urls[cat].extend(key_to_urls[key])

    cat_hist = {}
    for cat in list(cat_urls.keys()):
        cat_urls[cat] = list(set(cat_urls[cat]))
        cat_hist[cat] = len(cat_urls[cat])
    logger.info(ut.repr3(cat_to_keys))
    logger.info(ut.repr3(cat_hist))

    key_to_cat = dict([(val, key) for key, vals in cat_to_keys.items()
                       for val in vals])

    # ingestset = {
    #     '__class__': 'ImageSet',
    #     'images': ut.ddict(dict)
    # }
    # for key, key_imgs in keyed_images.items():
    #     for imgdict in key_imgs:
    #         url = imgdict['url']
    #         encid = imgdict['correspondingEncounterNumber']
    #         # Make structure
    #         encdict = encounters[encid]
    #         encdict['__class__'] = 'Encounter'
    #         imgdict = ut.delete_keys(imgdict.copy(), ['correspondingEncounterNumber'])
    #         imgdict['__class__'] = 'Image'
    #         cat = key_to_cat[key]
    #         annotdict = {'relative_bbox': [.01, .01, .98, .98], 'tags': [cat, key]}
    #         annotdict['__class__'] = 'Annotation'
    #         # Ensure structures exist
    #         encdict['images'] = encdict.get('images', [])
    #         imgdict['annots'] = imgdict.get('annots', [])
    #         # Add an image to this encounter
    #         encdict['images'].append(imgdict)
    #         # Add an annotation to this image
    #         imgdict['annots'].append(annotdict)
    # # http://springbreak.wildbook.org/rest/org.ecocean.Encounter/1111
    # get_enc_url = 'http://www.whaleshark.org/rest/org.ecocean.Encounter/%s' % (encid,)
    # resp = requests.get(get_enc_url)
    # logger.info(ut.repr3(encdict))
    # logger.info(ut.repr3(encounters))

    # Download the files to the local disk
    # fpath_list =
    all_urls = ut.unique(ut.take_column(
        ut.flatten(
            ut.dict_subset(keyed_images,
                           ut.flatten(cat_to_keys.values())).values()),
        'url',
    ))
    dldir = ut.truepath('~/tmpsharks')
    from os.path import commonprefix, basename  # NOQA

    prefix = commonprefix(all_urls)
    suffix_list = [url_[len(prefix):] for url_ in all_urls]
    fname_list = [suffix.replace('/', '--') for suffix in suffix_list]

    fpath_list = []
    for url, fname in ut.ProgIter(zip(all_urls, fname_list),
                                  lbl='downloading imgs', freq=1):
        fpath = ut.grab_file_url(url, download_dir=dldir, fname=fname,
                                 verbose=False)
        fpath_list.append(fpath)

    # Make sure we keep orig info
    # url_to_keys = ut.ddict(list)
    url_to_info = ut.ddict(dict)
    for key, imgdict_list in keyed_images.items():
        for imgdict in imgdict_list:
            url = imgdict['url']
            info = url_to_info[url]
            for k, v in imgdict.items():
                info[k] = info.get(k, [])
                info[k].append(v)
            info['keys'] = info.get('keys', [])
            info['keys'].append(key)
            # url_to_keys[url].append(key)

    info_list = ut.take(url_to_info, all_urls)
    for info in info_list:
        if len(set(info['correspondingEncounterNumber'])) > 1:
            assert False, 'url with two different encounter nums'

    # Combine duplicate tags
    hashid_list = [ut.get_file_uuid(fpath_, stride=8)
                   for fpath_ in ut.ProgIter(fpath_list, bs=True)]
    groupxs = ut.group_indices(hashid_list)[1]

    # Group properties by duplicate images
    # groupxs = [g for g in groupxs if len(g) > 1]
    fpath_list_ = ut.take_column(ut.apply_grouping(fpath_list, groupxs), 0)
    url_list_ = ut.take_column(ut.apply_grouping(all_urls, groupxs), 0)
    info_list_ = [
        ut.map_dict_vals(ut.flatten, ut.dict_accum(*info_))
        for info_ in ut.apply_grouping(info_list, groupxs)
    ]

    encid_list_ = [ut.unique(info_['correspondingEncounterNumber'])[0]
                   for info_ in info_list_]
    keys_list_ = [ut.unique(info_['keys']) for info_ in info_list_]
    cats_list_ = [ut.unique(ut.take(key_to_cat, keys)) for keys in keys_list_]
    clist = ut.ColumnLists({
        'gpath': fpath_list_,
        'url': url_list_,
        'encid': encid_list_,
        'key': keys_list_,
        'cat': cats_list_,
    })

    # for info_ in ut.apply_grouping(info_list, groupxs):
    #     info = ut.dict_accum(*info_)
    #     info = ut.map_dict_vals(ut.flatten, info)
    #     x = ut.unique(ut.flatten(ut.dict_accum(*info_)['correspondingEncounterNumber']))
    #     if len(x) > 1:
    #         info = info.copy()
    #         del info['keys']
    #         logger.info(ut.repr3(info))

    flags = ut.lmap(ut.fpath_has_imgext, clist['gpath'])
    clist = clist.compress(flags)

    import wbia
    ibs = wbia.opendb('WS_Injury', allow_newdir=True)

    gid_list = ibs.add_images(clist['gpath'])
    clist['gid'] = gid_list
    failed_flags = ut.flag_None_items(clist['gid'])
    logger.info('# failed %s' % (sum(failed_flags),))
    passed_flags = ut.not_list(failed_flags)
    clist = clist.compress(passed_flags)
    ut.assert_all_not_None(clist['gid'])
    # ibs.get_image_uris_original(clist['gid'])
    ibs.set_image_uris_original(clist['gid'], clist['url'], overwrite=True)

    # ut.zipflat(clist['cat'], clist['key'])
    if False:
        # Can run detection instead
        clist['tags'] = ut.zipflat(clist['cat'])
        aid_list = ibs.use_images_as_annotations(clist['gid'],
                                                 adjust_percent=0.01,
                                                 tags_list=clist['tags'])
        aid_list

    import wbia.plottool as pt
    from wbia import core_annots
    pt.qt4ensure()
    # annots = ibs.annots()
    # aids = [1, 2]
    # ibs.depc_annot.get('hog', aids, 'hog')
    # ibs.depc_annot.get('chip', aids, 'img')
    for aid in ut.InteractiveIter(ibs.get_valid_aids()):
        hogs = ibs.depc_annot.d.get_hog_hog([aid])
        chips = ibs.depc_annot.d.get_chips_img([aid])
        chip = chips[0]
        hogimg = core_annots.make_hog_block_image(hogs[0])
        pt.clf()
        pt.imshow(hogimg, pnum=(1, 2, 1))
        pt.imshow(chip, pnum=(1, 2, 2))
        fig = pt.gcf()
        fig.show()
        fig.canvas.draw()

    # logger.info(len(groupxs))
    # if False:
    #     groupxs = ut.find_duplicate_items(ut.lmap(basename, suffix_list)).values()
    #     logger.info(ut.repr3(ut.apply_grouping(all_urls, groupxs)))
    #     # FIX
    #     for fpath, fname in zip(fpath_list, fname_list):
    #         if ut.checkpath(fpath):
    #             ut.move(fpath, join(dirname(fpath), fname))
    #             logger.info('fpath = %r' % (fpath,))
    # import wbia
    # from wbia.dbio import ingest_dataset
    # dbdir = wbia.sysres.lookup_dbdir('WS_ALL')
    # self = ingest_dataset.Ingestable2(dbdir)

    if False:
        # Show overlap matrix
        import wbia.plottool as pt
        import pandas as pd
        import numpy as np

        dict_ = overlaps
        s = pd.Series(dict_, index=pd.MultiIndex.from_tuples(overlaps))
        df = s.unstack()
        lhs, rhs = df.align(df.T)
        df = lhs.add(rhs, fill_value=0).fillna(0)

        label_texts = df.columns.values

        def label_ticks(label_texts):
            import wbia.plottool as pt
            truncated_labels = [repr(lbl[0:100]) for lbl in label_texts]
            ax = pt.gca()
            ax.set_xticks(list(range(len(label_texts))))
            ax.set_xticklabels(truncated_labels)
            [lbl.set_rotation(-55) for lbl in ax.get_xticklabels()]
            [lbl.set_horizontalalignment('left') for lbl in ax.get_xticklabels()]
            # xgrid, ygrid = np.meshgrid(range(len(label_texts)), range(len(label_texts)))
            # pt.plot_surface3d(xgrid, ygrid, disjoint_mat)
            ax.set_yticks(list(range(len(label_texts))))
            ax.set_yticklabels(truncated_labels)
            [lbl.set_horizontalalignment('right') for lbl in ax.get_yticklabels()]
            [lbl.set_verticalalignment('center') for lbl in ax.get_yticklabels()]
            # [lbl.set_rotation(20) for lbl in ax.get_yticklabels()]

        # df = df.sort(axis=0)
        # df = df.sort(axis=1)
        sortx = np.argsort(df.sum(axis=1).values)[::-1]
        df = df.take(sortx, axis=0)
        df = df.take(sortx, axis=1)

        fig = pt.figure(fnum=1)
        fig.clf()
        mat = df.values.astype(np.int32)
        mat[np.diag_indices(len(mat))] = 0
        vmax = mat[(1 - np.eye(len(mat))).astype(np.bool)].max()
        import matplotlib.colors
        norm = matplotlib.colors.Normalize(vmin=0, vmax=vmax, clip=True)
        pt.plt.imshow(mat, cmap='hot', norm=norm, interpolation='none')
        pt.plt.colorbar()
        pt.plt.grid('off')
        label_ticks(label_texts)
        fig.tight_layout()

    # overlap_df = pd.DataFrame.from_dict(overlap_img_list)
    class TmpImage(ut.NiceRepr):
        pass

    from skimage.feature import hog
    from skimage import data, color, exposure
    import wbia.plottool as pt

    image2 = color.rgb2gray(data.astronaut())  # NOQA

    fpath = './GOPR1120.JPG'

    import vtool as vt
    for fpath in [fpath]:
        """
        http://scikit-image.org/docs/dev/auto_examples/plot_hog.html
        """
        image = vt.imread(fpath, grayscale=True)
        image = pt.color_funcs.to_base01(image)

        fig = pt.figure(fnum=2)
        fd, hog_image = hog(
            image,
            orientations=8,
            pixels_per_cell=(16, 16),
            cells_per_block=(1, 1),
            visualise=True,
        )

        fig, (ax1, ax2) = pt.plt.subplots(1, 2, figsize=(8, 4),
                                          sharex=True, sharey=True)

        ax1.axis('off')
        ax1.imshow(image, cmap=pt.plt.cm.gray)
        ax1.set_title('Input image')
        ax1.set_adjustable('box-forced')

        # Rescale histogram for better display
        hog_image_rescaled = exposure.rescale_intensity(hog_image, in_range=(0, 0.02))

        ax2.axis('off')
        ax2.imshow(hog_image_rescaled, cmap=pt.plt.cm.gray)
        ax2.set_title('Histogram of Oriented Gradients')
        ax1.set_adjustable('box-forced')
        pt.plt.show()
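# The duplicate-merging step above groups downloaded files by a content hash
# and keeps one representative per group. A minimal sketch of that pattern,
# with hashlib standing in for ut.get_file_uuid / ut.group_indices; the
# function name is illustrative only.
def _sketch_group_by_content(fpaths):
    import hashlib
    from collections import defaultdict
    groups = defaultdict(list)
    for fpath in fpaths:
        with open(fpath, 'rb') as f:
            digest = hashlib.sha1(f.read()).hexdigest()
        groups[digest].append(fpath)
    # One representative path per unique content, like ut.take_column(..., 0)
    return [paths[0] for paths in groups.values()]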
def make_graph(infr, show=False):
    import networkx as nx
    import itertools
    cm_list = infr.cm_list
    unique_nids, prob_names = infr.make_prob_names()
    thresh = infr.choose_thresh()

    # Simply cut any edge with a weight less than a threshold
    qaid_list = [cm.qaid for cm in cm_list]
    postcut = prob_names > thresh
    qxs, nxs = np.where(postcut)
    if False:
        kw = dict(precision=2, max_line_width=140, suppress_small=True)
        print(ut.hz_str('prob_names = ', ut.array2string2((prob_names), **kw)))
        print(ut.hz_str('postcut = ', ut.array2string2((postcut).astype(np.int), **kw)))
    matching_qaids = ut.take(qaid_list, qxs)
    matched_nids = ut.take(unique_nids, nxs)

    qreq_ = infr.qreq_

    nodes = ut.unique(qreq_.qaids.tolist() + qreq_.daids.tolist())
    if not hasattr(qreq_, 'dnids'):
        qreq_.dnids = qreq_.ibs.get_annot_nids(qreq_.daids)
        qreq_.qnids = qreq_.ibs.get_annot_nids(qreq_.qaids)
    dnid2_daids = ut.group_items(qreq_.daids, qreq_.dnids)
    grouped_aids = dnid2_daids.values()
    matched_daids = ut.take(dnid2_daids, matched_nids)
    name_cliques = [list(itertools.combinations(aids, 2)) for aids in grouped_aids]
    aid_matches = [list(ut.product([qaid], daids))
                   for qaid, daids in zip(matching_qaids, matched_daids)]

    graph = nx.Graph()
    graph.add_nodes_from(nodes)
    graph.add_edges_from(ut.flatten(name_cliques))
    graph.add_edges_from(ut.flatten(aid_matches))

    #matchless_quries = ut.take(qaid_list, ut.index_complement(qxs, len(qaid_list)))
    name_nodes = [('nid', l) for l in qreq_.dnids]
    db_aid_nid_edges = list(zip(qreq_.daids, name_nodes))
    #query_aid_nid_edges = list(zip(matching_qaids, [('nid', l) for l in matched_nids]))
    #G = nx.Graph()
    #G.add_nodes_from(matchless_quries)
    #G.add_edges_from(db_aid_nid_edges)
    #G.add_edges_from(query_aid_nid_edges)

    graph.add_edges_from(db_aid_nid_edges)

    if infr.user_feedback is not None:
        user_feedback = ut.map_dict_vals(np.array, infr.user_feedback)
        p_bg = 0.0
        part1 = user_feedback['p_match'] * (1 - user_feedback['p_notcomp'])
        part2 = p_bg * user_feedback['p_notcomp']
        p_same_list = part1 + part2
        for aid1, aid2, p_same in zip(user_feedback['aid1'],
                                      user_feedback['aid2'], p_same_list):
            if p_same > .5:
                if not graph.has_edge(aid1, aid2):
                    graph.add_edge(aid1, aid2)
            else:
                if graph.has_edge(aid1, aid2):
                    graph.remove_edge(aid1, aid2)

    if show:
        import plottool as pt
        nx.set_node_attributes(graph, 'color', {aid: pt.LIGHT_PINK
                                                for aid in qreq_.daids})
        nx.set_node_attributes(graph, 'color', {aid: pt.TRUE_BLUE
                                                for aid in qreq_.qaids})
        nx.set_node_attributes(graph, 'color', {
            aid: pt.LIGHT_PURPLE
            for aid in np.intersect1d(qreq_.qaids, qreq_.daids)})
        nx.set_node_attributes(graph, 'label', {node: 'n%r' % (node[1],)
                                                for node in name_nodes})
        nx.set_node_attributes(graph, 'color', {node: pt.LIGHT_GREEN
                                                for node in name_nodes})

    if show:
        import plottool as pt
        pt.show_nx(graph, layoutkw={'prog': 'neato'}, verbose=False)
    return graph
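# A compact sketch of the post-cut construction above: threshold a
# query-by-name probability matrix, connect each query to every annotation of
# the names that survive the cut, and add within-name cliques. Uses plain
# networkx/numpy; the function name and toy inputs are illustrative, not from
# any database.
def _sketch_postcut_graph(prob, qaids, name_to_daids, thresh=0.5):
    import itertools
    import numpy as np
    import networkx as nx
    names = list(name_to_daids.keys())
    graph = nx.Graph()
    for daids in name_to_daids.values():
        # within-name clique, like name_cliques above
        graph.add_edges_from(itertools.combinations(daids, 2))
    qxs, nxs_ = np.where(np.asarray(prob) > thresh)
    for qx, nx_ in zip(qxs, nxs_):
        for daid in name_to_daids[names[nx_]]:
            graph.add_edge(qaids[qx], daid)
    return graph

# Example: _sketch_postcut_graph([[0.9, 0.1]], qaids=[100],
#                                name_to_daids={'a': [1, 2], 'b': [3]})
# connects 100-1, 100-2 and the clique edge 1-2, but not 100-3.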
def update_bindings():
    r"""
    Returns:
        dict: matchtups

    CommandLine:
        python ~/local/build_scripts/flannscripts/autogen_bindings.py --exec-update_bindings
        utprof.py ~/local/build_scripts/flannscripts/autogen_bindings.py --exec-update_bindings

    Example:
        >>> # DISABLE_DOCTEST
        >>> from autogen_bindings import *  # NOQA
        >>> import sys
        >>> import utool as ut
        >>> sys.path.append(ut.truepath('~/local/build_scripts/flannscripts'))
        >>> matchtups = update_bindings()
        >>> result = ('matchtups = %s' % (ut.repr2(matchtups),))
        >>> print(result)
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> ut.show_if_requested()
    """
    from os.path import basename
    import difflib
    import numpy as np
    import re
    binding_names = [
        'build_index',
        'used_memory',
        'add_points',
        'remove_point',
        'compute_cluster_centers',
        'load_index',
        'save_index',
        'find_nearest_neighbors',
        'radius_search',
        'remove_points',
        'free_index',
        'find_nearest_neighbors_index',
        # 'size',
        # 'veclen',
        # 'get_point',
        # 'flann_get_distance_order',
        # 'flann_get_distance_type',
        # 'flann_log_verbosity',
        # 'clean_removed_points',
    ]
    _places = [
        '~/code/flann/src/cpp/flann/flann.cpp',
        '~/code/flann/src/cpp/flann/flann.h',
        '~/code/flann/src/python/pyflann/flann_ctypes.py',
        '~/code/flann/src/python/pyflann/index.py',
    ]
    eof_sentinals = {
        # 'flann_ctypes.py': '# END DEFINE BINDINGS',
        'flann_ctypes.py': 'def ensure_2d_array(arr',
        # 'flann.h': '// END DEFINE BINDINGS',
        'flann.h': '#ifdef __cplusplus',
        'flann.cpp': None,
        'index.py': None,
    }
    block_sentinals = {
        'flann.h': re.escape('/**'),
        'flann.cpp': 'template *<typename Distance>',
        # 'flann_ctypes.py': '\n',
        'flann_ctypes.py': 'flann\.[a-z_.]* =',
        # 'index.py': '    def .*',
        'index.py': '    [^ ].*',
    }
    places = {basename(fpath): fpath for fpath in ut.lmap(ut.truepath, _places)}
    text_dict = ut.map_dict_vals(ut.readfrom, places)
    lines_dict = {key: val.split('\n') for key, val in text_dict.items()}
    orig_texts = text_dict.copy()  # NOQA
    binding_defs = {}
    named_blocks = {}
    print('binding_names = %r' % (binding_names,))
    for binding_name in binding_names:
        blocks, defs = autogen_parts(binding_name)
        binding_defs[binding_name] = defs
        named_blocks[binding_name] = blocks

    for binding_name in ut.ProgIter(binding_names):
        ut.colorprint('+--- GENERATE BINDING %s -----' % (binding_name,), 'yellow')
        blocks_dict = named_blocks[binding_name]
        for key in places.keys():
            ut.colorprint('---- generating %s for %s -----' % (binding_name, key,),
                          'yellow')
            # key = 'flann_ctypes.py'
            # print(text_dict[key])
            old_text = text_dict[key]
            line_list = lines_dict[key]
            #text = old_text
            block = blocks_dict[key]

            debug = ut.get_argflag('--debug')
            # debug = True
            # if debug:
            #     print(ut.highlight_code(block, splitext(key)[1]))

            # Find a place in the code that already exists
            searchblock = block
            if key.endswith('.cpp') or key.endswith('.h'):
                searchblock = re.sub(ut.REGEX_C_COMMENT, '', searchblock,
                                     flags=re.MULTILINE | re.DOTALL)
            searchblock = '\n'.join(searchblock.splitlines()[0:3])

            # @ut.cached_func(verbose=False)
            def cached_match(old_text, searchblock):
                def isjunk(x):
                    return False
                    # unreachable in the original; isjunk2 applies the test
                    # return x in ' \t,*()'

                def isjunk2(x):
                    return x in ' \t,*()'
                # Not sure why the first one just doesnt find it
                # isjunk = None
                sm = difflib.SequenceMatcher(isjunk, old_text, searchblock,
                                             autojunk=False)
                sm0 = difflib.SequenceMatcher(isjunk, old_text, searchblock,
                                              autojunk=True)
                sm1 = difflib.SequenceMatcher(isjunk2, old_text, searchblock,
                                              autojunk=False)
                sm2 = difflib.SequenceMatcher(isjunk2, old_text, searchblock,
                                              autojunk=True)
                matchtups = (sm.get_matching_blocks() +
                             sm0.get_matching_blocks() +
                             sm1.get_matching_blocks() +
                             sm2.get_matching_blocks())
                return matchtups
            matchtups = cached_match(old_text, searchblock)

            # Find a reasonable match in matchtups
            found = False
            if debug:
                # print('searchblock =\n%s' % (searchblock,))
                print('searchblock = %r' % (searchblock,))
            for (a, b, size) in matchtups:
                matchtext = old_text[a: a + size]
                pybind = binding_defs[binding_name]['py_binding_name']
                if (re.search(binding_name + '\\b', matchtext) or
                        re.search(pybind + '\\b', matchtext)):
                    found = True
                    pos = a + size
                    if debug:
                        print('MATCHING TEXT')
                        print(matchtext)
                    break
                else:
                    if debug and 0:
                        print('Not matching')
                        print('matchtext = %r' % (matchtext,))
                        matchtext2 = old_text[a - 10: a + size + 20]
                        print('matchtext2 = %r' % (matchtext2,))

            if found:
                linelens = np.array(ut.lmap(len, line_list)) + 1
                sumlen = np.cumsum(linelens)
                row = np.where(sumlen < pos)[0][-1] + 1
                #print(line_list[row])
                # Search for extents of the block to overwrite
                block_sentinal = block_sentinals[key]
                row1 = ut.find_block_end(row, line_list, block_sentinal, -1) - 1
                row2 = ut.find_block_end(row + 1, line_list, block_sentinal, +1)
                eof_sentinal = eof_sentinals[key]
                if eof_sentinal is not None:
                    print('eof_sentinal = %r' % (eof_sentinal,))
                    row2 = min([count for count, line in enumerate(line_list)
                                if line.startswith(eof_sentinal)][-1], row2)
                nr = len((block + '\n\n').splitlines())
                new_line_list = ut.insert_block_between_lines(
                    block + '\n', row1, row2, line_list)
                rtext1 = '\n'.join(line_list[row1:row2])
                rtext2 = '\n'.join(new_line_list[row1:row1 + nr])
                if debug:
                    print('-----')
                    ut.colorprint('FOUND AND REPLACING %s' % (binding_name,), 'yellow')
                    print(ut.highlight_code(rtext1))
                if debug:
                    print('-----')
                    ut.colorprint('FOUND AND REPLACED WITH %s' % (binding_name,), 'yellow')
                    print(ut.highlight_code(rtext2))
                if not ut.get_argflag('--diff') and not debug:
                    print(ut.color_diff_text(ut.difftext(rtext1, rtext2,
                                                         num_context_lines=7,
                                                         ignore_whitespace=True)))
            else:
                # Append to end of the file
                eof_sentinal = eof_sentinals[key]
                if eof_sentinal is None:
                    row2 = len(line_list) - 1
                else:
                    row2_choice = [count for count, line in enumerate(line_list)
                                   if line.startswith(eof_sentinal)]
                    if len(row2_choice) == 0:
                        row2 = len(line_list) - 1
                        assert False
                    else:
                        row2 = row2_choice[-1] - 1

                # row1 = row2 - 1
                # row2 = row2 - 1
                row1 = row2

                new_line_list = ut.insert_block_between_lines(
                    block + '\n', row1, row2, line_list)
                # block + '\n\n\n', row1, row2, line_list)
                rtext1 = '\n'.join(line_list[row1:row2])
                nr = len((block + '\n\n').splitlines())
                rtext2 = '\n'.join(new_line_list[row1:row1 + nr])
                if debug:
                    print('-----')
                    ut.colorprint('NOT FOUND AND REPLACING %s' % (binding_name,),
                                  'yellow')
                    print(ut.highlight_code(rtext1))
                if debug:
                    print('-----')
                    ut.colorprint('NOT FOUND AND REPLACED WITH %s' % (binding_name,),
                                  'yellow')
                    print(ut.highlight_code(rtext2))
                if not ut.get_argflag('--diff') and not debug:
                    print(ut.color_diff_text(ut.difftext(rtext1, rtext2,
                                                         num_context_lines=7,
                                                         ignore_whitespace=True)))

            text_dict[key] = '\n'.join(new_line_list)
            lines_dict[key] = new_line_list
        ut.colorprint('L___ GENERATED BINDING %s ___' % (binding_name,), 'yellow')

    for key in places:
        new_text = '\n'.join(lines_dict[key])
        #ut.writeto(ut.augpath(places[key], '.new'), new_text)
        ut.writeto(ut.augpath(places[key]), new_text)

    for key in places:
        if ut.get_argflag('--diff'):
            # recompute per key; the text from the write loop above is stale
            new_text = '\n'.join(lines_dict[key])
            difftext = ut.get_textdiff(orig_texts[key], new_text,
                                       num_context_lines=7,
                                       ignore_whitespace=True)
            difftext = ut.color_diff_text(difftext)
            print(difftext)
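# update_bindings locates an existing binding by fuzzy-matching the first few
# lines of the generated block against the current file text. A small,
# self-contained sketch of that difflib.SequenceMatcher usage; the function
# name and toy strings are illustrative only.
def _sketch_find_block(old_text, searchblock):
    import difflib
    sm = difflib.SequenceMatcher(None, old_text, searchblock, autojunk=False)
    # get_matching_blocks() yields (a, b, size) triples; `a` indexes old_text
    best = max(sm.get_matching_blocks(), key=lambda tup: tup.size)
    return best.a, best.a + best.size  # character extent of the best match

# Example: _sketch_find_block('int used_memory(...);\nvoid other();',
#                             'int used_memory(...)') -> (0, 20)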
def get_dbinfo(ibs, verbose=True,
               with_imgsize=False,
               with_bytes=False,
               with_contrib=False,
               with_agesex=False,
               with_header=True,
               short=False,
               tag='dbinfo',
               aid_list=None):
    """
    Returns dictionary of digestable database information
    Infostr is a string summary of all the stats. Prints infostr in addition to
    returning locals

    Args:
        ibs (IBEISController):
        verbose (bool):
        with_imgsize (bool):
        with_bytes (bool):

    Returns:
        dict:

    CommandLine:
        python -m ibeis.other.dbinfo --exec-get_dbinfo:0
        python -m ibeis.other.dbinfo --test-get_dbinfo:1
        python -m ibeis.other.dbinfo --test-get_dbinfo:0 --db NNP_Master3
        python -m ibeis.other.dbinfo --test-get_dbinfo:0 --db PZ_Master1
        python -m ibeis.other.dbinfo --test-get_dbinfo:0 --db GZ_ALL
        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 --db PZ_ViewPoints
        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 --db GZ_Master1

        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a ctrl
        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default:minqual=ok,require_timestamp=True --dbdir ~/lev/media/danger/LEWA
        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default:minqual=ok,require_timestamp=True --dbdir ~/lev/media/danger/LEWA --loadbackup=0

        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default: --dbdir ~/lev/media/danger/LEWA
        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default: --dbdir ~/lev/media/danger/LEWA --loadbackup=0

    Example1:
        >>> # SCRIPT
        >>> from ibeis.other.dbinfo import *  # NOQA
        >>> import ibeis
        >>> defaultdb = 'testdb1'
        >>> ibs, aid_list = ibeis.testdata_aids(defaultdb, a='default:minqual=ok,view=primary,view_ext1=1')
        >>> kwargs = ut.get_kwdefaults(get_dbinfo)
        >>> kwargs['verbose'] = False
        >>> kwargs['aid_list'] = aid_list
        >>> kwargs = ut.parse_dict_from_argv(kwargs)
        >>> output = get_dbinfo(ibs, **kwargs)
        >>> result = (output['info_str'])
        >>> print(result)
        >>> #ibs = ibeis.opendb(defaultdb='testdb1')
        >>> # <HACK FOR FILTERING>
        >>> #from ibeis.expt import cfghelpers
        >>> #from ibeis.expt import annotation_configs
        >>> #from ibeis.init import filter_annots
        >>> #named_defaults_dict = ut.dict_take(annotation_configs.__dict__,
        >>> #                                   annotation_configs.TEST_NAMES)
        >>> #named_qcfg_defaults = dict(zip(annotation_configs.TEST_NAMES,
        >>> #                               ut.get_list_column(named_defaults_dict, 'qcfg')))
        >>> #acfg = cfghelpers.parse_argv_cfg(('--annot-filter', '-a'), named_defaults_dict=named_qcfg_defaults, default=None)[0]
        >>> #aid_list = ibs.get_valid_aids()
        >>> # </HACK FOR FILTERING>

    Example1:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.other.dbinfo import *  # NOQA
        >>> import ibeis
        >>> verbose = True
        >>> short = True
        >>> #ibs = ibeis.opendb(db='GZ_ALL')
        >>> #ibs = ibeis.opendb(db='PZ_Master0')
        >>> ibs = ibeis.opendb('testdb1')
        >>> assert ibs.get_dbname() == 'testdb1', 'DO NOT DELETE CONTRIBUTORS OF OTHER DBS'
        >>> ibs.delete_contributors(ibs.get_valid_contrib_rowids())
        >>> ibs.delete_empty_nids()
        >>> #ibs = ibeis.opendb(db='PZ_MTEST')
        >>> output = get_dbinfo(ibs, with_contrib=False, verbose=False, short=True)
        >>> result = (output['info_str'])
        >>> print(result)
        +============================
        DB Info: testdb1
        DB Notes: None
        DB NumContrib: 0
        ----------
        # Names = 7
        # Names (unassociated) = 0
        # Names (singleton) = 5
        # Names (multiton) = 2
        ----------
        # Annots = 13
        # Annots (unknown) = 4
        # Annots (singleton) = 5
        # Annots (multiton) = 4
        ----------
        # Img = 13
        L============================
    """
    # TODO Database size in bytes
    # TODO: occurrence, contributors, etc...

    # Basic variables
    request_annot_subset = False
    _input_aid_list = aid_list  # NOQA
    if aid_list is None:
        valid_aids = ibs.get_valid_aids()
        valid_nids = ibs.get_valid_nids()
        valid_gids = ibs.get_valid_gids()
    else:
        if isinstance(aid_list, str):
            # Hack to get experiment stats on aids
            acfg_name_list = [aid_list]
            print('Specified custom aids via acfgname %s' % (acfg_name_list,))
            from ibeis.expt import experiment_helpers
            acfg_list, expanded_aids_list = experiment_helpers.get_annotcfg_list(
                ibs, acfg_name_list)
            aid_list = sorted(list(set(ut.flatten(ut.flatten(expanded_aids_list)))))
            #aid_list =
        if verbose:
            print('Specified %d custom aids' % (len(aid_list,)))
        request_annot_subset = True
        valid_aids = aid_list
        valid_nids = list(
            set(ibs.get_annot_nids(aid_list, distinguish_unknowns=False)) -
            {const.UNKNOWN_NAME_ROWID}
        )
        valid_gids = list(set(ibs.get_annot_gids(aid_list)))
    #associated_nids = ibs.get_valid_nids(filter_empty=True)  # nids with at least one annotation
    FILTER_HACK = True
    if FILTER_HACK:
        # HUGE HACK - get only images and names with filtered aids
        valid_aids_ = ibs.filter_aids_custom(valid_aids)
        valid_nids_ = ibs.filter_nids_custom(valid_nids)
        valid_gids_ = ibs.filter_gids_custom(valid_gids)
        if verbose:
            print('Filtered %d names' % (len(valid_nids) - len(valid_nids_)))
            print('Filtered %d images' % (len(valid_gids) - len(valid_gids_)))
            print('Filtered %d annots' % (len(valid_aids) - len(valid_aids_)))
        valid_gids = valid_gids_
        valid_nids = valid_nids_
        valid_aids = valid_aids_
        #associated_nids = ut.compress(associated_nids, map(any,
        #    ibs.unflat_map(ibs.get_annot_custom_filterflags,
        #                   ibs.get_name_aids(associated_nids))))

    # Image info
    if verbose:
        print('Checking Image Info')
    gx2_aids = ibs.get_image_aids(valid_gids)
    if FILTER_HACK:
        gx2_aids = [ibs.filter_aids_custom(aids) for aids in gx2_aids]  # HACK FOR FILTER
    if request_annot_subset:
        # remove annots not in this subset
        valid_aids_set = set(valid_aids)
        gx2_aids = [list(set(aids).intersection(valid_aids_set)) for aids in gx2_aids]

    gx2_nAnnots = np.array(list(map(len, gx2_aids)))
    image_without_annots = len(np.where(gx2_nAnnots == 0)[0])
    gx2_nAnnots_stats = ut.get_stats_str(gx2_nAnnots, newlines=True, use_median=True)
    image_reviewed_list = ibs.get_image_reviewed(valid_gids)

    # Name stats
    if verbose:
        print('Checking Name Info')
    nx2_aids = ibs.get_name_aids(valid_nids)
    if FILTER_HACK:
        nx2_aids = [ibs.filter_aids_custom(aids) for aids in nx2_aids]  # HACK FOR FILTER
    if request_annot_subset:
        # remove annots not in this subset
        valid_aids_set = set(valid_aids)
        nx2_aids = [list(set(aids).intersection(valid_aids_set)) for aids in nx2_aids]

    associated_nids = ut.compress(valid_nids, list(map(len, nx2_aids)))

    ibs.check_name_mapping_consistency(nx2_aids)

    # Occurrence Info
    def compute_annot_occurrence_ids(ibs, aid_list):
        from ibeis.algo.preproc import preproc_occurrence
        gid_list = ibs.get_annot_gids(aid_list)
        gid2_aids = ut.group_items(aid_list, gid_list)
        flat_imgsetids, flat_gids = preproc_occurrence.ibeis_compute_occurrences(
            ibs, gid_list, seconds_thresh=4 * 60 * 60, verbose=False)
        occurid2_gids = ut.group_items(flat_gids, flat_imgsetids)
        occurid2_aids = {oid: ut.flatten(ut.take(gid2_aids, gids))
                         for oid, gids in occurid2_gids.items()}
        return occurid2_aids

    import utool
    with utool.embed_on_exception_context:
        occurid2_aids = compute_annot_occurrence_ids(ibs, valid_aids)
        occur_nids = ibs.unflat_map(ibs.get_annot_nids, occurid2_aids.values())
        occur_unique_nids = [ut.unique(nids) for nids in occur_nids]
        nid2_occurxs = ut.ddict(list)
        for occurx, nids in enumerate(occur_unique_nids):
            for nid in nids:
                nid2_occurxs[nid].append(occurx)

    nid2_occurx_single = {nid: occurxs for nid, occurxs in nid2_occurxs.items()
                          if len(occurxs) <= 1}
    nid2_occurx_resight = {nid: occurxs for nid, occurxs in nid2_occurxs.items()
                           if len(occurxs) > 1}
    singlesight_encounters = ibs.get_name_aids(nid2_occurx_single.keys())

    singlesight_annot_stats = ut.get_stats(list(map(len, singlesight_encounters)),
                                           use_median=True, use_sum=True)
    resight_name_stats = ut.get_stats(list(map(len, nid2_occurx_resight.values())),
                                      use_median=True, use_sum=True)

    try:
        aid_pairs = ibs.filter_aidpairs_by_tags(min_num=0)
        undirected_tags = ibs.get_aidpair_tags(aid_pairs.T[0], aid_pairs.T[1],
                                               directed=False)
        tagged_pairs = list(zip(aid_pairs.tolist(), undirected_tags))
        tag_dict = ut.groupby_tags(tagged_pairs, undirected_tags)
        pair_tag_info = ut.map_dict_vals(len, tag_dict)
        num_reviewed_pairs = sum(ibs.get_annot_pair_is_reviewed(aid_pairs.T[0],
                                                                aid_pairs.T[1]))
        pair_tag_info['num_reviewed'] = num_reviewed_pairs
    except Exception:
        pair_tag_info = {}
    #print(ut.dict_str(pair_tag_info))

    # Annot Stats
    # TODO: number of images where chips cover entire image
    # TODO: total image coverage of annotation
    # TODO: total annotation overlap
    """
    ax2_unknown = ibs.is_aid_unknown(valid_aids)
    ax2_nid = ibs.get_annot_name_rowids(valid_aids)
    assert all([nid < 0 if unknown else nid > 0 for nid, unknown in
                zip(ax2_nid, ax2_unknown)]), 'bad annot nid'
    """
    #
    if verbose:
        print('Checking Annot Species')
    unknown_aids = ut.compress(valid_aids, ibs.is_aid_unknown(valid_aids))
    species_list = ibs.get_annot_species_texts(valid_aids)
    species2_aids = ut.group_items(valid_aids, species_list)
    species2_nAids = {key: len(val) for key, val in species2_aids.items()}

    if verbose:
        print('Checking Multiton/Singleton Species')
    nx2_nAnnots = np.array(list(map(len, nx2_aids)))
    # Seperate singleton / multitons
    multiton_nxs = np.where(nx2_nAnnots > 1)[0]
    singleton_nxs = np.where(nx2_nAnnots == 1)[0]
    unassociated_nxs = np.where(nx2_nAnnots == 0)[0]
    assert len(np.intersect1d(singleton_nxs, multiton_nxs)) == 0, 'intersecting names'
    valid_nxs = np.hstack([multiton_nxs, singleton_nxs])
    num_names_with_gt = len(multiton_nxs)

    # Annot Info
    if verbose:
        print('Checking Annot Info')
    multiton_aids_list = ut.take(nx2_aids, multiton_nxs)
    assert len(set(multiton_nxs)) == len(multiton_nxs)
    if len(multiton_aids_list) == 0:
        multiton_aids = np.array([], dtype=np.int)
    else:
        multiton_aids = np.hstack(multiton_aids_list)
        assert len(set(multiton_aids)) == len(multiton_aids), 'duplicate annot'
    singleton_aids = ut.take(nx2_aids, singleton_nxs)
    multiton_nid2_nannots = list(map(len, multiton_aids_list))

    # Image size stats
    if with_imgsize:
        if verbose:
            print('Checking ImageSize Info')
        gpath_list = ibs.get_image_paths(valid_gids)

        def wh_print_stats(wh_list):
            if len(wh_list) == 0:
                return '{empty}'
            wh_list = np.asarray(wh_list)
            stat_dict = OrderedDict(
                [('max', wh_list.max(0)),
                 ('min', wh_list.min(0)),
                 ('mean', wh_list.mean(0)),
                 ('std', wh_list.std(0))])

            def arr2str(var):
                return ('[' + (', '.join(list(map(lambda x: '%.1f' % x, var)))) + ']')

            ret = (',\n '.join([
                '%s:%s' % (key, arr2str(val))
                for key, val in stat_dict.items()
            ]))
            return '{\n ' + ret + '\n}'

        print('reading image sizes')
        # Image size stats
        img_size_list = ibs.get_image_sizes(valid_gids)
        img_size_stats = wh_print_stats(img_size_list)

        # Chip size stats
        annotation_bbox_list = ibs.get_annot_bboxes(valid_aids)
        annotation_bbox_arr = np.array(annotation_bbox_list)
        if len(annotation_bbox_arr) == 0:
            annotation_size_list = []
        else:
            annotation_size_list = annotation_bbox_arr[:, 2:4]
        chip_size_stats = wh_print_stats(annotation_size_list)
        imgsize_stat_lines = [
            (' # Img in dir = %d' % len(gpath_list)),
            (' Image Size Stats = %s' % (img_size_stats,)),
            (' * Chip Size Stats = %s' % (chip_size_stats,)),
        ]
    else:
        imgsize_stat_lines = []

    if verbose:
        print('Building Stats String')

    multiton_stats = ut.get_stats_str(multiton_nid2_nannots, newlines=True,
                                      use_median=True)

    # Time stats
    unixtime_list = ibs.get_image_unixtime(valid_gids)
    unixtime_list = ut.list_replace(unixtime_list, -1, float('nan'))
    #valid_unixtime_list = [time for time in unixtime_list if time != -1]
    #unixtime_statstr = ibs.get_image_time_statstr(valid_gids)
    if ut.get_argflag('--hackshow-unixtime'):
        show_time_distributions(ibs, unixtime_list)
        ut.show_if_requested()
    unixtime_statstr = ut.get_timestats_str(unixtime_list, newlines=True, full=True)

    # GPS stats
    gps_list_ = ibs.get_image_gps(valid_gids)
    gpsvalid_list = [gps != (-1, -1) for gps in gps_list_]
    gps_list = ut.compress(gps_list_, gpsvalid_list)

    def get_annot_age_stats(aid_list):
        annot_age_months_est_min = ibs.get_annot_age_months_est_min(aid_list)
        annot_age_months_est_max = ibs.get_annot_age_months_est_max(aid_list)
        age_dict = ut.ddict((lambda: 0))
        for min_age, max_age in zip(annot_age_months_est_min,
                                    annot_age_months_est_max):
            if (min_age is None or min_age < 12) and max_age < 12:
                age_dict['Infant'] += 1
            elif 12 <= min_age and min_age < 36 and 12 <= max_age and max_age < 36:
                age_dict['Juvenile'] += 1
            elif 36 <= min_age and (36 <= max_age or max_age is None):
                age_dict['Adult'] += 1
            else:
                print('Found UNKNOWN Age: %r, %r' % (min_age, max_age,))
                age_dict['UNKNOWN'] += 1
        return age_dict

    def get_annot_sex_stats(aid_list):
        annot_sextext_list = ibs.get_annot_sex_texts(aid_list)
        sextext2_aids = ut.group_items(aid_list, annot_sextext_list)
        sex_keys = list(ibs.const.SEX_TEXT_TO_INT.keys())
        assert set(sex_keys) >= set(annot_sextext_list), \
            'bad keys: ' + str(set(annot_sextext_list) - set(sex_keys))
        sextext2_nAnnots = ut.odict(
            [(key, len(sextext2_aids.get(key, []))) for key in sex_keys])
        # Filter 0's
        sextext2_nAnnots = {key: val for key, val in six.iteritems(sextext2_nAnnots)
                            if val != 0}
        return sextext2_nAnnots

    if verbose:
        print('Checking Other Annot Stats')

    qualtext2_nAnnots = ibs.get_annot_qual_stats(valid_aids)
    yawtext2_nAnnots = ibs.get_annot_yaw_stats(valid_aids)
    agetext2_nAnnots = get_annot_age_stats(valid_aids)
    sextext2_nAnnots = get_annot_sex_stats(valid_aids)

    if verbose:
        print('Checking Contrib Stats')

    # Contributor Statistics
    # hack remove colon for image alignment
    def fix_tag_list(tag_list):
        return [None if tag is None else tag.replace(':', ';') for tag in tag_list]
    image_contrib_tags = fix_tag_list(ibs.get_image_contributor_tag(valid_gids))
    annot_contrib_tags = fix_tag_list(ibs.get_annot_image_contributor_tag(valid_aids))
    contrib_tag_to_gids = ut.group_items(valid_gids, image_contrib_tags)
    contrib_tag_to_aids = ut.group_items(valid_aids, annot_contrib_tags)

    contrib_tag_to_qualstats = {key: ibs.get_annot_qual_stats(aids)
                                for key, aids in six.iteritems(contrib_tag_to_aids)}
    contrib_tag_to_viewstats = {key: ibs.get_annot_yaw_stats(aids)
                                for key, aids in six.iteritems(contrib_tag_to_aids)}

    contrib_tag_to_nImages = {key: len(val)
                              for key, val in six.iteritems(contrib_tag_to_gids)}
    contrib_tag_to_nAnnots = {key: len(val)
                              for key, val in six.iteritems(contrib_tag_to_aids)}

    if verbose:
        print('Summarizing')

    # Summarize stats
    num_names = len(valid_nids)
    num_names_unassociated = len(valid_nids) - len(associated_nids)
    num_names_singleton = len(singleton_nxs)
    num_names_multiton = len(multiton_nxs)

    num_singleton_annots = len(singleton_aids)
    num_multiton_annots = len(multiton_aids)
    num_unknown_annots = len(unknown_aids)
    num_annots = len(valid_aids)

    if with_bytes:
        if verbose:
            print('Checking Disk Space')
        ibsdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_ibsdir()))
        dbdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_dbdir()))
        imgdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_imgdir()))
        cachedir_space = ut.byte_str2(ut.get_disk_space(ibs.get_cachedir()))

    if True:
        if verbose:
            print('Check asserts')
        try:
            bad_aids = np.intersect1d(multiton_aids, unknown_aids)
            _num_names_total_check = (num_names_singleton +
                                      num_names_unassociated +
                                      num_names_multiton)
            _num_annots_total_check = (num_unknown_annots +
                                       num_singleton_annots +
                                       num_multiton_annots)
            assert len(bad_aids) == 0, 'intersecting multiton aids and unknown aids'
            assert _num_names_total_check == num_names, 'inconsistent num names'
            #if not request_annot_subset:
            # dont check this if you have an annot subset
            assert _num_annots_total_check == num_annots, 'inconsistent num annots'
        except Exception as ex:
            ut.printex(ex, keys=[
                '_num_names_total_check',
                'num_names',
                '_num_annots_total_check',
                'num_annots',
                'num_names_singleton',
                'num_names_multiton',
                'num_unknown_annots',
                'num_multiton_annots',
                'num_singleton_annots',
            ])
            raise

    # Get contributor statistics
    contrib_rowids = ibs.get_valid_contrib_rowids()
    num_contributors = len(contrib_rowids)

    # print
    num_tabs = 5

    def align2(str_):
        return ut.align(str_, ':', ' :')

    def align_dict2(dict_):
        str_ = ut.dict_str(dict_)
        return align2(str_)

    header_block_lines = (
        [('+============================'), ] + (
            [
                ('+ singleton := single sighting'),
                ('+ multiton := multiple sightings'),
                ('--' * num_tabs),
            ] if not short and with_header else []
        )
    )

    source_block_lines = [
        ('DB Info: ' + ibs.get_dbname()),
        ('DB Notes: ' + ibs.get_dbnotes()),
        ('DB NumContrib: %d' % num_contributors),
    ]

    bytes_block_lines = [
        ('--' * num_tabs),
        ('DB Bytes: '),
        (' +- dbdir nBytes: ' + dbdir_space),
        (' | +- _ibsdb nBytes: ' + ibsdir_space),
        (' | | +-imgdir nBytes: ' + imgdir_space),
        (' | | +-cachedir nBytes: ' + cachedir_space),
    ] if with_bytes else []

    name_block_lines = [
        ('--' * num_tabs),
        ('# Names = %d' % num_names),
        ('# Names (unassociated) = %d' % num_names_unassociated),
        ('# Names (singleton) = %d' % num_names_singleton),
        ('# Names (multiton) = %d' % num_names_multiton),
    ]

    subset_str = ' ' if not request_annot_subset else '(SUBSET)'
    annot_block_lines = [
        ('--' * num_tabs),
        ('# Annots %s = %d' % (subset_str, num_annots,)),
        ('# Annots (unknown) = %d' % num_unknown_annots),
        ('# Annots (singleton) = %d' % num_singleton_annots),
        ('# Annots (multiton) = %d' % num_multiton_annots),
    ]

    annot_per_basic_block_lines = [
        ('--' * num_tabs),
        ('# Annots per Name (multiton) = %s' % (align2(multiton_stats),)),
        ('# Annots per Image = %s' % (align2(gx2_nAnnots_stats),)),
        ('# Annots per Species = %s' % (align_dict2(species2_nAids),)),
    ] if not short else []

    occurrence_block_lines = [
        ('--' * num_tabs),
        ('# Occurrence Per Name (Resights) = %s' % (align_dict2(resight_name_stats),)),
        ('# Annots per Encounter (Singlesights) = %s' % (align_dict2(singlesight_annot_stats),)),
        ('# Pair Tag Info (annots) = %s' % (align_dict2(pair_tag_info),)),
    ] if not short else []

    annot_per_qualview_block_lines = [
        None if short else '# Annots per Viewpoint = %s' % align_dict2(yawtext2_nAnnots),
        None if short else '# Annots per Quality = %s' % align_dict2(qualtext2_nAnnots),
    ]

    annot_per_agesex_block_lines = [
        '# Annots per Age = %s' % align_dict2(agetext2_nAnnots),
        '# Annots per Sex = %s' % align_dict2(sextext2_nAnnots),
    ] if not short and with_agesex else []

    contrib_block_lines = [
        '# Images per contributor = ' + align_dict2(contrib_tag_to_nImages),
        '# Annots per contributor = ' + align_dict2(contrib_tag_to_nAnnots),
        '# Quality per contributor = ' + ut.dict_str(contrib_tag_to_qualstats, sorted_=True),
        '# Viewpoint per contributor = ' + ut.dict_str(contrib_tag_to_viewstats, sorted_=True),
    ] if with_contrib else []

    img_block_lines = [
        ('--' * num_tabs),
        ('# Img = %d' % len(valid_gids)),
        None if short else ('# Img reviewed = %d' % sum(image_reviewed_list)),
        None if short else ('# Img with gps = %d' % len(gps_list)),
        #('# Img with timestamp = %d' % len(valid_unixtime_list)),
        None if short else ('Img Time Stats = %s' % (align2(unixtime_statstr),)),
    ]

    info_str_lines = (
        header_block_lines +
        bytes_block_lines +
        source_block_lines +
        name_block_lines +
        annot_block_lines +
        annot_per_basic_block_lines +
        occurrence_block_lines +
        annot_per_qualview_block_lines +
        annot_per_agesex_block_lines +
        img_block_lines +
        contrib_block_lines +
        imgsize_stat_lines +
        [('L============================'), ]
    )
    info_str = '\n'.join(ut.filter_Nones(info_str_lines))
    info_str2 = ut.indent(info_str, '[{tag}]'.format(tag=tag))
    if verbose:
        print(info_str2)
    locals_ = locals()
    return locals_
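# get_dbinfo assembles its report as a flat list of lines, where None entries
# act as "omit when short" switches that ut.filter_Nones strips out before
# joining. A tiny sketch of that pattern using only the standard library; the
# function name and line text are illustrative only.
def _sketch_report_lines(num_names, num_annots, short=False):
    lines = [
        '+============================',
        '# Names = %d' % num_names,
        '# Annots = %d' % num_annots,
        None if short else '# (verbose-only detail line)',
        'L============================',
    ]
    # None entries vanish, so short reports stay compact without branching
    return '\n'.join(line for line in lines if line is not None)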
def merge_level_order(level_orders, topsort):
    """
    Merge orders of individual subtrees into a total ordering for
    computation.

    >>> level_orders = {
    >>>     'multi_chip_multitest': [['dummy_annot'], ['chip'], ['multitest'],
    >>>         ['multitest_score'], ],
    >>>     'multi_fgweight_multitest': [['dummy_annot'], ['chip', 'probchip'],
    >>>         ['keypoint'], ['fgweight'], ['multitest'], ['multitest_score'], ],
    >>>     'multi_keypoint_nnindexer': [['dummy_annot'], ['chip'], ['keypoint'],
    >>>         ['nnindexer'], ['multitest'], ['multitest_score'], ],
    >>>     'normal': [['dummy_annot'], ['chip', 'probchip'], ['keypoint'],
    >>>         ['fgweight'], ['spam'], ['multitest'], ['multitest_score'], ],
    >>>     'nwise_notch_multitest_1': [['dummy_annot'], ['notch'], ['multitest'],
    >>>         ['multitest_score'], ],
    >>>     'nwise_notch_multitest_2': [['dummy_annot'], ['notch'], ['multitest'],
    >>>         ['multitest_score'], ],
    >>>     'nwise_notch_notchpair_1': [['dummy_annot'], ['notch'], ['notchpair'],
    >>>         ['multitest'], ['multitest_score'], ],
    >>>     'nwise_notch_notchpair_2': [['dummy_annot'], ['notch'], ['notchpair'],
    >>>         ['multitest'], ['multitest_score'], ],
    >>> }
    >>> topsort = [u'dummy_annot', u'notch', u'probchip', u'chip', u'keypoint',
    >>>            u'fgweight', u'nnindexer', u'spam', u'notchpair', u'multitest',
    >>>            u'multitest_score']
    >>> print(ut.repr3(ut.merge_level_order(level_orders, topsort)))

    EG2:
        level_orders = {u'normal': [[u'dummy_annot'], [u'chip', u'probchip'],
                                    [u'keypoint'], [u'fgweight'], [u'spam']]}
        topsort = [u'dummy_annot', u'probchip', u'chip', u'keypoint',
                   u'fgweight', u'spam']
    """
    import utool as ut

    if False:
        compute_order = []
        level_orders = ut.map_dict_vals(ut.total_flatten, level_orders)
        level_sets = ut.map_dict_vals(set, level_orders)
        for tablekey in topsort:
            compute_order.append((tablekey, [groupkey for groupkey, set_
                                             in level_sets.items()
                                             if tablekey in set_]))
        return compute_order
    else:
        # Do on common subgraph
        import itertools
        # Pointer to current level: start at the end and then work your way up.
        main_ptr = len(topsort) - 1
        stack = []
        #from six.moves import zip_longest
        keys = list(level_orders.keys())
        type_to_ptr = {key: -1 for key in keys}
        print('level_orders = %s' % (ut.repr3(level_orders),))
        for count in itertools.count(0):
            print('----')
            print('count = %r' % (count,))
            ptred_levels = []
            for key in keys:
                levels = level_orders[key]
                ptr = type_to_ptr[key]
                try:
                    level = tuple(levels[ptr])
                except IndexError:
                    level = None
                ptred_levels.append(level)
            print('ptred_levels = %r' % (ptred_levels,))
            print('main_ptr = %r' % (main_ptr,))
            # groupkeys, groupxs = ut.group_indices(ptred_levels)
            # Group keys are tablenames
            # They point to the (type) of the input
            # num_levelkeys = len(ut.total_flatten(ptred_levels))
            groupkeys, groupxs = ut.group_indices(ptred_levels)
            main_idx = None
            while main_idx is None and main_ptr >= 0:
                target = topsort[main_ptr]
                print('main_ptr = %r' % (main_ptr,))
                print('target = %r' % (target,))
                # main_idx = ut.listfind(groupkeys, (target,))
                # if main_idx is None:
                possible_idxs = [idx for idx, keytup in enumerate(groupkeys)
                                 if keytup is not None and target in keytup]
                if len(possible_idxs) == 1:
                    main_idx = possible_idxs[0]
                else:
                    main_idx = None
                if main_idx is None:
                    main_ptr -= 1
            if main_idx is None:
                print('break I')
                break
            found_groups = ut.apply_grouping(keys, groupxs)[main_idx]
            print('found_groups = %r' % (found_groups,))
            stack.append((target, found_groups))
            for k in found_groups:
                type_to_ptr[k] -= 1

            if len(found_groups) == len(keys):
                main_ptr -= 1
                if main_ptr < 0:
                    print('break E')
                    break
        print('stack = %s' % (ut.repr3(stack),))
        print('have = %r' % (sorted(ut.take_column(stack, 0)),))
        print('need = %s' % (sorted(ut.total_flatten(level_orders.values())),))
        compute_order = stack[::-1]
        return compute_order
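# The disabled `if False` branch above is the simpler idea: flatten each
# subtree's level order and, walking the topological sort, record which
# subtrees mention each table. A runnable sketch of that variant with plain
# dict/set comprehensions; the function name is illustrative only.
def _sketch_merge_level_order(level_orders, topsort):
    flat_sets = {key: {t for level in levels for t in level}
                 for key, levels in level_orders.items()}
    return [(table, [key for key, set_ in flat_sets.items() if table in set_])
            for table in topsort]

# Example:
# _sketch_merge_level_order({'normal': [['dummy_annot'], ['chip']]},
#                           ['dummy_annot', 'chip'])
# -> [('dummy_annot', ['normal']), ('chip', ['normal'])]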
def intraoccurrence_connected():
    r"""
    CommandLine:
        python -m ibeis.scripts.specialdraw intraoccurrence_connected --show
        python -m ibeis.scripts.specialdraw intraoccurrence_connected --show --postcut
        python -m ibeis.scripts.specialdraw intraoccurrence_connected --show --smaller

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.scripts.specialdraw import *  # NOQA
        >>> result = intraoccurrence_connected()
        >>> print(result)
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> ut.show_if_requested()
    """
    import ibeis
    import plottool as pt
    from ibeis.viz import viz_graph
    import networkx as nx
    pt.ensure_pylab_qt4()
    ibs = ibeis.opendb(defaultdb='PZ_Master1')
    nid2_aid = {
        #4880: [3690, 3696, 3703, 3706, 3712, 3721],
        4880: [3690, 3696, 3703],
        6537: [3739],
        6653: [7671],
        6610: [7566, 7408],
        #6612: [7664, 7462, 7522],
        #6624: [7465, 7360],
        #6625: [7746, 7383, 7390, 7477, 7376, 7579],
        6630: [7586, 7377, 7464, 7478],
        #6677: [7500]
    }
    nid2_dbaids = {
        4880: [33, 6120, 7164],
        6537: [7017, 7206],
        6653: [7660],
    }
    if ut.get_argflag('--small') or ut.get_argflag('--smaller'):
        del nid2_aid[6630]
        del nid2_aid[6537]
        del nid2_dbaids[6537]
        if ut.get_argflag('--smaller'):
            nid2_dbaids[4880].remove(33)
            nid2_aid[4880].remove(3690)
            nid2_aid[6610].remove(7408)
        #del nid2_aid[4880]
        #del nid2_dbaids[4880]

    aids = ut.flatten(nid2_aid.values())

    temp_nids = [1] * len(aids)
    postcut = ut.get_argflag('--postcut')
    aids_list = ibs.group_annots_by_name(aids)[0]
    ensure_edges = 'all' if True or not postcut else None
    unlabeled_graph = viz_graph.make_netx_graph_from_aid_groups(
        ibs, aids_list,
        #invis_edges=invis_edges,
        ensure_edges=ensure_edges, temp_nids=temp_nids)
    viz_graph.color_by_nids(unlabeled_graph,
                            unique_nids=[1] * len(list(unlabeled_graph.nodes())))
    viz_graph.ensure_node_images(ibs, unlabeled_graph)
    nx.set_node_attributes(unlabeled_graph, 'shape', 'rect')
    #unlabeled_graph = unlabeled_graph.to_undirected()

    # Find the "database exemplars for these annots"
    if False:
        gt_aids = ibs.get_annot_groundtruth(aids)
        gt_aids = [ut.setdiff(s, aids) for s in gt_aids]
        dbaids = ut.unique(ut.flatten(gt_aids))
        dbaids = ibs.filter_annots_general(dbaids, minqual='good')
        ibs.get_annot_quality_texts(dbaids)
    else:
        dbaids = ut.flatten(nid2_dbaids.values())
    exemplars = nx.DiGraph()
    #graph = exemplars  # NOQA
    exemplars.add_nodes_from(dbaids)

    def add_clique(graph, nodes, edgeattrs={}, nodeattrs={}):
        edge_list = ut.upper_diag_self_prodx(nodes)
        graph.add_edges_from(edge_list, **edgeattrs)
        return edge_list

    for aids_, nid in zip(*ibs.group_annots_by_name(dbaids)):
        add_clique(exemplars, aids_)
    viz_graph.ensure_node_images(ibs, exemplars)
    viz_graph.color_by_nids(exemplars, ibs=ibs)

    nx.set_node_attributes(unlabeled_graph, 'framewidth', False)
    nx.set_node_attributes(exemplars, 'framewidth', 4.0)

    nx.set_node_attributes(unlabeled_graph, 'group', 'unlab')
    nx.set_node_attributes(exemplars, 'group', 'exemp')

    #big_graph = nx.compose_all([unlabeled_graph])
    big_graph = nx.compose_all([exemplars, unlabeled_graph])

    # add sparse connections from unlabeled to exemplars
    import numpy as np
    rng = np.random.RandomState(0)
    if True or not postcut:
        for aid_ in unlabeled_graph.nodes():
            flags = rng.rand(len(exemplars)) > .5
            nid_ = ibs.get_annot_nids(aid_)
            exnids = np.array(ibs.get_annot_nids(list(exemplars.nodes())))
            flags = np.logical_or(exnids == nid_, flags)
            exmatches = ut.compress(list(exemplars.nodes()), flags)
            big_graph.add_edges_from(list(ut.product([aid_], exmatches)),
                                     color=pt.ORANGE, implicit=True)
    else:
        for aid_ in unlabeled_graph.nodes():
            flags = rng.rand(len(exemplars)) > .5
            exmatches = ut.compress(list(exemplars.nodes()), flags)
            nid_ = ibs.get_annot_nids(aid_)
            exnids = np.array(ibs.get_annot_nids(exmatches))
            exmatches = ut.compress(exmatches, exnids == nid_)
            big_graph.add_edges_from(list(ut.product([aid_], exmatches)))
        pass

    nx.set_node_attributes(big_graph, 'shape', 'rect')
    #if False and postcut:
    #    ut.nx_delete_node_attr(big_graph, 'nid')
    #    ut.nx_delete_edge_attr(big_graph, 'color')
    #    viz_graph.ensure_graph_nid_labels(big_graph, ibs=ibs)
    #    viz_graph.color_by_nids(big_graph, ibs=ibs)
    #    big_graph = big_graph.to_undirected()

    layoutkw = {
        'sep': 1 / 5,
        'prog': 'neato',
        'overlap': 'false',
        #'splines': 'ortho',
        'splines': 'spline',
    }

    as_directed = False
    #as_directed = True
    #hacknode = True
    hacknode = 0

    graph = big_graph
    ut.nx_ensure_agraph_color(graph)
    if hacknode:
        nx.set_edge_attributes(graph, 'taillabel',
                               {e: str(e[0]) for e in graph.edges()})
        nx.set_edge_attributes(graph, 'headlabel',
                               {e: str(e[1]) for e in graph.edges()})

    explicit_graph = pt.get_explicit_graph(graph)
    _, layout_info = pt.nx_agraph_layout(explicit_graph, orig_graph=graph,
                                         inplace=True, **layoutkw)

    if ut.get_argflag('--smaller'):
        graph.node[7660]['pos'] = np.array([550, 350])
        graph.node[6120]['pos'] = np.array([200, 600]) + np.array([350, -400])
        graph.node[7164]['pos'] = np.array([200, 480]) + np.array([350, -400])
        nx.set_node_attributes(graph, 'pin', 'true')
        _, layout_info = pt.nx_agraph_layout(graph, inplace=True, **layoutkw)
    elif ut.get_argflag('--small'):
        graph.node[7660]['pos'] = np.array([750, 350])
        graph.node[33]['pos'] = np.array([300, 600]) + np.array([350, -400])
        graph.node[6120]['pos'] = np.array([500, 600]) + np.array([350, -400])
        graph.node[7164]['pos'] = np.array([410, 480]) + np.array([350, -400])
        nx.set_node_attributes(graph, 'pin', 'true')
        _, layout_info = pt.nx_agraph_layout(graph, inplace=True, **layoutkw)

    if not postcut:
        #pt.show_nx(graph.to_undirected(), layout='agraph', layoutkw=layoutkw,
        #           as_directed=False)
        #pt.show_nx(graph, layout='agraph', layoutkw=layoutkw,
        #           as_directed=as_directed, hacknode=hacknode)
        pt.show_nx(graph, layout='custom', layoutkw=layoutkw,
                   as_directed=as_directed, hacknode=hacknode)
    else:
        #explicit_graph = pt.get_explicit_graph(graph)
        #_, layout_info = pt.nx_agraph_layout(explicit_graph, orig_graph=graph,
        #                                     **layoutkw)
        #layout_info['edge']['alpha'] = .8
        #pt.apply_graph_layout_attrs(graph, layout_info)
        #graph_layout_attrs = layout_info['graph']
        ##edge_layout_attrs = layout_info['edge']
        ##node_layout_attrs = layout_info['node']
        #for key, vals in layout_info['node'].items():
        #    #print('[special] key = %r' % (key,))
        #    nx.set_node_attributes(graph, key, vals)
        #for key, vals in layout_info['edge'].items():
        #    #print('[special] key = %r' % (key,))
        #    nx.set_edge_attributes(graph, key, vals)
        #nx.set_edge_attributes(graph, 'alpha', .8)
        #graph.graph['splines'] = graph_layout_attrs.get('splines', 'line')
        #graph.graph['splines'] = 'polyline'  # graph_layout_attrs.get('splines', 'line')
        #graph.graph['splines'] = 'line'

        cut_graph = graph.copy()
        edge_list = list(cut_graph.edges())
        edge_nids = np.array(ibs.unflat_map(ibs.get_annot_nids, edge_list))
        cut_flags = edge_nids.T[0] != edge_nids.T[1]
        cut_edges = ut.compress(edge_list, cut_flags)
        cut_graph.remove_edges_from(cut_edges)
        ut.nx_delete_node_attr(cut_graph, 'nid')
        viz_graph.ensure_graph_nid_labels(cut_graph, ibs=ibs)

        #ut.nx_get_default_node_attributes(exemplars, 'color', None)
        ut.nx_delete_node_attr(cut_graph, 'color', nodes=unlabeled_graph.nodes())
        aid2_color = ut.nx_get_default_node_attributes(cut_graph, 'color', None)
        nid2_colors = ut.group_items(aid2_color.values(),
                                     ibs.get_annot_nids(aid2_color.keys()))
        nid2_colors = ut.map_dict_vals(ut.filter_Nones, nid2_colors)
        nid2_colors = ut.map_dict_vals(ut.unique, nid2_colors)
        #for val in nid2_colors.values():
        #    assert len(val) <= 1
        # Get initial colors
        nid2_color_ = {nid: colors_[0]
                       for nid, colors_ in nid2_colors.items()
                       if len(colors_) == 1}

        graph = cut_graph
        viz_graph.color_by_nids(cut_graph, ibs=ibs, nid2_color_=nid2_color_)
        nx.set_node_attributes(cut_graph, 'framewidth', 4)

        pt.show_nx(cut_graph, layout='custom', layoutkw=layoutkw,
                   as_directed=as_directed, hacknode=hacknode)

    pt.zoom_factory()
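# The color bookkeeping at the end of intraoccurrence_connected reduces
# per-node colors to at most one color per name id, keeping only names whose
# surviving nodes agree. A standalone sketch of that reduction with plain
# dicts (node -> nid, node -> color); the function name is illustrative only.
def _sketch_nid_colors(node_to_nid, node_to_color):
    nid_to_colors = {}
    for node, nid in node_to_nid.items():
        color = node_to_color.get(node)
        if color is not None:
            nid_to_colors.setdefault(nid, set()).add(color)
    # keep only unambiguous assignments, like nid2_color_ above
    return {nid: next(iter(cs))
            for nid, cs in nid_to_colors.items() if len(cs) == 1}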
def check_database_overlap(ibs1, ibs2):
    """
    CommandLine:
        python -m wbia.other.dbinfo --test-get_dbinfo:1 --db PZ_MTEST
        dev.py -t listdbs
        python -m wbia.dbio.export_subset check_database_overlap --db PZ_MTEST --db2 PZ_MOTHERS

    CommandLine:
        python -m wbia.dbio.export_subset check_database_overlap
        python -m wbia.dbio.export_subset check_database_overlap --db1=PZ_MTEST --db2=PZ_Master0  # NOQA
        python -m wbia.dbio.export_subset check_database_overlap --db1=NNP_Master3 --db2=PZ_Master0  # NOQA
        python -m wbia.dbio.export_subset check_database_overlap --db1=GZ_Master0 --db2=GZ_ALL
        python -m wbia.dbio.export_subset check_database_overlap --db1=GZ_ALL --db2=lewa_grevys
        python -m wbia.dbio.export_subset check_database_overlap --db1=PZ_FlankHack --db2=PZ_Master1
        python -m wbia.dbio.export_subset check_database_overlap --db1=PZ_PB_RF_TRAIN --db2=PZ_Master1

    Example:
        >>> # SCRIPT
        >>> from wbia.dbio.export_subset import *  # NOQA
        >>> import wbia
        >>> import utool as ut
        >>> #ibs1 = wbia.opendb(db='PZ_Master0')
        >>> #ibs2 = wbia.opendb(dbdir='/raid/work2/Turk/PZ_Master')
        >>> db1 = ut.get_argval('--db1', str, default='PZ_MTEST')
        >>> db2 = ut.get_argval('--db2', str, default='testdb1')
        >>> dbdir1 = ut.get_argval('--dbdir1', str, default=None)
        >>> dbdir2 = ut.get_argval('--dbdir2', str, default=None)
        >>> ibs1 = wbia.opendb(db=db1, dbdir=dbdir1)
        >>> ibs2 = wbia.opendb(db=db2, dbdir=dbdir2)
        >>> check_database_overlap(ibs1, ibs2)
    """
    import numpy as np

    def print_isect(items1, items2, lbl=''):
        set1_ = set(items1)
        set2_ = set(items2)
        items_isect = set1_.intersection(set2_)
        fmtkw1 = dict(
            part=1,
            lbl=lbl,
            num=len(set1_),
            num_isect=len(items_isect),
            percent=100 * len(items_isect) / len(set1_),
        )
        fmtkw2 = dict(
            part=2,
            lbl=lbl,
            num=len(set2_),
            num_isect=len(items_isect),
            percent=100 * len(items_isect) / len(set2_),
        )
        fmt_a = '  * Num {lbl} {part}: {num_isect} / {num} = {percent:.2f}%'
        # fmt_b = '  * Num {lbl} isect: {num}'
        logger.info('Checking {lbl} intersection'.format(lbl=lbl))
        logger.info(fmt_a.format(**fmtkw1))
        logger.info(fmt_a.format(**fmtkw2))
        # logger.info(fmt_b.format(lbl=lbl, num=len(items_isect)))
        # items = items_isect
        # list_ = items1
        x_list1 = ut.find_list_indexes(items1, items_isect)
        x_list2 = ut.find_list_indexes(items2, items_isect)
        return x_list1, x_list2

    gids1 = ibs1.images()
    gids2 = ibs2.images()

    # Find common images
    # items1, items2, lbl, = gids1.uuids, gids2.uuids, 'images'
    gx_list1, gx_list2 = print_isect(gids1.uuids, gids2.uuids, 'images')
    gids_isect1 = gids1.take(gx_list1)
    gids_isect2 = gids2.take(gx_list2)
    assert gids_isect2.uuids == gids_isect1.uuids, 'sequence must be aligned'

    SHOW_ISECT_GIDS = False
    if SHOW_ISECT_GIDS:
        if len(gx_list1) > 0:
            logger.info('gids_isect1 = %r' % (gids_isect1,))
            logger.info('gids_isect2 = %r' % (gids_isect2,))
            if False:
                # Debug code
                import wbia.viz
                import wbia.plottool as pt
                gid_pairs = list(zip(gids_isect1, gids_isect2))
                pairs_iter = ut.ichunks(gid_pairs, chunksize=8)
                for fnum, pairs in enumerate(pairs_iter, start=1):
                    pnum_ = pt.make_pnum_nextgen(nRows=len(pairs), nCols=2)
                    for gid1, gid2 in pairs:
                        wbia.viz.show_image(ibs1, gid1, pnum=pnum_(), fnum=fnum)
                        wbia.viz.show_image(ibs2, gid2, pnum=pnum_(), fnum=fnum)

    # if False:
    #     aids1 = ibs1.get_valid_aids()
    #     aids2 = ibs2.get_valid_aids()
    #     ibs1.update_annot_visual_uuids(aids1)
    #     ibs2.update_annot_visual_uuids(aids2)
    #     ibs1.update_annot_semantic_uuids(aids1)
    #     ibs2.update_annot_semantic_uuids(aids2)

    # Check to see which intersecting images have different annotations
    image_aids_isect1 = gids_isect1.aids
    image_aids_isect2 = gids_isect2.aids
    image_avuuids_isect1 = np.array(
        ibs1.unflat_map(ibs1.get_annot_visual_uuids, image_aids_isect1))
    image_avuuids_isect2 = np.array(
        ibs2.unflat_map(ibs2.get_annot_visual_uuids, image_aids_isect2))
    changed_image_xs = np.nonzero(
        image_avuuids_isect1 != image_avuuids_isect2)[0]

    if len(changed_image_xs) > 0:
        logger.info(
            'There are %d images with changes in annotation visual information'
            % (len(changed_image_xs),))
        changed_gids1 = ut.take(gids_isect1, changed_image_xs)
        changed_gids2 = ut.take(gids_isect2, changed_image_xs)
        SHOW_CHANGED_GIDS = False
        if SHOW_CHANGED_GIDS:
            logger.info('changed_gids1 = %r' % (changed_gids1,))
            logger.info('changed_gids2 = %r' % (changed_gids2,))
            # if False:
            #     # Debug code
            #     import wbia.viz
            #     import wbia.plottool as pt
            #     gid_pairs = list(zip(changed_gids1, changed_gids2))
            #     pairs_iter = ut.ichunks(gid_pairs, chunksize=8)
            #     for fnum, pairs in enumerate(pairs_iter, start=1):
            #         pnum_ = pt.make_pnum_nextgen(nRows=len(pairs), nCols=2)
            #         for gid1, gid2 in pairs:
            #             wbia.viz.show_image(
            #                 ibs1, gid1, pnum=pnum_(), fnum=fnum)
            #             wbia.viz.show_image(
            #                 ibs2, gid2, pnum=pnum_(), fnum=fnum)

    # Check for overlapping annotations (visual info only) in general
    aids1 = ibs1.annots()
    aids2 = ibs2.annots()

    # Check for overlapping annotations (visual + semantic info) in general
    aux_list1, aux_list2 = print_isect(aids1.uuids, aids2.uuids, 'uuids')
    avx_list1, avx_list2 = print_isect(
        aids1.visual_uuids, aids2.visual_uuids, 'vuuids')
    asx_list1, asx_list2 = print_isect(
        aids1.semantic_uuids, aids2.semantic_uuids, 'suuids')

    # Check which annots with the same visual uuids have different semantic
    # uuids
    changed_ax_list1 = ut.setdiff_ordered(avx_list1, asx_list1)
    changed_ax_list2 = ut.setdiff_ordered(avx_list2, asx_list2)
    assert len(changed_ax_list1) == len(changed_ax_list2)
    assert (ut.take(aids1.visual_uuids, changed_ax_list1) ==
            ut.take(aids2.visual_uuids, changed_ax_list2))

    changed_aids1 = np.array(ut.take(aids1, changed_ax_list1))
    changed_aids2 = np.array(ut.take(aids2, changed_ax_list2))

    changed_sinfo1 = ibs1.get_annot_semantic_uuid_info(changed_aids1)
    changed_sinfo2 = ibs2.get_annot_semantic_uuid_info(changed_aids2)
    sinfo1_arr = np.array(changed_sinfo1)
    sinfo2_arr = np.array(changed_sinfo2)
    is_semantic_diff = sinfo2_arr != sinfo1_arr

    # Inspect semantic differences
    if np.any(is_semantic_diff):
        colxs, rowxs = np.nonzero(is_semantic_diff)
        colx2_rowids = ut.group_items(rowxs, colxs)
        prop2_rowids = ut.map_dict_keys(
            changed_sinfo1._fields.__getitem__, colx2_rowids)
        logger.info('changed_value_counts = ' +
                    ut.repr2(ut.map_dict_vals(len, prop2_rowids)))
        yawx = changed_sinfo1._fields.index('yaw')

        # Show change in viewpoints
        if len(colx2_rowids[yawx]) > 0:
            vp_category_diff = ibsfuncs.viewpoint_diff(
                sinfo1_arr[yawx], sinfo2_arr[yawx]).astype(float)
            # Look for category changes
            # any_diff = np.floor(vp_category_diff) > 0
            # _xs = np.nonzero(any_diff)[0]
            # _aids1 = changed_aids1.take(_xs)
            # _aids2 = changed_aids2.take(_xs)
            # Look for significant changes
            is_significant_diff = np.floor(vp_category_diff) > 1
            significant_xs = np.nonzero(is_significant_diff)[0]
            significant_aids1 = changed_aids1.take(significant_xs)
            significant_aids2 = changed_aids2.take(significant_xs)
            logger.info('There are %d significant viewpoint changes'
                        % (len(significant_aids2),))
            # vt.ori_distance(sinfo1_arr[yawx], sinfo2_arr[yawx])
            # zip(ibs1.get_annot_viewpoint_code(significant_aids1),
            #     ibs2.get_annot_viewpoint_code(significant_aids2))
            # logger.info('yawdiff = %r' % )
            # if False:
            #     # Hack: Apply fixes
            #     good_yaws = ibs2.get_annot_yaws(significant_aids2)
            #     ibs1.set_annot_yaws(significant_aids1, good_yaws)
            #     pass
            if False:
                # Debug code
                import wbia.viz
                import wbia.plottool as pt
                # aid_pairs = list(zip(_aids1, _aids2))
                aid_pairs = list(zip(significant_aids1, significant_aids2))
                pairs_iter = ut.ichunks(aid_pairs, chunksize=8)
                for fnum, pairs in enumerate(pairs_iter, start=1):
                    pnum_ = pt.make_pnum_nextgen(nRows=len(pairs), nCols=2)
                    for aid1, aid2 in pairs:
                        wbia.viz.show_chip(
                            ibs1, aid1, pnum=pnum_(), fnum=fnum,
                            show_viewcode=True, nokpts=True,
                        )
                        wbia.viz.show_chip(
                            ibs2, aid2, pnum=pnum_(), fnum=fnum,
                            show_viewcode=True, nokpts=True,
                        )

    nAnnots_per_image1 = np.array(ibs1.get_image_num_annotations(gids1))
    nAnnots_per_image2 = np.array(ibs2.get_image_num_annotations(gids2))
    images_without_annots1 = sum(nAnnots_per_image1 == 0)
    images_without_annots2 = sum(nAnnots_per_image2 == 0)
    logger.info('images_without_annots1 = %r' % (images_without_annots1,))
    logger.info('images_without_annots2 = %r' % (images_without_annots2,))
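
# The helper below is a minimal standalone sketch (not part of wbia) of the
# overlap report that print_isect computes inside check_database_overlap: the
# same intersection, expressed as a percentage of each side. The name
# _demo_report_overlap is hypothetical and the uuid strings are toy data.
def _demo_report_overlap(items1, items2, lbl=''):
    set1_, set2_ = set(items1), set(items2)
    isect = set1_ & set2_
    for part, set_ in [(1, set1_), (2, set2_)]:
        # guard the divide; print_isect assumes non-empty inputs
        percent = 100 * len(isect) / len(set_) if set_ else 0.0
        print('  * Num %s %d: %d / %d = %.2f%%'
              % (lbl, part, len(isect), len(set_), percent))
    return isect


# Example: 2 of 3 image uuids in db1 also appear in db2
# _demo_report_overlap(['u1', 'u2', 'u3'], ['u2', 'u3', 'u4'], 'images')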
def show_top_featmatches(qreq_, cm_list):
    """
    Args:
        qreq_ (wbia.QueryRequest): query request object with hyper-parameters
        cm_list (list):

    SeeAlso:
        python -m wbia --tf TestResult.draw_feat_scoresep --show --db PZ_MTEST -t best:lnbnn_on=True,lnbnn_normalizer=normlnbnn-test -a default --sephack
        python -m wbia --tf TestResult.draw_feat_scoresep --show --db PZ_Master1 -t best:lnbnn_on=True -a timectrl --sephack
        python -m wbia --tf TestResult.draw_feat_scoresep --show --db PZ_MTEST -t best:lnbnn_on=True -a default:size=30 --sephack
        python -m wbia --tf TestResult.draw_feat_scoresep --show --db PZ_MTEST -t best:K=1,Knorm=5,lnbnn_on=True -a default:size=30 --sephack
        python -m wbia --tf TestResult.draw_feat_scoresep --show --db PZ_MTEST -t best:K=1,Knorm=3,lnbnn_on=True -a default --sephack

    CommandLine:
        python -m wbia.viz.viz_nearest_descriptors --exec-show_top_featmatches --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.viz.viz_nearest_descriptors import *  # NOQA
        >>> import wbia
        >>> cm_list, qreq_ = wbia.testdata_cmlist(defaultdb='PZ_MTEST',
        >>>                                       a=['default:has_none=mother,size=30'])
        >>> show_top_featmatches(qreq_, cm_list)
        >>> ut.quit_if_noshow()
        >>> import wbia.plottool as pt
        >>> ut.show_if_requested()
    """
    # for cm in cm_list:
    #     cm.score_annot_csum(qreq_)
    import numpy as np
    import vtool as vt
    from functools import partial

    # Stack chipmatches
    ibs = qreq_.ibs
    infos = [cm.get_flat_fm_info() for cm in cm_list]
    flat_metadata = {k: np.concatenate(v)
                     for k, v in ut.dict_stack2(infos).items()}
    fsv_flat = flat_metadata['fsv']
    flat_metadata['fs'] = fsv_flat.prod(axis=1)
    aids1 = flat_metadata['aid1'][:, None]
    aids2 = flat_metadata['aid2'][:, None]
    flat_metadata['aid_pairs'] = np.concatenate([aids1, aids2], axis=1)

    # Take sample of metadata
    sortx = flat_metadata['fs'].argsort()[::-1]
    num = len(cm_list) * 3
    # num = 10
    taker = partial(np.take, indices=sortx[:num], axis=0)
    flat_metadata_top = ut.map_dict_vals(taker, flat_metadata)
    aid1s, aid2s, fms = ut.dict_take(flat_metadata_top, ['aid1', 'aid2', 'fm'])

    aids = np.unique(np.hstack((aid1s, aid2s)))
    annots = {
        aid: ibs.get_annot_lazy_dict(aid, config2_=qreq_.qparams)
        for aid in aids
    }

    label_lists = (ibs.get_match_truths(aid1s, aid2s) ==
                   ibs.const.EVIDENCE_DECISION.POSITIVE)
    patch_size = 64

    def extract_patches(annots, aid, fxs):
        """ custom_func(lazydict, key, subkeys) for multigroup_lookup """
        annot = annots[aid]
        kpts = annot['kpts']
        rchip = annot['rchip']
        kpts_m = kpts.take(fxs, axis=0)
        warped_patches, warped_subkpts = vt.get_warped_patches(
            rchip, kpts_m, patch_size=patch_size)
        return warped_patches

    data_lists = vt.multigroup_lookup(
        annots, [aid1s, aid2s], fms.T, extract_patches)

    import wbia.plottool as pt  # NOQA
    pt.ensureqt()
    import wbia_cnn
    inter = wbia_cnn.draw_results.interact_patches(
        label_lists,
        data_lists,
        flat_metadata_top,
        chunck_sizes=(2, 4),
        ibs=ibs,
        hack_one_per_aid=False,
        sortby='fs',
        qreq_=qreq_,
    )
    inter.show()
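
# A self-contained sketch (numpy only, toy data) of the stack-then-rank
# pattern used above: concatenate each field across chipmatches, score with a
# product over the fsv columns, and slice every field with one shared sort
# index so the top rows stay aligned. _demo_stack_topk is hypothetical.
def _demo_stack_topk(num=2):
    import numpy as np
    from functools import partial
    infos = [
        {'fsv': np.array([[0.9, 0.8], [0.2, 0.5]]), 'aid1': np.array([1, 1])},
        {'fsv': np.array([[0.7, 0.9]]), 'aid1': np.array([2])},
    ]
    # what ut.dict_stack2 + np.concatenate accomplish above
    flat = {k: np.concatenate([info[k] for info in infos]) for k in infos[0]}
    flat['fs'] = flat['fsv'].prod(axis=1)
    sortx = flat['fs'].argsort()[::-1]
    # one taker applied to every field keeps the rows aligned across fields
    taker = partial(np.take, indices=sortx[:num], axis=0)
    return {k: taker(v) for k, v in flat.items()}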
def analyize_multiple_drives(drives):
    """
    CommandLine:
        export PYTHONPATH=$PYTHONPATH:~/local/scripts
        python -m register_files --exec-analyize_multiple_drives --drives ~ E:/ D:/
        python -m register_files --exec-analyize_multiple_drives --drives ~ /media/Store
        python register_files.py --exec-analyize_multiple_drives --drives /media/joncrall/media/ /media/joncrall/store/ /media/joncrall/backup
        cd ~/local/scripts

    Example:
        >>> from register_files import *  # NOQA
        >>> dpaths = ut.get_argval('--drives', type_=list, default=['E://', 'D://'])  # 'D:/', 'E:/', 'F:/'
        >>> drives = [Drive(root_dpath) for root_dpath in dpaths]
        >>> drive = Broadcaster(drives)
        >>> drive.compute_info()
        >>> #drive.build_fpath_hashes()
        >>> drive.check_consistency()
        >>> E = drive = drives[0]
        >>> analyize_multiple_drives(drives)
        >>> #D, E, F = drives
        >>> #drive = D
    """
    # These imports are assumed to exist at module level in register_files;
    # they are repeated here so the function is self-contained.
    import os
    import re
    import numpy as np
    from functools import reduce
    from os.path import join

    # -----
    ## Find the files shared on all disks
    #allhave = reduce(ut.dict_isect_combine, [drive.hash_to_fpaths for drive in drives])
    #print('#allhave = %r' % (len(allhave),))
    #allhave.keys()[0:3]
    #allhave.values()[0:3]
    #ut.embed()
    #for drive in drives:
    #    drive.rrr()
    #    print(drive.root_dpath)
    #    print(len(drive.hash_to_unique_fpaths))
    #    print(len(drive.hash_to_fpaths))
    #    print(len(drive.hash_to_unique_fpaths) / len(drive.hash_to_fpaths))
    # Build dict to map from dpath to file pointers of unique descendants
    #unique_fidxs_list = drive.hash_to_fidxs.values()
    #fidxs = ut.flatten(unique_fidxs_list)
    esc = re.escape

    # Find which files exist on all drives
    hashes_list = [set(drive_.hash_to_fidxs.keys()) for drive_ in drives]
    allhave_hashes = reduce(set.intersection, hashes_list)
    print('Drives %r have %d file hashes in common'
          % (drives, len(allhave_hashes)))

    # Show pairwise intersection sizes between the drives
    lbls = [drive_.root_dpath for drive_ in drives]
    isect_lens = np.zeros((len(drives), len(drives)))
    for idx1, (hashes1, drive1) in enumerate(zip(hashes_list, drives)):
        for idx2, (hashes2, drive2) in enumerate(zip(hashes_list, drives)):
            if drive1 is not drive2:
                common = set.intersection(hashes1, hashes2)
                isect_lens[idx1, idx2] = len(common)
            else:
                isect_lens[idx1, idx2] = len(hashes2)
    import pandas as pd
    print(pd.DataFrame(isect_lens, index=lbls, columns=lbls))

    # for drive in drives
    drive = drives[0]
    print('Finding unique files in drive=%r' % (drive,))
    # Get subset of fidxs on this drive
    unflat_valid_fidxs = ut.take(drive.hash_to_fidxs, allhave_hashes)
    valid_fidxs = sorted(ut.flatten(unflat_valid_fidxs))

    # Filter fpaths by patterns
    ignore_patterns = [
        esc('Thumbs.db')
    ]
    ignore_paths = [
        'Spotify'
    ]
    patterns = ignore_paths + ignore_patterns
    valid_fpaths = ut.take(drive.fpath_list, valid_fidxs)
    valid_flags = [not any([re.search(p, fpath) for p in patterns])
                   for fpath in valid_fpaths]
    valid_flags = np.array(valid_flags)
    valid_fidxs = ut.compress(valid_fidxs, valid_flags)
    print(ut.filtered_infostr(valid_flags, 'invalid fpaths'))

    fidxs = valid_fidxs
    valid_fpaths = sorted(ut.take(drive.fpath_list, fidxs))
    dpath_to_unique_fidx = build_dpath_to_fidx(
        valid_fpaths, valid_fidxs, drive.root_dpath)

    def make_tree_structure(valid_fpaths):
        root = {}

        def dict_getitem_default(dict_, key, type_):
            try:
                val = dict_[key]
            except KeyError:
                val = type_()
                dict_[key] = val
            return val

        for fpath in ut.ProgIter(valid_fpaths, 'building tree', freq=30000):
            path_components = ut.dirsplit(fpath)
            current = root
            for comp in path_components[:-1]:
                current = dict_getitem_default(current, comp, dict)
            contents = dict_getitem_default(current, '.', list)
            contents.append(path_components[-1])
        return root

    root = make_tree_structure(valid_fpaths)

    def print_tree(root, path, dpath_to_unique_fidx=dpath_to_unique_fidx,
                   drive=drive, depth=None):
        print('path = %r' % (path,))
        print(ut.byte_str2(drive.get_total_nbytes(dpath_to_unique_fidx[path])))
        path_components = ut.dirsplit(path)
        # Navigate to correct spot in tree
        current = root
        for c in path_components:
            current = current[c]
        print(ut.repr3(current, truncate=1))

    def get_tree_info(root, path, dpath_to_unique_fidx=dpath_to_unique_fidx,
                      drive=drive, depth=0):
        path_components = ut.dirsplit(path)
        current = root
        for c in path_components:
            current = current[c]
        if isinstance(current, list):
            tree_tmp = []
        else:
            key_list = list(current.keys())
            child_list = [join(path, key) for key in key_list]
            dpath_nbytes_list = [
                drive.get_total_nbytes(dpath_to_unique_fidx.get(child, []))
                for child in child_list
            ]
            nfiles_list = [
                len(dpath_to_unique_fidx.get(child, []))
                for child in child_list
            ]
            tree_tmp = sorted([
                (key, ut.byte_str2(nbytes), nfiles)
                if depth == 0 else
                (key, ut.byte_str2(nbytes), nfiles,
                 get_tree_info(root, path=child,
                               dpath_to_unique_fidx=dpath_to_unique_fidx,
                               drive=drive, depth=depth - 1))
                for key, child, nbytes, nfiles in zip(
                    key_list, child_list, dpath_nbytes_list, nfiles_list)
            ])
        return tree_tmp

    def print_tree_struct(*args, **kwargs):
        tree_str = ut.indent(ut.repr3(get_tree_info(*args, **kwargs), nl=1))
        print(tree_str)
        #bytes_str = ut.byte_str2(drive.get_total_nbytes(dpath_to_unique_fidx[path]))
        #print('path = %r, %s' % (path, bytes_str))
        #print(ut.repr3(key_list))
        return tree_str

    dpath_to_fidxs = ut.map_dict_vals(set, drive.dpath_to_fidx)
    complete_unique_dpaths = ut.dict_isect(dpath_to_fidxs, dpath_to_unique_fidx)
    complete_root = make_tree_structure(complete_unique_dpaths.keys())

    globals()['ut'] = ut
    globals()['os'] = os
    globals()['join'] = join
    print(ut.byte_str2(drive.get_total_nbytes(dpath_to_unique_fidx['E:\\'])))
    get_tree_info(root, path='E:\\', depth=0)
    get_tree_info(complete_root, path='E:\\', depth=0)
    get_tree_info(root, path='E:\\', depth=1)
    print(print_tree_struct(root, path='E:\\Clutter', depth=0))
    print_tree(root, path=r'E:\TV')
    print_tree(root, path=r'E:\Movies')
    print_tree(root, path=r'E:\Boot')
    print_tree(root, path='E:\\')
    print_tree(root, path=r'E:\Downloaded')
    print_tree(root, path=r'E:\Recordings')
    print_tree(root, path=r'E:\Clutter')
    print_tree(root, path=r'E:\Audio Books')

    # TODO:
    #  * Ignore list
    #  * Find and rectify internal duplicates
    #  * Update registry with new files and deleted ones
    #  * Ensure that all unique files are backed up
    #    Index the C: Drive as well.
    #  * Lazy properties of drive
    #  * Multiple types of identifiers (hash, fname, ext, fsize)
    #    Drive subsets
    #    Export/Import Drive for analysis on other machines
    ut.embed()
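
# A minimal sketch of the make_tree_structure idea from the function above,
# without utool: nested dicts keyed by path component, with filenames
# gathered under the '.' key. _demo_make_tree is a hypothetical stand-in that
# splits on '/' and '\\' instead of using ut.dirsplit.
def _demo_make_tree(fpaths):
    root = {}
    for fpath in fpaths:
        parts = [p for p in fpath.replace('\\', '/').split('/') if p]
        node = root
        for comp in parts[:-1]:
            node = node.setdefault(comp, {})
        node.setdefault('.', []).append(parts[-1])
    return root


# _demo_make_tree(['E:/TV/a.mkv', 'E:/readme.txt'])
# -> {'E:': {'TV': {'.': ['a.mkv']}, '.': ['readme.txt']}}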
def ensure_tf(X):
    termfreq = ut.dict_hist(X.wx_list)
    # do what video google does
    termfreq = ut.map_dict_vals(lambda x: x / len(X.wx_list), termfreq)
    X.termfreq = termfreq
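
# ensure_tf normalizes a histogram of visual-word indexes into term
# frequencies, tf[w] = count(w) / len(document), following the video-google
# bag-of-words scheme the comment above refers to. A dependency-free sketch
# with toy data (the helper name is hypothetical):
def _demo_termfreq(wx_list=(3, 3, 7, 9, 3, 7)):
    from collections import Counter
    termfreq = {wx: n / len(wx_list) for wx, n in Counter(wx_list).items()}
    # frequencies over one document always sum to 1
    assert abs(sum(termfreq.values()) - 1.0) < 1e-9
    return termfreq  # {3: 0.5, 7: 0.333..., 9: 0.166...}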
def intraoccurrence_connected():
    r"""
    CommandLine:
        python -m ibeis.scripts.specialdraw intraoccurrence_connected --show
        python -m ibeis.scripts.specialdraw intraoccurrence_connected --show --postcut
        python -m ibeis.scripts.specialdraw intraoccurrence_connected --show --smaller

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.scripts.specialdraw import *  # NOQA
        >>> result = intraoccurrence_connected()
        >>> print(result)
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> ut.show_if_requested()
    """
    import ibeis
    import plottool as pt
    from ibeis.viz import viz_graph
    import networkx as nx
    pt.ensure_pylab_qt4()
    ibs = ibeis.opendb(defaultdb='PZ_Master1')
    nid2_aid = {
        #4880: [3690, 3696, 3703, 3706, 3712, 3721],
        4880: [3690, 3696, 3703],
        6537: [3739],
        6653: [7671],
        6610: [7566, 7408],
        #6612: [7664, 7462, 7522],
        #6624: [7465, 7360],
        #6625: [7746, 7383, 7390, 7477, 7376, 7579],
        6630: [7586, 7377, 7464, 7478],
        #6677: [7500]
    }
    nid2_dbaids = {
        4880: [33, 6120, 7164],
        6537: [7017, 7206],
        6653: [7660]
    }
    if ut.get_argflag('--small') or ut.get_argflag('--smaller'):
        del nid2_aid[6630]
        del nid2_aid[6537]
        del nid2_dbaids[6537]
        if ut.get_argflag('--smaller'):
            nid2_dbaids[4880].remove(33)
            nid2_aid[4880].remove(3690)
            nid2_aid[6610].remove(7408)
        #del nid2_aid[4880]
        #del nid2_dbaids[4880]

    aids = ut.flatten(nid2_aid.values())

    temp_nids = [1] * len(aids)
    postcut = ut.get_argflag('--postcut')
    aids_list = ibs.group_annots_by_name(aids)[0]
    ensure_edges = 'all' if True or not postcut else None
    unlabeled_graph = viz_graph.make_netx_graph_from_aid_groups(
        ibs, aids_list,
        #invis_edges=invis_edges,
        ensure_edges=ensure_edges,
        temp_nids=temp_nids)
    viz_graph.color_by_nids(
        unlabeled_graph,
        unique_nids=[1] * len(list(unlabeled_graph.nodes())))
    viz_graph.ensure_node_images(ibs, unlabeled_graph)
    nx.set_node_attributes(unlabeled_graph, 'shape', 'rect')
    #unlabeled_graph = unlabeled_graph.to_undirected()

    # Find the "database exemplars for these annots"
    if False:
        gt_aids = ibs.get_annot_groundtruth(aids)
        gt_aids = [ut.setdiff(s, aids) for s in gt_aids]
        dbaids = ut.unique(ut.flatten(gt_aids))
        dbaids = ibs.filter_annots_general(dbaids, minqual='good')
        ibs.get_annot_quality_texts(dbaids)
    else:
        dbaids = ut.flatten(nid2_dbaids.values())
    exemplars = nx.DiGraph()
    #graph = exemplars  # NOQA
    exemplars.add_nodes_from(dbaids)

    def add_clique(graph, nodes, edgeattrs={}, nodeattrs={}):
        edge_list = ut.upper_diag_self_prodx(nodes)
        graph.add_edges_from(edge_list, **edgeattrs)
        return edge_list

    for aids_, nid in zip(*ibs.group_annots_by_name(dbaids)):
        add_clique(exemplars, aids_)
    viz_graph.ensure_node_images(ibs, exemplars)
    viz_graph.color_by_nids(exemplars, ibs=ibs)

    nx.set_node_attributes(unlabeled_graph, 'framewidth', False)
    nx.set_node_attributes(exemplars, 'framewidth', 4.0)

    nx.set_node_attributes(unlabeled_graph, 'group', 'unlab')
    nx.set_node_attributes(exemplars, 'group', 'exemp')

    #big_graph = nx.compose_all([unlabeled_graph])
    big_graph = nx.compose_all([exemplars, unlabeled_graph])

    # add sparse connections from unlabeled to exemplars
    import numpy as np
    rng = np.random.RandomState(0)
    if True or not postcut:
        for aid_ in unlabeled_graph.nodes():
            flags = rng.rand(len(exemplars)) > .5
            nid_ = ibs.get_annot_nids(aid_)
            exnids = np.array(ibs.get_annot_nids(list(exemplars.nodes())))
            flags = np.logical_or(exnids == nid_, flags)
            exmatches = ut.compress(list(exemplars.nodes()), flags)
            big_graph.add_edges_from(list(ut.product([aid_], exmatches)),
                                     color=pt.ORANGE, implicit=True)
    else:
        for aid_ in unlabeled_graph.nodes():
            flags = rng.rand(len(exemplars)) > .5
            exmatches = ut.compress(list(exemplars.nodes()), flags)
            nid_ = ibs.get_annot_nids(aid_)
            exnids = np.array(ibs.get_annot_nids(exmatches))
            exmatches = ut.compress(exmatches, exnids == nid_)
            big_graph.add_edges_from(list(ut.product([aid_], exmatches)))
        pass

    nx.set_node_attributes(big_graph, 'shape', 'rect')
    #if False and postcut:
    #    ut.nx_delete_node_attr(big_graph, 'nid')
    #    ut.nx_delete_edge_attr(big_graph, 'color')
    #    viz_graph.ensure_graph_nid_labels(big_graph, ibs=ibs)
    #    viz_graph.color_by_nids(big_graph, ibs=ibs)
    #    big_graph = big_graph.to_undirected()

    layoutkw = {
        'sep': 1 / 5,
        'prog': 'neato',
        'overlap': 'false',
        #'splines': 'ortho',
        'splines': 'spline',
    }

    as_directed = False
    #as_directed = True
    #hacknode = True
    hacknode = 0

    graph = big_graph
    ut.nx_ensure_agraph_color(graph)
    if hacknode:
        nx.set_edge_attributes(graph, 'taillabel',
                               {e: str(e[0]) for e in graph.edges()})
        nx.set_edge_attributes(graph, 'headlabel',
                               {e: str(e[1]) for e in graph.edges()})

    explicit_graph = pt.get_explicit_graph(graph)
    _, layout_info = pt.nx_agraph_layout(explicit_graph, orig_graph=graph,
                                         inplace=True, **layoutkw)

    if ut.get_argflag('--smaller'):
        graph.node[7660]['pos'] = np.array([550, 350])
        graph.node[6120]['pos'] = np.array([200, 600]) + np.array([350, -400])
        graph.node[7164]['pos'] = np.array([200, 480]) + np.array([350, -400])
        nx.set_node_attributes(graph, 'pin', 'true')
        _, layout_info = pt.nx_agraph_layout(graph, inplace=True, **layoutkw)
    elif ut.get_argflag('--small'):
        graph.node[7660]['pos'] = np.array([750, 350])
        graph.node[33]['pos'] = np.array([300, 600]) + np.array([350, -400])
        graph.node[6120]['pos'] = np.array([500, 600]) + np.array([350, -400])
        graph.node[7164]['pos'] = np.array([410, 480]) + np.array([350, -400])
        nx.set_node_attributes(graph, 'pin', 'true')
        _, layout_info = pt.nx_agraph_layout(graph, inplace=True, **layoutkw)

    if not postcut:
        #pt.show_nx(graph.to_undirected(), layout='agraph', layoutkw=layoutkw,
        #           as_directed=False)
        #pt.show_nx(graph, layout='agraph', layoutkw=layoutkw,
        #           as_directed=as_directed, hacknode=hacknode)
        pt.show_nx(graph, layout='custom', layoutkw=layoutkw,
                   as_directed=as_directed, hacknode=hacknode)
    else:
        #explicit_graph = pt.get_explicit_graph(graph)
        #_, layout_info = pt.nx_agraph_layout(explicit_graph, orig_graph=graph,
        #                                     **layoutkw)
        #layout_info['edge']['alpha'] = .8
        #pt.apply_graph_layout_attrs(graph, layout_info)
        #graph_layout_attrs = layout_info['graph']
        ##edge_layout_attrs = layout_info['edge']
        ##node_layout_attrs = layout_info['node']
        #for key, vals in layout_info['node'].items():
        #    #print('[special] key = %r' % (key,))
        #    nx.set_node_attributes(graph, key, vals)
        #for key, vals in layout_info['edge'].items():
        #    #print('[special] key = %r' % (key,))
        #    nx.set_edge_attributes(graph, key, vals)
        #nx.set_edge_attributes(graph, 'alpha', .8)
        #graph.graph['splines'] = graph_layout_attrs.get('splines', 'line')
        #graph.graph['splines'] = 'polyline'
        #graph.graph['splines'] = 'line'

        cut_graph = graph.copy()
        edge_list = list(cut_graph.edges())
        edge_nids = np.array(ibs.unflat_map(ibs.get_annot_nids, edge_list))
        cut_flags = edge_nids.T[0] != edge_nids.T[1]
        cut_edges = ut.compress(edge_list, cut_flags)
        cut_graph.remove_edges_from(cut_edges)
        ut.nx_delete_node_attr(cut_graph, 'nid')
        viz_graph.ensure_graph_nid_labels(cut_graph, ibs=ibs)

        #ut.nx_get_default_node_attributes(exemplars, 'color', None)
        ut.nx_delete_node_attr(cut_graph, 'color',
                               nodes=unlabeled_graph.nodes())
        aid2_color = ut.nx_get_default_node_attributes(cut_graph, 'color', None)
        nid2_colors = ut.group_items(aid2_color.values(),
                                     ibs.get_annot_nids(aid2_color.keys()))
        nid2_colors = ut.map_dict_vals(ut.filter_Nones, nid2_colors)
        nid2_colors = ut.map_dict_vals(ut.unique, nid2_colors)
        #for val in nid2_colors.values():
        #    assert len(val) <= 1
        # Get initial colors
        nid2_color_ = {nid: colors_[0]
                       for nid, colors_ in nid2_colors.items()
                       if len(colors_) == 1}

        graph = cut_graph
        viz_graph.color_by_nids(cut_graph, ibs=ibs, nid2_color_=nid2_color_)
        nx.set_node_attributes(cut_graph, 'framewidth', 4)

        pt.show_nx(cut_graph, layout='custom', layoutkw=layoutkw,
                   as_directed=as_directed, hacknode=hacknode)
    pt.zoom_factory()
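
# The --postcut branch above keeps only within-name edges: an edge is cut
# whenever its endpoints map to different nids. A minimal networkx sketch of
# that filter, with a toy dict standing in for ibs.get_annot_nids:
def _demo_name_cut():
    import networkx as nx
    g = nx.Graph()
    g.add_edges_from([(1, 2), (2, 3), (3, 4)])
    nid = {1: 'a', 2: 'a', 3: 'b', 4: 'b'}  # toy annot -> name labels
    # cut every edge that crosses a name boundary
    cut_edges = [(u, v) for u, v in g.edges() if nid[u] != nid[v]]
    g.remove_edges_from(cut_edges)
    return sorted(g.edges())  # [(1, 2), (3, 4)]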