def _cached_pairwise_features(extr, edges):
    """
    Create pairwise features for annotations in a test inference object
    based on the features that were used during learning.

    TODO: need a more systematic way of specifying which feature
    dimensions need to be computed.

    Notes:
        Given an edge (u, v), we need to:
        * Check which classifiers we have
        * Check which feat-cols the classifier needs, and construct a
          configuration that can achieve that.
        * Construct the chip/feat config
        * Construct the vsone config
        * Additional LNBNN enriching config
        * Pairwise feature construction config
        * Then we can apply the feature to the classifier

    Ignore:
        edges = [(1, 2)]
    """
    edges = list(edges)
    if extr.verbose:
        print('[pairfeat] Requesting {} cached pairwise features'.format(
            len(edges)))

    # TODO: use object properties
    if len(edges) == 0:
        assert extr.feat_dims is not None, 'no edges and unset feat dims'
        index = nxu.ensure_multi_index([], ('aid1', 'aid2'))
        feats = pd.DataFrame(columns=extr.feat_dims, index=index)
        return feats
    else:
        use_cache = not extr.need_lnbnn and len(edges) > 2
        cache_dir = join(extr.ibs.get_cachedir(), 'infr_bulk_cache')
        feat_cfgstr = extr._make_cfgstr(edges)
        cacher = ub.Cacher('bulk_pairfeats_v3', feat_cfgstr,
                           enabled=use_cache, dpath=cache_dir,
                           verbose=extr.verbose - 3)

        # if cacher.exists() and extr.verbose > 3:
        #     fpath = cacher.get_fpath()
        #     print('Load match cache size: {}'.format(
        #         ut.get_file_nBytes_str(fpath)))

        data = cacher.tryload()
        if data is None:
            data = extr._make_pairwise_features(edges)
            cacher.save(data)

        # if cacher.enabled and extr.verbose > 3:
        #     fpath = cacher.get_fpath()
        #     print('Save match cache size: {}'.format(
        #         ut.get_file_nBytes_str(fpath)))

        matches, feats = data
        feats = extr._postprocess_feats(feats)
    return feats
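# --- Illustrative sketch (not part of the original API) ----------------------
# A minimal standalone demo of the ub.Cacher tryload/save pattern that
# _cached_pairwise_features relies on. The cache name, dependency string, and
# the expensive computation are hypothetical stand-ins for this sketch.
def _demo_cacher_pattern():
    import ubelt as ub

    def expensive_computation():
        # Stand-in for extr._make_pairwise_features(edges)
        return [x ** 2 for x in range(10)]

    # The second argument keys the cache; change it and the cache misses.
    cacher = ub.Cacher('demo_cache_v1', 'demo_param_hash', verbose=1)
    data = cacher.tryload()  # returns None on a cache miss
    if data is None:
        data = expensive_computation()
        cacher.save(data)    # the next call loads this result instead
    return data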
def predict_proba_df(verif, edges):
    """
    CommandLine:
        python -m ibeis.algo.graph.demo DummyVerif.predict_edges

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.graph.demo import *  # NOQA
        >>> from ibeis.algo.graph import demo
        >>> import networkx as nx
        >>> kwargs = dict(num_pccs=40, size=2)
        >>> infr = demo.demodata_infr(**kwargs)
        >>> verif = infr.dummy_verif
        >>> edges = list(infr.graph.edges())
        >>> probs = verif.predict_proba_df(edges)
        >>> #print('scores = %r' % (scores,))
        >>> #hashid = ut.hash_data(scores)
        >>> #print('hashid = %r' % (hashid,))
        >>> #assert hashid == 'cdlkytilfeqgmtsihvhqwffmhczqmpil'
    """
    from ibeis.algo.graph import nx_utils as nxu
    import pandas as pd
    infr = verif.infr
    edges = list(it.starmap(verif.infr.e_, edges))
    prob_cache = infr.task_probs['match_state']
    is_miss = np.array([e not in prob_cache for e in edges])
    # is_hit = ~is_miss
    if np.any(is_miss):
        miss_edges = ut.compress(edges, is_miss)
        miss_truths = [verif._get_truth(edge) for edge in miss_edges]
        grouped_edges = ut.group_items(miss_edges, miss_truths,
                                       sorted_=False)
        # Need to make this deterministic too
        states = [POSTV, NEGTV, INCMP]
        for key in sorted(grouped_edges.keys()):
            group = grouped_edges[key]
            probs0 = randn(shape=[len(group)], rng=verif.rng, a_max=1,
                           a_min=0, **verif.dummy_params[key])
            # Randomly assign the remaining probability mass to the
            # other two states
            probs1 = verif.rng.rand(len(group)) * (1 - probs0)
            probs2 = 1 - (probs0 + probs1)
            for edge, probs in zip(group, zip(probs0, probs1, probs2)):
                prob_cache[edge] = ut.dzip(states, probs)

    probs = pd.DataFrame(
        ut.take(prob_cache, edges),
        index=nxu.ensure_multi_index(edges, ('aid1', 'aid2')))
    return probs
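# --- Illustrative sketch (not part of the original API) ----------------------
# Standalone illustration of how the dummy verifier splits probability mass:
# probs0 is drawn for the true state, probs1 takes a random share of the
# remainder, and probs2 is whatever is left, so each row sums to one. The
# distribution parameters here are made up for the demo.
def _demo_three_way_probs():
    import numpy as np
    rng = np.random.RandomState(0)
    probs0 = np.clip(rng.normal(loc=0.8, scale=0.1, size=5), 0, 1)
    probs1 = rng.rand(5) * (1 - probs0)  # random share of the remainder
    probs2 = 1 - (probs0 + probs1)       # whatever mass is left
    assert np.allclose(probs0 + probs1 + probs2, 1.0)
    return probs0, probs1, probs2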
def match_state_df(infr, index):
    """ Returns groundtruth state based on the ibeis controller """
    index = ensure_multi_index(index, ('aid1', 'aid2'))
    aid_pairs = np.asarray(index.tolist())
    aid_pairs = vt.ensure_shape(aid_pairs, (None, 2))
    is_same = infr.is_same(aid_pairs)
    is_comp = infr.is_comparable(aid_pairs)
    # A plain dict preserves column order here (DataFrame.from_items was
    # removed in pandas 1.0)
    match_state_df = pd.DataFrame({
        NEGTV: ~is_same & is_comp,
        POSTV: is_same & is_comp,
        INCMP: ~is_comp,
    })
    match_state_df.index = index
    return match_state_df
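# --- Illustrative sketch (not part of the original API) ----------------------
# A toy version of the one-hot truth frame built by match_state_df: exactly
# one of the three state columns is True for every pair. The plain string
# column names below stand in for the NEGTV/POSTV/INCMP constants, and the
# boolean arrays are fabricated inputs.
def _demo_match_state_onehot():
    import numpy as np
    import pandas as pd
    is_same = np.array([True, False, True])
    is_comp = np.array([True, True, False])
    df = pd.DataFrame({
        'nomatch': ~is_same & is_comp,
        'match': is_same & is_comp,
        'notcomp': ~is_comp,
    })
    # The three states are mutually exclusive and exhaustive
    assert (df.sum(axis=1) == 1).all()
    return df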
def _make_pairwise_features(extr, edges):
    """
    Construct matches and their pairwise features

    CommandLine:
        python -m ibeis.algo.verif.pairfeat _make_pairwise_features

    Doctest:
        >>> from ibeis.algo.verif.pairfeat import *
        >>> from ibeis.algo.graph import demo
        >>> infr = demo.demodata_mtest_infr()
        >>> extr = PairwiseFeatureExtractor(ibs=infr.ibs)
        >>> match_config = {'K': 1, 'Knorm': 3, 'affine_invariance': True,
        >>>                 'augment_orientation': True, 'checks': 20,
        >>>                 'ratio_thresh': 0.8, 'refine_method': 'homog',
        >>>                 'sv_on': True, 'sver_xy_thresh': 0.01,
        >>>                 'symmetric': True, 'weight': 'fgweights'}
        >>> local_keys = [
        >>>     'fgweights', 'match_dist', 'norm_dist', 'norm_x1', 'norm_x2',
        >>>     'norm_y1', 'norm_y2', 'ratio_score', 'scale1', 'scale2',
        >>>     'sver_err_ori', 'sver_err_scale', 'sver_err_xy',
        >>>     'weighted_norm_dist', 'weighted_ratio_score']
        >>> pairfeat_cfg = {
        >>>     'bin_key': 'ratio',
        >>>     'bins': [0.6, 0.7, 0.8],
        >>>     'indices': [],
        >>>     'local_keys': local_keys,
        >>>     'sorters': [],
        >>>     'summary_ops': {'len', 'mean', 'sum'}
        >>> }
        >>> global_keys = ['gps', 'qual', 'time', 'view']
        >>> ibs = infr.ibs
        >>> extr = PairwiseFeatureExtractor(ibs, match_config=match_config,
        >>>                                 pairfeat_cfg=pairfeat_cfg,
        >>>                                 global_keys=global_keys)
        >>> multi_index = True
        >>> edges = [(1, 2), (2, 3)]
        >>> matches, X = extr._make_pairwise_features(edges)
        >>> featinfo = vt.AnnotPairFeatInfo(X.columns)
        >>> print(featinfo.get_infostr())
        >>> match = matches[0]
        >>> glob_X = match._make_global_feature_vector(global_keys)
        >>> assert len(glob_X) == 19
    """
    edges = ut.lmap(tuple, ut.aslist(edges))
    if len(edges) == 0:
        return [], []

    matches = extr._enriched_pairwise_matches(edges)
    # ---------------
    # Try different feature constructions
    print('[extr] building pairwise features')
    pairfeat_cfg = extr.pairfeat_cfg.copy()
    use_na = pairfeat_cfg.pop('use_na')
    pairfeat_cfg['summary_ops'] = set(pairfeat_cfg['summary_ops'])
    X = pd.DataFrame([
        m.make_feature_vector(**pairfeat_cfg)
        for m in ut.ProgIter(matches, label='making pairwise feats')
    ])
    multi_index = True
    if multi_index:
        # Index features by edges
        uv_index = nxu.ensure_multi_index(edges, ('aid1', 'aid2'))
        X.index = uv_index
    X[pd.isnull(X)] = np.nan
    # Hack to fix feature validity: a 0/0 gps/time delta means zero speed,
    # not inf. This must run before infs are blanked to nan below,
    # otherwise the isinf check can never fire.
    if 'global(speed)' in X.columns:
        if np.any(np.isinf(X['global(speed)'])):
            flags = np.isinf(X['global(speed)'])
            numer = X.loc[flags, 'global(gps_delta)']
            denom = X.loc[flags, 'global(time_delta)']
            newvals = np.full(len(numer), np.nan)
            newvals[(numer == 0) & (denom == 0)] = 0
            X.loc[flags, 'global(speed)'] = newvals
    X[np.isinf(X)] = np.nan
    # Re-order column names to ensure dimensions are consistent
    # (reindex_axis was removed in pandas 1.0; reindex is equivalent)
    X = X.reindex(sorted(X.columns), axis=1)

    aid_pairs_ = [(m.annot1['aid'], m.annot2['aid']) for m in matches]
    assert aid_pairs_ == edges, 'edge ordering changed'

    if not use_na:
        # Fill nan values with very large values to work around lack of
        # nan support in sklearn master.
        X[pd.isnull(X)] = (2 ** 30) - 1
    return matches, X
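# --- Illustrative sketch (not part of the original API) ----------------------
# Isolated demo of the speed-repair hack in _make_pairwise_features: a 0/0
# gps/time delta means "no movement", so the resulting non-finite speed is
# mapped back to 0, while a nonzero distance over zero time stays invalid
# (nan). Column names mirror the real feature columns; the data is made up.
def _demo_speed_repair():
    import numpy as np
    import pandas as pd
    X = pd.DataFrame({
        'global(gps_delta)': [0.0, 5.0],
        'global(time_delta)': [0.0, 0.0],
        'global(speed)': [np.inf, np.inf],
    })
    flags = np.isinf(X['global(speed)'])
    numer = X.loc[flags, 'global(gps_delta)']
    denom = X.loc[flags, 'global(time_delta)']
    newvals = np.full(len(numer), np.nan)
    newvals[(numer == 0) & (denom == 0)] = 0
    X.loc[flags, 'global(speed)'] = newvals
    return X  # speeds are now [0.0, nan]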
def ensure_priority_scores(infr, priority_edges):
    """
    Ensures that priority attributes are assigned to the edges.
    This does not change the state of the queue.

    Doctest:
        >>> import ibeis
        >>> ibs = ibeis.opendb('PZ_MTEST')
        >>> infr = ibeis.AnnotInference(ibs, aids='all')
        >>> infr.ensure_mst()
        >>> priority_edges = list(infr.edges())[0:1]
        >>> infr.ensure_priority_scores(priority_edges)

    Doctest:
        >>> import ibeis
        >>> ibs = ibeis.opendb('PZ_MTEST')
        >>> infr = ibeis.AnnotInference(ibs, aids='all')
        >>> infr.ensure_mst()
        >>> infr.load_published()
        >>> priority_edges = list(infr.edges())
        >>> infr.ensure_priority_scores(priority_edges)

    Doctest:
        >>> from ibeis.algo.graph import demo
        >>> infr = demo.demodata_infr(num_pccs=6, p_incon=.5, size_std=2)
        >>> edges = list(infr.edges())
        >>> infr.ensure_priority_scores(edges)
    """
    infr.print('Checking for verifiers: %r' % (infr.verifiers,))

    if infr.verifiers:
        infr.print('Prioritizing {} edges with one-vs-one probs'.format(
            len(priority_edges)), 1)
        infr.print('Using thresholds: %r' % (infr.task_thresh,))
        infr.print('Using infr.params[autoreview.enabled]          : %r' %
                   (infr.params['autoreview.enabled'],))
        infr.print('Using infr.params[autoreview.prioritize_nonpos]: %r' %
                   (infr.params['autoreview.prioritize_nonpos'],))

        infr.ensure_task_probs(priority_edges)

        primary_task = 'match_state'
        match_probs = infr.task_probs[primary_task]
        primary_thresh = infr.task_thresh[primary_task]

        # Read match_probs into a DataFrame
        primary_probs = pd.DataFrame(
            ut.take(match_probs, priority_edges),
            index=nxu.ensure_multi_index(priority_edges,
                                         ('aid1', 'aid2')))

        # Convert match-state probabilities into priorities
        prob_match = primary_probs[POSTV]

        # Initialize priorities to the probability of matching
        default_priority = prob_match.copy()

        # If the edges are currently between the same individual, then
        # prioritize by non-positive probability (because those edges
        # might expose an inconsistency)
        already_pos = [
            infr.pos_graph.node_label(u) == infr.pos_graph.node_label(v)
            for u, v in priority_edges
        ]
        default_priority[already_pos] = 1 - default_priority[already_pos]

        if infr.params['autoreview.enabled']:
            if infr.params['autoreview.prioritize_nonpos']:
                # Give positives that pass automatic thresholds high priority
                _probs = primary_probs[POSTV]
                flags = _probs > primary_thresh[POSTV]
                default_priority[flags] = np.maximum(
                    default_priority[flags], _probs[flags]) + 1

                # Give negatives that pass automatic thresholds high priority
                _probs = primary_probs[NEGTV]
                flags = _probs > primary_thresh[NEGTV]
                default_priority[flags] = np.maximum(
                    default_priority[flags], _probs[flags]) + 1

                # Give not-comps that pass automatic thresholds high priority
                _probs = primary_probs[INCMP]
                flags = _probs > primary_thresh[INCMP]
                default_priority[flags] = np.maximum(
                    default_priority[flags], _probs[flags]) + 1

        infr.set_edge_attrs('prob_match', prob_match.to_dict())
        infr.set_edge_attrs('default_priority', default_priority.to_dict())

        metric = 'default_priority'
        priority = default_priority
    elif infr.cm_list is not None:
        infr.print('Prioritizing {} edges with one-vs-many scores'.format(
            len(priority_edges)), 1)
        # Not given any deploy classifier, this is the best we can do
        scores = infr._make_lnbnn_scores(priority_edges)
        metric = 'normscore'
        priority = scores
    else:
        infr.print('WARNING: No verifiers to prioritize {} edge(s)'.format(
            len(priority_edges)))
        metric = 'random'
        priority = np.zeros(len(priority_edges)) + 1e-6

    infr.set_edge_attrs(metric, ut.dzip(priority_edges, priority))
    return metric, priority
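# --- Illustrative sketch (not part of the original API) ----------------------
# Toy numbers (not real verifier output) showing the two priority rules in
# ensure_priority_scores: edges already inside the same positive clique are
# prioritized by their *non*-match probability (to hunt inconsistencies),
# and probabilities above the auto-review threshold are bumped past 1 so
# they are examined first. The threshold value here is fabricated.
def _demo_priority_rules():
    import numpy as np
    import pandas as pd
    prob_match = pd.Series([0.95, 0.40, 0.10])
    already_pos = np.array([True, False, False])

    priority = prob_match.copy()
    # Rule 1: flip priority for edges already inside a positive clique
    priority[already_pos] = 1 - priority[already_pos]

    # Rule 2: boost anything past the auto-review threshold beyond 1
    thresh = 0.9
    flags = prob_match > thresh
    priority[flags] = np.maximum(priority[flags], prob_match[flags]) + 1
    return priority  # [1.95, 0.40, 0.10]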