def get_namescore_nonvoting_feature_flags(fm_list, fs_list, dnid_list, name_groupxs, kpts1=None):
    r"""
    DEPRECATE

    fm_list = [fm[:min(len(fm), 10)] for fm in fm_list]
    fs_list = [fs[:min(len(fs), 10)] for fs in fs_list]
    """
    fx1_list = [fm.T[0] for fm in fm_list]
    # Group annotation matches by name
    name_grouped_fx1_list = vt.apply_grouping_(fx1_list, name_groupxs)
    name_grouped_fs_list = vt.apply_grouping_(fs_list, name_groupxs)
    # Stack up all matches to a particular name; keep track of the original
    # indices via offsets
    name_invertable_flat_fx1_list = list(map(ut.invertible_flatten2_numpy, name_grouped_fx1_list))
    name_grouped_fx1_flat = ut.get_list_column(name_invertable_flat_fx1_list, 0)
    name_grouped_invertable_cumsum_list = ut.get_list_column(name_invertable_flat_fx1_list, 1)
    name_grouped_fs_flat = list(map(np.hstack, name_grouped_fs_list))
    if kpts1 is not None:
        xys1_ = vt.get_xys(kpts1).T
        kpts_xyid_list = vt.compute_unique_data_ids(xys1_)
        # Make nested group for every name by query feature index
        # (accounting for duplicate orientation)
        name_grouped_comboid_flat = list(kpts_xyid_list.take(fx1) for fx1 in name_grouped_fx1_flat)
        xyid_groupxs_list = list(vt.group_indices(xyid_flat)[1] for xyid_flat in name_grouped_comboid_flat)
        name_group_fx1_groupxs_list = xyid_groupxs_list
    else:
        # Make nested group for every name by query feature index
        fx1_groupxs_list = [vt.group_indices(fx1_flat)[1] for fx1_flat in name_grouped_fx1_flat]
        name_group_fx1_groupxs_list = fx1_groupxs_list
    name_grouped_fid_grouped_fs_list = [
        vt.apply_grouping(fs_flat, fid_groupxs)
        for fs_flat, fid_groupxs in zip(name_grouped_fs_flat, name_group_fx1_groupxs_list)
    ]

    # Flag which features are valid in this grouped space. Only one keypoint
    # should be able to vote for each group
    name_grouped_fid_grouped_isvalid_list = [
        np.array([fs_group.max() == fs_group for fs_group in fid_grouped_fs_list])
        for fid_grouped_fs_list in name_grouped_fid_grouped_fs_list
    ]

    # Go back to being grouped only in name space
    name_grouped_isvalid_flat_list = [
        vt.invert_apply_grouping2(fid_grouped_isvalid_list, fid_groupxs, dtype=bool)
        for fid_grouped_isvalid_list, fid_groupxs in zip(name_grouped_fid_grouped_isvalid_list, name_group_fx1_groupxs_list)
    ]

    name_grouped_isvalid_unflat_list = [
        ut.unflatten2(isvalid_flat, invertable_cumsum_list)
        for isvalid_flat, invertable_cumsum_list in zip(name_grouped_isvalid_flat_list, name_grouped_invertable_cumsum_list)
    ]

    # Reports which features were valid in name scoring for every annotation
    featflag_list = vt.invert_apply_grouping(name_grouped_isvalid_unflat_list, name_groupxs)
    return featflag_list
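
# The "one vote per feature group" rule above is easiest to see on a tiny
# example.  The following is a minimal numpy-only sketch of that rule; the
# helper name `_one_vote_per_id` is illustrative and not part of the module
# (and unlike the vtool-based code above, ties keep only the first maximum).
def _one_vote_per_id(ids, scores):
    """
    >>> import numpy as np
    >>> ids    = np.array([0, 0, 1, 2, 2, 2])
    >>> scores = np.array([.1, .9, .5, .2, .8, .3])
    >>> _one_vote_per_id(ids, scores).astype(int).tolist()
    [0, 1, 1, 0, 1, 0]
    """
    import numpy as np
    flags = np.zeros(len(ids), dtype=bool)
    for id_ in np.unique(ids):
        idxs = np.flatnonzero(ids == id_)
        # only the best scoring feature in each id-group is allowed to vote
        flags[idxs[scores[idxs].argmax()]] = True
    return flags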
def done_part(cand, num_neighbs):
    # Find the first `num_neighbs` complete columns in each row
    rowxs, colxs = np.where(cand.validflags)
    unique_rows, groupxs = vt.group_indices(rowxs, assume_sorted=True)
    first_k_groupxs = [groupx[0:num_neighbs] for groupx in groupxs]
    if DEBUG_REQUERY:
        assert all(ut.issorted(groupx) for groupx in groupxs)
        assert all([len(group) == num_neighbs for group in first_k_groupxs])
    chosen_xs = np.array(ut.flatten(first_k_groupxs), dtype=int)
    # chosen_xs = np.hstack(first_k_groupxs)
    # then convert these to multi-indices
    done_rows = rowxs.take(chosen_xs)
    done_cols = colxs.take(chosen_xs)
    multi_index = (done_rows, done_cols)
    # done_shape = (cand.validflags.shape[0], num_neighbs)
    # flat_xs = np.ravel_multi_index(multi_index, done_shape)
    flat_xs = np.ravel_multi_index(multi_index, cand.idxs.shape)
    _shape = (-1, num_neighbs)
    idxs = cand.idxs.take(flat_xs).reshape(_shape)
    dists = cand.dists.take(flat_xs).reshape(_shape)
    trueks = colxs.take(chosen_xs).reshape(_shape)
    if DEBUG_REQUERY:
        # dists2 = dists.copy()
        for count, (row, cols) in enumerate(zip(unique_rows, groupxs)):
            pass
        assert np.all(np.diff(dists, axis=1) >= 0)
        valid = cand.validflags.take(flat_xs).reshape(_shape)
        assert np.all(valid)
    return idxs, dists, trueks
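
# A hedged, numpy-only sketch of the column selection `done_part` performs:
# for each row take the first `k` columns whose valid flag is set.  The name
# `_first_k_valid_cols` is illustrative; it assumes every row has at least
# `k` valid entries (the same assumption the DEBUG_REQUERY assert checks).
def _first_k_valid_cols(validflags, k):
    """
    >>> import numpy as np
    >>> validflags = np.array([[0, 1, 1, 1],
    ...                        [1, 1, 0, 1]], dtype=bool)
    >>> rows, cols = _first_k_valid_cols(validflags, 2)
    >>> cols.tolist()
    [[1, 2], [0, 1]]
    """
    import numpy as np
    rowxs, colxs = np.where(validflags)
    rows, cols = [], []
    for row in range(validflags.shape[0]):
        sel = np.flatnonzero(rowxs == row)[:k]
        rows.append(rowxs[sel])
        cols.append(colxs[sel])
    return np.array(rows), np.array(cols)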
def report_partitioning_statistics(new_reduced_joint):
    # compute partitioning statistics
    import vtool_ibeis as vt
    vals, idxs = vt.group_indices(new_reduced_joint.values.ravel())
    # groupsize = list(map(len, idxs))
    # groupassigns = ut.unflat_vecmap(new_reduced_joint.assignment, idxs)
    all_states = new_reduced_joint._row_labels(asindex=True)
    clusterstats = [tuple(sorted(list(ut.dict_hist(a).values())))
                    for a in all_states]
    grouped_vals = ut.group_items(new_reduced_joint.values.ravel(),
                                  clusterstats)
    # probs_assigned_to_clustertype = [(
    #     sorted(np.unique(np.array(b).round(decimals=5)).tolist())[::-1], a)
    #     for a, b in grouped_vals.items()]
    probs_assigned_to_clustertype = [(
        ut.dict_hist(np.array(b).round(decimals=5)), a)
        for a, b in grouped_vals.items()]
    sortx = ut.argsort([max(c[0].keys())
                        for c in probs_assigned_to_clustertype])
    probs_assigned_to_clustertype = ut.take(probs_assigned_to_clustertype, sortx)

    # This is a list of 2-tuples. The first item holds the unique
    # probabilities assigned to a cluster type along with the number of times
    # each was assigned; the cluster type itself is the second item. Every
    # number represents how many annotations were assigned to a specific
    # label, and the length of that list is the total number of labels. For
    # all low scores you will see [[{somenum: 1}, {0: 800}], [1, 1, 1, ... 1]],
    # indicating that the assignment of everyone to a different label happened
    # once with probability somenum and 800 times with probability 0.
    # print(sorted([(b, a) for a, b in ut.map_dict_vals(sum, x)]).items())
    # z = sorted([(b, a) for a, b in ut.map_dict_vals(sum, grouped_vals).items()])
    print(ut.repr2(probs_assigned_to_clustertype, nl=2, precision=2, sorted_=True))
def _make_anygroup_hashes(annots, nids):
    """ helper function

        import ibeis
        qreq_ = ibeis.testdata_qreq_(
            defaultdb='PZ_MTEST',
            qaid_override=[1, 2, 3, 4, 5, 6, 10, 11],
            daid_override=[2, 3, 5, 6, 20, 21, 22, 23, 24],
        )

        import ibeis
        qreq_ = ibeis.testdata_qreq_(defaultdb='PZ_Master1')

        %timeit qreq_._make_namegroup_data_hashes()
        %timeit qreq_._make_namegroup_data_uuids()
    """
    # make sure items are sorted to ensure same assignment
    # gives same uuids
    # annots = qreq_.ibs.annots(sorted(qreq_.daids))
    unique_nids, groupxs = vt.group_indices(nids)
    grouped_visual_uuids = ut.apply_grouping(annots.visual_uuids, groupxs)
    group_hashes = [
        ut.combine_hashes(sorted(u.bytes for u in uuids), hasher=hashlib.sha1())
        for uuids in grouped_visual_uuids
    ]
    nid_to_grouphash = dict(zip(unique_nids, group_hashes))
    return nid_to_grouphash
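
# For reference, a hashlib-only sketch of how one stable digest per group can
# be built from sorted member byte-strings (the role `ut.combine_hashes`
# plays above).  `_group_hash` is an illustrative helper, not part of ibeis;
# sorting first makes the digest independent of the order the uuids arrive in.
def _group_hash(byte_items):
    import hashlib
    hasher = hashlib.sha1()
    for item in sorted(byte_items):
        hasher.update(item)
    return hasher.hexdigest()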
def group_images_by_label(label_arr, gid_arr):
    """
    Input: Length N list of labels and ids
    Output: Length M list of unique labels, and length M list of lists of ids
    """
    # Reverse the image to cluster index mapping
    import vtool_ibeis as vt
    labels_, groupxs_ = vt.group_indices(label_arr)
    sortx = np.array(list(map(len, groupxs_))).argsort()[::-1]
    labels = labels_.take(sortx, axis=0)
    groupxs = ut.take(groupxs_, sortx)
    label_gids = vt.apply_grouping(gid_arr, groupxs)
    return labels, label_gids
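
# A numpy-only sketch of what `group_images_by_label` computes, without the
# vtool_ibeis dependency.  Illustrative only; `_group_ids_by_label` is not
# part of the module.
def _group_ids_by_label(label_arr, gid_arr):
    """
    >>> import numpy as np
    >>> labels, gid_groups = _group_ids_by_label(
    ...     np.array([1, 2, 1, 1, 2]), np.array([10, 20, 30, 40, 50]))
    >>> labels.tolist(), [g.tolist() for g in gid_groups]
    ([1, 2], [[10, 30, 40], [20, 50]])
    """
    import numpy as np
    labels = np.unique(label_arr)
    gid_groups = [gid_arr[label_arr == lbl] for lbl in labels]
    # sort groups largest-first to mirror the function above
    sortx = np.argsort([len(g) for g in gid_groups])[::-1]
    return labels[sortx], [gid_groups[x] for x in sortx]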
def get_aidpair_tags(ibs, aid1_list, aid2_list, directed=True): r""" Args: ibs (IBEISController): ibeis controller object aid1_list (list): aid2_list (list): directed (bool): (default = True) Returns: list: tags_list CommandLine: python -m ibeis.tag_funcs --exec-get_aidpair_tags --db PZ_Master1 --tags Hard interesting Example: >>> # DISABLE_DOCTEST >>> from ibeis.tag_funcs import * # NOQA >>> import ibeis >>> ibs = ibeis.opendb(defaultdb='testdb1') >>> has_any = ut.get_argval('--tags', type_=list, default=None) >>> min_num = ut.get_argval('--min_num', type_=int, default=1) >>> aid_pairs = filter_aidpairs_by_tags(ibs, has_any=has_any, min_num=1) >>> aid1_list = aid_pairs.T[0] >>> aid2_list = aid_pairs.T[1] >>> undirected_tags = get_aidpair_tags(ibs, aid1_list, aid2_list, directed=False) >>> tagged_pairs = list(zip(aid_pairs.tolist(), undirected_tags)) >>> print(ut.repr2(tagged_pairs)) >>> tag_dict = ut.groupby_tags(tagged_pairs, undirected_tags) >>> print(ut.repr2(tag_dict, nl=2)) >>> print(ut.repr2(ut.map_dict_vals(len, tag_dict))) """ aid_pairs = np.vstack([aid1_list, aid2_list]).T if directed: annotmatch_rowid = ibs.get_annotmatch_rowid_from_superkey(aid_pairs.T[0], aid_pairs.T[1]) tags_list = ibs.get_annotmatch_case_tags(annotmatch_rowid) else: annotmatch_rowid = ibs.get_annotmatch_rowid_from_undirected_superkey(aid_pairs.T[0], aid_pairs.T[1]) tags_list = ibs.get_annotmatch_case_tags(annotmatch_rowid) if False: expanded_aid_pairs = np.vstack([aid_pairs, aid_pairs[:, ::-1]]) expanded_annotmatch_rowid = ibs.get_annotmatch_rowid_from_superkey( expanded_aid_pairs.T[0], expanded_aid_pairs.T[1]) expanded_edgeids = vt.get_undirected_edge_ids(expanded_aid_pairs) unique_edgeids, groupxs = vt.group_indices(expanded_edgeids) expanded_tags_list = ibs.get_annotmatch_case_tags(expanded_annotmatch_rowid) grouped_tags = vt.apply_grouping(np.array(expanded_tags_list, dtype=object), groupxs) undirected_tags = [list(set(ut.flatten(tags))) for tags in grouped_tags] edgeid2_tags = dict(zip(unique_edgeids, undirected_tags)) input_edgeids = expanded_edgeids[:len(aid_pairs)] tags_list = ut.dict_take(edgeid2_tags, input_edgeids) return tags_list
def get_name_shortlist_aids(daid_list, dnid_list, annot_score_list,
                            name_score_list, nid2_nidx,
                            nNameShortList, nAnnotPerName):
    r"""
    CommandLine:
        python -m ibeis.algo.hots.scoring --test-get_name_shortlist_aids

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.scoring import *  # NOQA
        >>> daid_list = np.array([11, 12, 13, 14, 15, 16, 17])
        >>> dnid_list = np.array([21, 21, 21, 22, 22, 23, 24])
        >>> annot_score_list = np.array([ 6, 2, 3, 5, 6, 3, 2])
        >>> name_score_list = np.array([ 8, 9, 5, 4])
        >>> nid2_nidx = {21:0, 22:1, 23:2, 24:3}
        >>> nNameShortList, nAnnotPerName = 3, 2
        >>> args = (daid_list, dnid_list, annot_score_list, name_score_list,
        ...         nid2_nidx, nNameShortList, nAnnotPerName)
        >>> top_daids = get_name_shortlist_aids(*args)
        >>> result = str(top_daids)
        >>> print(result)
        [15, 14, 11, 13, 16]
    """
    unique_nids, groupxs = vt.group_indices(np.array(dnid_list))
    grouped_annot_scores = vt.apply_grouping(annot_score_list, groupxs)
    grouped_daids = vt.apply_grouping(np.array(daid_list), groupxs)
    # Ensure name score list is aligned with the unique_nids
    aligned_name_score_list = name_score_list.take(ut.dict_take(nid2_nidx, unique_nids))
    # Sort each group by the name score
    group_sortx = aligned_name_score_list.argsort()[::-1]
    _top_daid_groups = ut.take(grouped_daids, group_sortx)
    _top_annot_score_groups = ut.take(grouped_annot_scores, group_sortx)
    top_daid_groups = ut.listclip(_top_daid_groups, nNameShortList)
    top_annot_score_groups = ut.listclip(_top_annot_score_groups, nNameShortList)
    # Sort within each group by the annotation score
    top_daid_sortx_groups = [annot_score_group.argsort()[::-1]
                             for annot_score_group in top_annot_score_groups]
    top_sorted_daid_groups = vt.ziptake(top_daid_groups, top_daid_sortx_groups)
    top_clipped_daids = [ut.listclip(sorted_daid_group, nAnnotPerName)
                         for sorted_daid_group in top_sorted_daid_groups]
    top_daids = ut.flatten(top_clipped_daids)
    return top_daids
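
# The shortlist logic above is a two-level sort: rank names by name score,
# then rank annotations by annotation score within each kept name.  A hedged
# numpy sketch with an illustrative signature (not the ibeis API):
def _two_level_shortlist(name_scores, annot_groups, annot_score_groups,
                         nNames, nPerName):
    import numpy as np
    shortlist = []
    for x in np.argsort(name_scores)[::-1][:nNames]:
        annots = np.asarray(annot_groups[x])
        scores = np.asarray(annot_score_groups[x])
        shortlist.extend(annots[np.argsort(scores)[::-1][:nPerName]].tolist())
    return shortlist
# With the doctest values above this reproduces [15, 14, 11, 13, 16]:
# _two_level_shortlist([8, 9, 5, 4],
#                      [[11, 12, 13], [14, 15], [16], [17]],
#                      [[6, 2, 3], [5, 6], [3], [2]], 3, 2)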
def consolidate(self, inplace=False):
    """ removes duplicate entries

    Example:
        >>> # UNSTABLE_DOCTEST
        >>> from ibeis.algo.hots.pgm_ext import *  # NOQA
        >>> state_idxs = [[1, 0, 1], [1, 0, 1], [1, 0, 2]]
        >>> weights = [.1, .2, .1]
        >>> variables = ['v1', 'v2', 'v3']
        >>> self = ApproximateFactor(state_idxs, weights, variables)
        >>> inplace = False
        >>> phi = self.consolidate(inplace)
        >>> result = str(phi)
        >>> print(result)
        +------+------+------+-----------------------+
        | v1   | v2   | v3   |   \hat{phi}(v1,v2,v3) |
        |------+------+------+-----------------------|
        | v1_1 | v2_0 | v3_1 |                0.3000 |
        | v1_1 | v2_0 | v3_2 |                0.1000 |
        +------+------+------+-----------------------+
    """
    import vtool_ibeis as vt

    # operate on a copy unless the caller asked for an in-place consolidation
    phi = self if inplace else self.copy()
    # data_ids = vt.compute_ndarray_unique_rowids_unsafe(self.state_idxs)
    data_ids = phi._compute_unique_state_ids()
    unique_ids, groupxs = vt.group_indices(data_ids)
    # assert len(unique_ids) == len(np.unique(vt.compute_unique_data_ids_(list(map(tuple, phi.state_idxs)))))
    if len(data_ids) != len(unique_ids):
        # Sum the values in the cpd to marginalize the duplicate probs
        # Take only the unique rows under this induced labeling
        unique_tmp_groupxs = np.array([gxs[0] for gxs in groupxs])
        phi.state_idxs = phi.state_idxs.take(unique_tmp_groupxs, axis=0)
        phi.weights = np.array(
            [g.sum() for g in vt.apply_grouping(phi.weights, groupxs)])
        # print('[pgm] Consolidated %r states into %r states' % (len(data_ids), len(unique_ids),))
    # else:
    #     print('[pgm] Cannot consolidate %r unique states' % (len(data_ids),))
    if not inplace:
        return phi
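
# A numpy-only sketch of the consolidation step: identical state rows are
# collapsed to a single representative and their weights summed.  Purely
# illustrative; the method above uses vtool_ibeis grouping for speed.
def _consolidate_rows(state_idxs, weights):
    """
    >>> import numpy as np
    >>> state_idxs = np.array([[1, 0, 1], [1, 0, 1], [1, 0, 2]])
    >>> weights = np.array([.1, .2, .1])
    >>> uniq, wsum = _consolidate_rows(state_idxs, weights)
    >>> uniq.tolist(), wsum.round(2).tolist()
    ([[1, 0, 1], [1, 0, 2]], [0.3, 0.1])
    """
    import numpy as np
    uniq, inverse = np.unique(state_idxs, axis=0, return_inverse=True)
    wsum = np.zeros(len(uniq))
    # accumulate the weight of every duplicate row onto its unique row
    np.add.at(wsum, inverse.ravel(), weights)
    return uniq, wsum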
def collapse_labels(model, evidence, reduced_variables, reduced_row_idxs,
                    reduced_values):
    import vtool_ibeis as vt
    # assert np.all(reduced_joint.values.ravel() == reduced_joint.values.flatten())
    reduced_ttypes = [model.var2_cpd[var].ttype for var in reduced_variables]

    evidence_vars = list(evidence.keys())
    evidence_state_idxs = ut.dict_take(evidence, evidence_vars)
    evidence_ttypes = [model.var2_cpd[var].ttype for var in evidence_vars]

    ttype2_ev_indices = dict(zip(*ut.group_indices(evidence_ttypes)))
    ttype2_re_indices = dict(zip(*ut.group_indices(reduced_ttypes)))
    # ttype2_ev_indices = ut.group_items(range(len(evidence_vars)), evidence_ttypes)
    # ttype2_re_indices = ut.group_items(range(len(reduced_variables)), reduced_ttypes)

    # Allow specific types of labels to change
    # everything is the same, only the names have changed.
    # TODO: allow for multiple different label_ttypes
    # for label_ttype in label_ttypes
    if NAME_TTYPE not in model.ttype2_template:
        return reduced_row_idxs, reduced_values
    label_ttypes = [NAME_TTYPE]
    for label_ttype in label_ttypes:
        ev_colxs = ttype2_ev_indices[label_ttype]
        re_colxs = ttype2_re_indices[label_ttype]

        ev_state_idxs = ut.take(evidence_state_idxs, ev_colxs)
        ev_state_idxs_tile = np.tile(ev_state_idxs, (len(reduced_values), 1)).astype(int)
        num_ev_ = len(ev_colxs)

        aug_colxs = list(range(num_ev_)) + (np.array(re_colxs) + num_ev_).tolist()
        aug_state_idxs = np.hstack([ev_state_idxs_tile, reduced_row_idxs])

        # Relabel rows based on the knowledge that
        # everything is the same, only the names have changed.
        num_cols = len(aug_state_idxs.T)
        mask = vt.index_to_boolmask(aug_colxs, num_cols)
        other_colxs, = np.where(~mask)
        relbl_states = aug_state_idxs.compress(mask, axis=1)
        other_states = aug_state_idxs.compress(~mask, axis=1)
        tmp_relbl_states = np.array(list(map(make_temp_state, relbl_states)))

        max_tmp_state = -1
        min_tmp_state = tmp_relbl_states.min()

        # rebuild original state structure with temp state idxs
        tmp_state_cols = [None] * num_cols
        for count, colx in enumerate(aug_colxs):
            tmp_state_cols[colx] = tmp_relbl_states[:, count:count + 1]
        for count, colx in enumerate(other_colxs):
            tmp_state_cols[colx] = other_states[:, count:count + 1]
        tmp_state_idxs = np.hstack(tmp_state_cols)

        data_ids = np.array(
            vt.compute_unique_data_ids_(list(map(tuple, tmp_state_idxs))))
        unique_ids, groupxs = vt.group_indices(data_ids)
        print('Collapsed %r states into %r states' % (
            len(data_ids), len(unique_ids),))

        # Sum the values in the cpd to marginalize the duplicate probs
        new_values = np.array([
            g.sum() for g in vt.apply_grouping(reduced_values, groupxs)
        ])
        # Take only the unique rows under this induced labeling
        unique_tmp_groupxs = np.array(ut.get_list_column(groupxs, 0))
        new_aug_state_idxs = tmp_state_idxs.take(unique_tmp_groupxs, axis=0)

        tmp_idx_set = set((-np.arange(-max_tmp_state,
                                      (-min_tmp_state) + 1)).tolist())
        true_idx_set = set(range(len(model.ttype2_template[label_ttype].basis)))

        # Relabel the rows one more time to agree with initial constraints
        for colx, true_idx in enumerate(ev_state_idxs):
            tmp_idx = np.unique(new_aug_state_idxs.T[colx])
            assert len(tmp_idx) == 1
            tmp_idx_set -= {tmp_idx[0]}
            true_idx_set -= {true_idx}
            new_aug_state_idxs[new_aug_state_idxs == tmp_idx] = true_idx

        # Relabel the remaining idxs
        remain_tmp_idxs = sorted(list(tmp_idx_set))[::-1]
        remain_true_idxs = sorted(list(true_idx_set))
        for tmp_idx, true_idx in zip(remain_tmp_idxs, remain_true_idxs):
            new_aug_state_idxs[new_aug_state_idxs == tmp_idx] = true_idx

        # Remove evidence based augmented labels
        new_state_idxs = new_aug_state_idxs.T[num_ev_:].T
    return new_state_idxs, new_values
def cluster_timespace_sec(posixtimes, latlons, thresh_sec=5, km_per_sec=KM_PER_SEC): """ Args: X_data (ndarray) : Nx3 array where columns are (seconds, lat, lon) thresh_sec (float) : threshold in seconds Doctest: >>> from ibeis.algo.preproc.occurrence_blackbox import * # NOQA >>> # Nx1 matrix denoting groundtruth locations (for testing) >>> X_name = np.array([0, 1, 1, 1, 1, 1, 2, 2, 2]) >>> # Nx3 matrix where each columns are (time, lat, lon) >>> X_data = np.array([ >>> (0, 42.727985, -73.683994), # MRC >>> (0, 42.657414, -73.774448), # Park1 >>> (0, 42.658333, -73.770993), # Park2 >>> (0, 42.654384, -73.768919), # Park3 >>> (0, 42.655039, -73.769048), # Park4 >>> (0, 42.657872, -73.764148), # Park5 >>> (0, 42.876974, -73.819311), # CP1 >>> (0, 42.862946, -73.804977), # CP2 >>> (0, 42.849809, -73.758486), # CP3 >>> ]) >>> posixtimes = X_data.T[0] >>> latlons = X_data.T[1:3].T >>> thresh_sec = 250 # seconds >>> X_labels = cluster_timespace_sec(posixtimes, latlons, thresh_sec) >>> result = ('X_labels = %r' % (X_labels,)) >>> print(result) X_labels = array([6, 4, 4, 4, 4, 5, 1, 2, 3]) Doctest: >>> from ibeis.algo.preproc.occurrence_blackbox import * # NOQA >>> # Nx1 matrix denoting groundtruth locations (for testing) >>> X_name = np.array([0, 1, 1, 1, 1, 1, 2, 2, 2]) >>> # Nx3 matrix where each columns are (time, lat, lon) >>> X_data = np.array([ >>> (np.nan, 42.657414, -73.774448), # Park1 >>> (0, 42.658333, -73.770993), # Park2 >>> (np.nan, np.nan, np.nan), # Park3 >>> (np.nan, np.nan, np.nan), # Park3.5 >>> (0, 42.655039, -73.769048), # Park4 >>> (0, 42.657872, -73.764148), # Park5 >>> ]) >>> posixtimes = X_data.T[0] >>> latlons = X_data.T[1:3].T >>> thresh_sec = 250 # seconds >>> km_per_sec = KM_PER_SEC >>> X_labels = cluster_timespace_sec(posixtimes, latlons, thresh_sec) >>> result = 'X_labels = {}'.format(ut.repr2(X_labels)) >>> print(result) X_labels = np.array([3, 4, 1, 2, 4, 5]) """ X_data, dist_func, columns = prepare_data(posixtimes, latlons, km_per_sec, 'seconds') if X_data is None: return None # Cluster nan distributions differently X_bools = ~np.isnan(X_data) group_id = (X_bools * np.power(2, [2, 1, 0])).sum(axis=1) import vtool_ibeis as vt unique_ids, groupxs = vt.group_indices(group_id) grouped_labels = [] for xs in groupxs: X_part = X_data.take(xs, axis=0) labels = _cluster_part(X_part, dist_func, columns, thresh_sec, km_per_sec) grouped_labels.append((labels, xs)) # Undo grouping and rectify overlaps X_labels = _recombine_labels(grouped_labels) # Do clustering return X_labels
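
# A short numpy sketch of the nan-pattern grouping trick used above: each row
# gets a small integer id from which of its three columns are non-nan, so
# rows with the same missing-data pattern can be clustered together.
# Illustrative only.
def _nan_pattern_ids(X_data):
    """
    >>> import numpy as np
    >>> X = np.array([[np.nan, 1., 2.],
    ...               [0.,     1., 2.],
    ...               [np.nan, np.nan, np.nan]])
    >>> _nan_pattern_ids(X).tolist()
    [3, 7, 0]
    """
    import numpy as np
    X_bools = ~np.isnan(X_data)
    return (X_bools * np.power(2, [2, 1, 0])).sum(axis=1)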
def try_query(model, infr, evidence, interest_ttypes=[], verbose=True): r""" CommandLine: python -m ibeis.algo.hots.bayes --exec-try_query --show Example: >>> # DISABLE_DOCTEST >>> from ibeis.algo.hots.bayes import * # NOQA >>> verbose = True >>> other_evidence = {} >>> name_evidence = [1, None, 0, None] >>> score_evidence = ['high', 'low', 'low'] >>> query_vars = None >>> model = make_name_model(num_annots=4, num_names=4, verbose=True, mode=1) >>> model, evidence, soft_evidence = update_model_evidence(model, name_evidence, score_evidence, other_evidence) >>> interest_ttypes = ['name'] >>> infr = pgmpy.inference.BeliefPropagation(model) >>> evidence = infr._ensure_internal_evidence(evidence, model) >>> query_results = try_query(model, infr, evidence, interest_ttypes, verbose) >>> result = ('query_results = %s' % (str(query_results),)) >>> ut.quit_if_noshow() >>> show_model(model, show_prior=True, **query_results) >>> ut.show_if_requested() Ignore: query_vars = ut.setdiff_ordered(model.nodes(), list(evidence.keys())) probs = infr.query(query_vars, evidence) map_assignment = infr.map_query(query_vars, evidence) """ infr = pgmpy.inference.VariableElimination(model) #infr = pgmpy.inference.BeliefPropagation(model) if True: return bruteforce(model, query_vars=None, evidence=evidence) else: import vtool_ibeis as vt query_vars = ut.setdiff_ordered(model.nodes(), list(evidence.keys())) # hack query_vars = ut.setdiff_ordered(query_vars, ut.list_getattr(model.ttype2_cpds['score'], 'variable')) if verbose: evidence_str = ', '.join(model.pretty_evidence(evidence)) print('P(' + ', '.join(query_vars) + ' | ' + evidence_str + ') = ') # Compute MAP joints # There is a bug here. #map_assign = infr.map_query(query_vars, evidence) # (probably an invalid thing to do) #joint_factor = pgmpy.factors.factor_product(*factor_list) # Brute force MAP name_vars = ut.list_getattr(model.ttype2_cpds['name'], 'variable') query_name_vars = ut.setdiff_ordered(name_vars, list(evidence.keys())) # TODO: incorporate case where Na is assigned to Fred #evidence_h = ut.delete_keys(evidence.copy(), ['Na']) joint = model.joint_distribution() joint.evidence_based_reduction( query_name_vars, evidence, inplace=True) # Find static row labels in the evidence given_name_vars = [var for var in name_vars if var in evidence] given_name_idx = ut.dict_take(evidence, given_name_vars) given_name_val = [joint.statename_dict[var][idx] for var, idx in zip(given_name_vars, given_name_idx)] new_vals = joint.values.ravel() # Add static evidence variables to the relabeled name states new_vars = given_name_vars + joint.variables new_rows = [tuple(given_name_val) + row for row in joint._row_labels()] # Relabel rows based on the knowledge that # everything is the same, only the names have changed. 
temp_basis = [i for i in range(model.num_names)] def relabel_names(names, temp_basis=temp_basis): names = list(map(six.text_type, names)) mapping = {} for n in names: if n not in mapping: mapping[n] = len(mapping) new_names = tuple([temp_basis[mapping[n]] for n in names]) return new_names relabeled_rows = list(map(relabel_names, new_rows)) # Combine probability of rows with the same (new) label data_ids = np.array(vt.other.compute_unique_data_ids_(relabeled_rows)) unique_ids, groupxs = vt.group_indices(data_ids) reduced_row_lbls = ut.take(relabeled_rows, ut.get_list_column(groupxs, 0)) reduced_row_lbls = list(map(list, reduced_row_lbls)) reduced_values = np.array([ g.sum() for g in vt.apply_grouping(new_vals, groupxs) ]) # Relabel the rows one more time to agree with initial constraints used_ = [] replaced = [] for colx, (var, val) in enumerate(zip(given_name_vars, given_name_val)): # All columns must be the same for this labeling alias = reduced_row_lbls[0][colx] reduced_row_lbls = ut.list_replace(reduced_row_lbls, alias, val) replaced.append(alias) used_.append(val) basis = model.ttype2_cpds['name'][0]._template_.basis find_remain_ = ut.setdiff_ordered(temp_basis, replaced) repl_remain_ = ut.setdiff_ordered(basis, used_) for find, repl in zip(find_remain_, repl_remain_): reduced_row_lbls = ut.list_replace(reduced_row_lbls, find, repl) # Now find the most likely state sortx = reduced_values.argsort()[::-1] sort_reduced_row_lbls = ut.take(reduced_row_lbls, sortx.tolist()) sort_reduced_values = reduced_values[sortx] # Remove evidence based labels new_vars_ = new_vars[len(given_name_vars):] sort_reduced_row_lbls_ = ut.get_list_column(sort_reduced_row_lbls, slice(len(given_name_vars), None)) sort_reduced_row_lbls_[0] # hack into a new joint factor var_states = ut.lmap(ut.unique_ordered, zip(*sort_reduced_row_lbls_)) statename_dict = dict(zip(new_vars, var_states)) cardinality = ut.lmap(len, var_states) val_lookup = dict(zip(ut.lmap(tuple, sort_reduced_row_lbls_), sort_reduced_values)) values = np.zeros(np.prod(cardinality)) for idx, state in enumerate(ut.iprod(*var_states)): if state in val_lookup: values[idx] = val_lookup[state] joint2 = pgmpy.factors.Factor(new_vars_, cardinality, values, statename_dict=statename_dict) print(joint2) max_marginals = {} for i, var in enumerate(query_name_vars): one_out = query_name_vars[:i] + query_name_vars[i + 1:] max_marginals[var] = joint2.marginalize(one_out, inplace=False) # max_marginals[var] = joint2.maximize(one_out, inplace=False) print(joint2.marginalize(['Nb', 'Nc'], inplace=False)) factor_list = max_marginals.values() # Better map assignment based on knowledge of labels map_assign = dict(zip(new_vars_, sort_reduced_row_lbls_[0])) sort_reduced_rowstr_lbls = [ ut.repr2(dict(zip(new_vars, lbls)), explicit=True, nobraces=True, strvals=True) for lbls in sort_reduced_row_lbls_ ] top_assignments = list(zip(sort_reduced_rowstr_lbls[:3], sort_reduced_values)) if len(sort_reduced_values) > 3: top_assignments += [('other', 1 - sum(sort_reduced_values[:3]))] # import utool # utool.embed() # Compute all marginals # probs = infr.query(query_vars, evidence) #probs = infr.query(query_vars, evidence) # factor_list = probs.values() ## Marginalize over non-query, non-evidence #irrelevant_vars = ut.setdiff_ordered(joint.variables, list(evidence.keys()) + query_vars) #joint.marginalize(irrelevant_vars) #joint.normalize() #new_rows = joint._row_labels() #new_vals = joint.values.ravel() #map_vals = new_rows[new_vals.argmax()] #map_assign = dict(zip(joint.variables, 
map_vals)) # Compute Marginalized MAP joints #marginalized_joints = {} #for ttype in interest_ttypes: # other_vars = [v for v in joint_factor.scope() # if model.var2_cpd[v].ttype != ttype] # marginal = joint_factor.marginalize(other_vars, inplace=False) # marginalized_joints[ttype] = marginal query_results = { 'factor_list': factor_list, 'top_assignments': top_assignments, 'map_assign': map_assign, 'marginalized_joints': None, } return query_results
def flow(): """ http://pmneila.github.io/PyMaxflow/maxflow.html#maxflow-fastmin pip install PyMaxFlow pip install pystruct pip install hdbscan """ # Toy problem representing attempting to discover names via annotation # scores import pystruct # NOQA import pystruct.models # NOQA import networkx as netx # NOQA import vtool_ibeis as vt num_annots = 10 num_names = num_annots hidden_nids = np.random.randint(0, num_names, num_annots) unique_nids, groupxs = vt.group_indices(hidden_nids) toy_params = { True: {'mu': 1.0, 'sigma': 2.2}, False: {'mu': 7.0, 'sigma': .9} } if True: import vtool_ibeis as vt import plottool_ibeis as pt xdata = np.linspace(0, 100, 1000) tp_pdf = vt.gauss_func1d(xdata, **toy_params[True]) fp_pdf = vt.gauss_func1d(xdata, **toy_params[False]) pt.plot_probabilities([tp_pdf, fp_pdf], ['TP', 'TF'], xdata=xdata) def metric(aidx1, aidx2, hidden_nids=hidden_nids, toy_params=toy_params): if aidx1 == aidx2: return 0 rng = np.random.RandomState(int(aidx1 + aidx2)) same = hidden_nids[int(aidx1)] == hidden_nids[int(aidx2)] mu, sigma = ut.dict_take(toy_params[same], ['mu', 'sigma']) return np.clip(rng.normal(mu, sigma), 0, np.inf) pairwise_aidxs = list(ut.iprod(range(num_annots), range(num_annots))) pairwise_labels = np.array([hidden_nids[a1] == hidden_nids[a2] for a1, a2 in pairwise_aidxs]) pairwise_scores = np.array([metric(*zz) for zz in pairwise_aidxs]) pairwise_scores_mat = pairwise_scores.reshape(num_annots, num_annots) if num_annots <= 10: print(ut.repr2(pairwise_scores_mat, precision=1)) #aids = list(range(num_annots)) #g = netx.DiGraph() #g.add_nodes_from(aids) #g.add_edges_from([(tup[0], tup[1], {'weight': score}) for tup, score in zip(pairwise_aidxs, pairwise_scores) if tup[0] != tup[1]]) #netx.draw_graphviz(g) #pr = netx.pagerank(g) X = pairwise_scores Y = pairwise_labels encoder = vt.ScoreNormalizer() encoder.fit(X, Y) encoder.visualize() # meanshift clustering import sklearn bandwidth = sklearn.cluster.estimate_bandwidth(X[:, None]) # , quantile=quantile, n_samples=500) assert bandwidth != 0, ('[] bandwidth is 0. 
Cannot cluster') # bandwidth is with respect to the RBF used in clustering #ms = sklearn.cluster.MeanShift(bandwidth=bandwidth, bin_seeding=True, cluster_all=True) ms = sklearn.cluster.MeanShift(bandwidth=bandwidth, bin_seeding=True, cluster_all=False) ms.fit(X[:, None]) label_arr = ms.labels_ unique_labels = np.unique(label_arr) max_label = max(0, unique_labels.max()) num_orphans = (label_arr == -1).sum() label_arr[label_arr == -1] = np.arange(max_label + 1, max_label + 1 + num_orphans) X_data = np.arange(num_annots)[:, None].astype(np.int64) #graph = pystruct.models.GraphCRF( # n_states=None, # n_features=None, # inference_method='lp', # class_weight=None, # directed=False, #) import scipy import scipy.cluster import scipy.cluster.hierarchy thresh = 2.0 labels = scipy.cluster.hierarchy.fclusterdata(X_data, thresh, metric=metric) unique_lbls, lblgroupxs = vt.group_indices(labels) print(groupxs) print(lblgroupxs) print('groupdiff = %r' % (ut.compare_groupings(groupxs, lblgroupxs),)) print('common groups = %r' % (ut.find_grouping_consistencies(groupxs, lblgroupxs),)) #X_data, seconds_thresh, criterion='distance') #help(hdbscan.HDBSCAN) import hdbscan alg = hdbscan.HDBSCAN(metric=metric, min_cluster_size=1, p=1, gen_min_span_tree=1, min_samples=2) labels = alg.fit_predict(X_data) labels[labels == -1] = np.arange(np.sum(labels == -1)) + labels.max() + 1 unique_lbls, lblgroupxs = vt.group_indices(labels) print(groupxs) print(lblgroupxs) print('groupdiff = %r' % (ut.compare_groupings(groupxs, lblgroupxs),)) print('common groups = %r' % (ut.find_grouping_consistencies(groupxs, lblgroupxs),)) #import ddbscan #help(ddbscan.DDBSCAN) #alg = ddbscan.DDBSCAN(2, 2) #D = np.zeros((len(aids), len(aids) + 1)) #D.T[-1] = np.arange(len(aids)) ## Can alpha-expansion be used when the pairwise potentials are not in a grid? #hidden_ut.group_items(aids, hidden_nids) if False: import maxflow #from maxflow import fastmin # Create a graph with integer capacities. g = maxflow.Graph[int](2, 2) # Add two (non-terminal) nodes. Get the index to the first one. nodes = g.add_nodes(2) # Create two edges (forwards and backwards) with the given capacities. # The indices of the nodes are always consecutive. g.add_edge(nodes[0], nodes[1], 1, 2) # Set the capacities of the terminal edges... # ...for the first node. g.add_tedge(nodes[0], 2, 5) # ...for the second node. g.add_tedge(nodes[1], 9, 4) g = maxflow.Graph[float](2, 2) g.maxflow() g.get_nx_graph() g.get_segment(nodes[0])
def invert_assigns(idx_to_wxs, idx_to_maws, verbose=False):
    r"""
    Inverts assignment of vectors->to->words into words->to->vectors.
    Invert mapping -- Group by word indexes

    This gives a HUGE speedup over the old invert_assigns

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.smk.smk_funcs import *  # NOQA
        >>> idx_to_wxs = np.ma.array([
        >>>     (0, 4),
        >>>     (2, -1),
        >>>     (2, 0)], dtype=np.int32)
        >>> idx_to_wxs[1, 1] = np.ma.masked
        >>> idx_to_maws = np.ma.array(
        >>>     [(.5, 1.), (1., np.nan), (.5, .5)], dtype=np.float32)
        >>> idx_to_maws[1, 1] = np.ma.masked
        >>> tup = invert_assigns(idx_to_wxs, idx_to_maws)
        >>> wx_to_idxs, wx_to_maws = tup
        >>> result = 'wx_to_idxs = %s' % (ut.repr4(wx_to_idxs, with_dtype=True),)
        >>> result += '\nwx_to_maws = %s' % (ut.repr4(wx_to_maws, with_dtype=True),)
        >>> print(result)
        wx_to_idxs = {
            0: np.array([0, 2], dtype=np.int32),
            2: np.array([1, 2], dtype=np.int32),
            4: np.array([0], dtype=np.int32),
        }
        wx_to_maws = {
            0: np.array([0.5, 0.5], dtype=np.float32),
            2: np.array([1. , 0.5], dtype=np.float32),
            4: np.array([1.], dtype=np.float32),
        }
    """
    assert isinstance(idx_to_wxs, np.ma.masked_array)
    assert isinstance(idx_to_maws, np.ma.masked_array)

    nrows, ncols = idx_to_wxs.shape
    if len(idx_to_wxs.mask.shape) == 0:
        valid_mask = np.ones((nrows, ncols), dtype=bool)
    else:
        valid_mask = ~idx_to_maws.mask
    # idx_to_nAssign = (valid_mask).sum(axis=1)

    _valid_x2d = np.flatnonzero(valid_mask)
    flat_idxs = np.floor_divide(_valid_x2d, ncols, dtype=np.int32)
    flat_wxs = idx_to_wxs.compressed()
    flat_maws = idx_to_maws.compressed()

    sortx = flat_wxs.argsort()
    flat_wxs = flat_wxs.take(sortx)
    flat_idxs = flat_idxs.take(sortx)
    flat_maws = flat_maws.take(sortx)

    wx_keys, groupxs = vt.group_indices(flat_wxs)
    idxs_list = vt.apply_grouping(flat_idxs, groupxs)
    maws_list = vt.apply_grouping(flat_maws, groupxs)

    wx_to_idxs = dict(zip(wx_keys, idxs_list))
    wx_to_maws = dict(zip(wx_keys, maws_list))

    if verbose:
        print('[vocab] L___ End Assign vecs to words.')
    return (wx_to_idxs, wx_to_maws)
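
# A plain-python/numpy sketch of the inversion `invert_assigns` performs, for
# a dense (non-masked) assignment matrix.  Illustrative only; the real
# function also carries the match weights and handles masked entries.
def _invert_dense_assigns(idx_to_wxs):
    """
    >>> import numpy as np
    >>> idx_to_wxs = np.array([[0, 4], [2, 0]])
    >>> {k: v.tolist() for k, v in sorted(_invert_dense_assigns(idx_to_wxs).items())}
    {0: [0, 1], 2: [1], 4: [0]}
    """
    import numpy as np
    wx_to_idxs = {}
    for idx, wxs in enumerate(idx_to_wxs):
        for wx in wxs:
            wx_to_idxs.setdefault(int(wx), []).append(idx)
    return {wx: np.array(idxs) for wx, idxs in wx_to_idxs.items()}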
def compute_stacked_agg_rvecs(words, flat_wxs_assign, flat_vecs, flat_offsets): """ More efficient version of agg on a stacked structure Args: words (ndarray): entire vocabulary of words flat_wxs_assign (ndarray): maps a stacked index to word index flat_vecs (ndarray): stacked SIFT descriptors flat_offsets (ndarray): offset positions per annotation Example: >>> # DISABLE_DOCTEST >>> from ibeis.algo.smk.smk_funcs import * # NOQA >>> data = testdata_rvecs(dim=2, nvecs=1000, nannots=10) >>> words = data['words'] >>> flat_offsets = data['offset_list'] >>> flat_wxs_assign, flat_vecs = ut.take(data, ['idx_to_wx', 'vecs']) >>> tup = compute_stacked_agg_rvecs(words, flat_wxs_assign, flat_vecs, flat_offsets) >>> all_agg_vecs, all_error_flags, agg_offset_list = tup >>> agg_rvecs_list = [all_agg_vecs[l:r] for l, r in ut.itertwo(agg_offset_list)] >>> agg_flags_list = [all_error_flags[l:r] for l, r in ut.itertwo(agg_offset_list)] >>> assert len(agg_flags_list) == len(flat_offsets) - 1 Example: >>> # DISABLE_DOCTEST >>> from ibeis.algo.smk.smk_funcs import * # NOQA >>> data = testdata_rvecs(dim=2, nvecs=100, nannots=5) >>> words = data['words'] >>> flat_offsets = data['offset_list'] >>> flat_wxs_assign, flat_vecs = ut.take(data, ['idx_to_wx', 'vecs']) >>> tup = compute_stacked_agg_rvecs(words, flat_wxs_assign, flat_vecs, flat_offsets) >>> all_agg_vecs, all_error_flags, agg_offset_list = tup >>> agg_rvecs_list = [all_agg_vecs[l:r] for l, r in ut.itertwo(agg_offset_list)] >>> agg_flags_list = [all_error_flags[l:r] for l, r in ut.itertwo(agg_offset_list)] >>> assert len(agg_flags_list) == len(flat_offsets) - 1 """ grouped_wxs = [flat_wxs_assign[l:r] for l, r in ut.itertwo(flat_offsets)] # Assume single assignment, aggregate everything # across the entire database flat_offsets = np.array(flat_offsets) idx_to_dx = (np.searchsorted( flat_offsets, np.arange(len(flat_wxs_assign)), side='right') - 1).astype(np.int32) if isinstance(flat_wxs_assign, np.ma.masked_array): wx_list = flat_wxs_assign.T[0].compressed() else: wx_list = flat_wxs_assign.T[0].ravel() unique_wx, groupxs = vt.group_indices(wx_list) dim = flat_vecs.shape[1] if isinstance(flat_wxs_assign, np.ma.masked_array): dx_to_wxs = [np.unique(wxs.compressed()) for wxs in grouped_wxs] else: dx_to_wxs = [np.unique(wxs.ravel()) for wxs in grouped_wxs] dx_to_nagg = [len(wxs) for wxs in dx_to_wxs] num_agg_vecs = sum(dx_to_nagg) # all_agg_wxs = np.hstack(dx_to_wxs) agg_offset_list = np.array([0] + ut.cumsum(dx_to_nagg)) # Preallocate agg residuals for all dxs all_agg_vecs = np.empty((num_agg_vecs, dim), dtype=np.float32) all_agg_vecs[:, :] = np.nan # precompute agg residual stack i_to_dxs = vt.apply_grouping(idx_to_dx, groupxs) subgroup = [vt.group_indices(dxs) for dxs in ut.ProgIter(i_to_dxs)] i_to_unique_dxs = ut.take_column(subgroup, 0) i_to_dx_groupxs = ut.take_column(subgroup, 1) num_words = len(unique_wx) # Overall this takes 5 minutes and 21 seconds # I think the other method takes about 12 minutes for i in ut.ProgIter(range(num_words), 'agg'): wx = unique_wx[i] xs = groupxs[i] dxs = i_to_unique_dxs[i] dx_groupxs = i_to_dx_groupxs[i] word = words[wx:wx + 1] offsets1 = agg_offset_list.take(dxs) offsets2 = [np.where(dx_to_wxs[dx] == wx)[0][0] for dx in dxs] offsets = np.add(offsets1, offsets2, out=offsets1) # if __debug__: # assert np.bincount(dxs).max() < 2 # offset = agg_offset_list[dxs[0]] # assert np.all(dx_to_wxs[dxs[0]] == all_agg_wxs[offset:offset + # dx_to_nagg[dxs[0]]]) # Compute residuals rvecs = flat_vecs[xs] - word vt.normalize(rvecs, axis=1, 
out=rvecs) rvecs[np.all(np.isnan(rvecs), axis=1)] = 0 # Aggregate across same images grouped_rvecs = vt.apply_grouping(rvecs, dx_groupxs, axis=0) agg_rvecs_ = [rvec_group.sum(axis=0) for rvec_group in grouped_rvecs] # agg_rvecs = np.vstack(agg_rvecs_) all_agg_vecs[offsets, :] = agg_rvecs_ assert not np.any(np.isnan(all_agg_vecs)) print('Apply normalization') vt.normalize(all_agg_vecs, axis=1, out=all_agg_vecs) all_error_flags = np.all(np.isnan(all_agg_vecs), axis=1) all_agg_vecs[all_error_flags, :] = 0 # ndocs_per_word1 = np.array(ut.lmap(len, wx_to_unique_dxs)) # ndocs_total1 = len(flat_offsets) - 1 # idf1 = smk_funcs.inv_doc_freq(ndocs_total1, ndocs_per_word1) tup = all_agg_vecs, all_error_flags, agg_offset_list return tup
def get_review_edges(cm_list, ibs=None, review_cfg={}): r""" Needs to be moved to a better file. Maybe something to do with identification. Returns a list of matches that should be inspected This function is more lightweight than orgres or allres. Used in id_review_api and interact_qres2 Args: cm_list (list): list of chip match objects ranks_top (int): put all ranks less than this number into the graph directed (bool): Returns: tuple: review_edges = (qaid_arr, daid_arr, score_arr, rank_arr) CommandLine: python -m ibeis.gui.id_review_api get_review_edges:0 Example0: >>> # ENABLE_DOCTEST >>> from ibeis.gui.id_review_api import * # NOQA >>> import ibeis >>> ibs = ibeis.opendb('PZ_MTEST') >>> qreq_ = ibeis.main_helpers.testdata_qreq_() >>> cm_list = qreq_.execute() >>> review_cfg = dict(ranks_top=5, directed=True, name_scoring=False, >>> filter_true_matches=True) >>> review_edges = get_review_edges(cm_list, ibs=ibs, review_cfg=review_cfg) >>> print(review_edges) Example1: >>> # UNSTABLE_DOCTEST >>> from ibeis.gui.id_review_api import * # NOQA >>> import ibeis >>> cm_list, qreq_ = ibeis.testdata_cmlist('PZ_MTEST', a='default:qsize=5,dsize=20') >>> review_cfg = dict(ranks_top=5, directed=True, name_scoring=False, >>> filter_reviewed=False, filter_true_matches=True) >>> review_edges = get_review_edges(cm_list, review_cfg=review_cfg, ibs=ibs) >>> print(review_edges) Example3: >>> # UNSTABLE_DOCTEST >>> from ibeis.gui.id_review_api import * # NOQA >>> import ibeis >>> cm_list, qreq_ = ibeis.testdata_cmlist('PZ_MTEST', a='default:qsize=1,dsize=100') >>> review_cfg = dict(ranks_top=1, directed=False, name_scoring=False, >>> filter_reviewed=False, filter_true_matches=True) >>> review_edges = get_review_edges(cm_list, review_cfg=review_cfg, ibs=ibs) >>> print(review_edges) Example4: >>> # UNSTABLE_DOCTEST >>> from ibeis.gui.id_review_api import * # NOQA >>> import ibeis >>> cm_list, qreq_ = ibeis.testdata_cmlist('PZ_MTEST', a='default:qsize=10,dsize=10') >>> ranks_top = 3 >>> review_cfg = dict(ranks_top=3, directed=False, name_scoring=False, >>> filter_reviewed=False, filter_true_matches=True) >>> review_edges = get_review_edges(cm_list, review_cfg=review_cfg, ibs=ibs) >>> print(review_edges) """ import vtool_ibeis as vt from ibeis.algo.hots import chip_match automatch_kw = REVIEW_CFG_DEFAULTS.copy() automatch_kw = ut.update_existing(automatch_kw, review_cfg) print('[resorg] get_review_edges(%s)' % (ut.repr2(automatch_kw))) print('[resorg] len(cm_list) = %d' % (len(cm_list))) qaids_stack = [] daids_stack = [] ranks_stack = [] scores_stack = [] # For each QueryResult, Extract inspectable candidate matches if isinstance(cm_list, dict): cm_list = list(cm_list.values()) if len(cm_list) == 0: return ([], [], [], []) for cm in cm_list: if isinstance(cm, chip_match.ChipMatch): daids = cm.get_top_aids(ntop=automatch_kw['ranks_top']) scores = cm.get_top_scores(ntop=automatch_kw['ranks_top']) ranks = np.arange(len(daids)) qaids = np.full(daids.shape, cm.qaid, dtype=daids.dtype) else: (qaids, daids, scores, ranks) = cm.get_match_tbldata( ranks_top=automatch_kw['ranks_top'], name_scoring=automatch_kw['name_scoring'], ibs=ibs) qaids_stack.append(qaids) daids_stack.append(daids) scores_stack.append(scores) ranks_stack.append(ranks) # Stack them into a giant array qaid_arr = np.hstack(qaids_stack) daid_arr = np.hstack(daids_stack) score_arr = np.hstack(scores_stack) rank_arr = np.hstack(ranks_stack) # Sort by scores sortx = score_arr.argsort()[::-1] qaid_arr = qaid_arr[sortx] daid_arr = daid_arr[sortx] score_arr = 
score_arr[sortx] rank_arr = rank_arr[sortx] # IS_REVIEWED DOES NOT WORK if automatch_kw['filter_reviewed']: _is_reviewed = ibs.get_annot_pair_is_reviewed(qaid_arr.tolist(), daid_arr.tolist()) is_unreviewed = ~np.array(_is_reviewed, dtype=np.bool) qaid_arr = qaid_arr.compress(is_unreviewed) daid_arr = daid_arr.compress(is_unreviewed) score_arr = score_arr.compress(is_unreviewed) rank_arr = rank_arr.compress(is_unreviewed) # Remove directed edges if not automatch_kw['directed']: #nodes = np.unique(directed_edges.flatten()) directed_edges = np.vstack((qaid_arr, daid_arr)).T #idx1, idx2 = vt.intersect2d_indices(directed_edges, directed_edges[:, ::-1]) unique_rowx = vt.find_best_undirected_edge_indexes( directed_edges, score_arr) qaid_arr = qaid_arr.take(unique_rowx) daid_arr = daid_arr.take(unique_rowx) score_arr = score_arr.take(unique_rowx) rank_arr = rank_arr.take(unique_rowx) # Filter Double Name Matches if automatch_kw['filter_duplicate_true_matches']: # filter_dup_namepairs qnid_arr = ibs.get_annot_nids(qaid_arr) dnid_arr = ibs.get_annot_nids(daid_arr) if not automatch_kw['directed']: directed_name_edges = np.vstack((qnid_arr, dnid_arr)).T unique_rowx2 = vt.find_best_undirected_edge_indexes( directed_name_edges, score_arr) else: namepair_id_list = np.array( vt.compute_unique_data_ids_(list(zip(qnid_arr, dnid_arr)))) unique_namepair_ids, namepair_groupxs = vt.group_indices( namepair_id_list) score_namepair_groups = vt.apply_grouping(score_arr, namepair_groupxs) unique_rowx2 = np.array(sorted([ groupx[score_group.argmax()] for groupx, score_group in zip( namepair_groupxs, score_namepair_groups) ]), dtype=np.int32) qaid_arr = qaid_arr.take(unique_rowx2) daid_arr = daid_arr.take(unique_rowx2) score_arr = score_arr.take(unique_rowx2) rank_arr = rank_arr.take(unique_rowx2) # Filter all true matches if automatch_kw['filter_true_matches']: qnid_arr = ibs.get_annot_nids(qaid_arr) dnid_arr = ibs.get_annot_nids(daid_arr) valid_flags = qnid_arr != dnid_arr qaid_arr = qaid_arr.compress(valid_flags) daid_arr = daid_arr.compress(valid_flags) score_arr = score_arr.compress(valid_flags) rank_arr = rank_arr.compress(valid_flags) if automatch_kw['filter_photobombs']: unique_aids = ut.unique(ut.flatten([qaid_arr, daid_arr])) #grouped_aids, unique_nids = ibs.group_annots_by_name(unique_aids) invalid_nid_map = get_photobomber_map(ibs, qaid_arr) nid2_aids = ut.group_items(unique_aids, ibs.get_annot_nids(unique_aids)) expanded_aid_map = ut.ddict(set) for nid1, other_nids in invalid_nid_map.items(): for aid1 in nid2_aids[nid1]: for nid2 in other_nids: for aid2 in nid2_aids[nid2]: expanded_aid_map[aid1].add(aid2) expanded_aid_map[aid2].add(aid1) valid_flags = [ daid not in expanded_aid_map[qaid] for qaid, daid in zip(qaid_arr, daid_arr) ] qaid_arr = qaid_arr.compress(valid_flags) daid_arr = daid_arr.compress(valid_flags) score_arr = score_arr.compress(valid_flags) rank_arr = rank_arr.compress(valid_flags) review_edges = (qaid_arr, daid_arr, score_arr, rank_arr) return review_edges
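
# A numpy sketch of reducing directed edges to undirected ones by keeping,
# for each unordered pair, only the highest scoring direction (the role
# vt.find_best_undirected_edge_indexes plays above).  Illustrative only.
def _best_undirected_edge_idxs(edges, scores):
    """
    >>> import numpy as np
    >>> edges  = np.array([[1, 2], [2, 1], [3, 4]])
    >>> scores = np.array([.5, .9, .1])
    >>> _best_undirected_edge_idxs(edges, scores).tolist()
    [1, 2]
    """
    import numpy as np
    undirected = np.sort(edges, axis=1)
    best = {}
    for rowx, (edge, score) in enumerate(zip(map(tuple, undirected), scores)):
        if edge not in best or score > scores[best[edge]]:
            best[edge] = rowx
    return np.array(sorted(best.values()))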
def compute_fmech_score(cm, qreq_=None, hack_single_ori=False):
    r"""
    nsum. This is the fmech scoring mechanism.

    Args:
        cm (ibeis.ChipMatch):

    Returns:
        tuple: (unique_nids, nsum_score_list)

    CommandLine:
        python -m ibeis.algo.hots.name_scoring --test-compute_fmech_score
        python -m ibeis.algo.hots.name_scoring --test-compute_fmech_score:0
        python -m ibeis.algo.hots.name_scoring --test-compute_fmech_score:2
        utprof.py -m ibeis.algo.hots.name_scoring --test-compute_fmech_score:2
        utprof.py -m ibeis.algo.hots.pipeline --test-request_ibeis_query_L0:0 --db PZ_Master1 -a timectrl:qindex=0:256

    Example0:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.name_scoring import *  # NOQA
        >>> cm = testdata_chipmatch()
        >>> nsum_score_list = compute_fmech_score(cm)
        >>> assert np.all(nsum_score_list == [ 4., 7., 5.])

    Example1:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.name_scoring import *  # NOQA
        >>> ibs, qreq_, cm_list = plh.testdata_post_sver('PZ_MTEST', qaid_list=[18])
        >>> cm = cm_list[0]
        >>> cm.evaluate_dnids(qreq_)
        >>> cm._cast_scores()
        >>> #cm.qnid = 1  # Hack for testdb1 names
        >>> nsum_score_list = compute_fmech_score(cm, qreq_)
        >>> #assert np.all(nsum_nid_list == cm.unique_nids), 'nids out of alignment'
        >>> flags = (cm.unique_nids == cm.qnid)
        >>> max_true = nsum_score_list[flags].max()
        >>> max_false = nsum_score_list[~flags].max()
        >>> assert max_true > max_false, 'is this truly a hard case?'
        >>> assert max_true > 1.2, 'score=%r should be higher for aid=18' % (max_true,)

    Example2:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.name_scoring import *  # NOQA
        >>> ibs, qreq_, cm_list = plh.testdata_post_sver('PZ_MTEST', qaid_list=[18], cfgdict=dict(query_rotation_heuristic=True))
        >>> cm = cm_list[0]
        >>> cm.score_name_nsum(qreq_)
        >>> ut.quit_if_noshow()
        >>> cm.show_ranked_matches(qreq_, ori=True)

    Example3:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.hots.name_scoring import *  # NOQA
        >>> #ibs, qreq_, cm_list = plh.testdata_pre_sver('testdb1', qaid_list=[1])
        >>> ibs, qreq_, cm_list = plh.testdata_post_sver('testdb1', qaid_list=[1], cfgdict=dict(query_rotation_heuristic=True))
        >>> cm = cm_list[0]
        >>> cm.score_name_nsum(qreq_)
        >>> ut.quit_if_noshow()
        >>> cm.show_ranked_matches(qreq_, ori=True)
    """
    #assert qreq_ is not None
    if hack_single_ori is None:
        try:
            hack_single_ori = qreq_ is not None and (
                qreq_.qparams.query_rotation_heuristic or
                qreq_.qparams.rotation_invariance
            )
        except AttributeError:
            hack_single_ori = True
    # The score for each feature match
    fm_list = cm.fm_list
    fs_list = cm.get_fsv_prod_list()
    # The query feature index for each feature match
    fx1_list = [fm.T[0] for fm in fm_list]
    if hack_single_ori:
        # Group keypoints with the same xy-coordinate.
        # Combine these features so each only receives one vote
        kpts1 = qreq_.ibs.get_annot_kpts(
            cm.qaid, config2_=qreq_.extern_query_config2)
        xys1_ = vt.get_xys(kpts1).T
        fx1_to_comboid = vt.compute_unique_arr_dataids(xys1_)
        fcombo_ids = [fx1_to_comboid.take(fx1) for fx1 in fx1_list]
    else:
        # use the feature index itself as a combo id
        # so each feature only receives one vote
        fcombo_ids = fx1_list

    if False:
        import ubelt as ub
        for ids in fcombo_ids:
            ub.find_duplicates(ids)

    # Group annotation matches by name
    # nsum_nid_list, name_groupxs = vt.group_indices(cm.dnid_list)
    # nsum_nid_list = cm.unique_nids
    name_groupxs = cm.name_groupxs

    nsum_score_list = []
    # For all indices matched to a particular name
    for name_idxs in name_groupxs:
        # Get feat indices and scores corresponding to the name's annots
        name_combo_ids = ut.take(fcombo_ids, name_idxs)
        name_fss = ut.take(fs_list, name_idxs)
        # Flatten over annots in the name
        fs = np.hstack(name_fss)
        if len(fs) == 0:
            nsum_score_list.append(0)
            continue
        combo_ids = np.hstack(name_combo_ids)
        # Features (with the same id) can't vote for this name twice
        group_idxs = vt.group_indices(combo_ids)[1]
        flagged_idxs = [idxs[fs.take(idxs).argmax()] for idxs in group_idxs]
        # Detail: sorting the idxs preserves summation order; this fixes the
        # numerical issue where nsum and csum were off
        flagged_idxs = np.sort(flagged_idxs)
        name_score = fs.take(flagged_idxs).sum()
        nsum_score_list.append(name_score)
    nsum_score_list = np.array(nsum_score_list)

    return nsum_score_list
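
# A numpy-only sketch of the core fmech rule above: each feature id may
# contribute at most one (its best) match score to a name's total.  The
# helper `_one_vote_name_score` is illustrative and not part of ChipMatch.
def _one_vote_name_score(combo_ids, fs):
    """
    >>> import numpy as np
    >>> combo_ids = np.array([0, 0, 1, 2, 2])
    >>> fs        = np.array([.2, .7, .4, .1, .5])
    >>> round(float(_one_vote_name_score(combo_ids, fs)), 2)
    1.6
    """
    import numpy as np
    return sum(fs[combo_ids == id_].max() for id_ in np.unique(combo_ids))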
def get_annot_kpts_distinctiveness(ibs, aid_list, config2_=None, **kwargs):
    """
    very hacky, but cute way to cache keypoint distinctiveness

    Args:
        ibs (IBEISController):  ibeis controller object
        aid_list (list):
        dstncvs_normer (None):

    Returns:
        list: dstncvs_list

    CommandLine:
        python -m ibeis.control.manual_ibeiscontrol_funcs --test-get_annot_kpts_distinctiveness

    Example:
        >>> # SLOW_DOCTEST
        >>> # xdoctest: +SKIP
        >>> from ibeis.control.manual_ibeiscontrol_funcs import *  # NOQA
        >>> from ibeis.algo.hots import distinctiveness_normalizer
        >>> import ibeis
        >>> import numpy as np
        >>> config2_ = None
        >>> # build test data
        >>> ibs = ibeis.opendb('testdb1')
        >>> aid_list = ibs.get_valid_aids(species=const.TEST_SPECIES.ZEB_PLAIN)
        >>> # execute function
        >>> aid_list1 = aid_list[::2]
        >>> aid_list2 = aid_list[1::3]
        >>> dstncvs_list1 = get_annot_kpts_distinctiveness(ibs, aid_list1)
        >>> dstncvs_list2 = get_annot_kpts_distinctiveness(ibs, aid_list2)
        >>> dstncvs_list = get_annot_kpts_distinctiveness(ibs, aid_list)
        >>> print(ut.depth_profile(dstncvs_list1))
        >>> stats_dict = ut.dict_stack([ut.get_stats(dstncvs) for dstncvs in dstncvs_list])
        >>> print(ut.repr2(stats_dict))
        >>> assert np.all(np.array(stats_dict['min']) >= 0), 'distinctiveness was out of bounds'
        >>> assert np.all(np.array(stats_dict['max']) <= 1), 'distinctiveness was out of bounds'
    """
    from ibeis.algo.hots import distinctiveness_normalizer as dcvs_normer

    # per-species distinctiveness wrapper around ibeis cached function
    # get feature rowids
    aid_list = np.array(aid_list)
    fid_list = np.array(ibs.get_annot_feat_rowids(aid_list, ensure=True,
                                                  eager=True, nInput=None,
                                                  config2_=config2_))
    species_rowid_list = np.array(ibs.get_annot_species_rowids(aid_list))
    # Compute distinctiveness separately for each species
    unique_sids, groupxs = vt.group_indices(species_rowid_list)
    fids_groups = vt.apply_grouping(fid_list, groupxs)
    species_text_list = ibs.get_species_texts(unique_sids)
    # Map distinctiveness computation
    normer_list = [
        dcvs_normer.request_species_distinctiveness_normalizer(species)
        for species in species_text_list
    ]
    # Reduce to get results
    dstncvs_groups = [
        get_feat_kpts_distinctiveness(ibs, fids, dstncvs_normer=dstncvs_normer,
                                      species_rowid=sid, **kwargs)
        for dstncvs_normer, fids, sid in zip(normer_list, fids_groups, unique_sids)
    ]
    dstncvs_list = vt.invert_apply_grouping(dstncvs_groups, groupxs)
    return dstncvs_list
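
# The function above follows the group -> apply-per-group -> invert pattern
# used throughout this codebase.  A hedged numpy-only sketch of that pattern
# (here broadcasting a per-group statistic back to every member, as a
# stand-in for the per-species normalizer):
def _group_apply_invert(keys, values, func):
    """
    >>> import numpy as np
    >>> keys = np.array([1, 2, 1, 2, 2])
    >>> values = np.array([1., 3., 5., 7., 2.])
    >>> _group_apply_invert(keys, values, np.mean).tolist()
    [3.0, 4.0, 3.0, 4.0, 4.0]
    """
    import numpy as np
    out = np.empty(len(values), dtype=float)
    for key in np.unique(keys):
        idxs = np.flatnonzero(keys == key)
        # apply the per-group function and scatter the result back in place
        out[idxs] = func(values[idxs])
    return out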