def _compute_unique_state_ids(self):
    import vtool as vt
    # data_ids = vt.compute_ndarray_unique_rowids_unsafe(self.state_idxs)
    data_ids = np.array(
        vt.compute_unique_data_ids_(list(map(tuple, self.state_idxs))))
    return data_ids
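
# A minimal NumPy-only sketch of the same row-id computation, assuming
# `state_idxs` is a 2D integer ndarray. This is an illustrative stand-in for
# vt.compute_unique_data_ids_, not the vtool implementation: np.unique with
# axis=0 and return_inverse=True assigns each row the index of its unique
# representative, so identical rows share an id, which is all the helper
# above needs.
def _unique_row_ids_sketch(state_idxs):
    """
    Example:
        >>> import numpy as np
        >>> state_idxs = np.array([[0, 1], [2, 3], [0, 1]])
        >>> print(_unique_row_ids_sketch(state_idxs))
        [0 1 0]
    """
    _, data_ids = np.unique(state_idxs, axis=0, return_inverse=True)
    return data_ids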
def collapse_labels(model, evidence, reduced_variables, reduced_row_idxs,
                    reduced_values):
    import vtool as vt
    # assert np.all(reduced_joint.values.ravel() == reduced_joint.values.flatten())
    reduced_ttypes = [model.var2_cpd[var].ttype for var in reduced_variables]

    evidence_vars = list(evidence.keys())
    evidence_state_idxs = ut.dict_take(evidence, evidence_vars)
    evidence_ttypes = [model.var2_cpd[var].ttype for var in evidence_vars]

    ttype2_ev_indices = dict(zip(*ut.group_indices(evidence_ttypes)))
    ttype2_re_indices = dict(zip(*ut.group_indices(reduced_ttypes)))
    # ttype2_ev_indices = ut.group_items(range(len(evidence_vars)), evidence_ttypes)
    # ttype2_re_indices = ut.group_items(range(len(reduced_variables)), reduced_ttypes)

    # Allow specific types of labels to change
    # everything is the same, only the names have changed.
    # TODO: allow for multiple different label_ttypes
    # for label_ttype in label_ttypes
    if NAME_TTYPE not in model.ttype2_template:
        return reduced_row_idxs, reduced_values
    label_ttypes = [NAME_TTYPE]
    for label_ttype in label_ttypes:
        ev_colxs = ttype2_ev_indices[label_ttype]
        re_colxs = ttype2_re_indices[label_ttype]

        ev_state_idxs = ut.take(evidence_state_idxs, ev_colxs)
        ev_state_idxs_tile = np.tile(ev_state_idxs,
                                     (len(reduced_values), 1)).astype(int)
        num_ev_ = len(ev_colxs)

        aug_colxs = list(range(num_ev_)) + (np.array(re_colxs) + num_ev_).tolist()
        aug_state_idxs = np.hstack([ev_state_idxs_tile, reduced_row_idxs])

        # Relabel rows based on the knowledge that
        # everything is the same, only the names have changed.
        num_cols = len(aug_state_idxs.T)
        mask = vt.index_to_boolmask(aug_colxs, num_cols)
        (other_colxs,) = np.where(~mask)
        relbl_states = aug_state_idxs.compress(mask, axis=1)
        other_states = aug_state_idxs.compress(~mask, axis=1)
        tmp_relbl_states = np.array(list(map(make_temp_state, relbl_states)))

        max_tmp_state = -1
        min_tmp_state = tmp_relbl_states.min()

        # rebuild original state structure with temp state idxs
        tmp_state_cols = [None] * num_cols
        for count, colx in enumerate(aug_colxs):
            tmp_state_cols[colx] = tmp_relbl_states[:, count:count + 1]
        for count, colx in enumerate(other_colxs):
            tmp_state_cols[colx] = other_states[:, count:count + 1]
        tmp_state_idxs = np.hstack(tmp_state_cols)

        data_ids = np.array(
            vt.compute_unique_data_ids_(list(map(tuple, tmp_state_idxs))))
        unique_ids, groupxs = vt.group_indices(data_ids)
        logger.info('Collapsed %r states into %r states' % (
            len(data_ids), len(unique_ids),))
        # Sum the values in the cpd to marginalize the duplicate probs
        new_values = np.array([
            g.sum() for g in vt.apply_grouping(reduced_values, groupxs)
        ])
        # Take only the unique rows under this induced labeling
        unique_tmp_groupxs = np.array(ut.get_list_column(groupxs, 0))
        new_aug_state_idxs = tmp_state_idxs.take(unique_tmp_groupxs, axis=0)

        tmp_idx_set = set((-np.arange(-max_tmp_state,
                                      (-min_tmp_state) + 1)).tolist())
        true_idx_set = set(range(len(model.ttype2_template[label_ttype].basis)))

        # Relabel the rows one more time to agree with initial constraints
        for colx, true_idx in enumerate(ev_state_idxs):
            tmp_idx = np.unique(new_aug_state_idxs.T[colx])
            assert len(tmp_idx) == 1
            tmp_idx_set -= {tmp_idx[0]}
            true_idx_set -= {true_idx}
            new_aug_state_idxs[new_aug_state_idxs == tmp_idx] = true_idx

        # Relabel the remaining idxs
        remain_tmp_idxs = sorted(list(tmp_idx_set))[::-1]
        remain_true_idxs = sorted(list(true_idx_set))
        for tmp_idx, true_idx in zip(remain_tmp_idxs, remain_true_idxs):
            new_aug_state_idxs[new_aug_state_idxs == tmp_idx] = true_idx

        # Remove evidence based augmented labels
        new_state_idxs = new_aug_state_idxs.T[num_ev_:].T
        return new_state_idxs, new_values
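
# collapse_labels calls a make_temp_state helper defined elsewhere in this
# module. A plausible minimal sketch (an assumption, not necessarily the real
# implementation): relabel a row of name-state idxs by order of first
# appearance, using negative ids so temporary states cannot collide with the
# true nonnegative state idxs. Two rows that induce the same partition of
# names then become identical, which is what lets duplicate rows collapse.
def _make_temp_state_sketch(state):
    """
    Example:
        >>> # [0, 0, 1] and [2, 2, 0] both mean "first two agree, third differs"
        >>> _make_temp_state_sketch([0, 0, 1])
        [-1, -1, -2]
        >>> _make_temp_state_sketch([2, 2, 0])
        [-1, -1, -2]
    """
    mapping = {}
    for state_idx in state:
        if state_idx not in mapping:
            mapping[state_idx] = -(len(mapping) + 1)
    return [mapping[state_idx] for state_idx in state]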
def get_review_edges(cm_list, ibs=None, review_cfg={}):
    r"""
    Needs to be moved to a better file. Maybe something to do with
    identification.

    Returns a list of matches that should be inspected.
    This function is more lightweight than orgres or allres.
    Used in id_review_api and interact_qres2.

    Args:
        cm_list (list): list of chip match objects
        ibs (IBEISController): used to filter by name and review status
        review_cfg (dict): overrides REVIEW_CFG_DEFAULTS; notable keys:
            ranks_top (int): put all ranks less than this number into the graph
            directed (bool): if False, reciprocal edges are collapsed

    Returns:
        tuple: review_edges = (qaid_arr, daid_arr, score_arr, rank_arr)

    CommandLine:
        python -m ibeis.gui.id_review_api get_review_edges:0

    Example0:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.gui.id_review_api import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('PZ_MTEST')
        >>> qreq_ = ibeis.main_helpers.testdata_qreq_()
        >>> cm_list = qreq_.execute()
        >>> review_cfg = dict(ranks_top=5, directed=True, name_scoring=False,
        >>>                   filter_true_matches=True)
        >>> review_edges = get_review_edges(cm_list, ibs=ibs, review_cfg=review_cfg)
        >>> print(review_edges)

    Example1:
        >>> # UNSTABLE_DOCTEST
        >>> from ibeis.gui.id_review_api import *  # NOQA
        >>> import ibeis
        >>> cm_list, qreq_ = ibeis.testdata_cmlist('PZ_MTEST', a='default:qsize=5,dsize=20')
        >>> ibs = qreq_.ibs
        >>> review_cfg = dict(ranks_top=5, directed=True, name_scoring=False,
        >>>                   filter_reviewed=False, filter_true_matches=True)
        >>> review_edges = get_review_edges(cm_list, review_cfg=review_cfg, ibs=ibs)
        >>> print(review_edges)

    Example3:
        >>> # UNSTABLE_DOCTEST
        >>> from ibeis.gui.id_review_api import *  # NOQA
        >>> import ibeis
        >>> cm_list, qreq_ = ibeis.testdata_cmlist('PZ_MTEST', a='default:qsize=1,dsize=100')
        >>> ibs = qreq_.ibs
        >>> review_cfg = dict(ranks_top=1, directed=False, name_scoring=False,
        >>>                   filter_reviewed=False, filter_true_matches=True)
        >>> review_edges = get_review_edges(cm_list, review_cfg=review_cfg, ibs=ibs)
        >>> print(review_edges)

    Example4:
        >>> # UNSTABLE_DOCTEST
        >>> from ibeis.gui.id_review_api import *  # NOQA
        >>> import ibeis
        >>> cm_list, qreq_ = ibeis.testdata_cmlist('PZ_MTEST', a='default:qsize=10,dsize=10')
        >>> ibs = qreq_.ibs
        >>> ranks_top = 3
        >>> review_cfg = dict(ranks_top=3, directed=False, name_scoring=False,
        >>>                   filter_reviewed=False, filter_true_matches=True)
        >>> review_edges = get_review_edges(cm_list, review_cfg=review_cfg, ibs=ibs)
        >>> print(review_edges)
    """
    import vtool as vt
    from ibeis.algo.hots import chip_match
    automatch_kw = REVIEW_CFG_DEFAULTS.copy()
    automatch_kw = ut.update_existing(automatch_kw, review_cfg)
    print('[resorg] get_review_edges(%s)' % (ut.repr2(automatch_kw)))
    print('[resorg] len(cm_list) = %d' % (len(cm_list)))
    qaids_stack = []
    daids_stack = []
    ranks_stack = []
    scores_stack = []

    # For each QueryResult, extract inspectable candidate matches
    if isinstance(cm_list, dict):
        cm_list = list(cm_list.values())

    if len(cm_list) == 0:
        return ([], [], [], [])

    for cm in cm_list:
        if isinstance(cm, chip_match.ChipMatch):
            daids = cm.get_top_aids(ntop=automatch_kw['ranks_top'])
            scores = cm.get_top_scores(ntop=automatch_kw['ranks_top'])
            ranks = np.arange(len(daids))
            qaids = np.full(daids.shape, cm.qaid, dtype=daids.dtype)
        else:
            (qaids, daids, scores, ranks) = cm.get_match_tbldata(
                ranks_top=automatch_kw['ranks_top'],
                name_scoring=automatch_kw['name_scoring'], ibs=ibs)
        qaids_stack.append(qaids)
        daids_stack.append(daids)
        scores_stack.append(scores)
        ranks_stack.append(ranks)

    # Stack them into a giant array
    qaid_arr = np.hstack(qaids_stack)
    daid_arr = np.hstack(daids_stack)
    score_arr = np.hstack(scores_stack)
    rank_arr = np.hstack(ranks_stack)

    # Sort by scores
    sortx = score_arr.argsort()[::-1]
    qaid_arr = qaid_arr[sortx]
    daid_arr = daid_arr[sortx]
    score_arr = score_arr[sortx]
    rank_arr = rank_arr[sortx]

    # IS_REVIEWED DOES NOT WORK
    if automatch_kw['filter_reviewed']:
        _is_reviewed = ibs.get_annot_pair_is_reviewed(qaid_arr.tolist(),
                                                      daid_arr.tolist())
        is_unreviewed = ~np.array(_is_reviewed, dtype=bool)
        qaid_arr = qaid_arr.compress(is_unreviewed)
        daid_arr = daid_arr.compress(is_unreviewed)
        score_arr = score_arr.compress(is_unreviewed)
        rank_arr = rank_arr.compress(is_unreviewed)

    # Remove directed edges
    if not automatch_kw['directed']:
        # nodes = np.unique(directed_edges.flatten())
        directed_edges = np.vstack((qaid_arr, daid_arr)).T
        # idx1, idx2 = vt.intersect2d_indices(directed_edges, directed_edges[:, ::-1])
        unique_rowx = vt.find_best_undirected_edge_indexes(directed_edges,
                                                           score_arr)
        qaid_arr = qaid_arr.take(unique_rowx)
        daid_arr = daid_arr.take(unique_rowx)
        score_arr = score_arr.take(unique_rowx)
        rank_arr = rank_arr.take(unique_rowx)

    # Filter Double Name Matches
    if automatch_kw['filter_duplicate_true_matches']:
        # filter_dup_namepairs
        qnid_arr = ibs.get_annot_nids(qaid_arr)
        dnid_arr = ibs.get_annot_nids(daid_arr)
        if not automatch_kw['directed']:
            directed_name_edges = np.vstack((qnid_arr, dnid_arr)).T
            unique_rowx2 = vt.find_best_undirected_edge_indexes(
                directed_name_edges, score_arr)
        else:
            namepair_id_list = np.array(vt.compute_unique_data_ids_(
                list(zip(qnid_arr, dnid_arr))))
            unique_namepair_ids, namepair_groupxs = vt.group_indices(namepair_id_list)
            score_namepair_groups = vt.apply_grouping(score_arr, namepair_groupxs)
            unique_rowx2 = np.array(sorted([
                groupx[score_group.argmax()]
                for groupx, score_group in zip(namepair_groupxs,
                                               score_namepair_groups)
            ]), dtype=np.int32)
        qaid_arr = qaid_arr.take(unique_rowx2)
        daid_arr = daid_arr.take(unique_rowx2)
        score_arr = score_arr.take(unique_rowx2)
        rank_arr = rank_arr.take(unique_rowx2)

    # Filter all true matches
    if automatch_kw['filter_true_matches']:
        qnid_arr = ibs.get_annot_nids(qaid_arr)
        dnid_arr = ibs.get_annot_nids(daid_arr)
        valid_flags = qnid_arr != dnid_arr
        qaid_arr = qaid_arr.compress(valid_flags)
        daid_arr = daid_arr.compress(valid_flags)
        score_arr = score_arr.compress(valid_flags)
        rank_arr = rank_arr.compress(valid_flags)

    if automatch_kw['filter_photobombs']:
        unique_aids = ut.unique(ut.flatten([qaid_arr, daid_arr]))
        # grouped_aids, unique_nids = ibs.group_annots_by_name(unique_aids)
        invalid_nid_map = get_photobomber_map(ibs, qaid_arr)
        nid2_aids = ut.group_items(unique_aids, ibs.get_annot_nids(unique_aids))
        expanded_aid_map = ut.ddict(set)
        for nid1, other_nids in invalid_nid_map.items():
            for aid1 in nid2_aids[nid1]:
                for nid2 in other_nids:
                    for aid2 in nid2_aids[nid2]:
                        expanded_aid_map[aid1].add(aid2)
                        expanded_aid_map[aid2].add(aid1)
        valid_flags = [daid not in expanded_aid_map[qaid]
                       for qaid, daid in zip(qaid_arr, daid_arr)]
        qaid_arr = qaid_arr.compress(valid_flags)
        daid_arr = daid_arr.compress(valid_flags)
        score_arr = score_arr.compress(valid_flags)
        rank_arr = rank_arr.compress(valid_flags)

    review_edges = (qaid_arr, daid_arr, score_arr, rank_arr)
    return review_edges
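
# The undirected collapse above delegates to
# vt.find_best_undirected_edge_indexes. A minimal NumPy sketch of that idea
# (an assumption about its behavior, not the vtool source): keep, for every
# unordered (qaid, daid) pair, only the row with the highest score.
def _best_undirected_edge_indexes_sketch(directed_edges, scores):
    """
    Example:
        >>> import numpy as np
        >>> edges = np.array([[1, 2], [2, 1], [3, 4]])
        >>> scores = np.array([0.2, 0.9, 0.5])
        >>> print(_best_undirected_edge_indexes_sketch(edges, scores))
        [1 2]
    """
    # Canonicalize each edge so (u, v) and (v, u) compare equal
    undirected = np.sort(directed_edges, axis=1)
    _, inverse = np.unique(undirected, axis=0, return_inverse=True)
    # Within each group of equivalent edges, take the best-scoring row
    keep = [rowxs[scores[rowxs].argmax()]
            for rowxs in (np.where(inverse == gid)[0]
                          for gid in np.unique(inverse))]
    return np.array(sorted(keep))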
def get_automatch_candidates(cm_list, ranks_lt=5, directed=True,
                             name_scoring=False, ibs=None,
                             filter_reviewed=False,
                             filter_duplicate_namepair_matches=False):
    """
    THIS IS PROBABLY ONE OF THE ONLY THINGS IN THIS FILE THAT SHOULD NOT BE
    DEPRECATED

    Returns a list of matches that should be inspected.
    This function is more lightweight than orgres or allres.
    Used in inspect_gui and interact_qres2.

    Args:
        cm_list (list or dict): list of chip match objects, or a dict mapping
            query annotation ids to them
        ranks_lt (int): put all ranks less than this number into the graph
        directed (bool):

    Returns:
        tuple: candidate_matches = (qaid_arr, daid_arr, score_arr, rank_arr)

    CommandLine:
        python -m ibeis.expt.results_organizer --test-get_automatch_candidates:2
        python -m ibeis.expt.results_organizer --test-get_automatch_candidates:0

    Example0:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.expt.results_organizer import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('PZ_MTEST')
        >>> qreq_ = ibeis.main_helpers.testdata_qreq_()
        >>> cm_list = ibs.query_chips(qreq_=qreq_, return_cm=True)
        >>> ranks_lt = 5
        >>> directed = True
        >>> name_scoring = False
        >>> candidate_matches = get_automatch_candidates(cm_list, ranks_lt, directed, ibs=ibs)
        >>> print(candidate_matches)

    Example1:
        >>> # UNSTABLE_DOCTEST
        >>> from ibeis.expt.results_organizer import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('PZ_MTEST')
        >>> qaid_list = ibs.get_valid_aids()[0:5]
        >>> daid_list = ibs.get_valid_aids()[0:20]
        >>> cm_list = ibs.query_chips(qaid_list, daid_list, return_cm=True)
        >>> ranks_lt = 5
        >>> directed = False
        >>> name_scoring = False
        >>> filter_reviewed = False
        >>> filter_duplicate_namepair_matches = True
        >>> candidate_matches = get_automatch_candidates(
        ...     cm_list, ranks_lt, directed, name_scoring=name_scoring,
        ...     filter_reviewed=filter_reviewed,
        ...     filter_duplicate_namepair_matches=filter_duplicate_namepair_matches,
        ...     ibs=ibs)
        >>> print(candidate_matches)

    Example3:
        >>> # UNSTABLE_DOCTEST
        >>> from ibeis.expt.results_organizer import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('PZ_MTEST')
        >>> qaid_list = ibs.get_valid_aids()[0:1]
        >>> daid_list = ibs.get_valid_aids()[10:100]
        >>> cm_list = ibs.query_chips(qaid_list, daid_list, return_cm=True)
        >>> ranks_lt = 1
        >>> directed = False
        >>> name_scoring = False
        >>> filter_reviewed = False
        >>> filter_duplicate_namepair_matches = True
        >>> candidate_matches = get_automatch_candidates(
        ...     cm_list, ranks_lt, directed, name_scoring=name_scoring,
        ...     filter_reviewed=filter_reviewed,
        ...     filter_duplicate_namepair_matches=filter_duplicate_namepair_matches,
        ...     ibs=ibs)
        >>> print(candidate_matches)

    Example4:
        >>> # UNSTABLE_DOCTEST
        >>> from ibeis.expt.results_organizer import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('PZ_MTEST')
        >>> qaid_list = ibs.get_valid_aids()[0:10]
        >>> daid_list = ibs.get_valid_aids()[0:10]
        >>> cm_list = ibs.query_chips(qaid_list, daid_list, return_cm=True)
        >>> ranks_lt = 3
        >>> directed = False
        >>> name_scoring = False
        >>> filter_reviewed = False
        >>> filter_duplicate_namepair_matches = True
        >>> candidate_matches = get_automatch_candidates(
        ...     cm_list, ranks_lt, directed, name_scoring=name_scoring,
        ...     filter_reviewed=filter_reviewed,
        ...     filter_duplicate_namepair_matches=filter_duplicate_namepair_matches,
        ...     ibs=ibs)
        >>> print(candidate_matches)
    """
    import vtool as vt
    from ibeis.model.hots import chip_match
    print(('[resorg] get_automatch_candidates('
           'filter_reviewed={filter_reviewed},'
           'filter_duplicate_namepair_matches={filter_duplicate_namepair_matches},'
           'directed={directed},'
           'ranks_lt={ranks_lt},'
           ).format(**locals()))
    print('[resorg] len(cm_list) = %d' % (len(cm_list)))
    qaids_stack = []
    daids_stack = []
    ranks_stack = []
    scores_stack = []

    # For each QueryResult, extract inspectable candidate matches
    if isinstance(cm_list, dict):
        cm_list = list(cm_list.values())
    for cm in cm_list:
        if isinstance(cm, chip_match.ChipMatch2):
            daids = cm.get_top_aids(ntop=ranks_lt)
            scores = cm.get_top_scores(ntop=ranks_lt)
            ranks = np.arange(len(daids))
            qaids = np.full(daids.shape, cm.qaid, dtype=daids.dtype)
        else:
            (qaids, daids, scores, ranks) = cm.get_match_tbldata(
                ranks_lt=ranks_lt, name_scoring=name_scoring, ibs=ibs)
        qaids_stack.append(qaids)
        daids_stack.append(daids)
        scores_stack.append(scores)
        ranks_stack.append(ranks)

    # Stack them into a giant array
    # utool.embed()
    qaid_arr = np.hstack(qaids_stack)
    daid_arr = np.hstack(daids_stack)
    score_arr = np.hstack(scores_stack)
    rank_arr = np.hstack(ranks_stack)

    # Sort by scores
    sortx = score_arr.argsort()[::-1]
    qaid_arr = qaid_arr[sortx]
    daid_arr = daid_arr[sortx]
    score_arr = score_arr[sortx]
    rank_arr = rank_arr[sortx]

    if filter_reviewed:
        _is_reviewed = ibs.get_annot_pair_is_reviewed(qaid_arr.tolist(),
                                                      daid_arr.tolist())
        is_unreviewed = ~np.array(_is_reviewed, dtype=bool)
        qaid_arr = qaid_arr.compress(is_unreviewed)
        daid_arr = daid_arr.compress(is_unreviewed)
        score_arr = score_arr.compress(is_unreviewed)
        rank_arr = rank_arr.compress(is_unreviewed)

    # Remove directed edges
    if not directed:
        # nodes = np.unique(directed_edges.flatten())
        directed_edges = np.vstack((qaid_arr, daid_arr)).T
        # idx1, idx2 = vt.intersect2d_indices(directed_edges, directed_edges[:, ::-1])
        unique_rowx = vt.find_best_undirected_edge_indexes(directed_edges,
                                                           score_arr)
        qaid_arr = qaid_arr.take(unique_rowx)
        daid_arr = daid_arr.take(unique_rowx)
        score_arr = score_arr.take(unique_rowx)
        rank_arr = rank_arr.take(unique_rowx)

    # Filter Double Name Matches
    if filter_duplicate_namepair_matches:
        qnid_arr = ibs.get_annot_nids(qaid_arr)
        dnid_arr = ibs.get_annot_nids(daid_arr)
        if not directed:
            directed_name_edges = np.vstack((qnid_arr, dnid_arr)).T
            unique_rowx2 = vt.find_best_undirected_edge_indexes(
                directed_name_edges, score_arr)
        else:
            namepair_id_list = np.array(vt.compute_unique_data_ids_(
                list(zip(qnid_arr, dnid_arr))))
            unique_namepair_ids, namepair_groupxs = vt.group_indices(namepair_id_list)
            score_namepair_groups = vt.apply_grouping(score_arr, namepair_groupxs)
            unique_rowx2 = np.array(sorted([
                groupx[score_group.argmax()]
                for groupx, score_group in zip(namepair_groupxs,
                                               score_namepair_groups)
            ]), dtype=np.int32)
        qaid_arr = qaid_arr.take(unique_rowx2)
        daid_arr = daid_arr.take(unique_rowx2)
        score_arr = score_arr.take(unique_rowx2)
        rank_arr = rank_arr.take(unique_rowx2)

    candidate_matches = (qaid_arr, daid_arr, score_arr, rank_arr)
    return candidate_matches
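
# In the directed branch of the duplicate-name-pair filter above, each
# (query name, database name) pair keeps only its highest-scoring row. A
# compact dict-based sketch of that grouping pattern, assumed equivalent to
# the vt.group_indices / vt.apply_grouping pipeline used there:
def _best_rows_per_namepair_sketch(qnid_arr, dnid_arr, score_arr):
    """
    Example:
        >>> import numpy as np
        >>> qnids, dnids = [1, 1, 2], [2, 2, 3]
        >>> scores = np.array([0.1, 0.7, 0.4])
        >>> print(_best_rows_per_namepair_sketch(qnids, dnids, scores))
        [1 2]
    """
    best = {}  # (qnid, dnid) -> row index of the best score seen so far
    for rowx, key in enumerate(zip(qnid_arr, dnid_arr)):
        if key not in best or score_arr[rowx] > score_arr[best[key]]:
            best[key] = rowx
    return np.array(sorted(best.values()), dtype=np.int32)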