def group_aids_by_featweight_species(ibs, aid_list, config2_=None):
    """
    helper

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.preproc.preproc_probchip import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('testdb1')
        >>> config2_ = None
        >>> aid_list = ibs.get_valid_aids()
        >>> grouped_aids, unique_species, groupxs = group_aids_by_featweight_species(ibs, aid_list, config2_)
    """
    if config2_ is None:
        featweight_species = ibs.cfg.featweight_cfg.featweight_species
    else:
        featweight_species = config2_.get('featweight_species')
        assert featweight_species is not None
    if featweight_species == 'uselabel':
        # Use the labeled species for the detector
        species_list = ibs.get_annot_species_texts(aid_list)
    else:
        species_list = [featweight_species]
    aid_list = np.array(aid_list)
    species_list = np.array(species_list)
    species_rowid = np.array(ibs.get_species_rowids_from_text(species_list))
    unique_species_rowids, groupxs = vtool.group_indices(species_rowid)
    grouped_aids = vtool.apply_grouping(aid_list, groupxs)
    grouped_species = vtool.apply_grouping(species_list, groupxs)
    unique_species = ut.get_list_column(grouped_species, 0)
    return grouped_aids, unique_species, groupxs
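
# --- Illustrative sketch (not part of the original module) -----------------
# The group_indices/apply_grouping idiom above recurs throughout this file.
# A minimal pure-numpy sketch of the assumed semantics: group_indices returns
# the unique keys plus one index array per key, and apply_grouping
# fancy-indexes a parallel values array with those index arrays.
def _sketch_group_indices_pattern():
    import numpy as np
    keys = np.array([3, 1, 3, 2, 1])
    vals = np.array([10, 20, 30, 40, 50])
    # sort by key, then split the sorted positions wherever the key changes
    sortx = keys.argsort(kind='mergesort')
    sorted_keys = keys[sortx]
    cut = np.flatnonzero(sorted_keys[1:] != sorted_keys[:-1]) + 1
    groupxs = np.split(sortx, cut)
    unique_keys = np.array([keys[xs[0]] for xs in groupxs])
    # apply_grouping: gather the values belonging to each group
    grouped_vals = [vals.take(xs) for xs in groupxs]
    assert unique_keys.tolist() == [1, 2, 3]
    assert [g.tolist() for g in grouped_vals] == [[20, 50], [40], [10, 30]]
    return unique_keys, grouped_vals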
def get_patches(invassign, wx):
    ax_list = invassign.wx2_axs[wx]
    fx_list = invassign.wx2_fxs[wx]
    config = invassign.fstack.config
    ibs = invassign.fstack.ibs
    unique_axs, groupxs = vt.group_indices(ax_list)
    fxs_groups = vt.apply_grouping(fx_list, groupxs)
    unique_aids = ut.take(invassign.fstack.ax2_aid, unique_axs)
    all_kpts_list = ibs.depc.d.get_feat_kpts(unique_aids, config=config)
    sub_kpts_list = vt.ziptake(all_kpts_list, fxs_groups, axis=0)
    chip_list = ibs.depc_annot.d.get_chips_img(unique_aids)
    # convert to appropriate colorspace
    #if colorspace is not None:
    #    chip_list = vt.convert_image_list_colorspace(chip_list, colorspace)
    # ut.print_object_size(chip_list, 'chip_list')
    patch_size = 64
    grouped_patches_list = [
        vt.get_warped_patches(chip, kpts, patch_size=patch_size)[0]
        for chip, kpts in ut.ProgIter(zip(chip_list, sub_kpts_list),
                                      nTotal=len(unique_aids),
                                      lbl='warping patches')
    ]
    # Make it correspond with original fx_list and ax_list
    word_patches = vt.invert_apply_grouping(grouped_patches_list, groupxs)
    return word_patches
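
# --- Illustrative sketch (not part of the original module) -----------------
# get_patches fetches data once per *unique* annotation and then scatters the
# per-group results back to the original (duplicated) order. A hedged toy
# version of that fetch-once/invert pattern, with a plain dict standing in
# for the expensive ibs lookups:
def _sketch_fetch_once_then_invert():
    import numpy as np
    ax_list = np.array([5, 7, 5, 7, 5])          # duplicated references
    expensive_lookup = {5: 'data5', 7: 'data7'}  # pretend this is costly
    # group duplicate references so each unique key is fetched exactly once
    sortx = ax_list.argsort(kind='mergesort')
    sorted_ax = ax_list[sortx]
    cut = np.flatnonzero(sorted_ax[1:] != sorted_ax[:-1]) + 1
    groupxs = np.split(sortx, cut)
    unique_axs = [ax_list[xs[0]] for xs in groupxs]
    grouped_results = [[expensive_lookup[ax]] * len(xs)
                       for ax, xs in zip(unique_axs, groupxs)]
    # invert_apply_grouping: place each group's results at its original slots
    flat = [None] * len(ax_list)
    for xs, results in zip(groupxs, grouped_results):
        for x, res in zip(xs, results):
            flat[x] = res
    assert flat == ['data5', 'data7', 'data5', 'data7', 'data5']
    return flat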
def get_annotmatch_rowids_from_aid2(ibs, aid2_list, eager=True, nInput=None,
                                    force_method=None):
    """
    # This one is slow because aid2 is the second part of the index
    TODO autogenerate

    Returns a list of the aids that were reviewed as candidate matches to the
    input aid

    aid_list = ibs.get_valid_aids()

    CommandLine:
        python -m ibeis.annotmatch_funcs --exec-get_annotmatch_rowids_from_aid2 --show

    Example2:
        >>> # TIME TEST
        >>> # setup_pzmtest_subgraph()
        >>> from ibeis.annotmatch_funcs import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='PZ_Master1')
        >>> aid2_list = ibs.get_valid_aids()
        >>> func_list = [
        >>>     partial(ibs.get_annotmatch_rowids_from_aid2, force_method=1),
        >>>     partial(ibs.get_annotmatch_rowids_from_aid2, force_method=2),
        >>> ]
        >>> num_list = [1, 10, 50, 100, 300, 325, 350, 400, 500]
        >>> def args_list(count, aid2_list=aid2_list, num_list=num_list):
        >>>    return (aid2_list[0:num_list[count]],)
        >>> searchkw = dict(
        >>>     func_labels=['sql', 'numpy'],
        >>>     count_to_xtick=lambda count, args: len(args[0]),
        >>>     title='Timings of get_annotmatch_rowids_from_aid2',
        >>> )
        >>> niters = len(num_list)
        >>> time_result = ut.gridsearch_timer(func_list, args_list, niters, **searchkw)
        >>> time_result['plot_timings']()
        >>> ut.show_if_requested()
    """
    from ibeis.control import _autogen_annotmatch_funcs
    if nInput is None:
        # guard: nInput is compared against 128 below
        nInput = len(aid2_list)
    if force_method != 2 and (nInput < 128 or (force_method == 1)):
        colnames = (_autogen_annotmatch_funcs.ANNOTMATCH_ROWID,)
        # FIXME: col_rowid is not correct
        params_iter = zip(aid2_list)
        andwhere_colnames = [_autogen_annotmatch_funcs.ANNOT_ROWID2]
        annotmatch_rowid_list = ibs.db.get_where2(
            ibs.const.ANNOTMATCH_TABLE, colnames, params_iter,
            andwhere_colnames, eager=eager, nInput=nInput,
            unpack_scalars=False)
    else:
        # use the grouped numpy method otherwise (an `elif force_method == 2`
        # here would leave annotmatch_rowid_list unbound for large default calls)
        import vtool as vt
        all_annotmatch_rowids = np.array(ibs._get_all_annotmatch_rowids())
        aids2 = np.array(ibs.get_annotmatch_aid2(all_annotmatch_rowids))
        unique_aid2, groupxs2 = vt.group_indices(aids2)
        rowids2_ = vt.apply_grouping(all_annotmatch_rowids, groupxs2)
        rowids2_ = [_.tolist() for _ in rowids2_]
        maping2 = ut.defaultdict(list, zip(unique_aid2, rowids2_))
        annotmatch_rowid_list = ut.dict_take(maping2, aid2_list)
    annotmatch_rowid_list = list(map(sorted, annotmatch_rowid_list))
    return annotmatch_rowid_list
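
# --- Illustrative sketch (not part of the original module) -----------------
# The grouped-numpy path above builds a one-to-many mapping (aid -> rowids) by
# grouping the full table once, instead of issuing one query per aid. A toy
# sketch of that idea using collections.defaultdict as a hedged stand-in for
# ut.defaultdict + vt.apply_grouping:
def _sketch_bulk_inverted_mapping():
    from collections import defaultdict
    all_rowids = [100, 101, 102, 103]
    aid2_col = [7, 5, 7, 9]              # aid2 value of each annotmatch row
    mapping = defaultdict(list)
    for rowid, aid in zip(all_rowids, aid2_col):
        mapping[aid].append(rowid)
    # missing keys fall back to an empty list, like the original defaultdict
    queried = [sorted(mapping[aid]) for aid in [5, 6, 7]]
    assert queried == [[101], [], [100, 102]]
    return queried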
def get_name_shortlist_aids(daid_list, dnid_list, annot_score_list,
                            name_score_list, nid2_nidx,
                            nNameShortList, nAnnotPerName):
    r"""
    CommandLine:
        python -m ibeis.algo.hots.scoring --test-get_name_shortlist_aids

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.scoring import *  # NOQA
        >>> # build test data
        >>> daid_list        = np.array([11, 12, 13, 14, 15, 16, 17])
        >>> dnid_list        = np.array([21, 21, 21, 22, 22, 23, 24])
        >>> annot_score_list = np.array([ 6,  2,  3,  5,  6,  3,  2])
        >>> name_score_list  = np.array([ 8,  9,  5,  4])
        >>> nid2_nidx        = {21:0, 22:1, 23:2, 24:3}
        >>> nNameShortList, nAnnotPerName = 3, 2
        >>> # execute function
        >>> args = (daid_list, dnid_list, annot_score_list, name_score_list,
        ...         nid2_nidx, nNameShortList, nAnnotPerName)
        >>> top_daids = get_name_shortlist_aids(*args)
        >>> # verify results
        >>> result = str(top_daids)
        >>> print(result)
        [15, 14, 11, 13, 16]
    """
    unique_nids, groupxs = vt.group_indices(np.array(dnid_list))
    grouped_annot_scores = vt.apply_grouping(annot_score_list, groupxs)
    grouped_daids = vt.apply_grouping(np.array(daid_list), groupxs)
    # Ensure name score list is aligned with the unique_nids
    aligned_name_score_list = name_score_list.take(ut.dict_take(nid2_nidx, unique_nids))
    # Sort each group by the name score
    group_sortx = aligned_name_score_list.argsort()[::-1]
    _top_daid_groups = ut.take(grouped_daids, group_sortx)
    _top_annot_score_groups = ut.take(grouped_annot_scores, group_sortx)
    # Clip to the top-scoring names
    top_daid_groups = ut.listclip(_top_daid_groups, nNameShortList)
    top_annot_score_groups = ut.listclip(_top_annot_score_groups, nNameShortList)
    # Sort within each group by the annotation score
    top_daid_sortx_groups = [annot_score_group.argsort()[::-1]
                             for annot_score_group in top_annot_score_groups]
    top_sorted_daid_groups = vt.ziptake(top_daid_groups, top_daid_sortx_groups)
    top_clipped_daids = [ut.listclip(sorted_daid_group, nAnnotPerName)
                         for sorted_daid_group in top_sorted_daid_groups]
    top_daids = ut.flatten(top_clipped_daids)
    return top_daids
def get_namescore_nonvoting_feature_flags(fm_list, fs_list, dnid_list, name_groupxs, kpts1=None):
    r"""
    fm_list = [fm[:min(len(fm), 10)] for fm in fm_list]
    fs_list = [fs[:min(len(fs), 10)] for fs in fs_list]
    """
    fx1_list = [fm.T[0] for fm in fm_list]
    # Group annotation matches by name
    name_grouped_fx1_list = vt.apply_grouping_(fx1_list, name_groupxs)
    name_grouped_fs_list = vt.apply_grouping_(fs_list, name_groupxs)
    # Stack up all matches to a particular name, keep track of original indices via offsets
    name_invertable_flat_fx1_list = list(map(ut.invertible_flatten2_numpy, name_grouped_fx1_list))
    name_grouped_fx1_flat = ut.get_list_column(name_invertable_flat_fx1_list, 0)
    name_grouped_invertable_cumsum_list = ut.get_list_column(name_invertable_flat_fx1_list, 1)
    name_grouped_fs_flat = list(map(np.hstack, name_grouped_fs_list))
    if kpts1 is not None:
        xys1_ = vt.get_xys(kpts1).T
        kpts_xyid_list = vt.compute_unique_data_ids(xys1_)
        # Make nested group for every name by query feature index (accounting for duplicate orientation)
        name_grouped_xyid_flat = list(kpts_xyid_list.take(fx1) for fx1 in name_grouped_fx1_flat)
        xyid_groupxs_list = list(vt.group_indices(xyid_flat)[1] for xyid_flat in name_grouped_xyid_flat)
        name_group_fx1_groupxs_list = xyid_groupxs_list
    else:
        # Make nested group for every name by query feature index
        fx1_groupxs_list = [vt.group_indices(fx1_flat)[1] for fx1_flat in name_grouped_fx1_flat]
        name_group_fx1_groupxs_list = fx1_groupxs_list
    name_grouped_fid_grouped_fs_list = [
        vt.apply_grouping(fs_flat, fid_groupxs)
        for fs_flat, fid_groupxs in zip(name_grouped_fs_flat, name_group_fx1_groupxs_list)
    ]

    # Flag which features are valid in this grouped space. Only one keypoint should be able to vote
    # for each group
    name_grouped_fid_grouped_isvalid_list = [
        np.array([fs_group.max() == fs_group for fs_group in fid_grouped_fs_list])
        for fid_grouped_fs_list in name_grouped_fid_grouped_fs_list
    ]

    # Go back to being grouped only in name space
    #dtype = bool
    name_grouped_isvalid_flat_list = [
        vt.invert_apply_grouping2(fid_grouped_isvalid_list, fid_groupxs, dtype=bool)
        for fid_grouped_isvalid_list, fid_groupxs in zip(name_grouped_fid_grouped_isvalid_list, name_group_fx1_groupxs_list)
    ]

    name_grouped_isvalid_unflat_list = [
        ut.unflatten2(isvalid_flat, invertable_cumsum_list)
        for isvalid_flat, invertable_cumsum_list in zip(name_grouped_isvalid_flat_list, name_grouped_invertable_cumsum_list)
    ]

    # Reports which features were valid in name scoring for every annotation
    featflag_list = vt.invert_apply_grouping(name_grouped_isvalid_unflat_list, name_groupxs)
    return featflag_list
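
# --- Illustrative sketch (not part of the original module) -----------------
# The nonvoting rule above: when one query feature matches several annots of
# the same name, only the strongest match may vote. A hedged toy sketch where
# two matches share query feature 3 and only the 0.9 score keeps its flag:
def _sketch_nonvoting_flags():
    import numpy as np
    fx1_flat = np.array([3, 3, 8])          # query feature index per match
    fs_flat = np.array([0.4, 0.9, 0.7])     # score per match
    # matches grouped by query feature index (feature 3, then feature 8)
    groupxs = [np.array([0, 1]), np.array([2])]
    flags = np.concatenate([
        fs_flat[xs] == fs_flat[xs].max() for xs in groupxs
    ])
    assert flags.tolist() == [False, True, True]
    return flags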
def group_images_by_label(label_arr, gid_arr):
    """
    Input: Length N list of labels and ids
    Output: Length M list of unique labels, and length M list of lists of ids
    """
    # Reverse the image to cluster index mapping
    import vtool as vt
    labels_, groupxs_ = vt.group_indices(label_arr)
    # Sort groups by size, largest first
    sortx = np.array(list(map(len, groupxs_))).argsort()[::-1]
    labels = labels_.take(sortx, axis=0)
    groupxs = ut.take(groupxs_, sortx)
    label_gids = vt.apply_grouping(gid_arr, groupxs)
    return labels, label_gids
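
# --- Illustrative sketch (not part of the original module) -----------------
# Hedged usage example for group_images_by_label with toy labels/gids; the
# expected outputs assume vtool/utool are importable and that vt.group_indices
# follows the grouping semantics sketched earlier in this file:
def _sketch_group_images_by_label_usage():
    import numpy as np
    label_arr = np.array([0, 1, 0, 0, 1, 2])
    gid_arr = np.array([10, 11, 12, 13, 14, 15])
    labels, label_gids = group_images_by_label(label_arr, gid_arr)
    # biggest cluster first: label 0 owns three images
    assert labels.tolist() == [0, 1, 2]
    assert [g.tolist() for g in label_gids] == [[10, 12, 13], [11, 14], [15]]
    return labels, label_gids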
def score_chipmatch_true_nsum(qaid, chipmatch, qreq_, return_wrt_aids=False):
    """
    Sums scores over all annots with those names.
    Dupvote weighting should be on to combat double counting
    """
    # Nonhacky version of name scoring
    #(aid2_fm, aid2_fsv, aid2_fk, aid2_score, aid2_H) = chipmatch
    aid2_fsv = chipmatch.aid2_fsv
    NEW_WAY = True
    if NEW_WAY:
        # New version
        aid_list = list(six.iterkeys(aid2_fsv))
        fsv_list = ut.dict_take(aid2_fsv, aid_list)
        #fs_list = [fsv.prod(axis=1) if fsv.shape[1] > 1 else fsv.T[0] for fsv in fsv_list]
        fs_list = [fsv.prod(axis=1) for fsv in fsv_list]
        annot_score_list = np.array([fs.sum() for fs in fs_list])
        annot_nid_list = np.array(qreq_.ibs.get_annot_name_rowids(aid_list))
        nid_list, groupxs = vtool.group_indices(annot_nid_list)
        grouped_scores = vtool.apply_grouping(annot_score_list, groupxs)
    else:
        aid2_fs = {aid: fsv.prod(axis=1) for aid, fsv in six.iteritems(aid2_fsv)}
        aid_list = list(six.iterkeys(aid2_fs))
        annot_score_list = np.array([fs.sum() for fs in six.itervalues(aid2_fs)])
        annot_nid_list = np.array(qreq_.ibs.get_annot_name_rowids(aid_list))
        nid_list, groupxs = vtool.group_indices(annot_nid_list)
        grouped_scores = vtool.apply_grouping(annot_score_list, groupxs)
    if return_wrt_aids:
        def indicator_array(size, pos, value):
            """ creates a zero array and places value at pos """
            arr = np.zeros(size)
            arr[pos] = value
            return arr
        grouped_nscores = [indicator_array(scores.size, scores.argmax(), scores.sum())
                           for scores in grouped_scores]
        nscore_list = vtool.clustering2.invert_apply_grouping(grouped_nscores, groupxs)
        #nscore_list = ut.flatten(grouped_nscores)
        return aid_list, nscore_list
    else:
        score_list = [scores.sum() for scores in grouped_scores]
        return nid_list, score_list
def compute_agg_rvecs(invassign, wx):
    """
    Sums and normalizes all rvecs that belong to the same word and the same
    annotation id
    """
    rvecs_list, error_flags = invassign.compute_nonagg_rvecs(wx)
    ax_list = invassign.wx2_axs[wx]
    maw_list = invassign.wx2_maws[wx]
    # group members of each word by aid, we will collapse these groups
    unique_ax, groupxs = vt.group_indices(ax_list)
    # (weighted aggregation with multi-assign-weights)
    grouped_maws = vt.apply_grouping(maw_list, groupxs)
    grouped_rvecs = vt.apply_grouping(rvecs_list, groupxs)
    grouped_flags = vt.apply_grouping(~error_flags, groupxs)

    grouped_rvecs2_ = vt.zipcompress(grouped_rvecs, grouped_flags, axis=0)
    grouped_maws2_ = vt.zipcompress(grouped_maws, grouped_flags)
    is_good = [len(rvecs) > 0 for rvecs in grouped_rvecs2_]
    aggvecs = [aggregate_rvecs(rvecs, maws)[0]
               for rvecs, maws in zip(grouped_rvecs2_, grouped_maws2_)]
    unique_ax2_ = unique_ax.compress(is_good)
    ax2_aggvec = dict(zip(unique_ax2_, aggvecs))
    # Need to recompute flags for consistency
    # flag is true when aggvec is all zeros
    return ax2_aggvec
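
# --- Illustrative sketch (not part of the original module) -----------------
# compute_agg_rvecs collapses every (word, annotation) group to one vector.
# A hedged toy version of the aggregation step, assuming aggregate_rvecs is
# roughly a maw-weighted combination followed by L2 normalization (the
# docstring says "sums and normalizes"; mean vs. sum differs only by a scale
# that the normalization removes):
def _sketch_weighted_rvec_aggregation():
    import numpy as np
    rvecs = np.array([[1.0, 0.0], [0.0, 1.0]])
    maws = np.array([3.0, 1.0])     # multi-assign weights
    agg = (rvecs * maws[:, None]).sum(axis=0) / maws.sum()  # weighted mean
    norm = np.linalg.norm(agg)
    if norm > 0:
        agg = agg / norm            # unit-normalize the residual vector
    assert np.allclose(agg, np.array([3.0, 1.0]) / np.sqrt(10.0))
    return agg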
def get_aidpair_tags(ibs, aid1_list, aid2_list, directed=True):
    r"""
    Args:
        ibs (IBEISController):  ibeis controller object
        aid1_list (list):
        aid2_list (list):
        directed (bool): (default = True)

    Returns:
        list: tags_list

    CommandLine:
        python -m ibeis.tag_funcs --exec-get_aidpair_tags --db PZ_Master1 --tags Hard interesting

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.tag_funcs import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='testdb1')
        >>> has_any = ut.get_argval('--tags', type_=list, default=None)
        >>> min_num = ut.get_argval('--min_num', type_=int, default=1)
        >>> aid_pairs = filter_aidpairs_by_tags(ibs, has_any=has_any, min_num=1)
        >>> aid1_list = aid_pairs.T[0]
        >>> aid2_list = aid_pairs.T[1]
        >>> undirected_tags = get_aidpair_tags(ibs, aid1_list, aid2_list, directed=False)
        >>> tagged_pairs = list(zip(aid_pairs.tolist(), undirected_tags))
        >>> print(ut.list_str(tagged_pairs))
        >>> tag_dict = ut.groupby_tags(tagged_pairs, undirected_tags)
        >>> print(ut.dict_str(tag_dict, nl=2))
        >>> print(ut.dict_str(ut.map_dict_vals(len, tag_dict)))
    """
    aid_pairs = np.vstack([aid1_list, aid2_list]).T
    if directed:
        annotmatch_rowid = ibs.get_annotmatch_rowid_from_superkey(
            aid_pairs.T[0], aid_pairs.T[1])
        tags_list = ibs.get_annotmatch_case_tags(annotmatch_rowid)
    else:
        expanded_aid_pairs = np.vstack([aid_pairs, aid_pairs[:, ::-1]])
        expanded_annotmatch_rowid = ibs.get_annotmatch_rowid_from_superkey(
            expanded_aid_pairs.T[0], expanded_aid_pairs.T[1])
        expanded_edgeids = vt.get_undirected_edge_ids(expanded_aid_pairs)
        unique_edgeids, groupxs = vt.group_indices(expanded_edgeids)
        expanded_tags_list = ibs.get_annotmatch_case_tags(expanded_annotmatch_rowid)
        grouped_tags = vt.apply_grouping(np.array(expanded_tags_list, dtype=object), groupxs)
        undirected_tags = [list(set(ut.flatten(tags))) for tags in grouped_tags]
        edgeid2_tags = dict(zip(unique_edgeids, undirected_tags))
        input_edgeids = expanded_edgeids[:len(aid_pairs)]
        tags_list = ut.dict_take(edgeid2_tags, input_edgeids)
    return tags_list
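
# --- Illustrative sketch (not part of the original module) -----------------
# The undirected branch above hinges on giving (a, b) and (b, a) the same
# edge id so their tags can be merged in one grouping pass. A toy version
# with a sorted-tuple edge id standing in for vt.get_undirected_edge_ids:
def _sketch_undirected_tag_merge():
    pairs = [(1, 2), (2, 1), (3, 4)]
    tags = [['hard'], ['interesting'], ['photobomb']]
    edgeid2_tags = {}
    for (a, b), pair_tags in zip(pairs, tags):
        edgeid = tuple(sorted((a, b)))       # direction-independent key
        edgeid2_tags.setdefault(edgeid, set()).update(pair_tags)
    assert edgeid2_tags[(1, 2)] == {'hard', 'interesting'}
    assert edgeid2_tags[(3, 4)] == {'photobomb'}
    return edgeid2_tags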
def get_match_results(depc, qaid_list, daid_list, score_list, config):
    """ converts table results into format for ipython notebook """
    #qaid_list, daid_list = request.get_parent_rowids()
    #score_list = request.score_list
    #config = request.config
    unique_qaids, groupxs = ut.group_indices(qaid_list)
    #grouped_qaids_list = ut.apply_grouping(qaid_list, groupxs)
    grouped_daids = ut.apply_grouping(daid_list, groupxs)
    grouped_scores = ut.apply_grouping(score_list, groupxs)

    ibs = depc.controller
    unique_qnids = ibs.get_annot_nids(unique_qaids)

    # FIXME: decision should not be part of the config for the one-vs-one
    # scores
    decision_func = getattr(np, config['decision'])
    _iter = zip(unique_qaids, unique_qnids, grouped_daids, grouped_scores)
    for qaid, qnid, daids, scores in _iter:
        dnids = ibs.get_annot_nids(daids)

        # Remove distance to self
        annot_scores = np.array(scores)
        daid_list_ = np.array(daids)
        dnid_list_ = np.array(dnids)

        is_valid = (daid_list_ != qaid)
        daid_list_ = daid_list_.compress(is_valid)
        dnid_list_ = dnid_list_.compress(is_valid)
        annot_scores = annot_scores.compress(is_valid)

        # Hacked in version of creating an annot match object
        match_result = ibeis.AnnotMatch()
        match_result.qaid = qaid
        match_result.qnid = qnid
        match_result.daid_list = daid_list_
        match_result.dnid_list = dnid_list_
        match_result._update_daid_index()
        match_result._update_unique_nid_index()

        grouped_annot_scores = vt.apply_grouping(annot_scores, match_result.name_groupxs)
        name_scores = np.array([decision_func(dists) for dists in grouped_annot_scores])
        match_result.set_cannonical_name_score(annot_scores, name_scores)
        yield match_result
def consolidate(self, inplace=False):
    """ removes duplicate entries

    Example:
        >>> # UNSTABLE_DOCTEST
        >>> from ibeis.algo.hots.pgm_ext import *  # NOQA
        >>> state_idxs = [[1, 0, 1], [1, 0, 1], [1, 0, 2]]
        >>> weights = [.1, .2, .1]
        >>> variables = ['v1', 'v2', 'v3']
        >>> self = ApproximateFactor(state_idxs, weights, variables)
        >>> inplace = False
        >>> phi = self.consolidate(inplace)
        >>> result = str(phi)
        >>> print(result)
        +------+------+------+-----------------------+
        | v1   | v2   | v3   |   \hat{phi}(v1,v2,v3) |
        |------+------+------+-----------------------|
        | v1_1 | v2_0 | v3_1 |                0.3000 |
        | v1_1 | v2_0 | v3_2 |                0.1000 |
        +------+------+------+-----------------------+
    """
    import vtool as vt

    phi = self.copy() if inplace else self
    data_ids = vt.compute_ndarray_unique_rowids_unsafe(self.state_idxs)
    unique_ids, groupxs = vt.group_indices(data_ids)
    #assert len(unique_ids) == len(np.unique(vt.compute_unique_data_ids_(list(map(tuple, phi.state_idxs)))))
    if len(data_ids) != len(unique_ids):
        # Sum the values in the cpd to marginalize the duplicate probs
        # Take only the unique rows under this induced labeling
        unique_tmp_groupxs = np.array([gxs[0] for gxs in groupxs])
        self.state_idxs = self.state_idxs.take(unique_tmp_groupxs, axis=0)
        self.weights = np.array([
            g.sum() for g in vt.apply_grouping(self.weights, groupxs)
        ])
        #print('[pgm] Consolidated %r states into %r states' % (len(data_ids), len(unique_ids),))
    #else:
    #    print('[pgm] Cannot consolidated %r unique states' % (len(data_ids),))
    if not inplace:
        return phi
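
# --- Illustrative sketch (not part of the original module) -----------------
# consolidate() sums the weights of duplicate state rows. A hedged pure-numpy
# equivalent using np.unique(..., axis=0) and np.add.at, matching the
# docstring example above (rows 0 and 1 are duplicates):
def _sketch_consolidate_rows():
    import numpy as np
    state_idxs = np.array([[1, 0, 1], [1, 0, 1], [1, 0, 2]])
    weights = np.array([0.1, 0.2, 0.1])
    uniq_rows, inverse = np.unique(state_idxs, axis=0, return_inverse=True)
    summed = np.zeros(len(uniq_rows))
    # accumulate duplicate weights onto their unique row
    np.add.at(summed, inverse.ravel(), weights)
    assert uniq_rows.tolist() == [[1, 0, 1], [1, 0, 2]]
    assert np.allclose(summed, [0.3, 0.1])
    return uniq_rows, summed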
def collapse_labels(model, evidence, reduced_variables, reduced_row_idxs,
                    reduced_values):
    import vtool as vt
    #assert np.all(reduced_joint.values.ravel() == reduced_joint.values.flatten())
    reduced_ttypes = [model.var2_cpd[var].ttype for var in reduced_variables]
    evidence_vars = list(evidence.keys())
    evidence_state_idxs = ut.dict_take(evidence, evidence_vars)
    evidence_ttypes = [model.var2_cpd[var].ttype for var in evidence_vars]

    ttype2_ev_indices = dict(zip(*ut.group_indices(evidence_ttypes)))
    ttype2_re_indices = dict(zip(*ut.group_indices(reduced_ttypes)))
    # ttype2_ev_indices = ut.group_items(range(len(evidence_vars)), evidence_ttypes)
    # ttype2_re_indices = ut.group_items(range(len(reduced_variables)), reduced_ttypes)

    # Allow specific types of labels to change
    # everything is the same, only the names have changed.
    # TODO: allow for multiple different label_ttypes
    # for label_ttype in label_ttypes
    if 'name' not in model.ttype2_template:
        return reduced_row_idxs, reduced_values
    label_ttypes = ['name']
    for label_ttype in label_ttypes:
        ev_colxs = ttype2_ev_indices[label_ttype]
        re_colxs = ttype2_re_indices[label_ttype]

        ev_state_idxs = ut.take(evidence_state_idxs, ev_colxs)
        ev_state_idxs_tile = np.tile(ev_state_idxs, (len(reduced_values), 1)).astype(int)
        num_ev_ = len(ev_colxs)

        aug_colxs = list(range(num_ev_)) + (np.array(re_colxs) + num_ev_).tolist()
        aug_state_idxs = np.hstack([ev_state_idxs_tile, reduced_row_idxs])

        # Relabel rows based on the knowledge that
        # everything is the same, only the names have changed.
        num_cols = len(aug_state_idxs.T)
        mask = vt.index_to_boolmask(aug_colxs, num_cols)
        other_colxs, = np.where(~mask)
        relbl_states = aug_state_idxs.compress(mask, axis=1)
        other_states = aug_state_idxs.compress(~mask, axis=1)
        tmp_relbl_states = np.array(list(map(make_temp_state, relbl_states)))

        max_tmp_state = -1
        min_tmp_state = tmp_relbl_states.min()

        # rebuild original state structure with temp state idxs
        tmp_state_cols = [None] * num_cols
        for count, colx in enumerate(aug_colxs):
            tmp_state_cols[colx] = tmp_relbl_states[:, count:count + 1]
        for count, colx in enumerate(other_colxs):
            tmp_state_cols[colx] = other_states[:, count:count + 1]
        tmp_state_idxs = np.hstack(tmp_state_cols)

        data_ids = np.array(
            vt.compute_unique_data_ids_(list(map(tuple, tmp_state_idxs))))
        unique_ids, groupxs = vt.group_indices(data_ids)
        print('Collapsed %r states into %r states' % (
            len(data_ids), len(unique_ids),))

        # Sum the values in the cpd to marginalize the duplicate probs
        new_values = np.array([
            g.sum() for g in vt.apply_grouping(reduced_values, groupxs)
        ])
        # Take only the unique rows under this induced labeling
        unique_tmp_groupxs = np.array(ut.get_list_column(groupxs, 0))
        new_aug_state_idxs = tmp_state_idxs.take(unique_tmp_groupxs, axis=0)

        tmp_idx_set = set((-np.arange(-max_tmp_state,
                                      (-min_tmp_state) + 1)).tolist())
        true_idx_set = set(range(len(model.ttype2_template[label_ttype].basis)))

        # Relabel the rows one more time to agree with initial constraints
        for colx, true_idx in enumerate(ev_state_idxs):
            tmp_idx = np.unique(new_aug_state_idxs.T[colx])
            assert len(tmp_idx) == 1
            tmp_idx_set -= {tmp_idx[0]}
            true_idx_set -= {true_idx}
            new_aug_state_idxs[new_aug_state_idxs == tmp_idx] = true_idx

        # Relabel the remaining idxs
        remain_tmp_idxs = sorted(list(tmp_idx_set))[::-1]
        remain_true_idxs = sorted(list(true_idx_set))
        for tmp_idx, true_idx in zip(remain_tmp_idxs, remain_true_idxs):
            new_aug_state_idxs[new_aug_state_idxs == tmp_idx] = true_idx

        # Remove evidence based augmented labels
        new_state_idxs = new_aug_state_idxs.T[num_ev_:].T
    return new_state_idxs, new_values
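
# --- Illustrative sketch (not part of the original module) -----------------
# collapse_labels exploits label symmetry: rows that differ only by *which*
# concrete name was chosen are the same state. A toy relabeling that maps
# each row to first-appearance order (a hedged stand-in for make_temp_state),
# so ('n2', 'n2', 'n3') and ('n3', 'n3', 'n1') collapse together:
def _sketch_symmetry_relabel():
    def canonical(row):
        mapping = {}
        for lbl in row:
            mapping.setdefault(lbl, len(mapping))
        return tuple(mapping[lbl] for lbl in row)
    assert canonical(('n2', 'n2', 'n3')) == (0, 0, 1)
    assert canonical(('n3', 'n3', 'n1')) == (0, 0, 1)
    assert canonical(('n1', 'n2', 'n3')) == (0, 1, 2)
    return canonical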
def get_annot_kpts_distinctiveness(ibs, aid_list, config2_=None, **kwargs):
    """
    very hacky, but cute way to cache keypoint distinctiveness

    Args:
        ibs (IBEISController):  ibeis controller object
        aid_list (list):
        dstncvs_normer (None):

    Returns:
        list: dstncvs_list

    CommandLine:
        python -m ibeis.control.manual_ibeiscontrol_funcs --test-get_annot_kpts_distinctiveness

    Example:
        >>> # SLOW_DOCTEST
        >>> from ibeis.control.manual_ibeiscontrol_funcs import *  # NOQA
        >>> from ibeis.algo.hots import distinctiveness_normalizer
        >>> import ibeis
        >>> import numpy as np
        >>> config2_ = None
        >>> # build test data
        >>> ibs = ibeis.opendb('testdb1')
        >>> aid_list = ibs.get_valid_aids(species=const.TEST_SPECIES.ZEB_PLAIN)
        >>> # execute function
        >>> aid_list1 = aid_list[::2]
        >>> aid_list2 = aid_list[1::3]
        >>> dstncvs_list1 = get_annot_kpts_distinctiveness(ibs, aid_list1)
        >>> dstncvs_list2 = get_annot_kpts_distinctiveness(ibs, aid_list2)
        >>> dstncvs_list = get_annot_kpts_distinctiveness(ibs, aid_list)
        >>> print(ut.depth_profile(dstncvs_list1))
        >>> stats_dict = ut.dict_stack([ut.get_stats(dstncvs) for dstncvs in dstncvs_list])
        >>> print(ut.dict_str(stats_dict))
        >>> assert np.all(np.array(stats_dict['min']) >= 0), 'distinctiveness was out of bounds'
        >>> assert np.all(np.array(stats_dict['max']) <= 1), 'distinctiveness was out of bounds'
    """
    from ibeis.algo.hots import distinctiveness_normalizer as dcvs_normer

    # per-species distinctiveness wrapper around ibeis cached function
    # get feature rowids
    aid_list = np.array(aid_list)
    fid_list = np.array(ibs.get_annot_feat_rowids(aid_list, ensure=True,
                                                  eager=True, nInput=None,
                                                  config2_=config2_))
    species_rowid_list = np.array(ibs.get_annot_species_rowids(aid_list))
    # Compute distinctiveness separately for each species
    unique_sids, groupxs = vt.group_indices(species_rowid_list)
    fids_groups = vt.apply_grouping(fid_list, groupxs)
    species_text_list = ibs.get_species_texts(unique_sids)
    # Map distinctiveness computation
    normer_list = [dcvs_normer.request_species_distinctiveness_normalizer(species)
                   for species in species_text_list]
    # Reduce to get results
    dstncvs_groups = [
        get_feat_kpts_distinctiveness(ibs, fids, dstncvs_normer=dstncvs_normer,
                                      species_rowid=sid, **kwargs)
        for dstncvs_normer, fids, sid in zip(normer_list, fids_groups, unique_sids)
    ]
    dstncvs_list = vt.invert_apply_grouping(dstncvs_groups, groupxs)
    return dstncvs_list
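
# --- Illustrative sketch (not part of the original module) -----------------
# The function above computes distinctiveness per species group and then
# restores the input order with invert_apply_grouping. A toy sketch of that
# group / map / invert round trip with a fake per-group computation:
def _sketch_group_map_invert():
    import numpy as np
    species = np.array([1, 2, 1, 2])
    values = np.array([10.0, 20.0, 30.0, 40.0])
    groupxs = [np.flatnonzero(species == s) for s in np.unique(species)]
    # pretend each species has its own normalizer (here: scale factors)
    scale = {1: 0.1, 2: 0.01}
    grouped_out = [values[xs] * scale[species[xs[0]]] for xs in groupxs]
    # scatter per-group results back to the original positions
    out = np.empty_like(values)
    for xs, res in zip(groupxs, grouped_out):
        out[xs] = res
    assert np.allclose(out, [1.0, 0.2, 3.0, 0.4])
    return out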
def get_namescore_nonvoting_feature_flags(fm_list, fs_list, dnid_list, name_groupxs, kpts1=None):
    r"""
    DEPRICATE

    fm_list = [fm[:min(len(fm), 10)] for fm in fm_list]
    fs_list = [fs[:min(len(fs), 10)] for fs in fs_list]
    """
    fx1_list = [fm.T[0] for fm in fm_list]
    # Group annotation matches by name
    name_grouped_fx1_list = vt.apply_grouping_(fx1_list, name_groupxs)
    name_grouped_fs_list = vt.apply_grouping_(fs_list, name_groupxs)
    # Stack up all matches to a particular name, keep track of original indices via offsets
    name_invertable_flat_fx1_list = list(
        map(ut.invertible_flatten2_numpy, name_grouped_fx1_list))
    name_grouped_fx1_flat = ut.get_list_column(name_invertable_flat_fx1_list, 0)
    name_grouped_invertable_cumsum_list = ut.get_list_column(
        name_invertable_flat_fx1_list, 1)
    name_grouped_fs_flat = list(map(np.hstack, name_grouped_fs_list))
    if kpts1 is not None:
        xys1_ = vt.get_xys(kpts1).T
        kpts_xyid_list = vt.compute_unique_data_ids(xys1_)
        # Make nested group for every name by query feature index (accounting for duplicate orientation)
        name_grouped_comboid_flat = list(
            kpts_xyid_list.take(fx1) for fx1 in name_grouped_fx1_flat)
        xyid_groupxs_list = list(
            vt.group_indices(xyid_flat)[1] for xyid_flat in name_grouped_comboid_flat)
        name_group_fx1_groupxs_list = xyid_groupxs_list
    else:
        # Make nested group for every name by query feature index
        fx1_groupxs_list = [
            vt.group_indices(fx1_flat)[1] for fx1_flat in name_grouped_fx1_flat
        ]
        name_group_fx1_groupxs_list = fx1_groupxs_list
    name_grouped_fid_grouped_fs_list = [
        vt.apply_grouping(fs_flat, fid_groupxs)
        for fs_flat, fid_groupxs in zip(name_grouped_fs_flat, name_group_fx1_groupxs_list)
    ]

    # Flag which features are valid in this grouped space. Only one keypoint should be able to vote
    # for each group
    name_grouped_fid_grouped_isvalid_list = [
        np.array(
            [fs_group.max() == fs_group for fs_group in fid_grouped_fs_list])
        for fid_grouped_fs_list in name_grouped_fid_grouped_fs_list
    ]

    # Go back to being grouped only in name space
    # dtype = bool
    name_grouped_isvalid_flat_list = [
        vt.invert_apply_grouping2(fid_grouped_isvalid_list, fid_groupxs, dtype=bool)
        for fid_grouped_isvalid_list, fid_groupxs in zip(
            name_grouped_fid_grouped_isvalid_list, name_group_fx1_groupxs_list)
    ]

    name_grouped_isvalid_unflat_list = [
        ut.unflatten2(isvalid_flat, invertable_cumsum_list)
        for isvalid_flat, invertable_cumsum_list in zip(
            name_grouped_isvalid_flat_list, name_grouped_invertable_cumsum_list)
    ]

    # Reports which features were valid in name scoring for every annotation
    featflag_list = vt.invert_apply_grouping(name_grouped_isvalid_unflat_list,
                                             name_groupxs)
    return featflag_list
def try_query(model, infr, evidence, interest_ttypes=[], verbose=True):
    r"""
    CommandLine:
        python -m wbia.algo.hots.bayes --exec-try_query --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.hots.bayes import *  # NOQA
        >>> verbose = True
        >>> other_evidence = {}
        >>> name_evidence = [1, None, 0, None]
        >>> score_evidence = ['high', 'low', 'low']
        >>> query_vars = None
        >>> model = make_name_model(num_annots=4, num_names=4, verbose=True, mode=1)
        >>> model, evidence, soft_evidence = update_model_evidence(model, name_evidence, score_evidence, other_evidence)
        >>> interest_ttypes = ['name']
        >>> infr = pgmpy.inference.BeliefPropagation(model)
        >>> evidence = infr._ensure_internal_evidence(evidence, model)
        >>> query_results = try_query(model, infr, evidence, interest_ttypes, verbose)
        >>> result = ('query_results = %s' % (str(query_results),))
        >>> ut.quit_if_noshow()
        >>> show_model(model, show_prior=True, **query_results)
        >>> ut.show_if_requested()

    Ignore:
        query_vars = ut.setdiff_ordered(model.nodes(), list(evidence.keys()))
        probs = infr.query(query_vars, evidence)
        map_assignment = infr.map_query(query_vars, evidence)
    """
    infr = pgmpy.inference.VariableElimination(model)
    # infr = pgmpy.inference.BeliefPropagation(model)
    if True:
        return bruteforce(model, query_vars=None, evidence=evidence)
    else:
        import vtool as vt

        query_vars = ut.setdiff_ordered(model.nodes(), list(evidence.keys()))
        # hack
        query_vars = ut.setdiff_ordered(
            query_vars, ut.list_getattr(model.ttype2_cpds['score'], 'variable'))
        if verbose:
            evidence_str = ', '.join(model.pretty_evidence(evidence))
            logger.info('P(' + ', '.join(query_vars) + ' | ' + evidence_str + ') = ')
        # Compute MAP joints
        # There is a bug here.
        # map_assign = infr.map_query(query_vars, evidence)
        # (probably an invalid thing to do)
        # joint_factor = pgmpy.factors.factor_product(*factor_list)
        # Brute force MAP
        name_vars = ut.list_getattr(model.ttype2_cpds['name'], 'variable')
        query_name_vars = ut.setdiff_ordered(name_vars, list(evidence.keys()))
        # TODO: incorporate case where Na is assigned to Fred
        # evidence_h = ut.delete_keys(evidence.copy(), ['Na'])

        joint = model.joint_distribution()
        joint.evidence_based_reduction(query_name_vars, evidence, inplace=True)

        # Find static row labels in the evidence
        given_name_vars = [var for var in name_vars if var in evidence]
        given_name_idx = ut.dict_take(evidence, given_name_vars)
        given_name_val = [
            joint.statename_dict[var][idx]
            for var, idx in zip(given_name_vars, given_name_idx)
        ]
        new_vals = joint.values.ravel()
        # Add static evidence variables to the relabeled name states
        new_vars = given_name_vars + joint.variables
        new_rows = [tuple(given_name_val) + row for row in joint._row_labels()]
        # Relabel rows based on the knowledge that
        # everything is the same, only the names have changed.
        temp_basis = [i for i in range(model.num_names)]

        def relabel_names(names, temp_basis=temp_basis):
            names = list(map(six.text_type, names))
            mapping = {}
            for n in names:
                if n not in mapping:
                    mapping[n] = len(mapping)
            new_names = tuple([temp_basis[mapping[n]] for n in names])
            return new_names

        relabeled_rows = list(map(relabel_names, new_rows))
        # Combine probability of rows with the same (new) label
        data_ids = np.array(vt.other.compute_unique_data_ids_(relabeled_rows))
        unique_ids, groupxs = vt.group_indices(data_ids)
        reduced_row_lbls = ut.take(relabeled_rows, ut.get_list_column(groupxs, 0))
        reduced_row_lbls = list(map(list, reduced_row_lbls))
        reduced_values = np.array(
            [g.sum() for g in vt.apply_grouping(new_vals, groupxs)])

        # Relabel the rows one more time to agree with initial constraints
        used_ = []
        replaced = []
        for colx, (var, val) in enumerate(zip(given_name_vars, given_name_val)):
            # All columns must be the same for this labeling
            alias = reduced_row_lbls[0][colx]
            reduced_row_lbls = ut.list_replace(reduced_row_lbls, alias, val)
            replaced.append(alias)
            used_.append(val)

        basis = model.ttype2_cpds['name'][0]._template_.basis
        find_remain_ = ut.setdiff_ordered(temp_basis, replaced)
        repl_remain_ = ut.setdiff_ordered(basis, used_)
        for find, repl in zip(find_remain_, repl_remain_):
            reduced_row_lbls = ut.list_replace(reduced_row_lbls, find, repl)

        # Now find the most likely state
        sortx = reduced_values.argsort()[::-1]
        sort_reduced_row_lbls = ut.take(reduced_row_lbls, sortx.tolist())
        sort_reduced_values = reduced_values[sortx]

        # Remove evidence based labels
        new_vars_ = new_vars[len(given_name_vars):]
        sort_reduced_row_lbls_ = ut.get_list_column(
            sort_reduced_row_lbls, slice(len(given_name_vars), None))

        sort_reduced_row_lbls_[0]

        # hack into a new joint factor
        var_states = ut.lmap(ut.unique_ordered, zip(*sort_reduced_row_lbls_))
        statename_dict = dict(zip(new_vars, var_states))
        cardinality = ut.lmap(len, var_states)
        val_lookup = dict(
            zip(ut.lmap(tuple, sort_reduced_row_lbls_), sort_reduced_values))
        values = np.zeros(np.prod(cardinality))
        for idx, state in enumerate(ut.iprod(*var_states)):
            if state in val_lookup:
                values[idx] = val_lookup[state]
        joint2 = pgmpy.factors.Factor(new_vars_, cardinality, values,
                                      statename_dict=statename_dict)
        logger.info(joint2)
        max_marginals = {}
        for i, var in enumerate(query_name_vars):
            one_out = query_name_vars[:i] + query_name_vars[i + 1:]
            max_marginals[var] = joint2.marginalize(one_out, inplace=False)
            # max_marginals[var] = joint2.maximize(one_out, inplace=False)
        logger.info(joint2.marginalize(['Nb', 'Nc'], inplace=False))
        factor_list = max_marginals.values()

        # Better map assignment based on knowledge of labels
        map_assign = dict(zip(new_vars_, sort_reduced_row_lbls_[0]))

        sort_reduced_rowstr_lbls = [
            ut.repr2(dict(zip(new_vars, lbls)), explicit=True, nobraces=True,
                     strvals=True)
            for lbls in sort_reduced_row_lbls_
        ]

        top_assignments = list(
            zip(sort_reduced_rowstr_lbls[:3], sort_reduced_values))
        if len(sort_reduced_values) > 3:
            top_assignments += [('other', 1 - sum(sort_reduced_values[:3]))]

        # import utool
        # utool.embed()
        # Compute all marginals
        # probs = infr.query(query_vars, evidence)
        # probs = infr.query(query_vars, evidence)
        # factor_list = probs.values()

        ## Marginalize over non-query, non-evidence
        # irrelevant_vars = ut.setdiff_ordered(joint.variables, list(evidence.keys()) + query_vars)
        # joint.marginalize(irrelevant_vars)
        # joint.normalize()
        # new_rows = joint._row_labels()
        # new_vals = joint.values.ravel()
        # map_vals = new_rows[new_vals.argmax()]
        # map_assign = dict(zip(joint.variables, map_vals))

        # Compute Marginalized MAP joints
        # marginalized_joints = {}
        # for ttype in interest_ttypes:
        #     other_vars = [v for v in joint_factor.scope()
        #                   if model.var2_cpd[v].ttype != ttype]
        #     marginal = joint_factor.marginalize(other_vars, inplace=False)
        #     marginalized_joints[ttype] = marginal
        query_results = {
            'factor_list': factor_list,
            'top_assignments': top_assignments,
            'map_assign': map_assign,
            'marginalized_joints': None,
        }
        return query_results
def collapse_labels(model, evidence, reduced_variables, reduced_row_idxs,
                    reduced_values):
    import vtool as vt
    #assert np.all(reduced_joint.values.ravel() == reduced_joint.values.flatten())
    reduced_ttypes = [model.var2_cpd[var].ttype for var in reduced_variables]
    evidence_vars = list(evidence.keys())
    evidence_state_idxs = ut.dict_take(evidence, evidence_vars)
    evidence_ttypes = [model.var2_cpd[var].ttype for var in evidence_vars]

    ttype2_ev_indices = dict(zip(*ut.group_indices(evidence_ttypes)))
    ttype2_re_indices = dict(zip(*ut.group_indices(reduced_ttypes)))
    # ttype2_ev_indices = ut.group_items(range(len(evidence_vars)), evidence_ttypes)
    # ttype2_re_indices = ut.group_items(range(len(reduced_variables)), reduced_ttypes)

    # Allow specific types of labels to change
    # everything is the same, only the names have changed.
    # TODO: allow for multiple different label_ttypes
    # for label_ttype in label_ttypes
    if NAME_TTYPE not in model.ttype2_template:
        return reduced_row_idxs, reduced_values
    label_ttypes = [NAME_TTYPE]
    for label_ttype in label_ttypes:
        ev_colxs = ttype2_ev_indices[label_ttype]
        re_colxs = ttype2_re_indices[label_ttype]

        ev_state_idxs = ut.take(evidence_state_idxs, ev_colxs)
        ev_state_idxs_tile = np.tile(ev_state_idxs, (len(reduced_values), 1)).astype(int)
        num_ev_ = len(ev_colxs)

        aug_colxs = list(range(num_ev_)) + (np.array(re_colxs) + num_ev_).tolist()
        aug_state_idxs = np.hstack([ev_state_idxs_tile, reduced_row_idxs])

        # Relabel rows based on the knowledge that
        # everything is the same, only the names have changed.
        num_cols = len(aug_state_idxs.T)
        mask = vt.index_to_boolmask(aug_colxs, num_cols)
        other_colxs, = np.where(~mask)
        relbl_states = aug_state_idxs.compress(mask, axis=1)
        other_states = aug_state_idxs.compress(~mask, axis=1)
        tmp_relbl_states = np.array(list(map(make_temp_state, relbl_states)))

        max_tmp_state = -1
        min_tmp_state = tmp_relbl_states.min()

        # rebuild original state structure with temp state idxs
        tmp_state_cols = [None] * num_cols
        for count, colx in enumerate(aug_colxs):
            tmp_state_cols[colx] = tmp_relbl_states[:, count:count + 1]
        for count, colx in enumerate(other_colxs):
            tmp_state_cols[colx] = other_states[:, count:count + 1]
        tmp_state_idxs = np.hstack(tmp_state_cols)

        data_ids = np.array(
            vt.compute_unique_data_ids_(list(map(tuple, tmp_state_idxs))))
        unique_ids, groupxs = vt.group_indices(data_ids)
        print('Collapsed %r states into %r states' % (
            len(data_ids), len(unique_ids),))

        # Sum the values in the cpd to marginalize the duplicate probs
        new_values = np.array([
            g.sum() for g in vt.apply_grouping(reduced_values, groupxs)
        ])
        # Take only the unique rows under this induced labeling
        unique_tmp_groupxs = np.array(ut.get_list_column(groupxs, 0))
        new_aug_state_idxs = tmp_state_idxs.take(unique_tmp_groupxs, axis=0)

        tmp_idx_set = set((-np.arange(-max_tmp_state,
                                      (-min_tmp_state) + 1)).tolist())
        true_idx_set = set(range(len(model.ttype2_template[label_ttype].basis)))

        # Relabel the rows one more time to agree with initial constraints
        for colx, true_idx in enumerate(ev_state_idxs):
            tmp_idx = np.unique(new_aug_state_idxs.T[colx])
            assert len(tmp_idx) == 1
            tmp_idx_set -= {tmp_idx[0]}
            true_idx_set -= {true_idx}
            new_aug_state_idxs[new_aug_state_idxs == tmp_idx] = true_idx

        # Relabel the remaining idxs
        remain_tmp_idxs = sorted(list(tmp_idx_set))[::-1]
        remain_true_idxs = sorted(list(true_idx_set))
        for tmp_idx, true_idx in zip(remain_tmp_idxs, remain_true_idxs):
            new_aug_state_idxs[new_aug_state_idxs == tmp_idx] = true_idx

        # Remove evidence based augmented labels
        new_state_idxs = new_aug_state_idxs.T[num_ev_:].T
    return new_state_idxs, new_values
def align_name_scores_with_annots(annot_score_list, annot_aid_list, daid2_idx,
                                  name_groupxs, name_score_list):
    r"""
    takes name scores and gives them to the best annotation

    Returns:
        score_list: list of scores aligned with cm.daid_list and cm.dnid_list

    Args:
        annot_score_list (list): score associated with each annot
        name_groupxs (list): groups annot_score lists into groups compatible
            with name_score_list
        name_score_list (list): score assocated with name
        nid2_nidx (dict): mapping from nids to index in name score list

    CommandLine:
        python -m wbia.algo.hots.name_scoring --test-align_name_scores_with_annots
        python -m wbia.algo.hots.name_scoring --test-align_name_scores_with_annots --show

    Example:
        >>> # ENABLE_DOCTEST
        >>> from wbia.algo.hots.name_scoring import *  # NOQA
        >>> ibs, qreq_, cm_list = plh.testdata_post_sver('PZ_MTEST', qaid_list=[18])
        >>> cm = cm_list[0]
        >>> cm.evaluate_csum_annot_score(qreq_)
        >>> cm.evaluate_nsum_name_score(qreq_)
        >>> # Annot aligned lists
        >>> annot_score_list = cm.algo_annot_scores['csum']
        >>> annot_aid_list = cm.daid_list
        >>> daid2_idx = cm.daid2_idx
        >>> # Name aligned lists
        >>> name_score_list = cm.algo_name_scores['nsum']
        >>> name_groupxs = cm.name_groupxs
        >>> # Execute Function
        >>> score_list = align_name_scores_with_annots(annot_score_list, annot_aid_list, daid2_idx, name_groupxs, name_score_list)
        >>> # Check that the correct name gets the highest score
        >>> target = name_score_list[cm.nid2_nidx[cm.qnid]]
        >>> test_index = np.where(score_list == target)[0][0]
        >>> cm.score_list = score_list
        >>> ut.assert_eq(ibs.get_annot_name_rowids(cm.daid_list[test_index]), cm.qnid)
        >>> assert ut.isunique(cm.dnid_list[score_list > 0]), 'bad name score'
        >>> top_idx = cm.algo_name_scores['nsum'].argmax()
        >>> assert cm.get_top_nids()[0] == cm.unique_nids[top_idx], 'bug in alignment'
        >>> ut.quit_if_noshow()
        >>> cm.show_ranked_matches(qreq_)
        >>> ut.show_if_requested()

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.hots.name_scoring import *  # NOQA
        >>> annot_score_list = []
        >>> annot_aid_list = []
        >>> daid2_idx = {}
        >>> # Name aligned lists
        >>> name_score_list = np.array([], dtype=np.float32)
        >>> name_groupxs = []
        >>> # Execute Function
        >>> score_list = align_name_scores_with_annots(annot_score_list, annot_aid_list, daid2_idx, name_groupxs, name_score_list)
    """
    if len(name_groupxs) == 0:
        score_list = np.empty(0, dtype=name_score_list.dtype)
        return score_list
    else:
        # Group annot aligned indices by nid
        annot_aid_list = np.array(annot_aid_list)
        # nid_list, groupxs = vt.group_indices(annot_nid_list)
        grouped_scores = vt.apply_grouping(annot_score_list, name_groupxs)
        grouped_annot_aids = vt.apply_grouping(annot_aid_list, name_groupxs)
        flat_grouped_aids = np.hstack(grouped_annot_aids)
        # flat_groupxs = np.hstack(name_groupxs)
        # if __debug__:
        #     sum_scores = np.array([scores.sum() for scores in grouped_scores])
        #     max_scores = np.array([scores.max() for scores in grouped_scores])
        #     assert np.all(name_score_list <= sum_scores)
        #     assert np.all(name_score_list > max_scores)
        # +------------
        # Find the position of the highest name_scoring annotation for each name
        # IN THE FLATTENED GROUPED ANNOT_AID_LIST (this was the bug)
        offset_list = np.array(
            [annot_scores.argmax() for annot_scores in grouped_scores])
        # Find the starting position of each group; use chain to start offsets with 0
        _padded_scores = itertools.chain([[]], grouped_scores[:-1])
        sizeoffset_list = np.array(
            [len(annot_scores) for annot_scores in _padded_scores])
        baseindex_list = sizeoffset_list.cumsum()
        # Augment starting position with offset index
        annot_idx_list = np.add(baseindex_list, offset_list)
        # L______________
        best_aid_list = flat_grouped_aids[annot_idx_list]
        best_idx_list = ut.dict_take(daid2_idx, best_aid_list)
        # give the annotation domain a name score
        # score_list = np.zeros(len(annot_score_list), dtype=name_score_list.dtype)
        score_list = np.full(len(annot_score_list), fill_value=-np.inf,
                             dtype=name_score_list.dtype)
        # score_list = np.full(len(annot_score_list), fill_value=np.nan, dtype=name_score_list.dtype)
        # score_list = np.nan(len(annot_score_list), dtype=name_score_list.dtype)
        # HACK: we need to set these to 'low' values and we also have to respect negatives
        # score_list[:] = -np.inf
        # make sure that the nid_list from group_indices and the nids belonging to
        # name_score_list (cm.unique_nids) are in alignment
        # nidx_list = np.array(ut.dict_take(nid2_nidx, nid_list))
        # THIS ASSUMES name_score_list IS IN ALIGNMENT WITH BOTH cm.unique_nids and
        # nid_list (which should be == cm.unique_nids)
        score_list[best_idx_list] = name_score_list
        return score_list
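
# --- Illustrative sketch (not part of the original module) -----------------
# The offset arithmetic above finds, for every name, the flat position of its
# best-scoring annotation: group start offsets come from a cumsum over sizes
# shifted by one (the itertools.chain([[]], ...) trick), and the per-group
# argmax is added on top. A runnable toy check:
def _sketch_flat_argmax_positions():
    import itertools
    import numpy as np
    grouped_scores = [np.array([3.0, 9.0]), np.array([5.0]),
                      np.array([2.0, 8.0, 1.0])]
    offset_list = np.array([s.argmax() for s in grouped_scores])  # [1, 0, 1]
    _padded = itertools.chain([[]], grouped_scores[:-1])
    starts = np.array([len(s) for s in _padded]).cumsum()         # [0, 2, 3]
    flat_best = starts + offset_list                              # [1, 2, 4]
    flat = np.concatenate(grouped_scores)
    assert flat[flat_best].tolist() == [9.0, 5.0, 8.0]
    return flat_best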
def align_name_scores_with_annots(annot_score_list, annot_aid_list, daid2_idx,
                                  name_groupxs, name_score_list):
    r"""
    takes name scores and gives them to the best annotation

    Returns:
        score_list: list of scores aligned with cm.daid_list and cm.dnid_list

    Args:
        annot_score_list (list): score associated with each annot
        name_groupxs (list): groups annot_score lists into groups compatible
            with name_score_list
        name_score_list (list): score assocated with name
        nid2_nidx (dict): mapping from nids to index in name score list

    CommandLine:
        python -m ibeis.algo.hots.name_scoring --test-align_name_scores_with_annots
        python -m ibeis.algo.hots.name_scoring --test-align_name_scores_with_annots --show

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.name_scoring import *  # NOQA
        >>> #ibs, qreq_, cm_list = plh.testdata_pre_sver('PZ_MTEST', qaid_list=[18])
        >>> ibs, qreq_, cm_list = plh.testdata_post_sver('PZ_MTEST', qaid_list=[18])
        >>> cm = cm_list[0]
        >>> cm.evaluate_csum_score(qreq_)
        >>> cm.evaluate_nsum_score(qreq_)
        >>> # Annot aligned lists
        >>> annot_score_list = cm.algo_annot_scores['csum']
        >>> annot_aid_list = cm.daid_list
        >>> daid2_idx = cm.daid2_idx
        >>> # Name aligned lists
        >>> name_score_list = cm.algo_name_scores['nsum']
        >>> name_groupxs = cm.name_groupxs
        >>> # Execute Function
        >>> score_list = align_name_scores_with_annots(annot_score_list, annot_aid_list, daid2_idx, name_groupxs, name_score_list)
        >>> # Check that the correct name gets the highest score
        >>> target = name_score_list[cm.nid2_nidx[cm.qnid]]
        >>> test_index = np.where(score_list == target)[0][0]
        >>> cm.score_list = score_list
        >>> ut.assert_eq(ibs.get_annot_name_rowids(cm.daid_list[test_index]), cm.qnid)
        >>> assert ut.isunique(cm.dnid_list[score_list > 0]), 'bad name score'
        >>> assert cm.get_top_nids()[0] == cm.unique_nids[cm.nsum_score_list.argmax()], 'bug in alignment'
        >>> ut.quit_if_noshow()
        >>> cm.show_ranked_matches(qreq_)
        >>> ut.show_if_requested()

    Example:
        >>> from ibeis.algo.hots.name_scoring import *  # NOQA
        >>> annot_score_list = []
        >>> annot_aid_list = []
        >>> daid2_idx = {}
        >>> # Name aligned lists
        >>> name_score_list = np.array([], dtype=np.float32)
        >>> name_groupxs = []
        >>> # Execute Function
        >>> score_list = align_name_scores_with_annots(annot_score_list, annot_aid_list, daid2_idx, name_groupxs, name_score_list)

    Ignore:
        dict(zip(cm.dnid_list, cm.score_list))
        dict(zip(cm.unique_nids, cm.nsum_score_list))
        np.all(nid_list == cm.unique_nids)
    """
    if len(name_groupxs) == 0:
        score_list = np.empty(0, dtype=name_score_list.dtype)
        return score_list
    else:
        # Group annot aligned indices by nid
        annot_aid_list = np.array(annot_aid_list)
        #nid_list, groupxs = vt.group_indices(annot_nid_list)
        grouped_scores = vt.apply_grouping(annot_score_list, name_groupxs)
        grouped_annot_aids = vt.apply_grouping(annot_aid_list, name_groupxs)
        flat_grouped_aids = np.hstack(grouped_annot_aids)
        #flat_groupxs = np.hstack(name_groupxs)
        #if __debug__:
        #    sum_scores = np.array([scores.sum() for scores in grouped_scores])
        #    max_scores = np.array([scores.max() for scores in grouped_scores])
        #    assert np.all(name_score_list <= sum_scores)
        #    assert np.all(name_score_list > max_scores)
        # +------------
        # Find the position of the highest name_scoring annotation for each name
        # IN THE FLATTENED GROUPED ANNOT_AID_LIST (this was the bug)
        offset_list = np.array([annot_scores.argmax() for annot_scores in grouped_scores])
        # Find the starting position of each group; use chain to start offsets with 0
        _padded_scores = itertools.chain([[]], grouped_scores[:-1])
        sizeoffset_list = np.array([len(annot_scores) for annot_scores in _padded_scores])
        baseindex_list = sizeoffset_list.cumsum()
        # Augment starting position with offset index
        annot_idx_list = np.add(baseindex_list, offset_list)
        # L______________
        best_aid_list = flat_grouped_aids[annot_idx_list]
        best_idx_list = ut.dict_take(daid2_idx, best_aid_list)
        # give the annotation domain a name score
        #score_list = np.zeros(len(annot_score_list), dtype=name_score_list.dtype)
        score_list = np.full(len(annot_score_list), fill_value=-np.inf,
                             dtype=name_score_list.dtype)
        #score_list = np.full(len(annot_score_list), fill_value=np.nan, dtype=name_score_list.dtype)
        #score_list = np.nan(len(annot_score_list), dtype=name_score_list.dtype)
        # HACK: we need to set these to 'low' values and we also have to respect negatives
        #score_list[:] = -np.inf
        # make sure that the nid_list from group_indices and the nids belonging to
        # name_score_list (cm.unique_nids) are in alignment
        #nidx_list = np.array(ut.dict_take(nid2_nidx, nid_list))
        # THIS ASSUMES name_score_list IS IN ALIGNMENT WITH BOTH cm.unique_nids and
        # nid_list (which should be == cm.unique_nids)
        score_list[best_idx_list] = name_score_list
        return score_list
def group_scores_by_name(ibs, aid_list, score_list):
    r"""
    Converts annotation scores to name scores.
    Over multiple annotations, finds each keypoint's best match and uses that score.

    CommandLine:
        python -m ibeis.algo.hots.name_scoring --test-group_scores_by_name

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.name_scoring import *  # NOQA
        >>> import ibeis
        >>> cm, qreq_ = ibeis.testdata_cm('PZ_MTEST')
        >>> ibs = qreq_.ibs
        >>> #print(cm.get_inspect_str(qreq_))
        >>> aid_list = cm.daid_list
        >>> score_list = cm.annot_score_list
        >>> nscoretup = group_scores_by_name(ibs, aid_list, score_list)
        >>> (sorted_nids, sorted_nscore, sorted_aids, sorted_scores) = nscoretup
        >>> ut.assert_eq(sorted_nids[0], cm.qnid)

    TODO:
        # TODO: this code needs a really good test case
        #>>> result = np.array_repr(sorted_nids[0:2])
        #>>> print(result)
        #array([1, 5])

    Ignore::
        # hack in dict of Nones prob for testing
        import six
        qres.aid2_prob = {aid:None for aid in six.iterkeys(qres.aid2_score)}

        array([ 1,  5, 26])
        [2 6 5]

    Timeit::
        import ibeis
        ibs = ibeis.opendb('PZ_MTEST')
        aid_list = ibs.get_valid_aids()
        aid_arr = np.array(aid_list)
        %timeit ibs.get_annot_name_rowids(aid_list)
        %timeit ibs.get_annot_name_rowids(aid_arr)
    """
    assert len(score_list) == len(aid_list), 'scores and aids must be associated'
    score_arr = np.array(score_list)
    nid_list = np.array(ibs.get_annot_name_rowids(aid_list))
    aid_list = np.array(aid_list)
    # Group scores by name
    unique_nids, groupxs = vt.group_indices(nid_list)
    grouped_scores = np.array(vt.apply_grouping(score_arr, groupxs))
    grouped_aids = np.array(vt.apply_grouping(aid_list, groupxs))
    # Build representative score per group
    # (find each keypoints best match per annotation within the name)
    group_nscore = np.array([scores.max() for scores in grouped_scores])
    group_sortx = group_nscore.argsort()[::-1]
    # Top nids
    sorted_nids = unique_nids.take(group_sortx, axis=0)
    sorted_nscore = group_nscore.take(group_sortx, axis=0)
    # Initial sort of aids
    _sorted_aids = grouped_aids.take(group_sortx, axis=0)
    _sorted_scores = grouped_scores.take(group_sortx, axis=0)
    # Secondary sort of aids
    sorted_sortx = [scores.argsort()[::-1] for scores in _sorted_scores]
    sorted_scores = [scores.take(sortx) for scores, sortx in zip(_sorted_scores, sorted_sortx)]
    sorted_aids = [aids.take(sortx) for aids, sortx in zip(_sorted_aids, sorted_sortx)]
    nscoretup = NameScoreTup(sorted_nids, sorted_nscore, sorted_aids, sorted_scores)
    return nscoretup
def get_name_aids(ibs, nid_list, enable_unknown_fix=True):
    r"""
    # TODO: Rename to get_anot_rowids_from_name_rowid

    Returns:
        list: aids_list a list of list of aids in each name

    RESTful:
        Method: GET
        URL:    /api/name/aids/

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.control.manual_name_funcs import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('testdb1')
        >>> # Map annotations to name ids
        >>> aid_list = ibs.get_valid_aids()
        >>> nid_list = ibs.get_annot_name_rowids(aid_list)
        >>> # Get annotation ids for each name
        >>> aids_list = ibs.get_name_aids(nid_list)
        >>> # Run Assertion Test
        >>> groupid2_items = ut.group_items(aids_list, nid_list)
        >>> grouped_items = list(six.itervalues(groupid2_items))
        >>> passed_iter = map(ut.allsame, grouped_items)
        >>> passed_list = list(passed_iter)
        >>> assert all(passed_list), 'problem in get_name_aids'
        >>> # Print grouped items
        >>> print(ut.dict_str(groupid2_items, newlines=False))

    Ignore:
        from ibeis.control.manual_name_funcs import *  # NOQA
        import ibeis
        #ibs = ibeis.opendb('testdb1')
        #ibs = ibeis.opendb('PZ_MTEST')
        ibs = ibeis.opendb('PZ_Master0')
        #ibs = ibeis.opendb('GZ_ALL')

        nid_list = ibs.get_valid_nids()
        nid_list_ = [const.UNKNOWN_NAME_ROWID if nid <= 0 else nid for nid in nid_list]

        with ut.Timer('sql'):
            #aids_list1 = ibs.get_name_aids(nid_list, enable_unknown_fix=False)
            aids_list1 = ibs.db.get(const.ANNOTATION_TABLE, (ANNOT_ROWID,), nid_list_, id_colname=NAME_ROWID, unpack_scalars=False)

        with ut.Timer('hackquery + group'):
            opstr = '''
            SELECT annot_rowid, name_rowid
            FROM annotations
            WHERE name_rowid IN
                (%s)
            ORDER BY name_rowid ASC, annot_rowid ASC
            ''' % (', '.join(map(str, nid_list)))
            pair_list = ibs.db.connection.execute(opstr).fetchall()
            aids = np.array(ut.get_list_column(pair_list, 0))
            nids = np.array(ut.get_list_column(pair_list, 1))
            unique_nids, groupx = vt.group_indices(nids)
            grouped_aids_ = vt.apply_grouping(aids, groupx)
            aids_list5 = [sorted(arr.tolist()) for arr in grouped_aids_]

        for aids1, aids5 in zip(aids_list1, aids_list5):
            if (aids1) != (aids5):
                print(aids1)
                print(aids5)
                print('-----')

        ut.assert_lists_eq(list(map(tuple, aids_list5)), list(map(tuple, aids_list1)))

        with ut.Timer('numpy'):
            # alt method
            valid_aids = np.array(ibs.get_valid_aids())
            valid_nids = np.array(ibs.get_annot_name_rowids(valid_aids, distinguish_unknowns=False))
            aids_list2 = [valid_aids.take(np.flatnonzero(valid_nids == nid)).tolist() for nid in nid_list_]

        with ut.Timer('numpy2'):
            # alt method
            valid_aids = np.array(ibs.get_valid_aids())
            valid_nids = np.array(ibs.get_annot_name_rowids(valid_aids, distinguish_unknowns=False))
            aids_list3 = [valid_aids.take(np.flatnonzero(np.equal(valid_nids, nid))).tolist() for nid in nid_list_]

        with ut.Timer('numpy3'):
            # alt method
            valid_aids = np.array(ibs.get_valid_aids())
            valid_nids = np.array(ibs.db.get_all_col_rows(const.ANNOTATION_TABLE, NAME_ROWID))
            aids_list4 = [valid_aids.take(np.flatnonzero(np.equal(valid_nids, nid))).tolist() for nid in nid_list_]

        assert aids_list2 == aids_list3
        assert aids_list3 == aids_list4
        assert aids_list1 == aids_list2

        valid_aids = ibs.get_valid_aids()
        %timeit ibs.db.get_all_col_rows('annotations', 'rowid')
        %timeit ibs.db.get_all_col_rows('annotations', 'name_rowid')
        %timeit ibs.get_annot_name_rowids(valid_aids, distinguish_unknowns=False)
        %timeit ibs.get_valid_aids()
        %timeit ibs.get_annot_name_rowids(ibs.get_valid_aids(), distinguish_unknowns=False)
        valid_nids1 = ibs.get_annot_name_rowids(valid_aids, distinguish_unknowns=False)
        valid_nids2 = ibs.db.get_all_col_rows('annotations', 'name_rowid')
        assert valid_nids1 == valid_nids2

        ibs.db.fname
        ibs.db.fpath

        import sqlite3
        con = sqlite3.connect(ibs.db.fpath)

        opstr = '''
        SELECT annot_rowid, name_rowid
        FROM annotations
        WHERE name_rowid IN
            (SELECT name_rowid FROM name)
        ORDER BY name_rowid ASC, annot_rowid ASC
        '''

        annot_rowid_list = con.execute(opstr).fetchall()
        aid_list = ut.get_list_column(annot_rowid_list, 0)
        nid_list = ut.get_list_column(annot_rowid_list, 1)

        # HACKY HACKY HACK

        with ut.Timer('hackquery + group'):
            #nid_list = ibs.get_valid_nids()[10:15]
            nid_list = ibs.get_valid_nids()
            opstr = '''
            SELECT annot_rowid, name_rowid
            FROM annotations
            WHERE name_rowid IN
                (%s)
            ORDER BY name_rowid ASC, annot_rowid ASC
            ''' % (', '.join(map(str, nid_list)))
            pair_list = ibs.db.connection.execute(opstr).fetchall()
            aids = np.array(ut.get_list_column(pair_list, 0))
            nids = np.array(ut.get_list_column(pair_list, 1))
            unique_nids, groupx = vt.group_indices(nids)
            grouped_aids_ = vt.apply_grouping(aids, groupx)
            grouped_aids = [arr.tolist() for arr in grouped_aids_]

        SELECT name_rowid, COUNT(annot_rowid) AS number, GROUP_CONCAT(annot_rowid) AS aid_list
        FROM annotations
        WHERE name_rowid in (SELECT name_rowid FROM name)
        GROUP BY name_rowid
        ORDER BY name_rowid ASC

        import vtool as vt
        vt
        vt.aid_list[0]

        annot_rowid_list = con.execute(opstr).fetchall()

        opstr = '''
            SELECT annot_rowid
            FROM annotations
            WHERE name_rowid=?
            '''

        cur = ibs.db.connection.cursor()

        cur = con.execute('BEGIN IMMEDIATE TRANSACTION')
        cur = ibs.db.connection
        res = [cur.execute(opstr, (nid,)).fetchall() for nid in nid_list_]
        cur.execute('COMMIT TRANSACTION')

        res = [ibs.db.cur.execute(opstr, (nid,)).fetchall() for nid in nid_list_]
    """
    # FIXME: THIS FUNCTION IS VERY SLOW
    # ADD A LOCAL CACHE TO FIX THIS SPEED
    # ALSO FIX GET_IMAGE_AIDS
    # really a getter for the annotation table not the name table
    #return [[] for nid in nid_list]
    # TODO: should a query of the UNKNOWN_NAME_ROWID return anything?
    # TODO: don't even run negative aids as queries
    nid_list_ = [const.UNKNOWN_NAME_ROWID if nid <= 0 else nid for nid in nid_list]
    USE_GROUPING_HACK = False
    if USE_GROUPING_HACK:
        # This code doesn't work because it doesn't respect empty names
        input_list, inverse_unique = np.unique(nid_list_, return_inverse=True)
        input_str = ', '.join(list(map(str, input_list)))
        opstr = '''
        SELECT annot_rowid, name_rowid
        FROM {ANNOTATION_TABLE}
        WHERE name_rowid IN
            ({input_str})
        ORDER BY name_rowid ASC, annot_rowid ASC
        '''.format(input_str=input_str, ANNOTATION_TABLE=const.ANNOTATION_TABLE)
        pair_list = ibs.db.connection.execute(opstr).fetchall()
        aidscol = np.array(ut.get_list_column(pair_list, 0))
        nidscol = np.array(ut.get_list_column(pair_list, 1))
        unique_nids, groupx = vt.group_indices(nidscol)
        grouped_aids_ = vt.apply_grouping(aidscol, groupx)
        #aids_list = [sorted(arr.tolist()) for arr in grouped_aids_]
        structured_aids_list = [arr.tolist() for arr in grouped_aids_]
        aids_list = np.array(structured_aids_list)[inverse_unique].tolist()
    else:
        USE_NUMPY_IMPL = True
        #USE_NUMPY_IMPL = False
        # Use qt if getting one at a time otherwise perform bulk operation
        USE_NUMPY_IMPL = len(nid_list_) > 1
        #USE_NUMPY_IMPL = len(nid_list_) > 10
        if USE_NUMPY_IMPL:
            # This seems to be 30x faster for bigger inputs
            valid_aids = np.array(ibs._get_all_aids())
            valid_nids = np.array(ibs.db.get_all_col_rows(const.ANNOTATION_TABLE, NAME_ROWID))
            #np.array(ibs.get_annot_name_rowids(valid_aids, distinguish_unknowns=False))
            # MEMORY HOG LIKE A SON OF A BITCH
            # aids_list = [
            #     valid_aids.take(np.flatnonzero(
            #         np.equal(valid_nids, nid))).tolist()
            #     for nid in nid_list_
            # ]
            temp = np.zeros((len(valid_nids),), dtype=bool)
            aids_dict = {}
            nid_list_unique = np.unique(nid_list_)
            for nid in nid_list_unique:
                bool_list = np.equal(valid_nids, nid, out=temp)
                flattened = np.flatnonzero(bool_list)
                # only call tolist on the array case; a plain [] has no tolist
                aid_list = [] if nid < 0 else valid_aids.take(flattened).tolist()
                aids_dict[nid] = aid_list
            aids_list = ut.dict_take(aids_dict, nid_list_)
        else:
            # SQL IMPL
            aids_list = ibs.db.get(const.ANNOTATION_TABLE, (ANNOT_ROWID,),
                                   nid_list_, id_colname=NAME_ROWID,
                                   unpack_scalars=False)
    if enable_unknown_fix:
        #enable_unknown_fix == distinguish_unknowns
        # negative name rowids correspond to unknown annotations where
        # annot_rowid = -name_rowid
        #aids_list = [None if nid is None else ([-nid] if nid < 0 else aids)
        #             for nid, aids in zip(nid_list, aids_list)]
        # Not sure if this should fail or return empty list on None nid
        aids_list = [[] if nid is None else ([-nid] if nid < 0 else aids)
                     for nid, aids in zip(nid_list, aids_list)]
        #aids_list = [[-nid] if nid < 0 else aids
        #             for nid, aids in zip(nid_list, aids_list)]
    return aids_list
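
# --- Illustrative sketch (not part of the original module) -----------------
# Why the "hackquery + group" timing experiments above pay off: one bulk
# ORDER BY query plus an in-memory group is usually far cheaper than one query
# per name rowid. A self-contained sqlite3 toy version of that pattern:
def _sketch_bulk_query_then_group():
    import sqlite3
    from collections import defaultdict
    con = sqlite3.connect(':memory:')
    con.execute('CREATE TABLE annotations (annot_rowid INTEGER, name_rowid INTEGER)')
    con.executemany('INSERT INTO annotations VALUES (?, ?)',
                    [(1, 10), (2, 11), (3, 10), (4, 12)])
    nid_list = [10, 11, 13]   # 13 has no annotations
    opstr = ('SELECT annot_rowid, name_rowid FROM annotations '
             'WHERE name_rowid IN (%s) ORDER BY name_rowid, annot_rowid'
             % (', '.join(map(str, nid_list))))
    mapping = defaultdict(list)
    for aid, nid in con.execute(opstr):
        mapping[nid].append(aid)
    # names with no annotations fall back to the empty list
    aids_list = [mapping[nid] for nid in nid_list]
    assert aids_list == [[1, 3], [2], []]
    return aids_list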
def analyze(ibsmap, qreq_dict, species_dict, path_to_file_list, params): print('[analyze] Beginning Analyze') print('[analyze] Received %d file paths' % (len(path_to_file_list))) # decompose the filename to get the car/person to whom this image belongs info_tup_list = [preprocess_fpath(ibsmap, species_dict, path_to_file, params) for path_to_file in path_to_file_list] is_valid_list = [tup_ is not None for tup_ in info_tup_list] # get the ungrouped tuples that were not None valid_tup_list_ug = ut.filter_items(info_tup_list, is_valid_list) valid_path_list_ug = ut.filter_items(path_to_file_list, is_valid_list) # group by species valid_species_list_ug = ut.get_list_column(valid_tup_list_ug, 3) seen_species = {} def get_species_tmpid(txt): if txt in seen_species: return seen_species[txt] else: seen_species[txt] = len(seen_species) return get_species_tmpid(txt) species_tmpid_list = np.array([get_species_tmpid(txt) for txt in valid_species_list_ug]) #ibs.get_species_rowids_from_text(valid_species_list_ug) unique_species_rowids, groupxs = vt.group_indices(np.array(species_tmpid_list)) grouped_valid_tup_list = vt.apply_grouping(np.array(valid_tup_list_ug, dtype=object), groupxs) grouped_path_list = vt.apply_grouping(np.array(valid_path_list_ug, dtype=object), groupxs) print('[analyze] Created %d species groups' % (len(grouped_valid_tup_list))) print('[analyze] grouped_valid_tup_list = ' + ut.list_str(grouped_valid_tup_list)) print('[analyze] grouped_path_list = ' + ut.list_str(grouped_path_list)) assert len(grouped_valid_tup_list) == len(grouped_path_list), 'lengths must match for zip' for groupx, (tup, valid_path_list) in enumerate(zip(grouped_valid_tup_list, grouped_path_list)): car_list, person_list, animal_list, species_list, offset_list, contributor_row_id_list = zip(*tup) assert ut.list_allsame(species_list) animal = animal_list[0] species = species_list[0] ibs = ibsmap[animal] with ut.Indenter('[GROUP-%d-%s]' % (groupx, species)): assert ((animal == 'zebra' and species == species_dict['zebra']) or (animal == 'giraffe' and species == species_dict['giraffe'])), 'animal/species mismatch!' # Add image to database gid_list = ibs.add_images(valid_path_list, auto_localize=False) reported_time_list = list(map(vt.parse_exif_unixtime, valid_path_list)) actual_unixtime_list = [ reported_unixtime + offset for reported_unixtime, offset in zip(reported_time_list, offset_list) ] ibs.set_image_unixtime(gid_list, actual_unixtime_list, duplicate_behavior='filter') ibs.set_image_contributor_rowid(gid_list, contributor_row_id_list, duplicate_behavior='filter') print('[analyze] starting detection for %d images and species %s...' 
% (len(valid_path_list), species)) qaids_list = ibs.detect_random_forest(gid_list, species=species) qaid_list, reverse_list = ut.invertible_flatten2(qaids_list) print('\n[analyze] detected %d animals of species %s' % (len(qaid_list), species)) # if there were no detections, don't bother if not qaid_list: continue # because qreq_ is persistent we need only to update the qaid_list qreq_ = qreq_dict[animal] # there is a qreq_ for each species qaid_list_unique, unique_inverse = np.unique(qaid_list, return_inverse=True) qreq_.set_external_qaids(qaid_list_unique) qres_list_unique = ibs.query_chips(qreq_=qreq_, verbose=False) qres_list = ut.list_take(qres_list_unique, unique_inverse) # so that we can draw a new bounding box for each detection detection_bbox_list = ibs.get_annot_verts(qaid_list) detection_bboxes_list = ut.unflatten2(detection_bbox_list, reverse_list) qreses_list = ut.unflatten2(qres_list, reverse_list) with ut.Indenter('[POSTPROCESS]'): for _tup in zip(valid_path_list, detection_bboxes_list, qreses_list, car_list, person_list, animal_list, gid_list, qaids_list): postprocess_result(ibs, _tup, params) with ut.Indenter('[REVIEW_CHECK]'): for car, person in zip(car_list, person_list): check_if_need_review(person, car, params)
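# Editor's note: `analyze` leans on utool's invertible flatten to run one
# batched query over per-image detection lists and then regroup the results
# per image. A minimal, hypothetical sketch of that round trip, assuming
# invertible_flatten2/unflatten2 behave as they are used above:
def _demo_invertible_flatten():
    import utool as ut
    qaids_list = [[1, 2], [], [3]]  # made-up per-image detections
    flat_list, reverse_list = ut.invertible_flatten2(qaids_list)
    # flat_list == [1, 2, 3]; reverse_list records how to regroup it
    assert ut.unflatten2(flat_list, reverse_list) == qaids_list
    return flat_list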
def get_annotmatch_rowids_from_aid(ibs, aid_list, eager=True, nInput=None,
                                   force_method=None):
    """
    Undirected version.
    TODO autogenerate

    Returns a list of the aids that were reviewed as candidate matches to the
    input aid.

    aid_list = ibs.get_valid_aids()

    CommandLine:
        python -m ibeis.annotmatch_funcs --exec-get_annotmatch_rowids_from_aid
        python -m ibeis.annotmatch_funcs --exec-get_annotmatch_rowids_from_aid:1 --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.annotmatch_funcs import *  # NOQA
        >>> import ibeis
        >>> # setup_pzmtest_subgraph()
        >>> ibs = ibeis.opendb(defaultdb='PZ_MTEST')
        >>> aid_list = ibs.get_valid_aids()[0:4]
        >>> eager = True
        >>> nInput = None
        >>> annotmatch_rowid_list = get_annotmatch_rowids_from_aid(ibs, aid_list,
        >>>                                                        eager, nInput)
        >>> result = ('annotmatch_rowid_list = %s' % (str(annotmatch_rowid_list),))
        >>> print(result)

    Example2:
        >>> # TIME TEST
        >>> # setup_pzmtest_subgraph()
        >>> from ibeis.annotmatch_funcs import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='PZ_Master1')
        >>> aid_list = ibs.get_valid_aids()
        >>> from functools import partial
        >>> func_list = [
        >>>     partial(ibs.get_annotmatch_rowids_from_aid),
        >>>     partial(ibs.get_annotmatch_rowids_from_aid, force_method=1),
        >>>     partial(ibs.get_annotmatch_rowids_from_aid, force_method=2),
        >>> ]
        >>> num_list = [1, 10, 50, 100, 300, 325, 350, 400, 500]
        >>> def args_list(count, aid_list=aid_list, num_list=num_list):
        >>>     return (aid_list[0:num_list[count]],)
        >>> searchkw = dict(
        >>>     func_labels=['combo', 'sql', 'numpy'],
        >>>     count_to_xtick=lambda count, args: len(args[0]),
        >>>     title='Timings of get_annotmatch_rowids_from_aid',
        >>> )
        >>> niters = len(num_list)
        >>> time_result = ut.gridsearch_timer(func_list, args_list, niters, **searchkw)
        >>> time_result['plot_timings']()
        >>> ut.show_if_requested()
    """
    from ibeis.control import _autogen_annotmatch_funcs
    if nInput is None:
        nInput = len(aid_list)

    if force_method != 2 and (nInput < 256 or (force_method == 1)):
        rowids1 = ibs.get_annotmatch_rowids_from_aid1(aid_list)
        # This one is slow because aid2 is the second part of the index
        rowids2 = ibs.get_annotmatch_rowids_from_aid2(aid_list)
        annotmatch_rowid_list = list(map(ut.flatten, zip(rowids1, rowids2)))  # NOQA
    else:
        # This is much much faster than the other methods for large queries
        import vtool as vt
        all_annotmatch_rowids = np.array(ibs._get_all_annotmatch_rowids())
        aids1 = np.array(ibs.get_annotmatch_aid1(all_annotmatch_rowids))
        aids2 = np.array(ibs.get_annotmatch_aid2(all_annotmatch_rowids))
        unique_aid1, groupxs1 = vt.group_indices(aids1)
        unique_aid2, groupxs2 = vt.group_indices(aids2)
        rowids1_ = vt.apply_grouping(all_annotmatch_rowids, groupxs1)
        rowids2_ = vt.apply_grouping(all_annotmatch_rowids, groupxs2)
        rowids1_ = [_.tolist() for _ in rowids1_]
        rowids2_ = [_.tolist() for _ in rowids2_]
        maping1 = dict(zip(unique_aid1, rowids1_))
        maping2 = dict(zip(unique_aid2, rowids2_))
        mapping = ut.defaultdict(list, ut.dict_union3(maping1, maping2))
        annotmatch_rowid_list = ut.dict_take(mapping, aid_list)
    if False:
        # VERY SLOW
        colnames = (_autogen_annotmatch_funcs.ANNOTMATCH_ROWID,)
        # FIXME: col_rowid is not correct
        params_iter = list(zip(aid_list, aid_list))
        where_colnames = [_autogen_annotmatch_funcs.ANNOT_ROWID1,
                          _autogen_annotmatch_funcs.ANNOT_ROWID2]
        with ut.Timer('one'):
            annotmatch_rowid_list1 = ibs.db.get_where3(  # NOQA
                ibs.const.ANNOTMATCH_TABLE, colnames, params_iter,
                where_colnames, logicop='OR', eager=eager, nInput=nInput,
                unpack_scalars=False)
    # Ensure function output is consistent
    annotmatch_rowid_list = list(map(sorted, annotmatch_rowid_list))
    return annotmatch_rowid_list
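# Editor's note: the fast branch above is an inverted index built once over
# the whole annotmatch table. A plain-Python, hypothetical sketch of the same
# idea (table rows and aids here are made up for illustration):
def _demo_inverted_annotmatch_index():
    from collections import defaultdict
    # (rowid, aid1, aid2) triples standing in for the annotmatch table
    table = [(100, 1, 2), (101, 2, 3), (102, 1, 3)]
    mapping = defaultdict(list)
    for rowid, aid1, aid2 in table:
        mapping[aid1].append(rowid)  # directed index on aid1
        mapping[aid2].append(rowid)  # union with the aid2 index -> undirected
    # every query aid is now a dict lookup; missing aids yield []
    return [sorted(mapping[aid]) for aid in [1, 2, 4]]
    # -> [[100, 102], [100, 101], []]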
def conditional_knn(nnindexer, qfx2_vec, num_neighbors, invalid_axs):
    """
    Example:
        >>> from ibeis.algo.hots.neighbor_index import *  # NOQA
        >>> import ibeis
        >>> qreq_ = ibeis.testdata_qreq_(defaultdb='seaturtles')
        >>> qreq_.load_indexer()
        >>> qfx2_vec = qreq_.ibs.get_annot_vecs(qreq_.qaids[0])
        >>> num_neighbors = 2
        >>> nnindexer = qreq_.indexer
        >>> ibs = qreq_.ibs
        >>> qaid = 1
        >>> qencid = ibs.get_annot_encounter_text([qaid])[0]
        >>> ax2_encid = np.array(ibs.get_annot_encounter_text(nnindexer.ax2_aid))
        >>> invalid_axs = np.where(ax2_encid == qencid)[0]
    """
    #import ibeis
    import itertools

    def in1d_shape(arr1, arr2):
        return np.in1d(arr1, arr2).reshape(arr1.shape)

    get_neighbors = ut.partial(nnindexer.flann.nn_index,
                               checks=nnindexer.checks,
                               cores=nnindexer.cores)

    # Alloc space for final results
    K = num_neighbors
    shape = (len(qfx2_vec), K)
    qfx2_idx = np.full(shape, -1, dtype=np.int32)
    qfx2_rawdist = np.full(shape, np.nan, dtype=np.float64)
    qfx2_truek = np.full(shape, -1, dtype=np.int32)

    # Make a set of temporary indexes and loop variables
    limit = 4  # maximum number of re-query iterations (None for no limit)
    K_ = K
    tx2_qfx = np.arange(len(qfx2_vec))
    tx2_vec = qfx2_vec
    iter_count = 0
    for iter_count in itertools.count():
        if limit is not None and iter_count >= limit:
            break
        # Find a set of neighbors
        (tx2_idx, tx2_rawdist) = get_neighbors(tx2_vec, K_)
        tx2_idx = vt.atleast_nd(tx2_idx, 2)
        tx2_rawdist = vt.atleast_nd(tx2_rawdist, 2)
        tx2_ax = nnindexer.get_nn_axs(tx2_idx)
        # Check to see if they meet the criteria
        tx2_invalid = in1d_shape(tx2_ax, invalid_axs)
        tx2_valid = np.logical_not(tx2_invalid)
        tx2_num_valid = tx2_valid.sum(axis=1)
        tx2_notdone = tx2_num_valid < K
        tx2_done = np.logical_not(tx2_notdone)
        # Move completely valid queries into the results
        if np.any(tx2_done):
            done_qfx = tx2_qfx.compress(tx2_done, axis=0)
            # Need to parse which columns are the completed ones
            done_valid_ = tx2_valid.compress(tx2_done, axis=0)
            done_rawdist_ = tx2_rawdist.compress(tx2_done, axis=0)
            done_idx_ = tx2_idx.compress(tx2_done, axis=0)
            # Get the complete valid indices
            rowxs, colxs = np.where(done_valid_)
            unique_rows, groupxs = vt.group_indices(rowxs)
            first_k_groupxs = [groupx[0:K] for groupx in groupxs]
            chosen_xs = np.hstack(first_k_groupxs)
            multi_index = (rowxs.take(chosen_xs), colxs.take(chosen_xs))
            flat_xs = np.ravel_multi_index(multi_index, done_valid_.shape)
            done_rawdist = done_rawdist_.take(flat_xs).reshape((-1, K))
            done_idx = done_idx_.take(flat_xs).reshape((-1, K))
            # Write done results in output
            qfx2_idx[done_qfx, :] = done_idx
            qfx2_rawdist[done_qfx, :] = done_rawdist
            qfx2_truek[done_qfx, :] = vt.apply_grouping(colxs, first_k_groupxs)
        if np.all(tx2_done):
            break
        # Not all queries found K valid neighbors; re-query with a larger K_
        K_increase = (K - tx2_num_valid.min())
        K_ += K_increase
        tx2_qfx = tx2_qfx.compress(tx2_notdone, axis=0)
        tx2_vec = tx2_vec.compress(tx2_notdone, axis=0)

    if nnindexer.max_distance_sqrd is not None:
        qfx2_dist = np.divide(qfx2_rawdist, nnindexer.max_distance_sqrd)
    else:
        qfx2_dist = qfx2_rawdist
    return (qfx2_idx, qfx2_dist, iter_count)
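# Editor's note: the subtle step in conditional_knn is selecting the first K
# valid neighbors per row from a ragged validity mask. A hypothetical plain
# numpy sketch of that selection (vt.group_indices is replaced by an explicit
# per-row loop for clarity):
def _demo_first_k_valid(K=2):
    import numpy as np
    valid = np.array([[True, False, True, True],
                      [True, True,  False, True]])
    rowxs, colxs = np.where(valid)  # row-major, so columns come sorted per row
    chosen = np.hstack([np.flatnonzero(rowxs == r)[0:K]
                        for r in np.unique(rowxs)])
    flat_xs = np.ravel_multi_index((rowxs.take(chosen), colxs.take(chosen)),
                                   valid.shape)
    # flat indices of the first K valid entries in each row
    return flat_xs.reshape((-1, K))  # -> [[0, 2], [4, 5]]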
def get_name_aids(ibs, nid_list, enable_unknown_fix=True):
    r"""
    # TODO: Rename to get_annot_rowids_from_name_rowid

    Returns:
        list: aids_list a list of list of aids in each name

    RESTful:
        Method: GET
        URL:    /api/name/aids/

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.control.manual_name_funcs import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('testdb1')
        >>> # Map annotations to name ids
        >>> aid_list = ibs.get_valid_aids()
        >>> nid_list = ibs.get_annot_name_rowids(aid_list)
        >>> # Get annotation ids for each name
        >>> aids_list = ibs.get_name_aids(nid_list)
        >>> # Run Assertion Test
        >>> groupid2_items = ut.group_items(aids_list, nid_list)
        >>> grouped_items = list(six.itervalues(groupid2_items))
        >>> passed_iter = map(ut.list_allsame, grouped_items)
        >>> passed_list = list(passed_iter)
        >>> assert all(passed_list), 'problem in get_name_aids'
        >>> # Print grouped items
        >>> print(ut.dict_str(groupid2_items, newlines=False))

    Ignore:
        from ibeis.control.manual_name_funcs import *  # NOQA
        import ibeis
        #ibs = ibeis.opendb('testdb1')
        #ibs = ibeis.opendb('PZ_MTEST')
        ibs = ibeis.opendb('PZ_Master0')
        #ibs = ibeis.opendb('GZ_ALL')

        nid_list = ibs.get_valid_nids()
        nid_list_ = [const.UNKNOWN_NAME_ROWID if nid <= 0 else nid for nid in nid_list]

        with ut.Timer('sql'):
            #aids_list1 = ibs.get_name_aids(nid_list, enable_unknown_fix=False)
            aids_list1 = ibs.db.get(const.ANNOTATION_TABLE, (ANNOT_ROWID,), nid_list_, id_colname=NAME_ROWID, unpack_scalars=False)

        with ut.Timer('hackquery + group'):
            opstr = '''
            SELECT annot_rowid, name_rowid
            FROM annotations
            WHERE name_rowid IN (%s)
            ORDER BY name_rowid ASC, annot_rowid ASC
            ''' % (', '.join(map(str, nid_list)))
            pair_list = ibs.db.connection.execute(opstr).fetchall()
            aids = np.array(ut.get_list_column(pair_list, 0))
            nids = np.array(ut.get_list_column(pair_list, 1))
            unique_nids, groupx = vt.group_indices(nids)
            grouped_aids_ = vt.apply_grouping(aids, groupx)
            aids_list5 = [sorted(arr.tolist()) for arr in grouped_aids_]

        for aids1, aids5 in zip(aids_list1, aids_list5):
            if (aids1) != (aids5):
                print(aids1)
                print(aids5)
                print('-----')

        ut.assert_lists_eq(list(map(tuple, aids_list5)), list(map(tuple, aids_list1)))

        with ut.Timer('numpy'):
            # alt method
            valid_aids = np.array(ibs.get_valid_aids())
            valid_nids = np.array(ibs.get_annot_name_rowids(valid_aids, distinguish_unknowns=False))
            aids_list2 = [valid_aids.take(np.flatnonzero(valid_nids == nid)).tolist() for nid in nid_list_]

        with ut.Timer('numpy2'):
            # alt method
            valid_aids = np.array(ibs.get_valid_aids())
            valid_nids = np.array(ibs.get_annot_name_rowids(valid_aids, distinguish_unknowns=False))
            aids_list3 = [valid_aids.take(np.flatnonzero(np.equal(valid_nids, nid))).tolist() for nid in nid_list_]

        with ut.Timer('numpy3'):
            # alt method
            valid_aids = np.array(ibs.get_valid_aids())
            valid_nids = np.array(ibs.db.get_all_col_rows(const.ANNOTATION_TABLE, NAME_ROWID))
            aids_list4 = [valid_aids.take(np.flatnonzero(np.equal(valid_nids, nid))).tolist() for nid in nid_list_]

        assert aids_list2 == aids_list3
        assert aids_list3 == aids_list4
        assert aids_list1 == aids_list2

        valid_aids = ibs.get_valid_aids()
        %timeit ibs.db.get_all_col_rows('annotations', 'rowid')
        %timeit ibs.db.get_all_col_rows('annotations', 'name_rowid')
        %timeit ibs.get_annot_name_rowids(valid_aids, distinguish_unknowns=False)
        %timeit ibs.get_valid_aids()
        %timeit ibs.get_annot_name_rowids(ibs.get_valid_aids(), distinguish_unknowns=False)
        valid_nids1 = ibs.get_annot_name_rowids(valid_aids, distinguish_unknowns=False)
        valid_nids2 = ibs.db.get_all_col_rows('annotations', 'name_rowid')
        assert valid_nids1 == valid_nids2

        ibs.db.fname
        ibs.db.fpath

        import sqlite3
        con = sqlite3.connect(ibs.db.fpath)

        opstr = '''
        SELECT annot_rowid, name_rowid
        FROM annotations
        WHERE name_rowid IN (SELECT name_rowid FROM name)
        ORDER BY name_rowid ASC, annot_rowid ASC
        '''

        annot_rowid_list = con.execute(opstr).fetchall()
        aid_list = ut.get_list_column(annot_rowid_list, 0)
        nid_list = ut.get_list_column(annot_rowid_list, 1)

        # HACKY HACKY HACK
        with ut.Timer('hackquery + group'):
            #nid_list = ibs.get_valid_nids()[10:15]
            nid_list = ibs.get_valid_nids()
            opstr = '''
            SELECT annot_rowid, name_rowid
            FROM annotations
            WHERE name_rowid IN (%s)
            ORDER BY name_rowid ASC, annot_rowid ASC
            ''' % (', '.join(map(str, nid_list)))
            pair_list = ibs.db.connection.execute(opstr).fetchall()
            aids = np.array(ut.get_list_column(pair_list, 0))
            nids = np.array(ut.get_list_column(pair_list, 1))
            unique_nids, groupx = vt.group_indices(nids)
            grouped_aids_ = vt.apply_grouping(aids, groupx)
            grouped_aids = [arr.tolist() for arr in grouped_aids_]

        SELECT name_rowid, COUNT(annot_rowid) AS number, GROUP_CONCAT(annot_rowid) AS aid_list
        FROM annotations
        WHERE name_rowid in (SELECT name_rowid FROM name)
        GROUP BY name_rowid
        ORDER BY name_rowid ASC

        opstr = '''
        SELECT annot_rowid
        FROM annotations
        WHERE name_rowid=?
        '''

        cur = ibs.db.connection.cursor()
        cur = con.execute('BEGIN IMMEDIATE TRANSACTION')
        cur = ibs.db.connection
        res = [cur.execute(opstr, (nid,)).fetchall() for nid in nid_list_]
        cur.execute('COMMIT TRANSACTION')

        res = [ibs.db.cur.execute(opstr, (nid,)).fetchall() for nid in nid_list_]
    """
    # FIXME: THIS FUNCTION IS VERY SLOW
    # ADD A LOCAL CACHE TO FIX THIS SPEED
    # ALSO FIX GET_IMAGE_AIDS
    # really a getter for the annotation table not the name table
    #return [[] for nid in nid_list]
    # TODO: should a query of the UNKNOWN_NAME_ROWID return anything?
    # TODO: don't even run negative aids as queries
    nid_list_ = [const.UNKNOWN_NAME_ROWID if nid <= 0 else nid for nid in nid_list]
    USE_GROUPING_HACK = False
    if USE_GROUPING_HACK:
        # This code doesn't work because it doesn't respect empty names
        input_list, inverse_unique = np.unique(nid_list_, return_inverse=True)
        input_str = ', '.join(list(map(str, input_list)))
        opstr = '''
        SELECT annot_rowid, name_rowid
        FROM {ANNOTATION_TABLE}
        WHERE name_rowid IN ({input_str})
        ORDER BY name_rowid ASC, annot_rowid ASC
        '''.format(input_str=input_str, ANNOTATION_TABLE=const.ANNOTATION_TABLE)
        pair_list = ibs.db.connection.execute(opstr).fetchall()
        aidscol = np.array(ut.get_list_column(pair_list, 0))
        nidscol = np.array(ut.get_list_column(pair_list, 1))
        unique_nids, groupx = vt.group_indices(nidscol)
        grouped_aids_ = vt.apply_grouping(aidscol, groupx)
        #aids_list = [sorted(arr.tolist()) for arr in grouped_aids_]
        structured_aids_list = [arr.tolist() for arr in grouped_aids_]
        aids_list = np.array(structured_aids_list)[inverse_unique].tolist()
    else:
        # Use SQL if getting one nid at a time; otherwise perform a bulk numpy operation
        USE_NUMPY_IMPL = len(nid_list_) > 1
        if USE_NUMPY_IMPL:
            # This seems to be 30x faster for bigger inputs
            valid_aids = np.array(ibs._get_all_aids())
            valid_nids = np.array(ibs.db.get_all_col_rows(const.ANNOTATION_TABLE, NAME_ROWID))
            #np.array(ibs.get_annot_name_rowids(valid_aids, distinguish_unknowns=False))
            aids_list = [
                valid_aids.take(np.flatnonzero(
                    np.equal(valid_nids, nid))).tolist()
                for nid in nid_list_
            ]
        else:
            # SQL IMPL
            aids_list = ibs.db.get(const.ANNOTATION_TABLE, (ANNOT_ROWID,),
                                   nid_list_, id_colname=NAME_ROWID,
                                   unpack_scalars=False)
    if enable_unknown_fix:
        # enable_unknown_fix == distinguish_unknowns
        # negative name rowids correspond to unknown annotations,
        # where annot_rowid = -name_rowid
        #aids_list = [None if nid is None else ([-nid] if nid < 0 else aids)
        #             for nid, aids in zip(nid_list, aids_list)]
        # Not sure if this should fail or return empty list on None nid
        aids_list = [[] if nid is None else ([-nid] if nid < 0 else aids)
                     for nid, aids in zip(nid_list, aids_list)]
        #aids_list = [[-nid] if nid < 0 else aids
        #             for nid, aids in zip(nid_list, aids_list)]
    return aids_list
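# Editor's note: a tiny, hypothetical sketch of the enable_unknown_fix
# convention used above, where a negative name rowid marks an unknown
# annotation and, by convention, annot_rowid == -name_rowid:
def _demo_unknown_fix():
    nid_list = [2, -7, None, 3]
    aids_list = [[10, 11], [], [], [12]]  # stand-in SQL results
    fixed = [[] if nid is None else ([-nid] if nid < 0 else aids)
             for nid, aids in zip(nid_list, aids_list)]
    assert fixed == [[10, 11], [7], [], [12]]
    return fixed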
def get_review_edges(cm_list, ibs=None, review_cfg=None):
    r"""
    Needs to be moved to a better file. Maybe something to do with
    identification.

    Returns a list of matches that should be inspected. This function is more
    lightweight than orgres or allres. Used in id_review_api and
    interact_qres2.

    Args:
        cm_list (list): list of chip match objects
        ranks_top (int): put all ranks less than this number into the graph
        directed (bool):

    Returns:
        tuple: review_edges = (qaid_arr, daid_arr, score_arr, rank_arr)

    CommandLine:
        python -m ibeis.gui.id_review_api get_review_edges:0

    Example0:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.gui.id_review_api import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('PZ_MTEST')
        >>> qreq_ = ibeis.main_helpers.testdata_qreq_()
        >>> cm_list = qreq_.execute()
        >>> review_cfg = dict(ranks_top=5, directed=True, name_scoring=False,
        >>>                   filter_true_matches=True)
        >>> review_edges = get_review_edges(cm_list, ibs=ibs, review_cfg=review_cfg)
        >>> print(review_edges)

    Example1:
        >>> # UNSTABLE_DOCTEST
        >>> from ibeis.gui.id_review_api import *  # NOQA
        >>> import ibeis
        >>> cm_list, qreq_ = ibeis.testdata_cmlist('PZ_MTEST', a='default:qsize=5,dsize=20')
        >>> ibs = qreq_.ibs
        >>> review_cfg = dict(ranks_top=5, directed=True, name_scoring=False,
        >>>                   filter_reviewed=False, filter_true_matches=True)
        >>> review_edges = get_review_edges(cm_list, review_cfg=review_cfg, ibs=ibs)
        >>> print(review_edges)

    Example3:
        >>> # UNSTABLE_DOCTEST
        >>> from ibeis.gui.id_review_api import *  # NOQA
        >>> import ibeis
        >>> cm_list, qreq_ = ibeis.testdata_cmlist('PZ_MTEST', a='default:qsize=1,dsize=100')
        >>> ibs = qreq_.ibs
        >>> review_cfg = dict(ranks_top=1, directed=False, name_scoring=False,
        >>>                   filter_reviewed=False, filter_true_matches=True)
        >>> review_edges = get_review_edges(cm_list, review_cfg=review_cfg, ibs=ibs)
        >>> print(review_edges)

    Example4:
        >>> # UNSTABLE_DOCTEST
        >>> from ibeis.gui.id_review_api import *  # NOQA
        >>> import ibeis
        >>> cm_list, qreq_ = ibeis.testdata_cmlist('PZ_MTEST', a='default:qsize=10,dsize=10')
        >>> ibs = qreq_.ibs
        >>> ranks_top = 3
        >>> review_cfg = dict(ranks_top=3, directed=False, name_scoring=False,
        >>>                   filter_reviewed=False, filter_true_matches=True)
        >>> review_edges = get_review_edges(cm_list, review_cfg=review_cfg, ibs=ibs)
        >>> print(review_edges)
    """
    import vtool as vt
    from ibeis.algo.hots import chip_match
    if review_cfg is None:
        # avoid a mutable default argument
        review_cfg = {}
    automatch_kw = REVIEW_CFG_DEFAULTS.copy()
    automatch_kw = ut.update_existing(automatch_kw, review_cfg)
    print('[resorg] get_review_edges(%s)' % (ut.repr2(automatch_kw)))
    print('[resorg] len(cm_list) = %d' % (len(cm_list)))
    qaids_stack = []
    daids_stack = []
    ranks_stack = []
    scores_stack = []

    # For each ChipMatch, extract the inspectable candidate matches
    if isinstance(cm_list, dict):
        cm_list = list(cm_list.values())

    if len(cm_list) == 0:
        return ([], [], [], [])

    for cm in cm_list:
        if isinstance(cm, chip_match.ChipMatch):
            daids = cm.get_top_aids(ntop=automatch_kw['ranks_top'])
            scores = cm.get_top_scores(ntop=automatch_kw['ranks_top'])
            ranks = np.arange(len(daids))
            qaids = np.full(daids.shape, cm.qaid, dtype=daids.dtype)
        else:
            (qaids, daids, scores, ranks) = cm.get_match_tbldata(
                ranks_top=automatch_kw['ranks_top'],
                name_scoring=automatch_kw['name_scoring'], ibs=ibs)
        qaids_stack.append(qaids)
        daids_stack.append(daids)
        scores_stack.append(scores)
        ranks_stack.append(ranks)

    # Stack them into a giant array
    qaid_arr = np.hstack(qaids_stack)
    daid_arr = np.hstack(daids_stack)
    score_arr = np.hstack(scores_stack)
    rank_arr = np.hstack(ranks_stack)

    # Sort by scores
    sortx = score_arr.argsort()[::-1]
    qaid_arr = qaid_arr[sortx]
    daid_arr = daid_arr[sortx]
    score_arr = score_arr[sortx]
    rank_arr = rank_arr[sortx]

    # IS_REVIEWED DOES NOT WORK
    if automatch_kw['filter_reviewed']:
        _is_reviewed = ibs.get_annot_pair_is_reviewed(qaid_arr.tolist(),
                                                      daid_arr.tolist())
        is_unreviewed = ~np.array(_is_reviewed, dtype=bool)
        qaid_arr = qaid_arr.compress(is_unreviewed)
        daid_arr = daid_arr.compress(is_unreviewed)
        score_arr = score_arr.compress(is_unreviewed)
        rank_arr = rank_arr.compress(is_unreviewed)

    # Remove directed edges
    if not automatch_kw['directed']:
        #nodes = np.unique(directed_edges.flatten())
        directed_edges = np.vstack((qaid_arr, daid_arr)).T
        #idx1, idx2 = vt.intersect2d_indices(directed_edges, directed_edges[:, ::-1])
        unique_rowx = vt.find_best_undirected_edge_indexes(directed_edges,
                                                           score_arr)
        qaid_arr = qaid_arr.take(unique_rowx)
        daid_arr = daid_arr.take(unique_rowx)
        score_arr = score_arr.take(unique_rowx)
        rank_arr = rank_arr.take(unique_rowx)

    # Filter duplicate name matches
    if automatch_kw['filter_duplicate_true_matches']:
        # filter_dup_namepairs
        qnid_arr = ibs.get_annot_nids(qaid_arr)
        dnid_arr = ibs.get_annot_nids(daid_arr)
        if not automatch_kw['directed']:
            directed_name_edges = np.vstack((qnid_arr, dnid_arr)).T
            unique_rowx2 = vt.find_best_undirected_edge_indexes(
                directed_name_edges, score_arr)
        else:
            namepair_id_list = np.array(vt.compute_unique_data_ids_(
                list(zip(qnid_arr, dnid_arr))))
            unique_namepair_ids, namepair_groupxs = vt.group_indices(namepair_id_list)
            score_namepair_groups = vt.apply_grouping(score_arr, namepair_groupxs)
            unique_rowx2 = np.array(sorted([
                groupx[score_group.argmax()]
                for groupx, score_group in zip(namepair_groupxs,
                                               score_namepair_groups)
            ]), dtype=np.int32)
        qaid_arr = qaid_arr.take(unique_rowx2)
        daid_arr = daid_arr.take(unique_rowx2)
        score_arr = score_arr.take(unique_rowx2)
        rank_arr = rank_arr.take(unique_rowx2)

    # Filter all true matches
    if automatch_kw['filter_true_matches']:
        # cast to arrays so the comparison is elementwise
        qnid_arr = np.array(ibs.get_annot_nids(qaid_arr))
        dnid_arr = np.array(ibs.get_annot_nids(daid_arr))
        valid_flags = qnid_arr != dnid_arr
        qaid_arr = qaid_arr.compress(valid_flags)
        daid_arr = daid_arr.compress(valid_flags)
        score_arr = score_arr.compress(valid_flags)
        rank_arr = rank_arr.compress(valid_flags)

    if automatch_kw['filter_photobombs']:
        unique_aids = ut.unique(ut.flatten([qaid_arr, daid_arr]))
        #grouped_aids, unique_nids = ibs.group_annots_by_name(unique_aids)
        invalid_nid_map = get_photobomber_map(ibs, qaid_arr)
        nid2_aids = ut.group_items(unique_aids, ibs.get_annot_nids(unique_aids))
        expanded_aid_map = ut.ddict(set)
        for nid1, other_nids in invalid_nid_map.items():
            for aid1 in nid2_aids[nid1]:
                for nid2 in other_nids:
                    for aid2 in nid2_aids[nid2]:
                        expanded_aid_map[aid1].add(aid2)
                        expanded_aid_map[aid2].add(aid1)
        valid_flags = [daid not in expanded_aid_map[qaid]
                       for qaid, daid in zip(qaid_arr, daid_arr)]
        qaid_arr = qaid_arr.compress(valid_flags)
        daid_arr = daid_arr.compress(valid_flags)
        score_arr = score_arr.compress(valid_flags)
        rank_arr = rank_arr.compress(valid_flags)

    review_edges = (qaid_arr, daid_arr, score_arr, rank_arr)
    return review_edges
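# Editor's note: vt.find_best_undirected_edge_indexes is used above to keep
# one best-scoring row per undirected pair. A hypothetical plain-numpy sketch
# of the same effect (not the vtool implementation):
def _demo_undirected_dedup():
    import numpy as np
    edges = np.array([[1, 2], [2, 1], [3, 4]])
    scores = np.array([0.9, 0.5, 0.7])
    canon = np.sort(edges, axis=1)  # (1, 2) and (2, 1) become the same key
    _, inverse = np.unique(canon, axis=0, return_inverse=True)
    keep = [np.flatnonzero(inverse == g)[scores[inverse == g].argmax()]
            for g in np.unique(inverse)]
    return np.array(sorted(keep))  # -> rows [0, 2]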
def make_grid_coverage_mask(kpts, chipsize, weights, pxl_per_bin=4,
                            grid_steps=1, resize=False, out=None,
                            grid_sigma=1.6):
    r"""
    Args:
        kpts (ndarray[float32_t, ndim=2]): keypoints
        chipsize (tuple): width, height
        weights (ndarray[float32_t, ndim=1]):
        pxl_per_bin (float):
        grid_steps (int):

    Returns:
        ndarray: weightgrid

    CommandLine:
        python -m vtool.coverage_grid --test-make_grid_coverage_mask --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from vtool.coverage_grid import *  # NOQA
        >>> import vtool as vt
        >>> # build test data
        >>> kpts, chipsize, weights = coverage_kpts.testdata_coverage('easy1.png')
        >>> pxl_per_bin = 4
        >>> grid_steps = 2
        >>> # execute function
        >>> weightgrid = make_grid_coverage_mask(kpts, chipsize, weights, pxl_per_bin, grid_steps)
        >>> # verify result
        >>> result = str(weightgrid)
        >>> print(result)
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> pt.imshow(weightgrid)
        >>> ut.show_if_requested()
    """
    import vtool as vt
    coverage_gridtup = sparse_grid_coverage(
        kpts, chipsize, weights,
        pxl_per_bin=pxl_per_bin,
        grid_steps=grid_steps,
        grid_sigma=grid_sigma
    )
    gridshape = coverage_gridtup[0:2]
    neighbor_bin_weights, neighbor_bin_indices = coverage_gridtup[-2:]
    oldshape_indices = neighbor_bin_indices.shape
    newshape_indices = (np.prod(oldshape_indices[0:2]), oldshape_indices[2])
    neighbor_bin_indices = neighbor_bin_indices.reshape(newshape_indices).T
    neighbor_bin_weights = neighbor_bin_weights.flatten()
    # Get flat indexing into gridbin
    neighbor_bin_flat_indices = np.ravel_multi_index(neighbor_bin_indices, gridshape)
    # Group by bins with weight
    unique_flatxs, grouped_flatxs = vt.group_indices(neighbor_bin_flat_indices)
    grouped_weights = vt.apply_grouping(neighbor_bin_weights, grouped_flatxs)
    # FIXME: boundary cases are not handled right because their vote is split
    # into the same bin and is fighting with itself during the max
    max_weights = list(map(np.max, grouped_weights))

    if out is None:
        weightgrid = np.zeros(gridshape)
    else:
        # outvar specified
        weightgrid = out
        weightgrid[:] = 0
    unique_rows, unique_cols = np.unravel_index(unique_flatxs, gridshape)
    weightgrid[unique_rows, unique_cols] = max_weights
    #flat_weightgrid = np.zeros(np.prod(gridshape))
    #flat_weightgrid[unique_flatxs] = max_weight
    #ut.embed()
    #weightgrid = np.reshape(flat_weightgrid, gridshape)
    if resize:
        weightgrid = cv2.resize(weightgrid, chipsize,
                                interpolation=cv2.INTER_NEAREST)
    return weightgrid
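# Editor's note: the tail of make_grid_coverage_mask is a scatter-max; many
# keypoint votes can land in one grid bin and only the strongest survives.
# A hypothetical plain-numpy sketch of that reduction:
def _demo_scatter_max():
    import numpy as np
    flat_bins = np.array([0, 3, 0, 3, 5])
    weights = np.array([0.2, 0.9, 0.7, 0.1, 0.4])
    grid = np.zeros(6)
    for b in np.unique(flat_bins):
        grid[b] = weights[flat_bins == b].max()
    return grid  # -> [0.7, 0. , 0. , 0.9, 0. , 0.4]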
def try_query(model, infr, evidence, interest_ttypes=[], verbose=True): r""" CommandLine: python -m ibeis.algo.hots.bayes --exec-try_query --show Example: >>> # DISABLE_DOCTEST >>> from ibeis.algo.hots.bayes import * # NOQA >>> verbose = True >>> other_evidence = {} >>> name_evidence = [1, None, 0, None] >>> score_evidence = ['high', 'low', 'low'] >>> query_vars = None >>> model = make_name_model(num_annots=4, num_names=4, verbose=True, mode=1) >>> model, evidence, soft_evidence = update_model_evidence(model, name_evidence, score_evidence, other_evidence) >>> interest_ttypes = ['name'] >>> infr = pgmpy.inference.BeliefPropagation(model) >>> evidence = infr._ensure_internal_evidence(evidence, model) >>> query_results = try_query(model, infr, evidence, interest_ttypes, verbose) >>> result = ('query_results = %s' % (str(query_results),)) >>> ut.quit_if_noshow() >>> show_model(model, show_prior=True, **query_results) >>> ut.show_if_requested() Ignore: query_vars = ut.setdiff_ordered(model.nodes(), list(evidence.keys())) probs = infr.query(query_vars, evidence) map_assignment = infr.map_query(query_vars, evidence) """ infr = pgmpy.inference.VariableElimination(model) #infr = pgmpy.inference.BeliefPropagation(model) if True: return bruteforce(model, query_vars=None, evidence=evidence) else: import vtool as vt query_vars = ut.setdiff_ordered(model.nodes(), list(evidence.keys())) # hack query_vars = ut.setdiff_ordered(query_vars, ut.list_getattr(model.ttype2_cpds['score'], 'variable')) if verbose: evidence_str = ', '.join(model.pretty_evidence(evidence)) print('P(' + ', '.join(query_vars) + ' | ' + evidence_str + ') = ') # Compute MAP joints # There is a bug here. #map_assign = infr.map_query(query_vars, evidence) # (probably an invalid thing to do) #joint_factor = pgmpy.factors.factor_product(*factor_list) # Brute force MAP name_vars = ut.list_getattr(model.ttype2_cpds['name'], 'variable') query_name_vars = ut.setdiff_ordered(name_vars, list(evidence.keys())) # TODO: incorporate case where Na is assigned to Fred #evidence_h = ut.delete_keys(evidence.copy(), ['Na']) joint = model.joint_distribution() joint.evidence_based_reduction( query_name_vars, evidence, inplace=True) # Find static row labels in the evidence given_name_vars = [var for var in name_vars if var in evidence] given_name_idx = ut.dict_take(evidence, given_name_vars) given_name_val = [joint.statename_dict[var][idx] for var, idx in zip(given_name_vars, given_name_idx)] new_vals = joint.values.ravel() # Add static evidence variables to the relabeled name states new_vars = given_name_vars + joint.variables new_rows = [tuple(given_name_val) + row for row in joint._row_labels()] # Relabel rows based on the knowledge that # everything is the same, only the names have changed. 
        temp_basis = [i for i in range(model.num_names)]

        def relabel_names(names, temp_basis=temp_basis):
            names = list(map(six.text_type, names))
            mapping = {}
            for n in names:
                if n not in mapping:
                    mapping[n] = len(mapping)
            new_names = tuple([temp_basis[mapping[n]] for n in names])
            return new_names

        relabeled_rows = list(map(relabel_names, new_rows))
        # Combine probability of rows with the same (new) label
        data_ids = np.array(vt.other.compute_unique_data_ids_(relabeled_rows))
        unique_ids, groupxs = vt.group_indices(data_ids)
        reduced_row_lbls = ut.take(relabeled_rows, ut.get_list_column(groupxs, 0))
        reduced_row_lbls = list(map(list, reduced_row_lbls))
        reduced_values = np.array([
            g.sum() for g in vt.apply_grouping(new_vals, groupxs)
        ])
        # Relabel the rows one more time to agree with initial constraints
        used_ = []
        replaced = []
        for colx, (var, val) in enumerate(zip(given_name_vars, given_name_val)):
            # All columns must be the same for this labeling
            alias = reduced_row_lbls[0][colx]
            reduced_row_lbls = ut.list_replace(reduced_row_lbls, alias, val)
            replaced.append(alias)
            used_.append(val)
        basis = model.ttype2_cpds['name'][0]._template_.basis
        find_remain_ = ut.setdiff_ordered(temp_basis, replaced)
        repl_remain_ = ut.setdiff_ordered(basis, used_)
        for find, repl in zip(find_remain_, repl_remain_):
            reduced_row_lbls = ut.list_replace(reduced_row_lbls, find, repl)

        # Now find the most likely state
        sortx = reduced_values.argsort()[::-1]
        sort_reduced_row_lbls = ut.take(reduced_row_lbls, sortx.tolist())
        sort_reduced_values = reduced_values[sortx]

        # Remove evidence based labels
        new_vars_ = new_vars[len(given_name_vars):]
        sort_reduced_row_lbls_ = ut.get_list_column(
            sort_reduced_row_lbls, slice(len(given_name_vars), None))

        # hack into a new joint factor
        var_states = ut.lmap(ut.unique_keep_order, zip(*sort_reduced_row_lbls_))
        statename_dict = dict(zip(new_vars, var_states))
        cardinality = ut.lmap(len, var_states)
        val_lookup = dict(zip(ut.lmap(tuple, sort_reduced_row_lbls_),
                              sort_reduced_values))
        values = np.zeros(np.prod(cardinality))
        for idx, state in enumerate(ut.iprod(*var_states)):
            if state in val_lookup:
                values[idx] = val_lookup[state]
        joint2 = pgmpy.factors.Factor(new_vars_, cardinality, values,
                                      statename_dict=statename_dict)
        print(joint2)
        max_marginals = {}
        for i, var in enumerate(query_name_vars):
            one_out = query_name_vars[:i] + query_name_vars[i + 1:]
            max_marginals[var] = joint2.marginalize(one_out, inplace=False)
            # max_marginals[var] = joint2.maximize(one_out, inplace=False)
        print(joint2.marginalize(['Nb', 'Nc'], inplace=False))
        # wrap in list for python3, where dict.values() is a view
        factor_list = list(max_marginals.values())

        # Better map assignment based on knowledge of labels
        map_assign = dict(zip(new_vars_, sort_reduced_row_lbls_[0]))

        sort_reduced_rowstr_lbls = [
            ut.repr2(dict(zip(new_vars, lbls)), explicit=True, nobraces=True,
                     strvals=True)
            for lbls in sort_reduced_row_lbls_
        ]

        top_assignments = list(zip(sort_reduced_rowstr_lbls[:3], sort_reduced_values))
        if len(sort_reduced_values) > 3:
            top_assignments += [('other', 1 - sum(sort_reduced_values[:3]))]

        # import utool
        # utool.embed()
        # Compute all marginals
        # probs = infr.query(query_vars, evidence)
        #probs = infr.query(query_vars, evidence)
        # factor_list = probs.values()

        ## Marginalize over non-query, non-evidence
        #irrelevant_vars = ut.setdiff_ordered(joint.variables, list(evidence.keys()) + query_vars)
        #joint.marginalize(irrelevant_vars)
        #joint.normalize()
        #new_rows = joint._row_labels()
        #new_vals = joint.values.ravel()
        #map_vals = new_rows[new_vals.argmax()]
        #map_assign = dict(zip(joint.variables, map_vals))

        # Compute Marginalized MAP joints
        #marginalized_joints = {}
        #for ttype in interest_ttypes:
        #    other_vars = [v for v in joint_factor.scope()
        #                  if model.var2_cpd[v].ttype != ttype]
        #    marginal = joint_factor.marginalize(other_vars, inplace=False)
        #    marginalized_joints[ttype] = marginal
        query_results = {
            'factor_list': factor_list,
            'top_assignments': top_assignments,
            'map_assign': map_assign,
            'marginalized_joints': None,
        }
        return query_results
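# Editor's note: the unreached else-branch above merges joint states that
# differ only by a permutation of name labels. A hypothetical sketch of the
# canonical relabeling that makes such states compare equal:
def _demo_canonical_relabel():
    def relabel(row):
        mapping = {}
        # rename labels to first-appearance order: 0, 1, 2, ...
        return tuple(mapping.setdefault(n, len(mapping)) for n in row)
    rows = [('fred', 'fred', 'sue'),
            ('sue', 'sue', 'fred'),
            ('fred', 'sue', 'sue')]
    # the first two rows collapse to the same canonical labeling
    return [relabel(r) for r in rows]  # -> [(0, 0, 1), (0, 0, 1), (0, 1, 1)]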
def get_automatch_candidates(cm_list, ranks_lt=5, directed=True,
                             name_scoring=False, ibs=None,
                             filter_reviewed=False,
                             filter_duplicate_namepair_matches=False):
    """
    THIS IS PROBABLY ONE OF THE ONLY THINGS IN THIS FILE THAT SHOULD NOT BE
    DEPRECATED

    Returns a list of matches that should be inspected. This function is more
    lightweight than orgres or allres. Used in inspect_gui and interact_qres2.

    Args:
        cm_list (list): list of chip match objects
        ranks_lt (int): put all ranks less than this number into the graph
        directed (bool):

    Returns:
        tuple: candidate_matches = (qaid_arr, daid_arr, score_arr, rank_arr)

    CommandLine:
        python -m ibeis.expt.results_organizer --test-get_automatch_candidates:2
        python -m ibeis.expt.results_organizer --test-get_automatch_candidates:0

    Example0:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.expt.results_organizer import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('PZ_MTEST')
        >>> qreq_ = ibeis.main_helpers.testdata_qreq_()
        >>> cm_list = ibs.query_chips(qreq_=qreq_, return_cm=True)
        >>> ranks_lt = 5
        >>> directed = True
        >>> name_scoring = False
        >>> candidate_matches = get_automatch_candidates(cm_list, ranks_lt, directed, ibs=ibs)
        >>> print(candidate_matches)

    Example1:
        >>> # UNSTABLE_DOCTEST
        >>> from ibeis.expt.results_organizer import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('PZ_MTEST')
        >>> qaid_list = ibs.get_valid_aids()[0:5]
        >>> daid_list = ibs.get_valid_aids()[0:20]
        >>> cm_list = ibs.query_chips(qaid_list, daid_list, return_cm=True)
        >>> ranks_lt = 5
        >>> directed = False
        >>> name_scoring = False
        >>> filter_reviewed = False
        >>> filter_duplicate_namepair_matches = True
        >>> candidate_matches = get_automatch_candidates(
        ...     cm_list, ranks_lt, directed, name_scoring=name_scoring,
        ...     filter_reviewed=filter_reviewed,
        ...     filter_duplicate_namepair_matches=filter_duplicate_namepair_matches,
        ...     ibs=ibs)
        >>> print(candidate_matches)

    Example3:
        >>> # UNSTABLE_DOCTEST
        >>> from ibeis.expt.results_organizer import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('PZ_MTEST')
        >>> qaid_list = ibs.get_valid_aids()[0:1]
        >>> daid_list = ibs.get_valid_aids()[10:100]
        >>> cm_list = ibs.query_chips(qaid_list, daid_list, return_cm=True)
        >>> ranks_lt = 1
        >>> directed = False
        >>> name_scoring = False
        >>> filter_reviewed = False
        >>> filter_duplicate_namepair_matches = True
        >>> candidate_matches = get_automatch_candidates(
        ...     cm_list, ranks_lt, directed, name_scoring=name_scoring,
        ...     filter_reviewed=filter_reviewed,
        ...     filter_duplicate_namepair_matches=filter_duplicate_namepair_matches,
        ...     ibs=ibs)
        >>> print(candidate_matches)

    Example4:
        >>> # UNSTABLE_DOCTEST
        >>> from ibeis.expt.results_organizer import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('PZ_MTEST')
        >>> qaid_list = ibs.get_valid_aids()[0:10]
        >>> daid_list = ibs.get_valid_aids()[0:10]
        >>> cm_list = ibs.query_chips(qaid_list, daid_list, return_cm=True)
        >>> ranks_lt = 3
        >>> directed = False
        >>> name_scoring = False
        >>> filter_reviewed = False
        >>> filter_duplicate_namepair_matches = True
        >>> candidate_matches = get_automatch_candidates(
        ...     cm_list, ranks_lt, directed, name_scoring=name_scoring,
        ...     filter_reviewed=filter_reviewed,
        ...     filter_duplicate_namepair_matches=filter_duplicate_namepair_matches,
        ...     ibs=ibs)
        >>> print(candidate_matches)
    """
    import vtool as vt
    from ibeis.model.hots import chip_match
    print(('[resorg] get_automatch_candidates('
           'filter_reviewed={filter_reviewed},'
           'filter_duplicate_namepair_matches={filter_duplicate_namepair_matches},'
           'directed={directed},'
           'ranks_lt={ranks_lt},'
           ).format(**locals()))
    print('[resorg] len(cm_list) = %d' % (len(cm_list)))
    qaids_stack = []
    daids_stack = []
    ranks_stack = []
    scores_stack = []

    # For each ChipMatch, extract the inspectable candidate matches
    if isinstance(cm_list, dict):
        cm_list = list(cm_list.values())

    for cm in cm_list:
        if isinstance(cm, chip_match.ChipMatch2):
            daids = cm.get_top_aids(ntop=ranks_lt)
            scores = cm.get_top_scores(ntop=ranks_lt)
            ranks = np.arange(len(daids))
            qaids = np.full(daids.shape, cm.qaid, dtype=daids.dtype)
        else:
            (qaids, daids, scores, ranks) = cm.get_match_tbldata(
                ranks_lt=ranks_lt, name_scoring=name_scoring, ibs=ibs)
        qaids_stack.append(qaids)
        daids_stack.append(daids)
        scores_stack.append(scores)
        ranks_stack.append(ranks)

    # Stack them into a giant array
    # utool.embed()
    qaid_arr = np.hstack(qaids_stack)
    daid_arr = np.hstack(daids_stack)
    score_arr = np.hstack(scores_stack)
    rank_arr = np.hstack(ranks_stack)

    # Sort by scores
    sortx = score_arr.argsort()[::-1]
    qaid_arr = qaid_arr[sortx]
    daid_arr = daid_arr[sortx]
    score_arr = score_arr[sortx]
    rank_arr = rank_arr[sortx]

    if filter_reviewed:
        _is_reviewed = ibs.get_annot_pair_is_reviewed(qaid_arr.tolist(),
                                                      daid_arr.tolist())
        is_unreviewed = ~np.array(_is_reviewed, dtype=bool)
        qaid_arr = qaid_arr.compress(is_unreviewed)
        daid_arr = daid_arr.compress(is_unreviewed)
        score_arr = score_arr.compress(is_unreviewed)
        rank_arr = rank_arr.compress(is_unreviewed)

    # Remove directed edges
    if not directed:
        #nodes = np.unique(directed_edges.flatten())
        directed_edges = np.vstack((qaid_arr, daid_arr)).T
        #idx1, idx2 = vt.intersect2d_indices(directed_edges, directed_edges[:, ::-1])
        unique_rowx = vt.find_best_undirected_edge_indexes(directed_edges,
                                                           score_arr)
        qaid_arr = qaid_arr.take(unique_rowx)
        daid_arr = daid_arr.take(unique_rowx)
        score_arr = score_arr.take(unique_rowx)
        rank_arr = rank_arr.take(unique_rowx)

    # Filter duplicate name matches
    if filter_duplicate_namepair_matches:
        qnid_arr = ibs.get_annot_nids(qaid_arr)
        dnid_arr = ibs.get_annot_nids(daid_arr)
        if not directed:
            directed_name_edges = np.vstack((qnid_arr, dnid_arr)).T
            unique_rowx2 = vt.find_best_undirected_edge_indexes(
                directed_name_edges, score_arr)
        else:
            namepair_id_list = np.array(vt.compute_unique_data_ids_(
                list(zip(qnid_arr, dnid_arr))))
            unique_namepair_ids, namepair_groupxs = vt.group_indices(namepair_id_list)
            score_namepair_groups = vt.apply_grouping(score_arr, namepair_groupxs)
            unique_rowx2 = np.array(sorted([
                groupx[score_group.argmax()]
                for groupx, score_group in zip(namepair_groupxs,
                                               score_namepair_groups)
            ]), dtype=np.int32)
        qaid_arr = qaid_arr.take(unique_rowx2)
        daid_arr = daid_arr.take(unique_rowx2)
        score_arr = score_arr.take(unique_rowx2)
        rank_arr = rank_arr.take(unique_rowx2)

    candidate_matches = (qaid_arr, daid_arr, score_arr, rank_arr)
    return candidate_matches
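# Editor's note: the filter_duplicate_namepair_matches branch keeps only the
# highest-scoring candidate edge between each pair of names. A hypothetical
# plain-numpy sketch of that per-group argmax, with made-up ids and scores:
def _demo_filter_duplicate_namepairs():
    import numpy as np
    qnids = np.array([1, 1, 2, 1])
    dnids = np.array([2, 2, 3, 2])
    scores = np.array([0.3, 0.8, 0.5, 0.6])
    # any stable per-pair key works here; a tuple hash stands in for
    # vt.compute_unique_data_ids_
    pair_ids = np.array([hash((int(q), int(d))) for q, d in zip(qnids, dnids)])
    keep = sorted(np.flatnonzero(pair_ids == pid)[scores[pair_ids == pid].argmax()]
                  for pid in np.unique(pair_ids))
    return np.array(keep)  # -> rows [1, 2]: best (1, 2) edge plus the (2, 3) edge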