def get_namescore_nonvoting_feature_flags(fm_list, fs_list, dnid_list, name_groupxs, kpts1=None):
    r"""
    fm_list = [fm[:min(len(fm), 10)] for fm in fm_list]
    fs_list = [fs[:min(len(fs), 10)] for fs in fs_list]
    """
    fx1_list = [fm.T[0] for fm in fm_list]
    # Group annotation matches by name
    name_grouped_fx1_list = vt.apply_grouping_(fx1_list, name_groupxs)
    name_grouped_fs_list = vt.apply_grouping_(fs_list, name_groupxs)
    # Stack up all matches to a particular name, keep track of original indices via offsets
    name_invertable_flat_fx1_list = list(map(ut.invertible_flatten2_numpy, name_grouped_fx1_list))
    name_grouped_fx1_flat = ut.get_list_column(name_invertable_flat_fx1_list, 0)
    name_grouped_invertable_cumsum_list = ut.get_list_column(name_invertable_flat_fx1_list, 1)
    name_grouped_fs_flat = list(map(np.hstack, name_grouped_fs_list))
    if kpts1 is not None:
        xys1_ = vt.get_xys(kpts1).T
        kpts_xyid_list = vt.compute_unique_data_ids(xys1_)
        # Make nested group for every name by query feature index (accounting for duplicate orientation)
        name_grouped_xyid_flat = list(kpts_xyid_list.take(fx1) for fx1 in name_grouped_fx1_flat)
        xyid_groupxs_list = list(vt.group_indices(xyid_flat)[1] for xyid_flat in name_grouped_xyid_flat)
        name_group_fx1_groupxs_list = xyid_groupxs_list
    else:
        # Make nested group for every name by query feature index
        fx1_groupxs_list = [vt.group_indices(fx1_flat)[1] for fx1_flat in name_grouped_fx1_flat]
        name_group_fx1_groupxs_list = fx1_groupxs_list
    name_grouped_fid_grouped_fs_list = [
        vt.apply_grouping(fs_flat, fid_groupxs)
        for fs_flat, fid_groupxs in zip(name_grouped_fs_flat, name_group_fx1_groupxs_list)
    ]
    # Flag which features are valid in this grouped space. Only one keypoint should be able to vote
    # for each group
    name_grouped_fid_grouped_isvalid_list = [
        np.array([fs_group.max() == fs_group for fs_group in fid_grouped_fs_list])
        for fid_grouped_fs_list in name_grouped_fid_grouped_fs_list
    ]
    # Go back to being grouped only in name space
    #dtype = np.bool
    name_grouped_isvalid_flat_list = [
        vt.invert_apply_grouping2(fid_grouped_isvalid_list, fid_groupxs, dtype=bool)
        for fid_grouped_isvalid_list, fid_groupxs in zip(name_grouped_fid_grouped_isvalid_list,
                                                         name_group_fx1_groupxs_list)
    ]
    name_grouped_isvalid_unflat_list = [
        ut.unflatten2(isvalid_flat, invertable_cumsum_list)
        for isvalid_flat, invertable_cumsum_list in zip(name_grouped_isvalid_flat_list,
                                                        name_grouped_invertable_cumsum_list)
    ]
    # Reports which features were valid in name scoring for every annotation
    featflag_list = vt.invert_apply_grouping(name_grouped_isvalid_unflat_list, name_groupxs)
    return featflag_list
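# --- Illustration (not part of the original module) ---
# A minimal numpy-only sketch of the "one vote per query feature" rule that
# get_namescore_nonvoting_feature_flags enforces. The toy arrays below are
# assumptions made for illustration; the real function goes through the vtool
# grouping helpers (vt.group_indices / vt.apply_grouping) instead.
import numpy as np

fx1 = np.array([0, 0, 1, 2, 2, 2])             # query feature index of each match
fs = np.array([0.2, 0.5, 0.9, 0.1, 0.4, 0.4])  # score of each match
flags = np.zeros(len(fs), dtype=bool)
for fx in np.unique(fx1):
    sel = np.flatnonzero(fx1 == fx)
    group = fs[sel]
    # Only the match(es) tied for the best score of this query feature may vote
    flags[sel] = group == group.max()
# flags -> [False, True, True, False, True, True]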
def _make_anygroup_hashes(annots, nids):
    """ helper function

        import wbia
        qreq_ = wbia.testdata_qreq_(
            defaultdb='PZ_MTEST',
            qaid_override=[1, 2, 3, 4, 5, 6, 10, 11],
            daid_override=[2, 3, 5, 6, 20, 21, 22, 23, 24],
        )

        import wbia
        qreq_ = wbia.testdata_qreq_(defaultdb='PZ_Master1')
        %timeit qreq_._make_namegroup_data_hashes()
        %timeit qreq_._make_namegroup_data_uuids()
    """
    # make sure items are sorted to ensure same assignment
    # gives same uuids
    # annots = qreq_.ibs.annots(sorted(qreq_.daids))
    unique_nids, groupxs = vt.group_indices(nids)
    grouped_visual_uuids = ut.apply_grouping(annots.visual_uuids, groupxs)
    group_hashes = [
        ut.combine_hashes(sorted(u.bytes for u in uuids), hasher=hashlib.sha1())
        for uuids in grouped_visual_uuids
    ]
    nid_to_grouphash = dict(zip(unique_nids, group_hashes))
    return nid_to_grouphash
def get_patches(invassign, wx):
    ax_list = invassign.wx2_axs[wx]
    fx_list = invassign.wx2_fxs[wx]
    config = invassign.fstack.config
    ibs = invassign.fstack.ibs

    unique_axs, groupxs = vt.group_indices(ax_list)
    fxs_groups = vt.apply_grouping(fx_list, groupxs)

    unique_aids = ut.take(invassign.fstack.ax2_aid, unique_axs)

    all_kpts_list = ibs.depc.d.get_feat_kpts(unique_aids, config=config)
    sub_kpts_list = vt.ziptake(all_kpts_list, fxs_groups, axis=0)

    chip_list = ibs.depc_annot.d.get_chips_img(unique_aids)
    # convert to appropriate colorspace
    #if colorspace is not None:
    #    chip_list = vt.convert_image_list_colorspace(chip_list, colorspace)
    # ut.print_object_size(chip_list, 'chip_list')
    patch_size = 64

    grouped_patches_list = [
        vt.get_warped_patches(chip, kpts, patch_size=patch_size)[0]
        for chip, kpts in ut.ProgIter(zip(chip_list, sub_kpts_list),
                                      nTotal=len(unique_aids),
                                      lbl='warping patches')
    ]
    # Make it correspond with original fx_list and ax_list
    word_patches = vt.invert_apply_grouping(grouped_patches_list, groupxs)
    return word_patches
def compute_agg_rvecs(invassign, wx):
    """
    Sums and normalizes all rvecs that belong to the same word and the same
    annotation id
    """
    rvecs_list, error_flags = invassign.compute_nonagg_rvecs(wx)
    ax_list = invassign.wx2_axs[wx]
    maw_list = invassign.wx2_maws[wx]
    # group members of each word by aid, we will collapse these groups
    unique_ax, groupxs = vt.group_indices(ax_list)
    # (weighted aggregation with multi-assign-weights)
    grouped_maws = vt.apply_grouping(maw_list, groupxs)
    grouped_rvecs = vt.apply_grouping(rvecs_list, groupxs)
    grouped_flags = vt.apply_grouping(~error_flags, groupxs)

    grouped_rvecs2_ = vt.zipcompress(grouped_rvecs, grouped_flags, axis=0)
    grouped_maws2_ = vt.zipcompress(grouped_maws, grouped_flags)
    is_good = [len(rvecs) > 0 for rvecs in grouped_rvecs2_]

    aggvecs = [
        aggregate_rvecs(rvecs, maws)[0]
        for rvecs, maws in zip(grouped_rvecs2_, grouped_maws2_)
    ]
    unique_ax2_ = unique_ax.compress(is_good)
    ax2_aggvec = dict(zip(unique_ax2_, aggvecs))
    # Need to recompute flags for consistency
    # flag is true when aggvec is all zeros
    return ax2_aggvec
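# --- Illustration (not part of the original module) ---
# What compute_agg_rvecs amounts to when the multi-assign weights and error
# flags are ignored: residual vectors from the same annotation are summed and
# the sum is re-normalized. The toy data below is assumed for illustration only.
import numpy as np

ax_list = np.array([7, 7, 9])                           # annotation id per residual vector
rvecs = np.array([[1.0, 0.0], [0.0, 1.0], [0.5, 0.5]])  # residual vectors for one word
ax2_aggvec = {}
for ax in np.unique(ax_list):
    summed = rvecs[ax_list == ax].sum(axis=0)           # sum residuals from one annotation
    norm = np.linalg.norm(summed)
    ax2_aggvec[ax] = summed / norm if norm > 0 else summed
# ax2_aggvec -> {7: array([0.7071..., 0.7071...]), 9: array([0.7071..., 0.7071...])}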
def done_part(cand, num_neighbs):
    # Find the first `num_neighbs` complete columns in each row
    rowxs, colxs = np.where(cand.validflags)
    unique_rows, groupxs = vt.group_indices(rowxs, assume_sorted=True)
    first_k_groupxs = [groupx[0:num_neighbs] for groupx in groupxs]
    if DEBUG_REQUERY:
        assert all(ut.issorted(groupx) for groupx in groupxs)
        assert all([len(group) == num_neighbs for group in first_k_groupxs])
    chosen_xs = np.array(ut.flatten(first_k_groupxs), dtype=int)
    # chosen_xs = np.hstack(first_k_groupxs)
    # then convert these to multi-indices
    done_rows = rowxs.take(chosen_xs)
    done_cols = colxs.take(chosen_xs)
    multi_index = (done_rows, done_cols)
    # done_shape = (cand.validflags.shape[0], num_neighbs)
    # flat_xs = np.ravel_multi_index(multi_index, done_shape)
    flat_xs = np.ravel_multi_index(multi_index, cand.idxs.shape)
    _shape = (-1, num_neighbs)
    idxs = cand.idxs.take(flat_xs).reshape(_shape)
    dists = cand.dists.take(flat_xs).reshape(_shape)
    trueks = colxs.take(chosen_xs).reshape(_shape)
    if DEBUG_REQUERY:
        # dists2 = dists.copy()
        for count, (row, cols) in enumerate(zip(unique_rows, groupxs)):
            pass
        assert np.all(np.diff(dists, axis=1) >= 0)
        valid = cand.validflags.take(flat_xs).reshape(_shape)
        assert np.all(valid)
    return idxs, dists, trueks
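# --- Illustration (not part of the original module) ---
# A pure-numpy sketch of what done_part selects: the first `num_neighbs` valid
# cells of every row, gathered from the idxs/dists arrays via a flat index.
# The shapes and values below are assumptions for illustration.
import numpy as np

validflags = np.array([[True, False, True, True],
                       [True, True, False, True]])
idxs = np.arange(8).reshape(2, 4)
dists = idxs / 10.0
num_neighbs = 2

rowxs, colxs = np.where(validflags)  # row-major order, so rowxs comes out sorted
groupxs = np.split(np.arange(len(rowxs)), np.flatnonzero(np.diff(rowxs)) + 1)
chosen = np.hstack([g[:num_neighbs] for g in groupxs])   # first k valid cells per row
flat = np.ravel_multi_index((rowxs[chosen], colxs[chosen]), idxs.shape)
done_idxs = idxs.take(flat).reshape(-1, num_neighbs)     # [[0, 2], [4, 5]]
done_dists = dists.take(flat).reshape(-1, num_neighbs)   # [[0.0, 0.2], [0.4, 0.5]]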
def group_aids_by_featweight_species(ibs, aid_list, config2_=None):
    """ helper

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.preproc.preproc_probchip import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('testdb1')
        >>> config2_ = None
        >>> aid_list = ibs.get_valid_aids()
        >>> grouped_aids, unique_species, groupxs = group_aids_by_featweight_species(ibs, aid_list, config2_)
    """
    if config2_ is None:
        featweight_species = ibs.cfg.featweight_cfg.featweight_species
    else:
        featweight_species = config2_.get('featweight_species')
        assert featweight_species is not None
    if featweight_species == 'uselabel':
        # Use the labeled species for the detector
        species_list = ibs.get_annot_species_texts(aid_list)
    else:
        species_list = [featweight_species]
    aid_list = np.array(aid_list)
    species_list = np.array(species_list)
    species_rowid = np.array(ibs.get_species_rowids_from_text(species_list))
    unique_species_rowids, groupxs = vtool.group_indices(species_rowid)
    grouped_aids = vtool.apply_grouping(aid_list, groupxs)
    grouped_species = vtool.apply_grouping(species_list, groupxs)
    unique_species = ut.get_list_column(grouped_species, 0)
    return grouped_aids, unique_species, groupxs
def report_partitioning_statistics(new_reduced_joint):
    # compute partitioning statistics
    import vtool as vt
    vals, idxs = vt.group_indices(new_reduced_joint.values.ravel())
    #groupsize = list(map(len, idxs))
    #groupassigns = ut.unflat_vecmap(new_reduced_joint.assignment, idxs)
    all_states = new_reduced_joint._row_labels(asindex=True)
    clusterstats = [tuple(sorted(list(ut.dict_hist(a).values())))
                    for a in all_states]
    grouped_vals = ut.group_items(new_reduced_joint.values.ravel(), clusterstats)

    #probs_assigned_to_clustertype = [(
    #    sorted(np.unique(np.array(b).round(decimals=5)).tolist())[::-1], a)
    #    for a, b in grouped_vals.items()]
    probs_assigned_to_clustertype = [(
        ut.dict_hist(np.array(b).round(decimals=5)), a)
        for a, b in grouped_vals.items()]
    sortx = ut.argsort([max(c[0].keys()) for c in probs_assigned_to_clustertype])
    probs_assigned_to_clustertype = ut.take(probs_assigned_to_clustertype, sortx)

    # This is a list of 2-tuples. The first item holds the unique probabilities
    # assigned to a cluster type along with the number of times each was
    # assigned; the cluster type is the second item. Every number represents
    # how many annotations were assigned to a specific label, and the length of
    # that list is the total number of labels. For all low scores you will see
    # [[{somenum: 1}, {0: 800}], [1, 1, 1, ... 1]], indicating that assigning
    # everyone to a different label happened once with probability somenum and
    # 800 times with probability 0.

    #print(sorted([(b, a) for a, b in ut.map_dict_vals(sum, x)]).items())
    #z = sorted([(b, a) for a, b in ut.map_dict_vals(sum, grouped_vals).items()])
    print(ut.repr2(probs_assigned_to_clustertype, nl=2, precision=2, sorted_=True))
def get_annotmatch_rowids_from_aid2(ibs, aid2_list, eager=True, nInput=None,
                                    force_method=None):
    """
    # This one is slow because aid2 is the second part of the index
    TODO: autogenerate

    Returns a list of the aids that were reviewed as candidate matches to the input aid

    aid_list = ibs.get_valid_aids()

    CommandLine:
        python -m ibeis.annotmatch_funcs --exec-get_annotmatch_rowids_from_aid2 --show

    Example2:
        >>> # TIME TEST
        >>> # setup_pzmtest_subgraph()
        >>> from ibeis.annotmatch_funcs import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='PZ_Master1')
        >>> aid2_list = ibs.get_valid_aids()
        >>> func_list = [
        >>>     partial(ibs.get_annotmatch_rowids_from_aid2, force_method=1),
        >>>     partial(ibs.get_annotmatch_rowids_from_aid2, force_method=2),
        >>> ]
        >>> num_list = [1, 10, 50, 100, 300, 325, 350, 400, 500]
        >>> def args_list(count, aid2_list=aid2_list, num_list=num_list):
        >>>    return (aid2_list[0:num_list[count]],)
        >>> searchkw = dict(
        >>>     func_labels=['sql', 'numpy'],
        >>>     count_to_xtick=lambda count, args: len(args[0]),
        >>>     title='Timings of get_annotmatch_rowids_from_aid2',
        >>> )
        >>> niters = len(num_list)
        >>> time_result = ut.gridsearch_timer(func_list, args_list, niters, **searchkw)
        >>> time_result['plot_timings']()
        >>> ut.show_if_requested()
    """
    from ibeis.control import _autogen_annotmatch_funcs
    if nInput is None:
        # Default to the input length so the size check below is well defined
        nInput = len(aid2_list)
    if force_method != 2 and (nInput < 128 or (force_method == 1)):
        colnames = (_autogen_annotmatch_funcs.ANNOTMATCH_ROWID,)
        # FIXME: col_rowid is not correct
        params_iter = zip(aid2_list)
        andwhere_colnames = [_autogen_annotmatch_funcs.ANNOT_ROWID2]
        annotmatch_rowid_list = ibs.db.get_where2(
            ibs.const.ANNOTMATCH_TABLE, colnames, params_iter,
            andwhere_colnames, eager=eager, nInput=nInput,
            unpack_scalars=False)
    elif force_method == 2:
        import vtool as vt
        all_annotmatch_rowids = np.array(ibs._get_all_annotmatch_rowids())
        aids2 = np.array(ibs.get_annotmatch_aid2(all_annotmatch_rowids))
        unique_aid2, groupxs2 = vt.group_indices(aids2)
        rowids2_ = vt.apply_grouping(all_annotmatch_rowids, groupxs2)
        rowids2_ = [_.tolist() for _ in rowids2_]
        maping2 = ut.defaultdict(list, zip(unique_aid2, rowids2_))
        annotmatch_rowid_list = ut.dict_take(maping2, aid2_list)
    annotmatch_rowid_list = list(map(sorted, annotmatch_rowid_list))
    return annotmatch_rowid_list
def get_aidpair_tags(ibs, aid1_list, aid2_list, directed=True):
    r"""
    Args:
        ibs (IBEISController):  wbia controller object
        aid1_list (list):
        aid2_list (list):
        directed (bool): (default = True)

    Returns:
        list: tags_list

    CommandLine:
        python -m wbia.tag_funcs --exec-get_aidpair_tags --db PZ_Master1 --tags Hard interesting

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.tag_funcs import *  # NOQA
        >>> import wbia
        >>> ibs = wbia.opendb(defaultdb='testdb1')
        >>> has_any = ut.get_argval('--tags', type_=list, default=None)
        >>> min_num = ut.get_argval('--min_num', type_=int, default=1)
        >>> aid_pairs = filter_aidpairs_by_tags(ibs, has_any=has_any, min_num=1)
        >>> aid1_list = aid_pairs.T[0]
        >>> aid2_list = aid_pairs.T[1]
        >>> undirected_tags = get_aidpair_tags(ibs, aid1_list, aid2_list, directed=False)
        >>> tagged_pairs = list(zip(aid_pairs.tolist(), undirected_tags))
        >>> print(ut.repr2(tagged_pairs))
        >>> tag_dict = ut.groupby_tags(tagged_pairs, undirected_tags)
        >>> print(ut.repr2(tag_dict, nl=2))
        >>> print(ut.repr2(ut.map_dict_vals(len, tag_dict)))
    """
    aid_pairs = np.vstack([aid1_list, aid2_list]).T
    if directed:
        annotmatch_rowid = ibs.get_annotmatch_rowid_from_superkey(
            aid_pairs.T[0], aid_pairs.T[1])
        tags_list = ibs.get_annotmatch_case_tags(annotmatch_rowid)
    else:
        annotmatch_rowid = ibs.get_annotmatch_rowid_from_undirected_superkey(
            aid_pairs.T[0], aid_pairs.T[1])
        tags_list = ibs.get_annotmatch_case_tags(annotmatch_rowid)
        if False:
            expanded_aid_pairs = np.vstack([aid_pairs, aid_pairs[:, ::-1]])
            expanded_annotmatch_rowid = ibs.get_annotmatch_rowid_from_superkey(
                expanded_aid_pairs.T[0], expanded_aid_pairs.T[1])
            expanded_edgeids = vt.get_undirected_edge_ids(expanded_aid_pairs)
            unique_edgeids, groupxs = vt.group_indices(expanded_edgeids)
            expanded_tags_list = ibs.get_annotmatch_case_tags(
                expanded_annotmatch_rowid)
            grouped_tags = vt.apply_grouping(
                np.array(expanded_tags_list, dtype=object), groupxs)
            undirected_tags = [
                list(set(ut.flatten(tags))) for tags in grouped_tags
            ]
            edgeid2_tags = dict(zip(unique_edgeids, undirected_tags))
            input_edgeids = expanded_edgeids[:len(aid_pairs)]
            tags_list = ut.dict_take(edgeid2_tags, input_edgeids)
    return tags_list
def get_name_shortlist_aids(
    daid_list,
    dnid_list,
    annot_score_list,
    name_score_list,
    nid2_nidx,
    nNameShortList,
    nAnnotPerName,
):
    r"""
    CommandLine:
        python -m wbia.algo.hots.scoring --test-get_name_shortlist_aids

    Example:
        >>> # ENABLE_DOCTEST
        >>> from wbia.algo.hots.scoring import *  # NOQA
        >>> daid_list = np.array([11, 12, 13, 14, 15, 16, 17])
        >>> dnid_list = np.array([21, 21, 21, 22, 22, 23, 24])
        >>> annot_score_list = np.array([ 6, 2, 3, 5, 6, 3, 2])
        >>> name_score_list = np.array([ 8, 9, 5, 4])
        >>> nid2_nidx = {21:0, 22:1, 23:2, 24:3}
        >>> nNameShortList, nAnnotPerName = 3, 2
        >>> args = (daid_list, dnid_list, annot_score_list, name_score_list,
        ...         nid2_nidx, nNameShortList, nAnnotPerName)
        >>> top_daids = get_name_shortlist_aids(*args)
        >>> result = str(top_daids)
        >>> print(result)
        [15, 14, 11, 13, 16]
    """
    unique_nids, groupxs = vt.group_indices(np.array(dnid_list))
    grouped_annot_scores = vt.apply_grouping(annot_score_list, groupxs)
    grouped_daids = vt.apply_grouping(np.array(daid_list), groupxs)
    # Ensure name score list is aligned with the unique_nids
    aligned_name_score_list = name_score_list.take(
        ut.dict_take(nid2_nidx, unique_nids))
    # Sort each group by the name score
    group_sortx = aligned_name_score_list.argsort()[::-1]
    _top_daid_groups = ut.take(grouped_daids, group_sortx)
    _top_annot_score_groups = ut.take(grouped_annot_scores, group_sortx)
    top_daid_groups = ut.listclip(_top_daid_groups, nNameShortList)
    top_annot_score_groups = ut.listclip(_top_annot_score_groups, nNameShortList)
    # Sort within each group by the annotation score
    top_daid_sortx_groups = [
        annot_score_group.argsort()[::-1]
        for annot_score_group in top_annot_score_groups
    ]
    top_sorted_daid_groups = vt.ziptake(top_daid_groups, top_daid_sortx_groups)
    top_clipped_daids = [
        ut.listclip(sorted_daid_group, nAnnotPerName)
        for sorted_daid_group in top_sorted_daid_groups
    ]
    top_daids = ut.flatten(top_clipped_daids)
    return top_daids
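# --- Illustration (not part of the original module) ---
# The two-level sort performed by get_name_shortlist_aids, written with plain
# numpy on the same toy data as the doctest above: rank names by name score,
# then rank annotations within each surviving name by annotation score.
import numpy as np

daids = np.array([11, 12, 13, 14, 15, 16, 17])
dnids = np.array([21, 21, 21, 22, 22, 23, 24])
annot_scores = np.array([6, 2, 3, 5, 6, 3, 2])
name_scores = {21: 8, 22: 9, 23: 5, 24: 4}
nNameShortList, nAnnotPerName = 3, 2

top_nids = sorted(np.unique(dnids), key=lambda n: -name_scores[n])[:nNameShortList]
shortlist = []
for nid in top_nids:
    sel = np.flatnonzero(dnids == nid)
    best = sel[annot_scores[sel].argsort()[::-1]][:nAnnotPerName]  # best annots of this name
    shortlist.extend(daids[best].tolist())
# shortlist -> [15, 14, 11, 13, 16], matching the doctest result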
def general_name_coverage_mask_generator(make_mask_func, qreq_, cm, config, cov_cfg):
    """
    DEPRICATE

    Yields:
        nid, weight_mask_m, weight_mask

    CommandLine:
        python -m wbia.algo.hots.scoring --test-general_name_coverage_mask_generator --show
        python -m wbia.algo.hots.scoring --test-general_name_coverage_mask_generator --show --qaid 18

    Note:
        Evaluate output one at a time or it will get clobbered

    Example0:
        >>> # SLOW_DOCTEST
        >>> # (IMPORTANT)
        >>> from wbia.algo.hots.scoring import *  # NOQA
        >>> qreq_, cm = plh.testdata_scoring('PZ_MTEST', qaid_list=[18])
        >>> config = qreq_.qparams
        >>> make_mask_func, cov_cfg = get_mask_func(config)
        >>> masks_iter = general_name_coverage_mask_generator(make_mask_func, qreq_, cm, config, cov_cfg)
        >>> dnid_list, score_list, masks_list = evaluate_masks_iter(masks_iter)
        >>> ut.quit_if_noshow()
        >>> nidx = np.where(dnid_list == cm.qnid)[0][0]
        >>> daids = cm.get_groundtruth_daids()
        >>> dnid, weight_mask_m, weight_mask = masks_list[nidx]
        >>> show_single_coverage_mask(qreq_, cm, weight_mask_m, weight_mask, daids)
        >>> ut.show_if_requested()
    """
    import vtool as vt
    if ut.VERYVERBOSE:
        logger.info('[ncov] make_mask_func = %r' % (make_mask_func, ))
        logger.info('[ncov] cov_cfg = %s' % (ut.repr2(cov_cfg), ))
    assert cm.dnid_list is not None, 'eval nids'
    unique_dnids, groupxs = vt.group_indices(cm.dnid_list)
    fm_groups = vt.apply_grouping_(cm.fm_list, groupxs)
    fs_groups = vt.apply_grouping_(cm.fs_list, groupxs)
    fs_name_list = [np.hstack(fs_group) for fs_group in fs_groups]
    fm_name_list = [np.vstack(fm_group) for fm_group in fm_groups]
    return general_coverage_mask_generator(
        make_mask_func,
        qreq_,
        cm.qaid,
        unique_dnids,
        fm_name_list,
        fs_name_list,
        config,
        cov_cfg,
    )
def group_images_by_label(label_arr, gid_arr):
    """
    Input: Length N list of labels and ids
    Output: Length M list of unique labels, and length M list of lists of ids
    """
    # Reverse the image to cluster index mapping
    import vtool as vt
    labels_, groupxs_ = vt.group_indices(label_arr)
    sortx = np.array(list(map(len, groupxs_))).argsort()[::-1]
    labels = labels_.take(sortx, axis=0)
    groupxs = ut.take(groupxs_, sortx)
    label_gids = vt.apply_grouping(gid_arr, groupxs)
    return labels, label_gids
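# --- Illustration (not part of the original module) ---
# The vt.group_indices / vt.apply_grouping pattern used by group_images_by_label
# (and most helpers in this file) can be emulated with plain numpy. This is a
# sketch of the assumed semantics, not the vtool implementation.
import numpy as np

def group_indices(arr):
    # Return the unique keys and, for each key, the indices where it occurs
    sortx = arr.argsort(kind='stable')
    sorted_arr = arr[sortx]
    boundaries = np.flatnonzero(np.diff(sorted_arr)) + 1
    groupxs = np.split(sortx, boundaries)
    unique_keys = sorted_arr[np.r_[0, boundaries]]
    return unique_keys, groupxs

label_arr = np.array([3, 1, 3, 2, 1, 3])
gid_arr = np.array([10, 11, 12, 13, 14, 15])
labels, groupxs = group_indices(label_arr)
label_gids = [gid_arr[xs] for xs in groupxs]
# labels -> [1, 2, 3]; label_gids -> [[11, 14], [13], [10, 12, 15]]
# (group_images_by_label additionally sorts these groups by descending size)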
def get_aidpair_tags(ibs, aid1_list, aid2_list, directed=True):
    r"""
    Args:
        ibs (IBEISController):  ibeis controller object
        aid1_list (list):
        aid2_list (list):
        directed (bool): (default = True)

    Returns:
        list: tags_list

    CommandLine:
        python -m ibeis.tag_funcs --exec-get_aidpair_tags --db PZ_Master1 --tags Hard interesting

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.tag_funcs import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='testdb1')
        >>> has_any = ut.get_argval('--tags', type_=list, default=None)
        >>> min_num = ut.get_argval('--min_num', type_=int, default=1)
        >>> aid_pairs = filter_aidpairs_by_tags(ibs, has_any=has_any, min_num=1)
        >>> aid1_list = aid_pairs.T[0]
        >>> aid2_list = aid_pairs.T[1]
        >>> undirected_tags = get_aidpair_tags(ibs, aid1_list, aid2_list, directed=False)
        >>> tagged_pairs = list(zip(aid_pairs.tolist(), undirected_tags))
        >>> print(ut.list_str(tagged_pairs))
        >>> tag_dict = ut.groupby_tags(tagged_pairs, undirected_tags)
        >>> print(ut.dict_str(tag_dict, nl=2))
        >>> print(ut.dict_str(ut.map_dict_vals(len, tag_dict)))
    """
    aid_pairs = np.vstack([aid1_list, aid2_list]).T
    if directed:
        annotmatch_rowid = ibs.get_annotmatch_rowid_from_superkey(aid_pairs.T[0],
                                                                  aid_pairs.T[1])
        tags_list = ibs.get_annotmatch_case_tags(annotmatch_rowid)
    else:
        expanded_aid_pairs = np.vstack([aid_pairs, aid_pairs[:, ::-1]])
        expanded_annotmatch_rowid = ibs.get_annotmatch_rowid_from_superkey(
            expanded_aid_pairs.T[0], expanded_aid_pairs.T[1])
        expanded_edgeids = vt.get_undirected_edge_ids(expanded_aid_pairs)
        unique_edgeids, groupxs = vt.group_indices(expanded_edgeids)
        expanded_tags_list = ibs.get_annotmatch_case_tags(expanded_annotmatch_rowid)
        grouped_tags = vt.apply_grouping(np.array(expanded_tags_list, dtype=object), groupxs)
        undirected_tags = [list(set(ut.flatten(tags))) for tags in grouped_tags]
        edgeid2_tags = dict(zip(unique_edgeids, undirected_tags))
        input_edgeids = expanded_edgeids[:len(aid_pairs)]
        tags_list = ut.dict_take(edgeid2_tags, input_edgeids)
    return tags_list
def group_correspondences(all_matches, all_scores, all_daids, daid2_sccw):
    daid_keys, groupxs = vt.group_indices(all_daids)
    fs_list = vt.apply_grouping(all_scores, groupxs)
    fm_list = vt.apply_grouping(all_matches, groupxs)
    daid2_fm = {daid: fm for daid, fm in zip(daid_keys, fm_list)}
    daid2_fs = {daid: fs * daid2_sccw[daid]
                for daid, fs in zip(daid_keys, fs_list)}
    # FIXME: generalize to when nAssign > 1
    daid2_fk = {daid: np.ones(fs.size, dtype=hstypes.FK_DTYPE)
                for daid, fs in zip(daid_keys, fs_list)}
    daid2_chipmatch = (daid2_fm, daid2_fs, daid2_fk)
    return daid2_chipmatch
def get_name_shortlist_aids(daid_list, dnid_list, annot_score_list,
                            name_score_list, nid2_nidx,
                            nNameShortList, nAnnotPerName):
    r"""
    CommandLine:
        python -m ibeis.algo.hots.scoring --test-get_name_shortlist_aids

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.scoring import *  # NOQA
        >>> # build test data
        >>> daid_list = np.array([11, 12, 13, 14, 15, 16, 17])
        >>> dnid_list = np.array([21, 21, 21, 22, 22, 23, 24])
        >>> annot_score_list = np.array([ 6, 2, 3, 5, 6, 3, 2])
        >>> name_score_list = np.array([ 8, 9, 5, 4])
        >>> nid2_nidx = {21:0, 22:1, 23:2, 24:3}
        >>> nNameShortList, nAnnotPerName = 3, 2
        >>> # execute function
        >>> args = (daid_list, dnid_list, annot_score_list, name_score_list,
        ...         nid2_nidx, nNameShortList, nAnnotPerName)
        >>> top_daids = get_name_shortlist_aids(*args)
        >>> # verify results
        >>> result = str(top_daids)
        >>> print(result)
        [15, 14, 11, 13, 16]
    """
    unique_nids, groupxs = vt.group_indices(np.array(dnid_list))
    grouped_annot_scores = vt.apply_grouping(annot_score_list, groupxs)
    grouped_daids = vt.apply_grouping(np.array(daid_list), groupxs)
    # Ensure name score list is aligned with the unique_nids
    aligned_name_score_list = name_score_list.take(ut.dict_take(nid2_nidx, unique_nids))
    # Sort each group by the name score
    group_sortx = aligned_name_score_list.argsort()[::-1]
    _top_daid_groups = ut.take(grouped_daids, group_sortx)
    _top_annot_score_groups = ut.take(grouped_annot_scores, group_sortx)
    top_daid_groups = ut.listclip(_top_daid_groups, nNameShortList)
    top_annot_score_groups = ut.listclip(_top_annot_score_groups, nNameShortList)
    # Sort within each group by the annotation score
    top_daid_sortx_groups = [annot_score_group.argsort()[::-1]
                             for annot_score_group in top_annot_score_groups]
    top_sorted_daid_groups = vt.ziptake(top_daid_groups, top_daid_sortx_groups)
    top_clipped_daids = [ut.listclip(sorted_daid_group, nAnnotPerName)
                         for sorted_daid_group in top_sorted_daid_groups]
    top_daids = ut.flatten(top_clipped_daids)
    return top_daids
def consolidate(self, inplace=False):
    """ removes duplicate entries

    Example:
        >>> # UNSTABLE_DOCTEST
        >>> from ibeis.algo.hots.pgm_ext import *  # NOQA
        >>> state_idxs = [[1, 0, 1], [1, 0, 1], [1, 0, 2]]
        >>> weights = [.1, .2, .1]
        >>> variables = ['v1', 'v2', 'v3']
        >>> self = ApproximateFactor(state_idxs, weights, variables)
        >>> inplace = False
        >>> phi = self.consolidate(inplace)
        >>> result = str(phi)
        >>> print(result)
        +------+------+------+-----------------------+
        | v1   | v2   | v3   |   \hat{phi}(v1,v2,v3) |
        |------+------+------+-----------------------|
        | v1_1 | v2_0 | v3_1 |                0.3000 |
        | v1_1 | v2_0 | v3_2 |                0.1000 |
        +------+------+------+-----------------------+
    """
    import vtool as vt
    phi = self.copy() if inplace else self
    data_ids = vt.compute_ndarray_unique_rowids_unsafe(self.state_idxs)
    unique_ids, groupxs = vt.group_indices(data_ids)
    #assert len(unique_ids) == len(np.unique(vt.compute_unique_data_ids_(list(map(tuple, phi.state_idxs)))))
    if len(data_ids) != len(unique_ids):
        # Sum the values in the cpd to marginalize the duplicate probs
        # Take only the unique rows under this induced labeling
        unique_tmp_groupxs = np.array([gxs[0] for gxs in groupxs])
        self.state_idxs = self.state_idxs.take(unique_tmp_groupxs, axis=0)
        self.weights = np.array([
            g.sum() for g in vt.apply_grouping(self.weights, groupxs)
        ])
        #print('[pgm] Consolidated %r states into %r states' % (len(data_ids), len(unique_ids),))
    #else:
    #    print('[pgm] Cannot consolidated %r unique states' % (len(data_ids),))
    if not inplace:
        return phi
def consolidate(self, inplace=False):
    """ removes duplicate entries

    Example:
        >>> # UNSTABLE_DOCTEST
        >>> from ibeis.algo.hots.pgm_ext import *  # NOQA
        >>> state_idxs = [[1, 0, 1], [1, 0, 1], [1, 0, 2]]
        >>> weights = [.1, .2, .1]
        >>> variables = ['v1', 'v2', 'v3']
        >>> self = ApproximateFactor(state_idxs, weights, variables)
        >>> inplace = False
        >>> phi = self.consolidate(inplace)
        >>> result = str(phi)
        >>> print(result)
        +------+------+------+-----------------------+
        | v1   | v2   | v3   |   \hat{phi}(v1,v2,v3) |
        |------+------+------+-----------------------|
        | v1_1 | v2_0 | v3_1 |                0.3000 |
        | v1_1 | v2_0 | v3_2 |                0.1000 |
        +------+------+------+-----------------------+
    """
    import vtool as vt
    phi = self.copy() if inplace else self
    #data_ids = vt.compute_ndarray_unique_rowids_unsafe(self.state_idxs)
    data_ids = self._compute_unique_state_ids()
    unique_ids, groupxs = vt.group_indices(data_ids)
    #assert len(unique_ids) == len(np.unique(vt.compute_unique_data_ids_(list(map(tuple, phi.state_idxs)))))
    if len(data_ids) != len(unique_ids):
        # Sum the values in the cpd to marginalize the duplicate probs
        # Take only the unique rows under this induced labeling
        unique_tmp_groupxs = np.array([gxs[0] for gxs in groupxs])
        self.state_idxs = self.state_idxs.take(unique_tmp_groupxs, axis=0)
        self.weights = np.array([
            g.sum() for g in vt.apply_grouping(self.weights, groupxs)
        ])
        #print('[pgm] Consolidated %r states into %r states' % (len(data_ids), len(unique_ids),))
    #else:
    #    print('[pgm] Cannot consolidated %r unique states' % (len(data_ids),))
    if not inplace:
        return phi
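# --- Illustration (not part of the original module) ---
# What consolidate does to the factor table, expressed with numpy only:
# duplicate state rows are collapsed and their weights summed. The toy rows
# mirror the doctest above.
import numpy as np

state_idxs = np.array([[1, 0, 1], [1, 0, 1], [1, 0, 2]])
weights = np.array([0.1, 0.2, 0.1])
_, inverse = np.unique(state_idxs, axis=0, return_inverse=True)
uniq_rows = np.array([state_idxs[inverse == k][0] for k in range(inverse.max() + 1)])
uniq_weights = np.array([weights[inverse == k].sum() for k in range(inverse.max() + 1)])
# uniq_rows -> [[1, 0, 1], [1, 0, 2]]; uniq_weights -> [0.3, 0.1]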
def general_name_coverage_mask_generator(make_mask_func, qreq_, cm, config, cov_cfg):
    """
    Yields:
        nid, weight_mask_m, weight_mask

    CommandLine:
        python -m ibeis.algo.hots.scoring --test-general_name_coverage_mask_generator --show
        python -m ibeis.algo.hots.scoring --test-general_name_coverage_mask_generator --show --qaid 18

    Note:
        Evaluate output one at a time or it will get clobbered

    Example0:
        >>> # SLOW_DOCTEST
        >>> # (IMPORTANT)
        >>> from ibeis.algo.hots.scoring import *  # NOQA
        >>> qreq_, cm = plh.testdata_scoring('PZ_MTEST', qaid_list=[18])
        >>> config = qreq_.qparams
        >>> make_mask_func, cov_cfg = get_mask_func(config)
        >>> masks_iter = general_name_coverage_mask_generator(make_mask_func, qreq_, cm, config, cov_cfg)
        >>> dnid_list, score_list, masks_list = evaluate_masks_iter(masks_iter)
        >>> ut.quit_if_noshow()
        >>> nidx = np.where(dnid_list == cm.qnid)[0][0]
        >>> daids = cm.get_groundtruth_daids()
        >>> dnid, weight_mask_m, weight_mask = masks_list[nidx]
        >>> show_single_coverage_mask(qreq_, cm, weight_mask_m, weight_mask, daids)
        >>> ut.show_if_requested()
    """
    if ut.VERYVERBOSE:
        print('[ncov] make_mask_func = %r' % (make_mask_func,))
        print('[ncov] cov_cfg = %s' % (ut.dict_str(cov_cfg),))
    assert cm.dnid_list is not None, 'eval nids'
    unique_dnids, groupxs = vt.group_indices(cm.dnid_list)
    fm_groups = vt.apply_grouping_(cm.fm_list, groupxs)
    fs_groups = vt.apply_grouping_(cm.fs_list, groupxs)
    fs_name_list = [np.hstack(fs_group) for fs_group in fs_groups]
    fm_name_list = [np.vstack(fm_group) for fm_group in fm_groups]
    return general_coverage_mask_generator(make_mask_func, qreq_, cm.qaid, unique_dnids,
                                           fm_name_list, fs_name_list, config, cov_cfg)
def compute_agg_rvecs(invassign, wx):
    """
    Sums and normalizes all rvecs that belong to the same word and the same
    annotation id
    """
    rvecs_list, error_flags = invassign.compute_nonagg_rvecs(wx)
    ax_list = invassign.wx2_axs[wx]
    maw_list = invassign.wx2_maws[wx]
    # group members of each word by aid, we will collapse these groups
    unique_ax, groupxs = vt.group_indices(ax_list)
    # (weighted aggregation with multi-assign-weights)
    grouped_maws = vt.apply_grouping(maw_list, groupxs)
    grouped_rvecs = vt.apply_grouping(rvecs_list, groupxs)
    grouped_flags = vt.apply_grouping(~error_flags, groupxs)

    grouped_rvecs2_ = vt.zipcompress(grouped_rvecs, grouped_flags, axis=0)
    grouped_maws2_ = vt.zipcompress(grouped_maws, grouped_flags)
    is_good = [len(rvecs) > 0 for rvecs in grouped_rvecs2_]

    aggvecs = [aggregate_rvecs(rvecs, maws)[0]
               for rvecs, maws in zip(grouped_rvecs2_, grouped_maws2_)]
    unique_ax2_ = unique_ax.compress(is_good)
    ax2_aggvec = dict(zip(unique_ax2_, aggvecs))
    # Need to recompute flags for consistency
    # flag is true when aggvec is all zeros
    return ax2_aggvec
def collapse_labels(model, evidence, reduced_variables, reduced_row_idxs,
                    reduced_values):
    import vtool as vt
    #assert np.all(reduced_joint.values.ravel() == reduced_joint.values.flatten())
    reduced_ttypes = [model.var2_cpd[var].ttype for var in reduced_variables]

    evidence_vars = list(evidence.keys())
    evidence_state_idxs = ut.dict_take(evidence, evidence_vars)
    evidence_ttypes = [model.var2_cpd[var].ttype for var in evidence_vars]

    ttype2_ev_indices = dict(zip(*ut.group_indices(evidence_ttypes)))
    ttype2_re_indices = dict(zip(*ut.group_indices(reduced_ttypes)))
    # ttype2_ev_indices = ut.group_items(range(len(evidence_vars)), evidence_ttypes)
    # ttype2_re_indices = ut.group_items(range(len(reduced_variables)), reduced_ttypes)

    # Allow specific types of labels to change
    # everything is the same, only the names have changed.
    # TODO: allow for multiple different label_ttypes
    # for label_ttype in label_ttypes
    if 'name' not in model.ttype2_template:
        return reduced_row_idxs, reduced_values
    label_ttypes = ['name']
    for label_ttype in label_ttypes:
        ev_colxs = ttype2_ev_indices[label_ttype]
        re_colxs = ttype2_re_indices[label_ttype]

        ev_state_idxs = ut.take(evidence_state_idxs, ev_colxs)
        ev_state_idxs_tile = np.tile(ev_state_idxs, (len(reduced_values), 1)).astype(int)
        num_ev_ = len(ev_colxs)

        aug_colxs = list(range(num_ev_)) + (np.array(re_colxs) + num_ev_).tolist()
        aug_state_idxs = np.hstack([ev_state_idxs_tile, reduced_row_idxs])

        # Relabel rows based on the knowledge that
        # everything is the same, only the names have changed.
        num_cols = len(aug_state_idxs.T)
        mask = vt.index_to_boolmask(aug_colxs, num_cols)
        other_colxs, = np.where(~mask)
        relbl_states = aug_state_idxs.compress(mask, axis=1)
        other_states = aug_state_idxs.compress(~mask, axis=1)
        tmp_relbl_states = np.array(list(map(make_temp_state, relbl_states)))

        max_tmp_state = -1
        min_tmp_state = tmp_relbl_states.min()

        # rebuild original state structure with temp state idxs
        tmp_state_cols = [None] * num_cols
        for count, colx in enumerate(aug_colxs):
            tmp_state_cols[colx] = tmp_relbl_states[:, count:count + 1]
        for count, colx in enumerate(other_colxs):
            tmp_state_cols[colx] = other_states[:, count:count + 1]
        tmp_state_idxs = np.hstack(tmp_state_cols)

        data_ids = np.array(
            vt.compute_unique_data_ids_(list(map(tuple, tmp_state_idxs))))
        unique_ids, groupxs = vt.group_indices(data_ids)
        print('Collapsed %r states into %r states' % (
            len(data_ids), len(unique_ids),))
        # Sum the values in the cpd to marginalize the duplicate probs
        new_values = np.array([
            g.sum() for g in vt.apply_grouping(reduced_values, groupxs)
        ])
        # Take only the unique rows under this induced labeling
        unique_tmp_groupxs = np.array(ut.get_list_column(groupxs, 0))
        new_aug_state_idxs = tmp_state_idxs.take(unique_tmp_groupxs, axis=0)

        tmp_idx_set = set((-np.arange(-max_tmp_state,
                                      (-min_tmp_state) + 1)).tolist())
        true_idx_set = set(range(len(model.ttype2_template[label_ttype].basis)))

        # Relabel the rows one more time to agree with initial constraints
        for colx, true_idx in enumerate(ev_state_idxs):
            tmp_idx = np.unique(new_aug_state_idxs.T[colx])
            assert len(tmp_idx) == 1
            tmp_idx_set -= {tmp_idx[0]}
            true_idx_set -= {true_idx}
            new_aug_state_idxs[new_aug_state_idxs == tmp_idx] = true_idx

        # Relabel the remaining idxs
        remain_tmp_idxs = sorted(list(tmp_idx_set))[::-1]
        remain_true_idxs = sorted(list(true_idx_set))
        for tmp_idx, true_idx in zip(remain_tmp_idxs, remain_true_idxs):
            new_aug_state_idxs[new_aug_state_idxs == tmp_idx] = true_idx

        # Remove evidence based augmented labels
        new_state_idxs = new_aug_state_idxs.T[num_ev_:].T
        return new_state_idxs, new_values
def cluster_timespace_sec(posixtimes, latlons, thresh_sec=5, km_per_sec=KM_PER_SEC):
    """
    Args:
        posixtimes (ndarray): N-length array of times (in seconds)
        latlons (ndarray): Nx2 array of (lat, lon) coordinates
        thresh_sec (float): threshold in seconds

    Doctest:
        >>> from wbia.algo.preproc.occurrence_blackbox import *  # NOQA
        >>> # Nx1 matrix denoting groundtruth locations (for testing)
        >>> X_name = np.array([0, 1, 1, 1, 1, 1, 2, 2, 2])
        >>> # Nx3 matrix where columns are (time, lat, lon)
        >>> X_data = np.array([
        >>>     (0, 42.727985, -73.683994),  # MRC
        >>>     (0, 42.657414, -73.774448),  # Park1
        >>>     (0, 42.658333, -73.770993),  # Park2
        >>>     (0, 42.654384, -73.768919),  # Park3
        >>>     (0, 42.655039, -73.769048),  # Park4
        >>>     (0, 42.657872, -73.764148),  # Park5
        >>>     (0, 42.876974, -73.819311),  # CP1
        >>>     (0, 42.862946, -73.804977),  # CP2
        >>>     (0, 42.849809, -73.758486),  # CP3
        >>> ])
        >>> posixtimes = X_data.T[0]
        >>> latlons = X_data.T[1:3].T
        >>> thresh_sec = 250  # seconds
        >>> X_labels = cluster_timespace_sec(posixtimes, latlons, thresh_sec)
        >>> result = ('X_labels = %r' % (X_labels,))
        >>> print(result)
        X_labels = array([6, 4, 4, 4, 4, 5, 1, 2, 3])

    Doctest:
        >>> from wbia.algo.preproc.occurrence_blackbox import *  # NOQA
        >>> # Nx1 matrix denoting groundtruth locations (for testing)
        >>> X_name = np.array([0, 1, 1, 1, 1, 1, 2, 2, 2])
        >>> # Nx3 matrix where columns are (time, lat, lon)
        >>> X_data = np.array([
        >>>     (np.nan, 42.657414, -73.774448),  # Park1
        >>>     (0,      42.658333, -73.770993),  # Park2
        >>>     (np.nan, np.nan,    np.nan),      # Park3
        >>>     (np.nan, np.nan,    np.nan),      # Park3.5
        >>>     (0,      42.655039, -73.769048),  # Park4
        >>>     (0,      42.657872, -73.764148),  # Park5
        >>> ])
        >>> posixtimes = X_data.T[0]
        >>> latlons = X_data.T[1:3].T
        >>> thresh_sec = 250  # seconds
        >>> km_per_sec = KM_PER_SEC
        >>> X_labels = cluster_timespace_sec(posixtimes, latlons, thresh_sec)
        >>> result = 'X_labels = {}'.format(ut.repr2(X_labels))
        >>> print(result)
        X_labels = np.array([3, 4, 1, 2, 4, 5])
    """
    X_data, dist_func, columns = prepare_data(posixtimes, latlons, km_per_sec, 'seconds')
    if X_data is None:
        return None

    # Cluster nan distributions differently
    X_bools = ~np.isnan(X_data)
    group_id = (X_bools * np.power(2, [2, 1, 0])).sum(axis=1)
    import vtool as vt
    unique_ids, groupxs = vt.group_indices(group_id)
    grouped_labels = []
    for xs in groupxs:
        X_part = X_data.take(xs, axis=0)
        labels = _cluster_part(X_part, dist_func, columns, thresh_sec, km_per_sec)
        grouped_labels.append((labels, xs))
    # Undo grouping and rectify overlaps
    X_labels = _recombine_labels(grouped_labels)
    return X_labels
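# --- Illustration (not part of the original module) ---
# How cluster_timespace_sec separates rows before clustering: each row gets a
# 3-bit code describing which of (time, lat, lon) are present, and rows with
# the same code are clustered together. Toy rows assumed for illustration.
import numpy as np

X_data = np.array([
    [np.nan, 42.65, -73.77],   # missing time
    [0.0,    42.66, -73.77],   # complete row
    [np.nan, np.nan, np.nan],  # nothing known
])
X_bools = ~np.isnan(X_data)
# bit 2 = time present, bit 1 = lat present, bit 0 = lon present
group_id = (X_bools * np.power(2, [2, 1, 0])).sum(axis=1)
# group_id -> [3, 7, 0]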
def convert_category_to_siam_data(category_data, category_labels):
    # CONVERT CATEGORY LABELS TO PAIR LABELS
    # Make genuine / imposter pairs
    import vtool as vt
    unique_labels, groupxs_list = vt.group_indices(category_labels)

    num_categories = len(unique_labels)

    num_genuine = 10000 * num_categories
    num_imposter = 10000 * num_categories

    num_gen_per_category = int(num_genuine / len(unique_labels))
    num_imp_per_category = int(num_imposter / len(unique_labels))

    np.random.seed(0)
    groupxs = groupxs_list[0]

    def find_fix_flags(pairxs):
        is_dup = vt.nonunique_row_flags(pairxs)
        is_eye = pairxs.T[0] == pairxs.T[1]
        needs_fix = np.logical_or(is_dup, is_eye)
        #print(pairxs[needs_fix])
        return needs_fix

    def swap_undirected(pairxs):
        """ ensure left indices are lower """
        needs_swap = pairxs.T[0] > pairxs.T[1]
        arr = pairxs[needs_swap]
        tmp = arr.T[0].copy()
        arr.T[0, :] = arr.T[1]
        arr.T[1, :] = tmp
        pairxs[needs_swap] = arr
        return pairxs

    def sample_pairs(left_list, right_list, size):
        # Sample initial random left and right indices
        _index1 = np.random.choice(left_list, size=size, replace=True)
        _index2 = np.random.choice(right_list, size=size, replace=True)
        # stack
        _pairxs = np.vstack((_index1, _index2)).T
        # make undirected
        _pairxs = swap_undirected(_pairxs)
        # iterate until feasible
        needs_fix = find_fix_flags(_pairxs)
        while np.any(needs_fix):
            num_fix = needs_fix.sum()
            print('fixing: %d' % num_fix)
            _pairxs.T[1][needs_fix] = np.random.choice(right_list,
                                                       size=num_fix,
                                                       replace=True)
            _pairxs = swap_undirected(_pairxs)
            needs_fix = find_fix_flags(_pairxs)
        return _pairxs

    print('sampling genuine pairs')
    genuine_pairx_list = []
    for groupxs in groupxs_list:
        left_list = groupxs
        right_list = groupxs
        size = num_gen_per_category
        _pairxs = sample_pairs(left_list, right_list, size)
        genuine_pairx_list.extend(_pairxs.tolist())

    print('sampling imposter pairs')
    imposter_pairx_list = []
    for index in range(len(groupxs_list)):
        # Pick random pairs of false matches
        groupxs = groupxs_list[index]
        bar_groupxs = np.hstack(groupxs_list[:index] + groupxs_list[index + 1:])
        left_list = groupxs
        right_list = bar_groupxs
        size = num_imp_per_category
        _pairxs = sample_pairs(left_list, right_list, size)
        imposter_pairx_list.extend(_pairxs.tolist())

    # We might have added duplicate imposters, just remove them for now
    imposter_pairx_list = ut.take(
        imposter_pairx_list,
        vt.unique_row_indexes(np.array(imposter_pairx_list)))

    # structure data for output
    flat_data_pairxs = np.array(genuine_pairx_list + imposter_pairx_list)
    assert np.all(flat_data_pairxs.T[0] < flat_data_pairxs.T[1])
    assert find_fix_flags(flat_data_pairxs).sum() == 0
    # TODO: batch should use indices into data
    flat_index_list = np.array(
        ut.flatten(list(zip(flat_data_pairxs.T[0], flat_data_pairxs.T[1]))))
    data = np.array(category_data.take(flat_index_list, axis=0))
    labels = np.array([True] * len(genuine_pairx_list) +
                      [False] * len(imposter_pairx_list))
    return data, labels
def build_daid2_chipmatch2(invindex, common_wxs, wx2_qaids, wx2_qfxs,
                           scores_list, daids_list, query_sccw):
    """
    Builds explicit chipmatches that the rest of the pipeline plays nice with

    Notation:
        An explicit cmtup_old is a tuple (fm, fs, fk) feature_matches,
        feature_scores, and feature_ranks.

        Let N be the number of matches

        A feature match, fm{shape=(N, 2), dtype=int32}, is an array where the
        first column corresponds to query_feature_indexes (qfx) and the second
        column corresponds to database_feature_indexes (dfx).

        A feature score, fs{shape=(N,), dtype=float64} is an array of scores

        A feature rank, fk{shape=(N,), dtype=int16} is an array of ranks

    Returns:
        daid2_chipmatch (dict) : (daid2_fm, daid2_fs, daid2_fk)
        Return Format::
            daid2_fm (dict): {daid: fm, ...}
            daid2_fs (dict): {daid: fs, ...}
            daid2_fk (dict): {daid: fk, ...}

    Example:
        >>> from ibeis.algo.hots.smk.smk_core import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> ibs, invindex, qindex, qparams = smk_debug.testdata_match_kernel_L2()
        >>> wx2_qrvecs, wx2_qmaws, wx2_qaids, wx2_qfxs, query_sccw = qindex
        >>> smk_alpha = ibs.cfg.query_cfg.smk_cfg.smk_alpha
        >>> smk_thresh = ibs.cfg.query_cfg.smk_cfg.smk_thresh
        >>> withinfo = True  # takes an 11s vs 2s
        >>> args = (wx2_qrvecs, wx2_qmaws, wx2_qaids, wx2_qfxs, query_sccw, invindex, withinfo, smk_alpha, smk_thresh)
        >>> retL1 = match_kernel_L1(*args)
        >>> (daid2_totalscore, common_wxs, scores_list, daids_list, idf_list, daid_agg_keys,) = retL1
        >>> daid2_chipmatch_old = build_daid2_chipmatch2(invindex, common_wxs, wx2_qaids, wx2_qfxs, scores_list, daids_list, query_sccw)
        >>> daid2_chipmatch_new = build_daid2_chipmatch3(invindex, common_wxs, wx2_qaids, wx2_qfxs, scores_list, daids_list, query_sccw)
        >>> print(utool.is_dicteq(daid2_chipmatch_old[0], daid2_chipmatch_new[0]))
        >>> print(utool.is_dicteq(daid2_chipmatch_old[2], daid2_chipmatch_new[2]))
        >>> print(utool.is_dicteq(daid2_chipmatch_old[1], daid2_chipmatch_new[1]))

        %timeit build_daid2_chipmatch2(invindex, common_wxs, wx2_qaids, wx2_qfxs, scores_list, daids_list, query_sccw)
        %timeit build_daid2_chipmatch3(invindex, common_wxs, wx2_qaids, wx2_qfxs, scores_list, daids_list, query_sccw)
    """
    # FIXME: move groupby to vtool
    if utool.VERBOSE:
        print('[smk_core] build cmtup_old')

    wx2_dfxs = invindex.wx2_fxs
    daid2_sccw = invindex.daid2_sccw

    qfxs_list = [wx2_qfxs[wx] for wx in common_wxs]
    dfxs_list = [wx2_dfxs[wx] for wx in common_wxs]

    shapes_list = [scores.shape for scores in scores_list]  # 51us
    shape_ranges = [(mem_arange(w), mem_arange(h)) for (w, h) in shapes_list]  # 230us
    ijs_list = [
        mem_meshgrid(wrange, hrange) for (wrange, hrange) in shape_ranges
    ]  # 278us
    # Normalize scores for words, nMatches, and query sccw (still need daid sccw)
    nscores_iter = (scores * query_sccw for scores in scores_list)

    # FIXME: Preflatten all of these lists
    out_ijs = [list(zip(_is.flat, _js.flat)) for (_is, _js) in ijs_list]
    out_qfxs = [[qfxs[ix] for (ix, jx) in ijs]
                for (qfxs, ijs) in zip(qfxs_list, out_ijs)]
    out_dfxs = [[dfxs[jx] for (ix, jx) in ijs]
                for (dfxs, ijs) in zip(dfxs_list, out_ijs)]
    out_daids = ([daids[jx] for (ix, jx) in ijs]
                 for (daids, ijs) in zip(daids_list, out_ijs))
    out_scores = ([nscores[ijx] for ijx in ijs]
                  for (nscores, ijs) in zip(nscores_iter, out_ijs))
    nested_fm_iter = [[
        tuple(product(qfxs_, dfxs_))
        for qfxs_, dfxs_ in zip(qfxs, dfxs)
    ] for qfxs, dfxs in zip(out_qfxs, out_dfxs)]
    all_fms = np.array(list(utool.iflatten(utool.iflatten(nested_fm_iter))),
                       dtype=hstypes.FM_DTYPE)
    nested_nmatch_list = [[len(fm) for fm in fms] for fms in nested_fm_iter]
    nested_daid_iter = ([
        [daid] * nMatch
        for nMatch, daid in zip(nMatch_list, daids)
    ] for nMatch_list, daids in zip(nested_nmatch_list, out_daids))
    nested_score_iter = ([
        [score / nMatch] * nMatch
        for nMatch, score in zip(nMatch_list, scores)
    ] for nMatch_list, scores in zip(nested_nmatch_list, out_scores))
    all_daids_ = np.array(list(utool.iflatten(utool.iflatten(nested_daid_iter))),
                          dtype=hstypes.INDEX_TYPE)
    all_fss = np.array(list(utool.iflatten(utool.iflatten(nested_score_iter))),
                       dtype=hstypes.FS_DTYPE)

    # Filter out 0 scores
    keep_xs = np.where(all_fss > 0)[0]
    all_fss = all_fss.take(keep_xs)
    all_fms = all_fms.take(keep_xs, axis=0)
    all_daids_ = all_daids_.take(keep_xs)

    daid_keys, groupxs = vt.group_indices(all_daids_)
    fs_list = vt.apply_grouping(all_fss, groupxs)
    fm_list = vt.apply_grouping(all_fms, groupxs)
    daid2_fm = {daid: fm for daid, fm in zip(daid_keys, fm_list)}
    daid2_fs = {daid: fs * daid2_sccw[daid]
                for daid, fs in zip(daid_keys, fs_list)}
    # FIXME: generalize to when nAssign > 1
    daid2_fk = {daid: np.ones(fs.size, dtype=hstypes.FK_DTYPE)
                for daid, fs in zip(daid_keys, fs_list)}
    daid2_chipmatch = (daid2_fm, daid2_fs, daid2_fk)
    return daid2_chipmatch
def compute_stacked_agg_rvecs(words, flat_wxs_assign, flat_vecs, flat_offsets):
    """
    More efficient version of agg on a stacked structure

    Args:
        words (ndarray): entire vocabulary of words
        flat_wxs_assign (ndarray): maps a stacked index to word index
        flat_vecs (ndarray): stacked SIFT descriptors
        flat_offsets (ndarray): offset positions per annotation

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.smk.smk_funcs import *  # NOQA
        >>> data = testdata_rvecs(dim=2, nvecs=1000, nannots=10)
        >>> words = data['words']
        >>> flat_offsets = data['offset_list']
        >>> flat_wxs_assign, flat_vecs = ut.take(data, ['idx_to_wx', 'vecs'])
        >>> tup = compute_stacked_agg_rvecs(words, flat_wxs_assign, flat_vecs, flat_offsets)
        >>> all_agg_vecs, all_error_flags, agg_offset_list = tup
        >>> agg_rvecs_list = [all_agg_vecs[l:r] for l, r in ut.itertwo(agg_offset_list)]
        >>> agg_flags_list = [all_error_flags[l:r] for l, r in ut.itertwo(agg_offset_list)]
        >>> assert len(agg_flags_list) == len(flat_offsets) - 1

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.smk.smk_funcs import *  # NOQA
        >>> data = testdata_rvecs(dim=2, nvecs=100, nannots=5)
        >>> words = data['words']
        >>> flat_offsets = data['offset_list']
        >>> flat_wxs_assign, flat_vecs = ut.take(data, ['idx_to_wx', 'vecs'])
        >>> tup = compute_stacked_agg_rvecs(words, flat_wxs_assign, flat_vecs, flat_offsets)
        >>> all_agg_vecs, all_error_flags, agg_offset_list = tup
        >>> agg_rvecs_list = [all_agg_vecs[l:r] for l, r in ut.itertwo(agg_offset_list)]
        >>> agg_flags_list = [all_error_flags[l:r] for l, r in ut.itertwo(agg_offset_list)]
        >>> assert len(agg_flags_list) == len(flat_offsets) - 1
    """
    grouped_wxs = [
        flat_wxs_assign[left:right]
        for left, right in ut.itertwo(flat_offsets)
    ]

    # Assume single assignment, aggregate everything
    # across the entire database
    flat_offsets = np.array(flat_offsets)

    idx_to_dx = (np.searchsorted(
        flat_offsets, np.arange(len(flat_wxs_assign)),
        side='right') - 1).astype(np.int32)

    if isinstance(flat_wxs_assign, np.ma.masked_array):
        wx_list = flat_wxs_assign.T[0].compressed()
    else:
        wx_list = flat_wxs_assign.T[0].ravel()
    unique_wx, groupxs = vt.group_indices(wx_list)

    dim = flat_vecs.shape[1]
    if isinstance(flat_wxs_assign, np.ma.masked_array):
        dx_to_wxs = [np.unique(wxs.compressed()) for wxs in grouped_wxs]
    else:
        dx_to_wxs = [np.unique(wxs.ravel()) for wxs in grouped_wxs]
    dx_to_nagg = [len(wxs) for wxs in dx_to_wxs]
    num_agg_vecs = sum(dx_to_nagg)
    # all_agg_wxs = np.hstack(dx_to_wxs)
    agg_offset_list = np.array([0] + ut.cumsum(dx_to_nagg))
    # Preallocate agg residuals for all dxs
    all_agg_vecs = np.empty((num_agg_vecs, dim), dtype=np.float32)
    all_agg_vecs[:, :] = np.nan

    # precompute agg residual stack
    i_to_dxs = vt.apply_grouping(idx_to_dx, groupxs)
    subgroup = [vt.group_indices(dxs) for dxs in ut.ProgIter(i_to_dxs)]
    i_to_unique_dxs = ut.take_column(subgroup, 0)
    i_to_dx_groupxs = ut.take_column(subgroup, 1)
    num_words = len(unique_wx)

    # Overall this takes 5 minutes and 21 seconds
    # I think the other method takes about 12 minutes
    for i in ut.ProgIter(range(num_words), 'agg'):
        wx = unique_wx[i]
        xs = groupxs[i]
        dxs = i_to_unique_dxs[i]
        dx_groupxs = i_to_dx_groupxs[i]
        word = words[wx:wx + 1]

        offsets1 = agg_offset_list.take(dxs)
        offsets2 = [np.where(dx_to_wxs[dx] == wx)[0][0] for dx in dxs]
        offsets = np.add(offsets1, offsets2, out=offsets1)

        # if __debug__:
        #     assert np.bincount(dxs).max() < 2
        #     offset = agg_offset_list[dxs[0]]
        #     assert np.all(dx_to_wxs[dxs[0]] == all_agg_wxs[offset:offset +
        #                                                    dx_to_nagg[dxs[0]]])

        # Compute residuals
        rvecs = flat_vecs[xs] - word
        vt.normalize(rvecs, axis=1, out=rvecs)
        rvecs[np.all(np.isnan(rvecs), axis=1)] = 0
        # Aggregate across same images
        grouped_rvecs = vt.apply_grouping(rvecs, dx_groupxs, axis=0)
        agg_rvecs_ = [rvec_group.sum(axis=0) for rvec_group in grouped_rvecs]
        # agg_rvecs = np.vstack(agg_rvecs_)
        all_agg_vecs[offsets, :] = agg_rvecs_

    assert not np.any(np.isnan(all_agg_vecs))
    logger.info('Apply normalization')
    vt.normalize(all_agg_vecs, axis=1, out=all_agg_vecs)
    all_error_flags = np.all(np.isnan(all_agg_vecs), axis=1)
    all_agg_vecs[all_error_flags, :] = 0

    # ndocs_per_word1 = np.array(ut.lmap(len, wx_to_unique_dxs))
    # ndocs_total1 = len(flat_offsets) - 1
    # idf1 = smk_funcs.inv_doc_freq(ndocs_total1, ndocs_per_word1)

    tup = all_agg_vecs, all_error_flags, agg_offset_list
    return tup
def get_query_result_info(qreq_):
    """
    Helper function.

    Runs queries of a specific configuration returns the best rank of each query

    Args:
        qaids (list) : query annotation ids
        daids (list) : database annotation ids

    Returns:
        qx2_bestranks

    CommandLine:
        python -m ibeis.expt.harness --test-get_query_result_info
        python -m ibeis.expt.harness --test-get_query_result_info:0
        python -m ibeis.expt.harness --test-get_query_result_info:1
        python -m ibeis.expt.harness --test-get_query_result_info:0 --db lynx -a default:qsame_imageset=True,been_adjusted=True,excluderef=True -t default:K=1
        python -m ibeis.expt.harness --test-get_query_result_info:0 --db lynx -a default:qsame_imageset=True,been_adjusted=True,excluderef=True -t default:K=1 --cmd

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.expt.harness import *  # NOQA
        >>> import ibeis
        >>> qreq_ = ibeis.main_helpers.testdata_qreq_(a=['default:qindex=0:3,dindex=0:5'])
        >>> #ibs = ibeis.opendb('PZ_MTEST')
        >>> #qaids = ibs.get_valid_aids()[0:3]
        >>> #daids = ibs.get_valid_aids()[0:5]
        >>> #qreq_ = ibs.new_query_request(qaids, daids, verbose=True, cfgdict={})
        >>> cfgres_info = get_query_result_info(qreq_)
        >>> print(ut.dict_str(cfgres_info))

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.expt.harness import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('PZ_MTEST')
        >>> #cfgdict = dict(codename='vsone')
        >>> # ibs.cfg.query_cfg.codename = 'vsone'
        >>> qaids = ibs.get_valid_aids()[0:3]
        >>> daids = ibs.get_valid_aids()[0:5]
        >>> qreq_ = ibs.new_query_request(qaids, daids, verbose=True, cfgdict={})
        >>> cfgres_info = get_query_result_info(qreq_)
        >>> print(ut.dict_str(cfgres_info))

    Ignore:
        ibeis -e rank_cdf --db humpbacks -a default:has_any=hasnotch,mingt=2 -t default:proot=BC_DTW --show --nocache-big
        ibeis -e rank_cdf --db humpbacks -a default:is_known=True,mingt=2 -t default:pipeline_root=BC_DTW --show --debug-depc
        ibeis -e rank_cdf --db humpbacks -a default:is_known=True -t default:pipeline_root=BC_DTW --qaid=1,9,15,16,18 --daid-override=1,9,15,16,18,21,22 --show --debug-depc --clear-all-depcache
    """
    try:
        ibs = qreq_.ibs
    except AttributeError:
        ibs = qreq_.depc.controller
    import vtool as vt
    cm_list = qreq_.execute()
    #qreq_.ibs.query_chips(qreq_=qreq_, use_bigcache=False)
    qx2_cm = cm_list
    qaids = qreq_.qaids
    #qaids2 = [cm.qaid for cm in cm_list]
    qnids = ibs.get_annot_name_rowids(qaids)

    import utool
    with utool.embed_on_exception_context:
        unique_dnids = np.unique(ibs.get_annot_name_rowids(qreq_.daids))

        unique_qnids, groupxs = vt.group_indices(qnids)
        cm_group_list = ut.apply_grouping(cm_list, groupxs)
        qnid2_aggnamescores = {}

        qnx2_nameres_info = []
        #import utool
        #utool.embed()

        # Ranked list aggregation-ish
        nameres_info_list = []
        for qnid, cm_group in zip(unique_qnids, cm_group_list):
            nid2_name_score_group = [
                dict([(nid, cm.name_score_list[nidx])
                      for nid, nidx in cm.nid2_nidx.items()])
                for cm in cm_group
            ]
            aligned_name_scores = np.array([
                ut.dict_take(nid2_name_score, unique_dnids.tolist(), -np.inf)
                for nid2_name_score in nid2_name_score_group
            ]).T
            name_score_list = np.nanmax(aligned_name_scores, axis=1)
            qnid2_aggnamescores[qnid] = name_score_list
            # sort
            sortx = name_score_list.argsort()[::-1]
            sorted_namescores = name_score_list[sortx]
            sorted_dnids = unique_dnids[sortx]

            ## infer agg name results
            is_positive = sorted_dnids == qnid
            is_negative = np.logical_and(~is_positive, sorted_dnids > 0)
            gt_name_rank = None if not np.any(is_positive) else np.where(is_positive)[0][0]
            gf_name_rank = None if not np.any(is_negative) else np.nonzero(is_negative)[0][0]

            gt_nid = sorted_dnids[gt_name_rank]
            gf_nid = sorted_dnids[gf_name_rank]
            gt_name_score = sorted_namescores[gt_name_rank]
            gf_name_score = sorted_namescores[gf_name_rank]

            qnx2_nameres_info = {}
            qnx2_nameres_info['qnid'] = qnid
            qnx2_nameres_info['gt_nid'] = gt_nid
            qnx2_nameres_info['gf_nid'] = gf_nid
            qnx2_nameres_info['gt_name_rank'] = gt_name_rank
            qnx2_nameres_info['gf_name_rank'] = gf_name_rank
            qnx2_nameres_info['gt_name_score'] = gt_name_score
            qnx2_nameres_info['gf_name_score'] = gf_name_score

            nameres_info_list.append(qnx2_nameres_info)
        nameres_info = ut.dict_stack(nameres_info_list, 'qnx2_')

    qaids = qreq_.qaids
    daids = qreq_.daids
    qx2_gtaids = ibs.get_annot_groundtruth(qaids, daid_list=daids)
    # Get the groundtruth ranks and accuracy measures
    qx2_qresinfo = [get_qres_name_result_info(ibs, cm, qreq_) for cm in qx2_cm]

    cfgres_info = ut.dict_stack(qx2_qresinfo, 'qx2_')
    #for key in qx2_qresinfo[0].keys():
    #    'qx2_' + key
    #    ut.get_list_column(qx2_qresinfo, key)

    if False:
        qx2_avepercision = np.array([
            cm.get_average_percision(ibs=ibs, gt_aids=gt_aids)
            for (cm, gt_aids) in zip(qx2_cm, qx2_gtaids)
        ])
        cfgres_info['qx2_avepercision'] = qx2_avepercision
    # Compute mAP score
    # TODO: use mAP score
    # (Actually map score doesn't make much sense if using name scoring
    #mAP = qx2_avepercision[~np.isnan(qx2_avepercision)].mean()  # NOQA
    cfgres_info['qx2_bestranks'] = ut.replace_nones(cfgres_info['qx2_bestranks'], -1)
    cfgres_info.update(nameres_info)
    return cfgres_info
def compute_nsum_score(cm, qreq_=None): r""" nsum Args: cm (ibeis.ChipMatch): Returns: tuple: (unique_nids, nsum_score_list) CommandLine: python -m ibeis.algo.hots.name_scoring --test-compute_nsum_score python -m ibeis.algo.hots.name_scoring --test-compute_nsum_score:0 python -m ibeis.algo.hots.name_scoring --test-compute_nsum_score:2 utprof.py -m ibeis.algo.hots.name_scoring --test-compute_nsum_score:2 utprof.py -m ibeis.algo.hots.pipeline --test-request_ibeis_query_L0:0 --db PZ_Master1 -a timectrl:qindex=0:256 Example0: >>> # ENABLE_DOCTEST >>> from ibeis.algo.hots.name_scoring import * # NOQA >>> # build test data >>> cm = testdata_chipmatch() >>> # execute function >>> (unique_nids, nsum_score_list) = compute_nsum_score(cm) >>> result = ut.list_str((unique_nids, nsum_score_list), label_list=['unique_nids', 'nsum_score_list'], with_dtype=False) >>> print(result) unique_nids = np.array([1, 2, 3]) nsum_score_list = np.array([ 4., 7., 5.]) Example1: >>> # ENABLE_DOCTEST >>> from ibeis.algo.hots.name_scoring import * # NOQA >>> #ibs, qreq_, cm_list = plh.testdata_pre_sver('testdb1', qaid_list=[1]) >>> ibs, qreq_, cm_list = plh.testdata_post_sver('PZ_MTEST', qaid_list=[18]) >>> cm = cm_list[0] >>> cm.evaluate_dnids(qreq_.ibs) >>> cm._cast_scores() >>> #cm.qnid = 1 # Hack for testdb1 names >>> nsum_nid_list, nsum_score_list = compute_nsum_score(cm, qreq_) >>> assert np.all(nsum_nid_list == cm.unique_nids), 'nids out of alignment' >>> flags = (nsum_nid_list == cm.qnid) >>> max_true = nsum_score_list[flags].max() >>> max_false = nsum_score_list[~flags].max() >>> assert max_true > max_false, 'is this truely a hard case?' >>> assert max_true > 1.2, 'score=%r should be higher for aid=18' % (max_true,) >>> nsum_nid_list2, nsum_score_list2, _ = compute_nsum_score2(cm, qreq_) >>> assert np.allclose(nsum_score_list2, nsum_score_list), 'something is very wrong' >>> #assert np.all(nsum_score_list2 == nsum_score_list), 'could be a percision issue' Example2: >>> # ENABLE_DOCTEST >>> from ibeis.algo.hots.name_scoring import * # NOQA >>> #ibs, qreq_, cm_list = plh.testdata_pre_sver('testdb1', qaid_list=[1]) >>> ibs, qreq_, cm_list = plh.testdata_post_sver('PZ_MTEST', qaid_list=[18], cfgdict=dict(augment_queryside_hack=True)) >>> cm = cm_list[0] >>> cm.score_nsum(qreq_) >>> #cm.evaluate_dnids(qreq_.ibs) >>> #cm.qnid = 1 # Hack for testdb1 names >>> #nsum_nid_list, nsum_score_list = compute_nsum_score(cm, qreq_=qreq_) >>> ut.quit_if_noshow() >>> cm.show_ranked_matches(qreq_, ori=True) Example3: >>> # DISABLE_DOCTEST >>> from ibeis.algo.hots.name_scoring import * # NOQA >>> #ibs, qreq_, cm_list = plh.testdata_pre_sver('testdb1', qaid_list=[1]) >>> ibs, qreq_, cm_list = plh.testdata_post_sver('testdb1', qaid_list=[1], cfgdict=dict(augment_queryside_hack=True)) >>> cm = cm_list[0] >>> cm.score_nsum(qreq_) >>> #cm.evaluate_dnids(qreq_.ibs) >>> #cm.qnid = 1 # Hack for testdb1 names >>> #nsum_nid_list, nsum_score_list = compute_nsum_score(cm, qreq_=qreq_) >>> ut.quit_if_noshow() >>> cm.show_ranked_matches(qreq_, ori=True) Example4: >>> # ENABLE_DOCTEST >>> # FIXME: breaks when fg_on=True >>> from ibeis.algo.hots.name_scoring import * # NOQA >>> from ibeis.algo.hots import name_scoring >>> from ibeis.algo.hots import scoring >>> import ibeis >>> # Test to make sure name score and chips score are equal when per_name=1 >>> qreq_, args = plh.testdata_pre( >>> 'spatial_verification', defaultdb='PZ_MTEST', >>> a=['default:dpername=1,qsize=1,dsize=10'], >>> p=['default:K=1,fg_on=True,sqrd_dist_on=True']) >>> cm = 
args.cm_list_FILT[0] >>> ibs = qreq_.ibs >>> # Ensure there is only one aid per database name >>> assert isinstance(ibs, ibeis.control.IBEISControl.IBEISController) >>> #stats_dict = ibs.get_annot_stats_dict(qreq_.get_external_daids(), prefix='d') >>> #stats = stats_dict['dper_name'] >>> stats = ibs.get_annot_per_name_stats(qreq_.get_external_daids()) >>> print('per_name_stats = %s' % (ut.dict_str(stats, nl=False),)) >>> assert stats['mean'] == 1 and stats['std'] == 0, 'this test requires one annot per name in the database' >>> cm.evaluate_dnids(qreq_.ibs) >>> cm.assert_self(qreq_) >>> cm._cast_scores() >>> # cm.fs_list = cm.fs_list.astype(np.float) >>> nsum_nid_list, nsum_score_list = name_scoring.compute_nsum_score(cm, qreq_) >>> nsum_nid_list2, nsum_score_list2, _ = name_scoring.compute_nsum_score2(cm, qreq_) >>> csum_score_list = scoring.compute_csum_score(cm) >>> vt.asserteq(nsum_score_list, csum_score_list) >>> vt.asserteq(nsum_score_list, csum_score_list, thresh=0, iswarning=True) >>> vt.asserteq(nsum_score_list2, csum_score_list, thresh=0, iswarning=True) >>> #assert np.allclose(nsum_score_list, csum_score_list), 'should be the same when K=1 and per_name=1' >>> #assert all(nsum_score_list == csum_score_list), 'should be the same when K=1 and per_name=1' >>> #assert all(nsum_score_list2 == csum_score_list), 'should be the same when K=1 and per_name=1' >>> # Evaluate parts of the sourcecode Ignore: assert all(nsum_score_list3 == csum_score_list), 'should be the same when K=1 and per_name=1' fm_list = fm_list[0:1] fs_list = fs_list[0:1] featflag_list2 = featflag_list2[0:1] dnid_list = dnid_list[0:1] name_groupxs2 = name_groupxs2[0:1] nsum_nid_list2 = nsum_nid_list2[0:1] """ #assert qreq_ is not None try: HACK_SINGLE_ORI = qreq_ is not None and (qreq_.qparams.augment_queryside_hack or qreq_.qparams.rotation_invariance) except AttributeError: HACK_SINGLE_ORI = qreq_ is not None and (qreq_.config.augment_queryside_hack or qreq_.config.feat_cfg.rotation_invariance) pass # The core for each feature match # # The query feature index for each feature match fm_list = cm.fm_list fs_list = cm.get_fsv_prod_list() dnid_list = cm.dnid_list #-- fx1_list = [fm.T[0] for fm in fm_list] """ # Try a rebase? 
fx1_list = list(map(vt.compute_unique_data_ids_, fx1_list)) """ # Group annotation matches by name nsum_nid_list, name_groupxs = vt.group_indices(dnid_list) name_grouped_fx1_list = vt.apply_grouping_(fx1_list, name_groupxs) name_grouped_fs_list = vt.apply_grouping_(fs_list, name_groupxs) # Stack up all matches to a particular name name_grouped_fx1_flat = list(map(np.hstack, name_grouped_fx1_list)) name_grouped_fs_flat = list(map(np.hstack, name_grouped_fs_list)) """ assert np.all(name_grouped_fs_list[0][0] == fs_list[0]) assert np.all(name_grouped_fs_flat[0] == fs_list[0]) """ if HACK_SINGLE_ORI: # keypoints with the same xy can only have one of them vote kpts1 = qreq_.ibs.get_annot_kpts(cm.qaid, config2_=qreq_.get_external_query_config2()) xys1_ = vt.get_xys(kpts1).T kpts_xyid_list = vt.compute_unique_arr_dataids(xys1_) # Make nested group for every name by query feature index (accounting for duplicate orientation) name_grouped_xyid_flat = [kpts_xyid_list.take(fx1) for fx1 in name_grouped_fx1_flat] feat_groupxs_list = [vt.group_indices(xyid_flat)[1] for xyid_flat in name_grouped_xyid_flat] else: # make unique indicies using feature indexes feat_groupxs_list = [vt.group_indices(fx1_flat)[1] for fx1_flat in name_grouped_fx1_flat] # Make nested group for every name by unique query feature index feat_grouped_fs_list = [[fs_flat.take(xs, axis=0) for xs in feat_groupxs] for fs_flat, feat_groupxs in zip(name_grouped_fs_flat, feat_groupxs_list)] """ np.array(feat_grouped_fs_list)[0].T[0] == fs_list """ if False: valid_fs_list = [ np.array([group.max() for group in grouped_fs]) #np.array([group[group.argmax()] for group in grouped_fs]) for grouped_fs in feat_grouped_fs_list ] nsum_score_list4 = np.array([valid_fs.sum() for valid_fs in valid_fs_list]) # NOQA # Prevent a feature from voting twice: # take only the max score that a query feature produced #name_grouped_valid_fs_list1 =[np.array([fs_group.max() for fs_group in feat_grouped_fs]) # for feat_grouped_fs in feat_grouped_fs_list] nsum_score_list = np.array([np.sum([fs_group.max() for fs_group in feat_grouped_fs]) for feat_grouped_fs in feat_grouped_fs_list]) return nsum_nid_list, nsum_score_list
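# --- Illustrative sketch (not part of the original module) ---
# compute_nsum_score groups feature matches by database name and lets each query
# feature vote at most once per name, keeping only its best score in that name's
# group. A minimal numpy-only toy version of that voting rule, with hypothetical
# fm/fs/dnid data standing in for the ChipMatch fields:
import numpy as np

fx1_list = [np.array([0, 1, 1]), np.array([0, 2]), np.array([1, 2])]       # query feat index per match
fs_list = [np.array([0.9, 0.4, 0.6]), np.array([0.5, 0.8]), np.array([0.3, 0.7])]
dnid_list = np.array([1, 1, 2])                                            # database name per annot

unique_nids = np.unique(dnid_list)
nsum_score_list = []
for nid in unique_nids:
    # Stack all matches to this name
    annot_idxs = np.flatnonzero(dnid_list == nid)
    fx1_flat = np.hstack([fx1_list[i] for i in annot_idxs])
    fs_flat = np.hstack([fs_list[i] for i in annot_idxs])
    # A query feature may only vote once per name: keep its maximum score
    score = sum(fs_flat[fx1_flat == fx1].max() for fx1 in np.unique(fx1_flat))
    nsum_score_list.append(score)
print(unique_nids, np.array(nsum_score_list))   # -> [1 2] [2.3 1. ]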
def get_namescore_nonvoting_feature_flags(fm_list, fs_list, dnid_list, name_groupxs, kpts1=None): r""" DEPRICATE fm_list = [fm[:min(len(fm), 10)] for fm in fm_list] fs_list = [fs[:min(len(fs), 10)] for fs in fs_list] """ fx1_list = [fm.T[0] for fm in fm_list] # Group annotation matches by name name_grouped_fx1_list = vt.apply_grouping_(fx1_list, name_groupxs) name_grouped_fs_list = vt.apply_grouping_(fs_list, name_groupxs) # Stack up all matches to a particular name, keep track of original indicies via offets name_invertable_flat_fx1_list = list( map(ut.invertible_flatten2_numpy, name_grouped_fx1_list)) name_grouped_fx1_flat = ut.get_list_column(name_invertable_flat_fx1_list, 0) name_grouped_invertable_cumsum_list = ut.get_list_column( name_invertable_flat_fx1_list, 1) name_grouped_fs_flat = list(map(np.hstack, name_grouped_fs_list)) if kpts1 is not None: xys1_ = vt.get_xys(kpts1).T kpts_xyid_list = vt.compute_unique_data_ids(xys1_) # Make nested group for every name by query feature index (accounting for duplicate orientation) name_grouped_comboid_flat = list( kpts_xyid_list.take(fx1) for fx1 in name_grouped_fx1_flat) xyid_groupxs_list = list( vt.group_indices(xyid_flat)[1] for xyid_flat in name_grouped_comboid_flat) name_group_fx1_groupxs_list = xyid_groupxs_list else: # Make nested group for every name by query feature index fx1_groupxs_list = [ vt.group_indices(fx1_flat)[1] for fx1_flat in name_grouped_fx1_flat ] name_group_fx1_groupxs_list = fx1_groupxs_list name_grouped_fid_grouped_fs_list = [ vt.apply_grouping(fs_flat, fid_groupxs) for fs_flat, fid_groupxs in zip(name_grouped_fs_flat, name_group_fx1_groupxs_list) ] # Flag which features are valid in this grouped space. Only one keypoint should be able to vote # for each group name_grouped_fid_grouped_isvalid_list = [ np.array( [fs_group.max() == fs_group for fs_group in fid_grouped_fs_list]) for fid_grouped_fs_list in name_grouped_fid_grouped_fs_list ] # Go back to being grouped only in name space #dtype = np.bool name_grouped_isvalid_flat_list = [ vt.invert_apply_grouping2(fid_grouped_isvalid_list, fid_groupxs, dtype=np.bool) for fid_grouped_isvalid_list, fid_groupxs in zip( name_grouped_fid_grouped_isvalid_list, name_group_fx1_groupxs_list) ] name_grouped_isvalid_unflat_list = [ ut.unflatten2(isvalid_flat, invertable_cumsum_list) for isvalid_flat, invertable_cumsum_list in zip( name_grouped_isvalid_flat_list, name_grouped_invertable_cumsum_list) ] # Reports which features were valid in name scoring for every annotation featflag_list = vt.invert_apply_grouping(name_grouped_isvalid_unflat_list, name_groupxs) return featflag_list
def get_query_result_info(qreq_): """ Helper function. Runs queries of a specific configuration returns the best rank of each query Args: qaids (list) : query annotation ids daids (list) : database annotation ids Returns: qx2_bestranks CommandLine: python -m ibeis.expt.harness --test-get_query_result_info python -m ibeis.expt.harness --test-get_query_result_info:0 python -m ibeis.expt.harness --test-get_query_result_info:1 python -m ibeis.expt.harness --test-get_query_result_info:0 --db lynx -a default:qsame_imageset=True,been_adjusted=True,excluderef=True -t default:K=1 python -m ibeis.expt.harness --test-get_query_result_info:0 --db lynx -a default:qsame_imageset=True,been_adjusted=True,excluderef=True -t default:K=1 --cmd Example: >>> # ENABLE_DOCTEST >>> from ibeis.expt.harness import * # NOQA >>> import ibeis >>> qreq_ = ibeis.main_helpers.testdata_qreq_(a=['default:qindex=0:3,dindex=0:5']) >>> #ibs = ibeis.opendb('PZ_MTEST') >>> #qaids = ibs.get_valid_aids()[0:3] >>> #daids = ibs.get_valid_aids()[0:5] >>> #qreq_ = ibs.new_query_request(qaids, daids, verbose=True, cfgdict={}) >>> cfgres_info = get_query_result_info(qreq_) >>> print(ut.dict_str(cfgres_info)) Example: >>> # ENABLE_DOCTEST >>> from ibeis.expt.harness import * # NOQA >>> import ibeis >>> ibs = ibeis.opendb('PZ_MTEST') >>> #cfgdict = dict(codename='vsone') >>> # ibs.cfg.query_cfg.codename = 'vsone' >>> qaids = ibs.get_valid_aids()[0:3] >>> daids = ibs.get_valid_aids()[0:5] >>> qreq_ = ibs.new_query_request(qaids, daids, verbose=True, cfgdict={}) >>> cfgres_info = get_query_result_info(qreq_) >>> print(ut.dict_str(cfgres_info)) Ignore: ibeis -e rank_cdf --db humpbacks -a default:has_any=hasnotch,mingt=2 -t default:proot=BC_DTW --show --nocache-big ibeis -e rank_cdf --db humpbacks -a default:is_known=True,mingt=2 -t default:pipeline_root=BC_DTW --show --debug-depc ibeis -e rank_cdf --db humpbacks -a default:is_known=True -t default:pipeline_root=BC_DTW --qaid=1,9,15,16,18 --daid-override=1,9,15,16,18,21,22 --show --debug-depc --clear-all-depcache """ try: ibs = qreq_.ibs except AttributeError: ibs = qreq_.depc.controller import vtool as vt cm_list = qreq_.execute() #qreq_.ibs.query_chips(qreq_=qreq_, use_bigcache=False) qx2_cm = cm_list qaids = qreq_.qaids #qaids2 = [cm.qaid for cm in cm_list] qnids = ibs.get_annot_name_rowids(qaids) import utool with utool.embed_on_exception_context: unique_dnids = np.unique(ibs.get_annot_name_rowids(qreq_.daids)) unique_qnids, groupxs = vt.group_indices(qnids) cm_group_list = ut.apply_grouping(cm_list, groupxs) qnid2_aggnamescores = {} qnx2_nameres_info = [] #import utool #utool.embed() # Ranked list aggregation-ish nameres_info_list = [] for qnid, cm_group in zip(unique_qnids, cm_group_list): nid2_name_score_group = [ dict([(nid, cm.name_score_list[nidx]) for nid, nidx in cm.nid2_nidx.items()]) for cm in cm_group ] aligned_name_scores = np.array([ ut.dict_take(nid2_name_score, unique_dnids.tolist(), -np.inf) for nid2_name_score in nid2_name_score_group ]).T name_score_list = np.nanmax(aligned_name_scores, axis=1) qnid2_aggnamescores[qnid] = name_score_list # sort sortx = name_score_list.argsort()[::-1] sorted_namescores = name_score_list[sortx] sorted_dnids = unique_dnids[sortx] ## infer agg name results is_positive = sorted_dnids == qnid is_negative = np.logical_and(~is_positive, sorted_dnids > 0) gt_name_rank = None if not np.any(is_positive) else np.where(is_positive)[0][0] gf_name_rank = None if not np.any(is_negative) else np.nonzero(is_negative)[0][0] gt_nid = 
sorted_dnids[gt_name_rank] gf_nid = sorted_dnids[gf_name_rank] gt_name_score = sorted_namescores[gt_name_rank] gf_name_score = sorted_namescores[gf_name_rank] qnx2_nameres_info = {} qnx2_nameres_info['qnid'] = qnid qnx2_nameres_info['gt_nid'] = gt_nid qnx2_nameres_info['gf_nid'] = gf_nid qnx2_nameres_info['gt_name_rank'] = gt_name_rank qnx2_nameres_info['gf_name_rank'] = gf_name_rank qnx2_nameres_info['gt_name_score'] = gt_name_score qnx2_nameres_info['gf_name_score'] = gf_name_score nameres_info_list.append(qnx2_nameres_info) nameres_info = ut.dict_stack(nameres_info_list, 'qnx2_') qaids = qreq_.qaids daids = qreq_.daids qx2_gtaids = ibs.get_annot_groundtruth(qaids, daid_list=daids) # Get the groundtruth ranks and accuracy measures qx2_qresinfo = [get_qres_name_result_info(ibs, cm, qreq_) for cm in qx2_cm] cfgres_info = ut.dict_stack(qx2_qresinfo, 'qx2_') #for key in qx2_qresinfo[0].keys(): # 'qx2_' + key # ut.get_list_column(qx2_qresinfo, key) if False: qx2_avepercision = np.array( [cm.get_average_percision(ibs=ibs, gt_aids=gt_aids) for (cm, gt_aids) in zip(qx2_cm, qx2_gtaids)]) cfgres_info['qx2_avepercision'] = qx2_avepercision # Compute mAP score # TODO: use mAP score # (Actually map score doesn't make much sense if using name scoring #mAP = qx2_avepercision[~np.isnan(qx2_avepercision)].mean() # NOQA cfgres_info['qx2_bestranks'] = ut.replace_nones(cfgres_info['qx2_bestranks'] , -1) cfgres_info.update(nameres_info) return cfgres_info
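# --- Illustrative sketch (not part of the original module) ---
# After the per-name scores of all queries sharing a name are aggregated with
# nanmax, the groundtruth (gt) and best groundfalse (gf) ranks are read off the
# descending sort, as in the loop above. A small numpy example with hypothetical
# scores:
import numpy as np

unique_dnids = np.array([3, 7, 9, 12])
name_score_list = np.array([1.5, 4.2, 0.3, 2.8])
qnid = 12                                    # the query's true name

sortx = name_score_list.argsort()[::-1]
sorted_dnids = unique_dnids[sortx]
is_positive = sorted_dnids == qnid
is_negative = np.logical_and(~is_positive, sorted_dnids > 0)
gt_name_rank = None if not is_positive.any() else int(np.flatnonzero(is_positive)[0])
gf_name_rank = None if not is_negative.any() else int(np.flatnonzero(is_negative)[0])
print(gt_name_rank, gf_name_rank)            # -> 1 0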
def group_scores_by_name(ibs, aid_list, score_list): r""" Converts annotation scores to name scores. Over multiple annotations finds keypoints best match and uses that score. CommandLine: python -m ibeis.algo.hots.name_scoring --test-group_scores_by_name Example: >>> # ENABLE_DOCTEST >>> from ibeis.algo.hots.name_scoring import * # NOQA >>> import ibeis >>> cm, qreq_ = ibeis.testdata_cm('PZ_MTEST') >>> ibs = qreq_.ibs >>> #print(cm.get_inspect_str(qreq_)) >>> aid_list = cm.daid_list >>> score_list = cm.annot_score_list >>> nscoretup = group_scores_by_name(ibs, aid_list, score_list) >>> (sorted_nids, sorted_nscore, sorted_aids, sorted_scores) = nscoretup >>> ut.assert_eq(sorted_nids[0], cm.qnid) TODO: # TODO: this code needs a really good test case #>>> result = np.array_repr(sorted_nids[0:2]) #>>> print(result) #array([1, 5]) Ignore:: # hack in dict of Nones prob for testing import six qres.aid2_prob = {aid:None for aid in six.iterkeys(qres.aid2_score)} array([ 1, 5, 26]) [2 6 5] Timeit:: import ibeis ibs = ibeis.opendb('PZ_MTEST') aid_list = ibs.get_valid_aids() aid_arr = np.array(aid_list) %timeit ibs.get_annot_name_rowids(aid_list) %timeit ibs.get_annot_name_rowids(aid_arr) """ assert len(score_list) == len(aid_list), 'scores and aids must be associated' score_arr = np.array(score_list) nid_list = np.array(ibs.get_annot_name_rowids(aid_list)) aid_list = np.array(aid_list) # Group scores by name unique_nids, groupxs = vt.group_indices(nid_list) grouped_scores = np.array(vt.apply_grouping(score_arr, groupxs)) grouped_aids = np.array(vt.apply_grouping(aid_list, groupxs)) # Build representative score per group # (find each keypoints best match per annotation within the name) group_nscore = np.array([scores.max() for scores in grouped_scores]) group_sortx = group_nscore.argsort()[::-1] # Top nids sorted_nids = unique_nids.take(group_sortx, axis=0) sorted_nscore = group_nscore.take(group_sortx, axis=0) # Initial sort of aids _sorted_aids = grouped_aids.take(group_sortx, axis=0) _sorted_scores = grouped_scores.take(group_sortx, axis=0) # Secondary sort of aids sorted_sortx = [scores.argsort()[::-1] for scores in _sorted_scores] sorted_scores = [scores.take(sortx) for scores, sortx in zip(_sorted_scores, sorted_sortx)] sorted_aids = [aids.take(sortx) for aids, sortx in zip(_sorted_aids, sorted_sortx)] nscoretup = NameScoreTup(sorted_nids, sorted_nscore, sorted_aids, sorted_scores) return nscoretup
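# --- Illustrative sketch (not part of the original module) ---
# group_scores_by_name reduces annotation scores to name scores by taking the max
# score within each name group, then sorting names (and the annots inside each
# name) by score. A numpy-only version of that reduction on hypothetical data,
# with flatnonzero standing in for vt.group_indices / vt.apply_grouping:
import numpy as np

aid_list = np.array([11, 12, 13, 14, 15])
nid_list = np.array([1, 2, 1, 3, 2])
score_arr = np.array([0.2, 0.9, 0.7, 0.4, 0.1])

unique_nids = np.unique(nid_list)
groupxs = [np.flatnonzero(nid_list == nid) for nid in unique_nids]
group_nscore = np.array([score_arr[xs].max() for xs in groupxs])

group_sortx = group_nscore.argsort()[::-1]
sorted_nids = unique_nids[group_sortx]
sorted_nscore = group_nscore[group_sortx]
sorted_aids = [aid_list[groupxs[gx]][score_arr[groupxs[gx]].argsort()[::-1]]
               for gx in group_sortx]
print(sorted_nids, sorted_nscore, sorted_aids)   # names 2, 1, 3 in that order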
def get_annotmatch_rowids_from_aid2(ibs, aid2_list, eager=True, nInput=None, force_method=None): """ # This one is slow because aid2 is the second part of the index TODO autogenerate Returns a list of the aids that were reviewed as candidate matches to the input aid aid_list = ibs.get_valid_aids() CommandLine: python -m ibeis.annotmatch_funcs --exec-get_annotmatch_rowids_from_aid2 --show Example2: >>> # TIME TEST >>> # setup_pzmtest_subgraph() >>> from ibeis.annotmatch_funcs import * # NOQA >>> import ibeis >>> ibs = ibeis.opendb(defaultdb='PZ_Master1') >>> aid2_list = ibs.get_valid_aids() >>> func_list = [ >>> partial(ibs.get_annotmatch_rowids_from_aid2, force_method=1), >>> partial(ibs.get_annotmatch_rowids_from_aid2, force_method=2), >>> ] >>> num_list = [1, 10, 50, 100, 300, 325, 350, 400, 500] >>> def args_list(count, aid2_list=aid2_list, num_list=num_list): >>> return (aid2_list[0:num_list[count]],) >>> searchkw = dict( >>> func_labels=['sql', 'numpy'], >>> count_to_xtick=lambda count, args: len(args[0]), >>> title='Timings of get_annotmatch_rowids_from_aid2', >>> ) >>> niters = len(num_list) >>> time_result = ut.gridsearch_timer(func_list, args_list, niters, **searchkw) >>> time_result['plot_timings']() >>> ut.show_if_requested() """ from ibeis.control import _autogen_annotmatch_funcs if force_method != 2 and (nInput < 128 or (force_method == 1)): colnames = (_autogen_annotmatch_funcs.ANNOTMATCH_ROWID, ) # FIXME: col_rowid is not correct params_iter = zip(aid2_list) andwhere_colnames = [_autogen_annotmatch_funcs.ANNOT_ROWID2] annotmatch_rowid_list = ibs.db.get_where2(ibs.const.ANNOTMATCH_TABLE, colnames, params_iter, andwhere_colnames, eager=eager, nInput=nInput, unpack_scalars=False) elif force_method == 2: import vtool as vt all_annotmatch_rowids = np.array(ibs._get_all_annotmatch_rowids()) aids2 = np.array(ibs.get_annotmatch_aid2(all_annotmatch_rowids)) unique_aid2, groupxs2 = vt.group_indices(aids2) rowids2_ = vt.apply_grouping(all_annotmatch_rowids, groupxs2) rowids2_ = [_.tolist() for _ in rowids2_] maping2 = ut.defaultdict(list, zip(unique_aid2, rowids2_)) annotmatch_rowid_list = ut.dict_take(maping2, aid2_list) annotmatch_rowid_list = list(map(sorted, annotmatch_rowid_list)) return annotmatch_rowid_list
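# --- Illustrative sketch (not part of the original module) ---
# The force_method=2 path avoids one SQL query per aid by pulling the whole
# annotmatch table once, grouping rowids by their aid2 column, and answering
# lookups from a dict. The same group-once / lookup-many idiom in plain
# numpy + stdlib on hypothetical data:
import numpy as np
from collections import defaultdict

all_annotmatch_rowids = np.array([100, 101, 102, 103, 104])
aids2 = np.array([7, 3, 7, 9, 3])

unique_aid2 = np.unique(aids2)
rowids2_ = [all_annotmatch_rowids[aids2 == aid].tolist() for aid in unique_aid2]
mapping2 = defaultdict(list, zip(unique_aid2.tolist(), rowids2_))

aid2_list = [7, 5, 3]                                    # aid 5 has no matches
print([sorted(mapping2[aid]) for aid in aid2_list])      # -> [[100, 102], [], [101, 104]]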
def conditional_knn(nnindexer, qfx2_vec, num_neighbors, invalid_axs): """ >>> from ibeis.algo.hots.neighbor_index import * # NOQA >>> qreq_ = ibeis.testdata_qreq_(defaultdb='seaturtles') >>> qreq_.load_indexer() >>> qfx2_vec = qreq_.ibs.get_annot_vecs(qreq_.qaids[0]) >>> num_neighbors = 2 >>> nnindexer = qreq_.indexer >>> ibs = qreq_.ibs >>> qaid = 1 >>> qencid = ibs.get_annot_encounter_text([qaid])[0] >>> ax2_encid = np.array(ibs.get_annot_encounter_text(nnindexer.ax2_aid)) >>> invalid_axs = np.where(ax2_encid == qencid)[0] """ #import ibeis import itertools def in1d_shape(arr1, arr2): return np.in1d(arr1, arr2).reshape(arr1.shape) get_neighbors = ut.partial(nnindexer.flann.nn_index, checks=nnindexer.checks, cores=nnindexer.cores) # Alloc space for final results K = num_neighbors shape = (len(qfx2_vec), K) qfx2_idx = np.full(shape, -1, dtype=np.int32) qfx2_rawdist = np.full(shape, np.nan, dtype=np.float64) qfx2_truek = np.full(shape, -1, dtype=np.int32) # Make a set of temporary indexes and loop variables limit = None limit = 4 K_ = K tx2_qfx = np.arange(len(qfx2_vec)) tx2_vec = qfx2_vec iter_count = 0 for iter_count in itertools.count(): if limit is not None and iter_count >= limit: break # Find a set of neighbors (tx2_idx, tx2_rawdist) = get_neighbors(tx2_vec, K_) tx2_idx = vt.atleast_nd(tx2_idx, 2) tx2_rawdist = vt.atleast_nd(tx2_rawdist, 2) tx2_ax = nnindexer.get_nn_axs(tx2_idx) # Check to see if they meet the criteria tx2_invalid = in1d_shape(tx2_ax, invalid_axs) tx2_valid = np.logical_not(tx2_invalid) tx2_num_valid = tx2_valid.sum(axis=1) tx2_notdone = tx2_num_valid < K tx2_done = np.logical_not(tx2_notdone) # Move completely valid queries into the results if np.any(tx2_done): done_qfx = tx2_qfx.compress(tx2_done, axis=0) # Need to parse which columns are the completed ones done_valid_ = tx2_valid.compress(tx2_done, axis=0) done_rawdist_ = tx2_rawdist.compress(tx2_done, axis=0) done_idx_ = tx2_idx.compress(tx2_done, axis=0) # Get the complete valid indicies rowxs, colxs = np.where(done_valid_) unique_rows, groupxs = vt.group_indices(rowxs) first_k_groupxs = [groupx[0:K] for groupx in groupxs] chosen_xs = np.hstack(first_k_groupxs) multi_index = (rowxs.take(chosen_xs), colxs.take(chosen_xs)) flat_xs = np.ravel_multi_index(multi_index, done_valid_.shape) done_rawdist = done_rawdist_.take(flat_xs).reshape((-1, K)) done_idx = done_idx_.take(flat_xs).reshape((-1, K)) # Write done results in output qfx2_idx[done_qfx, :] = done_idx qfx2_rawdist[done_qfx, :] = done_rawdist qfx2_truek[done_qfx, :] = vt.apply_grouping( colxs, first_k_groupxs) if np.all(tx2_done): break K_increase = (K - tx2_num_valid.min()) K_ += K_increase tx2_qfx = tx2_qfx.compress(tx2_notdone, axis=0) tx2_vec = tx2_vec.compress(tx2_notdone, axis=0) if nnindexer.max_distance_sqrd is not None: qfx2_dist = np.divide(qfx2_rawdist, nnindexer.max_distance_sqrd) else: qfx2_dist = qfx2_rawdist return (qfx2_idx, qfx2_dist, iter_count)
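# --- Illustrative sketch (not part of the original module) ---
# conditional_knn keeps widening K until every query feature has num_neighbors
# neighbors that do not come from the invalid annotation set, then keeps the
# first K valid hits per row. The same expand-and-filter loop, using sklearn's
# NearestNeighbors as a stand-in for the FLANN indexer and made-up data:
import numpy as np
from sklearn.neighbors import NearestNeighbors

rng = np.random.RandomState(0)
db_vecs = rng.rand(50, 8)
db_axs = rng.randint(0, 5, size=50)      # annotation index of each database vector
query_vecs = rng.rand(6, 8)
invalid_axs = np.array([2, 3])           # e.g. annotations from the query's own encounter
K = 3

nn = NearestNeighbors().fit(db_vecs)
K_ = K
while True:
    dist, idx = nn.kneighbors(query_vecs, n_neighbors=K_)
    valid = ~np.in1d(db_axs[idx.ravel()], invalid_axs).reshape(idx.shape)
    num_valid = valid.sum(axis=1)
    if num_valid.min() >= K or K_ == len(db_vecs):
        break
    # Some query row still lacks K valid neighbors: widen the search and retry
    K_ = min(K_ + (K - num_valid.min()), len(db_vecs))

# Keep the first K valid columns of each row (the toy data has plenty of valid hits)
cols_list = [np.flatnonzero(v)[:K] for v in valid]
qfx2_idx = np.array([idx[r, cols] for r, cols in enumerate(cols_list)])
qfx2_dist = np.array([dist[r, cols] for r, cols in enumerate(cols_list)])
print(qfx2_idx.shape, qfx2_dist.shape)   # -> (6, 3) (6, 3)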
def flow(): """ http://pmneila.github.io/PyMaxflow/maxflow.html#maxflow-fastmin pip install PyMaxFlow pip install pystruct pip install hdbscan """ # Toy problem representing attempting to discover names via annotation # scores import pystruct # NOQA import pystruct.models # NOQA import networkx as netx # NOQA import vtool as vt num_annots = 10 num_names = num_annots hidden_nids = np.random.randint(0, num_names, num_annots) unique_nids, groupxs = vt.group_indices(hidden_nids) toy_params = { True: { 'mu': 1.0, 'sigma': 2.2 }, False: { 'mu': 7.0, 'sigma': 0.9 } } if True: import vtool as vt import wbia.plottool as pt xdata = np.linspace(0, 100, 1000) tp_pdf = vt.gauss_func1d(xdata, **toy_params[True]) fp_pdf = vt.gauss_func1d(xdata, **toy_params[False]) pt.plot_probabilities([tp_pdf, fp_pdf], ['TP', 'TF'], xdata=xdata) def metric(aidx1, aidx2, hidden_nids=hidden_nids, toy_params=toy_params): if aidx1 == aidx2: return 0 rng = np.random.RandomState(int(aidx1 + aidx2)) same = hidden_nids[int(aidx1)] == hidden_nids[int(aidx2)] mu, sigma = ut.dict_take(toy_params[same], ['mu', 'sigma']) return np.clip(rng.normal(mu, sigma), 0, np.inf) pairwise_aidxs = list(ut.iprod(range(num_annots), range(num_annots))) pairwise_labels = np.array( [hidden_nids[a1] == hidden_nids[a2] for a1, a2 in pairwise_aidxs]) pairwise_scores = np.array([metric(*zz) for zz in pairwise_aidxs]) pairwise_scores_mat = pairwise_scores.reshape(num_annots, num_annots) if num_annots <= 10: logger.info(ut.repr2(pairwise_scores_mat, precision=1)) # aids = list(range(num_annots)) # g = netx.DiGraph() # g.add_nodes_from(aids) # g.add_edges_from([(tup[0], tup[1], {'weight': score}) for tup, score in zip(pairwise_aidxs, pairwise_scores) if tup[0] != tup[1]]) # netx.draw_graphviz(g) # pr = netx.pagerank(g) X = pairwise_scores Y = pairwise_labels encoder = vt.ScoreNormalizer() encoder.fit(X, Y) encoder.visualize() # meanshift clustering import sklearn bandwidth = sklearn.cluster.estimate_bandwidth( X[:, None]) # , quantile=quantile, n_samples=500) assert bandwidth != 0, '[] bandwidth is 0. 
Cannot cluster' # bandwidth is with respect to the RBF used in clustering # ms = sklearn.cluster.MeanShift(bandwidth=bandwidth, bin_seeding=True, cluster_all=True) ms = sklearn.cluster.MeanShift(bandwidth=bandwidth, bin_seeding=True, cluster_all=False) ms.fit(X[:, None]) label_arr = ms.labels_ unique_labels = np.unique(label_arr) max_label = max(0, unique_labels.max()) num_orphans = (label_arr == -1).sum() label_arr[label_arr == -1] = np.arange(max_label + 1, max_label + 1 + num_orphans) X_data = np.arange(num_annots)[:, None].astype(np.int64) # graph = pystruct.models.GraphCRF( # n_states=None, # n_features=None, # inference_method='lp', # class_weight=None, # directed=False, # ) import scipy import scipy.cluster import scipy.cluster.hierarchy thresh = 2.0 labels = scipy.cluster.hierarchy.fclusterdata(X_data, thresh, metric=metric) unique_lbls, lblgroupxs = vt.group_indices(labels) logger.info(groupxs) logger.info(lblgroupxs) logger.info('groupdiff = %r' % (ut.compare_groupings(groupxs, lblgroupxs), )) logger.info('common groups = %r' % (ut.find_grouping_consistencies(groupxs, lblgroupxs), )) # X_data, seconds_thresh, criterion='distance') # help(hdbscan.HDBSCAN) import hdbscan alg = hdbscan.HDBSCAN(metric=metric, min_cluster_size=1, p=1, gen_min_span_tree=1, min_samples=2) labels = alg.fit_predict(X_data) labels[labels == -1] = np.arange(np.sum(labels == -1)) + labels.max() + 1 unique_lbls, lblgroupxs = vt.group_indices(labels) logger.info(groupxs) logger.info(lblgroupxs) logger.info('groupdiff = %r' % (ut.compare_groupings(groupxs, lblgroupxs), )) logger.info('common groups = %r' % (ut.find_grouping_consistencies(groupxs, lblgroupxs), )) # import ddbscan # help(ddbscan.DDBSCAN) # alg = ddbscan.DDBSCAN(2, 2) # D = np.zeros((len(aids), len(aids) + 1)) # D.T[-1] = np.arange(len(aids)) ## Can alpha-expansion be used when the pairwise potentials are not in a grid? # hidden_ut.group_items(aids, hidden_nids) if False: import maxflow # from maxflow import fastmin # Create a graph with integer capacities. g = maxflow.Graph[int](2, 2) # Add two (non-terminal) nodes. Get the index to the first one. nodes = g.add_nodes(2) # Create two edges (forwards and backwards) with the given capacities. # The indices of the nodes are always consecutive. g.add_edge(nodes[0], nodes[1], 1, 2) # Set the capacities of the terminal edges... # ...for the first node. g.add_tedge(nodes[0], 2, 5) # ...for the second node. g.add_tedge(nodes[1], 9, 4) g = maxflow.Graph[float](2, 2) g.maxflow() g.get_nx_graph() g.get_segment(nodes[0])
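# --- Illustrative sketch (not part of the original module) ---
# The toy problem above compares a recovered clustering against the hidden name
# assignment with ut.compare_groupings / ut.find_grouping_consistencies. A
# compact alternative way to quantify the same agreement is the adjusted Rand
# index; label values only matter up to the partition they induce:
import numpy as np
from sklearn.metrics import adjusted_rand_score

hidden_nids = np.array([0, 0, 1, 1, 2, 2])        # hypothetical true names
labels = np.array([5, 5, 7, 7, 7, 3])             # hypothetical clustering output
print(adjusted_rand_score(hidden_nids, labels))   # 1.0 would mean identical partitions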
def compute_fmech_score(cm, qreq_=None, hack_single_ori=False): r""" nsum. This is the fmech scoring mechanism. Args: cm (ibeis.ChipMatch): Returns: tuple: (unique_nids, nsum_score_list) CommandLine: python -m ibeis.algo.hots.name_scoring --test-compute_fmech_score python -m ibeis.algo.hots.name_scoring --test-compute_fmech_score:0 python -m ibeis.algo.hots.name_scoring --test-compute_fmech_score:2 utprof.py -m ibeis.algo.hots.name_scoring --test-compute_fmech_score:2 utprof.py -m ibeis.algo.hots.pipeline --test-request_ibeis_query_L0:0 --db PZ_Master1 -a timectrl:qindex=0:256 Example0: >>> # ENABLE_DOCTEST >>> from ibeis.algo.hots.name_scoring import * # NOQA >>> cm = testdata_chipmatch() >>> nsum_score_list = compute_fmech_score(cm) >>> assert np.all(nsum_score_list == [ 4., 7., 5.]) Example1: >>> # ENABLE_DOCTEST >>> from ibeis.algo.hots.name_scoring import * # NOQA >>> ibs, qreq_, cm_list = plh.testdata_post_sver('PZ_MTEST', qaid_list=[18]) >>> cm = cm_list[0] >>> cm.evaluate_dnids(qreq_) >>> cm._cast_scores() >>> #cm.qnid = 1 # Hack for testdb1 names >>> nsum_score_list = compute_fmech_score(cm, qreq_) >>> #assert np.all(nsum_nid_list == cm.unique_nids), 'nids out of alignment' >>> flags = (cm.unique_nids == cm.qnid) >>> max_true = nsum_score_list[flags].max() >>> max_false = nsum_score_list[~flags].max() >>> assert max_true > max_false, 'is this truely a hard case?' >>> assert max_true > 1.2, 'score=%r should be higher for aid=18' % (max_true,) Example2: >>> # ENABLE_DOCTEST >>> from ibeis.algo.hots.name_scoring import * # NOQA >>> ibs, qreq_, cm_list = plh.testdata_post_sver('PZ_MTEST', qaid_list=[18], cfgdict=dict(query_rotation_heuristic=True)) >>> cm = cm_list[0] >>> cm.score_name_nsum(qreq_) >>> ut.quit_if_noshow() >>> cm.show_ranked_matches(qreq_, ori=True) Example3: >>> # DISABLE_DOCTEST >>> from ibeis.algo.hots.name_scoring import * # NOQA >>> #ibs, qreq_, cm_list = plh.testdata_pre_sver('testdb1', qaid_list=[1]) >>> ibs, qreq_, cm_list = plh.testdata_post_sver('testdb1', qaid_list=[1], cfgdict=dict(query_rotation_heuristic=True)) >>> cm = cm_list[0] >>> cm.score_name_nsum(qreq_) >>> ut.quit_if_noshow() >>> cm.show_ranked_matches(qreq_, ori=True) """ #assert qreq_ is not None if hack_single_ori is None: try: hack_single_ori = qreq_ is not None and ( qreq_.qparams.query_rotation_heuristic or qreq_.qparams.rotation_invariance) except AttributeError: hack_single_ori = True # The core for each feature match # # The query feature index for each feature match fm_list = cm.fm_list fs_list = cm.get_fsv_prod_list() fx1_list = [fm.T[0] for fm in fm_list] if hack_single_ori: # Group keypoints with the same xy-coordinate. 
# Combine these feature so each only recieves one vote kpts1 = qreq_.ibs.get_annot_kpts(cm.qaid, config2_=qreq_.extern_query_config2) xys1_ = vt.get_xys(kpts1).T fx1_to_comboid = vt.compute_unique_arr_dataids(xys1_) fcombo_ids = [fx1_to_comboid.take(fx1) for fx1 in fx1_list] else: # use the feature index itself as a combo id # so each feature only recieves one vote fcombo_ids = fx1_list if False: import ubelt as ub for ids in fcombo_ids: ub.find_duplicates(ids) # Group annotation matches by name # nsum_nid_list, name_groupxs = vt.group_indices(cm.dnid_list) # nsum_nid_list = cm.unique_nids name_groupxs = cm.name_groupxs nsum_score_list = [] # For all indicies matched to a particular name for name_idxs in name_groupxs: # Get feat indicies and scores corresponding to the name's annots name_combo_ids = ut.take(fcombo_ids, name_idxs) name_fss = ut.take(fs_list, name_idxs) # Flatten over annots in the name fs = np.hstack(name_fss) if len(fs) == 0: nsum_score_list.append(0) continue combo_ids = np.hstack(name_combo_ids) # Features (with the same id) can't vote for this name twice group_idxs = vt.group_indices(combo_ids)[1] flagged_idxs = [idxs[fs.take(idxs).argmax()] for idxs in group_idxs] # Detail: sorting the idxs preseveres summation order # this fixes the numerical issue where nsum and csum were off flagged_idxs = np.sort(flagged_idxs) name_score = fs.take(flagged_idxs).sum() nsum_score_list.append(name_score) nsum_score_list = np.array(nsum_score_list) return nsum_score_list
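# --- Illustrative sketch (not part of the original module) ---
# Under the rotation heuristic, duplicated keypoints that share an xy location
# must share one "combo id" so they cast a single vote per name; the per-name
# score is then the sum of the best score per combo id. A numpy toy example of
# both steps (np.unique stands in for vt.compute_unique_arr_dataids):
import numpy as np

xys1 = np.array([[10., 20.], [34., 5.], [34., 5.], [8., 8.]])   # rows 1 and 2 are augmented duplicates
_, fx1_to_comboid = np.unique(xys1, axis=0, return_inverse=True)
print(fx1_to_comboid)                       # -> [1 2 2 0]

# One vote per combo id within a name: keep the max score of each id, then sum
fs = np.array([0.9, 0.4, 0.6, 0.2])
combo_ids = fx1_to_comboid
group_idxs = [np.flatnonzero(combo_ids == cid) for cid in np.unique(combo_ids)]
flagged_idxs = np.sort([idxs[fs.take(idxs).argmax()] for idxs in group_idxs])
print(fs.take(flagged_idxs).sum())          # -> 1.7  (0.9 + 0.6 + 0.2)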
def try_query(model, infr, evidence, interest_ttypes=[], verbose=True): r""" CommandLine: python -m ibeis.algo.hots.bayes --exec-try_query --show Example: >>> # DISABLE_DOCTEST >>> from ibeis.algo.hots.bayes import * # NOQA >>> verbose = True >>> other_evidence = {} >>> name_evidence = [1, None, 0, None] >>> score_evidence = ['high', 'low', 'low'] >>> query_vars = None >>> model = make_name_model(num_annots=4, num_names=4, verbose=True, mode=1) >>> model, evidence, soft_evidence = update_model_evidence(model, name_evidence, score_evidence, other_evidence) >>> interest_ttypes = ['name'] >>> infr = pgmpy.inference.BeliefPropagation(model) >>> evidence = infr._ensure_internal_evidence(evidence, model) >>> query_results = try_query(model, infr, evidence, interest_ttypes, verbose) >>> result = ('query_results = %s' % (str(query_results),)) >>> ut.quit_if_noshow() >>> show_model(model, show_prior=True, **query_results) >>> ut.show_if_requested() Ignore: query_vars = ut.setdiff_ordered(model.nodes(), list(evidence.keys())) probs = infr.query(query_vars, evidence) map_assignment = infr.map_query(query_vars, evidence) """ infr = pgmpy.inference.VariableElimination(model) #infr = pgmpy.inference.BeliefPropagation(model) if True: return bruteforce(model, query_vars=None, evidence=evidence) else: import vtool as vt query_vars = ut.setdiff_ordered(model.nodes(), list(evidence.keys())) # hack query_vars = ut.setdiff_ordered(query_vars, ut.list_getattr(model.ttype2_cpds['score'], 'variable')) if verbose: evidence_str = ', '.join(model.pretty_evidence(evidence)) print('P(' + ', '.join(query_vars) + ' | ' + evidence_str + ') = ') # Compute MAP joints # There is a bug here. #map_assign = infr.map_query(query_vars, evidence) # (probably an invalid thing to do) #joint_factor = pgmpy.factors.factor_product(*factor_list) # Brute force MAP name_vars = ut.list_getattr(model.ttype2_cpds['name'], 'variable') query_name_vars = ut.setdiff_ordered(name_vars, list(evidence.keys())) # TODO: incorporate case where Na is assigned to Fred #evidence_h = ut.delete_keys(evidence.copy(), ['Na']) joint = model.joint_distribution() joint.evidence_based_reduction( query_name_vars, evidence, inplace=True) # Find static row labels in the evidence given_name_vars = [var for var in name_vars if var in evidence] given_name_idx = ut.dict_take(evidence, given_name_vars) given_name_val = [joint.statename_dict[var][idx] for var, idx in zip(given_name_vars, given_name_idx)] new_vals = joint.values.ravel() # Add static evidence variables to the relabeled name states new_vars = given_name_vars + joint.variables new_rows = [tuple(given_name_val) + row for row in joint._row_labels()] # Relabel rows based on the knowledge that # everything is the same, only the names have changed. 
temp_basis = [i for i in range(model.num_names)] def relabel_names(names, temp_basis=temp_basis): names = list(map(six.text_type, names)) mapping = {} for n in names: if n not in mapping: mapping[n] = len(mapping) new_names = tuple([temp_basis[mapping[n]] for n in names]) return new_names relabeled_rows = list(map(relabel_names, new_rows)) # Combine probability of rows with the same (new) label data_ids = np.array(vt.other.compute_unique_data_ids_(relabeled_rows)) unique_ids, groupxs = vt.group_indices(data_ids) reduced_row_lbls = ut.take(relabeled_rows, ut.get_list_column(groupxs, 0)) reduced_row_lbls = list(map(list, reduced_row_lbls)) reduced_values = np.array([ g.sum() for g in vt.apply_grouping(new_vals, groupxs) ]) # Relabel the rows one more time to agree with initial constraints used_ = [] replaced = [] for colx, (var, val) in enumerate(zip(given_name_vars, given_name_val)): # All columns must be the same for this labeling alias = reduced_row_lbls[0][colx] reduced_row_lbls = ut.list_replace(reduced_row_lbls, alias, val) replaced.append(alias) used_.append(val) basis = model.ttype2_cpds['name'][0]._template_.basis find_remain_ = ut.setdiff_ordered(temp_basis, replaced) repl_remain_ = ut.setdiff_ordered(basis, used_) for find, repl in zip(find_remain_, repl_remain_): reduced_row_lbls = ut.list_replace(reduced_row_lbls, find, repl) # Now find the most likely state sortx = reduced_values.argsort()[::-1] sort_reduced_row_lbls = ut.take(reduced_row_lbls, sortx.tolist()) sort_reduced_values = reduced_values[sortx] # Remove evidence based labels new_vars_ = new_vars[len(given_name_vars):] sort_reduced_row_lbls_ = ut.get_list_column(sort_reduced_row_lbls, slice(len(given_name_vars), None)) sort_reduced_row_lbls_[0] # hack into a new joint factor var_states = ut.lmap(ut.unique_keep_order, zip(*sort_reduced_row_lbls_)) statename_dict = dict(zip(new_vars, var_states)) cardinality = ut.lmap(len, var_states) val_lookup = dict(zip(ut.lmap(tuple, sort_reduced_row_lbls_), sort_reduced_values)) values = np.zeros(np.prod(cardinality)) for idx, state in enumerate(ut.iprod(*var_states)): if state in val_lookup: values[idx] = val_lookup[state] joint2 = pgmpy.factors.Factor(new_vars_, cardinality, values, statename_dict=statename_dict) print(joint2) max_marginals = {} for i, var in enumerate(query_name_vars): one_out = query_name_vars[:i] + query_name_vars[i + 1:] max_marginals[var] = joint2.marginalize(one_out, inplace=False) # max_marginals[var] = joint2.maximize(one_out, inplace=False) print(joint2.marginalize(['Nb', 'Nc'], inplace=False)) factor_list = max_marginals.values() # Better map assignment based on knowledge of labels map_assign = dict(zip(new_vars_, sort_reduced_row_lbls_[0])) sort_reduced_rowstr_lbls = [ ut.repr2(dict(zip(new_vars, lbls)), explicit=True, nobraces=True, strvals=True) for lbls in sort_reduced_row_lbls_ ] top_assignments = list(zip(sort_reduced_rowstr_lbls[:3], sort_reduced_values)) if len(sort_reduced_values) > 3: top_assignments += [('other', 1 - sum(sort_reduced_values[:3]))] # import utool # utool.embed() # Compute all marginals # probs = infr.query(query_vars, evidence) #probs = infr.query(query_vars, evidence) # factor_list = probs.values() ## Marginalize over non-query, non-evidence #irrelevant_vars = ut.setdiff_ordered(joint.variables, list(evidence.keys()) + query_vars) #joint.marginalize(irrelevant_vars) #joint.normalize() #new_rows = joint._row_labels() #new_vals = joint.values.ravel() #map_vals = new_rows[new_vals.argmax()] #map_assign = dict(zip(joint.variables, 
map_vals)) # Compute Marginalized MAP joints #marginalized_joints = {} #for ttype in interest_ttypes: # other_vars = [v for v in joint_factor.scope() # if model.var2_cpd[v].ttype != ttype] # marginal = joint_factor.marginalize(other_vars, inplace=False) # marginalized_joints[ttype] = marginal query_results = { 'factor_list': factor_list, 'top_assignments': top_assignments, 'map_assign': map_assign, 'marginalized_joints': None, } return query_results
def get_annot_kpts_distinctiveness(ibs, aid_list, config2_=None, **kwargs): """ very hacky, but cute way to cache keypoint distinctivness Args: ibs (IBEISController): ibeis controller object aid_list (list): dstncvs_normer (None): Returns: list: dstncvs_list CommandLine: python -m ibeis.control.manual_ibeiscontrol_funcs --test-get_annot_kpts_distinctiveness Example: >>> # SLOW_DOCTEST >>> from ibeis.control.manual_ibeiscontrol_funcs import * # NOQA >>> from ibeis.algo.hots import distinctiveness_normalizer >>> import ibeis >>> import numpy as np >>> config2_ = None >>> # build test data >>> ibs = ibeis.opendb('testdb1') >>> aid_list = ibs.get_valid_aids(species=const.TEST_SPECIES.ZEB_PLAIN) >>> # execute function >>> aid_list1 = aid_list[::2] >>> aid_list2 = aid_list[1::3] >>> dstncvs_list1 = get_annot_kpts_distinctiveness(ibs, aid_list1) >>> dstncvs_list2 = get_annot_kpts_distinctiveness(ibs, aid_list2) >>> dstncvs_list = get_annot_kpts_distinctiveness(ibs, aid_list) >>> print(ut.depth_profile(dstncvs_list1)) >>> stats_dict = ut.dict_stack([ut.get_stats(dstncvs) for dstncvs in dstncvs_list]) >>> print(ut.dict_str(stats_dict)) >>> assert np.all(np.array(stats_dict['min']) >= 0), 'distinctiveness was out of bounds' >>> assert np.all(np.array(stats_dict['max']) <= 1), 'distinctiveness was out of bounds' """ from ibeis.algo.hots import distinctiveness_normalizer as dcvs_normer # per-species disinctivness wrapper around ibeis cached function # get feature rowids aid_list = np.array(aid_list) fid_list = np.array(ibs.get_annot_feat_rowids(aid_list, ensure=True, eager=True, nInput=None, config2_=config2_)) species_rowid_list = np.array(ibs.get_annot_species_rowids(aid_list)) # Compute distinctivness separately for each species unique_sids, groupxs = vt.group_indices(species_rowid_list) fids_groups = vt.apply_grouping(fid_list, groupxs) species_text_list = ibs.get_species_texts(unique_sids) # Map distinctivness computation normer_list = [dcvs_normer.request_species_distinctiveness_normalizer(species) for species in species_text_list] # Reduce to get results dstncvs_groups = [ get_feat_kpts_distinctiveness(ibs, fids, dstncvs_normer=dstncvs_normer, species_rowid=sid, **kwargs) for dstncvs_normer, fids, sid in zip(normer_list, fids_groups, unique_sids) ] dstncvs_list = vt.invert_apply_grouping(dstncvs_groups, groupxs) return dstncvs_list
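# --- Illustrative sketch (not part of the original module) ---
# The per-species pattern above is: group the inputs by species, run the
# expensive computation once per group, then invert the grouping so results line
# up with the original aid order. A numpy-only version of the
# group / compute / invert_apply_grouping round trip on hypothetical data:
import numpy as np

fid_list = np.array([10, 11, 12, 13, 14])
species_rowid_list = np.array([2, 1, 2, 1, 2])

unique_sids = np.unique(species_rowid_list)
groupxs = [np.flatnonzero(species_rowid_list == sid) for sid in unique_sids]
fids_groups = [fid_list[xs] for xs in groupxs]

# Pretend per-species computation (stand-in for the distinctiveness normalizer)
result_groups = [fids * 0.1 for fids in fids_groups]

# Invert the grouping: scatter each group's results back to the input order
out = np.empty(len(fid_list))
for xs, res in zip(groupxs, result_groups):
    out[xs] = res
print(out)   # -> [1.  1.1 1.2 1.3 1.4], aligned with fid_list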
def get_name_aids(ibs, nid_list, enable_unknown_fix=True): r""" # TODO: Rename to get_anot_rowids_from_name_rowid Returns: list: aids_list a list of list of aids in each name RESTful: Method: GET URL: /api/name/aids/ Example: >>> # ENABLE_DOCTEST >>> from ibeis.control.manual_name_funcs import * # NOQA >>> import ibeis >>> ibs = ibeis.opendb('testdb1') >>> # Map annotations to name ids >>> aid_list = ibs.get_valid_aids() >>> nid_list = ibs.get_annot_name_rowids(aid_list) >>> # Get annotation ids for each name >>> aids_list = ibs.get_name_aids(nid_list) >>> # Run Assertion Test >>> groupid2_items = ut.group_items(aids_list, nid_list) >>> grouped_items = list(six.itervalues(groupid2_items)) >>> passed_iter = map(ut.list_allsame, grouped_items) >>> passed_list = list(passed_iter) >>> assert all(passed_list), 'problem in get_name_aids' >>> # Print gropued items >>> print(ut.dict_str(groupid2_items, newlines=False)) Ignore; from ibeis.control.manual_name_funcs import * # NOQA import ibeis #ibs = ibeis.opendb('testdb1') #ibs = ibeis.opendb('PZ_MTEST') ibs = ibeis.opendb('PZ_Master0') #ibs = ibeis.opendb('GZ_ALL') nid_list = ibs.get_valid_nids() nid_list_ = [const.UNKNOWN_NAME_ROWID if nid <= 0 else nid for nid in nid_list] with ut.Timer('sql'): #aids_list1 = ibs.get_name_aids(nid_list, enable_unknown_fix=False) aids_list1 = ibs.db.get(const.ANNOTATION_TABLE, (ANNOT_ROWID,), nid_list_, id_colname=NAME_ROWID, unpack_scalars=False) with ut.Timer('hackquery + group'): opstr = ''' SELECT annot_rowid, name_rowid FROM annotations WHERE name_rowid IN (%s) ORDER BY name_rowid ASC, annot_rowid ASC ''' % (', '.join(map(str, nid_list))) pair_list = ibs.db.connection.execute(opstr).fetchall() aids = np.array(ut.get_list_column(pair_list, 0)) nids = np.array(ut.get_list_column(pair_list, 1)) unique_nids, groupx = vt.group_indices(nids) grouped_aids_ = vt.apply_grouping(aids, groupx) aids_list5 = [sorted(arr.tolist()) for arr in grouped_aids_] for aids1, aids5 in zip(aids_list1, aids_list5): if (aids1) != (aids5): print(aids1) print(aids5) print('-----') ut.assert_lists_eq(list(map(tuple, aids_list5)), list(map(tuple, aids_list1))) with ut.Timer('numpy'): # alt method valid_aids = np.array(ibs.get_valid_aids()) valid_nids = np.array(ibs.get_annot_name_rowids(valid_aids, distinguish_unknowns=False)) aids_list2 = [valid_aids.take(np.flatnonzero(valid_nids == nid)).tolist() for nid in nid_list_] with ut.Timer('numpy2'): # alt method valid_aids = np.array(ibs.get_valid_aids()) valid_nids = np.array(ibs.get_annot_name_rowids(valid_aids, distinguish_unknowns=False)) aids_list3 = [valid_aids.take(np.flatnonzero(np.equal(valid_nids, nid))).tolist() for nid in nid_list_] with ut.Timer('numpy3'): # alt method valid_aids = np.array(ibs.get_valid_aids()) valid_nids = np.array(ibs.db.get_all_col_rows(const.ANNOTATION_TABLE, NAME_ROWID)) aids_list4 = [valid_aids.take(np.flatnonzero(np.equal(valid_nids, nid))).tolist() for nid in nid_list_] assert aids_list2 == aids_list3 assert aids_list3 == aids_list4 assert aids_list1 == aids_list2 valid_aids = ibs.get_valid_aids() %timeit ibs.db.get_all_col_rows('annotations', 'rowid') %timeit ibs.db.get_all_col_rows('annotations', 'name_rowid') %timeit ibs.get_annot_name_rowids(valid_aids, distinguish_unknowns=False) %timeit ibs.get_valid_aids() %timeit ibs.get_annot_name_rowids(ibs.get_valid_aids(), distinguish_unknowns=False) valid_nids1 = ibs.get_annot_name_rowids(valid_aids, distinguish_unknowns=False) valid_nids2 = ibs.db.get_all_col_rows('annotations', 'name_rowid') assert 
valid_nids1 == valid_nids2 ibs.db.fname ibs.db.fpath import sqlite3 con = sqlite3.connect(ibs.db.fpath) opstr = ''' SELECT annot_rowid, name_rowid FROM annotations WHERE name_rowid IN (SELECT name_rowid FROM name) ORDER BY name_rowid ASC, annot_rowid ASC ''' annot_rowid_list = con.execute(opstr).fetchall() aid_list = ut.get_list_column(annot_rowid_list, 0) nid_list = ut.get_list_column(annot_rowid_list, 1) # HACKY HACKY HACK with ut.Timer('hackquery + group'): #nid_list = ibs.get_valid_nids()[10:15] nid_list = ibs.get_valid_nids() opstr = ''' SELECT annot_rowid, name_rowid FROM annotations WHERE name_rowid IN (%s) ORDER BY name_rowid ASC, annot_rowid ASC ''' % (', '.join(map(str, nid_list))) pair_list = ibs.db.connection.execute(opstr).fetchall() aids = np.array(ut.get_list_column(pair_list, 0)) nids = np.array(ut.get_list_column(pair_list, 1)) unique_nids, groupx = vt.group_indices(nids) grouped_aids_ = vt.apply_grouping(aids, groupx) grouped_aids = [arr.tolist() for arr in grouped_aids_] SELECT name_rowid, COUNT(annot_rowid) AS number, GROUP_CONCAT(annot_rowid) AS aid_list FROM annotations WHERE name_rowid in (SELECT name_rowid FROM name) GROUP BY name_rowid ORDER BY name_rowid ASC import vtool as vt vt vt.aid_list[0] annot_rowid_list = con.execute(opstr).fetchall() opstr = ''' SELECT annot_rowid FROM annotations WHERE name_rowid=? ''' cur = ibs.db.connection.cursor() cur = con.execute('BEGIN IMMEDIATE TRANSACTION') cur = ibs.db.connection res = [cur.execute(opstr, (nid,)).fetchall() for nid in nid_list_] cur.execute('COMMIT TRANSACTION') res = [ibs.db.cur.execute(opstr, (nid,)).fetchall() for nid in nid_list_] """ # FIXME: THIS FUNCTION IS VERY SLOW # ADD A LOCAL CACHE TO FIX THIS SPEED # ALSO FIX GET_IMAGE_AIDS # really a getter for the annotation table not the name table #return [[] for nid in nid_list] # TODO: should a query of the UNKNOWN_NAME_ROWID return anything? 
# TODO: don't even run negative aids as queries nid_list_ = [const.UNKNOWN_NAME_ROWID if nid <= 0 else nid for nid in nid_list] USE_GROUPING_HACK = False if USE_GROUPING_HACK: # This code doesn't work because it doesn't respect empty names input_list, inverse_unique = np.unique(nid_list_, return_inverse=True) input_str = ', '.join(list(map(str, input_list))) opstr = ''' SELECT annot_rowid, name_rowid FROM {ANNOTATION_TABLE} WHERE name_rowid IN ({input_str}) ORDER BY name_rowid ASC, annot_rowid ASC '''.format(input_str=input_str, ANNOTATION_TABLE=const.ANNOTATION_TABLE) pair_list = ibs.db.connection.execute(opstr).fetchall() aidscol = np.array(ut.get_list_column(pair_list, 0)) nidscol = np.array(ut.get_list_column(pair_list, 1)) unique_nids, groupx = vt.group_indices(nidscol) grouped_aids_ = vt.apply_grouping(aidscol, groupx) #aids_list = [sorted(arr.tolist()) for arr in grouped_aids_] structured_aids_list = [arr.tolist() for arr in grouped_aids_] aids_list = np.array(structured_aids_list)[inverse_unique].tolist() else: USE_NUMPY_IMPL = True #USE_NUMPY_IMPL = False # Use qt if getting one at a time otherwise perform bulk operation USE_NUMPY_IMPL = len(nid_list_) > 1 #USE_NUMPY_IMPL = len(nid_list_) > 10 if USE_NUMPY_IMPL: # This seems to be 30x faster for bigger inputs valid_aids = np.array(ibs._get_all_aids()) valid_nids = np.array(ibs.db.get_all_col_rows(const.ANNOTATION_TABLE, NAME_ROWID)) #np.array(ibs.get_annot_name_rowids(valid_aids, distinguish_unknowns=False)) aids_list = [ valid_aids.take(np.flatnonzero( np.equal(valid_nids, nid))).tolist() for nid in nid_list_ ] else: # SQL IMPL aids_list = ibs.db.get(const.ANNOTATION_TABLE, (ANNOT_ROWID,), nid_list_, id_colname=NAME_ROWID, unpack_scalars=False) if enable_unknown_fix: #enable_unknown_fix == distinguish_unknowns # negative name rowids correspond to unknown annoations wherex annot_rowid = -name_rowid #aids_list = [None if nid is None else ([-nid] if nid < 0 else aids) # for nid, aids in zip(nid_list, aids_list)] # Not sure if this should fail or return empty list on None nid aids_list = [[] if nid is None else ([-nid] if nid < 0 else aids) for nid, aids in zip(nid_list, aids_list)] #aids_list = [[-nid] if nid < 0 else aids # for nid, aids in zip(nid_list, aids_list)] return aids_list
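# --- Illustrative sketch (not part of the original module) ---
# The fast numpy branch above answers get_name_aids by loading the (aid, nid)
# columns once and selecting with flatnonzero per requested nid; negative nids
# encode unknown annotations as aid == -nid. A self-contained toy version with
# hypothetical tables:
import numpy as np

valid_aids = np.array([1, 2, 3, 4, 5, 6])
valid_nids = np.array([7, 7, 9, 7, 9, 8])

def name_aids(nid_list):
    aids_list = []
    for nid in nid_list:
        if nid is None:
            aids_list.append([])        # no name -> no annots
        elif nid < 0:
            aids_list.append([-nid])    # unknown annots use aid == -nid by convention
        else:
            aids_list.append(valid_aids[np.flatnonzero(valid_nids == nid)].tolist())
    return aids_list

print(name_aids([7, 9, -4, None]))      # -> [[1, 2, 4], [3, 5], [4], []]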
def make_grid_coverage_mask(kpts, chipsize, weights, pxl_per_bin=4, grid_steps=1, resize=False, out=None, grid_sigma=1.6): r""" Args: kpts (ndarray[float32_t, ndim=2]): keypoint chipsize (tuple): width, height weights (ndarray[float32_t, ndim=1]): pxl_per_bin (float): grid_steps (int): Returns: ndarray: weightgrid CommandLine: python -m vtool.coverage_grid --test-make_grid_coverage_mask --show Example: >>> # DISABLE_DOCTEST >>> from vtool.coverage_grid import * # NOQA >>> import vtool as vt >>> # build test data >>> kpts, chipsize, weights = coverage_kpts.testdata_coverage('easy1.png') >>> pxl_per_bin = 4 >>> grid_steps = 2 >>> # execute function >>> weightgrid = make_grid_coverage_mask(kpts, chipsize, weights, pxl_per_bin, grid_steps) >>> # verify result >>> result = str(weightgrid) >>> print(result) >>> ut.quit_if_noshow() >>> import plottool as pt >>> pt.imshow(weightgrid) >>> ut.show_if_requested() """ import vtool as vt coverage_gridtup = sparse_grid_coverage( kpts, chipsize, weights, pxl_per_bin=pxl_per_bin, grid_steps=grid_steps, grid_sigma=grid_sigma ) gridshape = coverage_gridtup[0:2] neighbor_bin_weights, neighbor_bin_indices = coverage_gridtup[-2:] oldshape_indices = neighbor_bin_indices.shape newshape_indices = (np.prod(oldshape_indices[0:2]), oldshape_indices[2]) neighbor_bin_indices = neighbor_bin_indices.reshape(newshape_indices).T neighbor_bin_weights = neighbor_bin_weights.flatten() # Get flat indexing into gridbin neighbor_bin_flat_indices = np.ravel_multi_index(neighbor_bin_indices, gridshape) # Group by bins with weight unique_flatxs, grouped_flatxs = vt.group_indices(neighbor_bin_flat_indices) grouped_weights = vt.apply_grouping(neighbor_bin_weights, grouped_flatxs) # FIXME: boundary cases are not handled right because their vote is split # into the same bin and is fighting with itself durring the max max_weights = list(map(np.max, grouped_weights)) if out is None: weightgrid = np.zeros(gridshape) else: # outvar specified weightgrid = out weightgrid[:] = 0 unique_rows, unique_cols = np.unravel_index(unique_flatxs, gridshape) weightgrid[unique_rows, unique_cols] = max_weights #flat_weightgrid = np.zeros(np.prod(gridshape)) #flat_weightgrid[unique_flatxs] = max_weight #ut.embed() #weightgrid = np.reshape(flat_weightgrid, gridshape) if resize: weightgrid = cv2.resize(weightgrid, chipsize, interpolation=cv2.INTER_NEAREST) return weightgrid
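# --- Illustrative sketch (not part of the original module) ---
# The core reduction in make_grid_coverage_mask ravels (row, col) bin indices to
# flat grid indices and keeps the maximum weight that lands in each bin.
# np.maximum.at gives the same per-bin max without the explicit grouping step;
# the toy data below is hypothetical:
import numpy as np

gridshape = (4, 5)
neighbor_bin_indices = np.array([[0, 1], [0, 1], [2, 3], [3, 4]]).T   # (row, col) per sample
neighbor_bin_weights = np.array([0.3, 0.9, 0.5, 0.2])

flat_idxs = np.ravel_multi_index(neighbor_bin_indices, gridshape)
weightgrid = np.zeros(gridshape)
np.maximum.at(weightgrid.ravel(), flat_idxs, neighbor_bin_weights)    # ravel() is a view here
print(weightgrid)   # bin (0, 1) keeps 0.9, the max of the two votes that hit it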
def flow(): """ http://pmneila.github.io/PyMaxflow/maxflow.html#maxflow-fastmin pip install PyMaxFlow pip install pystruct pip install hdbscan """ # Toy problem representing attempting to discover names via annotation # scores import pystruct # NOQA import pystruct.models # NOQA import networkx as netx # NOQA import vtool as vt num_annots = 10 num_names = num_annots hidden_nids = np.random.randint(0, num_names, num_annots) unique_nids, groupxs = vt.group_indices(hidden_nids) toy_params = { True: {'mu': 1.0, 'sigma': 2.2}, False: {'mu': 7.0, 'sigma': .9} } if True: import vtool as vt import plottool as pt xdata = np.linspace(0, 100, 1000) tp_pdf = vt.gauss_func1d(xdata, **toy_params[True]) fp_pdf = vt.gauss_func1d(xdata, **toy_params[False]) pt.plot_probabilities([tp_pdf, fp_pdf], ['TP', 'TF'], xdata=xdata) def metric(aidx1, aidx2, hidden_nids=hidden_nids, toy_params=toy_params): if aidx1 == aidx2: return 0 rng = np.random.RandomState(int(aidx1 + aidx2)) same = hidden_nids[int(aidx1)] == hidden_nids[int(aidx2)] mu, sigma = ut.dict_take(toy_params[same], ['mu', 'sigma']) return np.clip(rng.normal(mu, sigma), 0, np.inf) pairwise_aidxs = list(ut.iprod(range(num_annots), range(num_annots))) pairwise_labels = np.array([hidden_nids[a1] == hidden_nids[a2] for a1, a2 in pairwise_aidxs]) pairwise_scores = np.array([metric(*zz) for zz in pairwise_aidxs]) pairwise_scores_mat = pairwise_scores.reshape(num_annots, num_annots) if num_annots <= 10: print(ut.repr2(pairwise_scores_mat, precision=1)) #aids = list(range(num_annots)) #g = netx.DiGraph() #g.add_nodes_from(aids) #g.add_edges_from([(tup[0], tup[1], {'weight': score}) for tup, score in zip(pairwise_aidxs, pairwise_scores) if tup[0] != tup[1]]) #netx.draw_graphviz(g) #pr = netx.pagerank(g) X = pairwise_scores Y = pairwise_labels encoder = vt.ScoreNormalizer() encoder.fit(X, Y) encoder.visualize() # meanshift clustering import sklearn bandwidth = sklearn.cluster.estimate_bandwidth(X[:, None]) # , quantile=quantile, n_samples=500) assert bandwidth != 0, ('[enc] bandwidth is 0. 
Cannot cluster') # bandwidth is with respect to the RBF used in clustering #ms = sklearn.cluster.MeanShift(bandwidth=bandwidth, bin_seeding=True, cluster_all=True) ms = sklearn.cluster.MeanShift(bandwidth=bandwidth, bin_seeding=True, cluster_all=False) ms.fit(X[:, None]) label_arr = ms.labels_ unique_labels = np.unique(label_arr) max_label = max(0, unique_labels.max()) num_orphans = (label_arr == -1).sum() label_arr[label_arr == -1] = np.arange(max_label + 1, max_label + 1 + num_orphans) X_data = np.arange(num_annots)[:, None].astype(np.int64) #graph = pystruct.models.GraphCRF( # n_states=None, # n_features=None, # inference_method='lp', # class_weight=None, # directed=False, #) import scipy import scipy.cluster import scipy.cluster.hierarchy thresh = 2.0 labels = scipy.cluster.hierarchy.fclusterdata(X_data, thresh, metric=metric) unique_lbls, lblgroupxs = vt.group_indices(labels) print(groupxs) print(lblgroupxs) print('groupdiff = %r' % (ut.compare_groupings(groupxs, lblgroupxs),)) print('common groups = %r' % (ut.find_grouping_consistencies(groupxs, lblgroupxs),)) #X_data, seconds_thresh, criterion='distance') #help(hdbscan.HDBSCAN) import hdbscan alg = hdbscan.HDBSCAN(metric=metric, min_cluster_size=1, p=1, gen_min_span_tree=1, min_samples=2) labels = alg.fit_predict(X_data) labels[labels == -1] = np.arange(np.sum(labels == -1)) + labels.max() + 1 unique_lbls, lblgroupxs = vt.group_indices(labels) print(groupxs) print(lblgroupxs) print('groupdiff = %r' % (ut.compare_groupings(groupxs, lblgroupxs),)) print('common groups = %r' % (ut.find_grouping_consistencies(groupxs, lblgroupxs),)) #import ddbscan #help(ddbscan.DDBSCAN) #alg = ddbscan.DDBSCAN(2, 2) #D = np.zeros((len(aids), len(aids) + 1)) #D.T[-1] = np.arange(len(aids)) ## Can alpha-expansion be used when the pairwise potentials are not in a grid? #hidden_ut.group_items(aids, hidden_nids) if False: import maxflow #from maxflow import fastmin # Create a graph with integer capacities. g = maxflow.Graph[int](2, 2) # Add two (non-terminal) nodes. Get the index to the first one. nodes = g.add_nodes(2) # Create two edges (forwards and backwards) with the given capacities. # The indices of the nodes are always consecutive. g.add_edge(nodes[0], nodes[1], 1, 2) # Set the capacities of the terminal edges... # ...for the first node. g.add_tedge(nodes[0], 2, 5) # ...for the second node. g.add_tedge(nodes[1], 9, 4) g = maxflow.Graph[float](2, 2) g.maxflow() g.get_nx_graph() g.get_segment(nodes[0])
def collapse_labels(model, evidence, reduced_variables, reduced_row_idxs, reduced_values): import vtool as vt # assert np.all(reduced_joint.values.ravel() == reduced_joint.values.flatten()) reduced_ttypes = [model.var2_cpd[var].ttype for var in reduced_variables] evidence_vars = list(evidence.keys()) evidence_state_idxs = ut.dict_take(evidence, evidence_vars) evidence_ttypes = [model.var2_cpd[var].ttype for var in evidence_vars] ttype2_ev_indices = dict(zip(*ut.group_indices(evidence_ttypes))) ttype2_re_indices = dict(zip(*ut.group_indices(reduced_ttypes))) # ttype2_ev_indices = ut.group_items(range(len(evidence_vars)), evidence_ttypes) # ttype2_re_indices = ut.group_items(range(len(reduced_variables)), reduced_ttypes) # Allow specific types of labels to change # everything is the same, only the names have changed. # TODO: allow for multiple different label_ttypes # for label_ttype in label_ttypes if NAME_TTYPE not in model.ttype2_template: return reduced_row_idxs, reduced_values label_ttypes = [NAME_TTYPE] for label_ttype in label_ttypes: ev_colxs = ttype2_ev_indices[label_ttype] re_colxs = ttype2_re_indices[label_ttype] ev_state_idxs = ut.take(evidence_state_idxs, ev_colxs) ev_state_idxs_tile = np.tile(ev_state_idxs, (len(reduced_values), 1)).astype(np.int) num_ev_ = len(ev_colxs) aug_colxs = list( range(num_ev_)) + (np.array(re_colxs) + num_ev_).tolist() aug_state_idxs = np.hstack([ev_state_idxs_tile, reduced_row_idxs]) # Relabel rows based on the knowledge that # everything is the same, only the names have changed. num_cols = len(aug_state_idxs.T) mask = vt.index_to_boolmask(aug_colxs, num_cols) (other_colxs, ) = np.where(~mask) relbl_states = aug_state_idxs.compress(mask, axis=1) other_states = aug_state_idxs.compress(~mask, axis=1) tmp_relbl_states = np.array(list(map(make_temp_state, relbl_states))) max_tmp_state = -1 min_tmp_state = tmp_relbl_states.min() # rebuild original state structure with temp state idxs tmp_state_cols = [None] * num_cols for count, colx in enumerate(aug_colxs): tmp_state_cols[colx] = tmp_relbl_states[:, count:count + 1] for count, colx in enumerate(other_colxs): tmp_state_cols[colx] = other_states[:, count:count + 1] tmp_state_idxs = np.hstack(tmp_state_cols) data_ids = np.array( vt.compute_unique_data_ids_(list(map(tuple, tmp_state_idxs)))) unique_ids, groupxs = vt.group_indices(data_ids) logger.info('Collapsed %r states into %r states' % ( len(data_ids), len(unique_ids), )) # Sum the values in the cpd to marginalize the duplicate probs new_values = np.array( [g.sum() for g in vt.apply_grouping(reduced_values, groupxs)]) # Take only the unique rows under this induced labeling unique_tmp_groupxs = np.array(ut.get_list_column(groupxs, 0)) new_aug_state_idxs = tmp_state_idxs.take(unique_tmp_groupxs, axis=0) tmp_idx_set = set((-np.arange(-max_tmp_state, (-min_tmp_state) + 1)).tolist()) true_idx_set = set(range(len( model.ttype2_template[label_ttype].basis))) # Relabel the rows one more time to agree with initial constraints for colx, true_idx in enumerate(ev_state_idxs): tmp_idx = np.unique(new_aug_state_idxs.T[colx]) assert len(tmp_idx) == 1 tmp_idx_set -= {tmp_idx[0]} true_idx_set -= {true_idx} new_aug_state_idxs[new_aug_state_idxs == tmp_idx] = true_idx # Relabel the remaining idxs remain_tmp_idxs = sorted(list(tmp_idx_set))[::-1] remain_true_idxs = sorted(list(true_idx_set)) for tmp_idx, true_idx in zip(remain_tmp_idxs, remain_true_idxs): new_aug_state_idxs[new_aug_state_idxs == tmp_idx] = true_idx # Remove evidence based augmented labels new_state_idxs = 
new_aug_state_idxs.T[num_ev_:].T
    return new_state_idxs, new_values
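# --- Illustrative sketch (not part of the original module) ---
# collapse_labels exploits that only the partition of names matters: rows of the
# reduced joint are relabeled to a canonical "first appearance" numbering, and
# rows that become identical have their probabilities summed. A tiny stdlib
# version of that collapse on hypothetical rows:
import numpy as np
from collections import defaultdict

rows = [('fred', 'fred'), ('sue', 'sue'), ('fred', 'sue'), ('sue', 'fred')]
values = np.array([0.30, 0.30, 0.25, 0.15])

def canonical(row):
    # Relabel names by order of first appearance; only the induced partition matters
    mapping = {}
    return tuple(mapping.setdefault(n, len(mapping)) for n in row)

collapsed = defaultdict(float)
for row, val in zip(rows, values):
    collapsed[canonical(row)] += val
print(dict(collapsed))   # -> {(0, 0): 0.6, (0, 1): 0.4}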
def get_annotmatch_rowids_from_aid(ibs, aid_list, eager=True, nInput=None,
                                   force_method=None):
    """
    Undirected version
    TODO autogenerate

    Returns a list of the aids that were reviewed as candidate matches to the
    input aid

    aid_list = ibs.get_valid_aids()

    CommandLine:
        python -m ibeis.annotmatch_funcs --exec-get_annotmatch_rowids_from_aid
        python -m ibeis.annotmatch_funcs --exec-get_annotmatch_rowids_from_aid:1 --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.annotmatch_funcs import *  # NOQA
        >>> import ibeis
        >>> # setup_pzmtest_subgraph()
        >>> ibs = ibeis.opendb(defaultdb='PZ_MTEST')
        >>> aid_list = ibs.get_valid_aids()[0:4]
        >>> eager = True
        >>> nInput = None
        >>> annotmatch_rowid_list = get_annotmatch_rowids_from_aid(ibs, aid_list,
        >>>                                                        eager, nInput)
        >>> result = ('annotmatch_rowid_list = %s' % (str(annotmatch_rowid_list),))
        >>> print(result)

    Example2:
        >>> # TIME TEST
        >>> # setup_pzmtest_subgraph()
        >>> from ibeis.annotmatch_funcs import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='PZ_Master1')
        >>> aid_list = ibs.get_valid_aids()
        >>> from functools import partial
        >>> func_list = [
        >>>     partial(ibs.get_annotmatch_rowids_from_aid),
        >>>     partial(ibs.get_annotmatch_rowids_from_aid, force_method=1),
        >>>     partial(ibs.get_annotmatch_rowids_from_aid, force_method=2),
        >>> ]
        >>> num_list = [1, 10, 50, 100, 300, 325, 350, 400, 500]
        >>> def args_list(count, aid_list=aid_list, num_list=num_list):
        >>>    return (aid_list[0:num_list[count]],)
        >>> searchkw = dict(
        >>>     func_labels=['combo', 'sql', 'numpy'],
        >>>     count_to_xtick=lambda count, args: len(args[0]),
        >>>     title='Timings of get_annotmatch_rowids_from_aid',
        >>> )
        >>> niters = len(num_list)
        >>> time_result = ut.gridsearch_timer(func_list, args_list, niters, **searchkw)
        >>> time_result['plot_timings']()
        >>> ut.show_if_requested()
    """
    from ibeis.control import _autogen_annotmatch_funcs
    if nInput is None:
        nInput = len(aid_list)

    if force_method != 2 and (nInput < 256 or (force_method == 1)):
        rowids1 = ibs.get_annotmatch_rowids_from_aid1(aid_list)
        # This one is slow because aid2 is the second part of the index
        rowids2 = ibs.get_annotmatch_rowids_from_aid2(aid_list)
        annotmatch_rowid_list = list(map(ut.flatten, zip(rowids1, rowids2)))  # NOQA
    else:
        # This is much much faster than the other methods for large queries
        import vtool as vt
        all_annotmatch_rowids = np.array(ibs._get_all_annotmatch_rowids())
        aids1 = np.array(ibs.get_annotmatch_aid1(all_annotmatch_rowids))
        aids2 = np.array(ibs.get_annotmatch_aid2(all_annotmatch_rowids))
        unique_aid1, groupxs1 = vt.group_indices(aids1)
        unique_aid2, groupxs2 = vt.group_indices(aids2)
        rowids1_ = vt.apply_grouping(all_annotmatch_rowids, groupxs1)
        rowids2_ = vt.apply_grouping(all_annotmatch_rowids, groupxs2)
        rowids1_ = [_.tolist() for _ in rowids1_]
        rowids2_ = [_.tolist() for _ in rowids2_]
        maping1 = dict(zip(unique_aid1, rowids1_))
        maping2 = dict(zip(unique_aid2, rowids2_))
        mapping = ut.defaultdict(list, ut.dict_union3(maping1, maping2))
        annotmatch_rowid_list = ut.dict_take(mapping, aid_list)

    if False:
        # VERY SLOW
        colnames = (_autogen_annotmatch_funcs.ANNOTMATCH_ROWID,)
        # FIXME: col_rowid is not correct
        params_iter = list(zip(aid_list, aid_list))
        where_colnames = [_autogen_annotmatch_funcs.ANNOT_ROWID1,
                          _autogen_annotmatch_funcs.ANNOT_ROWID2]
        with ut.Timer('one'):
            annotmatch_rowid_list1 = ibs.db.get_where3(  # NOQA
                ibs.const.ANNOTMATCH_TABLE, colnames, params_iter, where_colnames,
                logicop='OR', eager=eager, nInput=nInput, unpack_scalars=False)

    # Ensure function output is consistent
    annotmatch_rowid_list = list(map(sorted, annotmatch_rowid_list))
    return annotmatch_rowid_list
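# Sketch (not from the original source) of the fast numpy branch above: group
# annotmatch rowids by the aid appearing in either column and take an undirected
# union. Plain numpy and a defaultdict stand in for vt.group_indices and
# ut.dict_union3; the toy rowids and aids are made up.
import numpy as np
from collections import defaultdict

# Hypothetical annotmatch table: row i is a match between aids1[i] and aids2[i]
all_rowids = np.array([0, 1, 2, 3])
aids1 = np.array([1, 1, 2, 3])
aids2 = np.array([2, 3, 3, 4])

mapping = defaultdict(list)
for col in (aids1, aids2):
    # np.unique(..., return_inverse=True) plays the role of vt.group_indices here
    unique_aids, inverse = np.unique(col, return_inverse=True)
    for rowid, groupx in zip(all_rowids, inverse):
        mapping[int(unique_aids[groupx])].append(int(rowid))

aid_list = [1, 2, 3, 4, 99]  # 99 has no reviewed matches -> empty list
annotmatch_rowid_list = [sorted(mapping[aid]) for aid in aid_list]
# -> [[0, 1], [0, 2], [1, 2, 3], [3], []]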
def invert_assigns(idx_to_wxs, idx_to_maws, verbose=False):
    r"""
    Inverts assignment of vectors->to->words into words->to->vectors.
    Invert mapping -- Group by word indexes

    This gives a HUGE speedup over the old invert_assigns

    Example:
        >>> # ENABLE_DOCTEST
        >>> from wbia.algo.smk.smk_funcs import *  # NOQA
        >>> idx_to_wxs = np.ma.array([
        >>>     (0, 4),
        >>>     (2, -1),
        >>>     (2, 0)], dtype=np.int32)
        >>> idx_to_wxs[1, 1] = np.ma.masked
        >>> idx_to_maws = np.ma.array(
        >>>     [(.5, 1.), (1., np.nan), (.5, .5)], dtype=np.float32)
        >>> idx_to_maws[1, 1] = np.ma.masked
        >>> tup = invert_assigns(idx_to_wxs, idx_to_maws)
        >>> wx_to_idxs, wx_to_maws = tup
        >>> result = 'wx_to_idxs = %s' % (ut.repr4(wx_to_idxs, with_dtype=True),)
        >>> result += '\nwx_to_maws = %s' % (ut.repr4(wx_to_maws, with_dtype=True),)
        >>> print(result)
        wx_to_idxs = {
            0: np.array([0, 2], dtype=np.int32),
            2: np.array([1, 2], dtype=np.int32),
            4: np.array([0], dtype=np.int32),
        }
        wx_to_maws = {
            0: np.array([0.5, 0.5], dtype=np.float32),
            2: np.array([1. , 0.5], dtype=np.float32),
            4: np.array([1.], dtype=np.float32),
        }
    """
    assert isinstance(idx_to_wxs, np.ma.masked_array)
    assert isinstance(idx_to_maws, np.ma.masked_array)

    nrows, ncols = idx_to_wxs.shape
    if len(idx_to_wxs.mask.shape) == 0:
        # np.bool is a deprecated alias; the builtin bool is equivalent here
        valid_mask = np.ones((nrows, ncols), dtype=bool)
    else:
        valid_mask = ~idx_to_maws.mask
    # idx_to_nAssign = (valid_mask).sum(axis=1)

    _valid_x2d = np.flatnonzero(valid_mask)
    flat_idxs = np.floor_divide(_valid_x2d, ncols, dtype=np.int32)
    flat_wxs = idx_to_wxs.compressed()
    flat_maws = idx_to_maws.compressed()

    sortx = flat_wxs.argsort()
    flat_wxs = flat_wxs.take(sortx)
    flat_idxs = flat_idxs.take(sortx)
    flat_maws = flat_maws.take(sortx)

    wx_keys, groupxs = vt.group_indices(flat_wxs)
    idxs_list = vt.apply_grouping(flat_idxs, groupxs)
    maws_list = vt.apply_grouping(flat_maws, groupxs)

    wx_to_idxs = dict(zip(wx_keys, idxs_list))
    wx_to_maws = dict(zip(wx_keys, maws_list))

    if verbose:
        logger.info('[vocab] L___ End Assign vecs to words.')
    return (wx_to_idxs, wx_to_maws)
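# Plain-numpy sketch (not from the original source) of the flatten / sort /
# group idea that makes invert_assigns fast: a single argsort brings equal word
# indices together, so the inversion becomes one cheap split instead of a scan
# per word. The masked-array handling is omitted and the toy assignments below
# are made up.
import numpy as np

# Hypothetical dense assignments: row = descriptor index, columns = assigned words
idx_to_wxs = np.array([[0, 4],
                       [2, 0],
                       [2, 0]], dtype=np.int32)
idx_to_maws = np.array([[0.5, 1.0],
                        [1.0, 0.5],
                        [0.5, 0.5]], dtype=np.float32)

nrows, ncols = idx_to_wxs.shape
flat_wxs = idx_to_wxs.ravel()
flat_idxs = np.repeat(np.arange(nrows), ncols)  # which descriptor each entry came from
flat_maws = idx_to_maws.ravel()

# Sort once by word index, then split at the group boundaries
sortx = flat_wxs.argsort(kind='stable')
sorted_wxs = flat_wxs[sortx]
wx_keys, counts = np.unique(sorted_wxs, return_counts=True)
splits = np.cumsum(counts)[:-1]

wx_to_idxs = dict(zip(wx_keys, np.split(flat_idxs[sortx], splits)))
wx_to_maws = dict(zip(wx_keys, np.split(flat_maws[sortx], splits)))
# wx_to_idxs[0] -> array([0, 1, 2]), wx_to_maws[0] -> array([0.5, 0.5, 0.5])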
def try_query(model, infr, evidence, interest_ttypes=[], verbose=True):
    r"""
    CommandLine:
        python -m wbia.algo.hots.bayes --exec-try_query --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.hots.bayes import *  # NOQA
        >>> verbose = True
        >>> other_evidence = {}
        >>> name_evidence = [1, None, 0, None]
        >>> score_evidence = ['high', 'low', 'low']
        >>> query_vars = None
        >>> model = make_name_model(num_annots=4, num_names=4, verbose=True, mode=1)
        >>> model, evidence, soft_evidence = update_model_evidence(model, name_evidence, score_evidence, other_evidence)
        >>> interest_ttypes = ['name']
        >>> infr = pgmpy.inference.BeliefPropagation(model)
        >>> evidence = infr._ensure_internal_evidence(evidence, model)
        >>> query_results = try_query(model, infr, evidence, interest_ttypes, verbose)
        >>> result = ('query_results = %s' % (str(query_results),))
        >>> ut.quit_if_noshow()
        >>> show_model(model, show_prior=True, **query_results)
        >>> ut.show_if_requested()

    Ignore:
        query_vars = ut.setdiff_ordered(model.nodes(), list(evidence.keys()))
        probs = infr.query(query_vars, evidence)
        map_assignment = infr.map_query(query_vars, evidence)
    """
    infr = pgmpy.inference.VariableElimination(model)
    # infr = pgmpy.inference.BeliefPropagation(model)
    if True:
        return bruteforce(model, query_vars=None, evidence=evidence)
    else:
        import vtool as vt

        query_vars = ut.setdiff_ordered(model.nodes(), list(evidence.keys()))
        # hack
        query_vars = ut.setdiff_ordered(
            query_vars, ut.list_getattr(model.ttype2_cpds['score'], 'variable'))
        if verbose:
            evidence_str = ', '.join(model.pretty_evidence(evidence))
            logger.info('P(' + ', '.join(query_vars) + ' | ' + evidence_str + ') = ')
        # Compute MAP joints
        # There is a bug here.
        # map_assign = infr.map_query(query_vars, evidence)
        # (probably an invalid thing to do)
        # joint_factor = pgmpy.factors.factor_product(*factor_list)
        # Brute force MAP
        name_vars = ut.list_getattr(model.ttype2_cpds['name'], 'variable')
        query_name_vars = ut.setdiff_ordered(name_vars, list(evidence.keys()))
        # TODO: incorporate case where Na is assigned to Fred
        # evidence_h = ut.delete_keys(evidence.copy(), ['Na'])

        joint = model.joint_distribution()
        joint.evidence_based_reduction(query_name_vars, evidence, inplace=True)

        # Find static row labels in the evidence
        given_name_vars = [var for var in name_vars if var in evidence]
        given_name_idx = ut.dict_take(evidence, given_name_vars)
        given_name_val = [
            joint.statename_dict[var][idx]
            for var, idx in zip(given_name_vars, given_name_idx)
        ]
        new_vals = joint.values.ravel()
        # Add static evidence variables to the relabeled name states
        new_vars = given_name_vars + joint.variables
        new_rows = [tuple(given_name_val) + row for row in joint._row_labels()]

        # Relabel rows based on the knowledge that
        # everything is the same, only the names have changed.
        temp_basis = [i for i in range(model.num_names)]

        def relabel_names(names, temp_basis=temp_basis):
            names = list(map(six.text_type, names))
            mapping = {}
            for n in names:
                if n not in mapping:
                    mapping[n] = len(mapping)
            new_names = tuple([temp_basis[mapping[n]] for n in names])
            return new_names

        relabeled_rows = list(map(relabel_names, new_rows))
        # Combine probability of rows with the same (new) label
        data_ids = np.array(vt.other.compute_unique_data_ids_(relabeled_rows))
        unique_ids, groupxs = vt.group_indices(data_ids)
        reduced_row_lbls = ut.take(relabeled_rows, ut.get_list_column(groupxs, 0))
        reduced_row_lbls = list(map(list, reduced_row_lbls))
        reduced_values = np.array(
            [g.sum() for g in vt.apply_grouping(new_vals, groupxs)])

        # Relabel the rows one more time to agree with initial constraints
        used_ = []
        replaced = []
        for colx, (var, val) in enumerate(zip(given_name_vars, given_name_val)):
            # All columns must be the same for this labeling
            alias = reduced_row_lbls[0][colx]
            reduced_row_lbls = ut.list_replace(reduced_row_lbls, alias, val)
            replaced.append(alias)
            used_.append(val)
        basis = model.ttype2_cpds['name'][0]._template_.basis
        find_remain_ = ut.setdiff_ordered(temp_basis, replaced)
        repl_remain_ = ut.setdiff_ordered(basis, used_)
        for find, repl in zip(find_remain_, repl_remain_):
            reduced_row_lbls = ut.list_replace(reduced_row_lbls, find, repl)

        # Now find the most likely state
        sortx = reduced_values.argsort()[::-1]
        sort_reduced_row_lbls = ut.take(reduced_row_lbls, sortx.tolist())
        sort_reduced_values = reduced_values[sortx]

        # Remove evidence based labels
        new_vars_ = new_vars[len(given_name_vars):]
        sort_reduced_row_lbls_ = ut.get_list_column(
            sort_reduced_row_lbls, slice(len(given_name_vars), None))

        sort_reduced_row_lbls_[0]

        # hack into a new joint factor
        var_states = ut.lmap(ut.unique_ordered, zip(*sort_reduced_row_lbls_))
        statename_dict = dict(zip(new_vars, var_states))
        cardinality = ut.lmap(len, var_states)
        val_lookup = dict(
            zip(ut.lmap(tuple, sort_reduced_row_lbls_), sort_reduced_values))
        values = np.zeros(np.prod(cardinality))
        for idx, state in enumerate(ut.iprod(*var_states)):
            if state in val_lookup:
                values[idx] = val_lookup[state]
        joint2 = pgmpy.factors.Factor(new_vars_, cardinality, values,
                                      statename_dict=statename_dict)
        logger.info(joint2)
        max_marginals = {}
        for i, var in enumerate(query_name_vars):
            one_out = query_name_vars[:i] + query_name_vars[i + 1:]
            max_marginals[var] = joint2.marginalize(one_out, inplace=False)
            # max_marginals[var] = joint2.maximize(one_out, inplace=False)
        logger.info(joint2.marginalize(['Nb', 'Nc'], inplace=False))
        factor_list = max_marginals.values()

        # Better map assignment based on knowledge of labels
        map_assign = dict(zip(new_vars_, sort_reduced_row_lbls_[0]))

        sort_reduced_rowstr_lbls = [
            ut.repr2(dict(zip(new_vars, lbls)), explicit=True, nobraces=True,
                     strvals=True)
            for lbls in sort_reduced_row_lbls_
        ]

        top_assignments = list(
            zip(sort_reduced_rowstr_lbls[:3], sort_reduced_values))
        if len(sort_reduced_values) > 3:
            top_assignments += [('other', 1 - sum(sort_reduced_values[:3]))]

        # import utool
        # utool.embed()

        # Compute all marginals
        # probs = infr.query(query_vars, evidence)
        # probs = infr.query(query_vars, evidence)
        # factor_list = probs.values()

        ## Marginalize over non-query, non-evidence
        # irrelevant_vars = ut.setdiff_ordered(joint.variables, list(evidence.keys()) + query_vars)
        # joint.marginalize(irrelevant_vars)
        # joint.normalize()
        # new_rows = joint._row_labels()
        # new_vals = joint.values.ravel()
        # map_vals = new_rows[new_vals.argmax()]
        # map_assign = dict(zip(joint.variables, map_vals))

        # Compute Marginalized MAP joints
        # marginalized_joints = {}
        # for ttype in interest_ttypes:
        #     other_vars = [v for v in joint_factor.scope()
        #                   if model.var2_cpd[v].ttype != ttype]
        #     marginal = joint_factor.marginalize(other_vars, inplace=False)
        #     marginalized_joints[ttype] = marginal
        query_results = {
            'factor_list': factor_list,
            'top_assignments': top_assignments,
            'map_assign': map_assign,
            'marginalized_joints': None,
        }
        return query_results
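# Standalone sketch (not from the original source) of the relabeling trick used
# in try_query: joint states that differ only by a permutation of name labels
# are mapped to the same canonical tuple, and their probabilities are summed.
# The rows and probabilities below are made up; a defaultdict stands in for the
# vt.group_indices / vt.apply_grouping reduction.
from collections import defaultdict

def relabel_names(names):
    # Map labels to 0, 1, 2, ... in order of first appearance, so
    # ('Fred', 'Sue', 'Fred') and ('Sue', 'Fred', 'Sue') both collapse to (0, 1, 0)
    mapping = {}
    for n in names:
        mapping.setdefault(n, len(mapping))
    return tuple(mapping[n] for n in names)

rows = [('Fred', 'Sue', 'Fred'), ('Sue', 'Fred', 'Sue'), ('Fred', 'Fred', 'Sue')]
probs = [0.25, 0.25, 0.125]

# Group rows by their canonical (permutation-invariant) labeling and sum probabilities
reduced = defaultdict(float)
for row, p in zip(rows, probs):
    reduced[relabel_names(row)] += p

print(dict(reduced))
# {(0, 1, 0): 0.5, (0, 0, 1): 0.125}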
def get_automatch_candidates(cm_list, ranks_lt=5, directed=True, name_scoring=False,
                             ibs=None, filter_reviewed=False,
                             filter_duplicate_namepair_matches=False):
    """
    THIS IS PROBABLY ONE OF THE ONLY THINGS IN THIS FILE THAT SHOULD NOT BE
    DEPRECATED

    Returns a list of matches that should be inspected
    This function is more lightweight than orgres or allres.
    Used in inspect_gui and interact_qres2

    Args:
        cm_list (list or dict): ChipMatch objects, or a mapping from query
            annotation id to query result object
        ranks_lt (int): put all ranks less than this number into the graph
        directed (bool):

    Returns:
        tuple: candidate_matches = (qaid_arr, daid_arr, score_arr, rank_arr)

    CommandLine:
        python -m ibeis.expt.results_organizer --test-get_automatch_candidates:2
        python -m ibeis.expt.results_organizer --test-get_automatch_candidates:0

    Example0:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.expt.results_organizer import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('PZ_MTEST')
        >>> qreq_ = ibeis.main_helpers.testdata_qreq_()
        >>> cm_list = ibs.query_chips(qreq_=qreq_, return_cm=True)
        >>> ranks_lt = 5
        >>> directed = True
        >>> name_scoring = False
        >>> candidate_matches = get_automatch_candidates(cm_list, ranks_lt, directed, ibs=ibs)
        >>> print(candidate_matches)

    Example1:
        >>> # UNSTABLE_DOCTEST
        >>> from ibeis.expt.results_organizer import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('PZ_MTEST')
        >>> qaid_list = ibs.get_valid_aids()[0:5]
        >>> daid_list = ibs.get_valid_aids()[0:20]
        >>> cm_list = ibs.query_chips(qaid_list, daid_list, return_cm=True)
        >>> ranks_lt = 5
        >>> directed = False
        >>> name_scoring = False
        >>> filter_reviewed = False
        >>> filter_duplicate_namepair_matches = True
        >>> candidate_matches = get_automatch_candidates(
        ...     cm_list, ranks_lt, directed, name_scoring=name_scoring,
        ...     filter_reviewed=filter_reviewed,
        ...     filter_duplicate_namepair_matches=filter_duplicate_namepair_matches,
        ...     ibs=ibs)
        >>> print(candidate_matches)

    Example3:
        >>> # UNSTABLE_DOCTEST
        >>> from ibeis.expt.results_organizer import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('PZ_MTEST')
        >>> qaid_list = ibs.get_valid_aids()[0:1]
        >>> daid_list = ibs.get_valid_aids()[10:100]
        >>> qaid2_cm = ibs.query_chips(qaid_list, daid_list, return_cm=True)
        >>> ranks_lt = 1
        >>> directed = False
        >>> name_scoring = False
        >>> filter_reviewed = False
        >>> filter_duplicate_namepair_matches = True
        >>> candidate_matches = get_automatch_candidates(
        ...     qaid2_cm, ranks_lt, directed, name_scoring=name_scoring,
        ...     filter_reviewed=filter_reviewed,
        ...     filter_duplicate_namepair_matches=filter_duplicate_namepair_matches,
        ...     ibs=ibs)
        >>> print(candidate_matches)

    Example4:
        >>> # UNSTABLE_DOCTEST
        >>> from ibeis.expt.results_organizer import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('PZ_MTEST')
        >>> qaid_list = ibs.get_valid_aids()[0:10]
        >>> daid_list = ibs.get_valid_aids()[0:10]
        >>> qres_list = ibs.query_chips(qaid_list, daid_list)
        >>> ranks_lt = 3
        >>> directed = False
        >>> name_scoring = False
        >>> filter_reviewed = False
        >>> filter_duplicate_namepair_matches = True
        >>> candidate_matches = get_automatch_candidates(
        ...     qres_list, ranks_lt, directed, name_scoring=name_scoring,
        ...     filter_reviewed=filter_reviewed,
        ...     filter_duplicate_namepair_matches=filter_duplicate_namepair_matches,
        ...     ibs=ibs)
        >>> print(candidate_matches)
    """
    import vtool as vt
    from ibeis.model.hots import chip_match
    print(('[resorg] get_automatch_candidates('
           'filter_reviewed={filter_reviewed},'
           'filter_duplicate_namepair_matches={filter_duplicate_namepair_matches},'
           'directed={directed},'
           'ranks_lt={ranks_lt},'
           ).format(**locals()))
    print('[resorg] len(cm_list) = %d' % (len(cm_list)))
    qaids_stack = []
    daids_stack = []
    ranks_stack = []
    scores_stack = []

    # For each QueryResult, Extract inspectable candidate matches
    if isinstance(cm_list, dict):
        cm_list = list(cm_list.values())

    for cm in cm_list:
        if isinstance(cm, chip_match.ChipMatch2):
            daids = cm.get_top_aids(ntop=ranks_lt)
            scores = cm.get_top_scores(ntop=ranks_lt)
            ranks = np.arange(len(daids))
            qaids = np.full(daids.shape, cm.qaid, dtype=daids.dtype)
        else:
            (qaids, daids, scores, ranks) = cm.get_match_tbldata(
                ranks_lt=ranks_lt, name_scoring=name_scoring, ibs=ibs)
        qaids_stack.append(qaids)
        daids_stack.append(daids)
        scores_stack.append(scores)
        ranks_stack.append(ranks)

    # Stack them into a giant array
    # utool.embed()
    qaid_arr = np.hstack(qaids_stack)
    daid_arr = np.hstack(daids_stack)
    score_arr = np.hstack(scores_stack)
    rank_arr = np.hstack(ranks_stack)

    # Sort by scores
    sortx = score_arr.argsort()[::-1]
    qaid_arr = qaid_arr[sortx]
    daid_arr = daid_arr[sortx]
    score_arr = score_arr[sortx]
    rank_arr = rank_arr[sortx]

    if filter_reviewed:
        _is_reviewed = ibs.get_annot_pair_is_reviewed(qaid_arr.tolist(), daid_arr.tolist())
        is_unreviewed = ~np.array(_is_reviewed, dtype=bool)
        qaid_arr = qaid_arr.compress(is_unreviewed)
        daid_arr = daid_arr.compress(is_unreviewed)
        score_arr = score_arr.compress(is_unreviewed)
        rank_arr = rank_arr.compress(is_unreviewed)

    # Remove directed edges
    if not directed:
        #nodes = np.unique(directed_edges.flatten())
        directed_edges = np.vstack((qaid_arr, daid_arr)).T
        #idx1, idx2 = vt.intersect2d_indices(directed_edges, directed_edges[:, ::-1])
        unique_rowx = vt.find_best_undirected_edge_indexes(directed_edges, score_arr)
        qaid_arr = qaid_arr.take(unique_rowx)
        daid_arr = daid_arr.take(unique_rowx)
        score_arr = score_arr.take(unique_rowx)
        rank_arr = rank_arr.take(unique_rowx)

    # Filter Double Name Matches
    if filter_duplicate_namepair_matches:
        qnid_arr = ibs.get_annot_nids(qaid_arr)
        dnid_arr = ibs.get_annot_nids(daid_arr)
        if not directed:
            directed_name_edges = np.vstack((qnid_arr, dnid_arr)).T
            unique_rowx2 = vt.find_best_undirected_edge_indexes(directed_name_edges, score_arr)
        else:
            namepair_id_list = np.array(vt.compute_unique_data_ids_(list(zip(qnid_arr, dnid_arr))))
            unique_namepair_ids, namepair_groupxs = vt.group_indices(namepair_id_list)
            score_namepair_groups = vt.apply_grouping(score_arr, namepair_groupxs)
            unique_rowx2 = np.array(sorted([
                groupx[score_group.argmax()]
                for groupx, score_group in zip(namepair_groupxs, score_namepair_groups)
            ]), dtype=np.int32)
        qaid_arr = qaid_arr.take(unique_rowx2)
        daid_arr = daid_arr.take(unique_rowx2)
        score_arr = score_arr.take(unique_rowx2)
        rank_arr = rank_arr.take(unique_rowx2)

    candidate_matches = (qaid_arr, daid_arr, score_arr, rank_arr)
    return candidate_matches
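# Sketch (not from the original source) of the filter_duplicate_namepair_matches
# idea above: treat (q, d) and (d, q) as the same name pair and keep only the
# highest-scoring candidate per pair. Plain numpy stands in for the vtool
# grouping helpers; the toy name ids and scores are made up.
import numpy as np

# Hypothetical name ids for each candidate match and its score
qnid_arr = np.array([1, 2, 1, 2, 3])
dnid_arr = np.array([2, 1, 2, 3, 2])
score_arr = np.array([0.9, 0.7, 0.8, 0.6, 0.5])

# Canonicalize undirected pairs so (1, 2) and (2, 1) count as the same name pair,
# then encode each pair as a single comparable id
lo = np.minimum(qnid_arr, dnid_arr)
hi = np.maximum(qnid_arr, dnid_arr)
pair_ids = lo * (hi.max() + 1) + hi

# Keep only the highest-scoring row for each unique name pair
unique_ids, inverse = np.unique(pair_ids, return_inverse=True)
keep_rowxs = np.array(sorted(
    np.flatnonzero(inverse == k)[score_arr[inverse == k].argmax()]
    for k in range(len(unique_ids))
))
# keep_rowxs -> array([0, 3]): row 0 wins the (1, 2) pair, row 3 wins the (2, 3) pair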