def get_training_desc_dist(cm, qreq_, fsv_col_lbls=[], namemode=True,
                           top_percent=None, data_annots=None,
                           query_annots=None, num=None):
    r"""
    Computes custom distances on prematched descriptors

    SeeAlso:
        python -m ibeis --tf learn_featscore_normalizer --show --disttype=ratio

        python -m ibeis --tf learn_featscore_normalizer --show --disttype=normdist -a timectrl -t default:K=1 --db PZ_Master1 --save pzmaster_normdist.png
        python -m ibeis --tf learn_featscore_normalizer --show --disttype=normdist -a timectrl -t default:K=1 --db PZ_MTEST --save pzmtest_normdist.png
        python -m ibeis --tf learn_featscore_normalizer --show --disttype=normdist -a timectrl -t default:K=1 --db GZ_ALL

        python -m ibeis --tf learn_featscore_normalizer --show --disttype=L2_sift -a timectrl -t default:K=1 --db PZ_MTEST
        python -m ibeis --tf learn_featscore_normalizer --show --disttype=L2_sift -a timectrl -t default:K=1 --db PZ_Master1
        python -m ibeis --tf compare_featscores --show --disttype=L2_sift,normdist -a timectrl -t default:K=1 --db GZ_ALL

    CommandLine:
        python -m ibeis.algo.hots.scorenorm --exec-get_training_desc_dist
        python -m ibeis.algo.hots.scorenorm --exec-get_training_desc_dist:1

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.scorenorm import *  # NOQA
        >>> import ibeis
        >>> cm, qreq_ = ibeis.testdata_cm(defaultdb='PZ_MTEST')
        >>> fsv_col_lbls = ['ratio', 'lnbnn', 'L2_sift']
        >>> namemode = False
        >>> (tp_fsv, tn_fsv) = get_training_desc_dist(cm, qreq_, fsv_col_lbls,
        >>>                                           namemode=namemode)
        >>> result = ut.repr2((tp_fsv.T, tn_fsv.T), nl=1)
        >>> print(result)

    Example1:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.scorenorm import *  # NOQA
        >>> import ibeis
        >>> cm, qreq_ = ibeis.testdata_cm(defaultdb='PZ_MTEST')
        >>> fsv_col_lbls = cm.fsv_col_lbls
        >>> num = None
        >>> namemode = False
        >>> top_percent = None
        >>> data_annots = None
        >>> (tp_fsv1, tn_fsv1) = get_training_fsv(cm, namemode=namemode,
        >>>                                       top_percent=top_percent)
        >>> (tp_fsv, tn_fsv) = get_training_desc_dist(cm, qreq_, fsv_col_lbls,
        >>>                                           namemode=namemode,
        >>>                                           top_percent=top_percent)
        >>> vt.asserteq(tp_fsv1, tp_fsv)
        >>> vt.asserteq(tn_fsv1, tn_fsv)
    """
    if namemode:
        tp_idxs, tn_idxs = get_topname_training_idxs(cm, num=num)
    else:
        tp_idxs, tn_idxs = get_topannot_training_idxs(cm, num=num)

    if top_percent is not None:
        cm_orig = cm
        cm_orig.assert_self(qreq_, verbose=False)
        # Keep only the top scoring fraction of the feature matches
        tophalf_indices = [
            ut.take_percentile(fs.argsort()[::-1], top_percent)
            for fs in cm.get_fsv_prod_list()
        ]
        cm = cm_orig.take_feature_matches(tophalf_indices, keepscores=True)
        assert np.all(cm_orig.daid_list.take(tp_idxs) == cm.daid_list.take(tp_idxs))
        assert np.all(cm_orig.daid_list.take(tn_idxs) == cm.daid_list.take(tn_idxs))
        cm.assert_self(qreq_, verbose=False)

    ibs = qreq_.ibs
    query_config2_ = qreq_.extern_query_config2
    data_config2_ = qreq_.extern_data_config2
    special_xs, dist_xs = vt.index_partition(
        fsv_col_lbls, ['fg', 'ratio', 'lnbnn', 'normdist'])
    dist_lbls = ut.take(fsv_col_lbls, dist_xs)
    special_lbls = ut.take(fsv_col_lbls, special_xs)
    qaid = cm.qaid
    fsv_list = []
    for idxs in [tp_idxs, tn_idxs]:
        daid_list = cm.daid_list.take(idxs)
        # Matching feature indices in the query / database images
        qfxs_list = ut.take(cm.qfxs_list, idxs)
        dfxs_list = ut.take(cm.dfxs_list, idxs)
        need_norm = len(ut.setintersect_ordered(
            ['ratio', 'lnbnn', 'normdist'], special_lbls)) > 0
        need_dists = len(dist_xs) > 0
        if need_dists or need_norm:
            qaid_list = [qaid] * len(qfxs_list)
            qvecs_flat_m = np.vstack(ibs.get_annot_vecs_subset(
                qaid_list, qfxs_list, config2_=query_config2_))
            dvecs_flat_m = np.vstack(ibs.get_annot_vecs_subset(
                daid_list, dfxs_list, config2_=data_config2_))
        if need_norm:
            assert any(x is not None for x in cm.filtnorm_aids), 'no normalizer known'
            naids_list = ut.take(cm.naids_list, idxs)
            nfxs_list = ut.take(cm.nfxs_list, idxs)
            nvecs_flat = ibs.lookup_annot_vecs_subset(
                naids_list, nfxs_list, config2_=data_config2_, annots=data_annots)
            _nvecs_flat_m = ut.compress(nvecs_flat, nvecs_flat)
            nvecs_flat_m = vt.safe_vstack(
                _nvecs_flat_m, qvecs_flat_m.shape, qvecs_flat_m.dtype)
            vdist = vt.L2_sift(qvecs_flat_m, dvecs_flat_m)
            ndist = vt.L2_sift(qvecs_flat_m, nvecs_flat_m)
        if len(special_xs) > 0:
            special_dist_list = []
            if 'fg' in special_lbls:
                # hack for fgweights (could get them directly from fsv)
                qfgweights_flat_m = np.hstack(ibs.get_annot_fgweights_subset(
                    [qaid] * len(qfxs_list), qfxs_list, config2_=query_config2_))
                dfgweights_flat_m = np.hstack(ibs.get_annot_fgweights_subset(
                    daid_list, dfxs_list, config2_=data_config2_))
                fgweights = np.sqrt(qfgweights_flat_m * dfgweights_flat_m)
                special_dist_list.append(fgweights)
            if 'ratio' in special_lbls:
                # Integrate the ratio test
                ratio_dist = (vdist / ndist)
                special_dist_list.append(ratio_dist)
            if 'lnbnn' in special_lbls:
                lnbnn_dist = ndist - vdist
                special_dist_list.append(lnbnn_dist)
            if 'normdist' in special_lbls:
                special_dist_list.append(ndist)
            special_dists = np.vstack(special_dist_list).T
        else:
            special_dists = np.empty((0, 0))
        if len(dist_xs) > 0:
            # Compute descriptor distances
            _dists = vt.compute_distances(qvecs_flat_m, dvecs_flat_m, dist_lbls)
            dists = np.vstack(list(_dists.values())).T
        else:
            dists = np.empty((0, 0))
        fsv = vt.rebuild_partition(special_dists.T, dists.T,
                                   special_xs, dist_xs)
        fsv = np.array(fsv).T
        fsv_list.append(fsv)
    tp_fsv, tn_fsv = fsv_list
    return tp_fsv, tn_fsv
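
# A minimal standalone sketch (not part of the original module) of how the
# 'ratio', 'lnbnn', and 'normdist' columns computed above relate to the match
# distance ``vdist`` and the normalizer distance ``ndist``. The arrays here are
# made-up per-feature distances on the [0, 1] scale that vt.L2_sift produces;
# only the arithmetic mirrors the code above.
def _sketch_special_dist_columns():
    import numpy as np
    vdist = np.array([0.20, 0.35, 0.50])  # query-to-match descriptor distance
    ndist = np.array([0.60, 0.40, 0.55])  # query-to-normalizer descriptor distance
    ratio_dist = vdist / ndist   # Lowe-style ratio; smaller means more distinctive
    lnbnn_dist = ndist - vdist   # LNBNN score; larger means more distinctive
    normdist = ndist             # the raw normalizer distance itself
    # Stack feature-wise into columns, as in the fsv construction above
    return np.vstack([ratio_dist, lnbnn_dist, normdist]).T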
def knn(indexer, qfx2_vec, K):
    r"""
    Returns the indices and squared distance to the nearest K neighbors.
    The distance is normalized between zero and one using
    VEC_PSEUDO_MAX_DISTANCE = (np.sqrt(2) * VEC_PSEUDO_MAX)

    Args:
        qfx2_vec : (N x D) an array of N, D-dimensional query vectors

        K: number of approximate nearest neighbors to find

    Returns:
        tuple of (qfx2_idx, qfx2_dist)
            ndarray : qfx2_idx[n][k] (N x K) is the index of the kth
                approximate nearest data vector w.r.t qfx2_vec[n]

            ndarray : qfx2_dist[n][k] (N x K) is the distance to the kth
                approximate nearest data vector w.r.t. qfx2_vec[n].
                Distance is normalized squared Euclidean distance.

    CommandLine:
        python -m wbia --tf NeighborIndex.knn:0 --debug2
        python -m wbia --tf NeighborIndex.knn:1

    Example:
        >>> # FIXME failing-test (22-Jul-2020) This test is failing and it's not clear how to fix it
        >>> # xdoctest: +SKIP
        >>> from wbia.algo.hots.neighbor_index import *  # NOQA
        >>> indexer, qreq_, ibs = testdata_nnindexer()
        >>> qfx2_vec = ibs.get_annot_vecs(1, config2_=qreq_.get_internal_query_config2())
        >>> K = 2
        >>> indexer.debug_nnindexer()
        >>> assert vt.check_sift_validity(qfx2_vec), 'bad SIFT properties'
        >>> (qfx2_idx, qfx2_dist) = indexer.knn(qfx2_vec, K)
        >>> result = str(qfx2_idx.shape) + ' ' + str(qfx2_dist.shape)
        >>> print('qfx2_vec.dtype = %r' % (qfx2_vec.dtype,))
        >>> print('indexer.max_distance_sqrd = %r' % (indexer.max_distance_sqrd,))
        >>> assert np.all(qfx2_dist < 1.0), (
        >>>     'distance should be less than 1. got %r' % (qfx2_dist,))
        >>> # Ensure distance calculations are correct
        >>> qfx2_dvec = indexer.idx2_vec[qfx2_idx.T]
        >>> targetdist = vt.L2_sift(qfx2_vec, qfx2_dvec).T ** 2
        >>> rawdist = vt.L2_sqrd(qfx2_vec, qfx2_dvec).T
        >>> assert np.all(qfx2_dist * indexer.max_distance_sqrd == rawdist), (
        >>>     'inconsistent distance calculations')
        >>> assert np.allclose(targetdist, qfx2_dist), (
        >>>     'inconsistent distance calculations')

    Example2:
        >>> # ENABLE_DOCTEST
        >>> from wbia.algo.hots.neighbor_index import *  # NOQA
        >>> indexer, qreq_, ibs = testdata_nnindexer()
        >>> qfx2_vec = np.empty((0, 128), dtype=indexer.get_dtype())
        >>> K = 2
        >>> (qfx2_idx, qfx2_dist) = indexer.knn(qfx2_vec, K)
        >>> result = str(qfx2_idx.shape) + ' ' + str(qfx2_dist.shape)
        >>> print(result)
        (0, 2) (0, 2)
    """
    if K == 0:
        (qfx2_idx, qfx2_dist) = indexer.empty_neighbors(len(qfx2_vec), 0)
    elif len(qfx2_vec) == 0:
        (qfx2_idx, qfx2_dist) = indexer.empty_neighbors(0, K)
    else:
        if K > indexer.num_indexed:
            # If we ask for more points than there are in the database FLANN
            # raises an exception, so clamp K to the number of indexed points.
            # This corner case will hopefully only be hit when using the
            # multi-indexer, where the narrower result integrates seamlessly
            # when the multi-indexer stacks the subindexer results. It may
            # still cause errors if hit from non-multi-indexer code.
            K = indexer.num_indexed
        try:
            # perform nearest neighbors
            (qfx2_idx, qfx2_raw_dist) = indexer.flann.nn_index(
                qfx2_vec, K, checks=indexer.checks, cores=indexer.cores)
        except pyflann.FLANNException as ex:
            ut.printex(
                ex,
                'probably misread the cached flann_fpath=%r'
                % (indexer.flann_fpath,),
            )
            # If the cached flann index is corrupt it can be located via
            # ibs.get_flann_cachedir() and indexer.get_fpath(cachedir)
            # and deleted before rebuilding.
            raise
        # Ensure that distances returned are between 0 and 1
        if indexer.max_distance_sqrd is not None:
            qfx2_dist = np.divide(qfx2_raw_dist, indexer.max_distance_sqrd)
        else:
            qfx2_dist = qfx2_raw_dist
        if ut.DEBUG2:
            # Ensure distance calculations are correct
            qfx2_dvec = indexer.idx2_vec[qfx2_idx.T]
            targetdist = vt.L2_sift(qfx2_vec, qfx2_dvec).T ** 2
            rawdist = vt.L2_sqrd(qfx2_vec, qfx2_dvec).T
            assert np.all(qfx2_raw_dist == rawdist), (
                'inconsistent distance calculations')
            assert np.allclose(targetdist, qfx2_dist), (
                'inconsistent distance calculations')
    return (qfx2_idx, qfx2_dist)
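
# A minimal standalone sketch (not part of the original module) of the distance
# normalization that ``knn`` applies to FLANN's raw squared distances. It
# assumes SIFT vectors whose L2 norm is pseudo-bounded by VEC_PSEUDO_MAX = 512
# (that value is an assumption here), so the squared distance between two such
# vectors is at most (sqrt(2) * 512) ** 2, and dividing by that bound maps raw
# squared distances into [0, 1] as the docstring above describes.
def _sketch_knn_distance_normalization():
    import numpy as np
    VEC_PSEUDO_MAX = 512  # assumed pseudo-max L2 norm of a SIFT descriptor
    max_distance_sqrd = (np.sqrt(2) * VEC_PSEUDO_MAX) ** 2  # == 2 * 512 ** 2
    # Hypothetical raw squared distances of the kind FLANN would return
    qfx2_raw_dist = np.array([[120000.0, 250000.0]])
    qfx2_dist = qfx2_raw_dist / max_distance_sqrd  # normalized into [0, 1]
    assert np.all(qfx2_dist < 1.0)
    return qfx2_dist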