Esempio n. 1
0
def get_training_desc_dist(cm, qreq_, fsv_col_lbls=None, namemode=True,
                           top_percent=None, data_annots=None,
                           query_annots=None, num=None):
    r"""
    Computes custom distances on prematched descriptors.

    For the true-positive and true-negative training matches selected from
    ``cm``, builds one feature-score array per group with one column per
    label in ``fsv_col_lbls``.

    Args:
        cm: chip-match object providing ``qaid``, ``daid_list``,
            ``qfxs_list``, ``dfxs_list`` and, for normalizer based labels,
            ``naids_list`` / ``nfxs_list`` / ``filtnorm_aids``.
        qreq_: query request; supplies ``ibs`` and the extern query / data
            configs used to look up descriptors.
        fsv_col_lbls (list): labels of the score columns to compute.  The
            labels 'fg', 'ratio', 'lnbnn' and 'normdist' are computed here;
            every other label is forwarded to ``vt.compute_distances``.
            ``None`` (the default) is treated as an empty list.
        namemode (bool): if True the training indices come from
            ``get_topname_training_idxs``, otherwise from
            ``get_topannot_training_idxs``.
        top_percent: if not None, only the best-scoring feature matches are
            kept (ranked by ``cm.get_fsv_prod_list()`` and cut with
            ``ut.take_percentile``) before distances are computed.
        data_annots: forwarded to ``ibs.lookup_annot_vecs_subset`` when the
            normalizer descriptors are looked up.
        query_annots: currently unused by this function.
        num: forwarded to the training-index selection helper.

    Returns:
        tuple: (tp_fsv, tn_fsv) arrays for the true-positive and
            true-negative matches respectively.

    SeeAlso:
        python -m ibeis --tf learn_featscore_normalizer --show --disttype=ratio

        python -m ibeis --tf learn_featscore_normalizer --show --disttype=normdist -a timectrl -t default:K=1 --db PZ_Master1 --save pzmaster_normdist.png
        python -m ibeis --tf learn_featscore_normalizer --show --disttype=normdist -a timectrl -t default:K=1 --db PZ_MTEST --save pzmtest_normdist.png
        python -m ibeis --tf learn_featscore_normalizer --show --disttype=normdist -a timectrl -t default:K=1 --db GZ_ALL

        python -m ibeis --tf learn_featscore_normalizer --show --disttype=L2_sift -a timectrl -t default:K=1 --db PZ_MTEST
        python -m ibeis --tf learn_featscore_normalizer --show --disttype=L2_sift -a timectrl -t default:K=1 --db PZ_Master1

        python -m ibeis --tf compare_featscores --show --disttype=L2_sift,normdist -a timectrl -t default:K=1 --db GZ_ALL

    CommandLine:
        python -m ibeis.algo.hots.scorenorm --exec-get_training_desc_dist
        python -m ibeis.algo.hots.scorenorm --exec-get_training_desc_dist:1

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.scorenorm import *  # NOQA
        >>> import ibeis
        >>> cm, qreq_ = ibeis.testdata_cm(defaultdb='PZ_MTEST')
        >>> fsv_col_lbls = ['ratio', 'lnbnn', 'L2_sift']
        >>> namemode = False
        >>> (tp_fsv, tn_fsv) = get_training_desc_dist(cm, qreq_, fsv_col_lbls,
        >>>                                           namemode=namemode)
        >>> result = ut.repr2((tp_fsv.T, tn_fsv.T), nl=1)
        >>> print(result)

    Example1:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.scorenorm import *  # NOQA
        >>> import ibeis
        >>> cm, qreq_ = ibeis.testdata_cm(defaultdb='PZ_MTEST')
        >>> fsv_col_lbls = cm.fsv_col_lbls
        >>> num = None
        >>> namemode = False
        >>> top_percent = None
        >>> data_annots = None
        >>> (tp_fsv1, tn_fsv1) = get_training_fsv(cm, namemode=namemode,
        >>>                                       top_percent=top_percent)
        >>> (tp_fsv, tn_fsv) = get_training_desc_dist(cm, qreq_, fsv_col_lbls,
        >>>                                           namemode=namemode,
        >>>                                           top_percent=top_percent)
        >>> vt.asserteq(tp_fsv1, tp_fsv)
        >>> vt.asserteq(tn_fsv1, tn_fsv)
    """
    # Avoid a shared mutable default; None means "no columns requested".
    if fsv_col_lbls is None:
        fsv_col_lbls = []

    if namemode:
        tp_idxs, tn_idxs = get_topname_training_idxs(cm, num=num)
    else:
        tp_idxs, tn_idxs = get_topannot_training_idxs(cm, num=num)

    if top_percent is not None:
        cm_orig = cm
        cm_orig.assert_self(qreq_, verbose=False)

        # Keep only the top scoring `top_percent` of the feature matches
        top_indices = [
            ut.take_percentile(fs.argsort()[::-1], top_percent)
            for fs in cm.get_fsv_prod_list()
        ]
        cm = cm_orig.take_feature_matches(top_indices, keepscores=True)

        # Filtering feature matches must not reorder the database annots,
        # otherwise tp_idxs / tn_idxs would point at the wrong matches.
        assert np.all(cm_orig.daid_list.take(tp_idxs) == cm.daid_list.take(tp_idxs))
        assert np.all(cm_orig.daid_list.take(tn_idxs) == cm.daid_list.take(tn_idxs))

        cm.assert_self(qreq_, verbose=False)

    ibs = qreq_.ibs
    query_config2_ = qreq_.extern_query_config2
    data_config2_ = qreq_.extern_data_config2
    # Split the requested columns into the ones computed here ("special")
    # and the generic descriptor distances delegated to vtool.
    special_xs, dist_xs = vt.index_partition(fsv_col_lbls, ['fg', 'ratio', 'lnbnn', 'normdist'])
    dist_lbls = ut.take(fsv_col_lbls, dist_xs)
    special_lbls = ut.take(fsv_col_lbls, special_xs)

    qaid = cm.qaid

    # These flags depend only on the requested labels, so compute them once
    # instead of once per (tp / tn) group.
    need_norm = len(ut.setintersect_ordered(['ratio', 'lnbnn', 'normdist'], special_lbls)) > 0
    need_dists = len(dist_xs) > 0

    fsv_list = []
    for idxs in [tp_idxs, tn_idxs]:
        daid_list = cm.daid_list.take(idxs)

        # Matching indices in query / database images
        qfxs_list = ut.take(cm.qfxs_list, idxs)
        dfxs_list = ut.take(cm.dfxs_list, idxs)
        qaid_list = [qaid] * len(qfxs_list)

        if need_dists or need_norm:
            qvecs_flat_m = np.vstack(ibs.get_annot_vecs_subset(qaid_list, qfxs_list, config2_=query_config2_))
            dvecs_flat_m = np.vstack(ibs.get_annot_vecs_subset(daid_list, dfxs_list, config2_=data_config2_))

        if need_norm:
            assert any(x is not None for x in cm.filtnorm_aids), 'no normalizer known'
            naids_list = ut.take(cm.naids_list, idxs)
            nfxs_list = ut.take(cm.nfxs_list, idxs)
            nvecs_flat = ibs.lookup_annot_vecs_subset(naids_list, nfxs_list, config2_=data_config2_,
                                                      annots=data_annots)
            # Drop falsy entries before stacking; safe_vstack is given the
            # query shape / dtype to fall back on.
            _nvecs_flat_m = ut.compress(nvecs_flat, nvecs_flat)
            nvecs_flat_m = vt.safe_vstack(_nvecs_flat_m, qvecs_flat_m.shape, qvecs_flat_m.dtype)

            # Distance from each query descriptor to its match and to its
            # normalizer.
            vdist = vt.L2_sift(qvecs_flat_m, dvecs_flat_m)
            ndist = vt.L2_sift(qvecs_flat_m, nvecs_flat_m)

        if len(special_xs) > 0:
            special_dist_list = []
            if 'fg' in special_lbls:
                # hack for fgweights (could get them directly from fsv)
                qfgweights_flat_m = np.hstack(ibs.get_annot_fgweights_subset(qaid_list, qfxs_list, config2_=query_config2_))
                dfgweights_flat_m = np.hstack(ibs.get_annot_fgweights_subset(daid_list, dfxs_list, config2_=data_config2_))
                fgweights = np.sqrt(qfgweights_flat_m * dfgweights_flat_m)
                special_dist_list.append(fgweights)

            if 'ratio' in special_lbls:
                # Integrating ratio test
                ratio_dist = (vdist / ndist)
                special_dist_list.append(ratio_dist)

            if 'lnbnn' in special_lbls:
                lnbnn_dist = ndist - vdist
                special_dist_list.append(lnbnn_dist)

            if 'normdist' in special_lbls:
                special_dist_list.append(ndist)

            special_dists = np.vstack(special_dist_list).T
        else:
            special_dists = np.empty((0, 0))

        if need_dists:
            # Compute the generic descriptor distances
            _dists = vt.compute_distances(qvecs_flat_m, dvecs_flat_m, dist_lbls)
            # np.vstack needs a real sequence; a dict view is rejected on
            # Python 3, so materialize it first.
            dists = np.vstack(list(_dists.values())).T
        else:
            dists = np.empty((0, 0))

        # Interleave the special and generic columns back into the order
        # requested by fsv_col_lbls.
        fsv = vt.rebuild_partition(special_dists.T, dists.T,
                                   special_xs, dist_xs)
        fsv = np.array(fsv).T
        fsv_list.append(fsv)
    tp_fsv, tn_fsv = fsv_list
    return tp_fsv, tn_fsv
Esempio n. 2
0
    def knn(indexer, qfx2_vec, K):
        r"""
        Finds the K approximate nearest indexed vectors for each query vector.

        The raw distances reported by FLANN are divided by
        ``indexer.max_distance_sqrd`` whenever that attribute is not None;
        otherwise they are returned unchanged.

        Args:
            qfx2_vec : (N x D) an array of N, D-dimensional query vectors

            K: number of approximate nearest neighbors to find

        Returns: tuple of (qfx2_idx, qfx2_dist)
            ndarray : qfx2_idx[n][k] (N x K) is the index of the kth
                        approximate nearest data vector w.r.t qfx2_vec[n]

            ndarray : qfx2_dist[n][k] (N x K) is the distance to the kth
                        approximate nearest data vector w.r.t. qfx2_vec[n]
                        distance is normalized squared euclidean distance.

        Note:
            When ``K`` is zero, or larger than ``indexer.num_indexed``, the
            result of ``indexer.empty_neighbors(len(qfx2_vec), 0)`` is
            returned instead of querying FLANN.

        CommandLine:
            python -m wbia --tf NeighborIndex.knn:0 --debug2
            python -m wbia --tf NeighborIndex.knn:1

        Example:
            >>> # FIXME failing-test (22-Jul-2020) This test is failing and it's not clear how to fix it
            >>> # xdoctest: +SKIP
            >>> from wbia.algo.hots.neighbor_index import *  # NOQA
            >>> indexer, qreq_, ibs = testdata_nnindexer()
            >>> qfx2_vec = ibs.get_annot_vecs(1, config2_=qreq_.get_internal_query_config2())
            >>> K = 2
            >>> indexer.debug_nnindexer()
            >>> assert vt.check_sift_validity(qfx2_vec), 'bad SIFT properties'
            >>> (qfx2_idx, qfx2_dist) = indexer.knn(qfx2_vec, K)
            >>> result = str(qfx2_idx.shape) + ' ' + str(qfx2_dist.shape)
            >>> print('qfx2_vec.dtype = %r' % (qfx2_vec.dtype,))
            >>> print('indexer.max_distance_sqrd = %r' % (indexer.max_distance_sqrd,))
            >>> assert np.all(qfx2_dist < 1.0), (
            >>>    'distance should be less than 1. got %r' % (qfx2_dist,))
            >>> # Ensure distance calculations are correct
            >>> qfx2_dvec = indexer.idx2_vec[qfx2_idx.T]
            >>> targetdist = vt.L2_sift(qfx2_vec, qfx2_dvec).T ** 2
            >>> rawdist    = vt.L2_sqrd(qfx2_vec, qfx2_dvec).T
            >>> assert np.all(qfx2_dist * indexer.max_distance_sqrd == rawdist), (
            >>>    'inconsistant distance calculations')
            >>> assert np.allclose(targetdist, qfx2_dist), (
            >>>    'inconsistant distance calculations')

        Example2:
            >>> # ENABLE_DOCTEST
            >>> from wbia.algo.hots.neighbor_index import *  # NOQA
            >>> indexer, qreq_, ibs = testdata_nnindexer()
            >>> qfx2_vec = np.empty((0, 128), dtype=indexer.get_dtype())
            >>> K = 2
            >>> (qfx2_idx, qfx2_dist) = indexer.knn(qfx2_vec, K)
            >>> result = str(qfx2_idx.shape) + ' ' + str(qfx2_dist.shape)
            >>> print(result)
            (0, 2) (0, 2)
        """
        # Guard 1: nothing requested, or more neighbors requested than there
        # are indexed points.  FLANN would raise in the latter case, so hand
        # back zero-width results.  This workaround is meant for the
        # multi-indexer, which stacks sub-indexer results; it may cause
        # errors if hit from non-multi-indexer code.
        if K == 0 or K > indexer.num_indexed:
            nbr_idxs, nbr_dists = indexer.empty_neighbors(len(qfx2_vec), 0)
            return (nbr_idxs, nbr_dists)

        # Guard 2: no query vectors at all.
        if len(qfx2_vec) == 0:
            nbr_idxs, nbr_dists = indexer.empty_neighbors(0, K)
            return (nbr_idxs, nbr_dists)

        try:
            # perform nearest neighbors
            # TODO: catch case where K < dbsize
            nbr_idxs, raw_dists = indexer.flann.nn_index(
                qfx2_vec, K, checks=indexer.checks, cores=indexer.cores)
        except pyflann.FLANNException as ex:
            ut.printex(
                ex,
                'probably misread the cached flann_fpath=%r' %
                (indexer.flann_fpath, ),
            )
            # If the flann index needs to be deleted, its path can be
            # recovered via indexer.get_fpath(ibs.get_flann_cachedir()).
            raise

        # Ensure that distance returned are between 0 and 1
        if indexer.max_distance_sqrd is None:
            nbr_dists = raw_dists
        else:
            nbr_dists = np.divide(raw_dists, indexer.max_distance_sqrd)

        if ut.DEBUG2:
            # Ensure distance calculations are correct
            matched_vecs = indexer.idx2_vec[nbr_idxs.T]
            expected_norm = vt.L2_sift(qfx2_vec, matched_vecs).T**2
            expected_raw = vt.L2_sqrd(qfx2_vec, matched_vecs).T
            assert np.all(raw_dists == expected_raw), 'inconsistant distance calculations'
            assert np.allclose(expected_norm, nbr_dists), 'inconsistant distance calculations'

        return (nbr_idxs, nbr_dists)