Example #1
0
def assign_to_words_(wordflann, words, idx2_vec, nAssign, massign_alpha,
                     massign_sigma, massign_equal_weights):
    """
    Assigns each descriptor vector to its nearest visual word(s).

    Args:
        wordflann (FLANN): nearest neighbor index over words
        words (ndarray): vocabulary words
        idx2_vec (ndarray): descriptors to assign
        nAssign (int): number of words to assign each descriptor to
        massign_alpha (float): multiple-assignment ratio threshold
        massign_sigma (float): multiple-assignment gaussian variance
        massign_equal_weights (bool): assign equal weight to all multiassigned words

    Returns:
        tuple: (wx2_idxs, wx2_maws, idx2_wxs) formatted as::

            * wx2_idxs - word index   -> vector indexes
            * wx2_maws - word index   -> multi-assignment weights
            * idx2_wxs - vector index -> assigned word indexes

    Example:
        >>> # SLOW_DOCTEST
        >>> from ibeis.algo.hots.smk.smk_index import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> ibs, annots_df, daids, qaids, invindex, qreq_ = smk_debug.testdata_raw_internals0()
        >>> words  = invindex.words
        >>> wordflann = invindex.wordflann
        >>> idx2_vec  = invindex.idx2_dvec
        >>> nAssign = qreq_.qparams.nAssign
        >>> massign_alpha = qreq_.qparams.massign_alpha
        >>> massign_sigma = qreq_.qparams.massign_sigma
        >>> massign_equal_weights = qreq_.qparams.massign_equal_weights
        >>> _dbargs = (wordflann, words, idx2_vec, nAssign, massign_alpha, massign_sigma, massign_equal_weights)
        >>> wx2_idxs, wx2_maws, idx2_wxs = assign_to_words_(*_dbargs)
    """
    if ut.VERBOSE:
        print('[smk_index.assign] +--- Start Assign vecs to words.')
        print('[smk_index.assign] * nAssign=%r' % nAssign)
    if not ut.QUIET:
        print('[smk_index.assign] assign_to_words_. len(idx2_vec) = %r' % len(idx2_vec))
    assert nAssign > 0, 'cannot assign to 0 neighbors'
    # Query the word index for the nAssign nearest words per descriptor
    try:
        _idx2_wx, _idx2_wdist = wordflann.nn_index(idx2_vec, nAssign)
    except pyflann.FLANNException as ex:
        ut.printex(ex, 'probably misread the cached flann_fpath=%r' % (wordflann.flann_fpath,))
        raise
    # Normalize FLANN output into an explicit (num_vecs x nAssign) layout
    num_vecs = idx2_vec.shape[0]
    _idx2_wx.shape = (num_vecs, nAssign)
    _idx2_wdist.shape = (num_vecs, nAssign)
    if nAssign == 1:
        # Hard assignment: each vector maps to a single word with unit weight
        idx2_wxs = _idx2_wx.tolist()
        idx2_maws = [[1.0]] * len(idx2_wxs)
    else:
        # Soft assignment: distance-based weights over multiple words
        idx2_wxs, idx2_maws = compute_multiassign_weights_(
            _idx2_wx, _idx2_wdist, massign_alpha, massign_sigma,
            massign_equal_weights)

    # Build the inverted index by grouping vector indexes on word index
    repeated_idxs = ([idx] * len(wxs) for idx, wxs in enumerate(idx2_wxs))
    wx_keys, groupxs = clustertool.jagged_group(idx2_wxs)
    idxs_list = clustertool.apply_jagged_grouping(repeated_idxs, groupxs)
    maws_list = clustertool.apply_jagged_grouping(idx2_maws, groupxs)
    wx2_idxs = {wx: idxs for wx, idxs in zip(wx_keys, idxs_list)}
    wx2_maws = {wx: maws for wx, maws in zip(wx_keys, maws_list)}
    if ut.VERBOSE:
        print('[smk_index.assign] L___ End Assign vecs to words.')

    return wx2_idxs, wx2_maws, idx2_wxs
Example #2
0
def assign_to_words_(wordflann, words, idx2_vec, idx_name='idx', dense=True,
                     nAssign=1, massign_alpha=1.2, massign_sigma=80):
    """
    Assigns descriptor-vectors to nearest word.
    Returns inverted index, multi-assigned weights, and forward index

    wx2_idxs - word index   -> vector indexes
    wx2_maws - word index   -> multi-assignment weights
    idx2_wxs - vector index -> assigned word indexes

    >>> from ibeis.model.hots.smk.smk_index import *  # NOQA
    >>> from ibeis.model.hots.smk import smk_debug
    >>> ibs, annots_df, daids, qaids, invindex = smk_debug.testdata_raw_internals0()
    >>> words  = invindex.words
    >>> wordflann = invindex.wordflann
    >>> idx2_vec  = invindex.idx2_dvec
    >>> dense = True
    >>> nAssign = ibs.cfg.query_cfg.smk_cfg.nAssign
    >>> _dbargs = (wordflann, words, idx2_vec, idx_name, dense, nAssign)
    >>> wx2_idxs, wx2_maws, idx2_wxs = assign_to_words_(*_dbargs)
    """
    idx2_vec_values = pdh.ensure_values(idx2_vec)
    # Assign each vector to the nearest visual words
    _idx2_wx, _idx2_wdist = wordflann.nn_index(idx2_vec_values, nAssign)
    # BUGFIX: pyflann's nn_index returns 1-D arrays when nAssign == 1, which
    # made the nAssign == 1 branch below produce flat (non-jagged) lists and
    # broke the grouping code ([idx] * len(wxs) on an int).  Force an explicit
    # (num_vecs x nAssign) layout, matching the other assign_to_words_ variant.
    _idx2_wx.shape    = (idx2_vec_values.shape[0], nAssign)
    _idx2_wdist.shape = (idx2_vec_values.shape[0], nAssign)
    if nAssign > 1:
        # MultiAssignment Filtering from Improving Bag of Features
        # http://lear.inrialpes.fr/pubs/2010/JDS10a/jegou_improvingbof_preprint.pdf
        # Keep only neighbors within alpha times the closest distance
        thresh  = np.multiply(massign_alpha, _idx2_wdist.T[0:1].T)
        invalid = np.greater_equal(_idx2_wdist, thresh)
        # Weighting as in Lost in Quantization (gaussian of distance)
        gauss_numer = -_idx2_wdist.astype(np.float64)
        gauss_denom = 2 * (massign_sigma ** 2)
        gauss_exp   = np.divide(gauss_numer, gauss_denom)
        unnorm_maw = np.exp(gauss_exp)
        # Mask invalid multiassignment weights
        masked_unorm_maw = np.ma.masked_array(unnorm_maw, mask=invalid)
        # Normalize multiassignment weights from 0 to 1
        masked_norm = masked_unorm_maw.sum(axis=1)[:, np.newaxis]
        masked_maw = np.divide(masked_unorm_maw, masked_norm)
        masked_wxs = np.ma.masked_array(_idx2_wx, mask=invalid)
        # Remove masked weights and word indexes
        idx2_wxs  = list(map(utool.filter_Nones, masked_wxs.tolist()))
        idx2_maws = list(map(utool.filter_Nones, masked_maw.tolist()))
    else:
        idx2_wxs = _idx2_wx.tolist()
        # BUGFIX: weights must be jagged ([[1.0]], not a flat [1.0] list) so
        # apply_jagged_grouping groups them in lockstep with idx2_wxs
        idx2_maws = [[1.0]] * len(idx2_wxs)

    # Invert mapping -- Group by word indexes
    jagged_idxs = ([idx] * len(wxs) for idx, wxs in enumerate(idx2_wxs))
    wx_keys, groupxs = clustertool.jagged_group(idx2_wxs)
    idxs_list = clustertool.apply_jagged_grouping(jagged_idxs, groupxs)
    maws_list = clustertool.apply_jagged_grouping(idx2_maws, groupxs)
    wx2_idxs = dict(zip(wx_keys, idxs_list))
    wx2_maws = dict(zip(wx_keys, maws_list))

    if WITH_PANDAS:
        # Optionally wrap outputs in pandas structures keyed by the original
        # indexes (presumably for the pandas-backed code path -- see pdh)
        idx_series = pdh.ensure_index(idx2_vec)
        wx_series  = pdh.ensure_index(words)
        wx2_idxs = pdh.pandasify_dict1d(
            wx2_idxs, wx_series, idx_name, ('wx2_' + idx_name + 's'), dense=dense)
        idx2_wxs = pdh.IntSeries(idx2_wxs, index=idx_series, name='wx')

    return wx2_idxs, wx2_maws, idx2_wxs
Example #3
0
def assign_to_words_(wordflann, words, idx2_vec, nAssign, massign_alpha,
                     massign_sigma, massign_equal_weights):
    """
    Assign every descriptor vector to its nearest visual word(s).

    Args:
        wordflann (FLANN): nearest neighbor index over words
        words (ndarray): vocabulary words
        idx2_vec (ndarray): descriptors to assign
        nAssign (int): number of words to assign each descriptor to
        massign_alpha (float): multiple-assignment ratio threshold
        massign_sigma (float): multiple-assignment gaussian variance
        massign_equal_weights (bool): assign equal weight to all multiassigned words

    Returns:
        tuple: (wx2_idxs, wx2_maws, idx2_wxs) formatted as::

            * wx2_idxs - word index   -> vector indexes
            * wx2_maws - word index   -> multi-assignment weights
            * idx2_wxs - vector index -> assigned word indexes

    Example:
        >>> # SLOW_DOCTEST
        >>> from ibeis.algo.hots.smk.smk_index import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> ibs, annots_df, daids, qaids, invindex, qreq_ = smk_debug.testdata_raw_internals0()
        >>> words  = invindex.words
        >>> wordflann = invindex.wordflann
        >>> idx2_vec  = invindex.idx2_dvec
        >>> nAssign = qreq_.qparams.nAssign
        >>> massign_alpha = qreq_.qparams.massign_alpha
        >>> massign_sigma = qreq_.qparams.massign_sigma
        >>> massign_equal_weights = qreq_.qparams.massign_equal_weights
        >>> _dbargs = (wordflann, words, idx2_vec, nAssign, massign_alpha, massign_sigma, massign_equal_weights)
        >>> wx2_idxs, wx2_maws, idx2_wxs = assign_to_words_(*_dbargs)
    """
    if ut.VERBOSE:
        print('[smk_index.assign] +--- Start Assign vecs to words.')
        print('[smk_index.assign] * nAssign=%r' % nAssign)
    if not ut.QUIET:
        print('[smk_index.assign] assign_to_words_. len(idx2_vec) = %r' % len(idx2_vec))
    assert nAssign > 0, 'cannot assign to 0 neighbors'
    # Look up the nAssign closest words for each descriptor vector
    try:
        _idx2_wx, _idx2_wdist = wordflann.nn_index(idx2_vec, nAssign)
    except pyflann.FLANNException as ex:
        ut.printex(ex, 'probably misread the cached flann_fpath=%r' % (wordflann.flann_fpath,))
        raise
    # Reshape so both results are explicitly (num_vecs x nAssign)
    _idx2_wx.shape = (idx2_vec.shape[0], nAssign)
    _idx2_wdist.shape = (idx2_vec.shape[0], nAssign)
    if nAssign > 1:
        # Distance-weighted soft assignment across several words
        idx2_wxs, idx2_maws = compute_multiassign_weights_(
            _idx2_wx, _idx2_wdist, massign_alpha, massign_sigma,
            massign_equal_weights)
    else:
        # Hard assignment: exactly one word per vector, weight 1.0
        idx2_wxs = _idx2_wx.tolist()
        idx2_maws = [[1.0]] * len(idx2_wxs)

    # Invert the forward mapping: group vector indexes by assigned word
    jagged_idxs = [[vx] * len(wxs) for vx, wxs in enumerate(idx2_wxs)]
    wx_keys, groupxs = clustertool.jagged_group(idx2_wxs)
    idxs_list = clustertool.apply_jagged_grouping(jagged_idxs, groupxs)
    maws_list = clustertool.apply_jagged_grouping(idx2_maws, groupxs)
    wx2_idxs = dict(zip(wx_keys, idxs_list))
    wx2_maws = dict(zip(wx_keys, maws_list))
    if ut.VERBOSE:
        print('[smk_index.assign] L___ End Assign vecs to words.')

    return wx2_idxs, wx2_maws, idx2_wxs
Example #4
0
def assign_to_words_(wordflann,
                     words,
                     idx2_vec,
                     idx_name='idx',
                     dense=True,
                     nAssign=1,
                     massign_alpha=1.2,
                     massign_sigma=80):
    """
    Assigns descriptor-vectors to nearest word.
    Returns inverted index, multi-assigned weights, and forward index

    wx2_idxs - word index   -> vector indexes
    wx2_maws - word index   -> multi-assignment weights
    idx2_wxs - vector index -> assigned word indexes

    >>> from ibeis.model.hots.smk.smk_index import *  # NOQA
    >>> from ibeis.model.hots.smk import smk_debug
    >>> ibs, annots_df, daids, qaids, invindex = smk_debug.testdata_raw_internals0()
    >>> words  = invindex.words
    >>> wordflann = invindex.wordflann
    >>> idx2_vec  = invindex.idx2_dvec
    >>> dense = True
    >>> nAssign = ibs.cfg.query_cfg.smk_cfg.nAssign
    >>> _dbargs = (wordflann, words, idx2_vec, idx_name, dense, nAssign)
    >>> wx2_idxs, wx2_maws, idx2_wxs = assign_to_words_(*_dbargs)
    """
    idx2_vec_values = pdh.ensure_values(idx2_vec)
    # Assign each vector to the nearest visual words
    _idx2_wx, _idx2_wdist = wordflann.nn_index(idx2_vec_values, nAssign)
    # BUGFIX: pyflann's nn_index returns 1-D arrays when nAssign == 1, so the
    # single-assignment branch produced flat lists and the inversion code
    # failed on len(wxs) for an int.  Force a (num_vecs x nAssign) layout so
    # both branches emit jagged list-of-lists, consistent with the other
    # assign_to_words_ variant.
    _idx2_wx.shape = (idx2_vec_values.shape[0], nAssign)
    _idx2_wdist.shape = (idx2_vec_values.shape[0], nAssign)
    if nAssign > 1:
        # MultiAssignment Filtering from Improving Bag of Features
        # http://lear.inrialpes.fr/pubs/2010/JDS10a/jegou_improvingbof_preprint.pdf
        # Discard neighbors beyond alpha times the closest word distance
        thresh = np.multiply(massign_alpha, _idx2_wdist.T[0:1].T)
        invalid = np.greater_equal(_idx2_wdist, thresh)
        # Weighting as in Lost in Quantization (gaussian of distance)
        gauss_numer = -_idx2_wdist.astype(np.float64)
        gauss_denom = 2 * (massign_sigma**2)
        gauss_exp = np.divide(gauss_numer, gauss_denom)
        unnorm_maw = np.exp(gauss_exp)
        # Mask invalid multiassignment weights
        masked_unorm_maw = np.ma.masked_array(unnorm_maw, mask=invalid)
        # Normalize multiassignment weights from 0 to 1
        masked_norm = masked_unorm_maw.sum(axis=1)[:, np.newaxis]
        masked_maw = np.divide(masked_unorm_maw, masked_norm)
        masked_wxs = np.ma.masked_array(_idx2_wx, mask=invalid)
        # Remove masked weights and word indexes
        idx2_wxs = list(map(utool.filter_Nones, masked_wxs.tolist()))
        idx2_maws = list(map(utool.filter_Nones, masked_maw.tolist()))
    else:
        idx2_wxs = _idx2_wx.tolist()
        # BUGFIX: weights must be jagged ([[1.0]], not flat [1.0]) so they
        # group in lockstep with idx2_wxs in apply_jagged_grouping below
        idx2_maws = [[1.0]] * len(idx2_wxs)

    # Invert mapping -- Group by word indexes
    jagged_idxs = ([idx] * len(wxs) for idx, wxs in enumerate(idx2_wxs))
    wx_keys, groupxs = clustertool.jagged_group(idx2_wxs)
    idxs_list = clustertool.apply_jagged_grouping(jagged_idxs, groupxs)
    maws_list = clustertool.apply_jagged_grouping(idx2_maws, groupxs)
    wx2_idxs = dict(zip(wx_keys, idxs_list))
    wx2_maws = dict(zip(wx_keys, maws_list))

    if WITH_PANDAS:
        # Optionally pandasify outputs keyed on the original indexes
        # (presumably for the pandas-backed code path -- see pdh)
        idx_series = pdh.ensure_index(idx2_vec)
        wx_series = pdh.ensure_index(words)
        wx2_idxs = pdh.pandasify_dict1d(wx2_idxs,
                                        wx_series,
                                        idx_name, ('wx2_' + idx_name + 's'),
                                        dense=dense)
        idx2_wxs = pdh.IntSeries(idx2_wxs, index=idx_series, name='wx')

    return wx2_idxs, wx2_maws, idx2_wxs