def assign_to_words_(wordflann, words, idx2_vec, nAssign, massign_alpha,
                     massign_sigma, massign_equal_weights):
    """
    Assigns descriptor-vectors to nearest word.

    Args:
        wordflann (FLANN): nearest neighbor index over words
        words (ndarray): vocabulary words (not read by this function; kept
            for interface compatibility)
        idx2_vec (ndarray): descriptors to assign
        nAssign (int): number of words to assign each descriptor to
        massign_alpha (float): multiple-assignment ratio threshold
        massign_sigma (float): multiple-assignment gaussian variance
        massign_equal_weights (bool): assign equal weight to all
            multiassigned words

    Returns:
        tuple: inverted index, multi-assigned weights, and forward index
        formatted as::

            * wx2_idxs - word index   -> vector indexes
            * wx2_maws - word index   -> multi-assignment weights
            * idx2_wxs - vector index -> assigned word indexes

    Raises:
        AssertionError: if ``nAssign`` is not positive.
        pyflann.FLANNException: re-raised (after printing diagnostics) when
            the nearest-neighbor query fails.

    Example:
        >>> # SLOW_DOCTEST
        >>> from ibeis.algo.hots.smk.smk_index import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> ibs, annots_df, daids, qaids, invindex, qreq_ = smk_debug.testdata_raw_internals0()
        >>> words  = invindex.words
        >>> wordflann = invindex.wordflann
        >>> idx2_vec  = invindex.idx2_dvec
        >>> nAssign = qreq_.qparams.nAssign
        >>> massign_alpha = qreq_.qparams.massign_alpha
        >>> massign_sigma = qreq_.qparams.massign_sigma
        >>> massign_equal_weights = qreq_.qparams.massign_equal_weights
        >>> _dbargs = (wordflann, words, idx2_vec, nAssign, massign_alpha, massign_sigma, massign_equal_weights)
        >>> wx2_idxs, wx2_maws, idx2_wxs = assign_to_words_(*_dbargs)
    """
    if ut.VERBOSE:
        print('[smk_index.assign] +--- Start Assign vecs to words.')
        print('[smk_index.assign] * nAssign=%r' % nAssign)
    if not ut.QUIET:
        print('[smk_index.assign] assign_to_words_. len(idx2_vec) = %r' % len(idx2_vec))
    # Assign each vector to the nearest visual words
    assert nAssign > 0, 'cannot assign to 0 neighbors'
    try:
        _idx2_wx, _idx2_wdist = wordflann.nn_index(idx2_vec, nAssign)
    except pyflann.FLANNException as ex:
        ut.printex(ex, 'probably misread the cached flann_fpath=%r' % (wordflann.flann_fpath,))
        raise
    # Force a (num_vecs, nAssign) layout so that every row is a list of word
    # indexes, even when only a single neighbor was requested.  reshape is
    # used instead of assigning to ``.shape``, which NumPy discourages.
    num_vecs = idx2_vec.shape[0]
    _idx2_wx = _idx2_wx.reshape(num_vecs, nAssign)
    _idx2_wdist = _idx2_wdist.reshape(num_vecs, nAssign)
    if nAssign > 1:
        idx2_wxs, idx2_maws = compute_multiassign_weights_(
            _idx2_wx, _idx2_wdist, massign_alpha, massign_sigma,
            massign_equal_weights)
    else:
        idx2_wxs = _idx2_wx.tolist()
        # One independent [1.0] per vector; ``[[1.0]] * n`` would make every
        # row alias the same inner list.
        idx2_maws = [[1.0] for _ in idx2_wxs]
    # Invert mapping -- Group by word indexes
    # Each vector index is repeated once per word it was assigned to, so the
    # jagged index lists line up element-for-element with idx2_wxs.
    jagged_idxs = ([idx] * len(wxs) for idx, wxs in enumerate(idx2_wxs))
    wx_keys, groupxs = clustertool.jagged_group(idx2_wxs)
    idxs_list = clustertool.apply_jagged_grouping(jagged_idxs, groupxs)
    maws_list = clustertool.apply_jagged_grouping(idx2_maws, groupxs)
    wx2_idxs = dict(zip(wx_keys, idxs_list))
    wx2_maws = dict(zip(wx_keys, maws_list))
    if ut.VERBOSE:
        print('[smk_index.assign] L___ End Assign vecs to words.')

    return wx2_idxs, wx2_maws, idx2_wxs
def assign_to_words_(wordflann, words, idx2_vec, idx_name='idx', dense=True,
                     nAssign=1, massign_alpha=1.2, massign_sigma=80):
    """
    Assigns descriptor-vectors to nearest word.

    Args:
        wordflann: nearest neighbor index over words; must provide
            ``nn_index(vecs, num_neighbors)`` returning (indexes, distances)
        words: vocabulary words (only read to build the pandas index when
            WITH_PANDAS is set)
        idx2_vec: descriptors to assign
        idx_name (str): name of the vector index used for the pandas output
        dense (bool): forwarded to ``pdh.pandasify_dict1d``
        nAssign (int): number of words to assign each descriptor to
        massign_alpha (float): multiple-assignment ratio threshold; neighbors
            at distance >= alpha * (nearest distance) are discarded
        massign_sigma (float): gaussian sigma of the multi-assignment weight
            ``exp(-dist / (2 * sigma ** 2))``

    Returns:
        tuple: inverted index, multi-assigned weights, and forward index

            * wx2_idxs - word index   -> vector indexes
            * wx2_maws - word index   -> multi-assignment weights
            * idx2_wxs - vector index -> assigned word indexes

    Example:
        >>> from ibeis.model.hots.smk.smk_index import *  # NOQA
        >>> from ibeis.model.hots.smk import smk_debug
        >>> ibs, annots_df, daids, qaids, invindex = smk_debug.testdata_raw_internals0()
        >>> words  = invindex.words
        >>> wordflann = invindex.wordflann
        >>> idx2_vec  = invindex.idx2_dvec
        >>> idx_name = 'idx'
        >>> dense = True
        >>> nAssign = ibs.cfg.query_cfg.smk_cfg.nAssign
        >>> _dbargs = (wordflann, words, idx2_vec, idx_name, dense, nAssign)
        >>> wx2_idxs, wx2_maws, idx2_wxs = assign_to_words_(*_dbargs)
    """
    idx2_vec_values = pdh.ensure_values(idx2_vec)
    # Assign each vector to the nearest visual words
    _idx2_wx, _idx2_wdist = wordflann.nn_index(idx2_vec_values, nAssign)
    # Normalize to a (num_vecs, nAssign) layout.  The grouping code below
    # calls len() on every row, so rows must be lists even when nAssign == 1.
    # NOTE(review): the forward index now holds one-element lists in the
    # single-assignment case, like the multi-assignment branch already does;
    # confirm pdh.IntSeries and downstream consumers accept that.
    _idx2_wx = _idx2_wx.reshape(-1, nAssign)
    _idx2_wdist = _idx2_wdist.reshape(-1, nAssign)
    if nAssign > 1:
        # MultiAssignment Filtering from Improving Bag of Features
        # http://lear.inrialpes.fr/pubs/2010/JDS10a/jegou_improvingbof_preprint.pdf
        nearest_dist = _idx2_wdist[:, 0:1]
        thresh = np.multiply(massign_alpha, nearest_dist)
        invalid = np.greater_equal(_idx2_wdist, thresh)
        # Always keep the nearest word.  Without this a zero nearest distance
        # (0 >= alpha * 0) masks every neighbor and the vector is dropped.
        invalid[:, 0] = False
        # Weighting as in Lost in Quantization
        gauss_numer = -_idx2_wdist.astype(np.float64)
        gauss_denom = 2 * (massign_sigma ** 2)
        gauss_exp = np.divide(gauss_numer, gauss_denom)
        unnorm_maw = np.exp(gauss_exp)
        # Mask invalid multiassignment weights
        masked_unorm_maw = np.ma.masked_array(unnorm_maw, mask=invalid)
        # Normalize multiassignment weights from 0 to 1
        masked_norm = masked_unorm_maw.sum(axis=1)[:, np.newaxis]
        masked_maw = np.divide(masked_unorm_maw, masked_norm)
        masked_wxs = np.ma.masked_array(_idx2_wx, mask=invalid)
        # Remove masked weights and word indexes
        # (masked entries become None in tolist())
        idx2_wxs = list(map(utool.filter_Nones, masked_wxs.tolist()))
        idx2_maws = list(map(utool.filter_Nones, masked_maw.tolist()))
    else:
        idx2_wxs = _idx2_wx.tolist()
        # One independent unit weight per vector
        idx2_maws = [[1.0] for _ in idx2_wxs]
    # Invert mapping -- Group by word indexes
    # Each vector index is repeated once per word it was assigned to, so the
    # jagged index lists line up element-for-element with idx2_wxs.
    jagged_idxs = ([idx] * len(wxs) for idx, wxs in enumerate(idx2_wxs))
    wx_keys, groupxs = clustertool.jagged_group(idx2_wxs)
    idxs_list = clustertool.apply_jagged_grouping(jagged_idxs, groupxs)
    maws_list = clustertool.apply_jagged_grouping(idx2_maws, groupxs)
    wx2_idxs = dict(zip(wx_keys, idxs_list))
    wx2_maws = dict(zip(wx_keys, maws_list))
    if WITH_PANDAS:
        idx_series = pdh.ensure_index(idx2_vec)
        wx_series = pdh.ensure_index(words)
        wx2_idxs = pdh.pandasify_dict1d(
            wx2_idxs, wx_series, idx_name, ('wx2_' + idx_name + 's'), dense=dense)
        idx2_wxs = pdh.IntSeries(idx2_wxs, index=idx_series, name='wx')
    return wx2_idxs, wx2_maws, idx2_wxs
def assign_to_words_(wordflann, words, idx2_vec, idx_name='idx', dense=True,
                     nAssign=1, massign_alpha=1.2, massign_sigma=80):
    """
    Assigns descriptor-vectors to nearest word.

    Args:
        wordflann: nearest neighbor index over words; must provide
            ``nn_index(vecs, num_neighbors)`` returning (indexes, distances)
        words: vocabulary words (only read to build the pandas index when
            WITH_PANDAS is set)
        idx2_vec: descriptors to assign
        idx_name (str): name of the vector index used for the pandas output
        dense (bool): forwarded to ``pdh.pandasify_dict1d``
        nAssign (int): number of words to assign each descriptor to
        massign_alpha (float): multiple-assignment ratio threshold; neighbors
            at distance >= alpha * (nearest distance) are discarded
        massign_sigma (float): gaussian sigma of the multi-assignment weight
            ``exp(-dist / (2 * sigma ** 2))``

    Returns:
        tuple: inverted index, multi-assigned weights, and forward index

            * wx2_idxs - word index   -> vector indexes
            * wx2_maws - word index   -> multi-assignment weights
            * idx2_wxs - vector index -> assigned word indexes

    Example:
        >>> from ibeis.model.hots.smk.smk_index import *  # NOQA
        >>> from ibeis.model.hots.smk import smk_debug
        >>> ibs, annots_df, daids, qaids, invindex = smk_debug.testdata_raw_internals0()
        >>> words  = invindex.words
        >>> wordflann = invindex.wordflann
        >>> idx2_vec  = invindex.idx2_dvec
        >>> idx_name = 'idx'
        >>> dense = True
        >>> nAssign = ibs.cfg.query_cfg.smk_cfg.nAssign
        >>> _dbargs = (wordflann, words, idx2_vec, idx_name, dense, nAssign)
        >>> wx2_idxs, wx2_maws, idx2_wxs = assign_to_words_(*_dbargs)
    """
    idx2_vec_values = pdh.ensure_values(idx2_vec)
    # Assign each vector to the nearest visual words
    _idx2_wx, _idx2_wdist = wordflann.nn_index(idx2_vec_values, nAssign)
    # Normalize to a (num_vecs, nAssign) layout.  The grouping code below
    # calls len() on every row, so rows must be lists even when nAssign == 1.
    # NOTE(review): the forward index now holds one-element lists in the
    # single-assignment case, like the multi-assignment branch already does;
    # confirm pdh.IntSeries and downstream consumers accept that.
    _idx2_wx = _idx2_wx.reshape(-1, nAssign)
    _idx2_wdist = _idx2_wdist.reshape(-1, nAssign)
    if nAssign > 1:
        # MultiAssignment Filtering from Improving Bag of Features
        # http://lear.inrialpes.fr/pubs/2010/JDS10a/jegou_improvingbof_preprint.pdf
        nearest_dist = _idx2_wdist[:, 0:1]
        thresh = np.multiply(massign_alpha, nearest_dist)
        invalid = np.greater_equal(_idx2_wdist, thresh)
        # Always keep the nearest word.  Without this a zero nearest distance
        # (0 >= alpha * 0) masks every neighbor and the vector is dropped.
        invalid[:, 0] = False
        # Weighting as in Lost in Quantization
        gauss_numer = -_idx2_wdist.astype(np.float64)
        gauss_denom = 2 * (massign_sigma ** 2)
        gauss_exp = np.divide(gauss_numer, gauss_denom)
        unnorm_maw = np.exp(gauss_exp)
        # Mask invalid multiassignment weights
        masked_unorm_maw = np.ma.masked_array(unnorm_maw, mask=invalid)
        # Normalize multiassignment weights from 0 to 1
        masked_norm = masked_unorm_maw.sum(axis=1)[:, np.newaxis]
        masked_maw = np.divide(masked_unorm_maw, masked_norm)
        masked_wxs = np.ma.masked_array(_idx2_wx, mask=invalid)
        # Remove masked weights and word indexes
        # (masked entries become None in tolist())
        idx2_wxs = list(map(utool.filter_Nones, masked_wxs.tolist()))
        idx2_maws = list(map(utool.filter_Nones, masked_maw.tolist()))
    else:
        idx2_wxs = _idx2_wx.tolist()
        # One independent unit weight per vector
        idx2_maws = [[1.0] for _ in idx2_wxs]
    # Invert mapping -- Group by word indexes
    # Each vector index is repeated once per word it was assigned to, so the
    # jagged index lists line up element-for-element with idx2_wxs.
    jagged_idxs = ([idx] * len(wxs) for idx, wxs in enumerate(idx2_wxs))
    wx_keys, groupxs = clustertool.jagged_group(idx2_wxs)
    idxs_list = clustertool.apply_jagged_grouping(jagged_idxs, groupxs)
    maws_list = clustertool.apply_jagged_grouping(idx2_maws, groupxs)
    wx2_idxs = dict(zip(wx_keys, idxs_list))
    wx2_maws = dict(zip(wx_keys, maws_list))
    if WITH_PANDAS:
        idx_series = pdh.ensure_index(idx2_vec)
        wx_series = pdh.ensure_index(words)
        wx2_idxs = pdh.pandasify_dict1d(wx2_idxs, wx_series, idx_name,
                                        ('wx2_' + idx_name + 's'), dense=dense)
        idx2_wxs = pdh.IntSeries(idx2_wxs, index=idx_series, name='wx')
    return wx2_idxs, wx2_maws, idx2_wxs