def residual_worker(argtup):
    """Build one aggregated residual vector per visual word.

    All inputs arrive packed in a single tuple (presumably so the function
    can be handed to a ``map``-style parallel executor -- confirm against the
    caller).

    Args:
        argtup (tuple): ``(wx_list, word_list, fxs_list, maws_list,
            fx_to_vecs, int_rvec)`` where

            * ``wx_list`` -- word indexes; only its length is used here and
              it is passed through to the output,
            * ``word_list`` -- the word passed to
              ``smk_funcs.compute_rvec`` for each position,
            * ``fxs_list`` -- per position, the row indexes taken from
              ``fx_to_vecs``,
            * ``maws_list`` -- per position, the values forwarded to
              ``smk_funcs.aggregate_rvecs`` alongside the residuals,
            * ``fx_to_vecs`` -- 2-D array; rows are selected with
              ``take(fxs, axis=0)`` and ``shape[1]`` sets the output width,
            * ``int_rvec`` -- when truthy the aggregated residual is passed
              through ``smk_funcs.cast_residual_integer`` and stored as
              ``int8``; otherwise it is stored as ``float64``.

    Returns:
        tuple: ``(wx_list, fxs_list, maws_list, agg_rvecs, agg_flags)`` with
        ``agg_rvecs`` of shape ``(len(wx_list), fx_to_vecs.shape[1])`` and
        boolean ``agg_flags`` of shape ``(len(wx_list), 1)``.
    """
    wx_list, word_list, fxs_list, maws_list, fx_to_vecs, int_rvec = argtup

    num_words = len(wx_list)
    # ``np.float`` / ``np.bool`` were removed in NumPy 1.24; use the explicit
    # scalar types those aliases used to resolve to.
    rvec_dtype = np.int8 if int_rvec else np.float64
    agg_rvecs = np.empty((num_words, fx_to_vecs.shape[1]), dtype=rvec_dtype)
    agg_flags = np.empty((num_words, 1), dtype=np.bool_)

    # Index by position (rather than zip) so that a length mismatch between
    # the parallel lists raises instead of silently leaving rows of the
    # ``np.empty`` buffers uninitialised.
    for idx in range(num_words):
        word = word_list[idx]
        fxs = fxs_list[idx]
        maws = maws_list[idx]

        vecs = fx_to_vecs.take(fxs, axis=0)
        _rvecs, _flags = smk_funcs.compute_rvec(vecs, word)
        _agg_rvec, _agg_flag = smk_funcs.aggregate_rvecs(_rvecs, maws, _flags)
        if int_rvec:
            # Cast to integers for storage
            _agg_rvec = smk_funcs.cast_residual_integer(_agg_rvec)
        agg_rvecs[idx] = _agg_rvec
        agg_flags[idx] = _agg_flag

    return (wx_list, fxs_list, maws_list, agg_rvecs, agg_flags)
def make_agg_vecs(X, words, fx_to_vecs):
    """Compute aggregated residual vectors and store them on ``X``.

    Args:
        X: object providing ``wx_list``, ``fxs_list``, ``maws_list`` and
            ``int_rvec``. It is modified in place: ``agg_rvecs`` and
            ``agg_flags`` are (re)assigned.
        words: container indexed by the entries of ``X.wx_list`` via
            ``ut.take``.
        fx_to_vecs: 2-D array; rows are selected with ``take(fxs, axis=0)``
            and ``shape[1]`` sets the width of ``X.agg_rvecs``.

    Returns:
        The same ``X`` that was passed in. ``X.agg_rvecs`` has dtype ``int8``
        when ``X.int_rvec`` is truthy (values are first passed through
        ``smk_funcs.cast_residual_integer``) and ``float32`` otherwise;
        ``X.agg_flags`` is a boolean array of shape ``(len(X.wx_list), 1)``.
    """
    word_list = ut.take(words, X.wx_list)
    num_words = len(X.wx_list)
    use_int = X.int_rvec

    rvec_dtype = np.int8 if use_int else np.float32
    agg_rvecs = np.empty((num_words, fx_to_vecs.shape[1]), dtype=rvec_dtype)
    # ``np.bool`` was removed in NumPy 1.24; ``np.bool_`` is the scalar type
    # that alias used to resolve to.
    agg_flags = np.empty((num_words, 1), dtype=np.bool_)
    X.agg_rvecs = agg_rvecs
    X.agg_flags = agg_flags

    # Index by position (rather than zip) so that a length mismatch between
    # the parallel lists raises instead of silently leaving rows of the
    # ``np.empty`` buffers uninitialised.
    for idx in range(num_words):
        word = word_list[idx]
        fxs = X.fxs_list[idx]
        maws = X.maws_list[idx]

        vecs = fx_to_vecs.take(fxs, axis=0)
        _rvecs, _flags = smk_funcs.compute_rvec(vecs, word)
        _agg_rvec, _agg_flag = smk_funcs.aggregate_rvecs(_rvecs, maws, _flags)
        if use_int:
            _agg_rvec = smk_funcs.cast_residual_integer(_agg_rvec)
        agg_rvecs[idx] = _agg_rvec
        agg_flags[idx] = _agg_flag
    return X
def make_temporary_annot(aid, vocab, wx_to_weight, ibs, config):
    """Build an ``inverted_index.SingleAnnot`` for one annotation.

    The annotation's vectors are fetched from ``ibs``, assigned to words of
    ``vocab``, turned into one aggregated residual vector per assigned word,
    and packaged together with the ``gamma`` value computed by
    ``smk_funcs.gamma_agg``.

    Args:
        aid: annotation id forwarded to ``ibs.get_annot_vecs``.
        vocab: vocabulary passed to ``smk_funcs.assign_to_words``; its
            ``wx_to_word`` attribute is indexed by word index.
        wx_to_weight: container indexed by word index via ``ut.take``.
        ibs: object providing ``get_annot_vecs(aid, config2_=config)``.
        config: mapping read with ``.get`` for ``'nAssign'`` (default ``1``),
            ``'smk_alpha'`` (default ``3.0``) and ``'smk_thresh'``
            (default ``3.0``); also forwarded to ``ibs.get_annot_vecs``.

    Returns:
        inverted_index.SingleAnnot: with ``aid``, ``wx_list`` (sorted),
        ``fxs_list``, ``maws_list``, ``agg_rvecs`` (``float64``),
        ``agg_flags`` (boolean, shape ``(len(wx_list), 1)``), ``wx_to_idx``,
        ``wx_set``, ``gamma`` populated and ``int_rvec`` set to ``False``.
    """
    nAssign = config.get('nAssign', 1)
    alpha = config.get('smk_alpha', 3.0)
    thresh = config.get('smk_thresh', 3.0)

    # Compute assignments
    fx_to_vecs = ibs.get_annot_vecs(aid, config2_=config)
    fx_to_wxs, fx_to_maws = smk_funcs.assign_to_words(
        vocab, fx_to_vecs, nAssign)
    wx_to_fxs, wx_to_maws = smk_funcs.invert_assigns(fx_to_wxs, fx_to_maws)

    # Build Aggregate Residual Vectors
    wx_list = sorted(wx_to_fxs.keys())
    word_list = ut.take(vocab.wx_to_word, wx_list)
    fxs_list = ut.take(wx_to_fxs, wx_list)
    maws_list = ut.take(wx_to_maws, wx_list)

    # ``np.float`` / ``np.bool`` were removed in NumPy 1.24; use the explicit
    # scalar types those aliases used to resolve to.
    agg_rvecs = np.empty((len(wx_list), fx_to_vecs.shape[1]),
                         dtype=np.float64)
    agg_flags = np.empty((len(wx_list), 1), dtype=np.bool_)

    # The three lists are all derived from ``wx_list`` above, so they are
    # expected to line up one-to-one with the rows being filled.
    per_word = zip(word_list, fxs_list, maws_list)
    for idx, (word, fxs, maws) in enumerate(per_word):
        vecs = fx_to_vecs.take(fxs, axis=0)
        _rvecs, _flags = smk_funcs.compute_rvec(vecs, word)
        _agg_rvec, _agg_flag = smk_funcs.aggregate_rvecs(_rvecs, maws, _flags)
        agg_rvecs[idx] = _agg_rvec
        agg_flags[idx] = _agg_flag

    X = inverted_index.SingleAnnot()
    X.aid = aid
    X.wx_list = wx_list
    X.fxs_list = fxs_list
    X.maws_list = maws_list
    X.agg_rvecs = agg_rvecs
    X.agg_flags = agg_flags
    X.wx_to_idx = ut.make_index_lookup(X.wx_list)
    # Residuals are kept as floats here; no integer cast is applied.
    X.int_rvec = False
    X.wx_set = set(X.wx_list)

    weight_list = np.array(ut.take(wx_to_weight, wx_list))
    X.gamma = smk_funcs.gamma_agg(X.agg_rvecs, X.agg_flags, weight_list,
                                  alpha, thresh)
    return X