def compute_nonagg_residuals_pandas(words, wx_sublist, wx2_idxs, idx2_vec):
    """
    Compute non-aggregated residual vectors through pandas accessors.

    VERY SLOW. DEBUG USE ONLY

    Args:
        words: pandas container of word (cluster-center) vectors
        wx_sublist: word indexes that have assigned descriptors
        wx2_idxs: maps word index -> pandas series of descriptor indexes
        idx2_vec: pandas container of descriptor vectors

    Returns:
        object-dtype ndarray; slot i holds the residual vectors for word
        wx_sublist[i]

    Ignore:
        words = words.values
        wxlist = [wx]
        ### index test
        %timeit words[wx:wx + 1]            # 0.334 us
        %timeit words[wx, np.newaxis]       # 1.05 us
        %timeit words[np.newaxis, wx]       # 1.05 us
        %timeit words.take(wxlist, axis=0)  # 1.6 us
        ### pandas test
        %timeit words.values[wx:wx + 1]     # 7.6 us
        %timeit words[wx:wx + 1].values     # 84.9 us
    """
    # Preallocate an object array: each slot stores a per-word rvec matrix
    out_arr = np.empty(wx_sublist.size, dtype=np.ndarray)
    for slot, wx in enumerate(wx_sublist):
        # Descriptor indexes assigned to this word (unwrap pandas -> ndarray)
        assigned_idxs = wx2_idxs[wx].values
        assigned_vecs = idx2_vec.take(assigned_idxs).values
        # Slicing (not scalar indexing) keeps the word 2d for broadcasting
        word_vec = words.values[wx:wx + 1]
        out_arr[slot] = smk_core.get_norm_rvecs(assigned_vecs, word_vec)
    return out_arr
def compute_nonagg_rvec_listcomp(words, wx_sublist, idxs_list, idx2_vec):
    """
    Compute non-aggregated residual vectors, one list of rvecs per word.

    PREFERED METHOD - 110ms

    Example:
        >>> from ibeis.model.hots.smk import smk_debug
        >>> words, wx_sublist, aids_list, idxs_list, idx2_vec, maws_list = smk_debug.testdata_nonagg_rvec()

    Timeit:
        %timeit words_list = [words[np.newaxis, wx] for wx in wx_sublist] # 5 ms
        %timeit words_list = [words[wx:wx + 1] for wx in wx_sublist] # 1.6 ms
    """
    rvecs_list = []
    for wx, idxs in zip(wx_sublist, idxs_list):
        # Slice keeps the word 2d; take gathers this word's descriptors
        word = words[wx:wx + 1]
        vecs = idx2_vec.take(idxs, axis=0)
        rvecs_list.append(smk_core.get_norm_rvecs(vecs, word))
    return rvecs_list
def compute_nonagg_residuals_forloop(words, wx_sublist, idxs_list, idx2_vec):
    """
    Compute non-aggregated residual vectors with an explicit for loop.

    OK, but slower than listcomp method - 140ms

    Timeit:
        idxs = idxs.astype(np.int32)
        %timeit idx2_vec.take(idxs, axis=0) # 1.27
        %timeit idx2_vec.take(idxs.astype(np.int32), axis=0) # 1.94
        %timeit idx2_vec[idxs] # 7.8
    """
    # Object array so each entry can hold a differently-sized rvec matrix
    rvecs_out = np.empty(wx_sublist.size, dtype=np.ndarray)
    for slot, (wx, idxs) in enumerate(zip(wx_sublist, idxs_list)):
        word_vec = words[wx:wx + 1]  # 2d slice of a single word
        vecs = idx2_vec[idxs]
        rvecs_out[slot] = smk_core.get_norm_rvecs(vecs, word_vec)
    return rvecs_out
def compute_nonagg_rvec_listcomp(words, wx_sublist, idxs_list, idx2_vec):
    """
    Compute non-aggregated residual vectors (one rvec matrix per word).

    PREFERED METHOD - 110ms

    NOTE(review): this definition duplicates an earlier
    compute_nonagg_rvec_listcomp in this file; the later definition is
    the one that takes effect at import time.

    Example:
        >>> from ibeis.model.hots.smk import smk_debug
        >>> words, wx_sublist, aids_list, idxs_list, idx2_vec, maws_list = smk_debug.testdata_nonagg_rvec()

    Timeit:
        %timeit words_list = [words[np.newaxis, wx] for wx in wx_sublist] # 5 ms
        %timeit words_list = [words[wx:wx + 1] for wx in wx_sublist] # 1.6 ms
    """
    # Lazy pairing of each word slice with its gathered descriptor block
    word_iter = (words[wx:wx + 1] for wx in wx_sublist)
    vec_iter = (idx2_vec.take(idxs, axis=0) for idxs in idxs_list)
    return [smk_core.get_norm_rvecs(vecs, word)
            for vecs, word in zip(vec_iter, word_iter)]
def compute_residuals_(words, wx2_idxs, idx2_vec, idx2_aid, idx2_fx,
                       aggregate, with_pandas=WITH_PANDAS):
    """
    Computes residual vectors based on word assignments
    returns mapping from word index to a set of residual vectors

    Output:
        wx2_rvecs - [ ... [ rvec_i1, ..., rvec_Mi ]_i ... ]
        wx2_aids  - [ ... [  aid_i1, ...,  aid_Mi ]_i ... ]
        wx2_fxs   - [ ... [[fxs]_i1, ..., [fxs]_Mi ]_i ... ]

    For every word:
        * list of aggvecs
    For every aggvec:
        * one parent aid, if aggregate is False: assert isunique(aids)
        * list of parent fxs, if aggregate is True: assert len(fxs) == 1

    >>> from ibeis.model.hots.smk.smk_index import *  # NOQA
    >>> from ibeis.model.hots.smk import smk_debug
    >>> ibs, annots_df, daids, qaids, invindex, wx2_idxs = smk_debug.testdata_raw_internals1()
    >>> words = invindex.words
    >>> idx2_aid = invindex.idx2_daid
    >>> idx2_fx = invindex.idx2_dfx
    >>> idx2_vec = invindex.idx2_dvec
    >>> aggregate = ibs.cfg.query_cfg.smk_cfg.aggregate
    >>> wx2_rvecs, wx2_aids = compute_residuals_(words, wx2_idxs, idx2_vec, idx2_aid, idx2_fx, aggregate)
    """
    # Unwrap any pandas containers to raw values up front
    words_values = pdh.ensure_values(words)
    idx2_aid_values = pdh.ensure_values(idx2_aid)
    idx2_vec_values = pdh.ensure_values(idx2_vec)
    idx2_fx_values = pdh.ensure_values(idx2_fx)
    wx_sublist = pdh.ensure_index(wx2_idxs)
    # Per-word descriptor indexes and their parent annotation ids
    idxs_list = pdh.ensure_values_subset(wx2_idxs, wx_sublist)
    aids_list = [idx2_aid_values.take(idxs) for idxs in idxs_list]
    if utool.DEBUG2:
        assert all([len(a) == len(b) for a, b in zip(idxs_list, aids_list)]), 'bad alignment'
        assert idx2_vec_values.shape[0] == idx2_fx_values.shape[0]
        assert idx2_vec_values.shape[0] == idx2_aid_values.shape[0]
    if utool.VERBOSE:
        print('[smk_index] Residual Vectors for %d words. \naggregate=%r' %
              (len(wx2_idxs), aggregate,))
    # Nonaggregated residuals (inlined listcomp method for speed)
    words_list = [words_values[wx:wx + 1] for wx in wx_sublist]  # 1 ms
    vecs_list = [idx2_vec_values.take(idxs, axis=0) for idxs in idxs_list]  # 5.3 ms
    rvecs_list = [smk_core.get_norm_rvecs(vecs, word)
                  for vecs, word in zip(vecs_list, words_list)]  # 103 ms  # 90%
    if aggregate:
        # Aggregate over words of the same aid
        (aggvecs_list, aggaids_list, aggidxs_list) = smk_speed.compute_agg_rvecs(
            rvecs_list, idxs_list, aids_list)  # 38%
        aggfxs_list = [[idx2_fx_values.take(idxs) for idxs in aggidxs]
                       for aggidxs in aggidxs_list]
        if with_pandas:
            # Make aggregate dataframes
            wx2_aggvecs, wx2_aggaids, wx2_aggfxs = pdh.pandasify_agg_list(
                wx_sublist, aggvecs_list, aggaids_list, aggfxs_list)  # 617 ms  47%
        else:
            wx2_aggvecs = dict(zip(wx_sublist, aggvecs_list))
            wx2_aggaids = dict(zip(wx_sublist, aggaids_list))
            wx2_aggfxs = dict(zip(wx_sublist, aggfxs_list))
        if utool.DEBUG2:
            from ibeis.model.hots.smk import smk_debug
            smk_debug.check_wx2(words, wx2_aggvecs, wx2_aggaids, wx2_aggfxs)
        return wx2_aggvecs, wx2_aggaids, wx2_aggfxs
    # Make residuals dataframes
    # compatibility hack: each fx wrapped in a length-1 slice
    fxs_list = [[idx2_fx_values[idx:idx + 1] for idx in idxs]
                for idxs in idxs_list]
    if with_pandas:
        wx2_rvecs, wx2_aids, wx2_fxs = pdh.pandasify_rvecs_list(
            wx_sublist, idxs_list, rvecs_list, aids_list, fxs_list)  # 405 ms
    else:
        wx2_rvecs = dict(zip(wx_sublist, rvecs_list))
        wx2_aids = dict(zip(wx_sublist, aids_list))
        wx2_fxs = dict(zip(wx_sublist, fxs_list))
    if utool.DEBUG2:
        from ibeis.model.hots.smk import smk_debug
        smk_debug.check_wx2(words, wx2_rvecs, wx2_aids, wx2_fxs)
    return wx2_rvecs, wx2_aids, wx2_fxs
def compute_residuals_(words, wx2_idxs, idx2_vec, idx2_aid, idx2_fx, aggregate,
                       with_pandas=WITH_PANDAS):
    """
    Computes residual vectors based on word assignments
    returns mapping from word index to a set of residual vectors

    Output:
        wx2_rvecs - [ ... [ rvec_i1, ..., rvec_Mi ]_i ... ]
        wx2_aids  - [ ... [  aid_i1, ...,  aid_Mi ]_i ... ]
        wx2_fxs   - [ ... [[fxs]_i1, ..., [fxs]_Mi ]_i ... ]

    For every word:
        * list of aggvecs
    For every aggvec:
        * one parent aid, if aggregate is False: assert isunique(aids)
        * list of parent fxs, if aggregate is True: assert len(fxs) == 1

    >>> from ibeis.model.hots.smk.smk_index import *  # NOQA
    >>> from ibeis.model.hots.smk import smk_debug
    >>> ibs, annots_df, daids, qaids, invindex, wx2_idxs = smk_debug.testdata_raw_internals1()
    >>> words = invindex.words
    >>> idx2_aid = invindex.idx2_daid
    >>> idx2_fx = invindex.idx2_dfx
    >>> idx2_vec = invindex.idx2_dvec
    >>> aggregate = ibs.cfg.query_cfg.smk_cfg.aggregate
    >>> wx2_rvecs, wx2_aids = compute_residuals_(words, wx2_idxs, idx2_vec, idx2_aid, idx2_fx, aggregate)
    """
    # NOTE(review): this definition duplicates an earlier compute_residuals_
    # in this file; being later, it is the one in effect at import time.
    # Unwrap pandas containers to their raw values (pdh helpers pass
    # plain ndarrays through unchanged — presumably; verify in pdh).
    words_values = pdh.ensure_values(words)
    idx2_aid_values = pdh.ensure_values(idx2_aid)
    idx2_vec_values = pdh.ensure_values(idx2_vec)
    idx2_fx_values = pdh.ensure_values(idx2_fx)
    wx_sublist = pdh.ensure_index(wx2_idxs)
    # Build lists w.r.t. words: per-word descriptor indexes and parent aids
    idxs_list = pdh.ensure_values_subset(wx2_idxs, wx_sublist)
    aids_list = [idx2_aid_values.take(idxs) for idxs in idxs_list]
    #wx2_idxs_values = pdh.ensure_values_subset(wx2_idxs, wx_sublist)
    #idxs_list = [pdh.ensure_values(idxsdf).astype(INDEX_TYPE) for idxsdf in wx2_idxs_values]  # 13 ms
    if utool.DEBUG2:
        #assert np.all(np.diff(wx_sublist) == 1), 'not dense'
        # Sanity: index/aid lists align and flat tables have equal length
        assert all([len(a) == len(b) for a, b in zip(idxs_list, aids_list)]), 'bad alignment'
        assert idx2_vec_values.shape[0] == idx2_fx_values.shape[0]
        assert idx2_vec_values.shape[0] == idx2_aid_values.shape[0]
    # Prealloc output
    if utool.VERBOSE:
        print('[smk_index] Residual Vectors for %d words. \naggregate=%r' % (
            len(wx2_idxs),
            aggregate,
        ))
    # Nonaggregated residuals
    #_args1 = (words_values, wx_sublist, idxs_list, idx2_vec_values)
    #rvecs_list = smk_speed.compute_nonagg_rvec_listcomp(*_args1)  # 125 ms 11%
    # Slice (not scalar index) keeps each word 2d for broadcasting
    words_list = [words_values[wx:wx + 1] for wx in wx_sublist]  # 1 ms
    vecs_list = [idx2_vec_values.take(idxs, axis=0) for idxs in idxs_list]  # 5.3 ms
    rvecs_list = [
        smk_core.get_norm_rvecs(vecs, word)
        for vecs, word in zip(vecs_list, words_list)
    ]  # 103 ms  # 90%
    if aggregate:
        # Aggregate over words of the same aid
        tup = smk_speed.compute_agg_rvecs(rvecs_list, idxs_list, aids_list)  # 38%
        (aggvecs_list, aggaids_list, aggidxs_list) = tup
        # Feature indexes of the descriptors merged into each aggregate vec
        aggfxs_list = [[idx2_fx_values.take(idxs) for idxs in aggidxs]
                       for aggidxs in aggidxs_list]
        if with_pandas:
            _args2 = (wx_sublist, aggvecs_list, aggaids_list, aggfxs_list)
            # Make aggregate dataframes
            wx2_aggvecs, wx2_aggaids, wx2_aggfxs = pdh.pandasify_agg_list(
                *_args2)  # 617 ms  47%
        else:
            # Plain dict output keyed by word index
            wx2_aggvecs = {
                wx: aggvecs
                for wx, aggvecs in zip(wx_sublist, aggvecs_list)
            }
            wx2_aggaids = {
                wx: aggaids
                for wx, aggaids in zip(wx_sublist, aggaids_list)
            }
            wx2_aggfxs = {
                wx: aggfxs
                for wx, aggfxs in zip(wx_sublist, aggfxs_list)
            }
        if utool.DEBUG2:
            from ibeis.model.hots.smk import smk_debug
            smk_debug.check_wx2(words, wx2_aggvecs, wx2_aggaids, wx2_aggfxs)
        return wx2_aggvecs, wx2_aggaids, wx2_aggfxs
    else:
        # Make residuals dataframes
        # compatibility hack: each fx wrapped in a length-1 slice so the
        # non-aggregate output shape matches the aggregate case
        fxs_list = [[idx2_fx_values[idx:idx + 1] for idx in idxs]
                    for idxs in idxs_list]
        if with_pandas:
            _args3 = (wx_sublist, idxs_list, rvecs_list, aids_list, fxs_list)
            wx2_rvecs, wx2_aids, wx2_fxs = pdh.pandasify_rvecs_list(
                *_args3)  # 405 ms
        else:
            # Plain dict output keyed by word index
            wx2_rvecs = {
                wx: rvecs
                for wx, rvecs in zip(wx_sublist, rvecs_list)
            }
            wx2_aids = {wx: aids for wx, aids in zip(wx_sublist, aids_list)}
            wx2_fxs = {wx: fxs for wx, fxs in zip(wx_sublist, fxs_list)}
        if utool.DEBUG2:
            from ibeis.model.hots.smk import smk_debug
            smk_debug.check_wx2(words, wx2_rvecs, wx2_aids, wx2_fxs)
        return wx2_rvecs, wx2_aids, wx2_fxs