def __init__(nn_index, hs, cx_list):
    # NNIndex constructor. Aggregates the descriptors of every chip in
    # cx_list into one contiguous array and builds (or loads a cached)
    # FLANN index over it.  `nn_index` plays the role of `self` (project
    # naming convention).
    #
    # Inputs (per the attributes read below):
    #   hs      - HotSpotter-style object exposing feats.cx2_desc,
    #             prefs.feat_cfg, dirs.cache_dir, args.nocache_flann
    #   cx_list - chip indices whose descriptors get indexed
    import algos
    cx2_desc = hs.feats.cx2_desc
    # Make unique id for indexed descriptors
    feat_uid = hs.prefs.feat_cfg.get_uid()
    sample_uid = helpers.hashstr_arr(cx_list, "dcxs")
    uid = "_" + sample_uid + feat_uid
    # Number of features per sample chip
    sx2_nFeat = [len(cx2_desc[cx]) for cx in iter(cx_list)]
    # Inverted index from indexed descriptor to chipx and featx:
    # aggregate row ax maps back to (ax2_cx[ax], ax2_fx[ax])
    _ax2_cx = [[cx] * nFeat for (cx, nFeat) in izip(cx_list, sx2_nFeat)]
    _ax2_fx = [range(nFeat) for nFeat in iter(sx2_nFeat)]
    ax2_cx = np.array(list(chain.from_iterable(_ax2_cx)))
    ax2_fx = np.array(list(chain.from_iterable(_ax2_fx)))
    # Aggregate indexed descriptors into continuous structure.
    # Chips with zero features are skipped (they contribute no rows,
    # consistent with the empty lists built above).
    ax2_desc = np.vstack([cx2_desc[cx] for cx in cx_list if len(cx2_desc[cx]) > 0])
    # Build/Load the flann index (cached on disk, keyed by uid)
    flann_params = {"algorithm": "kdtree", "trees": 4}
    precomp_kwargs = {
        "cache_dir": hs.dirs.cache_dir,
        "uid": uid,
        "flann_params": flann_params,
        "force_recompute": hs.args.nocache_flann,
    }
    flann = algos.precompute_flann(ax2_desc, **precomp_kwargs)
    # ----
    # Agg Data
    nn_index.ax2_cx = ax2_cx
    nn_index.ax2_fx = ax2_fx
    nn_index.ax2_data = ax2_desc
    nn_index.flann = flann
def precompute_flann(data, cache_dir=None, uid='', flann_params=None,
                     force_recompute=False):
    '''Tries to load a cached flann index before doing anything.

    The cache filename hashes both the data and the flann parameters, so a
    stale index is never loaded for different data.  Returns a pyflann.FLANN
    instance that is either loaded from disk or freshly built (and saved).
    '''
    print('[algos] precompute_flann(%r): ' % uid)
    cache_dir = '.' if cache_dir is None else cache_dir
    # BUGFIX: flann_params defaults to None but was dereferenced with
    # .values() below, crashing any call that omitted it.  An empty dict
    # lets pyflann fall back to its own defaults in build_index.
    flann_params = {} if flann_params is None else flann_params
    # Generate a unique filename for data and flann parameters
    fparams_uid = helpers.remove_chars(str(flann_params.values()), ', \'[]')
    data_uid = helpers.hashstr_arr(data, 'dID')  # flann is dependent on the data
    flann_suffix = '_' + fparams_uid + '_' + data_uid + '.flann'
    # Append any user labels
    flann_fname = 'flann_index_' + uid + flann_suffix
    flann_fpath = os.path.normpath(join(cache_dir, flann_fname))
    # Load the index if it exists
    flann = pyflann.FLANN()
    load_success = False
    if helpers.checkpath(flann_fpath) and not force_recompute:
        try:
            #print('[algos] precompute_flann(): #trying to load: %r ' % flann_fname)
            flann.load_index(flann_fpath, data)
            print('[algos]...flann cache hit')
            load_success = True
        except Exception as ex:
            # A corrupt/incompatible index file falls through to a rebuild
            print('[algos] precompute_flann(): ...cannot load index')
            print('[algos] precompute_flann(): ...caught ex=\n%r' % (ex,))
    if not load_success:
        # Rebuild the index otherwise
        with helpers.Timer(msg='compute FLANN', newline=False):
            flann.build_index(data, **flann_params)
        print('[algos] precompute_flann(): save_index(%r)' % flann_fname)
        flann.save_index(flann_fpath)
    return flann
def get_uid_list(qdat, *args, **kwargs):
    """Return uid components for qdat's query config.

    Appends a hash of the indexed data sample (qdat._dcxs) unless the
    caller passes 'noDCXS' as a positional flag.
    """
    uid_list = qdat.cfg.get_uid_list(*args, **kwargs)
    # Idiom fix: 'x not in y' instead of 'not x in y'
    if "noDCXS" not in args:
        # In case you don't search the entire dataset
        dcxs_uid = helpers.hashstr_arr(qdat._dcxs, "dcxs")
        uid_list += ["_", dcxs_uid]
    return uid_list
def __init__(nn_index, hs, cx_list):
    # Build an aggregate descriptor array for the chips in cx_list and
    # attach a (possibly cached) FLANN index to nn_index, which acts as
    # `self` here.
    import algos
    cx2_desc = hs.feats.cx2_desc
    # Cache uid combines a hash of the sample with the feature config uid
    uid = '_' + helpers.hashstr_arr(cx_list, 'dcxs') + hs.prefs.feat_cfg.get_uid()
    # How many features each sampled chip contributes
    nFeat_list = [len(cx2_desc[cx]) for cx in cx_list]
    # Inverted index: aggregate row -> (chip index, feature index)
    cx_runs = [[cx] * nFeat for (cx, nFeat) in izip(cx_list, nFeat_list)]
    fx_runs = [range(nFeat) for nFeat in nFeat_list]
    ax2_cx = np.array(list(chain.from_iterable(cx_runs)))
    ax2_fx = np.array(list(chain.from_iterable(fx_runs)))
    # Stack all non-empty descriptor arrays into one contiguous block
    nonempty_descs = [cx2_desc[cx] for cx in cx_list if len(cx2_desc[cx]) > 0]
    ax2_desc = np.vstack(nonempty_descs)
    # Build or load the FLANN index for the aggregated descriptors
    flann_params = {'algorithm': 'kdtree', 'trees': 4}
    flann = algos.precompute_flann(ax2_desc,
                                   cache_dir=hs.dirs.cache_dir,
                                   uid=uid,
                                   flann_params=flann_params,
                                   force_recompute=hs.args.nocache_flann)
    #----
    # Agg Data
    nn_index.ax2_cx = ax2_cx
    nn_index.ax2_fx = ax2_fx
    nn_index.ax2_data = ax2_desc
    nn_index.flann = flann
def get_uid_list(qdat, *args, **kwargs):
    """Collect the uid components of qdat's query configuration."""
    uid_list = qdat.cfg.get_uid_list(*args, **kwargs)
    # In case you don't search the entire dataset, tag the uid with a
    # hash of the data sample actually indexed
    if 'noDCXS' not in args:
        uid_list.extend(['_', helpers.hashstr_arr(qdat._dcxs, 'dcxs')])
    return uid_list
def get_cache_uid(hs, cx_list=None, lbl='cxs'):
    """Build the big-cache uid string for a query on this database.

    NOTE(review): `lbl` is accepted but unused by the original
    implementation; kept for interface compatibility.
    """
    query_cfg = hs.prefs.query_cfg
    # Build query big cache uid: database tag + query config components
    parts = ['HSDB(%s)' % hs.get_db_name()]
    parts.extend(query_cfg.get_uid_list())
    if cx_list is not None:
        # Tag with a hash of the chip sample when not using everything
        parts.append('_' + helpers.hashstr_arr(cx_list, 'cxs'))
    return ''.join(parts)
def _load_features_bigcache(hs, cx_list):
    """Load keypoints/descriptors for cx_list via the big on-disk cache."""
    # args for smart load/save
    feat_cfg = hs.prefs.feat_cfg
    cache_dir = hs.dirs.cache_dir
    sample_uid = helpers.hashstr_arr(cx_list, 'cids')
    bigcache_uid = '_'.join((feat_cfg.get_uid(), sample_uid))
    ext = '.npy'
    loaded = bigcache_feat_load(cache_dir, bigcache_uid, ext)
    if loaded is None:
        # Cache Miss: compute each chip's features, then cache them all
        kpts_list, desc_list = _load_features_individualy(hs, cx_list)
        bigcache_feat_save(cache_dir, bigcache_uid, ext, kpts_list, desc_list)
    else:
        # Cache Hit
        kpts_list, desc_list = loaded
    return kpts_list, desc_list
def ensure_nn_index(hs, qdat, dcxs):
    """Ensure qdat._data_index holds a NNIndex built over the chips dcxs.

    NNIndexes depend on the data cxs AND the feature / chip configs, so the
    cache key combines a hash of dcxs with the feature-config uid.  On a
    miss the features are refreshed and a new FLANN index is computed.
    """
    feat_uid = qdat.cfg._feat_cfg.get_uid()
    dcxs_uid = helpers.hashstr_arr(dcxs, 'dcxs') + feat_uid
    # Idiom fix: 'x not in y' instead of 'not x in y'
    if dcxs_uid not in qdat._dcxs2_index:
        # Make sure the features are all computed first
        print('[mc3] qdat._data_index[dcxs_uid]... cache miss')
        print('[mc3] dcxs_ is not in qdat cache')
        print('[mc3] hashstr(dcxs_) = %r' % dcxs_uid)
        print('[mc3] REFRESHING FEATURES')
        hs.refresh_features(dcxs)
        # Compute the FLANN Index
        data_index = ds.NNIndex(hs, dcxs)
        qdat._dcxs2_index[dcxs_uid] = data_index
    else:
        print('[mc3] qdat._data_index[dcxs_uid]... cache hit')
    qdat._data_index = qdat._dcxs2_index[dcxs_uid]
def precompute_flann(data, cache_dir=None, uid='', flann_params=None,
                     force_recompute=False):
    ''' Tries to load a cached flann index before doing anything'''
    # Returns a pyflann.FLANN instance.  The cache filename hashes both the
    # data and the flann parameters, so a stale index is never loaded for
    # different data.
    # NOTE(review): flann_params=None reaches flann_params.values() below
    # unguarded — callers appear to always pass a dict; verify.
    print('[algos] precompute_flann(%r): ' % uid)
    cache_dir = '.' if cache_dir is None else cache_dir
    # Generate a unique filename for data and flann parameters
    fparams_uid = helpers.remove_chars(str(flann_params.values()), ', \'[]')
    data_uid = helpers.hashstr_arr(data, 'dID')  # flann is dependent on the data
    flann_suffix = '_' + fparams_uid + '_' + data_uid + '.flann'
    # Append any user labels
    flann_fname = 'flann_index_' + uid + flann_suffix
    flann_fpath = os.path.normpath(join(cache_dir, flann_fname))
    # Load the index if it exists
    flann = pyflann.FLANN()
    load_success = False
    if helpers.checkpath(flann_fpath) and not force_recompute:
        try:
            #print('[algos] precompute_flann(): #trying to load: %r ' % flann_fname)
            flann.load_index(flann_fpath, data)
            print('[algos]...flann cache hit')
            load_success = True
        except Exception as ex:
            # A corrupt or incompatible index file falls through to rebuild
            print('[algos] precompute_flann(): ...cannot load index')
            print('[algos] precompute_flann(): ...caught ex=\n%r' % (ex, ))
    if not load_success:
        # Rebuild the index otherwise
        with helpers.Timer(msg='compute FLANN', newline=False):
            flann.build_index(data, **flann_params)
        print('[algos] precompute_flann(): save_index(%r)' % flann_fname)
        flann.save_index(flann_fpath)
    return flann
def precompute_akmeans(data, num_clusters, max_iters=100, flann_params=None,
                       cache_dir=None, force_recomp=False, same_data=True,
                       uid=''):
    '''precompute aproximate kmeans

    Cached wrapper around akmeans(): tries to load a previous clustering
    from cache_dir before recomputing.  Returns (datax2_clusterx, clusters).
    Raising inside the try (including the deliberate 'forcing' exception
    when force_recomp is set) routes control to the recompute path.
    '''
    if flann_params is None:
        flann_params = {}
    print('[algos] pre_akmeans()')
    if same_data:
        # Tie the cache uid to the data itself so different data never
        # collides in the cache
        data_uid = helpers.hashstr_arr(data, 'dID')
        uid += data_uid
    clusters_fname = 'akmeans_clusters'
    datax2cl_fname = 'akmeans_datax2cl'
    try:
        if not force_recomp:
            clusters = io.smart_load(cache_dir, clusters_fname, uid, '.npy',
                                     can_fail=False)
            datax2_clusterx = io.smart_load(cache_dir, datax2cl_fname, uid,
                                            '.npy', can_fail=False)
        else:
            raise Exception('forcing')
        # Hack to refine akmeans with a few more iterations
        if '--refine' in sys.argv or '--refine-exit' in sys.argv:
            max_iters_override = helpers.get_arg('--refine', type_=int)
            print('Overriding max_iters=%r' % max_iters_override)
            if not max_iters_override is None:
                max_iters = max_iters_override
            datax2_clusterx_old = datax2_clusterx
            print('[algos] refining:')
            print('[algos] ' + '_'.join([clusters_fname, uid]) + '.npy')
            print('[algos] ' + '_'.join([datax2cl_fname, uid]) + '.npy')
            (datax2_clusterx, clusters) = __akmeans_iterate(data, clusters,
                                                            datax2_clusterx_old,
                                                            max_iters,
                                                            flann_params, 0, 10)
            io.smart_save(clusters, cache_dir, clusters_fname, uid, '.npy')
            io.smart_save(datax2_clusterx, cache_dir, datax2cl_fname, uid, '.npy')
            if '--refine-exit' in sys.argv:
                print('exiting after refine')
                sys.exit(1)
        print('[algos] pre_akmeans(): ... loaded akmeans.')
    except Exception as ex:
        # Cache miss (or forced recompute): run akmeans from scratch
        print('[algos] pre_akmeans(): ... could not load akmeans.')
        errstr = helpers.indent(repr(ex), '[algos] ')
        print('[algos] pre_akmeans(): ... \ncaught ex:\n %s ' % errstr)
        print('[algos] pre_akmeans(): printing debug_smart_load')
        print('---- <DEBUG SMART LOAD>---')
        io.debug_smart_load(cache_dir, clusters_fname)
        io.debug_smart_load(cache_dir, datax2cl_fname)
        print('----</DEBUG SMART LOAD>---')
        #print('[algos] Press Ctrl+C to stop k-means early (and save)')
        #signal.signal(signal.SIGINT, force_quit_akmeans)  # set ctrl+c behavior
        print('[algos] computing:')
        print('[algos] ' + '_'.join([clusters_fname, uid]) + '.npy')
        print('[algos] ' + '_'.join([datax2cl_fname, uid]) + '.npy')
        print('[algos] pre_akmeans(): calling akmeans')
        (datax2_clusterx, clusters) = akmeans(data, num_clusters, max_iters,
                                              flann_params)
        print('[algos] pre_akmeans(): finished running akmeans')
        io.smart_save(clusters, cache_dir, clusters_fname, uid, '.npy')
        io.smart_save(datax2_clusterx, cache_dir, datax2cl_fname, uid, '.npy')
        #print('[algos] Removing Ctrl+C signal handler')
        #signal.signal(signal.SIGINT, signal.SIG_DFL)  # reset ctrl+c behavior
    print('[algos] pre_akmeans(): return')
    return (datax2_clusterx, clusters)
def get_test_results(hs, qcx_list, qdat, cfgx=0, nCfg=1, force_load=False):
    # Run (or load from cache) queries for every qcx in qcx_list under the
    # current qdat configuration.  Returns (test_results, qx2_reslist)
    # where test_results is a 1-tuple holding the matrix of best
    # ground-truth ranks (one row per query; -1 means no ground truth).
    # cfgx/nCfg only affect the progress-printout numbering.
    dcxs = hs.get_indexed_sample()
    query_uid = qdat.get_uid()
    print('[harn] get_test_results(): %r' % query_uid)
    hs_uid = hs.get_db_name()
    qcxs_uid = helpers.hashstr_arr(qcx_list)
    test_uid = hs_uid + query_uid + qcxs_uid
    cache_dir = join(hs.dirs.cache_dir, 'experiment_harness_results')
    io_kwargs = dict(dpath=cache_dir, fname='test_results', uid=test_uid,
                     ext='.cPkl')
    # High level caching
    qx2_bestranks = []
    #nChips = hs.get_num_chip()
    #nNames = len(hs.tables.nx2_name) - 2
    nQuery = len(qcx_list)
    #NMultiNames =
    nPrevQ = nQuery * cfgx  # queries already done by earlier configs
    qx2_reslist = []
    if not hs.args.nocache_query and (not force_load):
        test_results = io.smart_load(**io_kwargs)
        # Only a well-formed 1-tuple counts as a cache hit; the reslist is
        # stubbed with placeholder dicts on the cached path
        if test_results is None:
            pass
        elif len(test_results) != 1:
            print('recaching test_results')
        elif not test_results is None:
            return test_results, [[{0: None}]] * nQuery
    for qx, qcx in enumerate(qcx_list):
        print(textwrap.dedent('''
        [harn]----------------
        [harn] TEST %d/%d
        [harn]----------------''' % (qx + nPrevQ + 1, nQuery * nCfg)))
        gt_cxs = hs.get_other_indexed_cxs(qcx)
        #title = 'q' + hs.cidstr(qcx) + ' - ' + notes
        #print('[harn] title=%r' % (title,))
        #print('[harn] gt_' + hs.cidstr(gt_cxs))
        res_list = mc3.execute_query_safe(hs, qdat, [qcx], dcxs)
        bestranks = []
        algos = []  # NOTE(review): collected but never used after the loop
        qx2_reslist += [res_list]
        assert len(res_list) == 1
        for qcx2_res in res_list:
            assert len(qcx2_res) == 1
            res = qcx2_res[qcx]
            algos += [res.title]
            gt_ranks = res.get_gt_ranks(gt_cxs)
            #print('[harn] cx_ranks(/%4r) = %r' % (nChips, gt_ranks))
            #print('[harn] cx_ranks(/%4r) = %r' % (NMultiNames, gt_ranks))
            #print('ns_ranks(/%4r) = %r' % (nNames, gt_ranks))
            # -1 flags a query with no ground-truth matches
            if len(gt_ranks) == 0:
                _bestrank = -1
            else:
                _bestrank = min(gt_ranks)
            bestranks += [_bestrank]
        # record metadata
        qx2_bestranks += [bestranks]
    mat_vals = np.array(qx2_bestranks)
    test_results = (mat_vals,)
    # High level caching
    helpers.ensuredir(cache_dir)
    io.smart_save(test_results, **io_kwargs)
    return test_results, qx2_reslist
def precompute_akmeans(data, num_clusters, max_iters=100, flann_params=None,
                       cache_dir=None, force_recomp=False, same_data=True,
                       uid=''):
    '''precompute aproximate kmeans

    Caching front end for akmeans(): loads a previously saved clustering
    when possible, otherwise computes and saves one.  Returns the pair
    (datax2_clusterx, clusters).  Any exception in the try block — including
    the deliberate 'forcing' raise when force_recomp is set — falls through
    to the recompute path.
    '''
    if flann_params is None:
        flann_params = {}
    print('[algos] pre_akmeans()')
    if same_data:
        # Make the cache uid depend on the data so different inputs never
        # share cache entries
        data_uid = helpers.hashstr_arr(data, 'dID')
        uid += data_uid
    clusters_fname = 'akmeans_clusters'
    datax2cl_fname = 'akmeans_datax2cl'
    try:
        if not force_recomp:
            clusters = io.smart_load(cache_dir, clusters_fname, uid, '.npy',
                                     can_fail=False)
            datax2_clusterx = io.smart_load(cache_dir, datax2cl_fname, uid,
                                            '.npy', can_fail=False)
        else:
            raise Exception('forcing')
        # Hack to refine akmeans with a few more iterations
        if '--refine' in sys.argv or '--refine-exit' in sys.argv:
            max_iters_override = helpers.get_arg('--refine', type_=int)
            print('Overriding max_iters=%r' % max_iters_override)
            if not max_iters_override is None:
                max_iters = max_iters_override
            datax2_clusterx_old = datax2_clusterx
            print('[algos] refining:')
            print('[algos] ' + '_'.join([clusters_fname, uid]) + '.npy')
            print('[algos] ' + '_'.join([datax2cl_fname, uid]) + '.npy')
            (datax2_clusterx, clusters) = __akmeans_iterate(
                data, clusters, datax2_clusterx_old, max_iters, flann_params,
                0, 10)
            io.smart_save(clusters, cache_dir, clusters_fname, uid, '.npy')
            io.smart_save(datax2_clusterx, cache_dir, datax2cl_fname, uid,
                          '.npy')
            if '--refine-exit' in sys.argv:
                print('exiting after refine')
                sys.exit(1)
        print('[algos] pre_akmeans(): ... loaded akmeans.')
    except Exception as ex:
        # Cache miss or forced recompute: run akmeans from scratch
        print('[algos] pre_akmeans(): ... could not load akmeans.')
        errstr = helpers.indent(repr(ex), '[algos] ')
        print('[algos] pre_akmeans(): ... \ncaught ex:\n %s ' % errstr)
        print('[algos] pre_akmeans(): printing debug_smart_load')
        print('---- <DEBUG SMART LOAD>---')
        io.debug_smart_load(cache_dir, clusters_fname)
        io.debug_smart_load(cache_dir, datax2cl_fname)
        print('----</DEBUG SMART LOAD>---')
        #print('[algos] Press Ctrl+C to stop k-means early (and save)')
        #signal.signal(signal.SIGINT, force_quit_akmeans)  # set ctrl+c behavior
        print('[algos] computing:')
        print('[algos] ' + '_'.join([clusters_fname, uid]) + '.npy')
        print('[algos] ' + '_'.join([datax2cl_fname, uid]) + '.npy')
        print('[algos] pre_akmeans(): calling akmeans')
        (datax2_clusterx, clusters) = akmeans(data, num_clusters, max_iters,
                                              flann_params)
        print('[algos] pre_akmeans(): finished running akmeans')
        io.smart_save(clusters, cache_dir, clusters_fname, uid, '.npy')
        io.smart_save(datax2_clusterx, cache_dir, datax2cl_fname, uid, '.npy')
        #print('[algos] Removing Ctrl+C signal handler')
        #signal.signal(signal.SIGINT, signal.SIG_DFL)  # reset ctrl+c behavior
    print('[algos] pre_akmeans(): return')
    return (datax2_clusterx, clusters)