Example #1
    def __init__(nn_index, hs, cx_list):
        import algos
        import numpy as np
        from itertools import chain, izip  # Python 2; on Python 3 use the built-in zip

        cx2_desc = hs.feats.cx2_desc
        # Make unique id for indexed descriptors
        feat_uid = hs.prefs.feat_cfg.get_uid()
        sample_uid = helpers.hashstr_arr(cx_list, "dcxs")
        uid = "_" + sample_uid + feat_uid
        # Number of features per sample chip
        sx2_nFeat = [len(cx2_desc[cx]) for cx in cx_list]
        # Inverted index from indexed descriptor to chipx and featx
        _ax2_cx = [[cx] * nFeat for (cx, nFeat) in izip(cx_list, sx2_nFeat)]
        _ax2_fx = [range(nFeat) for nFeat in sx2_nFeat]
        ax2_cx = np.array(list(chain.from_iterable(_ax2_cx)))
        ax2_fx = np.array(list(chain.from_iterable(_ax2_fx)))
        # Aggregate indexed descriptors into continuous structure
        ax2_desc = np.vstack([cx2_desc[cx] for cx in cx_list if len(cx2_desc[cx]) > 0])
        # Build/Load the flann index
        flann_params = {"algorithm": "kdtree", "trees": 4}
        precomp_kwargs = {
            "cache_dir": hs.dirs.cache_dir,
            "uid": uid,
            "flann_params": flann_params,
            "force_recompute": hs.args.nocache_flann,
        }
        flann = algos.precompute_flann(ax2_desc, **precomp_kwargs)
        # ----
        # Agg Data
        nn_index.ax2_cx = ax2_cx
        nn_index.ax2_fx = ax2_fx
        nn_index.ax2_data = ax2_desc
        nn_index.flann = flann
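
The inverted index built above is what makes FLANN results interpretable: each aggregate row ax maps back to a source chip and feature. A minimal lookup sketch, assuming qfx2_ax holds aggregate row indices returned by flann.nn_index() (that name is illustrative):

# Map aggregate row indices back to (chip index, feature index):
qfx2_cx = nn_index.ax2_cx[qfx2_ax]  # which chip each neighbor came from
qfx2_fx = nn_index.ax2_fx[qfx2_ax]  # which feature within that chip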
Example #2
import os
from os.path import join

import pyflann  # `helpers` is a project-local module


def precompute_flann(data, cache_dir=None, uid='', flann_params=None,
                     force_recompute=False):
    '''Tries to load a cached FLANN index before doing anything.'''
    if flann_params is None:
        flann_params = {}  # guard: flann_params.values() below would crash on None
    print('[algos] precompute_flann(%r): ' % uid)
    cache_dir = '.' if cache_dir is None else cache_dir
    # Generate a unique filename for data and flann parameters
    fparams_uid = helpers.remove_chars(str(flann_params.values()), ', \'[]')
    data_uid = helpers.hashstr_arr(data, 'dID')  # flann is dependent on the data
    flann_suffix = '_' + fparams_uid + '_' + data_uid + '.flann'
    # Append any user labels
    flann_fname = 'flann_index_' + uid + flann_suffix
    flann_fpath = os.path.normpath(join(cache_dir, flann_fname))
    # Load the index if it exists
    flann = pyflann.FLANN()
    load_success = False
    if helpers.checkpath(flann_fpath) and not force_recompute:
        try:
            #print('[algos] precompute_flann(): trying to load: %r' % flann_fname)
            flann.load_index(flann_fpath, data)
            print('[algos]...flann cache hit')
            load_success = True
        except Exception as ex:
            print('[algos] precompute_flann(): ...cannot load index')
            print('[algos] precompute_flann(): ...caught ex=\n%r' % (ex,))
    if not load_success:
        # Rebuild the index otherwise
        with helpers.Timer(msg='compute FLANN', newline=False):
            flann.build_index(data, **flann_params)
        print('[algos] precompute_flann(): save_index(%r)' % flann_fname)
        flann.save_index(flann_fpath)
    return flann
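
A hedged usage sketch, not from the source repo: build or load an index over random descriptors, then query it. numpy and pyflann are real dependencies; the demo data, cache_dir, and uid values are illustrative.

import numpy as np

# Hypothetical demo data: 1000 SIFT-like 128-d descriptors.
data = np.random.rand(1000, 128).astype(np.float32)
flann_params = {'algorithm': 'kdtree', 'trees': 4}
# The first call builds and saves the index; later calls load it from disk.
flann = precompute_flann(data, cache_dir='.', uid='demo',
                         flann_params=flann_params)
# Query the 2 nearest neighbors of the first 5 descriptors.
idxs, dists = flann.nn_index(data[:5], num_neighbors=2)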
Example #3
def get_uid_list(qdat, *args, **kwargs):
    uid_list = qdat.cfg.get_uid_list(*args, **kwargs)
    if "noDCXS" not in args:
        # In case you don't search the entire dataset
        dcxs_uid = helpers.hashstr_arr(qdat._dcxs, "dcxs")
        uid_list += ["_", dcxs_uid]
    return uid_list
Example #6
def get_cache_uid(hs, cx_list=None, lbl='cxs'):
    query_cfg = hs.prefs.query_cfg
    # Build query big cache uid
    hs_uid = 'HSDB(%s)' % hs.get_db_name()
    uid_list = [hs_uid] + query_cfg.get_uid_list()
    if cx_list is not None:
        cxs_uid = helpers.hashstr_arr(cx_list, lbl)  # lbl labels the hashed cx sample
        uid_list.append('_' + cxs_uid)
    cache_uid = ''.join(uid_list)
    return cache_uid
Example #8
def _load_features_bigcache(hs, cx_list):
    # args for smart load/save
    feat_cfg = hs.prefs.feat_cfg
    feat_uid = feat_cfg.get_uid()
    cache_dir  = hs.dirs.cache_dir
    sample_uid = helpers.hashstr_arr(cx_list, 'cids')
    bigcache_uid = '_'.join((feat_uid, sample_uid))
    ext = '.npy'
    loaded = bigcache_feat_load(cache_dir, bigcache_uid, ext)
    if loaded is not None:  # Cache Hit
        kpts_list, desc_list = loaded
    else:  # Cache Miss
        kpts_list, desc_list = _load_features_individualy(hs, cx_list)
        # Cache all the features
        bigcache_feat_save(cache_dir, bigcache_uid, ext, kpts_list, desc_list)
    return kpts_list, desc_list
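
bigcache_feat_load and bigcache_feat_save are project helpers; the underlying load-or-recompute pattern they implement is simple. A generic sketch with plain numpy (all names here are illustrative):

import os
import numpy as np

def load_or_compute(cache_fpath, compute_fn):
    # Return the cached array if it exists; otherwise compute, cache, return.
    if os.path.exists(cache_fpath):
        return np.load(cache_fpath)
    result = compute_fn()
    np.save(cache_fpath, result)
    return result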
Example #9
def ensure_nn_index(hs, qdat, dcxs):
    # NNIndexes depend on the data cxs AND feature / chip configs
    feat_uid = qdat.cfg._feat_cfg.get_uid()
    dcxs_uid = helpers.hashstr_arr(dcxs, 'dcxs') + feat_uid
    if dcxs_uid not in qdat._dcxs2_index:
        # Make sure the features are all computed first
        print('[mc3] qdat._data_index[dcxs_uid]... cache miss')
        print('[mc3] dcxs_ is not in qdat cache')
        print('[mc3] hashstr(dcxs_) = %r' % dcxs_uid)
        print('[mc3] REFRESHING FEATURES')
        hs.refresh_features(dcxs)
        # Compute the FLANN Index
        data_index = ds.NNIndex(hs, dcxs)
        qdat._dcxs2_index[dcxs_uid] = data_index
    else:
        print('[mc3] qdat._data_index[dcxs_uid]... cache hit')
    qdat._data_index = qdat._dcxs2_index[dcxs_uid]
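
qdat._dcxs2_index memoizes one NNIndex per (data sample, feature config) key, so repeated queries against the same database sample skip the FLANN rebuild. A stripped-down sketch of the same pattern (names illustrative):

_key2_index = {}

def get_or_build_index(key, build_fn):
    # Build the expensive index once per key, then reuse it on later calls.
    if key not in _key2_index:
        _key2_index[key] = build_fn()
    return _key2_index[key]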
Example #12
import sys  # `io` and `helpers` below are project-local modules


def precompute_akmeans(data,
                       num_clusters,
                       max_iters=100,
                       flann_params=None,
                       cache_dir=None,
                       force_recomp=False,
                       same_data=True,
                       uid=''):
    '''Precompute approximate kmeans.'''
    if flann_params is None:
        flann_params = {}
    print('[algos] pre_akmeans()')
    if same_data:
        data_uid = helpers.hashstr_arr(data, 'dID')
        uid += data_uid
    clusters_fname = 'akmeans_clusters'
    datax2cl_fname = 'akmeans_datax2cl'
    try:
        if not force_recomp:
            clusters = io.smart_load(cache_dir,
                                     clusters_fname,
                                     uid,
                                     '.npy',
                                     can_fail=False)
            datax2_clusterx = io.smart_load(cache_dir,
                                            datax2cl_fname,
                                            uid,
                                            '.npy',
                                            can_fail=False)
        else:
            raise Exception('forcing')
        # Hack to refine akmeans with a few more iterations
        if '--refine' in sys.argv or '--refine-exit' in sys.argv:
            max_iters_override = helpers.get_arg('--refine', type_=int)
            print('Overriding max_iters=%r' % max_iters_override)
            if max_iters_override is not None:
                max_iters = max_iters_override
            datax2_clusterx_old = datax2_clusterx
            print('[algos] refining:')
            print('[algos] ' + '_'.join([clusters_fname, uid]) + '.npy')
            print('[algos] ' + '_'.join([datax2cl_fname, uid]) + '.npy')
            (datax2_clusterx,
             clusters) = __akmeans_iterate(data, clusters, datax2_clusterx_old,
                                           max_iters, flann_params, 0, 10)
            io.smart_save(clusters, cache_dir, clusters_fname, uid, '.npy')
            io.smart_save(datax2_clusterx, cache_dir, datax2cl_fname, uid,
                          '.npy')
            if '--refine-exit' in sys.argv:
                print('exiting after refine')
                sys.exit(1)
        print('[algos] pre_akmeans(): ... loaded akmeans.')
    except Exception as ex:
        print('[algos] pre_akmeans(): ... could not load akmeans.')
        errstr = helpers.indent(repr(ex), '[algos]    ')
        print('[algos] pre_akmeans(): ... caught ex:\n %s ' % errstr)
        print('[algos] pre_akmeans(): printing debug_smart_load')
        print('---- <DEBUG SMART LOAD>---')
        io.debug_smart_load(cache_dir, clusters_fname)
        io.debug_smart_load(cache_dir, datax2cl_fname)
        print('----</DEBUG SMART LOAD>---')
        #print('[algos] Press Ctrl+C to stop k-means early (and save)')
        #signal.signal(signal.SIGINT, force_quit_akmeans) # set ctrl+c behavior
        print('[algos] computing:')
        print('[algos] ' + '_'.join([clusters_fname, uid]) + '.npy')
        print('[algos] ' + '_'.join([datax2cl_fname, uid]) + '.npy')
        print('[algos] pre_akmeans(): calling akmeans')
        (datax2_clusterx, clusters) = akmeans(data, num_clusters, max_iters,
                                              flann_params)
        print('[algos] pre_akmeans(): finished running akmeans')
        io.smart_save(clusters, cache_dir, clusters_fname, uid, '.npy')
        io.smart_save(datax2_clusterx, cache_dir, datax2cl_fname, uid, '.npy')
        #print('[algos] Removing Ctrl+C signal handler')
        #signal.signal(signal.SIGINT, signal.SIG_DFL) # reset ctrl+c behavior
    print('[algos] pre_akmeans(): return')
    return (datax2_clusterx, clusters)
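
A hedged usage sketch for precompute_akmeans; the demo data is assumed, and the io and helpers modules must resolve to the project's own implementations for the caching to work.

import numpy as np

data = np.random.rand(5000, 128).astype(np.float32)  # hypothetical descriptors
datax2_clusterx, clusters = precompute_akmeans(data, num_clusters=64,
                                               max_iters=50, cache_dir='.',
                                               uid='_demo')
# datax2_clusterx[i] is the cluster assigned to data[i];
# clusters has shape (64, 128): one row per cluster center.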
Example #13
import textwrap
from os.path import join

import numpy as np  # io, mc3, and helpers are project-local modules


def get_test_results(hs, qcx_list, qdat, cfgx=0, nCfg=1,
                     force_load=False):
    dcxs = hs.get_indexed_sample()
    query_uid = qdat.get_uid()
    print('[harn] get_test_results(): %r' % query_uid)
    hs_uid    = hs.get_db_name()
    qcxs_uid  = helpers.hashstr_arr(qcx_list)
    test_uid  = hs_uid + query_uid + qcxs_uid
    cache_dir = join(hs.dirs.cache_dir, 'experiment_harness_results')
    io_kwargs = dict(dpath=cache_dir, fname='test_results', uid=test_uid, ext='.cPkl')
    # High level caching
    qx2_bestranks = []
    #nChips = hs.get_num_chip()
    #nNames = len(hs.tables.nx2_name) - 2
    nQuery = len(qcx_list)
    #NMultiNames =
    nPrevQ = nQuery * cfgx
    qx2_reslist = []
    if not hs.args.nocache_query and (not force_load):
        test_results = io.smart_load(**io_kwargs)
        if test_results is None:
            pass
        elif len(test_results) != 1:
            print('recaching test_results')
        else:
            return test_results, [[{0: None}]] * nQuery
    for qx, qcx in enumerate(qcx_list):
        print(textwrap.dedent('''
        [harn]----------------
        [harn] TEST %d/%d
        [harn]----------------''' % (qx + nPrevQ + 1, nQuery * nCfg)))
        gt_cxs = hs.get_other_indexed_cxs(qcx)
        #title = 'q' + hs.cidstr(qcx) + ' - ' + notes
        #print('[harn] title=%r' % (title,))
        #print('[harn] gt_' + hs.cidstr(gt_cxs))
        res_list = mc3.execute_query_safe(hs, qdat, [qcx], dcxs)
        bestranks = []
        algos = []
        qx2_reslist += [res_list]
        assert len(res_list) == 1
        for qcx2_res in res_list:
            assert len(qcx2_res) == 1
            res = qcx2_res[qcx]
            algos += [res.title]
            gt_ranks = res.get_gt_ranks(gt_cxs)
            #print('[harn] cx_ranks(/%4r) = %r' % (nChips, gt_ranks))
            #print('[harn] cx_ranks(/%4r) = %r' % (NMultiNames, gt_ranks))
            #print('ns_ranks(/%4r) = %r' % (nNames, gt_ranks))
            if len(gt_ranks) == 0:
                _bestrank = -1
            else:
                _bestrank = min(gt_ranks)
            bestranks += [_bestrank]
        # record metadata
        qx2_bestranks += [bestranks]
    mat_vals = np.array(qx2_bestranks)
    test_results = (mat_vals,)
    # High level caching
    helpers.ensuredir(cache_dir)
    io.smart_save(test_results, **io_kwargs)
    return test_results, qx2_reslist