def execute_and_save(qreq_miss):
    # Iterate over vsone queries in chunks.
    total_chunks = ut.get_num_chunks(len(qreq_miss.qaids), qreq_miss.chunksize)
    qaid_chunk_iter = ut.ichunks(qreq_miss.qaids, qreq_miss.chunksize)
    _prog = ut.ProgPartial(
        length=total_chunks,
        freq=1,
        label='[mc5] query chunk: ',
        prog_hook=qreq_miss.prog_hook,
        bs=False,
    )
    qaid_chunk_iter = iter(_prog(qaid_chunk_iter))

    qaid_to_cm = {}
    for qaids in qaid_chunk_iter:
        sub_qreq = qreq_miss.shallowcopy(qaids=qaids)
        cm_batch = sub_qreq.execute_pipeline()
        assert len(cm_batch) == len(qaids), 'bad alignment'
        assert all([qaid == cm.qaid for qaid, cm in zip(qaids, cm_batch)])

        # TODO: we already computed the fpaths
        # should be able to pass them in
        fpath_list = sub_qreq.get_chipmatch_fpaths(qaids)
        _prog = ut.ProgPartial(
            length=len(cm_batch),
            adjust=True,
            freq=1,
            label='saving chip matches',
            bs=True,
        )
        for cm, fpath in _prog(zip(cm_batch, fpath_list)):
            cm.save_to_fpath(fpath, verbose=False)
        qaid_to_cm.update({cm.qaid: cm for cm in cm_batch})
    return qaid_to_cm
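

# Hedged usage sketch (not part of the original module): execute_and_save only
# needs a request object exposing ``qaids``, ``chunksize``, ``prog_hook``,
# ``shallowcopy``, ``execute_pipeline`` and ``get_chipmatch_fpaths``. The
# snippet below is a hypothetical illustration in the doctest style used
# elsewhere in this codebase; ``qreq_miss`` stands for whatever request the
# caller has already narrowed down to uncached queries.
# >>> qaid_to_cm = execute_and_save(qreq_miss)
# >>> assert set(qaid_to_cm.keys()) == set(qreq_miss.qaids)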
def batch_knn(indexer, vecs, K, chunksize=4096, label='batch knn'):
    """
    Works like `indexer.knn` but the input is split into batches and
    progress is reported to give an estimated time remaining.
    """
    # Preallocate output
    idxs = np.empty((vecs.shape[0], K), dtype=np.int32)
    dists = np.empty((vecs.shape[0], K), dtype=np.float32)
    # Generate chunk slices
    num_chunks = ut.get_num_chunks(vecs.shape[0], chunksize)
    iter_ = ut.ichunk_slices(vecs.shape[0], chunksize)
    prog = ut.ProgIter(iter_, length=num_chunks, label=label)
    for sl_ in prog:
        idxs[sl_], dists[sl_] = indexer.knn(vecs[sl_], K=K)
    return idxs, dists
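

# Hedged usage sketch (not part of the original module): a self-contained way
# to exercise batch_knn without a FLANN index. ``_BruteForceIndexer`` is a
# hypothetical stand-in defined only for this illustration; any object with a
# compatible ``knn(vecs, K)`` method would work the same way.
def _demo_batch_knn():
    import numpy as np

    class _BruteForceIndexer(object):
        """Toy indexer exposing the same ``knn(vecs, K)`` interface."""

        def __init__(self, data):
            self.data = data

        def knn(self, vecs, K):
            # Pairwise squared distances, then keep the K nearest per query
            dists = ((vecs[:, None, :] - self.data[None, :, :]) ** 2).sum(axis=2)
            idxs = np.argsort(dists, axis=1)[:, :K].astype(np.int32)
            return idxs, np.take_along_axis(dists, idxs, axis=1).astype(np.float32)

    rng = np.random.RandomState(0)
    indexer = _BruteForceIndexer(rng.rand(1000, 8).astype(np.float32))
    query_vecs = rng.rand(300, 8).astype(np.float32)
    idxs, dists = batch_knn(indexer, query_vecs, K=5, chunksize=128)
    assert idxs.shape == (300, 5) and dists.shape == (300, 5)
    return idxs, dists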
def compute_vocab(depc, fid_list, config):
    r"""
    Depcache method for computing a new visual vocab

    CommandLine:
        python -m wbia.core_annots --exec-compute_neighbor_index --show
        python -m wbia show_depc_annot_table_input --show --tablename=neighbor_index

        python -m wbia.algo.smk.vocab_indexer --exec-compute_vocab:0
        python -m wbia.algo.smk.vocab_indexer --exec-compute_vocab:1

        # FIXME make util_tests register
        python -m wbia.algo.smk.vocab_indexer compute_vocab:0

    Ignore:
        >>> # Lev Oxford Debug Example
        >>> import wbia
        >>> ibs = wbia.opendb('Oxford')
        >>> depc = ibs.depc
        >>> table = depc['vocab']
        >>> # Check what currently exists in vocab table
        >>> table.print_configs()
        >>> table.print_table()
        >>> table.print_internal_info()
        >>> # Grab aids used to compute vocab
        >>> from wbia.expt.experiment_helpers import get_annotcfg_list
        >>> expanded_aids_list = get_annotcfg_list(ibs, ['oxford'])[1]
        >>> qaids, daids = expanded_aids_list[0]
        >>> vocab_aids = daids
        >>> config = {'num_words': 64000}
        >>> exists = depc.check_rowids('vocab', [vocab_aids], config=config)
        >>> print('exists = %r' % (exists,))
        >>> vocab_rowid = depc.get_rowids('vocab', [vocab_aids], config=config)[0]
        >>> print('vocab_rowid = %r' % (vocab_rowid,))
        >>> vocab = table.get_row_data([vocab_rowid], 'words')[0]
        >>> print('vocab = %r' % (vocab,))

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.smk.vocab_indexer import *  # NOQA
        >>> # Test depcache access
        >>> import wbia
        >>> ibs, aid_list = wbia.testdata_aids('testdb1')
        >>> depc = ibs.depc_annot
        >>> input_tuple = [aid_list]
        >>> rowid_kw = {}
        >>> tablename = 'vocab'
        >>> vocabid_list = depc.get_rowids(tablename, input_tuple, **rowid_kw)
        >>> vocab = depc.get(tablename, input_tuple, 'words')[0]
        >>> assert vocab.wordflann is not None
        >>> assert vocab.wordflann._FLANN__curindex_data is not None
        >>> assert vocab.wordflann._FLANN__curindex_data is vocab.wx_to_word

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.smk.vocab_indexer import *  # NOQA
        >>> import wbia
        >>> ibs, aid_list = wbia.testdata_aids('testdb1')
        >>> depc = ibs.depc_annot
        >>> fid_list = depc.get_rowids('feat', aid_list)
        >>> config = VocabConfig()
        >>> vocab, train_vecs = ut.exec_func_src(compute_vocab, keys=['vocab', 'train_vecs'])
        >>> idx_to_vec = depc.d.get_feat_vecs(aid_list)[0]
        >>> self = vocab
        >>> ut.quit_if_noshow()
        >>> data = train_vecs
        >>> centroids = vocab.wx_to_word
        >>> import wbia.plottool as pt
        >>> vt.plot_centroids(data, centroids, num_pca_dims=2)
        >>> ut.show_if_requested()
        >>> #config = ibs.depc_annot['vocab'].configclass()
    """
    logger.info('[IBEIS] COMPUTE_VOCAB:')
    vecs_list = depc.get_native('feat', fid_list, 'vecs')
    train_vecs = np.vstack(vecs_list).astype(np.float32)
    num_words = config['num_words']
    logger.info(
        '[smk_index] Train Vocab(nWords=%d) using %d annots and %d descriptors'
        % (num_words, len(fid_list), len(train_vecs))
    )
    if config['algorithm'] == 'kdtree':
        flann_params = vt.get_flann_params(random_seed=42)
        kwds = dict(max_iters=20, flann_params=flann_params)
        words = vt.akmeans(train_vecs, num_words, **kwds)
    elif config['algorithm'] == 'minibatch':
        logger.info('Using minibatch kmeans')
        import sklearn.cluster

        rng = np.random.RandomState(config['random_seed'])
        n_init = config['n_init']
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            init_size = int(num_words * 4)
            batch_size = 1000
            n_batches = ut.get_num_chunks(train_vecs.shape[0], batch_size)
            minibatch_params = dict(
                n_clusters=num_words,
                init='k-means++',
                init_size=init_size,
                n_init=n_init,
                max_iter=30000 // n_batches,
                batch_size=batch_size,
                tol=0.0,
                max_no_improvement=10,
                reassignment_ratio=0.01,
            )
            logger.info('minibatch_params = %s' % (ut.repr4(minibatch_params),))
            clusterer = sklearn.cluster.MiniBatchKMeans(
                compute_labels=False, random_state=rng, verbose=2, **minibatch_params
            )
            try:
                clusterer.fit(train_vecs)
            except (Exception, KeyboardInterrupt) as ex:
                ut.printex(ex, tb=True)
                if ut.is_developer():
                    ut.embed()
                else:
                    raise
        words = clusterer.cluster_centers_
        logger.info('Finished clustering')
    # if False:
    #     flann_params['checks'] = 64
    #     flann_params['trees'] = 4
    #     num_words = 128
    #     centroids = vt.initialize_centroids(num_words, train_vecs, 'akmeans++')
    #     words, hist = vt.akmeans_iterations(
    #         train_vecs, centroids, max_iters=1000, monitor=True,
    #         flann_params=flann_params)
    logger.info('Constructing vocab')
    vocab = VisualVocab(words)
    logger.info('Building vocab index')
    vocab.build()
    logger.info('Returning vocab')
    return (vocab,)
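

# Hedged illustration (not part of the original module): a minimal,
# self-contained sketch of the 'minibatch' branch above, clustering synthetic
# float32 descriptors into a small vocabulary with
# sklearn.cluster.MiniBatchKMeans. The descriptor matrix and word count are
# made up for illustration; a real vocab is trained on SIFT descriptors with
# tens of thousands of words.
def _demo_minibatch_vocab_words(num_words=64, num_vecs=5000, dim=128, seed=42):
    import numpy as np
    import sklearn.cluster

    rng = np.random.RandomState(seed)
    train_vecs = rng.rand(num_vecs, dim).astype(np.float32)
    clusterer = sklearn.cluster.MiniBatchKMeans(
        n_clusters=num_words,
        init='k-means++',
        init_size=num_words * 4,
        batch_size=1000,
        n_init=1,
        compute_labels=False,
        random_state=rng,
    )
    clusterer.fit(train_vecs)
    words = clusterer.cluster_centers_
    assert words.shape == (num_words, dim)
    return words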
def execute_query2(qreq_, verbose, save_qcache, batch_size=None, use_supercache=False):
    """
    Breaks up a query request into several subrequests so chunks can be
    processed "more efficiently" and more safely.
    """
    if qreq_.prog_hook is not None:
        preload_hook, query_hook = qreq_.prog_hook.subdivide(spacing=[0, 0.15, 0.8])
        preload_hook(0, lbl='preloading')
        qreq_.prog_hook = query_hook
    else:
        preload_hook = None
    # Load features / weights for all annotations
    qreq_.lazy_preload(prog_hook=preload_hook, verbose=verbose and ut.NOT_QUIET)

    all_qaids = qreq_.qaids
    logger.info('len(missed_qaids) = %r' % (len(all_qaids),))
    qaid2_cm = {}
    # vsone must have a chunksize of 1
    if batch_size is None:
        if HOTS_BATCH_SIZE is None:
            hots_batch_size = qreq_.ibs.cfg.other_cfg.hots_batch_size
            # hots_batch_size = 256
        else:
            hots_batch_size = HOTS_BATCH_SIZE
    else:
        hots_batch_size = batch_size
    chunksize = 1 if qreq_.qparams.vsone else hots_batch_size

    # Iterate over vsone queries in chunks.
    n_total_chunks = ut.get_num_chunks(len(all_qaids), chunksize)
    qaid_chunk_iter = ut.ichunks(all_qaids, chunksize)
    _qreq_iter = (qreq_.shallowcopy(qaids=qaids) for qaids in qaid_chunk_iter)
    sub_qreq_iter = ut.ProgIter(
        _qreq_iter,
        length=n_total_chunks,
        freq=1,
        label='[mc4] query chunk: ',
        prog_hook=qreq_.prog_hook,
    )
    for sub_qreq_ in sub_qreq_iter:
        if ut.VERBOSE:
            logger.info('Generating vsmany chunk')
        sub_cm_list = pipeline.request_wbia_query_L0(qreq_.ibs, sub_qreq_, verbose=verbose)
        assert len(sub_qreq_.qaids) == len(sub_cm_list), 'not aligned'
        assert all(
            [qaid == cm.qaid for qaid, cm in zip(sub_qreq_.qaids, sub_cm_list)]
        ), 'not corresponding'
        if save_qcache:
            fpath_list = list(
                qreq_.get_chipmatch_fpaths(
                    sub_qreq_.qaids, super_qres_cache=use_supercache
                )
            )
            _iter = zip(sub_cm_list, fpath_list)
            _iter = ut.ProgIter(
                _iter,
                length=len(sub_cm_list),
                label='saving chip matches',
                adjust=True,
                freq=1,
            )
            for cm, fpath in _iter:
                cm.save_to_fpath(fpath, verbose=False)
        else:
            if ut.VERBOSE:
                logger.info('[mc4] not saving vsmany chunk')
        qaid2_cm.update({cm.qaid: cm for cm in sub_cm_list})
    return qaid2_cm
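

# Hedged illustration (not part of the original module): how the chunking in
# execute_query2 splits query annotation ids into sub-requests. A vsone
# request forces chunksize=1 (one query per sub-request); vsmany uses the
# batch size. The helper below exists only for this sketch.
def _demo_query_chunking(all_qaids=tuple(range(10)), vsone=False, hots_batch_size=4):
    chunksize = 1 if vsone else hots_batch_size
    n_total_chunks = ut.get_num_chunks(len(all_qaids), chunksize)
    chunks = list(ut.ichunks(all_qaids, chunksize))
    assert len(chunks) == n_total_chunks
    # e.g. [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]] for the vsmany defaults
    return chunks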