def execute_and_save(qreq_miss):
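    """
    Executes the pipeline for a query request in chunks, saving each
    resulting ChipMatch to its cache path as it is computed. Returns a
    dict mapping qaid to ChipMatch.
    """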
    # Iterate over vsone queries in chunks.
    total_chunks = ut.get_num_chunks(len(qreq_miss.qaids), qreq_miss.chunksize)
    qaid_chunk_iter = ut.ichunks(qreq_miss.qaids, qreq_miss.chunksize)
    _prog = ut.ProgPartial(
        length=total_chunks,
        freq=1,
        label='[mc5] query chunk: ',
        prog_hook=qreq_miss.prog_hook,
        bs=False,
    )
    qaid_chunk_iter = iter(_prog(qaid_chunk_iter))

    qaid_to_cm = {}
    for qaids in qaid_chunk_iter:
        sub_qreq = qreq_miss.shallowcopy(qaids=qaids)
        cm_batch = sub_qreq.execute_pipeline()
        assert len(cm_batch) == len(qaids), 'bad alignment'
        assert all([qaid == cm.qaid for qaid, cm in zip(qaids, cm_batch)])

        # TODO: we already computed the fpaths
        # should be able to pass them in
        fpath_list = sub_qreq.get_chipmatch_fpaths(qaids)
        _prog = ut.ProgPartial(
            length=len(cm_batch),
            adjust=True,
            freq=1,
            label='saving chip matches',
            bs=True,
        )
        for cm, fpath in _prog(zip(cm_batch, fpath_list)):
            cm.save_to_fpath(fpath, verbose=False)
        qaid_to_cm.update({cm.qaid: cm for cm in cm_batch})

    return qaid_to_cm
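The chunk bookkeeping above depends on `ut.get_num_chunks` and `ut.ichunks` agreeing on how a sequence splits. A minimal sketch of that contract on plain lists (utool only, no query request involved):

import utool as ut

items = list(range(10))
chunksize = 4
chunks = list(ut.ichunks(items, chunksize))   # [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]
assert len(chunks) == ut.get_num_chunks(len(items), chunksize)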
Example #2
def batch_knn(indexer, vecs, K, chunksize=4096, label='batch knn'):
    """
    Works like `indexer.knn` but the input is split into batches and
    progress is reported to give an estimated time remaining.
    """
    # Preallocate output
    idxs = np.empty((vecs.shape[0], K), dtype=np.int32)
    dists = np.empty((vecs.shape[0], K), dtype=np.float32)
    # Generate chunk slices
    num_chunks = ut.get_num_chunks(vecs.shape[0], chunksize)
    iter_ = ut.ichunk_slices(vecs.shape[0], chunksize)
    prog = ut.ProgIter(iter_, length=num_chunks, label=label)
    for sl_ in prog:
        idxs[sl_], dists[sl_] = indexer.knn(vecs[sl_], K=K)
    return idxs, dists
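A hedged usage sketch for `batch_knn`; `BruteForceIndexer` is a hypothetical stand-in for any object exposing a `knn(vecs, K)` method that returns (indices, distances), such as the FLANN-backed indexers used elsewhere:

import numpy as np

class BruteForceIndexer(object):
    """Hypothetical indexer exposing knn(vecs, K) -> (idxs, dists)."""

    def __init__(self, data):
        self.data = np.asarray(data, dtype=np.float32)

    def knn(self, vecs, K):
        # Brute-force squared distances; real indexers would use FLANN.
        dists = ((vecs[:, None, :] - self.data[None, :, :]) ** 2).sum(axis=2)
        idxs = np.argsort(dists, axis=1)[:, :K]
        return idxs.astype(np.int32), np.take_along_axis(dists, idxs, axis=1).astype(np.float32)

indexer = BruteForceIndexer(np.random.rand(1000, 8))
query_vecs = np.random.rand(300, 8).astype(np.float32)
idxs, dists = batch_knn(indexer, query_vecs, K=5, chunksize=128)
assert idxs.shape == (300, 5) and dists.shape == (300, 5)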
Example #3
def compute_vocab(depc, fid_list, config):
    r"""
    Depcache method for computing a new visual vocab

    CommandLine:
        python -m wbia.core_annots --exec-compute_neighbor_index --show
        python -m wbia show_depc_annot_table_input --show --tablename=neighbor_index

        python -m wbia.algo.smk.vocab_indexer --exec-compute_vocab:0
        python -m wbia.algo.smk.vocab_indexer --exec-compute_vocab:1

        # FIXME make util_tests register
        python -m wbia.algo.smk.vocab_indexer compute_vocab:0

    Ignore:
        >>> # Lev Oxford Debug Example
        >>> import wbia
        >>> ibs = wbia.opendb('Oxford')
        >>> depc = ibs.depc
        >>> table = depc['vocab']
        >>> # Check what currently exists in vocab table
        >>> table.print_configs()
        >>> table.print_table()
        >>> table.print_internal_info()
        >>> # Grab aids used to compute vocab
        >>> from wbia.expt.experiment_helpers import get_annotcfg_list
        >>> expanded_aids_list = get_annotcfg_list(ibs, ['oxford'])[1]
        >>> qaids, daids = expanded_aids_list[0]
        >>> vocab_aids = daids
        >>> config = {'num_words': 64000}
        >>> exists = depc.check_rowids('vocab', [vocab_aids], config=config)
        >>> print('exists = %r' % (exists,))
        >>> vocab_rowid = depc.get_rowids('vocab', [vocab_aids], config=config)[0]
        >>> print('vocab_rowid = %r' % (vocab_rowid,))
        >>> vocab = table.get_row_data([vocab_rowid], 'words')[0]
        >>> print('vocab = %r' % (vocab,))

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.smk.vocab_indexer import *  # NOQA
        >>> # Test depcache access
        >>> import wbia
        >>> ibs, aid_list = wbia.testdata_aids('testdb1')
        >>> depc = ibs.depc_annot
        >>> input_tuple = [aid_list]
        >>> rowid_kw = {}
        >>> tablename = 'vocab'
        >>> vocabid_list = depc.get_rowids(tablename, input_tuple, **rowid_kw)
        >>> vocab = depc.get(tablename, input_tuple, 'words')[0]
        >>> assert vocab.wordflann is not None
        >>> assert vocab.wordflann._FLANN__curindex_data is not None
        >>> assert vocab.wordflann._FLANN__curindex_data is vocab.wx_to_word

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.smk.vocab_indexer import *  # NOQA
        >>> import wbia
        >>> ibs, aid_list = wbia.testdata_aids('testdb1')
        >>> depc = ibs.depc_annot
        >>> fid_list = depc.get_rowids('feat', aid_list)
        >>> config = VocabConfig()
        >>> vocab, train_vecs = ut.exec_func_src(compute_vocab, keys=['vocab', 'train_vecs'])
        >>> idx_to_vec = depc.d.get_feat_vecs(aid_list)[0]
        >>> self = vocab
        >>> ut.quit_if_noshow()
        >>> data = train_vecs
        >>> centroids = vocab.wx_to_word
        >>> import wbia.plottool as pt
        >>> vt.plot_centroids(data, centroids, num_pca_dims=2)
        >>> ut.show_if_requested()
        >>> #config = ibs.depc_annot['vocab'].configclass()

    """
    logger.info('[IBEIS] COMPUTE_VOCAB:')
    vecs_list = depc.get_native('feat', fid_list, 'vecs')
    train_vecs = np.vstack(vecs_list).astype(np.float32)
    num_words = config['num_words']
    logger.info(
        '[smk_index] Train Vocab(nWords=%d) using %d annots and %d descriptors'
        % (num_words, len(fid_list), len(train_vecs)))
    if config['algorithm'] == 'kdtree':
        flann_params = vt.get_flann_params(random_seed=42)
        kwds = dict(max_iters=20, flann_params=flann_params)
        words = vt.akmeans(train_vecs, num_words, **kwds)
    elif config['algorithm'] == 'minibatch':
        logger.info('Using minibatch kmeans')
        import sklearn.cluster

        rng = np.random.RandomState(config['random_seed'])
        n_init = config['n_init']
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            init_size = int(num_words * 4)
            batch_size = 1000
            n_batches = ut.get_num_chunks(train_vecs.shape[0], batch_size)
            minibatch_params = dict(
                n_clusters=num_words,
                init='k-means++',
                init_size=init_size,
                n_init=n_init,
                max_iter=30000 // n_batches,
                batch_size=batch_size,
                tol=0.0,
                max_no_improvement=10,
                reassignment_ratio=0.01,
            )
            logger.info('minibatch_params = %s' %
                        (ut.repr4(minibatch_params), ))
            clusterer = sklearn.cluster.MiniBatchKMeans(compute_labels=False,
                                                        random_state=rng,
                                                        verbose=2,
                                                        **minibatch_params)
            try:
                clusterer.fit(train_vecs)
            except (Exception, KeyboardInterrupt) as ex:
                ut.printex(ex, tb=True)
                if ut.is_developer():
                    ut.embed()
                else:
                    raise
        words = clusterer.cluster_centers_
        logger.info('Finished clustering')
    # if False:
    #     flann_params['checks'] = 64
    #     flann_params['trees'] = 4
    #     num_words = 128
    #     centroids = vt.initialize_centroids(num_words, train_vecs, 'akmeans++')
    #     words, hist = vt.akmeans_iterations(
    #         train_vecs, centroids, max_iters=1000, monitor=True,
    #         flann_params=flann_params)

    logger.info('Constructing vocab')
    vocab = VisualVocab(words)
    logger.info('Building vocab index')
    vocab.build()
    logger.info('Returning vocab')
    return (vocab, )
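The minibatch branch above is plain scikit-learn under the hood. A self-contained sketch of that clustering step on random descriptors, with the wbia-specific parts (depcache access, VisualVocab, FLANN indexing) left out; the parameter values here are illustrative, not the configured ones:

import numpy as np
import sklearn.cluster

train_vecs = np.random.rand(5000, 128).astype(np.float32)
num_words = 64
clusterer = sklearn.cluster.MiniBatchKMeans(
    n_clusters=num_words,
    init='k-means++',
    init_size=num_words * 4,
    n_init=1,
    batch_size=1000,
    max_no_improvement=10,
    compute_labels=False,
    random_state=np.random.RandomState(42),
)
clusterer.fit(train_vecs)
words = clusterer.cluster_centers_  # (num_words, 128) vocabulary centroids
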
def execute_query2(qreq_,
                   verbose,
                   save_qcache,
                   batch_size=None,
                   use_supercache=False):
    """
    Breaks up the query request into several subrequests
    so they can be processed "more efficiently" and more safely.
    """
    if qreq_.prog_hook is not None:
        preload_hook, query_hook = qreq_.prog_hook.subdivide(
            spacing=[0, 0.15, 0.8])
        preload_hook(0, lbl='preloading')
        qreq_.prog_hook = query_hook
    else:
        preload_hook = None
    # Load features / weights for all annotations
    qreq_.lazy_preload(prog_hook=preload_hook,
                       verbose=verbose and ut.NOT_QUIET)

    all_qaids = qreq_.qaids
    logger.info('len(all_qaids) = %r' % (len(all_qaids), ))
    qaid2_cm = {}
    # vsone must have a chunksize of 1
    if batch_size is None:
        if HOTS_BATCH_SIZE is None:
            hots_batch_size = qreq_.ibs.cfg.other_cfg.hots_batch_size
            # hots_batch_size = 256
        else:
            hots_batch_size = HOTS_BATCH_SIZE
    else:
        hots_batch_size = batch_size
    chunksize = 1 if qreq_.qparams.vsone else hots_batch_size

    # Iterate over vsone queries in chunks.
    n_total_chunks = ut.get_num_chunks(len(all_qaids), chunksize)
    qaid_chunk_iter = ut.ichunks(all_qaids, chunksize)
    _qreq_iter = (qreq_.shallowcopy(qaids=qaids) for qaids in qaid_chunk_iter)
    sub_qreq_iter = ut.ProgIter(
        _qreq_iter,
        length=n_total_chunks,
        freq=1,
        label='[mc4] query chunk: ',
        prog_hook=qreq_.prog_hook,
    )
    for sub_qreq_ in sub_qreq_iter:
        if ut.VERBOSE:
            logger.info('Generating vsmany chunk')
        sub_cm_list = pipeline.request_wbia_query_L0(qreq_.ibs,
                                                     sub_qreq_,
                                                     verbose=verbose)
        assert len(sub_qreq_.qaids) == len(sub_cm_list), 'not aligned'
        assert all([
            qaid == cm.qaid for qaid, cm in zip(sub_qreq_.qaids, sub_cm_list)
        ]), 'not corresponding'
        if save_qcache:
            fpath_list = list(
                qreq_.get_chipmatch_fpaths(sub_qreq_.qaids,
                                           super_qres_cache=use_supercache))
            _iter = zip(sub_cm_list, fpath_list)
            _iter = ut.ProgIter(
                _iter,
                length=len(sub_cm_list),
                label='saving chip matches',
                adjust=True,
                freq=1,
            )
            for cm, fpath in _iter:
                cm.save_to_fpath(fpath, verbose=False)
        else:
            if ut.VERBOSE:
                logger.info('[mc4] not saving vsmany chunk')
        qaid2_cm.update({cm.qaid: cm for cm in sub_cm_list})
    return qaid2_cm
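A hedged end-to-end usage sketch, assuming a wbia test database; `ibs.new_query_request` is how a query request is typically built, though the exact arguments here are illustrative:

import wbia

ibs = wbia.opendb('testdb1')
aid_list = ibs.get_valid_aids()
qreq_ = ibs.new_query_request(aid_list[:2], aid_list)
qaid2_cm = execute_query2(qreq_, verbose=True, save_qcache=False, batch_size=8)
for qaid, cm in qaid2_cm.items():
    print('qaid=%r -> %r' % (qaid, cm))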