def learn_visual_words(annots_df, taids, nWords, use_cache=USE_CACHE_WORDS, with_pandas=WITH_PANDAS):
    """
    Compute an nWords visual-word vocabulary from the training annotations.

    >>> from ibeis.model.hots.smk.smk_index import *  # NOQA
    >>> from ibeis.model.hots.smk import smk_debug
    >>> ibs, annots_df, taids, daids, qaids, nWords = smk_debug.testdata_dataframe()
    >>> use_cache = USE_CACHE_WORDS
    >>> words = learn_visual_words(annots_df, taids, nWords)
    >>> print(words.shape)
    (8000, 128)
    """
    max_iters = 200
    flann_params = {}
    stackable_vecs = [pdh.ensure_values(vecs)
                      for vecs in annots_df['vecs'][taids].values]
    train_vecs = np.vstack(stackable_vecs)
    print('Training %d word vocabulary with %d annots and %d descriptors' %
          (nWords, len(taids), len(train_vecs)))
    akmeans_kwargs = dict(max_iters=max_iters, use_cache=use_cache,
                          appname='smk', flann_params=flann_params)
    _words = clustertool.cached_akmeans(train_vecs, nWords, **akmeans_kwargs)
    if not with_pandas:
        return _words
    # Pandasify: index the cluster centers by word index 'wx'
    wx_series = pdh.RangeIndex(len(_words), name='wx')
    #words = pd.DataFrame(_words, index=wx_series, columns=VEC_COLUMNS)
    return pd.DataFrame(_words, index=wx_series)
def learn_visual_words(annots_df, train_aids, nCentroids):
    """Train an nCentroids-word vocabulary from the training annotations' descriptors."""
    train_vecs = np.vstack(annots_df['vecs'][train_aids].as_matrix())
    print('Training %d word vocabulary with %d annots and %d descriptors' %
          (nCentroids, len(train_aids), len(train_vecs)))
    return clustertool.cached_akmeans(train_vecs, nCentroids, max_iters=100)
def train_paris_vocab(ibs):
    """
    Train an 8000-word SMK vocabulary on the Paris database.

    Args:
        ibs: IBEIS controller used to fetch annotations and descriptors.

    CommandLine:
        python dev.py --db Paris --cmd
    """
    # UNFINISHED
    aid_list = []
    # use only one annotation per image
    for aids in ibs.get_image_aids(ibs.get_valid_gids()):
        if len(aids) == 1:
            aid_list.append(aids[0])
        else:
            # use annot with largest area
            aid_list.append(aids[np.argmax(ibs.get_annot_bbox_area(aids))])
    vecs_list = ibs.get_annot_vecs(aid_list)
    vecs = np.vstack(vecs_list)
    nWords = 8000
    from vtool import clustering2 as clustertool
    print('vecs are: %r' % utool.get_object_size_str(vecs))
    _words = clustertool.cached_akmeans(vecs, nWords, max_iters=500, use_cache=True, appname='smk')  # NOQA
    # Center the descriptors around the mean vector
    vec_mean = vecs.mean(axis=0).astype(np.float32)
    vec_mean.shape = (1, vec_mean.shape[0])
    vecs_centered = vecs - vec_mean
    # BUGFIX: the original referenced an undefined name `arr1` here (NameError).
    # The intent from context is to L2-normalize the centered vectors row-wise.
    norm_ = npl.norm(vecs_centered, axis=1)
    norm_.shape = (norm_.size, 1)
    vecs_norm = np.divide(vecs_centered, norm_)  # , out=out)
    print('vecs_centered are: %r' % utool.get_object_size_str(vecs_centered))
    # Signed square-root (RootSIFT-style) requantization to int8
    vecs_post = np.round(128 * np.sqrt(np.abs(vecs_norm)) * np.sign(vecs_norm)).astype(np.int8)  # NOQA
def learn_visual_words(annots_df, taids, nWords, use_cache=USE_CACHE_WORDS):
    """
    Compute an nWords visual-word vocabulary from the training annotations.

    >>> from ibeis.model.hots.smk.smk_index import *  # NOQA
    >>> from ibeis.model.hots.smk import smk_debug
    >>> ibs, annots_df, taids, daids, qaids, nWords = smk_debug.testdata_dataframe()
    >>> use_cache = USE_CACHE_WORDS
    >>> words = learn_visual_words(annots_df, taids, nWords)
    >>> print(words.shape)
    (8000, 128)
    """
    max_iters = 200
    flann_params = {}
    stackable_vecs = [pdh.ensure_values(vecs)
                      for vecs in annots_df['vecs'][taids].values]
    train_vecs = np.vstack(stackable_vecs)
    print('Training %d word vocabulary with %d annots and %d descriptors' %
          (nWords, len(taids), len(train_vecs)))
    akmeans_kwargs = dict(max_iters=max_iters, use_cache=use_cache,
                          appname='smk', flann_params=flann_params)
    _words = clustertool.cached_akmeans(train_vecs, nWords, **akmeans_kwargs)
    if not WITH_PANDAS:
        return _words
    # Pandasify: index the cluster centers by word index 'wx'
    wx_series = pdh.RangeIndex(len(_words), name='wx')
    #words = pd.DataFrame(_words, index=wx_series, columns=VEC_COLUMNS)
    return pd.DataFrame(_words, index=wx_series)
def learn_visual_words(annots_df, taids, nWords, use_cache=True):
    """
    Compute an nWords visual-word vocabulary from the training annotations.

    >>> from ibeis.model.hots.smk.smk import *  # NOQA
    >>> ibs, annots_df, taids, daids, qaids, nWords = testdata()
    >>> words = learn_visual_words(annots_df, taids, nWords)
    """
    train_vecs = np.vstack(annots_df['vecs'][taids].as_matrix())
    print('Training %d word vocabulary with %d annots and %d descriptors' %
          (nWords, len(taids), len(train_vecs)))
    # Cached clustering results live under the app resource directory
    cache_dir = utool.get_app_resource_dir('smk')
    return clustertool.cached_akmeans(train_vecs, nWords, max_iters=100,
                                      use_cache=use_cache, cache_dir=cache_dir)
def learn_visual_words(ibs, config2_=None, use_cache=USE_CACHE_WORDS, memtrack=None):
    """
    Computes and caches visual words

    Args:
        ibs (IBEISController): controller providing annotations and descriptors
        config2_ (QueryParams): hyper-parameters (nWords, vocab_taids,
            vocab_init_method, vocab_nIters, vocab_flann_params)
        use_cache (bool): turns on disk based caching (default = True)
        memtrack (None): optional memory tracker (default = None)

    Returns:
        ndarray[uint8_t, ndim=2]: words - aggregate descriptor cluster centers

    Example:
        >>> # SLOW_DOCTEST
        >>> from ibeis.algo.hots.smk.smk_index import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> ibs, annots_df, taids, daids, qaids, qreq_, nWords = smk_debug.testdata_dataframe()
        >>> use_cache = True
        >>> words = learn_visual_words(ibs, qreq_)
        >>> print(words.shape)
        (8000, 128)

    Example:
        >>> # SLOW_DOCTEST
        >>> from ibeis.algo.hots.smk.smk_index import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('PZ_Master1')
        >>> config2_ = ibs.new_query_params(cfgdict=dict(nWords=128000))
        >>> use_cache = True
        >>> words = learn_visual_words(ibs, config2_)
        >>> print(words.shape)
        (8000, 128)
    """
    #if memtrack is None:
    #    memtrack = ut.MemoryTracker('[learn_visual_words]')
    nWords = config2_.nWords
    # TODO: Incorporate taids (vocab training ids) into qreq
    if config2_.vocab_taids == 'all':
        taids = ibs.get_valid_aids(species=ibs.get_primary_database_species())  # exemplar
    else:
        taids = config2_.vocab_taids
    initmethod = config2_.vocab_init_method
    max_iters = config2_.vocab_nIters
    flann_params = config2_.vocab_flann_params
    train_vecs_list = ibs.get_annot_vecs(taids, eager=True, config2_=config2_)
    train_vecs = np.vstack(train_vecs_list)
    # Free the per-annot list immediately to keep peak memory down
    del train_vecs_list
    # BUGFIX: the print's string literal was broken across a line boundary
    # (syntax error); rejoined into a single format string.
    print('[smk_index] Train Vocab(nWords=%d) using %d annots and %d descriptors' %
          (nWords, len(taids), len(train_vecs)))
    kwds = dict(max_iters=max_iters, use_cache=use_cache, initmethod=initmethod,
                appname='smk', flann_params=flann_params)
    words = clustertool.cached_akmeans(train_vecs, nWords, **kwds)
    del train_vecs
    del kwds
    return words
def learn_visual_words(ibs, config2_=None, use_cache=USE_CACHE_WORDS, memtrack=None):
    """
    Computes and caches visual words

    Args:
        ibs (IBEISController): controller providing annotations and descriptors
        config2_ (QueryParams): hyper-parameters (nWords, vocab_taids,
            vocab_init_method, vocab_nIters, vocab_flann_params)
        use_cache (bool): turns on disk based caching (default = True)
        memtrack (None): optional memory tracker (default = None)

    Returns:
        ndarray[uint8_t, ndim=2]: words - aggregate descriptor cluster centers

    Example:
        >>> # SLOW_DOCTEST
        >>> from ibeis.algo.hots.smk.smk_index import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> ibs, annots_df, taids, daids, qaids, qreq_, nWords = smk_debug.testdata_dataframe()
        >>> use_cache = True
        >>> words = learn_visual_words(ibs, qreq_)
        >>> print(words.shape)
        (8000, 128)

    Example:
        >>> # SLOW_DOCTEST
        >>> from ibeis.algo.hots.smk.smk_index import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('PZ_Master1')
        >>> config2_ = ibs.new_query_params(cfgdict=dict(nWords=128000))
        >>> use_cache = True
        >>> words = learn_visual_words(ibs, config2_)
        >>> print(words.shape)
        (8000, 128)
    """
    #if memtrack is None:
    #    memtrack = ut.MemoryTracker('[learn_visual_words]')
    #config2_ = qreq_.extern_data_config2
    nWords = config2_.nWords
    # TODO: Incorporate taids (vocab training ids) into qreq
    if config2_.vocab_taids == 'all':
        taids = ibs.get_valid_aids(species=ibs.get_primary_database_species())  # exemplar
    else:
        taids = config2_.vocab_taids
    initmethod = config2_.vocab_init_method
    max_iters = config2_.vocab_nIters
    flann_params = config2_.vocab_flann_params
    train_vecs_list = ibs.get_annot_vecs(taids, eager=True, config2_=config2_)
    train_vecs = np.vstack(train_vecs_list)
    # Free the per-annot list immediately to keep peak memory down
    del train_vecs_list
    # BUGFIX: the print's string literal was broken across a line boundary
    # (syntax error); rejoined into a single format string.
    print('[smk_index] Train Vocab(nWords=%d) using %d annots and %d descriptors' %
          (nWords, len(taids), len(train_vecs)))
    kwds = dict(max_iters=max_iters, use_cache=use_cache, initmethod=initmethod,
                appname='smk', flann_params=flann_params)
    words = clustertool.cached_akmeans(train_vecs, nWords, **kwds)
    del train_vecs
    del kwds
    return words