def print_pretrained_weights(pretrained_weights, lbl=''):
    r"""
    Args:
        pretrained_weights (list of ndarrays): represents layer weights
        lbl (str): label
    """
    print('Initialization network: %r' % (lbl,))
    print('Total memory: %s' % (ut.get_object_size_str(pretrained_weights)))
    for index, layer_ in enumerate(pretrained_weights):
        print(' layer {:2}: shape={:<18}, memory={}'.format(
            index, layer_.shape, ut.get_object_size_str(layer_)))

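# Hypothetical usage sketch (not from the original module): two dummy
# "layers" stand in for real pretrained weights; the shapes and the label
# are illustrative only. Assumes numpy and utool are importable as below,
# mirroring the surrounding module's own imports.
import numpy as np
import utool as ut

dummy_weights = [
    np.zeros((64, 3, 3, 3), dtype=np.float32),  # fake conv kernel
    np.zeros((64,), dtype=np.float32),          # fake bias vector
]
print_pretrained_weights(dummy_weights, lbl='dummy_net')
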
def get_cachestats_str(ibs):
    """
    Returns info about the underlying SQL cache memory
    """
    total_size_str = ut.get_object_size_str(ibs.table_cache,
                                            lbl='size(table_cache): ')
    # append, so the size summary computed above is kept
    total_size_str += '\nlen(table_cache) = %r' % (len(ibs.table_cache),)
    table_size_str_list = [
        ut.get_object_size_str(val, lbl='size(table_cache[%s]): ' % (key,))
        for key, val in six.iteritems(ibs.table_cache)]
    cachestats_str = (
        total_size_str + ut.indentjoin(table_size_str_list, '\n * '))
    return cachestats_str

def request_memcached_ibeis_nnindexer(qreq_, daid_list, use_memcache=True,
                                      verbose=ut.NOT_QUIET, veryverbose=False,
                                      force_rebuild=False,
                                      allow_memfallback=True, memtrack=None):
    r"""
    FOR INTERNAL USE ONLY
    takes custom daid list. might not be the same as what is in qreq_

    CommandLine:
        python -m ibeis.algo.hots.neighbor_index_cache --test-request_memcached_ibeis_nnindexer

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.hots.neighbor_index_cache import *  # NOQA
        >>> import ibeis
        >>> # build test data
        >>> ibs = ibeis.opendb('testdb1')
        >>> ZEB_PLAIN = ibeis.const.TEST_SPECIES.ZEB_PLAIN
        >>> daid_list = ibs.get_valid_aids(species=ZEB_PLAIN)[0:3]
        >>> qreq_ = ibs.new_query_request(daid_list, daid_list)
        >>> qreq_.qparams.min_reindex_thresh = 3
        >>> verbose = True
        >>> use_memcache = True
        >>> # execute function
        >>> nnindexer = request_memcached_ibeis_nnindexer(qreq_, daid_list, use_memcache)
        >>> # verify results
        >>> result = str(nnindexer)
        >>> print(result)
    """
    global NEIGHBOR_CACHE
    #try:
    if veryverbose:
        print('[nnindex.MEMCACHE] len(NEIGHBOR_CACHE) = %r' %
              (len(NEIGHBOR_CACHE),))
        # the lru cache wont be recognized by get_object_size_str, cast to
        # pure python objects
        print('[nnindex.MEMCACHE] size(NEIGHBOR_CACHE) = %s' %
              (ut.get_object_size_str(NEIGHBOR_CACHE.items()),))
    #if memtrack is not None:
    #    memtrack.report('IN REQUEST MEMCACHE')
    nnindex_cfgstr = build_nnindex_cfgstr(qreq_, daid_list)
    # neighbor memory cache
    if (not force_rebuild and use_memcache and
            NEIGHBOR_CACHE.has_key(nnindex_cfgstr)):  # NOQA (has_key is for a lru cache)
        if veryverbose or ut.VERYVERBOSE or ut.VERBOSE:
            print('... nnindex memcache hit: cfgstr=%s' % (nnindex_cfgstr,))
        nnindexer = NEIGHBOR_CACHE[nnindex_cfgstr]
    else:
        if veryverbose or ut.VERYVERBOSE or ut.VERBOSE:
            print('... nnindex memcache miss: cfgstr=%s' % (nnindex_cfgstr,))
        # Write to inverse uuid
        nnindexer = request_diskcached_ibeis_nnindexer(
            qreq_, daid_list, nnindex_cfgstr, verbose,
            force_rebuild=force_rebuild, memtrack=memtrack)
        NEIGHBOR_CACHE_WRITE = True
        if NEIGHBOR_CACHE_WRITE:
            # Write to memcache
            if ut.VERBOSE or ut.VERYVERBOSE:
                print('[disk] Write to memcache=%r' % (nnindex_cfgstr,))
            NEIGHBOR_CACHE[nnindex_cfgstr] = nnindexer
        else:
            if ut.VERBOSE or ut.VERYVERBOSE:
                print('[disk] Did not write to memcache=%r' % (nnindex_cfgstr,))
    return nnindexer

def request_species_distinctiveness_normalizer(species, cachedir=None, verbose=False):
    """
    helper function to get distinctiveness model independent of IBEIS.
    """
    if species in DISTINCTIVENESS_NORMALIZER_CACHE:
        dstcnvs_normer = DISTINCTIVENESS_NORMALIZER_CACHE[species]
    else:
        if cachedir is None:
            cachedir = sysres.get_global_distinctiveness_modeldir(ensure=True)
        dstcnvs_normer = DistinctivnessNormalizer(species, cachedir=cachedir)
        if not dstcnvs_normer.exists(cachedir):
            # download normalizer if it doesn't exist
            download_baseline_distinctiveness_normalizer(cachedir, species)
        dstcnvs_normer.load(cachedir)
        logger.info(ut.get_object_size_str(dstcnvs_normer, 'dstcnvs_normer = '))
        logger.info('Loaded distinctiveness normalizer')
        # dstcnvs_normer.ensure_flann(cachedir)
        assert dstcnvs_normer.exists(
            cachedir, need_flann=True
        ), 'normalizer should have been downloaded, but it does not exist'
        DISTINCTIVENESS_NORMALIZER_CACHE[species] = dstcnvs_normer
    return dstcnvs_normer

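# Hypothetical usage sketch: the species string is illustrative, and this
# assumes a working install with network access so the baseline model can
# be fetched on the first call; later calls are served from
# DISTINCTIVENESS_NORMALIZER_CACHE.
dstcnvs_normer = request_species_distinctiveness_normalizer('zebra_plains')
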
def get_buildtime_data(**kwargs):
    flann_params = vt.get_flann_params(**kwargs)
    print('flann_params = %r' % (ut.dict_str(flann_params),))
    data_list = []
    num = 1000
    print('-----')
    for count in ut.ProgressIter(itertools.count(), nTotal=-1, freq=1,
                                 autoadjust=False):
        num = int(num * 1.2)
        print('num = %r' % (num,))
        #if num > 1E6:
        #    break
        data = pool.get_testdata(num)
        print('object size ' + ut.get_object_size_str(data, 'data'))
        flann = pyflann.FLANN(**flann_params)
        with ut.Timer(verbose=False) as t:
            flann.build_index(data)
        print('t.ellapsed = %r' % (t.ellapsed,))
        if t.ellapsed > 5 or count > 1000:
            break
        data_list.append((count, num, t.ellapsed))
        print('-----')
    return data_list, flann_params

def print_dataset_info(data, labels, key):
    # use a distinct loop variable so the `key` argument is not shadowed
    labelhist = {lbl: len(val)
                 for lbl, val in ut.group_items(labels, labels).items()}
    stats_dict = ut.get_stats(data.ravel())
    ut.delete_keys(stats_dict, ['shape', 'nMax', 'nMin'])
    print('[dataset] Dataset Info: ')
    print('[dataset] * Data:')
    print('[dataset]     %s_data(shape=%r, dtype=%r)' % (key, data.shape, data.dtype))
    print('[dataset]     %s_memory(data) = %r' % (key, ut.get_object_size_str(data),))
    print('[dataset]     %s_stats(data) = %s' % (key, ut.repr2(stats_dict, precision=2),))
    print('[dataset] * Labels:')
    print('[dataset]     %s_labels(shape=%r, dtype=%r)' % (key, labels.shape, labels.dtype))
    print('[dataset]     %s_label histogram = %s' % (key, ut.repr2(labelhist)))

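# Illustrative call with synthetic inputs (not from the original module):
# the shapes, dtypes, and the 'train' key are arbitrary; assumes numpy as
# np and utool as ut, as in the surrounding code.
import numpy as np

rng = np.random.RandomState(0)
data = rng.rand(100, 28, 28).astype(np.float32)   # fake image stack
labels = rng.randint(0, 2, size=100)              # fake binary labels
print_dataset_info(data, labels, 'train')
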
def report_memory(obj, objname='obj'):
    """
    obj = invindex
    objname = 'invindex'
    """
    print('Object Memory Usage for %s' % objname)
    maxlen = max(map(len, six.iterkeys(obj.__dict__)))
    for key, val in six.iteritems(obj.__dict__):
        fmtstr = 'memusage({0}.{1}){2} = '
        lbl = fmtstr.format(objname, key, ' ' * (maxlen - len(key)))
        sizestr = ut.get_object_size_str(val, lbl=lbl, unit='MB')
        print(sizestr)

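# Minimal sketch (hypothetical class): any instance with a __dict__ works,
# so a stand-in object is enough to show the per-attribute report. Assumes
# numpy as np.
import numpy as np

class DummyIndex(object):
    def __init__(self):
        self.vecs = np.zeros((1000, 128), dtype=np.uint8)
        self.meta = {'name': 'dummy'}

report_memory(DummyIndex(), objname='dummy')
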
def request_species_distinctiveness_normalizer(species, cachedir=None, verbose=False):
    """
    helper function to get distinctiveness model independent of IBEIS.
    """
    if species in DISTINCTIVENESS_NORMALIZER_CACHE:
        dstcnvs_normer = DISTINCTIVENESS_NORMALIZER_CACHE[species]
    else:
        if cachedir is None:
            cachedir = sysres.get_global_distinctiveness_modeldir(ensure=True)
        dstcnvs_normer = DistinctivnessNormalizer(species, cachedir=cachedir)
        if not dstcnvs_normer.exists(cachedir):
            # download normalizer if it doesn't exist
            download_baseline_distinctiveness_normalizer(cachedir, species)
        dstcnvs_normer.load(cachedir)
        print(ut.get_object_size_str(dstcnvs_normer, 'dstcnvs_normer = '))
        print('Loaded distinctiveness normalizer')
        #dstcnvs_normer.ensure_flann(cachedir)
        assert dstcnvs_normer.exists(cachedir, need_flann=True), (
            'normalizer should have been downloaded, but it does not exist')
        DISTINCTIVENESS_NORMALIZER_CACHE[species] = dstcnvs_normer
    return dstcnvs_normer

def get_buildtime_data(**kwargs):
    flann_params = vt.get_flann_params(**kwargs)
    print('flann_params = %r' % (ut.dict_str(flann_params),))
    data_list = []
    num = 1000
    print('-----')
    for count in ut.ProgressIter(itertools.count(), nTotal=-1, freq=1,
                                 autoadjust=False):
        num = int(num * 1.2)
        print('num = %r' % (num,))
        #if num > 1E6:
        #    break
        data = pool.get_testdata(num)
        print('object size ' + ut.get_object_size_str(data, 'data'))
        flann = pyflann.FLANN(**flann_params)
        with ut.Timer(verbose=False) as t:
            flann.build_index(data)
        print('t.ellapsed = %r' % (t.ellapsed,))
        if t.ellapsed > 5 or count > 1000:
            break
        data_list.append((count, num, t.ellapsed))
        print('-----')
    return data_list, flann_params

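# Hedged follow-up sketch: plot the (count, num, seconds) tuples returned
# above. Assumes matplotlib is available, that vt.get_flann_params'
# defaults suffice (no kwargs passed), and that the module-level `pool`
# used by get_buildtime_data has already been allocated.
import matplotlib.pyplot as plt

data_list, flann_params = get_buildtime_data()
counts, nums, times = zip(*data_list)
plt.plot(nums, times, 'o-')
plt.xlabel('num vectors')
plt.ylabel('build time (seconds)')
plt.title('FLANN build time vs data size')
plt.show()
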
def request_memcached_ibeis_nnindexer(qreq_, daid_list, use_memcache=True,
                                      verbose=ut.NOT_QUIET, veryverbose=False,
                                      force_rebuild=False, memtrack=None,
                                      prog_hook=None):
    r"""
    FOR INTERNAL USE ONLY
    takes custom daid list. might not be the same as what is in qreq_

    CommandLine:
        python -m ibeis.algo.hots.neighbor_index_cache --test-request_memcached_ibeis_nnindexer

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.hots.neighbor_index_cache import *  # NOQA
        >>> import ibeis
        >>> # build test data
        >>> ibs = ibeis.opendb('testdb1')
        >>> ZEB_PLAIN = ibeis.const.TEST_SPECIES.ZEB_PLAIN
        >>> daid_list = ibs.get_valid_aids(species=ZEB_PLAIN)[0:3]
        >>> qreq_ = ibs.new_query_request(daid_list, daid_list)
        >>> qreq_.qparams.min_reindex_thresh = 3
        >>> verbose = True
        >>> use_memcache = True
        >>> # execute function
        >>> nnindexer = request_memcached_ibeis_nnindexer(qreq_, daid_list, use_memcache)
        >>> # verify results
        >>> result = str(nnindexer)
        >>> print(result)
    """
    global NEIGHBOR_CACHE
    #try:
    if veryverbose:
        print('[nnindex.MEMCACHE] len(NEIGHBOR_CACHE) = %r' %
              (len(NEIGHBOR_CACHE),))
        # the lru cache wont be recognized by get_object_size_str, cast to
        # pure python objects
        print('[nnindex.MEMCACHE] size(NEIGHBOR_CACHE) = %s' %
              (ut.get_object_size_str(NEIGHBOR_CACHE.items()),))
    #if memtrack is not None:
    #    memtrack.report('IN REQUEST MEMCACHE')
    nnindex_cfgstr = build_nnindex_cfgstr(qreq_, daid_list)
    # neighbor memory cache
    if (not force_rebuild and use_memcache and
            NEIGHBOR_CACHE.has_key(nnindex_cfgstr)):  # NOQA (has_key is for a lru cache)
        if veryverbose or ut.VERYVERBOSE or ut.VERBOSE:
            print('... nnindex memcache hit: cfgstr=%s' % (nnindex_cfgstr,))
        nnindexer = NEIGHBOR_CACHE[nnindex_cfgstr]
    else:
        if veryverbose or ut.VERYVERBOSE or ut.VERBOSE:
            print('... nnindex memcache miss: cfgstr=%s' % (nnindex_cfgstr,))
        # Write to inverse uuid
        nnindexer = request_diskcached_ibeis_nnindexer(
            qreq_, daid_list, nnindex_cfgstr, verbose,
            force_rebuild=force_rebuild, memtrack=memtrack,
            prog_hook=prog_hook)
        NEIGHBOR_CACHE_WRITE = True
        if NEIGHBOR_CACHE_WRITE:
            # Write to memcache
            if ut.VERBOSE or ut.VERYVERBOSE:
                print('[disk] Write to memcache=%r' % (nnindex_cfgstr,))
            NEIGHBOR_CACHE[nnindex_cfgstr] = nnindexer
        else:
            if ut.VERBOSE or ut.VERYVERBOSE:
                print('[disk] Did not write to memcache=%r' % (nnindex_cfgstr,))
    return nnindexer

def dev_train_distinctiveness(species=None):
    r"""
    Args:
        ibs (IBEISController): wbia controller object
        species (None):

    CommandLine:
        python -m wbia.algo.hots.distinctiveness_normalizer --test-dev_train_distinctiveness

        alias dev_train_distinctiveness='python -m wbia.algo.hots.distinctiveness_normalizer --test-dev_train_distinctiveness'
        # Publishing (uses cached normalizers if available)
        dev_train_distinctiveness --species GZ --publish
        dev_train_distinctiveness --species PZ --publish
        dev_train_distinctiveness --species PZ --retrain

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.hots.distinctiveness_normalizer import *  # NOQA
        >>> import wbia
        >>> species = ut.get_argval('--species', str, 'zebra_grevys')
        >>> dev_train_distinctiveness(species)
    """
    import wbia

    # if 'species' not in vars() or species is None:
    #     species = 'zebra_grevys'
    if species == 'zebra_grevys':
        dbname = 'GZ_ALL'
    elif species == 'zebra_plains':
        dbname = 'PZ_Master0'
    else:
        # fail fast instead of hitting a NameError on dbname below
        raise ValueError('unsupported species=%r' % (species,))
    ibs = wbia.opendb(dbname)
    global_distinctdir = ibs.get_global_distinctiveness_modeldir()
    cachedir = global_distinctdir
    dstcnvs_normer = DistinctivnessNormalizer(species, cachedir=cachedir)
    try:
        if ut.get_argflag('--retrain'):
            raise IOError('force cache miss')
        with ut.Timer('loading distinctiveness'):
            dstcnvs_normer.load(cachedir)
        # Cache hit
        logger.info('distinctiveness model cache hit')
    except IOError:
        logger.info('distinctiveness model cache miss')
        with ut.Timer('training distinctiveness'):
            # Need to train
            # Add one example from each name
            # TODO: add one exemplar per viewpoint for each name
            # max_vecs = 1E6
            # max_annots = 975
            max_annots = 975
            # ibs.fix_and_clean_database()
            nid_list = ibs.get_valid_nids()
            aids_list = ibs.get_name_aids(nid_list)
            # remove junk
            aids_list = ibs.unflat_map(ibs.filter_junk_annotations, aids_list)
            # remove empty
            aids_list = [aids for aids in aids_list if len(aids) > 0]
            num_annots_list = list(map(len, aids_list))
            aids_list = ut.sortedby(aids_list, num_annots_list, reverse=True)
            # take only one annot per name
            aid_list = ut.get_list_column(aids_list, 0)
            # Keep only a certain number of annots for distinctiveness mapping
            aid_list_ = ut.listclip(aid_list, max_annots)
            logger.info('total num named annots = %r' % (sum(num_annots_list),))
            logger.info(
                'training distinctiveness using %d/%d singleton annots'
                % (len(aid_list_), len(aid_list)))
            # vec
            # FIXME: qreq_ params for config rowid
            vecs_list = ibs.get_annot_vecs(aid_list_)
            num_vecs = sum(list(map(len, vecs_list)))
            logger.info('num_vecs = %r' % (num_vecs,))
            vecs = np.vstack(vecs_list)
            logger.info('vecs size = %r' % (ut.get_object_size_str(vecs),))
            dstcnvs_normer.init_support(vecs)
            dstcnvs_normer.save(global_distinctdir)

    if ut.get_argflag('--publish'):
        dstcnvs_normer.publish()

def compute_residuals_(words, wx2_idxs, wx2_maws, idx2_vec, idx2_aid, idx2_fx,
                       aggregate, verbose=False):
    """
    Computes residual vectors based on word assignments
    returns mapping from word index to a set of residual vectors

    Args:
        words (ndarray):
        wx2_idxs (dict):
        wx2_maws (dict):
        idx2_vec (dict):
        idx2_aid (dict):
        idx2_fx (dict):
        aggregate (bool):
        verbose (bool):

    Returns:
        tuple : (wx2_rvecs, wx2_aids, wx2_fxs, wx2_maws) formatted as::

            * wx2_rvecs - [ ... [ rvec_i1, ...,  rvec_Mi ]_i ... ]
            * wx2_aids  - [ ... [  aid_i1, ...,   aid_Mi ]_i ... ]
            * wx2_fxs   - [ ... [[fxs]_i1, ..., [fxs]_Mi ]_i ... ]

        For every word::

            * list of aggvecs
            * For every aggvec:
                * one parent aid, if aggregate is False: assert isunique(aids)
                * list of parent fxs, if aggregate is True: assert len(fxs) == 1

    Example:
        >>> # SLOW_DOCTEST
        >>> from ibeis.algo.hots.smk.smk_index import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> ibs, annots_df, daids, qaids, invindex, wx2_idxs, qparams = smk_debug.testdata_raw_internals1()
        >>> words     = invindex.words
        >>> idx2_aid  = invindex.idx2_daid
        >>> idx2_fx   = invindex.idx2_dfx
        >>> idx2_vec  = invindex.idx2_dvec
        >>> aggregate = ibs.cfg.query_cfg.smk_cfg.aggregate
        >>> wx2_rvecs, wx2_aids, wx2_fxs, wx2_maws, wx2_flags = compute_residuals_(words, wx2_idxs, wx2_maws, idx2_vec, idx2_aid, idx2_fx, aggregate)
    """
    if not ut.QUIET:
        print('[smk_index.rvec] +--- Start Compute Residuals')
    # list() keeps this working with python3 dict views
    wx_sublist = np.array(list(wx2_idxs.keys()))
    # Build lists w.r.t. words
    idxs_list = [wx2_idxs[wx].astype(hstypes.INDEX_TYPE) for wx in wx_sublist]
    aids_list = [idx2_aid.take(idxs) for idxs in idxs_list]
    if ut.DEBUG2:
        #assert np.all(np.diff(wx_sublist) == 1), 'not dense'
        assert all([len(a) == len(b) for a, b in
                    zip(idxs_list, aids_list)]), 'bad alignment'
        assert idx2_vec.shape[0] == idx2_fx.shape[0]
        assert idx2_vec.shape[0] == idx2_aid.shape[0]
    # Prealloc output
    if ut.VERBOSE or verbose:
        lbl = ('[smk_index.rvec] agg rvecs' if aggregate else
               '[smk_index.rvec] nonagg rvecs')
        print(lbl)
    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.check_wx2_idxs(wx2_idxs, len(words))
    # Compute Residuals
    rvecs_list, flags_list = smk_residuals.compute_nonagg_rvecs(
        words, idx2_vec, wx_sublist, idxs_list)
    if ut.VERBOSE:
        print('Computed size(rvecs_list) = %r' % ut.get_object_size_str(rvecs_list))
        print('Computed size(flags_list) = %r' % ut.get_object_size_str(flags_list))
    if aggregate:
        maws_list = [wx2_maws[wx] for wx in wx_sublist]
        # Aggregate Residuals
        tup = smk_residuals.compute_agg_rvecs(rvecs_list, idxs_list,
                                              aids_list, maws_list)
        (aggvecs_list, aggaids_list, aggidxs_list, aggmaws_list,
         aggflags_list) = tup
        # Pack into common query structure
        aggfxs_list = [[idx2_fx.take(idxs) for idxs in aggidxs]
                       for aggidxs in aggidxs_list]
        wx2_aggvecs  = dict(zip(wx_sublist, aggvecs_list))
        wx2_aggaids  = dict(zip(wx_sublist, aggaids_list))
        wx2_aggfxs   = dict(zip(wx_sublist, aggfxs_list))
        wx2_aggmaws  = dict(zip(wx_sublist, aggmaws_list))
        wx2_aggflags = dict(zip(wx_sublist, aggflags_list))
        (wx2_rvecs, wx2_aids, wx2_fxs, wx2_maws, wx2_flags) = (
            wx2_aggvecs, wx2_aggaids, wx2_aggfxs, wx2_aggmaws, wx2_aggflags)
    else:
        # Hack non-aggregate residuals to have the same structure as aggregate
        # residuals for compatibility: i.e. each rvec gets a list of fxs that
        # contributed to it, and for SMK this is a list of size 1
        fxs_list = [[idx2_fx[idx:idx + 1] for idx in idxs] for idxs in idxs_list]
        wx2_rvecs = dict(zip(wx_sublist, rvecs_list))
        wx2_aids  = dict(zip(wx_sublist, aids_list))
        wx2_fxs   = dict(zip(wx_sublist, fxs_list))
        wx2_flags = dict(zip(wx_sublist, flags_list))
    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.check_wx2(words, wx2_rvecs, wx2_aids, wx2_fxs)
    if ut.VERBOSE or verbose:
        print('[smk_index.rvec] L___ End Compute Residuals')
    return wx2_rvecs, wx2_aids, wx2_fxs, wx2_maws, wx2_flags

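# Small sanity-check sketch of the alignment invariant on the returned
# mappings (assumes wx2_rvecs, wx2_aids, wx2_fxs came from a prior call to
# compute_residuals_; the check itself is an illustration, not part of the
# original module).
for wx in wx2_rvecs:
    # every word maps to parallel lists: one aid and one fxs-list per rvec
    assert len(wx2_rvecs[wx]) == len(wx2_aids[wx]) == len(wx2_fxs[wx])
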
def alloc_pool(self, num):
    print('[alloc] num = %r' % (num,))
    self.num = num
    self.data_pool = vt.tests.dummy.testdata_dummy_sift(num)
    print('[alloc] object size ' +
          ut.get_object_size_str(self.data_pool, 'data_pool'))

def execute_smk_L5(qreq_):
    """
    ibeis query interface

    Example:
        >>> from ibeis.algo.hots.smk.smk_match import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_match
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> ibs, annots_df, daids, qaids, invindex, qreq_ = smk_debug.testdata_internals_full()
        >>> qaid2_scores, qaid2_chipmatch = smk_match.execute_smk_L5(qreq_)

    Dev::
        from ibeis.algo.hots import pipeline
        filt2_meta = {}
        # Get both spatial verified and not
        qaid2_chipmatch_FILT_ = qaid2_chipmatch
        qaid2_chipmatch_SVER_ = pipeline.spatial_verification(qaid2_chipmatch_FILT_, qreq_)
        qaid2_qres_FILT_ = pipeline.chipmatch_to_resdict(qaid2_chipmatch_FILT_, filt2_meta, qreq_)
        qaid2_qres_SVER_ = pipeline.chipmatch_to_resdict(qaid2_chipmatch_SVER_, filt2_meta, qreq_)
        qres_FILT = qaid2_qres_FILT_[qaids[0]]
        qres_SVER = qaid2_qres_SVER_[qaids[0]]
        fig1 = qres_FILT.show_top(ibs, fnum=1, figtitle='filt')
        fig2 = qres_SVER.show_top(ibs, fnum=2, figtitle='sver')
        fig1.show()
        fig2.show()

    CommandLine::
        python -m memory_profiler dev.py --db PZ_Mothers -t smk2 --allgt --index 0
        python dev.py -t smk2 --allgt --db GZ_ALL
        python dev.py -t smk2 --allgt --db GZ_ALL --index 2:10 --vf --va
        python dev.py -t smk2 --allgt --db GZ_ALL --index 2:10 --vf --va --print-cfgstr
        python dev.py -t smk2 --allgt --db GZ_ALL --index 2:20 --vf --va
        python dev.py -t smk2 --allgt --db GZ_ALL --noqcache --index 2:20 --va --vf
        python dev.py -t smk2 --allgt --db PZ_Master0 && python dev.py -t smk3 --allgt --db PZ_Master0
        python dev.py -t smk2 --allgt --db PZ_Master0 --index 2:10 --va
        python dev.py -t smk2 --allgt --db PZ_Mothers --index 20:30
        python dev.py -t smk2 --allgt --db PZ_Mothers --noqcache --index 18:20 --super-strict --va
        python dev.py -t smk2 --db PZ_Master0 --qaid 7199 --va --quality --vf --noqcache
        python dev.py -t smk3 --allgt --db GZ_ALL --index 2:10 --vf --va
        python dev.py -t smk5 --allgt --db PZ_Master0 --noqcache ; python dev.py -t smk5 --allgt --db GZ_ALL --noqcache
        python dev.py -t smkd --allgt --db PZ_Mothers --index 1:3 --va --quality --vf --noqcache
        python dev.py -t smk_8k --allgt --db PZ_Mothers --index 20:30 --va --vf
        python dev.py -t smk_8k --allgt --db PZ_Mothers --index 20:30 --echo-hardcase
        python dev.py -t smk_8k --allgt --db PZ_Mothers --index 20:30 --vh
        python dev.py -t smk_8k_compare --allgt --db PZ_Mothers --index 20:30 --view-hard
    """
    memtrack = ut.MemoryTracker('[SMK ENTRY]')
    qaids = qreq_.get_external_qaids()
    ibs = qreq_.ibs
    # Params
    qparams = qreq_.qparams
    memtrack.report('[SMK PREINIT]')
    # Build ~~Pandas~~ dataframe (or maybe not)
    annots_df = smk_repr.make_annot_df(ibs)
    words, invindex = prepare_qreq(qreq_, annots_df, memtrack)
    withinfo = True
    # Execute smk for each query
    memtrack.report('[SMK QREQ INITIALIZED]')
    print('[SMK_MEM] invindex is using ' + ut.get_object_size_str(invindex))
    print('[SMK_MEM] qreq_ is using ' + ut.get_object_size_str(qreq_))
    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.invindex_dbgstr(invindex)
    qaid2_scores, qaid2_chipmatch = execute_smk_L4(
        annots_df, qaids, invindex, qparams, withinfo)
    memtrack.report('[SMK QREQ FINISHED]')
    return qaid2_scores, qaid2_chipmatch

def dev_train_distinctiveness(species=None):
    r"""
    Args:
        ibs (IBEISController): ibeis controller object
        species (None):

    CommandLine:
        python -m ibeis.algo.hots.distinctiveness_normalizer --test-dev_train_distinctiveness

        alias dev_train_distinctiveness='python -m ibeis.algo.hots.distinctiveness_normalizer --test-dev_train_distinctiveness'
        # Publishing (uses cached normalizers if available)
        dev_train_distinctiveness --species GZ --publish
        dev_train_distinctiveness --species PZ --publish
        dev_train_distinctiveness --species PZ --retrain

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.hots.distinctiveness_normalizer import *  # NOQA
        >>> import ibeis
        >>> species = ut.get_argval('--species', str, 'zebra_grevys')
        >>> dev_train_distinctiveness(species)
    """
    import ibeis
    #if 'species' not in vars() or species is None:
    #    species = 'zebra_grevys'
    if species == 'zebra_grevys':
        dbname = 'GZ_ALL'
    elif species == 'zebra_plains':
        dbname = 'PZ_Master0'
    else:
        # fail fast instead of hitting a NameError on dbname below
        raise ValueError('unsupported species=%r' % (species,))
    ibs = ibeis.opendb(dbname)
    global_distinctdir = ibs.get_global_distinctiveness_modeldir()
    cachedir = global_distinctdir
    dstcnvs_normer = DistinctivnessNormalizer(species, cachedir=cachedir)
    try:
        if ut.get_argflag('--retrain'):
            raise IOError('force cache miss')
        with ut.Timer('loading distinctiveness'):
            dstcnvs_normer.load(cachedir)
        # Cache hit
        print('distinctiveness model cache hit')
    except IOError:
        print('distinctiveness model cache miss')
        with ut.Timer('training distinctiveness'):
            # Need to train
            # Add one example from each name
            # TODO: add one exemplar per viewpoint for each name
            #max_vecs = 1E6
            #max_annots = 975
            max_annots = 975
            #ibs.fix_and_clean_database()
            nid_list = ibs.get_valid_nids()
            aids_list = ibs.get_name_aids(nid_list)
            # remove junk
            aids_list = ibs.unflat_map(ibs.filter_junk_annotations, aids_list)
            # remove empty
            aids_list = [aids for aids in aids_list if len(aids) > 0]
            num_annots_list = list(map(len, aids_list))
            aids_list = ut.sortedby(aids_list, num_annots_list, reverse=True)
            # take only one annot per name
            aid_list = ut.get_list_column(aids_list, 0)
            # Keep only a certain number of annots for distinctiveness mapping
            aid_list_ = ut.listclip(aid_list, max_annots)
            print('total num named annots = %r' % (sum(num_annots_list),))
            print('training distinctiveness using %d/%d singleton annots' %
                  (len(aid_list_), len(aid_list)))
            # vec
            # FIXME: qreq_ params for config rowid
            vecs_list = ibs.get_annot_vecs(aid_list_)
            num_vecs = sum(list(map(len, vecs_list)))
            print('num_vecs = %r' % (num_vecs,))
            vecs = np.vstack(vecs_list)
            print('vecs size = %r' % (ut.get_object_size_str(vecs),))
            dstcnvs_normer.init_support(vecs)
            dstcnvs_normer.save(global_distinctdir)

    if ut.get_argflag('--publish'):
        dstcnvs_normer.publish()

def compute_residuals_(words, wx2_idxs, wx2_maws, idx2_vec, idx2_aid, idx2_fx,
                       aggregate, verbose=False):
    """
    Computes residual vectors based on word assignments
    returns mapping from word index to a set of residual vectors

    Args:
        words (ndarray):
        wx2_idxs (dict):
        wx2_maws (dict):
        idx2_vec (dict):
        idx2_aid (dict):
        idx2_fx (dict):
        aggregate (bool):
        verbose (bool):

    Returns:
        tuple : (wx2_rvecs, wx2_aids, wx2_fxs, wx2_maws) formatted as::

            * wx2_rvecs - [ ... [ rvec_i1, ...,  rvec_Mi ]_i ... ]
            * wx2_aids  - [ ... [  aid_i1, ...,   aid_Mi ]_i ... ]
            * wx2_fxs   - [ ... [[fxs]_i1, ..., [fxs]_Mi ]_i ... ]

        For every word::

            * list of aggvecs
            * For every aggvec:
                * one parent aid, if aggregate is False: assert isunique(aids)
                * list of parent fxs, if aggregate is True: assert len(fxs) == 1

    Example:
        >>> # SLOW_DOCTEST
        >>> from ibeis.algo.hots.smk.smk_index import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> ibs, annots_df, daids, qaids, invindex, wx2_idxs, qparams = smk_debug.testdata_raw_internals1()
        >>> words     = invindex.words
        >>> idx2_aid  = invindex.idx2_daid
        >>> idx2_fx   = invindex.idx2_dfx
        >>> idx2_vec  = invindex.idx2_dvec
        >>> aggregate = ibs.cfg.query_cfg.smk_cfg.aggregate
        >>> wx2_rvecs, wx2_aids, wx2_fxs, wx2_maws, wx2_flags = compute_residuals_(words, wx2_idxs, wx2_maws, idx2_vec, idx2_aid, idx2_fx, aggregate)
    """
    if not ut.QUIET:
        print('[smk_index.rvec] +--- Start Compute Residuals')
    # list() keeps this working with python3 dict views
    wx_sublist = np.array(list(wx2_idxs.keys()))
    # Build lists w.r.t. words
    idxs_list = [wx2_idxs[wx].astype(hstypes.INDEX_TYPE) for wx in wx_sublist]
    aids_list = [idx2_aid.take(idxs) for idxs in idxs_list]
    if ut.DEBUG2:
        #assert np.all(np.diff(wx_sublist) == 1), 'not dense'
        assert all([len(a) == len(b) for a, b in
                    zip(idxs_list, aids_list)]), 'bad alignment'
        assert idx2_vec.shape[0] == idx2_fx.shape[0]
        assert idx2_vec.shape[0] == idx2_aid.shape[0]
    # Prealloc output
    if ut.VERBOSE or verbose:
        #print('[smk_index.rvec] Residual Vectors for %d words. aggregate=%r' %
        #      (len(wx2_idxs), aggregate,))
        lbl = ('[smk_index.rvec] agg rvecs' if aggregate else
               '[smk_index.rvec] nonagg rvecs')
        mark, end_ = ut.log_progress(lbl, len(wx2_idxs), freq=50,
                                     with_time=True)
    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.check_wx2_idxs(wx2_idxs, len(words))
    # Compute Residuals
    rvecs_list, flags_list = smk_residuals.compute_nonagg_rvecs(
        words, idx2_vec, wx_sublist, idxs_list)
    if ut.VERBOSE:
        print('Computed size(rvecs_list) = %r' % ut.get_object_size_str(rvecs_list))
        print('Computed size(flags_list) = %r' % ut.get_object_size_str(flags_list))
    if aggregate:
        maws_list = [wx2_maws[wx] for wx in wx_sublist]
        # Aggregate Residuals
        tup = smk_residuals.compute_agg_rvecs(rvecs_list, idxs_list,
                                              aids_list, maws_list)
        (aggvecs_list, aggaids_list, aggidxs_list, aggmaws_list,
         aggflags_list) = tup
        # Pack into common query structure
        aggfxs_list = [[idx2_fx.take(idxs) for idxs in aggidxs]
                       for aggidxs in aggidxs_list]
        wx2_aggvecs  = dict(zip(wx_sublist, aggvecs_list))
        wx2_aggaids  = dict(zip(wx_sublist, aggaids_list))
        wx2_aggfxs   = dict(zip(wx_sublist, aggfxs_list))
        wx2_aggmaws  = dict(zip(wx_sublist, aggmaws_list))
        wx2_aggflags = dict(zip(wx_sublist, aggflags_list))
        (wx2_rvecs, wx2_aids, wx2_fxs, wx2_maws, wx2_flags) = (
            wx2_aggvecs, wx2_aggaids, wx2_aggfxs, wx2_aggmaws, wx2_aggflags)
    else:
        # Hack non-aggregate residuals to have the same structure as aggregate
        # residuals for compatibility: i.e. each rvec gets a list of fxs that
        # contributed to it, and for SMK this is a list of size 1
        fxs_list = [[idx2_fx[idx:idx + 1] for idx in idxs] for idxs in idxs_list]
        wx2_rvecs = dict(zip(wx_sublist, rvecs_list))
        wx2_aids  = dict(zip(wx_sublist, aids_list))
        wx2_fxs   = dict(zip(wx_sublist, fxs_list))
        wx2_flags = dict(zip(wx_sublist, flags_list))
    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.check_wx2(words, wx2_rvecs, wx2_aids, wx2_fxs)
    if ut.VERBOSE or verbose:
        end_()
        print('[smk_index.rvec] L___ End Compute Residuals')
    return wx2_rvecs, wx2_aids, wx2_fxs, wx2_maws, wx2_flags

def augment_nnindexer_experiment():
    """
    References:
        http://answers.opencv.org/question/44592/flann-index-training-fails-with-segfault/

    CommandLine:
        utprof.py -m wbia.algo.hots._neighbor_experiment --test-augment_nnindexer_experiment
        python -m wbia.algo.hots._neighbor_experiment --test-augment_nnindexer_experiment

        python -m wbia.algo.hots._neighbor_experiment --test-augment_nnindexer_experiment --db PZ_MTEST --diskshow --adjust=.1 --save "augment_experiment_{db}.png" --dpath='.' --dpi=180 --figsize=9,6
        python -m wbia.algo.hots._neighbor_experiment --test-augment_nnindexer_experiment --db PZ_Master0 --diskshow --adjust=.1 --save "augment_experiment_{db}.png" --dpath='.' --dpi=180 --figsize=9,6 --nosave-flann --show
        python -m wbia.algo.hots._neighbor_experiment --test-augment_nnindexer_experiment --db PZ_Master0 --diskshow --adjust=.1 --save "augment_experiment_{db}.png" --dpath='.' --dpi=180 --figsize=9,6 --nosave-flann --show
        python -m wbia.algo.hots._neighbor_experiment --test-augment_nnindexer_experiment --db PZ_Master0 --diskshow --adjust=.1 --save "augment_experiment_{db}.png" --dpath='.' --dpi=180 --figsize=9,6 --nosave-flann --no-api-cache --nocache-uuids

        python -m wbia.algo.hots._neighbor_experiment --test-augment_nnindexer_experiment --db PZ_MTEST --show
        python -m wbia.algo.hots._neighbor_experiment --test-augment_nnindexer_experiment --db PZ_Master0 --show

        # RUNS THE SEGFAULTING CASE
        python -m wbia.algo.hots._neighbor_experiment --test-augment_nnindexer_experiment --db PZ_Master0 --show
        # Debug it
        gdb python
        run -m wbia.algo.hots._neighbor_experiment --test-augment_nnindexer_experiment --db PZ_Master0 --show
        gdb python
        run -m wbia.algo.hots._neighbor_experiment --test-augment_nnindexer_experiment --db PZ_Master0 --diskshow --adjust=.1 --save "augment_experiment_{db}.png" --dpath='.' --dpi=180 --figsize=9,6

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.hots._neighbor_experiment import *  # NOQA
        >>> # execute function
        >>> augment_nnindexer_experiment()
        >>> # verify results
        >>> ut.show_if_requested()
    """
    import wbia

    # build test data
    # ibs = wbia.opendb('PZ_MTEST')
    ibs = wbia.opendb(defaultdb='PZ_Master0')
    if ibs.get_dbname() == 'PZ_MTEST':
        initial = 1
        addition_stride = 4
        max_ceiling = 100
    elif ibs.get_dbname() == 'PZ_Master0':
        initial = 128
        # addition_stride = 64
        # addition_stride = 128
        addition_stride = 256
        max_ceiling = 10000
        # max_ceiling = 4000
        # max_ceiling = 2000
        # max_ceiling = 600
    else:
        assert False
    all_daids = ibs.get_valid_aids(species='zebra_plains')
    qreq_ = ibs.new_query_request(all_daids, all_daids)
    max_num = min(max_ceiling, len(all_daids))

    # Clear Caches
    ibs.delete_flann_cachedir()
    neighbor_index_cache.clear_memcache()
    neighbor_index_cache.clear_uuid_cache(qreq_)

    # Setup
    all_randomize_daids_ = ut.deterministic_shuffle(all_daids[:])
    # ensure all features are computed

    nnindexer_list = []
    addition_lbl = 'Addition'
    _addition_iter = list(range(initial + 1, max_num, addition_stride))
    addition_iter = iter(
        ut.ProgressIter(_addition_iter, lbl=addition_lbl, freq=1, autoadjust=False))
    time_list_addition = []
    addition_count_list = []
    tmp_cfgstr_list = []
    # initialize the reindex accumulators up front so the plotting code at
    # the end still works if the experiment is interrupted early
    time_list_reindex = []
    reindex_count_list = []
    # for _ in range(80):
    #     next(addition_iter)
    try:
        memtrack = ut.MemoryTracker(disable=False)
        for count in addition_iter:
            aid_list_ = all_randomize_daids_[0:count]
            # Request an indexer which could be an augmented version of an
            # existing indexer.
            with ut.Timer(verbose=False) as t:
                memtrack.report('BEFORE AUGMENT')
                nnindexer_ = neighbor_index_cache.request_augmented_wbia_nnindexer(
                    qreq_, aid_list_)
                memtrack.report('AFTER AUGMENT')
            nnindexer_list.append(nnindexer_)
            addition_count_list.append(count)
            time_list_addition.append(t.ellapsed)
            tmp_cfgstr_list.append(nnindexer_.cfgstr)
            logger.info('===============\n\n')
        logger.info(ut.repr2(time_list_addition))
        logger.info(ut.repr2(list(map(id, nnindexer_list))))
        logger.info(ut.repr2(tmp_cfgstr_list))
        logger.info(
            ut.repr2(list([nnindxer.cfgstr for nnindxer in nnindexer_list])))

        IS_SMALL = False

        if IS_SMALL:
            nnindexer_list = []
        reindex_label = 'Reindex'
        # go backwards for reindex
        _reindex_iter = list(range(initial + 1, max_num, addition_stride))[::-1]
        reindex_iter = ut.ProgressIter(_reindex_iter, lbl=reindex_label)
        for count in reindex_iter:
            logger.info('\n+===PREDONE====================\n')
            # check only a single size for memory leaks
            # count = max_num // 16 + ((x % 6) * 1)
            # x += 1

            aid_list_ = all_randomize_daids_[0:count]
            # Call the same code, but force rebuilds
            memtrack.report('BEFORE REINDEX')
            with ut.Timer(verbose=False) as t:
                nnindexer_ = neighbor_index_cache.request_augmented_wbia_nnindexer(
                    qreq_, aid_list_, force_rebuild=True, memtrack=memtrack)
            memtrack.report('AFTER REINDEX')
            ibs.print_cachestats_str()
            logger.info('[nnindex.MEMCACHE] size(NEIGHBOR_CACHE) = %s' %
                        (ut.get_object_size_str(
                            neighbor_index_cache.NEIGHBOR_CACHE.items()),))
            logger.info('[nnindex.MEMCACHE] len(NEIGHBOR_CACHE) = %s' %
                        (len(neighbor_index_cache.NEIGHBOR_CACHE.items()),))
            logger.info('[nnindex.MEMCACHE] size(UUID_MAP_CACHE) = %s' %
                        (ut.get_object_size_str(
                            neighbor_index_cache.UUID_MAP_CACHE),))
            logger.info('totalsize(nnindexer) = ' +
                        ut.get_object_size_str(nnindexer_))
            memtrack.report_type(neighbor_index_cache.NeighborIndex)
            ut.print_object_size_tree(nnindexer_, lbl='nnindexer_')
            if IS_SMALL:
                nnindexer_list.append(nnindexer_)
            reindex_count_list.append(count)
            time_list_reindex.append(t.ellapsed)
            # import cv2
            # import matplotlib as mpl
            # logger.info(mem_top.mem_top(limit=30, width=120,
            #     #exclude_refs=[cv2.__dict__, mpl.__dict__]
            #     ))
            logger.info('L___________________\n\n\n')
        logger.info(ut.repr2(time_list_reindex))
        if IS_SMALL:
            logger.info(ut.repr2(list(map(id, nnindexer_list))))
            logger.info(
                ut.repr2(list([nnindxer.cfgstr for nnindxer in nnindexer_list])))
    except KeyboardInterrupt:
        logger.info('\n[train] Caught CTRL+C')
        resolution = ''
        from six.moves import input
        while not resolution.isdigit():
            logger.info('\n[train] What do you want to do?')
            logger.info('[train]     0 - Continue')
            logger.info('[train]     1 - Embed')
            logger.info('[train]     ELSE - Stop network training')
            resolution = input('[train] Resolution: ')
        resolution = int(resolution)
        # We have a resolution
        if resolution == 0:
            logger.info('resuming training...')
        elif resolution == 1:
            ut.embed()

    import wbia.plottool as pt

    # __next__ is the python3 spelling of the old bound .next method
    next_fnum = iter(range(0, 1)).__next__
    pt.figure(fnum=next_fnum())
    if len(addition_count_list) > 0:
        pt.plot2(
            addition_count_list,
            time_list_addition,
            marker='-o',
            equal_aspect=False,
            x_label='num_annotations',
            label=addition_lbl + ' Time',
        )
    if len(reindex_count_list) > 0:
        pt.plot2(
            reindex_count_list,
            time_list_reindex,
            marker='-o',
            equal_aspect=False,
            x_label='num_annotations',
            label=reindex_label + ' Time',
        )
    pt.set_figtitle('Augmented indexer experiment')
    pt.legend()

def alloc_pool(self, num):
    print('[alloc] num = %r' % (num,))
    self.num = num
    self.data_pool = vt.tests.dummy.testdata_dummy_sift(num)
    print('[alloc] object size ' +
          ut.get_object_size_str(self.data_pool, 'data_pool'))

def augment_nnindexer_experiment():
    """
    References:
        http://answers.opencv.org/question/44592/flann-index-training-fails-with-segfault/

    CommandLine:
        utprof.py -m ibeis.algo.hots._neighbor_experiment --test-augment_nnindexer_experiment
        python -m ibeis.algo.hots._neighbor_experiment --test-augment_nnindexer_experiment

        python -m ibeis.algo.hots._neighbor_experiment --test-augment_nnindexer_experiment --db PZ_MTEST --diskshow --adjust=.1 --save "augment_experiment_{db}.png" --dpath='.' --dpi=180 --figsize=9,6
        python -m ibeis.algo.hots._neighbor_experiment --test-augment_nnindexer_experiment --db PZ_Master0 --diskshow --adjust=.1 --save "augment_experiment_{db}.png" --dpath='.' --dpi=180 --figsize=9,6 --nosave-flann --show
        python -m ibeis.algo.hots._neighbor_experiment --test-augment_nnindexer_experiment --db PZ_Master0 --diskshow --adjust=.1 --save "augment_experiment_{db}.png" --dpath='.' --dpi=180 --figsize=9,6 --nosave-flann --show
        python -m ibeis.algo.hots._neighbor_experiment --test-augment_nnindexer_experiment --db PZ_Master0 --diskshow --adjust=.1 --save "augment_experiment_{db}.png" --dpath='.' --dpi=180 --figsize=9,6 --nosave-flann --no-api-cache --nocache-uuids

        python -m ibeis.algo.hots._neighbor_experiment --test-augment_nnindexer_experiment --db PZ_MTEST --show
        python -m ibeis.algo.hots._neighbor_experiment --test-augment_nnindexer_experiment --db PZ_Master0 --show

        # RUNS THE SEGFAULTING CASE
        python -m ibeis.algo.hots._neighbor_experiment --test-augment_nnindexer_experiment --db PZ_Master0 --show
        # Debug it
        gdb python
        run -m ibeis.algo.hots._neighbor_experiment --test-augment_nnindexer_experiment --db PZ_Master0 --show
        gdb python
        run -m ibeis.algo.hots._neighbor_experiment --test-augment_nnindexer_experiment --db PZ_Master0 --diskshow --adjust=.1 --save "augment_experiment_{db}.png" --dpath='.' --dpi=180 --figsize=9,6

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.hots._neighbor_experiment import *  # NOQA
        >>> # execute function
        >>> augment_nnindexer_experiment()
        >>> # verify results
        >>> ut.show_if_requested()
    """
    import ibeis
    # build test data
    #ibs = ibeis.opendb('PZ_MTEST')
    ibs = ibeis.opendb(defaultdb='PZ_Master0')
    if ibs.get_dbname() == 'PZ_MTEST':
        initial = 1
        addition_stride = 4
        max_ceiling = 100
    elif ibs.get_dbname() == 'PZ_Master0':
        initial = 128
        #addition_stride = 64
        #addition_stride = 128
        addition_stride = 256
        max_ceiling = 10000
        #max_ceiling = 4000
        #max_ceiling = 2000
        #max_ceiling = 600
    else:
        assert False
    all_daids = ibs.get_valid_aids(species='zebra_plains')
    qreq_ = ibs.new_query_request(all_daids, all_daids)
    max_num = min(max_ceiling, len(all_daids))

    # Clear Caches
    ibs.delete_flann_cachedir()
    neighbor_index_cache.clear_memcache()
    neighbor_index_cache.clear_uuid_cache(qreq_)

    # Setup
    all_randomize_daids_ = ut.deterministic_shuffle(all_daids[:])
    # ensure all features are computed
    #ibs.get_annot_vecs(all_randomize_daids_, ensure=True)
    #ibs.get_annot_fgweights(all_randomize_daids_, ensure=True)

    nnindexer_list = []
    addition_lbl = 'Addition'
    _addition_iter = list(range(initial + 1, max_num, addition_stride))
    addition_iter = iter(ut.ProgressIter(_addition_iter, lbl=addition_lbl,
                                         freq=1, autoadjust=False))
    time_list_addition = []
    addition_count_list = []
    tmp_cfgstr_list = []
    # initialize the reindex accumulators up front so the plotting code at
    # the end still works if the experiment is interrupted early
    time_list_reindex = []
    reindex_count_list = []
    #for _ in range(80):
    #    next(addition_iter)
    try:
        memtrack = ut.MemoryTracker(disable=False)
        for count in addition_iter:
            aid_list_ = all_randomize_daids_[0:count]
            # Request an indexer which could be an augmented version of an
            # existing indexer.
            with ut.Timer(verbose=False) as t:
                memtrack.report('BEFORE AUGMENT')
                nnindexer_ = neighbor_index_cache.request_augmented_ibeis_nnindexer(
                    qreq_, aid_list_)
                memtrack.report('AFTER AUGMENT')
            nnindexer_list.append(nnindexer_)
            addition_count_list.append(count)
            time_list_addition.append(t.ellapsed)
            tmp_cfgstr_list.append(nnindexer_.cfgstr)
            print('===============\n\n')
        print(ut.list_str(time_list_addition))
        print(ut.list_str(list(map(id, nnindexer_list))))
        print(ut.list_str(tmp_cfgstr_list))
        print(ut.list_str(list([nnindxer.cfgstr for nnindxer in nnindexer_list])))

        IS_SMALL = False

        if IS_SMALL:
            nnindexer_list = []
        reindex_label = 'Reindex'
        # go backwards for reindex
        _reindex_iter = list(range(initial + 1, max_num, addition_stride))[::-1]
        reindex_iter = ut.ProgressIter(_reindex_iter, lbl=reindex_label)
        for count in reindex_iter:
            print('\n+===PREDONE====================\n')
            # check only a single size for memory leaks
            #count = max_num // 16 + ((x % 6) * 1)
            #x += 1

            aid_list_ = all_randomize_daids_[0:count]
            # Call the same code, but force rebuilds
            memtrack.report('BEFORE REINDEX')
            with ut.Timer(verbose=False) as t:
                nnindexer_ = neighbor_index_cache.request_augmented_ibeis_nnindexer(
                    qreq_, aid_list_, force_rebuild=True, memtrack=memtrack)
            memtrack.report('AFTER REINDEX')
            ibs.print_cachestats_str()
            print('[nnindex.MEMCACHE] size(NEIGHBOR_CACHE) = %s' % (
                ut.get_object_size_str(neighbor_index_cache.NEIGHBOR_CACHE.items()),))
            print('[nnindex.MEMCACHE] len(NEIGHBOR_CACHE) = %s' % (
                len(neighbor_index_cache.NEIGHBOR_CACHE.items()),))
            print('[nnindex.MEMCACHE] size(UUID_MAP_CACHE) = %s' % (
                ut.get_object_size_str(neighbor_index_cache.UUID_MAP_CACHE),))
            print('totalsize(nnindexer) = ' + ut.get_object_size_str(nnindexer_))
            memtrack.report_type(neighbor_index_cache.NeighborIndex)
            ut.print_object_size_tree(nnindexer_, lbl='nnindexer_')
            if IS_SMALL:
                nnindexer_list.append(nnindexer_)
            reindex_count_list.append(count)
            time_list_reindex.append(t.ellapsed)
            #import cv2
            #import matplotlib as mpl
            #print(mem_top.mem_top(limit=30, width=120,
            #                      #exclude_refs=[cv2.__dict__, mpl.__dict__]
            #                      ))
            print('L___________________\n\n\n')
        print(ut.list_str(time_list_reindex))
        if IS_SMALL:
            print(ut.list_str(list(map(id, nnindexer_list))))
            print(ut.list_str(list([nnindxer.cfgstr for nnindxer in nnindexer_list])))
    except KeyboardInterrupt:
        print('\n[train] Caught CTRL+C')
        resolution = ''
        from six.moves import input
        while not resolution.isdigit():
            print('\n[train] What do you want to do?')
            print('[train]     0 - Continue')
            print('[train]     1 - Embed')
            print('[train]     ELSE - Stop network training')
            resolution = input('[train] Resolution: ')
        resolution = int(resolution)
        # We have a resolution
        if resolution == 0:
            print('resuming training...')
        elif resolution == 1:
            ut.embed()

    import plottool as pt

    # portable replacement for the python2-only bound method iter(...).next
    _fnum_iter = iter(range(0, 1))
    next_fnum = lambda: next(_fnum_iter)  # NOQA
    pt.figure(fnum=next_fnum())
    if len(addition_count_list) > 0:
        pt.plot2(addition_count_list, time_list_addition, marker='-o',
                 equal_aspect=False, x_label='num_annotations',
                 label=addition_lbl + ' Time')
    if len(reindex_count_list) > 0:
        pt.plot2(reindex_count_list, time_list_reindex, marker='-o',
                 equal_aspect=False, x_label='num_annotations',
                 label=reindex_label + ' Time')
    pt.set_figtitle('Augmented indexer experiment')
    pt.legend()
