def grab_numpy_testdata(shape=(3e3, 128), dtype=np.uint8): ndata = utool.get_arg('--ndata', type_=int, default=2) print('[TEST] build ndata=%d numpy arrays with shape=%r' % (ndata, shape)) print(' * expected_memory(table_list) = %s' % utool.byte_str2(ndata * np.product(shape))) table_list = [np.empty(shape, dtype=dtype) for i in xrange(ndata)] print(' * memory+overhead(table_list) = %s' % utool.byte_str2(utool.get_object_size(table_list))) return table_list
def get_tree_info(root, path, dpath_to_unique_fidx=dpath_to_unique_fidx, drive=drive, depth=0): path_components = ut.dirsplit(path) current = root for c in path_components: current = current[c] if isinstance(current, list): tree_tmp = [] else: key_list = list(current.keys()) child_list = [join(path, key) for key in key_list] dpath_nbytes_list = [ drive.get_total_nbytes(dpath_to_unique_fidx.get(child, [])) for child in child_list ] nfiles_list = [ len(dpath_to_unique_fidx.get(child, [])) for child in child_list ] tree_tmp = sorted([ (key, ut.byte_str2(nbytes), nfiles) if depth == 0 else (key, ut.byte_str2(nbytes), nfiles, get_tree_info(root, path=child, dpath_to_unique_fidx=dpath_to_unique_fidx, drive=drive, depth=depth - 1)) for key, child, nbytes, nfiles in zip( key_list, child_list, dpath_nbytes_list, nfiles_list) ]) return tree_tmp
def get_tree_info(root, path, dpath_to_unique_fidx=dpath_to_unique_fidx, drive=drive, depth=0): path_components = ut.dirsplit(path) current = root for c in path_components: current = current[c] if isinstance(current, list): tree_tmp = [] else: key_list = list(current.keys()) child_list = [join(path, key) for key in key_list] dpath_nbytes_list = [ drive.get_total_nbytes(dpath_to_unique_fidx.get(child, [])) for child in child_list ] nfiles_list = [ len(dpath_to_unique_fidx.get(child, [])) for child in child_list ] tree_tmp = sorted([ (key, ut.byte_str2(nbytes), nfiles) if depth == 0 else (key, ut.byte_str2(nbytes), nfiles, get_tree_info(root, path=child, dpath_to_unique_fidx=dpath_to_unique_fidx, drive=drive, depth=depth - 1)) for key, child, nbytes, nfiles in zip(key_list, child_list, dpath_nbytes_list, nfiles_list) ]) return tree_tmp
def print_tier_info(drive): tier_windows = drive.get_tier_windows() tier_flags = drive.get_tier_flags() for tier, flags in enumerate(tier_flags): high, low = tier_windows[tier] print('tier %r window = %s - %s' % (tier, ut.byte_str2(high), ut.byte_str2(low))) print(' len(fpaths) = %r' % (np.sum(flags)))
def grab_numpy_testdata(shape=(3e3, 128), dtype=np.uint8): ndata = utool.get_argval('--ndata', type_=int, default=2) print('[TEST] build ndata=%d numpy arrays with shape=%r' % (ndata, shape)) print(' * expected_memory(table_list) = %s' % utool.byte_str2(ndata * np.product(shape))) table_list = [np.empty(shape, dtype=dtype) for i in range(ndata)] print(' * memory+overhead(table_list) = %s' % utool.byte_str2(utool.get_object_size(table_list))) return table_list
def check_consistency(drive): print('Checking %r consistency' % (drive,)) total = ut.get_total_diskbytes(drive.root_dpath) free = ut.get_free_diskbytes(drive.root_dpath) used = total - free print('total = %r' % (total,)) print('used = %r' % (used,)) print('drive.total_bytes = %r' % (drive.total_bytes,)) print('total = %r' % (ut.byte_str2(total),)) print('used = %r' % (ut.byte_str2(used),)) print('drive.total_bytes = %r' % (ut.byte_str2(drive.total_bytes),))
def check_consistency(drive): print('Checking %r consistency' % (drive, )) total = ut.get_total_diskbytes(drive.root_dpath) free = ut.get_free_diskbytes(drive.root_dpath) used = total - free print('total = %r' % (total, )) print('used = %r' % (used, )) print('drive.total_bytes = %r' % (drive.total_bytes, )) print('total = %r' % (ut.byte_str2(total), )) print('used = %r' % (ut.byte_str2(used), )) print('drive.total_bytes = %r' % (ut.byte_str2(drive.total_bytes), ))
def print_size_info(inva): sizes = inva.get_size_info() sizes = ut.sort_dict(sizes, 'vals', ut.identity) total_nbytes = sum(sizes.values()) logger.info( ut.align(ut.repr3(ut.map_dict_vals(ut.byte_str2, sizes), strvals=True), ':') ) logger.info('total_nbytes = %r' % (ut.byte_str2(total_nbytes),))
def get_tier_flags(drive): try: tier_flags = drive.cache.load('tier_flags') except ut.CacheMissException: tier_windows = drive.get_tier_windows() print('Tier Windows') for tier, (high, low) in enumerate(tier_windows): print('tier %r window = %s - %s' % (tier, ut.byte_str2(high), ut.byte_str2(low))) fpath_bytes_arr = np.array(drive.fpath_bytes_list) tier_flags = [ np.logical_and.reduce([fpath_bytes_arr <= high, fpath_bytes_arr > low]) for high, low in tier_windows ] drive.cache.save('tier_flags', tier_flags) return tier_flags
def TEST_SQL_NUMPY(): sqldb_fname = 'temp_test_sql_numpy.sqlite3' sqldb_dpath = utool.util_cplat.get_app_resource_dir('ibeis', 'testfiles') utool.ensuredir(sqldb_dpath) utool.util_path.remove_file(join(sqldb_dpath, sqldb_fname), dryrun=False) db = sqldbc.SQLDatabaseController(sqldb_dpath=sqldb_dpath, sqldb_fname=sqldb_fname) db.add_table('temp', [ ('temp_id', 'INTEGER PRIMARY KEY'), ('temp_hash', 'NUMPY'), ]) tt = utool.tic() feats_list = grab_numpy_testdata(shape=(3e3, 128), dtype=np.uint8) print(' * numpy.new time=%r sec' % utool.toc(tt)) print('[TEST] insert numpy arrays') tt = utool.tic() feats_iter = ((feats, ) for feats in feats_list) db.executemany(operation=''' INSERT INTO temp ( temp_hash ) VALUES (?) ''', params_iter=feats_iter) print(' * execute insert time=%r sec' % utool.toc(tt)) print('[TEST] save sql database') tt = utool.tic() #db.cur.commit() db.connection.commit() print(' * commit time=%r sec' % utool.toc(tt)) print('[TEST] read from sql database') tt = utool.tic() db.cur.execute('SELECT temp_hash FROM temp', []) print(' * execute select time=%r sec' % utool.toc(tt)) tt = utool.tic() result_list = _results_gen(db.cur) print(' * iter results time=%r sec' % utool.toc(tt)) print(' * memory(result_list) = %s' % utool.byte_str2(utool.get_object_size(result_list))) del result_list #print('[TEST] result_list=%r' % result_list) print('[TEST] dump sql database') tt = utool.tic() db.dump('temp.dump.txt') print(' * dump time=%r sec' % utool.toc(tt)) #with open('temp.dump.txt') as file_: # print(file_.read()) return locals()
def print_tree(root, path, dpath_to_unique_fidx=dpath_to_unique_fidx, drive=drive, depth=None): print('path = %r' % (path,)) print(ut.byte_str2(drive.get_total_nbytes(dpath_to_unique_fidx[path]))) path_components = ut.dirsplit(path) # Navigate to correct spot in tree current = root for c in path_components: current = current[c] print(ut.repr3(current, truncate=1))
def TEST_SQL_NUMPY(): sqldb_fname = 'temp_test_sql_numpy.sqlite3' sqldb_dpath = utool.util_cplat.get_app_resource_dir('ibeis', 'testfiles') utool.ensuredir(sqldb_dpath) utool.util_path.remove_file(join(sqldb_dpath, sqldb_fname), dryrun=False) db = sqldbc.SQLDatabaseController(sqldb_dpath=sqldb_dpath, sqldb_fname=sqldb_fname) db.schema('temp', [ ('temp_id', 'INTEGER PRIMARY KEY'), ('temp_hash', 'NUMPY'), ]) tt = utool.tic() feats_list = grab_numpy_testdata(shape=(3e3, 128), dtype=np.uint8) print(' * numpy.new time=%r sec' % utool.toc(tt)) print('[TEST] insert numpy arrays') tt = utool.tic() feats_iter = ((feats, ) for feats in feats_list) db.executemany(operation=''' INSERT INTO temp ( temp_hash ) VALUES (?) ''', params_iter=feats_iter) print(' * execute insert time=%r sec' % utool.toc(tt)) print('[TEST] save sql database') tt = utool.tic() #db.cur.commit() db.connection.commit() print(' * commit time=%r sec' % utool.toc(tt)) print('[TEST] read from sql database') tt = utool.tic() db.cur.execute('SELECT temp_hash FROM temp', []) print(' * execute select time=%r sec' % utool.toc(tt)) tt = utool.tic() result_list = _results_gen(db.cur) print(' * iter results time=%r sec' % utool.toc(tt)) print(' * memory(result_list) = %s' % utool.byte_str2(utool.get_object_size(result_list))) del result_list #print('[TEST] result_list=%r' % result_list) print('[TEST] dump sql database') tt = utool.tic() db.dump('temp.dump.txt') print(' * dump time=%r sec' % utool.toc(tt)) #with open('temp.dump.txt') as file_: # print(file_.read()) return locals()
def get_tier_flags(drive): try: tier_flags = drive.cache.load('tier_flags') except ut.CacheMissException: tier_windows = drive.get_tier_windows() print('Tier Windows') for tier, (high, low) in enumerate(tier_windows): print('tier %r window = %s - %s' % (tier, ut.byte_str2(high), ut.byte_str2(low))) fpath_bytes_arr = np.array(drive.fpath_bytes_list) tier_flags = [ np.logical_and.reduce( [fpath_bytes_arr <= high, fpath_bytes_arr > low]) for high, low in tier_windows ] drive.cache.save('tier_flags', tier_flags) return tier_flags
def print_tree(root, path, dpath_to_unique_fidx=dpath_to_unique_fidx, drive=drive, depth=None): print('path = %r' % (path, )) print(ut.byte_str2(drive.get_total_nbytes(dpath_to_unique_fidx[path]))) path_components = ut.dirsplit(path) # Navigate to correct spot in tree current = root for c in path_components: current = current[c] print(ut.repr3(current, truncate=1))
def get_infostr(drive, extra=False): drive.num_fpaths = len(drive.fpath_list) infostr_list = [str(drive)] drive.get_filesize_errors() nan_fpaths = drive.get_filesize_errors() infostr_list += ['#nan fsize fpaths = %r' % (len(nan_fpaths),)] if extra: infostr_list += ['#nan_fpaths = %r' % (nan_fpaths[0:10],)] total_drive_bytes = ut.get_total_diskbytes(drive.root_dpath) infostr_list += [('total drive size = %r' % (ut.byte_str2(total_drive_bytes),))] infostr_list += [('drive.num_fpaths = %r' % (drive.num_fpaths,))] infostr = '\n'.join(infostr_list) return infostr
def get_infostr(drive, extra=False): drive.num_fpaths = len(drive.fpath_list) infostr_list = [str(drive)] drive.get_filesize_errors() nan_fpaths = drive.get_filesize_errors() infostr_list += ['#nan fsize fpaths = %r' % (len(nan_fpaths), )] if extra: infostr_list += ['#nan_fpaths = %r' % (nan_fpaths[0:10], )] total_drive_bytes = ut.get_total_diskbytes(drive.root_dpath) infostr_list += [ ('total drive size = %r' % (ut.byte_str2(total_drive_bytes), )) ] infostr_list += [('drive.num_fpaths = %r' % (drive.num_fpaths, ))] infostr = '\n'.join(infostr_list) return infostr
def invert_index(vecs_list, ax_list, verbose=ut.NOT_QUIET): r""" Aggregates descriptors of input annotations and returns inverted information Args: vecs_list (list): ax_list (list): verbose (bool): verbosity flag(default = True) Returns: tuple: (idx2_vec, idx2_ax, idx2_fx) CommandLine: python -m ibeis.algo.hots.neighbor_index --test-invert_index Example: >>> # SLOW_DOCTEST >>> from ibeis.algo.hots.neighbor_index import * # NOQA >>> import vtool as vt >>> num = 100 >>> rng = np.random.RandomState(0) >>> ax_list = np.arange(num) >>> vecs_list = [vt.tests.dummy.get_dummy_dpts(rng.randint(100)) for ax in ax_list] >>> verbose = True >>> (idx2_vec, idx2_ax, idx2_fx) = invert_index(vecs_list, ax_list, verbose) """ if ut.VERYVERBOSE: print('[nnindex] stacking descriptors from %d annotations' % len(ax_list)) try: idx2_vec, idx2_ax, idx2_fx = vt.invertible_stack(vecs_list, ax_list) assert idx2_vec.shape[0] == idx2_ax.shape[0] assert idx2_vec.shape[0] == idx2_fx.shape[0] except MemoryError as ex: ut.printex(ex, 'cannot build inverted index', '[!memerror]') raise if ut.VERYVERBOSE or verbose: print('[nnindex] stacked nVecs={nVecs} from nAnnots={nAnnots}'.format( nVecs=len(idx2_vec), nAnnots=len(ax_list))) print('[nnindex] idx2_vecs.dtype = {}'.format(idx2_vec.dtype)) print('[nnindex] memory(idx2_vecs) = {}'.format( ut.byte_str2(idx2_vec.size * idx2_vec.dtype.itemsize))) return idx2_vec, idx2_ax, idx2_fx
def get_layer_info(layer): r""" Args: layer (?): Returns: ?: layer_info CommandLine: python -m ibeis_cnn.net_strs get_layer_info --show Example: >>> # DISABLE_DOCTEST >>> from ibeis_cnn.net_strs import * # NOQA >>> from ibeis_cnn import models >>> model = models.mnist.MNISTModel(batch_size=8, data_shape=(24, 24, 1), output_dims=10) >>> model.init_arch() >>> nn_layers = model.get_all_layers() >>> for layer in nn_layers: >>> layer_info = get_layer_info(layer) >>> print(ut.repr3(layer_info, nl=1)) """ import operator import ibeis_cnn.__LASAGNE__ as lasagne # Information that contributes to RAM usage import numpy as np # Get basic layer infos output_shape = lasagne.layers.get_output_shape(layer) input_shape = getattr(layer, 'input_shape', []) # Get number of outputs ignoring the batch size num_outputs = functools.reduce(operator.mul, output_shape[1:]) if len(input_shape): num_inputs = functools.reduce(operator.mul, input_shape[1:]) else: num_inputs = 0 # TODO: if we can ever support non float32 calculations this must change #layer_type = 'float32' layer_dtype = np.dtype('float32') # Get individual param infos param_infos = [] for param, tags in layer.params.items(): value = param.get_value() pbasename = param_basename(layer, param) param_info = ut.odict([ ('name', param.name), ('basename', pbasename), ('tags', tags), ('shape', value.shape), ('size', value.size), ('itemsize', value.dtype.itemsize), ('dtype', str(value.dtype)), ('bytes', value.size * value.dtype.itemsize), ]) def initializer_info(initclass): initclassname = initclass.__class__.__name__ if initclassname == 'Constant': spec = initclass.val else: spec = ut.odict() spec['type'] = initclassname for key, val in initclass.__dict__.items(): if isinstance(val, lasagne.init.Initializer): spec[key] = initializer_info(val) elif isinstance(val, type) and issubclass(val, lasagne.init.Initializer): spec[key] = val.__name__ #initializer_info(val()) else: spec[key] = val return spec if hasattr(layer, '_initializers'): #print('layer = %r' % (layer,)) initclass = layer._initializers[param] spec = initializer_info(initclass) param_info['init'] = spec param_infos.append(param_info) # Combine param infos param_str = surround(', '.join( [paramstr(layer, p, tags) for p, tags in layer.params.items()]), '[]') param_type_str = surround(', '.join( [repr(p.type) for p, tags in layer.params.items()]), '[]') num_params = sum([info['size'] for info in param_infos]) classalias_map = { 'ElemwiseSumLayer': 'ElemwiseSum', 'Conv2DCCLayer' : 'Conv2D', 'Conv2DDNNLayer' : 'Conv2D', 'Conv2DLayer' : 'Conv2D', 'MaxPool2DLayer': 'MaxPool2D', 'MaxPool2DCCLayer' : 'MaxPool2D', 'MaxPool2DDNNLayer' : 'MaxPool2D', 'LeakyRectify' : 'LReLU', 'InputLayer' : 'Input', 'GaussianNoiseLayer': 'Noise', 'DropoutLayer' : 'Dropout', 'DenseLayer' : 'Dense', 'NonlinearityLayer' : 'Nonlinearity', 'FlattenLayer' : 'Flatten', 'L2NormalizeLayer' : 'L2Norm', 'BatchNormLayer' : 'BatchNorm', 'BatchNormLayer2' : 'BatchNorm', } layer_attrs_ignore_dict = { 'MaxPool2D' : ['mode', 'ignore_border'], 'Dropout' : ['rescale'], 'Conv2D' : ['convolution'], 'BatchNorm': ['epsilon', 'mean', 'inv_std', 'axes', 'beta', 'gamma'], 'BatchNorm2': ['epsilon', 'mean', 'inv_std', 'axes', 'beta', 'gamma'], #'ElemwiseSum': ['merge_function', 'cropping'], #'ElemwiseSum': [], 'FeaturePoolLayer': ['axis'], } layer_attrs_dict = { #'ElemwiseSum': ['coeffs'], #'ElemwiseSum': ['coeffs', 'merge_function', 'cropping'], 'Noise' : ['sigma'], 'Input' : ['shape'], 'Dropout' : ['p', 'shared_axes'], 'Conv2D' : ['num_filters', 
'filter_size', 'stride', 'output_shape', 'num_groups'], 'MaxPool2D' : ['stride', 'pool_size', 'output_shape'], # 'mode'], 'Dense' : ['num_units', 'num_leading_axes'], 'SoftMax' : ['num_units', 'num_leading_axes'], 'L2Norm' : ['axis'], 'BatchNorm' : ['alpha'], 'BatchNorm2' : ['alpha'], 'FeaturePoolLayer': ['pool_size', 'pool_function'] } #layer_attrs_dict = {} all_ignore_attrs = ['nonlinearity', 'b', 'W', 'get_output_kwargs', 'name', 'input_shapes', 'input_layers', 'input_shape', 'input_layer', 'input_var', 'untie_biases', '_initializers', 'flip_filters', 'pad', 'params', 'n', '_is_main_layer'] classname = layer.__class__.__name__ classalias = classalias_map.get(classname, classname) #if classalias == 'FeaturePoolLayer' and ut.get_funcname(layer.pool_function) == 'max': # classalias = 'MaxOut' if classalias == 'Dense' and ut.get_funcname(layer.nonlinearity) == 'softmax': classalias = 'SoftMax' layer_attrs = ut.odict([ (key, getattr(layer, key)) for key in layer_attrs_dict.get(classalias, []) ]) ignore_attrs = (all_ignore_attrs + layer_attrs_ignore_dict.get(classalias, [])) if classalias not in layer_attrs_dict or (classalias == classname and len(layer_attrs) == 0): layer_attrs = layer.__dict__.copy() ut.delete_dict_keys(layer_attrs, ignore_attrs) for key in list(layer_attrs.keys()): val = layer_attrs[key] if ut.is_funclike(val): layer_attrs[key] = ut.get_funcname(val) attr_key_list = list(layer_attrs.keys()) missing_keys = (set(layer.__dict__.keys()) - set(ignore_attrs) - set(attr_key_list)) missing_keys = [k for k in missing_keys if not k.startswith('_')] #if layer_type == 'Conv2DCCLayer': # ut.embed() DEBUG = True if DEBUG and len(missing_keys) > 0: print('---') print(' * ' + classname) print(' * missing keys: %r' % (missing_keys,)) print(' * has keys: %r' % (attr_key_list,)) if True: #import utool #with utool.embed_on_exception_context: #raise AssertionError('MISSING KEYS') pass # handle None batch sizes if output_shape[0] is None: size = np.prod(output_shape[1:]) else: size = np.prod(output_shape) layer_info = ut.odict([ ('name', layer.name), ('classname', classname), ('classalias', classalias), ('output_shape', output_shape), ('input_shape', input_shape), ('num_outputs', num_outputs), ('num_inputs', num_inputs), ('size', size), ('itemsize', layer_dtype.itemsize), ('dtype', str(layer_dtype)), ('num_params', num_params), ('param_infos', param_infos), ('param_str', param_str), ('param_type_str', param_type_str), ('layer_attrs', layer_attrs), ('nonlinearity', None), ]) if hasattr(layer, 'nonlinearity'): try: nonlinearity = layer.nonlinearity.__name__ except AttributeError: nonlinearity = layer.nonlinearity.__class__.__name__ layer_info['nonlinearity'] = ut.odict([]) layer_info['nonlinearity']['type'] = nonlinearity layer_info['nonlinearity'].update(layer.nonlinearity.__dict__) #attr_str_list.append('nonlinearity={0}'.format(nonlinearity)) param_bytes = sum([info['bytes'] for info in param_infos]) layer_bytes = layer_info['size'] * layer_info['itemsize'] #if classname in ['BatchNormLayer', 'NonlinearityLayer']: # layer_bytes = 0 layer_info['bytes'] = layer_bytes layer_info['param_bytes'] = param_bytes layer_info['total_bytes'] = layer_bytes + param_bytes layer_info['total_memory'] = ut.byte_str2(layer_info['total_bytes']) return layer_info
def get_dbinfo(ibs, verbose=True, with_imgsize=False, with_bytes=False, with_contrib=False, with_agesex=False, with_header=True, short=False, tag='dbinfo', aid_list=None): """ Returns dictionary of digestable database information Infostr is a string summary of all the stats. Prints infostr in addition to returning locals Args: ibs (IBEISController): verbose (bool): with_imgsize (bool): with_bytes (bool): Returns: dict: CommandLine: python -m ibeis.other.dbinfo --exec-get_dbinfo:0 python -m ibeis.other.dbinfo --test-get_dbinfo:1 python -m ibeis.other.dbinfo --test-get_dbinfo:0 --db NNP_Master3 python -m ibeis.other.dbinfo --test-get_dbinfo:0 --db PZ_Master1 python -m ibeis.other.dbinfo --test-get_dbinfo:0 --db GZ_ALL python -m ibeis.other.dbinfo --exec-get_dbinfo:0 --db PZ_ViewPoints python -m ibeis.other.dbinfo --exec-get_dbinfo:0 --db GZ_Master1 python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a ctrl python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default:minqual=ok,require_timestamp=True --dbdir ~/lev/media/danger/LEWA python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default:minqual=ok,require_timestamp=True --dbdir ~/lev/media/danger/LEWA --loadbackup=0 python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default: --dbdir ~/lev/media/danger/LEWA python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default: --dbdir ~/lev/media/danger/LEWA --loadbackup=0 Example1: >>> # SCRIPT >>> from ibeis.other.dbinfo import * # NOQA >>> import ibeis >>> defaultdb = 'testdb1' >>> ibs, aid_list = ibeis.testdata_aids(defaultdb, a='default:minqual=ok,view=primary,view_ext1=1') >>> kwargs = ut.get_kwdefaults(get_dbinfo) >>> kwargs['verbose'] = False >>> kwargs['aid_list'] = aid_list >>> kwargs = ut.parse_dict_from_argv(kwargs) >>> output = get_dbinfo(ibs, **kwargs) >>> result = (output['info_str']) >>> print(result) >>> #ibs = ibeis.opendb(defaultdb='testdb1') >>> # <HACK FOR FILTERING> >>> #from ibeis.expt import cfghelpers >>> #from ibeis.expt import annotation_configs >>> #from ibeis.init import filter_annots >>> #named_defaults_dict = ut.dict_take(annotation_configs.__dict__, >>> # annotation_configs.TEST_NAMES) >>> #named_qcfg_defaults = dict(zip(annotation_configs.TEST_NAMES, >>> # ut.get_list_column(named_defaults_dict, 'qcfg'))) >>> #acfg = cfghelpers.parse_argv_cfg(('--annot-filter', '-a'), named_defaults_dict=named_qcfg_defaults, default=None)[0] >>> #aid_list = ibs.get_valid_aids() >>> # </HACK FOR FILTERING> Example1: >>> # ENABLE_DOCTEST >>> from ibeis.other.dbinfo import * # NOQA >>> import ibeis >>> verbose = True >>> short = True >>> #ibs = ibeis.opendb(db='GZ_ALL') >>> #ibs = ibeis.opendb(db='PZ_Master0') >>> ibs = ibeis.opendb('testdb1') >>> assert ibs.get_dbname() == 'testdb1', 'DO NOT DELETE CONTRIBUTORS OF OTHER DBS' >>> ibs.delete_contributors(ibs.get_valid_contrib_rowids()) >>> ibs.delete_empty_nids() >>> #ibs = ibeis.opendb(db='PZ_MTEST') >>> output = get_dbinfo(ibs, with_contrib=False, verbose=False, short=True) >>> result = (output['info_str']) >>> print(result) +============================ DB Info: testdb1 DB Notes: None DB NumContrib: 0 ---------- # Names = 7 # Names (unassociated) = 0 # Names (singleton) = 5 # Names (multiton) = 2 ---------- # Annots = 13 # Annots (unknown) = 4 # Annots (singleton) = 5 # Annots (multiton) = 4 ---------- # Img = 13 L============================ """ # TODO Database size in bytes # TODO: occurrence, contributors, etc... 
# Basic variables request_annot_subset = False _input_aid_list = aid_list # NOQA if aid_list is None: valid_aids = ibs.get_valid_aids() valid_nids = ibs.get_valid_nids() valid_gids = ibs.get_valid_gids() else: if isinstance(aid_list, str): # Hack to get experiment stats on aids acfg_name_list = [aid_list] print('Specified custom aids via acfgname %s' % (acfg_name_list,)) from ibeis.expt import experiment_helpers acfg_list, expanded_aids_list = experiment_helpers.get_annotcfg_list( ibs, acfg_name_list) aid_list = sorted(list(set(ut.flatten(ut.flatten(expanded_aids_list))))) #aid_list = if verbose: print('Specified %d custom aids' % (len(aid_list,))) request_annot_subset = True valid_aids = aid_list valid_nids = list( set(ibs.get_annot_nids(aid_list, distinguish_unknowns=False)) - {const.UNKNOWN_NAME_ROWID} ) valid_gids = list(set(ibs.get_annot_gids(aid_list))) #associated_nids = ibs.get_valid_nids(filter_empty=True) # nids with at least one annotation FILTER_HACK = True if FILTER_HACK: # HUGE HACK - get only images and names with filtered aids valid_aids_ = ibs.filter_aids_custom(valid_aids) valid_nids_ = ibs.filter_nids_custom(valid_nids) valid_gids_ = ibs.filter_gids_custom(valid_gids) if verbose: print('Filtered %d names' % (len(valid_nids) - len(valid_nids_))) print('Filtered %d images' % (len(valid_gids) - len(valid_gids_))) print('Filtered %d annots' % (len(valid_aids) - len(valid_aids_))) valid_gids = valid_gids_ valid_nids = valid_nids_ valid_aids = valid_aids_ #associated_nids = ut.compress(associated_nids, map(any, #ibs.unflat_map(ibs.get_annot_custom_filterflags, # ibs.get_name_aids(associated_nids)))) # Image info if verbose: print('Checking Image Info') gx2_aids = ibs.get_image_aids(valid_gids) if FILTER_HACK: gx2_aids = [ibs.filter_aids_custom(aids) for aids in gx2_aids] # HACK FOR FILTER if request_annot_subset: # remove annots not in this subset valid_aids_set = set(valid_aids) gx2_aids = [list(set(aids).intersection(valid_aids_set)) for aids in gx2_aids] gx2_nAnnots = np.array(list(map(len, gx2_aids))) image_without_annots = len(np.where(gx2_nAnnots == 0)[0]) gx2_nAnnots_stats = ut.get_stats_str(gx2_nAnnots, newlines=True, use_median=True) image_reviewed_list = ibs.get_image_reviewed(valid_gids) # Name stats if verbose: print('Checking Name Info') nx2_aids = ibs.get_name_aids(valid_nids) if FILTER_HACK: nx2_aids = [ibs.filter_aids_custom(aids) for aids in nx2_aids] # HACK FOR FILTER if request_annot_subset: # remove annots not in this subset valid_aids_set = set(valid_aids) nx2_aids = [list(set(aids).intersection(valid_aids_set)) for aids in nx2_aids] associated_nids = ut.compress(valid_nids, list(map(len, nx2_aids))) ibs.check_name_mapping_consistency(nx2_aids) # Occurrence Info def compute_annot_occurrence_ids(ibs, aid_list): from ibeis.algo.preproc import preproc_occurrence gid_list = ibs.get_annot_gids(aid_list) gid2_aids = ut.group_items(aid_list, gid_list) flat_imgsetids, flat_gids = preproc_occurrence.ibeis_compute_occurrences(ibs, gid_list, seconds_thresh=4 * 60 * 60, verbose=False) occurid2_gids = ut.group_items(flat_gids, flat_imgsetids) occurid2_aids = {oid: ut.flatten(ut.take(gid2_aids, gids)) for oid, gids in occurid2_gids.items()} return occurid2_aids import utool with utool.embed_on_exception_context: occurid2_aids = compute_annot_occurrence_ids(ibs, valid_aids) occur_nids = ibs.unflat_map(ibs.get_annot_nids, occurid2_aids.values()) occur_unique_nids = [ut.unique(nids) for nids in occur_nids] nid2_occurxs = ut.ddict(list) for occurx, nids in 
enumerate(occur_unique_nids): for nid in nids: nid2_occurxs[nid].append(occurx) nid2_occurx_single = {nid: occurxs for nid, occurxs in nid2_occurxs.items() if len(occurxs) <= 1} nid2_occurx_resight = {nid: occurxs for nid, occurxs in nid2_occurxs.items() if len(occurxs) > 1} singlesight_encounters = ibs.get_name_aids(nid2_occurx_single.keys()) singlesight_annot_stats = ut.get_stats(list(map(len, singlesight_encounters)), use_median=True, use_sum=True) resight_name_stats = ut.get_stats(list(map(len, nid2_occurx_resight.values())), use_median=True, use_sum=True) try: aid_pairs = ibs.filter_aidpairs_by_tags(min_num=0) undirected_tags = ibs.get_aidpair_tags(aid_pairs.T[0], aid_pairs.T[1], directed=False) tagged_pairs = list(zip(aid_pairs.tolist(), undirected_tags)) tag_dict = ut.groupby_tags(tagged_pairs, undirected_tags) pair_tag_info = ut.map_dict_vals(len, tag_dict) num_reviewed_pairs = sum(ibs.get_annot_pair_is_reviewed(aid_pairs.T[0], aid_pairs.T[1])) pair_tag_info['num_reviewed'] = num_reviewed_pairs except Exception: pair_tag_info = {} #print(ut.dict_str(pair_tag_info)) # Annot Stats # TODO: number of images where chips cover entire image # TODO: total image coverage of annotation # TODO: total annotation overlap """ ax2_unknown = ibs.is_aid_unknown(valid_aids) ax2_nid = ibs.get_annot_name_rowids(valid_aids) assert all([nid < 0 if unknown else nid > 0 for nid, unknown in zip(ax2_nid, ax2_unknown)]), 'bad annot nid' """ # if verbose: print('Checking Annot Species') unknown_aids = ut.compress(valid_aids, ibs.is_aid_unknown(valid_aids)) species_list = ibs.get_annot_species_texts(valid_aids) species2_aids = ut.group_items(valid_aids, species_list) species2_nAids = {key: len(val) for key, val in species2_aids.items()} if verbose: print('Checking Multiton/Singleton Species') nx2_nAnnots = np.array(list(map(len, nx2_aids))) # Seperate singleton / multitons multiton_nxs = np.where(nx2_nAnnots > 1)[0] singleton_nxs = np.where(nx2_nAnnots == 1)[0] unassociated_nxs = np.where(nx2_nAnnots == 0)[0] assert len(np.intersect1d(singleton_nxs, multiton_nxs)) == 0, 'intersecting names' valid_nxs = np.hstack([multiton_nxs, singleton_nxs]) num_names_with_gt = len(multiton_nxs) # Annot Info if verbose: print('Checking Annot Info') multiton_aids_list = ut.take(nx2_aids, multiton_nxs) assert len(set(multiton_nxs)) == len(multiton_nxs) if len(multiton_aids_list) == 0: multiton_aids = np.array([], dtype=np.int) else: multiton_aids = np.hstack(multiton_aids_list) assert len(set(multiton_aids)) == len(multiton_aids), 'duplicate annot' singleton_aids = ut.take(nx2_aids, singleton_nxs) multiton_nid2_nannots = list(map(len, multiton_aids_list)) # Image size stats if with_imgsize: if verbose: print('Checking ImageSize Info') gpath_list = ibs.get_image_paths(valid_gids) def wh_print_stats(wh_list): if len(wh_list) == 0: return '{empty}' wh_list = np.asarray(wh_list) stat_dict = OrderedDict( [( 'max', wh_list.max(0)), ( 'min', wh_list.min(0)), ('mean', wh_list.mean(0)), ( 'std', wh_list.std(0))]) def arr2str(var): return ('[' + ( ', '.join(list(map(lambda x: '%.1f' % x, var))) ) + ']') ret = (',\n '.join([ '%s:%s' % (key, arr2str(val)) for key, val in stat_dict.items() ])) return '{\n ' + ret + '\n}' print('reading image sizes') # Image size stats img_size_list = ibs.get_image_sizes(valid_gids) img_size_stats = wh_print_stats(img_size_list) # Chip size stats annotation_bbox_list = ibs.get_annot_bboxes(valid_aids) annotation_bbox_arr = np.array(annotation_bbox_list) if len(annotation_bbox_arr) == 0: annotation_size_list 
= [] else: annotation_size_list = annotation_bbox_arr[:, 2:4] chip_size_stats = wh_print_stats(annotation_size_list) imgsize_stat_lines = [ (' # Img in dir = %d' % len(gpath_list)), (' Image Size Stats = %s' % (img_size_stats,)), (' * Chip Size Stats = %s' % (chip_size_stats,)), ] else: imgsize_stat_lines = [] if verbose: print('Building Stats String') multiton_stats = ut.get_stats_str(multiton_nid2_nannots, newlines=True, use_median=True) # Time stats unixtime_list = ibs.get_image_unixtime(valid_gids) unixtime_list = ut.list_replace(unixtime_list, -1, float('nan')) #valid_unixtime_list = [time for time in unixtime_list if time != -1] #unixtime_statstr = ibs.get_image_time_statstr(valid_gids) if ut.get_argflag('--hackshow-unixtime'): show_time_distributions(ibs, unixtime_list) ut.show_if_requested() unixtime_statstr = ut.get_timestats_str(unixtime_list, newlines=True, full=True) # GPS stats gps_list_ = ibs.get_image_gps(valid_gids) gpsvalid_list = [gps != (-1, -1) for gps in gps_list_] gps_list = ut.compress(gps_list_, gpsvalid_list) def get_annot_age_stats(aid_list): annot_age_months_est_min = ibs.get_annot_age_months_est_min(aid_list) annot_age_months_est_max = ibs.get_annot_age_months_est_max(aid_list) age_dict = ut.ddict((lambda : 0)) for min_age, max_age in zip(annot_age_months_est_min, annot_age_months_est_max): if (min_age is None or min_age < 12) and max_age < 12: age_dict['Infant'] += 1 elif 12 <= min_age and min_age < 36 and 12 <= max_age and max_age < 36: age_dict['Juvenile'] += 1 elif 36 <= min_age and (36 <= max_age or max_age is None): age_dict['Adult'] += 1 else: print('Found UNKNOWN Age: %r, %r' % (min_age, max_age, )) age_dict['UNKNOWN'] += 1 return age_dict def get_annot_sex_stats(aid_list): annot_sextext_list = ibs.get_annot_sex_texts(aid_list) sextext2_aids = ut.group_items(aid_list, annot_sextext_list) sex_keys = list(ibs.const.SEX_TEXT_TO_INT.keys()) assert set(sex_keys) >= set(annot_sextext_list), 'bad keys: ' + str(set(annot_sextext_list) - set(sex_keys)) sextext2_nAnnots = ut.odict([(key, len(sextext2_aids.get(key, []))) for key in sex_keys]) # Filter 0's sextext2_nAnnots = {key: val for key, val in six.iteritems(sextext2_nAnnots) if val != 0} return sextext2_nAnnots if verbose: print('Checking Other Annot Stats') qualtext2_nAnnots = ibs.get_annot_qual_stats(valid_aids) yawtext2_nAnnots = ibs.get_annot_yaw_stats(valid_aids) agetext2_nAnnots = get_annot_age_stats(valid_aids) sextext2_nAnnots = get_annot_sex_stats(valid_aids) if verbose: print('Checking Contrib Stats') # Contributor Statistics # hack remove colon for image alignment def fix_tag_list(tag_list): return [None if tag is None else tag.replace(':', ';') for tag in tag_list] image_contrib_tags = fix_tag_list(ibs.get_image_contributor_tag(valid_gids)) annot_contrib_tags = fix_tag_list(ibs.get_annot_image_contributor_tag(valid_aids)) contrib_tag_to_gids = ut.group_items(valid_gids, image_contrib_tags) contrib_tag_to_aids = ut.group_items(valid_aids, annot_contrib_tags) contrib_tag_to_qualstats = {key: ibs.get_annot_qual_stats(aids) for key, aids in six.iteritems(contrib_tag_to_aids)} contrib_tag_to_viewstats = {key: ibs.get_annot_yaw_stats(aids) for key, aids in six.iteritems(contrib_tag_to_aids)} contrib_tag_to_nImages = {key: len(val) for key, val in six.iteritems(contrib_tag_to_gids)} contrib_tag_to_nAnnots = {key: len(val) for key, val in six.iteritems(contrib_tag_to_aids)} if verbose: print('Summarizing') # Summarize stats num_names = len(valid_nids) num_names_unassociated = len(valid_nids) - 
len(associated_nids) num_names_singleton = len(singleton_nxs) num_names_multiton = len(multiton_nxs) num_singleton_annots = len(singleton_aids) num_multiton_annots = len(multiton_aids) num_unknown_annots = len(unknown_aids) num_annots = len(valid_aids) if with_bytes: if verbose: print('Checking Disk Space') ibsdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_ibsdir())) dbdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_dbdir())) imgdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_imgdir())) cachedir_space = ut.byte_str2(ut.get_disk_space(ibs.get_cachedir())) if True: if verbose: print('Check asserts') try: bad_aids = np.intersect1d(multiton_aids, unknown_aids) _num_names_total_check = num_names_singleton + num_names_unassociated + num_names_multiton _num_annots_total_check = num_unknown_annots + num_singleton_annots + num_multiton_annots assert len(bad_aids) == 0, 'intersecting multiton aids and unknown aids' assert _num_names_total_check == num_names, 'inconsistent num names' #if not request_annot_subset: # dont check this if you have an annot subset assert _num_annots_total_check == num_annots, 'inconsistent num annots' except Exception as ex: ut.printex(ex, keys=[ '_num_names_total_check', 'num_names', '_num_annots_total_check', 'num_annots', 'num_names_singleton', 'num_names_multiton', 'num_unknown_annots', 'num_multiton_annots', 'num_singleton_annots', ]) raise # Get contributor statistics contrib_rowids = ibs.get_valid_contrib_rowids() num_contributors = len(contrib_rowids) # print num_tabs = 5 def align2(str_): return ut.align(str_, ':', ' :') def align_dict2(dict_): str_ = ut.dict_str(dict_) return align2(str_) header_block_lines = ( [('+============================'), ] + ( [ ('+ singleton := single sighting'), ('+ multiton := multiple sightings'), ('--' * num_tabs), ] if not short and with_header else [] ) ) source_block_lines = [ ('DB Info: ' + ibs.get_dbname()), ('DB Notes: ' + ibs.get_dbnotes()), ('DB NumContrib: %d' % num_contributors), ] bytes_block_lines = [ ('--' * num_tabs), ('DB Bytes: '), (' +- dbdir nBytes: ' + dbdir_space), (' | +- _ibsdb nBytes: ' + ibsdir_space), (' | | +-imgdir nBytes: ' + imgdir_space), (' | | +-cachedir nBytes: ' + cachedir_space), ] if with_bytes else [] name_block_lines = [ ('--' * num_tabs), ('# Names = %d' % num_names), ('# Names (unassociated) = %d' % num_names_unassociated), ('# Names (singleton) = %d' % num_names_singleton), ('# Names (multiton) = %d' % num_names_multiton), ] subset_str = ' ' if not request_annot_subset else '(SUBSET)' annot_block_lines = [ ('--' * num_tabs), ('# Annots %s = %d' % (subset_str, num_annots,)), ('# Annots (unknown) = %d' % num_unknown_annots), ('# Annots (singleton) = %d' % num_singleton_annots), ('# Annots (multiton) = %d' % num_multiton_annots), ] annot_per_basic_block_lines = [ ('--' * num_tabs), ('# Annots per Name (multiton) = %s' % (align2(multiton_stats),)), ('# Annots per Image = %s' % (align2(gx2_nAnnots_stats),)), ('# Annots per Species = %s' % (align_dict2(species2_nAids),)), ] if not short else [] occurrence_block_lines = [ ('--' * num_tabs), ('# Occurrence Per Name (Resights) = %s' % (align_dict2(resight_name_stats),)), ('# Annots per Encounter (Singlesights) = %s' % (align_dict2(singlesight_annot_stats),)), ('# Pair Tag Info (annots) = %s' % (align_dict2(pair_tag_info),)), ] if not short else [] annot_per_qualview_block_lines = [ None if short else '# Annots per Viewpoint = %s' % align_dict2(yawtext2_nAnnots), None if short else '# Annots per Quality = %s' % align_dict2(qualtext2_nAnnots), 
] annot_per_agesex_block_lines = [ '# Annots per Age = %s' % align_dict2(agetext2_nAnnots), '# Annots per Sex = %s' % align_dict2(sextext2_nAnnots), ] if not short and with_agesex else [] contrib_block_lines = [ '# Images per contributor = ' + align_dict2(contrib_tag_to_nImages), '# Annots per contributor = ' + align_dict2(contrib_tag_to_nAnnots), '# Quality per contributor = ' + ut.dict_str(contrib_tag_to_qualstats, sorted_=True), '# Viewpoint per contributor = ' + ut.dict_str(contrib_tag_to_viewstats, sorted_=True), ] if with_contrib else [] img_block_lines = [ ('--' * num_tabs), ('# Img = %d' % len(valid_gids)), None if short else ('# Img reviewed = %d' % sum(image_reviewed_list)), None if short else ('# Img with gps = %d' % len(gps_list)), #('# Img with timestamp = %d' % len(valid_unixtime_list)), None if short else ('Img Time Stats = %s' % (align2(unixtime_statstr),)), ] info_str_lines = ( header_block_lines + bytes_block_lines + source_block_lines + name_block_lines + annot_block_lines + annot_per_basic_block_lines + occurrence_block_lines + annot_per_qualview_block_lines + annot_per_agesex_block_lines + img_block_lines + contrib_block_lines + imgsize_stat_lines + [('L============================'), ] ) info_str = '\n'.join(ut.filter_Nones(info_str_lines)) info_str2 = ut.indent(info_str, '[{tag}]'.format(tag=tag)) if verbose: print(info_str2) locals_ = locals() return locals_
#utool.printvar2('psutil.disk_io_counters()') #print('') #print('PSUTIL NETWORK') #print('') #utool.printvar2('psutil.net_io_counters(pernic=True)') #print('') #print('PSUTIL MISC') #print('') #utool.printvar2('psutil.get_users()') #utool.printvar2('psutil.get_boot_time()') #utool.printvar2('psutil.get_pid_list()') #psutil.test() pass except ImportError: print('psutil not installed') try: import resource utool.rrr() used_memory = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss print('[parallel] Max memory usage: %s' % utool.byte_str2(used_memory)) except ImportError: print('no module resources (doesnt exist on win32)') try: import cv2 # NOQA utool.printvar2('cv2.__version__') except ImportError: print('cv2 is not installed')
def turtles(): source_dpaths = sorted( ut.glob('/raid/raw/RotanTurtles/', '*', recusrive=False, with_dirs=True, with_files=False)) sources = [SourceDir(dpath) for dpath in source_dpaths] for self in ut.ProgIter(sources, label='populate'): self.populate() import fnmatch del_ext = set(['.npy', '.flann', '.npz']) for self in ut.ProgIter(sources, label='populate'): flags = [ext in del_ext for ext in self.attrs['ext']] to_delete = ut.compress(list(self.fpaths()), flags) ut.remove_file_list(to_delete) flags = [ fnmatch.fnmatch(fpath, '*/_hsdb/computed/chips/*.png') for fpath in self.rel_fpath_list ] to_delete = ut.compress(list(self.fpaths()), flags) ut.remove_file_list(to_delete) self.populate() for self in ut.ProgIter(sources, label='del empty'): self.populate() self.delete_empty_directories() print(ut.byte_str2(sum([self.nbytes() for self in sources]))) # [ut.byte_str2(self.nbytes()) for self in sources] # import numpy as np # num_isect = np.zeros((len(sources), len(sources))) # num_union = np.zeros((len(sources), len(sources))) for i, j in ut.combinations(range(len(sources)), 2): s1 = sources[i] s2 = sources[j] isect = set(s1.rel_fpath_list).intersection(s2.rel_fpath_list) # union = set(s1.rel_fpath_list).union(s2.rel_fpath_list) if isect: s1.isect_info(s2) print((i, j)) print(s1.dpath) print(s2.dpath) self = s1 other = s2 assert False # print(isect) # break # num_isect[i, j] = len(isect) # num_union[i, j] = len(union) # for self in ut.ProgIter(sources, label='index'): # self.index() for self in ut.ProgIter(sources, label='populate'): self.populate() dest = sources[0] others = sources[1:] # Merge others into dest bash_script = '\n'.join([o.make_merge_bash_script(dest) for o in others]) print(bash_script) other = self for other in others: other.merge_into(dest)
def turtles(): source_dpaths = sorted(ut.glob('/raid/raw/RotanTurtles/', '*', recusrive=False, with_dirs=True, with_files=False)) sources = [SourceDir(dpath) for dpath in source_dpaths] for self in ut.ProgIter(sources, label='populate'): self.populate() import fnmatch del_ext = set(['.npy', '.flann', '.npz']) for self in ut.ProgIter(sources, label='populate'): flags = [ext in del_ext for ext in self.attrs['ext']] to_delete = ut.compress(list(self.fpaths()), flags) ut.remove_file_list(to_delete) flags = [fnmatch.fnmatch(fpath, '*/_hsdb/computed/chips/*.png') for fpath in self.rel_fpath_list] to_delete = ut.compress(list(self.fpaths()), flags) ut.remove_file_list(to_delete) self.populate() for self in ut.ProgIter(sources, label='del empty'): self.populate() self.delete_empty_directories() print(ut.byte_str2(sum([self.nbytes() for self in sources]))) # [ut.byte_str2(self.nbytes()) for self in sources] # import numpy as np # num_isect = np.zeros((len(sources), len(sources))) # num_union = np.zeros((len(sources), len(sources))) for i, j in ut.combinations(range(len(sources)), 2): s1 = sources[i] s2 = sources[j] isect = set(s1.rel_fpath_list).intersection(s2.rel_fpath_list) # union = set(s1.rel_fpath_list).union(s2.rel_fpath_list) if isect: s1.isect_info(s2) print((i, j)) print(s1.dpath) print(s2.dpath) self = s1 other = s2 assert False # print(isect) # break # num_isect[i, j] = len(isect) # num_union[i, j] = len(union) # for self in ut.ProgIter(sources, label='index'): # self.index() for self in ut.ProgIter(sources, label='populate'): self.populate() dest = sources[0] others = sources[1:] # Merge others into dest bash_script = '\n'.join([o.make_merge_bash_script(dest) for o in others]) print(bash_script) other = self for other in others: other.merge_into(dest)
def invert_index(vecs_list, fgws_list, ax_list, fxs_list, verbose=ut.NOT_QUIET): r""" Aggregates descriptors of input annotations and returns inverted information Args: vecs_list (list): fgws_list (list): ax_list (list): fxs_list (list): verbose (bool): verbosity flag(default = True) Returns: tuple: (idx2_vec, idx2_fgw, idx2_ax, idx2_fx) CommandLine: python -m ibeis.algo.hots.neighbor_index invert_index Example: >>> # ENABLE_DOCTEST >>> from ibeis.algo.hots.neighbor_index import * # NOQA >>> rng = np.random.RandomState(42) >>> DIM_SIZE = 16 >>> nFeat_list = [3, 0, 4, 1] >>> vecs_list = [rng.randn(nFeat, DIM_SIZE) for nFeat in nFeat_list] >>> fgws_list = [rng.randn(nFeat) for nFeat in nFeat_list] >>> fxs_list = [np.arange(nFeat) for nFeat in nFeat_list] >>> ax_list = np.arange(len(vecs_list)) >>> fgws_list = None >>> verbose = True >>> tup = invert_index(vecs_list, fgws_list, ax_list, fxs_list) >>> (idx2_vec, idx2_fgw, idx2_ax, idx2_fx) = tup >>> result = 'output depth_profile = %s' % (ut.depth_profile(tup),) >>> print(result) output depth_profile = [(8, 16), 1, 8, 8] Example: >>> # ENABLE_DOCTEST >>> from ibeis.algo.hots.neighbor_index import * # NOQA >>> import ibeis >>> qreq_ = ibeis.testdata_qreq_(defaultdb='testdb1', a='default:species=zebra_plains', p='default:fgw_thresh=.999') >>> vecs_list, fgws_list, fxs_list = get_support_data(qreq_, qreq_.daids) >>> ax_list = np.arange(len(vecs_list)) >>> input_ = vecs_list, fgws_list, ax_list, fxs_list >>> print('input depth_profile = %s' % (ut.depth_profile(input_),)) >>> tup = invert_index(*input_) >>> (idx2_vec, idx2_fgw, idx2_ax, idx2_fx) = tup >>> result = 'output depth_profile = %s' % (ut.depth_profile(tup),) >>> print(result) output depth_profile = [(1912, 128), 1912, 1912, 1912] """ if ut.VERYVERBOSE: print('[nnindex] stacking descriptors from %d annotations' % len(ax_list)) try: nFeat_list = np.array(list(map(len, vecs_list))) # Remove input without any features is_valid = nFeat_list > 0 nFeat_list = nFeat_list.compress(is_valid) vecs_list = ut.compress(vecs_list, is_valid) if fgws_list is not None: fgws_list = ut.compress(fgws_list, is_valid) ax_list = ut.compress(ax_list, is_valid) fxs_list = ut.compress(fxs_list, is_valid) # Flatten into inverted index axs_list = [[ax] * nFeat for (ax, nFeat) in zip(ax_list, nFeat_list)] nFeats = sum(nFeat_list) idx2_ax = np.fromiter(ut.iflatten(axs_list), np.int32, nFeats) idx2_fx = np.fromiter(ut.iflatten(fxs_list), np.int32, nFeats) idx2_vec = np.vstack(vecs_list) if fgws_list is None: idx2_fgw = None else: idx2_fgw = np.hstack(fgws_list) try: assert len(idx2_fgw) == len(idx2_vec), 'error. weights and vecs do not correspond' except Exception as ex: ut.printex(ex, keys=[(len, 'idx2_fgw'), (len, 'idx2_vec')]) raise assert idx2_vec.shape[0] == idx2_ax.shape[0] assert idx2_vec.shape[0] == idx2_fx.shape[0] except MemoryError as ex: ut.printex(ex, 'cannot build inverted index', '[!memerror]') raise if ut.VERYVERBOSE or verbose: print('[nnindex] stacked nVecs={nVecs} from nAnnots={nAnnots}'.format( nVecs=len(idx2_vec), nAnnots=len(ax_list))) print('[nnindex] idx2_vecs dtype={}, memory={}'.format( idx2_vec.dtype, ut.byte_str2(idx2_vec.size * idx2_vec.dtype.itemsize))) return idx2_vec, idx2_fgw, idx2_ax, idx2_fx
def get_dbinfo( ibs, verbose=True, with_imgsize=False, with_bytes=False, with_contrib=False, with_agesex=False, with_header=True, short=False, tag='dbinfo', aid_list=None, aids=None, ): """ Returns dictionary of digestable database information Infostr is a string summary of all the stats. Prints infostr in addition to returning locals Args: ibs (IBEISController): verbose (bool): with_imgsize (bool): with_bytes (bool): Returns: dict: SeeAlso: python -m wbia.other.ibsfuncs --exec-get_annot_stats_dict --db PZ_PB_RF_TRAIN --use-hist=True --old=False --per_name_vpedge=False python -m wbia.other.ibsfuncs --exec-get_annot_stats_dict --db PZ_PB_RF_TRAIN --all CommandLine: python -m wbia.other.dbinfo --exec-get_dbinfo:0 python -m wbia.other.dbinfo --test-get_dbinfo:1 python -m wbia.other.dbinfo --test-get_dbinfo:0 --db NNP_Master3 python -m wbia.other.dbinfo --test-get_dbinfo:0 --db PZ_Master1 python -m wbia.other.dbinfo --test-get_dbinfo:0 --db GZ_ALL python -m wbia.other.dbinfo --exec-get_dbinfo:0 --db PZ_ViewPoints python -m wbia.other.dbinfo --exec-get_dbinfo:0 --db GZ_Master1 python -m wbia.other.dbinfo --exec-get_dbinfo:0 --db LF_Bajo_bonito -a default python -m wbia.other.dbinfo --exec-get_dbinfo:0 --db DETECT_SEATURTLES -a default --readonly python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a ctrl python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a default:minqual=ok,require_timestamp=True --dbdir ~/lev/media/danger/LEWA python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a default:minqual=ok,require_timestamp=True --dbdir ~/lev/media/danger/LEWA --loadbackup=0 python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a default: --dbdir ~/lev/media/danger/LEWA python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a default: --dbdir ~/lev/media/danger/LEWA --loadbackup=0 Example1: >>> # SCRIPT >>> from wbia.other.dbinfo import * # NOQA >>> import wbia >>> defaultdb = 'testdb1' >>> ibs, aid_list = wbia.testdata_aids(defaultdb, a='default:minqual=ok,view=primary,view_ext1=1') >>> kwargs = ut.get_kwdefaults(get_dbinfo) >>> kwargs['verbose'] = False >>> kwargs['aid_list'] = aid_list >>> kwargs = ut.parse_dict_from_argv(kwargs) >>> output = get_dbinfo(ibs, **kwargs) >>> result = (output['info_str']) >>> print(result) >>> #ibs = wbia.opendb(defaultdb='testdb1') >>> # <HACK FOR FILTERING> >>> #from wbia.expt import cfghelpers >>> #from wbia.expt import annotation_configs >>> #from wbia.init import filter_annots >>> #named_defaults_dict = ut.dict_take(annotation_configs.__dict__, >>> # annotation_configs.TEST_NAMES) >>> #named_qcfg_defaults = dict(zip(annotation_configs.TEST_NAMES, >>> # ut.get_list_column(named_defaults_dict, 'qcfg'))) >>> #acfg = cfghelpers.parse_argv_cfg(('--annot-filter', '-a'), named_defaults_dict=named_qcfg_defaults, default=None)[0] >>> #aid_list = ibs.get_valid_aids() >>> # </HACK FOR FILTERING> Example1: >>> # ENABLE_DOCTEST >>> from wbia.other.dbinfo import * # NOQA >>> import wbia >>> verbose = True >>> short = True >>> #ibs = wbia.opendb(db='GZ_ALL') >>> #ibs = wbia.opendb(db='PZ_Master0') >>> ibs = wbia.opendb('testdb1') >>> assert ibs.get_dbname() == 'testdb1', 'DO NOT DELETE CONTRIBUTORS OF OTHER DBS' >>> ibs.delete_contributors(ibs.get_valid_contributor_rowids()) >>> ibs.delete_empty_nids() >>> #ibs = wbia.opendb(db='PZ_MTEST') >>> output = get_dbinfo(ibs, with_contrib=False, verbose=False, short=True) >>> result = (output['info_str']) >>> print(result) +============================ DB Info: testdb1 DB Notes: None DB NumContrib: 0 ---------- # Names = 7 # Names (unassociated) = 0 # 
Names (singleton) = 5 # Names (multiton) = 2 ---------- # Annots = 13 # Annots (unknown) = 4 # Annots (singleton) = 5 # Annots (multiton) = 4 ---------- # Img = 13 L============================ """ # TODO Database size in bytes # TODO: occurrence, contributors, etc... if aids is not None: aid_list = aids # Basic variables request_annot_subset = False _input_aid_list = aid_list # NOQA if aid_list is None: valid_aids = ibs.get_valid_aids() valid_nids = ibs.get_valid_nids() valid_gids = ibs.get_valid_gids() else: if isinstance(aid_list, str): # Hack to get experiment stats on aids acfg_name_list = [aid_list] logger.info('Specified custom aids via acfgname %s' % (acfg_name_list,)) from wbia.expt import experiment_helpers acfg_list, expanded_aids_list = experiment_helpers.get_annotcfg_list( ibs, acfg_name_list ) aid_list = sorted(list(set(ut.flatten(ut.flatten(expanded_aids_list))))) # aid_list = if verbose: logger.info('Specified %d custom aids' % (len(aid_list,))) request_annot_subset = True valid_aids = aid_list valid_nids = list( set(ibs.get_annot_nids(aid_list, distinguish_unknowns=False)) - {const.UNKNOWN_NAME_ROWID} ) valid_gids = list(set(ibs.get_annot_gids(aid_list))) # associated_nids = ibs.get_valid_nids(filter_empty=True) # nids with at least one annotation valid_images = ibs.images(valid_gids) valid_annots = ibs.annots(valid_aids) # Image info if verbose: logger.info('Checking Image Info') gx2_aids = valid_images.aids if request_annot_subset: # remove annots not in this subset valid_aids_set = set(valid_aids) gx2_aids = [list(set(aids_).intersection(valid_aids_set)) for aids_ in gx2_aids] gx2_nAnnots = np.array(list(map(len, gx2_aids))) image_without_annots = len(np.where(gx2_nAnnots == 0)[0]) gx2_nAnnots_stats = ut.repr4( ut.get_stats(gx2_nAnnots, use_median=True), nl=0, precision=2, si=True ) image_reviewed_list = ibs.get_image_reviewed(valid_gids) # Name stats if verbose: logger.info('Checking Name Info') nx2_aids = ibs.get_name_aids(valid_nids) if request_annot_subset: # remove annots not in this subset valid_aids_set = set(valid_aids) nx2_aids = [list(set(aids_).intersection(valid_aids_set)) for aids_ in nx2_aids] associated_nids = ut.compress(valid_nids, list(map(len, nx2_aids))) ibs.check_name_mapping_consistency(nx2_aids) if False: # Occurrence Info def compute_annot_occurrence_ids(ibs, aid_list): from wbia.algo.preproc import preproc_occurrence gid_list = ibs.get_annot_gids(aid_list) gid2_aids = ut.group_items(aid_list, gid_list) config = {'seconds_thresh': 4 * 60 * 60} flat_imgsetids, flat_gids = preproc_occurrence.wbia_compute_occurrences( ibs, gid_list, config=config, verbose=False ) occurid2_gids = ut.group_items(flat_gids, flat_imgsetids) occurid2_aids = { oid: ut.flatten(ut.take(gid2_aids, gids)) for oid, gids in occurid2_gids.items() } return occurid2_aids import utool with utool.embed_on_exception_context: occurid2_aids = compute_annot_occurrence_ids(ibs, valid_aids) occur_nids = ibs.unflat_map(ibs.get_annot_nids, occurid2_aids.values()) occur_unique_nids = [ut.unique(nids) for nids in occur_nids] nid2_occurxs = ut.ddict(list) for occurx, nids in enumerate(occur_unique_nids): for nid in nids: nid2_occurxs[nid].append(occurx) nid2_occurx_single = { nid: occurxs for nid, occurxs in nid2_occurxs.items() if len(occurxs) <= 1 } nid2_occurx_resight = { nid: occurxs for nid, occurxs in nid2_occurxs.items() if len(occurxs) > 1 } singlesight_encounters = ibs.get_name_aids(nid2_occurx_single.keys()) singlesight_annot_stats = ut.get_stats( list(map(len, 
singlesight_encounters)), use_median=True, use_sum=True ) resight_name_stats = ut.get_stats( list(map(len, nid2_occurx_resight.values())), use_median=True, use_sum=True ) # Encounter Info def break_annots_into_encounters(aids): from wbia.algo.preproc import occurrence_blackbox import datetime thresh_sec = datetime.timedelta(minutes=30).seconds posixtimes = np.array(ibs.get_annot_image_unixtimes_asfloat(aids)) # latlons = ibs.get_annot_image_gps(aids) labels = occurrence_blackbox.cluster_timespace2( posixtimes, None, thresh_sec=thresh_sec ) return labels # ave_enc_time = [np.mean(times) for lbl, times in ut.group_items(posixtimes, labels).items()] # ut.square_pdist(ave_enc_time) try: am_rowids = ibs.get_annotmatch_rowids_between_groups([valid_aids], [valid_aids])[ 0 ] aid_pairs = ibs.filter_aidpairs_by_tags(min_num=0, am_rowids=am_rowids) undirected_tags = ibs.get_aidpair_tags( aid_pairs.T[0], aid_pairs.T[1], directed=False ) tagged_pairs = list(zip(aid_pairs.tolist(), undirected_tags)) tag_dict = ut.groupby_tags(tagged_pairs, undirected_tags) pair_tag_info = ut.map_dict_vals(len, tag_dict) except Exception: pair_tag_info = {} # logger.info(ut.repr2(pair_tag_info)) # Annot Stats # TODO: number of images where chips cover entire image # TODO: total image coverage of annotation # TODO: total annotation overlap """ ax2_unknown = ibs.is_aid_unknown(valid_aids) ax2_nid = ibs.get_annot_name_rowids(valid_aids) assert all([nid < 0 if unknown else nid > 0 for nid, unknown in zip(ax2_nid, ax2_unknown)]), 'bad annot nid' """ # if verbose: logger.info('Checking Annot Species') unknown_annots = valid_annots.compress(ibs.is_aid_unknown(valid_annots)) species_list = valid_annots.species_texts species2_annots = valid_annots.group_items(valid_annots.species_texts) species2_nAids = {key: len(val) for key, val in species2_annots.items()} if verbose: logger.info('Checking Multiton/Singleton Species') nx2_nAnnots = np.array(list(map(len, nx2_aids))) # Seperate singleton / multitons multiton_nxs = np.where(nx2_nAnnots > 1)[0] singleton_nxs = np.where(nx2_nAnnots == 1)[0] unassociated_nxs = np.where(nx2_nAnnots == 0)[0] assert len(np.intersect1d(singleton_nxs, multiton_nxs)) == 0, 'intersecting names' valid_nxs = np.hstack([multiton_nxs, singleton_nxs]) num_names_with_gt = len(multiton_nxs) # Annot Info if verbose: logger.info('Checking Annot Info') multiton_aids_list = ut.take(nx2_aids, multiton_nxs) assert len(set(multiton_nxs)) == len(multiton_nxs) if len(multiton_aids_list) == 0: multiton_aids = np.array([], dtype=np.int) else: multiton_aids = np.hstack(multiton_aids_list) assert len(set(multiton_aids)) == len(multiton_aids), 'duplicate annot' singleton_aids = ut.take(nx2_aids, singleton_nxs) multiton_nid2_nannots = list(map(len, multiton_aids_list)) # Image size stats if with_imgsize: if verbose: logger.info('Checking ImageSize Info') gpath_list = ibs.get_image_paths(valid_gids) def wh_print_stats(wh_list): if len(wh_list) == 0: return '{empty}' wh_list = np.asarray(wh_list) stat_dict = collections.OrderedDict( [ ('max', wh_list.max(0)), ('min', wh_list.min(0)), ('mean', wh_list.mean(0)), ('std', wh_list.std(0)), ] ) def arr2str(var): return '[' + (', '.join(list(map(lambda x: '%.1f' % x, var)))) + ']' ret = ',\n '.join( ['%s:%s' % (key, arr2str(val)) for key, val in stat_dict.items()] ) return '{\n ' + ret + '\n}' logger.info('reading image sizes') # Image size stats img_size_list = ibs.get_image_sizes(valid_gids) img_size_stats = wh_print_stats(img_size_list) # Chip size stats annotation_bbox_list = 
ibs.get_annot_bboxes(valid_aids) annotation_bbox_arr = np.array(annotation_bbox_list) if len(annotation_bbox_arr) == 0: annotation_size_list = [] else: annotation_size_list = annotation_bbox_arr[:, 2:4] chip_size_stats = wh_print_stats(annotation_size_list) imgsize_stat_lines = [ (' # Img in dir = %d' % len(gpath_list)), (' Image Size Stats = %s' % (img_size_stats,)), (' * Chip Size Stats = %s' % (chip_size_stats,)), ] else: imgsize_stat_lines = [] if verbose: logger.info('Building Stats String') multiton_stats = ut.repr3( ut.get_stats(multiton_nid2_nannots, use_median=True), nl=0, precision=2, si=True ) # Time stats unixtime_list = valid_images.unixtime2 # valid_unixtime_list = [time for time in unixtime_list if time != -1] # unixtime_statstr = ibs.get_image_time_statstr(valid_gids) if ut.get_argflag('--hackshow-unixtime'): show_time_distributions(ibs, unixtime_list) ut.show_if_requested() unixtime_statstr = ut.repr3(ut.get_timestats_dict(unixtime_list, full=True), si=True) # GPS stats gps_list_ = ibs.get_image_gps(valid_gids) gpsvalid_list = [gps != (-1, -1) for gps in gps_list_] gps_list = ut.compress(gps_list_, gpsvalid_list) def get_annot_age_stats(aid_list): annot_age_months_est_min = ibs.get_annot_age_months_est_min(aid_list) annot_age_months_est_max = ibs.get_annot_age_months_est_max(aid_list) age_dict = ut.ddict((lambda: 0)) for min_age, max_age in zip(annot_age_months_est_min, annot_age_months_est_max): if max_age is None: max_age = min_age if min_age is None: min_age = max_age if max_age is None and min_age is None: logger.info('Found UNKNOWN Age: %r, %r' % (min_age, max_age,)) age_dict['UNKNOWN'] += 1 elif (min_age is None or min_age < 12) and max_age < 12: age_dict['Infant'] += 1 elif 12 <= min_age and min_age < 36 and 12 <= max_age and max_age < 36: age_dict['Juvenile'] += 1 elif 36 <= min_age and (max_age is None or 36 <= max_age): age_dict['Adult'] += 1 return age_dict def get_annot_sex_stats(aid_list): annot_sextext_list = ibs.get_annot_sex_texts(aid_list) sextext2_aids = ut.group_items(aid_list, annot_sextext_list) sex_keys = list(ibs.const.SEX_TEXT_TO_INT.keys()) assert set(sex_keys) >= set(annot_sextext_list), 'bad keys: ' + str( set(annot_sextext_list) - set(sex_keys) ) sextext2_nAnnots = ut.odict( [(key, len(sextext2_aids.get(key, []))) for key in sex_keys] ) # Filter 0's sextext2_nAnnots = { key: val for key, val in six.iteritems(sextext2_nAnnots) if val != 0 } return sextext2_nAnnots def get_annot_qual_stats(ibs, aid_list): annots = ibs.annots(aid_list) qualtext2_nAnnots = ut.order_dict_by( ut.map_vals(len, annots.group_items(annots.quality_texts)), list(ibs.const.QUALITY_TEXT_TO_INT.keys()), ) return qualtext2_nAnnots def get_annot_viewpoint_stats(ibs, aid_list): annots = ibs.annots(aid_list) viewcode2_nAnnots = ut.order_dict_by( ut.map_vals(len, annots.group_items(annots.viewpoint_code)), list(ibs.const.VIEW.CODE_TO_INT.keys()) + [None], ) return viewcode2_nAnnots if verbose: logger.info('Checking Other Annot Stats') qualtext2_nAnnots = get_annot_qual_stats(ibs, valid_aids) viewcode2_nAnnots = get_annot_viewpoint_stats(ibs, valid_aids) agetext2_nAnnots = get_annot_age_stats(valid_aids) sextext2_nAnnots = get_annot_sex_stats(valid_aids) if verbose: logger.info('Checking Contrib Stats') # Contributor Statistics # hack remove colon for image alignment def fix_tag_list(tag_list): return [None if tag is None else tag.replace(':', ';') for tag in tag_list] image_contributor_tags = fix_tag_list(ibs.get_image_contributor_tag(valid_gids)) annot_contributor_tags = 
fix_tag_list(ibs.get_annot_image_contributor_tag(valid_aids)) contributor_tag_to_gids = ut.group_items(valid_gids, image_contributor_tags) contributor_tag_to_aids = ut.group_items(valid_aids, annot_contributor_tags) contributor_tag_to_qualstats = { key: get_annot_qual_stats(ibs, aids) for key, aids in six.iteritems(contributor_tag_to_aids) } contributor_tag_to_viewstats = { key: get_annot_viewpoint_stats(ibs, aids) for key, aids in six.iteritems(contributor_tag_to_aids) } contributor_tag_to_nImages = { key: len(val) for key, val in six.iteritems(contributor_tag_to_gids) } contributor_tag_to_nAnnots = { key: len(val) for key, val in six.iteritems(contributor_tag_to_aids) } if verbose: logger.info('Summarizing') # Summarize stats num_names = len(valid_nids) num_names_unassociated = len(valid_nids) - len(associated_nids) num_names_singleton = len(singleton_nxs) num_names_multiton = len(multiton_nxs) num_singleton_annots = len(singleton_aids) num_multiton_annots = len(multiton_aids) num_unknown_annots = len(unknown_annots) num_annots = len(valid_aids) if with_bytes: if verbose: logger.info('Checking Disk Space') ibsdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_ibsdir())) dbdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_dbdir())) imgdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_imgdir())) cachedir_space = ut.byte_str2(ut.get_disk_space(ibs.get_cachedir())) if True: if verbose: logger.info('Check asserts') try: bad_aids = np.intersect1d(multiton_aids, unknown_annots) _num_names_total_check = ( num_names_singleton + num_names_unassociated + num_names_multiton ) _num_annots_total_check = ( num_unknown_annots + num_singleton_annots + num_multiton_annots ) assert len(bad_aids) == 0, 'intersecting multiton aids and unknown aids' assert _num_names_total_check == num_names, 'inconsistent num names' # if not request_annot_subset: # dont check this if you have an annot subset assert _num_annots_total_check == num_annots, 'inconsistent num annots' except Exception as ex: ut.printex( ex, keys=[ '_num_names_total_check', 'num_names', '_num_annots_total_check', 'num_annots', 'num_names_singleton', 'num_names_multiton', 'num_unknown_annots', 'num_multiton_annots', 'num_singleton_annots', ], ) raise # Get contributor statistics contributor_rowids = ibs.get_valid_contributor_rowids() num_contributors = len(contributor_rowids) # print num_tabs = 5 def align2(str_): return ut.align(str_, ':', ' :') def align_dict2(dict_): str_ = ut.repr2(dict_, si=True) return align2(str_) header_block_lines = [('+============================')] + ( [ ('+ singleton := single sighting'), ('+ multiton := multiple sightings'), ('--' * num_tabs), ] if not short and with_header else [] ) source_block_lines = [ ('DB Info: ' + ibs.get_dbname()), ('DB Notes: ' + ibs.get_dbnotes()), ('DB NumContrib: %d' % num_contributors), ] bytes_block_lines = ( [ ('--' * num_tabs), ('DB Bytes: '), (' +- dbdir nBytes: ' + dbdir_space), (' | +- _ibsdb nBytes: ' + ibsdir_space), (' | | +-imgdir nBytes: ' + imgdir_space), (' | | +-cachedir nBytes: ' + cachedir_space), ] if with_bytes else [] ) name_block_lines = [ ('--' * num_tabs), ('# Names = %d' % num_names), ('# Names (unassociated) = %d' % num_names_unassociated), ('# Names (singleton) = %d' % num_names_singleton), ('# Names (multiton) = %d' % num_names_multiton), ] subset_str = ' ' if not request_annot_subset else '(SUBSET)' annot_block_lines = [ ('--' * num_tabs), ('# Annots %s = %d' % (subset_str, num_annots,)), ('# Annots (unknown) = %d' % num_unknown_annots), ('# Annots (singleton) 
= %d' % num_singleton_annots), ('# Annots (multiton) = %d' % num_multiton_annots), ] annot_per_basic_block_lines = ( [ ('--' * num_tabs), ('# Annots per Name (multiton) = %s' % (align2(multiton_stats),)), ('# Annots per Image = %s' % (align2(gx2_nAnnots_stats),)), ('# Annots per Species = %s' % (align_dict2(species2_nAids),)), ] if not short else [] ) occurrence_block_lines = ( [ ('--' * num_tabs), # ('# Occurrence Per Name (Resights) = %s' % (align_dict2(resight_name_stats),)), # ('# Annots per Encounter (Singlesights) = %s' % (align_dict2(singlesight_annot_stats),)), ('# Pair Tag Info (annots) = %s' % (align_dict2(pair_tag_info),)), ] if not short else [] ) annot_per_qualview_block_lines = [ None if short else '# Annots per Viewpoint = %s' % align_dict2(viewcode2_nAnnots), None if short else '# Annots per Quality = %s' % align_dict2(qualtext2_nAnnots), ] annot_per_agesex_block_lines = ( [ '# Annots per Age = %s' % align_dict2(agetext2_nAnnots), '# Annots per Sex = %s' % align_dict2(sextext2_nAnnots), ] if not short and with_agesex else [] ) contributor_block_lines = ( [ '# Images per contributor = ' + align_dict2(contributor_tag_to_nImages), '# Annots per contributor = ' + align_dict2(contributor_tag_to_nAnnots), '# Quality per contributor = ' + ut.repr2(contributor_tag_to_qualstats, sorted_=True), '# Viewpoint per contributor = ' + ut.repr2(contributor_tag_to_viewstats, sorted_=True), ] if with_contrib else [] ) img_block_lines = [ ('--' * num_tabs), ('# Img = %d' % len(valid_gids)), None if short else ('# Img reviewed = %d' % sum(image_reviewed_list)), None if short else ('# Img with gps = %d' % len(gps_list)), # ('# Img with timestamp = %d' % len(valid_unixtime_list)), None if short else ('Img Time Stats = %s' % (align2(unixtime_statstr),)), ] info_str_lines = ( header_block_lines + bytes_block_lines + source_block_lines + name_block_lines + annot_block_lines + annot_per_basic_block_lines + occurrence_block_lines + annot_per_qualview_block_lines + annot_per_agesex_block_lines + img_block_lines + contributor_block_lines + imgsize_stat_lines + [('L============================')] ) info_str = '\n'.join(ut.filter_Nones(info_str_lines)) info_str2 = ut.indent(info_str, '[{tag}]'.format(tag=tag)) if verbose: logger.info(info_str2) locals_ = locals() return locals_
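# --- Illustrative sketch (not part of the original module) ---
# A minimal, self-contained example of the singleton/multiton name partition
# computed by the stats code above, using plain numpy and hypothetical demo
# data in place of ibs / nx2_aids.
import numpy as np

nx2_aids_demo = [[1, 2, 3], [4], [], [5, 6]]                # annots grouped per name index (made up)
nx2_nAnnots_demo = np.array(list(map(len, nx2_aids_demo)))

multiton_nxs_demo = np.where(nx2_nAnnots_demo > 1)[0]       # names with multiple sightings (have ground truth)
singleton_nxs_demo = np.where(nx2_nAnnots_demo == 1)[0]     # names seen exactly once
unassociated_nxs_demo = np.where(nx2_nAnnots_demo == 0)[0]  # names with no annotations

# the same consistency check as above: the two groups must not overlap
assert len(np.intersect1d(singleton_nxs_demo, multiton_nxs_demo)) == 0, 'intersecting names'
print(multiton_nxs_demo, singleton_nxs_demo, unassociated_nxs_demo)  # [0 3] [1] [2]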
def get_layer_info_str(output_layer, batch_size=128): r""" Args: output_layer (lasange.layers.Layer): CommandLine: python -m ibeis_cnn.net_strs --test-get_layer_info_str:0 python -m ibeis_cnn.net_strs --test-get_layer_info_str:1 Example: >>> # DISABLE_DOCTEST >>> from ibeis_cnn.net_strs import * # NOQA >>> from ibeis_cnn import models >>> model = models.DummyModel(data_shape=(24, 24, 3), autoinit=True) >>> output_layer = model.output_layer >>> result = get_layer_info_str(output_layer) >>> result = '\n'.join([x.rstrip() for x in result.split('\n')]) >>> print(result) Network Structure: index Layer Outputs Bytes OutShape Params 0 Input 1,728 55,296 (8, 3, 24, 24) [] 1 Conv2D 7,744 249,600 (8, 16, 22, 22) [W(16,3,3,3, {t,r}), b(16, {t})] 2 Conv2D 7,056 229,952 (8, 16, 21, 21) [W(16,16,2,2, {t,r}), b(16, {t})] 3 Dense 8 226,080 (8, 8) [W(7056,8, {t,r}), b(8, {t})] 4 Dense 5 340 (8, 5) [W(8,5, {t,r}), b(5, {t})] ...this model has 57,989 learnable parameters ...this model will use 761,268 bytes = 743.43 KB Example: >>> # DISABLE_DOCTEST >>> from ibeis_cnn.net_strs import * # NOQA >>> from ibeis_cnn import models >>> model = models.mnist.MNISTModel(batch_size=128, output_dims=10, >>> data_shape=(24, 24, 3)) >>> model.init_arch() >>> output_layer = model.output_layer >>> result = get_layer_info_str(output_layer) >>> result = '\n'.join([x.rstrip() for x in result.split('\n')]) >>> print(result) """ import ibeis_cnn.__LASAGNE__ as lasagne info_lines = [] _print = info_lines.append with warnings.catch_warnings(): warnings.filterwarnings('ignore', '.*topo.*') nn_layers = lasagne.layers.get_all_layers(output_layer) _print('Network Structure:') columns_ = ut.ddict(list) for index, layer in enumerate(nn_layers): layer_info = get_layer_info(layer) columns_['index'].append(index) columns_['name'].append(layer_info['name']) #columns_['type'].append(getattr(layer, 'type', None)) #columns_['layer'].append(layer_info['classname']) columns_['layer'].append(layer_info['classalias']) columns_['num_outputs'].append('{:,}'.format(int(layer_info['num_outputs']))) columns_['output_shape'].append(str(layer_info['output_shape'] )) columns_['params'].append(layer_info['param_str']) columns_['param_type'].append(layer_info['param_type_str']) columns_['mem'].append(layer_info['total_memory']) columns_['bytes'].append('{:,}'.format(int(layer_info['total_bytes']))) #ut.embed() header_nice = { 'index' : 'index', 'name' : 'Name', 'layer' : 'Layer', 'type' : 'Type', 'num_outputs' : 'Outputs', 'output_shape' : 'OutShape', 'params' : 'Params', 'param_type' : 'ParamType', 'mem' : 'Mem', 'bytes' : 'Bytes', } header_align = { 'index' : '<', 'params' : '<', 'bytes' : '>', 'num_outputs' : '>', } def get_col_maxval(key): header_len = len(header_nice[key]) val_len = max(list(map(len, map(str, columns_[key])))) return max(val_len, header_len) header_order = ['index'] if len(ut.filter_Nones(columns_['name'])) > 0: header_order += ['name'] header_order += ['layer', 'num_outputs'] #header_order += ['mem'] header_order += ['bytes'] header_order += ['output_shape', 'params' ] #'param_type'] max_len = {key: str(get_col_maxval(key) + 1) for key, col in six.iteritems(columns_)} fmtstr = ' ' + ' '.join( [ '{:' + align + len_ + '}' for align, len_ in zip(ut.dict_take(header_align, header_order, '<'), ut.dict_take(max_len, header_order)) ] ) _print(fmtstr.format(*ut.dict_take(header_nice, header_order))) row_list = zip(*ut.dict_take(columns_, header_order)) for row in row_list: try: row = ['' if _ is None else _ for _ in row] str_ = 
fmtstr.format(*row) _print(str_) except TypeError: print('Error printing %r with args %r' % (fmtstr, row, )) total_bytes = count_bytes(output_layer) num_params = lasagne.layers.count_params(output_layer) _print('...this model has {:,} learnable parameters'.format(num_params)) _print('...this model will use ~{:,} bytes = {} per input'.format( total_bytes, ut.byte_str2(total_bytes))) _print('...this model will use ~{:,} bytes = {} per batch with a batch size of {}'.format( total_bytes * batch_size, ut.byte_str2(total_bytes * batch_size), batch_size)) info_str = '\n'.join(info_lines) return info_str
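# --- Illustrative sketch (not part of the original module) ---
# The table in get_layer_info_str is rendered by building one '{:<W}' / '{:>W}'
# format spec per column, sized from the max of header and cell widths.
# This standalone sketch (no lasagne/utool needed, hypothetical column data)
# shows the same technique.
columns_demo = {
    'index': [0, 1, 2],
    'layer': ['Input', 'Conv2D', 'Dense'],
    'bytes': ['55,296', '249,600', '340'],
}
header_nice_demo = {'index': 'index', 'layer': 'Layer', 'bytes': 'Bytes'}
header_align_demo = {'index': '<', 'layer': '<', 'bytes': '>'}
header_order_demo = ['index', 'layer', 'bytes']

max_len_demo = {
    key: max(len(header_nice_demo[key]), *(len(str(v)) for v in columns_demo[key])) + 1
    for key in header_order_demo
}
fmtstr_demo = ' ' + ' '.join(
    '{:' + header_align_demo[key] + str(max_len_demo[key]) + '}'
    for key in header_order_demo
)
# header row, then one formatted row per layer
print(fmtstr_demo.format(*(header_nice_demo[key] for key in header_order_demo)))
for row in zip(*(columns_demo[key] for key in header_order_demo)):
    print(fmtstr_demo.format(*row))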
def analyize_multiple_drives(drives): """ CommandLine: export PYTHONPATH=$PYTHONPATH:~/local/scripts python -m register_files --exec-analyize_multiple_drives --drives ~ E:/ D:/ python -m register_files --exec-analyize_multiple_drives --drives ~ /media/Store python register_files.py --exec-analyize_multiple_drives --drives /media/joncrall/media/ /media/joncrall/store/ /media/joncrall/backup cd ~/local/scripts Example: >>> from register_files import * # NOQA >>> dpaths = ut.get_argval('--drives', type_=list, default=['E://', 'D://'])#'D:/', 'E:/', 'F:/']) >>> drives = [Drive(root_dpath) for root_dpath in dpaths] >>> drive = Broadcaster(drives) >>> drive.compute_info() >>> #drive.build_fpath_hashes() >>> drive.check_consistency() >>> E = drive = drives[0] >>> analyize_multiple_drives(drives) >>> #D, E, F = drives >>> #drive = D """ # ----- ## Find the files shared on all disks #allhave = reduce(ut.dict_isect_combine, [drive.hash_to_fpaths for drive in drives]) #print('#allhave = %r' % (len(allhave),)) #allhave.keys()[0:3] #allhave.values()[0:3] #ut.embed() #for drive in drives: #drive.rrr() #print(drive.root_dpath) #print(len(drive.hash_to_unique_fpaths)) #print(len(drive.hash_to_fpaths)) #print(len(drive.hash_to_unique_fpaths) / len(drive.hash_to_fpaths)) # Build dict to map from dpath to file pointers of unique descendants #unique_fidxs_list = drive.hash_to_fidxs.values() #fidxs = ut.flatten(unique_fidxs_list) esc = re.escape # Find which files exist on all drives hashes_list = [set(drive_.hash_to_fidxs.keys()) for drive_ in drives] allhave_hashes = reduce(set.intersection, hashes_list) print('Drives %r have %d file hashes in common' % (drives, len(allhave_hashes))) lbls = [drive_.root_dpath for drive_ in drives] isect_lens = np.zeros((len(drives), len(drives))) for idx1, (hashes1, drive1) in enumerate(zip(hashes_list, drives)): for idx2, (hashes2, drive2) in enumerate(zip(hashes_list, drives)): if drive1 is not drive2: common = set.intersection(hashes1, hashes2) isect_lens[idx1, idx2] = len(common) else: isect_lens[idx1, idx2] = len(hashes2) import pandas as pd print(pd.DataFrame(isect_lens, index=lbls, columns=lbls)) # for drive in drives drive = drives[0] print('Finding unique files in drive=%r' % (drive, )) # Get subset of fidxs on this drive unflat_valid_fidxs = ut.take(drive.hash_to_fidxs, allhave_hashes) valid_fidxs = sorted(ut.flatten(unflat_valid_fidxs)) # Filter fpaths by patterns ignore_patterns = [esc('Thumbs.db')] ignore_paths = ['Spotify'] patterns = ignore_paths + ignore_patterns valid_fpaths = ut.take(drive.fpath_list, valid_fidxs) valid_flags = [ not any([re.search(p, fpath) for p in patterns]) for fpath in valid_fpaths ] valid_flags = np.array(valid_flags) valid_fidxs = ut.compress(valid_fidxs, valid_flags) print(ut.filtered_infostr(valid_flags, 'invalid fpaths')) fidxs = valid_fidxs valid_fpaths = sorted(ut.take(drive.fpath_list, fidxs)) dpath_to_unique_fidx = build_dpath_to_fidx(valid_fpaths, valid_fidxs, drive.root_dpath) def make_tree_structure(valid_fpaths): root = {} def dict_getitem_default(dict_, key, type_): try: val = dict_[key] except KeyError: val = type_() dict_[key] = val return val for fpath in ut.ProgIter(valid_fpaths, 'building tree', freq=30000): path_components = ut.dirsplit(fpath) current = root for comp in path_components[:-1]: current = dict_getitem_default(current, comp, dict) contents = dict_getitem_default(current, '.', list) contents.append(path_components[-1]) return root root = make_tree_structure(valid_fpaths) def print_tree(root, path, 
dpath_to_unique_fidx=dpath_to_unique_fidx, drive=drive, depth=None): print('path = %r' % (path, )) print(ut.byte_str2(drive.get_total_nbytes(dpath_to_unique_fidx[path]))) path_components = ut.dirsplit(path) # Navigate to correct spot in tree current = root for c in path_components: current = current[c] print(ut.repr3(current, truncate=1)) def get_tree_info(root, path, dpath_to_unique_fidx=dpath_to_unique_fidx, drive=drive, depth=0): path_components = ut.dirsplit(path) current = root for c in path_components: current = current[c] if isinstance(current, list): tree_tmp = [] else: key_list = list(current.keys()) child_list = [join(path, key) for key in key_list] dpath_nbytes_list = [ drive.get_total_nbytes(dpath_to_unique_fidx.get(child, [])) for child in child_list ] nfiles_list = [ len(dpath_to_unique_fidx.get(child, [])) for child in child_list ] tree_tmp = sorted([ (key, ut.byte_str2(nbytes), nfiles) if depth == 0 else (key, ut.byte_str2(nbytes), nfiles, get_tree_info(root, path=child, dpath_to_unique_fidx=dpath_to_unique_fidx, drive=drive, depth=depth - 1)) for key, child, nbytes, nfiles in zip( key_list, child_list, dpath_nbytes_list, nfiles_list) ]) return tree_tmp def print_tree_struct(*args, **kwargs): tree_str = (ut.indent(ut.repr3(get_tree_info(*args, **kwargs), nl=1))) print(tree_str) #bytes_str = ut.byte_str2(drive.get_total_nbytes(dpath_to_unique_fidx[path])) #print('path = %r, %s' % (path, bytes_str)) #print(ut.repr3(key_list)) return tree_str dpath_to_unique_fidx dpath_to_fidxs = ut.map_dict_vals(set, drive.dpath_to_fidx) complete_unique_dpaths = ut.dict_isect(dpath_to_fidxs, dpath_to_unique_fidx) complete_root = make_tree_structure(complete_unique_dpaths.keys()) globals()['ut'] = ut globals()['os'] = os globals()['join'] = join print(ut.byte_str2(drive.get_total_nbytes(dpath_to_unique_fidx['E:\\']))) get_tree_info(root, path='E:\\', depth=0) get_tree_info(complete_root, path='E:\\', depth=0) get_tree_info(root, path='E:\\', depth=1) print(print_tree_struct(root, path='E:\\Clutter', depth=0)) print_tree(root, path=r'E:\TV') print_tree(root, path=r'E:\Movies') print_tree(root, path=r'E:\Boot') print_tree(root, path='E:\\') print_tree(root, path=r'E:\Downloaded') print_tree(root, path=r'E:\Recordings') print_tree(root, path=r'E:\Clutter') print_tree(root, path=r'E:\Audio Books') # TODO: # * Ignore list # * Find and rectify internal duplicates # * Update registry with new files and deleted ones # * Ensure that all unique files are backed up # Index the C: Drive as well. # * Lazy properties of drive # * Multiple types of identifiers (hash, fname, ext, fsize) # Drive subsets # Export/Import Drive for analysis on other machines ut.embed()
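# --- Illustrative sketch (not part of the original module) ---
# analyize_multiple_drives builds a pairwise intersection matrix of content
# hashes across drives (diagonal = per-drive hash count). This standalone
# sketch reproduces that step with hypothetical hash sets instead of real
# Drive objects.
from functools import reduce
import numpy as np
import pandas as pd

hashes_list_demo = [
    {'a', 'b', 'c', 'd'},   # drive 1
    {'b', 'c'},             # drive 2
    {'c', 'd', 'e'},        # drive 3
]
lbls_demo = ['/media/drive1', '/media/drive2', '/media/drive3']

allhave_demo = reduce(set.intersection, hashes_list_demo)
print('%d file hashes in common across all drives' % len(allhave_demo))  # 1 (just 'c')

isect_lens_demo = np.zeros((len(hashes_list_demo), len(hashes_list_demo)), dtype=int)
for idx1, hashes1 in enumerate(hashes_list_demo):
    for idx2, hashes2 in enumerate(hashes_list_demo):
        if idx1 != idx2:
            isect_lens_demo[idx1, idx2] = len(hashes1 & hashes2)
        else:
            isect_lens_demo[idx1, idx2] = len(hashes1)
print(pd.DataFrame(isect_lens_demo, index=lbls_demo, columns=lbls_demo))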
def __str__(drive):
    if drive.total_bytes is None:
        bytes_str = '?'
    else:
        bytes_str = ut.byte_str2(drive.total_bytes)
    return drive.root_dpath + ' - ' + bytes_str
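# --- Illustrative sketch (not part of the original module) ---
# byte_str2_approx is a hypothetical stand-in for ut.byte_str2 (not the real
# utool implementation), assuming base-1024 units as suggested by the
# "761,268 bytes = 743.43 KB" doctest output above. It shows the kind of
# string that __str__ appends to the drive's root path.
def byte_str2_approx(nbytes):
    for unit in ['bytes', 'KB', 'MB', 'GB', 'TB']:
        if abs(nbytes) < 1024.0 or unit == 'TB':
            return '%.2f %s' % (nbytes, unit)
        nbytes /= 1024.0

print(byte_str2_approx(761268))                         # 743.43 KB
print('E:\\' + ' - ' + byte_str2_approx(500107862016))  # made-up ~500 GB drive -> ~465.76 GB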