Example #1
def grab_numpy_testdata(shape=(3e3, 128), dtype=np.uint8):
    ndata = utool.get_arg('--ndata', type_=int, default=2)
    print('[TEST] build ndata=%d numpy arrays with shape=%r' % (ndata, shape))
    print(' * expected_memory(table_list) = %s' % utool.byte_str2(ndata * np.product(shape)))
    table_list = [np.empty(shape, dtype=dtype) for i in range(ndata)]
    print(' * memory+overhead(table_list) = %s' % utool.byte_str2(utool.get_object_size(table_list)))
    return table_list
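All of these examples revolve around byte_str2, which renders a raw byte count as a human-readable string. A minimal stand-in sketch, assuming 1024-based units and two decimal places (the exact breakpoints and unit labels used by utool.byte_str2 are assumptions here):

def byte_str2_sketch(nbytes):
    # hypothetical equivalent of utool.byte_str2 (assumed behavior)
    for unit in ['bytes', 'KB', 'MB', 'GB', 'TB']:
        if abs(nbytes) < 1024.0 or unit == 'TB':
            return '%.2f %s' % (nbytes, unit)
        nbytes /= 1024.0

print(byte_str2_sketch(3e3 * 128))  # -> '375.00 KB'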
Example #2
 def get_tree_info(root,
                   path,
                   dpath_to_unique_fidx=dpath_to_unique_fidx,
                   drive=drive,
                   depth=0):
     path_components = ut.dirsplit(path)
     current = root
     for c in path_components:
         current = current[c]
     if isinstance(current, list):
         tree_tmp = []
     else:
         key_list = list(current.keys())
         child_list = [join(path, key) for key in key_list]
         dpath_nbytes_list = [
             drive.get_total_nbytes(dpath_to_unique_fidx.get(child, []))
             for child in child_list
         ]
         nfiles_list = [
             len(dpath_to_unique_fidx.get(child, []))
             for child in child_list
         ]
         tree_tmp = sorted([
             (key, ut.byte_str2(nbytes), nfiles) if depth == 0 else
             (key, ut.byte_str2(nbytes), nfiles,
              get_tree_info(root,
                            path=child,
                            dpath_to_unique_fidx=dpath_to_unique_fidx,
                            drive=drive,
                            depth=depth - 1))
             for key, child, nbytes, nfiles in zip(
                 key_list, child_list, dpath_nbytes_list, nfiles_list)
         ])
     return tree_tmp
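get_tree_info walks a nested dict keyed by path components before summarizing each child directory. A self-contained toy of that lookup step, with a sketched dirsplit standing in for ut.dirsplit (assumed to split a path into its components):

root = {'raid': {'photos': {'2019': {}}, 'docs': {}}}

def dirsplit(path):
    # stand-in for ut.dirsplit: split a path into its components
    return [p for p in path.replace('\\', '/').split('/') if p]

current = root
for c in dirsplit('raid/photos'):
    current = current[c]
print(list(current.keys()))  # -> ['2019']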
Example #3
 def get_tree_info(root, path, dpath_to_unique_fidx=dpath_to_unique_fidx, drive=drive, depth=0):
     path_components = ut.dirsplit(path)
     current = root
     for c in path_components:
         current = current[c]
     if isinstance(current, list):
         tree_tmp = []
     else:
         key_list = list(current.keys())
         child_list = [join(path, key) for key in key_list]
         dpath_nbytes_list = [
             drive.get_total_nbytes(dpath_to_unique_fidx.get(child, []))
             for child in child_list
         ]
         nfiles_list = [
             len(dpath_to_unique_fidx.get(child, []))
             for child in child_list
         ]
         tree_tmp = sorted([
             (key, ut.byte_str2(nbytes), nfiles)
             if depth == 0 else
             (key, ut.byte_str2(nbytes), nfiles,
                 get_tree_info(root, path=child,
                               dpath_to_unique_fidx=dpath_to_unique_fidx, drive=drive,
                               depth=depth - 1))
             for key, child, nbytes, nfiles in zip(key_list, child_list, dpath_nbytes_list, nfiles_list)
         ])
     return tree_tmp
Example #4
    def print_tier_info(drive):
        tier_windows = drive.get_tier_windows()
        tier_flags = drive.get_tier_flags()

        for tier, flags in enumerate(tier_flags):
            high, low = tier_windows[tier]
            print('tier %r window = %s - %s' % (tier, ut.byte_str2(high), ut.byte_str2(low)))
            print('    len(fpaths) = %r' % (np.sum(flags)))
Example #5
def grab_numpy_testdata(shape=(3e3, 128), dtype=np.uint8):
    ndata = utool.get_argval('--ndata', type_=int, default=2)
    print('[TEST] build ndata=%d numpy arrays with shape=%r' % (ndata, shape))
    print(' * expected_memory(table_list) = %s' %
          utool.byte_str2(ndata * np.product(shape)))
    table_list = [np.empty(shape, dtype=dtype) for i in range(ndata)]
    print(' * memory+overhead(table_list) = %s' %
          utool.byte_str2(utool.get_object_size(table_list)))
    return table_list
Example #6
    def print_tier_info(drive):
        tier_windows = drive.get_tier_windows()
        tier_flags = drive.get_tier_flags()

        for tier, flags in enumerate(tier_flags):
            high, low = tier_windows[tier]
            print('tier %r window = %s - %s' %
                  (tier, ut.byte_str2(high), ut.byte_str2(low)))
            print('    len(fpaths) = %r' % (np.sum(flags)))
Example #7
    def check_consistency(drive):
        print('Checking %r consistency' % (drive,))
        total = ut.get_total_diskbytes(drive.root_dpath)
        free = ut.get_free_diskbytes(drive.root_dpath)
        used = total - free
        print('total             = %r' % (total,))
        print('used              = %r' % (used,))
        print('drive.total_bytes = %r' % (drive.total_bytes,))

        print('total             = %r' % (ut.byte_str2(total),))
        print('used              = %r' % (ut.byte_str2(used),))
        print('drive.total_bytes = %r' % (ut.byte_str2(drive.total_bytes),))
Example #8
    def check_consistency(drive):
        print('Checking %r consistency' % (drive, ))
        total = ut.get_total_diskbytes(drive.root_dpath)
        free = ut.get_free_diskbytes(drive.root_dpath)
        used = total - free
        print('total             = %r' % (total, ))
        print('used              = %r' % (used, ))
        print('drive.total_bytes = %r' % (drive.total_bytes, ))

        print('total             = %r' % (ut.byte_str2(total), ))
        print('used              = %r' % (ut.byte_str2(used), ))
        print('drive.total_bytes = %r' % (ut.byte_str2(drive.total_bytes), ))
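ut.get_total_diskbytes and ut.get_free_diskbytes are presumably thin wrappers over an OS call; under that assumption, the same consistency check can be sketched with the standard library:

import shutil

usage = shutil.disk_usage('/')
used = usage.total - usage.free
print('total = %r' % (usage.total,))
print('used  = %r' % (used,))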
Example #9
 def print_size_info(inva):
     sizes = inva.get_size_info()
     sizes = ut.sort_dict(sizes, 'vals', ut.identity)
     total_nbytes = sum(sizes.values())
     logger.info(
         ut.align(ut.repr3(ut.map_dict_vals(ut.byte_str2, sizes), strvals=True), ':')
     )
     logger.info('total_nbytes = %r' % (ut.byte_str2(total_nbytes),))
Example #10
    def get_tier_flags(drive):
        try:
            tier_flags = drive.cache.load('tier_flags')
        except ut.CacheMissException:
            tier_windows = drive.get_tier_windows()

            print('Tier Windows')
            for tier, (high, low) in enumerate(tier_windows):
                print('tier %r window = %s - %s' % (tier, ut.byte_str2(high), ut.byte_str2(low)))

            fpath_bytes_arr = np.array(drive.fpath_bytes_list)
            tier_flags = [
                np.logical_and.reduce([fpath_bytes_arr <= high, fpath_bytes_arr > low])
                for high, low in tier_windows
            ]
            drive.cache.save('tier_flags', tier_flags)
        return tier_flags
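Each tier window is a (high, low) pair of byte sizes, and the np.logical_and.reduce mask flags files whose size falls in (low, high]. A tiny illustration with made-up sizes:

import numpy as np

fpath_bytes_arr = np.array([500, 2048, 10000, 300])
high, low = 4096, 1024
flags = np.logical_and.reduce([fpath_bytes_arr <= high, fpath_bytes_arr > low])
print(flags, int(np.sum(flags)))  # -> [False  True False False] 1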
Example #11
def TEST_SQL_NUMPY():
    sqldb_fname = 'temp_test_sql_numpy.sqlite3'
    sqldb_dpath = utool.util_cplat.get_app_resource_dir('ibeis', 'testfiles')
    utool.ensuredir(sqldb_dpath)
    utool.util_path.remove_file(join(sqldb_dpath, sqldb_fname), dryrun=False)
    db = sqldbc.SQLDatabaseController(sqldb_dpath=sqldb_dpath,
                                      sqldb_fname=sqldb_fname)

    db.add_table('temp', [
        ('temp_id', 'INTEGER PRIMARY KEY'),
        ('temp_hash', 'NUMPY'),
    ])

    tt = utool.tic()
    feats_list = grab_numpy_testdata(shape=(3e3, 128), dtype=np.uint8)
    print(' * numpy.new time=%r sec' % utool.toc(tt))

    print('[TEST] insert numpy arrays')
    tt = utool.tic()
    feats_iter = ((feats, ) for feats in feats_list)
    db.executemany(operation='''
        INSERT
        INTO temp
        (
            temp_hash
        )
        VALUES (?)
        ''',
                   params_iter=feats_iter)
    print(' * execute insert time=%r sec' % utool.toc(tt))

    print('[TEST] save sql database')
    tt = utool.tic()
    #db.cur.commit()
    db.connection.commit()
    print(' * commit time=%r sec' % utool.toc(tt))

    print('[TEST] read from sql database')

    tt = utool.tic()
    db.cur.execute('SELECT temp_hash FROM temp', [])
    print(' * execute select time=%r sec' % utool.toc(tt))

    tt = utool.tic()
    result_list = _results_gen(db.cur)
    print(' * iter results time=%r sec' % utool.toc(tt))
    print(' * memory(result_list) = %s' %
          utool.byte_str2(utool.get_object_size(result_list)))
    del result_list
    #print('[TEST] result_list=%r' % result_list)

    print('[TEST] dump sql database')
    tt = utool.tic()
    db.dump('temp.dump.txt')
    print(' * dump time=%r sec' % utool.toc(tt))
    #with open('temp.dump.txt') as file_:
    #    print(file_.read())
    return locals()
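The NUMPY column type is handled by the project's SQLDatabaseController; plain sqlite3 has no such type, so a standard-library version of the same insert pattern needs manual serialization. A minimal sketch (the .npy serialization here is an assumption for illustration, not what the controller actually does):

import io
import sqlite3
import numpy as np

def to_blob(arr):
    # serialize an ndarray into a BLOB using numpy's .npy format
    buf = io.BytesIO()
    np.save(buf, arr)
    return sqlite3.Binary(buf.getvalue())

conn = sqlite3.connect(':memory:')
conn.execute('CREATE TABLE temp (temp_id INTEGER PRIMARY KEY, temp_hash BLOB)')
feats_list = [np.empty((3, 4), dtype=np.uint8) for _ in range(2)]
conn.executemany('INSERT INTO temp (temp_hash) VALUES (?)',
                 ((to_blob(feats),) for feats in feats_list))
conn.commit()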
Example #12
 def print_tree(root, path, dpath_to_unique_fidx=dpath_to_unique_fidx, drive=drive, depth=None):
     print('path = %r' % (path,))
     print(ut.byte_str2(drive.get_total_nbytes(dpath_to_unique_fidx[path])))
     path_components = ut.dirsplit(path)
     # Navigate to correct spot in tree
     current = root
     for c in path_components:
         current = current[c]
     print(ut.repr3(current, truncate=1))
Example #13
def TEST_SQL_NUMPY():
    sqldb_fname = 'temp_test_sql_numpy.sqlite3'
    sqldb_dpath = utool.util_cplat.get_app_resource_dir('ibeis', 'testfiles')
    utool.ensuredir(sqldb_dpath)
    utool.util_path.remove_file(join(sqldb_dpath, sqldb_fname), dryrun=False)
    db = sqldbc.SQLDatabaseController(sqldb_dpath=sqldb_dpath,
                                      sqldb_fname=sqldb_fname)

    db.schema('temp',    [
        ('temp_id',      'INTEGER PRIMARY KEY'),
        ('temp_hash',    'NUMPY'),
    ])

    tt = utool.tic()
    feats_list = grab_numpy_testdata(shape=(3e3, 128), dtype=np.uint8)
    print(' * numpy.new time=%r sec' % utool.toc(tt))

    print('[TEST] insert numpy arrays')
    tt = utool.tic()
    feats_iter = ((feats, ) for feats in feats_list)
    db.executemany(operation='''
        INSERT
        INTO temp
        (
            temp_hash
        )
        VALUES (?)
        ''', params_iter=feats_iter)
    print(' * execute insert time=%r sec' % utool.toc(tt))

    print('[TEST] save sql database')
    tt = utool.tic()
    #db.cur.commit()
    db.connection.commit()
    print(' * commit time=%r sec' % utool.toc(tt))

    print('[TEST] read from sql database')

    tt = utool.tic()
    db.cur.execute('SELECT temp_hash FROM temp', [])
    print(' * execute select time=%r sec' % utool.toc(tt))

    tt = utool.tic()
    result_list = _results_gen(db.cur)
    print(' * iter results time=%r sec' % utool.toc(tt))
    print(' * memory(result_list) = %s' % utool.byte_str2(utool.get_object_size(result_list)))
    del result_list
    #print('[TEST] result_list=%r' % result_list)

    print('[TEST] dump sql database')
    tt = utool.tic()
    db.dump('temp.dump.txt')
    print(' * dump time=%r sec' % utool.toc(tt))
    #with open('temp.dump.txt') as file_:
    #    print(file_.read())
    return locals()
Example #14
    def get_tier_flags(drive):
        try:
            tier_flags = drive.cache.load('tier_flags')
        except ut.CacheMissException:
            tier_windows = drive.get_tier_windows()

            print('Tier Windows')
            for tier, (high, low) in enumerate(tier_windows):
                print('tier %r window = %s - %s' %
                      (tier, ut.byte_str2(high), ut.byte_str2(low)))

            fpath_bytes_arr = np.array(drive.fpath_bytes_list)
            tier_flags = [
                np.logical_and.reduce(
                    [fpath_bytes_arr <= high, fpath_bytes_arr > low])
                for high, low in tier_windows
            ]
            drive.cache.save('tier_flags', tier_flags)
        return tier_flags
Example #15
 def print_tree(root,
                path,
                dpath_to_unique_fidx=dpath_to_unique_fidx,
                drive=drive,
                depth=None):
     print('path = %r' % (path, ))
     print(ut.byte_str2(drive.get_total_nbytes(dpath_to_unique_fidx[path])))
     path_components = ut.dirsplit(path)
     # Navigate to correct spot in tree
     current = root
     for c in path_components:
         current = current[c]
     print(ut.repr3(current, truncate=1))
Example #16
 def get_infostr(drive, extra=False):
     drive.num_fpaths = len(drive.fpath_list)
     infostr_list = [str(drive)]
     nan_fpaths = drive.get_filesize_errors()
     infostr_list += ['#nan fsize fpaths = %r' % (len(nan_fpaths),)]
     if extra:
         infostr_list += ['#nan_fpaths = %r' % (nan_fpaths[0:10],)]
     total_drive_bytes = ut.get_total_diskbytes(drive.root_dpath)
     infostr_list += [('total drive size = %r' % (ut.byte_str2(total_drive_bytes),))]
     infostr_list += [('drive.num_fpaths = %r' % (drive.num_fpaths,))]
     infostr = '\n'.join(infostr_list)
     return infostr
Example #17
 def get_infostr(drive, extra=False):
     drive.num_fpaths = len(drive.fpath_list)
     infostr_list = [str(drive)]
     nan_fpaths = drive.get_filesize_errors()
     infostr_list += ['#nan fsize fpaths = %r' % (len(nan_fpaths), )]
     if extra:
         infostr_list += ['#nan_fpaths = %r' % (nan_fpaths[0:10], )]
     total_drive_bytes = ut.get_total_diskbytes(drive.root_dpath)
     infostr_list += [
         ('total drive size = %r' % (ut.byte_str2(total_drive_bytes), ))
     ]
     infostr_list += [('drive.num_fpaths = %r' % (drive.num_fpaths, ))]
     infostr = '\n'.join(infostr_list)
     return infostr
Example #18
def invert_index(vecs_list, ax_list, verbose=ut.NOT_QUIET):
    r"""
    Aggregates descriptors of input annotations and returns inverted information

    Args:
        vecs_list (list):
        ax_list (list):
        verbose (bool):  verbosity flag (default = True)

    Returns:
        tuple: (idx2_vec, idx2_ax, idx2_fx)

    CommandLine:
        python -m ibeis.algo.hots.neighbor_index --test-invert_index

    Example:
        >>> # SLOW_DOCTEST
        >>> from ibeis.algo.hots.neighbor_index import *  # NOQA
        >>> import vtool as vt
        >>> num = 100
        >>> rng = np.random.RandomState(0)
        >>> ax_list = np.arange(num)
        >>> vecs_list = [vt.tests.dummy.get_dummy_dpts(rng.randint(100)) for ax in ax_list]
        >>> verbose = True
        >>> (idx2_vec, idx2_ax, idx2_fx) = invert_index(vecs_list, ax_list, verbose)
    """
    if ut.VERYVERBOSE:
        print('[nnindex] stacking descriptors from %d annotations' %
              len(ax_list))
    try:
        idx2_vec, idx2_ax, idx2_fx = vt.invertible_stack(vecs_list, ax_list)
        assert idx2_vec.shape[0] == idx2_ax.shape[0]
        assert idx2_vec.shape[0] == idx2_fx.shape[0]
    except MemoryError as ex:
        ut.printex(ex, 'cannot build inverted index', '[!memerror]')
        raise
    if ut.VERYVERBOSE or verbose:
        print('[nnindex] stacked nVecs={nVecs} from nAnnots={nAnnots}'.format(
            nVecs=len(idx2_vec), nAnnots=len(ax_list)))
        print('[nnindex] idx2_vecs.dtype = {}'.format(idx2_vec.dtype))
        print('[nnindex] memory(idx2_vecs) = {}'.format(
            ut.byte_str2(idx2_vec.size * idx2_vec.dtype.itemsize)))
    return idx2_vec, idx2_ax, idx2_fx
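The memory report at the end multiplies size by itemsize, which is exactly what numpy already exposes as ndarray.nbytes:

import numpy as np

idx2_vec = np.zeros((1912, 128), dtype=np.uint8)
assert idx2_vec.size * idx2_vec.dtype.itemsize == idx2_vec.nbytes
print(idx2_vec.nbytes)  # -> 244736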
Example #19
def invert_index(vecs_list, ax_list, verbose=ut.NOT_QUIET):
    r"""
    Aggregates descriptors of input annotations and returns inverted information

    Args:
        vecs_list (list):
        ax_list (list):
        verbose (bool):  verbosity flag (default = True)

    Returns:
        tuple: (idx2_vec, idx2_ax, idx2_fx)

    CommandLine:
        python -m ibeis.algo.hots.neighbor_index --test-invert_index

    Example:
        >>> # SLOW_DOCTEST
        >>> from ibeis.algo.hots.neighbor_index import *  # NOQA
        >>> import vtool as vt
        >>> num = 100
        >>> rng = np.random.RandomState(0)
        >>> ax_list = np.arange(num)
        >>> vecs_list = [vt.tests.dummy.get_dummy_dpts(rng.randint(100)) for ax in ax_list]
        >>> verbose = True
        >>> (idx2_vec, idx2_ax, idx2_fx) = invert_index(vecs_list, ax_list, verbose)
    """
    if ut.VERYVERBOSE:
        print('[nnindex] stacking descriptors from %d annotations' % len(ax_list))
    try:
        idx2_vec, idx2_ax, idx2_fx = vt.invertible_stack(vecs_list, ax_list)
        assert idx2_vec.shape[0] == idx2_ax.shape[0]
        assert idx2_vec.shape[0] == idx2_fx.shape[0]
    except MemoryError as ex:
        ut.printex(ex, 'cannot build inverted index', '[!memerror]')
        raise
    if ut.VERYVERBOSE or verbose:
        print('[nnindex] stacked nVecs={nVecs} from nAnnots={nAnnots}'.format(
            nVecs=len(idx2_vec), nAnnots=len(ax_list)))
        print('[nnindex] idx2_vecs.dtype = {}'.format(idx2_vec.dtype))
        print('[nnindex] memory(idx2_vecs) = {}'.format(
            ut.byte_str2(idx2_vec.size * idx2_vec.dtype.itemsize)))
    return idx2_vec, idx2_ax, idx2_fx
Example #20
def get_layer_info(layer):
    r"""
    Args:
        layer (?):

    Returns:
        ?: layer_info

    CommandLine:
        python -m ibeis_cnn.net_strs get_layer_info --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis_cnn.net_strs import *  # NOQA
        >>> from ibeis_cnn import models
        >>> model = models.mnist.MNISTModel(batch_size=8, data_shape=(24, 24, 1), output_dims=10)
        >>> model.init_arch()
        >>> nn_layers = model.get_all_layers()
        >>> for layer in nn_layers:
        >>>     layer_info = get_layer_info(layer)
        >>>     print(ut.repr3(layer_info, nl=1))
    """
    import functools
    import operator
    import ibeis_cnn.__LASAGNE__ as lasagne
    # Information that contributes to RAM usage
    import numpy as np
    # Get basic layer infos
    output_shape = lasagne.layers.get_output_shape(layer)
    input_shape = getattr(layer, 'input_shape', [])
    # Get number of outputs ignoring the batch size
    num_outputs = functools.reduce(operator.mul, output_shape[1:])
    if len(input_shape):
        num_inputs = functools.reduce(operator.mul, input_shape[1:])
    else:
        num_inputs = 0
    # TODO: if we can ever support non float32 calculations this must change
    #layer_type = 'float32'
    layer_dtype = np.dtype('float32')

    # Get individual param infos
    param_infos = []
    for param, tags in layer.params.items():
        value = param.get_value()
        pbasename = param_basename(layer, param)
        param_info = ut.odict([
            ('name', param.name),
            ('basename', pbasename),
            ('tags', tags),
            ('shape', value.shape),
            ('size', value.size),
            ('itemsize', value.dtype.itemsize),
            ('dtype', str(value.dtype)),
            ('bytes', value.size * value.dtype.itemsize),
        ])

        def initializer_info(initclass):
            initclassname = initclass.__class__.__name__
            if initclassname == 'Constant':
                spec = initclass.val
            else:
                spec = ut.odict()
                spec['type'] = initclassname
                for key, val in initclass.__dict__.items():
                    if isinstance(val, lasagne.init.Initializer):
                        spec[key] = initializer_info(val)
                    elif isinstance(val, type) and issubclass(val, lasagne.init.Initializer):
                        spec[key] = val.__name__
                        #initializer_info(val())
                    else:
                        spec[key] = val
            return spec

        if hasattr(layer, '_initializers'):
            #print('layer = %r' % (layer,))
            initclass = layer._initializers[param]
            spec = initializer_info(initclass)
            param_info['init'] = spec

        param_infos.append(param_info)
    # Combine param infos
    param_str = surround(', '.join(
        [paramstr(layer, p, tags) for p, tags in layer.params.items()]), '[]')
    param_type_str = surround(', '.join(
        [repr(p.type) for p, tags in layer.params.items()]), '[]')
    num_params = sum([info['size'] for info in param_infos])

    classalias_map = {
        'ElemwiseSumLayer': 'ElemwiseSum',
        'Conv2DCCLayer': 'Conv2D',
        'Conv2DDNNLayer': 'Conv2D',
        'Conv2DLayer': 'Conv2D',
        'MaxPool2DLayer': 'MaxPool2D',
        'MaxPool2DCCLayer': 'MaxPool2D',
        'MaxPool2DDNNLayer': 'MaxPool2D',
        'LeakyRectify': 'LReLU',
        'InputLayer': 'Input',
        'GaussianNoiseLayer': 'Noise',
        'DropoutLayer': 'Dropout',
        'DenseLayer': 'Dense',
        'NonlinearityLayer': 'Nonlinearity',
        'FlattenLayer': 'Flatten',
        'L2NormalizeLayer': 'L2Norm',
        'BatchNormLayer': 'BatchNorm',
        'BatchNormLayer2': 'BatchNorm',
    }
    layer_attrs_ignore_dict = {
        'MaxPool2D': ['mode', 'ignore_border'],
        'Dropout': ['rescale'],
        'Conv2D': ['convolution'],
        'BatchNorm': ['epsilon', 'mean', 'inv_std', 'axes', 'beta', 'gamma'],
        'BatchNorm2': ['epsilon', 'mean', 'inv_std', 'axes', 'beta', 'gamma'],
        #'ElemwiseSum': ['merge_function', 'cropping'],
        #'ElemwiseSum': [],
        'FeaturePoolLayer': ['axis'],
    }
    layer_attrs_dict = {
        #'ElemwiseSum': ['coeffs'],
        #'ElemwiseSum': ['coeffs', 'merge_function', 'cropping'],
        'Noise': ['sigma'],
        'Input': ['shape'],
        'Dropout': ['p', 'shared_axes'],
        'Conv2D': ['num_filters', 'filter_size', 'stride', 'output_shape', 'num_groups'],
        'MaxPool2D': ['stride', 'pool_size', 'output_shape'],  # 'mode'],
        'Dense': ['num_units', 'num_leading_axes'],
        'SoftMax': ['num_units', 'num_leading_axes'],
        'L2Norm': ['axis'],
        'BatchNorm': ['alpha'],
        'BatchNorm2': ['alpha'],
        'FeaturePoolLayer': ['pool_size', 'pool_function'],
    }
    #layer_attrs_dict = {}
    all_ignore_attrs = ['nonlinearity', 'b', 'W', 'get_output_kwargs', 'name',
                        'input_shapes', 'input_layers', 'input_shape',
                        'input_layer', 'input_var', 'untie_biases',
                        '_initializers',
                        'flip_filters', 'pad', 'params', 'n', '_is_main_layer']

    classname = layer.__class__.__name__
    classalias = classalias_map.get(classname, classname)
    #if classalias == 'FeaturePoolLayer' and ut.get_funcname(layer.pool_function) == 'max':
    #    classalias = 'MaxOut'
    if classalias == 'Dense' and ut.get_funcname(layer.nonlinearity) == 'softmax':
        classalias = 'SoftMax'

    layer_attrs = ut.odict([
        (key, getattr(layer, key))
        for key in layer_attrs_dict.get(classalias, [])
    ])
    ignore_attrs = (all_ignore_attrs +
                    layer_attrs_ignore_dict.get(classalias, []))

    if classalias not in layer_attrs_dict or (classalias == classname and len(layer_attrs) == 0):
        layer_attrs = layer.__dict__.copy()
        ut.delete_dict_keys(layer_attrs, ignore_attrs)

    for key in list(layer_attrs.keys()):
        val = layer_attrs[key]
        if ut.is_funclike(val):
            layer_attrs[key] = ut.get_funcname(val)

    attr_key_list = list(layer_attrs.keys())
    missing_keys = (set(layer.__dict__.keys()) - set(ignore_attrs) - set(attr_key_list))
    missing_keys = [k for k in missing_keys if not k.startswith('_')]

    #if layer_type == 'Conv2DCCLayer':
    #    ut.embed()
    DEBUG = True
    if DEBUG and len(missing_keys) > 0:
        print('---')
        print(' * ' + classname)
        print(' * missing keys: %r' % (missing_keys,))
        print(' * has keys: %r' % (attr_key_list,))
        if True:
            #import utool
            #with utool.embed_on_exception_context:
            #raise AssertionError('MISSING KEYS')
            pass

    # handle None batch sizes
    if output_shape[0] is None:
        size = np.prod(output_shape[1:])
    else:
        size = np.prod(output_shape)

    layer_info = ut.odict([
        ('name', layer.name),
        ('classname', classname),
        ('classalias', classalias),
        ('output_shape', output_shape),
        ('input_shape', input_shape),
        ('num_outputs', num_outputs),
        ('num_inputs', num_inputs),
        ('size', size),
        ('itemsize', layer_dtype.itemsize),
        ('dtype', str(layer_dtype)),
        ('num_params', num_params),
        ('param_infos', param_infos),
        ('param_str', param_str),
        ('param_type_str', param_type_str),
        ('layer_attrs', layer_attrs),
        ('nonlinearity', None),
    ])

    if hasattr(layer, 'nonlinearity'):
        try:
            nonlinearity = layer.nonlinearity.__name__
        except AttributeError:
            nonlinearity = layer.nonlinearity.__class__.__name__
        layer_info['nonlinearity'] = ut.odict([])
        layer_info['nonlinearity']['type'] = nonlinearity
        layer_info['nonlinearity'].update(layer.nonlinearity.__dict__)
        #attr_str_list.append('nonlinearity={0}'.format(nonlinearity))

    param_bytes = sum([info['bytes'] for info in param_infos])
    layer_bytes = layer_info['size'] * layer_info['itemsize']
    #if classname in ['BatchNormLayer', 'NonlinearityLayer']:
    #    layer_bytes = 0
    layer_info['bytes'] = layer_bytes
    layer_info['param_bytes'] = param_bytes
    layer_info['total_bytes'] = layer_bytes + param_bytes
    layer_info['total_memory'] = ut.byte_str2(layer_info['total_bytes'])
    return layer_info
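The memory accounting at the end of get_layer_info reduces to activation bytes plus parameter bytes, both at float32 itemsize. A back-of-the-envelope version with invented layer numbers (the shapes and parameter counts below are assumptions for illustration):

import numpy as np

itemsize = 4                         # float32
output_shape = (None, 32, 24, 24)    # batch dimension unknown
size = int(np.prod(output_shape[1:]))
num_params = 32 * 1 * 3 * 3 + 32     # e.g. a small conv layer: W plus b
layer_bytes = size * itemsize
total_bytes = layer_bytes + num_params * itemsize
print(layer_bytes, total_bytes)      # -> 73728 75008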
Example #21
def get_dbinfo(ibs, verbose=True,
               with_imgsize=False,
               with_bytes=False,
               with_contrib=False,
               with_agesex=False,
               with_header=True,
               short=False,
               tag='dbinfo',
               aid_list=None):
    """

    Returns dictionary of digestible database information.
    Infostr is a string summary of all the stats. Prints infostr in addition to
    returning locals.

    Args:
        ibs (IBEISController):
        verbose (bool):
        with_imgsize (bool):
        with_bytes (bool):

    Returns:
        dict:

    CommandLine:
        python -m ibeis.other.dbinfo --exec-get_dbinfo:0
        python -m ibeis.other.dbinfo --test-get_dbinfo:1
        python -m ibeis.other.dbinfo --test-get_dbinfo:0 --db NNP_Master3
        python -m ibeis.other.dbinfo --test-get_dbinfo:0 --db PZ_Master1
        python -m ibeis.other.dbinfo --test-get_dbinfo:0 --db GZ_ALL
        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 --db PZ_ViewPoints
        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 --db GZ_Master1

        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a ctrl
        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default:minqual=ok,require_timestamp=True --dbdir ~/lev/media/danger/LEWA
        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default:minqual=ok,require_timestamp=True --dbdir ~/lev/media/danger/LEWA --loadbackup=0

        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default: --dbdir ~/lev/media/danger/LEWA
        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default: --dbdir ~/lev/media/danger/LEWA --loadbackup=0

    Example1:
        >>> # SCRIPT
        >>> from ibeis.other.dbinfo import *  # NOQA
        >>> import ibeis
        >>> defaultdb = 'testdb1'
        >>> ibs, aid_list = ibeis.testdata_aids(defaultdb, a='default:minqual=ok,view=primary,view_ext1=1')
        >>> kwargs = ut.get_kwdefaults(get_dbinfo)
        >>> kwargs['verbose'] = False
        >>> kwargs['aid_list'] = aid_list
        >>> kwargs = ut.parse_dict_from_argv(kwargs)
        >>> output = get_dbinfo(ibs, **kwargs)
        >>> result = (output['info_str'])
        >>> print(result)
        >>> #ibs = ibeis.opendb(defaultdb='testdb1')
        >>> # <HACK FOR FILTERING>
        >>> #from ibeis.expt import cfghelpers
        >>> #from ibeis.expt import annotation_configs
        >>> #from ibeis.init import filter_annots
        >>> #named_defaults_dict = ut.dict_take(annotation_configs.__dict__,
        >>> #                                   annotation_configs.TEST_NAMES)
        >>> #named_qcfg_defaults = dict(zip(annotation_configs.TEST_NAMES,
        >>> #                               ut.get_list_column(named_defaults_dict, 'qcfg')))
        >>> #acfg = cfghelpers.parse_argv_cfg(('--annot-filter', '-a'), named_defaults_dict=named_qcfg_defaults, default=None)[0]
        >>> #aid_list = ibs.get_valid_aids()
        >>> # </HACK FOR FILTERING>

    Example2:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.other.dbinfo import *  # NOQA
        >>> import ibeis
        >>> verbose = True
        >>> short = True
        >>> #ibs = ibeis.opendb(db='GZ_ALL')
        >>> #ibs = ibeis.opendb(db='PZ_Master0')
        >>> ibs = ibeis.opendb('testdb1')
        >>> assert ibs.get_dbname() == 'testdb1', 'DO NOT DELETE CONTRIBUTORS OF OTHER DBS'
        >>> ibs.delete_contributors(ibs.get_valid_contrib_rowids())
        >>> ibs.delete_empty_nids()
        >>> #ibs = ibeis.opendb(db='PZ_MTEST')
        >>> output = get_dbinfo(ibs, with_contrib=False, verbose=False, short=True)
        >>> result = (output['info_str'])
        >>> print(result)
        +============================
        DB Info:  testdb1
        DB Notes: None
        DB NumContrib: 0
        ----------
        # Names                      = 7
        # Names (unassociated)       = 0
        # Names (singleton)          = 5
        # Names (multiton)           = 2
        ----------
        # Annots                     = 13
        # Annots (unknown)           = 4
        # Annots (singleton)         = 5
        # Annots (multiton)          = 4
        ----------
        # Img                        = 13
        L============================
    """
    # TODO Database size in bytes
    # TODO: occurrence, contributors, etc...

    # Basic variables
    request_annot_subset = False
    _input_aid_list = aid_list  # NOQA
    if aid_list is None:
        valid_aids = ibs.get_valid_aids()
        valid_nids = ibs.get_valid_nids()
        valid_gids = ibs.get_valid_gids()
    else:
        if isinstance(aid_list, str):
            # Hack to get experiment stats on aids
            acfg_name_list = [aid_list]
            print('Specified custom aids via acfgname %s' % (acfg_name_list,))
            from ibeis.expt import experiment_helpers
            acfg_list, expanded_aids_list = experiment_helpers.get_annotcfg_list(
                ibs, acfg_name_list)
            aid_list = sorted(list(set(ut.flatten(ut.flatten(expanded_aids_list)))))
            #aid_list =
        if verbose:
            print('Specified %d custom aids' % (len(aid_list),))
        request_annot_subset = True
        valid_aids = aid_list
        valid_nids = list(
            set(ibs.get_annot_nids(aid_list, distinguish_unknowns=False)) -
            {const.UNKNOWN_NAME_ROWID}
        )
        valid_gids = list(set(ibs.get_annot_gids(aid_list)))
    #associated_nids = ibs.get_valid_nids(filter_empty=True)  # nids with at least one annotation
    FILTER_HACK = True
    if FILTER_HACK:
        # HUGE HACK - get only images and names with filtered aids
        valid_aids_ = ibs.filter_aids_custom(valid_aids)
        valid_nids_ = ibs.filter_nids_custom(valid_nids)
        valid_gids_ = ibs.filter_gids_custom(valid_gids)
        if verbose:
            print('Filtered %d names' % (len(valid_nids) - len(valid_nids_)))
            print('Filtered %d images' % (len(valid_gids) - len(valid_gids_)))
            print('Filtered %d annots' % (len(valid_aids) - len(valid_aids_)))
        valid_gids = valid_gids_
        valid_nids = valid_nids_
        valid_aids = valid_aids_
        #associated_nids = ut.compress(associated_nids, map(any,
        #ibs.unflat_map(ibs.get_annot_custom_filterflags,
        #               ibs.get_name_aids(associated_nids))))

    # Image info
    if verbose:
        print('Checking Image Info')
    gx2_aids = ibs.get_image_aids(valid_gids)
    if FILTER_HACK:
        gx2_aids = [ibs.filter_aids_custom(aids) for aids in gx2_aids]  # HACK FOR FILTER
    if request_annot_subset:
        # remove annots not in this subset
        valid_aids_set = set(valid_aids)
        gx2_aids = [list(set(aids).intersection(valid_aids_set)) for aids in gx2_aids]

    gx2_nAnnots = np.array(list(map(len, gx2_aids)))
    image_without_annots = len(np.where(gx2_nAnnots == 0)[0])
    gx2_nAnnots_stats  = ut.get_stats_str(gx2_nAnnots, newlines=True, use_median=True)
    image_reviewed_list = ibs.get_image_reviewed(valid_gids)

    # Name stats
    if verbose:
        print('Checking Name Info')
    nx2_aids = ibs.get_name_aids(valid_nids)
    if FILTER_HACK:
        nx2_aids = [ibs.filter_aids_custom(aids) for aids in nx2_aids]  # HACK FOR FILTER
    if request_annot_subset:
        # remove annots not in this subset
        valid_aids_set = set(valid_aids)
        nx2_aids = [list(set(aids).intersection(valid_aids_set)) for aids in nx2_aids]
    associated_nids = ut.compress(valid_nids, list(map(len, nx2_aids)))

    ibs.check_name_mapping_consistency(nx2_aids)

    # Occurrence Info
    def compute_annot_occurrence_ids(ibs, aid_list):
        from ibeis.algo.preproc import preproc_occurrence
        gid_list = ibs.get_annot_gids(aid_list)
        gid2_aids = ut.group_items(aid_list, gid_list)
        flat_imgsetids, flat_gids = preproc_occurrence.ibeis_compute_occurrences(ibs, gid_list, seconds_thresh=4 * 60 * 60, verbose=False)
        occurid2_gids = ut.group_items(flat_gids, flat_imgsetids)
        occurid2_aids = {oid: ut.flatten(ut.take(gid2_aids, gids)) for oid, gids in occurid2_gids.items()}
        return occurid2_aids

    import utool
    with utool.embed_on_exception_context:
        occurid2_aids = compute_annot_occurrence_ids(ibs, valid_aids)
        occur_nids = ibs.unflat_map(ibs.get_annot_nids, occurid2_aids.values())
        occur_unique_nids = [ut.unique(nids) for nids in occur_nids]
        nid2_occurxs = ut.ddict(list)
        for occurx, nids in enumerate(occur_unique_nids):
            for nid in nids:
                nid2_occurxs[nid].append(occurx)

    nid2_occurx_single = {nid: occurxs for nid, occurxs in nid2_occurxs.items() if len(occurxs) <= 1}
    nid2_occurx_resight = {nid: occurxs for nid, occurxs in nid2_occurxs.items() if len(occurxs) > 1}
    singlesight_encounters = ibs.get_name_aids(nid2_occurx_single.keys())

    singlesight_annot_stats = ut.get_stats(list(map(len, singlesight_encounters)), use_median=True, use_sum=True)
    resight_name_stats = ut.get_stats(list(map(len, nid2_occurx_resight.values())), use_median=True, use_sum=True)

    try:
        aid_pairs = ibs.filter_aidpairs_by_tags(min_num=0)
        undirected_tags = ibs.get_aidpair_tags(aid_pairs.T[0], aid_pairs.T[1], directed=False)
        tagged_pairs = list(zip(aid_pairs.tolist(), undirected_tags))
        tag_dict = ut.groupby_tags(tagged_pairs, undirected_tags)
        pair_tag_info = ut.map_dict_vals(len, tag_dict)

        num_reviewed_pairs = sum(ibs.get_annot_pair_is_reviewed(aid_pairs.T[0], aid_pairs.T[1]))
        pair_tag_info['num_reviewed'] = num_reviewed_pairs
    except Exception:
        pair_tag_info = {}

    #print(ut.dict_str(pair_tag_info))

    # Annot Stats
    # TODO: number of images where chips cover entire image
    # TODO: total image coverage of annotation
    # TODO: total annotation overlap
    """
    ax2_unknown = ibs.is_aid_unknown(valid_aids)
    ax2_nid = ibs.get_annot_name_rowids(valid_aids)
    assert all([nid < 0 if unknown else nid > 0 for nid, unknown in
                zip(ax2_nid, ax2_unknown)]), 'bad annot nid'
    """
    #
    if verbose:
        print('Checking Annot Species')
    unknown_aids = ut.compress(valid_aids, ibs.is_aid_unknown(valid_aids))
    species_list = ibs.get_annot_species_texts(valid_aids)
    species2_aids = ut.group_items(valid_aids, species_list)
    species2_nAids = {key: len(val) for key, val in species2_aids.items()}

    if verbose:
        print('Checking Multiton/Singleton Species')
    nx2_nAnnots = np.array(list(map(len, nx2_aids)))
    # Separate singleton / multitons
    multiton_nxs  = np.where(nx2_nAnnots > 1)[0]
    singleton_nxs = np.where(nx2_nAnnots == 1)[0]
    unassociated_nxs = np.where(nx2_nAnnots == 0)[0]
    assert len(np.intersect1d(singleton_nxs, multiton_nxs)) == 0, 'intersecting names'
    valid_nxs      = np.hstack([multiton_nxs, singleton_nxs])
    num_names_with_gt = len(multiton_nxs)

    # Annot Info
    if verbose:
        print('Checking Annot Info')
    multiton_aids_list = ut.take(nx2_aids, multiton_nxs)
    assert len(set(multiton_nxs)) == len(multiton_nxs)
    if len(multiton_aids_list) == 0:
        multiton_aids = np.array([], dtype=int)
    else:
        multiton_aids = np.hstack(multiton_aids_list)
        assert len(set(multiton_aids)) == len(multiton_aids), 'duplicate annot'
    singleton_aids = ut.take(nx2_aids, singleton_nxs)
    multiton_nid2_nannots = list(map(len, multiton_aids_list))

    # Image size stats
    if with_imgsize:
        if verbose:
            print('Checking ImageSize Info')
        gpath_list = ibs.get_image_paths(valid_gids)
        def wh_print_stats(wh_list):
            if len(wh_list) == 0:
                return '{empty}'
            wh_list = np.asarray(wh_list)
            stat_dict = OrderedDict(
                [( 'max', wh_list.max(0)),
                 ( 'min', wh_list.min(0)),
                 ('mean', wh_list.mean(0)),
                 ( 'std', wh_list.std(0))])
            def arr2str(var):
                return ('[' + (
                    ', '.join(list(map(lambda x: '%.1f' % x, var)))
                ) + ']')
            ret = (',\n    '.join([
                '%s:%s' % (key, arr2str(val))
                for key, val in stat_dict.items()
            ]))
            return '{\n    ' + ret + '\n}'

        print('reading image sizes')
        # Image size stats
        img_size_list  = ibs.get_image_sizes(valid_gids)
        img_size_stats  = wh_print_stats(img_size_list)

        # Chip size stats
        annotation_bbox_list = ibs.get_annot_bboxes(valid_aids)
        annotation_bbox_arr = np.array(annotation_bbox_list)
        if len(annotation_bbox_arr) == 0:
            annotation_size_list = []
        else:
            annotation_size_list = annotation_bbox_arr[:, 2:4]
        chip_size_stats = wh_print_stats(annotation_size_list)
        imgsize_stat_lines = [
            (' # Img in dir                 = %d' % len(gpath_list)),
            (' Image Size Stats  = %s' % (img_size_stats,)),
            (' * Chip Size Stats = %s' % (chip_size_stats,)),
        ]
    else:
        imgsize_stat_lines = []

    if verbose:
        print('Building Stats String')

    multiton_stats = ut.get_stats_str(multiton_nid2_nannots, newlines=True, use_median=True)

    # Time stats
    unixtime_list = ibs.get_image_unixtime(valid_gids)
    unixtime_list = ut.list_replace(unixtime_list, -1, float('nan'))
    #valid_unixtime_list = [time for time in unixtime_list if time != -1]
    #unixtime_statstr = ibs.get_image_time_statstr(valid_gids)
    if ut.get_argflag('--hackshow-unixtime'):
        show_time_distributions(ibs, unixtime_list)
        ut.show_if_requested()
    unixtime_statstr = ut.get_timestats_str(unixtime_list, newlines=True, full=True)

    # GPS stats
    gps_list_ = ibs.get_image_gps(valid_gids)
    gpsvalid_list = [gps != (-1, -1) for gps in gps_list_]
    gps_list  = ut.compress(gps_list_, gpsvalid_list)

    def get_annot_age_stats(aid_list):
        annot_age_months_est_min = ibs.get_annot_age_months_est_min(aid_list)
        annot_age_months_est_max = ibs.get_annot_age_months_est_max(aid_list)
        age_dict = ut.ddict((lambda : 0))
        for min_age, max_age in zip(annot_age_months_est_min, annot_age_months_est_max):
            if (min_age is None or min_age < 12) and max_age < 12:
                age_dict['Infant'] += 1
            elif 12 <= min_age and min_age < 36 and 12 <= max_age and max_age < 36:
                age_dict['Juvenile'] += 1
            elif 36 <= min_age and (36 <= max_age or max_age is None):
                age_dict['Adult'] += 1
            else:
                print('Found UNKNOWN Age: %r, %r' % (min_age, max_age, ))
                age_dict['UNKNOWN'] += 1
        return age_dict

    def get_annot_sex_stats(aid_list):
        annot_sextext_list = ibs.get_annot_sex_texts(aid_list)
        sextext2_aids = ut.group_items(aid_list, annot_sextext_list)
        sex_keys = list(ibs.const.SEX_TEXT_TO_INT.keys())
        assert set(sex_keys) >= set(annot_sextext_list), 'bad keys: ' + str(set(annot_sextext_list) - set(sex_keys))
        sextext2_nAnnots = ut.odict([(key, len(sextext2_aids.get(key, []))) for key in sex_keys])
        # Filter 0's
        sextext2_nAnnots = {key: val for key, val in six.iteritems(sextext2_nAnnots) if val != 0}
        return sextext2_nAnnots

    if verbose:
        print('Checking Other Annot Stats')

    qualtext2_nAnnots = ibs.get_annot_qual_stats(valid_aids)
    yawtext2_nAnnots = ibs.get_annot_yaw_stats(valid_aids)
    agetext2_nAnnots = get_annot_age_stats(valid_aids)
    sextext2_nAnnots = get_annot_sex_stats(valid_aids)

    if verbose:
        print('Checking Contrib Stats')

    # Contributor Statistics
    # hack remove colon for image alignment
    def fix_tag_list(tag_list):
        return [None if tag is None else tag.replace(':', ';') for tag in tag_list]
    image_contrib_tags = fix_tag_list(ibs.get_image_contributor_tag(valid_gids))
    annot_contrib_tags = fix_tag_list(ibs.get_annot_image_contributor_tag(valid_aids))
    contrib_tag_to_gids = ut.group_items(valid_gids, image_contrib_tags)
    contrib_tag_to_aids = ut.group_items(valid_aids, annot_contrib_tags)

    contrib_tag_to_qualstats = {key: ibs.get_annot_qual_stats(aids) for key, aids in six.iteritems(contrib_tag_to_aids)}
    contrib_tag_to_viewstats = {key: ibs.get_annot_yaw_stats(aids) for key, aids in six.iteritems(contrib_tag_to_aids)}

    contrib_tag_to_nImages = {key: len(val) for key, val in six.iteritems(contrib_tag_to_gids)}
    contrib_tag_to_nAnnots = {key: len(val) for key, val in six.iteritems(contrib_tag_to_aids)}

    if verbose:
        print('Summarizing')

    # Summarize stats
    num_names = len(valid_nids)
    num_names_unassociated = len(valid_nids) - len(associated_nids)
    num_names_singleton = len(singleton_nxs)
    num_names_multiton =  len(multiton_nxs)

    num_singleton_annots = len(singleton_aids)
    num_multiton_annots = len(multiton_aids)
    num_unknown_annots = len(unknown_aids)
    num_annots = len(valid_aids)

    if with_bytes:
        if verbose:
            print('Checking Disk Space')
        ibsdir_space   = ut.byte_str2(ut.get_disk_space(ibs.get_ibsdir()))
        dbdir_space    = ut.byte_str2(ut.get_disk_space(ibs.get_dbdir()))
        imgdir_space   = ut.byte_str2(ut.get_disk_space(ibs.get_imgdir()))
        cachedir_space = ut.byte_str2(ut.get_disk_space(ibs.get_cachedir()))

    if True:
        if verbose:
            print('Check asserts')
        try:
            bad_aids = np.intersect1d(multiton_aids, unknown_aids)
            _num_names_total_check = num_names_singleton + num_names_unassociated + num_names_multiton
            _num_annots_total_check = num_unknown_annots + num_singleton_annots + num_multiton_annots
            assert len(bad_aids) == 0, 'intersecting multiton aids and unknown aids'
            assert _num_names_total_check == num_names, 'inconsistent num names'
            #if not request_annot_subset:
            # dont check this if you have an annot subset
            assert _num_annots_total_check == num_annots, 'inconsistent num annots'
        except Exception as ex:
            ut.printex(ex, keys=[
                '_num_names_total_check',
                'num_names',
                '_num_annots_total_check',
                'num_annots',
                'num_names_singleton',
                'num_names_multiton',
                'num_unknown_annots',
                'num_multiton_annots',
                'num_singleton_annots',
            ])
            raise

    # Get contributor statistics
    contrib_rowids = ibs.get_valid_contrib_rowids()
    num_contributors = len(contrib_rowids)

    # print
    num_tabs = 5

    def align2(str_):
        return ut.align(str_, ':', ' :')

    def align_dict2(dict_):
        str_ = ut.dict_str(dict_)
        return align2(str_)

    header_block_lines = (
        [('+============================'), ] + (
            [
                ('+ singleton := single sighting'),
                ('+ multiton  := multiple sightings'),
                ('--' * num_tabs),
            ] if not short and with_header else []
        )
    )

    source_block_lines = [
        ('DB Info:  ' + ibs.get_dbname()),
        ('DB Notes: ' + ibs.get_dbnotes()),
        ('DB NumContrib: %d' % num_contributors),
    ]

    bytes_block_lines = [
        ('--' * num_tabs),
        ('DB Bytes: '),
        ('     +- dbdir nBytes:         ' + dbdir_space),
        ('     |  +- _ibsdb nBytes:     ' + ibsdir_space),
        ('     |  |  +-imgdir nBytes:   ' + imgdir_space),
        ('     |  |  +-cachedir nBytes: ' + cachedir_space),
    ] if with_bytes else []

    name_block_lines = [
        ('--' * num_tabs),
        ('# Names                      = %d' % num_names),
        ('# Names (unassociated)       = %d' % num_names_unassociated),
        ('# Names (singleton)          = %d' % num_names_singleton),
        ('# Names (multiton)           = %d' % num_names_multiton),
    ]

    subset_str = '        ' if not request_annot_subset else '(SUBSET)'

    annot_block_lines = [
        ('--' * num_tabs),
        ('# Annots %s            = %d' % (subset_str, num_annots,)),
        ('# Annots (unknown)           = %d' % num_unknown_annots),
        ('# Annots (singleton)         = %d' % num_singleton_annots),
        ('# Annots (multiton)          = %d' % num_multiton_annots),
    ]

    annot_per_basic_block_lines = [
        ('--' * num_tabs),
        ('# Annots per Name (multiton) = %s' % (align2(multiton_stats),)),
        ('# Annots per Image           = %s' % (align2(gx2_nAnnots_stats),)),
        ('# Annots per Species         = %s' % (align_dict2(species2_nAids),)),
    ] if not short else []

    occurrence_block_lines = [
        ('--' * num_tabs),
        ('# Occurrence Per Name (Resights) = %s' % (align_dict2(resight_name_stats),)),
        ('# Annots per Encounter (Singlesights) = %s' % (align_dict2(singlesight_annot_stats),)),
        ('# Pair Tag Info (annots) = %s' % (align_dict2(pair_tag_info),)),
    ] if not short else []

    annot_per_qualview_block_lines = [
        None if short else '# Annots per Viewpoint = %s' % align_dict2(yawtext2_nAnnots),
        None if short else '# Annots per Quality = %s' % align_dict2(qualtext2_nAnnots),
    ]

    annot_per_agesex_block_lines = [
        '# Annots per Age = %s' % align_dict2(agetext2_nAnnots),
        '# Annots per Sex = %s' % align_dict2(sextext2_nAnnots),
    ] if not short and with_agesex else []

    contrib_block_lines = [
        '# Images per contributor       = ' + align_dict2(contrib_tag_to_nImages),
        '# Annots per contributor       = ' + align_dict2(contrib_tag_to_nAnnots),
        '# Quality per contributor      = ' + ut.dict_str(contrib_tag_to_qualstats, sorted_=True),
        '# Viewpoint per contributor    = ' + ut.dict_str(contrib_tag_to_viewstats, sorted_=True),
    ] if with_contrib else []

    img_block_lines = [
        ('--' * num_tabs),
        ('# Img                        = %d' % len(valid_gids)),
        None if short else ('# Img reviewed               = %d' % sum(image_reviewed_list)),
        None if short else ('# Img with gps               = %d' % len(gps_list)),
        #('# Img with timestamp         = %d' % len(valid_unixtime_list)),
        None if short else ('Img Time Stats               = %s' % (align2(unixtime_statstr),)),
    ]

    info_str_lines = (
        header_block_lines +
        bytes_block_lines +
        source_block_lines +
        name_block_lines +
        annot_block_lines +
        annot_per_basic_block_lines +
        occurrence_block_lines +
        annot_per_qualview_block_lines +
        annot_per_agesex_block_lines +
        img_block_lines +
        contrib_block_lines +
        imgsize_stat_lines +
        [('L============================'), ]
    )
    info_str = '\n'.join(ut.filter_Nones(info_str_lines))
    info_str2 = ut.indent(info_str, '[{tag}]'.format(tag=tag))
    if verbose:
        print(info_str2)
    locals_ = locals()
    return locals_
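The singleton/multiton bookkeeping above hinges on counting annotations per name and thresholding with np.where; in isolation it looks like:

import numpy as np

nx2_nAnnots = np.array([3, 1, 0, 2, 1])           # annots per name (toy data)
multiton_nxs = np.where(nx2_nAnnots > 1)[0]       # names sighted multiple times
singleton_nxs = np.where(nx2_nAnnots == 1)[0]     # names sighted exactly once
unassociated_nxs = np.where(nx2_nAnnots == 0)[0]
print(multiton_nxs, singleton_nxs, unassociated_nxs)  # -> [0 3] [1 4] [2]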
Example #22
    try:
        import psutil  # assumption: the elided top of this snippet imports psutil
        #utool.printvar2('psutil.disk_io_counters()')
        #print('')
        #print('PSUTIL NETWORK')
        #print('')
        #utool.printvar2('psutil.net_io_counters(pernic=True)')
        #print('')
        #print('PSUTIL MISC')
        #print('')
        #utool.printvar2('psutil.get_users()')
        #utool.printvar2('psutil.get_boot_time()')
        #utool.printvar2('psutil.get_pid_list()')

        #psutil.test()
        pass
    except ImportError:
        print('psutil not installed')

    try:
        import resource
        utool.rrr()
        used_memory = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
        print('[parallel] Max memory usage: %s' % utool.byte_str2(used_memory))
    except ImportError:
        print('no module resources (doesnt exist on win32)')

    try:
        import cv2  # NOQA
        utool.printvar2('cv2.__version__')
    except ImportError:
        print('cv2 is not installed')
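One caveat with the resource snippet: ru_maxrss is reported in kilobytes on Linux but in bytes on macOS, so passing it straight to a byte formatter is platform-dependent. A normalization sketch:

import resource
import sys

rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
rss_bytes = rss * 1024 if sys.platform.startswith('linux') else rss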
Example #23
def turtles():
    source_dpaths = sorted(
        ut.glob('/raid/raw/RotanTurtles/',
                '*',
                recursive=False,
                with_dirs=True,
                with_files=False))
    sources = [SourceDir(dpath) for dpath in source_dpaths]

    for self in ut.ProgIter(sources, label='populate'):
        self.populate()

    import fnmatch
    del_ext = set(['.npy', '.flann', '.npz'])
    for self in ut.ProgIter(sources, label='populate'):
        flags = [ext in del_ext for ext in self.attrs['ext']]
        to_delete = ut.compress(list(self.fpaths()), flags)
        ut.remove_file_list(to_delete)
        flags = [
            fnmatch.fnmatch(fpath, '*/_hsdb/computed/chips/*.png')
            for fpath in self.rel_fpath_list
        ]
        to_delete = ut.compress(list(self.fpaths()), flags)
        ut.remove_file_list(to_delete)
        self.populate()

    for self in ut.ProgIter(sources, label='del empty'):
        self.populate()
        self.delete_empty_directories()

    print(ut.byte_str2(sum([self.nbytes() for self in sources])))
    # [ut.byte_str2(self.nbytes()) for self in sources]

    # import numpy as np
    # num_isect = np.zeros((len(sources), len(sources)))
    # num_union = np.zeros((len(sources), len(sources)))

    for i, j in ut.combinations(range(len(sources)), 2):
        s1 = sources[i]
        s2 = sources[j]
        isect = set(s1.rel_fpath_list).intersection(s2.rel_fpath_list)
        # union = set(s1.rel_fpath_list).union(s2.rel_fpath_list)
        if isect:
            s1.isect_info(s2)
            print((i, j))
            print(s1.dpath)
            print(s2.dpath)
            self = s1
            other = s2
            assert False
            # print(isect)
            # break
        # num_isect[i, j] = len(isect)
        # num_union[i, j] = len(union)

    # for self in ut.ProgIter(sources, label='index'):
    #     self.index()

    for self in ut.ProgIter(sources, label='populate'):
        self.populate()

    dest = sources[0]
    others = sources[1:]
    # Merge others into dest
    bash_script = '\n'.join([o.make_merge_bash_script(dest) for o in others])
    print(bash_script)

    other = self
    for other in others:
        other.merge_into(dest)
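The duplicate scan near the end is plain set intersection over every pair of sources; the same loop with the standard library and toy relative paths:

import itertools

rel_lists = [{'a/1.png', 'a/2.png'}, {'a/2.png'}, {'b/3.png'}]
for i, j in itertools.combinations(range(len(rel_lists)), 2):
    isect = rel_lists[i] & rel_lists[j]
    if isect:
        print(i, j, isect)  # -> 0 1 {'a/2.png'}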
Example #24
def turtles():
    source_dpaths = sorted(ut.glob('/raid/raw/RotanTurtles/', '*',
                                   recursive=False, with_dirs=True,
                                   with_files=False))
    sources = [SourceDir(dpath) for dpath in source_dpaths]

    for self in ut.ProgIter(sources, label='populate'):
        self.populate()

    import fnmatch
    del_ext = set(['.npy', '.flann', '.npz'])
    for self in ut.ProgIter(sources, label='populate'):
        flags = [ext in del_ext for ext in self.attrs['ext']]
        to_delete = ut.compress(list(self.fpaths()), flags)
        ut.remove_file_list(to_delete)
        flags = [fnmatch.fnmatch(fpath, '*/_hsdb/computed/chips/*.png') for fpath in self.rel_fpath_list]
        to_delete = ut.compress(list(self.fpaths()), flags)
        ut.remove_file_list(to_delete)
        self.populate()

    for self in ut.ProgIter(sources, label='del empty'):
        self.populate()
        self.delete_empty_directories()

    print(ut.byte_str2(sum([self.nbytes() for self in sources])))
    # [ut.byte_str2(self.nbytes()) for self in sources]

    # import numpy as np
    # num_isect = np.zeros((len(sources), len(sources)))
    # num_union = np.zeros((len(sources), len(sources)))

    for i, j in ut.combinations(range(len(sources)), 2):
        s1 = sources[i]
        s2 = sources[j]
        isect = set(s1.rel_fpath_list).intersection(s2.rel_fpath_list)
        # union = set(s1.rel_fpath_list).union(s2.rel_fpath_list)
        if isect:
            s1.isect_info(s2)
            print((i, j))
            print(s1.dpath)
            print(s2.dpath)
            self = s1
            other = s2
            assert False
            # print(isect)
            # break
        # num_isect[i, j] = len(isect)
        # num_union[i, j] = len(union)

    # for self in ut.ProgIter(sources, label='index'):
    #     self.index()

    for self in ut.ProgIter(sources, label='populate'):
        self.populate()

    dest = sources[0]
    others = sources[1:]
    # Merge others into dest
    bash_script = '\n'.join([o.make_merge_bash_script(dest) for o in others])
    print(bash_script)

    other = self
    for other in others:
        other.merge_into(dest)
Example #25
def invert_index(vecs_list, fgws_list, ax_list, fxs_list, verbose=ut.NOT_QUIET):
    r"""
    Aggregates descriptors of input annotations and returns inverted information

    Args:
        vecs_list (list):
        fgws_list (list):
        ax_list (list):
        fxs_list (list):
        verbose (bool):  verbosity flag (default = True)

    Returns:
        tuple: (idx2_vec, idx2_fgw, idx2_ax, idx2_fx)

    CommandLine:
        python -m ibeis.algo.hots.neighbor_index invert_index

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.neighbor_index import *  # NOQA
        >>> rng = np.random.RandomState(42)
        >>> DIM_SIZE = 16
        >>> nFeat_list = [3, 0, 4, 1]
        >>> vecs_list = [rng.randn(nFeat, DIM_SIZE) for nFeat in nFeat_list]
        >>> fgws_list = [rng.randn(nFeat) for nFeat in nFeat_list]
        >>> fxs_list = [np.arange(nFeat) for nFeat in nFeat_list]
        >>> ax_list = np.arange(len(vecs_list))
        >>> fgws_list = None
        >>> verbose = True
        >>> tup = invert_index(vecs_list, fgws_list, ax_list, fxs_list)
        >>> (idx2_vec, idx2_fgw, idx2_ax, idx2_fx) = tup
        >>> result = 'output depth_profile = %s' % (ut.depth_profile(tup),)
        >>> print(result)
        output depth_profile = [(8, 16), 1, 8, 8]

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.neighbor_index import *  # NOQA
        >>> import ibeis
        >>> qreq_ = ibeis.testdata_qreq_(defaultdb='testdb1', a='default:species=zebra_plains', p='default:fgw_thresh=.999')
        >>> vecs_list, fgws_list, fxs_list = get_support_data(qreq_, qreq_.daids)
        >>> ax_list = np.arange(len(vecs_list))
        >>> input_ = vecs_list, fgws_list, ax_list, fxs_list
        >>> print('input depth_profile = %s' % (ut.depth_profile(input_),))
        >>> tup = invert_index(*input_)
        >>> (idx2_vec, idx2_fgw, idx2_ax, idx2_fx) = tup
        >>> result = 'output depth_profile = %s' % (ut.depth_profile(tup),)
        >>> print(result)
        output depth_profile = [(1912, 128), 1912, 1912, 1912]
    """
    if ut.VERYVERBOSE:
        print('[nnindex] stacking descriptors from %d annotations' % len(ax_list))
    try:
        nFeat_list = np.array(list(map(len, vecs_list)))
        # Remove input without any features
        is_valid = nFeat_list > 0
        nFeat_list = nFeat_list.compress(is_valid)
        vecs_list = ut.compress(vecs_list, is_valid)
        if fgws_list is not None:
            fgws_list = ut.compress(fgws_list, is_valid)
        ax_list = ut.compress(ax_list, is_valid)
        fxs_list = ut.compress(fxs_list, is_valid)

        # Flatten into inverted index
        axs_list = [[ax] * nFeat for (ax, nFeat) in zip(ax_list, nFeat_list)]
        nFeats = sum(nFeat_list)
        idx2_ax = np.fromiter(ut.iflatten(axs_list), np.int32, nFeats)
        idx2_fx = np.fromiter(ut.iflatten(fxs_list), np.int32, nFeats)
        idx2_vec = np.vstack(vecs_list)
        if fgws_list is None:
            idx2_fgw = None
        else:
            idx2_fgw = np.hstack(fgws_list)
            try:
                assert len(idx2_fgw) == len(idx2_vec), 'error. weights and vecs do not correspond'
            except Exception as ex:
                ut.printex(ex, keys=[(len, 'idx2_fgw'), (len, 'idx2_vec')])
                raise
        assert idx2_vec.shape[0] == idx2_ax.shape[0]
        assert idx2_vec.shape[0] == idx2_fx.shape[0]
    except MemoryError as ex:
        ut.printex(ex, 'cannot build inverted index', '[!memerror]')
        raise
    if ut.VERYVERBOSE or verbose:
        print('[nnindex] stacked nVecs={nVecs} from nAnnots={nAnnots}'.format(
            nVecs=len(idx2_vec), nAnnots=len(ax_list)))
        print('[nnindex] idx2_vecs dtype={}, memory={}'.format(
            idx2_vec.dtype,
            ut.byte_str2(idx2_vec.size * idx2_vec.dtype.itemsize)))
    return idx2_vec, idx2_fgw, idx2_ax, idx2_fx
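A standalone sketch of the inversion pattern above, assuming only numpy; the per-annotation data is made up for illustration. Each annotation's features are flattened into parallel arrays so that a flat row index maps back to its (annotation, feature) pair:

import numpy as np

# Hypothetical input: 3 annotations with 2, 0, and 3 features each.
vecs_list = [np.ones((2, 4)), np.ones((0, 4)), np.ones((3, 4))]
ax_list = np.arange(len(vecs_list))
fxs_list = [np.arange(len(v)) for v in vecs_list]

# Drop annotations without any features, as invert_index does.
nfeat = np.array([len(v) for v in vecs_list])
keep = nfeat > 0
vecs_list = [v for v, k in zip(vecs_list, keep) if k]
fxs_list = [f for f, k in zip(fxs_list, keep) if k]
ax_list, nfeat = ax_list[keep], nfeat[keep]

# Flatten into the inverted index: flat row -> (annotation, feature)
idx2_ax = np.repeat(ax_list, nfeat)  # [0, 0, 2, 2, 2]
idx2_fx = np.concatenate(fxs_list)   # [0, 1, 0, 1, 2]
idx2_vec = np.vstack(vecs_list)      # (5, 4) stacked descriptors
assert idx2_vec.shape[0] == idx2_ax.shape[0] == idx2_fx.shape[0]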
Example #26
def get_dbinfo(
    ibs,
    verbose=True,
    with_imgsize=False,
    with_bytes=False,
    with_contrib=False,
    with_agesex=False,
    with_header=True,
    short=False,
    tag='dbinfo',
    aid_list=None,
    aids=None,
):
    """

    Returns dictionary of digestable database information
    Infostr is a string summary of all the stats. Prints infostr in addition to
    returning locals

    Args:
        ibs (IBEISController):
        verbose (bool):
        with_imgsize (bool):
        with_bytes (bool):

    Returns:
        dict:

    SeeAlso:
        python -m wbia.other.ibsfuncs --exec-get_annot_stats_dict --db PZ_PB_RF_TRAIN --use-hist=True --old=False --per_name_vpedge=False
        python -m wbia.other.ibsfuncs --exec-get_annot_stats_dict --db PZ_PB_RF_TRAIN --all

    CommandLine:
        python -m wbia.other.dbinfo --exec-get_dbinfo:0
        python -m wbia.other.dbinfo --test-get_dbinfo:1
        python -m wbia.other.dbinfo --test-get_dbinfo:0 --db NNP_Master3
        python -m wbia.other.dbinfo --test-get_dbinfo:0 --db PZ_Master1
        python -m wbia.other.dbinfo --test-get_dbinfo:0 --db GZ_ALL
        python -m wbia.other.dbinfo --exec-get_dbinfo:0 --db PZ_ViewPoints
        python -m wbia.other.dbinfo --exec-get_dbinfo:0 --db GZ_Master1

        python -m wbia.other.dbinfo --exec-get_dbinfo:0 --db LF_Bajo_bonito -a default
        python -m wbia.other.dbinfo --exec-get_dbinfo:0 --db DETECT_SEATURTLES -a default --readonly

        python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a ctrl
        python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a default:minqual=ok,require_timestamp=True --dbdir ~/lev/media/danger/LEWA
        python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a default:minqual=ok,require_timestamp=True --dbdir ~/lev/media/danger/LEWA --loadbackup=0

        python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a default: --dbdir ~/lev/media/danger/LEWA
        python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a default: --dbdir ~/lev/media/danger/LEWA --loadbackup=0

    Example0:
        >>> # SCRIPT
        >>> from wbia.other.dbinfo import *  # NOQA
        >>> import wbia
        >>> defaultdb = 'testdb1'
        >>> ibs, aid_list = wbia.testdata_aids(defaultdb, a='default:minqual=ok,view=primary,view_ext1=1')
        >>> kwargs = ut.get_kwdefaults(get_dbinfo)
        >>> kwargs['verbose'] = False
        >>> kwargs['aid_list'] = aid_list
        >>> kwargs = ut.parse_dict_from_argv(kwargs)
        >>> output = get_dbinfo(ibs, **kwargs)
        >>> result = (output['info_str'])
        >>> print(result)
        >>> #ibs = wbia.opendb(defaultdb='testdb1')
        >>> # <HACK FOR FILTERING>
        >>> #from wbia.expt import cfghelpers
        >>> #from wbia.expt import annotation_configs
        >>> #from wbia.init import filter_annots
        >>> #named_defaults_dict = ut.dict_take(annotation_configs.__dict__,
        >>> #                                   annotation_configs.TEST_NAMES)
        >>> #named_qcfg_defaults = dict(zip(annotation_configs.TEST_NAMES,
        >>> #                               ut.get_list_column(named_defaults_dict, 'qcfg')))
        >>> #acfg = cfghelpers.parse_argv_cfg(('--annot-filter', '-a'), named_defaults_dict=named_qcfg_defaults, default=None)[0]
        >>> #aid_list = ibs.get_valid_aids()
        >>> # </HACK FOR FILTERING>

    Example1:
        >>> # ENABLE_DOCTEST
        >>> from wbia.other.dbinfo import *  # NOQA
        >>> import wbia
        >>> verbose = True
        >>> short = True
        >>> #ibs = wbia.opendb(db='GZ_ALL')
        >>> #ibs = wbia.opendb(db='PZ_Master0')
        >>> ibs = wbia.opendb('testdb1')
        >>> assert ibs.get_dbname() == 'testdb1', 'DO NOT DELETE CONTRIBUTORS OF OTHER DBS'
        >>> ibs.delete_contributors(ibs.get_valid_contributor_rowids())
        >>> ibs.delete_empty_nids()
        >>> #ibs = wbia.opendb(db='PZ_MTEST')
        >>> output = get_dbinfo(ibs, with_contrib=False, verbose=False, short=True)
        >>> result = (output['info_str'])
        >>> print(result)
        +============================
        DB Info:  testdb1
        DB Notes: None
        DB NumContrib: 0
        ----------
        # Names                      = 7
        # Names (unassociated)       = 0
        # Names (singleton)          = 5
        # Names (multiton)           = 2
        ----------
        # Annots                     = 13
        # Annots (unknown)           = 4
        # Annots (singleton)         = 5
        # Annots (multiton)          = 4
        ----------
        # Img                        = 13
        L============================
    """
    # TODO Database size in bytes
    # TODO: occurrence, contributors, etc...
    if aids is not None:
        aid_list = aids

    # Basic variables
    request_annot_subset = False
    _input_aid_list = aid_list  # NOQA
    if aid_list is None:
        valid_aids = ibs.get_valid_aids()
        valid_nids = ibs.get_valid_nids()
        valid_gids = ibs.get_valid_gids()
    else:
        if isinstance(aid_list, str):
            # Hack to get experiment stats on aids
            acfg_name_list = [aid_list]
            logger.info('Specified custom aids via acfgname %s' % (acfg_name_list,))
            from wbia.expt import experiment_helpers

            acfg_list, expanded_aids_list = experiment_helpers.get_annotcfg_list(
                ibs, acfg_name_list
            )
            aid_list = sorted(list(set(ut.flatten(ut.flatten(expanded_aids_list)))))
            # aid_list =
        if verbose:
            logger.info('Specified %d custom aids' % (len(aid_list),))
        request_annot_subset = True
        valid_aids = aid_list
        valid_nids = list(
            set(ibs.get_annot_nids(aid_list, distinguish_unknowns=False))
            - {const.UNKNOWN_NAME_ROWID}
        )
        valid_gids = list(set(ibs.get_annot_gids(aid_list)))
    # associated_nids = ibs.get_valid_nids(filter_empty=True)  # nids with at least one annotation
    valid_images = ibs.images(valid_gids)
    valid_annots = ibs.annots(valid_aids)

    # Image info
    if verbose:
        logger.info('Checking Image Info')
    gx2_aids = valid_images.aids
    if request_annot_subset:
        # remove annots not in this subset
        valid_aids_set = set(valid_aids)
        gx2_aids = [list(set(aids_).intersection(valid_aids_set)) for aids_ in gx2_aids]

    gx2_nAnnots = np.array(list(map(len, gx2_aids)))
    image_without_annots = len(np.where(gx2_nAnnots == 0)[0])
    gx2_nAnnots_stats = ut.repr4(
        ut.get_stats(gx2_nAnnots, use_median=True), nl=0, precision=2, si=True
    )
    image_reviewed_list = ibs.get_image_reviewed(valid_gids)

    # Name stats
    if verbose:
        logger.info('Checking Name Info')
    nx2_aids = ibs.get_name_aids(valid_nids)
    if request_annot_subset:
        # remove annots not in this subset
        valid_aids_set = set(valid_aids)
        nx2_aids = [list(set(aids_).intersection(valid_aids_set)) for aids_ in nx2_aids]
    associated_nids = ut.compress(valid_nids, list(map(len, nx2_aids)))

    ibs.check_name_mapping_consistency(nx2_aids)

    if False:
        # Occurrence Info
        def compute_annot_occurrence_ids(ibs, aid_list):
            from wbia.algo.preproc import preproc_occurrence

            gid_list = ibs.get_annot_gids(aid_list)
            gid2_aids = ut.group_items(aid_list, gid_list)
            config = {'seconds_thresh': 4 * 60 * 60}
            flat_imgsetids, flat_gids = preproc_occurrence.wbia_compute_occurrences(
                ibs, gid_list, config=config, verbose=False
            )
            occurid2_gids = ut.group_items(flat_gids, flat_imgsetids)
            occurid2_aids = {
                oid: ut.flatten(ut.take(gid2_aids, gids))
                for oid, gids in occurid2_gids.items()
            }
            return occurid2_aids

        import utool

        with utool.embed_on_exception_context:
            occurid2_aids = compute_annot_occurrence_ids(ibs, valid_aids)
            occur_nids = ibs.unflat_map(ibs.get_annot_nids, occurid2_aids.values())
            occur_unique_nids = [ut.unique(nids) for nids in occur_nids]
            nid2_occurxs = ut.ddict(list)
            for occurx, nids in enumerate(occur_unique_nids):
                for nid in nids:
                    nid2_occurxs[nid].append(occurx)

        nid2_occurx_single = {
            nid: occurxs for nid, occurxs in nid2_occurxs.items() if len(occurxs) <= 1
        }
        nid2_occurx_resight = {
            nid: occurxs for nid, occurxs in nid2_occurxs.items() if len(occurxs) > 1
        }
        singlesight_encounters = ibs.get_name_aids(nid2_occurx_single.keys())

        singlesight_annot_stats = ut.get_stats(
            list(map(len, singlesight_encounters)), use_median=True, use_sum=True
        )
        resight_name_stats = ut.get_stats(
            list(map(len, nid2_occurx_resight.values())), use_median=True, use_sum=True
        )

    # Encounter Info
    def break_annots_into_encounters(aids):
        from wbia.algo.preproc import occurrence_blackbox
        import datetime

        thresh_sec = datetime.timedelta(minutes=30).seconds
        posixtimes = np.array(ibs.get_annot_image_unixtimes_asfloat(aids))
        # latlons = ibs.get_annot_image_gps(aids)
        labels = occurrence_blackbox.cluster_timespace2(
            posixtimes, None, thresh_sec=thresh_sec
        )
        return labels
        # ave_enc_time = [np.mean(times) for lbl, times in ut.group_items(posixtimes, labels).items()]
        # ut.square_pdist(ave_enc_time)

    try:
        am_rowids = ibs.get_annotmatch_rowids_between_groups([valid_aids], [valid_aids])[
            0
        ]
        aid_pairs = ibs.filter_aidpairs_by_tags(min_num=0, am_rowids=am_rowids)
        undirected_tags = ibs.get_aidpair_tags(
            aid_pairs.T[0], aid_pairs.T[1], directed=False
        )
        tagged_pairs = list(zip(aid_pairs.tolist(), undirected_tags))
        tag_dict = ut.groupby_tags(tagged_pairs, undirected_tags)
        pair_tag_info = ut.map_dict_vals(len, tag_dict)
    except Exception:
        pair_tag_info = {}

    # logger.info(ut.repr2(pair_tag_info))

    # Annot Stats
    # TODO: number of images where chips cover entire image
    # TODO: total image coverage of annotation
    # TODO: total annotation overlap
    """
    ax2_unknown = ibs.is_aid_unknown(valid_aids)
    ax2_nid = ibs.get_annot_name_rowids(valid_aids)
    assert all([nid < 0 if unknown else nid > 0 for nid, unknown in
                zip(ax2_nid, ax2_unknown)]), 'bad annot nid'
    """
    #
    if verbose:
        logger.info('Checking Annot Species')
    unknown_annots = valid_annots.compress(ibs.is_aid_unknown(valid_annots))
    species_list = valid_annots.species_texts
    species2_annots = valid_annots.group_items(valid_annots.species_texts)
    species2_nAids = {key: len(val) for key, val in species2_annots.items()}

    if verbose:
        logger.info('Checking Multiton/Singleton Species')
    nx2_nAnnots = np.array(list(map(len, nx2_aids)))
    # Separate singletons / multitons
    multiton_nxs = np.where(nx2_nAnnots > 1)[0]
    singleton_nxs = np.where(nx2_nAnnots == 1)[0]
    unassociated_nxs = np.where(nx2_nAnnots == 0)[0]
    assert len(np.intersect1d(singleton_nxs, multiton_nxs)) == 0, 'intersecting names'
    valid_nxs = np.hstack([multiton_nxs, singleton_nxs])
    num_names_with_gt = len(multiton_nxs)

    # Annot Info
    if verbose:
        logger.info('Checking Annot Info')
    multiton_aids_list = ut.take(nx2_aids, multiton_nxs)
    assert len(set(multiton_nxs)) == len(multiton_nxs)
    if len(multiton_aids_list) == 0:
        multiton_aids = np.array([], dtype=int)
    else:
        multiton_aids = np.hstack(multiton_aids_list)
        assert len(set(multiton_aids)) == len(multiton_aids), 'duplicate annot'
    singleton_aids = ut.take(nx2_aids, singleton_nxs)
    multiton_nid2_nannots = list(map(len, multiton_aids_list))

    # Image size stats
    if with_imgsize:
        if verbose:
            logger.info('Checking ImageSize Info')
        gpath_list = ibs.get_image_paths(valid_gids)

        def wh_print_stats(wh_list):
            if len(wh_list) == 0:
                return '{empty}'
            wh_list = np.asarray(wh_list)
            stat_dict = collections.OrderedDict(
                [
                    ('max', wh_list.max(0)),
                    ('min', wh_list.min(0)),
                    ('mean', wh_list.mean(0)),
                    ('std', wh_list.std(0)),
                ]
            )

            def arr2str(var):
                return '[' + (', '.join(list(map(lambda x: '%.1f' % x, var)))) + ']'

            ret = ',\n    '.join(
                ['%s:%s' % (key, arr2str(val)) for key, val in stat_dict.items()]
            )
            return '{\n    ' + ret + '\n}'

        logger.info('reading image sizes')
        # Image size stats
        img_size_list = ibs.get_image_sizes(valid_gids)
        img_size_stats = wh_print_stats(img_size_list)

        # Chip size stats
        annotation_bbox_list = ibs.get_annot_bboxes(valid_aids)
        annotation_bbox_arr = np.array(annotation_bbox_list)
        if len(annotation_bbox_arr) == 0:
            annotation_size_list = []
        else:
            annotation_size_list = annotation_bbox_arr[:, 2:4]
        chip_size_stats = wh_print_stats(annotation_size_list)
        imgsize_stat_lines = [
            (' # Img in dir                 = %d' % len(gpath_list)),
            (' Image Size Stats  = %s' % (img_size_stats,)),
            (' * Chip Size Stats = %s' % (chip_size_stats,)),
        ]
    else:
        imgsize_stat_lines = []

    if verbose:
        logger.info('Building Stats String')

    multiton_stats = ut.repr3(
        ut.get_stats(multiton_nid2_nannots, use_median=True), nl=0, precision=2, si=True
    )

    # Time stats
    unixtime_list = valid_images.unixtime2
    # valid_unixtime_list = [time for time in unixtime_list if time != -1]
    # unixtime_statstr = ibs.get_image_time_statstr(valid_gids)
    if ut.get_argflag('--hackshow-unixtime'):
        show_time_distributions(ibs, unixtime_list)
        ut.show_if_requested()
    unixtime_statstr = ut.repr3(ut.get_timestats_dict(unixtime_list, full=True), si=True)

    # GPS stats
    gps_list_ = ibs.get_image_gps(valid_gids)
    gpsvalid_list = [gps != (-1, -1) for gps in gps_list_]
    gps_list = ut.compress(gps_list_, gpsvalid_list)

    def get_annot_age_stats(aid_list):
        annot_age_months_est_min = ibs.get_annot_age_months_est_min(aid_list)
        annot_age_months_est_max = ibs.get_annot_age_months_est_max(aid_list)
        age_dict = ut.ddict((lambda: 0))
        for min_age, max_age in zip(annot_age_months_est_min, annot_age_months_est_max):
            if max_age is None:
                max_age = min_age
            if min_age is None:
                min_age = max_age
            if max_age is None and min_age is None:
                logger.info('Found UNKNOWN Age: %r, %r' % (min_age, max_age,))
                age_dict['UNKNOWN'] += 1
            elif (min_age is None or min_age < 12) and max_age < 12:
                age_dict['Infant'] += 1
            elif 12 <= min_age and min_age < 36 and 12 <= max_age and max_age < 36:
                age_dict['Juvenile'] += 1
            elif 36 <= min_age and (max_age is None or 36 <= max_age):
                age_dict['Adult'] += 1
        return age_dict

    def get_annot_sex_stats(aid_list):
        annot_sextext_list = ibs.get_annot_sex_texts(aid_list)
        sextext2_aids = ut.group_items(aid_list, annot_sextext_list)
        sex_keys = list(ibs.const.SEX_TEXT_TO_INT.keys())
        assert set(sex_keys) >= set(annot_sextext_list), 'bad keys: ' + str(
            set(annot_sextext_list) - set(sex_keys)
        )
        sextext2_nAnnots = ut.odict(
            [(key, len(sextext2_aids.get(key, []))) for key in sex_keys]
        )
        # Filter 0's
        sextext2_nAnnots = {
            key: val for key, val in six.iteritems(sextext2_nAnnots) if val != 0
        }
        return sextext2_nAnnots

    def get_annot_qual_stats(ibs, aid_list):
        annots = ibs.annots(aid_list)
        qualtext2_nAnnots = ut.order_dict_by(
            ut.map_vals(len, annots.group_items(annots.quality_texts)),
            list(ibs.const.QUALITY_TEXT_TO_INT.keys()),
        )
        return qualtext2_nAnnots

    def get_annot_viewpoint_stats(ibs, aid_list):
        annots = ibs.annots(aid_list)
        viewcode2_nAnnots = ut.order_dict_by(
            ut.map_vals(len, annots.group_items(annots.viewpoint_code)),
            list(ibs.const.VIEW.CODE_TO_INT.keys()) + [None],
        )
        return viewcode2_nAnnots

    if verbose:
        logger.info('Checking Other Annot Stats')

    qualtext2_nAnnots = get_annot_qual_stats(ibs, valid_aids)
    viewcode2_nAnnots = get_annot_viewpoint_stats(ibs, valid_aids)
    agetext2_nAnnots = get_annot_age_stats(valid_aids)
    sextext2_nAnnots = get_annot_sex_stats(valid_aids)

    if verbose:
        logger.info('Checking Contrib Stats')

    # Contributor Statistics
    # hack remove colon for image alignment
    def fix_tag_list(tag_list):
        return [None if tag is None else tag.replace(':', ';') for tag in tag_list]

    image_contributor_tags = fix_tag_list(ibs.get_image_contributor_tag(valid_gids))
    annot_contributor_tags = fix_tag_list(ibs.get_annot_image_contributor_tag(valid_aids))
    contributor_tag_to_gids = ut.group_items(valid_gids, image_contributor_tags)
    contributor_tag_to_aids = ut.group_items(valid_aids, annot_contributor_tags)

    contributor_tag_to_qualstats = {
        key: get_annot_qual_stats(ibs, aids)
        for key, aids in six.iteritems(contributor_tag_to_aids)
    }
    contributor_tag_to_viewstats = {
        key: get_annot_viewpoint_stats(ibs, aids)
        for key, aids in six.iteritems(contributor_tag_to_aids)
    }

    contributor_tag_to_nImages = {
        key: len(val) for key, val in six.iteritems(contributor_tag_to_gids)
    }
    contributor_tag_to_nAnnots = {
        key: len(val) for key, val in six.iteritems(contributor_tag_to_aids)
    }

    if verbose:
        logger.info('Summarizing')

    # Summarize stats
    num_names = len(valid_nids)
    num_names_unassociated = len(valid_nids) - len(associated_nids)
    num_names_singleton = len(singleton_nxs)
    num_names_multiton = len(multiton_nxs)

    num_singleton_annots = len(singleton_aids)
    num_multiton_annots = len(multiton_aids)
    num_unknown_annots = len(unknown_annots)
    num_annots = len(valid_aids)

    if with_bytes:
        if verbose:
            logger.info('Checking Disk Space')
        ibsdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_ibsdir()))
        dbdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_dbdir()))
        imgdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_imgdir()))
        cachedir_space = ut.byte_str2(ut.get_disk_space(ibs.get_cachedir()))

    if True:
        if verbose:
            logger.info('Check asserts')
        try:
            bad_aids = np.intersect1d(multiton_aids, unknown_annots)
            _num_names_total_check = (
                num_names_singleton + num_names_unassociated + num_names_multiton
            )
            _num_annots_total_check = (
                num_unknown_annots + num_singleton_annots + num_multiton_annots
            )
            assert len(bad_aids) == 0, 'intersecting multiton aids and unknown aids'
            assert _num_names_total_check == num_names, 'inconsistent num names'
            # if not request_annot_subset:
            # dont check this if you have an annot subset
            assert _num_annots_total_check == num_annots, 'inconsistent num annots'
        except Exception as ex:
            ut.printex(
                ex,
                keys=[
                    '_num_names_total_check',
                    'num_names',
                    '_num_annots_total_check',
                    'num_annots',
                    'num_names_singleton',
                    'num_names_multiton',
                    'num_unknown_annots',
                    'num_multiton_annots',
                    'num_singleton_annots',
                ],
            )
            raise

    # Get contributor statistics
    contributor_rowids = ibs.get_valid_contributor_rowids()
    num_contributors = len(contributor_rowids)

    # print
    num_tabs = 5

    def align2(str_):
        return ut.align(str_, ':', ' :')

    def align_dict2(dict_):
        str_ = ut.repr2(dict_, si=True)
        return align2(str_)

    header_block_lines = [('+============================')] + (
        [
            ('+ singleton := single sighting'),
            ('+ multiton  := multiple sightings'),
            ('--' * num_tabs),
        ]
        if not short and with_header
        else []
    )

    source_block_lines = [
        ('DB Info:  ' + ibs.get_dbname()),
        ('DB Notes: ' + ibs.get_dbnotes()),
        ('DB NumContrib: %d' % num_contributors),
    ]

    bytes_block_lines = (
        [
            ('--' * num_tabs),
            ('DB Bytes: '),
            ('     +- dbdir nBytes:         ' + dbdir_space),
            ('     |  +- _ibsdb nBytes:     ' + ibsdir_space),
            ('     |  |  +-imgdir nBytes:   ' + imgdir_space),
            ('     |  |  +-cachedir nBytes: ' + cachedir_space),
        ]
        if with_bytes
        else []
    )

    name_block_lines = [
        ('--' * num_tabs),
        ('# Names                      = %d' % num_names),
        ('# Names (unassociated)       = %d' % num_names_unassociated),
        ('# Names (singleton)          = %d' % num_names_singleton),
        ('# Names (multiton)           = %d' % num_names_multiton),
    ]

    subset_str = '        ' if not request_annot_subset else '(SUBSET)'

    annot_block_lines = [
        ('--' * num_tabs),
        ('# Annots %s            = %d' % (subset_str, num_annots,)),
        ('# Annots (unknown)           = %d' % num_unknown_annots),
        ('# Annots (singleton)         = %d' % num_singleton_annots),
        ('# Annots (multiton)          = %d' % num_multiton_annots),
    ]

    annot_per_basic_block_lines = (
        [
            ('--' * num_tabs),
            ('# Annots per Name (multiton) = %s' % (align2(multiton_stats),)),
            ('# Annots per Image           = %s' % (align2(gx2_nAnnots_stats),)),
            ('# Annots per Species         = %s' % (align_dict2(species2_nAids),)),
        ]
        if not short
        else []
    )

    occurrence_block_lines = (
        [
            ('--' * num_tabs),
            # ('# Occurrence Per Name (Resights) = %s' % (align_dict2(resight_name_stats),)),
            # ('# Annots per Encounter (Singlesights) = %s' % (align_dict2(singlesight_annot_stats),)),
            ('# Pair Tag Info (annots) = %s' % (align_dict2(pair_tag_info),)),
        ]
        if not short
        else []
    )

    annot_per_qualview_block_lines = [
        None if short else '# Annots per Viewpoint = %s' % align_dict2(viewcode2_nAnnots),
        None if short else '# Annots per Quality = %s' % align_dict2(qualtext2_nAnnots),
    ]

    annot_per_agesex_block_lines = (
        [
            '# Annots per Age = %s' % align_dict2(agetext2_nAnnots),
            '# Annots per Sex = %s' % align_dict2(sextext2_nAnnots),
        ]
        if not short and with_agesex
        else []
    )

    contributor_block_lines = (
        [
            '# Images per contributor       = ' + align_dict2(contributor_tag_to_nImages),
            '# Annots per contributor       = ' + align_dict2(contributor_tag_to_nAnnots),
            '# Quality per contributor      = '
            + ut.repr2(contributor_tag_to_qualstats, sorted_=True),
            '# Viewpoint per contributor    = '
            + ut.repr2(contributor_tag_to_viewstats, sorted_=True),
        ]
        if with_contrib
        else []
    )

    img_block_lines = [
        ('--' * num_tabs),
        ('# Img                        = %d' % len(valid_gids)),
        None
        if short
        else ('# Img reviewed               = %d' % sum(image_reviewed_list)),
        None if short else ('# Img with gps               = %d' % len(gps_list)),
        # ('# Img with timestamp         = %d' % len(valid_unixtime_list)),
        None
        if short
        else ('Img Time Stats               = %s' % (align2(unixtime_statstr),)),
    ]

    info_str_lines = (
        header_block_lines
        + bytes_block_lines
        + source_block_lines
        + name_block_lines
        + annot_block_lines
        + annot_per_basic_block_lines
        + occurrence_block_lines
        + annot_per_qualview_block_lines
        + annot_per_agesex_block_lines
        + img_block_lines
        + contributor_block_lines
        + imgsize_stat_lines
        + [('L============================')]
    )
    info_str = '\n'.join(ut.filter_Nones(info_str_lines))
    info_str2 = ut.indent(info_str, '[{tag}]'.format(tag=tag))
    if verbose:
        logger.info(info_str2)
    locals_ = locals()
    return locals_
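The singleton/multiton bookkeeping above reduces to counting annotations per name. A self-contained sketch of that core step, using a made-up name-to-annotation grouping instead of a real IBEIS controller:

import numpy as np

# Hypothetical mapping: name index -> list of annotation ids.
nx2_aids = [[1, 2, 3], [4], [], [5, 6], [7]]
nx2_nAnnots = np.array(list(map(len, nx2_aids)))

multiton_nxs = np.where(nx2_nAnnots > 1)[0]       # names seen multiple times
singleton_nxs = np.where(nx2_nAnnots == 1)[0]     # names seen exactly once
unassociated_nxs = np.where(nx2_nAnnots == 0)[0]  # names with no annotations

assert len(np.intersect1d(singleton_nxs, multiton_nxs)) == 0, 'intersecting names'
print(len(multiton_nxs), len(singleton_nxs), len(unassociated_nxs))  # 2 2 1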
Example #27
def get_layer_info_str(output_layer, batch_size=128):
    r"""
    Args:
        output_layer (lasagne.layers.Layer):

    CommandLine:
        python -m ibeis_cnn.net_strs --test-get_layer_info_str:0
        python -m ibeis_cnn.net_strs --test-get_layer_info_str:1

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis_cnn.net_strs import *  # NOQA
        >>> from ibeis_cnn import models
        >>> model = models.DummyModel(data_shape=(24, 24, 3), autoinit=True)
        >>> output_layer = model.output_layer
        >>> result = get_layer_info_str(output_layer)
        >>> result = '\n'.join([x.rstrip() for x in result.split('\n')])
        >>> print(result)
        Network Structure:
         index  Layer    Outputs    Bytes OutShape         Params
         0      Input      1,728   55,296 (8, 3, 24, 24)   []
         1      Conv2D     7,744  249,600 (8, 16, 22, 22)  [W(16,3,3,3, {t,r}), b(16, {t})]
         2      Conv2D     7,056  229,952 (8, 16, 21, 21)  [W(16,16,2,2, {t,r}), b(16, {t})]
         3      Dense          8  226,080 (8, 8)           [W(7056,8, {t,r}), b(8, {t})]
         4      Dense          5      340 (8, 5)           [W(8,5, {t,r}), b(5, {t})]
        ...this model has 57,989 learnable parameters
        ...this model will use 761,268 bytes = 743.43 KB

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis_cnn.net_strs import *  # NOQA
        >>> from ibeis_cnn import models
        >>> model = models.mnist.MNISTModel(batch_size=128, output_dims=10,
        >>>                                 data_shape=(24, 24, 3))
        >>> model.init_arch()
        >>> output_layer = model.output_layer
        >>> result = get_layer_info_str(output_layer)
        >>> result = '\n'.join([x.rstrip() for x in result.split('\n')])
        >>> print(result)
    """
    import ibeis_cnn.__LASAGNE__ as lasagne
    info_lines = []
    _print = info_lines.append
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', '.*topo.*')
        nn_layers = lasagne.layers.get_all_layers(output_layer)
        _print('Network Structure:')

        columns_ = ut.ddict(list)

        for index, layer in enumerate(nn_layers):

            layer_info = get_layer_info(layer)

            columns_['index'].append(index)
            columns_['name'].append(layer_info['name'])
            #columns_['type'].append(getattr(layer, 'type', None))
            #columns_['layer'].append(layer_info['classname'])
            columns_['layer'].append(layer_info['classalias'])
            columns_['num_outputs'].append('{:,}'.format(int(layer_info['num_outputs'])))
            columns_['output_shape'].append(str(layer_info['output_shape']))
            columns_['params'].append(layer_info['param_str'])
            columns_['param_type'].append(layer_info['param_type_str'])
            columns_['mem'].append(layer_info['total_memory'])
            columns_['bytes'].append('{:,}'.format(int(layer_info['total_bytes'])))
            #ut.embed()

        header_nice = {
            'index'        : 'index',
            'name'         : 'Name',
            'layer'        : 'Layer',
            'type'         : 'Type',
            'num_outputs'  : 'Outputs',
            'output_shape' : 'OutShape',
            'params'       : 'Params',
            'param_type'   : 'ParamType',
            'mem'          : 'Mem',
            'bytes'        : 'Bytes',
        }

        header_align = {
            'index'       : '<',
            'params'      : '<',
            'bytes'       : '>',
            'num_outputs' : '>',
        }

        def get_col_maxval(key):
            header_len = len(header_nice[key])
            val_len = max(list(map(len, map(str, columns_[key]))))
            return max(val_len, header_len)

        header_order = ['index']
        if len(ut.filter_Nones(columns_['name'])) > 0:
            header_order += ['name']
        header_order += ['layer', 'num_outputs']
        #header_order += ['mem']
        header_order += ['bytes']
        header_order += ['output_shape', 'params']
        #header_order += ['param_type']

        max_len = {key: str(get_col_maxval(key) + 1) for key, col in six.iteritems(columns_)}

        fmtstr = ' ' + ' '.join(
            [
                '{:' + align + len_ + '}'
                for align, len_ in zip(ut.dict_take(header_align, header_order, '<'),
                                       ut.dict_take(max_len, header_order))
            ]
        )
        _print(fmtstr.format(*ut.dict_take(header_nice, header_order)))

        row_list = zip(*ut.dict_take(columns_, header_order))
        for row in row_list:
            try:
                row = ['' if _ is None else _ for _ in row]
                str_ = fmtstr.format(*row)
                _print(str_)
            except TypeError:
                print('Error printing %r with args %r' % (fmtstr, row, ))

        total_bytes = count_bytes(output_layer)
        num_params = lasagne.layers.count_params(output_layer)

        _print('...this model has {:,} learnable parameters'.format(num_params))
        _print('...this model will use ~{:,} bytes = {} per input'.format(
            total_bytes, ut.byte_str2(total_bytes)))
        _print('...this model will use ~{:,} bytes = {} per batch with a batch size of {}'.format(
            total_bytes * batch_size, ut.byte_str2(total_bytes * batch_size), batch_size))
    info_str = '\n'.join(info_lines)
    return info_str
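The table layout above is driven by a format string assembled from per-column maximum widths and alignments. A stripped-down sketch of the same technique with hypothetical column data:

# Hypothetical columns; each width is the max of the header and its values.
columns = {
    'layer': ['Input', 'Conv2D', 'Dense'],
    'bytes': ['55,296', '249,600', '340'],
}
header = {'layer': 'Layer', 'bytes': 'Bytes'}
align = {'layer': '<', 'bytes': '>'}
order = ['layer', 'bytes']

width = {k: max(len(header[k]), max(len(v) for v in columns[k])) + 1
         for k in order}
fmtstr = ' ' + ' '.join('{:%s%d}' % (align[k], width[k]) for k in order)

print(fmtstr.format(*[header[k] for k in order]))
for row in zip(*[columns[k] for k in order]):
    print(fmtstr.format(*row))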
Example #28
def analyize_multiple_drives(drives):
    """
    CommandLine:
        export PYTHONPATH=$PYTHONPATH:~/local/scripts

        python -m register_files --exec-analyize_multiple_drives --drives ~ E:/ D:/

        python -m register_files --exec-analyize_multiple_drives --drives ~ /media/Store
        python register_files.py --exec-analyize_multiple_drives --drives /media/joncrall/media/ /media/joncrall/store/
        /media/joncrall/backup

        cd ~/local/scripts

    Example:
        >>> from register_files import *  # NOQA
        >>> dpaths = ut.get_argval('--drives', type_=list, default=['E://', 'D://'])  # e.g. 'D:/', 'E:/', 'F:/'
        >>> drives = [Drive(root_dpath) for root_dpath in dpaths]
        >>> drive = Broadcaster(drives)
        >>> drive.compute_info()
        >>> #drive.build_fpath_hashes()
        >>> drive.check_consistency()
        >>> E = drive = drives[0]
        >>> analyize_multiple_drives(drives)
        >>> #D, E, F = drives
        >>> #drive = D
    """
    # -----
    ## Find the files shared on all disks
    #allhave = reduce(ut.dict_isect_combine, [drive.hash_to_fpaths for drive in drives])
    #print('#allhave = %r' % (len(allhave),))
    #allhave.keys()[0:3]
    #allhave.values()[0:3]
    #ut.embed()
    #for drive in drives:
    #drive.rrr()
    #print(drive.root_dpath)
    #print(len(drive.hash_to_unique_fpaths))
    #print(len(drive.hash_to_fpaths))
    #print(len(drive.hash_to_unique_fpaths) / len(drive.hash_to_fpaths))

    # Build dict to map from dpath to file pointers of unique descendants
    #unique_fidxs_list = drive.hash_to_fidxs.values()
    #fidxs = ut.flatten(unique_fidxs_list)

    esc = re.escape

    # Find which files exist on all drives
    hashes_list = [set(drive_.hash_to_fidxs.keys()) for drive_ in drives]
    allhave_hashes = reduce(set.intersection, hashes_list)
    print('Drives %r have %d file hashes in common' %
          (drives, len(allhave_hashes)))

    lbls = [drive_.root_dpath for drive_ in drives]
    isect_lens = np.zeros((len(drives), len(drives)))
    for idx1, (hashes1, drive1) in enumerate(zip(hashes_list, drives)):
        for idx2, (hashes2, drive2) in enumerate(zip(hashes_list, drives)):
            if drive1 is not drive2:
                common = set.intersection(hashes1, hashes2)
                isect_lens[idx1, idx2] = len(common)
            else:
                isect_lens[idx1, idx2] = len(hashes2)
    import pandas as pd
    print(pd.DataFrame(isect_lens, index=lbls, columns=lbls))

    # for drive in drives
    drive = drives[0]
    print('Finding unique files in drive=%r' % (drive, ))
    # Get subset of fidxs on this drive
    unflat_valid_fidxs = ut.take(drive.hash_to_fidxs, allhave_hashes)
    valid_fidxs = sorted(ut.flatten(unflat_valid_fidxs))

    # Filter fpaths by patterns
    ignore_patterns = [esc('Thumbs.db')]
    ignore_paths = ['Spotify']
    patterns = ignore_paths + ignore_patterns
    valid_fpaths = ut.take(drive.fpath_list, valid_fidxs)
    valid_flags = [
        not any([re.search(p, fpath) for p in patterns])
        for fpath in valid_fpaths
    ]
    valid_flags = np.array(valid_flags)
    valid_fidxs = ut.compress(valid_fidxs, valid_flags)

    print(ut.filtered_infostr(valid_flags, 'invalid fpaths'))

    fidxs = valid_fidxs
    valid_fpaths = sorted(ut.take(drive.fpath_list, fidxs))

    dpath_to_unique_fidx = build_dpath_to_fidx(valid_fpaths, valid_fidxs,
                                               drive.root_dpath)

    def make_tree_structure(valid_fpaths):
        root = {}

        def dict_getitem_default(dict_, key, type_):
            try:
                val = dict_[key]
            except KeyError:
                val = type_()
                dict_[key] = val
            return val

        for fpath in ut.ProgIter(valid_fpaths, 'building tree', freq=30000):
            path_components = ut.dirsplit(fpath)
            current = root
            for comp in path_components[:-1]:
                current = dict_getitem_default(current, comp, dict)
            contents = dict_getitem_default(current, '.', list)
            contents.append(path_components[-1])
        return root

    root = make_tree_structure(valid_fpaths)

    def print_tree(root,
                   path,
                   dpath_to_unique_fidx=dpath_to_unique_fidx,
                   drive=drive,
                   depth=None):
        print('path = %r' % (path, ))
        print(ut.byte_str2(drive.get_total_nbytes(dpath_to_unique_fidx[path])))
        path_components = ut.dirsplit(path)
        # Navigate to correct spot in tree
        current = root
        for c in path_components:
            current = current[c]
        print(ut.repr3(current, truncate=1))

    def get_tree_info(root,
                      path,
                      dpath_to_unique_fidx=dpath_to_unique_fidx,
                      drive=drive,
                      depth=0):
        path_components = ut.dirsplit(path)
        current = root
        for c in path_components:
            current = current[c]
        if isinstance(current, list):
            tree_tmp = []
        else:
            key_list = list(current.keys())
            child_list = [join(path, key) for key in key_list]
            dpath_nbytes_list = [
                drive.get_total_nbytes(dpath_to_unique_fidx.get(child, []))
                for child in child_list
            ]
            nfiles_list = [
                len(dpath_to_unique_fidx.get(child, []))
                for child in child_list
            ]
            tree_tmp = sorted([
                (key, ut.byte_str2(nbytes), nfiles) if depth == 0 else
                (key, ut.byte_str2(nbytes), nfiles,
                 get_tree_info(root,
                               path=child,
                               dpath_to_unique_fidx=dpath_to_unique_fidx,
                               drive=drive,
                               depth=depth - 1))
                for key, child, nbytes, nfiles in zip(
                    key_list, child_list, dpath_nbytes_list, nfiles_list)
            ])
        return tree_tmp

    def print_tree_struct(*args, **kwargs):
        tree_str = (ut.indent(ut.repr3(get_tree_info(*args, **kwargs), nl=1)))
        print(tree_str)
        #bytes_str = ut.byte_str2(drive.get_total_nbytes(dpath_to_unique_fidx[path]))
        #print('path = %r, %s' % (path, bytes_str))
        #print(ut.repr3(key_list))
        return tree_str

    dpath_to_fidxs = ut.map_dict_vals(set, drive.dpath_to_fidx)
    complete_unique_dpaths = ut.dict_isect(dpath_to_fidxs,
                                           dpath_to_unique_fidx)
    complete_root = make_tree_structure(complete_unique_dpaths.keys())

    globals()['ut'] = ut
    globals()['os'] = os
    globals()['join'] = join

    print(ut.byte_str2(drive.get_total_nbytes(dpath_to_unique_fidx['E:\\'])))
    get_tree_info(root, path='E:\\', depth=0)

    get_tree_info(complete_root, path='E:\\', depth=0)

    get_tree_info(root, path='E:\\', depth=1)
    print(print_tree_struct(root, path='E:\\Clutter', depth=0))
    print_tree(root, path=r'E:\TV')
    print_tree(root, path=r'E:\Movies')
    print_tree(root, path=r'E:\Boot')

    print_tree(root, path='E:\\')
    print_tree(root, path=r'E:\Downloaded')
    print_tree(root, path=r'E:\Recordings')
    print_tree(root, path=r'E:\Clutter')
    print_tree(root, path=r'E:\Audio Books')

    # TODO:
    # * Ignore list
    # * Find and rectify internal duplicates
    # * Update registry with new files and deleted ones
    # * Ensure that all unique files are backed up
    # Index the C: Drive as well.
    # * Lazy properties of drive
    # * Multiple types of identifiers (hash, fname, ext, fsize)
    # Drive subsets
    # Export/Import Drive for analysis on other machines

    ut.embed()
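The drive-comparison step above boils down to pairwise intersections of per-drive hash sets. A minimal sketch with toy data, assuming numpy and pandas are available:

import numpy as np
import pandas as pd

# Hypothetical per-drive sets of file hashes.
hashes_list = [{'a', 'b', 'c'}, {'b', 'c', 'd'}, {'c'}]
lbls = ['E:/', 'D:/', 'F:/']

isect_lens = np.zeros((len(hashes_list), len(hashes_list)), dtype=int)
for i, h1 in enumerate(hashes_list):
    for j, h2 in enumerate(hashes_list):
        # The diagonal holds each drive's own hash count.
        isect_lens[i, j] = len(h1) if i == j else len(h1 & h2)
print(pd.DataFrame(isect_lens, index=lbls, columns=lbls))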
Example #29
 def __str__(drive):
     if drive.total_bytes is None:
         bytes_str = '?'
     else:
         bytes_str = ut.byte_str2(drive.total_bytes)
     return drive.root_dpath + ' - ' + bytes_str
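A self-contained sketch of the same pattern: a minimal Drive-like class whose __str__ falls back to '?' until total_bytes has been computed. ut.byte_str2 is replaced with a plain format here since utool is not assumed:

class Drive(object):
    def __init__(self, root_dpath, total_bytes=None):
        self.root_dpath = root_dpath
        self.total_bytes = total_bytes

    def __str__(self):
        # '?' until a (possibly expensive) size scan has populated total_bytes
        if self.total_bytes is None:
            return self.root_dpath + ' - ?'
        return '%s - %.2f GB' % (self.root_dpath, self.total_bytes / 1e9)

print(Drive('E:/'))                    # E:/ - ?
print(Drive('E:/', total_bytes=25e9))  # E:/ - 25.00 GB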