Example #1
def cheetah_stats(ibs):
    filters = [
        dict(view=['right', 'frontright', 'backright'], minqual='good'),
        dict(view=['right', 'frontright', 'backright']),
    ]
    for filtkw in filters:
        annots = ibs.annots(ibs.filter_annots_general(**filtkw))
        unique_nids, grouped_annots = annots.group(annots.nids)
        annots_per_name = ut.lmap(len, grouped_annots)
        annots_per_name_freq = ut.dict_hist(annots_per_name)
        def bin_mapper(num):
            if num < 5:
                return (num, num + 1)
            else:
                for bin, mod in [(20, 5), (50, 10)]:
                    if num < bin:
                        low = (num // mod) * mod
                        high = low + mod
                        return (low, high)
                if num >= bin:
                    return (bin, None)
                else:
                    assert False, str(num)
        hist = ut.ddict(lambda: 0)
        for num in annots_per_name:
            hist[bin_mapper(num)] += 1
        hist = ut.sort_dict(hist)

        print('------------')
        print('filters = %s' % ut.repr4(filtkw))
        print('num_annots = %r' % (len(annots)))
        print('num_names = %r' % (len(unique_nids)))
        print('annots_per_name_freq = %s' % (ut.repr4(annots_per_name_freq)))
        print('annots_per_name_freq (ranges) = %s' % (ut.repr4(hist)))
        assert sum(hist.values()) == len(unique_nids)
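The bin_mapper above maps a per-name annotation count to a histogram bucket: exact buckets below 5, width-5 buckets up to 20, width-10 buckets up to 50, then one open-ended bucket. A minimal standalone sketch of the same rule (plain Python, no utool) for sanity-checking the buckets:

def bin_mapper(num):
    if num < 5:
        return (num, num + 1)          # exact bins below 5
    for top, mod in [(20, 5), (50, 10)]:
        if num < top:
            low = (num // mod) * mod   # snap down to the bin boundary
            return (low, low + mod)
    return (50, None)                  # open-ended bin for 50+

assert bin_mapper(3) == (3, 4)
assert bin_mapper(7) == (5, 10)
assert bin_mapper(23) == (20, 30)
assert bin_mapper(99) == (50, None)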
Example #2
def print_database_structure(cur):
    import utool as ut
    tablename_list = ut.get_tablenames(cur)
    colinfos_list = [ut.get_table_columninfo_list(cur, tablename)
                     for tablename in tablename_list]
    numrows_list = [ut.get_table_num_rows(cur, tablename)
                    for tablename in tablename_list]
    for tablename, colinfo_list, num_rows in ut.sortedby(
            list(zip(tablename_list, colinfos_list, numrows_list)), numrows_list):
        print('+-------------')
        print('tablename = %r' % (tablename,))
        print('num_rows = %r' % (num_rows,))
        #print(ut.repr4(colinfo_list))
        print(ut.repr4(ut.get_primary_columninfo(cur, tablename)))
        print(ut.repr4(ut.get_nonprimary_columninfo(cur, tablename)))
        print('+-------------')
Example #3
def _postprocess_feats(extr, feats):
    # Take the filtered subset of columns
    if extr.feat_dims is not None:
        missing = set(extr.feat_dims).difference(feats.columns)
        if any(missing):
            # print('We have: ' + ut.repr4(feats.columns))
            alt = feats.columns.difference(extr.feat_dims)
            mis_msg = ('Missing feature dims: ' + ut.repr4(missing))
            alt_msg = ('Did you mean? ' + ut.repr4(alt))
            print(mis_msg)
            print(alt_msg)
            raise KeyError(mis_msg)
        feats = feats[extr.feat_dims]
    return feats
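A self-contained sketch of the same missing-column check using plain pandas (the frame and feat_dims here are made up for illustration; 'disto' is a deliberate typo):

import pandas as pd

feats = pd.DataFrame({'ratio': [0.5], 'dist': [1.2]})
feat_dims = ['ratio', 'disto']

missing = set(feat_dims).difference(feats.columns)
if missing:
    # Suggest the columns that do exist but were not requested
    alt = feats.columns.difference(feat_dims)
    raise KeyError('Missing feature dims: {}; did you mean {}?'.format(
        sorted(missing), list(alt)))
feats = feats[feat_dims]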
Example #4
    def isect_info(self, other):
        set1 = set(self.rel_fpath_list)
        set2 = set(other.rel_fpath_list)

        set_comparisons = ut.odict([
            ('s1', set1),
            ('s2', set2),
            ('union', set1.union(set2)),
            ('isect', set1.intersection(set2)),
            ('s1 - s2', set1.difference(set2)),
            ('s2 - s1', set2.difference(set1)),
        ])
        stat_stats = ut.map_vals(len, set_comparisons)
        print(ut.repr4(stat_stats))
        return set_comparisons

        if False:
            idx_lookup1 = ut.make_index_lookup(self.rel_fpath_list)
            idx_lookup2 = ut.make_index_lookup(other.rel_fpath_list)

            uuids1 = ut.take(self.uuids,
                             ut.take(idx_lookup1, set_comparisons['union']))
            uuids2 = ut.take(other.uuids,
                             ut.take(idx_lookup2, set_comparisons['union']))

            uuids1 == uuids2
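The same set bookkeeping with plain builtins (note the 's2 - s1' entry must be computed as set2 - set1; the file names are illustrative):

set1 = {'a.png', 'b.png', 'c.png'}
set2 = {'b.png', 'c.png', 'd.png'}
set_comparisons = {
    's1': set1,
    's2': set2,
    'union': set1 | set2,
    'isect': set1 & set2,
    's1 - s2': set1 - set2,
    's2 - s1': set2 - set1,
}
stat_stats = {k: len(v) for k, v in set_comparisons.items()}
print(stat_stats)
# {'s1': 3, 's2': 3, 'union': 4, 'isect': 2, 's1 - s2': 1, 's2 - s1': 1}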
Example #5
def _print_previous_loop_statistics(infr, count):
    # Print stats about what happened in this loop
    history = infr.metrics_list[-count:]
    recover_blocks = ut.group_items([
        (k, sum(1 for i in g))
        for k, g in it.groupby(ut.take_column(history, 'recovering'))
    ]).get(True, [])
    infr.print((
        'Recovery mode entered {} times, '
        'made {} recovery decisions.').format(
            len(recover_blocks), sum(recover_blocks)), color='green')
    testaction_hist = ut.dict_hist(ut.take_column(history, 'test_action'))
    infr.print(
        'Test Action Histogram: {}'.format(
            ut.repr4(testaction_hist, si=True)), color='yellow')
    if infr.params['inference.enabled']:
        action_hist = ut.dict_hist(
            ut.emap(frozenset, ut.take_column(history, 'action')))
        infr.print(
            'Inference Action Histogram: {}'.format(
                ub.repr2(action_hist, si=True)), color='yellow')
    infr.print(
        'Decision Histogram: {}'.format(ut.repr2(ut.dict_hist(
            ut.take_column(history, 'pred_decision')
        ), si=True)), color='yellow')
    infr.print(
        'User Histogram: {}'.format(ut.repr2(ut.dict_hist(
            ut.take_column(history, 'user_id')
        ), si=True)), color='yellow')
Example #6
def testdata_showchip():
    import wbia

    ibs = wbia.opendb(defaultdb='PZ_MTEST')
    aid_list = ut.get_argval(('--aids', '--aid'), type_=list, default=None)
    if aid_list is None:
        aid_list = ibs.get_valid_aids()[0:4]
    weight_label = ut.get_argval('--weight_label',
                                 type_=str,
                                 default='fg_weights')
    annote = not ut.get_argflag('--no-annote')
    kwargs = dict(ori=ut.get_argflag('--ori'),
                  weight_label=weight_label,
                  annote=annote)
    kwargs['notitle'] = ut.get_argflag('--notitle')
    kwargs['pts'] = ut.get_argflag('--drawpts')
    kwargs['ell'] = True or ut.get_argflag('--drawell')  # NOTE: always True; --drawell is effectively ignored
    kwargs['ell_alpha'] = ut.get_argval('--ellalpha', default=0.4)
    kwargs['ell_linewidth'] = ut.get_argval('--ell_linewidth', default=2)
    kwargs['draw_lbls'] = ut.get_argval('--draw_lbls', default=True)
    logger.info('kwargs = ' + ut.repr4(kwargs, nl=True))
    default_config = dict(wbia.algo.Config.FeatureWeightConfig().parse_items())
    cfgdict = ut.argparse_dict(default_config)
    logger.info('[viz_chip.testdata] cfgdict = %r' % (cfgdict, ))
    config2_ = cfgdict
    logger.info('[viz_chip.testdata] aid_list = %r' % (aid_list, ))
    return ibs, aid_list, kwargs, config2_
Example #7
    def update(client, data_list):
        client.review_vip = None

        if data_list is None:
            print('GRAPH CLIENT GOT NONE UPDATE')
            client.review_dict = None
        else:
            data_list = list(data_list)
            num_samples = 5
            num_items = len(data_list)
            num_samples = min(num_samples, num_items)
            first = list(data_list[:num_samples])

            print('UPDATING GRAPH CLIENT WITH {} ITEM(S):'.format(num_items))
            print('First few are: ' + ut.repr4(first, si=2, precision=4))
            client.review_dict = {}

            for (edge, priority, edge_data_dict) in data_list:
                aid1, aid2 = edge
                if aid2 < aid1:
                    aid1, aid2 = aid2, aid1
                edge = (aid1, aid2, )
                if client.review_vip is None:
                    # Hack around the double review problem
                    if edge != client.prev_vip:
                        client.review_vip = edge
                client.review_dict[edge] = (priority, edge_data_dict, )
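The loop above canonicalizes each edge so the smaller annotation id comes first, which makes (a, b) and (b, a) hash to the same dictionary key. A minimal version of that normalization (sample ids are hypothetical):

def normalize_edge(edge):
    aid1, aid2 = edge
    return (aid2, aid1) if aid2 < aid1 else (aid1, aid2)

assert normalize_edge((5, 2)) == (2, 5)
assert normalize_edge((2, 5)) == (2, 5)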
Example #8
def assert_unique(item_list, ignore=[], name='list', verbose=None):
    import utool as ut
    dups = ut.find_duplicate_items(item_list)
    ut.delete_dict_keys(dups, ignore)
    if len(dups) > 0:
        raise AssertionError('Found duplicate items in %s: %s' %
                             (name, ut.repr4(dups)))
    if verbose:
        print('No duplicates found in %s' % (name, ))
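A usage sketch, assuming this helper is exposed as ut.assert_unique and that ut.find_duplicate_items returns a dict mapping each duplicated item to its positions:

import utool as ut

ut.assert_unique([1, 2, 3], name='aids')       # passes silently
try:
    ut.assert_unique([1, 2, 2, 3], name='aids')
except AssertionError as ex:
    print(ex)  # Found duplicate items in aids: ...
ut.assert_unique([1, 2, 2], ignore=[2])        # duplicates of 2 are ignored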
Example #9
    def hardcase_review_gen(infr):
        """
        Subiterator for hardcase review

        Re-review non-confident edges that vsone did not classify correctly
        """
        infr.print('==============================', color='white')
        infr.print('--- HARDCASE PRIORITY LOOP ---', color='white')

        verifiers = infr.learn_evaluation_verifiers()
        verif = verifiers['match_state']

        edges_ = list(infr.edges())
        real_ = list(infr.edge_decision_from(edges_))
        flags_ = [r in {POSTV, NEGTV, INCMP} for r in real_]
        real = ut.compress(real_, flags_)
        edges = ut.compress(edges_, flags_)

        hardness = 1 - verif.easiness(edges, real)

        if True:
            df = pd.DataFrame({'edges': edges, 'real': real})
            df['hardness'] = hardness

            pred = verif.predict(edges)
            df['pred'] = pred.values

            df.sort_values('hardness', ascending=False)
            infr.print('hardness analysis')
            infr.print(str(df))

            infr.print('infr status: ' + ut.repr4(infr.status()))

        # Don't re-review anything that was confidently reviewed
        # CONFIDENCE = const.CONFIDENCE
        # CODE_TO_INT = CONFIDENCE.CODE_TO_INT.copy()
        # CODE_TO_INT[CONFIDENCE.CODE.UNKNOWN] = 0
        # conf = ut.take(CODE_TO_INT, infr.gen_edge_values(
        #     'confidence', edges, on_missing='default',
        #     default=CONFIDENCE.CODE.UNKNOWN))

        # This should only be run with certain params
        assert not infr.params['autoreview.enabled']
        assert not infr.params['redun.enabled']
        assert not infr.params['ranking.enabled']
        assert infr.params['inference.enabled']
        # const.CONFIDENCE.CODE.PRETTY_SURE
        if infr.params['queue.conf.thresh'] is None:
            # != 'pretty_sure':
            infr.print('WARNING: should queue.conf.thresh = "pretty_sure"?')

        # work around add_candidate_edges
        infr.prioritize(metric='hardness', edges=edges,
                        scores=hardness)
        infr.set_edge_attrs('hardness', ut.dzip(edges, hardness))
        for _ in infr._inner_priority_gen(use_refresh=False):
            yield _
Example #10
def sed_projects(regexpr, repl, force=False, recursive=True, user_profile=None, **kwargs):
    r"""

    Args:
        regexpr (?):
        repl (?):
        force (bool): (default = False)
        recursive (bool): (default = True)
        user_profile (None): (default = None)

    CommandLine:
        python -m utool.util_project --exec-sed_projects

    Example:
        >>> # DISABLE_DOCTEST
        >>> from utool.util_project import *  # NOQA
        >>> regexpr = ut.get_argval('--find', type_=str, default=sys.argv[-1])
        >>> repl = ut.get_argval('--repl', type_=str, default=sys.argv[-2])
        >>> force = False
        >>> recursive = True
        >>> user_profile = None
        >>> result = sed_projects(regexpr, repl, force, recursive, user_profile)
        >>> print(result)

    Ignore:
        regexpr = 'annotation match_scores'
        repl = 'draw_annot_scoresep'

    """
    # FIXME: finishme
    import utool as ut
    user_profile = ensure_user_profile(user_profile)

    sedkw = {}
    sedkw['exclude_dirs'] = user_profile.project_exclude_dirs
    sedkw['dpath_list'] = user_profile.project_dpaths
    sedkw['include_patterns'] = user_profile.project_include_patterns
    sedkw.update(kwargs)

    msg_list1 = []
    #msg_list2 = []

    print_ = msg_list1.append
    print_('Seding Projects')
    print(' * regular expression : %r' % (regexpr,))
    print(' * replacement        : %r' % (repl,))
    print_('sedkw = %s' % ut.repr4(sedkw, nl=True))

    print(' * recursive: %r' % (recursive,))
    print(' * force: %r' % (force,))

    # Walk through each directory recursively
    for fpath in ut.matching_fpaths(sedkw['dpath_list'],
                                    sedkw['include_patterns'],
                                    sedkw['exclude_dirs'],
                                    recursive=recursive):
        ut.sedfile(fpath, regexpr, repl, force)
Example #11
def get_timestats_str(unixtime_list, newlines=1, full=True, isutc=False):
    r"""
    Args:
        unixtime_list (list):
        newlines (bool):

    Returns:
        str: timestat_str

    CommandLine:
        python -m utool.util_time --test-get_timestats_str

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_time import *  # NOQA
        >>> import utool as ut
        >>> unixtime_list = [0, 0 + 60 * 60 * 5 , 10 + 60 * 60 * 5, 100 + 60 * 60 * 5, 1000 + 60 * 60 * 5]
        >>> newlines = 1
        >>> full = False
        >>> timestat_str = get_timestats_str(unixtime_list, newlines, full=full, isutc=True)
        >>> result = ut.align(str(timestat_str), ':')
        >>> print(result)
        {
            'max'  : '1970/01/01 05:16:40',
            'mean' : '1970/01/01 04:03:42',
            'min'  : '1970/01/01 00:00:00',
            'range': '5:16:40',
            'std'  : '2:02:01',
        }

    Example2:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_time import *  # NOQA
        >>> import utool as ut
        >>> unixtime_list = [0, 0 + 60 * 60 * 5 , 10 + 60 * 60 * 5, 100 + 60 * 60 * 5, 1000 + 60 * 60 * 5, float('nan'), 0]
        >>> newlines = 1
        >>> timestat_str = get_timestats_str(unixtime_list, newlines, isutc=True)
        >>> result = ut.align(str(timestat_str), ':')
        >>> print(result)
        {
            'max'    : '1970/01/01 05:16:40',
            'mean'   : '1970/01/01 03:23:05',
            'min'    : '1970/01/01 00:00:00',
            'nMax'   : 1,
            'nMin'   : 2,
            'num_nan': 1,
            'range'  : '5:16:40',
            'shape'  : (7,),
            'std'    : '2:23:43',
        }

    """
    import utool as ut
    datetime_stats = get_timestats_dict(unixtime_list, full=full, isutc=isutc)
    timestat_str = ut.repr4(datetime_stats, newlines=newlines)
    return timestat_str
Example #12
def assert_unique(item_list, ignore=[], name='list', verbose=None):
    import utool as ut
    dups = ut.find_duplicate_items(item_list)
    ut.delete_dict_keys(dups, ignore)
    if len(dups) > 0:
        raise AssertionError(
            'Found duplicate items in %s: %s' % (
                name, ut.repr4(dups)))
    if verbose:
        print('No duplicates found in %s' % (name,))
Example #13
def parse_window_type_and_flags(self):

    # type_ = self.windowType()
    for key, val in WindowTypes.items():
        if bin(val).count('1') == 1:
            pass
        # logger.info('{:<16s}: 0x{:08b}'.format(key, val))
        logger.info('{:<16s}: 0x{:08x}'.format(key, val))

    has = []
    missing = []
    flags = int(self.windowFlags())
    for key, val in WindowFlags.items():
        if flags & val == val:
            has.append(key)
        else:
            missing.append(key)
    logger.info('has = %s' % (ut.repr4(has), ))
    logger.info('missing = %s' % (ut.repr4(missing), ))
    pass
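The flags & val == val test above checks that every bit of a (possibly multi-bit) flag is present, which is why it also works for composite flags. A tiny standalone illustration with made-up flag values:

FLAGS = {'Dialog': 0x2, 'FramelessWindowHint': 0x800, 'Combo': 0x802}  # hypothetical values
window_flags = 0x802

has = [k for k, v in FLAGS.items() if window_flags & v == v]
missing = [k for k, v in FLAGS.items() if window_flags & v != v]
print(has)      # ['Dialog', 'FramelessWindowHint', 'Combo']
print(missing)  # []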
Example #14
def autogen_argparse_block(extra_args=[]):
    """
    SHOULD TURN ANY REGISTERED ARGS INTO A NEW PARSING CONFIG
    FILE FOR BETTER --help COMMANDS

    import utool as ut
    __REGISTERED_ARGS__ = ut.util_arg.__REGISTERED_ARGS__

    Args:
        extra_args (list): (default = [])

    CommandLine:
        python -m utool.util_arg --test-autogen_argparse_block

    Example:
        >>> # DISABLE_DOCTEST
        >>> import utool as ut
        >>> extra_args = []
        >>> result = ut.autogen_argparse_block(extra_args)
        >>> print(result)
    """
    #import utool as ut  # NOQA
    #__REGISTERED_ARGS__
    # TODO FINISHME

    grouped_args = []
    # Group similar args
    for argtup in __REGISTERED_ARGS__:
        argstr_list, type_, default, help_ = argtup
        argstr_set = set(argstr_list)
        # <MULTIKEY_SETATTR>
        # hack in multikey setattr n**2 yuck
        found = False
        for index, (keyset, vals) in enumerate(grouped_args):
            if len(keyset.intersection(argstr_set)) > 0:
                # update
                keyset.update(argstr_set)
                vals.append(argtup)
                found = True
                break
        if not found:
            new_keyset = argstr_set
            new_vals = [argtup]
            grouped_args.append((new_keyset, new_vals))
        # </MULTIKEY_SETATTR>
    # DEBUG
    multi_groups = []
    for keyset, vals in grouped_args:
        if len(vals) > 1:
            multi_groups.append(vals)
    if len(multi_groups) > 0:
        import utool as ut
        print('Following arg was specified multiple times')
        print(ut.repr4(multi_groups, newlines=2))
Example #15
    def find_duplicates(index):
        # fpaths = list(index.files.keys())
        files = list(index.files.values())
        print('Grouping {} files'.format(len(files)))
        grouped = ut.group_items(files, [f.nbytes for f in files])
        print('Found {} groups'.format(len(grouped)))
        potential_dups = {k: v for k, v in grouped.items() if len(v) > 1}
        print('Found {} potential dups by nbytes'.format(len(potential_dups)))

        GB = 2**30  # NOQA
        MB = 2**20  # NOQA
        max_bytes = 10 * MB
        min_bytes = 64 * MB  # NOTE: min_bytes > max_bytes, so the size filter below never matches as written

        duplicates = []
        for k, fs in ut.ProgIter(potential_dups.items(), freq=1):
            names = [f.n for f in fs]
            if ut.allsame(names):
                # Don't do big files yet
                if k < max_bytes and k > min_bytes:
                    if ut.allsame([f.hashid for f in fs]):
                        duplicates.extend(fs)
                        for f1, f2 in ut.combinations(fs, 2):
                            f1.duplicates.add(f2)
                            f2.duplicates.add(f1)

        def dpath_similarity(index, dpath1, dpath2):
            d1 = index[dpath1]
            d2 = index[dpath2]
            set1 = {f.hashid for f in ut.ProgIter(d1.files)}
            set2 = {f.hashid for f in ut.ProgIter(d2.files)}
            # n_isect = len(set1.intersection(set2))
            size1, size2 = map(len, (set1, set2))
            # minsize = min(size1, size2)
            # sim_measures = (n_isect, n_isect / minsize)
            return ut.set_overlaps(set1, set2)
            # return sim_measures

        similarities = {}
        r_to_dup = ut.group_items(duplicates, [p.r for p in duplicates])
        for dpath, dups in r_to_dup.items():
            # Check to see if the duplicates all point to the same dir
            f = dups[0]  # NOQA
            common_dpath = set.intersection(*[{_.r
                                               for _ in f.duplicates}
                                              for f in dups])

            for other in common_dpath:
                sim_measures = dpath_similarity(index, dpath, other)
                similarities[(dpath, other)] = sim_measures

        print(ut.repr4(similarities, si=True, nl=2))
Example #16
def assert_union_invariant(infr, msg=''):
    edge_sets = {
        key: set(it.starmap(e_, graph.edges()))
        for key, graph in infr.review_graphs.items()
    }
    edge_union = set.union(*edge_sets.values())
    all_edges = set(it.starmap(e_, infr.graph.edges()))
    if edge_union != all_edges:
        logger.info('ERROR STATUS DUMP:')
        logger.info(ut.repr4(infr.status()))
        raise AssertionError(
            'edge sets must have full union. Found union=%d vs all=%d' %
            (len(edge_union), len(all_edges)))
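The invariant itself is easy to state with plain sets: the per-status edge sets must union back to the full edge set. A toy illustration of the check:

review_edges = {
    'POSTV': {(1, 2), (2, 3)},
    'NEGTV': {(1, 3)},
    'UNREV': set(),
}
all_edges = {(1, 2), (2, 3), (1, 3)}
edge_union = set.union(*review_edges.values())
assert edge_union == all_edges, 'edge sets must have full union'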
Example #17
    def find_duplicates(index):
        # fpaths = list(index.files.keys())
        files = list(index.files.values())
        print('Grouping {} files'.format(len(files)))
        grouped = ut.group_items(files, [f.nbytes for f in files])
        print('Found {} groups'.format(len(grouped)))
        potential_dups = {k: v for k, v in grouped.items() if len(v) > 1}
        print('Found {} potential dups by nbytes'.format(len(potential_dups)))

        GB = 2 ** 30  # NOQA
        MB = 2 ** 20  # NOQA
        max_bytes = 10 * MB
        min_bytes = 64 * MB  # NOTE: min_bytes > max_bytes, so the size filter below never matches as written

        duplicates = []
        for k, fs in ut.ProgIter(potential_dups.items(), freq=1):
            names = [f.n for f in fs]
            if ut.allsame(names):
                # Don't do big files yet
                if k < max_bytes and k > min_bytes:
                    if ut.allsame([f.hashid for f in fs]):
                        duplicates.extend(fs)
                        for f1, f2 in ut.combinations(fs, 2):
                            f1.duplicates.add(f2)
                            f2.duplicates.add(f1)

        def dpath_similarity(index, dpath1, dpath2):
            d1 = index[dpath1]
            d2 = index[dpath2]
            set1 = {f.hashid for f in ut.ProgIter(d1.files)}
            set2 = {f.hashid for f in ut.ProgIter(d2.files)}
            # n_isect = len(set1.intersection(set2))
            size1, size2 = map(len, (set1, set2))
            # minsize = min(size1, size2)
            # sim_measures = (n_isect, n_isect / minsize)
            return ut.set_overlaps(set1, set2)
            # return sim_measures

        similarities = {}
        r_to_dup = ut.group_items(duplicates, [p.r for p in duplicates])
        for dpath, dups in r_to_dup.items():
            # Check to see if the duplicates all point to the same dir
            f = dups[0]  # NOQA
            common_dpath = set.intersection(*[
                {_.r for _ in f.duplicates} for f in dups])

            for other in common_dpath:
                sim_measures = dpath_similarity(index, dpath, other)
                similarities[(dpath, other)] = sim_measures

        print(ut.repr4(similarities, si=True, nl=2))
Example #18
    def build_sphinx_apidoc_cmdstr():
        print('')
        print('if this fails try: sudo pip install sphinx')
        print('')
        apidoc = 'sphinx-apidoc'
        if ut.WIN32:
            winprefix = 'C:/Python27/Scripts/'
            sphinx_apidoc_exe = winprefix + apidoc + '.exe'
        else:
            sphinx_apidoc_exe = apidoc
        apidoc_argfmt_list = [
            sphinx_apidoc_exe,
            '--force',
            '--full',
            '--maxdepth="{maxdepth}"',
            '--doc-author="{author}"',
            '--doc-version="{doc_version}"',
            '--doc-release="{doc_release}"',
            '--output-dir="_doc"',
            #'--separate',  # Put documentation for each module on its own page
            '--private',  # Include "_private" modules
            '{pkgdir}',
        ]
        outputdir = '_doc'
        author = ut.parse_author()
        packages = ut.find_packages(maxdepth=1)
        assert len(packages) != 0, 'directory must contain at least one package'
        if len(packages) > 1:
            assert len(packages) == 1,\
                ('FIXME I dont know what to do with more than one root package: %r'
                 % (packages,))
        pkgdir = packages[0]
        version = ut.parse_package_for_version(pkgdir)
        modpath = dirname(ut.truepath(pkgdir))

        apidoc_fmtdict = {
            'author': author,
            'maxdepth': '8',
            'pkgdir': pkgdir,
            'doc_version': version,
            'doc_release': version,
            'outputdir': outputdir,
        }
        ut.assert_exists('setup.py')
        ut.ensuredir('_doc')
        apidoc_fmtstr = ' '.join(apidoc_argfmt_list)
        apidoc_cmdstr = apidoc_fmtstr.format(**apidoc_fmtdict)
        print('[util_setup] autogenerate sphinx docs for %r' % (pkgdir,))
        if ut.VERBOSE:
            print(ut.repr4(apidoc_fmtdict))
        return apidoc_cmdstr, modpath, outputdir
Example #19
    def __init__(
        extr,
        ibs=None,
        config={},
        use_cache=True,
        verbose=1,
        # Nested config props
        match_config=None,
        pairfeat_cfg=None,
        global_keys=None,
        need_lnbnn=None,
        feat_dims=None,
    ):

        extr.verbose = verbose
        extr.use_cache = use_cache
        extr.ibs = ibs

        # Configs for this are a bit foobar. Allow config to be a catch-all.
        # It can either store params in nested or flat form.
        config = config.copy()
        vars_ = vars()

        def _popconfig(key, default):
            """ ensures param is either specified in func args xor config """
            if key in config:
                if vars_.get(key, None) is not None:
                    raise ValueError('{} specified twice'.format(key))
                value = config.pop(key)
            else:
                # See if the local namespace has it
                value = vars_.get(key, None)
                if value is None:
                    value = default
            return value

        # These also sort-of belong to pair-feat config
        extr.global_keys = _popconfig('global_keys', [])
        extr.need_lnbnn = _popconfig('need_lnbnn', False)
        extr.feat_dims = _popconfig('feat_dims', None)

        extr.match_config = MatchConfig(**_popconfig('match_config', {}))
        extr.pairfeat_cfg = PairFeatureConfig(**_popconfig('pairfeat_cfg', {}))

        # Allow config to store flat versions of these params
        extr.match_config.pop_update(config)
        extr.pairfeat_cfg.pop_update(config)

        if len(config) > 0:
            raise ValueError('Unused config items: ' + ut.repr4(config))
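A condensed sketch of the 'specified exactly once' rule that _popconfig enforces: a parameter may come either from the constructor arguments or from the config dict, but not both (names here are illustrative):

def popconfig(key, default, config, explicit):
    # explicit maps param name -> value passed directly to __init__
    if key in config:
        if explicit.get(key) is not None:
            raise ValueError('{} specified twice'.format(key))
        return config.pop(key)
    value = explicit.get(key)
    return default if value is None else value

config = {'feat_dims': ['ratio']}
print(popconfig('feat_dims', None, config, {'feat_dims': None}))  # ['ratio']
# popconfig('feat_dims', None, {'feat_dims': ['x']}, {'feat_dims': ['y']}) raises ValueError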
Example #20
def auto_docstr(modname, funcname, verbose=True, moddir=None, **kwargs):
    r"""
    Called from vim. Uses strings of filenames and modnames to build the docstr.

    Args:
        modname (str): name of a python module
        funcname (str): name of a function in the module

    Returns:
        str: docstr

    CommandLine:
        python -m utool.util_autogen auto_docstr
        python -m utool --tf auto_docstr

    Example:
        >>> import utool as ut
        >>> from utool.util_autogen import *  # NOQA
        >>> ut.util_autogen.rrr(verbose=False)
        >>> #docstr = ut.auto_docstr('ibeis.algo.hots.smk.smk_index', 'compute_negentropy_names')
        >>> modname = ut.get_argval('--modname', default='utool.util_autogen')
        >>> funcname = ut.get_argval('--funcname', default='auto_docstr')
        >>> moddir = ut.get_argval('--moddir', type_=str, default=None)
        >>> docstr = ut.util_autogen.auto_docstr(modname, funcname)
        >>> print(docstr)
    """
    #import utool as ut
    func, module, error_str = load_func_from_module(
        modname, funcname, verbose=verbose, moddir=moddir)
    if error_str is None:
        try:
            docstr = make_default_docstr(func, **kwargs)
        except Exception as ex:
            import utool as ut
            error_str = ut.formatex(ex, 'Caught Error in parsing docstr', tb=True)
            #ut.printex(ex)
            error_str += (
                '\n\nReplicateCommand:\n    '
                'python -m utool --tf auto_docstr '
                '--modname={modname} --funcname={funcname} --moddir={moddir}').format(
                    modname=modname, funcname=funcname, moddir=moddir)
            error_str += '\n kwargs='  + ut.repr4(kwargs)
            return error_str
    else:
        docstr = error_str
    return docstr
Example #21
def _test_pos_neg():
    infr = demo.demodata_infr(num_pccs=0)
    # Make 3 inconsistent CCs
    infr.add_feedback((1, 2), POSTV)
    infr.add_feedback((2, 3), POSTV)
    infr.add_feedback((3, 4), POSTV)
    infr.add_feedback((4, 1), POSTV)
    infr.add_feedback((1, 3), NEGTV)
    # -----
    infr.add_feedback((11, 12), POSTV)
    infr.add_feedback((12, 13), POSTV)
    infr.add_feedback((13, 11), NEGTV)
    # -----
    infr.add_feedback((21, 22), POSTV)
    infr.add_feedback((22, 23), POSTV)
    infr.add_feedback((23, 21), NEGTV)
    # -----
    # Fix inconsistency
    infr.add_feedback((23, 21), POSTV)
    # Merge inconsistent CCS
    infr.add_feedback((1, 11), POSTV)
    # Negative edge within an inconsistent CC
    infr.add_feedback((2, 13), NEGTV)
    # Negative edge external to an inconsistent CC
    infr.add_feedback((12, 21), NEGTV)
    # -----
    # Make inconsistency from positive
    infr.add_feedback((31, 32), POSTV)
    infr.add_feedback((33, 34), POSTV)
    infr.add_feedback((31, 33), NEGTV)
    infr.add_feedback((32, 34), NEGTV)
    infr.add_feedback((31, 34), POSTV)
    # Fix everything
    infr.add_feedback((1, 3), POSTV)
    infr.add_feedback((2, 4), POSTV)
    infr.add_feedback((32, 34), POSTV)
    infr.add_feedback((31, 33), POSTV)
    infr.add_feedback((13, 11), POSTV)
    infr.add_feedback((23, 21), POSTV)
    infr.add_feedback((1, 11), NEGTV)
    logger.info('Final state:')
    logger.info(ut.repr4(sorted(infr.gen_edge_attrs('decision'))))
Example #22
    def start(actor, dbdir, aids='all', config={}, **kwargs):
        import wbia

        assert dbdir is not None, 'must specify dbdir'
        assert actor.infr is None, 'AnnotInference already running'
        ibs = wbia.opendb(dbdir=dbdir,
                          use_cache=False,
                          web=False,
                          force_serial=True)

        # Create the AnnotInference
        log.info('starting via actor with ibs = %r' % (ibs, ))
        actor.infr = wbia.AnnotInference(ibs=ibs, aids=aids, autoinit=True)
        actor.infr.print('started via actor')
        actor.infr.print('config = {}'.format(ut.repr3(config)))
        # Configure query_annot_infr
        for key in config:
            actor.infr.params[key] = config[key]
        # Initialize
        # TODO: Initialize state from staging reviews after annotmatch
        # timestamps (in case of crash)

        actor.infr.print('Initializing infr tables')
        table = kwargs.get('init', 'staging')
        actor.infr.reset_feedback(table, apply=True)
        actor.infr.ensure_mst()
        actor.infr.apply_nondynamic_update()

        actor.infr.print('infr.status() = {}'.format(
            ut.repr4(actor.infr.status())))

        # Load random forests (TODO: should this be config specifiable?)
        actor.infr.print('loading published models')
        try:
            actor.infr.load_published()
        except Exception:
            pass

        # Start actor.infr Main Loop
        actor.infr.print('start id review')
        actor.infr.start_id_review()
        return 'initialized'
Example #23
def __debug_win_msvcr():
    import utool as ut
    fname = 'msvcr*.dll'
    key_list = ['PATH']
    found = ut.search_env_paths(fname, key_list)
    fpaths = ut.unique(ut.flatten(found.values()))
    fpaths = ut.lmap(ut.ensure_unixslash, fpaths)
    from os.path import basename
    dllnames = [basename(x) for x in fpaths]
    grouped = dict(ut.group_items(fpaths, dllnames))
    print(ut.repr4(grouped, nl=4))

    keytoid = {
    }

    for key, vals in grouped.items():
        infos = ut.lmap(ut.get_file_nBytes, vals)
        #infos = ut.lmap(ut.get_file_uuid, vals)
        #uuids = [ut.get_file_uuid(val) for val in vals]
        keytoid[key] = list(zip(infos, vals))
    ut.print_dict(keytoid, nl=2)
Example #24
def _test_unrev_inference():
    infr = demo.demodata_infr(num_pccs=0)
    # Make 2 consistent and 2 inconsistent CCs
    infr.add_feedback((1, 2), POSTV)
    infr.add_feedback((2, 3), POSTV)
    infr.add_feedback((3, 4), POSTV)
    infr.add_feedback((4, 1), POSTV)
    # -----
    infr.add_feedback((11, 12), POSTV)
    infr.add_feedback((12, 13), POSTV)
    infr.add_feedback((13, 14), POSTV)
    infr.add_feedback((14, 11), POSTV)
    infr.add_feedback((12, 14), NEGTV)
    # -----
    infr.add_feedback((21, 22), POSTV)
    infr.add_feedback((22, 23), POSTV)
    infr.add_feedback((23, 21), NEGTV)
    # -----
    infr.add_feedback((31, 32), POSTV)
    infr.add_feedback((32, 33), POSTV)
    infr.add_feedback((33, 31), POSTV)
    infr.add_feedback((2, 32), NEGTV)
    infr.add_feedback((3, 33), NEGTV)
    infr.add_feedback((12, 21), NEGTV)
    # -----
    # Incomparable within CCs
    logger.info('==========================')
    infr.add_feedback((1, 3), UNREV)
    infr.add_feedback((1, 4), UNREV)
    infr.add_feedback((1, 2), UNREV)
    infr.add_feedback((11, 13), UNREV)
    infr.add_feedback((11, 14), UNREV)
    infr.add_feedback((11, 12), UNREV)
    infr.add_feedback((1, 31), UNREV)
    infr.add_feedback((2, 32), UNREV)
    infr.add_feedback((12, 21), UNREV)
    infr.add_feedback((23, 21), UNREV)
    infr.add_feedback((12, 14), UNREV)
    logger.info('Final state:')
    logger.info(ut.repr4(sorted(infr.gen_edge_attrs('decision'))))
Example #25
    def refresh_candidate_edges(infr):
        """
        Search for candidate edges.
        Assign each edge a priority and add to queue.
        """
        infr.print('refresh_candidate_edges', 1)
        infr.assert_consistency_invariant()

        if infr.ibs is not None:
            candidate_edges = infr.find_lnbnn_candidate_edges()
        elif hasattr(infr, 'dummy_verif'):
            infr.print('Searching for dummy candidates')
            infr.print(
                'dummy vsone params ='
                + ut.repr4(infr.dummy_verif.dummy_params, nl=1, si=True)
            )
            ranks_top = infr.params['ranking.ntop']
            candidate_edges = infr.dummy_verif.find_candidate_edges(K=ranks_top)
        else:
            raise Exception('No method available to search for candidate edges')
        infr.add_candidate_edges(candidate_edges)
        infr.assert_consistency_invariant()
Example #26
def print_system_users():
    r"""

    prints users on the system

    On Unix, looks for /bin/bash users in /etc/passwd

    CommandLine:
        python -m utool.util_cplat --test-print_system_users

    Example:
        >>> # SCRIPT
        >>> from utool.util_cplat import *  # NOQA
        >>> result = print_system_users()
        >>> print(result)
    """
    import utool as ut
    text = ut.read_from('/etc/passwd')
    userinfo_text_list = text.splitlines()
    userinfo_list = [uitext.split(':') for uitext in userinfo_text_list]
    #print(ut.repr4(sorted(userinfo_list)))
    bash_users = [tup for tup in userinfo_list if tup[-1] == '/bin/bash']
    print(ut.repr4(sorted(bash_users)))
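On platforms that ship the standard pwd module, the same listing can be done without parsing /etc/passwd by hand:

import pwd

bash_users = sorted(
    (u.pw_name, u.pw_shell) for u in pwd.getpwall()
    if u.pw_shell == '/bin/bash'
)
for name, shell in bash_users:
    print(name, shell)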
Example #27
def print_module_info(modname):
    print('Checking modname = %r' % (modname,))
    # Handles special cases for certain modules
    if modname.lower() == 'pillow':
        from PIL import Image
        import PIL
        pil_path = PIL.__path__
        infodict = module_stdinfo_dict(Image, versionattr='PILLOW_VERSION',
                                       image_version=Image.VERSION,
                                       pil_path=pil_path)
    elif modname.lower() == 'pyqt4':
        from PyQt4 import QtCore
        infodict = module_stdinfo_dict(QtCore, 'PYQT_VERSION_STR')
    elif modname.lower() == 'pyqt5':
        from PyQt5 import QtCore
        infodict = module_stdinfo_dict(QtCore, 'PYQT_VERSION_STR')
    else:
        # Handle normal modules
        module = ut.import_modname(modname)
        infodict = module_stdinfo_dict(module)
    if any([infodict['__file__'].endswith(ext) for ext in ut.LIB_EXT_LIST]):
        infodict['libdep'] = ut.get_dynlib_dependencies(infodict['__file__'])
    return print(ut.repr4(infodict, strvals=True))
Example #28
    def isect_info(self, other):
        set1 = set(self.rel_fpath_list)
        set2 = set(other.rel_fpath_list)

        set_comparisons = ut.odict([
            ('s1', set1),
            ('s2', set2),
            ('union', set1.union(set2)),
            ('isect', set1.intersection(set2)),
            ('s1 - s2', set1.difference(set2)),
            ('s2 - s1', set2.difference(set1)),
        ])
        stat_stats = ut.map_vals(len, set_comparisons)
        print(ut.repr4(stat_stats))
        return set_comparisons

        if False:
            idx_lookup1 = ut.make_index_lookup(self.rel_fpath_list)
            idx_lookup2 = ut.make_index_lookup(other.rel_fpath_list)

            uuids1 = ut.take(self.uuids, ut.take(idx_lookup1, set_comparisons['union']))
            uuids2 = ut.take(other.uuids, ut.take(idx_lookup2, set_comparisons['union']))

            uuids1 == uuids2
Example #29
def demo2():
    """
    CommandLine:
        python -m wbia.algo.graph.demo demo2 --viz
        python -m wbia.algo.graph.demo demo2

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.graph.demo import *  # NOQA
        >>> result = demo2()
        >>> print(result)
    """
    import wbia.plottool as pt

    from wbia.scripts.thesis import TMP_RC
    import matplotlib as mpl

    mpl.rcParams.update(TMP_RC)

    # ---- Synthetic data params
    params = {
        'redun.pos': 2,
        'redun.neg': 2,
    }
    # oracle_accuracy = .98
    # oracle_accuracy = .90
    # oracle_accuracy = (.8, 1.0)
    oracle_accuracy = (0.85, 1.0)
    # oracle_accuracy = 1.0

    # --- draw params

    VISUALIZE = ut.get_argflag('--viz')
    # QUIT_OR_EMBED = 'embed'
    QUIT_OR_EMBED = 'quit'
    TARGET_REVIEW = ut.get_argval('--target', type_=int, default=None)
    START = ut.get_argval('--start', type_=int, default=None)
    END = ut.get_argval('--end', type_=int, default=None)

    # ------------------

    # rng = np.random.RandomState(42)

    # infr = demodata_infr(num_pccs=4, size=3, size_std=1, p_incon=0)
    # infr = demodata_infr(num_pccs=6, size=7, size_std=1, p_incon=0)
    # infr = demodata_infr(num_pccs=3, size=5, size_std=.2, p_incon=0)
    infr = demodata_infr(pcc_sizes=[5, 2, 4])
    infr.verbose = 100
    # apply_dummy_viewpoints(infr)
    # infr.ensure_cliques()
    infr.ensure_cliques()
    infr.ensure_full()
    # infr.apply_edge_truth()
    # Dummy scoring

    infr.init_simulation(oracle_accuracy=oracle_accuracy, name='demo2')

    # infr_gt = infr.copy()

    dpath = ut.ensuredir(ut.truepath('~/Desktop/demo'))
    ut.remove_files_in_dir(dpath)

    fig_counter = it.count(0)

    def show_graph(infr, title, final=False, selected_edges=None):
        if not VISUALIZE:
            return
        # TODO: rich colored text?
        latest = '\n'.join(infr.latest_logs())
        showkw = dict(
            # fontsize=infr.graph.graph['fontsize'],
            # fontname=infr.graph.graph['fontname'],
            show_unreviewed_edges=True,
            show_inferred_same=False,
            show_inferred_diff=False,
            outof=(len(infr.aids)),
            # show_inferred_same=True,
            # show_inferred_diff=True,
            selected_edges=selected_edges,
            show_labels=True,
            simple_labels=True,
            # show_recent_review=not final,
            show_recent_review=False,
            # splines=infr.graph.graph['splines'],
            reposition=False,
            # with_colorbar=True
        )
        verbose = infr.verbose
        infr.verbose = 0
        infr_ = infr.copy()
        infr_ = infr
        infr_.verbose = verbose
        infr_.show(pickable=True, verbose=0, **showkw)
        infr.verbose = verbose
        # logger.info('status ' + ut.repr4(infr_.status()))
        # infr.show(**showkw)
        ax = pt.gca()
        pt.set_title(title, fontsize=20)
        fig = pt.gcf()
        fontsize = 22
        if True:
            # postprocess xlabel
            lines = []
            for line in latest.split('\n'):
                if False and line.startswith('ORACLE ERROR'):
                    lines += ['ORACLE ERROR']
                else:
                    lines += [line]
            latest = '\n'.join(lines)
            if len(lines) > 10:
                fontsize = 16
            if len(lines) > 12:
                fontsize = 14
            if len(lines) > 14:
                fontsize = 12
            if len(lines) > 18:
                fontsize = 10

            if len(lines) > 23:
                fontsize = 8

        if True:
            pt.adjust_subplots(top=0.95, left=0, right=1, bottom=0.45, fig=fig)
            ax.set_xlabel('\n' + latest)
            xlabel = ax.get_xaxis().get_label()
            xlabel.set_horizontalalignment('left')
            # xlabel.set_x(.025)
            xlabel.set_x(-0.6)
            # xlabel.set_fontname('CMU Typewriter Text')
            xlabel.set_fontname('Inconsolata')
            xlabel.set_fontsize(fontsize)
        ax.set_aspect('equal')

        # ax.xaxis.label.set_color('red')

        from os.path import join

        fpath = join(dpath, 'demo_{:04d}.png'.format(next(fig_counter)))
        fig.savefig(
            fpath,
            dpi=300,
            # transparent=True,
            edgecolor='none',
        )

        # pt.save_figure(dpath=dpath, dpi=300)
        infr.latest_logs()

    if VISUALIZE:
        infr.update_visual_attrs(groupby='name_label')
        infr.set_node_attrs('pin', 'true')
        node_dict = ut.nx_node_dict(infr.graph)
        logger.info(ut.repr4(node_dict[1]))

    if VISUALIZE:
        infr.latest_logs()
        # Pin Nodes into the target groundtruth position
        show_graph(infr, 'target-gt')

    logger.info(ut.repr4(infr.status()))
    infr.clear_feedback()
    infr.clear_name_labels()
    infr.clear_edges()
    logger.info(ut.repr4(infr.status()))
    infr.latest_logs()

    if VISUALIZE:
        infr.update_visual_attrs()

    infr.prioritize('prob_match')
    if VISUALIZE or TARGET_REVIEW is None or TARGET_REVIEW == 0:
        show_graph(infr, 'initial state')

    def on_new_candidate_edges(infr, edges):
        # hack: updating visual attrs as a callback
        infr.update_visual_attrs()

    infr.on_new_candidate_edges = on_new_candidate_edges

    infr.params.update(**params)
    infr.refresh_candidate_edges()

    VIZ_ALL = VISUALIZE and TARGET_REVIEW is None and START is None
    logger.info('VIZ_ALL = %r' % (VIZ_ALL, ))

    if VIZ_ALL or TARGET_REVIEW == 0:
        show_graph(infr, 'find-candidates')

    # _iter2 = enumerate(infr.generate_reviews(**params))
    # _iter2 = list(_iter2)
    # assert len(_iter2) > 0

    # prog = ut.ProgIter(_iter2, label='demo2', bs=False, adjust=False,
    #                    enabled=False)
    count = 1
    first = 1
    for edge, priority in infr._generate_reviews(data=True):
        msg = 'review #%d, priority=%.3f' % (count, priority)
        logger.info('\n----------')
        infr.print('pop edge {} with priority={:.3f}'.format(edge, priority))
        # logger.info('remaining_reviews = %r' % (infr.remaining_reviews()),)
        # Make the next review

        if START is not None:
            VIZ_ALL = count >= START

        if END is not None and count >= END:
            break

        infr.print(msg)
        if ut.allsame(infr.pos_graph.node_labels(*edge)) and first:
            # Have oracle make a mistake early
            feedback = infr.request_oracle_review(edge, accuracy=0)
            first -= 1
        else:
            feedback = infr.request_oracle_review(edge)

        AT_TARGET = TARGET_REVIEW is not None and count >= TARGET_REVIEW - 1

        SHOW_CANDIDATE_POP = True
        if SHOW_CANDIDATE_POP and (VIZ_ALL or AT_TARGET):
            # import utool
            # utool.embed()
            infr.print(
                ut.repr2(infr.task_probs['match_state'][edge],
                         precision=4,
                         si=True))
            infr.print('len(queue) = %r' % (len(infr.queue)))
            # Show edge selection
            infr.print('Oracle will predict: ' + feedback['evidence_decision'])
            show_graph(infr, 'pre' + msg, selected_edges=[edge])

        if count == TARGET_REVIEW:
            infr.EMBEDME = QUIT_OR_EMBED == 'embed'
        infr.add_feedback(edge, **feedback)
        infr.print('len(queue) = %r' % (len(infr.queue)))
        # infr.apply_nondynamic_update()
        # Show the result
        if VIZ_ALL or AT_TARGET:
            show_graph(infr, msg)
            # import sys
            # sys.exit(1)
        if count == TARGET_REVIEW:
            break
        count += 1

    infr.print('status = ' + ut.repr4(infr.status(extended=False)))
    show_graph(infr, 'post-review (#reviews={})'.format(count), final=True)

    # ROUND 2 FIGHT
    # if TARGET_REVIEW is None and round2_params is not None:
    #     # HACK TO GET NEW THINGS IN QUEUE
    #     infr.params = round2_params

    #     _iter2 = enumerate(infr.generate_reviews(**params))
    #     prog = ut.ProgIter(_iter2, label='round2', bs=False, adjust=False,
    #                        enabled=False)
    #     for count, (aid1, aid2) in prog:
    #         msg = 'reviewII #%d' % (count)
    #         logger.info('\n----------')
    #         logger.info(msg)
    #         logger.info('remaining_reviews = %r' % (infr.remaining_reviews()),)
    #         # Make the next review evidence_decision
    #         feedback = infr.request_oracle_review(edge)
    #         if count == TARGET_REVIEW:
    #             infr.EMBEDME = QUIT_OR_EMBED == 'embed'
    #         infr.add_feedback(edge, **feedback)
    #         # Show the result
    #         if PRESHOW or TARGET_REVIEW is None or count >= TARGET_REVIEW - 1:
    #             show_graph(infr, msg)
    #         if count == TARGET_REVIEW:
    #             break

    #     show_graph(infr, 'post-re-review', final=True)

    if not getattr(infr, 'EMBEDME', False):
        if ut.get_computer_name().lower() in ['hyrule', 'ooo']:
            pt.all_figures_tile(monitor_num=0, percent_w=0.5)
        else:
            pt.all_figures_tile()
        ut.show_if_requested()
Example #30
def _test_buffered_generator_general2(bgfunc, bgargs, fgfunc,
                                      target_looptime=1.0, serial_cheat=1,
                                      buffer_size=2, show_serial=True):
    """
    # We are going to generate output of bgfunc in the background while
    # fgfunc is running in the foreground. fgfunc takes results of bffunc as
    # args.
    # --- Hyperparams
    target_looptime = 1.5  # maximum time to run all loops
    """
    import utool as ut
    with ut.Timer('One* call to bgfunc') as t_bgfunc:
        results = [bgfunc(arg) for arg in bgargs]
    bgfunctime = t_bgfunc.ellapsed / len(bgargs)
    #fgfunc = ut.is_prime
    with ut.Timer('One* call to fgfunc') as t_fgfunc:
        [fgfunc(x) for x in results]
    fgfunctime = t_fgfunc.ellapsed / len(bgargs)
    # compute amount of loops to run
    est_looptime = (bgfunctime + fgfunctime)
    _num_loops = round(target_looptime // est_looptime)
    num_data = int(_num_loops // len(bgargs))
    num_loops = int(num_data * len(bgargs))
    serial_cheat = min(serial_cheat, num_data)
    data = ut.flatten([bgargs] * num_data)
    est_tfg = fgfunctime * num_loops
    est_tbg = bgfunctime * num_loops
    est_needed_buffers = fgfunctime / bgfunctime
    print('Estimated stats' + ut.repr4(ut.dict_subset(locals(), [
        'num_loops',
        'bgfunctime', 'fgfunctime', 'est_tfg', 'est_tbg', 'serial_cheat',
        'buffer_size', 'est_needed_buffers',
    ])))
    if show_serial:
        with ut.Timer('serial') as t1:
            # cheat for serial to make it go faster
            for x in map(bgfunc, data[:len(data) // serial_cheat]):
                fgfunc(x)
        t_serial = serial_cheat * t1.ellapsed
        print('...toc(\'adjusted_serial\') = %r' % (t_serial))
    with ut.Timer('ut.buffered_generator') as t2:
        gen_ = ut.buffered_generator(map(bgfunc, data), buffer_size=buffer_size)
        for x in gen_:
            fgfunc(x)
    with ut.Timer('ut.generate') as t3:
        gen_ = ut.generate(bgfunc, data, chunksize=buffer_size, quiet=1, verbose=0)
        for x in gen_:
            fgfunc(x)
    # Compare theoretical vs practical efficiency
    print('\n Theoretical Results')
    def parallel_efficiency(ellapsed, est_tfg, est_tbg):
        return (1 - ((ellapsed - est_tfg) / est_tbg)) * 100
    if show_serial:
        print('Theoretical gain (serial) = %.3f%%' % (
            parallel_efficiency(t_serial, est_tfg, est_tbg),))
    print('Theoretical gain (ut.buffered_generator) = %.3f%%' % (
        parallel_efficiency(t2.ellapsed, est_tfg, est_tbg),))
    print('Theoretical gain (ut.generate) = %.2f%%' % (
        parallel_efficiency(t3.ellapsed, est_tfg, est_tbg),))
    if show_serial:
        prac_tbg = t_serial - est_tfg
        print('\n Practical Results')
        print('Practical gain (serial) = %.3f%%' % (
            parallel_efficiency(t1.ellapsed, est_tfg, prac_tbg),))
        print('Practical gain (ut.buffered_generator) = %.3f%%' % (
            parallel_efficiency(t2.ellapsed, est_tfg, prac_tbg),))
        print('Practical gain (ut.generate) = %.2f%%' % (
            parallel_efficiency(t3.ellapsed, est_tfg, prac_tbg),))
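A quick numeric check of the parallel_efficiency formula with made-up timings: if the foreground work alone should take est_tfg = 1.0s and the background work est_tbg = 2.0s, then a run finishing in 1.5s has hidden 75% of the background cost:

def parallel_efficiency(ellapsed, est_tfg, est_tbg):
    return (1 - ((ellapsed - est_tfg) / est_tbg)) * 100

assert parallel_efficiency(1.5, est_tfg=1.0, est_tbg=2.0) == 75.0   # most bg time hidden
assert parallel_efficiency(3.0, est_tfg=1.0, est_tbg=2.0) == 0.0    # fully serial
assert parallel_efficiency(1.0, est_tfg=1.0, est_tbg=2.0) == 100.0  # fully overlapped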
Example #31
pd.options.display.max_rows = 20
pd.options.display.max_columns = 40
pd.options.display.width = 160
pd.options.display.float_format = lambda x: '%.4f' % (x, )

# PARSE DATABASE
# full_bibman = fix_bib.BibMan('FULL.bib', doc='thesis')

bibman = fix_bib.BibMan('final-bib.bib', doc='thesis')
bibman.sort_entries()
bibman.write_testfile()
bibman.printdiff()
bibman.save()

print('bibman.unregistered_pubs = {}'.format(ut.repr4(
    bibman.unregistered_pubs)))
for pub in bibman.unregistered_pubs:
    if 'None' in str(pub):
        print(ut.repr4(pub.entry))

df = pd.DataFrame.from_dict(bibman.cleaned, orient='index')
del df['abstract']

# want = text.count('@')
want = len(df)

# paged_items = df[~pd.isnull(df['pub_abbrev'])]
# has_pages = ~pd.isnull(paged_items['pages'])
# print('have pages {} / {}'.format(has_pages.sum(), len(has_pages)))
# print(ut.repr4(paged_items[~has_pages]['title'].values.tolist()))
Example #32
def _test_buffered_generator_general(func, args, sleepfunc,
                                     target_looptime=1.0,
                                     serial_cheat=1, argmode=False,
                                     buffer_size=2):
    """
    # We are going to generate output of func in the background while sleep
    # func is running in the foreground
    # --- Hyperparams
    target_looptime = 1.5  # maximum time to run all loops
    """
    import utool as ut
    #serial_cheat = 1  # approx division factor to run serial less times
    show_serial = True  # target_looptime < 10.  # 3.0

    with ut.Timer('One* call to func') as t_fgfunc:
        results = [func(arg) for arg in args]
    functime = t_fgfunc.ellapsed / len(args)
    #sleepfunc = ut.is_prime
    with ut.Timer('One* call to sleep func') as t_sleep:
        if argmode:
            [sleepfunc(x) for x in results]
        else:
            [sleepfunc() for x in results]
    sleeptime = t_sleep.ellapsed / len(args)
    # compute amount of loops to run
    _num_loops = round(target_looptime // (functime + sleeptime))
    num_data = int(_num_loops // len(args))
    num_loops = int(num_data * len(args))
    serial_cheat = min(serial_cheat, num_data)
    data = ut.flatten([args] * num_data)
    est_tsleep = sleeptime * num_loops
    est_tfunc = functime * num_loops
    est_needed_buffers = sleeptime / functime
    print('Estimated stats' + ut.repr4(ut.dict_subset(locals(), [
        'num_loops',
        'functime', 'sleeptime', 'est_tsleep', 'est_tfunc', 'serial_cheat', 'buffer_size',
        'est_needed_buffers',
    ])))
    if show_serial:
        with ut.Timer('serial') as t1:
            # cheat for serial to make it go faster
            for x in map(func, data[:len(data) // serial_cheat]):
                if argmode:
                    sleepfunc(x)
                else:
                    sleepfunc()
        t_serial = serial_cheat * t1.ellapsed
        print('...toc(\'adjusted_serial\') = %r' % (t_serial))
    with ut.Timer('ut.buffered_generator') as t2:
        gen_ = ut.buffered_generator(map(func, data), buffer_size=buffer_size)
        for x in gen_:
            if argmode:
                sleepfunc(x)
            else:
                sleepfunc()
    with ut.Timer('ut.generate') as t3:
        gen_ = ut.generate(func, data, chunksize=buffer_size, quiet=1, verbose=0)
        for x in gen_:
            if argmode:
                sleepfunc(x)
            else:
                sleepfunc()
    # Compare theoretical vs practical efficiency
    print('\n Theoretical Results')
    def parallel_efficiency(ellapsed, est_tsleep, est_tfunc):
        return (1 - ((ellapsed - est_tsleep) / est_tfunc)) * 100
    if show_serial:
        print('Theoretical gain (serial) = %.3f%%' % (
            parallel_efficiency(t_serial, est_tsleep, est_tfunc),))
    print('Theoretical gain (ut.buffered_generator) = %.3f%%' % (
        parallel_efficiency(t2.ellapsed, est_tsleep, est_tfunc),))
    print('Theoretical gain (ut.generate) = %.2f%%' % (
        parallel_efficiency(t3.ellapsed, est_tsleep, est_tfunc),))
    if show_serial:
        prac_tfunc = t_serial - est_tsleep
        print('\n Practical Results')
        print('Practical gain (serial) = %.3f%%' % (
            parallel_efficiency(t1.ellapsed, est_tsleep, prac_tfunc),))
        print('Practical gain (ut.buffered_generator) = %.3f%%' % (
            parallel_efficiency(t2.ellapsed, est_tsleep, prac_tfunc),))
        print('Practical gain (ut.generate) = %.2f%%' % (
            parallel_efficiency(t3.ellapsed, est_tsleep, prac_tfunc),))
Example #33
def compute_vocab(depc, fid_list, config):
    r"""
    Depcache method for computing a new visual vocab

    CommandLine:
        python -m wbia.core_annots --exec-compute_neighbor_index --show
        python -m wbia show_depc_annot_table_input --show --tablename=neighbor_index

        python -m wbia.algo.smk.vocab_indexer --exec-compute_vocab:0
        python -m wbia.algo.smk.vocab_indexer --exec-compute_vocab:1

        # FIXME make util_tests register
        python -m wbia.algo.smk.vocab_indexer compute_vocab:0

    Ignore:
        >>> # Lev Oxford Debug Example
        >>> import wbia
        >>> ibs = wbia.opendb('Oxford')
        >>> depc = ibs.depc
        >>> table = depc['vocab']
        >>> # Check what currently exists in vocab table
        >>> table.print_configs()
        >>> table.print_table()
        >>> table.print_internal_info()
        >>> # Grab aids used to compute vocab
        >>> from wbia.expt.experiment_helpers import get_annotcfg_list
        >>> expanded_aids_list = get_annotcfg_list(ibs, ['oxford'])[1]
        >>> qaids, daids = expanded_aids_list[0]
        >>> vocab_aids = daids
        >>> config = {'num_words': 64000}
        >>> exists = depc.check_rowids('vocab', [vocab_aids], config=config)
        >>> print('exists = %r' % (exists,))
        >>> vocab_rowid = depc.get_rowids('vocab', [vocab_aids], config=config)[0]
        >>> print('vocab_rowid = %r' % (vocab_rowid,))
        >>> vocab = table.get_row_data([vocab_rowid], 'words')[0]
        >>> print('vocab = %r' % (vocab,))

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.smk.vocab_indexer import *  # NOQA
        >>> # Test depcache access
        >>> import wbia
        >>> ibs, aid_list = wbia.testdata_aids('testdb1')
        >>> depc = ibs.depc_annot
        >>> input_tuple = [aid_list]
        >>> rowid_kw = {}
        >>> tablename = 'vocab'
        >>> vocabid_list = depc.get_rowids(tablename, input_tuple, **rowid_kw)
        >>> vocab = depc.get(tablename, input_tuple, 'words')[0]
        >>> assert vocab.wordflann is not None
        >>> assert vocab.wordflann._FLANN__curindex_data is not None
        >>> assert vocab.wordflann._FLANN__curindex_data is vocab.wx_to_word

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.smk.vocab_indexer import *  # NOQA
        >>> import wbia
        >>> ibs, aid_list = wbia.testdata_aids('testdb1')
        >>> depc = ibs.depc_annot
        >>> fid_list = depc.get_rowids('feat', aid_list)
        >>> config = VocabConfig()
        >>> vocab, train_vecs = ut.exec_func_src(compute_vocab, keys=['vocab', 'train_vecs'])
        >>> idx_to_vec = depc.d.get_feat_vecs(aid_list)[0]
        >>> self = vocab
        >>> ut.quit_if_noshow()
        >>> data = train_vecs
        >>> centroids = vocab.wx_to_word
        >>> import wbia.plottool as pt
        >>> vt.plot_centroids(data, centroids, num_pca_dims=2)
        >>> ut.show_if_requested()
        >>> #config = ibs.depc_annot['vocab'].configclass()

    """
    logger.info('[IBEIS] COMPUTE_VOCAB:')
    vecs_list = depc.get_native('feat', fid_list, 'vecs')
    train_vecs = np.vstack(vecs_list).astype(np.float32)
    num_words = config['num_words']
    logger.info(
        '[smk_index] Train Vocab(nWords=%d) using %d annots and %d descriptors'
        % (num_words, len(fid_list), len(train_vecs)))
    if config['algorithm'] == 'kdtree':
        flann_params = vt.get_flann_params(random_seed=42)
        kwds = dict(max_iters=20, flann_params=flann_params)
        words = vt.akmeans(train_vecs, num_words, **kwds)
    elif config['algorithm'] == 'minibatch':
        logger.info('Using minibatch kmeans')
        import sklearn.cluster

        rng = np.random.RandomState(config['random_seed'])
        n_init = config['n_init']
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            init_size = int(num_words * 4)
            batch_size = 1000
            n_batches = ut.get_num_chunks(train_vecs.shape[0], batch_size)
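            # max_iter counts passes over the full dataset, so dividing by
            # n_batches caps the total number of minibatch updates at roughly
            # 30000 regardless of how many descriptors were stacked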
            minibatch_params = dict(
                n_clusters=num_words,
                init='k-means++',
                init_size=init_size,
                n_init=n_init,
                max_iter=30000 // n_batches,
                batch_size=batch_size,
                tol=0.0,
                max_no_improvement=10,
                reassignment_ratio=0.01,
            )
            logger.info('minibatch_params = %s' %
                        (ut.repr4(minibatch_params), ))
            clusterer = sklearn.cluster.MiniBatchKMeans(compute_labels=False,
                                                        random_state=rng,
                                                        verbose=2,
                                                        **minibatch_params)
            try:
                clusterer.fit(train_vecs)
            except (Exception, KeyboardInterrupt) as ex:
                ut.printex(ex, tb=True)
                if ut.is_developer():
                    ut.embed()
                else:
                    raise
        words = clusterer.cluster_centers_
        logger.info('Finished clustering')
    # if False:
    #     flann_params['checks'] = 64
    #     flann_params['trees'] = 4
    #     num_words = 128
    #     centroids = vt.initialize_centroids(num_words, train_vecs, 'akmeans++')
    #     words, hist = vt.akmeans_iterations(
    #         train_vecs, centroids, max_iters=1000, monitor=True,
    #         flann_params=flann_params)

    logger.info('Constructing vocab')
    vocab = VisualVocab(words)
    logger.info('Building vocab index')
    vocab.build()
    logger.info('Returning vocab')
    return (vocab, )
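For reference, the minibatch branch above is mostly scikit-learn plumbing. A minimal standalone sketch of the same vocabulary-building step, using synthetic descriptors (all names here are illustrative, not part of wbia):

import numpy as np
import sklearn.cluster

rng = np.random.RandomState(42)
# stand-in for the stacked SIFT descriptors (train_vecs above)
train_vecs = rng.rand(5000, 128).astype(np.float32)
num_words = 64

clusterer = sklearn.cluster.MiniBatchKMeans(
    n_clusters=num_words,
    init='k-means++',
    batch_size=1000,
    compute_labels=False,
    random_state=rng,
)
clusterer.fit(train_vecs)
words = clusterer.cluster_centers_  # the visual vocabulary, shape (num_words, 128)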
Example #34
0
def inject_instance(self, classkey=None, allow_override=False,
                    verbose=VERBOSE_CLASS, strict=True):
    """
    Injects an instance (self) of type (classkey)
    with all functions registered to (classkey)

    Call this in the class's __init__ function.

    Args:
        self: the class instance
        classkey: key for a class, preferably the class type itself, but it
            doesn't have to be

    SeeAlso:
        make_class_method_decorator

    Example:
        >>> # DOCTEST_DISABLE
        >>> utool.make_class_method_decorator(InvertedIndex)(smk_debug.invindex_dbgstr)
        >>> utool.inject_instance(invindex)
    """
    import utool as ut
    if verbose:
        print('[util_class] begin inject_instance')
    try:
        if classkey is None:
            # Probably should deprecate this block of code
            # It tries to do too much
            classkey = self.__class__
            if classkey == 'ibeis.gui.models_and_views.IBEISTableView':
                # HACK HACK HACK
                # from guitool.__PYQT__ import QtGui  # NOQA
                from guitool.__PYQT__ import QtWidgets  # NOQA
                classkey = QtWidgets.QAbstractItemView
            if len(__CLASSTYPE_ATTRIBUTES__[classkey]) == 0:
                print('[utool] Warning: no classes of type %r are registered' % (classkey,))
                print('[utool] type(self)=%r, self=%r' % (type(self), self))
                print('[utool] Checking to see if anybody else was registered...')
                print('[utool] __CLASSTYPE_ATTRIBUTES__ = ' +
                      ut.repr4(__CLASSTYPE_ATTRIBUTES__.keys()))
                for classtype_, _ in six.iteritems(__CLASSTYPE_ATTRIBUTES__):
                    if isinstance(self, classtype_):
                        classkey = classtype_
                        print('[utool] Warning: using subclass=%r' % (classtype_,))
                        break
        func_list = __CLASSTYPE_ATTRIBUTES__[classkey]
        if verbose:
            print('[util_class] injecting %d methods\n   with classkey=%r\n   into %r'
                  % (len(func_list), classkey, self,))
        for func in func_list:
            if VERBOSE_CLASS:
                print('[util_class] * injecting %r' % (func,))
            method_name = None
            # Allow user to register tuples for aliases
            if isinstance(func, tuple):
                func, method_name = func
            inject_func_as_method(self, func, method_name=method_name,
                                  allow_override=allow_override, verbose=verbose)
    except Exception as ex:
        ut.printex(ex, 'ISSUE WHEN INJECTING %r' % (classkey,),
                      iswarning=not strict)
        if strict:
            raise
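The core mechanism behind inject_func_as_method is binding a plain function to a single instance. A minimal sketch of that idea with only the standard library (this shows the concept, not utool's actual implementation):

import types

class Indexer(object):
    pass

def dbgstr(self):
    # a free function that expects to be used as a method
    return 'Indexer with %d attrs' % (len(vars(self)),)

indexer = Indexer()
# bind the function to this one instance; other instances are unaffected
indexer.dbgstr = types.MethodType(dbgstr, indexer)
print(indexer.dbgstr())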
Example #35
0
        if fpaths:
            cmd_to_fpaths[cmd].extend(fpaths)


for key in cmd_to_fpaths.keys():
    cmd = key.lstrip('\\')
    if not root.find_descendant_type(cmd):
        print(key)

from os.path import abspath, dirname
used_fpaths = ut.flatten(cmd_to_fpaths.values())
used_fpaths = set(ut.emap(abspath, used_fpaths))

all_fpaths = set(ut.emap(abspath, ut.glob('.', ['*.png', '*.jpg'], recursive=True)))

unused = list(all_fpaths - used_fpaths)

unuse_dirs = ut.group_items(unused, ut.emap(dirname, unused))


semi_used = {}
for dpath, fpaths in unuse_dirs.items():
    used_in_dpath = set(ut.ls(dpath)) - set(fpaths)
    if len(used_in_dpath) == 0:
        # completely unused directories
        print(dpath)
    else:
        semi_used[dpath] = fpaths

print(ut.repr4(list(semi_used.keys())))
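Stripped of utool helpers, the audit above is plain set arithmetic over file paths. A rough self-contained equivalent (the used set is hypothetical; the real script derives it from cmd_to_fpaths):

import os
from glob import glob

used_fpaths = {os.path.abspath('figures/plot1.png')}  # hypothetical used set
all_fpaths = {
    os.path.abspath(p)
    for pat in ('**/*.png', '**/*.jpg')
    for p in glob(pat, recursive=True)
}
unused = sorted(all_fpaths - used_fpaths)

# group unused files by directory, like ut.group_items(unused, dirnames)
by_dir = {}
for fpath in unused:
    by_dir.setdefault(os.path.dirname(fpath), []).append(fpath)
for dpath, fpaths in by_dir.items():
    print('%s: %d unused file(s)' % (dpath, len(fpaths)))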
Example #36
0
def makeinit(mod_dpath, exclude_modnames=[], use_star=False):
    r"""
    Args:
        mod_dpath (str):
        exclude_modnames (list): (Defaults to [])
        use_star (bool): (Defaults to False)

    Returns:
        str: init_codeblock

    CommandLine:
        python -m utool.util_autogen makeinit --modname=ibeis.algo

    Example:
        >>> # SCRIPT
        >>> from utool.util_autogen import *  # NOQA
        >>> import utool as ut
        >>> modname = ut.get_argval('--modname', str, default=None)
        >>> mod_dpath = (os.getcwd() if modname is None else
        >>>              ut.get_modpath(modname, prefer_pkg=True))
        >>> mod_dpath = ut.unixpath(mod_dpath)
        >>> mod_fpath = join(mod_dpath, '__init__.py')
        >>> exclude_modnames = ut.get_argval(('--exclude', '-x'), list, default=[])
        >>> use_star = ut.get_argflag('--star')
        >>> init_codeblock = makeinit(mod_dpath, exclude_modnames, use_star)
        >>> ut.dump_autogen_code(mod_fpath, init_codeblock)
    """
    from utool._internal import util_importer
    import utool as ut
    module_name = ut.get_modname_from_modpath(mod_dpath)
    IMPORT_TUPLES = util_importer.make_import_tuples(mod_dpath, exclude_modnames=exclude_modnames)
    initstr = util_importer.make_initstr(module_name, IMPORT_TUPLES)
    regen_command = 'cd %s\n' % (mod_dpath)
    regen_command += '    makeinit.py'
    regen_command += ' --modname={modname}'.format(modname=module_name)
    if use_star:
        regen_command += ' --star'
    if len(exclude_modnames) > 0:
        regen_command += ' -x ' + ' '.join(exclude_modnames)

    regen_block = (ut.codeblock('''
    """
    Regen Command:
        {regen_command}
    """
    ''').format(regen_command=regen_command))

    importstar_codeblock = ut.codeblock(
        '''
        """
        python -c "import {module_name}" --dump-{module_name}-init
        python -c "import {module_name}" --update-{module_name}-init
        """
        __DYNAMIC__ = True
        if __DYNAMIC__:
            # TODO: import all utool external prereqs. Then the imports will not import
            # anything that is already in a toplevel namespace
            # COMMENTED OUT FOR FROZEN __INIT__
            # Dynamically import listed util libraries and their members.
            from utool._internal import util_importer
            # FIXME: this might actually work with rrrr, but things aren't being
            # reimported because they are already in the modules list
            import_execstr = util_importer.dynamic_import(__name__, IMPORT_TUPLES)
            exec(import_execstr)
            DOELSE = False
        else:
            # Do the nonexec import (can force it to happen no matter what if
            # always set to True)
            DOELSE = True

        if DOELSE:
            # <AUTOGEN_INIT>
            pass
            # </AUTOGEN_INIT>
        '''.format(module_name=module_name)
    )

    ts_line = '# Autogenerated on {ts}'.format(ts=ut.get_timestamp('printable'))

    init_codeblock_list = ['# -*- coding: utf-8 -*-', ts_line]
    init_codeblock_list.append(initstr)
    init_codeblock_list.append('\nIMPORT_TUPLES = ' + ut.repr4(IMPORT_TUPLES))
    if use_star:
        init_codeblock_list.append(importstar_codeblock)
    init_codeblock_list.append(regen_block)

    init_codeblock = '\n'.join(init_codeblock_list)
    return init_codeblock
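The heavy lifting in util_importer amounts to enumerating submodules and emitting import lines. A rough sketch of that idea with pkgutil (illustrative only; util_importer's real output is richer):

import pkgutil

def list_submodules(mod_dpath, exclude=()):
    # enumerate importable submodule names in a package directory
    return [name for _, name, _ in pkgutil.iter_modules([mod_dpath])
            if name not in exclude]

def make_flat_initstr(modnames):
    # emit one 'from . import <name>' line per submodule
    return '\n'.join('from . import %s' % (name,) for name in modnames)

# e.g. print(make_flat_initstr(list_submodules('.')))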
Example #37
0
    def do(*cmd_list, **kwargs):
        import utool as ut
        import time
        import six
        import sys
        verbose = kwargs.get('verbose', False)
        orig_print = globals()['print']
        print = ut.partial(orig_print, file=kwargs.get('file', sys.stdout))
        # print('Running xctrl.do script')
        if verbose:
            print('Executing x do: %s' % (ut.repr4(cmd_list),))
        debug = False

        cmdkw = dict(verbose=False, quiet=True, silence=True)
        # http://askubuntu.com/questions/455762/xbindkeys-wont-work-properly
        # Make things work even if other keys are pressed
        defaultsleep = 0.0
        sleeptime = kwargs.get('sleeptime', defaultsleep)
        time.sleep(.05)
        out, err, ret = ut.cmd('xset r off', **cmdkw)
        if debug:
            print('----------')
            print('xset r off')
            print('ret = %r' % (ret,))
            print('err = %r' % (err,))
            print('out = %r' % (out,))

        memory = {}

        tmpverbose = 0
        for count, item in enumerate(cmd_list):
            # print('item = %r' % (item,))
            sleeptime = kwargs.get('sleeptime', defaultsleep)
            if tmpverbose:
                print('moving on')
            tmpverbose = 0
            nocommand = 0

            assert isinstance(item, tuple)
            assert len(item) >= 2
            xcmd, key_ = item[0:2]
            if len(item) >= 3:
                if isinstance(item[2], six.string_types) and item[2].endswith('?'):
                    sleeptime = float(item[2][:-1])
                    tmpverbose = 1
                    print('special command sleep')
                    print('sleeptime = %r' % (sleeptime,))
                else:
                    sleeptime = float(item[2])

            if xcmd == 'focus':
                key_ = str(key_)
                if key_.startswith('$'):
                    key_ = memory[key_[1:]]
                pattern = key_
                win_id = XCtrl.find_window_id(pattern, method='mru')
                if win_id is None:
                    args = ['wmctrl', '-xa', pattern]
                else:
                    args = ['wmctrl', '-ia', hex(win_id)]
            elif xcmd == 'focus_id':
                key_ = str(key_)
                if key_.startswith('$'):
                    key_ = memory[key_[1:]]
                args = ['wmctrl', '-ia', hex(key_)]
            elif xcmd == 'remember_window_id':
                out, err, ret = ut.cmd('xdotool getwindowfocus', **cmdkw)
                memory[key_] = int(out.strip())
                nocommand = True
                args = []
            elif xcmd == 'remember_window_name':
                out, err, ret = ut.cmd('xdotool getwindowfocus getwindowname', **cmdkw)
                import pipes
                memory[key_] = pipes.quote(out.strip())
                nocommand = True
                args = []
            elif xcmd == 'type':
                args = [
                    'xdotool',
                    'keyup', '--window', '0', '7',
                    'type', '--clearmodifiers',
                    '--window', '0', str(key_)
                ]
            elif xcmd == 'type2':
                import pipes
                args = [
                    'xdotool', 'type', pipes.quote(str(key_))
                ]
            elif xcmd == 'xset-r-on':
                args = ['xset', 'r', 'on']
            elif xcmd == 'xset-r-off':
                args = ['xset', 'r', 'off']
            else:
                args = ['xdotool', str(xcmd), str(key_)]

            if verbose or tmpverbose:
                print('\n\n# Step %d' % (count,))
                print(args, ' '.join(args))

            if nocommand:
                continue
                # print('args = %r -> %s' % (args, ' '.join(args),))
            # print('args = %r' % (args,))
            out, err, ret = ut.cmd(*args, **cmdkw)
            if debug:
                print('---- ' + xcmd + ' ------')
                print(' '.join(args))
                print('ret = %r' % (ret,))
                print('err = %r' % (err,))
                print('out = %r' % (out,))

            if sleeptime > 0:
                time.sleep(sleeptime)

        out, err, ret = ut.cmd('xset r on', verbose=False, quiet=True,
                               silence=True)
        if debug:
            print('----------')
            print('xset r on')
            print('ret = %r' % (ret,))
            print('err = %r' % (err,))
            print('out = %r' % (out,))
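Each tuple in cmd_list ultimately turns into one wmctrl or xdotool invocation. A stripped-down interpreter for the same (xcmd, key) shape, assuming those tools are installed:

import subprocess
import time

def run_steps(cmd_list, sleeptime=0.0):
    for xcmd, key_ in cmd_list:
        if xcmd == 'focus':
            args = ['wmctrl', '-xa', str(key_)]
        elif xcmd == 'type':
            args = ['xdotool', 'type', '--clearmodifiers', str(key_)]
        else:
            # fall through to a generic xdotool command, as above
            args = ['xdotool', str(xcmd), str(key_)]
        subprocess.call(args)
        if sleeptime > 0:
            time.sleep(sleeptime)

# run_steps([('focus', 'terminal'), ('type', 'ls'), ('key', 'KP_Enter')])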
Example #38
0
def get_dbinfo(
    ibs,
    verbose=True,
    with_imgsize=False,
    with_bytes=False,
    with_contrib=False,
    with_agesex=False,
    with_header=True,
    short=False,
    tag='dbinfo',
    aid_list=None,
    aids=None,
):
    """

    Returns dictionary of digestable database information
    Infostr is a string summary of all the stats. Prints infostr in addition to
    returning locals

    Args:
        ibs (IBEISController):
        verbose (bool):
        with_imgsize (bool):
        with_bytes (bool):

    Returns:
        dict:

    SeeAlso:
        python -m wbia.other.ibsfuncs --exec-get_annot_stats_dict --db PZ_PB_RF_TRAIN --use-hist=True --old=False --per_name_vpedge=False
        python -m wbia.other.ibsfuncs --exec-get_annot_stats_dict --db PZ_PB_RF_TRAIN --all

    CommandLine:
        python -m wbia.other.dbinfo --exec-get_dbinfo:0
        python -m wbia.other.dbinfo --test-get_dbinfo:1
        python -m wbia.other.dbinfo --test-get_dbinfo:0 --db NNP_Master3
        python -m wbia.other.dbinfo --test-get_dbinfo:0 --db PZ_Master1
        python -m wbia.other.dbinfo --test-get_dbinfo:0 --db GZ_ALL
        python -m wbia.other.dbinfo --exec-get_dbinfo:0 --db PZ_ViewPoints
        python -m wbia.other.dbinfo --exec-get_dbinfo:0 --db GZ_Master1

        python -m wbia.other.dbinfo --exec-get_dbinfo:0 --db LF_Bajo_bonito -a default
        python -m wbia.other.dbinfo --exec-get_dbinfo:0 --db DETECT_SEATURTLES -a default --readonly

        python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a ctrl
        python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a default:minqual=ok,require_timestamp=True --dbdir ~/lev/media/danger/LEWA
        python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a default:minqual=ok,require_timestamp=True --dbdir ~/lev/media/danger/LEWA --loadbackup=0

        python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a default: --dbdir ~/lev/media/danger/LEWA
        python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a default: --dbdir ~/lev/media/danger/LEWA --loadbackup=0

    Example1:
        >>> # SCRIPT
        >>> from wbia.other.dbinfo import *  # NOQA
        >>> import wbia
        >>> defaultdb = 'testdb1'
        >>> ibs, aid_list = wbia.testdata_aids(defaultdb, a='default:minqual=ok,view=primary,view_ext1=1')
        >>> kwargs = ut.get_kwdefaults(get_dbinfo)
        >>> kwargs['verbose'] = False
        >>> kwargs['aid_list'] = aid_list
        >>> kwargs = ut.parse_dict_from_argv(kwargs)
        >>> output = get_dbinfo(ibs, **kwargs)
        >>> result = (output['info_str'])
        >>> print(result)
        >>> #ibs = wbia.opendb(defaultdb='testdb1')
        >>> # <HACK FOR FILTERING>
        >>> #from wbia.expt import cfghelpers
        >>> #from wbia.expt import annotation_configs
        >>> #from wbia.init import filter_annots
        >>> #named_defaults_dict = ut.dict_take(annotation_configs.__dict__,
        >>> #                                   annotation_configs.TEST_NAMES)
        >>> #named_qcfg_defaults = dict(zip(annotation_configs.TEST_NAMES,
        >>> #                               ut.get_list_column(named_defaults_dict, 'qcfg')))
        >>> #acfg = cfghelpers.parse_argv_cfg(('--annot-filter', '-a'), named_defaults_dict=named_qcfg_defaults, default=None)[0]
        >>> #aid_list = ibs.get_valid_aids()
        >>> # </HACK FOR FILTERING>

    Example1:
        >>> # ENABLE_DOCTEST
        >>> from wbia.other.dbinfo import *  # NOQA
        >>> import wbia
        >>> verbose = True
        >>> short = True
        >>> #ibs = wbia.opendb(db='GZ_ALL')
        >>> #ibs = wbia.opendb(db='PZ_Master0')
        >>> ibs = wbia.opendb('testdb1')
        >>> assert ibs.get_dbname() == 'testdb1', 'DO NOT DELETE CONTRIBUTORS OF OTHER DBS'
        >>> ibs.delete_contributors(ibs.get_valid_contributor_rowids())
        >>> ibs.delete_empty_nids()
        >>> #ibs = wbia.opendb(db='PZ_MTEST')
        >>> output = get_dbinfo(ibs, with_contrib=False, verbose=False, short=True)
        >>> result = (output['info_str'])
        >>> print(result)
        +============================
        DB Info:  testdb1
        DB Notes: None
        DB NumContrib: 0
        ----------
        # Names                      = 7
        # Names (unassociated)       = 0
        # Names (singleton)          = 5
        # Names (multiton)           = 2
        ----------
        # Annots                     = 13
        # Annots (unknown)           = 4
        # Annots (singleton)         = 5
        # Annots (multiton)          = 4
        ----------
        # Img                        = 13
        L============================
    """
    # TODO Database size in bytes
    # TODO: occurrence, contributors, etc...
    if aids is not None:
        aid_list = aids

    # Basic variables
    request_annot_subset = False
    _input_aid_list = aid_list  # NOQA
    if aid_list is None:
        valid_aids = ibs.get_valid_aids()
        valid_nids = ibs.get_valid_nids()
        valid_gids = ibs.get_valid_gids()
    else:
        if isinstance(aid_list, str):
            # Hack to get experiment stats on aids
            acfg_name_list = [aid_list]
            logger.info('Specified custom aids via acfgname %s' % (acfg_name_list,))
            from wbia.expt import experiment_helpers

            acfg_list, expanded_aids_list = experiment_helpers.get_annotcfg_list(
                ibs, acfg_name_list
            )
            aid_list = sorted(list(set(ut.flatten(ut.flatten(expanded_aids_list)))))
            # aid_list =
        if verbose:
            logger.info('Specified %d custom aids' % (len(aid_list),))
        request_annot_subset = True
        valid_aids = aid_list
        valid_nids = list(
            set(ibs.get_annot_nids(aid_list, distinguish_unknowns=False))
            - {const.UNKNOWN_NAME_ROWID}
        )
        valid_gids = list(set(ibs.get_annot_gids(aid_list)))
    # associated_nids = ibs.get_valid_nids(filter_empty=True)  # nids with at least one annotation
    valid_images = ibs.images(valid_gids)
    valid_annots = ibs.annots(valid_aids)

    # Image info
    if verbose:
        logger.info('Checking Image Info')
    gx2_aids = valid_images.aids
    if request_annot_subset:
        # remove annots not in this subset
        valid_aids_set = set(valid_aids)
        gx2_aids = [list(set(aids_).intersection(valid_aids_set)) for aids_ in gx2_aids]

    gx2_nAnnots = np.array(list(map(len, gx2_aids)))
    image_without_annots = len(np.where(gx2_nAnnots == 0)[0])
    gx2_nAnnots_stats = ut.repr4(
        ut.get_stats(gx2_nAnnots, use_median=True), nl=0, precision=2, si=True
    )
    image_reviewed_list = ibs.get_image_reviewed(valid_gids)

    # Name stats
    if verbose:
        logger.info('Checking Name Info')
    nx2_aids = ibs.get_name_aids(valid_nids)
    if request_annot_subset:
        # remove annots not in this subset
        valid_aids_set = set(valid_aids)
        nx2_aids = [list(set(aids_).intersection(valid_aids_set)) for aids_ in nx2_aids]
    associated_nids = ut.compress(valid_nids, list(map(len, nx2_aids)))

    ibs.check_name_mapping_consistency(nx2_aids)

    if False:
        # Occurrence Info
        def compute_annot_occurrence_ids(ibs, aid_list):
            from wbia.algo.preproc import preproc_occurrence

            gid_list = ibs.get_annot_gids(aid_list)
            gid2_aids = ut.group_items(aid_list, gid_list)
            config = {'seconds_thresh': 4 * 60 * 60}
            flat_imgsetids, flat_gids = preproc_occurrence.wbia_compute_occurrences(
                ibs, gid_list, config=config, verbose=False
            )
            occurid2_gids = ut.group_items(flat_gids, flat_imgsetids)
            occurid2_aids = {
                oid: ut.flatten(ut.take(gid2_aids, gids))
                for oid, gids in occurid2_gids.items()
            }
            return occurid2_aids

        import utool

        with utool.embed_on_exception_context:
            occurid2_aids = compute_annot_occurrence_ids(ibs, valid_aids)
            occur_nids = ibs.unflat_map(ibs.get_annot_nids, occurid2_aids.values())
            occur_unique_nids = [ut.unique(nids) for nids in occur_nids]
            nid2_occurxs = ut.ddict(list)
            for occurx, nids in enumerate(occur_unique_nids):
                for nid in nids:
                    nid2_occurxs[nid].append(occurx)

        nid2_occurx_single = {
            nid: occurxs for nid, occurxs in nid2_occurxs.items() if len(occurxs) <= 1
        }
        nid2_occurx_resight = {
            nid: occurxs for nid, occurxs in nid2_occurxs.items() if len(occurxs) > 1
        }
        singlesight_encounters = ibs.get_name_aids(nid2_occurx_single.keys())

        singlesight_annot_stats = ut.get_stats(
            list(map(len, singlesight_encounters)), use_median=True, use_sum=True
        )
        resight_name_stats = ut.get_stats(
            list(map(len, nid2_occurx_resight.values())), use_median=True, use_sum=True
        )

    # Encounter Info
    def break_annots_into_encounters(aids):
        from wbia.algo.preproc import occurrence_blackbox
        import datetime

        thresh_sec = datetime.timedelta(minutes=30).seconds
        posixtimes = np.array(ibs.get_annot_image_unixtimes_asfloat(aids))
        # latlons = ibs.get_annot_image_gps(aids)
        labels = occurrence_blackbox.cluster_timespace2(
            posixtimes, None, thresh_sec=thresh_sec
        )
        return labels
        # ave_enc_time = [np.mean(times) for lbl, times in ut.group_items(posixtimes, labels).items()]
        # ut.square_pdist(ave_enc_time)

    try:
        am_rowids = ibs.get_annotmatch_rowids_between_groups([valid_aids], [valid_aids])[
            0
        ]
        aid_pairs = ibs.filter_aidpairs_by_tags(min_num=0, am_rowids=am_rowids)
        undirected_tags = ibs.get_aidpair_tags(
            aid_pairs.T[0], aid_pairs.T[1], directed=False
        )
        tagged_pairs = list(zip(aid_pairs.tolist(), undirected_tags))
        tag_dict = ut.groupby_tags(tagged_pairs, undirected_tags)
        pair_tag_info = ut.map_dict_vals(len, tag_dict)
    except Exception:
        pair_tag_info = {}

    # logger.info(ut.repr2(pair_tag_info))

    # Annot Stats
    # TODO: number of images where chips cover entire image
    # TODO: total image coverage of annotation
    # TODO: total annotation overlap
    """
    ax2_unknown = ibs.is_aid_unknown(valid_aids)
    ax2_nid = ibs.get_annot_name_rowids(valid_aids)
    assert all([nid < 0 if unknown else nid > 0 for nid, unknown in
                zip(ax2_nid, ax2_unknown)]), 'bad annot nid'
    """
    #
    if verbose:
        logger.info('Checking Annot Species')
    unknown_annots = valid_annots.compress(ibs.is_aid_unknown(valid_annots))
    species_list = valid_annots.species_texts
    species2_annots = valid_annots.group_items(valid_annots.species_texts)
    species2_nAids = {key: len(val) for key, val in species2_annots.items()}

    if verbose:
        logger.info('Checking Multiton/Singleton Species')
    nx2_nAnnots = np.array(list(map(len, nx2_aids)))
    # Separate singletons / multitons
    multiton_nxs = np.where(nx2_nAnnots > 1)[0]
    singleton_nxs = np.where(nx2_nAnnots == 1)[0]
    unassociated_nxs = np.where(nx2_nAnnots == 0)[0]
    assert len(np.intersect1d(singleton_nxs, multiton_nxs)) == 0, 'intersecting names'
    valid_nxs = np.hstack([multiton_nxs, singleton_nxs])
    num_names_with_gt = len(multiton_nxs)

    # Annot Info
    if verbose:
        logger.info('Checking Annot Info')
    multiton_aids_list = ut.take(nx2_aids, multiton_nxs)
    assert len(set(multiton_nxs)) == len(multiton_nxs)
    if len(multiton_aids_list) == 0:
        multiton_aids = np.array([], dtype=int)
    else:
        multiton_aids = np.hstack(multiton_aids_list)
        assert len(set(multiton_aids)) == len(multiton_aids), 'duplicate annot'
    singleton_aids = ut.take(nx2_aids, singleton_nxs)
    multiton_nid2_nannots = list(map(len, multiton_aids_list))

    # Image size stats
    if with_imgsize:
        if verbose:
            logger.info('Checking ImageSize Info')
        gpath_list = ibs.get_image_paths(valid_gids)

        def wh_print_stats(wh_list):
            if len(wh_list) == 0:
                return '{empty}'
            wh_list = np.asarray(wh_list)
            stat_dict = collections.OrderedDict(
                [
                    ('max', wh_list.max(0)),
                    ('min', wh_list.min(0)),
                    ('mean', wh_list.mean(0)),
                    ('std', wh_list.std(0)),
                ]
            )

            def arr2str(var):
                return '[' + (', '.join(list(map(lambda x: '%.1f' % x, var)))) + ']'

            ret = ',\n    '.join(
                ['%s:%s' % (key, arr2str(val)) for key, val in stat_dict.items()]
            )
            return '{\n    ' + ret + '\n}'

        logger.info('reading image sizes')
        # Image size stats
        img_size_list = ibs.get_image_sizes(valid_gids)
        img_size_stats = wh_print_stats(img_size_list)

        # Chip size stats
        annotation_bbox_list = ibs.get_annot_bboxes(valid_aids)
        annotation_bbox_arr = np.array(annotation_bbox_list)
        if len(annotation_bbox_arr) == 0:
            annotation_size_list = []
        else:
            annotation_size_list = annotation_bbox_arr[:, 2:4]
        chip_size_stats = wh_print_stats(annotation_size_list)
        imgsize_stat_lines = [
            (' # Img in dir                 = %d' % len(gpath_list)),
            (' Image Size Stats  = %s' % (img_size_stats,)),
            (' * Chip Size Stats = %s' % (chip_size_stats,)),
        ]
    else:
        imgsize_stat_lines = []

    if verbose:
        logger.info('Building Stats String')

    multiton_stats = ut.repr3(
        ut.get_stats(multiton_nid2_nannots, use_median=True), nl=0, precision=2, si=True
    )

    # Time stats
    unixtime_list = valid_images.unixtime2
    # valid_unixtime_list = [time for time in unixtime_list if time != -1]
    # unixtime_statstr = ibs.get_image_time_statstr(valid_gids)
    if ut.get_argflag('--hackshow-unixtime'):
        show_time_distributions(ibs, unixtime_list)
        ut.show_if_requested()
    unixtime_statstr = ut.repr3(ut.get_timestats_dict(unixtime_list, full=True), si=True)

    # GPS stats
    gps_list_ = ibs.get_image_gps(valid_gids)
    gpsvalid_list = [gps != (-1, -1) for gps in gps_list_]
    gps_list = ut.compress(gps_list_, gpsvalid_list)

    def get_annot_age_stats(aid_list):
        annot_age_months_est_min = ibs.get_annot_age_months_est_min(aid_list)
        annot_age_months_est_max = ibs.get_annot_age_months_est_max(aid_list)
        age_dict = ut.ddict((lambda: 0))
        for min_age, max_age in zip(annot_age_months_est_min, annot_age_months_est_max):
            if max_age is None:
                max_age = min_age
            if min_age is None:
                min_age = max_age
            if max_age is None and min_age is None:
                logger.info('Found UNKNOWN Age: %r, %r' % (min_age, max_age,))
                age_dict['UNKNOWN'] += 1
            elif (min_age is None or min_age < 12) and max_age < 12:
                age_dict['Infant'] += 1
            elif 12 <= min_age and min_age < 36 and 12 <= max_age and max_age < 36:
                age_dict['Juvenile'] += 1
            elif 36 <= min_age and (max_age is None or 36 <= max_age):
                age_dict['Adult'] += 1
        return age_dict

    def get_annot_sex_stats(aid_list):
        annot_sextext_list = ibs.get_annot_sex_texts(aid_list)
        sextext2_aids = ut.group_items(aid_list, annot_sextext_list)
        sex_keys = list(ibs.const.SEX_TEXT_TO_INT.keys())
        assert set(sex_keys) >= set(annot_sextext_list), 'bad keys: ' + str(
            set(annot_sextext_list) - set(sex_keys)
        )
        sextext2_nAnnots = ut.odict(
            [(key, len(sextext2_aids.get(key, []))) for key in sex_keys]
        )
        # Filter 0's
        sextext2_nAnnots = {
            key: val for key, val in six.iteritems(sextext2_nAnnots) if val != 0
        }
        return sextext2_nAnnots

    def get_annot_qual_stats(ibs, aid_list):
        annots = ibs.annots(aid_list)
        qualtext2_nAnnots = ut.order_dict_by(
            ut.map_vals(len, annots.group_items(annots.quality_texts)),
            list(ibs.const.QUALITY_TEXT_TO_INT.keys()),
        )
        return qualtext2_nAnnots

    def get_annot_viewpoint_stats(ibs, aid_list):
        annots = ibs.annots(aid_list)
        viewcode2_nAnnots = ut.order_dict_by(
            ut.map_vals(len, annots.group_items(annots.viewpoint_code)),
            list(ibs.const.VIEW.CODE_TO_INT.keys()) + [None],
        )
        return viewcode2_nAnnots

    if verbose:
        logger.info('Checking Other Annot Stats')

    qualtext2_nAnnots = get_annot_qual_stats(ibs, valid_aids)
    viewcode2_nAnnots = get_annot_viewpoint_stats(ibs, valid_aids)
    agetext2_nAnnots = get_annot_age_stats(valid_aids)
    sextext2_nAnnots = get_annot_sex_stats(valid_aids)

    if verbose:
        logger.info('Checking Contrib Stats')

    # Contributor Statistics
    # hack remove colon for image alignment
    def fix_tag_list(tag_list):
        return [None if tag is None else tag.replace(':', ';') for tag in tag_list]

    image_contributor_tags = fix_tag_list(ibs.get_image_contributor_tag(valid_gids))
    annot_contributor_tags = fix_tag_list(ibs.get_annot_image_contributor_tag(valid_aids))
    contributor_tag_to_gids = ut.group_items(valid_gids, image_contributor_tags)
    contributor_tag_to_aids = ut.group_items(valid_aids, annot_contributor_tags)

    contributor_tag_to_qualstats = {
        key: get_annot_qual_stats(ibs, aids)
        for key, aids in six.iteritems(contributor_tag_to_aids)
    }
    contributor_tag_to_viewstats = {
        key: get_annot_viewpoint_stats(ibs, aids)
        for key, aids in six.iteritems(contributor_tag_to_aids)
    }

    contributor_tag_to_nImages = {
        key: len(val) for key, val in six.iteritems(contributor_tag_to_gids)
    }
    contributor_tag_to_nAnnots = {
        key: len(val) for key, val in six.iteritems(contributor_tag_to_aids)
    }

    if verbose:
        logger.info('Summarizing')

    # Summarize stats
    num_names = len(valid_nids)
    num_names_unassociated = len(valid_nids) - len(associated_nids)
    num_names_singleton = len(singleton_nxs)
    num_names_multiton = len(multiton_nxs)

    num_singleton_annots = len(singleton_aids)
    num_multiton_annots = len(multiton_aids)
    num_unknown_annots = len(unknown_annots)
    num_annots = len(valid_aids)

    if with_bytes:
        if verbose:
            logger.info('Checking Disk Space')
        ibsdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_ibsdir()))
        dbdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_dbdir()))
        imgdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_imgdir()))
        cachedir_space = ut.byte_str2(ut.get_disk_space(ibs.get_cachedir()))

    if True:
        if verbose:
            logger.info('Check asserts')
        try:
            bad_aids = np.intersect1d(multiton_aids, unknown_annots)
            _num_names_total_check = (
                num_names_singleton + num_names_unassociated + num_names_multiton
            )
            _num_annots_total_check = (
                num_unknown_annots + num_singleton_annots + num_multiton_annots
            )
            assert len(bad_aids) == 0, 'intersecting multiton aids and unknown aids'
            assert _num_names_total_check == num_names, 'inconsistent num names'
            # if not request_annot_subset:
            # dont check this if you have an annot subset
            assert _num_annots_total_check == num_annots, 'inconsistent num annots'
        except Exception as ex:
            ut.printex(
                ex,
                keys=[
                    '_num_names_total_check',
                    'num_names',
                    '_num_annots_total_check',
                    'num_annots',
                    'num_names_singleton',
                    'num_names_multiton',
                    'num_unknown_annots',
                    'num_multiton_annots',
                    'num_singleton_annots',
                ],
            )
            raise

    # Get contributor statistics
    contributor_rowids = ibs.get_valid_contributor_rowids()
    num_contributors = len(contributor_rowids)

    # print
    num_tabs = 5

    def align2(str_):
        return ut.align(str_, ':', ' :')

    def align_dict2(dict_):
        str_ = ut.repr2(dict_, si=True)
        return align2(str_)

    header_block_lines = [('+============================')] + (
        [
            ('+ singleton := single sighting'),
            ('+ multiton  := multiple sightings'),
            ('--' * num_tabs),
        ]
        if not short and with_header
        else []
    )

    source_block_lines = [
        ('DB Info:  ' + ibs.get_dbname()),
        ('DB Notes: ' + ibs.get_dbnotes()),
        ('DB NumContrib: %d' % num_contributors),
    ]

    bytes_block_lines = (
        [
            ('--' * num_tabs),
            ('DB Bytes: '),
            ('     +- dbdir nBytes:         ' + dbdir_space),
            ('     |  +- _ibsdb nBytes:     ' + ibsdir_space),
            ('     |  |  +-imgdir nBytes:   ' + imgdir_space),
            ('     |  |  +-cachedir nBytes: ' + cachedir_space),
        ]
        if with_bytes
        else []
    )

    name_block_lines = [
        ('--' * num_tabs),
        ('# Names                      = %d' % num_names),
        ('# Names (unassociated)       = %d' % num_names_unassociated),
        ('# Names (singleton)          = %d' % num_names_singleton),
        ('# Names (multiton)           = %d' % num_names_multiton),
    ]

    subset_str = '        ' if not request_annot_subset else '(SUBSET)'

    annot_block_lines = [
        ('--' * num_tabs),
        ('# Annots %s            = %d' % (subset_str, num_annots,)),
        ('# Annots (unknown)           = %d' % num_unknown_annots),
        ('# Annots (singleton)         = %d' % num_singleton_annots),
        ('# Annots (multiton)          = %d' % num_multiton_annots),
    ]

    annot_per_basic_block_lines = (
        [
            ('--' * num_tabs),
            ('# Annots per Name (multiton) = %s' % (align2(multiton_stats),)),
            ('# Annots per Image           = %s' % (align2(gx2_nAnnots_stats),)),
            ('# Annots per Species         = %s' % (align_dict2(species2_nAids),)),
        ]
        if not short
        else []
    )

    occurrence_block_lines = (
        [
            ('--' * num_tabs),
            # ('# Occurrence Per Name (Resights) = %s' % (align_dict2(resight_name_stats),)),
            # ('# Annots per Encounter (Singlesights) = %s' % (align_dict2(singlesight_annot_stats),)),
            ('# Pair Tag Info (annots) = %s' % (align_dict2(pair_tag_info),)),
        ]
        if not short
        else []
    )

    annot_per_qualview_block_lines = [
        None if short else '# Annots per Viewpoint = %s' % align_dict2(viewcode2_nAnnots),
        None if short else '# Annots per Quality = %s' % align_dict2(qualtext2_nAnnots),
    ]

    annot_per_agesex_block_lines = (
        [
            '# Annots per Age = %s' % align_dict2(agetext2_nAnnots),
            '# Annots per Sex = %s' % align_dict2(sextext2_nAnnots),
        ]
        if not short and with_agesex
        else []
    )

    contributor_block_lines = (
        [
            '# Images per contributor       = ' + align_dict2(contributor_tag_to_nImages),
            '# Annots per contributor       = ' + align_dict2(contributor_tag_to_nAnnots),
            '# Quality per contributor      = '
            + ut.repr2(contributor_tag_to_qualstats, sorted_=True),
            '# Viewpoint per contributor    = '
            + ut.repr2(contributor_tag_to_viewstats, sorted_=True),
        ]
        if with_contrib
        else []
    )

    img_block_lines = [
        ('--' * num_tabs),
        ('# Img                        = %d' % len(valid_gids)),
        None
        if short
        else ('# Img reviewed               = %d' % sum(image_reviewed_list)),
        None if short else ('# Img with gps               = %d' % len(gps_list)),
        # ('# Img with timestamp         = %d' % len(valid_unixtime_list)),
        None
        if short
        else ('Img Time Stats               = %s' % (align2(unixtime_statstr),)),
    ]

    info_str_lines = (
        header_block_lines
        + bytes_block_lines
        + source_block_lines
        + name_block_lines
        + annot_block_lines
        + annot_per_basic_block_lines
        + occurrence_block_lines
        + annot_per_qualview_block_lines
        + annot_per_agesex_block_lines
        + img_block_lines
        + contributor_block_lines
        + imgsize_stat_lines
        + [('L============================')]
    )
    info_str = '\n'.join(ut.filter_Nones(info_str_lines))
    info_str2 = ut.indent(info_str, '[{tag}]'.format(tag=tag))
    if verbose:
        logger.info(info_str2)
    locals_ = locals()
    return locals_
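The singleton/multiton bookkeeping reduces to grouping annotations by name and binning the group sizes. A toy version of that core step with synthetic name ids (0 standing in for unknown):

import numpy as np

annot_nids = np.array([1, 1, 2, 3, 3, 3, 0, 4])  # hypothetical nid per annot
known = annot_nids[annot_nids > 0]
nids, counts = np.unique(known, return_counts=True)

num_names_singleton = int((counts == 1).sum())  # names sighted exactly once
num_names_multiton = int((counts > 1).sum())    # names sighted multiple times
num_unknown_annots = int((annot_nids == 0).sum())
print(num_names_singleton, num_names_multiton, num_unknown_annots)  # 2 2 1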
Example #39
0
def grep_projects(tofind_list, user_profile=None, verbose=True, new=False,
                  **kwargs):
    r"""
    Greps the projects defined in the current UserProfile

    Args:
        tofind_list (list):
        user_profile (None): (default = None)

    Kwargs:
        user_profile

    CommandLine:
        python -m utool --tf grep_projects grep_projects

    Example:
        >>> # DISABLE_DOCTEST
        >>> from utool.util_project import *  # NOQA
        >>> import utool as ut
        >>> import sys
        >>> tofind_list = ut.get_argval('--find', type_=list,
        >>>                             default=[sys.argv[-1]])
        >>> grep_projects(tofind_list)
    """
    import utool as ut
    user_profile = ensure_user_profile(user_profile)

    kwargs = kwargs.copy()
    colored = kwargs.pop('colored', True)

    grepkw = {}
    grepkw['greater_exclude_dirs'] = user_profile.project_exclude_dirs
    grepkw['exclude_dirs'] = user_profile.project_exclude_dirs
    grepkw['dpath_list'] = user_profile.project_dpaths
    grepkw['include_patterns'] = user_profile.project_include_patterns
    grepkw['exclude_patterns'] = user_profile.project_exclude_patterns
    grepkw.update(kwargs)

    msg_list1 = []
    msg_list2 = []

    print_ = msg_list1.append
    print_('Grepping Projects')
    print_('tofind_list = %s' % (ut.repr4(tofind_list, nl=True),))
    #print_('grepkw = %s' % ut.repr4(grepkw, nl=True))
    if verbose:
        print('\n'.join(msg_list1))
    #with ut.Timer('greping', verbose=True):
    grep_result = ut.grep(tofind_list, **grepkw)
    found_fpath_list, found_lines_list, found_lxs_list = grep_result

    # HACK, duplicate behavior. TODO: write grep print result function
    reflags = grepkw.get('reflags', 0)
    _exprs_flags = [ut.extend_regex2(expr, reflags)
                    for expr in tofind_list]
    extended_regex_list = ut.take_column(_exprs_flags, 0)
    reflags_list = ut.take_column(_exprs_flags, 1)
    # HACK
    # pat = ut.util_regex.regex_or(extended_regex_list)
    reflags = reflags_list[0]

    # from utool import util_regex
    resultstr = ut.make_grep_resultstr(grep_result, extended_regex_list,
                                       reflags, colored=colored)
    msg_list2.append(resultstr)
    print_ = msg_list2.append
    #for fpath, lines, lxs in zip(found_fpath_list, found_lines_list,
    #                             found_lxs_list):
    #    print_('----------------------')
    #    print_('found %d line(s) in %r: ' % (len(lines), fpath))
    #    name = split(fpath)[1]
    #    max_line = len(lines)
    #    ndigits = str(len(str(max_line)))
    #    for (lx, line) in zip(lxs, lines):
    #        line = line.replace('\n', '')
    #        print_(('%s : %' + ndigits + 'd |%s') % (name, lx, line))
    # iter_ = zip(found_fpath_list, found_lines_list, found_lxs_list)
    # for fpath, lines, lxs in iter_:
    #     print_('----------------------')
    #     print_('found %d line(s) in %r: ' % (len(lines), fpath))
    #     name = split(fpath)[1]
    #     max_line = len(lines)
    #     ndigits = str(len(str(max_line)))
    #     for (lx, line) in zip(lxs, lines):
    #         line = line.replace('\n', '')
    #         colored_line = ut.highlight_regex(
    #             line.rstrip('\n'), pat, reflags=reflags)
    #         print_(('%s : %' + ndigits + 'd |%s') % (name, lx, colored_line))

    print_('====================')
    print_('found_fpath_list = ' + ut.repr4(found_fpath_list))
    print_('')
    #print_('gvim -o ' + ' '.join(found_fpath_list))
    if verbose:
        print('\n'.join(msg_list2))
    msg_list = msg_list1 + msg_list2

    if new:
        return GrepResult(found_fpath_list, found_lines_list, found_lxs_list, extended_regex_list, reflags)
    else:
        return msg_list
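At its core grep_projects is a recursive regex search over a list of project roots. A bare-bones stand-in with pathlib and re (paths and patterns here are illustrative):

import re
from pathlib import Path

def grep_dirs(pattern, dpaths, include='*.py'):
    regex = re.compile(pattern)
    for dpath in dpaths:
        for fpath in Path(dpath).expanduser().rglob(include):
            try:
                text = fpath.read_text(errors='replace')
            except OSError:
                continue
            for lx, line in enumerate(text.splitlines(), start=1):
                if regex.search(line):
                    print('%s:%d: %s' % (fpath, lx, line.strip()))

# grep_dirs(r'repr4', ['~/code/utool'])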
def run_asmk_script():
    with ut.embed_on_exception_context:  # NOQA
        """
    >>> from wbia.algo.smk.script_smk import *
    """

  # NOQA

        # ==============================================
        # PREPROCESSING CONFIGURATION
        # ==============================================
        config = {
            # 'data_year': 2013,
            'data_year': None,
            'dtype': 'float32',
            # 'root_sift': True,
            'root_sift': False,
            # 'centering': True,
            'centering': False,
            'num_words': 2**16,
            # 'num_words': 1E6
            # 'num_words': 8000,
            'kmeans_impl': 'sklearn.mini',
            'extern_words': False,
            'extern_assign': False,
            'assign_algo': 'kdtree',
            'checks': 1024,
            'int_rvec': True,
            'only_xy': False,
        }
        # Define which params are relevant for which operations
        relevance = {}
        relevance['feats'] = ['dtype', 'root_sift', 'centering', 'data_year']
        relevance['words'] = relevance['feats'] + [
            'num_words',
            'extern_words',
            'kmeans_impl',
        ]
        relevance['assign'] = relevance['words'] + [
            'checks',
            'extern_assign',
            'assign_algo',
        ]
        # relevance['ydata'] = relevance['assign'] + ['int_rvec']
        # relevance['xdata'] = relevance['assign'] + ['only_xy', 'int_rvec']

        nAssign = 1

        class SMKCacher(ut.Cacher):
            def __init__(self, fname, ext='.cPkl'):
                relevant_params = relevance[fname]
                relevant_cfg = ut.dict_subset(config, relevant_params)
                cfgstr = ut.get_cfg_lbl(relevant_cfg)
                dbdir = ut.truepath('/raid/work/Oxford/')
                super(SMKCacher, self).__init__(fname,
                                                cfgstr,
                                                cache_dir=dbdir,
                                                ext=ext)
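        # Note: each stage keys its cfgstr off only the config values listed
        # in `relevance`, so changing e.g. 'num_words' invalidates the cached
        # 'words' and 'assign' stages but leaves the cached 'feats' intact.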

        # ==============================================
        # LOAD DATASET, EXTRACT AND POSTPROCESS FEATURES
        # ==============================================
        if config['data_year'] == 2007:
            data = load_oxford_2007()
        elif config['data_year'] == 2013:
            data = load_oxford_2013()
        elif config['data_year'] is None:
            data = load_oxford_wbia()

        offset_list = data['offset_list']
        all_kpts = data['all_kpts']
        raw_vecs = data['all_vecs']
        query_uri_order = data['query_uri_order']
        data_uri_order = data['data_uri_order']
        # del data

        # ================
        # PRE-PROCESS
        # ================
        import vtool as vt

        # Alias names to avoid errors in interactive sessions
        proc_vecs = raw_vecs
        del raw_vecs

        feats_cacher = SMKCacher('feats', ext='.npy')
        all_vecs = feats_cacher.tryload()
        if all_vecs is None:
            if config['dtype'] == 'float32':
                logger.info('Converting vecs to float32')
                proc_vecs = proc_vecs.astype(np.float32)
            else:
                proc_vecs = proc_vecs
                raise NotImplementedError('other dtype')

            if config['root_sift']:
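                # RootSIFT-style Hellinger mapping: elementwise sqrt followed
                # by L2 re-normalization, so descriptor dot products behave
                # like a Hellinger kernel on the originals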
                with ut.Timer('Apply root sift'):
                    np.sqrt(proc_vecs, out=proc_vecs)
                    vt.normalize(proc_vecs, ord=2, axis=1, out=proc_vecs)

            if config['centering']:
                with ut.Timer('Apply centering'):
                    mean_vec = np.mean(proc_vecs, axis=0)
                    # Center and then re-normalize
                    np.subtract(proc_vecs, mean_vec[None, :], out=proc_vecs)
                    vt.normalize(proc_vecs, ord=2, axis=1, out=proc_vecs)

            if config['dtype'] == 'int8':
                # placeholder reference; int8 preprocessing is not implemented here
                smk_funcs  # NOQA

            all_vecs = proc_vecs
            feats_cacher.save(all_vecs)
        del proc_vecs

        # =====================================
        # BUILD VISUAL VOCABULARY
        # =====================================
        if config['extern_words']:
            words = data['words']
            assert config['num_words'] is None or len(
                words) == config['num_words']
        else:
            word_cacher = SMKCacher('words')
            words = word_cacher.tryload()
            if words is None:
                with ut.embed_on_exception_context:
                    if config['kmeans_impl'] == 'sklearn.mini':
                        import sklearn.cluster

                        rng = np.random.RandomState(13421421)
                        # init_size = int(config['num_words'] * 8)
                        init_size = int(config['num_words'] * 4)
                        # converged after 26043 iterations
                        clusterer = sklearn.cluster.MiniBatchKMeans(
                            config['num_words'],
                            init_size=init_size,
                            batch_size=1000,
                            compute_labels=False,
                            max_iter=20,
                            random_state=rng,
                            n_init=1,
                            verbose=1,
                        )
                        clusterer.fit(all_vecs)
                        words = clusterer.cluster_centers_
                    elif config['kmeans_impl'] == 'yael':
                        from yael import ynumpy

                        centroids, qerr, dis, assign, nassign = ynumpy.kmeans(
                            all_vecs,
                            config['num_words'],
                            init='kmeans++',
                            verbose=True,
                            output='all',
                        )
                        words = centroids
                    word_cacher.save(words)

        # =====================================
        # ASSIGN EACH VECTOR TO ITS NEAREST WORD
        # =====================================
        if config['extern_assign']:
            assert config[
                'extern_words'], 'need extern cluster to extern assign'
            idx_to_wxs = vt.atleast_nd(data['idx_to_wx'], 2)
            idx_to_maws = np.ones(idx_to_wxs.shape, dtype=np.float32)
            idx_to_wxs = np.ma.array(idx_to_wxs)
            idx_to_maws = np.ma.array(idx_to_maws)
            assign_tup = (idx_to_wxs, idx_to_maws)  # keep the later unpack valid
        else:
            from wbia.algo.smk import vocab_indexer

            vocab = vocab_indexer.VisualVocab(words)
            dassign_cacher = SMKCacher('assign')
            assign_tup = dassign_cacher.tryload()
            if assign_tup is None:
                vocab.flann_params['algorithm'] = config['assign_algo']
                vocab.build()
                # Takes 12 minutes to assign jegous vecs to 2**16 vocab
                with ut.Timer('assign vocab neighbors'):
                    _idx_to_wx, _idx_to_wdist = vocab.nn_index(
                        all_vecs, nAssign, checks=config['checks'])
                    if nAssign > 1:
                        idx_to_wxs, idx_to_maws = smk_funcs.weight_multi_assigns(
                            _idx_to_wx,
                            _idx_to_wdist,
                            massign_alpha=1.2,
                            massign_sigma=80.0,
                            massign_equal_weights=True,
                        )
                    else:
                        idx_to_wxs = np.ma.masked_array(_idx_to_wx,
                                                        fill_value=-1)
                        idx_to_maws = np.ma.ones(idx_to_wxs.shape,
                                                 fill_value=-1,
                                                 dtype=np.float32)
                        idx_to_maws.mask = idx_to_wxs.mask
                assign_tup = (idx_to_wxs, idx_to_maws)
                dassign_cacher.save(assign_tup)

        idx_to_wxs, idx_to_maws = assign_tup

        # Breakup vectors, keypoints, and word assignments by annotation
        wx_lists = [
            idx_to_wxs[left:right] for left, right in ut.itertwo(offset_list)
        ]
        maw_lists = [
            idx_to_maws[left:right] for left, right in ut.itertwo(offset_list)
        ]
        vecs_list = [
            all_vecs[left:right] for left, right in ut.itertwo(offset_list)
        ]
        kpts_list = [
            all_kpts[left:right] for left, right in ut.itertwo(offset_list)
        ]

        # =======================
        # FIND QUERY SUBREGIONS
        # =======================

        ibs, query_annots, data_annots, qx_to_dx = load_ordered_annots(
            data_uri_order, query_uri_order)
        daids = data_annots.aids
        qaids = query_annots.aids

        query_super_kpts = ut.take(kpts_list, qx_to_dx)
        query_super_vecs = ut.take(vecs_list, qx_to_dx)
        query_super_wxs = ut.take(wx_lists, qx_to_dx)
        query_super_maws = ut.take(maw_lists, qx_to_dx)
        # Mark which keypoints are within the bbox of the query
        query_flags_list = []
        only_xy = config['only_xy']
        for kpts_, bbox in zip(query_super_kpts, query_annots.bboxes):
            flags = kpts_inside_bbox(kpts_, bbox, only_xy=only_xy)
            query_flags_list.append(flags)

        logger.info('Queries are crops of existing database images.')
        logger.info('Looking at average percents')
        percent_list = [
            flags_.sum() / flags_.shape[0] for flags_ in query_flags_list
        ]
        percent_stats = ut.get_stats(percent_list)
        logger.info('percent_stats = %s' % (ut.repr4(percent_stats), ))

        import vtool as vt

        query_kpts = vt.zipcompress(query_super_kpts, query_flags_list, axis=0)
        query_vecs = vt.zipcompress(query_super_vecs, query_flags_list, axis=0)
        query_wxs = vt.zipcompress(query_super_wxs, query_flags_list, axis=0)
        query_maws = vt.zipcompress(query_super_maws, query_flags_list, axis=0)

        # =======================
        # CONSTRUCT QUERY / DATABASE REPR
        # =======================

        # int_rvec = not config['dtype'].startswith('float')
        int_rvec = config['int_rvec']

        X_list = []
        _prog = ut.ProgPartial(length=len(qaids),
                               label='new X',
                               bs=True,
                               adjust=True)
        for aid, fx_to_wxs, fx_to_maws in _prog(
                zip(qaids, query_wxs, query_maws)):
            X = new_external_annot(aid, fx_to_wxs, fx_to_maws, int_rvec)
            X_list.append(X)

        # ydata_cacher = SMKCacher('ydata')
        # Y_list = ydata_cacher.tryload()
        # if Y_list is None:
        Y_list = []
        _prog = ut.ProgPartial(length=len(daids),
                               label='new Y',
                               bs=True,
                               adjust=True)
        for aid, fx_to_wxs, fx_to_maws in _prog(zip(daids, wx_lists,
                                                    maw_lists)):
            Y = new_external_annot(aid, fx_to_wxs, fx_to_maws, int_rvec)
            Y_list.append(Y)
        # ydata_cacher.save(Y_list)

        # ======================
        # Add in some groundtruth

        logger.info('Add in some groundtruth')
        for Y, nid in zip(Y_list, ibs.get_annot_nids(daids)):
            Y.nid = nid

        for X, nid in zip(X_list, ibs.get_annot_nids(qaids)):
            X.nid = nid

        for Y, qual in zip(Y_list, ibs.get_annot_quality_texts(daids)):
            Y.qual = qual

        # ======================
        # Add in other properties
        for Y, vecs, kpts in zip(Y_list, vecs_list, kpts_list):
            Y.vecs = vecs
            Y.kpts = kpts

        imgdir = ut.truepath('/raid/work/Oxford/oxbuild_images')
        for Y, imgid in zip(Y_list, data_uri_order):
            gpath = ut.unixjoin(imgdir, imgid + '.jpg')
            Y.gpath = gpath

        for X, vecs, kpts in zip(X_list, query_vecs, query_kpts):
            X.kpts = kpts
            X.vecs = vecs

        # ======================
        logger.info('Building inverted list')
        daids = [Y.aid for Y in Y_list]
        # wx_list = sorted(ut.list_union(*[Y.wx_list for Y in Y_list]))
        wx_list = sorted(set.union(*[Y.wx_set for Y in Y_list]))
        assert daids == data_annots.aids
        assert len(wx_list) <= config['num_words']

        wx_to_aids = smk_funcs.invert_lists(daids, [Y.wx_list for Y in Y_list],
                                            all_wxs=wx_list)

        # Compute IDF weights
        logger.info('Compute IDF weights')
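        # inverse document frequency: words that appear in few database annots
        # get large weights, e.g. idf(w) ~ ln(ndocs_total / ndocs_per_word[w])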
        ndocs_total = len(daids)
        # Use only the unique number of words
        ndocs_per_word = np.array([len(set(wx_to_aids[wx])) for wx in wx_list])
        logger.info('ndocs_perword stats: ' +
                    ut.repr4(ut.get_stats(ndocs_per_word)))
        idf_per_word = smk_funcs.inv_doc_freq(ndocs_total, ndocs_per_word)
        wx_to_weight = dict(zip(wx_list, idf_per_word))
        logger.info('idf stats: ' +
                    ut.repr4(ut.get_stats(wx_to_weight.values())))

        # Filter junk
        Y_list_ = [Y for Y in Y_list if Y.qual != 'junk']

        # =======================
        # CHOOSE QUERY KERNEL
        # =======================
        params = {
            'asmk': dict(alpha=3.0, thresh=0.0),
            'bow': dict(),
            'bow2': dict(),
        }
        # method = 'bow'
        method = 'bow2'
        method = 'asmk'
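        # ASMK matches aggregated residuals on words shared by X and Y,
        # weighting each word by idf and a selectivity function
        # sigma(u) = sign(u) * |u|**alpha, zeroed below `thresh`;
        # 'bow2' is a plain tf-idf bag-of-words baseline.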
        smk = SMK(wx_to_weight, method=method, **params[method])

        # Specific info for the type of query
        if method == 'asmk':
            # Make residual vectors
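            # An aggregated residual for word w is the (normalized) sum of
            # (descriptor - word_center) over all descriptors assigned to w,
            # giving one vector per (annot, word) pair instead of per descriptor.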
            if True:
                # The stacked way is 50x faster
                # TODO: extend for multi-assignment and record fxs
                flat_query_vecs = np.vstack(query_vecs)
                flat_query_wxs = np.vstack(query_wxs)
                flat_query_offsets = np.array(
                    [0] + ut.cumsum(ut.lmap(len, query_wxs)))

                flat_wxs_assign = flat_query_wxs
                flat_offsets = flat_query_offsets
                flat_vecs = flat_query_vecs
                tup = smk_funcs.compute_stacked_agg_rvecs(
                    words, flat_wxs_assign, flat_vecs, flat_offsets)
                all_agg_vecs, all_error_flags, agg_offset_list = tup
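                # In sketch form, each aggregated residual is the sum of
                # (vec - word) over the descriptors assigned to a word,
                # L2-normalized afterwards; error flags mark words whose
                # aggregate collapsed to (near) zero.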
                if int_rvec:
                    all_agg_vecs = smk_funcs.cast_residual_integer(
                        all_agg_vecs)
                agg_rvecs_list = [
                    all_agg_vecs[left:right]
                    for left, right in ut.itertwo(agg_offset_list)
                ]
                agg_flags_list = [
                    all_error_flags[left:right]
                    for left, right in ut.itertwo(agg_offset_list)
                ]

                for X, agg_rvecs, agg_flags in zip(X_list, agg_rvecs_list,
                                                   agg_flags_list):
                    X.agg_rvecs = agg_rvecs
                    X.agg_flags = agg_flags[:, None]

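                # Repeat the same stacked aggregation for the database annots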
                flat_wxs_assign = idx_to_wxs
                flat_offsets = offset_list
                flat_vecs = all_vecs
                tup = smk_funcs.compute_stacked_agg_rvecs(
                    words, flat_wxs_assign, flat_vecs, flat_offsets)
                all_agg_vecs, all_error_flags, agg_offset_list = tup
                if int_rvec:
                    all_agg_vecs = smk_funcs.cast_residual_integer(
                        all_agg_vecs)

                agg_rvecs_list = [
                    all_agg_vecs[left:right]
                    for left, right in ut.itertwo(agg_offset_list)
                ]
                agg_flags_list = [
                    all_error_flags[left:right]
                    for left, right in ut.itertwo(agg_offset_list)
                ]

                for Y, agg_rvecs, agg_flags in zip(Y_list, agg_rvecs_list,
                                                   agg_flags_list):
                    Y.agg_rvecs = agg_rvecs
                    Y.agg_flags = agg_flags[:, None]
            else:
                # This non-stacked way is about 500x slower
                _prog = ut.ProgPartial(label='agg Y rvecs',
                                       bs=True,
                                       adjust=True)
                for Y in _prog(Y_list_):
                    make_agg_vecs(Y, words, Y.vecs)

                _prog = ut.ProgPartial(label='agg X rvecs',
                                       bs=True,
                                       adjust=True)
                for X in _prog(X_list):
                    make_agg_vecs(X, words, X.vecs)
        elif method == 'bow2':
            # Hack for orig tf-idf bow vector
            nwords = len(words)
            for X in ut.ProgIter(X_list, label='make bow vector'):
                ensure_tf(X)
                bow_vector(X, wx_to_weight, nwords)

            for Y in ut.ProgIter(Y_list_, label='make bow vector'):
                ensure_tf(Y)
                bow_vector(Y, wx_to_weight, nwords)
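            # Sketch of this tf-idf baseline: each annot becomes a sparse
            # length-nwords vector of term frequencies scaled by per-word IDF;
            # matching then reduces to a (normalized) dot product.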

        if method != 'bow2':
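            # gamma(X) is the self-similarity normalizer from the SMK paper:
            # gamma(X) = 1 / sqrt(K(X, X)), so each annot scores 1.0 against
            # itself under the kernel.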
            for X in ut.ProgIter(X_list, 'compute X gamma'):
                X.gamma = smk.gamma(X)
            for Y in ut.ProgIter(Y_list_, 'compute Y gamma'):
                Y.gamma = smk.gamma(Y)

        # Execute matches (could go faster by enumerating candidates)
        scores_list = []
        for X in ut.ProgIter(X_list, label='query %s' % (smk, )):
            scores = [smk.kernel(X, Y) for Y in Y_list_]
            scores = np.array(scores)
            scores = np.nan_to_num(scores)
            scores_list.append(scores)

        import sklearn.metrics

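        # Oxford-style evaluation sketch: annots sharing a name (nid) with the
        # query are relevant; each query's ranking is scored with average
        # precision and the mean over queries is reported as mAP.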
        avep_list = []
        _iter = list(zip(scores_list, X_list))
        _iter = ut.ProgIter(_iter, label='evaluate %s' % (smk, ))
        for scores, X in _iter:
            truth = [X.nid == Y.nid for Y in Y_list_]
            avep = sklearn.metrics.average_precision_score(truth, scores)
            avep_list.append(avep)
        avep_list = np.array(avep_list)
        mAP = np.mean(avep_list)
        logger.info('mAP  = %r' % (mAP, ))
Exemple #41
0
def argparse_dict(default_dict_, lbl=None, verbose=None,
                  only_specified=False, force_keys=None, type_hint=None,
                  alias_dict=None):
    r"""
    Gets values for a dict's keys from the command line, falling back to the
    provided defaults.

    Args:
        default_dict_ (dict): mapping of parameter names to their default values
        only_specified (bool): if True, only return keys that were explicitly
            specified on the command line; unspecified defaults are omitted.

    Returns:
        dict: the defaults overridden by any command-line values

    CommandLine:
        python -m utool.util_arg --test-argparse_dict
        python -m utool.util_arg --test-argparse_dict --foo=3
        python -m utool.util_arg --test-argparse_dict --flag1
        python -m utool.util_arg --test-argparse_dict --flag2
        python -m utool.util_arg --test-argparse_dict --noflag2
        python -m utool.util_arg --test-argparse_dict --thresh=43
        python -m utool.util_arg --test-argparse_dict --bins=-10
        python -m utool.util_arg --test-argparse_dict --bins=-10 --only-specified --helpx

    Example:
        >>> # DISABLE_DOCTEST
        >>> from utool.util_arg import *  # NOQA
        >>> import utool as ut
        >>> # build test data
        >>> default_dict_ = {
        ...    'bins': 8,
        ...    'foo': None,
        ...    'flag1': False,
        ...    'flag2': True,
        ...    'max': 0.2,
        ...    'neg': -5,
        ...    'thresh': -5.333,
        ... }
        >>> # execute function
        >>> only_specified = ut.get_argflag('--only-specified')
        >>> dict_ = argparse_dict(default_dict_, only_specified=only_specified)
        >>> # verify results
        >>> result = ut.repr4(dict_, sorted_=True)
        >>> print(result)
    """
    if verbose is None:
        verbose = VERBOSE_ARGPARSE
    # Guard against shared mutable default arguments
    if force_keys is None:
        force_keys = set()
    if alias_dict is None:
        alias_dict = {}

    def make_argstrs(key, prefix_list):
        for prefix in prefix_list:
            yield prefix + key
            yield prefix + key.replace('-', '_')
            yield prefix + key.replace('_', '-')
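    # For example, with a hypothetical key, make_argstrs('use_gpu', ['--'])
    # yields '--use_gpu', '--use_gpu', and '--use-gpu'; the set() calls at
    # the call sites deduplicate these.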

    def get_dictkey_cmdline_val(key, default, type_hint):
        # see if the user gave a commandline value for this dict key
        defaulttype_ = None if default is None else type(default)
        if type_hint is None:
            type_ = defaulttype_
        elif isinstance(type_hint, dict):
            type_ = type_hint.get(key, defaulttype_)
        elif isinstance(type_hint, type):
            type_ = type_hint
        else:
            raise NotImplementedError('Unknown type of type_hint=%r' % (type_hint,))
        was_specified = False
        if isinstance(default, bool):
            val = default
            if default is True:
                falsekeys = list(set(make_argstrs(key, ['--no', '--no-'])))
                notval, was_specified = get_argflag(falsekeys, return_specified=True)
                val = not notval
                if not was_specified:
                    truekeys = list(set(make_argstrs(key, ['--'])))
                    val_, was_specified = get_argflag(truekeys, return_specified=True)
                    if was_specified:
                        val = val_
            elif default is False:
                truekeys = list(set(make_argstrs(key, ['--'])))
                val, was_specified = get_argflag(truekeys, return_specified=True)
        else:
            argtup = list(set(make_argstrs(key, ['--'])))
            val, was_specified = get_argval(argtup, type_=type_,
                                            default=default,
                                            return_specified=True)
        return val, was_specified
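    # Boolean defaults are handled asymmetrically above: a True default is
    # only flipped by an explicit --no<key> / --no-<key> (or re-affirmed by
    # --<key>), while a False default is flipped by --<key> alone.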

    dict_  = {}
    num_specified = 0
    for key, default in six.iteritems(default_dict_):
        val, was_specified = get_dictkey_cmdline_val(key, default, type_hint)
        if not was_specified:
            alias_keys = meta_util_iter.ensure_iterable(alias_dict.get(key, []))
            for alias_key in alias_keys:
                val, was_specified = get_dictkey_cmdline_val(alias_key, default,
                                                             type_hint)
                if was_specified:
                    break
        if verbose:
            if was_specified:
                num_specified += 1
                print('[argparse_dict] Specified key=%r, val=%r' % (key, val))
        if not only_specified or was_specified or key in force_keys:
            dict_[key] = val
    if verbose:
        print('[argparse_dict] num_specified = %r' % (num_specified,))
        print('[argparse_dict] force_keys = %r' % (force_keys,))

    if verbose:
        for key in dict_:
            if dict_[key] != default_dict_[key]:
                print('[argparse_dict] GOT ARGUMENT: cfgdict[%r] = %r' % (key, dict_[key]))

    do_helpx = get_argflag('--helpx',
                           help_='Specifies that argparse_dict should print help and quit')

    if get_argflag(('--help', '--help2')) or do_helpx:
        import utool as ut
        print('COMMAND LINE ACCEPTS THESE PARAMS WITH DEFAULTS:')
        if lbl is not None:
            print(lbl)
        print(ut.align(ut.repr4(default_dict_, sorted_=True), ':'))
        if do_helpx:
            sys.exit(1)
    return dict_
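# A minimal usage sketch (hypothetical keys; assumes utool exposes
# argparse_dict at the top level):
#
#   import utool as ut
#   defaults = {'bins': 8, 'use_cache': True}
#   cfg = ut.argparse_dict(defaults, lbl='demo config')
#   # `python script.py --bins=16 --no-use-cache` would produce
#   # {'bins': 16, 'use_cache': False}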