Example #1
    def get_tokens(self, types=None):
        r"""
        Args:
            types (list or None): optional list of mana-type names, e.g. ['hybrid'], used to restrict the returned tokens (default = None)

        Returns:
            list: tokens as (color, type) pairs

        CommandLine:
            python -m mtgmonte.mtgobjs --exec-get_tokens --show

        Example:
            >>> # ENABLE_DOCTEST
            >>> from mtgmonte.mtgobjs import *  # NOQA
            >>> tokens = tokenize_manacost('12WWUBRG(W/B)(2/G)(U/P)(U/P)')
            >>> self = ManaCost(tokens)
            >>> types = ['hybrid']
            >>> tokens = self.get_tokens(types)
            >>> result = ('tokens = %s' % (ut.repr2(tokens),))
            >>> print(result)
            tokens = [('(W/B)', 'hybrid'), ('(2/G)', 'hybrid')]
        """
        if types is None:
            tokens = [(color, type_)
                      for type_, color_list in self.type2_manas.items()
                      for color in color_list]
        else:
            tokens = [(color, type_)
                      for type_, color_list in ut.dict_subset(self.type2_manas, types, []).items()
                      for color in color_list]
        return tokens
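
Every snippet on this page revolves around ut.dict_subset. As a rough plain-Python sketch of the behavior these examples appear to rely on (assuming utool's signature is dict_subset(dict_, keys, default), where the optional default fills in for missing keys instead of raising KeyError), it behaves roughly like:

from collections import OrderedDict

def dict_subset_sketch(dict_, keys, *default):
    # Return an ordered subset of dict_ restricted to `keys`.
    # If a default is supplied, missing keys map to it instead of raising KeyError.
    if default:
        return OrderedDict((key, dict_.get(key, default[0])) for key in keys)
    return OrderedDict((key, dict_[key]) for key in keys)

# dict_subset_sketch({'a': 1, 'b': 2, 'c': 3}, ['c', 'a'])
# -> OrderedDict([('c', 3), ('a', 1)])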
Example #2
def on_pick(event, infr=None):
    import wbia.plottool as pt

    logger.info('ON PICK: %r' % (event, ))
    artist = event.artist
    plotdat = pt.get_plotdat_dict(artist)
    if plotdat:
        if 'node' in plotdat:
            all_node_data = ut.sort_dict(plotdat['node_data'].copy())
            visual_node_data = ut.dict_subset(all_node_data,
                                              infr.visual_node_attrs, None)
            node_data = ut.delete_dict_keys(all_node_data,
                                            infr.visual_node_attrs)
            node = plotdat['node']
            node_data['degree'] = infr.graph.degree(node)
            node_label = infr.pos_graph.node_label(node)
            logger.info('visual_node_data: ' +
                        ut.repr2(visual_node_data, nl=1))
            logger.info('node_data: ' + ut.repr2(node_data, nl=1))
            ut.cprint('node: ' + ut.repr2(plotdat['node']), 'blue')
            logger.info('(pcc) node_label = %r' % (node_label, ))
            logger.info('artist = %r' % (artist, ))
        elif 'edge' in plotdat:
            all_edge_data = ut.sort_dict(plotdat['edge_data'].copy())
            logger.info(infr.repr_edge_data(all_edge_data))
            ut.cprint('edge: ' + ut.repr2(plotdat['edge']), 'blue')
            logger.info('artist = %r' % (artist, ))
        else:
            logger.info('???: ' + ut.repr2(plotdat))
    logger.info(ut.get_timestamp())
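
on_pick above is a Matplotlib pick-event callback. A minimal sketch of how such a handler would typically be wired up (the figure and the infr object here are hypothetical placeholders; an artist only emits pick events if it was created with picking enabled):

import functools
import matplotlib.pyplot as plt

infr = None  # placeholder; a real graph-inference object would go here
fig, ax = plt.subplots()
# picker=True makes this artist clickable so that pick_event fires.
(line,) = ax.plot([0, 1, 2], [0, 1, 0], 'o-', picker=True)
# Bind the handler, partially applied with the inference object.
fig.canvas.mpl_connect('pick_event', functools.partial(on_pick, infr=infr))
plt.show()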
Example #3
 def __init__(self, fname, ext='.cPkl'):
     relevant_params = relevance[fname]
     relevant_cfg = ut.dict_subset(config, relevant_params)
     cfgstr = ut.get_cfg_lbl(relevant_cfg)
     dbdir = ut.truepath('/raid/work/Oxford/')
     super(SMKCacher, self).__init__(fname,
                                     cfgstr,
                                     cache_dir=dbdir,
                                     ext=ext)
Example #4
 def basic_infodict(card):
     _basic_keys = [
         'name',  'mana_cost', 'color_indicator',  'types',  'subtypes',
         'power', 'toughness', 'loyalty',
         #'rules_text'
     ]
     basic_keys = filter(lambda key: card.__dict__[key], _basic_keys)
     basic_infodict = ut.dict_subset(card.__dict__, basic_keys)
     return basic_infodict
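
basic_infodict above keeps only the attributes that are actually set on the card before taking the subset. The same pattern in plain Python, with a toy dict standing in for the card's __dict__ (illustrative data only):

card_dict = {'name': 'Grizzly Bears', 'mana_cost': '1G', 'power': '2', 'loyalty': None}
_basic_keys = ['name', 'mana_cost', 'power', 'loyalty']
# Keep only the keys whose values are truthy, then take that subset in order.
basic_keys = [key for key in _basic_keys if card_dict[key]]
basic_infodict = {key: card_dict[key] for key in basic_keys}
print(basic_infodict)  # {'name': 'Grizzly Bears', 'mana_cost': '1G', 'power': '2'}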
Example #5
def main():
    tests_ = tests
    subset = ['consistent_info', 'inconsistent_info']
    subset = ['chain1', 'chain2', 'chain3']
    subset += ['triangle1', 'triangle2', 'triangle3']
    # subset = ['inconsistent_info']
    tests_ = ut.dict_subset(tests, subset)

    for name, func in tests_.items():
        logger.info('\n==============')
        ut.cprint('name = %r' % (name, ), 'yellow')
        uvw_list, pass_values, fail_values = func()
        G = build_graph(uvw_list)

        nodes = sorted(G.nodes())
        edges = [tuple(sorted(e)) for e in G.edges()]
        edges = ut.sortedby2(edges, edges)

        n_annots = len(nodes)
        n_names = n_annots

        annot_idxs = list(range(n_annots))
        lookup_annot_idx = ut.dzip(nodes, annot_idxs)
        nx.set_node_attributes(G, name='annot_idx', values=lookup_annot_idx)

        edge_probs = np.array([
            get_edge_id_probs(G, aid1, aid2, n_names) for aid1, aid2 in edges
        ])

        logger.info('nodes = %r' % (nodes, ))
        # logger.info('edges = %r' % (edges,))
        logger.info('Noisy Observations')
        logger.info(
            pd.DataFrame(edge_probs,
                         columns=['same', 'diff'],
                         index=pd.Series(edges)))
        edge_probs = None

        cut_step(
            G,
            nodes,
            edges,
            n_annots,
            n_names,
            lookup_annot_idx,
            edge_probs,
            pass_values,
            fail_values,
        )

        edge_probs = bp_step(G, nodes, edges, n_annots, n_names,
                             lookup_annot_idx)
Example #6
def get_pipecfg_list(test_cfg_name_list, ibs=None, verbose=None):
    r"""
    Builds a list of varied query configurations. Only custom configs depend on
    an ibs object. The order of the output is not guaranteed to agree with the
    input order.

    FIXME:
        This breaks when proot=BC_DTW and ibs is None

    Args:
        test_cfg_name_list (list): list of strs
        ibs (wbia.IBEISController): wbia controller object (optional)

    Returns:
        tuple: (cfg_list, cfgx2_lbl) -
            cfg_list (list): list of config objects
            cfgx2_lbl (list): denotes which parameters are being varied.
                If there is just one config then nothing is varied

    CommandLine:
        python -m wbia get_pipecfg_list:0
        python -m wbia get_pipecfg_list:1 --db humpbacks
        python -m wbia get_pipecfg_list:2

    Example:
        >>> # ENABLE_DOCTEST
        >>> from wbia.expt.experiment_helpers import *  # NOQA
        >>> import wbia
        >>> ibs = wbia.opendb(defaultdb='testdb1')
        >>> #test_cfg_name_list = ['best', 'custom', 'custom:sv_on=False']
        >>> #test_cfg_name_list = ['default', 'default:sv_on=False', 'best']
        >>> test_cfg_name_list = ['default', 'default:sv_on=False', 'best']
        >>> # execute function
        >>> (pcfgdict_list, pipecfg_list) = get_pipecfg_list(test_cfg_name_list, ibs)
        >>> # verify results
        >>> assert pipecfg_list[0].sv_cfg.sv_on is True
        >>> assert pipecfg_list[1].sv_cfg.sv_on is False
        >>> pipecfg_lbls = get_varied_pipecfg_lbls(pcfgdict_list)
        >>> result = ('pipecfg_lbls = '+ ut.repr2(pipecfg_lbls))
        >>> print(result)
        pipecfg_lbls = ['default:', 'default:sv_on=False']

    Example1:
        >>> # DISABLE_DOCTEST
        >>> import wbia_flukematch.plugin
        >>> from wbia.expt.experiment_helpers import *  # NOQA
        >>> import wbia
        >>> ibs = wbia.opendb(defaultdb='humpbacks')
        >>> test_cfg_name_list = ['default:pipeline_root=BC_DTW,decision=average,crop_dim_size=[960,500]', 'default:K=[1,4]']
        >>> (pcfgdict_list, pipecfg_list) = get_pipecfg_list(test_cfg_name_list, ibs)
        >>> pipecfg_lbls = get_varied_pipecfg_lbls(pcfgdict_list)
        >>> result = ('pipecfg_lbls = '+ ut.repr2(pipecfg_lbls))
        >>> print(result)
        >>> print_pipe_configs(pcfgdict_list, pipecfg_list)
    """
    if verbose is None:
        verbose = ut.VERBOSE
    if ut.VERBOSE:
        logger.info(
            '[expt_help.get_pipecfg_list] building pipecfg_list using: %s'
            % test_cfg_name_list
        )
    if isinstance(test_cfg_name_list, six.string_types):
        test_cfg_name_list = [test_cfg_name_list]
    _standard_cfg_names = []
    _pcfgdict_list = []

    # HACK: Parse out custom configs first
    for test_cfg_name in test_cfg_name_list:
        if test_cfg_name.startswith('custom:') or test_cfg_name == 'custom':
            logger.info('[expthelpers] Parsing nonstandard custom config')
            assert False, 'custom is no longer supported'
            # if test_cfg_name.startswith('custom:'):
            #    # parse out modifications to custom
            #    cfgstr_list = ':'.join(test_cfg_name.split(':')[1:]).split(',')
            #    augcfgdict = ut.parse_cfgstr_list(cfgstr_list, smartcast=True)
            # else:
            #    augcfgdict = {}
            # # Take the configuration from the wbia object
            # pipe_cfg = ibs.--cfg.query_cfg.deepcopy()
            # # Update with augmented params
            # pipe_cfg.update_query_cfg(**augcfgdict)
            # # Parse out a standard cfgdict
            # cfgdict = dict(pipe_cfg.parse_items())
            # cfgdict['_cfgname'] = 'custom'
            # cfgdict['_cfgstr'] = test_cfg_name
            # _pcfgdict_list.append(cfgdict)
        else:
            _standard_cfg_names.append(test_cfg_name)
    # Handle standard configs next
    if len(_standard_cfg_names) > 0:
        # Get parsing information
        # cfg_default_dict = dict(Config.QueryConfig().parse_items())
        # valid_keys = list(cfg_default_dict.keys())
        cfgstr_list = _standard_cfg_names
        named_defaults_dict = ut.dict_subset(
            experiment_configs.__dict__, experiment_configs.TEST_NAMES
        )
        alias_keys = experiment_configs.ALIAS_KEYS
        # Parse standard pipeline cfgstrings
        metadata = {'ibs': ibs}
        dict_comb_list = cfghelpers.parse_cfgstr_list2(
            cfgstr_list,
            named_defaults_dict,
            cfgtype=None,
            alias_keys=alias_keys,
            # Hack out valid keys for humpbacks
            # valid_keys=valid_keys,
            strict=False,
            metadata=metadata,
        )
        # Get varied params (there may be duplicates)
        _pcfgdict_list.extend(ut.flatten(dict_comb_list))

    # Expand cfgdicts into PipelineConfig config objects
    # TODO: respect different algorithm parameters like flukes
    if ibs is None:
        configclass_list = [Config.QueryConfig] * len(_pcfgdict_list)
    else:
        root_to_config = ibs.depc_annot.configclass_dict.copy()
        from wbia.algo.smk import smk_pipeline

        root_to_config['smk'] = smk_pipeline.SMKRequestConfig
        configclass_list = [
            root_to_config.get(
                _cfgdict.get('pipeline_root', _cfgdict.get('proot', 'vsmany')),
                Config.QueryConfig,
            )
            for _cfgdict in _pcfgdict_list
        ]
    _pipecfg_list = [
        cls(**_cfgdict) for cls, _cfgdict in zip(configclass_list, _pcfgdict_list)
    ]

    # Enforce rule that removes duplicate configs
    # by using feasibility from wbia.algo.Config
    # TODO: Move this unique finding code to its own function
    # and then move it up one function level so even the custom
    # configs can be uniquified
    _flag_list = ut.flag_unique_items(_pipecfg_list)
    cfgdict_list = ut.compress(_pcfgdict_list, _flag_list)
    pipecfg_list = ut.compress(_pipecfg_list, _flag_list)
    if verbose:
        # for cfg in _pipecfg_list:
        #    logger.info(cfg.get_cfgstr())
        #    logger.info(cfg)
        logger.info(
            '[harn.help] return %d / %d unique pipeline configs from: %r'
            % (len(cfgdict_list), len(_pcfgdict_list), test_cfg_name_list)
        )

    if ut.get_argflag(('--pcfginfo', '--pinfo', '--pipecfginfo')):
        ut.colorprint('Requested PcfgInfo for tests... ', 'red')
        print_pipe_configs(cfgdict_list, pipecfg_list)
        ut.colorprint('Finished Reporting PcfgInfo. Exiting', 'red')
        sys.exit(0)
    return (cfgdict_list, pipecfg_list)
Example #7
def tune_flann(dpts,
               target_precision=.90,
               build_weight=0.50,
               memory_weight=0.00,
               sample_fraction=0.01):
    r"""

    References:
        http://www.cs.ubc.ca/research/flann/uploads/FLANN/flann_pami2014.pdf
        http://www.cs.ubc.ca/research/flann/uploads/FLANN/flann_manual-1.8.4.pdf
        http://docs.opencv.org/trunk/modules/flann/doc/flann_fast_approximate_nearest_neighbor_search.html

    Math::
        The cost of a parameter setting is:

        LaTeX:
            cost = \frac{search + build\_weight \cdot build}
                        {\min_{params}(search + build\_weight \cdot build)}
                   + memory\_weight \cdot memory

    Args:
        dpts (ndarray):

        target_precision (float): number between 0 and 1 representing desired
            accuracy. Higher values are more accurate.

        build_weight (float): importance weight given to minimizing build time
            relative to search time. This number can range from 0 to infinity.
            Because building is typically the more expensive computation, you want
            to keep this number relatively low (less than 1); otherwise you will
            end up with a linear search (no build time).

        memory_weight (float): Importance of memory relative to total speed.
            A value less than 1 gives more importance to the time spent and a
            value greater than 1 gives more importance to the memory usage.

        sample_fraction (float): number between 0 and 1 representing the
            fraction of the input data to use in the optimization. A higher
            number uses more data.

    Returns:
        dict: tuned_params

    CommandLine:
        python -m vtool.nearest_neighbors --test-tune_flann

    """
    with ut.Timer('tuning flann'):
        print('Autotuning flann with %d %dD vectors' %
              (dpts.shape[0], dpts.shape[1]))
        print('a sample of %d vectors will be used' %
              (int(dpts.shape[0] * sample_fraction)))
        flann = pyflann.FLANN()
        #num_data = len(dpts)
        flann_atkwargs = dict(algorithm='autotuned',
                              target_precision=target_precision,
                              build_weight=build_weight,
                              memory_weight=memory_weight,
                              sample_fraction=sample_fraction)
        suffix = repr(flann_atkwargs)
        badchar_list = ',{}\': '
        for badchar in badchar_list:
            suffix = suffix.replace(badchar, '')
        print('flann_atkwargs:')
        print(utool.dict_str(flann_atkwargs))
        print('starting optimization')
        tuned_params = flann.build_index(dpts, **flann_atkwargs)
        print('finished optimization')

        # The algorithm is sometimes returned as 'default', which is not very
        # useful because the actual default name is embedded in the pyflann
        # module where most would not care to look. This looks up the default
        # name for you.
        for key in ['algorithm', 'centers_init', 'log_level']:
            val = tuned_params.get(key, None)
            if val == 'default':
                dict_ = pyflann.FLANNParameters._translation_[key]
                other_algs = ut.dict_find_other_sameval_keys(dict_, 'default')
                assert len(other_algs
                           ) == 1, 'more than 1 default for key=%r' % (key, )
                tuned_params[key] = other_algs[0]

        common_params = [
            'algorithm',
            'checks',
        ]
        relevant_params_dict = dict(
            linear=['algorithm'],
            #---
            kdtree=['trees'],
            #---
            kmeans=[
                'branching',
                'iterations',
                'centers_init',
                'cb_index',
            ],
            #---
            lsh=[
                'table_number',
                'key_size',
                'multi_probe_level',
            ],
        )
        relevant_params_dict['composite'] = relevant_params_dict[
            'kmeans'] + relevant_params_dict['kdtree'] + common_params
        relevant_params_dict['kmeans'] += common_params
        relevant_params_dict['kdtree'] += common_params
        relevant_params_dict['lsh'] += common_params

        #kdtree_single_params = [
        #    'leaf_max_size',
        #]
        #other_params = [
        #    'build_weight',
        #    'sorted',
        #]
        out_file = 'flann_tuned' + suffix
        utool.write_to(out_file,
                       ut.dict_str(tuned_params, sorted_=True, newlines=True))
        flann.delete_index()
        if tuned_params['algorithm'] in relevant_params_dict:
            print('relevant_params=')
            relevant_params = relevant_params_dict[tuned_params['algorithm']]
            print(
                ut.dict_str(ut.dict_subset(tuned_params, relevant_params),
                            sorted_=True,
                            newlines=True))
            print('irrelevant_params=')
            print(
                ut.dict_str(ut.dict_setdiff(tuned_params, relevant_params),
                            sorted_=True,
                            newlines=True))
        else:
            print('unknown tuned algorithm=%r' % (tuned_params['algorithm'], ))

        print('all_tuned_params=')
        print(ut.dict_str(tuned_params, sorted_=True, newlines=True))
    return tuned_params
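
A minimal usage sketch for tune_flann as defined above, assuming pyflann and its dependencies are available; the descriptors are random data used purely for illustration:

import numpy as np

# 10,000 random 128-D descriptors stand in for real feature vectors.
dpts = (np.random.rand(10000, 128) * 255).astype(np.float32)
# Favor search speed over build time and ignore memory usage.
tuned_params = tune_flann(dpts, target_precision=0.90, build_weight=0.5,
                          memory_weight=0.0, sample_fraction=0.1)
print(tuned_params.get('algorithm'))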
Example #8
    def new_cpd(self, parents=None, pmf_func=None):
        """
        Makes a new random variable that is an instance of this template.

        parents : only used to define the name of this node.
        """
        if pmf_func is None:
            pmf_func = self.pmf_func

        # --- MAKE VARIABLE ID
        def _getid(obj):
            if isinstance(obj, int):
                return str(obj)
            elif isinstance(obj, six.string_types):
                return obj
            else:
                return obj._template_id

        if not ut.isiterable(parents):
            parents = [parents]

        template_ids = [_getid(cpd) for cpd in parents]
        HACK_SAME_IDS = True
        # TODO: keep track of parent index inheritance
        # then rectify uniqueness based on that
        if HACK_SAME_IDS and ut.allsame(template_ids):
            _id = template_ids[0]
        else:
            _id = ''.join(template_ids)
        variable = ''.join([self.varpref, _id])
        # variable = '_'.join([self.varpref, '{' + _id + '}'])
        # variable = '$%s$' % (variable,)

        evidence_cpds = [cpd for cpd in parents if hasattr(cpd, 'ttype')]
        if len(evidence_cpds) == 0:
            evidence_cpds = None

        variable_card = len(self.basis)
        statename_dict = {
            variable: self.basis,
        }
        if self.evidence_ttypes is not None:
            # Check that the given evidence cpds match the types the template expects
            if any(cpd.ttype != tcpd.ttype
                   for cpd, tcpd in zip(evidence_cpds, self.evidence_ttypes)):
                raise ValueError('Evidence is not of appropriate type')
            evidence_bases = [cpd.variable_statenames for cpd in evidence_cpds]
            evidence_card = list(map(len, evidence_bases))
            evidence_states = list(ut.iprod(*evidence_bases))

            for cpd in evidence_cpds:
                _dict = ut.dict_subset(cpd.statename_dict, [cpd.variable])
                statename_dict.update(_dict)

            evidence = [cpd.variable for cpd in evidence_cpds]
        else:
            if evidence_cpds is not None:
                raise ValueError('Gave evidence for evidence-less template')
            evidence = None
            evidence_card = None

        # --- MAKE TABLE VALUES
        if pmf_func is not None:
            if isinstance(pmf_func, list):
                values = np.array(pmf_func)
            else:
                values = np.array([[
                    pmf_func(vstate, *estates) for estates in evidence_states
                ] for vstate in self.basis])
            ensure_normalized = True
            if ensure_normalized:
                values = values / values.sum(axis=0)
        else:
            # assume uniform
            fill_value = 1.0 / variable_card
            if evidence_card is None:
                values = np.full((1, variable_card), fill_value)
            else:
                values = np.full([variable_card] + list(evidence_card),
                                 fill_value)

        try:
            cpd = pgmpy.factors.TabularCPD(
                variable=variable,
                variable_card=variable_card,
                values=values,
                evidence=evidence,
                evidence_card=evidence_card,
                # statename_dict=statename_dict,
                state_names=statename_dict,
            )
        except Exception as ex:
            ut.printex(
                ex,
                'Failed to create TabularCPD',
                keys=[
                    'variable',
                    'variable_card',
                    'statename_dict',
                    'evidence_card',
                    'evidence',
                    'values.shape',
                ],
            )
            ut.embed()
            raise

        cpd.ttype = self.ttype
        cpd._template_ = self
        cpd._template_id = _id
        return cpd
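
The try/except above wraps construction of a pgmpy TabularCPD. As a small self-contained sketch of a valid call (the variable names and probabilities are made up; each column of values must sum to one, which is what the ensure_normalized branch above enforces, and newer pgmpy releases import TabularCPD from pgmpy.factors.discrete):

import numpy as np
from pgmpy.factors.discrete import TabularCPD  # pgmpy.factors.TabularCPD in older releases

# P(match | score) for a binary 'match' variable given a 3-state 'score' variable.
values = np.array([[0.95, 0.50, 0.10],   # match = 'diff'
                   [0.05, 0.50, 0.90]])  # match = 'same'
cpd = TabularCPD(
    variable='match', variable_card=2,
    values=values,
    evidence=['score'], evidence_card=[3],
    state_names={'match': ['diff', 'same'], 'score': ['low', 'mid', 'high']},
)
print(cpd)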
Example #9
def estimate_twoday_count(ibs, day1, day2, filter_kw):
    #gid_list = ibs.get_valid_gids()
    all_images = ibs.images()
    dates = [dt.date() for dt in all_images.datetime]
    date_to_images = all_images.group_items(dates)
    date_to_images = ut.sort_dict(date_to_images)
    #date_hist = ut.map_dict_vals(len, date2_gids)
    #print('date_hist = %s' % (ut.repr2(date_hist, nl=2),))
    verbose = 0

    visit_dates = [day1, day2]
    visit_info_list_ = []
    for day in visit_dates:
        images = date_to_images[day]
        aids = ut.flatten(images.aids)
        aids = ibs.filter_annots_general(aids, filter_kw=filter_kw,
                                         verbose=verbose)
        nids = ibs.get_annot_name_rowids(aids)
        grouped_aids = ut.group_items(aids, nids)
        unique_nids = ut.unique(list(grouped_aids.keys()))

        if False:
            aids_list = ut.take(grouped_aids, unique_nids)
            for aids in aids_list:
                if len(aids) > 30:
                    break
            timedeltas_list = ibs.get_unflat_annots_timedelta_list(aids_list)
            # Do the five second rule
            marked_thresh = 5
            flags = []
            for nid, timedeltas in zip(unique_nids, timedeltas_list):
                flags.append(timedeltas.max() > marked_thresh)
            print('Unmarking %d names' % (len(flags) - sum(flags)))
            unique_nids = ut.compress(unique_nids, flags)
            grouped_aids = ut.dict_subset(grouped_aids, unique_nids)

        unique_aids = ut.flatten(list(grouped_aids.values()))
        info = {
            'unique_nids': unique_nids,
            'grouped_aids': grouped_aids,
            'unique_aids': unique_aids,
        }
        visit_info_list_.append(info)

    # Estimate statistics
    from ibeis.other import dbinfo
    aids_day1, aids_day2 = ut.take_column(visit_info_list_, 'unique_aids')
    nids_day1, nids_day2 = ut.take_column(visit_info_list_, 'unique_nids')
    resight_nids = ut.isect(nids_day1, nids_day2)
    nsight1 = len(nids_day1)
    nsight2 = len(nids_day2)
    resight = len(resight_nids)
    lp_index, lp_error = dbinfo.sight_resight_count(nsight1, nsight2, resight)

    if False:
        from ibeis.other import dbinfo
        print('DAY 1 STATS:')
        _ = dbinfo.get_dbinfo(ibs, aid_list=aids_day1)  # NOQA
        print('DAY 2 STATS:')
        _ = dbinfo.get_dbinfo(ibs, aid_list=aids_day2)  # NOQA
        print('COMBINED STATS:')
        _ = dbinfo.get_dbinfo(ibs, aid_list=aids_day1 + aids_day2)  # NOQA

    print('%d annots on day 1' % (len(aids_day1)) )
    print('%d annots on day 2' % (len(aids_day2)) )
    print('%d names on day 1' % (nsight1,))
    print('%d names on day 2' % (nsight2,))
    print('resight = %r' % (resight,))
    print('lp_index = %r ± %r' % (lp_index, lp_error))
    return nsight1, nsight2, resight, lp_index, lp_error
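
dbinfo.sight_resight_count is not shown here, but the lp_index printout suggests a Lincoln-Petersen style mark-recapture estimate. A hedged sketch of that statistic, for illustration only and not claimed to be ibeis's actual implementation:

def lincoln_petersen(nsight1, nsight2, resight):
    """Estimate population size from a two-day sight/resight count.

    nsight1, nsight2: number of distinct names seen on day 1 / day 2.
    resight: number of names seen on both days.
    """
    if resight == 0:
        return float('inf'), float('inf')
    estimate = nsight1 * nsight2 / resight
    # Standard error approximation for the classic Lincoln-Petersen estimator.
    error = ((nsight1 ** 2) * nsight2 * (nsight2 - resight) / resight ** 3) ** 0.5
    return estimate, error

# e.g. 40 names on day 1, 35 on day 2, 10 resighted -> roughly 140 individuals
print(lincoln_petersen(40, 35, 10))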
Example #10
def _test_buffered_generator_general2(bgfunc, bgargs, fgfunc,
                                      target_looptime=1.0, serial_cheat=1,
                                      buffer_size=2, show_serial=True):
    """
    # We are going to generate output of bgfunc in the background while
    # fgfunc is running in the foreground. fgfunc takes results of bgfunc as
    # args.
    # --- Hyperparams
    target_looptime = 1.5  # maximum time to run all loops
    """
    import utool as ut
    with ut.Timer('One* call to bgfunc') as t_bgfunc:
        results = [bgfunc(arg) for arg in bgargs]
    bgfunctime = t_bgfunc.ellapsed / len(bgargs)
    #fgfunc = ut.is_prime
    with ut.Timer('One* call to fgfunc') as t_fgfunc:
        [fgfunc(x) for x in results]
    fgfunctime = t_fgfunc.ellapsed / len(bgargs)
    # compute amount of loops to run
    est_looptime = (bgfunctime + fgfunctime)
    _num_loops = round(target_looptime // est_looptime)
    num_data = int(_num_loops // len(bgargs))
    num_loops =  int(num_data * len(bgargs))
    serial_cheat = min(serial_cheat, num_data)
    data = ut.flatten([bgargs] * num_data)
    est_tfg = fgfunctime * num_loops
    est_tbg = bgfunctime * num_loops
    est_needed_buffers =  fgfunctime / bgfunctime
    print('Estimated stats' + ut.repr4(ut.dict_subset(locals(), [
        'num_loops',
        'bgfunctime', 'fgfunctime', 'est_tfg', 'est_tbg', 'serial_cheat',
        'buffer_size', 'est_needed_buffers',
    ])))
    if show_serial:
        with ut.Timer('serial') as t1:
            # cheat for serial to make it go faster
            for x in map(bgfunc, data[:len(data) // serial_cheat]):
                fgfunc(x)
        t_serial = serial_cheat * t1.ellapsed
        print('...toc(\'adjusted_serial\') = %r' % (t_serial))
    with ut.Timer('ut.buffered_generator') as t2:
        gen_ = ut.buffered_generator(map(bgfunc, data), buffer_size=buffer_size)
        for x in gen_:
            fgfunc(x)
    with ut.Timer('ut.generate') as t3:
        gen_ = ut.generate(bgfunc, data, chunksize=buffer_size, quiet=1, verbose=0)
        for x in gen_:
            fgfunc(x)
    # Compare theoretical vs practical efficiency
    print('\n Theoretical Results')
    def parallel_efficiency(ellapsed, est_tfg, est_tbg):
        return (1 - ((ellapsed - est_tfg) / est_tbg)) * 100
    if show_serial:
        print('Theoretical gain (serial) = %.3f%%' % (
            parallel_efficiency(t_serial, est_tfg, est_tbg),))
    print('Theoretical gain (ut.buffered_generator) = %.3f%%' % (
        parallel_efficiency(t2.ellapsed, est_tfg, est_tbg),))
    print('Theoretical gain (ut.generate) = %.2f%%' % (
        parallel_efficiency(t3.ellapsed, est_tfg, est_tbg),))
    if show_serial:
        prac_tbg = t_serial - est_tfg
        print('\n Practical Results')
        print('Practical gain (serial) = %.3f%%' % (
            parallel_efficiency(t1.ellapsed, est_tfg, prac_tbg),))
        print('Practical gain (ut.buffered_generator) = %.3f%%' % (
            parallel_efficiency(t2.ellapsed, est_tfg, prac_tbg),))
        print('Practical gain (ut.generate) = %.2f%%' % (
            parallel_efficiency(t3.ellapsed, est_tfg, prac_tbg),))
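
ut.buffered_generator is benchmarked above but its implementation is not shown. A rough sketch of the idea it presumably implements, filling a bounded queue from a background thread so that production and consumption overlap (an illustration, not utool's actual code):

import queue
import threading

def buffered_generator(source_gen, buffer_size=2):
    # Yield items from source_gen while a background thread keeps the buffer full.
    sentinel = object()
    buf = queue.Queue(maxsize=buffer_size)

    def _producer():
        for item in source_gen:
            buf.put(item)
        buf.put(sentinel)  # signal exhaustion

    threading.Thread(target=_producer, daemon=True).start()
    while True:
        item = buf.get()
        if item is sentinel:
            return
        yield item

# e.g. overlap an expensive load with an expensive consume:
# for x in buffered_generator(map(load, paths), buffer_size=4): process(x)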
Example #11
def tune_flann(dpts,
               target_precision=.90,
               build_weight=0.50,
               memory_weight=0.00,
               sample_fraction=0.01):
    r"""

    References:
        http://www.cs.ubc.ca/research/flann/uploads/FLANN/flann_pami2014.pdf
        http://www.cs.ubc.ca/research/flann/uploads/FLANN/flann_manual-1.8.4.pdf
        http://docs.opencv.org/trunk/modules/flann/doc/flann_fast_approximate_nearest_neighbor_search.html

    Math::
        The cost of a parameter setting is:

        LaTeX:
            cost = \frac{search + build\_weight \cdot build}
                        {\min_{params}(search + build\_weight \cdot build)}
                   + memory\_weight \cdot memory

    Args:
        dpts (ndarray):

        target_precision (float): number between 0 and 1 representing desired
            accuracy. Higher values are more accurate.

        build_weight (float): importance weight given to minimizing build time
            relative to search time. This number can range from 0 to infinity.
            Because building is typically the more expensive computation, you want
            to keep this number relatively low (less than 1); otherwise you will
            end up with a linear search (no build time).

        memory_weight (float): Importance of memory relative to total speed.
            A value less than 1 gives more importance to the time spent and a
            value greater than 1 gives more importance to the memory usage.

        sample_fraction (float): number between 0 and 1 representing the
            fraction of the input data to use in the optimization. A higher
            number uses more data.

    Returns:
        dict: tuned_params

    CommandLine:
        python -m vtool.nearest_neighbors --test-tune_flann

    """
    with ut.Timer('tuning flann'):
        print('Autotuning flann with %d %dD vectors' % (dpts.shape[0], dpts.shape[1]))
        print('a sample of %d vectors will be used' % (int(dpts.shape[0] * sample_fraction)))
        flann = pyflann.FLANN()
        #num_data = len(dpts)
        flann_atkwargs = dict(algorithm='autotuned',
                              target_precision=target_precision,
                              build_weight=build_weight,
                              memory_weight=memory_weight,
                              sample_fraction=sample_fraction)
        suffix = repr(flann_atkwargs)
        badchar_list = ',{}\': '
        for badchar in badchar_list:
            suffix = suffix.replace(badchar, '')
        print('flann_atkwargs:')
        print(utool.dict_str(flann_atkwargs))
        print('starting optimization')
        tuned_params = flann.build_index(dpts, **flann_atkwargs)
        print('finished optimization')

        # The algorithm is sometimes returned as 'default', which is not very
        # useful because the actual default name is embedded in the pyflann
        # module where most would not care to look. This looks up the default
        # name for you.
        for key in ['algorithm', 'centers_init', 'log_level']:
            val = tuned_params.get(key, None)
            if val == 'default':
                dict_ = pyflann.FLANNParameters._translation_[key]
                other_algs = ut.dict_find_other_sameval_keys(dict_, 'default')
                assert len(other_algs) == 1, 'more than 1 default for key=%r' % (key,)
                tuned_params[key] = other_algs[0]

        common_params = [
            'algorithm',
            'checks',
        ]
        relevant_params_dict = dict(
            linear=['algorithm'],
            #---
            kdtree=[
                'trees'
            ],
            #---
            kmeans=[
                'branching',
                'iterations',
                'centers_init',
                'cb_index',
            ],
            #---
            lsh=[
                'table_number',
                'key_size',
                'multi_probe_level',
            ],
        )
        relevant_params_dict['composite'] = relevant_params_dict['kmeans'] + relevant_params_dict['kdtree'] + common_params
        relevant_params_dict['kmeans'] += common_params
        relevant_params_dict['kdtree'] += common_params
        relevant_params_dict['lsh'] += common_params

        #kdtree_single_params = [
        #    'leaf_max_size',
        #]
        #other_params = [
        #    'build_weight',
        #    'sorted',
        #]
        out_file = 'flann_tuned' + suffix
        utool.write_to(out_file, ut.dict_str(tuned_params, sorted_=True, newlines=True))
        flann.delete_index()
        if tuned_params['algorithm'] in relevant_params_dict:
            print('relevant_params=')
            relevant_params = relevant_params_dict[tuned_params['algorithm']]
            print(ut.dict_str(ut.dict_subset(tuned_params, relevant_params),
                              sorted_=True, newlines=True))
            print('irrelevant_params=')
            print(ut.dict_str(ut.dict_setdiff(tuned_params, relevant_params),
                              sorted_=True, newlines=True))
        else:
            print('unknown tuned algorithm=%r' % (tuned_params['algorithm'],))

        print('all_tuned_params=')
        print(ut.dict_str(tuned_params, sorted_=True, newlines=True))
    return tuned_params
Example #12
 def get_hesaff_params(feat_cfg):
     import pyhesaff
     default_keys = list(pyhesaff.get_hesaff_default_params().keys())
     hesaff_param_dict = ut.dict_subset(feat_cfg, default_keys)
     return hesaff_param_dict
Example #13
def get_annotmatch_subgraph(ibs):
    r"""
    http://bokeh.pydata.org/en/latest/
    https://github.com/jsexauer/networkx_viewer

    TODO: Need a special visualization
        In the web I need:
            * graph of annotations matches.
            * can move them around.
            * edit lines between them.
            * http://stackoverflow.com/questions/15373530/web-graph-visualization-tool

            This should  share functionality with a name view.

    Args:
        ibs (IBEISController):  ibeis controller object

    CommandLine:
        python -m ibeis.annotmatch_funcs --exec-get_annotmatch_subgraph --show

        # Networkx example
        python -m ibeis.viz.viz_graph --test-show_chipmatch_graph:0 --show

    Ignore:

        from ibeis import viz

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.annotmatch_funcs import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='PZ_MTEST')
        >>> result = get_annotmatch_subgraph(ibs)
        >>> ut.show_if_requested()
    """
    #import ibeis
    #ibs = ibeis.opendb(db='PZ_MTEST')
    #rowids = ibs._get_all_annotmatch_rowids()
    #aids1 = ibs.get_annotmatch_aid1(rowids)
    #aids2 = ibs.get_annotmatch_aid2(rowids)
    #
    #
    nids = ibs.get_valid_nids()
    nids = nids[0:5]
    aids_list = ibs.get_name_aids(nids)
    import itertools
    unflat_edges = (list(itertools.product(aids, aids)) for aids in aids_list)
    aid_pairs = [tup for tup in ut.iflatten(unflat_edges) if tup[0] != tup[1]]
    aids1 = ut.get_list_column(aid_pairs, 0)
    aids2 = ut.get_list_column(aid_pairs, 1)

    # Enumerate annotmatch properties
    rng = np.random.RandomState(0)
    edge_props = {
        'weight': rng.rand(len(aids1)),
        'reviewer_confidence': rng.rand(len(aids1)),
        'algo_confidence': rng.rand(len(aids1)),
    }

    # Remove data that does not need to be visualized
    # (don't show all the aids if you don't have to)
    thresh = .5
    flags = edge_props['weight'] > thresh
    aids1_ = ut.compress(aids1, flags)
    aids2_ = ut.compress(aids2, flags)
    chosen_props = ut.dict_subset(edge_props, ['weight'])
    edge_props = ut.map_dict_vals(ut.partial(ut.compress, flag_list=flags), chosen_props)

    edge_keys = list(edge_props.keys())
    edge_vals = ut.dict_take(edge_props, edge_keys)
    edge_attr_list = [dict(zip(edge_keys, vals_)) for vals_ in zip(*edge_vals)]

    unique_aids = list(set(aids1_ + aids2_))
    # Make a graph between the chips
    nodes = unique_aids
    edges = list(zip(aids1_, aids2_, edge_attr_list))
    import networkx as nx
    graph = nx.DiGraph()
    graph.add_nodes_from(nodes)
    graph.add_edges_from(edges)
    from ibeis.viz import viz_graph
    fnum = None
    #zoom = kwargs.get('zoom', .4)
    viz_graph.viz_netx_chipgraph(ibs, graph, fnum=fnum, with_images=True, augment_graph=False)
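
The ut.map_dict_vals / ut.compress combination above applies the same boolean mask to every edge-property column. In plain Python the same filtering step reads roughly as follows (toy data, illustrative only):

flags = [True, False, True]
chosen_props = {'weight': [0.9, 0.2, 0.7]}
filtered = {key: [val for val, keep in zip(vals, flags) if keep]
            for key, vals in chosen_props.items()}
print(filtered)  # {'weight': [0.9, 0.7]}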
Example #14
def _test_buffered_generator_general2(bgfunc,
                                      bgargs,
                                      fgfunc,
                                      target_looptime=1.0,
                                      serial_cheat=1,
                                      buffer_size=2,
                                      show_serial=True):
    """
    # We are going to generate output of bgfunc in the background while
    # fgfunc is running in the foreground. fgfunc takes results of bgfunc as
    # args.
    # --- Hyperparams
    target_looptime = 1.5  # maximum time to run all loops
    """
    import utool as ut
    with ut.Timer('One* call to bgfunc') as t_bgfunc:
        results = [bgfunc(arg) for arg in bgargs]
    bgfunctime = t_bgfunc.ellapsed / len(bgargs)
    #fgfunc = ut.is_prime
    with ut.Timer('One* call to fgfunc') as t_fgfunc:
        [fgfunc(x) for x in results]
    fgfunctime = t_fgfunc.ellapsed / len(bgargs)
    # compute amount of loops to run
    est_looptime = (bgfunctime + fgfunctime)
    _num_loops = round(target_looptime // est_looptime)
    num_data = int(_num_loops // len(bgargs))
    num_loops = int(num_data * len(bgargs))
    serial_cheat = min(serial_cheat, num_data)
    data = ut.flatten([bgargs] * num_data)
    est_tfg = fgfunctime * num_loops
    est_tbg = bgfunctime * num_loops
    est_needed_buffers = fgfunctime / bgfunctime
    print('Estimated stats' + ut.dict_str(
        ut.dict_subset(locals(), [
            'num_loops',
            'bgfunctime',
            'fgfunctime',
            'est_tfg',
            'est_tbg',
            'serial_cheat',
            'buffer_size',
            'est_needed_buffers',
        ])))
    if show_serial:
        with ut.Timer('serial') as t1:
            # cheat for serial to make it go faster
            for x in map(bgfunc, data[:len(data) // serial_cheat]):
                fgfunc(x)
        t_serial = serial_cheat * t1.ellapsed
        print('...toc(\'adjusted_serial\') = %r' % (t_serial))
    with ut.Timer('ut.buffered_generator') as t2:
        gen_ = ut.buffered_generator(map(bgfunc, data),
                                     buffer_size=buffer_size)
        for x in gen_:
            fgfunc(x)
    with ut.Timer('ut.generate') as t3:
        gen_ = ut.generate(bgfunc,
                           data,
                           chunksize=buffer_size,
                           quiet=1,
                           verbose=0)
        for x in gen_:
            fgfunc(x)
    # Compare theoretical vs practical efficiency
    print('\n Theoretical Results')

    def parallel_efficiency(ellapsed, est_tfg, est_tbg):
        return (1 - ((ellapsed - est_tfg) / est_tbg)) * 100

    if show_serial:
        print('Theoretical gain (serial) = %.3f%%' %
              (parallel_efficiency(t_serial, est_tfg, est_tbg), ))
    print('Theoretical gain (ut.buffered_generator) = %.3f%%' %
          (parallel_efficiency(t2.ellapsed, est_tfg, est_tbg), ))
    print('Theoretical gain (ut.generate) = %.2f%%' %
          (parallel_efficiency(t3.ellapsed, est_tfg, est_tbg), ))
    if show_serial:
        prac_tbg = t_serial - est_tfg
        print('\n Practical Results')
        print('Practical gain (serial) = %.3f%%' %
              (parallel_efficiency(t1.ellapsed, est_tfg, prac_tbg), ))
        print('Practical gain (ut.buffered_generator) = %.3f%%' %
              (parallel_efficiency(t2.ellapsed, est_tfg, prac_tbg), ))
        print('Practical gain (ut.generate) = %.2f%%' %
              (parallel_efficiency(t3.ellapsed, est_tfg, prac_tbg), ))
Example #15
def _test_buffered_generator_general(func,
                                     args,
                                     sleepfunc,
                                     target_looptime=1.0,
                                     serial_cheat=1,
                                     argmode=False,
                                     buffer_size=2):
    """
    # We are going to generate output of func in the background while
    # sleepfunc is running in the foreground
    # --- Hyperparams
    target_looptime = 1.5  # maximum time to run all loops
    """
    import utool as ut
    #serial_cheat = 1  # approx division factor to run serial less times
    show_serial = True  # target_looptime < 10.  # 3.0

    with ut.Timer('One* call to func') as t_fgfunc:
        results = [func(arg) for arg in args]
    functime = t_fgfunc.ellapsed / len(args)
    #sleepfunc = ut.is_prime
    with ut.Timer('One* call to sleep func') as t_sleep:
        if argmode:
            [sleepfunc(x) for x in results]
        else:
            [sleepfunc() for x in results]
    sleeptime = t_sleep.ellapsed / len(args)
    # compute amount of loops to run
    _num_loops = round(target_looptime // (functime + sleeptime))
    num_data = int(_num_loops // len(args))
    num_loops = int(num_data * len(args))
    serial_cheat = min(serial_cheat, num_data)
    data = ut.flatten([args] * num_data)
    est_tsleep = sleeptime * num_loops
    est_tfunc = functime * num_loops
    est_needed_buffers = sleeptime / functime
    print('Estimated stats' + ut.dict_str(
        ut.dict_subset(locals(), [
            'num_loops',
            'functime',
            'sleeptime',
            'est_tsleep',
            'est_tfunc',
            'serial_cheat',
            'buffer_size',
            'est_needed_buffers',
        ])))
    if show_serial:
        with ut.Timer('serial') as t1:
            # cheat for serial to make it go faster
            for x in map(func, data[:len(data) // serial_cheat]):
                if argmode:
                    sleepfunc(x)
                else:
                    sleepfunc()
        t_serial = serial_cheat * t1.ellapsed
        print('...toc(\'adjusted_serial\') = %r' % (t_serial))
    with ut.Timer('ut.buffered_generator') as t2:
        gen_ = ut.buffered_generator(map(func, data), buffer_size=buffer_size)
        for x in gen_:
            if argmode:
                sleepfunc(x)
            else:
                sleepfunc()
    with ut.Timer('ut.generate') as t3:
        gen_ = ut.generate(func,
                           data,
                           chunksize=buffer_size,
                           quiet=1,
                           verbose=0)
        for x in gen_:
            if argmode:
                sleepfunc(x)
            else:
                sleepfunc()
    # Compare theoretical vs practical efficiency
    print('\n Theoretical Results')

    def parallel_efficiency(ellapsed, est_tsleep, est_tfunc):
        return (1 - ((ellapsed - est_tsleep) / est_tfunc)) * 100

    if show_serial:
        print('Theoretical gain (serial) = %.3f%%' %
              (parallel_efficiency(t_serial, est_tsleep, est_tfunc), ))
    print('Theoretical gain (ut.buffered_generator) = %.3f%%' %
          (parallel_efficiency(t2.ellapsed, est_tsleep, est_tfunc), ))
    print('Theoretical gain (ut.generate) = %.2f%%' %
          (parallel_efficiency(t3.ellapsed, est_tsleep, est_tfunc), ))
    if show_serial:
        prac_tfunc = t_serial - est_tsleep
        print('\n Practical Results')
        print('Practical gain (serial) = %.3f%%' %
              (parallel_efficiency(t1.ellapsed, est_tsleep, prac_tfunc), ))
        print('Practical gain (ut.buffered_generator) = %.3f%%' %
              (parallel_efficiency(t2.ellapsed, est_tsleep, prac_tfunc), ))
        print('Practical gain (ut.generate) = %.2f%%' %
              (parallel_efficiency(t3.ellapsed, est_tsleep, prac_tfunc), ))
Example #16
 def heuristic_infodict(card):
     infodict = ut.dict_subset(card.__dict__, ['name', 'mana_cost'])
     heuristic_types, heuristic_subtypes = card.get_heuristic_info()
     infodict['heuristic_types'] = heuristic_types
     return infodict
Example #17
def get_pipecfg_list(test_cfg_name_list, ibs=None):
    r"""
    Builds a list of varied query configurations. Only custom configs depend on
    an ibs object. The order of the output is not guaranteed to agree with the
    input order.

    Args:
        test_cfg_name_list (list): list of strs
        ibs (IBEISController): ibeis controller object (optional)

    Returns:
        tuple: (cfg_list, cfgx2_lbl) -
            cfg_list (list): list of config objects
            cfgx2_lbl (list): denotes which parameters are being varied.
                If there is just one config then nothing is varied

    CommandLine:
        python -m ibeis.expt.experiment_helpers --exec-get_pipecfg_list:0
        python -m ibeis.expt.experiment_helpers --exec-get_pipecfg_list:1 --db humpbacks

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.expt.experiment_helpers import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='testdb1')
        >>> #test_cfg_name_list = ['best', 'custom', 'custom:sv_on=False']
        >>> #test_cfg_name_list = ['default', 'default:sv_on=False', 'best']
        >>> test_cfg_name_list = ['default', 'default:sv_on=False', 'best']
        >>> # execute function
        >>> (pcfgdict_list, pipecfg_list) = get_pipecfg_list(test_cfg_name_list, ibs)
        >>> # verify results
        >>> assert pipecfg_list[0].sv_cfg.sv_on is True
        >>> assert pipecfg_list[1].sv_cfg.sv_on is False
        >>> pipecfg_lbls = get_varied_pipecfg_lbls(pcfgdict_list)
        >>> result = ('pipecfg_lbls = '+ ut.list_str(pipecfg_lbls))
        >>> print(result)
        pipecfg_lbls = [
            'default:',
            'default:sv_on=False',
        ]

    Example1:
        >>> # DISABLE_DOCTEST
        >>> import ibeis_flukematch.plugin
        >>> from ibeis.expt.experiment_helpers import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='humpbacks')
        >>> test_cfg_name_list = ['default:pipeline_root=BC_DTW,decision=average', 'default:K=[1,4]']
        >>> (pcfgdict_list, pipecfg_list) = get_pipecfg_list(test_cfg_name_list, ibs)
        >>> pipecfg_lbls = get_varied_pipecfg_lbls(pcfgdict_list)
        >>> result = ('pipecfg_lbls = '+ ut.list_str(pipecfg_lbls))
        >>> print(result)
        >>> print_pipe_configs(pcfgdict_list, pipecfg_list)
    """
    if ut.VERBOSE:
        print('[expt_help.get_pipecfg_list] building pipecfg_list using: %s' %
              test_cfg_name_list)
    if isinstance(test_cfg_name_list, six.string_types):
        test_cfg_name_list = [test_cfg_name_list]
    _standard_cfg_names = []
    _pcfgdict_list = []

    # HACK: Parse out custom configs first
    for test_cfg_name in test_cfg_name_list:
        if test_cfg_name.startswith('custom:') or test_cfg_name == 'custom':
            print('[expthelpers] Parsing nonstandard custom config')
            if test_cfg_name.startswith('custom:'):
                # parse out modifications to custom
                cfgstr_list = ':'.join(test_cfg_name.split(':')[1:]).split(',')
                augcfgdict = ut.parse_cfgstr_list(cfgstr_list, smartcast=True)
            else:
                augcfgdict = {}
            # Take the configuration from the ibeis object
            pipe_cfg = ibs.cfg.query_cfg.deepcopy()
            # Update with augmented params
            pipe_cfg.update_query_cfg(**augcfgdict)
            # Parse out a standard cfgdict
            cfgdict = dict(pipe_cfg.parse_items())
            cfgdict['_cfgname'] = 'custom'
            cfgdict['_cfgstr'] = test_cfg_name
            _pcfgdict_list.append(cfgdict)
        else:
            _standard_cfg_names.append(test_cfg_name)
    # Handle standard configs next
    if len(_standard_cfg_names) > 0:
        # Get parsing information
        #cfg_default_dict = dict(Config.QueryConfig().parse_items())
        #valid_keys = list(cfg_default_dict.keys())
        cfgstr_list = _standard_cfg_names
        named_defaults_dict = ut.dict_subset(
            experiment_configs.__dict__, experiment_configs.TEST_NAMES)
        alias_keys = experiment_configs.ALIAS_KEYS
        # Parse standard pipeline cfgstrings
        metadata = {'ibs': ibs}
        dict_comb_list = cfghelpers.parse_cfgstr_list2(
            cfgstr_list,
            named_defaults_dict,
            cfgtype=None,
            alias_keys=alias_keys,
            # Hack out valid keys for humpbacks
            #valid_keys=valid_keys,
            strict=False,
            metadata=metadata
        )
        # Get varied params (there may be duplicates)
        _pcfgdict_list.extend(ut.flatten(dict_comb_list))

    # TODO: respect different algorithm parameters
    # like flukes

    # Expand cfgdicts into PipelineConfig config objects
    if ibs is None:
        configclass_list = [Config.QueryConfig] * len(_pcfgdict_list)
    else:
        root_to_config = ibs.depc.configclass_dict
        configclass_list = [
            root_to_config.get(_cfgdict.get('pipeline_root', 'vsmany'), Config.QueryConfig)
            for _cfgdict in _pcfgdict_list]
    _pipecfg_list = [cls(**_cfgdict) for cls, _cfgdict in zip(configclass_list, _pcfgdict_list)]

    # Enforce rule that removes duplicate configs
    # by using feasibility from ibeis.algo.Config
    # TODO: Move this unique finding code to its own function
    # and then move it up one function level so even the custom
    # configs can be uniquified
    _flag_list = ut.flag_unique_items(_pipecfg_list)
    cfgdict_list = ut.compress(_pcfgdict_list, _flag_list)
    pipecfg_list = ut.compress(_pipecfg_list, _flag_list)
    if ut.NOT_QUIET:
        print('[harn.help] return %d / %d unique pipeline configs from: %r' %
              (len(cfgdict_list), len(_pcfgdict_list), test_cfg_name_list))

    if ut.get_argflag(('--pcfginfo', '--pinfo', '--pipecfginfo')):
        import sys
        ut.colorprint('Requested PcfgInfo for tests... ', 'red')
        print_pipe_configs(cfgdict_list, pipecfg_list)
        ut.colorprint('Finished Reporting PcfgInfo. Exiting', 'red')
        sys.exit(1)
    return (cfgdict_list, pipecfg_list)
Example #18
 def cut(self, event):
     keys = ['min_labels', 'max_labels']
     infrkw = ut.dict_subset(self.config, keys)
     self.infr.relabel_using_inference(**infrkw)
     self.show_page()
Example #19
                print("Took %0.2f seconds" % active_toc)
                # give the list of active anchors (i.e. the keys in active_triplets) as the dataset
                # 'y' key is a hack
                full_triplet_sets = {k:full_triplets(active_triplets[k]) for k in active_triplets}
                dataset = full_triplet_sets


                #anchor_sets = {k:{'y':np.random.permutation(np.array(active_triplets[k].keys()))} for k in active_triplets}

                print("Active anchors left: %r" % ({k:len(v) for k, v in active_triplets.items()}))
                print("Active triplets left: %r" % ({k:v['y'].shape[0] for k, v in full_triplet_sets.items()}))
                # build the batch loader as a partial function on the dataset and the actives
                #batch_maker = {k:partial(make_batch, dataset=dset[k], actives=active_triplets[k]) for k in ['train','valid']}
            else:
                print("Number of pairs: %r" % ({k:len(dset[k]['y']) for k in dset}))
                train_pairs_shuffled = {k:shuffle_dataset(dict(ut.dict_subset(dset[k], ['pairs','y']))) for k in ['train','valid']}
                dataset = train_pairs_shuffled


            batch_maker = {k:partial(FUNCTIONS[options.loss_type]['bm'], dataset=dset[k]) for k in ['train','valid']}
            batch_loader = partial(FUNCTIONS[options.loss_type]['nonaug_bl'], batch_maker=batch_maker)

            # so we're going to just give dset as
            loss = train_epoch(iter_funcs, dataset, batch_size, batch_loader, layer_names=layer_names)
            epoch_losses.append(loss['train_loss'])
            batch_losses.append(loss['all_train_loss'])
            toc = time.time() - tic
            print("Learning rate: %0.5f" % momentum_params['l_r'].get_value())
            print("Train loss (reg): %0.3f\nTrain loss: %0.3f\nValid loss: %0.3f" %
                    (loss['train_reg_loss'],loss['train_loss'],loss['valid_loss']))
            print("Train %s failed: %s\nValid %s failed: %s" % (options.loss_type, loss['train_acc'],
Example #20
def test_sver_wrapper():
    """
    Test to ensure cpp and python agree and that cpp is faster

    CommandLine:
        python -m vtool.sver_c_wrapper --test-test_sver_wrapper
        python -m vtool.sver_c_wrapper --test-test_sver_wrapper --rebuild-sver
        python -m vtool.sver_c_wrapper --test-test_sver_wrapper --show
        python -m vtool.sver_c_wrapper --test-test_sver_wrapper --show --dummy
        python -m vtool.sver_c_wrapper --test-test_sver_wrapper --show --fname1=easy1.png --fname2=easy2.png
        python -m vtool.sver_c_wrapper --test-test_sver_wrapper --show --fname1=easy1.png --fname2=hard3.png
        python -m vtool.sver_c_wrapper --test-test_sver_wrapper --show --fname1=carl.jpg --fname2=hard3.png

    Example:
        >>> # ENABLE_DOCTEST
        >>> from vtool.sver_c_wrapper import *  # NOQA
        >>> test_sver_wrapper()

    Ignore:
        %timeit call_python_version(*args)
        %timeit get_affine_inliers_cpp(*args)
    """
    import vtool.spatial_verification as sver
    import vtool.tests.dummy as dummy
    xy_thresh_sqrd = ktool.KPTS_DTYPE(.4)
    scale_thresh_sqrd = ktool.KPTS_DTYPE(2.0)
    ori_thresh = ktool.KPTS_DTYPE(TAU / 4.0)
    keys = 'xy_thresh_sqrd, scale_thresh_sqrd, ori_thresh'.split(', ')
    print(ut.dict_str(ut.dict_subset(locals(), keys)))

    def report_errors():
        pass

    if ut.get_argflag('--dummy'):
        testtup = dummy.testdata_dummy_matches()
        (kpts1, kpts2, fm_input, fs_input, rchip1, rchip2) = testtup
        fm_input = fm_input.astype(fm_dtype)
        #fm_input = fm_input[0:10].astype(fm_dtype)
        #fs_input = fs_input[0:10].astype(np.float32)
    else:
        fname1 = ut.get_argval('--fname1', type_=str, default='easy1.png')
        fname2 = ut.get_argval('--fname2', type_=str, default='easy2.png')
        testtup = dummy.testdata_ratio_matches(fname1, fname2)
        (kpts1, kpts2, fm_input, fs_input, rchip1, rchip2) = testtup

    # pack up call to aff hypothesis
    import vtool as vt
    import scipy.stats.mstats
    scales1 = vt.get_scales(kpts1.take(fm_input.T[0], axis=0))
    scales2 = vt.get_scales(kpts2.take(fm_input.T[1], axis=0))
    #fs_input = 1 / scipy.stats.mstats.gmean(np.vstack((scales1, scales2)))
    fs_input = scipy.stats.mstats.gmean(np.vstack((scales1, scales2)))
    print('fs_input = ' + ut.numpy_str(fs_input))
    #fs_input[0:-9] = 0
    #fs_input = np.ones(len(fm_input), dtype=fs_dtype)
    #ut.embed()
    #fs_input = scales1 * scales2
    args = (kpts1, kpts2, fm_input, fs_input, xy_thresh_sqrd,
            scale_thresh_sqrd, ori_thresh)

    ex_list = []

    try:
        with ut.Indenter('[TEST1] '):
            inlier_tup = vt.compare_implementations(
                sver.get_affine_inliers,
                get_affine_inliers_cpp,
                args,
                lbl1='py',
                lbl2='c',
                output_lbl=('aff_inliers_list', 'aff_errors_list', 'Aff_mats'))
            out_inliers, out_errors, out_mats = inlier_tup
    except AssertionError as ex:
        ex_list.append(ex)
        raise

    try:
        import functools
        with ut.Indenter('[TEST2] '):
            bestinlier_tup = vt.compare_implementations(
                functools.partial(sver.get_best_affine_inliers, forcepy=True),
                get_best_affine_inliers_cpp,
                args,
                show_output=True,
                lbl1='py',
                lbl2='c',
                output_lbl=('bestinliers', 'besterror', 'bestmat'))
            bestinliers, besterror, bestmat = bestinlier_tup
    except AssertionError as ex:
        ex_list.append(ex)
        raise

    if len(ex_list) > 0:
        raise AssertionError('some tests failed. see previous stdout')

    #num_inliers_list = np.array(map(len, out_inliers_c))
    #best_argx = num_inliers_list.argmax()
    ##best_inliers_py = out_inliers_py[best_argx]
    #best_inliers_c = out_inliers_c[best_argx]
    if ut.show_was_requested():
        import plottool as pt
        fm_output = fm_input.take(bestinliers, axis=0)
        fnum = pt.next_fnum()
        pt.figure(fnum=fnum, doclf=True, docla=True)
        pt.show_chipmatch2(rchip1,
                           rchip2,
                           kpts1,
                           kpts2,
                           fm_input,
                           ell_linewidth=5,
                           fnum=fnum,
                           pnum=(2, 1, 1))
        pt.show_chipmatch2(rchip1,
                           rchip2,
                           kpts1,
                           kpts2,
                           fm_output,
                           ell_linewidth=5,
                           fnum=fnum,
                           pnum=(2, 1, 2))
        pt.show_if_requested()
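
vt.compare_implementations is used above to check that the Python and C++ code paths agree. A hedged sketch of what such a comparison boils down to (an illustration with a made-up helper name, not vtool's actual function):

import numpy as np

def compare_outputs(func_a, func_b, args, atol=1e-6):
    # Run both implementations on identical inputs and check the outputs agree.
    outs_a = func_a(*args)
    outs_b = func_b(*args)
    assert len(outs_a) == len(outs_b), 'different number of outputs'
    for a, b in zip(outs_a, outs_b):
        assert np.allclose(a, b, atol=atol), 'implementations disagree'
    return outs_b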
Example #21
def show_arch_nx_graph(layers, fnum=None, fullinfo=True):
    r"""

    CommandLine:
        python -m ibeis_cnn.draw_net show_arch_nx_graph:0 --show
        python -m ibeis_cnn.draw_net show_arch_nx_graph:1 --show

    Example0:
        >>> # ENABLE_DOCTEST
        >>> from ibeis_cnn.draw_net import *  # NOQA
        >>> from ibeis_cnn import models
        >>> model = models.mnist.MNISTModel(batch_size=128, output_dims=10,
        >>>                                 data_shape=(24, 24, 3))
        >>> model.init_arch()
        >>> layers = model.get_all_layers()
        >>> show_arch_nx_graph(layers)
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> ut.show_if_requested()

    Example1:
        >>> # ENABLE_DOCTEST
        >>> from ibeis_cnn.draw_net import *  # NOQA
        >>> from ibeis_cnn import models
        >>> model = models.SiameseCenterSurroundModel(autoinit=True)
        >>> layers = model.get_all_layers()
        >>> show_arch_nx_graph(layers)
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> ut.show_if_requested()

    """
    import networkx as nx
    import plottool as pt
    import ibeis_cnn.__LASAGNE__ as lasagne
    #from matplotlib import offsetbox
    #import matplotlib as mpl

    REMOVE_BATCH_SIZE = True
    from ibeis_cnn import net_strs

    def get_hex_color(layer_type):
        if 'Input' in layer_type:
            return '#A2CECE'
        if 'Conv2D' in layer_type:
            return '#7C9ABB'
        if 'Dense' in layer_type:
            return '#6CCF8D'
        if 'Pool' in layer_type:
            return '#9D9DD2'
        if 'SoftMax' in layer_type:
            return '#7E9FD9'
        else:
            return '#{0:x}'.format(hash(layer_type + 'salt') % 2 ** 24)

    node_dict = {}
    edge_list = []
    edge_attrs = ut.ddict(dict)

    # Make layer ids (ensure no duplicates)
    layer_to_id = {
        l: repr(l) if l.name is None else l.name
        for l in set(layers)
    }
    keys_ = list(layer_to_id.keys())
    dups = ut.find_duplicate_items(layer_to_id.values())
    for dupval, dupidxs in dups.items():
        newval_fmt = dupval + '_%d'
        for layer in ut.take(keys_, dupidxs):
            newid = ut.get_nonconflicting_string(newval_fmt, layer_to_id.values())
            layer_to_id[layer] = newid

    def layerid(layer):
        return layer_to_id[layer]

    main_nodes = []

    for i, layer in enumerate(layers):
        layer_info = net_strs.get_layer_info(layer)
        layer_type = layer_info['classalias']

        key = layerid(layer)

        color = get_hex_color(layer_info['classalias'])
        # Make label
        lines = []
        if layer_info['name'] is not None:
            lines.append(layer_info['name'])
        if fullinfo:
            lines.append(layer_info['classalias'])
            for attr, val in layer_info['layer_attrs'].items():
                if attr == 'shape' and REMOVE_BATCH_SIZE:
                    val = val[1:]
                if attr == 'output_shape' and REMOVE_BATCH_SIZE:
                    val = val[1:]
                lines.append('{0}: {1}'.format(attr, val))

            nonlinearity = layer_info.get('nonlinearity')
            if nonlinearity is not None:
                alias_map = {
                    'LeakyRectify': 'LReLU',
                }
                val = layer_info['nonlinearity']['type']
                val = alias_map.get(val, val)
                lines.append('nonlinearity:\n{0}'.format(val))

        label = '\n'.join(lines)

        # append node
        is_main_layer = len(layer.params) > 0
        #is_main_layer = len(lasagne.layers.get_all_params(layer, trainable=True)) > 0
        if layer_info['classname'] in lasagne.layers.normalization.__all__:
            is_main_layer = False
        if layer_info['classname'] in lasagne.layers.special.__all__:
            is_main_layer = False
        if layer_info['classname'].startswith('BatchNorm'):
            is_main_layer = False
        if layer_info['classname'].startswith('ElemwiseSum'):
            is_main_layer = True

        if layer_type == 'Input':
            is_main_layer = True

        if hasattr(layer, '_is_main_layer'):
            is_main_layer = layer._is_main_layer

        #if getattr(layer, 'name', '') is not None and getattr(layer, 'name', '') .endswith('/sum'):
        #    is_main_layer = True

        node_attr = dict(name=key, label=label, color=color,
                         fillcolor=color, style='filled',
                         is_main_layer=is_main_layer)

        node_attr['is_main_layer'] = is_main_layer
        if is_main_layer:
            main_nodes.append(key)
        node_attr['classalias'] = layer_info['classalias']

        if is_main_layer or node_attr['classalias'].startswith('Conv'):
            if hasattr(layer, 'shape'):
                if len(layer.shape) == 3:
                    node_attr['out_size'] = (layer.shape[2],
                                             layer.shape[1])
                    node_attr['depth'] = layer.output_shape[0]
            if hasattr(layer, 'output_shape'):
                if len(layer.output_shape) == 4:
                    depth = layer.output_shape[1]
                    width, height = (layer.output_shape[3],
                                     layer.output_shape[2])
                    xshift = -width * (.1 / (depth ** (1 / 3))) / 3
                    yshift = height * (.1 / (depth ** (1 / 3))) / 2
                    node_attr['depth'] = depth
                    node_attr['xshift'] = xshift
                    node_attr['yshift'] = yshift
                    node_attr['out_size'] = (width, height)

                if len(layer.output_shape) == 2:
                    node_attr['out_size'] = (1,
                                             layer.output_shape[1])

        node_dict[key] = node_attr

        _input_layers = []
        if hasattr(layer, 'input_layers'):
            _input_layers += layer.input_layers
        if hasattr(layer, 'input_layer'):
            _input_layers += [layer.input_layer]

        for input_layer in _input_layers:
            parent_key = layerid(input_layer)
            edge = (parent_key, key)
            edge_list.append(edge)

    main_size_ = np.array((100, 100)) * 4
    sub_size = np.array((75, 50)) * 4

    # Setup scaled width and heights
    out_size_list = [v['out_size'] for v in node_dict.values() if 'out_size' in v]
    out_size_list = np.array(out_size_list)
    #out_size_list = out_size_list[out_size_list.T[0] > 1]
    area_arr = np.prod(out_size_list, axis=1)
    main_outsize = np.array(out_size_list[area_arr.argmax()])
    #main_outsize = np.array(out_size_list[area_arr.argmin()])
    scale = main_size_ / main_outsize

    scale_dense_max = .25
    scale_dense_min = 8

    for k, v in node_dict.items():
        if v['is_main_layer'] or v['classalias'].startswith('Conv'):
            if 'out_size' in v:
                # Make dense layers more visible
                if v['classalias'] == 'Dense':
                    v['shape'] = 'rect'
                    v['width'] = scale_dense_min
                    if v['out_size'][1] > main_outsize[1]:
                        v['height'] = v['out_size'][1] * scale[1] * scale_dense_max
                    elif v['out_size'][1] < scale_dense_min:
                        v['height'] = scale_dense_min * v['out_size'][1]
                    else:
                        v['height'] = v['out_size'][1]
                elif v['classalias'].startswith('Conv'):
                    v['shape'] = 'stack'
                    #v['shape'] = 'rect'
                    v['width'] = v['out_size'][0] * scale[0]
                    v['height'] = v['out_size'][1] * scale[1]
                else:
                    v['shape'] = 'rect'
                    v['width'] = v['out_size'][0] * scale[0]
                    v['height'] = v['out_size'][1] * scale[1]
            else:
                v['shape'] = 'rect'
                v['width'] = main_size_[0]
                v['height'] = main_size_[1]
        else:
            #v['shape'] = 'ellipse'
            v['shape'] = 'rect'
            v['style'] = 'rounded'
            v['width'] = sub_size[0]
            v['height'] = sub_size[1]

    key_order = ut.take(layer_to_id, layers)
    node_dict = ut.dict_subset(node_dict, key_order)

    #print('node_dict = ' + ut.repr3(node_dict))

    # Create the networkx graph structure
    G = nx.DiGraph()
    G.add_nodes_from(node_dict.items())
    G.add_edges_from(edge_list)
    for key, val in edge_attrs.items():
        nx.set_edge_attributes(G, key, val)

    # Add invisible structure
    #main_nodes = [key for key, val in
    #              nx.get_node_attributes(G, 'is_main_layer').items() if val]

    main_children = ut.odict()

    #for n1, n2 in ut.itertwo(main_nodes):
    #    print('n1, n2 = %r %r' % (n1, n2))
    #    import utool
    #    utool.embed()
    #    children = ut.nx_all_nodes_between(G, n1, n2)
    #    if n1 in children:
    #        children.remove(n1)
    #    if n2 in children:
    #        children.remove(n2)
    #    main_children[n1] = children

    #    #pass
    #main_children[main_nodes[-1]] = []

    for n1 in main_nodes:
        main_children[n1] = []
        # Main nodes only place constraints on nodes in the next main group.
        # Not their own
        next_main = None
        G.node[n1]['group'] = n1
        for (_, n2) in nx.bfs_edges(G, n1):
            if next_main is None:
                if n2 in main_nodes:
                    next_main = n2
                else:
                    G.node[n2]['group'] = n1
                    main_children[n1].append(n2)
            else:
                if n2 not in list(nx.descendants(G, next_main)):
                    G.node[n2]['group'] = n1
                    main_children[n1].append(n2)

    # Custom positioning
    x = 0
    y = 1000
    #print('main_children = %s' % (ut.repr3(main_children),))

    #main_nodes = ut.isect(list(nx.topological_sort(G)), main_nodes)
    xpad = main_size_[0] * .3
    ypad = main_size_[1] * .3

    # Draw each main node, and then put its children under it
    # Then move to the left and draw the next main node.
    cumwidth = 0
    for n1 in main_nodes:
        cumheight = 0

        maxwidth = G.node[n1]['width']
        for n2 in main_children[n1]:
            maxwidth = max(maxwidth, G.node[n2]['width'])

        cumwidth += xpad
        cumwidth += maxwidth / 2

        pos = np.array([x + cumwidth, y - cumheight])
        G.node[n1]['pos'] = pos
        G.node[n1]['pin'] = 'true'

        height = G.node[n1]['height']
        cumheight += height / 2

        for n2 in main_children[n1]:
            height = G.node[n2]['height']
            cumheight += ypad
            cumheight += height / 2
            pos = np.array([x + cumwidth, y - cumheight])
            G.node[n2]['pos'] = pos
            G.node[n2]['pin'] = 'true'
            cumheight += height / 2

        cumwidth += maxwidth / 2

    # Pin everybody
    nx.set_node_attributes(G, 'pin', 'true')
    layoutkw = dict(prog='neato', splines='line')
    #layoutkw = dict(prog='neato', splines='spline')
    layoutkw = dict(prog='neato', splines='ortho')
    G_ = G.copy()
    # delete labels for positioning
    _labels = nx.get_node_attributes(G_, 'label')
    ut.nx_delete_node_attr(G_, 'label')
    nx.set_node_attributes(G_, 'label', '')
    nolayout = False
    if nolayout:
        G_.remove_edges_from(list(G_.edges()))
    else:
        layout_info = pt.nx_agraph_layout(G_, inplace=True, **layoutkw)  # NOQA
    # reset labels
    if not nolayout:
        nx.set_node_attributes(G_, 'label', _labels)
    _ = pt.show_nx(G_, fontsize=8, arrow_width=.3, layout='custom', fnum=fnum)  # NOQA
    #pt.adjust_subplots(top=1, bot=0, left=0, right=1)
    pt.plt.tight_layout()
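The reordering step near the end of the function (ut.take over layer_to_id followed by ut.dict_subset) can be read as two ordinary dictionary operations. A minimal pure-Python sketch of that step, assuming layer ids are unique and that ut.dict_subset keeps the requested keys in the requested order:

from collections import OrderedDict

def reorder_nodes(node_dict, layer_to_id, layers):
    # ut.take(layer_to_id, layers): look up each layer's id, in layer order
    key_order = [layer_to_id[layer] for layer in layers]
    # ut.dict_subset(node_dict, key_order): keep only those keys, in that order
    return OrderedDict((key, node_dict[key]) for key in key_order)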
Example #22
0
    def new_cpd(self, parents=None, pmf_func=None):
        """
        Makes a new random variable that is an instance of this template.

        parents: only used to define the name of this node.
        """
        if pmf_func is None:
            pmf_func = self.pmf_func

        # --- MAKE VARIABLE ID
        def _getid(obj):
            if isinstance(obj, int):
                return str(obj)
            elif isinstance(obj, six.string_types):
                return obj
            else:
                return obj._template_id

        if not ut.isiterable(parents):
            parents = [parents]

        template_ids = [_getid(cpd) for cpd in parents]
        HACK_SAME_IDS = True
        # TODO: keep track of parent index inheritance
        # then rectify uniqueness based on that
        if HACK_SAME_IDS and ut.list_allsame(template_ids):
            _id = template_ids[0]
        else:
            _id = ''.join(template_ids)
        variable = ''.join([self.varpref, _id])
        #variable = '_'.join([self.varpref, '{' + _id + '}'])
        #variable = '$%s$' % (variable,)

        evidence_cpds = [cpd for cpd in parents if hasattr(cpd, 'ttype')]
        if len(evidence_cpds) == 0:
            evidence_cpds = None

        variable_card = len(self.basis)
        statename_dict = {
            variable: self.basis,
        }
        if self.evidence_ttypes is not None:
            if any(cpd.ttype != tcpd.ttype
                   for cpd, tcpd in zip(evidence_cpds, self.evidence_ttypes)):
                raise ValueError('Evidence is not of appropriate type')
            evidence_bases = [cpd.variable_statenames for cpd in evidence_cpds]
            evidence_card = list(map(len, evidence_bases))
            evidence_states = list(ut.iprod(*evidence_bases))

            for cpd in evidence_cpds:
                _dict = ut.dict_subset(cpd.statename_dict, [cpd.variable])
                statename_dict.update(_dict)

            evidence = [cpd.variable for cpd in evidence_cpds]
        else:
            if evidence_cpds is not None:
                raise ValueError('Gave evidence for evidence-less template')
            evidence = None
            evidence_card = None

        # --- MAKE TABLE VALUES
        if pmf_func is not None:
            if isinstance(pmf_func, list):
                values = np.array(pmf_func)
            else:
                values = np.array([
                    [pmf_func(vstate, *estates) for estates in evidence_states]
                    for vstate in self.basis
                ])
            ensure_normalized = True
            if ensure_normalized:
                values = values / values.sum(axis=0)
        else:
            # assume uniform
            fill_value = 1.0 / variable_card
            if evidence_card is None:
                values = np.full((1, variable_card), fill_value)
            else:
                values = np.full([variable_card] + list(evidence_card), fill_value)

        try:
            cpd = pgmpy.factors.TabularCPD(
                variable=variable,
                variable_card=variable_card,
                values=values,
                evidence=evidence,
                evidence_card=evidence_card,
                statename_dict=statename_dict,
            )
        except Exception as ex:
            ut.printex(ex, 'Failed to create TabularCPD',
                       keys=[
                           'variable',
                           'variable_card',
                           'statename_dict',
                           'evidence_card',
                           'evidence',
                           'values.shape',
                       ])
            raise

        cpd.ttype = self.ttype
        cpd._template_ = self
        cpd._template_id = _id
        return cpd
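The table-construction branch above evaluates pmf_func for every (variable state, evidence state) pair and then normalizes each column so it sums to one. A self-contained numpy sketch of just that step, using a made-up two-state pmf rather than anything from the real templates:

import itertools
import numpy as np

basis = ['same', 'diff']                # states of this variable
evidence_bases = [['low', 'high']]      # states of a single evidence variable

def pmf_func(vstate, estate):
    # toy unnormalized scores; the real template supplies this function
    return 2.0 if (vstate == 'same') == (estate == 'high') else 1.0

evidence_states = list(itertools.product(*evidence_bases))
values = np.array([[pmf_func(vstate, *estates) for estates in evidence_states]
                   for vstate in basis])
values = values / values.sum(axis=0)    # each column is now a distribution
print(values)                           # [[0.333 0.667] [0.667 0.333]] (approx)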
Example #23
0
def _test_buffered_generator_general(func, args, sleepfunc,
                                     target_looptime=1.0,
                                     serial_cheat=1, argmode=False,
                                     buffer_size=2):
    """
    # We are going to generate output of func in the background while sleep
    # func is running in the foreground
    # --- Hyperparams
    target_looptime = 1.5  # maximum time to run all loops
    """
    import utool as ut
    #serial_cheat = 1  # approx division factor to run serial less times
    show_serial = True  # target_looptime < 10.  # 3.0

    with ut.Timer('One* call to func') as t_fgfunc:
        results = [func(arg) for arg in args]
    functime = t_fgfunc.ellapsed / len(args)
    #sleepfunc = ut.is_prime
    with ut.Timer('One* call to sleep func') as t_sleep:
        if argmode:
            [sleepfunc(x) for x in results]
        else:
            [sleepfunc() for x in results]
    sleeptime = t_sleep.ellapsed / len(args)
    # compute amount of loops to run
    _num_loops = round(target_looptime // (functime + sleeptime))
    num_data = int(_num_loops // len(args))
    num_loops = int(num_data * len(args))
    serial_cheat = min(serial_cheat, num_data)
    data = ut.flatten([args] * num_data)
    est_tsleep = sleeptime * num_loops
    est_tfunc = functime * num_loops
    est_needed_buffers = sleeptime / functime
    print('Estimated stats' + ut.repr4(ut.dict_subset(locals(), [
        'num_loops',
        'functime', 'sleeptime', 'est_tsleep', 'est_tfunc', 'serial_cheat', 'buffer_size',
        'est_needed_buffers',
    ])))
    if show_serial:
        with ut.Timer('serial') as t1:
            # cheat for serial to make it go faster
            for x in map(func, data[:len(data) // serial_cheat]):
                if argmode:
                    sleepfunc(x)
                else:
                    sleepfunc()
        t_serial = serial_cheat * t1.ellapsed
        print('...toc(\'adjusted_serial\') = %r' % (t_serial,))
    with ut.Timer('ut.buffered_generator') as t2:
        gen_ = ut.buffered_generator(map(func, data), buffer_size=buffer_size)
        for x in gen_:
            if argmode:
                sleepfunc(x)
            else:
                sleepfunc()
    with ut.Timer('ut.generate') as t3:
        gen_ = ut.generate(func, data, chunksize=buffer_size, quiet=1, verbose=0)
        for x in gen_:
            if argmode:
                sleepfunc(x)
            else:
                sleepfunc()
    # Compare theoretical vs practical efficiency
    print('\n Theoretical Results')
    def parallel_efficiency(ellapsed, est_tsleep, est_tfunc):
        return (1 - ((ellapsed - est_tsleep) / est_tfunc)) * 100
    if show_serial:
        print('Theoretical gain (serial) = %.3f%%' % (
            parallel_efficiency(t_serial, est_tsleep, est_tfunc),))
    print('Theoretical gain (ut.buffered_generator) = %.3f%%' % (
        parallel_efficiency(t2.ellapsed, est_tsleep, est_tfunc),))
    print('Theoretical gain (ut.generate) = %.2f%%' % (
        parallel_efficiency(t3.ellapsed, est_tsleep, est_tfunc),))
    if show_serial:
        prac_tfunc = t_serial - est_tsleep
        print('\n Practical Results')
        print('Practical gain (serial) = %.3f%%' % (
            parallel_efficiency(t1.ellapsed, est_tsleep, prac_tfunc),))
        print('Practical gain (ut.buffered_generator) = %.3f%%' % (
            parallel_efficiency(t2.ellapsed, est_tsleep, prac_tfunc),))
        print('Practical gain (ut.generate) = %.2f%%' % (
            parallel_efficiency(t3.ellapsed, est_tsleep, prac_tfunc),))
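The 'theoretical gain' numbers printed above come straight from the parallel_efficiency formula: if a run took elapsed seconds, est_tsleep of it is unavoidable foreground work and est_tfunc is what the generated work would cost serially, so 1 - (elapsed - est_tsleep) / est_tfunc is the fraction of generator time hidden behind the foreground work. A tiny worked example with hypothetical timings:

def parallel_efficiency(elapsed, est_tsleep, est_tfunc):
    # 100% means all of the generator work overlapped the foreground work
    return (1 - ((elapsed - est_tsleep) / est_tfunc)) * 100

est_tsleep, est_tfunc = 6.0, 4.0   # hypothetical: 6s of sleeping, 4s of generation
print(parallel_efficiency(10.0, est_tsleep, est_tfunc))  # 0.0  -> fully serial
print(parallel_efficiency(6.5, est_tsleep, est_tfunc))   # 87.5 -> mostly overlapped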
Example #24
0
def get_injured_sharks():
    """
    >>> from wbia.scripts.getshark import *  # NOQA
    """
    import requests

    url = 'http://www.whaleshark.org/getKeywordImages.jsp'
    resp = requests.get(url)
    assert resp.status_code == 200
    keywords = resp.json()['keywords']
    key_list = ut.take_column(keywords, 'indexName')
    key_to_nice = {k['indexName']: k['readableName'] for k in keywords}

    injury_patterns = [
        'injury',
        'net',
        'hook',
        'trunc',
        'damage',
        'scar',
        'nicks',
        'bite',
    ]

    injury_keys = [
        key for key in key_list if any([pat in key for pat in injury_patterns])
    ]
    noninjury_keys = ut.setdiff(key_list, injury_keys)
    injury_nice = ut.lmap(lambda k: key_to_nice[k], injury_keys)  # NOQA
    noninjury_nice = ut.lmap(lambda k: key_to_nice[k], noninjury_keys)  # NOQA
    key_list = injury_keys

    keyed_images = {}
    for key in ut.ProgIter(key_list, lbl='reading index', bs=True):
        key_url = url + '?indexName={indexName}'.format(indexName=key)
        key_resp = requests.get(key_url)
        assert key_resp.status_code == 200
        key_imgs = key_resp.json()['images']
        keyed_images[key] = key_imgs

    key_hist = {key: len(imgs) for key, imgs in keyed_images.items()}
    key_hist = ut.sort_dict(key_hist, 'vals')
    logger.info(ut.repr3(key_hist))
    nice_key_hist = ut.map_dict_keys(lambda k: key_to_nice[k], key_hist)
    nice_key_hist = ut.sort_dict(nice_key_hist, 'vals')
    logger.info(ut.repr3(nice_key_hist))

    key_to_urls = {
        key: ut.take_column(vals, 'url')
        for key, vals in keyed_images.items()
    }
    overlaps = {}
    import itertools

    overlap_img_list = []
    for k1, k2 in itertools.combinations(key_to_urls.keys(), 2):
        overlap_imgs = ut.isect(key_to_urls[k1], key_to_urls[k2])
        num_overlap = len(overlap_imgs)
        overlaps[(k1, k2)] = num_overlap
        overlaps[(k1, k1)] = len(key_to_urls[k1])
        if num_overlap > 0:
            # logger.info('[%s][%s], overlap=%r' % (k1, k2, num_overlap))
            overlap_img_list.extend(overlap_imgs)

    all_img_urls = list(set(ut.flatten(key_to_urls.values())))
    num_all = len(all_img_urls)  # NOQA
    logger.info('num_all = %r' % (num_all, ))

    # Determine super-categories
    categories = ['nicks', 'scar', 'trunc']

    # Force these keys into these categories
    key_to_cat = {'scarbite': 'other_injury'}

    cat_to_keys = ut.ddict(list)

    for key in key_to_urls.keys():
        flag = 1
        if key in key_to_cat:
            cat = key_to_cat[key]
            cat_to_keys[cat].append(key)
            continue
        for cat in categories:
            if cat in key:
                cat_to_keys[cat].append(key)
                flag = 0
        if flag:
            cat = 'other_injury'
            cat_to_keys[cat].append(key)

    cat_urls = ut.ddict(list)
    for cat, keys in cat_to_keys.items():
        for key in keys:
            cat_urls[cat].extend(key_to_urls[key])

    cat_hist = {}
    for cat in list(cat_urls.keys()):
        cat_urls[cat] = list(set(cat_urls[cat]))
        cat_hist[cat] = len(cat_urls[cat])

    logger.info(ut.repr3(cat_to_keys))
    logger.info(ut.repr3(cat_hist))

    key_to_cat = dict([(val, key) for key, vals in cat_to_keys.items()
                       for val in vals])

    # ingestset = {
    #    '__class__': 'ImageSet',
    #    'images': ut.ddict(dict)
    # }
    # for key, key_imgs in keyed_images.items():
    #    for imgdict in key_imgs:
    #        url = imgdict['url']
    #        encid = imgdict['correspondingEncounterNumber']
    #        # Make structure
    #        encdict = encounters[encid]
    #        encdict['__class__'] = 'Encounter'
    #        imgdict = ut.delete_keys(imgdict.copy(), ['correspondingEncounterNumber'])
    #        imgdict['__class__'] = 'Image'
    #        cat = key_to_cat[key]
    #        annotdict = {'relative_bbox': [.01, .01, .98, .98], 'tags': [cat, key]}
    #        annotdict['__class__'] = 'Annotation'

    #        # Ensure structures exist
    #        encdict['images'] = encdict.get('images', [])
    #        imgdict['annots'] = imgdict.get('annots', [])

    #        # Add an image to this encounter
    #        encdict['images'].append(imgdict)
    #        # Add an annotation to this image
    #        imgdict['annots'].append(annotdict)

    # # http://springbreak.wildbook.org/rest/org.ecocean.Encounter/1111
    # get_enc_url = 'http://www.whaleshark.org/rest/org.ecocean.Encounter/%s' % (encid,)
    # resp = requests.get(get_enc_url)
    # logger.info(ut.repr3(encdict))
    # logger.info(ut.repr3(encounters))

    # Download the files to the local disk
    # fpath_list =

    all_urls = ut.unique(
        ut.take_column(
            ut.flatten(
                ut.dict_subset(keyed_images,
                               ut.flatten(cat_to_keys.values())).values()),
            'url',
        ))

    dldir = ut.truepath('~/tmpsharks')
    from os.path import commonprefix, basename  # NOQA

    prefix = commonprefix(all_urls)
    suffix_list = [url_[len(prefix):] for url_ in all_urls]
    fname_list = [suffix.replace('/', '--') for suffix in suffix_list]

    fpath_list = []
    for url, fname in ut.ProgIter(zip(all_urls, fname_list),
                                  lbl='downloading imgs',
                                  freq=1):
        fpath = ut.grab_file_url(url,
                                 download_dir=dldir,
                                 fname=fname,
                                 verbose=False)
        fpath_list.append(fpath)

    # Make sure we keep orig info
    # url_to_keys = ut.ddict(list)
    url_to_info = ut.ddict(dict)
    for key, imgdict_list in keyed_images.items():
        for imgdict in imgdict_list:
            url = imgdict['url']
            info = url_to_info[url]
            for k, v in imgdict.items():
                info[k] = info.get(k, [])
                info[k].append(v)
            info['keys'] = info.get('keys', [])
            info['keys'].append(key)
            # url_to_keys[url].append(key)

    info_list = ut.take(url_to_info, all_urls)
    for info in info_list:
        if len(set(info['correspondingEncounterNumber'])) > 1:
            assert False, 'url with two different encounter nums'
    # Combine duplicate tags

    hashid_list = [
        ut.get_file_uuid(fpath_, stride=8)
        for fpath_ in ut.ProgIter(fpath_list, bs=True)
    ]
    groupxs = ut.group_indices(hashid_list)[1]

    # Group properties by duplicate images
    # groupxs = [g for g in groupxs if len(g) > 1]
    fpath_list_ = ut.take_column(ut.apply_grouping(fpath_list, groupxs), 0)
    url_list_ = ut.take_column(ut.apply_grouping(all_urls, groupxs), 0)
    info_list_ = [
        ut.map_dict_vals(ut.flatten, ut.dict_accum(*info_))
        for info_ in ut.apply_grouping(info_list, groupxs)
    ]

    encid_list_ = [
        ut.unique(info_['correspondingEncounterNumber'])[0]
        for info_ in info_list_
    ]
    keys_list_ = [ut.unique(info_['keys']) for info_ in info_list_]
    cats_list_ = [ut.unique(ut.take(key_to_cat, keys)) for keys in keys_list_]

    clist = ut.ColumnLists({
        'gpath': fpath_list_,
        'url': url_list_,
        'encid': encid_list_,
        'key': keys_list_,
        'cat': cats_list_,
    })

    # for info_ in ut.apply_grouping(info_list, groupxs):
    #    info = ut.dict_accum(*info_)
    #    info = ut.map_dict_vals(ut.flatten, info)
    #    x = ut.unique(ut.flatten(ut.dict_accum(*info_)['correspondingEncounterNumber']))
    #    if len(x) > 1:
    #        info = info.copy()
    #        del info['keys']
    #        logger.info(ut.repr3(info))

    flags = ut.lmap(ut.fpath_has_imgext, clist['gpath'])
    clist = clist.compress(flags)

    import wbia

    ibs = wbia.opendb('WS_Injury', allow_newdir=True)

    gid_list = ibs.add_images(clist['gpath'])
    clist['gid'] = gid_list

    failed_flags = ut.flag_None_items(clist['gid'])
    logger.info('# failed %s' % (sum(failed_flags), ))
    passed_flags = ut.not_list(failed_flags)
    clist = clist.compress(passed_flags)
    ut.assert_all_not_None(clist['gid'])
    # ibs.get_image_uris_original(clist['gid'])
    ibs.set_image_uris_original(clist['gid'], clist['url'], overwrite=True)

    # ut.zipflat(clist['cat'], clist['key'])
    if False:
        # Can run detection instead
        clist['tags'] = ut.zipflat(clist['cat'])
        aid_list = ibs.use_images_as_annotations(clist['gid'],
                                                 adjust_percent=0.01,
                                                 tags_list=clist['tags'])
        aid_list

    import wbia.plottool as pt
    from wbia import core_annots

    pt.qt4ensure()
    # annots = ibs.annots()
    # aids = [1, 2]
    # ibs.depc_annot.get('hog', aids , 'hog')
    # ibs.depc_annot.get('chip', aids, 'img')
    for aid in ut.InteractiveIter(ibs.get_valid_aids()):
        hogs = ibs.depc_annot.d.get_hog_hog([aid])
        chips = ibs.depc_annot.d.get_chips_img([aid])
        chip = chips[0]
        hogimg = core_annots.make_hog_block_image(hogs[0])
        pt.clf()
        pt.imshow(hogimg, pnum=(1, 2, 1))
        pt.imshow(chip, pnum=(1, 2, 2))
        fig = pt.gcf()
        fig.show()
        fig.canvas.draw()

    # logger.info(len(groupxs))

    # if False:
    # groupxs = ut.find_duplicate_items(ut.lmap(basename, suffix_list)).values()
    # logger.info(ut.repr3(ut.apply_grouping(all_urls, groupxs)))
    #    # FIX
    #    for fpath, fname in zip(fpath_list, fname_list):
    #        if ut.checkpath(fpath):
    #            ut.move(fpath, join(dirname(fpath), fname))
    #            logger.info('fpath = %r' % (fpath,))

    # import wbia
    # from wbia.dbio import ingest_dataset
    # dbdir = wbia.sysres.lookup_dbdir('WS_ALL')
    # self = ingest_dataset.Ingestable2(dbdir)

    if False:
        # Show overlap matrix
        import wbia.plottool as pt
        import pandas as pd
        import numpy as np

        dict_ = overlaps
        s = pd.Series(dict_, index=pd.MultiIndex.from_tuples(overlaps))
        df = s.unstack()
        lhs, rhs = df.align(df.T)
        df = lhs.add(rhs, fill_value=0).fillna(0)

        label_texts = df.columns.values

        def label_ticks(label_texts):
            import wbia.plottool as pt

            truncated_labels = [repr(lbl[0:100]) for lbl in label_texts]
            ax = pt.gca()
            ax.set_xticks(list(range(len(label_texts))))
            ax.set_xticklabels(truncated_labels)
            [lbl.set_rotation(-55) for lbl in ax.get_xticklabels()]
            [
                lbl.set_horizontalalignment('left')
                for lbl in ax.get_xticklabels()
            ]

            # xgrid, ygrid = np.meshgrid(range(len(label_texts)), range(len(label_texts)))
            # pt.plot_surface3d(xgrid, ygrid, disjoint_mat)
            ax.set_yticks(list(range(len(label_texts))))
            ax.set_yticklabels(truncated_labels)
            [
                lbl.set_horizontalalignment('right')
                for lbl in ax.get_yticklabels()
            ]
            [
                lbl.set_verticalalignment('center')
                for lbl in ax.get_yticklabels()
            ]
            # [lbl.set_rotation(20) for lbl in ax.get_yticklabels()]

        # df = df.sort(axis=0)
        # df = df.sort(axis=1)

        sortx = np.argsort(df.sum(axis=1).values)[::-1]
        df = df.take(sortx, axis=0)
        df = df.take(sortx, axis=1)

        fig = pt.figure(fnum=1)
        fig.clf()
        mat = df.values.astype(np.int32)
        mat[np.diag_indices(len(mat))] = 0
        vmax = mat[(1 - np.eye(len(mat))).astype(np.bool)].max()
        import matplotlib.colors

        norm = matplotlib.colors.Normalize(vmin=0, vmax=vmax, clip=True)
        pt.plt.imshow(mat, cmap='hot', norm=norm, interpolation='none')
        pt.plt.colorbar()
        pt.plt.grid('off')
        label_ticks(label_texts)
        fig.tight_layout()

    # overlap_df = pd.DataFrame.from_dict(overlap_img_list)

    class TmpImage(ut.NiceRepr):
        pass

    from skimage.feature import hog
    from skimage import data, color, exposure
    import wbia.plottool as pt

    image2 = color.rgb2gray(data.astronaut())  # NOQA

    fpath = './GOPR1120.JPG'

    import vtool as vt

    for fpath in [fpath]:
        """
        http://scikit-image.org/docs/dev/auto_examples/plot_hog.html
        """

        image = vt.imread(fpath, grayscale=True)
        image = pt.color_funcs.to_base01(image)

        fig = pt.figure(fnum=2)
        fd, hog_image = hog(
            image,
            orientations=8,
            pixels_per_cell=(16, 16),
            cells_per_block=(1, 1),
            visualise=True,
        )

        fig, (ax1, ax2) = pt.plt.subplots(1,
                                          2,
                                          figsize=(8, 4),
                                          sharex=True,
                                          sharey=True)

        ax1.axis('off')
        ax1.imshow(image, cmap=pt.plt.cm.gray)
        ax1.set_title('Input image')
        ax1.set_adjustable('box-forced')

        # Rescale histogram for better display
        hog_image_rescaled = exposure.rescale_intensity(hog_image,
                                                        in_range=(0, 0.02))

        ax2.axis('off')
        ax2.imshow(hog_image_rescaled, cmap=pt.plt.cm.gray)
        ax2.set_title('Histogram of Oriented Gradients')
        ax2.set_adjustable('box-forced')
        pt.plt.show()
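One reusable piece of the script above is the deduplication step: hash every downloaded file, group paths that share a hash, and keep one representative per group. A standard-library sketch of that idea, standing in for the ut.get_file_uuid / ut.group_indices combination rather than reproducing it:

import hashlib
from collections import defaultdict

def dedup_by_content(fpath_list):
    groups = defaultdict(list)
    for fpath in fpath_list:
        with open(fpath, 'rb') as file_:
            digest = hashlib.sha1(file_.read()).hexdigest()
        groups[digest].append(fpath)
    # one representative path per distinct file content
    return [paths[0] for paths in groups.values()]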
Example #25
0
    def asdict(dataset):
        # save all args passed into constructor as a dict
        key_list = ut.get_func_argspec(dataset.__init__).args[1:]
        data_dict = ut.dict_subset(dataset.__dict__, key_list)
        return data_dict
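The same trick written against the standard library: inspect the constructor's parameter names and pull the matching attributes off the instance. ToyDataset is a hypothetical class used only to show the round trip, and the sketch assumes, as above, that every constructor argument is stored on the instance under its own name:

import inspect

class ToyDataset(object):
    def __init__(self, name, size=10):
        self.name = name
        self.size = size

def asdict(obj):
    # parameter names of the bound __init__ (self is already excluded)
    keys = inspect.signature(obj.__init__).parameters
    return {key: obj.__dict__[key] for key in keys}

print(asdict(ToyDataset('mnist', size=128)))  # {'name': 'mnist', 'size': 128}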
Example #26
0
    def make_annot_inference_dict(infr, internal=False):
        #import uuid

        def convert_to_name_uuid(nid):
            #try:
            text = ibs.get_name_texts(nid, apply_fix=False)
            if text is None:
                text = 'NEWNAME_%s' % (str(nid), )
            #uuid_ = uuid.UUID(text)
            #except ValueError:
            #    text = 'NEWNAME_%s' % (str(nid),)
            #    #uuid_ = nid
            return text

        ibs = infr.qreq_.ibs

        if internal:
            get_annot_uuids = ut.identity
        else:
            get_annot_uuids = ibs.get_annot_uuids
            #return uuid_

        # Compile the cluster_dict
        col_list = [
            'aid_list', 'orig_nid_list', 'new_nid_list', 'exemplar_flag_list',
            'error_flag_list'
        ]
        cluster_dict = dict(zip(col_list, ut.listT(infr.cluster_tuples)))
        cluster_dict['annot_uuid_list'] = get_annot_uuids(
            cluster_dict['aid_list'])
        # We store the name's UUID as the name's text
        #cluster_dict['orig_name_uuid_list'] = [convert_to_name_uuid(nid)
        #                                       for nid in cluster_dict['orig_nid_list']]
        #cluster_dict['new_name_uuid_list'] = [convert_to_name_uuid(nid)
        # for nid in cluster_dict['new_nid_list']]
        cluster_dict['orig_name_list'] = [
            convert_to_name_uuid(nid) for nid in cluster_dict['orig_nid_list']
        ]
        cluster_dict['new_name_list'] = [
            convert_to_name_uuid(nid) for nid in cluster_dict['new_nid_list']
        ]
        # Filter out only the keys we want to send back in the dictionary
        #key_list = ['annot_uuid_list', 'orig_name_uuid_list',
        #            'new_name_uuid_list', 'exemplar_flag_list',
        #            'error_flag_list']
        key_list = [
            'annot_uuid_list', 'orig_name_list', 'new_name_list',
            'exemplar_flag_list', 'error_flag_list'
        ]
        cluster_dict = ut.dict_subset(cluster_dict, key_list)

        # Compile the annot_pair_dict
        col_list = [
            'aid_1_list', 'aid_2_list', 'p_same_list', 'confidence_list',
            'raw_score_list'
        ]
        annot_pair_dict = dict(zip(col_list, ut.listT(infr.needs_review_list)))
        annot_pair_dict['annot_uuid_1_list'] = get_annot_uuids(
            annot_pair_dict['aid_1_list'])
        annot_pair_dict['annot_uuid_2_list'] = get_annot_uuids(
            annot_pair_dict['aid_2_list'])
        zipped = zip(annot_pair_dict['annot_uuid_1_list'],
                     annot_pair_dict['annot_uuid_2_list'],
                     annot_pair_dict['p_same_list'])
        annot_pair_dict['review_pair_list'] = [{
            'annot_uuid_key': annot_uuid_1,
            'annot_uuid_1': annot_uuid_1,
            'annot_uuid_2': annot_uuid_2,
            'prior_matching_state': {
                'p_match': p_same,
                'p_nomatch': 1.0 - p_same,
                'p_notcomp': 0.0,
            }
        } for (annot_uuid_1, annot_uuid_2, p_same) in zipped]
        # Filter out only the keys we want to send back in the dictionary
        key_list = ['review_pair_list', 'confidence_list']
        annot_pair_dict = ut.dict_subset(annot_pair_dict, key_list)

        # Compile the inference dict
        inference_dict = ut.odict([
            ('cluster_dict', cluster_dict),
            ('annot_pair_dict', annot_pair_dict),
            ('_internal_state', None),
        ])
        return inference_dict
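Each entry of review_pair_list above packages a single p_same score into a prior over the three review states. A tiny sketch of that packaging with a hypothetical score:

def make_prior(p_same):
    # complementary match / no-match mass, nothing on not-comparable
    return {'p_match': p_same, 'p_nomatch': 1.0 - p_same, 'p_notcomp': 0.0}

print(make_prior(0.75))  # {'p_match': 0.75, 'p_nomatch': 0.25, 'p_notcomp': 0.0}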
Example #27
0
def draw_twoday_count(ibs, visit_info_list_):
    import copy
    visit_info_list = copy.deepcopy(visit_info_list_)

    aids_day1, aids_day2 = ut.take_column(visit_info_list_, 'aids')
    nids_day1, nids_day2 = ut.take_column(visit_info_list_, 'unique_nids')
    resight_nids = ut.isect(nids_day1, nids_day2)

    if False:
        # HACK REMOVE DATA TO MAKE THIS FASTER
        num = 20
        for info in visit_info_list:
            non_resight_nids = list(set(info['unique_nids']) - set(resight_nids))
            sample_nids2 = non_resight_nids[0:num] + resight_nids[:num]
            info['grouped_aids'] = ut.dict_subset(info['grouped_aids'], sample_nids2)
            info['unique_nids'] = sample_nids2

    # Build a graph of matches
    if False:

        debug = False

        for info in visit_info_list:
            edges = []
            grouped_aids = info['grouped_aids']

            aids_list = list(grouped_aids.values())
            ams_list = ibs.get_annotmatch_rowids_in_cliques(aids_list)
            aids1_list = ibs.unflat_map(ibs.get_annotmatch_aid1, ams_list)
            aids2_list = ibs.unflat_map(ibs.get_annotmatch_aid2, ams_list)
            for ams, aids, aids1, aids2 in zip(ams_list, aids_list, aids1_list, aids2_list):
                edge_nodes = set(aids1 + aids2)
                ##if len(edge_nodes) != len(set(aids)):
                #    #print('--')
                #    #print('aids = %r' % (aids,))
                #    #print('edge_nodes = %r' % (edge_nodes,))
                bad_aids = edge_nodes - set(aids)
                if len(bad_aids) > 0:
                    print('bad_aids = %r' % (bad_aids,))
                unlinked_aids = set(aids) - edge_nodes
                mst_links = list(ut.itertwo(list(unlinked_aids) + list(edge_nodes)[:1]))
                bad_aids.add(None)
                user_links = [(u, v) for (u, v) in zip(aids1, aids2) if u not in bad_aids and v not in bad_aids]
                new_edges = mst_links + user_links
                new_edges = [(int(u), int(v)) for u, v in new_edges if u not in bad_aids and v not in bad_aids]
                edges += new_edges
            info['edges'] = edges

        # Add edges between days
        grouped_aids1, grouped_aids2 = ut.take_column(visit_info_list, 'grouped_aids')
        nids_day1, nids_day2 = ut.take_column(visit_info_list, 'unique_nids')
        resight_nids = ut.isect(nids_day1, nids_day2)

        resight_aids1 = ut.take(grouped_aids1, resight_nids)
        resight_aids2 = ut.take(grouped_aids2, resight_nids)
        #resight_aids3 = [list(aids1) + list(aids2) for aids1, aids2 in zip(resight_aids1, resight_aids2)]

        ams_list = ibs.get_annotmatch_rowids_between_groups(resight_aids1, resight_aids2)
        aids1_list = ibs.unflat_map(ibs.get_annotmatch_aid1, ams_list)
        aids2_list = ibs.unflat_map(ibs.get_annotmatch_aid2, ams_list)

        between_edges = []
        for ams, aids1, aids2, rawaids1, rawaids2 in zip(ams_list, aids1_list, aids2_list, resight_aids1, resight_aids2):
            link_aids = aids1 + aids2
            rawaids3 = rawaids1 + rawaids2
            badaids = ut.setdiff(link_aids, rawaids3)
            assert not badaids
            user_links = [(int(u), int(v)) for (u, v) in zip(aids1, aids2)
                          if u is not None and v is not None]
            # HACK THIS OFF
            user_links = []
            if len(user_links) == 0:
                # Hack in an edge
                between_edges += [(rawaids1[0], rawaids2[0])]
            else:
                between_edges += user_links

        assert np.all(0 == np.diff(np.array(ibs.unflat_map(ibs.get_annot_nids, between_edges)), axis=1))

        import plottool_ibeis as pt
        import networkx as nx
        #pt.qt4ensure()
        #len(list(nx.connected_components(graph1)))
        #print(ut.graph_info(graph1))

        # Layout graph
        layoutkw = dict(
            prog='neato',
            draw_implicit=False, splines='line',
            #splines='curved',
            #splines='spline',
            #sep=10 / 72,
            #prog='dot', rankdir='TB',
        )

        def translate_graph_to_origin(graph):
            x, y, w, h = ut.get_graph_bounding_box(graph)
            ut.translate_graph(graph, (-x, -y))

        def stack_graphs(graph_list, vert=False, pad=None):
            graph_list_ = [g.copy() for g in graph_list]
            for g in graph_list_:
                translate_graph_to_origin(g)
            bbox_list = [ut.get_graph_bounding_box(g) for g in graph_list_]
            if vert:
                dim1 = 3
                dim2 = 2
            else:
                dim1 = 2
                dim2 = 3
            dim1_list = np.array([bbox[dim1] for bbox in bbox_list])
            dim2_list = np.array([bbox[dim2] for bbox in bbox_list])
            if pad is None:
                pad = np.mean(dim1_list) / 2
            offset1_list = ut.cumsum([0] + [d + pad for d in dim1_list[:-1]])
            max_dim2 = max(dim2_list)
            offset2_list = [(max_dim2 - d2) / 2 for d2 in dim2_list]
            if vert:
                t_xy_list = [(d2, d1) for d1, d2 in zip(offset1_list, offset2_list)]
            else:
                t_xy_list = [(d1, d2) for d1, d2 in zip(offset1_list, offset2_list)]

            for g, t_xy in zip(graph_list_, t_xy_list):
                ut.translate_graph(g, t_xy)
                nx.set_node_attributes(g, name='pin', values='true')

            new_graph = nx.compose_all(graph_list_)
            #pt.show_nx(new_graph, layout='custom', node_labels=False, as_directed=False)  # NOQA
            return new_graph

        # Construct graph
        for count, info in enumerate(visit_info_list):
            graph = nx.Graph()
            edges = [(int(u), int(v)) for u, v in info['edges']
                     if u is not None and v is not None]
            graph.add_edges_from(edges, attr_dict={'zorder': 10})
            nx.set_node_attributes(graph, name='zorder', values=20)

            # Layout in neato
            _ = pt.nx_agraph_layout(graph, inplace=True, **layoutkw)  # NOQA

            # Extract components and then flatten in nid ordering
            ccs = list(nx.connected_components(graph))
            root_aids = []
            cc_graphs = []
            for cc_nodes in ccs:
                cc = graph.subgraph(cc_nodes)
                try:
                    root_aids.append(list(ut.nx_source_nodes(cc.to_directed()))[0])
                except nx.NetworkXUnfeasible:
                    root_aids.append(list(cc.nodes())[0])
                cc_graphs.append(cc)

            root_nids = ibs.get_annot_nids(root_aids)
            nid2_graph = dict(zip(root_nids, cc_graphs))

            resight_nids_ = set(resight_nids).intersection(set(root_nids))
            noresight_nids_ = set(root_nids) - resight_nids_

            n_graph_list = ut.take(nid2_graph, sorted(noresight_nids_))
            r_graph_list = ut.take(nid2_graph, sorted(resight_nids_))

            if len(n_graph_list) > 0:
                n_graph = nx.compose_all(n_graph_list)
                _ = pt.nx_agraph_layout(n_graph, inplace=True, **layoutkw)  # NOQA
                n_graphs = [n_graph]
            else:
                n_graphs = []

            r_graphs = [stack_graphs(chunk) for chunk in ut.ichunks(r_graph_list, 100)]
            if count == 0:
                new_graph = stack_graphs(n_graphs + r_graphs, vert=True)
            else:
                new_graph = stack_graphs(r_graphs[::-1] + n_graphs, vert=True)

            #pt.show_nx(new_graph, layout='custom', node_labels=False, as_directed=False)  # NOQA
            info['graph'] = new_graph

        graph1_, graph2_ = ut.take_column(visit_info_list, 'graph')
        if False:
            _ = pt.show_nx(graph1_, layout='custom', node_labels=False, as_directed=False)  # NOQA
            _ = pt.show_nx(graph2_, layout='custom', node_labels=False, as_directed=False)  # NOQA

        graph_list = [graph1_, graph2_]
        twoday_graph = stack_graphs(graph_list, vert=True, pad=None)
        nx.set_node_attributes(twoday_graph, name='pin', values='true')

        if debug:
            ut.nx_delete_None_edge_attr(twoday_graph)
            ut.nx_delete_None_node_attr(twoday_graph)
            print('twoday_graph(pre) info' + ut.repr3(ut.graph_info(twoday_graph), nl=2))

        # Hack, no idea why there are nodes that dont exist here
        between_edges_ = [edge for edge in between_edges
                          if twoday_graph.has_node(edge[0]) and twoday_graph.has_node(edge[1])]

        twoday_graph.add_edges_from(between_edges_, attr_dict={'alpha': .2, 'zorder': 0})
        ut.nx_ensure_agraph_color(twoday_graph)

        layoutkw['splines'] = 'line'
        layoutkw['prog'] = 'neato'
        agraph = pt.nx_agraph_layout(twoday_graph, inplace=True, return_agraph=True, **layoutkw)[-1]  # NOQA
        if False:
            fpath = ut.truepath('~/ggr_graph.png')
            agraph.draw(fpath)
            ut.startfile(fpath)

        if debug:
            print('twoday_graph(post) info' + ut.repr3(ut.graph_info(twoday_graph)))

        _ = pt.show_nx(twoday_graph, layout='custom', node_labels=False, as_directed=False)  # NOQA
Example #28
0
def test_sver_wrapper():
    """
    Test to ensure cpp and python agree and that cpp is faster

    CommandLine:
        python -m vtool.sver_c_wrapper --test-test_sver_wrapper
        python -m vtool.sver_c_wrapper --test-test_sver_wrapper --rebuild-sver
        python -m vtool.sver_c_wrapper --test-test_sver_wrapper --show
        python -m vtool.sver_c_wrapper --test-test_sver_wrapper --show --dummy
        python -m vtool.sver_c_wrapper --test-test_sver_wrapper --show --fname1=easy1.png --fname2=easy2.png
        python -m vtool.sver_c_wrapper --test-test_sver_wrapper --show --fname1=easy1.png --fname2=hard3.png
        python -m vtool.sver_c_wrapper --test-test_sver_wrapper --show --fname1=carl.jpg --fname2=hard3.png

    Example:
        >>> # ENABLE_DOCTEST
        >>> from vtool.sver_c_wrapper import *  # NOQA
        >>> test_sver_wrapper()

    Ignore:
        %timeit call_python_version(*args)
        %timeit get_affine_inliers_cpp(*args)
    """
    import vtool.spatial_verification as sver
    import vtool.tests.dummy as dummy
    xy_thresh_sqrd    = ktool.KPTS_DTYPE(.4)
    scale_thresh_sqrd = ktool.KPTS_DTYPE(2.0)
    ori_thresh        = ktool.KPTS_DTYPE(TAU / 4.0)
    keys = 'xy_thresh_sqrd, scale_thresh_sqrd, ori_thresh'.split(', ')
    print(ut.dict_str(ut.dict_subset(locals(), keys)))

    def report_errors():
        pass

    if ut.get_argflag('--dummy'):
        testtup = dummy.testdata_dummy_matches()
        (kpts1, kpts2, fm_input, fs_input, rchip1, rchip2) = testtup
        fm_input = fm_input.astype(fm_dtype)
        #fm_input = fm_input[0:10].astype(fm_dtype)
        #fs_input = fs_input[0:10].astype(np.float32)
    else:
        fname1 = ut.get_argval('--fname1', type_=str, default='easy1.png')
        fname2 = ut.get_argval('--fname2', type_=str, default='easy2.png')
        testtup = dummy.testdata_ratio_matches(fname1, fname2)
        (kpts1, kpts2, fm_input, fs_input, rchip1, rchip2) = testtup

    # pack up call to aff hypothesis
    import vtool as vt
    import scipy.stats.mstats
    scales1 = vt.get_scales(kpts1.take(fm_input.T[0], axis=0))
    scales2 = vt.get_scales(kpts2.take(fm_input.T[1], axis=0))
    #fs_input = 1 / scipy.stats.mstats.gmean(np.vstack((scales1, scales2)))
    fs_input = scipy.stats.mstats.gmean(np.vstack((scales1, scales2)))
    print('fs_input = ' + ut.numpy_str(fs_input))
    #fs_input[0:-9] = 0
    #fs_input = np.ones(len(fm_input), dtype=fs_dtype)
    #ut.embed()
    #fs_input = scales1 * scales2
    args = (kpts1, kpts2, fm_input, fs_input, xy_thresh_sqrd, scale_thresh_sqrd, ori_thresh)

    ex_list = []

    try:
        with ut.Indenter('[TEST1] '):
            inlier_tup = vt.compare_implementations(
                sver.get_affine_inliers,
                get_affine_inliers_cpp,
                args, lbl1='py', lbl2='c',
                output_lbl=('aff_inliers_list', 'aff_errors_list', 'Aff_mats')
            )
            out_inliers, out_errors, out_mats = inlier_tup
    except AssertionError as ex:
        ex_list.append(ex)
        raise

    try:
        import functools
        with ut.Indenter('[TEST2] '):
            bestinlier_tup = vt.compare_implementations(
                functools.partial(sver.get_best_affine_inliers, forcepy=True),
                get_best_affine_inliers_cpp,
                args, show_output=True, lbl1='py', lbl2='c',
                output_lbl=('bestinliers', 'besterror', 'bestmat')
            )
            bestinliers, besterror, bestmat = bestinlier_tup
    except AssertionError as ex:
        ex_list.append(ex)
        raise

    if len(ex_list) > 0:
        raise AssertionError('some tests failed. see previous stdout')

    #num_inliers_list = np.array(map(len, out_inliers_c))
    #best_argx = num_inliers_list.argmax()
    ##best_inliers_py = out_inliers_py[best_argx]
    #best_inliers_c = out_inliers_c[best_argx]
    if ut.show_was_requested():
        import plottool as pt
        fm_output = fm_input.take(bestinliers, axis=0)
        fnum = pt.next_fnum()
        pt.figure(fnum=fnum, doclf=True, docla=True)
        pt.show_chipmatch2(rchip1, rchip2, kpts1, kpts2, fm_input, ell_linewidth=5, fnum=fnum, pnum=(2, 1, 1))
        pt.show_chipmatch2(rchip1, rchip2, kpts1, kpts2, fm_output, ell_linewidth=5, fnum=fnum, pnum=(2, 1, 2))
        pt.show_if_requested()
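vt.compare_implementations does the heavy lifting in both test blocks. A simplified stand-in for that pattern (run both callables on identical inputs and assert the outputs agree), assuming array-like outputs of matching shapes and making no claim about the real vtool signature:

import numpy as np

def check_agreement(func_py, func_c, args, lbl1='py', lbl2='c'):
    outs_py = func_py(*args)
    outs_c = func_c(*args)
    # compare each returned component; assumes same-shaped array-like outputs
    for out_py, out_c in zip(outs_py, outs_c):
        assert np.allclose(out_py, out_c), (
            'outputs of %s and %s disagree' % (lbl1, lbl2))
    return outs_py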
Example #29
0
def get_annotmatch_subgraph(ibs):
    r"""
    http://bokeh.pydata.org/en/latest/
    https://github.com/jsexauer/networkx_viewer

    TODO: Need a special visualization
        In the web I need:
            * graph of annotations matches.
            * can move them around.
            * edit lines between them.
            * http://stackoverflow.com/questions/15373530/web-graph-visualization-tool

            This should  share functionality with a name view.

    Args:
        ibs (IBEISController):  ibeis controller object

    CommandLine:
        python -m ibeis.annotmatch_funcs --exec-get_annotmatch_subgraph --show

        # Networkx example
        python -m ibeis.viz.viz_graph --test-show_chipmatch_graph:0 --show

    Ignore:

        from ibeis import viz

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.annotmatch_funcs import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='PZ_MTEST')
        >>> result = get_annotmatch_subgraph(ibs)
        >>> ut.show_if_requested()
    """
    #import ibeis
    #ibs = ibeis.opendb(db='PZ_MTEST')
    #rowids = ibs._get_all_annotmatch_rowids()
    #aids1 = ibs.get_annotmatch_aid1(rowids)
    #aids2 = ibs.get_annotmatch_aid2(rowids)
    #
    #
    nids = ibs.get_valid_nids()
    nids = nids[0:5]
    aids_list = ibs.get_name_aids(nids)
    import itertools
    unflat_edges = (list(itertools.product(aids, aids)) for aids in aids_list)
    aid_pairs = [tup for tup in ut.iflatten(unflat_edges) if tup[0] != tup[1]]
    aids1 = ut.get_list_column(aid_pairs, 0)
    aids2 = ut.get_list_column(aid_pairs, 1)

    # Enumerate annotmatch properties
    rng = np.random.RandomState(0)
    edge_props = {
        'weight': rng.rand(len(aids1)),
        'reviewer_confidence': rng.rand(len(aids1)),
        'algo_confidence': rng.rand(len(aids1)),
    }

    # Remove data that does not need to be visualized
    # (dont show all the aids if you dont have to)
    thresh = .5
    flags = edge_props['weight'] > thresh
    aids1_ = ut.compress(aids1, flags)
    aids2_ = ut.compress(aids2, flags)
    chosen_props = ut.dict_subset(edge_props, ['weight'])
    edge_props = ut.map_dict_vals(ut.partial(ut.compress, flag_list=flags),
                                  chosen_props)

    edge_keys = list(edge_props.keys())
    edge_vals = ut.dict_take(edge_props, edge_keys)
    edge_attr_list = [dict(zip(edge_keys, vals_)) for vals_ in zip(*edge_vals)]

    unique_aids = list(set(aids1_ + aids2_))
    # Make a graph between the chips
    nodes = unique_aids
    edges = list(zip(aids1_, aids2_, edge_attr_list))
    import networkx as nx
    graph = nx.DiGraph()
    graph.add_nodes_from(nodes)
    graph.add_edges_from(edges)
    from ibeis.viz import viz_graph
    fnum = None
    #zoom = kwargs.get('zoom', .4)
    viz_graph.viz_netx_chipgraph(ibs,
                                 graph,
                                 fnum=fnum,
                                 with_images=True,
                                 augment_graph=False)
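The thresholding step above keeps only the edges whose weight clears a cutoff and compresses every parallel property list with the same boolean mask. A small stand-alone sketch of that pattern (threshold_edges is illustrative, not part of ibeis):

import numpy as np

def threshold_edges(aids1, aids2, edge_props, thresh=0.5):
    # boolean mask over the parallel edge lists (an ut.compress equivalent)
    flags = np.asarray(edge_props['weight']) > thresh

    def keep(seq):
        return [item for item, flag in zip(seq, flags) if flag]

    props_ = {key: keep(vals) for key, vals in edge_props.items()}
    return keep(aids1), keep(aids2), props_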
Example #30
0
    def make_annot_inference_dict(infr, internal=False):
        #import uuid

        def convert_to_name_uuid(nid):
            #try:
            text = ibs.get_name_texts(nid, apply_fix=False)
            if text is None:
                text = 'NEWNAME_%s' % (str(nid),)
            #uuid_ = uuid.UUID(text)
            #except ValueError:
            #    text = 'NEWNAME_%s' % (str(nid),)
            #    #uuid_ = nid
            return text
        ibs = infr.qreq_.ibs

        if internal:
            get_annot_uuids = ut.identity
        else:
            get_annot_uuids = ibs.get_annot_uuids
            #return uuid_

        # Compile the cluster_dict
        col_list = ['aid_list', 'orig_nid_list', 'new_nid_list',
                    'exemplar_flag_list', 'error_flag_list']
        cluster_dict = dict(zip(col_list, ut.listT(infr.cluster_tuples)))
        cluster_dict['annot_uuid_list'] = get_annot_uuids(cluster_dict['aid_list'])
        # We store the name's UUID as the name's text
        #cluster_dict['orig_name_uuid_list'] = [convert_to_name_uuid(nid)
        #                                       for nid in cluster_dict['orig_nid_list']]
        #cluster_dict['new_name_uuid_list'] = [convert_to_name_uuid(nid)
        # for nid in cluster_dict['new_nid_list']]
        cluster_dict['orig_name_list'] = [convert_to_name_uuid(nid)
                                          for nid in cluster_dict['orig_nid_list']]
        cluster_dict['new_name_list'] = [convert_to_name_uuid(nid)
                                         for nid in cluster_dict['new_nid_list']]
        # Filter out only the keys we want to send back in the dictionary
        #key_list = ['annot_uuid_list', 'orig_name_uuid_list',
        #            'new_name_uuid_list', 'exemplar_flag_list',
        #            'error_flag_list']
        key_list = ['annot_uuid_list', 'orig_name_list', 'new_name_list',
                    'exemplar_flag_list', 'error_flag_list']
        cluster_dict = ut.dict_subset(cluster_dict, key_list)

        # Compile the annot_pair_dict
        col_list = ['aid_1_list', 'aid_2_list', 'p_same_list',
                    'confidence_list', 'raw_score_list']
        annot_pair_dict = dict(zip(col_list, ut.listT(infr.needs_review_list)))
        annot_pair_dict['annot_uuid_1_list'] = get_annot_uuids(annot_pair_dict['aid_1_list'])
        annot_pair_dict['annot_uuid_2_list'] = get_annot_uuids(annot_pair_dict['aid_2_list'])
        zipped = zip(annot_pair_dict['annot_uuid_1_list'],
                     annot_pair_dict['annot_uuid_2_list'],
                     annot_pair_dict['p_same_list'])
        annot_pair_dict['review_pair_list'] = [
            {
                'annot_uuid_key'       : annot_uuid_1,
                'annot_uuid_1'         : annot_uuid_1,
                'annot_uuid_2'         : annot_uuid_2,
                'prior_matching_state' : {
                    'p_match'   : p_same,
                    'p_nomatch' : 1.0 - p_same,
                    'p_notcomp' : 0.0,
                }
            }
            for (annot_uuid_1, annot_uuid_2, p_same) in zipped
        ]
        # Filter out only the keys we want to send back in the dictionary
        key_list = ['review_pair_list', 'confidence_list']
        annot_pair_dict = ut.dict_subset(annot_pair_dict, key_list)

        # Compile the inference dict
        inference_dict = ut.odict([
            ('cluster_dict', cluster_dict),
            ('annot_pair_dict', annot_pair_dict),
            ('_internal_state', None),
        ])
        return inference_dict