Example #1
    def _cached_pairwise_features(extr, edges):
        """
        Create pairwise features for annotations in a test inference object,
        based on the features that were used during learning.

        TODO: need a more systematic way of specifying which feature dimensions
        need to be computed

        Notes:
            Given an edge (u, v), we need to:
            * Check which classifiers we have
            * Check which feature columns the classifier needs,
               and construct a configuration that can achieve that.
                * Construct the chip/feat config
                * Construct the vsone config
                * Additional LNBNN enriching config
                * Pairwise feature construction config
            * Then we can apply the features to the classifier

        edges = [(1, 2)]
        """
        edges = list(edges)
        if extr.verbose:
            print('[pairfeat] Requesting {} cached pairwise features'.format(
                len(edges)))

        # TODO: use object properties
        if len(edges) == 0:
            assert extr.feat_dims is not None, 'no edges and unset feat dims'
            index = nxu.ensure_multi_index([], ('aid1', 'aid2'))
            feats = pd.DataFrame(columns=extr.feat_dims, index=index)
            return feats
        else:
            use_cache = not extr.need_lnbnn and len(edges) > 2
            cache_dir = join(extr.ibs.get_cachedir(), 'infr_bulk_cache')
            feat_cfgstr = extr._make_cfgstr(edges)
            cacher = ub.Cacher('bulk_pairfeats_v3',
                               feat_cfgstr,
                               enabled=use_cache,
                               dpath=cache_dir,
                               verbose=extr.verbose - 3)

            # if cacher.exists() and extr.verbose > 3:
            #     fpath = cacher.get_fpath()
            #     print('Load match cache size: {}'.format(
            #         ut.get_file_nBytes_str(fpath)))

            data = cacher.tryload()
            if data is None:
                data = extr._make_pairwise_features(edges)
                cacher.save(data)

                # if cacher.enabled and extr.verbose > 3:
                #     fpath = cacher.get_fpath()
                #     print('Save match cache size: {}'.format(
                #         ut.get_file_nBytes_str(fpath)))

            matches, feats = data
            feats = extr._postprocess_feats(feats)
        return feats
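
A minimal sketch of the tryload/save caching pattern used in Example #1, assuming a recent ubelt where the dependency string is passed as `depends`; `expensive_compute` is a hypothetical stand-in for `extr._make_pairwise_features`:

    import ubelt as ub

    def expensive_compute(edges):
        # hypothetical stand-in for extr._make_pairwise_features
        return [(u + v, u * v) for u, v in edges]

    edges = [(1, 2), (2, 3)]
    # the cache key depends on the inputs, mirroring _make_cfgstr above
    cacher = ub.Cacher('demo_pairfeats', depends=repr(edges))
    data = cacher.tryload()
    if data is None:
        data = expensive_compute(edges)
        cacher.save(data)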
Example #2
    def predict_proba_df(verif, edges):
        """
        CommandLine:
            python -m ibeis.algo.graph.demo DummyVerif.predict_proba_df

        Example:
            >>> # ENABLE_DOCTEST
            >>> from ibeis.algo.graph.demo import *  # NOQA
            >>> from ibeis.algo.graph import demo
            >>> import networkx as nx
            >>> kwargs = dict(num_pccs=40, size=2)
            >>> infr = demo.demodata_infr(**kwargs)
            >>> verif = infr.dummy_verif
            >>> edges = list(infr.graph.edges())
            >>> probs = verif.predict_proba_df(edges)
            >>> #print('scores = %r' % (scores,))
            >>> #hashid = ut.hash_data(scores)
            >>> #print('hashid = %r' % (hashid,))
            >>> #assert hashid == 'cdlkytilfeqgmtsihvhqwffmhczqmpil'
        """
        infr = verif.infr
        edges = list(it.starmap(verif.infr.e_, edges))
        prob_cache = infr.task_probs['match_state']
        is_miss = np.array([e not in prob_cache for e in edges])
        # is_hit = ~is_miss
        if np.any(is_miss):
            miss_edges = ut.compress(edges, is_miss)
            miss_truths = [verif._get_truth(edge) for edge in miss_edges]
            grouped_edges = ut.group_items(miss_edges,
                                           miss_truths,
                                           sorted_=False)
            # Need to make this deterministic too
            states = [POSTV, NEGTV, INCMP]
            for key in sorted(grouped_edges.keys()):
                group = grouped_edges[key]
                probs0 = randn(shape=[len(group)],
                               rng=verif.rng,
                               a_max=1,
                               a_min=0,
                               **verif.dummy_params[key])
                # Just randomly assign other probs
                probs1 = verif.rng.rand(len(group)) * (1 - probs0)
                probs2 = 1 - (probs0 + probs1)
                for edge, probs in zip(group, zip(probs0, probs1, probs2)):
                    prob_cache[edge] = ut.dzip(states, probs)

        from ibeis.algo.graph import nx_utils as nxu
        import pandas as pd
        probs = pd.DataFrame(ut.take(prob_cache, edges),
                             index=nxu.ensure_multi_index(
                                 edges, ('aid1', 'aid2')))
        return probs
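
A numpy-only sketch of the three-way probability split in Example #2: draw the first probability, give the second a random fraction of the remaining mass, and let the third absorb the rest so each triple sums to one (the demo module's `randn` helper is replaced here by a plain uniform draw):

    import numpy as np

    rng = np.random.RandomState(42)
    n = 5
    probs0 = rng.rand(n)                   # e.g. p(POSTV), already in [0, 1]
    probs1 = rng.rand(n) * (1 - probs0)    # random share of the leftover mass
    probs2 = 1 - (probs0 + probs1)         # remainder, so each row sums to 1
    assert np.allclose(probs0 + probs1 + probs2, 1)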
Example #3
 def match_state_df(infr, index):
     """ Returns groundtruth state based on ibeis controller """
     index = ensure_multi_index(index, ('aid1', 'aid2'))
     aid_pairs = np.asarray(index.tolist())
     aid_pairs = vt.ensure_shape(aid_pairs, (None, 2))
     is_same = infr.is_same(aid_pairs)
     is_comp = infr.is_comparable(aid_pairs)
     # pd.DataFrame.from_items was removed in pandas 1.0; a plain dict
     # preserves column order on Python >= 3.7
     match_state_df = pd.DataFrame({
         NEGTV: ~is_same & is_comp,
         POSTV:  is_same & is_comp,
         INCMP: ~is_comp,
     })
     match_state_df.index = index
     return match_state_df
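
Assuming `ensure_multi_index` behaves like `pd.MultiIndex.from_tuples` over (aid1, aid2) pairs, a self-contained sketch of the same construction with made-up truth flags; the plain column names stand in for the NEGTV/POSTV/INCMP constants:

    import numpy as np
    import pandas as pd

    edges = [(1, 2), (2, 3)]
    index = pd.MultiIndex.from_tuples(edges, names=('aid1', 'aid2'))
    is_same = np.array([True, False])
    is_comp = np.array([True, True])
    df = pd.DataFrame({
        'nomatch': ~is_same & is_comp,   # comparable but different animals
        'match':    is_same & is_comp,   # comparable and the same animal
        'notcomp': ~is_comp,             # cannot be compared at all
    }, index=index)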
Example #4
    def _make_pairwise_features(extr, edges):
        """
        Construct matches and their pairwise features

        CommandLine:
            python -m ibeis.algo.verif.pairfeat _make_pairwise_features

        Doctest:
            >>> from ibeis.algo.verif.pairfeat import *
            >>> from ibeis.algo.graph import demo
            >>> infr = demo.demodata_mtest_infr()
            >>> extr = PairwiseFeatureExtractor(ibs=infr.ibs)
            >>> match_config = {'K': 1, 'Knorm': 3, 'affine_invariance': True,
            >>>           'augment_orientation': True, 'checks': 20,
            >>>           'ratio_thresh': 0.8, 'refine_method': 'homog',
            >>>           'sv_on': True, 'sver_xy_thresh': 0.01,
            >>>           'symmetric': True, 'weight': 'fgweights'}
            >>> local_keys =  [
            >>>     'fgweights', 'match_dist', 'norm_dist', 'norm_x1', 'norm_x2',
            >>>     'norm_y1', 'norm_y2', 'ratio_score', 'scale1', 'scale2',
            >>>     'sver_err_ori', 'sver_err_scale', 'sver_err_xy',
            >>>     'weighted_norm_dist', 'weighted_ratio_score']
            >>> pairfeat_cfg = {
            >>>     'bin_key': 'ratio',
            >>>     'bins': [0.6, 0.7, 0.8],
            >>>     'indices': [],
            >>>     'local_keys': local_keys,
            >>>     'sorters': [],
            >>>     'summary_ops': {'len', 'mean', 'sum'}
            >>> }
            >>> global_keys = ['gps', 'qual', 'time', 'view']
            >>> ibs = infr.ibs
            >>> extr = PairwiseFeatureExtractor(ibs, match_config=match_config,
            >>>                                 pairfeat_cfg=pairfeat_cfg,
            >>>                                 global_keys=global_keys)
            >>> multi_index = True
            >>> edges = [(1, 2), (2, 3)]
            >>> matches, X = extr._make_pairwise_features(edges)
            >>> featinfo = vt.AnnotPairFeatInfo(X.columns)
            >>> print(featinfo.get_infostr())
            >>> match = matches[0]
            >>> glob_X = match._make_global_feature_vector(global_keys)
            >>> assert len(glob_X) == 19
        """
        edges = ut.lmap(tuple, ut.aslist(edges))
        if len(edges) == 0:
            return [], []

        matches = extr._enriched_pairwise_matches(edges)
        # ---------------
        # Try different feature constructions
        print('[extr] building pairwise features')
        pairfeat_cfg = extr.pairfeat_cfg.copy()
        use_na = pairfeat_cfg.pop('use_na')
        pairfeat_cfg['summary_ops'] = set(pairfeat_cfg['summary_ops'])
        X = pd.DataFrame([
            m.make_feature_vector(**pairfeat_cfg)
            for m in ut.ProgIter(matches, label='making pairwise feats')
        ])
        multi_index = True
        if multi_index:
            # Index features by edges
            uv_index = nxu.ensure_multi_index(edges, ('aid1', 'aid2'))
            X.index = uv_index
        X[pd.isnull(X)] = np.nan
        X[np.isinf(X)] = np.nan
        # Re-order column names to ensure dimensions are consistent
        # (DataFrame.reindex_axis was removed in pandas 1.0)
        X = X.reindex(columns=sorted(X.columns))

        # hack to fix feature validity
        if 'global(speed)' in X.columns:
            if np.any(np.isinf(X['global(speed)'])):
                flags = np.isinf(X['global(speed)'])
                numer = X.loc[flags, 'global(gps_delta)']
                denom = X.loc[flags, 'global(time_delta)']
                newvals = np.full(len(numer), np.nan)
                newvals[(numer == 0) & (denom == 0)] = 0
                X.loc[flags, 'global(speed)'] = newvals

        aid_pairs_ = [(m.annot1['aid'], m.annot2['aid']) for m in matches]
        assert aid_pairs_ == edges, 'edge ordering changed'

        if not use_na:
            # Fill nan values with very large values to workaround lack of nan
            # support in sklearn master.
            X[pd.isnull(X)] = (2 ** 30) - 1
        return matches, X
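
A toy illustration, with made-up values, of the speed repair at the end of Example #4: an infinite speed means `time_delta` was zero, which is only meaningfully a speed of 0 when `gps_delta` is also zero; otherwise the value stays NaN:

    import numpy as np
    import pandas as pd

    X = pd.DataFrame({
        'global(gps_delta)':  [0.0, 5.0, 3.0],
        'global(time_delta)': [0.0, 0.0, 1.0],
        'global(speed)':      [np.inf, np.inf, 3.0],
    })
    flags = np.isinf(X['global(speed)'])
    numer = X.loc[flags, 'global(gps_delta)']
    denom = X.loc[flags, 'global(time_delta)']
    newvals = np.full(len(numer), np.nan)
    newvals[(numer == 0) & (denom == 0)] = 0   # no movement, no time: speed 0
    X.loc[flags, 'global(speed)'] = newvals
    # row 0 -> 0.0 (both deltas zero), row 1 -> NaN (moved in zero time)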
Example #5
    def ensure_priority_scores(infr, priority_edges):
        """
        Ensures that priority attributes are assigned to the edges.
        This does not change the state of the queue.

        Doctest:
            >>> import ibeis
            >>> ibs = ibeis.opendb('PZ_MTEST')
            >>> infr = ibeis.AnnotInference(ibs, aids='all')
            >>> infr.ensure_mst()
            >>> priority_edges = list(infr.edges())[0:1]
            >>> infr.ensure_priority_scores(priority_edges)

        Doctest:
            >>> import ibeis
            >>> ibs = ibeis.opendb('PZ_MTEST')
            >>> infr = ibeis.AnnotInference(ibs, aids='all')
            >>> infr.ensure_mst()
            >>> infr.load_published()
            >>> priority_edges = list(infr.edges())
            >>> infr.ensure_priority_scores(priority_edges)

        Doctest:
            >>> from ibeis.algo.graph import demo
            >>> infr = demo.demodata_infr(num_pccs=6, p_incon=.5, size_std=2)
            >>> edges = list(infr.edges())
            >>> infr.ensure_priority_scores(edges)
        """
        infr.print('Checking for verifiers: %r' % (infr.verifiers, ))

        if infr.verifiers:
            infr.print(
                'Prioritizing {} edges with one-vs-one probs'.format(
                    len(priority_edges)), 1)
            infr.print('Using thresholds: %r' % (infr.task_thresh, ))
            infr.print('Using infr.params[autoreview.enabled]          : %r' %
                       (infr.params['autoreview.enabled'], ))
            infr.print('Using infr.params[autoreview.prioritize_nonpos]: %r' %
                       (infr.params['autoreview.prioritize_nonpos'], ))

            infr.ensure_task_probs(priority_edges)

            primary_task = 'match_state'
            match_probs = infr.task_probs[primary_task]
            primary_thresh = infr.task_thresh[primary_task]

            # Read match_probs into a DataFrame
            primary_probs = pd.DataFrame(ut.take(match_probs, priority_edges),
                                         index=nxu.ensure_multi_index(
                                             priority_edges, ('aid1', 'aid2')))

            # Convert match-state probabilities into priorities
            prob_match = primary_probs[POSTV]

            # Initialize priorities to probability of matching
            default_priority = prob_match.copy()

            # If the edges are currently between the same individual, then
            # prioritize by non-positive probability (because those edges might
            # expose an inconsistency)
            already_pos = [
                infr.pos_graph.node_label(u) == infr.pos_graph.node_label(v)
                for u, v in priority_edges
            ]
            default_priority[already_pos] = 1 - default_priority[already_pos]

            if infr.params['autoreview.enabled']:
                if infr.params['autoreview.prioritize_nonpos']:
                    # Give any state (positive, negative, or not-comparable)
                    # that passes its automatic threshold high priority
                    for state in (POSTV, NEGTV, INCMP):
                        _probs = primary_probs[state]
                        flags = _probs > primary_thresh[state]
                        default_priority[flags] = np.maximum(
                            default_priority[flags], _probs[flags]) + 1

            infr.set_edge_attrs('prob_match', prob_match.to_dict())
            infr.set_edge_attrs('default_priority', default_priority.to_dict())

            metric = 'default_priority'
            priority = default_priority
        elif infr.cm_list is not None:
            infr.print(
                'Prioritizing {} edges with one-vs-many scores'.format(
                    len(priority_edges)), 1)
            # Not given any deploy classifier, this is the best we can do
            scores = infr._make_lnbnn_scores(priority_edges)
            metric = 'normscore'
            priority = scores
        else:
            infr.print('WARNING: No verifiers to prioritize {} edge(s)'.format(
                len(priority_edges)))
            metric = 'random'
            priority = np.zeros(len(priority_edges)) + 1e-6

        infr.set_edge_attrs(metric, ut.dzip(priority_edges, priority))
        return metric, priority
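
A toy sketch, with made-up probabilities, of the inversion step in Example #5: an edge whose endpoints already share a positive label is reprioritized by its non-match probability, so a confident mismatch inside a group floats to the top of the review queue:

    import pandas as pd

    prob_match = pd.Series([0.9, 0.2, 0.7])
    already_pos = [False, True, False]   # edge 1 links two already-merged annots
    priority = prob_match.copy()
    priority[already_pos] = 1 - priority[already_pos]
    print(priority.tolist())             # [0.9, 0.8, 0.7]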