Example #1
def load_multiclass_scores(self):
    # convert simple scores to multiclass scores
    import numpy as np
    import utool as ut
    import vtool as vt
    self.multiclass_scores = {}
    for key in self.samples.simple_scores.keys():
        scores = self.samples.simple_scores[key].values
        # Hack scores into the range 0 to 1
        normer = vt.ScoreNormalizer(adjust=8, monotonize=True)
        normer.fit(scores, y=self.samples.is_same())
        normed_scores = normer.normalize_scores(scores)
        # Create a dimension for each class
        # but only populate two of the dimensions
        class_idxs = ut.take(self.samples.text_to_class, ['nomatch', 'match'])
        pred = np.zeros((len(scores), len(self.samples.class_names)))
        pred[:, class_idxs[0]] = 1 - normed_scores
        pred[:, class_idxs[1]] = normed_scores
        self.multiclass_scores[key] = pred
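The same two-column construction works without vtool. Below is a minimal, self-contained sketch in which ScoreNormalizer is stood in for by a plain min-max rescaling (an illustrative assumption, not vtool's actual monotonic calibration):

import numpy as np

def simple_scores_to_multiclass(scores, class_names=('nomatch', 'match')):
    # Rescale raw scores into [0, 1]; ScoreNormalizer would instead fit a
    # monotonic calibration against ground-truth same/different labels.
    lo, hi = scores.min(), scores.max()
    normed = (scores - lo) / (hi - lo) if hi > lo else np.full(len(scores), 0.5)
    # One column per class, but only the two binary classes are populated.
    pred = np.zeros((len(scores), len(class_names)))
    pred[:, 0] = 1 - normed  # P(nomatch)
    pred[:, 1] = normed      # P(match)
    return pred

print(simple_scores_to_multiclass(np.array([0.2, 1.5, 3.0])))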
Example #2
def learn_annotscore_normalizer(qreq_, learnkw={}):
    """
    Takes the result of queries and trains a score encoder

    Args:
        qreq_ (wbia.QueryRequest):  query request object with hyper-parameters

    Returns:
        vtool.ScoreNormalizer: encoder

    CommandLine:
        python -m wbia --tf learn_annotscore_normalizer --show

    Example:
        >>> # FIXME failing-test (22-Jul-2020) This test is failing and it's not clear how to fix it
        >>> # xdoctest: +SKIP
        >>> from wbia.unstable.scorenorm import *  # NOQA
        >>> import wbia
        >>> qreq_ = wbia.testdata_qreq_(
        >>>     defaultdb='PZ_MTEST', a=['default'], p=['default'])
        >>> encoder = learn_annotscore_normalizer(qreq_)
        >>> ut.quit_if_noshow()
        >>> encoder.visualize(figtitle=encoder.get_cfgstr())
        >>> ut.show_if_requested()
    """
    cm_list = qreq_.execute()
    tup = get_training_annotscores(qreq_, cm_list)
    tp_scores, tn_scores, good_tn_aidnid_pairs, good_tp_aidnid_pairs = tup
    part_attrs = {
        0: {
            'aid_pairs': good_tn_aidnid_pairs
        },
        1: {
            'aid_pairs': good_tp_aidnid_pairs
        },
    }
    scores, labels, attrs = vt.flatten_scores(tp_scores, tn_scores, part_attrs)
    _learnkw = {'monotonize': True}
    _learnkw.update(learnkw)
    # timestamp = ut.get_timestamp()
    encoder = vt.ScoreNormalizer(**_learnkw)
    encoder.fit(scores, labels, attrs=attrs)
    encoder.cfgstr = 'annotscore'
    return encoder
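For reference, the partition flattening that vt.flatten_scores performs can be sketched in plain numpy (attribute handling omitted; the exact vtool semantics are an assumption based on the call above):

import numpy as np

def flatten_partitioned_scores(tp_scores, tn_scores):
    # Concatenate both partitions and build matching labels:
    # True for true-positive scores, False for true-negative scores.
    scores = np.hstack([tp_scores, tn_scores])
    labels = np.hstack([np.ones(len(tp_scores), dtype=bool),
                        np.zeros(len(tn_scores), dtype=bool)])
    return scores, labels

scores, labels = flatten_partitioned_scores(np.array([5.0, 6.0]),
                                            np.array([1.0, 2.0]))
print(scores)  # [5. 6. 1. 2.]
print(labels)  # [ True  True False False]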
Example #3
def learn_annotscore_normalizer(qreq_, learnkw={}):
    """
    Takes the result of queries and trains a score encoder

    Args:
        qreq_ (ibeis.QueryRequest):  query request object with hyper-parameters

    Returns:
        vtool.ScoreNormalizer: encoder

    CommandLine:
        python -m ibeis --tf learn_annotscore_normalizer --show

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.scorenorm import *  # NOQA
        >>> import ibeis
        >>> qreq_ = ibeis.testdata_qreq_(
        >>>     defaultdb='PZ_MTEST', a=['default'], p=['default'])
        >>> encoder = learn_annotscore_normalizer(qreq_)
        >>> ut.quit_if_noshow()
        >>> encoder.visualize(figtitle=encoder.get_cfgstr())
        >>> ut.show_if_requested()
    """
    cm_list = qreq_.execute()
    tup = get_training_annotscores(qreq_, cm_list)
    tp_scores, tn_scores, good_tn_aidnid_pairs, good_tp_aidnid_pairs = tup
    part_attrs = {
        0: {
            'aid_pairs': good_tn_aidnid_pairs
        },
        1: {
            'aid_pairs': good_tp_aidnid_pairs
        },
    }
    scores, labels, attrs = vt.flatten_scores(tp_scores, tn_scores, part_attrs)
    _learnkw = {'monotonize': True}
    _learnkw.update(learnkw)
    # timestamp = ut.get_timestamp()
    encoder = vt.ScoreNormalizer(**_learnkw)
    encoder.fit(scores, labels, attrs=attrs)
    encoder.cfgstr = 'annotscore'
    return encoder
Example #4
def load_featscore_normalizer(normer_cfgstr):
    r"""
    Args:
        normer_cfgstr (str): partial cfgstr used to fuzzy-match a saved normalizer

    CommandLine:
        python -m ibeis.algo.hots.scorenorm --exec-load_featscore_normalizer --show
        python -m ibeis.algo.hots.scorenorm --exec-load_featscore_normalizer --show --cfgstr=featscore
        python -m ibeis.algo.hots.scorenorm --exec-load_featscore_normalizer --show --cfgstr=lovb

    Example:
        >>> # SCRIPT
        >>> from ibeis.algo.hots.scorenorm import *  # NOQA
        >>> normer_cfgstr = ut.get_argval('--cfgstr', default='featscore')
        >>> encoder = load_featscore_normalizer(normer_cfgstr)
        >>> encoder.visualize(figtitle=encoder.get_cfgstr())
        >>> ut.show_if_requested()
    """
    encoder = vt.ScoreNormalizer()
    # qreq_.lnbnn_normer.load(cfgstr=config2_.lnbnn_normer)
    encoder.fuzzyload(partial_cfgstr=normer_cfgstr)
    return encoder
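This pairs with a load-or-train pattern used in compare_featscores below: try to load a saved normalizer by cfgstr, and fall back to fitting and saving a fresh one. A sketch, assuming save/load behave as they are used elsewhere in this module:

import vtool as vt

def load_or_learn_normalizer(cfgstr, scores, labels, learnkw={}):
    encoder = vt.ScoreNormalizer(**learnkw)
    try:
        # load() raises IOError when nothing is cached under this cfgstr
        encoder.load(cfgstr=cfgstr)
    except IOError:
        encoder.fit(scores, labels)
        encoder.save(cfgstr=cfgstr)
    return encoder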
Example #5
def flow():
    """
    http://pmneila.github.io/PyMaxflow/maxflow.html#maxflow-fastmin

    pip install PyMaxFlow
    pip install pystruct
    pip install hdbscan
    """
    # Toy problem: attempt to discover names from annotation scores

    import pystruct  # NOQA
    import pystruct.models  # NOQA
    import networkx as netx  # NOQA

    import vtool as vt

    num_annots = 10
    num_names = num_annots
    hidden_nids = np.random.randint(0, num_names, num_annots)
    unique_nids, groupxs = vt.group_indices(hidden_nids)

    toy_params = {
        True: {
            'mu': 1.0,
            'sigma': 2.2
        },
        False: {
            'mu': 7.0,
            'sigma': 0.9
        }
    }

    if True:
        import vtool as vt
        import wbia.plottool as pt

        xdata = np.linspace(0, 100, 1000)
        tp_pdf = vt.gauss_func1d(xdata, **toy_params[True])
        fp_pdf = vt.gauss_func1d(xdata, **toy_params[False])
        pt.plot_probabilities([tp_pdf, fp_pdf], ['TP', 'FP'], xdata=xdata)

    def metric(aidx1, aidx2, hidden_nids=hidden_nids, toy_params=toy_params):
        if aidx1 == aidx2:
            return 0
        rng = np.random.RandomState(int(aidx1 + aidx2))
        same = hidden_nids[int(aidx1)] == hidden_nids[int(aidx2)]
        mu, sigma = ut.dict_take(toy_params[same], ['mu', 'sigma'])
        return np.clip(rng.normal(mu, sigma), 0, np.inf)

    pairwise_aidxs = list(ut.iprod(range(num_annots), range(num_annots)))
    pairwise_labels = np.array(
        [hidden_nids[a1] == hidden_nids[a2] for a1, a2 in pairwise_aidxs])
    pairwise_scores = np.array([metric(*zz) for zz in pairwise_aidxs])
    pairwise_scores_mat = pairwise_scores.reshape(num_annots, num_annots)
    if num_annots <= 10:
        logger.info(ut.repr2(pairwise_scores_mat, precision=1))

    # aids = list(range(num_annots))
    # g = netx.DiGraph()
    # g.add_nodes_from(aids)
    # g.add_edges_from([(tup[0], tup[1], {'weight': score}) for tup, score in zip(pairwise_aidxs, pairwise_scores) if tup[0] != tup[1]])
    # netx.draw_graphviz(g)
    # pr = netx.pagerank(g)

    X = pairwise_scores
    Y = pairwise_labels

    encoder = vt.ScoreNormalizer()
    encoder.fit(X, Y)
    encoder.visualize()

    # meanshift clustering
    import sklearn

    bandwidth = sklearn.cluster.estimate_bandwidth(
        X[:, None])  # , quantile=quantile, n_samples=500)
    assert bandwidth != 0, 'bandwidth is 0, cannot cluster'
    # bandwidth is with respect to the RBF used in clustering
    # ms = sklearn.cluster.MeanShift(bandwidth=bandwidth, bin_seeding=True, cluster_all=True)
    ms = sklearn.cluster.MeanShift(bandwidth=bandwidth,
                                   bin_seeding=True,
                                   cluster_all=False)
    ms.fit(X[:, None])
    label_arr = ms.labels_
    unique_labels = np.unique(label_arr)
    max_label = max(0, unique_labels.max())
    num_orphans = (label_arr == -1).sum()
    label_arr[label_arr == -1] = np.arange(max_label + 1,
                                           max_label + 1 + num_orphans)

    X_data = np.arange(num_annots)[:, None].astype(np.int64)

    # graph = pystruct.models.GraphCRF(
    #    n_states=None,
    #    n_features=None,
    #    inference_method='lp',
    #    class_weight=None,
    #    directed=False,
    # )

    import scipy
    import scipy.cluster
    import scipy.cluster.hierarchy

    thresh = 2.0
    labels = scipy.cluster.hierarchy.fclusterdata(X_data,
                                                  thresh,
                                                  metric=metric)
    unique_lbls, lblgroupxs = vt.group_indices(labels)
    logger.info(groupxs)
    logger.info(lblgroupxs)
    logger.info('groupdiff = %r' %
                (ut.compare_groupings(groupxs, lblgroupxs), ))
    logger.info('common groups = %r' %
                (ut.find_grouping_consistencies(groupxs, lblgroupxs), ))
    # X_data, seconds_thresh, criterion='distance')

    # help(hdbscan.HDBSCAN)

    import hdbscan

    alg = hdbscan.HDBSCAN(metric=metric,
                          min_cluster_size=1,
                          p=1,
                          gen_min_span_tree=1,
                          min_samples=2)
    labels = alg.fit_predict(X_data)
    labels[labels == -1] = np.arange(np.sum(labels == -1)) + labels.max() + 1
    unique_lbls, lblgroupxs = vt.group_indices(labels)
    logger.info(groupxs)
    logger.info(lblgroupxs)
    logger.info('groupdiff = %r' %
                (ut.compare_groupings(groupxs, lblgroupxs), ))
    logger.info('common groups = %r' %
                (ut.find_grouping_consistencies(groupxs, lblgroupxs), ))

    # import ddbscan
    # help(ddbscan.DDBSCAN)
    # alg = ddbscan.DDBSCAN(2, 2)

    # D = np.zeros((len(aids), len(aids) + 1))
    # D.T[-1] = np.arange(len(aids))

    ## Can alpha-expansion be used when the pairwise potentials are not in a grid?

    # hidden_ut.group_items(aids, hidden_nids)
    if False:
        import maxflow

        # from maxflow import fastmin
        # Create a graph with integer capacities.
        g = maxflow.Graph[int](2, 2)
        # Add two (non-terminal) nodes. Get the index to the first one.
        nodes = g.add_nodes(2)
        # Create two edges (forwards and backwards) with the given capacities.
        # The indices of the nodes are always consecutive.
        g.add_edge(nodes[0], nodes[1], 1, 2)
        # Set the capacities of the terminal edges...
        # ...for the first node.
        g.add_tedge(nodes[0], 2, 5)
        # ...for the second node.
        g.add_tedge(nodes[1], 9, 4)
        g = maxflow.Graph[float](2, 2)
        g.maxflow()
        g.get_nx_graph()
        g.get_segment(nodes[0])
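The orphan handling above deserves a note: MeanShift with cluster_all=False marks unclustered points with the label -1, and the code reassigns each orphan to its own fresh singleton label. A self-contained numpy sketch of just that step:

import numpy as np

label_arr = np.array([0, 1, -1, 1, -1, 2])
max_label = label_arr.max()
num_orphans = (label_arr == -1).sum()
# Give each orphan its own new label past the current maximum, so orphans
# become singleton clusters instead of one shared "noise" cluster.
label_arr[label_arr == -1] = np.arange(max_label + 1, max_label + 1 + num_orphans)
print(label_arr)  # [0 1 3 1 4 2]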
Example #6
def learn_prob_score(num_scores=5, pad=55, ret_enc=False, use_cache=None):
    r"""
    Args:
        num_scores (int): number of discrete score levels (default = 5)
        pad (int): bins trimmed from each end of the curve (default = 55)
        ret_enc (bool): if True, also return the fitted encoder (default = False)
        use_cache (bool): (default = None)

    Returns:
        tuple: (discr_domain, discr_p_same)

    CommandLine:
        python -m ibeis.algo.hots.demobayes --exec-learn_prob_score --show

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.demobayes import *  # NOQA
        >>> num_scores = 2
        >>> (discr_domain, discr_p_same, encoder) = learn_prob_score(num_scores, ret_enc=True, use_cache=False)
        >>> print('discr_p_same = %r' % (discr_p_same,))
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> encoder.visualize()
        >>> ut.show_if_requested()
    """
    num_annots_train = 200
    num_names_train = 5
    toy_data = get_toy_data_1v1(num_annots_train, num_names_train)
    #pairwise_aidxs, pairwise_scores, pairwise_matches = ut.dict_take(
    #    toy_data, 'pairwise_aidxs, pairwise_scores, pairwise_matches'.split(', '))

    diag_scores, diag_labels = ut.dict_take(
        toy_data, 'diag_scores, diag_labels'.split(', '))
    #is_diag = [r < c for r, c, in pairwise_aidxs]
    #diag_scores = pairwise_scores.compress(is_diag)
    #diag_labels = pairwise_matches.compress(is_diag)

    # Learn P(S_{ij} | M_{ij})
    import vtool as vt
    encoder = vt.ScoreNormalizer(
        reverse=True,
        monotonize=True,
        adjust=4,
    )
    encoder.fit(X=diag_scores, y=diag_labels, verbose=True)

    if False:
        import plottool as pt
        pt.ensure_pylab_qt4()
        encoder.visualize()
        #show_toy_distributions()

    def discretize_probs(encoder):
        p_tp_given_score = encoder.p_tp_given_score / encoder.p_tp_given_score.sum()
        bins = len(p_tp_given_score) - (pad * 2)
        stride = int(np.ceil(bins / num_scores))
        idxs = np.arange(0, bins, stride) + pad
        discr_p_same = p_tp_given_score.take(idxs)
        discr_p_same = discr_p_same / discr_p_same.sum()
        discr_domain = encoder.score_domain.take(idxs)
        return discr_domain, discr_p_same

    discr_domain, discr_p_same = discretize_probs(encoder)
    if ret_enc:
        return discr_domain, discr_p_same, encoder
    return discr_domain, discr_p_same
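In isolation, discretize_probs trims `pad` bins from each end of the dense probability curve, takes evenly strided samples for the requested number of score levels, and renormalizes the samples to sum to one. A runnable sketch with a synthetic curve:

import numpy as np

def discretize(p_curve, domain, num_scores, pad):
    p_curve = p_curve / p_curve.sum()
    bins = len(p_curve) - (pad * 2)           # usable interior bins
    stride = int(np.ceil(bins / num_scores))  # even spacing over the interior
    idxs = np.arange(0, bins, stride) + pad   # skip `pad` bins on the left
    discr_p = p_curve.take(idxs)
    return domain.take(idxs), discr_p / discr_p.sum()

domain = np.linspace(0, 10, 100)
curve = np.exp(-(domain - 7) ** 2)  # stand-in for P(same | score)
print(discretize(curve, domain, num_scores=5, pad=10))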
Example #7
def compare_featscores():
    """
    CommandLine:

        ibeis --tf compare_featscores  --db PZ_MTEST \
            --nfscfg :disttype=[L2_sift,lnbnn],top_percent=[None,.5,.1] -a timectrl \
            -p default:K=[1,2],normalizer_rule=name \
            --save featscore{db}.png --figsize=13,20 --diskshow

        ibeis --tf compare_featscores  --db PZ_MTEST \
            --nfscfg :disttype=[L2_sift,normdist,lnbnn],top_percent=[None,.5] -a timectrl \
            -p default:K=[1],normalizer_rule=name,sv_on=[True,False] \
            --save featscore{db}.png --figsize=13,10 --diskshow

        ibeis --tf compare_featscores --nfscfg :disttype=[L2_sift,normdist,lnbnn] \
            -a timectrl -p default:K=1,normalizer_rule=name --db PZ_Master1 \
            --save featscore{db}.png  --figsize=13,13 --diskshow

        ibeis --tf compare_featscores --nfscfg :disttype=[L2_sift,normdist,lnbnn] \
            -a timectrl -p default:K=1,normalizer_rule=name --db GZ_ALL \
            --save featscore{db}.png  --figsize=13,13 --diskshow

        ibeis --tf compare_featscores  --db GIRM_Master1 \
            --nfscfg ':disttype=fg,L2_sift,normdist,lnbnn' \
            -a timectrl -p default:K=1,normalizer_rule=name \
            --save featscore{db}.png  --figsize=13,13

        ibeis --tf compare_featscores --nfscfg :disttype=[L2_sift,normdist,lnbnn] \
            -a timectrl -p default:K=[1,2,3],normalizer_rule=name,sv_on=False \
            --db PZ_Master1 --save featscore{db}.png  \
                --dpi=128 --figsize=15,20 --diskshow

        ibeis --tf compare_featscores --show --nfscfg :disttype=[L2_sift,normdist] -a timectrl -p :K=1 --db PZ_MTEST
        ibeis --tf compare_featscores --show --nfscfg :disttype=[L2_sift,normdist] -a timectrl -p :K=1 --db GZ_ALL
        ibeis --tf compare_featscores --show --nfscfg :disttype=[L2_sift,normdist] -a timectrl -p :K=1 --db PZ_Master1
        ibeis --tf compare_featscores --show --nfscfg :disttype=[L2_sift,normdist] -a timectrl -p :K=1 --db GIRM_Master1

        ibeis --tf compare_featscores  --db PZ_MTEST \
            --nfscfg :disttype=[L2_sift,normdist,lnbnn],top_percent=[None,.5,.2] -a timectrl \
            -p default:K=[1],normalizer_rule=name \
            --save featscore{db}.png --figsize=13,20 --diskshow

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.hots.scorenorm import *  # NOQA
        >>> result = compare_featscores()
        >>> print(result)
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> ut.show_if_requested()
    """
    import plottool as pt
    import ibeis
    nfs_cfg_list = NormFeatScoreConfig.from_argv_cfgs()
    learnkw = {}
    ibs, testres = ibeis.testdata_expts(
        defaultdb='PZ_MTEST', a=['default'], p=['default:K=1'])
    print('nfs_cfg_list = ' + ut.repr3(nfs_cfg_list))

    encoder_list = []
    lbl_list = []

    varied_nfs_lbls = ut.get_varied_cfg_lbls(nfs_cfg_list)
    varied_qreq_lbls = ut.get_varied_cfg_lbls(testres.cfgdict_list)
    #varies_qreq_lbls

    #func = ut.cached_func(cache_dir='.')(learn_featscore_normalizer)
    for datakw, nlbl in zip(nfs_cfg_list, varied_nfs_lbls):
        for qreq_, qlbl in zip(testres.cfgx2_qreq_, varied_qreq_lbls):
            lbl = qlbl + ' ' + nlbl
            cfgstr = '_'.join([datakw.get_cfgstr(), qreq_.get_full_cfgstr()])
            try:
                encoder = vt.ScoreNormalizer()
                encoder.load(cfgstr=cfgstr)
            except IOError:
                print('datakw = %r' % (datakw,))
                encoder = learn_featscore_normalizer(qreq_, datakw, learnkw)
                encoder.save(cfgstr=cfgstr)
            encoder_list.append(encoder)
            lbl_list.append(lbl)

    fnum = 1
    # next_pnum = pt.make_pnum_nextgen(nRows=len(encoder_list), nCols=3)
    next_pnum = pt.make_pnum_nextgen(nRows=len(encoder_list) + 1, nCols=3, start=3)

    iconsize = 94
    if len(encoder_list) > 3:
        iconsize = 64

    icon = qreq_.ibs.get_database_icon(max_dsize=(None, iconsize), aid=qreq_.qaids[0])
    score_range = (0, .6)
    for encoder, lbl in zip(encoder_list, lbl_list):
        #encoder.visualize(figtitle=encoder.get_cfgstr(), with_prebayes=False, with_postbayes=False)
        encoder._plot_score_support_hist(fnum, pnum=next_pnum(), titlesuf='\n' + lbl, score_range=score_range)
        encoder._plot_prebayes(fnum, pnum=next_pnum())
        encoder._plot_roc(fnum, pnum=next_pnum())
        if icon is not None:
            pt.overlay_icon(icon, coords=(1, 0), bbox_alignment=(1, 0))

    nonvaried_lbl = ut.get_nonvaried_cfg_lbls(nfs_cfg_list)[0]
    figtitle = qreq_.__str__() + '\n' + nonvaried_lbl

    pt.set_figtitle(figtitle)
    pt.adjust_subplots(hspace=.5, top=.92, bottom=.08, left=.1, right=.9)
    pt.update_figsize()
    pt.plt.tight_layout()
Example #8
def learn_featscore_normalizer(qreq_, datakw={}, learnkw={}):
    r"""
    Takes the result of queries and trains a score encoder

    Args:
        qreq_ (ibeis.QueryRequest): query request object with hyper-parameters
        datakw (dict): options forwarded to get_training_featscores (default = {})
        learnkw (dict): overrides for the ScoreNormalizer (default = {})

    Returns:
        vtool.ScoreNormalizer: encoder

    CommandLine:
        python -m ibeis --tf learn_featscore_normalizer --show -t default:
        python -m ibeis --tf learn_featscore_normalizer --show --fsvx=0 --threshx=1 --show
        python -m ibeis --tf learn_featscore_normalizer --show -a default:size=40 -t default:fg_on=False,lnbnn_on=False,ratio_thresh=1.0,K=1,Knorm=6,sv_on=False,normalizer_rule=name --fsvx=0 --threshx=1 --show

        python -m ibeis --tf learn_featscore_normalizer --show --disttype=ratio
        python -m ibeis --tf learn_featscore_normalizer --show --disttype=lnbnn
        python -m ibeis --tf learn_featscore_normalizer --show --disttype=L2_sift -t default:K=1

        python -m ibeis --tf learn_featscore_normalizer --show --disttype=L2_sift -a timectrl -t default:K=1 --db PZ_Master1
        python -m ibeis --tf learn_featscore_normalizer --show --disttype=ratio -a timectrl -t default:K=1 --db PZ_Master1
        python -m ibeis --tf learn_featscore_normalizer --show --disttype=lnbnn -a timectrl -t default:K=1 --db PZ_Master1

        # LOOK AT THIS
        python -m ibeis --tf learn_featscore_normalizer --show --disttype=normdist -a timectrl -t default:K=1 --db PZ_Master1
        #python -m ibeis --tf learn_featscore_normalizer --show --disttype=parzen -a timectrl -t default:K=1 --db PZ_Master1
        #python -m ibeis --tf learn_featscore_normalizer --show --disttype=norm_parzen -a timectrl -t default:K=1 --db PZ_Master1

        python -m ibeis --tf learn_featscore_normalizer --show --disttype=lnbnn --db PZ_Master1 -a timectrl -t best

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.scorenorm import *  # NOQA
        >>> import ibeis
        >>> learnkw = {}
        >>> datakw = NormFeatScoreConfig.from_argv_dict()
        >>> qreq_ = ibeis.testdata_qreq_(
        >>>     defaultdb='PZ_MTEST', a=['default'], p=['default'])
        >>> encoder = learn_featscore_normalizer(qreq_, datakw, learnkw)
        >>> ut.quit_if_noshow()
        >>> encoder.visualize(figtitle=encoder.get_cfgstr())
        >>> ut.show_if_requested()
    """
    cm_list = qreq_.execute()
    print('learning scorenorm')
    print('datakw = %s' % ut.repr3(datakw))
    tp_scores, tn_scores, scorecfg = get_training_featscores(
        qreq_, cm_list, **datakw)
    _learnkw = dict(monotonize=True, adjust=2)
    _learnkw.update(learnkw)
    encoder = vt.ScoreNormalizer(**_learnkw)
    encoder.fit_partitioned(tp_scores, tn_scores, verbose=False)
    # ut.hashstr27(qreq_.get_cfgstr())

    # Maintain regen command info: TODO: generalize and integrate
    encoder._regen_info = {
        'cmd': 'python -m ibeis --tf learn_featscore_normalizer',
        'scorecfg': scorecfg,
        'learnkw': learnkw,
        'datakw': datakw,
        'qaids': qreq_.qaids,
        'daids': qreq_.daids,
        'qreq_cfg': qreq_.get_full_cfgstr(),
        'qreq_regen_info': getattr(qreq_, '_regen_info', {}),
    }
    # 'timestamp': ut.get_printable_timestamp(),

    scorecfg_safe = scorecfg
    scorecfg_safe = re.sub('[' + re.escape('()= ') + ']', '', scorecfg_safe)
    scorecfg_safe = re.sub('[' + re.escape('+*<>[]') + ']', '_', scorecfg_safe)

    hashid = ut.hashstr27(ut.to_json(encoder._regen_info))
    naidinfo = ('q%s_d%s' % (len(qreq_.qaids), len(qreq_.daids)))
    cfgstr = 'featscore_{}_{}_{}_{}'.format(scorecfg_safe, qreq_.ibs.get_dbname(), naidinfo, hashid)
    encoder.cfgstr = cfgstr
    return encoder
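The cfgstr sanitization can be isolated and tested on its own: characters that are awkward in filenames are dropped, and structural characters become underscores. A runnable sketch on a hypothetical scorecfg string:

import re

def sanitize_scorecfg(scorecfg):
    # Drop '()= ' entirely, then map '+*<>[]' to underscores.
    out = re.sub('[' + re.escape('()= ') + ']', '', scorecfg)
    out = re.sub('[' + re.escape('+*<>[]') + ']', '_', out)
    return out

print(sanitize_scorecfg('(lnbnn + fg) * [top=0.5]'))  # lnbnn_fg__top0.5_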
Example #9
def sift_dataset_separability(dataset):
    """
    VERY HACKED RIGHT NOW. ONLY LIBERTY. BLINDLY CACHES

    Args:
        dataset: dataset object exposing subset('test') and alias_key (only Liberty is handled)

    CommandLine:
        python -m ibeis_cnn.experiments --exec-sift_dataset_separability --show

    Example:
        >>> # SCRIPT
        >>> from ibeis_cnn.experiments import *  # NOQA
        >>> from ibeis_cnn import ingest_data
        >>> dataset = ingest_data.grab_liberty_siam_dataset(250000)
        >>> ut.quit_if_noshow()
        >>> sift_dataset_separability(dataset)
        >>> ut.show_if_requested()
    """
    import vtool as vt
    @ut.cached_func('tempsiftscorecache', cache_dir='.')
    def cached_siftscores():
        data, labels = dataset.subset('test')
        sift_scores, sift_list = test_sift_patchmatch_scores(data, labels)
        sift_scores = sift_scores.astype(np.float64)
        return sift_scores, labels, sift_list
    sift_scores, labels, sift_list = cached_siftscores()

    # I don't think we can compare lnbnn on Liberty, because we don't
    # have a set of id labels; we only have pairs of correspondences.
    #import pyflann
    #flann = pyflann.FLANN()
    #flann.build_index(sift_list)
    #idxs, dists = flann.nn_index(sift_list, 10)

    encoder_kw = {
        #'monotonize': False,
        'monotonize': True,
    }
    sift_encoder = vt.ScoreNormalizer(**encoder_kw)
    sift_encoder.fit(sift_scores, labels)
    dataname = dataset.alias_key
    viz_kw = dict(
        with_scores=False,
        with_postbayes=False,
        with_prebayes=False,
        target_tpr=.95,
        score_range=(0, 1)
    )
    inter_sift = sift_encoder.visualize(
        figtitle=dataname + ' SIFT scores. #data=' + str(len(labels)),
        fnum=None, **viz_kw)

    import plottool as pt

    #icon = ibs.get_database_icon()
    icon = ('http://www.councilchronicle.com/wp-content/uploads/2015/08/'
            'West-Virginia-Arrested-over-Bogus-Statue-of-Liberty-Bomb-Threat.jpg')
    if icon is not None:
        pt.overlay_icon(icon, coords=(1, 0), bbox_alignment=(1, 0), max_dsize=(None, 192))

    if ut.get_argflag('--contextadjust'):
        pt.adjust_subplots(left=.1, bottom=.25, wspace=.2, hspace=.2)
        pt.adjust_subplots(use_argv=True)
    return inter_sift
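ut.cached_func memoizes the wrapped function's result to disk so repeated runs skip the expensive recomputation. A minimal pickle-based stand-in (utool's real keying and invalidation are more involved than this):

import os
import pickle

def cached_func(fname, cache_dir='.'):
    def wrap(func):
        def wrapper(*args):
            path = os.path.join(cache_dir, fname + '.pkl')
            if os.path.exists(path):
                # Cache hit: return the previously pickled result
                with open(path, 'rb') as file_:
                    return pickle.load(file_)
            result = func(*args)
            with open(path, 'wb') as file_:
                pickle.dump(result, file_)
            return result
        return wrapper
    return wrap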
Example #10
def test_siamese_performance(model, data, labels, flat_metadata, dataname=''):
    r"""
    CommandLine:
        utprof.py -m ibeis_cnn --tf pz_patchmatch --db liberty --test --weights=liberty:current --arch=siaml2_128 --test
        python -m ibeis_cnn --tf netrun --db liberty --arch=siaml2_128 --test  --ensure
        python -m ibeis_cnn --tf netrun --db liberty --arch=siaml2_128 --test  --ensure --weights=new
        python -m ibeis_cnn --tf netrun --db liberty --arch=siaml2_128 --train --weights=new
        python -m ibeis_cnn --tf netrun --db pzmtest --weights=liberty:current --arch=siaml2_128 --test  # NOQA
        python -m ibeis_cnn --tf netrun --db pzmtest --weights=liberty:current --arch=siaml2_128
    """
    import vtool as vt
    import plottool as pt

    # TODO: save in model.training_dpath/diagnostics/figures
    ut.colorprint('\n[siam_perf] Testing Siamese Performance', 'white')
    #epoch_dpath = model.get_epoch_diagnostic_dpath()
    epoch_dpath = model.arch_dpath
    ut.vd(epoch_dpath)

    dataname += ' ' + model.get_history_hashid() + '\n'

    history_text = ut.list_str(model.era_history, newlines=True)

    ut.write_to(ut.unixjoin(epoch_dpath, 'era_history.txt'), history_text)

    #if True:
    #    import matplotlib as mpl
    #    mpl.rcParams['agg.path.chunksize'] = 100000

    #data   = data[::50]
    #labels = labels[::50]
    #from ibeis_cnn import utils
    #data, labels = utils.random_xy_sample(data, labels, 10000, model.data_per_label_input)

    FULL = not ut.get_argflag('--quick')

    fnum_gen = pt.make_fnum_nextgen()

    ut.colorprint('[siam_perf] Show era history', 'white')
    fig = model.show_era_loss(fnum=fnum_gen())
    pt.save_figure(fig=fig, dpath=epoch_dpath, dpi=180)

    # hack
    ut.colorprint('[siam_perf] Show weights image', 'white')
    fig = model.show_weights_image(fnum=fnum_gen())
    pt.save_figure(fig=fig, dpath=epoch_dpath, dpi=180)
    #model.draw_all_conv_layer_weights(fnum=fnum_gen())
    #model.imwrite_weights(1)
    #model.imwrite_weights(2)

    # Compute each type of score
    ut.colorprint('[siam_perf] Building Scores', 'white')
    test_outputs = model.predict2(model, data)
    network_output = test_outputs['network_output_determ']
    # hack converting network output to distances for non-descriptor networks
    if len(network_output.shape) == 2 and network_output.shape[1] == 1:
        cnn_scores = network_output.T[0]
    elif len(network_output.shape) == 1:
        cnn_scores = network_output
    elif len(network_output.shape) == 2 and network_output.shape[1] > 1:
        assert model.data_per_label_output == 2
        vecs1 = network_output[0::2]
        vecs2 = network_output[1::2]
        cnn_scores = vt.L2(vecs1, vecs2)
    else:
        assert False
    cnn_scores = cnn_scores.astype(np.float64)

    # Segfaults when the data passed in is large (and memmapped, apparently).
    # Fixed in the hesaff implementation.
    SIFT = FULL
    if SIFT:
        sift_scores, sift_list = test_sift_patchmatch_scores(data, labels)
        sift_scores = sift_scores.astype(np.float64)

    ut.colorprint('[siam_perf] Learning Encoders', 'white')
    # Learn encoders
    encoder_kw = {
        #'monotonize': False,
        'monotonize': True,
    }
    cnn_encoder = vt.ScoreNormalizer(**encoder_kw)
    cnn_encoder.fit(cnn_scores, labels)

    if SIFT:
        sift_encoder = vt.ScoreNormalizer(**encoder_kw)
        sift_encoder.fit(sift_scores, labels)

    # Visualize
    ut.colorprint('[siam_perf] Visualize Encoders', 'white')
    viz_kw = dict(
        with_scores=False,
        with_postbayes=False,
        with_prebayes=False,
        target_tpr=.95,
    )
    inter_cnn = cnn_encoder.visualize(
        figtitle=dataname + ' CNN scores. #data=' + str(len(data)),
        fnum=fnum_gen(), **viz_kw)
    if SIFT:
        inter_sift = sift_encoder.visualize(
            figtitle=dataname + ' SIFT scores. #data=' + str(len(data)),
            fnum=fnum_gen(), **viz_kw)

    # Save
    pt.save_figure(fig=inter_cnn.fig, dpath=epoch_dpath)
    if SIFT:
        pt.save_figure(fig=inter_sift.fig, dpath=epoch_dpath)

    # Save out examples of hard errors
    #cnn_fp_label_indicies, cnn_fn_label_indicies =
    #cnn_encoder.get_error_indicies(cnn_scores, labels)
    #sift_fp_label_indicies, sift_fn_label_indicies =
    #sift_encoder.get_error_indicies(sift_scores, labels)

    with_patch_examples = FULL
    if with_patch_examples:
        ut.colorprint('[siam_perf] Visualize Confusion Examples', 'white')
        cnn_indicies = cnn_encoder.get_confusion_indicies(cnn_scores, labels)
        if SIFT:
            sift_indicies = sift_encoder.get_confusion_indicies(sift_scores, labels)

        warped_patch1_list, warped_patch2_list = list(zip(*ut.ichunks(data, 2)))
        samp_args = (warped_patch1_list, warped_patch2_list, labels)
        _sample = functools.partial(draw_results.get_patch_sample_img, *samp_args)

        cnn_fp_img = _sample({'fs': cnn_scores}, cnn_indicies.fp)[0]
        cnn_fn_img = _sample({'fs': cnn_scores}, cnn_indicies.fn)[0]
        cnn_tp_img = _sample({'fs': cnn_scores}, cnn_indicies.tp)[0]
        cnn_tn_img = _sample({'fs': cnn_scores}, cnn_indicies.tn)[0]

        if SIFT:
            sift_fp_img = _sample({'fs': sift_scores}, sift_indicies.fp)[0]
            sift_fn_img = _sample({'fs': sift_scores}, sift_indicies.fn)[0]
            sift_tp_img = _sample({'fs': sift_scores}, sift_indicies.tp)[0]
            sift_tn_img = _sample({'fs': sift_scores}, sift_indicies.tn)[0]

        #if ut.show_was_requested():
        #def rectify(arr):
        #    return np.flipud(arr)
        SINGLE_FIG = False
        if SINGLE_FIG:
            def dump_img(img_, lbl, fnum):
                fig, ax = pt.imshow(img_, figtitle=dataname + ' ' + lbl, fnum=fnum)
                pt.save_figure(fig=fig, dpath=epoch_dpath, dpi=180)
            dump_img(cnn_fp_img, 'cnn_fp_img', fnum_gen())
            dump_img(cnn_fn_img, 'cnn_fn_img', fnum_gen())
            dump_img(cnn_tp_img, 'cnn_tp_img', fnum_gen())
            dump_img(cnn_tn_img, 'cnn_tn_img', fnum_gen())

            dump_img(sift_fp_img, 'sift_fp_img', fnum_gen())
            dump_img(sift_fn_img, 'sift_fn_img', fnum_gen())
            dump_img(sift_tp_img, 'sift_tp_img', fnum_gen())
            dump_img(sift_tn_img, 'sift_tn_img', fnum_gen())
            #vt.imwrite(dataname + '_' + 'cnn_fp_img.png', (cnn_fp_img))
            #vt.imwrite(dataname + '_' + 'cnn_fn_img.png', (cnn_fn_img))
            #vt.imwrite(dataname + '_' + 'sift_fp_img.png', (sift_fp_img))
            #vt.imwrite(dataname + '_' + 'sift_fn_img.png', (sift_fn_img))
        else:
            print('Drawing TP FP TN FN')
            fnum = fnum_gen()
            pnum_gen = pt.make_pnum_nextgen(4, 2)
            fig = pt.figure(fnum)
            pt.imshow(cnn_fp_img,  title='CNN FP',  fnum=fnum, pnum=pnum_gen())
            pt.imshow(sift_fp_img, title='SIFT FP', fnum=fnum, pnum=pnum_gen())
            pt.imshow(cnn_fn_img,  title='CNN FN',  fnum=fnum, pnum=pnum_gen())
            pt.imshow(sift_fn_img, title='SIFT FN', fnum=fnum, pnum=pnum_gen())
            pt.imshow(cnn_tp_img,  title='CNN TP',  fnum=fnum, pnum=pnum_gen())
            pt.imshow(sift_tp_img, title='SIFT TP', fnum=fnum, pnum=pnum_gen())
            pt.imshow(cnn_tn_img,  title='CNN TN',  fnum=fnum, pnum=pnum_gen())
            pt.imshow(sift_tn_img, title='SIFT TN', fnum=fnum, pnum=pnum_gen())
            pt.set_figtitle(dataname + ' confusions')
            pt.adjust_subplots(left=0, right=1.0, bottom=0., wspace=.01, hspace=.05)
            pt.save_figure(fig=fig, dpath=epoch_dpath, dpi=180, figsize=(9, 18))

    with_patch_desc = FULL
    if with_patch_desc:
        ut.colorprint('[siam_perf] Visualize Patch Descriptors', 'white')
        fnum = fnum_gen()
        fig = pt.figure(fnum=fnum, pnum=(1, 1, 1))
        num_rows = 7
        pnum_gen = pt.make_pnum_nextgen(num_rows, 3)
        # Compare actual output descriptors
        for index in ut.random_indexes(len(sift_list), num_rows):
            vec_sift = sift_list[index]
            vec_cnn = network_output[index]
            patch = data[index]
            pt.imshow(patch, fnum=fnum, pnum=pnum_gen())
            pt.plot_descriptor_signature(vec_cnn, 'cnn vec',  fnum=fnum, pnum=pnum_gen())
            pt.plot_sift_signature(vec_sift, 'sift vec',  fnum=fnum, pnum=pnum_gen())
        pt.set_figtitle('Patch Descriptors')
        pt.adjust_subplots(left=0, right=0.95, bottom=0., wspace=.1, hspace=.15)
        pt.save_figure(fig=fig, dpath=epoch_dpath, dpi=180, figsize=(9, 18))
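The interleaved-output handling near the top of this example is worth isolating: siamese networks emit descriptors in (left, right) pairs along axis 0, so strided slicing recovers the two sides and a rowwise L2 distance yields a match score (vt.L2 is assumed to compute exactly that). A numpy sketch:

import numpy as np

network_output = np.random.rand(8, 128)  # 4 pairs of 128-d descriptors
vecs1 = network_output[0::2]             # left patch of each pair
vecs2 = network_output[1::2]             # right patch of each pair
cnn_scores = np.linalg.norm(vecs1 - vecs2, axis=1)  # rowwise L2 distance
print(cnn_scores.shape)  # (4,)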
Example #11
def draw_feat_scoresep(testres, f=None, disttype=None):
    r"""
    SeeAlso:
        ibeis.algo.hots.scorenorm.train_featscore_normalizer

    CommandLine:
        python -m ibeis --tf TestResult.draw_feat_scoresep --show
        python -m ibeis --tf TestResult.draw_feat_scoresep --show -t default:sv_on=[True,False]
        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_Master1
        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_Master1 --disttype=L2_sift,fg
        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_Master1 --disttype=L2_sift
        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_MTEST -t best:lnbnn_on=True --namemode=True
        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_MTEST -t best:lnbnn_on=True --namemode=False

        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_MTEST --disttype=L2_sift
        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_MTEST --disttype=L2_sift -t best:SV=False

        utprof.py -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_Master1
        utprof.py -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_Master1 --fsvx=1:2
        utprof.py -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_Master1 --fsvx=0:1

        utprof.py -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_Master1 -t best:lnbnn_on=False,bar_l2_on=True  --fsvx=0:1

        # We want to query the Oxford annots tagged "query", and we want
        # the database to contain K correct images per query, as well as
        # the distractors

        python -m ibeis --tf TestResult.draw_feat_scoresep  --show --db Oxford -a default:qhas_any=\(query,\),dpername=1,exclude_reference=True,minqual=ok
        python -m ibeis --tf TestResult.draw_feat_scoresep  --show --db Oxford -a default:qhas_any=\(query,\),dpername=1,exclude_reference=True,minqual=good

        python -m ibeis --tf get_annotcfg_list  --db PZ_Master1 -a timectrl --acfginfo --verbtd  --veryverbtd --nocache-aid

        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_MTEST --disttype=ratio

    Example:
        >>> # SCRIPT
        >>> from ibeis.expt.test_result import *  # NOQA
        >>> from ibeis.init import main_helpers
        >>> disttype = ut.get_argval('--disttype', type_=list, default=None)
        >>> ibs, testres = main_helpers.testdata_expts(
        >>>     defaultdb='PZ_MTEST', a=['timectrl'], t=['best'])
        >>> f = ut.get_argval(('--filt', '-f'), type_=list, default=[''])
        >>> testres.draw_feat_scoresep(f=f)
        >>> ut.show_if_requested()
    """
    print('[testres] draw_feat_scoresep')
    import plottool as pt

    def load_feat_scores(qreq_, qaids):
        import ibeis  # NOQA
        from os.path import dirname, join  # NOQA
        # HACKY CACHE
        cfgstr = qreq_.get_cfgstr(with_input=True)
        cache_dir = join(dirname(dirname(ibeis.__file__)),
                         'TMP_FEATSCORE_CACHE')
        namemode = ut.get_argval('--namemode', default=True)
        fsvx = ut.get_argval('--fsvx',
                             type_='fuzzy_subset',
                             default=slice(None, None, None))
        threshx = ut.get_argval('--threshx', type_=int, default=None)
        thresh = ut.get_argval('--thresh', type_=float, default=.9)
        num = ut.get_argval('--num', type_=int, default=1)
        cfg_components = [
            cfgstr, disttype, namemode, fsvx, threshx, thresh, f, num
        ]
        cache_cfgstr = ','.join(ut.lmap(six.text_type, cfg_components))
        cache_hashid = ut.hashstr27(cache_cfgstr + '_v1')
        cache_name = ('get_cfgx_feat_scores_' + cache_hashid)

        @ut.cached_func(cache_name,
                        cache_dir=cache_dir,
                        key_argx=[],
                        use_cache=True)
        def get_cfgx_feat_scores(qreq_, qaids):
            from ibeis.algo.hots import scorenorm
            cm_list = qreq_.execute(qaids)
            # print('Done loading cached chipmatches')
            tup = scorenorm.get_training_featscores(qreq_,
                                                    cm_list,
                                                    disttype,
                                                    namemode,
                                                    fsvx,
                                                    threshx,
                                                    thresh,
                                                    num=num)
            # print(ut.depth_profile(tup))
            tp_scores, tn_scores, scorecfg = tup
            return tp_scores, tn_scores, scorecfg

        tp_scores, tn_scores, scorecfg = get_cfgx_feat_scores(qreq_, qaids)
        return tp_scores, tn_scores, scorecfg

    valid_case_pos = testres.case_sample2(filt_cfg=f, return_mask=False)
    cfgx2_valid_qxs = ut.group_items(valid_case_pos.T[0], valid_case_pos.T[1])
    test_qaids = testres.get_test_qaids()
    cfgx2_valid_qaids = ut.map_dict_vals(ut.partial(ut.take, test_qaids),
                                         cfgx2_valid_qxs)

    join_acfgs = True

    # TODO: option to average over pipeline configurations
    if join_acfgs:
        groupxs = testres.get_cfgx_groupxs()
    else:
        groupxs = list(zip(range(len(testres.cfgx2_qreq_))))
    grouped_qreqs = ut.apply_grouping(testres.cfgx2_qreq_, groupxs)

    grouped_scores = []
    for cfgxs, qreq_group in zip(groupxs, grouped_qreqs):
        # testres.print_pcfg_info()
        score_group = []
        for cfgx, qreq_ in zip(cfgxs, qreq_group):
            print('Loading cached chipmatches')
            qaids = cfgx2_valid_qaids[cfgx]
            tp_scores, tn_scores, scorecfg = load_feat_scores(qreq_, qaids)
            score_group.append((tp_scores, tn_scores, scorecfg))
        grouped_scores.append(score_group)

    cfgx2_shortlbl = testres.get_short_cfglbls(join_acfgs=join_acfgs)
    for score_group, lbl in zip(grouped_scores, cfgx2_shortlbl):
        tp_scores = np.hstack(ut.take_column(score_group, 0))
        tn_scores = np.hstack(ut.take_column(score_group, 1))
        scorecfg = '+++'.join(ut.unique(ut.take_column(score_group, 2)))
        # TODO: learn this score normalizer as a model
        # encoder = vt.ScoreNormalizer(adjust=4, monotonize=False)
        encoder = vt.ScoreNormalizer(adjust=2, monotonize=True)
        encoder.fit_partitioned(tp_scores, tn_scores, verbose=False)
        figtitle = 'Feature Scores: %s, %s' % (scorecfg, lbl)
        fnum = None

        vizkw = {}
        sephack = ut.get_argflag('--sephack')
        if not sephack:
            vizkw['target_tpr'] = .95
            vizkw['score_range'] = (0, 1.0)

        encoder.visualize(
            figtitle=figtitle,
            fnum=fnum,
            with_scores=False,
            #with_prebayes=True,
            with_prebayes=False,
            with_roc=True,
            with_postbayes=False,
            #with_postbayes=True,
            **vizkw)
        icon = testres.ibs.get_database_icon()
        if icon is not None:
            pt.overlay_icon(icon, coords=(1, 0), bbox_alignment=(1, 0))

        if ut.get_argflag('--contextadjust'):
            pt.adjust_subplots(left=.1, bottom=.25, wspace=.2, hspace=.2)
            pt.adjust_subplots(use_argv=True)
    return encoder