Ejemplo n.º 1
0
def get_toy_data_1v1(num_annots=5, num_names=None, **kwargs):
    r"""
    Build toy one-vs-one match data: a simulated score and a same-name label
    for pairs of annotations.

    Annotations are produced by ``get_toy_annots``. Every pair formed from
    the comparison annotations and the new annotations is scored: a pair of
    identical indices gets ``-1``, any other pair gets a normal draw whose
    ``mu`` / ``sigma`` are looked up in the module-level ``toy_params`` by
    whether both annotations share a name id, clipped below at 0. Only pairs
    whose first index is smaller than the second are returned.

    Args:
        num_annots (int): number of new annotations (default = 5)
        num_names (int): forwarded to ``get_toy_annots`` (default = None)

    Kwargs:
        forwarded unchanged to ``get_toy_annots``

    Returns:
        dict: toy_data with the keys 'aids', 'nids', 'all_nids', 'all_aids',
            'diag_labels', 'diag_scores', 'diag_nids', 'diag_aidxs' and
            'toy_params'

    CommandLine:
        python -m ibeis.algo.hots.demobayes --exec-get_toy_data_1v1 --show

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.demobayes import *  # NOQA
        >>> toy_data = get_toy_data_1v1()
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> show_toy_distributions(toy_data['toy_params'])
        >>> ut.show_if_requested()

    Example1:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.demobayes import *  # NOQA
        >>> toy_data = get_toy_data_1v1()
        >>> kwargs = {}
        >>> initial_aids = toy_data['aids']
        >>> initial_nids = toy_data['nids']
        >>> num_annots = 1
        >>> num_names = 6
        >>> toy_data2 = get_toy_data_1v1(num_annots, num_names, initial_aids=initial_aids, initial_nids=initial_nids)
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> show_toy_distributions(toy_data['toy_params'])
        >>> ut.show_if_requested()

    Ignore:
        >>> num_annots = 1000
        >>> num_names = 400
    """
    import vtool as vt
    annot_tup = get_toy_annots(num_annots, num_names, **kwargs)
    aids, nids, aids1, nids1, all_aids, all_nids = annot_tup
    rng = vt.ensure_rng(None)
    # Resolve the module-level parameter table once, before any scoring.
    score_params = toy_params

    def _sample_score(idx_a, idx_b):
        # An annotation paired with itself gets the sentinel score -1 and
        # consumes no random draw.
        if idx_a == idx_b:
            return -1
        same_name = all_nids[int(idx_a)] == all_nids[int(idx_b)]
        mu, sigma = ut.dict_take(score_params[same_name], ['mu', 'sigma'])
        return np.clip(rng.normal(mu, sigma), 0, np.inf)

    # Reversing each product tuple puts the nids1 / aids1 element first.
    nid_pairs = [pair[::-1] for pair in ut.iprod(nids, nids1)]
    match_flags = np.array([left == right for left, right in nid_pairs])

    aidx_pairs = [pair[::-1] for pair in ut.iprod(aids, aids1)]
    pair_scores = np.array([_sample_score(a, b) for a, b in aidx_pairs])

    # Keep only the pairs whose first index is strictly below the second.
    keep_flags = [first < second for first, second in aidx_pairs]
    diag_scores = pair_scores.compress(keep_flags)
    diag_aidxs = ut.compress(aidx_pairs, keep_flags)
    import utool
    with utool.embed_on_exception_context:
        diag_nids = ut.compress(nid_pairs, keep_flags)
    diag_labels = match_flags.compress(keep_flags)

    return {
        'aids': aids,
        'nids': nids,
        'all_nids': all_nids,
        'all_aids': all_aids,
        'diag_labels': diag_labels,
        'diag_scores': diag_scores,
        'diag_nids': diag_nids,
        'diag_aidxs': diag_aidxs,
        'toy_params': score_params,
    }
Ejemplo n.º 2
0
def get_toy_annots(num_annots,
                   num_names=None,
                   initial_aids=None,
                   initial_nids=None,
                   nid_sequence=None,
                   seed=None):
    r"""
    Generate toy annotation ids (aids) and name ids (nids), optionally
    extending an existing set of annotations.

    Args:
        num_annots (int): number of new annotations to create
        num_names (int): upper bound handed to ``rng.randint`` when name ids
            are drawn at random; falls back to ``num_annots`` when None
            (default = None)
        initial_aids (None): existing annotation ids; new aids continue
            counting from ``len(initial_aids)`` (default = None)
        initial_nids (None): name ids of ``initial_aids``; must be given
            exactly when ``initial_aids`` is given (default = None)
        nid_sequence (None): predetermined name ids indexed by annotation
            position; entries at positions ``>= len(initial_aids)`` are used
            before any random name ids are drawn (default = None)
        seed (None): passed to ``vt.ensure_rng`` (default = None)

    Returns:
        tuple: (aids, nids, aids1, nids1, all_aids, all_nids)
            aids, nids - the new annotations and their name ids
            aids1, nids1 - the annotations to compare against: the new ones
                on the first step, otherwise the initial ones
            all_aids, all_nids - the initial values followed by the new ones

    CommandLine:
        python -m ibeis.algo.hots.demobayes --exec-get_toy_annots

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.hots.demobayes import *  # NOQA
        >>> num_annots = 1
        >>> num_names = 5
        >>> initial_aids = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=np.int64)
        >>> initial_nids = np.array([0, 0, 1, 2, 2, 1, 1, 1, 2, 3], dtype=np.int64)
        >>> nid_sequence = np.array([0, 0, 1, 2, 2, 1, 1], dtype=np.int64)
        >>> seed = 0
        >>> (aids, nids, aids1, nids1, all_aids, all_nids) = get_toy_annots(num_annots, num_names, initial_aids, initial_nids, nid_sequence, seed)
        >>> result = ('(aids, nids, aids1, nids1, all_aids, all_nids) = %s' % (ut.repr2((aids, nids, aids1, nids1, all_aids, all_nids), nl=1),))
        >>> print(result)
    """
    import vtool as vt
    if num_names is None:
        num_names = num_annots
    print('Generating toy data with num_annots=%r' % (num_annots, ))
    if initial_aids is None:
        assert initial_nids is None
        first_step = True
        initial_aids = []
        initial_nids = []
    else:
        first_step = False
        assert initial_nids is not None

    num_initial = len(initial_aids)
    aids = np.arange(num_initial, num_annots + num_initial)
    rng = vt.ensure_rng(seed)
    if nid_sequence is None:
        nids = rng.randint(0, num_names, num_annots)
    else:
        # Entries of the sequence not yet covered by the initial annotations
        unused_from_sequence = max(len(nid_sequence) - num_initial, 0)
        if unused_from_sequence == 0:
            # Sequence exhausted: every new name id is random
            nids = rng.randint(0, num_names, num_annots)
        elif unused_from_sequence < num_annots:
            # Use up the rest of the sequence, then pad with random name ids
            num_remain = num_annots - unused_from_sequence
            nids = np.append(nid_sequence[-unused_from_sequence:],
                             rng.randint(0, num_names, num_remain))
        else:
            # The sequence covers every new annotation
            nids = np.array(
                ut.take(nid_sequence,
                        range(num_initial, num_initial + num_annots)))

    if first_step:
        aids1 = aids
        nids1 = nids
    else:
        aids1 = initial_aids
        nids1 = initial_nids

    # NOTE: on the first step the initial values are empty Python lists, which
    # np.append converts to float64, so all_nids / all_aids come back as
    # float arrays in that case.
    all_nids = np.append(initial_nids, nids)
    all_aids = np.append(initial_aids, aids)
    import utool
    with utool.embed_on_exception_context:
        ut.assert_eq(len(aids), len(nids), 'len new')
        ut.assert_eq(len(aids1), len(nids1), 'len comp')
        ut.assert_eq(len(all_aids), len(all_nids), 'len all')
    return aids, nids, aids1, nids1, all_aids, all_nids
Ejemplo n.º 3
0
def get_toy_data_1vM(num_annots, num_names=None, **kwargs):
    r"""
    Simulate one-vs-many matching on toy data and compare two SVM
    classifiers on it.

    For every annotation a score against each known name is simulated as the
    maximum of ``num_exemplars`` normal draws (``mu`` / ``sigma`` come from
    the module-level ``toy_params``, keyed by whether the name is the
    annotation's own name), clipped below at 0. An ``svm.SVC`` is trained on
    (score, rank, num_exemplars) features and an ``svm.LinearSVC`` on the
    score alone, using the 5 best-scoring names of each annotation. Both are
    then evaluated on ``num_annots * 2`` newly generated annotations and the
    results are printed.

    Args:
        num_annots (int): number of training annotations
        num_names (int): forwarded to ``get_toy_annots`` (default = None)

    Kwargs:
        initial_aids, initial_nids, nid_sequence, seed

    Returns:
        dict: toy_data, always empty; all results are reported via print

    CommandLine:
        python -m ibeis.algo.hots.demobayes --exec-get_toy_data_1vM --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.hots.demobayes import *  # NOQA
        >>> num_annots = 1000
        >>> num_names = 40
        >>> get_toy_data_1vM(num_annots, num_names)
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> ut.show_if_requested()
    """
    import vtool as vt
    tup_ = get_toy_annots(num_annots, num_names, **kwargs)
    aids, nids, aids1, nids1, all_aids, all_nids = tup_
    rng = vt.ensure_rng(None)

    # Used below as the number of exemplars available for each name
    nid2_nexemp = ut.dict_hist(nids1)
    aid2_nid = dict(zip(aids, nids))

    ut.fix_embed_globals()

    unique_nids = list(nid2_nexemp.keys())

    def annot_to_class_feats2(aid, aid2_nid, top=None):
        # Returns (pair_list, feat_list) for one annotation, rows ordered
        # from the best to the worst scoring name, optionally cut to `top`.
        pair_list = []
        score_list = []
        nexemplar_list = []
        for nid in unique_nids:
            label = (aid2_nid[aid] == nid)
            num_exemplars = nid2_nexemp.get(nid, 0)
            if num_exemplars == 0:
                continue
            params = toy_params[label]
            mu, sigma = ut.dict_take(params, ['mu', 'sigma'])
            # The name score is the best of one draw per exemplar
            score_ = rng.normal(mu, sigma, size=num_exemplars).max()
            score = np.clip(score_, 0, np.inf)
            pair_list.append((aid, nid))
            score_list.append(score)
            nexemplar_list.append(num_exemplars)
        # Row indices ordered from the highest to the lowest score
        sortx = np.asarray(ut.argsort(score_list, reverse=True), dtype=int)
        # The inverse permutation of the sort order is each score's rank
        # (0 = best), which keeps the rank column aligned with its score.
        rank_list = np.argsort(sortx)
        feat_list = np.array([score_list, rank_list, nexemplar_list]).T
        feat_list = feat_list.take(sortx, axis=0)
        pair_list = np.array(pair_list).take(sortx, axis=0)
        if top is not None:
            feat_list = feat_list[:top]
            pair_list = pair_list[0:top]
        return pair_list, feat_list

    toclass_features = [
        annot_to_class_feats2(aid, aid2_nid, top=5) for aid in aids
    ]
    aidnid_pairs = np.vstack(ut.get_list_column(toclass_features, 0))
    feat_list = np.vstack(ut.get_list_column(toclass_features, 1))
    # First feature column only, kept 2D for the score-only classifier
    score_list = feat_list.T[0:1].T
    lbl_list = [aid2_nid[aid] == nid for aid, nid in aidnid_pairs]

    from sklearn import svm
    print('Learning classifiers')

    # clf3 sees score, rank and exemplar count
    clf3 = svm.SVC(probability=True)
    clf3.fit(feat_list, lbl_list)

    # clf1 sees the score alone
    clf1 = svm.LinearSVC()
    clf1.fit(score_list, lbl_list)

    # Score new annots against the training database
    tup_ = get_toy_annots(num_annots * 2,
                          num_names,
                          initial_aids=all_aids,
                          initial_nids=all_nids)
    aids, nids, aids1, nids1, all_aids, all_nids = tup_
    aid2_nid = dict(zip(aids, nids))
    toclass_features = [annot_to_class_feats2(aid, aid2_nid) for aid in aids]
    aidnid_pairs = np.vstack(ut.get_list_column(toclass_features, 0))
    feat_list = np.vstack(ut.get_list_column(toclass_features, 1))
    lbl_list = np.array([aid2_nid[aid] == nid for aid, nid in aidnid_pairs])

    print('Running tests')

    score_list = feat_list.T[0:1].T

    # Split the test rows into true matches (tp) and true non-matches (tn)
    tp_feat_list = feat_list[lbl_list]
    tn_feat_list = feat_list[~lbl_list]
    tp_lbls = lbl_list[lbl_list]
    tn_lbls = lbl_list[~lbl_list]
    print('num tp: %d' % len(tp_lbls))
    print('num tn: %d' % len(tn_lbls))

    tp_score_list = score_list[lbl_list]
    tn_score_list = score_list[~lbl_list]

    print('tp_feat' +
          ut.repr3(ut.get_stats(tp_feat_list, axis=0), precision=2))
    print('tn_feat' +
          ut.repr3(ut.get_stats(tn_feat_list, axis=0), precision=2))

    print('tp_score' + ut.repr2(ut.get_stats(tp_score_list), precision=2))
    print('tn_score' + ut.repr2(ut.get_stats(tn_score_list), precision=2))

    tp_pred3 = clf3.predict(tp_feat_list)
    tn_pred3 = clf3.predict(tn_feat_list)
    print((tp_pred3.sum(), tp_pred3.shape))
    print((tn_pred3.sum(), tn_pred3.shape))

    tp_score3 = clf3.score(tp_feat_list, tp_lbls)
    tn_score3 = clf3.score(tn_feat_list, tn_lbls)

    tp_pred1 = clf1.predict(tp_score_list)
    tn_pred1 = clf1.predict(tn_score_list)
    print((tp_pred1.sum(), tp_pred1.shape))
    print((tn_pred1.sum(), tn_pred1.shape))

    tp_score1 = clf1.score(tp_score_list, tp_lbls)
    tn_score1 = clf1.score(tn_score_list, tn_lbls)
    print('tp score with rank    = %r' % (tp_score3, ))
    print('tn score with rank    = %r' % (tn_score3, ))

    print('tp score without rank = %r' % (tp_score1, ))
    print('tn score without rank = %r' % (tn_score1, ))
    toy_data = {}

    return toy_data
Ejemplo n.º 4
0
def get_toy_data_1v1(num_annots=5, num_names=None, **kwargs):
    r"""
    Build toy one-vs-one match data: a simulated score and a same-name label
    for pairs of annotations.

    Annotations are produced by ``get_toy_annots``. Every pair formed from
    the comparison annotations and the new annotations is scored: a pair of
    identical indices gets ``-1``, any other pair gets a normal draw whose
    ``mu`` / ``sigma`` are looked up in the module-level ``toy_params`` by
    whether both annotations share a name id, clipped below at 0. Only pairs
    whose first index is smaller than the second are returned.

    Args:
        num_annots (int): number of new annotations (default = 5)
        num_names (int): forwarded to ``get_toy_annots`` (default = None)

    Kwargs:
        forwarded unchanged to ``get_toy_annots``

    Returns:
        dict: toy_data with the keys 'aids', 'nids', 'all_nids', 'all_aids',
            'diag_labels', 'diag_scores', 'diag_nids', 'diag_aidxs' and
            'toy_params'

    CommandLine:
        python -m ibeis.algo.hots.demobayes --exec-get_toy_data_1v1 --show

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.demobayes import *  # NOQA
        >>> toy_data = get_toy_data_1v1()
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> show_toy_distributions(toy_data['toy_params'])
        >>> ut.show_if_requested()

    Example1:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.demobayes import *  # NOQA
        >>> toy_data = get_toy_data_1v1()
        >>> kwargs = {}
        >>> initial_aids = toy_data['aids']
        >>> initial_nids = toy_data['nids']
        >>> num_annots = 1
        >>> num_names = 6
        >>> toy_data2 = get_toy_data_1v1(num_annots, num_names, initial_aids=initial_aids, initial_nids=initial_nids)
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> show_toy_distributions(toy_data['toy_params'])
        >>> ut.show_if_requested()

    Ignore:
        >>> num_annots = 1000
        >>> num_names = 400
    """
    import vtool as vt
    annot_tup = get_toy_annots(num_annots, num_names, **kwargs)
    aids, nids, aids1, nids1, all_aids, all_nids = annot_tup
    rng = vt.ensure_rng(None)
    # Resolve the module-level parameter table once, before any scoring.
    score_params = toy_params

    def _sample_score(idx_a, idx_b):
        # An annotation paired with itself gets the sentinel score -1 and
        # consumes no random draw.
        if idx_a == idx_b:
            return -1
        same_name = all_nids[int(idx_a)] == all_nids[int(idx_b)]
        mu, sigma = ut.dict_take(score_params[same_name], ['mu', 'sigma'])
        return np.clip(rng.normal(mu, sigma), 0, np.inf)

    # Reversing each product tuple puts the nids1 / aids1 element first.
    nid_pairs = [pair[::-1] for pair in ut.iprod(nids, nids1)]
    match_flags = np.array([left == right for left, right in nid_pairs])

    aidx_pairs = [pair[::-1] for pair in ut.iprod(aids, aids1)]
    pair_scores = np.array([_sample_score(a, b) for a, b in aidx_pairs])

    # Keep only the pairs whose first index is strictly below the second.
    keep_flags = [first < second for first, second in aidx_pairs]
    diag_scores = pair_scores.compress(keep_flags)
    diag_aidxs = ut.compress(aidx_pairs, keep_flags)
    import utool
    with utool.embed_on_exception_context:
        diag_nids = ut.compress(nid_pairs, keep_flags)
    diag_labels = match_flags.compress(keep_flags)

    return {
        'aids': aids,
        'nids': nids,
        'all_nids': all_nids,
        'all_aids': all_aids,
        'diag_labels': diag_labels,
        'diag_scores': diag_scores,
        'diag_nids': diag_nids,
        'diag_aidxs': diag_aidxs,
        'toy_params': score_params,
    }
Ejemplo n.º 5
0
def get_toy_annots(num_annots, num_names=None, initial_aids=None, initial_nids=None, nid_sequence=None, seed=None):
    r"""
    Generate toy annotation ids (aids) and name ids (nids), optionally
    extending an existing set of annotations.

    Args:
        num_annots (int): number of new annotations to create
        num_names (int): upper bound handed to ``rng.randint`` when name ids
            are drawn at random; falls back to ``num_annots`` when None
            (default = None)
        initial_aids (None): existing annotation ids; new aids continue
            counting from ``len(initial_aids)`` (default = None)
        initial_nids (None): name ids of ``initial_aids``; must be given
            exactly when ``initial_aids`` is given (default = None)
        nid_sequence (None): predetermined name ids indexed by annotation
            position; entries at positions ``>= len(initial_aids)`` are used
            before any random name ids are drawn (default = None)
        seed (None): passed to ``vt.ensure_rng`` (default = None)

    Returns:
        tuple: (aids, nids, aids1, nids1, all_aids, all_nids)
            aids, nids - the new annotations and their name ids
            aids1, nids1 - the annotations to compare against: the new ones
                on the first step, otherwise the initial ones
            all_aids, all_nids - the initial values followed by the new ones

    CommandLine:
        python -m ibeis.algo.hots.demobayes --exec-get_toy_annots

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.hots.demobayes import *  # NOQA
        >>> num_annots = 1
        >>> num_names = 5
        >>> initial_aids = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=np.int64)
        >>> initial_nids = np.array([0, 0, 1, 2, 2, 1, 1, 1, 2, 3], dtype=np.int64)
        >>> nid_sequence = np.array([0, 0, 1, 2, 2, 1, 1], dtype=np.int64)
        >>> seed = 0
        >>> (aids, nids, aids1, nids1, all_aids, all_nids) = get_toy_annots(num_annots, num_names, initial_aids, initial_nids, nid_sequence, seed)
        >>> result = ('(aids, nids, aids1, nids1, all_aids, all_nids) = %s' % (ut.repr2((aids, nids, aids1, nids1, all_aids, all_nids), nl=1),))
        >>> print(result)
    """
    import vtool as vt
    if num_names is None:
        num_names = num_annots
    print('Generating toy data with num_annots=%r' % (num_annots,))
    if initial_aids is None:
        assert initial_nids is None
        first_step = True
        initial_aids = []
        initial_nids = []
    else:
        first_step = False
        assert initial_nids is not None

    num_initial = len(initial_aids)
    aids = np.arange(num_initial, num_annots + num_initial)
    rng = vt.ensure_rng(seed)
    if nid_sequence is None:
        nids = rng.randint(0, num_names, num_annots)
    else:
        # Entries of the sequence not yet covered by the initial annotations
        unused_from_sequence = max(len(nid_sequence) - num_initial, 0)
        if unused_from_sequence == 0:
            # Sequence exhausted: every new name id is random
            nids = rng.randint(0, num_names, num_annots)
        elif unused_from_sequence < num_annots:
            # Use up the rest of the sequence, then pad with random name ids
            num_remain = num_annots - unused_from_sequence
            nids = np.append(nid_sequence[-unused_from_sequence:], rng.randint(0, num_names, num_remain))
        else:
            # The sequence covers every new annotation
            nids = np.array(ut.take(nid_sequence, range(num_initial, num_initial + num_annots)))

    if first_step:
        aids1 = aids
        nids1 = nids
    else:
        aids1 = initial_aids
        nids1 = initial_nids

    # NOTE: on the first step the initial values are empty Python lists, which
    # np.append converts to float64, so all_nids / all_aids come back as
    # float arrays in that case.
    all_nids = np.append(initial_nids, nids)
    all_aids = np.append(initial_aids, aids)
    import utool
    with utool.embed_on_exception_context:
        ut.assert_eq(len(aids), len(nids), 'len new')
        ut.assert_eq(len(aids1), len(nids1), 'len comp')
        ut.assert_eq(len(all_aids), len(all_nids), 'len all')
    return aids, nids, aids1, nids1, all_aids, all_nids
Ejemplo n.º 6
0
def get_toy_data_1vM(num_annots, num_names=None, **kwargs):
    r"""
    Simulate one-vs-many matching on toy data and compare two SVM
    classifiers on it.

    For every annotation a score against each known name is simulated as the
    maximum of ``num_exemplars`` normal draws (``mu`` / ``sigma`` come from
    the module-level ``toy_params``, keyed by whether the name is the
    annotation's own name), clipped below at 0. An ``svm.SVC`` is trained on
    (score, rank, num_exemplars) features and an ``svm.LinearSVC`` on the
    score alone, using the 5 best-scoring names of each annotation. Both are
    then evaluated on ``num_annots * 2`` newly generated annotations and the
    results are printed.

    Args:
        num_annots (int): number of training annotations
        num_names (int): forwarded to ``get_toy_annots`` (default = None)

    Kwargs:
        initial_aids, initial_nids, nid_sequence, seed

    Returns:
        dict: toy_data, always empty; all results are reported via print

    CommandLine:
        python -m ibeis.algo.hots.demobayes --exec-get_toy_data_1vM --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.hots.demobayes import *  # NOQA
        >>> num_annots = 1000
        >>> num_names = 40
        >>> get_toy_data_1vM(num_annots, num_names)
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> ut.show_if_requested()
    """
    import vtool as vt
    tup_ = get_toy_annots(num_annots, num_names, **kwargs)
    aids, nids, aids1, nids1, all_aids, all_nids = tup_
    rng = vt.ensure_rng(None)

    # Used below as the number of exemplars available for each name
    nid2_nexemp = ut.dict_hist(nids1)
    aid2_nid = dict(zip(aids, nids))

    ut.fix_embed_globals()

    unique_nids = list(nid2_nexemp.keys())

    def annot_to_class_feats2(aid, aid2_nid, top=None):
        # Returns (pair_list, feat_list) for one annotation, rows ordered
        # from the best to the worst scoring name, optionally cut to `top`.
        pair_list = []
        score_list = []
        nexemplar_list = []
        for nid in unique_nids:
            label = (aid2_nid[aid] == nid)
            num_exemplars = nid2_nexemp.get(nid, 0)
            if num_exemplars == 0:
                continue
            params = toy_params[label]
            mu, sigma = ut.dict_take(params, ['mu', 'sigma'])
            # The name score is the best of one draw per exemplar
            score_ = rng.normal(mu, sigma, size=num_exemplars).max()
            score = np.clip(score_, 0, np.inf)
            pair_list.append((aid, nid))
            score_list.append(score)
            nexemplar_list.append(num_exemplars)
        # Row indices ordered from the highest to the lowest score
        sortx = np.asarray(ut.argsort(score_list, reverse=True), dtype=int)
        # The inverse permutation of the sort order is each score's rank
        # (0 = best), which keeps the rank column aligned with its score.
        rank_list = np.argsort(sortx)
        feat_list = np.array([score_list, rank_list, nexemplar_list]).T
        feat_list = feat_list.take(sortx, axis=0)
        pair_list = np.array(pair_list).take(sortx, axis=0)
        if top is not None:
            feat_list = feat_list[:top]
            pair_list = pair_list[0:top]
        return pair_list, feat_list

    toclass_features = [annot_to_class_feats2(aid, aid2_nid, top=5) for aid in aids]
    aidnid_pairs = np.vstack(ut.get_list_column(toclass_features, 0))
    feat_list = np.vstack(ut.get_list_column(toclass_features, 1))
    # First feature column only, kept 2D for the score-only classifier
    score_list = feat_list.T[0:1].T
    lbl_list = [aid2_nid[aid] == nid for aid, nid in aidnid_pairs]

    from sklearn import svm
    print('Learning classifiers')

    # clf3 sees score, rank and exemplar count
    clf3 = svm.SVC()
    clf3.fit(feat_list, lbl_list)

    # clf1 sees the score alone
    clf1 = svm.LinearSVC()
    clf1.fit(score_list, lbl_list)

    # Score new annots against the training database
    tup_ = get_toy_annots(num_annots * 2, num_names, initial_aids=all_aids, initial_nids=all_nids)
    aids, nids, aids1, nids1, all_aids, all_nids = tup_
    aid2_nid = dict(zip(aids, nids))
    toclass_features = [annot_to_class_feats2(aid, aid2_nid) for aid in aids]
    aidnid_pairs = np.vstack(ut.get_list_column(toclass_features, 0))
    feat_list = np.vstack(ut.get_list_column(toclass_features, 1))
    lbl_list = np.array([aid2_nid[aid] == nid for aid, nid in aidnid_pairs])

    print('Running tests')

    score_list = feat_list.T[0:1].T

    # Split the test rows into true matches (tp) and true non-matches (tn)
    tp_feat_list = feat_list[lbl_list]
    tn_feat_list = feat_list[~lbl_list]
    tp_lbls = lbl_list[lbl_list]
    tn_lbls = lbl_list[~lbl_list]
    print('num tp: %d' % len(tp_lbls))
    print('num tn: %d' % len(tn_lbls))

    tp_score_list = score_list[lbl_list]
    tn_score_list = score_list[~lbl_list]

    print('tp_feat' + ut.repr3(ut.get_stats(tp_feat_list, axis=0), precision=2))
    print('tn_feat' + ut.repr3(ut.get_stats(tn_feat_list, axis=0), precision=2))

    print('tp_score' + ut.repr2(ut.get_stats(tp_score_list), precision=2))
    print('tn_score' + ut.repr2(ut.get_stats(tn_score_list), precision=2))

    tp_pred3 = clf3.predict(tp_feat_list)
    tn_pred3 = clf3.predict(tn_feat_list)
    print((tp_pred3.sum(), tp_pred3.shape))
    print((tn_pred3.sum(), tn_pred3.shape))

    tp_score3 = clf3.score(tp_feat_list, tp_lbls)
    tn_score3 = clf3.score(tn_feat_list, tn_lbls)

    tp_pred1 = clf1.predict(tp_score_list)
    tn_pred1 = clf1.predict(tn_score_list)
    print((tp_pred1.sum(), tp_pred1.shape))
    print((tn_pred1.sum(), tn_pred1.shape))

    tp_score1 = clf1.score(tp_score_list, tp_lbls)
    tn_score1 = clf1.score(tn_score_list, tn_lbls)
    print('tp score with rank    = %r' % (tp_score3,))
    print('tn score with rank    = %r' % (tn_score3,))

    print('tp score without rank = %r' % (tp_score1,))
    print('tn score without rank = %r' % (tn_score1,))
    toy_data = {}

    return toy_data