def get_toy_data_1v1(num_annots=5, num_names=None, **kwargs):
    r"""
    Generate toy 1-vs-1 data: pairwise match scores between annotations,
    sampled from the module-level ``toy_params`` match/nonmatch distributions.

    Args:
        num_annots (int): number of new annotations to generate (default = 5)
        num_names (int): number of names; defaults to ``num_annots``
        **kwargs: forwarded to :func:`get_toy_annots`
            (initial_aids, initial_nids, nid_sequence, seed)

    Returns:
        dict: toy_data containing the generated aids/nids and the
            upper-triangle ("diag") pairwise scores, labels, nid pairs and
            aidx pairs.

    CommandLine:
        python -m ibeis.algo.hots.demobayes --exec-get_toy_data_1v1 --show

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.demobayes import *  # NOQA
        >>> toy_data = get_toy_data_1v1()
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> show_toy_distributions(toy_data['toy_params'])
        >>> ut.show_if_requested()

    Example1:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.demobayes import *  # NOQA
        >>> toy_data = get_toy_data_1v1()
        >>> kwargs = {}
        >>> initial_aids = toy_data['aids']
        >>> initial_nids = toy_data['nids']
        >>> num_annots = 1
        >>> num_names = 6
        >>> toy_data2 = get_toy_data_1v1(num_annots, num_names, initial_aids=initial_aids, initial_nids=initial_nids)
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> show_toy_distributions(toy_data['toy_params'])
        >>> ut.show_if_requested()

    Ignore:
        >>> num_annots = 1000
        >>> num_names = 400
    """
    import vtool as vt
    tup_ = get_toy_annots(num_annots, num_names, **kwargs)
    aids, nids, aids1, nids1, all_aids, all_nids = tup_
    rng = vt.ensure_rng(None)

    def pairwise_feature(aidx1, aidx2, all_nids=all_nids, toy_params=toy_params):
        # Self-comparison gets a sentinel score of -1.
        if aidx1 == aidx2:
            score = -1
        else:
            # Sample from the match (same name) or nonmatch distribution.
            # NOTE(review): ``toy_params`` is a module-level mapping keyed by
            # bool with 'mu'/'sigma' entries — defined elsewhere in this file.
            nid1 = all_nids[int(aidx1)]
            nid2 = all_nids[int(aidx2)]
            params = toy_params[nid1 == nid2]
            mu, sigma = ut.dict_take(params, ['mu', 'sigma'])
            score_ = rng.normal(mu, sigma)
            # Scores are non-negative; clip the gaussian sample at zero.
            score = np.clip(score_, 0, np.inf)
        return score

    pairwise_nids = list([tup[::-1] for tup in ut.iprod(nids, nids1)])
    pairwise_matches = np.array(
        [nid1 == nid2 for nid1, nid2 in pairwise_nids])

    pairwise_aidxs = list([tup[::-1] for tup in ut.iprod(aids, aids1)])
    pairwise_features = np.array(
        [pairwise_feature(aidx1, aidx2) for aidx1, aidx2 in pairwise_aidxs])

    # Keep only the strict upper triangle: each unordered pair once,
    # and no self-comparisons.
    is_diag = [r < c for r, c in pairwise_aidxs]
    diag_scores = pairwise_features.compress(is_diag)
    diag_aidxs = ut.compress(pairwise_aidxs, is_diag)
    diag_nids = ut.compress(pairwise_nids, is_diag)
    diag_labels = pairwise_matches.compress(is_diag)

    toy_data = {
        'aids': aids,
        'nids': nids,
        'all_nids': all_nids,
        'all_aids': all_aids,
        'diag_labels': diag_labels,
        'diag_scores': diag_scores,
        'diag_nids': diag_nids,
        'diag_aidxs': diag_aidxs,
        'toy_params': toy_params,
    }
    return toy_data
def get_toy_annots(num_annots, num_names=None, initial_aids=None,
                   initial_nids=None, nid_sequence=None, seed=None):
    r"""
    Generate toy annotation ids and name ids, optionally continuing from an
    existing set of annotations.

    Args:
        num_annots (int): number of new annotations to generate
        num_names (int): number of distinct names; defaults to ``num_annots``
        initial_aids (None): previously generated aids (default = None)
        initial_nids (None): previously generated nids (default = None)
        nid_sequence (None): scripted name assignments to consume before
            falling back to random draws (default = None)
        seed (None): rng seed (default = None)

    Returns:
        tuple: (aids, nids, aids1, nids1, all_aids, all_nids)

    CommandLine:
        python -m ibeis.algo.hots.demobayes --exec-get_toy_annots

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.hots.demobayes import *  # NOQA
        >>> num_annots = 1
        >>> num_names = 5
        >>> initial_aids = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=np.int64)
        >>> initial_nids = np.array([0, 0, 1, 2, 2, 1, 1, 1, 2, 3], dtype=np.int64)
        >>> nid_sequence = np.array([0, 0, 1, 2, 2, 1, 1], dtype=np.int64)
        >>> seed = 0
        >>> (aids, nids, aids1, nids1, all_aids, all_nids) = get_toy_annots(num_annots, num_names, initial_aids, initial_nids, nid_sequence, seed)
        >>> result = ('(aids, nids, aids1, nids1, all_aids, all_nids) = %s' % (ut.repr2((aids, nids, aids1, nids1, all_aids, all_nids), nl=1),))
        >>> print(result)
    """
    import vtool as vt
    if num_names is None:
        num_names = num_annots
    print('Generating toy data with num_annots=%r' % (num_annots,))
    if initial_aids is None:
        assert initial_nids is None
        first_step = True
        initial_aids = []
        initial_nids = []
    else:
        first_step = False
        assert initial_nids is not None

    # New annot ids continue numbering after any existing ones.
    aids = np.arange(len(initial_aids), num_annots + len(initial_aids))
    rng = vt.ensure_rng(seed)
    if nid_sequence is None:
        # No scripted sequence: draw names uniformly at random.
        nids = rng.randint(0, num_names, num_annots)
    else:
        # How many scripted entries have not yet been consumed by earlier
        # annotations?
        unused_from_sequence = max(len(nid_sequence) - len(initial_aids), 0)
        if unused_from_sequence == 0:
            # Sequence exhausted: fall back to random draws.
            nids = rng.randint(0, num_names, num_annots)
        elif unused_from_sequence < num_annots:
            # Consume the rest of the sequence; fill the remainder randomly.
            num_remain = num_annots - unused_from_sequence
            nids = np.append(nid_sequence[-unused_from_sequence:],
                             rng.randint(0, num_names, num_remain))
        else:
            # Sequence covers all new annots; take the next contiguous chunk.
            nids = np.array(ut.take(
                nid_sequence,
                range(len(initial_aids), len(initial_aids) + num_annots)))

    if first_step:
        aids1 = aids
        nids1 = nids
    else:
        aids1 = initial_aids
        nids1 = initial_nids

    all_nids = np.append(initial_nids, nids)
    all_aids = np.append(initial_aids, aids)
    # Sanity checks: aid/nid lists must stay aligned.
    ut.assert_eq(len(aids), len(nids), 'len new')
    ut.assert_eq(len(aids1), len(nids1), 'len comp')
    ut.assert_eq(len(all_aids), len(all_nids), 'len all')
    return aids, nids, aids1, nids1, all_aids, all_nids
def get_toy_data_1vM(num_annots, num_names=None, **kwargs):
    r"""
    Generate toy 1-vs-many data and run a simple SVM classification
    experiment on annot-vs-name scores.

    Args:
        num_annots (int):
        num_names (int): (default = None)

    Kwargs:
        initial_aids, initial_nids, nid_sequence, seed

    Returns:
        tuple: (pair_list, feat_list)

    CommandLine:
        python -m ibeis.algo.hots.demobayes --exec-get_toy_data_1vM --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.hots.demobayes import *  # NOQA
        >>> num_annots = 1000
        >>> num_names = 40
        >>> get_toy_data_1vM(num_annots, num_names)
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> ut.show_if_requested()
    """
    import vtool as vt
    tup_ = get_toy_annots(num_annots, num_names, **kwargs)
    aids, nids, aids1, nids1, all_aids, all_nids = tup_
    rng = vt.ensure_rng(None)

    # Test a simple SVM classifier
    nid2_nexemp = ut.dict_hist(nids1)
    aid2_nid = dict(zip(aids, nids))

    unique_nids = list(nid2_nexemp.keys())

    def annot_to_class_feats2(aid, aid2_nid, top=None):
        # Score ``aid`` against every name; each (aid, nid) pair gets a
        # feature row of (score, rank, num_exemplars).
        pair_list = []
        score_list = []
        nexemplar_list = []
        for nid in unique_nids:
            label = (aid2_nid[aid] == nid)
            num_exemplars = nid2_nexemp.get(nid, 0)
            if num_exemplars == 0:
                continue
            # NOTE(review): ``toy_params`` is a module-level mapping keyed by
            # bool with 'mu'/'sigma' entries — defined elsewhere in this file.
            params = toy_params[label]
            mu, sigma = ut.dict_take(params, ['mu', 'sigma'])
            # Best score over this name's exemplars, clipped non-negative.
            score_ = rng.normal(mu, sigma, size=num_exemplars).max()
            score = np.clip(score_, 0, np.inf)
            pair_list.append((aid, nid))
            score_list.append(score)
            nexemplar_list.append(num_exemplars)
        rank_list = ut.argsort(score_list, reverse=True)
        feat_list = np.array([score_list, rank_list, nexemplar_list]).T
        sortx = np.argsort(rank_list)
        feat_list = feat_list.take(sortx, axis=0)
        pair_list = np.array(pair_list).take(sortx, axis=0)
        if top is not None:
            feat_list = feat_list[:top]
            pair_list = pair_list[0:top]
        return pair_list, feat_list

    toclass_features = [
        annot_to_class_feats2(aid, aid2_nid, top=5) for aid in aids
    ]
    aidnid_pairs = np.vstack(ut.get_list_column(toclass_features, 0))
    feat_list = np.vstack(ut.get_list_column(toclass_features, 1))
    score_list = feat_list.T[0:1].T
    lbl_list = [aid2_nid[aid] == nid for aid, nid in aidnid_pairs]

    from sklearn import svm
    print('Learning classifiers')
    # clf3 uses the full (score, rank, nexemplar) features; clf1 score-only.
    clf3 = svm.SVC(probability=True)
    clf3.fit(feat_list, lbl_list)

    clf1 = svm.LinearSVC()
    clf1.fit(score_list, lbl_list)

    # Score new annots against the training database
    tup_ = get_toy_annots(num_annots * 2, num_names,
                          initial_aids=all_aids, initial_nids=all_nids)
    aids, nids, aids1, nids1, all_aids, all_nids = tup_
    aid2_nid = dict(zip(aids, nids))
    toclass_features = [annot_to_class_feats2(aid, aid2_nid) for aid in aids]

    aidnid_pairs = np.vstack(ut.get_list_column(toclass_features, 0))
    feat_list = np.vstack(ut.get_list_column(toclass_features, 1))
    lbl_list = np.array([aid2_nid[aid] == nid for aid, nid in aidnid_pairs])

    print('Running tests')

    score_list = feat_list.T[0:1].T

    tp_feat_list = feat_list[lbl_list]
    tn_feat_list = feat_list[~lbl_list]
    tp_lbls = lbl_list[lbl_list]
    tn_lbls = lbl_list[~lbl_list]
    print('num tp: %d' % len(tp_lbls))
    print('num tn: %d' % len(tn_lbls))

    tp_score_list = score_list[lbl_list]
    tn_score_list = score_list[~lbl_list]

    print('tp_feat' + ut.repr3(ut.get_stats(tp_feat_list, axis=0), precision=2))
    print('tn_feat' + ut.repr3(ut.get_stats(tn_feat_list, axis=0), precision=2))

    print('tp_score' + ut.repr2(ut.get_stats(tp_score_list), precision=2))
    print('tn_score' + ut.repr2(ut.get_stats(tn_score_list), precision=2))

    tp_pred3 = clf3.predict(tp_feat_list)
    tn_pred3 = clf3.predict(tn_feat_list)
    print((tp_pred3.sum(), tp_pred3.shape))
    print((tn_pred3.sum(), tn_pred3.shape))

    tp_score3 = clf3.score(tp_feat_list, tp_lbls)
    tn_score3 = clf3.score(tn_feat_list, tn_lbls)

    tp_pred1 = clf1.predict(tp_score_list)
    tn_pred1 = clf1.predict(tn_score_list)
    print((tp_pred1.sum(), tp_pred1.shape))
    print((tn_pred1.sum(), tn_pred1.shape))

    tp_score1 = clf1.score(tp_score_list, tp_lbls)
    tn_score1 = clf1.score(tn_score_list, tn_lbls)
    print('tp score with rank = %r' % (tp_score3,))
    print('tn score with rank = %r' % (tn_score3,))
    print('tp score without rank = %r' % (tp_score1,))
    print('tn score without rank = %r' % (tn_score1,))
    toy_data = {}
    return toy_data
def get_toy_data_1v1(num_annots=5, num_names=None, **kwargs):
    r"""
    Generate toy 1-vs-1 data: pairwise match scores between annotations,
    sampled from the module-level ``toy_params`` match/nonmatch distributions.

    NOTE(review): this function is defined twice in this file; this later
    definition shadows the earlier one at import time.

    Args:
        num_annots (int): number of new annotations to generate (default = 5)
        num_names (int): number of names; defaults to ``num_annots``
        **kwargs: forwarded to :func:`get_toy_annots`
            (initial_aids, initial_nids, nid_sequence, seed)

    Returns:
        dict: toy_data containing the generated aids/nids and the
            upper-triangle ("diag") pairwise scores, labels, nid pairs and
            aidx pairs.

    CommandLine:
        python -m ibeis.algo.hots.demobayes --exec-get_toy_data_1v1 --show

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.demobayes import *  # NOQA
        >>> toy_data = get_toy_data_1v1()
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> show_toy_distributions(toy_data['toy_params'])
        >>> ut.show_if_requested()

    Example1:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.demobayes import *  # NOQA
        >>> toy_data = get_toy_data_1v1()
        >>> kwargs = {}
        >>> initial_aids = toy_data['aids']
        >>> initial_nids = toy_data['nids']
        >>> num_annots = 1
        >>> num_names = 6
        >>> toy_data2 = get_toy_data_1v1(num_annots, num_names, initial_aids=initial_aids, initial_nids=initial_nids)
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> show_toy_distributions(toy_data['toy_params'])
        >>> ut.show_if_requested()

    Ignore:
        >>> num_annots = 1000
        >>> num_names = 400
    """
    import vtool as vt
    tup_ = get_toy_annots(num_annots, num_names, **kwargs)
    aids, nids, aids1, nids1, all_aids, all_nids = tup_
    rng = vt.ensure_rng(None)

    def pairwise_feature(aidx1, aidx2, all_nids=all_nids, toy_params=toy_params):
        # Self-comparison gets a sentinel score of -1.
        if aidx1 == aidx2:
            score = -1
        else:
            # Sample from the match (same name) or nonmatch distribution.
            nid1 = all_nids[int(aidx1)]
            nid2 = all_nids[int(aidx2)]
            params = toy_params[nid1 == nid2]
            mu, sigma = ut.dict_take(params, ['mu', 'sigma'])
            score_ = rng.normal(mu, sigma)
            # Scores are non-negative; clip the gaussian sample at zero.
            score = np.clip(score_, 0, np.inf)
        return score

    pairwise_nids = list([tup[::-1] for tup in ut.iprod(nids, nids1)])
    pairwise_matches = np.array(
        [nid1 == nid2 for nid1, nid2 in pairwise_nids])

    pairwise_aidxs = list([tup[::-1] for tup in ut.iprod(aids, aids1)])
    pairwise_features = np.array(
        [pairwise_feature(aidx1, aidx2) for aidx1, aidx2 in pairwise_aidxs])

    # Keep only the strict upper triangle: each unordered pair once,
    # and no self-comparisons.
    is_diag = [r < c for r, c in pairwise_aidxs]
    diag_scores = pairwise_features.compress(is_diag)
    diag_aidxs = ut.compress(pairwise_aidxs, is_diag)
    diag_nids = ut.compress(pairwise_nids, is_diag)
    diag_labels = pairwise_matches.compress(is_diag)

    toy_data = {
        'aids': aids,
        'nids': nids,
        'all_nids': all_nids,
        'all_aids': all_aids,
        'diag_labels': diag_labels,
        'diag_scores': diag_scores,
        'diag_nids': diag_nids,
        'diag_aidxs': diag_aidxs,
        'toy_params': toy_params,
    }
    return toy_data
def get_toy_annots(num_annots, num_names=None, initial_aids=None,
                   initial_nids=None, nid_sequence=None, seed=None):
    r"""
    Generate toy annotation ids and name ids, optionally continuing from an
    existing set of annotations.

    NOTE(review): this function is defined twice in this file; this later
    definition shadows the earlier one at import time.

    Args:
        num_annots (int): number of new annotations to generate
        num_names (int): number of distinct names; defaults to ``num_annots``
        initial_aids (None): previously generated aids (default = None)
        initial_nids (None): previously generated nids (default = None)
        nid_sequence (None): scripted name assignments to consume before
            falling back to random draws (default = None)
        seed (None): rng seed (default = None)

    Returns:
        tuple: (aids, nids, aids1, nids1, all_aids, all_nids)

    CommandLine:
        python -m ibeis.algo.hots.demobayes --exec-get_toy_annots

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.hots.demobayes import *  # NOQA
        >>> num_annots = 1
        >>> num_names = 5
        >>> initial_aids = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=np.int64)
        >>> initial_nids = np.array([0, 0, 1, 2, 2, 1, 1, 1, 2, 3], dtype=np.int64)
        >>> nid_sequence = np.array([0, 0, 1, 2, 2, 1, 1], dtype=np.int64)
        >>> seed = 0
        >>> (aids, nids, aids1, nids1, all_aids, all_nids) = get_toy_annots(num_annots, num_names, initial_aids, initial_nids, nid_sequence, seed)
        >>> result = ('(aids, nids, aids1, nids1, all_aids, all_nids) = %s' % (ut.repr2((aids, nids, aids1, nids1, all_aids, all_nids), nl=1),))
        >>> print(result)
    """
    import vtool as vt
    if num_names is None:
        num_names = num_annots
    print('Generating toy data with num_annots=%r' % (num_annots,))
    if initial_aids is None:
        assert initial_nids is None
        first_step = True
        initial_aids = []
        initial_nids = []
    else:
        first_step = False
        assert initial_nids is not None

    # New annot ids continue numbering after any existing ones.
    aids = np.arange(len(initial_aids), num_annots + len(initial_aids))
    rng = vt.ensure_rng(seed)
    if nid_sequence is None:
        # No scripted sequence: draw names uniformly at random.
        nids = rng.randint(0, num_names, num_annots)
    else:
        # How many scripted entries have not yet been consumed by earlier
        # annotations?
        unused_from_sequence = max(len(nid_sequence) - len(initial_aids), 0)
        if unused_from_sequence == 0:
            # Sequence exhausted: fall back to random draws.
            nids = rng.randint(0, num_names, num_annots)
        elif unused_from_sequence < num_annots:
            # Consume the rest of the sequence; fill the remainder randomly.
            num_remain = num_annots - unused_from_sequence
            nids = np.append(nid_sequence[-unused_from_sequence:],
                             rng.randint(0, num_names, num_remain))
        else:
            # Sequence covers all new annots; take the next contiguous chunk.
            nids = np.array(ut.take(
                nid_sequence,
                range(len(initial_aids), len(initial_aids) + num_annots)))

    if first_step:
        aids1 = aids
        nids1 = nids
    else:
        aids1 = initial_aids
        nids1 = initial_nids

    all_nids = np.append(initial_nids, nids)
    all_aids = np.append(initial_aids, aids)
    # Sanity checks: aid/nid lists must stay aligned.
    ut.assert_eq(len(aids), len(nids), 'len new')
    ut.assert_eq(len(aids1), len(nids1), 'len comp')
    ut.assert_eq(len(all_aids), len(all_nids), 'len all')
    return aids, nids, aids1, nids1, all_aids, all_nids
def get_toy_data_1vM(num_annots, num_names=None, **kwargs):
    r"""
    Generate toy 1-vs-many data and run a simple SVM classification
    experiment on annot-vs-name scores.

    NOTE(review): this function is defined twice in this file; this later
    definition (which fits ``svm.SVC()`` without probability estimates)
    shadows the earlier one at import time.

    Args:
        num_annots (int):
        num_names (int): (default = None)

    Kwargs:
        initial_aids, initial_nids, nid_sequence, seed

    Returns:
        tuple: (pair_list, feat_list)

    CommandLine:
        python -m ibeis.algo.hots.demobayes --exec-get_toy_data_1vM --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.hots.demobayes import *  # NOQA
        >>> num_annots = 1000
        >>> num_names = 40
        >>> get_toy_data_1vM(num_annots, num_names)
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> ut.show_if_requested()
    """
    import vtool as vt
    tup_ = get_toy_annots(num_annots, num_names, **kwargs)
    aids, nids, aids1, nids1, all_aids, all_nids = tup_
    rng = vt.ensure_rng(None)

    # Test a simple SVM classifier
    nid2_nexemp = ut.dict_hist(nids1)
    aid2_nid = dict(zip(aids, nids))

    unique_nids = list(nid2_nexemp.keys())

    def annot_to_class_feats2(aid, aid2_nid, top=None):
        # Score ``aid`` against every name; each (aid, nid) pair gets a
        # feature row of (score, rank, num_exemplars).
        pair_list = []
        score_list = []
        nexemplar_list = []
        for nid in unique_nids:
            label = (aid2_nid[aid] == nid)
            num_exemplars = nid2_nexemp.get(nid, 0)
            if num_exemplars == 0:
                continue
            # NOTE(review): ``toy_params`` is a module-level mapping keyed by
            # bool with 'mu'/'sigma' entries — defined elsewhere in this file.
            params = toy_params[label]
            mu, sigma = ut.dict_take(params, ['mu', 'sigma'])
            # Best score over this name's exemplars, clipped non-negative.
            score_ = rng.normal(mu, sigma, size=num_exemplars).max()
            score = np.clip(score_, 0, np.inf)
            pair_list.append((aid, nid))
            score_list.append(score)
            nexemplar_list.append(num_exemplars)
        rank_list = ut.argsort(score_list, reverse=True)
        feat_list = np.array([score_list, rank_list, nexemplar_list]).T
        sortx = np.argsort(rank_list)
        feat_list = feat_list.take(sortx, axis=0)
        pair_list = np.array(pair_list).take(sortx, axis=0)
        if top is not None:
            feat_list = feat_list[:top]
            pair_list = pair_list[0:top]
        return pair_list, feat_list

    toclass_features = [annot_to_class_feats2(aid, aid2_nid, top=5)
                        for aid in aids]
    aidnid_pairs = np.vstack(ut.get_list_column(toclass_features, 0))
    feat_list = np.vstack(ut.get_list_column(toclass_features, 1))
    score_list = feat_list.T[0:1].T
    lbl_list = [aid2_nid[aid] == nid for aid, nid in aidnid_pairs]

    from sklearn import svm
    print('Learning classifiers')
    # clf3 uses the full (score, rank, nexemplar) features; clf1 score-only.
    clf3 = svm.SVC()
    clf3.fit(feat_list, lbl_list)

    clf1 = svm.LinearSVC()
    clf1.fit(score_list, lbl_list)

    # Score new annots against the training database
    tup_ = get_toy_annots(num_annots * 2, num_names,
                          initial_aids=all_aids, initial_nids=all_nids)
    aids, nids, aids1, nids1, all_aids, all_nids = tup_
    aid2_nid = dict(zip(aids, nids))
    toclass_features = [annot_to_class_feats2(aid, aid2_nid) for aid in aids]

    aidnid_pairs = np.vstack(ut.get_list_column(toclass_features, 0))
    feat_list = np.vstack(ut.get_list_column(toclass_features, 1))
    lbl_list = np.array([aid2_nid[aid] == nid for aid, nid in aidnid_pairs])

    print('Running tests')

    score_list = feat_list.T[0:1].T

    tp_feat_list = feat_list[lbl_list]
    tn_feat_list = feat_list[~lbl_list]
    tp_lbls = lbl_list[lbl_list]
    tn_lbls = lbl_list[~lbl_list]
    print('num tp: %d' % len(tp_lbls))
    print('num tn: %d' % len(tn_lbls))

    tp_score_list = score_list[lbl_list]
    tn_score_list = score_list[~lbl_list]

    print('tp_feat' + ut.repr3(ut.get_stats(tp_feat_list, axis=0), precision=2))
    print('tn_feat' + ut.repr3(ut.get_stats(tn_feat_list, axis=0), precision=2))

    print('tp_score' + ut.repr2(ut.get_stats(tp_score_list), precision=2))
    print('tn_score' + ut.repr2(ut.get_stats(tn_score_list), precision=2))

    tp_pred3 = clf3.predict(tp_feat_list)
    tn_pred3 = clf3.predict(tn_feat_list)
    print((tp_pred3.sum(), tp_pred3.shape))
    print((tn_pred3.sum(), tn_pred3.shape))

    tp_score3 = clf3.score(tp_feat_list, tp_lbls)
    tn_score3 = clf3.score(tn_feat_list, tn_lbls)

    tp_pred1 = clf1.predict(tp_score_list)
    tn_pred1 = clf1.predict(tn_score_list)
    print((tp_pred1.sum(), tp_pred1.shape))
    print((tn_pred1.sum(), tn_pred1.shape))

    tp_score1 = clf1.score(tp_score_list, tp_lbls)
    tn_score1 = clf1.score(tn_score_list, tn_lbls)
    print('tp score with rank = %r' % (tp_score3,))
    print('tn score with rank = %r' % (tn_score3,))
    print('tp score without rank = %r' % (tp_score1,))
    print('tn score without rank = %r' % (tn_score1,))
    toy_data = {}
    return toy_data