def test_syntetic_weak(mode):
    """Benchmark LatentSSVM on the 18 synthetic datasets.

    For every dataset and for every number of fully labelled training
    samples in ``[0, 2, 4, 10, 25, 100]`` a fresh classifier is fitted on
    the first 400 samples (hidden labels beyond the first ``nfull`` are
    replaced by ``None``) and the error of its latent predictions on the
    remaining samples is stored.

    The result table is also written to ``results/weak_labeled.csv``
    (mode ``'latent'``) or ``results/heterogenous.csv`` (mode
    ``'heterogenous'``).

    Parameters
    ----------
    mode : str
        ``'latent'`` builds a ``LatentCRF`` with 'qpbo' inference,
        ``'heterogenous'`` builds an ``HCRF`` with 'gco' inference.

    Returns
    -------
    results : ndarray, shape (18, 6)
        ``results[d - 1, j]`` is the error on dataset ``d`` with
        ``full_labeled[j]`` fully labelled samples.  Entries of runs that
        failed with ``ValueError`` keep their initial value 0.

    Raises
    ------
    ValueError
        If ``mode`` is neither ``'latent'`` nor ``'heterogenous'``.
    """
    # TODO: needs refactoring
    # Fail early with a clear message; previously an unknown mode only
    # surfaced later as a NameError on the undefined classifier.
    if mode not in ('latent', 'heterogenous'):
        raise ValueError("mode must be 'latent' or 'heterogenous', got %r"
                         % (mode,))

    n_datasets = 18
    train_size = 400
    full_labeled = np.array([0, 2, 4, 10, 25, 100])
    results = np.zeros((n_datasets, len(full_labeled)))

    if mode == 'latent':
        out_path = 'results/weak_labeled.csv'
    else:
        out_path = 'results/heterogenous.csv'

    for dataset in range(1, n_datasets + 1):
        X, H = load_syntetic(dataset)
        H = list(H)
        Y = weak_from_hidden(H)

        for j, nfull in enumerate(full_labeled):
            # A new classifier per run so nothing leaks between runs.
            if mode == 'latent':
                crf = LatentCRF(n_states=10, n_features=10,
                                n_edge_features=2, inference_method='qpbo')
                base_clf = OneSlackSSVM(crf, max_iter=100, C=0.01,
                                        verbose=0, tol=0.1, n_jobs=4,
                                        inference_cache=100)
                clf = LatentSSVM(base_clf, latent_iter=5)
            else:
                crf = HCRF(n_states=10, n_features=10,
                           n_edge_features=2, inference_method='gco')
                base_clf = OneSlackSSVM(crf, max_iter=500, C=0.1,
                                        verbose=0, tol=0.001, n_jobs=4,
                                        inference_cache=100)
                clf = LatentSSVM(base_clf, latent_iter=5, verbose=0)

            x_train = X[:train_size]
            y_train = Y[:train_size]
            # Slicing the list copies it, so H itself is left untouched
            # when the hidden labels are blanked out below.
            h_train = H[:train_size]

            # The test split starts right after the training split; the
            # former ``train_size + 1`` skipped one sample.
            x_test = X[train_size:]
            h_test = H[train_size:]

            # Only the first ``nfull`` samples keep their hidden labels.
            for i in range(nfull, len(h_train)):
                h_train[i] = None

            try:
                if mode == 'latent':
                    clf.fit(x_train, y_train, h_train)
                else:
                    clf.fit(x_train, y_train, h_train,
                            pass_labels=True, initialize=True)
                h_pred = clf.predict_latent(x_test)

                results[dataset - 1, j] = compute_error(h_test, h_pred)

                print('dataset=%d, nfull=%d, error=%f'
                      % (dataset, nfull, results[dataset - 1, j]))
            except ValueError:
                # bad QP
                print('dataset=%d, nfull=%d: Failed' % (dataset, nfull))

        # Written after every dataset so finished rows survive a crash in
        # a later dataset; the final file is the complete table.
        np.savetxt(out_path, results, delimiter=',')

    return results
def msrc_weak(n_full=20, n_train=276, C=100, latent_iter=25,
              max_iter=500, inner_tol=0.001, outer_tol=0.01,
              min_changes=0, initialize=True, alpha=0.1,
              n_inference_iter=5):
    """Train and evaluate a LatentSSVM on MSRC with full and weak labels.

    Training images selected by the mask file
    ``../data/msrc/trainmasks/trainMaskX<n_full>.txt`` keep their full
    labelling; for the others only the set of unique labels present in
    the image is kept.

    Parameters
    ----------
    n_full : int
        Selects the train-mask file (``trainMaskX%d.txt``).
    n_train : int
        Number of leading mask entries to use.
    C, max_iter, inner_tol
        Passed to ``OneSlackSSVM`` as ``C``, ``max_iter`` and ``tol``.
    latent_iter, outer_tol, min_changes
        Passed to ``LatentSSVM`` as ``latent_iter``, ``tol`` and
        ``min_changes``.
    initialize
        Forwarded to ``LatentSSVM.fit``.
    alpha, n_inference_iter
        Passed to ``HCRF`` as ``alpha`` and ``n_iter``.

    Returns
    -------
    ExperimentResult
        Staged test scores, the histories recorded by the classifier and
        the experiment parameters.
    """
    def _full_label(y):
        # Column 0 is used as the (int32) labelling, column 1 is passed
        # through as the third Label argument.
        return Label(y[:, 0].astype(np.int32), None, y[:, 1], True)

    crf = HCRF(n_states=24, n_features=2028, n_edge_features=4,
               alpha=alpha, inference_method='gco',
               n_iter=n_inference_iter)
    base_clf = OneSlackSSVM(crf, max_iter=max_iter, C=C, verbose=0,
                            tol=inner_tol, n_jobs=4, inference_cache=10)
    clf = LatentSSVM(base_clf, latent_iter=latent_iter, verbose=2,
                     tol=outer_tol, min_changes=min_changes, n_jobs=4)

    Xtest, Ytest = load_msrc('test')
    Ytest = [_full_label(y) for y in Ytest]

    train_mask = np.genfromtxt(
        '../data/msrc/trainmasks/trainMaskX%d.txt' % n_full)
    # Builtin ``bool`` instead of the ``np.bool`` alias, which newer NumPy
    # releases no longer provide.
    train_mask = train_mask[0:n_train].astype(bool)

    Xtrain, Ytrain_raw = load_msrc('train')
    Ytrain_full = [_full_label(y) for y in Ytrain_raw]

    # NOTE(review): zip() stops at the shorter input, so when n_train is
    # smaller than the number of training images Ytrain is shorter than
    # Xtrain (which is not sliced) -- confirm this is intended.
    Ytrain = []
    for y, is_full in zip(Ytrain_raw, train_mask):
        if is_full:
            Ytrain.append(_full_label(y))
        else:
            # Weak annotation: only the labels occurring in the image.
            Ytrain.append(Label(None, np.unique(y[:, 0].astype(np.int32)),
                                y[:, 1], False))

    start = time()
    clf.fit(Xtrain, Ytrain, initialize=initialize)
    stop = time()

    train_score = clf.score(Xtrain, Ytrain_full)
    test_score = clf.score(Xtest, Ytest)
    time_elapsed = stop - start

    print('Score on train set: %f' % train_score)
    print('Score on test set: %f' % test_score)
    print('Norm of weight vector: |w|=%f' % np.linalg.norm(clf.w))
    print('Elapsed time: %f s' % time_elapsed)

    test_scores = list(clf.staged_score(Xtest, Ytest))

    result = ExperimentResult(np.array(test_scores), clf.changes_,
                              clf.w_history_, clf.delta_history_,
                              clf.primal_objective_curve_,
                              clf.objective_curve_, clf.timestamps_,
                              clf.base_iter_history_,
                              train_score=train_score,
                              test_score=test_score,
                              time_elapsed=time_elapsed,
                              n_inference_iter=n_inference_iter,
                              n_full=n_full, n_train=n_train, C=C,
                              latent_iter=latent_iter, max_iter=max_iter,
                              inner_tol=inner_tol, outer_tol=outer_tol,
                              alpha=alpha, min_changes=min_changes,
                              initialize=initialize,
                              dataset_name='msrc',
                              annotation_type='image-level labelling',
                              label_type='full+weak')
    return result
def test_simple_dataset(max_iter=1000, C=0.1, latent_iter=10,
                        min_changes=0, inner_tol=1e-5, outer_tol=1e-5):
    """Train and evaluate a LatentSSVM with a SimpleMRF on the toy dataset.

    The classifier is fitted on ``y_train`` and scored against
    ``y_train_full`` (train) and ``y_test`` (test), all as returned by
    ``load_simple_dataset()``.

    Parameters
    ----------
    max_iter, C, inner_tol
        Passed to ``OneSlackSSVM`` as ``max_iter``, ``C`` and ``tol``.
    latent_iter, min_changes, outer_tol
        Passed to ``LatentSSVM`` as ``latent_iter``, ``min_changes`` and
        ``tol``.

    Returns
    -------
    ExperimentResult
        Built from the classifier data extended with the staged
        test/train/raw scores, and from the meta data (call arguments,
        dataset description, scores and elapsed time).
    """
    # Snapshot of the call arguments.  Must remain the first statement so
    # only the parameters are captured; the copy decouples the meta data
    # from the live frame dictionary that ``locals()`` may hand out.
    meta_data = dict(locals())

    crf = SimpleMRF()
    base_clf = OneSlackSSVM(crf, max_iter=max_iter, C=C, verbose=2,
                            n_jobs=4, tol=inner_tol)
    clf = LatentSSVM(base_clf, latent_iter=latent_iter, verbose=2,
                     min_changes=min_changes, n_jobs=4, tol=outer_tol)

    x_train, y_train_full, y_train, x_test, y_test = load_simple_dataset()

    start = time()
    clf.fit(x_train, y_train)
    stop = time()

    time_elapsed = stop - start

    train_score = clf.score(x_train, y_train_full)
    test_score = clf.score(x_test, y_test)

    print('============================================================')
    print('Score on train set: %f' % train_score)
    print('Score on test set: %f' % test_score)
    print('Norm of weight vector: |w|=%f' % np.linalg.norm(clf.w))
    print('Elapsed time: %f s' % time_elapsed)

    test_scores = list(clf.staged_score(x_test, y_test))
    train_scores = list(clf.staged_score(x_train, y_train_full))
    # Staged scores against the labels the classifier was fitted on.
    raw_scores = list(clf.staged_score2(x_train, y_train))

    exp_data = clf._get_data()
    exp_data['test_scores'] = np.array(test_scores)
    exp_data['train_scores'] = np.array(train_scores)
    exp_data['raw_scores'] = np.array(raw_scores)

    meta_data['dataset_name'] = 'toy'
    meta_data['annotation_type'] = 'area'
    meta_data['label_type'] = 'full+weak'
    # Keep the final scores with the run instead of only printing them.
    meta_data['train_score'] = train_score
    meta_data['test_score'] = test_score
    meta_data['time_elapsed'] = time_elapsed

    return ExperimentResult(exp_data, meta_data)
def syntetic_weak(n_full=10, n_train=200, C=0.1, dataset=1,
                  latent_iter=15, max_iter=500, inner_tol=0.001,
                  outer_tol=0.01, min_changes=0, initialize=True,
                  alpha=0.1):
    """Run one full+weak label experiment on a synthetic dataset.

    Loads synthetic dataset number ``dataset``, splits it with
    ``split_test_train(X, Y, n_full, n_train)``, fits a LatentSSVM
    (OneSlackSSVM over an HCRF with 'gco' inference) and reports train and
    test scores.

    Parameters
    ----------
    n_full, n_train
        Forwarded to ``split_test_train``.
    C, max_iter, inner_tol
        ``C``, ``max_iter`` and ``tol`` of the inner ``OneSlackSSVM``.
    dataset
        Argument of ``load_syntetic``.
    latent_iter, outer_tol, min_changes
        ``latent_iter``, ``tol`` and ``min_changes`` of ``LatentSSVM``.
    initialize
        Forwarded to ``LatentSSVM.fit``.
    alpha
        ``alpha`` of the ``HCRF`` model.

    Returns
    -------
    ExperimentResult
        Staged test scores, the histories recorded by the classifier and
        the experiment parameters.
    """
    model = HCRF(n_states=10, n_features=10, n_edge_features=2,
                 alpha=alpha, inference_method='gco')
    inner_solver = OneSlackSSVM(model, max_iter=max_iter, C=C, verbose=0,
                                tol=inner_tol, n_jobs=4,
                                inference_cache=100)
    learner = LatentSSVM(inner_solver, latent_iter=latent_iter, verbose=2,
                         tol=outer_tol, min_changes=min_changes, n_jobs=4)

    X, Y = load_syntetic(dataset)
    split = split_test_train(X, Y, n_full, n_train)
    x_train, y_train, y_train_full, x_test, y_test = split

    # Only the fit itself is timed.
    started_at = time()
    learner.fit(x_train, y_train, initialize=initialize)
    time_elapsed = time() - started_at

    train_score = learner.score(x_train, y_train_full)
    test_score = learner.score(x_test, y_test)

    for line in ('Score on train set: %f' % train_score,
                 'Score on test set: %f' % test_score,
                 'Norm of weight vector: |w|=%f' % np.linalg.norm(learner.w),
                 'Elapsed time: %f s' % time_elapsed):
        print(line)

    staged_test = np.array(list(learner.staged_score(x_test, y_test)))

    return ExperimentResult(
        staged_test, learner.changes_, learner.w_history_,
        learner.delta_history_, learner.primal_objective_curve_,
        learner.objective_curve_, learner.timestamps_,
        learner.base_iter_history_,
        train_score=train_score, test_score=test_score,
        time_elapsed=time_elapsed, n_full=n_full, n_train=n_train, C=C,
        dataset=dataset, latent_iter=latent_iter, max_iter=max_iter,
        inner_tol=inner_tol, outer_tol=outer_tol, alpha=alpha,
        min_changes=min_changes, initialize=initialize,
        dataset_name='syntetic',
        annotation_type='image-level labelling',
        label_type='full+weak')
def syntetic_weak(n_full=10, n_train=200, C=0.1, dataset=1,
                  latent_iter=15, max_iter=500, inner_tol=0.001,
                  outer_tol=0.01, min_changes=0, initialize=True,
                  alpha=0.1, n_inference_iter=5, inactive_window=50,
                  inactive_threshold=1e-5, warm_start=False,
                  inference_cache=0, save_inner_w=False,
                  inference_method='gco'):
    """Run one full+weak label experiment on a synthetic dataset.

    Fits a LatentSSVM (OneSlackSSVM over an HCRF) on the split returned by
    ``load_syntetic(dataset, n_full, n_train)``, logs the final scores and
    collects staged scores together with the classifier's own data.

    Parameters
    ----------
    n_full, n_train, dataset
        Forwarded to ``load_syntetic``.
    C, max_iter, inner_tol, inference_cache, inactive_window, \
inactive_threshold
        Passed to ``OneSlackSSVM`` (``inner_tol`` as ``tol``).
    latent_iter, outer_tol, min_changes
        Passed to ``LatentSSVM`` (``outer_tol`` as ``tol``).
    initialize, warm_start, save_inner_w
        Forwarded to ``LatentSSVM.fit``.
    alpha, n_inference_iter, inference_method
        Passed to ``HCRF`` (``n_inference_iter`` as ``n_iter``).

    Returns
    -------
    ExperimentResult
        Built from the experiment data (classifier data plus staged
        test/train/raw scores) and the meta data (call arguments, dataset
        description, scores, elapsed time and ``clf.iter_done``).
    """
    # Save parameters as meta.  Must remain the first statement so only
    # the call arguments are captured; the copy decouples the meta data
    # from the live frame dictionary that ``locals()`` may hand out.
    meta_data = dict(locals())

    logger = logging.getLogger(__name__)

    crf = HCRF(n_states=10, n_features=10, n_edge_features=2, alpha=alpha,
               inference_method=inference_method, n_iter=n_inference_iter)
    base_clf = OneSlackSSVM(crf, verbose=2, n_jobs=4,
                            max_iter=max_iter, tol=inner_tol, C=C,
                            inference_cache=inference_cache,
                            inactive_window=inactive_window,
                            inactive_threshold=inactive_threshold)
    clf = LatentSSVM(base_clf, latent_iter=latent_iter, verbose=2,
                     tol=outer_tol, min_changes=min_changes, n_jobs=4)

    x_train, y_train, y_train_full, x_test, y_test = \
        load_syntetic(dataset, n_full, n_train)

    start = time()
    clf.fit(x_train, y_train, initialize=initialize,
            warm_start=warm_start, save_inner_w=save_inner_w)
    stop = time()

    train_score = clf.score(x_train, y_train_full)
    test_score = clf.score(x_test, y_test)
    time_elapsed = stop - start

    logger.info('============================================================')
    logger.info('Score on train set: %f', train_score)
    logger.info('Score on test set: %f', test_score)
    logger.info('Norm of weight vector: |w|=%f', np.linalg.norm(clf.w))
    logger.info('Elapsed time: %f s', time_elapsed)

    test_scores = list(clf.staged_score(x_test, y_test))
    train_scores = list(clf.staged_score(x_train, y_train_full))
    # Staged scores against the labels the classifier was fitted on.
    raw_scores = list(clf.staged_score2(x_train, y_train))

    exp_data = clf._get_data()
    exp_data['test_scores'] = np.array(test_scores)
    exp_data['train_scores'] = np.array(train_scores)
    exp_data['raw_scores'] = np.array(raw_scores)

    meta_data['dataset_name'] = 'syntetic'
    meta_data['annotation_type'] = 'image-level labelling'
    meta_data['label_type'] = 'full+weak'
    meta_data['train_score'] = train_score
    meta_data['test_score'] = test_score
    meta_data['time_elapsed'] = time_elapsed
    meta_data['iter_done'] = clf.iter_done

    return ExperimentResult(exp_data, meta_data)
def syntetic_weak(n_full=10, n_train=200, C=0.1, dataset=1,
                  latent_iter=15, max_iter=500, inner_tol=0.001,
                  outer_tol=0.01, min_changes=0, initialize=True,
                  alpha=0.1, n_inference_iter=5, inactive_window=50,
                  inactive_threshold=1e-5):
    """Run one full+weak label experiment on a synthetic dataset.

    Loads synthetic dataset number ``dataset``, splits it with
    ``split_test_train(X, Y, n_full, n_train)``, fits a LatentSSVM
    (OneSlackSSVM over an HCRF with 'gco' inference), prints the final
    scores and collects the histories recorded by the classifier.

    Parameters
    ----------
    n_full, n_train
        Forwarded to ``split_test_train``.
    C, max_iter, inner_tol, inactive_window, inactive_threshold
        Passed to ``OneSlackSSVM`` (``inner_tol`` as ``tol``).
    dataset
        Argument of ``load_syntetic``.
    latent_iter, outer_tol, min_changes
        Passed to ``LatentSSVM`` (``outer_tol`` as ``tol``).
    initialize
        Forwarded to ``LatentSSVM.fit``.
    alpha, n_inference_iter
        Passed to ``HCRF`` (``n_inference_iter`` as ``n_iter``).

    Returns
    -------
    ExperimentResult
        Built from the experiment data (staged scores and classifier
        histories) and the meta data (call arguments, dataset
        description, scores and elapsed time).
    """
    # Save parameters as meta.  Must remain the first statement so only
    # the call arguments are captured; the copy decouples the meta data
    # from the live frame dictionary that ``locals()`` may hand out.
    meta_data = dict(locals())

    crf = HCRF(n_states=10, n_features=10, n_edge_features=2, alpha=alpha,
               inference_method='gco', n_iter=n_inference_iter)
    base_clf = OneSlackSSVM(crf, max_iter=max_iter, C=C, verbose=0,
                            tol=inner_tol, n_jobs=4, inference_cache=100,
                            inactive_window=inactive_window,
                            inactive_threshold=inactive_threshold)
    clf = LatentSSVM(base_clf, latent_iter=latent_iter, verbose=2,
                     tol=outer_tol, min_changes=min_changes, n_jobs=4)

    X, Y = load_syntetic(dataset)
    x_train, y_train, y_train_full, x_test, y_test = \
        split_test_train(X, Y, n_full, n_train)

    start = time()
    clf.fit(x_train, y_train, initialize=initialize)
    stop = time()

    train_score = clf.score(x_train, y_train_full)
    test_score = clf.score(x_test, y_test)
    time_elapsed = stop - start

    print('============================================================')
    print('Score on train set: %f' % train_score)
    print('Score on test set: %f' % test_score)
    print('Norm of weight vector: |w|=%f' % np.linalg.norm(clf.w))
    print('Elapsed time: %f s' % time_elapsed)

    test_scores = list(clf.staged_score(x_test, y_test))
    train_scores = list(clf.staged_score(x_train, y_train_full))

    exp_data = {
        'test_scores': np.array(test_scores),
        'train_scores': np.array(train_scores),
        'changes': clf.changes_,
        'w_history': clf.w_history_,
        'delta_history': clf.delta_history_,
        'primal_objective_curve': clf.primal_objective_curve_,
        'objective_curve': clf.objective_curve_,
        'timestamps': clf.timestamps_,
        'qp_timestamps': clf.qp_timestamps_,
        'inference_timestamps': clf.inference_timestamps_,
        'number_of_iterations': clf.number_of_iterations_,
        'number_of_constraints': clf.number_of_constraints_,
        'calls_to_inference': clf.calls_to_inference_,
    }

    meta_data['dataset_name'] = 'syntetic'
    meta_data['annotation_type'] = 'image-level labelling'
    meta_data['label_type'] = 'full+weak'
    meta_data['train_score'] = train_score
    meta_data['test_score'] = test_score
    meta_data['time_elapsed'] = time_elapsed

    return ExperimentResult(exp_data, meta_data)
def msrc_weak(n_full=20, n_train=276, C=100, latent_iter=25,
              max_iter=500, inner_tol=0.001, outer_tol=0.01,
              min_changes=0, initialize=True, alpha=0.1,
              n_inference_iter=5, inactive_window=50,
              inactive_threshold=1e-5, warm_start=False,
              inference_cache=0, save_inner_w=False,
              inference_method='gco'):
    """Run one full+weak label experiment on the MSRC dataset.

    Fits a LatentSSVM (OneSlackSSVM over an HCRF with 24 states) on the
    split returned by ``load_msrc(n_full, n_train)``, logs the final
    scores and collects staged scores together with the classifier's own
    data.

    Parameters
    ----------
    n_full, n_train
        Forwarded to ``load_msrc``.
    C, max_iter, inner_tol, inference_cache, inactive_window, \
inactive_threshold
        Passed to ``OneSlackSSVM`` (``inner_tol`` as ``tol``).
    latent_iter, outer_tol, min_changes
        Passed to ``LatentSSVM`` (``outer_tol`` as ``tol``).
    initialize, warm_start, save_inner_w
        Forwarded to ``LatentSSVM.fit``.
    alpha, n_inference_iter, inference_method
        Passed to ``HCRF`` (``n_inference_iter`` as ``n_iter``).

    Returns
    -------
    ExperimentResult
        Built from the experiment data (classifier data plus staged
        test/train/raw scores) and the meta data (call arguments, dataset
        description, scores, elapsed time and ``clf.iter_done``).
    """
    # Snapshot of the call arguments.  Must remain the first statement so
    # only the parameters are captured; the copy decouples the meta data
    # from the live frame dictionary that ``locals()`` may hand out.
    meta_data = dict(locals())

    logger = logging.getLogger(__name__)

    crf = HCRF(n_states=24, n_features=2028, n_edge_features=4,
               alpha=alpha, inference_method=inference_method,
               n_iter=n_inference_iter)
    base_clf = OneSlackSSVM(crf, verbose=2, n_jobs=4,
                            tol=inner_tol, max_iter=max_iter, C=C,
                            inference_cache=inference_cache,
                            inactive_window=inactive_window,
                            inactive_threshold=inactive_threshold)
    clf = LatentSSVM(base_clf, latent_iter=latent_iter, verbose=2,
                     tol=outer_tol, min_changes=min_changes, n_jobs=4)

    x_train, y_train, y_train_full, x_test, y_test = \
        load_msrc(n_full, n_train)

    start = time()
    clf.fit(x_train, y_train, initialize=initialize,
            warm_start=warm_start, save_inner_w=save_inner_w)
    stop = time()

    train_score = clf.score(x_train, y_train_full)
    test_score = clf.score(x_test, y_test)
    time_elapsed = stop - start

    logger.info('============================================================')
    logger.info('Score on train set: %f', train_score)
    logger.info('Score on test set: %f', test_score)
    logger.info('Norm of weight vector: |w|=%f', np.linalg.norm(clf.w))
    logger.info('Elapsed time: %f s', time_elapsed)

    test_scores = list(clf.staged_score(x_test, y_test))
    train_scores = list(clf.staged_score(x_train, y_train_full))
    # Staged scores against the labels the classifier was fitted on.
    raw_scores = list(clf.staged_score2(x_train, y_train))

    exp_data = clf._get_data()
    exp_data['test_scores'] = np.array(test_scores)
    exp_data['train_scores'] = np.array(train_scores)
    exp_data['raw_scores'] = np.array(raw_scores)

    meta_data['dataset_name'] = 'msrc'
    meta_data['annotation_type'] = 'image-level labelling'
    meta_data['label_type'] = 'full+weak'
    meta_data['train_score'] = train_score
    meta_data['test_score'] = test_score
    meta_data['time_elapsed'] = time_elapsed
    meta_data['iter_done'] = clf.iter_done

    return ExperimentResult(exp_data, meta_data)