def test_syntetic_weak(mode):
    """Benchmark weakly supervised training over the synthetic datasets.

    For every dataset index 1..18 and every number of fully labelled samples
    in ``[0, 2, 4, 10, 25, 100]`` a ``LatentSSVM`` is fitted on the first 400
    samples, where only the first ``nfull`` training samples keep their hidden
    labelling (the rest are replaced by ``None``).  The error of
    ``predict_latent`` on the held-out samples is stored in the result table,
    which is finally written to a CSV file whose name depends on ``mode``.

    Parameters
    ----------
    mode : str
        ``'latent'`` (``LatentCRF`` with ``qpbo`` inference) or
        ``'heterogenous'`` (``HCRF`` with ``gco`` inference).

    Returns
    -------
    results : numpy.ndarray, shape (18, 6)
        ``results[dataset - 1, j]`` is the error for ``full_labeled[j]``;
        entries of runs that raised ``ValueError`` keep their initial 0.

    Raises
    ------
    ValueError
        If ``mode`` is not one of the two supported values.
    """
    # needs refactoring
    # Fail early: previously an unknown mode left ``clf`` unbound and only
    # crashed (with a NameError) after the first dataset had been loaded.
    if mode not in ('latent', 'heterogenous'):
        raise ValueError(
            "mode must be 'latent' or 'heterogenous', got %r" % (mode,))

    results = np.zeros((18, 6))
    full_labeled = np.array([0, 2, 4, 10, 25, 100])
    train_size = 400

    for dataset in range(1, 19):
        X, H = load_syntetic(dataset)
        H = list(H)
        Y = weak_from_hidden(H)

        for j, nfull in enumerate(full_labeled):
            if mode == 'latent':
                crf = LatentCRF(n_states=10, n_features=10,
                                n_edge_features=2, inference_method='qpbo')
                base_clf = OneSlackSSVM(crf, max_iter=100, C=0.01, verbose=0,
                                        tol=0.1, n_jobs=4,
                                        inference_cache=100)
                clf = LatentSSVM(base_clf, latent_iter=5)
            else:  # mode == 'heterogenous'
                crf = HCRF(n_states=10, n_features=10, n_edge_features=2,
                           inference_method='gco')
                base_clf = OneSlackSSVM(crf, max_iter=500, C=0.1, verbose=0,
                                        tol=0.001, n_jobs=4,
                                        inference_cache=100)
                clf = LatentSSVM(base_clf, latent_iter=5, verbose=0)

            x_train = X[:train_size]
            y_train = Y[:train_size]
            # Keep the hidden labelling of the first ``nfull`` samples only;
            # a fresh list is built so ``H`` itself is never modified.
            h_train = [h if i < nfull else None
                       for i, h in enumerate(H[:train_size])]

            # NOTE(review): the test split starts at train_size + 1, so the
            # sample at index train_size is in neither split -- confirm that
            # this is intended before changing it (it affects the numbers).
            x_test = X[(train_size + 1):]
            h_test = H[(train_size + 1):]

            try:
                if mode == 'latent':
                    clf.fit(x_train, y_train, h_train)
                else:
                    clf.fit(x_train, y_train, h_train,
                            pass_labels=True, initialize=True)
                h_pred = clf.predict_latent(x_test)

                results[dataset - 1, j] = compute_error(h_test, h_pred)

                print('dataset=%d, nfull=%d, error=%f'
                      % (dataset, nfull, results[dataset - 1, j]))
            except ValueError:
                # bad QP
                print('dataset=%d, nfull=%d: Failed' % (dataset, nfull))

    if mode == 'latent':
        np.savetxt('results/weak_labeled.csv', results, delimiter=',')
    else:
        np.savetxt('results/heterogenous.csv', results, delimiter=',')

    return results
def syntetic_test():
    """Evaluate a fully supervised model for several training-set sizes.

    For each dataset index 1..18 and each training-set size in
    ``[2, 4, 10, 25, 100]`` an ``EdgeCRF`` / ``OneSlackSSVM`` pair is fitted
    on the first ``nfull`` samples and scored with ``compute_error`` on the
    samples after index 400.  The (18, 5) error table is written to
    ``results/syntetic/full_labeled.txt``; runs that raise ``ValueError``
    leave a 0 in the table.  Nothing is returned.
    """
    # test model on different train set size & on different train sets
    train_sizes = np.array([2, 4, 10, 25, 100])
    results = np.zeros((18, 5))
    train_size = 400

    for dataset in range(1, 19):
        X, Y = load_syntetic(dataset)
        row = dataset - 1

        for col, nfull in enumerate(train_sizes):
            model = EdgeCRF(n_states=10, n_features=10, n_edge_features=2,
                            inference_method='qpbo')
            learner = OneSlackSSVM(model, max_iter=10000, C=0.01, verbose=0,
                                   tol=0.1, n_jobs=4, inference_cache=100)

            x_train, y_train = X[:nfull], Y[:nfull]
            x_test, y_test = X[(train_size + 1):], Y[(train_size + 1):]

            try:
                learner.fit(x_train, y_train)
                y_pred = learner.predict(x_test)

                results[row, col] = compute_error(y_test, y_pred)

                print('dataset=%d, nfull=%d, error=%f'
                      % (dataset, nfull, results[row, col]))
            except ValueError:
                print('dataset=%d, nfull=%d: Failed' % (dataset, nfull))

    np.savetxt('results/syntetic/full_labeled.txt', results)
def syntetic():
    """Train an ``EdgeCRF`` / ``OneSlackSSVM`` model on synthetic dataset 1.

    The data is split with ``train_test_split`` (100 training samples,
    ``random_state=179``).  After fitting, the weight vector is written to
    ``models/syntetic/syntetic_full.csv`` and the fitted estimator is pickled
    to ``models/syntetic/syntetic_full.pickle``; test error, test/train
    scores, the weight norm and the fitting time are printed.

    Returns
    -------
    clf : OneSlackSSVM
        The fitted estimator.
    """
    # train model on a single set
    models_basedir = 'models/syntetic/'
    crf = EdgeCRF(n_states=10, n_features=10, n_edge_features=2,
                  inference_method='gco')
    clf = OneSlackSSVM(crf, max_iter=10000, C=0.01, verbose=2,
                       tol=0.1, n_jobs=4, inference_cache=100)

    X, Y = load_syntetic(1)

    x_train, x_test, y_train, y_test = train_test_split(
        X, Y, train_size=100, random_state=179)

    start = time()
    clf.fit(x_train, y_train)
    stop = time()

    np.savetxt(models_basedir + 'syntetic_full.csv', clf.w)
    # Pickle data is a byte stream: the file must be opened in binary mode
    # ('w' corrupts it on platforms that translate newlines).
    with open(models_basedir + 'syntetic_full' + '.pickle', 'wb') as f:
        cPickle.dump(clf, f)

    y_pred = clf.predict(x_test)

    print('Error on test set: %f' % compute_error(y_test, y_pred))
    print('Score on test set: %f' % clf.score(x_test, y_test))
    print('Score on train set: %f' % clf.score(x_train, y_train))
    print('Norm of weight vector: |w|=%f' % np.linalg.norm(clf.w))
    print('Elapsed time: %f s' % (stop - start))

    return clf
def syntetic_train_score_per_iter(result, only_weak=False, plot=True):
    """Recompute the per-iteration training score of a stored experiment.

    A classifier is rebuilt from the hyper-parameters in ``result.meta``, the
    weight history in ``result.data["w_history"]`` is attached to it, and
    ``staged_score`` is evaluated on the training split of the synthetic
    dataset named in the metadata.

    Parameters
    ----------
    result : object
        Must expose ``data["w_history"]`` (an array; its ``shape[0]`` is used
        as the number of finished iterations) and a ``meta`` mapping with the
        keys read below.
    only_weak : bool, default False
        If true, keep only training samples whose entry in the weak label
        list has a false ``full_labeled`` attribute.
    plot : bool, default True
        If true, draw the score curve with ``pl``.

    Returns
    -------
    train_scores : numpy.ndarray
        One score per value yielded by ``clf.staged_score``.
    """
    w_history = result.data["w_history"]
    meta_data = result.meta

    # Each hyper-parameter is read exactly once (the original looked up
    # n_full / n_train twice and fetched an ``initialize`` value it never used).
    n_full = meta_data["n_full"]
    n_train = meta_data["n_train"]
    n_inference_iter = meta_data["n_inference_iter"]
    dataset = meta_data["dataset"]
    C = meta_data["C"]
    latent_iter = meta_data["latent_iter"]
    max_iter = meta_data["max_iter"]
    inner_tol = meta_data["inner_tol"]
    outer_tol = meta_data["outer_tol"]
    alpha = meta_data["alpha"]
    min_changes = meta_data["min_changes"]

    crf = HCRF(
        n_states=10, n_features=10, n_edge_features=2, alpha=alpha,
        inference_method="gco", n_iter=n_inference_iter
    )
    base_clf = OneSlackSSVM(crf, max_iter=max_iter, C=C, verbose=0,
                            tol=inner_tol, n_jobs=4, inference_cache=100)
    clf = LatentSSVM(base_clf, latent_iter=latent_iter, verbose=2,
                     tol=outer_tol, min_changes=min_changes, n_jobs=4)

    X, Y = load_syntetic(dataset)

    # The test part of the split is not needed here.
    Xtrain, Ytrain, Ytrain_full, _Xtest, _Ytest = split_test_train(
        X, Y, n_full, n_train)

    if only_weak:
        # The mask is computed once from the weak labels, then applied to
        # both the inputs and the full labels.
        is_weak = [not y.full_labeled for y in Ytrain]
        Xtrain = [x for x, keep in zip(Xtrain, is_weak) if keep]
        Ytrain_full = [y for y, keep in zip(Ytrain_full, is_weak) if keep]

    # Attach the stored weight history instead of fitting again.
    base_clf.w = None
    clf.w_history_ = w_history
    clf.iter_done = w_history.shape[0]

    train_scores = np.array(list(clf.staged_score(Xtrain, Ytrain_full)))

    if plot:
        # presumably ``pl`` is pylab / matplotlib.pyplot -- confirm against
        # the file's imports
        x = np.arange(0, train_scores.size)
        pl.rc("text", usetex=True)
        pl.rc("font", family="serif")
        pl.figure(figsize=(10, 10), dpi=96)
        pl.title("score on train set")
        pl.plot(x, train_scores)
        pl.scatter(x, train_scores)
        pl.xlabel("iteration")
        pl.xlim([-0.5, train_scores.size + 1])

    return train_scores
def syntetic_weak(n_full=10, n_train=200, C=0.1, dataset=1, latent_iter=15,
                  max_iter=500, inner_tol=0.001, outer_tol=0.01,
                  min_changes=0, initialize=True, alpha=0.1):
    """Fit a ``LatentSSVM`` over an ``HCRF`` on one synthetic dataset.

    The dataset is loaded with ``load_syntetic`` and split with
    ``split_test_train``; the model is fitted on the weak training labels,
    scored on the full training labels and on the test split, and a summary
    is printed.  All hyper-parameters are forwarded unchanged to the
    estimators and recorded in the returned object.

    NOTE(review): SOURCE contains a later definition with the same name and a
    longer parameter list; if both live in one module the later one replaces
    this one -- confirm which is intended.

    Returns
    -------
    ExperimentResult
        Built from the staged test scores, the fitted model's history
        attributes and the run's parameters / scores as keyword arguments.
    """
    model = HCRF(n_states=10, n_features=10, n_edge_features=2, alpha=alpha,
                 inference_method='gco')
    inner = OneSlackSSVM(model, max_iter=max_iter, C=C, verbose=0,
                         tol=inner_tol, n_jobs=4, inference_cache=100)
    learner = LatentSSVM(inner, latent_iter=latent_iter, verbose=2,
                         tol=outer_tol, min_changes=min_changes, n_jobs=4)

    X, Y = load_syntetic(dataset)
    x_train, y_train, y_train_full, x_test, y_test = split_test_train(
        X, Y, n_full, n_train)

    # Only the fit itself is timed.
    t_begin = time()
    learner.fit(x_train, y_train, initialize=initialize)
    t_end = time()

    train_score = learner.score(x_train, y_train_full)
    test_score = learner.score(x_test, y_test)
    time_elapsed = t_end - t_begin

    print('Score on train set: %f' % train_score)
    print('Score on test set: %f' % test_score)
    print('Norm of weight vector: |w|=%f' % np.linalg.norm(learner.w))
    print('Elapsed time: %f s' % time_elapsed)

    staged_test = np.array(list(learner.staged_score(x_test, y_test)))

    return ExperimentResult(
        staged_test, learner.changes_,
        learner.w_history_, learner.delta_history_,
        learner.primal_objective_curve_,
        learner.objective_curve_, learner.timestamps_,
        learner.base_iter_history_,
        train_score=train_score,
        test_score=test_score, time_elapsed=time_elapsed,
        n_full=n_full, n_train=n_train, C=C, dataset=dataset,
        latent_iter=latent_iter, max_iter=max_iter,
        inner_tol=inner_tol, outer_tol=outer_tol, alpha=alpha,
        min_changes=min_changes, initialize=initialize,
        dataset_name='syntetic',
        annotation_type='image-level labelling',
        label_type='full+weak')
def load_dataset(result):
    """Reload the train/test split that a stored experiment was run on.

    ``result.meta`` must contain ``'n_train'``, ``'n_full'`` and
    ``'dataset_name'`` (plus ``'dataset'`` for the synthetic data).

    Returns
    -------
    tuple
        ``(Xtrain, Ytrain, Ytrain_full, Xtest, Ytest)``.  For a
        ``dataset_name`` other than ``'syntetic'`` or ``'msrc'`` all five
        entries are ``None``.
    """
    meta = result.meta
    n_train = meta['n_train']
    n_full = meta['n_full']
    dataset_name = meta['dataset_name']

    if dataset_name == 'syntetic':
        X, Y = load_syntetic(meta['dataset'])
        x_tr, y_tr, y_tr_full, x_te, y_te = split_test_train(
            X, Y, n_full, n_train)
        return x_tr, y_tr, y_tr_full, x_te, y_te

    if dataset_name == 'msrc':
        x_tr, y_tr, y_tr_full, x_te, y_te = msrc_load(n_full, n_train)
        return x_tr, y_tr, y_tr_full, x_te, y_te

    # Unknown dataset name: nothing is loaded.
    return None, None, None, None, None
def syntetic_weak(n_full=10, n_train=200, C=0.1, dataset=1, latent_iter=15,
                  max_iter=500, inner_tol=0.001, outer_tol=0.01,
                  min_changes=0, initialize=True, alpha=0.1,
                  n_inference_iter=5, inactive_window=50,
                  inactive_threshold=1e-5):
    """Fit a ``LatentSSVM`` over an ``HCRF`` on one synthetic dataset.

    The dataset is loaded with ``load_syntetic`` and split with
    ``split_test_train``; the model is fitted on the weak training labels and
    scored on the full training labels and on the test split.  A summary is
    printed and everything is packed into an ``ExperimentResult``.

    All parameters are forwarded unchanged to the estimators and stored in
    the result's metadata under their own names.

    Returns
    -------
    ExperimentResult
        Constructed as ``ExperimentResult(exp_data, meta_data)`` where
        ``exp_data`` holds the staged train/test scores and the fitted
        model's history attributes, and ``meta_data`` holds the call
        parameters plus dataset descriptors, final scores and fit time.
    """
    # save parameters as meta
    # This must stay the first statement so that only the call parameters are
    # captured.  The mapping is copied because the dict returned by locals()
    # may be the frame's own, shared mapping: a later locals() call or a
    # debugger/profiler refresh could otherwise inject every local variable
    # (including ``meta_data`` itself) into the metadata mutated below.
    meta_data = dict(locals())

    crf = HCRF(n_states=10, n_features=10, n_edge_features=2, alpha=alpha,
               inference_method='gco', n_iter=n_inference_iter)
    base_clf = OneSlackSSVM(crf, max_iter=max_iter, C=C, verbose=0,
                            tol=inner_tol, n_jobs=4, inference_cache=100,
                            inactive_window=inactive_window,
                            inactive_threshold=inactive_threshold)
    clf = LatentSSVM(base_clf, latent_iter=latent_iter, verbose=2,
                     tol=outer_tol, min_changes=min_changes, n_jobs=4)

    X, Y = load_syntetic(dataset)
    x_train, y_train, y_train_full, x_test, y_test = split_test_train(
        X, Y, n_full, n_train)

    # Only the fit itself is timed.
    start = time()
    clf.fit(x_train, y_train, initialize=initialize)
    stop = time()

    train_score = clf.score(x_train, y_train_full)
    test_score = clf.score(x_test, y_test)
    time_elapsed = stop - start

    print('============================================================')
    print('Score on train set: %f' % train_score)
    print('Score on test set: %f' % test_score)
    print('Norm of weight vector: |w|=%f' % np.linalg.norm(clf.w))
    print('Elapsed time: %f s' % time_elapsed)

    test_scores = list(clf.staged_score(x_test, y_test))
    train_scores = list(clf.staged_score(x_train, y_train_full))

    exp_data = {}

    exp_data['test_scores'] = np.array(test_scores)
    exp_data['train_scores'] = np.array(train_scores)
    exp_data['changes'] = clf.changes_
    exp_data['w_history'] = clf.w_history_
    exp_data['delta_history'] = clf.delta_history_
    exp_data['primal_objective_curve'] = clf.primal_objective_curve_
    exp_data['objective_curve'] = clf.objective_curve_
    exp_data['timestamps'] = clf.timestamps_
    exp_data['qp_timestamps'] = clf.qp_timestamps_
    exp_data['inference_timestamps'] = clf.inference_timestamps_
    exp_data['number_of_iterations'] = clf.number_of_iterations_
    exp_data['number_of_constraints'] = clf.number_of_constraints_
    exp_data['calls_to_inference'] = clf.calls_to_inference_

    meta_data['dataset_name'] = 'syntetic'
    meta_data['annotation_type'] = 'image-level labelling'
    meta_data['label_type'] = 'full+weak'
    meta_data['train_score'] = train_score
    meta_data['test_score'] = test_score
    meta_data['time_elapsed'] = time_elapsed

    return ExperimentResult(exp_data, meta_data)