def msrc():
    """Train a fully supervised EdgeCRF on the MSRC train split and evaluate it.

    The learned weight vector is written to ``models/msrc/msrc_full.csv`` and
    the fitted learner is pickled to ``models/msrc/msrc_full.pickle``.  Test
    error, test score, weight norm and training time are printed.

    Returns
    -------
    clf : OneSlackSSVM
        The fitted learner.
    """
    models_basedir = 'models/msrc/'

    crf = EdgeCRF(n_states=24, n_features=2028, n_edge_features=4,
                  inference_method='gco')
    clf = OneSlackSSVM(crf, max_iter=10000, C=0.01, verbose=2,
                       tol=0.1, n_jobs=4, inference_cache=100)

    X, Y = load_msrc('train')
    Y = remove_areas(Y)

    # Only the fit itself is timed; loading and saving are excluded.
    start = time()
    clf.fit(X, Y)
    stop = time()

    np.savetxt(models_basedir + 'msrc_full.csv', clf.w)
    # Pickles are byte streams: open in binary mode so the file is written
    # verbatim on every platform (text mode may translate newlines).
    with open(models_basedir + 'msrc_full' + '.pickle', 'wb') as f:
        cPickle.dump(clf, f)

    X, Y = load_msrc('test')
    Y = remove_areas(Y)
    Y_pred = clf.predict(X)

    # Single-argument parenthesised print behaves identically as a Python 2
    # statement and as a Python 3 function call.
    print('Error on test set: %f' % compute_error(Y, Y_pred))
    print('Score on test set: %f' % clf.score(X, Y))
    print('Norm of weight vector: |w|=%f' % np.linalg.norm(clf.w))
    print('Elapsed time: %f s' % (stop - start))

    return clf
def msrc_weak(n_full=20, n_train=276, C=100, latent_iter=25,
              max_iter=500, inner_tol=0.001, outer_tol=0.01,
              min_changes=0, initialize=True, alpha=0.1,
              n_inference_iter=5):
    """Train a latent SSVM on MSRC with a mix of full and weak labels.

    Parameters
    ----------
    n_full : int
        Selects the mask file ``trainMaskX<n_full>.txt``; samples flagged in
        that mask keep their full labelling, the rest get weak labels.
    n_train : int
        Number of leading mask entries that are used.
    C : float
        Regularisation parameter passed to the inner ``OneSlackSSVM``.
    latent_iter : int
        ``latent_iter`` passed to ``LatentSSVM``.
    max_iter : int
        ``max_iter`` passed to the inner ``OneSlackSSVM``.
    inner_tol : float
        ``tol`` of the inner ``OneSlackSSVM``.
    outer_tol : float
        ``tol`` of the outer ``LatentSSVM``.
    min_changes : int
        ``min_changes`` passed to ``LatentSSVM``.
    initialize : bool
        Forwarded to ``LatentSSVM.fit``.
    alpha : float
        ``alpha`` passed to ``HCRF``.
    n_inference_iter : int
        ``n_iter`` passed to ``HCRF``.

    Returns
    -------
    result : ExperimentResult
        Staged test scores, the learner's recorded histories, the final
        train/test scores, the elapsed training time and the parameters
        of this run.
    """
    crf = HCRF(n_states=24, n_features=2028, n_edge_features=4, alpha=alpha,
               inference_method='gco', n_iter=n_inference_iter)
    base_clf = OneSlackSSVM(crf, max_iter=max_iter, C=C, verbose=0,
                            tol=inner_tol, n_jobs=4, inference_cache=10)
    clf = LatentSSVM(base_clf, latent_iter=latent_iter, verbose=2,
                     tol=outer_tol, min_changes=min_changes, n_jobs=4)

    # msrc_load performs exactly the loading and label construction that used
    # to be duplicated inline here.
    Xtrain, Ytrain, Ytrain_full, Xtest, Ytest = msrc_load(n_full, n_train)

    # Only the fit itself is timed.
    start = time()
    clf.fit(Xtrain, Ytrain, initialize=initialize)
    stop = time()

    # The train score is measured against the fully labelled ground truth,
    # not against the partially weak labels used for fitting.
    train_score = clf.score(Xtrain, Ytrain_full)
    test_score = clf.score(Xtest, Ytest)
    time_elapsed = stop - start

    # Single-argument parenthesised print behaves identically as a Python 2
    # statement and as a Python 3 function call.
    print('Score on train set: %f' % train_score)
    print('Score on test set: %f' % test_score)
    print('Norm of weight vector: |w|=%f' % np.linalg.norm(clf.w))
    print('Elapsed time: %f s' % time_elapsed)

    test_scores = list(clf.staged_score(Xtest, Ytest))

    result = ExperimentResult(
        np.array(test_scores), clf.changes_, clf.w_history_,
        clf.delta_history_, clf.primal_objective_curve_,
        clf.objective_curve_, clf.timestamps_, clf.base_iter_history_,
        train_score=train_score, test_score=test_score,
        time_elapsed=time_elapsed, n_inference_iter=n_inference_iter,
        n_full=n_full, n_train=n_train, C=C, latent_iter=latent_iter,
        max_iter=max_iter, inner_tol=inner_tol, outer_tol=outer_tol,
        alpha=alpha, min_changes=min_changes, initialize=initialize,
        dataset_name='msrc', annotation_type='image-level labelling',
        label_type='full+weak')
    return result
def msrc_test():
    """Test the fully supervised model on different train set sizes.

    For each size in ``[20, 40, 80, 160, 276]`` a fresh EdgeCRF/OneSlackSSVM
    is fitted on the training samples selected by the corresponding mask
    file, the weights and pickled learner are stored under ``models/msrc/``,
    and ``1 - compute_error`` on the test split is recorded.  The list of
    qualities is finally written to ``results/msrc/msrc_full.txt``.
    """
    basedir = '../data/msrc/trainmasks/'
    models_basedir = 'models/msrc/'
    quality = []

    Xtest, Ytest = load_msrc('test')
    Ytest = remove_areas(Ytest)
    Xtrain, Ytrain = load_msrc('train')
    Ytrain = remove_areas(Ytrain)

    for n_train in [20, 40, 80, 160, 276]:
        crf = EdgeCRF(n_states=24, n_features=2028, n_edge_features=4,
                      inference_method='gco')
        clf = OneSlackSSVM(crf, max_iter=1000, C=0.01, verbose=0,
                           tol=0.1, n_jobs=4, inference_cache=100)

        if n_train != 276:
            train_mask = np.genfromtxt(basedir + 'trainMaskX%d.txt' % n_train)
            # Builtin bool replaces the np.bool alias, which was deprecated
            # in NumPy 1.20 and removed in 1.24.
            # NOTE(review): this keeps up to 277 entries while the full-set
            # branch below builds 276; zip() stops at the shortest input so
            # a surplus entry is ignored -- confirm the intended bound.
            train_mask = train_mask[:277].astype(bool)
        else:
            # 276 selects every sample, so no mask file is read.
            train_mask = np.ones(276).astype(bool)

        selected = [(x, y)
                    for keep, x, y in zip(train_mask, Xtrain, Ytrain)
                    if keep]
        curX = [x for x, _ in selected]
        curY = [y for _, y in selected]

        # Only the fit itself is timed.
        start = time()
        clf.fit(curX, curY)
        stop = time()

        np.savetxt(models_basedir + 'test_model_%d.csv' % n_train, clf.w)
        # Pickles are byte streams: open in binary mode so the file is
        # written verbatim on every platform.
        with open(models_basedir + 'test_model_%d' % n_train + '.pickle',
                  'wb') as f:
            cPickle.dump(clf, f)

        Ypred = clf.predict(Xtest)
        q = 1 - compute_error(Ytest, Ypred)

        # Single-argument parenthesised print behaves identically as a
        # Python 2 statement and as a Python 3 function call.
        print('n_train=%d, quality=%f, time=%f' % (n_train, q, stop - start))
        quality.append(q)

    np.savetxt('results/msrc/msrc_full.txt', quality)
def msrc_load(n_full, n_train):
    """Load MSRC and build the full/weak label sets for a latent experiment.

    Parameters
    ----------
    n_full : int
        Selects the mask file ``trainMaskX<n_full>.txt``; training samples
        whose mask entry is true keep their full labelling.
    n_train : int
        Number of leading mask entries that are used.

    Returns
    -------
    Xtrain : features of the train split, as returned by ``load_msrc``.
    Ytrain : list of Label
        Full labels where the mask is true, otherwise labels that carry only
        the distinct values of column 0.
    Ytrain_full : list of Label
        Full labels for every training sample.
    Xtest : features of the test split, as returned by ``load_msrc``.
    Ytest : list of Label
        Full labels for every test sample.
    """
    def full_label(y):
        # Column 0 cast to int32 is the labelling; column 1 is forwarded
        # unchanged (presumably region areas, cf. remove_areas -- confirm).
        return Label(y[:, 0].astype(np.int32), None, y[:, 1], True)

    def weak_label(y):
        # Only the set of distinct column-0 values is kept.
        return Label(None, np.unique(y[:, 0].astype(np.int32)), y[:, 1],
                     False)

    Xtest, Ytest_raw = load_msrc('test')
    Ytest = [full_label(y) for y in Ytest_raw]

    train_mask = np.genfromtxt(
        '../data/msrc/trainmasks/trainMaskX%d.txt' % n_full)
    # Builtin bool replaces the np.bool alias, which was deprecated in
    # NumPy 1.20 and removed in 1.24.
    train_mask = train_mask[0:n_train].astype(bool)

    Xtrain, Ytrain_raw = load_msrc('train')
    Ytrain_full = [full_label(y) for y in Ytrain_raw]

    # NOTE(review): zip() stops at the shorter input, so Ytrain has at most
    # len(train_mask) entries while Xtrain and Ytrain_full keep every
    # sample -- confirm callers pass n_train equal to the train set size.
    Ytrain = [full_label(y) if is_full else weak_label(y)
              for y, is_full in zip(Ytrain_raw, train_mask)]

    return Xtrain, Ytrain, Ytrain_full, Xtest, Ytest