def test_syntetic_weak(mode): # needs refactoring # Syntetic data # test latentSSVM on different train set sizes & on different train sets # mode can be 'heterogenous' or 'latent' results = np.zeros((18, 6)) full_labeled = np.array([0, 2, 4, 10, 25, 100]) train_size = 400 for dataset in xrange(1, 19): X, H = load_syntetic(dataset) H = list(H) Y = weak_from_hidden(H) for j, nfull in enumerate(full_labeled): if mode == 'latent': crf = LatentCRF(n_states=10, n_features=10, n_edge_features=2, inference_method='qpbo') base_clf = OneSlackSSVM(crf, max_iter=100, C=0.01, verbose=0, tol=0.1, n_jobs=4, inference_cache=100) clf = LatentSSVM(base_clf, latent_iter=5) elif mode == 'heterogenous': crf = HCRF(n_states=10, n_features=10, n_edge_features=2, inference_method='gco') base_clf = OneSlackSSVM(crf, max_iter=500, C=0.1, verbose=0, tol=0.001, n_jobs=4, inference_cache=100) clf = LatentSSVM(base_clf, latent_iter=5, verbose=0) x_train = X[:train_size] y_train = Y[:train_size] h_train = H[:train_size] x_test = X[(train_size + 1):] h_test = H[(train_size + 1):] for i in xrange(nfull, len(h_train)): h_train[i] = None try: if mode == 'latent': clf.fit(x_train, y_train, h_train) elif mode == 'heterogenous': clf.fit(x_train, y_train, h_train, pass_labels=True, initialize=True) h_pred = clf.predict_latent(x_test) results[dataset - 1, j] = compute_error(h_test, h_pred) print 'dataset=%d, nfull=%d, error=%f' % (dataset, nfull, results[dataset - 1, j]) except ValueError: # bad QP print 'dataset=%d, nfull=%d: Failed' % (dataset, nfull) if mode == 'latent': np.savetxt('results/weak_labeled.csv', results, delimiter=',') elif mode == 'heterogenous': np.savetxt('results/heterogenous.csv', results, delimiter=',') return results
def synteticTest(peer): models_basedir = peer.config.get("models.basedir") #with open(models_basedir + 'syntetic_full' + '.pickle', 'w') as f: # clf = pickle.load(f) with open(models_basedir + 'syntetic_full.csv') as f: w_str = f.readline() assert(w_str) from MasterSlaveBSP import SlaveBSPTest # just to test squire = SlaveBSPTest(peer.config.get("master.index")) squire.setup(peer) for peerName in peer.getAllPeerNames(): peer.send(peerName, w_str) start = time() squire.bsp(peer) stop = time() Y_hat = [] Y_areas = [] for msg in peer.getAllMessages(): msgs = msg.split(";") assert(len(msgs) == 2) Y_hat += [np.array([int(elem) for elem in y_hat.split()]) for y_hat in msgs[0].split(",")] Y_areas += [np.array([int(elem) for elem in y_areas.split()]) for y_areas in msgs[1].split(",")] peer.log('Error on test set: %f' % compute_error(Y_hat, Y_areas)) #print('Score on test set: %f' % clf.score(x_test, y_test)) #print('Score on train set: %f' % clf.score(x_train, y_train)) #print('Norm of weight vector: |w|=%f' % np.linalg.norm(clf.w)) peer.log('Elapsed time: %f s' % (stop - start))
def syntetic_test(): # test model on different train set size & on different train sets results = np.zeros((18, 5)) full_labeled = np.array([2, 4, 10, 25, 100]) train_size = 400 for dataset in range(1, 19): X, Y = load_syntetic(dataset) for j, nfull in enumerate(full_labeled): crf = EdgeCRF(n_states=10, n_features=10, n_edge_features=2, inference_method='qpbo') clf = OneSlackSSVM(crf, max_iter=10000, C=0.01, verbose=0, tol=0.1, n_jobs=4, inference_cache=100) x_train = X[:nfull] y_train = Y[:nfull] x_test = X[(train_size + 1):] y_test = Y[(train_size + 1):] try: clf.fit(x_train, y_train) y_pred = clf.predict(x_test) results[dataset - 1, j] = compute_error(y_test, y_pred) print('dataset=%d, nfull=%d, error=%f' % (dataset, nfull, results[dataset - 1, j])) except ValueError: print('dataset=%d, nfull=%d: Failed' % (dataset, nfull)) np.savetxt('results/syntetic/full_labeled.txt', results)
def msrc(): models_basedir = 'models/msrc/' crf = EdgeCRF(n_states=24, n_features=2028, n_edge_features=4, inference_method='gco') clf = OneSlackSSVM(crf, max_iter=10000, C=0.01, verbose=2, tol=0.1, n_jobs=4, inference_cache=100) X, Y = load_msrc('train') Y = remove_areas(Y) start = time() clf.fit(X, Y) stop = time() np.savetxt(models_basedir + 'msrc_full.csv', clf.w) with open(models_basedir + 'msrc_full' + '.pickle', 'w') as f: pickle.dump(clf, f) X, Y = load_msrc('test') Y = remove_areas(Y) Y_pred = clf.predict(X) print('Error on test set: %f' % compute_error(Y, Y_pred)) print('Score on test set: %f' % clf.score(X, Y)) print('Norm of weight vector: |w|=%f' % np.linalg.norm(clf.w)) print('Elapsed time: %f s' % (stop - start)) return clf
def msrc(): models_basedir = 'models/msrc/' crf = EdgeCRF(n_states=24, n_features=2028, n_edge_features=4, inference_method='gco') clf = OneSlackSSVM(crf, max_iter=10000, C=0.01, verbose=2, tol=0.1, n_jobs=4, inference_cache=100) X, Y = load_msrc('train') Y = remove_areas(Y) start = time() clf.fit(X, Y) stop = time() np.savetxt(models_basedir + 'msrc_full.csv', clf.w) with open(models_basedir + 'msrc_full' + '.pickle', 'w') as f: cPickle.dump(clf, f) X, Y = load_msrc('test') Y = remove_areas(Y) Y_pred = clf.predict(X) print 'Error on test set: %f' % compute_error(Y, Y_pred) print 'Score on test set: %f' % clf.score(X, Y) print 'Norm of weight vector: |w|=%f' % np.linalg.norm(clf.w) print 'Elapsed time: %f s' % (stop - start) return clf
def syntetic_test(): # test model on different train set size & on different train sets results = np.zeros((18, 5)) full_labeled = np.array([2, 4, 10, 25, 100]) train_size = 400 for dataset in xrange(1, 19): X, Y = load_syntetic(dataset) for j, nfull in enumerate(full_labeled): crf = EdgeCRF(n_states=10, n_features=10, n_edge_features=2, inference_method='qpbo') clf = OneSlackSSVM(crf, max_iter=10000, C=0.01, verbose=0, tol=0.1, n_jobs=4, inference_cache=100) x_train = X[:nfull] y_train = Y[:nfull] x_test = X[(train_size + 1):] y_test = Y[(train_size + 1):] try: clf.fit(x_train, y_train) y_pred = clf.predict(x_test) results[dataset - 1, j] = compute_error(y_test, y_pred) print 'dataset=%d, nfull=%d, error=%f' % (dataset, nfull, results[dataset - 1, j]) except ValueError: print 'dataset=%d, nfull=%d: Failed' % (dataset, nfull) np.savetxt('results/syntetic/full_labeled.txt', results)
def syntetic(): # train model on a single set models_basedir = 'models/syntetic/' crf = EdgeCRF(n_states=10, n_features=10, n_edge_features=2, inference_method='gco') clf = OneSlackSSVM(crf, max_iter=10000, C=0.01, verbose=2, tol=0.1, n_jobs=4, inference_cache=100) X, Y = load_syntetic(1) x_train, x_test, y_train, y_test = train_test_split(X, Y, train_size=100, random_state=179) start = time() clf.fit(x_train, y_train) stop = time() np.savetxt(models_basedir + 'syntetic_full.csv', clf.w) with open(models_basedir + 'syntetic_full' + '.pickle', 'w') as f: cPickle.dump(clf, f) y_pred = clf.predict(x_test) print 'Error on test set: %f' % compute_error(y_test, y_pred) print 'Score on test set: %f' % clf.score(x_test, y_test) print 'Score on train set: %f' % clf.score(x_train, y_train) print 'Norm of weight vector: |w|=%f' % np.linalg.norm(clf.w) print 'Elapsed time: %f s' % (stop - start) return clf
def msrc_test(): # test model on different train set sizes basedir = '../data/msrc/trainmasks/' models_basedir = 'models/msrc/' quality = [] Xtest, Ytest = load_msrc('test') Ytest = remove_areas(Ytest) Xtrain, Ytrain = load_msrc('train') Ytrain = remove_areas(Ytrain) for n_train in [20, 40, 80, 160, 276]: crf = EdgeCRF(n_states=24, n_features=2028, n_edge_features=4, inference_method='gco') clf = OneSlackSSVM(crf, max_iter=1000, C=0.01, verbose=0, tol=0.1, n_jobs=4, inference_cache=100) if n_train != 276: train_mask = np.genfromtxt(basedir + 'trainMaskX%d.txt' % n_train) train_mask = train_mask[:277].astype(np.bool) else: train_mask = np.ones(276).astype(np.bool) curX = [] curY = [] for (s, x, y) in zip(train_mask, Xtrain, Ytrain): if s: curX.append(x) curY.append(y) start = time() clf.fit(curX, curY) stop = time() np.savetxt(models_basedir + 'test_model_%d.csv' % n_train, clf.w) with open(models_basedir + 'test_model_%d' % n_train + '.pickle', 'w') as f: pickle.dump(clf, f) Ypred = clf.predict(Xtest) q = 1 - compute_error(Ytest, Ypred) print('n_train=%d, quality=%f, time=%f' % (n_train, q, stop - start)) quality.append(q) np.savetxt('results/msrc/msrc_full.txt', quality)
def msrc_test(): # test model on different train set sizes basedir = '../data/msrc/trainmasks/' models_basedir = 'models/msrc/' quality = [] Xtest, Ytest = load_msrc('test') Ytest = remove_areas(Ytest) Xtrain, Ytrain = load_msrc('train') Ytrain = remove_areas(Ytrain) for n_train in [20, 40, 80, 160, 276]: crf = EdgeCRF(n_states=24, n_features=2028, n_edge_features=4, inference_method='gco') clf = OneSlackSSVM(crf, max_iter=1000, C=0.01, verbose=0, tol=0.1, n_jobs=4, inference_cache=100) if n_train != 276: train_mask = np.genfromtxt(basedir + 'trainMaskX%d.txt' % n_train) train_mask = train_mask[:277].astype(np.bool) else: train_mask = np.ones(276).astype(np.bool) curX = [] curY = [] for (s, x, y) in zip(train_mask, Xtrain, Ytrain): if s: curX.append(x) curY.append(y) start = time() clf.fit(curX, curY) stop = time() np.savetxt(models_basedir + 'test_model_%d.csv' % n_train, clf.w) with open(models_basedir + 'test_model_%d' % n_train + '.pickle', 'w') as f: cPickle.dump(clf, f) Ypred = clf.predict(Xtest) q = 1 - compute_error(Ytest, Ypred) print 'n_train=%d, quality=%f, time=%f' % (n_train, q, stop - start) quality.append(q) np.savetxt('results/msrc/msrc_full.txt', quality)
def test_syntetic_weak(mode): # needs refactoring; does not work # Syntetic data # test latentSSVM on different train set sizes & on different train sets # mode can be 'heterogenous' or 'latent' results = np.zeros((18, 6)) full_labeled = np.array([0, 2, 4, 10, 25, 100]) train_size = 400 for dataset in xrange(1, 19): X, H = load_syntetic(dataset) H = list(H) Y = weak_from_hidden(H) for j, nfull in enumerate(full_labeled): if mode == 'latent': crf = LatentCRF(n_states=10, n_features=10, n_edge_features=2, inference_method='qpbo') base_clf = OneSlackSSVM(crf, max_iter=100, C=0.01, verbose=0, tol=0.1, n_jobs=4, inference_cache=100) clf = LatentSSVM(base_clf, latent_iter=5) elif mode == 'heterogenous': crf = HCRF(n_states=10, n_features=10, n_edge_features=2, inference_method='gco') base_clf = OneSlackSSVM(crf, max_iter=500, C=0.1, verbose=0, tol=0.001, n_jobs=4, inference_cache=100) clf = LatentSSVM(base_clf, latent_iter=5, verbose=0) x_train = X[:train_size] y_train = Y[:train_size] h_train = H[:train_size] x_test = X[(train_size + 1):] h_test = H[(train_size + 1):] for i in xrange(nfull, len(h_train)): h_train[i] = None try: if mode == 'latent': clf.fit(x_train, y_train, h_train) elif mode == 'heterogenous': clf.fit(x_train, y_train, h_train, pass_labels=True, initialize=True) h_pred = clf.predict_latent(x_test) results[dataset - 1, j] = compute_error(h_test, h_pred) print 'dataset=%d, nfull=%d, error=%f' % ( dataset, nfull, results[dataset - 1, j]) except ValueError: # bad QP print 'dataset=%d, nfull=%d: Failed' % (dataset, nfull) if mode == 'latent': np.savetxt('results/weak_labeled.csv', results, delimiter=',') elif mode == 'heterogenous': np.savetxt('results/heterogenous.csv', results, delimiter=',') return results