import numpy as np


def train(x_train, y_train, x_test, y_test):
    x_train = np.asarray(x_train, dtype=float)  # np.float was removed in recent numpy
    y_train = np.asarray(y_train, dtype=np.int64)
    # x_test = np.asarray(x_test, dtype=float)
    # y_test = np.asarray(y_test, dtype=np.int64)
    x_test = x_train
    y_test = y_train
    from pystruct.learners import (NSlackSSVM, OneSlackSSVM, SubgradientSSVM,
                                   LatentSSVM, SubgradientLatentSSVM,
                                   PrimalDSStructuredSVM)
    from pystruct.models import MultiLabelClf, MultiClassClf

    clf = OneSlackSSVM(MultiLabelClf(), C=1, show_loss_every=1, verbose=1,
                       max_iter=1000)
    # print(x_train, y_train)
    # input()
    clf.fit(x_train, y_train)
    result = clf.predict(x_test)
    print('Result: \n', result)
    print('True label:\n', y_test)
    print('Score:', clf.score(x_test, y_test))  # the score was computed but discarded before
    print('\n')
    count = 0
    for i in range(len(result)):
        # print(np.sum(np.square(y_test[i] - result[i])))
        if np.sum(np.square(y_test[i] - result[i])) != 0:
            print('True label: ', y_test[i], 'Predict: ', result[i])
            count += 1
    print(count)
    translate_vector(x_test, y_test)
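For reference, MultiLabelClf expects dense inputs: X as an (n_samples, n_features) float array and Y as an (n_samples, n_labels) binary indicator array. A minimal sketch with toy data (the shapes, not the values, are the point; this is an assumption-level illustration, not the author's data):

import numpy as np
from pystruct.models import MultiLabelClf
from pystruct.learners import OneSlackSSVM

# toy multi-label problem: 4 samples, 3 features, 2 independent labels
x_toy = np.array([[0., 1., 0.], [1., 0., 0.], [1., 1., 0.], [0., 0., 1.]])
y_toy = np.array([[1, 0], [0, 1], [1, 1], [0, 0]])  # binary indicator rows

toy_clf = OneSlackSSVM(MultiLabelClf(), C=1, max_iter=100)
toy_clf.fit(x_toy, y_toy)
print(toy_clf.predict(x_toy))  # list of binary label vectors, one per sample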
def ssvm_classifier():
    x_train, y_train, x_test, y_test = load_data1()
    print("Data Loaded")
    pca = PCA(n_components=1000)
    x_train_reduced = pca.fit_transform(x_train)
    # reuse the projection fitted on the training data; calling
    # fit_transform on the test set would learn a different projection
    x_test_reduced = pca.transform(x_test)
    print("PCA finished")
    print("Learning the model")
    n_labels = y_train.shape[1]
    # candidate edge sets for edge-based models (unused in this fragment)
    full = np.vstack([x for x in itertools.combinations(range(n_labels), 2)])
    tree = chow_liu_tree(y_train)
    independent_model = MultiLabelClf(inference_method='unary')
    independent_ssvm = OneSlackSSVM(independent_model, C=.1, tol=0.01)
    independent_ssvm.fit(x_train_reduced, y_train)
    print("saving model ...")
    # cp is assumed to be pickle/cPickle imported as "cp" elsewhere in the file
    with open("data/independent_ssvm.pkl", "wb+") as f:
        cp.dump(independent_ssvm, f)
    # print("Calculating the cross-validation scores")
    # scores = model_selection.cross_val_score(independent_ssvm, x_train_reduced, y_train, cv=3)
    print(independent_ssvm.score(x_test_reduced, y_test))
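The helper chow_liu_tree is not defined in this snippet. pystruct's multi-label example builds it as a maximum-weight spanning tree over pairwise label mutual information, which the `tree` edge list above can feed to an edge-based MultiLabelClf. A sketch along those lines (assumed, not necessarily the author's exact helper):

import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import minimum_spanning_tree
from sklearn.metrics import mutual_info_score


def chow_liu_tree(y_):
    # pairwise mutual information between label columns; weights are
    # negated so a *minimum* spanning tree solver yields the maximum tree
    n_labels = y_.shape[1]
    mi = np.zeros((n_labels, n_labels))
    for i in range(n_labels):
        for j in range(n_labels):
            mi[i, j] = mutual_info_score(y_[:, i], y_[:, j])
    mst = minimum_spanning_tree(csr_matrix(-mi))
    edges = np.vstack(mst.nonzero()).T
    edges.sort(axis=1)
    return edges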
def syntetic():
    # train model on a single set
    models_basedir = 'models/syntetic/'
    crf = EdgeCRF(n_states=10, n_features=10, n_edge_features=2,
                  inference_method='gco')
    clf = OneSlackSSVM(crf, max_iter=10000, C=0.01, verbose=2,
                       tol=0.1, n_jobs=4, inference_cache=100)

    X, Y = load_syntetic(1)

    x_train, x_test, y_train, y_test = train_test_split(X, Y,
                                                        train_size=100,
                                                        random_state=179)

    start = time()
    clf.fit(x_train, y_train)
    stop = time()

    np.savetxt(models_basedir + 'syntetic_full.csv', clf.w)
    # pickles must be written in binary mode
    with open(models_basedir + 'syntetic_full.pickle', 'wb') as f:
        cPickle.dump(clf, f)

    y_pred = clf.predict(x_test)

    print('Error on test set: %f' % compute_error(y_test, y_pred))
    print('Score on test set: %f' % clf.score(x_test, y_test))
    print('Score on train set: %f' % clf.score(x_train, y_train))
    print('Norm of weight vector: |w|=%f' % np.linalg.norm(clf.w))
    print('Elapsed time: %f s' % (stop - start))

    return clf
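compute_error and load_syntetic are project-local helpers not shown here. A plausible compute_error, assuming it is the mean per-node Hamming error over all samples (an assumption; the real helper may weight nodes differently):

import numpy as np


def compute_error(Y, Y_pred):
    # fraction of wrongly labelled nodes over all samples; assumes each
    # Y[i] is an array of per-node labels (hypothetical reconstruction)
    wrong = sum(np.sum(y != y_pred) for y, y_pred in zip(Y, Y_pred))
    total = sum(y.size for y in Y)
    return wrong / float(total)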
def test_constraint_removal():
    digits = load_digits()
    X, y = digits.data, digits.target
    y = 2 * (y % 2) - 1  # even vs odd as +1 vs -1
    X = X / 16.
    pbl = BinaryClf(n_features=X.shape[1])
    clf_no_removal = OneSlackSSVM(model=pbl, max_iter=500, C=1,
                                  inactive_window=0, tol=0.01)
    clf_no_removal.fit(X, y)
    clf = OneSlackSSVM(model=pbl, max_iter=500, C=1, tol=0.01,
                       inactive_threshold=1e-8)
    clf.fit(X, y)

    # check that we learned something
    assert_greater(clf.score(X, y), .92)

    # results are mostly equal
    # if we decrease tol, they will get more similar
    assert_less(np.mean(clf.predict(X) != clf_no_removal.predict(X)), 0.02)

    # without removal, have as many constraints as iterations
    assert_equal(len(clf_no_removal.objective_curve_),
                 len(clf_no_removal.constraints_))

    # with removal, there are fewer constraints than iterations
    assert_less(len(clf.constraints_), len(clf.objective_curve_))
def msrc():
    models_basedir = 'models/msrc/'
    crf = EdgeCRF(n_states=24, n_features=2028, n_edge_features=4,
                  inference_method='gco')
    clf = OneSlackSSVM(crf, max_iter=10000, C=0.01, verbose=2,
                       tol=0.1, n_jobs=4, inference_cache=100)

    X, Y = load_msrc('train')
    Y = remove_areas(Y)

    start = time()
    clf.fit(X, Y)
    stop = time()

    np.savetxt(models_basedir + 'msrc_full.csv', clf.w)
    # pickles must be written in binary mode
    with open(models_basedir + 'msrc_full.pickle', 'wb') as f:
        pickle.dump(clf, f)

    X, Y = load_msrc('test')
    Y = remove_areas(Y)
    Y_pred = clf.predict(X)

    print('Error on test set: %f' % compute_error(Y, Y_pred))
    print('Score on test set: %f' % clf.score(X, Y))
    print('Norm of weight vector: |w|=%f' % np.linalg.norm(clf.w))
    print('Elapsed time: %f s' % (stop - start))

    return clf
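To reuse the saved model later, it can be loaded back in binary mode. A usage sketch (the path matches the one written above):

import pickle

import numpy as np

with open('models/msrc/msrc_full.pickle', 'rb') as f:
    clf = pickle.load(f)
print('Restored |w| = %f' % np.linalg.norm(clf.w))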
def test_svm_as_crf_pickling():
    iris = load_iris()
    X, y = iris.data, iris.target
    # np.int was removed in recent numpy; plain int is equivalent here
    X_ = [(np.atleast_2d(x), np.empty((0, 2), dtype=int)) for x in X]
    Y = y.reshape(-1, 1)

    X_train, X_test, y_train, y_test = train_test_split(X_, Y, random_state=1)
    _, file_name = mkstemp()

    pbl = GraphCRF(n_features=4, n_states=3, inference_method='unary')
    logger = SaveLogger(file_name)
    svm = OneSlackSSVM(pbl, check_constraints=True, C=1, n_jobs=1,
                       logger=logger)
    svm.fit(X_train, y_train)

    assert_less(.97, svm.score(X_test, y_test))
    assert_less(.97, logger.load().score(X_test, y_test))
def test_one_slack_repellent_potentials():
    # test non-submodular problem with and without submodularity constraint
    # dataset is checkerboard
    X, Y = generate_checker()
    crf = GridCRF(inference_method=inference_method)
    clf = OneSlackSSVM(model=crf, max_iter=10, C=.01,
                       check_constraints=True)
    clf.fit(X, Y)
    Y_pred = clf.predict(X)
    # standard crf can predict perfectly
    assert_array_equal(Y, Y_pred)

    submodular_clf = OneSlackSSVM(model=crf, max_iter=10, C=.01,
                                  check_constraints=True,
                                  negativity_constraint=[4, 5, 6])
    submodular_clf.fit(X, Y)
    Y_pred = submodular_clf.predict(X)
    assert_less(submodular_clf.score(X, Y), .99)
    # submodular crf cannot do better than unaries
    for i, x in enumerate(X):
        y_pred_unaries = crf.inference(x, np.array([1, 0, 0, 1, 0, 0, 0]))
        assert_array_equal(y_pred_unaries, Y_pred[i])
# fetch_mldata was removed in scikit-learn 0.22; fetch_openml('mnist_784')
# is the modern replacement
from sklearn.datasets import fetch_mldata
from sklearn.utils import shuffle
from pystruct.problems import CrammerSingerSVMProblem
#from pystruct.learners import SubgradientStructuredSVM
#from pystruct.learners import StructuredSVM
from pystruct.learners import OneSlackSSVM

mnist = fetch_mldata("MNIST original")
X, y = mnist.data, mnist.target
X = X / 255.

X_train, y_train = X[:60000], y[:60000]
X_test, y_test = X[60000:], y[60000:]

X_train, y_train = shuffle(X_train, y_train)

pblm = CrammerSingerSVMProblem(n_classes=10, n_features=28 ** 2)
#svm = SubgradientStructuredSVM(pblm, verbose=10, n_jobs=1, plot=True,
                               #max_iter=10, batch=False, learning_rate=0.0001,
                               #momentum=0)
#svm = SubgradientStructuredSVM(pblm, verbose=10, n_jobs=1, plot=True,
                               #max_iter=2, batch=False, momentum=.9,
                               #learning_rate=0.001, show_loss='true', C=1000)
svm = OneSlackSSVM(pblm, verbose=2, n_jobs=1, plot=True, max_iter=2, C=1000)
#svm = StructuredSVM(pblm, verbose=50, n_jobs=1, plot=True, max_iter=10,
                    #C=1000)

svm.fit(X_train, y_train)
print(svm.score(X_train, y_train))
print(svm.score(X_test, y_test))
# [Note: if you get an error on the below line, it may be because you need to upgrade scikit-learn]
# Represent features using a one-of-K scheme: if a feature can take values in
# {0,...,K}, then introduce K binary features such that only the i^th binary
# feature is non-zero when the feature takes value 'i'.
# n_values specifies the number of states each feature can take.
encoder = OneHotEncoder(n_values=[1, 2, 2, 201, 201],
                        sparse=False).fit(np.vstack(X))
X_encoded = [encoder.transform(x) for x in X]

# Pick the first 100 samples from the encoded training set.
X_small, y_small = X_encoded[:100], y[:100]

# See: http://pystruct.github.io/generated/pystruct.learners.OneSlackSSVM.html
# See: http://pystruct.github.io/generated/pystruct.models.ChainCRF.html
# Rest of the documentation can be found here: http://pystruct.github.io/references.html

# Construct a directed ChainCRF with 10 states for each variable, and pass
# this CRF to the OneSlackSSVM constructor to create an object 'ssvm'.
ssvm = OneSlackSSVM(ChainCRF(n_states=10, inference_method='max-product',
                             directed=True),
                    max_iter=200, C=1)
ssvm.fit(X_small, y_small)           # learn a structured SVM on X_small, y_small
weights = ssvm.w                     # store the learnt weights in 'weights'
print(ssvm.score(X_small, y_small))  # training accuracy on X_small, y_small
print(ssvm.predict(X_small))         # predicted labels under the learnt model
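The n_values argument used above was deprecated in scikit-learn 0.20 and removed in 0.22. On modern scikit-learn, an equivalent construction uses categories instead; a sketch, assuming each feature takes integer values 0..K-1 as the original n_values list implies (note sparse_output replaces sparse from scikit-learn 1.2 onward):

import numpy as np
from sklearn.preprocessing import OneHotEncoder

n_states = [1, 2, 2, 201, 201]  # states per feature, as in n_values above
encoder = OneHotEncoder(categories=[np.arange(n) for n in n_states],
                        sparse_output=False).fit(np.vstack(X))
X_encoded = [encoder.transform(x) for x in X]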
""" ================================ Sequence classifcation benchmark ================================ This is a stripped-down version of the "plot_letters.py" example targetted to benchmark inference and learning algorithms on chains. """ import numpy as np from pystruct.datasets import load_letters from pystruct.models import ChainCRF from pystruct.learners import OneSlackSSVM abc = "abcdefghijklmnopqrstuvwxyz" letters = load_letters() X, y, folds = letters['data'], letters['labels'], letters['folds'] # we convert the lists to object arrays, as that makes slicing much more # convenient X, y = np.array(X), np.array(y) X_train, X_test = X[folds == 1], X[folds != 1] y_train, y_test = y[folds == 1], y[folds != 1] # Train linear chain CRF model = ChainCRF() ssvm = OneSlackSSVM(model=model, C=.1, tol=0.1, verbose=3, max_iter=20) ssvm.fit(X_train, y_train) print("Test score with chain CRF: %f" % ssvm.score(X_test, y_test))
    # Represent features using a one-of-K scheme: if a feature can take
    # values in {0,...,K}, then introduce K binary features such that only
    # the i^th binary feature is non-zero when the feature takes value 'i'.
    encoder = OneHotEncoder(n_values=[1, 2, 2, 201, 201],
                            sparse=False).fit(np.vstack(X))  # FROM SAMPLE
    X_encoded = [encoder.transform(x) for x in X]  # FROM SAMPLE
    return X_encoded, y, sentences  # FROM SAMPLE

X_train, Y_train, TrainSent = ReadData("train")
best_C = 0.1
crf = ChainCRF(n_states=10, inference_method="max-product", directed=True)
ssvm = OneSlackSSVM(crf, max_iter=200, C=best_C)
ssvm.fit(X_train[:4500], Y_train[:4500])
error = 1 - ssvm.score(X_train[-500:], Y_train[-500:])

tag = np.array(['verb', 'noun', 'adjective', 'adverb', 'preposition',
                'pronoun', 'determiner', 'number', 'punctuation', 'other'])
cl = random.sample(range(10), 3)
print('Chosen classes: ', tag[cl])

# the last 10x10 block of the weight vector holds the transition weights
trans_matrix = np.reshape(ssvm.w[-10 * 10:], (10, 10))
pairs = list(itertools.combinations(cl, 2))
for pair in pairs:
    print(tag[pair[0]], "->", tag[pair[1]], trans_matrix[pair[0]][pair[1]])
    print(tag[pair[1]], "->", tag[pair[0]], trans_matrix[pair[1]][pair[0]])

features = [
Y_train = Y_train[:4500]

crf = ChainCRF(n_states=10, inference_method='max-product', directed=True)

l1 = [10 ** i for i in range(-4, 3, 1)]
l1.extend([5 * l for l in l1])
Cs = sorted(l1)

error = {}
best_C = {}
Train_Sizes = [100, 200, 500, 1000, 4500]

for b in Train_Sizes:
    score = {}
    for C in Cs:
        ssvm = OneSlackSSVM(crf, max_iter=200, C=C)
        ssvm.fit(X_train[:b], Y_train[:b])
        score[C] = ssvm.score(X_val, Y_val)
        print('b = ', b, 'C = ', C, ' : ', score[C])
    best_C[b] = max(score, key=score.get)
    error['train', b] = 1. - score[best_C[b]]

for b in Train_Sizes:
    ssvm = OneSlackSSVM(crf, max_iter=200, C=best_C[b])
    ssvm.fit(X_train[:b], Y_train[:b])
    error['test', b] = 1. - ssvm.score(X_test, Y_test)

plt.xlabel('Size of the training set')
plt.ylabel('Error')
plt.plot(Train_Sizes, [error['train', b] for b in Train_Sizes], label='train')
plt.plot(Train_Sizes, [error['test', b] for b in Train_Sizes], label='test')
plt.legend()
plt.show()
X_train_bias = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
X_test_bias = np.hstack([X_test, np.ones((X_test.shape[0], 1))])

# n-slack cutting plane ssvm
start = time()
n_slack_svm.fit(X_train_bias, y_train)
time_n_slack_svm = time() - start
acc_n_slack = n_slack_svm.score(X_test_bias, y_test)
print("Score with pystruct n-slack ssvm: %f (took %f seconds)"
      % (acc_n_slack, time_n_slack_svm))

# 1-slack cutting plane ssvm
start = time()
one_slack_svm.fit(X_train_bias, y_train)
time_one_slack_svm = time() - start
acc_one_slack = one_slack_svm.score(X_test_bias, y_test)
print("Score with pystruct 1-slack ssvm: %f (took %f seconds)"
      % (acc_one_slack, time_one_slack_svm))

# online subgradient ssvm
start = time()
subgradient_svm.fit(X_train_bias, y_train)
time_subgradient_svm = time() - start
acc_subgradient = subgradient_svm.score(X_test_bias, y_test)
print("Score with pystruct subgradient ssvm: %f (took %f seconds)"
      % (acc_subgradient, time_subgradient_svm))

libsvm = SVC(kernel='linear', C=10)
start = time()
libsvm.fit(X_train, y_train)
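The learners compared above are constructed elsewhere in the original script. In pystruct's multiclass comparison example they look roughly like the following sketch (hyperparameters are illustrative; the bias column is appended manually, so the model should not add its own):

from pystruct.models import MultiClassClf
from pystruct.learners import NSlackSSVM, OneSlackSSVM, SubgradientSSVM

model = MultiClassClf(n_features=X_train_bias.shape[1], n_classes=10)
n_slack_svm = NSlackSSVM(model, C=0.1, tol=0.01)
one_slack_svm = OneSlackSSVM(model, C=0.1, tol=0.001)
subgradient_svm = SubgradientSSVM(model, C=0.1, learning_rate=0.000001,
                                  max_iter=1000)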
print('test index {}'.format(test_index))
print('{} jackets for training, {} for testing'.
      format(len(train_index), len(test_index)))
X_train = X[train_index]
Y_train = Y[train_index]
X_test = X[test_index]
Y_test = Y[test_index]

start = time.time()
""" YOUR S-SVM TRAINING CODE HERE """
ssvm.fit(X_train, Y_train)
end = time.time()
# time.time() already returns seconds; the original divided by 1000
print('CRF learning of 1 fold has taken {} seconds'.format(end - start))

scores_crf[fold] = ssvm.score(X_test, Y_test)
print(np.round(end - start), 'elapsed seconds to train the model')
print("Test score with chain CRF: %f" % scores_crf[fold])

""" Label the testing set and print results """
Y_pred = ssvm.predict(X_test)
wrong_fold_crf = np.sum(np.ravel(Y_test) - np.ravel(Y_pred) != 0)
wrong_segments_crf.append(wrong_fold_crf)
print('{} wrong segments out of {}'.
      format(wrong_fold_crf, len(test_index) * num_segments_per_jacket))

""" figure showing the result of classification of segments for
each jacket in the testing part of present fold """
if plot_labeling:
    for ti, pred in zip(test_index, Y_pred):
        print(ti)
        print(pred)
        s = segments[ti]
total_correct = 0
total = 0
precision = {0: [0, 0], 1: [0, 0], 2: [0, 0]}
recall = {0: [0, 0], 1: [0, 0], 2: [0, 0]}

for fold in range(5):
    print('fold ' + str(fold) + "--------------------------")
    X = []
    Y = []
    for i in range(5):
        if i == fold:
            continue
        X.extend(folds[i]['X'])
        Y.extend(folds[i]['Y'])
    print("Training Size: ", len(X), len(Y))
    ssvm.fit(X, Y)
    print('Train Score: ' + str(ssvm.score(X, Y)))
    # w = Weight(ssvm.w, node_features, edge_features, dictLength)
    fold_correct = 0
    fold_total = 0
    testX = folds[fold]['X']
    testY = folds[fold]['Y']
    for i in range(len(testX)):
        print("Instance: " + str(i))
        print(folds[fold]['threads'][i].id)
        Yi = testY[i]
        print(list(Yi))
        infY = crf.inference(testX[i], ssvm.w)
        print(list(infY))
        for py in range(len(Yi)):
            recall[Yi[py]][1] += 1
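The snippet breaks off while accumulating counts. Assuming a [hits, total] convention for the precision and recall dictionaries (an assumption; the original counting code is truncated), a final report could look like this sketch:

for c in (0, 1, 2):
    # guard against empty classes to avoid division by zero
    p = precision[c][0] / float(precision[c][1]) if precision[c][1] else 0.
    r = recall[c][0] / float(recall[c][1]) if recall[c][1] else 0.
    print('class %d: precision %.3f, recall %.3f' % (c, p, r))
print('overall accuracy: %.3f'
      % (total_correct / float(total) if total else 0.))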
from pystruct.models import ChainCRF
from pystruct.learners import OneSlackSSVM
from util import data_generator

for shift in range(5, 7):
    print(shift)
    x_train, x_test, y_train, y_test = data_generator.baseline_crf(
        train_percentage=0.6, sft=shift, future=True)
    crf = ChainCRF(n_states=2, n_features=x_train.shape[1])
    x_train = x_train.values
    x_test = x_test.values
    y_train = y_train.values.astype(int)
    y_test = y_test.values.astype(int)
    ssvm = OneSlackSSVM(model=crf, C=.1, max_iter=10)
    # each series is passed as a single chain (a one-element list)
    ssvm.fit([x_train], [y_train])
    print(ssvm.score([x_test], [y_test]))
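ChainCRF models a list of sequences: each X[i] is an (n_nodes_i, n_features) array with a matching (n_nodes_i,) label array. Wrapping the whole series in a one-element list, as above, treats it as a single long chain. To train on shorter chains instead, one could split the series into fixed-length windows; a sketch, where the window length is an arbitrary assumption:

window = 200  # arbitrary chain length, not from the original code
xs = [x_train[i:i + window] for i in range(0, len(x_train), window)]
ys = [y_train[i:i + window] for i in range(0, len(y_train), window)]
ssvm.fit(xs, ys)
print(ssvm.score([x_test], [y_test]))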