def chain_crf():
    """Train a linear-chain CRF on the letters dataset and print its score.

    The dataset comes from ``load_letters()``. Sequences whose fold id is 1
    are used for training and every other sequence for testing. All output
    (the raw dataset, split sizes, a few array shapes, the fitted learner and
    the test score) is printed to stdout; the function returns ``None``.
    """
    letters = load_letters()
    x, y, folds = letters['data'], letters['labels'], letters['folds']

    # Single-argument, parenthesised prints produce the same output under the
    # Python 2 print statement and the Python 3 print function.
    print("Letters : ")
    print(letters)

    # Arrays (rather than lists) so the fold ids can be used as boolean masks.
    x, y = np.array(x), np.array(y)
    x_train, x_test = x[folds == 1], x[folds != 1]
    y_train, y_test = y[folds == 1], y[folds != 1]

    print(len(x_train))
    print(len(x_test))
    print("Done")

    # Spot-check the shapes of two training sequences and their labels.
    print(x_train[0].shape)
    print(y_train[0].shape)
    print(x_train[10].shape)
    print(y_train[10].shape)

    model = ChainCRF()
    ssvm = FrankWolfeSSVM(model=model, C=.1, max_iter=10)
    # The value returned by fit() is printed, exactly as before.
    print(ssvm.fit(x_train, y_train))
    print(ssvm.score(x_test, y_test))
def load_data(self):
    """Load the letters dataset and return ``(X, y, folds)``.

    ``X`` and ``y`` are the ``'data'`` and ``'labels'`` entries converted
    with ``np.array``; ``folds`` is the ``'folds'`` entry returned unchanged.
    """
    dataset = load_letters()
    features = np.array(dataset['data'])
    targets = np.array(dataset['labels'])
    return features, targets, dataset['folds']
def __init__(self):
    """Load the letters dataset and set up shuffled train / test attributes.

    Sequences whose fold id is 1 become the training set, all others the
    test set. The training examples are shuffled with NumPy's global RNG,
    after which the derived attributes (feature dimension, set sizes,
    ``to_one_hot`` labels and ``_get_lst`` outputs) are computed.
    """
    dataset = load_letters()
    folds = dataset['folds']
    self.dict = "abcdefghijklmnopqrstuvwxyz"
    data = np.array(dataset['data'])
    labels = np.array(dataset['labels'])

    in_train = folds == 1
    in_test = folds != 1
    self.X_train, self.X_test = data[in_train], data[in_test]
    self.y_train, self.y_test = labels[in_train], labels[in_test]

    # Shuffle: one random ordering is applied to both features and labels
    # so that examples stay paired.
    order = np.arange(len(self.X_train))
    np.random.shuffle(order)
    self.X_train = self.X_train[order]
    self.y_train = self.y_train[order]

    # Second axis length of the first training example.
    self.DIM = self.X_train[0].shape[1]
    self.test_num = len(self.X_test)
    self.train_num = len(self.X_train)

    self.y_one_hot_train = self.to_one_hot(self.y_train)
    self.y_one_hot_test = self.to_one_hot(self.y_test)

    train_lists = self._get_lst(self.X_train, self.y_train)
    self.X_lst_train, self.y_lst_train = train_lists
    test_lists = self._get_lst(self.X_test, self.y_test)
    self.X_lst_test, self.y_lst_test = test_lists
def chain_crf():
    """Load the letters dataset and split it into fold-1 / other-fold parts.

    The split arrays are only bound to local names; the function returns
    ``None``.
    """
    from pystruct.datasets import load_letters

    dataset = load_letters()
    X, y, folds = ut.take(dataset, ['data', 'labels', 'folds'])
    X = np.array(X)
    y = np.array(y)

    train_mask = folds == 1
    test_mask = folds != 1
    X_train, X_test = X[train_mask], X[test_mask]
    y_train, y_test = y[train_mask], y[test_mask]

    # The length is evaluated but its value is discarded.
    len(X_train)
def loadWindows(start, stop, window):
    """Return windowed features and labels from the letters dataset.

    The dataset's ``'data'`` and ``'labels'`` entries are converted to
    arrays, filtered with ``selectWordsByLetters(..., start, stop)`` and then
    passed to ``splitWords`` together with ``window``.

    Returns a ``(window_features, window_labels)`` tuple, both cast to
    ``np.double``.
    """
    dataset = load_letters()
    features = np.array(dataset['data'])
    labels = np.array(dataset['labels'])

    selected_features = selectWordsByLetters(features, start, stop)
    selected_labels = selectWordsByLetters(labels, start, stop)

    windows = splitWords(selected_features, selected_labels, window)
    window_features, window_labels = windows
    return (window_features.astype(np.double),
            window_labels.astype(np.double))
There are obvious extensions that both methods could benefit from, such as
window features or non-linear kernels. This example is more meant to give a
demonstration of the CRF than to show its superiority.
"""
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import LinearSVC
from pystruct.datasets import load_letters
from pystruct.models import ChainCRF
from pystruct.learners import FrankWolfeSSVM

# Lowercase alphabet string (not used in the visible part of this script).
abc = "abcdefghijklmnopqrstuvwxyz"

letters = load_letters()
X, y, folds = letters['data'], letters['labels'], letters['folds']
# we convert the lists to object arrays, as that makes slicing much more
# convenient
X, y = np.array(X), np.array(y)
# Fold 1 is the training set; every other fold is used for testing.
X_train, X_test = X[folds == 1], X[folds != 1]
y_train, y_test = y[folds == 1], y[folds != 1]

# Train linear SVM
svm = LinearSVC(dual=False, C=.1)
# flatten input: the per-sequence arrays are stacked into a single feature
# matrix (vstack) and a single label vector (hstack) before fitting
svm.fit(np.vstack(X_train), np.hstack(y_train))

# Train linear chain CRF
model = ChainCRF()
# Learner is only constructed here; no fit call appears in this part.
ssvm = FrankWolfeSSVM(model=model, C=.1, max_iter=11)
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from common.viewers.imshow import imshow
from pystruct.datasets import load_letters
from pystruct.models import ChainCRF, GraphCRF
from pystruct.learners import FrankWolfeSSVM
from sklearn.linear_model import LinearRegression
from common.utils import get_letters_in_pred_like, arrange_letters_in_pred_like
import cPickle

# Lowercase alphabet string.
abc = "abcdefghijklmnopqrstuvwxyz"

# Load data:
letters = load_letters()
X, y, folds = letters['data'], letters['labels'], letters['folds']
# we convert the lists to object arrays, as that makes slicing much more
# convenient
X, y = np.array(X), np.array(y)
# Fold 1 is the training set; every other fold is used for testing.
X_train, X_test = X[folds == 1], X[folds != 1]
y_train, y_test = y[folds == 1], y[folds != 1]

# NOTE(review): machine-specific absolute path; the script only runs where
# this directory exists.
net_base_path = '/media/ohadsh/sheard/googleDrive/Master/courses/probabilistic_graphical_models/outputs/part_3/training_2016_06_11/'

# Load pre-trained network
train_name = 'train_pred_-1.pkl'
test_name = 'test_pred_-1.pkl'
# Pickle data is binary, so the file is opened in 'rb' mode; text mode can
# corrupt the stream on platforms that translate line endings.
# NOTE(review): unpickling can execute arbitrary code -- only load files
# produced by a trusted run.
with open(os.path.join(net_base_path, train_name), 'rb') as f:
    train_net_pred = cPickle.load(f)
def load_data(self):
    """Return ``(X, y, folds)`` from the letters dataset.

    ``X`` (``'data'``) and ``y`` (``'labels'``) are wrapped with
    ``np.array``; ``folds`` is handed back exactly as loaded.
    """
    dataset = load_letters()
    raw_X = dataset['data']
    raw_y = dataset['labels']
    folds = dataset['folds']
    return np.array(raw_X), np.array(raw_y), folds
def test_dataset_loading():
    """Smoke test: each dataset loader can be called without raising."""
    # Loaders are invoked in the same order as before; results are discarded.
    for loader in (load_scene, load_letters, load_snakes):
        loader()