def _normalized_patches(image, patch_size, max_patches=None):
    """Cut `image` into flattened patches and normalise every column.

    Each patch becomes one row. Every column (one pixel position inside the
    patch) then has its mean over this image's patches removed and is divided
    by its standard deviation.

    Args:
        image: 2-D image array handed to `extract_patches_2d`.
        patch_size: (patch_height, patch_width).
        max_patches: forwarded to `extract_patches_2d`; None extracts every
            patch.

    Returns:
        2-D array with one normalised, flattened patch per row.
    """
    data = np.array(extract_patches_2d(image, patch_size,
                                       max_patches=max_patches))
    data = data.reshape(data.shape[0], -1)
    data -= np.mean(data, axis=0)
    # The tiny epsilon keeps constant columns from dividing by zero.
    data /= np.std(data, axis=0) + 1e-20
    return data


def _reconstruct_flat_images(images, dico, atoms, z, patch_size, image_shape):
    """Rebuild every image from its dictionary-coded patches and flatten it.

    Args:
        images: iterable of 2-D images.
        dico: fitted dictionary model; its `transform` encodes the patches.
        atoms: dictionary components used to expand the codes again.
        z: fitted ZCA object (see the review note in the loop).
        patch_size: (patch_height, patch_width).
        image_shape: (height, width) of every image.

    Returns:
        2-D array with one flattened reconstructed image per row.
    """
    # Collect the results and concatenate once; concatenating inside the loop
    # re-copies everything accumulated so far on every iteration.
    rebuilt = [np.empty((0,) + tuple(image_shape))]
    for image in images:
        data = _normalized_patches(image, patch_size)
        code = dico.transform(data)
        recon = np.dot(code, atoms)
        # NOTE(review): the return value is discarded, exactly as in the
        # original code. Unless ZCA.transform works in place this has no
        # effect on `recon` -- confirm against the zca module.
        z.transform(recon)
        recon = recon.reshape((len(data),) + tuple(patch_size))
        full = reconstruct_from_patches_2d(recon, tuple(image_shape))
        rebuilt.append(full.reshape((1,) + tuple(image_shape)))
    stacked = np.concatenate(rebuilt)
    n, x, y = stacked.shape
    return stacked.reshape(n, x * y)


def get_dictionary_data(n_comp=20, zero_index=False):
    """Learn a patch dictionary and rebuild the labeled and test images.

    A sample of 8x8 patches from (at most 10000 shuffled) unlabeled images is
    used to fit a ZCA object and a MiniBatchDictionaryLearning model. Every
    labeled and every test image is then split into all of its patches, the
    patches are encoded with the dictionary and expanded again, and the image
    is reassembled from them and flattened.

    Args:
        n_comp: number of dictionary components to learn.
        zero_index: forwarded to `util.load_labeled_training`. The previous
            code always passed True and silently ignored this argument.

    Returns:
        Tuple `(training_images, labels, test_images)`. Both image entries are
        2-D arrays holding one flattened 32*32 reconstruction per row;
        `labels` is returned exactly as loaded by `util`.
    """
    height, width = 32, 32
    n_images = 10000
    patch_size = (8, 8)
    n_features = patch_size[0] * patch_size[1]

    unlabeled = util.load_unlabeled_training(flatten=False)
    unlabeled = util.standardize(unlabeled)
    np.random.shuffle(unlabeled)

    print('Extracting reference patches...')
    t0 = time()
    # The empty seed keeps the result well-defined (and float64) even when
    # there are no unlabeled images.
    chunks = [np.empty((0, n_features))]
    for image in unlabeled[:n_images, :, :]:
        chunks.append(_normalized_patches(image, patch_size,
                                          max_patches=0.01))
    patches = np.concatenate(chunks)
    print('done in %.2fs.' % (time() - t0))

    # whiten the patches
    z = zca.ZCA()
    z.fit(patches)
    # NOTE(review): the transformed patches are not assigned, so the
    # dictionary below is learned on `patches` as left by this call. Unless
    # ZCA.transform modifies its argument in place the whitening is a no-op --
    # confirm against the zca module before changing it.
    z.transform(patches)

    print('Learning the dictionary...')
    t0 = time()
    dico = MiniBatchDictionaryLearning(n_components=n_comp, alpha=1)
    V = dico.fit(patches).components_
    print('done in %.2fs.' % (time() - t0))

    # Debug aid: plot the learned atoms.
    #plt.figure(figsize=(4.2, 4))
    #for i, comp in enumerate(V[:100]):
    #    plt.subplot(10, 10, i + 1)
    #    plt.imshow(comp.reshape(patch_size), cmap=plt.cm.gray_r,
    #               interpolation='nearest')
    #    plt.xticks(())
    #    plt.yticks(())
    #plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)
    #plt.show()

    labeled_data, labels = util.load_labeled_training(flatten=False,
                                                      zero_index=zero_index)
    labeled_data = util.standardize(labeled_data)
    test_data = util.load_all_test(flatten=False)
    test_data = util.standardize(test_data)
    #util.render_matrix(test_data, flattened=False)

    print('Reconstructing the training images...')
    t0 = time()
    training_images = _reconstruct_flat_images(
        labeled_data, dico, V, z, patch_size, (height, width))
    print('done in %.2fs.' % (time() - t0))

    print('Reconstructing the test images...')
    t0 = time()
    test_images = _reconstruct_flat_images(
        test_data, dico, V, z, patch_size, (height, width))
    print('done in %.2fs.' % (time() - t0))

    return (training_images, labels, test_images)
def _normalized_patches(image, patch_size, max_patches=None):
    """Cut `image` into flattened patches and normalise every column.

    Each patch becomes one row. Every column (one pixel position inside the
    patch) then has its mean over this image's patches removed and is divided
    by its standard deviation.

    Args:
        image: 2-D image array handed to `extract_patches_2d`.
        patch_size: (patch_height, patch_width).
        max_patches: forwarded to `extract_patches_2d`; None extracts every
            patch.

    Returns:
        2-D array with one normalised, flattened patch per row.
    """
    data = np.array(extract_patches_2d(image, patch_size,
                                       max_patches=max_patches))
    data = data.reshape(data.shape[0], -1)
    data -= np.mean(data, axis=0)
    # The tiny epsilon keeps constant columns from dividing by zero.
    data /= np.std(data, axis=0) + 1e-20
    return data


def _dictionary_patches(image, dico, atoms, z, patch_size, max_patches):
    """Return the dictionary reconstruction of a sample of `image` patches."""
    data = _normalized_patches(image, patch_size, max_patches=max_patches)
    code = dico.transform(data)
    recon = np.dot(code, atoms)
    # NOTE(review): the return value is discarded, exactly as in the original
    # code. Unless ZCA.transform works in place this has no effect on
    # `recon` -- confirm against the zca module.
    z.transform(recon)
    return recon


def _majority_label(pred):
    """Return the most frequent value in `pred` (smallest value on ties)."""
    values, counts = np.unique(pred, return_counts=True)
    return values[np.argmax(counts)]


def get_dictionary_data(n_comp=20, zero_index=True):
    """Classify the test images by SVM voting over dictionary-coded patches.

    A sample of 8x8 patches from (at most 10000 shuffled) unlabeled images is
    used to fit a ZCA object and a MiniBatchDictionaryLearning model. An SVC
    is trained on reconstructed patches of the labeled images, every patch
    carrying the label of its image. Each test image is predicted patch by
    patch and receives the most frequent patch label. The predictions are
    written with `util.write_results`; nothing is returned.

    Args:
        n_comp: number of dictionary components to learn.
        zero_index: forwarded to `util.load_labeled_training`. When true, 1 is
            added to the predictions before they are written. The previous
            code ignored this argument and always did both.
            NOTE(review): assumes zero-indexed labels equal the output labels
            minus one -- confirm against util.
    """
    n_images = 10000
    patch_size = (8, 8)
    n_features = patch_size[0] * patch_size[1]

    unlabeled = util.load_unlabeled_training(flatten=False)
    unlabeled = util.standardize(unlabeled)
    np.random.shuffle(unlabeled)

    print('Extracting reference patches...')
    t0 = time()
    # Collect and concatenate once; concatenating inside the loop re-copies
    # everything accumulated so far on every iteration.
    chunks = [np.empty((0, n_features))]
    for image in unlabeled[:n_images, :, :]:
        chunks.append(_normalized_patches(image, patch_size,
                                          max_patches=0.10))
    patches = np.concatenate(chunks)
    print('done in %.2fs.' % (time() - t0))

    # whiten the patches
    z = zca.ZCA()
    z.fit(patches)
    # NOTE(review): the transformed patches are not assigned, so the
    # dictionary below is learned on `patches` as left by this call. Unless
    # ZCA.transform modifies its argument in place the whitening is a no-op --
    # confirm against the zca module before changing it.
    z.transform(patches)

    print('Learning the dictionary...')
    t0 = time()
    dico = MiniBatchDictionaryLearning(n_components=n_comp, alpha=1)
    V = dico.fit(patches).components_
    print('done in %.2fs.' % (time() - t0))

    # Debug aid: plot the learned atoms.
    #plt.figure(figsize=(4.2, 4))
    #for i, comp in enumerate(V[:100]):
    #    plt.subplot(10, 10, i + 1)
    #    plt.imshow(comp.reshape(patch_size), cmap=plt.cm.gray_r,
    #               interpolation='nearest')
    #    plt.xticks(())
    #    plt.yticks(())
    #plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)
    #plt.show()

    labeled_data, labels = util.load_labeled_training(flatten=False,
                                                      zero_index=zero_index)
    labeled_data = util.standardize(labeled_data)
    test_data = util.load_all_test(flatten=False)
    test_data = util.standardize(test_data)
    #util.render_matrix(test_data, flattened=False)

    print('Training SVM with the training images...')
    t0 = time()
    feature_chunks = [np.empty((0, n_features))]
    label_chunks = []
    for image, label in zip(labeled_data, labels):
        recon = _dictionary_patches(image, dico, V, z, patch_size, 0.50)
        feature_chunks.append(recon)
        # Every patch inherits the label of the image it was cut from. The
        # labels keep the dtype they were loaded with; the original float64
        # seed array promoted them to float.
        label_chunks.append(np.repeat(label, len(recon)))
    reconstructed_images = np.concatenate(feature_chunks)
    if label_chunks:
        multiplied_labels = np.concatenate(label_chunks)
    else:
        multiplied_labels = np.empty((0,))
    print(reconstructed_images.shape, multiplied_labels.shape)

    svc = SVC()
    #print('Getting cross-val scores...')
    #scores = cross_validation.cross_val_score(svc, reconstructed_images,
    #                                          multiplied_labels, cv=10)
    #print('cross-val scores:', scores)
    #print('cross-val mean:', np.mean(scores))
    #print('cross-val variance:', np.var(scores))
    # Fit before reporting the elapsed time so that it covers the training
    # announced above.
    svc.fit(reconstructed_images, multiplied_labels)
    print('done in %.2fs.' % (time() - t0))

    print('Reconstructing the test images...')
    t0 = time()
    predictions = []
    for image in test_data:
        recon = _dictionary_patches(image, dico, V, z, patch_size, 0.25)
        pred = svc.predict(recon)
        print('Variance in the predictions:', np.var(pred))
        # Explicit majority vote. The original called `mode(pred)`, which is
        # presumably scipy.stats.mode and returns a (modes, counts) pair
        # rather than a single label.
        predictions.append(_majority_label(pred))
    print('done in %.2fs.' % (time() - t0))

    # `predictions` used to stay a Python list, so `predictions += 1` raised
    # a TypeError; convert to an array before shifting the labels.
    predictions = np.asarray(predictions)
    if zero_index:
        predictions = predictions + 1
    util.write_results(predictions, 'svm_patches_25_percent_20_comp.csv')
from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix, DefaultViewConverter
from pylearn2.datasets import preprocessing
from pylearn2.format.target_format import convert_to_one_hot
import pylab as plt
import cPickle as pickle
import numpy as np
import util
import dictionary_learning

if __name__ == "__main__":
    # Only the flattened, standardized test set is prepared at the moment.
    # The training-set export below is disabled.
    # NOTE(review): the disabled lines call `serial`, which this file does
    # not import (presumably pylearn2.utils.serial) -- add the import before
    # re-enabling them.
    test_data = util.standardize(util.load_all_test(flatten=True))

    # Disabled: load the raw training data.
    #train_data, train_labels = util.load_labeled_training(flatten=True, zero_index=True)
    #train_data = util.standardize(train_data)

    # Disabled: dictionary-reconstructed variants of the data.
    #train_data_20, _, test_data_20 = dictionary_learning.get_dictionary_data(n_comp=20, zero_index=True)
    #train_data_100, _, test_data_100 = dictionary_learning.get_dictionary_data(n_comp=100, zero_index=True)

    # Disabled: convert the training labels into one-hot format, as required
    # by the pylearn2 model.
    #train_labels = convert_to_one_hot(train_labels, dtype='int64', max_labels=7, mode='stack')

    # Disabled: pickle the data.
    #serial.save('training_data_for_pylearn2.pkl', train_data)
    #serial.save('training_data_20_components_for_pylearn2.pkl', train_data_20)
    #serial.save('training_data_100_components_for_pylearn2.pkl', train_data_100)
    #serial.save('training_labels_for_pylearn2.pkl', train_labels)
def _normalized_patches(image, patch_size, max_patches=None):
    """Cut `image` into flattened patches and normalise every column.

    Each patch becomes one row. Every column (one pixel position inside the
    patch) then has its mean over this image's patches removed and is divided
    by its standard deviation.

    Args:
        image: 2-D image array handed to `extract_patches_2d`.
        patch_size: (patch_height, patch_width).
        max_patches: forwarded to `extract_patches_2d`; None extracts every
            patch.

    Returns:
        2-D array with one normalised, flattened patch per row.
    """
    data = np.array(extract_patches_2d(image, patch_size,
                                       max_patches=max_patches))
    data = data.reshape(data.shape[0], -1)
    data -= np.mean(data, axis=0)
    # The tiny epsilon keeps constant columns from dividing by zero.
    data /= np.std(data, axis=0) + 1e-20
    return data


def _reconstruct_flat_images(images, dico, atoms, z, patch_size, image_shape):
    """Rebuild every image from its dictionary-coded patches and flatten it.

    Args:
        images: iterable of 2-D images.
        dico: fitted dictionary model; its `transform` encodes the patches.
        atoms: dictionary components used to expand the codes again.
        z: fitted ZCA object (see the review note in the loop).
        patch_size: (patch_height, patch_width).
        image_shape: (height, width) of every image.

    Returns:
        2-D array with one flattened reconstructed image per row.
    """
    # Collect the results and concatenate once; concatenating inside the loop
    # re-copies everything accumulated so far on every iteration.
    rebuilt = [np.empty((0,) + tuple(image_shape))]
    for image in images:
        data = _normalized_patches(image, patch_size)
        code = dico.transform(data)
        recon = np.dot(code, atoms)
        # NOTE(review): the return value is discarded, exactly as in the
        # original code. Unless ZCA.transform works in place this has no
        # effect on `recon` -- confirm against the zca module.
        z.transform(recon)
        recon = recon.reshape((len(data),) + tuple(patch_size))
        full = reconstruct_from_patches_2d(recon, tuple(image_shape))
        rebuilt.append(full.reshape((1,) + tuple(image_shape)))
    stacked = np.concatenate(rebuilt)
    n, x, y = stacked.shape
    return stacked.reshape(n, x * y)


def get_dictionary_data(n_comp=20, zero_index=False):
    """Learn a patch dictionary and rebuild the labeled and test images.

    A sample of 8x8 patches from (at most 10000 shuffled) unlabeled images is
    used to fit a ZCA object and a MiniBatchDictionaryLearning model. Every
    labeled and every test image is then split into all of its patches, the
    patches are encoded with the dictionary and expanded again, and the image
    is reassembled from them and flattened.

    Args:
        n_comp: number of dictionary components to learn.
        zero_index: forwarded to `util.load_labeled_training`. The previous
            code always passed True and silently ignored this argument.

    Returns:
        Tuple `(training_images, labels, test_images)`. Both image entries are
        2-D arrays holding one flattened 32*32 reconstruction per row;
        `labels` is returned exactly as loaded by `util`.
    """
    height, width = 32, 32
    n_images = 10000
    patch_size = (8, 8)
    n_features = patch_size[0] * patch_size[1]

    unlabeled = util.load_unlabeled_training(flatten=False)
    unlabeled = util.standardize(unlabeled)
    np.random.shuffle(unlabeled)

    print('Extracting reference patches...')
    t0 = time()
    # The empty seed keeps the result well-defined (and float64) even when
    # there are no unlabeled images.
    chunks = [np.empty((0, n_features))]
    for image in unlabeled[:n_images, :, :]:
        chunks.append(_normalized_patches(image, patch_size,
                                          max_patches=0.01))
    patches = np.concatenate(chunks)
    print('done in %.2fs.' % (time() - t0))

    # whiten the patches
    z = zca.ZCA()
    z.fit(patches)
    # NOTE(review): the transformed patches are not assigned, so the
    # dictionary below is learned on `patches` as left by this call. Unless
    # ZCA.transform modifies its argument in place the whitening is a no-op --
    # confirm against the zca module before changing it.
    z.transform(patches)

    print('Learning the dictionary...')
    t0 = time()
    dico = MiniBatchDictionaryLearning(n_components=n_comp, alpha=1)
    V = dico.fit(patches).components_
    print('done in %.2fs.' % (time() - t0))

    # Debug aid: plot the learned atoms.
    #plt.figure(figsize=(4.2, 4))
    #for i, comp in enumerate(V[:100]):
    #    plt.subplot(10, 10, i + 1)
    #    plt.imshow(comp.reshape(patch_size), cmap=plt.cm.gray_r,
    #               interpolation='nearest')
    #    plt.xticks(())
    #    plt.yticks(())
    #plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)
    #plt.show()

    labeled_data, labels = util.load_labeled_training(flatten=False,
                                                      zero_index=zero_index)
    labeled_data = util.standardize(labeled_data)
    test_data = util.load_all_test(flatten=False)
    test_data = util.standardize(test_data)
    #util.render_matrix(test_data, flattened=False)

    print('Reconstructing the training images...')
    t0 = time()
    training_images = _reconstruct_flat_images(
        labeled_data, dico, V, z, patch_size, (height, width))
    print('done in %.2fs.' % (time() - t0))

    print('Reconstructing the test images...')
    t0 = time()
    test_images = _reconstruct_flat_images(
        test_data, dico, V, z, patch_size, (height, width))
    print('done in %.2fs.' % (time() - t0))

    return (training_images, labels, test_images)