Example #1
import numpy as np
from time import time

from sklearn.decomposition import MiniBatchDictionaryLearning
from sklearn.feature_extraction.image import (extract_patches_2d,
                                              reconstruct_from_patches_2d)

import util
import zca


def get_dictionary_data(n_comp=20, zero_index=False):
    unlabeled = util.load_unlabeled_training(flatten=False)
    height, width = 32, 32
    n_images = 10000
    patch_size = (8, 8)

    unlabeled = util.standardize(unlabeled)
    np.random.shuffle(unlabeled)

    print('Extracting reference patches...')

    patches = np.empty((0, 64))
    t0 = time()

    for image in unlabeled[:n_images, :, :]:
        data = np.array(extract_patches_2d(image, patch_size,
                                           max_patches=0.01))
        data = data.reshape(data.shape[0], -1)
        data -= np.mean(data, axis=0)
        data /= np.std(data, axis=0) + 1e-20
        patches = np.concatenate([patches, data])

    print('done in %.2fs.' % (time() - t0))

    # whiten the patches; assuming zca.ZCA follows the sklearn fit/transform
    # convention, transform returns a new array and must be reassigned
    z = zca.ZCA()
    z.fit(patches)
    patches = z.transform(patches)

    print('Learning the dictionary...')
    t0 = time()
    dico = MiniBatchDictionaryLearning(n_components=n_comp, alpha=1)
    V = dico.fit(patches).components_
    dt = time() - t0
    print('done in %.2fs.' % dt)

    #plt.figure(figsize=(4.2, 4))
    #for i, comp in enumerate(V[:100]):
    #    plt.subplot(10, 10, i + 1)
    #    plt.imshow(comp.reshape(patch_size), cmap=plt.cm.gray_r,
    #               interpolation='nearest')
    #    plt.xticks(())
    #    plt.yticks(())
    #plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)
    #plt.show()

    labeled_data, labels = util.load_labeled_training(flatten=False,
                                                      zero_index=zero_index)
    labeled_data = util.standardize(labeled_data)

    test_data = util.load_all_test(flatten=False)
    test_data = util.standardize(test_data)

    #util.render_matrix(test_data, flattened=False)

    print('Reconstructing the training images...')
    t0 = time()
    reconstructed_images = np.empty((0, 32, 32))

    for i, image in enumerate(labeled_data):
        data = extract_patches_2d(image, patch_size)
        data = data.reshape(data.shape[0], -1)
        data -= np.mean(data, axis=0)
        data /= np.std(data, axis=0) + 1e-20

        code = dico.transform(data)
        patches = np.dot(code, V)
        # reassign: transform returns the whitened patches
        patches = z.transform(patches)
        patches = patches.reshape(len(data), *patch_size)

        data = reconstruct_from_patches_2d(patches, (height, width))
        data = data.reshape(1, 32, 32)
        reconstructed_images = np.concatenate([reconstructed_images, data])

    print('done in %.2fs.' % (time() - t0))

    # flatten each image into a row vector
    n, x, y = reconstructed_images.shape
    training_images = reconstructed_images.reshape(n, x * y)
    assert training_images.shape == (n, x * y)

    print('Reconstructing the test images...')
    t0 = time()
    reconstructed_test_images = np.empty((0, 32, 32))

    for image in test_data:
        data = extract_patches_2d(image, patch_size)
        data = data.reshape(data.shape[0], -1)
        data -= np.mean(data, axis=0)
        data /= np.std(data, axis=0) + 1e-20

        code = dico.transform(data)
        patches = np.dot(code, V)
        # reassign: transform returns the whitened patches
        patches = z.transform(patches)
        patches = patches.reshape(len(data), *patch_size)

        data = reconstruct_from_patches_2d(patches, (height, width))
        data = data.reshape(1, 32, 32)
        reconstructed_test_images = np.concatenate(
            [reconstructed_test_images, data])

    print('done in %.2fs.' % (time() - t0))

    # flatten each image into a row vector
    n, x, y = reconstructed_test_images.shape
    test_images = reconstructed_test_images.reshape(n, x * y)
    assert test_images.shape == (n, x * y)

    return (training_images, labels, test_images)
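
The examples depend on a project-local `zca` module that isn't shown here. The sketch below is a minimal stand-in, assuming `zca.ZCA` follows the sklearn `fit`/`transform` convention; the class name matches the usage above, but the `regularization` parameter and its default are assumptions, not taken from the original project.

import numpy as np

class ZCA(object):
    """Minimal ZCA whitening transformer (sklearn-style fit/transform)."""

    def __init__(self, regularization=1e-5):
        self.regularization = regularization  # assumed default, not from the source

    def fit(self, X):
        # Center the data and eigendecompose its covariance matrix.
        self.mean_ = X.mean(axis=0)
        X_c = X - self.mean_
        cov = np.dot(X_c.T, X_c) / X_c.shape[0]
        eigvals, eigvecs = np.linalg.eigh(cov)
        # Whitening matrix: U diag(1 / sqrt(lambda + eps)) U^T.
        inv_sqrt = np.diag(1.0 / np.sqrt(eigvals + self.regularization))
        self.whitening_ = eigvecs.dot(inv_sqrt).dot(eigvecs.T)
        return self

    def transform(self, X):
        # Returns a new whitened array; it does not modify X in place.
        return np.dot(X - self.mean_, self.whitening_)

Because `transform` returns a new array rather than whitening in place, its result has to be reassigned, as the calls in the examples now do.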
Example #2
import numpy as np
from time import time

from scipy.stats import mode
from sklearn.decomposition import MiniBatchDictionaryLearning
from sklearn.feature_extraction.image import extract_patches_2d
from sklearn.svm import SVC
#from sklearn import cross_validation  # only needed for the commented cross-val block

import util
import zca


def get_dictionary_data(n_comp=20, zero_index=True):
    unlabeled = util.load_unlabeled_training(flatten=False)
    height, width = 32, 32
    n_images = 10000
    patch_size = (8, 8)

    unlabeled = util.standardize(unlabeled)
    np.random.shuffle(unlabeled)

    print('Extracting reference patches...')

    patches = np.empty((0, 64))
    t0 = time()

    for image in unlabeled[:n_images, :, :]:
        data = np.array(extract_patches_2d(image, patch_size, max_patches=0.10))
        data = data.reshape(data.shape[0], -1)
        data -= np.mean(data, axis=0)
        data /= np.std(data, axis=0) + 1e-20
        patches = np.concatenate([patches, data])

    print('done in %.2fs.' % (time() - t0))

    # whiten the patches; assuming zca.ZCA follows the sklearn fit/transform
    # convention, transform returns a new array and must be reassigned
    z = zca.ZCA()
    z.fit(patches)
    patches = z.transform(patches)

    print('Learning the dictionary...')
    t0 = time()
    dico = MiniBatchDictionaryLearning(n_components=n_comp, alpha=1)
    V = dico.fit(patches).components_
    dt = time() - t0
    print('done in %.2fs.' % dt)

    #plt.figure(figsize=(4.2, 4))
    #for i, comp in enumerate(V[:100]):
    #    plt.subplot(10, 10, i + 1)
    #    plt.imshow(comp.reshape(patch_size), cmap=plt.cm.gray_r,
    #               interpolation='nearest')
    #    plt.xticks(())
    #    plt.yticks(())
    #plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)
    #plt.show()

    labeled_data, labels = util.load_labeled_training(flatten=False, zero_index=zero_index)
    labeled_data = util.standardize(labeled_data)

    test_data = util.load_all_test(flatten=False)
    test_data = util.standardize(test_data)

    #util.render_matrix(test_data, flattened=False)

    print('Training SVM with the training images...')
    t0 = time()
    reconstructed_images = np.empty((0, 64))
    multiplied_labels = np.empty((0))

    for image, label in zip(labeled_data, labels):
        data = extract_patches_2d(image, patch_size, max_patches=0.50)
        data = data.reshape(data.shape[0], -1)
        data -= np.mean(data, axis=0)
        data /= np.std(data, axis=0) + 1e-20

        code = dico.transform(data)
        patches = np.dot(code, V)
        # reassign: transform returns the whitened patches
        patches = z.transform(patches)

        reconstructed_images = np.concatenate([reconstructed_images, patches])
        extended_labels = np.asarray([label] * len(patches))
        multiplied_labels = np.concatenate([multiplied_labels, extended_labels])

    print(reconstructed_images.shape, multiplied_labels.shape)
    svc = SVC()
    #print('Getting cross-val scores...')
    #scores = cross_validation.cross_val_score(svc, reconstructed_images, multiplied_labels, cv=10)
    #print('cross-val scores:', scores)
    #print('cross-val mean:', np.mean(scores))
    #print('cross-val variance:', np.var(scores))

    print('done in %.2fs.' % (time() - t0))

    svc.fit(reconstructed_images, multiplied_labels)

    print('Reconstructing the test images...')
    t0 = time()

    predictions = []

    for i, image in enumerate(test_data):
        data = extract_patches_2d(image, patch_size, max_patches=0.25)
        data = data.reshape(data.shape[0], -1)
        data -= np.mean(data, axis=0)
        data /= np.std(data, axis=0) + 1e-20

        code = dico.transform(data)
        patches = np.dot(code, V)
        # reassign: transform returns the whitened patches
        patches = z.transform(patches)

        pred = svc.predict(patches)
        print('Variance in the predictions:', np.var(pred))
        # majority vote over the patch-level predictions; scipy's mode
        # returns (modes, counts), so unpack the scalar label
        predictions.append(mode(pred)[0][0])

    print('done in %.2fs.' % (time() - t0))

    # convert back to one-indexed labels before writing out
    # (+= 1 on a Python list would raise a TypeError)
    predictions = np.asarray(predictions) + 1
    util.write_results(predictions, 'svm_patches_25_percent_20_comp.csv')
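
A quick illustration of the majority vote used above: with scipy versions before 1.9, `scipy.stats.mode` returns a pair of arrays (modes and counts), so the scalar label has to be unpacked before it can be collected. The values below are made up for demonstration.

import numpy as np
from scipy.stats import mode

patch_preds = np.array([3, 3, 5, 3, 1, 3])  # hypothetical per-patch SVM outputs
majority = mode(patch_preds)[0][0]          # mode returns (array([3]), array([4])); unpack to 3
print(majority)                             # image-level prediction: 3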
Example #3
from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix, DefaultViewConverter
from pylearn2.datasets import preprocessing
from pylearn2.format.target_format import convert_to_one_hot

import pylab as plt
import cPickle as pickle  # Python 2; use the built-in pickle on Python 3
import numpy as np
from time import time

from sklearn.decomposition import MiniBatchDictionaryLearning
from sklearn.feature_extraction.image import (extract_patches_2d,
                                              reconstruct_from_patches_2d)

import util
import zca
import dictionary_learning

if __name__ == "__main__":

    #train_data, train_labels = util.load_labeled_training(flatten=True, zero_index=True)
    #train_data = util.standardize(train_data)
    test_data = util.load_all_test(flatten=True)
    test_data = util.standardize(test_data)

    #train_data_20, _, test_data_20 = dictionary_learning.get_dictionary_data(n_comp=20, zero_index=True)
    #train_data_100, _, test_data_100 = dictionary_learning.get_dictionary_data(n_comp=100, zero_index=True)

    # convert the training labels into one-hot format, as required by the pylearn2 model
    #train_labels = convert_to_one_hot(train_labels, dtype='int64', max_labels=7, mode='stack')

    # pickle the data
    #serial.save('training_data_for_pylearn2.pkl', train_data)
    #serial.save('training_data_20_components_for_pylearn2.pkl', train_data_20)
    #serial.save('training_data_100_components_for_pylearn2.pkl', train_data_100)

    #serial.save('training_labels_for_pylearn2.pkl', train_labels)


def get_dictionary_data(n_comp=20, zero_index=False):
    unlabeled = util.load_unlabeled_training(flatten=False)
    height, width = 32, 32
    n_images = 10000
    patch_size = (8, 8)

    unlabeled = util.standardize(unlabeled)
    np.random.shuffle(unlabeled)

    print('Extracting reference patches...')

    patches = np.empty((0, 64))
    t0 = time()

    for image in unlabeled[:n_images, :, :]:
        data = np.array(extract_patches_2d(image, patch_size, max_patches=0.01))
        data = data.reshape(data.shape[0], -1)
        data -= np.mean(data, axis=0)
        data /= np.std(data, axis=0) + 1e-20
        patches = np.concatenate([patches, data])

    print('done in %.2fs.' % (time() - t0))

    # whiten the patches; assuming zca.ZCA follows the sklearn fit/transform
    # convention, transform returns a new array and must be reassigned
    z = zca.ZCA()
    z.fit(patches)
    patches = z.transform(patches)

    print('Learning the dictionary...')
    t0 = time()
    dico = MiniBatchDictionaryLearning(n_components=n_comp, alpha=1)
    V = dico.fit(patches).components_
    dt = time() - t0
    print('done in %.2fs.' % dt)

    #plt.figure(figsize=(4.2, 4))
    #for i, comp in enumerate(V[:100]):
    #    plt.subplot(10, 10, i + 1)
    #    plt.imshow(comp.reshape(patch_size), cmap=plt.cm.gray_r,
    #               interpolation='nearest')
    #    plt.xticks(())
    #    plt.yticks(())
    #plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)
    #plt.show()

    labeled_data, labels = util.load_labeled_training(flatten=False, zero_index=zero_index)
    labeled_data = util.standardize(labeled_data)

    test_data = util.load_all_test(flatten=False)
    test_data = util.standardize(test_data)

    #util.render_matrix(test_data, flattened=False)

    print('Reconstructing the training images...')
    t0 = time()
    reconstructed_images = np.empty((0, 32, 32))

    for i, image in enumerate(labeled_data):
        data = extract_patches_2d(image, patch_size)
        data = data.reshape(data.shape[0], -1)
        data -= np.mean(data, axis=0)
        data /= np.std(data, axis=0) + 1e-20

        code = dico.transform(data)
        patches = np.dot(code, V)
        # reassign: transform returns the whitened patches
        patches = z.transform(patches)
        patches = patches.reshape(len(data), *patch_size)

        data = reconstruct_from_patches_2d(patches, (height, width))
        data = data.reshape(1, 32, 32)
        reconstructed_images = np.concatenate([reconstructed_images, data])

    print('done in %.2fs.' % (time() - t0))

    # flatten each image into a row vector
    n, x, y = reconstructed_images.shape
    training_images = reconstructed_images.reshape(n, x * y)
    assert training_images.shape == (n, x * y)

    print('Reconstructing the test images...')
    t0 = time()
    reconstructed_test_images = np.empty((0, 32, 32))

    for image in test_data:
        data = extract_patches_2d(image, patch_size)
        data = data.reshape(data.shape[0], -1)
        data -= np.mean(data, axis=0)
        data /= np.std(data, axis=0) + 1e-20

        code = dico.transform(data)
        patches = np.dot(code, V)
        # reassign: transform returns the whitened patches
        patches = z.transform(patches)
        patches = patches.reshape(len(data), *patch_size)

        data = reconstruct_from_patches_2d(patches, (height, width))
        data = data.reshape(1, 32, 32)
        reconstructed_test_images = np.concatenate([reconstructed_test_images, data])

    print('done in %.2fs.' % (time() - t0))

    # flatten each image into a row vector
    n, x, y = reconstructed_test_images.shape
    test_images = reconstructed_test_images.reshape(n, x * y)
    assert test_images.shape == (n, x * y)

    return (training_images, labels, test_images)
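
A possible driver for Example #3, mirroring the commented-out pickling lines in its `__main__` block. It assumes `serial` is `pylearn2.utils.serial` (the commented code calls `serial.save` without showing the import), and the test-set filename is hypothetical.

from pylearn2.utils import serial  # assumed source of the serial.save calls above

# Learn a 20-atom dictionary and push both splits through it.
train_20, labels_20, test_20 = get_dictionary_data(n_comp=20, zero_index=True)

# Pickle the reconstructed data for a pylearn2 model, as in the commented lines.
serial.save('training_data_20_components_for_pylearn2.pkl', train_20)
serial.save('test_data_20_components_for_pylearn2.pkl', test_20)  # hypothetical filename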