def main():
    images, labels = load_labeled_training(flatten=True)
    images = standardize(images)
    unl = load_unlabeled_training(flatten=True)
    unl = standardize(unl)
    test = load_public_test(flatten=True)
    test = standardize(test)
    shuffle_in_unison(images, labels)
    #d = DictionaryLearning().fit(images)
    d = MiniBatchDictionaryLearning(n_components=500, n_iter=500, verbose=True).fit(images)
    s = SparseCoder(d.components_)
    proj_test = s.transform(images)
    pt = s.transform(test)
    #kpca = KernelPCA(kernel="rbf")
    #kpca.fit(unl)
    #test_proj = kpca.transform(images)
    #pt = kpca.transform(test)
    #spca = SparsePCA().fit(unl)
    #test_proj = spca.transform(images)
    #pt = spca.transform(test)
    svc = SVC()
    scores = cross_validation.cross_val_score(svc, proj_test, labels, cv=10)
    print scores
    print np.mean(scores)
    print np.var(scores)
    svc.fit(proj_test, labels)
    pred = svc.predict(pt)
    write_results(pred, '../svm_res.csv')
def main():
    images, labels = load_labeled_training(flatten=True)
    images = standardize(images)
    # images, labels = load_pca_proj(K=100)
    shuffle_in_unison(images, labels)
    ds = ClassificationDataSet(images.shape[1], 1, nb_classes=7)
    for i, l in zip(images, labels):
        ds.addSample(i, [l - 1])
    # ds._convertToOneOfMany()
    test, train = ds.splitWithProportion(0.2)
    test._convertToOneOfMany()
    train._convertToOneOfMany()
    net = shortcuts.buildNetwork(train.indim, 1000, train.outdim, outclass=SoftmaxLayer)

    trainer = BackpropTrainer(net, dataset=train, momentum=0.1, learningrate=0.01, weightdecay=0.05)
    # trainer = RPropMinusTrainer(net, dataset=train)
    # cv = validation.CrossValidator(trainer, ds)
    # print cv.validate()
    net.randomize()
    tr_labels_2 = net.activateOnDataset(train).argmax(axis=1)
    trnres = percentError(tr_labels_2, train["class"])
    # trnres = percentError(trainer.testOnClassData(dataset=train), train['class'])
    testres = percentError(trainer.testOnClassData(dataset=test), test["class"])
    print "Training error: %.10f, Test error: %.10f" % (trnres, testres)
    print "Iters: %d" % trainer.totalepochs

    for i in range(100):
        trainer.trainEpochs(10)
        trnres = percentError(trainer.testOnClassData(dataset=train), train["class"])
        testres = percentError(trainer.testOnClassData(dataset=test), test["class"])
        trnmse = trainer.testOnData(dataset=train)
        testmse = trainer.testOnData(dataset=test)
        print "Iteration: %d, Training error: %.5f, Test error: %.5f" % (trainer.totalepochs, trnres, testres)
        print "Training MSE: %.5f, Test MSE: %.5f" % (trnmse, testmse)
def main():
    K = 30  # Number of dimensions to PCA down to.
    test_image, test_label = load_labeled_training(flatten=True)
    train_image = load_unlabeled_training(flatten=True)
    pca = PCA(n_components=K).fit(train_image)
    proj_test = pca.transform(test_image)
    #v, mean, projX = pcaimg(train_image.T, K)
    #proj_test = np.dot(v.T, test_image.T).T
    #assert proj_test.shape[0] == test_label.shape[0]
    x_train, x_test, y_train, y_test = cross_validation.train_test_split(
        proj_test, test_label)
    nbrs = KNeighborsClassifier(n_neighbors=1)
    scores = cross_validation.cross_val_score(nbrs, proj_test, test_label, cv=3)
    print scores
def main():
    K = 30  # Number of dimensions to PCA down to.
    test_image, test_label = load_labeled_training(flatten=True)
    train_image = load_unlabeled_training(flatten=True)
    pca = PCA(n_components=K).fit(train_image)
    proj_test = pca.transform(test_image)
    #v, mean, projX = pcaimg(train_image.T, K)
    #proj_test = np.dot(v.T, test_image.T).T
    #assert proj_test.shape[0] == test_label.shape[0]
    x_train, x_test, y_train, y_test = cross_validation.train_test_split(
        proj_test, test_label)
    nbrs = KNeighborsClassifier(n_neighbors=1)
    scores = cross_validation.cross_val_score(nbrs,
                                              proj_test,
                                              test_label,
                                              cv=3)
    print scores
def main():
    images, labels = load_labeled_training(flatten=True)
    images = standardize(images)
    #images, labels = load_pca_proj(K=100)
    shuffle_in_unison(images, labels)
    ds = ClassificationDataSet(images.shape[1], 1, nb_classes=7)
    for i, l in zip(images, labels):
        ds.addSample(i, [l - 1])
    #ds._convertToOneOfMany()
    test, train = ds.splitWithProportion(0.2)
    test._convertToOneOfMany()
    train._convertToOneOfMany()
    net = shortcuts.buildNetwork(train.indim,
                                 1000,
                                 train.outdim,
                                 outclass=SoftmaxLayer)

    trainer = BackpropTrainer(net,
                              dataset=train,
                              momentum=0.1,
                              learningrate=0.01,
                              weightdecay=0.05)
    #trainer = RPropMinusTrainer(net, dataset=train)
    #cv = validation.CrossValidator(trainer, ds)
    #print cv.validate()
    net.randomize()
    tr_labels_2 = net.activateOnDataset(train).argmax(axis=1)
    trnres = percentError(tr_labels_2, train['class'])
    #trnres = percentError(trainer.testOnClassData(dataset=train), train['class'])
    testres = percentError(trainer.testOnClassData(dataset=test),
                           test['class'])
    print "Training error: %.10f, Test error: %.10f" % (trnres, testres)
    print "Iters: %d" % trainer.totalepochs

    for i in range(100):
        trainer.trainEpochs(10)
        trnres = percentError(trainer.testOnClassData(dataset=train),
                              train['class'])
        testres = percentError(trainer.testOnClassData(dataset=test),
                               test['class'])
        trnmse = trainer.testOnData(dataset=train)
        testmse = trainer.testOnData(dataset=test)
        print "Iteration: %d, Training error: %.5f, Test error: %.5f" % (
            trainer.totalepochs, trnres, testres)
        print "Training MSE: %.5f, Test MSE: %.5f" % (trnmse, testmse)
Exemple #6
0
def get_dictionary_data(n_comp=20, zero_index=False):
    unlabeled = util.load_unlabeled_training(flatten=False)
    height, width = 32, 32
    n_images = 10000
    patch_size = (8, 8)

    unlabeled = util.standardize(unlabeled)
    np.random.shuffle(unlabeled)

    print('Extracting reference patches...')

    patches = np.empty((0, 64))
    t0 = time()

    for image in unlabeled[:n_images, :, :]:
        data = np.array(extract_patches_2d(image, patch_size,
                                           max_patches=0.01))
        data = data.reshape(data.shape[0], -1)
        data -= np.mean(data, axis=0)
        data /= np.std(data, axis=0) + 1e-20
        patches = np.concatenate([patches, data])

    print('done in %.2fs.' % (time() - t0))

    # whiten the patches
    z = zca.ZCA()
    z.fit(patches)
    z.transform(patches)

    print('Learning the dictionary...')
    t0 = time()
    dico = MiniBatchDictionaryLearning(n_components=n_comp, alpha=1)
    V = dico.fit(patches).components_
    dt = time() - t0
    print('done in %.2fs.' % dt)

    #plt.figure(figsize=(4.2, 4))
    #for i, comp in enumerate(V[:100]):
    #    plt.subplot(10, 10, i + 1)
    #    plt.imshow(comp.reshape(patch_size), cmap=plt.cm.gray_r,
    #               interpolation='nearest')
    #    plt.xticks(())
    #    plt.yticks(())
    #plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)
    #plt.show()

    labeled_data, labels = util.load_labeled_training(flatten=False,
                                                      zero_index=True)
    labeled_data = util.standardize(labeled_data)

    test_data = util.load_all_test(flatten=False)
    test_data = util.standardize(test_data)

    #util.render_matrix(test_data, flattened=False)

    print('Reconstructing the training images...')
    t0 = time()
    reconstructed_images = np.empty((0, 32, 32))

    for i, image in enumerate(labeled_data):
        data = extract_patches_2d(image, patch_size)
        data = data.reshape(data.shape[0], -1)
        data -= np.mean(data, axis=0)
        data /= np.std(data, axis=0) + 1e-20

        code = dico.transform(data)
        patches = np.dot(code, V)
        z.transform(patches)
        patches = patches.reshape(len(data), *patch_size)

        data = reconstruct_from_patches_2d(patches, (width, height))
        data = data.reshape(1, 32, 32)
        reconstructed_images = np.concatenate([reconstructed_images, data])

    print('done in %.2fs.' % (time() - t0))

    # flatten
    n, x, y = reconstructed_images.shape
    training_images = reconstructed_images.reshape(
        reconstructed_images.shape[0],
        reconstructed_images.shape[1] * reconstructed_images.shape[2])
    assert training_images.shape == (n, x * y)

    print('Reconstructing the test images...')
    t0 = time()
    reconstructed_test_images = np.empty((0, 32, 32))

    for image in test_data:
        data = extract_patches_2d(image, patch_size)
        data = data.reshape(data.shape[0], -1)
        data -= np.mean(data, axis=0)
        data /= np.std(data, axis=0) + 1e-20

        code = dico.transform(data)
        patches = np.dot(code, V)
        z.transform(patches)
        patches = patches.reshape(len(data), *patch_size)

        data = reconstruct_from_patches_2d(patches, (width, height))
        data = data.reshape(1, 32, 32)
        reconstructed_test_images = np.concatenate(
            [reconstructed_test_images, data])

    print('done in %.2fs.' % (time() - t0))

    # flatten
    n, x, y = reconstructed_test_images.shape
    test_images = reconstructed_test_images.reshape(
        reconstructed_test_images.shape[0],
        reconstructed_test_images.shape[1] *
        reconstructed_test_images.shape[2])
    assert test_images.shape == (n, x * y)

    return (training_images, labels, test_images)
Exemple #7
0
def get_dictionary_data(n_comp=20, zero_index=True):
    unlabeled = util.load_unlabeled_training(flatten=False)
    height, width = 32, 32
    n_images = 10000
    patch_size = (8, 8)

    unlabeled = util.standardize(unlabeled)
    np.random.shuffle(unlabeled)

    print('Extracting reference patches...')

    patches = np.empty((0, 64))
    t0 = time()

    for image in unlabeled[:n_images, :, :]:
        data = np.array(extract_patches_2d(image, patch_size, max_patches=0.10))
        data = data.reshape(data.shape[0], -1)
        data -= np.mean(data, axis=0)
        data /= np.std(data, axis=0) + 1e-20
        patches = np.concatenate([patches, data])

    print('done in %.2fs.' % (time() - t0))

    # whiten the patches
    z = zca.ZCA()
    z.fit(patches)
    z.transform(patches)

    print('Learning the dictionary...')
    t0 = time()
    dico = MiniBatchDictionaryLearning(n_components=n_comp, alpha=1)
    V = dico.fit(patches).components_
    dt = time() - t0
    print('done in %.2fs.' % dt)

    #plt.figure(figsize=(4.2, 4))
    #for i, comp in enumerate(V[:100]):
    #    plt.subplot(10, 10, i + 1)
    #    plt.imshow(comp.reshape(patch_size), cmap=plt.cm.gray_r,
    #               interpolation='nearest')
    #    plt.xticks(())
    #    plt.yticks(())
    #plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)
    #plt.show()

    labeled_data, labels = util.load_labeled_training(flatten=False, zero_index=True)
    labeled_data = util.standardize(labeled_data)

    test_data = util.load_all_test(flatten=False)
    test_data = util.standardize(test_data)

    #util.render_matrix(test_data, flattened=False)

    print('Training SVM with the training images...')
    t0 = time()
    reconstructed_images = np.empty((0, 64))
    multiplied_labels = np.empty((0))

    for i in range(len(labeled_data)):
        image = labeled_data[i, :, :]
        label = labels[i]
        data = extract_patches_2d(image, patch_size, max_patches=0.50)
        data = data.reshape(data.shape[0], -1)
        data -= np.mean(data, axis=0)
        data /= np.std(data, axis=0) + 1e-20

        code = dico.transform(data)
        patches = np.dot(code, V)
        z.transform(patches)

        reconstructed_images = np.concatenate([reconstructed_images, patches])
        extended_labels = np.asarray([label] * len(patches))
        multiplied_labels = np.concatenate([multiplied_labels, extended_labels])

    print(reconstructed_images.shape, multiplied_labels.shape)
    svc = SVC()
    #print('Getting cross-val scores...')
    #scores = cross_validation.cross_val_score(svc, reconstructed_images, multiplied_labels, cv=10)
    #print('cross-val scores:', scores)
    #print('cross-val mean:', np.mean(scores))
    #print('cross-val variance:', np.var(scores))

    print('done in %.2fs.' % (time() - t0))

    svc.fit(reconstructed_images, multiplied_labels)

    print('Reconstructing the test images...')
    t0 = time()

    predictions = []

    for i, image in enumerate(test_data):
        data = extract_patches_2d(image, patch_size, max_patches=0.25)
        data = data.reshape(data.shape[0], -1)
        data -= np.mean(data, axis=0)
        data /= np.std(data, axis=0) + 1e-20

        code = dico.transform(data)
        patches = np.dot(code, V)
        z.transform(patches)

        pred = svc.predict(patches)
        print('Variance in the predictions:', np.var(pred))
        predictions.append(mode(pred))

    print('done in %.2fs.' % (time() - t0))

    predictions += 1
    util.write_results(predictions, 'svm_patches_25_percent_20_comp.csv')
def get_dictionary_data(n_comp=20, zero_index=False):
    unlabeled = util.load_unlabeled_training(flatten=False)
    height, width = 32, 32
    n_images = 10000
    patch_size = (8, 8)

    unlabeled = util.standardize(unlabeled)
    np.random.shuffle(unlabeled)

    print('Extracting reference patches...')

    patches = np.empty((0, 64))
    t0 = time()

    for image in unlabeled[:n_images, :, :]:
        data = np.array(extract_patches_2d(image, patch_size, max_patches=0.01))
        data = data.reshape(data.shape[0], -1)
        data -= np.mean(data, axis=0)
        data /= np.std(data, axis=0) + 1e-20
        patches = np.concatenate([patches, data])

    print('done in %.2fs.' % (time() - t0))

    # whiten the patches
    z = zca.ZCA()
    z.fit(patches)
    z.transform(patches)

    print('Learning the dictionary...')
    t0 = time()
    dico = MiniBatchDictionaryLearning(n_components=n_comp, alpha=1)
    V = dico.fit(patches).components_
    dt = time() - t0
    print('done in %.2fs.' % dt)

    #plt.figure(figsize=(4.2, 4))
    #for i, comp in enumerate(V[:100]):
    #    plt.subplot(10, 10, i + 1)
    #    plt.imshow(comp.reshape(patch_size), cmap=plt.cm.gray_r,
    #               interpolation='nearest')
    #    plt.xticks(())
    #    plt.yticks(())
    #plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)
    #plt.show()

    labeled_data, labels = util.load_labeled_training(flatten=False, zero_index=True)
    labeled_data = util.standardize(labeled_data)

    test_data = util.load_all_test(flatten=False)
    test_data = util.standardize(test_data)

    #util.render_matrix(test_data, flattened=False)

    print('Reconstructing the training images...')
    t0 = time()
    reconstructed_images = np.empty((0, 32, 32))

    for i, image in enumerate(labeled_data):
        data = extract_patches_2d(image, patch_size)
        data = data.reshape(data.shape[0], -1)
        data -= np.mean(data, axis=0)
        data /= np.std(data, axis=0) + 1e-20

        code = dico.transform(data)
        patches = np.dot(code, V)
        z.transform(patches)
        patches = patches.reshape(len(data), *patch_size)

        data = reconstruct_from_patches_2d(patches, (width, height))
        data = data.reshape(1, 32, 32)
        reconstructed_images = np.concatenate([reconstructed_images, data])

    print('done in %.2fs.' % (time() - t0))

    # flatten
    n, x, y = reconstructed_images.shape
    training_images = reconstructed_images.reshape(reconstructed_images.shape[0], reconstructed_images.shape[1]*reconstructed_images.shape[2])
    assert training_images.shape == (n, x*y)

    print('Reconstructing the test images...')
    t0 = time()
    reconstructed_test_images = np.empty((0, 32, 32))

    for image in test_data:
        data = extract_patches_2d(image, patch_size)
        data = data.reshape(data.shape[0], -1)
        data -= np.mean(data, axis=0)
        data /= np.std(data, axis=0) + 1e-20

        code = dico.transform(data)
        patches = np.dot(code, V)
        z.transform(patches)
        patches = patches.reshape(len(data), *patch_size)

        data = reconstruct_from_patches_2d(patches, (width, height))
        data = data.reshape(1, 32, 32)
        reconstructed_test_images = np.concatenate([reconstructed_test_images, data])

    print('done in %.2fs.' % (time() - t0))

    # flatten
    n, x, y = reconstructed_test_images.shape
    test_images = reconstructed_test_images.reshape(reconstructed_test_images.shape[0], reconstructed_test_images.shape[1]*reconstructed_test_images.shape[2])
    assert test_images.shape == (n, x*y)

    return (training_images, labels, test_images)