#!/usr/bin/env python

import os

import numpy
from sklearn.decomposition import PCA

from ift6266h12.utils.ift6266h12_io import load_train_input, load_test_input, load_valid_input

dest_path = '/data/lisa/data/UTLC/pca'

trainset = load_train_input('sylvester', normalize=True)
testset = load_test_input('sylvester', normalize=True)
validset = load_valid_input('sylvester', normalize=True)

pca = PCA(32)
pca.fit(trainset)

numpy.save(os.path.join(dest_path, 'sylvester_train_x_pca32.npy'),
           pca.transform(trainset))
numpy.save(os.path.join(dest_path, 'sylvester_valid_x_pca32.npy'),
           pca.transform(validset))
numpy.save(os.path.join(dest_path, 'sylvester_test_x_pca32.npy'),
           pca.transform(testset))
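
As a quick sanity check (a sketch, not part of the original script), the saved arrays can be reloaded to confirm the reduced dimensionality:

import os
import numpy

dest_path = '/data/lisa/data/UTLC/pca'
train_x = numpy.load(os.path.join(dest_path, 'sylvester_train_x_pca32.npy'))
print(train_x.shape)  # second dimension should be 32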
Example #2
from sklearn.decomposition import PCA, KernelPCA

# The snippet is truncated here: the opening of this dict (and entries for
# the labels 1, 2 and 5 used by the masks below) is missing.
dnames = {
    'box1.npy': 4,
    'box2.npy': 4,
    'box3.npy': 4,
    'box4.npy': 4,
    'box5.npy': 4,
    'bottle1.npy': 3,
    'bottle2.npy': 3,
    'bottle3.npy': 3,
    'bottle4.npy': 3,
    'bottle5.npy': 3
}

X, Y = load_and_pack_data(dnames, 20, 20)  # user-defined helper, not shown

# PCA and Kernel PCA
pca = PCA(n_components=3)
X_pca = pca.fit_transform(X)
print('done simple pca')

kpca = KernelPCA(kernel="rbf", fit_inverse_transform=True)
X_kpca = kpca.fit_transform(X)
print('fitted kernel pca')
X_back = kpca.inverse_transform(X_kpca)
print('done back transforming with kpca')

# plots
reds = Y == 1
blues = Y == 2
greens = Y == 3
magentas = Y == 4
yellows = Y == 5
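
The snippet cuts off at the class masks; a plausible continuation (purely a sketch, with pylab assumed as in the neighbouring examples) scatters the kernel-PCA projection coloured by class:

import pylab as pl

for mask, colour in [(reds, 'r'), (blues, 'b'), (greens, 'g'),
                     (magentas, 'm'), (yellows, 'y')]:
    pl.scatter(X_kpca[mask, 0], X_kpca[mask, 1], c=colour, s=20, linewidths=0)
pl.title('Kernel PCA (RBF) projection by class')
pl.show()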
Example #3
import numpy as np
import pylab as pl

from sklearn.decomposition import PCA, FastICA

###############################################################################
# Generate sample data
S = np.random.standard_t(1.5, size=(10000, 2))
S[:, 0] *= 2.  # stretch the first source component

# Mix data
A = np.array([[1, 1], [0, 2]])  # Mixing matrix

X = np.dot(S, A.T)  # Generate observations

pca = PCA()
S_pca_ = pca.fit(X).transform(X)

ica = FastICA()
S_ica_ = ica.fit(X).transform(X)  # Estimate the sources

S_ica_ /= S_ica_.std(axis=0)


###############################################################################
# Plot results

def plot_samples(S, axis_list=None):
    pl.scatter(S[:, 0], S[:, 1], s=2, marker='o', linewidths=0, zorder=10)
    if axis_list is not None:
        colors = [(0, 0.6, 0), (0.6, 0, 0)]
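        # (The original snippet is truncated here. A plausible completion,
        # modeled on scikit-learn's classic PCA-vs-ICA example, draws each
        # estimated mixing axis as an arrow; quiver parameters are assumptions.)
        for color, axis in zip(colors, axis_list):
            axis = axis / axis.std()
            x_axis, y_axis = axis
            pl.quiver((0, 0), (0, 0), x_axis, y_axis, zorder=11,
                      width=0.01, scale=6, color=color)
    pl.hlines(0, -3, 3)
    pl.vlines(0, -3, 3)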
Example #4
from time import time

from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

# (data, n_digits, n_features and n_samples come from earlier in the
# original file, which is truncated here.)
print "n_digits: %d" % n_digits
print "n_features: %d" % n_features
print "n_samples: %d" % n_samples
print

print "Raw k-means with k-means++ init..."
t0 = time()
km = KMeans(init='k-means++', k=n_digits, n_init=10).fit(data)
print "done in %0.3fs" % (time() - t0)
print "inertia: %f" % km.inertia_
print

print "Raw k-means with random centroid init..."
t0 = time()
km = KMeans(init='random', k=n_digits, n_init=10).fit(data)
print "done in %0.3fs" % (time() - t0)
print "inertia: %f" % km.inertia_
print

print "Raw k-means with PCA-based centroid init..."
# in this case the seeding of the centers is deterministic, hence we run the
# kmeans algorithm only once with n_init=1
t0 = time()
pca = PCA(n_components=n_digits).fit(data)
km = KMeans(init=pca.components_, k=n_digits, n_init=1).fit(data)
print "done in %0.3fs" % (time() - t0)
print "inertia: %f" % km.inertia_
print
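
The benchmark above relies on data and n_digits defined earlier in its source file; for reference, a self-contained re-run of the PCA-seeded variant (the digits dataset is an assumption suggested by the variable names):

from time import time

import numpy as np
from sklearn.cluster import KMeans
from sklearn.datasets import load_digits
from sklearn.decomposition import PCA

digits = load_digits()
data, n_digits = digits.data, len(np.unique(digits.target))

t0 = time()
pca = PCA(n_components=n_digits).fit(data)
# Array init is deterministic, so a single run (n_init=1) suffices.
km = KMeans(init=pca.components_, n_clusters=n_digits, n_init=1).fit(data)
print("done in %0.3fs, inertia: %f" % (time() - t0, km.inertia_))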

Example #5
import time

import numpy as np
from sklearn.decomposition import PCA

# (The pylearn2 / ift6266h12 imports used below -- NpyDataset, FeatureDump,
# BinomialCorruptor, DenoisingAutoencoder, MeanSquaredReconstructionError,
# EpochCounter, UnsupervisedExhaustiveSGD, Train, load, score -- are not
# shown in the original snippet.)
def main_train(work_dir="../results/avicenna/",
               corruption_level=0.3,
               nvis=75,
               nhid=600,
               tied_weights=True,
               act_enc="sigmoid",
               act_dec=None,
               max_epochs=2,
               learning_rate=0.001,
               batch_size=20,
               monitoring_batches=5,
               save_freq=1,
               n_components_trans_pca=7):

    conf = {
        'corruption_level': corruption_level,
        'nvis': nvis,
        'nhid': nhid,
        'tied_weights': tied_weights,
        'act_enc': act_enc,
        'act_dec': act_dec,
        'max_epochs': max_epochs,
        'learning_rate': learning_rate,
        'batch_size': batch_size,
        'monitoring_batches': monitoring_batches,
        'save_freq': save_freq,
        'n_components_trans_pca': n_components_trans_pca
    }

    start = time.time()

    ###############   TRAIN THE DAE
    train_file = work_dir + "train_pca" + str(conf['nvis']) + ".npy"
    save_path = work_dir + "train_pca" + str(conf['nvis']) + "_dae" + str(
        conf['nhid']) + "_model.pkl"

    trainset = NpyDataset(file=train_file)
    trainset.yaml_src = 'script'
    corruptor = BinomialCorruptor(corruption_level=conf['corruption_level'])
    dae = DenoisingAutoencoder(nvis=conf['nvis'],
                               nhid=conf['nhid'],
                               tied_weights=conf['tied_weights'],
                               corruptor=corruptor,
                               act_enc=conf['act_enc'],
                               act_dec=conf['act_dec'])
    cost = MeanSquaredReconstructionError()
    termination_criterion = EpochCounter(max_epochs=conf['max_epochs'])
    algorithm = UnsupervisedExhaustiveSGD(
        learning_rate=conf['learning_rate'],
        batch_size=conf['batch_size'],
        monitoring_batches=conf['monitoring_batches'],
        monitoring_dataset=trainset,
        cost=cost,
        termination_criterion=termination_criterion)

    train_obj = Train(dataset=trainset,
                      model=dae,
                      algorithm=algorithm,
                      save_freq=conf['save_freq'],
                      save_path=save_path)
    train_obj.main_loop()

    ###############   APPLY THE MODEL ON THE TRAIN DATASET
    print("Applying the model on the train dataset...")
    model = load(save_path)
    save_train_path = work_dir + "train_pca" + str(
        conf['nvis']) + "_dae" + str(conf['nhid']) + ".npy"
    dump_obj = FeatureDump(encoder=model,
                           dataset=trainset,
                           path=save_train_path)
    dump_obj.main_loop()

    ###############   APPLY THE MODEL ON THE VALID DATASET
    print("Applying the model on the valid dataset...")
    valid_file = work_dir + "valid_pca" + str(conf['nvis']) + ".npy"

    validset = NpyDataset(file=valid_file)
    validset.yaml_src = 'script'
    save_valid_path = work_dir + "valid_pca" + str(
        conf['nvis']) + "_dae" + str(conf['nhid']) + ".npy"
    dump_obj = FeatureDump(encoder=model,
                           dataset=validset,
                           path=save_valid_path)
    dump_obj.main_loop()

    ###############   APPLY THE MODEL ON THE TEST DATASET
    print("Applying the model on the test dataset...")
    test_file = work_dir + "test_pca" + str(conf['nvis']) + ".npy"

    testset = NpyDataset(file=test_file)
    testset.yaml_src = 'script'
    save_test_path = work_dir + "test_pca" + str(conf['nvis']) + "_dae" + str(
        conf['nhid']) + ".npy"
    dump_obj = FeatureDump(encoder=model, dataset=testset, path=save_test_path)
    dump_obj.main_loop()

    ###############   COMPUTE THE ALC SCORE ON VALIDATION SET
    valid_data = ift6266h12.load_npy(save_valid_path)
    label_data = ift6266h12.load_npy(
        '/data/lisa/data/UTLC/numpy_data/avicenna_valid_y.npy')
    alc_1 = score(valid_data, label_data)

    ###############   APPLY THE TRANSDUCTIVE PCA
    test_data = ift6266h12.load_npy(save_test_path)
    trans_pca = PCA(n_components=conf['n_components_trans_pca'])
    # Transductive step: the PCA is refit on each set separately.
    final_valid = trans_pca.fit_transform(valid_data)
    final_test = trans_pca.fit_transform(test_data)

    save_valid_path = work_dir + "valid_pca" + str(
        conf['nvis']) + "_dae" + str(conf['nhid']) + "_tpca" + str(
            conf['n_components_trans_pca']) + ".npy"
    save_test_path = work_dir + "test_pca" + str(conf['nvis']) + "_dae" + str(
        conf['nhid']) + "_tpca" + str(conf['n_components_trans_pca']) + ".npy"

    np.save(save_valid_path, final_valid)
    np.save(save_test_path, final_test)

    ###############   COMPUTE THE NEW ALC SCORE ON VALIDATION SET
    alc_2 = score(final_valid, label_data)

    ###############   OUTPUT AND RETURN THE RESULTS
    timeSpent = (time.time() - start) / 60.
    print('FINAL RESULTS (PCA-%s DAE-%s TransPCA-%s) ALC after DAE: %s, '
          'FINAL ALC: %s, computed in %5.2f min'
          % (conf['nvis'], conf['nhid'], conf['n_components_trans_pca'],
             alc_1, alc_2, timeSpent))

    return timeSpent, alc_1, alc_2
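
A hypothetical driver for main_train (the arguments shown are just the defaults restated):

if __name__ == '__main__':
    time_spent, alc_dae, alc_final = main_train(
        work_dir="../results/avicenna/", nhid=600, max_epochs=2)
    print('ALC after DAE: %s, final ALC: %s (computed in %.2f min)'
          % (alc_dae, alc_final, time_spent))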