Example #1
    def __init__(self, which_set, numclass,
            base_path = '/data/vision/billf/manifold-learning/DL/Data/icml_2013_emotions',
            start = 0,
            stop = -1,
            options = [0],
            axes = ('b', 0, 1, 'c'),
            flip = False,  # assumed default; the body below references `flip`
            fit_test_preprocessor = False,
            ):
        files = {'train': 'train.csv', 'public_test' : 'test.csv'}
        try:
            file_path = files[which_set]
        except KeyError:
            raise ValueError("Unrecognized dataset name: " + which_set)
        
        X, y = self.loadFile(base_path + '/' + file_path, start,stop)
        # train_index
        if flip:
            X_list_flipLR, X_list_flipUD = self.flipData(X)
            X = X + X_list_flipLR
            y = y + y    

        view_converter = DefaultViewConverter(shape=(48,48,1), axes=axes)
        super(ICML_emotion, self).__init__(X=X, y=self.label_id2arr(y,numclass), view_converter=view_converter)
                
        if options[0] == 1:
            fit_preprocessor = False
            from pylearn2.datasets.preprocessing import GlobalContrastNormalization
            preprocessor = GlobalContrastNormalization(sqrt_bias = 10,use_std = 1)            
            preprocessor.apply(self, can_fit=fit_preprocessor)
Example #2
    def test_unit_norm(self):
        """ Test that using std_bias = 0.0 and use_norm = True
            results in vectors having unit norm """

        num_examples = 5
        num_features = 10

        rng = np.random.RandomState([1, 2, 3])

        X = as_floatX(rng.randn(num_examples, num_features))

        dataset = DenseDesignMatrix(X=X)

        # the setting of subtract_mean is not relevant to the test
        # the test only applies when sqrt_bias = 0.0 and use_std = False
        preprocessor = GlobalContrastNormalization(subtract_mean=False,
                                                   sqrt_bias=0.0,
                                                   use_std=False)

        dataset.apply_preprocessor(preprocessor)

        result = dataset.get_design_matrix()

        norms = np.sqrt(np.square(result).sum(axis=1))

        max_norm_error = np.abs(norms - 1.).max()

        tol = 3e-5

        assert max_norm_error < tol
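
The property asserted above can also be checked outside the test harness. Below is a minimal standalone sketch (assuming pylearn2 is installed; the data and variable names are illustrative, not taken from the original test) that applies GlobalContrastNormalization with sqrt_bias=0.0 and use_std=False and prints the resulting row norms, which should all be close to 1:

import numpy as np
from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix
from pylearn2.datasets.preprocessing import GlobalContrastNormalization

# Small random design matrix: 5 examples with 10 features each.
rng = np.random.RandomState(0)
X = rng.randn(5, 10).astype('float32')
dataset = DenseDesignMatrix(X=X)

# With sqrt_bias=0.0 and use_std=False the divisor reduces to the
# per-example L2 norm, so every row ends up with unit norm.
preprocessor = GlobalContrastNormalization(subtract_mean=False,
                                           sqrt_bias=0.0,
                                           use_std=False)
dataset.apply_preprocessor(preprocessor)

norms = np.sqrt(np.square(dataset.get_design_matrix()).sum(axis=1))
print(norms)  # expected: each value within about 1e-5 of 1.0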
Example #3
    def preprocess(self, basepath, train_pre=0, flip=3, cutoff=[-1, -1]):
        """
        myDataset = LoadData(basepath)
        datasets = myDataset.loadTrain(preprocessFLAG=train_pre,flipFLAG=flip,cutFLAG=cutoff[0])
        self.ds_train = DataPylearn2(datasets[0],self.ishape,self.numclass)
        self.ds_valid = DataPylearn2(datasets[1],self.ishape)
        self.ds_test = DataPylearn2(myDataset.loadTest(train_pre,cutoff[1]),self.ishape)
        """

        from pylearn2.datasets.preprocessing import GlobalContrastNormalization
        pre = GlobalContrastNormalization(sqrt_bias=10, use_std=1)

        self.ds_train = EmotionsDataset(which_set='train',
                                        base_path=basepath,
                                        start=0,
                                        stop=10000,
                                        preprocessor=pre,
                                        trainindex=1)
        self.ds_valid = EmotionsDataset(which_set='train',
                                        base_path=basepath,
                                        start=10000,
                                        stop=15000,
                                        preprocessor=pre)
        #self.ds_test = EmotionsDataset(which_set='public_test')

        myDataset = LoadData(basepath)
        self.ds_test = DataPylearn2(myDataset.loadTest(train_pre, cutoff[1]),
                                    self.ishape)
Example #4
    def test_zero_vector(self):
        """ Test that passing in the zero vector does not result in
            a divide by 0 """

        dataset = DenseDesignMatrix(X=as_floatX(np.zeros((1, 1))))

        # the settings of subtract_mean and use_std are not relevant to
        # the test
        # sqrt_bias = 0.0 is the only value for which there should be a risk
        # of failure occurring
        preprocessor = GlobalContrastNormalization(subtract_mean=True,
                                                   sqrt_bias=0.0,
                                                   use_std=True)

        dataset.apply_preprocessor(preprocessor)

        result = dataset.get_design_matrix()

        assert isfinite(result)
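
For intuition about why the zero vector stays finite, here is a rough pure-NumPy sketch of the kind of per-example normalization these examples rely on. It is an illustration under stated assumptions, not pylearn2's exact implementation (the library's defaults such as the minimum-divisor threshold may differ), but the guard shown below is what prevents a 0/0 division from producing NaN:

import numpy as np

def gcn_sketch(X, subtract_mean=True, use_std=False, sqrt_bias=0.0,
               scale=1.0, min_divisor=1e-8):
    """Illustrative global contrast normalization, one row per example."""
    X = np.asarray(X, dtype='float64').copy()
    if subtract_mean:
        X -= X.mean(axis=1, keepdims=True)
    if use_std:
        normalizers = np.sqrt(sqrt_bias + X.var(axis=1)) / scale
    else:
        normalizers = np.sqrt(sqrt_bias + (X ** 2).sum(axis=1)) / scale
    # Guard: rows whose normalizer is ~0 (e.g. an all-zero example with
    # sqrt_bias=0.0) are left unscaled instead of being divided by zero.
    normalizers[normalizers < min_divisor] = 1.0
    return X / normalizers[:, np.newaxis]

print(gcn_sketch(np.zeros((1, 4))))  # stays finite: [[0. 0. 0. 0.]]
Z = gcn_sketch(np.random.randn(3, 5), subtract_mean=False)
print(np.sqrt((Z ** 2).sum(axis=1)))  # approximately [1. 1. 1.]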
Example #5
    def preprocess(self):
        pre = GlobalContrastNormalization(sqrt_bias=10, use_std=1)
        # apply() modifies the dataset in place and returns None,
        # so there is no need to rebind self
        pre.apply(self)
Example #6
    def preprocess(self):
        pre = GlobalContrastNormalization(sqrt_bias = 10, use_std = 1)
        # apply() modifies the dataset in place and returns None,
        # so there is no need to rebind self
        pre.apply(self)
Example #7
def make_majority_vote():

    model_paths = ['convnet_' + str(i + 1) + '.pkl' for i in range(10)]
    out_path = 'submission.csv'

    models = []

    for model_path in model_paths:
        print('Loading ' + model_path + '...')
        try:
            with open(model_path, 'rb') as f:
                models.append(pkl.load(f))
        except Exception as e:
            try:
                with gzip.open(model_path, 'rb') as f:
                    models.append(pkl.load(f))
            except Exception as e:
                usage()
                print(
                    model_path +
                    " doesn't seem to be a valid model path; I got this error when trying to load it:"
                )
                print(e)

    # load the test set
    with open('test_data_for_pylearn2.pkl', 'rb') as f:
        dataset = pkl.load(f)

    dataset = DenseDesignMatrix(X=dataset,
                                view_converter=DefaultViewConverter(
                                    shape=[32, 32, 1], axes=['b', 0, 1, 'c']))
    preprocessor = GlobalContrastNormalization(subtract_mean=True,
                                               sqrt_bias=0.0,
                                               use_std=True)
    preprocessor.apply(dataset)

    predictions = []
    print('Model description:')
    print('')
    print(models[1])
    print('')

    for model in models:

        model.set_batch_size(dataset.X.shape[0])

        X = model.get_input_space().make_batch_theano()
        Y = model.fprop(X)  # forward prop the test data

        y = T.argmax(Y, axis=1)

        f = function([X], y)

        x_arg = dataset.get_topological_view()
        y = f(x_arg.astype(X.dtype))

        assert y.ndim == 1
        assert y.shape[0] == dataset.X.shape[0]

        # add one to the results!
        y += 1

        predictions.append(y)

    predictions = np.array(predictions, dtype='int32')

    y = mode(predictions.T, axis=1)[0]
    y = np.array(y, dtype='int32')

    import itertools
    y = list(itertools.chain(*y))

    assert len(y) == dataset.X.shape[0]

    util.write_results(y, out_path)

    print('Wrote predictions to submission.csv.')
    return np.reshape(y, (1, -1))
Example #8
from galatea.datasets.norb_tiny import NORB_Tiny

train = NORB_Tiny('train')

from pylearn2.datasets.preprocessing import Pipeline, GlobalContrastNormalization, ZCA

pipeline = Pipeline()

pipeline.items = [GlobalContrastNormalization(), ZCA()]

train.apply_preprocessor(pipeline, can_fit=True)

from pylearn2.utils.serial import save

save('norb_tiny_preprocessed_train.pkl', train)
save('norb_tiny_preprocessor.pkl', pipeline)
Example #9
layers = [l1, l2, l3, l4, output]

mdl = mlp.MLP(layers,
              input_space=in_space)

trainer = sgd.SGD(learning_rate=.17,
                  batch_size=128,
                  learning_rule=learning_rule.Momentum(.5),
                  # Remember, default dropout is .5
                  cost=Dropout(input_include_probs={'l1': .8},
                               input_scales={'l1': 1.}),
                  termination_criterion=EpochCounter(max_epochs=475),
                  monitoring_dataset={'valid': tst,
                                      'train': trn})

preprocessor = Pipeline([GlobalContrastNormalization(scale=55.), ZCA()])
trn.apply_preprocessor(preprocessor=preprocessor, can_fit=True)
tst.apply_preprocessor(preprocessor=preprocessor, can_fit=False)
serial.save('kaggle_cifar10_preprocessor.pkl', preprocessor)

watcher = best_params.MonitorBasedSaveBest(
    channel_name='valid_y_misclass',
    save_path='kaggle_cifar10_maxout_zca.pkl')

velocity = learning_rule.MomentumAdjustor(final_momentum=.65,
                                          start=1,
                                          saturate=250)

decay = sgd.LinearDecayOverEpoch(start=1,
                                 saturate=500,
                                 decay_factor=.01)
Example #10
import theano
import theano.tensor as T
import numpy

from pylearn2.datasets.preprocessing import Standardize, LeCunLCN, GlobalContrastNormalization
from pylearn2.datasets.tfd import TFD

import cPickle as pkl

theano.subtensor_merge_bug = False

if __name__ == "__main__":
    weights_file = "../out/pae_mnist_enc_weights.npy"
    input = T.matrix("X", dtype=theano.config.floatX)
    tfd_ds = TFD("unlabeled")

    print "TFD shape: ", tfd_ds.X.shape
    gcn = GlobalContrastNormalization()
    standardizer = Standardize()
    lcn = LeCunLCN(img_shape=(48, 48), channels=[0])
    gcn.apply(tfd_ds, can_fit=True)
    standardizer.apply(tfd_ds, can_fit=True)
    lcn.apply(tfd_ds)

    rnd = numpy.random.RandomState(1231)

    powerup = PowerupAutoencoder(input,
                                 nvis=48*48,
                                 nhid=500,
                                 momentum=0.66,
                                 rho=0.92,
                                 num_pieces=4,
                                 cost_type="MeanSquaredCost",
Example #12
import theano
import theano.tensor as T
import numpy

from pylearn2.datasets.preprocessing import Standardize, LeCunLCN, GlobalContrastNormalization
from pylearn2.datasets.tfd import TFD

import pickle as pkl

theano.subtensor_merge_bug = False

if __name__ == "__main__":
    weights_file = "../out/pae_mnist_enc_weights.npy"
    input = T.matrix("X", dtype=theano.config.floatX)
    tfd_ds = TFD("unlabeled")

    print(("TFD shape: ", tfd_ds.X.shape))
    gcn = GlobalContrastNormalization()
    standardizer = Standardize()
    lcn = LeCunLCN(img_shape=(48, 48), channels=[0])
    gcn.apply(tfd_ds, can_fit=True)
    standardizer.apply(tfd_ds, can_fit=True)
    lcn.apply(tfd_ds)

    rnd = numpy.random.RandomState(1231)

    powerup = PowerupAutoencoder(input,
                                 nvis=48 * 48,
                                 nhid=500,
                                 momentum=0.66,
                                 rho=0.92,
                                 num_pieces=4,
                                 cost_type="MeanSquaredCost",