def run(self):
    # Extract features from model.
    preproc = Standardize()
    if self.model:
        self.model.fn = self.model.function("perform", **self.model_call_kwargs)
    newtrain = self.extract_features(self.trainset, preproc, can_fit=True)
    newtest = self.extract_features(self.testset, preproc, can_fit=False)
    newvalid = newtest if not self.validset else \
            self.extract_features(self.validset, preproc, can_fit=False)

    # Find optimal SVM hyper-parameters.
    (best_svm, train_error, valid_error) = cross_validate_svm(
            self.svm,
            (newtrain.X, self.trainset_y),
            (newvalid.X, self.validset_y),
            self.C_list)
    logging.info('Best train/valid error for C=%f : %f \t %f' %
                 (best_svm.C, train_error, valid_error))

    # Optionally retrain on validation set, using optimal hyperparams.
    if self.validset and self.retrain_on_valid:
        retrain_svm(best_svm,
                    (newtrain.X, self.trainset_y),
                    (newvalid.X, self.validset_y))

    test_error = compute_test_error(best_svm, (newtest.X, self.testset_y))
    logging.info('Test error = %f' % test_error)

    if self.save_fname:
        fp = open(self.save_fname, 'w')
        pickle.dump(best_svm, fp)
        fp.close()

    return best_svm, (train_error, valid_error, test_error)
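When save_fname is set, run() pickles the selected SVM for later use. A minimal sketch of loading it back (the file name is illustrative, and the predict() call assumes a scikit-learn-style SVM object, which the snippet does not confirm):

import pickle

with open('best_svm.pkl', 'rb') as fp:  # whatever was passed as save_fname
    best_svm = pickle.load(fp)

# Assumes the stored object exposes a scikit-learn-style predict().
predictions = best_svm.predict(features)  # 'features' = standardized feature matrix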
def get_dataset_icml():
    base_path = '${PYLEARN2_DATA_PATH}/icml_2013_black_box'
    process = Standardize()

    trainset = BlackBoxDataset(which_set='train', start=0, stop=900)
    validset = BlackBoxDataset(which_set='train', start=900, stop=1000)
    extraset = BlackBoxDataset(which_set='extra', start=0, stop=1000)  # trainset
    testset = BlackBoxDataset(which_set='public_test')

    # process.apply(extraset, can_fit=True)
    # process.apply(trainset)
    # process.apply(validset)
    # process.apply(testset)

    return trainset, validset, testset, extraset
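The commented-out apply() calls above hint at the intended preprocessing: fit the Standardize statistics on the 'extra' split, then reuse them on the other splits without refitting. A minimal sketch along those lines (not part of the original function):

trainset, validset, testset, extraset = get_dataset_icml()

process = Standardize()
# Fit the mean/std on the 'extra' split only...
process.apply(extraset, can_fit=True)
# ...then reuse the same statistics on the remaining splits.
for ds in (trainset, validset, testset):
    process.apply(ds, can_fit=False)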
def run(self, retrain_on_valid=True):
    # Extract features from model.
    preproc = Standardize()
    self.model.fn = self.model.function("perform", **self.model_call_kwargs)
    newtrain = self.extract_features(self.trainset, preproc, can_fit=True)
    newtest = self.extract_features(self.testset, preproc, can_fit=False)
    newvalid = newtest if not self.validset else \
            self.extract_features(self.validset, preproc, can_fit=False)

    # Find the best number of training epochs.
    best_nb_epoch, valid_error = cross_validate_logistic_regression(
            (newtrain.X, self.trainset_y),
            (newtest.X, self.testset_y),
            self.n_epoch_list)
    logging.info('Best validation error for n_epoch=%i : %f' %
                 (best_nb_epoch, valid_error))

    # Measure test error with the optimal number of epochs
    # (retraining on train and valid if applicable).
    if self.validset and retrain_on_valid:
        full_train_X = numpy.vstack((newtrain.X, newvalid.X))
        full_train_Y = numpy.hstack((self.trainset_y, self.validset_y))
    else:
        full_train_X = newtrain.X
        full_train_Y = self.trainset_y
    full_test_X = newtest.X
    full_test_Y = self.testset_y

    best_params, test_error = test_logistic_regression(
            (full_train_X, full_train_Y),
            (full_test_X, full_test_Y),
            best_nb_epoch)
    logging.info('Test error = %f' % test_error)

    if self.save_fname:
        fp = open(self.save_fname, 'w')
        pickle.dump(best_params, fp)
        fp.close()

    return (best_params, valid_error, test_error)
import numpy
import theano
import theano.tensor as T
from pylearn2.datasets.preprocessing import Standardize, LeCunLCN, GlobalContrastNormalization
from pylearn2.datasets.tfd import TFD
import cPickle as pkl

theano.subtensor_merge_bug = False

if __name__ == "__main__":
    weights_file = "../out/pae_mnist_enc_weights.npy"
    input = T.matrix("X", dtype=theano.config.floatX)
    tfd_ds = TFD("unlabeled")
    print "TFD shape: ", tfd_ds.X.shape

    gcn = GlobalContrastNormalization()
    standardizer = Standardize()
    lcn = LeCunLCN(img_shape=(48, 48), channels=[0])

    gcn.apply(tfd_ds, can_fit=True)
    standardizer.apply(tfd_ds, can_fit=True)
    lcn.apply(tfd_ds)

    rnd = numpy.random.RandomState(1231)
    # PowerupAutoencoder is assumed to be imported elsewhere in the original script.
    powerup = PowerupAutoencoder(input,
                                 nvis=48 * 48,
                                 nhid=500,
                                 momentum=0.66,
                                 rho=0.92,
                                 num_pieces=4,
                                 cost_type="MeanSquaredCost",
                                 L2_reg=8.2 * 1e-5,
parser.add_argument("model_path", nargs=1) options = parser.parse_args() out = options.out[0] <<<<<<< HEAD out = out.strip('.npy') model_path = options.model_path[0] try: model = serial.load(model_path) except Exception, e: print model_path + "doesn't seem to be a valid model path, I got this error when trying to load it: " print e dataset = yaml_parse.load(model.dataset_yaml_src.replace('/data/afew/facetubes/p10','/data/lisatmp/bouthilx/facetubes/p10')) preprocessor = Standardize() preprocessor.apply(dataset.raw,can_fit=True) X = dataset.raw.get_design_matrix() X = X.reshape(X.shape[0],3,96,96).transpose(0,2,3,1) # X = X.reshape(X.shape[0],96,96,3) mean = X.mean(axis=0) std = X.std(axis=0) std_eps = 1e-4 print mean.shape print std.shape # print preprocessor._mean, mean # print preprocessor._std,std class DummyDataset: def __init__(self,X): self.X = X.transpose(0,3,1,2).reshape(X.shape[0],np.prod(X.shape[1:]))
from pylearn2.datasets.norb_small import FoveatedNORB
from pylearn2.datasets.preprocessing import Standardize
from pylearn2.utils import serial

dataset = FoveatedNORB(which_set='train')

standardize = Standardize(global_mean=True, global_std=True)
standardize.apply(dataset, can_fit=True)

serial.save("norb_prepro_global.pkl", standardize)
def get_layer_MLP():
    extraset = BlackBoxDataset(which_set='extra')

    processor = Standardize()
    processor.apply(extraset, can_fit=True)

    trainset = BlackBoxDataset(which_set='train',
                               start=0,
                               stop=900,
                               preprocessor=processor,
                               fit_preprocessor=True,
                               fit_test_preprocessor=True)

    validset = BlackBoxDataset(which_set='train',
                               start=900,
                               stop=1000,
                               preprocessor=processor,
                               fit_preprocessor=True,
                               fit_test_preprocessor=False)

    dropCfg = {
        'input_include_probs': {'h0': .8},
        'input_scales': {'h0': 1.}
    }

    config = {
        'learning_rate': .05,
        'init_momentum': .00,
        'cost': Dropout(**dropCfg),
        'monitoring_dataset': {'train': trainset, 'valid': validset},
        'termination_criterion': MonitorBased(channel_name='valid_y_misclass', N=100, prop_decrease=0),
        'update_callbacks': None
    }

    config0 = {
        'layer_name': 'h0',
        'num_units': 1875,
        'num_pieces': 2,
        'irange': .05,
        # Rather than using weight decay, we constrain the norms of the weight vectors
        'max_col_norm': 2.
    }

    config1 = {
        'layer_name': 'h1',
        'num_units': 700,
        'num_pieces': 2,
        'irange': .05,
        # Rather than using weight decay, we constrain the norms of the weight vectors
        'max_col_norm': 2.
    }

    sftmaxCfg = {
        'layer_name': 'y',
        'init_bias_target_marginals': trainset,
        # Initialize the weights to all 0s
        'irange': .0,
        'n_classes': 9
    }

    l1 = Maxout(**config0)
    l2 = Maxout(**config1)
    l3 = Softmax(**sftmaxCfg)

    train_algo = SGD(**config)
    model = MLP(batch_size=75, layers=[l1, l2, l3], nvis=1875)
    return Train(model=model,
                 dataset=trainset,
                 algorithm=train_algo,
                 extensions=None,
                 save_path="maxout_best_model.pkl",
                 save_freq=1)
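get_layer_MLP() returns a pylearn2 Train object, so a typical entry point is simply to call its main loop. This short sketch is an assumed usage, not part of the original snippet:

if __name__ == "__main__":
    train = get_layer_MLP()
    # Runs SGD until the MonitorBased termination criterion fires,
    # saving the model to maxout_best_model.pkl every epoch.
    train.main_loop()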
from pylearn2.datasets.norb_small import FoveatedNORB
from pylearn2.datasets.preprocessing import Standardize
from pylearn2.utils import serial

dataset = FoveatedNORB(which_set='train')

standardize = Standardize()
standardize.apply(dataset, can_fit=True)

serial.save("norb_prepro.pkl", standardize)
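Both NORB snippets serialize the fitted Standardize object so its statistics can be reused later. A minimal reload sketch, assuming the 'test' split of FoveatedNORB and the pickle produced above:

from pylearn2.utils import serial
from pylearn2.datasets.norb_small import FoveatedNORB

standardize = serial.load("norb_prepro.pkl")
testset = FoveatedNORB(which_set='test')
# Reuse the statistics fitted on the training set; do not refit on test data.
standardize.apply(testset, can_fit=False)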
import numpy
import theano
import theano.tensor as T
from pylearn2.datasets.preprocessing import Standardize, LeCunLCN, GlobalContrastNormalization
from pylearn2.datasets.tfd import TFD
import pickle as pkl

theano.subtensor_merge_bug = False

if __name__ == "__main__":
    weights_file = "../out/pae_mnist_enc_weights.npy"
    input = T.matrix("X", dtype=theano.config.floatX)
    tfd_ds = TFD("unlabeled")
    print("TFD shape: ", tfd_ds.X.shape)

    gcn = GlobalContrastNormalization()
    standardizer = Standardize()
    lcn = LeCunLCN(img_shape=(48, 48), channels=[0])

    gcn.apply(tfd_ds, can_fit=True)
    standardizer.apply(tfd_ds, can_fit=True)
    lcn.apply(tfd_ds)

    rnd = numpy.random.RandomState(1231)
    # PowerupAutoencoder is assumed to be imported elsewhere in the original script.
    powerup = PowerupAutoencoder(input,
                                 nvis=48 * 48,
                                 nhid=500,
                                 momentum=0.66,
                                 rho=0.92,
                                 num_pieces=4,
                                 cost_type="MeanSquaredCost",
                                 L2_reg=8.2 * 1e-5,