def __init__(self, which_set, numclass,
             base_path='/data/vision/billf/manifold-learning/DL/Data/icml_2013_emotions',
             start=0, stop=-1, options=[0], axes=('b', 0, 1, 'c'),
             fit_test_preprocessor=False, flip=0):
    # Note: `flip` was referenced but never declared in the original signature;
    # it is added here as an explicit flag so the name is defined.
    files = {'train': 'train.csv', 'public_test': 'test.csv'}

    try:
        file_path = files[which_set]
    except KeyError:
        raise ValueError("Unrecognized dataset name: " + which_set)

    X, y = self.loadFile(base_path + '/' + file_path, start, stop)

    # Optionally augment the data with flipped copies; labels are duplicated
    # to match the doubled example list.
    if flip:
        X_list_flipLR, X_list_flipUD = self.flipData(X)
        X = X + X_list_flipLR
        y = y + y

    view_converter = DefaultViewConverter(shape=(48, 48, 1), axes=axes)

    super(ICML_emotion, self).__init__(X=X,
                                       y=self.label_id2arr(y, numclass),
                                       view_converter=view_converter)

    if options[0] == 1:
        fit_preprocessor = False
        from pylearn2.datasets.preprocessing import GlobalContrastNormalization
        preprocessor = GlobalContrastNormalization(sqrt_bias=10, use_std=1)
        preprocessor.apply(self, can_fit=fit_preprocessor)
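# A minimal usage sketch for the constructor above, assuming the class is
# named ICML_emotion (as in the super() call) and the Kaggle CSVs live under
# base_path. The path, split indices, and numclass=7 are illustrative values,
# not taken from the original code.
train_set = ICML_emotion(which_set='train', numclass=7,
                         base_path='/path/to/icml_2013_emotions',
                         start=0, stop=25000,
                         flip=1,          # augment with flipped copies
                         options=[1])     # options[0] == 1 turns on GCN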
def test_unit_norm(self):
    """
    Test that using sqrt_bias = 0.0 and use_std = False results in
    vectors having unit norm.
    """
    tol = 3e-5

    num_examples = 5
    num_features = 10

    rng = np.random.RandomState([1, 2, 3])
    X = as_floatX(rng.randn(num_examples, num_features))
    dataset = DenseDesignMatrix(X=X)

    # the setting of subtract_mean is not relevant to the test;
    # the test only applies when sqrt_bias = 0.0 and use_std = False
    preprocessor = GlobalContrastNormalization(subtract_mean=False,
                                               sqrt_bias=0.0,
                                               use_std=False)
    dataset.apply_preprocessor(preprocessor)
    result = dataset.get_design_matrix()

    norms = np.sqrt(np.square(result).sum(axis=1))
    max_norm_error = np.abs(norms - 1.).max()
    assert max_norm_error < tol
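# The transformation the test above exercises reduces to dividing each row by
# its (bias-adjusted) L2 norm. This is a minimal NumPy sketch of the same
# math under the test's settings (subtract_mean=False, sqrt_bias=0.0,
# use_std=False); it is an illustration, not the library implementation.
import numpy as np

def gcn_unit_norm(X, sqrt_bias=0.0, eps=1e-8):
    """Scale each row of X to (approximately) unit L2 norm.

    With sqrt_bias=0.0 this is plain row-wise normalization, which is why
    the test expects norms of 1 up to floating-point error.
    """
    norms = np.sqrt(sqrt_bias + (X ** 2).sum(axis=1))
    norms[norms < eps] = 1.0          # guard against all-zero rows
    return X / norms[:, np.newaxis]

X = np.random.randn(5, 10)
print(np.linalg.norm(gcn_unit_norm(X), axis=1))  # ~[1. 1. 1. 1. 1.]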
def preprocess(self, basepath, train_pre=0, flip=3, cutoff=[-1, -1]):
    """
    myDataset = LoadData(basepath)
    datasets = myDataset.loadTrain(preprocessFLAG=train_pre, flipFLAG=flip, cutFLAG=cutoff[0])
    self.ds_train = DataPylearn2(datasets[0], self.ishape, self.numclass)
    self.ds_valid = DataPylearn2(datasets[1], self.ishape)
    self.ds_test = DataPylearn2(myDataset.loadTest(train_pre, cutoff[1]), self.ishape)
    """
    from pylearn2.datasets.preprocessing import GlobalContrastNormalization

    pre = GlobalContrastNormalization(sqrt_bias=10, use_std=1)
    self.ds_train = EmotionsDataset(which_set='train', base_path=basepath,
                                    start=0, stop=10000,
                                    preprocessor=pre, trainindex=1)
    self.ds_valid = EmotionsDataset(which_set='train', base_path=basepath,
                                    start=10000, stop=15000,
                                    preprocessor=pre)
    # self.ds_test = EmotionsDataset(which_set='public_test')
    myDataset = LoadData(basepath)
    self.ds_test = DataPylearn2(myDataset.loadTest(train_pre, cutoff[1]), self.ishape)
def test_zero_vector(self):
    """
    Test that passing in the zero vector does not result in a
    divide by 0.
    """
    dataset = DenseDesignMatrix(X=as_floatX(np.zeros((1, 1))))

    # the settings of subtract_mean and use_std are not relevant to the test;
    # sqrt_bias = 0.0 is the only value for which there should be a risk
    # of failure occurring
    preprocessor = GlobalContrastNormalization(subtract_mean=True,
                                               sqrt_bias=0.0,
                                               use_std=True)
    dataset.apply_preprocessor(preprocessor)
    result = dataset.get_design_matrix()
    assert isfinite(result)
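# Why the zero vector can stay finite: a common way to guard the division is
# to clamp near-zero normalizers before dividing. The sketch below is an
# assumption about how such a guard typically looks (the min_divisor value
# and exact formula are illustrative, not the library's code).
import numpy as np

def safe_gcn(X, sqrt_bias=0.0, use_std=True, min_divisor=1e-8):
    """Row-wise contrast normalization with a divide-by-zero guard.

    Rows whose normalizer falls below min_divisor are left unscaled instead
    of being divided by ~0, so an all-zero row stays finite.
    """
    X = X - X.mean(axis=1, keepdims=True)
    if use_std:
        normalizers = np.sqrt(sqrt_bias + X.var(axis=1))
    else:
        normalizers = np.sqrt(sqrt_bias + (X ** 2).sum(axis=1))
    normalizers[normalizers < min_divisor] = 1.0
    return X / normalizers[:, np.newaxis]

print(safe_gcn(np.zeros((1, 1))))  # [[0.]] -- finite, no NaN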
def preprocess(self):
    # GlobalContrastNormalization.apply() modifies the dataset in place and
    # returns None, so rebinding `self` to its return value is a bug.
    pre = GlobalContrastNormalization(sqrt_bias=10, use_std=1)
    pre.apply(self)
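# A small standalone sketch of the corrected in-place usage with a plain
# DenseDesignMatrix (the random data here is illustrative only).
import numpy as np
from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix
from pylearn2.datasets.preprocessing import GlobalContrastNormalization

ds = DenseDesignMatrix(X=np.random.randn(4, 16).astype('float32'))
pre = GlobalContrastNormalization(sqrt_bias=10, use_std=1)

pre.apply(ds)                          # mutates ds; no return value to keep
print(ds.get_design_matrix().shape)    # (4, 16)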
import gzip
import itertools
import pickle as pkl

import numpy as np
import theano.tensor as T
from theano import function
from scipy.stats import mode

from pylearn2.datasets.dense_design_matrix import (DenseDesignMatrix,
                                                   DefaultViewConverter)
from pylearn2.datasets.preprocessing import GlobalContrastNormalization

# `usage` and `util.write_results` are project-local helpers assumed to be
# defined elsewhere in this repository.


def make_majority_vote():
    model_paths = ['convnet_' + str(i + 1) + '.pkl' for i in range(10)]
    out_path = 'submission.csv'

    models = []
    for model_path in model_paths:
        print('Loading ' + model_path + '...')
        try:
            with open(model_path, 'rb') as f:
                models.append(pkl.load(f))
        except Exception:
            try:
                with gzip.open(model_path, 'rb') as f:
                    models.append(pkl.load(f))
            except Exception as e:
                usage()
                print(model_path + " doesn't seem to be a valid model path, "
                      "I got this error when trying to load it: ")
                print(e)

    # load the test set
    with open('test_data_for_pylearn2.pkl', 'rb') as f:
        dataset = pkl.load(f)

    dataset = DenseDesignMatrix(X=dataset,
                                view_converter=DefaultViewConverter(
                                    shape=[32, 32, 1],
                                    axes=['b', 0, 1, 'c']))
    preprocessor = GlobalContrastNormalization(subtract_mean=True,
                                               sqrt_bias=0.0,
                                               use_std=True)
    preprocessor.apply(dataset)

    predictions = []
    print('Model description:')
    print('')
    print(models[1])
    print('')

    for model in models:
        model.set_batch_size(dataset.X.shape[0])

        X = model.get_input_space().make_batch_theano()
        Y = model.fprop(X)          # forward prop the test data
        y = T.argmax(Y, axis=1)
        f = function([X], y)

        x_arg = dataset.get_topological_view()
        y = f(x_arg.astype(X.dtype))

        assert y.ndim == 1
        assert y.shape[0] == dataset.X.shape[0]

        # add one to the results: submission labels are 1-indexed
        y += 1
        predictions.append(y)

    predictions = np.array(predictions, dtype='int32')

    # majority vote: most frequent predicted label per test example
    y = mode(predictions.T, axis=1)[0]
    y = np.array(y, dtype='int32')
    y = list(itertools.chain(*y))
    assert len(y) == dataset.X.shape[0]

    util.write_results(y, out_path)
    print('Wrote predictions to submission.csv.')
    return np.reshape(y, (1, -1))
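# A small self-contained example of the voting step above: scipy.stats.mode
# over the transposed prediction matrix picks, for each test example, the
# label most models agreed on. The array shapes and values here are made up
# purely for illustration.
import numpy as np
from scipy.stats import mode

# 3 models x 4 test examples, each entry a predicted class label
predictions = np.array([[1, 2, 3, 1],
                        [1, 2, 1, 1],
                        [2, 2, 3, 1]], dtype='int32')

# transpose -> rows are test examples; mode returns the majority label per row
voted = mode(predictions.T, axis=1)[0].ravel()
print(voted)   # [1 2 3 1]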
from galatea.datasets.norb_tiny import NORB_Tiny

train = NORB_Tiny('train')

from pylearn2.datasets.preprocessing import (Pipeline,
                                             GlobalContrastNormalization,
                                             ZCA)

pipeline = Pipeline()
pipeline.items = [GlobalContrastNormalization(), ZCA()]

train.apply_preprocessor(pipeline, can_fit=True)

from pylearn2.utils.serial import save

save('norb_tiny_preprocessed_train.pkl', train)
save('norb_tiny_preprocessor.pkl', pipeline)
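# The saved pipeline can later be re-applied to a held-out split without
# refitting; this sketch assumes a NORB_Tiny('test') split exists (an
# assumption, not shown above), and follows the can_fit=False pattern used
# on the test set in the CIFAR-10 snippet further below.
from pylearn2.utils.serial import load

pipeline = load('norb_tiny_preprocessor.pkl')
test = NORB_Tiny('test')

# can_fit=False: reuse statistics estimated on the training set
# (e.g. the ZCA whitening matrix) instead of refitting them on test data.
test.apply_preprocessor(pipeline, can_fit=False)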
layers = [l1, l2, l3, l4, output]

mdl = mlp.MLP(layers, input_space=in_space)

trainer = sgd.SGD(learning_rate=.17,
                  batch_size=128,
                  learning_rule=learning_rule.Momentum(.5),
                  # Remember, default dropout is .5
                  cost=Dropout(input_include_probs={'l1': .8},
                               input_scales={'l1': 1.}),
                  termination_criterion=EpochCounter(max_epochs=475),
                  monitoring_dataset={'valid': tst, 'train': trn})

preprocessor = Pipeline([GlobalContrastNormalization(scale=55.), ZCA()])
trn.apply_preprocessor(preprocessor=preprocessor, can_fit=True)
tst.apply_preprocessor(preprocessor=preprocessor, can_fit=False)
serial.save('kaggle_cifar10_preprocessor.pkl', preprocessor)

watcher = best_params.MonitorBasedSaveBest(
    channel_name='valid_y_misclass',
    save_path='kaggle_cifar10_maxout_zca.pkl')

velocity = learning_rule.MomentumAdjustor(final_momentum=.65,
                                          start=1,
                                          saturate=250)

decay = sgd.LinearDecayOverEpoch(start=1,
                                 saturate=500,
                                 decay_factor=.01)
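# These pieces are typically wired together with a pylearn2 Train object.
# A minimal sketch, assuming the variables above (trn, mdl, trainer, watcher,
# velocity, decay) are in scope; save_path and save_freq are illustrative.
from pylearn2.train import Train

experiment = Train(dataset=trn,
                   model=mdl,
                   algorithm=trainer,
                   extensions=[watcher, velocity, decay],
                   save_path='kaggle_cifar10_maxout_zca_progress.pkl',
                   save_freq=1)
experiment.main_loop()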
import theano
import theano.tensor as T
import numpy
from pylearn2.datasets.preprocessing import Standardize, LeCunLCN, GlobalContrastNormalization
from pylearn2.datasets.tfd import TFD
import cPickle as pkl

theano.subtensor_merge_bug = False

if __name__ == "__main__":
    weights_file = "../out/pae_mnist_enc_weights.npy"
    input = T.matrix("X", dtype=theano.config.floatX)
    tfd_ds = TFD("unlabeled")
    print "TFD shape: ", tfd_ds.X.shape

    gcn = GlobalContrastNormalization()
    standardizer = Standardize()
    lcn = LeCunLCN(img_shape=(48, 48), channels=[0])

    gcn.apply(tfd_ds, can_fit=True)
    standardizer.apply(tfd_ds, can_fit=True)
    lcn.apply(tfd_ds)

    rnd = numpy.random.RandomState(1231)
    # PowerupAutoencoder is defined elsewhere in this project
    powerup = PowerupAutoencoder(input,
                                 nvis=48 * 48,
                                 nhid=500,
                                 momentum=0.66,
                                 rho=0.92,
                                 num_pieces=4,
                                 cost_type="MeanSquaredCost",
import theano
import theano.tensor as T
import numpy
from pylearn2.datasets.preprocessing import Standardize, LeCunLCN, GlobalContrastNormalization
from pylearn2.datasets.tfd import TFD
import pickle as pkl

theano.subtensor_merge_bug = False

if __name__ == "__main__":
    weights_file = "../out/pae_mnist_enc_weights.npy"
    input = T.matrix("X", dtype=theano.config.floatX)
    tfd_ds = TFD("unlabeled")
    print("TFD shape: ", tfd_ds.X.shape)

    gcn = GlobalContrastNormalization()
    standardizer = Standardize()
    lcn = LeCunLCN(img_shape=(48, 48), channels=[0])

    gcn.apply(tfd_ds, can_fit=True)
    standardizer.apply(tfd_ds, can_fit=True)
    lcn.apply(tfd_ds)

    rnd = numpy.random.RandomState(1231)
    # PowerupAutoencoder is defined elsewhere in this project
    powerup = PowerupAutoencoder(input,
                                 nvis=48 * 48,
                                 nhid=500,
                                 momentum=0.66,
                                 rho=0.92,
                                 num_pieces=4,
                                 cost_type="MeanSquaredCost",