def run(self):
    # Extract features from model.
    preproc = Standardize()
    if self.model:
        self.model.fn = self.model.function("perform", **self.model_call_kwargs)
    newtrain = self.extract_features(self.trainset, preproc, can_fit=True)
    newtest = self.extract_features(self.testset, preproc, can_fit=False)
    newvalid = newtest if not self.validset else \
            self.extract_features(self.validset, preproc, can_fit=False)

    # Find optimal SVM hyper-parameters.
    (best_svm, train_error, valid_error) = cross_validate_svm(
            self.svm,
            (newtrain.X, self.trainset_y),
            (newvalid.X, self.validset_y),
            self.C_list)
    logging.info('Best train/valid error for C=%f : %f \t %f' %
                 (best_svm.C, train_error, valid_error))

    # Optionally retrain on validation set, using optimal hyperparams.
    if self.validset and self.retrain_on_valid:
        retrain_svm(best_svm,
                    (newtrain.X, self.trainset_y),
                    (newvalid.X, self.validset_y))

    test_error = compute_test_error(best_svm, (newtest.X, self.testset_y))
    logging.info('Test error = %f' % test_error)

    if self.save_fname:
        fp = open(self.save_fname, 'w')
        pickle.dump(best_svm, fp)
        fp.close()

    return best_svm, (train_error, valid_error, test_error)
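When save_fname is set, run() pickles the selected SVM for later use. A minimal sketch of loading it back (the file name is illustrative, and the predict() call assumes a scikit-learn-style SVM object, which the snippet does not confirm):

import pickle

with open('best_svm.pkl', 'rb') as fp:  # whatever was passed as save_fname
    best_svm = pickle.load(fp)

# Assumes the stored object exposes a scikit-learn-style predict().
predictions = best_svm.predict(features)  # 'features' = standardized feature matrix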
def get_dataset_icml():
    base_path = '${PYLEARN2_DATA_PATH}/icml_2013_black_box'
    process = Standardize()

    trainset = BlackBoxDataset(which_set='train', start=0, stop=900)
    validset = BlackBoxDataset(which_set='train', start=900, stop=1000)
    extraset = BlackBoxDataset(which_set='extra', start=0, stop=1000)  # trainset
    testset = BlackBoxDataset(which_set='public_test')

    # process.apply(extraset, can_fit=True)
    # process.apply(trainset)
    # process.apply(validset)
    # process.apply(testset)

    return trainset, validset, testset, extraset
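The commented-out apply() calls above hint at the intended preprocessing: fit the Standardize statistics on the 'extra' split, then reuse them on the other splits without refitting. A minimal sketch along those lines (not part of the original function):

trainset, validset, testset, extraset = get_dataset_icml()

process = Standardize()
# Fit the mean/std on the 'extra' split only...
process.apply(extraset, can_fit=True)
# ...then reuse the same statistics on the remaining splits.
for ds in (trainset, validset, testset):
    process.apply(ds, can_fit=False)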
def run(self, retrain_on_valid=True):
    # Extract features from model.
    preproc = Standardize()
    self.model.fn = self.model.function("perform", **self.model_call_kwargs)
    newtrain = self.extract_features(self.trainset, preproc, can_fit=True)
    newtest = self.extract_features(self.testset, preproc, can_fit=False)
    newvalid = newtest if not self.validset else \
            self.extract_features(self.validset, preproc, can_fit=False)

    # Find the best number of training epochs.
    best_nb_epoch, valid_error = cross_validate_logistic_regression(
            (newtrain.X, self.trainset_y),
            (newtest.X, self.testset_y),
            self.n_epoch_list)
    logging.info('Best validation error for n_epoch=%i : %f' %
                 (best_nb_epoch, valid_error))

    # Measure test error with the optimal number of epochs
    # (retraining on train and valid if applicable).
    if self.validset and retrain_on_valid:
        full_train_X = numpy.vstack((newtrain.X, newvalid.X))
        full_train_Y = numpy.hstack((self.trainset_y, self.validset_y))
    else:
        full_train_X = newtrain.X
        full_train_Y = self.trainset_y
    full_test_X = newtest.X
    full_test_Y = self.testset_y

    best_params, test_error = test_logistic_regression(
            (full_train_X, full_train_Y),
            (full_test_X, full_test_Y),
            best_nb_epoch)
    logging.info('Test error = %f' % test_error)

    if self.save_fname:
        fp = open(self.save_fname, 'w')
        pickle.dump(best_params, fp)
        fp.close()

    return (best_params, valid_error, test_error)
import numpy
import theano
import theano.tensor as T
from pylearn2.datasets.preprocessing import Standardize, LeCunLCN, GlobalContrastNormalization
from pylearn2.datasets.tfd import TFD
import cPickle as pkl

theano.subtensor_merge_bug = False

if __name__ == "__main__":
    weights_file = "../out/pae_mnist_enc_weights.npy"
    input = T.matrix("X", dtype=theano.config.floatX)
    tfd_ds = TFD("unlabeled")
    print "TFD shape: ", tfd_ds.X.shape

    gcn = GlobalContrastNormalization()
    standardizer = Standardize()
    lcn = LeCunLCN(img_shape=(48, 48), channels=[0])

    gcn.apply(tfd_ds, can_fit=True)
    standardizer.apply(tfd_ds, can_fit=True)
    lcn.apply(tfd_ds)

    rnd = numpy.random.RandomState(1231)
    # PowerupAutoencoder is assumed to be imported elsewhere in the original script.
    powerup = PowerupAutoencoder(input,
                                 nvis=48 * 48,
                                 nhid=500,
                                 momentum=0.66,
                                 rho=0.92,
                                 num_pieces=4,
                                 cost_type="MeanSquaredCost",
                                 L2_reg=8.2 * 1e-5,
parser.add_argument("model_path", nargs=1) options = parser.parse_args() out = options.out[0] <<<<<<< HEAD out = out.strip('.npy') model_path = options.model_path[0] try: model = serial.load(model_path) except Exception, e: print model_path + "doesn't seem to be a valid model path, I got this error when trying to load it: " print e dataset = yaml_parse.load(model.dataset_yaml_src.replace('/data/afew/facetubes/p10','/data/lisatmp/bouthilx/facetubes/p10')) preprocessor = Standardize() preprocessor.apply(dataset.raw,can_fit=True) X = dataset.raw.get_design_matrix() X = X.reshape(X.shape[0],3,96,96).transpose(0,2,3,1) # X = X.reshape(X.shape[0],96,96,3) mean = X.mean(axis=0) std = X.std(axis=0) std_eps = 1e-4 print mean.shape print std.shape # print preprocessor._mean, mean # print preprocessor._std,std class DummyDataset: def __init__(self,X): self.X = X.transpose(0,3,1,2).reshape(X.shape[0],np.prod(X.shape[1:]))
from pylearn2.datasets.norb_small import FoveatedNORB
from pylearn2.datasets.preprocessing import Standardize
from pylearn2.utils import serial

dataset = FoveatedNORB(which_set='train')

standardize = Standardize(global_mean=True, global_std=True)
standardize.apply(dataset, can_fit=True)

serial.save("norb_prepro_global.pkl", standardize)
def get_layer_MLP():
    extraset = BlackBoxDataset(which_set='extra')

    processor = Standardize()
    processor.apply(extraset, can_fit=True)

    trainset = BlackBoxDataset(which_set='train',
                               start=0,
                               stop=900,
                               preprocessor=processor,
                               fit_preprocessor=True,
                               fit_test_preprocessor=True)

    validset = BlackBoxDataset(which_set='train',
                               start=900,
                               stop=1000,
                               preprocessor=processor,
                               fit_preprocessor=True,
                               fit_test_preprocessor=False)

    dropCfg = {
        'input_include_probs': {'h0': .8},
        'input_scales': {'h0': 1.}
    }

    config = {
        'learning_rate': .05,
        'init_momentum': .00,
        'cost': Dropout(**dropCfg),
        'monitoring_dataset': {'train': trainset, 'valid': validset},
        'termination_criterion': MonitorBased(channel_name='valid_y_misclass', N=100, prop_decrease=0),
        'update_callbacks': None
    }

    config0 = {
        'layer_name': 'h0',
        'num_units': 1875,
        'num_pieces': 2,
        'irange': .05,
        # Rather than using weight decay, we constrain the norms of the weight vectors
        'max_col_norm': 2.
    }

    config1 = {
        'layer_name': 'h1',
        'num_units': 700,
        'num_pieces': 2,
        'irange': .05,
        # Rather than using weight decay, we constrain the norms of the weight vectors
        'max_col_norm': 2.
    }

    sftmaxCfg = {
        'layer_name': 'y',
        'init_bias_target_marginals': trainset,
        # Initialize the weights to all 0s
        'irange': .0,
        'n_classes': 9
    }

    l1 = Maxout(**config0)
    l2 = Maxout(**config1)
    l3 = Softmax(**sftmaxCfg)

    train_algo = SGD(**config)
    model = MLP(batch_size=75, layers=[l1, l2, l3], nvis=1875)
    return Train(model=model,
                 dataset=trainset,
                 algorithm=train_algo,
                 extensions=None,
                 save_path="maxout_best_model.pkl",
                 save_freq=1)
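get_layer_MLP() returns a pylearn2 Train object, so a typical entry point is simply to call its main loop. This short sketch is an assumed usage, not part of the original snippet:

if __name__ == "__main__":
    train = get_layer_MLP()
    # Runs SGD until the MonitorBased termination criterion fires,
    # saving the model to maxout_best_model.pkl every epoch.
    train.main_loop()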
from pylearn2.datasets.norb_small import FoveatedNORB
from pylearn2.datasets.preprocessing import Standardize
from pylearn2.utils import serial

dataset = FoveatedNORB(which_set='train')

standardize = Standardize()
standardize.apply(dataset, can_fit=True)

serial.save("norb_prepro.pkl", standardize)
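Both NORB snippets serialize the fitted Standardize object so its statistics can be reused later. A minimal reload sketch, assuming the 'test' split of FoveatedNORB and the pickle produced above:

from pylearn2.utils import serial
from pylearn2.datasets.norb_small import FoveatedNORB

standardize = serial.load("norb_prepro.pkl")
testset = FoveatedNORB(which_set='test')
# Reuse the statistics fitted on the training set; do not refit on test data.
standardize.apply(testset, can_fit=False)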
import numpy
import theano
import theano.tensor as T
from pylearn2.datasets.preprocessing import Standardize, LeCunLCN, GlobalContrastNormalization
from pylearn2.datasets.tfd import TFD
import pickle as pkl

theano.subtensor_merge_bug = False

if __name__ == "__main__":
    weights_file = "../out/pae_mnist_enc_weights.npy"
    input = T.matrix("X", dtype=theano.config.floatX)
    tfd_ds = TFD("unlabeled")
    print("TFD shape: ", tfd_ds.X.shape)

    gcn = GlobalContrastNormalization()
    standardizer = Standardize()
    lcn = LeCunLCN(img_shape=(48, 48), channels=[0])

    gcn.apply(tfd_ds, can_fit=True)
    standardizer.apply(tfd_ds, can_fit=True)
    lcn.apply(tfd_ds)

    rnd = numpy.random.RandomState(1231)
    # PowerupAutoencoder is assumed to be imported elsewhere in the original script.
    powerup = PowerupAutoencoder(input,
                                 nvis=48 * 48,
                                 nhid=500,
                                 momentum=0.66,
                                 rho=0.92,
                                 num_pieces=4,
                                 cost_type="MeanSquaredCost",
                                 L2_reg=8.2 * 1e-5,