def runAutoencoder():
    ds = StockPrice()
    data = np.random.randn(10, 5).astype(config.floatX)
    print BinomialCorruptor(.2)  # debug: show the corruptor's repr

    # Denoising autoencoder: 1000 visible units, 100 hidden units,
    # sigmoid encoder, linear decoder, untied weights.
    ae = DenoisingAutoencoder(BinomialCorruptor(corruption_level=.2),
                              nvis=1000, nhid=100,
                              act_enc='sigmoid', act_dec='linear',
                              tied_weights=False)

    trainer = sgd.SGD(learning_rate=.005,
                      batch_size=5,
                      termination_criterion=EpochCounter(3),
                      cost=cost_ae.MeanSquaredReconstructionError(),
                      monitoring_batches=5,
                      monitoring_dataset=ds)
    trainer.setup(ae, ds)
    while True:
        trainer.train(dataset=ds)
        ae.monitor()
        ae.monitor.report_epoch()
        if not trainer.continue_learning(ae):
            break

    # print ds.train[0][0]
    # print ae.reconstruct(ds.train[0][0])

    # Pull the learned parameters back out as numpy arrays.
    w = ae.weights.get_value()
    hb = ae.hidbias.get_value()
    vb = ae.visbias.get_value()

    d = tensor.matrix()
    # Manual numpy reconstruction of the first training example:
    # sigmoid(hb + x.W) through the encoder, then the linear decoder.
    # Note: with tied_weights=False the decoder's actual parameters are
    # w_prime, not w.T, so this only tracks the model for tied weights.
    result = np.dot(1. / (1 + np.exp(-hb - np.dot(ds.train[0][0], w))),
                    w.T) + vb
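# Sketch: verify the hand-rolled numpy reconstruction above against the
# model's symbolic clean path. Self-contained under the usual pylearn2/theano
# imports; the dimensions are made up, and tied weights are used so that the
# decoder really is w.T. Note DenoisingAutoencoder.reconstruct() corrupts its
# input first, so we compare against decode(encode(x)) instead.
import numpy as np
import theano
from theano import tensor, config
from pylearn2.corruption import BinomialCorruptor
from pylearn2.models.autoencoder import DenoisingAutoencoder

ae = DenoisingAutoencoder(BinomialCorruptor(corruption_level=.2),
                          nvis=20, nhid=10,
                          act_enc='sigmoid', act_dec='linear',
                          tied_weights=True)
x = tensor.matrix()
clean = theano.function([x], ae.decode(ae.encode(x)))

data = np.random.randn(5, 20).astype(config.floatX)
w = ae.weights.get_value()
hb = ae.hidbias.get_value()
vb = ae.visbias.get_value()
manual = np.dot(1. / (1 + np.exp(-hb - np.dot(data, w))), w.T) + vb
assert np.allclose(clean(data), manual, atol=1e-4)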
def __init__(self, corruptor=BinomialCorruptor(0.5), num_corruptions=2,
             nvis=0, nhid=10, act_enc='sigmoid', act_dec='sigmoid',
             tied_weights=False, irange=1e-3, rng=9001,
             dataset_adaptor=VectorDataset(), trainer=SGDTrainer()):
    self.configs = {
        'corruptor': corruptor,
        'num_corruptions': num_corruptions,
        'nvis': nvis,
        'nhid': nhid,
        'act_enc': act_enc,
        'act_dec': act_dec,
        'tied_weights': tied_weights,
        'irange': irange,
        'rng': rng,
    }
    self.dataset_adaptor = dataset_adaptor
    self.trainer = trainer
def create_denoising_autoencoder(structure, corruption=0.1, act='tanh'):
    n_input, n_output = structure
    corruptor = LoggingCorruptor(
        BinomialCorruptor(corruption_level=corruption),
        name='{}'.format(structure))

    # Glorot-style initial weight range; sigmoid units get the usual 4x factor.
    irange = numpy.sqrt(6. / (n_input + n_output))
    if act == theano.tensor.nnet.sigmoid or act == 'sigmoid':
        irange *= 4
    # log.debug('initial weight range: {}'.format(irange))

    config = {
        'corruptor': corruptor,
        'nhid': n_output,
        'nvis': n_input,
        'tied_weights': True,
        'act_enc': act,
        'act_dec': act,
        'irange': irange,  # was 0.001
    }
    log.debug('creating denoising autoencoder {}'.format(config))

    da = AdaptableDenoisingAutoencoder(**config)
    return da
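# Usage sketch for create_denoising_autoencoder() above. The layer sizes are
# hypothetical; LoggingCorruptor and AdaptableDenoisingAutoencoder are this
# project's own wrappers, assumed importable alongside the function.
da = create_denoising_autoencoder([784, 500], corruption=0.2, act='sigmoid')
print da.nhid  # 500 hidden units, with the 4x sigmoid init range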
def new_model(self, model_params, dataset):
    corruptor = BinomialCorruptor(
        corruption_level=model_params['noise_level'])
    model = DenoisingAutoencoder(nvis=dataset.X.shape[1],
                                 nhid=model_params['hidden_outputs'],
                                 irange=model_params['irange'],
                                 corruptor=corruptor,
                                 act_enc='tanh',
                                 act_dec=None)
    return model
def get_denoising_autoencoder(structure):
    n_input, n_output = structure
    corruptor = BinomialCorruptor(corruption_level=0.5)
    config = {
        'corruptor': corruptor,
        'nhid': n_output,
        'nvis': n_input,
        'tied_weights': True,
        'act_enc': 'sigmoid',
        'act_dec': 'sigmoid',
        'irange': 0.001,
    }
    return DenoisingAutoencoder(**config)
def get_denoising_autoencoder(structure, corr_val):
    n_input, n_output = structure
    corruptor = BinomialCorruptor(corruption_level=corr_val)
    # corruptor = GaussianCorruptor(stdev=0.25)
    config = {
        'corruptor': corruptor,
        'nhid': n_output,
        'nvis': n_input,
        'tied_weights': True,
        'act_enc': 'sigmoid',
        'act_dec': 'sigmoid',
        # 4x Glorot range, as is conventional for sigmoid units
        'irange': 4 * np.sqrt(6. / (n_input + n_output)),
    }
    return DenoisingAutoencoder(**config)
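# These per-layer constructors are typically mapped over consecutive layer
# sizes to pre-build a stack, as in construct_ae() further below. A minimal
# sketch with hypothetical dimensions, assuming pylearn2's StackedBlocks:
from pylearn2.blocks import StackedBlocks

dims = [784, 500, 200]
layers = [get_denoising_autoencoder((n_in, n_out), corr_val=0.3)
          for n_in, n_out in zip(dims[:-1], dims[1:])]
stack = StackedBlocks(layers)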
def create_layer_one(self):
    which_set = "train"
    one_hot = True
    # Creating 5 random patch layers based on 8,000 samples (the saturation
    # point where the objective no longer improves).
    start = 0
    stop = 800
    # GridPatchCIFAR10 randomly selects 5 16x16 patches from each image, and
    # we do this 5 times. This helps increase training time and captures more
    # information, similar to how neurons in the eye are attached to a
    # specific region of the image.
    dataset = GridPatchCIFAR10(which_set=which_set, one_hot=one_hot,
                               start=start, stop=stop)

    # Denoising autoencoder model hyperparameters
    nvis = 768  # 16x16 patches x 3 channels
    nhid = 512
    irange = 0.05
    corruption_lvl = 0.2
    corruptor = BinomialCorruptor(corruption_level=corruption_lvl)
    activation_encoder = "tanh"
    activation_decoder = None  # linear activation

    # Creating the denoising autoencoder
    model = DenoisingAutoencoder(nvis=nvis, nhid=nhid, irange=irange,
                                 corruptor=corruptor,
                                 act_enc=activation_encoder,
                                 act_dec=activation_decoder)

    # Parameters for the SGD learning algorithm instantiated below
    learning_rate = 0.001
    batch_size = 100
    monitoring_batches = 5
    monitoring_dataset = dataset
    cost = MeanSquaredReconstructionError()
    max_epochs = 10
    termination_criterion = EpochCounter(max_epochs=max_epochs)

    # SGD learning algorithm
    algorithm = SGD(learning_rate=learning_rate, batch_size=batch_size,
                    monitoring_batches=monitoring_batches,
                    monitoring_dataset=monitoring_dataset, cost=cost,
                    termination_criterion=termination_criterion)

    # Train the five sub-layers in parallel, one process each.
    processes = []
    for i in range(5):
        print "Training DAE Sub-Layer: ", i
        save_path = self.save_path + str(i) + ".pkl"
        save_freq = 1
        train = Train(dataset=dataset, model=model, algorithm=algorithm,
                      save_path=save_path, save_freq=save_freq)
        p = Process(target=train.main_loop, args=())
        p.start()
        processes.append(p)
    for process in processes:
        process.join()
def test_high_order_autoencoder_init():
    """
    Just test that the model initializes and returns the penalty
    without error.
    """
    corruptor = BinomialCorruptor(corruption_level=0.5)
    model = HigherOrderContractiveAutoencoder(corruptor=corruptor,
                                              num_corruptions=5,
                                              nvis=20,
                                              nhid=30,
                                              act_enc='sigmoid',
                                              act_dec='sigmoid')

    X = tensor.matrix()
    data = np.random.randn(50, 20).astype(config.floatX)
    ff = theano.function([X], model.higher_order_penalty(X))
    assert type(ff(data)) == np.ndarray
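# For reference, the imports this test relies on. The module paths below are
# an assumption about the layout: in pylearn2, HigherOrderContractiveAutoencoder
# lives alongside the other autoencoder models.
import numpy as np
import theano
from theano import tensor, config
from pylearn2.corruption import BinomialCorruptor
from pylearn2.models.autoencoder import HigherOrderContractiveAutoencoder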
def combine_sublayers(self):
    print "Combining sub-layers"
    # Create a large 2560-unit DAE. The model is considered trained by the
    # concatenation of its 512-unit sub-layers: the 2560 hidden units'
    # weights will be initialized from them.
    nvis = 3072
    nhid = 2560
    irange = 0.05
    corruption = 0.2
    corruptor = BinomialCorruptor(corruption_level=corruption)
    activation_encoder = "tanh"
    activation_decoder = None

    # By default the DAE initializes the weights at random. Since we are
    # using our own already pre-trained weights, we will change those values
    # instead.
    large_dae = DenoisingAutoencoder(nvis=nvis, nhid=nhid,
                                     corruptor=corruptor, irange=irange,
                                     act_enc=activation_encoder,
                                     act_dec=activation_decoder)

    # No need to change the hidden or visible biases; they are static here.
    large_dae._params = [
        large_dae.visbias,
        large_dae.hidbias,
        # Here is where we change the weights.
        large_dae.weights
    ]

    numpy_array = np.zeros((3072, 2560))
    large_dae_weights = []  # collected sub-layer weight matrices

    # Load sub-layer models and get their weights.
    for i in range(5):
        fo = open(self.save_path + str(i) + ".pkl", 'rb')
        # 768-visible, 512-hidden-unit DAE.
        small_dae = cPickle.load(fo)
        fo.close()

        # TODO: Create numpy array of proper values to set the large_dae.
        # Get the weights from the small_dae's so that they can be
        # appended together.
        weights = small_dae.weights.get_value()
        large_dae_weights.append(weights)

    print "Successfully combined sub-layers"
def test_sdae():
    """
    Tests that StackedDenoisingAutoencoder works correctly.
    """
    data = np.random.randn(10, 5).astype(config.floatX) * 100
    ae = Autoencoder(5, 7, act_enc='tanh', act_dec='cos',
                     tied_weights=False)
    corruptor = BinomialCorruptor(corruption_level=0.5)
    model = StackedDenoisingAutoencoder([ae], corruptor)
    model._ensure_extensions()

    w = ae.weights.get_value()
    w_prime = ae.w_prime.get_value()
    ae.hidbias.set_value(np.random.randn(7).astype(config.floatX))
    hb = ae.hidbias.get_value()
    ae.visbias.set_value(np.random.randn(5).astype(config.floatX))
    vb = ae.visbias.get_value()
    d = tensor.matrix()

    # Clean (uncorrupted) reconstruction computed by hand in numpy.
    result = np.cos(np.dot(np.tanh(hb + np.dot(data, w)), w_prime) + vb)
    ff = theano.function([d], model.reconstruct(d))
    # The stacked model corrupts its input before reconstructing, so its
    # output should NOT match the clean-path result.
    assert not _allclose(ff(data), result)
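# Companion check (would go at the end of test_sdae above): the mismatch is
# due only to the corruption step, so the underlying Autoencoder's clean
# reconstruct(), which is just decode(encode(x)), should agree with the
# hand-computed result. A sketch under the same setup and _allclose helper.
clean = theano.function([d], ae.reconstruct(d))
assert _allclose(clean(data), result)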
def construct_ae(structure):
    # some settings
    irange = 0.1

    layers = []
    for vsize, hsize in zip(structure[:-1], structure[1:]):
        # DenoisingAutoencoder / ContractiveAutoencoder /
        # HigherOrderContractiveAutoencoder
        layers.append(autoencoder.DenoisingAutoencoder(
            nvis=vsize,
            nhid=hsize,
            tied_weights=True,
            act_enc='sigmoid',
            act_dec='sigmoid',
            irange=irange,
            # for DenoisingAutoencoder / HigherOrderContractiveAutoencoder:
            corruptor=BinomialCorruptor(0.5),
            # for HigherOrderContractiveAutoencoder:
            # num_corruptions=6
        ))
    return StackedBlocks(layers)
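# Usage sketch for construct_ae(): build a (hypothetically sized) stack and
# expose its features as a pylearn2 dataset. `trainset` is assumed to be an
# already-loaded dense design-matrix dataset.
from pylearn2.datasets.transformer_dataset import TransformerDataset

stack = construct_ae([784, 400, 100])
features = TransformerDataset(raw=trainset, transformer=stack)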
def main():
    # Only the trainset is processed by this function.
    print 'getting preprocessed data to train model'
    pp_trainset, testset = get_processed_dataset()

    # Remember to change here when changing datasets.
    print 'loading unprocessed data for input displays'
    trainset = cifar10.CIFAR10(which_set="train")

    # Note: nvis is taken from the raw trainset here even though training
    # runs on pp_trainset (compare the variant below, which uses the
    # preprocessed design matrix).
    dmat = trainset.get_design_matrix()
    nvis = dmat.shape[1]

    model = DenoisingAutoencoder(
        corruptor=BinomialCorruptor(corruption_level=0.5),
        nhid=nhid,
        nvis=nvis,
        act_enc='sigmoid',
        act_dec='sigmoid',
        irange=.01)

    algorithm = SGD(
        learning_rate=0.1,
        cost=MeanSquaredReconstructionError(),
        batch_size=1000,
        monitoring_batches=10,
        monitoring_dataset=pp_trainset,
        termination_criterion=EpochCounter(max_epochs=MAX_EPOCHS_UNSUPERVISED),
        update_callbacks=None)

    extensions = None
    trainer = Train(model=model,
                    algorithm=algorithm,
                    save_path='testrun.pkl',
                    save_freq=1,
                    extensions=extensions,
                    dataset=pp_trainset)
    trainer.main_loop()
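# After main_loop() finishes, the pickled model can be reloaded and used as a
# feature extractor. A sketch assuming the 'testrun.pkl' path used above and
# some preprocessed design matrix `X_pp` (both names are assumptions here).
import theano
from theano import tensor
from pylearn2.utils import serial

model = serial.load('testrun.pkl')
v = tensor.matrix()
encode = theano.function([v], model.encode(v))
hidden = encode(X_pp)  # shape: (n_examples, nhid)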
def main():
    # The data isn't going to be fully processed, so we may have to do some
    # stuff to the testset still to make it work.
    trainset, testset = get_processed_dataset()

    # Creating the patch-pairs:
    design_matrix = trainset.get_design_matrix()
    processed_patch_size = design_matrix.shape[1]

    num_images = train_size
    examples_per_image = patches_per_image * (patches_per_image - 1)
    num_examples = examples_per_image * num_images

    stamps = trainset.stamps
    max_stamp = input_width - patch_width
    d_size = (2 * max_stamp + 1)**input_dim

    patch_pairs = np.zeros((num_examples, 2 * processed_patch_size))
    distances = np.zeros((num_examples, input_dim))
    distances_onehot = np.zeros((num_examples, d_size))
    examples = np.zeros((num_examples, 2 * processed_patch_size + d_size))
    nvis = 2 * processed_patch_size + d_size

    def flatten_encoding(encoding, max_stamp):
        # Flatten a multi-dimensional offset into a single index.
        dims = len(encoding)
        flat_encoding = 0
        for i in xrange(dims - 1):
            flat_encoding += encoding[i]
            flat_encoding *= max_stamp
        flat_encoding += encoding[-1]
        return flat_encoding

    # Can be done without (or with fewer) for loops?
    print 'begin for loop'
    for i in xrange(num_images):
        if i % 1000 == 0:
            print i, '-th outer loop...'
        for j in xrange(patches_per_image):
            patch1_num = i * patches_per_image + j
            patch1_pos = stamps[patch1_num, :]
            for k in xrange(patches_per_image):
                example_num = (i * examples_per_image
                               + j * (patches_per_image - 1) + k)
                if k > j:
                    example_num -= 1
                if k != j:
                    patch2_num = i * patches_per_image + k
                    patch2_pos = stamps[patch2_num, :]
                    distance = patch1_pos - patch2_pos
                    distances[example_num] = distance
                    distance_encoding = distance + max_stamp
                    distance_encoding = flatten_encoding(
                        distance_encoding, max_stamp)
                    distances_onehot[example_num, distance_encoding] = 1
                    p1 = design_matrix[patch1_num]
                    p2 = design_matrix[patch2_num]
                    patch_pairs[example_num] = np.hstack((p1, p2))
                    examples[example_num] = np.hstack(
                        (patch_pairs[example_num],
                         distances_onehot[example_num]))
    print 'end for loop'

    trainset.set_design_matrix(examples)

    model = DenoisingAutoencoder(
        corruptor=BinomialCorruptor(corruption_level=0.5),
        nhid=nhid,
        nvis=nvis,
        act_enc='sigmoid',
        act_dec='sigmoid',
        irange=.01)

    algorithm = SGD(
        learning_rate=0.1,
        cost=MeanSquaredReconstructionError(),
        batch_size=100,
        monitoring_batches=10,
        monitoring_dataset=trainset,
        termination_criterion=EpochCounter(max_epochs=MAX_EPOCHS_UNSUPERVISED),
        update_callbacks=None)

    extensions = None
    trainer = Train(model=model,
                    algorithm=algorithm,
                    save_path='run.pkl',
                    save_freq=1,
                    extensions=extensions,
                    dataset=trainset)
    trainer.main_loop()
patch_width = 4
# assume 2D for next line
patch_shape = (patch_width, patch_width)
patches_per_image = 3

num_components = num_channels * input_width**input_dim
# keep_var_fraction currently doesn't work properly because of a bug in pca.py
keep_var_fraction = .9
train_size = 4900
nhid = 40

# Not sure why this would need to go here...
corruptor = BinomialCorruptor(corruption_level=0.5)


# Loads the preprocessed dataset, or does the preprocessing and saves it.
# Currently using CIFAR10.
def get_processed_dataset():
    train_path = 'pp_cifar10_train.pkl'
    test_path = 'pp_cifar10_test.pkl'

    if os.path.exists(train_path) and os.path.exists(test_path):
        print 'loading preprocessed data'
        trainset = serial.load(train_path)
        testset = serial.load(test_path)
def train_sda(params):
    input_trainset, trainset_yaml_str = load_yaml_file(
        os.path.join(os.path.dirname(__file__),
                     'train_sda_dataset_template.yaml'),
        params=params,
    )

    log.info('... building the model')

    # build layers
    layer_dims = [params.input_length]
    layer_dims.extend(params.hidden_layers_sizes)
    layers = []
    for i in xrange(1, len(layer_dims)):
        layer_params = {
            'name': 'da' + str(i),
            'n_inputs': layer_dims[i - 1],
            'n_outputs': layer_dims[i],
            'corruption_level': params.pretrain.corruption_levels[i - 1],
            'input_range': numpy.sqrt(6. / (layer_dims[i - 1] + layer_dims[i])),
            'random_seed': params.random_seed,
        }
        layers.append(load_yaml_file(
            os.path.join(os.path.dirname(__file__),
                         'train_sda_layer_template.yaml'),
            params=layer_params,
        )[0])

    # unsupervised pre-training
    log.info('... pre-training the model')
    start_time = time.clock()

    for i in xrange(len(layers)):
        # reset corruption to make sure input is not corrupted
        for layer in layers:
            layer.set_corruption_level(0)

        if i == 0:
            trainset = input_trainset
        elif i == 1:
            trainset = TransformerDataset(raw=input_trainset,
                                          transformer=layers[0])
        else:
            trainset = TransformerDataset(raw=input_trainset,
                                          transformer=StackedBlocks(layers[0:i]))

        # set corruption for layer to train
        layers[i].set_corruption_level(params.pretrain.corruption_levels[i])

        # FIXME: this is not so nice, but we have to do it this way as YAML
        # is not flexible enough
        trainer = get_layer_trainer_sgd_autoencoder(
            layers[i], trainset,
            learning_rate=params.pretrain.learning_rate,
            max_epochs=params.pretrain.epochs,
            batch_size=params.pretrain.batch_size,
            name='pre-train' + str(i))

        log.info('unsupervised training layer %d, %s ' % (i, layers[i].__class__))
        trainer.main_loop()

        # theano.printing.pydotprint_variables(
        #     layer_trainer.algorithm.sgd_update.maker.fgraph.outputs[0],
        #     outfile='pylearn2-sgd_update.png',
        #     var_with_name_simple=True)

    end_time = time.clock()
    log.info('pre-training code ran for {0:.2f}m'.format(
        (end_time - start_time) / 60.))

    if params.untie_weights:
        # now untie the decoder weights
        log.info('untying decoder weights')
        for layer in layers:
            layer.untie_weights()

    # construct multi-layer training functions

    # unsupervised training
    log.info('... training the model')

    sdae = None
    for depth in xrange(1, len(layers) + 1):
        first_layer_i = len(layers) - depth
        log.debug('training layers {}..{}'.format(first_layer_i,
                                                  len(layers) - 1))

        group = layers[first_layer_i:len(layers)]
        # log.debug(group)

        # reset corruption
        for layer in layers:
            layer.set_corruption_level(0)

        if first_layer_i == 0:
            trainset = input_trainset
        elif first_layer_i == 1:
            trainset = TransformerDataset(raw=input_trainset,
                                          transformer=layers[0])
        else:
            trainset = TransformerDataset(raw=input_trainset,
                                          transformer=StackedBlocks(
                                              layers[0:first_layer_i]))

        # set corruption for input layer of stack to train
        # layers[first_layer_i].set_corruption_level(stage2_corruption_levels[first_layer_i])
        corruptor = LoggingCorruptor(
            BinomialCorruptor(
                corruption_level=params.pretrain_finetune.
                corruption_levels[first_layer_i]),
            name='depth {}'.format(depth))
        sdae = StackedDenoisingAutoencoder(group, corruptor)

        trainer = get_layer_trainer_sgd_autoencoder(
            sdae, trainset,
            learning_rate=params.pretrain_finetune.learning_rate,
            max_epochs=params.pretrain_finetune.epochs,
            batch_size=params.pretrain_finetune.batch_size,
            name='multi-train' + str(depth))

        log.info('unsupervised multi-layer training %d' % depth)
        trainer.main_loop()

    end_time = time.clock()
    log.info('full training code ran for {0:.2f}m'.format(
        (end_time - start_time) / 60.))

    # save the model
    model_file = os.path.join(params.experiment_root, 'sda', 'sda_all.pkl')
    with log_timing(log, 'saving SDA model to {}'.format(model_file)):
        serial.save(model_file, sdae)

    if params.untie_weights:
        # save individual layers for later (with untied weights)
        for i, layer in enumerate(sdae.autoencoders):
            layer_file = os.path.join(params.experiment_root, 'sda',
                                      'sda_layer{}_untied.pkl'.format(i))
            with log_timing(log, 'saving SDA layer {} model to {}'.format(
                    i, layer_file)):
                serial.save(layer_file, layer)

    # save individual layers for later (with tied weights)
    for i, layer in enumerate(sdae.autoencoders):
        if params.untie_weights:
            layer.tie_weights()
        layer_file = os.path.join(params.experiment_root, 'sda',
                                  'sda_layer{}_tied.pkl'.format(i))
        with log_timing(log, 'saving SDA layer {} model to {}'.format(
                i, layer_file)):
            serial.save(layer_file, layer)

    log.info('done')
    return sdae
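# The dataset chaining above is the core of the greedy scheme: layer i trains
# on the raw data pushed through all layers below it. A minimal standalone
# sketch of that pattern with pylearn2 primitives (the dataset and the layer
# list are assumed to exist; this just factors out the repeated if/elif/else).
from pylearn2.blocks import StackedBlocks
from pylearn2.datasets.transformer_dataset import TransformerDataset

def trainset_for_layer(i, input_trainset, layers):
    # Layer 0 sees the raw data; layer i > 0 sees the data encoded by the
    # already-trained layers 0..i-1.
    if i == 0:
        return input_trainset
    if i == 1:
        return TransformerDataset(raw=input_trainset, transformer=layers[0])
    return TransformerDataset(raw=input_trainset,
                              transformer=StackedBlocks(layers[:i]))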
def main():
    # Only the trainset is processed by this function.
    print 'getting preprocessed data for training model'
    pp_trainset, testset = get_processed_dataset()

    # Remember to change here when changing datasets.
    print 'loading unprocessed data for input displays'
    trainset = cifar10.CIFAR10(which_set="train")

    dmat = pp_trainset.get_design_matrix()
    nvis = dmat.shape[1]

    model = DenoisingAutoencoder(
        corruptor=BinomialCorruptor(corruption_level=0.3),
        nhid=nhid,
        nvis=nvis,
        act_enc='sigmoid',
        act_dec='sigmoid',
        irange=.01)

    algorithm = SGD(
        learning_rate=learning_rate,
        cost=MeanSquaredReconstructionError(),
        batch_size=100,
        monitoring_batches=10,
        monitoring_dataset=pp_trainset,
        termination_criterion=EpochCounter(max_epochs=MAX_EPOCHS_UNSUPERVISED),
        update_callbacks=None)

    extensions = None
    trainer = Train(model=model,
                    algorithm=algorithm,
                    save_path='run.pkl',
                    save_freq=1,
                    extensions=extensions,
                    dataset=pp_trainset)
    trainer.main_loop()

    ####################
    # Plot and Save:

    # choose random patch-pairs to plot
    stamps = pp_trainset.stamps
    num_examples = stamps.shape[0]
    to_plot = np.random.randint(0, num_examples, num2plot)

    # use to_plot indices to extract data
    stamps_data = stamps[to_plot]
    image_numbers = stamps[to_plot, 0].astype(int)
    X = trainset.X
    images_data = trainset.get_topological_view(X[image_numbers])
    p1x = stamps_data[:, 1]
    p1y = stamps_data[:, 2]
    p2x = stamps_data[:, 3]
    p2y = stamps_data[:, 4]

    # For input ppd's, once we've identified the patches, we just outline
    # them and draw an arrow for d. This might mess with the original
    # trainset (I dunno), in which case we should make a copy.
    add_outlines(images_data, p1x, p1y, patch_width)
    add_outlines(images_data, p2x, p2y, patch_width)

    ##################################################
    # translating outputs back into things we can plot
    dataset = pp_trainset
    Xout = dataset.X.astype('float32')

    max_stamp = input_width - patch_width
    d_size = (2 * max_stamp + 1)**input_dim

    # displacement
    d_enc = Xout[:, -d_size:]
    d_out_flat = np.argmax(d_enc, axis=1)
    d_shape = [2 * max_stamp + 1, 2 * max_stamp + 1]  # assumed 2D
    d_out = flat_to_2D(d_out_flat, d_shape)
    d_out[to_plot, ]

    # patches
    vc = dataset.view_converter
    p_enc = Xout[:, :len(Xout.T) - d_size]
    p_size = p_enc.shape[1] / 2
    p1_enc = p_enc[:, :p_size]
    p2_enc = p_enc[:, p_size:]
    p1_enc = vc.design_mat_to_topo_view(p1_enc)
    p2_enc = vc.design_mat_to_topo_view(p2_enc)

    pp = dataset.preprocessor
    gcn = pp.items[1]
    means = gcn.means
    normalizers = gcn.normalizers

    toshape = (num_examples, )
    for i in range(input_dim):
        toshape += (1, )
    if num_channels != 1:
        toshape += (1, )

    # When the number of patches and patch-pairs differs, this breaks.
    # I need to match up normalizers/means with their corresponding patches.
    # Undoing the PCA might be breaking too, but without errors...
    normalizers1 = expand_p1(normalizers)
    normalizers2 = expand_p2(normalizers)
    means1 = expand_p1(means)
    means2 = expand_p2(means)
    p1_enc *= normalizers1.reshape(toshape)
    p1_enc += means1.reshape(toshape)
    p2_enc *= normalizers2.reshape(toshape)
    p2_enc += means2.reshape(toshape)

    # Now, we pull off the same examples from the data to compare to dAE
    # inputs in plots.
    outputs = copy.deepcopy(images_data)
    insertpatches(outputs, p1_enc[to_plot], p1x, p1y, patch_width)
    insertpatches(outputs, p2_enc[to_plot], p2x, p2y, patch_width)

    plt.figure()
    for i in range(num2plot):
        # Inputs
        plt.subplot(num2plot, 2, 2 * i + 1)
        plt.imshow(images_data[i], cmap=cm.Greys_r)
        print stamps_data[i]
        a = (stamps_data[i, 2] + patch_width / 2,
             stamps_data[i, 1] + patch_width / 2,
             stamps_data[i, 6],
             stamps_data[i, 5])
        plt.arrow(a[0], a[1], a[2], a[3], head_width=1.0, head_length=0.6)
        # Outputs
        plt.subplot(num2plot, 2, 2 * i + 2)
        plt.imshow(outputs[i], cmap=cm.Greys_r)
        plt.arrow(a[0], a[1], d_out[to_plot[i], 1], d_out[to_plot[i], 0],
                  head_width=1.0, head_length=0.6)
    plt.show()

    savestr = 'cifar_ppd.png'
    plt.savefig(savestr)
def train_SdA(config, dataset):
    ## load config
    hidden_layers_sizes = config.get('hidden_layers_sizes', [10, 10])
    corruption_levels = config.get('corruption_levels', [0.1, 0.2])
    stage2_corruption_levels = config.get('stage2_corruption_levels', [0.1, 0.1])
    pretrain_epochs = config.get('pretrain_epochs', 10)
    pretrain_lr = config.get('pretrain_learning_rate', 0.001)
    finetune_epochs = config.get('finetune_epochs', 10)
    finetune_lr = config.get('finetune_learning_rate', 0.01)
    batch_size = config.get('batch_size', 10)
    monitoring_batches = config.get('monitoring_batches', 5)
    output_path = config.get('output_path', './')

    input_trainset = dataset
    design_matrix = input_trainset.get_design_matrix()
    # print design_matrix.shape
    n_input = design_matrix.shape[1]
    log.info('done')
    log.debug('input dimensions : {0}'.format(n_input))
    log.debug('training examples: {0}'.format(design_matrix.shape[0]))

    # numpy random generator
    # numpy_rng = numpy.random.RandomState(89677)

    log.info('... building the model')

    # build layers
    layer_dims = [n_input]
    layer_dims.extend(hidden_layers_sizes)
    layers = []
    for i in xrange(1, len(layer_dims)):
        structure = [layer_dims[i - 1], layer_dims[i]]
        layers.append(create_denoising_autoencoder(
            structure, corruption=corruption_levels[i - 1]))

    # unsupervised pre-training
    log.info('... pre-training the model')
    start_time = time.clock()

    for i in xrange(len(layers)):
        # reset corruption to make sure input is not corrupted
        for layer in layers:
            layer.set_corruption_level(0)

        if i == 0:
            trainset = input_trainset
        elif i == 1:
            trainset = TransformerDataset(raw=input_trainset,
                                          transformer=layers[0])
        else:
            trainset = TransformerDataset(raw=input_trainset,
                                          transformer=StackedBlocks(layers[0:i]))

        # set corruption for layer to train
        layers[i].set_corruption_level(corruption_levels[i])

        trainer = get_layer_trainer_sgd_autoencoder(
            layers[i], trainset,
            learning_rate=pretrain_lr,
            max_epochs=pretrain_epochs,
            batch_size=batch_size,
            monitoring_batches=monitoring_batches,
            name='pre-train' + str(i))

        log.info('unsupervised training layer %d, %s ' % (i, layers[i].__class__))
        trainer.main_loop()

        # theano.printing.pydotprint_variables(
        #     layer_trainer.algorithm.sgd_update.maker.fgraph.outputs[0],
        #     outfile='pylearn2-sgd_update.png',
        #     var_with_name_simple=True)

    end_time = time.clock()
    log.info('pre-training code ran for {0:.2f}m'.format(
        (end_time - start_time) / 60.))

    # now untie the decoder weights
    log.info('untying decoder weights')
    for layer in layers:
        layer.untie_weights()

    # construct multi-layer training functions

    # unsupervised training
    log.info('... training the model')

    sdae = None
    for depth in xrange(1, len(layers) + 1):
        first_layer_i = len(layers) - depth
        log.debug('training layers {}..{}'.format(first_layer_i,
                                                  len(layers) - 1))

        group = layers[first_layer_i:len(layers)]
        # log.debug(group)

        # reset corruption
        for layer in layers:
            layer.set_corruption_level(0)

        if first_layer_i == 0:
            trainset = input_trainset
        elif first_layer_i == 1:
            trainset = TransformerDataset(raw=input_trainset,
                                          transformer=layers[0])
        else:
            trainset = TransformerDataset(raw=input_trainset,
                                          transformer=StackedBlocks(
                                              layers[0:first_layer_i]))

        # set corruption for input layer of stack to train
        # layers[first_layer_i].set_corruption_level(stage2_corruption_levels[first_layer_i])
        corruptor = LoggingCorruptor(
            BinomialCorruptor(
                corruption_level=stage2_corruption_levels[first_layer_i]),
            name='depth {}'.format(depth))
        sdae = StackedDenoisingAutoencoder(group, corruptor)

        trainer = get_layer_trainer_sgd_autoencoder(
            sdae, trainset,
            learning_rate=finetune_lr,
            max_epochs=finetune_epochs,
            batch_size=batch_size,
            monitoring_batches=monitoring_batches,
            name='multi-train' + str(depth))

        log.info('unsupervised multi-layer training %d' % depth)
        trainer.main_loop()

    end_time = time.clock()
    log.info('full training code ran for {0:.2f}m'.format(
        (end_time - start_time) / 60.))

    # save the model
    model_file = os.path.join(output_path, 'sdae-model.pkl')
    with log_timing(log, 'saving SDA model to {}'.format(model_file)):
        serial.save(model_file, sdae)

    # TODO: pylearn2.train_extensions.best_params.KeepBestParams(
    #           model, cost, monitoring_dataset, batch_size)
    #       pylearn2.train_extensions.best_params.MonitorBasedSaveBest

    log.info('done')
    return sdae
def main_train(work_dir="../results/avicenna/",
               corruption_level=0.3,
               nvis=75,
               nhid=600,
               tied_weights=True,
               act_enc="sigmoid",
               act_dec=None,
               max_epochs=2,
               learning_rate=0.001,
               batch_size=20,
               monitoring_batches=5,
               save_freq=1,
               n_components_trans_pca=7):
    conf = {
        'corruption_level': corruption_level,
        'nvis': nvis,
        'nhid': nhid,
        'tied_weights': tied_weights,
        'act_enc': act_enc,
        'act_dec': act_dec,
        'max_epochs': max_epochs,
        'learning_rate': learning_rate,
        'batch_size': batch_size,
        'monitoring_batches': monitoring_batches,
        'save_freq': save_freq,
        'n_components_trans_pca': n_components_trans_pca,
    }

    start = time.clock()

    ############### TRAIN THE DAE
    train_file = work_dir + "train_pca" + str(conf['nvis']) + ".npy"
    save_path = (work_dir + "train_pca" + str(conf['nvis']) +
                 "_dae" + str(conf['nhid']) + "_model.pkl")

    trainset = NpyDataset(file=train_file)
    trainset.yaml_src = 'script'
    corruptor = BinomialCorruptor(corruption_level=conf['corruption_level'])
    dae = DenoisingAutoencoder(nvis=conf['nvis'],
                               nhid=conf['nhid'],
                               tied_weights=conf['tied_weights'],
                               corruptor=corruptor,
                               act_enc=conf['act_enc'],
                               act_dec=conf['act_dec'])
    cost = MeanSquaredReconstructionError()
    termination_criterion = EpochCounter(max_epochs=conf['max_epochs'])
    algorithm = UnsupervisedExhaustiveSGD(
        learning_rate=conf['learning_rate'],
        batch_size=conf['batch_size'],
        monitoring_batches=conf['monitoring_batches'],
        monitoring_dataset=trainset,
        cost=cost,
        termination_criterion=termination_criterion)

    train_obj = Train(dataset=trainset,
                      model=dae,
                      algorithm=algorithm,
                      save_freq=conf['save_freq'],
                      save_path=save_path)
    train_obj.main_loop()

    ############### APPLY THE MODEL ON THE TRAIN DATASET
    print("Applying the model on the train dataset...")
    model = load(save_path)
    save_train_path = (work_dir + "train_pca" + str(conf['nvis']) +
                       "_dae" + str(conf['nhid']) + ".npy")
    dump_obj = FeatureDump(encoder=model,
                           dataset=trainset,
                           path=save_train_path)
    dump_obj.main_loop()

    ############### APPLY THE MODEL ON THE VALID DATASET
    print("Applying the model on the valid dataset...")
    valid_file = work_dir + "valid_pca" + str(conf['nvis']) + ".npy"
    validset = NpyDataset(file=valid_file)
    validset.yaml_src = 'script'
    save_valid_path = (work_dir + "valid_pca" + str(conf['nvis']) +
                       "_dae" + str(conf['nhid']) + ".npy")
    dump_obj = FeatureDump(encoder=model,
                           dataset=validset,
                           path=save_valid_path)
    dump_obj.main_loop()

    ############### APPLY THE MODEL ON THE TEST DATASET
    print("Applying the model on the test dataset...")
    test_file = work_dir + "test_pca" + str(conf['nvis']) + ".npy"
    testset = NpyDataset(file=test_file)
    testset.yaml_src = 'script'
    save_test_path = (work_dir + "test_pca" + str(conf['nvis']) +
                      "_dae" + str(conf['nhid']) + ".npy")
    dump_obj = FeatureDump(encoder=model,
                           dataset=testset,
                           path=save_test_path)
    dump_obj.main_loop()

    ############### COMPUTE THE ALC SCORE ON VALIDATION SET
    valid_data = ift6266h12.load_npy(save_valid_path)
    label_data = ift6266h12.load_npy(
        '/data/lisa/data/UTLC/numpy_data/avicenna_valid_y.npy')
    alc_1 = score(valid_data, label_data)

    ############### APPLY THE TRANSDUCTIVE PCA
    test_data = ift6266h12.load_npy(save_test_path)
    trans_pca = PCA(n_components=conf['n_components_trans_pca'])
    # Note: fit_transform is called separately on the valid and test sets,
    # so each set is projected onto its own principal components.
    final_valid = trans_pca.fit_transform(valid_data)
    final_test = trans_pca.fit_transform(test_data)

    save_valid_path = (work_dir + "valid_pca" + str(conf['nvis']) +
                       "_dae" + str(conf['nhid']) + "_tpca" +
                       str(conf['n_components_trans_pca']) + ".npy")
    save_test_path = (work_dir + "test_pca" + str(conf['nvis']) +
                      "_dae" + str(conf['nhid']) + "_tpca" +
                      str(conf['n_components_trans_pca']) + ".npy")
    np.save(save_valid_path, final_valid)
    np.save(save_test_path, final_test)

    ############### COMPUTE THE NEW ALC SCORE ON VALIDATION SET
    alc_2 = score(final_valid, label_data)

    ############### OUTPUT AND RETURN THE RESULTS
    time_spent = (time.clock() - start) / 60.
    print 'FINAL RESULTS (PCA-' + str(conf['nvis']) + \
        ' DAE-' + str(conf['nhid']) + \
        ' TransPCA-' + str(conf['n_components_trans_pca']) + \
        ') ALC after DAE: ', alc_1, ' FINAL ALC: ', alc_2, \
        ' Computed in %5.2f min' % time_spent

    return time_spent, alc_1, alc_2
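# If the intent of the transductive step above were a shared embedding, the
# PCA would be fit once and then applied to both sets; as written, each
# fit_transform call refits the components on its own data. A sketch of the
# shared-basis alternative, assuming the PCA class is scikit-learn's (this is
# an alternative, not what the original function does):
from sklearn.decomposition import PCA

trans_pca = PCA(n_components=7)
final_valid = trans_pca.fit_transform(valid_data)  # fit on valid only
final_test = trans_pca.transform(test_data)        # reuse the same components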