def get_layer_trainer_sgd_autoencoder(layer, trainset, batch_size=10,
                                      learning_rate=0.1, max_epochs=100,
                                      name=''):
    # configs on sgd
    train_algo = SGD(
        learning_rate=learning_rate,
        # learning_rule=AdaDelta(),
        learning_rule=Momentum(init_momentum=0.5),
        cost=MeanSquaredReconstructionError(),
        batch_size=batch_size,
        monitoring_dataset=trainset,
        termination_criterion=EpochCounter(max_epochs=max_epochs),
        update_callbacks=None)
    log_callback = LoggingCallback(name)
    return Train(model=layer,
                 algorithm=train_algo,
                 extensions=[log_callback,
                             OneOverEpoch(start=1, half_life=5),
                             MomentumAdjustor(final_momentum=0.7,
                                              start=10,
                                              saturate=100)],
                 dataset=trainset)
def get_ae_pretrainer(layer, data, batch_size, epochs=30):
    init_lr = 0.05
    train_algo = SGD(
        batch_size=batch_size,
        learning_rate=init_lr,
        learning_rule=Momentum(init_momentum=0.5),
        monitoring_batches=batch_size,
        monitoring_dataset=data,
        # for ContractiveAutoencoder:
        # cost=cost.SumOfCosts(costs=[[1., MeanSquaredReconstructionError()],
        #                             [0.5, cost.MethodCost(method='contraction_penalty')]]),
        # for HigherOrderContractiveAutoencoder:
        # cost=cost.SumOfCosts(costs=[[1., MeanSquaredReconstructionError()],
        #                             [0.5, cost.MethodCost(method='contraction_penalty')],
        #                             [0.5, cost.MethodCost(method='higher_order_penalty')]]),
        # for DenoisingAutoencoder:
        cost=MeanSquaredReconstructionError(),
        termination_criterion=EpochCounter(epochs))
    return Train(model=layer, algorithm=train_algo, dataset=data,
                 extensions=[MomentumAdjustor(final_momentum=0.9,
                                              start=0, saturate=25),
                             LinearDecayOverEpoch(start=1, saturate=25,
                                                  decay_factor=.02)])
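# A minimal usage sketch for get_ae_pretrainer (illustrative, not from the
# original source): pretrain one denoising-autoencoder layer. The random
# placeholder data and the 500/200 layer sizes are assumptions.
import numpy as np
from pylearn2.corruption import BinomialCorruptor
from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix
from pylearn2.models.autoencoder import DenoisingAutoencoder

X = np.random.rand(1000, 500).astype('float32')  # placeholder data
data = DenseDesignMatrix(X=X)
layer = DenoisingAutoencoder(nvis=500, nhid=200,
                             corruptor=BinomialCorruptor(corruption_level=0.2),
                             act_enc='sigmoid', act_dec='sigmoid', irange=0.05)
pretrainer = get_ae_pretrainer(layer, data, batch_size=100, epochs=30)
pretrainer.main_loop()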
def __init__(self, runner, model_params, resume=False, resume_data=None,
             s3_data=None, **kwargs):
    dataset = create_dense_design_matrix(x=runner.dp.train_set_x)
    if resume:
        model, model_params = self.resume_model(model_params, resume_data)
    else:
        model = self.new_model(model_params, dataset=dataset)
    termination_criterion = MaxEpochNumber(model_params['maxnum_iter'])
    algorithm = SGD(learning_rate=model_params['learning_rate']['init'],
                    monitoring_dataset=dataset,
                    cost=MeanSquaredReconstructionError(),
                    termination_criterion=termination_criterion,
                    batch_size=model_params['batch_size'])
    ext = AutoEncoderStatReporter(runner,
                                  resume=resume,
                                  resume_data=resume_data,
                                  save_freq=model_params['save_freq'])
    self.train_obj = Train(dataset=dataset,
                           model=model,
                           algorithm=algorithm,
                           extensions=[ext])
def train_model():
    global ninput, noutput
    simdata = SimulationData(
        sim_path="../../javaDataCenter/generarDadesV1/CA_SDN_topo1/")
    simdata.load_data()
    simdata.preprocessor()
    dataset = simdata.get_matrix()

    structure = get_structure()
    layers = []
    for pair in structure:
        layers.append(get_autoencoder(pair))
    model = DeepComposedAutoencoder(layers)

    training_alg = SGD(learning_rate=1e-3,
                       cost=MeanSquaredReconstructionError(),
                       batch_size=1296,
                       monitoring_dataset=dataset,
                       termination_criterion=EpochCounter(max_epochs=50))
    extensions = [MonitorBasedLRAdjuster()]
    experiment = Train(dataset=dataset,
                       model=model,
                       algorithm=training_alg,
                       save_path='training2.pkl',
                       save_freq=10,
                       allow_overwrite=True,
                       extensions=extensions)
    experiment.main_loop()
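# The two helpers used above are not shown in this snippet; a hypothetical
# sketch of what they might look like, assuming pylearn2's Autoencoder and a
# three-layer stack (the intermediate layer sizes are illustrative):
from pylearn2.models.autoencoder import Autoencoder

def get_structure():
    # each pair is (n_visible, n_hidden) for one layer of the stack
    return [[ninput, 200], [200, 100], [100, noutput]]

def get_autoencoder(pair):
    nvis, nhid = pair
    return Autoencoder(nvis=nvis, nhid=nhid,
                       act_enc='sigmoid', act_dec='sigmoid', irange=0.05)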
def set_training_criteria(self,
                          learning_rate=0.05,
                          cost=MeanSquaredReconstructionError(),
                          batch_size=10,
                          max_epochs=10):
    self.training_alg = SGD(learning_rate=learning_rate,
                            cost=cost,
                            batch_size=batch_size,
                            monitoring_dataset=self.datasets,
                            termination_criterion=EpochCounter(max_epochs))
def create_layer_one(self):
    which_set = "train"
    one_hot = True
    start = 0
    # Creating 5 random patch layers based on 8,000 samples (the saturation
    # point where the objective no longer improves).
    stop = 800
    # GridPatchCIFAR10 randomly selects five 16x16 patches from each image,
    # and we repeat this five times. This increases the amount of training
    # data and captures more local structure, much as neurons in the eye
    # respond to a specific region of the image.
    dataset = GridPatchCIFAR10(which_set=which_set, one_hot=one_hot,
                               start=start, stop=stop)

    # Denoising autoencoder model hyperparameters
    nvis = 768  # 16 x 16 patches x 3 channels
    nhid = 512
    irange = 0.05
    corruption_lvl = 0.2
    corruptor = BinomialCorruptor(corruption_level=corruption_lvl)
    activation_encoder = "tanh"
    activation_decoder = None  # linear activation

    # Creating the denoising autoencoder
    model = DenoisingAutoencoder(nvis=nvis,
                                 nhid=nhid,
                                 irange=irange,
                                 corruptor=corruptor,
                                 act_enc=activation_encoder,
                                 act_dec=activation_decoder)

    # Parameters for the SGD learning algorithm instantiated below
    learning_rate = 0.001
    batch_size = 100
    monitoring_batches = 5
    monitoring_dataset = dataset
    cost = MeanSquaredReconstructionError()
    max_epochs = 10
    termination_criterion = EpochCounter(max_epochs=max_epochs)

    # SGD learning algorithm
    algorithm = SGD(learning_rate=learning_rate,
                    batch_size=batch_size,
                    monitoring_batches=monitoring_batches,
                    monitoring_dataset=monitoring_dataset,
                    cost=cost,
                    termination_criterion=termination_criterion)

    # Each Process gets its own copy of the model (via fork on Unix), so the
    # five sub-layer runs do not share state; each saves to its own path.
    processes = []
    for i in range(5):
        print "Training DAE Sub-Layer: ", i
        save_path = self.save_path + str(i) + ".pkl"
        save_freq = 1
        train = Train(dataset=dataset, model=model, algorithm=algorithm,
                      save_path=save_path, save_freq=save_freq)
        p = Process(target=train.main_loop, args=())
        p.start()
        processes.append(p)
    for process in processes:
        process.join()
def get_layer_trainer(layer):
    # configs on sgd
    config = {'learning_rate': 0.1,
              'cost': MeanSquaredReconstructionError(),
              'batch_size': 10,
              'monitoring_batches': 10,
              'monitoring_dataset': ToyDataset(),
              'termination_criterion': EpochCounter(max_epochs=100),
              'update_callbacks': None}
    train_algo = UnsupervisedExhaustiveSGD(**config)
    model = layer
    callbacks = None
    return LayerTrainer(model, train_algo, callbacks)
def get_layer_trainer_sgd_autoencoder(layer, trainset):
    # configs on sgd
    train_algo = SGD(
        learning_rate=0.1,
        cost=MeanSquaredReconstructionError(),
        batch_size=10,
        monitoring_batches=10,
        monitoring_dataset=trainset,
        termination_criterion=EpochCounter(max_epochs=MAX_EPOCHS_UNSUPERVISED),
        update_callbacks=None)
    model = layer
    extensions = None
    return Train(model=model,
                 algorithm=train_algo,
                 extensions=extensions,
                 dataset=trainset)
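# A hedged sketch of how trainers like the one above are typically chained
# for greedy layer-wise pretraining. `layers` and `raw_trainset` are
# hypothetical names; TransformerDataset feeds each layer the previous
# layer's encoding of the data.
from pylearn2.datasets.transformer_dataset import TransformerDataset

trainers = []
current_data = raw_trainset
for layer in layers:
    trainers.append(get_layer_trainer_sgd_autoencoder(layer, current_data))
    current_data = TransformerDataset(raw=current_data, transformer=layer)

for trainer in trainers:
    trainer.main_loop()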
def get_ae_trainer(model, dataset, save_path, epochs=5):
    """An Autoencoder (AE) trainer."""
    config = {
        'learning_rate': 1e-2,
        'train_iteration_mode': 'shuffled_sequential',
        'batch_size': 250,
        # 'batches_per_iter': 2000,
        'learning_rule': RMSProp(),
        'monitoring_dataset': dataset,
        'cost': MeanSquaredReconstructionError(),
        'termination_criterion': EpochCounter(max_epochs=epochs),
    }
    return Train(model=model, algorithm=SGD(**config), dataset=dataset,
                 save_path=save_path, save_freq=1)  # , extensions=extensions)
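# Usage sketch for get_ae_trainer (the model and dataset are assumed to be
# built as in the surrounding snippets):
trainer = get_ae_trainer(model, dataset, save_path='ae_rmsprop.pkl', epochs=5)
trainer.main_loop()  # checkpoints the model to ae_rmsprop.pkl every epoch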
def main():
    # Only the trainset is processed by this function.
    print 'getting preprocessed data to train model'
    pp_trainset, testset = get_processed_dataset()

    # remember to change here when changing datasets
    print 'loading unprocessed data for input displays'
    trainset = cifar10.CIFAR10(which_set="train")

    # NOTE: nvis is taken from the unprocessed trainset; this assumes
    # preprocessing preserves dimensionality (compare the later variant,
    # which reads the design matrix from pp_trainset instead).
    dmat = trainset.get_design_matrix()
    nvis = dmat.shape[1]

    model = DenoisingAutoencoder(
        corruptor=BinomialCorruptor(corruption_level=0.5),
        nhid=nhid,
        nvis=nvis,
        act_enc='sigmoid',
        act_dec='sigmoid',
        irange=.01)

    algorithm = SGD(
        learning_rate=0.1,
        cost=MeanSquaredReconstructionError(),
        batch_size=1000,
        monitoring_batches=10,
        monitoring_dataset=pp_trainset,
        termination_criterion=EpochCounter(max_epochs=MAX_EPOCHS_UNSUPERVISED),
        update_callbacks=None)

    extensions = None
    trainer = Train(model=model,
                    algorithm=algorithm,
                    save_path='testrun.pkl',
                    save_freq=1,
                    extensions=extensions,
                    dataset=pp_trainset)
    trainer.main_loop()
def get_layer_trainer_sgd_autoencoder(layer, trainset,
                                      batch_size=10, monitoring_batches=5,
                                      learning_rate=0.1, max_epochs=100,
                                      name=''):
    # configs on sgd
    # (the original hardcoded batch_size=10 and monitoring_batches=5 here,
    # ignoring the function's parameters; fixed to pass them through)
    train_algo = SGD(
        learning_rate=learning_rate,
        # learning_rule=AdaDelta(),
        cost=MeanSquaredReconstructionError(),
        batch_size=batch_size,
        monitoring_batches=monitoring_batches,
        monitoring_dataset=trainset,
        termination_criterion=EpochCounter(max_epochs=max_epochs),
        update_callbacks=None)
    log_callback = LoggingCallback(name)
    return Train(model=layer,
                 algorithm=train_algo,
                 extensions=[log_callback],
                 dataset=trainset)
def main():
    # The data isn't fully processed, so the testset may still need some
    # work before it is usable.
    trainset, testset = get_processed_dataset()

    # Creating the patch-pairs:
    design_matrix = trainset.get_design_matrix()
    processed_patch_size = design_matrix.shape[1]

    num_images = train_size
    examples_per_image = patches_per_image * (patches_per_image - 1)
    num_examples = examples_per_image * num_images

    stamps = trainset.stamps
    max_stamp = input_width - patch_width
    d_size = (2 * max_stamp + 1) ** input_dim

    patch_pairs = np.zeros((num_examples, 2 * processed_patch_size))
    distances = np.zeros((num_examples, input_dim))
    distances_onehot = np.zeros((num_examples, d_size))
    examples = np.zeros((num_examples, 2 * processed_patch_size + d_size))

    nvis = 2 * processed_patch_size + d_size

    def flatten_encoding(encoding, max_stamp):
        # Flatten a multi-dimensional offset into a single index.
        # NOTE: d_size assumes base (2 * max_stamp + 1); multiplying by
        # max_stamp here can make distinct offsets collide.
        dims = len(encoding)
        flat_encoding = 0
        for i in xrange(dims - 1):
            flat_encoding += encoding[i]
            flat_encoding *= max_stamp
        flat_encoding += encoding[-1]
        return flat_encoding  # the original snippet was missing this return

    # Can this be done with fewer for loops?
    print 'begin for loop'
    for i in xrange(num_images):
        if i % 1000 == 0:
            print i, '-th outer loop...'
        for j in xrange(patches_per_image):
            patch1_num = i * patches_per_image + j
            patch1_pos = stamps[patch1_num, :]
            for k in xrange(patches_per_image):
                example_num = (i * examples_per_image +
                               j * (patches_per_image - 1) + k)
                if k > j:
                    example_num -= 1
                if k != j:
                    patch2_num = i * patches_per_image + k
                    patch2_pos = stamps[patch2_num, :]
                    distance = patch1_pos - patch2_pos
                    distances[example_num] = distance
                    distance_encoding = distance + max_stamp
                    distance_encoding = flatten_encoding(distance_encoding,
                                                         max_stamp)
                    distances_onehot[example_num, distance_encoding] = 1
                    p1 = design_matrix[patch1_num]
                    p2 = design_matrix[patch2_num]
                    patch_pairs[example_num] = np.hstack((p1, p2))
                    examples[example_num] = np.hstack(
                        (patch_pairs[example_num],
                         distances_onehot[example_num]))
    print 'end for loop'

    trainset.set_design_matrix(examples)

    model = DenoisingAutoencoder(
        corruptor=BinomialCorruptor(corruption_level=0.5),
        nhid=nhid,
        nvis=nvis,
        act_enc='sigmoid',
        act_dec='sigmoid',
        irange=.01)

    algorithm = SGD(
        learning_rate=0.1,
        cost=MeanSquaredReconstructionError(),
        batch_size=100,
        monitoring_batches=10,
        monitoring_dataset=trainset,
        termination_criterion=EpochCounter(max_epochs=MAX_EPOCHS_UNSUPERVISED),
        update_callbacks=None)

    extensions = None
    trainer = Train(model=model,
                    algorithm=algorithm,
                    save_path='run.pkl',
                    save_freq=1,
                    extensions=extensions,
                    dataset=trainset)
    trainer.main_loop()
def __init__(self, cost=MeanSquaredReconstructionError()):
    self.cost = cost
    self.train_iteration_mode = 'even_sequential'
    self.monitor_iteration_mode = 'even_sequential'
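# Worth noting: the default cost above is evaluated once, at function
# definition time, so all instances that omit `cost` share one
# MeanSquaredReconstructionError object. That is likely harmless for a
# stateless cost, but the usual defensive pattern is:
def __init__(self, cost=None):
    self.cost = cost if cost is not None else MeanSquaredReconstructionError()
    self.train_iteration_mode = 'even_sequential'
    self.monitor_iteration_mode = 'even_sequential'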
def main_train(work_dir="../results/avicenna/", corruption_level=0.3,
               nvis=75, nhid=600, tied_weights=True, act_enc="sigmoid",
               act_dec=None, max_epochs=2, learning_rate=0.001,
               batch_size=20, monitoring_batches=5, save_freq=1,
               n_components_trans_pca=7):
    conf = {
        'corruption_level': corruption_level,
        'nvis': nvis,
        'nhid': nhid,
        'tied_weights': tied_weights,
        'act_enc': act_enc,
        'act_dec': act_dec,
        'max_epochs': max_epochs,
        'learning_rate': learning_rate,
        'batch_size': batch_size,
        'monitoring_batches': monitoring_batches,
        'save_freq': save_freq,
        'n_components_trans_pca': n_components_trans_pca
    }

    start = time.clock()

    ############### TRAIN THE DAE
    train_file = work_dir + "train_pca" + str(conf['nvis']) + ".npy"
    save_path = (work_dir + "train_pca" + str(conf['nvis']) +
                 "_dae" + str(conf['nhid']) + "_model.pkl")
    trainset = NpyDataset(file=train_file)
    trainset.yaml_src = 'script'
    corruptor = BinomialCorruptor(corruption_level=conf['corruption_level'])
    dae = DenoisingAutoencoder(nvis=conf['nvis'],
                               nhid=conf['nhid'],
                               tied_weights=conf['tied_weights'],
                               corruptor=corruptor,
                               act_enc=conf['act_enc'],
                               act_dec=conf['act_dec'])
    cost = MeanSquaredReconstructionError()
    termination_criterion = EpochCounter(max_epochs=conf['max_epochs'])
    algorithm = UnsupervisedExhaustiveSGD(
        learning_rate=conf['learning_rate'],
        batch_size=conf['batch_size'],
        monitoring_batches=conf['monitoring_batches'],
        monitoring_dataset=trainset,
        cost=cost,
        termination_criterion=termination_criterion)
    train_obj = Train(dataset=trainset,
                      model=dae,
                      algorithm=algorithm,
                      save_freq=conf['save_freq'],
                      save_path=save_path)
    train_obj.main_loop()

    ############### APPLY THE MODEL ON THE TRAIN DATASET
    print("Applying the model on the train dataset...")
    model = load(save_path)
    save_train_path = (work_dir + "train_pca" + str(conf['nvis']) +
                       "_dae" + str(conf['nhid']) + ".npy")
    dump_obj = FeatureDump(encoder=model,
                           dataset=trainset,
                           path=save_train_path)
    dump_obj.main_loop()

    ############### APPLY THE MODEL ON THE VALID DATASET
    print("Applying the model on the valid dataset...")
    valid_file = work_dir + "valid_pca" + str(conf['nvis']) + ".npy"
    validset = NpyDataset(file=valid_file)
    validset.yaml_src = 'script'
    save_valid_path = (work_dir + "valid_pca" + str(conf['nvis']) +
                       "_dae" + str(conf['nhid']) + ".npy")
    dump_obj = FeatureDump(encoder=model,
                           dataset=validset,
                           path=save_valid_path)
    dump_obj.main_loop()

    ############### APPLY THE MODEL ON THE TEST DATASET
    print("Applying the model on the test dataset...")
    test_file = work_dir + "test_pca" + str(conf['nvis']) + ".npy"
    testset = NpyDataset(file=test_file)
    testset.yaml_src = 'script'
    save_test_path = (work_dir + "test_pca" + str(conf['nvis']) +
                      "_dae" + str(conf['nhid']) + ".npy")
    dump_obj = FeatureDump(encoder=model,
                           dataset=testset,
                           path=save_test_path)
    dump_obj.main_loop()

    ############### COMPUTE THE ALC SCORE ON VALIDATION SET
    valid_data = ift6266h12.load_npy(save_valid_path)
    label_data = ift6266h12.load_npy(
        '/data/lisa/data/UTLC/numpy_data/avicenna_valid_y.npy')
    alc_1 = score(valid_data, label_data)

    ############### APPLY THE TRANSDUCTIVE PCA
    test_data = ift6266h12.load_npy(save_test_path)
    trans_pca = PCA(n_components=conf['n_components_trans_pca'])
    final_valid = trans_pca.fit_transform(valid_data)
    final_test = trans_pca.fit_transform(test_data)
    save_valid_path = (work_dir + "valid_pca" + str(conf['nvis']) +
                       "_dae" + str(conf['nhid']) + "_tpca" +
                       str(conf['n_components_trans_pca']) + ".npy")
    save_test_path = (work_dir + "test_pca" + str(conf['nvis']) +
                      "_dae" + str(conf['nhid']) + "_tpca" +
                      str(conf['n_components_trans_pca']) + ".npy")
    np.save(save_valid_path, final_valid)
    np.save(save_test_path, final_test)

    ############### COMPUTE THE NEW ALC SCORE ON VALIDATION SET
    alc_2 = score(final_valid, label_data)

    ############### OUTPUT AND RETURN THE RESULTS
    timeSpent = (time.clock() - start) / 60.
    print 'FINAL RESULTS (PCA-' + str(conf['nvis']) + ' DAE-' + \
        str(conf['nhid']) + ' TransPCA-' + \
        str(conf['n_components_trans_pca']) + ') ALC after DAE: ', alc_1, \
        ' FINAL ALC: ', alc_2, ' Computed in %5.2f min' % timeSpent
    return timeSpent, alc_1, alc_2
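# The transductive PCA above refits on each split independently
# (fit_transform on valid, then again on test). For comparison, an
# inductive projection would fit once and reuse the same components;
# a sketch assuming scikit-learn's PCA:
from sklearn.decomposition import PCA

trans_pca = PCA(n_components=7)
final_valid = trans_pca.fit_transform(valid_data)      # fit on valid only
final_test_inductive = trans_pca.transform(test_data)  # reuse valid's fit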
def main():
    # Only the trainset is processed by this function.
    print 'getting preprocessed data for training model'
    pp_trainset, testset = get_processed_dataset()

    # remember to change here when changing datasets
    print 'loading unprocessed data for input displays'
    trainset = cifar10.CIFAR10(which_set="train")

    dmat = pp_trainset.get_design_matrix()
    nvis = dmat.shape[1]

    model = DenoisingAutoencoder(
        corruptor=BinomialCorruptor(corruption_level=0.3),
        nhid=nhid,
        nvis=nvis,
        act_enc='sigmoid',
        act_dec='sigmoid',
        irange=.01)

    algorithm = SGD(
        learning_rate=learning_rate,
        cost=MeanSquaredReconstructionError(),
        batch_size=100,
        monitoring_batches=10,
        monitoring_dataset=pp_trainset,
        termination_criterion=EpochCounter(max_epochs=MAX_EPOCHS_UNSUPERVISED),
        update_callbacks=None)

    extensions = None
    trainer = Train(model=model,
                    algorithm=algorithm,
                    save_path='run.pkl',
                    save_freq=1,
                    extensions=extensions,
                    dataset=pp_trainset)
    trainer.main_loop()

    ####################
    # Plot and Save:

    # choose random patch-pairs to plot
    stamps = pp_trainset.stamps
    num_examples = stamps.shape[0]
    to_plot = np.random.randint(0, num_examples, num2plot)

    # use to_plot indices to extract data
    stamps_data = stamps[to_plot]
    image_numbers = stamps[to_plot, 0].astype(int)
    X = trainset.X
    images_data = trainset.get_topological_view(X[image_numbers])
    p1x = stamps_data[:, 1]
    p1y = stamps_data[:, 2]
    p2x = stamps_data[:, 3]
    p2y = stamps_data[:, 4]

    # For the input patch-pair displays, once we've identified the patches,
    # we outline them and draw an arrow for the displacement d.
    # This might modify the original trainset, in which case we should make
    # a copy first.
    add_outlines(images_data, p1x, p1y, patch_width)
    add_outlines(images_data, p2x, p2y, patch_width)

    ##################################################
    # translating outputs back into things we can plot
    dataset = pp_trainset
    Xout = dataset.X.astype('float32')
    max_stamp = input_width - patch_width
    d_size = (2 * max_stamp + 1) ** input_dim

    # displacement
    d_enc = Xout[:, -d_size:]
    d_out_flat = np.argmax(d_enc, axis=1)
    d_shape = [2 * max_stamp + 1, 2 * max_stamp + 1]  # assumed 2D
    d_out = flat_to_2D(d_out_flat, d_shape)

    # patches
    vc = dataset.view_converter
    p_enc = Xout[:, :len(Xout.T) - d_size]
    p_size = p_enc.shape[1] / 2
    p1_enc = p_enc[:, :p_size]
    p2_enc = p_enc[:, p_size:]
    p1_enc = vc.design_mat_to_topo_view(p1_enc)
    p2_enc = vc.design_mat_to_topo_view(p2_enc)

    pp = dataset.preprocessor
    gcn = pp.items[1]
    means = gcn.means
    normalizers = gcn.normalizers

    toshape = (num_examples, )
    for i in range(input_dim):
        toshape += (1, )
    if num_channels != 1:
        toshape += (1, )

    # When the number of patches and patch-pairs differs, this breaks.
    # We need to match up normalizers/means with their corresponding patches.
    # Undoing the PCA might be breaking too, but without errors...
    normalizers1 = expand_p1(normalizers)
    normalizers2 = expand_p2(normalizers)
    means1 = expand_p1(means)
    means2 = expand_p2(means)
    p1_enc *= normalizers1.reshape(toshape)
    p1_enc += means1.reshape(toshape)
    p2_enc *= normalizers2.reshape(toshape)
    p2_enc += means2.reshape(toshape)

    # Now pull the same examples from the data to compare against the dAE
    # inputs in the plots.
    outputs = copy.deepcopy(images_data)
    insertpatches(outputs, p1_enc[to_plot], p1x, p1y, patch_width)
    insertpatches(outputs, p2_enc[to_plot], p2x, p2y, patch_width)

    plt.figure()
    for i in range(num2plot):
        # Inputs
        plt.subplot(num2plot, 2, 2 * i + 1)
        plt.imshow(images_data[i], cmap=cm.Greys_r)
        print stamps_data[i]
        a = (stamps_data[i, 2] + patch_width / 2,
             stamps_data[i, 1] + patch_width / 2,
             stamps_data[i, 6],
             stamps_data[i, 5])
        plt.arrow(a[0], a[1], a[2], a[3], head_width=1.0, head_length=0.6)
        # Outputs
        plt.subplot(num2plot, 2, 2 * i + 2)
        plt.imshow(outputs[i], cmap=cm.Greys_r)
        plt.arrow(a[0], a[1],
                  d_out[to_plot[i], 1],
                  d_out[to_plot[i], 0],
                  head_width=1.0, head_length=0.6)

    savestr = 'cifar_ppd.png'
    # save before plt.show(): with most backends, saving after the window is
    # closed writes an empty figure
    plt.savefig(savestr)
    plt.show()