def get_term_monitorbased(self, term_obj):
    print 'monitor_based'
    return MonitorBased(
        prop_decrease=term_obj.proportional_decrease,
        N=term_obj.max_epochs,
        channel_name=term_obj.channel_name)
def train_example(dataset=None):
    model = GaussianBinaryRBM(nvis=1296, nhid=61, irange=0.5,
                              energy_function_class=grbm_type_1(),
                              learn_sigma=True, init_sigma=.4,
                              init_bias_hid=2., mean_vis=False,
                              sigma_lr_scale=1e-3)
    cost = SMD(corruptor=GaussianCorruptor(stdev=0.4))
    algorithm = SGD(learning_rate=.1, batch_size=5,
                    monitoring_batches=20, monitoring_dataset=dataset,
                    cost=cost,
                    termination_criterion=MonitorBased(prop_decrease=0.01, N=1))
    train = Train(dataset=dataset, model=model,
                  save_path="./experiment/training.pkl", save_freq=10,
                  algorithm=algorithm, extensions=[])
    train.main_loop()
def _create_trainer(self, dataset):
    sgd.log.setLevel(logging.WARNING)

    # Aggregate all the dropout parameters into shared dictionaries.
    probs, scales = {}, {}
    for l in [l for l in self.layers if l.dropout is not None]:
        incl = 1.0 - l.dropout
        probs[l.name] = incl
        scales[l.name] = 1.0 / incl

    if self.cost == "Dropout" or len(probs) > 0:
        # Use the globally specified dropout rate when there are no layer-specific ones.
        incl = 1.0 - self.dropout
        default_prob, default_scale = incl, 1.0 / incl
        # Pass all the parameters to pylearn2 as a custom cost function.
        self.cost = Dropout(default_input_include_prob=default_prob,
                            default_input_scale=default_scale,
                            input_include_probs=probs,
                            input_scales=scales)

    logging.getLogger('pylearn2.monitor').setLevel(logging.WARNING)

    if dataset is not None:
        termination_criterion = MonitorBased(channel_name='objective',
                                             N=self.n_stable,
                                             prop_decrease=self.f_stable)
    else:
        termination_criterion = None

    return sgd.SGD(cost=self.cost,
                   batch_size=self.batch_size,
                   learning_rule=self._learning_rule,
                   learning_rate=self.learning_rate,
                   termination_criterion=termination_criterion,
                   monitoring_dataset=dataset)
def model1():
    #pdb.set_trace()
    # train set X has dim (60,000, 784), y has dim (60,000, 10)
    train_set = MNIST(which_set='train', one_hot=True)
    # test set X has dim (10,000, 784), y has dim (10,000, 10)
    valid_set = MNIST(which_set='test', one_hot=True)
    test_set = MNIST(which_set='test', one_hot=True)
    #print train_set.X.shape[1]

    # =====<Create the MLP Model>=====
    h2_layer = NoisyRELU(layer_name='h1', sparse_init=15, noise_factor=5,
                         dim=1000, desired_active_rate=0.2, bias_factor=20,
                         max_col_norm=1)
    #h2_layer = RectifiedLinear(layer_name='h2', dim=100, sparse_init=15, max_col_norm=1)
    #print h1_layer.get_params()
    #h2 = RectifiedLinear(layer_name='h2', dim=500, sparse_init=15, max_col_norm=1)
    y_layer = Softmax(layer_name='y', n_classes=10, irange=0., max_col_norm=1)
    mlp = MLP(batch_size=200,
              input_space=VectorSpace(dim=train_set.X.shape[1]),
              layers=[h2_layer, y_layer])

    # =====<Create the SGD algorithm>=====
    sgd = SGD(init_momentum=0.1,
              learning_rate=0.01,
              monitoring_dataset={'valid': valid_set},
              cost=MethodCost('cost_from_X'),
              termination_criterion=MonitorBased(
                  channel_name='valid_y_misclass',
                  prop_decrease=0.001,
                  N=50))
    #sgd.setup(model=mlp, dataset=train_set)

    # =====<Extensions>=====
    ext = [MomentumAdjustor(start=1, saturate=10, final_momentum=0.9)]

    # =====<Create Training Object>=====
    save_path = './mlp_model1.pkl'
    train_obj = Train(dataset=train_set, model=mlp, algorithm=sgd,
                      extensions=ext, save_path=save_path, save_freq=0)
    #train_obj.setup_extensions()

    # =====<Run the training>=====
    train_obj.main_loop()
def create_algorithm(self):
    cost_crit = MonitorBased(channel_name=self.optimize_for,
                             prop_decrease=0., N=10)
    epoch_cnt_crit = EpochCounter(max_epochs=self.max_epochs)
    term = And(criteria=[cost_crit, epoch_cnt_crit])
    self.algorithm = SGD(batch_size=100, learning_rate=.01,
                         monitoring_dataset=self.alg_datasets,
                         termination_criterion=term)
def model2():
    #pdb.set_trace()
    # train set X has dim (60,000, 784), y has dim (60,000, 10)
    train_set = MNIST(which_set='train', one_hot=True)
    # test set X has dim (10,000, 784), y has dim (10,000, 10)
    test_set = MNIST(which_set='test', one_hot=True)

    # =====<Create the MLP Model>=====
    h1_layer = RectifiedLinear(layer_name='h1', dim=1000, irange=0.5)
    #print h1_layer.get_params()
    h2_layer = RectifiedLinear(layer_name='h2', dim=1000, sparse_init=15,
                               max_col_norm=1)
    y_layer = Softmax(layer_name='y', n_classes=train_set.y.shape[1], irange=0.5)
    mlp = MLP(batch_size=100,
              input_space=VectorSpace(dim=train_set.X.shape[1]),
              layers=[h1_layer, h2_layer, y_layer])

    # =====<Create the SGD algorithm>=====
    sgd = SGD(batch_size=100,
              init_momentum=0.1,
              learning_rate=0.01,
              monitoring_dataset={'valid': train_set, 'test': test_set},
              cost=SumOfCosts(costs=[MethodCost('cost_from_X'),
                                     WeightDecay(coeffs=[0.00005, 0.00005, 0.00005])]),
              termination_criterion=MonitorBased(
                  channel_name='valid_y_misclass',
                  prop_decrease=0.0001,
                  N=5))
    #sgd.setup(model=mlp, dataset=train_set)

    # =====<Extensions>=====
    ext = [MomentumAdjustor(start=1, saturate=10, final_momentum=0.99)]

    # =====<Create Training Object>=====
    save_path = './mlp_model2.pkl'
    train_obj = Train(dataset=train_set, model=mlp, algorithm=sgd,
                      extensions=ext, save_path=save_path, save_freq=0)
    #train_obj.setup_extensions()
    train_obj.main_loop()
def model3():
    #pdb.set_trace()
    # train set X has dim (60,000, 784), y has dim (60,000, 10)
    train_set = SVHN_On_Memory(which_set='train')
    # test set X has dim (10,000, 784), y has dim (10,000, 10)
    test_set = SVHN_On_Memory(which_set='test')

    # =====<Create the MLP Model>=====
    h1_layer = NoisyRELU(layer_name='h1', dim=2000, threshold=5,
                         sparse_init=15, max_col_norm=1)
    #print h1_layer.get_params()
    #h2_layer = NoisyRELU(layer_name='h2', dim=100, threshold=15, sparse_init=15, max_col_norm=1)
    y_layer = Softmax(layer_name='y', n_classes=train_set.y.shape[1], irange=0.5)
    mlp = MLP(batch_size=64,
              input_space=VectorSpace(dim=train_set.X.shape[1]),
              layers=[h1_layer, y_layer])

    # =====<Create the SGD algorithm>=====
    sgd = SGD(batch_size=64,
              init_momentum=0.1,
              learning_rate=0.01,
              monitoring_dataset={'valid': train_set, 'test': test_set},
              cost=MethodCost('cost_from_X'),
              termination_criterion=MonitorBased(
                  channel_name='valid_y_misclass',
                  prop_decrease=0.001,
                  N=50))
    #sgd.setup(model=mlp, dataset=train_set)

    # =====<Extensions>=====
    ext = [MomentumAdjustor(start=1, saturate=10, final_momentum=0.9)]

    # =====<Create Training Object>=====
    save_path = './mlp_model.pkl'
    train_obj = Train(dataset=train_set, model=mlp, algorithm=sgd,
                      extensions=ext, save_path=save_path, save_freq=10)
    #train_obj.setup_extensions()
    train_obj.main_loop()
def get_term_monitorbased(self, term_id):
    row = self.db.executeSQL("""
        SELECT proportional_decrease, max_epoch, channel_name
        FROM hps3.term_monitorBased
        WHERE term_id = %s
        """, (term_id,), self.db.FETCH_ONE)
    if not row or row is None:
        raise HPSData("No monitorBased term for term_id=" + str(term_id))
    print 'monitor_based'
    (proportional_decrease, max_epochs, channel_name) = row
    return MonitorBased(prop_decrease=proportional_decrease,
                        N=max_epochs,
                        channel_name=channel_name)
def get_layer_trainer_logistic(layer, trainset, validset):
    # configs on sgd
    config = {'learning_rate': 0.1,
              'cost': Default(),
              'batch_size': 150,
              'monitoring_dataset': validset,
              'termination_criterion': MonitorBased(channel_name='y_misclass',
                                                    N=10, prop_decrease=0),
              'update_callbacks': None}

    train_algo = SGD(**config)
    model = layer
    return Train(model=model,
                 dataset=trainset,
                 algorithm=train_algo,
                 extensions=None)
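# Hedged usage sketch (not in the original source): `logistic_layer`, `trainset`,
# and `validset` are assumed to be built elsewhere in the script; the returned
# Train object is simply driven with main_loop().
layer_trainer = get_layer_trainer_logistic(logistic_layer, trainset, validset)
layer_trainer.main_loop()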
def create_algorithm(self, data, save_best_path=None):
    self.set_dataset(data)
    self.create_adjustors()
    term = EpochCounter(max_epochs=self.max_epochs)
    if self.valid_stop:
        cost_crit = MonitorBased(channel_name='valid_objective',
                                 prop_decrease=.0, N=3)
        term = And(criteria=[cost_crit, term])

    #(layers, A_weight_decay)
    coeffs = None
    if self.reg_factors:
        rf = self.reg_factors
        lhdims = len(self.tagger.hdims)
        l_inputlayer = len(self.tagger.layers[0].layers)
        coeffs = ([[rf] * l_inputlayer] + ([rf] * lhdims) + [rf], rf)
    cost = SeqTaggerCost(coeffs, self.dropout)
    self.cost = cost

    self.mbsb = MonitorBasedSaveBest(channel_name='valid_objective',
                                     save_path=save_best_path)

    mon_dataset = dict(self.dataset)
    if not self.monitor_train:
        del mon_dataset['train']

    _learning_rule = (self.momentum_rule if self.use_momentum else None)
    self.algorithm = SGD(batch_size=1,
                         learning_rate=self.lr,
                         termination_criterion=term,
                         monitoring_dataset=mon_dataset,
                         cost=cost,
                         learning_rule=_learning_rule)
    self.algorithm.setup(self, self.dataset['train'])

    if self.plot_monitor:
        cn = ["valid_objective", "test_objective"]
        if self.monitor_train:
            cn.append("train_objective")
        plots = Plots(channel_names=cn, save_path=self.plot_monitor)
        self.pm = PlotManager([plots], freq=1)
        self.pm.setup(self, None, self.algorithm)
def test_correctness():
    """
    Test that the cost function works with float64
    """
    x_train, y_train, x_valid, y_valid = create_dataset()

    trainset = DenseDesignMatrix(X=np.array(x_train), y=y_train)
    validset = DenseDesignMatrix(X=np.array(x_valid), y=y_valid)

    n_inputs = trainset.X.shape[1]
    n_outputs = 1
    n_hidden = 10
    hidden_istdev = 4 * (6 / float(n_inputs + n_hidden)) ** 0.5
    output_istdev = 4 * (6 / float(n_hidden + n_outputs)) ** 0.5

    model = MLP(layers=[Sigmoid(dim=n_hidden, layer_name='hidden',
                                istdev=hidden_istdev),
                        Sigmoid(dim=n_outputs, layer_name='output',
                                istdev=output_istdev)],
                nvis=n_inputs, seed=[2013, 9, 16])

    termination_criterion = And([EpochCounter(max_epochs=1),
                                 MonitorBased(prop_decrease=1e-7, N=2)])

    cost = SumOfCosts([(0.99, Default()),
                       (0.01, L1WeightDecay({}))])

    algo = SGD(1e-1,
               update_callbacks=[ExponentialDecay(decay_factor=1.00001,
                                                  min_lr=1e-10)],
               cost=cost,
               monitoring_dataset=validset,
               termination_criterion=termination_criterion,
               monitor_iteration_mode='even_shuffled_sequential',
               batch_size=2)

    train = Train(model=model, dataset=trainset, algorithm=algo)
    train.main_loop()
def get_trainer(model, trainset, validset, save_path):
    monitoring = dict(valid=validset, train=trainset)
    termination = MonitorBased(channel_name='valid_y_misclass',
                               prop_decrease=.001, N=100)
    extensions = [MonitorBasedSaveBest(channel_name='valid_y_misclass',
                                       save_path=save_path),
                  #MomentumAdjustor(start=1, saturate=100, final_momentum=.9),
                  LinearDecayOverEpoch(start=1, saturate=200, decay_factor=0.01)]

    config = {'learning_rate': .01,
              #'learning_rule': Momentum(0.5),
              'learning_rule': RMSProp(),
              'train_iteration_mode': 'shuffled_sequential',
              'batch_size': 1200,  #250,
              #'batches_per_iter': 100,
              'monitoring_dataset': monitoring,
              'monitor_iteration_mode': 'shuffled_sequential',
              'termination_criterion': termination,
              }

    return Train(model=model, algorithm=SGD(**config),
                 dataset=trainset, extensions=extensions)
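# Hedged usage sketch (not in the original source): `model`, `trainset`, and
# `validset` are assumed to come from the surrounding script; the save path is
# an illustrative placeholder.
get_trainer(model, trainset, validset, save_path='best_model.pkl').main_loop()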
def main():
    #creating layers
    #2 convolutional rectified layers, border mode valid
    batch_size = 48
    lr = 1.0  #0.1/4
    finMomentum = 0.9
    maxout_units = 2000
    num_pcs = 4
    lay1_reg = lay2_reg = maxout_reg = None
    #save_path = './models/no_maxout/titan_lr_0.1_btch_64_momFinal_0.9_maxout_2000_4.joblib'
    #best_path = '/models/no_maxout/titan_bart10_gpu2_best.joblib'
    #save_path = './models/'+params.host+'_'+params.device+'_'+sys.argv[1]+'.joblib'
    #best_path = './models/'+params.host+'_'+params.device+'_'+sys.argv[1]+'best.joblib'
    save_path = '/Tmp/zumerjer/bart10_sumcost_adadelta_drop_perturb.joblib'
    best_path = '/Tmp/zumerjer/bart10_sumcost_adadelta_drop_perturb_best.joblib'
    #numBatches = 400000/batch_size

    '''
    print 'Applying preprocessing'
    ddmTrain = EmotiwKeypoints(start=0, stop=40000)
    ddmValid = EmotiwKeypoints(start=40000, stop=44000)
    ddmTest = EmotiwKeypoints(start=44000)

    stndrdz = preprocessing.Standardize()
    stndrdz.applyLazily(ddmTrain, can_fit=True, name='train')
    stndrdz.applyLazily(ddmValid, can_fit=False, name='val')
    stndrdz.applyLazily(ddmTest, can_fit=False, name='test')

    GCN = preprocessing.GlobalContrastNormalization(batch_size=1000)
    GCN.apply(ddmTrain, can_fit=True, name='train')
    GCN.apply(ddmValid, can_fit=False, name='val')
    GCN.apply(ddmTest, can_fit=False, name='test')
    return
    '''

    ddmTrain = ComboDatasetPyTable('/Tmp/zumerjer/perturbed_', which_set='train')
    ddmValid = ComboDatasetPyTable('/Tmp/zumerjer/perturbed_', which_set='valid')
    #ddmSmallTrain = ComboDatasetPyTable('/Tmp/zumerjer/all_', which_set='small_train')

    layer1 = ConvRectifiedLinear(layer_name='convRect1',
                                 output_channels=64,
                                 irange=.05,
                                 kernel_shape=[5, 5],
                                 pool_shape=[4, 4],
                                 pool_stride=[2, 2],
                                 W_lr_scale=0.1,
                                 max_kernel_norm=lay1_reg)
    layer2 = ConvRectifiedLinear(layer_name='convRect2',
                                 output_channels=128,
                                 irange=.05,
                                 kernel_shape=[5, 5],
                                 pool_shape=[3, 3],
                                 pool_stride=[2, 2],
                                 W_lr_scale=0.1,
                                 max_kernel_norm=lay2_reg)

    # Rectified linear units
    #layer3 = RectifiedLinear(dim=3000,
    #                         sparse_init=15,
    #                         layer_name='RectLin3')

    #Maxout layer
    maxout = Maxout(layer_name='maxout',
                    irange=.005,
                    num_units=maxout_units,
                    num_pieces=num_pcs,
                    W_lr_scale=0.1,
                    max_col_norm=maxout_reg)

    #multisoftmax
    n_groups = 196
    n_classes = 96
    layer_name = 'multisoftmax'
    layerMS = MultiSoftmax(n_groups=n_groups, irange=0.05,
                           n_classes=n_classes, layer_name=layer_name)

    #setting up MLP
    MLPerc = MLP(batch_size=batch_size,
                 input_space=Conv2DSpace(shape=[96, 96],
                                         num_channels=3,
                                         axes=('b', 0, 1, 'c')),
                 layers=[layer1, layer2, maxout, layerMS])

    #mlp_cost
    missing_target_value = -1
    mlp_cost = MLPCost(cost_type='default',
                       missing_target_value=missing_target_value)
    mlp_cost.setup_dropout(input_include_probs={'convRect1': 1.0},
                           input_scales={'convRect1': 1.})
    #dropout_cost = Dropout(input_include_probs={'convRect1': .8},
    #                       input_scales={'convRect1': 1.})

    #algorithm
    monitoring_dataset = {'validation': ddmValid}  #, 'mini-train': ddmSmallTrain}

    term_crit = MonitorBased(prop_decrease=1e-7, N=100,
                             channel_name='validation_objective')
    kp_ada = KeypointADADELTA(decay_factor=0.95,
                              #init_momentum=0.5,
                              monitoring_dataset=monitoring_dataset,
                              batch_size=batch_size,
                              termination_criterion=term_crit,
                              cost=mlp_cost)

    #train extension
    #train_ext = ExponentialDecayOverEpoch(decay_factor=0.998, min_lr_scale=0.001)
    #train_ext = LinearDecayOverEpoch(start=1, saturate=250, decay_factor=.01)
    #train_ext = ADADELTA(0.95)

    #train object
    train = Train(dataset=ddmTrain,
                  save_path=save_path,
                  save_freq=10,
                  model=MLPerc,
                  algorithm=kp_ada,
                  extensions=[#train_ext,
                              MonitorBasedSaveBest(channel_name='validation_objective',
                                                   save_path=best_path)
                              #, MomentumAdjustor(start=1,
                              #                   saturate=25,
                              #                   final_momentum=finMomentum)
                              ])
    train.main_loop()
    train.save()
# momentum
initial_momentum = .5
final_momentum = .99
start = 1
saturate = 20
momentum_adjustor = learning_rule.MomentumAdjustor(final_momentum, start, saturate)
momentum_rule = learning_rule.Momentum(initial_momentum)

# learning rate
start = .1
saturate = 20
decay_factor = .00001
learning_rate_adjustor = sgd.LinearDecayOverEpoch(start, saturate, decay_factor)

# termination criterion that stops after 20 epochs without
# any decrease of the objective on the validation set
termination_criterion = MonitorBased(channel_name='objective', N=20,
                                     prop_decrease=0.0)

# create Stochastic Gradient Descent trainer
trainer = sgd.SGD(learning_rate=.001,
                  batch_size=10,
                  monitoring_dataset=ds_valid,
                  termination_criterion=termination_criterion,
                  cost=L1_cost)
                  #learning_rule=momentum_rule,
trainer.setup(ann, ds_train)

# add monitor for saving the model with best score
monitor_save_best = best_params.MonitorBasedSaveBest('objective', './tmp/best.pkl')

#####################################
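# Hedged sketch (not in the original source) of the manual training loop that
# typically follows a trainer.setup() call; it mirrors the monitor() /
# on_monitor() / continue_learning() pattern used in a later snippet. `ann` and
# `ds_train` are assumed to be the model and training set defined above.
while True:
    trainer.train(dataset=ds_train)
    ann.monitor.report_epoch()
    ann.monitor()
    monitor_save_best.on_monitor(ann, ds_train, trainer)
    if not trainer.continue_learning(ann):
        break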
def main():
    #creating layers
    #2 convolutional rectified layers, border mode valid
    batch_size = params.batch_size
    lr = params.lr
    finMomentum = params.momentum
    maxout_units = params.units
    num_pcs = params.pieces
    lay1_reg = lay2_reg = maxout_reg = params.norm_reg
    #save_path = './models/no_maxout/titan_lr_0.1_btch_64_momFinal_0.9_maxout_2000_4.joblib'
    #best_path = '/models/no_maxout/titan_bart10_gpu2_best.joblib'
    save_path = './models/' + params.host + '_' + params.device + '_' + sys.argv[1] + '.joblib'
    best_path = './models/' + params.host + '_' + params.device + '_' + sys.argv[1] + 'best.joblib'
    numBatches = 400000 / batch_size

    from emotiw.common.datasets.faces.EmotiwKeypoints import EmotiwKeypoints
    '''
    print 'Applying preprocessing'
    ddmTrain = EmotiwKeypoints(start=0, stop=40000)
    ddmValid = EmotiwKeypoints(start=40000, stop=44000)
    ddmTest = EmotiwKeypoints(start=44000)

    stndrdz = preprocessing.Standardize()
    stndrdz.applyLazily(ddmTrain, can_fit=True, name='train')
    stndrdz.applyLazily(ddmValid, can_fit=False, name='val')
    stndrdz.applyLazily(ddmTest, can_fit=False, name='test')

    GCN = preprocessing.GlobalContrastNormalization(batch_size=1000)
    GCN.apply(ddmTrain, can_fit=True, name='train')
    GCN.apply(ddmValid, can_fit=False, name='val')
    GCN.apply(ddmTest, can_fit=False, name='test')
    return
    '''

    ddmTrain = EmotiwKeypoints(hack='train', preproc='STD')
    ddmValid = EmotiwKeypoints(hack='val', preproc='STD')

    layer1 = ConvRectifiedLinear(layer_name='convRect1',
                                 output_channels=64,
                                 irange=.05,
                                 kernel_shape=[5, 5],
                                 pool_shape=[4, 4],
                                 pool_stride=[2, 2],
                                 W_lr_scale=0.1,
                                 max_kernel_norm=lay1_reg)
    layer2 = ConvRectifiedLinear(layer_name='convRect2',
                                 output_channels=128,
                                 irange=.05,
                                 kernel_shape=[5, 5],
                                 pool_shape=[3, 3],
                                 pool_stride=[2, 2],
                                 W_lr_scale=0.1,
                                 max_kernel_norm=lay2_reg)

    # Rectified linear units
    #layer3 = RectifiedLinear(dim=3000,
    #                         sparse_init=15,
    #                         layer_name='RectLin3')

    #Maxout layer
    maxout = Maxout(layer_name='maxout',
                    irange=.005,
                    num_units=maxout_units,
                    num_pieces=num_pcs,
                    W_lr_scale=0.1,
                    max_col_norm=maxout_reg)

    #multisoftmax
    n_groups = 196
    n_classes = 96
    irange = 0
    layer_name = 'multisoftmax'
    layerMS = MultiSoftmax(n_groups=n_groups, irange=0.05,
                           n_classes=n_classes, layer_name=layer_name)

    #setting up MLP
    MLPerc = MLP(batch_size=batch_size,
                 input_space=Conv2DSpace(shape=[96, 96], num_channels=3),
                 layers=[layer1, layer2, maxout, layerMS])

    #mlp_cost
    missing_target_value = -1
    mlp_cost = MLPCost(cost_type='default',
                       missing_target_value=missing_target_value)
    mlp_cost.setup_dropout(input_include_probs={'convRect1': 1.0},
                           input_scales={'convRect1': 1.})
    #dropout_cost = Dropout(input_include_probs={'convRect1': .8},
    #                       input_scales={'convRect1': 1.})

    #algorithm
    monitoring_dataset = {'validation': ddmValid}

    term_crit = MonitorBased(prop_decrease=1e-7, N=100,
                             channel_name='validation_objective')
    kpSGD = KeypointSGD(learning_rate=lr,
                        init_momentum=0.5,
                        monitoring_dataset=monitoring_dataset,
                        batch_size=batch_size,
                        termination_criterion=term_crit,
                        cost=mlp_cost)

    #train extension
    #train_ext = ExponentialDecayOverEpoch(decay_factor=0.998, min_lr_scale=0.001)
    train_ext = LinearDecayOverEpoch(start=1, saturate=250, decay_factor=.01)

    #train object
    train = Train(dataset=ddmTrain,
                  save_path=save_path,
                  save_freq=10,
                  model=MLPerc,
                  algorithm=kpSGD,
                  extensions=[train_ext,
                              MonitorBasedSaveBest(channel_name='validation_objective',
                                                   save_path=best_path),
                              MomentumAdjustor(start=1,
                                               saturate=25,
                                               final_momentum=finMomentum)])
    train.main_loop()
    train.save()
def main():
    training_data, validation_data, test_data, std_scale = load_training_data()
    kaggle_test_features = load_test_data(std_scale)

    ###############
    # pylearn2 ML
    hl1 = mlp.Sigmoid(layer_name='hl1', dim=200, irange=.1, init_bias=1.)
    hl2 = mlp.Sigmoid(layer_name='hl2', dim=100, irange=.1, init_bias=1.)

    # create Softmax output layer
    output_layer = mlp.Softmax(9, 'output', irange=.1)

    # create Stochastic Gradient Descent trainer that stops once the validation
    # objective has not improved for 10 epochs
    trainer = sgd.SGD(learning_rate=.05,
                      batch_size=300,
                      learning_rule=learning_rule.Momentum(.5),
                      termination_criterion=MonitorBased(
                          channel_name='valid_objective',
                          prop_decrease=0., N=10),
                      monitoring_dataset={'valid': validation_data,
                                          'train': training_data})

    layers = [hl1, hl2, output_layer]

    # create neural net
    model = mlp.MLP(layers, nvis=93)

    watcher = best_params.MonitorBasedSaveBest(
        channel_name='valid_objective',
        save_path='pylearn2_results/pylearn2_test.pkl')

    velocity = learning_rule.MomentumAdjustor(final_momentum=.6,
                                              start=1, saturate=250)
    decay = sgd.LinearDecayOverEpoch(start=1, saturate=250, decay_factor=.01)

    ######################
    experiment = Train(dataset=training_data,
                       model=model,
                       algorithm=trainer,
                       extensions=[watcher, velocity, decay])
    experiment.main_loop()

    # load best model and test
    ################
    model = serial.load('pylearn2_results/pylearn2_test.pkl')

    # get a prediction of the accuracy from the test_data
    test_results = model.fprop(theano.shared(test_data[0],
                                             name='test_data')).eval()
    print test_results.shape

    loss = multiclass_log_loss(test_data[1], test_results)
    print 'Test multiclass log loss:', loss

    out_file = 'pylearn2_results/' + str(loss) + 'ann'
    #exp.save(out_file + '.pkl')

    # save the kaggle results
    results = model.fprop(theano.shared(kaggle_test_features,
                                        name='kaggle_test_data')).eval()
    save_results(out_file + '.csv', kaggle_test_features, results)
def get_layer_MLP(layers, trainset, validset):
    #processor = Standardize()

    # trainset = BlackBoxDataset(which_set='train',
    #                            start=0,
    #                            stop=900,
    #                            preprocessor=Standardize(),
    #                            fit_preprocessor=True,
    #                            fit_test_preprocessor=True)
    #
    # validset = BlackBoxDataset(which_set='train',
    #                            start=900,
    #                            stop=1000,
    #                            preprocessor=Standardize(),
    #                            fit_preprocessor=True,
    #                            fit_test_preprocessor=False)

    dropCfg = {'input_include_probs': {'h0': .8},
               'input_scales': {'h0': 1.}}

    config = {'learning_rate': .1,
              'init_momentum': .5,
              'cost': Default(),  #Dropout(**dropCfg),
              'monitoring_dataset': {'train': trainset, 'valid': validset},
              'termination_criterion': MonitorBased(channel_name='valid_y_misclass',
                                                    N=10, prop_decrease=0),
              'update_callbacks': None}

    # configCfg0 = {'layer_name': 'h0',
    #               'dim': 1875,
    #               'irange': .05,
    #               # Rather than using weight decay, we constrain the norms of the weight vectors
    #               'max_col_norm': 1.}
    #
    # configCfg1 = {'layer_name': 'h1',
    #               'dim': 1875,
    #               'irange': .05,
    #               # Rather than using weight decay, we constrain the norms of the weight vectors
    #               'max_col_norm': 1.}

    sftmaxCfg = {'layer_name': 'y',
                 'init_bias_target_marginals': trainset,
                 # Initialize the weights to all 0s
                 'irange': .0,
                 'n_classes': 9}

    layers.append(Softmax(**sftmaxCfg))

    train_algo = SGD(**config)
    model = MLP(batch_size=10, layers=layers, nvis=1875)
    return Train(model=model,
                 dataset=trainset,
                 algorithm=train_algo,
                 extensions=None,  #[LinearDecayOverEpoch(start=5, saturate=100, decay_factor=.01)],
                 save_path="best_dbn_model.pkl",
                 save_freq=100)
def test_works():
    load = True
    if load == False:
        ddmTrain = FacialKeypoint(which_set='train', start=0, stop=6000)
        ddmValid = FacialKeypoint(which_set='train', start=6000, stop=7049)
        # valid can_fit = false
        pipeline = preprocessing.Pipeline()
        stndrdz = preprocessing.Standardize()
        stndrdz.apply(ddmTrain, can_fit=True)
        #doubt, how about can_fit = False?
        stndrdz.apply(ddmValid, can_fit=False)
        GCN = preprocessing.GlobalContrastNormalization()
        GCN.apply(ddmTrain, can_fit=True)
        GCN.apply(ddmValid, can_fit=False)

        pcklFile = open('kpd.pkl', 'wb')
        obj = (ddmTrain, ddmValid)
        pickle.dump(obj, pcklFile)
        pcklFile.close()
        return
    else:
        pcklFile = open('kpd.pkl', 'rb')
        (ddmTrain, ddmValid) = pickle.load(pcklFile)
        pcklFile.close()

    #creating layers
    #2 convolutional rectified layers, border mode valid
    layer1 = ConvRectifiedLinear(layer_name='convRect1',
                                 output_channels=64,
                                 irange=.05,
                                 kernel_shape=[5, 5],
                                 pool_shape=[3, 3],
                                 pool_stride=[2, 2],
                                 max_kernel_norm=1.9365)
    layer2 = ConvRectifiedLinear(layer_name='convRect2',
                                 output_channels=64,
                                 irange=.05,
                                 kernel_shape=[5, 5],
                                 pool_shape=[3, 3],
                                 pool_stride=[2, 2],
                                 max_kernel_norm=1.9365)

    # Rectified linear units
    layer3 = RectifiedLinear(dim=3000,
                             sparse_init=15,
                             layer_name='RectLin3')

    #multisoftmax
    n_groups = 30
    n_classes = 98
    irange = 0
    layer_name = 'multisoftmax'
    layerMS = MultiSoftmax(n_groups=n_groups, irange=0.05,
                           n_classes=n_classes, layer_name=layer_name)

    #setting up MLP
    MLPerc = MLP(batch_size=8,
                 input_space=Conv2DSpace(shape=[96, 96], num_channels=1),
                 layers=[layer1, layer2, layer3, layerMS])

    #mlp_cost
    missing_target_value = -1
    mlp_cost = MLPCost(cost_type='default',
                       missing_target_value=missing_target_value)

    #algorithm
    # learning rate, momentum, batch size, monitoring dataset, cost, termination criteria
    term_crit = MonitorBased(prop_decrease=0.00001, N=30,
                             channel_name='validation_objective')
    kpSGD = KeypointSGD(learning_rate=0.001,
                        init_momentum=0.5,
                        monitoring_dataset={'validation': ddmValid,
                                            'training': ddmTrain},
                        batch_size=8,
                        batches_per_iter=750,
                        termination_criterion=term_crit,
                        train_iteration_mode='random_uniform',
                        cost=mlp_cost)

    #train extension
    train_ext = ExponentialDecayOverEpoch(decay_factor=0.998, min_lr_scale=0.01)

    #train object
    train = Train(dataset=ddmTrain,
                  save_path='kpd_model2.pkl',
                  save_freq=1,
                  model=MLPerc,
                  algorithm=kpSGD,
                  extensions=[train_ext,
                              MonitorBasedSaveBest(channel_name='validation_objective',
                                                   save_path='kpd_best.pkl'),
                              MomentumAdjustor(start=1,
                                               saturate=20,
                                               final_momentum=.9)])
    train.main_loop()
    train.save()
                  dim=13,
                  istdev=0.01,
                  monitor_style="bit_vector_class")],
    nvis=26);

print "[MESSAGE] The model is built";

### build algorithm
algorithm=SGD(batch_size=100,
              learning_rate=0.05,
              monitoring_dataset={'train': valid_data,
                                  'valid': valid_data,
                                  'test': test_data},
              termination_criterion=Or(criteria=[MonitorBased(channel_name="valid_objective",
                                                              prop_decrease=0.00001,
                                                              N=40),
                                                 EpochCounter(max_epochs=200)]),
              cost=Dropout(input_include_probs={'hidden_0': 1.,
                                                'hidden_1': 1.,
                                                'y': 0.5},
                           input_scales={'hidden_0': 1.,
                                         'hidden_1': 1.,
                                         'y': 2.}),
              update_callbacks=ExponentialDecay(decay_factor=1.0000003,
                                                min_lr=.000001));
print "[MESSAGE] Training algorithm is built";

### build training
idpath = os.path.splitext(os.path.abspath(__file__))[0];  # ID for output files.
save_path = idpath + '.pkl';

train=Train(dataset=train_data,
# create datasets
ds_train = Pima()
ds_train, ds_valid = ds_train.split(0.7)
ds_valid, ds_test = ds_valid.split(0.7)

# create sigmoid hidden layer with 20 nodes, init weights in range -0.05 to 0.05
# and add a bias with value 1
hidden_layer = mlp.Sigmoid(layer_name='hidden', dim=20, irange=.05, init_bias=1.)

# softmax output layer
output_layer = mlp.Softmax(2, 'output', irange=.05)

layers = [hidden_layer, output_layer]

# termination criterion that stops after 50 epochs without
# any decrease in misclassification on the validation set
termination_criterion = MonitorBased(channel_name='output_misclass', N=50,
                                     prop_decrease=0.0)

# momentum
initial_momentum = .5
final_momentum = .99
start = 1
saturate = 50
momentum_adjustor = learning_rule.MomentumAdjustor(final_momentum, start, saturate)
momentum_rule = learning_rule.Momentum(initial_momentum)

# learning rate
start = 1
saturate = 50
decay_factor = .1
learning_rate_adjustor = sgd.LinearDecayOverEpoch(start, saturate, decay_factor)
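# Hedged sketch (not part of the original snippet) of how the pieces above are
# typically assembled into an SGD trainer and a Train loop; the learning rate
# and batch size here are illustrative assumptions, not values from the source.
ann = mlp.MLP(layers, nvis=ds_train.X.shape[1])
trainer = sgd.SGD(learning_rate=.05,
                  batch_size=10,
                  monitoring_dataset=ds_valid,
                  learning_rule=momentum_rule,
                  termination_criterion=termination_criterion)
experiment = Train(dataset=ds_train,
                   model=ann,
                   algorithm=trainer,
                   extensions=[momentum_adjustor, learning_rate_adjustor])
experiment.main_loop()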
print 'Val Dataset Loaded'

last_ndim = 240
n_classes = 7

import pdb
pdb.set_trace()

algorithm = SGD(batch_size=batch_size,
                learning_rate=learning_rate,
                init_momentum=.5,
                monitoring_dataset={'valid': val_ds},
                cost=Dropout(input_include_probs={'h0': .8},
                             input_scales={'h0': 1.}),
                termination_criterion=MonitorBased(channel_name="valid_y_misclass",
                                                   prop_decrease=0., N=100),
                #termination_criterion: !obj:pylearn2.termination_criteria.EpochCounter {max_epochs: 1},
                update_callbacks=ExponentialDecay(decay_factor=1.00004,
                                                  min_lr=.000001))

extensions = [MonitorBasedSaveBest(channel_name='valid_y_misclass',
                                   save_path=save_best_path),
              MomentumAdjustor(start=1, saturate=250, final_momentum=.7)]

model = MLP(batch_size=batch_size,
            input_space=Conv2DSpace(shape=[48, 48],
                                    num_channels=num_chan,
                                    axes=['c', 0, 1, 'b']),
            layers=[
    nn.monitor()
    save_best.on_monitor(nn, train, algo)
    if not algo.continue_learning(nn):
        break

# SoftPlus with Dropout
h0 = mlp.Softplus(layer_name='h0', dim=60, sparse_init=0)
h1 = mlp.Softplus(layer_name='h1', dim=60, sparse_init=0)
y0 = mlp.Softmax(layer_name='y0', n_classes=5, irange=0)
layers = [h0, h1, y0]

model = mlp.MLP(layers, nvis=train.X.shape[1])

monitoring = dict(valid=valid)
termination = MonitorBased(channel_name="valid_y0_misclass", N=5)
extensions = [best_params.MonitorBasedSaveBest(channel_name="valid_y0_misclass",
                                               save_path="train_best.pkl")]

algorithm = sgd.SGD(0.1,
                    batch_size=100,
                    cost=Dropout(),
                    monitoring_dataset=monitoring,
                    termination_criterion=termination)

print 'Running training'
train_job = Train(train, model, algorithm,
def get_layer_MLP():
    extraset = BlackBoxDataset(which_set='extra')

    processor = Standardize()
    processor.apply(extraset, can_fit=True)

    trainset = BlackBoxDataset(which_set='train',
                               start=0,
                               stop=900,
                               preprocessor=processor,
                               fit_preprocessor=True,
                               fit_test_preprocessor=True)

    validset = BlackBoxDataset(which_set='train',
                               start=900,
                               stop=1000,
                               preprocessor=processor,
                               fit_preprocessor=True,
                               fit_test_preprocessor=False)

    dropCfg = {'input_include_probs': {'h0': .8},
               'input_scales': {'h0': 1.}}

    config = {'learning_rate': .05,
              'init_momentum': .00,
              'cost': Dropout(**dropCfg),
              'monitoring_dataset': {'train': trainset, 'valid': validset},
              'termination_criterion': MonitorBased(channel_name='valid_y_misclass',
                                                    N=100, prop_decrease=0),
              'update_callbacks': None}

    config0 = {'layer_name': 'h0',
               'num_units': 1875,
               'num_pieces': 2,
               'irange': .05,
               # Rather than using weight decay, we constrain the norms of the weight vectors
               'max_col_norm': 2.}

    config1 = {'layer_name': 'h1',
               'num_units': 700,
               'num_pieces': 2,
               'irange': .05,
               # Rather than using weight decay, we constrain the norms of the weight vectors
               'max_col_norm': 2.}

    sftmaxCfg = {'layer_name': 'y',
                 'init_bias_target_marginals': trainset,
                 # Initialize the weights to all 0s
                 'irange': .0,
                 'n_classes': 9}

    l1 = Maxout(**config0)
    l2 = Maxout(**config1)
    l3 = Softmax(**sftmaxCfg)

    train_algo = SGD(**config)
    model = MLP(batch_size=75, layers=[l1, l2, l3], nvis=1875)
    return Train(model=model,
                 dataset=trainset,
                 algorithm=train_algo,
                 extensions=None,
                 save_path="maxout_best_model.pkl",
                 save_freq=1)
def main(x):
    l1_dim = x[0]
    l2_dim = x[1]
    learning_rate = x[2]
    momentum = x[3]
    l1_dropout = x[4]
    decay_factor = x[5]

    min_lr = 1e-7

    #
    train = np.loadtxt(train_file, delimiter=',')
    x_train = train[:, 0:-1]
    y_train = train[:, -1]
    y_train.shape = (y_train.shape[0], 1)

    #
    validation = np.loadtxt(validation_file, delimiter=',')
    x_valid = validation[:, 0:-1]
    y_valid = validation[:, -1]
    y_valid.shape = (y_valid.shape[0], 1)

    #
    #input_space = VectorSpace(dim=x.shape[1])
    full = DenseDesignMatrix(X=x_train, y=y_train)
    valid = DenseDesignMatrix(X=x_valid, y=y_valid)

    l1 = mlp.RectifiedLinear(layer_name='l1',
                             irange=.001,
                             dim=l1_dim,
                             # "Rather than using weight decay, we constrain the norms of the weight vectors"
                             max_col_norm=1.)

    l2 = mlp.RectifiedLinear(layer_name='l2',
                             irange=.001,
                             dim=l2_dim,
                             max_col_norm=1.)

    output = mlp.Linear(dim=1,
                        layer_name='y',
                        irange=.0001)

    layers = [l1, l2, output]
    nvis = x_train.shape[1]

    mdl = mlp.MLP(layers, nvis=nvis)  # input_space=input_space

    #lr = .001
    #epochs = 100

    decay = sgd.ExponentialDecay(decay_factor=decay_factor, min_lr=min_lr)

    trainer = sgd.SGD(learning_rate=learning_rate,
                      batch_size=128,
                      learning_rule=learning_rule.Momentum(momentum),
                      update_callbacks=[decay],
                      # Remember, default dropout is .5
                      cost=Dropout(input_include_probs={'l1': l1_dropout},
                                   input_scales={'l1': 1.}),
                      #termination_criterion=EpochCounter(epochs),
                      termination_criterion=MonitorBased(
                          channel_name="valid_objective",
                          prop_decrease=0.001,  # 0.1% of objective
                          N=10),
                      # valid_objective is MSE
                      monitoring_dataset={'train': full, 'valid': valid})

    watcher = best_params.MonitorBasedSaveBest(channel_name='valid_objective',
                                               save_path=output_model_file)

    experiment = Train(dataset=full,
                       model=mdl,
                       algorithm=trainer,
                       extensions=[watcher])

    experiment.main_loop()

    ###
    error = get_error_from_model(output_model_file)
    print "*** error: {} ***".format(error)
    return error