def get_train_sgd(self):
    cost = MethodCost('cost_from_X')
    num_train_batch = (self.ntrain / self.batch_size)
    print "num training batches:", num_train_batch
    termination_criterion = self.get_terminations()

    # Collect only the monitoring datasets that were requested and are
    # actually available; fall back to None when none of them are.
    # (The original set the whole dict to None as soon as one id failed
    # to match, which silently discarded earlier entries.)
    monitoring_dataset = {}
    for dataset_id in self.state.monitoring_dataset:
        if dataset_id == 'test' and self.test_ddm is not None:
            monitoring_dataset['test'] = self.test_ddm
        elif dataset_id == 'valid' and self.valid_ddm is not None:
            monitoring_dataset['valid'] = self.valid_ddm
    if not monitoring_dataset:
        monitoring_dataset = None

    return SGD(learning_rate=self.state.learning_rate,
               batch_size=self.state.batch_size,
               cost=cost,
               batches_per_iter=num_train_batch,
               monitoring_dataset=monitoring_dataset,
               termination_criterion=termination_criterion,
               init_momentum=self.state.init_momentum,
               train_iteration_mode=self.state.train_iteration_mode)
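# Hypothetical driver code, not from the source: get_train_sgd() appears
# to belong to an experiment class that holds the datasets and model, so
# a caller might wire it up roughly like this (the `experiment` object
# and its attribute names are assumptions).
algorithm = experiment.get_train_sgd()
train_obj = Train(dataset=experiment.train_ddm,
                  model=experiment.model,
                  algorithm=algorithm)
train_obj.main_loop()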
def model1():
    # train set X has dim (60000, 784), y has dim (60000, 10)
    train_set = MNIST(which_set='train', one_hot=True)
    # test set X has dim (10000, 784), y has dim (10000, 10);
    # note the MNIST test set doubles as the validation set here
    valid_set = MNIST(which_set='test', one_hot=True)
    test_set = MNIST(which_set='test', one_hot=True)

    # =====<Create the MLP Model>=====
    h1_layer = NoisyRELU(layer_name='h1',
                         sparse_init=15,
                         noise_factor=5,
                         dim=1000,
                         desired_active_rate=0.2,
                         bias_factor=20,
                         max_col_norm=1)
    y_layer = Softmax(layer_name='y', n_classes=10, irange=0.,
                      max_col_norm=1)
    mlp = MLP(batch_size=200,
              input_space=VectorSpace(dim=train_set.X.shape[1]),
              layers=[h1_layer, y_layer])

    # =====<Create the SGD algorithm>=====
    sgd = SGD(init_momentum=0.1,
              learning_rate=0.01,
              monitoring_dataset={'valid': valid_set},
              cost=MethodCost('cost_from_X'),
              termination_criterion=MonitorBased(
                  channel_name='valid_y_misclass',
                  prop_decrease=0.001,
                  N=50))

    # =====<Extensions>=====
    ext = [MomentumAdjustor(start=1, saturate=10, final_momentum=0.9)]

    # =====<Create Training Object>=====
    save_path = './mlp_model1.pkl'
    train_obj = Train(dataset=train_set,
                      model=mlp,
                      algorithm=sgd,
                      extensions=ext,
                      save_path=save_path,
                      save_freq=0)

    # =====<Run the training>=====
    train_obj.main_loop()
def main(argv):
    try:
        opts, args = getopt.getopt(argv, '')
        student_yaml = args[0]
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    #
    # TRAIN WITH TARGETS
    #

    # Load student
    with open(student_yaml, "r") as sty:
        student = yaml_parse.load(sty)

    # Remove the teacher decay extension, if present. (Deleting entries
    # while iterating over range(len(...)) skips elements and can raise
    # IndexError, so rebuild the list instead.)
    student.extensions = [ext for ext in student.extensions
                          if not isinstance(ext, TeacherDecayOverEpoch)]

    student.algorithm.cost = MethodCost(method='cost_from_X')

    # Change save paths
    for ext in student.extensions:
        if isinstance(ext, MonitorBasedSaveBest):
            ext.save_path = student.save_path[0:-4] + "_noteacher_best.pkl"
    student.save_path = student.save_path[0:-4] + "_noteacher.pkl"

    student.main_loop()

    #
    # TRAIN WITH TEACHER (TOP LAYER)
    #

    # Reload the student from scratch
    with open(student_yaml, "r") as sty:
        student = yaml_parse.load(sty)

    # Change save paths
    for ext in student.extensions:
        if isinstance(ext, MonitorBasedSaveBest):
            ext.save_path = student.save_path[0:-4] + "_toplayer_best.pkl"
    student.save_path = student.save_path[0:-4] + "_toplayer.pkl"

    student.main_loop()

    #
    # TRAIN WITH HINTS
    #
    hints.main([student_yaml, 'conv'])
def model2():
    # train set X has dim (60000, 784), y has dim (60000, 10)
    train_set = MNIST(which_set='train', one_hot=True)
    # test set X has dim (10000, 784), y has dim (10000, 10)
    test_set = MNIST(which_set='test', one_hot=True)

    # =====<Create the MLP Model>=====
    h1_layer = RectifiedLinear(layer_name='h1', dim=1000, irange=0.5)
    h2_layer = RectifiedLinear(layer_name='h2', dim=1000, sparse_init=15,
                               max_col_norm=1)
    y_layer = Softmax(layer_name='y', n_classes=train_set.y.shape[1],
                      irange=0.5)
    mlp = MLP(batch_size=100,
              input_space=VectorSpace(dim=train_set.X.shape[1]),
              layers=[h1_layer, h2_layer, y_layer])

    # =====<Create the SGD algorithm>=====
    # Note: there is no separate validation split; the training set is
    # monitored under the 'valid' channel name.
    sgd = SGD(batch_size=100,
              init_momentum=0.1,
              learning_rate=0.01,
              monitoring_dataset={'valid': train_set, 'test': test_set},
              cost=SumOfCosts(costs=[
                  MethodCost('cost_from_X'),
                  WeightDecay(coeffs=[0.00005, 0.00005, 0.00005])]),
              termination_criterion=MonitorBased(
                  channel_name='valid_y_misclass',
                  prop_decrease=0.0001,
                  N=5))

    # =====<Extensions>=====
    ext = [MomentumAdjustor(start=1, saturate=10, final_momentum=0.99)]

    # =====<Create Training Object>=====
    save_path = './mlp_model2.pkl'
    train_obj = Train(dataset=train_set,
                      model=mlp,
                      algorithm=sgd,
                      extensions=ext,
                      save_path=save_path,
                      save_freq=0)
    train_obj.main_loop()
def model3():
    # Load SVHN entirely into memory; X is a flat design matrix and y is
    # one-hot (the MNIST shape comments that were here did not apply).
    train_set = SVHN_On_Memory(which_set='train')
    test_set = SVHN_On_Memory(which_set='test')

    # =====<Create the MLP Model>=====
    h1_layer = NoisyRELU(layer_name='h1', dim=2000, threshold=5,
                         sparse_init=15, max_col_norm=1)
    y_layer = Softmax(layer_name='y', n_classes=train_set.y.shape[1],
                      irange=0.5)
    mlp = MLP(batch_size=64,
              input_space=VectorSpace(dim=train_set.X.shape[1]),
              layers=[h1_layer, y_layer])

    # =====<Create the SGD algorithm>=====
    # As in model2, the training set itself is monitored as 'valid'.
    sgd = SGD(batch_size=64,
              init_momentum=0.1,
              learning_rate=0.01,
              monitoring_dataset={'valid': train_set, 'test': test_set},
              cost=MethodCost('cost_from_X'),
              termination_criterion=MonitorBased(
                  channel_name='valid_y_misclass',
                  prop_decrease=0.001,
                  N=50))

    # =====<Extensions>=====
    ext = [MomentumAdjustor(start=1, saturate=10, final_momentum=0.9)]

    # =====<Create Training Object>=====
    save_path = './mlp_model.pkl'
    train_obj = Train(dataset=train_set,
                      model=mlp,
                      algorithm=sgd,
                      extensions=ext,
                      save_path=save_path,
                      save_freq=10)
    train_obj.main_loop()
def get_trainer(model, trainset, validset, epochs=50):
    # Only monitor batches when a validation set is supplied.
    monitoring_batches = None if validset is None else 50
    train_algo = SGD(batch_size=200,
                     init_momentum=0.5,
                     learning_rate=0.5,
                     monitoring_batches=monitoring_batches,
                     monitoring_dataset=validset,
                     cost=MethodCost(method='cost_from_X', supervised=1),
                     termination_criterion=EpochCounter(epochs),
                     update_callbacks=ExponentialDecay(decay_factor=1.0005,
                                                       min_lr=0.001))
    return Train(model=model,
                 algorithm=train_algo,
                 dataset=trainset,
                 save_freq=0,
                 save_path='epoch',
                 extensions=[MomentumAdjustor(final_momentum=0.95,
                                              start=0,
                                              saturate=int(epochs * 0.8))])
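# Illustrative usage only: mlp, train_set and valid_set stand in for
# whatever model and datasets the caller has built; get_trainer returns
# a Train object whose main_loop() runs the whole schedule.
trainer = get_trainer(model=mlp, trainset=train_set,
                      validset=valid_set, epochs=50)
trainer.main_loop()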
from pylearn2.costs.cost import MethodCost
from pylearn2.datasets.mnist import MNIST
from pylearn2.models.mlp import MLP, Sigmoid, Softmax
from pylearn2.train import Train
from pylearn2.training_algorithms.sgd import SGD
from pylearn2.training_algorithms.learning_rule import Momentum, \
    MomentumAdjustor
from pylearn2.termination_criteria import EpochCounter

# Split the 60,000 MNIST training images into train and validation sets.
train_set = MNIST(which_set='train', start=0, stop=50000)
valid_set = MNIST(which_set='train', start=50000, stop=60000)
test_set = MNIST(which_set='test')

model = MLP(nvis=784,
            layers=[Sigmoid(layer_name='h', dim=500, irange=0.01),
                    Softmax(layer_name='y', n_classes=10, irange=0.01)])

algorithm = SGD(batch_size=100,
                learning_rate=0.01,
                learning_rule=Momentum(init_momentum=0.5),
                monitoring_dataset={'train': train_set,
                                    'valid': valid_set,
                                    'test': test_set},
                cost=MethodCost('cost_from_X'),
                termination_criterion=EpochCounter(10))

train = Train(dataset=train_set,
              model=model,
              algorithm=algorithm,
              save_path="mnist_example.pkl",
              save_freq=1,
              extensions=[MomentumAdjustor(start=5,
                                           saturate=6,
                                           final_momentum=0.95)])
train.main_loop()
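# A minimal sketch of reloading the saved model and scoring it on the
# test set; this evaluation code is not from the source, but follows the
# standard pylearn2 pattern of compiling the model's fprop with Theano.
# It assumes the training above completed and wrote mnist_example.pkl.
import numpy as np
import theano
from pylearn2.utils import serial

model = serial.load("mnist_example.pkl")

# Compile the model's symbolic forward pass into a prediction function.
X = model.get_input_space().make_theano_batch()
predict = theano.function([X], model.fprop(X).argmax(axis=1))

y_hat = predict(test_set.X.astype(X.dtype))
print "test misclassification rate:", np.mean(y_hat != test_set.y.flatten())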
border_mode="full") h2 = Tanh(dim=200, layer_name="h2", irange=0.1) h3 = Tanh(dim=200, layer_name="h3", irange=0.1) y = Softmax(n_classes=2, layer_name="y", irange=0.1) inputSpace = Conv2DSpace(shape=[cropSize, cropSize], num_channels=3) model = MLP(layers=[h0, h1, h2, h3, y], batch_size=batchSize, input_space=inputSpace) algorithm = SGD(learning_rate=0.01, cost=MethodCost("cost_from_X"), batch_size=batchSize, monitoring_batch_size=batchSize, monitoring_dataset={ 'train': train, 'valid': valid }, monitor_iteration_mode="even_batchwise_shuffled_sequential", termination_criterion=EpochCounter(max_epochs=200), learning_rule=Momentum(init_momentum=0.99), train_iteration_mode="even_batchwise_shuffled_sequential") train = Train(dataset=train, model=model, algorithm=algorithm, save_path="ConvNet4.pkl",