def create_layer_one(self):
    which_set = "train"
    one_hot = True
    start = 0
    # Creating 5 random patch layers based on 8,000 samples (saturation point
    # where the objective no longer improves).
    stop = 800
    # GridPatchCIFAR10 randomly selects 5 16x16 patches from each image, and we
    # do this 5 times. This helps increase training time and captures more
    # information, similar to how the neurons in the eye are attached to a
    # specific region in the image.
    dataset = GridPatchCIFAR10(which_set=which_set, one_hot=one_hot,
                               start=start, stop=stop)

    # Denoising autoencoder model hyper-parameters
    nvis = 768
    nhid = 512
    irange = 0.05
    corruption_lvl = 0.2
    corruptor = BinomialCorruptor(corruption_level=corruption_lvl)
    activation_encoder = "tanh"
    # Linear activation
    activation_decoder = None

    # Creating the denoising autoencoder
    model = DenoisingAutoencoder(nvis=nvis, nhid=nhid, irange=irange,
                                 corruptor=corruptor,
                                 act_enc=activation_encoder,
                                 act_dec=activation_decoder)

    # Parameters for the SGD learning algorithm instantiated below
    learning_rate = 0.001
    batch_size = 100
    monitoring_batches = 5
    monitoring_dataset = dataset
    cost = MeanSquaredReconstructionError()
    max_epochs = 10
    termination_criterion = EpochCounter(max_epochs=max_epochs)

    # SGD learning algorithm
    algorithm = SGD(learning_rate=learning_rate, batch_size=batch_size,
                    monitoring_batches=monitoring_batches,
                    monitoring_dataset=dataset, cost=cost,
                    termination_criterion=termination_criterion)

    processes = []
    for i in range(0, 5):
        print "Training DAE Sub-Layer: ", i
        save_path = self.save_path + str(i) + ".pkl"
        save_freq = 1
        train = Train(dataset=dataset, model=model, algorithm=algorithm,
                      save_path=save_path, save_freq=save_freq)
        p = Process(target=train.main_loop, args=())
        p.start()
        processes.append(p)

    for process in processes:
        process.join()
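# --- Usage sketch (not from the original source) ---
# A minimal, hypothetical example of reloading the five sub-layer pickles that
# create_layer_one writes from its worker processes. The "dae_sublayer_" prefix
# stands in for whatever self.save_path holds; pylearn2's serial.load simply
# unpickles the Model object that Train saved.
from pylearn2.utils import serial

sub_layers = []
for i in range(0, 5):
    sub_layers.append(serial.load("dae_sublayer_" + str(i) + ".pkl"))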
def test_sgd_topo():
    # tests that we can run the sgd algorithm
    # on data with topology
    # does not test for correctness at all, just
    # that the algorithm runs without dying
    rows = 3
    cols = 4
    channels = 2
    dim = rows * cols * channels
    m = 10

    rng = np.random.RandomState([25, 9, 2012])

    dataset = get_topological_dataset(rng, rows, cols, channels, m)

    # including a monitoring dataset lets us test that
    # the monitor works with supervised data
    m = 15
    monitoring_dataset = get_topological_dataset(rng, rows, cols, channels, m)

    model = TopoSoftmaxModel(rows, cols, channels)

    learning_rate = 1e-3
    batch_size = 5

    cost = CrossEntropy()

    # We need to include this so the test actually stops running at some point
    termination_criterion = EpochCounter(5)

    algorithm = SGD(learning_rate, cost, batch_size=5,
                    monitoring_batches=3,
                    monitoring_dataset=monitoring_dataset,
                    termination_criterion=termination_criterion,
                    update_callbacks=None,
                    init_momentum=None,
                    set_batch_size=False)

    train = Train(dataset, model, algorithm, save_path=None,
                  save_freq=0, extensions=None)

    train.main_loop()
def get_layer_trainer_sgd_rbm(layer, trainset):
    train_algo = SGD(
        learning_rate=1e-1,
        batch_size=5,
        # "batches_per_iter": 2000,
        monitoring_batches=20,
        monitoring_dataset=trainset,
        cost=SMD(corruptor=GaussianCorruptor(stdev=0.4)),
        termination_criterion=EpochCounter(max_epochs=MAX_EPOCHS_UNSUPERVISED),
        )
    model = layer
    extensions = [MonitorBasedLRAdjuster()]
    return Train(model=model, algorithm=train_algo,
                 save_path='grbm.pkl', save_freq=1,
                 extensions=extensions, dataset=trainset)
def testing_multiple_datasets_in_monitor_based_lr():
    # tests that the class MonitorBasedLRAdjuster in sgd.py rejects multiple
    # monitoring datasets when multiple channels ending in '_objective' exist.
    # This case happens when the user has not specified either channel_name or
    # dataset_name in the constructor.
    dim = 3
    m = 10

    rng = np.random.RandomState([6, 2, 2014])

    X = rng.randn(m, dim)
    Y = rng.randn(m, dim)

    learning_rate = 1e-2
    batch_size = 5

    # We need to include this so the test actually stops running at some point
    epoch_num = 1

    # including a monitoring dataset lets us test that
    # the monitor works with supervised data
    monitoring_train = DenseDesignMatrix(X=X)
    monitoring_test = DenseDesignMatrix(X=Y)

    cost = DummyCost()

    model = SoftmaxModel(dim)

    dataset = DenseDesignMatrix(X=X)

    termination_criterion = EpochCounter(epoch_num)

    algorithm = SGD(learning_rate, cost, batch_size=5,
                    monitoring_batches=2,
                    monitoring_dataset={'train': monitoring_train,
                                        'test': monitoring_test},
                    termination_criterion=termination_criterion,
                    update_callbacks=None,
                    init_momentum=None,
                    set_batch_size=False)

    monitor_lr = MonitorBasedLRAdjuster()

    train = Train(dataset, model, algorithm, save_path=None,
                  save_freq=0, extensions=[monitor_lr])

    try:
        train.main_loop()
    except ValueError:
        return
    raise AssertionError("MonitorBasedLRAdjuster takes multiple dataset names "
                         "in which more than one \"objective\" channel exists "
                         "and the user has not specified either channel_name "
                         "or dataset_name in the constructor to disambiguate.")
def test_sgd_no_mon():
    # tests that we can run the sgd algorithm
    # without a monitoring dataset
    # does not test for correctness at all, just
    # that the algorithm runs without dying
    dim = 3
    m = 10

    rng = np.random.RandomState([25, 9, 2012])

    X = rng.randn(m, dim)

    idx = rng.randint(0, dim, (m,))
    Y = np.zeros((m, dim))
    for i in xrange(m):
        Y[i, idx[i]] = 1

    dataset = DenseDesignMatrix(X=X, y=Y)

    m = 15
    X = rng.randn(m, dim)

    idx = rng.randint(0, dim, (m,))
    Y = np.zeros((m, dim))
    for i in xrange(m):
        Y[i, idx[i]] = 1

    model = SoftmaxModel(dim)

    learning_rate = 1e-3
    batch_size = 5

    cost = CrossEntropy()

    # We need to include this so the test actually stops running at some point
    termination_criterion = EpochCounter(5)

    algorithm = SGD(learning_rate, cost, batch_size=5,
                    monitoring_dataset=None,
                    termination_criterion=termination_criterion,
                    update_callbacks=None,
                    init_momentum=None,
                    set_batch_size=False)

    train = Train(dataset, model, algorithm, save_path=None,
                  save_freq=0, extensions=None)

    train.main_loop()
def test_bgd_unsup():
    # tests that we can run the bgd algorithm
    # on an unsupervised cost.
    # does not test for correctness at all, just
    # that the algorithm runs without dying
    dim = 3
    m = 10

    rng = np.random.RandomState([25, 9, 2012])

    X = rng.randn(m, dim)

    dataset = DenseDesignMatrix(X=X)

    m = 15
    X = rng.randn(m, dim)

    # including a monitoring dataset lets us test that
    # the monitor works with unsupervised data
    monitoring_dataset = DenseDesignMatrix(X=X)

    model = SoftmaxModel(dim)

    learning_rate = 1e-3
    batch_size = 5

    class DummyCost(Cost):
        def __call__(self, model, X):
            return T.square(model(X) - X).mean()

    cost = DummyCost()

    # We need to include this so the test actually stops running at some point
    termination_criterion = EpochCounter(5)

    algorithm = BGD(cost, batch_size=5,
                    monitoring_batches=2,
                    monitoring_dataset=monitoring_dataset,
                    termination_criterion=termination_criterion)

    train = Train(dataset, model, algorithm, save_path=None,
                  save_freq=0, extensions=None)

    train.main_loop()
def get_ae_pretrainer(layer, data, batch_size):
    init_lr = 0.1
    dec_fac = 1.0001

    train_algo = sgd.SGD(
        batch_size=batch_size,
        learning_rate=init_lr,
        init_momentum=0.5,
        monitoring_batches=100 / batch_size,
        monitoring_dataset={'train': data},
        # cost=MeanSquaredReconstructionError(),
        cost=CAE_cost(),
        termination_criterion=EpochCounter(20),
        update_callbacks=sgd.ExponentialDecay(decay_factor=dec_fac,
                                              min_lr=0.02))

    return Train(model=layer, algorithm=train_algo, dataset=data,
                 extensions=[sgd.MomentumAdjustor(final_momentum=0.9,
                                                  start=0, saturate=15)])
def get_finetuner(model, cost, trainset, validset=None, batch_size=100,
                  iters=100):
    train_algo = sgd.SGD(
        batch_size=batch_size,
        init_momentum=0.5,
        learning_rate=0.5,
        # monitoring_batches=100 / batch_size,
        # monitoring_dataset={'train': trainset, 'valid': validset},
        cost=cost,
        termination_criterion=EpochCounter(iters),
        update_callbacks=sgd.ExponentialDecay(decay_factor=1.005, min_lr=0.05))

    return Train(model=model, algorithm=train_algo, dataset=trainset,
                 save_freq=0,
                 extensions=[sgd.MomentumAdjustor(final_momentum=0.9, start=0,
                                                  saturate=int(0.8 * iters))])
def get_layer_trainer_sgd_autoencoder(layer, trainset):
    # configs on sgd
    train_algo = SGD(
        learning_rate=0.1,
        cost=MeanSquaredReconstructionError(),
        batch_size=10,
        monitoring_batches=10,
        monitoring_dataset=trainset,
        termination_criterion=EpochCounter(max_epochs=MAX_EPOCHS_UNSUPERVISED),
        update_callbacks=None)

    model = layer
    extensions = None
    return Train(model=model, algorithm=train_algo,
                 extensions=extensions, dataset=trainset)
def get_trainer(model, trainset, validset, epochs=20, batch_size=100):
    monitoring_batches = None if validset is None else 20

    train_algo = SGD(
        batch_size=batch_size,
        init_momentum=0.5,
        learning_rate=0.05,
        monitoring_batches=monitoring_batches,
        monitoring_dataset=validset,
        cost=Dropout(input_include_probs={'h0': 0.8},
                     input_scales={'h0': 1.},
                     default_input_include_prob=0.5,
                     default_input_scale=1. / 0.5),
        # termination_criterion=MonitorBased(channel_name='y_misclass',
        #                                    prop_decrease=0., N=50),
        termination_criterion=EpochCounter(epochs),
        update_callbacks=ExponentialDecay(decay_factor=1.00002, min_lr=0.0001))

    return Train(model=model, algorithm=train_algo, dataset=trainset,
                 save_freq=0, save_path='epoch',
                 extensions=[MomentumAdjustor(final_momentum=0.7, start=0,
                                              saturate=int(0.8 * epochs))])
def test_kmeans():
    """
    Tests kmeans.KMeans by using it as a model in a Train object.
    """
    X = np.random.random(size=(100, 10))
    Y = np.random.randint(5, size=(100, 1))

    dataset = DenseDesignMatrix(X, y=Y)

    model = KMeans(k=5, nvis=10)

    train = Train(model=model, dataset=dataset)
    train.main_loop()
def get_layer_trainer_logistic(layer, trainset, validset):
    # configs on sgd
    config = {'learning_rate': 0.1,
              'cost': Default(),
              'batch_size': 150,
              'monitoring_dataset': validset,
              'termination_criterion': MonitorBased(channel_name='y_misclass',
                                                    N=10, prop_decrease=0),
              'update_callbacks': None}

    train_algo = SGD(**config)
    model = layer
    return Train(model=model,
                 dataset=trainset,
                 algorithm=train_algo,
                 extensions=None)
def testing_multiple_datasets_with_specified_dataset_in_monitor_based_lr():
    # tests that the class MonitorBasedLRAdjuster in sgd.py can properly use
    # the specified dataset_name in the constructor when multiple datasets
    # exist.
    dim = 3
    m = 10

    rng = np.random.RandomState([6, 2, 2014])

    X = rng.randn(m, dim)
    Y = rng.randn(m, dim)

    learning_rate = 1e-2
    batch_size = 5

    # We need to include this so the test actually stops running at some point
    epoch_num = 1

    # including a monitoring dataset lets us test that
    # the monitor works with supervised data
    monitoring_train = DenseDesignMatrix(X=X)
    monitoring_test = DenseDesignMatrix(X=Y)

    cost = DummyCost()

    model = SoftmaxModel(dim)

    dataset = DenseDesignMatrix(X=X)

    termination_criterion = EpochCounter(epoch_num)

    monitoring_dataset = {'train': monitoring_train, 'test': monitoring_test}

    algorithm = SGD(learning_rate, cost, batch_size=5,
                    monitoring_batches=2,
                    monitoring_dataset=monitoring_dataset,
                    termination_criterion=termination_criterion,
                    update_callbacks=None,
                    init_momentum=None,
                    set_batch_size=False)

    dataset_name = monitoring_dataset.keys()[0]
    monitor_lr = MonitorBasedLRAdjuster(dataset_name=dataset_name)

    train = Train(dataset, model, algorithm, save_path=None,
                  save_freq=0, extensions=[monitor_lr])

    train.main_loop()
def test_flattener_layer_state_separation_for_softmax():
    """
    Creates a CompositeLayer wrapping two Softmax layers and ensures that
    state gets correctly picked apart.
    """
    soft1 = Softmax(5, 'sf1', .1)
    soft2 = Softmax(5, 'sf2', .1)

    mlp = MLP(layers=[FlattenerLayer(CompositeLayer('comp', [soft1, soft2]))],
              nvis=2)

    X = np.random.rand(20, 2).astype(theano.config.floatX)
    y = np.random.rand(20, 10).astype(theano.config.floatX)
    dataset = DenseDesignMatrix(X=X, y=y)

    train = Train(dataset, mlp, SGD(0.1, batch_size=5,
                                    monitoring_dataset=dataset))
    train.algorithm.termination_criterion = EpochCounter(1)
    train.main_loop()
def test_flattener_layer_state_separation_for_conv():
    """
    Creates a CompositeLayer wrapping two Conv layers and ensures that
    state gets correctly picked apart.
    """
    conv1 = ConvElemwise(8, [2, 2], 'sf1', SigmoidConvNonlinearity(), .1)
    conv2 = ConvElemwise(8, [2, 2], 'sf2', SigmoidConvNonlinearity(), .1)

    mlp = MLP(layers=[FlattenerLayer(CompositeLayer('comp', [conv1, conv2]))],
              input_space=Conv2DSpace(shape=[5, 5], num_channels=2))

    topo_view = np.random.rand(10, 5, 5, 2).astype(theano.config.floatX)
    y = np.random.rand(10, 256).astype(theano.config.floatX)
    dataset = DenseDesignMatrix(topo_view=topo_view, y=y)

    train = Train(dataset, mlp, SGD(0.1, batch_size=5,
                                    monitoring_dataset=dataset))
    train.algorithm.termination_criterion = EpochCounter(1)
    train.main_loop()
def get_layer_trainer_logistic(layer, trainset):
    # configs on sgd
    config = {'learning_rate': 0.1,
              'cost': Default(),
              'batch_size': 10,
              'monitoring_batches': 10,
              'monitoring_dataset': trainset,
              'termination_criterion': EpochCounter(
                  max_epochs=MAX_EPOCHS_SUPERVISED),
              'update_callbacks': None}

    train_algo = SGD(**config)
    model = layer
    return Train(model=model,
                 dataset=trainset,
                 algorithm=train_algo,
                 extensions=None)
def test_sgd_unsup():
    # tests that we can run the sgd algorithm
    # on an unsupervised cost.
    # does not test for correctness at all, just
    # that the algorithm runs without dying
    dim = 3
    m = 10

    rng = np.random.RandomState([25, 9, 2012])

    X = rng.randn(m, dim)

    dataset = DenseDesignMatrix(X=X)

    m = 15
    X = rng.randn(m, dim)

    # Including a monitoring dataset lets us test that
    # the monitor works with unsupervised data
    monitoring_dataset = DenseDesignMatrix(X=X)

    model = SoftmaxModel(dim)

    learning_rate = 1e-3
    batch_size = 5

    cost = DummyCost()

    # We need to include this so the test actually stops running at some point
    termination_criterion = EpochCounter(5)

    algorithm = SGD(learning_rate, cost, batch_size=5,
                    monitoring_batches=3,
                    monitoring_dataset=monitoring_dataset,
                    termination_criterion=termination_criterion,
                    update_callbacks=None,
                    init_momentum=None,
                    set_batch_size=False)

    train = Train(dataset, model, algorithm, save_path=None,
                  save_freq=0, extensions=None)

    train.main_loop()
def produce_train_obj(new_epochs, model=None):
    if model is None:
        model = MLP(layers=[Softmax(layer_name='y', n_classes=2, irange=0.)],
                    nvis=3)
    else:
        model = push_monitor(model, 'old_monitor', transfer_experience=True)

    dataset = DenseDesignMatrix(X=np.random.normal(size=(6, 3)),
                                y=np.random.normal(size=(6, 2)))

    epoch_counter = EpochCounter(max_epochs=N, new_epochs=new_epochs)

    algorithm = SGD(batch_size=2, learning_rate=0.1,
                    termination_criterion=epoch_counter)

    return Train(dataset=dataset, model=model, algorithm=algorithm)
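# --- Usage sketch (not from the original source) ---
# A hedged illustration of how produce_train_obj might be used to warm-start
# training: run once from scratch, then pass the trained model back in so that
# push_monitor transfers the old monitor and EpochCounter(new_epochs=...)
# continues counting. Assumes the module-level constant N is already defined.
first_run = produce_train_obj(new_epochs=False)
first_run.main_loop()

resumed_run = produce_train_obj(new_epochs=True, model=first_run.model)
resumed_run.main_loop()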
def get_rbm_trainer(model, dataset, save_path, epochs=5):
    """
    A Restricted Boltzmann Machine (RBM) trainer
    """
    config = {
        'learning_rate': 1e-2,
        'train_iteration_mode': 'shuffled_sequential',
        'batch_size': 250,
        # 'batches_per_iter': 100,
        'learning_rule': RMSProp(),
        'monitoring_dataset': dataset,
        'cost': SML(250, 1),
        'termination_criterion': EpochCounter(max_epochs=epochs),
    }

    return Train(model=model,
                 algorithm=SGD(**config),
                 dataset=dataset,
                 save_path=save_path,
                 save_freq=1)  # , extensions=extensions)
def get_ae_trainer(model, dataset, save_path, epochs=5):
    """
    An Autoencoder (AE) trainer
    """
    config = {
        'learning_rate': 1e-2,
        'train_iteration_mode': 'shuffled_sequential',
        'batch_size': 250,
        # 'batches_per_iter': 2000,
        'learning_rule': RMSProp(),
        'monitoring_dataset': dataset,
        'cost': MeanSquaredReconstructionError(),
        'termination_criterion': EpochCounter(max_epochs=epochs),
    }

    return Train(model=model,
                 algorithm=SGD(**config),
                 dataset=dataset,
                 save_path=save_path,
                 save_freq=1)  # , extensions=extensions)
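# --- Usage sketch (not from the original source) ---
# Both helpers above return a pylearn2 Train object, so a hypothetical
# layer-wise pretraining script could run them back to back. "rbm", "ae",
# and "dataset" are placeholders for a model and dataset built elsewhere.
rbm_trainer = get_rbm_trainer(rbm, dataset, 'rbm_layer1.pkl', epochs=5)
rbm_trainer.main_loop()

ae_trainer = get_ae_trainer(ae, dataset, 'ae_layer1.pkl', epochs=5)
ae_trainer.main_loop()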
def get_layer_trainer_sgd(model, trainset):
    drop_cost = Dropout(input_include_probs={'h0': .4},
                        input_scales={'h0': 1.})

    # configs on sgd
    train_algo = SGD(train_iteration_mode='batchwise_shuffled_sequential',
                     learning_rate=0.2,
                     cost=drop_cost,
                     monitoring_dataset=trainset,
                     termination_criterion=EpochCounter(max_epochs=MAX_EPOCHS),
                     update_callbacks=None)

    extensions = [MonitorBasedSaveBest(channel_name="y_kl",
                                       save_path="./convnet_test_best.pkl")]

    return Train(model=model,
                 algorithm=train_algo,
                 extensions=extensions,
                 dataset=trainset)
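# --- Usage sketch (not from the original source) ---
# After get_layer_trainer_sgd(...).main_loop() finishes, MonitorBasedSaveBest
# will have pickled the model with the lowest "y_kl" channel value to
# ./convnet_test_best.pkl, which can then be reloaded for evaluation.
from pylearn2.utils import serial

best_model = serial.load("./convnet_test_best.pkl")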
def get_trainer(self, model, trainset):
    MAX_EPOCHS_UNSUPERVISED = 100

    # configs on sgd
    train_algo = SGD(
        learning_rate=0.1,
        # cost=MeanSquaredReconstructionError(),
        cost=self.cost,
        batch_size=10,
        monitoring_batches=10,
        monitoring_dataset=trainset,
        monitor_iteration_mode='even_sequential',
        train_iteration_mode='even_sequential',
        termination_criterion=EpochCounter(
            max_epochs=MAX_EPOCHS_UNSUPERVISED),
        update_callbacks=None)

    extensions = None
    return Train(model=model,
                 algorithm=train_algo,
                 extensions=extensions,
                 dataset=trainset)
def test_correctness():
    """
    Test that the cost function works with float64
    """
    x_train, y_train, x_valid, y_valid = create_dataset()

    trainset = DenseDesignMatrix(X=np.array(x_train), y=y_train)
    validset = DenseDesignMatrix(X=np.array(x_valid), y=y_valid)

    n_inputs = trainset.X.shape[1]
    n_outputs = 1
    n_hidden = 10

    hidden_istdev = 4 * (6 / float(n_inputs + n_hidden)) ** 0.5
    output_istdev = 4 * (6 / float(n_hidden + n_outputs)) ** 0.5

    model = MLP(layers=[Sigmoid(dim=n_hidden, layer_name='hidden',
                                istdev=hidden_istdev),
                        Sigmoid(dim=n_outputs, layer_name='output',
                                istdev=output_istdev)],
                nvis=n_inputs, seed=[2013, 9, 16])

    termination_criterion = And([EpochCounter(max_epochs=1),
                                 MonitorBased(prop_decrease=1e-7, N=2)])

    cost = SumOfCosts([(0.99, Default()),
                       (0.01, L1WeightDecay({}))])

    algo = SGD(1e-1,
               update_callbacks=[ExponentialDecay(decay_factor=1.00001,
                                                  min_lr=1e-10)],
               cost=cost,
               monitoring_dataset=validset,
               termination_criterion=termination_criterion,
               monitor_iteration_mode='even_shuffled_sequential',
               batch_size=2)

    train = Train(model=model, dataset=trainset, algorithm=algo)
    train.main_loop()
def main():
    # Only the trainset is processed by this function.
    print 'getting preprocessed data to train model'
    pp_trainset, testset = get_processed_dataset()

    # remember to change here when changing datasets
    print 'loading unprocessed data for input displays'
    trainset = cifar10.CIFAR10(which_set="train")

    dmat = trainset.get_design_matrix()
    nvis = dmat.shape[1]

    model = DenoisingAutoencoder(
        corruptor=BinomialCorruptor(corruption_level=0.5),
        nhid=nhid,
        nvis=nvis,
        act_enc='sigmoid',
        act_dec='sigmoid',
        irange=.01)

    algorithm = SGD(
        learning_rate=0.1,
        cost=MeanSquaredReconstructionError(),
        batch_size=1000,
        monitoring_batches=10,
        monitoring_dataset=pp_trainset,
        termination_criterion=EpochCounter(max_epochs=MAX_EPOCHS_UNSUPERVISED),
        update_callbacks=None)

    extensions = None

    trainer = Train(model=model,
                    algorithm=algorithm,
                    save_path='testrun.pkl',
                    save_freq=1,
                    extensions=extensions,
                    dataset=pp_trainset)

    trainer.main_loop()
def test_training_a_model():
    """
    Tests whether SparseDataset can be trained with a dummy model.
    """
    dim = 3
    m = 10

    rng = np.random.RandomState([22, 4, 2014])
    X = rng.randn(m, dim)
    ds = csr_matrix(X)
    dataset = SparseDataset(from_scipy_sparse_dataset=ds)

    model = SoftmaxModel(dim)

    learning_rate = 1e-1
    batch_size = 5
    epoch_num = 2

    termination_criterion = EpochCounter(epoch_num)

    cost = DummyCost()

    algorithm = SGD(learning_rate, cost,
                    batch_size=batch_size,
                    termination_criterion=termination_criterion,
                    update_callbacks=None,
                    init_momentum=None,
                    set_batch_size=False)

    train = Train(dataset, model, algorithm, save_path=None,
                  save_freq=0, extensions=None)

    train.main_loop()
def test_batch_size_specialization():
    # Tests that using a batch size of 1 for training and a batch size
    # other than 1 for monitoring does not result in a crash.
    # This catches a bug reported in the [email protected]
    # e-mail "[pylearn-dev] monitor assertion error: channel_X.type != X.type"
    # The training data was specialized to a row matrix (theano tensor with
    # first dim broadcastable) and the monitor ended up with expressions
    # mixing the specialized and non-specialized version of the expression.
    m = 2
    rng = np.random.RandomState([25, 9, 2012])

    X = np.zeros((m, 1))
    dataset = DenseDesignMatrix(X=X)

    model = SoftmaxModel(1)
    learning_rate = 1e-3

    cost = DummyCost()

    algorithm = SGD(learning_rate, cost, batch_size=1,
                    monitoring_batches=1,
                    monitoring_dataset=dataset,
                    termination_criterion=EpochCounter(max_epochs=1),
                    update_callbacks=None,
                    set_batch_size=False)

    train = Train(dataset, model, algorithm, save_path=None,
                  save_freq=0, extensions=None)

    train.main_loop()
def test_train_ae():
    """
    .. todo::

        WRITEME
    """
    GC = GaussianCorruptor

    gsn = GSN.new(layer_sizes=[ds.X.shape[1], 1000],
                  activation_funcs=["sigmoid", "tanh"],
                  pre_corruptors=[None, GC(1.0)],
                  post_corruptors=[SaltPepperCorruptor(0.5), GC(1.0)],
                  layer_samplers=[BinomialSampler(), None],
                  tied=False)

    # average MBCE over each example rather than sum it
    _mbce = MeanBinaryCrossEntropy()
    reconstruction_cost = lambda a, b: _mbce.cost(a, b) / ds.X.shape[1]

    c = GSNCost([(0, 1.0, reconstruction_cost)], walkback=WALKBACK)

    alg = SGD(LEARNING_RATE,
              init_momentum=MOMENTUM,
              cost=c,
              termination_criterion=EpochCounter(MAX_EPOCHS),
              batches_per_iter=BATCHES_PER_EPOCH,
              batch_size=BATCH_SIZE,
              monitoring_dataset=ds,
              monitoring_batches=10)

    trainer = Train(ds, gsn, algorithm=alg,
                    save_path="gsn_ae_example.pkl", save_freq=5)
    trainer.main_loop()

    print("done training")
def get_finetuner(model, trainset, batch_size=100, epochs=100):
    train_algo = SGD(batch_size=batch_size,
                     learning_rule=Momentum(init_momentum=0.5),
                     learning_rate=0.5,
                     monitoring_batches=batch_size,
                     monitoring_dataset=trainset,
                     cost=Dropout(input_include_probs={'h0': .5},
                                  input_scales={'h0': 2.}),
                     termination_criterion=EpochCounter(epochs))

    path = DATA_DIR + 'model' + str(SUBMODEL) + 'saved_daex.pkl'

    return Train(model=model,
                 algorithm=train_algo,
                 dataset=trainset,
                 save_path=path,
                 save_freq=10,
                 extensions=[
                     MomentumAdjustor(final_momentum=0.9, start=0,
                                      saturate=int(epochs * 0.8)),
                     LinearDecayOverEpoch(start=1,
                                          saturate=int(epochs * 0.7),
                                          decay_factor=.02)
                 ])
def test_afew2ft_train():
    n_classes = 7

    dataset = AFEW2FaceTubes(which_set='Train')
    monitoring_dataset = {
        'train': dataset,
        'valid': AFEW2FaceTubes(which_set='Val')}

    model = DummyModel(n_classes=n_classes,
                       input_space=dataset.get_data_specs()[0].components[0])

    cost = DummyCost()

    termination_criterion = EpochCounter(10)

    learning_rate = 1e-6
    batch_size = 1

    algorithm = SGD(learning_rate, cost,
                    batch_size=batch_size,
                    monitoring_batches=batch_size,
                    monitoring_dataset=monitoring_dataset,
                    termination_criterion=termination_criterion)

    train = Train(dataset, model, algorithm, save_path=None)
    train.main_loop()
def get_trainer2(model, trainset, epochs=50):
    train_algo = SGD(
        batch_size=bsize,
        learning_rate=0.5,
        learning_rule=Momentum(init_momentum=0.5),
        monitoring_batches=bsize,
        monitoring_dataset=trainset,
        cost=Dropout(input_include_probs={'h0': .8},
                     input_scales={'h0': 1.}),
        termination_criterion=EpochCounter(epochs),
    )

    path = DATA_DIR + 'model2saved_conv.pkl'

    return Train(model=model,
                 algorithm=train_algo,
                 dataset=trainset,
                 save_path=path,
                 save_freq=1,
                 extensions=[
                     MomentumAdjustor(final_momentum=0.7, start=0,
                                      saturate=int(epochs * 0.5)),
                     LinearDecayOverEpoch(start=1,
                                          saturate=int(epochs * 0.8),
                                          decay_factor=.01)
                 ])