print "Done." ############# # FINE-TUNE # ############# normal_cost = model_ft.models_stack[-1].cost() + \ model_ft.models_stack[-1].weightdecay(weightdecay) # gradients = T.grad(normal_cost, model_ft.models_stack[0].w) # grad_cost = T.sum((gradients * (1 - mask_l0_theano))**2) trainer = GraddescentMinibatch( varin=model_ft.varin, data=train_x, truth=model_ft.models_stack[-1].vartruth, truth_data=train_y, supervised=True, cost=normal_cost, params=model_ft.params, batchsize=batchsize, learningrate=finetune_lr, momentum=momentum, rng=npy_rng ) # trainer2 = GraddescentMinibatch( # varin=model_ft.varin, data=train_x, # truth=model_ft.models_stack[-1].vartruth, truth_data=train_y, # supervised=True, # cost=grad_cost, # params=model_ft.params, # batchsize=batchsize, learningrate=finetune_lr * 1e-5, momentum=momentum, # rng=npy_rng # )
)

def test_error():
    return numpy.mean([test_set_error_rate(i)
                       for i in xrange(test_smp / batchsize)])
print "Done."

print "Initial error rate: train: %f, test: %f" % (train_error(), test_error())


#############
# FINE-TUNE #
#############
print "\n\n... BP through the whole network"
trainer = GraddescentMinibatch(
    varin=model.varin, data=train_x,
    truth=model.models_stack[-1].vartruth, truth_data=train_y,
    supervised=True,
    cost=model.models_stack[-1].cost(),
    params=model.params,
    batchsize=batchsize, learningrate=finetune_lr, momentum=momentum,
    rng=npy_rng
)

init_lr = trainer.learningrate
prev_cost = numpy.inf
for epoch in xrange(finetune_epc):
    cost = trainer.epoch()
    if prev_cost <= cost:
        if trainer.learningrate < (init_lr * 1e-7):
            break
        trainer.set_learningrate(trainer.learningrate * 0.8)
    prev_cost = cost
    if epoch % 10 == 0:
print "\n\nPre-training layer %d:" % i layer_dropout = Dropout(model.models_stack[i], droprates=[0.2, 0.5], theano_rng=theano_rng).dropout_model layer_dropout.varin = model.models_stack[i].varin if (i + 2) % 4 == 0: model.models_stack[i-2].threshold = 0. pretrain_lr = pretrain_lr_lin layer_cost = layer_dropout.cost() + layer_dropout.weightdecay(weightdecay) else: pretrain_lr = pretrain_lr_zae layer_cost = layer_dropout.cost() trainer = GraddescentMinibatch( varin=model.varin, data=train_x, cost=layer_cost, params=layer_dropout.params_private, supervised=False, batchsize=batchsize, learningrate=pretrain_lr, momentum=momentum, rng=npy_rng ) prev_cost = numpy.inf patience = 0 for epoch in xrange(pretrain_epc): cost = trainer.epoch() if prev_cost <= cost: patience += 1 if patience > 10: patience = 0 trainer.set_learningrate(0.9 * trainer.learningrate) if trainer.learningrate < 1e-10: break
layer_dropout.varin = model.models_stack[i].varin
if (i + 2) % 4 == 0:
    model.models_stack[i - 2].threshold = 0.
    pretrain_lr = pretrain_lr_lin
    layer_cost = layer_dropout.cost() + layer_dropout.weightdecay(
        weightdecay)
else:
    pretrain_lr = pretrain_lr_zae
    layer_cost = layer_dropout.cost()

trainer = GraddescentMinibatch(varin=model.varin, data=train_x,
                               cost=layer_cost,
                               params=layer_dropout.params_private,
                               supervised=False, batchsize=batchsize,
                               learningrate=pretrain_lr, momentum=momentum,
                               rng=npy_rng)

prev_cost = numpy.inf
patience = 0
for epoch in xrange(pretrain_epc):
    cost = trainer.epoch()
    if prev_cost <= cost:
        patience += 1
        if patience > 10:
            patience = 0
            trainer.set_learningrate(0.9 * trainer.learningrate)
            if trainer.learningrate < 1e-10:
        total_mean += numpy.mean([test_set_error_rate(i) for i in xrange(25)])
    test_dg.partidx = mem
    return total_mean / 2.
print "Done."

print "Initial error rate: train: %f, test: %f" % (train_error(), test_error())


#############
# FINE-TUNE #
#############
print "\n\n... fine-tuning the whole network"
trainer = GraddescentMinibatch(
    varin=model.varin, data=train_x,
    truth=model.models_stack[-1].vartruth, truth_data=train_y,
    supervised=True,
    cost=model.models_stack[-1].cost(),
    params=model.params,
    batchsize=batchsize, learningrate=finetune_lr, momentum=momentum,
    rng=npy_rng
)

init_lr = trainer.learningrate
prev_cost = numpy.inf
for epoch in xrange(finetune_epc):
    i = 0
    cost = 0.
    for ipart in train_dg:
        print "part %d " % i,
        noise_mask = numpy.tile(
            npy_rng.binomial(1, 1 - noise,
                             (2500, 1, 250, 250)).astype(theano.config.floatX),
            (1, 3, 1, 1)
        )
        train_x.set_value(noise_mask * ipart[0])
        train_y.set_value(ipart[1])
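
# --- Hedged illustration (not part of the original script) ------------------
# Small numpy check of how the noise mask above is built: one binary mask is
# drawn for the single-channel shape and then tiled across the 3 colour
# channels, so all channels of a masked pixel are dropped together. Tiny
# shapes are used here purely for illustration.
import numpy
rng = numpy.random.RandomState(0)
noise = 0.2                                       # drop probability
single = rng.binomial(1, 1 - noise, (4, 1, 5, 5)).astype('float32')
mask = numpy.tile(single, (1, 3, 1, 1))           # -> shape (4, 3, 5, 5)
assert (mask[:, 0] == mask[:, 1]).all() and (mask[:, 0] == mask[:, 2]).all()
# -----------------------------------------------------------------------------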
##| Instantiating model
##`--------------------
model = cAE(n_visible=n_visible, n_hiddens=n_hiddens,
            contraction=contraction, activation=activation,
            n_samples=n_samples, BINARY=BINARY,
            MINIBATCH_SIZE=MINIBATCH_SIZE, corruptF=corruptF)

##,----------------------
##| Instantiating trainer
##`----------------------
gd = GraddescentMinibatch(model=model, data=data_s,
                          batchsize=MINIBATCH_SIZE,
                          learningrate=learningrate, momentum=momentum,
                          normalizefilters=normalizefilters,
                          rng=rng, verbose=verbose)

##,---------
##| Training
##`---------
#for epoch in np.arange(epochs):
#    gd.step()
while (model.cost(data_s.get_value()) > eps) and gd.epochcount < epochs:
    gd.step()


##################################
## Stage 4: Visualizing results ##
error_rate = theano.function(
    [],
    T.mean(T.neq(model.models_stack[-1].predict(), test_set_y)),
    givens={model.models_stack[0].varin: test_set_x},
)


#############
# PRE-TRAIN #
#############
for i in range(len(model.models_stack) - 1):
    print "\n\nPre-training layer %d:" % i
    trainer = GraddescentMinibatch(
        varin=model.varin, data=train_set_x,
        cost=model.models_stack[i].cost(),
        params=model.models_stack[i].params_private,
        supervised=False,
        batchsize=1, learningrate=0.001, momentum=0.,
        rng=npy_rng
    )

    layer_analyse = model.models_stack[i].encoder()
    layer_analyse.draw_weight(patch_shape=(28, 28, 1), npatch=100)
    layer_analyse.hist_weight()

    for epoch in xrange(15):
        trainer.step()

    layer_analyse.draw_weight(patch_shape=(28, 28, 1), npatch=100)
    layer_analyse.hist_weight()

save_params(model=model, filename="mnist_sae_784_784_784_10.npy")
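
# --- Hedged illustration (not part of the original script) ------------------
# The error_rate function above is compiled with no inputs; `givens`
# substitutes the model's input variable with the shared test set when the
# graph is evaluated. A toy Theano example of the same mechanism, with a
# stand-in prediction in place of model.predict():
import numpy
import theano
import theano.tensor as T

x = T.matrix('x')
test_x = theano.shared(numpy.zeros((10, 5), dtype=theano.config.floatX))
test_y = theano.shared(numpy.zeros(10, dtype='int64'))
pred = T.argmax(x, axis=1)                        # stand-in for predict()
err = theano.function([], T.mean(T.neq(pred, test_y)), givens={x: test_x})
print err()
# -----------------------------------------------------------------------------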
    [], T.mean(T.neq(model.models_stack[-1].predict(), test_set_y)),
    givens={model.models_stack[0].varin: test_set_x},
)


#############
# PRE-TRAIN #
#############
for i in range(len(model.models_stack) - 1):
    print "\n\nPre-training layer %d:" % i
    trainer = GraddescentMinibatch(varin=model.varin, data=train_set_x,
                                   cost=model.models_stack[i].cost(),
                                   params=model.models_stack[i].params_private,
                                   supervised=False, batchsize=1,
                                   learningrate=0.001, momentum=0.,
                                   rng=npy_rng)

    layer_analyse = model.models_stack[i].encoder()
    layer_analyse.draw_weight(patch_shape=(28, 28, 1), npatch=100)
    layer_analyse.hist_weight()

    for epoch in xrange(15):
        trainer.step()

    layer_analyse.draw_weight(patch_shape=(28, 28, 1), npatch=100)
    layer_analyse.hist_weight()

save_params(model=model, filename="mnist_sae_784_784_784_10.npy")
) print "Done." ############# # FINE-TUNE # ############# pdb.set_trace() print "\n\n... fine-tuning the whole network" trainer = GraddescentMinibatch(varin=model.varin, data=train_x, truth=model.models_stack[-1].vartruth, truth_data=train_y, supervised=True, cost=model.models_stack[-1].cost(), params=model.params, batchsize=batchsize, learningrate=lr, momentum=momentum, rng=npy_rng) trainer_adam = Adam(varin=model_adam.varin, data=train_x, truth=model_adam.models_stack[-1].vartruth, truth_data=train_y, supervised=True, cost=model_adam.models_stack[-1].cost(), params=model_adam.params, batchsize=batchsize, learningrate=lr / 5.,
print "\n\nPre-training layer %d:" % i layer_dropout = Dropout(model.models_stack[i], droprates=[0.2, 0.5], theano_rng=theano_rng).dropout_model layer_dropout.varin = model.models_stack[i].varin if (i + 2) % 4 == 0: model.models_stack[i-2].threshold = 0. pretrain_lr = pretrain_lr_lin layer_cost = layer_dropout.cost() + layer_dropout.contraction(weightdecay) else: pretrain_lr = pretrain_lr_zae layer_cost = layer_dropout.cost() trainer = GraddescentMinibatch( varin=model.varin, data=train_x, cost=layer_cost, params=layer_dropout.params_private, supervised=False, batchsize=batchsize, learningrate=pretrain_lr, momentum=momentum, rng=npy_rng ) prev_cost = numpy.inf patience = 0 origin_x = train_x.get_value() reshaped_x = pcaback(origin_x).reshape((50000, 32, 32, 3), order='F') for epoch in xrange(pretrain_epc): cost = 0. # original data cost += trainer.epoch() # data augmentation: horizontal flip flipped_x = reshaped_x[:, ::-1, :, :].reshape((50000, 3072), order='F')