import os
import sys
import timeit
from collections import OrderedDict

import numpy
import theano
import theano.tensor as T

import dataset_loader
import model_saver
# ConvPoolLayer, HiddenLayer, OutputLayer and CNNRNet are assumed to be
# provided by project-local modules whose paths are not shown here.


def train_model():
    rng = numpy.random.RandomState(23455)

    # size = [480, 640] is the original image size; training uses a
    # down-sampled version.
    size = [120, 160]
    nkerns = [20, 50]
    nkern1_size = [5, 5]
    nkern2_size = [5, 5]
    npool1_size = [2, 2]
    npool2_size = [2, 2]
    batch_size = 30
    fl_size = size[0] * size[1]
    multi = 100
    learning_rate = 0.001
    n_epochs = 400

    datasets = dataset_loader.load_tum_dataV2(size, multi)

    X_train, y_train = datasets[0]
    X_val, y_val = datasets[1]
    X_test, y_test = datasets[2]

    # compute the number of minibatches for training, validation and testing
    n_train_batches = len(X_train) // batch_size
    n_valid_batches = len(X_val) // batch_size
    n_test_batches = len(X_test) // batch_size

    x = T.tensor3(name='input')      # the data is presented as rasterized images
    y = T.matrix('y')                # each target is presented as a 1x3 matrix
    x_inp = T.tensor3(name='x_inp')  # the data is presented as rasterized images
    y_inp = T.matrix('y_inp')

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # Reshape the input into a 4D tensor of
    # (batch_size, n_channels, height, width); each sample has 2 channels.
    layer0_input = x.reshape((batch_size, 2, size[0], size[1]))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (120-5+1, 160-5+1) = (116, 156)
    # maxpooling reduces this further to (116/2, 156/2) = (58, 78)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 58, 78)
    layer0 = ConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 2, size[0], size[1]),
        filter_shape=(nkerns[0], 2, nkern1_size[0], nkern1_size[1]),
        poolsize=(npool1_size[0], npool1_size[1])
    )
    l0out = ((size[0] - nkern1_size[0] + 1) // npool1_size[0],
             (size[1] - nkern1_size[1] + 1) // npool1_size[1])

    # Construct the second convolutional pooling layer:
    # filtering reduces the image size to (58-5+1, 78-5+1) = (54, 74)
    # maxpooling reduces this further to (54/2, 74/2) = (27, 37)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 27, 37)
    layer1 = ConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0]) + l0out,
        filter_shape=(nkerns[1], nkerns[0], nkern2_size[0], nkern2_size[1]),
        poolsize=(npool2_size[0], npool2_size[1])
    )
    l2out = ((l0out[0] - nkern2_size[0] + 1) // npool2_size[0],
             (l0out[1] - nkern2_size[1] + 1) // npool2_size[1])

    # The HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels), i.e. a matrix of rasterized images.
    # Flattening the previous layer therefore gives a matrix of shape
    # (batch_size, nkerns[1] * l2out[0] * l2out[1]) = (30, 50 * 27 * 37).
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=nkerns[1] * l2out[0] * l2out[1],
        n_out=500
    )

    # regress the values of the fully-connected sigmoidal layer
    layer3 = OutputLayer(input=layer2.output, n_in=500, n_out=3)

    # create functions to compute the mistakes that are made by the model
    test_model = theano.function(
        [x_inp, y_inp],
        layer3.errors(y),
        givens={
            x: x_inp,
            y: y_inp,
        }
    )

    validate_model = theano.function(
        [x_inp, y_inp],
        layer3.errors(y),
        givens={
            x: x_inp,
            y: y_inp,
        }
    )

    # create the cost: mean squared error plus L1/L2 weight regularization
    L1 = (abs(layer3.W).sum() + abs(layer2.W).sum()
          + abs(layer1.W).sum() + abs(layer0.W).sum())
    L2_sqr = ((layer3.W ** 2).sum() + (layer2.W ** 2).sum()
              + ((layer1.W ** 2).sum()) + ((layer0.W ** 2).sum()))
    lambda_1 = 0.1
    lambda_2 = 0.1
    cost = layer3.mse(y) + lambda_1 * L1 + lambda_2 * L2_sqr

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by SGD.
    # Since this model has many parameters, it would be tedious to manually
    # create an update rule for each model parameter. We thus create the
    # updates list by automatically looping over all (params[i], grads[i])
    # pairs.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [x_inp, y_inp],
        cost,
        updates=updates,
        givens={
            x: x_inp,
            y: y_inp,
        }
    )
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'

    # early-stopping parameters
    patience = 10000                # look at this many examples regardless
    patience_increase = 2           # wait this much longer when a new best is found
    improvement_threshold = 0.995   # a relative improvement of this much is
                                    # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
                                    # go through this many minibatches before
                                    # checking the network on the validation
                                    # set; in this case we check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print 'training @ iter = ', iter

            x_batch = X_train[minibatch_index * batch_size: (minibatch_index + 1) * batch_size]
            data_x = dataset_loader.load_batch_imagesV2(size, x_batch)
            data_y = y_train[minibatch_index * batch_size: (minibatch_index + 1) * batch_size]
            cost_ij = train_model(data_x, data_y)
            # model_saver.save_model(epoch % 3, params)
            print('epoch %i, minibatch %i/%i, training cost %f ' %
                  (epoch, minibatch_index + 1, n_train_batches, cost_ij))

            if (iter + 1) % validation_frequency == 0:

                # compute the loss on the validation set
                validation_losses = 0
                for i in xrange(n_valid_batches):
                    x_batch = X_val[i * batch_size: (i + 1) * batch_size]
                    data_x = dataset_loader.load_batch_imagesV2(size, x_batch)
                    data_y = y_val[i * batch_size: (i + 1) * batch_size]
                    validation_losses = validation_losses + validate_model(data_x, data_y)

                this_validation_loss = validation_losses / n_valid_batches
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if the loss improvement is good enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save the best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # evaluate on the test set
                    test_losses = 0
                    for i in xrange(n_test_batches):
                        x_batch = X_test[i * batch_size: (i + 1) * batch_size]
                        data_x = dataset_loader.load_batch_imagesV2(size, x_batch)
                        data_y = y_test[i * batch_size: (i + 1) * batch_size]
                        test_losses = test_losses + test_model(data_x, data_y)

                    test_score = test_losses / n_test_batches
                    model_saver.save_model(epoch % 3, params)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))

def train_model():
    dataset = "/home/coskun/PycharmProjects/data/rgbd_dataset_freiburg3_large_cabinet/"
    rng = numpy.random.RandomState(23455)

    # size = [480, 640] is the original image size
    rn_id = 1
    size = [120, 160]
    nc = 1              # number of channels
    nkerns = [20, 50]
    nkern1_size = [5, 5]
    nkern2_size = [5, 5]
    npool1_size = [2, 2]
    npool2_size = [2, 2]
    batch_size = 30
    multi = 10
    n_epochs = 3000

    initial_learning_rate = 0.0005
    learning_rate_decay = 0.998
    squared_filter_length_limit = 15.0
    learning_rate = theano.shared(numpy.asarray(initial_learning_rate,
                                                dtype=theano.config.floatX))

    #### the params for momentum
    mom_start = 0.5
    mom_end = 0.99
    # for epoch in [0, mom_epoch_interval], the momentum increases linearly
    # from mom_start to mom_end. After mom_epoch_interval, it stays at mom_end
    mom_epoch_interval = 500
    mom_params = {"start": mom_start,
                  "end": mom_end,
                  "interval": mom_epoch_interval}

    lambda_1 = 0.01     # regularizer params
    lambda_2 = 0.01

    datasets = dataset_loader.load_tum_dataV2(dataset, rn_id, multi)

    X_train, y_train = datasets[0]
    X_val, y_val = datasets[1]
    X_test, y_test = datasets[2]

    # compute the number of minibatches for training, validation and testing
    n_train_batches = len(X_train) // batch_size
    n_valid_batches = len(X_val) // batch_size
    n_test_batches = len(X_test) // batch_size

    epoch = T.scalar()
    Fx = T.matrix(name="Fx_input")   # the data is presented as rasterized images
    Sx = T.matrix(name="Sx_input")   # the data is presented as rasterized images
    y = T.matrix("y")                # each target is presented as a 1x3 matrix
    Fx_inp = T.matrix(name="Fx_inp")
    Sx_inp = T.matrix(name="Sx_inp")
    y_inp = T.matrix("y_inp")

    print "... building the model"

    cnnr = CNNRNet(rng, input, batch_size, nc, size,
                   nkerns, nkern1_size, nkern2_size,
                   npool1_size, npool2_size, Fx, Sx)

    # create functions to compute the mistakes that are made by the model
    test_model = theano.function(
        [Fx_inp, Sx_inp, y_inp],
        cnnr.errors(y),
        givens={Fx: Fx_inp, Sx: Sx_inp, y: y_inp}
    )

    validate_model = theano.function(
        [Fx_inp, Sx_inp, y_inp],
        cnnr.errors(y),
        givens={Fx: Fx_inp, Sx: Sx_inp, y: y_inp}
    )

    cost = cnnr.mse(y) + lambda_1 * cnnr.L1 + lambda_2 * cnnr.L2_sqr

    # Compute gradients of the model wrt parameters
    gparams = []
    for param in cnnr.params:
        # Use the right cost function here to train with or without dropout.
        gparam = T.grad(cost, param)
        gparams.append(gparam)

    # ... and allocate memory for momentum'd versions of the gradient
    gparams_mom = []
    for param in cnnr.params:
        gparam_mom = theano.shared(numpy.zeros(param.get_value(borrow=True).shape,
                                               dtype=theano.config.floatX))
        gparams_mom.append(gparam_mom)

    # Compute momentum for the current epoch. The branch has to be symbolic
    # (T.switch), because `epoch` is a Theano scalar, not a Python number.
    mom = T.switch(T.lt(epoch, mom_epoch_interval),
                   mom_start * (1.0 - epoch / mom_epoch_interval)
                   + mom_end * (epoch / mom_epoch_interval),
                   mom_end)

    # Update the step direction using momentum
    updates = OrderedDict()
    for gparam_mom, gparam in zip(gparams_mom, gparams):
        # Misha Denil's original version
        # updates[gparam_mom] = mom * gparam_mom + (1. - mom) * gparam

        # change the update rule to match Hinton's dropout paper
        updates[gparam_mom] = mom * gparam_mom - (1.0 - mom) * learning_rate * gparam
    # ... and take a step along that direction
    for param, gparam_mom in zip(cnnr.params, gparams_mom):
        # Misha Denil's original version
        # stepped_param = param - learning_rate * updates[gparam_mom]

        # since we have included learning_rate in gparam_mom, we don't need it
        # here
        stepped_param = param + updates[gparam_mom]

        # This is a silly hack to constrain the norms of the rows of the weight
        # matrices. This just checks if there are two dimensions to the
        # parameter and constrains it if so... maybe this is a bit silly but it
        # should work for now.
        if param.get_value(borrow=True).ndim == 2:
            # squared_norms = T.sum(stepped_param**2, axis=1).reshape((stepped_param.shape[0], 1))
            # scale = T.clip(T.sqrt(squared_filter_length_limit / squared_norms), 0., 1.)
            # updates[param] = stepped_param * scale

            # constrain the norms of the COLUMNs of the weight, according to
            # https://github.com/BVLC/caffe/issues/109
            col_norms = T.sqrt(T.sum(T.sqr(stepped_param), axis=0))
            desired_norms = T.clip(col_norms, 0, T.sqrt(squared_filter_length_limit))
            scale = desired_norms / (1e-7 + col_norms)
            updates[param] = stepped_param * scale
        else:
            updates[param] = stepped_param

    # Compile the theano function for training. This returns the training cost
    # and updates the model parameters.
    output = cost
    train_model = theano.function(
        [Fx_inp, Sx_inp, y_inp, epoch],
        outputs=output,
        updates=updates,
        givens={Fx: Fx_inp, Sx: Sx_inp, y: y_inp}
    )

    # create a function to compute the model's predictions
    predict_model = theano.function(
        [Fx_inp, Sx_inp],
        cnnr.y_pred,
        givens={Fx: Fx_inp, Sx: Sx_inp}
    )

    decay_learning_rate = theano.function(
        inputs=[],
        outputs=learning_rate,
        updates={learning_rate: learning_rate * learning_rate_decay}
    )
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print "... training"

    # early-stopping parameters
    patience = 10000                # look at this many examples regardless
    patience_increase = 2           # wait this much longer when a new best is found
    improvement_threshold = 0.995   # a relative improvement of this much is
                                    # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
                                    # go through this many minibatches before
                                    # checking the network on the validation
                                    # set; in this case we check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.0
    start_time = timeit.default_timer()

    done_looping = False
    epoch_counter = 0

    while (epoch_counter < n_epochs) and (not done_looping):
        epoch_counter = epoch_counter + 1
        for minibatch_index in xrange(n_train_batches):

            iter = (epoch_counter - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print "training @ iter = ", iter

            Fx_batch = X_train[minibatch_index * batch_size: (minibatch_index + 1) * batch_size]
            data_Fx = dataset_loader.load_batch_images(size, nc, "F", Fx_batch)
            data_Sx = dataset_loader.load_batch_images(size, nc, "S", Fx_batch)
            data_y = y_train[minibatch_index * batch_size: (minibatch_index + 1) * batch_size]
            # pass the current epoch so the momentum schedule can ramp up
            cost_ij = train_model(data_Fx, data_Sx, data_y, epoch_counter)
            # model_saver.save_model(epoch_counter % 3, params)
            print("epoch %i, minibatch %i/%i, training cost %f " %
                  (epoch_counter, minibatch_index + 1, n_train_batches, cost_ij))

            if (iter + 1) % validation_frequency == 0:

                # compute the loss on the validation set
                validation_losses = 0
                for i in xrange(n_valid_batches):
                    Fx_batch = X_val[i * batch_size: (i + 1) * batch_size]
                    data_Fx = dataset_loader.load_batch_images(size, nc, "F", Fx_batch)
                    data_Sx = dataset_loader.load_batch_images(size, nc, "S", Fx_batch)
                    data_y = y_val[i * batch_size: (i + 1) * batch_size]
                    validation_losses = validation_losses + validate_model(data_Fx, data_Sx, data_y)

                this_validation_loss = validation_losses / n_valid_batches
                new_learning_rate = decay_learning_rate()
                print("epoch %i, minibatch %i/%i, learning_rate %f validation error %f %%" %
                      (epoch_counter, minibatch_index + 1, n_train_batches,
                       learning_rate.get_value(borrow=True),
                       this_validation_loss * 100.0))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if the loss improvement is good enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save the best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # evaluate on the test set
                    test_losses = 0
                    for i in xrange(n_test_batches):
                        Fx_batch = X_test[i * batch_size: (i + 1) * batch_size]
                        data_Fx = dataset_loader.load_batch_images(size, nc, "F", Fx_batch)
                        data_Sx = dataset_loader.load_batch_images(size, nc, "S", Fx_batch)
                        data_y = y_test[i * batch_size: (i + 1) * batch_size]
                        err = test_model(data_Fx, data_Sx, data_y)
                        test_losses = test_losses + err
                        if i % 10 == 0:
                            store = []
                            ypred = predict_model(data_Fx, data_Sx)
                            store.append(Fx_batch)
                            store.append(ypred)
                            store.append(data_y)
                            model_saver.save_garb(store)
                            print("Iteration saved %i, err %f" % (i, err))

                    test_score = test_losses / n_test_batches
                    ext = str(rn_id) + "_" + str(epoch_counter % 3)
                    model_saver.save_model(ext, cnnr.params)
                    print(("     epoch %i, minibatch %i/%i, test error of "
                           "best model %f %%") %
                          (epoch_counter, minibatch_index + 1, n_train_batches,
                           test_score * 100.0))

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print("Optimization complete.")
    print("Best validation score of %f %% obtained at iteration %i, "
          "with test performance %f %%" %
          (best_validation_loss * 100.0, best_iter + 1, test_score * 100.0))
    print >> sys.stderr, ("The code for file " +
                          os.path.split(__file__)[1] +
                          " ran for %.2fm" % ((end_time - start_time) / 60.0))
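

# The original file does not show how train_model() is invoked; the entry
# point below is only a minimal, assumed sketch for running the script
# directly (it calls the last definition of train_model above).
if __name__ == "__main__":
    train_model()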