def train_siamese_mnist(learning_rate=0.1, n_epochs=200,
                        dataset='mnist.pkl.gz', batch_size=50,
                        validation_frequency=500):
    """Train a Siamese CNN on paired MNIST data with minibatch SGD.

    The model is built from two conv/pool layers followed by four
    fully-connected layers, optimised on ``model.loss_cos_cos2`` with plain
    gradient descent, and monitored with patience-based early stopping.
    Progress is printed to stdout; the total run time goes to stderr.

    Parameters
    ----------
    learning_rate : float
        Step size of the gradient-descent update.
    n_epochs : int
        Maximum number of passes over the training set.
    dataset : str
        Path handed to ``load_data``.  The result is indexed as
        ``[train, valid, test]``, each an ``(x1, x2, y)`` triple of objects
        supporting ``get_value`` and slicing (presumably Theano shared
        variables -- confirm against ``load_data``).
    batch_size : int
        Number of pairs per minibatch.  A trailing partial batch is dropped.
    validation_frequency : int
        Number of training iterations between validation passes.

    Returns
    -------
    None
    """
    rng = np.random.RandomState(42)
    srng = RandomStreams(seed=42)

    datasets = load_data(dataset)
    train_set_x1, train_set_x2, train_set_y = datasets[0]
    valid_set_x1, valid_set_x2, valid_set_y = datasets[1]
    test_set_x1, test_set_x2, test_set_y = datasets[2]

    # Floor division keeps the batch counts integral regardless of whether
    # the interpreter uses classic or true division for ``/``.
    n_train_batches = (
        train_set_x1.get_value(borrow=True).shape[0] // batch_size)
    n_valid_batches = (
        valid_set_x1.get_value(borrow=True).shape[0] // batch_size)
    n_test_batches = (
        test_set_x1.get_value(borrow=True).shape[0] // batch_size)

    # Symbolic minibatch index fed to every compiled function.
    index = T.lscalar()

    print('building model')

    input_shape = (batch_size, 1, 28, 28)
    conv_layer_specs = [
        {"filter_shape": (20, 1, 5, 5), "pool_shape": (2, 2),
         "activation": "tanh"},
        {"filter_shape": (50, 20, 5, 5), "pool_shape": (2, 2),
         "activation": "tanh"},
    ]
    hidden_layer_specs = [
        {"units": 800, "activation": "tanh"},
        {"units": 500, "activation": "tanh"},
        {"units": 200, "activation": "tanh"},
        {"units": 10, "activation": "tanh"},
    ]
    dropout_rates = None

    y = T.ivector("y")
    input_x1 = T.matrix("x1")
    input_x2 = T.matrix("x2")

    model = SiameseCNN(
        rng, input_x1, input_x2, input_shape, conv_layer_specs,
        hidden_layer_specs, srng, dropout_rates=dropout_rates,
    )
    params = model.parameters
    cost = model.loss_cos_cos2(y)

    def batch_givens(set_x1, set_x2, set_y):
        """Map the symbolic inputs onto minibatch ``index`` of one split."""
        start = index * batch_size
        stop = (index + 1) * batch_size
        return {
            input_x1: set_x1[start:stop],
            input_x2: set_x2[start:stop],
            y: set_y[start:stop],
        }

    test_model = theano.function(
        inputs=[index],
        outputs=cost,
        givens=batch_givens(test_set_x1, test_set_x2, test_set_y),
    )
    validate_model = theano.function(
        inputs=[index],
        outputs=cost,
        givens=batch_givens(valid_set_x1, valid_set_x2, valid_set_y),
    )

    # Vanilla SGD: every parameter moves against its gradient.
    grads = T.grad(cost, params)
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens=batch_givens(train_set_x1, train_set_x2, train_set_y),
    )

    print('model built, start training')

    # Early-stopping state: train for at least ``patience`` iterations and
    # extend that budget whenever validation improves by a relative margin
    # better than ``improvement_threshold``.
    patience = 10000
    patience_increase = 2
    improvement_threshold = 0.995

    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False
    while epoch < n_epochs and not done_looping:
        epoch += 1
        for minibatch_index in range(n_train_batches):
            # Global iteration counter across epochs (0-based).
            iteration = (epoch - 1) * n_train_batches + minibatch_index
            if iteration % 100 == 0:
                print('training @ iter =  %i' % iteration)

            # Called for its parameter updates; the returned cost is unused.
            train_model(minibatch_index)

            if (iteration + 1) % validation_frequency == 0:
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = np.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                if this_validation_loss < best_validation_loss:
                    # Only a sufficiently large improvement buys more
                    # training time.
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience,
                                       iteration * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # Score the new best model on the held-out test split.
                    test_losses = [
                        test_model(i) for i in range(n_test_batches)
                    ]
                    test_score = np.mean(test_losses)
                    print((' epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))

    elapsed_minutes = (end_time - start_time) / 60.
    sys.stderr.write('The code for file ' + os.path.split(__file__)[1] +
                     ' ran for %.2fm' % elapsed_minutes + '\n')
def test_siamese_cnn():
    """Check the Theano Siamese CNN loss against the NumPy reference.

    Random inputs and same/different labels are generated, a one-conv-layer,
    one-hidden-layer ``SiameseCNN`` is built, and its ``loss_cos_cos2`` value
    is compared with the value obtained by pushing the same data and the
    model's own weights through ``np_cnn_layers_output`` and
    ``np_loss_cos_cos2``.

    Raises
    ------
    AssertionError
        If the two losses differ beyond the default tolerance of
        ``npt.assert_almost_equal``.
    """
    # Random number generators
    rng = np.random.RandomState(42)
    srng = RandomStreams(seed=42)

    # Generate random data
    n_data = 4
    n_pairs = 6
    height = 39
    width = 200
    in_channels = 1
    X = rng.randn(n_data, in_channels, height, width)
    Y = np.asarray(rng.randint(2, size=n_pairs), dtype=np.int32)
    print("Same/diff: %s" % (Y,))

    # Draw ``n_pairs`` distinct index pairs from all 2-combinations of items.
    possible_pairs = list(itertools.combinations(range(n_data), 2))
    chosen_pairs = rng.choice(
        np.arange(len(possible_pairs)), size=n_pairs, replace=False)
    x1_indices = np.array([possible_pairs[i][0] for i in chosen_pairs])
    x2_indices = np.array([possible_pairs[i][1] for i in chosen_pairs])
    print("x1 index:  %s" % (x1_indices,))
    print("x2 index:  %s" % (x2_indices,))

    # Setup Theano model
    batch_size = n_pairs
    input_shape = (batch_size, in_channels, height, width)
    conv_layer_specs = [
        {"filter_shape": (32, in_channels, 39, 9), "pool_shape": (1, 3),
         "activation": "tanh"},
    ]
    hidden_layer_specs = [{"units": 128, "activation": "tanh"}]
    dropout_rates = None
    y = T.ivector("y")
    input_x1 = T.matrix("x1")
    input_x2 = T.matrix("x2")
    model = SiameseCNN(
        rng, input_x1, input_x2, input_shape, conv_layer_specs,
        hidden_layer_specs, srng, dropout_rates=dropout_rates,
    )
    loss = model.loss_cos_cos2(y)

    # The symbolic inputs are matrices, so each item is flattened to a row.
    X_flat = X.reshape((n_data, -1))

    # Compile Theano function
    theano_siamese_loss = theano.function(
        inputs=[],
        outputs=loss,
        givens={
            input_x1: X_flat[x1_indices],
            input_x2: X_flat[x2_indices],
            y: Y,
        },
    )
    theano_loss = theano_siamese_loss()
    print("Theano loss: %s" % (theano_loss,))

    # Collect the model weights for the NumPy reference.  ``model.layers`` is
    # read with the conv layers first, followed by the hidden layers.
    n_conv_layers = len(conv_layer_specs)
    n_hidden_layers = len(hidden_layer_specs)
    conv_layers = [model.layers[i] for i in range(n_conv_layers)]
    hidden_layers = [
        model.layers[i]
        for i in range(n_conv_layers, n_conv_layers + n_hidden_layers)
    ]

    conv_layers_W = [layer.W.get_value(borrow=True) for layer in conv_layers]
    conv_layers_b = [layer.b.get_value(borrow=True) for layer in conv_layers]
    conv_layers_pool_shape = [
        spec["pool_shape"] for spec in conv_layer_specs
    ]
    hidden_layers_W = [
        layer.W.get_value(borrow=True) for layer in hidden_layers
    ]
    hidden_layers_b = [
        layer.b.get_value(borrow=True) for layer in hidden_layers
    ]

    # Calculate Numpy output
    np_x1_layers_output = np_cnn_layers_output(
        X[x1_indices], conv_layers_W, conv_layers_b, conv_layers_pool_shape,
        hidden_layers_W, hidden_layers_b
    )
    np_x2_layers_output = np_cnn_layers_output(
        X[x2_indices], conv_layers_W, conv_layers_b, conv_layers_pool_shape,
        hidden_layers_W, hidden_layers_b
    )
    numpy_loss = np_loss_cos_cos2(
        np_x1_layers_output, np_x2_layers_output, Y)
    print("Numpy loss: %s" % (numpy_loss,))

    npt.assert_almost_equal(numpy_loss, theano_loss)