Ejemplo n.º 1
0
def train_siamese_mnist():
    """Train a Siamese CNN on paired MNIST data with minibatch SGD.

    The function loads three splits (train / validation / test) from
    ``mnist.pkl.gz`` via ``load_data``; each split is unpacked into two
    input sets and one label set.  It then builds a ``SiameseCNN``, uses
    ``model.loss_cos_cos2(y)`` as the cost, compiles Theano functions for
    training, validation and testing, and runs a patience-based early
    stopping loop for at most 200 epochs.

    Takes no arguments and returns nothing.  Progress is printed to stdout
    and the total running time is written to stderr.
    """

    batch_size = 50
    learning_rate = 0.1
    rng = np.random.RandomState(42)
    srng = RandomStreams(seed=42)

    datasets = load_data('mnist.pkl.gz')
    train_set_x1, train_set_x2, train_set_y = datasets[0]
    valid_set_x1, valid_set_x2, valid_set_y = datasets[1]
    test_set_x1, test_set_x2, test_set_y = datasets[2]

    # Floor division keeps the batch counts integral regardless of the
    # division semantics in effect; they are used as loop bounds below.
    n_train_batches = train_set_x1.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x1.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x1.get_value(borrow=True).shape[0] // batch_size

    # Symbolic minibatch index shared by all compiled functions.
    index = T.lscalar()

    print('building model')

    input_shape = (batch_size, 1, 28, 28)
    conv_layer_specs = [
        {"filter_shape": (20, 1, 5, 5), "pool_shape": (2, 2), "activation": "tanh"},
        {"filter_shape": (50, 20, 5, 5), "pool_shape": (2, 2), "activation": "tanh"}
        ]

    hidden_layer_specs = [
        {"units": 800, "activation": "tanh"},
        {"units": 500, "activation": "tanh"},
        {"units": 200, "activation": "tanh"},
        {"units": 10, "activation": "tanh"}
        ]

    dropout_rates = None
    y = T.ivector("y")
    input_x1 = T.matrix("x1")
    input_x2 = T.matrix("x2")
    model = SiameseCNN(
            rng, input_x1, input_x2, input_shape,
            conv_layer_specs, hidden_layer_specs, srng,
            dropout_rates=dropout_rates,
        )
    params = model.parameters
    cost = model.loss_cos_cos2(y)

    def batch_givens(set_x1, set_x2, set_y):
        # Substitute the symbolic inputs with the `index`-th minibatch slice.
        batch = slice(index * batch_size, (index + 1) * batch_size)
        return {
            input_x1: set_x1[batch],
            input_x2: set_x2[batch],
            y: set_y[batch],
            }

    test_model = theano.function(
        inputs=[index],
        outputs=cost,
        givens=batch_givens(test_set_x1, test_set_x2, test_set_y),
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=cost,
        givens=batch_givens(valid_set_x1, valid_set_x2, valid_set_y),
    )

    # Plain SGD: every parameter moves against its gradient.
    grads = T.grad(cost, params)
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens=batch_givens(train_set_x1, train_set_x2, train_set_y),
    )

    print('model built, start training')

    n_epochs = 200
    # Early-stopping state: training stops once `patience` iterations have
    # been performed; patience is extended when validation improves enough.
    patience = 10000
    patience_increase = 2
    improvement_threshold = 0.995
    # Validate at a fixed interval of minibatch updates.
    validation_frequency = 500
    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    # NOTE(review): the "error ... %%" figures printed below are the mean
    # `loss_cos_cos2` cost scaled by 100, not a classification error rate.
    # The messages are left unchanged.
    while epoch < n_epochs and not done_looping:
        epoch += 1
        for minibatch_index in range(n_train_batches):

            # Global count of minibatch updates performed before this one.
            step = (epoch - 1) * n_train_batches + minibatch_index

            if step % 100 == 0:
                # Two spaces after '=' reproduce the original output exactly.
                print('training @ iter =  %i' % step)
            train_model(minibatch_index)

            if (step + 1) % validation_frequency == 0:
                validation_losses = [validate_model(i)
                                     for i in range(n_valid_batches)]
                this_validation_loss = np.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # New best validation loss so far.
                if this_validation_loss < best_validation_loss:

                    # Extend patience only if the improvement is large enough.
                    if this_validation_loss < (best_validation_loss *
                                               improvement_threshold):
                        patience = max(patience, step * patience_increase)

                    # Remember the best validation loss and where it occurred.
                    best_validation_loss = this_validation_loss
                    best_iter = step

                    # Evaluate the current best model on the test split.
                    test_losses = [test_model(i)
                                   for i in range(n_test_batches)]
                    test_score = np.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= step:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    sys.stderr.write('The code for file ' +
                     os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) +
                     '\n')
Ejemplo n.º 2
0
def test_siamese_cnn():
    """Check the Theano Siamese CNN loss against a NumPy reference.

    Random inputs and same/different labels are generated, a ``SiameseCNN``
    with one convolutional and one hidden layer is built, and its
    ``loss_cos_cos2`` value is compared (via ``assert_almost_equal``) with
    the value obtained by pushing the same data and the model's weights
    through ``np_cnn_layers_output`` and ``np_loss_cos_cos2``.
    """

    # Random number generators.  `rng` is shared between data generation
    # and model construction, so the order of calls on it matters.
    rng = np.random.RandomState(42)
    srng = RandomStreams(seed=42)

    # Generate random data
    n_data = 4
    n_pairs = 6
    height = 39
    width = 200
    in_channels = 1
    X = rng.randn(n_data, in_channels, height, width)
    Y = np.asarray(rng.randint(2, size=n_pairs), dtype=np.int32)
    print("Same/diff: %s" % (Y,))

    # Generate random pairs: sample distinct index pairs without replacement
    possible_pairs = list(itertools.combinations(range(n_data), 2))
    chosen_pairs = rng.choice(
        np.arange(len(possible_pairs)), size=n_pairs, replace=False
        )
    x1_indices = np.array([possible_pairs[i_pair][0] for i_pair in chosen_pairs])
    x2_indices = np.array([possible_pairs[i_pair][1] for i_pair in chosen_pairs])
    # Two spaces after the colon reproduce the original output exactly.
    print("x1 index:  %s" % (x1_indices,))
    print("x2 index:  %s" % (x2_indices,))

    # Setup Theano model
    batch_size = n_pairs
    input_shape = (batch_size, in_channels, height, width)
    conv_layer_specs = [
        {"filter_shape": (32, in_channels, 39, 9), "pool_shape": (1, 3), "activation": "tanh"},
        ]
    hidden_layer_specs = [{"units": 128, "activation": "tanh"}]
    dropout_rates = None
    y = T.ivector("y")
    input_x1 = T.matrix("x1")
    input_x2 = T.matrix("x2")
    model = SiameseCNN(
            rng, input_x1, input_x2, input_shape,
            conv_layer_specs, hidden_layer_specs, srng,
            dropout_rates=dropout_rates,
        )
    loss = model.loss_cos_cos2(y)

    # Compile Theano function; the model takes each item as a flat row.
    X_flat = X.reshape((n_data, -1))
    theano_siamese_loss = theano.function(
        inputs=[], outputs=loss,
        givens={
            input_x1: X_flat[x1_indices],
            input_x2: X_flat[x2_indices],
            y: Y
            },
        )
    theano_loss = theano_siamese_loss()
    print("Theano loss: %s" % (theano_loss,))

    # Calculate Numpy output.
    # Assumes `model.layers` lists the convolutional layers first, followed
    # by the hidden layers -- TODO confirm against SiameseCNN.
    n_conv = len(conv_layer_specs)
    n_hidden = len(hidden_layer_specs)

    conv_layers_W = []
    conv_layers_b = []
    conv_layers_pool_shape = []
    for i_layer, spec in enumerate(conv_layer_specs):
        layer = model.layers[i_layer]
        conv_layers_W.append(layer.W.get_value(borrow=True))
        conv_layers_b.append(layer.b.get_value(borrow=True))
        conv_layers_pool_shape.append(spec["pool_shape"])

    hidden_layers_W = []
    hidden_layers_b = []
    # Explicit offsets instead of reusing the previous loop's variable, so
    # this also works when there are no convolutional layers.
    for i_layer in range(n_conv, n_conv + n_hidden):
        layer = model.layers[i_layer]
        hidden_layers_W.append(layer.W.get_value(borrow=True))
        hidden_layers_b.append(layer.b.get_value(borrow=True))

    np_x1_layers_output = np_cnn_layers_output(
        X[x1_indices], conv_layers_W, conv_layers_b, conv_layers_pool_shape,
        hidden_layers_W, hidden_layers_b
        )
    np_x2_layers_output = np_cnn_layers_output(
        X[x2_indices], conv_layers_W, conv_layers_b, conv_layers_pool_shape,
        hidden_layers_W, hidden_layers_b
        )

    numpy_loss = np_loss_cos_cos2(np_x1_layers_output, np_x2_layers_output, Y)
    print("Numpy loss: %s" % (numpy_loss,))

    npt.assert_almost_equal(numpy_loss, theano_loss)