def test_softmax_binary_targets():
    """
    Constructs softmax layers with binary target and with vector targets
    to check that they give the same cost.
    """
    num_classes = 10
    batch_size = 20
    mlp_bin = MLP(
        layers=[Softmax(num_classes, 's1', irange=0.1, binary_target_dim=1)],
        nvis=100
    )
    mlp_vec = MLP(
        layers=[Softmax(num_classes, 's1', irange=0.1)],
        nvis=100
    )
    X = mlp_bin.get_input_space().make_theano_batch()
    y_bin = mlp_bin.get_target_space().make_theano_batch()
    y_vec = mlp_vec.get_target_space().make_theano_batch()
    y_hat_bin = mlp_bin.fprop(X)
    y_hat_vec = mlp_vec.fprop(X)

    cost_bin = theano.function([X, y_bin], mlp_bin.cost(y_bin, y_hat_bin),
                               allow_input_downcast=True)
    cost_vec = theano.function([X, y_vec], mlp_vec.cost(y_vec, y_hat_vec),
                               allow_input_downcast=True)

    X_data = np.random.random(size=(batch_size, 100))
    y_bin_data = np.random.randint(low=0, high=10, size=(batch_size, 1))
    y_vec_data = np.zeros((batch_size, num_classes))
    y_vec_data[np.arange(batch_size), y_bin_data.flatten()] = 1
    np.testing.assert_allclose(cost_bin(X_data, y_bin_data),
                               cost_vec(X_data, y_vec_data))
def test_sigmoid_detection_cost():
    # This is only a smoke test: verifies that it compiles and runs,
    # not any particular value.
    rng = np.random.RandomState(0)
    y = (rng.uniform(size=(4, 3)) > 0.5).astype('uint8')
    X = theano.shared(rng.uniform(size=(4, 2)))
    model = MLP(nvis=2, layers=[Sigmoid(monitor_style='detection', dim=3,
                                        layer_name='y', irange=0.8)])
    y_hat = model.fprop(X)
    model.cost(y, y_hat).eval()
def test_softmax_two_binary_targets():
    """
    Constructs softmax layers with two binary targets and with vector
    targets to check that they give the same cost.
    """
    num_classes = 10
    batch_size = 20
    mlp_bin = MLP(
        layers=[Softmax(num_classes, 's1', irange=0.1, binary_target_dim=2)],
        nvis=100
    )
    mlp_vec = MLP(
        layers=[Softmax(num_classes, 's1', irange=0.1)],
        nvis=100
    )
    X = mlp_bin.get_input_space().make_theano_batch()
    y_bin = mlp_bin.get_target_space().make_theano_batch()
    y_vec = mlp_vec.get_target_space().make_theano_batch()
    y_hat_bin = mlp_bin.fprop(X)
    y_hat_vec = mlp_vec.fprop(X)

    cost_bin = theano.function([X, y_bin], mlp_bin.cost(y_bin, y_hat_bin),
                               allow_input_downcast=True)
    cost_vec = theano.function([X, y_vec], mlp_vec.cost(y_vec, y_hat_vec),
                               allow_input_downcast=True)

    X_data = np.random.random(size=(batch_size, 100))
    # binary and vector costs can only match
    # if binary targets are mutually exclusive
    y_bin_data = np.concatenate([np.random.permutation(10)[:2].reshape((1, 2))
                                 for _ in range(batch_size)])
    y_vec_data = np.zeros((batch_size, num_classes))
    y_vec_data[np.arange(batch_size), y_bin_data[:, 0].flatten()] = 1
    y_vec_data[np.arange(batch_size), y_bin_data[:, 1].flatten()] = 1
    np.testing.assert_allclose(cost_bin(X_data, y_bin_data),
                               cost_vec(X_data, y_vec_data))
def check_unimplemented_case(ConvNonlinearity):
    conv_model = MLP(
        input_space=Conv2DSpace(shape=[1, 1], axes=['b', 0, 1, 'c'],
                                num_channels=1),
        layers=[ConvElemwise(layer_name='conv',
                             nonlinearity=ConvNonlinearity,
                             output_channels=1, kernel_shape=[1, 1],
                             pool_shape=[1, 1], pool_stride=[1, 1],
                             irange=1.0)],
        batch_size=1
    )

    X = conv_model.get_input_space().make_theano_batch()
    Y = conv_model.get_target_space().make_theano_batch()
    Y_hat = conv_model.fprop(X)

    # Pass the callable and its arguments separately: calling
    # conv_model.cost(Y, Y_hat) eagerly would raise before assert_raises
    # could catch the exception. assert_raises also returns None, so
    # wrapping it in an extra `assert` would always fail.
    np.testing.assert_raises(NotImplementedError,
                             conv_model.cost, Y, Y_hat)
def test_cost(self):
    """
    Use an RNN to calculate Mersenne number sequences of different
    lengths and check whether the costs make sense.
    """
    rnn = MLP(input_space=SequenceSpace(VectorSpace(dim=1)),
              layers=[Recurrent(dim=1, layer_name='recurrent',
                                irange=0, nonlinearity=lambda x: x),
                      Linear(dim=1, layer_name='linear', irange=0)])
    W, U, b = rnn.layers[0].get_params()
    W.set_value([[1]])
    U.set_value([[2]])
    W, b = rnn.layers[1].get_params()
    W.set_value([[1]])

    X_data, X_mask = rnn.get_input_space().make_theano_batch()
    y_data, y_mask = rnn.get_output_space().make_theano_batch()
    y_data_hat, y_mask_hat = rnn.fprop((X_data, X_mask))

    seq_len = 20
    X_data_vals = np.ones((seq_len, seq_len, 1))
    X_mask_vals = np.triu(np.ones((seq_len, seq_len)))
    y_data_vals = np.tile((2 ** np.arange(1, seq_len + 1) - 1),
                          (seq_len, 1)).T[:, :, np.newaxis]
    y_mask_vals = np.triu(np.ones((seq_len, seq_len)))

    f = function([X_data, X_mask, y_data, y_mask],
                 rnn.cost((y_data, y_mask), (y_data_hat, y_mask_hat)),
                 allow_input_downcast=True)

    # The cost for two exact sequences should be zero
    assert f(X_data_vals, X_mask_vals, y_data_vals, y_mask_vals) == 0
    # If the input is different, the cost should be non-zero
    assert f(X_data_vals + 1, X_mask_vals, y_data_vals, y_mask_vals) != 0
    # And same for the target data; using squared L2 norm, so should be 1
    assert f(X_data_vals, X_mask_vals, y_data_vals + 1, y_mask_vals) == 1
    # But if the masked data changes, the cost should remain the same
    X_data_vals_plus = X_data_vals + (1 - X_mask_vals[:, :, None])
    assert f(X_data_vals_plus, X_mask_vals, y_data_vals, y_mask_vals) == 0
    y_data_vals_plus = y_data_vals + (1 - y_mask_vals[:, :, None])
    assert f(X_data_vals, X_mask_vals, y_data_vals_plus, y_mask_vals) == 0
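# Illustrative sketch (not part of the original suite; the helper name
# _mersenne_reference is hypothetical): the targets in test_cost come from
# the recurrence the hand-weighted RNN computes. With h_0 = 0, W = 1, U = 2
# and constant input x_t = 1, h_t = W * x_t + U * h_{t-1} = 2 * h_{t-1} + 1,
# whose closed form is the Mersenne numbers 2 ** t - 1, matching the
# y_data_vals built from 2 ** np.arange(1, seq_len + 1) - 1.
def _mersenne_reference(seq_len):
    h = 0
    seq = []
    for _ in range(seq_len):
        h = 2 * h + 1  # one recurrence step with W = 1, U = 2, x_t = 1
        seq.append(h)
    return seq  # [1, 3, 7, 15, ...]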
def test_sigmoid_detection_cost():
    """
    Smoke test for the sigmoid convolutional layer's detection cost:
    checks that it evaluates to a legal (finite, non-negative) value.
    """
    rng = np.random.RandomState(0)
    sigmoid_nonlin = SigmoidConvNonlinearity(monitor_style="detection")
    (rows, cols) = (10, 10)
    axes = ('c', 0, 1, 'b')
    nchs = 1
    space_shp = (nchs, rows, cols, 1)
    X_vals = rng.uniform(-0.01, 0.01, size=space_shp).astype(config.floatX)
    X = theano.shared(X_vals, name="X")
    Y_vals = (rng.uniform(-0.01, 0.01,
                          size=(rows, cols)) > 0.005).astype("uint8")
    Y = theano.shared(Y_vals, name="y_vals")
    conv_elemwise = ConvElemwise(layer_name="h0",
                                 output_channels=1,
                                 irange=.005,
                                 kernel_shape=(1, 1),
                                 max_kernel_norm=0.9,
                                 nonlinearity=sigmoid_nonlin)
    input_space = pylearn2.space.Conv2DSpace(shape=(rows, cols),
                                             num_channels=nchs,
                                             axes=axes)
    model = MLP(batch_size=1,
                layers=[conv_elemwise],
                input_space=input_space)
    Y_hat = model.fprop(X)
    cost = model.cost(Y, Y_hat).eval()
    assert not (np.isnan(cost) or np.isinf(cost) or cost < 0.0), (
        "cost returned an illegal value.")
def check_case(conv_nonlinearity, mlp_nonlinearity, cost_implemented=True):
    """Check that a ConvNonlinearity and an MLP layer are consistent.

    This is done by building an MLP with a ConvElemwise layer with the
    supplied non-linearity, an MLP with a dense layer, and checking that
    the outputs (and costs if applicable) are consistent.

    Parameters
    ----------
    conv_nonlinearity: instance of `ConvNonlinearity`
        The non-linearity to provide to a `ConvElemwise` layer.

    mlp_nonlinearity: subclass of `mlp.Linear`
        The fully-connected MLP layer (including non-linearity).

    cost_implemented: bool
        If `True`, check that both costs give consistent results.
        If `False`, check that both costs raise `NotImplementedError`.
    """
    # Create fake data
    np.random.seed(12345)

    r = 31
    s = 21
    shape = [r, s]
    nvis = r * s
    output_channels = 13
    batch_size = 103

    x = np.random.rand(batch_size, r, s, 1)
    y = np.random.randint(2, size=[batch_size, output_channels, 1, 1])
    x = x.astype(config.floatX)
    y = y.astype(config.floatX)

    x_mlp = x.flatten().reshape(batch_size, nvis)
    y_mlp = y.flatten().reshape(batch_size, output_channels)

    # Initialize convnet with random weights.
    conv_model = MLP(
        input_space=Conv2DSpace(shape=shape, axes=['b', 0, 1, 'c'],
                                num_channels=1),
        layers=[ConvElemwise(layer_name='conv',
                             nonlinearity=conv_nonlinearity,
                             output_channels=output_channels,
                             kernel_shape=shape,
                             pool_shape=[1, 1], pool_stride=shape,
                             irange=1.0)],
        batch_size=batch_size
    )

    X = conv_model.get_input_space().make_theano_batch()
    Y = conv_model.get_target_space().make_theano_batch()
    Y_hat = conv_model.fprop(X)
    g = theano.function([X], Y_hat)

    # Construct an equivalent MLP which gives the same output
    # after flattening both.
    mlp_model = MLP(
        layers=[mlp_nonlinearity(dim=output_channels, layer_name='mlp',
                                 irange=1.0)],
        batch_size=batch_size,
        nvis=nvis
    )

    W, b = conv_model.get_param_values()

    W_mlp = np.zeros(shape=(output_channels, nvis), dtype=config.floatX)
    for k in range(output_channels):
        W_mlp[k] = W[k, 0].flatten()[::-1]
    W_mlp = W_mlp.T
    b_mlp = b.flatten()

    mlp_model.set_param_values([W_mlp, b_mlp])

    X1 = mlp_model.get_input_space().make_theano_batch()
    Y1 = mlp_model.get_target_space().make_theano_batch()
    Y1_hat = mlp_model.fprop(X1)
    f = theano.function([X1], Y1_hat)

    # Check that the two models give the same output
    assert_allclose(f(x_mlp).flatten(), g(x).flatten(),
                    rtol=1e-5, atol=5e-5)

    if cost_implemented:
        # Check that the two models have the same costs
        mlp_cost = theano.function([X1, Y1], mlp_model.cost(Y1, Y1_hat))
        conv_cost = theano.function([X, Y], conv_model.cost(Y, Y_hat))
        assert_allclose(conv_cost(x, y), mlp_cost(x_mlp, y_mlp))
    else:
        # Check that both costs are not implemented
        assert_raises(NotImplementedError, conv_model.cost, Y, Y_hat)
        assert_raises(NotImplementedError, mlp_model.cost, Y1, Y1_hat)
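# Hypothetical invocation of check_case (an assumption, mirroring the
# sigmoid detection test above, where both the ConvElemwise and the dense
# Sigmoid layer have an implemented cost):
#
#     check_case(SigmoidConvNonlinearity(monitor_style="detection"),
#                Sigmoid, cost_implemented=True)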
def check_implemented_case(ConvNonlinearity, MLPNonlinearity):
    # Create fake data
    np.random.seed(12345)

    r = 31
    s = 21
    shape = [r, s]
    nvis = r * s
    output_channels = 13
    batch_size = 103

    x = np.random.rand(batch_size, r, s, 1)
    y = np.random.randint(2, size=[batch_size, output_channels, 1, 1])
    x = x.astype('float32')
    y = y.astype('float32')

    x_mlp = x.flatten().reshape(batch_size, nvis)
    y_mlp = y.flatten().reshape(batch_size, output_channels)

    # Initialize convnet with random weights.
    conv_model = MLP(
        input_space=Conv2DSpace(shape=shape, axes=['b', 0, 1, 'c'],
                                num_channels=1),
        layers=[ConvElemwise(layer_name='conv',
                             nonlinearity=ConvNonlinearity,
                             output_channels=output_channels,
                             kernel_shape=shape,
                             pool_shape=[1, 1], pool_stride=shape,
                             irange=1.0)],
        batch_size=batch_size
    )

    X = conv_model.get_input_space().make_theano_batch()
    Y = conv_model.get_target_space().make_theano_batch()
    Y_hat = conv_model.fprop(X)
    g = theano.function([X], Y_hat)

    # Construct an equivalent MLP which gives the same output
    # after flattening both.
    mlp_model = MLP(
        layers=[MLPNonlinearity(dim=output_channels, layer_name='mlp',
                                irange=1.0)],
        batch_size=batch_size,
        nvis=nvis
    )

    W, b = conv_model.get_param_values()
    W = W.astype('float32')
    b = b.astype('float32')

    W_mlp = np.zeros(shape=(output_channels, nvis))
    for k in range(output_channels):
        W_mlp[k] = W[k, 0].flatten()[::-1]
    W_mlp = W_mlp.T
    b_mlp = b.flatten()

    W_mlp = W_mlp.astype('float32')
    b_mlp = b_mlp.astype('float32')

    mlp_model.set_param_values([W_mlp, b_mlp])

    X1 = mlp_model.get_input_space().make_theano_batch()
    Y1 = mlp_model.get_target_space().make_theano_batch()
    Y1_hat = mlp_model.fprop(X1)
    f = theano.function([X1], Y1_hat)

    # Check that the two models give the same output
    assert np.linalg.norm(f(x_mlp).flatten() - g(x).flatten()) < 1e-3

    # Check that the two models have the same costs
    mlp_cost = theano.function([X1, Y1], mlp_model.cost(Y1, Y1_hat))
    conv_cost = theano.function([X, Y], conv_model.cost(Y, Y_hat))
    assert np.linalg.norm(conv_cost(x, y) - mlp_cost(x_mlp, y_mlp)) < 1e-3