Example #1
def test_softmax_binary_targets():
    """
    Constructs softmax layers with binary target and with vector targets
    to check that they give the same cost.
    """
    num_classes = 10
    batch_size = 20
    mlp_bin = MLP(
        layers=[Softmax(num_classes, 's1', irange=0.1, binary_target_dim=1)],
        nvis=100
    )
    mlp_vec = MLP(
        layers=[Softmax(num_classes, 's1', irange=0.1)],
        nvis=100
    )

    X = mlp_bin.get_input_space().make_theano_batch()
    y_bin = mlp_bin.get_target_space().make_theano_batch()
    y_vec = mlp_vec.get_target_space().make_theano_batch()

    y_hat_bin = mlp_bin.fprop(X)
    y_hat_vec = mlp_vec.fprop(X)
    cost_bin = theano.function([X, y_bin], mlp_bin.cost(y_bin, y_hat_bin),
                               allow_input_downcast=True)
    cost_vec = theano.function([X, y_vec], mlp_vec.cost(y_vec, y_hat_vec),
                               allow_input_downcast=True)

    X_data = np.random.random(size=(batch_size, 100))
    y_bin_data = np.random.randint(low=0, high=10, size=(batch_size, 1))
    y_vec_data = np.zeros((batch_size, num_classes))
    y_vec_data[np.arange(batch_size), y_bin_data.flatten()] = 1
    np.testing.assert_allclose(cost_bin(X_data, y_bin_data),
                               cost_vec(X_data, y_vec_data))
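Note: the equivalence this test checks is just the usual one-hot identity for cross-entropy. Below is a minimal numpy sketch, independent of pylearn2 and assuming only that the Softmax cost is the batch-averaged negative log-likelihood: with a one-hot target, -sum(y * log(p)) keeps exactly the log-probability that the integer-index (binary_target) formulation picks out.

import numpy as np

rng = np.random.RandomState(0)
logits = rng.randn(20, 10)
p = np.exp(logits) / np.exp(logits).sum(axis=1, keepdims=True)  # rows of class probabilities
idx = rng.randint(0, 10, size=20)                               # integer (binary_target) labels
onehot = np.zeros((20, 10))
onehot[np.arange(20), idx] = 1

cost_idx = -np.log(p[np.arange(20), idx]).mean()        # index formulation
cost_onehot = -(onehot * np.log(p)).sum(axis=1).mean()  # one-hot formulation
assert np.allclose(cost_idx, cost_onehot)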
Example #2
def test_softmax_binary_targets():
    """
    Constructs softmax layers with binary target and with vector targets
    to check that they give the same cost.
    """
    num_classes = 10
    batch_size = 20
    mlp_bin = MLP(
        layers=[Softmax(num_classes, 's1', irange=0.1, binary_target_dim=1)],
        nvis=100)
    mlp_vec = MLP(layers=[Softmax(num_classes, 's1', irange=0.1)], nvis=100)

    X = mlp_bin.get_input_space().make_theano_batch()
    y_bin = mlp_bin.get_target_space().make_theano_batch()
    y_vec = mlp_vec.get_target_space().make_theano_batch()

    y_hat_bin = mlp_bin.fprop(X)
    y_hat_vec = mlp_vec.fprop(X)
    cost_bin = theano.function([X, y_bin],
                               mlp_bin.cost(y_bin, y_hat_bin),
                               allow_input_downcast=True)
    cost_vec = theano.function([X, y_vec],
                               mlp_vec.cost(y_vec, y_hat_vec),
                               allow_input_downcast=True)

    X_data = np.random.random(size=(batch_size, 100))
    y_bin_data = np.random.randint(low=0, high=10, size=(batch_size, 1))
    y_vec_data = np.zeros((batch_size, num_classes))
    y_vec_data[np.arange(batch_size), y_bin_data.flatten()] = 1
    np.testing.assert_allclose(cost_bin(X_data, y_bin_data),
                               cost_vec(X_data, y_vec_data))
Example #3
def test_sigmoid_detection_cost():
    # This is only a smoke test: verifies that it compiles and runs,
    # not any particular value.
    rng = np.random.RandomState(0)
    y = (rng.uniform(size=(4, 3)) > 0.5).astype('uint8')
    X = theano.shared(rng.uniform(size=(4, 2)))
    model = MLP(nvis=2, layers=[Sigmoid(monitor_style='detection', dim=3,
                layer_name='y', irange=0.8)])
    y_hat = model.fprop(X)
    model.cost(y, y_hat).eval()
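Note: the quantity this smoke test compiles is elementwise binary cross-entropy between the targets and the sigmoid outputs. A minimal numpy sketch follows; the sum-over-units, mean-over-batch reduction is an assumption for illustration, not a statement about pylearn2's exact reduction.

import numpy as np

rng = np.random.RandomState(0)
y = (rng.uniform(size=(4, 3)) > 0.5).astype('float64')          # binary targets
y_hat = 1.0 / (1.0 + np.exp(-rng.uniform(-1, 1, size=(4, 3))))  # sigmoid outputs in (0, 1)

cost = -(y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat)).sum(axis=1).mean()
assert np.isfinite(cost) and cost >= 0.0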
Example #4
def test_softmax_two_binary_targets():
    """
    Constructs softmax layers with two binary targets and with vector targets
    to check that they give the same cost.
    """
    num_classes = 10
    batch_size = 20
    mlp_bin = MLP(
        layers=[Softmax(num_classes, 's1', irange=0.1, binary_target_dim=2)],
        nvis=100)
    mlp_vec = MLP(layers=[Softmax(num_classes, 's1', irange=0.1)], nvis=100)

    X = mlp_bin.get_input_space().make_theano_batch()
    y_bin = mlp_bin.get_target_space().make_theano_batch()
    y_vec = mlp_vec.get_target_space().make_theano_batch()

    y_hat_bin = mlp_bin.fprop(X)
    y_hat_vec = mlp_vec.fprop(X)
    cost_bin = theano.function([X, y_bin],
                               mlp_bin.cost(y_bin, y_hat_bin),
                               allow_input_downcast=True)
    cost_vec = theano.function([X, y_vec],
                               mlp_vec.cost(y_vec, y_hat_vec),
                               allow_input_downcast=True)

    X_data = np.random.random(size=(batch_size, 100))
    # binary and vector costs can only match
    # if binary targets are mutually exclusive
    y_bin_data = np.concatenate([
        np.random.permutation(10)[:2].reshape((1, 2))
        for _ in range(batch_size)
    ])
    y_vec_data = np.zeros((batch_size, num_classes))
    y_vec_data[np.arange(batch_size), y_bin_data[:, 0].flatten()] = 1
    y_vec_data[np.arange(batch_size), y_bin_data[:, 1].flatten()] = 1
    np.testing.assert_allclose(cost_bin(X_data, y_bin_data),
                               cost_vec(X_data, y_vec_data))
Example #5
def test_softmax_two_binary_targets():
    """
    Constructs softmax layers with two binary targets and with vector targets
    to check that they give the same cost.
    """
    num_classes = 10
    batch_size = 20
    mlp_bin = MLP(
        layers=[Softmax(num_classes, 's1', irange=0.1, binary_target_dim=2)],
        nvis=100
    )
    mlp_vec = MLP(
        layers=[Softmax(num_classes, 's1', irange=0.1)],
        nvis=100
    )

    X = mlp_bin.get_input_space().make_theano_batch()
    y_bin = mlp_bin.get_target_space().make_theano_batch()
    y_vec = mlp_vec.get_target_space().make_theano_batch()

    y_hat_bin = mlp_bin.fprop(X)
    y_hat_vec = mlp_vec.fprop(X)
    cost_bin = theano.function([X, y_bin], mlp_bin.cost(y_bin, y_hat_bin),
                               allow_input_downcast=True)
    cost_vec = theano.function([X, y_vec], mlp_vec.cost(y_vec, y_hat_vec),
                               allow_input_downcast=True)

    X_data = np.random.random(size=(batch_size, 100))
    # binary and vector costs can only match
    # if binary targets are mutually exclusive
    y_bin_data = np.concatenate([np.random.permutation(10)[:2].reshape((1, 2))
                                 for _ in range(batch_size)])
    y_vec_data = np.zeros((batch_size, num_classes))
    y_vec_data[np.arange(batch_size), y_bin_data[:, 0].flatten()] = 1
    y_vec_data[np.arange(batch_size), y_bin_data[:, 1].flatten()] = 1
    np.testing.assert_allclose(cost_bin(X_data, y_bin_data),
                               cost_vec(X_data, y_vec_data))
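Note: the permutation trick above is what makes the comparison valid. Here is a small numpy sketch of the failure mode it avoids, assuming the binary formulation simply sums the log-probability at each of its two target columns: if the two indices collided, the vector target would contain a single 1 while the binary target would count the class twice, and the costs would diverge.

import numpy as np

p = np.full(10, 0.1)            # one row of class probabilities
distinct = np.array([2, 7])
collided = np.array([2, 2])

def binary_cost(idx):
    return -np.log(p[idx]).sum()        # one term per target column

def vector_cost(idx):
    y = np.zeros(10)
    y[idx] = 1                          # duplicate indices still write a single 1
    return -(y * np.log(p)).sum()

assert np.isclose(binary_cost(distinct), vector_cost(distinct))
assert not np.isclose(binary_cost(collided), vector_cost(collided))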
def check_unimplemented_case(ConvNonlinearity):

    conv_model = MLP(
        input_space=Conv2DSpace(shape=[1, 1], axes=['b', 0, 1, 'c'],
                                num_channels=1),
        layers=[ConvElemwise(layer_name='conv',
                             nonlinearity=ConvNonlinearity,
                             output_channels=1,
                             kernel_shape=[1, 1],
                             pool_shape=[1, 1],
                             pool_stride=[1, 1],
                             irange=1.0)],
        batch_size=1
    )

    X = conv_model.get_input_space().make_theano_batch()
    Y = conv_model.get_target_space().make_theano_batch()
    Y_hat = conv_model.fprop(X)

    np.testing.assert_raises(NotImplementedError, conv_model.cost, Y, Y_hat)
Example #7
    def test_cost(self):
        """
        Use an RNN to calculate Mersenne number sequences of different
        lengths and check whether the costs make sense.
        """
        rnn = MLP(input_space=SequenceSpace(VectorSpace(dim=1)),
                  layers=[Recurrent(dim=1, layer_name='recurrent',
                                    irange=0, nonlinearity=lambda x: x),
                          Linear(dim=1, layer_name='linear', irange=0)])
        W, U, b = rnn.layers[0].get_params()
        W.set_value([[1]])
        U.set_value([[2]])

        W, b = rnn.layers[1].get_params()
        W.set_value([[1]])

        X_data, X_mask = rnn.get_input_space().make_theano_batch()
        y_data, y_mask = rnn.get_output_space().make_theano_batch()
        y_data_hat, y_mask_hat = rnn.fprop((X_data, X_mask))

        seq_len = 20
        X_data_vals = np.ones((seq_len, seq_len, 1))
        X_mask_vals = np.triu(np.ones((seq_len, seq_len)))
        y_data_vals = np.tile((2 ** np.arange(1, seq_len + 1) - 1),
                              (seq_len, 1)).T[:, :, np.newaxis]
        y_mask_vals = np.triu(np.ones((seq_len, seq_len)))

        f = function([X_data, X_mask, y_data, y_mask],
                     rnn.cost((y_data, y_mask), (y_data_hat, y_mask_hat)),
                     allow_input_downcast=True)
        # The cost for two exact sequences should be zero
        assert f(X_data_vals, X_mask_vals, y_data_vals, y_mask_vals) == 0
        # If the input is different, the cost should be non-zero
        assert f(X_data_vals + 1, X_mask_vals, y_data_vals, y_mask_vals) != 0
        # And same for the target data; using squared L2 norm, so should be 1
        assert f(X_data_vals, X_mask_vals, y_data_vals + 1, y_mask_vals) == 1
        # But if the masked data changes, the cost should remain the same
        X_data_vals_plus = X_data_vals + (1 - X_mask_vals[:, :, None])
        assert f(X_data_vals_plus, X_mask_vals, y_data_vals, y_mask_vals) == 0
        y_data_vals_plus = y_data_vals + (1 - y_mask_vals[:, :, None])
        assert f(X_data_vals, X_mask_vals, y_data_vals_plus, y_mask_vals) == 0
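Note: the weight values set above encode a simple recurrence. With W = [[1]] (input-to-hidden), U = [[2]] (hidden-to-hidden), zero bias, an identity nonlinearity and a unit-weight linear readout, the hidden state follows h_t = 2 * h_{t-1} + x_t, so an all-ones input produces the Mersenne numbers 2**t - 1 used as targets. A plain-Python sketch of that recurrence:

h = 0.0
outputs = []
for t in range(1, 6):
    x_t = 1.0
    h = 2.0 * h + x_t      # U * h_{t-1} + W * x_t
    outputs.append(h)      # linear readout with weight 1, bias 0

assert outputs == [2 ** t - 1 for t in range(1, 6)]  # [1, 3, 7, 15, 31]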
def test_sigmoid_detection_cost():
    """
    Tests that the detection cost of a sigmoid convolutional layer compiles
    and evaluates to a finite, non-negative value.
    """

    rng = np.random.RandomState(0)
    sigmoid_nonlin = SigmoidConvNonlinearity(monitor_style="detection")
    (rows, cols) = (10, 10)
    axes = ('c', 0, 1, 'b')
    nchs = 1

    space_shp = (nchs, rows, cols, 1)
    X_vals = rng.uniform(-0.01, 0.01,
                         size=space_shp).astype(config.floatX)
    X = theano.shared(X_vals, name="X")

    Y_vals = (rng.uniform(-0.01, 0.01,
                          size=(rows, cols)) > 0.005).astype("uint8")
    Y = theano.shared(Y_vals, name="y_vals")

    conv_elemwise = ConvElemwise(layer_name="h0",
                                 output_channels=1,
                                 irange=.005,
                                 kernel_shape=(1, 1),
                                 max_kernel_norm=0.9,
                                 nonlinearity=sigmoid_nonlin)

    input_space = pylearn2.space.Conv2DSpace(shape=(rows, cols),
                                             num_channels=nchs,
                                             axes=axes)
    model = MLP(batch_size=1,
                layers=[conv_elemwise],
                input_space=input_space)
    Y_hat = model.fprop(X)
    cost = model.cost(Y, Y_hat).eval()

    assert not (np.isnan(cost) or np.isinf(cost) or cost < 0.0), \
        "cost returned an illegal value."
def test_sigmoid_detection_cost():
    """
    Tests that the detection cost of a sigmoid convolutional layer compiles
    and evaluates to a finite, non-negative value.
    """

    rng = np.random.RandomState(0)
    sigmoid_nonlin = SigmoidConvNonlinearity(monitor_style="detection")
    (rows, cols) = (10, 10)
    axes = ('c', 0, 1, 'b')
    nchs = 1

    space_shp = (nchs, rows, cols, 1)
    X_vals = rng.uniform(-0.01, 0.01,
                         size=space_shp).astype(config.floatX)
    X = theano.shared(X_vals, name="X")

    Y_vals = (rng.uniform(-0.01, 0.01, size=(rows, cols)) >
              0.005).astype("uint8")
    Y = theano.shared(Y_vals, name="y_vals")

    conv_elemwise = ConvElemwise(layer_name="h0",
                                 output_channels=1,
                                 irange=.005,
                                 kernel_shape=(1, 1),
                                 max_kernel_norm=0.9,
                                 nonlinearity=sigmoid_nonlin)

    input_space = pylearn2.space.Conv2DSpace(shape=(rows, cols),
                                             num_channels=nchs,
                                             axes=axes)
    model = MLP(batch_size=1, layers=[conv_elemwise], input_space=input_space)
    Y_hat = model.fprop(X)
    cost = model.cost(Y, Y_hat).eval()

    assert not (np.isnan(cost) or np.isinf(cost) or cost < 0.0), \
        "cost returned an illegal value."
def check_case(conv_nonlinearity, mlp_nonlinearity, cost_implemented=True):
    """Check that ConvNonLinearity and MLPNonlinearity are consistent.

    This is done by building an MLP with a ConvElemwise layer with the
    supplied non-linearity, an MLP with a dense layer, and checking that
    the outputs (and costs if applicable) are consistent.

    Parameters
    ----------
    conv_nonlinearity: instance of `ConvNonlinearity`
        The non-linearity to provide to a `ConvElemwise` layer.

    mlp_nonlinearity: subclass of `mlp.Linear`
        The fully-connected MLP layer (including non-linearity).

    cost_implemented: bool
        If `True`, check that both costs give consistent results.
        If `False`, check that both costs raise `NotImplementedError`.
    """

    # Create fake data
    np.random.seed(12345)

    r = 31
    s = 21
    shape = [r, s]
    nvis = r*s
    output_channels = 13
    batch_size = 103

    x = np.random.rand(batch_size, r, s, 1)
    y = np.random.randint(2, size=[batch_size, output_channels, 1, 1])

    x = x.astype(config.floatX)
    y = y.astype(config.floatX)

    x_mlp = x.flatten().reshape(batch_size, nvis)
    y_mlp = y.flatten().reshape(batch_size, output_channels)

    # Initialize convnet with random weights.

    conv_model = MLP(
        input_space=Conv2DSpace(shape=shape,
                                axes=['b', 0, 1, 'c'],
                                num_channels=1),
        layers=[ConvElemwise(layer_name='conv',
                             nonlinearity=conv_nonlinearity,
                             output_channels=output_channels,
                             kernel_shape=shape,
                             pool_shape=[1, 1],
                             pool_stride=shape,
                             irange=1.0)],
        batch_size=batch_size
    )

    X = conv_model.get_input_space().make_theano_batch()
    Y = conv_model.get_target_space().make_theano_batch()
    Y_hat = conv_model.fprop(X)
    g = theano.function([X], Y_hat)

    # Construct an equivalent MLP which gives the same output
    # after flattening both.
    mlp_model = MLP(
        layers=[mlp_nonlinearity(dim=output_channels,
                                 layer_name='mlp',
                                 irange=1.0)],
        batch_size=batch_size,
        nvis=nvis
    )

    W, b = conv_model.get_param_values()

    W_mlp = np.zeros(shape=(output_channels, nvis), dtype=config.floatX)
    for k in range(output_channels):
        W_mlp[k] = W[k, 0].flatten()[::-1]
    W_mlp = W_mlp.T
    b_mlp = b.flatten()

    mlp_model.set_param_values([W_mlp, b_mlp])

    X1 = mlp_model.get_input_space().make_theano_batch()
    Y1 = mlp_model.get_target_space().make_theano_batch()
    Y1_hat = mlp_model.fprop(X1)
    f = theano.function([X1], Y1_hat)

    # Check that the two models give the same output
    assert_allclose(f(x_mlp).flatten(), g(x).flatten(), rtol=1e-5, atol=5e-5)

    if cost_implemented:
        # Check that the two models have the same costs
        mlp_cost = theano.function([X1, Y1], mlp_model.cost(Y1, Y1_hat))
        conv_cost = theano.function([X, Y], conv_model.cost(Y, Y_hat))
        assert_allclose(conv_cost(x, y), mlp_cost(x_mlp, y_mlp))
    else:
        # Check that both costs are not implemented
        assert_raises(NotImplementedError, conv_model.cost, Y, Y_hat)
        assert_raises(NotImplementedError, mlp_model.cost, Y1, Y1_hat)
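Note: a hypothetical invocation, for illustration only (the pairing is an assumption, not taken from the original test module). SigmoidConvNonlinearity and the dense Sigmoid layer both have working costs elsewhere in this file, so they would exercise the cost-comparison branch; passing cost_implemented=False instead asserts that both costs raise NotImplementedError.

check_case(SigmoidConvNonlinearity(monitor_style="detection"), Sigmoid,
           cost_implemented=True)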
def check_case(conv_nonlinearity, mlp_nonlinearity, cost_implemented=True):
    """Check that ConvNonLinearity and MLPNonlinearity are consistent.

    This is done by building an MLP with a ConvElemwise layer with the
    supplied non-linearity, an MLP with a dense layer, and checking that
    the outputs (and costs if applicable) are consistent.

    Parameters
    ----------
    conv_nonlinearity: instance of `ConvNonlinearity`
        The non-linearity to provide to a `ConvElemwise` layer.

    mlp_nonlinearity: subclass of `mlp.Linear`
        The fully-connected MLP layer (including non-linearity).

    cost_implemented: bool
        If `True`, check that both costs give consistent results.
        If `False`, check that both costs raise `NotImplementedError`.
    """

    # Create fake data
    np.random.seed(12345)

    r = 31
    s = 21
    shape = [r, s]
    nvis = r * s
    output_channels = 13
    batch_size = 103

    x = np.random.rand(batch_size, r, s, 1)
    y = np.random.randint(2, size=[batch_size, output_channels, 1, 1])

    x = x.astype(config.floatX)
    y = y.astype(config.floatX)

    x_mlp = x.flatten().reshape(batch_size, nvis)
    y_mlp = y.flatten().reshape(batch_size, output_channels)

    # Initialize convnet with random weights.

    conv_model = MLP(input_space=Conv2DSpace(shape=shape,
                                             axes=['b', 0, 1, 'c'],
                                             num_channels=1),
                     layers=[
                         ConvElemwise(layer_name='conv',
                                      nonlinearity=conv_nonlinearity,
                                      output_channels=output_channels,
                                      kernel_shape=shape,
                                      pool_shape=[1, 1],
                                      pool_stride=shape,
                                      irange=1.0)
                     ],
                     batch_size=batch_size)

    X = conv_model.get_input_space().make_theano_batch()
    Y = conv_model.get_target_space().make_theano_batch()
    Y_hat = conv_model.fprop(X)
    g = theano.function([X], Y_hat)

    # Construct an equivalent MLP which gives the same output
    # after flattening both.
    mlp_model = MLP(layers=[
        mlp_nonlinearity(dim=output_channels, layer_name='mlp', irange=1.0)
    ],
                    batch_size=batch_size,
                    nvis=nvis)

    W, b = conv_model.get_param_values()

    W_mlp = np.zeros(shape=(output_channels, nvis), dtype=config.floatX)
    for k in range(output_channels):
        W_mlp[k] = W[k, 0].flatten()[::-1]
    W_mlp = W_mlp.T
    b_mlp = b.flatten()

    mlp_model.set_param_values([W_mlp, b_mlp])

    X1 = mlp_model.get_input_space().make_theano_batch()
    Y1 = mlp_model.get_target_space().make_theano_batch()
    Y1_hat = mlp_model.fprop(X1)
    f = theano.function([X1], Y1_hat)

    # Check that the two models give the same output
    assert_allclose(f(x_mlp).flatten(), g(x).flatten(), rtol=1e-5, atol=5e-5)

    if cost_implemented:
        # Check that the two models have the same costs
        mlp_cost = theano.function([X1, Y1], mlp_model.cost(Y1, Y1_hat))
        conv_cost = theano.function([X, Y], conv_model.cost(Y, Y_hat))
        assert_allclose(conv_cost(x, y), mlp_cost(x_mlp, y_mlp))
    else:
        # Check that both costs are not implemented
        assert_raises(NotImplementedError, conv_model.cost, Y, Y_hat)
        assert_raises(NotImplementedError, mlp_model.cost, Y1, Y1_hat)
def check_implemented_case(ConvNonlinearity, MLPNonlinearity):

    # Create fake data
    np.random.seed(12345)

    r = 31
    s = 21
    shape = [r, s]
    nvis = r*s
    output_channels = 13
    batch_size = 103

    x = np.random.rand(batch_size, r, s, 1)
    y = np.random.randint(2, size=[batch_size, output_channels, 1, 1])

    x = x.astype('float32')
    y = y.astype('float32')

    x_mlp = x.flatten().reshape(batch_size, nvis)
    y_mlp = y.flatten().reshape(batch_size, output_channels)

    # Initialize convnet with random weights.

    conv_model = MLP(
        input_space=Conv2DSpace(shape=shape, axes=['b', 0, 1, 'c'],
                                num_channels=1),
        layers=[ConvElemwise(layer_name='conv',
                             nonlinearity=ConvNonlinearity,
                             output_channels=output_channels,
                             kernel_shape=shape,
                             pool_shape=[1, 1],
                             pool_stride=shape,
                             irange=1.0)],
        batch_size=batch_size
    )

    X = conv_model.get_input_space().make_theano_batch()
    Y = conv_model.get_target_space().make_theano_batch()
    Y_hat = conv_model.fprop(X)
    g = theano.function([X], Y_hat)

    # Construct an equivalent MLP which gives the same output
    # after flattening both.
    mlp_model = MLP(
        layers=[MLPNonlinearity(dim=output_channels, layer_name='mlp',
                                irange=1.0)],
        batch_size=batch_size,
        nvis=nvis
    )

    W, b = conv_model.get_param_values()

    W = W.astype('float32')
    b = b.astype('float32')

    W_mlp = np.zeros(shape=(output_channels, nvis))
    for k in range(output_channels):
        W_mlp[k] = W[k, 0].flatten()[::-1]
    W_mlp = W_mlp.T
    b_mlp = b.flatten()

    W_mlp = W_mlp.astype('float32')
    b_mlp = b_mlp.astype('float32')

    mlp_model.set_param_values([W_mlp, b_mlp])

    X1 = mlp_model.get_input_space().make_theano_batch()
    Y1 = mlp_model.get_target_space().make_theano_batch()
    Y1_hat = mlp_model.fprop(X1)
    f = theano.function([X1], Y1_hat)


    # Check that the two models give the same output
    assert np.linalg.norm(f(x_mlp).flatten() - g(x).flatten()) < 1e-3

    # Check that the two models have the same costs:
    mlp_cost = theano.function([X1, Y1], mlp_model.cost(Y1, Y1_hat))
    conv_cost = theano.function([X, Y], conv_model.cost(Y, Y_hat))

    assert np.linalg.norm(conv_cost(x, y) - mlp_cost(x_mlp, y_mlp)) < 1e-3
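Note on the W[k, 0].flatten()[::-1] construction used to build the equivalent dense weights: with a kernel the same size as the input and no padding, a true (kernel-flipping) convolution collapses to a dot product with the kernel reversed along both axes, which for a C-ordered array is the same as reversing the flattened kernel. A standalone numpy/scipy check of that identity follows; it is illustrative only and assumes ConvElemwise performs a flipping convolution, which is what the reversal above compensates for.

import numpy as np
from scipy.signal import convolve2d

rng = np.random.RandomState(0)
image = rng.rand(5, 7)
kernel = rng.rand(5, 7)

conv_out = convolve2d(image, kernel, mode='valid')[0, 0]   # single "valid" output
dense_out = image.flatten().dot(kernel.flatten()[::-1])    # flipped-kernel dot product
assert np.allclose(conv_out, dense_out)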