Example #1
def test_softmax_binary_targets():
    """
    Constructs softmax layers with binary target and with vector targets
    to check that they give the same cost.
    """
    num_classes = 10
    batch_size = 20
    mlp_bin = MLP(
        layers=[Softmax(num_classes, 's1', irange=0.1, binary_target_dim=1)],
        nvis=100
    )
    mlp_vec = MLP(
        layers=[Softmax(num_classes, 's1', irange=0.1)],
        nvis=100
    )

    X = mlp_bin.get_input_space().make_theano_batch()
    y_bin = mlp_bin.get_target_space().make_theano_batch()
    y_vec = mlp_vec.get_target_space().make_theano_batch()

    y_hat_bin = mlp_bin.fprop(X)
    y_hat_vec = mlp_vec.fprop(X)
    cost_bin = theano.function([X, y_bin], mlp_bin.cost(y_bin, y_hat_bin),
                               allow_input_downcast=True)
    cost_vec = theano.function([X, y_vec], mlp_vec.cost(y_vec, y_hat_vec),
                               allow_input_downcast=True)

    X_data = np.random.random(size=(batch_size, 100))
    y_bin_data = np.random.randint(low=0, high=10, size=(batch_size, 1))
    y_vec_data = np.zeros((batch_size, num_classes))
    y_vec_data[np.arange(batch_size),y_bin_data.flatten()] = 1
    np.testing.assert_allclose(cost_bin(X_data, y_bin_data),
                               cost_vec(X_data, y_vec_data))
Example #2
def test_composite_layer():
    """
    Test the routing functionality of the CompositeLayer
    """
    # Without routing
    composite_layer = CompositeLayer('composite_layer',
                                     [Linear(2, 'h0', irange=0),
                                      Linear(2, 'h1', irange=0),
                                      Linear(2, 'h2', irange=0)])
    mlp = MLP(nvis=2, layers=[composite_layer])
    for i in range(3):
        composite_layer.layers[i].set_weights(
            np.eye(2, dtype=theano.config.floatX)
        )
        composite_layer.layers[i].set_biases(
            np.zeros(2, dtype=theano.config.floatX)
        )
    X = theano.tensor.matrix()
    y = mlp.fprop(X)
    funs = [theano.function([X], y_elem) for y_elem in y]
    x_numeric = np.random.rand(2, 2).astype('float32')
    y_numeric = [f(x_numeric) for f in funs]
    assert np.all(x_numeric == y_numeric)

    # With routing
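    # inputs_to_layers maps each input index to the list of sub-layers that
    # receive that input; since every sub-layer has identity weights and zero
    # biases, the output of layer j must equal input i for every
    # j in inputs_to_layers[i], which is what the final assert checks.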
    for inputs_to_layers in [{0: [1], 1: [2], 2: [0]},
                             {0: [1], 1: [0, 2], 2: []},
                             {0: [], 1: []}]:
        composite_layer = CompositeLayer('composite_layer',
                                         [Linear(2, 'h0', irange=0),
                                          Linear(2, 'h1', irange=0),
                                          Linear(2, 'h2', irange=0)],
                                         inputs_to_layers)
        input_space = CompositeSpace([VectorSpace(dim=2),
                                      VectorSpace(dim=2),
                                      VectorSpace(dim=2)])
        mlp = MLP(input_space=input_space, layers=[composite_layer])
        for i in range(3):
            composite_layer.layers[i].set_weights(
                np.eye(2, dtype=theano.config.floatX)
            )
            composite_layer.layers[i].set_biases(
                np.zeros(2, dtype=theano.config.floatX)
            )
        X = [theano.tensor.matrix() for _ in range(3)]
        y = mlp.fprop(X)
        funs = [theano.function(X, y_elem, on_unused_input='ignore')
                for y_elem in y]
        x_numeric = [np.random.rand(2, 2).astype(theano.config.floatX)
                     for _ in range(3)]
        y_numeric = [f(*x_numeric) for f in funs]
        assert all([all([np.all(x_numeric[i] == y_numeric[j])
                         for j in inputs_to_layers[i]])
                    for i in inputs_to_layers])
Example #3
def test_kl():
    """
    Test whether the kl() function properly validates its input.
    """
    init_mode = theano.config.compute_test_value
    theano.config.compute_test_value = 'raise'
    
    try:
        mlp = MLP(layers=[Sigmoid(dim=10, layer_name='Y', irange=0.1)],
                  nvis=10)
        X = mlp.get_input_space().make_theano_batch()
        Y = mlp.get_output_space().make_theano_batch()
        X.tag.test_value = np.random.random(
            get_debug_values(X)[0].shape).astype(theano.config.floatX)
        Y_hat = mlp.fprop(X)

        # This call should not raise any error:
        ave = kl(Y, Y_hat, 1)

        # The following calls should raise ValueError exceptions:
        Y.tag.test_value[2][3] = 1.1
        np.testing.assert_raises(ValueError, kl, Y, Y_hat, 1)
        Y.tag.test_value[2][3] = -0.1
        np.testing.assert_raises(ValueError, kl, Y, Y_hat, 1)
    
    finally:
        theano.config.compute_test_value = init_mode
def build_mlp_fn(x0, y0, x1, y1, s0, s1, c, axes):
    """
    Creates a Theano function to test the WindowLayer

    Parameters
    ----------
    x0: x coordinate of the left of the window
    y0: y coordinate of the top of the window
    x1: x coordinate of the right of the window
    y1: y coordinate of the bottom of the window
    s0: x shape of the images of the input space
    s1: y shape of the images of the input space
    c: number of channels of the input space
    axes: description of the axes of the input space

    Returns
    -------
    f: a Theano function applying the window layer
    with window (x0, y0, x1, y1).
    """
    mlp = MLP(layers=[WindowLayer('h0', window=(x0, y0, x1, y1))],
              input_space=Conv2DSpace(shape=(s0, s1),
                                      num_channels=c, axes=axes))
    X = mlp.get_input_space().make_batch_theano()
    f = theano.function([X], mlp.fprop(X))
    return f
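# A minimal usage sketch for build_mlp_fn (hypothetical window and image sizes,
# not taken from the original tests): compile a window over a 10x11 two-channel
# image and apply it to a random batch in the ('b', 0, 1, 'c') layout.
fn = build_mlp_fn(1, 2, 5, 7, 10, 11, 2, ('b', 0, 1, 'c'))
batch = np.random.rand(3, 10, 11, 2).astype(theano.config.floatX)
out = fn(batch)  # a (batch, window_rows, window_cols, channels) array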
Example #5
def test_sigmoid_layer_misclass_reporting():
    mlp = MLP(nvis=3, layers=[Sigmoid(layer_name='h0', dim=1, irange=0.005,
                                      monitor_style='classification')])
    target = theano.tensor.matrix(dtype=theano.config.floatX)
    batch = theano.tensor.matrix(dtype=theano.config.floatX)
    rval = mlp.layers[0].get_monitoring_channels_from_state(mlp.fprop(batch), target)

    f = theano.function([batch, target], [tensor.gt(mlp.fprop(batch), 0.5),
                                          rval['misclass']],
                        allow_input_downcast=True)
    rng = np.random.RandomState(0)

    for _ in range(10):  # repeat a few times for statistical strength
        targets = (rng.uniform(size=(30, 1)) > 0.5).astype('uint8')
        out, misclass = f(rng.normal(size=(30, 3)), targets)
        np.testing.assert_allclose((targets != out).mean(), misclass)
Example #6
def test_sigmoid_detection_cost():
    # This is only a smoke test: verifies that it compiles and runs,
    # not any particular value.
    rng = np.random.RandomState(0)
    y = (rng.uniform(size=(4, 3)) > 0.5).astype('uint8')
    X = theano.shared(rng.uniform(size=(4, 2)))
    model = MLP(nvis=2, layers=[Sigmoid(monitor_style='detection', dim=3,
                layer_name='y', irange=0.8)])
    y_hat = model.fprop(X)
    model.cost(y, y_hat).eval()
Example #7
def test_identity_layer():
    nvis = 10

    mlp = MLP(nvis=nvis, layers=[util.IdentityLayer(layer_name='ident')])

    X = T.matrix()
    f = theano.function([X], mlp.fprop(X))

    for _ in range(5):
        X = np.random.rand(10, nvis).astype(theano.config.floatX)
        yield _test_identity_layer, f, X
Example #8
def test_nested_mlp():
    """
    Constructs a nested MLP and tries to fprop through it
    """
    inner_mlp = MLP(layers=[Linear(10, 'h0', 0.1), Linear(10, 'h1', 0.1)],
                    layer_name='inner_mlp')
    outer_mlp = MLP(layers=[CompositeLayer(layer_name='composite',
                                           layers=[inner_mlp,
                                                   Linear(10, 'h2', 0.1)])],
                    nvis=10)
    X = outer_mlp.get_input_space().make_theano_batch()
    f = theano.function([X], outer_mlp.fprop(X))
    f(np.random.rand(5, 10).astype(theano.config.floatX))
Example #9
def test_softmax_two_binary_targets():
    """
    Constructs softmax layers with two binary targets and with vector targets
    to check that they give the same cost.
    """
    num_classes = 10
    batch_size = 20
    mlp_bin = MLP(
        layers=[Softmax(num_classes, 's1', irange=0.1, binary_target_dim=2)],
        nvis=100
    )
    mlp_vec = MLP(
        layers=[Softmax(num_classes, 's1', irange=0.1)],
        nvis=100
    )

    X = mlp_bin.get_input_space().make_theano_batch()
    y_bin = mlp_bin.get_target_space().make_theano_batch()
    y_vec = mlp_vec.get_target_space().make_theano_batch()

    y_hat_bin = mlp_bin.fprop(X)
    y_hat_vec = mlp_vec.fprop(X)
    cost_bin = theano.function([X, y_bin], mlp_bin.cost(y_bin, y_hat_bin),
                               allow_input_downcast=True)
    cost_vec = theano.function([X, y_vec], mlp_vec.cost(y_vec, y_hat_vec),
                               allow_input_downcast=True)

    X_data = np.random.random(size=(batch_size, 100))
    # binary and vector costs can only match
    # if binary targets are mutually exclusive
    y_bin_data = np.concatenate([np.random.permutation(10)[:2].reshape((1, 2))
                                 for _ in range(batch_size)])
    y_vec_data = np.zeros((batch_size, num_classes))
    y_vec_data[np.arange(batch_size), y_bin_data[:, 0].flatten()] = 1
    y_vec_data[np.arange(batch_size), y_bin_data[:, 1].flatten()] = 1
    np.testing.assert_allclose(cost_bin(X_data, y_bin_data),
                               cost_vec(X_data, y_vec_data))
def check_unimplemented_case(ConvNonlinearity):

    conv_model = MLP(
        input_space=Conv2DSpace(shape=[1, 1], axes=['b', 0, 1, 'c'],
                                num_channels=1),
        layers=[ConvElemwise(layer_name='conv', nonlinearity=ConvNonlinearity,
                             output_channels=1, kernel_shape=[1, 1],
                             pool_shape=[1, 1], pool_stride=[1, 1],
                             irange=1.0)],
        batch_size=1
    )

    X = conv_model.get_input_space().make_theano_batch()
    Y = conv_model.get_target_space().make_theano_batch()
    Y_hat = conv_model.fprop(X)

    np.testing.assert_raises(NotImplementedError, conv_model.cost, Y, Y_hat)
Example #11
def test_min_zero():
    """
    This test guards against a bug where the size of the zero buffer used with
    the min_zero flag was specified to have the wrong size. The bug only
    manifested when compiled with optimizations off, because the optimizations
    discard information about the size of the zero buffer.
    """
    mlp = MLP(input_space=VectorSpace(1),
            layers= [Maxout(layer_name="test_layer", num_units=1, num_pieces = 2,
            irange=.05, min_zero=True)])
    X = T.matrix()
    output = mlp.fprop(X)
    # Compile in debug mode so we don't optimize out the size of the buffer
    # of zeros
    f = function([X], output, mode="DEBUG_MODE")
    f(np.zeros((1, 1)).astype(X.dtype))
Example #12
def test_conditional_encode_conditional_parameters():
    """
    Conditional.encode_conditional_parameters calls its MLP's fprop method
    """
    mlp = MLP(layers=[Linear(layer_name="h", dim=5, irange=0.01, max_col_norm=0.01)])
    conditional = DummyConditional(mlp=mlp, name="conditional")
    vae = DummyVAE()
    conditional.set_vae(vae)
    input_space = VectorSpace(dim=5)
    conditional.initialize_parameters(input_space=input_space, ndim=5)

    X = T.matrix("X")
    mlp_Y1, mlp_Y2 = mlp.fprop(X)
    cond_Y1, cond_Y2 = conditional.encode_conditional_params(X)
    f = theano.function([X], [mlp_Y1, mlp_Y2, cond_Y1, cond_Y2])
    rval = f(as_floatX(numpy.random.uniform(size=(10, 5))))
    numpy.testing.assert_allclose(rval[0], rval[2])
    numpy.testing.assert_allclose(rval[1], rval[3])
Example #13
    def test_cost(self):
        """
        Use an RNN to calculate Mersenne number sequences of different
        lengths and check whether the costs make sense.
        """
        rnn = MLP(input_space=SequenceSpace(VectorSpace(dim=1)),
                  layers=[Recurrent(dim=1, layer_name='recurrent',
                                    irange=0, nonlinearity=lambda x: x),
                          Linear(dim=1, layer_name='linear', irange=0)])
        W, U, b = rnn.layers[0].get_params()
        W.set_value([[1]])
        U.set_value([[2]])

        W, b = rnn.layers[1].get_params()
        W.set_value([[1]])

        X_data, X_mask = rnn.get_input_space().make_theano_batch()
        y_data, y_mask = rnn.get_output_space().make_theano_batch()
        y_data_hat, y_mask_hat = rnn.fprop((X_data, X_mask))

        seq_len = 20
        X_data_vals = np.ones((seq_len, seq_len, 1))
        X_mask_vals = np.triu(np.ones((seq_len, seq_len)))
        y_data_vals = np.tile((2 ** np.arange(1, seq_len + 1) - 1),
                              (seq_len, 1)).T[:, :, np.newaxis]
        y_mask_vals = np.triu(np.ones((seq_len, seq_len)))

        f = function([X_data, X_mask, y_data, y_mask],
                     rnn.cost((y_data, y_mask), (y_data_hat, y_mask_hat)),
                     allow_input_downcast=True)
        # The cost for two exact sequences should be zero
        assert f(X_data_vals, X_mask_vals, y_data_vals, y_mask_vals) == 0
        # If the input is different, the cost should be non-zero
        assert f(X_data_vals + 1, X_mask_vals, y_data_vals, y_mask_vals) != 0
        # And same for the target data; using squared L2 norm, so should be 1
        assert f(X_data_vals, X_mask_vals, y_data_vals + 1, y_mask_vals) == 1
        # But if the masked data changes, the cost should remain the same
        X_data_vals_plus = X_data_vals + (1 - X_mask_vals[:, :, None])
        assert f(X_data_vals_plus, X_mask_vals, y_data_vals, y_mask_vals) == 0
        y_data_vals_plus = y_data_vals + (1 - y_mask_vals[:, :, None])
        assert f(X_data_vals, X_mask_vals, y_data_vals_plus, y_mask_vals) == 0
def test_sigmoid_detection_cost():
    """
    Tests that the sigmoid convolutional layer's detection cost evaluates to a
    legal value.
    """

    rng = np.random.RandomState(0)
    sigmoid_nonlin = SigmoidConvNonlinearity(monitor_style="detection")
    (rows, cols) = (10, 10)
    axes = ('c', 0, 1, 'b')
    nchs = 1

    space_shp = (nchs, rows, cols, 1)
    X_vals = np.random.uniform(-0.01, 0.01,
                               size=space_shp).astype(config.floatX)
    X = theano.shared(X_vals, name="X")

    Y_vals = (np.random.uniform(-0.01, 0.01,
                                size=(rows, cols)) > 0.005).astype("uint8")
    Y = theano.shared(Y_vals, name="y_vals")

    conv_elemwise = ConvElemwise(layer_name="h0",
                                 output_channels=1,
                                 irange=.005,
                                 kernel_shape=(1, 1),
                                 max_kernel_norm=0.9,
                                 nonlinearity=sigmoid_nonlin)

    input_space = pylearn2.space.Conv2DSpace(shape=(rows, cols),
                                             num_channels=nchs,
                                             axes=axes)
    model = MLP(batch_size=1,
                layers=[conv_elemwise],
                input_space=input_space)
    Y_hat = model.fprop(X)
    cost = model.cost(Y, Y_hat).eval()

    assert not(np.isnan(cost) or np.isinf(cost) or (cost < 0.0)
               or (cost is None)), ("cost returns illegal "
                                    "value.")
def test_conv_pooling_nonlin():
    """
    Tests whether the nonlinearity is applied before the pooling.
    """

    rng = np.random.RandomState(0)
    sigm_nonlin = SigmoidConvNonlinearity(monitor_style="detection")
    (rows, cols) = (5, 5)
    axes = ('c', 0, 1, 'b')
    nchs = 1

    space_shp = (nchs, rows, cols, 1)
    X_vals = np.random.uniform(-0.01, 0.01,
                               size=space_shp).astype(config.floatX)
    X = theano.shared(X_vals, name="X")

    conv_elemwise = ConvElemwise(layer_name="h0",
                                 output_channels=1,
                                 pool_type="max",
                                 irange=.005,
                                 kernel_shape=(1, 1),
                                 pool_shape=(1, 1),
                                 pool_stride=(1, 1),
                                 nonlinearity=sigm_nonlin)

    input_space = pylearn2.space.Conv2DSpace(shape=(rows, cols),
                                             num_channels=nchs,
                                             axes=axes)
    model = MLP(batch_size=1,
                layers=[conv_elemwise],
                input_space=input_space)

    Y_hat = model.fprop(X)
    assert "max" in str(Y_hat.name)
    ancestors = theano.gof.graph.ancestors([Y_hat])
    lcond = ["sigm" in str(anc.owner) for anc in ancestors]
    assert np.array(lcond).nonzero()[0].shape[0] > 0, ("Nonlinearity should be "
                                                       "applied before pooling.")
Example #16
    def test_fprop(self):
        """
        Use an RNN without non-linearity to create the Mersenne numbers
        (2 ** n - 1) to check whether fprop works correctly.
        """
        rnn = MLP(input_space=SequenceSpace(VectorSpace(dim=1)),
                  layers=[Recurrent(dim=1, layer_name='recurrent',
                                    irange=0.1, indices=[-1],
                                    nonlinearity=lambda x: x)])
        W, U, b = rnn.layers[0].get_params()
        W.set_value([[1]])
        U.set_value([[2]])

        X_data, X_mask = rnn.get_input_space().make_theano_batch()
        y_hat = rnn.fprop((X_data, X_mask))

        seq_len = 20
        X_data_vals = np.ones((seq_len, seq_len, 1))
        X_mask_vals = np.triu(np.ones((seq_len, seq_len)))

        f = function([X_data, X_mask], y_hat, allow_input_downcast=True)
        np.testing.assert_allclose(2 ** np.arange(1, seq_len + 1) - 1,
                                   f(X_data_vals, X_mask_vals).flatten())
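# Sanity-check sketch (not part of the original test): with W = 1, U = 2 and a
# linear activation, the recurrence computed above is
# h_t = x_t * W + h_{t-1} * U = 1 + 2 * h_{t-1} with h_0 = 0, whose closed form
# is the Mersenne number 2 ** t - 1.
h = 0
for t in range(1, 21):
    h = 1 + 2 * h
    assert h == 2 ** t - 1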
def check_case(conv_nonlinearity, mlp_nonlinearity, cost_implemented=True):
    """Check that ConvNonLinearity and MLPNonlinearity are consistent.

    This is done by building an MLP with a ConvElemwise layer with the
    supplied non-linearity, an MLP with a dense layer, and checking that
    the outputs (and costs if applicable) are consistent.

    Parameters
    ----------
    conv_nonlinearity: instance of `ConvNonlinearity`
        The non-linearity to provide to a `ConvElemwise` layer.

    mlp_nonlinearity: subclass of `mlp.Linear`
        The fully-connected MLP layer (including non-linearity).

    cost_implemented: bool
        If `True`, check that both costs give consistent results.
        If `False`, check that both costs raise `NotImplementedError`.
    """

    # Create fake data
    np.random.seed(12345)

    r = 31
    s = 21
    shape = [r, s]
    nvis = r*s
    output_channels = 13
    batch_size = 103

    x = np.random.rand(batch_size, r, s, 1)
    y = np.random.randint(2, size=[batch_size, output_channels, 1, 1])

    x = x.astype(config.floatX)
    y = y.astype(config.floatX)

    x_mlp = x.flatten().reshape(batch_size, nvis)
    y_mlp = y.flatten().reshape(batch_size, output_channels)

    # Initialize convnet with random weights.

    conv_model = MLP(
        input_space=Conv2DSpace(shape=shape,
                                axes=['b', 0, 1, 'c'],
                                num_channels=1),
        layers=[ConvElemwise(layer_name='conv',
                             nonlinearity=conv_nonlinearity,
                             output_channels=output_channels,
                             kernel_shape=shape,
                             pool_shape=[1, 1],
                             pool_stride=shape,
                             irange=1.0)],
        batch_size=batch_size
    )

    X = conv_model.get_input_space().make_theano_batch()
    Y = conv_model.get_target_space().make_theano_batch()
    Y_hat = conv_model.fprop(X)
    g = theano.function([X], Y_hat)

    # Construct an equivalent MLP which gives the same output
    # after flattening both.
    mlp_model = MLP(
        layers=[mlp_nonlinearity(dim=output_channels,
                                 layer_name='mlp',
                                 irange=1.0)],
        batch_size=batch_size,
        nvis=nvis
    )

    W, b = conv_model.get_param_values()

    W_mlp = np.zeros(shape=(output_channels, nvis), dtype=config.floatX)
    for k in range(output_channels):
        W_mlp[k] = W[k, 0].flatten()[::-1]
    W_mlp = W_mlp.T
    b_mlp = b.flatten()

    mlp_model.set_param_values([W_mlp, b_mlp])

    X1 = mlp_model.get_input_space().make_theano_batch()
    Y1 = mlp_model.get_target_space().make_theano_batch()
    Y1_hat = mlp_model.fprop(X1)
    f = theano.function([X1], Y1_hat)

    # Check that the two models give the same output
    assert_allclose(f(x_mlp).flatten(), g(x).flatten(), rtol=1e-5, atol=5e-5)

    if cost_implemented:
        # Check that the two models have the same costs
        mlp_cost = theano.function([X1, Y1], mlp_model.cost(Y1, Y1_hat))
        conv_cost = theano.function([X, Y], conv_model.cost(Y, Y_hat))
        assert_allclose(conv_cost(x, y), mlp_cost(x_mlp, y_mlp))
    else:
        # Check that both costs are not implemented
        assert_raises(NotImplementedError, conv_model.cost, Y, Y_hat)
        assert_raises(NotImplementedError, mlp_model.cost, Y1, Y1_hat)
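# Hypothetical driver sketch for check_case: the sigmoid pairing below is an
# assumption based on the sigmoid tests elsewhere in this file; other pairings
# (and the cost_implemented=False cases) would follow the same pattern.
def test_sigmoid_conv_case():
    check_case(SigmoidConvNonlinearity(monitor_style="detection"), Sigmoid,
               cost_implemented=True)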
def check_implemented_case(ConvNonlinearity, MLPNonlinearity):

    # Create fake data
    np.random.seed(12345)

    r = 31
    s = 21
    shape = [r, s]
    nvis = r*s
    output_channels = 13
    batch_size = 103

    x = np.random.rand(batch_size, r, s, 1)
    y = np.random.randint(2, size=[batch_size, output_channels, 1, 1])

    x = x.astype('float32')
    y = y.astype('float32')

    x_mlp = x.flatten().reshape(batch_size, nvis)
    y_mlp = y.flatten().reshape(batch_size, output_channels)

    # Initialize convnet with random weights.  

    conv_model = MLP(
        input_space=Conv2DSpace(shape=shape, axes=['b', 0, 1, 'c'],
                                num_channels=1),
        layers=[ConvElemwise(layer_name='conv', nonlinearity=ConvNonlinearity,
                             output_channels=output_channels,
                             kernel_shape=shape, pool_shape=[1, 1],
                             pool_stride=shape, irange=1.0)],
        batch_size=batch_size
    )

    X = conv_model.get_input_space().make_theano_batch()
    Y = conv_model.get_target_space().make_theano_batch()
    Y_hat = conv_model.fprop(X)
    g = theano.function([X], Y_hat)

    # Construct an equivalent MLP which gives the same output after flattening both.

    mlp_model = MLP(
        layers=[MLPNonlinearity(dim=output_channels, layer_name='mlp',
                                irange=1.0)],
        batch_size=batch_size,
        nvis=nvis
    )

    W, b = conv_model.get_param_values()

    W = W.astype('float32')
    b = b.astype('float32')

    W_mlp = np.zeros(shape = (output_channels, nvis))
    for k in range(output_channels):
        W_mlp[k] = W[k, 0].flatten()[::-1]
    W_mlp = W_mlp.T
    b_mlp = b.flatten()

    W_mlp = W_mlp.astype('float32')
    b_mlp = b_mlp.astype('float32')

    mlp_model.set_param_values([W_mlp, b_mlp])

    X1 = mlp_model.get_input_space().make_theano_batch()
    Y1 = mlp_model.get_target_space().make_theano_batch()
    Y1_hat = mlp_model.fprop(X1)
    f = theano.function([X1], Y1_hat)


    # Check that the two models give the same output
    assert np.linalg.norm(f(x_mlp).flatten() -  g(x).flatten()) < 10**-3

    # Check that the two models have the same costs:
    mlp_cost = theano.function([X1, Y1], mlp_model.cost(Y1, Y1_hat))
    conv_cost = theano.function([X, Y], conv_model.cost(Y, Y_hat))

    assert np.linalg.norm(conv_cost(x,y) - mlp_cost(x_mlp, y_mlp)) < 10**-3
Example #19
class DBL_model(object):
    def __init__(self,algo_id,model_id,num_epoch,num_dim,train_id,test_id): 
        self.algo_id = algo_id
        self.model_id = model_id
        self.num_epoch = num_epoch
        self.num_dim = num_dim
        self.train_id = train_id
        self.test_id = test_id

        self.path_train = None
        self.path_test = None
        self.p_data = None
        self.batch_size = None
        self.do_savew = True

        self.param = paramSet()
        self.p_monitor = {}
    def loadData(self,basepath,which_set,data_ind=None):
        self.DataLoader.loadData(self.p_data,basepath,which_set,data_ind)
            
    def loadWeight(self, fname):
         # create DBL_model          
        # load and rebuild model
        if fname[-3:] == 'pkl':
            layer_params = cPickle.load(open(fname))
        elif fname[-3:] == 'mat':
            mat = scipy.io.loadmat(fname)            
            layer_params = mat['param']            
        else:
            raise ValueError('cannot recognize: ' + fname)

        layer_id = 0
        num_layers = len(self.model.layers)
        for layer in self.model.layers:
            # squeeze for matlab structure
            #aa=layer.get_params();print aa[0].shape,aa[1].shape
            dims =[np.squeeze(layer_params[layer_id][k]).ndim for k in [0,1]]
            if fname[-3:] == 'mat':
                for id in [0,1]:
                    if dims[id] ==0:
                        layer_params[layer_id][id] = layer_params[layer_id][id][0]
                    
            if dims[0]>=dims[1]:
                layer.set_weights(layer_params[layer_id][0])
                layer.set_biases(layer_params[layer_id][1])
                #tmp = np.squeeze(layer_params[layer_id][1])                
            else:
                layer.set_weights(layer_params[layer_id][1])
                layer.set_biases(layer_params[layer_id][0])
                #tmp = np.squeeze(layer_params[layer_id][0])
            #print "aa:",layer_params[layer_id][1].shape,layer_params[layer_id][0].shape
            #print "sss:",layer_params[layer_id][1][:10]
            #print "ttt:",layer_params[layer_id][0][0]
            layer_id = layer_id + 1                            

    def saveWeight(self,pklname):                
        # save the model
        layer_params = []
        for layer in self.model.layers:
            param = layer.get_params()      
            #print param
            #print param[0].get_value().shape
            #print param[1].get_value().shape
            layer_params.append([param[0].get_value(), param[1].get_value()])
            
        cPickle.dump(layer_params, open(pklname, 'wb'))
    
    def loadAlgo(self,p_algo):
        # setup algo
        #print self.DataLoader.data
        if p_algo.algo_type==0:
            self.algo =  SGD(learning_rate = p_algo.learning_rate,
            cost = p_algo.cost,
            batch_size = p_algo.batch_size,
            monitoring_batches = p_algo.monitoring_batches,
            monitoring_dataset = p_algo.monitoring_dataset,
            monitor_iteration_mode = p_algo.monitor_iteration_mode,
            termination_criterion = p_algo.termination_criterion,
            update_callbacks = p_algo.update_callbacks,
            learning_rule = p_algo.learning_rule,
            init_momentum = p_algo.init_momentum,
            set_batch_size = p_algo.set_batch_size,
            train_iteration_mode = p_algo.train_iteration_mode,
            batches_per_iter = p_algo.batches_per_iter,
            theano_function_mode = p_algo.theano_function_mode,
            monitoring_costs = p_algo.monitoring_costs,
            seed = p_algo.seed)
        elif p_algo.algo_type==1:
                self.algo = BGD(
                cost = p_algo.cost,
                batch_size=p_algo.batch_size, 
                batches_per_iter=p_algo.batches_per_iter,
                updates_per_batch = p_algo.updates_per_batch,
                monitoring_batches=p_algo.monitoring_batches,
                monitoring_dataset=p_algo.monitoring_dataset,
                termination_criterion =p_algo.termination_criterion, 
                set_batch_size = p_algo.set_batch_size,
                reset_alpha = p_algo.reset_alpha, 
                conjugate = p_algo.conjugate,
                min_init_alpha = p_algo.min_init_alpha,
                reset_conjugate = p_algo.reset_conjugate, 
                line_search_mode = p_algo.line_search_mode,
                verbose_optimization=p_algo.verbose_optimization, 
                scale_step=p_algo.scale_step, 
                theano_function_mode=p_algo.theano_function_mode,
                init_alpha = p_algo.init_alpha, 
                seed = p_algo.seed)
        self.algo.setup(self.model, self.DataLoader.data['train'])

    def setup(self):
        self.setupParam()
        self.check_setupParam()

        self.dl_id = str(self.algo_id)+'_'+str(self.model_id)+'_'+str(self.num_dim).strip('[]').replace(', ','_')+'_'+str(self.train_id)+'_'+str(self.num_epoch)
        self.param_pkl = 'dl_p'+self.dl_id+'.pkl'
        self.result_mat = 'result/'+self.dl_id+'/dl_r'+str(self.test_id)+'.mat'
        self.buildModel()
        self.buildLayer()                
        
        self.DataLoader = DBL_Data()
        self.do_test = True
        print self.param_pkl
        if not os.path.exists(self.param_pkl):
            self.do_test = False
            # training
            self.loadData_train()
            self.buildAlgo()


    def setupParam(self):
        raise NotImplementedError(str(type(self)) + " does not implement: setupParam().")
    def check_setupParam(self):
        varnames = ['path_train','path_test','p_data','batch_size']
        for varname in varnames:
            if getattr(self, varname) is None:
                raise ValueError('Need to set "'+varname+'" in setupParam()')
    def buildModel(self):
        raise NotImplementedError(str(type(self)) + " does not implement: buildModel().")
    def buildAlgo(self):
        raise NotImplementedError(str(type(self)) + " does not implement: buildAlgo().")
    def train(self):
        raise NotImplementedError(str(type(self)) + " does not implement: train().")
    def test(self):
        raise NotImplementedError(str(type(self)) + " does not implement: test().")
    def loadData_train(self):
        raise NotImplementedError(str(type(self)) + " does not implement: buildAlgo().")
    def run(self):
        if self.do_test:
            self.test()
        else:
            # training
            self.train()
    
    def buildLayer(self):    
        # setup layer
        self.layers = []
        for param in self.p_layers:            
            if param[0].param_type==0:
                self.layers = self.layers + DBL_ConvLayers(param)
            elif param[0].param_type==1:
                self.layers = self.layers + DBL_FcLayers(param)
            elif param[0].param_type==2:
                self.layers = self.layers + DBL_CfLayers(param)        
        self.model = MLP(self.layers, input_space=self.ishape)

        # load available weight
        pre_dl_id = self.param_pkl[:self.param_pkl.rfind('_')+1]
        fns = glob.glob(pre_dl_id+'*.pkl')
        epoch_max = 0
        if len(fns)==0:
            # first time to do it, load matlab prior
            mat_init = 'init_p'+str(self.model_id)+'_'+str(self.train_id)+'.mat'
            if os.path.exists(mat_init):
                print "load initial mat weight: ", mat_init
                self.loadWeight(mat_init)
        else:
            for fn in fns:
                epoch_id = int(fn[fn.rfind('_')+1:fn.find('.pkl')])
                if (epoch_id>epoch_max and epoch_id<=self.num_epoch):
                    epoch_max = epoch_id
            if epoch_max>0:
                print "load weight at epoch: ", epoch_max
                self.loadWeight(pre_dl_id+str(epoch_max)+'.pkl')
                self.num_epoch -= epoch_max
        self.p_monitor['epoch'] = epoch_max

    def runTrain(self):        
        self.loadAlgo(self.p_algo)
        self.train_monitor = trainMonitor(self.model.monitor,self.p_monitor)
        #self.model.monitor.report_epoch()            
        self.train_monitor.run()
        while self.algo.continue_learning(self.model):
            self.algo.train(self.DataLoader.data['train'])            
            self.train_monitor.run()
            if self.do_savew and (self.train_monitor.monitor._epochs_seen+1)%10 == 0:
                self.saveWeight(self.param_pkl)
            #self.model.monitor()            
        if self.do_savew:
            self.saveWeight(self.param_pkl)


    def runTest(self,data_test=None,metric=-1):
        """
        metric: evaluation metric
        0: classification error
        1: L1 regression error
        2: L2 regression error
        """
        if data_test is None:
            data_test = self.DataLoader.data['test']
        batch_size = self.batch_size
        # make batches
        m = data_test.X.shape[0]
        extra = (batch_size - m % batch_size) % batch_size
        #print extra,batch_size,m
        assert (m + extra) % batch_size == 0
        #print data_test.X[0]
        if extra > 0:
            data_test.X = np.concatenate((data_test.X, np.zeros((extra, data_test.X.shape[1]),
                    dtype=data_test.X.dtype)), axis=0)
            assert data_test.X.shape[0] % batch_size == 0
        X = self.model.get_input_space().make_batch_theano()
        Y = self.model.fprop(X)
        """
        print 'load param:'
        param = self.model.layers[0].get_params()
        aa = param[0].get_value()
        bb = param[1].get_value()
        print aa[:3,:3],bb[:10]   
        """
        from theano import function
        if metric==0:
            from theano import tensor as T
            y = T.argmax(Y, axis=1)        
            f = function([X], y)
        else:
            f = function([X], Y)
        
        yhat = []
        for i in xrange(data_test.X.shape[0] / batch_size):
            x_arg = data_test.X[i*batch_size:(i+1)*batch_size,:]
            if X.ndim > 2:
                x_arg = data_test.get_topological_view(x_arg)
            yhat.append(f(x_arg.astype(X.dtype)))
        #print "ww:",x_arg.shape
        #print f(x_arg.astype(X.dtype)).shape
        yhat = np.concatenate(yhat)
        yhat = yhat[:m]
        data_test.X = data_test.X[:m,:]
        y = data_test.y
        #print m,extra
        acc = -1
        if y is not None:
            if metric == 0:
                if data_test.y.ndim>1:
                    y = np.argmax(data_test.y,axis=1)
                assert len(y)==len(yhat)
                acc = float(np.sum(y-yhat==0))/m
            elif metric == 1:
                acc = float(np.sum(abs(y-yhat)))/m
            elif metric == 2: 
                #print y.shape,yhat.shape
                #print float(np.sum((y-yhat)**2))
                print y[:30]
                print yhat[:30]
                print m
                acc = float(np.sum((y-np.reshape(yhat,y.shape))**2))/m
                #print "y: ",y
                #print "yhat: ",yhat
            print "acc: ",acc
            
        return [[yhat],[acc]]
Example #20
import numpy as np
from theano import config
from theano import function
from theano import tensor

from pylearn2.models.mlp import MLP, Tanh
from pylearn2.sandbox.rnn.space import SequenceSpace
from pylearn2.sandbox.rnn.models.mlp import Recurrent
from pylearn2.space import VectorSpace

mlp = MLP(layers=[Tanh(dim=25, layer_name='pre_rnn', irange=0.01),
                  Recurrent(dim=50, layer_name='recurrent', irange=0.01),
                  Tanh(dim=100, layer_name='h', irange=0.01)],
          input_space=SequenceSpace(VectorSpace(dim=25)))

# Very simple test
input = tensor.tensor3()
output = mlp.fprop(input)
f = function([input], output)

assert f(np.random.rand(10, 5, 25).astype(config.floatX)).shape == (5, 100)
import theano

from adversarial.deconv import Deconv

input_space = Conv2DSpace(shape=(2, 1), num_channels=16, axes=('c', 0, 1, 'b'))

deconv = Deconv(layer_name='deconv',
                num_channels=1,
                kernel_shape=(4, 4),
                output_stride=(2, 2),
                irange=0.)

mlp = MLP(input_space=input_space, layers=[deconv])

X = input_space.get_theano_batch()
f = theano.function([X], mlp.fprop(X))

# Construct dummy filters.
# Just use two for simplicity.
filter1 = np.array([[0, 1, 0, 1], [1, 0, 1, 0], [0, 1, 0, 1], [1, 0, 1, 0]])
filter2 = np.array([[-1, 0, -1, 0], [0, -1, 0, -1], [-1, 0, -1, 0],
                    [0, -1, 0, -1]])

filters_dest = deconv.transformer._filters
new_filters = np.zeros((16, 4, 4), dtype=filters_dest.dtype)
new_filters[0] = filter1
new_filters[1] = filter2
new_filters = new_filters.reshape(16, 4, 4, 1).swapaxes(0, 3)
deconv.transformer._filters.set_value(new_filters)
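# A minimal usage sketch (assumed layout): the input space uses axes
# ('c', 0, 1, 'b'), so a single-example batch has shape (16, 2, 1, 1).
# Activating only the first input channel should make the deconv output
# reflect filter1, up to the placement implied by output_stride.
X_batch = np.zeros((16, 2, 1, 1), dtype=theano.config.floatX)
X_batch[0, 0, 0, 0] = 1.0
out = f(X_batch)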

    print ""
    print "CONFIG: input =", ni, "x", iw, "x", ih, "* ker =", ni, "x", no, "x", kw, "x", kh, "( bs =", bs, ", stride =", dw, ")"

    conv = MLP(
        batch_size=bs,
        input_space=Conv2DSpace((ih, iw), num_channels=ni, axes=("b", "c", 0, 1)),
        layers=[ConvElemwise(no, (kw, kh), "ConvTest", ConvNonlinearity(), irange=0.1)],
    )

    inputBatch = np.random.randn(bs, ni, ih, iw)
    sharedX = theano.shared(inputBatch.astype("float32"))
    sharedY = theano.shared(np.random.randn(bs, no, (ih - kh) / dh + 1, (iw - kw) / dw + 1).astype("float32"))

    X = theano.tensor.tensor4()

    Y = conv.fprop(X)

    fprop = theano.function([], [], givens=[(X, sharedX)], updates=[(sharedY, Y)], on_unused_input="ignore")

    theano.sandbox.cuda.synchronize()
    start = time.time()
    for i in range(steps):
        fprop()
    theano.sandbox.cuda.synchronize()
    tm = (time.time() - start) / steps

    del fprop
    del sharedX
    del conv
    del sharedY
Example #23
class MLPTraining:
	def __init__(self, data_path="./datasets/", save_path="training.pkl", simulation_data = None, identifier = 0, preprocessor='uniform'):
		self.id = identifier
		self.data_path = data_path
		self.save_path = save_path
		if simulation_data is not None:
			self.sim_data = simulation_data
		else:
			self.sim_data = SimulationData(data_path)
		if not self.sim_data.is_loaded:
			self.sim_data.load_data()

		self.sim_data.preprocessor(kind = preprocessor)

		tmp = self.sim_data.split_train_test()
		self.datasets = {'train' : tmp[0], 'test' : tmp[1]}

		self.num_simulations = self.sim_data.num_simulations
		self.input_values = self.sim_data.input_values
		self.output_values = self.sim_data.output_values

	def set_structure(self, num_layers = 4, shape = 'linear'):
		structure = []

		lower_number = self.input_values
		for i in range(num_layers):
			upper_number = lower_number
			lower_number = self.input_values-(i+1)*(self.input_values-self.output_values)/num_layers
			structure.append([upper_number, lower_number])
		
		self.structure = structure
		return structure
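	# Worked example of the geometry above (not in the original code): with
	# input_values = 10, output_values = 2 and num_layers = 4, set_structure
	# returns [[10, 8], [8, 6], [6, 4], [4, 2]] -- each pair being the
	# [incoming, outgoing] width of one layer.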
		
	def get_structure(self):
		return self.structure
		
	def get_Linear_Layer(self, structure, i = 0):
		n_input, n_output = structure
		config = {
			'dim': n_output,
			'layer_name': ("l%d" % i),
			'irange': .5,
			'use_abs_loss': False,
			'use_bias': False,
			}
		return Linear(**config)

	def get_Sigmoid_Layer(self, structure, i = 0):
		n_input, n_output = structure
		config = {
			'dim': n_output,
			'layer_name': ("s%d" % i),
			'irange' : 0.05,
			}
		return Sigmoid(**config)

	def get_Tanh_Layer(self, structure, i = 0):
		n_input, n_output = structure
		config = {
			'dim': n_output,
			'layer_name': ("t%d" % i),
			'irange' : 0.05,
			}
		return Tanh(**config)
		
	def get_layers(self, act_function='linear'):
		self.layers = []
		i = 0
		for pair in self.structure:
			i += 1
			if(act_function == 'linear'):
				self.layers.append(self.get_Linear_Layer(structure = pair, i = i))
			if(act_function == 'sigmoid'):
				self.layers.append(self.get_Sigmoid_Layer(structure = pair, i = i))
			if(act_function == 'tanh'):
				self.layers.append(self.get_Tanh_Layer(structure = pair, i = i))
		return self.layers
		   
	def get_model(self, batch_size):
		vis = self.structure[0][0]
		self.model = MLP(layers = self.layers, nvis = vis, batch_size = batch_size, layer_name = None)
		return self.model
	   
	def set_training_criteria(self, 
							learning_rate=0.05, 
							cost=Default(), 
							batch_size=10, 
							max_epochs=10):
		
		self.training_alg = SGD(learning_rate = learning_rate, 
								cost = cost, 
								batch_size = batch_size, 
								monitoring_dataset = self.datasets, 
								termination_criterion = EpochCounter(max_epochs))
	
	def set_extensions(self, extensions):
		self.extensions = extensions #[MonitorBasedSaveBest(channel_name='objective',
												#save_path = './training/training_monitor_best.pkl')]
		
	def set_attributes(self, attributes):
		self.attributes = attributes

	def define_training_experiment(self, save_freq = 10):
		self.experiment = Train(dataset=self.datasets['train'], 
								model=self.model, 
								algorithm=self.training_alg, 
								save_path=self.save_path , 
								save_freq=save_freq, 
								allow_overwrite=True, 
								extensions=self.extensions)

	def train_experiment(self):
		self.experiment.main_loop()
		self.save_model()

	def save_model(self):
		self.model = serial.load(self.save_path)
		
	def predict(self, test=None, X=None, y=None):
		if test is not None:
			x_test = test.X
			y_test = test.y
		else:
			x_test = X
			y_test = y

		X=self.model.get_input_space().make_theano_batch()
		Y=self.model.fprop(X)
		f=theano.function([X], Y)

		y_pred = f(x_test)

		if y_test is not None:
			MSE = np.mean(np.square(y_test - y_pred))
			print "MSE:", MSE
			var = np.mean(np.square(y_test))
			print "Var:", var
			self.plot_prediction(y_test, y_pred)
		else:
			return y_pred

	def plot_prediction(self, y_test, y_pred):
		m = int(np.sqrt(self.output_values)) + 1
		f, axarr = plt.subplots(m,m)

		r = []
		s = []
		f = 0
		c = 0
		for i in range(self.output_values):
			x = np.array([])
			y = np.array([])
			for j in range(len(y_test)):
				x = np.append(x, y_test[j][i])
				y = np.append(y, y_pred[j][i])

			slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(x, y)
			r.append(r_value**2)
			axarr[f,c].plot(x, y, 'ro')
			c += 1
			if (c==m):
				c = 0
				f += 1

		plt.show()
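# Usage sketch (hypothetical dataset path and settings, not from the original
# code): the call order below simply mirrors the methods defined above and
# assumes SimulationData can load data from data_path.
trainer = MLPTraining(data_path="./datasets/", save_path="training.pkl")
trainer.set_structure(num_layers=4)
trainer.get_layers(act_function='tanh')
trainer.get_model(batch_size=10)
trainer.set_training_criteria(learning_rate=0.05, batch_size=10, max_epochs=10)
trainer.set_extensions([])
trainer.define_training_experiment(save_freq=10)
trainer.train_experiment()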
Example #24
                        num_pieces = 1,
                        kernel_shape = (4, 4),
                        pool_shape = (1, 1),
                        pool_stride=(1, 1),
                        irange = 0.05)
deconv = Deconv(layer_name = 'deconv',
                num_channels = 1,
                kernel_shape = (4, 4),
                irange = 0.05)

mlp = MLP(input_space =input_space,
        layers = [conv, deconv])

mlp.layers[1].transformer._filters.set_value(mlp.layers[0].transformer._filters.get_value())

x = input_space.get_theano_batch()
out = mlp.fprop(x)
f = theano.function([x], out)

data = MNIST('test')
data_specs = (input_space, 'features')
iter = data.iterator(mode = 'sequential', batch_size = 2, data_specs = data_specs)
pv = patch_viewer.PatchViewer((10, 10), (28, 28))
for item in iter:
    res = f(item)
    pv.add_patch(item[0,:,:,0])
    pv.add_patch(res[0,:,:,0])
    pv.show()
    break

from adversarial.deconv import Deconv


input_space = Conv2DSpace(shape=(2, 1), num_channels=16, axes=('c', 0, 1, 'b'))

deconv = Deconv(layer_name='deconv',
                num_channels=1,
                kernel_shape=(4, 4),
                output_stride=(2, 2),
                irange=0.)

mlp = MLP(input_space=input_space, layers=[deconv])

X = input_space.get_theano_batch()
f = theano.function([X], mlp.fprop(X))

# Construct dummy filters.
# Just use two for simplicity.
filter1 = np.array([[0, 1, 0, 1],
                    [1, 0, 1, 0],
                    [0, 1, 0, 1],
                    [1, 0, 1, 0]])
filter2 = np.array([[-1, 0, -1, 0],
                    [0, -1, 0, -1],
                    [-1, 0, -1, 0],
                    [0, -1, 0, -1]])

filters_dest = deconv.transformer._filters
new_filters = np.zeros((16, 4, 4), dtype=filters_dest.dtype)
new_filters[0] = filter1
Example #26
def test_flattener_layer():
    # To test the FlattenerLayer we create a very simple feed-forward neural
    # network with two parallel linear layers. We then create two separate
    # feed-forward neural networks with single linear layers. In principle,
    # these two models should be identical if we start from the same
    # parameters. This makes it easy to test that the composite layer works
    # as expected.

    # Create network with composite layers.
    mlp_composite = MLP(
        layers=[
            FlattenerLayer(
                CompositeLayer(
                    'composite',
                    [Linear(2, 'h0', 0.1),
                     Linear(2, 'h1', 0.1)],
                    {
                        0: [0],
                        1: [1]
                    }
                )
            )
        ],
        input_space=CompositeSpace([VectorSpace(5), VectorSpace(10)]),
        input_source=('features0', 'features1')
    )

    # Create network with single linear layer, corresponding to first
    # layer in the composite network.
    mlp_first_part = MLP(
        layers=[
            Linear(2, 'h0', 0.1)
        ],
        input_space=VectorSpace(5),
        input_source=('features0')
    )

    # Create network with single linear layer, corresponding to second
    # layer in the composite network.
    mlp_second_part = MLP(
        layers=[
            Linear(2, 'h1', 0.1)
        ],
        input_space=VectorSpace(10),
        input_source=('features1')
    )

    # Create dataset which we will test our networks against.
    shared_dataset = np.random.rand(20, 19).astype(theano.config.floatX)

    # Make dataset for composite network.
    dataset_composite = VectorSpacesDataset(
        (shared_dataset[:, 0:5],
         shared_dataset[:, 5:15],
         shared_dataset[:, 15:19]),
        (CompositeSpace([
            VectorSpace(5),
            VectorSpace(10),
            VectorSpace(4)]),
         ('features0', 'features1', 'targets'))
    )

    # Make dataset for first single linear layer network.
    dataset_first_part = VectorSpacesDataset(
        (shared_dataset[:, 0:5],
         shared_dataset[:, 15:17]),
        (CompositeSpace([
            VectorSpace(5),
            VectorSpace(2)]),
         ('features0', 'targets'))
    )

    # Make dataset for second single linear layer network.
    dataset_second_part = VectorSpacesDataset(
        (shared_dataset[:, 5:15],
         shared_dataset[:, 17:19]),
        (CompositeSpace([
            VectorSpace(10),
            VectorSpace(2)]),
         ('features1', 'targets'))
    )

    # Initialize all MLPs to start from zero weights.
    mlp_composite.layers[0].raw_layer.layers[0].set_weights(
        mlp_composite.layers[0].raw_layer.layers[0].get_weights() * 0.0)
    mlp_composite.layers[0].raw_layer.layers[1].set_weights(
        mlp_composite.layers[0].raw_layer.layers[1].get_weights() * 0.0)
    mlp_first_part.layers[0].set_weights(
        mlp_first_part.layers[0].get_weights() * 0.0)
    mlp_second_part.layers[0].set_weights(
        mlp_second_part.layers[0].get_weights() * 0.0)

    # Train all models with their respective datasets.
    train_composite = Train(dataset_composite, mlp_composite,
                            SGD(0.0001, batch_size=20))
    train_composite.algorithm.termination_criterion = EpochCounter(1)
    train_composite.main_loop()

    train_first_part = Train(dataset_first_part, mlp_first_part,
                             SGD(0.0001, batch_size=20))
    train_first_part.algorithm.termination_criterion = EpochCounter(1)
    train_first_part.main_loop()

    train_second_part = Train(dataset_second_part, mlp_second_part,
                              SGD(0.0001, batch_size=20))
    train_second_part.algorithm.termination_criterion = EpochCounter(1)
    train_second_part.main_loop()

    # Check that the composite feed-forward neural network has learned
    # same parameters as each individual feed-forward neural network.
    np.testing.assert_allclose(
        mlp_composite.layers[0].raw_layer.layers[0].get_weights(),
        mlp_first_part.layers[0].get_weights())
    np.testing.assert_allclose(
        mlp_composite.layers[0].raw_layer.layers[1].get_weights(),
        mlp_second_part.layers[0].get_weights())

    # Check that we get same output given the same input on a randomly
    # generated dataset.
    X_composite = mlp_composite.get_input_space().make_theano_batch()
    X_first_part = mlp_first_part.get_input_space().make_theano_batch()
    X_second_part = mlp_second_part.get_input_space().make_theano_batch()

    fprop_composite = theano.function(X_composite,
                                      mlp_composite.fprop(X_composite))
    fprop_first_part = theano.function([X_first_part],
                                       mlp_first_part.fprop(X_first_part))
    fprop_second_part = theano.function([X_second_part],
                                        mlp_second_part.fprop(X_second_part))

    X_data = np.random.random(size=(10, 15)).astype(theano.config.floatX)
    y_data = np.random.randint(low=0, high=10, size=(10, 4))

    np.testing.assert_allclose(fprop_composite(X_data[:, 0:5],
                               X_data[:, 5:15])[:, 0:2],
                               fprop_first_part(X_data[:, 0:5]))
    np.testing.assert_allclose(fprop_composite(X_data[:, 0:5],
                               X_data[:, 5:15])[:, 2:4],
                               fprop_second_part(X_data[:, 5:15]))

    # Finally check that calling the internal FlattenerLayer behaves
    # as we would expect. First, retrieve the FlattenerLayer.
    fl = mlp_composite.layers[0]

    # Check that it agrees on the input space.
    assert mlp_composite.get_input_space() == fl.get_input_space()

    # Check that it agrees on the parameters.
    for i in range(0, 4):
        np.testing.assert_allclose(fl.get_params()[i].eval(),
                                   mlp_composite.get_params()[i].eval())
Example #27
def test_composite_layer():
    """
    Test the routing functionality of the CompositeLayer
    """
    # Without routing
    composite_layer = CompositeLayer('composite_layer',
                                     [Linear(2, 'h0', irange=0),
                                      Linear(2, 'h1', irange=0),
                                      Linear(2, 'h2', irange=0)])
    mlp = MLP(nvis=2, layers=[composite_layer])
    for i in range(3):
        composite_layer.layers[i].set_weights(
            np.eye(2, dtype=theano.config.floatX)
        )
        composite_layer.layers[i].set_biases(
            np.zeros(2, dtype=theano.config.floatX)
        )
    X = tensor.matrix()
    y = mlp.fprop(X)
    funs = [theano.function([X], y_elem) for y_elem in y]
    x_numeric = np.random.rand(2, 2).astype('float32')
    y_numeric = [f(x_numeric) for f in funs]
    assert np.all(x_numeric == y_numeric)

    # With routing
    for inputs_to_layers in [{0: [1], 1: [2], 2: [0]},
                             {0: [1], 1: [0, 2], 2: []},
                             {0: [], 1: []}]:
        composite_layer = CompositeLayer('composite_layer',
                                         [Linear(2, 'h0', irange=0),
                                          Linear(2, 'h1', irange=0),
                                          Linear(2, 'h2', irange=0)],
                                         inputs_to_layers)
        input_space = CompositeSpace([VectorSpace(dim=2),
                                      VectorSpace(dim=2),
                                      VectorSpace(dim=2)])
        input_source = ('features0', 'features1', 'features2')
        mlp = MLP(input_space=input_space, input_source=input_source,
                  layers=[composite_layer])
        for i in range(3):
            composite_layer.layers[i].set_weights(
                np.eye(2, dtype=theano.config.floatX)
            )
            composite_layer.layers[i].set_biases(
                np.zeros(2, dtype=theano.config.floatX)
            )
        X = [tensor.matrix() for _ in range(3)]
        y = mlp.fprop(X)
        funs = [theano.function(X, y_elem, on_unused_input='ignore')
                for y_elem in y]
        x_numeric = [np.random.rand(2, 2).astype(theano.config.floatX)
                     for _ in range(3)]
        y_numeric = [f(*x_numeric) for f in funs]
        assert all([all([np.all(x_numeric[i] == y_numeric[j])
                         for j in inputs_to_layers[i]])
                    for i in inputs_to_layers])

    # Get the weight decay expressions from a composite layer
    composite_layer = CompositeLayer('composite_layer',
                                     [Linear(2, 'h0', irange=0.1),
                                      Linear(2, 'h1', irange=0.1)])
    input_space = VectorSpace(dim=10)
    mlp = MLP(input_space=input_space, layers=[composite_layer])
    for attr, coeff in product(['get_weight_decay', 'get_l1_weight_decay'],
                               [[0.7, 0.3], 0.5]):
        f = theano.function([], getattr(composite_layer, attr)(coeff))
        if is_iterable(coeff):
            g = theano.function(
                [], tensor.sum([getattr(layer, attr)(c) for c, layer
                                in zip(coeff, composite_layer.layers)])
            )
            assert np.allclose(f(), g())
        else:
            g = theano.function(
                [], tensor.sum([getattr(layer, attr)(coeff) for layer
                                in composite_layer.layers])
            )
            assert np.allclose(f(), g())
    for i in range(0, 4):
        np.testing.assert_allclose(fl.get_params()[i].eval(),
                                   mlp_composite.get_params()[i].eval())
Beispiel #30
0
    ann.monitor()
    if not t_algo.continue_learning(ann):
        break

# test: https://github.com/lisa-lab/pylearn2/blob/master/pylearn2/scripts/icml_2013_wrepl/emotions/make_submission.py
ds2 = DataPylearn2([test_set_x,test_set_y],[48,48,1],-1)
m = ds2.X.shape[0]
batch_size = 100 
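# Zero-pad the test set so the number of examples is an exact multiple of the
# batch size expected by the model.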
extra = (batch_size - m % batch_size) % batch_size
assert (m + extra) % batch_size == 0
if extra > 0:
    ds2.X = np.concatenate((ds2.X, np.zeros((extra, ds2.X.shape[1]),
            dtype=ds2.X.dtype)), axis=0)
    assert ds2.X.shape[0] % batch_size == 0
X = ann.get_input_space().make_batch_theano()
Y = ann.fprop(X)

from theano import tensor as T
y = T.argmax(Y, axis=1)
from theano import function
f = function([X], y)
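# Classify the test set batch by batch; the padded rows are dropped again via
# y[:m] below.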
y = []
for i in xrange(ds2.X.shape[0] // batch_size):
    x_arg = ds2.X[i * batch_size:(i + 1) * batch_size, :]
    if X.ndim > 2:
        x_arg = ds2.get_topological_view(x_arg)
    y.append(f(x_arg.astype(X.dtype)))
y = np.concatenate(y)
print y[:m]
print test_set_y
Beispiel #31
0
import theano
import numpy as np
from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix
from pylearn2.models.mlp import MLP, Linear, Sigmoid
from pylearn2.termination_criteria import EpochCounter
from pylearn2.training_algorithms.sgd import SGD

n = 200
p = 2
X = np.random.normal(0, 1, (n, p))
y = X[:, 0] * X[:, 1] + np.random.normal(0, .1, n)
y.shape = (n, 1)

ds = DenseDesignMatrix(X=X, y=y)

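# A small regression MLP: 10 sigmoid hidden units followed by a single linear
# output, trained with SGD for 200 epochs to fit y = x1 * x2 + noise.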
hidden_layer = Sigmoid(layer_name='hidden', dim=10, irange=.1, init_bias=1.)
output_layer = Linear(dim=1, layer_name='y', irange=.1)
trainer = SGD(learning_rate=.05, batch_size=10,
              termination_criterion=EpochCounter(200))
layers = [hidden_layer, output_layer]
ann = MLP(layers, nvis=2)
trainer.setup(ann, ds)

while True:
    trainer.train(dataset=ds)
    ann.monitor.report_epoch()
    ann.monitor()
    if not trainer.continue_learning(ann):
        break

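# Evaluate the trained network on the training inputs.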
inputs = X 
y_est = ann.fprop(theano.shared(inputs, name='inputs')).eval()

print(y_est.shape)
Beispiel #32
0
def test_composite_layer():
    """
    Test the routing functionality of the CompositeLayer
    """
    # Without routing
    composite_layer = CompositeLayer('composite_layer',
                                     [Linear(2, 'h0', irange=0),
                                      Linear(2, 'h1', irange=0),
                                      Linear(2, 'h2', irange=0)])
    mlp = MLP(nvis=2, layers=[composite_layer])
    for i in range(3):
        composite_layer.layers[i].set_weights(
            np.eye(2, dtype=theano.config.floatX)
        )
        composite_layer.layers[i].set_biases(
            np.zeros(2, dtype=theano.config.floatX)
        )
    X = tensor.matrix()
    y = mlp.fprop(X)
    funs = [theano.function([X], y_elem) for y_elem in y]
    x_numeric = np.random.rand(2, 2).astype('float32')
    y_numeric = [f(x_numeric) for f in funs]
    assert np.all(x_numeric == y_numeric)

    # With routing
    for inputs_to_layers in [{0: [1], 1: [2], 2: [0]},
                             {0: [1], 1: [0, 2], 2: []},
                             {0: [], 1: []}]:
        composite_layer = CompositeLayer('composite_layer',
                                         [Linear(2, 'h0', irange=0),
                                          Linear(2, 'h1', irange=0),
                                          Linear(2, 'h2', irange=0)],
                                         inputs_to_layers)
        input_space = CompositeSpace([VectorSpace(dim=2),
                                      VectorSpace(dim=2),
                                      VectorSpace(dim=2)])
        mlp = MLP(input_space=input_space, layers=[composite_layer])
        for i in range(3):
            composite_layer.layers[i].set_weights(
                np.eye(2, dtype=theano.config.floatX)
            )
            composite_layer.layers[i].set_biases(
                np.zeros(2, dtype=theano.config.floatX)
            )
        X = [tensor.matrix() for _ in range(3)]
        y = mlp.fprop(X)
        funs = [theano.function(X, y_elem, on_unused_input='ignore')
                for y_elem in y]
        x_numeric = [np.random.rand(2, 2).astype(theano.config.floatX)
                     for _ in range(3)]
        y_numeric = [f(*x_numeric) for f in funs]
        assert all([all([np.all(x_numeric[i] == y_numeric[j])
                         for j in inputs_to_layers[i]])
                    for i in inputs_to_layers])

    # Get the weight decay expressions from a composite layer
    composite_layer = CompositeLayer('composite_layer',
                                     [Linear(2, 'h0', irange=0.1),
                                      Linear(2, 'h1', irange=0.1)])
    input_space = VectorSpace(dim=10)
    mlp = MLP(input_space=input_space, layers=[composite_layer])
    for attr, coeff in product(['get_weight_decay', 'get_l1_weight_decay'],
                               [[0.7, 0.3], 0.5]):
        f = theano.function([], getattr(composite_layer, attr)(coeff))
        if is_iterable(coeff):
            g = theano.function(
                [], tensor.sum([getattr(layer, attr)(c) for c, layer
                                in zip(coeff, composite_layer.layers)])
            )
            assert np.allclose(f(), g())
        else:
            g = theano.function(
                [], tensor.sum([getattr(layer, attr)(coeff) for layer
                                in composite_layer.layers])
            )
            assert np.allclose(f(), g())
               layers=[
                   ConvElemwise(no, (kw, kh),
                                'ConvTest',
                                ConvNonlinearity(),
                                irange=0.1)
               ])

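    # Random input batch plus an output buffer sized for a strided valid
    # convolution: out = (in - kernel) // stride + 1 along each spatial axis.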
    inputBatch = np.random.randn(bs, ni, ih, iw)
    sharedX = theano.shared(inputBatch.astype('float32'))
    sharedY = theano.shared(
        np.random.randn(bs, no, (ih - kh) // dh + 1,
                        (iw - kw) // dw + 1).astype('float32'))

    X = theano.tensor.tensor4()

    Y = conv.fprop(X)

    fprop = theano.function([], [],
                            givens=[(X, sharedX)],
                            updates=[(sharedY, Y)],
                            on_unused_input='ignore')

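    # Time the average cost of one forward pass, synchronizing the GPU before
    # and after the timed loop so queued kernels do not skew the measurement.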
    theano.sandbox.cuda.synchronize()
    start = time.time()
    for i in range(steps):
        fprop()
    theano.sandbox.cuda.synchronize()
    tm = (time.time() - start) / steps

    del fprop
    del sharedX