Example #1
    def test_density_scaling_with_genarator(self):
        # We have different size generators

        def true_dens():
            g = gen1()
            for i, point in enumerate(g):
                yield stats.norm.logpdf(point).sum() * 10

        t = true_dens()
        # We have same size models
        with pm.Model() as model1:
            Normal('n', observed=gen1(), total_size=100)
            p1 = theano.function([], model1.logpt)

        with pm.Model() as model2:
            gen_var = generator(gen2())
            Normal('n', observed=gen_var, total_size=100)
            p2 = theano.function([], model2.logpt)

        for i in range(10):
            _1, _2, _t = p1(), p2(), next(t)
            np.testing.assert_almost_equal(_1,
                                           _t,
                                           decimal=select_by_precision(
                                               float64=7,
                                               float32=2))  # Value O(-50,000)
            np.testing.assert_almost_equal(_1, _2)
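This test leans on helpers the excerpt does not show: gen1/gen2 (defined in Example #2 below) and select_by_precision. A minimal sketch of the latter, assuming it merely picks a decimal tolerance based on theano.config.floatX:

import theano

def select_by_precision(float64, float32):
    # Assumed helper: looser tolerance under float32, tighter under float64.
    return float64 if theano.config.floatX == "float64" else float32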
Example #2
    def test_density_scaling_with_genarator(self):
        # We have different size generators
        def gen1():
            i = 0
            while True:
                yield np.ones((10, 100)) * i
                i += 1

        def gen2():
            i = 0
            while True:
                yield np.ones((20, 100)) * i
                i += 1

        # We have same size models
        with pm.Model() as model1:
            Normal('n', observed=gen1(), total_size=100)
            p1 = theano.function([], model1.logpt)

        with pm.Model() as model2:
            gen_var = generator(gen2())
            Normal('n', observed=gen_var, total_size=100)
            p2 = theano.function([], model2.logpt)

        # We want densities to be equal
        for _ in range(10):
            np.testing.assert_almost_equal(p1(), p2())
Example #3
    def test_density_scaling(self):
        with pm.Model() as model1:
            Normal('n', observed=[[1]], total_size=1)
            p1 = theano.function([], model1.logpt)

        with pm.Model() as model2:
            Normal('n', observed=[[1]], total_size=2)
            p2 = theano.function([], model2.logpt)
        self.assertEqual(p1() * 2, p2())
Example #4
    def test_density_scaling(self):
        with pm.Model() as model1:
            Normal('n', observed=[[1]], total_size=1)
            p1 = theano.function([], model1.logpt)

        with pm.Model() as model2:
            Normal('n', observed=[[1]], total_size=2)
            p2 = theano.function([], model2.logpt)
        assert p1() * 2 == p2()
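Both variants pin down the same contract: with a single observed row, total_size=2 doubles the model log-probability, i.e. the observed log-likelihood is scaled by total_size divided by the actual number of observations.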
Example #5
 def test_cloning_available(self):
     gop = generator(integers())
     res = gop**2
     shared = theano.shared(np.float32(10))
     res1 = theano.clone(res, {gop: shared})
     f = theano.function([], res1)
     assert f() == np.float32(100)
Example #6
 def test_cloning_available(self):
     gop = generator(integers())
     res = gop ** 2
     shared = theano.shared(np.float32(10))
     res1 = theano.clone(res, {gop: shared})
     f = theano.function([], res1)
     assert f() == np.float32(100)
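The integers() generator is not part of the excerpt; judging from the assertions here and in Examples #17/#18 (a stream of float32 scalars 0, 1, 2, ...), a plausible sketch is:

import numpy as np

def integers():
    # Assumed helper: endless stream of increasing float32 scalars.
    i = 0
    while True:
        yield np.float32(i)
        i += 1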
Example #7
    def test_take_along_axis_grad(self, shape, axis, samples):
        if axis < 0:
            _axis = len(shape) + axis
        else:
            _axis = axis
        # Setup the theano function
        t_arr, t_indices = self.get_input_tensors(shape)
        t_out2 = theano.grad(
            tt.sum(self._output_tensor(t_arr**2, t_indices, axis)),
            t_arr,
        )
        func = theano.function([t_arr, t_indices], [t_out2])

        # Test that the gradient gives the same output as what is expected
        arr, indices = self.get_input_values(shape, axis, samples)
        expected_grad = np.zeros_like(arr)
        slicer = [slice(None)] * len(shape)
        for i in range(indices.shape[axis]):
            slicer[axis] = i
            inds = indices[slicer].reshape(shape[:_axis] + (1, ) +
                                           shape[_axis + 1:])
            inds = _make_along_axis_idx(shape, inds, _axis)
            expected_grad[inds] += 1
        expected_grad *= 2 * arr
        out = func(arr, indices)[0]
        assert np.allclose(out, expected_grad)
Example #8
def test_elbo():
    mu0 = 1.5
    sigma = 1.0
    y_obs = np.array([1.6, 1.4])

    post_mu = np.array([1.88], dtype=theano.config.floatX)
    post_sd = np.array([1], dtype=theano.config.floatX)
    # Create a model for test
    with Model() as model:
        mu = Normal('mu', mu=mu0, sd=sigma)
        Normal('y', mu=mu, sd=1, observed=y_obs)

    # Create variational gradient tensor
    mean_field = MeanField(model=model)
    elbo = -KL(mean_field)()(mean_field.random())

    mean_field.shared_params['mu'].set_value(post_mu)
    mean_field.shared_params['rho'].set_value(np.log(np.exp(post_sd) - 1))

    f = theano.function([], elbo)
    elbo_mc = sum(f() for _ in range(10000)) / 10000

    # Exact value
    elbo_true = (-0.5 * (
        3 + 3 * post_mu ** 2 - 2 * (y_obs[0] + y_obs[1] + mu0) * post_mu +
        y_obs[0] ** 2 + y_obs[1] ** 2 + mu0 ** 2 + 3 * np.log(2 * np.pi)) +
                 0.5 * (np.log(2 * np.pi) + 1))
    np.testing.assert_allclose(elbo_mc, elbo_true, rtol=0, atol=1e-1)
Example #9
def test_elbo():
    mu0 = 1.5
    sigma = 1.0
    y_obs = np.array([1.6, 1.4])

    post_mu = np.array([1.88], dtype=theano.config.floatX)
    post_sd = np.array([1], dtype=theano.config.floatX)
    # Create a model for test
    with pm.Model() as model:
        mu = pm.Normal('mu', mu=mu0, sd=sigma)
        pm.Normal('y', mu=mu, sd=1, observed=y_obs)

    # Create variational gradient tensor
    mean_field = MeanField(model=model)
    with pm.theanof.change_flags(compute_test_value='off'):
        elbo = -pm.operators.KL(mean_field)()(10000)

    mean_field.shared_params['mu'].set_value(post_mu)
    mean_field.shared_params['rho'].set_value(np.log(np.exp(post_sd) - 1))

    f = theano.function([], elbo)
    elbo_mc = f()

    # Exact value
    elbo_true = (-0.5 * (
        3 + 3 * post_mu ** 2 - 2 * (y_obs[0] + y_obs[1] + mu0) * post_mu +
        y_obs[0] ** 2 + y_obs[1] ** 2 + mu0 ** 2 + 3 * np.log(2 * np.pi)) +
                 0.5 * (np.log(2 * np.pi) + 1))
    np.testing.assert_allclose(elbo_mc, elbo_true, rtol=0, atol=1e-1)
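For context: elbo_true is the closed-form ELBO of this conjugate model, E_q[log p(mu) + log p(y | mu)] + H(q), where q = N(post_mu, 1) and H(q) = 0.5 * (log(2 * pi) + 1). A rough Monte Carlo cross-check with scipy, under the same assumed setup:

import numpy as np
from scipy import stats

post_mu, mu0, y_obs = 1.88, 1.5, np.array([1.6, 1.4])
mu_s = np.random.normal(post_mu, 1.0, size=1000000)    # draws from q
log_joint = (stats.norm.logpdf(mu_s, mu0, 1.0)         # log p(mu)
             + stats.norm.logpdf(y_obs[0], mu_s, 1.0)  # log p(y1 | mu)
             + stats.norm.logpdf(y_obs[1], mu_s, 1.0)) # log p(y2 | mu)
elbo_est = log_joint.mean() + 0.5 * (np.log(2 * np.pi) + 1)  # + H(q)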
Example #10
def test_elbo():
    mu0 = 1.5
    sigma = 1.0
    y_obs = np.array([1.6, 1.4])

    post_mu = np.array([1.88], dtype=theano.config.floatX)
    post_sd = np.array([1], dtype=theano.config.floatX)
    # Create a model for test
    with Model() as model:
        mu = Normal('mu', mu=mu0, sd=sigma)
        Normal('y', mu=mu, sd=1, observed=y_obs)

    # Create variational gradient tensor
    mean_field = MeanField(model=model)
    elbo = -KL(mean_field)()(10000)

    mean_field.shared_params['mu'].set_value(post_mu)
    mean_field.shared_params['rho'].set_value(np.log(np.exp(post_sd) - 1))

    f = theano.function([], elbo)
    elbo_mc = f()

    # Exact value
    elbo_true = (-0.5 * (3 + 3 * post_mu**2 - 2 *
                         (y_obs[0] + y_obs[1] + mu0) * post_mu + y_obs[0]**2 +
                         y_obs[1]**2 + mu0**2 + 3 * np.log(2 * np.pi)) + 0.5 *
                 (np.log(2 * np.pi) + 1))
    np.testing.assert_allclose(elbo_mc, elbo_true, rtol=0, atol=1e-1)
Example #11
    def test_gradient_with_scaling(self):
        with pm.Model() as model1:
            genvar = generator(gen1())
            m = Normal('m')
            Normal('n', observed=genvar, total_size=1000)
            grad1 = theano.function([m], tt.grad(model1.logpt, m))
        with pm.Model() as model2:
            m = Normal('m')
            shavar = theano.shared(np.ones((1000, 100)))
            Normal('n', observed=shavar)
            grad2 = theano.function([m], tt.grad(model2.logpt, m))

        for i in range(10):
            shavar.set_value(np.ones((100, 100)) * i)
            g1 = grad1(1)
            g2 = grad2(1)
            np.testing.assert_almost_equal(g1, g2)
Example #12
    def compile(self, parameters, x, mask, y, cost):
        weight_decay = ((self.w**2).sum()) * self.decay
        cost += weight_decay
        gradients = [T.grad(cost, param) for param in self.params]

        updates = [(p, p - self.lr * g) for p, g in zip(parameters, gradients)]
        train = theano.function([x, mask, y, self.lr], cost, updates=updates)
        return train
Example #13
    def test_gradient_with_scaling(self):
        with pm.Model() as model1:
            genvar = generator(gen1())
            m = Normal('m')
            Normal('n', observed=genvar, total_size=1000)
            grad1 = theano.function([m], tt.grad(model1.logpt, m))
        with pm.Model() as model2:
            m = Normal('m')
            shavar = theano.shared(np.ones((1000, 100)))
            Normal('n', observed=shavar)
            grad2 = theano.function([m], tt.grad(model2.logpt, m))

        for i in range(10):
            shavar.set_value(np.ones((100, 100)) * i)
            g1 = grad1(1)
            g2 = grad2(1)
            np.testing.assert_almost_equal(g1, g2)
Example #14
    def __init__(self,
                 cost,
                 grad_vars,
                 extra_vars=None,
                 dtype=None,
                 casting='no',
                 **kwargs):
        if extra_vars is None:
            extra_vars = []

        names = [arg.name for arg in grad_vars + extra_vars]
        if any(name is None for name in names):
            raise ValueError('Arguments must be named.')
        if len(set(names)) != len(names):
            raise ValueError('Names of the arguments are not unique.')

        if cost.ndim > 0:
            raise ValueError('Cost must be a scalar.')

        self._grad_vars = grad_vars
        self._extra_vars = extra_vars
        self._extra_var_names = set(var.name for var in extra_vars)
        self._cost = cost
        self._ordering = ArrayOrdering(grad_vars)
        self.size = self._ordering.size
        self._extra_are_set = False
        if dtype is None:
            dtype = theano.config.floatX
        self.dtype = dtype
        for var in self._grad_vars:
            if not np.can_cast(var.dtype, self.dtype, casting):
                raise TypeError('Invalid dtype for variable %s. Can not '
                                'cast to %s with casting rule %s.' %
                                (var.name, self.dtype, casting))
            if not np.issubdtype(var.dtype, float):
                raise TypeError('Invalid dtype for variable %s. Must be '
                                'floating point but is %s.' %
                                (var.name, var.dtype))

        givens = []
        self._extra_vars_shared = {}
        for var in extra_vars:
            shared = theano.shared(var.tag.test_value, var.name + '_shared__')
            self._extra_vars_shared[var.name] = shared
            givens.append((var, shared))

        self._vars_joined, self._cost_joined = self._build_joined(
            self._cost, grad_vars, self._ordering.vmap)

        grad = tt.grad(self._cost_joined, self._vars_joined)
        grad.name = '__grad'

        inputs = [self._vars_joined]

        self._theano_function = theano.function(inputs,
                                                [self._cost_joined, grad],
                                                givens=givens,
                                                **kwargs)
Example #15
 def test_ndim(self):
     for ndim in range(10):
         res = list(itertools.islice(integers_ndim(ndim), 0, 2))
         generator = GeneratorAdapter(integers_ndim(ndim))
         gop = GeneratorOp(generator)()
         f = theano.function([], gop)
         assert ndim == res[0].ndim
         np.testing.assert_equal(f(), res[0])
         np.testing.assert_equal(f(), res[1])
Example #16
 def test_ndim(self):
     for ndim in range(10):
         res = list(itertools.islice(integers_ndim(ndim), 0, 2))
         generator = GeneratorAdapter(integers_ndim(ndim))
         gop = GeneratorOp(generator)()
         f = theano.function([], gop)
         assert ndim == res[0].ndim
         np.testing.assert_equal(f(), res[0])
         np.testing.assert_equal(f(), res[1])
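integers_ndim(ndim) presumably generalizes integers() to arrays of a given rank; a sketch consistent with the ndim assertion above:

import numpy as np

def integers_ndim(ndim):
    # Assumed helper: endless stream of arrays with the requested ndim.
    i = 0
    while True:
        yield np.ones((2,) * ndim) * i
        i += 1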
Example #17
 def test_basic(self):
     generator = GeneratorAdapter(integers())
     gop = GeneratorOp(generator)()
     assert gop.tag.test_value == np.float32(0)
     f = theano.function([], gop)
     assert f() == np.float32(0)
     assert f() == np.float32(1)
     for _ in range(2, 100):
         f()
     assert f() == np.float32(100)
Example #18
 def test_basic(self):
     generator = GeneratorAdapter(integers())
     gop = GeneratorOp(generator)()
     assert gop.tag.test_value == np.float32(0)
     f = theano.function([], gop)
     assert f() == np.float32(0)
     assert f() == np.float32(1)
     for _ in range(2, 100):
         f()
     assert f() == np.float32(100)
Example #19
    def test_default_value(self):
        def gen():
            for i in range(2):
                yield np.ones((10, 10)) * i

        gop = generator(gen(), np.ones((10, 10)) * 10)
        f = theano.function([], gop)
        np.testing.assert_equal(np.ones((10, 10)) * 0, f())
        np.testing.assert_equal(np.ones((10, 10)) * 1, f())
        np.testing.assert_equal(np.ones((10, 10)) * 10, f())
        with pytest.raises(ValueError):
            gop.set_default(1)
Example #20
    def test_default_value(self):
        def gen():
            for i in range(2):
                yield np.ones((10, 10)) * i

        gop = generator(gen(), np.ones((10, 10)) * 10)
        f = theano.function([], gop)
        np.testing.assert_equal(np.ones((10, 10)) * 0, f())
        np.testing.assert_equal(np.ones((10, 10)) * 1, f())
        np.testing.assert_equal(np.ones((10, 10)) * 10, f())
        with pytest.raises(ValueError):
            gop.set_default(1)
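The final set_default(1) raises presumably because the default value must match the generator's tensor type (a 10x10 float matrix here), and a plain Python scalar does not.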
Example #21
    def compile(self, params, x1, x2, y, cost):
        gradients = [T.grad(cost, param) for param in params]

        zipped_grads = [
            theano.shared(p.get_value() * numpy_floatX(0.), name='%s_grad' % p)
            for p in params
        ]
        running_up2 = [
            theano.shared(p.get_value() * numpy_floatX(0.), name='%s_rup2' % p)
            for p in params
        ]
        running_grads2 = [
            theano.shared(p.get_value() * numpy_floatX(0.),
                          name='%s_rgrad2' % p) for p in params
        ]

        zgup = [(zg, g) for zg, g in zip(zipped_grads, gradients)]
        rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g**2))
                 for rg2, g in zip(running_grads2, gradients)]

        train = theano.function([x1, x2, y],
                                cost,
                                updates=zgup + rg2up,
                                name='adadelta_train')

        updir = [
            -T.sqrt(ru2 + 1e-6) / T.sqrt(rg2 + 1e-6) * zg
            for zg, ru2, rg2 in zip(zipped_grads, running_up2, running_grads2)
        ]

        ru2up = [(ru2, 0.95 * ru2 + 0.05 * (ud**2))
                 for ru2, ud in zip(running_up2, updir)]
        param_up = [(p, p + ud) for p, ud in zip(params, updir)]

        update = theano.function([], [],
                                 updates=ru2up + param_up,
                                 on_unused_input='ignore',
                                 name='adadelta_update')
        return train, update
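Note the two-phase structure: train evaluates the cost and refreshes the accumulators (zipped_grads, running_grads2) for the current batch, while update derives the rescaled Adadelta step from those accumulators and applies it to the parameters, matching Zeiler's formulation with decay 0.95 and epsilon 1e-6.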
Example #22
    def test_density_scaling_with_genarator(self):
        # We have different size generators

        def true_dens():
            g = gen1()
            for i, point in enumerate(g):
                yield stats.norm.logpdf(point).sum() * 10
        t = true_dens()
        # We have same size models
        with pm.Model() as model1:
            Normal('n', observed=gen1(), total_size=100)
            p1 = theano.function([], model1.logpt)

        with pm.Model() as model2:
            gen_var = generator(gen2())
            Normal('n', observed=gen_var, total_size=100)
            p2 = theano.function([], model2.logpt)

        for i in range(10):
            _1, _2, _t = p1(), p2(), next(t)
            np.testing.assert_almost_equal(_1, _t)
            np.testing.assert_almost_equal(_1, _2)
Example #23
    def __init__(self, cost, grad_vars, extra_vars=None, dtype=None,
                 casting='no', **kwargs):
        if extra_vars is None:
            extra_vars = []

        names = [arg.name for arg in grad_vars + extra_vars]
        if any(name is None for name in names):
            raise ValueError('Arguments must be named.')
        if len(set(names)) != len(names):
            raise ValueError('Names of the arguments are not unique.')

        if cost.ndim > 0:
            raise ValueError('Cost must be a scalar.')

        self._grad_vars = grad_vars
        self._extra_vars = extra_vars
        self._extra_var_names = set(var.name for var in extra_vars)
        self._cost = cost
        self._ordering = ArrayOrdering(grad_vars)
        self.size = self._ordering.size
        self._extra_are_set = False
        if dtype is None:
            dtype = theano.config.floatX
        self.dtype = dtype
        for var in self._grad_vars:
            if not np.can_cast(var.dtype, self.dtype, casting):
                raise TypeError('Invalid dtype for variable %s. Can not '
                                'cast to %s with casting rule %s.'
                                % (var.name, self.dtype, casting))
            if not np.issubdtype(var.dtype, float):
                raise TypeError('Invalid dtype for variable %s. Must be '
                                'floating point but is %s.'
                                % (var.name, var.dtype))

        givens = []
        self._extra_vars_shared = {}
        for var in extra_vars:
            shared = theano.shared(var.tag.test_value, var.name + '_shared__')
            self._extra_vars_shared[var.name] = shared
            givens.append((var, shared))

        self._vars_joined, self._cost_joined = self._build_joined(
            self._cost, grad_vars, self._ordering.vmap)

        grad = tt.grad(self._cost_joined, self._vars_joined)
        grad.name = '__grad'

        inputs = [self._vars_joined]

        self._theano_function = theano.function(
            inputs, [self._cost_joined, grad], givens=givens, **kwargs)
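Routing extra_vars through shared variables via givens means their values can be set once on the shared storage rather than passed as inputs on every call; the single compiled function then returns the joined cost together with its gradient with respect to the flattened parameter vector.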
Example #24
def encode_images(images):
  """Encodes images using model saved to disk earlier."""

  json_file = open('model.json', 'r')
  loaded_model_json = json_file.read()
  json_file.close()
  model = model_from_json(loaded_model_json)
  model.load_weights('model.h5')
  model.compile(optimizer='adadelta', loss='binary_crossentropy')
  get_activations = theano.function([model.layers[0].input],
                                    model.layers[3].output,
                                    allow_input_downcast=True)
  data = get_activations(images)
  np.save('encoded_imgs.npy', data)
Example #25
    def test_set_gen_and_exc(self):
        def gen():
            for i in range(2):
                yield np.ones((10, 10)) * i

        gop = generator(gen())
        f = theano.function([], gop)
        np.testing.assert_equal(np.ones((10, 10)) * 0, f())
        np.testing.assert_equal(np.ones((10, 10)) * 1, f())
        with pytest.raises(StopIteration):
            f()
        gop.set_gen(gen())
        np.testing.assert_equal(np.ones((10, 10)) * 0, f())
        np.testing.assert_equal(np.ones((10, 10)) * 1, f())
Example #26
    def test_set_gen_and_exc(self):
        def gen():
            for i in range(2):
                yield np.ones((10, 10)) * i

        gop = generator(gen())
        f = theano.function([], gop)
        np.testing.assert_equal(np.ones((10, 10)) * 0, f())
        np.testing.assert_equal(np.ones((10, 10)) * 1, f())
        with pytest.raises(StopIteration):
            f()
        gop.set_gen(gen())
        np.testing.assert_equal(np.ones((10, 10)) * 0, f())
        np.testing.assert_equal(np.ones((10, 10)) * 1, f())
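Once the underlying generator is exhausted, the compiled function raises StopIteration; set_gen swaps in a fresh generator so the already-compiled graph can be reused without recompilation.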
Example #27
def encode_images(images):
    """Encodes images using model saved to disk earlier."""

    json_file = open('model.json', 'r')
    loaded_model_json = json_file.read()
    json_file.close()
    model = model_from_json(loaded_model_json)
    model.load_weights('model.h5')
    model.compile(optimizer='adadelta', loss='binary_crossentropy')
    get_activations = theano.function([model.layers[0].input],
                                      model.layers[3].output,
                                      allow_input_downcast=True)
    data = get_activations(images)
    np.save('encoded_imgs.npy', data)
Example #28
def abstractfit(model, X_train, y):
    nfeat = X_train.shape[1]
    model.configs['nvis'] = nfeat
    dataset = model.dataset_adaptor.fit_transform(X_train, y)
    super(type(model), model).__init__(**model.configs)
    trainer = model.trainer.get_trainer(model, dataset)
    trainer.main_loop()
    # define estimator
    X = tensor.matrix()
    # ff = theano.function([X], model.encode(X), compile.mode.Mode(linker='py', optimizer='fast_compile'))
    ff = theano.function([X], model.fprop(X))

    # print ff(X_train)
    model.estimator = ff
    return model
Example #29
def abstractfit(model, X_train, y):
    nfeat = X_train.shape[1]
    model.configs['nvis'] = nfeat
    dataset = model.dataset_adaptor.fit_transform(X_train, y)
    super(type(model), model).__init__(**model.configs)
    trainer = model.trainer.get_trainer(model, dataset)
    trainer.main_loop()
    # define estimator
    X = tensor.matrix()
    # ff = theano.function([X], model.encode(X), compile.mode.Mode(linker='py', optimizer='fast_compile'))
    ff = theano.function([X], model.fprop(X))

    # print ff(X_train)
    model.estimator = ff
    return model
Example #30
    def makefn(self, outs, mode=None, *args, **kwargs):
        """Compiles a Theano function which returns `outs` and takes the variable
        ancestors of `outs` as inputs.

        Parameters
        ----------
        outs : Theano variable or iterable of Theano variables
        mode : Theano compilation mode

        Returns
        -------
        Compiled Theano function
        """
        return theano.function(self.vars, outs,
                               allow_input_downcast=True,
                               on_unused_input='ignore',
                               accept_inplace=True,
                               mode=mode, *args, **kwargs)
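A hedged usage sketch for makefn, assuming a PyMC3 model with two free variables (names hypothetical):

import pymc3 as pm

with pm.Model() as model:
    a = pm.Normal('a', mu=0, sd=1)
    b = pm.Normal('b', mu=a, sd=1)

logp_fn = model.makefn(model.logpt)  # inputs are the model's free variables
print(logp_fn(0.0, 0.5))             # log-probability at a=0.0, b=0.5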
Example #31
    def makefn(self, outs, mode=None, *args, **kwargs):
        """Compiles a Theano function which returns `outs` and takes the variable
        ancestors of `outs` as inputs.

        Parameters
        ----------
        outs : Theano variable or iterable of Theano variables
        mode : Theano compilation mode

        Returns
        -------
        Compiled Theano function
        """
        with self:
            return theano.function(self.vars, outs,
                                   allow_input_downcast=True,
                                   on_unused_input='ignore',
                                   accept_inplace=True,
                                   mode=mode, *args, **kwargs)
Example #32
            PyErr_Format(PyExc_RuntimeError,
                         "gpuarray error: kEye: %%s. n%%lu, m=%%lu.",
                         GpuKernel_error(&%(kname)s, err),
                         (unsigned long)dims[0], (unsigned long)dims[1]);
            %(fail)s;
        }
        if(%(sync)d)
            GpuArray_sync(&%(z)s->ga);
        """ % locals()

        return s

    def c_code_cache_version(self):
        return (21, 4)


mult4plus5op = GpuAXPBOp(4, 5)

x = theano.tensor.matrix('x')
z = mult4plus5op(x)

theano.printing.debugprint(z)
print("Compiling")
f = theano.function([x], z)
theano.printing.debugprint(f)

print("Eval")
ind = numpy.random.rand(3, 2).astype(theano.config.floatX)
print("Equality", numpy.allclose(f(ind), 2 * ind))
print(mult4plus5op)
Example #33
    def __init__(self, x, y, batch_size, videos, kernels, pools, n_input, n_output, hidden_input, params=None):
        learning_rate = 0.1
        rng = numpy.random.RandomState(1234)

        print '... building the model'
        sys.stdout.flush()

        if not params:
            # Construct the first convolutional pooling layer:
            # filtering reduces the image size to (28-5+1,28-5+1)=(24,24)
            # maxpooling reduces this further to (24/2,24/2) = (12,12)
            # 4D output tensor is thus of shape (batch_size,nkerns[0],12,12)
            layer0 = ConvLayer(x, n_input[0], n_output[0], kernels[0], videos[0], pools[0],
                               batch_size, 'L0', rng)

            layer1 = ConvLayer(layer0.output, n_input[1], n_output[1], kernels[1], videos[1], pools[1],
                               batch_size, 'L1', rng)

            layer2_input = layer1.output.flatten(2)

            # construct a fully-connected sigmoidal layer
            layer2 = HiddenLayer(rng, input=layer2_input, n_in=hidden_input,
                                 n_out=batch_size, activation=T.tanh)

            # classify the values of the fully-connected sigmoidal layer
            layer3 = LogisticRegression(input=layer2.output, n_in=batch_size, n_out=2)
        else:

            layer0 = ConvLayer(x, n_input[0], n_output[0], kernels[0], videos[0], pools[0],
                               batch_size, 'L0', rng, True, params[6], params[7])

            layer1 = ConvLayer(layer0.output, n_input[1], n_output[1], kernels[1], videos[1], pools[1],
                               batch_size, 'L1', rng, True, params[4], params[5])

            layer2_input = layer1.output.flatten(2)

            # construct a fully-connected sigmoidal layer
            layer2 = HiddenLayer(rng, input=layer2_input, n_in=hidden_input,
                                 n_out=batch_size, activation=T.tanh, W=params[2], b=params[3])

            # classify the values of the fully-connected sigmoidal layer
            layer3 = LogisticRegression(input=layer2.output, n_in=batch_size, n_out=2, W=params[0], b=params[1])

        # the cost we minimize during training is the NLL of the model
        cost = layer3.negative_log_likelihood(y)

        # create a list of all model parameters to be fit by gradient descent
        self.params = layer3.params + layer2.params + layer1.params + layer0.params

        # create a list of gradients for all model parameters
        grads = T.grad(cost, self.params)

        # train_model is a function that updates the model parameters by
        # SGD Since this model has many parameters, it would be tedious to
        # manually create an update rule for each model parameter. We thus
        # create the updates list by automatically looping over all
        # (params[i],grads[i]) pairs.
        updates = []
        for param_i, grad_i in zip(self.params, grads):
            updates.append((param_i, param_i - learning_rate * grad_i))

        self.train_model = theano.function([x, y], cost, updates=updates)
        self.validate_model = theano.function(inputs=[x, y], outputs=layer3.errors(y))
        self.predict = theano.function(inputs=[x], outputs=layer3.y_pred)

        print '... building done'
        sys.stdout.flush()
Example #34
def fit(values, data_set, params):
    model = tied_dropout_iterative_model

    OutputLog().write('Model: {0}'.format(model.__name__))

    if len(params) == 1:
        update_param(params[0], values)
    else:
        for value, param in zip(values, params):
            update_param(param, value)

    model_x, model_y, hidden_x, hidden_y, loss, outputs, hooks = model.build_model(x_var,
                                                                                   data_set.trainset[0].shape[1],
                                                                                   y_var,
                                                                                   data_set.trainset[1].shape[1],
                                                                                   layer_sizes=Params.LAYER_SIZES,
                                                                                   parallel_width=Params.PARALLEL_WIDTH,
                                                                                   drop_prob=Params.DROPOUT,
                                                                                   weight_init=Params.WEIGHT_INIT)

    params_x = lasagne.layers.get_all_params(model_x, trainable=True)
    params_y = lasagne.layers.get_all_params(model_y, trainable=True)

    updates = OrderedDict(batchnormalizeupdates(hooks, 100))

    params_x.extend(params_y)

    params = lasagne.utils.unique(params_x)

    current_learning_rate = Params.BASE_LEARNING_RATE

    updates.update(
        lasagne.updates.momentum(loss, params, learning_rate=current_learning_rate, momentum=Params.MOMENTUM))

    train_fn = theano.function([x_var, y_var], [loss] + outputs.values(), updates=updates)

    test_y = theano.function([x_var, y_var],
                             [lasagne.layers.get_output(layer, moving_avg_hooks=hooks, deterministic=True) for layer in
                              hidden_x],
                             on_unused_input='ignore')
    test_x = theano.function([x_var, y_var],
                             [lasagne.layers.get_output(layer, moving_avg_hooks=hooks, deterministic=True) for layer in
                              hidden_y],
                             on_unused_input='ignore')

    batch_number = data_set.trainset[0].shape[0] / Params.BATCH_SIZE

    output_string = '{0}/{1} loss: {2} '
    output_string += ' '.join(['{0}:{{{1}}}'.format(key, index + 3) for index, key in enumerate(outputs.keys())])

    for epoch in range(Params.EPOCH_NUMBER):
        OutputLog().write('Epoch {0}'.format(epoch))
        for index, batch in enumerate(
                iterate_minibatches(data_set.trainset[0], data_set.trainset[1], Params.BATCH_SIZE, True)):
            input_x, input_y = batch
            train_loss = train_fn(input_x, input_y)
            OutputLog().write(output_string.format(index, batch_number, *train_loss))

    x_values = test_y(data_set.tuning[0], data_set.tuning[1])
    y_values = test_x(data_set.tuning[0], data_set.tuning[1])

    OutputLog().write('\nValidating model\n')

    for index, (x, y) in enumerate(zip(x_values, y_values)):
        search_recall, describe_recall = complete_rank(x, y, data_set.reduce_val)
        validation_loss = calculate_reconstruction_error(x, y)
        correlation = calculate_mardia(x, y, 0)

        OutputLog().write('Layer {0} - loss: {1}, correlation: {2}, recall: {3}'.format(index,
                                                                                        validation_loss,
                                                                                        correlation,
                                                                                        sum(search_recall) + sum(
                                                                                            describe_recall)))

    return sum(search_recall) + sum(describe_recall)
Example #35
cost = -T.mean(T.log(model)[T.arange(y.shape[0]), y]) + 0.0001 * (
    W1**2).sum() + 0.0001 * (W2**2).sum()

g_params = T.grad(cost=cost, wrt=params)

learning_rate = 0.01
updates = [(param, param - learning_rate * gparam)
           for param, gparam in zip(params, g_params)]

index = T.lscalar()

train_model = theano.function(
    inputs=[index],
    outputs=[cost, error],
    updates=updates,
    givens={
        x: train_set_x[index * batch_size:(index + 1) * batch_size],
        y: train_set_y[index * batch_size:(index + 1) * batch_size]
    })

validate_model = theano.function(inputs=[x, y], outputs=[cost, error])

print("Training")

n_epochs = 1000
n_train_batches = train_set[0].shape[0] // batch_size

n_iters = n_epochs * n_train_batches
train_loss = numpy.zeros(n_iters)
train_error = numpy.zeros(n_iters)
Example #36
    else:
        updates = OrderedDict()

    params_x.extend(params_y)

    params = lasagne.utils.unique(params_x)

    current_learning_rate = Params.BASE_LEARNING_RATE

    updates.update(
        lasagne.updates.nesterov_momentum(loss,
                                          params,
                                          learning_rate=current_learning_rate,
                                          momentum=Params.MOMENTUM))

    train_fn = theano.function([x_var, y_var], [loss] + outputs.values(),
                               updates=updates)

    inference_model_y = theano.function([x_var], [
        lasagne.layers.get_output(
            layer, moving_avg_hooks=hooks, deterministic=True)
        for layer in hidden_x
    ],
                                        on_unused_input='ignore')
    inference_model_x = theano.function([y_var], [
        lasagne.layers.get_output(
            layer, moving_avg_hooks=hooks, deterministic=True)
        for layer in hidden_y
    ],
                                        on_unused_input='ignore')

    batch_number = data_set.trainset[0].shape[0] / Params.BATCH_SIZE
Example #37
def run_experiment(experiment_values, data_parameters, path):
    id = uuid.uuid4()
    OutputLog().set_output_path(path, suffix=str(id))

    top = 0

    param_backup = copy.deepcopy(Params.__dict__)
    update_param(experiment_values)

    y_var = tensor.fmatrix()
    x_var = tensor.fmatrix()

    # construct data set
    data_set = Container().create(data_parameters['name'], data_parameters)
    data_set.load()

    model_results = {'train': [], 'validate': []}

    model = tied_dropout_iterative_model

    model_x, model_y, hidden_x, hidden_y, loss, outputs, hooks = model.build_model(x_var,
                                                                                   data_set.trainset[0].shape[1],
                                                                                   y_var,
                                                                                   data_set.trainset[1].shape[1],
                                                                                   layer_sizes=Params.LAYER_SIZES,
                                                                                   parallel_width=Params.PARALLEL_WIDTH,
                                                                                   drop_prob=Params.DROPOUT,
                                                                                   weight_init=Params.WEIGHT_INIT)

    params_x = lasagne.layers.get_all_params(model_x, trainable=True)
    params_y = lasagne.layers.get_all_params(model_y, trainable=True)

    if hooks:
        updates = OrderedDict(batchnormalizeupdates(hooks, 100))
    else:
        updates = OrderedDict()

    current_learning_rate = Params.BASE_LEARNING_RATE

    params_x.extend(params_y)
    params = lasagne.utils.unique(params_x)

    updates.update(
        lasagne.updates.momentum(loss, params, learning_rate=current_learning_rate, momentum=Params.MOMENTUM))

    train_fn = theano.function([x_var, y_var], [loss] + outputs.values(), updates=updates)

    test_y = theano.function([x_var, y_var],
                             [lasagne.layers.get_output(layer, moving_avg_hooks=hooks, deterministic=True) for layer in
                              hidden_x],
                             on_unused_input='ignore')
    test_x = theano.function([x_var, y_var],
                             [lasagne.layers.get_output(layer, moving_avg_hooks=hooks, deterministic=True) for layer in
                              hidden_y],
                             on_unused_input='ignore')

    batch_number = data_set.trainset[0].shape[0] / Params.BATCH_SIZE

    output_string = '{0}/{1} loss: {2} '
    output_string += ' '.join(['{0}:{{{1}}}'.format(key, index + 3) for index, key in enumerate(outputs.keys())])

    for epoch in range(Params.EPOCH_NUMBER):
        OutputLog().write('Epoch {0}'.format(epoch))

        model_results['train'].append({'loss': []})
        model_results['validate'].append({})

        for label in outputs.keys():
            model_results['train'][epoch][label] = []

        for index, batch in enumerate(
                iterate_minibatches(data_set.trainset[0], data_set.trainset[1], Params.BATCH_SIZE, True)):
            input_x, input_y = batch
            train_loss = train_fn(numpy.cast[theano.config.floatX](input_x),
                                  numpy.cast[theano.config.floatX](input_y))

            model_results['train'][epoch]['loss'].append(train_loss[0])
            for label, value in zip(outputs.keys(), train_loss[1:]):
                model_results['train'][epoch][label].append(value)

            OutputLog().write(output_string.format(index, batch_number, *train_loss))

        if Params.CROSS_VALIDATION:
            x_values = test_y(data_set.tuning[0], data_set.tuning[1])
            y_values = test_x(data_set.tuning[0], data_set.tuning[1])

            OutputLog().write('\nValidating model\n')

            if VALIDATE_ALL:
                for index, (x, y) in enumerate(zip(x_values, y_values)):
                    search_recall, describe_recall = complete_rank(x, y, data_set.reduce_val)
                    validation_loss = calculate_reconstruction_error(x, y)
                    correlation = calculate_mardia(x, y, top)

                    OutputLog().write('Layer {0} - loss: {1}, correlation: {2}, recall: {3}'.format(index,
                                                                                                    validation_loss,
                                                                                                    correlation,
                                                                                                    sum(
                                                                                                        search_recall) + sum(
                                                                                                        describe_recall)))
            else:
                middle_x = x_values[Params.TEST_LAYER]
                middle_y = y_values[Params.TEST_LAYER]
                search_recall, describe_recall = complete_rank(middle_x, middle_y, data_set.reduce_val)
                validation_loss = calculate_reconstruction_error(middle_x, middle_y)
                correlation = calculate_mardia(middle_x, middle_y, top)
                mean_x = numpy.mean(numpy.mean(middle_x, axis=0))
                mean_y = numpy.mean(numpy.mean(middle_y, axis=0))
                var_x = numpy.mean(numpy.var(middle_x, axis=0))
                var_y = numpy.mean(numpy.var(middle_y, axis=0))

                OutputLog().write('Layer - loss: {1}, correlation: {2}, recall: {3}, mean_x: {4}, mean_y: {5},'
                                  'var_x: {6}, var_y: {7}'.format(index,
                                                                  validation_loss,
                                                                  correlation,
                                                                  sum(search_recall) + sum(
                                                                      describe_recall),
                                                                  mean_x,
                                                                  mean_y,
                                                                  var_x,
                                                                  var_y))

                model_results['validate'][epoch]['loss'] = validation_loss
                model_results['validate'][epoch]['correlation'] = correlation
                model_results['validate'][epoch]['search_recall'] = sum(search_recall)
                model_results['validate'][epoch]['describe_recall'] = sum(describe_recall)
                model_results['validate'][epoch]['mean_x'] = mean_x
                model_results['validate'][epoch]['mean_y'] = mean_y
                model_results['validate'][epoch]['var_x'] = var_x
                model_results['validate'][epoch]['var_y'] = var_y

        if epoch in Params.DECAY_EPOCH:
            current_learning_rate *= Params.DECAY_RATE
            if hooks:
                updates = OrderedDict(batchnormalizeupdates(hooks, 100))
            else:
                updates = OrderedDict()

            updates.update(
                lasagne.updates.nesterov_momentum(loss, params, learning_rate=current_learning_rate, momentum=0.9))
            del train_fn
            train_fn = theano.function([x_var, y_var], [loss] + outputs.values(), updates=updates)

    model_results['experiment'] = experiment_values

    with file(os.path.join(path, 'results_{0}.p'.format(id)), 'wb') as results_file:
        pickle.dump(model_results, results_file)

    Params.__dict__ = param_backup

    del train_fn
    del test_x
    del test_y
    del model_x
    del model_y

    return model_results
Example #38
    model_y = cPickle.load(open(os.path.join(INPUT_PATH, 'model_y.p'), 'rb'))

    x_var = model_x[0].input_var
    y_var = model_y[0].input_var

    hidden_x = filter(lambda layer: isinstance(layer, TiedDropoutLayer),
                      model_x)
    hidden_y = filter(lambda layer: isinstance(layer, TiedDropoutLayer),
                      model_y)
    hidden_y = list(reversed(hidden_y))

    hooks = OrderedDict()

    test_y = theano.function([x_var], [
        lasagne.layers.get_output(hidden_x[Params.OUTPUT_LAYER],
                                  moving_avg_hooks=hooks,
                                  deterministic=True)
    ],
                             on_unused_input='ignore')
    test_x = theano.function([y_var], [
        lasagne.layers.get_output(hidden_y[Params.OUTPUT_LAYER],
                                  moving_avg_hooks=hooks,
                                  deterministic=True)
    ],
                             on_unused_input='ignore')

    batch_number = data_set.trainset[0].shape[0] / Params.BATCH_SIZE

    test_model(test_x,
               test_y,
               x_test,
               y_test,
Example #39
word_to_index = dict([(w, i) for i, w in enumerate(index_)])
print("Dictionary:")
for i in range(10):
    print(i, "->", index_[i])
embedding_size = len(index_)
print("Embedding size:", embedding_size)
print("Hidden layer dimension:", args.hidden)

print("Model:", args.model)
x = T.ivector()
model, params = getattr(models, args.model).model(x, embedding_size,
                                                  args.hidden)
y_out = T.argmax(model, axis=-1)

print("Compiling...")
predict_model = theano.function(inputs=[x], outputs=y_out)

print("Loading parameters")
load_params(
    "params_{}_{}_h{}_e{}".format(args.mode, args.model, args.hidden,
                                  args.epochs), params)

print("Predicting", args.predicts, "sentences")
for i in range(args.predicts):
    sentence = [0]
    words = raw_input("Type a few words:")
    print(words)
    indices = [
        word_to_index[word] for word in nltk.word_tokenize(words.lower())
    ]
    sentence = sentence + indices
Example #40
 def _function(self, arr, indices, out):
     return theano.function([arr, indices], [out])
Example #41
    if hooks:
        updates = OrderedDict(batchnormalizeupdates(hooks, 100))
    else:
        updates = OrderedDict()

    params_x.extend(params_y)

    params = lasagne.utils.unique(params_x)

    current_learning_rate = Params.BASE_LEARNING_RATE

    updates.update(
        lasagne.updates.nesterov_momentum(loss, params, learning_rate=current_learning_rate, momentum=Params.MOMENTUM))

    train_fn = theano.function([x_var, y_var], [loss] + outputs.values(), updates=updates)

    test_y = theano.function([x_var],
                             [lasagne.layers.get_output(layer, moving_avg_hooks=hooks, deterministic=True) for layer in
                              hidden_x],
                             on_unused_input='ignore')
    test_x = theano.function([y_var],
                             [lasagne.layers.get_output(layer, moving_avg_hooks=hooks, deterministic=True) for layer in
                              hidden_y],
                             on_unused_input='ignore')

    batch_number = data_set.trainset[0].shape[0] / Params.BATCH_SIZE

    output_string = '{0}/{1} loss: {2} '
    output_string += ' '.join(['{0}:{{{1}}}'.format(key, index + 3) for index, key in enumerate(outputs.keys())])
Example #42
    model_x, hidden_x, weights_x, biases_x, prediction_y, hooks_x = model.build_single_channel(var_x,
                                                                                               data_set.trainset[
                                                                                                   0].shape[1],
                                                                                               data_set.trainset[
                                                                                                   1].shape[1],
                                                                                               layer_sizes=layer_sizes,
                                                                                               drop_prob=drop_prob,
                                                                                               name='x')

    loss_y = lasagne.objectives.squared_error(var_y, prediction_y).sum(axis=1).mean()

    params_x = lasagne.layers.get_all_params(model_x, trainable=True)
    updates = OrderedDict(batchnormalizeupdates(hooks_x, 100))
    updates.update(lasagne.updates.nesterov_momentum(loss_y, params_x, 0.001, 0.5))

    train_fn_x = theano.function([var_x, var_y], [loss_y], updates=updates)

    batch_number = data_set.trainset[0].shape[0] / BATCH_SIZE

    for epoch in range(EPOCH_NUMBER):
        OutputLog().write('Epoch {0}'.format(epoch))
        for index, batch in enumerate(
                iterate_minibatches(data_set.trainset[0], data_set.trainset[1], BATCH_SIZE, True)):
            input_x, input_y = batch
            loss = train_fn_x(input_x, input_y)
            OutputLog().write('{0}/{1} loss: {2}'.format(index, batch_number, loss[0]))

    model_y, hidden_y, weights_y, biases_y, prediction_x, hooks_y = model.build_single_channel(var_y,
                                                                                               data_set.trainset[
                                                                                                   1].shape[1],
                                                                                               data_set.trainset[
Example #43
                       input=layer0_input,
                       image_shape=(1, 1, 1, window),
                       filter_shape=(1, 1, 1, 5),
                       poolsize=(1, 2))

#prepare data
xi = []
for i in range(len(x1)):
    starti = i - window + 1
    e = [0 for col in range(-starti)]
    e.extend(x1[max(0, starti):i + 1])
    #print(len(e))
    xi.append(e)
xinumpy = np.array(xi)
xis = theano.shared(name='xi', value=xinumpy.astype(theano.config.floatX))

index = T.iscalar()
action = theano.function([index], [layer0.conv_out],
                         givens={
                             x: xis[index:(index + 1)],
                         })

learning_rate = 0.01
grads = T.grad(cost, layer0.params)
updates = [(param_i, param_i - learning_rate * grad_i)
           for param_i, grad_i in zip(layer0.params, grads)]

v = [action(i) for i in range(lenthX)]
print(x1)
print(v)
print("end")
Example #44
x = T.ivector()
model, params = getattr(models, args.model).model(x, embedding_size,
                                                  args.hidden)

y_out = T.argmax(model, axis=-1)

y = T.ivector()
cost = -T.mean(T.log(model)[T.arange(y.shape[0]), y])
g_params = T.grad(cost=cost, wrt=params)

lr = T.scalar('learning_rate')
updates = [(param, param - lr * gparam)
           for param, gparam in zip(params, g_params)]

print("Compiling...")
train_model = theano.function(inputs=[x, y, lr], outputs=cost, updates=updates)

learning_rate = 0.01
n_train = len(y_train)
n_iters = args.epochs * n_train
print("Training:", args.epochs, "epochs of", n_train, "iterations")
train_loss = numpy.zeros(n_iters)

start_time = timeit.default_timer()
for epoch in range(args.epochs):
    for i in range(n_train):
        iteration = i + n_train * epoch
        train_loss[iteration] = train_model(
            numpy.asarray(X_train[i], dtype='int32'),
            numpy.asarray(y_train[i], dtype='int32'), learning_rate)
        if (len(train_loss) > 1 and train_loss[-1] > train_loss[-2]):
Example #45
    embeddings_dim = 17
    output_voca_size = 134

    # n_decodesteps = 100
    # nb_passes = 3

    input_sentences = T.tensor3("input_sentences", dtype=floatX)
    encoder_output = T.tensor3("encoder_output", dtype=floatX)
    # output_sentences = T.tensor3("output_sentences", dtype=floatX)

    l_in = InputLayer(shape=(batch_size, input_sentence_length,
                             embeddings_dim),
                      input_var=input_sentences)
    layer = GRUDecoder(l_in, hidden_dim, attention_dim, output_voca_size)

    output_sentences = layer.get_output_for([input_sentences, encoder_output])
    fn = theano.function(
        [input_sentences, encoder_output],
        output_sentences,
        # mode='DebugMode',
        on_unused_input='ignore')

    np_encoder_output = inputs = np.random.normal(
        size=(batch_size, input_sentence_length, hidden_dim)).astype(floatX)
    np_input_sentences = np.random.normal(size=(batch_size,
                                                input_sentence_length,
                                                embeddings_dim)).astype(floatX)
    np_output_sentences = fn(np_input_sentences, np_encoder_output)
    print np_output_sentences
    print np_output_sentences.shape
Example #46
model = T.nnet.softmax(T.dot(x, W) + b)

y_pred = T.argmax(model, axis=1)
error = T.mean(T.neq(y_pred, y))

cost = -T.mean(T.log(model)[T.arange(y.shape[0]), y])
g_W = T.grad(cost=cost, wrt=W)
g_b = T.grad(cost=cost, wrt=b)

learning_rate = 0.13
index = T.lscalar()

train_model = theano.function(
    inputs=[index],
    outputs=[cost, error],
    updates=[(W, W - learning_rate * g_W), (b, b - learning_rate * g_b)],
    givens={
        x: train_set_x[index * batch_size:(index + 1) * batch_size],
        y: train_set_y[index * batch_size:(index + 1) * batch_size]
    })

validate_model = theano.function(inputs=[x, y], outputs=[cost, error])

print("Training")

n_epochs = 100
n_train_batches = train_set[0].shape[0] // batch_size

n_iters = n_epochs * n_train_batches
train_loss = numpy.zeros(n_iters)
train_error = numpy.zeros(n_iters)
Example #47
    # construct data set
    data_set = Container().create(data_parameters['name'], data_parameters)
    data_set.load()

    x_var = model_x[0].input_var
    y_var = model_y[0].input_var

    # Export network
    path = OutputLog().output_path

    hidden_x = filter(lambda layer: isinstance(layer, TiedDropoutLayer), model_x)
    hidden_y = filter(lambda layer: isinstance(layer, TiedDropoutLayer), model_y)
    hidden_y = reversed(hidden_y)

    test_y = theano.function([x_var, y_var],
                             [lasagne.layers.get_output(layer, deterministic=True) for layer in
                              hidden_x],
                             on_unused_input='ignore')
    test_x = theano.function([x_var, y_var],
                             [lasagne.layers.get_output(layer, deterministic=True) for layer in
                              hidden_y],
                             on_unused_input='ignore')

    batch_number = data_set.trainset[0].shape[0] / Params.BATCH_SIZE

    OutputLog().write('Test results')

    # cca = cross_decomposition.CCA(top)

    t_x = test_y(data_set.testset[0], data_set.testset[1])
    t_y = test_x(data_set.testset[0], data_set.testset[1])
Example #48
    def __init__(self, structure, datasets, activation_function=T.nnet.sigmoid, learning_rate=0.1,
                 regression_layer=SumOfSquaredErrors, gui_worker=None, normalize=normalize_images):
        """
        Creates a neural net.

        :param structure: list of number of nodes per layer: [inputLayer, hiddenLayers... , outputLayer]
        :param datasets: training and test sets as list [training set, test set]
        :param activation_function: e.g. T.nnet.sigmoid
        :param learning_rate: learning rate at each layer except the output layer
        :return:
        """

        if not regression_layer:
            regression_layer = SumOfSquaredErrors

        if not learning_rate:
            learning_rate = 0.1

        self.random_feed = numpy.random.RandomState(23455)
        self.gui_worker = gui_worker

        self.learning_rate = learning_rate
        self.activation_function = activation_function

        self.layers = []
        self.params = []
        self.labels = []

        self.n_outputs = structure[-1]

        self.train_set_images, self.train_set_labels = datasets[0]
        self.test_set_images, self.test_set_labels = datasets[1]

        if self.gui_worker:
            self.gui_worker.gui.status_message.emit("Normalizing cases...")
        else:
            print('----> Normalizing cases...')

        self.train_set_images = normalize(self.train_set_images)
        self.test_set_images = normalize(self.test_set_images)

        if self.gui_worker:
            self.gui_worker.gui.status_message.emit("Constructing the neural net...")
        else:
            print('----> Constructing the neural net...')

        self.input = T.dvector('input')
        self.label = T.dvector('label')

        input_to_next_layer = self.input

        # Create the layers
        for i in range(len(structure) - 1):
            a = self.activation_function[i] if type(self.activation_function) is list else self.activation_function
            if i < len(structure) - 2:
                self.layers.append(
                    HiddenLayer(
                        self.random_feed,
                        _input=input_to_next_layer,
                        n_in=structure[i],
                        n_out=structure[i+1],
                        activation=a
                    )
                )
                input_to_next_layer = self.layers[i].output
            else:
                # create last layer
                self.layers.append(
                    regression_layer(
                        _input=input_to_next_layer,
                        n_in=structure[i],
                        n_out=structure[i+1],
                        activation=a
                    )
                )

        self.error = self.layers[-1].error(self.label)

        for layer in self.layers:
            self.params += layer.params

        self.gradients = T.grad(self.error, self.params)

        self.updates = [
            (param, param - self.learning_rate * grad)
            for param, grad in zip(self.params, self.gradients)
        ]

        self.predictor = theano.function(
            [self.input],
            self.layers[-1].prediction
        )

        self.trainer = theano.function(
            [self.input, self.label],
            [self.layers[-1].prediction, self.error],
            updates=self.updates
        )
Example #49
    batch_size = 10
    input_sentence_length = 88
    hidden_dim = 13
    attention_dim = 11
    embeddings_dim = 17
    output_voca_size = 134

    # n_decodesteps = 100
    # nb_passes = 3

    input_sentences = T.tensor3("input_sentences", dtype=floatX)
    encoder_output = T.tensor3("encoder_output", dtype=floatX)
    # output_sentences = T.tensor3("output_sentences", dtype=floatX)

    l_in = InputLayer(shape=(batch_size, input_sentence_length, embeddings_dim), input_var=input_sentences)
    layer = GRUDecoder(l_in, hidden_dim, attention_dim, output_voca_size)

    output_sentences = layer.get_output_for([input_sentences, encoder_output])
    fn = theano.function(
        [input_sentences, encoder_output],
        output_sentences,
        # mode='DebugMode',
        on_unused_input="ignore",
    )

    np_encoder_output = inputs = np.random.normal(size=(batch_size, input_sentence_length, hidden_dim)).astype(floatX)
    np_input_sentences = np.random.normal(size=(batch_size, input_sentence_length, embeddings_dim)).astype(floatX)
    np_output_sentences = fn(np_input_sentences, np_encoder_output)
    print np_output_sentences
    print np_output_sentences.shape