Example #1
    def test_graph_bprop_rand(self):
        for i in range(10):
            xorig = numpy.random.rand(3, 2)
            for mtype in _mtypes:
                x = tensor.matrix('x')
                w = SparseType(dtype='float64',
                               format=_mtype_to_str[mtype]).make_variable()
                xw = dense_from_sparse(true_dot(w, x))
                y = dense_from_sparse(true_dot(w.T, xw))
                diff = x - y
                loss = tensor.sum(tensor.sqr(diff))
                gw = tensor.grad(loss, w)
                trainfn = compile.function([x, w], [y, loss, gw])

                x = xorig
                w = mtype((500, 3))
                w[(10, 1)] = 1
                w[(20, 2)] = 2
                lr = 0.001
                y, origloss, gw = trainfn(x, w)
                for epoch in xrange(50):
                    y, loss, gw = trainfn(x, w)
                    w = w - (lr * gw)

                self.assertTrue(origloss > loss)
Example #2
    def test_graph_bprop0(self):
        for mtype in _mtypes:
            x = tensor.matrix(
                'x'
            )  #TensorType('float64', broadcastable=[False,False], name='x')
            w = SparseType(dtype='float64',
                           format=_mtype_to_str[mtype]).make_variable()
            xw = dense_from_sparse(true_dot(w, x))
            y = dense_from_sparse(true_dot(w.T, xw))
            diff = x - y
            loss = tensor.sum(tensor.sqr(diff))
            gw = tensor.grad(loss, w)
            trainfn = compile.function([x, w], [y, loss, gw])

            x = numpy.asarray([[1., 2], [3, 4], [2, 1]])
            w = mtype((500, 3))
            w[(10, 1)] = 1
            w[(20, 2)] = 2
            lr = 0.001
            y, origloss, gw = trainfn(x, w)
            for epoch in xrange(50):
                y, loss, gw = trainfn(x, w)
                w = w - (lr * gw)
                print loss

            self.assertTrue(origloss > loss)
            self.assertTrue('1.05191241115' == str(loss))
Example #3
 def test_structured_dot_grad(self):
     # We also need the grad of CSM to be implemented.
     raise SkipTest("infer_shape not implemented for the grad" " of structured_dot")
     for format, op in [("csc", StructuredDotGradCSC), ("csr", StructuredDotGradCSR)]:
         x = SparseType(format, dtype=config.floatX)()
         y = SparseType(format, dtype=config.floatX)()
         grads = tensor.grad(dense_from_sparse(structured_dot(x, y)).sum(), [x, y])
         self._compile_and_check(
             [x, y],
             [grads[0]],
             [
                 as_sparse_format(random_lil((4, 5), config.floatX, 3), format),
                 as_sparse_format(random_lil((5, 3), config.floatX, 3), format),
             ],
             op,
         )
         self._compile_and_check(
             [x, y],
             [grads[1]],
             [
                 as_sparse_format(random_lil((4, 5), config.floatX, 3), format),
                 as_sparse_format(random_lil((5, 3), config.floatX, 3), format),
             ],
             op,
         )
Example #4
 def test_dense_from_sparse(self):
     x = SparseType("csr", dtype=config.floatX)()
     self._compile_and_check(
         [x],
         [dense_from_sparse(x)],
         [sp.csr_matrix(random_lil((10, 40), config.floatX, 3))],
         dense_from_sparse.__class__,
     )
Example #5
    def __init__(self, rng, input, n_in, n_hidden, n_out):
        """Initialize the parameters for the multilayer perceptron

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.TensorType
        :param input: symbolic variable that describes the input of the
        architecture (one minibatch)

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
        which the datapoints lie

        :type n_hidden: int
        :param n_hidden: number of hidden units

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
        which the labels lie

        """
        input = sparse.dense_from_sparse(input)
        # Since we are dealing with a one hidden layer MLP, this will
        # translate into a TanhLayer connected to the LogisticRegression
        # layer; this can be replaced by a SigmoidalLayer, or a layer
        # implementing any other nonlinearity
        self.hiddenLayer = HiddenLayer(rng=rng, input=input,
                                       n_in=n_in, n_out=n_hidden,
                                       activation=T.tanh)

        # The logistic regression layer gets as input the hidden units
        # of the hidden layer
        self.logRegressionLayer = LogisticRegression(
            input=self.hiddenLayer.output,
            n_in=n_hidden,
            n_out=n_out)

        # L1 norm ; one regularization option is to enforce L1 norm to
        # be small
        self.L1 = abs(self.hiddenLayer.W).sum() \
                + abs(self.logRegressionLayer.W).sum()

        # square of L2 norm ; one regularization option is to enforce
        # square of L2 norm to be small
        self.L2_sqr = (self.hiddenLayer.W ** 2).sum() \
                    + (self.logRegressionLayer.W ** 2).sum()

        # negative log likelihood of the MLP is given by the negative
        # log likelihood of the output of the model, computed in the
        # logistic regression layer
        self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
        # same holds for the function computing the number of errors
        self.errors = self.logRegressionLayer.errors

        # the parameters of the model are the parameters of the two layers it is
        # made out of
        self.params = self.hiddenLayer.params + self.logRegressionLayer.params
Example #6
 def test_sparsevariable(self):
     ## Re-init counter
     Variable.__count__ = count(0)
     r1 = sparse.csc_matrix(name='x', dtype='float32')
     r2 = sparse.dense_from_sparse(r1)
     r3 = sparse.csc_from_dense(r2)
     assert r1.auto_name == "auto_0"
     assert r2.auto_name == "auto_1"
     assert r3.auto_name == "auto_2"
Example #7
 def test_sparsevariable(self):
     ## Re-init counter
     Variable.__count__ = count(0)
     r1 = sparse.csc_matrix(name='x', dtype='float32')
     r2 = sparse.dense_from_sparse(r1)
     r3 = sparse.csc_from_dense(r2)
     assert r1.auto_name == "auto_0"
     assert r2.auto_name == "auto_1"
     assert r3.auto_name == "auto_2"
Example #8
 def test_sparsevariable(self):
     ## Get counter value
     autoname_id = next(Variable.__count__)
     Variable.__count__ = count(autoname_id)
     r1 = sparse.csc_matrix(name='x', dtype='float32')
     r2 = sparse.dense_from_sparse(r1)
     r3 = sparse.csc_from_dense(r2)
     assert r1.auto_name == "auto_" + str(autoname_id)
     assert r2.auto_name == "auto_" + str(autoname_id + 1)
     assert r3.auto_name == "auto_" + str(autoname_id + 2)
Example #9
 def grad(self, inp, grads):
     x, y = inp
     gz, = grads
     assert _is_sparse_variable(gz)
     assert _is_sparse_variable(x)
     rval = [true_dot(gz, y.T), true_dot(x.T, gz)]
     if _is_dense_variable(y):
         if self.grad_preserves_dense:
             rval[1] = dense_from_sparse(rval[1])
     return rval
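
The two entries of `rval` are just the standard matrix-product gradients: for z = dot(x, y) with upstream gradient gz,

    dL/dx = dot(gz, y.T)        dL/dy = dot(x.T, gz)

with the second term densified only when `y` was a dense variable and the op was built with `grad_preserves_dense` set.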
Example #10
 def grad(self, inp, grads):
     x, y = inp
     gz, = grads
     assert _is_sparse_variable(gz)
     assert _is_sparse_variable(x)
     rval = [true_dot(gz, y.T), true_dot(x.T, gz)]
     if _is_dense_variable(y):
         if self.grad_preserves_dense:
             rval[1] = dense_from_sparse(rval[1])
     return rval
Example #11
 def test_sparsevariable(self):
     # Get counter value
     autoname_id = next(Variable.__count__)
     Variable.__count__ = count(autoname_id)
     r1 = sparse.csc_matrix(name="x", dtype="float32")
     r2 = sparse.dense_from_sparse(r1)
     r3 = sparse.csc_from_dense(r2)
     assert r1.auto_name == "auto_" + str(autoname_id)
     assert r2.auto_name == "auto_" + str(autoname_id + 1)
     assert r3.auto_name == "auto_" + str(autoname_id + 2)
Example #12
 def test_csm_grad(self):
     for sparsetype in ("csr", "csc"):
         x = tensor.vector()
         y = tensor.ivector()
         z = tensor.ivector()
         s = tensor.ivector()
         call = getattr(sp, sparsetype + "_matrix")
         spm = call(random_lil((300, 400), config.floatX, 5))
         out = tensor.grad(dense_from_sparse(CSM(sparsetype)(x, y, z, s)).sum(), x)
         self._compile_and_check([x, y, z, s], [out], [spm.data, spm.indices, spm.indptr, spm.shape], CSMGrad)
Example #13
 def test2(self):
     #call dense_from_sparse
     for t in _mtypes:
         s = t(scipy.sparse.identity(5))
         d = dense_from_sparse(s)
         # s should be copied into the graph as a constant
         s[0, 0] = 3.0  # changes s, but not the copy
         val = eval_outputs([d])
         return
         self.assertTrue(str(val.dtype) == s.dtype)
         self.assertTrue(numpy.all(val[0] == [1, 0, 0, 0, 0]))
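
The early `return` above skips the assertions, so here is a hedged, self-contained sketch of the behaviour the comment describes (the scipy matrix should be copied into the graph as a constant, so mutating it afterwards does not change the compiled result):

import scipy.sparse
import theano
from theano import sparse

s = scipy.sparse.identity(5).tocsc()
d = sparse.dense_from_sparse(s)   # s is wrapped (and copied) as a sparse constant
s[0, 0] = 3.0                     # mutate the original scipy matrix afterwards
f = theano.function([], d)
print(f()[0])                     # expected to still be [ 1.  0.  0.  0.  0.]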
Example #14
    def check_format_ndim(format, ndim):
        x = tensor.tensor(dtype=config.floatX, broadcastable=([False] * ndim), name="x")

        s = SparseFromDense(format)(x)
        s_m = -s
        d = dense_from_sparse(s_m)
        c = d.sum()
        g = tensor.grad(c, x)
        f = theano.function([x], [s, g])
        f(numpy.array(0, dtype=config.floatX, ndmin=ndim))
        f(numpy.array(7, dtype=config.floatX, ndmin=ndim))
Example #15
 def test2(self):
     # call dense_from_sparse
     for t in _mtypes:
         s = t(scipy.sparse.identity(5))
         d = dense_from_sparse(s)
         # s should be copied into the graph as a constant
         s[0, 0] = 3.0  # changes s, but not the copy
         val = eval_outputs([d])
         return
         self.assertTrue(str(val.dtype) == s.dtype)
         self.assertTrue(numpy.all(val[0] == [1, 0, 0, 0, 0]))
Example #16
 def test_sparsevariable(self):
     # Get counter value
     if not sparse.enable_sparse:
         raise SkipTest('Optional package SciPy not installed')
     autoname_id = next(Variable.__count__)
     Variable.__count__ = count(autoname_id)
     r1 = sparse.csc_matrix(name='x', dtype='float32')
     r2 = sparse.dense_from_sparse(r1)
     r3 = sparse.csc_from_dense(r2)
     assert r1.auto_name == "auto_" + str(autoname_id)
     assert r2.auto_name == "auto_" + str(autoname_id + 1)
     assert r3.auto_name == "auto_" + str(autoname_id + 2)
Example #17
 def test_sparsevariable(self):
     # Get counter value
     if not sparse.enable_sparse:
         raise SkipTest('Optional package SciPy not installed')
     autoname_id = next(Variable.__count__)
     Variable.__count__ = count(autoname_id)
     r1 = sparse.csc_matrix(name='x', dtype='float32')
     r2 = sparse.dense_from_sparse(r1)
     r3 = sparse.csc_from_dense(r2)
     assert r1.auto_name == "auto_" + str(autoname_id)
     assert r2.auto_name == "auto_" + str(autoname_id + 1)
     assert r3.auto_name == "auto_" + str(autoname_id + 2)
Example #18
def densify_sparse_variable(sparse_variable, force_gpu):
    assert isinstance(sparse_variable.type, S.type.SparseType)

    variable = S.dense_from_sparse(sparse_variable)

    if (theano.config.device == 'gpu' and force_gpu and not isinstance(
            variable.type, theano.sandbox.cuda.CudaNdarrayType)):
        variable = theano.sandbox.cuda.basic_ops.gpu_from_host(variable)

    logging.debug('Densified variable "%s" to "%s".', sparse_variable,
                  variable)

    return variable
Example #19
    def pretraining_functions(self, train_set_x, batch_size, k):
        '''Generates a list of functions for performing one step of
        gradient descent at a given layer. The function will require
        as input the minibatch index, and to train an RBM you just
        need to iterate, calling the corresponding function on all
        minibatch indexes.

        :type train_set_x: theano.tensor.TensorType
        :param train_set_x: Shared var. that contains all datapoints used
                            for training the RBM
        :type batch_size: int
        :param batch_size: size of a [mini]batch
        :param k: number of Gibbs steps to do in CD-k / PCD-k

        '''

        # index to a [mini]batch
        index = T.lscalar('index')  # index to a minibatch
        learning_rate = T.scalar('lr')  # learning rate to use

        # number of batches
        n_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
        # beginning of a batch, given `index`
        batch_begin = index * batch_size
        # ending of a batch given `index`
        batch_end = batch_begin + batch_size

        pretrain_fns = []
        for rbm in self.rbm_layers:

            # get the cost and the updates list
            # using CD-k here (persistent=None) for training each RBM.
            # TODO: change cost function to reconstruction error
            cost, updates = rbm.get_cost_updates(learning_rate,
                                                 persistent=None, k=k)

            train_set_x_batch = sparse.dense_from_sparse(train_set_x[batch_begin:batch_end])
            
            # compile the theano function
            fn = theano.function(inputs=[index,
                            theano.Param(learning_rate, default=0.1)],
                                 outputs=cost,
                                 updates=updates,
                                 givens={self.x:
                                    train_set_x_batch})
            # append `fn` to the list of functions
            pretrain_fns.append(fn)

        return pretrain_fns
Example #20
def densify_sparse_variable(sparse_variable, force_gpu):
    assert isinstance(sparse_variable.type, S.type.SparseType)

    variable = S.dense_from_sparse(sparse_variable)

    if (theano.config.device == 'gpu' and
            force_gpu and
            not isinstance(variable.type,
                           theano.sandbox.cuda.CudaNdarrayType)):
        variable = theano.sandbox.cuda.basic_ops.gpu_from_host(variable)

    logging.debug('Densified variable "%s" to "%s".',
                  sparse_variable, variable)

    return variable
Example #21
    def test_graph_bprop0(self):
        for mtype in _mtypes:
            x = tensor.matrix('x') #TensorType('float64', broadcastable=[False,False], name='x')
            w = SparseType(dtype = 'float64', format = _mtype_to_str[mtype]).make_variable()
            xw = dense_from_sparse(true_dot(w, x))
            y = dense_from_sparse(true_dot(w.T, xw))
            diff = x-y
            loss = tensor.sum(tensor.sqr(diff))
            gw = tensor.grad(loss, w)
            trainfn = compile.function([x, w], [y, loss, gw])

            x = numpy.asarray([[1., 2], [3, 4], [2, 1]])
            w = mtype((500,3))
            w[(10, 1)] = 1
            w[(20, 2)] = 2
            lr = 0.001
            y, origloss, gw = trainfn(x, w)
            for epoch in xrange(50):
                y, loss, gw = trainfn(x, w)
                w = w - (lr * gw)
                print loss

            self.assertTrue(origloss > loss)
            self.assertTrue('1.05191241115' == str(loss))
Example #22
    def test_graph_bprop_rand(self):
        for i in range(10):
            xorig = numpy.random.rand(3,2)
            for mtype in _mtypes:
                x = tensor.matrix('x')
                w = SparseType(dtype = 'float64', format = _mtype_to_str[mtype]).make_variable()
                xw = dense_from_sparse(true_dot(w, x))
                y = dense_from_sparse(true_dot(w.T, xw))
                diff = x-y
                loss = tensor.sum(tensor.sqr(diff))
                gw = tensor.grad(loss, w)
                trainfn = compile.function([x, w], [y, loss, gw])

                x = xorig
                w = mtype((500,3))
                w[(10, 1)] = 1
                w[(20, 2)] = 2
                lr = 0.001
                y, origloss, gw = trainfn(x, w)
                for epoch in xrange(50):
                    y, loss, gw = trainfn(x, w)
                    w = w - (lr * gw)

                self.assertTrue(origloss > loss)
Example #23
 def test_infer_shape_csr_csc_grad(self):
     for sparsetype in ('csr', 'csc'):
         a = SparseType(sparsetype, dtype=config.floatX)()
         b = SparseType(sparsetype, dtype=config.floatX)()
         grads = tensor.grad(dense_from_sparse(structured_dot(a, b)).sum(),
                             [a, b])
         f = theano.function([a, b], [g.shape for g in grads])
         topo = f.maker.env.toposort()
         assert not any(isinstance(t, self.__class__) for t in topo)
         call = getattr(sp, sparsetype + '_matrix')
         x = call(random_lil((500, 300), config.floatX, 10))
         y = call(random_lil((300, 400), config.floatX, 5))
         out1, out2 = f(x, y)
         assert numpy.all(out1 == x.shape)
         assert numpy.all(out2 == y.shape)
Example #24
    def build_prediction_functions(self, data_x, batch_size):
        index = T.lscalar('index')
        n_batches = data_x.get_value(borrow=True).shape[0] / batch_size
            
        data_x_batch = sparse.dense_from_sparse(data_x[index * batch_size:
                                                       (index + 1) * batch_size])
            
        pred_proba_i = theano.function([index], self.logLayer.p_y_given_x,
                                       givens = {self.x: data_x_batch})
            
        pred_i = theano.function([index], self.logLayer.y_pred,
                                 givens = {self.x: data_x_batch})

        def pred_proba():
            return numpy.vstack([pred_proba_i(i) for i in xrange(n_batches)])

        def pred():
            return numpy.vstack([pred_i(i) for i in xrange(n_batches)])

        return pred_proba, pred
Example #25
        # We verify that the size is correctly updated as we store more data
        # into the sparse matrix (including zeros).
        check()
        y[0, 0] = 1
        check()
        y[0, 1] = 0
        check()


import theano.tensor.tests.test_sharedvar
test_shared_options = theano.tensor.tests.test_sharedvar.makeSharedTester(
    shared_constructor_=theano.sparse.shared,
    dtype_='float64',
    get_value_borrow_true_alias_=True,
    shared_borrow_true_alias_=True,
    set_value_borrow_true_alias_=True,
    set_value_inplace_=False,
    set_cast_value_inplace_=False,
    shared_constructor_accept_ndarray_=False,
    internal_type_=scipy.sparse.csc_matrix,
    test_internal_type_=scipy.sparse.issparse,
    theano_fct_=lambda a: dense_from_sparse(a * 2.),
    ref_fct_=lambda a: numpy.asarray((a * 2).todense()),
    cast_value_=scipy.sparse.csr_matrix,
    name='test_shared_options',
)

if __name__ == '__main__':
    unittest.main()
Example #26
    def build_finetune_functions(self, datasets, batch_size, learning_rate):
        '''Generates a function `train` that implements one step of
        finetuning, a function `validate` that computes the error on a
        batch from the validation set, and a function `test` that
        computes the error on a batch from the testing set

        :type datasets: list of pairs of theano.tensor.TensorType
        :param datasets: It is a list that contains all the datasets;
                        it has to contain three pairs, `train`,
                        `valid`, `test` in this order, where each pair
                        is formed of two Theano variables, one for the
                        datapoints, the other for the labels
        :type batch_size: int
        :param batch_size: size of a minibatch
        :type learning_rate: float
        :param learning_rate: learning rate used during finetune stage

        '''

        (train_set_x, train_set_y) = datasets[0]
        (valid_set_x, valid_set_y) = datasets[1]
        (test_set_x, test_set_y) = datasets[2]

        # compute number of minibatches for training, validation and testing
        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
        n_valid_batches /= batch_size
        n_test_batches = test_set_x.get_value(borrow=True).shape[0]
        n_test_batches /= batch_size

        index = T.lscalar('index')  # index to a [mini]batch

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates
        updates = []
        for param, gparam in zip(self.params, gparams):
            updates.append((param, param - gparam * learning_rate))

        train_set_x_batch = sparse.dense_from_sparse(train_set_x[index * batch_size:
                                                                 (index + 1) * batch_size])
        test_set_x_batch = sparse.dense_from_sparse(test_set_x[index * batch_size:
                                                               (index + 1) * batch_size])
        valid_set_x_batch = sparse.dense_from_sparse(valid_set_x[index * batch_size:
                                                                 (index + 1) * batch_size])
                                                                 
            
        train_fn = theano.function(inputs=[index],
              outputs=self.finetune_cost,
              updates=updates,
              givens={self.x: train_set_x_batch,
                      self.y: train_set_y[index * batch_size:
                                          (index + 1) * batch_size]})

        test_score_i = theano.function([index], self.errors,
                 givens={self.x: test_set_x_batch,
                         self.y: test_set_y[index * batch_size:
                                            (index + 1) * batch_size]})

        test_pred_proba_i = theano.function([index], self.logLayer.p_y_given_x,
                 givens={self.x: test_set_x_batch})
                                      
        valid_score_i = theano.function([index], self.errors,
              givens={self.x: valid_set_x_batch,
                      self.y: valid_set_y[index * batch_size:
                                          (index + 1) * batch_size]})

        valid_pred_proba_i = theano.function([index], self.logLayer.p_y_given_x,
              givens={self.x: valid_set_x_batch})

        # Create a function that scans the entire validation set
        def valid_score():
            return [valid_score_i(i) for i in xrange(n_valid_batches)]

        # Create a function that scans the entire test set
        def test_score():
            return [test_score_i(i) for i in xrange(n_test_batches)]

        def valid_auc():
            probs = numpy.ravel([valid_pred_proba_i(i)[:,1] for i in xrange(n_valid_batches)])
            if numpy.all(valid_set_y.get_value()) and numpy.all(probs):
                return 1.
            if numpy.all(valid_set_y.get_value() == 0) and numpy.all(probs == 0):
                return 0.
            return auc_score(valid_set_y.get_value()[:n_valid_batches*batch_size], probs)

        def test_auc():
            probs = numpy.ravel([test_pred_proba_i(i)[:,1] for i in xrange(n_test_batches)])
            if numpy.all(test_set_y.get_value()) and numpy.all(probs):
                return 1.
            if numpy.all(test_set_y.get_value() == 0) and numpy.all(probs == 0):
                return 0.
            return auc_score(test_set_y.get_value()[:n_test_batches*batch_size], probs)

        return train_fn, valid_score, test_score, valid_auc, test_auc
Example #27
            f4 = theano.function([x], x[50, 42])
            r4 = f4(vx)
            t4 = vx[m, n]
            assert r3.shape == t3.shape
            assert numpy.all(t4 == r4)


import theano.tensor.tests.test_sharedvar

test_shared_options = theano.tensor.tests.test_sharedvar.makeSharedTester(
    shared_constructor_=theano.sparse.shared,
    dtype_="float64",
    get_value_borrow_true_alias_=True,
    shared_borrow_true_alias_=True,
    set_value_borrow_true_alias_=True,
    set_value_inplace_=False,
    set_cast_value_inplace_=False,
    shared_constructor_accept_ndarray_=False,
    internal_type_=scipy.sparse.csc_matrix,
    test_internal_type_=scipy.sparse.issparse,
    theano_fct_=lambda a: dense_from_sparse(a * 2.0),
    ref_fct_=lambda a: numpy.asarray((a * 2).todense()),
    cast_value_=scipy.sparse.csr_matrix,
    name="test_shared_options",
)


if __name__ == "__main__":
    unittest.main()
Example #28
def to_dense(tensor):
    if is_sparse(tensor):
        return th_sparse_module.dense_from_sparse(tensor)
    else:
        return tensor
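
The helper above relies on backend aliases (`is_sparse`, `th_sparse_module`) defined elsewhere; a minimal self-contained sketch of the same idea written directly against theano.sparse could look like this:

import numpy as np
import scipy.sparse as sp
import theano
import theano.sparse as ts

def to_dense(tensor):
    # densify only if the variable is sparse; pass dense tensors through unchanged
    if isinstance(tensor.type, ts.SparseType):
        return ts.dense_from_sparse(tensor)
    return tensor

x = ts.csr_matrix('x', dtype='float32')
f = theano.function([x], to_dense(x))
print(f(sp.csr_matrix(np.eye(3, dtype='float32'))))   # dense 3x3 identity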
Example #29
import numpy as np
import scipy.sparse as sp
import theano
from theano import sparse

# pylint: disable = bad-whitespace, invalid-name, no-member, bad-continuation, assignment-from-no-return

# if shape[0] > shape[1], use csr. Otherwise, use csc
# but, not all ops are available for both yet
# so use the one that has what you need

# to and fro

x = sparse.csc_matrix(name='x', dtype='float32')
y = sparse.dense_from_sparse(x)
z = sparse.csc_from_dense(y)

# reconstruct a csr from a csc

x = sparse.csc_matrix(name='x', dtype='int64')
data, indices, indptr, shape = sparse.csm_properties(x)
y = sparse.CSR(data, indices, indptr, shape)
f = theano.function([x], y)
a = sp.csc_matrix(np.asarray([[0, 1, 1], [0, 0, 0], [1, 0, 0]]))
print a.toarray()
print f(a).toarray()

# "structured" operations
# act only on (originally) nonzero elements
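
The snippet is cut off at this point. As a hedged illustration of a structured op (structured_add, which also appears in the belief-propagation example later in this listing), adding a scalar only touches the stored nonzero entries:

import numpy as np
import scipy.sparse as sp
import theano
from theano import sparse

x = sparse.csc_matrix(name='x', dtype='float64')
y = sparse.structured_add(x, 2.0)   # +2 applied only to the nonzero entries
f = theano.function([x], sparse.dense_from_sparse(y))
a = sp.csc_matrix(np.asarray([[0, 1, 1], [0, 0, 0], [1, 0, 0]], dtype='float64'))
print(f(a))   # the zeros stay zero, the stored ones become 3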
Example #30
		Create a small KB rule and run BP on it.
	'''
    ENT = 5  #Total entities in the KB
    x = node.Variable('x')
    x.u = sparse.csr_matrix('x')
    y = node.Variable('y')
    y.u = sparse.csr_matrix('y')
    p = node.Factor('p', x, y)
    p.M = sparse.csr_matrix('p')
    q = node.Factor('q', y, x)
    q.M = sparse.csr_matrix('p')

    # uc = np.eye(ENT)[2]

    g = Graph(_variables=[x, y], _factors=[p], _fictional_factor=q)
    f, s = g.propagate_thy_beliefs()

    x = np.random.rand(1, 10)
    y = np.random.rand(1, 10)
    m = np.asarray([np.eye(10, 10)[i] for i in np.random.randint(0, 10, 10)])
    x = sps.csr_matrix(x, dtype=np.float64)
    y = sps.csr_matrix(y, dtype=np.float64)
    m = sps.csr_matrix(m, dtype=np.float64)

    op = f(x, y, m)

    sm = sparse.csr_matrix()
    eval = theano.function([sm], sparse.dense_from_sparse(sm))

    for s in op:
        print eval(s)
Example #31
    def __init__(self,
                 numpy_rng,
                 theano_rng=None,
                 input=None,
                 n_visible=784,
                 n_hidden=500,
                 W=None,
                 bhid=None,
                 bvis=None):
        """
        Initialize the dA class by specifying the number of visible units (the
        dimension d of the input ), the number of hidden units ( the dimension
        d' of the latent or hidden space ) and the corruption level. The
        constructor also receives symbolic variables for the input, weights and
        bias. Such symbolic variables are useful when, for example, the input
        is the result of some computations, or when weights are shared between
        the dA and an MLP layer. When dealing with SdAs this always happens,
        the dA on layer 2 gets as input the output of the dA on layer 1,
        and the weights of the dA are used in the second stage of training
        to construct an MLP.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to generate weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                     generated based on a seed drawn from `rng`

        :type input: theano.tensor.TensorType
        :param input: a symbolic description of the input or None for
                      standalone dA

        :type n_visible: int
        :param n_visible: number of visible units

        :type n_hidden: int
        :param n_hidden:  number of hidden units

        :type W: theano.tensor.TensorType
        :param W: Theano variable pointing to a set of weights that should be
                  shared between the dA and another architecture; if dA should
                  be standalone set this to None

        :type bhid: theano.tensor.TensorType
        :param bhid: Theano variable pointing to a set of biases values (for
                     hidden units) that should be shared between the dA and another
                     architecture; if dA should be standalone set this to None

        :type bvis: theano.tensor.TensorType
        :param bvis: Theano variable pointing to a set of biases values (for
                     visible units) that should be shared between the dA and another
                     architecture; if dA should be standalone set this to None


        """
        self.n_visible = n_visible
        self.n_hidden = n_hidden

        # create a Theano random generator that gives symbolic random values
        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2**30))

        # note : W' was written as `W_prime` and b' as `b_prime`
        if not W:
            # W is initialized with `initial_W` which is uniformly sampled
            # from -4*sqrt(6./(n_visible+n_hidden)) and
            # 4*sqrt(6./(n_hidden+n_visible)); the output of uniform is
            # converted using asarray to dtype
            # theano.config.floatX so that the code is runnable on GPU
            initial_W = numpy.asarray(numpy_rng.uniform(
                low=-4 * numpy.sqrt(6. / (n_hidden + n_visible)),
                high=4 * numpy.sqrt(6. / (n_hidden + n_visible)),
                size=(n_visible, n_hidden)),
                                      dtype=theano.config.floatX)
            W = theano.shared(value=initial_W, name='W', borrow=True)

        if not bvis:
            bvis = theano.shared(value=numpy.zeros(n_visible,
                                                   dtype=theano.config.floatX),
                                 borrow=True)

        if not bhid:
            bhid = theano.shared(value=numpy.zeros(n_hidden,
                                                   dtype=theano.config.floatX),
                                 name='b',
                                 borrow=True)

        self.W = W
        # b corresponds to the bias of the hidden
        self.b = bhid
        # b_prime corresponds to the bias of the visible
        self.b_prime = bvis
        # tied weights, therefore W_prime is W transpose
        self.W_prime = self.W.T
        self.theano_rng = theano_rng
        # if no input is given, generate a variable representing the input
        if input is None:
            # we use a matrix because we expect a minibatch of several
            # examples, each example being a row
            self.x = T.dmatrix(name='input')
        else:
            self.x = sparse.dense_from_sparse(input)

        self.params = [self.W, self.b, self.b_prime]
Example #32
    def propagate_thy_beliefs(self):
        '''
			Call this function to receive a string containing the path of the belief propagation algorithm.
			We implement the algorithm listed in the paper mentioned in the comments above
			

			Pseudocode:
				-> Create an empty theano vector whose definitions will be iteratively changed.
				-> Call compile_message_node_to_factor from the o node of the head predicate. 
				-> Let the functions recursively call each other
				-> Collect their things somehow. @TODO: how. what format. Shall we use theano variables altogether or what
				-> Return said stuff.
		'''

        # print "graph:bp: Starting belief propagation."
        equation = self._comiple_message_node_(self.head_predicate.o,
                                               "Fictional Label")
        symbols = self._comiple_message_symbols_node_(self.head_predicate.o,
                                                      "Fictional Label")

        #Define an empty dvector to be used as the 'y' label (which will later contain n hot information about desired entities)
        y = sparse.csr_dmatrix('y')

        # Do a softmax over the final BP Equation
        equation = sparse.structured_exp(equation)
        equation = sparse.row_scale(equation,
                                    1.0 / sparse.sp_sum(equation, axis=1))

        # Collect all the parameters (shared vars), found in the factors of this graph.
        #parameters is a list of matrices (relation)
        parameters = [x.M for x in symbols]

        #Cross entropy loss
        # loss = - y * T.log(equation) + (y - 1)*T.log(1-equation) # unregularized cross-entropy loss in theano
        a = sparse.mul(y, sparse.structured_log(equation))
        b = sparse.mul(
            sparse.structured_add(y, -1.0),
            sparse.structured_log(sparse.structured_add(equation, -1.0)))
        loss = sparse.sub(b, a)

        # Unregularied Loss
        loss_dense = sparse.dense_from_sparse(loss)
        cost = loss_dense.mean()
        # cost = sparse.sp_sum(loss, axis = 1)/float(ne)

        gradients = theano.grad(cost, parameters)

        updated_matrices = [
            sparse.sub(parameters[i], 0.1 * gradients[i])
            for i in range(len(parameters))
        ]
        # updated_matrices = [sparse.sub(parameters[i], sparse.row_scale(gradients[i], 0.1)) for i in range(len(parameters))]
        # updated_matrices = [parameters[i] - 0.1 * gradients[i] for i in range(len(parameters))]

        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        #  DEBUG
        # print "Equation: ", equation
        # print "Type of equation: ",type(equation)
        # print "Symbols: ", symbols
        # print "graph:bp: Belief propagation complete."

        # print "Parameters are"
        # for p in parameters:
        # 	print p," and the type is :",type(p)

        # print gradients
        # print "Updated Matrices are :", type(updated_matrices[0])

        # print colored(type(self.head_predicate.i.u),'red')

        # print "Inputs: \n"
        # print type(self.head_predicate.i.u)
        # print type(y)
        # print [ type(x) for x in parameters ]

        # raw_input("Verify Symbols and Gradients ")
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

        function = theano.function(
            inputs=[self.head_predicate.i.u, y] +
            parameters,  #Inputs to this is the head predicates' symbolic var, and another dvector
            # inputs = [self.head_predicate.i.u,parameters[0]],		#Inputs to this is the head predicates' symbolic var, and another dvector
            # outputs = updated_matrices			#Output to this thing is the BP algorithm's output expression
            outputs=[equation] + updated_matrices
            # mode=theano.compile.MonitorMode(
            #               pre_func=self.inspect_inputs,
            #               post_func=self.inspect_outputs)			#Output to this thing is the BP algorithm's output expression
            # updates=tuple([(parameters[i], parameters[i] - 0.1 * gradients[i]) for i in range(len(parameters))])		#Updates are the gradients of cost wrt parameters
        )

        return function, symbols
Example #33
# That is why there are two kinds of constructors of sparse variables:
# csc_matrix and csr_matrix. These can be called with the usual name
# and dtype parameters, but no broadcastable flags are allowed. This is
# forbidden since the sparse package, like the SciPy sparse module, does not
# provide any way to handle a number of dimensions different from two. The
# set of all accepted dtype for the sparse matrices can be found in
# sparse.all_dtypes.
print sparse.all_dtypes

# 2.1 To and Fro
# To move back and forth from a dense matrix to a sparse matrix
# representation, Theano provides the dense_from_sparse and csc_from_dense
# functions. No additional detail must be provided. Here is an example
# that performs a full cycle from sparse to sparse:
x = sparse.csc_matrix(name='x', dtype='float32')
y = sparse.dense_from_sparse(x)
z = sparse.csc_from_dense(y)

# 2.2 Properties and Construction
# Although sparse variables do not allow direct access to their properties, this
# can be accomplished using the csm_properties function. This will return
# a tuple of one-dimensional tensor variables that represents the internal
# characteristics of the sparse matrix.

# In order to reconstruct a sparse matrix from some properties, the
# functions CSC and CSR can be used. This will create the sparse matrix in
# the desired format. As an example, the following code reconstructs a csc
# matrix into a csr one.
x = sparse.csc_matrix(name='x', dtype='int64')
data, indices, indptr, shape = sparse.csm_properties(x)
y = sparse.CSR(data, indices, indptr, shape)
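
As a short usage sketch (mirroring the 3x3 example used earlier in this listing), evaluating csm_properties returns the raw arrays that back the sparse matrix:

import numpy as np
import scipy.sparse as sp
import theano
from theano import sparse

x = sparse.csc_matrix(name='x', dtype='int64')
data, indices, indptr, shape = sparse.csm_properties(x)
f = theano.function([x], [data, indices, indptr, shape])
a = sp.csc_matrix(np.asarray([[0, 1, 1], [0, 0, 0], [1, 0, 0]]))
d, i, p, s = f(a)
print(d)   # stored nonzero values
print(i)   # row indices (csc format)
print(p)   # column pointer array
print(s)   # matrix shape, i.e. [3 3]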
Example #34
def to_dense(tensor):
    if is_sparse(tensor):
        return th_sparse_module.dense_from_sparse(tensor)
    else:
        return tensor
Example #35
def test_rbm(
    datasets,
    learning_rate=0.1,
    training_epochs=15,
    batch_size=20,
    n_chains=20,
    n_samples=10,
    output_folder="rbm_plots",
    n_hidden=500,
):
    """
    Demonstrate how to train an RBM and afterwards sample from it using Theano.

    This is demonstrated on MNIST.

    :param learning_rate: learning rate used for training the RBM

    :param training_epochs: number of epochs used for training

    :param datasets: train/val/test set tensors

    :param batch_size: size of a batch used to train the RBM

    :param n_chains: number of parallel Gibbs chains to be used for sampling

    :param n_samples: number of samples to plot for each chain

    """

    train_set_x, train_set_y = datasets[0]
    test_set_x, test_set_y = datasets[2]

    n_features = train_set_x.get_value(borrow=True).shape[1]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix("x")  # the data is presented as rasterized images

    rng = np.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    # initialize storage for the persistent chain (state = hidden
    # layer of chain)
    persistent_chain = theano.shared(np.zeros((batch_size, n_hidden), dtype=theano.config.floatX), borrow=True)

    # construct the RBM class
    rbm = RBM(input=x, n_visible=n_features, n_hidden=n_hidden, numpy_rng=rng, theano_rng=theano_rng)

    # get the cost and the gradient corresponding to one step of CD-15
    cost, updates = rbm.get_cost_updates(lr=learning_rate, persistent=persistent_chain, k=15)

    #################################
    #     Training the RBM          #
    #################################
    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    os.chdir(output_folder)

    train_set_x_batch = sparse.dense_from_sparse(train_set_x[index * batch_size : (index + 1) * batch_size])

    # it is ok for a theano function to have no output
    # the purpose of train_rbm is solely to update the RBM parameters
    train_rbm = theano.function([index], cost, updates=updates, givens={x: train_set_x_batch}, name="train_rbm")

    plotting_time = 0.0
    start_time = time.clock()

    # go through training epochs
    for epoch in xrange(training_epochs):

        # go through the training set
        mean_cost = []
        for batch_index in xrange(n_train_batches):
            mean_cost += [train_rbm(batch_index)]

        print "Training epoch %d, cost is " % epoch, np.mean(mean_cost)

        # Plot filters after each training epoch
        plotting_start = time.clock()
        # Construct image from the weight matrix
        image = PIL.Image.fromarray(rbm.W.get_value(borrow=True).T)
        image.save("filters_at_epoch_%i.png" % epoch)
        plotting_stop = time.clock()
        plotting_time += plotting_stop - plotting_start

    end_time = time.clock()

    pretraining_time = (end_time - start_time) - plotting_time

    print ("Training took %f minutes" % (pretraining_time / 60.0))

    #################################
    #     Sampling from the RBM     #
    #################################
    # find out the number of test samples
    number_of_test_samples = test_set_x.get_value(borrow=True).shape[0]

    # pick random test examples, with which to initialize the persistent chain
    test_idx = rng.randint(number_of_test_samples - n_chains)
    persistent_vis_chain = theano.shared(
        np.asarray(test_set_x.get_value(borrow=True)[test_idx : test_idx + n_chains], dtype=theano.config.floatX)
    )

    plot_every = 1000
    # define one step of Gibbs sampling (mf = mean-field) define a
    # function that does `plot_every` steps before returning the
    # sample for plotting
    [presig_hids, hid_mfs, hid_samples, presig_vis, vis_mfs, vis_samples], updates = theano.scan(
        rbm.gibbs_vhv, outputs_info=[None, None, None, None, None, persistent_vis_chain], n_steps=plot_every
    )

    # add to updates the shared variable that takes care of our persistent
    # chain :.
    updates.update({persistent_vis_chain: vis_samples[-1]})
    # construct the function that implements our persistent chain.
    # we generate the "mean field" activations for plotting and the actual
    # samples for reinitializing the state of our persistent chain
    sample_fn = theano.function([], [vis_mfs[-1], vis_samples[-1]], updates=updates, name="sample_fn")

    # create a space to store the image for plotting ( we need to leave
    # room for the tile_spacing as well)
    image_data = np.zeros((n_samples, n_features), dtype="uint8")
    for idx in xrange(n_samples):
        # generate `plot_every` intermediate samples that we discard,
        # because successive samples in the chain are too correlated
        vis_mf, vis_sample = sample_fn()
        print " ... plotting sample ", idx
        image_data[idx, :] = vis_mf
        # construct image

    image = PIL.Image.fromarray(image_data)
    image.save("samples.png")
    os.chdir("../")

    return rbm