Example #1
    def get_output_for(self, input, deterministic=False, **kwargs):
        """
        Parameters
        ----------
        input : tensor
            output from the previous layer
        deterministic : bool
            If True, dropout and scaling are disabled; see notes
        """
        from .. import utils
        if deterministic or self.p == 0:
            return input
        else:
            # Using a Theano constant to prevent upcasting
            one = T.constant(1)

            retain_prob = one - self.p
            if self.rescale:
                # According to pull request 595 from eduardo4jesus, a proper
                # call is needed in case the input is a sparse variable
                if type(input) == S.SparseVariable:
                    input = S.mul(input, utils.floatX(1.)/retain_prob)
                else:
                    input /= retain_prob

            # use nonsymbolic shape for dropout mask if possible
            input_shape = self.input_shape
            if any(s is None for s in input_shape):
                input_shape = input.shape

            return input * self._srng.binomial(input_shape, p=retain_prob,
                                               dtype=input.dtype)
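The rescaling above is the usual "inverted dropout" trick: dividing the surviving activations by `retain_prob` keeps their expected value equal to the deterministic output, which is why no scaling is needed at test time. A minimal NumPy sketch of that idea (independent of Theano/Lasagne, variable names chosen for illustration):

# Inverted dropout in plain NumPy: the mean activation is preserved because
# the survivors are scaled up by 1 / retain_prob.
import numpy as np

rng = np.random.RandomState(0)
x = rng.rand(100000).astype('float32')    # toy activations
p = 0.5                                   # drop probability
retain_prob = 1.0 - p

mask = rng.binomial(n=1, p=retain_prob, size=x.shape)
dropped = x * mask / retain_prob          # drop, then rescale the survivors

print(x.mean(), dropped.mean())           # the two means are close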
Example #2
    def _get_diagonal_term(self, X_left, X_right, diag_init):
        diag = tn.shared(value=diag_init, name='diag')

        if _tn_is_sparse(X_left) or _tn_is_sparse(X_right):
            XlXr = tsp.mul(X_left, X_right)
            y_pred = tsp.dot(XlXr, diag)
        else:
            XlXr = T.mul(X_left, X_right)
            y_pred = T.dot(XlXr, diag)

        return y_pred, [diag]
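In the dense branch above, the prediction is a per-row weighted inner product of the two inputs. A small NumPy check of that reading (array names here are illustrative, not from the original project):

# (X_left * X_right) @ diag gives, for each row n,
# sum_j X_left[n, j] * X_right[n, j] * diag[j].
import numpy as np

rng = np.random.RandomState(0)
X_left, X_right, diag = rng.rand(4, 6), rng.rand(4, 6), rng.rand(6)

y_pred = (X_left * X_right).dot(diag)
y_loop = np.array([sum(X_left[n, j] * X_right[n, j] * diag[j]
                       for j in range(6)) for n in range(4)])
assert np.allclose(y_pred, y_loop)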
Example #4
    def _comiple_message_node_(self, _node, _factor):
        '''
        Pseudocode (treat _node as X and _factor as L):

            if X is the input variable (global), then
                return u_c, the input
            else
                generate a new variable name v_x
                collect the neighbouring L_i of X, excluding L
                for [L_1, L_2, ..., L_i], do
                    v_i = compile_message(L_i -> X)
                emit(v_x = v_1 dot v_2 ... dot v_i)
                return v_x
        '''

        if _node == self.head_predicate.i:

            #This is the input variable.
            if _node.u is None:
                print(_node)
                # raw_input("Node has nothing")
            return _node.u

        #This is NOT the input variable.
        neighbors = self._get_neighbours(
            _node, _exclude=_factor)  #Will be a list of factors.

        #Send the neighbour + current node to _compile_message_factor_ and collect what they have to say.
        neighboring_values = [
            self._compile_message_factor_(_factor=factor, _node=_node)
            for factor in neighbors
        ]
        if len(neighboring_values) > 0:
            v_x = neighboring_values[0]
            for remaining_values in neighboring_values[1:]:
                v_x = sparse.mul(v_x, remaining_values)
                # v_x = v_x * remaining_values
        else:
            #In this case, since there are no neighbors, there's literally nothing to return.
            #@TODO: What do we do here
            # print "belief_propagation:Graph:compile_message: Part where there are no neighbours!"
            v_x = None

        return v_x
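The loop above folds the neighbouring messages together with an elementwise product (`sparse.mul`). The same reduction with dense NumPy arrays, purely for illustration:

# Combining messages by elementwise multiplication, as the loop above does
# for sparse Theano variables.
from functools import reduce
import numpy as np

messages = [np.array([0.5, 1.0, 0.0]),
            np.array([2.0, 0.5, 3.0]),
            np.array([1.0, 1.0, 1.0])]
v_x = reduce(np.multiply, messages)
print(v_x)   # -> [1.  0.5 0. ]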
Example #5
    def get_output_for(self, input, deterministic=False, **kwargs):
        if not isinstance(input, (S.SparseVariable, S.SparseConstant,
                                  S.sharedvar.SparseTensorSharedVariable)):
            raise ValueError("Input for this layer must be sparse")

        if deterministic or self.p == 0:
            return input
        else:
            # Using Theano constant to prevent upcasting
            one = T.constant(1, name='one')
            retain_prob = one - self.p

            if self.rescale:
                input = S.mul(input, one/retain_prob)

            input_shape = self.input_shape
            if any(s is None for s in input_shape):
                input_shape = input.shape

            return input * self._srng.binomial(input_shape, p=retain_prob,
                                               dtype=input.dtype)
Example #6
    def test_upcast(self):
        array1 = numpy.array([[1, 0], [3, 0], [0, 6]], dtype="float32")
        array2 = numpy.array([[1, 0], [3, 0], [0, 6]], dtype="int32")
        array3 = numpy.array([[1, 0], [3, 0], [0, 6]], dtype="int8")

        # AddSS and MulSS
        for mtype in _mtypes:
            a = mtype(array1)
            aR = as_sparse_variable(a)
            b = mtype(array2)
            bR = as_sparse_variable(b)
            c = mtype(array3)
            cR = as_sparse_variable(c)

            # Ops that do not upcast
            self.assertRaises(NotImplementedError, add, aR, bR)
            self.assertRaises(NotImplementedError, add, bR, aR)
            self.assertRaises(NotImplementedError, add, bR, cR)
            self.assertRaises(NotImplementedError, add, cR, bR)
            self.assertRaises(NotImplementedError, add, aR, cR)
            self.assertRaises(NotImplementedError, add, cR, aR)

            self.assertRaises(NotImplementedError, mul, aR, bR)
            self.assertRaises(NotImplementedError, mul, bR, aR)
            self.assertRaises(NotImplementedError, mul, bR, cR)
            self.assertRaises(NotImplementedError, mul, cR, bR)
            self.assertRaises(NotImplementedError, mul, aR, cR)
            self.assertRaises(NotImplementedError, mul, cR, aR)

        # AddSD and MulSD
        for mtype in _mtypes:
            a = mtype(array1)
            a_sv = as_sparse_variable(a)
            a_dv = tensor.as_tensor_variable(array1)
            b = mtype(array2)
            b_sv = as_sparse_variable(b)
            b_dv = tensor.as_tensor_variable(array2)
            c = mtype(array3)
            c_sv = as_sparse_variable(c)
            c_dv = tensor.as_tensor_variable(array3)

            # add does not upcast
            self.assertRaises(NotImplementedError, add, a_sv, b_dv)
            self.assertRaises(NotImplementedError, add, b_sv, a_dv)
            self.assertRaises(NotImplementedError, add, b_sv, c_dv)
            self.assertRaises(NotImplementedError, add, c_sv, b_dv)
            self.assertRaises(NotImplementedError, add, a_sv, c_dv)
            self.assertRaises(NotImplementedError, add, c_sv, a_dv)

            # mul may upcast the dense input if needed
            if config.cast_policy in ("custom", "numpy") or (
                config.cast_policy == "numpy+floatX" and config.floatX == "float64"
            ):
                # The result should be a float64 (not implemented).
                self.assertRaises(NotImplementedError, mul, a_sv, b_dv)
            elif config.cast_policy == "numpy+floatX" and config.floatX == "float32":
                # The result should be a float32.
                assert mul(a_sv, b_dv).dtype == "float32"
            else:
                raise NotImplementedError()
            self.assertRaises(NotImplementedError, mul, b_sv, a_dv)
            assert mul(b_sv, c_dv).dtype == "int32"
            self.assertRaises(NotImplementedError, mul, c_sv, b_dv)
            assert mul(a_sv, c_dv).dtype == "float32"
            self.assertRaises(NotImplementedError, mul, c_sv, a_dv)
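The dtype assertions in this test mirror NumPy's type-promotion rules, which makes it easy to predict which sparse/dense combinations stay at float32 or int32 and which would be pushed up to float64 (the NotImplementedError cases):

# NumPy promotion rules behind the expected dtypes above.
import numpy

print(numpy.result_type('float32', 'int32'))   # float64 -> the not-implemented case
print(numpy.result_type('int32', 'int8'))      # int32   -> mul(b_sv, c_dv)
print(numpy.result_type('float32', 'int8'))    # float32 -> mul(a_sv, c_dv)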
Example #7
    def _get_gradients_adagrad(self, J):
        """Get the AdaGrad gradients and squared gradients updates.

        The returned gradients still need to be multiplied by the general
        learning rate.

        Parameters
        ----------
        J : theano variable
            cost

        Returns
        -------
        theano variable
            gradients that are adapted by the AdaGrad algorithm
        theano variable
            updated sum of squares for all previous steps
        """
        grads = T.grad(J, [self.__dict__[self.updatable_parameters[i]]
                for i in xrange(len(self.updatable_parameters))])

        for i, _ in enumerate(grads):
            grads[i] = debug_print(grads[i], 'grads_' + self.updatable_parameters[i])

        updated_squares = dict()

        # Add squared gradient to the squared gradient matrix for AdaGrad and
        # recalculate the gradient.
        for i, p in enumerate(self.updatable_parameters):

            # We need to handle sparse gradient variables differently
            if isinstance(grads[i], sparse.SparseVariable):
                # Add the squares to the matrix
                power = debug_print(sparse.structured_pow(grads[i], 2.), 'pow_' + p)
                # Remove zeros (might happen when squaring near zero values)
                power = sparse.remove0(power)
                updated_squares[p] = self.__dict__['adagrad_matrix_' + p] + power

                # Get only those squares that will be altered, for all others we
                # don't have gradients, i.e., we don't need to consider them at
                # all.
                sqrt_matrix = sparse.sp_ones_like(power)
                sqrt_matrix = debug_print(updated_squares[p] * sqrt_matrix, 'adagrad_squares_subset_' + p)

                # Take the square root of the matrix subset.
                sqrt_matrix = debug_print(sparse.sqrt(sqrt_matrix), 'adagrad_sqrt_' + p)
                # Calc 1. / the square root.
                sqrt_matrix = debug_print(sparse.structured_pow(sqrt_matrix, -1.), 'adagrad_pow-1_' + p)
                grads[i] = sparse.mul(grads[i], sqrt_matrix)
            else:
                power = debug_print(T.pow(grads[i], 2.), 'pow_' + p)
                updated_squares[p] = self.__dict__['adagrad_matrix_' + p] + power

                # Call sqrt only for those items that are non-zero.
                denominator = T.switch(T.neq(updated_squares[p], 0.0),
                        T.sqrt(updated_squares[p]),
                        T.ones_like(updated_squares[p], dtype=floatX))
                grads[i] = T.mul(grads[i], 1. / denominator)

            updated_squares[p] = debug_print(updated_squares[p], 'upd_squares_' + p)

        for i, _ in enumerate(grads):
            grads[i] = debug_print(grads[i], 'grads_updated_' + self.updatable_parameters[i])

        return grads, updated_squares
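The dense branch above is the standard AdaGrad rescaling: accumulate the squared gradients and divide each new gradient by the square root of the running sum, guarding the zero entries. A minimal NumPy version of that step, without the sparse handling or the `debug_print` wrappers:

# Dense AdaGrad step in plain NumPy (toy values, illustrative only).
import numpy as np

grad = np.array([0.5, -0.2, 0.0])
adagrad_matrix = np.zeros_like(grad)      # running sum of squared gradients

adagrad_matrix += grad ** 2
denominator = np.where(adagrad_matrix != 0.0, np.sqrt(adagrad_matrix), 1.0)
adjusted_grad = grad / denominator        # still to be multiplied by the learning rate

print(adjusted_grad)                      # -> [ 1. -1.  0.]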
Example #8
    def __init__(self,
                 feature_count,
                 classifier=False,
                 k=8,
                 stdev=0.1,
                 sparse=False):
        self.classifier = classifier
        d = feature_count

        # *** Symbolic variables ***
        if sparse:
            X = S.csr_matrix(name='inputs', dtype='float32')
        else:
            X = T.matrix()
        y = T.vector()
        beta_w1 = T.scalar()
        beta_v = T.scalar()

        # *** Model parameters ***
        # bias term (intercept)
        w0_init = np.zeros(1)
        self.w0 = theano.shared(w0_init, allow_downcast=True)
        # first order coefficients
        w1_init = np.zeros(d)
        self.w1 = theano.shared(w1_init, allow_downcast=True)
        # interaction factors
        v_init = stdev * np.random.randn(k, d)
        self.v = theano.shared(v_init, allow_downcast=True)

        # *** The Model ***
        # The formula for pairwise interactions is from the bottom left
        # of page 997 of Rendle 2010, "Factorization Machines."
        # This version scales linearly in k and d, as opposed to O(d^2).
        if sparse:
            interactions = 0.5 * T.sum((S.dot(X, T.transpose(self.v)) ** 2) - \
                                       S.dot(S.mul(X,X), T.transpose(self.v ** 2)), axis=1)
            y_hat = T.addbroadcast(self.w0, 0) + S.dot(X,
                                                       self.w1) + interactions
        else:
            interactions = 0.5 * T.sum((T.dot(X, T.transpose(self.v)) ** 2) - \
                                       T.dot(X ** 2, T.transpose(self.v ** 2)), axis=1)
            y_hat = T.addbroadcast(self.w0, 0) + T.dot(X,
                                                       self.w1) + interactions
        if self.classifier:
            y_hat = T.nnet.sigmoid(y_hat)

        # *** Loss Function ***
        if self.classifier:
            error = T.mean(T.nnet.binary_crossentropy(y_hat, y))
        else:
            error = T.mean((y - y_hat)**2)
        # regularization
        L2 = beta_w1 * T.mean(self.w1**2) + beta_v * T.mean(self.v**2)
        loss = error + L2

        # *** Learning ***
        updates = []
        params = [self.w0, self.w1, self.v]
        grads = T.grad(cost=loss, wrt=params)
        # RMSProp
        lr, rho, epsilon = 0.001, 0.9, 1e-6
        for p, g in zip(params, grads):
            acc = theano.shared(p.get_value() * 0.)
            acc_new = rho * acc + (1 - rho) * g**2
            gradient_scaling = T.sqrt(acc_new + epsilon)
            g = g / gradient_scaling
            updates.append((acc, acc_new))
            updates.append((p, p - lr * g))

        self.theano_train = theano.function(inputs=[X, y, beta_w1, beta_v],
                                            outputs=loss,
                                            updates=updates,
                                            allow_input_downcast=True)

        self.theano_cost = theano.function(inputs=[X, y, beta_w1, beta_v],
                                           outputs=loss,
                                           allow_input_downcast=True)

        # *** Prediction ***
        self.theano_predict = theano.function(inputs=[X],
                                              outputs=y_hat,
                                              allow_input_downcast=True)
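The `interactions` term uses the identity from Rendle 2010 cited in the comment: the sum over all feature pairs can be rewritten so it costs O(k·d) instead of O(d²). A small NumPy check of that identity (toy sizes, names chosen for illustration):

# sum_{i<j} <v_i, v_j> x_i x_j
#   == 0.5 * sum_f ( (sum_i v[f, i] x_i)^2 - sum_i v[f, i]^2 x_i^2 )
import numpy as np

rng = np.random.RandomState(0)
k, d = 8, 5
v = rng.randn(k, d)
x = rng.rand(d)

naive = sum(v[:, i].dot(v[:, j]) * x[i] * x[j]
            for i in range(d) for j in range(i + 1, d))
fast = 0.5 * np.sum(v.dot(x) ** 2 - (v ** 2).dot(x ** 2))
assert np.allclose(naive, fast)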
Example #9
    def propagate_thy_beliefs(self):
        '''
        Call this function to receive the compiled belief-propagation function for this graph.
        We implement the algorithm listed in the paper mentioned in the comments above.

        Pseudocode:
            -> Create an empty theano vector whose definitions will be iteratively changed.
            -> Call compile_message_node_to_factor from the o node of the head predicate.
            -> Let the functions recursively call each other.
            -> Collect their things somehow. @TODO: how? What format? Shall we use theano variables altogether or what?
            -> Return said stuff.
        '''

        # print "graph:bp: Starting belief propagation."
        equation = self._comiple_message_node_(self.head_predicate.o,
                                               "Fictional Label")
        symbols = self._comiple_message_symbols_node_(self.head_predicate.o,
                                                      "Fictional Label")

        #Define an empty sparse matrix to be used as the 'y' label (which will later contain n-hot information about desired entities)
        y = sparse.csr_dmatrix('y')

        # Do a softmax over the final BP Equation
        equation = sparse.structured_exp(equation)
        equation = sparse.row_scale(equation,
                                    1.0 / sparse.sp_sum(equation, axis=1))

        # Collect all the parameters (shared vars), found in the factors of this graph.
        #parameters is a list of matrices (relation)
        parameters = [x.M for x in symbols]

        #Cross entropy loss
        # loss = - y * T.log(equation) + (y - 1)*T.log(1-equation) # unregularized cross-entropy loss in theano
        a = sparse.mul(y, sparse.structured_log(equation))
        b = sparse.mul(
            sparse.structured_add(y, -1.0),
            # log(1 - equation), evaluated on the stored entries
            sparse.structured_log(
                sparse.sub(sparse.sp_ones_like(equation), equation)))
        loss = sparse.sub(b, a)

        # Unregularized loss
        loss_dense = sparse.dense_from_sparse(loss)
        cost = loss_dense.mean()
        # cost = sparse.sp_sum(loss, axis = 1)/float(ne)

        gradients = theano.grad(cost, parameters)

        updated_matrices = [
            sparse.sub(parameters[i], 0.1 * gradients[i])
            for i in range(len(parameters))
        ]
        # updated_matrices = [sparse.sub(parameters[i], sparse.row_scale(gradients[i], 0.1)) for i in range(len(parameters))]
        # updated_matrices = [parameters[i] - 0.1 * gradients[i] for i in range(len(parameters))]

        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        #  DEBUG
        # print "Equation: ", equation
        # print "Type of equation: ",type(equation)
        # print "Symbols: ", symbols
        # print "graph:bp: Belief propagation complete."

        # print "Parameters are"
        # for p in parameters:
        # 	print p," and the type is :",type(p)

        # print gradients
        # print "Updated Matrices are :", type(updated_matrices[0])

        # print colored(type(self.head_predicate.i.u),'red')

        # print "Inputs: \n"
        # print type(self.head_predicate.i.u)
        # print type(y)
        # print [ type(x) for x in parameters ]

        # raw_input("Verify Symbols and Gradients ")
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

        function = theano.function(
            inputs=[self.head_predicate.i.u, y] +
            parameters,  #Inputs to this is the head predicates' symbolic var, and another dvector
            # inputs = [self.head_predicate.i.u,parameters[0]],		#Inputs to this is the head predicates' symbolic var, and another dvector
            # outputs = updated_matrices			#Output to this thing is the BP algorithm's output expression
            outputs=[equation] + updated_matrices
            # mode=theano.compile.MonitorMode(
            #               pre_func=self.inspect_inputs,
            #               post_func=self.inspect_outputs)			#Output to this thing is the BP algorithm's output expression
            # updates=tuple([(parameters[i], parameters[i] - 0.1 * gradients[i]) for i in range(len(parameters))])		#Updates are the gradients of cost wrt parameters
        )

        return function, symbols
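The `structured_exp` followed by `row_scale(..., 1.0 / sparse.sp_sum(..., axis=1))` above amounts to a row-wise softmax over the stored entries of the sparse matrix. A SciPy sketch of that normalization (illustrative only, not taken from the original project):

# Row-wise softmax over the stored entries of a sparse matrix.
import numpy as np
import scipy.sparse as sp

eq = sp.csr_matrix(np.array([[1.0, 0.0, 2.0],
                             [0.0, 3.0, 1.0]]))
eq.data = np.exp(eq.data)                       # exp only on the stored entries
row_sums = np.asarray(eq.sum(axis=1)).ravel()   # one sum per row
eq = sp.diags(1.0 / row_sums).dot(eq)           # scale each row by 1 / row_sum
print(eq.toarray())                             # stored entries of each row sum to 1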
Example #10
    def _get_gradients_adagrad(self, J):
        """Get the AdaGrad gradients and squared gradients updates.

        The returned gradients still need to be multiplied by the general
        learning rate.

        Parameters
        ----------
        J : theano variable
            cost

        Returns
        -------
        theano variable
            gradients that are adapted by the AdaGrad algorithm
        theano variable
            updated sum of squares for all previous steps
        """
        grads = T.grad(J, [
            self.__dict__[self.updatable_parameters[i]]
            for i in xrange(len(self.updatable_parameters))
        ])

        for i, _ in enumerate(grads):
            grads[i] = debug_print(grads[i],
                                   'grads_' + self.updatable_parameters[i])

        updated_squares = dict()

        # Add squared gradient to the squared gradient matrix for AdaGrad and
        # recalculate the gradient.
        for i, p in enumerate(self.updatable_parameters):

            # We need to handle sparse gradient variables differently
            if isinstance(grads[i], sparse.SparseVariable):
                # Add the squares to the matrix
                power = debug_print(sparse.structured_pow(grads[i], 2.),
                                    'pow_' + p)
                # Remove zeros (might happen when squaring near zero values)
                power = sparse.remove0(power)
                updated_squares[p] = self.__dict__['adagrad_matrix_' +
                                                   p] + power

                # Get only those squares that will be altered, for all others we
                # don't have gradients, i.e., we don't need to consider them at
                # all.
                sqrt_matrix = sparse.sp_ones_like(power)
                sqrt_matrix = debug_print(updated_squares[p] * sqrt_matrix,
                                          'adagrad_squares_subset_' + p)

                # Take the square root of the matrix subset.
                sqrt_matrix = debug_print(sparse.sqrt(sqrt_matrix),
                                          'adagrad_sqrt_' + p)
                # Calc 1. / the square root.
                sqrt_matrix = debug_print(
                    sparse.structured_pow(sqrt_matrix, -1.),
                    'adagrad_pow-1_' + p)
                grads[i] = sparse.mul(grads[i], sqrt_matrix)
            else:
                power = debug_print(T.pow(grads[i], 2.), 'pow_' + p)
                updated_squares[p] = self.__dict__['adagrad_matrix_' +
                                                   p] + power

                # Call sqrt only for those items that are non-zero.
                denominator = T.switch(
                    T.neq(updated_squares[p], 0.0), T.sqrt(updated_squares[p]),
                    T.ones_like(updated_squares[p], dtype=floatX))
                grads[i] = T.mul(grads[i], 1. / denominator)

            updated_squares[p] = debug_print(updated_squares[p],
                                             'upd_squares_' + p)

        for i, _ in enumerate(grads):
            grads[i] = debug_print(
                grads[i], 'grads_updated_' + self.updatable_parameters[i])

        return grads, updated_squares
Example #11
    def test_upcast(self):
        array1 = numpy.array([[1, 0], [3, 0], [0, 6]], dtype='float32')
        array2 = numpy.array([[1, 0], [3, 0], [0, 6]], dtype='int32')
        array3 = numpy.array([[1, 0], [3, 0], [0, 6]], dtype='int8')

        # AddSS and MulSS
        for mtype in _mtypes:
            a = mtype(array1)
            aR = as_sparse_variable(a)
            b = mtype(array2)
            bR = as_sparse_variable(b)
            c = mtype(array3)
            cR = as_sparse_variable(c)

            # Ops that do not upcast
            self.assertRaises(NotImplementedError, add, aR, bR)
            self.assertRaises(NotImplementedError, add, bR, aR)
            self.assertRaises(NotImplementedError, add, bR, cR)
            self.assertRaises(NotImplementedError, add, cR, bR)
            self.assertRaises(NotImplementedError, add, aR, cR)
            self.assertRaises(NotImplementedError, add, cR, aR)

            self.assertRaises(NotImplementedError, mul, aR, bR)
            self.assertRaises(NotImplementedError, mul, bR, aR)
            self.assertRaises(NotImplementedError, mul, bR, cR)
            self.assertRaises(NotImplementedError, mul, cR, bR)
            self.assertRaises(NotImplementedError, mul, aR, cR)
            self.assertRaises(NotImplementedError, mul, cR, aR)

        # AddSD and MulSD
        for mtype in _mtypes:
            a = mtype(array1)
            a_sv = as_sparse_variable(a)
            a_dv = tensor.as_tensor_variable(array1)
            b = mtype(array2)
            b_sv = as_sparse_variable(b)
            b_dv = tensor.as_tensor_variable(array2)
            c = mtype(array3)
            c_sv = as_sparse_variable(c)
            c_dv = tensor.as_tensor_variable(array3)

            # add does not upcast
            self.assertRaises(NotImplementedError, add, a_sv, b_dv)
            self.assertRaises(NotImplementedError, add, b_sv, a_dv)
            self.assertRaises(NotImplementedError, add, b_sv, c_dv)
            self.assertRaises(NotImplementedError, add, c_sv, b_dv)
            self.assertRaises(NotImplementedError, add, a_sv, c_dv)
            self.assertRaises(NotImplementedError, add, c_sv, a_dv)

            # mul may upcast the dense input if needed
            if (config.cast_policy in ('custom', 'numpy')
                    or (config.cast_policy == 'numpy+floatX'
                        and config.floatX == 'float64')):
                # The result should be a float64 (not implemented).
                self.assertRaises(NotImplementedError, mul, a_sv, b_dv)
            elif (config.cast_policy == 'numpy+floatX'
                  and config.floatX == 'float32'):
                # The result should be a float32.
                assert mul(a_sv, b_dv).dtype == 'float32'
            else:
                raise NotImplementedError()
            self.assertRaises(NotImplementedError, mul, b_sv, a_dv)
            assert mul(b_sv, c_dv).dtype == 'int32'
            self.assertRaises(NotImplementedError, mul, c_sv, b_dv)
            assert mul(a_sv, c_dv).dtype == 'float32'
            self.assertRaises(NotImplementedError, mul, c_sv, a_dv)