Example 1
    def __init__(self,
                 steps      = 1,
                 num_layers = 2,
                 num_units  = 32,
                 eps        = 1e-2):

        self.X, self.Z         = T.fvectors('X','Z')
        self.P, self.Q, self.R = T.fmatrices('P','Q','R')
        self.dt                = T.scalar('dt')

        self.matrix_inv = T.nlinalg.MatrixInverse()

        self.ar = AutoRegressiveModel(steps      = steps,
                                      num_layers = num_layers,
                                      num_units  = num_units,
                                      eps        = eps)

        l = InputLayer(input_var = self.X,
                       shape     = (steps,))
        l = ReshapeLayer(l, shape = (1,steps,))
        l = self.ar.network(l)
        l = ReshapeLayer(l, shape=(1,))

        self.l_ = l
        self.f_ = get_output(self.l_)

        self.X_  = T.concatenate([self.f_, T.dot(T.eye(steps)[:-1], self.X)], axis=0)
        self.fX_ = G.jacobian(self.X_.flatten(), self.X)
        self.P_  = T.dot(T.dot(self.fX_, self.P), T.transpose(self.fX_)) + \
                    T.dot(T.dot(T.eye(steps)[:,0:1], self.dt * self.Q), T.eye(steps)[0:1,:])

        self.h = T.dot(T.eye(steps)[0:1], self.X_)
        self.y = self.Z - self.h

        self.hX_ = G.jacobian(self.h, self.X_)

        self.S = T.dot(T.dot(self.hX_, self.P_), T.transpose(self.hX_)) + self.R
        self.K = T.dot(T.dot(self.P_, T.transpose(self.hX_)), self.matrix_inv(self.S))

        self.X__ = self.X_ + T.dot(self.K, self.y)
        self.P__ = T.dot(T.identity_like(self.P) - T.dot(self.K, self.hX_), self.P_)


        self.prediction = theano.function(inputs  = [self.X,
                                                     self.P,
                                                     self.Q,
                                                     self.dt],
                                          outputs = [self.X_,
                                                     self.P_],
                                          allow_input_downcast = True)

        self.update = theano.function(inputs  = [self.X,
                                                 self.Z,
                                                 self.P,
                                                 self.Q,
                                                 self.R,
                                                 self.dt],
                                      outputs = [self.X__,
                                                 self.P__],
                                      allow_input_downcast = True)
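Here the autoregressive network output f_ acts as the nonlinear state transition: it is prepended to the shifted window of previous values to form X_, and G.jacobian linearizes that mapping (fX_) so the covariance can be propagated as P_ = fX_ · P · fX_ᵀ plus the process noise dt·Q injected into the newly predicted component.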
Example 2
    def initialize_calc_ll_gmm_fun(self):
        Yvec = T.dvector('Y')
        meansvec = T.dvector('means')
        covarsvec = T.dvector('covars')
        weights = T.dvector('weights')
        lam = T.dscalar('lambda')
        ndim = meansvec.shape[0] // self.gm_num   # integer division: dimensionality per component
        Y = T.reshape(Yvec, (Yvec.shape[0] // ndim, ndim))
        LL, p1, p2 = self.calc_ll_gmm(Y,
                                      T.reshape(meansvec, (self.gm_num, ndim)),
                                      T.reshape(covarsvec, (self.gm_num, ndim)),
                                      weights)
        LL_lag = T.sum(LL) + lam * (T.sum(weights) - 1)  # Lagrangian: weights constrained to sum to 1
        LL_sum = T.sum(LL)
        self.gmm_f = function([Yvec, meansvec, covarsvec, weights, lam], LL_lag)

        LLg = gradient.jacobian(LL_lag, [Yvec, meansvec, covarsvec, weights, lam])

        LL_sum_g = gradient.jacobian(LL_sum, [Yvec, meansvec, covarsvec, weights])

        llhm = gradient.jacobian(LLg[1], [Yvec, meansvec, covarsvec, weights])
        llhc = gradient.jacobian(LLg[2], [Yvec, meansvec, covarsvec, weights])
        llhw = gradient.jacobian(LLg[3], [Yvec, meansvec, covarsvec, weights, lam])

        self.gmm_df = function([Yvec, meansvec, covarsvec, weights], LL_sum_g)
        self.gmm_hm = function([Yvec, meansvec, covarsvec, weights, lam], llhm)
        self.gmm_hc = function([Yvec, meansvec, covarsvec, weights, lam], llhc)
        self.gmm_hw = function([Yvec, meansvec, covarsvec, weights, lam], llhw)
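For reference, LL_lag above is the Lagrangian of the GMM log-likelihood under the constraint that the mixture weights sum to one, LL_lag = Σ_i LL_i + λ·(Σ_k w_k − 1); llhm, llhc and llhw are then second-derivative (Hessian) blocks of this Lagrangian with respect to the means, covariances and weights.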
Example 3
 def apply(self, y, y_hat, biases):
     cost = tensor.nnet.categorical_crossentropy(y_hat, y.flatten())
     predicted = y_hat.argmax(axis=1)
     # Here we just count the number of unit biases with nonzero gradient
     jacobians = gradient.jacobian(cost, biases)
     counts = tensor.zeros_like(y)
     for j in jacobians:
         counts += tensor.neq(j, 0).sum(axis=1)
     return counts
Example 4
def grad(self):
    netInputs = self.model.input
    # Keep the expression symbolic: use the backend log rather than numpy.log
    netOutputs = K.log(self.model.output.flatten())
    # One Jacobian of the log-outputs per weight tensor
    gradients = [jacobian(netOutputs, w) for w in self.model.weights]
    return K.function(inputs=[netInputs],
                      outputs=gradients,
                      updates=self.model.state_updates)
Example 5
 def _compute_jacobians(self):
     if self.case_costs is None or self.case_costs.ndim == 0:
         raise ValueError("can't infer jacobians; no case_costs specified")
     elif self.intpic_parameters is None or len(self.parameters) == 0:
         raise ValueError("can't infer jacobians; no parameters specified")
     logging.info("Taking the intpic jacobians")
     jacobians = gradient.jacobian(self.case_costs, self.intpic_parameters)
     jacobian_map = OrderedDict(equizip(self.intpic_parameters, jacobians))
     logging.info("The intpic jacobian computation graph is built")
     return jacobian_map
Example 6
 def _compute_jacobians(self):
     if self.case_costs is None or self.case_costs.ndim == 0:
         raise ValueError("can't infer jacobians; no case_costs specified")
     elif self.intpic_parameters is None or len(self.parameters) == 0:
         raise ValueError("can't infer jacobians; no parameters specified")
     logging.info("Taking the intpic jacobians")
     jacobians = gradient.jacobian(self.case_costs, self.intpic_parameters)
     jacobian_map = OrderedDict(equizip(self.intpic_parameters, jacobians))
     logging.info("The intpic jacobian computation graph is built")
     return jacobian_map
Example 7
 def apply(self, y, y_hat, biases, outs):
     cost = tensor.nnet.categorical_crossentropy(y_hat, y.flatten())
     predicted = y_hat.argmax(axis=1)
     # Count output units that are active (output > 0) but whose bias gradient is zero
     jacobians = gradient.jacobian(cost, biases)
     counts = tensor.zeros_like(y)
     for j, o in zip(jacobians, outs):
         while o.ndim > 2:
             o = o.max(axis=o.ndim - 1)
         counts += (tensor.gt(o, 0) * tensor.eq(j, 0)).sum(axis=1)
     return counts
Example 8
def test_gn_product_rnn():
    raise SkipTest()
    np.random.seed(1010)
    n_timesteps = 3
    n_inpt = 3
    n_output = 2

    rnn = SupervisedRnn(n_inpt,
                        1,
                        n_output,
                        out_transfer='sigmoid',
                        loss='squared')
    rnn.parameters.data[:] = np.random.normal(0, 1, rnn.parameters.data.shape)
    X = np.random.random((n_timesteps, 1, n_inpt)).astype(theano.config.floatX)
    Z = np.random.random(
        (n_timesteps, 1, n_output)).astype(theano.config.floatX)

    # Calculate the GN explicitly.

    # Shortcuts.
    loss = rnn.exprs['loss']
    output_in = rnn.exprs['output_in']
    p = T.vector('some-vector')

    J = jacobian(output_in[:, 0, :].flatten(), rnn.parameters.flat)

    little_J = T.grad(loss, output_in)[:, 0, :]
    little_H = [[T.grad(little_J[i, j], output_in) for j in range(n_output)]
                for i in range(n_timesteps)]

    f_J = rnn.function(['inpt'], J)
    f_H = rnn.function(['inpt', 'target'], little_H)

    J_ = f_J(X)
    H_ = np.array(f_H(X, Z))[:, :, :, 0, :]
    H_.shape = H_.shape[0] * H_.shape[1], H_.shape[2] * H_.shape[3]

    G_expl = np.dot(J_.T, np.dot(H_, J_))

    p = np.random.random(rnn.parameters.data.shape)
    Gp_expl = np.dot(G_expl, p)

    Hp = rnn._gauss_newton_product()
    args = list(rnn.data_arguments)
    f_Hp = rnn.function(['some-vector'] + args, Hp, explicit_pars=True)
    Gp = f_Hp(rnn.parameters.data, p, X, Z)

    assert np.allclose(Gp, Gp_expl)
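For reference, the explicit quantity reconstructed here is the Gauss-Newton matrix G = Jᵀ·H·J and its product G·p with a random vector p, where J is the Jacobian of the pre-activation outputs output_in with respect to the flat parameter vector and H is the Hessian of the loss with respect to output_in; the assertion checks it against the model's _gauss_newton_product.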
Example 9
def test_gn_product_rnn():
    raise SkipTest()
    np.random.seed(1010)
    n_timesteps = 3
    n_inpt = 3
    n_output = 2

    rnn = SupervisedRnn(n_inpt, [1], n_output, out_transfer='sigmoid',
                        loss='squared')
    rnn.parameters.data[:] = np.random.normal(0, 1, rnn.parameters.data.shape)
    X = np.random.random((n_timesteps, 1, n_inpt)).astype(theano.config.floatX)
    Z = np.random.random((n_timesteps, 1, n_output)
                         ).astype(theano.config.floatX)
    X, Z = theano_floatx(X, Z)

    # Calculate the GN explicitly.

    # Shortcuts.
    loss = rnn.exprs['loss']
    output_in = rnn.exprs['output_in']
    p = T.vector('some-vector')

    J = jacobian(output_in[:, 0, :].flatten(), rnn.parameters.flat)

    little_J = T.grad(loss, output_in)[:, 0, :]
    little_H = [[T.grad(little_J[i, j], output_in)
                 for j in range(n_output)]
                for i in range(n_timesteps)]

    f_J = rnn.function(['inpt'], J)
    f_H = rnn.function(['inpt', 'target'], little_H)

    J_ = f_J(X)
    H_ = np.array(f_H(X, Z))[:, :, :, 0, :]
    H_.shape = H_.shape[0] * H_.shape[1], H_.shape[2] * H_.shape[3]

    G_expl = np.dot(J_.T, np.dot(H_, J_))

    p = np.random.random(rnn.parameters.data.shape)
    Gp_expl = np.dot(G_expl, p)

    Hp = rnn._gauss_newton_product()
    args = list(rnn.data_arguments)
    f_Hp = rnn.function(
        ['some-vector'] + args, Hp, explicit_pars=True)
    Gp = f_Hp(rnn.parameters.data, p, X, Z)

    assert np.allclose(Gp, Gp_expl)
Example 10
    def __init__(self,
                 state       = 'x',
                 measurement = 'z',
                 motion_transition      = None,
                 measurement_transition = None):

        self.N = len(state.split(' '))
        self.M = len(measurement.split(' '))


        self.X, self.Z         = T.fvectors('X','Z')
        self.P, self.Q, self.R = T.fmatrices('P','Q','R')
        self.F, self.H         = T.matrices('F','H')
        self.dt                = T.scalar('dt')


        self.X_  = T.dot(self.F, self.X)
        self.fX_ = G.jacobian(T.flatten(self.X_), self.X)
        self.P_  = T.dot(T.dot(self.fX_, self.P), T.transpose(self.fX_)) + self.dt * self.Q

        self.h = T.dot(self.H, self.X_)
        self.y = self.Z - self.h

        self.hX_ = G.jacobian(self.h, self.X_)

        self.matrix_inv = T.nlinalg.MatrixInverse()

        self.S = T.dot(T.dot(self.hX_, self.P_), T.transpose(self.hX_)) + self.R
        self.K = T.dot(T.dot(self.P_, T.transpose(self.hX_)), self.matrix_inv(self.S))

        self.X__ = self.X_ + T.dot(self.K, self.y)
        self.P__ = T.dot(T.identity_like(self.P) - T.dot(self.K, self.hX_), self.P_)


        self.prediction = theano.function(inputs  = [self.X,
                                                     self.P,
                                                     self.Q,
                                                     self.F,
                                                     self.dt],
                                          outputs = [self.X_,
                                                     self.P_],
                                          allow_input_downcast = True)

        self.update = theano.function(inputs  = [self.X,
                                                 self.Z,
                                                 self.P,
                                                 self.Q,
                                                 self.R,
                                                 self.F,
                                                 self.H,
                                                 self.dt],
                                      outputs = [self.X__,
                                                 self.P__],
                                      allow_input_downcast = True)

        if motion_transition is None:
            self.motion_transition = np.eye(self.N)
        else:
            self.motion_transition = np.array(motion_transition)

        if measurement_transition is None:
            self.measurement_transition = np.eye(self.M)
        else:
            self.measurement_transition = np.array(measurement_transition)
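A minimal usage sketch for this filter; the class name Kalman and the concrete motion/measurement model below are assumptions, since the listing only shows __init__:

import numpy as np

kf = Kalman(state='x dx', measurement='z')   # hypothetical class name; 2 states, 1 measurement

dt = 0.1
F  = np.array([[1.0, dt], [0.0, 1.0]])       # constant-velocity motion model
H  = np.array([[1.0, 0.0]])                  # only the position is observed
X  = np.zeros(2)                             # state mean
P  = np.eye(2)                               # state covariance
Q  = 0.01 * np.eye(2)                        # process noise
R  = np.array([[0.1]])                       # measurement noise

X_pred, P_pred = kf.prediction(X, P, Q, F, dt)                      # predict step
X_post, P_post = kf.update(X, np.array([1.0]), P, Q, R, F, H, dt)   # update with measurement z = 1.0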
Example 11
def contractive_regularizer(op, examples):
    jacobian = G.jacobian(op.flatten(), examples)
    regularizer = T.sum(T.abs_(jacobian) ** 2)
    return regularizer
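A short sketch of how this penalty is typically added to a cost; the toy encoder below is an assumption, used only to give contractive_regularizer something to differentiate:

import numpy as np
import theano
import theano.tensor as T
import theano.gradient as G   # also needed by contractive_regularizer above

x = T.dvector('x')                                   # a single input example
W = theano.shared(np.random.randn(3, 5), name='W')   # toy encoder weights
hidden = T.nnet.sigmoid(T.dot(W, x))                 # encoder output (the `op` argument)
cost = (T.sum((T.dot(W.T, hidden) - x) ** 2)         # reconstruction error
        + 0.1 * contractive_regularizer(hidden, x))  # plus the contractive penalty
f = theano.function([x], cost)
print(f(np.random.randn(5)))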
Example 12
def fProp():
    """
    Returns the Theano-style forward propagation and gradient calculation function
    
    The generalized function will have the following structure:
    
    [o, J, dIN, dW, dOUT] = fProp(X,Y,IN,W,OUT,L)
    
    Inputs:
    
    X: numpy array containing the examples to be forward propagated. Each row
    is an example, each column is a feature.
    
    Y: target values; each item has to correspond to an example in X. If not
    training, just pass an array of zeros.
    
    IN: numpy 2D array with weights that map input layer to first hidden layer
    
    W: numpy 3D array with weights that map within hidden layers. W[:,:,i]
    corresponds to the weights mapping from hidden layer i to hidden layer i+1
    
    OUT: numpy 2D array that maps from last hidden layer to output unit
    
    L: regularization parameter
    
    Outputs:
    
    o: output for each example in X
    
    J: cost calculated using negative log-likelihood
    
    dIN, dW, dOUT: partial derivatives of the cost with respect to the weights
    
    This function was developed in the Multiscale Cardiovascular Engineering
    Group (MUSE) at University College London by Carlos Ledezma.
    """    
    
    import theano.tensor as T
    from theano import function
    from theano.gradient import jacobian    
    from theano import scan
    
    # Define the forward propagation function
    # This function will process all examples at the same time
    
    L = T.dscalar('L') # Regularization term
    X = T.dmatrix('X') # Input cases
    numEx = T.shape(X)[0]
    Y = T.dmatrix('Y') # Target
    IN = T.dmatrix('IN') # ANN weights mapping input layer to first hidden layer
    W = T.dtensor3('W') # ANN weights mapping between hidden layers
    OUT = T.dmatrix('OUT') # ANN weights mapping last hidden layer to output
    
    # Start forward prop by mapping inputs to first hidden layer
    Xb = T.concatenate([T.ones((numEx,1)),X],axis=1) # Add bias term
    a = T.dot(IN,Xb.T) # Linear combination of inputs
    A = T.nnet.relu(a) # ReLU
    
    '''
    Propagate through the network
    
    Each step is as follows:
    
    actb = T.concatenate([T.ones((1,numEx)),act], axis=0) # Add bias term
    b = T.dot(W[:,:,i],actb) # Linear combination of inputs
    B = T.nnet.relu(b) # ReLU
    
    '''
    
    B, update = scan(lambda i, act, W: T.nnet.relu(T.dot(W[:, :, i], T.concatenate([T.ones((1, numEx)), act], axis=0))),
                     sequences=T.arange(W.shape[2]),
                     outputs_info=A,
                     non_sequences=W)
    
    
    B_final = B[-1]
    # Map to output layer
    Bb = T.concatenate([T.ones((1,numEx)),B_final], axis=0) # Add bias term
    o = T.dot(OUT,Bb) # Linear combination of inputs
    o = T.nnet.sigmoid(o) # Sigmoid for classification output
    
    J = T.nnet.binary_crossentropy(o, Y).sum() / numEx  # Cross-entropy cost
    J += L / (2 * numEx) * ((W ** 2).sum() + (OUT ** 2).sum() + (IN ** 2).sum())  # Add L2 regularization
    
    # Calculate jacobians of cost
    dIN = jacobian(J,IN)
    dW = jacobian(J,W)
    dOUT = jacobian(J,OUT)
    
    return function([X,Y,IN,W,OUT,L],[o,J,dIN,dW,dOUT])
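A hypothetical call of the compiled function returned above; the shapes follow from the docstring and the T.dot calls (each weight block carries an extra bias column):

import numpy as np

forward = fProp()

n_ex, n_feat, n_hid = 5, 3, 4
X   = np.random.rand(n_ex, n_feat)                       # one example per row
Y   = np.random.randint(0, 2, (1, n_ex)).astype(float)   # one binary target per example
IN  = 0.1 * np.random.randn(n_hid, n_feat + 1)           # input -> first hidden layer (+ bias)
W   = 0.1 * np.random.randn(n_hid, n_hid + 1, 1)         # hidden i -> hidden i + 1 (+ bias)
OUT = 0.1 * np.random.randn(1, n_hid + 1)                # last hidden layer -> output (+ bias)

o, J, dIN, dW, dOUT = forward(X, Y, IN, W, OUT, 0.01)    # L = 0.01
print(o.shape, J)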
Example 13
import theano
import theano.tensor as T
import theano.gradient as grad
from theano import function

x = T.vector('x')
y = x ** 2
J, updates = theano.scan(lambda i, y, x: T.grad(y[i], x), sequences=T.arange(y.shape[0]), non_sequences=[y, x])
f = function([x], J, updates=updates)
print(f([4, 4]))

f_grad = function([x], grad.jacobian(y, x))
print(f_grad([4, 4]))
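Both approaches build the same Jacobian of y = x**2 with respect to x, i.e. the diagonal matrix diag(2x); for the input [4, 4] both calls print the 2×2 matrix [[8., 0.], [0., 8.]].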
Example 14
def _compute_jacobians(components, parameters):
    logging.info("Taking the component jacobians")
    jacobians = gradient.jacobian(components, parameters)
    jacobian_map = OrderedDict(equizip(parameters, jacobians))
    logging.info("The component jacobian computation graph is built")
    return jacobian_map
Example 15
File: ffnn.py  Project: elnaaz/alex
    def __init__(self, n_inputs, n_hidden_units, n_hidden_layers, n_outputs, hidden_activation = 'tanh', weight_l2 = 1e-6):
        self.n_inputs = n_inputs
        self.n_hidden_units = n_hidden_units
        self.n_hidden_layers = n_hidden_layers

        if hidden_activation == 'tanh':
            self.hidden_activation = T.tanh
        elif hidden_activation == 'sigmoid':
            self.hidden_activation = T.nnet.sigmoid
        elif hidden_activation == 'softplus':
            self.hidden_activation = T.nnet.softplus
        elif hidden_activation == 'relu':
            self.hidden_activation = lambda x: T.maximum(0, x)
        else:
            raise NotImplementedError

        self.n_outputs = n_outputs
        self.n_hidden_activation = hidden_activation

        self.n_hidden = [n_hidden_units,]*n_hidden_layers
        self.activations = [self.hidden_activation,]*self.n_hidden_layers
        self.activations.extend([T.nnet.softmax,]) # NOTE: The last function goes to the output layer.

        assert len(self.n_hidden) + 1 == len(self.activations)

        # Model definition.
        x = T.fmatrix('X')
        self.params = []  # Keep model params here.

        # Build the layered neural network.
        y = x
        layers = [self.n_inputs] + self.n_hidden + [self.n_outputs]

        # Iterate over pairs of adjacent layers.
        for i, (n1, n2, act) in enumerate(zip(layers[:-1], layers[1:], self.activations)):
            w = theano.shared(
                               np.asarray(rng.uniform(
                                                      low=-np.sqrt(6. / (n1 + n2)),
                                                      high=np.sqrt(6. / (n1 + n2)),
                                                      size=(n1, n2)),
                                          dtype=np.float32),
                               'W%d' % i, borrow=True)
            b = theano.shared(np.zeros(n2, dtype=np.float32), 'b%d' % (i + 1))
            self.params.append((w, b))

            y = act(T.dot(y, w) + b)

        self.f_y = function([x], y) # PREDICTION FUNCTION

        # Define the loss function.
        true_y = T.ivector('true_Y')  # The desired output vector.
        loss = -T.log(y[T.arange(y.shape[0]), true_y])  # Negative log-likelihood.
        toss = T.sum(loss)                              # SUM negative log-likelihood.

        # Add regularization.
        l2 = 0
        for w, b in self.params:
            l2 += (w**2).sum() + (b**2).sum()
        loss += weight_l2 * l2

        self.f_toss = function([x, true_y], toss, allow_input_downcast=True)

        # Derive the gradients for the parameters.
        self.f_g_losses = []
        self.f_j_losses = []
        for w, b in self.params:
            g_loss = T.grad(toss, wrt=[w, b])
            f_g_loss = function([x, true_y], g_loss)
            self.f_g_losses.append(f_g_loss)
            
            j_loss = jacobian(loss, wrt=[w, b])
#            j_loss, updates = theano.scan(lambda i: T.grad(loss[i], [w, b]), sequences=T.arange(loss.shape[0]), non_sequences=[])
            f_j_loss = function([x, true_y], j_loss)
            self.f_j_losses.append(f_j_loss)

        self.rprop_init()
        self.adalr_init()
Example 16
    def __init__(self,
                 n_inputs,
                 n_hidden_units,
                 n_hidden_layers,
                 n_outputs,
                 hidden_activation='tanh',
                 weight_l2=1e-6):
        self.n_inputs = n_inputs
        self.n_hidden_units = n_hidden_units
        self.n_hidden_layers = n_hidden_layers

        if hidden_activation == 'tanh':
            self.hidden_activation = T.tanh
        elif hidden_activation == 'sigmoid':
            self.hidden_activation = T.nnet.sigmoid
        elif hidden_activation == 'softplus':
            self.hidden_activation = T.nnet.softplus
        elif hidden_activation == 'relu':
            self.hidden_activation = lambda x: T.maximum(0, x)
        else:
            raise NotImplementedError

        self.n_outputs = n_outputs
        self.n_hidden_activation = hidden_activation

        self.n_hidden = [
            n_hidden_units,
        ] * n_hidden_layers
        self.activations = [
            self.hidden_activation,
        ] * self.n_hidden_layers
        self.activations.extend([
            T.nnet.softmax,
        ])  # NOTE: The last function goes to the output layer.

        assert len(self.n_hidden) + 1 == len(self.activations)

        # Model definition.
        x = T.fmatrix('X')
        self.params = []  # Keep model params here.

        # Build the layered neural network.
        y = x
        layers = [self.n_inputs] + self.n_hidden + [self.n_outputs]

        # Iterate over pairs of adjacent layers.
        for i, (n1, n2, act) in enumerate(
                zip(layers[:-1], layers[1:], self.activations)):
            w = theano.shared(np.asarray(rng.uniform(
                low=-np.sqrt(6. / (n1 + n2)),
                high=np.sqrt(6. / (n1 + n2)),
                size=(n1, n2)),
                                         dtype=np.float32),
                              'W%d' % i,
                              borrow=True)
            b = theano.shared(np.zeros(n2, dtype=np.float32), 'b%d' % (i + 1))
            self.params.append((w, b))

            y = act(T.dot(y, w) + b)

        self.f_y = function([x], y)  # PREDICTION FUNCTION

        # Define the loss function.
        true_y = T.ivector('true_Y')  # The desired output vector.
        loss = -T.log(y[T.arange(y.shape[0]),
                        true_y])  # Negative log-likelihood.
        toss = T.sum(loss)  # SUM negative log-likelihood.

        # Add regularization.
        l2 = 0
        for w, b in self.params:
            l2 += (w**2).sum() + (b**2).sum()
        loss += weight_l2 * l2

        self.f_toss = function([x, true_y], toss, allow_input_downcast=True)

        # Derive the gradients for the parameters.
        self.f_g_losses = []
        self.f_j_losses = []
        for w, b in self.params:
            g_loss = T.grad(toss, wrt=[w, b])
            f_g_loss = function([x, true_y], g_loss)
            self.f_g_losses.append(f_g_loss)

            j_loss = jacobian(loss, wrt=[w, b])
            #            j_loss, updates = theano.scan(lambda i: T.grad(loss[i], [w, b]), sequences=T.arange(loss.shape[0]), non_sequences=[])
            f_j_loss = function([x, true_y], j_loss)
            self.f_j_losses.append(f_j_loss)

        self.rprop_init()
        self.adalr_init()
Example 17
def contractive_regularizer(op, examples):
    jacobian = G.jacobian(op.flatten(), examples)
    regularizer = T.sum(T.abs_(jacobian)**2)
    return regularizer
Example 18
def _compute_jacobians(components, parameters):
    logging.info("Taking the component jacobians")
    jacobians = gradient.jacobian(components, parameters)
    jacobian_map = OrderedDict(equizip(parameters, jacobians))
    logging.info("The component jacobian computation graph is built")
    return jacobian_map
Example 19
import theano
import theano.tensor as T
import theano.gradient as grad
from theano import function

x = T.vector('x')
y = x**2
J, updates = theano.scan(lambda i, y, x: T.grad(y[i], x),
                         sequences=T.arange(y.shape[0]),
                         non_sequences=[y, x])
f = function([x], J, updates=updates)
print(f([4, 4]))

f_grad = function([x], grad.jacobian(y, x))
print(f_grad([4, 4]))