Example #1
 def compute_output(self, network, h_vw, x_vw):
     batch_axis = network.find_hyperparameter(["batch_axis"])
     if batch_axis is None:
         # NOTE: this code path is not tested!
         jacobian = T.jacobian(h_vw.variable.ravel(), x_vw.variable)
         res = (jacobian ** 2).mean()
         res_shape = ()
     else:
         batch_size = h_vw.symbolic_shape()[batch_axis]
         # sum across batch to avoid disconnected input error
         # ravel to be a vector
         h_var = h_vw.variable.sum(axis=batch_axis).ravel()
         x_var = x_vw.variable
         # shape of result = h_var.shape + x_var.shape
         jacobian = T.jacobian(h_var, x_var)
         # put batch axis as first dimension
         # adding 1 to batch axis, because len(h_var.shape) == 1
         swapped_jacobian = jacobian.swapaxes(0, batch_axis + 1)
         # convert to a matrix and mean over elements in a batch
         reshaped_jacobian = swapped_jacobian.reshape((batch_size, -1))
         res = (reshaped_jacobian ** 2).mean(axis=1)
         res_shape = (h_vw.shape[batch_axis],)
     network.create_variable(
         "default",
         variable=res,
         shape=res_shape,
         tags={"output"},
     )
Example #2
    def test_dot_not_output(self):
        """
        Test the case where the vector input to the dot is not already an
        output of the inner function.
        """

        v = T.vector()
        m = T.matrix()
        output = T.dot(v, m)

        # Compile the function twice, once with the optimization and once
        # without
        opt_mode = mode.including("scan")
        f_opt = theano.function([v, m], T.jacobian(output, v), mode=opt_mode)

        no_opt_mode = mode.excluding("scanOp_pushout_output")
        f_no_opt = theano.function([v, m], T.jacobian(output, v), mode=no_opt_mode)

        # Ensure that the optimization was performed correctly in f_opt
        # The inner function of scan should have only one output and it should
        # not be the result of a Dot
        scan_node = [node for node in f_opt.maker.fgraph.toposort()
                     if isinstance(node.op, Scan)][0]
        assert len(scan_node.op.outputs) == 1
        assert not isinstance(scan_node.op.outputs[0], T.Dot)

        # Ensure that the function compiled with the optimization produces
        # the same results as the function compiled without
        v_value = numpy.random.random((4)).astype(config.floatX)
        m_value = numpy.random.random((4, 5)).astype(config.floatX)

        output_opt = f_opt(v_value, m_value)
        output_no_opt = f_no_opt(v_value, m_value)

        utt.assert_allclose(output_opt, output_no_opt)
Example #3
    def _gen_deriv_functions(self):
        '''
        _gen_deriv_functions
        To be called by the derived class to compile all the required functions.
        '''

        ##################################
        # Define some Theano derivatives
        # Derivative w.r.t. the hyperparameters
        self.th_dhyp, uhyp = theano.scan(
            lambda i, y, x: T.jacobian(y[i], x),
            sequences=T.arange(self.th_K.shape[0]),
            non_sequences=[self.th_K, self.th_hyp])
        # Derivative w.r.t. the inputs
        self.th_dX, ux = theano.scan(lambda i, y, x: T.jacobian(y[i], x),
                                     sequences=T.arange(self.th_K.shape[0]),
                                     non_sequences=[self.th_K, self.th_X])

        ##################################
        # Compilation
        # Kxx: self covariance matrix
        self.K = theano.function([self.th_X, self.th_hyp], self.th_K)
        # Kxy: cross covariance matrix
        self.Kc = theano.function([self.th_X, self.th_Xc, self.th_hyp],
                                  self.th_Kc)
        self.dK_dhyp = theano.function([self.th_X, self.th_hyp],
                                       self.th_dhyp,
                                       updates=uhyp)
        self.dK_dX = theano.function([self.th_X, self.th_hyp],
                                     self.th_dX,
                                     updates=ux)
Example #4
 def compute_output(self, network, h_vw, x_vw):
     batch_axis = network.find_hyperparameter(["batch_axis"])
     if batch_axis is None:
         # NOTE: this code path is not tested!
         jacobian = T.jacobian(h_vw.variable.ravel(), x_vw.variable)
         res = (jacobian**2).mean()
         res_shape = ()
     else:
         batch_size = h_vw.symbolic_shape()[batch_axis]
         # sum across batch to avoid disconnected input error
         # ravel to be a vector
         h_var = h_vw.variable.sum(axis=batch_axis).ravel()
         x_var = x_vw.variable
         # shape of result = h_var.shape + x_var.shape
         jacobian = T.jacobian(h_var, x_var)
         # put batch axis as first dimension
         # adding 1 to batch axis, because len(h_var.shape) == 1
         swapped_jacobian = jacobian.swapaxes(0, batch_axis + 1)
         # convert to a matrix and mean over elements in a batch
         reshaped_jacobian = swapped_jacobian.reshape((batch_size, -1))
         res = (reshaped_jacobian**2).mean(axis=1)
         res_shape = (h_vw.shape[batch_axis], )
     network.create_vw(
         "default",
         variable=res,
         shape=res_shape,
         tags={"output"},
     )
Example #5
    def test_dot_not_output(self):
        # Test the case where the vector input to the dot is not already an
        # output of the inner function.

        v = tt.vector()
        m = tt.matrix()
        output = tt.dot(v, m)

        # Compile the function twice, once with the optimization and once
        # without
        opt_mode = mode.including("scan")
        f_opt = theano.function([v, m], tt.jacobian(output, v), mode=opt_mode)

        no_opt_mode = mode.excluding("scanOp_pushout_output")
        f_no_opt = theano.function([v, m], tt.jacobian(output, v), mode=no_opt_mode)

        # Ensure that the optimization was performed correctly in f_opt
        # The inner function of scan should have only one output and it should
        # not be the result of a Dot
        scan_node = [
            node for node in f_opt.maker.fgraph.toposort() if isinstance(node.op, Scan)
        ][0]
        assert len(scan_node.op.outputs) == 1
        assert not isinstance(scan_node.op.outputs[0], tt.Dot)

        # Ensure that the function compiled with the optimization produces
        # the same results as the function compiled without
        v_value = np.random.random(4).astype(config.floatX)
        m_value = np.random.random((4, 5)).astype(config.floatX)

        output_opt = f_opt(v_value, m_value)
        output_no_opt = f_no_opt(v_value, m_value)

        utt.assert_allclose(output_opt, output_no_opt)
Example #6
def augment_system(ode_func, t_n, t_m):
    '''Function to create the augmented system.

    Take a function which specifies a set of differential equations and return
    a compiled function which allows for computation of gradients of the
    differential equation's solution with respect to the parameters.

    Args:
        ode_func (function): Differential equation.  Returns array-like.
        t_n: Number of states (rows of the dydp matrix).
        t_m: Number of parameters (columns of the dydp matrix).

    Returns:
        system (function): Augmented system of differential equations.

    '''

    #Shapes for the dydp dmatrix
    #TODO: Should this be int64 or other dtype?
    # t_n = tt.scalar('n', dtype = 'int64')
    # t_m = tt.scalar('m', dtype = 'int64')

    #Present state of the system
    t_y = tt.vector('y', dtype=theano.config.floatX)

    #Parameter(s).  Should be a vector to allow for generalization to multiparameter
    #systems of ODEs
    t_p = tt.vector('p', dtype=theano.config.floatX)

    #Time.  Allow for non-autonomous systems of ODEs to be analyzed
    t_t = tt.scalar('t', dtype=theano.config.floatX)

    #Present state of the gradients:
    #Will always be 0 unless the parameter is the initial condition
    #Entry i,j is the partial of y[i] with respect to p[j]
    dydp_vec = tt.vector('dydp', dtype=theano.config.floatX)

    dydp = dydp_vec.reshape((t_n,t_m))

    #Stack the results of the ode_func
    #TODO: Does this behave the same if the ODE is scalar?
    f_tensor = tt.stack(ode_func(t_y, t_t, t_p))

    #Now compute gradients
    J = tt.jacobian(f_tensor,t_y)

    Jdfdy = tt.dot(J, dydp)

    grad_f = tt.jacobian(f_tensor, t_p)

    #This is the time derivative of dydp
    ddt_dydp = (Jdfdy + grad_f).flatten()


    system = theano.function(
            inputs=[t_y, t_t, t_p, dydp_vec],
            outputs=[f_tensor, ddt_dydp],
            on_unused_input='ignore')

    return system
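A minimal usage sketch for augment_system (hypothetical; it assumes the theano/tt imports used above are in scope): a single-state exponential-decay ODE with one parameter, so t_n = t_m = 1 and dydp is a single flattened entry.

import numpy as np
import theano

floatX = theano.config.floatX

def decay(y, t, p):
    # dy/dt = -p[0] * y[0]
    return [-p[0] * y[0]]

system = augment_system(decay, 1, 1)
f_val, ddt_dydp = system(np.array([2.0], dtype=floatX),   # y
                         np.asarray(0.0, dtype=floatX),   # t
                         np.array([0.5], dtype=floatX),   # p
                         np.array([0.0], dtype=floatX))   # dydp, flattened
# expected: f_val == [-1.0] and ddt_dydp == [-2.0] (= J.dot(dydp) + df/dp)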
Example #7
    def _grad_single(self, ct, s, lnC2, GAMMI2):
        lnC = lnC2
        GAMMI = GAMMI2
        v = self.v#T.as_tensor(self.v)[:,ct:]
        v0 = T.as_tensor(v[v[:,0]==0, :])
        v1 = T.as_tensor(v[v[:,0]==1, :])

        cnp = v.shape[0]

        # Gradient of fE wrt the priors over final state
        [ofE, oxS], upd_fE_single = th.scan(fn=self._free_energy,
                                   sequences=v,
                                   non_sequences=[s,self.h,lnC,self.b])
        ofE0 = ofE[v0].sum()
        ofE1 = ofE[v1].sum()

        dFE0dlnC = T.jacobian(ofE0, lnC)
        dFE1dlnC = T.jacobian(ofE1, lnC)
        dFEdlnC  = T.jacobian(ofE,  lnC)
        ofE_ = T.vector()
        ofE_.tag.test_value = ofE.tag.test_value

        # Gradient of Gamma with respect to its initial condition:
        GAMMA, upd_GAMMA = th.scan(fn=self._upd_gamma,
               outputs_info=[GAMMI],
               non_sequences=[ofE, self.lambd, self.alpha, self.beta, cnp],
               n_steps=4)
        dGdg = T.grad(GAMMA[-1], GAMMI)

        dGdfE = T.jacobian(GAMMA[-1], ofE)
        dGdlnC = dGdfE.dot(dFEdlnC)

        out1 = ofE0
        out2 = ofE1
        maxout = T.max([out1, out2])

        exp_out1 = T.exp(GAMMA[-1]*(out1 - maxout))
        exp_out2 = T.exp(GAMMA[-1]*(out2 - maxout))
        norm_const = exp_out1 + exp_out2

        # Derivative wrt the second output (gammi):
        Jac1_gammi = (-(out1-out2)*dGdg*
                T.exp(GAMMA[-1]*(out1+out2 - 2*maxout))/(norm_const**2))
        Jac2_gammi = -Jac1_gammi
#        dfd1_tZ = Jac1_gammi*dCdf[1][0]+ Jac2_gammi*dCdf[1][1]

        # Derivative wrt first input (lnc)
        Jac1_lnC = (T.exp(GAMMA[-1]*(out1 + out2 - 2*maxout))/(norm_const**2)*
                  (-dGdlnC*(out1 - out2) - GAMMA[-1]*(dFE0dlnC - dFE1dlnC)))
        Jac2_lnC = -Jac1_lnC

        Jac1 = T.concatenate([T.stack(Jac1_gammi), Jac1_lnC])
        Jac2 = T.concatenate([T.stack(Jac2_gammi), Jac2_lnC])
        self.debug = [Jac1_lnC, Jac2_lnC, Jac2_gammi, Jac1_gammi, dFE0dlnC,
                      dFE1dlnC, dGdg, out1, out2, v0, v1, v, ct]
        return Jac1, Jac2
Example #8
def compute_jacobian(errors, parameters):
    """
    Compute jacobian.

    Parameters
    ----------
    errors : Theano variable
        Computed MSE for each sample separately.

    parameters : list of Theano variable
        Neural network parameters (e.g. weights, biases).

    Returns
    -------
    Theano variable
    """
    n_samples = errors.shape[0]
    J = T.jacobian(errors, wrt=parameters)

    jacobians = []
    for jacobian, parameter in zip(J, parameters):
        jacobian = jacobian.reshape((n_samples, parameter.size))
        jacobians.append(jacobian)

    return T.concatenate(jacobians, axis=1)
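A short usage sketch for compute_jacobian (the tiny linear model below is made up for illustration; theano and T are assumed imported as in the function above): the per-parameter Jacobians of the per-sample errors are stacked into one (n_samples x n_parameters) matrix.

import numpy as np
import theano
import theano.tensor as T

floatX = theano.config.floatX

x = T.matrix('x')                                    # (n_samples, n_features)
w = theano.shared(np.ones((3, 1), dtype=floatX), name='w')
b = theano.shared(np.zeros(1, dtype=floatX), name='b')
errors = ((T.dot(x, w) + b) ** 2).flatten()          # one error value per sample

J = compute_jacobian(errors, [w, b])                 # symbolic, (n_samples, 3 + 1)
f = theano.function([x], J)
print(f(np.random.rand(5, 3).astype(floatX)).shape)  # -> (5, 4)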
Example #9
def hessian(objective, argument):
    """
    Compute the directional derivative of the gradient
    (which is equal to the hessian multiplied by direction).
    """
    g = T.grad(objective, argument)

    # Create a new tensor A, which has the same type (i.e. same dimensionality)
    # as argument.
    A = argument.type()

    try:
        # First attempt efficient 'R-op', this directly calculates the
        # directional derivative of the gradient, rather than explicitly
        # calculating the hessian and then multiplying.
        R = T.Rop(g, argument, A)
    except NotImplementedError:
        shp = T.shape(argument)
        H = T.jacobian(g.flatten(),
                       argument).reshape(T.concatenate([shp, shp]), 2 * A.ndim)
        R = T.tensordot(H, A, A.ndim)

    try:
        hess = theano.function([argument, A], R, on_unused_input='raise')
    except theano.compile.UnusedInputError:
        warn('Theano detected unused input - suggests hessian may be zero or '
             'constant.')
        hess = theano.function([argument, A], R, on_unused_input='ignore')
    return hess
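A hedged usage sketch for the hessian helper above (it assumes the helper's module-level imports such as theano, T and warn are in scope; the cost is illustrative): for f(x) = sum(x**3) the Hessian is diag(6*x), so at a point of ones the directional derivative of the gradient is 6 times the direction.

import numpy as np
import theano
import theano.tensor as T

floatX = theano.config.floatX

x = T.vector('x')
cost = T.sum(x ** 3)
hess = hessian(cost, x)          # compiled function: (point, direction) -> H.dot(direction)

point = np.ones(3, dtype=floatX)
direction = np.array([1.0, 0.0, 2.0], dtype=floatX)
print(hess(point, direction))    # expected: [ 6.  0. 12.]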
Example #10
		def fixedPointIteration(*args):
			fpiIns         = dict(zip(self.getFPIArgNames(), args))
			gOpFreeIns     = [fpiIns[k] for k in self.getGOpFreeArgNames()]
			gOpClampedIns  = [fpiIns[k] for k in self.getGOpClampedArgNames()]
			gOpFreeRets    = self.g(*gOpFreeIns)
			gOpClampedRets = self.g(*gOpClampedIns)
			gOpDiffs       = [c-f for c,f in zip(gOpClampedRets, gOpFreeRets)]
			for i, (s, fi, fr, ci, cr) in enumerate(zip(self.getStateIter(),
			    gOpFreeIns,    gOpFreeRets, gOpClampedIns, gOpClampedRets)):
				if i>0:
					fpiIns["free_"   +s.name] = self.rho(fi+fr)
					fpiIns["clamped_"+s.name] = self.rho(ci+cr)
			
			allStates      = TT.concatenate([s.flatten() for s in gOpFreeRets])
			allDiffs       = TT.concatenate([d.flatten() for d in gOpDiffs])
			
			for t, tName in [(fpiIns[t.name], t.name) for t in self.getThetaIter()]:
				J  = TT.jacobian(allStates, t.flatten(), disconnected_inputs="ignore")
				dt = J.T.dot(allDiffs).reshape(t.shape)
				fpiIns[tName] += fpiIns["lr"]*dt
			
			fpiIns["i"]   += 1
			
			fpiRets        = [fpiIns[k] for k in self.getFPIRetNames()]
			return fpiRets
Example #11
    def _get_updates(self):
        n = self.params['batch_size']
        N = self.params['train_size']
        prec_lik = self.params['prec_lik']
        prec_prior = self.params['prec_prior']
        gc_norm = self.params['gc_norm']
        gamma = float(n + N) / n

        # compute log-likelihood
        error = self.model_outputs - self.true_outputs
        logliks = log_normal(error, prec_lik)
        sumloglik = logliks.sum()

        # compute gradient of likelihood wrt each data point
        grads = tensor.jacobian(expression=logliks, wrt=self.weights)
        grads = tensor.concatenate([g.flatten(ndim=2) for g in grads], axis=1)
        avg_grads = grads.mean(axis=0)
        dist_grads = grads - avg_grads

        # compute variance of gradient
        var_grads = (1. / (n - 1)) * tensor.dot(dist_grads.T, dist_grads)

        logprior = log_prior_normal(self.weights, prec_prior)
        grads_prior = tensor.grad(cost=logprior, wrt=self.weights)
        grads_prior = tensor.concatenate([g.flatten() for g in grads_prior])

        # update Fisher information
        I_t_next = (1 - 1 / self.it) * self.I_t + 1 / self.it * var_grads

        # compute noise
        if 'B' in self.params:
            B = self.params['B']
        else:
            B = gamma * I_t_next * N
        # B += np.eye(self.n_weights) * (10 ** -9)
        B_ch = slinalg.cholesky(B)
        noise = tensor.dot(((2. / tensor.sqrt(self.lr)) * B_ch),
                           trng.normal((self.n_weights, 1)))

        # expensive inversion
        inv_cond_mat = gamma * N * I_t_next + (4. / self.lr) * B
        cond_mat = nlinalg.matrix_inverse(inv_cond_mat)

        updates = []
        updates.append((self.I_t, I_t_next))
        updates.append((self.it, self.it + 1))

        # update the parameters
        updated_params = 2 * tensor.dot(
            cond_mat, grads_prior + N * avg_grads + noise.flatten())
        updated_params = updated_params.flatten()
        last_row = 0
        for p in self.weights:
            sub_index = np.prod(p.get_value().shape)
            up = updated_params[last_row:last_row + sub_index]
            up = up.reshape(p.shape)
            updates.append((p, up))
            last_row += sub_index

        return updates, sumloglik
Example #12
 def logdet_dinv_num(self, y):
     # return debug(tt.log(sT.det(debug(tt.jacobian(self.inv(y), y), 'jacobian_inv'))), 'automatic_logdet_dinv')
     return tt.sum(
         debug(
             tt.log(
                 tt.diag(debug(tt.jacobian(self.inv(y), y),
                               'jacobian_inv'))), 'automatic_logdet_dinv'))
Example #13
 def grad_wrt_input(self, inputf):
     fx = theano.function([self.model.layers[0].input],
                          T.jacobian(self.model.layers[-1].output.flatten(),
                                     self.model.layers[0].input),
                          allow_input_downcast=True)
     grad = fx(inputf)
     return grad
Example #14
def compute_jacobian(errors, parameters):
    """
    Compute jacobian.

    Parameters
    ----------
    errors : Theano variable
        Computed MSE for each sample separately.

    parameters : list of Theano variable
        Neural network parameters (e.g. weights, biases).

    Returns
    -------
    Theano variable
    """
    n_samples = errors.shape[0]
    J = T.jacobian(errors, wrt=parameters)

    jacobians = []
    for jacobian, parameter in zip(J, parameters):
        jacobian = jacobian.reshape((n_samples, parameter.size))
        jacobians.append(jacobian)

    return T.concatenate(jacobians, axis=1)
Example #15
 def dM2_f_i(mx, beta, hyp, X):
     hyps = (hyp[:idims + 1], hyp[idims + 1])
     kernel_func = partial(cov.Sum, hyps, self.covs)
     k = kernel_func(mx[None, :], X).flatten()
     mean = k.dot(beta)
     dmean = tt.jacobian(mean.flatten(), mx)
     return tt.square(dmean.flatten())
Example #16
def hessian(objective, argument):
    """
    Compute the directional derivative of the gradient
    (which is equal to the hessian multiplied by direction).
    """
    g = T.grad(objective, argument)

    # Create a new tensor A, which has the same type (i.e. same dimensionality)
    # as argument.
    A = argument.type()

    try:
        # First attempt efficient 'R-op', this directly calculates the
        # directional derivative of the gradient, rather than explicitly
        # calculating the hessian and then multiplying.
        R = T.Rop(g, argument, A)
    except NotImplementedError:
        shp = T.shape(argument)
        H = T.jacobian(g.flatten(), argument).reshape(
                                        T.concatenate([shp, shp]), 2*A.ndim)
        R = T.tensordot(H, A, A.ndim)

    try:
        hess = theano.function([argument, A], R, on_unused_input='raise')
    except theano.compile.UnusedInputError:
        warn('Theano detected unused input - suggests hessian may be zero or '
             'constant.')
        hess = theano.function([argument, A], R, on_unused_input='ignore')
    return hess
Example #17
def auto4check2(input, dataset):
    a = theano.shared(value=dataset[0], name="a")
    b = theano.shared(value=dataset[1], name="b")
    c = theano.shared(value=dataset[2], name="c")
    x = T.vector('x')
    u = x[0] - 0.8
    v = x[1] - (a[0] + a[1] * u ** 2 * (1 - u) ** 0.5 - a[2] * u)
    alpha = -b[0] + b[1] * u ** 2 * (1 + u) ** 0.5 + b[2] * u
    beta = c[0] * v ** 2 * (1 - c[1] * v) / (1 + c[2] * u ** 2)
    fx = alpha * np.e ** (-beta)
    g_f_x = T.jacobian(fx, x)
    grad = theano.function([x], g_f_x)
    Hessian = theano.function([x], T.hessian(fx, x))
    H_alpha_x = theano.function([x], T.hessian(alpha, x))
    H_beta_x = theano.function([x], T.hessian(beta, x))
    J_f_alpha = theano.function([x], T.grad(fx, alpha))
    J_f_beta = theano.function([x], T.grad(fx, beta))
    J_alpha_x = theano.function([x], T.grad(alpha, x))

    J_beta_x = theano.function([x], T.grad(beta, x))

    J_f_y = [J_f_alpha(input), J_f_beta(input)]
    J_y_x = [J_alpha_x(input), J_beta_x(input)]
    # print "H_alpha_x"
    # print H_alpha_x(input)
    # print "H_beta_x"
    # print H_beta_x(input)
    # print "J_f_y"
    # print J_f_y
    # print "J_y_x"
    # print J_y_x
    # print grad(input)

    return Hessian(input)
Example #18
    def compile_tan_force(self, u_np, s_np, *args, **kargs):
        grid = u_np.grid
        grid_math = grid._math
        grid._math = T

        tensor_dim = u_np.ndim + 2
        input_data = T.TensorType('float64', (False,) * tensor_dim)()

        tensor_dim = s_np.ndim
        param = T.TensorType('float64', (False,) * tensor_dim)()
        #param = T.dvector('s')

        u_theano = grid.array(input_data.copy(), u_np.shape)
        s_theano = np.array(param.copy(), s_np.shape)
        
        ret = self._function(u_theano, s_theano, *args, **kargs)

        out_tan = T.jacobian(ret._data, param)

        if _VERBOSE_: print('tangent derived in theano mode, compiling')
        f = theano.function([input_data, param], [out_tan])
        if _VERBOSE_: print('tangent successfully compiled')

        grid._math = grid_math
        return f
Example #19
def hypernet_elbo(X, y, loglik_primary_f, logprior_f, hypernet_f, z_noise, N,
                  log_det_dtheta_dz_f=None):
    assert(X.ndim == 2 and y.ndim == 2)
    assert(z_noise.ndim == 1)

    B = X.shape[0]
    rescale = float(N) / B  # Ensure not integer division

    theta = hypernet_f(z_noise)

    loglik = loglik_primary_f(X, y, theta)
    assert(loglik.ndim == 1)
    loglik_total = T.sum(loglik)
    assert(loglik_total.ndim == 0)

    logprior_theta = logprior_f(theta)
    assert(logprior_theta.ndim == 0)

    if log_det_dtheta_dz_f is None: # This is slower, but good for testing
        assert(theta.ndim == 1)  # Use vector theta for this mode
        J = T.jacobian(theta, z_noise)
        penalty = log_abs_det_T(J)
    else:
        penalty = log_det_dtheta_dz_f(z_noise)
    assert(penalty.ndim == 0)

    logprior_z = 0.5 * T.dot(z_noise, z_noise)
    assert(logprior_z.ndim == 0)

    elbo = rescale * loglik_total + logprior_theta + penalty + logprior_z
    return elbo
Example #20
def test_flow_det(flow_spec):
    z0 = tt.arange(0, 20).astype('float32')
    flow = flow_spec(dim=20, z0=z0.dimshuffle('x', 0))
    with change_flags(compute_test_value='off'):
        z1 = flow.forward.flatten()
        J = tt.jacobian(z1, z0)
        logJdet = tt.log(tt.abs_(tt.nlinalg.det(J)))
        det = flow.logdet[0]
    np.testing.assert_allclose(logJdet.eval(), det.eval(), atol=0.0001)
Example #22
def test002_jacobian_matrix():
    x = tensor.matrix()
    y = 2 * x.sum(axis=0)
    rng = numpy.random.RandomState(seed=utt.fetch_seed())
    ev = numpy.zeros((10, 10, 10))
    for dx in xrange(10):
        ev[dx, :, dx] = 2.

    # test when the jacobian is called with a tensor as wrt
    Jx = tensor.jacobian(y, x)
    f = theano.function([x], Jx)
    vx = rng.uniform(size=(10, 10)).astype(theano.config.floatX)
    assert numpy.allclose(f(vx), ev)

    # test when the jacobian is called with a tuple as wrt
    Jx = tensor.jacobian(y, (x,))
    assert isinstance(Jx, tuple)
    f = theano.function([x], Jx[0])
    vx = rng.uniform(size=(10, 10)).astype(theano.config.floatX)
    assert numpy.allclose(f(vx), ev)

    # test when the jacobian is called with a list as wrt
    Jx = tensor.jacobian(y, [x])
    assert isinstance(Jx, list)
    f = theano.function([x], Jx[0])
    vx = rng.uniform(size=(10, 10)).astype(theano.config.floatX)
    assert numpy.allclose(f(vx), ev)

    # test when the jacobian is called with a list of two elements
    z = tensor.matrix()
    y = (x * z).sum(axis=1)
    Js = tensor.jacobian(y, [x, z])
    f = theano.function([x, z], Js)
    vx = rng.uniform(size=(10, 10)).astype(theano.config.floatX)
    vz = rng.uniform(size=(10, 10)).astype(theano.config.floatX)
    vJs = f(vx, vz)
    evx = numpy.zeros((10, 10, 10))
    evz = numpy.zeros((10, 10, 10))
    for dx in xrange(10):
        evx[dx, dx, :] = vx[dx, :]
        evz[dx, dx, :] = vz[dx, :]
    assert numpy.allclose(vJs[0], evz)
    assert numpy.allclose(vJs[1], evx)
Example #23
    def get_gradients(self, model, data, ** kwargs):

        space,  sources = self.get_data_specs(model)
        space.validate(data)
        X, Y = data


        theano_rng = RandomStreams(seed = model.rng.randint(2 ** 15))
        noise = theano_rng.random_integers(size = (X.shape[0] * model.k,), low=0, high = model.dict_size - 1)


        delta = model.delta(data)
        p = model.score(X, Y)
        params = model.get_params()

        pos_ = T.jacobian(model.score(X, Y), params, disconnected_inputs='ignore')
        pos_coeff = 1 - T.nnet.sigmoid(model.delta(data))
        pos = []
        for param in pos_:
            axes = [0]
            axes.extend(['x' for item in range(param.ndim - 1)])
            pos.append(pos_coeff.dimshuffle(axes) * param)
        del pos_, pos_coeff

        noise_x = T.tile(X, (model.k, 1))
        neg_ = T.jacobian(model.score(noise_x, noise), params, disconnected_inputs='ignore')
        neg_coeff = T.nnet.sigmoid(model.delta((noise_x, noise)))
        neg = []
        for param in neg_:
            axes = [0]
            axes.extend(['x' for item in range(param.ndim - 1)])
            tmp = neg_coeff.dimshuffle(axes) * param
            new_shape = [X.shape[0], model.k]
            new_shape.extend([tmp.shape[i] for i in range(1, tmp.ndim)])
            neg.append(tmp.reshape(new_shape).sum(axis=1))
        del neg_, neg_coeff


        grads = [(pos_ - neg_).mean(axis=0) for pos_, neg_ in zip(pos, neg)]
        gradients = OrderedDict(izip(params, grads))
        updates = OrderedDict()

        return gradients, updates
Example #24
def get_stat(f, thetahat):
    fhat = theano.function([theta], f)(thetahat)
    dfhat = theano.function([theta], T.jacobian(f, [theta])[0])(thetahat)
    fhatcov = np.dot(np.dot(dfhat, covhat), dfhat.transpose())
    try:
        fse = np.sqrt(np.diag(fhatcov))
    except:
        fse = np.sqrt(fhatcov)
    ftstat = fhat/fse
    return fhat, fse, ftstat
Example #25
def test_jacobian_matrix():
    x = tensor.matrix()
    y = 2 * x.sum(axis=0)
    rng = np.random.RandomState(seed=utt.fetch_seed())
    ev = np.zeros((10, 10, 10))
    for dx in range(10):
        ev[dx, :, dx] = 2.0

    # test when the jacobian is called with a tensor as wrt
    Jx = tensor.jacobian(y, x)
    f = theano.function([x], Jx)
    vx = rng.uniform(size=(10, 10)).astype(theano.config.floatX)
    assert np.allclose(f(vx), ev)

    # test when the jacobian is called with a tuple as wrt
    Jx = tensor.jacobian(y, (x,))
    assert isinstance(Jx, tuple)
    f = theano.function([x], Jx[0])
    vx = rng.uniform(size=(10, 10)).astype(theano.config.floatX)
    assert np.allclose(f(vx), ev)

    # test when the jacobian is called with a list as wrt
    Jx = tensor.jacobian(y, [x])
    assert isinstance(Jx, list)
    f = theano.function([x], Jx[0])
    vx = rng.uniform(size=(10, 10)).astype(theano.config.floatX)
    assert np.allclose(f(vx), ev)

    # test when the jacobian is called with a list of two elements
    z = tensor.matrix()
    y = (x * z).sum(axis=1)
    Js = tensor.jacobian(y, [x, z])
    f = theano.function([x, z], Js)
    vx = rng.uniform(size=(10, 10)).astype(theano.config.floatX)
    vz = rng.uniform(size=(10, 10)).astype(theano.config.floatX)
    vJs = f(vx, vz)
    evx = np.zeros((10, 10, 10))
    evz = np.zeros((10, 10, 10))
    for dx in range(10):
        evx[dx, dx, :] = vx[dx, :]
        evz[dx, dx, :] = vz[dx, :]
    assert np.allclose(vJs[0], evz)
    assert np.allclose(vJs[1], evx)
Example #26
    def test_vectors(self):

        try:
            import theano.tensor as T
            from theano import function
        except:
            return

        for MT in [False, True]:

            # Set up variables and function
            vals = [np.random.randn(20) for i in range(5)]
            f = lambda a, b, c, d, e: a + (b * c) - d**e

            # Set up our objects
            Cs = [ch.Ch(v) for v in vals]
            C_result = f(*Cs)
            C_result.MT = MT

            # Set up Theano equivalents
            Ts = T.dvectors('T1', 'T2', 'T3', 'T4', 'T5')
            TF = f(*Ts)
            T_result = function(Ts, TF)

            if False:
                import theano.gradient
                which = 1
                theano_sse = (TF**2.).sum()
                theano_grad = theano.gradient.grad(theano_sse, Ts[which])
                theano_fn = function(Ts, theano_grad)
                print(theano_fn(*vals))
                C_result_grad = ch.SumOfSquares(C_result).dr_wrt(Cs[which])
                print(C_result_grad)

                # if True:
                #     aaa = np.linalg.solve(C_result_grad.T.dot(C_result_grad), C_result_grad.dot(np.zeros(C_result_grad.shape[1])))
                #     theano_hes = theano.R_obbb = theano.R_op()

                import pdb
                pdb.set_trace()

            # Make sure values and derivatives are equal
            np.testing.assert_array_equal(C_result.r, T_result(*vals))
            for k in range(len(vals)):
                theano_derivative = function(Ts, T.jacobian(TF, Ts[k]))(*vals)
                our_derivative = np.array(C_result.dr_wrt(Cs[k]).todense())
                #print(theano_derivative, our_derivative)

                # Theano produces more NaNs than we do during exponentiation,
                # so we test only on entries where Theano is without NaNs.
                without_nans = np.nonzero(
                    np.logical_not(np.isnan(theano_derivative.flatten())))[0]
                np.testing.assert_array_equal(
                    theano_derivative.flatten()[without_nans],
                    our_derivative.flatten()[without_nans])
Example #27
def estimate_fisher(outputs, n_outputs, parameters):
    # shape (sample_size, n_outputs, #parameters)
    grads = T.stack(*[util.batched_flatcat(
        T.jacobian(outputs[:, j], parameters))
        for j in xrange(n_outputs)])
    # ravel the batch and output axes so that the product will sum
    # over the outputs *and* over the batch. divide by the batch
    # size to get the batch mean.
    grads = grads.reshape((grads.shape[0] * grads.shape[1], grads.shape[2]))
    fisher = T.dot(grads.T, grads) / grads.shape[0]
    return fisher
Example #28
def Hessian(objective, *Vars, **kwargs):
    """block structure matrix of Jacobian of gradients, symmetric"""
    return T.concatenate([
        T.concatenate([
            T.jacobian(T.grad(objective, var1,
                              disconnected_inputs='ignore').reshape((-1, )),
                       var2,
                       disconnected_inputs='ignore').reshape(
                           (var1.size, var2.size)) for var2 in Vars
        ],
                      axis=1) for var1 in Vars
    ],
                         axis=0)
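An illustrative call of the block-structured Hessian above (the objective and names are made up; theano and T assumed imported as in the snippet): for f(w, b) = sum((w*x + b)**2) with a fixed length-3 x, the assembled Hessian is (4, 4).

import numpy as np
import theano
import theano.tensor as T

floatX = theano.config.floatX

w = T.vector('w')
b = T.scalar('b')
x = np.arange(3).astype(floatX)              # fixed data, length 3
objective = T.sum((w * x + b) ** 2)

H = Hessian(objective, w, b)                 # blocks: [[d2f/dw2, d2f/dwdb], [d2f/dbdw, d2f/db2]]
f = theano.function([w, b], H)
print(f(np.ones(3, dtype=floatX), np.asarray(0.0, dtype=floatX)).shape)  # -> (4, 4)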
Example #29
def Hessian(objective, *Vars, **kwargs):
    return T.concatenate([
        T.concatenate([
            T.jacobian(
                T.grad(objective, var1, disconnected_inputs='ignore').reshape(
                    (T.prod(var1.shape), )),
                var2,
                disconnected_inputs='ignore').reshape(
                    (T.prod(var1.shape), T.prod(var2.shape))) for var2 in Vars
        ],
                      axis=1) for var1 in Vars
    ],
                         axis=0)
Example #30
def test_jacobian_vector():
    x = tensor.vector()
    y = x * 2
    rng = np.random.RandomState(seed=utt.fetch_seed())

    # test when the jacobian is called with a tensor as wrt
    Jx = tensor.jacobian(y, x)
    f = theano.function([x], Jx)
    vx = rng.uniform(size=(10,)).astype(theano.config.floatX)
    assert np.allclose(f(vx), np.eye(10) * 2)

    # test when the jacobian is called with a tuple as wrt
    Jx = tensor.jacobian(y, (x,))
    assert isinstance(Jx, tuple)
    f = theano.function([x], Jx[0])
    vx = rng.uniform(size=(10,)).astype(theano.config.floatX)
    assert np.allclose(f(vx), np.eye(10) * 2)

    # test when the jacobian is called with a list as wrt
    Jx = tensor.jacobian(y, [x])
    assert isinstance(Jx, list)
    f = theano.function([x], Jx[0])
    vx = rng.uniform(size=(10,)).astype(theano.config.floatX)
    assert np.allclose(f(vx), np.eye(10) * 2)

    # test when the jacobian is called with a list of two elements
    z = tensor.vector()
    y = x * z
    Js = tensor.jacobian(y, [x, z])
    f = theano.function([x, z], Js)
    vx = rng.uniform(size=(10,)).astype(theano.config.floatX)
    vz = rng.uniform(size=(10,)).astype(theano.config.floatX)
    vJs = f(vx, vz)
    evx = np.zeros((10, 10))
    evz = np.zeros((10, 10))
    np.fill_diagonal(evx, vx)
    np.fill_diagonal(evz, vz)
    assert np.allclose(vJs[0], evz)
    assert np.allclose(vJs[1], evx)
Example #31
def test001_jacobian_vector():
    x = tensor.vector()
    y = x * 2
    rng = numpy.random.RandomState(seed=utt.fetch_seed())

    # test when the jacobian is called with a tensor as wrt
    Jx = tensor.jacobian(y, x)
    f = theano.function([x], Jx)
    vx = rng.uniform(size=(10,)).astype(theano.config.floatX)
    assert numpy.allclose(f(vx), numpy.eye(10) * 2)

    # test when the jacobian is called with a tuple as wrt
    Jx = tensor.jacobian(y, (x,))
    assert isinstance(Jx, tuple)
    f = theano.function([x], Jx[0])
    vx = rng.uniform(size=(10,)).astype(theano.config.floatX)
    assert numpy.allclose(f(vx), numpy.eye(10) * 2)

    # test when the jacobian is called with a list as wrt
    Jx = tensor.jacobian(y, [x])
    assert isinstance(Jx, list)
    f = theano.function([x], Jx[0])
    vx = rng.uniform(size=(10,)).astype(theano.config.floatX)
    assert numpy.allclose(f(vx), numpy.eye(10) * 2)

    # test when the jacobian is called with a list of two elements
    z = tensor.vector()
    y = x * z
    Js = tensor.jacobian(y, [x, z])
    f = theano.function([x, z], Js)
    vx = rng.uniform(size=(10,)).astype(theano.config.floatX)
    vz = rng.uniform(size=(10,)).astype(theano.config.floatX)
    vJs = f(vx, vz)
    evx = numpy.zeros((10, 10))
    evz = numpy.zeros((10, 10))
    numpy.fill_diagonal(evx, vx)
    numpy.fill_diagonal(evz, vz)
    assert numpy.allclose(vJs[0], evz)
    assert numpy.allclose(vJs[1], evx)
Example #32
    def test_vectors(self):
        
        try:
            import theano.tensor as T
            from theano import function            
        except:
            return
            
        for MT in [False, True]:

            # Set up variables and function
            vals = [np.random.randn(20) for i in range(5)]
            f = lambda a, b, c, d, e : a + (b * c) - d ** e

            # Set up our objects
            Cs = [ch.Ch(v) for v in vals]
            C_result = f(*Cs)
            C_result.MT = MT

            # Set up Theano equivalents
            Ts = T.dvectors('T1', 'T2', 'T3', 'T4', 'T5')
            TF = f(*Ts)
            T_result = function(Ts, TF)        

            if False:
                import theano.gradient
                which = 1
                theano_sse = (TF**2.).sum()
                theano_grad = theano.gradient.grad(theano_sse, Ts[which])
                theano_fn = function(Ts, theano_grad)
                print theano_fn(*vals)
                C_result_grad = ch.SumOfSquares(C_result).dr_wrt(Cs[which])
                print C_result_grad
                
                # if True:
                #     aaa = np.linalg.solve(C_result_grad.T.dot(C_result_grad), C_result_grad.dot(np.zeros(C_result_grad.shape[1])))
                #     theano_hes = theano.R_obbb = theano.R_op()
                
                import pdb; pdb.set_trace()

            # Make sure values and derivatives are equal
            np.testing.assert_array_equal(C_result.r, T_result(*vals))
            for k in range(len(vals)):
                theano_derivative = function(Ts, T.jacobian(TF, Ts[k]))(*vals)
                our_derivative = np.array(C_result.dr_wrt(Cs[k]).todense())
                #print theano_derivative, our_derivative   
            
                # Theano produces more NaNs than we do during exponentiation,
                # so we test only on entries where Theano is without NaNs.
                without_nans = np.nonzero(np.logical_not(np.isnan(theano_derivative.flatten())))[0]
                np.testing.assert_array_equal(theano_derivative.flatten()[without_nans], our_derivative.flatten()[without_nans])
Example #33
def test_flow_det_local(flow_spec):
    z0 = tt.arange(0, 12).astype('float32')
    spec = flow_spec.cls.get_param_spec_for(d=12)
    params = dict()
    for k, shp in spec.items():
        params[k] = np.random.randn(1, *shp).astype('float32')
    flow = flow_spec(dim=12, z0=z0.reshape((1, 1, 12)), **params)
    assert flow.batched
    with change_flags(compute_test_value='off'):
        z1 = flow.forward.flatten()
        J = tt.jacobian(z1, z0)
        logJdet = tt.log(tt.abs_(tt.nlinalg.det(J)))
        det = flow.logdet[0]
    np.testing.assert_allclose(logJdet.eval(), det.eval(), atol=0.0001)
Example #34
        def get_fisher_mat():
            grad2d = []
            for p in self.model.params:
                grad2d += [T.jacobian(self.f_loss_samples, p)]
                if grad2d[-1].ndim == 2:
                    grad2d[-1] = grad2d[-1].dimshuffle(0, 1, 'x')

            grad2d_vec = T.concatenate([g.flatten(2).T for g in grad2d]).T

            # tensor wise: F_p,i,j = sum_k grad2d[p,i,k]*grad2d[p,k,j]
            # just a slow reference implementation of what is below
            # F = T.mean(T.batched_dot(grad2d_vec.dimshuffle(0, 1, 'x'), grad2d_vec.dimshuffle(0, 'x', 1)), 0)/self.over_sampling
            F = T.dot(grad2d_vec.T, grad2d_vec)/T.cast(grad2d_vec.shape[0], theano.config.floatX)/self.over_sampling
            return F
Example #36
def jacobian_vector(expr, wrt):
    """Computes the Jacobian of a vector expression with respect to varaibles.

    Args:
        expr: Vector Theano tensor expression.
        wrt: List of Theano variables.

    Returns:
        Theano tensor.
    """
    try:
        return _tensor_map(lambda f: jacobian_scalar(f, wrt), expr)
    except ValueError:
        # Fallback for wider support.
        return T.stack([T.jacobian(expr, wrt, disconnected_inputs="ignore")])
Example #37
def test003_jacobian_scalar():
    x = tensor.scalar()
    y = x * 2
    rng = numpy.random.RandomState(seed=utt.fetch_seed())

    # test when the jacobian is called with a tensor as wrt
    Jx = tensor.jacobian(y, x)
    f = theano.function([x], Jx)
    vx = numpy.cast[theano.config.floatX](rng.uniform())
    assert numpy.allclose(f(vx), 2)

    # test when the jacobian is called with a tuple as wrt
    Jx = tensor.jacobian(y, (x,))
    assert isinstance(Jx, tuple)
    f = theano.function([x], Jx[0])
    vx = numpy.cast[theano.config.floatX](rng.uniform())
    assert numpy.allclose(f(vx), 2)

    # test when the jacobian is called with a list as wrt
    Jx = tensor.jacobian(y, [x])
    assert isinstance(Jx, list)
    f = theano.function([x], Jx[0])
    vx = numpy.cast[theano.config.floatX](rng.uniform())
    assert numpy.allclose(f(vx), 2)

    # test when the jacobian is called with a list of two elements
    z = tensor.scalar()
    y = x * z
    Jx = tensor.jacobian(y, [x, z])
    f = theano.function([x, z], Jx)
    vx = numpy.cast[theano.config.floatX](rng.uniform())
    vz = numpy.cast[theano.config.floatX](rng.uniform())
    vJx = f(vx, vz)

    assert numpy.allclose(vJx[0], vz)
    assert numpy.allclose(vJx[1], vx)
Example #38
def test_jacobian_scalar():
    x = tensor.scalar()
    y = x * 2
    rng = np.random.RandomState(seed=utt.fetch_seed())

    # test when the jacobian is called with a tensor as wrt
    Jx = tensor.jacobian(y, x)
    f = theano.function([x], Jx)
    vx = np.cast[theano.config.floatX](rng.uniform())
    assert np.allclose(f(vx), 2)

    # test when the jacobian is called with a tuple as wrt
    Jx = tensor.jacobian(y, (x,))
    assert isinstance(Jx, tuple)
    f = theano.function([x], Jx[0])
    vx = np.cast[theano.config.floatX](rng.uniform())
    assert np.allclose(f(vx), 2)

    # test when the jacobian is called with a list as wrt
    Jx = tensor.jacobian(y, [x])
    assert isinstance(Jx, list)
    f = theano.function([x], Jx[0])
    vx = np.cast[theano.config.floatX](rng.uniform())
    assert np.allclose(f(vx), 2)

    # test when the jacobian is called with a list of two elements
    z = tensor.scalar()
    y = x * z
    Jx = tensor.jacobian(y, [x, z])
    f = theano.function([x, z], Jx)
    vx = np.cast[theano.config.floatX](rng.uniform())
    vz = np.cast[theano.config.floatX](rng.uniform())
    vJx = f(vx, vz)

    assert np.allclose(vJx[0], vz)
    assert np.allclose(vJx[1], vx)
Example #39
    def initialize_calc_ll_gmm_hist_fun(self):
        meansvec = T.dvector('means')
        covarsvec = T.dvector('covars')
        weights = T.dvector('weights')
        gm_num = weights.shape[0]
        means = T.reshape(meansvec, (gm_num, meansvec.shape[0] / gm_num))
        covars = T.reshape(covarsvec, (gm_num, meansvec.shape[0] / gm_num))
        Yp = T.dmatrix('Yp')
        Yn = T.dmatrix('Yn')
        p_p,r_p,p_p_m = self.calc_ll_gmm(Yp, means, covars, weights)
        p_n,r_n,p_n_m = self.calc_ll_gmm(Yn, means, covars, weights)

        L, hmax, hmin, hn, hp = self.calc_hist_loss_vector(p_n, p_p)
        dL = T.jacobian(L, [meansvec, covarsvec, weights, Yp, Yn])
        self.gmmhist_df = function([meansvec, covarsvec, weights, Yp, Yn], dL, allow_input_downcast=True)
        self.gmmhist_f = function([meansvec, covarsvec, weights, Yp, Yn], [L, hmax, hmin, hn, hp], allow_input_downcast=True)
Example #40
    def grad(self, inputs, dCdf):
        """ Gradient MTF
        """
        MU = inputs[0][0]
        SD = inputs[0][1]
#        Y = self._normal(just_return = True, MU=MU, SD=SD)
        Y, Y_upd = th.scan(fn=self.norm_fun,
                               sequences=self.counter, non_sequences=[MU, SD])


        dYdMIn = T.jacobian(Y.sum(axis=0), inputs[0])
#        dYdSD = T.jacobian(Y, SD)
#        return dYdMIn[0]*dCdf[0][0] + dYdMIn[1]*dCdf[0][1],
#        return T.as_tensor([dCdf[0][0]*dYdMIn[0][0] + dCdf[0][1]*dYdMIn[1][0],
#                dCdf[0][0]*dYdMIn[0][1] + dCdf[0][1]*dYdMIn[1][1]]),
        return T.as_tensor([dCdf[0].dot(dYdMIn[:,0]), dCdf[0].dot(dYdMIn[:,1])]),
Example #41
    def L_op(self, inputs, output, grads):
        # from IPython import embed; embed()
        if not hasattr(self, 'precomputed_grads'):
            grad_integrators = T.jacobian(self._expr, self._extra_vars)
            self.precomputed_grads = [
                IntegrateVectorizedGeneralized(gi, self._var, self.bins,
                                               *self._extra_vars)
                for gi in grad_integrators
            ]

        out, = grads
        dargs = []
        for integrate in self.precomputed_grads:
            darg = T.dot(out, integrate(*inputs))
            # print(darg)
            dargs.append(darg)
        return dargs
Example #42
    def _get_updates(self):
        n = self.params['batch_size']
        N = self.params['train_size']
        prec_lik = self.params['prec_lik']
        prec_prior = self.params['prec_prior']
        gc_norm = self.params['gc_norm']
        alpha = self.params['alpha']
        mu = self.params['mu']
        use_gamma = self.params['use_gamma']

        # compute log-likelihood
        error = self.model_outputs - self.true_outputs
        logliks = log_normal(error, prec_lik)
        sumloglik = logliks.sum()
        meanloglik = sumloglik / n

        # compute gradients
        grads = tensor.grad(cost=meanloglik, wrt=self.weights)

        # update preconditioning matrix
        V_t_next = [
            alpha * v + (1 - alpha) * g * g for g, v in zip(grads, self.V_t)
        ]
        G_t = [1. / (mu + tensor.sqrt(v)) for v in V_t_next]

        logprior = log_prior_normal(self.weights, prec_prior)
        grads_prior = tensor.grad(cost=logprior, wrt=self.weights)

        updates = []
        [updates.append((v, v_n)) for v, v_n in zip(self.V_t, V_t_next)]

        for p, g, gp, gt in zip(self.weights, grads, grads_prior, G_t):
            # inject noise
            noise = tensor.sqrt(self.lr * gt) * trng.normal(p.shape)
            if use_gamma:
                # compute gamma
                gamma = nlinalg.extract_diag(
                    tensor.jacobian(gt.flatten(), p).flatten(ndim=2))
                gamma = gamma.reshape(p.shape)
                updates.append((p, p + 0.5 * self.lr *
                                ((gt * (gp + N * g)) + gamma) + noise))
            else:
                updates.append(
                    (p, p + 0.5 * self.lr * (gt * (gp + N * g)) + noise))

        return updates, sumloglik
Example #43
    def grad(self, inputs, g_outputs):
        [gz] = g_outputs
        [A] = inputs
        v = self(A)

        dexp = T.jacobian(self.exp(v).flatten(), v)
        invdexp = T.nlinalg.matrix_inverse(
            dexp.reshape((
                A.shape[0] * A.shape[1],
                v.shape[0] * v.shape[1],
            ))).reshape((
                A.shape[0],
                A.shape[1],
                v.shape[0],
                v.shape[1],
            ))

        return [T.tensordot(gz, invdexp, ((0, 1), (0, 1)))]
Example #44
 def __init__(self, mode='matrix', exp=None, LAtoV=None, VtoLA=None):
     assert mode in ['matrix', 'zeroest', 'nearest']
     self.mode = mode
     if exp is None:
         exp = T.slinalg.Expm()
     self.exp = exp
     self.LAtoVf = None
     self.VtoLAf = None
     self.lossf = None
     self.dlossf = None
     if mode != 'matrix':
         g = T.matrix()
         hatxi = T.vector()
         xi = T.matrix()
         self.LAtoVf = theano.function([xi], LAtoV(xi))
         self.VtoLAf = theano.function([hatxi], VtoLA(hatxi))
         loss = lambda hatxi, g: T.sum((exp(VtoLA(hatxi)) - g)**2)
         dloss = lambda hatxi, g: T.jacobian(loss(hatxi, g), hatxi)
         self.lossf = theano.function([hatxi, g], loss(hatxi, g))
         self.dlossf = theano.function([hatxi, g], dloss(hatxi, g))
Example #45
def get_order_n_pole(order):
    """Generate function to calculate the Fourier transform `order`-order pole.

    The Fourier transform of :math:`{(z-ϵ)}^{n}` is calculated, where `ϵ` is the
    position of the pole and `n` the order of the pole.

    Parameters
    ----------
    order : int
        The order of the pole

    Returns
    -------
    order_n_pole : Callable
        The function (tau, pole, beta)->gf_tau calculating the Fourier transform.

    """
    import theano
    import theano.tensor as T
    from theano.ifelse import ifelse
    from math import factorial

    pole = T.dscalar('pole')
    beta = T.dscalar('beta')
    # tau = T.dscalar('tau')
    tau = T.dscalar('tau')
    fermi_fct = (1 + T.tanh(-beta*pole/2))/2

    gf_tau = ifelse(
        pole > 0,  # avoid overflows asserting negative exponent
        -(1 - fermi_fct)*T.exp(-pole*tau),
        -fermi_fct*T.exp(pole*(beta-tau)),
    )
    n_gf_tau = gf_tau
    for __ in range(order-1):
        # n_gf_tau = T.grad(n_gf_tau, pole)
        n_gf_tau = T.jacobian(n_gf_tau, pole)
    n_gf_tau = n_gf_tau / factorial(order-1)
    # resuts, __ = theano.scan(n_gf_tau.)
    func = theano.function([tau, pole, beta], n_gf_tau)
    return np.vectorize(func, otypes=[np.float])
Example #46
    def compute_hessian(self, objective, argument):
        """
        Computes the directional derivative of the gradient (which is equal to
        the Hessian multiplied by direction).
        """
        g = T.grad(objective, argument)

        # Create a new tensor A, which has the same type (i.e. same
        # dimensionality) as argument.
        try:
            A = argument.type()
        except AttributeError:
            # Assume we are on the product manifold
            A = [arg.type() for arg in argument]

        try:
            # First attempt efficient 'R-op', this directly calculates the
            # directional derivative of the gradient, rather than explicitly
            # calculating the Hessian and then multiplying.
            R = T.Rop(g, argument, A)
        except NotImplementedError:
            # TODO: fix this fallback for the product manifold.
            shp = T.shape(argument)
            H = T.jacobian(g.flatten(),
                           argument).reshape(T.concatenate([shp, shp]),
                                             2 * A.ndim)
            R = T.tensordot(H, A, A.ndim)

        try:
            hess = theano.function([argument, A], R, on_unused_input="warn")
        except TypeError:
            hess_prod = theano.function(argument + A,
                                        R,
                                        on_unused_input="warn")

            def hess(x, a):
                return hess_prod(*(x + a))

        return hess
Example #47
    def compute_hessian(self, objective, argument):
        """
        Computes the directional derivative of the gradient (which is equal to
        the Hessian multiplied by direction).
        """
        g = T.grad(objective, argument)

        # Create a new tensor A, which has the same type (i.e. same
        # dimensionality) as argument.
        try:
            A = argument.type()
        except AttributeError:
            # Assume we are on the product manifold
            A = [arg.type() for arg in argument]

        try:
            # First attempt efficient 'R-op', this directly calculates the
            # directional derivative of the gradient, rather than explicitly
            # calculating the Hessian and then multiplying.
            R = T.Rop(g, argument, A)
        except NotImplementedError:
            # TODO: fix this fallback for the product manifold.
            shp = T.shape(argument)
            H = T.jacobian(g.flatten(), argument).reshape(
                T.concatenate([shp, shp]), 2 * A.ndim)
            R = T.tensordot(H, A, A.ndim)

        try:
            hess = theano.function([argument, A], R, on_unused_input="warn")
        except TypeError:
            hess_prod = theano.function(argument + A, R,
                                        on_unused_input="warn")

            def hess(x, a):
                return hess_prod(*(x + a))

        return hess
Example #48
def auto4check(dataset, x, tol=1e-9, maxiter=1000):

    t0 = theano.shared(value=dataset[0], name="t0")
    a0 = theano.shared(value=dataset[1], name="a0")
    b0 = theano.shared(value=dataset[2], name="b0")
    c0 = theano.shared(value=dataset[3], name="c0")
    k = T.vector('k')
    a_t = np.e ** (-(k[0] + k[1]) * t0)
    b_t = k[0] / (k[0] + k[1]) * (1 - a_t)
    c_t = k[1] / (k[0] + k[1]) * (1 - a_t)
    f = T.sum((a0 - a_t) ** 2 + (b0 - b_t) ** 2 + (c0 - c_t) ** 2)
    F = theano.function([k], f)
    g_f_k = T.jacobian(f, k)
    j_f_k = theano.function([k], g_f_k)
    H_f_k = T.hessian(f, k)
    Hessian = theano.function([k], H_f_k)


    track, f_val = [], []
    track.append(array(x))
    f_val.append(F(x))
    g = j_f_k(x)
    i = 0
    print "Step =", i, "g=", g, "x=", x, "loss=", F(x)
    while norm(g) > tol:
        i += 1
        if i > maxiter:
            break
        G = Hessian(x)
        s = -np.linalg.solve(G, g)
        x += s
        track.append(array(x))
        f_val.append(F(x))
        g = j_f_k(x)
        print "step =", i, "g=", g, "x=", x, "loss=", F(x), "G=", G
    return x, F(x), track, f_val
Example #49
def grad_hess(objective, argument):
    """
    Compute both the gradient and the directional derivative of the gradient
    (which is equal to the hessian multiplied by direction).
    """
    # TODO: Check that the hessian calculation is correct!
    # TODO: Make this compatible with non-matrix manifolds.
    g = T.grad(objective, argument)
    grad = compile(g, argument)

    # Create a new tensor A, which has the same type (i.e. same dimensionality)
    # as argument.
    A = argument.type()

    try:
        # First attempt efficient 'R-op', this directly calculates the
        # directional derivative of the gradient, rather than explicitly
        # calculating the hessian and then multiplying.
        print("begins")
        sys.stdout.flush()
        R = T.Rop(g, argument, A)
        print("ends")
        sys.stdout.flush()
    except NotImplementedError:
        # This will break if the manifold is not a matrix.
        n, p = T.shape(argument)
        H = T.jacobian(g.flatten(), argument).reshape([n, p, n, p], 4)
        R = T.tensordot(H, A)

    try:
        hess = theano.function([argument, A], R)
    except theano.compile.UnusedInputError:
        warn('Theano detected unused input - suggests hessian may be zero or '
             'constant.')
        hess = theano.function([argument, A], R, on_unused_input='ignore')
    return grad, hess
Example #50
input_duration = T.scalar('input_duration')
input_intensity = T.scalar('input_intensity')

P = input_intensity * ((T.sgn(input_duration - t) + 1) / 2)

corrected_sigmoid = \
    1 / (1 + T.exp(-T.mul(a,(T.dot(c,activation) + P) - theta))) \
    - 1 / (1 + T.exp(np.multiply(a, theta)))
#corrected_sigmoid = theano.function([t, activation, input_duration, input_intensity], corrected_s)
#d_a = T.true_div(-activation + T.mul( 1 - T.mul(r, activation), corrected_s), tau)
d_a = T.true_div(-activation + T.mul( k - T.mul(r, activation), corrected_sigmoid), tau)
d_activation = theano.function(inputs=[activation, t, input_duration, input_intensity], 
                               outputs=d_a, on_unused_input='warn')

J = theano.function(inputs=[activation, t, input_duration, input_intensity], 
                    outputs=T.jacobian(d_a, activation), on_unused_input='warn')

activation_0 = np.array([0, 0])
t_0 = 0
t_1 = .125
dt = .0001
times = np.arange(t_0, t_1, dt)

intens = 1
duration = .125
params = (duration, intens)

#r = ode(d_activation).set_integrator('vode')
#r.set_initial_value(activation_0, t_0).set_f_params(*params)

timeseries = odeint(d_activation, activation_0, times, params, Dfun=J)
Example #51
    def __init__(self, params, sx2 = 1, linear_model = False, samples = 20, use_hat = False):
        ker, self.samples, self.params, self.KmmInv  = kernel(), samples, params, {}
        self.use_hat = use_hat

        model_file_name = 'model' + ('_hat' if use_hat else '') + ('_linear' if linear_model else '') + '.save'

        try:
            print 'Trying to load model...'
            with open(model_file_name, 'rb') as file_handle:
                obj = cPickle.load(file_handle)
                self.f, self.g, self.f_Kmm, self.f_KmmInv, self.dKmm_d = obj
                self.update_KmmInv_cache()
                print 'Loaded!'
            return
        except:
            print 'Failed. Creating a new model...'

        Y, Z, m, ls, mu, lL, eps_MK, eps_NQ, eps_NK, KmmInv = T.dmatrices('Y', 'Z', 'm', 'ls', 'mu', 
            'lL', 'eps_MK', 'eps_NQ', 'eps_NK', 'KmmInv')
        lhyp = T.dvector('lhyp')
        (M, K), N, Q = mu.shape, m.shape[0], Z.shape[1]
        s, sl2, sf2, l = T.exp(ls), T.exp(lhyp[0]), T.exp(lhyp[1]), T.exp(lhyp[2:2+Q])
        L = T.tril(lL - T.diag(T.diag(lL)) + T.diag(T.exp(T.diag(lL))))
        
        print 'Setting up cache...'
        Kmm = ker.RBF(sf2, l, Z) if not linear_model else ker.LIN(sl2, Z)
        KmmInv_cache = sT.matrix_inverse(Kmm)
        self.f_Kmm = theano.function([Z, lhyp], Kmm, name='Kmm')
        self.f_KmmInv = theano.function([Z, lhyp], KmmInv_cache, name='KmmInv_cache')
        self.update_KmmInv_cache()
        self.dKmm_d = {'Z': theano.function([Z, lhyp], T.jacobian(Kmm.flatten(), Z), name='dKmm_dZ'),
                       'lhyp': theano.function([Z, lhyp], T.jacobian(Kmm.flatten(), lhyp), name='dKmm_dlhyp')}

        print 'Setting up model...'
        if not self.use_hat:
            mu_scaled, L_scaled = sf2**0.5 * mu, sf2**0.5 * L
            X = m + s * eps_NQ
            U = mu_scaled + L_scaled.dot(eps_MK)
            Kmn = ker.RBF(sf2, l, Z, X) if not linear_model else ker.LIN(sl2, Z, X)
            Knn = ker.RBFnn(sf2, l, X) if not linear_model else ker.LINnn(sl2, X)
            A = KmmInv.dot(Kmn)
            B = Knn - T.sum(Kmn * KmmInv.dot(Kmn), 0)
            F = A.T.dot(U) + T.maximum(B, 1e-16)[:,None]**0.5 * eps_NK
            F = T.concatenate((T.zeros((N,1)), F), axis=1)
            S = T.nnet.softmax(F)
            LS = T.sum(T.log(T.maximum(T.sum(Y * S, 1), 1e-16)))
            if not linear_model:
                KL_U = -0.5 * (T.sum(KmmInv.T * T.sum(mu_scaled[:,None,:]*mu_scaled[None,:,:], 2))
                        + K * (T.sum(KmmInv.T * L_scaled.dot(L_scaled.T)) - M - 2.0*T.sum(T.log(T.diag(L_scaled)))
                               + 2.0*T.sum(T.log(T.diag(sT.cholesky(Kmm))))))
            else:
                KL_U = 0
            #KL_U = -0.5 * T.sum(T.sum(mu_scaled * KmmInv.dot(mu_scaled), 0) + T.sum(KmmInv * L_scaled.dot(L_scaled.T)) - M
            #                    - 2.0*T.sum(T.log(T.diag(L_scaled))) + 2.0*T.sum(T.log(T.diag(sT.cholesky(Kmm))))) if not linear_model else 0
        else:
            # mu_scaled, L_scaled = mu / sf2**0.5, L / sf2**0.5
            mu_scaled, L_scaled = mu / sf2, L / sf2
            X = m + s * eps_NQ
            U = mu_scaled + L_scaled.dot(eps_MK)
            Kmn = ker.RBF(sf2, l, Z, X) if not linear_model else ker.LIN(sl2, Z, X)
            Knn = ker.RBFnn(sf2, l, X) if not linear_model else ker.LINnn(sl2, X)
            B = Knn - T.sum(Kmn * KmmInv.dot(Kmn), 0)
            F = Kmn.T.dot(U) + T.maximum(B, 1e-16)[:,None]**0.5 * eps_NK
            F = T.concatenate((T.zeros((N,1)), F), axis=1)
            S = T.nnet.softmax(F)
            LS = T.sum(T.log(T.maximum(T.sum(Y * S, 1), 1e-16)))
            if not linear_model:
                KL_U = -0.5 * (T.sum(Kmm.T * T.sum(mu_scaled[:,None,:]*mu_scaled[None,:,:], 2))
                        + K * (T.sum(Kmm.T * L_scaled.dot(L_scaled.T)) - M - 2.0*T.sum(T.log(T.diag(L_scaled)))
                               - 2.0*T.sum(T.log(T.diag(sT.cholesky(Kmm))))))
            else:
                KL_U = 0

        KL_X_all = -0.5 * T.sum((m**2.0 + s**2.0)/sx2 - 1.0 - 2.0*ls + T.log(sx2), 1)
        KL_X = T.sum(KL_X_all)

        print 'Compiling...'
        inputs = {'Y': Y, 'Z': Z, 'm': m, 'ls': ls, 'mu': mu, 'lL': lL, 'lhyp': lhyp, 'KmmInv': KmmInv, 
            'eps_MK': eps_MK, 'eps_NQ': eps_NQ, 'eps_NK': eps_NK}
        z = 0.0*sum([T.sum(v) for v in inputs.values()]) # solve a bug with derivative wrt inputs not in the graph
        f = zip(['X', 'U', 'S', 'LS', 'KL_U', 'KL_X', 'KL_X_all'], [X, U, S, LS, KL_U, KL_X, KL_X_all])
        self.f = {n: theano.function(inputs.values(), f+z, name=n, on_unused_input='ignore') for n,f in f}
        g = zip(['LS', 'KL_U', 'KL_X'], [LS, KL_U, KL_X])
        wrt = {'Z': Z, 'm': m, 'ls': ls, 'mu': mu, 'lL': lL, 'lhyp': lhyp, 'KmmInv': KmmInv}
        self.g = {vn: {gn: theano.function(inputs.values(), T.grad(gv+z, vv), name='d'+gn+'_d'+vn, 
            on_unused_input='ignore') for gn,gv in g} for vn, vv in wrt.iteritems()}

        with open(model_file_name, 'wb') as file_handle:
            print 'Saving model...'
            sys.setrecursionlimit(2000)
            cPickle.dump([self.f, self.g, self.f_Kmm, self.f_KmmInv, self.dKmm_d], file_handle, protocol=cPickle.HIGHEST_PROTOCOL)
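
The dKmm_d cache above is the core T.jacobian idiom for kernel derivatives: flatten the kernel matrix, then differentiate with respect to the inputs and the log-hyperparameters. A cut-down sketch with a plain squared-exponential kernel standing in for ker.RBF (all names assumed for illustration):

import theano
import theano.tensor as T

Z = T.dmatrix('Z')            # (M, Q) inducing inputs
lhyp = T.dvector('lhyp')      # [log signal variance, log lengthscale]
sf2, l = T.exp(lhyp[0]), T.exp(lhyp[1])

d = Z[:, None, :] - Z[None, :, :]                 # (M, M, Q) pairwise diffs
Kmm = sf2 * T.exp(-0.5 * T.sum(d ** 2, axis=2) / l ** 2)

dKmm_dZ = theano.function([Z, lhyp], T.jacobian(Kmm.flatten(), Z))
dKmm_dlhyp = theano.function([Z, lhyp], T.jacobian(Kmm.flatten(), lhyp))
# dKmm_dZ(Z0, h0) has shape (M*M, M, Q); dKmm_dlhyp(Z0, h0) has shape (M*M, 2)
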
Example #52
0
drawsallbase    =  (np.tile(np.arange(ndraws), (nobs,nchoice,1)).transpose() + 0.5)/ndraws
draws1allbase   = norminv(drawsallbase*p0allbase)
p1allbase    = normcdf(-(Vallbase[:,1,:] + c10[:,groupid]*draws1allbase)/c11[:,groupid]).mean(axis=0)

pallbase = p0allbase*p1allbase

if use_fe and use_share_moments:    
    pstation = T.stack([pallbase[1:,np.where(stationid==i)[0]].mean(axis=1) for i in range(nstation)]).transpose().flatten()[(~nuisancexi).flatten().nonzero()[0]]
    pstationtrue = np.stack([dv_choice[1:,stationid==i].mean(axis=1) for i in range(nstation)]).transpose().flatten()[~nuisancexi.flatten()]
                   
    obj_multiplier = T.dscalar('obj_multiplier')
    lagrange_multiplier = T.dvector('lagrange_multiplier')
    lagrange = obj_multiplier*obj + (lagrange_multiplier*pstation).sum()
    
    constr = theano.function([theta], pstation)
    jab = theano.function([theta], T.jacobian(pstation, [theta]))
    hess_constr = theano.function([theta, lagrange_multiplier, obj_multiplier], 
                                  outputs=theano.gradient.hessian(lagrange, [theta]))
    
    ntheta1 = nalpha + nbeta + nallsigma
    nxifull = (nchoice-1)*nstation
    mask00 = np.ones((ntheta1, ntheta1), dtype = bool)
    mask01 = np.ones((ntheta1, nxi), dtype = bool)
    mask10 = np.ones((nxi, ntheta1), dtype = bool)
    mask11 = np.tile(np.eye(nstation, dtype = bool), (nchoice-1, nchoice-1))[~nuisancexi.flatten(),:][:,~nuisancexi.flatten()]
    
    maskj = np.hstack((mask10, mask11))
    maskh = np.hstack((np.vstack((mask00, mask10)), np.vstack((mask01, mask11))))
    
    def solve_constr(theta0, use_hess = False):
        pyipopt.set_loglevel(1)    
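
A stripped-down sketch of the derivative set this kind of constrained problem typically hands to an interior-point solver: the constraint Jacobian via T.jacobian and the Hessian of the Lagrangian via theano.gradient.hessian (placeholder objective and constraints, not the demand model above):

import theano
import theano.tensor as T

theta = T.dvector('theta')
obj = T.sum(theta ** 2)                       # placeholder objective
constraints = T.stack([theta.sum() - 1.0,     # placeholder constraints
                       (theta ** 2).sum() - 2.0])

obj_multiplier = T.dscalar('obj_multiplier')
lagrange_multiplier = T.dvector('lagrange_multiplier')
lagrange = obj_multiplier * obj + (lagrange_multiplier * constraints).sum()

constr = theano.function([theta], constraints)
jac = theano.function([theta], T.jacobian(constraints, theta))
hess = theano.function([theta, lagrange_multiplier, obj_multiplier],
                       theano.gradient.hessian(lagrange, theta))
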
Example #53
0
 def compute_reproj_err_d_wrapper(curr_w,o,feat):
     curr_cam = cams[o[0]]
     curr_X = X[o[1]]
     return T.jacobian(compute_reproj_err(curr_cam,curr_X,curr_w,feat),
                        [curr_cam,curr_X,curr_w])
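
When wrt is a list, as in the wrapper above, T.jacobian returns one Jacobian block per variable. A minimal illustration with a toy residual (all names hypothetical):

import theano
import theano.tensor as T

cam = T.dvector('cam')
X = T.dvector('X')
w = T.dvector('w')
residual = w[0] * (cam[:2] - X[:2])            # toy 2-vector residual

J_cam, J_X, J_w = T.jacobian(residual, [cam, X, w])
f = theano.function([cam, X, w], [J_cam, J_X, J_w])
# J_cam: (2, len(cam)), J_X: (2, len(X)), J_w: (2, len(w))
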
Example #54
0
  def __init__(self,fname,constants={},sparse=False):
    # parse model specification
    with open(fname,'r') as fid:
      mod = json.load(fid,object_pairs_hook=OrderedDict)
    self.mod = mod

    # constants
    self.con_dict = OrderedDict()
    for name in mod['constants']:
      value = constants[name]
      self.con_dict[name] = np.array(value) if type(value) is list else value

    # arguments
    self.arg_info = OrderedDict()
    self.arg_dict = OrderedDict()
    for (name,spec) in mod['arguments'].items():
      asize = spec['size']
      (amin,amax) = spec['range']
      agrid = np.linspace(amin,amax,asize)

      info = OrderedDict()
      info['size'] = asize
      info['grid'] = agrid

      self.arg_info[name] = info
      self.arg_dict[name] = agrid

    # parameters
    self.par_info = OrderedDict()
    self.par_sizes = []
    for (name,spec) in mod['parameters'].items():
      ptype = spec.get('type','scalar')
      psize = 1 if ptype == 'scalar' else spec['size']

      info = OrderedDict()
      info['type'] = ptype
      info['size'] = psize

      self.par_info[name] = info
      self.par_sizes.append(psize)

    # variables
    self.var_info = OrderedDict()
    self.var_sizes = []
    for (name,spec) in mod['variables'].items():
      vtype = spec['type']

      info = OrderedDict()
      info['type'] = vtype

      if vtype == 'scalar':
        vsize = 1
        self.var_sizes.append(vsize)
      elif vtype == 'vector':
        vsize = spec['size']
        self.var_sizes.append(vsize)
      elif vtype == 'function':
        vder = spec.get('derivs',[])
        nder = len(vder)
        args = spec['args']
        ainfo = [self.arg_info[arg] for arg in args]
        vsize = np.prod([ai['size'] for ai in ainfo])
        info['vder'] = vder
        info['nder'] = nder
        info['args'] = args
        info['shape'] = [self.arg_info[a]['size'] for a in args]
        info['grid'] = map(lambda v: v.transpose().flatten(),np.meshgrid(*[self.arg_info[a]['grid'] for a in args])) if len(args) > 1 else [self.arg_info[args[0]]['grid']]
        self.var_sizes.append(vsize)
        self.var_sizes += sum(map(len,vder))*[vsize]

      info['size'] = vsize
      self.var_info[name] = info

    # totals
    self.n_pars = len(self.par_info)
    self.n_vars = len(self.var_info)

    self.sz_pars = np.sum(self.par_sizes)
    self.sz_vars = np.sum(self.var_sizes)

    # input vectors
    self.par_vec = T.dvector('parvec')
    self.var_vec = T.dvector('varvec')

    # unpack and map out variables
    self.par_dict = OrderedDict()
    piter = iter(split(self.par_vec,self.par_sizes))
    for (name,info) in self.par_info.items():
      ptype = info['type']
      par = next(piter)
      if ptype == 'scalar':
        par = par[0]
        par.name = name
        self.par_dict[name] = par
      else:
        par.name = name
        self.par_dict[name] = par

    self.var_dict = OrderedDict()
    self.der_dict = OrderedDict()
    viter = iter(split(self.var_vec,self.var_sizes))
    for (name,info) in self.var_info.items():
      var = next(viter)
      vtype = info['type']
      if vtype == 'scalar':
        var = var[0]
        var.name = name
        self.var_dict[name] = var
      elif vtype == 'vector':
        var.name = name
        self.var_dict[name] = var
      elif vtype == 'function':
        var.name = name
        self.var_dict[name] = var
        vder = info.get('vder',[])
        nder = len(vder)
        self.der_dict[var] = {'': var}
        for der in vder:
          for s in prefixes(der):
            dvar = viter.next()
            dvar.name = name+'_'+s
            self.der_dict[var][s] = dvar

    # define operators
    def diff(var,*args):
      name = ''.join([getkey(self.arg_dict,v) for v in args])
      return self.der_dict[var][name]
    def vslice(var,arg,point):
      var_name = var.name
      arg_name = getkey(self.arg_dict,arg)
      var_info = self.var_info[var_name]
      args = var_info['args']
      (idx, _) = filter(lambda ia: ia[1]==arg_name, enumerate(args))[0]
      shape = var_info['shape']
      idx_list = slice_dim([point],idx,shape)
      return var[idx_list]
    def grid(var,arg):
      var_name = var.name
      arg_name = getkey(self.arg_dict,arg)
      var_info = self.var_info[var_name]
      args = var_info['args']
      (idx, _) = filter(lambda ia: ia[1]==arg_name, enumerate(args))[0]
      return var_info['grid'][idx]
    def interp(var,arg,x):
      i = icut(arg,x)
      t = np.clip((arg[i+1]-x)/(arg[i+1]-arg[i]),0.0,1.0)
      return t*vslice(var,arg,i) + (1.0-t)*vslice(var,arg,i+1)
    self.func_dict = {'diff': diff, 'slice': vslice, 'grid': grid, 'interp': interp}

    # combine them all
    self.sym_dict = merge(op_dict,self.con_dict,self.par_dict,self.var_dict,self.func_dict,self.arg_dict)

    # evaluate
    self.equations = []

    # regular equations
    for eq in mod['equations']:
      self.equations.append(eval(eq,{},self.sym_dict))

    # derivative relations
    for (name,info) in self.var_info.items():
      if info['type'] == 'function':
        var = self.var_dict[name]
        size = info['size']

        # derivative relations - symmetric except at 0
        vder = info.get('vder','')
        args = info['args']
        shape = info['shape']
        for der in vder:
          v0 = '' # function value
          for v1 in prefixes(der):
            # collect argument info
            arg = v1[-1]
            (adx, _) = filter(lambda ia: ia[1]==arg, enumerate(args))[0]
            s = shape[adx]
            grid = info['grid'][adx]

            # generate accessors
            zer_idx = slice_dim([0],adx,shape)
            one_idx = slice_dim([1],adx,shape)
            beg_idx = slice_dim(range(s-2),adx,shape)
            mid_idx = slice_dim(range(1,s-1),adx,shape)
            end_idx = slice_dim(range(2,s),adx,shape)

            # calculate derivatives
            d0 = self.der_dict[var][v0]
            d1 = self.der_dict[var][v1]
            self.equations.append(d0[one_idx]-d0[zer_idx]-(grid[one_idx]-grid[zer_idx])*d1[zer_idx])
            self.equations.append((d0[end_idx]-d0[beg_idx])-(grid[end_idx]-grid[beg_idx])*d1[mid_idx])

            # to next level
            v0 = v1

    # repack
    self.eqn_vec = T.join(0,*map(ensure_vector,self.equations))

    # jacobians
    self.par_jac = T.jacobian(self.eqn_vec,self.par_vec)
    self.var_jac = T.jacobian(self.eqn_vec,self.var_vec)

    # sparse?
    if sparse:
      self.par_jac = S.csc_from_dense(self.par_jac)
      self.var_jac = S.csc_from_dense(self.var_jac)
      self.linsolve = spsolve
    else:
      self.linsolve = np.linalg.solve

    # compile
    print('Compiling...')
    self.eqn_fun = theano.function([self.par_vec,self.var_vec],self.eqn_vec)
    self.parjac_fun = theano.function([self.par_vec,self.var_vec],self.par_jac)
    self.varjac_fun = theano.function([self.par_vec,self.var_vec],self.var_jac)

    # newtonian path
    t = T.dscalar('t')
    start = T.dvector('start')
    finish = T.dvector('finish')
    path = (1.0-t)*start + t*finish
    dpath = T.jacobian(path,t)
    self.path_fun = theano.function([start,finish,t],path)
    self.dpath_fun = theano.function([start,finish,t],dpath)
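
The compiled eqn_fun / varjac_fun pair above is exactly what a Newton-type solver needs. A self-contained miniature of that pattern on a hypothetical two-equation system:

import numpy as np
import theano
import theano.tensor as T

var_vec = T.dvector('varvec')
x, y = var_vec[0], var_vec[1]
eqn_vec = T.stack([x ** 2 + y ** 2 - 1.0,      # unit circle
                   x - y])                     # diagonal

eqn_fun = theano.function([var_vec], eqn_vec)
varjac_fun = theano.function([var_vec], T.jacobian(eqn_vec, var_vec))

v = np.array([1.0, 0.0])
for _ in range(20):                            # plain Newton iteration
    v = v - np.linalg.solve(varjac_fun(v), eqn_fun(v))
# v converges to (sqrt(0.5), sqrt(0.5))
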
    def CompileTrainingFunctions(self, RPROP_penalty=0.35, RPORP_gain=0.2, SGD_LR_=5e-5,
                                 SGD_momentum_=0.9, b_Override_only_SGD=False, bOverride_OnlyGPROP=False,
                                 bOverride_OnlyRPORP=False, b_Override_only_RMSPROP=False, bWeightDecay=False,
                                 bHighActivationPenalty=False, b_layerwise_LR= False, b_external_top_error=False,
                                 b_use_clipped_gradients = False, f_clip_at = 5e-3):
        """ creates the functions for the last layer of <self.layers>
            trains all parameters included in <self.params>, i.e. ignoring the layer structure

            rmsprop and sgd share <last_grads>, so switching between them may behave a bit strangely

            """
        
        print "Called: CompileTrainingFunctions. You don't have to call this function, you may use .training_step() directly!"
        if len(self.params)==0:
            print "call CompileOutputFunctions() before calling CompileTrainingFunctions()!"
            return -1

        # create a list of gradients for all model parameters

        if b_external_top_error==False:
            if b_use_clipped_gradients==False:
                output_layer_Gradients = T.grad( self.output_layer_Loss, self.params, disconnected_inputs="warn")


            else:
                print "\nBE WARNED: Feature activated: use_clipped_gradients (f_clip_at =",f_clip_at,")"
                output_layer_Gradients_tmp = T.jacobian( self.layers[-1].negative_log_likelihood_array(self.y), self.params, disconnected_inputs="warn")
                #each element has shape: (batchsize, rest...)
                output_layer_Gradients = [T.mean(T.clip(x,-np.float32(np.abs(f_clip_at)),np.float32(np.abs(f_clip_at))),axis=0) for x in output_layer_Gradients_tmp]

        else:
            self.known_top_err = T.TensorType('float32',(False,)*5,name='known_top_err')('known_top_err')
            print "predictions are last_layer.output, which is (hopefully) sigmoid!"
            print "top error is specified externally: <self.known_top_err> (batchsize,x,n_classes,y,z)"
            output_layer_Gradients = theano.gradient.grad( T.sum(self.layers[-1].output*self.known_top_err) , self.params ,disconnected_inputs="warn")#.subgraph_grad()


        if b_Override_only_SGD==False:
            self.RPROP_LRs=[] # one for each parameter -> many
        self.last_grads=[]
        self.gprop_grad_variance=[]



        for i,para in enumerate(self.params):
            if para in self.params[:i]:
                print "Detected RNN or shared param @index =",i
                continue
            if b_Override_only_SGD==False:
#                print "warning: was 4e-5"
                self.RPROP_LRs.append(theano.shared(  1e-4*np.ones(para.get_value().shape,dtype=theano.config.floatX) , name=para.name+str('_RPORP') , borrow=0))
                self.gprop_grad_variance.append(theano.shared( 1e-2*np.ones(para.get_value().shape,dtype=theano.config.floatX) , name=para.name+str('_GPROP') , borrow=0))
#            print "WARNING change this if you want to use sgd/rmsprop"
            self.last_grads.append(theano.shared( np.zeros(para.get_value().shape,dtype=theano.config.floatX) , name=para.name+str('_LG') , borrow=0))
            #self.SGD_EigHessian_perturbed_grads.append(theano.shared(  zeros(para.get_value().shape,dtype=theano.config.floatX) , name=para.name+str('_pLG') , borrow=True))

        n = len(self.last_grads)
        for i,lay in enumerate(self.layers):
            low = (i*2)%n
            lay.last_grads = self.last_grads[low:low+2]




        SGD_updatesa=[]
        SGD_updatesb=[]

        if b_Override_only_SGD==False:
            RPROP_updates = []
        RMSPROP_updates = []



        self.SGD_global_LR.set_value(np.float32(SGD_LR_))
        if bWeightDecay:
            print "CNN::using Weight decay! Change via this.SGD_global_weightdecay.set_value()"
            self.SGD_global_weightdecay = theano.shared(np.asarray(0.0005).astype("float32"))
        self.SGD_momentum.set_value(np.float32(SGD_momentum_))


        if b_Override_only_SGD==False:
            assert len(self.params)==len(self.last_grads),"rnn/shared params not yet implemented in rprop/gprop"


#            print "Trading memory usage for more speed (SGD_updates_a), change it if it gets too big (removes momentum, too)."
            for param_i, grad_i, last_grad_i, pLR_i, gprop_var_i in zip(self.params, output_layer_Gradients, self.last_grads, self.RPROP_LRs, self.gprop_grad_variance):
                # capping RPROP-LR inside [1e-7,1e-2]
                print "RPROP: missing backtracking handling "
                RPROP_updates.append((pLR_i, T.minimum(
                    T.maximum(pLR_i * (1 - np.float32(RPROP_penalty) * ((last_grad_i*grad_i) < -1e-9)
                                         + np.float32(RPORP_gain) * ((last_grad_i*grad_i) > 1e-11)),
                              1e-7 * T.ones_like(pLR_i)),
                    2e-3 * T.ones_like(pLR_i))))
                RPROP_updates.append((param_i, param_i  - pLR_i * grad_i/(T.abs_(grad_i) + 1e-6) - (0 if bWeightDecay==False else self.SGD_global_weightdecay*param_i) ))

                RPROP_updates.append((last_grad_i, grad_i ))#RPROP_updates.append((last_grad_i, (grad_i + 0.5*last_grad_i)/1.5)) #trailing exp-mean over last gradients: smoothing. check if useful...


        if b_layerwise_LR:
            print "Using layerwise LR multiplier. Speed penalty ~ 10%. Access it via this.SGD_local_LRs (default is 1. == no modification of the global LR)."
            self.SGD_local_LRs = [theano.shared(np.float32(1.)) for x in self.params] #one LR modifier per param group
        else:
            self.SGD_local_LRs = [1. for x in self.params]


        for param_i, grad_i, last_grad_i, local_lr_modifier in zip(self.params, output_layer_Gradients, self.last_grads, self.SGD_local_LRs):
            if len(self.params)>len(self.last_grads):
                grad_i = None
                print "grad_param::",param_i
                for i in range(len(self.params)):
                    if param_i == self.params[i]:
                        print ">>",i
                        grad_i = output_layer_Gradients[i] if grad_i==None else grad_i + output_layer_Gradients[i]

            SGD_updatesa.append((last_grad_i, grad_i             + last_grad_i * self.SGD_momentum))#use this if you want to use the gradient magnitude

        for i, param_i, grad_i, last_grad_i, local_lr_modifier in zip(range(len(self.params)), self.params, output_layer_Gradients, self.last_grads, self.SGD_local_LRs):
            if bWeightDecay and (i < len(self.params)-2): #no WeightDecay in last layer
                SGD_updatesb.append((param_i, param_i  - (self.SGD_global_LR * local_lr_modifier) * last_grad_i - self.SGD_global_LR *self.SGD_global_weightdecay*param_i   ))
            else:
                SGD_updatesb.append((param_i, param_i  - (self.SGD_global_LR * local_lr_modifier) * last_grad_i   ))

            RMSPROP_updates.append((last_grad_i, 0.95*last_grad_i + 0.05* (grad_i)**2  ))
            RMSPROP_updates.append((param_i, param_i - self.SGD_global_LR * grad_i/(  T.sqrt(last_grad_i+0.000001) ) ))
        print "RMSPROP: advice: a good LR is 2e-4  (value for <self.SGD_global_LR>)"



        if bHighActivationPenalty:
            self.HighActivationPenalty_coeff = theano.shared(np.float32(1e-4))
            print "Applying high-activation-penalty..."
            print "todo: test..."
            for lay in self.layers:
                type_ = lay.ActivationFunction
                ok=1

                if type_=="tanh":
                    grads = T.grad( T.mean((lay.output)**2), lay.params)
                elif type_=="sigmoid":
                    grads = T.grad( 2*T.mean((lay.output-0.5)**2), lay.params)
                elif type_=="relu":
                    print "relu...todo:test"
                    grads = T.grad( -T.mean((lay.output)**2), lay.params)
                else:
                    print "UNSUPPORTED ActivationFunction!"
                    ok=0

                if ok:

                    for param_i,grad_i in zip(lay.params,grads):

                        for i,u in enumerate(SGD_updatesb):
                            if u[0]==param_i:
                                SGD_updatesb[i] = (param_i,u[1] - (self.SGD_global_LR * self.HighActivationPenalty_coeff) * grad_i)
                                break
                        try:
                            for i,u in enumerate(RMSPROP_updates):
                                if u[0]==param_i:
                                    RMSPROP_updates[i] = (param_i,u[1] - (self.SGD_global_LR * self.HighActivationPenalty_coeff) * grad_i)
                                    break
                            for i,u in enumerate(RPROP_updates):
                                if u[0]==param_i:
                                    RPROP_updates[i] = (param_i,u[1] - (self.SGD_global_LR * self.HighActivationPenalty_coeff) * grad_i)
                                    break
                        except:
                            print "only sgd..."


        addthis = [self.z,] if self.bUseModulatedNLL else []

        if b_external_top_error:
            addthis = addthis + [self.known_top_err]

        if bOverride_OnlyRPORP or (b_Override_only_SGD==False and bOverride_OnlyGPROP==False and b_Override_only_RMSPROP==0):
            print "compiling RPROP..."
            self.train_model_RPROP = theano.function([self.x] + ([] if b_external_top_error else [self.y])+addthis, None if b_external_top_error else self.output_layer_Loss, updates=RPROP_updates,  on_unused_input='warn')

        if b_Override_only_SGD==False and bOverride_OnlyGPROP==False and bOverride_OnlyRPORP==False:
            print "compiling RMSPROP..."
            self.train_model_RMSPROP = theano.function([self.x] + ([] if b_external_top_error else [self.y])+addthis, None if b_external_top_error else self.output_layer_Loss, updates=RMSPROP_updates,  on_unused_input='warn')

        if bOverride_OnlyGPROP==0 and b_Override_only_RMSPROP==0 and bOverride_OnlyRPORP==False:
            print "compiling SGD..."
            # a only updates last_grads; it DOES NOT change any parameters.
            # You could call it 10 times and get the same NLL every time, but if momentum != 0 this changes the search direction.

            assert len(SGD_updatesa)==len(SGD_updatesb),str(len(SGD_updatesa))+" != "+str(len(SGD_updatesb))

            self.train_model_SGD_a     = theano.function([self.x] + ([] if b_external_top_error else [self.y])+addthis, None if b_external_top_error else self.output_layer_Loss, updates=SGD_updatesa,  on_unused_input='warn')#the output is the value you get BEFORE updates....
            
            try:
                self.train_model_SGD_a_ext = theano.function([self.x,self.y]+addthis, [self.output_layer_Loss, self.layers[-1].class_probabilities_realshape], updates=SGD_updatesa,  on_unused_input='warn')
            except:
                print "NNet.train_model_SGD_a_ext unavailable"
            # b ONLY changes the parameters
            self.train_model_SGD_b     = theano.function([], None, updates=SGD_updatesb)

        return 0
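
The clipped-gradient branch above uses T.jacobian to get one gradient per sample, clips it element-wise, and then averages over the batch axis. A stripped-down sketch of that idea with a tiny logistic-regression loss (all names and constants illustrative):

import numpy as np
import theano
import theano.tensor as T

f_clip_at = 5e-3
X = T.dmatrix('X')                        # (batchsize, 3) inputs
y = T.dvector('y')                        # (batchsize,) targets in {0, 1}
W = theano.shared(np.zeros(3), 'W')

p = T.nnet.sigmoid(T.dot(X, W))
per_sample_nll = -y * T.log(p) - (1.0 - y) * T.log(1.0 - p)    # (batchsize,)

per_sample_grad = T.jacobian(per_sample_nll, W)                # (batchsize, 3)
clipped_grad = T.mean(T.clip(per_sample_grad, -f_clip_at, f_clip_at), axis=0)

train = theano.function([X, y], per_sample_nll.mean(),
                        updates=[(W, W - 0.1 * clipped_grad)])
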
    def __init__(self, fileEmbeddings, wordEmbeddings, weights=None, contextSize=None, negative=None):
        filesCount, fileEmbeddingSize = fileEmbeddings.shape
        wordsCount, wordEmbeddingSize = wordEmbeddings.shape

        trainWeights = weights is None
        if trainWeights:
            weights = rnd2(fileEmbeddingSize + contextSize * wordEmbeddingSize, wordsCount)
        else:
            featuresCount, activationsCount = weights.shape
            contextSize = (featuresCount - fileEmbeddingSize) / wordEmbeddingSize
            negative = activationsCount - 1

        self.fileEmbeddings = theano.shared(asfx(fileEmbeddings), 'fileEmbeddings', borrow=False)
        self.wordEmbeddings = theano.shared(asfx(wordEmbeddings), 'wordEmbeddings', borrow=False)
        self.weights = theano.shared(asfx(weights), 'weights', borrow=False)

        fileIndexOffset = 0
        wordIndicesOffset = fileIndexOffset + 1
        indicesOffset = wordIndicesOffset + contextSize

        contexts = T.imatrix('contexts')
        fileIndices = contexts[:,fileIndexOffset:wordIndicesOffset]
        wordIndices = contexts[:,wordIndicesOffset:indicesOffset]
        indices = contexts[:,indicesOffset:indicesOffset + negative]

        files = self.fileEmbeddings[fileIndices]
        fileFeatures = T.flatten(files, outdim=2)
        words = self.wordEmbeddings[wordIndices]
        wordFeatures = T.flatten(words, outdim=2)
        features = T.concatenate([fileFeatures, wordFeatures], axis=1)

        subWeights = self.weights[:,indices].dimshuffle(1, 0, 2)

        probabilities = T.batched_dot(features, subWeights)

        parameters = [self.fileEmbeddings]
        subParameters = [files]
        consider_constant = [self.wordEmbeddings]

        if trainWeights:
            parameters.append(self.weights)
            subParameters.append(subWeights)
        else:
            consider_constant.append(self.weights)

        # cost = -T.mean(T.log(T.nnet.sigmoid(probabilities[:,0])) + T.sum(T.log(T.nnet.sigmoid(-probabilities[:,1:])), dtype=floatX, acc_dtype=floatX), dtype=floatX, acc_dtype=floatX)
        cost = -T.log(T.nnet.sigmoid(probabilities[:,0])) - T.sum(T.log(T.nnet.sigmoid(-probabilities[:,1:])), dtype=floatX, acc_dtype=floatX)

        learningRate = T.scalar('learningRate', dtype=floatX)

        updates = []
        for p, subP in zip(parameters, subParameters):
            if subP is not None:
                gradient = T.jacobian(cost, wrt=subP)
                update = (p, T.inc_subtensor(subP, -learningRate * gradient))
            else:
                gradient = T.jacobian(cost, wrt=p)
                update = (p, p - learningRate * gradient)

            updates.append(update)

        contextIndex = T.iscalar('contextIndex')
        self.trainingContexts = theano.shared(empty(1,1,1), 'trainingContexts', borrow=False)

        self.trainModel = theano.function(
            inputs=[contextIndex, learningRate],
            outputs=cost,
            updates=updates,
            givens={
                contexts: self.trainingContexts[:,contextIndex]
            }
        )
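
The update above combines T.jacobian with T.inc_subtensor so that only the embedding rows touched by a batch are written back. One way to make the shapes line up, assuming a toy per-example cost: take the Jacobian with respect to the sliced rows, sum out the example axis to get the gradient of the batch cost, then increment the slice (all names illustrative):

import numpy as np
import theano
import theano.tensor as T

E = theano.shared(np.random.randn(100, 8), 'E')   # (vocab, dim) embeddings
idx = T.ivector('idx')                            # row indices used by the batch
lr = T.dscalar('lr')

sub = E[idx]                                      # (B, dim) rows in use
per_example_cost = T.sum(sub ** 2, axis=1)        # (B,) toy cost

J = T.jacobian(per_example_cost, sub)             # (B, B, dim), mostly zeros
grad_sub = J.sum(axis=0)                          # (B, dim) gradient of the sum

train = theano.function([idx, lr], per_example_cost.sum(),
                        updates=[(E, T.inc_subtensor(sub, -lr * grad_sub))])
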
Example #57
0
    def compile(self, optimizer, metrics=[]):
        metrics += [mean_q]

        if hasattr(optimizer, '__len__'):
            if len(optimizer) != 2:
                raise ValueError('More than two optimizers provided. Please only provide a maximum of two optimizers, the first one for the actor and the second one for the critic.')
            actor_optimizer, critic_optimizer = optimizer
        else:
            actor_optimizer = optimizer
            critic_optimizer = clone_optimizer(optimizer)
        assert actor_optimizer != critic_optimizer

        if len(metrics) == 2 and hasattr(metrics[0], '__len__') and hasattr(metrics[1], '__len__'):
            actor_metrics, critic_metrics = metrics
        else:
            actor_metrics = critic_metrics = metrics

        def clipped_mse(y_true, y_pred):
            delta = K.clip(y_true - y_pred, self.delta_range[0], self.delta_range[1])
            return K.mean(K.square(delta), axis=-1)

        # Compile target networks. We only use them in feed-forward mode, hence we can pass any
        # optimizer and loss since we never use it anyway.
        self.target_actor = clone_model(self.actor, self.custom_model_objects)
        self.target_actor.compile(optimizer='sgd', loss='mse')
        self.target_critic = clone_model(self.critic, self.custom_model_objects)
        self.target_critic.compile(optimizer='sgd', loss='mse')

        # We also compile the actor. We never optimize the actor using Keras but instead compute
        # the policy gradient ourselves. However, we need the actor in feed-forward mode, hence
        # we also compile it with any optimizer and loss, since it is never used for training anyway.
        self.actor.compile(optimizer='sgd', loss='mse')

        # Compile the critic.
        if self.target_model_update < 1.:
            # We use the `AdditionalUpdatesOptimizer` to efficiently soft-update the target model.
            critic_updates = get_soft_target_model_updates(self.target_critic, self.critic, self.target_model_update)
            critic_optimizer = AdditionalUpdatesOptimizer(critic_optimizer, critic_updates)
        self.critic.compile(optimizer=critic_optimizer, loss=clipped_mse, metrics=critic_metrics)

        # Combine actor and critic so that we can get the policy gradient.
        combined_inputs = []
        critic_inputs = []
        for i in self.critic.input:
            if i == self.critic_action_input:
                combined_inputs.append(self.actor.output)
            else:
                combined_inputs.append(i)
                critic_inputs.append(i)
        combined_output = self.critic(combined_inputs)
        if K._BACKEND == 'tensorflow':
            grads = K.gradients(combined_output, self.actor.trainable_weights)
            grads = [g / float(self.batch_size) for g in grads]  # since TF sums over the batch
        elif K._BACKEND == 'theano':
            import theano.tensor as T
            grads = T.jacobian(combined_output.flatten(), self.actor.trainable_weights)
            grads = [K.mean(g, axis=0) for g in grads]
        else:
            raise RuntimeError('Unknown Keras backend "{}".'.format(K._BACKEND))
        
        # We now have the gradients (`grads`) of the combined model w.r.t. the actor's weights and
        # the output (`output`). Compute the necessary updates using a clone of the actor's optimizer.
        clipnorm = getattr(actor_optimizer, 'clipnorm', 0.)
        clipvalue = getattr(actor_optimizer, 'clipvalue', 0.)
        def get_gradients(loss, params):
            # We want to follow the gradient, but the optimizer goes in the opposite direction to
            # minimize loss. Hence the double inversion.
            assert len(grads) == len(params)
            modified_grads = [-g for g in grads]
            if clipnorm > 0.:
                norm = K.sqrt(sum([K.sum(K.square(g)) for g in modified_grads]))
                modified_grads = [optimizers.clip_norm(g, clipnorm, norm) for g in modified_grads]
            if clipvalue > 0.:
                modified_grads = [K.clip(g, -clipvalue, clipvalue) for g in modified_grads]
            return modified_grads
        actor_optimizer.get_gradients = get_gradients
        updates = actor_optimizer.get_updates(self.actor.trainable_weights, self.actor.constraints, None)
        if self.target_model_update < 1.:
            # Include soft target model updates.
            updates += get_soft_target_model_updates(self.target_actor, self.actor, self.target_model_update)
        updates += self.actor.updates  # include other updates of the actor, e.g. for BN

        # Finally, combine it all into a callable function.
        actor_inputs = None
        if not hasattr(self.actor.input, '__len__'):
            actor_inputs = [self.actor.input]
        else:
            actor_inputs = self.actor.input
        inputs = actor_inputs + critic_inputs
        if self.uses_learning_phase:
            inputs += [K.learning_phase()]
        self.actor_train_fn = K.function(inputs, [self.actor.output], updates=updates)
        self.actor_optimizer = actor_optimizer

        self.compiled = True
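
In the Theano branch above, T.jacobian of the flattened critic output gives one gradient per sample, which is then averaged over the batch. A minimal Theano-only sketch of that step with a toy linear actor and fixed critic weights (all names illustrative):

import numpy as np
import theano
import theano.tensor as T

states = T.dmatrix('states')                        # (batch, 4) observations
W_actor = theano.shared(np.random.randn(4, 2), 'W_actor')
w_critic = np.random.randn(2)                       # frozen critic weights

actions = T.dot(states, W_actor)                    # (batch, 2) actions
q_values = T.dot(actions, w_critic)                 # (batch,) Q estimates

per_sample_grads = T.jacobian(q_values.flatten(), W_actor)   # (batch, 4, 2)
policy_grad = T.mean(per_sample_grads, axis=0)               # (4, 2)

policy_grad_fn = theano.function([states], policy_grad)
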
Example #58
0
compile_mode = 'FAST_COMPILE'
#compile_mode = 'FAST_RUN'
th.config.linker='cvm'

start = t.time()
err_ = hand_objective(params_,nbones_,base_relatives_,parents_,inverse_base_absolutes_,base_positions_,
                      weights_,mirror_factor_,points_,correspondences_)
f = th.function([params_,nbones_,base_relatives_,parents_,inverse_base_absolutes_,base_positions_,
                 weights_,mirror_factor_,points_,correspondences_], err_, mode=compile_mode)
end = t.time()
tf_compile = (end - start)
print("tf_compile: %f" % tf_compile)

start = t.time()
jac = T.jacobian(T.flatten(err_),[params_])
fjac = th.function([params_,nbones_,base_relatives_,parents_,inverse_base_absolutes_,base_positions_,
                 weights_,mirror_factor_,points_,correspondences_], jac, mode=compile_mode)
end = t.time()
tJ_compile = (end - start)
print("tJ_compile: %f" % tJ_compile)

ntasks = (len(sys.argv)-1)//5
for task_id in range(ntasks):
    print("task_id: %i" % task_id)

    argv_idx = task_id*5 + 1
    dir_in = sys.argv[argv_idx]
    dir_out = sys.argv[argv_idx+1]
    fn = sys.argv[argv_idx+2]
    nruns_f = int(sys.argv[argv_idx+3])