Example #1
    def define_process(self):
        # Prior
        self.prior_freedom = self.freedom()
        self.prior_mean = self.location_space
        self.prior_covariance = self.kernel_f_space * self.prior_freedom
        self.prior_variance = tnl.extract_diag(self.prior_covariance)
        self.prior_std = tt.sqrt(self.prior_variance)
        self.prior_noise = tt.sqrt(tnl.extract_diag(self.kernel_space * self.prior_freedom))
        self.prior_median = self.prior_mean

        sigma = 2
        self.prior_quantile_up = self.prior_mean + sigma * self.prior_std
        self.prior_quantile_down = self.prior_mean - sigma * self.prior_std
        self.prior_noise_up = self.prior_mean + sigma * self.prior_noise
        self.prior_noise_down = self.prior_mean - sigma * self.prior_noise

        self.prior_sampler = self.prior_mean + self.random_scalar * cholesky_robust(self.prior_covariance).dot(self.random_th)

        # Posterior
        self.posterior_freedom = self.prior_freedom + self.inputs.shape[1]
        beta = (self.mapping_outputs - self.location_inputs).T.dot(tsl.solve(self.kernel_inputs, self.mapping_outputs - self.location_inputs))
        coeff = (self.prior_freedom + beta - 2)/(self.posterior_freedom - 2)
        self.posterior_mean = self.location_space + self.kernel_f_space_inputs.dot( tsl.solve(self.kernel_inputs, self.mapping_outputs - self.location_inputs))
        self.posterior_covariance = coeff * (self.kernel_f.cov(self.space_th) - self.kernel_f_space_inputs.dot(
            tsl.solve(self.kernel_inputs, self.kernel_f_space_inputs.T)))
        self.posterior_variance = tnl.extract_diag(self.posterior_covariance)
        self.posterior_std = tt.sqrt(self.posterior_variance)
        self.posterior_noise = coeff * tt.sqrt(tnl.extract_diag(self.kernel.cov(self.space_th) - self.kernel_f_space_inputs.dot(
            tsl.solve(self.kernel_inputs, self.kernel_f_space_inputs.T))))
        self.posterior_median = self.posterior_mean
        self.posterior_quantile_up = self.posterior_mean + sigma * self.posterior_std
        self.posterior_quantile_down = self.posterior_mean - sigma * self.posterior_std
        self.posterior_noise_up = self.posterior_mean + sigma * self.posterior_noise
        self.posterior_noise_down = self.posterior_mean - sigma * self.posterior_noise
        self.posterior_sampler = self.posterior_mean + self.random_scalar * cholesky_robust(self.posterior_covariance).dot(self.random_th)
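
The posterior block above is the standard Gaussian-process conditioning, m_post = m + K_sx K_xx^{-1}(y - m_x) and K_post = K_ss - K_sx K_xx^{-1} K_sx^T, rescaled by the Student-t factor coeff. A minimal NumPy sketch of that conditioning, with hypothetical names for the precomputed kernel matrices (the Theano-specific pieces such as cholesky_robust are omitted):

import numpy as np

def gp_posterior(m_space, m_inputs, y, K_xx, K_sx, K_ss):
    # Solve K_xx @ alpha = (y - m_inputs) instead of forming an explicit inverse.
    alpha = np.linalg.solve(K_xx, y - m_inputs)
    post_mean = m_space + K_sx.dot(alpha)
    post_cov = K_ss - K_sx.dot(np.linalg.solve(K_xx, K_sx.T))
    post_std = np.sqrt(np.diag(post_cov))
    return post_mean, post_cov, post_std
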
Example #2
    def __init__(self, tau2_0=0.1, sigma2_0=0.1, l_0=0.1, eta=0.1, debug=1):
        """
        :type sigma_0: float
        :param sigma_0: starting value for variance.
        
        :type l_0: float
        :param l_0: starting value for length scale.
        
        :type eta: float
        :param eta: learning rate
        
        :type debug: int
        :param debug: verbosity
        """

        print "GP Initing..." if debug > 0 else 0

        ##################################################
        #### Prepare the -loglik gradient descent

        ##Init the shared vars
        X = T.dmatrix('X')
        f = T.dmatrix('f')
        self.tau2 = theano.shared(tau2_0)
        self.l = theano.shared(l_0)
        self.sigma2 = theano.shared(sigma2_0)

        #Make the covar matrix
        K = self.covFunc(X, X, self.l)

        #Get a numerically safe decomp
        L = LA.cholesky(K + self.tau2 * T.identity_like(K))

        #Calculate the weights for each of the training data; predictions are a weighted sum.
        alpha = LA.solve(T.transpose(L), LA.solve(L, f))

        ##Calculate - log marginal likelihood
        nloglik = -T.reshape(
            -0.5 * T.dot(T.transpose(f), alpha) - T.sum(T.log(T.diag(L))), [])

        #Get grad
        grads = [
            T.grad(nloglik, self.tau2),
            T.grad(nloglik, self.l),
            T.grad(nloglik, self.sigma2)
        ]

        #Updates; floor each parameter at 0.1 so it stays positive
        updates = [
            (var, T.maximum(var - eta * grad, 0.1))
            for var, grad in zip([self.tau2, self.l, self.sigma2], grads)
        ]

        self._gd = theano.function(inputs=[X, f], updates=updates)

        print "Done" if debug > 0 else 0
Example #3
File: ops.py Project: Ambier/Theano
def inv_as_solve(node):
    if not imported_scipy:
        return False
    if isinstance(node.op, (Dot, Dot22)):
        l, r = node.inputs
        if l.owner and l.owner.op == matrix_inverse:
            return [solve(l.owner.inputs[0], r)]
        if r.owner and r.owner.op == matrix_inverse:
            if is_symmetric(r.owner.inputs[0]):
                return [solve(r.owner.inputs[0], l.T).T]
            else:
                return [solve(r.owner.inputs[0].T, l.T).T]
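
The rewrite above replaces an explicit matrix inverse followed by a product with a single linear solve, which is cheaper and numerically better conditioned. A small NumPy illustration of the equivalence it relies on (not the Theano rewrite itself):

import numpy as np

rng = np.random.RandomState(0)
A = rng.rand(4, 4) + 4 * np.eye(4)   # a well-conditioned square matrix
b = rng.rand(4, 2)

left = np.linalg.inv(A).dot(b)       # inverse(A) dot b, as matched in the graph
right = np.linalg.solve(A, b)        # what the rewrite produces
assert np.allclose(left, right)
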
Example #4
 def subprocess_gp(self, subkernel, cov=False, noise=False):
     k_ni = subkernel.cov(self.space, self.inputs)
     mu = self.mean(self.space) + k_ni.dot(sL.solve(self.cov_inputs, self.inv_outputs - self.mean_inputs))
     if noise:
         k_cov = self.kernel.cov(self.space) - k_ni.dot(sL.solve(self.cov_inputs, k_ni.T))
     else:
         k_cov = self.kernel_f.cov(self.space) - k_ni.dot(sL.solve(self.cov_inputs, k_ni.T))
     var = nL.extract_diag(debug(k_cov, 'k_cov'))
     if cov:
         return mu, var, k_cov
     else:
         return mu, var
Example #5
def inv_as_solve(node):
    if not imported_scipy:
        return False
    if isinstance(node.op, (Dot, Dot22)):
        l, r = node.inputs
        if l.owner and l.owner.op == matrix_inverse:
            return [solve(l.owner.inputs[0], r)]
        if r.owner and r.owner.op == matrix_inverse:
            if is_symmetric(r.owner.inputs[0]):
                return [solve(r.owner.inputs[0], l.T).T]
            else:
                return [solve(r.owner.inputs[0].T, l.T).T]
Example #6
    def th_define_process(self):
        #print('stochastic_define_process')
        # Basic Tensors
        self.mapping_outputs = tt_to_num(self.f_mapping.inv(self.th_outputs))
        self.mapping_latent = tt_to_num(self.f_mapping(self.th_outputs))
        #self.mapping_scalar = tt_to_num(self.f_mapping.inv(self.th_scalar))

        self.prior_location_space = self.f_location(self.th_space)
        self.prior_location_inputs = self.f_location(self.th_inputs)

        self.prior_kernel_space = tt_to_cov(self.f_kernel_noise.cov(self.th_space))
        self.prior_kernel_inputs = tt_to_cov(self.f_kernel_noise.cov(self.th_inputs))
        self.prior_cholesky_space = cholesky_robust(self.prior_kernel_space)

        self.prior_kernel_f_space = self.f_kernel.cov(self.th_space)
        self.prior_kernel_f_inputs = self.f_kernel.cov(self.th_inputs)
        self.prior_cholesky_f_space = cholesky_robust(self.prior_kernel_f_space)

        self.cross_kernel_space_inputs = tt_to_num(self.f_kernel_noise.cov(self.th_space, self.th_inputs))
        self.cross_kernel_f_space_inputs = tt_to_num(self.f_kernel.cov(self.th_space, self.th_inputs))

        self.posterior_location_space = self.prior_location_space + self.cross_kernel_space_inputs.dot(
            tsl.solve(self.prior_kernel_inputs, self.mapping_outputs - self.prior_location_inputs))
        self.posterior_location_f_space = self.prior_location_space + self.cross_kernel_f_space_inputs.dot(
            tsl.solve(self.prior_kernel_inputs, self.mapping_outputs - self.prior_location_inputs))

        self.posterior_kernel_space = self.prior_kernel_space - self.cross_kernel_space_inputs.dot(
            tsl.solve(self.prior_kernel_inputs, self.cross_kernel_space_inputs.T))
        self.posterior_cholesky_space = cholesky_robust(self.posterior_kernel_space)

        self.posterior_kernel_f_space = self.prior_kernel_f_space - self.cross_kernel_f_space_inputs.dot(
            tsl.solve(self.prior_kernel_inputs, self.cross_kernel_f_space_inputs.T))
        self.posterior_cholesky_f_space = cholesky_robust(self.posterior_kernel_f_space)

        self.prior_kernel_diag_space = tt_to_bounded(tnl.extract_diag(self.prior_kernel_space), zero32)
        self.prior_kernel_diag_f_space = tt_to_bounded(tnl.extract_diag(self.prior_kernel_f_space), zero32)
        self.posterior_kernel_diag_space = tt_to_bounded(tnl.extract_diag(self.posterior_kernel_space), zero32)
        self.posterior_kernel_diag_f_space = tt_to_bounded(tnl.extract_diag(self.posterior_kernel_f_space), zero32)

        self.prior_kernel_sd_space = tt.sqrt(self.prior_kernel_diag_space)
        self.prior_kernel_sd_f_space = tt.sqrt(self.prior_kernel_diag_f_space)
        self.posterior_kernel_sd_space = tt.sqrt(self.posterior_kernel_diag_space)
        self.posterior_kernel_sd_f_space = tt.sqrt(self.posterior_kernel_diag_f_space)

        self.prior_cholesky_diag_space = tnl.alloc_diag(self.prior_kernel_sd_space)
        self.prior_cholesky_diag_f_space = tnl.alloc_diag(self.prior_kernel_sd_f_space)
        self.posterior_cholesky_diag_space = tnl.alloc_diag(self.posterior_kernel_sd_space)
        self.posterior_cholesky_diag_f_space = tnl.alloc_diag(self.posterior_kernel_sd_f_space)
Example #7
    def init_train_updates(self):
        network_output = self.variables.network_output
        prediction_func = self.variables.train_prediction_func
        last_error = self.variables.last_error
        error_func = self.variables.error_func
        mu = self.variables.mu

        new_mu = ifelse(
            T.lt(last_error, error_func),
            mu * self.mu_update_factor,
            mu / self.mu_update_factor,
        )

        se_for_each_sample = ((network_output - prediction_func)**2).ravel()

        params = parameter_values(self.connection)
        param_vector = T.concatenate([param.flatten() for param in params])

        J = compute_jacobian(se_for_each_sample, params)
        n_params = J.shape[1]

        updated_params = param_vector - slinalg.solve(
            J.T.dot(J) + new_mu * T.eye(n_params), J.T.dot(se_for_each_sample))

        updates = [(mu, new_mu)]
        parameter_updates = setup_parameter_updates(params, updated_params)
        updates.extend(parameter_updates)

        return updates
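
The update above is a Levenberg-Marquardt step: the parameter vector moves by (J^T J + mu I)^{-1} J^T e, with mu increased or decreased depending on whether the error improved. A hedged NumPy sketch of a single step (the names are illustrative, not the library's API):

import numpy as np

def levenberg_marquardt_step(param_vector, J, errors, mu):
    # J is the Jacobian of the per-sample errors wrt the parameters, shape (n_samples, n_params).
    n_params = J.shape[1]
    delta = np.linalg.solve(J.T.dot(J) + mu * np.eye(n_params), J.T.dot(errors))
    return param_vector - delta
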
Example #8
 def __init__(self, f1, vs1, f2, vs2):
     self.f1 = f1
     self.f2 = f2
     self.vs1 = vs1
     self.vs2 = vs2
     self.sz1 = [shape(v)[0] for v in self.vs1]
     self.sz2 = [shape(v)[0] for v in self.vs2]
     for i in range(1, len(self.sz1)):
         self.sz1[i] += self.sz1[i-1]
     self.sz1 = [(0 if i==0 else self.sz1[i-1], self.sz1[i]) for i in range(len(self.sz1))]
     for i in range(1, len(self.sz2)):
         self.sz2[i] += self.sz2[i-1]
     self.sz2 = [(0 if i==0 else self.sz2[i-1], self.sz2[i]) for i in range(len(self.sz2))]
     self.df1 = grad(self.f1, vs1)
     self.new_vs1 = [tt.vector() for v in self.vs1]
     self.func1 = th.function(self.new_vs1, [-self.f1, -self.df1], givens=zip(self.vs1, self.new_vs1))
     def f1_and_df1(x0):
         return self.func1(*[x0[a:b] for a, b in self.sz1])
     self.f1_and_df1 = f1_and_df1
     J = jacobian(grad(f1, vs2), vs1)
     H = hessian(f1, vs1)
     g = grad(f2, vs1)
     self.df2 = -tt.dot(J, ts.solve(H, g))+grad(f2, vs2)
     self.func2 = th.function([], [-self.f2, -self.df2])
     def f2_and_df2(x0):
         for v, (a, b) in zip(self.vs2, self.sz2):
             v.set_value(x0[a:b])
         self.maximize1()
         return self.func2()
     self.f2_and_df2 = f2_and_df2
Example #9
    def test_solve_dtype(self):
        pytest.importorskip("scipy")

        dtypes = [
            "uint8",
            "uint16",
            "uint32",
            "uint64",
            "int8",
            "int16",
            "int32",
            "int64",
            "float16",
            "float32",
            "float64",
        ]

        A_val = np.eye(2)
        b_val = np.ones((2, 1))

        # try all dtype combinations
        for A_dtype, b_dtype in itertools.product(dtypes, dtypes):
            A = tensor.matrix(dtype=A_dtype)
            b = tensor.matrix(dtype=b_dtype)
            x = solve(A, b)
            fn = function([A, b], x)
            x_result = fn(A_val.astype(A_dtype), b_val.astype(b_dtype))

            assert x.dtype == x_result.dtype
Example #10
 def __init__(self, f1, vs1, f2, vs2):
     self.f1 = f1
     self.f2 = f2
     self.vs1 = vs1
     self.vs2 = vs2
     self.sz1 = [shape(v)[0] for v in self.vs1]
     self.sz2 = [shape(v)[0] for v in self.vs2]
     for i in range(1, len(self.sz1)):
         self.sz1[i] += self.sz1[i-1]
     self.sz1 = [(0 if i==0 else self.sz1[i-1], self.sz1[i]) for i in range(len(self.sz1))]
     for i in range(1, len(self.sz2)):
         self.sz2[i] += self.sz2[i-1]
     self.sz2 = [(0 if i==0 else self.sz2[i-1], self.sz2[i]) for i in range(len(self.sz2))]
     self.df1 = grad(self.f1, vs1)
     self.new_vs1 = [tt.vector() for v in self.vs1]
     self.func1 = th.function(self.new_vs1, [-self.f1, -self.df1], givens=zip(self.vs1, self.new_vs1))
     def f1_and_df1(x0):
         return self.func1(*[x0[a:b] for a, b in self.sz1])
     self.f1_and_df1 = f1_and_df1
     J = jacobian(grad(f1, vs2), vs1)
     H = hessian(f1, vs1)
     g = grad(f2, vs1)
     self.df2 = -tt.dot(J, ts.solve(H, g))+grad(f2, vs2)
     self.func2 = th.function([], [-self.f2, -self.df2])
     def f2_and_df2(x0):
         for v, (a, b) in zip(self.vs2, self.sz2):
             v.set_value(x0[a:b])
         self.maximize1()
         return self.func2()
     self.f2_and_df2 = f2_and_df2
Example #11
    def init_train_updates(self):
        network_output = self.variables.network_output
        prediction_func = self.variables.train_prediction_func
        last_error = self.variables.last_error
        error_func = self.variables.error_func
        mu = self.variables.mu

        new_mu = ifelse(
            T.lt(last_error, error_func),
            mu * self.mu_update_factor,
            mu / self.mu_update_factor,
        )

        se_for_each_sample = (
            (network_output - prediction_func) ** 2
        ).ravel()

        params = parameter_values(self.connection)
        param_vector = parameters2vector(self)

        J = compute_jacobian(se_for_each_sample, params)
        n_params = J.shape[1]

        updated_params = param_vector - slinalg.solve(
            J.T.dot(J) + new_mu * T.eye(n_params),
            J.T.dot(se_for_each_sample)
        )

        updates = [(mu, new_mu)]
        parameter_updates = setup_parameter_updates(params, updated_params)
        updates.extend(parameter_updates)

        return updates
Example #12
def lp_to_phi0_fs(lp, Tobs):
    Tobs2 = Tobs * Tobs
    Tobs3 = Tobs2 * Tobs
    phis = tts.solve(poly_basis_to_legendre_basis, lp)

    phi0 = phis[0]
    f0 = phis[1] / (2.0 * pi * Tobs)
    fdot = phis[2] / (pi * Tobs2)
    fddot = phis[3] / (pi / 3.0 * Tobs3)

    return (phi0, f0, fdot, fddot)
Example #13
        def second_moments(i, j, M2, beta, R, logk_c, logk_r, z_, Sx, *args):
            # This comes from Deisenroth's thesis ( Eqs 2.51- 2.54 )
            Rij = R[i, j]
            n2 = logk_c[i] + logk_r[j]
            n2 += utils.maha(z_[i], -z_[j], 0.5 * solve(Rij, Sx))
            Q = tt.exp(n2) / tt.sqrt(det(Rij))

            # Eq 2.55
            m2 = matrix_dot(beta[i], Q, beta[j])

            m2 = theano.ifelse.ifelse(tt.eq(i, j), m2 + 1e-6, m2)
            M2 = tt.set_subtensor(M2[i, j], m2)
            return M2
Example #14
def test_local_lift_solve():
    A = tensor.fmatrix()
    b = tensor.fmatrix()
    o = slinalg.solve(A, b)
    f_cpu = theano.function([A, b], o)
    f_gpu = theano.function([A, b], o, mode=mode_with_gpu)
    assert not any(isinstance(n.op, slinalg.Solve)
                   for n in f_gpu.maker.fgraph.apply_nodes)
    assert any(isinstance(n.op, GpuCusolverSolve)
               for n in f_gpu.maker.fgraph.apply_nodes)
    A_val = numpy.random.uniform(-0.4, 0.4, (5, 5)).astype("float32")
    b_val = numpy.random.uniform(-0.4, 0.4, (5, 3)).astype("float32")
    utt.assert_allclose(f_cpu(A_val, b_val), f_gpu(A_val, b_val))
Example #15
    def __init__(self, f1, vs1, f2, vs2):
        # keep references to the two objectives and their variable lists
        self.f1 = f1
        self.f2 = f2
        self.vs1 = vs1
        self.vs2 = vs2
        #-----

        #
        self.sz1 = [
            shape(v)[0] for v in self.vs1
        ]  # length of each variable (its first dimension)
        self.sz2 = [shape(v)[0] for v in self.vs2]
        for i in range(1, len(self.sz1)):  # running (cumulative) sums of the sizes
            self.sz1[i] += self.sz1[i - 1]
        self.sz1 = [
            (0 if i == 0 else self.sz1[i - 1], self.sz1[i])
            for i in range(len(self.sz1))
        ]  # turn the cumulative sizes into (start, end) slice pairs
        for i in range(1, len(self.sz2)):  # same as for sz1
            self.sz2[i] += self.sz2[i - 1]
        self.sz2 = [(0 if i == 0 else self.sz2[i - 1], self.sz2[i])
                    for i in range(len(self.sz2))]  # same as for sz1
        self.df1 = grad(self.f1, vs1)  # IMPORTANT: VERY SLOW
        self.new_vs1 = [tt.vector() for v in self.vs1]  # fresh symbolic vectors, substituted via givens
        self.func1 = th.function(
            self.new_vs1, [-self.f1, -self.df1],
            givens=zip(self.vs1,
                       self.new_vs1))  # IMPORTANT: VERY VERY VERY SLOW

        def f1_and_df1(x0):
            return self.func1(*[x0[a:b] for a, b in self.sz1])

        self.f1_and_df1 = f1_and_df1
        J = jacobian(grad(f1, vs2), vs1)  # IMPORTANT: VERY VERY VERY VERY SLOW
        H = hessian(f1, vs1)  # IMPORTANT: VERY VERY VERY VERY SLOW
        g = grad(f2, vs1)  # IMPORTANT: SLOW
        self.df2 = -tt.dot(J, ts.solve(H, g)) + grad(f2, vs2)  # IMPORTANT: SLOW
        self.func2 = th.function(
            [], [-self.f2, -self.df2])  # IMPORTANT: EXTREMELY SLOW

        def f2_and_df2(x0):
            for v, (a, b) in zip(self.vs2, self.sz2):
                v.set_value(x0[a:b])
            self.maximize1()
            return self.func2()

        self.f2_and_df2 = f2_and_df2
Example #16
 def grad(self, inputs, g_outputs):
     # let A = I - rho W, and dRho(A) be the derivative of A wrt rho
     # dRho(log(|AtA|))
     # = dRho(log(|At|) + log(|A|))
     # = dRho(log(|A|) + log(|A|))
     # = 2 dRho(log(|A|))
     # = 2 |A|^{-1} dRho(|A|) = 2|A|^{-1} tr(Adj(A)dRho(A))
     # = 2 |A|^{-1} |A| tr(A^{-1}(-W)) = 2 * tr(A^{-1}W)
     [gz] = g_outputs
     [rho] = inputs
     A = self.I - rho * self.W
     trAiW = slinalg.solve(A, self.W).diagonal().sum()
     #trAiW = (nlinalg.matrix_inverse(A).dot(self.W)).diagonal().sum()
     return [trAiW]
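
The comment derivation rests on the matrix-calculus identity d/drho log|det(I - rho W)| = tr(A^{-1} dA/drho) = -tr(A^{-1} W) with A = I - rho W; how the sign and the factor of two enter this op's gradient depends on what its forward pass computes, which is not shown here. A small NumPy finite-difference check of the identity itself:

import numpy as np

rng = np.random.RandomState(0)
W = 0.1 * rng.rand(4, 4)
rho, eps = 0.3, 1e-6
A = np.eye(4) - rho * W

def logdet(r):
    return np.linalg.slogdet(np.eye(4) - r * W)[1]

fd = (logdet(rho + eps) - logdet(rho - eps)) / (2 * eps)   # centered finite difference
analytic = -np.trace(np.linalg.solve(A, W))                # -tr(A^{-1} W)
assert np.allclose(fd, analytic, atol=1e-5)
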
Example #17
def test_local_lift_solve():
    if not cusolver_available:
        raise SkipTest('No cuSolver')
    A = tensor.fmatrix()
    b = tensor.fmatrix()
    o = slinalg.solve(A, b)
    f_cpu = theano.function([A, b], o, mode_without_gpu)
    f_gpu = theano.function([A, b], o, mode=mode_with_gpu)
    assert not any(isinstance(n.op, slinalg.Solve)
                   for n in f_gpu.maker.fgraph.apply_nodes)
    assert any(isinstance(n.op, GpuCusolverSolve) and n.op.inplace
               for n in f_gpu.maker.fgraph.apply_nodes)
    A_val = np.random.uniform(-0.4, 0.4, (5, 5)).astype("float32")
    b_val = np.random.uniform(-0.4, 0.4, (5, 3)).astype("float32")
    utt.assert_allclose(f_cpu(A_val, b_val), f_gpu(A_val, b_val))
Example #18
def test_gpu_solve_not_inplace():
    if not cusolver_available:
        raise SkipTest('No cuSolver')
    A = tensor.fmatrix()
    b = tensor.fmatrix()
    s = slinalg.solve(A, b)
    o = tensor.dot(A, s)
    f_cpu = theano.function([A, b], o, mode_without_gpu)
    f_gpu = theano.function([A, b], o, mode=mode_with_gpu)
    count_not_inplace = len([n.op for n in f_gpu.maker.fgraph.apply_nodes
                             if isinstance(n.op, GpuCusolverSolve) and not n.op.inplace])
    assert count_not_inplace == 1, count_not_inplace
    A_val = np.random.uniform(-0.4, 0.4, (5, 5)).astype("float32")
    b_val = np.random.uniform(-0.4, 0.4, (5, 3)).astype("float32")
    utt.assert_allclose(f_cpu(A_val, b_val), f_gpu(A_val, b_val))
Example #19
def test_local_lift_solve():
    if not cusolver_available:
        raise SkipTest('No cuSolver')
    A = tensor.fmatrix()
    b = tensor.fmatrix()
    o = slinalg.solve(A, b)
    f_cpu = theano.function([A, b], o, mode_without_gpu)
    f_gpu = theano.function([A, b], o, mode=mode_with_gpu)
    assert not any(isinstance(n.op, slinalg.Solve)
                   for n in f_gpu.maker.fgraph.apply_nodes)
    assert any(isinstance(n.op, GpuCusolverSolve) and n.op.inplace
               for n in f_gpu.maker.fgraph.apply_nodes)
    A_val = np.random.uniform(-0.4, 0.4, (5, 5)).astype("float32")
    b_val = np.random.uniform(-0.4, 0.4, (5, 3)).astype("float32")
    utt.assert_allclose(f_cpu(A_val, b_val), f_gpu(A_val, b_val))
Example #20
def test_local_lift_solve():
    A = tensor.fmatrix()
    b = tensor.fmatrix()
    o = slinalg.solve(A, b)
    f_cpu = theano.function([A, b], o)
    f_gpu = theano.function([A, b], o, mode=mode_with_gpu)
    assert not any(
        isinstance(n.op, slinalg.Solve)
        for n in f_gpu.maker.fgraph.apply_nodes)
    assert any(
        isinstance(n.op, GpuCusolverSolve)
        for n in f_gpu.maker.fgraph.apply_nodes)
    A_val = numpy.random.uniform(-0.4, 0.4, (5, 5)).astype("float32")
    b_val = numpy.random.uniform(-0.4, 0.4, (5, 3)).astype("float32")
    utt.assert_allclose(f_cpu(A_val, b_val), f_gpu(A_val, b_val))
Example #21
def test_gpu_solve_not_inplace():
    if not cusolver_available:
        raise SkipTest('No cuSolver')
    A = tensor.fmatrix()
    b = tensor.fmatrix()
    s = slinalg.solve(A, b)
    o = tensor.dot(A, s)
    f_cpu = theano.function([A, b], o, mode_without_gpu)
    f_gpu = theano.function([A, b], o, mode=mode_with_gpu)
    count_not_inplace = len([n.op for n in f_gpu.maker.fgraph.apply_nodes
                             if isinstance(n.op, GpuCusolverSolve) and not n.op.inplace])
    assert count_not_inplace == 1, count_not_inplace
    A_val = np.random.uniform(-0.4, 0.4, (5, 5)).astype("float32")
    b_val = np.random.uniform(-0.4, 0.4, (5, 3)).astype("float32")
    utt.assert_allclose(f_cpu(A_val, b_val), f_gpu(A_val, b_val))
Example #22
    def init_train_updates(self):
        n_parameters = count_parameters(self.connection)
        parameters = parameter_values(self.connection)
        param_vector = T.concatenate([param.flatten() for param in parameters])
        penalty_const = asfloat(self.penalty_const)

        hessian_matrix, full_gradient = find_hessian_and_gradient(
            self.variables.error_func, parameters)

        updated_parameters = param_vector - slinalg.solve(
            hessian_matrix + penalty_const * T.eye(n_parameters),
            full_gradient)
        updates = setup_parameter_updates(parameters, updated_parameters)

        return updates
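
This is a regularized Newton step: the parameters move by (H + c I)^{-1} g, where the penalty constant c keeps the system solvable when the Hessian is ill conditioned. A minimal NumPy sketch of the same update (illustrative names, not the library's API):

import numpy as np

def regularized_newton_step(param_vector, hessian, gradient, penalty_const):
    n = param_vector.shape[0]
    step = np.linalg.solve(hessian + penalty_const * np.eye(n), gradient)
    return param_vector - step
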
Example #23
 def th_cross_mean(self, prior=False, noise=False, cross_kernel=None):
     """
     Using two kernels calculate the media of one process given the other.
     :param prior: if the process considers a prior of not
     :param noise: if the process considers noise
     :param cross_kernel: it's the covariance between two process
     :return: returns a tensor with the location of a process given another process.
     """
     if prior:
         return self.prior_location_space
     if cross_kernel is None:
         cross_kernel = self.f_kernel
     return self.prior_location_space + cross_kernel.cov(
         self.th_space_, self.th_inputs_).dot(
             tsl.solve(self.prior_kernel_inputs,
                       self.mapping_outputs - self.prior_location_inputs))
Example #24
    def init_train_updates(self):
        n_parameters = count_parameters(self.connection)
        parameters = parameter_values(self.connection)
        param_vector = parameters2vector(self)
        penalty_const = asfloat(self.penalty_const)

        hessian_matrix, full_gradient = find_hessian_and_gradient(
            self.variables.error_func, parameters
        )

        updated_parameters = param_vector - slinalg.solve(
            hessian_matrix + penalty_const * T.eye(n_parameters),
            full_gradient
        )
        updates = setup_parameter_updates(parameters, updated_parameters)

        return updates
Example #25
    def logp(self, value):
        """
        the sparse cached log determinant assumes I - rho W = A, and computes
        the log determinant of A wrt rho with cached W. 

        To get this right with the SMA, we need to use -rho in the logdet. 
        """

        delta = value - self.mu
        ld = self.spld(-self.rho) 
        out = -self.W.n / 2.0 * tt.log(np.pi * self.scale)
        out -= ld

        kern = slinalg.solve(self.AAt, delta)
        kern = tt.mul(delta, kern)
        kern = kern.sum()
        kern *= self.scale**-2
        kern /= 2.0
        return out - kern
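
The kern term is the quadratic form delta^T (A A^T)^{-1} delta, evaluated with a linear solve rather than an explicit inverse. A small NumPy sketch of that equivalence, using stand-in values rather than the model's variables:

import numpy as np

rng = np.random.RandomState(2)
A = np.eye(4) - 0.3 * rng.rand(4, 4)           # stand-in for I - rho*W
AAt = A.dot(A.T)
delta = rng.rand(4)

kern = delta.dot(np.linalg.solve(AAt, delta))  # delta^T (AA^T)^{-1} delta
assert np.allclose(kern, delta.dot(np.linalg.inv(AAt)).dot(delta))
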
Example #26
    def test_solve_dtype(self):
        if not imported_scipy:
            raise SkipTest("Scipy needed for the Solve op.")

        dtypes = ['uint8', 'uint16', 'uint32', 'uint64',
                  'int8', 'int16', 'int32', 'int64',
                  'float16', 'float32', 'float64']

        A_val = numpy.eye(2)
        b_val = numpy.ones((2, 1))

        # try all dtype combinations
        for A_dtype, b_dtype in itertools.product(dtypes, dtypes):
            A = tensor.matrix(dtype=A_dtype)
            b = tensor.matrix(dtype=b_dtype)
            x = solve(A, b)
            fn = function([A, b], x)
            x_result = fn(A_val.astype(A_dtype), b_val.astype(b_dtype))

            assert x.dtype == x_result.dtype
Example #27
    def test_solve_dtype(self):
        if not imported_scipy:
            raise SkipTest("Scipy needed for the Solve op.")

        dtypes = [
            'uint8', 'uint16', 'uint32', 'uint64', 'int8', 'int16', 'int32',
            'int64', 'float16', 'float32', 'float64'
        ]

        A_val = np.eye(2)
        b_val = np.ones((2, 1))

        # try all dtype combinations
        for A_dtype, b_dtype in itertools.product(dtypes, dtypes):
            A = tensor.matrix(dtype=A_dtype)
            b = tensor.matrix(dtype=b_dtype)
            x = solve(A, b)
            fn = function([A, b], x)
            x_result = fn(A_val.astype(A_dtype), b_val.astype(b_dtype))

            assert x.dtype == x_result.dtype
Example #28
    def predict(self, mx, Sx, *args, **kwargs):
        if self.N < self.n_inducing:
            # stick with the full GP
            return GP_UI.predict(self, mx, Sx)

        idims = self.D
        odims = self.E

        # centralize inputs
        zeta = self.X_sp - mx

        # initialize some variables
        sf2 = self.hyp[:, idims]**2
        eyeE = tt.tile(tt.eye(idims), (odims, 1, 1))
        lscales = self.hyp[:, :idims]
        iL = eyeE/lscales.dimshuffle(0, 1, 'x')

        # predictive mean
        inp = iL.dot(zeta.T).transpose(0, 2, 1)
        iLdotSx = iL.dot(Sx)
        B = (iLdotSx[:, :, None, :]*iL[:, None, :, :]).sum(-1) + tt.eye(idims)
        t = tt.stack([solve(B[i].T, inp[i].T).T for i in range(odims)])
        c = sf2/tt.sqrt(tt.stack([det(B[i]) for i in range(odims)]))
        l_ = tt.exp(-0.5*tt.sum(inp*t, 2))
        lb = l_*self.beta_sp
        M = tt.sum(lb, 1)*c

        # input output covariance
        tiL = tt.stack([t[i].dot(iL[i]) for i in range(odims)])
        V = tt.stack([tiL[i].T.dot(lb[i]) for i in range(odims)]).T*c

        # predictive covariance
        logk = (tt.log(sf2))[:, None] - 0.5*tt.sum(inp*inp, 2)
        logk_r = logk.dimshuffle(0, 'x', 1)
        logk_c = logk.dimshuffle(0, 1, 'x')
        Lambda = tt.square(iL)
        LL = (Lambda.dimshuffle(0, 'x', 1, 2) + Lambda).transpose(0, 1, 3, 2)
        R = tt.dot(LL, Sx.T).transpose(0, 1, 3, 2) + tt.eye(idims)
        z_ = Lambda.dot(zeta.T).transpose(0, 2, 1)

        M2 = tt.zeros((odims, odims))

        # initialize indices
        triu_indices = np.triu_indices(odims)
        indices = [tt.as_index_variable(idx) for idx in triu_indices]

        def second_moments(i, j, M2, beta, iK, sf2, R, logk_c, logk_r, z_, Sx):
            # This comes from Deisenroth's thesis ( Eqs 2.51- 2.54 )
            Rij = R[i, j]
            n2 = logk_c[i] + logk_r[j]
            n2 += utils.maha(z_[i], -z_[j], 0.5*solve(Rij, Sx))

            Q = tt.exp(n2)/tt.sqrt(det(Rij))

            # Eq 2.55
            m2 = matrix_dot(beta[i], Q, beta[j])

            m2 = theano.ifelse.ifelse(
                tt.eq(i, j), m2 - tt.sum(iK[i]*Q) + sf2[i], m2)
            M2 = tt.set_subtensor(M2[i, j], m2)
            M2 = theano.ifelse.ifelse(
                tt.eq(i, j), M2 + 1e-6, tt.set_subtensor(M2[j, i], m2))
            return M2

        nseq = [self.beta_sp, (self.iKmm - self.iBmm), sf2,
                R, logk_c, logk_r, z_, Sx]
        M2_, updts = theano.scan(
            fn=second_moments, sequences=indices, outputs_info=[M2],
            non_sequences=nseq, allow_gc=False)
        M2 = M2_[-1]
        S = M2 - tt.outer(M, M)

        return M, S, V
Example #29
 def grad(self, inputs, g_outputs):
     gz, = g_outputs
     x, = inputs
     return [slinalg.solve(x.T, gz)]
Example #30
    def predict_symbolic(self, mx, Sx, unroll_scan=False):
        idims = self.D
        odims = self.E

        # centralize inputs
        zeta = self.X - mx

        # initialize some variables
        sf2 = self.hyp[:, idims]**2
        eyeE = tt.tile(tt.eye(idims), (odims, 1, 1))
        lscales = self.hyp[:, :idims]
        iL = eyeE / lscales.dimshuffle(0, 1, 'x')

        # predictive mean
        inp = iL.dot(zeta.T).transpose(0, 2, 1)
        iLdotSx = iL.dot(Sx)
        # TODO vectorize this
        B = (iLdotSx[:, :, None, :] *
             iL[:, None, :, :]).sum(-1) + tt.eye(idims)
        t = tt.stack([solve(B[i].T, inp[i].T).T for i in range(odims)])
        c = sf2 / tt.sqrt(tt.stack([det(B[i]) for i in range(odims)]))
        l = tt.exp(-0.5 * tt.sum(inp * t, 2))
        lb = l * self.beta  # E x N dot E x N
        M = tt.sum(lb, 1) * c

        # input output covariance
        tiL = (t[:, :, None, :] * iL[:, None, :, :]).sum(-1)
        # tiL = tt.stack([t[i].dot(iL[i]) for i in range(odims)])
        V = tt.stack([tiL[i].T.dot(lb[i]) for i in range(odims)]).T * c

        # predictive covariance
        logk = (tt.log(sf2))[:, None] - 0.5 * tt.sum(inp * inp, 2)
        logk_r = logk.dimshuffle(0, 'x', 1)
        logk_c = logk.dimshuffle(0, 1, 'x')
        Lambda = tt.square(iL)
        LL = (Lambda.dimshuffle(0, 'x', 1, 2) + Lambda).transpose(0, 1, 3, 2)
        R = tt.dot(LL, Sx).transpose(0, 1, 3, 2) + tt.eye(idims)
        z_ = Lambda.dot(zeta.T).transpose(0, 2, 1)

        M2 = tt.zeros((odims, odims))

        # initialize indices
        triu_indices = np.triu_indices(odims)
        indices = [tt.as_index_variable(idx) for idx in triu_indices]

        def second_moments(i, j, M2, beta, iK, sf2, R, logk_c, logk_r, z_, Sx,
                           *args):
            # This comes from Deisenroth's thesis ( Eqs 2.51- 2.54 )
            Rij = R[i, j]
            n2 = logk_c[i] + logk_r[j]
            n2 += utils.maha(z_[i], -z_[j], 0.5 * solve(Rij, Sx))

            Q = tt.exp(n2) / tt.sqrt(det(Rij))

            # Eq 2.55
            m2 = matrix_dot(beta[i], Q, beta[j])

            m2 = theano.ifelse.ifelse(tt.eq(i, j),
                                      m2 - tt.sum(iK[i] * Q) + sf2[i], m2)
            M2 = tt.set_subtensor(M2[i, j], m2)
            return M2

        nseq = [self.beta, self.iK, sf2, R, logk_c, logk_r, z_, Sx, self.L]
        if unroll_scan:
            from lasagne.utils import unroll_scan
            [M2_] = unroll_scan(second_moments, indices, [M2], nseq,
                                len(triu_indices[0]))
            updts = {}
        else:
            M2_, updts = theano.scan(fn=second_moments,
                                     sequences=indices,
                                     outputs_info=[M2],
                                     non_sequences=nseq,
                                     allow_gc=False,
                                     strict=True,
                                     name="%s>M2_scan" % (self.name))
        M2 = M2_[-1]
        M2 = M2 + tt.triu(M2, k=1).T
        S = M2 - tt.outer(M, M)

        return M, S, V
Example #31
    def predict_symbolic(self, mx, Sx=None, unroll_scan=False):
        idims = self.D
        odims = self.E

        # initialize some variables
        sf2 = self.hyp[:, idims]**2
        eyeE = tt.tile(tt.eye(idims), (odims, 1, 1))
        lscales = self.hyp[:, :idims]
        iL = eyeE / lscales.dimshuffle(0, 1, 'x')

        if Sx is None:
            # first check if we received a vector [D] or a matrix [nxD]
            if mx.ndim == 1:
                mx = mx[None, :]
            # centralize inputs
            zeta = self.X[:, None, :] - mx[None, :, :]

            # predictive mean ( we don't need to do the rest )
            inp = (iL[:, None, :, None, :] * zeta[:, None, :, :]).sum(2)
            l = tt.exp(-0.5 * tt.sum(inp**2, -1))
            lb = l * self.beta[:, :, None]  # E x N
            M = tt.sum(lb, 1).T * sf2

            # apply saturating function to the output if available
            if self.sat_func is not None:
                # saturate the output
                M = self.sat_func(M)

            return M

        # centralize inputs
        zeta = self.X - mx

        # predictive mean
        inp = iL.dot(zeta.T).transpose(0, 2, 1)
        iLdotSx = iL.dot(Sx)
        B = (iLdotSx[:, :, None, :] *
             iL[:, None, :, :]).sum(-1) + tt.eye(idims)
        t = tt.stack([solve(B[i].T, inp[i].T).T for i in range(odims)])
        c = sf2 / tt.sqrt(tt.stack([det(B[i]) for i in range(odims)]))
        l = tt.exp(-0.5 * tt.sum(inp * t, 2))
        lb = l * self.beta
        M = tt.sum(lb, 1) * c

        # input output covariance
        tiL = tt.stack([t[i].dot(iL[i]) for i in range(odims)])
        V = tt.stack([tiL[i].T.dot(lb[i]) for i in range(odims)]).T * c

        # predictive covariance
        logk = (tt.log(sf2))[:, None] - 0.5 * tt.sum(inp * inp, 2)
        logk_r = logk.dimshuffle(0, 'x', 1)
        logk_c = logk.dimshuffle(0, 1, 'x')
        Lambda = tt.square(iL)
        LL = (Lambda.dimshuffle(0, 'x', 1, 2) + Lambda).transpose(0, 1, 3, 2)
        R = tt.dot(LL, Sx).transpose(0, 1, 3, 2) + tt.eye(idims)
        z_ = Lambda.dot(zeta.T).transpose(0, 2, 1)

        M2 = tt.zeros((odims, odims))

        # initialize indices
        triu_indices = np.triu_indices(odims)
        indices = [tt.as_index_variable(idx) for idx in triu_indices]

        def second_moments(i, j, M2, beta, R, logk_c, logk_r, z_, Sx, *args):
            # This comes from Deisenroth's thesis ( Eqs 2.51- 2.54 )
            Rij = R[i, j]
            n2 = logk_c[i] + logk_r[j]
            n2 += utils.maha(z_[i], -z_[j], 0.5 * solve(Rij, Sx))
            Q = tt.exp(n2) / tt.sqrt(det(Rij))

            # Eq 2.55
            m2 = matrix_dot(beta[i], Q, beta[j])

            m2 = theano.ifelse.ifelse(tt.eq(i, j), m2 + 1e-6, m2)
            M2 = tt.set_subtensor(M2[i, j], m2)
            return M2

        nseq = [self.beta, R, logk_c, logk_r, z_, Sx, self.iK, self.L]

        if unroll_scan:
            from lasagne.utils import unroll_scan
            [M2_] = unroll_scan(second_moments, indices, [M2], nseq,
                                len(triu_indices[0]))
            updts = {}
        else:
            M2_, updts = theano.scan(fn=second_moments,
                                     sequences=indices,
                                     outputs_info=[M2],
                                     non_sequences=nseq,
                                     allow_gc=False,
                                     strict=True,
                                     name="%s>M2_scan" % (self.name))
        M2 = M2_[-1]
        M2 = M2 + tt.triu(M2, k=1).T
        S = M2 - tt.outer(M, M)

        # apply saturating function to the output if available
        if self.sat_func is not None:
            # saturate the output
            M, S, U = self.sat_func(M, S)
            # compute the joint input output covariance
            V = V.dot(U)

        return M, S, V
Example #32
 def inverse_map(self, y):
     return slinalg.solve(self.weights, (y - self.biases).T).T
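
If the forward map is assumed to be the affine transform y = x.dot(W.T) + b, the solve above recovers x without ever forming W^{-1}. A small NumPy round-trip check under that assumption:

import numpy as np

rng = np.random.RandomState(1)
W = rng.rand(3, 3) + 3 * np.eye(3)        # well-conditioned weights
b = rng.rand(3)
x = rng.rand(5, 3)

y = x.dot(W.T) + b                        # assumed forward map
x_rec = np.linalg.solve(W, (y - b).T).T   # mirrors inverse_map above
assert np.allclose(x, x_rec)
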
Example #33
#theano.config.exception_verbosity = 'high'
from scipy import optimize
with pm.Model() as model:
    # 2 state model
    P = pm.Dirichlet('P', a=np.ones((N_states,N_states)), shape=(N_states,N_states))
    
    A1 = pm.Normal('A1',mu=0, sd=0.3)
    A2 = pm.Normal('A2',mu=1, sd=0.3)
    S1 = pm.InverseGamma('S1',alpha=alphaS, beta=betaS)
    S2 = pm.InverseGamma('S2',alpha=alphaS, beta=betaS)
    
    # AA = I - P + 1 is used below to solve for the stationary distribution PA
    AA = tt.eye(N_states) - P + tt.ones(shape=(N_states, N_states))
    
    PA = pm.Deterministic('PA',sla.solve(AA.T,tt.ones(shape=(N_states))))
    
    states1 = HMMStatesN('states1',P=P,PA=PA, shape=len(dataset[4]))
    
    emission1 = HMMGaussianEmissions('emission1',
                                    A1=A1,
                                    A2=A2,
                                    S1=S1,
                                    S2=S2,
                                    states=states1,
                                    observed=dataset[4])
    
    states2 = HMMStatesN('states2',P=P,PA=PA, shape=len(dataset[205]))
    
    emission2 = HMMGaussianEmissions('emission2',
                                    A1=A1,