Example #1
def compute_analysis_cuda2(self,
                           xb,
                           y,
                           R,
                           P,
                           H,
                           HT=None,
                           hph=None,
                           calcP=True):
    """Kalman filter analysis step on the GPU (culinalg and misc are
    presumably skcuda.linalg and skcuda.misc)."""
    if HT is None:
        HT = culinalg.transpose(H)
    HP = culinalg.dot(H, P)
    if hph is None:
        hph = culinalg.dot(HP, HT)
    # innovation covariance: S = R + H P H^T
    Rhph = misc.add(R, hph)
    inv = culinalg.inv(Rhph)
    # Kalman gain: W = (H P)^T S^{-1} = P H^T S^{-1} for symmetric P
    W = culinalg.dot(HP, inv, transa='T')
    # analysis state: xhat = xb + W (y - H xb)
    Hxb = culinalg.dot(H, xb)
    yHxb = misc.subtract(y, Hxb)
    WyHxb = culinalg.dot(W, yHxb)
    xhat = misc.add(xb, WyHxb)
    #xhat = xb + culinalg.dot(W, (y - culinalg.dot(H, xb)))
    if calcP:
        # analysis covariance: Phat = (I - W H) P
        I = culinalg.eye(P.shape[0])
        WH = culinalg.dot(W, H)
        IWH = I - WH
        Phat = culinalg.dot(IWH, P)
    else:
        Phat = misc.zeros((1,), dtype=P.dtype)
    return xhat, Phat
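
For reference, this is the standard Kalman analysis (update) step; a minimal plain-NumPy sketch of the same computation (names mirror the GPU version above, treating P as symmetric):

    import numpy as np

    def compute_analysis_numpy(xb, y, R, P, H):
        S = H @ P @ H.T + R                      # innovation covariance
        W = P @ H.T @ np.linalg.inv(S)           # Kalman gain
        xhat = xb + W @ (y - H @ xb)             # analysis state
        Phat = (np.eye(P.shape[0]) - W @ H) @ P  # analysis covariance
        return xhat, Phat
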
Example #2
    def impl_test_binaryop_2d(self, dtype):
        if issubclass(dtype, numbers.Integral):
            a_sca = np.array(np.random.randint(1, 10), dtype=dtype)
            b_sca = np.array(np.random.randint(1, 10), dtype=dtype)
            a_vec = np.random.randint(1, 10, 3).astype(dtype)
            b_vec = np.random.randint(1, 10, 3).astype(dtype)
            a_mat = np.random.randint(1, 10, 6).reshape((3, 2)).astype(dtype)
            b_mat = np.random.randint(1, 10, 6).reshape((3, 2)).astype(dtype)
        else:
            a_sca = np.random.normal(scale=5.0, size=()).astype(dtype)
            b_sca = np.random.normal(scale=5.0, size=()).astype(dtype)
            a_vec = np.random.normal(scale=5.0, size=(3, )).astype(dtype)
            b_vec = np.random.normal(scale=5.0, size=(3, )).astype(dtype)
            a_mat = np.random.normal(scale=5.0, size=(3, 2)).astype(dtype)
            b_mat = np.random.normal(scale=5.0, size=(3, 2)).astype(dtype)

        a_sca_gpu = gpuarray.to_gpu(a_sca)
        b_sca_gpu = gpuarray.to_gpu(b_sca)
        a_vec_gpu = gpuarray.to_gpu(a_vec)
        b_vec_gpu = gpuarray.to_gpu(b_vec)
        a_mat_gpu = gpuarray.to_gpu(a_mat)
        b_mat_gpu = gpuarray.to_gpu(b_mat)

        # addition
        assert np.allclose(misc.add(a_sca_gpu, b_sca_gpu).get(), a_sca + b_sca)
        assert np.allclose(misc.add(a_vec_gpu, b_vec_gpu).get(), a_vec + b_vec)
        assert np.allclose(misc.add(a_mat_gpu, b_mat_gpu).get(), a_mat + b_mat)

        # subtract
        assert np.allclose(
            misc.subtract(a_sca_gpu, b_sca_gpu).get(), a_sca - b_sca)
        assert np.allclose(
            misc.subtract(a_vec_gpu, b_vec_gpu).get(), a_vec - b_vec)
        assert np.allclose(
            misc.subtract(a_mat_gpu, b_mat_gpu).get(), a_mat - b_mat)

        # multiplication
        assert np.allclose(
            misc.multiply(a_sca_gpu, b_sca_gpu).get(), a_sca * b_sca)
        assert np.allclose(
            misc.multiply(a_vec_gpu, b_vec_gpu).get(), a_vec * b_vec)
        assert np.allclose(
            misc.multiply(a_mat_gpu, b_mat_gpu).get(), a_mat * b_mat)

        # division
        assert np.allclose(
            misc.divide(a_sca_gpu, b_sca_gpu).get(), a_sca / b_sca)
        assert np.allclose(
            misc.divide(a_vec_gpu, b_vec_gpu).get(), a_vec / b_vec)
        assert np.allclose(
            misc.divide(a_mat_gpu, b_mat_gpu).get(), a_mat / b_mat)
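
The tests above run inside a harness that has already created a CUDA context and initialized scikit-cuda; a minimal standalone setup for trying misc.add directly (a sketch, assuming pycuda and scikit-cuda are installed) might look like:

    import numpy as np
    import pycuda.autoinit          # creates a CUDA context on import
    import pycuda.gpuarray as gpuarray
    import skcuda.misc as misc

    misc.init()                     # initialize scikit-cuda's helper state

    a = np.random.normal(size=(3, 2))
    b = np.random.normal(size=(3, 2))
    c = misc.add(gpuarray.to_gpu(a), gpuarray.to_gpu(b)).get()
    assert np.allclose(c, a + b)
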
Example #3
        def thunk():
            # inputs, outputs, node and compute_map, along with A, b, x,
            # depth and self, are closed over from the enclosing Theano Op.
            alpha = gpuarray.to_gpu(np.squeeze(np.asarray(inputs[0]))[:, None])
            x_t = gpuarray.to_gpu(np.asarray(inputs[1])[0, :, :])
            x_f = gpuarray.to_gpu(np.asarray(inputs[2])[0, :, :])
            Xt = cumath.exp(misc.add(linalg.dot(x_t, A), b))
            Xf = cumath.exp(misc.add(linalg.dot(x_f, A), b))
            Xtn = misc.sum(Xt, axis=1, keepdims=True)
            Xfn = misc.sum(Xf, axis=1, keepdims=True)
            Xt = misc.divide(Xt, Xtn)
            Xf = misc.divide(Xf, Xfn)
            w = misc.multiply(Xt, alpha) + misc.multiply(Xf, 1 - alpha)
            dq = Xt - Xf
            qdw = dq / w
            t1 = misc.sum(x * qdw, axis=1)
            f = 2 * depth + self.base.n
            t2 = f * misc.sum(dq, axis=1) / misc.sum(w, axis=1)
            t3 = misc.sum(x, axis=1) * misc.sum(qdw, axis=1)
            dalpha = t1 - t2 + t3
            del dq, t1, f, t2, t3

            iw = 1 / w
            S1 = misc.multiply(
                depth[:, None] * (self.base.n - 1) / self.base.n, iw)
            S2 = (self.base.n + depth[:, None]) / cumath.log(
                misc.sum(w, axis=1, keepdims=True))
            F = misc.multiply(misc.subtract((x * iw) - S1, S2), alpha)
            del w, iw, S1, S2

            cast = gpuarray.zeros((x_t.shape[1], Xt.shape[1]),
                                  dtype=theano.config.floatX)
            dLq_t = gpuarray.zeros(x_t.shape, dtype=theano.config.floatX)
            dLq_f = gpuarray.zeros(x_f.shape, dtype=theano.config.floatX)
            for i in range(Xt.shape[0]):
                S1 = misc.multiply(Xt[None, i, :], A)
                S2 = misc.sum(S1, axis=1, keepdims=True)
                S2 = misc.multiply(S2, misc.add(Xt[None, i, :], cast))
                dLq_t[i, :] = misc.sum(misc.multiply(F[None, i, :], S1 - S2),
                                       axis=1)
                S1 = misc.multiply(Xf[None, i, :], A)
                S2 = misc.sum(S1, axis=1, keepdims=True)
                S2 = misc.multiply(S2, misc.add(Xf[None, i, :], cast))
                dLq_f[i, :] = misc.sum(misc.multiply(F[None, i, :], S1 - S2),
                                       axis=1)
            outputs[0][0] = dalpha.get()
            outputs[1][0] = dLq_t.get()
            outputs[2][0] = dLq_f.get()
            for v in node.outputs:
                compute_map[v][0] = True
Example #4
def thunk():
    # inputs, outputs, node and compute_map, along with A, b, x, depth
    # and self, are closed over from the enclosing Theano Op.
    alpha = gpuarray.to_gpu(np.squeeze(np.asarray(inputs[0]))[:, None])
    x_t = gpuarray.to_gpu(np.asarray(inputs[1])[0, :, :])
    x_f = gpuarray.to_gpu(np.asarray(inputs[2])[0, :, :])
    # row-normalized exp(x A + b) for the two input sets
    Xt = cumath.exp(misc.add(linalg.dot(x_t, A), b))
    Xf = cumath.exp(misc.add(linalg.dot(x_f, A), b))
    Xtn = misc.sum(Xt, axis=1, keepdims=True)
    Xfn = misc.sum(Xf, axis=1, keepdims=True)
    Xt = misc.divide(Xt, Xtn)
    Xf = misc.divide(Xf, Xfn)
    # mixture weights: w = alpha * Xt + (1 - alpha) * Xf
    w = misc.multiply(Xt, alpha) + misc.multiply(Xf, 1 - alpha)
    wp = cumath.log(w)
    wpn = misc.sum(wp, axis=1, keepdims=True) / self.n
    wp = misc.subtract(wp, wpn)
    t1 = misc.sum(x * wp, axis=1)
    t2 = (self.n + depth) * cumath.log(misc.sum(w, axis=1))
    t3 = depth * wpn
    outputs[0][0] = misc.sum(t1 - t2 + t3).get()
    for v in node.outputs:
        compute_map[v][0] = True
Example #5
 def __radd__(self, other): return cumisc.add(other, self)
 def __rsub__(self, other): return cumisc.subtract(other, self)
Example #6
 def __add__(self, other): return cumisc.add(self, other)
 def __sub__(self, other): return cumisc.subtract(self, other)
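
Examples #5 and #6 are evidently arithmetic mixin methods of a GPU matrix wrapper: the forward operators handle `self + other`, the reflected ones handle `other + self` when the left operand is not the wrapper type. A minimal sketch of such a class (the name CUDAMatrix and the _as_gpu helper are illustrative, not from the original source; cumisc is assumed to be skcuda.misc):

    import numpy as np
    import pycuda.gpuarray as gpuarray
    import skcuda.misc as cumisc

    def _as_gpu(x):
        # illustrative helper: accept wrappers, GPUArrays or host arrays
        if isinstance(x, CUDAMatrix):
            return x.gpu
        if isinstance(x, gpuarray.GPUArray):
            return x
        return gpuarray.to_gpu(np.asarray(x))

    class CUDAMatrix(object):
        def __init__(self, arr):
            self.gpu = _as_gpu(arr)
        def __add__(self, other):  return CUDAMatrix(cumisc.add(self.gpu, _as_gpu(other)))
        def __radd__(self, other): return CUDAMatrix(cumisc.add(_as_gpu(other), self.gpu))
        def __sub__(self, other):  return CUDAMatrix(cumisc.subtract(self.gpu, _as_gpu(other)))
        def __rsub__(self, other): return CUDAMatrix(cumisc.subtract(_as_gpu(other), self.gpu))
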
Example #7
def add(a, b):
    '''Calculates the elementwise matrix addition "a + b" on the GPU.'''
    a_gpu = gpuarray.to_gpu(a)
    b_gpu = gpuarray.to_gpu(b)
    return misc.add(a_gpu, b_gpu)
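
Usage, reusing the pycuda/scikit-cuda setup sketched after Example #2 (misc.add returns a GPUArray, so .get() copies the result back to the host):

    a = np.arange(6, dtype=np.float64).reshape(3, 2)
    b = np.ones((3, 2))
    print(add(a, b).get())  # host copy of the elementwise sum
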
Example #8
    def _impl_test_binaryop_2d(self, dtype):
        if issubclass(dtype, numbers.Integral):
            a_sca = np.array(np.random.randint(1, 10), dtype=dtype)
            b_sca = np.array(np.random.randint(1, 10), dtype=dtype)
            a_vec = np.random.randint(1, 10, 3).astype(dtype)
            b_vec = np.random.randint(1, 10, 3).astype(dtype)
            a_mat = np.random.randint(1, 10, 6).reshape((3, 2)).astype(dtype)
            b_mat = np.random.randint(1, 10, 6).reshape((3, 2)).astype(dtype)
            b_mat_f = np.random.randint(1, 10, 6).reshape(
                (3, 2)).astype(dtype, order='F')
        else:
            a_sca = np.random.normal(scale=5.0, size=()).astype(dtype)
            b_sca = np.random.normal(scale=5.0, size=()).astype(dtype)
            a_vec = np.random.normal(scale=5.0, size=(3, )).astype(dtype)
            b_vec = np.random.normal(scale=5.0, size=(3, )).astype(dtype)
            a_mat = np.random.normal(scale=5.0, size=(3, 2)).astype(dtype)
            b_mat = np.random.normal(scale=5.0, size=(3, 2)).astype(dtype)
            b_mat_f = np.random.normal(scale=5.0,
                                       size=(3, 2)).astype(dtype, order='F')

        a_sca_gpu = gpuarray.to_gpu(a_sca)
        b_sca_gpu = gpuarray.to_gpu(b_sca)
        a_vec_gpu = gpuarray.to_gpu(a_vec)
        b_vec_gpu = gpuarray.to_gpu(b_vec)
        a_mat_gpu = gpuarray.to_gpu(a_mat)
        b_mat_gpu = gpuarray.to_gpu(b_mat)
        b_mat_f_gpu = gpuarray.to_gpu(b_mat_f)

        # addition
        assert_allclose(misc.add(a_sca_gpu, b_sca_gpu).get(),
                        a_sca + b_sca,
                        rtol=dtype_to_rtol[dtype],
                        atol=dtype_to_atol[dtype])
        assert_allclose(misc.add(a_vec_gpu, b_vec_gpu).get(),
                        a_vec + b_vec,
                        rtol=dtype_to_rtol[dtype],
                        atol=dtype_to_atol[dtype])
        assert_allclose(misc.add(a_mat_gpu, b_mat_gpu).get(),
                        a_mat + b_mat,
                        rtol=dtype_to_rtol[dtype],
                        atol=dtype_to_atol[dtype])

        # subtract
        assert_allclose(misc.subtract(a_sca_gpu, b_sca_gpu).get(),
                        a_sca - b_sca,
                        rtol=dtype_to_rtol[dtype],
                        atol=dtype_to_atol[dtype])
        assert_allclose(misc.subtract(a_vec_gpu, b_vec_gpu).get(),
                        a_vec - b_vec,
                        rtol=dtype_to_rtol[dtype],
                        atol=dtype_to_atol[dtype])
        assert_allclose(misc.subtract(a_mat_gpu, b_mat_gpu).get(),
                        a_mat - b_mat,
                        rtol=dtype_to_rtol[dtype],
                        atol=dtype_to_atol[dtype])

        # multiplication
        assert_allclose(misc.multiply(a_sca_gpu, b_sca_gpu).get(),
                        a_sca * b_sca,
                        rtol=dtype_to_rtol[dtype],
                        atol=dtype_to_atol[dtype])
        assert_allclose(misc.multiply(a_vec_gpu, b_vec_gpu).get(),
                        a_vec * b_vec,
                        rtol=dtype_to_rtol[dtype],
                        atol=dtype_to_atol[dtype])
        assert_allclose(misc.multiply(a_mat_gpu, b_mat_gpu).get(),
                        a_mat * b_mat,
                        rtol=dtype_to_rtol[dtype],
                        atol=dtype_to_atol[dtype])

        # division
        if issubclass(dtype, numbers.Integral):
            assert_allclose(misc.divide(a_sca_gpu, b_sca_gpu).get(),
                            a_sca // b_sca,
                            rtol=dtype_to_rtol[dtype],
                            atol=dtype_to_atol[dtype])
            assert_allclose(misc.divide(a_vec_gpu, b_vec_gpu).get(),
                            a_vec // b_vec,
                            rtol=dtype_to_rtol[dtype],
                            atol=dtype_to_atol[dtype])
            assert_allclose(misc.divide(a_mat_gpu, b_mat_gpu).get(),
                            a_mat // b_mat,
                            rtol=dtype_to_rtol[dtype],
                            atol=dtype_to_atol[dtype])
        else:
            assert_allclose(misc.divide(a_sca_gpu, b_sca_gpu).get(),
                            a_sca / b_sca,
                            rtol=dtype_to_rtol[dtype],
                            atol=dtype_to_atol[dtype])
            assert_allclose(misc.divide(a_vec_gpu, b_vec_gpu).get(),
                            a_vec / b_vec,
                            rtol=dtype_to_rtol[dtype],
                            atol=dtype_to_atol[dtype])
            assert_allclose(misc.divide(a_mat_gpu, b_mat_gpu).get(),
                            a_mat / b_mat,
                            rtol=dtype_to_rtol[dtype],
                            atol=dtype_to_atol[dtype])

        # mismatched order
        assert_raises(ValueError, misc.add, a_mat_gpu, b_mat_f_gpu)
Example #9
    def almLasso_mat_fun(self):
        '''
        This function implements the Augmented Lagrangian Multipliers (ADMM) method for the Lasso problem.
        The Lagrangian form of the Lasso can be expressed as follows:

        min { 1/2 ||Y - X*BHETA||_2^2 + lambda*||THETA||_1 }   s.t.   BHETA - THETA = 0

        When applied to this problem, the ADMM updates take the form

        BHETA^(t+1) = (X^T X + rho*I)^(-1) (X^T Y + rho*THETA^t - mu^t)
        THETA^(t+1) = Shrinkage_{lambda/rho}(BHETA^(t+1) + mu^t/rho)
        mu^(t+1)    = mu^t + rho*(BHETA^(t+1) - THETA^(t+1))

        The algorithm thus alternates a ridge-regression update for BHETA, a soft-thresholding
        (shrinkage) step for THETA, and a simple linear update for the multiplier mu.

        NB: this ADMM variant actually uses two penalty parameters (mu1, mu2) instead of a
        single one.
        '''

        print('\tADMM processing...')

        alpha1 = alpha2 = 0
        if (len(self.reg_params) == 1):
            alpha1 = self.reg_params[0]
            alpha2 = self.reg_params[0]
        elif (len(self.reg_params) == 2):
            alpha1 = self.reg_params[0]
            alpha2 = self.reg_params[1]

        # threshold parameters for the stopping criteria
        if (len(self.thr) == 1):
            thr1 = self.thr[0]
            thr2 = self.thr[0]
        elif (len(self.thr) == 2):
            thr1 = self.thr[0]
            thr2 = self.thr[1]

        # entry condition
        err1 = 10 * thr1
        err2 = 10 * thr2

        start_time = time.time()

        # setting penalty parameters for the ALM
        mu1p = alpha1 * 1 / self.computeLambda()
        print("\t\t-Compute Lambda- Time = %s seconds" %
              (time.time() - start_time))
        mu2p = alpha2 * 1

        mu1 = mu1p
        mu2 = mu2p

        i = 1
        start_time = time.time()
        if self.GPU == True:

            # defining penalty parameters and the constraint to minimize:
            # the lambda and C matrices, respectively
            THETA = misc.zeros((self.num_columns, self.num_columns),
                               dtype='float64')
            lambda2 = misc.zeros((self.num_columns, self.num_columns),
                                 dtype='float64')

            gpu_data = gpuarray.to_gpu(self.data)
            P_GPU = linalg.dot(gpu_data, gpu_data, transa='T')

            OP1 = P_GPU
            linalg.scale(np.float32(mu1), OP1)

            OP2 = linalg.eye(self.num_columns)
            linalg.scale(mu2, OP2)

            if self.affine == True:

                print('\t\tGPU affine...')

                OP3 = misc.ones((self.num_columns, self.num_columns),
                                dtype='float64')
                linalg.scale(mu2, OP3)
                lambda3 = misc.zeros((1, self.num_columns), dtype='float64')

                # TODO: because of a problem with scikit-cuda's linalg.inv, we fall back to numpy's np.linalg.inv
                A = np.linalg.inv(
                    misc.add(misc.add(OP1.get(), OP2.get()), OP3.get()))

                A_GPU = gpuarray.to_gpu(A)

                while ((err1 > thr1 or err2 > thr2) and i < self.max_iter):

                    _lambda2 = gpuarray.to_gpu(lambda2)
                    _lambda3 = gpuarray.to_gpu(lambda3)

                    linalg.scale(1 / mu2, _lambda2)
                    term_OP2 = gpuarray.to_gpu(_lambda2.get())

                    OP2 = gpuarray.to_gpu(misc.subtract(THETA, term_OP2))
                    linalg.scale(mu2, OP2)

                    OP4 = gpuarray.to_gpu(
                        np.matlib.repmat(_lambda3.get(), self.num_columns, 1))

                    # updating Z
                    BHETA = linalg.dot(
                        A_GPU, misc.add(misc.add(misc.add(OP1, OP2), OP3),
                                        OP4))

                    # deallocating unnecessary GPU variables
                    OP2.gpudata.free()
                    OP4.gpudata.free()
                    _lambda2.gpudata.free()
                    _lambda3.gpudata.free()

                    # updating C
                    THETA = misc.add(BHETA, term_OP2)
                    THETA = self.shrinkL1Lq(THETA.get(), 1 / mu2)
                    THETA = THETA.astype('float64')

                    # updating Lagrange multipliers
                    term_lambda2 = misc.subtract(BHETA, gpuarray.to_gpu(THETA))

                    linalg.scale(mu2, term_lambda2)
                    term_lambda2 = gpuarray.to_gpu(term_lambda2.get())
                    lambda2 = misc.add(lambda2, term_lambda2)  # on GPU

                    term_lambda3 = misc.subtract(
                        misc.ones((1, self.num_columns), dtype='float64'),
                        misc.sum(BHETA, axis=0))
                    linalg.scale(mu2, term_lambda3)
                    term_lambda3 = gpuarray.to_gpu(term_lambda3.get())
                    lambda3 = misc.add(lambda3, term_lambda3)  # on GPU

                    # deallocating unnecessary GPU variables
                    term_OP2.gpudata.free()
                    term_lambda2.gpudata.free()
                    term_lambda3.gpudata.free()

                    err1 = self.errorCoef(BHETA.get(), THETA)
                    err2 = self.errorCoef(np.sum(BHETA.get(), axis=0),
                                          np.ones([1, self.num_columns]))

                    # deallocating unnecessary GPU variables
                    BHETA.gpudata.free()

                    THETA = gpuarray.to_gpu(THETA)

                    # reporting errors
                    if (self.verbose and (i % self.step == 0)):
                        print(
                            '\t\tIteration = %d, ||Z - C|| = %2.5e, ||1 - C^T 1|| = %2.5e'
                            % (i, err1, err2))
                    i += 1

                THETA = THETA.get()

                Err = [err1, err2]
                if (self.verbose):
                    print(
                        '\t\tTerminating ADMM at iteration %5.0f, \n ||Z - C|| = %2.5e, ||1 - C^T 1|| = %2.5e. \n'
                        % (i, err1, err2))

            else:
                print('\t\tGPU not affine')

                # TODO: because of a problem with scikit-cuda's linalg.inv, we fall back to numpy's np.linalg.inv
                A = np.linalg.inv(misc.add(OP1.get(), OP2.get()))
                A_GPU = gpuarray.to_gpu(A)

                while (err1 > thr1 and i < self.max_iter):

                    _lambda2 = gpuarray.to_gpu(lambda2)

                    term_OP2 = THETA
                    linalg.scale(mu2, term_OP2)

                    term_OP2 = misc.subtract(term_OP2, _lambda2)

                    OP2 = gpuarray.to_gpu(term_OP2.get())

                    BHETA = linalg.dot(A_GPU, misc.add(OP1, OP2))

                    linalg.scale(1 / mu2, _lambda2)
                    term_THETA = gpuarray.to_gpu(_lambda2.get())

                    THETA = misc.add(BHETA, term_THETA)
                    THETA = self.shrinkL1Lq(THETA.get(), 1 / mu2)

                    THETA = THETA.astype('float32')

                    # updating Lagrange multipliers
                    term_lambda2 = misc.subtract(BHETA, gpuarray.to_gpu(THETA))
                    linalg.scale(mu2, term_lambda2)
                    term_lambda2 = gpuarray.to_gpu(term_lambda2.get())
                    lambda2 = misc.add(lambda2, term_lambda2)  # on GPU

                    err1 = self.errorCoef(BHETA.get(), THETA)

                    THETA = gpuarray.to_gpu(THETA)

                    # reporting errors
                    if (self.verbose and (i % self.step == 0)):
                        print('\t\tIteration %5.0f, ||Z - C|| = %2.5e' %
                              (i, err1))
                    i += 1

                THETA = THETA.get()
                Err = [err1, err2]
                if (self.verbose):
                    print(
                        '\t\tTerminating ADMM at iteration %5.0f, \n ||Z - C|| = %2.5e'
                        % (i, err1))

        else:  #CPU version

            # defining penalty parameters and the constraint to minimize:
            # the lambda and C matrices, respectively
            THETA = np.zeros([self.num_columns, self.num_columns])
            lambda2 = np.zeros([self.num_columns, self.num_columns])

            P = self.data.T.dot(self.data)
            OP1 = np.multiply(P, mu1)

            if self.affine == True:

                # INITIALIZATION
                lambda3 = np.zeros(self.num_columns).T

                A = np.linalg.inv(
                    np.multiply(mu1, P) +
                    np.multiply(mu2, np.eye(self.num_columns, dtype=int)) +
                    np.multiply(mu2,
                                np.ones([self.num_columns, self.num_columns])))

                OP3 = np.multiply(
                    mu2, np.ones([self.num_columns, self.num_columns]))

                while ((err1 > thr1 or err2 > thr2) and i < self.max_iter):

                    # updating Bheta
                    OP2 = np.multiply(THETA - np.divide(lambda2, mu2), mu2)
                    OP4 = np.matlib.repmat(lambda3, self.num_columns, 1)
                    BHETA = A.dot(OP1 + OP2 + OP3 + OP4)

                    # updating C
                    THETA = BHETA + np.divide(lambda2, mu2)
                    THETA = self.shrinkL1Lq(THETA, 1 / mu2)

                    # updating Lagrange multipliers
                    lambda2 = lambda2 + np.multiply(mu2, BHETA - THETA)
                    lambda3 = lambda3 + np.multiply(
                        mu2,
                        np.ones([1, self.num_columns]) - np.sum(BHETA, axis=0))

                    err1 = self.errorCoef(BHETA, THETA)
                    err2 = self.errorCoef(np.sum(BHETA, axis=0),
                                          np.ones([1, self.num_columns]))

                    # mu1 = min(mu1 * (1 + 10 ^ -5), 10 ^ 2 * mu1p);
                    # mu2 = min(mu2 * (1 + 10 ^ -5), 10 ^ 2 * mu2p);

                    # reporting errors
                    if (self.verbose and (i % self.step == 0)):
                        print(
                            '\t\tIteration = %d, ||Z - C|| = %2.5e, ||1 - C^T 1|| = %2.5e'
                            % (i, err1, err2))
                    i += 1

                Err = [err1, err2]

                if (self.verbose):
                    print(
                        '\t\tTerminating ADMM at iteration %5.0f, \n ||Z - C|| = %2.5e, ||1 - C^T 1|| = %2.5e. \n'
                        % (i, err1, err2))
            else:
                print('\t\tCPU not affine')

                A = np.linalg.inv(
                    OP1 +
                    np.multiply(mu2, np.eye(self.num_columns, dtype=int)))

                while (err1 > thr1 and i < self.max_iter):

                    # updating Z
                    OP2 = np.multiply(mu2, THETA) - lambda2
                    BHETA = A.dot(OP1 + OP2)

                    # updating C
                    THETA = BHETA + np.divide(lambda2, mu2)
                    THETA = self.shrinkL1Lq(THETA, 1 / mu2)

                    # updating Lagrange multipliers
                    lambda2 = lambda2 + np.multiply(mu2, BHETA - THETA)

                    # computing errors
                    err1 = self.errorCoef(BHETA, THETA)

                    # reporting errors
                    if (self.verbose and (i % self.step == 0)):
                        print('\t\tIteration %5.0f, ||Z - C|| = %2.5e' %
                              (i, err1))
                    i += 1

                Err = [err1, err2]
                if (self.verbose):
                    print(
                        '\t\tTerminating ADMM at iteration %5.0f, \n ||Z - C|| = %2.5e'
                        % (i, err1))

        print("\t\t-ADMM- Time = %s seconds" % (time.time() - start_time))

        return THETA, Err
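
The shrinkage step delegated to self.shrinkL1Lq above is, in the plain L1 case, elementwise soft-thresholding; a minimal NumPy sketch (the function name and scalar-threshold signature are illustrative, with t playing the role of lambda/rho):

    import numpy as np

    def soft_threshold(X, t):
        # elementwise soft-thresholding: sign(x) * max(|x| - t, 0)
        return np.sign(X) * np.maximum(np.abs(X) - t, 0.0)
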
Example #10
    for step in range(N_TIMESTEPS):
        # print step
        # Implementing the split-step method:
        # update wavefunction and reservoir, record density
        cu_fft.fft(psi_gpu, psi_gpu, plan_forward)
        psi_gpu *= kineticFactorHalf_gpu
        cu_fft.ifft(psi_gpu, psi_gpu, plan_inverse, scale=True)

        # currentDensity_gpu = abs(psi_gpu) ** 2
        # currentDensity_gpu = psi_gpu.real **2 + psi_gpu.imag ** 2
        currentDensity_gpu = (psi_gpu * psi_gpu.conj()).real
        # modSquared.prepared_call(grid, block, psi_gpu.gpudata,
        #                          currentDensity_gpu.gpudata, 1024)
        # n_gpu *= cumath.exp(-gammaRdt_gpu + Rdt_gpu * currentDensity_gpu)
        n_gpu *= cumath.exp(misc.add(- gammaRdt_gpu,
                                     - misc.multiply(Rdt_gpu, currentDensity_gpu)))
        n_gpu += Pdt_gpu
        psi_gpu *= cumath.exp(
            misc.add(
                misc.add(misc.multiply(expFactorPolFirst_gpu, n_gpu),
                         misc.multiply(expFactorPolSecond_gpu, currentDensity_gpu)),
                expFactorPolThird_gpu))

        #  psiNonlinear.prepared_call(grid, block, expFactorPolFirst,
        #                             expFactorPolSecond, expFactorPolThird,
        #                             psi_gpu.gpudata, n_gpu.gpudata,
        #                             currentDensity_gpu.gpudata, 1024)

        cu_fft.fft(psi_gpu, psi_gpu, plan_forward)
        # record spectrum
        drv.memcpy_dtod(spectrum[step, :].gpudata, psi_gpu[N//2, :].gpudata,
                        psi_gpu[N//2, :].nbytes)  # byte count of the copied row (assumed; the source line is truncated)
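
Each loop pass above is one split-step iteration: half a kinetic step applied in Fourier space, then the reservoir and nonlinear updates in real space. A schematic single-step NumPy version (1-D arrays; the factor names mirror the GPU variables and are assumed to be precomputed):

    import numpy as np

    def split_step(psi, n, kineticFactorHalf, gammaRdt, Rdt, Pdt,
                   expFactorPolFirst, expFactorPolSecond, expFactorPolThird):
        psi = np.fft.ifft(np.fft.fft(psi) * kineticFactorHalf)  # half kinetic step
        density = (psi * psi.conj()).real                       # |psi|^2
        n = n * np.exp(-gammaRdt - Rdt * density) + Pdt         # reservoir update
        psi = psi * np.exp(expFactorPolFirst * n +
                           expFactorPolSecond * density +
                           expFactorPolThird)                   # nonlinear factor
        return psi, n
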