Ejemplo n.º 1
0
def NNMF_gpu(X,r,tol,V=v0,W=w0,verbose=1):
    """Rank-``r`` non-negative matrix factorisation of ``X`` on the GPU.

    Applies multiplicative updates to the two factors so that ``dot(V, W)``
    approximates ``X``.  The loop stops as soon as the squared norm of the
    residual changes by at most ``tol * (previous_loss + 1)`` between two
    consecutive sweeps.

    :param X: host matrix to factorise; uploaded with ``gpuarray.to_gpu``
    :param r: number of columns of ``V`` / rows of ``W`` that are used
    :param tol: relative tolerance on the change of the loss
    :param V: host initial guess for the left factor; only ``V[:, 0:r]`` is used
    :param W: host initial guess for the right factor; only ``W[0:r, :]`` is used
    :param verbose: if truthy, print the loss every 50 iterations
    :return: tuple ``(V_gpu, W_gpu, iteration)``: both factors as GPU arrays and
        the number of sweeps that ran before the converging one
    """
    # Slices are copied before upload so the originals are never touched.
    Vr = V[:, 0:r].copy()
    Wr = W[0:r, :].copy()
    X_gpu = gpuarray.to_gpu(X)
    V_gpu = gpuarray.to_gpu(Vr)
    W_gpu = gpuarray.to_gpu(Wr)

    # Squared norm of the residual for the initial guess
    B_gpu = linalg.dot(V_gpu, W_gpu)
    L = linalg.norm(X_gpu - B_gpu) ** 2
    iteration = 0

    while True:
        # update V (B_gpu still holds the product of the previous factors)
        V_gpu *= linalg.dot(X_gpu, linalg.transpose(W_gpu))
        V_gpu /= linalg.dot(B_gpu, linalg.transpose(W_gpu))
        B_gpu = linalg.dot(V_gpu, W_gpu)

        # update W
        W_gpu *= linalg.dot(linalg.transpose(V_gpu), X_gpu)
        W_gpu /= linalg.dot(linalg.transpose(V_gpu), B_gpu)
        B_gpu = linalg.dot(V_gpu, W_gpu)

        Lnew = linalg.norm(X_gpu - B_gpu) ** 2
        if abs(Lnew - L) <= tol * (L + 1):
            break

        L = Lnew
        iteration += 1
        if verbose and iteration % 50 == 0:
            # Single-argument call form: valid on both Python 2 and Python 3
            print("At iteration %i, the loss is %.2f" % (iteration, L))

    return V_gpu, W_gpu, iteration
Ejemplo n.º 2
0
    def computeLambda(self):
        """Return the largest row norm of ``data.T . M``.

        ``M`` is ``self.data`` itself when ``self.affine`` is false.  Otherwise
        ``M`` is the matrix whose every column equals the mean of ``self.data``
        along axis 1, minus ``self.data``.  The product is evaluated with
        scikit-cuda when ``self.GPU == True`` and with NumPy otherwise.

        :return: ``np.amax`` of the Euclidean norms of the rows of the product
        """
        print('\t\tComputing lambda...')

        # Kept as an equality test so only True/1 select the GPU path.
        if (self.GPU == True):

            gpu_data = gpuarray.to_gpu(self.data)

            if not self.affine:

                C_gpu = linalg.dot(gpu_data, gpu_data, transa='T')

            else:

                # affine transformation
                y_mean_gpu = misc.mean(gpu_data, axis=1)

                # Copy the mean back to the host once instead of once per column
                y_mean = y_mean_gpu.get()

                # creating affine matrix to subtract to the data (may encounter problem with strides)
                aff_mat = np.zeros([self.num_rows,
                                    self.num_columns]).astype('f')
                for i in range(self.num_columns):
                    aff_mat[:, i] = y_mean

                aff_mat_gpu = gpuarray.to_gpu(aff_mat)
                gpu_data_aff = misc.subtract(aff_mat_gpu, gpu_data)

                C_gpu = linalg.dot(gpu_data, gpu_data_aff, transa='T')

            # computing euclidean norm (rows)
            T = np.zeros(self.num_columns)
            for i in range(self.num_columns):
                T[i] = linalg.norm(C_gpu[i, :])

        else:

            if not self.affine:

                T = np.linalg.norm(np.dot(self.data.T, self.data), axis=1)

            else:
                # affine transformation
                y_mean = np.mean(self.data, axis=1)

                tmp_mat = np.outer(y_mean, np.ones(
                    self.num_columns)) - self.data

                T = np.linalg.norm(np.dot(self.data.T, tmp_mat), axis=1)

        _lambda = np.amax(T)

        return _lambda
def skcuda_linalg(a, b):
    """Print ``dot(a^T, b)`` divided by the product of the norms of ``a`` and ``b``.

    Both inputs are converted to float32 host arrays, uploaded to the GPU and
    combined with scikit-cuda.  The result is only printed; nothing is returned.

    :param a: array-like left operand (used transposed in the product)
    :param b: array-like right operand
    """
    linalg.init()
    a = np.asarray(a, np.float32)
    b = np.asarray(b, np.float32)
    a_gpu = gpuarray.to_gpu(a)
    b_gpu = gpuarray.to_gpu(b)

    # transa='T': the first operand enters the product transposed
    c_gpu = linalg.dot(a_gpu, b_gpu, transa='T')
    a_nrm = linalg.norm(a_gpu)
    b_nrm = linalg.norm(b_gpu)

    # NOTE(review): the divisor is the product of the two norm results rather
    # than a GPU array - confirm misc.divide accepts that operand.
    ans = misc.divide(c_gpu, a_nrm * b_nrm)
    print(ans)
    def get_Ehrenfest(self, t):
        """
        Record the expectation values that enter the Ehrenfest theorems.

        Nothing happens unless self.isEhrenfest is truthy. Otherwise one value is
        appended to each of self.X_average, self.P_average_RHS,
        self.hamiltonian_average, self.X_average_RHS and self.P_average.
        self.wavefunction is rescaled to unit norm along the way and is
        transformed to the momentum representation and back.
        :param t: current time (not used by this method)
        :return: None
        """
        if not self.isEhrenfest:
            return

        # ---------------- coordinate representation ----------------

        # self.weighted_func_cuda_code requires a unit-norm wave function
        self.wavefunction /= cu_linalg.norm(self.wavefunction)

        # <X>
        mean_x = self.get_average("X")
        self.X_average.append(mean_x)

        # <-diff_V>
        mean_force = -self.get_average(self.diff_V)
        self.P_average_RHS.append(mean_force)

        # potential energy; the kinetic part is added to this entry below
        mean_potential = self.get_average(self.V)
        self.hamiltonian_average.append(mean_potential)

        # ---------------- momentum representation ----------------

        cufft.fft_Z2Z(self.wavefunction, self.wavefunction, self.plan_Z2Z_psi)

        # renormalize again, as required by self.weighted_func_cuda_code
        self.wavefunction /= cu_linalg.norm(self.wavefunction)

        # <diff_K>
        mean_velocity = self.get_average(self.diff_K)
        self.X_average_RHS.append(mean_velocity)

        # <P>
        mean_p = self.get_average("P")
        self.P_average.append(mean_p)

        # complete the energy entry with the kinetic contribution
        self.hamiltonian_average[-1] += self.get_average(self.K)

        # return to the coordinate representation
        cufft.ifft_Z2Z(self.wavefunction, self.wavefunction, self.plan_Z2Z_psi)
Ejemplo n.º 5
0
def fast_matmul(x, y, x_type, y_type):
    '''
    Scratch/benchmark routine that multiplies x by y on the GPU.

    Besides the product it runs a few throw-away GPU operations inside a timed
    section, prints the elapsed time, and prints a small column-normalization
    demo on a random 3x4 matrix.
    :param x: host matrix, cast to x_type before upload
    :param y: host matrix, cast to y_type before upload
    :param x_type: dtype used for x
    :param y_type: dtype used for y
    :return: host copies of (dot(x, y), dot(x, x.T), sum of the rows of x)
    '''
    linalg.init()
    lhs_gpu = gpuarray.to_gpu(x.astype(x_type))
    lhs_t_gpu = gpuarray.to_gpu(x.T.copy().astype(x_type))
    rhs_gpu = gpuarray.to_gpu(y.astype(y_type))
    row_sum = 0

    # ---- timed section ----
    tic = time.time()
    c_gpu = linalg.dot(lhs_gpu, rhs_gpu)
    for row in lhs_gpu:
        row_sum += row
        # the results below are discarded; the calls are kept as timed work
        scratch = linalg.dot(lhs_gpu, rhs_gpu)
        scratch = linalg.dot(row, row)
        scratch = reduce(linalg.dot, (lhs_gpu, rhs_gpu, rhs_gpu, rhs_gpu))
        lhs_copy = lhs_gpu.copy()

    a_prod = linalg.dot(lhs_gpu, lhs_t_gpu)
    toc = time.time()
    print('inside cost {:.4f}s'.format(toc - tic))

    # ---- column-normalization demo on random data ----
    demo_host = np.random.randint(-5, 5, (3, 4)).astype(np.float32)
    demo_gpu = gpuarray.to_gpu(demo_host)
    demo_norm = linalg.norm(demo_gpu)
    print('is norm right?', np.linalg.norm(demo_host) == demo_norm)

    demo_abs = abs(demo_gpu)
    column_sum = misc.sum(demo_abs, axis=0).reshape((1, -1))
    ones_column = gpuarray.to_gpu(np.ones((3, 1), np.float32))
    # every row of the divisor holds the column sums
    divisor = linalg.dot(ones_column, column_sum)
    norm_1 = demo_abs / (divisor + 1e-3)

    print(demo_abs)
    print(column_sum)
    print(column_sum.shape)
    print(norm_1)

    return c_gpu.get(), a_prod.get(), row_sum.get()
    def projectout_stationary_states(self, wavefunction):
        """
        Remove from wavefunction its components along self.stationary_states.

        The array is normalized, its scalar product with every stationary state is
        taken, those components are subtracted, and it is normalized again. The
        updates use augmented assignment and a kernel writing to its first argument,
        so the passed array itself is modified.
        :param wavefunction: provided wavefunction
        :return: wavefunction
        """
        # Build the two GPU kernels on first use; later calls find them on self
        try:
            self.vdot
            self.projectout
        except AttributeError:
            ctype = dtype_to_ctype(self.wavefunction.dtype)

            # scalar product with complex conjugation of the first argument
            self.vdot = ReductionKernel(
                self.wavefunction.dtype,
                neutral="0.",
                reduce_expr="a + b",
                map_expr="conj(bra[i]) * ket[i]",
                arguments="const {0} *bra, const {0} *ket".format(ctype)
            )

            # psi <- psi - C * phi
            self.projectout = ElementwiseKernel(
                "{0} *psi, const {0} *phi, const {0} C".format(ctype),
                "psi[i] -= C * phi[i]"
            )

        # unit norm before the overlaps are taken
        wavefunction /= cu_linalg.norm(wavefunction)

        # all overlaps are evaluated before anything is subtracted
        overlaps = []
        for state in self.stationary_states:
            overlaps.append(self.vdot(state, wavefunction).get())

        for state, overlap in zip(self.stationary_states, overlaps):
            self.projectout(wavefunction, state, overlap)

        # unit norm again after the subtraction
        wavefunction /= cu_linalg.norm(wavefunction)

        return wavefunction
    def get_energy(self):
        """
        Evaluate the energy as the sum of the averages of self.V and self.K.

        The average of self.V is taken first; the wave function is then Fourier
        transformed (input and output are the same array), the average of self.K
        is taken, and the transform is inverted. self.wavefunction is rescaled to
        unit norm before each average.
        :return: self.get_average(self.V) + self.get_average(self.K)
        """
        # unit norm is required by self.weighted_func_cuda_code
        self.wavefunction /= cu_linalg.norm(self.wavefunction)
        potential_part = self.get_average(self.V)

        # coordinate -> momentum representation
        cufft.fft_Z2Z(self.wavefunction, self.wavefunction, self.plan_Z2Z_psi)

        # unit norm again, for the same reason as above
        self.wavefunction /= cu_linalg.norm(self.wavefunction)
        kinetic_part = self.get_average(self.K)

        # momentum -> coordinate representation
        cufft.ifft_Z2Z(self.wavefunction, self.wavefunction, self.plan_Z2Z_psi)

        total_energy = potential_part + kinetic_part
        return total_energy
    def set_ground_state(self, nsteps=10000):
        """
        Obtain the ground state wave function by the imaginary time propagation.

        The wave function is first reset with self.set_wavefunction(1.). Each step
        applies self.bloch_expV, then self.bloch_expK between a forward and an
        inverse FFT, then self.bloch_expV again, and finally renormalizes.
        :param nsteps: number of the imaginary time steps to take
        :return: self
        """
        self.set_wavefunction(1.)

        # range (not xrange) so the loop also runs on Python 3
        for _ in range(nsteps):
            self.bloch_expV(self.wavefunction, **self.wavefunction_mapper_params)

            # the expK factor is applied between the forward and inverse transforms
            cufft.fft_Z2Z(self.wavefunction, self.wavefunction, self.plan_Z2Z_psi)
            self.bloch_expK(self.wavefunction, **self.wavefunction_mapper_params)
            cufft.ifft_Z2Z(self.wavefunction, self.wavefunction, self.plan_Z2Z_psi)

            self.bloch_expV(self.wavefunction, **self.wavefunction_mapper_params)

            # divide by norm * sqrt(dX) after every step
            self.wavefunction /= cu_linalg.norm(self.wavefunction) * np.sqrt(self.dX)

        return self
    def propagate(self, steps=1):
        """
        Time propagate the wave function saved in self.wavefunction.

        Each step advances self.t by self.dt, calls self.single_step_propagation(),
        calls self.get_Ehrenfest(self.t) and renormalizes the wave function.
        :param steps: number of self.dt time increments to make
        :return: self
        """
        # range (not xrange) so the loop also runs on Python 3
        for _ in range(steps):
            # increment current time
            self.t += self.dt

            # advance by one time step
            self.single_step_propagation()

            # calculate the Ehrenfest theorems
            self.get_Ehrenfest(self.t)

            # normalize: divide by norm * sqrt(dX)
            self.wavefunction /= cu_linalg.norm(self.wavefunction) * np.sqrt(self.dX)

        return self
def logis(y,x):
    """Estimate regression coefficients by iterated weighted least squares on the GPU.

    Starts from the ordinary least-squares solution ``(X'X)^-1 X'y`` and repeats a
    weighted least-squares update until the coefficient vector moves by less than
    1e-5 (norm of the difference) or the iteration counter reaches 10.

    :param y: response values; cast to float32
    :param x: design matrix; cast to float32
    :return: dict with keys ``"iteraciones"`` (iteration counter at exit),
        ``"Betas"`` (host copy of the coefficients held at exit) and ``"time"``
        (elapsed wall-clock seconds, host-to-device transfers included)
    """
    x = x.astype(np.float32)
    y = y.astype(np.float32)
    start = time.time()

    # Move the data to the GPU
    x_gpu = gpuarray.to_gpu(x)
    y_gpu = gpuarray.to_gpu(y)

    linalg.init()

    # Transpose of X
    x_gpu_T = linalg.transpose(x_gpu)

    # Starting point: ordinary least squares
    beta_gpu = linalg.dot(linalg.dot(linalg.inv(linalg.dot(x_gpu_T, x_gpu)), x_gpu_T), y_gpu)

    j = 1
    while True:
        # sapply is defined elsewhere; presumably it evaluates the fitted means
        # for the current coefficients - TODO confirm
        mu = sapply(x, beta_gpu.get())
        mu = mu.astype(np.float32)
        mu_gpu = gpuarray.to_gpu(mu)

        # NOTE(review): the weight matrix is diag(mu), while the working
        # response below divides by mu * (1 - mu) - confirm this is intended.
        V_gpu = linalg.diag(mu_gpu)
        f2_gpu = linalg.multiply(mu_gpu, 1 - mu_gpu)
        f3_gpu = linalg.diag(1 / f2_gpu)
        f4_gpu = (y_gpu - mu_gpu)
        f5_gpu = linalg.dot(f3_gpu, f4_gpu)

        # One device-to-host copy serves both the NaN check and the repair
        f5_cpu = f5_gpu.get()
        if np.isnan(f5_cpu).any():
            f5_cpu = nanValue(f5_cpu)
            f5_gpu = gpuarray.to_gpu(f5_cpu.astype(np.float32))

        # Working response and weighted least-squares update
        y_1_gpu = linalg.dot(x_gpu, beta_gpu) + f5_gpu
        beta_1_gpu = linalg.dot(linalg.dot(linalg.dot(linalg.inv(linalg.dot(linalg.dot(x_gpu_T, V_gpu), x_gpu)), x_gpu_T), V_gpu), y_1_gpu)

        check_value = linalg.norm(beta_1_gpu - beta_gpu)

        # NOTE(review): on exit beta_gpu still holds the previous iterate, so
        # the last update (beta_1_gpu) is not what gets returned - confirm.
        if j == 10 or check_value < 0.00001:
            break
        beta_gpu = beta_1_gpu
        j = j + 1

    end = time.time()
    tiempo = (end - start)
    return {"iteraciones":j,"Betas":beta_gpu.get(),"time":tiempo}
    def set_wavefunction(self, new_wave_func):
        """
        Set the initial wave function and normalize it.

        :param new_wave_func: 1D numpy array, 1D GPU array containing the wave function,
                    a string of the C code specifying the initial condition,
                    a python function of the form F(self, X), or a number
                    (int, float or complex) used as a constant value.
                    Coordinate (X) is declared.
        :return: self
        :raises NotImplementedError: if new_wave_func is of none of the types above
        """
        if isinstance(new_wave_func, (np.ndarray, gpuarray.GPUArray)):
            # perform the consistency checks
            assert new_wave_func.shape == self.wavefunction.shape, \
                "The grid sizes does not match with the wave function"

            # copy the wave function
            self.wavefunction[:] = new_wave_func.astype(np.complex128)

        elif isinstance(new_wave_func, FunctionType):
            # user supplied the function which will return the wave function
            self.wavefunction[:] = new_wave_func(self, self.X)

        elif isinstance(new_wave_func, str):
            # user specified C code
            print("\n================================ Compiling init_wavefunc ================================\n")
            init_kernel = SourceModule(
                self.init_wavefunction_cuda_source.format(cuda_consts=self.cuda_consts, new_wave_func=new_wave_func),
            ).get_function("Kernel")
            init_kernel(self.wavefunction, **self.wavefunction_mapper_params)

        elif isinstance(new_wave_func, (int, float, complex)):
            # user specified a constant; plain ints are accepted like floats
            self.wavefunction.fill(np.complex128(new_wave_func))
        else:
            raise NotImplementedError(
                "new_wave_func must be a numpy/GPU array, a function, "
                "a string of C code, or a number"
            )

        # normalize: divide by norm * sqrt(dX)
        self.wavefunction /= cu_linalg.norm(self.wavefunction) * np.sqrt(self.dX)

        return self
Ejemplo n.º 12
0
def update_W_hat_skcuda(W_hat, X_hat, A_t, B_t, x_sum, alpha_sum, eps, t):
    """Update ``W_hat`` column by column with projected gradient steps on the GPU.

    For column ``j`` the gradient is ``X^T X W a_j - X^T b_j``, where ``a_j`` and
    ``b_j`` are the ``j``-th rows of the transposes of ``A_t`` and ``B_t``.  The
    step size is ``1 / (norm(X^T X) * norm(a_j) + 1e-8)``.  The stepped column is
    copied to the host, passed through ``geo_projection_to_cvx_cmb`` and written
    back.  Sweeps over all columns repeat until the norm of the change of ``W``
    in one sweep drops below ``eps`` or 10000 sweeps have been done.

    Inputs that are not already GPU arrays are uploaded as float64.

    :param W_hat: initial matrix (2-D); copied, not modified
    :param X_hat: data matrix
    :param A_t: matrix providing the ``a_j`` vectors
    :param B_t: matrix providing the ``b_j`` vectors
    :param x_sum: not used by this function
    :param alpha_sum: not used by this function
    :param eps: convergence threshold on the norm of the per-sweep change
    :param t: not used by this function
    :return: the updated matrix as a GPU array
    """
    _, k_cluster = W_hat.shape
    W_hat_new = W_hat.copy()
    linalg.init()

    if not isinstance(W_hat_new, gpuarray.GPUArray):
        W_hat_new_gpu = gpuarray.to_gpu(W_hat_new.astype(np.float64))
    else:
        W_hat_new_gpu = W_hat_new

    if not isinstance(X_hat, gpuarray.GPUArray):
        tmp_x = np.ascontiguousarray(X_hat)
        X_hat_gpu = gpuarray.to_gpu(tmp_x.astype(np.float64))
    else:
        X_hat_gpu = X_hat
    X_hat_T_gpu = linalg.transpose(X_hat_gpu)

    if not isinstance(A_t, gpuarray.GPUArray):
        A_t_gpu = gpuarray.to_gpu(A_t.astype(np.float64))
    else:
        A_t_gpu = A_t
    A_t_gpu_trans = linalg.transpose(A_t_gpu)

    if not isinstance(B_t, gpuarray.GPUArray):
        B_t_gpu = gpuarray.to_gpu(B_t.astype(np.float64))
    else:
        B_t_gpu = B_t
    B_t_gpu_trans = linalg.transpose(B_t_gpu)

    # X^T X and its norm never change while W is updated: compute them once
    # instead of once per column per sweep.
    X_product_gpu = linalg.dot(X_hat_T_gpu, X_hat_gpu)
    X_product_norm = linalg.norm(X_product_gpu)

    k = 0
    while True:
        k += 1
        W_hat_old_gpu = W_hat_new_gpu.copy()
        for j in range(k_cluster):
            a_row = A_t_gpu_trans[j, :]
            T1 = linalg.dot(X_hat_T_gpu, B_t_gpu_trans[j, :].reshape((-1, 1)))
            # W changes after every column, so this product cannot be hoisted
            T2 = linalg.dot(linalg.dot(X_product_gpu, W_hat_new_gpu),
                            a_row.reshape(-1, 1))
            grad_gpu = -T1 + T2
            step_size = 1 / (X_product_norm * linalg.norm(a_row) + 1e-8)
            tmp = -step_size * grad_gpu.reshape(
                (-1)) + W_hat_new_gpu[:, j].copy()

            # the projection runs on the host; its result is uploaded again
            u_j = geo_projection_to_cvx_cmb(tmp.get())
            normalized_u_j_gpu = gpuarray.to_gpu(u_j.astype(np.float64))

            W_hat_new_gpu[:, j] = normalized_u_j_gpu

        if (linalg.norm(W_hat_new_gpu - W_hat_old_gpu) < eps) or k >= 10000:
            break

    return W_hat_new_gpu