Example #1
    # external deps assumed: time, numpy as np, pycuda.gpuarray as gpuarray,
    # skcuda.cublas as cublas, plus project helpers (element_proj, error_crit,
    # soft_thresholding)
    def run(self,
            ERR_BOUND=None,
            err_iter=None,
            time_iter=None,
            SILENCE=False,
            DEBUG=False):
        # initialize
        self.DEBUG = DEBUG
        IS_BOUNDED = isinstance(ERR_BOUND, float)
        self.ERR_RCD = isinstance(err_iter, np.ndarray)
        self.TIME_RCD = isinstance(time_iter, np.ndarray)

        self.x.fill(0)
        for i in range(self.BLOCK):
            self.x_block[i].fill(0)
            self.x_block_gpu[i].fill(0)
            self.Ax_gpu[i].fill(0)
        self.Ax.fill(0)
        self.x_gpu.fill(0)

        b_k_gpu = gpuarray.empty_like(self.b_gpu)
        rx_gpu = gpuarray.empty_like(self.x_block_gpu[0])
        soft_t_gpu = gpuarray.empty_like(self.x_block_gpu[0])
        Bx_gpu = gpuarray.empty_like(self.x_block_gpu[0])
        s11_gpu = gpuarray.zeros((self.idx_m, 1), np.float64)
        s13_gpu = gpuarray.zeros((self.idx_n, 1), np.float64)
        s23_gpu = gpuarray.zeros((self.idx_m, 1), np.float64)
        d_d_gpu = gpuarray.zeros((self.idx_n, 1), np.float64)
        d_ATA_gpu = [gpuarray.to_gpu(self.d_ATA[i]) for i in range(self.BLOCK)]
        d_ATA_rec_gpu = [
            gpuarray.to_gpu(1 / self.d_ATA[i]) for i in range(self.BLOCK)
        ]
        block_Cnt = 0  # used by the (disabled) per-block convergence check below

        time_s = 0
        start = time.time()
        if self.TIME_RCD:
            time_iter[0] = 0

        for t in range(self.ITER_MAX):
            # select mth block
            m = self.index_get(t)
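            # One BCD step on block m (summary; same math as the CPU
            # reference kept below):
            #   s11 = A x - b                       residual
            #   s13 = A_m^T s11                     block gradient
            #   rx  = diag(A_m^T A_m) x_m - s13
            #   Bx  = soft_threshold(rx, mu) / diag(A_m^T A_m)
            #   d   = Bx - x_m;  s23 = A_m d
            #   r   = proj_[0,1](-(s11^T s23 + mu(||Bx||_1 - ||x_m||_1)) / ||s23||^2)
            #   x_m += r d;  A x += r s23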
            # CPU reference implementation, kept for comparison (disabled):
            '''
            result_s11 = np.sum(self.Ax, axis=0) - self.b
            self.s11_gpu.set(result_s11)
            self._zmvG(self.h, self.s13_gpu, 1,
                       self.gpu_cal.A_b_gpu[m], self.s11_gpu)
            self.s13_gpu.get(self.result_s13)
            rx = np.multiply(self.d_ATA[m], self.x_block[m]) -\
                self.result_s13
            soft_t = soft_thresholding(rx, self.mu)
            Bx = np.multiply(self.d_ATA_rec[m], soft_t)
            # result_s21 = Bx_p - x_p
            descent_D = Bx-self.x_block[m]
            self.d_d_gpu.set(descent_D)
            self._zmvG(self.h, self.s23_gpu, 1,
                       self.gpu_cal.A_b_cw_gpu[m], self.d_d_gpu)
            self.s23_gpu.get(self.result_s23)
            # result_s23 = self._mv(m, descent_D)
            r_1 = np.transpose(result_s11) @ self.result_s23 +\
                self.mu*(np.linalg.norm(Bx, ord=1) -
                         np.linalg.norm(self.x_block[m], ord=1))
            r_2 = np.transpose(self.result_s23) @ self.result_s23
            if r_2 == 0.0:
                print('r_2 is ZERO, could not divide ZERO!')
            else:
                r = np.float64(element_proj(-r_1/r_2, 0, 1))
            # x(t+1) = x(t)+r(Bx(t)-x(t))
            self.x_block[m] += r*descent_D
            # Ax(t+1)
            self.Ax[m] += r*self.result_s23
            '''

            # GPU implementation of the same update, entirely on device
            self.fun_b_k(b_k_gpu, m)
            self._zaxpy(self.h, s11_gpu, -1, b_k_gpu, self.Ax_gpu[m])
            self._zmvG(self.h, s13_gpu, 1, self.gpu_cal.A_b_gpu[m],
                       cublas._CUBLAS_OP['N'], s11_gpu)
            # s14: rx = diag(A_m^T A_m) * x_m - s13, then soft-threshold rx
            d_ATA_gpu[m]._elwise_multiply(self.x_block_gpu[m], rx_gpu)
            self._axpy(self.h, -1, s13_gpu, rx_gpu)
            self.zsoft_t(soft_t_gpu, rx_gpu, self.mu)
            # s15: Bx = soft_threshold(rx, mu) / diag(A_m^T A_m)
            d_ATA_rec_gpu[m]._elwise_multiply(soft_t_gpu, Bx_gpu)
            self._zaxpy(self.h, d_d_gpu, -1, self.x_block_gpu[m], Bx_gpu)
            self._zmvG(self.h, s23_gpu, 1, self.gpu_cal.A_b_gpu[m],
                       cublas._CUBLAS_OP['T'], d_d_gpu)

            # stepsize
            # r_1g = self.r1_get(self.h, s11_gpu, s23_gpu,
            #                    Bx_gpu, self.x_block_gpu[m])
            temp_1 = cublas.cublasDdot(self.h, s11_gpu.size, s11_gpu.gpudata,
                                       1, s23_gpu.gpudata, 1)
            start_s = time.time()
            temp_2 = self.mu * (
                cublas.cublasDasum(self.h, Bx_gpu.size, Bx_gpu.gpudata, 1) -
                cublas.cublasDasum(self.h, self.x_block_gpu[m].size,
                                   self.x_block_gpu[m].gpudata, 1))
            time_s += time.time() - start_s
            r_1g = temp_1 + temp_2
            r_2g = np.square(self._l2norm(self.h, s23_gpu))
            if r_2g == 0.0:
                # avoid dividing by zero; fall back to a zero step so r_g
                # is always defined before the updates below
                print('r_2 is zero; skipping the step to avoid division by zero')
                r_g = np.float64(0)
            else:
                r_g = np.float64(element_proj(-r_1g / r_2g, 0, 1))

            # self.debug(result_s13, self.x_block[m], self.x, t, m, r)
            # self.err_record(err_iter, result_s13, self.x_block[m], t)

            # if IS_BOUNDED:
            #     if not (self.DEBUG & self.ERR_RCD):
            #         self.error = error_crit(
            #             result_s13, self.x_block[m], self.mu)
            #     if self.error < ERR_BOUND:
            #         block_Cnt += 1
            #     if self.BLOCK - 1 == m:
            #         if block_Cnt == self.BLOCK:
            #             break
            #         else:
            #             block_Cnt = 0

            # x(t+1) = x(t) + r*(Bx - x);  Ax(t+1) = Ax(t) + r*A_m*d
            self._axpy(self.h, r_g, d_d_gpu, self.x_block_gpu[m])
            self._axpy(self.h, r_g, s23_gpu, self.Ax_gpu[m])

            # sanity check against the CPU reference (disabled):
            # print(np.allclose(self.x_block_gpu[m].get(), self.x_block[m]))
            self.time_record(time_iter, t, start)
            # print("matrix@vector:", time_mul,
            #       "s, matrix.T@vector:", time_mul_t)

        if self.TIME_RCD:
            t_elapsed = time_iter[t]
        else:
            t_elapsed = time.time() - start

        self.rlt_display(SILENCE, t_elapsed, t)
        self.x = np.vstack(self.x_block)
        if not SILENCE:
            # time_s accumulates only the two cublasDasum calls per iteration
            print(f'{self.descript}: {time_s} s.')

        return t_elapsed
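
The run() method above depends on two small helpers imported from elsewhere in the project, soft_thresholding and element_proj. Their definitions are not part of this example; below is a minimal NumPy sketch of what these operations conventionally compute (an assumption, not the project's code):

import numpy as np

def soft_thresholding(x, mu):
    # proximal map of mu*||.||_1: shrink each entry of x toward zero by mu
    return np.sign(x) * np.maximum(np.abs(x) - mu, 0.0)

def element_proj(v, lo, hi):
    # project v elementwise onto the interval [lo, hi]
    return np.minimum(np.maximum(v, lo), hi)

In run(), element_proj clamps the stepsize to [0, 1], so each block update stays a convex combination of the old iterate and Bx.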
Example #2
    def test_cublasDasum(self):
        x = np.random.rand(5).astype(np.float64)
        x_gpu = gpuarray.to_gpu(x)
        result = cublas.cublasDasum(self.cublas_handle, x_gpu.size,
                                    x_gpu.gpudata, 1)
        assert np.allclose(result, np.sum(np.abs(x)))
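
The test assumes a CUBLAS handle created in the test fixture (self.cublas_handle). A self-contained version of the same check, assuming pycuda and scikit-cuda are installed:

import numpy as np
import pycuda.autoinit  # noqa: F401 -- creates a CUDA context on import
import pycuda.gpuarray as gpuarray
from skcuda import cublas

handle = cublas.cublasCreate()
x = np.random.rand(5).astype(np.float64)
x_gpu = gpuarray.to_gpu(x)
# Dasum returns the sum of absolute values of the double-precision entries
result = cublas.cublasDasum(handle, x_gpu.size, x_gpu.gpudata, 1)
assert np.allclose(result, np.sum(np.abs(x)))
cublas.cublasDestroy(handle)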
Example #3
    def _l1norm(self, handle, x_gpu):
        # Dasum sums absolute values, i.e. the l1 norm of a real vector
        return cublas.cublasDasum(handle, x_gpu.size, x_gpu.gpudata, 1)
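
Since cublasDasum returns the sum of absolute values, this helper is exactly the l1 norm for real double-precision vectors. Hypothetical usage, with handle and x_gpu set up as in the sketch after Example #2:

# `solver` is any object exposing this method (hypothetical)
l1 = solver._l1norm(handle, x_gpu)
assert np.isclose(l1, np.abs(x_gpu.get()).sum())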