Beispiel #1
0
def sqrt_normalize_gpu(img):
    """Apply a signed square root to an image on the GPU, then normalize it.

    The first three channels (``img[:, :, :3]``) and the fourth channel
    (``img[:, :, 3]``) are uploaded separately.  Every value ``v`` is replaced
    by ``sqrt(abs(v))`` multiplied by a ``+1``/``-1`` mask chosen with
    ``gpuarray.if_positive``, and both results are passed to
    ``normalize_gpu``.

    Parameters
    ----------
    img : array indexable as ``img[:, :, c]`` with at least four channels
        Host image data; slices are copied before being uploaded.

    Returns
    -------
    Whatever ``normalize_gpu(rgb, a)`` returns.
    """
    global posr, negr, posa, nega, stream
    rgb = gpuarray.to_gpu(img[:, :, :3].copy())
    a = gpuarray.to_gpu(img[:, :, 3].copy())

    # The +1/-1 masks live in module globals so they are only built once.
    # They must be rebuilt when they do not exist yet *or* when the image
    # shape changed since the last call: ``if_positive`` needs operands of
    # matching shape.  Checking the type instead of truth-testing the cached
    # array also avoids depending on how a GPU array converts to ``bool``.
    masks_stale = (not isinstance(posr, gpuarray.GPUArray)
                   or posr.shape != rgb.shape)
    if masks_stale:
        posr = gpuarray.zeros_like(rgb) + 1
        negr = gpuarray.zeros_like(rgb) - 1
        posa = gpuarray.zeros_like(a) + 1
        nega = gpuarray.zeros_like(a) - 1
    rgb = cumath.sqrt(abs(rgb), stream=stream) * gpuarray.if_positive(
        rgb, posr, negr, stream=stream)
    a = cumath.sqrt(abs(a), stream=stream) * gpuarray.if_positive(
        a, posa, nega, stream=stream)
    return normalize_gpu(rgb, a)
Beispiel #2
0
 def test_sqrt(self):
     """Check ``cumath.sqrt`` against ``math.sqrt`` for ten elements."""
     a = simplearray.array(10).fill_arange()+1
     b = cumath.sqrt(a)

     for i in range(10):
         # ``assert_`` is a deprecated unittest alias that was removed in
         # Python 3.12; ``assertTrue`` is the supported spelling.
         self.assertTrue(abs(math.sqrt(a[i]) - b[i]) < 1e-3)
Beispiel #3
0
def gaussian_norm(data, sigma=0.5, **kwargs):
    """
    Gaussian-normalize a dataset.

    The data is first centred with `gaussian_center`; the centred values are
    then divided by the square root of the Gaussian-filtered squared centred
    values, i.e. by a local standard-deviation estimate.

    Parameters
    ----------
    data : 2 or 3 dimensional array
        The data to be normalized.
    sigma : float or array of floats
        Forwarded unchanged to `gaussian_center` and `gaussian`. If an array
        is given, it has to satisfy `len(sigma) = data.ndim`. Default: 0.5
    **kwargs : other named parameters
        Forwarded to `gaussian_center` and `gaussian` with `keep_gpu`
        forced to True (presumably they end up in `conv.make_gaussian_1d`;
        confirm against those helpers).

    Returns
    -------
    result : normalized data
        Because `keep_gpu=True` is forced, this is expected to be a
        `GPUArray`; use `.get()` to retrieve the Numpy array.
    """
    gpu_kwargs = dict(kwargs, keep_gpu=True)
    centered = gaussian_center(data, sigma=sigma, **gpu_kwargs)
    local_std = cumath.sqrt(gaussian(centered**2, sigma=sigma, **gpu_kwargs))
    # TODO numerical precision ignore local_std < 1e-7
    centered /= local_std
    return centered
Beispiel #4
0
    def lcc_to_sphere_cuda(self,
                           x,
                           y,
                           R=6370,
                           truelat0=31.7,
                           truelat1=31.7,
                           ref_lat=31.68858,
                           stand_lon=-113.7):
        """Convert projected ``x``/``y`` coordinates to latitude/longitude.

        The formulas appear to be the inverse Lambert conformal conic
        projection.  ``x`` and ``y`` are divided by ``R``, so they must use
        the same length unit as ``R``.  They are passed to ``cumath``
        functions, so they are presumably GPU arrays (confirm with caller).

        Parameters
        ----------
        x, y : projected coordinates
        R : sphere radius used to scale ``x`` and ``y``
        truelat0, truelat1 : the two true latitudes, in degrees
        ref_lat : reference latitude, in degrees
        stand_lon : standard longitude, in degrees

        Returns
        -------
        (phis, lambdas) : latitudes and longitudes converted to degrees
        """
        def tan_quarter_pi_plus_half(angle):
            return np.tan(np.pi / 4 + angle / 2)

        lat_ref = np.radians(ref_lat)
        lat_a = np.radians(truelat0)
        lat_b = np.radians(truelat1)
        lon_ref = np.radians(stand_lon)

        # Cone constant: the general two-parallel formula, or the sine of
        # the reference latitude when both true latitudes coincide.
        if truelat0 != truelat1:
            cone = (np.log(np.cos(lat_a) / np.cos(lat_b)) / np.log(
                tan_quarter_pi_plus_half(lat_b) /
                tan_quarter_pi_plus_half(lat_a)))
        else:
            cone = np.sin(lat_ref)

        scale = (np.cos(lat_a) *
                 np.power(tan_quarter_pi_plus_half(lat_a), cone) / cone)
        radius_ref = scale / np.power(tan_quarter_pi_plus_half(lat_ref), cone)

        x_unit = x / R
        y_unit = y / R
        y_shifted = y_unit - radius_ref
        radius = cumath.sqrt(x_unit * x_unit + y_shifted * y_shifted)

        inv_cone = 1.0 / cone
        angle = self.atan2(scale**inv_cone, radius**inv_cone)

        phis = 360 * (angle - np.pi / 4) / np.pi
        lambdas = (cumath.asin(x_unit / radius) / cone + lon_ref) * 180 / np.pi
        return phis, lambdas
    def _get_updates(self, grads):
        """Compute the parameter updates for one step from the gradients.

        Advances the step counter ``t``, refreshes the running first and
        second moment estimates (``ms`` and ``vs``), recomputes the
        bias-corrected ``learning_rate`` and derives one update per gradient.

        Parameters
        ----------
        grads : list, length = len(coefs_) + len(intercepts_)
            Gradients with respect to coefs_ and intercepts_ in the MLP
            model, aligned with the parameters.

        Returns
        -------
        updates : list, length = len(grads)
            The values to add to params
        """
        self.t += 1
        decay_1 = self.beta_1
        decay_2 = self.beta_2

        first_moments = []
        for moment, grad in zip(self.ms, grads):
            first_moments.append(decay_1 * moment + (1 - decay_1) * grad)
        self.ms = first_moments

        second_moments = []
        for moment, grad in zip(self.vs, grads):
            second_moments.append(
                decay_2 * moment + (1 - decay_2) * (grad**2))
        self.vs = second_moments

        self.learning_rate = (self.learning_rate_init *
                              np.sqrt(1 - decay_2**self.t) /
                              (1 - decay_1**self.t))

        step = -self.learning_rate
        updates = []
        for first, second in zip(self.ms, self.vs):
            updates.append(step * first / (cumath.sqrt(second) + self.epsilon))
        return updates
Beispiel #6
0
def magnitude(vec, vec2):
    """Combine the dot product and the norms of two vectors on the GPU.

    Both inputs are uploaded with ``gpuarr.to_gpu_async``.  The value
    ``dot(vec, vec2) / sqrt(dot(vec, vec)) + sqrt(dot(vec2, vec2))`` is
    computed on the device, printed, and returned after ``.get()``.

    Parameters
    ----------
    vec, vec2 : host arrays accepted by ``gpuarr.to_gpu_async``

    Returns
    -------
    The host copy of the computed value.
    """
    gpu_arry = gpuarr.to_gpu_async(vec)
    gpu_arry2 = gpuarr.to_gpu_async(vec2)
    mag = cumath.sqrt(gpuarr.dot(gpu_arry, gpu_arry, dtype=np.float32))
    mag2 = cumath.sqrt(gpuarr.dot(gpu_arry2, gpu_arry2, dtype=np.float32))

    # NOTE(review): operator precedence makes this (dot / mag) + mag2.  If a
    # cosine similarity was intended it should be dot / (mag * mag2) --
    # confirm with the callers before changing the arithmetic.
    product = gpuarr.dot(gpu_arry, gpu_arry2, dtype=np.float32) / mag + mag2
    # The parenthesised single-argument form prints the same on Python 2 and
    # is valid syntax on Python 3 (the bare ``print product`` was not).
    print(product)
    return product.get()
Beispiel #7
0
 def _sigma(self, sliceset, u, lower_bounds, upper_bounds):
     """Return the element-wise square root of the per-slice kernel output.

     Launches ``sorted_std_per_slice`` into a zero-initialised float64
     buffer with one entry per slice of ``sliceset`` and returns
     ``cumath.sqrt`` of that buffer.
     """
     threads_per_block = 256
     # NOTE(review): floor division leaves a tail of slices without a
     # thread when n_slices is not a multiple of 256, unless the kernel
     # loops over the remainder itself -- confirm against the kernel.
     n_blocks = max(sliceset.n_slices // threads_per_block, 1)
     per_slice = gpuarray.zeros(sliceset.n_slices, dtype=np.float64)
     # NOTE(review): the count handed to the kernel is self.n_slices while
     # the buffer is sized from sliceset.n_slices -- verify they agree.
     sorted_std_per_slice(
         lower_bounds.gpudata,
         upper_bounds.gpudata,
         u.gpudata,
         self.n_slices,
         per_slice.gpudata,
         block=(threads_per_block, 1, 1),
         grid=(n_blocks, 1, 1),
     )
     return cumath.sqrt(per_slice)
Beispiel #8
0
def diag_gpu(A, v1):
    """Run a three-term recurrence on the GPU and print its coefficients.

    The loop looks like a Lanczos-style tridiagonalisation: each step
    multiplies ``A`` with the current row of ``Q``, stores the projection in
    ``alpha``, removes the components along the current and previous rows,
    and stores the norm of the remainder in ``beta`` before normalising it
    into the next row of ``Q``.

    Parameters
    ----------
    A : square array, float64
        Uploaded as-is; ``A.shape[0]`` sets the problem size ``m``.
    v1 : array of length ``m``
        Starting vector, copied into row 1 of ``Q`` (row 0 stays zero).

    Returns
    -------
    None.  The ``alpha`` and ``beta`` arrays are printed, not returned.
    """
    # handle
    current_handle = cublas.cublasCreate()
    # The handle owns library resources; release it on every exit path,
    # including failures during kernel compilation or the BLAS calls.
    try:
        m = A.shape[0]
        Q = np.zeros((m, m), dtype=np.float64)
        # Q[0, :] = 0.0 # implied
        Q[1, :] = v1.copy()
        beta = np.zeros(m, dtype=np.float64)
        alpha = np.zeros(m, dtype=np.float64)

        # move data onto the GPU
        A_gpu = gpuarray.to_gpu(A)
        Q_gpu = gpuarray.to_gpu(Q)
        beta_gpu = gpuarray.to_gpu(beta)
        alpha_gpu = gpuarray.to_gpu(alpha)
        w = gpuarray.zeros(m, dtype=np.float64)

        # we define three kernels for simple arithmetic
        w_scale = ElementwiseKernel(
            arguments="double *w, double *alpha, double *beta, double *Q1, double *Q2, int loop_index",
            operation="w[i] = w[i] - (alpha[loop_index] * Q1[i]) - (beta[loop_index] * Q2[i])",
            name="element_wise_w_building")
        # using -= to do inplace subtraction gives an incorrect answer

        # sum of squares of a vector
        norm_krnl = ReductionKernel(np.float64, neutral="0.0", reduce_expr="a+b",
            map_expr="x[i]*x[i]", arguments="double *x")

        ediv = ElementwiseKernel(
            arguments="double *a, double *b, double *c, int loop_index",
            operation="a[i] = b[i] / c[loop_index+1]",
            name="element_wise_division")
        # the name must not have spaces!!!!

        for i in range(1, m-1):
            # w = A^T * Q[i]
            cublas.cublasDgemv(handle = current_handle, trans = 'T',
                                m = m, n = m, # Hermitian matrix
                                alpha = 1.0,
                                beta = 0.0,
                                A = A_gpu.gpudata,
                                lda = m,
                                x = Q_gpu[i, :].gpudata,
                                incx = 1,
                                y = w.gpudata,
                                incy = 1,
                                )

            # alpha[i] = w . Q[i], written as a 1x1 matrix product
            cublas.cublasDgemm(handle = current_handle,
                                transa = 'n', transb = 'n',
                                m   = 1, n  = 1,      k = m,
                                lda = 1, ldb = m, ldc = 1,
                                alpha = 1.0,  beta = 0.0,
                                A = w.gpudata,
                                B = Q_gpu[i, :].gpudata,
                                C = alpha_gpu[i].gpudata)

            w_scale(w, alpha_gpu, beta_gpu, Q_gpu[i, :], Q_gpu[i-1, :], i)
            beta_gpu[i+1] = cumath.sqrt(norm_krnl(w))
            ediv(Q_gpu[i+1, :], w, beta_gpu, i)
        # end of loop

        # last 2 steps
        cublas.cublasDgemv(handle = current_handle, trans = 'T',
                                m = m, n = m, # Hermitian matrix
                                alpha = 1.0,
                                beta = 0.0,
                                A = A_gpu.gpudata,
                                lda = m,
                                x = Q_gpu[-1, :].gpudata,
                                incx = 1,
                                y = w.gpudata,
                                incy = 1,)

        cublas.cublasDgemm(handle = current_handle,
                            transa = 'n', transb = 'n',
                            m   = 1, n  = 1,  k = m,
                            lda = 1, ldb = m, ldc = 1,
                            alpha = 1.0,  beta = 0.0,
                            A = w.gpudata,
                            B = Q_gpu[-1, :].gpudata,
                            C = alpha_gpu[-1].gpudata)

        # retrieve the alphas and betas
        alpha_cpu = alpha_gpu.get()
        beta_cpu = beta_gpu.get()

        print("GPU: ", alpha_cpu, beta_cpu, sep="\n\n")
        # make tridiagonal matrix out of alpha and B
        # Tri = np.zeros(matrix_size)
        return
    finally:
        cublas.cublasDestroy(current_handle)
Beispiel #9
0
 def sqrt_t(self, a, out):
     """Call ``cumath.sqrt`` on ``a`` with ``out`` passed as the ``out`` keyword.

     The return value of ``cumath.sqrt`` is discarded, so this method
     returns ``None``.
     """
     cumath.sqrt(a, out=out)
Beispiel #10
0
N = 100000

# --- Draw two random row vectors on the CPU, stored as single precision
h_a = np.random.randn(1, N).astype(np.float32)
h_b = np.random.randn(1, N).astype(np.float32)
h_c = np.empty_like(h_a)

# --- Upload the inputs to the device
d_a = gpuarray.to_gpu(h_a)
d_b = gpuarray.to_gpu(h_b)

# --- Time sqrt(|a|) + exp(b) between the two events
start.record()
d_c = cumath.sqrt(cumath.fabs(d_a)) + cumath.exp(d_b)
end.record()
end.synchronize()
# The 1e-3 factor presumably converts milliseconds to seconds -- confirm
# against the event type behind `start`/`end`.
secs = start.time_till(end) * 1e-3
print("Processing time = %fs" % secs)

h_c = d_c.get()

# --- Compare the device result with the same expression evaluated by NumPy
if not np.all(abs(h_c - (np.sqrt(np.abs(h_a)) + np.exp(h_b))) < 1e-5):
    print("Error!")
else:
    print("Test passed!")

# --- Flush context printf buffer
cuda.Context.synchronize()
Beispiel #11
0
 def sqrt_t(self, a, out):
     """Call ``cumath.sqrt`` on ``a``, asking for the result in ``out``.

     ``out`` is passed by keyword: PyCUDA treats a second positional
     argument as a deprecated "stream or out" slot, so the keyword form
     states the intent explicitly.  Returns ``None``.
     """
     cumath.sqrt(a, out=out)
Beispiel #12
0
def cuda_field(ab, krv, cartesian=True, bohren=True):
    '''Returns the field scattered by the particle at each coordinate

    The coordinates are uploaded to the GPU as float32, the partial-wave
    sum is accumulated there in complex64, and the result is copied back
    to the host with ``.get()``.

    Parameters
    ----------
    ab : numpy.ndarray
        Mie scattering coefficients, indexed as ``ab[n, 0]`` and
        ``ab[n, 1]``. ``ab.shape[0]`` sets the number of partial waves;
        orders ``1 .. ab.shape[0] - 1`` are summed.
    krv : numpy.ndarray
        Reduced vector displacements of particle from image coordinates,
        indexed as ``krv[:, 0]``, ``krv[:, 1]``, ``krv[:, 2]``.
    cartesian : bool
        If set, return field projected onto Cartesian coordinates.
        Otherwise, return polar projection.
    bohren : bool
        If set, use sign convention from Bohren and Huffman.
        Otherwise, use opposite sign convention.

    Returns
    -------
    field : numpy.ndarray
        [3, npts] array of complex vector values of the
        scattered field at each coordinate.
    '''

    nc = ab.shape[0]  # number of partial waves in sum

    # GEOMETRY
    # 1. particle displacement [pixel]
    # Note: The sign convention used here is appropriate
    # for illumination propagating in the -z direction.
    # This means that a particle forming an image in the
    # focal plane (z = 0) is located at positive z.
    # Accounting for this by flipping the axial coordinate
    # is equivalent to using a mirrored (left-handed)
    # coordinate system.
    kx = gpuarray.to_gpu(krv[:, 0]).astype(np.float32)
    ky = gpuarray.to_gpu(krv[:, 1]).astype(np.float32)
    kz = gpuarray.to_gpu(-krv[:, 2]).astype(np.float32)
    npts = len(kx)

    # 2. geometric factors
    # NOTE(review): krho, kr and abs(kz) are used as denominators below
    # without a guard; a point with kx == ky == 0 or kz == 0 would divide
    # by zero -- confirm callers never pass such coordinates.
    krho = cumath.sqrt(kx * kx + ky * ky)
    cosphi = kx / krho
    sinphi = ky / krho

    kr = cumath.sqrt(krho * krho + kz * kz)
    costheta = kz / kr
    sintheta = krho / kr

    sinkr = cumath.sin(kr)
    coskr = cumath.cos(kr)

    # SPECIAL FUNCTIONS
    # starting points for recursive function evaluation ...
    # 1. Riccati-Bessel radial functions, page 478.
    # Particles above the focal plane create diverging waves
    # described by Eq. (4.13) for $h_n^{(1)}(kr)$. These have z > 0.
    # Those below the focal plane appear to be converging from the
    # perspective of the camera. They are described by Eq. (4.14)
    # for $h_n^{(2)}(kr)$, and have z < 0. We can select the
    # appropriate case by applying the correct sign of the imaginary
    # part of the starting functions...
    factor = 1.j * kz / abs(kz)
    if not bohren:
        factor *= -1.
    xi_nm2 = coskr + factor * sinkr  # \xi_{-1}(kr)
    xi_nm1 = sinkr - factor * coskr  # \xi_0(kr)

    # 2. Angular functions (4.47), page 95
    pi_nm1 = 0.  # \pi_0(\cos\theta)
    pi_n = 1.  # \pi_1(\cos\theta)

    # 3. Vector spherical harmonics: [r,theta,phi]
    # mo1n is zero-initialised because its radial row 0 is never written;
    # ne1n can start uninitialised because all three rows are overwritten
    # on every pass through the loop.
    mo1n = gpuarray.zeros([3, npts], dtype=np.complex64)
    ne1n = gpuarray.empty([3, npts], dtype=np.complex64)

    # storage for scattered field
    es = gpuarray.zeros([3, npts], dtype=np.complex64)

    # COMPUTE field by summing partial waves
    for n in range(1, nc):
        # upward recurrences ...
        # 4. Legendre factor (4.47)
        # Method described by Wiscombe (1980)
        swisc = pi_n * costheta
        twisc = swisc - pi_nm1
        tau_n = pi_nm1 - n * twisc  # -\tau_n(\cos\theta)

        # ... Riccati-Bessel function, page 478
        xi_n = (2. * n - 1.) * (xi_nm1 / kr) - xi_nm2  # \xi_n(kr)

        # ... Deirmendjian's derivative
        dn = (n * xi_n) / kr - xi_nm1

        # vector spherical harmonics (4.50)
        # mo1n[0, :] = 0.j           # no radial component
        mo1n[1, :] = pi_n * xi_n  # ... divided by cosphi/kr
        mo1n[2, :] = tau_n * xi_n  # ... divided by sinphi/kr

        # ... divided by cosphi sintheta/kr^2
        ne1n[0, :] = n * (n + 1.) * pi_n * xi_n
        ne1n[1, :] = tau_n * dn  # ... divided by cosphi/kr
        ne1n[2, :] = pi_n * dn  # ... divided by sinphi/kr

        # prefactor, page 93
        en = 1.j**n * (2. * n + 1.) / n / (n + 1.)

        # the scattered field in spherical coordinates (4.45)
        # Coefficients are cast to complex64 to match the dtype of es.
        es += np.complex64(1.j * en * ab[n, 0]) * ne1n
        es -= np.complex64(en * ab[n, 1]) * mo1n

        # upward recurrences ...
        # ... angular functions (4.47)
        # Method described by Wiscombe (1980)
        pi_nm1 = pi_n
        pi_n = swisc + ((n + 1.) / n) * twisc

        # ... Riccati-Bessel function
        xi_nm2 = xi_nm1
        xi_nm1 = xi_n
    # n: multipole sum

    # geometric factors were divided out of the vector
    # spherical harmonics for accuracy and efficiency ...
    # ... put them back at the end.
    radialfactor = 1. / kr
    es[0, :] *= cosphi * sintheta * radialfactor**2
    es[1, :] *= cosphi * radialfactor
    es[2, :] *= sinphi * radialfactor

    # By default, the scattered wave is returned in spherical
    # coordinates.  Project components onto Cartesian coordinates.
    # Assumes that the incident wave propagates along z and
    # is linearly polarized along x
    if cartesian:
        ec = gpuarray.empty_like(es)

        ec[0, :] = es[0, :] * sintheta * cosphi
        ec[0, :] += es[1, :] * costheta * cosphi
        ec[0, :] -= es[2, :] * sinphi

        ec[1, :] = es[0, :] * sintheta * sinphi
        ec[1, :] += es[1, :] * costheta * sinphi
        ec[1, :] += es[2, :] * cosphi

        ec[2, :] = es[0, :] * costheta - es[1, :] * sintheta
        return ec.get()
    else:
        return es.get()
    def execute(self):
        """Iteratively merge the images of ``self.images_iterator`` on the GPU.

        A reference image is read to obtain the working shape and is
        filled with ``2**color_depth / 2``.  Five passes follow; each pass:

        1. runs the ``dist_colors`` kernel for every image whose shape
           matches the reference (others are discarded from the iterator),
        2. replaces the current result with ``avrimg_cu / img_cnt``,
        3. runs the ``img_merge_std`` kernel for every image, then divides
           ``std_cu`` by ``img_cnt`` and takes its square root in place,
        4. resets ``avrimg_cu`` to zero.

        The final result is stored in ``self.resulting_image.image``.
        Elapsed-time and launch-geometry diagnostics are printed.

        Returns
        -------
        None
        """
        resimg = self.images_iterator.read_reference_image()
        self.resulting_image = self.images_iterator.read_reference_image()

        shape = resimg.shape

        resimg.image[:] = 2**resimg.color_depth / 2

        resimg_nda = np.ndarray(shape=resimg.image.shape,
                                dtype=resimg.image.dtype)
        resimg_nda[:] = resimg.image[:]

        resimg_cu = gpuarray.to_gpu(resimg_nda)
        imgarr_cu = gpuarray.to_gpu(resimg_nda)

        avrimg_cu = gpuarray.zeros_like(resimg_cu)

        std_cu = gpuarray.zeros(shape[:2], dtype=resimg.dtype)
        std_cu.fill(np.float32(2**resimg.color_depth))

        # The unused ``dist_cu``/``flags_cu`` buffers were dropped; the
        # latter used ``np.bool``, which NumPy 1.24 removed.

        iter_cnt = 5

        print(shape)
        th_x = 32
        th_y = 32

        # One extra block per axis so a partial tile at the edge is covered.
        blk_x = int(shape[0] / th_x) + 1
        blk_y = int(shape[1] / th_y) + 1

        grid_im = (blk_x, blk_y, 1)
        block_im = (th_x, th_y, 1)

        print(block_im)
        print(grid_im)

        mod_dist_colors = SourceModule(self.__kernel_dist_colors)
        mod_std = SourceModule(self.__kernel_std)

        dist_colors = mod_dist_colors.get_function("dist_colors")
        img_merge_std = mod_std.get_function("img_merge_std")

        # ``time.clock`` was removed in Python 3.8; use ``perf_counter``
        # where it exists and fall back to ``clock`` on older interpreters.
        timer = getattr(time, "perf_counter", None) or time.clock

        ca = timer()
        for itr in range(iter_cnt):
            img_cnt = 0.0

            for imgarr in self.images_iterator:

                if shape != imgarr.shape:
                    self.images_iterator.discard_image()
                    continue

                img_cnt += 1
                imgarr_cu.set(imgarr.image)

                dist_colors(imgarr_cu,
                            resimg_cu,
                            avrimg_cu,
                            std_cu,
                            np.int32(shape[0]), np.int32(shape[1]),
                            np.int32(itr), np.float32(10.0),
                            block=block_im, grid=grid_im)

                cb = timer()
                print("avg clock: %1.4f" % (cb - ca))

            # NOTE(review): img_cnt stays 0 when no image matches the
            # reference shape, making this a division by zero -- confirm
            # whether an empty or fully-discarded iterator can occur.
            resimg_cu = avrimg_cu[:] / np.float32(img_cnt)
            std_cu.fill(0.0)

            for imgarr in self.images_iterator:
                imgarr_cu.set(imgarr.image)
                img_merge_std(imgarr_cu,
                              resimg_cu,
                              std_cu,
                              np.int32(shape[0]), np.int32(shape[1]),
                              block=block_im, grid=grid_im)
                cb = timer()
                print("std clock: %1.4f" % (cb - ca))

            std_cu /= np.float32(img_cnt)
            cumath.sqrt(std_cu, out=std_cu)

            avrimg_cu.fill(0.0)

        self.resulting_image.image = np.array(resimg_cu.get())
def random_normal(loc=0.0, scale=1.0, size=None):
    """Draw normally distributed samples on the GPU.

    Two float64 uniform arrays from ``curandom.rand`` are combined as
    ``sqrt(-2 * log(u1)) * cos(2 * pi * u2)`` (the Box-Muller form), then
    scaled by ``scale`` and shifted by ``loc``.

    Parameters
    ----------
    loc : offset added to the samples
    scale : factor applied to the samples before the offset
    size : passed unchanged to ``curandom.rand`` as the shape

    Returns
    -------
    CUDAArray wrapping the resulting samples
    """
    uniform_radius = curandom.rand(size, dtype=numpy.float64)
    uniform_angle = curandom.rand(size, dtype=numpy.float64)
    radius = cumath.sqrt(-2.*cumath.log(uniform_radius))
    cos_angle = cumath.cos(2.*numpy.pi*uniform_angle)
    standard = radius*cos_angle
    return CUDAArray(scale*standard+loc)
 def sqrt(self):
     """Return a new ``CUDAArray`` wrapping ``cumath.sqrt`` of ``self.arr``."""
     root = cumath.sqrt(self.arr)
     return CUDAArray(root)