Beispiel #1
def get_grad_omega(grad_omega, omega, r, d, qbin):
    """
    Get the gradient of the Debye sum with respect to atomic positions

    Written as a device kernel: each thread obtains one (k, qx) pair from
    ``cuda.grid(2)`` and fills the three Cartesian components
    ``grad_omega[k, :, qx]`` in place.

    Parameters
    ----------
    grad_omega: kx3xQ array
        The gradient (output, overwritten in place)
    omega: kxQ array
        Debye sum
    r: k array
        The pair distance array
    d: kx3 array
        The pair displacements
    qbin: float
        The qbin size
    """
    kmax, _, qmax_bin = grad_omega.shape
    k, qx = cuda.grid(2)
    # Threads whose indices fall outside the output array have no work to do.
    if k >= kmax or qx >= qmax_bin:
        return
    sv = f4(qx) * qbin
    rk = r[k]
    # (sv * cos(sv * rk) - omega) / rk**2 is the scalar prefactor; multiplying
    # it by the displacement d[k, w] gives the w-th gradient component.
    a = (sv * math.cos(sv * rk)) - omega[k, qx]
    a /= rk * rk
    for w in range(i4(3)):
        grad_omega[k, w, qx] = a * d[k, w]
Beispiel #2
def get_grad_omega(grad_omega, omega, r, d, qbin):
    """
    Get the gradient of the Debye sum with respect to atomic positions

    Written as a device kernel: each thread obtains one (k, qx) pair from
    ``cuda.grid(2)`` and fills the three Cartesian components
    ``grad_omega[k, :, qx]`` in place.

    Parameters
    ----------
    grad_omega: kx3xQ array
        The gradient (output, overwritten in place)
    omega: kxQ array
        Debye sum
    r: k array
        The pair distance array
    d: kx3 array
        The pair displacements
    qbin: float
        The qbin size
    """
    kmax, _, qmax_bin = grad_omega.shape
    k, qx = cuda.grid(2)
    # Threads whose indices fall outside the output array have no work to do.
    if k >= kmax or qx >= qmax_bin:
        return
    sv = f4(qx) * qbin
    rk = r[k]
    # (sv * cos(sv * rk) - omega) / rk**2 is the scalar prefactor; multiplying
    # it by the displacement d[k, w] gives the w-th gradient component.
    a = (sv * math.cos(sv * rk)) - omega[k, qx]
    a /= rk * rk
    for w in range(i4(3)):
        grad_omega[k, w, qx] = a * d[k, w]
Beispiel #3
def experimental_sum_grad_fq1(new_grad, grad, k_cov):
    """
    Scatter per-pair gradient entries into a per-index gradient array

    Each thread takes one (k, qx) pair from ``cuda.grid(2)``, converts the
    offset pair index ``k + k_cov`` to ``(i, j)`` with ``cuda_k_to_ij`` and,
    for each of the three components, atomically adds ``grad[k, tz, qx]`` to
    ``new_grad[j, tz, qx]`` and subtracts it from ``new_grad[i, tz, qx]``.

    Parameters
    ----------
    new_grad: array indexed as [i, tz, qx]
        The accumulated gradient (output, updated in place)
    grad: array indexed as [k, tz, qx]
        The per-pair gradient
    k_cov: int
        Offset added to the local pair index before it is mapped to (i, j)
    """
    k, qx = cuda.grid(2)
    # Threads whose indices fall outside the input array have no work to do.
    if k >= len(grad) or qx >= grad.shape[2]:
        return
    i, j = cuda_k_to_ij(i4(k + k_cov))
    for tz in range(3):
        a = grad[k, tz, qx]
        # Atomic adds: several pairs can target the same row of new_grad.
        cuda.atomic.add(new_grad, (j, tz, qx), a)
        cuda.atomic.add(new_grad, (i, tz, qx), f4(-1.) * a)
Beispiel #4
def experimental_sum_grad_fq1(new_grad, grad, k_cov):
    """
    Scatter per-pair gradient entries into a per-index gradient array

    Each thread takes one (k, qx) pair from ``cuda.grid(2)``, converts the
    offset pair index ``k + k_cov`` to ``(i, j)`` with ``cuda_k_to_ij`` and,
    for each of the three components, atomically adds ``grad[k, tz, qx]`` to
    ``new_grad[j, tz, qx]`` and subtracts it from ``new_grad[i, tz, qx]``.

    Parameters
    ----------
    new_grad: array indexed as [i, tz, qx]
        The accumulated gradient (output, updated in place)
    grad: array indexed as [k, tz, qx]
        The per-pair gradient
    k_cov: int
        Offset added to the local pair index before it is mapped to (i, j)
    """
    k, qx = cuda.grid(2)
    # Threads whose indices fall outside the input array have no work to do.
    if k >= len(grad) or qx >= grad.shape[2]:
        return
    i, j = cuda_k_to_ij(i4(k + k_cov))
    for tz in range(3):
        a = grad[k, tz, qx]
        # Atomic adds: several pairs can target the same row of new_grad.
        cuda.atomic.add(new_grad, (j, tz, qx), a)
        cuda.atomic.add(new_grad, (i, tz, qx), f4(-1.) * a)
def run_target(N, target):
    """Time one run of the vectorized discriminant on ``N`` inputs.

    Builds the vectorized function for the requested ``target``, generates
    float32 inputs with ``generate_input``, times a single call and prints
    the elapsed time and throughput. When ``-verify`` is present on the
    command line the result is passed to ``check_answer``.

    Parameters
    ----------
    N: int
        Number of input elements to generate
    target: str
        Target name forwarded to ``vectorize``
    """
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('== Target %s' % (target,))
    # NOTE(review): the tail of this call was cut off in the source; it is
    # reconstructed here as ``target=target)(discriminant)`` -- confirm that
    # ``discriminant`` is the scalar function defined elsewhere in the file.
    vect_discriminant = vectorize([f4(f4, f4, f4), f8(f8, f8, f8)],
                                  target=target)(discriminant)

    A, B, C = generate_input(N, dtype=np.float32)

    ts = time()
    D = vect_discriminant(A, B, C)
    te = time()

    total_time = (te - ts)

    print('Execution time %.4f' % total_time)
    print('Throughput %.4f' % (N / total_time))

    if '-verify' in sys.argv[1:]:
        check_answer(D, A, B, C)
Beispiel #6
def get_omega(omega, r, qbin):
    """
    Generate Omega

    Written as a device kernel: each thread obtains one (k, qx) pair from
    ``cuda.grid(2)`` and stores ``sin(sv * r[k]) / r[k]`` in ``omega[k, qx]``,
    where ``sv = qbin * qx``.

    Parameters
    ----------
    omega: kxQ array
        The output array, overwritten in place
    r: k array
        The pair distance array
    qbin: float
        The qbin size
    """
    kmax, qmax_bin = omega.shape
    k, qx = cuda.grid(2)
    # Threads whose indices fall outside the output array have no work to do.
    if k >= kmax or qx >= qmax_bin:
        return
    sv = qbin * f4(qx)
    rk = r[k]
    omega[k, qx] = math.sin(sv * rk) / rk
Beispiel #7
def get_omega(omega, r, qbin):
    """
    Generate Omega

    Written as a device kernel: each thread obtains one (k, qx) pair from
    ``cuda.grid(2)`` and stores ``sin(sv * r[k]) / r[k]`` in ``omega[k, qx]``,
    where ``sv = qbin * qx``.

    Parameters
    ----------
    omega: kxQ array
        The output array, overwritten in place
    r: k array
        The pair distance array
    qbin: float
        The qbin size
    """
    kmax, qmax_bin = omega.shape
    k, qx = cuda.grid(2)
    # Threads whose indices fall outside the output array have no work to do.
    if k >= kmax or qx >= qmax_bin:
        return
    sv = qbin * f4(qx)
    rk = r[k]
    omega[k, qx] = math.sin(sv * rk) / rk
Beispiel #8
def d2_zero(a):
    """
    Set every element of a 2D array to zero

    Each thread obtains one (i, j) pair from ``cuda.grid(2)`` and writes
    ``f4(0.)`` to ``a[i, j]``.

    Parameters
    ----------
    a: 2D array
        The array to clear, overwritten in place
    """
    i, j = cuda.grid(2)
    # Threads whose indices fall outside the array have no work to do.
    if i >= a.shape[0] or j >= a.shape[1]:
        return
    a[i, j] = f4(0.)
Beispiel #9
def cuda_k_to_ij(k):
    """
    Convert a linear pair index to its (i, j) index pair

    The mapping inverts ``k = i * (i - 1) / 2 + j``: ``i`` is taken as
    ``floor((1 + sqrt(1 + 8 * k)) / 2)`` and ``j`` is the remainder. All
    intermediate arithmetic is cast to ``f4``.

    Parameters
    ----------
    k: int
        The linear pair index

    Returns
    -------
    (i, j): pair of i4
        The two indices corresponding to ``k``
    """
    one = f4(1)
    half = f4(.5)
    k_f = f4(k)
    root = f4(math.sqrt(one + f4(8.) * k_f))
    row = math.floor((one + root) * half)
    row_f = f4(row)
    col = k_f - row_f * (row_f - one) * half
    return i4(row), i4(col)
Beispiel #10
                # row
                if tmp[k * 4] and tmp[k * 4 + 1] and tmp[k * 4 +
                                                         2] and tmp[k * 4 + 3]:
                    return True

            # diag
                if tmp[0] and tmp[5] and tmp[10] and tmp[15]:
                    return True
                elif tmp[3] and tmp[6] and tmp[9] and tmp[12]:
                    return True
        return False

@numba.jit(numba.f4(numba.i1[:, :], numba.i8[:, :]), cache=True)
def reward(board, action):
    """Score the position reached by playing ``action`` on a copy of ``board``.

    The input board is not modified: the move is written as ``1`` into a
    copy, which is then evaluated.

    Returns
    -------
    ``1.0`` when ``winning`` accepts the flattened copy, ``-0.1`` when the
    copy has no zero cells left, otherwise a one-element float32 zero array.
    """
    # NOTE(review): the fall-through result is a 1-element array while the
    # declared signature returns a scalar f4 -- confirm which one is intended.
    neutral = np.zeros(1, dtype=np.float32)
    trial = board.copy()
    trial[action[0], action[1]] = 1
    # winning state
    if winning(trial.flatten()):
        return 1.0
    # board is full and nobody won
    if (trial != 0).all():
        return -0.1
    return neutral

@numba.jit(numba.f4[:](numba.i1[:, :], numba.i8[:, :]), cache=True)
def getFeature(board, action):
Beispiel #11
#            _ - - - _
#                  _ - - - _
# example: 2 slices (workers) with window of 3, for parallel solving
#          (the 'underscore' cells are required by the numerical scheme)

import os

from joblib import Parallel, delayed
import numba as nb
import numpy as np

from mattflow import config as conf

def _g():
    return 9.81

@nb.njit(nb.f4[:, ::1](nb.f4[:, :, :], nb.i4, nb.i4, nb.b1), nogil=True)
def _max_horizontal_speed(U, Nx, Ng, parallel=True):
    """Max horizontal speed between left and right cells for every vertical
    g = _g()
    if parallel:
        max_h_speed = np.maximum(
            # x dim slicing of left values:  0: -1
            np.abs(U[1, Ng:-Ng, 0:-1] / U[0, Ng:-Ng, 0:-1]) +
            np.sqrt(g * np.abs(U[0, Ng:-Ng, 0:-1])),
Beispiel #12
def d2_zero(a):
    """
    Set every element of a 2D array to zero

    Each thread obtains one (i, j) pair from ``cuda.grid(2)`` and writes
    ``f4(0.)`` to ``a[i, j]``.

    Parameters
    ----------
    a: 2D array
        The array to clear, overwritten in place
    """
    i, j = cuda.grid(2)
    # Threads whose indices fall outside the array have no work to do.
    if i >= a.shape[0] or j >= a.shape[1]:
        return
    a[i, j] = f4(0.)