Beispiel #1
0
def get_grad_omega(grad_omega, omega, r, d, qbin):
    """
    Fill ``grad_omega`` with the gradient of the Debye sum with respect to
    atomic positions.

    The thread at 2D grid position (k, qx) writes the three components
    ``grad_omega[k, :, qx]`` as
    ``(Q * cos(Q * r[k]) - omega[k, qx]) / r[k]**2 * d[k, :]``
    with ``Q = qx * qbin``.

    Parameters
    ----------
    grad_omega: kx3xQ array
        The gradient, overwritten in place
    omega: kxQ array
        Debye sum
    r: k array
        The pair distance array
    d: kx3 array
        The pair displacements
    qbin: float
        The qbin size
    """
    n_pairs, _, n_q = grad_omega.shape
    pair, q_idx = cuda.grid(2)
    # Only threads that land on a valid (pair, Q bin) element do any work
    if pair < n_pairs and q_idx < n_q:
        q_val = f4(q_idx) * qbin
        dist = r[pair]
        scale = (q_val * math.cos(q_val * dist)) - omega[pair, q_idx]
        scale /= dist * dist
        for axis in range(i4(3)):
            grad_omega[pair, axis, q_idx] = scale * d[pair, axis]
Beispiel #2
0
def get_grad_omega(grad_omega, omega, r, d, qbin):
    """
    Compute the gradient of the Debye sum with respect to atomic positions
    and store it in ``grad_omega``.

    One thread of the 2D grid handles one (pair index, Q bin) combination
    and writes all three displacement components for it.

    Parameters
    ----------
    grad_omega: kx3xQ array
        The gradient (output, written in place)
    omega: kxQ array
        Debye sum
    r: k array
        The pair distance array
    d: kx3 array
        The pair displacements
    qbin: float
        The qbin size
    """
    pair_count, _, q_count = grad_omega.shape
    pair_idx, q_bin_idx = cuda.grid(2)
    # Threads outside the array extent have nothing to write
    if not (pair_idx < pair_count and q_bin_idx < q_count):
        return
    q = f4(q_bin_idx) * qbin
    pair_dist = r[pair_idx]
    # Radial factor (Q cos(Q r) - omega) / r^2, shared by all three axes
    factor = (q * math.cos(q * pair_dist)) - omega[pair_idx, q_bin_idx]
    factor /= pair_dist * pair_dist
    for comp in range(i4(3)):
        grad_omega[pair_idx, comp, q_bin_idx] = factor * d[pair_idx, comp]
Beispiel #3
0
def experimental_sum_grad_fq1(new_grad, grad, k_cov):
    """
    Accumulate per-pair gradient entries into ``new_grad``.

    For the thread at 2D grid position (k, qx), the pair index ``k + k_cov``
    is converted to ``(i, j)`` by ``cuda_k_to_ij``. Each of the three
    components ``grad[k, tz, qx]`` is then atomically added to
    ``new_grad[j, tz, qx]`` and atomically subtracted from
    ``new_grad[i, tz, qx]``.

    Parameters
    ----------
    new_grad: array indexed as [index, 3, Q]
        Accumulator, updated in place with atomic adds
    grad: array indexed as [k, 3, Q]
        Per-pair gradient values
    k_cov: int
        Offset added to the thread's k before conversion to (i, j)
    """
    pair, q_idx = cuda.grid(2)
    # Threads that fall inside the extent of grad do the accumulation
    if pair < len(grad) and q_idx < grad.shape[2]:
        i, j = cuda_k_to_ij(i4(pair + k_cov))
        for axis in range(3):
            contrib = grad[pair, axis, q_idx]
            cuda.atomic.add(new_grad, (j, axis, q_idx), contrib)
            cuda.atomic.add(new_grad, (i, axis, q_idx), f4(-1.) * contrib)
Beispiel #4
0
def experimental_sum_grad_fq1(new_grad, grad, k_cov):
    """
    Scatter the pair gradient ``grad`` onto ``new_grad`` with opposite signs
    for the two indices of each pair.

    Parameters
    ----------
    new_grad: array indexed as [index, 3, Q]
        Receives the sums through atomic adds (modified in place)
    grad: array indexed as [k, 3, Q]
        Gradient values, one row per pair handled by this launch
    k_cov: int
        Added to the thread's k before ``cuda_k_to_ij`` maps it to (i, j)
    """
    pair_idx, q_bin_idx = cuda.grid(2)
    if not (pair_idx < len(grad) and q_bin_idx < grad.shape[2]):
        return
    i, j = cuda_k_to_ij(i4(pair_idx + k_cov))
    for comp in range(3):
        value = grad[pair_idx, comp, q_bin_idx]
        # +value goes to index j, -value to index i
        cuda.atomic.add(new_grad, (j, comp, q_bin_idx), value)
        cuda.atomic.add(new_grad, (i, comp, q_bin_idx), f4(-1.) * value)
def run_target(N, target):
    """
    Time one vectorized evaluation of ``discriminant`` on ``target``.

    Builds a ufunc with ``vectorize`` for float32 and float64 signatures,
    runs it once on float32 inputs from ``generate_input`` and prints the
    elapsed time and throughput. When ``-verify`` is among the command-line
    arguments, the result is passed to ``check_answer``.

    Parameters
    ----------
    N:
        Problem size forwarded to ``generate_input``; also the numerator of
        the reported throughput (presumably an element count)
    target:
        Forwarded as the ``target`` argument of ``vectorize``
    """
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('== Target %s' % (target,))
    vect_discriminant = vectorize([f4(f4, f4, f4), f8(f8, f8, f8)],
                                  target=target)(discriminant)

    A, B, C = generate_input(N, dtype=np.float32)

    # No output buffer is pre-allocated: the call below returns its own
    # result array, so an earlier np.empty() would just be discarded.
    ts = time()
    D = vect_discriminant(A, B, C)
    te = time()

    total_time = te - ts

    print('Execution time %.4f' % total_time)
    # A coarse clock can report zero elapsed time for small N; report an
    # infinite throughput instead of raising ZeroDivisionError.
    throughput = N / total_time if total_time > 0 else float('inf')
    print('Throughput %.4f' % throughput)

    if '-verify' in sys.argv[1:]:
        check_answer(D, A, B, C)
Beispiel #6
0
def get_omega(omega, r, qbin):
    """
    Generate Omega

    The thread at 2D grid position (k, qx) stores
    ``sin(Q * r[k]) / r[k]`` in ``omega[k, qx]``, where ``Q = qbin * qx``.

    Parameters
    ----------
    omega: kxQ array
        Output, written in place
    r: k array
        The pair distance array
    qbin: float
        The qbin size
    """
    n_pairs, n_q = omega.shape
    pair, q_idx = cuda.grid(2)
    # Only threads that land on a valid element do any work
    if pair < n_pairs and q_idx < n_q:
        q_val = qbin * f4(q_idx)
        dist = r[pair]
        omega[pair, q_idx] = math.sin(q_val * dist) / dist
Beispiel #7
0
def get_omega(omega, r, qbin):
    """
    Generate Omega, the per-pair, per-Q-bin term ``sin(Q r) / r``.

    Parameters
    ----------
    omega: kxQ array
        Filled in place, one element per thread of the 2D grid
    r: k array
        The pair distance array
    qbin: float
        The qbin size
    """
    pair_count, q_count = omega.shape
    pair_idx, q_bin_idx = cuda.grid(2)
    # Threads outside the array extent have nothing to write
    if not (pair_idx < pair_count and q_bin_idx < q_count):
        return
    q = qbin * f4(q_bin_idx)
    pair_dist = r[pair_idx]
    omega[pair_idx, q_bin_idx] = math.sin(q * pair_dist) / pair_dist
Beispiel #8
0
def d2_zero(a):
    """
    Zero a 2D array in place.

    The thread at 2D grid position (i, j) writes ``f4(0.)`` to ``a[i, j]``;
    threads outside the array extent do nothing.
    """
    row, col = cuda.grid(2)
    if row < a.shape[0] and col < a.shape[1]:
        a[row, col] = f4(0.)
Beispiel #9
0
def cuda_k_to_ij(k):
    """
    Convert a linear pair index ``k`` into an ``(i, j)`` index pair.

    Uses ``i = floor((1 + sqrt(1 + 8 k)) / 2)`` and
    ``j = k - i (i - 1) / 2``, evaluated with ``f4`` casts, so that
    k = 0, 1, 2, 3 map to (1, 0), (2, 0), (2, 1), (3, 0).

    Parameters
    ----------
    k: int
        The linear index

    Returns
    -------
    (i, j): tuple
        Both cast with ``i4``
    """
    one = f4(1)
    half = f4(.5)
    discriminant = one + f4(8.) * f4(k)
    root = f4(math.sqrt(discriminant))
    i = math.floor((one + root) * half)
    # Entries that precede row i: i * (i - 1) / 2
    row_start = f4(i) * (f4(i) - one) * half
    j = f4(k) - row_start
    return i4(i), i4(j)
Beispiel #10
0
                # row
                if tmp[k * 4] and tmp[k * 4 + 1] and tmp[k * 4 +
                                                         2] and tmp[k * 4 + 3]:
                    return True

            # diag
            else:
                if tmp[0] and tmp[5] and tmp[10] and tmp[15]:
                    return True
                elif tmp[3] and tmp[6] and tmp[9] and tmp[12]:
                    return True
    else:
        return False


@numba.jit(numba.f4(numba.i1[:, :], numba.i8[:, :]), cache=True)
def reward(board, action):
    """
    Score the position reached by marking ``action`` on a copy of ``board``.

    ``board`` itself is not modified: the move is applied to a copy by
    setting ``tmp[action[0], action[1]] = 1``.

    Parameters
    ----------
    board: 2D array
        Current board
    action: array
        ``action[0]`` and ``action[1]`` index the cell to mark

    Returns
    -------
    float
        1.0 when ``winning`` accepts the flattened result, -0.1 when the
        result has no zero cells left, and 0.0 otherwise.
    """
    tmp = board.copy()
    tmp[action[0], action[1]] = 1
    # Every path returns a plain float. The earlier version started from a
    # 1-element float32 array and rebound the same name to a float, which
    # gives the variable two incompatible types and made the no-win/no-draw
    # result an array instead of a scalar.
    # winning state
    if winning(tmp.flatten()):
        # NOTE: the original author left an alternative win reward disabled:
        # 4.0 + np.sum(tmp == 0) // N
        return 1.0
    # board full without a win
    if (tmp != 0).all():
        return -0.1
    return 0.0


@numba.jit(numba.f4[:](numba.i1[:, :], numba.i8[:, :]), cache=True)
def getFeature(board, action):
Beispiel #11
0
#            _ - - - _
#                  _ - - - _
#
# example: 2 slices (workers) with window of 3, for parallel solving
#          (the 'underscore' cells are required by the numerical scheme)

import os

from joblib import Parallel, delayed
import numba as nb
import numpy as np

from mattflow import config as conf


@nb.njit(nb.f4())
def _g():
    """Return the constant 9.81 (presumably gravitational acceleration in
    m/s^2 - confirm against the solver's units)."""
    gravity = 9.81
    return gravity


@nb.njit(nb.f4[:, ::1](nb.f4[:, :, :], nb.i4, nb.i4, nb.b1), nogil=True)
def _max_horizontal_speed(U, Nx, Ng, parallel=True):
    """Max horizontal speed between left and right cells for every vertical
    interface"""
    g = _g()
    if parallel:
        max_h_speed = np.maximum(
            # x dim slicing of left values:  0: -1
            np.abs(U[1, Ng:-Ng, 0:-1] / U[0, Ng:-Ng, 0:-1]) +
            np.sqrt(g * np.abs(U[0, Ng:-Ng, 0:-1])),
Beispiel #12
0
def d2_zero(a):
    """
    Reset every element of the 2D array ``a`` to ``f4(0.)``, with one
    element handled per thread of the 2D grid.
    """
    i, j = cuda.grid(2)
    n_rows = a.shape[0]
    n_cols = a.shape[1]
    # Threads outside the array extent have nothing to write
    if not (i < n_rows and j < n_cols):
        return
    a[i, j] = f4(0.)