def get_grad_omega(grad_omega, omega, r, d, qbin):
    """
    Fill the gradient of the Debye sum with respect to atomic positions

    Every thread of the 2D launch grid owns one (pair, q bin) cell and
    writes the three components of the gradient for that cell. Threads
    that fall outside the array bounds do nothing.

    Parameters
    ----------
    grad_omega: kx3xQ array
        The gradient, written in place
    omega: kxQ array
        Debye sum
    r: k array
        The pair distance array
    d: kx3 array
        The pair displacements
    qbin: float
        The qbin size
    """
    n_pairs, _, n_qbins = grad_omega.shape
    pair, q_idx = cuda.grid(2)
    # Only threads that map onto a real (pair, q bin) cell do any work
    if pair < n_pairs and q_idx < n_qbins:
        q = f4(q_idx) * qbin
        dist = r[pair]
        # (q * cos(q * r) - omega) / r ** 2 is shared by all three components
        scale = (q * math.cos(q * dist) - omega[pair, q_idx]) / (dist * dist)
        for axis in range(i4(3)):
            grad_omega[pair, axis, q_idx] = scale * d[pair, axis]
def experimental_sum_grad_fq1(new_grad, grad, k_cov):
    """
    Scatter per-pair gradient entries onto the two members of each pair

    Each thread of the 2D launch grid owns one (pair, q bin) cell of
    ``grad``. The pair index, offset by ``k_cov``, is converted to an
    ``(i, j)`` index pair with ``cuda_k_to_ij``; the value is then added
    to row ``j`` and subtracted from row ``i`` of ``new_grad`` using
    atomic adds.

    Parameters
    ----------
    new_grad: 3D array
        The accumulator, indexed as [i or j, component, q bin]; updated
        in place
    grad: 3D array
        The per-pair values, indexed as [pair, component, q bin]
    k_cov: int
        Offset added to the local pair index before it is converted to
        (i, j)
    """
    pair, q_idx = cuda.grid(2)
    # Only threads that map onto a real (pair, q bin) cell do any work
    if pair < grad.shape[0] and q_idx < grad.shape[2]:
        row_i, row_j = cuda_k_to_ij(i4(pair + k_cov))
        for axis in range(3):
            contribution = grad[pair, axis, q_idx]
            # Equal and opposite contributions for the two pair members
            cuda.atomic.add(new_grad, (row_j, axis, q_idx), contribution)
            cuda.atomic.add(new_grad, (row_i, axis, q_idx),
                            f4(-1.) * contribution)
def run_target(N, target):
    """
    Time the vectorized discriminant on one target and print the result

    Builds a ufunc from ``discriminant`` for the requested target, runs it
    once on freshly generated float32 inputs, and prints the wall-clock
    execution time and the throughput (``N`` divided by that time). When
    ``-verify`` is present on the command line the output is passed to
    ``check_answer``.

    Parameters
    ----------
    N: int
        Size passed to ``generate_input``; also the numerator of the
        reported throughput
    target: str
        Target name handed to ``vectorize``
    """
    # Single-argument print(...) with %-formatting gives the same output
    # on Python 2 and Python 3; the tuple protects against a tuple target.
    print('== Target %s' % (target,))
    vect_discriminant = vectorize([f4(f4, f4, f4), f8(f8, f8, f8)],
                                  target=target)(discriminant)

    A, B, C = generate_input(N, dtype=np.float32)

    # No output buffer is pre-allocated: the ufunc call returns its own
    # array, so an np.empty() placeholder would only be thrown away.
    ts = time()
    D = vect_discriminant(A, B, C)
    te = time()

    total_time = te - ts
    print('Execution time %.4f' % total_time)
    # A coarse timer can report zero elapsed time; report an infinite
    # throughput instead of raising ZeroDivisionError.
    throughput = N / total_time if total_time > 0 else float('inf')
    print('Throughput %.4f' % throughput)

    if '-verify' in sys.argv[1:]:
        check_answer(D, A, B, C)
def get_omega(omega, r, qbin):
    """
    Generate Omega

    Each thread of the 2D launch grid owns one (pair, q bin) cell and
    stores sin(q * r) / r there, where q is the bin index times ``qbin``.
    Threads that fall outside the array bounds do nothing.

    Parameters
    ----------
    omega: kxQ array
        The output, written in place
    r: k array
        The pair distance array
    qbin: float
        The qbin size
    """
    n_pairs, n_qbins = omega.shape
    pair, q_idx = cuda.grid(2)
    # Only threads that map onto a real (pair, q bin) cell do any work
    if pair < n_pairs and q_idx < n_qbins:
        q = qbin * f4(q_idx)
        dist = r[pair]
        omega[pair, q_idx] = math.sin(q * dist) / dist
def d2_zero(a):
    """
    Zero a 2D array in place

    Each thread of the 2D launch grid sets one element to a float32
    zero; threads outside the array bounds do nothing.

    Parameters
    ----------
    a: 2D array
        The array to clear
    """
    n_rows, n_cols = a.shape[0], a.shape[1]
    row, col = cuda.grid(2)
    if row < n_rows and col < n_cols:
        a[row, col] = f4(0.)
def cuda_k_to_ij(k):
    """
    Convert a linear pair index into an (i, j) index pair with i > j

    The mapping enumerates pairs row by row: 0 -> (1, 0), 1 -> (2, 0),
    2 -> (2, 1), 3 -> (3, 0), ...

    Parameters
    ----------
    k: int
        The linear pair index

    Returns
    -------
    i, j: int32
        The row and column of the pair
    """
    # NOTE(review): everything is done in float32, so very large k may
    # not be represented exactly -- confirm the range callers use.
    one = f4(1)
    half = f4(.5)
    k_f = f4(k)

    # Row index: floor((1 + sqrt(1 + 8k)) / 2)
    discriminant = one + f4(8.) * k_f
    row = math.floor((one + f4(math.sqrt(discriminant))) * half)

    # Column index: what is left after the row * (row - 1) / 2 pairs
    # that belong to the earlier rows
    row_f = f4(row)
    col = k_f - row_f * (row_f - one) * half
    return i4(row), i4(col)
# row if tmp[k * 4] and tmp[k * 4 + 1] and tmp[k * 4 + 2] and tmp[k * 4 + 3]: return True # diag else: if tmp[0] and tmp[5] and tmp[10] and tmp[15]: return True elif tmp[3] and tmp[6] and tmp[9] and tmp[12]: return True else: return False @numba.jit(numba.f4(numba.i1[:, :], numba.i8[:, :]), cache=True) def reward(board, action): tmp = board.copy() tmp[action[0], action[1]] = 1 reward = np.zeros(1, dtype=np.float32) # winning state if winning(tmp.flatten()): reward = 1.0 # reward = 4.0 + np.sum(tmp == 0) // N elif (tmp != 0).all(): reward = -0.1 return reward @numba.jit(numba.f4[:](numba.i1[:, :], numba.i8[:, :]), cache=True) def getFeature(board, action):
# _ - - - _ # _ - - - _ # # example: 2 slices (workers) with window of 3, for parallel solving # (the 'underscore' cells are required by the numerical scheme) import os from joblib import Parallel, delayed import numba as nb import numpy as np from mattflow import config as conf @nb.njit(nb.f4()) def _g(): return 9.81 @nb.njit(nb.f4[:, ::1](nb.f4[:, :, :], nb.i4, nb.i4, nb.b1), nogil=True) def _max_horizontal_speed(U, Nx, Ng, parallel=True): """Max horizontal speed between left and right cells for every vertical interface""" g = _g() if parallel: max_h_speed = np.maximum( # x dim slicing of left values: 0: -1 np.abs(U[1, Ng:-Ng, 0:-1] / U[0, Ng:-Ng, 0:-1]) + np.sqrt(g * np.abs(U[0, Ng:-Ng, 0:-1])),