def __Rf(I, x, r, y, T):
    """Return ``I * __Rg(m) * n`` for the line through ``y`` with direction ``T``.

    The offset ``y - x`` and the direction ``T`` are both transformed by ``r``:
    a single scale factor when ``r.shape[0] == 1``, otherwise the 2x2
    upper-triangular matrix ``[[r[0], r[2]], [0, r[1]]]``.  ``n`` is the
    reciprocal length of the transformed direction and ``m`` is the squared
    length of the transformed offset once its component along the normalised
    transformed direction has been removed.

    Uses the module-level ``DIM``; the matrix branch only fills the first two
    components of its work arrays.
    """
    offset = cuda.local.array((DIM, ), f4)
    r_offset = cuda.local.array((DIM, ), f4)
    r_dir = cuda.local.array((DIM, ), f4)
    perp = cuda.local.array((DIM, ), f4)

    for k in range(DIM):
        offset[k] = y[k] - x[k]

    if r.shape[0] != 1:
        # Upper-triangular 2x2 transform.
        r_offset[0] = r[0] * offset[0] + r[2] * offset[1]
        r_offset[1] = r[1] * offset[1]
        r_dir[0] = r[0] * T[0] + r[2] * T[1]
        r_dir[1] = r[1] * T[1]
    else:
        # Isotropic case: one common scale factor.
        for k in range(DIM):
            r_offset[k] = r[0] * offset[k]
            r_dir[k] = r[0] * T[k]

    # Reciprocal length of the transformed direction.
    sq_len = 0
    for k in range(DIM):
        sq_len += r_dir[k] * r_dir[k]
    inv_len = 1 / c_sqrt(sq_len)

    # Normalise the direction and project the transformed offset onto it.
    along = 0
    for k in range(DIM):
        r_dir[k] *= inv_len
        along += r_dir[k] * r_offset[k]

    # Squared length of what is left after removing that projection.
    perp_sq = 0
    for k in range(DIM):
        perp[k] = r_offset[k] - r_dir[k] * along
        perp_sq += perp[k] * perp[k]

    return I * __Rg(perp_sq) * inv_len
def _bess(X, R, H, J, scale, out):  # pragma: no cover
    """Fill ``out`` with entries of the table ``J`` selected by row radius.

    For row ``i`` the radius is ``sqrt(X[i, 0]**2 + X[i, 1]**2) * R`` and the
    table slot is ``int(radius * H)``.  Slots at or beyond ``J.size`` produce
    0.  When ``scale.size`` is not 1 the looked-up value is multiplied by
    ``scale[i]``; a single-entry ``scale`` is not applied at all.
    """
    n_rows = X.shape[0]
    table_len = J.size

    if scale.size != 1:
        # Per-row scaling.
        for row in numba.prange(n_rows):
            u = X[row, 0]
            v = X[row, 1]
            radius = c_sqrt(u * u + v * v) * R
            slot = int(radius * H)
            if slot >= table_len:
                out[row] = 0
            else:
                out[row] = scale[row] * J[slot]
    else:
        # Unscaled lookup.
        for row in numba.prange(n_rows):
            u = X[row, 0]
            v = X[row, 1]
            radius = c_sqrt(u * u + v * v) * R
            slot = int(radius * H)
            if slot >= table_len:
                out[row] = 0
            else:
                out[row] = J[slot]
def __atom_pw_gpu(x0, x1, x2, pc, h):
    """Evaluate the tabulated radial profile ``pc`` at the point ``(x0, x1, x2)``.

    Returns 0 when the squared radius is at least ``h[0]``.  Otherwise the
    radius is scaled by ``h[1]``, split into an integer cell and a fractional
    remainder, and both are handed to ``__quadratic_interp_gpu``.
    """
    sq_radius = x0 * x0 + x1 * x1 + x2 * x2
    if sq_radius >= h[0]:
        return 0

    position = h[1] * c_sqrt(sq_radius)
    cell = int(position)
    fraction = position - cell
    return __quadratic_interp_gpu(fraction, cell, pc)
def __atom_pw_cpu(x0, x1, x2, pc, h):  # pragma: no cover
    """Evaluate the tabulated radial profile ``pc`` at the point ``(x0, x1, x2)``.

    Returns 0 when the squared radius is at least ``h[0]``.  Otherwise the
    radius is scaled by ``h[1]``, split into an integer cell and a fractional
    remainder, and both are handed to ``__quadratic_interp``.
    """
    sq_radius = x0 * x0 + x1 * x1 + x2 * x2
    if sq_radius >= h[0]:
        return 0

    position = h[1] * c_sqrt(sq_radius)
    cell = int(position)
    fraction = position - cell
    # Alternative interpolant left disabled: __linear_interp(fraction, cell, pc)
    return __quadratic_interp(fraction, cell, pc)
def __projnorm(x, T, DIM):
    """Return ``sqrt(|x|^2 - <x, T>^2)``, clamped to 0 when not positive.

    The squared norm uses the first ``DIM`` entries of ``x``; the inner
    product runs over the ``T.size`` entries of ``T``.  The result is the
    distance from ``x`` to the line spanned by ``T`` provided ``T`` has unit
    length (this is not checked here).
    """
    sq_len = 0
    for k in range(DIM):
        sq_len += x[k] * x[k]

    along = 0
    for k in range(T.size):
        along += x[k] * T[k]

    residual = sq_len - along * along
    if residual > 0:
        return c_sqrt(residual)
    # Rounding can push the residual to zero or below.
    return 0
def __index(x, y, p, R, ind):
    """Write into ``ind`` the clamped range of grid ``p`` around a projection.

    The centre is ``<x, y> / |y|^2`` and the half-width is ``R / |y|``.  The
    grid ``p`` is assumed uniform, i.e. ``p[i] = p[0] + i * (p[1] - p[0])``.
    ``ind[0]`` is clamped below at 0 and ``ind[1]`` above at ``p.size``.
    """
    dot = 0
    sq_len = 0
    for k in range(x.size):
        dot += x[k] * y[k]
        sq_len += y[k] * y[k]

    centre = dot / sq_len
    half_width = R / c_sqrt(sq_len)

    origin = p[0]
    step = p[1] - origin
    ind[0] = int((centre - half_width - origin) / step)
    ind[1] = int((centre + half_width - origin) / step) + 1

    # Clamp to the valid index range; [i, j] is meant as range(i, j + 1).
    ind[0] = max(0, ind[0])
    ind[1] = min(p.size, ind[1] + 1)
def __rf2(I, x, r, y, T, W0, s):
    """Average the 2D ``__Rf`` term over a segment of half-width ``s`` along ``W0``.

    Two shortcuts are taken first, using ``R = __Radius(r)`` and the
    ``__projnorm`` of ``x - y`` against ``T``: ``__total(I, r)`` is returned
    when ``n + R < s`` and 0 when ``n - R > s``.  Otherwise ``__Rf`` is
    sampled at 11 points along ``W0`` and the samples are combined as
    ``I * sum / count * 10 * step``.

    The sample point is advanced by one step *before* each evaluation, so the
    samples sit at ``lo + k * step`` for ``k = 1..11`` with
    ``step = (hi - lo) / 10``.
    """
    DIM = 2
    X = cuda.local.array((2, ), f4)
    for k in range(DIM):
        X[k] = x[k] - y[k]

    R = __Radius(r)
    n = __projnorm(X, T, DIM)
    if n + R < s:
        return __total(I, r)
    if n - R > s:
        return 0

    # Direction transformed by the upper-triangular matrix held in r.
    rT = cuda.local.array((2, ), f4)
    rT[0] = r[0] * T[0] + r[2] * T[1]
    rT[1] = r[1] * T[1]

    # Normalise rT, keeping the reciprocal length for __Rf.
    sq_len = 0
    for k in range(DIM):
        sq_len += rT[k] * rT[k]
    inv_len = 1 / c_sqrt(sq_len)
    for k in range(DIM):
        rT[k] *= inv_len

    # Scratch buffers handed to __Rf.
    buf0 = cuda.local.array((2, ), f4)
    buf1 = cuda.local.array((2, ), f4)
    buf2 = cuda.local.array((2, ), f4)
    limits = cuda.local.array((2, ), f4)
    Y = cuda.local.array((2, ), f4)

    # Outside (limits[0], limits[1]) along W0 either Rf vanishes or the point
    # leaves [-s, s].
    limits[0] = X[0] * W0[0] + X[1] * W0[1]
    limits[1] = min(limits[0] + R, s)
    limits[0] = max(limits[0] - R, -s)

    for k in range(DIM):
        Y[k] = limits[0] * W0[k]
    # From here on limits[0] holds the step between samples.
    limits[0] = .1 * (limits[1] - limits[0])

    count = 0
    acc = 0
    for _ in range(11):
        for k in range(DIM):
            Y[k] += limits[0] * W0[k]
        acc += __Rf(X, r, Y, rT, inv_len, buf0, buf1, buf2, 2)
        count += 1

    return I * acc / count * 10 * limits[0]
def __f(x, r, y, DIM):
    """Return ``exp(-30 * |r (y - x)|^2) * sqrt(30 / pi)``.

    ``r`` is applied as an upper-triangular matrix: for ``DIM == 2`` its
    entries are ``[[r[0], r[2]], [0, r[1]]]``; otherwise the 3x3 form
    ``[[r[0], r[3], r[5]], [0, r[1], r[4]], [0, 0, r[2]]]`` is used.
    """
    diff = cuda.local.array((3, ), f4)
    r_diff = cuda.local.array((3, ), f4)

    for k in range(DIM):
        diff[k] = y[k] - x[k]

    if DIM != 2:
        r_diff[0] = r[0] * diff[0] + r[3] * diff[1] + r[5] * diff[2]
        r_diff[1] = r[1] * diff[1] + r[4] * diff[2]
        r_diff[2] = r[2] * diff[2]
    else:
        r_diff[0] = r[0] * diff[0] + r[2] * diff[1]
        r_diff[1] = r[1] * diff[1]

    sq_len = 0
    for k in range(DIM):
        sq_len += r_diff[k] * r_diff[k]

    return c_exp(-30 * sq_len) * c_sqrt(30 / pi)
def __g(n):
    """Return ``exp(-30 * n) * sqrt(30 / pi)``, truncated to 0 for ``n > 1``."""
    if n > 1:
        return 0
    return c_exp(-30 * n) * c_sqrt(30 / pi)
def __dRf_aniso(I, x, r, y, tt, R, dR, ddR, order):
    """Value, gradient and (for ``order > 1``) Hessian of the anisotropic term.

    The quantity differentiated is ``I * __Rg(|M (y - x)|^2) / |r T|``, where
    ``r`` is applied as the upper-triangular 3x3 matrix
    ``[[r[0], r[3], r[5]], [0, r[1], r[4]], [0, 0, r[2]]]`` and ``M`` removes
    the component along the normalised ``r T``.

    Outputs are written in place:

    * ``R[0]``  -- the value.
    * ``dR``    -- 10 entries: index 0 is d/dI, 1..3 are d/dx, 4..9 are d/dr
      in the order of the ``index`` table below.
    * ``ddR``   -- 10x10 second derivatives in the same ordering.  Row 0 is
      always filled on the non-zero path; the remaining entries are only
      computed when ``order > 1``.

    ``tt`` may have 2 or 3 entries; with 2 the third direction component is
    taken as 0.  When ``__dRg`` reports three zeros, every output is zeroed.

    Uses the module-level ``DIM`` (the explicit ``[2]`` indexing below
    presumes it is 3 -- TODO confirm).
    """
    # __Rf = I*__Rg(|M(y-x)|^2)/|rT|
    Y, rY, rT, MY = cuda.local.array((DIM, ), f4), cuda.local.array(
        (DIM, ), f4), cuda.local.array((DIM, ), f4), cuda.local.array((DIM, ), f4)
    # index[k] = (row, column) of the matrix entry stored in r[k].
    index = cuda.local.array((6, 2), dtype=i4)
    index[0, 0], index[0, 1] = 0, 0
    index[1, 0], index[1, 1] = 1, 1
    index[2, 0], index[2, 1] = 2, 2
    index[3, 0], index[3, 1] = 0, 1
    index[4, 0], index[4, 1] = 1, 2
    index[5, 0], index[5, 1] = 0, 2
    lenT = tt.shape[0]

    # Offset and its image under r.
    for i in range(DIM):
        Y[i] = y[i] - x[i]
    rY[0] = r[0] * Y[0] + r[3] * Y[1] + r[5] * Y[2]
    rY[1] = r[1] * Y[1] + r[4] * Y[2]
    rY[2] = r[2] * Y[2]

    # Direction padded to DIM entries, and its image under r.
    T = cuda.local.array((DIM, ), f4)
    T[0], T[1] = tt[0], tt[1]
    if lenT == 2:
        T[2] = 0
        rT[0] = r[0] * T[0] + r[3] * T[1]
        rT[1] = r[1] * T[1]
        rT[2] = 0
    else:
        T[2] = tt[2]
        rT[0] = r[0] * T[0] + r[3] * T[1] + r[5] * T[2]
        rT[1] = r[1] * T[1] + r[4] * T[2]
        rT[2] = r[2] * T[2]

    # Normalise rT:
    # (n ends up as 1/|rT| and is reused as a factor below)
    n = 0
    for i in range(lenT):
        n += rT[i] * rT[i]
    n = 1 / c_sqrt(n)
    for i in range(lenT):
        rT[i] *= n

    # Final vector:
    # IP = <rT, rY>, MY = rY - IP * rT, m = |MY|^2
    IP = 0
    for i in range(lenT):
        IP += rT[i] * rY[i]

    m = 0
    for i in range(DIM):
        MY[i] = rY[i] - rT[i] * IP
        m += MY[i] * MY[i]

    # Preliminary derivatives:
    dMydT = cuda.local.array((DIM, DIM), f4)
    # Missing a factor of n
    for i in range(DIM):
        for j in range(DIM):
            dMydT[i, j] = rT[i] * (IP * rT[j] - MY[j])
        dMydT[i, i] -= IP

    dg, dJdy, dJdT = cuda.local.array((3, ), f4), cuda.local.array(
        (DIM, ), f4), cuda.local.array((DIM, ), f4)
    # __dRg fills dg with three values that are used below as the profile's
    # value (dg[0]), first (dg[1]) and second (dg[2]) derivative factors.
    __dRg(m, dg)

    if (dg[0] == 0) and (dg[1] == 0) and (dg[2] == 0):
        # Nothing contributes: zero every output.
        R[0] = 0
        for i in range(10):
            dR[i] = 0
            for j in range(10):
                ddR[i, j] = 0
    else:
        # First derivatives with respect to the transformed offset (dJdy)
        # and the transformed direction (dJdT).
        tmp = dg[1] * 2 * n
        for i in range(DIM):
            dJdy[i] = tmp * MY[i]
        tmp = -n * n
        for i in range(DIM):
            dJdT[i] = tmp * (2 * IP * MY[i] * dg[1] + rT[i] * dg[0])

        ddJdyy, ddJdyT, ddJdTT = cuda.local.array(
            (DIM, DIM), f4), cuda.local.array(
                (DIM, DIM), f4), cuda.local.array((DIM, DIM), f4)
        if order > 1:
            # Second derivatives; the symmetric blocks are computed on and
            # above the diagonal and mirrored from rows already filled.
            tmp = 2 * n
            for i in range(DIM):
                for j in range(i):
                    ddJdyy[i, j] = ddJdyy[j, i]
                ddJdyy[i, i] = tmp * (2 * MY[i] *
                                      MY[i] * dg[2] + (1 - rT[i] * rT[i]) * dg[1])
                for j in range(i + 1, DIM):
                    ddJdyy[i, j] = tmp * (2 * MY[i] * MY[j] * dg[2] - rT[i] * rT[j] * dg[1])
            tmp = 2 * n * n
            for i in range(DIM):
                for j in range(DIM):
                    ddJdyT[i, j] = tmp * ((dMydT[i, j] - MY[i] * rT[j]) * dg[1] - 2 * IP * MY[i] * MY[j] * dg[2])
            tmp = 2 * n * n * n
            for i in range(DIM):
                for j in range(i):
                    ddJdTT[i, j] = ddJdTT[j, i]
                ddJdTT[i, i] = tmp * (
                    (1.5 * rT[i] * rT[i] - .5) * dg[0]
                    + (2 * IP * MY[i] * rT[i] + IP * rT[i] * MY[i] - IP * dMydT[i, i] - MY[i] * MY[i]) * dg[1]
                    + 2 * IP * IP * MY[i] * MY[i] * dg[2])
                for j in range(i + 1, DIM):
                    ddJdTT[i, j] = tmp * (
                        1.5 * rT[i] * rT[j] * dg[0]
                        + (2 * IP * MY[i] * rT[j] + IP * rT[i] * MY[j] - IP * dMydT[i, j] - MY[i] * MY[j]) * dg[1]
                        + 2 * IP * IP * MY[i] * MY[j] * dg[2])

        # Fill in values:
        R[0] = I * dg[0] * n
        # dI
        dR[0] = dg[0] * n
        ddR[0, 0] = 0
        # dIdx
        # ddR[0, 1 + i] = - r_{j,i}dJdy[j]
        ddR[0, 1 + 0] = -r[0] * dJdy[0]
        ddR[0, 1 + 1] = -(r[3] * dJdy[0] + r[1] * dJdy[1])
        ddR[0, 1 + 2] = -(r[5] * dJdy[0] + r[4] * dJdy[1] + r[2] * dJdy[2])
        # dIdr
        # ddR[0, 4 + (I, i)] = dJdy[I] * Y[i] + dJdT[I] * T[i]
        for i in range(6):
            i0, i1 = index[i]
            ddR[0, 4 + i] = dJdy[i0] * Y[i1] + dJdT[i0] * T[i1]
        # dr, dx
        # (the gradient in x and r is I times the mixed I-derivative row)
        for i in range(9):
            dR[1 + i] = I * ddR[0, 1 + i]

        if order > 1:
            # d^2r
            # ddR[1+(I,i), 1+(J,j)] = I*( ddJdyy[I,J]Y[i]Y[j] + ddJdYT[I,J]Y[i]T[j]
            #                 + ddJdYT[J,I]Y[j]T[i] + ddJdTT[I,J]T[i]T[j])
            for i in range(6):
                i0, i1 = index[i]
                for j in range(i, 6):
                    j0, j1 = index[j]
                    ddR[4 + i, 4 + j] = I * (ddJdyy[i0, j0] * Y[i1] * Y[j1]
                                             + ddJdyT[i0, j0] * Y[i1] * T[j1]
                                             + ddJdyT[j0, i0] * Y[j1] * T[i1]
                                             + ddJdTT[i0, j0] * T[i1] * T[j1])
            # dxdr
            # ddR[1+j, 4+(I,i)] = -I(ddJdyy[I,k]Y[i]r[k,j] + ddJdYT[k,I]T[i]r[k,j]
            #                 + dJdY[I](i==j))
            # 0 -> 0,0
            # 1 -> 1,1
            # 2 -> 2,2
            # 3 -> 0,1
            # 4 -> 1,2
            # 5 -> 0,2
            for i in range(6):
                i0, i1 = index[i]
                ddR[1 + 0, 4 + i] = -I * (
                    (ddJdyy[i0, 0] * Y[i1] + ddJdyT[0, i0] * T[i1]) * r[0])
                ddR[1 + 1, 4 + i] = -I * (
                    (ddJdyy[i0, 1] * Y[i1] + ddJdyT[1, i0] * T[i1]) * r[1]
                    + (ddJdyy[i0, 0] * Y[i1] + ddJdyT[0, i0] * T[i1]) * r[3])
                ddR[1 + 2, 4 + i] = -I * (
                    (ddJdyy[i0, 2] * Y[i1] + ddJdyT[2, i0] * T[i1]) * r[2]
                    + (ddJdyy[i0, 1] * Y[i1] + ddJdyT[1, i0] * T[i1]) * r[4]
                    + (ddJdyy[i0, 0] * Y[i1] + ddJdyT[0, i0] * T[i1]) * r[5])
                # j == i1
                ddR[1 + i1, 4 + i] -= I * dJdy[i0]
            # d^2x
            # ddR[1+i,1+j] = I(ddJdYY[I,J]r[I,i]r[J,j])
            ddR[1 + 0, 1 + 0] = I * (ddJdyy[0, 0] * r[0] * r[0])
            ddR[1 + 0, 1 + 1] = I * (ddJdyy[0, 1] * r[0] * r[1] + ddJdyy[0, 0] * r[0] * r[3])
            ddR[1 + 0, 1 + 2] = I * (ddJdyy[0, 2] * r[0] * r[2] + ddJdyy[0, 1] * r[0] * r[4] + ddJdyy[0, 0] * r[0] * r[5])
            ddR[1 + 1, 1 + 1] = I * (
                ddJdyy[1, 1] * r[1] * r[1] + ddJdyy[1, 0] * r[1] * r[3]
                + ddJdyy[0, 1] * r[3] * r[1] + ddJdyy[0, 0] * r[3] * r[3])
            ddR[1 + 1, 1 + 2] = I * (
                ddJdyy[1, 2] * r[1] * r[2] + ddJdyy[1, 1] * r[1] * r[4] + ddJdyy[1, 0] * r[1] * r[5]
                + ddJdyy[0, 2] * r[3] * r[2] + ddJdyy[0, 1] * r[3] * r[4] + ddJdyy[0, 0] * r[3] * r[5])
            ddR[1 + 2, 1 + 2] = I * (
                ddJdyy[2, 2] * r[2] * r[2] + ddJdyy[2, 1] * r[2] * r[4] + ddJdyy[2, 0] * r[2] * r[5]
                + ddJdyy[1, 2] * r[4] * r[2] + ddJdyy[1, 1] * r[4] * r[4] + ddJdyy[1, 0] * r[4] * r[5]
                + ddJdyy[0, 2] * r[5] * r[2] + ddJdyy[0, 1] * r[5] * r[4] + ddJdyy[0, 0] * r[5] * r[5])

        # Symmetrise the Hessian
        # NOTE(review): assumed to run for every `order` on the non-zero path
        # (copies the upper triangle into the lower one) -- confirm nesting.
        for i in range(10):
            for j in range(i):
                ddR[i, j] = ddR[j, i]
def __drf3(I, x, r, Y, T, W0, W1, s, R, dR, ddR, order):
    """Accumulate the 3D term and its derivatives over an 11 x 11 sample grid.

    Parameters
    ----------
    I, x, r : blob intensity, centre and upper-triangular transform entries.
    Y : reference point; only read (``x - Y`` is formed once on entry).
    T : direction passed to ``__projnorm`` and ``__dRf3``.
    W0, W1 : the two directions spanning the sample grid.
    s : half-width of the sampled square along ``W0`` and ``W1``.
    R, dR, ddR : outputs of sizes 1, 10 and 10x10, overwritten in place.
    order : forwarded to ``__dRf3``.

    Behaviour
    ---------
    All outputs are zeroed first.  With ``rr = __Radius(r)`` and
    ``n = __projnorm(x - Y, T, 3)``: if ``n + rr < s`` only ``R[0]`` is set
    (to ``__total(I, r)``) and the derivatives stay zero; if ``n - rr > s``
    everything stays zero.  Otherwise ``__dRf3`` is evaluated at 11 x 11
    points, the results are summed, scaled by
    ``100 * step0 * step1 / count`` and ``ddR`` is mirrored so that it is
    symmetric.

    The sample point is advanced *before* each evaluation, so along each axis
    the samples sit at ``lo + k * step`` for ``k = 1..11``.

    Unlike the earlier version, the sample points are written to a private
    scratch array instead of into ``Y``: the caller's reference point is no
    longer clobbered, matching how ``__drf2`` and ``__rf3`` treat their ``y``.
    """
    DIM = 3
    X = cuda.local.array((3, ), f4)
    for i in range(DIM):
        X[i] = x[i] - Y[i]

    # Start from an all-zero result so both early exits below are valid.
    R[0] = 0
    for j0 in range(10):
        dR[j0] = 0
        for j1 in range(10):
            ddR[j0, j1] = 0

    rr, n = __Radius(r), __projnorm(X, T, DIM)
    if n + rr < s:
        R[0] = __total(I, r)
        return
    elif n - rr > s:
        return

    # Direction transformed by the upper-triangular matrix held in r.
    rT = cuda.local.array((3, ), f4)
    rT[0] = r[0] * T[0] + r[3] * T[1] + r[5] * T[2]
    rT[1] = r[1] * T[1] + r[4] * T[2]
    rT[2] = r[2] * T[2]

    # Normalise rT (n becomes the reciprocal length, forwarded to __dRf3):
    n = 0
    for i in range(DIM):
        n += rT[i] * rT[i]
    n = 1 / c_sqrt(n)
    for i in range(DIM):
        rT[i] *= n

    # Square of points is: x-s -> x+s
    # buf0..buf2 are scratch for __dRf3; buf3..buf5 receive its value,
    # gradient and Hessian for a single sample.
    buf0, buf1, buf2 = cuda.local.array((3, ), f4), cuda.local.array(
        (3, ), f4), cuda.local.array((3, ), f4)
    buf3, buf4, buf5 = cuda.local.array((1, ), f4), cuda.local.array(
        (10, ), f4), cuda.local.array((10, 10), f4)
    limits = cuda.local.array((2, 2), f4)
    # Private sample point: keeps the caller's Y untouched.
    point = cuda.local.array((3, ), f4)

    # <x,w_i> \not\in (limits[i,0], limits[i,1]) => Rf(x)=0 or x\not\in [-s,s]
    limits[0, 0] = X[0] * W0[0] + X[1] * W0[1] + X[2] * W0[2]
    limits[0, 1] = min(limits[0, 0] + rr, s)
    limits[0, 0] = max(limits[0, 0] - rr, -s)
    limits[1, 0] = X[0] * W1[0] + X[1] * W1[1] + X[2] * W1[2]
    limits[1, 1] = min(limits[1, 0] + rr, s)
    limits[1, 0] = max(limits[1, 0] - rr, -s)

    for i in range(DIM):
        point[i] = limits[0, 0] * W0[i] + limits[1, 0] * W1[i]
    # From here on limits[i, 0] holds the step along axis i.
    for i in range(2):
        limits[i, 0] = .1 * (limits[i, 1] - limits[i, 0])

    count = 0
    for _ in range(11):
        for i in range(DIM):
            point[i] += limits[0, 0] * W0[i]
        for __ in range(11):
            for i in range(DIM):
                point[i] += limits[1, 0] * W1[i]
            __dRf3(I, X, r, point, T, rT, n, buf0, buf1, buf2, buf3, buf4,
                   buf5, order)
            R[0] += buf3[0]
            for j0 in range(10):
                dR[j0] += buf4[j0]
                # Only the upper triangle is accumulated; mirrored at the end.
                for j1 in range(j0, 10):
                    ddR[j0, j1] += buf5[j0, j1]
            count += 1
        # Rewind the inner axis before the next outer step.
        for i in range(DIM):
            point[i] -= 11 * limits[1, 0] * W1[i]

    scale = 100 * limits[0, 0] * limits[1, 0] / count
    R[0] *= scale
    for j0 in range(10):
        dR[j0] *= scale
        for j1 in range(j0, 10):
            ddR[j0, j1] *= scale
            ddR[j1, j0] = ddR[j0, j1]
def __drf2(I, x, r, y, T, W0, s, R, dR, ddR, order):
    """Accumulate the 2D term and its derivatives over 11 samples along ``W0``.

    Outputs ``R`` (1 entry), ``dR`` (6 entries) and ``ddR`` (6x6) are
    overwritten in place.  With ``rr = __Radius(r)`` and
    ``n = __projnorm(x - y, T, 2)``: if ``n + rr < s`` the value is
    ``__total(I, r)`` with all derivatives zero; if ``n - rr > s`` every
    output is zero.  Otherwise ``__dRf2`` is evaluated at 11 points, the
    results are summed, scaled by ``10 * step / count`` and ``ddR`` is
    mirrored so that it is symmetric.

    The sample point is advanced *before* each evaluation, so the samples sit
    at ``lo + k * step`` for ``k = 1..11``.
    """
    DIM = 2
    X = cuda.local.array((2, ), f4)
    for k in range(DIM):
        X[k] = x[k] - y[k]

    rr = __Radius(r)
    n = __projnorm(X, T, DIM)

    # Shortcuts: the first test wins when both hold.
    all_in = n + rr < s
    all_out = n - rr > s
    if all_in or all_out:
        if all_in:
            R[0] = __total(I, r)
        else:
            R[0] = 0
        for a in range(6):
            dR[a] = 0
            for b in range(6):
                ddR[a, b] = 0
        return

    # Direction transformed by the upper-triangular matrix held in r.
    rT = cuda.local.array((2, ), f4)
    rT[0] = r[0] * T[0] + r[2] * T[1]
    rT[1] = r[1] * T[1]

    # Normalise rT, keeping the reciprocal length for __dRf2.
    sq_len = 0
    for k in range(DIM):
        sq_len += rT[k] * rT[k]
    inv_len = 1 / c_sqrt(sq_len)
    for k in range(DIM):
        rT[k] *= inv_len

    # buf0..buf2 are scratch for __dRf2; buf3..buf5 receive its value,
    # gradient and Hessian for a single sample.
    buf0 = cuda.local.array((2, ), f4)
    buf1 = cuda.local.array((2, ), f4)
    buf2 = cuda.local.array((2, ), f4)
    buf3 = cuda.local.array((1, ), f4)
    buf4 = cuda.local.array((6, ), f4)
    buf5 = cuda.local.array((6, 6), f4)
    limits = cuda.local.array((2, ), f4)
    Y = cuda.local.array((2, ), f4)

    limits[0] = X[0] * W0[0] + X[1] * W0[1]
    limits[1] = min(limits[0] + rr, s)
    limits[0] = max(limits[0] - rr, -s)

    for k in range(DIM):
        Y[k] = limits[0] * W0[k]
    # From here on limits[0] holds the step between samples.
    limits[0] = .1 * (limits[1] - limits[0])

    # Only the upper triangle of ddR is accumulated; it is mirrored at the end.
    R[0] = 0
    for a in range(6):
        dR[a] = 0
        for b in range(a, 6):
            ddR[a, b] = 0

    count = 0
    for _ in range(11):
        for k in range(DIM):
            Y[k] += limits[0] * W0[k]
        __dRf2(I, X, r, Y, T, rT, inv_len, buf0, buf1, buf2, buf3, buf4,
               buf5, order)
        R[0] += buf3[0]
        for a in range(6):
            dR[a] += buf4[a]
            for b in range(a, 6):
                ddR[a, b] += buf5[a, b]
        count += 1

    scale = 10 * limits[0] / count
    R[0] *= scale
    for a in range(6):
        dR[a] *= scale
        for b in range(a, 6):
            ddR[a, b] *= scale
            ddR[b, a] = ddR[a, b]
def __norm(x, DIM):
    """Return the Euclidean length of the first ``DIM`` entries of ``x``."""
    sq_len = 0
    for k in range(DIM):
        sq_len += x[k] * x[k]
    return c_sqrt(sq_len)
def __total(I, r):
    """Return ``I`` divided by the product of the diagonal of ``r``, rescaled.

    A 3-entry ``r`` is treated as the 2D case (diagonal ``r[0], r[1]``,
    factor ``sqrt(pi / 30)``); any other size uses the diagonal
    ``r[0], r[1], r[2]`` with factor ``pi / 30``.
    """
    if r.size != 3:
        return I / (r[0] * r[1] * r[2]) * (pi / 30)
    return I / (r[0] * r[1]) * c_sqrt(pi / 30)
def __rf3(I, x, r, y, T, W0, W1, s):
    """Average the 3D ``__Rf`` term over an 11 x 11 grid spanned by ``W0``, ``W1``.

    Two shortcuts are taken first, using ``R = __Radius(r)`` and the
    ``__projnorm`` of ``x - y`` against ``T``: ``__total(I, r)`` is returned
    when ``n + R < s`` and 0 when ``n - R > s``.  Otherwise ``__Rf`` is
    sampled on the grid and the samples are combined as
    ``I * sum / count * 100 * step0 * step1``.

    ``T`` may have 2 or 3 entries; with 2 the third component of the
    transformed direction is set to 0.  The sample point is advanced *before*
    each evaluation, so along each axis the samples sit at ``lo + k * step``
    for ``k = 1..11``.
    """
    DIM = 3
    lenT = T.shape[0]
    X = cuda.local.array((3, ), f4)
    for k in range(DIM):
        X[k] = x[k] - y[k]

    R = __Radius(r)
    n = __projnorm(X, T, DIM)
    if n + R < s:
        return __total(I, r)
    if n - R > s:
        return 0

    # Direction transformed by the upper-triangular matrix held in r.
    rT = cuda.local.array((3, ), f4)
    if lenT != 2:
        rT[0] = r[0] * T[0] + r[3] * T[1] + r[5] * T[2]
        rT[1] = r[1] * T[1] + r[4] * T[2]
        rT[2] = r[2] * T[2]
    else:
        rT[0] = r[0] * T[0] + r[3] * T[1]
        rT[1] = r[1] * T[1]
        rT[2] = 0

    # Normalise rT, keeping the reciprocal length for __Rf.
    sq_len = 0
    for k in range(lenT):
        sq_len += rT[k] * rT[k]
    inv_len = 1 / c_sqrt(sq_len)
    for k in range(lenT):
        rT[k] *= inv_len

    # Scratch buffers handed to __Rf.
    buf0 = cuda.local.array((3, ), f4)
    buf1 = cuda.local.array((3, ), f4)
    buf2 = cuda.local.array((3, ), f4)
    limits = cuda.local.array((2, 2), f4)
    Y = cuda.local.array((3, ), f4)

    # Outside (limits[i, 0], limits[i, 1]) along axis i either Rf vanishes or
    # the point leaves [-s, s].
    limits[0, 0] = X[0] * W0[0] + X[1] * W0[1] + X[2] * W0[2]
    limits[0, 1] = min(limits[0, 0] + R, s)
    limits[0, 0] = max(limits[0, 0] - R, -s)
    limits[1, 0] = X[0] * W1[0] + X[1] * W1[1] + X[2] * W1[2]
    limits[1, 1] = min(limits[1, 0] + R, s)
    limits[1, 0] = max(limits[1, 0] - R, -s)

    for k in range(DIM):
        Y[k] = limits[0, 0] * W0[k] + limits[1, 0] * W1[k]
    # From here on limits[i, 0] holds the step along axis i.
    for axis in range(2):
        limits[axis, 0] = .1 * (limits[axis, 1] - limits[axis, 0])

    count = 0
    acc = 0
    for _ in range(11):
        for k in range(DIM):
            Y[k] += limits[0, 0] * W0[k]
        for __ in range(11):
            for k in range(DIM):
                Y[k] += limits[1, 0] * W1[k]
            acc += __Rf(X, r, Y, rT, inv_len, buf0, buf1, buf2, 3)
            count += 1
        # Rewind the inner axis before the next outer step.
        for k in range(DIM):
            Y[k] -= 11 * limits[1, 0] * W1[k]

    if count != 0:
        return I * acc / count * 100 * limits[0, 0] * limits[1, 0]
    return 0