def __dRg(n, dg):
    """
    Fill ``dg`` with a truncated Gaussian profile and its derivatives.

    For ``n > 1`` all three entries are set to zero. Otherwise, with
    ``g(n) = exp(-30 * n)``:

        dg[0] = g(n),    dg[1] = g'(n) = -30 g(n),    dg[2] = g''(n) = 900 g(n)

    Parameters
    ----------
    n : scalar
        Argument of the profile (the profile is cut off for ``n > 1``).
    dg : writable buffer with at least 3 entries
        Output; entries 0, 1 and 2 are overwritten.
    """
    if n > 1:
        # Beyond the cut-off the value and both derivatives vanish.
        for k in range(3):
            dg[k] = 0
        return

    dg[0] = c_exp(-30 * n)
    # The derivatives are built from the value as stored in the buffer.
    dg[1] = -30 * dg[0]
    dg[2] = 900 * dg[0]
def __f(x, r, y, DIM):
    """
    Evaluate ``sqrt(30 / pi) * exp(-30 * |r (y - x)|^2)``.

    ``r`` is read as a packed upper-triangular matrix:
    ``[r00, r11, r01]`` when ``DIM == 2`` and
    ``[r00, r11, r22, r01, r12, r02]`` otherwise (the second layout reads
    three components, so ``DIM`` is expected to be 2 or 3).

    Parameters
    ----------
    x, y : indexable with at least ``DIM`` entries
        The two points; only the difference ``y - x`` is used.
    r : indexable
        Packed upper-triangular matrix as described above.
    DIM : int
        Number of spatial dimensions.

    Returns
    -------
    scalar
        The Gaussian value described above.
    """
    offset = cuda.local.array((3, ), f4)
    mapped = cuda.local.array((3, ), f4)

    # offset = y - x
    for k in range(DIM):
        offset[k] = y[k] - x[k]

    # mapped = r @ offset, written out for the packed triangular storage.
    if DIM == 2:
        mapped[0] = r[0] * offset[0] + r[2] * offset[1]
        mapped[1] = r[1] * offset[1]
    else:
        mapped[0] = r[0] * offset[0] + r[3] * offset[1] + r[5] * offset[2]
        mapped[1] = r[1] * offset[1] + r[4] * offset[2]
        mapped[2] = r[2] * offset[2]

    # Squared Euclidean norm of the mapped offset.
    norm_sq = 0
    for k in range(DIM):
        norm_sq += mapped[k] * mapped[k]

    return c_exp(-30 * norm_sq) * c_sqrt(30 / pi)
def __Rf(x, r, y, rT, n, Y, rY, MY, DIM):
    """
    Evaluate ``n * exp(-30 * |MY|^2)`` where ``MY = rY - rT * (rT . rY)``
    and ``rY = r (y - x)``.

    ``r`` is read as a packed upper-triangular matrix:
    ``[r00, r11, r01]`` when ``DIM == 2`` and
    ``[r00, r11, r22, r01, r12, r02]`` otherwise.

    Parameters
    ----------
    x, y : indexable with at least ``DIM`` entries
        The two points; only ``y - x`` is used.
    r : indexable
        Packed upper-triangular matrix as described above.
    rT : indexable with at least ``DIM`` entries
        Direction whose component is removed from ``rY``
        (presumably a unit vector -- TODO confirm against the caller).
    n : scalar
        Factor multiplying the exponential.
    Y, rY, MY : writable buffers with at least ``DIM`` entries
        Scratch space supplied by the caller. On return they hold
        ``y - x``, ``r (y - x)`` and ``rY - rT * (rT . rY)`` respectively.
    DIM : int
        Number of spatial dimensions.

    Returns
    -------
    scalar
        ``exp(-30 * |MY|^2) * n``
    """
    # Y = y - x
    for k in range(DIM):
        Y[k] = y[k] - x[k]

    # rY = r @ Y for the packed triangular storage.
    if DIM == 2:
        rY[0] = r[0] * Y[0] + r[2] * Y[1]
        rY[1] = r[1] * Y[1]
    else:
        rY[0] = r[0] * Y[0] + r[3] * Y[1] + r[5] * Y[2]
        rY[1] = r[1] * Y[1] + r[4] * Y[2]
        rY[2] = r[2] * Y[2]

    # Inner product of rT with rY.
    along = 0
    for k in range(DIM):
        along += rT[k] * rY[k]

    # Remove the rT-component and accumulate the squared norm of what is left.
    perp_sq = 0
    for k in range(DIM):
        MY[k] = rY[k] - rT[k] * along
        perp_sq += MY[k] * MY[k]

    return c_exp(-30 * perp_sq) * n
def __Rg(n):
    """
    Truncated Gaussian profile: ``exp(-30 * n)``, or 0 when ``n > 1``.

    Mathematically ``exp(-30 n)`` equals the integral over ``t`` of
    ``sqrt(30 / pi) * exp(-30 * (t^2 + n))``.
    """
    if n > 1:
        # Beyond the cut-off the profile is treated as exactly zero.
        return 0
    return c_exp(-30 * n)
def __g(n):
    """
    Truncated, scaled Gaussian profile:
    ``sqrt(30 / pi) * exp(-30 * n)``, or 0 when ``n > 1``.
    """
    if n > 1:
        # Beyond the cut-off the profile is treated as exactly zero.
        return 0
    return c_exp(-30 * n) * c_sqrt(30 / pi)
def __dRf3(I, x, r, y, T, rT, n, Y, rY, MY, R, dR, ddR, order):
    """
    Evaluate the 3D projected Gaussian term together with its derivatives.

    With ``Y = y - x``, ``rY = r Y``, ``IP = rT . rY``,
    ``MY = rY - rT * IP`` and ``m = |MY|^2`` the value stored in ``R[0]``
    is ``I * exp(-30 * m) * n``. First derivatives are written to ``dR``
    and, when ``order > 1``, second derivatives are written to ``ddR``
    following the formulas quoted in the in-line comments.

    The ten differentiation variables are ordered as: index 0 is ``I``,
    indices 1-3 are ``x[0], x[1], x[2]`` and indices 4-9 are
    ``r[0], ..., r[5]``.

    Parameters
    ----------
    I : scalar
        Multiplicative factor of the term.
    x, y : indexable, 3 entries read
        The two points; only ``y - x`` is used.
    r : indexable, 6 entries read
        Packed upper-triangular matrix ``[r00, r11, r22, r01, r12, r02]``.
    T, rT : indexable, 3 entries read
        ``rT`` is the direction removed from ``rY``; ``T`` enters the
        derivatives with respect to ``r``. Presumably ``rT = n * (r T)``
        with ``n = 1 / |r T|`` -- TODO confirm against the caller.
    n : scalar
        Factor multiplying the exponential (see note on ``rT``).
    Y, rY, MY : writable buffers, 3 entries
        Scratch space supplied by the caller; overwritten with ``y - x``,
        ``r (y - x)`` and ``rY - rT * IP``.
    R : writable buffer
        ``R[0]`` receives the value.
    dR : writable buffer, entries 0..9 written
        First derivatives in the variable order given above.
    ddR : writable 2D buffer, indices 0..9 used on both axes
        Row 0 (columns 0..9) is always written and holds the derivatives
        of ``R / I``. When ``order > 1`` the ``x``-``x``, ``x``-``r`` and
        ``r``-``r`` blocks are written for row index <= column index
        only; the remaining entries are not touched here.
    order : int
        Second derivatives are only computed when ``order > 1``.
    """
    DIM = 3
    # index[k] = (row, column) of the packed entry r[k] in the 3x3 matrix.
    index = cuda.local.array((6, 2), dtype=i4)
    index[0, 0], index[0, 1] = 0, 0
    index[1, 0], index[1, 1] = 1, 1
    index[2, 0], index[2, 1] = 2, 2
    index[3, 0], index[3, 1] = 0, 1
    index[4, 0], index[4, 1] = 1, 2
    index[5, 0], index[5, 1] = 0, 2

    # Y = y - x
    for i in range(DIM):
        Y[i] = y[i] - x[i]
    # rY = r @ Y; DIM is fixed to 3 above so only the else-branch is taken.
    if DIM == 2:
        rY[0] = r[0] * Y[0] + r[2] * Y[1]
        rY[1] = r[1] * Y[1]
    else:
        rY[0] = r[0] * Y[0] + r[3] * Y[1] + r[5] * Y[2]
        rY[1] = r[1] * Y[1] + r[4] * Y[2]
        rY[2] = r[2] * Y[2]

    # Final vector: MY = rY - rT * (rT . rY), m = |MY|^2
    IP = 0
    for i in range(DIM):
        IP += rT[i] * rY[i]
    m = 0
    for i in range(DIM):
        MY[i] = rY[i] - rT[i] * IP
        m += MY[i] * MY[i]

    # Preliminary derivatives:
    dMydT = cuda.local.array((3, 3), f4)
    # Missing a factor of n
    for i in range(DIM):
        for j in range(DIM):
            dMydT[i, j] = rT[i] * (IP * rT[j] - MY[j])
        dMydT[i, i] -= IP

    # dg holds exp(-30 m) and its first and second derivative in m.
    dg, dJdy, dJdT = cuda.local.array((3, ), f4), cuda.local.array(
        (3, ), f4), cuda.local.array((3, ), f4)
    dg[0] = c_exp(-30 * m)
    dg[1] = -30 * dg[0]
    dg[2] = 900 * dg[0]

    # First order derivatives:
    tmp = dg[1] * 2 * n
    for i in range(DIM):
        dJdy[i] = tmp * MY[i]
    tmp = -n * n
    for i in range(DIM):
        dJdT[i] = tmp * (2 * IP * MY[i] * dg[1] + rT[i] * dg[0])

    # Second order derivatives; the arrays are only filled when order > 1.
    ddJdyy, ddJdyT, ddJdTT = cuda.local.array((3, 3), f4), cuda.local.array(
        (3, 3), f4), cuda.local.array((3, 3), f4)
    if order > 1:
        tmp = 2 * n
        for i in range(DIM):
            # Lower triangle is copied from the rows already computed.
            for j in range(i):
                ddJdyy[i, j] = ddJdyy[j, i]
            ddJdyy[i, i] = tmp * (2 * MY[i] * MY[i] * dg[2]
                                  + (1 - rT[i] * rT[i]) * dg[1])
            for j in range(i + 1, DIM):
                ddJdyy[i, j] = tmp * (2 * MY[i] * MY[j] * dg[2]
                                      - rT[i] * rT[j] * dg[1])
        tmp = 2 * n * n
        for i in range(DIM):
            for j in range(DIM):
                ddJdyT[i, j] = tmp * ((dMydT[i, j] - MY[i] * rT[j]) * dg[1]
                                      - 2 * IP * MY[i] * MY[j] * dg[2])
        tmp = 2 * n * n * n
        for i in range(DIM):
            # Lower triangle is copied from the rows already computed.
            for j in range(i):
                ddJdTT[i, j] = ddJdTT[j, i]
            ddJdTT[i, i] = tmp * ((1.5 * rT[i] * rT[i] - .5) * dg[0]
                                  + (2 * IP * MY[i] * rT[i] + IP * rT[i] * MY[i]
                                     - IP * dMydT[i, i] - MY[i] * MY[i]) * dg[1]
                                  + 2 * IP * IP * MY[i] * MY[i] * dg[2])
            for j in range(i + 1, DIM):
                ddJdTT[i, j] = tmp * (1.5 * rT[i] * rT[j] * dg[0]
                                      + (2 * IP * MY[i] * rT[j] + IP * rT[i] * MY[j]
                                         - IP * dMydT[i, j] - MY[i] * MY[j]) * dg[1]
                                      + 2 * IP * IP * MY[i] * MY[j] * dg[2])

    # Fill in values:
    R[0] = I * dg[0] * n

    # dI
    dR[0] = dg[0] * n
    ddR[0, 0] = 0
    # dIdx
    # ddR[0, 1 + i] = - r_{j,i}dJdy[j]
    ddR[0, 1 + 0] = -r[0] * dJdy[0]
    ddR[0, 1 + 1] = -(r[3] * dJdy[0] + r[1] * dJdy[1])
    ddR[0, 1 + 2] = -(r[5] * dJdy[0] + r[4] * dJdy[1] + r[2] * dJdy[2])
    # dIdr
    # ddR[0, 4 + (I, i)] = dJdy[I] * Y[i] + dJdT[I] * T[i]
    for i in range(6):
        i0, i1 = index[i]
        ddR[0, 4 + i] = dJdy[i0] * Y[i1] + dJdT[i0] * T[i1]

    # dr, dx: row 0 of ddR holds the derivatives without the factor I,
    # so the first derivatives are obtained by scaling that row.
    for i in range(9):
        dR[1 + i] = I * ddR[0, 1 + i]

    if order > 1:
        # d^2r (only entries with column >= row are written)
        # ddR[4+(I,i), 4+(J,j)] = I*( ddJdyy[I,J]Y[i]Y[j] + ddJdYT[I,J]Y[i]T[j]
        #                    + ddJdYT[J,I]Y[j]T[i] + ddJdTT[I,J]T[i]T[j])
        for i in range(6):
            i0, i1 = index[i]
            for j in range(i, 6):
                j0, j1 = index[j]
                ddR[4 + i, 4 + j] = I * (ddJdyy[i0, j0] * Y[i1] * Y[j1]
                                         + ddJdyT[i0, j0] * Y[i1] * T[j1]
                                         + ddJdyT[j0, i0] * Y[j1] * T[i1]
                                         + ddJdTT[i0, j0] * T[i1] * T[j1])

        # dxdr
        # ddR[1+j, 4+(I,i)] = -I(ddJdyy[I,k]Y[i]r[k,j] + ddJdYT[k,I]T[i]r[k,j]
        #                    + dJdY[I](i==j))
        # Packed index -> (row, column) of r:
        # 0 -> 0,0
        # 1 -> 1,1
        # 2 -> 2,2
        # 3 -> 0,1
        # 4 -> 1,2
        # 5 -> 0,2
        for i in range(6):
            i0, i1 = index[i]
            ddR[1 + 0, 4 + i] = -I * (
                (ddJdyy[i0, 0] * Y[i1] + ddJdyT[0, i0] * T[i1]) * r[0])
            ddR[1 + 1, 4 + i] = -I * (
                (ddJdyy[i0, 1] * Y[i1] + ddJdyT[1, i0] * T[i1]) * r[1]
                + (ddJdyy[i0, 0] * Y[i1] + ddJdyT[0, i0] * T[i1]) * r[3])
            ddR[1 + 2, 4 + i] = -I * (
                (ddJdyy[i0, 2] * Y[i1] + ddJdyT[2, i0] * T[i1]) * r[2]
                + (ddJdyy[i0, 1] * Y[i1] + ddJdyT[1, i0] * T[i1]) * r[4]
                + (ddJdyy[i0, 0] * Y[i1] + ddJdyT[0, i0] * T[i1]) * r[5])
            # j == i1: the (i==j) term of the formula above
            ddR[1 + i1, 4 + i] -= I * dJdy[i0]

        # d^2x (only entries with column >= row are written)
        # ddR[1+i,1+j] = I(ddJdYY[I,J]r[I,i]r[J,j])
        ddR[1 + 0, 1 + 0] = I * (ddJdyy[0, 0] * r[0] * r[0])
        ddR[1 + 0, 1 + 1] = I * (ddJdyy[0, 1] * r[0] * r[1]
                                 + ddJdyy[0, 0] * r[0] * r[3])
        ddR[1 + 0, 1 + 2] = I * (ddJdyy[0, 2] * r[0] * r[2]
                                 + ddJdyy[0, 1] * r[0] * r[4]
                                 + ddJdyy[0, 0] * r[0] * r[5])
        ddR[1 + 1, 1 + 1] = I * (ddJdyy[1, 1] * r[1] * r[1]
                                 + ddJdyy[1, 0] * r[1] * r[3]
                                 + ddJdyy[0, 1] * r[3] * r[1]
                                 + ddJdyy[0, 0] * r[3] * r[3])
        ddR[1 + 1, 1 + 2] = I * (ddJdyy[1, 2] * r[1] * r[2]
                                 + ddJdyy[1, 1] * r[1] * r[4]
                                 + ddJdyy[1, 0] * r[1] * r[5]
                                 + ddJdyy[0, 2] * r[3] * r[2]
                                 + ddJdyy[0, 1] * r[3] * r[4]
                                 + ddJdyy[0, 0] * r[3] * r[5])
        ddR[1 + 2, 1 + 2] = I * (ddJdyy[2, 2] * r[2] * r[2]
                                 + ddJdyy[2, 1] * r[2] * r[4]
                                 + ddJdyy[2, 0] * r[2] * r[5]
                                 + ddJdyy[1, 2] * r[4] * r[2]
                                 + ddJdyy[1, 1] * r[4] * r[4]
                                 + ddJdyy[1, 0] * r[4] * r[5]
                                 + ddJdyy[0, 2] * r[5] * r[2]
                                 + ddJdyy[0, 1] * r[5] * r[4]
                                 + ddJdyy[0, 0] * r[5] * r[5])
def _precomp_atom(a, b, d, pw, ZERO, dtype=FTYPE):
    r"""
    Helper for computing atomic intensities.

    This function precomputes values so that
        abs(__atom(a,b,x,dx,pw) - __atom_pw_cpu(*x,*__precomp(a,b,dx,pw))) < ZERO
    etc.

    Parameters
    ----------
    a, b : `numpy.ndarray`, (n,)
        Continuous atom is represented by:
        `y\mapsto sum_i a[i]*exp(-b[i]*|y|^2)`
    d : array-like, (3,)
        Physical dimensions of a single pixel. Depending on the `pw` flag,
        this function approximates the intensity on the box
        [x-d/2, x+d/2]. Entries equal to 0 are handled separately: in the
        pointwise branch each one contributes a factor `sqrt(pi/b[i])`,
        in the box branch the profile along that axis is constant.
    pw : `bool`
        If `True`, the evaluation is pointwise. If `False`, returns average
        intensity over box of width `d`.
    ZERO : `float` > 0
        Permitted threshold for approximation
    dtype : `str`, optional
        String representing `numpy` precision type, default is the
        module-level `FTYPE`.

    Returns
    -------
    precomp : `numpy.ndarray` [`dtype`]
        Tabulated atom intensities. If `pw`, a 1D radial profile sampled
        with spacing `h` on `[0, Rmax + 2h)`; otherwise an array of shape
        `(n, 3, L)` with one 1D profile per Gaussian and per axis.
    params : `numpy.ndarray` [`dtype`]
        Grid spacings to convert real positions to indices. If `pw` this
        is `[Rmax**2, 1/h]`; otherwise it has shape `(2, 3)` holding the
        per-axis cut-off radii and the per-axis `1/h`.
    Rmax : `float`
        The cut-off radius for this atom
    """
    if pw:
        n_zeros = sum(1 for D in d if D == 0)

        def radial(x):
            # Sum of Gaussians; every zero-width axis contributes the
            # 1D integral sqrt(pi / b[i]) of the corresponding factor.
            return sum(a[i] * c_exp(-b[i] * x ** 2)
                       * (pi / b[i]) ** (n_zeros / 2)
                       for i in range(a.size))

        # Grow the radius geometrically until the profile has decayed below
        # the relative threshold.
        threshold = ZERO * radial(0)
        Rmax = .1
        while radial(Rmax) >= threshold:
            Rmax *= 1.1

        h = max(Rmax / 500, max(d) / 10)
        pms = array([Rmax ** 2, 1 / h], dtype=dtype)
        precomp = array([radial(x) for x in arange(0, Rmax + 2 * h, h)],
                        dtype=dtype)
    else:

        def axis_profile(i, j, x):
            # NOTE(review): a negative a[i] makes the fractional power
            # undefined in real arithmetic -- confirm a >= 0 at the caller.
            A = a[i] ** (1 / 3)  # factor spread evenly over 3 dimensions
            B = b[i] ** .5
            if d[j] == 0:
                # NOTE(review): the d -> infinity limit of the erf expression
                # below (times d) and the pointwise branch both give
                # A * sqrt(pi) / B rather than A * 2 / B -- confirm intent.
                return A * 2 / B
            # Average of exp(-b t^2) over [x - d/2, x + d/2].
            return A * (c_erf(B * (x + d[j] / 2))
                        - c_erf(B * (x - d[j] / 2))) / (2 * d[j] * B) * pi ** .5

        h = [D / 10 for D in d]
        Rmax = ones([a.size, 3], dtype=dtype) / 10
        L = 1
        for i in range(a.size):
            for j in range(3):
                if d[j] == 0:
                    # Constant profile along this axis: no finite cut-off.
                    Rmax[i, j] = 1e5
                    continue
                threshold = ZERO * axis_profile(i, j, 0)
                while axis_profile(i, j, Rmax[i, j]) > threshold:
                    Rmax[i, j] *= 1.1
                L = max(L, Rmax[i, j] / h[j] + 2)
        # TODO: The number 400 should probably link to ZERO
        # The lower bound of 3 keeps the spacing Rmax / (L - 2) positive when
        # every entry of d is zero (L would otherwise remain 1); for any
        # other input L is already larger than 2 at this point.
        L = min(400, max(3, int(ceil(L))))

        # The spacing of an axis depends only on the largest radius over all
        # Gaussians, so it is computed once per axis.
        radii = Rmax.max(0)
        for j in range(3):
            h[j] = radii[j] / (L - 2)

        precomp, grid = zeros([a.size, 3, L], dtype=dtype), arange(L)
        for i in range(a.size):
            for j in range(3):
                precomp[i, j] = array(
                    [axis_profile(i, j, x * h[j]) for x in grid], dtype=dtype)

        pms = array([radii, 1 / array(h)], dtype=dtype)
        Rmax = radii.min()

    return precomp, pms, Rmax