def update_hermitian(self, n_mG, deps_m, df_m, chi0_wGG):
    """Accumulate one batch of transitions into ``chi0_wGG``.

    For every frequency in ``self.omega_w`` the rows of ``n_mG`` are
    weighted with ``2 * df_m * deps_m / (omega.imag**2 + deps_m**2)`` and
    handed to a BLAS routine together with ``chi0_wGG[w]``:

    * ``self.blockcomm.size == 1``: ``rk`` is called with the conjugated
      rows scaled by the square root of the negated weight.
    * otherwise: ``mmm`` is called with the columns
      ``self.Ga:self.Gb`` of ``n_mG`` scaled by the weight itself.

    Nothing is returned; presumably both BLAS routines update
    ``chi0_wGG[w]`` in place -- confirm against the BLAS wrappers.
    """
    for iw, freq in enumerate(self.omega_w):
        denom_m = freq.imag**2 + deps_m**2

        if self.blockcomm.size != 1:
            weight_m = 2 * df_m * deps_m / denom_m
            local_mG = n_mG[:, self.Ga:self.Gb] * weight_m[:, np.newaxis]
            mmm(self.prefactor, local_mG, 'c', n_mG, 'n', 1.0, chi0_wGG[iw])
            continue

        # NOTE(review): the square root is taken of a real array, so the
        # negated weight is assumed to be non-negative -- confirm.
        sqrt_m = (-2 * df_m * deps_m / denom_m)**0.5
        scaled_mG = n_mG.conj() * sqrt_m[:, np.newaxis]
        rk(-self.prefactor, scaled_mG, 1.0, chi0_wGG[iw], 'n')
def multiply(self, alpha, a, opa, b, opb, beta, c, symmetric):
    """Dispatch a matrix product to the matching BLAS routine.

    Non-symmetric products go straight to ``blas.mmm``.  For symmetric
    products ``opa`` must be ``'N'`` and ``opb`` must be ``'C'`` (or
    ``'T'`` when ``a`` has dtype ``float``); then

    * ``blas.rk`` is used when ``a`` and ``b`` are the same object,
    * nothing is done when ``beta == 1.0`` and ``a`` has zero columns,
    * ``blas.r2k`` is used otherwise, with ``alpha`` halved.

    All routines receive the ``.array`` attribute of the operands.
    Always returns ``None``.
    """
    if not symmetric:
        blas.mmm(alpha, a.array, opa, b.array, opb, beta, c.array)
        return

    assert opa == 'N'
    assert opb == 'C' or (opb == 'T' and a.dtype == float)

    if a is b:
        blas.rk(alpha, a.array, beta, c.array)
        return

    # Nothing to contribute: the output is kept (beta == 1.0) and the
    # operands have no columns.
    if beta == 1.0 and a.shape[1] == 0:
        return

    blas.r2k(0.5 * alpha, a.array, b.array, beta, c.array)
def update_hermitian(self, n_mG, deps_m, wd, chi0_wGG):
    """If eta=0 use hermitian update.

    The frequencies come from ``wd.get_data()``.  ``deps_m`` is shifted
    **in place** by ``self.eshift`` away from zero (sign-preserving)
    before use, so the caller's array is modified.

    For each frequency the rows of ``n_mG`` are weighted with
    ``2 * deps_m / (omega.imag**2 + deps_m**2)`` and passed, together
    with ``chi0_wGG[w]``, to ``rk`` (when ``self.blockcomm.size == 1``,
    using the complex square root of the negated weight) or to ``mmm``
    (otherwise, using columns ``self.Ga:self.Gb``).  Returns ``None``.
    """
    omega_w = wd.get_data()
    # In-place on purpose: keeps the original side effect on the caller.
    deps_m += self.eshift * np.sign(deps_m)

    for iw, freq in enumerate(omega_w):
        denom_m = freq.imag**2 + deps_m**2

        if self.blockcomm.size != 1:
            weight_m = 2 * deps_m / denom_m
            local_mG = n_mG[:, self.Ga:self.Gb] * weight_m[:, np.newaxis]
            mmm(1.0, local_mG, 'C', n_mG, 'N', 1.0, chi0_wGG[iw])
            continue

        # "+ 0j" promotes to complex so the root of a negative value is
        # imaginary rather than NaN.
        sqrt_m = (-2 * deps_m / denom_m + 0j)**0.5
        scaled_mG = n_mG.conj() * sqrt_m[:, np.newaxis]
        rk(-1.0, scaled_mG, 1.0, chi0_wGG[iw], 'n')
def fastmmm2notsym(a, b, out):
    """Accumulate products of ``a`` with row blocks of ``b`` into ``out``.

    The local ``b.array`` is sent around the ranks of ``a.dist.comm``
    while each rank multiplies its ``a.array`` with the block it
    currently holds and adds the result to the matching column slice of
    ``out.array``.  Unlike ``fastmmm2`` every rank visits every block
    (``comm.size`` steps), so no symmetry is exploited.

    ``a``, ``b`` and ``out`` are expected to expose ``comm``, ``dist``,
    ``shape``, ``array`` and ``dtype`` (``out`` additionally ``state``).
    Presumably ``'C'`` requests the conjugate transpose of the second
    operand and the trailing ``False`` makes send/receive non-blocking --
    confirm against ``blas.mmm`` and the communicator API.

    Returns ``out``.
    """
    if a.comm:
        assert b.comm is a.comm
        if a.comm.size > 1:
            assert out.comm == a.comm
            assert out.state == 'a sum is needed'

    comm = a.dist.comm
    nranks = comm.size
    rank = comm.rank
    nrows_total, ncols = a.shape
    # Rows per rank, rounded up; trailing ranks may hold fewer (or none).
    blocksize = (nrows_total + nranks - 1) // nranks
    have_local_rows = len(a.array) > 0

    def block_bounds(owner):
        """Return the global (start, stop) row range owned by ``owner``."""
        start = min(owner * blocksize, nrows_total)
        return start, min(start + blocksize, nrows_total)

    # Two buffers: one is multiplied while the other is being received.
    recv_buf = np.empty((blocksize, ncols), dtype=a.dtype)
    spare_buf = np.empty((blocksize, ncols), dtype=a.dtype)
    a_local = a.array
    b_block = b.array

    for step in range(nranks):
        recv_req = None
        send_req = None
        if step < nranks - 1:
            dest = (rank + step + 1) % nranks
            source = (rank - step - 1) % nranks
            start, stop = block_bounds(source)
            if stop > start:
                recv_req = comm.receive(recv_buf[:stop - start],
                                        source, 11, False)
            if have_local_rows:
                send_req = comm.send(b.array, dest, 11, False)

        start, stop = block_bounds((rank - step) % nranks)
        # NOTE(review): the original author marked this call with
        # "symmetric ??" -- the question was left open.
        blas.mmm(1.0, a_local, 'N', b_block[:stop - start], 'C',
                 1.0, out.array[:, start:stop])

        if recv_req:
            comm.wait(recv_req)
        if send_req:
            comm.wait(send_req)

        # The block just received is multiplied in the next step.
        b_block = recv_buf
        recv_buf, spare_buf = spare_buf, recv_buf

    return out
def fastmmm(m1, m2, m3, beta):
    """Multiply ``m1`` by the row-distributed ``m2`` into ``m3``.

    The local ``m2.array`` is passed around the ranks of
    ``m1.dist.comm``.  In each of the ``comm.size`` steps the column
    slice of ``m1.array`` belonging to the block currently held is
    multiplied with that block via ``blas.mmm`` and written to
    ``m3.array``.  The caller's ``beta`` is used for the first product
    only; every later product is called with ``1.0`` so that the
    contributions add up.

    Presumably the trailing ``False`` makes send/receive non-blocking --
    confirm against the communicator API.

    Returns ``m3``.
    """
    comm = m1.dist.comm
    current = m2.array

    nrows = len(m1)
    # Rows per rank, rounded up; trailing ranks may hold fewer (or none).
    blocksize = (nrows + comm.size - 1) // comm.size

    incoming = np.empty((blocksize, current.shape[1]), dtype=current.dtype)

    for step in range(comm.size):
        recv_req = None
        send_req = None
        if step < comm.size - 1:
            source = (comm.rank + step + 1) % comm.size
            lo = min(source * blocksize, nrows)
            hi = min(lo + blocksize, nrows)
            if hi > lo:
                recv_req = comm.receive(incoming[:hi - lo], source, 21, False)
            dest = (comm.rank - step - 1) % comm.size
            if len(m2.array) > 0:
                send_req = comm.send(m2.array, dest, 21, False)

        owner = (comm.rank + step) % comm.size
        lo = min(owner * blocksize, nrows)
        hi = min(lo + blocksize, nrows)
        blas.mmm(1.0, m1.array[:, lo:hi], 'N', current[:hi - lo], 'N',
                 beta, m3.array)

        beta = 1.0

        if step == 0:
            # "current" still aliases m2.array, which keeps being sent;
            # switch to a scratch buffer so it is never received into.
            current = np.empty_like(incoming)

        current, incoming = incoming, current

        if recv_req:
            comm.wait(recv_req)
        if send_req:
            comm.wait(send_req)

    return m3
"""Test BLAS matrix-matrix-multiplication interface.""" import numpy as np from gpaw.utilities.blas import mmm def op(o, m): if o == 'n': return m if o == 't': return m.T return m.T.conj() def matrix(shape, dtype): if dtype == float: return np.random.random(shape) return np.random.random(shape) + 1j * np.random.random(shape) for dtype in [float, complex]: a = matrix((2, 3), dtype) for opa in 'ntc': A = op(opa, a) B = matrix((A.shape[1], 4), dtype) for opb in 'ntc': b = op(opb, B).copy() C = np.dot(A, B) mmm(1, a, opa, b, opb, -1, C) assert abs(C).max() < 1e-14
def calculate_projections(self, wfs, kpt):
    """Recompute every array in ``kpt.P_ani`` from ``kpt.C_nM``.

    For each key ``a`` the stored array is overwritten by ``mmm`` called
    with ``kpt.C_nM`` and ``wfs.P_aqMi[a][kpt.q]`` (both with operation
    ``'N'`` and a trailing scale factor of ``0.0``).  Returns ``None``.
    """
    for atom, P_ni in kpt.P_ani.items():
        # ATLAS can't handle uninitialized output array:
        P_ni.fill(117)
        P_Mi = wfs.P_aqMi[atom][kpt.q]
        mmm(1.0, kpt.C_nM, 'N', P_Mi, 'N', 0.0, P_ni)
def fastmmm2(a, b, out):
    """Accumulate products of ``a`` with row blocks of ``b`` into ``out``,
    visiting only about half of the blocks per rank.

    Phase 1 passes ``b.array`` around the ranks of ``a.dist.comm`` for
    ``comm.size // 2`` exchange steps while each rank multiplies its
    ``a.array`` with the block it holds and stores the result in the
    matching column slice of ``out.array``.

    Phase 2 ships conjugate-transposed copies of selected column blocks
    from the first ``(comm.size - 1) // 2`` ranks to the last ranks,
    which add them to their own ``out.array``.

    NOTE(review): this looks like an exploitation of the Hermitian
    symmetry of the result (compare ``fastmmm2notsym``) -- confirm.
    Presumably ``'C'`` requests the conjugate transpose of the second
    operand, the sixth ``blas.mmm`` argument scales the existing output,
    and the trailing ``False`` makes send/receive non-blocking -- verify
    against ``blas.mmm`` and the communicator API.

    Returns ``out``.
    """
    if a.comm:
        assert b.comm is a.comm
        if a.comm.size > 1:
            assert out.comm == a.comm
            assert out.state == 'a sum is needed'

    comm = a.dist.comm
    M, N = a.shape
    # Rows per rank, rounded up; trailing ranks may hold fewer (or none).
    m = (M + comm.size - 1) // comm.size
    mym = len(a.array)

    # Two buffers: one is multiplied while the other is being received.
    buf1 = np.empty((m, N), dtype=a.dtype)
    buf2 = np.empty((m, N), dtype=a.dtype)
    half = comm.size // 2
    aa = a.array
    bb = b.array

    for r in range(half + 1):
        rrequest = None
        srequest = None

        if r < half:
            srank = (comm.rank + r + 1) % comm.size
            rrank = (comm.rank - r - 1) % comm.size
            # For an even number of ranks the last exchange is one-way:
            # ranks below ``half`` do not receive, the others do not send.
            skip = (comm.size % 2 == 0 and r == half - 1)
            m1 = min(rrank * m, M)
            m2 = min(m1 + m, M)
            if not (skip and comm.rank < half) and m2 > m1:
                rrequest = comm.receive(buf1[:m2 - m1], rrank, 11, False)
            if not (skip and comm.rank >= half) and mym > 0:
                srequest = comm.send(b.array, srank, 11, False)

        # Ranks below ``half`` got nothing in the one-way exchange above,
        # so they skip the final multiplication.
        if not (comm.size % 2 == 0 and r == half and comm.rank < half):
            m1 = min(((comm.rank - r) % comm.size) * m, M)
            m2 = min(m1 + m, M)
            if r == 0:
                # Original author's note: "symmetric" (this is the block
                # built from the rank's own ``b.array``).
                blas.mmm(1.0, aa, 'N', bb, 'C', 1.0, out.array[:, m1:m2])
            else:
                # After the swap at the end of the previous step ``buf2``
                # is the buffer that was received into.
                beta = 1.0 if r <= comm.rank else 0.0
                blas.mmm(1.0, aa, 'N', buf2[:m2 - m1], 'C',
                         beta, out.array[:, m1:m2])

        if rrequest:
            comm.wait(rrequest)
        if srequest:
            comm.wait(srequest)

        bb = buf1
        buf1, buf2 = buf2, buf1

    # Phase 2: send conjugate-transposed blocks from the first ``nrows``
    # ranks to the trailing ranks, which add them to their result.
    requests = []
    blocks = []
    nrows = (comm.size - 1) // 2
    for row in range(nrows):
        for column in range(comm.size - nrows + row, comm.size):
            if comm.rank == row:
                m1 = min(column * m, M)
                m2 = min(m1 + m, M)
                if mym > 0 and m2 > m1:
                    # ``.copy()`` hands the send a fresh array rather than
                    # the transposed view.
                    requests.append(
                        comm.send(out.array[:, m1:m2].T.conj().copy(),
                                  column, 12, False))
            elif comm.rank == column:
                m1 = min(row * m, M)
                m2 = min(m1 + m, M)
                if mym > 0 and m2 > m1:
                    block = np.empty((mym, m2 - m1), out.dtype)
                    blocks.append((m1, m2, block))
                    requests.append(comm.receive(block, row, 12, False))

    comm.waitall(requests)
    # Only add the received blocks once all transfers have completed.
    for m1, m2, block in blocks:
        out.array[:, m1:m2] += block

    return out