Example #1
    def _pseudo_braket(self, bra_xG, ket_yG, A_yx, square=None):
        """Calculate matrix elements of braket pairs of pseudo wave functions.
        Low-level helper function. Results will be put in the *A_yx* array::
        
                   /     ~ *     ~   
           A    =  | dG bra (G) ket  (G)
            nn'    /       n       n'


        Parameters:

        bra_xG: ndarray
            Set of bra-like vectors in which the matrix elements are evaluated.
        ket_yG: ndarray
            Set of ket-like vectors in which the matrix elements are evaluated.
        A_yx: ndarray
            Matrix in which to put calculated elements. Take care: Due to the
            difference in Fortran/C array order and the inherent BLAS nature,
            the matrix has to be filled in transposed (conjugated in future?).

        """
        assert bra_xG.shape[1:] == ket_yG.shape[1:]
        assert (ket_yG.shape[0], bra_xG.shape[0]) == A_yx.shape

        if square is None:
            square = (bra_xG.shape[0] == ket_yG.shape[0])

        dv = self.gd.dv
        if ket_yG is bra_xG:
            rk(dv, bra_xG, 0.0, A_yx)
        elif self.hermitian and square:
            r2k(0.5 * dv, bra_xG, ket_yG, 0.0, A_yx)
        else:
            gemm(dv, bra_xG, ket_yG, 0.0, A_yx, 'c')
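For orientation, the integral in the docstring can be restated as a dense NumPy product over the flattened grid axis. This is only an illustration of the intended result (including the transposed storage described above), not of GPAW's BLAS wrappers:

import numpy as np

def pseudo_braket_numpy(bra_xG, ket_yG, dv):
    # A_yx[y, x] = dv * sum_G conj(bra_x(G)) * ket_y(G), i.e. the docstring's
    # integral stored transposed (hypothetical helper, for illustration only).
    bra = bra_xG.reshape(len(bra_xG), -1)
    ket = ket_yG.reshape(len(ket_yG), -1)
    return dv * ket @ bra.conj().T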
Example #2
    def integrate(self,
                  a_xg,
                  b_yg=None,
                  global_integral=True,
                  hermitian=False,
                  _transposed_result=None):
        """Integrate function(s) over domain.

        a_xg: ndarray
            Function(s) to be integrated.
        b_yg: ndarray
            If present, integrate a_xg.conj() * b_yg.
        global_integral: bool
            If the array(s) are distributed over several domains, then the
            total sum will be returned.  To get the local contribution
            only, use global_integral=False.
        hermitian: bool
            Result is hermitian.
        _transposed_result: ndarray
            Long story.  Don't use this unless you are a method of the
            MatrixOperator class ..."""

        xshape = a_xg.shape[:-3]

        if b_yg is None:
            # Only one array:
            result = a_xg.reshape(xshape + (-1, )).sum(axis=-1) * self.dv
            if global_integral:
                if result.ndim == 0:
                    result = self.comm.sum(result)
                else:
                    self.comm.sum(result)
            return result

        A_xg = np.ascontiguousarray(a_xg.reshape((-1, ) + a_xg.shape[-3:]))
        B_yg = np.ascontiguousarray(b_yg.reshape((-1, ) + b_yg.shape[-3:]))

        if _transposed_result is None:
            result_yx = np.zeros((len(B_yg), len(A_xg)), A_xg.dtype)
        else:
            result_yx = _transposed_result
            global_integral = False

        if a_xg is b_yg:
            rk(self.dv, A_xg, 0.0, result_yx)
        elif hermitian:
            r2k(0.5 * self.dv, A_xg, B_yg, 0.0, result_yx)
        else:
            gemm(self.dv, A_xg, B_yg, 0.0, result_yx, 'c')

        if global_integral:
            self.comm.sum(result_yx)

        yshape = b_yg.shape[:-3]
        result = result_yx.T.reshape(xshape + yshape)

        if result.ndim == 0:
            return result.item()
        else:
            return result
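As a cross-check of what the two-array branch returns, the same quantity can be written as one NumPy product (a sketch only; the communicator sum, the _transposed_result path, and the fact that the rk/r2k fast paths fill only one triangle are left out):

import numpy as np

def integrate_numpy(a_xg, b_yg, dv):
    # result[x..., y...] = dv * sum_g conj(a_xg[x..., g]) * b_yg[y..., g]
    A = a_xg.reshape(-1, np.prod(a_xg.shape[-3:]))
    B = b_yg.reshape(-1, np.prod(b_yg.shape[-3:]))
    result = dv * A.conj() @ B.T
    return result.reshape(a_xg.shape[:-3] + b_yg.shape[:-3])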
Example #3
    def _pseudo_braket(self, bra_xG, ket_yG, A_yx, square=None):
        """Calculate matrix elements of braket pairs of pseudo wave functions.
        Low-level helper function. Results will be put in the *A_yx* array::
        
                   /     ~ *     ~   
           A    =  | dG bra (G) ket  (G)
            nn'    /       n       n'


        Parameters:

        bra_xG: ndarray
            Set of bra-like vectors in which the matrix elements are evaluated.
        ket_yG: ndarray
            Set of ket-like vectors in which the matrix elements are evaluated.
        A_yx: ndarray
            Matrix in which to put calculated elements. Take care: Due to the
            difference in Fortran/C array order and the inherent BLAS nature,
            the matrix has to be filled in transposed (conjugated in future?).

        """
        assert bra_xG.shape[1:] == ket_yG.shape[1:]
        assert (ket_yG.shape[0], bra_xG.shape[0]) == A_yx.shape

        if square is None:
            square = (bra_xG.shape[0] == ket_yG.shape[0])

        dv = self.gd.dv
        if ket_yG is bra_xG:
            rk(dv, bra_xG, 0.0, A_yx)
        elif self.hermitian and square:
            r2k(0.5 * dv, bra_xG, ket_yG, 0.0, A_yx)
        else:
            gemm(dv, bra_xG, ket_yG, 0.0, A_yx, 'c')
Example #4
 def update_hilbert(self, n_mG, deps_m, df_m, chi0_wGG):
     domega = self.omega_w[1]
     for omega, df, n_G in zip(deps_m, df_m, n_mG):
         w = omega / domega
         iw = int(w)
         weights = df * np.array([[1 - w + iw], [w - iw]])
         x_2G = n_G * weights**0.5
         rk(self.prefactor, x_2G, 1.0, chi0_wGG[iw:iw + 2])
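The two weights above split each transition linearly between the neighbouring frequency-grid points iw and iw + 1. A stand-alone check of that property with an arbitrary grid spacing and transition energy (values are placeholders):

import numpy as np

domega = 0.1                          # assumed uniform frequency-grid spacing
omega = 0.37                          # arbitrary transition energy
w = omega / domega
iw = int(w)
weights = np.array([1 - w + iw, w - iw])
assert np.isclose(weights.sum(), 1.0)                              # spectral weight preserved
assert np.isclose((iw + np.arange(2)) @ weights * domega, omega)   # first moment preserved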
Example #5
    def integrate(self, a_xg, b_yg=None,
                  global_integral=True, hermitian=False,
                  _transposed_result=None):
        """Integrate function(s) over domain.

        a_xg: ndarray
            Function(s) to be integrated.
        b_yg: ndarray
            If present, integrate a_xg.conj() * b_yg.
        global_integral: bool
            If the array(s) are distributed over several domains, then the
            total sum will be returned.  To get the local contribution
            only, use global_integral=False.
        hermitian: bool
            Result is hermitian.
        _transposed_result: ndarray
            Long story.  Don't use this unless you are a method of the
            MatrixOperator class ..."""
        
        xshape = a_xg.shape[:-3]
        
        if b_yg is None:
            # Only one array:
            result = a_xg.reshape(xshape + (-1,)).sum(axis=-1) * self.dv
            if global_integral:
                if result.ndim == 0:
                    result = self.comm.sum(result)
                else:
                    self.comm.sum(result)
            return result

        A_xg = np.ascontiguousarray(a_xg.reshape((-1,) + a_xg.shape[-3:]))
        B_yg = np.ascontiguousarray(b_yg.reshape((-1,) + b_yg.shape[-3:]))

        if _transposed_result is None:
            result_yx = np.zeros((len(B_yg), len(A_xg)), A_xg.dtype)
        else:
            result_yx = _transposed_result
            global_integral = False

        if a_xg is b_yg:
            rk(self.dv, A_xg, 0.0, result_yx)
        elif hermitian:
            r2k(0.5 * self.dv, A_xg, B_yg, 0.0, result_yx)
        else:
            gemm(self.dv, A_xg, B_yg, 0.0, result_yx, 'c')
        
        if global_integral:
            self.comm.sum(result_yx)

        yshape = b_yg.shape[:-3]
        result = result_yx.T.reshape(xshape + yshape)
        
        if result.ndim == 0:
            return result.item()
        else:
            return result
Example #6
 def update_hermitian(self, n_mG, deps_m, df_m, chi0_wGG):
     for w, omega in enumerate(self.omega_w):
         if self.blockcomm.size == 1:
             x_m = (-2 * df_m * deps_m / (omega.imag**2 + deps_m**2))**0.5
             nx_mG = n_mG.conj() * x_m[:, np.newaxis]
             rk(-self.prefactor, nx_mG, 1.0, chi0_wGG[w], 'n')
         else:
             x_m = 2 * df_m * deps_m / (omega.imag**2 + deps_m**2)
             mynx_mG = n_mG[:, self.Ga:self.Gb] * x_m[:, np.newaxis]
             mmm(self.prefactor, mynx_mG, 'c', n_mG, 'n', 1.0, chi0_wGG[w])
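The factor 2 * df_m * deps_m / (omega.imag**2 + deps_m**2) in x_m is the eta -> 0 form of the two resonance denominators evaluated at a purely imaginary frequency. A quick numerical check of that identity with placeholder values (the occupation factor df_m is left out):

import numpy as np

u, de = 0.7, 1.3                                    # stand-ins for omega.imag and deps_m
pair = 1.0 / (1j * u - de) - 1.0 / (1j * u + de)    # the two poles at +-deps_m
assert np.isclose(pair, -2 * de / (u**2 + de**2))   # purely real combination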
Example #8
 def multiply(self, alpha, a, opa, b, opb, beta, c, symmetric):
     if symmetric:
         assert opa == 'N'
         assert opb == 'C' or opb == 'T' and a.dtype == float
         if a is b:
             blas.rk(alpha, a.array, beta, c.array)
         else:
             if beta == 1.0 and a.shape[1] == 0:
                 return
             blas.r2k(0.5 * alpha, a.array, b.array, beta, c.array)
     else:
         blas.mmm(alpha, a.array, opa, b.array, opb, beta, c.array)
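For orientation, the general branch follows the usual matrix-multiply contract C <- alpha * op(A) op(B) + beta * C; the symmetric branches only exploit that the result is then Hermitian, so a rank-k or rank-2k update of one triangle suffices. A plain-NumPy reference of that contract (an assumption about the wrapper's convention, for illustration only):

import numpy as np

def multiply_numpy(alpha, a, opa, b, opb, beta, c):
    # NumPy model of C <- alpha * op(A) @ op(B) + beta * C
    def op(x, o):
        return {'N': x, 'T': x.T, 'C': x.conj().T}[o]
    return beta * c + alpha * op(a, opa) @ op(b, opb)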
Example #9
    def update_hermitian(self, n_mG, deps_m, wd, chi0_wGG):
        """If eta=0 use hermitian update."""
        omega_w = wd.get_data()
        deps_m += self.eshift * np.sign(deps_m)

        for w, omega in enumerate(omega_w):
            if self.blockcomm.size == 1:
                x_m = (-2 * deps_m / (omega.imag**2 + deps_m**2) + 0j)**0.5
                nx_mG = n_mG.conj() * x_m[:, np.newaxis]
                rk(-1.0, nx_mG, 1.0, chi0_wGG[w], 'n')
            else:
                x_m = 2 * deps_m / (omega.imag**2 + deps_m**2)
                mynx_mG = n_mG[:, self.Ga:self.Gb] * x_m[:, np.newaxis]
                mmm(1.0, mynx_mG, 'C', n_mG, 'N', 1.0, chi0_wGG[w])
    def integrate(self, a_xg, b_yg=None,
                  global_integral=True, hermitian=False,
                  _transposed_result=None):
        """Integrate function(s) over domain.

        a_xg: ndarray
            Function(s) to be integrated.
        b_yg: ndarray
            If present, integrate a_xg.conj() * b_yg.
        global_integral: bool
            If the array(s) are distributed over several domains, then the
            total sum will be returned.  To get the local contribution
            only, use global_integral=False.
        hermitian: bool
            Result is hermitian.
        _transposed_result: ndarray
            Long story.  Don't use this unless you are a method of the
            MatrixOperator class ..."""
        
        xshape = a_xg.shape[:-3]
        
        if b_yg is None:
            # Only one array:
            result = a_xg.reshape(xshape + (-1,)).sum(axis=-1) * self.dv
            if global_integral:
                if result.ndim == 0:
                    result = self.comm.sum(result)
                else:
                    self.comm.sum(result)
            return result

        if isinstance(a_xg, mic.OffloadArray):
            # offload arrays have to be contiguous in any case
            A_xg = a_xg
            B_yg = b_yg
        else:
            A_xg = np.ascontiguousarray(a_xg.reshape((-1,) + a_xg.shape[-3:]))
            B_yg = np.ascontiguousarray(b_yg.reshape((-1,) + b_yg.shape[-3:]))

        if _transposed_result is None:
            result_yx = np.zeros((len(B_yg), len(A_xg)), A_xg.dtype)
        else:
            result_yx = _transposed_result
            global_integral = False

        if isinstance(a_xg, mic.OffloadArray):
            result_yx_mic = stream.bind(result_yx)
            stream.sync()
            # result_yx_mic.fillfrom(result_yx)
            # result_yx_mic.array[:] = result_yx[:]
            # result_yx_mic.update_device()

        if a_xg is b_yg:
            if isinstance(a_xg, mic.OffloadArray):
                # dsyrk performs badly in MIC so use dgemm here
                # mic_rk(self.dv, A_xg, 0.0, result_yx_mic)
                mic_gemm(self.dv, A_xg, A_xg, 0.0, result_yx_mic, 'c')
            else:
                rk(self.dv, A_xg, 0.0, result_yx)
        elif hermitian:
            if isinstance(a_xg, mic.OffloadArray):
                mic_r2k(self.dv, A_xg, B_yg, 0.0, result_yx_mic)
            else:
                r2k(0.5 * self.dv, A_xg, B_yg, 0.0, result_yx)
        else:
            if isinstance(a_xg, mic.OffloadArray):
                mic_gemm(self.dv, A_xg, B_yg, 0.0, result_yx_mic, 'c')
            else:
                gemm(self.dv, A_xg, B_yg, 0.0, result_yx, 'c')
        
        if isinstance(a_xg, mic.OffloadArray):
            result_yx_mic.update_host()
            stream.sync()

        if global_integral:
            self.comm.sum(result_yx)

        yshape = b_yg.shape[:-3]
        result = result_yx.T.reshape(xshape + yshape)
        
        if result.ndim == 0:
            return result.item()
        else:
            return result
Example #11
def main(M=160, N=120, K=140, seed=42, mprocs=2, nprocs=2, dtype=float):
    gen = np.random.RandomState(seed)
    grid = BlacsGrid(world, mprocs, nprocs)
    
    if dtype == complex:
        epsilon = 1.0j
    else:
        epsilon = 0.0

    # Create descriptors for matrices on master:
    globA = grid.new_descriptor(M, K, M, K)
    globB = grid.new_descriptor(K, N, K, N)
    globC = grid.new_descriptor(M, N, M, N)
    globZ = grid.new_descriptor(K, K, K, K)
    globX = grid.new_descriptor(K, 1, K, 1)
    globY = grid.new_descriptor(M, 1, M, 1)
    globD = grid.new_descriptor(M, K, M, K)
    globS = grid.new_descriptor(M, M, M, M)
    globU = grid.new_descriptor(M, M, M, M)

    # print globA.asarray()
    # Populate matrices local to master:
    A0 = gen.rand(*globA.shape) + epsilon * gen.rand(*globA.shape)
    B0 = gen.rand(*globB.shape) + epsilon * gen.rand(*globB.shape)
    D0 = gen.rand(*globD.shape) + epsilon * gen.rand(*globD.shape)
    X0 = gen.rand(*globX.shape) + epsilon * gen.rand(*globX.shape)
    
    # Local result matrices
    Y0 = globY.empty(dtype=dtype)
    C0 = globC.zeros(dtype=dtype)
    Z0 = globZ.zeros(dtype=dtype)
    S0 = globS.zeros(dtype=dtype) # zeros needed for rank-updates
    U0 = globU.zeros(dtype=dtype) # zeros needed for rank-updates

    # Local reference matrix product:
    if rank == 0:
        # C0[:] = np.dot(A0, B0)
        gemm(1.0, B0, A0, 0.0, C0)
        #gemm(1.0, A0, A0, 0.0, Z0, transa='t')
        print(A0.shape, Z0.shape)
        Z0[:] = np.dot(A0.T, A0)
        # Y0[:] = np.dot(A0, X0)
        gemv(1.0, A0, X0.ravel(), 0.0, Y0.ravel())
        r2k(1.0, A0, D0, 0.0, S0)
        rk(1.0, A0, 0.0, U0)

    assert globA.check(A0) and globB.check(B0) and globC.check(C0)
    assert globX.check(X0) and globY.check(Y0)
    assert globD.check(D0) and globS.check(S0) and globU.check(U0)

    # Create distributed descriptors with various block sizes:
    distA = grid.new_descriptor(M, K, 2, 2)
    distB = grid.new_descriptor(K, N, 2, 4)
    distC = grid.new_descriptor(M, N, 3, 2)
    distZ = grid.new_descriptor(K, K, 5, 7)
    distX = grid.new_descriptor(K, 1, 4, 1)
    distY = grid.new_descriptor(M, 1, 3, 1)
    distD = grid.new_descriptor(M, K, 2, 3)
    distS = grid.new_descriptor(M, M, 2, 2)
    distU = grid.new_descriptor(M, M, 2, 2)

    # Distributed matrices:
    A = distA.empty(dtype=dtype)
    B = distB.empty(dtype=dtype)
    C = distC.empty(dtype=dtype)
    Z = distZ.empty(dtype=dtype)
    X = distX.empty(dtype=dtype)
    Y = distY.empty(dtype=dtype)
    D = distD.empty(dtype=dtype)
    S = distS.zeros(dtype=dtype) # zeros needed for rank-updates
    U = distU.zeros(dtype=dtype) # zeros needed for rank-updates

    Redistributor(world, globA, distA).redistribute(A0, A)
    Redistributor(world, globB, distB).redistribute(B0, B)
    Redistributor(world, globX, distX).redistribute(X0, X)
    Redistributor(world, globD, distD).redistribute(D0, D)

    pblas_simple_gemm(distA, distB, distC, A, B, C)
    pblas_simple_gemm(distA, distA, distZ, A, A, Z, transa='T')
    pblas_simple_gemv(distA, distX, distY, A, X, Y)
    pblas_simple_r2k(distA, distD, distS, A, D, S)
    pblas_simple_rk(distA, distU, A, U)

    # Collect result back on master
    C1 = globC.empty(dtype=dtype)
    Y1 = globY.empty(dtype=dtype)
    S1 = globS.zeros(dtype=dtype) # zeros needed for rank-updates
    U1 = globU.zeros(dtype=dtype) # zeros needed for rank-updates
    Redistributor(world, distC, globC).redistribute(C, C1)
    Redistributor(world, distY, globY).redistribute(Y, Y1)
    Redistributor(world, distS, globS).redistribute(S, S1)
    Redistributor(world, distU, globU).redistribute(U, U1)

    if rank == 0:
        gemm_err = abs(C1 - C0).max()
        gemv_err = abs(Y1 - Y0).max()
        r2k_err  = abs(S1 - S0).max()
        rk_err   = abs(U1 - U0).max()
        print('gemm err', gemm_err)
        print('gemv err', gemv_err)
        print('r2k err', r2k_err)
        print('rk_err', rk_err)
    else:
        gemm_err = 0.0
        gemv_err = 0.0
        r2k_err  = 0.0
        rk_err   = 0.0

    gemm_err = world.sum(gemm_err) # We don't like exceptions on only one cpu
    gemv_err = world.sum(gemv_err)
    r2k_err  = world.sum(r2k_err)
    rk_err   = world.sum(rk_err)

    equal(gemm_err, 0, tol)
    equal(gemv_err, 0, tol)
    equal(r2k_err, 0, tol)
    equal(rk_err, 0, tol)
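The serial references computed on rank 0 above can be restated in plain NumPy with small toy sizes (a sketch; rk and r2k are assumed to follow the usual BLAS syrk/syr2k conventions and, unlike the real calls, the full matrices are filled here):

import numpy as np

M, N, K = 16, 12, 14
gen = np.random.RandomState(42)
A0 = gen.rand(M, K)
B0 = gen.rand(K, N)
D0 = gen.rand(M, K)
X0 = gen.rand(K, 1)

C0 = A0 @ B0                    # gemm(1.0, B0, A0, 0.0, C0)
Z0 = A0.T @ A0                  # pblas_simple_gemm with transa='T'
Y0 = A0 @ X0                    # gemv reference
S0 = A0 @ D0.T + D0 @ A0.T      # r2k(1.0, A0, D0, 0.0, S0), syr2k convention assumed
U0 = A0 @ A0.T                  # rk(1.0, A0, 0.0, U0), syrk convention assumed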
Example #12
def main(N=73, seed=42, mprocs=2, nprocs=2, dtype=float):
    gen = np.random.RandomState(seed)
    grid = BlacsGrid(world, mprocs, nprocs)
    
    if dtype == complex:
        epsilon = 1.0j
    else:
        epsilon = 0.0

    # Create descriptors for matrices on master:
    glob = grid.new_descriptor(N, N, N, N)

    # print globA.asarray()
    # Populate matrices local to master:
    H0 = glob.zeros(dtype=dtype) + gen.rand(*glob.shape)
    S0 = glob.zeros(dtype=dtype) + gen.rand(*glob.shape)
    C0 = glob.empty(dtype=dtype)
    if rank == 0:
        # Complex case must have real numbers on the diagonal.
        # We make a simple complex Hermitian matrix below.
        H0 = H0 + epsilon * (0.1 * np.tri(N, N, k=-N // nprocs) +
                             0.3 * np.tri(N, N, k=-1))
        S0 = S0 + epsilon * (0.2 * np.tri(N, N, k=-N // nprocs) +
                             0.4 * np.tri(N, N, k=-1))
        # Make matrices symmetric
        rk(1.0, H0.copy(), 0.0, H0)
        rk(1.0, S0.copy(), 0.0, S0)
        # Overlap matrix must be semi-positive definite
        S0 = S0 + 50.0*np.eye(N, N, 0)
        # Hamiltonian is usually diagonally dominant
        H0 = H0 + 75.0*np.eye(N, N, 0)
        C0 = S0.copy()

    # Local result matrices
    W0 = np.empty((N),dtype=float)
    W0_g = np.empty((N),dtype=float)

    # Calculate eigenvalues
    if rank == 0:
        diagonalize(H0.copy(), W0)
        general_diagonalize(H0.copy(), W0_g, S0.copy())
        inverse_cholesky(C0) # result returned in lower triangle
        # tri2full(C0) # symmetrize
        
    assert glob.check(H0) and glob.check(S0) and glob.check(C0)

    # Create distributed descriptors with various block sizes:
    dist = grid.new_descriptor(N, N, 8, 8)

    # Distributed matrices:
    # We can use empty here, but end up with garbage on the other
    # half of the triangle when we redistribute.
    # This is fine because ScaLAPACK does not care.

    H = dist.empty(dtype=dtype)
    S = dist.empty(dtype=dtype)
    Z = dist.empty(dtype=dtype)
    C = dist.empty(dtype=dtype)

    # Eigenvalues are non-BLACS matrices
    W = np.empty((N), dtype=float)
    W_dc = np.empty((N), dtype=float)
    W_mr3 = np.empty((N), dtype=float)
    W_g = np.empty((N), dtype=float)
    W_g_dc = np.empty((N), dtype=float)
    W_g_mr3 = np.empty((N), dtype=float)

    Glob2dist = Redistributor(world, glob, dist)
    Glob2dist.redistribute(H0, H, uplo='L')
    Glob2dist.redistribute(S0, S, uplo='L')
    Glob2dist.redistribute(S0, C, uplo='L') # C0 was previously overwritten

    # we don't test the expert drivers anymore since there
    # might be a buffer overflow error
    ## scalapack_diagonalize_ex(dist, H.copy(), Z, W, 'L')
    scalapack_diagonalize_dc(dist, H.copy(), Z, W_dc, 'L')
    ## scalapack_diagonalize_mr3(dist, H.copy(), Z, W_mr3, 'L')
    ## scalapack_general_diagonalize_ex(dist, H.copy(), S.copy(), Z, W_g, 'L')
    scalapack_general_diagonalize_dc(dist, H.copy(), S.copy(), Z, W_g_dc, 'L')
    ## scalapack_general_diagonalize_mr3(dist, H.copy(), S.copy(), Z, W_g_mr3, 'L')
    scalapack_inverse_cholesky(dist, C, 'L')

    # Undo redistribute
    C_test = glob.empty(dtype=dtype)
    Dist2glob = Redistributor(world, dist, glob)
    Dist2glob.redistribute(C, C_test)

    if rank == 0:
        ## diag_ex_err = abs(W - W0).max()
        diag_dc_err = abs(W_dc - W0).max()
        ## diag_mr3_err = abs(W_mr3 - W0).max()
        ## general_diag_ex_err = abs(W_g - W0_g).max()
        general_diag_dc_err = abs(W_g_dc - W0_g).max()
        ## general_diag_mr3_err = abs(W_g_mr3 - W0_g).max()
        inverse_chol_err = abs(C_test-C0).max()
        ## print 'diagonalize ex err', diag_ex_err
        print('diagonalize dc err', diag_dc_err)
        ## print 'diagonalize mr3 err', diag_mr3_err
        ## print 'general diagonalize ex err', general_diag_ex_err
        print('general diagonalize dc err', general_diag_dc_err)
        ## print 'general diagonalize mr3 err', general_diag_mr3_err
        print('inverse chol err', inverse_chol_err)
    else:
        ## diag_ex_err = 0.0
        diag_dc_err = 0.0
        ## diag_mr3_err = 0.0
        ## general_diag_ex_err = 0.0
        general_diag_dc_err = 0.0
        ## general_diag_mr3_err = 0.0
        inverse_chol_err = 0.0

    # We don't like exceptions on only one cpu
    ## diag_ex_err = world.sum(diag_ex_err)
    diag_dc_err = world.sum(diag_dc_err)
    ## diag_mr3_err = world.sum(diag_mr3_err)
    ## general_diag_ex_err = world.sum(general_diag_ex_err)
    general_diag_dc_err = world.sum(general_diag_dc_err)
    ## general_diag_mr3_err = world.sum(general_diag_mr3_err) 
    inverse_chol_err = world.sum(inverse_chol_err)
    ## assert diag_ex_err < tol
    assert diag_dc_err < tol
    ## assert diag_mr3_err < tol
    ## assert general_diag_ex_err < tol
    assert general_diag_dc_err < tol
    ## assert general_diag_mr3_err < tol
    assert inverse_chol_err < tol
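The serial reference above makes H0 and S0 symmetric with rank-k updates and shifts them so they are diagonally dominant and positive definite before diagonalizing. A compact NumPy restatement of that setup (a sketch; rk(1.0, X.copy(), 0.0, X) is assumed to act like the usual syrk update X <- X @ X.T):

import numpy as np
from scipy.linalg import eigh

N = 8
gen = np.random.RandomState(42)
H0 = gen.rand(N, N)
S0 = gen.rand(N, N)
H0 = H0 @ H0.T + 75.0 * np.eye(N)        # symmetric, diagonally dominant Hamiltonian
S0 = S0 @ S0.T + 50.0 * np.eye(N)        # symmetric positive-definite overlap

W0 = np.linalg.eigvalsh(H0)              # role of diagonalize()
W0_g = eigh(H0, S0, eigvals_only=True)   # role of general_diagonalize()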
def main(M=160, N=120, K=140, seed=42, mprocs=2, nprocs=2, dtype=float):
    gen = np.random.RandomState(seed)
    grid = BlacsGrid(world, mprocs, nprocs)

    if dtype == complex:
        epsilon = 1.0j
    else:
        epsilon = 0.0

    # Create descriptors for matrices on master:
    globA = grid.new_descriptor(M, K, M, K)
    globB = grid.new_descriptor(K, N, K, N)
    globC = grid.new_descriptor(M, N, M, N)
    globZ = grid.new_descriptor(K, K, K, K)
    globX = grid.new_descriptor(K, 1, K, 1)
    globY = grid.new_descriptor(M, 1, M, 1)
    globD = grid.new_descriptor(M, K, M, K)
    globS = grid.new_descriptor(M, M, M, M)
    globU = grid.new_descriptor(M, M, M, M)

    globHEC = grid.new_descriptor(K, K, K, K)

    # print globA.asarray()
    # Populate matrices local to master:
    A0 = gen.rand(*globA.shape) + epsilon * gen.rand(*globA.shape)
    B0 = gen.rand(*globB.shape) + epsilon * gen.rand(*globB.shape)
    D0 = gen.rand(*globD.shape) + epsilon * gen.rand(*globD.shape)
    X0 = gen.rand(*globX.shape) + epsilon * gen.rand(*globX.shape)

    # HEC = HEA * B
    HEA0 = gen.rand(*globHEC.shape) + epsilon * gen.rand(*globHEC.shape)
    if world.rank == 0:
        HEA0 = HEA0 + HEA0.T.conjugate()  # Make H0 Hermitian

    # Local result matrices
    Y0 = globY.empty(dtype=dtype)
    C0 = globC.zeros(dtype=dtype)
    Z0 = globZ.zeros(dtype=dtype)
    S0 = globS.zeros(dtype=dtype)  # zeros needed for rank-updates
    U0 = globU.zeros(dtype=dtype)  # zeros needed for rank-updates
    HEC0 = globB.zeros(dtype=dtype)

    # Local reference matrix product:
    if rank == 0:
        # C0[:] = np.dot(A0, B0)
        gemm(1.0, B0, A0, 0.0, C0)
        # gemm(1.0, A0, A0, 0.0, Z0, transa='t')
        print(A0.shape, Z0.shape)
        Z0[:] = np.dot(A0.T, A0)
        # Y0[:] = np.dot(A0, X0)
        gemv(1.0, A0, X0.ravel(), 0.0, Y0.ravel())
        r2k(1.0, A0, D0, 0.0, S0)
        rk(1.0, A0, 0.0, U0)

        HEC0[:] = np.dot(HEA0, B0)
        sM, sN = HEA0.shape
        # We don't use the upper triangle
        for i in range(sM):
            for j in range(sN):
                if i < j:
                    HEA0[i][j] = 99999.0
        if world.rank == 0:
            print(HEA0)
    assert globA.check(A0) and globB.check(B0) and globC.check(C0)
    assert globX.check(X0) and globY.check(Y0)
    assert globD.check(D0) and globS.check(S0) and globU.check(U0)

    # Create distributed descriptors with various block sizes:
    distA = grid.new_descriptor(M, K, 2, 2)
    distB = grid.new_descriptor(K, N, 2, 4)
    distC = grid.new_descriptor(M, N, 3, 2)
    distZ = grid.new_descriptor(K, K, 5, 7)
    distX = grid.new_descriptor(K, 1, 4, 1)
    distY = grid.new_descriptor(M, 1, 3, 1)
    distD = grid.new_descriptor(M, K, 2, 3)
    distS = grid.new_descriptor(M, M, 2, 2)
    distU = grid.new_descriptor(M, M, 2, 2)
    distHE = grid.new_descriptor(K, K, 2, 4)

    # Distributed matrices:
    A = distA.empty(dtype=dtype)
    B = distB.empty(dtype=dtype)
    C = distC.empty(dtype=dtype)
    Z = distZ.empty(dtype=dtype)
    X = distX.empty(dtype=dtype)
    Y = distY.empty(dtype=dtype)
    D = distD.empty(dtype=dtype)
    S = distS.zeros(dtype=dtype)  # zeros needed for rank-updates
    U = distU.zeros(dtype=dtype)  # zeros needed for rank-updates
    HEC = distB.zeros(dtype=dtype)
    HEA = distHE.zeros(dtype=dtype)
    Redistributor(world, globA, distA).redistribute(A0, A)
    Redistributor(world, globB, distB).redistribute(B0, B)
    Redistributor(world, globX, distX).redistribute(X0, X)
    Redistributor(world, globD, distD).redistribute(D0, D)
    Redistributor(world, globHEC, distHE).redistribute(HEA0, HEA)

    pblas_simple_gemm(distA, distB, distC, A, B, C)
    pblas_simple_gemm(distA, distA, distZ, A, A, Z, transa="T")
    pblas_simple_gemv(distA, distX, distY, A, X, Y)
    pblas_simple_r2k(distA, distD, distS, A, D, S)
    pblas_simple_rk(distA, distU, A, U)
    pblas_simple_hemm(distHE, distB, distB, HEA, B, HEC, uplo="L", side="L")

    # Collect result back on master
    C1 = globC.empty(dtype=dtype)
    Y1 = globY.empty(dtype=dtype)
    S1 = globS.zeros(dtype=dtype)  # zeros needed for rank-updates
    U1 = globU.zeros(dtype=dtype)  # zeros needed for rank-updates
    HEC1 = globB.zeros(dtype=dtype)
    Redistributor(world, distC, globC).redistribute(C, C1)
    Redistributor(world, distY, globY).redistribute(Y, Y1)
    Redistributor(world, distS, globS).redistribute(S, S1)
    Redistributor(world, distU, globU).redistribute(U, U1)
    Redistributor(world, distB, globB).redistribute(HEC, HEC1)

    if rank == 0:
        gemm_err = abs(C1 - C0).max()
        gemv_err = abs(Y1 - Y0).max()
        r2k_err = abs(S1 - S0).max()
        rk_err = abs(U1 - U0).max()
        hemm_err = abs(HEC1 - HEC0).max()
        print("gemm err", gemm_err)
        print("gemv err", gemv_err)
        print("r2k err", r2k_err)
        print("rk_err", rk_err)
        print("hemm_err", hemm_err)
    else:
        gemm_err = 0.0
        gemv_err = 0.0
        r2k_err = 0.0
        rk_err = 0.0
        hemm_err = 0.0

    gemm_err = world.sum(gemm_err)  # We don't like exceptions on only one cpu
    gemv_err = world.sum(gemv_err)
    r2k_err = world.sum(r2k_err)
    rk_err = world.sum(rk_err)
    hemm_err = world.sum(hemm_err)

    equal(gemm_err, 0, tol)
    equal(gemv_err, 0, tol)
    equal(r2k_err, 0, tol)
    equal(rk_err, 0, tol)
    equal(hemm_err, 0, tol)
Example #14
assert not c.any()

# Check gemm for transa='c'
a = np.arange(4 * 5 * 1 * 3).reshape(4, 5, 1, 3) * (3. - 2.j) + 4.
c = np.tensordot(a, a2.conj(), [[1, 2, 3], [1, 2, 3]])
gemm(1., a2, a, -1., c, 'c')
assert not c.any()

# Check axpy
c = 5.j * a
axpy(-5.j, a, c)
assert not c.any()

# Check rk
c = np.tensordot(a, a.conj(), [[1, 2, 3], [1, 2, 3]])
rk(1., a, -1., c)
tri2full(c)
assert not c.any()

# Check gemmdot for transa='c'
c = np.tensordot(a, a2.conj(), [-1, -1])
gemmdot(a, a2, beta=-1., out=c, trans='c')
assert not c.any()

# Check gemmdot for transa='n'
a2.shape = 3, 7, 5, 1
c = np.tensordot(a, a2, [-1, 0])
gemmdot(a, a2, beta=-1., out=c, trans='n')
assert not c.any()

# Check r2k
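The checks above compare the BLAS wrappers against tensordot references; the rk check, for example, rests on the identity that contracting all trailing axes equals a rank-k update of the row-flattened array. A stand-alone restatement with the same toy array:

import numpy as np

a = np.arange(4 * 5 * 1 * 3).reshape(4, 5, 1, 3) * (3. - 2.j) + 4.
A = a.reshape(len(a), -1)
ref = np.tensordot(a, a.conj(), [[1, 2, 3], [1, 2, 3]])
assert np.allclose(ref, A @ A.conj().T)   # what rk(1., a, -1., c) is checked against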
Example #15
 def update_hermitian(self, n_mG, deps_m, df_m, chi0_wGG):
     for w, omega in enumerate(self.omega_w):
         x_m = (-2 * df_m * deps_m / (omega.imag**2 + deps_m**2))**0.5
         nx_mG = n_mG.T.copy() * x_m
         rk(-self.prefactor, nx_mG, 1.0, chi0_wGG[w])
Example #16
assert not c.any()

# Check gemm for transa='c'
a = np.arange(4 * 5 * 1 * 3).reshape(4, 5, 1, 3) * (3. - 2.j) + 4.
c = np.tensordot(a, a2.conj(), [[1, 2, 3], [1, 2, 3]])
gemm(1., a2, a, -1., c, 'c')
assert not c.any()

# Check axpy
c = 5.j * a
axpy(-5.j, a, c)
assert not c.any()

# Check rk
c = np.tensordot(a, a.conj(), [[1, 2, 3], [1, 2, 3]])
rk(1., a, -1., c)
tri2full(c)
assert not c.any()

# Check gemmdot for transa='c'
c = np.tensordot(a, a2.conj(), [-1, -1])
gemmdot(a, a2, beta=-1., out=c, trans='c')
assert not c.any()

# Check gemmdot for transa='n'
a2.shape = 3, 7, 5, 1
c = np.tensordot(a, a2, [-1, 0])
gemmdot(a, a2, beta=-1., out=c, trans='n')
assert not c.any()

# Check r2k
Example #17
def main(N=72, seed=42, mprocs=2, nprocs=2, dtype=float):
    gen = np.random.RandomState(seed)
    grid = BlacsGrid(world, mprocs, nprocs)

    if dtype == complex:
        epsilon = 1.0j
    else:
        epsilon = 0.0

    # Create descriptors for matrices on master:
    glob = grid.new_descriptor(N, N, N, N)

    # print globA.asarray()
    # Populate matrices local to master:
    H0 = glob.zeros(dtype=dtype) + gen.rand(*glob.shape)
    S0 = glob.zeros(dtype=dtype) + gen.rand(*glob.shape)
    C0 = glob.empty(dtype=dtype)
    if rank == 0:
        # Complex case must have real numbers on the diagonal.
        # We make a simple complex Hermitian matrix below.
        H0 = H0 + epsilon * (0.1 * np.tri(N, N, k=-N // nprocs) +
                             0.3 * np.tri(N, N, k=-1))
        S0 = S0 + epsilon * (0.2 * np.tri(N, N, k=-N // nprocs) +
                             0.4 * np.tri(N, N, k=-1))
        # Make matrices symmetric
        rk(1.0, H0.copy(), 0.0, H0)
        rk(1.0, S0.copy(), 0.0, S0)
        # Overlap matrix must be semi-positive definite
        S0 = S0 + 50.0 * np.eye(N, N, 0)
        # Hamiltonian is usually diagonally dominant
        H0 = H0 + 75.0 * np.eye(N, N, 0)
        C0 = S0.copy()
        S0_inv = S0.copy()

    # Local result matrices
    W0 = np.empty((N), dtype=float)
    W0_g = np.empty((N), dtype=float)

    # Calculate eigenvalues / other serial results
    if rank == 0:
        diagonalize(H0.copy(), W0)
        general_diagonalize(H0.copy(), W0_g, S0.copy())
        inverse_cholesky(C0)  # result returned in lower triangle
        tri2full(S0_inv, 'L')
        S0_inv = inv(S0_inv)
        # tri2full(C0) # symmetrize

    assert glob.check(H0) and glob.check(S0) and glob.check(C0)

    # Create distributed descriptors with various block sizes:
    dist = grid.new_descriptor(N, N, 8, 8)

    # Distributed matrices:
    # We can use empty here, but end up with garbage on the other
    # half of the triangle when we redistribute.
    # This is fine because ScaLAPACK does not care.

    H = dist.empty(dtype=dtype)
    S = dist.empty(dtype=dtype)
    Sinv = dist.empty(dtype=dtype)
    Z = dist.empty(dtype=dtype)
    C = dist.empty(dtype=dtype)

    # Eigenvalues are non-BLACS matrices
    W = np.empty((N), dtype=float)
    W_dc = np.empty((N), dtype=float)
    W_mr3 = np.empty((N), dtype=float)
    W_g = np.empty((N), dtype=float)
    W_g_dc = np.empty((N), dtype=float)
    W_g_mr3 = np.empty((N), dtype=float)

    Glob2dist = Redistributor(world, glob, dist)
    Glob2dist.redistribute(H0, H, uplo='L')
    Glob2dist.redistribute(S0, S, uplo='L')
    Glob2dist.redistribute(S0, C, uplo='L')  # C0 was previously overwritten
    Glob2dist.redistribute(S0, Sinv, uplo='L')

    # we don't test the expert drivers anymore since there
    # might be a buffer overflow error
    ## scalapack_diagonalize_ex(dist, H.copy(), Z, W, 'L')
    scalapack_diagonalize_dc(dist, H.copy(), Z, W_dc, 'L')
    ## scalapack_diagonalize_mr3(dist, H.copy(), Z, W_mr3, 'L')
    ## scalapack_general_diagonalize_ex(dist, H.copy(), S.copy(), Z, W_g, 'L')
    scalapack_general_diagonalize_dc(dist, H.copy(), S.copy(), Z, W_g_dc, 'L')
    ## scalapack_general_diagonalize_mr3(dist, H.copy(), S.copy(), Z, W_g_mr3, 'L')

    scalapack_inverse_cholesky(dist, C, 'L')

    if dtype == complex:  # Only supported for complex for now
        scalapack_inverse(dist, Sinv, 'L')
    # Undo redistribute
    C_test = glob.empty(dtype=dtype)
    Sinv_test = glob.empty(dtype=dtype)
    Dist2glob = Redistributor(world, dist, glob)
    Dist2glob.redistribute(C, C_test)
    Dist2glob.redistribute(Sinv, Sinv_test)

    if rank == 0:
        ## diag_ex_err = abs(W - W0).max()
        diag_dc_err = abs(W_dc - W0).max()
        ## diag_mr3_err = abs(W_mr3 - W0).max()
        ## general_diag_ex_err = abs(W_g - W0_g).max()
        general_diag_dc_err = abs(W_g_dc - W0_g).max()
        ## general_diag_mr3_err = abs(W_g_mr3 - W0_g).max()
        inverse_chol_err = abs(C_test - C0).max()

        tri2full(Sinv_test, 'L')
        inverse_err = abs(Sinv_test - S0_inv).max()
        ## print 'diagonalize ex err', diag_ex_err
        print('diagonalize dc err', diag_dc_err)
        ## print 'diagonalize mr3 err', diag_mr3_err
        ## print 'general diagonalize ex err', general_diag_ex_err
        print('general diagonalize dc err', general_diag_dc_err)
        ## print 'general diagonalize mr3 err', general_diag_mr3_err
        print('inverse chol err', inverse_chol_err)
        if dtype == complex:
            print('inverse err', inverse_err)
    else:
        ## diag_ex_err = 0.0
        diag_dc_err = 0.0
        ## diag_mr3_err = 0.0
        ## general_diag_ex_err = 0.0
        general_diag_dc_err = 0.0
        ## general_diag_mr3_err = 0.0
        inverse_chol_err = 0.0
        inverse_err = 0.0

    # We don't like exceptions on only one cpu
    ## diag_ex_err = world.sum(diag_ex_err)
    diag_dc_err = world.sum(diag_dc_err)
    ## diag_mr3_err = world.sum(diag_mr3_err)
    ## general_diag_ex_err = world.sum(general_diag_ex_err)
    general_diag_dc_err = world.sum(general_diag_dc_err)
    ## general_diag_mr3_err = world.sum(general_diag_mr3_err)
    inverse_chol_err = world.sum(inverse_chol_err)
    inverse_err = world.sum(inverse_err)
    ## assert diag_ex_err < tol
    assert diag_dc_err < tol
    ## assert diag_mr3_err < tol
    ## assert general_diag_ex_err < tol
    assert general_diag_dc_err < tol
    ## assert general_diag_mr3_err < tol
    assert inverse_chol_err < tol
    if dtype == complex:
        assert inverse_err < tol
Example #18
def makeU(gpwfile='grid.gpw', orbitalfile='w_wG__P_awi.pckl',
          rotationfile='eps_q__U_pq.pckl', tolerance=1e-5,
          writeoptimizedpairs=False, dppname='D_pp.pckl', S_w=None):

    # S_w: None or diagonal of overlap matrix. In the latter case
    # the optimized and truncated pair orbitals are obtained from
    # normalized (to 1) orbitals.
    #     
    # Tolerance is used for truncation of optimized pairorbitals
    #calc = GPAW(gpwfile, txt=None)
    from gpaw import GPAW
    from gpaw.utilities import pack, unpack
    from gpaw.utilities.blas import rk, gemm
    from gpaw.mpi import world, MASTER
    calc = GPAW(gpwfile, txt='pairorb.txt') # XXX
    gd = calc.wfs.gd
    setups = calc.wfs.setups
    myatoms = calc.density.D_asp.keys()
    del calc

    # Load orbitals on master and distribute to slaves
    if world.rank == MASTER:
        wglobal_wG, P_awi = pickle.load(open(orbitalfile, 'rb'))
        Nw = len(wglobal_wG)
        print('Estimated total (serial) mem usage: %0.3f GB' % (
            np.prod(gd.N_c) * Nw**2 * 8 / 1024.**3))
    else:
        wglobal_wG = None
        Nw = 0
    Nw = gd.comm.sum(Nw) #distribute Nw to all nodes
    w_wG = gd.empty(n=Nw)
    gd.distribute(wglobal_wG, w_wG)
    del wglobal_wG
    
    # Make pairorbitals
    f_pG = gd.zeros(n=Nw**2)
    Np = len(f_pG)
    for p, (w1, w2) in enumerate(np.ndindex(Nw, Nw)):
        np.multiply(w_wG[w1], w_wG[w2], f_pG[p])
    del w_wG
    assert f_pG.flags.contiguous 
    # Make pairorbital overlap (lower triangle only)
    D_pp = np.zeros((Nw**2, Nw**2))
    rk(gd.dv, f_pG, 0., D_pp)

    # Add atomic corrections to pairorbital overlap
    for a in myatoms:
        if setups[a].type != 'ghost':
            P_pp = np.array([pack(np.outer(P_awi[a][w1], P_awi[a][w2]))
                             for w1, w2 in np.ndindex(Nw, Nw)])
            I4_pp = setups[a].four_phi_integrals()
            A = np.zeros((len(I4_pp), len(P_pp)))
            gemm(1.0, P_pp, I4_pp, 0.0, A, 't')
            gemm(1.0, A, P_pp, 1.0, D_pp)
            #D_pp += np.dot(P_pp, np.dot(I4_pp, P_pp.T))
   
    # Sum all contributions to the master
    gd.comm.sum(D_pp, MASTER)

    if world.rank == MASTER:
        if S_w is not None:
            print('renormalizing pairorb overlap matrix (D_pp)')
            S2 = np.sqrt(S_w)
            for pa, (wa1, wa2) in enumerate(np.ndindex(Nw, Nw)):
                for pb, (wb1, wb2) in enumerate(np.ndindex(Nw, Nw)):
                    D_pp[pa, pb] /= S2[wa1] * S2[wa2] * S2[wb1] * S2[wb2]

        D_pp.dump(dppname) # XXX if the diagonalization below (on MASTER only)
                           # fails, then one can always restart the stuff
                           # below using only the stored D_pp matrix

        # Determine eigenvalues and vectors on master only
        eps_q, U_pq = np.linalg.eigh(D_pp, UPLO='L')
        del D_pp
        indices = np.argsort(-eps_q.real)
        eps_q = np.ascontiguousarray(eps_q.real[indices])
        U_pq = np.ascontiguousarray(U_pq[:, indices])

        # Truncate
        indices = eps_q > tolerance
        U_pq = np.ascontiguousarray(U_pq[:, indices])
        eps_q = np.ascontiguousarray(eps_q[indices])

        # Dump to file
        pickle.dump((eps_q, U_pq), open(rotationfile, 'wb'), 2)

    if writeoptimizedpairs is not False:
        assert world.size == 1 # works in parallel if U and eps are broadcast
        Uisq_qp = (U_pq / np.sqrt(eps_q)).T.copy()
        g_qG = gd.zeros(n=len(eps_q))
        gemm(1.0, f_pG, Uisq_qp, 0.0, g_qG)
        g_qG = gd.collect(g_qG)
        if world.rank == MASTER:
            P_app = dict([(a, np.array([pack(np.outer(P_wi[w1], P_wi[w2]),
                                             tolerance=1e3)
                                        for w1, w2 in np.ndindex(Nw, Nw)]))
                          for a, P_wi in P_awi.items()])
            P_aqp = dict([(a, np.dot(Uisq_qp, P_pp))
                          for a, P_pp in P_app.items()])
            pickle.dump((g_qG, P_aqp), open(writeoptimizedpairs, 'wb'), 2)
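Stripped of the grid machinery and PAW corrections, the construction above is: pointwise pair orbitals, a rank-k overlap matrix, then eigendecomposition and truncation. A dense toy-sized NumPy sketch of that pipeline (dv stands in for gd.dv; all sizes are placeholders):

import numpy as np

Nw, Ng, dv = 3, 50, 0.1
gen = np.random.RandomState(0)
w_wG = gen.rand(Nw, Ng)                               # toy orbitals on a flattened grid
f_pG = np.array([w_wG[w1] * w_wG[w2]
                 for w1, w2 in np.ndindex(Nw, Nw)])   # pair orbitals
D_pp = dv * f_pG @ f_pG.T                             # pair-orbital overlap (what rk builds)

eps_q, U_pq = np.linalg.eigh(D_pp)
order = np.argsort(-eps_q)
eps_q, U_pq = eps_q[order], U_pq[:, order]
keep = eps_q > 1e-5                                   # truncation, like `tolerance`
eps_q, U_pq = eps_q[keep], U_pq[:, keep]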
Example #19
    def integrate(self, a_xg, b_yg=None,
                  global_integral=True, hermitian=False,
                  _transposed_result=None):
        """Integrate function(s) over domain.

        a_xg: ndarray
            Function(s) to be integrated.
        b_yg: ndarray
            If present, integrate a_xg.conj() * b_yg.
        global_integral: bool
            If the array(s) are distributed over several domains, then the
            total sum will be returned.  To get the local contribution
            only, use global_integral=False.
        hermitian: bool
            Result is hermitian.
        _transposed_result: ndarray
            Long story.  Don't use this unless you are a method of the
            MatrixOperator class ..."""

        if b_yg is None:
            # Only one array:
            assert self.dtype == float
            return a_xg[..., 0].real * self.gd.dv

        A_xg = a_xg.reshape((-1, a_xg.shape[-1]))
        B_yg = b_yg.reshape((-1, b_yg.shape[-1]))

        alpha = self.gd.dv / self.gd.N_c.prod()

        if self.dtype == float:
            alpha *= 2
            A_xg = A_xg.view(float)
            B_yg = B_yg.view(float)

        if _transposed_result is None:
            result_yx = np.zeros((len(B_yg), len(A_xg)), self.dtype)
        else:
            result_yx = _transposed_result

        if a_xg is b_yg:
            rk(alpha, A_xg, 0.0, result_yx)
        elif hermitian:
            r2k(0.5 * alpha, A_xg, B_yg, 0.0, result_yx)
        else:
            gemm(alpha, A_xg, B_yg, 0.0, result_yx, 'c')

        if self.dtype == float:
            correction_yx = np.outer(B_yg[:, 0], A_xg[:, 0])
            if hermitian:
                result_yx -= 0.25 * alpha * (correction_yx + correction_yx.T)
            else:
                result_yx -= 0.5 * alpha * correction_yx

        xshape = a_xg.shape[:-1]
        yshape = b_yg.shape[:-1]
        result = result_yx.T.reshape(xshape + yshape)

        if result.ndim == 0:
            return result.item()
        else:
            return result
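The real-dtype branch above relies on only half of the plane-wave coefficients being stored for real fields: the full inner product is twice the real part of the stored half minus the doubly counted G=0 term. A small self-contained check of that identity using rfft (odd length, so there is no Nyquist coefficient to special-case):

import numpy as np

n = 15
gen = np.random.RandomState(3)
a, b = gen.rand(n), gen.rand(n)
A, B = np.fft.fft(a), np.fft.fft(b)          # full coefficient set
Ah, Bh = np.fft.rfft(a), np.fft.rfft(b)      # half set, as stored for real data

full = np.vdot(A, B).real                    # sum over all G of conj(A_G) * B_G
half = 2 * np.vdot(Ah, Bh).real - (Ah[0] * Bh[0]).real
assert np.isclose(full, half)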
Example #20
def makeU(gpwfile='grid.gpw', orbitalfile='w_wG__P_awi.pckl',
          rotationfile='eps_q__U_pq.pckl', tolerance=1e-5,
          writeoptimizedpairs=False, dppname='D_pp.pckl', S_w=None):

    # S_w: None or diagonal of overlap matrix. In the latter case
    # the optimized and truncated pair orbitals are obtained from
    # normalized (to 1) orbitals.
    #     
    # Tolerance is used for truncation of optimized pairorbitals
    #calc = GPAW(gpwfile, txt=None)
    from gpaw import GPAW
    from gpaw.utilities import pack, unpack
    from gpaw.utilities.blas import rk, gemm
    from gpaw.mpi import world, MASTER, serial_comm
    calc = GPAW(gpwfile, txt='pairorb.txt') # XXX
    gd = calc.wfs.gd
    setups = calc.wfs.setups
    myatoms = calc.density.D_asp.keys()
    del calc

    # Load orbitals on master and distribute to slaves
    if world.rank == MASTER:
        wglobal_wG, P_awi = pickle.load(open(orbitalfile, 'rb'))
        Nw = len(wglobal_wG)
        print('Estimated total (serial) mem usage: %0.3f GB' % (
            np.prod(gd.N_c) * Nw**2 * 8 / 1024.**3))
    else:
        wglobal_wG = None
        Nw = 0
    Nw = gd.comm.sum(Nw) #distribute Nw to all nodes
    w_wG = gd.empty(n=Nw)
    gd.distribute(wglobal_wG, w_wG)
    del wglobal_wG
    
    # Make pairorbitals
    f_pG = gd.zeros(n=Nw**2)
    Np = len(f_pG)
    for p, (w1, w2) in enumerate(np.ndindex(Nw, Nw)):
        np.multiply(w_wG[w1], w_wG[w2], f_pG[p])
    del w_wG
    assert f_pG.flags.contiguous 
    # Make pairorbital overlap (lower triangle only)
    D_pp = np.zeros((Nw**2, Nw**2))
    rk(gd.dv, f_pG, 0., D_pp)

    # Add atomic corrections to pairorbital overlap
    for a in myatoms:
        if setups[a].type != 'ghost':
            P_pp = np.array([pack(np.outer(P_awi[a][w1], P_awi[a][w2]))
                             for w1, w2 in np.ndindex(Nw, Nw)])
            I4_pp = setups[a].four_phi_integrals()
            A = np.zeros((len(I4_pp), len(P_pp)))
            gemm(1.0, P_pp, I4_pp, 0.0, A, 't')
            gemm(1.0, A, P_pp, 1.0, D_pp)
            #D_pp += np.dot(P_pp, np.dot(I4_pp, P_pp.T))
   
    # Sum all contributions to the master
    gd.comm.sum(D_pp, MASTER)

    if world.rank == MASTER:
        if S_w is not None:
            print('renormalizing pairorb overlap matrix (D_pp)')
            S2 = np.sqrt(S_w)
            for pa, (wa1, wa2) in enumerate(np.ndindex(Nw, Nw)):
                for pb, (wb1, wb2) in enumerate(np.ndindex(Nw, Nw)):
                    D_pp[pa, pb] /= S2[wa1] * S2[wa2] * S2[wb1] * S2[wb2]

        D_pp.dump(dppname) # XXX if the diagonalization below (on MASTER only)
                           # fails, then one can always restart the stuff
                           # below using only the stored D_pp matrix

        # Determine eigenvalues and vectors on master only
        eps_q, U_pq = np.linalg.eigh(D_pp, UPLO='L')
        del D_pp
        indices = np.argsort(-eps_q.real)
        eps_q = np.ascontiguousarray(eps_q.real[indices])
        U_pq = np.ascontiguousarray(U_pq[:, indices])

        # Truncate
        indices = eps_q > tolerance
        U_pq = np.ascontiguousarray(U_pq[:, indices])
        eps_q = np.ascontiguousarray(eps_q[indices])

        # Dump to file
        pickle.dump((eps_q, U_pq), open(rotationfile, 'wb'), 2)

    if writeoptimizedpairs is not False:
        assert world.size == 1 # works in parallel if U and eps are broadcast
        Uisq_qp = (U_pq / np.sqrt(eps_q)).T.copy()
        g_qG = gd.zeros(n=len(eps_q))
        gemm(1.0, f_pG, Uisq_qp, 0.0, g_qG)
        g_qG = gd.collect(g_qG)
        if world.rank == MASTER:
            P_app = dict([(a, np.array([pack(np.outer(P_wi[w1], P_wi[w2]),
                                             tolerance=1e3)
                                        for w1, w2 in np.ndindex(Nw, Nw)]))
                          for a, P_wi in P_awi.items()])
            P_aqp = dict([(a, np.dot(Uisq_qp, P_pp))
                          for a, P_pp in P_app.items()])
            pickle.dump((g_qG, P_aqp), open(writeoptimizedpairs, 'wb'), 2)
Example #21
    def integrate(self, a_xg, b_yg=None,
                  global_integral=True, hermitian=False,
                  _transposed_result=None):
        """Integrate function(s) over domain.

        a_xg: ndarray
            Function(s) to be integrated.
        b_yg: ndarray
            If present, integrate a_xg.conj() * b_yg.
        global_integral: bool
            If the array(s) are distributed over several domains, then the
            total sum will be returned.  To get the local contribution
            only, use global_integral=False.
        hermitian: bool
            Result is hermitian.
        _transposed_result: ndarray
            Long story.  Don't use this unless you are a method of the
            MatrixOperator class ..."""
        
        if b_yg is None:
            # Only one array:
            assert self.dtype == float
            return a_xg[..., 0].real * self.gd.dv

        A_xg = a_xg.reshape((-1, a_xg.shape[-1]))
        B_yg = b_yg.reshape((-1, b_yg.shape[-1]))

        alpha = self.gd.dv / self.gd.N_c.prod()

        if self.dtype == float:
            alpha *= 2
            A_xg = A_xg.view(float)
            B_yg = B_yg.view(float)

        if _transposed_result is None:
            result_yx = np.zeros((len(B_yg), len(A_xg)), self.dtype)
        else:
            result_yx = _transposed_result

        if a_xg is b_yg:
            rk(alpha, A_xg, 0.0, result_yx)
        elif hermitian:
            r2k(0.5 * alpha, A_xg, B_yg, 0.0, result_yx)
        else:
            gemm(alpha, A_xg, B_yg, 0.0, result_yx, 'c')
        
        if self.dtype == float:
            correction_yx = np.outer(B_yg[:, 0], A_xg[:, 0])
            if hermitian:
                result_yx -= 0.25 * alpha * (correction_yx + correction_yx.T)
            else:
                result_yx -= 0.5 * alpha * correction_yx

        xshape = a_xg.shape[:-1]
        yshape = b_yg.shape[:-1]
        result = result_yx.T.reshape(xshape + yshape)
        
        if result.ndim == 0:
            return result.item()
        else:
            return result
Example #22
def main(M=160, N=120, K=140, seed=42, mprocs=2, nprocs=2, dtype=float):
    gen = np.random.RandomState(seed)
    grid = BlacsGrid(world, mprocs, nprocs)

    if dtype == complex:
        epsilon = 1.0j
    else:
        epsilon = 0.0

    # Create descriptors for matrices on master:
    globA = grid.new_descriptor(M, K, M, K)
    globB = grid.new_descriptor(K, N, K, N)
    globC = grid.new_descriptor(M, N, M, N)
    globZ = grid.new_descriptor(K, K, K, K)
    globX = grid.new_descriptor(K, 1, K, 1)
    globY = grid.new_descriptor(M, 1, M, 1)
    globD = grid.new_descriptor(M, K, M, K)
    globS = grid.new_descriptor(M, M, M, M)
    globU = grid.new_descriptor(M, M, M, M)

    globHEC = grid.new_descriptor(K, K, K, K)

    # print globA.asarray()
    # Populate matrices local to master:
    A0 = gen.rand(*globA.shape) + epsilon * gen.rand(*globA.shape)
    B0 = gen.rand(*globB.shape) + epsilon * gen.rand(*globB.shape)
    D0 = gen.rand(*globD.shape) + epsilon * gen.rand(*globD.shape)
    X0 = gen.rand(*globX.shape) + epsilon * gen.rand(*globX.shape)

    # HEC = HEA * B
    HEA0 = gen.rand(*globHEC.shape) + epsilon * gen.rand(*globHEC.shape)
    if world.rank == 0:
        HEA0 = HEA0 + HEA0.T.conjugate()  # Make H0 Hermitian
        HEA0 = np.ascontiguousarray(HEA0)

    # Local result matrices
    Y0 = globY.empty(dtype=dtype)
    C0 = globC.zeros(dtype=dtype)
    Z0 = globZ.zeros(dtype=dtype)
    S0 = globS.zeros(dtype=dtype)  # zeros needed for rank-updates
    U0 = globU.zeros(dtype=dtype)  # zeros needed for rank-updates
    HEC0 = globB.zeros(dtype=dtype)

    # Local reference matrix product:
    if rank == 0:
        # C0[:] = np.dot(A0, B0)
        gemm(1.0, B0, A0, 0.0, C0)
        # gemm(1.0, A0, A0, 0.0, Z0, transa='t')
        print(A0.shape, Z0.shape)
        Z0[:] = np.dot(A0.T, A0)
        # Y0[:] = np.dot(A0, X0)
        gemv(1.0, A0, X0.ravel(), 0.0, Y0.ravel())
        r2k(1.0, A0, D0, 0.0, S0)
        rk(1.0, A0, 0.0, U0)

        HEC0[:] = np.dot(HEA0, B0)
        sM, sN = HEA0.shape
        # We don't use the upper triangle
        for i in range(sM):
            for j in range(sN):
                if i < j:
                    HEA0[i][j] = 99999.0
        if world.rank == 0:
            print(HEA0)
    assert globA.check(A0) and globB.check(B0) and globC.check(C0)
    assert globX.check(X0) and globY.check(Y0)
    assert globD.check(D0) and globS.check(S0) and globU.check(U0)

    # Create distributed descriptors with various block sizes:
    distA = grid.new_descriptor(M, K, 2, 2)
    distB = grid.new_descriptor(K, N, 2, 4)
    distC = grid.new_descriptor(M, N, 3, 2)
    distZ = grid.new_descriptor(K, K, 5, 7)
    distX = grid.new_descriptor(K, 1, 4, 1)
    distY = grid.new_descriptor(M, 1, 3, 1)
    distD = grid.new_descriptor(M, K, 2, 3)
    distS = grid.new_descriptor(M, M, 2, 2)
    distU = grid.new_descriptor(M, M, 2, 2)
    distHE = grid.new_descriptor(K, K, 2, 4)

    # Distributed matrices:
    A = distA.empty(dtype=dtype)
    B = distB.empty(dtype=dtype)
    C = distC.empty(dtype=dtype)
    Z = distZ.empty(dtype=dtype)
    X = distX.empty(dtype=dtype)
    Y = distY.empty(dtype=dtype)
    D = distD.empty(dtype=dtype)
    S = distS.zeros(dtype=dtype)  # zeros needed for rank-updates
    U = distU.zeros(dtype=dtype)  # zeros needed for rank-updates
    HEC = distB.zeros(dtype=dtype)
    HEA = distHE.zeros(dtype=dtype)
    Redistributor(world, globA, distA).redistribute(A0, A)
    Redistributor(world, globB, distB).redistribute(B0, B)
    Redistributor(world, globX, distX).redistribute(X0, X)
    Redistributor(world, globD, distD).redistribute(D0, D)
    Redistributor(world, globHEC, distHE).redistribute(HEA0, HEA)

    pblas_simple_gemm(distA, distB, distC, A, B, C)
    pblas_simple_gemm(distA, distA, distZ, A, A, Z, transa='T')
    pblas_simple_gemv(distA, distX, distY, A, X, Y)
    pblas_simple_r2k(distA, distD, distS, A, D, S)
    pblas_simple_rk(distA, distU, A, U)
    pblas_simple_hemm(distHE, distB, distB, HEA, B, HEC, uplo='L', side='L')

    # Collect result back on master
    C1 = globC.empty(dtype=dtype)
    Y1 = globY.empty(dtype=dtype)
    S1 = globS.zeros(dtype=dtype)  # zeros needed for rank-updates
    U1 = globU.zeros(dtype=dtype)  # zeros needed for rank-updates
    HEC1 = globB.zeros(dtype=dtype)
    Redistributor(world, distC, globC).redistribute(C, C1)
    Redistributor(world, distY, globY).redistribute(Y, Y1)
    Redistributor(world, distS, globS).redistribute(S, S1)
    Redistributor(world, distU, globU).redistribute(U, U1)
    Redistributor(world, distB, globB).redistribute(HEC, HEC1)

    if rank == 0:
        gemm_err = abs(C1 - C0).max()
        gemv_err = abs(Y1 - Y0).max()
        r2k_err = abs(S1 - S0).max()
        rk_err = abs(U1 - U0).max()
        hemm_err = abs(HEC1 - HEC0).max()
        print('gemm err', gemm_err)
        print('gemv err', gemv_err)
        print('r2k err', r2k_err)
        print('rk_err', rk_err)
        print('hemm_err', hemm_err)
    else:
        gemm_err = 0.0
        gemv_err = 0.0
        r2k_err = 0.0
        rk_err = 0.0
        hemm_err = 0.0

    gemm_err = world.sum(gemm_err)  # We don't like exceptions on only one cpu
    gemv_err = world.sum(gemv_err)
    r2k_err = world.sum(r2k_err)
    rk_err = world.sum(rk_err)
    hemm_err = world.sum(hemm_err)

    equal(gemm_err, 0, tol)
    equal(gemv_err, 0, tol)
    equal(r2k_err, 0, tol)
    equal(rk_err, 0, tol)
    equal(hemm_err, 0, tol)
Example #23
    def iterate_one_k_point(self, hamiltonian, wfs, kpt):
        """Do Davidson iterations for the kpoint"""
        niter = self.niter
        nbands = self.nbands

        self.subspace_diagonalize(hamiltonian, wfs, kpt)
                    
        H_2n2n = self.H_2n2n
        S_2n2n = self.S_2n2n
        eps_2n = self.eps_2n
        psit2_nG = wfs.matrixoperator.suggest_temporary_buffer()

        self.timer.start('Davidson')
        R_nG = self.Htpsit_nG 
        self.calculate_residuals(kpt, wfs, hamiltonian, kpt.psit_nG,
                                 kpt.P_ani, kpt.eps_n, R_nG)

        for nit in range(niter):
            H_2n2n[:] = 0.0
            S_2n2n[:] = 0.0

            error = 0.0
            for n in range(nbands):
                if kpt.f_n is None:
                    weight = kpt.weight
                else:
                    weight = kpt.f_n[n]
                if self.nbands_converge != 'occupied':
                    if n < self.nbands_converge:
                        weight = kpt.weight
                    else:
                        weight = 0.0
                error += weight * np.vdot(R_nG[n], R_nG[n]).real

                H_2n2n[n,n] = kpt.eps_n[n]
                S_2n2n[n,n] = 1.0
                psit2_nG[n] = self.preconditioner(R_nG[n], kpt)
            
            # Calculate projections
            P2_ani = wfs.pt.dict(nbands)
            wfs.pt.integrate(psit2_nG, P2_ani, kpt.q)
            
            # Hamiltonian matrix
            # <psi2 | H | psi>
            wfs.kin.apply(psit2_nG, self.Htpsit_nG, kpt.phase_cd)
            hamiltonian.apply_local_potential(psit2_nG, self.Htpsit_nG, kpt.s)
            gemm(self.gd.dv, kpt.psit_nG, self.Htpsit_nG, 0.0, self.H_nn, 'c')

            for a, P_ni in kpt.P_ani.items():
                P2_ni = P2_ani[a]
                dH_ii = unpack(hamiltonian.dH_asp[a][kpt.s])
                self.H_nn += np.dot(P2_ni, np.dot(dH_ii, P_ni.T.conj()))

            self.gd.comm.sum(self.H_nn, 0)
            H_2n2n[nbands:, :nbands] = self.H_nn

            # <psi2 | H | psi2>
            r2k(0.5 * self.gd.dv, psit2_nG, self.Htpsit_nG, 0.0, self.H_nn)
            for a, P2_ni in P2_ani.items():
                dH_ii = unpack(hamiltonian.dH_asp[a][kpt.s])
                self.H_nn += np.dot(P2_ni, np.dot(dH_ii, P2_ni.T.conj()))

            self.gd.comm.sum(self.H_nn, 0)
            H_2n2n[nbands:, nbands:] = self.H_nn

            # Overlap matrix
            # <psi2 | S | psi>
            gemm(self.gd.dv, kpt.psit_nG, psit2_nG, 0.0, self.S_nn, "c")
        
            for a, P_ni in kpt.P_ani.items():
                P2_ni = P2_ani[a]
                dO_ii = wfs.setups[a].dO_ii
                self.S_nn += np.dot(P2_ni, np.inner(dO_ii, P_ni.conj()))

            self.gd.comm.sum(self.S_nn, 0)
            S_2n2n[nbands:, :nbands] = self.S_nn

            # <psi2 | S | psi2>
            rk(self.gd.dv, psit2_nG, 0.0, self.S_nn)
            for a, P2_ni in P2_ani.items():
                dO_ii = wfs.setups[a].dO_ii
                self.S_nn += np.dot(P2_ni, np.dot(dO_ii, P2_ni.T.conj()))

            self.gd.comm.sum(self.S_nn, 0)
            S_2n2n[nbands:, nbands:] = self.S_nn

            if self.gd.comm.rank == 0:
                general_diagonalize(H_2n2n, eps_2n, S_2n2n)

            self.gd.comm.broadcast(H_2n2n, 0)
            self.gd.comm.broadcast(eps_2n, 0)

            kpt.eps_n[:] = eps_2n[:nbands]

            # Rotate psit_nG
            gemm(1.0, kpt.psit_nG, H_2n2n[:nbands, :nbands],
                 0.0, self.Htpsit_nG)
            gemm(1.0, psit2_nG, H_2n2n[:nbands, nbands:],
                 1.0, self.Htpsit_nG)
            kpt.psit_nG, self.Htpsit_nG = self.Htpsit_nG, kpt.psit_nG

            # Rotate P_uni:
            for a, P_ni in kpt.P_ani.items():
                P2_ni = P2_ani[a]
                gemm(1.0, P_ni.copy(), H_2n2n[:nbands, :nbands], 0.0, P_ni)
                gemm(1.0, P2_ni, H_2n2n[:nbands, nbands:], 1.0, P_ni)

            if nit < niter - 1:
                wfs.kin.apply(kpt.psit_nG, self.Htpsit_nG, kpt.phase_cd)
                hamiltonian.apply_local_potential(kpt.psit_nG, self.Htpsit_nG,
                                                  kpt.s)
                R_nG = self.Htpsit_nG
                self.calculate_residuals(kpt, wfs, hamiltonian, kpt.psit_nG,
                                         kpt.P_ani, kpt.eps_n, R_nG)

        self.timer.stop('Davidson')
        error = self.gd.comm.sum(error)
        return error
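Stripped of grid, PAW and parallel details, one Davidson step above expands the bands with their preconditioned residuals, assembles the 2n-by-2n subspace Hamiltonian and overlap, and rotates the bands with the lowest generalized eigenvectors. A dense toy-model sketch of that step (standard eigenproblem, crude preconditioner, all names hypothetical):

import numpy as np
from scipy.linalg import eigh

n, nbands = 12, 3
gen = np.random.RandomState(1)
H = gen.rand(n, n)
H = 0.5 * (H + H.T) + np.diag(np.arange(n, dtype=float))   # toy Hermitian Hamiltonian

psit = np.linalg.qr(gen.rand(n, nbands))[0]      # current orthonormal bands
eps = np.diag(psit.T @ H @ psit)                 # role of kpt.eps_n
R = H @ psit - psit * eps                        # residuals
psit2 = R / np.arange(1, n + 1)[:, None]         # stand-in for the preconditioner

H2 = np.block([[psit.T @ H @ psit,  psit.T @ H @ psit2],
               [psit2.T @ H @ psit, psit2.T @ H @ psit2]])   # H_2n2n
S2 = np.block([[psit.T @ psit,  psit.T @ psit2],
               [psit2.T @ psit, psit2.T @ psit2]])           # S_2n2n
eps_2n, C = eigh(H2, S2)                         # role of general_diagonalize
psit = np.hstack([psit, psit2]) @ C[:, :nbands]  # rotate psit_nG
eps = eps_2n[:nbands]                            # updated eigenvalues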