def transpose(a, axes=None, inplace=False, out=None):
    '''Transpose array for better memory efficiency

    Args:
        a : ndarray
            Array to transpose.  2D arrays are always supported.  3D arrays
            are passed to the C kernel only when ``axes == (0,2,1)``.

    Kwargs:
        axes : tuple of ints
            Axis permutation.  It is only consulted when ``a`` is not 2D.
        inplace : bool
            Transpose the square 2D array ``a`` block by block inside its
            own memory and return ``a`` itself.
        out : ndarray or buffer
            Memory that receives the result of the C kernel.  It is not
            used by the in-place path nor by the pure NumPy fallback.

    Returns:
        ``a`` when ``inplace`` is set, otherwise an array holding the
        transposed data.  For inputs that are neither 2D nor handled by the
        C kernel, the result of ``a.transpose(axes)`` is returned.

    Raises:
        NotImplementedError : C-contiguous double/complex128 input that is
            neither 2D nor 3D with ``axes == (0,2,1)``.

    Examples:

    >>> transpose(numpy.ones((3,2)))
    [[ 1.  1.  1.]
     [ 1.  1.  1.]]
    '''
    if inplace:
        arow, acol = a.shape
        assert arow == acol
        # The scratch block must carry the dtype of a.  A default float64
        # buffer silently drops the imaginary part of complex data.
        tmp = numpy.empty((BLOCK_DIM,BLOCK_DIM), dtype=a.dtype)
        for c0, c1 in misc.prange(0, acol, BLOCK_DIM):
            for r0, r1 in misc.prange(0, c0, BLOCK_DIM):
                # Swap the off-diagonal block with its mirror image
                tmp[:c1-c0,:r1-r0] = a[c0:c1,r0:r1]
                a[c0:c1,r0:r1] = a[r0:r1,c0:c1].T
                a[r0:r1,c0:c1] = tmp[:c1-c0,:r1-r0].T
            # diagonal blocks
            a[c0:c1,c0:c1] = a[c0:c1,c0:c1].T
        return a

    # The C kernels are only selected for C-contiguous double / complex128
    # data.  Every other dtype (int, float32, complex64, ...) must take the
    # NumPy path, otherwise it would be handed to the complex128 kernel.
    if (not a.flags.c_contiguous
        or (a.dtype != numpy.double and a.dtype != numpy.complex128)):
        if a.ndim == 2:
            arow, acol = a.shape
            out = numpy.empty((acol,arow), a.dtype)
            r1 = c1 = 0
            # Full BLOCK_DIM-wide column panels; the last panel (which may
            # be narrower) is handled after the loop.
            for c0 in range(0, acol-BLOCK_DIM, BLOCK_DIM):
                c1 = c0 + BLOCK_DIM
                for r0 in range(0, arow-BLOCK_DIM, BLOCK_DIM):
                    r1 = r0 + BLOCK_DIM
                    out[c0:c1,r0:r1] = a[r0:r1,c0:c1].T
                # remaining rows of this column panel
                out[c0:c1,r1:arow] = a[r1:arow,c0:c1].T
            # remaining columns
            for r0 in range(0, arow-BLOCK_DIM, BLOCK_DIM):
                r1 = r0 + BLOCK_DIM
                out[c1:acol,r0:r1] = a[r0:r1,c1:acol].T
            out[c1:acol,r1:arow] = a[r1:arow,c1:acol].T
            return out
        else:
            return a.transpose(axes)

    if a.ndim == 2:
        arow, acol = a.shape
        # A 2D array is treated as a batch of one matrix
        c_shape = (ctypes.c_int*3)(1, arow, acol)
        out = numpy.ndarray((acol, arow), a.dtype, buffer=out)
    elif a.ndim == 3 and axes == (0,2,1):
        d0, arow, acol = a.shape
        c_shape = (ctypes.c_int*3)(d0, arow, acol)
        out = numpy.ndarray((d0, acol, arow), a.dtype, buffer=out)
    else:
        raise NotImplementedError

    # Only double and complex128 reach this point
    if a.dtype == numpy.double:
        fn = _np_helper.NPdtranspose_021
    else:
        fn = _np_helper.NPztranspose_021
    fn.restype = ctypes.c_void_p
    fn(c_shape, a.ctypes.data_as(ctypes.c_void_p),
       out.ctypes.data_as(ctypes.c_void_p))
    return out
def transpose_sum(a, inplace=False, out=None):
    '''Evaluate a + a.T block by block to keep the memory footprint low

    Kwargs:
        inplace : bool
            Write the result back into ``a`` and return ``a``.
        out : ndarray or buffer
            Memory that receives the result when ``inplace`` is not set.
            A new array is allocated if it is not given.

    Returns:
        Array of the same shape as ``a`` holding ``a + a.T``.

    Examples:

    >>> transpose_sum(numpy.arange(4.).reshape(2,2))
    [[ 0.  3.]
     [ 3.  6.]]
    '''
    assert(a.shape[0] == a.shape[1])
    ndim = a.shape[0]

    # Pick the array that receives the result
    if not inplace:
        if out is None:
            result = numpy.empty_like(a)
        else:
            result = numpy.ndarray(a.shape, a.dtype, buffer=out)
    else:
        result = a

    for c0, c1 in misc.prange(0, ndim, BLOCK_DIM):
        cols = slice(c0, c1)
        for r0, r1 in misc.prange(0, c0, BLOCK_DIM):
            rows = slice(r0, r1)
            # Both mirror blocks are read before either one is written,
            # which keeps the in-place mode correct.
            block = a[rows,cols] + a[cols,rows].T
            result[cols,rows] = block.T
            result[rows,cols] = block
        # diagonal blocks
        diag = a[cols,cols]
        result[cols,cols] = diag + diag.T
    return result
def transpose(a, axes=None, inplace=False, out=None):
    '''Transpose array for better memory efficiency

    Args:
        a : ndarray
            Array to transpose.  2D arrays are always supported.  3D arrays
            are passed to the C kernel only when ``axes == (0,2,1)``.

    Kwargs:
        axes : tuple of ints
            Axis permutation.  It is only consulted when ``a`` is not 2D.
        inplace : bool
            Transpose the square 2D array ``a`` block by block inside its
            own memory and return ``a`` itself.
        out : ndarray or buffer
            Memory that receives the result of the C kernel.  It is not
            used by the in-place path nor by the pure NumPy fallback.

    Returns:
        ``a`` when ``inplace`` is set, otherwise an array holding the
        transposed data.  For inputs that are neither 2D nor handled by the
        C kernel, the result of ``a.transpose(axes)`` is returned.

    Raises:
        NotImplementedError : C-contiguous double/complex128 input that is
            neither 2D nor 3D with ``axes == (0,2,1)``.

    Examples:

    >>> transpose(numpy.ones((3,2)))
    [[ 1.  1.  1.]
     [ 1.  1.  1.]]
    '''
    if inplace:
        arow, acol = a.shape
        assert arow == acol
        # The scratch block must carry the dtype of a.  A default float64
        # buffer silently drops the imaginary part of complex data.
        tmp = numpy.empty((BLOCK_DIM, BLOCK_DIM), dtype=a.dtype)
        for c0, c1 in misc.prange(0, acol, BLOCK_DIM):
            for r0, r1 in misc.prange(0, c0, BLOCK_DIM):
                # Swap the off-diagonal block with its mirror image
                tmp[:c1 - c0, :r1 - r0] = a[c0:c1, r0:r1]
                a[c0:c1, r0:r1] = a[r0:r1, c0:c1].T
                a[r0:r1, c0:c1] = tmp[:c1 - c0, :r1 - r0].T
            # diagonal blocks
            a[c0:c1, c0:c1] = a[c0:c1, c0:c1].T
        return a

    # The C kernels are only selected for C-contiguous double / complex128
    # data.  Every other dtype (int, float32, complex64, ...) must take the
    # NumPy path, otherwise it would be handed to the complex128 kernel.
    if (not a.flags.c_contiguous
            or (a.dtype != numpy.double and a.dtype != numpy.complex128)):
        if a.ndim == 2:
            arow, acol = a.shape
            out = numpy.empty((acol, arow), a.dtype)
            r1 = c1 = 0
            # Full BLOCK_DIM-wide column panels; the last panel (which may
            # be narrower) is handled after the loop.
            for c0 in range(0, acol - BLOCK_DIM, BLOCK_DIM):
                c1 = c0 + BLOCK_DIM
                for r0 in range(0, arow - BLOCK_DIM, BLOCK_DIM):
                    r1 = r0 + BLOCK_DIM
                    out[c0:c1, r0:r1] = a[r0:r1, c0:c1].T
                # remaining rows of this column panel
                out[c0:c1, r1:arow] = a[r1:arow, c0:c1].T
            # remaining columns
            for r0 in range(0, arow - BLOCK_DIM, BLOCK_DIM):
                r1 = r0 + BLOCK_DIM
                out[c1:acol, r0:r1] = a[r0:r1, c1:acol].T
            out[c1:acol, r1:arow] = a[r1:arow, c1:acol].T
            return out
        else:
            return a.transpose(axes)

    if a.ndim == 2:
        arow, acol = a.shape
        # A 2D array is treated as a batch of one matrix
        c_shape = (ctypes.c_int * 3)(1, arow, acol)
        out = numpy.ndarray((acol, arow), a.dtype, buffer=out)
    elif a.ndim == 3 and axes == (0, 2, 1):
        d0, arow, acol = a.shape
        c_shape = (ctypes.c_int * 3)(d0, arow, acol)
        out = numpy.ndarray((d0, acol, arow), a.dtype, buffer=out)
    else:
        raise NotImplementedError

    # Only double and complex128 reach this point
    if a.dtype == numpy.double:
        fn = _np_helper.NPdtranspose_021
    else:
        fn = _np_helper.NPztranspose_021
    fn.restype = ctypes.c_void_p
    fn(c_shape, a.ctypes.data_as(ctypes.c_void_p),
       out.ctypes.data_as(ctypes.c_void_p))
    return out
def transpose_sum(a, inplace=False, out=None):
    '''Evaluate a + a.T block by block to keep the memory footprint low

    Kwargs:
        inplace : bool
            Write the result back into ``a`` and return ``a``.
        out : ndarray or buffer
            Memory that receives the result when ``inplace`` is not set.
            A new array is allocated if it is not given.

    Returns:
        Array of the same shape as ``a`` holding ``a + a.T``.

    Examples:

    >>> transpose_sum(numpy.arange(4.).reshape(2,2))
    [[ 0.  3.]
     [ 3.  6.]]
    '''
    assert (a.shape[0] == a.shape[1])
    ndim = a.shape[0]

    # Pick the array that receives the result
    if not inplace:
        if out is None:
            result = numpy.empty_like(a)
        else:
            result = numpy.ndarray(a.shape, a.dtype, buffer=out)
    else:
        result = a

    for c0, c1 in misc.prange(0, ndim, BLOCK_DIM):
        cols = slice(c0, c1)
        for r0, r1 in misc.prange(0, c0, BLOCK_DIM):
            rows = slice(r0, r1)
            # Both mirror blocks are read before either one is written,
            # which keeps the in-place mode correct.
            block = a[rows, cols] + a[cols, rows].T
            result[cols, rows] = block.T
            result[rows, cols] = block
        # diagonal blocks
        diag = a[cols, cols]
        result[cols, cols] = diag + diag.T
    return result
def transpose(a, inplace=False, out=None):
    '''Transpose array for better memory efficiency

    Args:
        a : 2D ndarray

    Kwargs:
        inplace : bool
            Transpose the square array ``a`` block by block inside its own
            memory and return ``a`` itself.
        out : ndarray or buffer
            Memory that receives the result when ``inplace`` is not set.
            A new array is allocated if it is not given.

    Returns:
        ``a`` when ``inplace`` is set, otherwise an array of shape
        ``(a.shape[1], a.shape[0])`` holding the transposed data.

    Examples:

    >>> transpose(numpy.ones((3,2)))
    [[ 1.  1.  1.]
     [ 1.  1.  1.]]
    '''
    arow, acol = a.shape
    if inplace:
        assert arow == acol
        # The scratch block must carry the dtype of a.  A default float64
        # buffer silently drops the imaginary part of complex data.
        tmp = numpy.empty((BLOCK_DIM, BLOCK_DIM), dtype=a.dtype)
        for c0, c1 in misc.prange(0, acol, BLOCK_DIM):
            for r0, r1 in misc.prange(0, c0, BLOCK_DIM):
                # Swap the off-diagonal block with its mirror image
                tmp[:c1 - c0, :r1 - r0] = a[c0:c1, r0:r1]
                a[c0:c1, r0:r1] = a[r0:r1, c0:c1].T
                a[r0:r1, c0:c1] = tmp[:c1 - c0, :r1 - r0].T
            # diagonal blocks
            a[c0:c1, c0:c1] = a[c0:c1, c0:c1].T
        return a

    if out is None:
        out = numpy.empty((acol, arow), a.dtype)
    else:
        out = numpy.ndarray((acol, arow), a.dtype, buffer=out)

    # C code is ~5% faster for acol=arow=10000
    # Note: when the input a is a submatrix of another array, cannot call
    # NPd(z)transpose since NPd(z)transpose assumes data continuity.
    # The kernels are also only selected for double / complex128 data, so
    # any other dtype (int, float32, complex64, ...) must use the NumPy loop
    # instead of being handed to a kernel of the wrong element size.
    if a.flags.c_contiguous and a.dtype == numpy.double:
        fn = _np_helper.NPdtranspose
    elif a.flags.c_contiguous and a.dtype == numpy.complex128:
        fn = _np_helper.NPztranspose
    else:
        fn = None

    if fn is not None:
        fn.restype = ctypes.c_void_p
        fn(ctypes.c_int(arow), ctypes.c_int(acol),
           a.ctypes.data_as(ctypes.c_void_p),
           out.ctypes.data_as(ctypes.c_void_p),
           ctypes.c_int(BLOCK_DIM))
    else:
        r1 = c1 = 0
        # Full BLOCK_DIM-wide column panels; the last panel (which may be
        # narrower) is handled after the loop.
        for c0 in range(0, acol - BLOCK_DIM, BLOCK_DIM):
            c1 = c0 + BLOCK_DIM
            for r0 in range(0, arow - BLOCK_DIM, BLOCK_DIM):
                r1 = r0 + BLOCK_DIM
                out[c0:c1, r0:r1] = a[r0:r1, c0:c1].T
            # remaining rows of this column panel
            out[c0:c1, r1:arow] = a[r1:arow, c0:c1].T
        # remaining columns
        for r0 in range(0, arow - BLOCK_DIM, BLOCK_DIM):
            r1 = r0 + BLOCK_DIM
            out[c1:acol, r0:r1] = a[r0:r1, c1:acol].T
        out[c1:acol, r1:arow] = a[r1:arow, c1:acol].T
    return out
def transpose(a, inplace=False, out=None):
    '''Transpose array for better memory efficiency

    Args:
        a : 2D ndarray

    Kwargs:
        inplace : bool
            Transpose the square array ``a`` block by block inside its own
            memory and return ``a`` itself.
        out : ndarray or buffer
            Memory that receives the result when ``inplace`` is not set.
            A new array is allocated if it is not given.

    Returns:
        ``a`` when ``inplace`` is set, otherwise an array of shape
        ``(a.shape[1], a.shape[0])`` holding the transposed data.

    Examples:

    >>> transpose(numpy.ones((3,2)))
    [[ 1.  1.  1.]
     [ 1.  1.  1.]]
    '''
    arow, acol = a.shape
    if inplace:
        assert arow == acol
        # The scratch block must carry the dtype of a.  A default float64
        # buffer silently drops the imaginary part of complex data.
        tmp = numpy.empty((BLOCK_DIM,BLOCK_DIM), dtype=a.dtype)
        for c0, c1 in misc.prange(0, acol, BLOCK_DIM):
            for r0, r1 in misc.prange(0, c0, BLOCK_DIM):
                # Swap the off-diagonal block with its mirror image
                tmp[:c1-c0,:r1-r0] = a[c0:c1,r0:r1]
                a[c0:c1,r0:r1] = a[r0:r1,c0:c1].T
                a[r0:r1,c0:c1] = tmp[:c1-c0,:r1-r0].T
            # diagonal blocks
            a[c0:c1,c0:c1] = a[c0:c1,c0:c1].T
        return a

    if out is None:
        out = numpy.empty((acol,arow), a.dtype)
    else:
        out = numpy.ndarray((acol,arow), a.dtype, buffer=out)

    # C code is ~5% faster for acol=arow=10000
    # Note: when the input a is a submatrix of another array, cannot call
    # NPd(z)transpose since NPd(z)transpose assumes data continuity.
    # The kernels are also only selected for double / complex128 data, so
    # any other dtype (int, float32, complex64, ...) must use the NumPy loop
    # instead of being handed to a kernel of the wrong element size.
    if a.flags.c_contiguous and a.dtype == numpy.double:
        fn = _np_helper.NPdtranspose
    elif a.flags.c_contiguous and a.dtype == numpy.complex128:
        fn = _np_helper.NPztranspose
    else:
        fn = None

    if fn is not None:
        fn.restype = ctypes.c_void_p
        fn(ctypes.c_int(arow), ctypes.c_int(acol),
           a.ctypes.data_as(ctypes.c_void_p),
           out.ctypes.data_as(ctypes.c_void_p),
           ctypes.c_int(BLOCK_DIM))
    else:
        r1 = c1 = 0
        # Full BLOCK_DIM-wide column panels; the last panel (which may be
        # narrower) is handled after the loop.
        for c0 in range(0, acol-BLOCK_DIM, BLOCK_DIM):
            c1 = c0 + BLOCK_DIM
            for r0 in range(0, arow-BLOCK_DIM, BLOCK_DIM):
                r1 = r0 + BLOCK_DIM
                out[c0:c1,r0:r1] = a[r0:r1,c0:c1].T
            # remaining rows of this column panel
            out[c0:c1,r1:arow] = a[r1:arow,c0:c1].T
        # remaining columns
        for r0 in range(0, arow-BLOCK_DIM, BLOCK_DIM):
            r1 = r0 + BLOCK_DIM
            out[c1:acol,r0:r1] = a[r0:r1,c1:acol].T
        out[c1:acol,r1:arow] = a[r1:arow,c1:acol].T
    return out
def restore(self, filename, inplace=True):
    '''Read diis contents from a diis file and replace the attributes of
    current diis object if needed, then construct the vector.

    Args:
        filename : str
            Name of the file opened through ``misc.H5TmpFile``.

    Kwargs:
        inplace : bool
            When set, the opened file becomes the storage of this object
            (``self.filename`` and ``self._diisfile`` are replaced).
            Otherwise every dataset is copied into the current storage with
            ``self._store``.

    Returns:
        ``self``.  The object is returned without rebuilding ``_H`` when
        the file holds no complete (vector, error vector) pair.
    '''
    fdiis = misc.H5TmpFile(filename)
    if inplace:
        self.filename = filename
        self._diisfile = fdiis

    diis_keys = fdiis.keys()
    # 'xprev' also starts with 'x' but it is the previously returned vector,
    # not one of the stored trial vectors, so it must not be counted.
    x_keys = [k for k in diis_keys if k[0] == 'x' and k != 'xprev']
    e_keys = [k for k in diis_keys if k[0] == 'e']
    # errvec may be incomplete if program is terminated when generating errvec.
    # The last vector or errvec should be excluded.
    nd = min(len(x_keys), len(e_keys))
    if nd == 0:
        return self

    if inplace:
        if fdiis[x_keys[0]].size < INCORE_SIZE or self.incore:
            for key in diis_keys:
                self._buffer[key] = numpy.asarray(fdiis[key])

        if 'xprev' in diis_keys:
            self._xprev = fdiis['xprev']

    else:
        for key in diis_keys:
            # Dataset.value no longer exists in h5py 3; numpy.asarray reads
            # the dataset the same way the in-place branch does.
            self._store(key, numpy.asarray(fdiis[key]))

        if 'xprev' in diis_keys:
            self._store('xprev', numpy.asarray(fdiis['xprev']))
            if 'xprev' in self._buffer:  # incore
                self._xprev = self._buffer['xprev']
            else:
                self._xprev = self._diisfile['xprev']

    self._bookkeep = list(range(nd))
    self._head = nd
    vecsize = 0
    # Overlaps of the error vectors, collected row by row for the lower
    # triangle (j <= i).
    e_mat = []
    for i in range(nd):
        dti = numpy.asarray(self.get_err_vec(i))
        vecsize = dti.size
        for j in range(i + 1):
            dtj = self.get_err_vec(j)
            assert (dtj.size == vecsize)
            tmp = 0
            # Accumulate the dot product in slices of BLOCK_SIZE elements
            for p0, p1 in misc.prange(0, vecsize, BLOCK_SIZE):
                tmp += numpy.dot(dti[p0:p1].conj(), dtj[p0:p1])
            e_mat.append(tmp)
        dti = dtj = None  # drop the references before the next row
    e_mat = numpy_helper.unpack_tril(e_mat)

    space = max(nd, self.space)
    self._H = numpy.zeros((space + 1, space + 1), e_mat.dtype)
    # Border of ones in the first row and column
    self._H[0, 1:] = self._H[1:, 0] = 1
    self._H[1:nd + 1, 1:nd + 1] = e_mat
    return self
def extrapolate(self, nd=None):
    '''Solve the linear equations ``H c = g`` with ``g = (1, 0, ..., 0)``
    and combine the stored vectors with the coefficients ``c[1:]``.

    Kwargs:
        nd : int
            Number of stored vectors to use.  By default the value of
            ``self.get_num_vec()``.

    Returns:
        1D array, the sum of ``c[i+1] * self.get_vec(i)`` over the ``nd``
        vectors.

    Raises:
        RuntimeError : no vector is available.
        numpy.linalg.LinAlgError : ``numpy.linalg.solve`` failed.
    '''
    if nd is None:
        nd = self.get_num_vec()
    if nd == 0:
        raise RuntimeError('No vector found in DIIS object.')

    h = self._H[:nd + 1, :nd + 1]
    g = numpy.zeros(nd + 1, h.dtype)
    g[0] = 1

    w, v = scipy.linalg.eigh(h)
    if numpy.any(abs(w) < 1e-14):
        logger.debug(self, 'Linear dependence found in DIIS error vectors.')
        # Apply the inverse of h only on the eigenvectors whose eigenvalues
        # are not (numerically) zero.
        idx = abs(w) > 1e-14
        c = numpy.dot(v[:, idx] * (1. / w[idx]),
                      numpy.dot(v[:, idx].T.conj(), g))
    else:
        try:
            c = numpy.linalg.solve(h, g)
        except numpy.linalg.LinAlgError:
            # LinAlgError is referenced through its public location.  The
            # bare raise keeps the original traceback.
            logger.warn(self, ' diis singular, eigh(h) %s', w)
            raise
    logger.debug1(self, 'diis-c %s', c)

    xnew = None
    for i, ci in enumerate(c[1:]):
        xi = self.get_vec(i)
        if xnew is None:
            xnew = numpy.zeros(xi.size, c.dtype)
        # Accumulate in slices of BLOCK_SIZE elements
        for p0, p1 in misc.prange(0, xi.size, BLOCK_SIZE):
            xnew[p0:p1] += xi[p0:p1] * ci
    return xnew
def hermi_sum(a, axes=None, hermi=HERMITIAN, inplace=False, out=None):
    '''Computing a + a.T.conj() with better memory efficiency

    Args:
        a : ndarray
            Square 2D array, or 3D array (with ``axes == (0,2,1)``) whose
            last two dimensions are equal.

    Kwargs:
        axes : tuple of ints
            Must be ``(0,2,1)`` for 3D input.  Not used for 2D input.
        hermi : int
            Flag forwarded to the C kernel.
            NOTE(review): the pure NumPy fallback below does not look at
            this flag and always adds the conjugate transpose - confirm
            that this is intended.
        inplace : bool
            Write the result back into ``a`` and return ``a``.
        out : ndarray or buffer
            Memory that receives the result when ``inplace`` is not set.

    Returns:
        Array of the same shape as ``a``.

    Raises:
        NotImplementedError : input that is neither handled by the C kernel
            nor by the 2D fallback.

    Examples:

    >>> hermi_sum(numpy.arange(4.).reshape(2,2))
    [[ 0.  3.]
     [ 3.  6.]]
    '''
    if inplace:
        out = a
    else:
        out = numpy.ndarray(a.shape, a.dtype, buffer=out)

    # numpy.complex was removed from NumPy; complex128 is the dtype it
    # stood for.
    if (not a.flags.c_contiguous
        or (a.dtype != numpy.double and a.dtype != numpy.complex128)):
        if a.ndim == 2:
            na = a.shape[0]
            for c0, c1 in misc.prange(0, na, BLOCK_DIM):
                for r0, r1 in misc.prange(0, c0, BLOCK_DIM):
                    # Both mirror blocks are read before either one is
                    # written, which keeps the in-place mode correct.
                    tmp = a[r0:r1,c0:c1] + a[c0:c1,r0:r1].conj().T
                    out[c0:c1,r0:r1] = tmp.T.conj()
                    out[r0:r1,c0:c1] = tmp
                # diagonal blocks
                tmp = a[c0:c1,c0:c1] + a[c0:c1,c0:c1].conj().T
                out[c0:c1,c0:c1] = tmp
            return out
        else:
            raise NotImplementedError('input array is not C-contiguous')

    if a.ndim == 2:
        assert(a.shape[0] == a.shape[1])
        # A 2D array is treated as a batch of one matrix
        c_shape = (ctypes.c_int*3)(1, a.shape[0], a.shape[1])
    elif a.ndim == 3 and axes == (0,2,1):
        assert(a.shape[1] == a.shape[2])
        c_shape = (ctypes.c_int*3)(*(a.shape))
    else:
        raise NotImplementedError

    # Only C-contiguous double and complex128 data reach this point
    if a.dtype == numpy.double:
        fn = _np_helper.NPdsymm_021_sum
    else:
        fn = _np_helper.NPzhermi_021_sum
    fn(c_shape, a.ctypes.data_as(ctypes.c_void_p),
       out.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(hermi))
    return out
def update(self, x, xerr=None):
    '''Extrapolate vector

    * If xerr the error vector is given, this function will push the target
    vector and error vector in the DIIS subspace, and use the error vector
    to extrapolate the vector and return the extrapolated vector.
    * If xerr is None, this function will take the difference between
    the current given vector and the last given vector as the error vector
    to extrapolate the vector.

    Returns:
        ``x`` itself while fewer than ``self.min_space`` vectors are
        stored, otherwise the extrapolated vector reshaped to ``x.shape``.
    '''
    if xerr is not None:
        self.push_err_vec(xerr)
    self.push_vec(x)

    nd = self.get_num_vec()
    if nd < self.min_space:
        return x

    # numpy.asarray copies only when it has to.  numpy.array(copy=False)
    # raises under NumPy 2 whenever a copy is required, e.g. for data that
    # is not already an in-memory array.
    dt = numpy.asarray(self.get_err_vec(self._head - 1))
    if self._H is None:
        self._H = numpy.zeros((self.space + 1, self.space + 1), dt.dtype)
        # Border of ones in the first row and column
        self._H[0, 1:] = self._H[1:, 0] = 1
    for i in range(nd):
        tmp = 0
        dti = self.get_err_vec(i)
        # Accumulate the dot product in slices of BLOCK_SIZE elements
        for p0, p1 in misc.prange(0, dt.size, BLOCK_SIZE):
            tmp += numpy.dot(dt[p0:p1].conj(), dti[p0:p1])
        self._H[self._head, i + 1] = tmp
        self._H[i + 1, self._head] = tmp.conjugate()
    dt = None

    if self._xprev is None:
        xnew = self.extrapolate(nd)
    else:
        self._xprev = None  # release memory first
        self._xprev = xnew = self.extrapolate(nd)

        self._store('xprev', xnew)
        if 'xprev' not in self._buffer:  # not incore
            self._xprev = self._diisfile['xprev']
    return xnew.reshape(x.shape)
def push_vec(self, x):
    '''Store the vector x (flattened) in the DIIS space.

    Depending on the state of the object, the vector is saved as the next
    trial vector, kept as the reference vector 'xprev', or saved together
    with an error vector defined as its difference to 'xprev'.
    '''
    x = x.ravel()

    # Discard the oldest records until there is room for one more
    history = self._bookkeep
    while len(history) >= self.space:
        history.pop(0)

    if self._err_vec_touched:
        slot = self._head
        history.append(slot)
        self._store('x%d' % (slot), x)
        self._head += 1
        return

    if self._xprev is None:
        # If push_err_vec is not called in advance, the error vector is
        # generated as the diff of the current vec and previous returned
        # vec (._xprev).  So store the first trial vec as the previous
        # returned vec.
        self._xprev = x
        self._store('xprev', x)
        if 'xprev' not in self._buffer:  # not incore
            self._xprev = self._diisfile['xprev']
        return

    # Start again from the first slot once the end of the space is reached
    if self._head >= self.space:
        self._head = 0
    history.append(self._head)
    ekey = 'e%d' % self._head
    xkey = 'x%d' % self._head
    self._store(xkey, x)
    if x.size >= INCORE_SIZE and not self.incore:
        # not call _store to reduce memory footprint
        if ekey not in self._diisfile:
            self._diisfile.create_dataset(ekey, (x.size,), x.dtype)
        edat = self._diisfile[ekey]
        for p0, p1 in misc.prange(0, x.size, BLOCK_SIZE):
            edat[p0:p1] = x[p0:p1] - self._xprev[p0:p1]
        self._diisfile.flush()
    else:
        self._store(ekey, x - numpy.asarray(self._xprev))
    self._head += 1