def _contract_compact(mydf, mos, coulG, max_memory):
    """Assemble a two-index array over packed (lower-triangular) orbital pairs.

    Orbital-pair products on the grid are formed block by block, passed
    through ``tools.fft``, scaled by ``coulG * (cell.vol / ngrids)``, passed
    back through ``tools.ifft`` and contracted (real part only) with the pair
    products of the second orbital set.

    Args:
        mydf: Object providing ``cell`` (with a ``vol`` attribute) and
            ``mesh``.
        mos: Pair ``(moiT, mokT)`` of 2-D arrays; ``moiT`` has shape
            ``(nmoi, ngrids)`` and ``mokT`` has ``nmok`` rows.
        coulG: Factor multiplied into the transformed pair products
            (presumably the reciprocal-space Coulomb kernel -- confirm
            against the caller).
        max_memory: Memory budget; it enters the block-size estimate as
            ``max_memory*1e6/8`` array elements.

    Returns:
        numpy.ndarray: Array of shape
        ``(nmoi*(nmoi+1)//2, nmok*(nmok+1)//2)``.
    """
    cell = mydf.cell
    bra_T, ket_T = mos
    n_bra, ngrids = bra_T.shape
    n_ket = ket_T.shape[0]
    weighted_coulG = coulG * (cell.vol / ngrids)

    def tril_offset(n):
        # Number of packed pairs (i >= j) contributed by rows 0 .. n-1.
        return n * (n + 1) // 2

    def pair_products(orbT, row0, row1, scratch):
        # Products orbT[i] * orbT[j] for row0 <= i < row1, 0 <= j <= i,
        # written into a view that borrows the memory of ``scratch``.
        nrows = tril_offset(row1) - tril_offset(row0)
        block = numpy.ndarray((nrows, ngrids), dtype=scratch.dtype,
                              buffer=scratch)
        start = 0
        for i in range(row0, row1):
            stop = start + i + 1
            numpy.einsum('p,jp->jp', orbT[i], orbT[:i+1],
                         out=block[start:stop])
            start = stop
        return block

    n_bra_pairs = tril_offset(n_bra)
    n_ket_pairs = tril_offset(n_ket)
    eri = numpy.empty((n_bra_pairs, n_ket_pairs))

    # Rows per block: never more than the larger pair count, and limited by
    # what is left of the budget after ``eri`` itself.
    mem_limited = (max_memory*1e6/8 - eri.size)/2/ngrids + 1
    blksize = int(min(max(n_bra_pairs, n_ket_pairs), mem_limited))
    scratch = numpy.empty((blksize, ngrids))

    for p0, p1 in lib.prange_tril(0, n_bra, blksize):
        bra_pairs_G = tools.fft(pair_products(bra_T, p0, p1, scratch),
                                mydf.mesh)
        bra_pairs_G *= weighted_coulG
        potential = tools.ifft(bra_pairs_G, mydf.mesh)
        potential_real = numpy.asarray(potential.real, order='C')
        row_slice = slice(tril_offset(p0), tril_offset(p1))
        for q0, q1 in lib.prange_tril(0, n_ket, blksize):
            # ``scratch`` is reused here; the bra block has already been
            # consumed by the FFT above.
            ket_pairs = numpy.asarray(
                pair_products(ket_T, q0, q1, scratch), order='C')
            col_slice = slice(tril_offset(q0), tril_offset(q1))
            eri[row_slice, col_slice] = lib.ddot(potential_real, ket_pairs.T)
        # Drop the complex intermediate before the next block is built.
        potential = None
    return eri
def kernel(mycc, eris, t1=None, t2=None, verbose=logger.NOTE):
    """Compute the CCSD(T) correction by blocked calls into the C driver.

    Args:
        mycc: Coupled-cluster object.  Supplies the default amplitudes
            (``mycc.t1`` / ``mycc.t2``) and the ``incore_complete``,
            ``max_memory`` and ``async_io`` settings read below.
        eris: Integral container.  ``eris.ovoo.dtype`` takes part in the
            working dtype; the object is otherwise consumed by ``_sort_eri``
            and ``_sort_t2_vooo_``.
        t1: Array of shape ``(nocc, nvir)``; defaults to ``mycc.t1``.
        t2: Doubles amplitudes; defaults to ``mycc.t2``.  It is handed to
            ``_sort_t2_vooo_`` and passed through ``restore_t2_inplace``
            before returning (presumably its storage is reused while the
            kernel runs -- confirm in ``_sort_t2_vooo_``).
        verbose: Verbosity level or logger accepted by
            ``logger.new_logger``.

    Returns:
        float: Real part of twice the sum accumulated by the C driver.
    """
    # time.clock() was removed in Python 3.8; process_time()/perf_counter()
    # are the standard-library replacements.
    # NOTE(review): assumes log.timer*() measures against these same clocks
    # -- confirm against the logger module in use.
    cpu1 = cpu0 = (time.process_time(), time.perf_counter())
    log = logger.new_logger(mycc, verbose)
    if t1 is None:
        t1 = mycc.t1
    if t2 is None:
        t2 = mycc.t2

    nocc, nvir = t1.shape
    nmo = nocc + nvir

    dtype = numpy.result_type(t1, t2, eris.ovoo.dtype)
    if mycc.incore_complete:
        ftmp = None
        eris_vvop = numpy.zeros((nvir, nvir, nocc, nmo), dtype)
    else:
        # Keep ``ftmp`` referenced for as long as its dataset is in use.
        ftmp = lib.H5TmpFile()
        eris_vvop = ftmp.create_dataset('vvop', (nvir, nvir, nocc, nmo), dtype)

    orbsym = _sort_eri(mycc, eris, nocc, nvir, eris_vvop, log)

    mo_energy, t1T, t2T, vooo, fvo, restore_t2_inplace = \
        _sort_t2_vooo_(mycc, orbsym, t1, t2, eris)
    cpu1 = log.timer_debug1('CCSD(T) sort_eri', *cpu1)

    cpu2 = list(cpu1)
    # Sort the symmetry labels separately within the first ``nocc`` entries
    # and the remainder, then build cumulative offsets per label.
    orbsym = numpy.hstack((numpy.sort(orbsym[:nocc]),
                           numpy.sort(orbsym[nocc:])))
    o_ir_loc = numpy.append(
        0, numpy.cumsum(numpy.bincount(orbsym[:nocc], minlength=8)))
    v_ir_loc = numpy.append(
        0, numpy.cumsum(numpy.bincount(orbsym[nocc:], minlength=8)))
    o_sym = orbsym[:nocc]
    oo_sym = (o_sym[:, None] ^ o_sym).ravel()
    oo_ir_loc = numpy.append(
        0, numpy.cumsum(numpy.bincount(oo_sym, minlength=8)))
    nirrep = max(oo_sym) + 1

    # The C driver receives raw pointers, so fix the integer width.
    orbsym = orbsym.astype(numpy.int32)
    o_ir_loc = o_ir_loc.astype(numpy.int32)
    v_ir_loc = v_ir_loc.astype(numpy.int32)
    oo_ir_loc = oo_ir_loc.astype(numpy.int32)
    # ``numpy.complex`` (an alias of the builtin ``complex``) was removed in
    # NumPy 1.24; complex128 compares identically against a dtype.
    if dtype == numpy.complex128:
        drv = _ccsd.libcc.CCsd_t_zcontract
    else:
        drv = _ccsd.libcc.CCsd_t_contract
    et_sum = numpy.zeros(1, dtype=dtype)

    def as_ptr(arr):
        # Raw data pointer of a numpy array for the ctypes call.
        return arr.ctypes.data_as(ctypes.c_void_p)

    def contract(a0, a1, b0, b1, cache):
        # One driver call for the block pair [a0:a1] x [b0:b1]; the result
        # is accumulated into ``et_sum`` through its pointer.
        cache_row_a, cache_col_a, cache_row_b, cache_col_b = cache
        drv(as_ptr(et_sum),
            as_ptr(mo_energy),
            as_ptr(t1T),
            as_ptr(t2T),
            as_ptr(vooo),
            as_ptr(fvo),
            ctypes.c_int(nocc), ctypes.c_int(nvir),
            ctypes.c_int(a0), ctypes.c_int(a1),
            ctypes.c_int(b0), ctypes.c_int(b1),
            ctypes.c_int(nirrep),
            as_ptr(o_ir_loc),
            as_ptr(v_ir_loc),
            as_ptr(oo_ir_loc),
            as_ptr(orbsym),
            as_ptr(cache_row_a),
            as_ptr(cache_col_a),
            as_ptr(cache_row_b),
            as_ptr(cache_col_b))
        cpu2[:] = log.timer_debug1('contract %d:%d,%d:%d'%(a0,a1,b0,b1), *cpu2)

    # The rest 20% memory for cache b
    mem_now = lib.current_memory()[0]
    max_memory = max(0, mycc.max_memory - mem_now)
    bufsize = (max_memory*.5e6/8-nocc**3*3*lib.num_threads())/(nocc*nmo)  #*.5 for async_io
    bufsize *= .5  #*.5 upper triangular part is loaded
    bufsize *= .8  #*.8 for [a0:a1]/[b0:b1] partition
    bufsize = max(8, bufsize)
    log.debug('max_memory %d MB (%d MB in use)', max_memory, mem_now)
    with lib.call_in_background(contract, sync=not mycc.async_io) as async_contract:
        for a0, a1 in reversed(list(lib.prange_tril(0, nvir, bufsize))):
            cache_row_a = numpy.asarray(eris_vvop[a0:a1, :a1], order='C')
            if a0 == 0:
                # The column slab would be empty; pass the row slab instead.
                cache_col_a = cache_row_a
            else:
                cache_col_a = numpy.asarray(eris_vvop[:a0, a0:a1], order='C')
            # Diagonal block: the "a" slabs are used for both index ranges.
            async_contract(a0, a1, a0, a1, (cache_row_a, cache_col_a,
                                            cache_row_a, cache_col_a))

            for b0, b1 in lib.prange_tril(0, a0, bufsize/8):
                cache_row_b = numpy.asarray(eris_vvop[b0:b1, :b1], order='C')
                if b0 == 0:
                    cache_col_b = cache_row_b
                else:
                    cache_col_b = numpy.asarray(eris_vvop[:b0, b0:b1], order='C')
                async_contract(a0, a1, b0, b1, (cache_row_a, cache_col_a,
                                                cache_row_b, cache_col_b))

    # Called for its effect on the amplitude storage; the return value is
    # not used further.
    restore_t2_inplace(t2T)
    et_sum *= 2
    if abs(et_sum[0].imag) > 1e-4:
        logger.warn(mycc, 'Non-zero imaginary part of CCSD(T) energy was found %s',
                    et_sum[0])
    et = et_sum[0].real
    log.timer('CCSD(T)', *cpu0)
    log.note('CCSD(T) correction = %.15g', et)
    return et
def kernel(mycc, eris, t1=None, t2=None, verbose=logger.NOTE):
    """Compute the CCSD(T) correction by blocked calls into the C driver.

    Args:
        mycc: Coupled-cluster object.  Supplies the default amplitudes
            (``mycc.t1`` / ``mycc.t2``) and the ``incore_complete``,
            ``max_memory`` and ``async_io`` settings read below.
        eris: Integral container.  ``eris.ovoo.dtype`` takes part in the
            working dtype; the object is otherwise consumed by ``_sort_eri``
            and ``_sort_t2_vooo_``.
        t1: Array of shape ``(nocc, nvir)``; defaults to ``mycc.t1``.
        t2: Doubles amplitudes; defaults to ``mycc.t2``.  It is handed to
            ``_sort_t2_vooo_`` and passed through ``restore_t2_inplace``
            before returning (presumably its storage is reused while the
            kernel runs -- confirm in ``_sort_t2_vooo_``).
        verbose: Verbosity level or logger accepted by
            ``logger.new_logger``.

    Returns:
        float: Real part of twice the sum accumulated by the C driver.
    """
    cpu1 = cpu0 = (logger.process_clock(), logger.perf_counter())
    log = logger.new_logger(mycc, verbose)
    if t1 is None:
        t1 = mycc.t1
    if t2 is None:
        t2 = mycc.t2

    nocc, nvir = t1.shape
    nmo = nocc + nvir

    dtype = numpy.result_type(t1, t2, eris.ovoo.dtype)
    if mycc.incore_complete:
        ftmp = None
        eris_vvop = numpy.zeros((nvir, nvir, nocc, nmo), dtype)
    else:
        # Keep ``ftmp`` referenced for as long as its dataset is in use.
        ftmp = lib.H5TmpFile()
        eris_vvop = ftmp.create_dataset('vvop', (nvir, nvir, nocc, nmo), dtype)

    orbsym = _sort_eri(mycc, eris, nocc, nvir, eris_vvop, log)

    mo_energy, t1T, t2T, vooo, fvo, restore_t2_inplace = \
        _sort_t2_vooo_(mycc, orbsym, t1, t2, eris)
    cpu1 = log.timer_debug1('CCSD(T) sort_eri', *cpu1)

    cpu2 = list(cpu1)
    # Sort the symmetry labels separately within the first ``nocc`` entries
    # and the remainder, then build cumulative offsets per label.
    orbsym = numpy.hstack(
        (numpy.sort(orbsym[:nocc]), numpy.sort(orbsym[nocc:])))
    o_ir_loc = numpy.append(
        0, numpy.cumsum(numpy.bincount(orbsym[:nocc], minlength=8)))
    v_ir_loc = numpy.append(
        0, numpy.cumsum(numpy.bincount(orbsym[nocc:], minlength=8)))
    o_sym = orbsym[:nocc]
    oo_sym = (o_sym[:, None] ^ o_sym).ravel()
    oo_ir_loc = numpy.append(
        0, numpy.cumsum(numpy.bincount(oo_sym, minlength=8)))
    nirrep = max(oo_sym) + 1

    # The C driver receives raw pointers, so fix the integer width.
    orbsym = orbsym.astype(numpy.int32)
    o_ir_loc = o_ir_loc.astype(numpy.int32)
    v_ir_loc = v_ir_loc.astype(numpy.int32)
    oo_ir_loc = oo_ir_loc.astype(numpy.int32)
    # ``numpy.complex`` (an alias of the builtin ``complex``) was removed in
    # NumPy 1.24; complex128 compares identically against a dtype.
    if dtype == numpy.complex128:
        drv = _ccsd.libcc.CCsd_t_zcontract
    else:
        drv = _ccsd.libcc.CCsd_t_contract
    et_sum = numpy.zeros(1, dtype=dtype)

    def as_ptr(arr):
        # Raw data pointer of a numpy array for the ctypes call.
        return arr.ctypes.data_as(ctypes.c_void_p)

    def contract(a0, a1, b0, b1, cache):
        # One driver call for the block pair [a0:a1] x [b0:b1]; the result
        # is accumulated into ``et_sum`` through its pointer.
        cache_row_a, cache_col_a, cache_row_b, cache_col_b = cache
        drv(as_ptr(et_sum),
            as_ptr(mo_energy),
            as_ptr(t1T),
            as_ptr(t2T),
            as_ptr(vooo),
            as_ptr(fvo),
            ctypes.c_int(nocc), ctypes.c_int(nvir),
            ctypes.c_int(a0), ctypes.c_int(a1),
            ctypes.c_int(b0), ctypes.c_int(b1),
            ctypes.c_int(nirrep),
            as_ptr(o_ir_loc),
            as_ptr(v_ir_loc),
            as_ptr(oo_ir_loc),
            as_ptr(orbsym),
            as_ptr(cache_row_a),
            as_ptr(cache_col_a),
            as_ptr(cache_row_b),
            as_ptr(cache_col_b))
        cpu2[:] = log.timer_debug1('contract %d:%d,%d:%d' % (a0, a1, b0, b1),
                                   *cpu2)

    # The rest 20% memory for cache b
    mem_now = lib.current_memory()[0]
    max_memory = max(0, mycc.max_memory - mem_now)
    bufsize = (max_memory * .5e6 / 8 - nocc**3 * 3 * lib.num_threads()) / (
        nocc * nmo)  #*.5 for async_io
    bufsize *= .5  #*.5 upper triangular part is loaded
    bufsize *= .8  #*.8 for [a0:a1]/[b0:b1] partition
    bufsize = max(8, bufsize)
    log.debug('max_memory %d MB (%d MB in use)', max_memory, mem_now)
    with lib.call_in_background(contract,
                                sync=not mycc.async_io) as async_contract:
        for a0, a1 in reversed(list(lib.prange_tril(0, nvir, bufsize))):
            cache_row_a = numpy.asarray(eris_vvop[a0:a1, :a1], order='C')
            if a0 == 0:
                # The column slab would be empty; pass the row slab instead.
                cache_col_a = cache_row_a
            else:
                cache_col_a = numpy.asarray(eris_vvop[:a0, a0:a1], order='C')
            # Diagonal block: the "a" slabs are used for both index ranges.
            async_contract(
                a0, a1, a0, a1,
                (cache_row_a, cache_col_a, cache_row_a, cache_col_a))

            for b0, b1 in lib.prange_tril(0, a0, bufsize / 8):
                cache_row_b = numpy.asarray(eris_vvop[b0:b1, :b1], order='C')
                if b0 == 0:
                    cache_col_b = cache_row_b
                else:
                    cache_col_b = numpy.asarray(eris_vvop[:b0, b0:b1],
                                                order='C')
                async_contract(
                    a0, a1, b0, b1,
                    (cache_row_a, cache_col_a, cache_row_b, cache_col_b))

    # Called for its effect on the amplitude storage; the return value is
    # not used further.
    restore_t2_inplace(t2T)
    et_sum *= 2
    if abs(et_sum[0].imag) > 1e-4:
        logger.warn(mycc,
                    'Non-zero imaginary part of CCSD(T) energy was found %s',
                    et_sum[0])
    et = et_sum[0].real
    log.timer('CCSD(T)', *cpu0)
    log.note('CCSD(T) correction = %.15g', et)
    return et
def kernel(mycc, eris, t1=None, t2=None, verbose=logger.NOTE):
    """Compute the UCCSD(T) correction in four blocked passes.

    The passes are labelled aaa, bbb, baa and abb below.  Each one builds a
    contraction callable (``_gen_contract_aaa`` / ``_gen_contract_baa``) and
    feeds it slabs of the sorted integrals; every call adds into ``et_sum``.

    Args:
        mycc: Coupled-cluster object.  Supplies default amplitudes
            (``mycc.t1`` / ``mycc.t2``), ``nocc`` (a pair), and the
            ``incore_complete``, ``max_memory`` and ``async_io`` settings.
        eris: Integral container; ``focka``, ``fockb``, ``mo_energy``,
            ``ovoo``, ``OVOO``, ``ovOO`` and ``OVoo`` are read here, and the
            object is passed to ``_sort_eri``.
        t1: Pair ``(t1a, t1b)``; defaults to ``mycc.t1``.
        t2: Triple ``(t2aa, t2ab, t2bb)``; defaults to ``mycc.t2``.  The
            storage of ``t2ab`` is overwritten while the function runs and
            written back before returning; it must be C-contiguous
            (asserted below).
        verbose: Verbosity level or logger accepted by
            ``logger.new_logger``.

    Returns:
        float: Real part of 0.25 times the accumulated sum.
    """
    cpu1 = cpu0 = (logger.process_clock(), logger.perf_counter())
    log = logger.new_logger(mycc, verbose)
    if t1 is None:
        t1 = mycc.t1
    if t2 is None:
        t2 = mycc.t2

    t1a, t1b = t1
    t2aa, t2ab, t2bb = t2

    nocca, noccb = mycc.nocc
    nmoa = eris.focka.shape[0]
    nmob = eris.fockb.shape[0]
    nvira = nmoa - nocca
    nvirb = nmob - noccb

    # ``ftmp`` is handed to _sort_eri; None is passed when
    # mycc.incore_complete is set.
    if mycc.incore_complete:
        ftmp = None
    else:
        ftmp = lib.H5TmpFile()
    # Transposed copies of the amplitudes (last two t2 axes moved first).
    t1aT = t1a.T.copy()
    t1bT = t1b.T.copy()
    t2aaT = t2aa.transpose(2, 3, 0, 1).copy()
    t2bbT = t2bb.transpose(2, 3, 0, 1).copy()

    # Axis-permuted, complex-conjugated copies of the four integral blocks.
    eris_vooo = numpy.asarray(eris.ovoo).transpose(1, 3, 0, 2).conj().copy()
    eris_VOOO = numpy.asarray(eris.OVOO).transpose(1, 3, 0, 2).conj().copy()
    eris_vOoO = numpy.asarray(eris.ovOO).transpose(1, 3, 0, 2).conj().copy()
    eris_VoOo = numpy.asarray(eris.OVoo).transpose(1, 3, 0, 2).conj().copy()

    eris_vvop, eris_VVOP, eris_vVoP, eris_VvOp = _sort_eri(
        mycc, eris, ftmp, log)
    cpu1 = log.timer_debug1('UCCSD(T) sort_eri', *cpu1)

    dtype = numpy.result_type(t1a.dtype, t2aa.dtype, eris_vooo.dtype)
    # One-element accumulator shared by all four passes.
    et_sum = numpy.zeros(1, dtype=dtype)
    mem_now = lib.current_memory()[0]
    max_memory = max(0, mycc.max_memory - mem_now)
    # aaa
    # Block size from the remaining memory budget, never below 8.
    bufsize = max(
        8,
        int((max_memory * .5e6 / 8 - nocca**3 * 3 * lib.num_threads()) * .4 /
            (nocca * nmoa)))
    log.debug('max_memory %d MB (%d MB in use)', max_memory, mem_now)
    orbsym = numpy.zeros(nocca, dtype=int)
    contract = _gen_contract_aaa(t1aT, t2aaT, eris_vooo, eris.focka,
                                 eris.mo_energy[0], orbsym, log)
    with lib.call_in_background(contract, sync=not mycc.async_io) as ctr:
        # Triangular blocks are visited in reverse order.
        for a0, a1 in reversed(list(lib.prange_tril(0, nvira, bufsize))):
            cache_row_a = numpy.asarray(eris_vvop[a0:a1, :a1], order='C')
            if a0 == 0:
                # The column slab would be empty; pass the row slab instead.
                cache_col_a = cache_row_a
            else:
                cache_col_a = numpy.asarray(eris_vvop[:a0, a0:a1], order='C')
            # Diagonal block: the "a" slabs serve both index ranges.
            ctr(et_sum, a0, a1, a0, a1,
                (cache_row_a, cache_col_a, cache_row_a, cache_col_a))

            for b0, b1 in lib.prange_tril(0, a0, bufsize / 8):
                cache_row_b = numpy.asarray(eris_vvop[b0:b1, :b1], order='C')
                if b0 == 0:
                    cache_col_b = cache_row_b
                else:
                    cache_col_b = numpy.asarray(eris_vvop[:b0, b0:b1],
                                                order='C')
                ctr(et_sum, a0, a1, b0, b1,
                    (cache_row_a, cache_col_a, cache_row_b, cache_col_b))
    cpu1 = log.timer_debug1('contract_aaa', *cpu1)

    # bbb
    # Same scheme as the aaa pass, using the second set of dimensions and
    # the VVOP integrals.
    bufsize = max(
        8,
        int((max_memory * .5e6 / 8 - noccb**3 * 3 * lib.num_threads()) * .4 /
            (noccb * nmob)))
    log.debug('max_memory %d MB (%d MB in use)', max_memory, mem_now)
    orbsym = numpy.zeros(noccb, dtype=int)
    contract = _gen_contract_aaa(t1bT, t2bbT, eris_VOOO, eris.fockb,
                                 eris.mo_energy[1], orbsym, log)
    with lib.call_in_background(contract, sync=not mycc.async_io) as ctr:
        for a0, a1 in reversed(list(lib.prange_tril(0, nvirb, bufsize))):
            cache_row_a = numpy.asarray(eris_VVOP[a0:a1, :a1], order='C')
            if a0 == 0:
                cache_col_a = cache_row_a
            else:
                cache_col_a = numpy.asarray(eris_VVOP[:a0, a0:a1], order='C')
            ctr(et_sum, a0, a1, a0, a1,
                (cache_row_a, cache_col_a, cache_row_a, cache_col_a))

            for b0, b1 in lib.prange_tril(0, a0, bufsize / 8):
                cache_row_b = numpy.asarray(eris_VVOP[b0:b1, :b1], order='C')
                if b0 == 0:
                    cache_col_b = cache_row_b
                else:
                    cache_col_b = numpy.asarray(eris_VVOP[:b0, b0:b1],
                                                order='C')
                ctr(et_sum, a0, a1, b0, b1,
                    (cache_row_a, cache_col_a, cache_row_b, cache_col_b))
    cpu1 = log.timer_debug1('contract_bbb', *cpu1)

    # Cache t2abT in t2ab to reduce memory footprint
    # From here until the restore step near the end, the memory of the
    # caller's t2ab holds transposed data.
    assert (t2ab.flags.c_contiguous)
    t2abT = lib.transpose(t2ab.copy().reshape(nocca * noccb, nvira * nvirb),
                          out=t2ab)
    t2abT = t2abT.reshape(nvira, nvirb, nocca, noccb)
    # baa
    bufsize = int(
        max(12, (max_memory * .5e6 / 8 - noccb * nocca**2 * 5) * .7 /
            (nocca * nmob)))
    ts = t1aT, t1bT, t2aaT, t2abT
    fock = (eris.focka, eris.fockb)
    vooo = (eris_vooo, eris_vOoO, eris_VoOo)
    # NOTE(review): ``orbsym`` is still the zeros(noccb) array created for
    # the bbb pass -- confirm this is what _gen_contract_baa expects.
    contract = _gen_contract_baa(ts, vooo, fock, eris.mo_energy, orbsym, log)
    with lib.call_in_background(contract, sync=not mycc.async_io) as ctr:
        for a0, a1 in lib.prange(0, nvirb, int(bufsize / nvira + 1)):
            cache_row_a = numpy.asarray(eris_VvOp[a0:a1, :], order='C')
            cache_col_a = numpy.asarray(eris_vVoP[:, a0:a1], order='C')
            for b0, b1 in lib.prange_tril(0, nvira, bufsize / 6 / 2):
                cache_row_b = numpy.asarray(eris_vvop[b0:b1, :b1], order='C')
                cache_col_b = numpy.asarray(eris_vvop[:b0, b0:b1], order='C')
                ctr(et_sum, a0, a1, b0, b1,
                    (cache_row_a, cache_col_a, cache_row_b, cache_col_b))
    cpu1 = log.timer_debug1('contract_baa', *cpu1)

    # Reinterpret the same buffer with the axes swapped; the temporary
    # copy() is required because source and destination share memory.
    t2baT = numpy.ndarray((nvirb, nvira, noccb, nocca),
                          buffer=t2abT,
                          dtype=t2abT.dtype)
    t2baT[:] = t2abT.copy().transpose(1, 0, 3, 2)
    # abb
    # Same contraction generator as baa with every pair of inputs swapped.
    # ``bufsize`` is carried over from the baa pass.
    ts = t1bT, t1aT, t2bbT, t2baT
    fock = (eris.fockb, eris.focka)
    mo_energy = (eris.mo_energy[1], eris.mo_energy[0])
    vooo = (eris_VOOO, eris_VoOo, eris_vOoO)
    contract = _gen_contract_baa(ts, vooo, fock, mo_energy, orbsym, log)
    # Unlike the baa pass, the background context is re-entered for every
    # outer block.
    for a0, a1 in lib.prange(0, nvira, int(bufsize / nvirb + 1)):
        with lib.call_in_background(contract, sync=not mycc.async_io) as ctr:
            cache_row_a = numpy.asarray(eris_vVoP[a0:a1, :], order='C')
            cache_col_a = numpy.asarray(eris_VvOp[:, a0:a1], order='C')
            for b0, b1 in lib.prange_tril(0, nvirb, bufsize / 6 / 2):
                cache_row_b = numpy.asarray(eris_VVOP[b0:b1, :b1], order='C')
                cache_col_b = numpy.asarray(eris_VVOP[:b0, b0:b1], order='C')
                ctr(et_sum, a0, a1, b0, b1,
                    (cache_row_a, cache_col_a, cache_row_b, cache_col_b))
    cpu1 = log.timer_debug1('contract_abb', *cpu1)

    # Restore t2ab
    # Undo the axis swap and the transpose so the caller's t2ab holds its
    # original contents again.
    lib.transpose(t2baT.transpose(1, 0, 3, 2).copy().reshape(
        nvira * nvirb, nocca * noccb),
                  out=t2ab)
    et_sum *= .25
    if abs(et_sum[0].imag) > 1e-4:
        logger.warn(
            mycc, 'Non-zero imaginary part of UCCSD(T) energy was found %s',
            et_sum[0])
    et = et_sum[0].real
    log.timer('UCCSD(T)', *cpu0)
    log.note('UCCSD(T) correction = %.15g', et)
    return et
def kernel(mycc, eris, t1=None, t2=None, verbose=logger.NOTE):
    """Compute the CCSD(T) correction using an HDF5 scratch file.

    The sorted integrals and a backup of ``t2`` live in a temporary HDF5
    file.  ``t2`` is copied back from that backup, and the file is closed,
    even when the calculation raises.

    Args:
        mycc: Coupled-cluster object; supplies default amplitudes
            (``mycc.t1`` / ``mycc.t2``) and ``max_memory``.
        eris: Integral container consumed by ``_sort_eri`` and
            ``_sort_t2_vooo_``.
        t1: Array of shape ``(nocc, nvir)``; defaults to ``mycc.t1``.
        t2: Doubles amplitudes; defaults to ``mycc.t2``.  Backed up to the
            scratch file before ``_sort_t2_vooo_`` is called and copied back
            in place afterwards.
        verbose: Verbosity level or logger accepted by
            ``logger.new_logger``.

    Returns:
        float: Twice the sum of the values returned by the C driver.
    """
    # time.clock() was removed in Python 3.8; process_time()/perf_counter()
    # are the standard-library replacements.
    # NOTE(review): assumes log.timer*() measures against these same clocks
    # -- confirm against the logger module in use.
    cpu1 = cpu0 = (time.process_time(), time.perf_counter())
    log = logger.new_logger(mycc, verbose)
    if t1 is None:
        t1 = mycc.t1
    if t2 is None:
        t2 = mycc.t2

    nocc, nvir = t1.shape
    nmo = nocc + nvir

    _tmpfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
    # Explicit write mode: h5py 3 opens read-only by default, which would
    # make create_dataset() below fail.
    ftmp = h5py.File(_tmpfile.name, 'w')
    t2_backed_up = False
    et_sum = [0]
    try:
        eris_vvop = ftmp.create_dataset('vvop', (nvir,nvir,nocc,nmo), 'f8')
        orbsym = _sort_eri(mycc, eris, nocc, nvir, eris_vvop, log)

        ftmp['t2'] = t2  # read back late.  Cache t2T in t2 to reduce memory footprint
        t2_backed_up = True
        mo_energy, t1T, t2T, vooo = _sort_t2_vooo_(mycc, orbsym, t1, t2, eris)
        cpu1 = log.timer_debug1('CCSD(T) sort_eri', *cpu1)

        cpu2 = list(cpu1)
        # Sort the symmetry labels separately within the first ``nocc``
        # entries and the remainder, then build cumulative offsets.
        orbsym = numpy.hstack((numpy.sort(orbsym[:nocc]),
                               numpy.sort(orbsym[nocc:])))
        o_ir_loc = numpy.append(
            0, numpy.cumsum(numpy.bincount(orbsym[:nocc], minlength=8)))
        v_ir_loc = numpy.append(
            0, numpy.cumsum(numpy.bincount(orbsym[nocc:], minlength=8)))
        o_sym = orbsym[:nocc]
        oo_sym = (o_sym[:,None] ^ o_sym).ravel()
        oo_ir_loc = numpy.append(
            0, numpy.cumsum(numpy.bincount(oo_sym, minlength=8)))
        nirrep = max(oo_sym) + 1

        # The C driver receives raw pointers, so fix the integer width.
        orbsym = orbsym.astype(numpy.int32)
        o_ir_loc = o_ir_loc.astype(numpy.int32)
        v_ir_loc = v_ir_loc.astype(numpy.int32)
        oo_ir_loc = oo_ir_loc.astype(numpy.int32)

        # The return type only needs to be declared once, not per call.
        drv = _ccsd.libcc.CCsd_t_contract
        drv.restype = ctypes.c_double

        def as_ptr(arr):
            # Raw data pointer of a numpy array for the ctypes call.
            return arr.ctypes.data_as(ctypes.c_void_p)

        def contract(a0, a1, b0, b1, cache):
            # One driver call for the block pair [a0:a1] x [b0:b1]; its
            # return value is added to et_sum[0].
            cache_row_a, cache_col_a, cache_row_b, cache_col_b = cache
            et = drv(as_ptr(mo_energy),
                     as_ptr(t1T),
                     as_ptr(t2T),
                     as_ptr(vooo),
                     ctypes.c_int(nocc), ctypes.c_int(nvir),
                     ctypes.c_int(a0), ctypes.c_int(a1),
                     ctypes.c_int(b0), ctypes.c_int(b1),
                     ctypes.c_int(nirrep),
                     as_ptr(o_ir_loc),
                     as_ptr(v_ir_loc),
                     as_ptr(oo_ir_loc),
                     as_ptr(orbsym),
                     as_ptr(cache_row_a),
                     as_ptr(cache_col_a),
                     as_ptr(cache_row_b),
                     as_ptr(cache_col_b))
            cpu2[:] = log.timer_debug1('contract %d:%d,%d:%d'%(a0,a1,b0,b1), *cpu2)
            et_sum[0] += et
            return et

        # The rest 20% memory for cache b
        mem_now = lib.current_memory()[0]
        max_memory = max(2000, mycc.max_memory - mem_now)
        bufsize = max(1, (max_memory*1e6/8-nocc**3*100)*.7/(nocc*nmo))
        log.debug('max_memory %d MB (%d MB in use)', max_memory, mem_now)
        for a0, a1 in reversed(list(lib.prange_tril(0, nvir, bufsize))):
            with lib.call_in_background(contract) as async_contract:
                cache_row_a = numpy.asarray(eris_vvop[a0:a1,:a1], order='C')
                cache_col_a = numpy.asarray(eris_vvop[:a0,a0:a1], order='C')
                # Diagonal block: the "a" slabs serve both index ranges.
                async_contract(a0, a1, a0, a1, (cache_row_a,cache_col_a,
                                                cache_row_a,cache_col_a))

                for b0, b1 in lib.prange_tril(0, a0, bufsize/6):
                    cache_row_b = numpy.asarray(eris_vvop[b0:b1,:b1], order='C')
                    cache_col_b = numpy.asarray(eris_vvop[:b0,b0:b1], order='C')
                    async_contract(a0, a1, b0, b1, (cache_row_a,cache_col_a,
                                                    cache_row_b,cache_col_b))
                    # Drop local references so the slabs can be freed once
                    # the background call is done with them.
                    cache_row_b = cache_col_b = None
                cache_row_a = cache_col_a = None
    finally:
        try:
            # Give the caller back the original t2 even on failure.
            if t2_backed_up:
                t2[:] = ftmp['t2']
        finally:
            ftmp.close()
            _tmpfile.close()

    et = et_sum[0] * 2
    log.timer('CCSD(T)', *cpu0)
    log.note('CCSD(T) correction = %.15g', et)
    return et
def kernel(mycc, eris, t1=None, t2=None, verbose=logger.NOTE):
    """Compute the UCCSD(T) correction in four blocked passes.

    The passes are labelled aaa, bbb, baa and abb below.  Each one builds a
    contraction callable (``_gen_contract_aaa`` / ``_gen_contract_baa``) and
    feeds it slabs of the sorted integrals stored in a temporary HDF5 file;
    every call adds into ``et_sum``.

    Args:
        mycc: Coupled-cluster object; supplies default amplitudes
            (``mycc.t1`` / ``mycc.t2``) and ``max_memory``.
        eris: Integral container; ``nocca``, ``noccb``, ``focka``,
            ``fockb``, ``ovoo``, ``OVOO``, ``ovOO`` and ``OVoo`` are read
            here, and the object is passed to ``_sort_eri``.
        t1: Pair ``(t1a, t1b)``; defaults to ``mycc.t1``.
        t2: Triple ``(t2aa, t2ab, t2bb)``; defaults to ``mycc.t2``.  The
            storage of ``t2ab`` is overwritten while the function runs and
            refilled from a backup in the scratch file before returning.
        verbose: Verbosity level or logger accepted by
            ``logger.new_logger``.

    Returns:
        0.25 times the accumulated sum.
    """
    # time.clock() was removed in Python 3.8; process_time()/perf_counter()
    # are the standard-library replacements.
    # NOTE(review): assumes log.timer*() measures against these same clocks
    # -- confirm against the logger module in use.
    cpu1 = cpu0 = (time.process_time(), time.perf_counter())
    log = logger.new_logger(mycc, verbose)
    if t1 is None:
        t1 = mycc.t1
    if t2 is None:
        t2 = mycc.t2

    t1a, t1b = t1
    t2aa, t2ab, t2bb = t2

    nocca = eris.nocca
    noccb = eris.noccb
    nmoa = eris.focka.shape[0]
    nmob = eris.fockb.shape[0]
    nvira = nmoa - nocca
    nvirb = nmob - noccb
    mo_ea = eris.focka.diagonal().copy()
    mo_eb = eris.fockb.diagonal().copy()

    ftmp = lib.H5TmpFile()
    # Backup of t2ab; its memory is reused further down.
    ftmp['t2ab'] = t2ab
    # Transposed copies of the amplitudes (last two t2 axes moved first).
    t1aT = t1a.T.copy()
    t1bT = t1b.T.copy()
    t2aaT = t2aa.transpose(2, 3, 0, 1).copy()
    t2bbT = t2bb.transpose(2, 3, 0, 1).copy()

    eris_vooo = numpy.asarray(eris.ovoo).transpose(1, 2, 0, 3).copy()
    eris_VOOO = numpy.asarray(eris.OVOO).transpose(1, 2, 0, 3).copy()
    eris_vOoO = numpy.asarray(eris.ovOO).transpose(1, 2, 0, 3).copy()
    eris_VoOo = numpy.asarray(eris.OVoo).transpose(1, 2, 0, 3).copy()

    # _sort_eri is expected to leave these four datasets in ``ftmp``.
    _sort_eri(mycc, eris, ftmp, log)
    eris_vvop = ftmp['vvop']
    eris_VVOP = ftmp['VVOP']
    eris_vVoP = ftmp['vVoP']
    eris_VvOp = ftmp['VvOp']
    cpu1 = log.timer_debug1('UCCSD(T) sort_eri', *cpu1)

    # One-element list so the contraction callables can accumulate in place.
    et_sum = [0]
    mem_now = lib.current_memory()[0]
    max_memory = max(2000, mycc.max_memory - mem_now)
    # aaa
    bufsize = max(
        1,
        int((max_memory * 1e6 / 8 - nocca**3 * 100) * .7 / (nocca * nmoa)))
    log.debug('max_memory %d MB (%d MB in use)', max_memory, mem_now)
    orbsym = numpy.zeros(mo_ea.size, dtype=int)
    contract = _gen_contract_aaa(t1aT, t2aaT, eris_vooo, mo_ea, orbsym, log)
    for a0, a1 in reversed(list(lib.prange_tril(0, nvira, bufsize))):
        with lib.call_in_background(contract) as ctr:
            cache_row_a = numpy.asarray(eris_vvop[a0:a1, :a1], order='C')
            if a0 == 0:
                # The column slab would be empty; pass the row slab instead.
                cache_col_a = cache_row_a
            else:
                cache_col_a = numpy.asarray(eris_vvop[:a0, a0:a1], order='C')
            # Diagonal block: the "a" slabs serve both index ranges.
            ctr(et_sum, a0, a1, a0, a1,
                (cache_row_a, cache_col_a, cache_row_a, cache_col_a))

            for b0, b1 in lib.prange_tril(0, a0, bufsize / 6):
                cache_row_b = numpy.asarray(eris_vvop[b0:b1, :b1], order='C')
                if b0 == 0:
                    cache_col_b = cache_row_b
                else:
                    cache_col_b = numpy.asarray(eris_vvop[:b0, b0:b1],
                                                order='C')
                ctr(et_sum, a0, a1, b0, b1,
                    (cache_row_a, cache_col_a, cache_row_b, cache_col_b))
                # Drop local references so the slabs can be freed once the
                # background call is done with them.
                cache_row_b = cache_col_b = None
            cache_row_a = cache_col_a = None
    cpu1 = log.timer_debug1('contract_aaa', *cpu1)

    # bbb
    bufsize = max(
        1,
        int((max_memory * 1e6 / 8 - noccb**3 * 100) * .7 / (noccb * nmob)))
    log.debug('max_memory %d MB (%d MB in use)', max_memory, mem_now)
    orbsym = numpy.zeros(mo_eb.size, dtype=int)
    contract = _gen_contract_aaa(t1bT, t2bbT, eris_VOOO, mo_eb, orbsym, log)
    for a0, a1 in reversed(list(lib.prange_tril(0, nvirb, bufsize))):
        with lib.call_in_background(contract) as ctr:
            cache_row_a = numpy.asarray(eris_VVOP[a0:a1, :a1], order='C')
            if a0 == 0:
                cache_col_a = cache_row_a
            else:
                cache_col_a = numpy.asarray(eris_VVOP[:a0, a0:a1], order='C')
            ctr(et_sum, a0, a1, a0, a1,
                (cache_row_a, cache_col_a, cache_row_a, cache_col_a))

            for b0, b1 in lib.prange_tril(0, a0, bufsize / 6):
                cache_row_b = numpy.asarray(eris_VVOP[b0:b1, :b1], order='C')
                if b0 == 0:
                    cache_col_b = cache_row_b
                else:
                    cache_col_b = numpy.asarray(eris_VVOP[:b0, b0:b1],
                                                order='C')
                ctr(et_sum, a0, a1, b0, b1,
                    (cache_row_a, cache_col_a, cache_row_b, cache_col_b))
                cache_row_b = cache_col_b = None
            cache_row_a = cache_col_a = None
    cpu1 = log.timer_debug1('contract_bbb', *cpu1)

    # Cache t2abT in t2ab to reduce memory footprint
    t2abT = lib.transpose(t2ab.reshape(nocca * noccb, nvira * nvirb).copy(),
                          out=t2ab)
    t2abT = t2abT.reshape(nvira, nvirb, nocca, noccb)
    # baa
    # Divide by the row size (nocca*nmob), as the aaa/bbb estimates do.  The
    # previous ``.3 / nocca * nmob`` multiplied by nmob and so overestimated
    # the buffer by a factor of nmob**2.
    bufsize = max(
        1,
        int((max_memory * .9e6 / 8 - noccb * nocca**2 * 7) * .3 /
            (nocca * nmob)))
    ts = t1aT, t1bT, t2aaT, t2abT
    vooo = (eris_vooo, eris_vOoO, eris_VoOo)
    # NOTE(review): ``orbsym`` is still the zeros(mo_eb.size) array created
    # for the bbb pass -- confirm this is what _gen_contract_baa expects.
    contract = _gen_contract_baa(ts, vooo, (mo_ea, mo_eb), orbsym, log)
    for a0, a1 in lib.prange(0, nvirb, int(bufsize / nvira + 1)):
        with lib.call_in_background(contract) as ctr:
            cache_row_a = numpy.asarray(eris_VvOp[a0:a1, :], order='C')
            cache_col_a = numpy.asarray(eris_vVoP[:, a0:a1], order='C')
            for b0, b1 in lib.prange_tril(0, nvira, bufsize):
                cache_row_b = numpy.asarray(eris_vvop[b0:b1, :b1], order='C')
                cache_col_b = numpy.asarray(eris_vvop[:b0, b0:b1], order='C')
                ctr(et_sum, a0, a1, b0, b1,
                    (cache_row_a, cache_col_a, cache_row_b, cache_col_b))
                cache_row_b = cache_col_b = None
            cache_row_a = cache_col_a = None
    cpu1 = log.timer_debug1('contract_baa', *cpu1)

    # Reinterpret the same buffer with the axes swapped; the temporary
    # copy() is required because source and destination share memory.
    t2baT = numpy.ndarray((nvirb, nvira, noccb, nocca), buffer=t2abT)
    t2baT[:] = t2abT.copy().transpose(1, 0, 3, 2)
    # abb
    # Same contraction generator as baa with every pair of inputs swapped.
    # ``bufsize`` is carried over from the baa pass.
    ts = t1bT, t1aT, t2bbT, t2baT
    vooo = (eris_VOOO, eris_VoOo, eris_vOoO)
    contract = _gen_contract_baa(ts, vooo, (mo_eb, mo_ea), orbsym, log)
    for a0, a1 in lib.prange(0, nvira, int(bufsize / nvirb + 1)):
        with lib.call_in_background(contract) as ctr:
            cache_row_a = numpy.asarray(eris_vVoP[a0:a1, :], order='C')
            cache_col_a = numpy.asarray(eris_VvOp[:, a0:a1], order='C')
            for b0, b1 in lib.prange_tril(0, nvirb, bufsize):
                cache_row_b = numpy.asarray(eris_VVOP[b0:b1, :b1], order='C')
                cache_col_b = numpy.asarray(eris_VVOP[:b0, b0:b1], order='C')
                ctr(et_sum, a0, a1, b0, b1,
                    (cache_row_a, cache_col_a, cache_row_b, cache_col_b))
                cache_row_b = cache_col_b = None
            cache_row_a = cache_col_a = None
    cpu1 = log.timer_debug1('contract_abb', *cpu1)

    # Refill the caller's t2ab from the backup written at the start.
    t2ab[:] = ftmp['t2ab']
    et = et_sum[0] * .25
    log.timer('UCCSD(T)', *cpu0)
    log.note('UCCSD(T) correction = %.15g', et)
    return et