def make_hdiag_csf (h1e, eri, norb, nelec, transformer, hdiag_det=None):
    ''' Compute the diagonal of the Hamiltonian in the CSF basis.

    For every electron-pair count ``npair`` the determinants are grouped by
    configuration, the (ndet x ndet) determinant-basis Hamiltonian block of
    each configuration is built by ``libcsf.FCICSFhdiag``, and the diagonal
    of that block after transformation with the spin eigenvectors ``umat``
    is stored, i.e. ``diag (umat.T @ H_conf @ umat)`` per configuration.

    Args:
        h1e: one-electron Hamiltonian; only used here to build ``hdiag_det``
            when it is not supplied.
        eri: two-electron integrals; passed through ``ao2mo.restore (1, ...)``
            before use.
        norb: number of orbitals.
        nelec: electron count specification, unpacked by ``_unpack_nelec``
            into (neleca, nelecb).
        transformer: object providing ``smult`` (spin multiplicity) and
            ``csd_mask`` (determinant addresses in configuration order).

    Kwargs:
        hdiag_det: determinant-basis Hamiltonian diagonal. Computed with
            ``make_hdiag_det`` if None.

    Returns:
        hdiag_csf: float64 ndarray of length
            ``count_all_csfs (norb, neleca, nelecb, smult)``.
    '''
    smult = transformer.smult
    if hdiag_det is None:
        hdiag_det = make_hdiag_det (None, h1e, eri, norb, nelec)
    eri = ao2mo.restore (1, eri, norb)
    # Profiling accumulators; only consumed by the commented-out print below.
    tlib = wlib = 0
    neleca, nelecb = _unpack_nelec (nelec)
    (min_npair, npair_csd_offset, npair_dconf_size, npair_sconf_size,
     npair_sdet_size) = get_csdaddrs_shape (norb, neleca, nelecb)
    _, npair_csf_offset, _, _, npair_csf_size = get_csfvec_shape (
        norb, neleca, nelecb, smult)
    npair_econf_size = npair_dconf_size * npair_sconf_size
    max_npair = min (neleca, nelecb)
    ncsf_all = count_all_csfs (norb, neleca, nelecb, smult)
    ndetb_all = cistring.num_strings (norb, nelecb)
    hdiag_csf = np.zeros (ncsf_all, dtype=np.float64)
    # Every entry must be flipped to False by the loop below.
    # NOTE: np.bool_ rather than the np.bool alias, which does not exist in
    # NumPy 1.24-1.26.
    hdiag_csf_check = np.ones (ncsf_all, dtype=np.bool_)
    for npair in range (min_npair, max_npair+1):
        ipair = npair - min_npair
        nconf = npair_econf_size[ipair]
        ndet = npair_sdet_size[ipair]
        ncsf = npair_csf_size[ipair]
        if ncsf == 0:
            continue
        nspin = neleca + nelecb - 2*npair
        csd_offset = npair_csd_offset[ipair]
        csf_offset = npair_csf_offset[ipair]
        det_addr = transformer.csd_mask[csd_offset:][:nconf*ndet]
        if ndet == 1:
            # Closed-shell singlets: one determinant per configuration, so
            # the CSF diagonal is the determinant diagonal.
            assert (ncsf == 1)
            hdiag_csf[csf_offset:][:nconf] = hdiag_det[det_addr.flat]
            hdiag_csf_check[csf_offset:][:nconf] = False
            continue
        # Split the combined determinant address into alpha and beta string
        # addresses (combined address = addra * ndetb_all + addrb).
        det_addra, det_addrb = divmod (det_addr, ndetb_all)
        det_stra = np.ascontiguousarray (cistring.addrs2str (
            norb, neleca, det_addra).reshape (nconf, ndet, order='C'))
        det_strb = np.ascontiguousarray (cistring.addrs2str (
            norb, nelecb, det_addrb).reshape (nconf, ndet, order='C'))
        det_addr = det_addr.reshape (nconf, ndet, order='C')
        # Buffers handed to the C library as raw pointers must be
        # C-contiguous float64.
        hdiag_conf = np.zeros ((nconf, ndet, ndet), dtype=np.float64)
        hdiag_conf_det = np.ascontiguousarray (hdiag_det[det_addr],
                                               dtype=np.float64)
        t1 = time.process_time ()
        w1 = time.time ()
        libcsf.FCICSFhdiag (hdiag_conf.ctypes.data_as (ctypes.c_void_p),
                            hdiag_conf_det.ctypes.data_as (ctypes.c_void_p),
                            eri.ctypes.data_as (ctypes.c_void_p),
                            det_stra.ctypes.data_as (ctypes.c_void_p),
                            det_strb.ctypes.data_as (ctypes.c_void_p),
                            ctypes.c_uint (norb),
                            ctypes.c_uint (nconf),
                            ctypes.c_uint (ndet))
        tlib += time.process_time () - t1
        wlib += time.time () - w1
        umat = get_spin_evecs (nspin, neleca, nelecb, smult)
        # (H @ U) * U summed over the determinant axis gives the diagonal of
        # U^T H U for each configuration without forming the full product.
        hdiag_conf = np.tensordot (hdiag_conf, umat, axes=1)
        hdiag_conf *= umat[np.newaxis,:,:]
        hdiag_csf[csf_offset:][:nconf*ncsf] = hdiag_conf.sum (1).ravel (order='C')
        hdiag_csf_check[csf_offset:][:nconf*ncsf] = False
    assert (np.count_nonzero (hdiag_csf_check) == 0), np.count_nonzero (hdiag_csf_check)
    #print ("Time in hdiag_csf library: {}, {}".format (tlib, wlib))
    return hdiag_csf
def make_hdiag_csf_slower (h1e, eri, norb, nelec, transformer, hdiag_det=None):
    ''' Compute the diagonal of the Hamiltonian in the CSF basis, building
    each configuration block with one ``libfci.FCIpspace_h0tril`` call.

    This is tricky because I need the diagonal blocks for each configuration
    in order to get the correct csf hdiag values, not just the diagonal
    elements for each determinant.

    Args:
        h1e: one-electron Hamiltonian; passed to ``make_hdiag_det`` (when
            ``hdiag_det`` is None) and to ``libfci.FCIpspace_h0tril``.
        eri: two-electron integrals; passed through ``ao2mo.restore (1, ...)``
            before use.
        norb: number of orbitals.
        nelec: electron count specification, unpacked by ``_unpack_nelec``
            into (neleca, nelecb).
        transformer: object providing ``smult`` (spin multiplicity) and
            ``csd_mask`` (determinant addresses in configuration order).

    Kwargs:
        hdiag_det: determinant-basis Hamiltonian diagonal. Computed with
            ``make_hdiag_det`` if None.

    Returns:
        hdiag_csf: float64 ndarray of length
            ``count_all_csfs (norb, neleca, nelecb, smult)``.
    '''
    smult = transformer.smult
    # Profiling accumulators; only consumed by the commented-out prints at
    # the end of the function.
    t0, w0 = time.process_time (), time.time ()
    tstr = tlib = tloop = wstr = wlib = wloop = 0
    if hdiag_det is None:
        hdiag_det = make_hdiag_det (None, h1e, eri, norb, nelec)
    eri = ao2mo.restore (1, eri, norb)
    # h1e is handed to the C library as a raw pointer below, so it must be
    # laid out contiguously in memory.
    h1e = np.ascontiguousarray (h1e)
    neleca, nelecb = _unpack_nelec (nelec)
    (min_npair, npair_csd_offset, npair_dconf_size, npair_sconf_size,
     npair_sdet_size) = get_csdaddrs_shape (norb, neleca, nelecb)
    _, npair_csf_offset, _, _, npair_csf_size = get_csfvec_shape (
        norb, neleca, nelecb, smult)
    npair_econf_size = npair_dconf_size * npair_sconf_size
    max_npair = min (neleca, nelecb)
    ncsf_all = count_all_csfs (norb, neleca, nelecb, smult)
    ndetb_all = cistring.num_strings (norb, nelecb)
    hdiag_csf = np.zeros (ncsf_all, dtype=np.float64)
    # Every entry must be flipped to False by the loop below.
    # NOTE: np.bool_ rather than the np.bool alias, which does not exist in
    # NumPy 1.24-1.26.
    hdiag_csf_check = np.ones (ncsf_all, dtype=np.bool_)
    for npair in range (min_npair, max_npair+1):
        ipair = npair - min_npair
        nconf = npair_econf_size[ipair]
        ndet = npair_sdet_size[ipair]
        ncsf = npair_csf_size[ipair]
        if ncsf == 0:
            continue
        nspin = neleca + nelecb - 2*npair
        csd_offset = npair_csd_offset[ipair]
        csf_offset = npair_csf_offset[ipair]
        det_addr = transformer.csd_mask[csd_offset:][:nconf*ndet]
        if ndet == 1:
            # Closed-shell singlets: one determinant per configuration, so
            # the CSF diagonal is the determinant diagonal.
            assert (ncsf == 1)
            hdiag_csf[csf_offset:][:nconf] = hdiag_det[det_addr.flat]
            hdiag_csf_check[csf_offset:][:nconf] = False
            continue
        umat = get_spin_evecs (nspin, neleca, nelecb, smult)
        # Split the combined determinant address into alpha and beta string
        # addresses (combined address = addra * ndetb_all + addrb).
        det_addra, det_addrb = divmod (det_addr, ndetb_all)
        t1, w1 = time.process_time (), time.time ()
        det_stra = np.ascontiguousarray (cistring.addrs2str (
            norb, neleca, det_addra).reshape (nconf, ndet, order='C'))
        det_strb = np.ascontiguousarray (cistring.addrs2str (
            norb, nelecb, det_addrb).reshape (nconf, ndet, order='C'))
        tstr += time.process_time () - t1
        wstr += time.time () - w1
        det_addr = det_addr.reshape (nconf, ndet, order='C')
        hdiag_conf = np.zeros ((nconf, ndet, ndet), dtype=np.float64)
        diag_idx = np.diag_indices (ndet)
        # It looks like the library call below is, itself, usually responsible
        # for about 50% of the clock and wall time that this function consumes.
        t1, w1 = time.process_time (), time.time ()
        for iconf in range (nconf):
            block = hdiag_conf[iconf]
            stra = det_stra[iconf]
            strb = det_strb[iconf]
            t2, w2 = time.process_time (), time.time ()
            # Presumably fills one triangle of the block (per the routine's
            # name) -- the block is symmetrized right after.
            libfci.FCIpspace_h0tril (block.ctypes.data_as (ctypes.c_void_p),
                                     h1e.ctypes.data_as (ctypes.c_void_p),
                                     eri.ctypes.data_as (ctypes.c_void_p),
                                     stra.ctypes.data_as (ctypes.c_void_p),
                                     strb.ctypes.data_as (ctypes.c_void_p),
                                     ctypes.c_int (norb),
                                     ctypes.c_int (ndet))
            tlib += time.process_time () - t2
            wlib += time.time () - w2
            hdiag_conf[iconf] = lib.hermi_triu (block)
            # Overwrite the block diagonal with the determinant diagonal.
            hdiag_conf[iconf][diag_idx] = hdiag_det[det_addr[iconf]]
        tloop += time.process_time () - t1
        wloop += time.time () - w1
        # (H @ U) * U summed over the determinant axis gives the diagonal of
        # U^T H U for each configuration without forming the full product.
        hdiag_conf = np.tensordot (hdiag_conf, umat, axes=1)
        hdiag_conf = (hdiag_conf * umat[np.newaxis,:,:]).sum (1)
        hdiag_csf[csf_offset:][:nconf*ncsf] = hdiag_conf.ravel (order='C')
        hdiag_csf_check[csf_offset:][:nconf*ncsf] = False
    assert (np.count_nonzero (hdiag_csf_check) == 0), np.count_nonzero (hdiag_csf_check)
    #print ("Total time in hdiag_csf: {}, {}".format (time.process_time () - t0, time.time () - w0))
    #print (" Loop: {}, {}".format (tloop, wloop))
    #print (" Library: {}, {}".format (tlib, wlib))
    #print (" Cistring: {}, {}".format (tstr, wstr))
    return hdiag_csf