def read0(self, infile, nbasis, debug=None):
    """Reads in the matrix elements from a Fortran binary file.

    This is a reference implementation, very slow.

    Input parameters:
    - infile = name of the Fortran binary file holding the records
    - nbasis = size of each of the four array dimensions (must be > 0)
    - debug = if true, a progress message is written to sys.stdout

    Each record holds four 32-bit integer indices (1-based in the file)
    and one float64 value.  The value is stored into the eight index
    permutations listed in the loop below.  The resulting array is kept
    in `self.H2'; nothing is returned.
    """
    assert nbasis > 0
    dbg = text_output(ifelse(debug, sys.stdout, None), flush=True)
    H2 = numpy.zeros((nbasis, nbasis, nbasis, nbasis), dtype=float)
    self.H2 = H2
    F = fortran_bin_file(infile)
    try:
        fsize = os.stat(infile).st_size
        rec_desc = (('i', numpy.int32, 4), ('v', numpy.float64))
        # Payload size plus two default-sized integers per record
        # (presumably the leading/trailing Fortran record markers).
        rec_bytesize = F.byte_length(*rec_desc) + F.default_int(0).itemsize * 2
        rec_count = fsize // rec_bytesize
        dbg("File %s: %d integral records to be read\n" % (infile, rec_count))
        for cc in xrange(rec_count):
            rec = F.read(*rec_desc)
            rec['i'] -= 1  # convert to py index
            (i, l, j, k) = rec['i']
            v = rec['v']
            # Use: V2b_inspect.permute_V2b('i','j','l','k',chem=1)
            # to generate this:
            H2[i, l, j, k] = v
            H2[j, k, i, l] = v
            H2[l, i, k, j] = v
            H2[k, j, l, i] = v
            H2[i, l, k, j] = v
            H2[k, j, i, l] = v
            H2[j, k, l, i] = v
            H2[l, i, j, k] = v
    finally:
        # Release the file handle even when a record fails to read.
        F.close()
def dets_dump_stats(wlkrs, out=sys.stdout, psiT_kwd='PsiT'):
    """Verbosely reports the statistics of the determinants
    (weight, local energies, etc, and wave function overlap).

    Input parameters:
    - wlkrs = object whose `dets' attribute is the sequence of
      determinants to report
    - out = output destination handed to text_output (default stdout)
    - psiT_kwd = keyword used to build the overlap attribute names
      `<kwd>_ovlp', `up_<kwd>_ovlp' and `dn_<kwd>_ovlp'

    Optional per-determinant attributes (impfn, det_norm, ampl and the
    three overlaps) are reported as 1e-99 when they are absent.
    """
    # In the development script elsewhere (Check_walkers.py),
    # it was called 'report_status_dets'.
    # But the layout has changed considerably.
    from wpylib.iofmt.text_output import text_output
    from wpylib.text_tools import str_fmt_heading
    fmt = "%4d %17.9g %17.9g | %17.9g %17.9g | %17.9g %17.9gj | %17.9g %17.9gj | %17.9g %17.9gj || %17.9g %17.9gj | %17.9g %17.9gj | %17.9g %17.9gj |\n"
    fmt_heading = str_fmt_heading(fmt)
    ovlp_name = '%s_ovlp' % psiT_kwd
    up_ovlp_name = 'up_%s_ovlp' % psiT_kwd
    dn_ovlp_name = 'dn_%s_ovlp' % psiT_kwd
    heading = fmt_heading % (
        'no', 'wtwlkr', 'phasefac',
        'Elocal', '(imag)',
        'impfn', '(imag)',
        'det_norm', '(imag)',
        'ampl', '(imag)',
        ovlp_name, '(imag)',
        up_ovlp_name, '(imag)',
        dn_ovlp_name, '(imag)',
    )
    Print = text_output(out)
    Print(heading)
    for (i, D) in enumerate(wlkrs.dets):
        impfn = getattr(D, 'impfn', 1e-99)
        det_norm = getattr(D, 'det_norm', 1e-99)
        ampl = getattr(D, 'ampl', 1e-99)
        o = getattr(D, ovlp_name, 1e-99)
        up_o = getattr(D, up_ovlp_name, 1e-99)
        dn_o = getattr(D, dn_ovlp_name, 1e-99)
        Print(fmt % (
            i + 1, D.wtwlkr, D.phasefac,
            D.El.real, D.El.imag,
            impfn.real, impfn.imag,
            det_norm.real, det_norm.imag,
            ampl.real, ampl.imag,
            o.real, o.imag,
            # The imaginary column of the up overlap must come from up_o
            # (it used to print dn_o.imag here).
            up_o.real, up_o.imag,
            dn_o.real, dn_o.imag,
        ))
def write(self, outfile):
    """Writes molecular orbital in GAMESS format.

    The text written is whatever `self.str()' returns; according to the
    original notes it depends on the `nbasis` and `udet` attributes, with
    the data taken from the `alpha` and (optionally) `beta` attributes.
    Comments are not written out; they must be appended manually if you
    want them.

    The output goes through the flexible `text_output` facility, so
    `outfile` can be an open file object or a filename.  The stream is
    flushed but not closed.
    """
    sink = text_output(outfile)
    orbital_text = self.str()
    sink(orbital_text)
    sink.flush()
def printout_timing(self, out=None):
    """Prints out a report for timing estimate.

    Input parameters:
    - out = output destination handed to text_output; defaults to
      sys.stdout when None.

    The report covers imaginary-time (beta) and step counts per phase,
    the task/thread layout, the walker population limits, wallclock
    estimates (hours) and aggregate resource estimates (million
    core-hours).  All figures are read from attributes of `self'.
    NOTE(review): `get_nblkstep', `get_nwlkmin', `get_nwlkmax' and
    `get_nwlkmax_proc' are read without being called, so they are
    presumably properties -- confirm against the class definition.
    """
    if out is None:
        out = sys.stdout
    O = text_output(out, flush=True)
    nblkstep = self.get_nblkstep
    h = 60 * 60  # secs per hour
    Mh = h * 1.0e6  # million secs per hour
    O("deltau = %.14g\n" % self.deltau)
    O("\n")
    O("beta per blk = %10.3f ( %9d steps = %10.4f M avg wlk-steps )\n"
      % (nblkstep * self.deltau, nblkstep, nblkstep * self.nwlk * 1e-6))
    O("beta_eq = %10.3f ( %9d steps = %10.4f M avg wlk-steps )\n"
      % (self.beta_eq, self.neqstep, self.neqstep * self.nwlk * 1e-6))
    O("beta_grth = %10.3f ( %9d steps = %10.4f M avg wlk-steps )\n"
      % (self.beta_grth, self.ngrthstep, self.ngrthstep * self.nwlk * 1e-6))
    O("beta_meas = %10.3f ( %9d steps = %10.4f M avg wlk-steps = %.6f M avg wlk-beta )\n"
      % (self.beta_meas, self.nmeastep,
         self.nmeastep * self.nwlk * 1e-6,
         self.nmeastep * self.deltau * self.nwlk * 1e-6))
    O("beta_total = %10.3f ( %9d steps = %10.4f M avg wlk-steps )\n" % (
        self.beta_eq + self.beta_grth + self.beta_meas,
        self.neqstep + self.ngrthstep + self.nmeastep,
        (self.neqstep + self.ngrthstep + self.nmeastep) * self.nwlk * 1e-6,
    ))
    O("\n")
    O("num_tasks = %d\n" % (self.num_tasks))
    O("num_threads = %d\n" % (self.num_threads))
    O("num_cores = %d total\n" % (self.num_tasks * self.num_threads))
    O("\n")
    O("nwlkmin = %d\n" % (self.get_nwlkmin))
    O("nwlk = %d\n" % (self.nwlk))
    O("nwlkmax = %d\n" % (self.get_nwlkmax))
    O("nwlkmax_proc = %d\n" % (self.get_nwlkmax_proc))
    O("\n")
    O("wallclock stats:\n")
    O("teq = %10.3f h\n" % (self.teq / h))
    O("tgrth = %10.3f h\n" % (self.tgrth / h))
    # The seconds figure uses %.2f; the former %.2s cut the number down
    # to its first two characters.
    O("tmeas1 = %10.3f h (%.2f secs) per measurement blk\n"
      % (self.tmeas1 / h, self.tmeas1))
    O("tmeas = %10.3f h\n" % (self.tmeas / h))
    O("ttotal = %10.3f h\n" % ((self.teq + self.tgrth + self.tmeas) / h))
    O("\n")
    O("overall computer resource stats:\n")
    O("Teq = %10.6f M core*h\n" % (self.Teq / Mh))
    O("Tgrth = %10.6f M core*h\n" % (self.Tgrth / Mh))
    O("Tmeas1 = %10.6f M core*h\n" % (self.Tmeas1 / Mh))
    O("Tmeas = %10.6f M core*h\n" % (self.Tmeas / Mh))
    O("Ttotal = %10.6f M core*h\n" % ((self.Teq + self.Tgrth + self.Tmeas) / Mh))
def write(self, outfile, comment=None):
    """Writes the `S' and `H1' matrices to a text file.

    Input parameters:
    - outfile = filename or open file object (handled by text_output)
    - comment = optional text appended to the header line after " # "

    The header line holds `nbasis' and the number of elements in one
    triangle of an nbasis x nbasis matrix, nbasis*(nbasis+1)/2.  The two
    matrices are then written by `self.write_matrix' with
    symmetric=True, labelled by the `S_name' / `H1_name' attributes when
    present (defaults "Overlap" and "Core Hamiltonian").  The output is
    closed before returning, also when writing fails.
    """
    F = text_output(outfile)
    try:
        if comment:
            cmt = " # " + str(comment)
        else:
            cmt = ""
        nbasis = self.nbasis
        # Floor division keeps the element count an integer regardless of
        # the division semantics in effect.
        nelem = nbasis * (nbasis + 1) // 2
        F.write("%d %d%s\n" % (nbasis, nelem, cmt))
        self.write_matrix(F, self.S, getattr(self, "S_name", "Overlap"),
                          symmetric=True)
        self.write_matrix(F, self.H1,
                          getattr(self, "H1_name", "Core Hamiltonian"),
                          symmetric=True)
    finally:
        F.close()
def h5meas_dump_reblk_raw_data(blk):
    """Dumps the reblocked datasets in a text table format.

    Used for debugging.  Input `blk' object is the output of
    h5meas_reblk routine.

    One file named "dump-<pblk>x<tblk>" is written per key of
    `blk.raw_blocks'.  Each line lists the two array indices followed
    by the ratio Xw/w and the weight w of that entry, with the second
    index varying slowest.
    """
    from wpylib.iofmt.text_output import text_output
    raw_blocks = blk.raw_blocks
    for key in raw_blocks.keys():
        (pblk, tblk) = key
        dump = text_output("dump-%dx%d" % (pblk, tblk))
        dataset = raw_blocks[pblk, tblk]
        ratio = dataset['Xw'] / dataset['w']
        weight = dataset['w']
        for icol in xrange(dataset.shape[1]):
            for irow in xrange(dataset.shape[0]):
                row_text = "%3d %3d %16.9f %16.9f\n" % (
                    irow, icol, ratio[irow, icol], weight[irow, icol])
                dump(row_text)
        dump.close()
def printout(self, out=None):
    """Prints a summary of the actual run timing and walker statistics.

    Input parameters:
    - out = output destination handed to text_output; defaults to
      sys.stdout when None.

    The calculation type line is chosen from the class of `self.info'
    (GAFQMC or PWQMC); nothing is printed for other classes.
    """
    from pyqmc.results.gafqmc_info import gafqmc_info
    from pyqmc.results.pwqmc_info import pwqmc_info
    from wpylib.iofmt.text_output import text_output
    if out is None:
        out = sys.stdout
    O = text_output(out, flush=True)
    info = self.info
    if isinstance(info, gafqmc_info):
        O("Calculation: GAFQMC\n")
    elif isinstance(info, pwqmc_info):
        # This branch used to re-test gafqmc_info and was unreachable.
        O("Calculation: PWQMC\n")
    beta_meas = self.nblk_actual * self.info.betablk
    O("walltime = %g secs (%g hours)\n" % (self.walltime, self.walltime / 3600.0))
    O("cpu core count = %d\n" % (info.num_tasks * info.num_threads))
    O("cputime >= %.6f M core*h\n" % (self.cputime * 1e-6))
    O("num meas blocks = %d ( beta = %.3f <--> %.4f M avg wlk-steps = %.6f M avg wlk-beta )\n" % (
        self.nblk_actual,
        beta_meas,
        self.nblk_actual * info.nblkstep * info.nwlk * 1e-6,
        beta_meas * info.nwlk * 1e-6,
    ))
    O("num all blocks = %d\n" % (info.neq + info.ngrth + self.nblk_actual))
    O("num all steps = %d\n" % (self.nsteps_all))
    O("nwlkmax_proc = %d\n" % (self.nwlkmax_proc))
    O("\n")
    O("AVERAGE CASE\n")
    O("nwlkavg_proc = %.10g\n" % (self.nwlkavg_proc))
    O("avg tot wlk steps = %.10g (per MPI task)\n" % (self.nwsteps_avg))
    O("avg step time = %.10g\n" % (self.tsteps_avg))
    O("\n")
    O("WORST CASE\n")
    O("nwlkmin_proc = %.10g\n" % (self.nwlkmin_proc))
    O("min tot wlk steps = %.10g (per MPI task)\n" % (self.nwsteps_min))
    O("WORST step time = %.10g\n" % (self.tsteps_max))
def write(self, outfile, comment=None, udet=False, verbose=None):
    """Writes orbitals in eigen_gms formatted file.

    Sector selection:
    - when a `beta' attribute exists, alpha and beta are both written
      (the `udet' argument is then ignored);
    - otherwise, with udet true, the alpha sector is written twice;
    - otherwise only the alpha sector is written.

    Every sector starts with a "<nbasis> <norb>" header line (followed
    by " # <comment>" when `comment' is given) and a blank line, then
    one coefficient per line for each orbital, with a blank line after
    each orbital.  With `verbose' true a one-line summary is printed.
    """
    out = text_output(outfile)
    (nbasis, norb) = self.alpha.shape
    cmt = (" # " + str(comment)) if comment else ""
    if hasattr(self, "beta"):
        sectors = (self.alpha, self.beta)
        summary = "EigenGms.write: (alpha,beta) nbasis=%d, norb=%d"
    elif udet:
        sectors = (self.alpha, self.alpha)
        summary = "EigenGms.write: (alpha,alpha) nbasis=%d, norb=%d"
    else:
        sectors = (self.alpha, )
        summary = "EigenGms.write: (alpha only) nbasis=%d, norb=%d"
    if verbose:
        print(summary % (nbasis, norb))
    header = "%d %d%s\n\n" % (nbasis, norb, cmt)
    for sector in sectors:
        out(header)
        # Transposed so that each iteration yields one orbital (column).
        for orbital in numpy.array(sector, copy=False).T:
            lines = "".join("%.15g\n" % coeff for coeff in orbital)
            out(lines + "\n")
def read(self, src, verbose=0, output=sys.stdout, rank=0, indep_rng=1):
    """Opens the `src' file and loads the walker data.

    Input parameters:
    - verbose = log verbosity level (default 0)
    - output = file-like stream for log output (default stdout)
    - rank = MPI rank of the process producing this checkpoint file
    - indep_rng = indicator whether an independent random number generator
      is used per each MPI process.

    Returns the record holding everything read; the same record is also
    stored in `self.data'.  The walkers are in `rec.wlkrs.dets'.  When
    the file carries importance-function values they are stored in
    `rec.impfn' but are not attached to the individual walkers here.
    The checkpoint file is closed before returning, also on error.
    """
    Complex = self.Complex
    Float = self.Float
    Int = self.Int
    Str = lambda length: numpy.dtype('S' + str(length))
    if verbose:
        w = text_output(output, flush=True)
    else:
        w = text_output(None)
    rec = struct()
    self.data = rec
    w("Reading checkpoint file %s\n" % (src, ))
    # `lran' is present in the file only for rank 0 or with per-process RNG.
    has_lran = (rank == 0) or bool(indep_rng)
    F = fortran_bin_file(src)
    try:
        if rank == 0:
            F.read(('code_version', Float), dest=rec)
            F.read(('date', Str(10)), dest=rec)
            F.read(('time', Str(10)), dest=rec)
            if verbose >= 10:
                w(("GAFQMC walker file data\n"
                   " code_version = %.14g\n"
                   " date = %s\n"
                   " time = %s\n")
                  % (rec.code_version, rec.date, rec.time))
            F.read(('lran', Int, 4), dest=rec)
            F.read(('nblktot', Int), dest=rec)
            F.read(('uptot', Float), dest=rec)
            F.read(('downtot', Float), dest=rec)
            F.read(('srun', Float), dest=rec)
            F.read(('s2run', Float), dest=rec)
        elif indep_rng:
            F.read(('lran', Int, 4), dest=rec)
        F.read(('fmt_version', Float), dest=rec)  # checkpoint format version
        F.read(('nh', Int), ('anorm', Float), ('etrial', Float),
               ('istpacc', Int), dest=rec)
        F.read(('timeshift', Float), dest=rec)
        F.read(('nwlkr_proc', Int), dest=rec)
        if verbose >= 10:
            if has_lran:
                # Guarded: rec.lran does not exist when it was not read.
                w((" lran = %s\n" % rec.lran))
            if rank == 0:
                # NOTE(review): timeshift is read as Float but printed
                # with %i, which truncates it -- confirm this is intended.
                w(
                    (" nblktot = %i\n" % rec.nblktot) +
                    (" uptot = %.14g\n" % rec.uptot) +
                    (" downtot = %.14g\n" % rec.downtot) +
                    (" srun = %.14g\n" % rec.srun) +
                    (" s2run = %.14g\n" % rec.s2run) +
                    (" fmt_version = %.14g\n" % rec.fmt_version) +
                    (" nh = %i\n" % rec.nh) +
                    (" anorm = %.14g\n" % rec.anorm) +
                    (" etrial = %.14g\n" % rec.etrial) +
                    (" istpacc = %i\n" % rec.istpacc) +
                    (" timeshift = %i\n" % rec.timeshift) +
                    (" nwlkr_proc = %i\n" % rec.nwlkr_proc) +
                    "")
        if self.udet:
            nptot = self.nup + self.ndn
        else:
            nptot = self.nup
        if verbose >= 20:
            w("# Walker data: wtwlkr, phasefac, re(El), im(El)\n")
        pop = MultiDet()
        pop.dets = []
        rec.wlkrs = pop
        for iwlk in xrange(rec.nwlkr_proc):
            wlk = Det()
            wlk.proc_rank = rank
            pop.dets.append(wlk)
            F.read(('iw', Int), dest=wlk)
            F.read(('wtwlkr', Float), dest=wlk)
            #wlk.wtwlkr = wlk.ampl
            # Note: the correct ampl is wtwlkr / <psiT|wlk> .
            # This must be applied later.
            F.read(('phasefac', Float), dest=wlk)
            F.read(('El', Complex), dest=wlk)
            # One record per orbital column, nbasis complex numbers each.
            orbs = numpy.zeros((self.nbasis, nptot), dtype=Complex)
            for ip in xrange(nptot):
                orbs[:, ip] = F.read(('col', Complex, self.nbasis))['col']
            if self.udet:
                wlk.make(src_up=orbs[:, 0:self.nup],
                         src_dn=orbs[:, self.nup:nptot])
            else:
                # Only nup columns were read; the down sector reuses the
                # leading ndn of them.
                wlk.make(src_up=orbs[:, 0:self.nup],
                         src_dn=orbs[:, 0:self.ndn])
            if verbose >= 20:
                w("%5d %14.10f %14.10f %16.10f %14.10f\n"
                  % (wlk.iw, wlk.wtwlkr, wlk.phasefac,
                     wlk.El.real, wlk.El.imag))
        w(" :: %d walkers read from file %s\n" % (len(pop.dets), src))
        if rec.fmt_version >= 2.0:
            F.read(('iflg_chkpt_impfn', Int), dest=rec)
            if rec.iflg_chkpt_impfn > 0:
                F.read(('nwlkr_proc_1', Int), dest=rec)
                assert rec.nwlkr_proc_1 == rec.nwlkr_proc
                F.read(('impfn', Complex, (rec.nwlkr_proc_1, )), dest=rec)
                # todo: affix impfn to Det objects above
    finally:
        F.close()
    return rec
def read_all(self, procs, fname_pattern='wlk/gafqmc-%(rank)05d', verbose=0,
             output=sys.stdout, indep_rng=1, fname_args=None):
    """Reads all walker files into a single big result record.

    The `procs` argument can be one of the following:
    - an integer > 0, which is the number of MPI processes
      (for parallel run), indicating that all the walker files
      would be read
    - a list/tuple/array of process ranks, from which associated
      checkpoint files we want to read in the walkers.

    By default, we assume that all the walker files reside in
    a subdir called "wlk/" .

    Other parameters:
    - fname_pattern = %-style pattern for the file name; it receives
      `rank' plus any extra entries from `fname_args'
    - verbose, output, indep_rng = passed on to `read'
    - fname_args = optional dict of extra fields for `fname_pattern'

    Returns the record of the first rank, extended with the walkers of
    all other ranks, and stores it in `self.data_all'.  Per-rank data
    is kept in `ranks', `lran_proc' and `wlkrs_proc'; `NUM_WARNINGS'
    counts parameter mismatches between files.  Returns None when no
    rank is given.  Raises PyqmcDataError when only some of the files
    carry the `impfn' field.
    """
    from wpylib.sugar import is_iterable
    from pyqmc import PyqmcDataError
    if verbose:
        w = text_output(output, flush=True)
    else:
        w = text_output(None)
    if not is_iterable(procs):
        ranks = xrange(procs)
    else:
        ranks = procs
    extra_args = {} if fname_args is None else fname_args
    w("read_all: Reading from %d checkpoint files\n" % len(ranks))
    dest = None
    impfn_all = []
    has_impfn = None

    def check_param(name, val, ref_val):
        # Sanity check: warn about (and count) a parameter that differs
        # from the value found in the first file.
        if ref_val != val:
            w(" Warning: parameter `%s' is different from expected value (%s, ref: %s)\n"
              % (name, val, ref_val))
            dest.NUM_WARNINGS = dest.NUM_WARNINGS + 1

    for rank in ranks:
        filename = fname_pattern % (dict(extra_args, rank=rank))
        chk = self.read(src=filename, verbose=verbose, output=w,
                        rank=rank, indep_rng=indep_rng)
        # don't leave the `data` field, or else it will confuse user later.
        # Use `data_all` instead!
        del self.data
        if dest is None:
            dest = chk
            self.data_all = chk
            dest.NUM_WARNINGS = 0
            dest.ranks = [rank]
            dest.lran_proc = {rank: dest.lran}
            del dest.lran
            # NOTE(review): dest.wlkrs is the same object that later
            # accumulates every rank's walkers, so this entry ends up
            # listing all of them -- confirm whether a copy is wanted.
            dest.wlkrs_proc = {rank: dest.wlkrs}
            has_impfn = hasattr(chk, 'impfn')
        else:
            dest.ranks.append(rank)
            # Do some sanity checks and issues warning irregular stuff.
            check_param('nh', chk.nh, dest.nh)
            check_param('anorm', chk.anorm, dest.anorm)
            check_param('etrial', chk.etrial, dest.etrial)
            check_param('istpacc', chk.istpacc, dest.istpacc)
            check_param('timeshift', chk.timeshift, dest.timeshift)
            dest.lran_proc[rank] = chk.lran
            dest.wlkrs_proc[rank] = chk.wlkrs
            dest.wlkrs.dets.extend(chk.wlkrs.dets)
        if has_impfn != hasattr(chk, 'impfn'):
            # Both placeholders are filled now; the old single-argument
            # form failed with TypeError before the error could be raised.
            raise PyqmcDataError(
                ("Inconsistent existence of impfn field across walker files "
                 " (currently on rank #%s; first-rank impfn status was %s)")
                % (rank, has_impfn))
        if has_impfn:
            impfn_all.append(chk.impfn)
    # Final brush-up:
    if has_impfn:
        dest.impfn = numpy.concatenate(impfn_all)
        # Affix impfn to D
        for (impfn, D) in zip(dest.impfn, dest.wlkrs):
            D.impfn = impfn
    return dest
def parse_dataset_results_(self, baserec, F, index):
    """Parses a dataset: the `result' part.

    Input parameters:
    - baserec = the parent record; its `outvars' attribute supplies
      `ngfft' and `istwfk', and a weak reference to it is stored
    - F = line iterator over the output text; it must also offer
      `next()', `file.push(line)' for push-back, and `read_items()'
    - index = dataset index; stored, and used as suffix when looking up
      per-dataset variables in `baserec.outvars'

    Lines are consumed until the end-of-dataset pattern matches; that
    line is pushed back for the caller.  Depending on which sections
    occur, the following keys are set on `self': 'scf_data', 'scf',
    'scf_nsteps', 'scf_converged', the energy items named in
    `rx.E_parts_list', 'nkpt', 'udet', 'wfk' and 'local_spin_moment'.
    'MyErrors' counts the recoverable problems met while parsing.
    """
    global DEBUG_FD
    scf_cycle = []
    rx = self.rx_
    dt = self.dt_
    # Debug output is discarded unless a module-level DEBUG_FD exists.
    try:
        dbg = DEBUG_FD
    except:
        dbg = text_output(None)  # sys.stdout, flush=True)
    self['MyErrors'] = 0
    self['dataset_index'] = index
    self['parent_'] = weakref.ref(baserec)

    # We can get the following from the outvars section of the output preamble
    def getoutvar(kwd, default, mapfunc=None):
        # Prefer the dataset-specific variable (`kwd' + index), then the
        # plain `kwd', then `default'.
        try:
            r = getattr(baserec.outvars, kwd + str(index))
        except AttributeError:
            r = getattr(baserec.outvars, kwd, default)
        # Try to convert element-wise, then as a scalar; on failure the
        # raw value is returned unconverted.
        try:
            return map(mapfunc, r)
        except:
            if mapfunc == None:
                return r
            else:
                try:
                    return mapfunc(r)
                except:
                    return r

    #ngfft = getattr(baserec.outvars, "ngfft%d" % index, getattr(baserec.outvars, "ngfft", None))
    self['ngfft'] = tuple(getoutvar("ngfft", None, mapfunc=int))
    istwfk = getoutvar("istwfk", None, mapfunc=int)
    # Maps 0-based k-point index -> number of planewaves reported.
    kpt_npw = {}
    for L in F:
        dbg("L: %s\n" % L.rstrip())
        if rx.dataset_end % L:
            dbg("** end dataset detected **\n")
            F.file.push(L)  # put back the text data to the file
            break
        elif rx.kpt_npw % L:
            # record the number of planewaves (w/o istwfk correction)
            kpt_npw[int(rx.kpt_npw['ikpt']) - 1] = int(rx.kpt_npw['npw'])
        elif rx.scf_begin % L:
            # Extracts the SCF cycle data plus whether it converges
            dbg("** SCF section **\n")
            L2 = ""
            for L2 in F:
                #dbg("L2: %s\n" % L2.rstrip())
                if not (rx.scf_line1 % L2):
                    break
                scf_cycle.append((float(rx.scf_line1['Etot']), float(rx.scf_line1['Ediff'])))
            self['scf_data'] = numpy.array(scf_cycle, dtype=dt.scf_cycle)
            self['scf'] = True
            # Skip blank lines; the first non-blank line is tested for
            # the convergence message.
            for L2 in F:
                if L2.strip() != "":
                    break
            if rx.scf_convg1 % L2:
                # NOTE(review): stored as captured by the pattern, without
                # an int() conversion -- confirm that is intended.
                self['scf_nsteps'] = rx.scf_convg1['numscf']
                self['scf_converged'] = True
        elif rx.E_begin % L:
            dbg("** Energy section **\n")
            for L2 in F:
                L2 = L2.rstrip()
                dbg("L2: %s\n" % L2)
                if rx.E_end % L2:
                    break
                elif len(L2) == 0:
                    continue
                else:
                    # The first matching pattern wins; its group 1 is
                    # converted by `arg1' and stored under key `act'.
                    for (pat, act, arg1) in rx.E_parts_list:
                        if pat % L2:
                            if isinstance(act, basestring):
                                self[act] = arg1(pat[1])
                            break
        elif rx.eigen_begin % L:
            # Begins reading info of eigensolutions here: eigenvals, occ, ...
            self['nkpt'] = int(rx.eigen_begin['nkpt'])
            if (istwfk == None):
                istwfk = (0, ) * self['nkpt']
            spin = rx.eigen_begin['spin']
            dbg("** Eigenvector sector: spin = %s **\n" % spin)
            if spin == None:
                # No spin label: one sector, shared by 'up' and 'dn'.
                self['udet'] = False
                spins = ('up', )
                wfk = result_base(up=[])
                wfk['dn'] = wfk['up']
            elif str(spin).lower() == 'up':
                self['udet'] = True
                spins = ('up', 'dn')
                wfk = result_base(up=[], dn=[])
            else:
                raise PyqmcDataError, "Error: Unknown spin type detected"
            self['wfk'] = wfk
            for (ispin, s) in enumerate(spins):
                wfk_s = wfk[s]
                for k in xrange(self['nkpt']):
                    L2 = F.next()
                    dbg(" spin %s kpt %d: %s\n" % (s, k, L2.strip()))
                    if not (rx.eigen_kpt1 % L2):
                        if (rx.eigen_kpt_stop % L2):
                            # Recoverable: stop reading k-points for this
                            # spin sector and count the error.
                            warn( "Cannot parse all kpt data; reason: `%s'" % L2)
                            self['MyErrors'] += 1
                            break
                        else:
                            raise PyqmcDataError, "Expected `kpt#' line, got `%s'" % L2
                    e = rx.eigen_kpt1
                    # CAVEAT: The results here have a very limited precision.
                    wfk_s_k = result_base(
                        ikpt=int(e['ikpt']) - 1,  # convert to 0-based
                        nband=int(e['nband']),
                        wtk=float(e['wtk']),
                        kpt=(
                            float(e['kx']),
                            float(e['ky']),
                            float(e['kz']),
                        ),
                    )
                    npwbasis = get_num_pw(kpt_npw[k], wfk_s_k.kpt, istwfk[k])
                    if DEBUG_LEVEL >= 10:
                        print(kpt_npw[k], wfk_s_k.kpt, istwfk[k]), "->", npwbasis
                    wfk_s_k['npwbasis'] = npwbasis
                    wfk_s_k['istwfk'] = istwfk[k]
                    if k != wfk_s_k['ikpt']:
                        warn("Unexpected kpt index in Abinit output: given %d, expecting %d" \
                             % (wfk_s_k['ikpt'], k))
                        self['MyErrors'] += 1
                        # In case of discrepancy, we proceed, but at your risk
                    # Reads in eigenvalues
                    egnvals = []
                    # Count down the bands still expected; values may span
                    # several lines.
                    mbands = wfk_s_k['nband']
                    for L2 in F:
                        dbg(" egn: " + L2)
                        m = map(float, L2.split())
                        egnvals += m
                        mbands -= len(m)
                        if mbands <= 0:
                            break
                    wfk_s_k['egnval'] = numpy.array(egnvals)
                    # Reads in occupancy (if any)
                    L2 = F.next()
                    if not (L2.strip().startswith('occupation numbers')):
                        # Maybe this one does not have occupancy; push back and continue
                        # FIXME: Must get the occ from the end of the file.
                        # For now we don't give the 'occ' field.
                        F.file.push(L2)
                    else:
                        occ = []
                        mbands = wfk_s_k['nband']
                        for L2 in F:
                            m = map(float, L2.split())
                            occ += m
                            mbands -= len(m)
                            if mbands <= 0:
                                break
                        wfk_s_k['occ'] = numpy.array(occ)
                    wfk_s.append(wfk_s_k)
                # Between spin sectors the next line must be another
                # eigenvalue-section header.
                if ispin + 1 < len(spins):
                    assert rx.eigen_begin % F.next()
        elif rx.densph_begin % L:
            # spin density within a spherical boundary: for local spin moments
            self['local_spin_moment'] = \
                F.read_items((0, int, 'iatom'),
                             (1, float, 'sphere_radius'),
                             (2, float, 'up_spin_dens'),
                             (3, float, 'dn_spin_dens'),
                             (4, float, 'tot_spin_dens'),
                             (5, float, 'diff_spin_dens'),
                             end_line_match=r'Note: Diff')
        else:
            pass
    # Fallback: if no SCF output exists, most likely it is not a SCF run.
    if not 'scf' in self:
        self['scf'] = False
def convert(self, src=None, info=None, output=None, **_opts_):
    #def convert(src="PWAF-meas.tar.lzma", info="INFO",
    #            output="measurements.h5",
    #            backend_opts={},
    #            debug=1):
    """Converts a set of measurement data (*.ene) to a standard meas_hdf5's
    HDF5 database.
    Also adds some useful metadata from the INFO file.

    Required parameters to be properly set:
    * src = the filename of the tarball (*.tar.lzma or *.tar.bz2), or the
      glob of the measurement files (*.ene).
    * info = the INFO file (or an already-parsed info object).
    * output = the target HDF5 data file.
    Each of these falls back to the corresponding field of the default
    parameters when given as None.

    Additional methods, if defined in the derived object, will be called:
    * convert_preamble_steps_(hints, info, opts)
    * convert_postamble_steps_(raw_group, info, opts)

    Optional arguments:
    * logfile: a file-like object (or a filename) to log the output of
      this routine and its called routines.
      A default can be given as "self.opts.logfile".
      Note:
      - `logfile' parameter is more recommended than the lower-level
        backend_opts['debug_out'] field passed to the actual converter
        routine.
      - If both are defined, the `debug_out' takes greater precedence.
    NOTE(review): the keyword arguments collected in `_opts_' are not
    consulted anywhere in this routine; parameters come from `self.opts'
    and the class defaults only -- confirm this is intended.

    Returns the database object produced by the backend converter.
    Raises RuntimeError when the class of the info object is unknown,
    PyqmcDataError when the INFO data has no trial energy, and
    ValueError for an unsupported `src'.
    """
    from wpylib.iofmt.text_output import text_output
    # FIXME: use self-introspection to reduce kitchen-sink params here:
    #p = Parameters(locals(), _opts_, _opts_.get('opts'), self.opts, _defaults)
    # The function defaults are now provided in the
    # Default_params...['convert_defaults'] field.
    p = self.opts._create_(
        self.Default_params[self.info_class]['convert_defaults'])
    if info is None:
        info_file = p.info
    else:
        info_file = info
    if src is None:
        src = p.src
    if output is None:
        output = p.output
    orig_dir = os.getcwd()
    tm1 = time.clock()
    if not isinstance(info_file, pwqmc_info):
        info = self.info_class(info_file)
    else:
        info = info_file
        info_file = info['info_file']
    try:
        lf = p.logfile
    except Exception:
        logfile = text_output()
        has_logfile = False
    else:
        logfile = text_output(lf)
        has_logfile = True

    # Deduce the datatype of the info structure:
    info_dtype = None
    for klass in self.Default_desc.keys():
        if isinstance(info, klass):
            info_dtype = klass
            break
    if info_dtype is None:
        raise RuntimeError(
            "Cannot deduce the datatype of the info structure: %s" % type(info))
    Default_desc = self.Default_desc[info_dtype]
    if not issubclass(info_dtype, self.info_class):
        from warnings import warn
        # The message now has a placeholder for each class; with a single
        # %s the formatting itself failed with TypeError.
        warn("INFO class (%s) does not match the converter's info_class (%s)!"
             % (info_dtype, self.info_class), UserWarning)

    # do these fetches here just in case they fail due to my mistake/negligence
    #kpt_data1 = ALL_KPTS_DATA[cellstr][volstr][kpt_str]
    #E_GGA = ALL_KPTS_DATA[cellstr][volstr]['kgrid']['E_GGA']
    if 'Etrial_noconst' not in info:
        raise PyqmcDataError(
            "Trial energy is not found in the INFO file: %s" % (info_file))

    if not isinstance(src, basestring):
        raise ValueError("Don't know how to handle src %s = %s"
                         % (str(type(src)), str(src)))
    if src.endswith(".tar.lzma"):
        tar_flags = ("--use-compress-program=lzma", )
    elif src.endswith(".tar.bz2"):
        tar_flags = ("-j", )
    elif src.endswith(".ene"):  # or src.endswith(".meas"):
        # *.meas is OLD. Don't use anymore.
        tar_flags = None
    else:
        raise ValueError("Don't know how to handle src = %s" % (src))
    if tar_flags is None:
        files = src
    else:
        # Unpack the tarball into an emptied scratch directory.
        # CAUTION: everything inside p.TMPDIR is removed first.
        sh.mcd(p.TMPDIR)
        sh.system("rm -rf *")  # always start clean
        os.chdir(orig_dir)
        # `os' has no run(); the command is launched through the shell
        # tools module used just above (presumably sh.run(prog, args)).
        sh.run("tar", ("-C", p.TMPDIR) + tar_flags + ("-xf", src))
        # TMPDIR is a parameter field, not a local or global name.
        files = p.TMPDIR + "/*.ene"

    try:
        backend_opts = dict(p.backend_opts)
    except Exception:
        backend_opts = {}
    backend_opts.update({
        'nwlkavg': info['nwlk'],
        'nwlkmax': info['nwlkmax'],
        #'default_raw_chunks': [1, info['nwlkmax']],
        #'value_processor': valpx,
    })
    # 'auto' means: keep the extra fields only for unconstrained runs.
    is_free_proj = info['constraint'] in ('none', )
    if backend_opts.get('keep_El_imag') == 'auto':
        backend_opts['keep_El_imag'] = is_free_proj
    if backend_opts.get('keep_phasefac') == 'auto':
        backend_opts['keep_phasefac'] = is_free_proj
    if "debug_out" not in backend_opts and has_logfile:
        backend_opts['debug_out'] = logfile
    if p.E_prefactor != 1.0 and 'value_processor' not in backend_opts:
        def valpx(data, meta, *junk1, **junk2):
            """Rescales the energy values (real and imaginary!)."""
            data['E_l'] *= p.E_prefactor
        backend_opts['value_processor'] = valpx
    if 'convert_preamble_steps_' in dir(self):
        # This is useful for e.g. adding default_raw_chunks, defining
        # value_processor, etc.
        self.convert_preamble_steps_(hints=backend_opts, info=info, opts=p)
    # Examples:
    # in MnO 2x2x2:
    #   natoms = cell_info[cellstr]['natoms']
    #   E_prefactor = 4.0 / natoms
    #   def valpx(data, meta, *junk1, **junk2):
    #     """Renormalize the energy values (real and imaginary!) to 4-atom
    #     cell value."""
    #     data['E_l'] *= E_prefactor
    # NOTE: special to MnO runs,
    # We use chunksize = [1,nwlkmax] because we know the popsizes are
    # hovering near nwlkmax anyway.
    db = self.meas_module.convert_meas_to_hdf5_v2(output,
                                                  files=files,
                                                  betablk=info["betablk"],
                                                  deltau=info["deltau"],
                                                  H0=info["H0"],
                                                  debug=p.debug,
                                                  **backend_opts)
    db.flush()
    if p.debug > 0:
        self.last_db = db
    # The last opened raw group is given as db.raw
    # --this is also the data group which contain the dataset from the
    # conversion just done.
    job0 = db.raw.job()
    # Add some useful attributes:
    if 'System' in p:
        System = p.System
    elif 'System' in info:
        System = info['System']
    elif 'System' in Default_desc:
        System = Default_desc['System']
    else:
        System = '(Unknown calculation)'
    job0.attrs['System'] = System
    job0.attrs['H0'] = info['H0']
    job0.attrs['deltau'] = info['deltau']
    job0.attrs['Evar'] = info['Evar'] * p.E_prefactor
    # NOTE(review): ET and ET_delta already contain E_prefactor and are
    # multiplied by it again below, so 'Etrial' and 'Ebounds' carry the
    # factor twice.  Harmless when E_prefactor == 1; left unchanged
    # pending confirmation.
    ET = (info['Etrial_noconst'] + info['H0']) * p.E_prefactor
    ET_delta = 2 / math.sqrt(info['deltau']) * p.E_prefactor
    job0.attrs['Etrial'] = ET * p.E_prefactor
    # for El_bounds
    job0.attrs['Ebounds'] = ((ET - ET_delta) * p.E_prefactor,
                             (ET + ET_delta) * p.E_prefactor)
    job0.attrs['units'] = Default_desc['unit']
    extra_meta_copy = Default_desc.get('extra_meta_copy', None)
    if extra_meta_copy is not None:
        extra_meta_copy(info, job0)
    if 'convert_postamble_steps_' in dir(self):
        # This is useful for e.g. adding more metadata
        self.convert_postamble_steps_(raw_group=job0, info=info, opts=p)
    tm2 = time.clock()
    logfile("%s : ET = %g; total time = %d s; %s\n" %
            (output, job0.attrs['Etrial'], tm2 - tm1,
             str_trunc_begin(System, 64),
             ))
    db.flush()
    logfile.close()
    return db
def h5meas_dump_reblk_stats(blk, fn):
    """Dumps the reblocking statistics in a text table format.
    Input `blk' object is the output of h5meas_reblk routine.
    `fn' is the output destination (handled by text_output).

    The output is like this:

    #tblk pblk mean_g err_g mean_b err_b w_tot tbsz mean_jk err_jk
    25 1 -8092.438212090 0.003446285 -8092.437398380 0.003932366 77093.632027542 2 -8092.438213362 0.003502350
    10 1 -8092.438212090 0.003178238 -8092.437240991 0.003409631 77093.632027542 5 -8092.438220600 0.003732636
    5 1 -8092.438212090 0.003726210 -8092.437043970 0.004222428 77093.632027542 10 -8092.438238199 0.004836244
    ...

    One row is written per key (pblk, tblk) of `blk.raw_blocks', in
    sorted key order.  The fields are:

    #tblk:
      length of the second axis of the reblocked dataset (presumably
      the number of blocks along the imaginary-time axis)
    pblk:
      first element of the raw_blocks key
    tbsz:
      second element of the raw_blocks key (presumably the number of
      time slices averaged into each block)
    mean_g, err_g:
      the 'grand' weighted statistics mean and (biased) error estimate
    mean_b, err_b:
      the unweighted statistics mean and error estimate
      (sometimes called 'block average'--i.e. plain averaging of the
      reblocked data),
    mean_jk, err_jk:
      mean and error estimate from the delete-1 jackknife procedure.
    w_tot:
      total weight of all the raw data points accounted in the
      reblocking process.
    """
    from wpylib.iofmt.text_output import text_output
    from wpylib.text_tools import str_fmt_heading
    from wpylib.math.stats.jackknife1 import jk_generate_averages, jk_stats_aa
    # `prod' replaces `product', which newer NumPy no longer provides.
    # The module is imported under an alias so that the builtin `sum' is
    # not shadowed inside this function.
    import numpy as np

    rawblk = blk.raw_blocks
    #fn = "reblock-stats"
    fd = text_output(fn)
    try:
        fmt = "%5d %6d %16.9f %16.9f %16.9f %16.9f %16.9f %4d %16.9f %16.9f\n"
        fd(str_fmt_heading(fmt) % ('#tblk', 'pblk', 'mean_g', 'err_g',
                                   'mean_b', 'err_b', 'w_tot', 'tbsz',
                                   'mean_jk', 'err_jk'))
        for (pblk, tblk) in sorted(rawblk.keys()):
            rblk = rawblk[pblk, tblk]
            ndata = np.prod(rblk.shape)
            weights = rblk['w']
            # reblocked "measurements":
            X_ds = rblk['Xw'] / weights
            wtot = np.sum(weights)
            # grand (weighted) averages
            Xg = np.sum(rblk['Xw']) / wtot
            # REF: http://en.wikipedia.org/wiki/Weighted_arithmetic_mean#Weighted_sample_variance
            # biased sample variance
            Xg_var_biased = np.sum(weights * (X_ds - Xg)**2) / wtot
            # biased error estimate of the "grand" average
            # FIXME: Add also unbiased error estimate
            Xg_err_biased = np.sqrt(Xg_var_biased / (ndata))
            # unweighted statistics: block average, standard error of the
            # blk avg
            Xb = np.average(X_ds)
            Xerr = np.std(X_ds, ddof=1) / np.sqrt(ndata)
            # Jackknife statistics
            Emix_aa_jk = jk_generate_averages(a=X_ds.flatten(),
                                              weights=weights.flatten())
            jk_result = jk_stats_aa(aa_jk=Emix_aa_jk)
            (Emix_jk, Emix_jk_err) = (jk_result[0], jk_result[1])
            fd(fmt % (rblk.shape[1], pblk, Xg, Xg_err_biased,
                      Xb, Xerr, wtot, tblk, Emix_jk, Emix_jk_err))
    finally:
        fd.close()
def init(self, info, wlkr_dir, chkfile_pattern=None, logfile=sys.stdout):
    """Sets up the object for a given run.

    Input parameters:
    - info = passed to `self._init_metadata'
    - wlkr_dir = stored as `self.wlkr_dir'
    - chkfile_pattern = when not None, stored as `self.chkfile_pattern';
      otherwise any existing value is left untouched
    - logfile = log destination, wrapped in text_output and stored as
      `self.logfile' (default stdout)
    """
    self._init_metadata(info)
    self.wlkr_dir = wlkr_dir
    pattern_given = chkfile_pattern is not None
    if pattern_given:
        self.chkfile_pattern = chkfile_pattern
    log_sink = text_output(logfile)
    self.logfile = log_sink
class emergency_walkers_fixup(object):
    """A tool to quickly fix a collection of QMC walkers for reuse in
    a subsequent QMC run.

    Motivation: In very large computers sometimes walker files can get
    corrupted. In this case, to prevent the QMC job from failing, we
    simply 'sample' the walker population to regenerate the missing/broken
    walker file.

    This tool provides a general framework to perform this emergency saving
    in the case of broken files.

    Typical sequence: collect_walker_files(), check_walker_files(),
    report_bad_walkers(), fixup_walkers1().  A derived class must set
    `walkers_glob' and override `is_bad_walker_file'.
    """
    # walkers_fixup_dir is a relative subdirectory to contain fixed up walker file set
    # walkers_fixup_note_bad_dir is a relative subdirectory to contain ONLY the replaced
    # bad walker filenames (for human inspection)
    walkers_fixup_dir = "fixup"
    walkers_fixup_note_bad_dir = "fixup/bad"
    verbose = 1
    log = text_output(sys.stdout, flush=True)
    # The following must be overriden:
    walkers_glob = None

    # Transient variables:
    # - walkers_dir
    # - walkers_absdir
    # - walkers[]
    # - walkers_bad[]
    # - walkers_ibad[] -- index of bad walkers
    # - walkers_good[]
    # - walkers_reserved[] -- names of walker files that are NOT to be used
    #   as replacement for bad walkers.

    def is_bad_walker_file(self, filename):
        """Decides if a walker file is bad.
        Must be overriden in actual class."""
        return 0

    def collect_walker_files(self, dir="wlk"):
        """Collects the names of the walker files found in `dir'.

        The files are matched with `self.walkers_glob' relative to `dir'
        and stored, sorted, in `self.walkers'.  The results of an earlier
        check (good/bad lists) are discarded.  The current working
        directory is restored afterwards, also on error.
        """
        orig_pwd = os.getcwd()
        self.walkers_absdir = abspath(dir)
        os.chdir(dir)
        try:
            self.walkers_dir = dir
            self.walkers = sh.sorted_glob(self.walkers_glob)
            if self.verbose:
                self.log(": Found %d walker files\n" % len(self.walkers))
        finally:
            os.chdir(orig_pwd)
        # Remove each stale list independently, so that one missing
        # attribute does not leave the others behind.
        for stale in ("walkers_bad", "walkers_ibad", "walkers_good"):
            try:
                delattr(self, stale)
            except AttributeError:
                pass

    def check_walker_files(self, report=None):
        """Checks the walker files (can take awhile), and returns the
        number of bad walker files found.

        Fills `walkers_bad', `walkers_ibad' (indices into `walkers') and
        `walkers_good'.  The `report' argument is currently unused.
        """
        self.walkers_bad = []
        self.walkers_ibad = []
        self.walkers_good = []
        for (iw, w) in enumerate(self.walkers):
            if self.is_bad_walker_file(w):
                self.walkers_bad.append(w)
                self.walkers_ibad.append(iw)
            else:
                self.walkers_good.append(w)
        return len(self.walkers_bad)

    @property
    def has_bad_walkers(self):
        """True if at least one walker file is bad; runs the check first
        if it has not been done yet."""
        if not hasattr(self, "walkers_bad"):
            # The checking method is check_walker_files; the name called
            # here before does not exist in this class.
            self.check_walker_files(report=False)
        return len(self.walkers_bad) > 0

    def report_bad_walkers(self, level):
        """Logs the bad walker files, if any.

        For 0 < level < 10 only the indices are listed; for any other
        level the index and name of every bad file is listed.
        """
        if not self.has_bad_walkers:
            return
        self.log("! Found %d bad walker files...\n" % len(self.walkers_bad))
        if level > 0 and level < 10:
            self.log("".join(["! Bad files (indices): "] +
                             [" %s" % iw for iw in self.walkers_ibad] +
                             ["\n"]))
        else:
            self.log("".join(["! Bad files:\n"] + [
                "! - %6d %s\n" % (iw, w)
                for (iw, w) in zip(self.walkers_ibad, self.walkers_bad)
            ]))

    def fixup_walkers1(self):
        """Builds the fixed-up walker set in the fixup subdirectory.

        Every good walker file is linked under its own name.  Every bad
        file is replaced by a randomly chosen good file that is not
        listed in `walkers_reserved'; the replacement is also recorded in
        the note-bad subdirectory.  Raises RuntimeError when no
        replacement can be found.  The current working directory is
        restored afterwards, also on error.
        """
        #if not self.has_bad_walkers:
        #  return False
        orig_pwd = os.getcwd()
        os.chdir(self.walkers_absdir)
        try:
            sh.mkdir("-p", self.walkers_fixup_dir)
            sh.mkdir("-p", self.walkers_fixup_note_bad_dir)
            # Copy over the good walkers
            for w in self.walkers_good:
                self.copy_file(w, joinpath(self.walkers_fixup_dir, w))
            num_good = len(self.walkers_good)
            # `walkers_reserved' is optional; without it nothing is excluded.
            reserved = getattr(self, "walkers_reserved", ())
            max_tries = 100
            for wb in self.walkers_bad:
                if num_good == 0:
                    # No candidates at all: fail with the same error as an
                    # exhausted search instead of an obscure randint error.
                    raise RuntimeError(
                        "Fatal: cannot find good replacement for walker file %s" % wb)
                for tries in range(max_tries):
                    wg = self.walkers_good[numpy.random.randint(num_good)]
                    if wg not in reserved:
                        break
                else:
                    raise RuntimeError(
                        "Fatal: cannot find good replacement for walker file %s" % wb)
                if self.verbose >= 10:
                    self.log("fixup: %s -> %s\n" % (wb, wg))
                self.copy_file(wg, joinpath(self.walkers_fixup_dir, wb))
                self.copy_file(wg, joinpath(self.walkers_fixup_note_bad_dir, wb))
        finally:
            os.chdir(orig_pwd)

    def generate_replacement_list(self):
        """Generates a list of walker replacement.
        Excluding those that are in walkers_reserved array.
        """
        raise NotImplementedError

    def copy_file(self, src, dest):
        """Makes `dest' available as a link to `src' (via
        sh.provide_link), using a path relative to the directory of
        `dest'."""
        from wpylib.file.file_utils import relpath
        # FIXME--may want real copy command, perhaps?
        sh.provide_link(dest, relpath(dirname(abspath(dest)), abspath(src)))
def read(self, infile, vec_select=1, verbose=0, nbasis=None, out=sys.stdout):
    """Reads off molecular orbital vectors.

    Usage:
      movecs = pyqmc.gamess.datfile.movecs(fname, [options])

    Valid options:
      vec_select = <integer> (default: 1; 1-based choice)
          which $VEC block in the file to read
      verbose = 0|1 (default: 0)
      nbasis = <integer> (default: autodetected)
          expected length of each vector; when given, every vector is
          checked against it
      out = output stream for the verbose messages (default: sys.stdout)

    Results are stored on `self`: `filename`, `vec_select`, `comments`
    (the comment lines collected before the selected $VEC block, or None),
    `udet` (True for UHF-type data), `nbasis`, and one numpy array per spin
    sector named `alpha` (and `beta` for UHF-type data), with one orbital
    per column.  Returns `self`.

    Raises PyqmcDataError if the requested $VEC block is missing, is
    truncated, or fails any of the consistency checks described below.

    This routine was translated from Gamess::ReadGamessMOVecs routine in
    my Gamess.pm perl module.
    The latter routine was derived from C2_UHF_gamess.pl dated ~20070813.

    CAUTION: The resulting orbital (orbitals_alpha, orbitals_beta) arrays
    are 1-based, both in the orbital index and the basis index.
    NOTE(review): the caution above looks inherited from the Perl routine;
    this implementation builds ordinary (0-based) numpy arrays.

    Strict vector ordering (1, 2, 3, ..., N) is required in the $VEC data.

    We will check orbital indices strictly.
    This requires the orbitals be strictly ordered, with no skipping, etc.
    Strict checking is necessary for proper reading when we have more than
    99 basis functions.

    In anticipating for large basis size, the rule for deducing UHF-type
    movecs is more complicated.
    For nbasis >= 100, the tag number rolls back to zero, unfortunately,
    which makes thing a bit difficult: when we see a tag of " 1" again, is
    it UHF beta sector, or movec #101?
    One way we can be assured that it IS an UHF movecs is prohibiting the
    size of movecs to be greater than the deduced nbasis, which is a
    reasonable restriction.
    Then, when we apparently encounter movec "101" when nbasis==100, we can
    be sure that the 101st vector is actually beta movec #1.
    Thus UHF movecs can be detected by the following rule:

    - ( old $VecTag != 0, or old $VecIndex == $nbasis already ) AND
      new $VecTag == 1 .

    FIXME: The solution above is still not foolproof in two cases:

    1) suppose we have an UHF-type movecs with nbasis=220, but each spin
       sector only has 100 orbitals listed.
       Then this will be interpreted as an RHF-type movecs with nbasis=220
       and norbitals=200.

    2) in spherical basis, maximum norbitals is <= nbasis.
       When this happens, then the deduced nbasis is not the right number
       of spherical basis functions (thus the nbasis deduced above is not
       right).

    But I haven't seen this case yet, so forget about them temporarily.
    """
    from wpylib.regexps import regex

    # MOVECS comments (always 3 lines preceding the $VEC block)
    rx_comment = regex(r'^--- ')
    # The global (?i) flag is placed first: newer `re` versions reject
    # inline global flags that appear later in the pattern.
    rx_vec = regex(r'(?i)^ \$vec')
    rx_endvec = regex(r'(?i)^ ?\$end')
    F = text_input(infile)
    comments = None
    found = False
    vec_blk_count = 0
    n_comment_lines = 0
    O = text_output(out, flush=True)
    spin = "alpha"
    udet = False
    AllVecs = {}

    for txt in F:
        if rx_comment % txt:
            comments = []
            n_comment_lines = 3
        if n_comment_lines > 0:
            comments.append(txt)
            n_comment_lines -= 1
        if rx_vec % txt:
            vec_blk_count += 1
            if vec_blk_count < vec_select:
                # the comments we just read (if any) are irrelevant, so
                # remove them.
                comments = None
                # ...and stop collecting, so that a pending comment line
                # cannot be appended to the discarded (None) list.
                n_comment_lines = 0
                continue
            found = True

            # This is the actual movecs reading loop ---
            # The $END marker for initial orbital guess (PUNMO=.TRUE.) is
            # buggy--we must tolerate that
            try:
                txt = F.next()
            except StopIteration:
                raise PyqmcDataError(
                    "%s:%d: Unexpected EOF while reading in $VEC data"
                    % (infile, F.lineno))

            # VecIndex = MO index to identify the whole vector
            # VecTag = MO "tag" number
            # In general VecTag is equal to VecIndex except when we have
            # >= 100 basis funcs (where VecTag has only the last two digits).
            # NOTE: VecTag is *always* a 2-character string!
            VecIndex = 1
            VecTag = " 1"
            AmplIndex = 0
            Ampl = []
            Vecs = [Ampl]

            while not (rx_endvec % txt):
                NewVecTag = txt[0:2]
                # We should safely assume that VecTag > 1 at the end
                # of alpha orbitals; if that's not the case, that's
                # YOUR fault (why doing 1-basis quantum chemistry?)
                if NewVecTag != VecTag:
                    # Just in case, we are very pedantic in checking for
                    # errors here:
                    if nbasis is not None:
                        # (1) nbasis must be consistent
                        if AmplIndex != nbasis:
                            raise PyqmcDataError(
                                ("%s:%d: Inconsistent nbasis "
                                 "(original guess was = %d, most recently deduced value = %d) "
                                 "for %s vector #%d")
                                % (infile, F.lineno, nbasis, AmplIndex,
                                   spin, VecIndex))
                    else:
                        # Deduce nbasis
                        nbasis = AmplIndex
                        if nbasis == 0:
                            raise PyqmcDataError(
                                ("%s:%d: nbasis detected as zero! "
                                 "Maybe there is corruption in the input file?")
                                % (infile, F.lineno))
                        if verbose > 0:
                            O("pyqmc.gamess.movecs.read:%s: Deduced nbasis = %d\n"
                              % (infile, nbasis))

                    # UHF-type vector detection scheme: see the notes above
                    if (VecTag != " 0" or VecIndex == nbasis) \
                            and NewVecTag == " 1":
                        if verbose > 0:
                            O("pyqmc.gamess.movecs.read:%s: Found UHF-type movecs\n"
                              % (infile,))
                        if udet:
                            raise PyqmcDataError(
                                ("%s:%d: alpha and beta orbitals were already defined?! "
                                 "Maybe there is a mistake with your $VEC data?")
                                % (infile, F.lineno))
                        AllVecs[spin] = numpy.array(Vecs, dtype=float).T
                        # start all over with a new MO block
                        Ampl = []
                        Vecs = [Ampl]
                        spin = "beta"
                        udet = True
                        VecIndex = 0
                        # end UHF-type detection scheme
                    else:
                        # Some additional error checking(s)
                        if VecIndex >= nbasis:  # and nbasis != 100:
                            # NOTE: We disallow norbitals > nbasis in our
                            # reader.
                            raise PyqmcDataError(
                                ("%s:%d: The $VEC block has more than %d orbitals, "
                                 "which is prohibited by this routine")
                                % (infile, F.lineno, nbasis))
                        Ampl = []
                        Vecs.append(Ampl)

                    AmplIndex = 0
                    # Start off a new vector
                    VecIndex += 1
                    VecTag = NewVecTag
                # end new vector/new spin sector detection

                # Strict index vs. tag checking:
                TagChk = "%2d" % (VecIndex % 100)
                if TagChk != VecTag:
                    raise PyqmcDataError(
                        ("%s:%d: Mismatch vector tag number in vector #%d "
                         "(wanted '%s', got '%s')")
                        % (infile, F.lineno, VecIndex, TagChk, VecTag))

                # the amplitudes are always stored in -n.nnnnnnnnE+nn fixed
                # format (15 characters wide)
                txtdata = txt[5:].rstrip()
                lendata = len(txtdata) // 15
                Ampl += [float(txtdata[i * 15:i * 15 + 15])
                         for i in range(lendata)]
                AmplIndex += lendata
                # TODO: $i < 5 is allowed ONLY on the last line;
                # Make sure we catch that.
                try:
                    txt = F.next()
                except StopIteration:
                    raise PyqmcDataError(
                        "%s:%d: Unexpected EOF while reading in $VEC data"
                        % (infile, F.lineno))
            # end loop for reading in $VEC data

            # Finalization: do final checks, etc.
            if nbasis is None:
                # The block held a single vector (or none), so the in-loop
                # deduction never ran; deduce nbasis from that vector now.
                nbasis = AmplIndex
                if nbasis == 0:
                    raise PyqmcDataError(
                        ("%s:%d: nbasis detected as zero! "
                         "Maybe there is corruption in the input file?")
                        % (infile, F.lineno))
                if verbose > 0:
                    O("pyqmc.gamess.movecs.read:%s: Deduced nbasis = %d\n"
                      % (infile, nbasis))
            elif AmplIndex != nbasis:
                raise PyqmcDataError(
                    ("%s:%d: Inconsistent nbasis "
                     "(original guess was = %d, most recently deduced value = %d) "
                     "for %s vector #%d")
                    % (infile, F.lineno, nbasis, AmplIndex, spin, VecIndex))
            # Convert only after the length check, so that a short last
            # vector is reported as a data error rather than a numpy one.
            AllVecs[spin] = numpy.array(Vecs, dtype=float).T

            if udet:
                if AllVecs['alpha'].shape != AllVecs['beta'].shape:
                    raise PyqmcDataError(
                        ("%s:%d: Inconsistent shape of MO matrix: "
                         "(alpha = %s, beta = %s)")
                        % (infile, F.lineno,
                           AllVecs['alpha'].shape,
                           AllVecs['beta'].shape,
                           ))

            if verbose > 0:
                O("pyqmc.gamess.movecs.read:%s: Total MO vectors read = %s%s\n"
                  % (infile, VecIndex, ifelse(udet, " (UHF-type)", "")))
            # stop reading if the desired vectors have been loaded
            break
    # end main text reading loop

    if not found:
        raise PyqmcDataError(
            "%s: Cannot find $VEC block number %s"
            % (infile, vec_select))

    # Save the reading results to the "self" record:
    self.filename = infile
    self.vec_select = vec_select
    self.comments = comments
    self.udet = udet
    self.nbasis = nbasis
    for (spin, mo) in AllVecs.items():
        setattr(self, spin, mo)
    return self
def read(self, infile, nbasis, debug=None, blksize=16384, perm='default'):
    """Reads in the matrix elements from a Fortran binary file.

    This is supposed to be an accelerated implementation.
    We *bypass* the fortran binary format and slurp the file into memory
    (in blocks of `blksize` records) before doing further processing.

    Arguments:
      infile  = name of the binary file to read
      nbasis  = number of basis functions (must be > 0); the result is a
                dense (nbasis, nbasis, nbasis, nbasis) float array
      debug   = if true, progress messages are written to sys.stdout
      blksize = number of records read and assigned per block
      perm    = permutation flag; it is looked up in
                `self.V2b_permutation_options`

    Permutation flags (`perm`) honored:
    * 0, False, None = No permutation (generally you don't want this except
      for debugging)
    * 1, 'standard' = standard fourfold permutation for a Hermitian
      two-body Hamiltonian
    * 2, 'real' = eightfold permutation for a Hermitian two-body
      Hamiltonian with real basis functions in real space

    Results are stored on `self`: `nbasis`, `H2`, `filename` and
    `filename_abs`.  Nothing is returned.

    Raises ValueError for an unknown `perm`, and PyqmcDataError if a record
    marker in the file does not have the expected value.
    """
    from numpy import conj
    from os.path import abspath

    assert nbasis > 0
    try:
        perm = self.V2b_permutation_options[perm]
    except KeyError:
        raise ValueError("Invalid permutation options: `%s'" % (perm,))

    self.nbasis = nbasis
    dbg = text_output(ifelse(debug, sys.stdout, None), flush=True)
    H2 = numpy.zeros((nbasis, nbasis, nbasis, nbasis), dtype=float)
    self.H2 = H2

    # One record as described by self.rectype carries its own leading and
    # trailing markers (fields 'm1' and 'm2'), so the record count is simply
    # the file size divided by the full record size.
    fsize = os.stat(infile).st_size
    rec_count = fsize // self.rectype.itemsize
    dbg("File %s: %d integral records to be read\n" % (infile, rec_count))
    dbg("Matrix element permutation flag = %s\n" % (perm))

    def flat_perm_index(blob, iljk):
        # Flat indices into H2 for the index fields taken in the order
        # given by the four letters of `iljk`.
        return self.make_flat_index(blob[iljk[0]], blob[iljk[1]],
                                    blob[iljk[2]], blob[iljk[3]])

    F = open(infile, "rb")
    try:
        self.filename = infile
        self.filename_abs = abspath(infile)
        # We use blocked read and assignment to minimize the python overhead
        for iblk in range(0, rec_count, blksize):
            read_blksize = min(blksize, rec_count - iblk)
            blob = numpy.fromfile(F, dtype=self.rectype, count=read_blksize)
            # The following provides a minimal consistency check if the file
            # just read is indeed a valid two_body_gms_ufmt file:
            if not numpy.all(blob['m1'] == self.recsize_net):
                raise PyqmcDataError(
                    "Invalid record marker (m1) detected: file %s may be corrupt or of incorrect format."
                    % (infile,))
            if not numpy.all(blob['m2'] == self.recsize_net):
                raise PyqmcDataError(
                    "Invalid record marker (m2) detected: file %s may be corrupt or of incorrect format."
                    % (infile,))
            # convert to py index (0-based)
            blob['i'] -= 1
            blob['l'] -= 1
            blob['j'] -= 1
            blob['k'] -= 1
            # Use: V2b_inspect.permute_V2b('i','j','l','k',chem=1)
            # to generate this:
            v = blob['v']
            H2.put(flat_perm_index(blob, 'iljk'), v)
            if perm == 1 or perm == 2:
                H2.put(flat_perm_index(blob, 'jkil'), v)
                H2.put(flat_perm_index(blob, 'likj'), conj(v))
                H2.put(flat_perm_index(blob, 'kjli'), conj(v))
            if perm == 2:
                # Only usable if the basis orbitals are real in real-space.
                H2.put(flat_perm_index(blob, 'ilkj'), v)
                H2.put(flat_perm_index(blob, 'kjil'), v)
                H2.put(flat_perm_index(blob, 'jkli'), v)
                H2.put(flat_perm_index(blob, 'lijk'), v)
    finally:
        # Close the file even when a consistency check above raises.
        F.close()