def chkg_E(fptr, unit='Ha', record=False, e_list=False, suppress=False):
    """
    Print a summary of the energies parsed from a Gaussian log file.

    The log is read from "gaussian/" + fptr + ".log" with g09.parse_atoms.

    **Parameters**

        fptr: *str*
            Job name used to build the log file path.
        unit: *str, optional*
            Unit the energy differences are converted to (from Ha).
        record: *bool, optional*
            Not referenced by the code in this block.
        e_list: *bool, optional*
            If True, print every parsed energy before the summary.
        suppress: *bool, optional*
            If True, nothing is printed.

    **Returns**

        None
    """
    import time
    # NOTE: today_log is not referenced again in this block.
    today_log = 'gaussian_%s.log' % time.strftime("%d_%m_%Y")

    log_path = "gaussian/" + fptr + ".log"

    def _print_convergence():
        # Show the four lines following the last "Converged?" marker.
        print('~~~~ Convergenge Criteria')
        with open(log_path) as log:
            s = log.read()
        print('\n'.join(s[s.rfind("Converged?"):].split('\n')[1:5]))

    # Read in data from file.  The third value is named "elapsed" so that it
    # no longer shadows the time module imported above.
    energies, _, elapsed = g09.parse_atoms(log_path, parse_all=True)

    # If you want the standard output to terminal, do this
    if not suppress:
        # Get all energy values
        if e_list:
            for e in energies:
                print(e)

        n_energies = len(energies)
        print('\n---------------------------------------------------')
        print('Job Name: ' + fptr)
        print('Energy Data Points: ' + str(n_energies))
        if n_energies > 2:
            print('dE 2nd last = ' +
                  str(units.convert_energy('Ha', unit,
                                           energies[-2] - energies[-3])) +
                  ' ' + unit)
        if n_energies > 1:
            print('dE last = ' +
                  str(units.convert_energy('Ha', unit,
                                           energies[-1] - energies[-2])) +
                  ' ' + unit)
        if n_energies > 0:
            print('Last Energy = ' + str(energies[-1]) + ' Ha')
        print('---------------------------------------------------')

        # Single-argument print(...) behaves the same as a print statement
        # on Python 2 and as a function call on Python 3.
        if elapsed:
            print('Job finished in %.2g seconds' % elapsed)
        elif fptr in get_jlist():
            print('Job is still running')
            _print_convergence()
        else:
            print('Job failed to converge. Log file says:\n'
                  '~~~~ End Of File Info')
            os.system('tail -n 5 ' + log_path)
            _print_convergence()
        print('---------------------------------------------------\n')
def chkg_E(fptr, unit='Ha', record=False, e_list=False, suppress=False):
    """
    Print a summary of the energies parsed from a Gaussian log file.

    The log is read from "gaussian/" + fptr + ".log" with g09.parse_atoms.

    **Parameters**

        fptr: *str*
            Job name used to build the log file path.
        unit: *str, optional*
            Unit the energy differences are converted to (from Ha).
        record: *bool, optional*
            Not referenced by the code in this block.
        e_list: *bool, optional*
            If True, print every parsed energy before the summary.
        suppress: *bool, optional*
            If True, nothing is printed.

    **Returns**

        None
    """
    import time
    # NOTE: today_log is not referenced again in this block.
    today_log = 'gaussian_%s.log' % time.strftime("%d_%m_%Y")

    log_path = "gaussian/" + fptr + ".log"

    def _print_convergence():
        # Show the four lines following the last "Converged?" marker.
        print('~~~~ Convergenge Criteria')
        with open(log_path) as log:
            s = log.read()
        print('\n'.join(s[s.rfind("Converged?"):].split('\n')[1:5]))

    # Read in data from file.  The third value is named "elapsed" so that it
    # no longer shadows the time module imported above.
    energies, _, elapsed = g09.parse_atoms(log_path, parse_all=True)

    # If you want the standard output to terminal, do this
    if not suppress:
        # Get all energy values
        if e_list:
            for e in energies:
                print(e)

        n_energies = len(energies)
        print('\n---------------------------------------------------')
        print('Job Name: ' + fptr)
        print('Energy Data Points: ' + str(n_energies))
        if n_energies > 2:
            print('dE 2nd last = ' +
                  str(units.convert_energy('Ha', unit,
                                           energies[-2] - energies[-3])) +
                  ' ' + unit)
        if n_energies > 1:
            print('dE last = ' +
                  str(units.convert_energy('Ha', unit,
                                           energies[-1] - energies[-2])) +
                  ' ' + unit)
        if n_energies > 0:
            print('Last Energy = ' + str(energies[-1]) + ' Ha')
        print('---------------------------------------------------')

        # Single-argument print(...) behaves the same as a print statement
        # on Python 2 and as a function call on Python 3.
        if elapsed:
            print('Job finished in %.2g seconds' % elapsed)
        elif fptr in get_jlist():
            print('Job is still running')
            _print_convergence()
        else:
            print('Job failed to converge. Log file says:\n'
                  '~~~~ End Of File Info')
            os.system('tail -n 5 ' + log_path)
            _print_convergence()
        print('---------------------------------------------------\n')
def method_CLANCELOT(opt_method="LBFGS"):
    """
    Run a NEB on the frames in ./../xyz/CNH_HCN.xyz and write the result.

    The optimized frames (last element of what NEB.optimize returns) are
    written with write_xyz under the name "CNH_HCN_opt_<opt_method>".

    **Parameters**

        opt_method: *str, optional*
            Optimizer name handed to neb.NEB; also used in the run name and
            in the output file name.

    **Returns**

        None
    """
    from files import read_xyz, write_xyz
    import neb
    from units import convert, convert_energy

    initial_band = read_xyz("./../xyz/CNH_HCN.xyz")
    dft_route = '! HF-3c'

    rotation_state = "ON" if RIGID_ROTATION else "OFF"
    print("\nRUNNING CLANCELOT SIMULATION WITH RIGID_ROTATION %s...\n"
          % rotation_state)

    job_name = 'CNH_HCN_c_' + opt_method

    # Optimizer settings; force tolerances are converted from eV/Ang to
    # Ha/Ang before being passed on.
    optimizer_settings = {
        'step_size': ALPHA,
        'step_size_adjustment': 0.5,
        'max_step': MAX_STEP,
        'linesearch': 'backtrack',
        'accelerate': True,
        'reset_step_size': 5,
        'g_rms': convert("eV/Ang", "Ha/Ang", 0.03),
        'g_max': convert("eV/Ang", "Ha/Ang", FMAX)
    }

    spring_constant = convert_energy("eV", "Ha", 0.1)
    band = neb.NEB(job_name,
                   initial_band,
                   dft_route,
                   k=spring_constant,
                   opt=opt_method,
                   new_opt_params=optimizer_settings)

    # The last element returned by optimize() holds the frames.
    final_band = band.optimize()[-1]
    write_xyz(final_band, "CNH_HCN_opt_%s" % opt_method)

    print("\nDONE WITH CLANCELOT SIMULATION...\n")
elif dft == 'orca': try: data = orca.read(run_name) except IOError: print("Error - orca simulation %s does not exist. Are you sure -dft orca is correct?" % run_name) sys.exit() else: print("DFT type %s not available..." % dft) sys.exit() # Get the header information head = 'Job Name: %s\n' % run_name head += 'DFT Simmulation in %s\n' % dft head += 'Energy Data Points: %d\n' % len(data.energies) if len(data.energies) > 2: Ener = str(units.convert_energy(u1, u2, data.energies[-2] - data.energies[-3])) head += 'dE 2nd last = %s %s\n' % (Ener,u2) if len(data.energies) > 1: Ener = str(units.convert_energy(u1, u2, data.energies[-1] - data.energies[-2])) head += 'dE last = %s %s\n' % (Ener,u2) if len(data.energies) > 0: Ener = str(units.convert_energy(u1, u2, data.energies[-1])) head += 'Last Energy = %s %s' % (Ener,u2) body, tail = '', '' if data.convergence != None: for line in data.convergence: body += '\t'.join([str(s) for s in line])+'\n' body = utils.spaced_print(body, delim='\t') if data.converged:
def post_enthalpy_solvation(fpl_obj, md_dft="dft", unit="kT_300"):
    """
    Return the first energy minus the second and the third, in *unit*.

    **Parameters**

        fpl_obj: *object*
            Object whose .data is iterable and whose items carry .energy.
        md_dft: *str, optional*
            Not referenced by this function.
        unit: *str, optional*
            Unit the difference is converted to (from Ha).

    **Returns**

        The value returned by units.convert_energy for
        energies[0] - energies[1] - energies[2].
    """
    # Collect every energy first, exactly as many as fpl_obj.data provides.
    collected = []
    for entry in fpl_obj.data:
        collected.append(entry.energy)

    difference = collected[0] - collected[1] - collected[2]
    return units.convert_energy("Ha", unit, difference)
if time: print 'Job finished in %.2g seconds' % time elif (fptr) in get_jlist(): print 'Job is still running' print '~~~~ Convergenge Criteria' s = open('gaussian/'+fptr+'.log').read() print('\n'.join(s[s.rfind("Converged?"):].split('\n')[1:5])) else: print 'Job failed to converge. Log file says:\n~~~~ End Of File Info' os.system('tail -n 5 '+"gaussian/"+fptr+".log") print '~~~~ Convergenge Criteria' s = open('gaussian/'+fptr+'.log').read() print('\n'.join(s[s.rfind("Converged?"):].split('\n')[1:5])) print('---------------------------------------------------\n') # If you want to record data, do this if record: try: s = open(today_log).read().replace('##$$@@#'+fptr+'#$@$#@#$',str(units.convert_energy('Ha',unit,energies[-1]))+' '+unit) except: s = open(today_log).read().replace('##$$@@#'+fptr+'#$@$#@#$','Error - Could not get the energy from file') print('\nWarning - Could not get the energy for '+fptr+'.\n') f = open(today_log,'w') f.write(s) f.close() # Goes through and updates all the eval values that haven't been updated def chkg_E_all(u='Ha'): import time today_log = 'gaussian_%s.log' % time.strftime("%d_%m_%Y") sptr = open(today_log).read() a = '##$$@@#' b = '#$@$#@#$'
def convert(x):
    """
    Apply units.convert_energy("Ha", "kcal", ...) followed by
    units.convert_dist("Ang", "Bohr", ...) to *x* and return the result.
    """
    in_kcal = units.convert_energy("Ha", "kcal", x)
    return units.convert_dist("Ang", "Bohr", in_kcal)
def pickle_training_set(run_name,
                        training_sets_folder="training_set",
                        pickle_file_name="training_set",
                        high_energy_cutoff=500.0,
                        system_x_offset=1000.0,
                        verbose=False,
                        extra_parameters=None):
    """
    A function to pickle together the training set in a manner that is
    readable for MCSMRFF.  This is a single LAMMPs data file with each
    training set offset along the x-axis by system_x_offset.  The pickle
    file, when read in later, holds a list of two objects.  The first is the
    entire system as described above.  The second is a dictionary of all
    molecules in the system, organized by composition.

    **Parameters**

        run_name: *str*
            Name of final training set.
        training_sets_folder: *str, optional*
            Path to the folder where all the training set data is.
        pickle_file_name: *str, optional*
            A name for the pickle file and training set system.
        high_energy_cutoff: *float, optional*
            A cutoff for systems that are too large in energy, as MD is
            likely never to sample them.
        system_x_offset: *float, optional*
            The x offset for the systems to be added by.
        verbose: *bool, optional*
            Whether to have additional stdout or not.
        extra_parameters: *dict, optional*
            A dictionary of additional parameters that do not exist in the
            default OPLSAA parameter file.  Defaults to an empty dictionary.

    **Returns**

        system: *System*
            The entire training set system.
        systems_by_composition: *dict, list, Molecule*
            Each molecule organized in this hash table.

    **Raises**

        Exception
            If the pickle file already exists, or if the atoms read from
            the cml file and the orca output differ in position.
    """
    # A fresh dictionary per call; a shared mutable default would leak state
    # between calls.
    if extra_parameters is None:
        extra_parameters = {}

    # Take care of pickle file I/O
    if training_sets_folder.endswith("/"):
        training_sets_folder = training_sets_folder[:-1]
    if pickle_file_name is not None and pickle_file_name.endswith(".pickle"):
        pickle_file_name = pickle_file_name.split(".pickle")[0]
    pfile = training_sets_folder + "/" + pickle_file_name + ".pickle"
    sys_name = pickle_file_name
    if os.path.isfile(pfile):
        raise Exception("Pickled training set already exists!")

    # Generate empty system for your training set
    system = structures.System(box_size=[1e3, 100.0, 100.0], name=sys_name)
    systems_by_composition = {}

    # Convert force from Ha/Bohr to kcal/mol-Ang
    def convert(x):
        return units.convert_dist(
            "Ang", "Bohr", units.convert_energy("Ha", "kcal", x))

    # For each folder in the training_sets folder lets get the cml file we
    # want and write the energies and forces for that file
    for name in os.listdir(training_sets_folder):
        # We'll read in any training subset that succeeded and print a
        # warning on those that failed
        try:
            result = orca.read("%s/%s/%s.out" %
                               (training_sets_folder, name, name))
        except IOError:
            print("Warning - Training Subset %s not included as "
                  "out file not found..." % name)
            continue

        # Check for convergence
        if not result.converged:
            print("Warning - Results for %s have not converged." % name)
            continue

        # Parse the force output and change units.  In the case of no force
        # found, do not use this set of data
        try:
            forces = orca.engrad_read("%s/%s/%s.orca.engrad" %
                                      (training_sets_folder, name, name),
                                      pos="Ang")[0]
            for a, b in zip(result.atoms, forces):
                a.fx, a.fy, a.fz = convert(b.fx), convert(b.fy), convert(b.fz)
        except (IndexError, IOError):
            print("Warning - Training Subset %s not included as "
                  "results not found..." % name)
            continue

        # Get the bonding information
        with_bonds = structures.Molecule("%s/%s/%s.cml" %
                                         (training_sets_folder, name, name),
                                         extra_parameters=extra_parameters,
                                         allow_errors=True,
                                         test_charges=False)

        # Copy over the forces read in into the system that has the bonding
        # information
        for a, b in zip(with_bonds.atoms, result.atoms):
            a.fx, a.fy, a.fz = b.fx, b.fy, b.fz
            # sanity check on atom positions
            if geometry.dist(a, b) > 1e-4:
                raise Exception('Atoms are different:',
                                (a.x, a.y, a.z), (b.x, b.y, b.z))

        # Rename and save energy
        with_bonds.energy = result.energy
        with_bonds.name = name

        # Group the molecule under a key made of its sorted element names
        composition = ' '.join(sorted([a.element for a in result.atoms]))
        systems_by_composition.setdefault(composition, []).append(with_bonds)

    # Generate:
    #   (1) xyz file of various systems as different time steps
    #   (2) system to simulate
    xyz_atoms = []
    to_delete = []
    for composition in systems_by_composition:
        # Sort so that the lowest energy training subset is first
        # in the system
        subsets = systems_by_composition[composition]
        subsets.sort(key=lambda s: s.energy)
        baseline_energy = subsets[0].energy

        # Offset the energies by the lowest energy, and convert energy units
        for j, s in enumerate(subsets):
            s.energy -= baseline_energy
            s.energy = units.convert_energy("Ha", "kcal/mol", s.energy)
            # Don't use high-energy systems, because these will not likely
            # be sampled in MD
            if s.energy > high_energy_cutoff:
                to_delete.append([composition, j])
                continue
            # For testing purposes, output
            if verbose:
                print("Using: %s %s" % (s.name, s.energy))
            xyz_atoms.append(s.atoms)
            system.add(s, len(system.molecules) * system_x_offset)

    # Delete the system_names that we aren't actually using due to energy
    # being too high.  Highest index first so that earlier deletions do not
    # shift the positions of entries still waiting to be deleted.
    to_delete = sorted(to_delete, key=lambda x: x[1])[::-1]
    for d1, d2 in to_delete:
        if verbose:
            print("Warning - Training Subset %s not included as energy "
                  "is too high..." % systems_by_composition[d1][d2].name)
        del systems_by_composition[d1][d2]

    # Make the box just a little bigger (100) so that we can fit all our
    # systems
    system.xhi = len(system.molecules) * system_x_offset + 100.0

    # Write all of the states we are using to training_sets.xyz
    files.write_xyz(xyz_atoms, training_sets_folder + '/' + pickle_file_name)

    # Generate our pickle file
    print("Saving pickle file %s..." % pfile)
    with open(pfile, "wb") as fptr:
        pickle.dump([system, systems_by_composition], fptr)

    # Now we have the data, save it to files for this simulation of
    # "run_name" and return parameters
    if not os.path.isdir(run_name):
        os.mkdir(run_name)
    # Remember where we started so the working directory is restored even
    # when run_name is a nested path or writing fails.
    start_dir = os.getcwd()
    os.chdir(run_name)
    try:
        mcsmrff_files.write_system_and_training_data(run_name,
                                                     system,
                                                     systems_by_composition)
    finally:
        os.chdir(start_dir)
    shutil.copyfile(pfile, "%s/%s.pickle" % (run_name, run_name))

    return system, systems_by_composition
def optimize(self):
    """
    Run the NEB optimization with the optimizer named by self.opt.

    Prints a header, dispatches to the chosen optimizer and reports the
    convergence code it returns.  On invalid input (too few frames, unknown
    optimizer) an error is printed and the process exits with status 1.

    **Returns**

        For the built-in optimizers: (FINAL_PARAMS, ITERS, self.states).
        For "scipy_X" methods: (output of minimize, self.states).
    """
    # Try seeing if neb was run for <= 2 frames
    if (isinstance(self.states, list) and
            not isinstance(self.states[0], list)):
        print("Error - Only one frame in NEB calculation. Did you mean to "
              "run an optimization instead?")
        sys.exit(1)
    elif (isinstance(self.states, type(self.states[0])) and
            len(self.states) <= 2):
        print("Error - NEB requires at least 3 frames to run. You have "
              "entered only %d frames." % len(self.states))
        sys.exit(1)

    # Set which atoms will be affected by virtual springs
    if not self.spring_atoms:
        # A real list, matching the type produced by the branch below
        self.spring_atoms = list(range(len(self.states[0])))
    elif isinstance(self.spring_atoms, str):
        # A list of element names
        elements = self.spring_atoms.split()
        self.spring_atoms = [
            i for i, a in enumerate(self.states[0]) if a.element in elements
        ]

    # NEB Header
    print("\n---------------------------------------------" +
          "---------------------------------------------")
    print("Run_Name = %s" % str(self.name))
    print("DFT Package = %s" % self.DFT)
    print("Spring Constant for NEB: %lg Ha/Ang = %lg eV/Ang" %
          (self.k, units.convert_energy("Ha", "eV", self.k)))

    if self.no_energy:
        print("Running NEB with old tangent approximation")
    if self.ci_neb:
        print("Running Climbing Image, starting at iteration %d" % self.ci_N)

    # Pick the optimizer.  The names are only looked up for the branch that
    # is actually taken, as in the original if/elif chain of calls.
    use_scipy = False
    if self.opt == "sd":
        optimizer = steepest_descent
    elif self.opt == "bfgs":
        optimizer = bfgs
    elif self.opt == "lbfgs":
        optimizer = lbfgs
    elif self.opt == "qm":
        optimizer = quick_min
    elif self.opt == "fire":
        optimizer = fire
    elif self.opt == "cg":
        optimizer = conjugate_gradient
    elif self.opt.startswith("scipy"):
        use_scipy = True
    else:
        print("\nERROR - %s optimizations method does not exist! Choose "
              "from the following:" % str(self.opt))
        print("\t1. BFGS")
        print("\t2. LBFGS")
        print("\t3. QM")
        print("\t4. SD")
        print("\t5. FIRE")
        print("\t6. CG")
        print("\t7. scipy_X where X is a valid scipy minimization method.")
        sys.exit(1)

    if use_scipy:
        print("\nRunning neb with optimization method " + self.opt)
        params = np.array(self.coords_start)
        output = minimize(self.get_error,
                          params,
                          jac=self.get_gradient,
                          method=self.opt.split("_")[-1],
                          options=self.new_opt_params)
        return output, self.states

    output = optimizer(np.array(self.coords_start),
                       self.get_gradient,
                       NEB_obj=self,
                       new_opt_params=self.new_opt_params)

    FINAL_PARAMS, CODE, ITERS = output

    if CODE == FAIL_CONVERGENCE:
        print("\nNEB failed to converge.")
    elif CODE == MAXITER_CONVERGENCE:
        print("\nNEB quit after reaching the specified maximum number "
              "of iterations.")
    elif CODE == G_MAX_CONVERGENCE:
        print("\nNEB converged the maximum force.")
    elif CODE == G_RMS_CONVERGENCE:
        print("\nNEB converged the RMS force.")
    elif CODE == STEP_SIZE_TOO_SMALL:
        print("\nNEB failed to converge. Step size either started too "
              "small, or was backtracked to being too small.")
    else:
        print("\nSomething unknown happened during NEB optimization, and "
              "no flag was returned.")

    print("---------------------------------------------" +
          "---------------------------------------------\n\n")
    return FINAL_PARAMS, ITERS, self.states
def calculate(self, coords):
    """
    Evaluate the band at *coords* and store the NEB gradient and error.

    The interior frames of self.states are updated from *coords*, jobs are
    started through self.start_job and read back through self.get_results,
    the NEB forces are written onto the atoms of the interior frames, and
    self.gradient, self.RMS_force, self.MAX_force and self.error are set.
    A progress line is printed, self.step is incremented and self.callback
    (if set) is called with self.states.

    **Parameters**

        coords: *sequence of float*
            Flat x, y, z values for every atom of every interior frame
            (frames 1 .. N-2), in frame order.

    **Returns**

        None

    **Raises**

        Exception
            If climbing image selects one of the two end frames.
    """
    self.calls_to_calculate += 1

    # Update coordinates in states. This won't change anything on
    # the first run through, but will on subsequent ones
    coord_count = 0
    for s in self.states[1:-1]:
        for a in s:
            a.x, a.y, a.z = coords[coord_count:coord_count + 3]
            coord_count += 3

    last = len(self.states) - 1

    # Start DFT jobs
    running_jobs = []
    for i, state in enumerate(self.states):
        if (i == 0 or i == last) and self.step > 0:
            # No need to calculate anything for first and last states
            # after the first step
            continue
        running_jobs.append(
            self.start_job(self, i, state, self.charge, self.multiplicity,
                           self.procs, self.queue, self.initial_guess,
                           self.extra_section, self.mem, self.priority,
                           self.extra_keywords))

    # Wait for jobs to finish
    if self.job_hang_time is not None:
        time.sleep(self.job_hang_time)
    for j in running_jobs:
        j.wait()
    if self.job_hang_time is not None:
        time.sleep(self.job_hang_time)

    # Get forces and energies from DFT calculations
    if not self.no_energy:
        energies = []
    for i, state in enumerate(self.states):
        # State 0 and state N-1 don't change, so just use result
        # from self.step == 0
        if i == 0 or i == last:
            step_to_use = 0
        else:
            step_to_use = self.step
        new_energy, new_atoms = self.get_results(self, step_to_use, i, state)
        if not self.no_energy:
            energies.append(new_energy)

    # V = potential energy from DFT. energies = V+springs
    if not self.no_energy:
        V = copy.deepcopy(energies)

    # In climbing image neb, after a few iterations we take the highest
    # energy image and use that.
    # NOTE(review): V only exists when self.no_energy is False, so ci_neb
    # together with no_energy would fail here - confirm that the two options
    # are never combined.
    if self.ci_neb and self.ci_img is None and self.step > self.ci_N:
        self.ci_img = V.index(max(V))
        if self.ci_img in [0, last]:
            raise Exception("CI found endpoint. Is your band correct?")

    # Get positions in a flat array.  Entries for atoms that are not spring
    # atoms are zero (np.zeros, not np.empty) so that they contribute
    # nothing to the tangent or to the spring length.
    def get_positions(image):
        pos = np.zeros((len(image), 3))
        for j, atom in enumerate(image):
            if j not in self.spring_atoms:
                continue
            pos[j] = np.array([atom.x, atom.y, atom.z])
        return pos.flatten()

    # Add spring forces to atoms
    for i in range(1, last):
        a = get_positions(self.states[i - 1])
        b = get_positions(self.states[i])
        c = get_positions(self.states[i + 1])

        # Same zero fill as for the positions
        real_force = np.zeros((len(self.states[i]), 3))
        for j, atom in enumerate(self.states[i]):
            if j not in self.spring_atoms:
                continue
            real_force[j] = np.array([atom.fx, atom.fy, atom.fz])
        real_force = real_force.flatten()

        # Find tangent
        tplus = c - b
        tminus = b - a
        if self.no_energy:
            # Sum of the two normalized neighbour directions
            tangent = (tplus / np.linalg.norm(tplus) +
                       tminus / np.linalg.norm(tminus))
            tangent = tangent / np.linalg.norm(tangent)
        else:
            dVmin = min(abs(V[i + 1] - V[i]), abs(V[i - 1] - V[i]))
            dVmax = max(abs(V[i + 1] - V[i]), abs(V[i - 1] - V[i]))
            if V[i + 1] > V[i] and V[i] > V[i - 1]:
                tangent = tplus.copy()
            elif V[i + 1] < V[i] and V[i] < V[i - 1]:
                tangent = tminus.copy()
            elif V[i + 1] > V[i - 1]:
                tangent = tplus * dVmax + tminus * dVmin
            else:
                tangent = tplus * dVmin + tminus * dVmax

        # Normalize tangent
        tangent_norm = np.sqrt(np.vdot(tangent, tangent))
        if tangent_norm != 0:
            tangent /= tangent_norm

        F_spring_parallel = self.k * (np.linalg.norm(tplus) -
                                      np.linalg.norm(tminus)) * tangent
        F_real_perpendicular = real_force - \
            (np.vdot(real_force, tangent) * tangent)

        # Set NEB forces
        # Note, in climbing image we have the formula:
        #    F = F_real - 2*F_real*tau*tau
        # Vs the normal:
        #    F = F_spring_parallel + F_real_perpendicular
        if self.ci_img is not None and i == self.ci_img:
            forces = real_force - 2.0 * np.vdot(real_force, tangent) * tangent
        else:
            forces = F_spring_parallel + F_real_perpendicular

        forces = forces.reshape((-1, 3))
        for j, atom in enumerate(self.states[i]):
            if j not in self.spring_atoms:
                continue
            atom.fx, atom.fy, atom.fz = forces[j]

    # Remove net translation forces from the gradient
    if self.fit_rigid:
        for state in self.states[1:-1]:
            net_force = np.zeros(3)
            for a in state:
                net_force += (a.fx, a.fy, a.fz)
            for a in state:
                a.fx -= net_force[0] / len(state)
                a.fy -= net_force[1] / len(state)
                a.fz -= net_force[2] / len(state)

    # Set gradient (gradient of self.error)
    self.gradient = []
    for state in self.states[1:-1]:
        for a in state:
            self.gradient += [-a.fx, -a.fy, -a.fz]

    # Calculate RMS Force and Max force
    force_mags = [(a.fx**2 + a.fy**2 + a.fz**2)**0.5
                  for state in self.states[1:-1] for a in state]
    RMS_force = geometry.rms(force_mags)
    self.RMS_force = RMS_force
    MAX_force = max(force_mags)
    self.MAX_force = MAX_force

    # Print data.  V[0] is kept as is; the other entries become energies
    # relative to V[0], converted from Ha to kT_300.
    if not self.no_energy:
        V = V[:1] + [
            units.convert_energy("Ha", "kT_300", e - V[0]) for e in V[1:]
        ]
        MAX_energy = max(V)
    else:
        MAX_energy = float('inf')

    # Green when the value improved on the best one seen so far, else red
    rms_improved = self.prv_RMS is None or self.prv_RMS > RMS_force
    rms = print_helper.color_set(
        float("%.4f" % units.convert_energy("Ha", "eV", RMS_force)),
        'GREEN' if rms_improved else 'RED')

    max_improved = self.prv_MAX is None or self.prv_MAX > MAX_force
    max_f = print_helper.color_set(
        float("%.4f" % units.convert_energy("Ha", "eV", MAX_force)),
        'GREEN' if max_improved else 'RED')

    if not self.no_energy:
        e_improved = self.prv_MAX_E is None or self.prv_MAX_E > MAX_energy
        max_e = print_helper.color_set(
            float("%.1f" % MAX_energy), 'GREEN' if e_improved else 'RED')

        if self.step == 0:
            print("Step\tRMS_F (eV/Ang)\tMAX_F (eV/Ang)\tMAX_E (kT_300)"
                  "\tEnergies (kT_300)\n----")
        # One print() call reproducing the two chained Python 2 print
        # statements: a space after the first part, none after the part
        # that ends in a tab, and a space before the energy list.
        summary = "%d\t%s\t\t%s\t\t%s" % (self.step, rms, max_f, max_e)
        first_energy = '%7.5g +' % V[0]
        other_energies = ('%5.1f ' * len(V[1:])) % tuple(V[1:])
        print(summary + ' ' + ' \t\t\t\t' + first_energy + ' ' +
              other_energies)
    else:
        if self.step == 0:
            print("Step\tRMS_F (eV/Ang)\tMAX_F (eV/Ang)\n----")
        print("%d\t%s\t\t%s" % (self.step, rms, max_f))

    sys.stdout.flush()

    # Track the best (lowest) values seen so far
    if self.prv_RMS is None:
        self.prv_RMS = RMS_force
    self.prv_RMS = min(RMS_force, self.prv_RMS)

    if self.prv_MAX is None:
        self.prv_MAX = MAX_force
    self.prv_MAX = min(MAX_force, self.prv_MAX)

    if not self.no_energy:
        if self.prv_MAX_E is None:
            self.prv_MAX_E = MAX_energy
        self.prv_MAX_E = min(MAX_energy, self.prv_MAX_E)

    # Set error
    self.error = RMS_force

    # Increment step
    self.step += 1

    if self.callback is not None:
        self.callback(self.states)
print '~~~~ Convergenge Criteria' s = open('gaussian/' + fptr + '.log').read() print('\n'.join(s[s.rfind("Converged?"):].split('\n')[1:5])) else: print 'Job failed to converge. Log file says:\n~~~~ End Of File Info' os.system('tail -n 5 ' + "gaussian/" + fptr + ".log") print '~~~~ Convergenge Criteria' s = open('gaussian/' + fptr + '.log').read() print('\n'.join(s[s.rfind("Converged?"):].split('\n')[1:5])) print('---------------------------------------------------\n') # If you want to record data, do this if record: try: s = open(today_log).read().replace( '##$$@@#' + fptr + '#$@$#@#$', str(units.convert_energy('Ha', unit, energies[-1])) + ' ' + unit) except: s = open(today_log).read().replace( '##$$@@#' + fptr + '#$@$#@#$', 'Error - Could not get the energy from file') print('\nWarning - Could not get the energy for ' + fptr + '.\n') f = open(today_log, 'w') f.write(s) f.close() # Goes through and updates all the eval values that haven't been updated def chkg_E_all(u='Ha'): import time
def convert(x):
    """
    Apply units.convert_energy("Ha", "kcal", ...) followed by
    units.convert_dist("Ang", "Bohr", ...) to *x* and return the result.
    """
    in_kcal = units.convert_energy("Ha", "kcal", x)
    return units.convert_dist("Ang", "Bohr", in_kcal)
def pickle_training_set(run_name,
                        training_sets_folder="training_set",
                        pickle_file_name="training_set",
                        high_energy_cutoff=500.0,
                        system_x_offset=1000.0,
                        verbose=False,
                        extra_parameters=None):
    """
    A function to pickle together the training set in a manner that is
    readable for MCSMRFF.  This is a single LAMMPs data file with each
    training set offset along the x-axis by system_x_offset.  The pickle
    file, when read in later, holds a list of two objects.  The first is the
    entire system as described above.  The second is a dictionary of all
    molecules in the system, organized by composition.

    **Parameters**

        run_name: *str*
            Name of final training set.
        training_sets_folder: *str, optional*
            Path to the folder where all the training set data is.
        pickle_file_name: *str, optional*
            A name for the pickle file and training set system.
        high_energy_cutoff: *float, optional*
            A cutoff for systems that are too large in energy, as MD is
            likely never to sample them.
        system_x_offset: *float, optional*
            The x offset for the systems to be added by.
        verbose: *bool, optional*
            Whether to have additional stdout or not.
        extra_parameters: *dict, optional*
            A dictionary of additional parameters that do not exist in the
            default OPLSAA parameter file.  Defaults to an empty dictionary.

    **Returns**

        system: *System*
            The entire training set system.
        systems_by_composition: *dict, list, Molecule*
            Each molecule organized in this hash table.

    **Raises**

        Exception
            If the pickle file already exists, or if the atoms read from
            the cml file and the orca output differ in position.
    """
    # A fresh dictionary per call; a shared mutable default would leak state
    # between calls.
    if extra_parameters is None:
        extra_parameters = {}

    # Take care of pickle file I/O
    if training_sets_folder.endswith("/"):
        training_sets_folder = training_sets_folder[:-1]
    if pickle_file_name is not None and pickle_file_name.endswith(".pickle"):
        pickle_file_name = pickle_file_name.split(".pickle")[0]
    pfile = training_sets_folder + "/" + pickle_file_name + ".pickle"
    sys_name = pickle_file_name
    if os.path.isfile(pfile):
        raise Exception("Pickled training set already exists!")

    # Generate empty system for your training set
    system = structures.System(box_size=[1e3, 100.0, 100.0], name=sys_name)
    systems_by_composition = {}

    # Convert force from Ha/Bohr to kcal/mol-Ang
    def convert(x):
        return units.convert_dist(
            "Ang", "Bohr", units.convert_energy("Ha", "kcal", x))

    # For each folder in the training_sets folder lets get the cml file we
    # want and write the energies and forces for that file
    for name in os.listdir(training_sets_folder):
        # We'll read in any training subset that succeeded and print a
        # warning on those that failed
        try:
            result = orca.read("%s/%s/%s.out" %
                               (training_sets_folder, name, name))
        except IOError:
            print("Warning - Training Subset %s not included as "
                  "out file not found..." % name)
            continue

        # Check for convergence
        if not result.converged:
            print("Warning - Results for %s have not converged." % name)
            continue

        # Parse the force output and change units.  In the case of no force
        # found, do not use this set of data
        try:
            forces = orca.engrad_read("%s/%s/%s.orca.engrad" %
                                      (training_sets_folder, name, name),
                                      pos="Ang")[0]
            for a, b in zip(result.atoms, forces):
                a.fx, a.fy, a.fz = convert(b.fx), convert(b.fy), convert(b.fz)
        except (IndexError, IOError):
            print("Warning - Training Subset %s not included as "
                  "results not found..." % name)
            continue

        # Get the bonding information
        with_bonds = structures.Molecule("%s/%s/%s.cml" %
                                         (training_sets_folder, name, name),
                                         extra_parameters=extra_parameters,
                                         allow_errors=True,
                                         test_charges=False)

        # Copy over the forces read in into the system that has the bonding
        # information
        for a, b in zip(with_bonds.atoms, result.atoms):
            a.fx, a.fy, a.fz = b.fx, b.fy, b.fz
            # sanity check on atom positions
            if geometry.dist(a, b) > 1e-4:
                raise Exception('Atoms are different:',
                                (a.x, a.y, a.z), (b.x, b.y, b.z))

        # Rename and save energy
        with_bonds.energy = result.energy
        with_bonds.name = name

        # Group the molecule under a key made of its sorted element names
        composition = ' '.join(sorted([a.element for a in result.atoms]))
        systems_by_composition.setdefault(composition, []).append(with_bonds)

    # Generate:
    #   (1) xyz file of various systems as different time steps
    #   (2) system to simulate
    xyz_atoms = []
    to_delete = []
    for composition in systems_by_composition:
        # Sort so that the lowest energy training subset is first
        # in the system
        subsets = systems_by_composition[composition]
        subsets.sort(key=lambda s: s.energy)
        baseline_energy = subsets[0].energy

        # Offset the energies by the lowest energy, and convert energy units
        for j, s in enumerate(subsets):
            s.energy -= baseline_energy
            s.energy = units.convert_energy("Ha", "kcal/mol", s.energy)
            # Don't use high-energy systems, because these will not likely
            # be sampled in MD
            if s.energy > high_energy_cutoff:
                to_delete.append([composition, j])
                continue
            # For testing purposes, output
            if verbose:
                print("Using: %s %s" % (s.name, s.energy))
            xyz_atoms.append(s.atoms)
            system.add(s, len(system.molecules) * system_x_offset)

    # Delete the system_names that we aren't actually using due to energy
    # being too high.  Highest index first so that earlier deletions do not
    # shift the positions of entries still waiting to be deleted.
    to_delete = sorted(to_delete, key=lambda x: x[1])[::-1]
    for d1, d2 in to_delete:
        if verbose:
            print("Warning - Training Subset %s not included as energy "
                  "is too high..." % systems_by_composition[d1][d2].name)
        del systems_by_composition[d1][d2]

    # Make the box just a little bigger (100) so that we can fit all our
    # systems
    system.xhi = len(system.molecules) * system_x_offset + 100.0

    # Write all of the states we are using to training_sets.xyz
    files.write_xyz(xyz_atoms, training_sets_folder + '/' + pickle_file_name)

    # Generate our pickle file
    print("Saving pickle file %s..." % pfile)
    with open(pfile, "wb") as fptr:
        pickle.dump([system, systems_by_composition], fptr)

    # Now we have the data, save it to files for this simulation of
    # "run_name" and return parameters
    if not os.path.isdir(run_name):
        os.mkdir(run_name)
    # Remember where we started so the working directory is restored even
    # when run_name is a nested path or writing fails.
    start_dir = os.getcwd()
    os.chdir(run_name)
    try:
        mcsmrff_files.write_system_and_training_data(run_name,
                                                     system,
                                                     systems_by_composition)
    finally:
        os.chdir(start_dir)
    shutil.copyfile(pfile, "%s/%s.pickle" % (run_name, run_name))

    return system, systems_by_composition
def optimize(self):
    """
    Run the ANEB optimization; only self.opt == "lbfgs" is supported.

    Frames are added with add_frame after each lbfgs run until
    len(self.all_states) reaches self.ANEB_Nmax, then a final lbfgs run is
    made on all frames with self.ci_ANEB set to True.  On invalid input
    (too few frames, unknown optimizer) an error is printed and the process
    exits with status 1.

    **Returns**

        (FINAL_PARAMS, ITERS, self.states) from the final lbfgs run.
    """
    # Try seeing if ANEB was run for <= 2 frames
    if (isinstance(self.states, list) and
            not isinstance(self.states[0], list)):
        print("Error - Only one frame in ANEB calculation. Did you mean to "
              "run an optimization instead?")
        sys.exit(1)
    elif (isinstance(self.states, type(self.states[0])) and
            len(self.states) <= 2):
        print("Error - ANEB requires at least 3 frames to run. You have "
              "entered only %d frames." % len(self.states))
        sys.exit(1)

    # Set which atoms will be affected by virtual springs
    set_spring_atoms(self)

    # ANEB Header
    print("\n---------------------------------------------" +
          "---------------------------------------------")
    print("Run_Name = %s" % str(self.name))
    print("DFT Package = %s" % self.DFT)
    print("Spring Constant for ANEB: %lg Ha/Ang = %lg eV/Ang" %
          (self.k_0, units.convert_energy("Ha", "eV", self.k_0)))

    if self.opt != "lbfgs":
        print("\nERROR - %s optimizations method does not exist! Choose "
              "from the following:" % str(self.opt))
        print("\t1. LBFGS")
        sys.exit(1)

    # Grow the band: optimize, add a frame, reset the per-run bookkeeping
    self.ci_ANEB = False
    while len(self.all_states) < self.ANEB_Nmax:
        print("\nRunning for N_sim = %d of %d frames (N_max = %d)..." %
              (self.ANEB_Nsim, len(self.all_states), self.ANEB_Nmax))
        _ = lbfgs(np.array(self.flattened_states),
                  self.get_gradient,
                  NEB_obj=self,
                  new_opt_params=self.new_auto_opt_params)
        add_frame(self)
        self.prv_RMS = None
        self.prv_MAX = None
        self.prv_MAX_E = None
        self.step = 0
        self.nframes = len(self.states)
        set_spring_atoms(self)

    # Final run on every frame
    self.ci_ANEB = True
    self.states = copy.deepcopy(self.all_states)
    self.flattened_states = flattened(self.states)
    self.nframes = len(self.states)
    self.k = [self.k_0 for i in self.all_states]
    print("\nRunning final CI-ANEB with %d frames" % len(self.all_states))
    output = lbfgs(np.array(self.flattened_states),
                   self.get_gradient,
                   NEB_obj=self,
                   new_opt_params=self.new_opt_params)

    # Only "lbfgs" reaches this point, so the result is always the
    # (params, code, iterations) triple.
    FINAL_PARAMS, CODE, ITERS = output

    if CODE == FAIL_CONVERGENCE:
        print("\nANEB failed to converge.")
    elif CODE == MAXITER_CONVERGENCE:
        print("\nANEB quit after reaching the specified maximum number "
              "of iterations.")
    elif CODE == G_MAX_CONVERGENCE:
        print("\nANEB converged the maximum force.")
    elif CODE == G_RMS_CONVERGENCE:
        print("\nANEB converged the RMS force.")
    elif CODE == STEP_SIZE_TOO_SMALL:
        print("\nANEB failed to converge. Step size either started too "
              "small, or was backtracked to being too small.")
    else:
        print("\nSomething unknown happened during ANEB optimization, and "
              "no flag was returned.")

    print("---------------------------------------------" +
          "---------------------------------------------\n\n")
    return FINAL_PARAMS, ITERS, self.states
except IOError: print( "Error - orca simulation %s does not exist. Are you sure -dft orca is correct?" % run_name) sys.exit() else: print("DFT type %s not available..." % dft) sys.exit() # Get the header information head = 'Job Name: %s\n' % run_name head += 'DFT calculation via %s\n' % dft head += 'Energy Data Points: %d\n' % len(data.energies) if len(data.energies) > 2: Ener = str( units.convert_energy(u1, u2, data.energies[-2] - data.energies[-3])) head += 'dE 2nd last = %s %s\n' % (Ener, u2) if len(data.energies) > 1: Ener = str( units.convert_energy(u1, u2, data.energies[-1] - data.energies[-2])) head += 'dE last = %s %s\n' % (Ener, u2) if len(data.energies) > 0: Ener = str(units.convert_energy(u1, u2, data.energies[-1])) head += 'Last Energy = %s %s' % (Ener, u2) body, tail = '', '' if data.convergence != None: for line in data.convergence: body += '\t'.join([str(s) for s in line]) + '\n' body = utils.spaced_print(body, delim='\t')
def calculate(self, coords):
    """Evaluate one optimizer step: run DFT jobs, then build spring forces.

    ``coords`` is read three values at a time and written into the x, y, z
    of every atom of the interior frames (``self.states[1:-1]``), in order.
    Jobs are started through ``self.start_job`` and waited on, energies are
    read back through ``self.get_results``, and spring forces are written
    into ``fx, fy, fz`` of the atoms listed in ``self.spring_atoms``.

    On the first (initialization) call every frame is computed, the
    highest-energy frame becomes ``self.peak`` and the per-segment spring
    constants are appended to ``self.k``.

    Side effects: updates ``self.gradient``, ``self.RMS_force``,
    ``self.MAX_force``, ``self.error``, ``self.prv_RMS``, ``self.prv_MAX``,
    ``self.prv_MAX_E``, increments ``self.step``, clears
    ``self.initialize``, prints one progress row to stdout and finally
    invokes ``self.callback(self.states)`` when a callback is set.
    """
    self.calls_to_calculate += 1
    last = len(self.states) - 1

    # Update coordinates in states. This won't change anything on
    # the first run through, but will on subsequent ones
    coord_count = 0
    for s in self.states[1:-1]:
        for a in s:
            a.x, a.y, a.z = coords[coord_count:coord_count + 3]
            coord_count += 3

    # Start DFT jobs
    running_jobs = []
    for i, state in enumerate(self.states):
        # After initialization, the first, peak and last frames are only
        # computed while self.step is still 0.
        if (not self.initialize
                and (i == 0 or i == self.peak or i == last)
                and self.step > 0):
            continue
        running_jobs.append(
            self.start_job(self, i, state, self.charge, self.procs,
                           self.queue, self.initial_guess,
                           self.extra_section, self.mem, self.priority))

    # Wait for jobs to finish
    for j in running_jobs:
        j.wait()

    # Get energies from DFT calculations
    energies = []
    for i, state in enumerate(self.states):
        # First, peak and last frames reuse the result from step 0
        if i == 0 or i == self.peak or i == last:
            step_to_use = 0
        else:
            step_to_use = self.step
        new_energy, _ = self.get_results(self, step_to_use, i, state)
        energies.append(new_energy)

    # V = potential energy from DFT
    V = copy.deepcopy(energies)

    def get_positions(image):
        # Flat (3 * n_atoms) array of coordinates.  Atoms that are not
        # spring atoms are left at zero so they never contribute to a
        # distance or a tangent (np.empty would leave arbitrary values).
        pos = np.zeros((len(image), 3))
        for j, atom in enumerate(image):
            if j not in self.spring_atoms:
                continue
            pos[j] = (atom.x, atom.y, atom.z)
        return pos.flatten()

    # Coordinates are not modified below, so compute them once per frame.
    positions = [get_positions(state) for state in self.states]

    if self.initialize:
        # During initialization, use the single point energies to find the
        # highest energy frame (peak) and generate spring constants.
        self.peak = energies.index(max(energies))

        # Calculate spring constants for smoothing curve
        d_before = np.linalg.norm(positions[self.peak] - positions[0])
        d_after = np.linalg.norm(positions[self.peak] - positions[-1])
        l_before = -d_before**2 / np.log(self.gamma)
        l_after = -d_after**2 / np.log(self.gamma)

        # Distance of each segment midpoint from the first frame.
        # NOTE(review): segments after the peak are also measured from the
        # first frame but compared against d_after - confirm intended.
        x1 = [np.linalg.norm(
                  (positions[i] + positions[i + 1]) / 2.0 - positions[0])
              for i in range(self.peak)]
        x2 = [np.linalg.norm(
                  (positions[i] + positions[i + 1]) / 2.0 - positions[0])
              for i in range(self.peak, last)]
        for x in x1:
            self.k.append(self.k_max * exp(-((x - d_before)**2) / l_before))
        for x in x2:
            self.k.append(self.k_max * exp(-((x - d_after)**2) / l_after))

    # Add spring forces to atoms
    for i in range(1, last):
        if i == self.peak:
            # Set NEB forces at peak to 0
            for j, atom in enumerate(self.states[i]):
                if j not in self.spring_atoms:
                    continue
                atom.fx, atom.fy, atom.fz = [0, 0, 0]
            continue

        # Find tangent
        tplus = positions[i + 1] - positions[i]
        tminus = positions[i] - positions[i - 1]
        dVmin = min(abs(V[i + 1] - V[i]), abs(V[i - 1] - V[i]))
        dVmax = max(abs(V[i + 1] - V[i]), abs(V[i - 1] - V[i]))
        if V[i + 1] > V[i] and V[i] > V[i - 1]:
            tangent = tplus.copy()
        elif V[i + 1] < V[i] and V[i] < V[i - 1]:
            tangent = tminus.copy()
        elif V[i + 1] > V[i - 1]:
            tangent = tplus * dVmax + tminus * dVmin
        else:
            tangent = tplus * dVmin + tminus * dVmax

        # Normalize tangent
        tangent_norm = np.sqrt(np.vdot(tangent, tangent))
        if tangent_norm != 0:
            tangent /= tangent_norm

        # Set NEB forces
        forces = (self.k[i] * np.linalg.norm(tplus)
                  - self.k[i - 1] * np.linalg.norm(tminus)) * tangent
        forces = forces.reshape((-1, 3))
        for j, atom in enumerate(self.states[i]):
            if j not in self.spring_atoms:
                continue
            atom.fx, atom.fy, atom.fz = forces[j]

    # Remove net translation forces from the gradient
    if self.fit_rigid:
        net_translation_force = []
        for state in self.states[1:-1]:
            net_force = np.zeros(3)
            for a in state:
                net_force += (a.fx, a.fy, a.fz)
            net_trans = np.sqrt((net_force**2).sum()) / len(state)
            net_translation_force.append(net_trans)
            for a in state:
                a.fx -= net_force[0] / len(state)
                a.fy -= net_force[1] / len(state)
                a.fz -= net_force[2] / len(state)
        max_translation_force = units.convert("Ha/Ang", "eV/Ang",
                                              max(net_translation_force))
    else:
        max_translation_force = 0

    # Set gradient (negative of the forces on the interior frames)
    self.gradient = [g
                     for state in self.states[1:-1]
                     for a in state
                     for g in (-a.fx, -a.fy, -a.fz)]

    # Calculate RMS Force and Max force
    force_mags = [(a.fx**2 + a.fy**2 + a.fz**2)**0.5
                  for state in self.states[1:-1] for a in state]
    RMS_force = geometry.rms(force_mags)
    self.RMS_force = RMS_force
    MAX_force = max(force_mags)
    self.MAX_force = MAX_force

    # Print data: first energy as is, the others relative to it in kT_300
    V = V[:1] + [
        units.convert_energy("Ha", "kT_300", e - V[0]) for e in V[1:]
    ]
    MAX_energy = max(V)

    def colored(shown, current, best):
        # GREEN when there is no previous best or the value improved on it.
        improved = best is None or best > current
        return print_helper.color_set(shown, 'GREEN' if improved else 'RED')

    rms = colored(
        float("%.4f" % units.convert_energy("Ha", "eV", RMS_force)),
        RMS_force, self.prv_RMS)
    max_f = colored(
        float("%.4f" % units.convert_energy("Ha", "eV", MAX_force)),
        MAX_force, self.prv_MAX)
    max_e = colored(float("%.1f" % MAX_energy), MAX_energy, self.prv_MAX_E)

    if self.step == 0 and not self.initialize:
        print("Step\tRMS_F (eV/Ang)\tMAX_F (eV/Ang)\tMAX_E (kT_300)"
              "\tMAX Translational Force (eV/Ang)\tEnergies (kT_300)\n----")

    # Single write that reproduces the text the former pair of Python 2
    # print statements produced (including the separating spaces), while
    # remaining valid syntax on Python 3.
    row = ("%d\t%s\t\t%s\t\t%s\t\t%.4f"
           % (self.step, rms, max_f, max_e, max_translation_force))
    first_energy = '%7.5g +' % V[0]
    other_energies = ('%5.1f ' * len(V[1:])) % tuple(V[1:])
    sys.stdout.write(row + ' ' + ' \t\t\t\t' + first_energy + ' '
                     + other_energies + '\n')
    sys.stdout.flush()

    # Track the best (smallest) values seen so far
    if self.prv_RMS is None:
        self.prv_RMS = RMS_force
    self.prv_RMS = min(RMS_force, self.prv_RMS)

    if self.prv_MAX is None:
        self.prv_MAX = MAX_force
    self.prv_MAX = min(MAX_force, self.prv_MAX)

    if self.prv_MAX_E is None:
        self.prv_MAX_E = MAX_energy
    self.prv_MAX_E = min(MAX_energy, self.prv_MAX_E)

    # Set error
    self.error = RMS_force

    # Increment step
    self.step += 1

    # End initialization phase
    self.initialize = False

    if self.callback is not None:
        self.callback(self.states)