def orca_results(spline_NEB, step_to_use, i, state):
    """
    Read the Orca engrad output for one image of a spline_NEB run.

    The job name is built as ``<spline_NEB.name>-<step_to_use>-<i>`` and
    handed to ``orca.engrad_read``. The forces that were read are copied,
    atom by atom, onto the atoms in *state*, and the energy and atoms that
    were read are returned.

    **Parameters**

        spline_NEB: :class:`spline_NEB`
            The spline_NEB container; only its ``name`` is used here.
        step_to_use: *int*
            Which iteration in the spline_NEB sequence the output belongs
            to.
        i: *int*
            The index of the image on the frame.
        state: *list,* :class:`structures.Atom`
            The atoms of image *i*. Their ``fx``, ``fy`` and ``fz``
            attributes are overwritten in place.

    **Returns**

        new_energy: *float*
            The energy as returned by ``orca.engrad_read``
            (documented upstream as Hartree).
        new_atoms: *list,* :class:`structures.Atom`
            The atoms read from the engrad output, with forces requested
            in ``'Ha/Ang'`` and positions in ``'Ang'``.
    """
    job_name = '%s-%d-%d' % (spline_NEB.name, step_to_use, i)
    new_atoms, new_energy = orca.engrad_read(job_name,
                                             force='Ha/Ang',
                                             pos='Ang')

    # zip() pairs atoms positionally and stops at the shorter sequence.
    for target, source in zip(state, new_atoms):
        target.fx, target.fy, target.fz = source.fx, source.fy, source.fz

    return new_energy, new_atoms
def _read_orca(name):
    """
    Read the Orca engrad output of the simulation called *name*.

    **Parameters**

        name: *str*
            The name of the Orca simulation in question.

    **Returns**

        new_energy: *float*
            The energy as returned by ``orca.engrad_read``
            (documented upstream as Hartree).
        new_atoms: *list,* :class:`structures.Atom`
            The atoms read from the engrad output, with forces requested
            in ``'Ha/Ang'`` and positions in ``'Ang'``.
    """
    # engrad_read hands back (atoms, energy); callers of this helper get
    # the pair in the opposite order.
    new_atoms, new_energy = orca.engrad_read(name,
                                             force='Ha/Ang',
                                             pos='Ang')
    return new_energy, new_atoms
def pickle_training_set(run_name,
                        training_sets_folder="training_set",
                        pickle_file_name="training_set",
                        high_energy_cutoff=500.0,
                        system_x_offset=1000.0,
                        verbose=False,
                        extra_parameters=None):
    """
    A function to pickle together the training set in a manner that is
    readable for MCSMRFF. This is a single system with each training
    subset offset along the x-axis by system_x_offset. The pickle file,
    when read in later, holds a list of two objects. The first is the
    entire system as described above. The second is a dictionary of all
    molecules in the system, organized by composition.

    **Parameters**

        run_name: *str*
            Name of final training set. A folder of this name is created
            (if missing) in the current working directory and receives
            the training data plus a copy of the pickle file.
        training_sets_folder: *str, optional*
            Path to the folder where all the training set data is. Every
            entry *name* in it is expected to provide ``name/name.out``,
            ``name/name.orca.engrad`` and ``name/name.cml``.
        pickle_file_name: *str, optional*
            A name for the pickle file and training set system. A
            trailing ".pickle" is stripped.
        high_energy_cutoff: *float, optional*
            A cutoff (compared against the relative energy after the
            "Ha" to "kcal/mol" conversion) for systems that are too
            large in energy, as MD is likely never to sample them.
        system_x_offset: *float, optional*
            The x offset for the systems to be added by.
        verbose: *bool, optional*
            Whether to have additional stdout or not.
        extra_parameters: *dict, optional*
            A dictionary of additional parameters that do not exist in
            the default OPLSAA parameter file. Defaults to an empty
            dictionary.

    **Returns**

        system: *System*
            The entire training set system.
        systems_by_composition: *dict, list, Molecule*
            Each molecule organized in this hash table, keyed by the
            space-joined sorted element list.

    **Raises**

        Exception
            If the pickle file already exists, or if the atoms of a cml
            file do not match the positions read from the Orca output.
    """
    # NOTE(review): an identically named definition appears later in this
    # file and shadows this one at import time.

    # A fresh dict per call avoids sharing one mutable default object.
    if extra_parameters is None:
        extra_parameters = {}

    # Take care of pickle file I/O
    if training_sets_folder.endswith("/"):
        training_sets_folder = training_sets_folder[:-1]
    suffix = ".pickle"
    if pickle_file_name is not None and pickle_file_name.endswith(suffix):
        # Strip only the trailing suffix; splitting on ".pickle" would
        # also cut names that merely contain that text.
        pickle_file_name = pickle_file_name[:-len(suffix)]
    pfile = training_sets_folder + "/" + pickle_file_name + suffix
    sys_name = pickle_file_name
    if os.path.isfile(pfile):
        raise Exception("Pickled training set already exists!")

    # Generate empty system for your training set
    system = structures.System(box_size=[1e3, 100.0, 100.0], name=sys_name)
    systems_by_composition = {}

    # Convert force from Ha/Bohr to kcal/mol-Ang (as stated by the
    # original author; the unit strings are passed through unchanged).
    def convert(x):
        return units.convert_dist(
            "Ang", "Bohr", units.convert_energy("Ha", "kcal", x))

    # For each folder in the training_sets folder lets get the cml file we
    # want and write the energies and forces for that file
    for name in os.listdir(training_sets_folder):
        # We'll read in any training subset that succeeded and print a
        # warning on those that failed
        try:
            result = orca.read("%s/%s/%s.out"
                               % (training_sets_folder, name, name))
        except IOError:
            print("Warning - Training Subset %s not included as "
                  "out file not found..." % name)
            continue

        # Check for convergence
        if not result.converged:
            print("Warning - Results for %s have not converged." % name)
            continue

        # Parse the force output and change units. In the case of no
        # force found, do not use this set of data
        try:
            forces = orca.engrad_read("%s/%s/%s.orca.engrad"
                                      % (training_sets_folder, name, name),
                                      pos="Ang")[0]
            for a, b in zip(result.atoms, forces):
                a.fx, a.fy, a.fz = (convert(b.fx),
                                    convert(b.fy),
                                    convert(b.fz))
        except (IndexError, IOError):
            print("Warning - Training Subset %s not included as "
                  "results not found..." % name)
            continue

        # Get the bonding information
        with_bonds = structures.Molecule(
            "%s/%s/%s.cml" % (training_sets_folder, name, name),
            extra_parameters=extra_parameters,
            allow_errors=True,
            test_charges=False)

        # Copy over the forces read in into the system that has the
        # bonding information
        for a, b in zip(with_bonds.atoms, result.atoms):
            a.fx, a.fy, a.fz = b.fx, b.fy, b.fz
            # sanity check on atom positions
            if geometry.dist(a, b) > 1e-4:
                raise Exception('Atoms are different:',
                                (a.x, a.y, a.z),
                                (b.x, b.y, b.z))

        # Rename and save energy
        with_bonds.energy = result.energy
        with_bonds.name = name

        # Group the molecule under its composition key
        composition = ' '.join(sorted([a.element for a in result.atoms]))
        if composition not in systems_by_composition:
            systems_by_composition[composition] = []
        systems_by_composition[composition].append(with_bonds)

    # Generate:
    #   (1) xyz file of various systems as different time steps
    #   (2) system to simulate
    xyz_atoms = []
    for composition in systems_by_composition:
        molecules = systems_by_composition[composition]

        # Sort so that the lowest energy training subset is first
        # in the system
        molecules.sort(key=lambda s: s.energy)
        baseline_energy = molecules[0].energy

        kept = []
        for s in molecules:
            # Offset the energies by the lowest energy, and convert
            # energy units
            s.energy -= baseline_energy
            s.energy = units.convert_energy("Ha", "kcal/mol", s.energy)

            # Don't use high-energy systems, because these will not
            # likely be sampled in MD
            if s.energy > high_energy_cutoff:
                if verbose:
                    print("Warning - Training Subset %s not included as "
                          "energy is too high..." % s.name)
                continue

            # For testing purposes, output
            if verbose:
                print("Using: %s %s" % (s.name, s.energy))
            xyz_atoms.append(s.atoms)
            system.add(s, len(system.molecules) * system_x_offset)
            kept.append(s)

        # Drop the rejected molecules while keeping the same list object
        # (the dict itself is not resized during iteration).
        molecules[:] = kept

    # Make the box just a little bigger (100) so that we can fit all our
    # systems
    system.xhi = len(system.molecules) * system_x_offset + 100.0

    # Write all of the states we are using via files.write_xyz
    files.write_xyz(xyz_atoms, training_sets_folder + '/' + pickle_file_name)

    # Generate our pickle file; the context manager closes the handle
    # even if pickling fails.
    print("Saving pickle file %s..." % pfile)
    with open(pfile, "wb") as fptr:
        pickle.dump([system, systems_by_composition], fptr)

    # Now we have the data, save it to files for this simulation of
    # "run_name" and return parameters
    if not os.path.isdir(run_name):
        os.mkdir(run_name)
    start_dir = os.getcwd()
    os.chdir(run_name)
    try:
        mcsmrff_files.write_system_and_training_data(run_name,
                                                     system,
                                                     systems_by_composition)
    finally:
        # Always return to where we started, even when writing fails.
        os.chdir(start_dir)
    shutil.copyfile(pfile, "%s/%s.pickle" % (run_name, run_name))

    return system, systems_by_composition
def pickle_training_set(run_name,
                        training_sets_folder="training_set",
                        pickle_file_name="training_set",
                        high_energy_cutoff=500.0,
                        system_x_offset=1000.0,
                        verbose=False,
                        extra_parameters=None):
    """
    A function to pickle together the training set in a manner that is
    readable for MCSMRFF. This is a single system with each training
    subset offset along the x-axis by system_x_offset. The pickle file,
    when read in later, holds a list of two objects. The first is the
    entire system as described above. The second is a dictionary of all
    molecules in the system, organized by composition.

    **Parameters**

        run_name: *str*
            Name of final training set. A folder of this name is created
            (if missing) in the current working directory and receives
            the training data plus a copy of the pickle file.
        training_sets_folder: *str, optional*
            Path to the folder where all the training set data is. Every
            entry *name* in it is expected to provide ``name/name.out``,
            ``name/name.orca.engrad`` and ``name/name.cml``.
        pickle_file_name: *str, optional*
            A name for the pickle file and training set system. A
            trailing ".pickle" is stripped.
        high_energy_cutoff: *float, optional*
            A cutoff (compared against the relative energy after the
            "Ha" to "kcal/mol" conversion) for systems that are too
            large in energy, as MD is likely never to sample them.
        system_x_offset: *float, optional*
            The x offset for the systems to be added by.
        verbose: *bool, optional*
            Whether to have additional stdout or not.
        extra_parameters: *dict, optional*
            A dictionary of additional parameters that do not exist in
            the default OPLSAA parameter file. Defaults to an empty
            dictionary.

    **Returns**

        system: *System*
            The entire training set system.
        systems_by_composition: *dict, list, Molecule*
            Each molecule organized in this hash table, keyed by the
            space-joined sorted element list.

    **Raises**

        Exception
            If the pickle file already exists, or if the atoms of a cml
            file do not match the positions read from the Orca output.
    """
    # NOTE(review): this function name is also defined earlier in this
    # file; being the later definition, this is the one that takes effect.

    # A fresh dict per call avoids sharing one mutable default object.
    if extra_parameters is None:
        extra_parameters = {}

    # Take care of pickle file I/O
    if training_sets_folder.endswith("/"):
        training_sets_folder = training_sets_folder[:-1]
    suffix = ".pickle"
    if pickle_file_name is not None and pickle_file_name.endswith(suffix):
        # Strip only the trailing suffix; splitting on ".pickle" would
        # also cut names that merely contain that text.
        pickle_file_name = pickle_file_name[:-len(suffix)]
    pfile = training_sets_folder + "/" + pickle_file_name + suffix
    sys_name = pickle_file_name
    if os.path.isfile(pfile):
        raise Exception("Pickled training set already exists!")

    # Generate empty system for your training set
    system = structures.System(box_size=[1e3, 100.0, 100.0], name=sys_name)
    systems_by_composition = {}

    # Convert force from Ha/Bohr to kcal/mol-Ang (as stated by the
    # original author; the unit strings are passed through unchanged).
    def convert(x):
        return units.convert_dist(
            "Ang", "Bohr", units.convert_energy("Ha", "kcal", x))

    # For each folder in the training_sets folder lets get the cml file we
    # want and write the energies and forces for that file
    for name in os.listdir(training_sets_folder):
        # We'll read in any training subset that succeeded and print a
        # warning on those that failed
        try:
            result = orca.read("%s/%s/%s.out"
                               % (training_sets_folder, name, name))
        except IOError:
            print("Warning - Training Subset %s not included as "
                  "out file not found..." % name)
            continue

        # Check for convergence
        if not result.converged:
            print("Warning - Results for %s have not converged." % name)
            continue

        # Parse the force output and change units. In the case of no
        # force found, do not use this set of data
        try:
            forces = orca.engrad_read("%s/%s/%s.orca.engrad"
                                      % (training_sets_folder, name, name),
                                      pos="Ang")[0]
            for a, b in zip(result.atoms, forces):
                a.fx, a.fy, a.fz = (convert(b.fx),
                                    convert(b.fy),
                                    convert(b.fz))
        except (IndexError, IOError):
            print("Warning - Training Subset %s not included as "
                  "results not found..." % name)
            continue

        # Get the bonding information
        with_bonds = structures.Molecule(
            "%s/%s/%s.cml" % (training_sets_folder, name, name),
            extra_parameters=extra_parameters,
            allow_errors=True,
            test_charges=False)

        # Copy over the forces read in into the system that has the
        # bonding information
        for a, b in zip(with_bonds.atoms, result.atoms):
            a.fx, a.fy, a.fz = b.fx, b.fy, b.fz
            # sanity check on atom positions
            if geometry.dist(a, b) > 1e-4:
                raise Exception('Atoms are different:',
                                (a.x, a.y, a.z),
                                (b.x, b.y, b.z))

        # Rename and save energy
        with_bonds.energy = result.energy
        with_bonds.name = name

        # Group the molecule under its composition key
        composition = ' '.join(sorted([a.element for a in result.atoms]))
        if composition not in systems_by_composition:
            systems_by_composition[composition] = []
        systems_by_composition[composition].append(with_bonds)

    # Generate:
    #   (1) xyz file of various systems as different time steps
    #   (2) system to simulate
    xyz_atoms = []
    for composition in systems_by_composition:
        molecules = systems_by_composition[composition]

        # Sort so that the lowest energy training subset is first
        # in the system
        molecules.sort(key=lambda s: s.energy)
        baseline_energy = molecules[0].energy

        kept = []
        for s in molecules:
            # Offset the energies by the lowest energy, and convert
            # energy units
            s.energy -= baseline_energy
            s.energy = units.convert_energy("Ha", "kcal/mol", s.energy)

            # Don't use high-energy systems, because these will not
            # likely be sampled in MD
            if s.energy > high_energy_cutoff:
                if verbose:
                    print("Warning - Training Subset %s not included as "
                          "energy is too high..." % s.name)
                continue

            # For testing purposes, output
            if verbose:
                print("Using: %s %s" % (s.name, s.energy))
            xyz_atoms.append(s.atoms)
            system.add(s, len(system.molecules) * system_x_offset)
            kept.append(s)

        # Drop the rejected molecules while keeping the same list object
        # (the dict itself is not resized during iteration).
        molecules[:] = kept

    # Make the box just a little bigger (100) so that we can fit all our
    # systems
    system.xhi = len(system.molecules) * system_x_offset + 100.0

    # Write all of the states we are using via files.write_xyz
    files.write_xyz(xyz_atoms, training_sets_folder + '/' + pickle_file_name)

    # Generate our pickle file; the context manager closes the handle
    # even if pickling fails.
    print("Saving pickle file %s..." % pfile)
    with open(pfile, "wb") as fptr:
        pickle.dump([system, systems_by_composition], fptr)

    # Now we have the data, save it to files for this simulation of
    # "run_name" and return parameters
    if not os.path.isdir(run_name):
        os.mkdir(run_name)
    start_dir = os.getcwd()
    os.chdir(run_name)
    try:
        mcsmrff_files.write_system_and_training_data(run_name,
                                                     system,
                                                     systems_by_composition)
    finally:
        # Always return to where we started, even when writing fails.
        os.chdir(start_dir)
    shutil.copyfile(pfile, "%s/%s.pickle" % (run_name, run_name))

    return system, systems_by_composition