def recompute_relax_torsion_profile(scanxyz, pdb_fnm, dihedralstxt, sysxml_fnm):
    """
    Re-run a constrained geomeTRIC/OpenMM optimization for every frame of a scan.

    Parameters
    ----------
    scanxyz: str
        Scan xyz file, parsed with read_scan_xyz (only the geometries are used here).
    pdb_fnm: str
        PDB file loaded as the template molecule; each frame is written from it.
    dihedralstxt: str
        Dihedrals file, parsed with read_dihedralstxt.
    sysxml_fnm: str
        OpenMM system xml file, copied into every job folder as openmm_system.xml.

    Raises
    ------
    subprocess.CalledProcessError
        If geometric-optimize exits with a non-zero status (check=True).

    Notes
    -----
    `tmpdir` is not defined inside this function; presumably it is a
    module-level name -- TODO confirm. Any existing folder of that name is
    deleted before the run.
    """
    grid_geo_data, _ = read_scan_xyz(scanxyz)
    dihedral_list = read_dihedralstxt(dihedralstxt)
    # resolve to an absolute path now, because the working directory changes below
    sysxml_path = os.path.abspath(sysxml_fnm)
    m = Molecule(pdb_fnm)
    # the command is the same for every grid point
    command = "geometric-optimize openmm_system.xml constraints.txt --openmm --pdb frame.pdb --qccnv --reset --epsilon 0.0 --enforce 0.1 --qdata"
    # create a new tmp dir for running calculations
    if os.path.exists(tmpdir):
        shutil.rmtree(tmpdir)
    os.mkdir(tmpdir)
    os.chdir(tmpdir)
    try:
        # loop over each geometry in each grid
        for grid_id, geo in grid_geo_data.items():
            folder = 'gid_' + '_'.join(f'{d:+03d}' for d in grid_id)
            print(f'Running constrained optimization in {folder}')
            os.mkdir(folder)
            os.chdir(folder)
            try:
                # copy files
                m.xyzs = [geo]
                m.write('frame.pdb')
                shutil.copy(sysxml_path, 'openmm_system.xml')
                write_constraints_txt(dihedral_list, grid_id)
                # run geometric
                subprocess.run(command, shell=True, check=True)
            finally:
                # always leave the job folder, even when a step above failed
                os.chdir('..')
    finally:
        # always leave the tmp dir, so a failure does not strand the caller inside it
        os.chdir('..')
def load_geomeTRIC_output(self):
    """
    Return the last frame of tdrive.xyz as a molecule object with its energy set.

    The energy is taken from the last whitespace-separated field of that
    frame's comment line. Raises OSError when tdrive.xyz is not present in
    the current directory.
    """
    # tdrive.xyz is what geomeTRIC writes with the "--prefix tdrive" option.
    # The input file must NOT be named like tdrive.in, otherwise the output
    # would be called tdrive_optim.xyz instead.
    xyz_name = 'tdrive.xyz'
    if os.path.isfile(xyz_name):
        final_frame = Molecule(xyz_name)[-1]
        energy_field = final_frame.comms[0].rsplit(maxsplit=1)[-1]
        final_frame.qm_energies = [float(energy_field)]
        return final_frame
    raise OSError("geomeTRIC output tdrive.xyz file not found")
def load_input(self, input_file):
    """
    !!!only Cartesian molecule specification is supported at the moment!!!
    Load Gaussian09 input

    Example input file:

    %Mem=6GB
    %NProcShared=2
    %Chk=lig
    # B3LYP/6-31G(d) Opt=ModRedundant

    water energy

    0 1
    O -0.464 0.177 0.0
    H -0.464 1.137 0.0
    H 0.441 -0.143 0.0

    Parameters
    ----------
    input_file: str
        Path of the Gaussian input file to read.

    Side effects
    ------------
    Sets self.gauss_temp (the template lines with "$!geometry@here" and
    "$!optblock@here" placeholders), self.M (one-frame molecule built from
    the coordinate lines) and, when a template line contains "opt",
    self.temp_type = 'optimize'.

    Raises
    ------
    AssertionError
        If no coordinate line is found, or if self.native_opt is set but the
        template was not recognised as an optimization job.
    """
    elems, coords = [], []
    reading_molecule, found_geo = False, False
    gauss_temp = []  # store a template of the input file for generating new ones
    with open(input_file) as gauss_in:
        for line in gauss_in:
            ls = line.split()
            # a coordinate line is "<element> <x> <y> <z>"
            if len(ls) == 4 and check_all_float(ls[1:]):
                reading_molecule = True
                elems.append(ls[0])
                coords.append(ls[1:])
                # the whole geometry is replaced by one placeholder in the template
                if not found_geo:
                    found_geo = True
                    gauss_temp.append("$!geometry@here")
            elif reading_molecule:
                # the first blank line after the coordinates ends the molecule block
                if line.strip().lower() == '':
                    reading_molecule = False
                    gauss_temp.append(line)
                    gauss_temp.append("$!optblock@here")
            else:
                gauss_temp.append(line)
                # NOTE(review): any template line containing "opt" marks the job
                # as an optimization, not only the route ("#") line
                if 'opt' in line.lower():
                    self.temp_type = 'optimize'
    assert found_geo, "XYZ geometry not found in molecule block of %s" % input_file
    if self.native_opt:
        assert self.temp_type == 'optimize', "input_file should be a opt job to use native opt"
    self.gauss_temp = gauss_temp
    self.M = Molecule()
    self.M.elem = elems
    self.M.xyzs = [np.array(coords, dtype=float)]
    self.M.build_topology()
def load_input(self, input_file):
    """
    Load the pdb file holding the coordinates, plus the xml file that shares
    its name (same path with the extension replaced by .xml).

    Sets self.m_pdb (first frame of the pdb), self.M (a deep copy of it) and
    self.xml_content (the text of the xml file).
    """
    first_frame = Molecule(input_file)[0]
    self.m_pdb = first_frame
    self.M = copy.deepcopy(first_frame)
    prefix, _ext = os.path.splitext(input_file)
    xml_name = prefix + '.xml'
    # the xml file must sit next to the pdb file
    assert os.path.exists(xml_name), "OpenMM requires a pdb and xml file, ensure you have both in the current folder with the same prefix"
    with open(xml_name) as xml_file:
        self.xml_content = xml_file.read()
def launch_opt_jobs(self):
    """
    Mimicking DihedralScanner.launch_opt_jobs.

    Drains self.opt_queue. A job whose start geometry is already in
    self.task_cache is replayed from the cache and pushed to
    self.current_finished_job_results; any other job's start geometry is
    appended to self.next_jobs for its target grid id.
    """
    assert hasattr(self, 'next_jobs')
    assert hasattr(self, 'current_finished_job_results')
    while len(self.opt_queue) > 0:
        start_m, from_grid_id, to_grid_id = self.opt_queue.pop()
        start_geo = start_m.xyzs[0]
        # check if this job already exists
        geo_key = get_geo_key(start_geo)
        cached_tasks = self.task_cache[to_grid_id]
        if geo_key not in cached_tasks:
            # append the job to self.next_jobs, which is the output of torsiondrive-API
            self.next_jobs[to_grid_id].append(start_geo.copy())
            continue
        final_geo, final_energy, final_gradient, job_folder = cached_tasks[geo_key]
        result_m = Molecule()
        result_m.elem = list(start_m.elem)
        result_m.xyzs = [final_geo]
        result_m.qm_energies = [final_energy]
        if final_gradient is not None:
            result_m.qm_grads = [final_gradient]
        result_m.build_topology()
        grid_id = self.get_dihedral_id(result_m, check_grid_id=to_grid_id)
        if grid_id is not None:
            self.current_finished_job_results.push((result_m, grid_id), priority=job_folder)
        else:
            print(
                f"Cached result from {job_folder} is ignored because optimized geometry is far from grid id {to_grid_id}"
            )
def current_state_json_load(json_state_dict):
    """
    Load a state from JSON dictionary

    Works on a deep copy of the input, so the caller's dictionary is left
    untouched. In the returned copy:

    * 'init_coords' becomes a list of (natoms, 3) numpy arrays, each scaled
      by bohr2ang.
    * 'grid_status' becomes a defaultdict(list) keyed by integer grid id
      tuples, holding (start_geo, end_geo, end_energy) with the geometries
      converted the same way as 'init_coords'.

    Each finished job is filed under the grid id measured from its end
    geometry, which is not necessarily the grid id it was stored under in
    the input.
    """
    json_state = copy.deepcopy(json_state_dict)
    natoms = len(json_state['elements'])
    # convert geometries into correct numpy format
    # presumably the JSON coordinates are flat lists in Bohr -- TODO confirm units
    init_coords = [
        np.array(c, dtype=float).reshape(natoms, 3) * bohr2ang
        for c in json_state['init_coords']
    ]
    json_state['init_coords'] = init_coords
    # convert grid_status into dictionary
    grid_status = defaultdict(list)
    # create a molecule object here to evaluate dihedrals later
    m = Molecule()
    m.xyzs = init_coords
    m.elem = json_state['elements']
    m.build_bonds()
    dihedrals = json_state['dihedrals']
    grid_spacing = json_state['grid_spacing']
    # create grid status dictionary
    for grid_id_str, grid_jobs in json_state['grid_status'].items():
        # keys look like "30,60": comma-separated integers
        grid_id = tuple(int(i) for i in grid_id_str.split(','))
        for start_geo, end_geo, end_energy in grid_jobs:
            # convert to numpy array, shape should match here
            start_geo = np.array(start_geo, dtype=float).reshape(natoms, 3) * bohr2ang
            end_geo = np.array(end_geo, dtype=float).reshape(natoms, 3) * bohr2ang
            # here we check if the end_geo matches the target grid id
            m.xyzs = [end_geo]
            dihedral_values = np.array(
                [m.measure_dihedrals(*d)[0] for d in dihedrals])
            for dv, dref in zip(dihedral_values, grid_id):
                diff = abs(dv - dref)
                # compare on the circle, so a difference of 360 counts as 0;
                # a mismatch only prints a warning, the job is still kept
                if min(diff, abs(360 - diff)) > 0.9:
                    print(
                        "Warning! dihedral values inconsistent with target grid_id"
                    )
                    print('dihedral_values', dihedral_values, 'ref_grid_id', grid_id)
            # snap the measured dihedrals to the nearest grid point
            dihedral_id = (np.round(dihedral_values / grid_spacing) * grid_spacing).astype(int)
            # wrap each value into the interval (-180, 180]
            real_grid_id = tuple(
                (d + (180 - d) // 360 * 360) for d in dihedral_id)
            # here we append the result into the real grid_id
            grid_status[real_grid_id].append((start_geo, end_geo, end_energy))
    json_state['grid_status'] = grid_status
    return json_state
def test_gaussian_write_geometric(tmpdir):
    """
    Test writing out new gaussian style geometric files.

    Runs inside the pytest tmpdir. When no Gaussian version is found the
    optimization is expected to fail with subprocess.CalledProcessError;
    in both cases the written input.com is then read back and compared
    with the engine's molecule.
    """
    tmpdir.chdir()
    engine = EngineGaussian(input_file=get_data("hooh_geometric.com"), exe="g09", native_opt=False)
    # set the dihedral to be scanned and the value
    engine.set_dihedral_constraints([[0, 1, 2, 3, 90]])
    # check if gaussian can be ran else expect an error
    g_version = get_gaussian_version()
    if g_version is None:
        with pytest.raises(subprocess.CalledProcessError):
            engine.optimize_geomeTRIC()
    else:
        engine.gaussian_exe = g_version
        engine.optimize_geomeTRIC()
    # NOTE(review): the checks below are assumed to run on both branches,
    # i.e. input.com is written before Gaussian is invoked -- confirm
    # make sure the molecule is the same
    molecule = Molecule("input.com")
    assert molecule.Data["bonds"] == engine.M.Data["bonds"]
    assert molecule.Data["elem"] == engine.M.Data["elem"]
    assert molecule.Data["charge"] == engine.M.Data["charge"]
    assert molecule.Data["mult"] == engine.M.Data["mult"]
    assert len(molecule.molecules) == len(engine.M.molecules)
    assert np.allclose(molecule.xyzs[0], engine.M.xyzs[0])
def test_gaussian_version_wrong():
    """
    An unsupported executable name must be rejected with ValueError when the
    engine is constructed.
    """
    ethane_path = os.path.join(datad, "ethane.com")
    molecule = Molecule(ethane_path)
    with pytest.raises(ValueError):
        Gaussian(molecule=molecule, exe="gaussian09")
def read_scan_xyz(filename):
    """
    Parse the scan xyz file into a dictionary

    Parameters
    ----------
    filename: str
        path to the scan.xyz file generated by torsiondrive

    Returns
    -------
    grid_data: dict
        A dictionary of {grid_id: energy}, where grid_id is a tuple of ints
        read from between "(" and ")" on each comment line and energy is the
        last field of that line.

    Raises
    ------
    ValueError
        If a comment line has no "(" or ")".
    """
    energies_by_grid = {}
    scan = Molecule(filename)
    for comment in scan.comms:
        # the grid id sits between the first "(" and the first ")"
        try:
            open_pos = comment.index('(')
            close_pos = comment.index(')')
        except ValueError:
            print("Grid id in (XX, XX) format not found in file")
            raise
        fields = comment[open_pos + 1:close_pos].split(',')
        # a one-dimensional id such as "(30,)" leaves an empty trailing field
        grid_id = tuple(int(s) for s in fields if s)
        # the energy is the last whitespace-separated field
        energies_by_grid[grid_id] = float(comment.rsplit(maxsplit=1)[-1])
    return energies_by_grid
def load_geomeTRIC_output(self):
    """
    Return the last frame of qdata.txt as a molecule object.

    The elements are copied from self.M because qdata.txt does not carry
    them. Raises OSError when qdata.txt is missing from the current
    directory, and AssertionError when the frame does not hold exactly one
    energy and one gradient shaped like self.M.xyzs[0].
    """
    qdata_name = 'qdata.txt'
    if not os.path.isfile(qdata_name):
        raise OSError("geomeTRIC output qdata.txt file not found")
    result = Molecule(qdata_name)[-1]
    # copy the elements since qdata.txt does not have them
    result.elem = self.M.elem
    # check the data loaded
    assert len(result.qm_energies) == 1
    assert len(result.qm_grads) == 1
    assert result.qm_grads[0].shape == self.M.xyzs[0].shape
    result.build_topology()
    return result
def main():
    """
    Command line entry point: parse the arguments, build the QM engine and
    the DihedralScanner, run the scan and print the final grid energies.

    Raises
    ------
    ValueError
        If the number of --grid_spacing values is neither 1 nor equal to the
        number of dihedrals read from the dihedral file.
    """
    import argparse
    import sys
    parser = argparse.ArgumentParser(description="Potential energy scan of dihedral angle from 1 to 360 degree", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('inputfile', type=str, help='Input template file for QMEngine. Geometry will be used as starting point for scanning.')
    parser.add_argument('dihedralfile', type=str, help='File defining all dihedral angles to be scanned.')
    parser.add_argument('--init_coords', type=str, help='File contain a list of geometries, that will be used as multiple starting points, overwriting the geometry in input file.')
    parser.add_argument('-g', '--grid_spacing', type=int, nargs='*', default=[15], help='Grid spacing for dihedral scan, i.e. every 15 degrees, multiple values will be mapped to each dihedral angle')
    parser.add_argument('-e', '--engine', type=str, default="psi4", choices=['qchem', 'psi4', 'terachem', 'openmm', "gaussian"], help='Engine for running scan')
    parser.add_argument('-c', '--constraints', type=str, default=None, help='Provide a constraints file in geomeTRIC format for additional freeze or set constraints (geomeTRIC or TeraChem only)')
    parser.add_argument('--native_opt', action='store_true', default=False, help='Use QM program native constrained optimization algorithm. This will turn off geomeTRIC package.')
    parser.add_argument('--energy_thresh', type=float, default=1e-5, help='Only activate grid points if the new optimization is <thre> lower than the previous lowest energy (in a.u.).')
    parser.add_argument('--energy_upper_limit', type=float, default=None, help='Only activate grid points if the new optimization is less than <thre> higher than the global lowest energy (in a.u.).')
    parser.add_argument('--wq_port', type=int, default=None, help='Specify port number to use Work Queue to distribute optimization jobs.')
    parser.add_argument('--zero_based_numbering', action='store_true', help='Use zero_based_numbering in dihedrals file.')
    parser.add_argument('-v', '--verbose', action='store_true', default=False, help='Print more information while running.')
    args = parser.parse_args()

    # print input command for reproducibility
    print(' '.join(sys.argv))

    # parse the dihedral file
    if args.zero_based_numbering is True:
        print("The use of command line --zero_based_numbering is deprecated and will be removed in the future. Please use #zero_based_numbering in dihedralfile")
    dihedral_idxs, dihedral_ranges = load_dihedralfile(args.dihedralfile, args.zero_based_numbering)
    grid_dim = len(dihedral_idxs)

    # parse additional constraints
    constraints_dict = None
    if args.constraints is not None:
        with open(args.constraints) as fin:
            constraints_dict = make_constraints_dict(fin.read())
        # check if there are extra constraints conflict with the specified dihedral angles
        check_conflict_constraints(constraints_dict, dihedral_idxs)

    # format grid spacing: either one value per dihedral, or a single value
    # that is repeated for every dihedral
    n_grid_spacing = len(args.grid_spacing)
    if n_grid_spacing == grid_dim:
        grid_spacing = args.grid_spacing
    elif n_grid_spacing == 1:
        grid_spacing = args.grid_spacing * grid_dim
    else:
        # argument order matches the message: spacing count first, dihedral count second
        raise ValueError("Number of grid_spacing values %d is not consistent with number of dihedral angles %d" % (n_grid_spacing, grid_dim))

    # create QM Engine, and WorkQueue object if provided port
    engine = create_engine(args.engine, inputfile=args.inputfile, work_queue_port=args.wq_port, native_opt=args.native_opt)

    # load init_coords if provided
    init_coords_M = Molecule(args.init_coords) if args.init_coords else None

    # create DihedralScanner object
    scanner = DihedralScanner(engine, dihedrals=dihedral_idxs, dihedral_ranges=dihedral_ranges, grid_spacing=grid_spacing, init_coords_M=init_coords_M,
                              energy_decrease_thresh=args.energy_thresh, energy_upper_limit=args.energy_upper_limit, extra_constraints=constraints_dict, verbose=args.verbose)
    # Run the scan!
    scanner.master()
    # After finish, print result
    print("Dihedral scan is finished!")
    print(" Grid ID Energy")
    for grid_id in sorted(scanner.grid_energies.keys()):
        print(" %-20s %.10f" % (str(grid_id), scanner.grid_energies[grid_id]))
def test_gaussian_correct_version(version):
    """
    An allowed executable name is accepted and stored lower-cased on the engine.
    """
    ethane_path = os.path.join(datad, "ethane.com")
    engine = Gaussian(molecule=Molecule(ethane_path), exe=version)
    expected_exe = version.lower()
    assert engine.gaussian_exe == expected_exe
def test_setting_threads():
    """
    The thread count given to the engine replaces the one in the input file.
    """
    com_path = os.path.join(datad, "ethane.com")
    engine = Gaussian(molecule=Molecule(com_path), exe="g09", threads=30)
    engine.load_gaussian_input(com_path)
    template = engine.gauss_temp
    assert "%NProcShared=30\n" in template
def test_missing_force_input():
    """
    Loading an input file that does not request the force must raise RuntimeError.
    """
    com_path = os.path.join(datad, "no_force.com")
    engine = Gaussian(molecule=Molecule(com_path), exe="g09")
    with pytest.raises(RuntimeError):
        engine.load_gaussian_input(com_path)
def test_checkpoint_name():
    """
    A checkpoint name supplied in the input file is overwritten with "ligand".
    """
    com_path = os.path.join(datad, "ethane_wrong_name.com")
    engine = Gaussian(molecule=Molecule(com_path), exe="g09", threads=None)
    engine.load_gaussian_input(com_path)
    template = engine.gauss_temp
    assert "%Chk=ligand\n" in template
def test_checkpoint_missing():
    """
    A checkpoint line is added when the input file has none.
    """
    com_path = os.path.join(datad, "ethane_no_data.com")
    engine = Gaussian(molecule=Molecule(com_path), exe="g09", threads=None)
    engine.load_gaussian_input(com_path)
    template = engine.gauss_temp
    assert "%Chk=ligand\n" in template
def test_adding_threads_none():
    """
    With no thread line in the input and threads=None, a single thread is written.
    """
    com_path = os.path.join(datad, "ethane_no_data.com")
    engine = Gaussian(molecule=Molecule(com_path), exe="g09", threads=None)
    engine.load_gaussian_input(com_path)
    template = engine.gauss_temp
    assert "%NProcShared=1\n" in template
def test_adding_threads_value():
    """
    With no thread line in the input, the requested thread count is added.
    """
    com_path = os.path.join(datad, "ethane_no_data.com")
    engine = Gaussian(molecule=Molecule(com_path), exe="g09", threads=30)
    engine.load_gaussian_input(com_path)
    template = engine.gauss_temp
    assert "%NProcShared=30\n" in template
def load_native_output(self, filename='output.dat'):
    """
    Parse the final energy and optimized geometry from an output file and
    return them as a new one-frame molecule object.

    Raises RuntimeError when the "Final energy is" line is missing, or when
    no coordinate line follows the "Final optimized geometry and variables"
    header.
    """
    energy = None
    symbols, positions = [], []
    in_geometry = False
    with open(filename) as outfile:
        for raw in outfile:
            text = raw.strip()
            if text.startswith('Final energy is'):
                energy = float(text.split()[-1])
                continue
            if text.startswith('Final optimized geometry and variables'):
                in_geometry = True
                continue
            if not in_geometry:
                continue
            # after the header, every "<element> <x> <y> <z>" line is an atom
            fields = text.split()
            if len(fields) == 4 and check_all_float(fields[1:]):
                symbols.append(fields[0])
                positions.append(fields[1:4])
    if energy is None:
        raise RuntimeError("Final energy not found in %s" % filename)
    if not symbols or not positions:
        raise RuntimeError("Final geometry not found in %s" % filename)
    result = Molecule()
    result.elem = symbols
    result.xyzs = [np.array(positions, dtype=float)]
    result.qm_energies = [energy]
    result.build_topology()
    return result
def read_scan_xyz(fnm):
    """
    Read a scan xyz file and return its geometries and energies by grid id.

    Each comment line is expected to hold the grid id in parentheses and the
    energy as its last field, e.g. "Dihedral (30, 60) Energy -1.234567890".
    The grid id is read from between the parentheses, so ids that contain
    spaces (more than one dihedral) are parsed in full.

    Parameters
    ----------
    fnm: str
        Path of the scan xyz file.

    Returns
    -------
    grid_geo_data: dict
        {grid_id: geometry}, grid_id being a tuple of ints.
    grid_energies: dict
        {grid_id: energy}

    Raises
    ------
    ValueError
        If a comment line has no parenthesised grid id.
    """
    m = Molecule(fnm)
    grid_geo_data = {}
    grid_energies = {}
    for geo, comment in zip(m.xyzs, m.comms):
        left = comment.find('(')
        right = comment.find(')', left + 1)
        if left < 0 or right < 0:
            raise ValueError("Grid id in (XX, XX) format not found in comment line: %r" % comment)
        # a one-dimensional id such as "(30,)" leaves an empty trailing field
        grid_id = tuple(int(d) for d in comment[left + 1:right].split(',') if d.strip())
        grid_geo_data[grid_id] = geo
        grid_energies[grid_id] = float(comment.split()[-1])
    return grid_geo_data, grid_energies
def load_input(self, input_file):
    """
    Load TeraChem input

    Example input file:

    coordinates start.xyz
    run gradient
    basis 6-31g*
    method rb3lyp
    charge 0
    spinmult 1
    dispersion yes
    scf diis+a
    maxit 50

    Every line is kept verbatim in self.tera_temp. Blank lines and lines
    holding only a keyword (no value, e.g. "end") are kept but not parsed.
    Keywords and the "run" value are matched case-insensitively; the
    coordinates file name keeps its original case.

    Sets self.tera_temp, self.temp_type ('gradient' or 'optimize'),
    self.M (first frame of the coordinates file) and self.tera_geo_file.

    Raises
    ------
    AssertionError
        If there is no coordinates key, or the job type does not match
        self.native_opt.
    """
    self.tera_temp = []
    geo_file = None
    with open(input_file) as terain:
        for line in terain:
            # we don't need to change the temp
            self.tera_temp.append(line)
            fields = line.split(None, 1)
            if len(fields) < 2:
                # blank line or bare keyword: nothing to parse
                continue
            key = fields[0].lower()
            value = fields[1].strip()
            if key == 'coordinates':
                # keep the case: this is a file name
                geo_file = value
            elif key == 'run':
                run_type = value.lower()
                if run_type == 'gradient':
                    self.temp_type = 'gradient'
                elif run_type == 'minimize':
                    self.temp_type = 'optimize'
                    # place holder for writing native constraints
                    self.tera_temp.append('$!constraints@here')
    # check input
    assert geo_file, 'coordinates key not found in input file %s' % input_file
    if self.native_opt:
        assert self.temp_type == 'optimize', "input_file should be a opt job to use native opt"
    else:
        assert self.temp_type == 'gradient', "input_file should be a gradient job to use geomeTRIC"
    # load molecule from separate file, one frame only
    self.M = Molecule(geo_file)[0]
    # store the name of geo_file
    self.tera_geo_file = geo_file
def test_gaussian_template():
    """
    Reading ethane.com must produce the expected template, with the geometry
    replaced by a single placeholder entry.
    """
    com_path = os.path.join(datad, "ethane.com")
    engine = Gaussian(molecule=Molecule(com_path), exe="g09")
    engine.load_gaussian_input(com_path)
    expected_template = [
        '%Mem=6GB\n',
        '%NProcShared=2\n',
        '%Chk=ligand\n',
        '# hf/6-31G(d) Force=NoStep\n',
        '\n',
        'ethane\n',
        '\n',
        '0 1\n',
        '$!geometry@here',
        '\n',
        '\n',
    ]
    assert engine.gauss_temp == expected_template
def load_native_output(self, filename='output.dat'):
    """
    Load the optimized geometry and energy into a new molecule object and return

    Parameters
    ----------
    filename: str
        Output file to parse. The energy is the last field of the line
        starting with "Final energy is"; atoms are the
        "<element> <x> <y> <z>" lines after the line starting with
        "Final optimized geometry and variables".

    Returns
    -------
    A one-frame molecule object with elem, xyzs and qm_energies set.

    Raises
    ------
    RuntimeError
        If the final energy or the final geometry is not found.
    """
    found_opt_result = False
    final_energy, elems, coords = None, [], []
    with open(filename) as outfile:
        for line in outfile:
            line = line.strip()
            if line.startswith('Final energy is'):
                final_energy = float(line.split()[-1])
            elif line.startswith('Final optimized geometry and variables'):
                found_opt_result = True
            elif found_opt_result:
                ls = line.split()
                if len(ls) == 4 and check_all_float(ls[1:]):
                    elems.append(ls[0])
                    coords.append(ls[1:4])
    # identity test: an energy of exactly 0.0 is still a valid value
    if final_energy is None:
        raise RuntimeError("Final energy not found in %s" % filename)
    if not elems or not coords:
        raise RuntimeError("Final geometry not found in %s" % filename)
    m = Molecule()
    m.elem = elems
    m.xyzs = [np.array(coords, dtype=np.float64)]
    m.qm_energies = [final_energy]
    m.build_topology()
    return m
class EngineOpenMM(QMEngine):
    """Engine that runs constrained optimizations through geomeTRIC with OpenMM."""

    def load_input(self, input_file):
        """
        Load the pdb file holding the coordinates, plus the xml file that
        shares its name (same path with the extension replaced by .xml).
        """
        first_frame = Molecule(input_file)[0]
        self.m_pdb = first_frame
        self.M = copy.deepcopy(first_frame)
        prefix, _ext = os.path.splitext(input_file)
        xml_name = prefix + '.xml'
        # the xml file must sit next to the pdb file
        assert os.path.exists(xml_name), "OpenMM requires a pdb and xml file, ensure you have both in the current folder with the same prefix"
        with open(xml_name) as xml_file:
            self.xml_content = xml_file.read()

    def write_input(self):
        """Write input.pdb with the latest geometry and input.xml with the stored xml text."""
        # push the current geometry into the pdb template before writing it
        self.m_pdb.xyzs[0] = self.M.xyzs[0]
        self.m_pdb.write('input.pdb')
        with open('input.xml', 'w') as xml_out:
            xml_out.write(self.xml_content)

    def optimize_geomeTRIC(self):
        """
        Run the constrained optimization using the geomeTRIC package:
        write constraints.txt, write the input files, then run the job.
        """
        command = (
            'geometric-optimize --prefix tdrive --qccnv --reset --epsilon 0.0 '
            '--enforce 0.1 --qdata --pdb input.pdb --openmm input.xml constraints.txt'
        )
        needed_files = ['input.xml', 'input.pdb', 'constraints.txt']
        produced_files = ['tdrive.log', 'tdrive.xyz', 'qdata.txt']
        # step 1: constraints
        self.write_constraints_txt()
        # step 2: pdb and xml input
        self.write_input()
        # step 3: run
        self.run(command, input_files=needed_files, output_files=produced_files)
def finish(self):
    """
    Write qdata.txt and scan.xyz from the converged scan results.

    Only grid points that have an energy are written, in sorted grid id order.
    """
    scan = Molecule()
    scan.elem = list(self.engine.M.elem)
    scan.qm_energies, scan.xyzs, scan.comms = [], [], []
    # only grid points with energies are written
    for gid in sorted(self.grid_energies.keys()):
        energy = self.grid_energies[gid]
        scan.qm_energies.append(energy)
        scan.xyzs.append(self.grid_final_geometries[gid])
        scan.comms.append("Dihedral %s Energy %.9f" % (str(gid), energy))
    for out_name in ('qdata.txt', 'scan.xyz'):
        scan.write(out_name)
        print("Final scan energies are written to %s" % out_name)
def __init__(self, input_file=None, work_queue=None, native_opt=False, extra_constraints=None):
    """
    Store the engine options and load the input file when one is given.

    Without an input file, self.M is set to an empty molecule object.
    self.rootpath records the working directory at construction time.
    """
    # will be set to either "gradient" or "optimize" later
    self.temp_type = None
    self.work_queue = work_queue
    self.native_opt = native_opt
    self.extra_constraints = extra_constraints
    self.rootpath = os.getcwd()
    if input_file is None:
        self.M = Molecule()
    else:
        self.load_input(input_file)
def read_scan_xyz(filename):
    """
    Read the scan xyz file and return a dictionary of {grid_id: energy}

    Each comment line is expected to hold the grid id in parentheses and the
    energy as its last field, e.g. "Dihedral (30, 60) Energy -1.234567890".
    The grid id is read from between the parentheses, so ids that contain
    spaces (more than one dihedral) are accepted.

    Parameters
    ----------
    filename: str
        Path of the scan xyz file.

    Returns
    -------
    res: dict
        {grid_id: energy}, grid_id being a tuple of ints.

    Raises
    ------
    ValueError
        If a comment line has no parenthesised grid id.
    """
    res = {}
    m = Molecule(filename)
    for line in m.comms:
        left = line.find('(')
        right = line.find(')', left + 1)
        if left < 0 or right < 0:
            raise ValueError("Grid id in (XX, XX) format not found in comment line: %r" % line)
        # a one-dimensional id such as "(30,)" leaves an empty trailing field
        grid_id = tuple(int(s) for s in line[left + 1:right].split(',') if s.strip())
        # read the last element as energy
        energy = float(line.split()[-1])
        res[grid_id] = energy
    return res
def load_native_output(self, filename='ligand.fchk', filename2='gaussian.log'):
    """
    Build a molecule object from a finished Gaussian optimization.

    The log file (filename2) is only used to confirm that the optimization
    completed; geometry and energy are read from the fchk file (filename).
    Coordinates are scaled by 0.529177 and elements are taken from self.M.

    Raises RuntimeError when the log has no "Optimization completed" line,
    when the fchk has no coordinates section, or when it has no
    "Total Energy" line.
    """
    # Accept both
    #   Optimization completed.
    #   Optimization completed on the basis of negligible forces.
    with open(filename2) as logfile:
        completed = any('Optimization completed' in log_line for log_line in logfile)
    if not completed:
        raise RuntimeError("Geometry optimization failed in %s" % filename2)

    # the optimized structure is taken from the fchk file as this is more reliable
    flat_coords = []
    energy = None
    section_end = None
    with open(filename) as fchk:
        for line_no, text in enumerate(fchk):
            if 'Current cartesian coordinates' in text:
                # the header carries the number of values; they follow five per line
                num_xyz = int(text.split()[5])
                section_end = int(np.ceil(num_xyz / 5) + line_no + 1)
            elif section_end is not None and line_no < section_end:
                flat_coords.extend(float(token) * 0.529177 for token in text.split())
            elif 'Total Energy' in text:
                energy = float(text.split()[3])
    if section_end is None:
        raise RuntimeError('Cannot locate coordinates in ligand.fchk file.')
    # Make sure we have all of the coordinates
    assert len(flat_coords) == num_xyz, "Could not extract the optimised geometry"
    if energy is None:
        raise RuntimeError("Final energy not found in %s" % filename)
    result = Molecule()
    result.elem = self.M.elem
    natoms = int(len(result.elem))
    result.xyzs = [np.reshape(flat_coords, (natoms, 3))]
    result.qm_energies = [energy]
    result.build_topology()
    return result
def finish(self):
    """
    Write qdata.txt and scan.xyz file based on converged scan results

    Grid points are written in the order of self.grid_ids. Grid points that
    have no entry in self.grid_energies (never optimized) are skipped, so
    they no longer raise KeyError.
    """
    m = Molecule()
    m.elem = list(self.engine.M.elem)
    m.qm_energies, m.xyzs, m.comms = [], [], []
    for gid in self.grid_ids:
        # only write grid points that have an energy
        if gid not in self.grid_energies:
            continue
        energy = self.grid_energies[gid]
        m.qm_energies.append(energy)
        m.xyzs.append(self.grid_final_geometries[gid])
        m.comms.append("Dihedral %s Energy %.9f" % (str(gid), energy))
    m.write('qdata.txt')
    print("Final scan energies are written to qdata.txt")
    m.write('scan.xyz')
    print("Final scan energies are written to scan.xyz")
def test_calc_new_gaussian():
    """
    Test calculating the force using gaussian.
    Note this is expected to fail due to gaussian not being installed.

    The calculation runs in a temporary directory. Whether or not Gaussian
    is available, the gaussian.com written by the engine is read back and
    compared with the starting molecule.
    """
    major, minor, _ = platform.python_version_tuple()
    # compare as a tuple: "not a >= 3 and b >= 5" binds the "not" to the
    # first comparison only and so never skipped on the intended versions
    if (int(major), int(minor)) < (3, 5):
        pytest.skip(
            "Python version below 3.5 TemporaryDirectory not available.")
    molecule = Molecule(os.path.join(datad, "ethane.com"))
    engine = Gaussian(molecule=molecule, exe="g09")
    engine.load_gaussian_input(os.path.join(datad, "ethane.com"))
    home = os.getcwd()
    with tempfile.TemporaryDirectory() as temp:
        os.chdir(temp)
        try:
            # now we want to run calc new to make sure the file is written correctly
            g_version = get_gaussian_version()
            if g_version is None:
                with pytest.raises(GaussianEngineError):
                    engine.calc(coords=molecule.xyzs[0] / bohr2ang, dirname="ethane.tmp")
            else:
                engine.gaussian_exe = g_version
                engine.calc(coords=molecule.xyzs[0] / bohr2ang, dirname="ethane.tmp")
            # now we want to read the file back in to make sure it is correct
            molecule_2 = Molecule(os.path.join("ethane.tmp", "gaussian.com"))
            # now check over the data
            assert molecule.Data["elem"] == molecule_2.Data["elem"]
            assert molecule.Data["bonds"] == molecule_2.Data["bonds"]
            assert np.allclose(molecule.Data["xyzs"][0], molecule_2.Data["xyzs"][0])
        finally:
            # leave the temporary directory before it is removed, even when
            # an assertion above fails, so later tests keep a valid cwd
            os.chdir(home)
def load_native_output(self, filename='lig.fchk', filename2='gaussian.log'):
    """
    Load the optimized geometry and energy into a new molecule object and return

    Parameters
    ----------
    filename: str
        Formatted checkpoint file; the "Current cartesian coordinates"
        section and the "Total Energy" line are read from it.
    filename2: str
        Gaussian log file; only checked for a line starting with
        "Optimization completed".

    Returns
    -------
    A one-frame molecule object with elements taken from self.M, the
    coordinates scaled by 0.529177, and qm_energies set.

    Raises
    ------
    RuntimeError
        If the optimization did not complete, or the energy or geometry is
        missing from the checkpoint file.
    EOFError
        If the coordinates section is missing from the checkpoint file.
    """
    with open(filename2) as logfile:
        found_opt_result = any(
            line.strip().startswith('Optimization completed') for line in logfile)
    if not found_opt_result:
        raise RuntimeError("Geometry optimisation failed in %s" % filename2)

    with open(filename) as outfile:
        fchk_lines = outfile.readlines()
    # start as None so a missing section is reported instead of hitting an unbound local
    start_xyz_pos = end_xyz_pos = energy_pos = None
    num_xyz = 0
    for counter, line in enumerate(fchk_lines):
        if line.startswith('Current cartesian coordinates'):
            start_xyz_pos = counter + 1
            # the header carries the number of values; they follow five per line
            num_xyz = int(line.split()[5])
            end_xyz_pos = int(np.ceil(num_xyz / 5)) + start_xyz_pos
        if line.startswith('Total Energy'):
            energy_pos = counter
    if start_xyz_pos is None:
        raise EOFError('Cannot locate coordinates in lig.fchk file.')

    coords = []
    for line in fchk_lines[start_xyz_pos:end_xyz_pos]:
        coords.extend(float(num) * 0.529177 for num in line.split())
    # Make sure we have all of the coordinates
    assert len(coords) == num_xyz, "Could not extract the optimised geometry"
    if energy_pos is None:
        raise RuntimeError("Final energy not found in %s" % filename)
    final_energy = float(fchk_lines[energy_pos].split()[3])
    if len(coords) == 0:
        raise RuntimeError("Final geometry not found in %s" % filename)
    m = Molecule()
    m.elem = self.M.elem
    m.xyzs = [np.reshape(coords, (int(len(m.elem)), 3))]
    m.qm_energies = [final_energy]
    m.build_topology()
    return m
def get_next_jobs(current_state, verbose=False):
    """
    Take the current scan state and generate the next set of optimizations.

    A new DihedralScanRepeater is created from current_state and the whole
    scan is replayed from the beginning. Jobs found in the cache are
    finished from it; replay stops when a job is not found in the cache.
    The jobs still to be run for the current iteration are returned.

    Parameters
    ----------
    current_state: dict
        A dictionary containing information of the scan state,
        Required keys: 'dihedrals', 'grid_spacing', 'elements', 'init_coords', 'grid_status'
        Optional keys: 'dihedral_ranges', 'energy_decrease_thresh', 'energy_upper_limit'
    verbose: bool
        Passed on to the scanner.

    Returns
    -------
    next_jobs: dict
        key is the target grid_id, value is a list of new_job. Each new_job is represented by its start_geo
        * Note: the order of new_job should correspond to the finished job_info.

    Examples
    --------
    current_state = {
        'dihedrals': [[0,1,2,3], [1,2,3,4]] ,
        'grid_spacing': [30, 30],
        'elements': ['H', 'C', 'O', ...]
        'init_coords': [geo1, geo2, ..]
        'grid_status': {(30, 60): [(start_geo, end_geo, end_energy), ..], ...}
    }
    >>> get_next_jobs(current_state)
    {
        (90, 60): [start_geo1, start_geo2, ..],
        (90, 90): [start_geo3, start_geo4, ..],
    }
    """
    scan_dihedrals = current_state['dihedrals']
    scan_spacing = current_state['grid_spacing']
    # rebuild the molecule object holding the initial coordinates
    start_mol = Molecule()
    start_mol.elem = current_state['elements']
    start_mol.xyzs = current_state['init_coords']
    start_mol.build_topology()
    # the scanner gets a blank engine: nothing is computed here
    blank_engine = EngineBlank()
    # optional settings; a missing key is passed on as None
    optional_settings = {
        'dihedral_ranges': current_state.get('dihedral_ranges'),
        'energy_decrease_thresh': current_state.get('energy_decrease_thresh'),
        'energy_upper_limit': current_state.get('energy_upper_limit'),
    }
    scanner = DihedralScanRepeater(
        blank_engine,
        scan_dihedrals,
        scan_spacing,
        init_coords_M=start_mol,
        verbose=verbose,
        **optional_settings
    )
    # rebuild the task_cache for scanner
    scanner.rebuild_task_cache(current_state['grid_status'])
    # run the scanner until some calculation is not found in cache
    scanner.repeat_scan_process()
    return scanner.next_jobs
def load_input(self, input_file):
    """
    Load TeraChem input

    Example input file:

    coordinates start.xyz
    run gradient
    basis 6-31g*
    method rb3lyp
    charge 0
    spinmult 1
    dispersion yes
    scf diis+a
    maxit 50

    Every line is kept verbatim in self.tera_temp. Blank lines and lines
    holding only a keyword (no value, e.g. "end") are kept but not parsed.
    Keywords and the "run" value are matched case-insensitively; the
    coordinates file name keeps its original case.

    Sets self.tera_temp, self.temp_type ('gradient' or 'optimize'),
    self.M (first frame of the coordinates file) and self.tera_geo_file.

    Raises
    ------
    AssertionError
        If there is no coordinates key, or the job type does not match
        self.native_opt.
    """
    self.tera_temp = []
    geo_file = None
    with open(input_file) as terain:
        for line in terain:
            # we don't need to change the temp
            self.tera_temp.append(line)
            fields = line.split(None, 1)
            if len(fields) < 2:
                # blank line or bare keyword: nothing to parse
                continue
            key = fields[0].lower()
            value = fields[1].strip()
            if key == 'coordinates':
                # keep the case: this is a file name
                geo_file = value
            elif key == 'run':
                run_type = value.lower()
                if run_type == 'gradient':
                    self.temp_type = 'gradient'
                elif run_type == 'minimize':
                    self.temp_type = 'optimize'
                    # place holder for writing native constraints
                    self.tera_temp.append('$!constraints@here')
    # check input
    assert geo_file, 'coordinates key not found in input file %s' % input_file
    if self.native_opt:
        assert self.temp_type == 'optimize', "input_file should be a opt job to use native opt"
    else:
        assert self.temp_type == 'gradient', "input_file should be a gradient job to use geomeTRIC"
    # load molecule from separate file, one frame only
    self.M = Molecule(geo_file)[0]
    # store the name of geo_file
    self.tera_geo_file = geo_file
def launch_opt_jobs(self):
    """
    Launch constrained optimizations for molecules in opt_queue
    The current opt_queue will be cleaned up

    A job whose start geometry is already in self.task_cache is not run
    again: its cached result is pushed to self.current_finished_job_results.
    Every other job is launched, and recorded in self.running_job_path_info
    as { job_path: (m, from_grid_id, to_grid_id) }.
    """
    assert hasattr(self, 'running_job_path_info')
    assert hasattr(self, 'current_finished_job_results')
    while len(self.opt_queue) > 0:
        start_m, from_grid_id, to_grid_id = self.opt_queue.pop()
        # check if this job already exists
        geo_key = get_geo_key(start_m.xyzs[0])
        cached_tasks = self.task_cache[to_grid_id]
        if geo_key not in cached_tasks:
            job_path = self.launch_constrained_opt(start_m, to_grid_id)
            self.running_job_path_info[job_path] = start_m, from_grid_id, to_grid_id
            continue
        final_geo, final_energy, job_folder = cached_tasks[geo_key]
        result_m = Molecule()
        result_m.elem = list(start_m.elem)
        result_m.xyzs = [final_geo]
        result_m.qm_energies = [final_energy]
        result_m.build_topology()
        grid_id = self.get_dihedral_id(result_m, check_grid_id=to_grid_id)
        self.current_finished_job_results.push((result_m, grid_id), priority=job_folder)
class EngineTerachem(QMEngine):
    """
    Engine that drives constrained optimizations through TeraChem, either
    with TeraChem's own optimizer or through the geomeTRIC package.
    """

    def load_input(self, input_file):
        """
        Load a TeraChem input file and the geometry file it refers to.

        Every line of the input is stored verbatim in self.tera_temp,
        followed by a '$!constraints@here' placeholder that write_input()
        replaces with the constraint block (if one has been set).

        Example input file:

        coordinates start.xyz
        run gradient
        basis 6-31g*
        method rb3lyp
        charge 0
        spinmult 1
        dispersion yes
        scf diis+a
        maxit 50

        Sets self.tera_temp, self.temp_type ('gradient' or 'optimize'),
        self.M (first frame of the geometry file) and self.tera_geo_file.

        Raises AssertionError if the 'coordinates' key is missing or if the
        job type does not match self.native_opt.
        """
        self.tera_temp = []
        geo_file = None
        with open(input_file) as terain:
            for line in terain:
                # the template is stored unchanged; only the geometry file is rewritten later
                self.tera_temp.append(line)
                fields = line.strip().split(None, 1)
                if len(fields) < 2:
                    # blank lines and bare keywords carry no "key value" pair
                    continue
                key, value = fields[0].lower(), fields[1]
                if key == 'coordinates':
                    # keep the original case: the template line still refers to this exact name
                    geo_file = value
                elif key == 'run':
                    run_type = value.lower()
                    if run_type == 'gradient':
                        self.temp_type = 'gradient'
                    elif run_type == 'minimize':
                        self.temp_type = 'optimize'
        # place holder for writing native constraints
        self.tera_temp.append('$!constraints@here')
        # check input
        assert geo_file, 'coordinates key not found in input file %s' % input_file
        if self.native_opt:
            assert self.temp_type == 'optimize', "input_file should be a opt job to use native opt"
        else:
            assert self.temp_type == 'gradient', "input_file should be a gradient job to use geomeTRIC"
        # load molecule from separate file, one frame only
        self.M = Molecule(geo_file)[0]
        # store the name of geo_file
        self.tera_geo_file = geo_file

    def write_input(self):
        """
        Write TeraChem input files, i.e. run.in and the geometry file named
        by self.tera_geo_file (start.xyz in the example input).

        The '$!constraints@here' placeholder is replaced by
        self.constraintsStr when that attribute exists, and dropped otherwise.
        """
        assert hasattr(self, 'tera_temp'), "self.tera_temp not set, call load_input() first"
        assert hasattr(self, 'tera_geo_file'), "self.tera_geo_file not set, call load_input() first"
        with open('run.in', 'w') as terain:
            for line in self.tera_temp:
                if line == "$!constraints@here":
                    if hasattr(self, 'constraintsStr'):
                        # self.constraintsStr will be set by self.optimize_native()
                        terain.write(self.constraintsStr)
                else:
                    terain.write(line)
        self.M.write(self.tera_geo_file)

    def optimize_native(self):
        """
        Run the constrained optimization with TeraChem itself, in 3 steps:
        1. Build the $constraint_set block from self.dihedral_idx_values.
        2. Write the optimization job input files.
        3. Run the job.

        NOTE(review): the block syntax is assumed to match TeraChem's
        constraint input format; confirm against the TeraChem manual.
        """
        assert self.temp_type == 'optimize', "To use native optimization, the input file be an opt job"
        # add the $constraint_set block
        self.constraintsStr = '\n$constraint_set\n'
        for d1, d2, d3, d4, v in self.dihedral_idx_values:
            # stored atom indices are shifted by one, i.e. written starting from 1
            self.constraintsStr += 'dihedral %f %d_%d_%d_%d\n' % (v, d1+1, d2+1, d3+1, d4+1)
        self.constraintsStr += '$end\n'
        # write input file
        self.write_input()
        # run the job
        self.run('terachem run.in > run.out', input_files=['run.in', self.tera_geo_file], output_files=['run.out', 'scr'])

    def optimize_geomeTRIC(self):
        """
        Run the constrained optimization using geomeTRIC package, in 3 steps:
        1. Write a constraints.txt file.
        2. Write a gradient job input file.
        3. Run the job
        """
        assert self.temp_type == 'gradient', "To use geomeTRIC package, the input file should be a gradient job (run gradient)"
        # step 1
        self.write_constraints_txt()
        # step 2
        self.write_input()
        # step 3
        self.run('geometric-optimize --qccnv --reset --epsilon 0.0 run.in constraints.txt > optimize.log', input_files=['run.in', self.tera_geo_file, 'constraints.txt'], output_files=['optimize.log', 'opt.xyz', 'energy.txt'])

    def load_native_output(self):
        """
        Load the optimized geometry and energy into a new molecule object and return.

        The last frame of scr/optim.xyz is used; its energy is the first
        whitespace-separated token of that frame's comment line.
        """
        m = Molecule('scr/optim.xyz')[-1]
        # read the energy from optim.xyz comment line
        m.qm_energies = [float(m.comms[0].split(None, 1)[0])]
        return m
class EnginePsi4(QMEngine):
    """
    Engine that drives constrained optimizations through Psi4, either with
    Psi4's own optimizer or through the geomeTRIC package.
    """

    def load_input(self, input_file):
        """
        Parse a Psi4 input file into a template (self.psi4_temp) and a
        Molecule (self.M). Only xyz input coordinates are supported for now.

        Coordinate lines inside the molecule block are replaced in the
        template by one '$!geometry@here' marker; a '$!optking@here' marker
        is added right after the line that closes the molecule block.

        Example input file:

        memory 12 gb
        molecule {
        0 1
        H  -0.90095  -0.50851  -0.76734
        O  -0.72805   0.02496   0.02398
        O   0.72762   0.03316  -0.02696
        H   0.90782  -0.41394   0.81465
        units angstrom
        no_reorient
        symmetry c1
        }
        set globals {
        basis         6-31+g*
        freeze_core   True
        guess         sad
        scf_type      df
        print         1
        }
        set_num_threads(1)
        gradient('mp2')
        """
        elems, coords = [], []
        template = []  # input file skeleton reused when writing new geometries
        in_molecule = False
        found_geo = False
        with open(input_file) as psi4in:
            for raw in psi4in:
                lowered = raw.strip().lower()
                if lowered.startswith("molecule"):
                    in_molecule = True
                    template.append(raw)
                    continue
                if not in_molecule:
                    template.append(raw)
                    # the job type is recognised from the driver call
                    if lowered.startswith('gradient('):
                        self.temp_type = "gradient"
                    elif lowered.startswith('optimize('):
                        self.temp_type = "optimize"
                    continue
                fields = raw.split()
                if len(fields) == 4 and check_all_float(fields[1:]):
                    # xyz-format coordinate line: element x y z
                    if not found_geo:
                        found_geo = True
                        template.append("$!geometry@here")
                    elems.append(fields[0])
                    coords.append(fields[1:4])
                    continue
                template.append(raw)
                if '}' in raw:
                    # end of the molecule block
                    in_molecule = False
                    template.append("$!optking@here")
        assert found_geo, "XYZ geometry not found in molecule block of %s" % input_file
        if self.native_opt:
            assert self.temp_type == 'optimize', "input_file should contain optimize() command to use native opt"
        else:
            assert self.temp_type == 'gradient', "input_file should contain gradient() command to use geomeTRIC"
        # the template lets write_input() produce inputs with new geometries
        self.psi4_temp = template
        # self.M is only a starting point; external functions may overwrite it
        molecule = Molecule()
        self.M = molecule
        molecule.elem = elems
        molecule.xyzs = [np.array(coords, dtype=np.float64)]
        molecule.build_topology()

    def write_input(self, filename='input.dat'):
        """
        Write a Psi4 input built from self.psi4_temp, inserting the first
        frame of self.M at the geometry marker and self.optkingStr (when
        set) at the optking marker.
        """
        assert hasattr(self, 'psi4_temp'), "psi4_temp not found, call self.load_input() first"
        with open(filename, 'w') as outfile:
            for entry in self.psi4_temp:
                if entry == '$!geometry@here':
                    for symbol, xyz in zip(self.M.elem, self.M.xyzs[0]):
                        outfile.write("%-7s %13.7f %13.7f %13.7f\n" % (symbol, xyz[0], xyz[1], xyz[2]))
                    continue
                if entry == '$!optking@here':
                    # the marker is dropped when no optking block has been prepared
                    if hasattr(self, 'optkingStr'):
                        outfile.write(self.optkingStr)
                    continue
                outfile.write(entry)

    def optimize_native(self):
        """
        Constrained optimization with Psi4's own optimizer:
        1. build the optking block and write the optimization input file.
        2. run the job
        """
        assert self.temp_type == 'optimize', "To use native optimization, the input file should have optimize() in it"
        # atom indices are written starting from 1, hence the +1 shifts
        dihedral_lines = [
            ' %d %d %d %d %f\n' % (d1 + 1, d2 + 1, d3 + 1, d4 + 1, v)
            for d1, d2, d3, d4, v in self.dihedral_idx_values
        ]
        self.optkingStr = '\nset optking {\n fixed_dihedral = ("\n' + ''.join(dihedral_lines) + ' ")\n}\n'
        self.write_input('input.dat')
        self.run('psi4 input.dat -o output.dat', input_files=['input.dat'], output_files=['output.dat'])

    def optimize_geomeTRIC(self):
        """
        Constrained optimization through the geomeTRIC package:
        1. Write a constraints.txt file.
        2. Write a gradient job input file.
        3. Run the job
        """
        assert self.temp_type == 'gradient', "To use geomeTRIC package, the input file should have gradient() in it"
        self.write_constraints_txt()
        self.write_input('input.dat')
        command = 'geometric-optimize --qccnv --reset --epsilon 0.0 --psi4 input.dat constraints.txt > optimize.log'
        self.run(command, input_files=['input.dat', 'constraints.txt'], output_files=['optimize.log', 'opt.xyz', 'energy.txt'])

    def load_native_output(self, filename='output.dat'):
        """
        Read the final energy and optimized geometry from a Psi4 output file
        and return them as a new molecule object.

        Raises RuntimeError when either the energy or the geometry is missing.
        """
        final_energy = None
        elems, coords = [], []
        in_final_geometry = False
        with open(filename) as outfile:
            for raw in outfile:
                text = raw.strip()
                if text.startswith('Final energy is'):
                    final_energy = float(text.split()[-1])
                elif text.startswith('Final optimized geometry and variables'):
                    in_final_geometry = True
                elif in_final_geometry:
                    fields = text.split()
                    if len(fields) == 4 and check_all_float(fields[1:]):
                        elems.append(fields[0])
                        coords.append(fields[1:4])
        if final_energy is None:
            raise RuntimeError("Final energy not found in %s" % filename)
        if not elems or not coords:
            raise RuntimeError("Final geometry not found in %s" % filename)
        result = Molecule()
        result.elem = elems
        result.xyzs = [np.array(coords, dtype=np.float64)]
        result.qm_energies = [final_energy]
        result.build_topology()
        return result
def load_input(self, input_file):
    """
    Parse a Psi4 input file into a template (self.psi4_temp) and a Molecule
    (self.M). Only xyz input coordinates are supported for now.

    Coordinate lines inside the molecule block are replaced in the template
    by one '$!geometry@here' marker; a '$!optking@here' marker is added right
    after the line that closes the molecule block.

    Example input file:

    memory 12 gb
    molecule {
    0 1
    H  -0.90095  -0.50851  -0.76734
    O  -0.72805   0.02496   0.02398
    O   0.72762   0.03316  -0.02696
    H   0.90782  -0.41394   0.81465
    units angstrom
    no_reorient
    symmetry c1
    }
    set globals {
    basis         6-31+g*
    freeze_core   True
    guess         sad
    scf_type      df
    print         1
    }
    set_num_threads(1)
    gradient('mp2')
    """
    elems, coords = [], []
    template = []  # input file skeleton reused when writing new geometries
    in_molecule = False
    found_geo = False
    with open(input_file) as psi4in:
        for raw in psi4in:
            lowered = raw.strip().lower()
            if lowered.startswith("molecule"):
                in_molecule = True
                template.append(raw)
                continue
            if not in_molecule:
                template.append(raw)
                # the job type is recognised from the driver call
                if lowered.startswith('gradient('):
                    self.temp_type = "gradient"
                elif lowered.startswith('optimize('):
                    self.temp_type = "optimize"
                continue
            fields = raw.split()
            if len(fields) == 4 and check_all_float(fields[1:]):
                # xyz-format coordinate line: element x y z
                if not found_geo:
                    found_geo = True
                    template.append("$!geometry@here")
                elems.append(fields[0])
                coords.append(fields[1:4])
                continue
            template.append(raw)
            if '}' in raw:
                # end of the molecule block
                in_molecule = False
                template.append("$!optking@here")
    assert found_geo, "XYZ geometry not found in molecule block of %s" % input_file
    if self.native_opt:
        assert self.temp_type == 'optimize', "input_file should contain optimize() command to use native opt"
    else:
        assert self.temp_type == 'gradient', "input_file should contain gradient() command to use geomeTRIC"
    # the template lets new input files be written with new geometries
    self.psi4_temp = template
    # self.M is only a starting point; external functions may overwrite it
    molecule = Molecule()
    self.M = molecule
    molecule.elem = elems
    molecule.xyzs = [np.array(coords, dtype=np.float64)]
    molecule.build_topology()
def load_geomeTRIC_output(self):
    """
    Build a molecule from the geomeTRIC result files and return it.

    The geometry is read from opt.xyz and the energy from energy.txt, whose
    whole content is parsed as a single float.
    """
    optimized = Molecule('opt.xyz')
    with open('energy.txt') as energy_file:
        energy_text = energy_file.read()
    optimized.qm_energies = [float(energy_text)]
    return optimized
def load_native_output(self):
    """
    Build a molecule from the last frame of scr/optim.xyz and return it.

    The energy is the first whitespace-separated token of that frame's
    comment line.
    """
    trajectory = Molecule('scr/optim.xyz')
    final_frame = trajectory[-1]
    comment_line = final_frame.comms[0]
    energy_token = comment_line.split(None, 1)[0]
    final_frame.qm_energies = [float(energy_token)]
    return final_frame
class EngineQChem(QMEngine):
    """
    Engine that drives constrained optimizations through QChem, either with
    QChem's own optimizer or through the geomeTRIC package.
    """

    def load_input(self, input_file):
        """
        Load QChem input into a template (self.qchem_temp) and a Molecule
        (self.M).

        Coordinate lines inside the $molecule block are replaced in the
        template by one '$!geometry@here' marker; a '$!optblock@here' marker
        is added right after the $end that closes the $molecule block.

        Example input file:

        $molecule
        0 1
        H  -3.20093  1.59945  -0.91132
        O  -2.89333  1.61677  -0.01202
        O  -1.41314  1.60154   0.01202
        H  -1.10554  1.61886   0.91132
        $end

        $rem
        jobtype              opt
        exchange             hf
        basis                3-21g
        geom_opt_max_cycles  150
        $end

        Raises AssertionError if no geometry is found in the $molecule block
        or if the job type does not match self.native_opt.
        """
        elems, coords = [], []
        reading_molecule, found_geo = False, False
        qchem_temp = []  # store a template of the input file for generating new ones
        with open(input_file) as qchemin:
            for line in qchemin:
                line_sl = line.strip().lower()
                if line_sl.startswith("$molecule"):
                    reading_molecule = True
                    qchem_temp.append(line)
                elif reading_molecule:
                    ls = line.split()
                    if len(ls) == 4 and check_all_float(ls[1:]):
                        if not found_geo:
                            found_geo = True
                            qchem_temp.append("$!geometry@here")
                        elems.append(ls[0])
                        coords.append(ls[1:])
                    else:
                        qchem_temp.append(line)
                        if line_sl.startswith('$end'):
                            reading_molecule = False
                            qchem_temp.append("$!optblock@here")
                else:
                    qchem_temp.append(line)
                    if line_sl.startswith('jobtype'):
                        rem_fields = line_sl.split()
                        # a bare "jobtype" line has no value to inspect
                        if len(rem_fields) > 1:
                            jobtype = rem_fields[1]
                            if jobtype.startswith('opt'):
                                self.temp_type = 'optimize'
                            elif jobtype.startswith('force'):
                                self.temp_type = 'gradient'
        # same check as the other engines: fail early with a clear message
        assert found_geo, "XYZ geometry not found in molecule block of %s" % input_file
        if self.native_opt:
            assert self.temp_type == 'optimize', "input_file should be a opt job to use native opt"
        else:
            assert self.temp_type == 'gradient', "input_file should be a gradient job to use geomeTRIC"
        # self.qchem_temp will enable writing input files with new geometries
        self.qchem_temp = qchem_temp
        # here self.M can be and will be overwritten by external functions
        self.M = Molecule()
        self.M.elem = elems
        self.M.xyzs = [np.array(coords, dtype=np.float64)]
        self.M.build_topology()

    def write_input(self, filename='qc.in'):
        """
        Write a QChem input built from self.qchem_temp, inserting the first
        frame of self.M at the geometry marker and self.optblockStr (when
        set) at the opt-block marker.
        """
        assert hasattr(self, 'qchem_temp'), "self.qchem_temp not set, call load_input() first"
        with open(filename, 'w') as outfile:
            for line in self.qchem_temp:
                if line == '$!geometry@here':
                    for e, c in zip(self.M.elem, self.M.xyzs[0]):
                        outfile.write("%-7s %13.7f %13.7f %13.7f\n" % (e, c[0], c[1], c[2]))
                elif line == "$!optblock@here":
                    if hasattr(self, 'optblockStr'):
                        # self.optblockStr will be set by self.optimize_native()
                        outfile.write(self.optblockStr)
                else:
                    outfile.write(line)

    def optimize_native(self):
        """
        Run the constrained optimization, following QChem 5.0 manual.
        1. write a optimization job input file.
        2. run the job
        """
        assert self.temp_type == 'optimize', "To use native optimization, the input file be an opt job"
        # add the $opt block
        self.optblockStr = '\n$opt\nCONSTRAINT\n'
        for d1, d2, d3, d4, v in self.dihedral_idx_values:
            # stored atom indices are shifted by one, i.e. written starting from 1
            self.optblockStr += 'tors %d %d %d %d %f\n' % (d1+1, d2+1, d3+1, d4+1, v)
        self.optblockStr += 'ENDCONSTRAINT\n$end\n'
        # write input file
        self.write_input('qc.in')
        # run the job
        self.run('qchem qc.in qc.out > qc.log', input_files=['qc.in'], output_files=['qc.out', 'qc.log'])

    def optimize_geomeTRIC(self):
        """
        Run the constrained optimization using geomeTRIC package, in 3 steps:
        1. Write a constraints.txt file.
        2. Write a gradient job input file.
        3. Run the job
        """
        assert self.temp_type == 'gradient', "To use geomeTRIC package, the input file should be a gradient job (jobtype force)"
        # step 1
        self.write_constraints_txt()
        # step 2
        self.write_input('qc.in')
        # step 3
        self.run('geometric-optimize --qccnv --reset --epsilon 0.0 --qchem qc.in constraints.txt > optimize.log', input_files=['qc.in', 'constraints.txt'], output_files=['optimize.log', 'opt.xyz', 'energy.txt'])

    def load_native_output(self, filename='qc.out'):
        """
        Load the optimized geometry and energy into a new molecule object and return.

        The file is parsed as a QChem output ("qcout") and its last frame is returned.
        """
        m = Molecule(filename, ftype="qcout")[-1]
        return m
def load_input(self, input_file):
    """
    Load QChem input into a template (self.qchem_temp) and a Molecule (self.M).

    Coordinate lines inside the $molecule block are replaced in the template
    by one '$!geometry@here' marker; a '$!optblock@here' marker is added
    right after the $end that closes the $molecule block.

    Example input file:

    $molecule
    0 1
    H  -3.20093  1.59945  -0.91132
    O  -2.89333  1.61677  -0.01202
    O  -1.41314  1.60154   0.01202
    H  -1.10554  1.61886   0.91132
    $end

    $rem
    jobtype              opt
    exchange             hf
    basis                3-21g
    geom_opt_max_cycles  150
    $end

    Raises AssertionError if no geometry is found in the $molecule block or
    if the job type does not match self.native_opt.
    """
    elems, coords = [], []
    reading_molecule, found_geo = False, False
    qchem_temp = []  # store a template of the input file for generating new ones
    with open(input_file) as qchemin:
        for line in qchemin:
            line_sl = line.strip().lower()
            if line_sl.startswith("$molecule"):
                reading_molecule = True
                qchem_temp.append(line)
            elif reading_molecule:
                ls = line.split()
                if len(ls) == 4 and check_all_float(ls[1:]):
                    if not found_geo:
                        found_geo = True
                        qchem_temp.append("$!geometry@here")
                    elems.append(ls[0])
                    coords.append(ls[1:])
                else:
                    qchem_temp.append(line)
                    if line_sl.startswith('$end'):
                        reading_molecule = False
                        qchem_temp.append("$!optblock@here")
            else:
                qchem_temp.append(line)
                if line_sl.startswith('jobtype'):
                    rem_fields = line_sl.split()
                    # a bare "jobtype" line has no value to inspect
                    if len(rem_fields) > 1:
                        jobtype = rem_fields[1]
                        if jobtype.startswith('opt'):
                            self.temp_type = 'optimize'
                        elif jobtype.startswith('force'):
                            self.temp_type = 'gradient'
    # same check as the Psi4 and Gaussian loaders: fail early with a clear message
    assert found_geo, "XYZ geometry not found in molecule block of %s" % input_file
    if self.native_opt:
        assert self.temp_type == 'optimize', "input_file should be a opt job to use native opt"
    else:
        assert self.temp_type == 'gradient', "input_file should be a gradient job to use geomeTRIC"
    # self.qchem_temp will enable writing input files with new geometries
    self.qchem_temp = qchem_temp
    # here self.M can be and will be overwritten by external functions
    self.M = Molecule()
    self.M.elem = elems
    self.M.xyzs = [np.array(coords, dtype=np.float64)]
    self.M.build_topology()