def load_native_output(self, filename='output.dat'):
    """
    Load the optimized geometry and energy into a new molecule object and return

    Parameters
    ----------
    filename: str
        Path of the optimization output file to parse.

    Returns
    -------
    m: Molecule
        Single-frame molecule carrying the parsed elements, coordinates and
        final energy, with its topology built.

    Raises
    ------
    RuntimeError
        If no 'Final energy is' line is present, or if no geometry rows are
        found after the 'Final optimized geometry and variables' marker.
    """
    found_opt_result = False
    final_energy, elems, coords = None, [], []
    with open(filename) as outfile:
        for line in outfile:
            line = line.strip()
            if line.startswith('Final energy is'):
                # the last occurrence in the file wins
                final_energy = float(line.split()[-1])
            elif line.startswith('Final optimized geometry and variables'):
                found_opt_result = True
            elif found_opt_result:
                # geometry rows are "<element> <x> <y> <z>"
                ls = line.split()
                if len(ls) == 4 and check_all_float(ls[1:]):
                    elems.append(ls[0])
                    coords.append(ls[1:4])
    if final_energy is None:
        raise RuntimeError("Final energy not found in %s" % filename)
    if not elems or not coords:
        raise RuntimeError("Final geometry not found in %s" % filename)
    m = Molecule()
    m.elem = elems
    m.xyzs = [np.array(coords, dtype=np.float64)]
    m.qm_energies = [final_energy]
    m.build_topology()
    return m
def load_native_output(self, filename='output.dat'):
    """
    Parse an optimization output file into a fresh single-frame molecule.

    The final energy is taken from the last line starting with
    'Final energy is'. Geometry rows ("<element> <x> <y> <z>") are collected
    from every line that follows the 'Final optimized geometry and variables'
    marker.

    Parameters
    ----------
    filename: str
        Path of the output file to read.

    Returns
    -------
    Molecule
        Molecule with elem, xyzs and qm_energies filled in and topology built.

    Raises
    ------
    RuntimeError
        When the energy line or the geometry rows cannot be found.
    """
    energy = None
    symbols = []
    positions = []
    past_marker = False
    with open(filename) as handle:
        for raw in handle:
            text = raw.strip()
            if text.startswith('Final energy is'):
                energy = float(text.split()[-1])
                continue
            if text.startswith('Final optimized geometry and variables'):
                past_marker = True
                continue
            if not past_marker:
                continue
            fields = text.split()
            if len(fields) != 4 or not check_all_float(fields[1:]):
                continue
            symbols.append(fields[0])
            positions.append(fields[1:4])
    if energy is None:
        raise RuntimeError("Final energy not found in %s" % filename)
    if not symbols or not positions:
        raise RuntimeError("Final geometry not found in %s" % filename)
    result = Molecule()
    result.elem = symbols
    result.xyzs = [np.array(positions, dtype=float)]
    result.qm_energies = [energy]
    result.build_topology()
    return result
def get_next_jobs(current_state, verbose=False):
    """
    Replay a scan from its saved state and return the optimizations still missing.

    A new DihedralScanRepeater is created from the information in
    current_state. The whole scan is reproduced from the beginning, reusing
    every cached result, until a job is requested that is not in the cache.

    Parameters
    ----------
    current_state: dict
        A dictionary containing information of the scan state.
        Required keys: 'dihedrals', 'grid_spacing', 'elements', 'init_coords', 'grid_status'
        Optional keys: 'dihedral_ranges', 'energy_decrease_thresh', 'energy_upper_limit'
    verbose: bool
        Forwarded unchanged to the DihedralScanRepeater.

    Returns
    -------
    next_jobs: dict
        key is the target grid_id, value is a list of new_job.
        Each new_job is represented by its start_geo
        * Note: the order of new_job should correspond to the finished job_info.

    Examples
    --------
    current_state = {
        'dihedrals': [[0,1,2,3], [1,2,3,4]],
        'grid_spacing': [30, 30],
        'elements': ['H', 'C', 'O', ...],
        'init_coords': [geo1, geo2, ..],
        'grid_status': {(30, 60): [(start_geo, end_geo, end_energy), ..], ...}
    }
    get_next_jobs(current_state) ->
    {
        (90, 60): [start_geo1, start_geo2, ..],
        (90, 90): [start_geo3, start_geo4, ..],
    }
    """
    dihedrals = current_state['dihedrals']
    grid_spacing = current_state['grid_spacing']
    # reassemble the starting-geometry molecule from the serialized state
    start_mol = Molecule()
    start_mol.elem = current_state['elements']
    start_mol.xyzs = current_state['init_coords']
    start_mol.build_topology()
    # the repeater is driven by a blank engine
    blank_engine = EngineBlank()
    # optional settings are None when the state does not carry them
    repeater_options = {
        'init_coords_M': start_mol,
        'dihedral_ranges': current_state.get('dihedral_ranges'),
        'energy_decrease_thresh': current_state.get('energy_decrease_thresh'),
        'energy_upper_limit': current_state.get('energy_upper_limit'),
        'verbose': verbose,
    }
    repeater = DihedralScanRepeater(blank_engine, dihedrals, grid_spacing, **repeater_options)
    # feed the finished results back in, then replay until the cache misses
    repeater.rebuild_task_cache(current_state['grid_status'])
    repeater.repeat_scan_process()
    return repeater.next_jobs
def load_native_output(self, filename='ligand.fchk', filename2='gaussian.log'):
    """
    Build a molecule from an optimization's formatted checkpoint and log file.

    The log file (filename2) is only used to confirm that the optimization
    finished. The coordinates and the energy are read from the fchk file
    (filename). Elements are copied from self.M.

    Parameters
    ----------
    filename: str
        Path of the formatted checkpoint file.
    filename2: str
        Path of the log file.

    Returns
    -------
    Molecule
        Single-frame molecule with xyzs and qm_energies set and topology built.

    Raises
    ------
    RuntimeError
        If the log has no 'Optimization completed' line, if the coordinate
        section is missing, or if no 'Total Energy' line is found.
    AssertionError
        If the number of values read differs from the count in the section header.
    """
    # Either of these log lines counts as success:
    #   Optimization completed.
    #   Optimization completed on the basis of negligible forces.
    with open(filename2) as log_handle:
        converged = any('Optimization completed' in entry for entry in log_handle)
    if not converged:
        raise RuntimeError("Geometry optimization failed in %s" % filename2)

    energy = None
    flat_xyz = []
    section_end = None  # index of the first line after the coordinate section
    with open(filename) as fchk_handle:
        for lineno, text in enumerate(fchk_handle):
            if 'Current cartesian coordinates' in text:
                # the header carries the value count; values are laid out five per line
                num_xyz = int(text.split()[5])
                section_end = int(np.ceil(num_xyz / 5) + lineno + 1)
            elif section_end is not None and lineno < section_end:
                # 0.529177 rescales the stored values (presumably Bohr -> Angstrom; confirm)
                for token in text.split():
                    flat_xyz.append(float(token) * 0.529177)
            elif 'Total Energy' in text:
                energy = float(text.split()[3])

    if section_end is None:
        raise RuntimeError('Cannot locate coordinates in ligand.fchk file.')
    assert len(flat_xyz) == num_xyz, "Could not extract the optimised geometry"
    if energy is None:
        raise RuntimeError("Final energy not found in %s" % filename)

    result = Molecule()
    result.elem = self.M.elem
    result.xyzs = [np.reshape(flat_xyz, (int(len(result.elem)), 3))]
    result.qm_energies = [energy]
    result.build_topology()
    return result
def load_native_output(self, filename='lig.fchk', filename2='gaussian.log'):
    """
    Load the optimized geometry and energy into a new molecule object and return

    The log file (filename2) is only used to confirm that the optimization
    completed. Coordinates and energy are read from the fchk file (filename),
    and elements are copied from self.M.

    Parameters
    ----------
    filename: str
        Path of the formatted checkpoint file.
    filename2: str
        Path of the log file.

    Returns
    -------
    m: Molecule
        Single-frame molecule with xyzs and qm_energies set and topology built.

    Raises
    ------
    RuntimeError
        If the log has no line starting with 'Optimization completed', if no
        'Total Energy' line is found, or if the coordinate section is empty.
    EOFError
        If the 'Current cartesian coordinates' section is missing.
    AssertionError
        If the number of values read differs from the count in the section header.
    """
    with open(filename2) as logfile:
        found_opt_result = any(
            line.strip().startswith('Optimization completed') for line in logfile)
    if not found_opt_result:
        raise RuntimeError("Geometry optimisation failed in %s" % filename2)

    with open(filename) as outfile:
        fchk_lines = outfile.readlines()

    # Start from None so a missing section is reported explicitly and does
    # not surface as an UnboundLocalError further down.
    start_xyz_pos = end_xyz_pos = energy_pos = None
    num_xyz = 0
    for counter, line in enumerate(fchk_lines):
        if line.startswith('Current cartesian coordinates'):
            start_xyz_pos = counter + 1
            # the header carries the value count; values are laid out five per line
            num_xyz = int(line.split()[5])
            end_xyz_pos = int(np.ceil(num_xyz / 5) + start_xyz_pos)
        if line.startswith('Total Energy'):
            energy_pos = counter
    if start_xyz_pos is None or end_xyz_pos is None:
        raise EOFError('Cannot locate coordinates in lig.fchk file.')

    # 0.529177 rescales the stored values (presumably Bohr -> Angstrom; confirm)
    coords = [
        float(num) * 0.529177
        for line in fchk_lines[start_xyz_pos:end_xyz_pos]
        for num in line.split()
    ]
    # Make sure we have all of the coordinates
    assert len(coords) == num_xyz, "Could not extract the optimised geometry"
    if energy_pos is None:
        raise RuntimeError("Final energy not found in %s" % filename)
    final_energy = float(fchk_lines[energy_pos].split()[3])
    if len(coords) == 0:
        raise RuntimeError("Final geometry not found in %s" % filename)

    m = Molecule()
    m.elem = self.M.elem
    m.xyzs = [np.reshape(coords, (int(len(m.elem)), 3))]
    m.qm_energies = [final_energy]
    m.build_topology()
    return m
def load_geomeTRIC_output(self):
    """
    Read the last frame of qdata.txt in the working directory as a molecule.

    Elements are copied from self.M because qdata.txt does not contain them.

    Returns
    -------
    Molecule
        The last frame of qdata.txt with elem set and topology built.

    Raises
    ------
    OSError
        If qdata.txt does not exist in the current directory.
    AssertionError
        If the frame does not carry exactly one energy and one gradient whose
        shape matches self.M.xyzs[0].
    """
    # The file name follows from the --prefix tdrive option; this also requires
    # that the input file is NOT named something like tdrive.in, otherwise the
    # output would become tdrive_optim.xyz.
    qdata_file = 'qdata.txt'
    if not os.path.isfile(qdata_file):
        raise OSError("geomeTRIC output qdata.txt file not found")
    last_frame = Molecule(qdata_file)[-1]
    last_frame.elem = self.M.elem
    # sanity-check what was loaded
    expected_shape = self.M.xyzs[0].shape
    assert len(last_frame.qm_energies) == 1
    assert len(last_frame.qm_grads) == 1 and last_frame.qm_grads[0].shape == expected_shape
    last_frame.build_topology()
    return last_frame
def get_next_jobs(current_state, verbose=False):
    """
    Replay a scan from its saved state and return the optimizations still missing.

    A new DihedralScanRepeater is created from the information in
    current_state. The whole scan is reproduced from the beginning, reusing
    every cached result, until a job is requested that is not in the cache.

    Input:
    -------
    current_state: dict, e.g. {
        'dihedrals': [[0,1,2,3], [1,2,3,4]],
        'grid_spacing': [30, 30],
        'elements': ['H', 'C', 'O', ...],
        'init_coords': [geo1, geo2, ..],
        'grid_status': {(30, 60): [(start_geo, end_geo, end_energy), ..], ...}
    }
    verbose: bool, forwarded unchanged to the DihedralScanRepeater

    Output:
    -------
    next_jobs: dict(), key is the target grid_id, value is a list of new_job.
        Each new_job is represented by its start_geo
        * Note: the order of new_job should correspond to the finished job_info.
    """
    scan_dihedrals = current_state['dihedrals']
    scan_spacing = current_state['grid_spacing']
    # reassemble the starting-geometry molecule from the serialized state
    start_mol = Molecule()
    start_mol.elem = current_state['elements']
    start_mol.xyzs = current_state['init_coords']
    start_mol.build_topology()
    # build the repeating scanner around a fresh QMEngine
    repeater = DihedralScanRepeater(QMEngine(), scan_dihedrals, scan_spacing, start_mol, verbose)
    # feed the finished results back in, then replay until the cache misses
    finished = current_state['grid_status']
    repeater.rebuild_task_cache(finished)
    repeater.repeat_scan_process()
    return repeater.next_jobs
def launch_opt_jobs(self):
    """
    Drain self.opt_queue, launching or replaying one constrained optimization per task.

    Tasks are popped from opt_queue until it is empty.
    If a task is found in self.task_cache, a result molecule is rebuilt from
    the cached data, its grid id is checked, and it is pushed onto
    self.current_finished_job_results with the job folder as priority.
    Otherwise the task is started with self.launch_constrained_opt and recorded as
    self.running_job_path_info[job_path] = m, from_grid_id, to_grid_id
    """
    assert hasattr(self, 'running_job_path_info') and hasattr(self, 'current_finished_job_results')
    while len(self.opt_queue) > 0:
        start_mol, from_grid_id, to_grid_id = self.opt_queue.pop()
        # look the starting geometry up among the results known for the target grid point
        geo_key = get_geo_key(start_mol.xyzs[0])
        known_results = self.task_cache[to_grid_id]
        if geo_key not in known_results:
            # not seen before: start a real optimization and remember where it came from
            job_path = self.launch_constrained_opt(start_mol, to_grid_id)
            self.running_job_path_info[job_path] = start_mol, from_grid_id, to_grid_id
            continue
        final_geo, final_energy, final_gradient, job_folder = known_results[geo_key]
        cached_mol = Molecule()
        cached_mol.elem = list(start_mol.elem)
        cached_mol.xyzs = [final_geo]
        cached_mol.qm_energies = [final_energy]
        if final_gradient is not None:
            cached_mol.qm_grads = [final_gradient]
        cached_mol.build_topology()
        grid_id = self.get_dihedral_id(cached_mol, check_grid_id=to_grid_id)
        if grid_id is not None:
            self.current_finished_job_results.push((cached_mol, grid_id), priority=job_folder)
        else:
            print(
                f"Cached result from {job_folder} is ignored because optimized geometry is far from grid id {to_grid_id}"
            )
def launch_opt_jobs(self):
    """
    Drain self.opt_queue, launching or replaying one constrained optimization per task.

    Tasks are popped from opt_queue until it is empty.
    If a task is found in self.task_cache, a result molecule is rebuilt from
    the cached geometry and energy and pushed onto
    self.current_finished_job_results with the job folder as priority.
    Otherwise the task is started with self.launch_constrained_opt and recorded as
    self.running_job_path_info[job_path] = m, from_grid_id, to_grid_id

    Nothing is returned; results are stored on the attributes named above.
    """
    assert hasattr(self, 'running_job_path_info') and hasattr(self, 'current_finished_job_results')
    while len(self.opt_queue) > 0:
        start_mol, from_grid_id, to_grid_id = self.opt_queue.pop()
        # look the starting geometry up among the results known for the target grid point
        geo_key = get_geo_key(start_mol.xyzs[0])
        known_results = self.task_cache[to_grid_id]
        if geo_key not in known_results:
            # not seen before: start a real optimization and remember where it came from
            job_path = self.launch_constrained_opt(start_mol, to_grid_id)
            self.running_job_path_info[job_path] = start_mol, from_grid_id, to_grid_id
            continue
        final_geo, final_energy, job_folder = known_results[geo_key]
        cached_mol = Molecule()
        cached_mol.elem = list(start_mol.elem)
        cached_mol.xyzs = [final_geo]
        cached_mol.qm_energies = [final_energy]
        cached_mol.build_topology()
        grid_id = self.get_dihedral_id(cached_mol, check_grid_id=to_grid_id)
        self.current_finished_job_results.push((cached_mol, grid_id), priority=job_folder)
class EngineQChem(QMEngine):
    """Engine that writes QChem inputs and runs them natively or through geomeTRIC."""

    def load_input(self, input_file):
        """
        Load QChem input

        The file is kept as a template (self.qchem_temp) in which the geometry
        rows and the position after the $molecule block are replaced by
        placeholders, and the geometry is stored in self.M.

        Example input file:

        $molecule
        0 1
        H  -3.20093  1.59945  -0.91132
        O  -2.89333  1.61677  -0.01202
        O  -1.41314  1.60154   0.01202
        H  -1.10554  1.61886   0.91132
        $end

        $rem
        jobtype              opt
        exchange             hf
        basis                3-21g
        geom_opt_max_cycles  150
        $end

        Raises
        ------
        AssertionError
            If no XYZ geometry is found in the $molecule block, or if the
            jobtype does not match self.native_opt.
        """
        elems, coords = [], []
        reading_molecule, found_geo = False, False
        qchem_temp = []  # store a template of the input file for generating new ones
        with open(input_file) as qchemin:
            for line in qchemin:
                line_sl = line.strip().lower()
                if line_sl.startswith("$molecule"):
                    reading_molecule = True
                    qchem_temp.append(line)
                elif reading_molecule:
                    ls = line.split()
                    if len(ls) == 4 and check_all_float(ls[1:]):
                        # all geometry rows collapse into a single placeholder
                        if not found_geo:
                            found_geo = True
                            qchem_temp.append("$!geometry@here")
                        elems.append(ls[0])
                        coords.append(ls[1:])
                    else:
                        qchem_temp.append(line)
                        if line_sl.startswith('$end'):
                            reading_molecule = False
                            # the $opt block goes right after the $molecule block
                            qchem_temp.append("$!optblock@here")
                else:
                    qchem_temp.append(line)
                    if line_sl.startswith('jobtype'):
                        jobtype = line_sl.split()[1]
                        if jobtype.startswith('opt'):
                            self.temp_type = 'optimize'
                        elif jobtype.startswith('force'):
                            self.temp_type = 'gradient'
        # same check as the other engines' load_input: fail early and clearly
        # and avoid building an empty molecule
        assert found_geo, "XYZ geometry not found in molecule block of %s" % input_file
        if self.native_opt:
            assert self.temp_type == 'optimize', "input_file should be a opt job to use native opt"
        else:
            assert self.temp_type == 'gradient', "input_file should be a gradient job to use geomeTRIC"
        # self.qchem_temp will enable writing input files with new geometries
        self.qchem_temp = qchem_temp
        # here self.M can be and will be overwritten by external functions
        self.M = Molecule()
        self.M.elem = elems
        self.M.xyzs = [np.array(coords, dtype=float)]
        self.M.build_topology()

    def write_input(self, filename='qc.in'):
        """
        Write QChem input using Molecule Class

        The template from load_input() is written to filename, with the
        geometry placeholder replaced by the first frame of self.M and the
        opt-block placeholder replaced by self.optblockStr when it is set.
        """
        assert hasattr(self, 'qchem_temp'), "self.qchem_temp not set, call load_input() first"
        with open(filename, 'w') as outfile:
            for line in self.qchem_temp:
                if line == '$!geometry@here':
                    for e, c in zip(self.M.elem, self.M.xyzs[0]):
                        outfile.write("%-7s %13.7f %13.7f %13.7f\n" % (e, c[0], c[1], c[2]))
                elif line == "$!optblock@here":
                    if hasattr(self, 'optblockStr'):
                        # self.optblockStr will be set by self.optimize_native()
                        outfile.write(self.optblockStr)
                else:
                    outfile.write(line)

    def optimize_native(self):
        """
        Run the constrained optimization, following QChem 5.0 manual.
        1. write a optimization job input file.
        2. run the job

        Raises
        ------
        AssertionError
            If the loaded input is not an optimization job.
        RuntimeError
            If extra constraints were requested.
        """
        assert self.temp_type == 'optimize', "To use native optimization, the input file be an opt job"
        if self.extra_constraints is not None:
            raise RuntimeError('extra constraints not supported in Q-Chem native optimizations')
        # add the $opt block
        self.optblockStr = '\n$opt\nCONSTRAINT\n'
        for d1, d2, d3, d4, v in self.dihedral_idx_values:
            # atom indices in the $opt block start from 1
            self.optblockStr += 'tors %d %d %d %d %f\n' % (d1 + 1, d2 + 1, d3 + 1, d4 + 1, v)
        self.optblockStr += 'ENDCONSTRAINT\n$end\n'
        # write input file
        self.write_input('qc.in')
        # run the job
        self.run('qchem qc.in qc.out > qc.log', input_files=['qc.in'], output_files=['qc.out', 'qc.log'])

    def optimize_geomeTRIC(self):
        """
        run the constrained optimization using geomeTRIC package, in 3 steps:
        1. Write a constraints.txt file.
        2. Write a gradient job input file.
        3. Run the job
        """
        assert self.temp_type == 'gradient', "To use geomeTRIC package, the input file should have gradient() in it"
        # step 1
        self.write_constraints_txt()
        # step 2
        self.write_input('qc.in')
        # step 3
        cmd = 'geometric-optimize --prefix tdrive --qccnv --reset --epsilon 0.0 --enforce 0.1 --qdata --qchem qc.in constraints.txt'
        self.run(cmd, input_files=['qc.in', 'constraints.txt'], output_files=['tdrive.log', 'tdrive.xyz', 'qdata.txt'])

    def load_native_output(self, filename='qc.out'):
        """
        Load the optimized geometry and energy into a new molecule object and return

        The file is parsed by Molecule with ftype "qcout" and the last frame is returned.
        """
        m = Molecule(filename, ftype="qcout")[-1]
        return m
class EngineGaussian09(QMEngine):
    """Engine that writes Gaussian09 inputs and runs native constrained optimizations."""

    def load_input(self, input_file):
        """
        !!!only Cartesian molecule specification is supported at the moment!!!
        Load Gaussian09 input

        The file is kept as a template (self.gauss_temp) in which the geometry
        rows and the position after the molecule specification are replaced by
        placeholders, and the geometry is stored in self.M.

        Example input file:

        %Mem=6GB
        %NProcShared=2
        %Chk=lig
        # B3LYP/6-31G(d) Opt=ModRedundant

        water energy

        0 1
        O  -0.464   0.177   0.0
        H  -0.464   1.137   0.0
        H   0.441  -0.143   0.0

        Raises
        ------
        AssertionError
            If no XYZ geometry is found, or if self.native_opt is set and the
            input is not recognised as an optimization job.
        """
        elems, coords = [], []
        reading_molecule, found_geo = False, False
        gauss_temp = []  # store a template of the input file for generating new ones
        with open(input_file) as gauss_in:
            for line in gauss_in:
                ls = line.split()
                if len(ls) == 4 and check_all_float(ls[1:]):
                    reading_molecule = True
                    elems.append(ls[0])
                    coords.append(ls[1:])
                    # all geometry rows collapse into a single placeholder
                    if not found_geo:
                        found_geo = True
                        gauss_temp.append("$!geometry@here")
                elif reading_molecule:
                    # the first blank line ends the molecule specification
                    if line.strip().lower() == '':
                        reading_molecule = False
                        gauss_temp.append(line)
                        gauss_temp.append("$!optblock@here")
                else:
                    gauss_temp.append(line)
                    if 'opt' in line.lower():
                        self.temp_type = 'optimize'
        assert found_geo, "XYZ geometry not found in molecule block of %s" % input_file
        if self.native_opt:
            assert self.temp_type == 'optimize', "input_file should be a opt job to use native opt"
        self.gauss_temp = gauss_temp
        self.M = Molecule()
        self.M.elem = elems
        self.M.xyzs = [np.array(coords, dtype=float)]
        self.M.build_topology()

    def write_input(self, filename='gaussian.com'):
        """
        Write Gaussian input using Molecule Class

        The template from load_input() is written to filename, with the
        geometry placeholder replaced by the first frame of self.M and the
        opt-block placeholder replaced by self.optblockStr when it is set.
        """
        assert hasattr(self, 'gauss_temp'), "self.gauss_temp not set, call load_input() first"
        with open(filename, 'w') as outfile:
            for line in self.gauss_temp:
                if line == '$!geometry@here':
                    for e, c in zip(self.M.elem, self.M.xyzs[0]):
                        outfile.write("%-7s %13.7f %13.7f %13.7f\n" % (e, c[0], c[1], c[2]))
                elif line == "$!optblock@here":
                    if hasattr(self, 'optblockStr'):
                        # self.optblockStr will be set by self.optimize_native()
                        outfile.write(self.optblockStr)
                else:
                    outfile.write(line)

    def optimize_native(self):
        """
        Run the constrained optimization, following Gaussian09 manual.
        1. write a optimization job input file.
        2. run the job

        Raises
        ------
        AssertionError
            If the loaded input is not an optimization job.
        RuntimeError
            If extra constraints were requested.
        """
        assert self.temp_type == 'optimize', "To use native optimization, the input file be an opt job"
        if self.extra_constraints is not None:
            raise RuntimeError('extra constraints not supported in Gaussian09 native optimizations')
        self.optblockStr = ''
        for d1, d2, d3, d4, v in self.dihedral_idx_values:
            # atom indices are written starting from 1
            self.optblockStr += f'{d1 + 1} {d2 + 1} {d3 + 1} {d4 + 1} ={v:.3f} B\n'  # Build the angle
            self.optblockStr += f'{d1 + 1} {d2 + 1} {d3 + 1} {d4 + 1} F\n'  # Freeze the angle
        # write input file
        self.write_input('gaussian.com')
        # run the job
        self.run('g09 < gaussian.com > gaussian.log', input_files=['gaussian.com'], output_files=['gaussian.log'])
        # convert the checkpoint to lig.fchk, which load_native_output() reads the geometry from
        self.run('formchk lig.chk lig.fchk')

    def load_native_output(self, filename='lig.fchk', filename2='gaussian.log'):
        """
        Load the optimized geometry and energy into a new molecule object and return

        The log file (filename2) is only used to confirm that the optimization
        completed. Coordinates and energy are read from the fchk file
        (filename), and elements are copied from self.M.

        Raises
        ------
        RuntimeError
            If the log has no line starting with 'Optimization completed', if
            no 'Total Energy' line is found, or if the coordinate section is empty.
        EOFError
            If the 'Current cartesian coordinates' section is missing.
        AssertionError
            If the number of values read differs from the count in the section header.
        """
        with open(filename2) as logfile:
            found_opt_result = any(
                line.strip().startswith('Optimization completed') for line in logfile)
        if not found_opt_result:
            raise RuntimeError("Geometry optimisation failed in %s" % filename2)

        with open(filename) as outfile:
            fchk_lines = outfile.readlines()

        # Start from None so a missing section is reported explicitly and
        # does not surface as an UnboundLocalError further down.
        start_xyz_pos = end_xyz_pos = energy_pos = None
        num_xyz = 0
        for counter, line in enumerate(fchk_lines):
            if line.startswith('Current cartesian coordinates'):
                start_xyz_pos = counter + 1
                # the header carries the value count; values are laid out five per line
                num_xyz = int(line.split()[5])
                end_xyz_pos = int(np.ceil(num_xyz / 5) + start_xyz_pos)
            if line.startswith('Total Energy'):
                energy_pos = counter
        if start_xyz_pos is None or end_xyz_pos is None:
            raise EOFError('Cannot locate coordinates in lig.fchk file.')

        # 0.529177 rescales the stored values (presumably Bohr -> Angstrom; confirm)
        coords = [
            float(num) * 0.529177
            for line in fchk_lines[start_xyz_pos:end_xyz_pos]
            for num in line.split()
        ]
        # Make sure we have all of the coordinates
        assert len(coords) == num_xyz, "Could not extract the optimised geometry"
        if energy_pos is None:
            raise RuntimeError("Final energy not found in %s" % filename)
        final_energy = float(fchk_lines[energy_pos].split()[3])
        if len(coords) == 0:
            raise RuntimeError("Final geometry not found in %s" % filename)

        m = Molecule()
        m.elem = self.M.elem
        m.xyzs = [np.reshape(coords, (int(len(m.elem)), 3))]
        m.qm_energies = [final_energy]
        m.build_topology()
        return m
class EnginePsi4(QMEngine):
    def load_input(self, input_file):
        """
        Read a Psi4 input file, keeping it as a template and its geometry in self.M.

        Only xyz input coordinates are supported for now.

        Example input file:

        memory 12 gb
        molecule {
        0 1
        H  -0.90095  -0.50851  -0.76734
        O  -0.72805   0.02496   0.02398
        O   0.72762   0.03316  -0.02696
        H   0.90782  -0.41394   0.81465
        units angstrom
        no_reorient
        symmetry c1
        }
        set globals {
            basis         6-31+g*
            freeze_core   True
            guess         sad
            scf_type      df
            print         1
        }
        set_num_threads(1)
        gradient('mp2')
        """
        symbols = []
        positions = []
        template = []  # input lines with placeholders, reused to write new inputs
        inside_molecule = False
        geometry_seen = False
        with open(input_file) as handle:
            for raw in handle:
                lowered = raw.strip().lower()
                if lowered.startswith("molecule"):
                    inside_molecule = True
                    template.append(raw)
                    continue
                if inside_molecule is True:
                    fields = raw.split()
                    if len(fields) == 4 and check_all_float(fields[1:]):
                        # one placeholder stands in for all geometry rows
                        if not geometry_seen:
                            geometry_seen = True
                            template.append("$!geometry@here")
                        symbols.append(fields[0])
                        positions.append(fields[1:4])
                        continue
                    template.append(raw)
                    if '}' in raw:
                        # end of the molecule block: reserve a slot for the optking settings
                        inside_molecule = False
                        template.append("$!optking@here")
                    continue
                template.append(raw)
                if lowered.startswith('gradient('):
                    self.temp_type = "gradient"
                elif lowered.startswith('optimize('):
                    self.temp_type = "optimize"
        assert geometry_seen, "XYZ geometry not found in molecule block of %s" % input_file
        if self.native_opt:
            assert self.temp_type == 'optimize', "input_file should contain optimize() command to use native opt"
        else:
            assert self.temp_type == 'gradient', "input_file should contain gradient() command to use geomeTRIC"
        # the template is what write_input() fills in with new geometries
        self.psi4_temp = template
        # self.M can be and will be overwritten by external functions
        self.M = Molecule()
        self.M.elem = symbols
        self.M.xyzs = [np.array(positions, dtype=float)]
        self.M.build_topology()

    def write_input(self, filename='input.dat'):
        """
        Fill the stored template with the first frame of self.M and write it to filename.

        The optking placeholder is replaced by self.optkingStr when that
        attribute exists; otherwise nothing is written in its place.
        """
        assert hasattr(self, 'psi4_temp'), "psi4_temp not found, call self.load_input() first"
        with open(filename, 'w') as sink:
            for entry in self.psi4_temp:
                if entry == '$!geometry@here':
                    sink.writelines(
                        "%-7s %13.7f %13.7f %13.7f\n" % (symbol, xyz[0], xyz[1], xyz[2])
                        for symbol, xyz in zip(self.M.elem, self.M.xyzs[0]))
                elif entry == '$!optking@here':
                    if hasattr(self, 'optkingStr'):
                        sink.write(self.optkingStr)
                else:
                    sink.write(entry)

    def optimize_native(self):
        """
        Run the constrained optimization with the native optimizer, in 2 steps:
        1. write a optimization job input file.
        2. run the job
        """
        assert self.temp_type == 'optimize', "To use native optimization, the input file should have optimize() in it"
        if self.extra_constraints is not None:
            raise RuntimeError('extra constraints not supported in Psi4 native optimizations')
        # one fixed_dihedral row per constrained torsion; atom indices start from 1
        torsion_rows = [
            ' %d %d %d %d %f\n' % (a + 1, b + 1, c + 1, d + 1, value)
            for a, b, c, d, value in self.dihedral_idx_values
        ]
        self.optkingStr = '\nset optking {\n fixed_dihedral = ("\n' + ''.join(torsion_rows) + ' ")\n}\n'
        self.write_input('input.dat')
        self.run('psi4 input.dat -o output.dat', input_files=['input.dat'], output_files=['output.dat'])

    def optimize_geomeTRIC(self):
        """
        Run the constrained optimization using the geomeTRIC package, in 3 steps:
        1. Write a constraints.txt file.
        2. Write a gradient job input file.
        3. Run the job
        """
        assert self.temp_type == 'gradient', "To use geomeTRIC package, the input file should have gradient() in it"
        self.write_constraints_txt()
        self.write_input('input.dat')
        command = 'geometric-optimize --prefix tdrive --qccnv --reset --epsilon 0.0 --enforce 0.1 --qdata --psi4 input.dat constraints.txt'
        needed = ['input.dat', 'constraints.txt']
        produced = ['tdrive.log', 'tdrive.xyz', 'qdata.txt']
        self.run(command, input_files=needed, output_files=produced)

    def load_native_output(self, filename='output.dat'):
        """
        Parse the native optimization output into a fresh single-frame molecule.

        The final energy is taken from the last line starting with
        'Final energy is'. Geometry rows are collected from every line that
        follows the 'Final optimized geometry and variables' marker. A
        RuntimeError is raised when either one is missing.
        """
        energy = None
        symbols = []
        positions = []
        past_marker = False
        with open(filename) as handle:
            for raw in handle:
                text = raw.strip()
                if text.startswith('Final energy is'):
                    energy = float(text.split()[-1])
                    continue
                if text.startswith('Final optimized geometry and variables'):
                    past_marker = True
                    continue
                if not past_marker:
                    continue
                fields = text.split()
                if len(fields) != 4 or not check_all_float(fields[1:]):
                    continue
                symbols.append(fields[0])
                positions.append(fields[1:4])
        if energy is None:
            raise RuntimeError("Final energy not found in %s" % filename)
        if not symbols or not positions:
            raise RuntimeError("Final geometry not found in %s" % filename)
        result = Molecule()
        result.elem = symbols
        result.xyzs = [np.array(positions, dtype=float)]
        result.qm_energies = [energy]
        result.build_topology()
        return result
class EngineQChem(QMEngine):
    """Engine that writes QChem inputs and runs them natively or through geomeTRIC."""

    def load_input(self, input_file):
        """
        Load QChem input

        The file is kept as a template (self.qchem_temp) in which the geometry
        rows and the position after the $molecule block are replaced by
        placeholders, and the geometry is stored in self.M.

        Example input file:

        $molecule
        0 1
        H  -3.20093  1.59945  -0.91132
        O  -2.89333  1.61677  -0.01202
        O  -1.41314  1.60154   0.01202
        H  -1.10554  1.61886   0.91132
        $end

        $rem
        jobtype              opt
        exchange             hf
        basis                3-21g
        geom_opt_max_cycles  150
        $end

        Raises
        ------
        AssertionError
            If no XYZ geometry is found in the $molecule block, or if the
            jobtype does not match self.native_opt.
        """
        elems, coords = [], []
        reading_molecule, found_geo = False, False
        qchem_temp = []  # store a template of the input file for generating new ones
        with open(input_file) as qchemin:
            for line in qchemin:
                line_sl = line.strip().lower()
                if line_sl.startswith("$molecule"):
                    reading_molecule = True
                    qchem_temp.append(line)
                elif reading_molecule:
                    ls = line.split()
                    if len(ls) == 4 and check_all_float(ls[1:]):
                        # all geometry rows collapse into a single placeholder
                        if not found_geo:
                            found_geo = True
                            qchem_temp.append("$!geometry@here")
                        elems.append(ls[0])
                        coords.append(ls[1:])
                    else:
                        qchem_temp.append(line)
                        if line_sl.startswith('$end'):
                            reading_molecule = False
                            # the $opt block goes right after the $molecule block
                            qchem_temp.append("$!optblock@here")
                else:
                    qchem_temp.append(line)
                    if line_sl.startswith('jobtype'):
                        jobtype = line_sl.split()[1]
                        if jobtype.startswith('opt'):
                            self.temp_type = 'optimize'
                        elif jobtype.startswith('force'):
                            self.temp_type = 'gradient'
        # same check as the Psi4 engine's load_input: fail early and clearly
        # and avoid building an empty molecule
        assert found_geo, "XYZ geometry not found in molecule block of %s" % input_file
        if self.native_opt:
            assert self.temp_type == 'optimize', "input_file should be a opt job to use native opt"
        else:
            assert self.temp_type == 'gradient', "input_file should be a gradient job to use geomeTRIC"
        # self.qchem_temp will enable writing input files with new geometries
        self.qchem_temp = qchem_temp
        # here self.M can be and will be overwritten by external functions
        self.M = Molecule()
        self.M.elem = elems
        self.M.xyzs = [np.array(coords, dtype=np.float64)]
        self.M.build_topology()

    def write_input(self, filename='qc.in'):
        """
        Write QChem input using Molecule Class

        The template from load_input() is written to filename, with the
        geometry placeholder replaced by the first frame of self.M and the
        opt-block placeholder replaced by self.optblockStr when it is set.
        """
        assert hasattr(self, 'qchem_temp'), "self.qchem_temp not set, call load_input() first"
        with open(filename, 'w') as outfile:
            for line in self.qchem_temp:
                if line == '$!geometry@here':
                    for e, c in zip(self.M.elem, self.M.xyzs[0]):
                        outfile.write("%-7s %13.7f %13.7f %13.7f\n" % (e, c[0], c[1], c[2]))
                elif line == "$!optblock@here":
                    if hasattr(self, 'optblockStr'):
                        # self.optblockStr will be set by self.optimize_native()
                        outfile.write(self.optblockStr)
                else:
                    outfile.write(line)

    def optimize_native(self):
        """
        Run the constrained optimization, following QChem 5.0 manual.
        1. write a optimization job input file.
        2. run the job
        """
        assert self.temp_type == 'optimize', "To use native optimization, the input file be an opt job"
        # add the $opt block
        self.optblockStr = '\n$opt\nCONSTRAINT\n'
        for d1, d2, d3, d4, v in self.dihedral_idx_values:
            # atom indices in the $opt block start from 1
            self.optblockStr += 'tors %d %d %d %d %f\n' % (d1+1, d2+1, d3+1, d4+1, v)
        self.optblockStr += 'ENDCONSTRAINT\n$end\n'
        # write input file
        self.write_input('qc.in')
        # run the job
        self.run('qchem qc.in qc.out > qc.log', input_files=['qc.in'], output_files=['qc.out', 'qc.log'])

    def optimize_geomeTRIC(self):
        """
        run the constrained optimization using geomeTRIC package, in 3 steps:
        1. Write a constraints.txt file.
        2. Write a gradient job input file.
        3. Run the job
        """
        assert self.temp_type == 'gradient', "To use geomeTRIC package, the input file should have gradient() in it"
        # step 1
        self.write_constraints_txt()
        # step 2
        self.write_input('qc.in')
        # step 3
        self.run('geometric-optimize --qccnv --reset --epsilon 0.0 --qchem qc.in constraints.txt > optimize.log', input_files=['qc.in', 'constraints.txt'], output_files=['optimize.log', 'opt.xyz', 'energy.txt'])

    def load_native_output(self, filename='qc.out'):
        """
        Load the optimized geometry and energy into a new molecule object and return

        The file is parsed by Molecule with ftype "qcout" and the last frame is returned.
        """
        m = Molecule(filename, ftype="qcout")[-1]
        return m
class EnginePsi4(QMEngine):
    """Engine that writes Psi4 inputs and runs them natively or through geomeTRIC."""

    def load_input(self, input_file):
        """
        Load a Psi4 input file as a Molecule object into self.M
        Only xyz input coordinates are supported for now.

        The file is also kept as a template (self.psi4_temp) in which the
        geometry rows and the position after the molecule block are replaced
        by placeholders.

        Example input file:

        memory 12 gb
        molecule {
        0 1
        H  -0.90095  -0.50851  -0.76734
        O  -0.72805   0.02496   0.02398
        O   0.72762   0.03316  -0.02696
        H   0.90782  -0.41394   0.81465
        units angstrom
        no_reorient
        symmetry c1
        }
        set globals {
            basis         6-31+g*
            freeze_core   True
            guess         sad
            scf_type      df
            print         1
        }
        set_num_threads(1)
        gradient('mp2')

        Raises
        ------
        AssertionError
            If no XYZ geometry is found in the molecule block, or if the job
            command does not match self.native_opt.
        """
        coords = []
        elems = []
        reading_molecule, found_geo = False, False
        psi4_temp = []  # store a template of the input file for generating new ones
        with open(input_file) as psi4in:
            for line in psi4in:
                line_sl = line.strip().lower()
                if line_sl.startswith("molecule"):
                    reading_molecule = True
                    psi4_temp.append(line)
                elif reading_molecule:
                    ls = line.split()
                    if len(ls) == 4 and check_all_float(ls[1:]):
                        # all geometry rows collapse into a single placeholder
                        if not found_geo:
                            found_geo = True
                            psi4_temp.append("$!geometry@here")
                        # parse the xyz format
                        elems.append(ls[0])
                        coords.append(ls[1:4])
                    else:
                        psi4_temp.append(line)
                        if '}' in line:
                            reading_molecule = False
                            # the optking settings go right after the molecule block
                            psi4_temp.append("$!optking@here")
                else:
                    psi4_temp.append(line)
                    if line_sl.startswith('gradient('):
                        self.temp_type = "gradient"
                    elif line_sl.startswith('optimize('):
                        self.temp_type = "optimize"
        assert found_geo, "XYZ geometry not found in molecule block of %s" % input_file
        if self.native_opt:
            assert self.temp_type == 'optimize', "input_file should contain optimize() command to use native opt"
        else:
            assert self.temp_type == 'gradient', "input_file should contain gradient() command to use geomeTRIC"
        # self.psi4_temp will enable writing input files with new geometries
        self.psi4_temp = psi4_temp
        # here self.M can be and will be overwritten by external functions
        self.M = Molecule()
        self.M.elem = elems
        self.M.xyzs = [np.array(coords, dtype=np.float64)]
        self.M.build_topology()

    def write_input(self, filename='input.dat'):
        """
        Write output based on self.psi4_temp and self.M, using only geometry of the first frame

        The optking placeholder is replaced by self.optkingStr when it is set.
        """
        assert hasattr(self, 'psi4_temp'), "psi4_temp not found, call self.load_input() first"
        with open(filename, 'w') as outfile:
            for line in self.psi4_temp:
                if line == '$!geometry@here':
                    for e, c in zip(self.M.elem, self.M.xyzs[0]):
                        outfile.write("%-7s %13.7f %13.7f %13.7f\n" % (e, c[0], c[1], c[2]))
                elif line == '$!optking@here':
                    if hasattr(self, 'optkingStr'):
                        outfile.write(self.optkingStr)
                else:
                    outfile.write(line)

    def optimize_native(self):
        """
        run the constrained optimization using native Optking, in 2 steps:
        1. write a optimization job input file.
        2. run the job
        """
        assert self.temp_type == 'optimize', "To use native optimization, the input file should have optimize() in it"
        # add the optking command
        self.optkingStr = '\nset optking {\n fixed_dihedral = ("\n'
        for d1, d2, d3, d4, v in self.dihedral_idx_values:
            # Optking use atom index starting from 1
            self.optkingStr += ' %d %d %d %d %f\n' % (d1+1, d2+1, d3+1, d4+1, v)
        self.optkingStr += ' ")\n}\n'
        # write input file
        self.write_input('input.dat')
        # run the job
        self.run('psi4 input.dat -o output.dat', input_files=['input.dat'], output_files=['output.dat'])

    def optimize_geomeTRIC(self):
        """
        run the constrained optimization using geomeTRIC package, in 3 steps:
        1. Write a constraints.txt file.
        2. Write a gradient job input file.
        3. Run the job
        """
        assert self.temp_type == 'gradient', "To use geomeTRIC package, the input file should have gradient() in it"
        # step 1
        self.write_constraints_txt()
        # step 2
        self.write_input('input.dat')
        # step 3
        self.run('geometric-optimize --qccnv --reset --epsilon 0.0 --psi4 input.dat constraints.txt > optimize.log', input_files=['input.dat', 'constraints.txt'], output_files=['optimize.log', 'opt.xyz', 'energy.txt'])

    def load_native_output(self, filename='output.dat'):
        """
        Load the optimized geometry and energy into a new molecule object and return

        Raises
        ------
        RuntimeError
            If no 'Final energy is' line is present, or if no geometry rows are
            found after the 'Final optimized geometry and variables' marker.
        """
        found_opt_result = False
        final_energy, elems, coords = None, [], []
        with open(filename) as outfile:
            for line in outfile:
                line = line.strip()
                if line.startswith('Final energy is'):
                    # the last occurrence in the file wins
                    final_energy = float(line.split()[-1])
                elif line.startswith('Final optimized geometry and variables'):
                    found_opt_result = True
                elif found_opt_result:
                    # geometry rows are "<element> <x> <y> <z>"
                    ls = line.split()
                    if len(ls) == 4 and check_all_float(ls[1:]):
                        elems.append(ls[0])
                        coords.append(ls[1:4])
        if final_energy is None:
            raise RuntimeError("Final energy not found in %s" % filename)
        if not elems or not coords:
            raise RuntimeError("Final geometry not found in %s" % filename)
        m = Molecule()
        m.elem = elems
        m.xyzs = [np.array(coords, dtype=np.float64)]
        m.qm_energies = [final_energy]
        m.build_topology()
        return m
class EngineGaussian(QMEngine):
    """
    QM engine that drives Gaussian (g09 or g16), either through Gaussian's own
    constrained optimizer (Opt=ModRedundant) or through geomeTRIC.
    """

    def __init__(self, input_file=None, work_queue=None, native_opt=False, extra_constraints=None, exe=None):
        """
        Parameters
        ----------
        input_file, work_queue, native_opt, extra_constraints:
            Forwarded unchanged to QMEngine.__init__.
        exe: str
            Name of the Gaussian executable, "g09" or "g16" (case-insensitive).

        Raises
        ------
        ValueError
            If exe is missing or is not one of the supported executables.
        """
        super().__init__(input_file, work_queue, native_opt, extra_constraints)
        # Check which version of gaussian we have access to.
        # exe defaults to None, so guard before calling .lower() on it.
        gaussian_exe = exe.lower() if isinstance(exe, str) else None
        if gaussian_exe not in ("g09", "g16"):
            raise ValueError("Only g16 and g09 are supported.")
        self.gaussian_exe = gaussian_exe

    def load_input(self, input_file):
        """
        !!!only Cartesian molecule specification is supported at the moment!!!
        Load Gaussian09 input file, note blank lines at the bottom of the file are required

        The file is turned into a template (self.gauss_temp) in which the
        geometry is replaced by "$!geometry@here" and the blank line closing
        the geometry is followed by "$!optblock@here". Any %Chk line is
        rewritten to "%Chk=ligand" (and one is inserted if absent). The line
        directly above the first geometry line must be "<charge> <mult>".

        Example input file:

        %Mem=6GB
        %NProcShared=2
        %Chk=lig
        # B3LYP/6-31G(d) Opt=ModRedundant

        water energy

        0 1
        O -0.464 0.177 0.0
        H -0.464 1.137 0.0
        H 0.441 -0.143 0.0

        Raises
        ------
        ValueError
            If the line before the geometry is not a charge/multiplicity pair.
        AssertionError
            If no geometry is found, or native_opt is set and the input is
            not an Opt=ModRedundant job.
        """
        reading_molecule, found_geo = False, False
        gauss_temp = []  # store a template of the input file for generating new ones
        coords = []
        elems = []
        # Initialised so a geometry on the very first line gives a clear
        # error below instead of an unbound-variable failure.
        previous_line = ''
        with open(input_file) as gauss_in:
            for line in gauss_in:
                ls = line.split()
                if len(ls) == 4 and check_all_float(ls[1:]):
                    reading_molecule = True
                    if not found_geo:
                        found_geo = True
                        gauss_temp.append("$!geometry@here")
                        try:
                            charge, mult = previous_line.split()
                        except ValueError:
                            raise ValueError(
                                "Charge and multiplicity line not found before the geometry in %s" % input_file
                            ) from None
                    # parse the xyz format
                    elems.append(ls[0])
                    coords.append(ls[1:4])
                elif reading_molecule:
                    # the first blank line after the geometry closes the molecule block
                    if line.strip() == '':
                        reading_molecule = False
                        gauss_temp.append(line)
                        gauss_temp.append("$!optblock@here")
                elif "%chk" in line.lower():
                    # we need to overwrite the input to make the name consistent
                    gauss_temp.append("%Chk=ligand\n")
                else:
                    gauss_temp.append(line)
                    lowered = line.lower()
                    if 'opt=modredundant' in lowered:
                        self.temp_type = 'optimize'
                    elif "force=nostep" in lowered:
                        self.temp_type = "gradient"
                previous_line = line
        assert found_geo, "XYZ geometry not found in molecule block of %s" % input_file
        if self.native_opt:
            assert self.temp_type == 'optimize', "input_file should be a opt job to use native opt include the Opt=ModRedundant flag"
        # make sure the checkpoint file name is included
        if not any("%chk" in command.lower() for command in gauss_temp):
            gauss_temp.insert(0, "%Chk=ligand\n")
        self.gauss_temp = gauss_temp
        self.M = Molecule()
        self.M.elem = elems
        self.M.xyzs = [np.array(coords, dtype=float)]
        self.M.charge = int(charge)
        self.M.mult = int(mult)
        self.M.build_topology()

    def optimize_geomeTRIC(self):
        """
        Run the constrained optimization using the geomeTRIC package, in 3 steps:
        1. Write a constraints.txt file.
        2. Write a gradient job input file ('input.com').
        3. Run the job.

        Raises AssertionError if self.temp_type is not 'gradient'.
        """
        assert self.temp_type == 'gradient', "To use geomeTRIC package, the input file should have Force=NoStep in it"
        # step 1
        self.write_constraints_txt()
        # step 2
        self.write_input('input.com')
        # step 3
        cmd = 'geometric-optimize --prefix tdrive --qccnv yes --reset yes --epsilon 0.0 --enforce 0.1 --qdata yes --engine gaussian input.com constraints.txt'
        self.run(cmd, input_files=['input.com', 'constraints.txt'], output_files=['tdrive.log', 'tdrive.xyz', 'qdata.txt'])

    def write_input(self, filename='gaussian.com'):
        """
        Write a Gaussian input file from self.gauss_temp, filling the geometry
        placeholder with the first frame of self.M and the opt-block
        placeholder with self.optblockStr when that attribute exists.

        Raises AssertionError if load_input() has not set self.gauss_temp.
        """
        assert hasattr(self, 'gauss_temp'), "self.gauss_temp not set, call load_input() first"
        with open(filename, 'w') as outfile:
            for line in self.gauss_temp:
                if line == '$!geometry@here':
                    for e, c in zip(self.M.elem, self.M.xyzs[0]):
                        outfile.write("%-7s %13.7f %13.7f %13.7f\n" % (e, c[0], c[1], c[2]))
                elif line == "$!optblock@here":
                    if hasattr(self, 'optblockStr'):
                        # self.optblockStr will be set by self.optimize_native()
                        outfile.write(self.optblockStr)
                else:
                    outfile.write(line)

    def optimize_native(self):
        """
        Run the constrained optimization, following Gaussian09 manual.
        1. Write an optimization job input file ('gaussian.com').
        2. Run the job, then convert ligand.chk to ligand.fchk with formchk.

        Raises
        ------
        AssertionError
            If the template is not an opt job or self.gaussian_exe is not set.
        RuntimeError
            If extra constraints were requested (not supported here).
        """
        assert self.temp_type == 'optimize', "To use native optimization, the input file must be an opt job"
        assert hasattr(self, 'gaussian_exe'), 'The version of gaussian could not be determined!'
        if self.extra_constraints is not None:
            raise RuntimeError('extra constraints not supported in Gaussian native optimizations')
        opt_lines = []
        for d1, d2, d3, d4, v in self.dihedral_idx_values:
            # dihedral_idx_values is 0-based; the input lines use 1-based atom indices
            atoms = f'{d1 + 1} {d2 + 1} {d3 + 1} {d4 + 1}'
            opt_lines.append(f'{atoms} ={v:.3f} B\n')  # Build the angle
            opt_lines.append(f'{atoms} F\n')  # Freeze the angle
        opt_lines.append('\n')  # Add the tailing line.
        self.optblockStr = ''.join(opt_lines)
        # write input file
        self.write_input('gaussian.com')
        # run the job
        self.run(
            f'{self.gaussian_exe} < gaussian.com > gaussian.log && formchk ligand.chk ligand.fchk',
            input_files=['gaussian.com'],
            output_files=['gaussian.log', 'ligand.fchk'])

    def load_native_output(self, filename='ligand.fchk', filename2='gaussian.log'):
        """
        Load the optimized geometry and energy into a new molecule object and return it.

        Parameters
        ----------
        filename: str
            Formatted checkpoint file; geometry and energy are read from it.
        filename2: str
            Gaussian log file; only used to confirm the optimization completed.

        Returns
        -------
        m: Molecule
            New molecule with the elements of self.M, a single frame in xyzs,
            and qm_energies set.

        Raises
        ------
        RuntimeError
            If the log does not report a completed optimization, or the
            coordinates or energy cannot be found in the fchk file.
        AssertionError
            If fewer or more coordinates were read than the fchk header declares.
        """
        # Check the log file to see if the optimization was successful
        with open(filename2) as logfile:
            # Accept both
            #   Optimization completed.
            #   Optimization completed on the basis of negligible forces.
            opt_result = any('Optimization completed' in line for line in logfile)
        if not opt_result:
            raise RuntimeError("Geometry optimization failed in %s" % filename2)
        # Now we want to get the optimized structure from the fchk file as this is more reliable
        final_energy, coords = None, []
        num_xyz = end_xyz_pos = None
        with open(filename) as outfile:
            for i, line in enumerate(outfile):
                if 'Current cartesian coordinates' in line:
                    # the 6th field of the header is the number of values that
                    # follow, stored five per line
                    num_xyz = int(line.split()[5])
                    end_xyz_pos = int(np.ceil(num_xyz / 5) + i + 1)
                elif end_xyz_pos is not None and i < end_xyz_pos:
                    # NOTE(review): 0.529177 is presumably the Bohr -> Angstrom factor; confirm
                    coords.extend(float(num) * 0.529177 for num in line.split())
                elif 'Total Energy' in line:
                    final_energy = float(line.split()[3])
        if end_xyz_pos is None:
            raise RuntimeError('Cannot locate coordinates in %s file.' % filename)
        # Make sure we have all of the coordinates
        assert len(coords) == num_xyz, "Could not extract the optimised geometry"
        if final_energy is None:
            raise RuntimeError("Final energy not found in %s" % filename)
        m = Molecule()
        m.elem = self.M.elem
        m.xyzs = [np.reshape(coords, (len(m.elem), 3))]
        m.qm_energies = [final_energy]
        m.build_topology()
        return m