def load_native_output(self, filename='output.dat'):
    """
    Parse an optimization output file and return the result as a new Molecule.

    The file is scanned line by line. The last token of any line starting
    with 'Final energy is' is taken as the energy (the last such line wins).
    After a line starting with 'Final optimized geometry and variables' has
    been seen, every four-column line whose last three columns pass
    check_all_float is recorded as one atom: element symbol plus x, y, z.

    Parameters
    ----------
    filename : str
        Path of the output file to read.

    Returns
    -------
    Molecule
        A single-frame molecule carrying the parsed elements, coordinates and
        energy, with build_topology() already called.

    Raises
    ------
    RuntimeError
        If no energy line or no atom record was found in the file.
    """
    energy = None
    symbols, positions = [], []
    in_geometry_section = False
    with open(filename) as handle:
        for raw in handle:
            text = raw.strip()
            if text.startswith('Final energy is'):
                energy = float(text.split()[-1])
                continue
            if text.startswith('Final optimized geometry and variables'):
                in_geometry_section = True
                continue
            if not in_geometry_section:
                continue
            fields = text.split()
            if len(fields) == 4 and check_all_float(fields[1:]):
                symbols.append(fields[0])
                positions.append(fields[1:4])
    if energy is None:
        raise RuntimeError("Final energy not found in %s" % filename)
    if not symbols or not positions:
        raise RuntimeError("Final geometry not found in %s" % filename)
    mol = Molecule()
    mol.elem = symbols
    # the coordinate columns are still strings here; numpy converts them
    mol.xyzs = [np.array(positions, dtype=float)]
    mol.qm_energies = [energy]
    mol.build_topology()
    return mol
def load_native_output(self, filename='output.dat'):
    """
    Load the optimized geometry and energy into a new molecule object and return it.

    The energy is the last token of a line starting with 'Final energy is'.
    Atom records are the four-column lines (element, x, y, z) found after a
    line starting with 'Final optimized geometry and variables'; the three
    coordinate columns must pass check_all_float.

    Parameters
    ----------
    filename : str
        Path of the output file to read.

    Returns
    -------
    Molecule
        A single-frame molecule with the parsed elements, coordinates and
        energy, with build_topology() already called.

    Raises
    ------
    RuntimeError
        If the final energy or the final geometry cannot be found.
    """
    found_opt_result = False
    final_energy, elems, coords = None, [], []
    with open(filename) as outfile:
        for line in outfile:
            line = line.strip()
            if line.startswith('Final energy is'):
                final_energy = float(line.split()[-1])
            elif line.startswith('Final optimized geometry and variables'):
                found_opt_result = True
            elif found_opt_result:
                ls = line.split()
                if len(ls) == 4 and check_all_float(ls[1:]):
                    elems.append(ls[0])
                    coords.append(ls[1:4])
    # identity comparison: None is a singleton and == can be overridden
    if final_energy is None:
        raise RuntimeError("Final energy not found in %s" % filename)
    if not elems or not coords:
        raise RuntimeError("Final geometry not found in %s" % filename)
    m = Molecule()
    m.elem = elems
    # the coordinate columns are still strings here; numpy converts them
    m.xyzs = [np.array(coords, dtype=np.float64)]
    m.qm_energies = [final_energy]
    m.build_topology()
    return m
def launch_opt_jobs(self):
    """
    Mimic DihedralScanner.launch_opt_jobs without launching anything.

    Tasks are popped from self.opt_queue until it is empty. If the starting
    geometry of a task is already a key of self.task_cache[to_grid_id], the
    cached result is rebuilt into a Molecule and pushed onto
    self.current_finished_job_results, unless self.get_dihedral_id returns
    None for it, in which case a message is printed and the result is
    dropped. Otherwise a copy of the starting geometry is appended to
    self.next_jobs[to_grid_id].
    """
    assert hasattr(self, 'next_jobs') and hasattr(
        self, 'current_finished_job_results')
    while len(self.opt_queue) > 0:
        start_mol, from_grid_id, to_grid_id = self.opt_queue.pop()
        start_geo = start_mol.xyzs[0]
        geo_key = get_geo_key(start_geo)
        if geo_key not in self.task_cache[to_grid_id]:
            # not cached: record it in self.next_jobs, which is the output
            # of the torsiondrive API
            self.next_jobs[to_grid_id].append(start_geo.copy())
            continue
        # cached: rebuild the finished result from the stored data
        final_geo, final_energy, job_folder = self.task_cache[to_grid_id][
            geo_key]
        cached_mol = Molecule()
        cached_mol.elem = list(start_mol.elem)
        cached_mol.xyzs = [final_geo]
        cached_mol.qm_energies = [final_energy]
        cached_mol.build_topology()
        grid_id = self.get_dihedral_id(cached_mol, check_grid_id=to_grid_id)
        if grid_id is not None:
            self.current_finished_job_results.push((cached_mol, grid_id),
                                                   priority=job_folder)
        else:
            print(
                f"Cached result from {job_folder} is ignored because optimized geometry is far from grid id {to_grid_id}"
            )
def current_state_json_load(json_state_dict):
    """
    Build an in-memory scan state from its JSON dictionary form.

    The input is deep-copied, so the caller's dictionary is left untouched.
    Every geometry is converted to a numpy array of shape (natoms, 3) and
    multiplied by bohr2ang. The 'grid_status' mapping is rebuilt as a
    defaultdict(list) whose keys are grid ids re-derived from the dihedral
    values measured on each final geometry, not the ids stored in the JSON.

    Parameters
    ----------
    json_state_dict : dict
        Dictionary with the keys 'elements', 'init_coords', 'dihedrals',
        'grid_spacing' and 'grid_status'. The keys of 'grid_status' are
        comma-separated integer strings.

    Returns
    -------
    dict
        The converted copy of the input state.
    """
    state = copy.deepcopy(json_state_dict)
    n_atoms = len(state['elements'])

    def to_scaled_array(flat_geo):
        # reshape into (natoms, 3) and apply the bohr2ang factor
        return np.array(flat_geo, dtype=float).reshape(n_atoms,
                                                       3) * bohr2ang

    start_coords = [to_scaled_array(c) for c in state['init_coords']]
    state['init_coords'] = start_coords
    rebuilt_status = defaultdict(list)
    # a scratch molecule used only to measure dihedrals below
    probe = Molecule()
    probe.xyzs = start_coords
    probe.elem = state['elements']
    probe.build_bonds()
    dihedrals = state['dihedrals']
    grid_spacing = state['grid_spacing']
    for key_str, jobs in state['grid_status'].items():
        target_id = tuple(int(token) for token in key_str.split(','))
        for raw_start, raw_end, end_energy in jobs:
            geo_start = to_scaled_array(raw_start)
            geo_end = to_scaled_array(raw_end)
            # measure the dihedrals of the final geometry and compare them
            # with the grid id the job was stored under
            probe.xyzs = [geo_end]
            measured = np.array(
                [probe.measure_dihedrals(*d)[0] for d in dihedrals])
            for value, reference in zip(measured, target_id):
                gap = abs(value - reference)
                if min(gap, abs(360 - gap)) > 0.9:
                    print(
                        "Warning! dihedral values inconsistent with target grid_id"
                    )
                    print('dihedral_values', measured, 'ref_grid_id',
                          target_id)
            # snap to the nearest grid point, then wrap each angle into the
            # range (-180, 180]
            snapped = (np.round(measured / grid_spacing) *
                       grid_spacing).astype(int)
            actual_id = tuple((d + (180 - d) // 360 * 360) for d in snapped)
            # the result is filed under the grid id it actually reached
            rebuilt_status[actual_id].append((geo_start, geo_end, end_energy))
    state['grid_status'] = rebuilt_status
    return state
def get_next_jobs(current_state, verbose=False):
    """
    Take the current scan state and generate the next set of optimizations.

    A fresh DihedralScanRepeater is built from current_state, its task cache
    is rebuilt from the finished jobs in 'grid_status', and the scan is then
    replayed from the beginning until a job is reached that is not in the
    cache.

    Parameters
    ----------
    current_state : dict
        A dictionary describing the scan state.
        Required keys: 'dihedrals', 'grid_spacing', 'elements',
        'init_coords', 'grid_status'
        Optional keys: 'dihedral_ranges', 'energy_decrease_thresh',
        'energy_upper_limit' (passed on as None when absent)
    verbose : bool
        Forwarded to the scanner.

    Returns
    -------
    next_jobs : dict
        Key is the target grid_id, value is a list of new jobs, each
        represented by its start_geo.
        * Note: the order of the new jobs should correspond to the finished
        job_info.

    Examples
    --------
    current_state = {
        'dihedrals': [[0,1,2,3], [1,2,3,4]],
        'grid_spacing': [30, 30],
        'elements': ['H', 'C', 'O', ...],
        'init_coords': [geo1, geo2, ..],
        'grid_status': {(30, 60): [(start_geo, end_geo, end_energy), ..], ...}
    }
    get_next_jobs(current_state) returns something like
    {
        (90, 60): [start_geo1, start_geo2, ..],
        (90, 90): [start_geo3, start_geo4, ..],
    }
    """
    dihedrals = current_state['dihedrals']
    grid_spacing = current_state['grid_spacing']
    # rebuild the molecule that holds the initial coordinates
    start_mol = Molecule()
    start_mol.elem = current_state['elements']
    start_mol.xyzs = current_state['init_coords']
    start_mol.build_topology()
    # the scanner gets a blank engine
    blank_engine = EngineBlank()
    optional_settings = {
        name: current_state.get(name)
        for name in ('dihedral_ranges', 'energy_decrease_thresh',
                     'energy_upper_limit')
    }
    scanner = DihedralScanRepeater(
        blank_engine,
        dihedrals,
        grid_spacing,
        init_coords_M=start_mol,
        verbose=verbose,
        **optional_settings,
    )
    # feed the finished jobs back into the scanner's task cache
    scanner.rebuild_task_cache(current_state['grid_status'])
    # replay the scan until some calculation is not found in the cache
    scanner.repeat_scan_process()
    return scanner.next_jobs
def load_native_output(self, filename='ligand.fchk', filename2='gaussian.log'):
    """
    Load the optimized geometry and energy into a new molecule object and return it.

    The log file is only used to confirm that the optimization completed.
    Coordinates and energy are read from the fchk file, which the original
    author considered more reliable.

    Parameters
    ----------
    filename : str
        Path of the fchk file holding the 'Current cartesian coordinates'
        section and the 'Total Energy' line.
    filename2 : str
        Path of the log file, searched for 'Optimization completed'.

    Returns
    -------
    Molecule
        A single-frame molecule with elements taken from self.M.elem, the
        parsed coordinates reshaped to (number of elements, 3) and the
        parsed energy, with build_topology() already called.

    Raises
    ------
    RuntimeError
        If the log does not report a completed optimization, the coordinate
        section is missing, or no energy line is found.
    AssertionError
        If the number of coordinate values read differs from the count
        announced in the section header.
    """
    # Accept both
    #   Optimization completed.
    #   Optimization completed on the basis of negligible forces.
    with open(filename2) as logfile:
        opt_result = any('Optimization completed' in line for line in logfile)
    if not opt_result:
        raise RuntimeError("Geometry optimization failed in %s" % filename2)
    final_energy, coords = None, []
    num_xyz, end_xyz_pos = None, None
    with open(filename) as outfile:
        for i, line in enumerate(outfile):
            if 'Current cartesian coordinates' in line:
                num_xyz = int(line.split()[5])
                # the values follow the header, five per line
                end_xyz_pos = int(np.ceil(num_xyz / 5) + i + 1)
            elif end_xyz_pos is not None and i < end_xyz_pos:
                # 0.529177 is presumably the Bohr-to-Angstrom factor
                coords.extend(float(num) * 0.529177 for num in line.split())
            elif 'Total Energy' in line:
                final_energy = float(line.split()[3])
    if end_xyz_pos is None:
        # name the file actually read rather than the default name
        raise RuntimeError('Cannot locate coordinates in %s file.' % filename)
    # Make sure we have all of the coordinates. Raised explicitly so the
    # check is not stripped when Python runs with -O.
    if len(coords) != num_xyz:
        raise AssertionError("Could not extract the optimised geometry")
    if final_energy is None:
        raise RuntimeError("Final energy not found in %s" % filename)
    m = Molecule()
    m.elem = self.M.elem
    m.xyzs = [np.reshape(coords, (int(len(m.elem)), 3))]
    m.qm_energies = [final_energy]
    m.build_topology()
    return m
def finish(self):
    """
    Write the converged scan results to qdata.txt and scan.xyz.

    One frame is written per entry of self.grid_ids, in that order. Each
    frame carries the energy from self.grid_energies, the geometry from
    self.grid_final_geometries and a comment line naming the grid id and
    energy. Elements are copied from self.engine.M.elem.
    """
    energies, geometries, comments = [], [], []
    for gid in self.grid_ids:
        energy = self.grid_energies[gid]
        energies.append(energy)
        geometries.append(self.grid_final_geometries[gid])
        comments.append("Dihedral %s Energy %.9f" % (str(gid), energy))
    scan = Molecule()
    scan.elem = list(self.engine.M.elem)
    scan.qm_energies = energies
    scan.xyzs = geometries
    scan.comms = comments
    scan.write('qdata.txt')
    print("Final scan energies are written to qdata.txt")
    scan.write('scan.xyz')
    print("Final scan energies are written to scan.xyz")
def load_native_output(self, filename='lig.fchk', filename2='gaussian.log'):
    """
    Load the optimized geometry and energy into a new molecule object and return it.

    The log file is only used to confirm that the optimization completed.
    Coordinates and energy are read from the fchk file.

    Parameters
    ----------
    filename : str
        Path of the fchk file holding a line starting with
        'Current cartesian coordinates' and a line starting with
        'Total Energy'.
    filename2 : str
        Path of the log file, searched for a line that starts (after
        stripping) with 'Optimization completed'.

    Returns
    -------
    Molecule
        A single-frame molecule with elements taken from self.M.elem, the
        parsed coordinates reshaped to (number of elements, 3) and the
        parsed energy, with build_topology() already called.

    Raises
    ------
    RuntimeError
        If the log does not report a completed optimization, no energy line
        is found, or the coordinate section is empty.
    EOFError
        If the coordinate section cannot be located in the fchk file.
    AssertionError
        If the number of coordinate values read differs from the count
        announced in the section header.
    """
    with open(filename2) as logfile:
        found_opt_result = any(
            line.strip().startswith('Optimization completed')
            for line in logfile)
    if not found_opt_result:
        raise RuntimeError("Geometry optimisation failed in %s" % filename2)
    with open(filename) as outfile:
        fchk_lines = outfile.readlines()
    # Start from None so a missing section is reported properly instead of
    # surfacing as an unbound-local error further down.
    start_xyz_pos = end_xyz_pos = num_xyz = energy_pos = None
    for counter, line in enumerate(fchk_lines):
        if line.startswith('Current cartesian coordinates'):
            start_xyz_pos = counter + 1
            num_xyz = int(line.split()[5])
            # the values follow the header, five per line
            end_xyz_pos = int(np.ceil(num_xyz / 5) + start_xyz_pos)
        if line.startswith('Total Energy'):
            energy_pos = counter
    if start_xyz_pos is None or end_xyz_pos is None:
        raise EOFError('Cannot locate coordinates in %s file.' % filename)
    # 0.529177 is presumably the Bohr-to-Angstrom factor
    coords = [
        float(num) * 0.529177
        for line in fchk_lines[start_xyz_pos:end_xyz_pos]
        for num in line.split()
    ]
    # Make sure we have all of the coordinates. Raised explicitly so the
    # check is not stripped when Python runs with -O.
    if len(coords) != num_xyz:
        raise AssertionError("Could not extract the optimised geometry")
    if energy_pos is None:
        raise RuntimeError("Final energy not found in %s" % filename)
    final_energy = float(fchk_lines[energy_pos].split()[3])
    if len(coords) == 0:
        raise RuntimeError("Final geometry not found in %s" % filename)
    m = Molecule()
    m.elem = self.M.elem
    m.xyzs = [np.reshape(coords, (int(len(m.elem)), 3))]
    m.qm_energies = [final_energy]
    m.build_topology()
    return m
def finish(self):
    """
    Write the converged scan results to qdata.txt and scan.xyz.

    Only grid ids that have an entry in self.grid_energies are written, in
    sorted order. Each frame carries the energy, the geometry from
    self.grid_final_geometries and a comment line naming the grid id and
    energy. Elements are copied from self.engine.M.elem.
    """
    scan = Molecule()
    scan.elem = list(self.engine.M.elem)
    energies, geometries, comments = [], [], []
    # grids without an energy are skipped by iterating over grid_energies
    for gid in sorted(self.grid_energies.keys()):
        energy = self.grid_energies[gid]
        energies.append(energy)
        geometries.append(self.grid_final_geometries[gid])
        comments.append("Dihedral %s Energy %.9f" % (str(gid), energy))
    scan.qm_energies, scan.xyzs, scan.comms = energies, geometries, comments
    for target, notice in (
        ('qdata.txt', "Final scan energies are written to qdata.txt"),
        ('scan.xyz', "Final scan energies are written to scan.xyz"),
    ):
        scan.write(target)
        print(notice)
def load_geomeTRIC_output(self):
    """
    Load the optimized geometry and energy from qdata.txt and return them as a Molecule.

    The last frame of 'qdata.txt' in the current working directory is used.
    Elements are taken from self.M.elem because qdata.txt does not store
    them.

    Returns
    -------
    Molecule
        The last frame of qdata.txt with elements assigned and
        build_topology() already called.

    Raises
    ------
    OSError
        If qdata.txt does not exist.
    AssertionError
        If the frame does not hold exactly one energy and one gradient whose
        shape matches self.M.xyzs[0].
    """
    # NOTE(review): the original comment ties the file name to the --prefix
    # tdrive option and warns that an input file named like tdrive.in would
    # change the output name (tdrive_optim.xyz) -- confirm it still applies.
    output_name = 'qdata.txt'
    if not os.path.isfile(output_name):
        raise OSError("geomeTRIC output qdata.txt file not found")
    last_frame = Molecule(output_name)[-1]
    # qdata.txt carries no element information
    last_frame.elem = self.M.elem
    # sanity-check what was loaded
    assert len(last_frame.qm_energies) == 1
    expected_shape = self.M.xyzs[0].shape
    assert len(last_frame.qm_grads) == 1 and last_frame.qm_grads[
        0].shape == expected_shape
    last_frame.build_topology()
    return last_frame
def export_torsiondrive_data(molecule: "Ligand",
                             tdrive_data: "TorsionDriveData") -> None:
    """
    Write the stored torsiondrive results to the scan.xyz and qdata.txt files required for ForceBalance.

    Method taken from
    <https://github.com/lpwgroup/torsiondrive/blob/ac33066edf447e25e4beaf21c098e52ca0fc6649/torsiondrive/dihedral_scanner.py#L655>

    One frame is written per entry of tdrive_data.reference_data, in sorted
    order. Both files are written to the current working directory.

    Args:
        molecule: The molecule object which contains the topology; only the
            atomic symbols of its atoms are used here.
        tdrive_data: The results of a torsiondrive on the input molecule.
    """
    from geometric.molecule import Molecule as GEOMol

    energies, geometries, comments = [], [], []
    for angle, grid_data in sorted(tdrive_data.reference_data.items()):
        energies.append(grid_data.energy)
        geometries.append(grid_data.geometry)
        comments.append(f"Dihedral ({angle},) Energy {grid_data.energy}")

    scan = GEOMol()
    scan.elem = [atom.atomic_symbol for atom in molecule.atoms]
    scan.qm_energies, scan.xyzs, scan.comms = energies, geometries, comments
    for target in ("qdata.txt", "scan.xyz"):
        scan.write(target)
def get_next_jobs(current_state, verbose=False):
    """
    Take the current scan state and generate the next set of optimizations.

    A fresh DihedralScanRepeater is built from current_state, its task cache
    is rebuilt from the finished jobs in 'grid_status', and the scan is then
    replayed from the beginning until a job is reached that is not in the
    cache.

    Parameters
    ----------
    current_state : dict
        For example
        {
            'dihedrals': [[0,1,2,3], [1,2,3,4]],
            'grid_spacing': [30, 30],
            'elements': ['H', 'C', 'O', ...],
            'init_coords': [geo1, geo2, ..],
            'grid_status': {(30, 60): [(start_geo, end_geo, end_energy), ..], ...}
        }
    verbose : bool
        Forwarded to the scanner.

    Returns
    -------
    next_jobs : dict
        Key is the target grid_id, value is a list of new jobs, each
        represented by its start_geo.
        * Note: the order of the new jobs should correspond to the finished
        job_info.
    """
    dihedrals = current_state['dihedrals']
    grid_spacing = current_state['grid_spacing']
    # rebuild the molecule that holds the initial coordinates
    start_mol = Molecule()
    start_mol.elem = current_state['elements']
    start_mol.xyzs = current_state['init_coords']
    start_mol.build_topology()
    # a brand-new scanner with a fresh QMEngine
    repeater = DihedralScanRepeater(
        QMEngine(),
        dihedrals,
        grid_spacing,
        start_mol,
        verbose,
    )
    # feed the finished jobs back into the scanner's task cache
    repeater.rebuild_task_cache(current_state['grid_status'])
    # replay the scan until some calculation is not found in the cache
    repeater.repeat_scan_process()
    return repeater.next_jobs
def launch_opt_jobs(self):
    """
    Launch constrained optimizations for the molecules in opt_queue.

    Tasks are popped from self.opt_queue until it is empty. If a task exists
    in self.task_cache, the cached result (including the gradient when one
    is stored) is checked and put into self.current_finished_job_results.
    A cached result for which self.get_dihedral_id returns None is reported
    and dropped. Otherwise the task is launched with
    self.launch_constrained_opt and recorded as
    self.running_job_path_info[job_path] = m, from_grid_id, to_grid_id
    """
    assert hasattr(self, 'running_job_path_info') and hasattr(
        self, 'current_finished_job_results')
    while len(self.opt_queue) > 0:
        start_mol, from_grid_id, to_grid_id = self.opt_queue.pop()
        geo_key = get_geo_key(start_mol.xyzs[0])
        if geo_key not in self.task_cache[to_grid_id]:
            # not cached: launch a new constrained optimization
            job_path = self.launch_constrained_opt(start_mol, to_grid_id)
            self.running_job_path_info[job_path] = (start_mol, from_grid_id,
                                                    to_grid_id)
            continue
        # cached: rebuild the finished result from the stored data
        final_geo, final_energy, final_gradient, job_folder = self.task_cache[
            to_grid_id][geo_key]
        cached_mol = Molecule()
        cached_mol.elem = list(start_mol.elem)
        cached_mol.xyzs = [final_geo]
        cached_mol.qm_energies = [final_energy]
        if final_gradient is not None:
            cached_mol.qm_grads = [final_gradient]
        cached_mol.build_topology()
        grid_id = self.get_dihedral_id(cached_mol, check_grid_id=to_grid_id)
        if grid_id is not None:
            self.current_finished_job_results.push((cached_mol, grid_id),
                                                   priority=job_folder)
        else:
            print(
                f"Cached result from {job_folder} is ignored because optimized geometry is far from grid id {to_grid_id}"
            )
def launch_opt_jobs(self):
    """
    Launch constrained optimizations for the molecules in opt_queue.

    The current opt_queue is emptied. A task whose starting geometry is
    already in self.task_cache is not rerun: its cached result is rebuilt
    into a Molecule and pushed onto self.current_finished_job_results. A
    cached result for which self.get_dihedral_id returns None is reported
    and dropped rather than pushed with a None grid id. Any other task is
    launched with self.launch_constrained_opt.

    Nothing is returned. Launched jobs are recorded in
    self.running_job_path_info as
    { job_path: (m, from_grid_id, to_grid_id) }
    """
    assert hasattr(self, 'running_job_path_info') and hasattr(
        self, 'current_finished_job_results')
    while len(self.opt_queue) > 0:
        m, from_grid_id, to_grid_id = self.opt_queue.pop()
        # check if this job already exists
        m_geo_key = get_geo_key(m.xyzs[0])
        if m_geo_key in self.task_cache[to_grid_id]:
            final_geo, final_energy, job_folder = self.task_cache[to_grid_id][
                m_geo_key]
            result_m = Molecule()
            result_m.elem = list(m.elem)
            result_m.xyzs = [final_geo]
            result_m.qm_energies = [final_energy]
            result_m.build_topology()
            grid_id = self.get_dihedral_id(result_m, check_grid_id=to_grid_id)
            if grid_id is None:
                # never queue a finished result under a None grid id
                print(
                    f"Cached result from {job_folder} is ignored because optimized geometry is far from grid id {to_grid_id}"
                )
            else:
                self.current_finished_job_results.push((result_m, grid_id),
                                                       priority=job_folder)
        else:
            job_path = self.launch_constrained_opt(m, to_grid_id)
            self.running_job_path_info[job_path] = m, from_grid_id, to_grid_id
def finish(self):
    """
    Write the converged scan results to qdata.txt and scan.xyz.

    Only grid ids that have an entry in self.grid_energies are written, in
    sorted order. Gradients from self.grid_final_gradients are included only
    when that mapping has as many entries as self.grid_final_geometries.
    Elements are copied from self.engine.M.elem.
    """
    scan = Molecule()
    scan.elem = list(self.engine.M.elem)
    # gradients are optional: write them only if they are available for
    # as many grid points as there are final geometries
    writing_gradients = (len(self.grid_final_gradients) == len(
        self.grid_final_geometries))
    energies, geometries, comments, gradients = [], [], [], []
    # grids without an energy are skipped by iterating over grid_energies
    for gid in sorted(self.grid_energies.keys()):
        energy = self.grid_energies[gid]
        energies.append(energy)
        geometries.append(self.grid_final_geometries[gid])
        if writing_gradients:
            gradients.append(self.grid_final_gradients[gid])
        comments.append("Dihedral %s Energy %.9f" % (str(gid), energy))
    scan.qm_energies, scan.xyzs, scan.comms = energies, geometries, comments
    if writing_gradients:
        scan.qm_grads = gradients
    scan.write('qdata.txt')
    print(
        f"Final scan energies{' and gradients' if writing_gradients else ''} are written to qdata.txt"
    )
    scan.write('scan.xyz')
    print("Final scan energies are written to scan.xyz")