Example #1
0
 def load_native_output(self, filename='output.dat'):
     """
     Parse an optimizer output file and return the result as a new Molecule.

     The file is scanned line by line (each line stripped first):
       * a line starting with 'Final energy is' supplies the energy, taken
         from its last whitespace-separated token (a later match overwrites
         an earlier one);
       * once a line starting with 'Final optimized geometry and variables'
         has been seen, every following line with exactly four fields whose
         last three pass check_all_float is read as "element x y z".

     Raises RuntimeError when no energy line or no atom record was found.
     Returns a Molecule holding one frame, one energy and a built topology.
     """
     energy = None
     symbols, positions = [], []
     in_geometry_section = False
     with open(filename) as handle:
         for raw in handle:
             text = raw.strip()
             if text.startswith('Final energy is'):
                 energy = float(text.split()[-1])
                 continue
             if text.startswith('Final optimized geometry and variables'):
                 in_geometry_section = True
                 continue
             if not in_geometry_section:
                 continue
             fields = text.split()
             # atom records are "<element> <x> <y> <z>"; anything else is skipped
             if len(fields) == 4 and check_all_float(fields[1:]):
                 symbols.append(fields[0])
                 positions.append(fields[1:4])
     if energy is None:
         raise RuntimeError("Final energy not found in %s" % filename)
     if not symbols or not positions:
         raise RuntimeError("Final geometry not found in %s" % filename)
     result = Molecule()
     result.elem = symbols
     # coordinates were collected as strings; convert them in one go
     result.xyzs = [np.array(positions, dtype=float)]
     result.qm_energies = [energy]
     result.build_topology()
     return result
Example #2
0
 def load_native_output(self, filename='output.dat'):
     """
     Load the optimized geometry and energy into a new molecule object and return it.

     Parsing rules (each line is stripped before matching):
       * 'Final energy is ...' -> the last token of the line is the energy;
         if several such lines exist the last one wins.
       * After a line starting with 'Final optimized geometry and variables',
         every line made of exactly four fields whose last three pass
         check_all_float is taken as one atom: element symbol plus x, y, z.

     Parameters
     ----------
     filename: str
         Path of the output file to parse.

     Returns
     -------
     Molecule
         New object with elem, a single xyzs frame, a single qm_energies
         entry, and its topology built.

     Raises
     ------
     RuntimeError
         If the energy line or the geometry block is missing from the file.
     """
     found_opt_result = False
     final_energy, elems, coords = None, [], []
     with open(filename) as outfile:
         for line in outfile:
             line = line.strip()
             if line.startswith('Final energy is'):
                 final_energy = float(line.split()[-1])
             elif line.startswith('Final optimized geometry and variables'):
                 found_opt_result = True
             elif found_opt_result:
                 ls = line.split()
                 # only "<element> <x> <y> <z>" rows are geometry records
                 if len(ls) == 4 and check_all_float(ls[1:]):
                     elems.append(ls[0])
                     coords.append(ls[1:4])
     # identity test: None is a singleton, and 0.0 would be a valid energy
     if final_energy is None:
         raise RuntimeError("Final energy not found in %s" % filename)
     if not elems or not coords:
         raise RuntimeError("Final geometry not found in %s" % filename)
     m = Molecule()
     m.elem = elems
     m.xyzs = [np.array(coords, dtype=np.float64)]
     m.qm_energies = [final_energy]
     m.build_topology()
     return m
Example #3
0
 def launch_opt_jobs(self):
     """
     Drain self.opt_queue without running anything (counterpart of
     DihedralScanner.launch_opt_jobs).

     For every queued (molecule, from_grid_id, to_grid_id) task:
       * if the start geometry's key is in self.task_cache[to_grid_id], the
         cached (geometry, energy, job folder) is turned into a Molecule and,
         provided self.get_dihedral_id accepts it for to_grid_id, pushed onto
         self.current_finished_job_results with the job folder as priority;
       * otherwise a copy of the start geometry is appended to
         self.next_jobs[to_grid_id].
     """
     assert hasattr(self, 'next_jobs')
     assert hasattr(self, 'current_finished_job_results')
     while len(self.opt_queue) > 0:
         start_mol, from_grid_id, to_grid_id = self.opt_queue.pop()
         start_geo = start_mol.xyzs[0]
         geo_key = get_geo_key(start_geo)
         cached_for_target = self.task_cache[to_grid_id]
         if geo_key not in cached_for_target:
             # not computed yet: report it as a job the caller must run
             self.next_jobs[to_grid_id].append(start_geo.copy())
             continue
         final_geo, final_energy, job_folder = cached_for_target[geo_key]
         cached_mol = Molecule()
         cached_mol.elem = list(start_mol.elem)
         cached_mol.xyzs = [final_geo]
         cached_mol.qm_energies = [final_energy]
         cached_mol.build_topology()
         grid_id = self.get_dihedral_id(cached_mol, check_grid_id=to_grid_id)
         if grid_id is not None:
             self.current_finished_job_results.push((cached_mol, grid_id),
                                                    priority=job_folder)
         else:
             print(
                 f"Cached result from {job_folder} is ignored because optimized geometry is far from grid id {to_grid_id}"
             )
Example #4
0
def current_state_json_load(json_state_dict):
    """
    Rebuild an in-memory scan state from its JSON dictionary form.

    The input is deep-copied, so the caller's dictionary is left untouched.
    In the copy:
      * 'init_coords' becomes a list of (natoms, 3) float arrays, each scaled
        by bohr2ang;
      * 'grid_status' becomes a defaultdict(list) keyed by integer tuples.
        Every (start_geo, end_geo, end_energy) job is filed under the grid id
        obtained by measuring the dihedrals of its end geometry, rounding to
        the grid spacing and wrapping by multiples of 360, rather than under
        the key it was stored with. A warning is printed when a measured
        dihedral is more than 0.9 away from the stored key.

    Returns the converted copy.
    """
    state = copy.deepcopy(json_state_dict)
    natoms = len(state['elements'])

    def _as_geometry(flat):
        # JSON carries geometries in a flat/nested list form; reshape and rescale
        return np.array(flat, dtype=float).reshape(natoms, 3) * bohr2ang

    init_coords = [_as_geometry(c) for c in state['init_coords']]
    state['init_coords'] = init_coords

    # helper molecule, reused below only to measure dihedral angles
    probe = Molecule()
    probe.xyzs = init_coords
    probe.elem = state['elements']
    probe.build_bonds()

    dihedrals = state['dihedrals']
    grid_spacing = state['grid_spacing']
    grid_status = defaultdict(list)
    for grid_id_str, grid_jobs in state['grid_status'].items():
        # keys look like "30,60" in JSON
        target_grid_id = tuple(int(i) for i in grid_id_str.split(','))
        for start_geo, end_geo, end_energy in grid_jobs:
            start_geo = _as_geometry(start_geo)
            end_geo = _as_geometry(end_geo)

            # measure where the optimized geometry actually ended up
            probe.xyzs = [end_geo]
            measured = np.array(
                [probe.measure_dihedrals(*d)[0] for d in dihedrals])
            for value, target in zip(measured, target_grid_id):
                gap = abs(value - target)
                # compare modulo a full turn
                if min(gap, abs(360 - gap)) > 0.9:
                    print(
                        "Warning! dihedral values inconsistent with target grid_id"
                    )
                    print('dihedral_values', measured, 'ref_grid_id',
                          target_grid_id)

            snapped = (np.round(measured / grid_spacing) *
                       grid_spacing).astype(int)
            real_grid_id = tuple(
                (d + (180 - d) // 360 * 360) for d in snapped)

            # file the job under the grid id it really belongs to
            grid_status[real_grid_id].append((start_geo, end_geo, end_energy))

    state['grid_status'] = grid_status
    return state
Example #5
0
def get_next_jobs(current_state, verbose=False):
    """
    Replay a scan from a saved state and report the optimizations still missing.

    A DihedralScanRepeater with a blank engine is built from current_state,
    its task cache is rebuilt from the finished jobs in 'grid_status', and the
    scan is repeated; the repeater's next_jobs is returned.

    Parameters
    ----------
    current_state: dict
        Scan state.
        Required keys: 'dihedrals', 'grid_spacing', 'elements', 'init_coords', 'grid_status'
        Optional keys (None when absent): 'dihedral_ranges', 'energy_decrease_thresh', 'energy_upper_limit'
    verbose: bool
        Forwarded to the DihedralScanRepeater constructor.

    Returns
    -------
    next_jobs: dict
        Maps each target grid_id to a list of new jobs, every job being given
        by its start geometry.
        * Note: the order of new jobs should correspond to the finished job_info.

    Examples
    --------
    current_state = {
            'dihedrals': [[0,1,2,3], [1,2,3,4]] ,
            'grid_spacing': [30, 30],
            'elements': ['H', 'C', 'O', ...]
            'init_coords': [geo1, geo2, ..]
            'grid_status': {(30, 60): [(start_geo, end_geo, end_energy), ..], ...}
        }
    >>> get_next_jobs(current_state)
    {
        (90, 60): [start_geo1, start_geo2, ..],
        (90, 90): [start_geo3, start_geo4, ..],
    }
    """
    dihedrals = current_state['dihedrals']
    grid_spacing = current_state['grid_spacing']
    # molecule holding the starting geometries of the scan
    start_mol = Molecule()
    start_mol.elem = current_state['elements']
    start_mol.xyzs = current_state['init_coords']
    start_mol.build_topology()
    # the repeater only replays cached results, so a blank engine is enough
    blank_engine = EngineBlank()
    optional_settings = {
        'dihedral_ranges': current_state.get('dihedral_ranges'),
        'energy_decrease_thresh': current_state.get('energy_decrease_thresh'),
        'energy_upper_limit': current_state.get('energy_upper_limit'),
    }
    repeater = DihedralScanRepeater(
        blank_engine,
        dihedrals,
        grid_spacing,
        init_coords_M=start_mol,
        verbose=verbose,
        **optional_settings,
    )
    # feed the finished jobs in, then replay until something is not cached
    repeater.rebuild_task_cache(current_state['grid_status'])
    repeater.repeat_scan_process()
    return repeater.next_jobs
Example #6
0
    def load_native_output(self,
                           filename='ligand.fchk',
                           filename2='gaussian.log'):
        """
        Load the optimized geometry and energy into a new molecule object and return it.

        The log file (filename2) is only used to confirm that some line
        contains 'Optimization completed'. Geometry and energy are then read
        from the formatted checkpoint file (filename):
          * the line containing 'Current cartesian coordinates' gives the
            number of values (sixth token); the following
            ceil(count / 5) lines are read as coordinate values, each
            multiplied by 0.529177 (presumably Bohr -> Angstrom; confirm);
          * the fourth token of a line containing 'Total Energy' is the energy.

        Parameters
        ----------
        filename: str
            Path of the formatted checkpoint file.
        filename2: str
            Path of the log file.

        Returns
        -------
        Molecule
            New object with elements copied from self.M, one (natoms, 3)
            frame, one energy, and its topology built.

        Raises
        ------
        RuntimeError
            If the log does not report a completed optimization, or the
            coordinate header or the energy is missing from the checkpoint file.
        AssertionError
            If the number of coordinate values read differs from the count
            announced in the header.
        """
        # Accept both
        # Optimization completed.
        # Optimization completed on the basis of negligible forces.
        with open(filename2) as logfile:
            opt_result = any('Optimization completed' in line
                             for line in logfile)

        if not opt_result:
            raise RuntimeError("Geometry optimization failed in %s" %
                               filename2)

        # Now we want to get the optimized structure from the fchk file as this is more reliable
        final_energy, coords = None, []
        num_xyz = None
        end_xyz_pos = None
        with open(filename) as outfile:
            for i, line in enumerate(outfile):
                if 'Current cartesian coordinates' in line:
                    num_xyz = int(line.split()[5])
                    # values are laid out five per line after the header
                    end_xyz_pos = int(np.ceil(num_xyz / 5) + i + 1)
                elif end_xyz_pos is not None and i < end_xyz_pos:
                    coords.extend(float(num) * 0.529177
                                  for num in line.split())
                elif 'Total Energy' in line:
                    final_energy = float(line.split()[3])

        if end_xyz_pos is None:
            # report the file that was actually read, not a hard-coded name
            raise RuntimeError(
                'Cannot locate coordinates in %s file.' % filename)

        # Make sure we have all of the coordinates. Raised explicitly so the
        # check is not stripped when Python runs with -O.
        if len(coords) != num_xyz:
            raise AssertionError("Could not extract the optimised geometry")

        if final_energy is None:
            raise RuntimeError("Final energy not found in %s" % filename)

        m = Molecule()
        m.elem = self.M.elem
        m.xyzs = [np.reshape(coords, (len(m.elem), 3))]
        m.qm_energies = [final_energy]
        m.build_topology()
        return m
Example #7
0
 def finish(self):
     """
     Dump the scan results to qdata.txt and scan.xyz.

     One frame is written per entry of self.grid_ids, in that order, using
     self.grid_energies and self.grid_final_geometries; each frame's comment
     records the grid id and the energy with nine decimals.
     """
     energies, geometries, comments = [], [], []
     for gid in self.grid_ids:
         energy = self.grid_energies[gid]
         energies.append(energy)
         geometries.append(self.grid_final_geometries[gid])
         comments.append("Dihedral %s Energy %.9f" % (str(gid), energy))
     out = Molecule()
     out.elem = list(self.engine.M.elem)
     out.qm_energies, out.xyzs, out.comms = energies, geometries, comments
     # the same object is written twice; the format follows the file name
     for target in ('qdata.txt', 'scan.xyz'):
         out.write(target)
         print("Final scan energies are written to %s" % target)
Example #8
0
    def load_native_output(self,
                           filename='lig.fchk',
                           filename2='gaussian.log'):
        """
        Load the optimized geometry and energy into a new molecule object and return it.

        The log file (filename2) must contain a line that, once stripped,
        starts with 'Optimization completed'. The formatted checkpoint file
        (filename) then supplies:
          * the coordinates: the line starting with 'Current cartesian
            coordinates' announces the number of values (sixth token) and the
            next ceil(count / 5) lines hold them; every value is multiplied
            by 0.529177 (presumably Bohr -> Angstrom; confirm);
          * the energy: fourth token of the line starting with 'Total Energy'.
        If a marker occurs more than once, the last occurrence is used.

        Parameters
        ----------
        filename: str
            Path of the formatted checkpoint file.
        filename2: str
            Path of the log file.

        Returns
        -------
        Molecule
            New object with elements copied from self.M, one (natoms, 3)
            frame, one energy, and its topology built.

        Raises
        ------
        RuntimeError
            If the optimization did not complete, the energy line is missing,
            or no coordinate values were read.
        EOFError
            If the coordinate header is missing from the checkpoint file.
        AssertionError
            If the number of values read differs from the announced count.
        """
        with open(filename2) as logfile:
            found_opt_result = any(
                line.strip().startswith('Optimization completed')
                for line in logfile)

        if not found_opt_result:
            raise RuntimeError("Geometry optimisation failed in %s" %
                               filename2)

        with open(filename) as outfile:
            fchk_lines = outfile.readlines()

        # Initialise the markers so a missing section is reported cleanly
        # instead of surfacing as an UnboundLocalError further down.
        start_xyz_pos = end_xyz_pos = num_xyz = energy_pos = None
        for counter, line in enumerate(fchk_lines):
            if line.startswith('Current cartesian coordinates'):
                start_xyz_pos = counter + 1
                num_xyz = int(line.split()[5])
                # values are laid out five per line after the header
                end_xyz_pos = int(np.ceil(num_xyz / 5) + start_xyz_pos)
            if line.startswith('Total Energy'):
                energy_pos = counter

        if start_xyz_pos is None or end_xyz_pos is None:
            raise EOFError('Cannot locate coordinates in %s file.' % filename)

        coords = []
        for line in fchk_lines[start_xyz_pos:end_xyz_pos]:
            coords.extend(float(num) * 0.529177 for num in line.split())

        # Make sure we have all of the coordinates. Raised explicitly so the
        # check is not stripped when Python runs with -O.
        if len(coords) != num_xyz:
            raise AssertionError("Could not extract the optimised geometry")

        if energy_pos is None:
            raise RuntimeError("Final energy not found in %s" % filename)
        final_energy = float(fchk_lines[energy_pos].split()[3])

        if len(coords) == 0:
            raise RuntimeError("Final geometry not found in %s" % filename)
        m = Molecule()
        m.elem = self.M.elem
        m.xyzs = [np.reshape(coords, (len(m.elem), 3))]
        m.qm_energies = [final_energy]
        m.build_topology()
        return m
 def finish(self):
     """
     Dump the scan results to qdata.txt and scan.xyz.

     Only grid points that have an entry in self.grid_energies are written,
     in sorted grid-id order; each frame's comment records the grid id and
     the energy with nine decimals.
     """
     energies, geometries, comments = [], [], []
     for gid in sorted(self.grid_energies):
         energy = self.grid_energies[gid]
         energies.append(energy)
         geometries.append(self.grid_final_geometries[gid])
         comments.append("Dihedral %s Energy %.9f" % (str(gid), energy))
     out = Molecule()
     out.elem = list(self.engine.M.elem)
     out.qm_energies, out.xyzs, out.comms = energies, geometries, comments
     # the same object is written twice; the format follows the file name
     for target in ('qdata.txt', 'scan.xyz'):
         out.write(target)
         print("Final scan energies are written to %s" % target)
Example #10
0
 def load_geomeTRIC_output(self):
     """
     Read the last frame of qdata.txt from the current directory and return it.

     Raises OSError when qdata.txt does not exist. Elements are taken from
     self.M because qdata.txt does not carry them. The frame must hold exactly
     one energy and one gradient whose shape matches self.M.xyzs[0]; this is
     enforced with assertions.
     """
     qdata_exists = os.path.isfile('qdata.txt')
     if not qdata_exists:
         raise OSError("geomeTRIC output qdata.txt file not found")
     last_frame = Molecule('qdata.txt')[-1]
     # qdata.txt has no element information, borrow it from the input molecule
     last_frame.elem = self.M.elem
     # sanity-check what was loaded: one energy, one gradient of matching shape
     assert len(last_frame.qm_energies) == 1
     assert len(last_frame.qm_grads) == 1
     assert last_frame.qm_grads[0].shape == self.M.xyzs[0].shape
     last_frame.build_topology()
     return last_frame
Example #11
0
def export_torsiondrive_data(molecule: "Ligand",
                             tdrive_data: "TorsionDriveData") -> None:
    """
    Write a torsiondrive result to scan.xyz and qdata.txt, the two files ForceBalance needs.

    Frames are emitted in sorted order of the entries of
    tdrive_data.reference_data; every frame's comment records the angle and
    the energy.

    Adapted from <https://github.com/lpwgroup/torsiondrive/blob/ac33066edf447e25e4beaf21c098e52ca0fc6649/torsiondrive/dihedral_scanner.py#L655>

    Args:
        molecule: The molecule object which contains the topology; its atoms provide the element symbols.
        tdrive_data: The results of a torsiondrive on the input molecule.
    """
    from geometric.molecule import Molecule as GEOMol

    frames = GEOMol()
    frames.elem = [atom.atomic_symbol for atom in molecule.atoms]
    energies, geometries, comments = [], [], []
    for angle, grid_data in sorted(tdrive_data.reference_data.items()):
        energies.append(grid_data.energy)
        geometries.append(grid_data.geometry)
        comments.append(f"Dihedral ({angle},) Energy {grid_data.energy}")
    frames.qm_energies, frames.xyzs, frames.comms = energies, geometries, comments
    # the output format is chosen from the file name
    for target in ("qdata.txt", "scan.xyz"):
        frames.write(target)
Example #12
0
def get_next_jobs(current_state, verbose=False):
    """
    Replay a scan from a saved state and report the optimizations still missing.

    A DihedralScanRepeater is built from current_state, its task cache is
    rebuilt from the finished jobs in 'grid_status', and the scan is
    repeated; the repeater's next_jobs is returned.

    Input:
    -------
    current_state: dict, e.g. {
            'dihedrals': [[0,1,2,3], [1,2,3,4]] ,
            'grid_spacing': [30, 30],
            'elements': ['H', 'C', 'O', ...]
            'init_coords': [geo1, geo2, ..]
            'grid_status': {(30, 60): [(start_geo, end_geo, end_energy), ..], ...}
        }
    verbose: forwarded to the DihedralScanRepeater constructor

    Output:
    -------
    next_jobs: dict(), maps each target grid_id to a list of new jobs, every job being given by its start_geo
        * Note: the order of new jobs should correspond to the finished job_info.

    """
    dihedrals = current_state['dihedrals']
    grid_spacing = current_state['grid_spacing']
    # molecule holding the starting geometries of the scan
    start_mol = Molecule()
    start_mol.elem = current_state['elements']
    start_mol.xyzs = current_state['init_coords']
    start_mol.build_topology()
    # fresh repeater driven by a default-constructed engine
    qm_engine = QMEngine()
    repeater = DihedralScanRepeater(
        qm_engine,
        dihedrals,
        grid_spacing,
        start_mol,
        verbose,
    )
    # feed the finished jobs in, then replay until something is not cached
    repeater.rebuild_task_cache(current_state['grid_status'])
    repeater.repeat_scan_process()
    return repeater.next_jobs
Example #13
0
 def launch_opt_jobs(self):
     """
     Process every task waiting in self.opt_queue, popping them in queue order.

     A task whose start geometry is already in self.task_cache[to_grid_id] is
     served from the cache: the stored geometry, energy and (if present)
     gradient are wrapped in a Molecule, validated with self.get_dihedral_id
     and, when accepted, pushed onto self.current_finished_job_results.
     Any other task is started through self.launch_constrained_opt and
     remembered as
     self.running_job_path_info[job_path] = m, from_grid_id, to_grid_id
     """
     assert hasattr(self, 'running_job_path_info')
     assert hasattr(self, 'current_finished_job_results')
     while len(self.opt_queue) > 0:
         m, from_grid_id, to_grid_id = self.opt_queue.pop()
         geo_key = get_geo_key(m.xyzs[0])
         cached_for_target = self.task_cache[to_grid_id]
         if geo_key not in cached_for_target:
             # nothing cached: actually launch the optimization
             job_path = self.launch_constrained_opt(m, to_grid_id)
             self.running_job_path_info[job_path] = (m, from_grid_id,
                                                     to_grid_id)
             continue
         (final_geo, final_energy, final_gradient,
          job_folder) = cached_for_target[geo_key]
         cached_mol = Molecule()
         cached_mol.elem = list(m.elem)
         cached_mol.xyzs = [final_geo]
         cached_mol.qm_energies = [final_energy]
         # gradients are optional in the cache
         if final_gradient is not None:
             cached_mol.qm_grads = [final_gradient]
         cached_mol.build_topology()
         grid_id = self.get_dihedral_id(cached_mol, check_grid_id=to_grid_id)
         if grid_id is not None:
             self.current_finished_job_results.push((cached_mol, grid_id),
                                                    priority=job_folder)
         else:
             print(
                 f"Cached result from {job_folder} is ignored because optimized geometry is far from grid id {to_grid_id}"
             )
Example #14
0
 def launch_opt_jobs(self):
     """
     Process every task waiting in self.opt_queue until the queue is empty.

     A task whose start geometry is already in self.task_cache[to_grid_id] is
     served from the cache and pushed onto self.current_finished_job_results,
     with the cached job folder as priority. Any other task is started
     through self.launch_constrained_opt and remembered as
     self.running_job_path_info[job_path] = m, from_grid_id, to_grid_id
     """
     assert hasattr(self, 'running_job_path_info')
     assert hasattr(self, 'current_finished_job_results')
     while len(self.opt_queue) > 0:
         m, from_grid_id, to_grid_id = self.opt_queue.pop()
         geo_key = get_geo_key(m.xyzs[0])
         cached_for_target = self.task_cache[to_grid_id]
         if geo_key not in cached_for_target:
             # nothing cached: actually launch the optimization
             job_path = self.launch_constrained_opt(m, to_grid_id)
             self.running_job_path_info[job_path] = (m, from_grid_id,
                                                     to_grid_id)
             continue
         final_geo, final_energy, job_folder = cached_for_target[geo_key]
         cached_mol = Molecule()
         cached_mol.elem = list(m.elem)
         cached_mol.xyzs = [final_geo]
         cached_mol.qm_energies = [final_energy]
         cached_mol.build_topology()
         grid_id = self.get_dihedral_id(cached_mol, check_grid_id=to_grid_id)
         # NOTE(review): unlike sibling variants, the result is pushed without
         # checking grid_id for None - confirm get_dihedral_id cannot return it here
         self.current_finished_job_results.push((cached_mol, grid_id),
                                                priority=job_folder)
Example #15
0
 def finish(self):
     """
     Dump the scan results to qdata.txt and scan.xyz.

     Only grid points that have an entry in self.grid_energies are written,
     in sorted grid-id order. Gradients are included as well when
     self.grid_final_gradients has as many entries as
     self.grid_final_geometries.
     """
     out = Molecule()
     out.elem = list(self.engine.M.elem)
     out.qm_energies, out.xyzs, out.comms = [], [], []
     # gradients are optional: write them only if every geometry has one
     writing_gradients = (len(self.grid_final_gradients) == len(
         self.grid_final_geometries))
     if writing_gradients:
         out.qm_grads = []
     for gid in sorted(self.grid_energies):
         energy = self.grid_energies[gid]
         out.qm_energies.append(energy)
         out.xyzs.append(self.grid_final_geometries[gid])
         if writing_gradients:
             out.qm_grads.append(self.grid_final_gradients[gid])
         out.comms.append("Dihedral %s Energy %.9f" % (str(gid), energy))
     out.write('qdata.txt')
     print(
         f"Final scan energies{' and gradients' if writing_gradients else ''} are written to qdata.txt"
     )
     out.write('scan.xyz')
     print("Final scan energies are written to scan.xyz")