def torsion_scan(molecule):
    """
    Run the torsion scan stage.

    A TorsionScan is built for the molecule, its scan order is worked out and the scan is run.
    The start and end of the stage are written to the log.

    :param molecule: The molecule object handed to TorsionScan.
    :return: The molecule object that was passed in.
    """

    append_to_log('Starting torsion_scans')

    scanner = TorsionScan(molecule)

    # The scan order has to be known before scanning;
    # it should already have been captured from the dihedral file.
    scanner.find_scan_order()
    scanner.scan()

    append_to_log('Finishing torsion_scans')

    return molecule
def stage_wrapper(self, start_key, begin_log_msg='', fin_log_msg='', torsion_options=None):
    """
    Run a single stage of the workflow and hand over to the next one.

    Steps, in order:
        - Unpickle the molecule stored under start_key and record start_key as its state
          (so any exception can be logged against the right stage).
        - Attach the torsion options to the molecule, if any were supplied.
        - If the stage is mapped to self.skip, announce and log that it is being skipped;
          otherwise print begin_log_msg (if given).
        - Make / move into the numbered working directory for the stage.
        - Call the stage function, remove the stage from self.order, and move back out.
        - Take the first remaining key of self.order as the next stage, print fin_log_msg
          (if given and the stage was not skipped) and pickle the molecule under that key.

    :param start_key: Name of the stage to run; used as key into self.order and the pickle.
    :param begin_log_msg: Message printed before the stage runs (ignored when empty or skipping).
    :param fin_log_msg: Message printed after the stage has run (ignored when empty or skipping).
    :param torsion_options: Optional torsion options passed to self.store_torsions.
    :return: The key of the next stage, or None when no stages remain in self.order.
    """

    mol = unpickle()[start_key]

    # Record the stage on the molecule so exceptions can be attributed to it.
    mol.state = start_key

    if torsion_options is not None:
        mol = self.store_torsions(mol, torsion_options)

    skipping = self.order[start_key] == self.skip
    if skipping:
        printf(f'{COLOURS.blue}Skipping stage: {start_key}{COLOURS.end}')
        append_to_log(f'skipping stage: {start_key}')
    elif begin_log_msg:
        printf(f'{begin_log_msg}...', end=' ')

    home = os.getcwd()

    # Folders are numbered from 01 following the stage's position in the immutable order.
    stage_number = str(self.immutable_order.index(start_key) + 1).zfill(2)
    make_and_change_into(f'{stage_number}_{start_key}')

    stage_function = self.order[start_key]
    stage_function(mol)
    self.order.pop(start_key, None)
    os.chdir(home)

    # Nothing left to run: there is no next stage to pickle under.
    if not self.order:
        return None

    # The next stage is whichever key now sits first in self.order.
    next_key = next(iter(self.order))

    if fin_log_msg and not skipping:
        printf(f'{COLOURS.green}{fin_log_msg}{COLOURS.end}')

    mol.pickle(state=next_key)

    return next_key
def mm_optimise(molecule):
    """
    Produce the initial, molecular-mechanics optimised geometry of a molecule.

    Which route is taken depends on molecule.mm_opt_method:
        'none' (or a parameter engine of 'OpenFF_generics')
            No optimisation; the input coordinates are reused as the mm coordinates.
        'openmm'
            geometric-optimize is run through OpenMM with the molecule's own parameters.
            The result depends on the force field the molecule was parametrised with.
        anything else ('rdkit_mff' / 'rdkit_uff')
            RDKit optimisation with the MFF or UFF force field. These force fields
            can have strange effects on the geometry of molecules.

    :param molecule: The molecule object to optimise.
    :return: The molecule object that was passed in.
    :raises OptimisationFailed: If OpenMM is requested but the parameter engine is 'none'.
    """

    append_to_log('Starting mm_optimisation')

    method = molecule.mm_opt_method

    if method == 'none' or molecule.parameter_engine == 'OpenFF_generics':
        # Nothing to optimise with; carry the input geometry forward.
        molecule.coords['mm'] = molecule.coords['input']

    elif method == 'openmm':
        if molecule.parameter_engine == 'none':
            raise OptimisationFailed('You can not optimise a molecule with OpenMM and no initial parameters; '
                                     'consider parametrising or using UFF/MFF in RDKit')

        # Write the pdb and parameter files geometric needs.
        molecule.write_pdb(input_type='input')
        molecule.write_parameters()

        # Run geometric
        # TODO Should this be moved to allow a decorator?
        with open('log.txt', 'w+') as log:
            command = (f'geometric-optimize --reset --epsilon 0.0 --maxiter {molecule.iterations} --pdb '
                       f'{molecule.name}.pdb --openmm {molecule.name}.xml '
                       f'{molecule.constraints_file if molecule.constraints_file is not None else ""}')
            # No check on the exit status: carrying on without convergence is fine.
            sp.run(command, shell=True, stdout=log, stderr=log)

        # Load every frame of the trajectory, then keep the last one as the mm optimised structure.
        molecule.read_file(f'{molecule.name}_optim.xyz', input_type='traj')
        molecule.coords['mm'] = molecule.coords['traj'][-1]

    else:
        # TODO change to qcengine as this can already be done
        # Map the option name onto the RDKit force field name and optimise with it.
        force_fields = {'rdkit_mff': 'MFF', 'rdkit_uff': 'UFF'}
        molecule.filename = RDKit().mm_optimise(molecule.filename, ff=force_fields[method])

    append_to_log(f'Finishing mm_optimisation of the molecule with {molecule.mm_opt_method}')

    return molecule
def lennard_jones(molecule):
    """
    Calculate the Lennard-Jones parameters of a molecule (and extract virtual sites).

    Every file whose name starts with 'DDEC' is copied from the '07_charges' folder into the
    current working directory before the non-bonded force is calculated.

    :param molecule: The molecule object; its NonbondedForce and combination attributes are set.
    :return: The molecule object that was passed in.
    """

    append_to_log('Starting Lennard-Jones parameter calculation')

    charges_folder = os.path.join(molecule.home, '07_charges')
    ddec_files = [name for name in os.listdir(charges_folder) if name.startswith('DDEC')]
    for name in ddec_files:
        copy(os.path.join(charges_folder, name), name)

    molecule.NonbondedForce = LennardJones(molecule).calculate_non_bonded_force()

    # Having these parameters also implies the opls combination rule.
    molecule.combination = 'opls'

    append_to_log('Finishing Lennard-Jones parameter calculation')

    return molecule
def torsion_optimise(molecule):
    """
    Run the torsion optimisation stage.

    If the molecule has no QM scan results yet, they are first collected from the
    '09_torsion_scan' folder, after which the working directory is moved to
    '10_torsion_optimise'.

    :param molecule: The molecule object to optimise the torsions of.
    :return: The molecule object that was passed in.
    """

    append_to_log('Starting torsion_optimisations')

    scans_missing = molecule.qm_scans is None
    if scans_missing:
        # Gather the scan results from where the scan stage left them.
        os.chdir(os.path.join(molecule.home, '09_torsion_scan'))

        collector = TorsionScan(molecule)
        if molecule.scan_order is None:
            collector.find_scan_order()
        collector.collect_scan()

        os.chdir(os.path.join(molecule.home, '10_torsion_optimise'))

    TorsionOptimiser(molecule).run()

    append_to_log('Finishing torsion_optimisations')

    return molecule
def hessian(molecule):
    """
    Calculate and extract the Hessian matrix with the molecule's assigned bonds engine.

    With Gaussian ('g09' / 'g16') the checkpoint file from the qm optimisation is reused when
    it can be found; otherwise the structure is optimised again as part of the Hessian job.
    With any other engine the Hessian comes from QCEngine (psi4) and is also saved to
    'hessian.txt'.

    :param molecule: The molecule object; its hessian attribute is set.
    :return: The molecule object that was passed in.
    :raises HessianCalculationFailed: If the Gaussian job does not report success.
    """

    append_to_log('Starting hessian calculation')

    molecule.find_bond_lengths('qm')

    if molecule.bonds_engine in ('g09', 'g16'):
        gaussian = Gaussian(molecule)

        try:
            # The checkpoint file is preferred as it has higher xyz precision.
            checkpoint = os.path.join(molecule.home, '03_qm_optimise', 'lig.chk')
            copy(checkpoint, 'lig.chk')
            result = gaussian.generate_input('qm', hessian=True, restart=True, execute=molecule.bonds_engine)
        except FileNotFoundError:
            append_to_log('qm_optimise checkpoint not found, optimising first to refine atomic coordinates',
                          msg_type='minor')
            result = gaussian.generate_input('qm', optimise=True, hessian=True, execute=molecule.bonds_engine)

        if not result['success']:
            raise HessianCalculationFailed('The hessian was not calculated check the log file.')

        hessian_matrix = gaussian.hessian()

    else:
        hessian_matrix = QCEngine(molecule).call_qcengine(engine='psi4', driver='hessian', input_type='qm')
        np.savetxt('hessian.txt', hessian_matrix)

    molecule.hessian = hessian_matrix

    append_to_log(f'Finishing Hessian calculation using {molecule.bonds_engine}')

    return molecule
def density(self, molecule):
    """
    Run the density calculation with the molecule's density engine.

    For ONETEP only the xyz file for the density analysis is written, and self.order is
    replaced so that the remaining stages are skipped up to the pause. For any other engine
    the density job is generated and executed, using the qm coordinates when there are any
    and the input coordinates otherwise.

    :param molecule: The molecule object the density is calculated for.
    :return: The molecule object that was passed in.
    """

    append_to_log('Starting density calculation')

    if molecule.density_engine != 'onetep':
        qm_engine = self.engine_dict[molecule.density_engine](molecule)
        coords_to_use = 'qm' if list(molecule.coords['qm']) else 'input'
        qm_engine.generate_input(input_type=coords_to_use, density=True, execute=molecule.density_engine)
        append_to_log('Finishing Density calculation')

    else:
        molecule.write_xyz(input_type='qm')
        # With ONETEP the run stops after this step.
        append_to_log('Density analysis file made for ONETEP')

        # Rewrite the order so that execution ends here.
        shortened_order = [
            ('density', self.density),
            ('charges', self.skip),
            ('lennard_jones', self.skip),
            ('torsion_scan', self.skip),
            ('pause', self.pause),
            ('finalise', self.finalise),
        ]
        self.order = OrderedDict(shortened_order)

    return molecule
def generate_input(self, input_type='input', optimise=False, hessian=False, density=False, energy=False,
                   fchk=False, restart=False, execute=True):
    """
    Write a PSI4 input file ('input.dat') for the molecule and optionally run it
    (PSI4 directly, without geometric).

    :param input_type: The coordinate set of the molecule to be used
    :param optimise: Optimise the molecule to the desired convergence criterion within the iteration limit
    :param hessian: Calculate the hessian matrix
    :param density: Calculate the electron density (currently writes nothing extra to the input)
    :param energy: Calculate the single point energy of the molecule
    :param fchk: Write out a gaussian style Fchk file
    :param restart: Restart the calculation from a log point
        (required but unused to match g09's generate_input())
    :param execute: Run the desired Psi4 job; only its truthiness is used
    :return: The result of self.check_for_errors() when the job was run,
        otherwise the dict {'success': False, 'error': 'Not run'}
    :raises Psi4Error: If the psi4 command exits with a non-zero status
    """

    mol = self.molecule

    # Fragments for the "set { ... }" block and for the job commands that follow it.
    set_lines = []
    task_lines = []

    if energy:
        append_to_log('Writing psi4 energy calculation input')
        task_lines.append(f"\nenergy('{mol.theory}')")

    if optimise:
        append_to_log('Writing PSI4 optimisation input', 'minor')
        set_lines.append(f' g_convergence {mol.convergence}\n GEOM_MAXITER {mol.iterations}\n')
        task_lines.append(f"\noptimize('{mol.theory.lower()}')")

    if hessian:
        append_to_log('Writing PSI4 Hessian matrix calculation input', 'minor')
        set_lines.append(' hessian_write on\n')
        task_lines.append(f"\nenergy, wfn = frequency('{mol.theory.lower()}', return_wfn=True)")
        task_lines.append('\nwfn.hessian().print_out()\n\n')

    if density:
        # Density input is disabled for now; the intended lines are kept for reference.
        pass
        # append_to_log('Writing PSI4 density calculation input', 'minor')
        # setters += " cubeprop_tasks ['density']\n"
        #
        # overage = get_overage(self.molecule.name)
        # setters += ' CUBIC_GRID_OVERAGE [{0}, {0}, {0}]\n'.format(overage)
        # setters += ' CUBIC_GRID_SPACING [0.13, 0.13, 0.13]\n'
        # tasks += f"grad, wfn = gradient('{self.molecule.theory.lower()}', return_wfn=True)\ncubeprop(wfn)"

    if fchk:
        append_to_log('Writing PSI4 input file to generate fchk file')
        task_lines.append(f"\ngrad, wfn = gradient('{mol.theory.lower()}', return_wfn=True)")
        task_lines.append('\nfchk_writer = psi4.core.FCHKWriter(wfn)')
        task_lines.append(f'\nfchk_writer.write("{mol.name}_psi4.fchk")\n')

    # TODO If overage cannot be made to work, delete and just use Gaussian.
    if mol.solvent:
        # Solvent input is disabled for now; the intended lines are kept for reference.
        pass
        # setters += ' pcm true\n pcm_scf_type total\n'
        # tasks += '\n\npcm = {'
        # tasks += '\n units = Angstrom\n Medium {\n SolverType = IEFPCM\n Solvent = Chloroform\n }'
        # tasks += '\n Cavity {\n RadiiSet = UFF\n Type = GePol\n Scaling = False\n Area = 0.3\n Mode = Implicit'
        # tasks += '\n }\n}'

    # Close the "set {" block that is opened when the file is written below.
    set_lines.append('}\n')

    if not execute:
        # The thread count is written into the file when the job is not launched from here
        # (when it is, the count is given on the psi4 command line instead).
        set_lines.append(f'set_num_threads({mol.threads})\n')

    # input.dat is the PSI4 input file.
    with open('input.dat', 'w+') as input_file:
        # The opening tag is always written.
        input_file.write(f'memory {mol.memory} GB\n\nmolecule {mol.name} {{\n'
                         f'{mol.charge} {mol.multiplicity} \n')

        # The molecule is always written, one atom per line.
        for index, position in enumerate(mol.coords[input_type]):
            x, y, z = float(position[0]), float(position[1]), float(position[2])
            input_file.write(f' {mol.atoms[index].atomic_symbol} '
                             f'{x: .10f} {y: .10f} {z: .10f} \n')

        input_file.write(f" units angstrom\n no_reorient\n}}\n\nset {{\n basis {mol.basis}\n")
        input_file.write(''.join(set_lines))
        input_file.write(''.join(task_lines))

    if not execute:
        return {'success': False, 'error': 'Not run'}

    with open('log.txt', 'w+') as log:
        try:
            sp.run(f'psi4 input.dat -n {mol.threads}', shell=True, stdout=log, stderr=log, check=True)
        except sp.CalledProcessError:
            raise Psi4Error('Psi4 did not execute successfully check log file for details.')

    # Now check the exit status of the job
    return self.check_for_errors()
def fit(self, atom_index: int):
    """
    Fit zero, one and two virtual sites to an atom and keep whichever is judged best.

    The error for the objective functions is defined as the sum of differences at each sample point
    between the ideal ESP and the ESP with and without sites.

    * The ESP is first calculated without any virtual sites, if the error is below 1.0, no fitting is carried out.
    * Virtual sites are added along pre-defined vectors, and the charges and scale factors of the vectors
      are fit to give the lowest errors.
    * This is done for single sites and two sites (sometimes in two orientations).
    * The two sites may be placed symmetrically, using the bool molecule.enable_symmetry attribute.
    * The errors from the sites are logged, and a plot is produced showing the positions,
      sample points, and charges.

    Results are stored on the instance (site_errors, one_site_coords, two_site_coords, v_sites_coords)
    and the charge moved onto the chosen site(s) is subtracted from the atom's entries in
    molecule.NonbondedForce and molecule.ddec_data.

    :param atom_index: The index of the atom being analysed.
    :return: None. Note that the early exit (no-site error <= 1.0) happens before any logging or plotting.
    """

    # Every stored error is the objective function value divided by the number of sample points.
    n_sample_points = len(self.no_site_esps)

    # No site
    vec = self.get_vector_from_coords(atom_index, n_sites=1)
    # The one-site objective is reused with fixed parameters (0, 1) to get the no-site error.
    # presumably the tuple is (charge, lambda), so a zero charge means "no site" -- TODO confirm
    no_site_error = self.one_site_objective_function((0, 1), atom_index, vec)
    self.site_errors[0] = no_site_error / n_sample_points

    # Good enough without sites: stop here (nothing is fitted, logged or plotted).
    if self.site_errors[0] <= 1.0:
        return

    # Bounds for fitting, format: charge, charge, lambda, lambda
    # Since the vectors are scaled to be 1 angstrom long, lambda makes the v-site distance -1 to 1 angstrom.
    bounds = ((-1.0, 1.0), (-1.0, 1.0), (-1.0, 1.0), (-1.0, 1.0))

    # One site
    # bounds[1:3] picks out two of the (-1.0, 1.0) pairs: one for the charge, one for lambda.
    one_site_fit = minimize(
        self.one_site_objective_function,
        np.array([0, 1]),
        args=(atom_index, vec),
        bounds=bounds[1:3],
    )
    self.site_errors[1] = one_site_fit.fun / n_sample_points
    q, lam = one_site_fit.x
    # Each stored site is a tuple of (site coordinates, site charge, parent atom index).
    self.one_site_coords = [((vec * lam) + self.coords[atom_index], q, atom_index)]

    # 1 or 3 bonds
    # (strictly: any number of bonds other than 2; a single orientation is fitted)
    if len(self.molecule.atoms[atom_index].bonds) != 2:
        vec_a, vec_b = self.get_vector_from_coords(atom_index, n_sites=2)
        two_site_fit = minimize(
            self.two_sites_objective_function,
            np.array([0.0, 0.0, 1.0, 1.0]),
            args=(atom_index, vec_a, vec_b),
            bounds=bounds,
        )
        self.site_errors[2] = two_site_fit.fun / n_sample_points
        q_a, q_b, lam_a, lam_b = two_site_fit.x
        site_a_coords, site_b_coords = self.sites_coords_from_vecs_and_lams(
            atom_index, lam_a, lam_b, vec_a, vec_b)
        self.two_site_coords = [
            (site_a_coords, q_a, atom_index),
            (site_b_coords, q_b, atom_index),
        ]

    # 2 bonds
    else:
        # Arbitrarily large error; this will be overwritten.
        # NOTE(review): it is only overwritten if a fit's scaled error is below 10000; otherwise
        # site_errors[2] and two_site_coords keep whatever they held before -- confirm this cannot happen.
        final_err = 10000
        # Both orientations (alt=True, then alt=False) are fitted; the one with the lower error is kept.
        for alt in [True, False]:
            vec_a, vec_b = self.get_vector_from_coords(atom_index, n_sites=2, alt=alt)
            if self.molecule.enable_symmetry:
                # Symmetric placement: a single charge and a single lambda are fitted and shared by both sites.
                two_site_fit = minimize(
                    self.symm_two_sites_objective_function,
                    np.array([0.0, 1.0]),
                    args=(atom_index, vec_a, vec_b),
                    bounds=bounds[1:3],
                )
                if (two_site_fit.fun / n_sample_points) < final_err:
                    final_err = two_site_fit.fun / n_sample_points
                    self.site_errors[2] = two_site_fit.fun / n_sample_points
                    q, lam = two_site_fit.x
                    q_a = q_b = q
                    lam_a = lam_b = lam
                    (
                        site_a_coords,
                        site_b_coords,
                    ) = self.sites_coords_from_vecs_and_lams(
                        atom_index, lam_a, lam_b, vec_a, vec_b)
                    self.two_site_coords = [
                        (site_a_coords, q_a, atom_index),
                        (site_b_coords, q_b, atom_index),
                    ]
            else:
                # Independent placement: each site gets its own charge and lambda.
                two_site_fit = minimize(
                    self.two_sites_objective_function,
                    np.array([0.0, 0.0, 1.0, 1.0]),
                    args=(atom_index, vec_a, vec_b),
                    bounds=bounds,
                )
                if (two_site_fit.fun / n_sample_points) < final_err:
                    final_err = two_site_fit.fun / n_sample_points
                    self.site_errors[2] = two_site_fit.fun / n_sample_points
                    q_a, q_b, lam_a, lam_b = two_site_fit.x
                    (
                        site_a_coords,
                        site_b_coords,
                    ) = self.sites_coords_from_vecs_and_lams(
                        atom_index, lam_a, lam_b, vec_a, vec_b)
                    self.two_site_coords = [
                        (site_a_coords, q_a, atom_index),
                        (site_b_coords, q_b, atom_index),
                    ]

    # max_err scales the errors of the more complex models before they are compared with a simpler one.
    max_err = self.molecule.v_site_error_factor

    # Keep no sites if the no-site error beats both scaled site errors.
    if self.site_errors[0] < min(self.site_errors[1] * max_err, self.site_errors[2] * max_err):
        append_to_log(
            "No virtual site placement has reduced the error significantly.",
            "plain",
            True,
        )
    # Otherwise prefer one site if its error beats the scaled two-site error.
    elif self.site_errors[1] < self.site_errors[2] * max_err:
        append_to_log(
            "The addition of one virtual site was found to be best.",
            "plain", True)
        self.v_sites_coords.extend(self.one_site_coords)
        # The charge given to the site is taken off the parent atom.
        self.molecule.NonbondedForce[atom_index][0] -= self.one_site_coords[0][1]
        self.molecule.ddec_data[atom_index].charge -= self.one_site_coords[0][1]
    else:
        append_to_log(
            "The addition of two virtual sites was found to be best.",
            "plain", True)
        self.v_sites_coords.extend(self.two_site_coords)
        # The combined charge of both sites is taken off the parent atom.
        self.molecule.NonbondedForce[atom_index][0] -= (
            self.two_site_coords[0][1] + self.two_site_coords[1][1])
        self.molecule.ddec_data[atom_index].charge -= (
            self.two_site_coords[0][1] + self.two_site_coords[1][1])

    append_to_log(
        f"Errors (kcal/mol):\n"
        f"No Site One Site Two Sites\n"
        f"{self.site_errors[0]:.4f} {self.site_errors[1]:.4f} {self.site_errors[2]:.4f}",
        "plain",
        True,
    )
    self.plot(atom_index)
def qm_optimise(self, molecule):
    """
    Optimise the molecule coordinates at the QM level.

    Two routes exist:
        - geometric driven through QCEngine, used when molecule.geometric is set and the
          bonds engine is 'psi4';
        - the bonds engine's own optimiser (e.g. Gaussian, or PSI4 without geometric).

    In both cases the optimisation starts from the mm coordinates when there are any and from
    the input coordinates otherwise, and a failed job is restarted up to three times.

    :param molecule: The molecule object; its 'qm' coordinates and qm_energy are set.
    :return: The molecule object that was passed in.
    :raises OptimisationFailed: If the optimisation is still unsuccessful after the restarts.
    """

    append_to_log('Starting qm_optimisation')

    qm_engine = self.engine_dict[molecule.bonds_engine](molecule)
    max_restarts = 3

    def starting_coords():
        """Name of the coordinate set to start from: 'mm' if populated, else 'input'."""
        return 'mm' if list(molecule.coords['mm']) else 'input'

    if molecule.geometric and (molecule.bonds_engine == 'psi4'):
        qceng = QCEngine(molecule)
        result = qceng.call_qcengine(engine='geometric', driver='gradient', input_type=starting_coords())

        for _ in range(max_restarts):
            if result['success']:
                break

            append_to_log(f'{molecule.bonds_engine} optimisation failed with error {result["error"]}; restarting',
                          msg_type='minor')
            try:
                # Carry on from the last geometry of the failed run (converted from bohr to angstroms).
                last_geometry = result['input_data']['final_molecule']['geometry']
                molecule.coords['temp'] = np.array(last_geometry).reshape((len(molecule.atoms), 3))
                molecule.coords['temp'] *= constants.BOHR_TO_ANGS
                result = qceng.call_qcengine(engine='geometric', driver='gradient', input_type='temp')
            except KeyError:
                # No final geometry was reported; start again from the original coordinates.
                result = qceng.call_qcengine(engine='geometric', driver='gradient', input_type=starting_coords())

        if not result['success']:
            raise OptimisationFailed("The optimisation did not converge")

        molecule.read_geometric_traj(result['trajectory'])

        # The final molecule of the run becomes the qm optimised structure.
        final_geometry = result['final_molecule']['geometry']
        molecule.coords['qm'] = np.array(final_geometry).reshape((len(molecule.atoms), 3))
        molecule.coords['qm'] *= constants.BOHR_TO_ANGS
        molecule.qm_energy = result['energies'][-1]

        # Write out the trajectory and the optimised structure.
        molecule.write_xyz('traj', name=f'{molecule.name}_opt')
        molecule.write_xyz('qm', name='opt')

    # Using Gaussian or geometric off
    else:
        result = qm_engine.generate_input(input_type=starting_coords(), optimise=True,
                                          execute=molecule.bonds_engine)

        for _ in range(max_restarts):
            if result['success']:
                break

            append_to_log(f'{molecule.bonds_engine} optimisation failed with error {result["error"]}; restarting',
                          msg_type='minor')

            failure = result['error']
            if failure == 'FileIO':
                result = qm_engine.generate_input('mm', optimise=True, restart=True,
                                                  execute=molecule.bonds_engine)
            elif failure == 'Max iterations':
                result = qm_engine.generate_input('input', optimise=True, restart=True,
                                                  execute=molecule.bonds_engine)
            else:
                # Any other failure: try again from a freshly generated conformer.
                molecule.coords['temp'] = RDKit().generate_conformers(molecule.rdkit_mol)[0]
                result = qm_engine.generate_input('temp', optimise=True, execute=molecule.bonds_engine)

        if not result['success']:
            raise OptimisationFailed(f"{molecule.bonds_engine} "
                                     f"optimisation did not converge after 3 restarts; last error {result['error']}")

        molecule.coords['qm'], molecule.qm_energy = qm_engine.optimised_structure()
        molecule.write_xyz('qm', name='opt')

    append_to_log(f'Finishing qm_optimisation of molecule{" using geometric" if molecule.geometric else ""}')

    return molecule
def generate_input(self, execute=True):
    """
    Write the chargemol job file ('job_control.txt') and optionally run chargemol on it.

    The DDEC version is taken from the molecule; anything other than 3 or 6 is replaced by the
    default version 6 (with a warning in the log).

    When executed, chargemol's output goes to 'log.txt' and the number of threads is passed
    to it through OMP_NUM_THREADS. The variable is set only in the environment handed to the
    chargemol process, so the environment of this process (including any OMP_NUM_THREADS the
    user had already exported) is left untouched, whether or not chargemol succeeds.

    :param execute: Run chargemol after writing the job file; only its truthiness is used.
    :return: None
    :raises ChargemolError: If chargemol exits with a non-zero status.
    """

    if self.molecule.ddec_version not in (6, 3):
        append_to_log(
            message="Invalid or unsupported DDEC version given, running with default version 6.",
            msg_type="warning",
        )
        self.molecule.ddec_version = 6

    # NOTE(review): the net charge is hard-coded to 0.0 regardless of the molecule's charge --
    # confirm this is intended for charged molecules.
    job_sections = [
        f"<input filename>\n{self.molecule.name}.wfx\n</input filename>",
        "\n\n<net charge>\n0.0\n</net charge>",
        "\n\n<periodicity along A, B and C vectors>\n.false.\n.false.\n.false.",
        "\n</periodicity along A, B and C vectors>",
        f"\n\n<atomic densities directory complete path>\n{self.molecule.chargemol}"
        f"/atomic_densities/",
        "\n</atomic densities directory complete path>",
        f"\n\n<charge type>\nDDEC{self.molecule.ddec_version}\n</charge type>",
        "\n\n<compute BOs>\n.true.\n</compute BOs>",
        "\n\n<print atomic densities>\n.true.\n</print atomic densities>",
    ]

    # Write the charges job file.
    with open("job_control.txt", "w+") as charge_file:
        charge_file.write("".join(job_sections))

    if not execute:
        return

    # chargemol works out its thread count from OMP_NUM_THREADS (must be a string).
    # A copy of the environment is used so that the caller's environment is never modified.
    run_env = dict(os.environ)
    run_env["OMP_NUM_THREADS"] = str(self.molecule.threads)

    # Binary path (relative to the chargemol install) followed by the job file argument.
    control_path = (
        "chargemol_FORTRAN_09_26_2017/compiled_binaries/linux/"
        "Chargemol_09_26_2017_linux_parallel job_control.txt")
    command = os.path.join(self.molecule.chargemol, control_path)

    with open("log.txt", "w+") as log:
        try:
            sp.run(
                command,
                shell=True,
                stdout=log,
                stderr=log,
                check=True,
                env=run_env,
            )
        except sp.CalledProcessError as err:
            raise ChargemolError(
                "Chargemol did not execute properly; check the output file for details."
            ) from err