Esempio n. 1
0
    def torsion_scan(molecule):
        """
        Run the torsion scan stage for a molecule.

        A TorsionScan is built for the molecule, its scan order is determined and the scan is run;
        the start and end of the stage are written to the log.
        :param molecule: The molecule object handed to TorsionScan
        :return: The same molecule object that was passed in
        """
        append_to_log('Starting torsion_scans')

        scanner = TorsionScan(molecule)
        # The scan order should already have been captured from the dihedral file; make sure it is available
        scanner.find_scan_order()
        scanner.scan()

        append_to_log('Finishing torsion_scans')
        return molecule
Esempio n. 2
0
    def stage_wrapper(self, start_key, begin_log_msg='', fin_log_msg='', torsion_options=None):
        """
        Run the stage registered under start_key in self.order, then pickle the molecule for the next stage.

        In order, this:
            - Unpickles the ligand object stored for the start_key stage and records start_key as its state
            - Stores the torsion options on the molecule (if a dictionary was given)
            - Announces the stage: a "skipping" message when self.order maps the stage to self.skip,
              otherwise begin_log_msg (if specified)
            - Moves into the numbered working directory for the stage (via make_and_change_into)
            - Calls the stage function, removes the stage from self.order and moves back to the starting directory
            - Prints fin_log_msg (if specified and the stage was not skipped)
            - Pickles the ligand object again with the next key in self.order as its state

        :param start_key: Name of the stage to run; it is looked up in self.order and self.immutable_order
        :param begin_log_msg: Message printed before the stage runs (not printed when skipping)
        :param fin_log_msg: Message printed after the stage has run (not printed when skipping)
        :param torsion_options: Optional torsion options passed to self.store_torsions
        :return: The key of the next stage in self.order, or None when no stages remain
        """

        mol = unpickle()[start_key]

        # Tag the molecule with the current stage so any exception can be logged against it
        mol.state = start_key

        # Hand the torsion options dictionary over to the molecule when one was supplied
        if torsion_options is not None:
            mol = self.store_torsions(mol, torsion_options)

        skipping = self.order[start_key] == self.skip
        if skipping:
            printf(f'{COLOURS.blue}Skipping stage: {start_key}{COLOURS.end}')
            append_to_log(f'skipping stage: {start_key}')
        elif begin_log_msg:
            printf(f'{begin_log_msg}...', end=' ')

        home = os.getcwd()

        # Folders are numbered by the stage's (1-based) position in the immutable order, e.g. 03_<stage>
        stage_number = str(self.immutable_order.index(start_key) + 1).zfill(2)
        make_and_change_into(f'{stage_number}_{start_key}')

        self.order[start_key](mol)
        self.order.pop(start_key, None)
        os.chdir(home)

        # Only the first remaining stage matters; with nothing left there is nothing to pickle or report
        remaining_stages = list(self.order)
        if not remaining_stages:
            return None

        next_key = remaining_stages[0]
        if fin_log_msg and not skipping:
            printf(f'{COLOURS.green}{fin_log_msg}{COLOURS.end}')

        mol.pickle(state=next_key)
        return next_key
Esempio n. 3
0
    def mm_optimise(molecule):
        """
        Use an mm force field to get the initial optimisation of a molecule.

        options
        ---------
        none (or the OpenFF_generics parameter engine): the input coordinates are copied to the mm coordinates.

        openmm: geometric is run with OpenMM using the molecule's own parameters; this depends on the force field
        the molecule was parameterised with (gaff/2, OPLS, smirnoff) and so needs a parameter engine.

        anything else: RDKit MFF or UFF force fields, which can have strange effects on the geometry of molecules.

        :param molecule: The molecule to optimise
        :return: The same molecule object
        :raises OptimisationFailed: If openmm is requested but the molecule has no initial parameters
        """

        append_to_log('Starting mm_optimisation')

        skip_optimisation = molecule.mm_opt_method == 'none' or molecule.parameter_engine == 'OpenFF_generics'

        if skip_optimisation:
            # Nothing to do: the mm structure is just the input structure
            molecule.coords['mm'] = molecule.coords['input']

        elif molecule.mm_opt_method != 'openmm':
            # TODO change to qcengine as this can already be done
            # Run an rdkit optimisation with the force field matching the requested method
            rdkit_force_fields = {'rdkit_mff': 'MFF', 'rdkit_uff': 'UFF'}
            molecule.filename = RDKit().mm_optimise(molecule.filename,
                                                    ff=rdkit_force_fields[molecule.mm_opt_method])

        elif molecule.parameter_engine == 'none':
            raise OptimisationFailed('You can not optimise a molecule with OpenMM and no initial parameters; '
                                     'consider parametrising or using UFF/MFF in RDKit')

        else:
            # Write the pdb and parameter inputs that geometric needs
            molecule.write_pdb(input_type='input')
            molecule.write_parameters()

            # Run geometric, sending all of its output to log.txt
            # TODO Should this be moved to allow a decorator?
            with open('log.txt', 'w+') as log:
                constraints = molecule.constraints_file if molecule.constraints_file is not None else ""
                command = (f'geometric-optimize --reset --epsilon 0.0 --maxiter {molecule.iterations} --pdb '
                           f'{molecule.name}.pdb --openmm {molecule.name}.xml '
                           f'{constraints}')
                sp.run(command, shell=True, stdout=log, stderr=log)

            # The return code is not checked, so we carry on even without convergence; this is fine.
            # Read the xyz trajectory and keep its last frame as the mm optimised structure
            molecule.read_file(f'{molecule.name}_optim.xyz', input_type='traj')
            molecule.coords['mm'] = molecule.coords['traj'][-1]

        append_to_log(f'Finishing mm_optimisation of the molecule with {molecule.mm_opt_method}')

        return molecule
Esempio n. 4
0
    def lennard_jones(molecule):
        """
        Calculate the Lennard-Jones parameters for the molecule.

        Every file whose name starts with 'DDEC' is copied from the 07_charges folder (under molecule.home)
        into the current directory, then the non-bonded force is calculated with LennardJones and stored
        on the molecule. The combination rule is set to 'opls'.
        :param molecule: The molecule to calculate the parameters for
        :return: The same molecule object with NonbondedForce and combination set
        """

        append_to_log('Starting Lennard-Jones parameter calculation')

        # Bring the DDEC output files from the charges stage into this working directory
        charges_dir = os.path.join(molecule.home, '07_charges')
        ddec_files = (name for name in os.listdir(charges_dir) if name.startswith('DDEC'))
        for name in ddec_files:
            copy(os.path.join(charges_dir, name), name)

        molecule.NonbondedForce = LennardJones(molecule).calculate_non_bonded_force()

        # Using these parameters also implies the opls combination rule
        molecule.combination = 'opls'

        append_to_log('Finishing Lennard-Jones parameter calculation')

        return molecule
Esempio n. 5
0
    def torsion_optimise(molecule):
        """
        Run the torsion optimisation stage.

        If the molecule has no stored qm scan results, they are first collected from the 09_torsion_scan
        folder (finding the scan order too when it is missing) before moving to the 10_torsion_optimise
        folder. TorsionOptimiser is then run on the molecule.
        :param molecule: The molecule whose torsions are optimised
        :return: The same molecule object
        """

        append_to_log('Starting torsion_optimisations')

        # The optimiser needs the scan results; gather them from the scan stage folder when absent
        if molecule.qm_scans is None:
            scan_dir = os.path.join(molecule.home, '09_torsion_scan')
            os.chdir(scan_dir)

            collector = TorsionScan(molecule)
            if molecule.scan_order is None:
                collector.find_scan_order()
            collector.collect_scan()

            optimise_dir = os.path.join(molecule.home, '10_torsion_optimise')
            os.chdir(optimise_dir)

        TorsionOptimiser(molecule).run()

        append_to_log('Finishing torsion_optimisations')

        return molecule
Esempio n. 6
0
    def hessian(molecule):
        """
        Using the assigned bonds engine, calculate and extract the Hessian matrix.

        For Gaussian (g09 / g16) the calculation restarts from the qm_optimise checkpoint file when it can be
        copied; otherwise the structure is optimised first. For any other engine the Hessian comes from
        QCEngine using psi4 and is also saved to hessian.txt.
        :param molecule: The molecule to calculate the Hessian for
        :return: The same molecule object with its hessian attribute set
        :raises HessianCalculationFailed: If the Gaussian job does not report success
        """

        append_to_log('Starting hessian calculation')
        molecule.find_bond_lengths('qm')

        if molecule.bonds_engine not in ['g09', 'g16']:
            hessian_matrix = QCEngine(molecule).call_qcengine(engine='psi4', driver='hessian', input_type='qm')
            np.savetxt('hessian.txt', hessian_matrix)

        else:
            gaussian = Gaussian(molecule)

            # Prefer the checkpoint file from the optimisation stage as it has higher xyz precision
            try:
                checkpoint = os.path.join(molecule.home, '03_qm_optimise', 'lig.chk')
                copy(checkpoint, 'lig.chk')
                result = gaussian.generate_input('qm', hessian=True, restart=True, execute=molecule.bonds_engine)
            except FileNotFoundError:
                append_to_log('qm_optimise checkpoint not found, optimising first to refine atomic coordinates',
                              msg_type='minor')
                result = gaussian.generate_input('qm', optimise=True, hessian=True, execute=molecule.bonds_engine)

            if not result['success']:
                raise HessianCalculationFailed('The hessian was not calculated check the log file.')

            hessian_matrix = gaussian.hessian()

        molecule.hessian = hessian_matrix

        append_to_log(f'Finishing Hessian calculation using {molecule.bonds_engine}')

        return molecule
Esempio n. 7
0
    def density(self, molecule):
        """
        Perform density calculation with the qm engine.

        With ONETEP as the density engine only the xyz file of the qm coordinates is written, and self.order
        is replaced so that the charges, lennard_jones and torsion_scan stages are skipped before pausing.
        With any other engine, the engine class is looked up in self.engine_dict and run with density=True.
        :param molecule: The molecule to calculate the density for
        :return: The same molecule object
        """

        append_to_log('Starting density calculation')

        if molecule.density_engine != 'onetep':
            qm_engine = self.engine_dict[molecule.density_engine](molecule)
            # Use the qm coordinates when there are any, otherwise fall back to the input coordinates
            coords_key = 'qm' if list(molecule.coords['qm']) else 'input'
            qm_engine.generate_input(input_type=coords_key, density=True, execute=molecule.density_engine)
            append_to_log('Finishing Density calculation')
            return molecule

        # If using ONETEP, stop after this step
        molecule.write_xyz(input_type='qm')
        append_to_log('Density analysis file made for ONETEP')

        # Edit the order to end here
        shortened_order = [
            ('density', self.density),
            ('charges', self.skip),
            ('lennard_jones', self.skip),
            ('torsion_scan', self.skip),
            ('pause', self.pause),
            ('finalise', self.finalise),
        ]
        self.order = OrderedDict(shortened_order)

        return molecule
Esempio n. 8
0
    def generate_input(self, input_type='input', optimise=False, hessian=False, density=False,
                       energy=False, fchk=False, restart=False, execute=True):
        """
        Write the psi4 input file (input.dat) for the requested tasks and optionally run it (without geometric).
        :param input_type: The coordinate set of the molecule to be used
        :param optimise: Optimise the molecule to the desired convergence criterion within the iteration limit
        :param hessian: Calculate the hessian matrix
        :param density: Calculate the electron density (currently writes nothing to the input file)
        :param energy: Calculate the single point energy of the molecule
        :param fchk: Write out a gaussian style Fchk file
        :param restart: Restart the calculation from a log point (required but unused to match g09's generate_input())
        :param execute: Run the desired Psi4 job; any truthy value runs it
        :return: The result of self.check_for_errors() when the job was run,
                 otherwise the dict {'success': False, 'error': 'Not run'}
        :raises Psi4Error: If the psi4 command exits with a non-zero status
        """

        mol = self.molecule

        # Pieces of the "set { ... }" options block and of the task list, joined just before writing
        option_parts = []
        task_parts = []

        if energy:
            append_to_log('Writing psi4 energy calculation input')
            task_parts.append(f"\nenergy('{mol.theory}')")

        if optimise:
            append_to_log('Writing PSI4 optimisation input', 'minor')
            option_parts.append(f' g_convergence {mol.convergence}\n GEOM_MAXITER {mol.iterations}\n')
            task_parts.append(f"\noptimize('{mol.theory.lower()}')")

        if hessian:
            append_to_log('Writing PSI4 Hessian matrix calculation input', 'minor')
            option_parts.append(' hessian_write on\n')
            task_parts.append(f"\nenergy, wfn = frequency('{mol.theory.lower()}', return_wfn=True)")
            task_parts.append('\nwfn.hessian().print_out()\n\n')

        if density:
            # Density input is not written at present; the intended implementation is kept below for reference
            pass
        #     append_to_log('Writing PSI4 density calculation input', 'minor')
        #     setters += " cubeprop_tasks ['density']\n"
        #
        #     overage = get_overage(self.molecule.name)
        #     setters += ' CUBIC_GRID_OVERAGE [{0}, {0}, {0}]\n'.format(overage)
        #     setters += ' CUBIC_GRID_SPACING [0.13, 0.13, 0.13]\n'
        #     tasks += f"grad, wfn = gradient('{self.molecule.theory.lower()}', return_wfn=True)\ncubeprop(wfn)"

        if fchk:
            append_to_log('Writing PSI4 input file to generate fchk file')
            task_parts.append(f"\ngrad, wfn = gradient('{mol.theory.lower()}', return_wfn=True)")
            task_parts.append('\nfchk_writer = psi4.core.FCHKWriter(wfn)')
            task_parts.append(f'\nfchk_writer.write("{mol.name}_psi4.fchk")\n')

        # TODO If overage cannot be made to work, delete and just use Gaussian.
        if mol.solvent:
            # Solvent input is not written at present; the intended implementation is kept below for reference
            pass
        #     setters += ' pcm true\n pcm_scf_type total\n'
        #     tasks += '\n\npcm = {'
        #     tasks += '\n units = Angstrom\n Medium {\n  SolverType = IEFPCM\n  Solvent = Chloroform\n }'
        #     tasks += '\n Cavity {\n  RadiiSet = UFF\n  Type = GePol\n  Scaling = False\n  Area = 0.3\n  Mode = Implicit'
        #     tasks += '\n }\n}'

        # Close the "set {" block that is opened when the file is written
        option_parts.append('}\n')

        # When psi4 is not launched from here (no -n flag), put the thread count in the input file instead
        if not execute:
            option_parts.append(f'set_num_threads({mol.threads})\n')

        # input.dat is the PSI4 input file.
        with open('input.dat', 'w+') as input_file:
            # The memory line and the opening of the molecule block are always written
            header = (f'memory {mol.memory} GB\n\nmolecule {mol.name} {{\n'
                      f'{mol.charge} {mol.multiplicity} \n')
            input_file.write(header)

            # One line per atom: the symbol followed by the three coordinates to 10 decimal places
            for index, position in enumerate(mol.coords[input_type]):
                x, y, z = float(position[0]), float(position[1]), float(position[2])
                input_file.write(f' {mol.atoms[index].atomic_symbol}    '
                                 f'{x: .10f}  {y: .10f}  {z: .10f} \n')

            input_file.write(f" units angstrom\n no_reorient\n}}\n\nset {{\n basis {mol.basis}\n")

            input_file.write(''.join(option_parts))
            input_file.write(''.join(task_parts))

        if not execute:
            return {'success': False, 'error': 'Not run'}

        with open('log.txt', 'w+') as log:
            try:
                sp.run(f'psi4 input.dat -n {mol.threads}', shell=True, stdout=log, stderr=log, check=True)
            except sp.CalledProcessError:
                raise Psi4Error('Psi4 did not execute successfully check log file for details.')

        # Now check the exit status of the job
        return self.check_for_errors()
Esempio n. 9
0
    def fit(self, atom_index: int) -> None:
        """
        Fit virtual sites for one atom and apply whichever arrangement (none, one site or two sites) is chosen.

        The error for the objective functions is defined as the sum of differences at each sample point
        between the ideal ESP and the ESP with and without sites; here it is divided by the number of
        sample points before being stored in self.site_errors.

        * The ESP is first calculated without any virtual sites, if the error is at or below 1.0, no fitting
        is carried out and the method returns immediately.
        * Virtual sites are added along pre-defined vectors, and the charges and scale factors of the vectors
        are fit to give the lowest errors.
        * This is done for single sites and two sites (in two orientations when the atom has exactly two bonds).
        * The two sites may be placed symmetrically, using the bool molecule.enable_symmetry attribute
        (only consulted when the atom has exactly two bonds).
        * The errors from the sites are passed to append_to_log, and self.plot is called for the atom.

        When one or two sites are chosen, their coordinates are appended to self.v_sites_coords and their
        charges are subtracted from the atom's entry in molecule.NonbondedForce and molecule.ddec_data.
        :param atom_index: The index of the atom being analysed.
        """

        n_sample_points = len(self.no_site_esps)

        # No site
        # The one-site objective is evaluated at the fixed parameters (0, 1); presumably a zero-charge site,
        # i.e. the error with no virtual site at all -- confirm against one_site_objective_function.
        vec = self.get_vector_from_coords(atom_index, n_sites=1)
        no_site_error = self.one_site_objective_function((0, 1), atom_index,
                                                         vec)
        self.site_errors[0] = no_site_error / n_sample_points

        # Already good enough without any sites: nothing is fitted, logged or plotted.
        if self.site_errors[0] <= 1.0:
            return

        # Bounds for fitting, format: charge, charge, lambda, lambda
        # Since the vectors are scaled to be 1 angstrom long, lambda makes the v-site distance -1 to 1 angstrom.
        bounds = ((-1.0, 1.0), (-1.0, 1.0), (-1.0, 1.0), (-1.0, 1.0))

        # One site
        # bounds[1:3] picks out one charge bound and one lambda bound for the two-parameter fit.
        one_site_fit = minimize(
            self.one_site_objective_function,
            np.array([0, 1]),
            args=(atom_index, vec),
            bounds=bounds[1:3],
        )
        self.site_errors[1] = one_site_fit.fun / n_sample_points
        q, lam = one_site_fit.x
        # Each site is stored as (coordinates, charge, parent atom index).
        self.one_site_coords = [((vec * lam) + self.coords[atom_index], q,
                                 atom_index)]

        # 1 or 3 bonds (more precisely: any bond count other than 2)
        if len(self.molecule.atoms[atom_index].bonds) != 2:
            vec_a, vec_b = self.get_vector_from_coords(atom_index, n_sites=2)
            # Four parameters are fit: the two charges followed by the two lambdas.
            two_site_fit = minimize(
                self.two_sites_objective_function,
                np.array([0.0, 0.0, 1.0, 1.0]),
                args=(atom_index, vec_a, vec_b),
                bounds=bounds,
            )
            self.site_errors[2] = two_site_fit.fun / n_sample_points
            q_a, q_b, lam_a, lam_b = two_site_fit.x
            site_a_coords, site_b_coords = self.sites_coords_from_vecs_and_lams(
                atom_index, lam_a, lam_b, vec_a, vec_b)
            self.two_site_coords = [
                (site_a_coords, q_a, atom_index),
                (site_b_coords, q_b, atom_index),
            ]

        # 2 bonds
        else:
            # Arbitrarily large error; this will be overwritten.
            # NOTE(review): if neither orientation gives an error below this value, self.site_errors[2] and
            # self.two_site_coords keep whatever they held before this call.
            final_err = 10000
            # Try both orientations of the site vectors (alt=True then alt=False) and keep the better fit.
            for alt in [True, False]:
                vec_a, vec_b = self.get_vector_from_coords(atom_index,
                                                           n_sites=2,
                                                           alt=alt)
                if self.molecule.enable_symmetry:
                    # Symmetric placement: a single charge and a single lambda are fit and shared by both sites.
                    two_site_fit = minimize(
                        self.symm_two_sites_objective_function,
                        np.array([0.0, 1.0]),
                        args=(atom_index, vec_a, vec_b),
                        bounds=bounds[1:3],
                    )
                    if (two_site_fit.fun / n_sample_points) < final_err:
                        final_err = two_site_fit.fun / n_sample_points
                        self.site_errors[
                            2] = two_site_fit.fun / n_sample_points
                        q, lam = two_site_fit.x
                        q_a = q_b = q
                        lam_a = lam_b = lam
                        (
                            site_a_coords,
                            site_b_coords,
                        ) = self.sites_coords_from_vecs_and_lams(
                            atom_index, lam_a, lam_b, vec_a, vec_b)
                        self.two_site_coords = [
                            (site_a_coords, q_a, atom_index),
                            (site_b_coords, q_b, atom_index),
                        ]
                else:
                    # Independent placement: each site gets its own charge and lambda.
                    two_site_fit = minimize(
                        self.two_sites_objective_function,
                        np.array([0.0, 0.0, 1.0, 1.0]),
                        args=(atom_index, vec_a, vec_b),
                        bounds=bounds,
                    )
                    if (two_site_fit.fun / n_sample_points) < final_err:
                        final_err = two_site_fit.fun / n_sample_points
                        self.site_errors[
                            2] = two_site_fit.fun / n_sample_points
                        q_a, q_b, lam_a, lam_b = two_site_fit.x
                        (
                            site_a_coords,
                            site_b_coords,
                        ) = self.sites_coords_from_vecs_and_lams(
                            atom_index, lam_a, lam_b, vec_a, vec_b)
                        self.two_site_coords = [
                            (site_a_coords, q_a, atom_index),
                            (site_b_coords, q_b, atom_index),
                        ]

        # Choose the arrangement. Errors of the more complex options are scaled by v_site_error_factor
        # before being compared with the simpler option.
        max_err = self.molecule.v_site_error_factor
        if self.site_errors[0] < min(self.site_errors[1] * max_err,
                                     self.site_errors[2] * max_err):
            append_to_log(
                "No virtual site placement has reduced the error significantly.",
                "plain",
                True,
            )
        elif self.site_errors[1] < self.site_errors[2] * max_err:
            append_to_log(
                "The addition of one virtual site was found to be best.",
                "plain", True)
            self.v_sites_coords.extend(self.one_site_coords)
            # The site's charge is subtracted from the parent atom's values in both stores.
            self.molecule.NonbondedForce[atom_index][
                0] -= self.one_site_coords[0][1]
            self.molecule.ddec_data[atom_index].charge -= self.one_site_coords[
                0][1]
        else:
            append_to_log(
                "The addition of two virtual sites was found to be best.",
                "plain", True)
            self.v_sites_coords.extend(self.two_site_coords)
            # The combined charge of both sites is subtracted from the parent atom's values in both stores.
            self.molecule.NonbondedForce[atom_index][0] -= (
                self.two_site_coords[0][1] + self.two_site_coords[1][1])
            self.molecule.ddec_data[atom_index].charge -= (
                self.two_site_coords[0][1] + self.two_site_coords[1][1])
        append_to_log(
            f"Errors (kcal/mol):\n"
            f"No Site     One Site     Two Sites\n"
            f"{self.site_errors[0]:.4f}      {self.site_errors[1]:.4f}       {self.site_errors[2]:.4f}",
            "plain",
            True,
        )
        self.plot(atom_index)
Esempio n. 10
0
    def qm_optimise(self, molecule):
        """
        Optimise the molecule coords. Can be through PSI4 (with(out) geometric) or through Gaussian.

        The optimisation starts from the mm coordinates when the molecule has any, otherwise from the input
        coordinates. A failed optimisation is restarted up to max_restarts times:
            - with geometric, from the last geometry reported in the failed result when one is available,
              otherwise from the starting coordinates again;
            - otherwise, depending on the reported error: from the 'mm' coordinates with restart=True ('FileIO'),
              from the 'input' coordinates with restart=True ('Max iterations'), or from a new RDKit conformer.

        On success the optimised coordinates (molecule.coords['qm']) and energy (molecule.qm_energy) are stored
        and the structure is written to opt.xyz; with geometric the trajectory is written out as well.
        :param molecule: The molecule to optimise
        :return: The same molecule object
        :raises OptimisationFailed: If the optimisation still has not succeeded after the restarts
        """

        append_to_log('Starting qm_optimisation')
        qm_engine = self.engine_dict[molecule.bonds_engine](molecule)
        max_restarts = 3

        # Prefer the mm optimised structure as the starting point when one exists
        start_coords = 'mm' if list(molecule.coords['mm']) else 'input'

        if molecule.geometric and (molecule.bonds_engine == 'psi4'):
            qceng = QCEngine(molecule)
            result = qceng.call_qcengine(engine='geometric', driver='gradient', input_type=start_coords)

            restart_count = 0
            while (not result['success']) and (restart_count < max_restarts):
                append_to_log(f'{molecule.bonds_engine} optimisation failed with error {result["error"]}; restarting',
                              msg_type='minor')

                try:
                    # Carry on from the last geometry of the failed run (converted from bohr to angstroms)
                    molecule.coords['temp'] = np.array(
                        result['input_data']['final_molecule']['geometry']).reshape((len(molecule.atoms), 3))
                    molecule.coords['temp'] *= constants.BOHR_TO_ANGS

                    result = qceng.call_qcengine(engine='geometric', driver='gradient', input_type='temp')

                except KeyError:
                    # No final geometry in the failed result, so start again from the beginning
                    result = qceng.call_qcengine(engine='geometric', driver='gradient', input_type=start_coords)

                restart_count += 1

            if not result['success']:
                raise OptimisationFailed("The optimisation did not converge")

            molecule.read_geometric_traj(result['trajectory'])

            # store the final molecule as the qm optimised structure (converted from bohr to angstroms)
            molecule.coords['qm'] = np.array(result['final_molecule']['geometry']).reshape((len(molecule.atoms), 3))
            molecule.coords['qm'] *= constants.BOHR_TO_ANGS

            molecule.qm_energy = result['energies'][-1]

            # Write out the trajectory file
            molecule.write_xyz('traj', name=f'{molecule.name}_opt')
            molecule.write_xyz('qm', name='opt')

        # Using Gaussian or geometric off
        else:
            result = qm_engine.generate_input(input_type=start_coords, optimise=True, execute=molecule.bonds_engine)

            restart_count = 0
            while (not result['success']) and (restart_count < max_restarts):
                append_to_log(f'{molecule.bonds_engine} optimisation failed with error {result["error"]}; restarting',
                              msg_type='minor')

                if result['error'] == 'FileIO':
                    result = qm_engine.generate_input('mm', optimise=True, restart=True, execute=molecule.bonds_engine)
                elif result['error'] == 'Max iterations':
                    result = qm_engine.generate_input('input', optimise=True, restart=True, execute=molecule.bonds_engine)
                else:
                    # Any other error: try again from a freshly generated conformer
                    molecule.coords['temp'] = RDKit().generate_conformers(molecule.rdkit_mol)[0]
                    result = qm_engine.generate_input('temp', optimise=True, execute=molecule.bonds_engine)

                restart_count += 1

            if not result['success']:
                # Report the real restart limit so the message cannot drift from max_restarts
                raise OptimisationFailed(f"{molecule.bonds_engine} "
                                         f"optimisation did not converge after {max_restarts} restarts; "
                                         f"last error {result['error']}")

            molecule.coords['qm'], molecule.qm_energy = qm_engine.optimised_structure()
            molecule.write_xyz('qm', name='opt')

        append_to_log(f'Finishing qm_optimisation of molecule{" using geometric" if molecule.geometric else ""}')

        return molecule
Esempio n. 11
0
    def generate_input(self, execute=True):
        """
        Given a DDEC version (from the defaults), this function writes the job file for chargemol and executes it.

        The job file is written to job_control.txt in the current directory. A DDEC version other than 3 or 6
        is replaced by 6 (on the molecule as well) and a warning is logged.

        While chargemol runs, OMP_NUM_THREADS is set to the molecule's thread count; afterwards the variable is
        put back to its previous state (removed, or restored to the value it held before), whether or not the
        run succeeded. All chargemol output goes to log.txt.
        :param execute: Run chargemol after writing the job file; any truthy value runs it
        :raises ChargemolError: If chargemol exits with a non-zero status
        """

        if self.molecule.ddec_version not in (3, 6):
            append_to_log(
                message=
                "Invalid or unsupported DDEC version given, running with default version 6.",
                msg_type="warning",
            )
            self.molecule.ddec_version = 6

        # NOTE(review): the net charge is always written as 0.0 -- confirm whether the molecule's own charge
        # should be used here for charged molecules.
        job_sections = (
            f"<input filename>\n{self.molecule.name}.wfx\n</input filename>",
            "\n\n<net charge>\n0.0\n</net charge>",
            "\n\n<periodicity along A, B and C vectors>\n.false.\n.false.\n.false.",
            "\n</periodicity along A, B and C vectors>",
            f"\n\n<atomic densities directory complete path>\n{self.molecule.chargemol}"
            f"/atomic_densities/",
            "\n</atomic densities directory complete path>",
            f"\n\n<charge type>\nDDEC{self.molecule.ddec_version}\n</charge type>",
            "\n\n<compute BOs>\n.true.\n</compute BOs>",
            "\n\n<print atomic densities>\n.true.\n</print atomic densities>",
        )

        # Write the charges job file.
        with open("job_control.txt", "w+") as charge_file:
            charge_file.write("".join(job_sections))

        if not execute:
            return

        # Export a variable to the environment that chargemol will use to work out the threads, must be a string.
        # Remember what was there before so the caller's environment is left as it was found.
        previous_threads = os.environ.get("OMP_NUM_THREADS")
        os.environ["OMP_NUM_THREADS"] = str(self.molecule.threads)
        try:
            with open("log.txt", "w+") as log:
                # The binary path and its argument form one shell command string
                control_path = (
                    "chargemol_FORTRAN_09_26_2017/compiled_binaries/linux/"
                    "Chargemol_09_26_2017_linux_parallel job_control.txt")
                try:
                    sp.run(
                        os.path.join(self.molecule.chargemol, control_path),
                        shell=True,
                        stdout=log,
                        stderr=log,
                        check=True,
                    )
                except sp.CalledProcessError as exc:
                    raise ChargemolError(
                        "Chargemol did not execute properly; check the output file for details."
                    ) from exc
        finally:
            # Runs on failure too, so the thread setting never leaks into later stages
            if previous_threads is None:
                os.environ.pop("OMP_NUM_THREADS", None)
            else:
                os.environ["OMP_NUM_THREADS"] = previous_threads