Esempio n. 1
0
    def serialise_system(self):
        """Build an OpenMM system with smirnoff99Frosst and write it to 'serialised.xml'.

        The pdb file named after the molecule supplies the OpenMM topology; the smiles
        string that RDKit extracts from the same pdb is used to build the openforcefield
        molecule, which provides the connection info for the OFF topology.
        """

        pdb_name = f'{self.molecule.name}.pdb'

        # The OpenMM topology is read straight from the pdb file
        omm_topology = app.PDBFile(pdb_name).topology

        # Connection info: go through the smiles string RDKit derives from the pdb
        smiles = RDKit().get_smiles(pdb_name)
        off_molecule = Molecule.from_smiles(smiles)
        off_topology = Topology.from_openmm(omm_topology, unique_molecules=[off_molecule])

        # Parametrise the topology with the smirnoff99Frosst force field
        smirnoff = ForceField('test_forcefields/smirnoff99Frosst.offxml')
        system = smirnoff.create_openmm_system(off_topology)

        # The file is opened before serialising, so it is created/truncated first
        with open('serialised.xml', 'w+') as xml_out:
            xml_out.write(XmlSerializer.serializeSystem(system))
Esempio n. 2
0
    def _read_input(self):
        """
        Inspect self.mol_input and dispatch to the reader that matches its kind.

        Checked in order:
          1. an object whose class is named 'Molecule' -> self._read_qc_json()
          2. anything with a 'stem' attribute (path-like) -> RDKit, falling back to self._read_file()
          3. a plain string -> treated as a smiles string

        Raises
        ------
        RuntimeError
            If mol_input matches none of the above.
        """

        source = self.mol_input

        # QCArchive object (matched by class name only, no isinstance check)
        if source.__class__.__name__ == 'Molecule':
            self._read_qc_json()
            return

        # File (pdb, xyz, etc)
        if hasattr(source, 'stem'):
            try:
                # Preferred route: parse with rdkit
                self.rdkit_mol = RDKit.mol_input_to_rdkit_mol(source)
                self._mol_from_rdkit()
            except AttributeError:
                # An AttributeError from either step above means rdkit could not handle it
                self._read_file()
            return

        # Smiles string input
        if isinstance(source, str):
            self.rdkit_mol = RDKit.smiles_to_rdkit_mol(source, self.name)
            self._mol_from_rdkit()
            return

        raise RuntimeError('Cannot read input. mol_input must be a smiles string, path of a file, or qc json.')
Esempio n. 3
0
    def handle_bulk(self):
        """
        Run every molecule listed in the bulk .csv file, then exit the interpreter.

        For each molecule the configs are layered in a fixed order, later sources
        overriding earlier ones: the .csv row, then the .ini file named by that row,
        then any terminal argument that is not None. Execute is then called on the
        molecule and the working directory is reset to where the bulk run started.

        Configs cannot be changed between molecule analyses as config data is
        only loaded once at the start; -restart is required for that.

        This method never returns: it finishes with sys.exit.
        """

        # mol_data_from_csv handles defaults if no argument is given
        bulk_data = mol_data_from_csv(self.args.bulk_run)

        start_dir = os.getcwd()

        # Iterate over a snapshot of the keys
        for name in list(bulk_data):
            printf(f'Analysing: {name}\n')

            row = bulk_data[name]

            # Build the pdb from the smiles string when there is one, otherwise expect <name>.pdb
            smiles_string = row['smiles']
            if smiles_string is None:
                self.file = f'{name}.pdb'
            else:
                self.file = RDKit().smiles_to_pdb(smiles_string, name)

            # Initialise molecule, ready to add configs to it
            self.molecule = Ligand(self.file)

            # Layer 1: every column of the csv row becomes an attribute
            for key, val in row.items():
                setattr(self.molecule, key, val)

            self.molecule.skip = []

            # Layer 2: the .ini file named in the csv row
            for key, val in Configure.load_config(self.molecule.config).items():
                setattr(self.molecule, key, val)

            # Layer 3: terminal arguments that were actually supplied
            for key, val in vars(self.args).items():
                if val is None:
                    continue
                setattr(self.molecule, key, val)

            # Now that all configs are stored correctly: execute.
            Execute(self.molecule)

            # Return to the starting directory before handling the next molecule
            os.chdir(start_dir)

        sys.exit(
            'Bulk analysis complete.\nUse QUBEKit -progress to view the completion progress of your molecules'
        )
Esempio n. 4
0
    def __init__(self):
        """
        Parse the terminal commands, build (or reload) the molecule, apply configs and execute.

        Config precedence, lowest to highest: values from the config file, then any
        terminal argument that is not None. When restarting, the molecule is taken
        from the pickled checkpoint and re-pickled with the updated configs.
        """

        # Create the user's config folder on first use
        home = str(Path.home())
        config_folder = f'{home}/QUBEKit_configs/'
        if not os.path.exists(config_folder):
            os.makedirs(config_folder)
            print(f'Making config folder at: {home}')

        self.args = self.parse_commands()

        # If it's a bulk run, handle it separately
        # TODO Add .sdf as possible bulk_run, not just .csv
        if self.args.bulk_run:
            self.handle_bulk()

        restarting = self.args.restart is not None

        if restarting:
            # The molecule comes from the pickled checkpoint file, keyed by the restart value
            try:
                self.molecule = unpickle()[self.args.restart]
            except FileNotFoundError:
                raise FileNotFoundError('No checkpoint file found!')
        else:
            # Fresh run: the input is either generated from smiles or given directly
            if self.args.smiles:
                self.file = RDKit().smiles_to_pdb(*self.args.smiles)
            else:
                self.file = self.args.input

            self.molecule = Ligand(self.file)

        # Find which config file is being used
        self.molecule.config = self.args.config_file

        # Configs from the file go on first
        for name, val in Configure.load_config(self.molecule.config).items():
            setattr(self.molecule, name, val)

        # Although these may be None always, they need to be explicitly set anyway.
        for flag in ('restart', 'end', 'skip'):
            setattr(self.molecule, flag, None)

        # Terminal commands override anything set so far
        for name, val in vars(self.args).items():
            if val is None:
                continue
            setattr(self.molecule, name, val)

        # If restarting put the molecule back into the checkpoint file with the new configs
        if restarting:
            self.molecule.pickle(state=self.args.restart)

        # Now that all configs are stored correctly: execute.
        Execute(self.molecule)
Esempio n. 5
0
    def mm_optimise(molecule):
        """
        Produce the initial, mm-level optimised structure for a molecule.

        Behaviour by molecule.mm_opt_method
        -----------------------------------
        'none' (or parameter_engine == 'OpenFF_generics'):
            no optimisation; the input coordinates are copied to coords['mm'].
        'openmm':
            geometric-optimize is run in a shell with the molecule's pdb and xml files;
            the last frame of the resulting xyz trajectory becomes coords['mm'].
            Raises OptimisationFailed when parameter_engine is 'none'.
        anything else:
            looked up as an RDKit force field ('rdkit_mff' -> MFF, 'rdkit_uff' -> UFF);
            an unknown value raises KeyError. RDKit MFF or UFF force fields can have
            strange effects on the geometry of molecules.

        Returns the same molecule object.
        """

        append_to_log('Starting mm_optimisation')

        method = molecule.mm_opt_method

        if method == 'none' or molecule.parameter_engine == 'OpenFF_generics':
            # Skip the optimisation step
            molecule.coords['mm'] = molecule.coords['input']

        elif method == 'openmm':
            # OpenMM needs parameters to work with; bail out early if there are none
            if molecule.parameter_engine == 'none':
                raise OptimisationFailed('You can not optimise a molecule with OpenMM and no initial parameters; '
                                         'consider parametrising or using UFF/MFF in RDKit')

            # Make the inputs
            molecule.write_pdb(input_type='input')
            molecule.write_parameters()

            # Run geometric, sending both stdout and stderr to log.txt
            # TODO Should this be moved to allow a decorator?
            with open('log.txt', 'w+') as log:
                command = (f'geometric-optimize --reset --epsilon 0.0 --maxiter {molecule.iterations} --pdb '
                           f'{molecule.name}.pdb --openmm {molecule.name}.xml '
                           f'{molecule.constraints_file if molecule.constraints_file is not None else ""}')
                sp.run(command, shell=True, stdout=log, stderr=log)

            # The return code is not checked: carrying on without convergence is fine
            # Read the xyz traj and store the frames
            molecule.read_file(f'{molecule.name}_optim.xyz', input_type='traj')
            # The final frame of the traj is the mm optimised structure
            molecule.coords['mm'] = molecule.coords['traj'][-1]

        else:
            # TODO change to qcengine as this can already be done
            # Run an rdkit optimisation with the right FF
            rdkit_ff = {'rdkit_mff': 'MFF', 'rdkit_uff': 'UFF'}
            force_field = rdkit_ff[method]
            molecule.filename = RDKit().mm_optimise(molecule.filename, ff=force_field)

        append_to_log(f'Finishing mm_optimisation of the molecule with {molecule.mm_opt_method}')

        return molecule
Esempio n. 6
0
    def finalise(molecule):
        """
        Write the final files for the molecule, attach its RDKit descriptors and print it.

        pretty_print is called twice: first with to_file=True, then with its defaults
        (per the original notes: unabbreviated to the log file, abbreviated to the terminal).

        Returns the same molecule object.
        """

        # Final pdb and parameter (xml) files
        molecule.write_pdb()
        molecule.write_parameters()

        # Descriptors are computed from the pdb named after the molecule
        pdb_name = f'{molecule.name}.pdb'
        descriptor_engine = RDKit()
        molecule.descriptors = descriptor_engine.rdkit_descriptors(pdb_name)

        pretty_print(molecule, to_file=True)
        pretty_print(molecule)

        return molecule
Esempio n. 7
0
    def finalise(molecule):
        """
        Write the pdb and xml files, fetch the RDKit descriptors, then print the ligand.

        The ligand object is printed via pretty_print twice: once with to_file=True
        and once with the default arguments.

        Returns the same molecule object.
        """

        # Output files go to the current folder
        molecule.write_pdb()
        molecule.write_parameters()

        # RDKit works from the molecule's own file here
        descriptors = RDKit.rdkit_descriptors(molecule.filename)
        molecule.descriptors = descriptors

        pretty_print(molecule, to_file=True)
        pretty_print(molecule)

        return molecule
Esempio n. 8
0
 def from_file(cls, file_name: str) -> "ReadInput":
     """
     Build the molecule data from an input file.

     .xyz files are handed to cls.from_xyz; every other file type is read
     with RDKit and the molecule name is taken from the "_Name" property.

     Raises
     ------
     FileNotFoundError
         If file_name does not exist.
     """
     path = Path(file_name)

     if path.exists():
         # xyz is a special case of file only internal readers catch
         if path.suffix == ".xyz":
             return cls.from_xyz(file_name=path.as_posix())

         # Everything else goes through rdkit
         mol = RDKit.file_to_rdkit_mol(file_path=path)
         return cls(rdkit_mol=mol, coords=None, name=mol.GetProp("_Name"))

     # Nothing at that location: fail before any parsing is attempted
     raise FileNotFoundError(
         f"{path.as_posix()} could not be found is this path correct?"
     )
Esempio n. 9
0
    def read_file(self):
        """
        Base file reader: load self.filename with RDKit, falling back to the internal readers.

        RDKit is tried first because it should always give the connection info.
        If an AttributeError is raised while reading or extracting, the reader is
        chosen from the file suffix ('.pdb' or '.mol2'); any other suffix is left unread.
        """

        try:
            # Read with RDKit (by file name only) and extract the molecule from the result
            self.mol_from_rdkit(RDKit().read_file(self.filename.name))

        except AttributeError:
            # AttributeError:  errors when reading the input file
            print('RDKit error was found, resorting to standard file readers')

            # The QUBEKit readers only get the connections if they are present in the file
            fallback_readers = {'.pdb': 'read_pdb', '.mol2': 'read_mol2'}
            reader_name = fallback_readers.get(self.filename.suffix)
            if reader_name is not None:
                getattr(self, reader_name)(self.filename)
Esempio n. 10
0
    def mm_optimise(self, molecule):
        """
        Use an mm force field to get the initial optimisation of a molecule

        options
        ---------
        RDKit MFF or UFF force fields can have strange effects on the geometry of molecules

        Geometric / OpenMM depends on the force field the molecule was parameterised with gaff/2, OPLS smirnoff.

        The method and the constraints file are read from self.molecule, while all
        file writing/reading is done on the molecule argument
        (NOTE(review): presumably these are the same object - confirm against the caller).

        With 'openmm', geometric-optimize is run in a shell with its output sent to
        log.txt and the last trajectory frame is stored as the mm structure. Any other
        method is looked up as an RDKit force field; an unknown value raises KeyError.

        Returns the molecule argument.
        """

        # Check which method we want then do the optimisation
        # TODO if we don't want geometric we can do a quick native openmm full optimisation?
        if self.molecule.mm_opt_method == 'openmm':
            # Make the inputs
            molecule.write_pdb(input_type='input')
            molecule.write_parameters()

            # A missing constraints file must add nothing to the command line; formatting
            # None directly would hand the literal word "None" to geometric-optimize.
            constraints_file = self.molecule.constraints_file
            if constraints_file is None:
                constraints_file = ''

            # Run geometric
            with open('log.txt', 'w+') as log:
                sp.run(
                    f'geometric-optimize --reset --epsilon 0.0 --maxiter {molecule.iterations}  --pdb '
                    f'{molecule.name}.pdb --openmm {molecule.name}.xml {constraints_file}',
                    shell=True,
                    stdout=log,
                    stderr=log)

            # This will continue even if we don't converge this is fine
            # Read the xyz traj and store the frames
            molecule.read_xyz(f'{molecule.name}_optim.xyz')
            # Store the last from the traj as the mm optimised structure
            molecule.molecule['mm'] = molecule.molecule['traj'][-1]

        else:
            # TODO change to qcengine as this can already be done

            # Run an rdkit optimisation with the right FF
            rdkit_ff = {'rdkit_mff': 'MFF', 'rdkit_uff': 'UFF'}
            molecule.filename = RDKit.mm_optimise(
                molecule.filename, ff=rdkit_ff[self.molecule.mm_opt_method])

        append_to_log(
            f'mm_optimised the molecule with {self.molecule.mm_opt_method}')

        return molecule
Esempio n. 11
0
    def from_smiles(cls,
                    smiles: str,
                    name: Optional[str] = None) -> "ReadInput":
        """
        Build a ReadInput object from a smiles string, for use by the Ligand class.

        Note
        ----
        This method will generate a conformer for the molecule
        (presumably inside the RDKit helper; no coordinates are passed here).

        Parameters
        ----------
        smiles:
            The smiles string which should be parsed by rdkit.
        name:
            The name that should be given to the molecule.
        """
        # Let rdkit turn the smiles string into a molecule carrying the requested name
        mol_from_smiles = RDKit.smiles_to_rdkit_mol(smiles_string=smiles, name=name)

        # Coordinates are deliberately left as None
        return cls(rdkit_mol=mol_from_smiles, coords=None, name=name)
Esempio n. 12
0
    def __init__(self):
        """
        Parse the terminal commands, pick the input file, apply configs and execute.

        On restart the input is the first entry of the current directory whose name
        contains '.pdb' (IndexError if there is none). Otherwise it is generated from
        the smiles argument, or taken from the input argument.

        Config precedence, lowest to highest: values from the config file, then any
        terminal argument that is not None.
        """
        self.args = self.parse_commands()

        # If it's a bulk run, handle it separately
        # TODO Add .sdf as possible bulk_run, not just .csv
        if self.args.bulk_run:
            self.handle_bulk()

        if self.args.restart:
            # Reuse a pdb already sitting in the working directory
            pdb_candidates = [
                entry for entry in os.listdir(os.getcwd()) if '.pdb' in entry
            ]
            self.file = pdb_candidates[0]
        elif self.args.smiles:
            self.file = RDKit.smiles_to_pdb(self.args.smiles)
        else:
            self.file = self.args.input

        # Initialise molecule
        self.molecule = Ligand(self.file)

        # Find which config file is being used
        self.molecule.config = self.args.config_file

        # Configs from the file go on first
        for name, val in Configure.load_config(self.molecule.config).items():
            setattr(self.molecule, name, val)

        # Although these may be None always, they need be explicitly set anyway.
        for flag in ('restart', 'end', 'skip'):
            setattr(self.molecule, flag, None)

        # Terminal commands override anything set so far
        for name, val in vars(self.args).items():
            if val is None:
                continue
            setattr(self.molecule, name, val)

        # Now that all configs are stored correctly: execute.
        Execute(self.molecule)
Esempio n. 13
0
    def symmetrise_from_topology(self):
        """
        Collect symmetry information for the molecule.

        If rdkit_mol has been generated, the atom symmetry classes are found and the
        bond / angle (and, when dihedrals exist, dihedral) equivalence classes are built.
        These will be used by L-J and the Harmonic Bond/Angle params.

        Then, from the topology alone, hydrogens bonded to carbons and nitrogens are grouped:
            carbon with 3 hydrogens   -> 'methyl'
            carbon with 2 hydrogens   -> 'other'
            nitrogen with 2 hydrogens -> 'amine'
        and stored in self.symm_hs. The methyl carbons and amine nitrogens are recorded,
        and every rotatable bond touching one of them is removed from self.rotatable
        (in place); an emptied list is replaced by None.
        """

        # TODO This needs to be more applicable to proteins (e.g. if no rdkit_mol is created).

        if self.rdkit_mol is not None:

            self.atom_symmetry_classes = RDKit.find_symmetry_classes(self.rdkit_mol)

            self.get_bond_equiv_classes()
            self.get_angle_equiv_classes()

            if self.dihedrals is not None:
                self.get_dihedral_equiv_classes()

        methyl_hs, amine_hs, other_hs = [], [], []
        core_atoms = []

        for atom in self.atoms:
            symbol = atom.atomic_symbol
            if symbol not in ('C', 'N'):
                continue

            # Terminal neighbours (exactly one bond) which really are hydrogens;
            # the element check is needed as halogens are terminal as well
            hs = [
                bonded for bonded in self.topology.neighbors(atom.atom_index)
                if len(list(self.topology.neighbors(bonded))) == 1
                and self.atoms[bonded].atomic_symbol == 'H'
            ]
            hydrogen_count = len(hs)

            if symbol == 'C':
                if hydrogen_count == 2:
                    # This is part of a carbon hydrogen chain
                    other_hs.append(hs)
                elif hydrogen_count == 3:
                    methyl_hs.append(hs)
                    core_atoms.append(atom.atom_index)
            elif hydrogen_count == 2:
                # Only nitrogen can reach this branch
                amine_hs.append(hs)
                core_atoms.append(atom.atom_index)

        self.symm_hs = {'methyl': methyl_hs, 'amine': amine_hs, 'other': other_hs}

        # Methyl and amine torsions are already well represented in most FF's,
        # so drop them from the rotatable list
        if self.rotatable is not None:
            rotatable = self.rotatable

            # Gather first, remove afterwards: the list is never changed while being iterated
            to_remove = [key for key in rotatable if key[0] in core_atoms or key[1] in core_atoms]
            for torsion in to_remove:
                rotatable.remove(torsion)

            self.rotatable = rotatable or None
Esempio n. 14
0
    def qm_optimise(self, molecule):
        """
        Optimise the molecule coords. Can be through PSI4 (with(out) geometric) or through Gaussian.

        The starting structure is coords['mm'] when it is non-empty, otherwise coords['input'].
        A failed optimisation is restarted up to three times before OptimisationFailed is raised.
        On success coords['qm'] and qm_energy are set and the structure is written to xyz.

        Returns the same molecule object.
        """

        append_to_log('Starting qm_optimisation')
        qm_engine = self.engine_dict[molecule.bonds_engine](molecule)
        max_restarts = 3

        def starting_coords():
            # Evaluated at each call site rather than cached up front
            return 'mm' if list(molecule.coords['mm']) else 'input'

        if molecule.geometric and (molecule.bonds_engine == 'psi4'):
            qceng = QCEngine(molecule)
            result = qceng.call_qcengine(engine='geometric', driver='gradient', input_type=starting_coords())

            for _ in range(max_restarts):
                if result['success']:
                    break

                append_to_log(f'{molecule.bonds_engine} optimisation failed with error {result["error"]}; restarting',
                              msg_type='minor')

                try:
                    # Carry on from the last geometry reached, converted from bohr to angstroms
                    last_geometry = result['input_data']['final_molecule']['geometry']
                    molecule.coords['temp'] = np.array(last_geometry).reshape((len(molecule.atoms), 3))
                    molecule.coords['temp'] *= constants.BOHR_TO_ANGS

                    result = qceng.call_qcengine(engine='geometric', driver='gradient', input_type='temp')

                except KeyError:
                    # No usable geometry in the failed result: start again from the original structure
                    result = qceng.call_qcengine(engine='geometric', driver='gradient',
                                                 input_type=starting_coords())

            if not result['success']:
                raise OptimisationFailed("The optimisation did not converge")

            molecule.read_geometric_traj(result['trajectory'])

            # The final molecule is the qm optimised structure (converted to angstroms)
            final_geometry = result['final_molecule']['geometry']
            molecule.coords['qm'] = np.array(final_geometry).reshape((len(molecule.atoms), 3))
            molecule.coords['qm'] *= constants.BOHR_TO_ANGS

            molecule.qm_energy = result['energies'][-1]

            # Write out the trajectory file and the optimised structure
            molecule.write_xyz('traj', name=f'{molecule.name}_opt')
            molecule.write_xyz('qm', name='opt')

        # Using Gaussian or geometric off
        else:
            result = qm_engine.generate_input(input_type=starting_coords(),
                                              optimise=True, execute=molecule.bonds_engine)

            for _ in range(max_restarts):
                if result['success']:
                    break

                error = result['error']
                append_to_log(f'{molecule.bonds_engine} optimisation failed with error {error}; restarting',
                              msg_type='minor')

                if error == 'FileIO':
                    result = qm_engine.generate_input('mm', optimise=True, restart=True, execute=molecule.bonds_engine)
                elif error == 'Max iterations':
                    result = qm_engine.generate_input('input', optimise=True, restart=True, execute=molecule.bonds_engine)
                else:
                    # Any other error: try again from a freshly generated conformer
                    molecule.coords['temp'] = RDKit().generate_conformers(molecule.rdkit_mol)[0]
                    result = qm_engine.generate_input('temp', optimise=True, execute=molecule.bonds_engine)

            if not result['success']:
                raise OptimisationFailed(f"{molecule.bonds_engine} "
                                         f"optimisation did not converge after 3 restarts; last error {result['error']}")

            molecule.coords['qm'], molecule.qm_energy = qm_engine.optimised_structure()
            molecule.write_xyz('qm', name='opt')

        append_to_log(f'Finishing qm_optimisation of molecule{" using geometric" if molecule.geometric else ""}')

        return molecule
Esempio n. 15
0
    def qm_optimise(self, molecule):
        """
        Optimise the molecule with or without geometric.

        With geometric enabled and psi4 as the bonds engine, the optimisation is run once
        through QCEngine; failure raises OptimisationFailed straight away.

        Otherwise the selected QM engine is used, starting from coords['mm'] when it has
        any non-zero entry and from coords['input'] if not. A failed job is restarted at
        most twice, and only for errors with a known recovery:
            'FileIO'           -> rerun from the 'mm' coords with restart=True
            'Distance matrix'  -> first retry from the input structure, then from a
                                  conformer generated by RDKit
        Any other error has no recovery and fails immediately.

        Returns the same molecule object with coords['qm'] set (and qm_energy on the
        non-geometric route).

        Raises
        ------
        OptimisationFailed
            If the optimisation does not succeed.
        """

        # TODO this method's not always printing completion to log file.

        append_to_log('Starting qm_optimisation')
        qm_engine = self.engine_dict[molecule.bonds_engine](molecule)

        if molecule.geometric and molecule.bonds_engine == 'psi4':

            qceng = QCEngine(molecule)
            # See if the structure is there if not we did not optimise
            if molecule.coords['mm'].any():
                result = qceng.call_qcengine('geometric',
                                             'gradient',
                                             input_type='mm')
            else:
                result = qceng.call_qcengine('geometric',
                                             'gradient',
                                             input_type='input')
            # Check if converged and get the geometry
            if result['success']:

                # Load all of the frames into the molecule's trajectory holder
                molecule.read_geometric_traj(result['trajectory'])

                # store the last frame as the qm optimised structure
                molecule.coords['qm'] = molecule.coords['traj'][-1]

                # Write out the trajectory file
                molecule.write_xyz(input_type='traj',
                                   name=f'{molecule.name}_opt')
                molecule.write_xyz(input_type='qm', name='opt')

                append_to_log(
                    f'Finishing qm_optimisation of molecule{" using geometric" if molecule.geometric else ""}'
                )

                return molecule

            else:
                # TODO catch the qcengine error here
                print(result)  # catch the steps done so far
                raise OptimisationFailed("The optimisation did not converge")

        elif molecule.coords['mm'].any():
            result = qm_engine.generate_input(input_type='mm', optimise=True)

        else:
            result = qm_engine.generate_input(input_type='input',
                                              optimise=True)

        # Check the exit status of the job; if failed restart the job up to 2 times
        restart_count = 1
        while not result['success'] and restart_count < 3:
            error = result['error']

            # Without a recovery strategy nothing would be re-run; looping would only
            # log "restarting" and burn the counter, so stop and report the failure.
            if error not in ('FileIO', 'Distance matrix'):
                break

            append_to_log(
                f'{molecule.bonds_engine} optimisation failed with error {error}; restarting',
                msg_type='minor')
            # Now we should handle the errors that we have in the results
            # 1) If we have a file read error just start again
            if error == 'FileIO':
                result = qm_engine.generate_input(input_type='mm',
                                                  optimise=True,
                                                  restart=True)
            # 2) If we have a distance matrix error we should start from a different structure try the input
            elif restart_count == 1:
                result = qm_engine.generate_input(input_type='input',
                                                  optimise=True)
            # 3) If we have already tried the starting structure generate a conformer and try again
            else:
                molecule.write_pdb()
                rdkit = RDKit()
                molecule.coords['temp'] = rdkit.generate_conformers(
                    f'{molecule.name}.pdb')[0]
                result = qm_engine.generate_input(input_type='temp',
                                                  optimise=True)

            restart_count += 1

        if not result['success']:
            # Message kept verbatim for anything matching on it
            raise OptimisationFailed(
                f"{molecule.bonds_engine} "
                f"optimisation did not converge after 3 restarts; last error {result['error']}"
            )

        molecule.coords[
            'qm'], molecule.qm_energy = qm_engine.optimised_structure()
        molecule.write_xyz(input_type='qm', name='opt')

        append_to_log(
            f'Finishing qm_optimisation of molecule{" using geometric" if molecule.geometric else ""}'
        )

        return molecule