Example #1
0
def checkTorsion(smiles, torsion_indices, ff_name):
    """
    Look up the id of the proper-torsion parameter assigned to one torsion

        Parameters
        ----------
        smiles : str
            Mapped SMILES, parsed with ``Molecule.from_mapped_smiles``
        torsion_indices : sequence of int
            Atom indices of the torsion to look up; compared in both forward and
            reversed order against the labelled proper torsions
        ff_name : str
            Force field passed to ``ForceField`` (cosmetic attributes are allowed)

        Returns
        -------
        tid : str or None
            ``parameter.id`` of the matching proper torsion, or None when no
            labelled proper torsion matches ``torsion_indices``
    """
    print(smiles)

    # Compare as tuples so that a list of indices is matched as well
    target = tuple(torsion_indices)

    molecule = Molecule.from_mapped_smiles(smiles)
    topology = Topology.from_molecules(molecule)
    forcefield = ForceField(ff_name, allow_cosmetic_attributes=True)
    # Run the molecule labeling
    molecule_force_list = forcefield.label_molecules(topology)

    # Stays None when nothing matches; without this default the final
    # return raised UnboundLocalError for an unmatched torsion
    tid = None
    params = []
    indices = []
    for mol_forces in molecule_force_list:
        for force_tag, force_dict in mol_forces.items():
            if force_tag != "ProperTorsions":
                continue
            for atom_indices, parameter in force_dict.items():
                params.append(parameter.id)
                indices.append(atom_indices)
                print(torsion_indices)
                print(atom_indices)
                if target in (tuple(atom_indices),
                              tuple(reversed(atom_indices))):
                    tid = parameter.id
    print(params)
    print(indices)
    return tid
Example #2
0
def get_assigned_torsion_param(tdentry, forcefield):
    """Return the proper-torsion parameter the force field assigns to the
    dihedral driven in a TorsionDrive entry.

    Parameters
    ----------
    tdentry : TDEntry
        TorsionDrive entry; its mapped SMILES attribute is used to build the
        molecule and its first driven dihedral selects the torsion.
    forcefield : str, ForceField
        OpenFF force field to apply. A string is passed to ``ForceField``.

    Returns
    -------
    torsion_params : ProperTorsion
        The parameter labelled on the driven dihedral.

    Raises
    ------
    KeyError
        If neither the dihedral indices nor their reverse are labelled.

    Examples
    --------
    Starting from entries returned by `get_torsiondrives_matching_smarts`:

    >>> from openforcefield.typing.engines.smirnoff import ForceField
    >>> tdentries = get_torsiondrives_matching_smarts(smarts, dataset, client)
    >>> ff = ForceField('openff-1.0.0.offxml')
    >>> assigned = [smarts_torsions.get_assigned_torsion_param(tdentry, ff)
    ...             for tdentry in tdentries]
    >>> print([t.id for t in assigned])
    ['t47', 't47', 't47', 't47', ...]

    """
    mapped_smiles = tdentry.attributes[
        "canonical_isomeric_explicit_hydrogen_mapped_smiles"]
    molecule = Molecule.from_mapped_smiles(mapped_smiles)

    # accept either a ready ForceField or the name of one
    ff = ForceField(forcefield) if isinstance(forcefield, str) else forcefield

    # a single molecule goes in, so its labels are the 0th entry
    labels = ff.label_molecules(Topology.from_molecules(molecule))[0]

    # only the first driven dihedral is considered (no 2D torsion drives)
    driven = tdentry.td_keywords.dihedrals[0]

    proper_torsions = labels["ProperTorsions"]
    assigned = proper_torsions.get(driven)
    if assigned is not None:
        return assigned

    # not found in the forward direction: look it up reversed
    return proper_torsions[driven[::-1]]
def paramUsage(smilesList, offxml):
    """
    Description -
    Reads in list of smiles and returns a dictionary of .offxml style parameters as keys
    and smiles of molecules as items

    Input -
    smilesList: A list of smiles
    offxml: The .offxml format force field that the parameters will be used with

    Return -
    anglebondDict: A dictionary of .offxml style parameters as keys and sets of smiles of
    molecules that utilize them. The returned dictionary is only for bond and angle
    parameters, i.e. ids that start with 'a' or 'b' such as 'a1', 'b2', etc.
    Note: The function can be modified to collect ids starting with 't' for torsions.

    Side effect -
    The dictionary is also pickled to "anglebond.p" in the current working directory.
    """

    # Initialize storage
    anglebondDict = dict()

    # Let's label using our RC force field
    forcefield = ForceField(offxml)

    # Loop over smiles
    for smi in smilesList:

        # Create a simple molecule from SMILES and turn it into a topology.
        molecule = Molecule.from_smiles(smi, allow_undefined_stereo=True)
        topology = Topology.from_molecules([molecule])

        # Run the molecule labeling
        for mol_forces in forcefield.label_molecules(topology):
            for force_dict in mol_forces.values():
                for parameter in force_dict.values():
                    pid = parameter.id

                    # Match on the id prefix: a substring test would also pick
                    # up ids of other kinds that merely contain an 'a' or 'b'
                    if pid.startswith(("a", "b")):
                        anglebondDict.setdefault(pid, set()).add(smi)

    # Write out the angle and bond dictionary to "anglebond.p" file; the
    # context manager makes sure the handle is flushed and closed
    with open("anglebond.p", "wb") as handle:
        pickle.dump(anglebondDict, handle)

    return anglebondDict
Example #4
0
def checkTorsion(molList, ff_name):
    """
    Take mollist and check if the molecules in a list match a specific torsion id

        Parameters
        ----------
        molList : List of objects
            List of oemols with datatags generated in genData function; the
            "cmiles" and "TDindices" tags are read from each molecule
        ff_name : str
            Force field passed to ``ForceField`` and used to label every molecule

        Returns
        -------
        molList : list of objects
            List of oemol objects that have a datatag "IDMatch" that contain the torsion id
            involved in the QCA torsion drive
    """

    count = 0
    mols = []
    # The force field is the same for every molecule: load it once instead of
    # re-parsing it on each iteration
    forcefield = ForceField(ff_name)
    for mol in molList:
        molecule = Molecule.from_mapped_smiles(mol.GetData("cmiles"))
        topology = Topology.from_molecules(molecule)
        td_indices = mol.GetData("TDindices")
        # Run the molecule labeling
        for mol_forces in forcefield.label_molecules(topology):
            for force_tag, force_dict in mol_forces.items():
                if force_tag != "ProperTorsions":
                    continue
                for atom_indices, parameter in force_dict.items():
                    # The driven torsion may be stored in either direction
                    if (atom_indices == td_indices
                            or tuple(reversed(atom_indices)) == td_indices):
                        count += 1
                        mol.SetData("IDMatch", parameter.id)
                        mols.append(mol)
    print(
        "Out of "
        + str(len(molList))
        + " molecules, "
        + str(count)
        + " were processed with checkTorsion()"
    )

    return mols
    def get_dihedral_parameters(self):
        """
        Fetch the proper-torsion parameters that the OpenFF toolkit
        labels on the tracked dihedral.

        Returns
        -------
        parameters : object
            The entry stored under self.atom_indexes in the molecule's
            'ProperTorsions' labels
        """
        from openforcefield.topology import Topology
        from openforcefield.typing.engines.smirnoff import ForceField

        off_molecule = self.molecule.off_molecule
        single_topology = Topology.from_molecules([off_molecule])
        force_field = ForceField(self.forcefield + '.offxml')

        # one molecule in the topology, so its labels are the first entry
        labels = force_field.label_molecules(single_topology)[0]
        proper_torsions = dict(labels['ProperTorsions'])

        return proper_torsions[self.atom_indexes]
Example #6
0
    def __init__(self, dihedral_benchmark):
        """
        Build an OpenFFEnergeticProfile and cache the proper-torsion
        labels of the benchmark's molecule.

        Parameters
        ----------
        dihedral_benchmark : an peleffybenchmarktools.dihedrals.DihedralBenchmark object
            The DihedralBenchmark object that will be used to obtain the
            energetic profile
        """
        super().__init__(dihedral_benchmark)

        from openforcefield.topology import Topology
        from openforcefield.typing.engines.smirnoff import ForceField

        benchmark_molecule = self.dihedral_benchmark.molecule
        single_topology = Topology.from_molecules(
            [benchmark_molecule.off_molecule])
        force_field = ForceField(benchmark_molecule.forcefield + '.offxml')

        # one molecule in the topology, so its labels are the first entry
        labels = force_field.label_molecules(single_topology)[0]
        self._parameters = dict(labels['ProperTorsions'])
def checkParam(cmiles, ff2):
    """
    Report whether the bond parameter 'b1' is applied to a molecule.

    Every force tag met while scanning is printed, and 'match' is printed
    when the parameter is found.

    Parameters
    ----------
    cmiles : str
        Mapped SMILES, parsed with ``Molecule.from_mapped_smiles``
    ff2 : str
        Force field passed to ``ForceField`` (cosmetic attributes are allowed)

    Returns
    -------
    str or None
        ``cmiles`` if a labelled bond has id 'b1', otherwise None
    """
    mapped_molecule = Molecule.from_mapped_smiles(cmiles)
    single_topology = Topology.from_molecules([mapped_molecule])

    force_field = ForceField(ff2, allow_cosmetic_attributes=True)

    for labels in force_field.label_molecules(single_topology):
        for tag, assigned in labels.items():
            print(tag)
            if tag != 'Bonds':
                continue
            # stop at the first bond that carries the 'b1' parameter
            if any(param.id == 'b1' for _, param in assigned.items()):
                print('match')
                return cmiles

    return None
def test_coverage_filter():
    """
    Make sure the coverage filter removes the correct molecules.
    """
    from openforcefield.typing.engines.smirnoff import ForceField

    coverage_filter = workflow_components.CoverageFilter()
    coverage_filter.allowed_ids = ["b83"]
    coverage_filter.filtered_ids = ["b87"]

    mols = get_stereoisomers()

    # we have to remove duplicated records
    # remove duplicates from the set
    molecule_container = get_container(mols)
    result = coverage_filter.apply(molecule_container.molecules, processors=1)

    forcefield = ForceField("openff_unconstrained-1.0.0.offxml")
    # now see if any molecules do not have b83
    parameters_by_id = {}
    for molecule in result.molecules:
        labels = forcefield.label_molecules(molecule.to_topology())[0]
        covered_types = {
            label.id for types in labels.values() for label in types.values()
        }
        # now store the smiles under the ids
        for parameter in covered_types:
            parameters_by_id.setdefault(parameter, []).append(molecule.to_smiles())

    expected = parameters_by_id["b83"]
    for molecule in result.molecules:
        assert molecule.to_smiles() in expected
        assert "dihedrals" not in molecule.properties

    # we now need to check that the molecules passed contain only the allowed ids
    # do this by running the component again
    result2 = coverage_filter.apply(result.molecules, processors=1)
    assert result2.n_filtered == 0
    # the second pass must keep every molecule that survived the first one
    # (this used to compare result.n_molecules with itself)
    assert result2.n_molecules == result.n_molecules
Example #9
0
    def get_parameters_from_forcefield(self, forcefield, molecule):
        """
        Label an offpele molecule with the supplied forcefield and wrap
        the resulting parameters.

        Parameters
        ----------
        forcefield : str or an openforcefield.typing.engines.smirnoff.ForceField
                     object
            The forcefield from which the parameters will be obtained;
            a str is passed to the ForceField constructor
        molecule : an offpele.topology.Molecule
            The offpele's Molecule object

        Returns
        -------
        openforcefield_parameters : an OpenForceFieldParameters object
            The OpenForceFieldParameters object

        Raises
        ------
        Exception
            If forcefield is neither a str nor a ForceField
        """
        from openforcefield.typing.engines.smirnoff import ForceField
        from openforcefield.topology import Topology

        topology = Topology.from_molecules([molecule.off_molecule])

        if isinstance(forcefield, str):
            forcefield = ForceField(forcefield)
        elif not isinstance(forcefield, ForceField):
            raise Exception('Invalid forcefield type')

        labelled = forcefield.label_molecules(topology)

        assert len(labelled) == 1, 'A single molecule is expected'
        return self.OpenForceFieldParameters(labelled[0])
Example #10
0
class Run(object):
    """
    Create FEP files from a common substructure for a given set of
    ligands
    """
    # Residual charges at or below this magnitude are treated as float
    # round-off from summing the mol2 partial charges.
    _CHARGE_TOLERANCE = 1e-8

    def __init__(self, lig, FF, merge, *args, **kwargs):
        """
        Store the run settings and initialise the bookkeeping containers.

        Parameters
        ----------
        lig : str
            Base name (without extension) of the ligand files that are read
            and written
        FF : str
            Force field name; write_prm_Q only handles 'AMBER14sb' when
            merging
        merge : bool
            True to start from AMBER14sb.prm, False to start from
            NOMERGE.prm in write_prm_Q
        """
        self.lig = lig
        self.FF = FF
        self.merge = merge
        self.ff_list = []
        # atom counter (0-based) -> [index, name, type, charge, x, y, z],
        # all kept as the strings read from the mol2 file
        self.mapping = {}
        self.total_charge = 0
        self.masses = {
            "H": "1.0080",
            "C": "12.0110",
            "N": "14.0070",
            "O": "15.9994",
            "F": "19.0000",
            "P": "30.9700",
            "S": "32.0600",
            "Cl": "35.0000",
            "Br": "79.9000",
            "I": "126.90",
            "DUM": "0.0000"
        }

    def openff(self):
        """
        Load ``<lig>.sdf`` and label it with the openff-1.0.0 force field.

        Sets self.molecule, self.topology, self.forcefield and
        self.parameters (the labels of the single molecule).
        """
        # Load the molecule from sdf (charges are read separately from the
        # mol2 file, see read_mol2)
        molecule = Molecule.from_file(self.lig + '.sdf')
        topology = Topology.from_molecules([molecule])

        self.molecule = molecule
        self.topology = topology
        # Label using the openff-1.0.0 force field
        self.forcefield = ForceField('openff-1.0.0.offxml')
        self.parameters = self.forcefield.label_molecules(topology)[0]

    def read_mol2(self):
        """
            This is basically to get the charge, will later be deprecated when charges are
            transferable in openff

            Fills self.mapping from every line of ``<lig>.mol2`` that splits
            into exactly 9 fields and accumulates self.total_charge.
        """
        with open(self.lig + '.mol2') as infile:
            cnt = -1
            for line in infile:
                line = line.split()
                if len(line) == 9:
                    cnt += 1
                    self.mapping[cnt] = [
                        line[0],  # atom index
                        line[1],  # atom name
                        line[5].split('.')[0],  # atom type
                        line[8],  # charge
                        line[2],  # X coordinate
                        line[3],  # Y coordinate
                        line[4]  # Z coordinate
                    ]
                    self.total_charge += float(line[8])

        # An exact != 0.0 test fires on pure float round-off of the sum
        if abs(self.total_charge) > self._CHARGE_TOLERANCE:
            print('WARNING: residual charge {} check your mol2 file!'.format(
                self.total_charge))

    def write_lib_Q(self):
        """
        Write ``<lig>.lib`` with the atoms, bonds and impropers sections.

        Needs self.mapping (read_mol2) and self.parameters (openff).
        """
        with open(self.lig + '.lib', 'w') as outfile:
            outfile.write('{}    ! atoms no {}   total charge {} \n\n'.format(
                '{LIG}', len(self.mapping), self.total_charge))

            outfile.write("[info] \n SYBYLtype RESIDUE \n\n")

            #atom and charge block:
            outfile.write("[atoms] \n")
            for at in self.mapping:
                entry = self.mapping[at]
                outfile.write('{:>4s}   {:10}{:11}{:>10s}\n'.format(
                    entry[0], entry[1], entry[1].lower(), entry[3]))

            # bonded block
            outfile.write("\n[bonds]\n")
            for bond in self.parameters['Bonds']:
                ai = self.mapping[bond[0]][1]
                aj = self.mapping[bond[1]][1]
                outfile.write('{:10s}{:}\n'.format(ai, aj))

            # improper block
            outfile.write("\n[impropers]\n")
            for torsion in self.parameters['ImproperTorsions']:
                ai = self.mapping[torsion[0]][1]
                aj = self.mapping[torsion[1]][1]
                ak = self.mapping[torsion[2]][1]
                al = self.mapping[torsion[3]][1]
                outfile.write('{:10}{:10}{:10}{}\n'.format(ai, aj, ak, al))

            # NOTE: no [charge_groups] section is written.

    @staticmethod
    def _magnitude(quantity):
        """Return the first whitespace-separated token of the formatted quantity as a float."""
        # This is a bit hacky, check how to get the float out directly
        return float('{}'.format(quantity).split()[0])

    def _atom_names(self, atom_indices, count):
        """Return the lower-cased mapped names of the first ``count`` atoms."""
        return tuple(
            self.mapping[atom_indices[i]][1].lower() for i in range(count))

    def _write_vdw(self, outfile):
        """Write one vdW line per labelled atom."""
        for atom_indices, parameter in self.parameters['vdW'].items():
            ai = atom_indices[0]
            ai_name = self.mapping[ai][1].lower()
            epsilon = self._magnitude(parameter.epsilon)
            epsilon23 = epsilon / 2
            # TO DO: CHECK IF THIS IS CORRECT!
            Rmin = self._magnitude(parameter.rmin_half)
            mass = self.masses[self.mapping[ai][2]]
            outfile.write(
                """{:6}{: 8.3f}{: 10.3f}{: 10.3f}{: 10.3f}{: 10.3f}{:>10s}\n"""
                .format(ai_name, Rmin, 0.00, epsilon, Rmin, epsilon23, mass))

    def _write_bonds(self, outfile):
        """Write one line per labelled bond."""
        for atom_indices, parameter in self.parameters['Bonds'].items():
            ai_name, aj_name = self._atom_names(atom_indices, 2)
            fc = self._magnitude(parameter.k)
            length = self._magnitude(parameter.length)
            outfile.write('{:10}{:10}{:10.1f}{:>10.3f}\n'.format(
                ai_name, aj_name, fc, length))

    def _write_angles(self, outfile):
        """Write one line per labelled angle."""
        for atom_indices, parameter in self.parameters['Angles'].items():
            ai_name, aj_name, ak_name = self._atom_names(atom_indices, 3)
            fc = self._magnitude(parameter.k)
            angle = self._magnitude(parameter.angle)
            outfile.write(
                """{:10}{:10}{:10}{: 8.2f}{:>12.3f}\n""".format(
                    ai_name, aj_name, ak_name, fc, angle))

    def _write_proper_torsions(self, outfile):
        """Write one line per term of every labelled proper torsion."""
        for atom_indices, parameter in self.parameters[
                'ProperTorsions'].items():
            names = self._atom_names(atom_indices, 4)
            max_phase = len(parameter.phase)

            # Collect every term first so that a torsion is written either
            # completely or not at all
            forces = []
            for i in range(max_phase):
                fc = self._magnitude(parameter.k[i])
                phase = self._magnitude(parameter.phase[i])
                paths = int(parameter.idivf[i])

                # every term except the last one of a multi-term torsion
                # gets a negated periodicity
                minimum = float(parameter.periodicity[i])
                if i != max_phase - 1:
                    minimum = minimum * -1

                forces.append((fc, minimum, phase, paths))

            for fc, minimum, phase, paths in forces:
                outfile.write(
                    """{:10}{:10}{:10}{:10}{:>10.3f}{:>10.3f}{:>10.3f}{:>5d}\n"""
                    .format(names[0], names[1], names[2], names[3], fc,
                            minimum, phase, paths))

    def _write_improper_torsions(self, outfile):
        """Write one line per labelled improper torsion (first term only)."""
        for atom_indices, parameter in self.parameters[
                'ImproperTorsions'].items():
            ai_name, aj_name, ak_name, al_name = self._atom_names(
                atom_indices, 4)
            fc = self._magnitude(parameter.k[0])
            phase = self._magnitude(parameter.phase[0])
            outfile.write(
                """{:10}{:10}{:10}{:10}{:10.3f}{:10.3f}\n""".format(
                    ai_name, aj_name, ak_name, al_name, fc, phase))

    def write_prm_Q(self):
        """
        Copy the template .prm file and insert the ligand parameters.

        The template is copied line by line; directly after each
        "! Ligand ... parameters" header line the matching block of
        self.parameters is written.

        Raises
        ------
        ValueError
            If the FF/merge combination has no template file
        """
        if self.FF == 'AMBER14sb' and self.merge == True:
            prm_file = os.path.join(s.FF_DIR, 'AMBER14sb.prm')
            prm_file_out = self.FF + '_' + self.lig + '.prm'

        elif self.merge == False:
            prm_file = os.path.join(s.FF_DIR, 'NOMERGE.prm')
            prm_file_out = self.lig + '.prm'

        else:
            # Previously this fell through and failed later with an
            # UnboundLocalError on prm_file
            raise ValueError(
                "write_prm_Q only supports FF='AMBER14sb' with merge=True, "
                "or merge=False; got FF={!r}, merge={!r}".format(
                    self.FF, self.merge))

        # template header line -> writer of the block that follows it
        section_writers = {
            "! Ligand vdW parameters\n": self._write_vdw,
            "! Ligand bond parameters\n": self._write_bonds,
            "! Ligand angle parameters\n": self._write_angles,
            "! Ligand torsion parameters\n": self._write_proper_torsions,
            "! Ligand improper parameters\n": self._write_improper_torsions,
        }

        with open(prm_file) as infile, open(prm_file_out, 'w') as outfile:
            for line in infile:
                outfile.write(line)
                writer = section_writers.get(line)
                if writer is not None:
                    writer(outfile)

    def write_PDB(self):
        """Write ``<lig>.pdb`` with one HETATM record per mapped atom."""
        with open(self.lig + '.pdb', 'w') as outfile:
            for atom in self.mapping:
                ai = atom + 1
                ai_name = self.mapping[atom][1]
                a_el = self.mapping[atom][2]
                ax = float(self.mapping[atom][4])
                ay = float(self.mapping[atom][5])
                az = float(self.mapping[atom][6])
                at_entry = [
                    'HETATM',  #  0 ATOM/HETATM
                    ai,  #  1 ATOM serial number
                    ai_name,  #  2 ATOM name
                    '',  #  3 Alternate location indicator
                    'LIG',  #  4 Residue name
                    '',  #  5 Chain identifier
                    1,  #  6 Residue sequence number
                    '',  #  7 Code for insertion of residue
                    ax,  #  8 Orthogonal coordinates for X
                    ay,  #  9 Orthogonal coordinates for Y
                    az,  # 10 Orthogonal coordinates for Z
                    0.0,  # 11 Occupancy
                    0.0,  # 12 Temperature factor
                    a_el,  # 13 Element symbol
                    ''  # 14 Charge on atom
                ]
                outfile.write(IO.pdb_parse_out(at_entry) + '\n')

    def report_missing_parameters(self):
        """
        Analyze self.molecule with self.forcefield (both set by openff()),
        generating a report of any chemical groups in the molecule that are
        lacking parameters. The report is printed and returned.

        Returns
        -------
        missing_parameters : dict[tagname: list[dict[tagged_smiles:string, image:PIL.Image, atom indices:list[int]]]]
            A hierarchical dictionary, with first level keys indicating ForceField tag
            names (eg. "Bonds"), and first-level values which are lists of dictionaries.
            Each dictionary in this list reflects one missing parameter, and contains the
            following key:value pairs :
            * "image": PIL.Image
                * shows a 2D drawing, highlighting the feature that could not be parametrized
            * "tagged_smiles": string
                * SMILES of the whole molecule, tagging the atom indices which could not be
                  parametrized
            * "atom_indices": list(int)
                * The indices of atoms which could not be parametrized
            The dictionary is empty when every term could be parametrized.

        """
        highlight_color = (0.75, 0.75, 0.75)

        # Make deepcopies of both inputs, since we may modify them in this function
        forcefield = deepcopy(self.forcefield)
        molecule = deepcopy(self.molecule)

        # Set partial charges to placeholder values so that we can skip AM1-BCC
        # during parameterization
        molecule.partial_charges = (np.zeros(molecule.n_atoms) +
                                    0.1) * unit.elementary_charge

        # Prepare dictionary to catch parameterization failure info
        missing_params = {}

        # Retry until the system can be built; each failure is recorded and
        # then papered over with a generic parameter.
        while True:
            try:
                forcefield.create_openmm_system(
                    molecule.to_topology(), charge_from_molecules=[molecule])
            except UnassignedValenceParameterException as e:
                # Ensure that there is a list initialized for missing parameters
                # under this tagname
                handler_tagname = e.handler_class._TAGNAME
                tag_report = missing_params.setdefault(handler_tagname, [])

                # Create a shortcut to the topology atom tuples attached to
                # the parametrization error
                top_atom_tuples = e.unassigned_topology_atom_tuples

                # Make a summary of the missing parameters from this attempt and add it to
                # the missing_params dict
                rdmol = molecule.to_rdkit()
                for top_atom_tuple in top_atom_tuples:
                    orig_atom_indices = [
                        i.topology_atom_index for i in top_atom_tuple
                    ]
                    # Make a copy of the input RDMol so that we don't modify the original
                    this_rdmol = deepcopy(rdmol)

                    # Attach tags to relevant atoms so that a tagged SMILES can be written
                    orig_rdatoms = []
                    for tag_idx, atom_idx in enumerate(orig_atom_indices):
                        rdatom = this_rdmol.GetAtomWithIdx(atom_idx)
                        rdatom.SetAtomMapNum(tag_idx + 1)
                        orig_rdatoms.append(rdatom)

                    tagged_smiles = Chem.MolToSmiles(this_rdmol)

                    # Make tagged hydrogens into deuteriums so that RemoveHs doesn't get rid of them
                    for rdatom in orig_rdatoms:
                        if rdatom.GetAtomicNum() == 1:
                            rdatom.SetIsotope(2)

                    # Remove hydrogens, since they clutter up the 2D drawing
                    # (tagged Hs are not removed, since they were converted to deuterium)
                    h_less_rdmol = Chem.RemoveHs(this_rdmol)

                    # Generate 2D coords, since drawing from 3D can look really weird
                    Draw.rdDepictor.Compute2DCoords(h_less_rdmol)

                    # Search over the molecule to find the indices of the tagged atoms
                    # after hydrogen removal
                    h_less_atom_indices = [None] * len(orig_atom_indices)
                    for rdatom in h_less_rdmol.GetAtoms():
                        # Convert deuteriums back into hydrogens
                        if rdatom.GetAtomicNum() == 1:
                            rdatom.SetIsotope(1)

                        atom_map_num = rdatom.GetAtomMapNum()
                        if atom_map_num == 0:
                            continue
                        h_less_atom_indices[atom_map_num - 1] = rdatom.GetIdx()

                    # Once the new atom indices are found, use them to find the H-less
                    # bond indices. Consecutive atoms of a tuple are not always
                    # bonded, in which case the lookup yields None and there
                    # is no bond to highlight.
                    h_less_bond_indices = []
                    for first, second in zip(h_less_atom_indices,
                                             h_less_atom_indices[1:]):
                        rdbond = h_less_rdmol.GetBondBetweenAtoms(
                            first, second)
                        if rdbond is not None:
                            h_less_bond_indices.append(rdbond.GetIdx())

                    # Create a 2D drawing of the molecule, highlighting the
                    # parameterization failure
                    highlight_atom_colors = {
                        idx: highlight_color
                        for idx in h_less_atom_indices
                    }
                    highlight_bond_colors = {
                        idx: highlight_color
                        for idx in h_less_bond_indices
                    }
                    image = Draw.MolsToGridImage(
                        [h_less_rdmol],
                        highlightAtomLists=[h_less_atom_indices],
                        highlightBondLists=[h_less_bond_indices],
                        molsPerRow=1,
                        highlightAtomColors=[highlight_atom_colors],
                        highlightBondColors=[highlight_bond_colors],
                        subImgSize=(600, 600))

                    # Structure and append the relevant info to the missing_params dictionary
                    tag_report.append({
                        'atom_indices': orig_atom_indices,
                        'image': image,
                        'tagged_smiles': tagged_smiles
                    })

                # Add a "super generic" parameter to the top of this handler's ParameterList,
                # which will make it always find parameters for each term. This will prevent the same
                # parameterization exception from being raised in the next attempt.
                param_list = forcefield.get_parameter_handler(
                    handler_tagname).parameters
                param_list.insert(0, super_generics[handler_tagname])
            else:
                break

        # The loop only ends once the system builds, so whether anything was
        # missing is decided by the collected report, not by a success flag
        if missing_params:
            print(missing_params)
        else:
            print('Parameters succesfully assigned')

        return missing_params
Example #11
0
class LegacyForceField:
    """ Class to hold legacy forcefield for typing and parameter assignment.

    The force field family is chosen by substring match on ``forcefield``:
    names containing ``"gaff"``, ``"smirnoff"`` or ``"openff"`` are
    prepared at construction time; any other name raises
    ``NotImplementedError``.

    Parameters
    ----------
    forcefield : string
        name and version of the forcefield.

    """
    def __init__(self, forcefield="gaff-1.81"):
        self.forcefield = forcefield
        self._prepare_forcefield()

    @staticmethod
    def _convert_to_off(mol):
        """ Return ``mol`` as an OpenForceField ``Molecule``.

        Accepts an ``esp.Graph`` (its ``.mol`` is returned), an
        OpenForceField molecule (returned unchanged), an RDKit molecule or
        an OpenEye molecule. Any other input falls through every branch and
        the method returns ``None``.
        """
        import openforcefield

        if isinstance(mol, esp.Graph):
            return mol.mol

        elif isinstance(mol, openforcefield.topology.molecule.Molecule):
            return mol
        elif isinstance(mol, rdkit.Chem.rdchem.Mol):
            return Molecule.from_rdkit(mol)
        elif "openeye" in str(
                type(mol)):  # because we don't want to depend on OE
            return Molecule.from_openeye(mol)

    def _prepare_forcefield(self):
        """ Dispatch to the loader matching ``self.forcefield``.

        Raises
        ------
        NotImplementedError
            If the name contains none of "gaff", "smirnoff", "openff".
        """
        if "gaff" in self.forcefield:
            self._prepare_gaff()

        elif "smirnoff" in self.forcefield:
            self._prepare_smirnoff()

        elif "openff" in self.forcefield:
            self._prepare_openff()

        else:
            raise NotImplementedError

    def _prepare_openff(self):
        """ Load ``<forcefield>.offxml`` into ``self.FF``. """
        from openforcefield.typing.engines.smirnoff import ForceField

        self.FF = ForceField("%s.offxml" % self.forcefield)

    def _prepare_smirnoff(self):
        """ Load ``test_forcefields/<forcefield>.offxml`` into ``self.FF``. """
        from openforcefield.typing.engines.smirnoff import ForceField

        self.FF = ForceField("test_forcefields/%s.offxml" % self.forcefield)

    def _prepare_gaff(self):
        """ Build the GAFF atom-type <-> integer-index lookup tables.

        Reads the ffxml file shipped inside the ``openmmforcefields``
        package, collects the ``type`` attribute of every ``Atom`` element
        under the last child of the XML root, drops the types listed in
        ``REDUNDANT_TYPES`` and stores the resulting mappings on
        ``self._str_2_idx`` / ``self._idx_2_str``. Each redundant type is
        then aliased to the index of its replacement in ``_str_2_idx``.
        """
        import os
        import xml.etree.ElementTree as ET

        import openmmforcefields

        # the ffxml files live inside the installed openmmforcefields package
        ffxml_path = os.path.join(
            os.path.dirname(openmmforcefields.__file__),
            "ffxml",
            "amber",
            "gaff",
            "ffxml",
            self.forcefield + ".xml",
        )

        # parse xml
        root = ET.parse(ffxml_path).getroot()
        # ``Element.getchildren()`` no longer exists on Python >= 3.9;
        # iterating the element yields the same children.
        nonbonded = list(root)[-1]
        atom_types = [atom.get("type") for atom in nonbonded.findall("Atom")]

        # remove redundant types
        for bad_type in REDUNDANT_TYPES:
            atom_types.remove(bad_type)

        # compose the translation dictionaries
        str_2_idx = {name: idx for idx, name in enumerate(atom_types)}
        idx_2_str = dict(enumerate(atom_types))

        # provide mapping for redundant types
        for bad_type, good_type in REDUNDANT_TYPES.items():
            str_2_idx[bad_type] = str_2_idx[good_type]

        # make translation dictionaries attributes of self
        self._str_2_idx = str_2_idx
        self._idx_2_str = idx_2_str

    def _type_gaff(self, g):
        """ Type a molecular graph using gaff force fields.

        Writes ``g.mol`` to a temporary SDF file, runs antechamber through
        ``GAFFTemplateGenerator``, reads the assigned GAFF atom types back
        and stores their integer indices (via ``self._str_2_idx``) in
        ``g.nodes["n1"].data["legacy_typing"]``.

        Parameters
        ----------
        g : graph object exposing ``.mol`` and ``.nodes``

        Returns
        -------
        g : the same graph, modified in place.
        """
        # assert the forcefield is indeed of gaff family
        assert "gaff" in self.forcefield

        mol = g.mol

        import os
        import tempfile

        # import template generator
        from openmmforcefields.generators import GAFFTemplateGenerator

        gaff = GAFFTemplateGenerator(molecules=mol, forcefield=self.forcefield)

        prefix = "molecule"

        # The context manager removes the scratch directory even when
        # antechamber (or any file write) fails, so nothing is leaked.
        with tempfile.TemporaryDirectory() as tempdir:
            input_sdf_filename = os.path.join(tempdir, prefix + ".sdf")
            gaff_mol2_filename = os.path.join(tempdir, prefix + ".gaff.mol2")
            frcmod_filename = os.path.join(tempdir, prefix + ".frcmod")

            # write sdf for input
            mol.to_file(input_sdf_filename, file_format="sdf")

            # run antechamber
            gaff._run_antechamber(
                molecule_filename=input_sdf_filename,
                input_format="mdl",
                gaff_mol2_filename=gaff_mol2_filename,
                frcmod_filename=frcmod_filename,
            )

            gaff._read_gaff_atom_types_from_mol2(gaff_mol2_filename, mol)

        gaff_types = [atom.gaff_type for atom in mol.atoms]

        # put types into graph object
        g.nodes["n1"].data["legacy_typing"] = torch.tensor(
            [self._str_2_idx[atom] for atom in gaff_types])

        return g

    def _parametrize_gaff(self, mol, g=None):
        """ Not implemented; always raises ``NotImplementedError``. """
        raise NotImplementedError

    def _parametrize_smirnoff(self, g):
        """ Attach reference SMIRNOFF parameters to the nodes of ``g``.

        Labels ``g.mol`` with ``self.FF`` and writes, on ``g.heterograph``:

        * ``n2`` nodes: ``k_ref`` / ``eq_ref`` from the "Bonds" labels,
        * ``n3`` nodes: ``k_ref`` / ``eq_ref`` from the "Angles" labels,
        * ``n1`` nodes: ``epsilon_ref`` / ``sigma_ref`` from the "vdW" labels,
        * ``n4`` nodes: ``k_ref`` / ``periodicity_ref`` / ``phases_ref`` from
          the "ProperTorsions" labels.

        Returns the same graph, modified in place.
        """
        # mol = self._convert_to_off(mol)

        forces = self.FF.label_molecules(g.mol.to_topology())[0]

        # bond force constants, looked up by each node's atom-index tuple
        g.heterograph.apply_nodes(
            lambda node: {
                "k_ref":
                torch.Tensor([
                    forces["Bonds"][tuple(node.data["idxs"][idx].numpy())].k.
                    value_in_unit(esp.units.FORCE_CONSTANT_UNIT)
                    for idx in range(node.data["idxs"].shape[0])
                ])[:, None]
            },
            ntype="n2",
        )

        # bond equilibrium lengths
        g.heterograph.apply_nodes(
            lambda node: {
                "eq_ref":
                torch.Tensor([
                    forces["Bonds"][tuple(node.data["idxs"][idx].numpy())].
                    length.value_in_unit(esp.units.DISTANCE_UNIT)
                    for idx in range(node.data["idxs"].shape[0])
                ])[:, None]
            },
            ntype="n2",
        )

        # angle force constants
        g.heterograph.apply_nodes(
            lambda node: {
                "k_ref":
                torch.Tensor([
                    forces["Angles"][tuple(node.data["idxs"][idx].numpy())].k.
                    value_in_unit(esp.units.ANGLE_FORCE_CONSTANT_UNIT)
                    for idx in range(node.data["idxs"].shape[0])
                ])[:, None]
            },
            ntype="n3",
        )

        # angle equilibrium values
        g.heterograph.apply_nodes(
            lambda node: {
                "eq_ref":
                torch.Tensor([
                    forces["Angles"][tuple(node.data["idxs"][idx].numpy())].
                    angle.value_in_unit(esp.units.ANGLE_UNIT)
                    for idx in range(node.data["idxs"].shape[0])
                ])[:, None]
            },
            ntype="n3",
        )

        # per-atom vdW well depth, indexed by atom position 0..N-1
        g.heterograph.apply_nodes(
            lambda node: {
                "epsilon_ref":
                torch.Tensor([
                    forces["vdW"][
                        (idx, )].epsilon.value_in_unit(esp.units.ENERGY_UNIT)
                    for idx in range(g.heterograph.number_of_nodes("n1"))
                ])[:, None]
            },
            ntype="n1",
        )

        # NOTE(review): the value stored under "sigma_ref" is read from
        # ``rmin_half``, not ``sigma`` -- confirm this is intended.
        g.heterograph.apply_nodes(
            lambda node: {
                "sigma_ref":
                torch.Tensor([
                    forces["vdW"][(idx, )].rmin_half.value_in_unit(
                        esp.units.DISTANCE_UNIT)
                    for idx in range(g.heterograph.number_of_nodes("n1"))
                ])[:, None]
            },
            ntype="n1",
        )

        def apply_torsion(node, n_max_phases=6):
            """ Build zero-padded (n_torsions, n_max_phases) torsion tables.

            Torsions without a "ProperTorsions" label keep all-zero rows.
            """
            phases = torch.zeros(
                g.heterograph.number_of_nodes("n4"),
                n_max_phases,
            )

            periodicity = torch.zeros(
                g.heterograph.number_of_nodes("n4"),
                n_max_phases,
            )

            k = torch.zeros(
                g.heterograph.number_of_nodes("n4"),
                n_max_phases,
            )

            force = forces["ProperTorsions"]

            for idx in range(g.heterograph.number_of_nodes("n4")):
                idxs = tuple(node.data["idxs"][idx].numpy())
                if idxs in force:
                    _force = force[idxs]
                    # NOTE(review): terms are fetched through the indexed
                    # attributes ``k<i>`` / ``phase<i>`` / ``periodicity<i>``
                    # for i in 0..len-1; verify that this numbering matches
                    # the toolkit's indexed-attribute convention.
                    for sub_idx in range(len(_force.periodicity)):
                        if hasattr(_force, "k%s" % sub_idx):
                            k[idx, sub_idx] = getattr(
                                _force, "k%s" % sub_idx).value_in_unit(
                                    esp.units.ENERGY_UNIT)

                            phases[idx, sub_idx] = getattr(
                                _force, "phase%s" % sub_idx).value_in_unit(
                                    esp.units.ANGLE_UNIT)

                            periodicity[idx, sub_idx] = getattr(
                                _force, "periodicity%s" % sub_idx)

            return {
                "k_ref": k,
                "periodicity_ref": periodicity,
                "phases_ref": phases,
            }

        g.heterograph.apply_nodes(apply_torsion, ntype="n4")

        return g

    def _multi_typing_smirnoff(self, g):
        """ Store integer SMIRNOFF parameter ids as ``legacy_typing``.

        For ``n2`` (Bonds), ``n3`` (Angles) and ``n1`` (vdW) nodes the
        assigned parameter's ``id`` is taken, its first character is
        stripped and the remainder is converted with ``int``.

        Returns the same graph, modified in place.
        """
        # mol = self._convert_to_off(mol)

        forces = self.FF.label_molecules(g.mol.to_topology())[0]

        g.heterograph.apply_nodes(
            lambda node: {
                "legacy_typing":
                torch.Tensor([
                    int(forces["Bonds"][tuple(node.data["idxs"][idx].numpy())].
                        id[1:]) for idx in range(node.data["idxs"].shape[0])
                ]).long()
            },
            ntype="n2",
        )

        g.heterograph.apply_nodes(
            lambda node: {
                "legacy_typing":
                torch.Tensor([
                    int(forces["Angles"][tuple(node.data["idxs"][idx].numpy())]
                        .id[1:]) for idx in range(node.data["idxs"].shape[0])
                ]).long()
            },
            ntype="n3",
        )

        g.heterograph.apply_nodes(
            lambda node: {
                "legacy_typing":
                torch.Tensor([
                    int(forces["vdW"][(idx, )].id[1:])
                    for idx in range(g.heterograph.number_of_nodes("n1"))
                ]).long()
            },
            ntype="n1",
        )

        return g

    def parametrize(self, g):
        """ Parametrize a molecular graph.

        Only "smirnoff" / "openff" force fields are supported; any other
        force field raises ``NotImplementedError``.
        """
        if "smirnoff" in self.forcefield or "openff" in self.forcefield:
            return self._parametrize_smirnoff(g)

        else:
            raise NotImplementedError

    def typing(self, g):
        """ Type a molecular graph.

        Only "gaff" force fields are supported; any other force field
        raises ``NotImplementedError``.
        """
        if "gaff" in self.forcefield:
            return self._type_gaff(g)

        else:
            raise NotImplementedError

    def multi_typing(self, g):
        """ Type a molecular graph for hetero nodes.

        Only "smirnoff" force fields are supported; any other force field
        raises ``NotImplementedError``.
        """
        if "smirnoff" in self.forcefield:
            return self._multi_typing_smirnoff(g)

        else:
            raise NotImplementedError

    def __call__(self, *args, **kwargs):
        """ Shorthand for :meth:`typing`. """
        return self.typing(*args, **kwargs)
Example #12
0
class Run(object):
    """
    Create FEP files from a common substructure for a given set of
    ligands.

    ``lig`` is a path prefix: the class reads ``<lig>.mol2`` and writes
    ``<lig>.lib``, ``<lig>.prm`` (or ``<FF>_<lig>.prm``) and ``<lig>.pdb``.
    ``self.parameters`` only exists after :meth:`openff` has been called and
    ``self.mapping`` is only filled by :meth:`read_mol2`; the writer methods
    rely on both.
    """
    def __init__(self, lig, FF, merge, *args, **kwargs):
        """
        Store the run settings and initialise empty bookkeeping containers.

        Parameters
        ----------
        lig : str
            Path prefix of the ligand files (no extension).
        FF : str
            Force field name; only 'AMBER14sb' is recognised when merging.
        merge : bool
            Whether ligand parameters are merged into the force field file.
        """
        self.lig = lig
        self.FF = FF
        self.merge = merge
        self.ff_list = []
        # atom index (0-based, file order) ->
        # [index, name, element, charge, x, y, z], all kept as strings
        self.mapping = {}
        self.total_charge = 0
        # masses are strings because they are written with a string format
        self.masses = {
            "H": "1.0080",
            "C": "12.0110",
            "N": "14.0070",
            "O": "15.9994",
            "F": "19.0000",
            "P": "30.9700",
            "S": "32.0600",
            "Cl": "35.0000",
            "Br": "79.9000",
            "I": "126.90",
            "DUM": "0.0000"
        }

    def openff(self):
        """Label ``<lig>.mol2`` with smirnoff99Frosst; sets ``self.parameters``."""
        # Load the molecule (for now mol2, until charges are saved on sdf)
        molecule = Molecule.from_file(self.lig + '.mol2')
        topology = Topology.from_molecules([molecule])

        # Label using the smirnoff99Frosst force field
        self.forcefield = ForceField('smirnoff99Frosst.offxml')
        self.parameters = self.forcefield.label_molecules(topology)[0]

    def read_mol2(self):
        """
            This is basically to get the charge, will later be deprecated when charges are
            transferable in openff

            Every line of ``<lig>.mol2`` that splits into exactly nine
            whitespace-separated fields is treated as an atom record and
            stored in ``self.mapping``; its charge is added to
            ``self.total_charge``. A warning is printed when the summed
            charge is not exactly 0.0.
        """
        with open(self.lig + '.mol2') as infile:
            cnt = -1
            for line in infile:
                line = line.split()
                if len(line) == 9:
                    cnt += 1
                    self.mapping[cnt] = [
                        line[0],  # at idex
                        line[1],  # atname
                        line[5].split('.')[0],  # attype
                        line[8],  # charge
                        line[2],  # X coordinate
                        line[3],  # Y coordinte
                        line[4]  # Z coordinate
                    ]
                    self.total_charge += float(line[8])

        if self.total_charge != 0.0:
            print('WARNING: residual charge {} check your mol2 file!'.format(
                self.total_charge))

    def write_lib_Q(self):
        """Write ``<lig>.lib`` with the atoms, bonds and impropers sections."""
        with open(self.lig + '.lib', 'w') as outfile:
            outfile.write('{}    ! atoms no {}   total charge {} \n\n'.format(
                '{LIG}', len(self.mapping), self.total_charge))

            outfile.write("[info] \n SYBYLtype RESIDUE \n\n")

            #atom and charge block:
            outfile.write("[atoms] \n")
            for entry in self.mapping.values():
                # the lower-cased atom name doubles as the atom type
                outfile.write('{:>4s}   {:10}{:11}{:>10s}\n'.format(
                    entry[0], entry[1], entry[1].lower(), entry[3]))

            # bonded block
            outfile.write("\n[bonds]\n")
            for bond in self.parameters['Bonds']:
                ai = self.mapping[bond[0]][1]
                aj = self.mapping[bond[1]][1]
                outfile.write('{:10s}{:}\n'.format(ai, aj))

            # improper block
            outfile.write("\n[impropers]\n")
            for torsion in self.parameters['ImproperTorsions']:
                ai = self.mapping[torsion[0]][1]
                aj = self.mapping[torsion[1]][1]
                ak = self.mapping[torsion[2]][1]
                al = self.mapping[torsion[3]][1]
                outfile.write('{:10}{:10}{:10}{}\n'.format(ai, aj, ak, al))

    @staticmethod
    def _magnitude(quantity):
        """Return the first whitespace-separated token of ``quantity``'s text as a float."""
        # This is a bit hacky, check how to get the float out directly
        return float('{}'.format(quantity).split()[0])

    def _atom_names(self, atom_indices, count):
        """Return the lower-cased names of the first ``count`` atoms of ``atom_indices``."""
        return [
            self.mapping[atom_indices[i]][1].lower() for i in range(count)
        ]

    def _write_vdw_prm(self, outfile):
        """Write one vdW line per labelled atom."""
        for atom_indices, parameter in self.parameters['vdW'].items():
            ai = atom_indices[0]
            ai_name = self.mapping[ai][1].lower()
            epsilon = self._magnitude(parameter.epsilon)
            epsilon23 = epsilon / 2
            # TO DO: CHECK IF THIS IS CORRECT!!
            # NOTE(review): this is sigma / 2, written under the name Rmin.
            Rmin = self._magnitude(parameter.sigma) / 2
            mass = self.masses[self.mapping[ai][2]]
            outfile.write(
                """{:6}{: 8.3f}{: 10.3f}{: 10.3f}{: 10.3f}{: 10.3f}{:>10s}\n"""
                .format(ai_name, Rmin, 0.00, epsilon, Rmin, epsilon23, mass))

    def _write_bond_prm(self, outfile):
        """Write one line per labelled bond: names, force constant, length."""
        for atom_indices, parameter in self.parameters['Bonds'].items():
            ai_name, aj_name = self._atom_names(atom_indices, 2)
            fc = self._magnitude(parameter.k)
            length = self._magnitude(parameter.length)
            outfile.write('{:10}{:10}{:10.1f}{:>10.3f}\n'.format(
                ai_name, aj_name, fc, length))

    def _write_angle_prm(self, outfile):
        """Write one line per labelled angle: names, force constant, angle."""
        for atom_indices, parameter in self.parameters['Angles'].items():
            ai_name, aj_name, ak_name = self._atom_names(atom_indices, 3)
            fc = self._magnitude(parameter.k)
            angle = self._magnitude(parameter.angle)
            outfile.write("""{:10}{:10}{:10}{: 8.2f}{:>12.3f}\n""".format(
                ai_name, aj_name, ak_name, fc, angle))

    def _write_torsion_prm(self, outfile):
        """Write one line per term of every labelled proper torsion."""
        for atom_indices, parameter in self.parameters[
                'ProperTorsions'].items():
            names = self._atom_names(atom_indices, 4)
            max_phase = len(parameter.phase)

            # Collect all terms first so a torsion is written completely
            # or not at all.
            terms = []
            for i in range(max_phase):
                fc = self._magnitude(parameter.k[i])
                phase = self._magnitude(parameter.phase[i])
                paths = int(parameter.idivf[i])
                minimum = float(parameter.periodicity[i])
                # every term but the last carries a negated periodicity
                if i != max_phase - 1:
                    minimum = minimum * -1
                terms.append((fc, minimum, phase, paths))

            for fc, minimum, phase, paths in terms:
                outfile.write(
                    """{:10}{:10}{:10}{:10}{:>10.3f}{:>10.3f}{:>10.3f}{:>5d}\n"""
                    .format(names[0], names[1], names[2], names[3], fc,
                            minimum, phase, paths))

    def _write_improper_prm(self, outfile):
        """Write one line per labelled improper, using its first k/phase term."""
        for atom_indices, parameter in self.parameters[
                'ImproperTorsions'].items():
            ai_name, aj_name, ak_name, al_name = self._atom_names(
                atom_indices, 4)
            fc = self._magnitude(parameter.k[0])
            phase = self._magnitude(parameter.phase[0])
            outfile.write("""{:10}{:10}{:10}{:10}{:10.3f}{:10.3f}\n""".format(
                ai_name, aj_name, ak_name, al_name, fc, phase))

    def write_prm_Q(self):
        """
        Copy the template .prm file and insert the ligand parameters.

        Every template line is copied verbatim; directly after each
        ``! Ligand ... parameters`` marker line the matching ligand section
        is written.

        Raises
        ------
        ValueError
            If ``merge`` is true and ``FF`` is not 'AMBER14sb' (no template
            is known for that combination).
        """
        if self.merge:
            if self.FF != 'AMBER14sb':
                raise ValueError(
                    "merging is only supported for FF='AMBER14sb', got "
                    "{!r}".format(self.FF))
            prm_file = os.path.join(s.FF_DIR, 'AMBER14sb.prm')
            prm_file_out = self.FF + '_' + self.lig + '.prm'
        else:
            prm_file = os.path.join(s.FF_DIR, 'NOMERGE.prm')
            prm_file_out = self.lig + '.prm'

        # marker line in the template -> writer for the section that follows
        section_writers = {
            "! Ligand vdW parameters\n": self._write_vdw_prm,
            "! Ligand bond parameters\n": self._write_bond_prm,
            "! Ligand angle parameters\n": self._write_angle_prm,
            "! Ligand torsion parameters\n": self._write_torsion_prm,
            "! Ligand improper parameters\n": self._write_improper_prm,
        }

        with open(prm_file) as infile, open(prm_file_out, 'w') as outfile:
            for line in infile:
                outfile.write(line)
                writer = section_writers.get(line)
                if writer is not None:
                    writer(outfile)

    def write_PDB(self):
        """Write ``<lig>.pdb`` with one HETATM record per atom in ``self.mapping``."""
        with open(self.lig + '.pdb', 'w') as outfile:
            for atom in self.mapping:
                ai = atom + 1  # PDB serial numbers are 1-based
                ai_name = self.mapping[atom][1]
                a_el = self.mapping[atom][2]
                ax = float(self.mapping[atom][4])
                ay = float(self.mapping[atom][5])
                az = float(self.mapping[atom][6])
                at_entry = [
                    'HETATM',  #  0 ATOM/HETATM
                    ai,  #  1 ATOM serial number
                    ai_name,  #  2 ATOM name
                    '',  #  3 Alternate location indicator
                    'LIG',  #  4 Residue name
                    '',  #  5 Chain identifier
                    1,  #  6 Residue sequence number
                    '',  #  7 Code for insertion of residue
                    ax,  #  8 Orthogonal coordinates for X
                    ay,  #  9 Orthogonal coordinates for Y
                    az,  # 10 Orthogonal coordinates for Z
                    0.0,  # 11 Occupancy
                    0.0,  # 12 Temperature factor
                    a_el,  # 13 Element symbol
                    ''  # 14 Charge on atom
                ]
                outfile.write(IO.pdb_parse_out(at_entry) + '\n')
Example #13
0
def find_smirks_parameters(parameter_tag='vdW', *smiles_patterns):
    """Finds those force field parameters with a given tag which
    would be assigned to a specified set of molecules defined by
    the their smiles patterns.

    The 'smirnoff99Frosst-1.1.0.offxml' force field is used. Labelling
    results are memoised per smiles and tag in the module-level
    ``cached_smirks_parameters`` dictionary. Molecules whose
    stereochemistry is undefined are skipped.

    Parameters
    ----------
    parameter_tag: str
        The tag of the force field parameters to find.
    smiles_patterns: str
        The smiles patterns to assign the force field parameters
        to.

    Returns
    -------
    dict of str and set of str
        A dictionary with keys of parameter smirks patterns, and
        values of sets of smiles patterns which would utilize
        those parameters. Every smirks pattern of the handler is
        present, with an empty set when no molecule uses it.
    """

    # Swallow whatever is printed while the force field loads. The
    # try/finally guarantees the real streams come back even if loading
    # raises; otherwise the whole process would stay muted.
    stdout_ = sys.stdout
    stderr_ = sys.stderr

    stream = StringIO()
    sys.stdout = stream
    sys.stderr = stream
    try:
        force_field = ForceField('smirnoff99Frosst-1.1.0.offxml')
    finally:
        sys.stdout = stdout_
        sys.stderr = stderr_

    parameter_handler = force_field.get_parameter_handler(parameter_tag)

    # Initialize the dictionary with all possible smirks pattern
    # to make it easier to identify which are missing.
    smiles_by_parameter_smirks = {
        parameter.smirks: set() for parameter in parameter_handler.parameters
    }

    # Populate the dictionary using the open force field toolkit.
    for smiles in smiles_patterns:

        if (smiles not in cached_smirks_parameters
                or parameter_tag not in cached_smirks_parameters[smiles]):

            try:
                molecule = Molecule.from_smiles(smiles)
            except UndefinedStereochemistryError:
                # Skip molecules with undefined stereochemistry.
                continue

            topology = Topology.from_molecules([molecule])

            # Label first and store afterwards, so a failure while labelling
            # never leaves an empty (and wrong) entry behind in the cache.
            labels = force_field.label_molecules(topology)[0][parameter_tag]
            assigned_smirks = [
                parameter.smirks for parameter in labels.values()
            ]
            cached_smirks_parameters.setdefault(
                smiles, {})[parameter_tag] = assigned_smirks

        parameters_with_tag = cached_smirks_parameters[smiles][parameter_tag]

        for smirks in parameters_with_tag:
            smiles_by_parameter_smirks[smirks].add(smiles)

    return smiles_by_parameter_smirks
Example #14
0
def probe_by_parameter(probe_param, ffxml, subdir, all_probe_mols, inpickle):
    """
    For a single force field parameter, (1) find all molecules that use this
    parameter, and (2) save them to a mol2 file labeled with the parameter ID.

    Parameters
    ----------
    probe_param : string
        Name of the parameter to investigate
    ffxml : string
        Name of the FFXML force field file
    subdir : string
        Name of subdirectory in which to save mol2 files for each parameter
    all_probe_mols : dict
        key is string of a parameter id to be probed;
        value is an empty list
    inpickle : string
        Name of the pickle file from output of tailed_parameters.py

    Returns
    -------
    all_probe_mols : dict
        key is string of a parameter id to be probed;
        value is a list of oegraphmols with this parameter id

    Raises
    ------
    ValueError
        If labelling the first molecule listed for ``probe_param`` does not
        yield a parameter with that id.
    """
    prefix_dict = {'a':'Angles', 'b':'Bonds', 'i':'ImproperTorsions', 'n':'vdW', 't':'ProperTorsions'}

    # load parameter dictionaries from pickle
    # SECURITY: unpickling executes arbitrary code; only pass pickle files
    # that come from a trusted source.
    with open(inpickle, 'rb') as f:
        _, data_out = pickle.load(f)
    params_id_out = data_out['params_id']

    # find the first mol in outlier set with given param
    mols_with_probe = list(params_id_out[probe_param])
    probe_mol = Molecule.from_smiles(mols_with_probe[0], allow_undefined_stereo=True)
    topology = Topology.from_molecules([probe_mol])

    # load in force field
    ff = ForceField(ffxml)

    # run molecule labeling
    molecule_force_list = ff.label_molecules(topology)

    # get the smirks pattern associated with param; the first letter of the
    # parameter id selects the handler to search
    prefix = probe_param[0]
    force_dict = molecule_force_list[0][prefix_dict[prefix]]
    for parameter in force_dict.values():
        if parameter.id == probe_param:
            probe_smirks = parameter.smirks
            break
    else:
        # fail with a clear message instead of an unbound-variable error
        raise ValueError(
            f"parameter {probe_param} was not assigned to {mols_with_probe[0]}")
    print(f"\n=====\n{probe_param}: {probe_smirks}\n=====")

    # find all molecules with this parameter and save to file.
    # conformers are not considered here so these smiles refer to
    # an arbitrary conformer assigned in dict after zip
    # (since duplicate keys are removed in dict)
    outfile = f'{subdir}/param_{probe_param}.mol2'
    ofs = oechem.oemolostream()
    if not ofs.open(outfile):
        oechem.OEThrow.Fatal("Unable to open %s for writing" % outfile)

    try:
        for m in mols_with_probe:
            key = data_out['smi_dict'][m]
            print(f"writing out {key}")
            mymol = data_out['mols_dict'][key]['structure']
            oechem.OEWriteConstMolecule(ofs, mymol)

            # save to write full pdf later on
            all_probe_mols[probe_param].append(oechem.OEGraphMol(mymol))
    finally:
        # always release the output stream so the mol2 file is closed
        ofs.close()

    return all_probe_mols
class ForceFieldEditor:
    def __init__(self, forcefield_name: str):
        """
        Load the force field that will be edited.

        Parameters
        ----------
        forcefield_name: str
            The string of the target forcefield path.

        Notes
        ------
            The "Constraints" parameter handler is removed when present, as the FF should be unconstrained for fitting.
        """
        self.forcefield = ForceField(forcefield_name,
                                     allow_cosmetic_attributes=True)

        # drop the constraint handler; nothing to do when it is absent
        try:
            del self.forcefield._parameter_handlers["Constraints"]
        except KeyError:
            pass

    def add_smirks(
        self,
        smirks: List[Union[AtomSmirks, AngleSmirks, BondSmirks,
                           TorsionSmirks]],
        parameterize: bool = True,
    ) -> None:
        """
        Add each smirks to the handler matching its type.

        A pattern already present in the handler is updated in place and
        keeps its id; an unknown pattern is appended under a newly
        generated id. With ``parameterize=False`` the "parameterize" entry
        is removed from the data handed to the force field.
        """

        parameter_classes = {
            SmirksType.Bonds: BondHandler.BondType,
            SmirksType.Angles: AngleHandler.AngleType,
            SmirksType.ProperTorsions: ProperTorsionHandler.ProperTorsionType,
            SmirksType.Vdw: vdWHandler.vdWType,
        }
        id_prefixes = {
            SmirksType.Bonds: "b",
            SmirksType.Angles: "a",
            SmirksType.ProperTorsions: "t",
            SmirksType.Vdw: "n",
        }

        # group the incoming smirks by type, dropping duplicates
        grouped = {}
        for smirk in smirks:
            bucket = grouped.setdefault(smirk.type, [])
            if smirk not in bucket:
                bucket.append(smirk)

        for smirk_type, parameters in grouped.items():
            handler = self.forcefield.get_parameter_handler(smirk_type)
            current_params = handler.parameters
            no_params = len(current_params)
            for offset, parameter in enumerate(parameters, start=2):
                smirk_data = parameter.to_off_smirks()
                if not parameterize:
                    del smirk_data["parameterize"]
                try:
                    # known pattern: keep its id and refresh the values
                    # through __init__ to get around conditional assignment
                    existing = current_params[parameter.smirks]
                    smirk_data["id"] = existing.id
                    existing.__init__(**smirk_data)
                except IndexError:
                    # new pattern: build an id from the type prefix and the
                    # original handler size plus the running offset
                    new_id = id_prefixes[smirk_type] + str(no_params + offset)
                    smirk_data["id"] = new_id
                    new_parameter = parameter_classes[smirk_type](
                        **smirk_data)
                    current_params.append(new_parameter)

    def label_molecule(self, molecule: off.Molecule) -> Dict[str, str]:
        """
        Label the molecule with the forcefield.

        Parameters
        ----------
        molecule: off.Molecule
            The openforcefield.topology.Molecule that should be labeled by the forcefield.

        Returns
        -------
        Dict[str, str]
            The first entry returned by ``ForceField.label_molecules`` for
            the molecule's topology, organised by parameter handler type.
        """
        topology = molecule.to_topology()
        return self.forcefield.label_molecules(topology)[0]

    def get_smirks_parameters(
        self, molecule: off.Molecule, atoms: List[Tuple[int, ...]]
    ) -> List[Union[AtomSmirks, AngleSmirks, BondSmirks, TorsionSmirks]]:
        """
        Label the molecule and collect one smirks object per distinct parameter assigned to the requested atom tuples.
        """
        # the tuple length decides which handler is queried
        type_by_size = {
            1: SmirksType.Vdw,
            2: SmirksType.Bonds,
            3: SmirksType.Angles,
            4: SmirksType.ProperTorsions,
        }
        labels = self.label_molecule(molecule=molecule)
        collected = []
        for atom_ids in atoms:
            smirk_class = type_by_size[len(atom_ids)]
            off_param = labels[smirk_class.value][atom_ids]
            smirk = smirks_from_off(off_smirks=off_param)
            smirk.atoms.add(atom_ids)
            if smirk in collected:
                # already known: record the extra atoms on the stored copy
                collected[collected.index(smirk)].atoms.add(atom_ids)
            else:
                collected.append(smirk)
        return collected

    def update_smirks_parameters(
        self,
        smirks: Iterable[Union[AtomSmirks, AngleSmirks, BondSmirks,
                               TorsionSmirks]],
    ) -> None:
        """
        Refresh each smirks schema in place with the values its pattern currently has in the forcefield.

        Parameters
        ----------
        smirks : Iterable[Union[AtomSmirks, AngleSmirks, BondSmirks, TorsionSmirks]]
            An iterable containing smirks schemas that are to be updated.

        """

        for smirk in smirks:
            handler = self.forcefield.get_parameter_handler(smirk.type)
            # look the pattern up by its smirks string and copy the values
            smirk.update_parameters(off_smirk=handler.parameters[smirk.smirks])

    def get_initial_parameters(
        self,
        molecule: off.Molecule,
        smirks: List[Union[AtomSmirks, AngleSmirks, BondSmirks,
                           TorsionSmirks]],
        clear_existing: bool = True,
    ) -> List[Union[AtomSmirks, AngleSmirks, BondSmirks, TorsionSmirks]]:
        """
        Update every smirks with the parameters the forcefield assigns to its atoms in the molecule and return the same list.

        ``clear_existing`` is only forwarded for proper torsions; all other
        types are updated with ``clear_existing=True``.
        """
        labels = self.label_molecule(molecule=molecule)
        for smirk in smirks:
            parameters = labels[smirk.type]

            if smirk.type != SmirksType.ProperTorsions:
                # the first listed atom tuple determines the parameter
                atoms = list(smirk.atoms)[0]
                smirk.update_parameters(off_smirk=parameters[atoms],
                                        clear_existing=True)
                continue

            # torsions: several parameter types may be combined
            # TODO is this needed?
            openff_params = [parameters[atoms] for atoms in smirk.atoms]
            distinct_ids = {param.id for param in openff_params}

            smirk.update_parameters(off_smirk=openff_params[0],
                                    clear_existing=clear_existing)
            # with more than one distinct id, add the remaining k terms
            if len(distinct_ids) > 1:
                for param in openff_params[1:]:
                    smirk.update_parameters(param, clear_existing=False)

        return smirks
Example #16
0
class SMIRNOFF(OpenMM):

    """ Derived from Engine object for carrying out OpenMM calculations that use the SMIRNOFF force field. """

    def __init__(self, name="openmm", **kwargs):
        """
        Record the keywords this engine accepts, then defer to the parent constructor.

        Parameters
        ----------
        name : string
            Engine name, forwarded to the parent constructor.
        **kwargs
            Engine options, forwarded unchanged to the parent constructor.
        """
        # Assigned before the parent constructor runs (presumably the parent reads it -- confirm).
        self.valkwd = ['ffxml', 'pdb', 'mol2', 'platname', 'precision', 'mmopts', 'vsite_bonds', 'implicit_solvent', 'restrain_k', 'freeze_atoms']
        super(SMIRNOFF,self).__init__(name=name, **kwargs)

    def readsrc(self, **kwargs):
        """
        SMIRNOFF simulations always require the following passed in via kwargs:

        Parameters
        ----------
        pdb : string
            Name of a .pdb file containing the topology of the system
        mol2 : list
            A list of .mol2 file names containing the molecule/residue templates of the system

        Also provide 1 of the following, containing the coordinates to be used:
        mol : Molecule
            forcebalance.Molecule object
        coords : string
            Name of a file (readable by forcebalance.Molecule)
            This could be the same as the pdb argument from above.

        Raises
        ------
        RuntimeError
            If the pdb name is missing or the file does not exist, if neither
            'mol' nor 'coords' is given, if the 'coords' file does not exist,
            if no mol2 list is given, or if a listed .mol2 file does not exist
            and is not one of the force field's own files.
        """

        pdbfnm = kwargs.get('pdb')
        # Determine the PDB file name.
        if not pdbfnm:
            raise RuntimeError('Name of PDB file not provided.')
        elif not os.path.exists(pdbfnm):
            logger.error("%s specified but doesn't exist\n" % pdbfnm)
            raise RuntimeError

        if 'mol' in kwargs:
            self.mol = kwargs['mol']
        elif 'coords' in kwargs:
            if not os.path.exists(kwargs['coords']):
                logger.error("%s specified but doesn't exist\n" % kwargs['coords'])
                raise RuntimeError
            self.mol = Molecule(kwargs['coords'])
        else:
            logger.error('Must provide either a molecule object or coordinate file.\n')
            raise RuntimeError

        # Here we cannot distinguish the .mol2 files linked by the target
        # vs. the .mol2 files to be provided by the force field.
        # But we can assume that these files should exist when this function is called.

        self.mol2 = kwargs.get('mol2')
        if not self.mol2:
            logger.error("Must provide a list of .mol2 files.\n")
            # Fail here like every other validation branch; otherwise prepare()
            # would only fail later while iterating over self.mol2.
            raise RuntimeError
        for fnm in self.mol2:
            if os.path.exists(fnm):
                continue
            # A missing file is tolerated when it is one of the force field's own files.
            if hasattr(self, 'FF') and fnm in self.FF.fnms:
                continue
            logger.error("%s doesn't exist" % fnm)
            raise RuntimeError

        self.abspdb = os.path.abspath(pdbfnm)
        mpdb = Molecule(pdbfnm)
        # Copy the topology-related fields from the PDB onto the coordinate molecule.
        for i in ["chain", "atomname", "resid", "resname", "elem"]:
            self.mol.Data[i] = mpdb.Data[i]

        # Store a separate copy of the molecule for reference restraint positions.
        self.ref_mol = deepcopy(self.mol)

    def prepare(self, pbc=False, mmopts=None, **kwargs):

        """
        Prepare the calculation.  Note that we don't create the
        Simulation object yet, because that may depend on MD
        integrator parameters, thermostat, barostat etc.

        This is mostly copied and modified from openmmio.py's OpenMM.prepare(),
        but we are calling ForceField() from the OpenFF toolkit and ignoring
        AMOEBA stuff.

        Parameters
        ----------
        pbc : bool
            Whether periodic boxes are read from self.mol for every frame.
        mmopts : dict, optional
            OpenMM system options; copied, never modified in place.
            None (the default) is treated as an empty dictionary.
        **kwargs
            'offxml' is read when no FF object is attached; 'restrain_k' and
            'freeze_atoms' are stored on the engine when present;
            'nonbonded_cutoff' only triggers a warning.

        Raises
        ------
        RuntimeError
            If pbc is set and a frame has a non-orthogonal box.
        """
        self.pdb = PDBFile(self.abspdb)

        # Create the OpenFF ForceField object.
        if hasattr(self, 'FF'):
            self.offxml = [self.FF.offxml]
            self.forcefield = self.FF.openff_forcefield
        else:
            self.offxml = listfiles(kwargs.get('offxml'), 'offxml', err=True)
            self.forcefield = OpenFF_ForceField(*self.offxml)

        ## Load mol2 files for smirnoff topology
        openff_mols = []
        for fnm in self.mol2:
            try:
                mol = OffMolecule.from_file(fnm)
            except Exception:
                logger.error("Error when loading %s" % fnm)
                raise
            openff_mols.append(mol)
        self.off_topology = OffTopology.from_openmm(self.pdb.topology, unique_molecules=openff_mols)

        # used in create_simulation()
        self.mod = Modeller(self.pdb.topology, self.pdb.positions)

        ## OpenMM options for setting up the System.
        self.mmopts = dict(mmopts) if mmopts is not None else {}

        ## Specify frozen atoms and restraint force constant
        if 'restrain_k' in kwargs:
            self.restrain_k = kwargs['restrain_k']
        if 'freeze_atoms' in kwargs:
            self.freeze_atoms = kwargs['freeze_atoms'][:]

        ## Set system options from ForceBalance force field options.
        fftmp = False
        if hasattr(self,'FF'):
            self.mmopts['rigidWater'] = self.FF.rigid_water
            if not all(os.path.exists(f) for f in self.FF.fnms):
                # If the parameter files don't already exist, create them for the purpose of
                # preparing the engine, but then delete them afterward.
                fftmp = True
                self.FF.make(np.zeros(self.FF.np))

        try:
            ## Set system options from periodic boundary conditions.
            self.pbc = pbc
            ## print warning for 'nonbonded_cutoff' keywords
            if 'nonbonded_cutoff' in kwargs:
                logger.warning("nonbonded_cutoff keyword ignored because it's set in the offxml file\n")

            ## Generate OpenMM-compatible positions
            # No extra (virtual) particles are added: virtual sites are not supported yet.
            self.xyz_omms = []
            for frame in range(len(self.mol)):
                position = self.mol.xyzs[frame] * angstrom
                if self.pbc:
                    # Obtain the periodic box
                    box = self.mol.boxes[frame]
                    if box.alpha != 90.0 or box.beta != 90.0 or box.gamma != 90.0:
                        logger.error('OpenMM cannot handle nonorthogonal boxes.\n')
                        raise RuntimeError
                    box_omm = np.diag([box.a, box.b, box.c]) * angstrom
                else:
                    box_omm = None
                # Finally append it to list.
                self.xyz_omms.append((position, box_omm))

            ## Build the atom lists from the PDB topology.
            atoms = list(self.pdb.topology.atoms())
            self.AtomLists = defaultdict(list)
            # Atoms without an element are given zero mass.
            self.AtomLists['Mass'] = [a.element.mass.value_in_unit(dalton) if a.element is not None else 0 for a in atoms]
            # Particles with mass >= 1.0 are tagged 'A', anything lighter 'D'.
            self.AtomLists['ParticleType'] = ['A' if m >= 1.0 else 'D' for m in self.AtomLists['Mass']]
            self.AtomLists['ResidueNumber'] = [a.residue.index for a in atoms]
            self.AtomMask = [ptype == 'A' for ptype in self.AtomLists['ParticleType']]
            self.realAtomIdxs = [i for i, is_atom in enumerate(self.AtomMask) if is_atom]
        finally:
            # Remove the temporary parameter files even when preparation fails part-way.
            if fftmp:
                for f in self.FF.fnms:
                    os.unlink(f)

    def update_simulation(self, **kwargs):

        """
        Create the simulation object, or update the force field
        parameters in the existing simulation object.  This should be
        run when we write a new force field XML file.

        Parameters
        ----------
        **kwargs
            When non-empty, stored as self.simkwargs and used for
            create_simulation() if no simulation object exists yet.
        """
        if len(kwargs) > 0:
            self.simkwargs = kwargs

        # Because self.forcefield is being updated in forcebalance.forcefield.FF.make()
        # there is no longer a need to create a new force field object here.
        try:
            self.system = self.forcefield.create_openmm_system(self.off_topology)
        except Exception:
            logger.error("Error when creating system for %s" % self.mol2)
            raise
        # Virtual site handling is disabled for now, so an existing simulation
        # never has to be rebuilt because of changed virtual site parameters.
        self.nbcharges = np.zeros(self.system.getNumParticles())

        if hasattr(self, 'simulation'):
            UpdateSimulationParameters(self.system, self.simulation)
        else:
            self.create_simulation(**self.simkwargs)

    def optimize(self, shot=0, align=True, crit=1e-4):
        """ Call the parent optimize() with the same arguments and disable_vsite=True. """
        return super(SMIRNOFF,self).optimize(shot=shot, align=align, crit=crit, disable_vsite=True)

    def interaction_energy(self, fraga, fragb):

        """
        Calculate the interaction energy for two fragments.
        Because this creates two new objects and requires passing in the mol2 argument,
        the codes are copied and modified from the OpenMM class.

        Parameters
        ----------
        fraga, fragb
            Atom selections passed to self.mol.atom_select() to build the two fragments.

        Returns
        -------
        The energy of the full system minus the energies of both fragments,
        divided by 4.184.

        Raises
        ------
        RuntimeError
            If this engine itself is named 'A' or 'B'.
        """

        self.update_simulation()

        if self.name == 'A' or self.name == 'B':
            logger.error("Don't name the engine A or B!\n")
            raise RuntimeError

        # Create two subengines; they are cached as self.A / self.B and reused on later calls.
        # NOTE(review): no 'pdb' keyword is passed although readsrc() rejects a missing
        # 'pdb' -- confirm how the sub-engines are expected to obtain their topology.
        for label, fragment in (("A", fraga), ("B", fragb)):
            if hasattr(self, label):
                continue
            if hasattr(self, 'target'):
                options = {'target': self.target}
            else:
                options = {'platname': self.platname, 'precision': self.precision,
                           'offxml': self.offxml, 'mmopts': self.mmopts}
            setattr(self, label, SMIRNOFF(name=label, mol=self.mol.atom_select(fragment),
                                          mol2=self.mol2, **options))

        # Interaction energy needs to be in kcal/mol.
        D = self.energy()
        A = self.A.energy()
        B = self.B.energy()

        return (D - A - B) / 4.184

    def get_smirks_counter(self):
        """Get a counter for the number of appearances of each SMIRKS"""
        molecule_force_list = self.forcefield.label_molecules(self.off_topology)
        # One entry per molecule; each maps a force tag (e.g. 'Bonds') to the
        # parameters assigned for that force.
        return Counter(
            parameter.smirks
            for mol_forces in molecule_force_list
            for force_dict in mol_forces.values()
            for parameter in force_dict.values()
        )
import time

print("Starting simulation")
# perf_counter() measures elapsed wall-clock time.  process_time() reports the
# CPU time of this process and skips time spent waiting, so it does not match
# the "Elapsed time" label printed below.
start = time.perf_counter()

# Run the simulation
simulation.step(num_steps)

end = time.perf_counter()
print("Elapsed time %.2f seconds" % (end - start))
print("Hurra!")

# In[273]:

# Parameters the force field assigns to the first entry of the labelled topology.
ff_applied_parameters = forcefield.label_molecules(off_topology)[0]
ff_values = []

# Write one "atoms,parameter" line per labelled bond.  The context manager
# closes the file even if a write raises.
with open('ff_valuefile.txt', 'w+') as ff_valuefile:
    for atoms, bonds in ff_applied_parameters['Bonds'].items():
        ff_valuefile.write(F'{atoms},{bonds}\n')

# In[274]:

import numpy

ff_valuefile = open('ff_valuefile.txt', 'r')