def test_no_fail_cluster(smiles_list, layers):
    """
    Check that ClusterGraph.add_atom(None) returns None for graphs built
    from two different index layouts: pairs of atoms and single atoms.

    Parameters
    -----------
    smiles_list : list of str
        SMILES strings used to build one Mol each
    layers : passed straight through to ClusterGraph as ``layers``
    """
    n_mols = len(smiles_list)
    # every molecule is given the same list of atom index tuples
    atom_list_options = (
        [[(0, 1), (1, 2)]] * n_mols,
        [[(0, ), (1, ), (2, )]] * n_mols,
    )
    mols_list = [mol_toolkit.Mol.from_smiles(smiles) for smiles in smiles_list]

    # build both graphs first, then check each one
    graphs = [
        ClusterGraph(mols_list, atom_lists, layers=layers)
        for atom_lists in atom_list_options
    ]
    for graph in graphs:
        assert graph.add_atom(None) is None
Beispiel #2
0
    def _make_cluster_graphs(self, layers):
        """
        Build one compressed SMIRKS per stored cluster, using the stored
        molecules and the requested number of layers
        (atoms away from the indexed atoms).

        Parameters
        -----------
        layers : int
            number of layers (atoms away from indexed atoms) to
            include in this round of graphs

        Returns
        --------
        smirks_list : list of two tuples
            SMIRKS list in the form [ (label, SMIRKS), ...]
            where each label is the cluster label prefixed with 'zz_'
        """
        def labeled_smirks(label, smirks_atom_list):
            # one ClusterGraph per cluster, reduced to its compressed SMIRKS
            cluster_graph = ClusterGraph(self.molecules, smirks_atom_list, layers)
            pattern = cluster_graph.as_smirks(compress=True)
            return 'zz_' + str(label), pattern

        return [
            labeled_smirks(label, smirks_atom_list)
            for label, smirks_atom_list in self.cluster_list
        ]
def test_mols_mismatch():
    """
    Check that building a ClusterGraph raises an exception when the number
    of molecules (one here) differs from the number of atom index lists
    (two here).
    """
    single_mol = [mol_toolkit.Mol.from_smiles('CC')]
    two_atom_lists = [
        [(0, 1)],
        [(1, 2)],
    ]
    with pytest.raises(Exception):
        ClusterGraph(single_mol, two_atom_lists)
def make_cluster_graph(smiles_list, layers=0):
    """
    Build a ClusterGraph from a list of SMILES strings.

    A chemper Mol is generated for every entry in smiles_list and the same
    SMIRKS indices are used for all of them: atom 0 is assigned SMIRKS
    index 1 and atom 1 is assigned SMIRKS index 2.

    layers sets how many atoms away from the indexed atoms are included.
    With layers=0 only the SMIRKS indexed atoms are in the graph, with
    layers=1 atoms one bond away are added as well, and so forth.
    layers can also be "all", which adds every atom in the molecule.
    """
    # a single shared entry, repeated once per molecule
    indexed_atoms = [(0, 1)]
    smirks_atom_lists = [indexed_atoms] * len(smiles_list)

    mols_list = []
    for smiles in smiles_list:
        mols_list.append(mol_toolkit.MolFromSmiles(smiles))

    return ClusterGraph(mols_list, smirks_atom_lists, layers=layers)
Beispiel #5
0
    def _make_cluster_graphs(self, layers):
        """
        Creates a list of SMIRKS with the form
        [ (label, SMIRKS), ]
        using the stored molecules and cluster_list.

        Each label is the cluster label prefixed with 'zz_' and each
        SMIRKS is the compressed pattern of a ClusterGraph built with
        the given number of layers.
        """
        labeled_patterns = []

        for cluster_label, atom_lists in self.cluster_list:
            # graph covering this cluster in every stored molecule
            cluster_graph = ClusterGraph(self.molecules, atom_lists, layers)
            pattern = cluster_graph.as_smirks(compress=True)

            entry = ('zz_' + str(cluster_label), pattern)
            labeled_patterns.append(entry)

        return labeled_patterns
Beispiel #6
0
    def __init__(self, molecules, cluster_list, max_layers=5,
                 verbose=True, strict_smirks=True):
        """
        Store the inputs, build the reference cluster dictionary and make
        the initial SMIRKS patterns for the provided clusters.

        Parameters
        ----------
        molecules : list of Mols
            These can be chemper Mols or molecules from any supported toolkit
            (currently OpenEye or RDKit)

        cluster_list : list of (label, smirks_atom_lists) pairs
            For each label the user should provide, for every molecule, a list
            of tuples of the atom indices to include in that cluster.

            For example, if you wanted all atoms with indices (0,1) and (1,2) to be in cluster 'c1'
            and atoms (2,3) in cluster 'c2' for each of two molecules then cluster_list would be

            [ ('c1', [ [ (0,1), (1,2) ], [ (0,1), (1,2) ] ]),
              ('c2', [ [ (2,3)        ], [ (2,3)        ] ]) ]

            To see an example of this in action checkout
            https://github.com/MobleyLab/chemper/tree/master/examples

        max_layers : int (optional)
            default = 5
            how many atoms away from the indexed atoms should
            we consider at the maximum

        verbose : boolean (optional)
            default = True
            If true information is printed to the command line

        strict_smirks : boolean (optional)
            default = True
            If False it will not raise an error when incapable of making SMIRKS
            and a warning is printed instead.
            This setting is not recommended unless you are a master user
            or developer trying to test current behavior.
            The variable SMIRKSifier.checks will tell you if the SMIRKS
            generation failed when strict_smirks = False

        Raises
        ------
        ClusteringError
            if the generated SMIRKS do not pass the reference check
            and strict_smirks is True
        """
        # keep the inputs
        self.molecules = [mol_toolkit.Mol(mol) for mol in molecules]
        self.cluster_list = cluster_list
        self.max_layers = max_layers
        self.verbose = verbose
        self.strict_smirks = strict_smirks
        self.intermediate_smirks = dict()

        # A zero layer SMIRKS for the first cluster is used as a probe:
        # its type decides which dictionary class handles index symmetry
        probe_graph = ClusterGraph(self.molecules, cluster_list[0][1], 0)
        probe_env = CE(probe_graph.as_smirks(compress=True))
        if probe_env.get_type() is None:
            # corresponds to an unknown chemical pattern
            self.dict_type = dict
        else:
            is_improper = probe_env.get_type().lower() == 'impropertorsion'
            self.dict_type = ImproperDict if is_improper else ValenceDict

        # Re-key the input clusters by molecule:
        # {mol_idx: {(atom indices): label, ...}, ... }
        self.cluster_dict = dict()
        self.ref_labels = set()
        self.total = 0
        for cluster_label, atoms_per_mol in self.cluster_list:
            self.ref_labels.add(cluster_label)
            # atoms_per_mol holds one list of atom index tuples per molecule
            for mol_idx, atom_tuples in enumerate(atoms_per_mol):
                if mol_idx not in self.cluster_dict:
                    self.cluster_dict[mol_idx] = self.dict_type()
                mol_clusters = self.cluster_dict[mol_idx]
                for atom_tuple in atom_tuples:
                    # total counts every tuple that was provided
                    self.total += 1
                    mol_clusters[atom_tuple] = cluster_label

        # make SMIRKS patterns for input clusters
        self.current_smirks, self.layers = self.make_smirks()
        if self.verbose:
            print_smirks(self.current_smirks)
        # check SMIRKS and save the matches to input clusters
        self.type_matches, self.checks = self.types_match_reference()

        if self.checks:
            return

        # the SMIRKS did not reproduce the input clusters
        msg = """
                      SMIRKSifier was not able to create SMIRKS for the provided
                      clusters with %i layers. Try increasing the number of layers
                      or changing your clusters
                      """ % self.max_layers
        if self.strict_smirks:
            raise ClusteringError(msg)
        print("WARNING!", msg)
from chemper.mol_toolkits.mol_toolkit import Mol
from chemper.graphs.cluster_graph import ClusterGraph

# Example: build ClusterGraphs for two molecules with different layer
# settings and print the resulting SMIRKS.

# one chemper Mol per SMILES string
mols = [Mol.from_smiles(smiles) for smiles in ('CCO', 'CC=C')]

# atoms to tag: one set of atoms, (0, 1), in each molecule
tagged = [[(0, 1)] for _ in mols]

# try multiple options for layers
for layers in (0, 1, 'all'):
    graph = ClusterGraph(mols, tagged, layers)
    # first the default (complex) output, then the compressed form, which
    # and's common decorators to the end of each atom
    for smirks_options in ({}, {'compress': True}):
        print(graph.as_smirks(**smirks_options))
Beispiel #8
0
from chemper.mol_toolkits import mol_toolkit
from chemper.graphs.cluster_graph import ClusterGraph

# Example: one ClusterGraph built from two molecules that contribute a
# different number of tagged atom pairs.
mol1, mol2 = [mol_toolkit.MolFromSmiles(smiles) for smiles in ('CCC', 'CCCCC')]

# one atom pair in the first molecule, two in the second
atoms1, atoms2 = [(0, 1)], [(0, 1), (1, 2)]

graph = ClusterGraph([mol1, mol2], [atoms1, atoms2])
print(graph.as_smirks())
# output reported by the original example:
# "[#6AH2X4x0r0+0,#6AH3X4x0r0+0:1]-;!@[#6AH2X4x0r0+0:2]"
Beispiel #9
0
def _combine_molecules_offxml(
    molecules: List["Ligand"],
    parameters: List[str],
    rfree_data: Dict[str, Dict[str, Union[str, float]]],
    filename: str,
    water_model: Literal["tip3p"] = "tip3p",
):
    """
    Main worker function to build the combined offxmls.

    For every molecule one SMIRKS pattern is generated (with chemper) per
    bond, angle, proper torsion, improper torsion and atom type, and added
    to a single force field together with the molecule's library charges.
    A water model is added and the force field is written to ``filename``.

    Args:
        molecules: The molecules whose parameters should be combined.
        parameters: The codes of the parameters to be fit. Any non-empty
            list selects the "QUBEKitvdWTS" handler instead of "vdW", and
            the entry "AB" requests fitting of alpha and beta.
        rfree_data: Looked up as ``rfree_data[code]["r_free"]`` for each
            fitted code, and as ``rfree_data["alpha"]`` /
            ``rfree_data["beta"]`` when "AB" is requested.
        filename: Passed to ``ForceField.to_file``.
        water_model: Passed to ``_add_water_model``.

    Raises:
        NotImplementedError: If any molecule has virtual sites or
            RB torsion parameters.
        ModuleNotFoundError: If chemper can not be imported.
    """
    # refuse molecules that use terms we can not write to offxml
    total_sites = sum(molecule.extra_sites.n_sites for molecule in molecules)
    if total_sites > 0:
        raise NotImplementedError(
            "Virtual sites can not be safely converted into offxml format yet."
        )

    total_rb_terms = sum(
        molecule.RBTorsionForce.n_parameters for molecule in molecules
    )
    if total_rb_terms > 0:
        raise NotImplementedError(
            "RBTorsions can not yet be safely converted into offxml format yet."
        )

    try:
        from chemper.graphs.cluster_graph import ClusterGraph
    except ModuleNotFoundError:
        raise ModuleNotFoundError(
            "chemper is required to make an offxml, please install with `conda install chemper -c conda-forge`."
        )

    def _whole_molecule_graph(rdkit_mol, atom_tuples):
        # graph over all atoms of one molecule for one group of atom tuples
        return ClusterGraph(
            mols=[rdkit_mol], smirks_atoms_lists=[atom_tuples], layers="all"
        )

    # alpha and beta are fit when "AB" is requested
    fit_ab = "AB" in parameters
    # every rfree code that is used by these molecules and should be fit
    rfree_codes = set()
    # proper torsions are written with four terms
    torsion_terms = (1, 2, 3, 4)

    # create the master ff
    offxml = ForceField(allow_cosmetic_attributes=True, load_plugins=True)
    offxml.author = f"QUBEKit_version_{qubekit.__version__}"
    offxml.date = datetime.now().strftime("%Y_%m_%d")

    # request all of the handlers; the ones we do not fill are not kept
    offxml.get_parameter_handler("Constraints")
    bond_handler = offxml.get_parameter_handler("Bonds")
    angle_handler = offxml.get_parameter_handler("Angles")
    proper_torsions = offxml.get_parameter_handler("ProperTorsions")
    improper_torsions = offxml.get_parameter_handler("ImproperTorsions")
    offxml.get_parameter_handler(
        "Electrostatics", handler_kwargs={"scale14": 0.8333333333, "version": 0.3}
    )
    # if we want to optimise the Rfree we need our custom handler
    using_plugin = bool(parameters)
    vdw_handler_name = "QUBEKitvdWTS" if using_plugin else "vdW"
    vdw_handler = offxml.get_parameter_handler(
        vdw_handler_name, allow_cosmetic_attributes=True
    )
    library_charges = offxml.get_parameter_handler("LibraryCharges")

    for molecule in molecules:
        rdkit_mol = molecule.to_rdkit()

        # bonds: a single smirks pattern for each bond type collection
        for bonds in molecule.bond_types.values():
            graph = _whole_molecule_graph(rdkit_mol, bonds)
            qube_bond = molecule.BondForce[bonds[0]]
            bond_kwargs = {
                "smirks": graph.as_smirks(),
                "length": qube_bond.length * unit.nanometers,
                "k": qube_bond.k * unit.kilojoule_per_mole / unit.nanometers**2,
            }
            bond_handler.add_parameter(parameter_kwargs=bond_kwargs)

        # angles
        for angles in molecule.angle_types.values():
            graph = _whole_molecule_graph(rdkit_mol, angles)
            qube_angle = molecule.AngleForce[angles[0]]
            angle_kwargs = {
                "smirks": graph.as_smirks(),
                "angle": qube_angle.angle * unit.radian,
                "k": qube_angle.k * unit.kilojoule_per_mole / unit.radians**2,
            }
            angle_handler.add_parameter(parameter_kwargs=angle_kwargs)

        # proper torsions: k, periodicity, phase and idivf for each term
        for dihedrals in molecule.dihedral_types.values():
            graph = _whole_molecule_graph(rdkit_mol, dihedrals)
            qube_dihedral = molecule.TorsionForce[dihedrals[0]]
            torsion_kwargs = {"smirks": graph.as_smirks()}
            for term in torsion_terms:
                torsion_kwargs[f"k{term}"] = (
                    getattr(qube_dihedral, f"k{term}") * unit.kilojoule_per_mole
                )
            for term in torsion_terms:
                torsion_kwargs[f"periodicity{term}"] = getattr(
                    qube_dihedral, f"periodicity{term}"
                )
            for term in torsion_terms:
                torsion_kwargs[f"phase{term}"] = (
                    getattr(qube_dihedral, f"phase{term}") * unit.radians
                )
            for term in torsion_terms:
                torsion_kwargs[f"idivf{term}"] = 1
            proper_torsions.add_parameter(parameter_kwargs=torsion_kwargs)

        # improper torsions
        for torsions in molecule.improper_types.values():
            # the first two atoms of every improper are swapped for the smirks
            impropers = [
                (atoms[1], atoms[0], *atoms[2:]) for atoms in torsions
            ]
            graph = _whole_molecule_graph(rdkit_mol, impropers)
            qube_improper = molecule.ImproperTorsionForce[torsions[0]]
            # the k value is multiplied by 3 as impropers will be applied as trefoil see
            # <https://openforcefield.github.io/standards/standards/smirnoff/#impropertorsions> for more details
            # we assume we only have a k2 term for improper torsions via a periodic term
            improper_kwargs = {
                "smirks": graph.as_smirks(),
                "k1": qube_improper.k2 * 3 * unit.kilojoule_per_mole,
                "periodicity1": qube_improper.periodicity2,
                "phase1": qube_improper.phase2 * unit.radians,
            }
            improper_torsions.add_parameter(parameter_kwargs=improper_kwargs)

        # non-bonded terms: group the atoms by their type first
        atoms_by_type = {}
        for atom_index, cip_type in molecule.atom_types.items():
            if cip_type not in atoms_by_type:
                atoms_by_type[cip_type] = []
            atoms_by_type[cip_type].append((atom_index,))

        for sym_set in atoms_by_type.values():
            graph = _whole_molecule_graph(rdkit_mol, sym_set)
            qube_non_bond = molecule.NonbondedForce[sym_set[0]]
            rfree_code = _get_parameter_code(
                molecule=molecule, atom_index=sym_set[0][0]
            )
            atom_data = {"smirks": graph.as_smirks()}

            if rfree_code in parameters or fit_ab:
                # keep track of present codes to optimise
                rfree_codes.add(rfree_code)

            if using_plugin:
                # this is to be refit, so store the volume
                atom = molecule.atoms[qube_non_bond.atoms[0]]
                atom_data["volume"] = atom.aim.volume * unit.angstroms**3
            else:
                atom_data["epsilon"] = qube_non_bond.epsilon * unit.kilojoule_per_mole
                atom_data["sigma"] = qube_non_bond.sigma * unit.nanometers

            vdw_handler.add_parameter(parameter_kwargs=atom_data)

        # library charges: one entry per parameter, numbered from 1
        charge_data = {
            f"charge{param.atoms[0] + 1}": param.charge * unit.elementary_charge
            for param in molecule.NonbondedForce
        }
        charge_data["smirks"] = molecule.to_smiles(mapped=True)
        library_charges.add_parameter(parameter_kwargs=charge_data)

    # now loop over all the parameters to be fit and add them as cosmetic attributes
    to_parameterize = []
    for code in parameters:
        if code == "AB" or code not in rfree_codes:
            continue
        attribute_name = f"{code.lower()}free"
        r_free = unit.Quantity(rfree_data[code]["r_free"], unit=unit.angstroms)
        setattr(vdw_handler, attribute_name, r_free)
        to_parameterize.append(attribute_name)

    if fit_ab:
        vdw_handler.alpha = rfree_data["alpha"]
        vdw_handler.beta = rfree_data["beta"]
        to_parameterize.extend(["alpha", "beta"])

    if to_parameterize:
        vdw_handler.add_cosmetic_attribute("parameterize", ", ".join(to_parameterize))

    # now add a water model to the force field
    _add_water_model(
        force_field=offxml, water_model=water_model, using_plugin=using_plugin
    )
    offxml.to_file(filename=filename)