def _make_cluster_graphs(self, layers):
    """
    Build one SMIRKS pattern per stored cluster from the stored molecules,
    including the requested number of layers around the indexed atoms.

    Parameters
    -----------
    layers : int
        number of layers (atoms away from indexed atoms) to include
        in this round of graphs

    Returns
    --------
    smirks_list : list of two tuples
        SMIRKS list in the form [ (label, SMIRKS), ...] where each label is
        the cluster label converted to a string and prefixed with 'zz_'
    """
    def _labelled_smirks(cluster_label, atom_lists):
        # one ClusterGraph per cluster, reduced to its compressed SMIRKS
        pattern = ClusterGraph(self.molecules, atom_lists, layers).as_smirks(compress=True)
        return 'zz_' + str(cluster_label), pattern

    return [
        _labelled_smirks(cluster_label, atom_lists)
        for cluster_label, atom_lists in self.cluster_list
    ]
def _make_cluster_graphs(self, layers):
    """
    Creates a list of SMIRKS with the form [ (label, SMIRKS), ]
    using the stored molecules and cluster_list.

    Each label is the cluster label converted to a string and prefixed
    with 'zz_'; `layers` is passed on to every ClusterGraph that is built.
    """
    labelled_patterns = []
    for cluster_label, atom_lists in self.cluster_list:
        # build the graph for this cluster and keep only its compressed SMIRKS
        cluster_graph = ClusterGraph(self.molecules, atom_lists, layers)
        pattern = cluster_graph.as_smirks(compress=True)
        entry = ('zz_' + str(cluster_label), pattern)
        labelled_patterns.append(entry)
    return labelled_patterns
def test_mols_mismatch():
    """
    A ClusterGraph given one molecule but two smirks atom lists
    must raise an exception.
    """
    single_mol = [mol_toolkit.Mol.from_smiles('CC')]
    # two atom lists for a single molecule: the lengths do not match
    two_atom_lists = [[(0, 1)], [(1, 2)]]
    with pytest.raises(Exception):
        ClusterGraph(single_mol, two_atom_lists)
def test_no_fail_cluster(smiles_list, layers):
    """
    Check that ClusterGraphs can be built from the given SMILES with the
    given number of layers, and that add_atom(None) returns None.

    Two graphs are built: one where every molecule is tagged with the atom
    pairs (0, 1) and (1, 2), and one where every molecule is tagged with
    the single atoms (0, ), (1, ) and (2, ).
    """
    n_mols = len(smiles_list)
    pair_atom_lists = [[(0, 1), (1, 2)]] * n_mols
    single_atom_lists = [[(0, ), (1, ), (2, )]] * n_mols
    molecules = [mol_toolkit.Mol.from_smiles(smiles) for smiles in smiles_list]

    pair_graph = ClusterGraph(molecules, pair_atom_lists, layers=layers)
    single_graph = ClusterGraph(molecules, single_atom_lists, layers=layers)

    for built_graph in (pair_graph, single_graph):
        assert built_graph.add_atom(None) is None
def make_cluster_graph(smiles_list, layers=0):
    """
    Build a ClusterGraph from a list of SMILES strings.

    A Mol is generated for each entry of smiles_list and the same atom
    index tuple, (0, 1), is used for every Mol: atom 0 takes the first
    position in the tuple and atom 1 the second.

    The variable layers is handed to ClusterGraph to set the number of atoms
    away from the indexed atoms to include. For example if layers is 0
    then only the indexed atoms are included in the graph; if layers is 1
    then atoms 1 bond away from the indexed atoms are included, and so forth.
    Layers can also be "all" which will lead to all atoms in the molecule
    being added to the graph.
    """
    n_mols = len(smiles_list)
    # every molecule is tagged with the same pair of atom indices
    indexed_atoms = [[(0, 1)]] * n_mols
    molecules = [mol_toolkit.MolFromSmiles(entry) for entry in smiles_list]
    cluster_graph = ClusterGraph(molecules, indexed_atoms, layers=layers)
    return cluster_graph
def __init__(self, molecules, cluster_list, max_layers=5, verbose=True, strict_smirks=True):
    """
    Store the molecules and clusters, generate SMIRKS for the clusters and
    check those SMIRKS against the input clusters.

    Parameters
    ----------
    molecules : list of Mols
        These can be chemper Mols or molecules from any supported toolkit
        (currently OpenEye or RDKit)

    cluster_list : list of labels and smirks_atom_lists
        One (label, smirks_atom_lists) pair per cluster, where
        smirks_atom_lists holds, for each molecule, a list of tuples of
        atom indices you want included in that cluster.

        For example, if you wanted all atoms with indices (0,1) and (1,2)
        to be in cluster 'c1' and atoms (2,3) in cluster 'c2' for each of
        two molecules then cluster_list would be

        [ ('c1', [ [ (0,1), (1,2) ], [ (0,1), (1,2) ] ]),
          ('c2', [ [ (2,3) ],        [ (2,3) ]        ]) ]

        To see an example of this in action checkout
        https://github.com/MobleyLab/chemper/tree/master/examples

    max_layers : int (optional)
        default = 5
        how many atoms away from the indexed atoms should
        we consider at the maximum

    verbose : boolean (optional)
        default = True
        If true information is printed to the command line during reducing

    strict_smirks : boolean (optional)
        default = True
        If False it will not raise an error when incapable of making SMIRKS
        This setting is not recommended unless you are a master user
        or developer trying to test current behavior.
        The variable SMIRKSifier.checks will tell you if the SMIRKS
        generation failed when strict_smirks = False

    Raises
    ------
    ClusteringError
        if the check of the generated SMIRKS against the input clusters
        fails and strict_smirks is True
    """
    self.molecules = [mol_toolkit.Mol(m) for m in molecules]
    self.intermediate_smirks = dict()
    self.cluster_list = cluster_list
    self.verbose = verbose
    self.max_layers = max_layers
    self.strict_smirks = strict_smirks

    # determine the type of SMIRKS for symmetry in indices purposes
    # This is done by making a test SMIRKS from the first cluster
    graph = ClusterGraph(self.molecules, cluster_list[0][1], 0)
    test_smirks = graph.as_smirks(compress=True)
    env_type = CE(test_smirks).get_type()
    if env_type is None:
        # corresponds to an unknown chemical pattern
        self.dict_type = dict
    elif env_type.lower() == 'impropertorsion':
        self.dict_type = ImproperDict
    else:
        self.dict_type = ValenceDict

    # Convert input "smirks_atom_list" into a dictionary with the form:
    # {mol_idx: {(atom indices): label, ...}, ... }
    self.cluster_dict = dict()
    self.ref_labels = set()
    self.total = 0
    # form of cluster_list is [(label, [for each mol [ (tuples of atom indices)] ) ]
    for label, mol_list in self.cluster_list:
        self.ref_labels.add(label)
        # [for each mol [ (tuples of atom indices)]
        for mol_idx, atom_indice_tuples in enumerate(mol_list):
            if mol_idx not in self.cluster_dict:
                self.cluster_dict[mol_idx] = self.dict_type()
            mol_clusters = self.cluster_dict[mol_idx]
            for atom_tuple in atom_indice_tuples:
                # self.total counts every atom tuple seen across all clusters
                self.total += 1
                mol_clusters[atom_tuple] = label

    # make SMIRKS patterns for input clusters
    self.current_smirks, self.layers = self.make_smirks()
    if self.verbose:
        print_smirks(self.current_smirks)

    # check SMIRKS and save the matches to input clusters
    self.type_matches, self.checks = self.types_match_reference()

    if not self.checks:
        msg = """ SMIRKSifier was not able to create SMIRKS for the provided clusters with %i layers. Try increasing the number of layers or changing your clusters """ % self.max_layers
        if self.strict_smirks:
            raise ClusteringError(msg)
        else:
            print("WARNING!", msg)
from chemper.mol_toolkits.mol_toolkit import Mol
from chemper.graphs.cluster_graph import ClusterGraph

# make molecules from smiles
mols = [Mol.from_smiles(smiles) for smiles in ('CCO', 'CC=C')]

# identify atoms for tagging: one entry per molecule,
# each holding a single set of atom indices
tagged = [
    [(0, 1)],  # one set of atoms in first molecule
    [(0, 1)],  # one set of atoms in second molecule
]

# try multiple options for layers
for layers in (0, 1, 'all'):
    # make graph
    graph = ClusterGraph(mols, tagged, layers)
    # complex is the default output
    print(graph.as_smirks())
    # compress=True and's common decorators to the end of each atom
    print(graph.as_smirks(compress=True))
from chemper.mol_toolkits import mol_toolkit
from chemper.graphs.cluster_graph import ClusterGraph

# two molecules with a different number of tagged atom pairs in each
mol1, mol2 = (mol_toolkit.MolFromSmiles(smiles) for smiles in ('CCC', 'CCCCC'))
atoms1 = [(0, 1)]
atoms2 = [(0, 1), (1, 2)]

# one graph covering the tagged atoms of both molecules
graph = ClusterGraph([mol1, mol2], [atoms1, atoms2])
print(graph.as_smirks())
# "[#6AH2X4x0r0+0,#6AH3X4x0r0+0:1]-;!@[#6AH2X4x0r0+0:2]"
def _combine_molecules_offxml(
    molecules: List["Ligand"],
    parameters: List[str],
    rfree_data: Dict[str, Dict[str, Union[str, float]]],
    filename: str,
    water_model: Literal["tip3p"] = "tip3p",
):
    """
    Main worker function to build the combined offxmls.

    For every molecule one SMIRKS pattern is generated (via chemper's
    ClusterGraph with layers="all") for each bond, angle, proper torsion,
    improper torsion and atom type, and added to the matching handler of a
    fresh ForceField. Charges are added as one LibraryCharges entry per
    molecule keyed by its mapped SMILES. A water model is then added and the
    force field is written to `filename`.

    Args:
        molecules: The molecules whose parameters should be combined.
        parameters: The codes of the parameters to be fit. Any non-empty list
            switches the vdW handler to "QUBEKitvdWTS"; the entry "AB"
            requests that alpha and beta are fit as well.
        rfree_data: Lookup used for the values to be fit: `rfree_data[code]["r_free"]`
            for each fitted code and `rfree_data["alpha"]` / `rfree_data["beta"]`
            when "AB" is requested.
        filename: The name of the file the offxml is written to.
        water_model: The name of the water model passed on to `_add_water_model`.

    Raises:
        NotImplementedError: If any molecule has virtual sites or RB torsion parameters.
        ModuleNotFoundError: If chemper can not be imported.
    """
    if sum(molecule.extra_sites.n_sites for molecule in molecules) > 0:
        raise NotImplementedError(
            "Virtual sites can not be safely converted into offxml format yet."
        )

    if sum(molecule.RBTorsionForce.n_parameters for molecule in molecules) > 0:
        raise NotImplementedError(
            "RBTorsions can not yet be safely converted into offxml format yet."
        )

    try:
        from chemper.graphs.cluster_graph import ClusterGraph
    except ModuleNotFoundError as err:
        # keep the original import failure attached as the cause
        raise ModuleNotFoundError(
            "chemper is required to make an offxml, please install with `conda install chemper -c conda-forge`."
        ) from err

    # if alpha and beta should be fit
    fit_ab = "AB" in parameters

    rfree_codes = set()  # keep track of all rfree codes used by these molecules

    # create the master ff
    offxml = ForceField(allow_cosmetic_attributes=True, load_plugins=True)
    offxml.author = f"QUBEKit_version_{qubekit.__version__}"
    offxml.date = datetime.now().strftime("%Y_%m_%d")

    # get all of the handlers
    _ = offxml.get_parameter_handler("Constraints")
    bond_handler = offxml.get_parameter_handler("Bonds")
    angle_handler = offxml.get_parameter_handler("Angles")
    proper_torsions = offxml.get_parameter_handler("ProperTorsions")
    improper_torsions = offxml.get_parameter_handler("ImproperTorsions")
    _ = offxml.get_parameter_handler(
        "Electrostatics", handler_kwargs={"scale14": 0.8333333333, "version": 0.3}
    )

    # if we want to optimise the Rfree we need our custom handler,
    # otherwise the standard vdW handler is used
    using_plugin = bool(parameters)
    vdw_handler = offxml.get_parameter_handler(
        "QUBEKitvdWTS" if using_plugin else "vdW", allow_cosmetic_attributes=True
    )

    library_charges = offxml.get_parameter_handler("LibraryCharges")

    for molecule in molecules:
        rdkit_mol = molecule.to_rdkit()

        bond_types = molecule.bond_types
        # for each bond type collection create a single smirks pattern
        for bonds in bond_types.values():
            graph = ClusterGraph(
                mols=[rdkit_mol], smirks_atoms_lists=[bonds], layers="all"
            )
            # the first bond of the collection supplies the parameters
            qube_bond = molecule.BondForce[bonds[0]]
            bond_handler.add_parameter(
                parameter_kwargs={
                    "smirks": graph.as_smirks(),
                    "length": qube_bond.length * unit.nanometers,
                    "k": qube_bond.k * unit.kilojoule_per_mole / unit.nanometers**2,
                }
            )

        angle_types = molecule.angle_types
        for angles in angle_types.values():
            graph = ClusterGraph(
                mols=[rdkit_mol],
                smirks_atoms_lists=[angles],
                layers="all",
            )
            qube_angle = molecule.AngleForce[angles[0]]
            angle_handler.add_parameter(
                parameter_kwargs={
                    "smirks": graph.as_smirks(),
                    "angle": qube_angle.angle * unit.radian,
                    "k": qube_angle.k * unit.kilojoule_per_mole / unit.radians**2,
                }
            )

        torsion_types = molecule.dihedral_types
        for dihedrals in torsion_types.values():
            graph = ClusterGraph(
                mols=[rdkit_mol],
                smirks_atoms_lists=[dihedrals],
                layers="all",
            )
            qube_dihedral = molecule.TorsionForce[dihedrals[0]]
            proper_torsions.add_parameter(
                parameter_kwargs={
                    "smirks": graph.as_smirks(),
                    "k1": qube_dihedral.k1 * unit.kilojoule_per_mole,
                    "k2": qube_dihedral.k2 * unit.kilojoule_per_mole,
                    "k3": qube_dihedral.k3 * unit.kilojoule_per_mole,
                    "k4": qube_dihedral.k4 * unit.kilojoule_per_mole,
                    "periodicity1": qube_dihedral.periodicity1,
                    "periodicity2": qube_dihedral.periodicity2,
                    "periodicity3": qube_dihedral.periodicity3,
                    "periodicity4": qube_dihedral.periodicity4,
                    "phase1": qube_dihedral.phase1 * unit.radians,
                    "phase2": qube_dihedral.phase2 * unit.radians,
                    "phase3": qube_dihedral.phase3 * unit.radians,
                    "phase4": qube_dihedral.phase4 * unit.radians,
                    "idivf1": 1,
                    "idivf2": 1,
                    "idivf3": 1,
                    "idivf4": 1,
                }
            )

        improper_types = molecule.improper_types
        for torsions in improper_types.values():
            # swap the first two atom indices of every improper before
            # building the SMIRKS
            # NOTE(review): presumably this moves the central atom into the
            # second position as SMIRNOFF expects - confirm
            impropers = [
                (improper[1], improper[0], *improper[2:]) for improper in torsions
            ]
            graph = ClusterGraph(
                mols=[rdkit_mol], smirks_atoms_lists=[impropers], layers="all"
            )
            # parameters are looked up with the original (unswapped) indices
            qube_improper = molecule.ImproperTorsionForce[torsions[0]]
            # we need to multiply each k value by 3 as they will be applied as trefoil see
            # <https://openforcefield.github.io/standards/standards/smirnoff/#impropertorsions> for more details
            # we assume we only have a k2 term for improper torsions via a periodic term
            improper_torsions.add_parameter(
                parameter_kwargs={
                    "smirks": graph.as_smirks(),
                    "k1": qube_improper.k2 * 3 * unit.kilojoule_per_mole,
                    "periodicity1": qube_improper.periodicity2,
                    "phase1": qube_improper.phase2 * unit.radians,
                }
            )

        # group the atom indices by type so each type gets a single smirks
        atom_types = {}
        for atom_index, cip_type in molecule.atom_types.items():
            atom_types.setdefault(cip_type, []).append((atom_index,))

        for sym_set in atom_types.values():
            graph = ClusterGraph(
                mols=[rdkit_mol], smirks_atoms_lists=[sym_set], layers="all"
            )
            qube_non_bond = molecule.NonbondedForce[sym_set[0]]
            rfree_code = _get_parameter_code(
                molecule=molecule, atom_index=sym_set[0][0]
            )
            atom_data = {
                "smirks": graph.as_smirks(),
            }

            if rfree_code in parameters or fit_ab:
                # keep track of present codes to optimise
                rfree_codes.add(rfree_code)
            if using_plugin:
                # this is to be refit
                atom = molecule.atoms[qube_non_bond.atoms[0]]
                atom_data["volume"] = atom.aim.volume * unit.angstroms**3
            else:
                atom_data["epsilon"] = qube_non_bond.epsilon * unit.kilojoule_per_mole
                atom_data["sigma"] = qube_non_bond.sigma * unit.nanometers

            vdw_handler.add_parameter(parameter_kwargs=atom_data)

        # one library charge entry per molecule; charge keys are 1-indexed
        charge_data = {
            f"charge{param.atoms[0] + 1}": param.charge * unit.elementary_charge
            for param in molecule.NonbondedForce
        }
        charge_data["smirks"] = molecule.to_smiles(mapped=True)
        library_charges.add_parameter(parameter_kwargs=charge_data)

    # now loop over all the parameters to be fit and add them as cosmetic attributes
    to_parameterize = []
    for parameter_to_fit in parameters:
        if parameter_to_fit != "AB" and parameter_to_fit in rfree_codes:
            attribute_name = f"{parameter_to_fit.lower()}free"
            setattr(
                vdw_handler,
                attribute_name,
                unit.Quantity(
                    rfree_data[parameter_to_fit]["r_free"], unit=unit.angstroms
                ),
            )
            to_parameterize.append(attribute_name)
    if fit_ab:
        vdw_handler.alpha = rfree_data["alpha"]
        vdw_handler.beta = rfree_data["beta"]
        to_parameterize.extend(["alpha", "beta"])
    if to_parameterize:
        vdw_handler.add_cosmetic_attribute("parameterize", ", ".join(to_parameterize))

    # now add a water model to the force field
    _add_water_model(
        force_field=offxml, water_model=water_model, using_plugin=using_plugin
    )
    offxml.to_file(filename=filename)