def checkTorsion(smiles, torsion_indices, ff_name):
    """
    Look up the id of the proper-torsion parameter assigned to one torsion.

    The molecule is built from a mapped SMILES and labelled with the given
    force field. Every labelled proper torsion is then compared with
    ``torsion_indices`` in both forward and reversed atom order.

    Parameters
    ----------
    smiles : str
        Mapped SMILES handed to ``Molecule.from_mapped_smiles``.
    torsion_indices : sequence of int
        Atom indices of the torsion to look up. Any sequence is accepted; it
        is converted to a tuple before comparing so that a list and a tuple
        holding the same indices are treated as equal.
    ff_name : str
        Force field name handed to ``ForceField``; it is loaded with
        ``allow_cosmetic_attributes=True``.

    Returns
    -------
    tid : object or None
        The ``id`` attribute of the matching proper-torsion parameter, or
        ``None`` when no labelled torsion matches. If several torsions match,
        the last one encountered wins.
    """
    print(smiles)

    molecule = Molecule.from_mapped_smiles(smiles)
    topology = Topology.from_molecules(molecule)
    forcefield = ForceField(ff_name, allow_cosmetic_attributes=True)
    # Run the molecule labeling
    molecule_force_list = forcefield.label_molecules(topology)

    # Normalise once: comparing a list against a tuple is always False
    wanted = tuple(torsion_indices)

    # Start from None so that "no match" is a value instead of an
    # UnboundLocalError at the return statement
    tid = None
    params = []
    indices = []
    for mol_forces in molecule_force_list:
        for force_tag, force_dict in mol_forces.items():
            if force_tag != "ProperTorsions":
                continue
            for atom_indices, parameter in force_dict.items():
                params.append(parameter.id)
                indices.append(atom_indices)
                print(torsion_indices)
                print(atom_indices)
                forward = tuple(atom_indices)
                if forward == wanted or forward[::-1] == wanted:
                    tid = parameter.id
    print(params)
    print(indices)
    return tid
def get_assigned_torsion_param(tdentry, forcefield):
    """Return the proper-torsion parameter a force field assigns to the
    torsion driven in a TorsionDrive entry.

    Parameters
    ----------
    tdentry : TDEntry
        TorsionDrive entry. Its
        ``attributes["canonical_isomeric_explicit_hydrogen_mapped_smiles"]``
        is used to build the molecule and ``td_keywords.dihedrals[0]`` gives
        the driven dihedral.
    forcefield : str, ForceField
        Force field to apply. A string is passed to ``ForceField`` to load
        it; anything else is used as given.

    Returns
    -------
    torsion_params : ProperTorsion
        Parameter object labelled for the driven dihedral, looked up first
        with the indices as stored and, if that yields ``None``, with the
        indices reversed.

    Raises
    ------
    Whatever the labelled ``"ProperTorsions"`` mapping raises for a missing
    key (``KeyError`` for a dict-like mapping) when neither atom order is
    present.

    Examples
    --------
    Given TDEntries obtained from `get_torsiondrives_matching_smarts`, the
    parameters assigned by, say, `"openff-1.0.0.offxml"` can be collected with:

    >>> ff = ForceField('openff-1.0.0.offxml')
    >>> assigned = [get_assigned_torsion_param(tdentry, ff)
                    for tdentry in tdentries]
    >>> print([t.id for t in assigned])
    ['t47', 't47', 't47', 't47', ...]

    """
    smiles_key = "canonical_isomeric_explicit_hydrogen_mapped_smiles"
    offmol = Molecule.from_mapped_smiles(tdentry.attributes[smiles_key])

    # accept a force field name as well as a ready-made ForceField
    if isinstance(forcefield, str):
        forcefield = ForceField(forcefield)

    # a single molecule goes in, so the first labelling is the only one
    labels = forcefield.label_molecules(Topology.from_molecules(offmol))[0]
    proper_torsions = labels["ProperTorsions"]

    # only the first driven dihedral is considered; 2D torsion drives would
    # need separate handling
    driven = tdentry.td_keywords.dihedrals[0]

    assigned = proper_torsions.get(driven)
    if assigned is not None:
        return assigned

    # not found in the stored order: the torsion is keyed the other way round
    return proper_torsions[driven[::-1]]
def checkTorsion(molList, ff_name):
    """
    Tag molecules with the id of the force-field torsion matching their
    driven torsion.

    Each molecule is rebuilt from its ``"cmiles"`` data tag and labelled with
    the force field. Every labelled proper torsion whose atom indices equal
    the molecule's ``"TDindices"`` data tag, forward or reversed, sets the
    ``"IDMatch"`` tag to that parameter's id.

    Parameters
    ----------
    molList : list of objects
        Molecules exposing ``GetData``/``SetData`` and carrying the
        ``"cmiles"`` and ``"TDindices"`` data tags.
    ff_name : str
        Force field name handed to ``ForceField``. It is loaded once, before
        the loop, so it is also loaded when ``molList`` is empty.

    Returns
    -------
    mols : list of objects
        The molecules that received an ``"IDMatch"`` tag; a molecule is
        appended once per matching torsion. A summary line with the number of
        matches is printed before returning.
    """
    # Loading the force field does not depend on the molecule: do it once
    # instead of re-loading it for every entry in molList
    forcefield = ForceField(ff_name)

    count = 0
    mols = []
    for mol in molList:
        molecule = Molecule.from_mapped_smiles(mol.GetData("cmiles"))
        topology = Topology.from_molecules(molecule)
        # Run the molecule labeling
        molecule_force_list = forcefield.label_molecules(topology)
        for mol_forces in molecule_force_list:
            for force_tag, force_dict in mol_forces.items():
                if force_tag != "ProperTorsions":
                    continue
                # the driven torsion is fixed for this molecule, fetch it once
                td_indices = mol.GetData("TDindices")
                for atom_indices, parameter in force_dict.items():
                    if (
                        atom_indices == td_indices
                        or tuple(reversed(atom_indices)) == td_indices
                    ):
                        count += 1
                        mol.SetData("IDMatch", parameter.id)
                        mols.append(mol)

    print(
        f"Out of {len(molList)} molecules, {count}"
        " were processed with checkTorsion()"
    )
    return mols
def checkParam(cmiles, ff2):
    """
    Check whether a force field assigns the bond parameter ``b1`` to a molecule.

    Every force tag produced by the labelling is printed. As soon as a bond
    parameter with id ``'b1'`` is found, ``'match'`` is printed and the
    function returns.

    Parameters
    ----------
    cmiles : str
        Mapped SMILES handed to ``Molecule.from_mapped_smiles``.
    ff2 : str
        Force field name handed to ``ForceField``; it is loaded with
        ``allow_cosmetic_attributes=True``.

    Returns
    -------
    cmiles : str or None
        The input ``cmiles`` when a ``b1`` bond is present, otherwise ``None``.
    """
    offmol = Molecule.from_mapped_smiles(cmiles)
    topology = Topology.from_molecules([offmol])
    labeller = ForceField(ff2, allow_cosmetic_attributes=True)
    labelled_molecules = labeller.label_molecules(topology)

    for mol_forces in labelled_molecules:
        for force_tag, force_dict in mol_forces.items():
            print(force_tag)
            if force_tag != 'Bonds':
                continue
            for _atom_indices, parameter in force_dict.items():
                if parameter.id == 'b1':
                    print('match')
                    return cmiles
    return None
def _apply(self, molecules: List[Molecule]) -> ComponentResult:
    """
    Run the WBOFragmenter over every input molecule and collect the fragments.

    Parameters:
        molecules: The molecules this component should fragment.

    Returns:
        The result object created by ``self._create_result()``, holding the
        added fragments (and parents, when ``include_parent`` is set) and the
        molecules that were filtered out.

    Note:
        * A molecule whose fragmentation raises ``RuntimeError`` or
          ``ValueError`` is filtered out, even if it was already added as a
          parent because ``include_parent`` is true.
        * A molecule that yields no fragments is filtered out only when
          ``include_parent`` is false.
        * Each fragment carries the driven torsion in
          ``properties["dihedrals"]`` as a ``TorsionIndexer``.
    """
    from fragmenter import fragment

    result = self._create_result()
    smiles_key = "canonical_isomeric_explicit_hydrogen_mapped_smiles"

    for parent in molecules:
        # the fragmenter can misbehave on molecules without a conformer
        if parent.n_conformers == 0:
            parent.generate_conformers(n_conformers=1)

        if self.include_parent:
            result.add_molecule(parent)

        factory = fragment.WBOFragmenter(
            molecule=parent.to_openeye(),
            functional_groups=self.functional_groups,
            verbose=False,
        )

        try:
            factory.fragment(
                threshold=self.threshold,
                keep_non_rotor_ring_substituents=self.keep_non_rotor_ring_substituents,
            )

            # the torsiondrive json records the central bond each fragment
            # was built around, which is the bond to torsiondrive later
            fragments_by_bond = factory.to_torsiondrive_json()

            if fragments_by_bond:
                for record in fragments_by_bond.values():
                    frag_mol = Molecule.from_mapped_smiles(
                        mapped_smiles=record["identifiers"][smiles_key]
                    )
                    driven_torsion = tuple(record["dihedral"][0])

                    # stored on the molecule so it is available when the
                    # cmiles tags are generated later
                    indexer = TorsionIndexer()
                    indexer.add_torsion(torsion=driven_torsion)
                    frag_mol.properties["dihedrals"] = indexer
                    result.add_molecule(frag_mol)

            # nothing came back and the parent is not kept either, so the
            # molecule counts as failed
            elif not self.include_parent:
                result.filter_molecule(parent)

        except (RuntimeError, ValueError):
            # also covers cmiles errors for molecules with undefined stereo
            result.filter_molecule(parent)

    return result
def get_torsiondrives_matching_smarts(smarts, datasets, client):
    """Collect the TorsionDrive entries whose driven torsion matches a SMARTS.

    Parameters
    ----------
    smarts : str
        SMARTS pattern to query with. With 4 tagged atoms, a match must cover
        the same atoms as the driven dihedral; with 2 tagged atoms, a match
        must cover the two atoms of the central, rotated bond. Atom order is
        ignored in both cases.
    datasets : iterable of strings
        Names of the TorsionDriveDatasets to search.
    client : qcportal.FractalClient
        Client used to fetch each dataset with ``get_collection``.

    Returns
    -------
    tdentries : list
        Matching entries, each included at most once per dataset.

    Raises
    ------
    ValueError
        If a SMARTS match does not contain exactly 2 or 4 atom indices.

    Examples
    --------
    Entries for a C-C bond torsiondrive:

    >>> from qcportal import FractalClient
    >>> client = FractalClient()
    >>> smarts = "[*:1]~[#6:2]-[#6:3]~[*:4]"
    >>> datasets = ["OpenFF Substituted Phenyl Set 1"]
    >>> tdentries = get_torsiondrives_matching_smarts(smarts, datasets, client)

    The same query, tagging only the central bond:

    >>> smarts = "[#6:1]-[#6:2]"
    >>> tdentries = get_torsiondrives_matching_smarts(smarts, datasets, client)

    """
    smiles_key = "canonical_isomeric_explicit_hydrogen_mapped_smiles"

    def _hits_driven_torsion(match, driven):
        # 2 tagged atoms are compared with the central bond of the dihedral,
        # 4 tagged atoms with the whole dihedral
        n_tagged = len(match)
        if n_tagged == 2:
            return sorted(match) == sorted(driven[1:3])
        if n_tagged == 4:
            return sorted(match) == sorted(driven)
        raise ValueError("Number of indices returned not 2 or 4;"
                         " check number of tagged atoms in SMARTS")

    selected = []
    for name in datasets:
        collection = client.get_collection("TorsionDriveDataset", name)

        for entry in collection.data.records.values():
            # the SMARTS is applied to a molecule rebuilt from the entry
            offmol = Molecule.from_mapped_smiles(entry.attributes[smiles_key])
            matches = offmol.chemical_environment_matches(smarts)
            if len(matches) == 0:
                continue

            # only the first driven dihedral is considered; 2D torsion
            # drives would need separate handling
            driven = entry.td_keywords.dihedrals[0]

            # the first hit is enough; later matches are not inspected
            if any(_hits_driven_torsion(match, driven) for match in matches):
                selected.append(entry)

    return selected
def torsion_barrier_for_molecule(tdr_object, mapped_smiles, show_plots=False):
    """
    Evaluate the torsion barrier of a single torsion drive record.

    The final energies of the record are sorted by angle and padded with
    three periodic images on each side. Extrema are located where the slope
    changes sign, and the barrier is the largest energy difference between
    consecutive extrema.

    Parameters
    ----------
    tdr_object : object
        Torsion drive record. Must provide ``get_final_energies()`` (mapping
        whose keys hold the angle in degrees as their first element),
        ``dict()`` and ``id``.
    mapped_smiles : str
        Mapped SMILES of the molecule; used to build the OpenFF molecule for
        the bond-order calculation and the returned oemol.
    show_plots : bool, optional
        When true, plot the profile with the detected extrema, save it as
        ``plot_<id>.png`` and show it.

    Returns
    -------
    mol : oemol object
        oemol built from ``mapped_smiles`` carrying these data tags:
        "WBO": fractional bond order of the central bond of the torsion
        "TB": largest barrier between consecutive extrema, i.e. the energy
        difference scaled by ``HARTREE_2_KJMOL``
        "TDindices": atom indices of the first driven dihedral
        "TDid": ``tdr_object.id``
        "cmiles": ``mapped_smiles``

    Raises
    ------
    ValueError
        If fewer than two extrema are found, so that no barrier can be formed.
    """
    # Query the record once; angles and energies then come from the same call
    final_energies = tdr_object.get_final_energies()
    angles, energies = zip(
        *sorted(
            (key[0] * np.pi / 180, energy)
            for key, energy in final_energies.items()
        )
    )
    angles = np.array(angles)
    energies = np.array(energies)

    # Pad both ends with periodic images so that extrema close to the edges
    # of the scanned range have neighbours on both sides
    angles = np.concatenate(
        (angles[-3:] - 2 * np.pi, angles, angles[:3] + 2 * np.pi)
    )
    energies = np.concatenate((energies[-3:], energies, energies[:3]))

    # An interior point is an extremum when the slopes on its two sides have
    # different signs
    slope_signs = np.sign(np.diff(energies) / np.diff(angles))
    idx = np.flatnonzero(slope_signs[:-1] != slope_signs[1:]) + 1

    if show_plots:
        min_ener = min(energies)
        energies_y = (energies - min_ener) * HARTREE_2_KJMOL
        fontsize = 14
        plt.figure()
        plt.plot(
            angles * 180 / np.pi,
            energies_y,
            "b-X",
            angles[idx] * 180 / np.pi,
            energies_y[idx],
            "ro",
        )
        plt.legend(["QM data", "Max, min"], bbox_to_anchor=(1, 1), fontsize=fontsize)
        plt.title("Torsion drive interpolation", fontsize=fontsize)
        plt.xlabel("Dihedral Angle [Degrees]", fontsize=fontsize)
        plt.ylabel("Relative energy [KJ / mol]", fontsize=fontsize)
        plt.xticks(fontsize=fontsize)
        plt.yticks(fontsize=fontsize)
        # format instead of concatenating: the id is not guaranteed to be a str
        fig_name = f"plot_{tdr_object.id}.png"
        plt.savefig(fig_name)
        plt.show()

    # Energy difference between each pair of consecutive extrema
    torsion_barriers = np.abs(HARTREE_2_KJMOL * np.diff(energies[idx]))
    if torsion_barriers.size == 0:
        # Fail before the bond-order calculation, with a message that names
        # the record instead of the bare "max() arg is an empty sequence"
        raise ValueError(
            f"No torsion barrier could be determined for record {tdr_object.id}:"
            " fewer than two extrema were found in the energy profile"
        )

    # the central bond of the driven dihedral gives the Wiberg bond order
    dihedral_indices = tdr_object.dict()["keywords"]["dihedrals"][0]
    offmol = Molecule.from_mapped_smiles(mapped_smiles)
    offmol.assign_fractional_bond_orders()
    bond = offmol.get_bond_between(dihedral_indices[1], dihedral_indices[2])

    mol = chemi.smiles_to_oemol(mapped_smiles)
    mol.SetData("WBO", bond.fractional_bond_order)
    mol.SetData("TB", max(torsion_barriers))
    mol.SetData("TDindices", dihedral_indices)
    mol.SetData("TDid", tdr_object.id)
    mol.SetData("cmiles", mapped_smiles)
    return mol
def loadDataset_low(datasetName, specification, benchmark_smiles, qca_overlapped_entries):
    """
    Low level call to load one torsion drive dataset and return a list of
    molecules with their torsion barriers.

    COMPLETE records whose molecule is isomorphic to a benchmark molecule are
    skipped and their record ids are appended to ``qca_overlapped_entries``.
    All other COMPLETE records are passed to ``torsion_barrier_for_molecule``.

    Parameters
    ----------
    datasetName : str
        torsion drive dataset name.
    specification : str
        specification in the dataset. Example: "B3LYP-D3", "default", "UFF"
    benchmark_smiles : str
        Path of a text file with one benchmark SMILES per line; blank lines
        are ignored.
    qca_overlapped_entries : str
        Path of the file the ids of overlapping records are appended to, one
        per line.

    Returns
    -------
    molList : list of objects
        One result of ``torsion_barrier_for_molecule`` per COMPLETE record
        that does not overlap with the benchmark set.

    Raises
    ------
    AssertionError
        If ``datasetName`` or ``specification`` is empty.
    """
    # Raised explicitly rather than through `assert`, so the checks survive
    # running under `python -O`; the exception type is unchanged
    if not datasetName:
        raise AssertionError(
            "datasetName is empty. Check input list of dataset tuples"
        )
    if not specification:
        raise AssertionError(
            "specification is empty. Check input list of dataset tuples"
        )

    # initiate qc portal instance
    client = ptl.FractalClient()
    # from the TorsionDriveDataset collection picking up given datasetName
    ds = client.get_collection("TorsionDriveDataset", datasetName)
    # NOTE(review): presumably this also loads the records of this
    # specification into ds.df, which is read below - confirm
    ds.status([specification], status="COMPLETE")

    # Serial implementation

    # Benchmark molecules from the lim_mobley_parsely_benchmark
    # https://openforcefield.org/force-fields/force-fields/
    # https://github.com/MobleyLab/benchmarkff/blob/91476147f35579bc52bf984839fd20c72a61d76d/molecules/set_v03_non_redundant/trim3_full_qcarchive.smi
    with open(benchmark_smiles) as f:
        # skip blank lines (e.g. a trailing newline) instead of parsing them
        bm_mols = [Molecule.from_smiles(line.strip()) for line in f if line.strip()]

    tb = []
    overlaps = 0
    qca_entries = []
    # Walk the rows of the first column; ds.df.size would count
    # rows * columns and overrun as soon as there is more than one column
    for smiles, record in zip(ds.df.index, ds.df.iloc[:, 0]):
        if record.status != "COMPLETE":
            continue

        mapped_smiles = ds.get_entry(smiles).attributes[
            "canonical_isomeric_explicit_hydrogen_mapped_smiles"
        ]
        mol1 = Molecule.from_mapped_smiles(mapped_smiles)

        # connectivity-only comparison: all the optional matching criteria
        # are switched off
        overlaps_benchmark = any(
            Molecule.are_isomorphic(
                mol1,
                bm_mol,
                return_atom_map=False,
                aromatic_matching=False,
                formal_charge_matching=False,
                bond_order_matching=False,
                atom_stereochemistry_matching=False,
                bond_stereochemistry_matching=False,
            )[0]
            for bm_mol in bm_mols
        )

        if overlaps_benchmark:
            overlaps += 1
            # id of the record that was actually inspected, rather than the
            # hard-coded 'default' entry of the object map, so that other
            # specifications are reported correctly as well
            qca_entries.append(record.id)
        else:
            tb.append(torsion_barrier_for_molecule(record, mapped_smiles))

    # appended to, so repeated calls accumulate ids in the same file
    with open(qca_overlapped_entries, "a") as f:
        f.writelines(f"{item}\n" for item in qca_entries)

    print("No. of overlaps with benchmark set, qca entries added to overlaps_qca_ids.txt: ", overlaps)
    print("No. of COMPLETE and not overlapping with benchmark in this dataset:", len(tb), "out of ", len(ds.df))
    return tb
def fragment(self, molecule: Molecule) -> List[FragmentData]:
    """
    Split a molecule into fragments with the WBOFragmenter.

    Parameters:
        molecule: The openff molecule to fragment with this object's
            ``wbo_threshold`` and ``keep_non_rotor_ring_substituents``
            settings. A conformer is generated in place if it has none.

    Returns:
        One FragmentData per fragment, recording the fragment, its parent,
        the central bond of the driven torsion in both, and the atom mapping
        between them. When a fragment is isomorphic with the input molecule,
        the fragment itself is stored as the parent with an identity mapping.

    Raises:
        FragmenterError: If a RuntimeError is raised while fragmenting or
            while assembling the fragment data.
    """
    from fragmenter import fragment

    # the fragmenter can misbehave on molecules without a conformer
    if molecule.n_conformers == 0:
        molecule.generate_conformers(n_conformers=1)

    factory = fragment.WBOFragmenter(
        molecule=molecule.to_openeye(), verbose=False)

    fragments: List[FragmentData] = []
    smiles_key = "canonical_isomeric_explicit_hydrogen_mapped_smiles"
    try:
        factory.fragment(
            threshold=self.wbo_threshold,
            keep_non_rotor_ring_substituents=self.keep_non_rotor_ring_substituents,
        )

        # relate every fragment back to the parent molecule
        for record in factory.to_torsiondrive_json().values():
            identifiers = record["identifiers"]
            off_frag = Molecule.from_mapped_smiles(identifiers[smiles_key])

            # the two central atoms of the driven dihedral
            central_bond = record["dihedral"][0][1:3]

            if molecule.is_isomorphic_with(off_frag):
                # the only "fragment" is the parent itself: no real mapping
                # exists, so the fragment stands in as its own parent
                atom_map = {i: i for i in range(molecule.n_atoms)}
                parent_bond = central_bond
                parent = off_frag
            else:
                atom_map = self._get_fragment_parent_mapping(
                    fragment=off_frag, parent=molecule)
                # translate the fragment bond into parent atom indices
                parent_bond = tuple(atom_map[i] for i in central_bond)
                parent = molecule

            fragments.append(
                FragmentData(
                    parent_molecule=parent,
                    parent_torsion=parent_bond,
                    fragment_molecule=off_frag,
                    fragment_torsion=central_bond,
                    fragment_attributes=identifiers,
                    fragment_parent_mapping=atom_map,
                )
            )

        return fragments

    except RuntimeError:
        raise FragmenterError(
            f"The molecule {molecule} could not be fragmented so no fitting target was made."
        )