def rdkit_mmff94_xyz(smiles, **kwargs):
    """
    Return the XYZ block obtained by running an MMFF94 molecular mechanics
    optimization of the given SMILES with RDKit.

    Temporary files are written to $MM_WORKING_DIR if that environment
    variable is defined, otherwise to /tmp, and are removed before returning.

    :param smiles: input SMILES
    :param max_iterations: max number of optimizer iterations (default 500)
    :return: (xyz_str, success). ``xyz_str`` is the XYZ string of the
        optimized geometry, or None if any step raised. ``success`` is True
        only when the optimizer returned 0 and ``check_identical_geometries``
        accepted the optimized geometry against the canonical input SMILES.
    """
    working_dir = os.environ.get("MM_WORKING_DIR", "/tmp")

    # Honour the documented default instead of raising KeyError (which the
    # broad handler below would turn into a silent failure).
    max_iterations = kwargs.get("max_iterations", 500)

    # Converting the molecule to RDKit object
    mol = MolFromSmiles(smiles)
    smi_canon = MolToSmiles(mol)

    # Setting paths; the PID prefix keeps concurrent processes from sharing
    # the same temporary file names.
    filename_smiles = str(os.getpid()) + "_" + smi_to_filename(smi_canon)
    xyz_path = join(working_dir, filename_smiles + '.xyz')
    post_MM_smi_path = join(working_dir, filename_smiles + '.smi')

    # Computing geometry
    try:
        # Adding implicit hydrogens
        mol = AddHs(mol)

        # MM optimization
        EmbedMolecule(mol)
        value = MMFFOptimizeMolecule(mol, maxIters=max_iterations)

        # Success if returned value is null
        success_RDKIT_output = value == 0

        # Computing XYZ from optimized molecule
        xyz_str = MolToXYZBlock(mol)

        # Writing optimized XYZ to file
        with open(xyz_path, "w") as f:
            f.write(xyz_str)

        # Success if the optimization has converged and the post MM smiles
        # is identical to the pre MM smiles
        success = success_RDKIT_output and check_identical_geometries(
            xyz_path, smi_canon, post_MM_smi_path)

    except Exception:
        # Deliberate best effort: any failure in embedding, optimization or
        # the geometry check is reported through the success flag.
        success = False
        xyz_str = None
    finally:
        # Removing files
        remove_files([post_MM_smi_path, xyz_path])

    return xyz_str, success
def embed(self, mol):
    """Embed ``mol`` with ``EmbedMolecule`` and return the molecule.

    Args:
        mol: the molecule to embed. It is passed to ``EmbedMolecule`` as is
            and must expose ``name`` (used in the failure message) and,
            when ``self.add_hs`` is set, ``add_hs``.

    Returns:
        ``mol.add_hs(add_coords=True)`` when ``self.add_hs`` is truthy,
        otherwise ``mol`` itself. ``None`` when embedding failed and no
        exception was requested.

    Raises:
        RuntimeError: embedding failed and ``self.error_on_fail`` is set.
    """
    # EmbedMolecule signals failure with -1.
    if EmbedMolecule(mol) == -1:
        msg = 'Failed to Embed Molecule {}'.format(mol.name)
        if self.error_on_fail:
            raise RuntimeError(msg)
        if self.warn_on_fail:
            warnings.warn(msg)
        # Never hand back a molecule whose embedding failed, even when
        # both reporting flags are off.
        return None

    if self.add_hs:
        return mol.add_hs(add_coords=True)
    return mol
def gen3D(mol):
    """Add hydrogens, embed and optimize ``mol``.

    MMFF is tried first; if ``optimize`` raises ``ValueError`` for it, the
    optimization is run again with UFF.

    Returns:
        ``(uff, mol)`` where ``uff`` is True when the UFF fallback was used
        and ``mol`` is the hydrogen-added molecule.

    Raises:
        ValueError: ``EmbedMolecule`` returned anything other than 0.
    """
    mol = Chem.AddHs(mol)

    embed_status = EmbedMolecule(mol)
    if embed_status != 0:
        raise ValueError("EmbedMolecule failed")

    try:
        optimize(mol, MMFFOptimizeMolecule)
    except ValueError:
        # Fall through to the UFF attempt below, outside the handler.
        pass
    else:
        return False, mol

    optimize(mol, UFFOptimizeMolecule)
    return True, mol
# 入力する分子(thiametoxam) smiles = 'CN1COCN(C1=N[N+](=O)[O-])CC2=CN=C(S2)Cl' # ファイル名を決める t = datetime.datetime.fromtimestamp(time.time()) psi4.set_output_file("{}_{}{}{}_{}{}.log".format(smiles, t.year, t.month, t.day, t.hour, t.minute)) # SMILES から三次元構造を発生させて、粗3D構造最適化 mol = Chem.MolFromSmiles(smiles) mol = Chem.AddHs(mol) params = ETKDGv3() params.randomSeed = 1 EmbedMolecule(mol, params) # MMFF(Merck Molecular Force Field) で構造最適化する MMFFOptimizeMolecule(mol) #UFF(Universal Force Field)普遍力場で構造最適化したい場合は #UFFOptimizeMolecule(mol) conf = mol.GetConformer() # Psi4 に入力可能な形式に変換する。 # 電荷とスピン多重度を設定(下は、電荷0、スピン多重度1) mol_input = "0 1" #各々の原子の座標をXYZフォーマットで記述 for atom in mol.GetAtoms(): mol_input += "\n " + atom.GetSymbol() + " " + str(conf.GetAtomPosition(atom.GetIdx()).x)\
def EnumerateStereoisomers(m, options=StereoEnumerationOptions(), verbose=False):
    """ returns a generator that yields possible stereoisomers for a molecule

    Arguments:
      - m: the molecule to work with
      - options: parameters controlling the enumeration
      - verbose: toggles how verbose the output is

    If m has stereogroups, they will be expanded

    A small example with 3 chiral atoms and 1 chiral bond (16 theoretical stereoisomers):

    >>> from rdkit import Chem
    >>> from rdkit.Chem.EnumerateStereoisomers import EnumerateStereoisomers, StereoEnumerationOptions
    >>> m = Chem.MolFromSmiles('BrC=CC1OC(C2)(F)C2(Cl)C1')
    >>> isomers = tuple(EnumerateStereoisomers(m))
    >>> len(isomers)
    16
    >>> for smi in sorted(Chem.MolToSmiles(x, isomericSmiles=True) for x in isomers):
    ...     print(smi)
    ...
    F[C@@]12C[C@@]1(Cl)C[C@@H](/C=C/Br)O2
    F[C@@]12C[C@@]1(Cl)C[C@@H](/C=C\\Br)O2
    F[C@@]12C[C@@]1(Cl)C[C@H](/C=C/Br)O2
    F[C@@]12C[C@@]1(Cl)C[C@H](/C=C\\Br)O2
    F[C@@]12C[C@]1(Cl)C[C@@H](/C=C/Br)O2
    F[C@@]12C[C@]1(Cl)C[C@@H](/C=C\\Br)O2
    F[C@@]12C[C@]1(Cl)C[C@H](/C=C/Br)O2
    F[C@@]12C[C@]1(Cl)C[C@H](/C=C\\Br)O2
    F[C@]12C[C@@]1(Cl)C[C@@H](/C=C/Br)O2
    F[C@]12C[C@@]1(Cl)C[C@@H](/C=C\\Br)O2
    F[C@]12C[C@@]1(Cl)C[C@H](/C=C/Br)O2
    F[C@]12C[C@@]1(Cl)C[C@H](/C=C\\Br)O2
    F[C@]12C[C@]1(Cl)C[C@@H](/C=C/Br)O2
    F[C@]12C[C@]1(Cl)C[C@@H](/C=C\\Br)O2
    F[C@]12C[C@]1(Cl)C[C@H](/C=C/Br)O2
    F[C@]12C[C@]1(Cl)C[C@H](/C=C\\Br)O2

    Because the molecule is constrained, not all of those isomers can
    actually exist. We can check that:

    >>> opts = StereoEnumerationOptions(tryEmbedding=True)
    >>> isomers = tuple(EnumerateStereoisomers(m, options=opts))
    >>> len(isomers)
    8
    >>> for smi in sorted(Chem.MolToSmiles(x,isomericSmiles=True) for x in isomers):
    ...     print(smi)
    ...
    F[C@@]12C[C@]1(Cl)C[C@@H](/C=C/Br)O2
    F[C@@]12C[C@]1(Cl)C[C@@H](/C=C\\Br)O2
    F[C@@]12C[C@]1(Cl)C[C@H](/C=C/Br)O2
    F[C@@]12C[C@]1(Cl)C[C@H](/C=C\\Br)O2
    F[C@]12C[C@@]1(Cl)C[C@@H](/C=C/Br)O2
    F[C@]12C[C@@]1(Cl)C[C@@H](/C=C\\Br)O2
    F[C@]12C[C@@]1(Cl)C[C@H](/C=C/Br)O2
    F[C@]12C[C@@]1(Cl)C[C@H](/C=C\\Br)O2

    Or we can force the output to only give us unique isomers:

    >>> m = Chem.MolFromSmiles('FC(Cl)C=CC=CC(F)Cl')
    >>> opts = StereoEnumerationOptions(unique=True)
    >>> isomers = tuple(EnumerateStereoisomers(m, options=opts))
    >>> len(isomers)
    10
    >>> for smi in sorted(Chem.MolToSmiles(x,isomericSmiles=True) for x in isomers):
    ...     print(smi)
    ...
    F[C@@H](Cl)/C=C/C=C/[C@@H](F)Cl
    F[C@@H](Cl)/C=C/C=C/[C@H](F)Cl
    F[C@@H](Cl)/C=C/C=C\\[C@H](F)Cl
    F[C@@H](Cl)/C=C\\C=C/[C@@H](F)Cl
    F[C@@H](Cl)/C=C\\C=C/[C@H](F)Cl
    F[C@@H](Cl)/C=C\\C=C\\[C@@H](F)Cl
    F[C@@H](Cl)/C=C\\C=C\\[C@H](F)Cl
    F[C@H](Cl)/C=C/C=C/[C@H](F)Cl
    F[C@H](Cl)/C=C\\C=C/[C@H](F)Cl
    F[C@H](Cl)/C=C\\C=C\\[C@H](F)Cl

    By default the code only expands unspecified stereocenters:

    >>> m = Chem.MolFromSmiles('BrC=C[C@H]1OC(C2)(F)C2(Cl)C1')
    >>> isomers = tuple(EnumerateStereoisomers(m))
    >>> len(isomers)
    8
    >>> for smi in sorted(Chem.MolToSmiles(x,isomericSmiles=True) for x in isomers):
    ...     print(smi)
    ...
    F[C@@]12C[C@@]1(Cl)C[C@@H](/C=C/Br)O2
    F[C@@]12C[C@@]1(Cl)C[C@@H](/C=C\\Br)O2
    F[C@@]12C[C@]1(Cl)C[C@@H](/C=C/Br)O2
    F[C@@]12C[C@]1(Cl)C[C@@H](/C=C\\Br)O2
    F[C@]12C[C@@]1(Cl)C[C@@H](/C=C/Br)O2
    F[C@]12C[C@@]1(Cl)C[C@@H](/C=C\\Br)O2
    F[C@]12C[C@]1(Cl)C[C@@H](/C=C/Br)O2
    F[C@]12C[C@]1(Cl)C[C@@H](/C=C\\Br)O2

    But we can change that behavior:

    >>> opts = StereoEnumerationOptions(onlyUnassigned=False)
    >>> isomers = tuple(EnumerateStereoisomers(m, options=opts))
    >>> len(isomers)
    16

    Since the result is a generator, we can allow exploring at least parts of very
    large result sets:

    >>> m = Chem.MolFromSmiles('Br' + '[CH](Cl)' * 20 + 'F')
    >>> opts = StereoEnumerationOptions(maxIsomers=0)
    >>> isomers = EnumerateStereoisomers(m, options=opts)
    >>> for x in range(5):
    ...     print(Chem.MolToSmiles(next(isomers),isomericSmiles=True))
    F[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)Br
    F[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)Br
    F[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)[C@@H](Cl)Br
    F[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)[C@H](Cl)Br
    F[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)[C@@H](Cl)[C@@H](Cl)Br

    Or randomly sample a small subset. Note that if we want that sampling to be consistent
    across python versions we need to provide a random number seed:

    >>> m = Chem.MolFromSmiles('Br' + '[CH](Cl)' * 20 + 'F')
    >>> opts = StereoEnumerationOptions(maxIsomers=3,rand=0xf00d)
    >>> isomers = EnumerateStereoisomers(m, options=opts)
    >>> for smi in isomers: #sorted(Chem.MolToSmiles(x, isomericSmiles=True) for x in isomers):
    ...     print(Chem.MolToSmiles(smi))
    F[C@@H](Cl)[C@H](Cl)[C@H](Cl)[C@H](Cl)[C@@H](Cl)[C@H](Cl)[C@H](Cl)[C@@H](Cl)[C@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)[C@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)[C@H](Cl)[C@H](Cl)[C@@H](Cl)Br
    F[C@H](Cl)[C@H](Cl)[C@H](Cl)[C@@H](Cl)[C@H](Cl)[C@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)[C@@H](Cl)[C@H](Cl)[C@H](Cl)[C@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)[C@H](Cl)[C@@H](Cl)[C@@H](Cl)Br
    F[C@@H](Cl)[C@@H](Cl)[C@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)[C@@H](Cl)Br

    """
    # Work on a copy so the caller's molecule is never modified.
    tm = Chem.Mol(m)
    for atom in tm.GetAtoms():
        atom.ClearProp("_CIPCode")
    flippers = _getFlippers(tm, options)
    nCenters = len(flippers)
    if not nCenters:
        # Nothing to enumerate: the copy is the only result.
        yield tm
        return

    # Each integer from the bit source encodes one assignment: bit i is the
    # state handed to flippers[i].
    if (options.maxIsomers == 0 or 2**nCenters <= options.maxIsomers):
        bitsource = _RangeBitsGenerator(nCenters)
    else:
        if options.rand is None:
            # deterministic random seed invariant to input atom order
            seed = hash(
                tuple(sorted([(a.GetDegree(), a.GetAtomicNum()) for a in tm.GetAtoms()])))
            rand = random.Random(seed)
        elif isinstance(options.rand, random.Random):
            # other implementations of Python random number generators
            # can inherit from this class to pick up utility methods
            rand = options.rand
        else:
            rand = random.Random(options.rand)

        bitsource = _UniqueRandomBitsGenerator(nCenters, options.maxIsomers, rand)

    isomersSeen = set()
    numIsomers = 0
    for bitflag in bitsource:
        for i, flipper in enumerate(flippers):
            flipper.flip(bool(bitflag & (1 << i)))

        isomer = Chem.Mol(tm)
        Chem.SetDoubleBondNeighborDirections(isomer)
        isomer.ClearComputedProps()
        Chem.AssignStereochemistry(isomer, cleanIt=True, force=True,
                                   flagPossibleStereoCenters=True)
        if options.unique:
            cansmi = Chem.MolToSmiles(isomer, isomericSmiles=True)
            if cansmi in isomersSeen:
                continue

            isomersSeen.add(cansmi)

        if options.tryEmbedding:
            ntm = Chem.AddHs(isomer)
            # The bit pattern doubles as the embedding seed. It is an
            # unbounded Python int (one bit per center), so mask it to 31
            # bits to keep it within a signed 32-bit integer for the
            # wrapped EmbedMolecule call.
            cid = EmbedMolecule(ntm, randomSeed=bitflag & 0x7fffffff)
            if cid >= 0:
                # Copy the coordinates of the isomer's own atoms from the
                # hydrogen-added copy back onto the isomer.
                embedded = ntm.GetConformer()
                conf = Chem.Conformer(isomer.GetNumAtoms())
                for aid in range(isomer.GetNumAtoms()):
                    conf.SetAtomPosition(aid, embedded.GetAtomPosition(aid))
                isomer.AddConformer(conf)
        else:
            # No embedding requested: any non-negative value means "accept".
            cid = 1

        if cid >= 0:
            yield isomer
            numIsomers += 1
            if options.maxIsomers != 0 and numIsomers >= options.maxIsomers:
                break
        elif verbose:
            print("%s failed to embed" % (Chem.MolToSmiles(isomer, isomericSmiles=True)))
def build_molecules(starting_mol,
                    atom_additions=None,
                    stereoisomers=True,
                    sa_score_threshold=3.,
                    tryEmbedding=True):
    """Return an iterator of molecules that result from a single manipulation
    (i.e., atom / bond addition) to the starting molecule

    Arguments:
        starting_mol: rdkit.Mol
            The starting molecule as an rdkit Mol object

        atom_additions: list of elements
            Types of atoms that can be added. Defaults to ('C', 'N', 'O')

        stereoisomers: bool
            Whether to iterate over potential stereoisomers of the given
            molecule as separate molecules

        sa_score_threshold: float or None
            Whether to calculate the sa_score of the given molecule, and
            withhold molecules that have a sa_score higher than the threshold.

        tryEmbedding: bool
            whether to try an rdkit 3D embedding of the molecule

    Yields:
        rdkit.Mol, corresponding to modified input

    """
    if atom_additions is None:
        atom_additions = ('C', 'N', 'O')

    def get_valid_partners(atom):
        """ For a given atom, return other atoms it can be connected to.

        Indices at or above ``starting_mol.GetNumAtoms()`` stand for a new
        atom taken from ``atom_additions``.
        """
        return list(
            set(range(starting_mol.GetNumAtoms())) -
            set((neighbor.GetIdx() for neighbor in atom.GetNeighbors())) -
            set(range(atom.GetIdx())) -  # Prevent duplicates by only bonding forward
            set((atom.GetIdx(), )) |
            set(np.arange(len(atom_additions)) + starting_mol.GetNumAtoms()))

    def get_valid_bonds(atom1_idx, atom2_idx):
        """ Compare free valences of two atoms to calculate valid bonds.

        Returns a range of indices into ``bond_orders``, capped at three
        entries.
        """
        free_valence_1 = get_free_valence(
            starting_mol.GetAtomWithIdx(atom1_idx))

        if atom2_idx < starting_mol.GetNumAtoms():
            free_valence_2 = get_free_valence(
                starting_mol.GetAtomWithIdx(int(atom2_idx)))
        else:
            free_valence_2 = pt.GetDefaultValence(
                atom_additions[atom2_idx - starting_mol.GetNumAtoms()])

        return range(min(free_valence_1, free_valence_2, 3))

    def add_bond(atom1_idx, atom2_idx, bond_type):
        """ Given two atoms and a bond type, execute the addition using rdkit """
        num_atom = starting_mol.GetNumAtoms()
        rw_mol = Chem.RWMol(starting_mol)

        if atom2_idx < num_atom:
            rw_mol.AddBond(atom1_idx, atom2_idx, bond_orders[bond_type])
        else:
            # The new atom is appended, so its index is the old atom count.
            rw_mol.AddAtom(Chem.Atom(atom_additions[atom2_idx - num_atom]))
            rw_mol.AddBond(atom1_idx, num_atom, bond_orders[bond_type])

        return rw_mol

    def enumerate_stereoisomers(mol, to_use):
        """ We likely want to distinguish between stereoisomers, so we do that here """
        if not to_use:
            # Give an easy way to pass through this function if this feature isn't used
            return (mol, )

        opts = StereoEnumerationOptions(unique=True)
        return tuple(EnumerateStereoisomers(mol, options=opts))

    # SMILES already produced; a set keeps the membership test O(1).
    generated_smiles = set()

    # Construct the generator
    for i, atom in enumerate(starting_mol.GetAtoms()):
        for partner in get_valid_partners(atom):
            for bond_order in get_valid_bonds(i, partner):
                mol = add_bond(i, partner, bond_order)
                Chem.SanitizeMol(mol)

                for isomer in enumerate_stereoisomers(mol, stereoisomers):
                    # Key the de-duplication on the isomer itself: using the
                    # parent ``mol`` gives every stereoisomer the same key, so
                    # all but the first would be dropped as duplicates.
                    smiles = Chem.MolToSmiles(isomer)
                    if smiles in generated_smiles:
                        continue
                    generated_smiles.add(smiles)

                    if sa_score_threshold is not None:
                        if sascorer.calculateScore(isomer) >= sa_score_threshold:
                            continue

                    if tryEmbedding:
                        ntm = Chem.AddHs(isomer)
                        # Explicit check instead of ``assert`` so the filter
                        # still works when Python runs with -O.
                        try:
                            embedded = EmbedMolecule(ntm) >= 0
                        except RuntimeError:
                            embedded = False
                        if not embedded:
                            # Failed a 3D embedding
                            continue

                    yield isomer