def _get_inputs(self, rxn_list, pickaxe): """rxn_list will be pickaxe eventually""" # Get reactions information def get_cpd_smiles(cpd_id): return pickaxe.compounds[cpd_id]["SMILES"] reactions_info = {} for rxn_id in rxn_list: rxn = pickaxe.reactions[rxn_id] reactants = [ get_cpd_smiles(v[1]) for v in rxn["Reactants"] if v[1].startswith("C") ] products = [ get_cpd_smiles(v[1]) for v in rxn["Products"] if v[1].startswith("C") ] pairs = product(reactants, products) reactions_info[rxn["_id"]] = list(pairs) # Process this information input_info = {} input_fails = {} for rxn_id, reaction_pairs in reactions_info.items(): if not reaction_pairs: continue for i, (reactant_smiles, product_smiles) in enumerate(reaction_pairs): if len(reactant_smiles) <= 120: if len(product_smiles) <= 120: mol1 = MolFromSmiles(reactant_smiles) mol2 = MolFromSmiles(product_smiles) mol1 = RemoveHs(mol1) mol2 = RemoveHs(mol2) reactant_smiles = MolToSmiles(mol1) product_smiles = MolToSmiles(mol2) # TODO what does this fix? from original code if "M" in reactant_smiles or "M" in product_smiles: input_fails[rxn_id + "_" + str(i)] = None else: input_info[rxn_id + "_" + str(i)] = [ reactant_smiles, product_smiles, ] else: input_fails[rxn_id + "_" + str(i)] = None else: input_fails[rxn_id + "_" + str(i)] = None return input_info, input_fails
def graph2(self, m):
    """Build a bond-order-free graph descriptor string for ``m``.

    A new molecule is assembled with the same atoms (atomic numbers only)
    and the same connectivity as ``m``, but with every bond set to SINGLE.
    Its canonical SMILES (via ``self.cansmiles``) is combined with the total
    hydrogen count summed over the atoms of ``m`` and the formal charge of
    ``m``.

    Args:
        m: RDKit molecule.

    Returns:
        str: ``"<cansmiles> H<hcount><signed charge>"``.
    """
    from rdkit.Chem import EditableMol, RemoveHs, Atom, rdchem

    # create new molecule using single bonds only
    em = EditableMol(Mol())
    hcount = 0
    for atom in m.GetAtoms():
        hcount += atom.GetTotalNumHs(False)
        em.AddAtom(Atom(atom.GetAtomicNum()))

    for bond in m.GetBonds():
        em.AddBond(
            bond.GetBeginAtomIdx(),
            bond.GetEndAtomIdx(),
            rdchem.BondType.SINGLE,
        )

    try:
        mol = RemoveHs(em.GetMol())
    except Exception:
        # Best effort: if hydrogen removal fails, fall back to the raw
        # skeleton. Narrowed from a bare ``except`` so that
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        mol = em.GetMol()

    cansmi = self.cansmiles(mol)
    return "%s%s%d%+d" % (cansmi, ' H', hcount, GetFormalCharge(m))
def remove_hs(self, inplace=False, sanitize=True, update_explicit=False, implicit_only=False):
    """ Remove hydrogens from self.

    Args:
        inplace (bool):
            Whether to remove the Hs from this `Mol`, or return a new `Mol`.
            Only ``False`` is supported.
        sanitize (bool):
            Forwarded to ``RemoveHs`` as ``sanitize``.
        update_explicit (bool):
            Forwarded to ``RemoveHs`` as ``updateExplicitCount``.
        implicit_only (bool):
            Forwarded to ``RemoveHs`` as ``implicitOnly``.

    Returns:
        skchem.Mol:
            `Mol` with Hs removed.

    Raises:
        NotImplementedError: If ``inplace`` is truthy.
    """
    if inplace:
        raise NotImplementedError('Inplace removed of Hs is not yet supported.')

    rdkit_kwargs = {
        'implicitOnly': implicit_only,
        'updateExplicitCount': update_explicit,
        'sanitize': sanitize,
    }
    stripped = RemoveHs(self, **rdkit_kwargs)
    return self.__class__.from_super(stripped)
def get_orientation(self, xyz, rtol=0.15):
    """
    Get the orientation of ``xyz`` relative to the reference conformer.

    Conformers 0 and 2 of the molecule built from ``self.smile`` are given
    the two coordinate sets returned by ``self.set_torsion_angles`` (called
    without and with the extra ``True`` argument); conformer 1 is given
    ``xyz`` itself. After hydrogen removal, conformer 1 is aligned against
    conformer 0 and then against conformer 2, and the first alignment whose
    RMSD is below ``rtol * number_of_atoms`` is used.

    Args:
        xyz: per-atom coordinates, indexable as ``xyz[i] -> (x, y, z)``.
        rtol: per-atom tolerance; multiplied by the heavy-atom count.

    Returns:
        (euler angles in degrees, 'zxy' order), rmsd, flag
        where flag is False if conformer 0 matched and True if conformer 2
        matched.

    Raises:
        ValueError: if neither alignment is within tolerance.
    """
    from rdkit.Geometry import Point3D
    from rdkit.Chem import rdMolAlign, RemoveHs, rdmolfiles, rdMolTransforms

    mol = self.rdkit_mol(self.smile)
    conf0 = mol.GetConformer(0)
    conf1 = mol.GetConformer(1)
    conf2 = mol.GetConformer(2)

    angs = self.get_torsion_angles(xyz)
    xyz0 = self.set_torsion_angles(conf0, angs)        # conf0 with aligned torsions
    xyz1 = self.set_torsion_angles(conf0, angs, True)  # same, with the extra flag set

    for idx in range(len(self.mol)):
        ax, ay, az = xyz0[idx]
        bx, by, bz = xyz1[idx]
        cx, cy, cz = xyz[idx]
        conf0.SetAtomPosition(idx, Point3D(ax, ay, az))
        conf1.SetAtomPosition(idx, Point3D(cx, cy, cz))
        conf2.SetAtomPosition(idx, Point3D(bx, by, bz))

    mol = RemoveHs(mol)
    rmsd1, trans1 = rdMolAlign.GetAlignmentTransform(mol, mol, 1, 0)
    rmsd2, trans2 = rdMolAlign.GetAlignmentTransform(mol, mol, 1, 2)
    tol = rtol * mol.GetNumAtoms()

    # Try conformer 0 first, then conformer 2; the flag records which one hit.
    candidates = ((rmsd1, trans1, False), (rmsd2, trans2, True))
    for rmsd, transform, matched_second in candidates:
        if rmsd < tol:
            rotation = Rotation.from_matrix(transform[:3, :3].T)
            return rotation.as_euler('zxy', degrees=True), rmsd, matched_second

    # Neither alignment is acceptable: dump diagnostics before failing.
    print(rmsd1, rmsd2)
    print(self.get_torsion_angles(xyz))
    print(self.get_torsion_angles(xyz0))
    print(self.get_torsion_angles(xyz1))
    raise ValueError("Problem in conformer")
def remove_hydrogen(cls, mol_in, addCoords=True):
    """Remove hydrogens from a molecule.

    The previous implementation forwarded ``explicitOnly`` and ``addCoords``
    to ``RemoveHs``. Those keywords belong to RDKit's ``AddHs``; RDKit's
    ``RemoveHs`` does not accept them, so the call could not succeed.
    ``RemoveHs`` is now called with its defaults.

    :param mol_in: RDKit Mol
    :param addCoords: Unused. Kept only so existing callers that pass it
        keep working; coordinates are irrelevant when removing atoms.
    :return mol_out: RDKit Mol with hydrogens removed
    """
    return RemoveHs(mol_in)
def generate_png(mol, pngpath, logfile=devnull, size=300):
    """Render ``mol`` without hydrogens to an image file at ``pngpath``.

    While drawing, output is redirected twice via ``stdout_redirected``:
    first ``stderr`` to ``stdout``, then ``stdout`` to ``logfile``.

    Args:
        mol: molecule to draw.
        pngpath: destination path handed to ``MolToFile``.
        logfile: target of the stdout redirection.
        size: edge length of the square image; the atom label font size
            is set to ``size / 25``.
    """
    with stdout_redirected(to=sys.stdout, stdout=sys.stderr), \
            stdout_redirected(to=logfile, stdout=sys.stdout):
        # Strip hydrogens without sanitising, then sanitise separately so
        # that errors are caught rather than raised.
        heavy = RemoveHs(
            mol,
            implicitOnly=False,
            updateExplicitCount=True,
            sanitize=False,
        )
        SanitizeMol(heavy, catchErrors=True)

        options = DrawingOptions()
        options.atomLabelFontSize = size / 25

        drawable = PrepareMolForDrawing(heavy, forceCoords=True, addChiralHs=True)
        MolToFile(
            drawable,
            pngpath,
            fitImage=True,
            size=(size, size),
            options=options,
        )
def __init__(self, **kwargs) -> None:
    """Initialise the job and, if a molecule was supplied, store its SMILES.

    When ``self.molecule`` is truthy it is converted (one SMILES per
    molecule, hydrogens removed) and written into ``self.settings.input``
    under the ``-smiles`` key if that key is already present, otherwise
    under the ``smiles`` key (both resolved through ``find_case``).

    Raises:
        JobError: if a molecule was supplied but ``RDKIT_EX`` is set.
    """
    super().__init__(**kwargs)

    # Nothing to convert.
    if not self.molecule:
        return

    # If supplied, convert self.molecule into a SMILES string
    if RDKIT_EX is not None:
        err = f"Molecule to SMILES conversion requires the 'rdkit' package: {RDKIT_EX}"
        raise JobError(err).with_traceback(RDKIT_EX.__traceback__)

    if isinstance(self.molecule, Molecule):
        molecules = [self.molecule]
    else:
        molecules = self.molecule

    inp = self.settings.input
    plain_key = inp.find_case('smiles')
    dashed_key = inp.find_case('-smiles')
    target_key = dashed_key if dashed_key in self.settings.input else plain_key

    self.settings.input[target_key] = [
        MolToSmiles(RemoveHs(mol)) for mol in molecules
    ]
def processline(t, step, line):
    """Run benchmark step ``step`` on one input line.

    The work done is accumulated into the module-level ``lensum``.

    Args:
        t: object with an ``incr()`` method; a truthy result stops
            processing of this line.
        step: integer code selecting the operation to perform.
        line: the input line (parsed as SMILES for every step except 0).

    Returns:
        1 if ``t.incr()`` was truthy, otherwise 0.

    Raises:
        ValueError: for an unknown ``step``.
    """
    global lensum

    if t.incr():
        return 1

    # Step 0 is the baseline: no parsing at all.
    if step == 0:
        lensum += len(line)
        return 0

    # Every other step parses the line first, even if the step turns out
    # to be unsupported.
    m = MolFromSmiles(line)

    if step == 100:
        lensum += len(line)
    elif step == 105:
        # NOTE(review): sha256 needs bytes on Python 3 - confirm what type
        # ``line`` has at the call site.
        lensum += len(sha256(line).hexdigest())
    elif step in (110, 120):
        with open(tmpname, 'wb+') as f:
            print(line, file=f)
            if step == 120:
                os.fsync(f.fileno())
        lensum += os.stat(tmpname).st_size
    elif step == 210:
        lensum += m.GetNumAtoms()
    elif step == 220:
        lensum += m.GetNumBonds()
    elif step == 300:
        lensum += len(MolToSmiles(m))
    elif step == 400:
        lensum += len(MolToMolBlock(m))
    elif step == 420:
        embedded = AddHs(m)
        EmbedMolecule(embedded, randomSeed=2020)
        embedded = RemoveHs(embedded)
        embedded.SetProp("_Name", "test")
        lensum += len(MolToMolBlock(embedded))
    elif step == 600:
        lensum += mol2file(m, 'svg')
    elif step == 610:
        lensum += mol2file(m, 'png')
    else:
        raise ValueError("Not implemented step " + str(step))

    return 0
def genericise_scaffold(mol):
    """Make a scaffold generic.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        Molecule to make generic. It is copied; the input is not modified.

    Returns
    -------
    rdkit.Chem.rdchem.Mol
        Genericised scaffold, passed through ``RemoveHs``.

    Notes
    -----
    Adapted from the rdkit Murcko Scaffold module, with an added degree
    check: a non-hydrogen atom of degree > 4 becomes a dummy atom (atomic
    number 0) instead of carbon, so the output does not fail sanitization.

    """
    generic = Mol(mol)

    for atom in generic.GetAtoms():
        if atom.GetAtomicNum() != 1:
            # Carbon where the valence allows it, dummy atom otherwise.
            atom.SetAtomicNum(6 if atom.GetDegree() <= 4 else 0)
        atom.SetIsAromatic(False)
        atom.SetFormalCharge(0)
        atom.SetChiralTag(CHI_UNSPECIFIED)
        atom.SetNoImplicit(0)
        atom.SetNumExplicitHs(0)

    for bond in generic.GetBonds():
        bond.SetBondType(BondType.SINGLE)
        bond.SetIsAromatic(False)

    return RemoveHs(generic)
def rmhs(self, mol):
    """Return the result of RDKit's ``RemoveHs`` applied to ``mol``."""
    # Imported lazily so that rdkit is only required when this is called.
    from rdkit import Chem

    return Chem.RemoveHs(mol)
def _removeHs(mol, implicitOnly=False):
    """Call ``RemoveHs`` on ``mol``, forwarding ``implicitOnly`` by keyword."""
    stripped = RemoveHs(mol, implicitOnly=implicitOnly)
    return stripped
def get_mcs(m1, m2, f):
    """Find the maximum common substructure of two transformed molecules.

    Each of ``m1`` and ``m2`` is passed through ``f`` and then ``RemoveHs``
    before ``rdFMCS.FindMCS`` is run with exact bond-order comparison and
    ``ringMatchesRingOnly=True``.

    Returns the ``rdFMCS.FindMCS`` result unchanged.
    """
    prepared = [RemoveHs(f(molecule)) for molecule in (m1, m2)]
    return rdFMCS.FindMCS(
        prepared,
        bondCompare=rdFMCS.BondCompare.CompareOrderExact,
        ringMatchesRingOnly=True,
    )
#! /usr/bin/env python
# Read every molecule from the SD file given as the first command-line
# argument, embed and MMFF-optimise a 3D conformer with explicit hydrogens,
# write each one (hydrogens removed) to ``ligand_<i>.pdb`` and save a grid
# image of the input molecules to ``ligands.png``.
import sys

from rdkit.Chem import SDMolSupplier, MolToPDBFile, AllChem, AddHs, RemoveHs
from rdkit.Chem.Draw import MolsToGridImage

spl = SDMolSupplier(sys.argv[1])
mols = [m for m in spl]

for i, m in enumerate(mols):
    m = AddHs(m)
    AllChem.EmbedMolecule(m, useBasicKnowledge=True, maxAttempts=100)
    AllChem.MMFFOptimizeMolecule(m)
    # RemoveHs returns a new molecule and leaves its argument untouched;
    # the result must be rebound or the PDB file keeps the hydrogens.
    m = RemoveHs(m)
    MolToPDBFile(m, 'ligand_%d.pdb' % i)

img = MolsToGridImage(mols, legends=["ligand_%d" % i for i in range(len(mols))])
img.save('ligands.png')
def depict(
    self,
    ids=None,
    sketch=True,
    filename=None,
    ipython=False,
    optimize=False,
    optimizemode="std",
    removeHs=True,
    legends=None,
    highlightAtoms=None,
    mols_perrow=3,
):
    """
    Depict the molecules in a grid. The grid can be saved to an svg file
    and/or returned as a Jupyter-notebook rendering.

    Parameters
    ----------
    ids: list
        The indexes of the molecules to depict
    sketch: bool
        Set to True for 2D depiction
    filename: str
        The filename for the svg file
    ipython: bool
        Set to True to return the Jupyter-notebook rendering
    optimize: bool
        Set to True to optimize the conformation. Works only with 3D.
    optimizemode: ['std', 'mmff']
        The optimization mode for the 3D conformation
    removeHs: bool
        Set to True to hide hydrogens in the depiction
    legends: str
        The name to use for each molecule. Can be 'names': the molecule's
        own name; or 'items': an incremental id
    highlightAtoms: list
        A list of atoms to highlight for each molecule. It can also be a
        list of atom lists, in which case different colors will be used
    mols_perrow: int
        The number of molecules to depict per row of the grid

    Returns
    -------
    ipython_svg: SVG object if ipython is set to True

    Raises
    ------
    ValueError
        If ``sketch`` and ``optimize`` are both set, if ``legends`` is not
        one of the accepted values, or if ``highlightAtoms`` does not have
        one entry per molecule.
    """
    from rdkit.Chem.AllChem import (
        Compute2DCoords,
        EmbedMolecule,
        MMFFOptimizeMolecule,
        ETKDG,
    )
    from rdkit.Chem import RemoveHs
    from moleculekit.smallmol.util import depictMultipleMols

    # --- argument validation -------------------------------------------
    if sketch and optimize:
        raise ValueError(
            "Impossible to use optmization in 2D sketch representation")
    if legends is not None and legends not in ["names", "items"]:
        raise ValueError('The "legends" should be "names" or "items"')

    # --- molecule selection --------------------------------------------
    _smallmols = self.getMols(ids)
    selected = self._mols if ids is None else self.getMols(ids)
    _mols = [small._mol for small in selected]

    if highlightAtoms is not None and len(highlightAtoms) != len(_mols):
        raise ValueError(
            "The highlightAtoms {} should have the same length of the "
            "mols {}".format(len(highlightAtoms), len(_mols)))

    # --- coordinates / hydrogens ---------------------------------------
    if sketch:
        for rdmol in _mols:
            Compute2DCoords(rdmol)

    if removeHs:
        _mols = [RemoveHs(rdmol) for rdmol in _mols]

    # activate 3D coords optimization
    if optimize:
        if optimizemode == "std":
            for rdmol in _mols:
                EmbedMolecule(rdmol)
        elif optimizemode == "mmff":
            for rdmol in _mols:
                MMFFOptimizeMolecule(rdmol, ETKDG())

    # --- legends --------------------------------------------------------
    if legends == "names":
        legends_list = [small.getProp("ligname") for small in _smallmols]
    elif legends == "items":
        legends_list = [str(pos + 1) for pos in range(len(_smallmols))]
    else:
        legends_list = []

    return depictMultipleMols(
        _mols,
        ipython=ipython,
        legends=legends_list,
        highlightAtoms=highlightAtoms,
        filename=filename,
        mols_perrow=mols_perrow,
    )
def unprotonated_molecule(self) -> Mol:
    """Return ``self.molecule`` after it has been passed through ``RemoveHs``."""
    stripped = RemoveHs(self.molecule)
    return stripped