def from_smiles(self, smiles, hydrogens_are_explicit=True):
    """
    It initializes an RDKit's Molecule object from a SMILES tag.

    Parameters
    ----------
    smiles : str
        The SMILES tag to construct the molecule structure with
    hydrogens_are_explicit : bool
        Whether the SMILES tag has explicit information about
        hydrogen atoms or not. Otherwise, they will be added when
        the molecule is built. Default is True

    Returns
    -------
    molecule : an rdkit.Chem.rdchem.Mol object
        The RDKit's Molecule object

    Raises
    ------
    ValueError
        If RDKit cannot parse the supplied SMILES tag
    """
    from rdkit.Chem import AllChem as Chem

    molecule = Chem.MolFromSmiles(smiles, sanitize=False)

    # RDKit signals a parsing failure by returning None instead of raising;
    # fail here with a clear message rather than inside SanitizeMol
    if molecule is None:
        raise ValueError(
            "Invalid SMILES tag, RDKit could not parse it: "
            "{!r}".format(smiles))

    # Run every sanitization step except the hydrogen adjustment and the
    # aromaticity perception (their flags are cleared from SANITIZE_ALL)
    sanitize_ops = (Chem.SANITIZE_ALL
                    ^ Chem.SANITIZE_ADJUSTHS
                    ^ Chem.SANITIZE_SETAROMATICITY)
    Chem.SanitizeMol(molecule, sanitize_ops)

    # Add hydrogens to molecule
    if not hydrogens_are_explicit:
        molecule = Chem.AddHs(molecule)

    # Generate 3D coordinates
    # NOTE: the value returned by EmbedMolecule is discarded, so a failed
    # embedding is not reported to the caller
    Chem.EmbedMolecule(molecule)

    return molecule
def generate_structures(vae, smi, char_to_index, limit=1e4, write=False):
    """
    Sample structures from a VAE until no new unique one is found for a while.

    On every iteration a temperature is drawn uniformly from [0, 2), a string
    is decoded with ``decode_smiles`` and the text before its first space is
    taken as the candidate SMILES. A candidate is accepted when RDKit can
    parse it, add hydrogens, embed it, optimize it with UFF and write it back
    as SMILES, and when that resulting SMILES has not been accepted before.

    Parameters
    ----------
    vae, smi, char_to_index
        Forwarded unchanged to ``decode_smiles``
    limit : int or float
        The loop stops once more than ``limit`` consecutive iterations have
        passed without accepting a new structure. Default is 1e4
    write : bool or str
        When truthy, the results are also written to ``'<write>.csv'``.
        Default is False

    Returns
    -------
    df : pandas.DataFrame
        One row per accepted structure with the columns 'smiles',
        'temperature' and 'iteration'. It has no rows when nothing was
        accepted
    """
    accepted = []
    seen = set()  # mirrors `accepted` for O(1) duplicate checks
    temps = []
    iterations = []

    def build_frame():
        # Same construction as before so the column dtypes do not change
        frame = pd.DataFrame([accepted, temps, iterations]).T
        frame.columns = ['smiles', 'temperature', 'iteration']
        return frame

    iteration = limit_counter = 0
    while True:
        iteration += 1
        limit_counter += 1
        t = random.random()*2
        candidate = decode_smiles(vae, smi, char_to_index,
                                  temp=t).split(" ")[0]

        # Best effort: any candidate RDKit cannot process is skipped.
        # `Exception` (not a bare except) keeps Ctrl-C and SystemExit working
        try:
            sampled = Chem.MolFromSmiles(candidate)
            cation = Chem.AddHs(sampled)
            Chem.EmbedMolecule(cation, Chem.ETKDG())
            Chem.UFFOptimizeMolecule(cation)
            canonical = Chem.MolToSmiles(Chem.RemoveHs(cation))
        except Exception:
            canonical = None

        if canonical is not None and canonical not in seen:
            seen.add(canonical)
            accepted.append(canonical)
            temps.append(t)
            iterations.append(iteration)
            limit_counter = 0
            # Progress report: show everything accepted so far
            print(build_frame())

        if limit_counter > limit:
            break

    # Always build the result so it is defined even if nothing was accepted
    df = build_frame()
    if write:
        df.to_csv(path_or_buf='{}.csv'.format(write), index=False)
    return df
def from_smiles(self, smiles):
    """
    It initializes an RDKit's Molecule object from a SMILES tag.

    Parameters
    ----------
    smiles : str
        The SMILES tag to construct the molecule structure with.

    Returns
    -------
    molecule : an rdkit.Chem.rdchem.Mol object
        The RDKit's Molecule object

    Raises
    ------
    ValueError
        If RDKit cannot parse the supplied SMILES tag
    """
    from rdkit.Chem import AllChem as Chem

    molecule = Chem.MolFromSmiles(smiles)

    # RDKit signals a parsing failure by returning None instead of raising;
    # fail here with a clear message rather than inside AddHs
    if molecule is None:
        raise ValueError(
            "Invalid SMILES tag, RDKit could not parse it: "
            "{!r}".format(smiles))

    # Add hydrogens to molecule
    molecule = Chem.AddHs(molecule)

    # Generate 3D coordinates
    # NOTE: the value returned by EmbedMolecule is discarded, so a failed
    # embedding is not reported to the caller
    Chem.EmbedMolecule(molecule)

    return molecule
def _getMolWithEHTcharges(self, mol : Chem.Mol):
    """
    Build a hydrogen-complete, embedded copy of the molecule, run
    ``rdEHTTools.RunMol`` on it and store the resulting atomic charges on
    the atoms of the returned molecule as the double property 'EHTcharge'.

    When ``self.includeHs`` is false, the returned molecule is a
    hydrogen-stripped copy of the input and only its atoms are annotated.

    TODO: perhaps this should be divided into more methods for more flexibility...

    :param mol: the molecule to calculate the charges for
    :return: a molecule whose atoms carry the 'EHTcharge' property
    """

    # Strip and re-add the hydrogens, then generate coordinates for the run
    with_hs = Chem.AddHs(Chem.RemoveHs(mol))
    Chem.EmbedMolecule(with_hs)

    # should create two output files ('run.out' and 'nul')
    # TODO: maybe this is not really needed or even desirable in the final version?
    # The success flag returned alongside the results is not inspected here
    _passed, results = rdEHTTools.RunMol(with_hs)
    n_computed = with_hs.GetNumAtoms()
    charges = results.GetAtomicCharges()

    annotated = with_hs if self.includeHs else Chem.RemoveHs(mol)

    # Charges are assigned by atom index, up to the shorter of the two atom
    # counts.
    # NOTE(review): this relies on the first atoms of the hydrogen-complete
    # copy lining up index-for-index with the hydrogen-stripped one - confirm
    for idx in range(min(n_computed, annotated.GetNumAtoms())):
        annotated.GetAtomWithIdx(idx).SetDoubleProp('EHTcharge', charges[idx])

    return annotated
def get_descriptors(smiles):
    """
    Compute a fixed set of RDKit descriptors for a SMILES string.

    The molecule is parsed, given explicit hydrogens and embedded with ETKDG
    before any descriptor is evaluated, so the 3-D descriptors have
    coordinates to work on.

    Parameters
    ----------
    smiles : str
        The SMILES string of the chemical of interest

    Returns
    -------
    descriptors : dict
        Maps each descriptor key (e.g. 'molwt', 'asphericity', 'log_p') to
        the value returned by the corresponding RDKit function

    Notes:
        Developed with RDKit 2019.03.4, although doc pages listed 2019.03.1
    """
    mol = Chem.AddHs(Chem.MolFromSmiles(smiles))
    Chem.EmbedMolecule(mol, Chem.ETKDG())

    # (key, calculator) pairs; the order here is the order of evaluation and
    # the order of the keys in the returned dictionary
    calculators = (
        # Simple descriptors
        # https://www.rdkit.org/docs/source/rdkit.Chem.Descriptors.html
        # 'molwt' is filled from Descriptors.ExactMolWt
        ('molwt', Descriptors.ExactMolWt),
        # Partial charge metrics
        ('max_abs_partial_charge', Descriptors.MaxAbsPartialCharge),
        ('max_partial_charge', Descriptors.MaxPartialCharge),
        ('min_abs_partial_charge', Descriptors.MinAbsPartialCharge),
        ('min_partial_charge', Descriptors.MinPartialCharge),
        # Basic electron counts
        ('num_radical_electrons', Descriptors.NumRadicalElectrons),
        ('num_valence_electrons', Descriptors.NumValenceElectrons),

        # 3-D descriptors
        # https://www.rdkit.org/docs/source/rdkit.Chem.Descriptors3D.html
        # According to some basic tests by the original author, the
        # rdMolDescriptors.Calc* counterparts (e.g. CalcAsphericity,
        # CalcEccentricity) should produce the same results
        ('asphericity', Descriptors3D.Asphericity),
        ('eccentricity', Descriptors3D.Eccentricity),
        ('inertial_shape_factor', Descriptors3D.InertialShapeFactor),
        ('radius_of_gyration', Descriptors3D.RadiusOfGyration),
        ('spherocity_index', Descriptors3D.SpherocityIndex),

        # Graph descriptors
        # https://www.rdkit.org/docs/source/rdkit.Chem.GraphDescriptors.html
        ('balaban_j', GraphDescriptors.BalabanJ),
        ('bertz_ct', GraphDescriptors.BertzCT),
        ('chi0', GraphDescriptors.Chi0),
        ('chi0n', GraphDescriptors.Chi0n),
        ('chi0v', GraphDescriptors.Chi0v),
        ('chi1', GraphDescriptors.Chi1),
        ('chi1n', GraphDescriptors.Chi1n),
        ('chi1v', GraphDescriptors.Chi1v),
        ('chi2n', GraphDescriptors.Chi2n),
        ('chi2v', GraphDescriptors.Chi2v),
        ('chi3n', GraphDescriptors.Chi3n),
        ('chi3v', GraphDescriptors.Chi3v),
        ('chi4n', GraphDescriptors.Chi4n),
        ('chi4v', GraphDescriptors.Chi4v),
        ('hall_kier_alpha', GraphDescriptors.HallKierAlpha),
        ('kappa1', GraphDescriptors.Kappa1),
        ('kappa2', GraphDescriptors.Kappa2),
        ('kappa3', GraphDescriptors.Kappa3),

        # Predicted properties from Wildman and Crippen
        ('log_p', Descriptors.MolLogP),
        ('refractivity', Descriptors.MolMR),
    )

    return {key: calculate(mol) for key, calculate in calculators}