def get_mordred_descriptors(smiles_list):
    """Compute a fixed selection of mordred descriptors for SMILES strings.

    Each SMILES is parsed with RDKit, explicit hydrogens are added, and the
    molecule is evaluated by a calculator holding the descriptor modules
    registered below.

    :param smiles_list: iterable of SMILES strings.
    :return: ``pandas.DataFrame`` with one row per SMILES, columns named
        after ``str(descriptor)``; columns of ``object`` dtype are removed.
    """
    # NOTE(review): a SMILES that RDKit cannot parse is not handled here;
    # presumably callers pass validated input -- confirm.
    calc = mordred.Calculator()

    # The trailing numbers are the original author's annotations
    # (presumably the number of descriptors per module -- unverified).
    selected_modules = (
        mordred.AtomCount,       # 16
        mordred.RingCount,       # 139
        mordred.BondCount,       # 9
        mordred.HydrogenBond,    # 2
        mordred.CarbonTypes,     # 10
        mordred.SLogP,           # 2
        mordred.Constitutional,  # 16
        mordred.TopoPSA,         # 2
        mordred.Weight,          # 2
        mordred.Polarizability,  # 2
        mordred.McGowanVolume,   # 1
    )
    for module in selected_modules:
        calc.register(module)

    column_names = [str(descriptor) for descriptor in calc.descriptors]

    rows = []
    for smiles in smiles_list:
        molecule = Chem.AddHs(Chem.MolFromSmiles(smiles))
        rows.append(calc(molecule)._values)

    table = pd.DataFrame(rows, columns=column_names)
    return table.select_dtypes(exclude=['object'])
def hdonor_count(mol):
    """Return the mordred ``HydrogenBond.HBondDonor`` value for ``mol``.

    A calculator holding only ``mordred.HydrogenBond.HBondDonor`` is built
    and the first value of its result is returned.

    :param mol: molecule passed straight to the mordred calculator
        (presumably an RDKit ``Mol`` -- confirm against callers).
    :return: the first descriptor value, or ``None`` if the calculation
        raises.
    """
    try:
        calculator = mordred.Calculator(mordred.HydrogenBond.HBondDonor)
        return list(calculator(mol).values())[0]
    except Exception:
        # Deliberately best-effort, but unlike a bare ``except:`` this lets
        # KeyboardInterrupt / SystemExit propagate.
        return None
def _mordred_2d_single(mol, calculator):
    """Evaluate every descriptor of ``calculator`` on a single molecule."""
    if not isinstance(mol, Chem.Mol):
        # Anything that is not an RDKit molecule is round-tripped through a
        # mol block.  Do it once here instead of once per descriptor.
        try:
            mol = Chem.MolFromMolBlock(mol.write('mol'))
        except (ZeroDivisionError, ValueError):
            # Previously such a failure was swallowed for every descriptor,
            # which left the result empty; keep that outcome.
            return {}

    values = {}
    for descriptor in calculator.descriptors:
        try:
            values[str(descriptor)] = descriptor(mol)
        except (ZeroDivisionError, ValueError):
            # Descriptors that fail with these errors are left out of the
            # result on purpose (best-effort calculation).
            pass
    return values


def Mordred_2D(
    mol: Union[Union[pybel.Molecule, Chem.Mol],
               List[Union[pybel.Molecule, Chem.Mol]]]
) -> Union[dict, List[dict]]:
    """Calculate 2D molecular descriptors with mordred.

    :param mol: either one or multiple molecules the descriptors will be
        calculated from.
    :return: a dict mapping ``str(descriptor)`` to its value for a single
        molecule, or a list of such dicts (one per molecule, in input order)
        when ``mol`` is a list.  Descriptors raising ``ZeroDivisionError`` or
        ``ValueError`` are omitted.
    """
    mordred_calculator = mordred.Calculator(descriptors, ignore_3D=True)
    if isinstance(mol, list):
        return [_mordred_2d_single(entry, mordred_calculator)
                for entry in mol]
    return _mordred_2d_single(mol, mordred_calculator)
def acid_count(mol):
    """Return the mordred ``AcidBase.AcidicGroupCount`` value for ``mol``.

    A calculator holding only ``mordred.AcidBase.AcidicGroupCount`` is built
    and the first value of its result is returned.

    :param mol: molecule passed straight to the mordred calculator
        (presumably an RDKit ``Mol`` -- confirm against callers).
    :return: the first descriptor value, or ``None`` if the calculation
        raises.
    """
    try:
        calculator = mordred.Calculator(mordred.AcidBase.AcidicGroupCount)
        return list(calculator(mol).values())[0]
    except Exception:
        # Deliberately best-effort, but unlike a bare ``except:`` this lets
        # KeyboardInterrupt / SystemExit propagate.
        return None
def rotate_bond_count(mol):
    """Return the mordred ``RotatableBond.RotatableBondsCount`` value.

    A calculator holding only ``mordred.RotatableBond.RotatableBondsCount``
    is built and the first value of its result is returned.

    :param mol: molecule passed straight to the mordred calculator
        (presumably an RDKit ``Mol`` -- confirm against callers).
    :return: the first descriptor value, or ``None`` if the calculation
        raises.
    """
    try:
        calculator = mordred.Calculator(
            mordred.RotatableBond.RotatableBondsCount)
        return list(calculator(mol).values())[0]
    except Exception:
        # Deliberately best-effort, but unlike a bare ``except:`` this lets
        # KeyboardInterrupt / SystemExit propagate.
        return None
def find_model(self):
    """Finds and loads a model

    Opens a file dialog, loads the chosen file through ``model('load')``,
    writes a summary of ``self.model.information`` into
    ``self.modelname['text']``, rebuilds ``self.calc`` from the model's
    descriptor names and refreshes ``self.mp['text']`` for the SMILES
    currently held by ``self.SMILES``.

    Nothing is changed when the dialog is dismissed without a selection.
    """
    chosen_file = filedialog.askopenfile()
    if chosen_file is None:
        # Dialog was cancelled: keep the currently loaded model untouched.
        return
    # Only the path is needed; release the handle the dialog opened.
    model_path = chosen_file.name
    chosen_file.close()

    # The last 7 characters are dropped from the displayed name
    # (presumably the model file's extension -- confirm).
    self.modelname['text'] = ("Model Name: "
                              + model_path[:-7].split('/')[-1] + "\n")

    self.model = model('load')
    self.model.load_model(model_path)

    training = self.model.information['Training']
    testing = self.model.information['Testing']
    self.modelname['text'] += "".join([
        "Model Type: " + str(training['Model Type']) + "\n",
        "Model Parameters: " + str(training['Model Parameters']) + "\n",
        "PCA: " + str(training['PCA']) + "\n",
        "Features: " + str(training['Features']) + "\n",
        "Training Samples: " + str(training['Samples']) + "\n",
        "Training RMSE: " + str(training['RMSE']) + " ºC\n",
        "Test Samples: " + str(testing['Samples']) + "\n",
        "Test RMSE: " + str(testing['RMSE']) + " ºC",
    ])

    # Rebuild the calculator with exactly the descriptors the model names.
    self.calc = m.Calculator()
    for desc in self.model.getDescriptors():
        self.calc.register(alldesc[desc])

    mol = Chem.MolFromSmiles(self.SMILES.get())
    if mol is None:
        self.mp['text'] = "Please enter valid SMILES string"
    else:
        self.mp['text'] = self.predictMP(mol)
def generateDescriptors(dataset,filename,descs=None,big=None):
    """
    Generates descriptor .csv file for given dataset

    Note: Run inside of if __name__ == "__main__":

    Parameters
    ----------
    dataset : string
        Filepath of .csv dataset to generate descriptors for.
        The file is read for its 'SMILES' and
        'Melting Point {measured, converted}' columns.

    filename : string
        Filepath to save new descriptor dataset to
        (" descriptors = ... .csv" is appended to it)

    descs : dict
        Dictionary of mordred chemical descriptors
        Defaults to using all Mordred descriptors if not specified

    big : integer
        The size of each batch size
        Only calculates in batches if big is specified
    """
    import os  # local import: only needed for the scratch-file path

    data = pd.read_csv(dataset)
    mols = [Chem.MolFromSmiles(smi) for smi in data['SMILES']]

    calc = m.Calculator()
    if descs:
        for mod in descs:
            calc.register(mod)
    else:
        calc.register(descriptors)

    if big:
        # Batches are accumulated through a scratch CSV.  It lives in a
        # temporary directory so it is removed afterwards (the former
        # "<tempname>.csv" side file was never cleaned up).
        with tempfile.TemporaryDirectory() as scratch_dir:
            scratch_csv = os.path.join(scratch_dir, 'batches.csv')
            pd.DataFrame().to_csv(scratch_csv)
            for i in range(0, len(mols), big):
                molcalc = calc.pandas(mols[i:i + big])
                molcalc.index = data['SMILES'][i:i + big]
                frame = pd.read_csv(scratch_csv)
                # DataFrame.append was removed in pandas 2.0; this concat
                # call is what append did internally.
                frame = pd.concat([frame, molcalc],
                                  ignore_index=False, sort=False)
                frame.to_csv(scratch_csv)
                # Drop the references before the next batch is computed.
                frame = None
                molcalc = None
            df = pd.read_csv(scratch_csv)
        # Only the batched path drops columns containing missing values.
        df = df.dropna(axis=1)
    else:
        df = calc.pandas(mols)

    df = df._get_numeric_data()
    df['MP'] = data['Melting Point {measured, converted}']
    df.index = data['SMILES']

    if descs:
        # Remove the literal "mordred." prefix.  str.strip("mordred.") would
        # treat the argument as a character set and also eat trailing
        # letters (e.g. "mordred.EState" -> "EStat").
        prefix = "mordred."
        labels = [
            d.__name__[len(prefix):] if d.__name__.startswith(prefix)
            else d.__name__
            for d in descs
        ]
        nameString = filename+" descriptors = "+str(labels)+" .csv"
    else:
        nameString = filename+" descriptors = All .csv"
    df.to_csv(nameString)
def get_fingerprints(smiles, type='Morgan'):
    """Build one fingerprint per SMILES string.

    :param smiles: iterable of SMILES strings.
    :param type: ``'Morgan'`` for
        ``AllChem.GetMorganFingerprintAsBitVect(mol, 2, 1024)`` or
        ``'Mordred'`` for the result of a mordred calculator call.
    :return: ``numpy.ndarray`` built from the per-molecule results for the
        two known types; an empty list for any other ``type``.
    """
    # NOTE(review): the calculator is created without registering any
    # descriptors -- confirm that this is what the 'Mordred' mode intends.
    calculator = mordred.Calculator()

    if type == 'Morgan':
        return np.array([
            AllChem.GetMorganFingerprintAsBitVect(
                Chem.MolFromSmiles(entry), 2, 1024)
            for entry in smiles
        ])

    if type == 'Mordred':
        return np.array([
            calculator(Chem.MolFromSmiles(entry)) for entry in smiles
        ])

    return []
def molecular_weight(mol):
    """Return the first mordred ``Weight.Weight`` value for ``mol``.

    A calculator holding only ``mordred.Weight.Weight`` is built and the
    first value of its result is returned.

    :param mol: molecule passed straight to the mordred calculator
        (presumably an RDKit ``Mol`` -- confirm against callers).
    :return: the first descriptor value, or ``None`` if the calculation
        raises.
    """
    try:
        calculator = mordred.Calculator(mordred.Weight.Weight)
        return list(calculator(mol).values())[0]
    except Exception:
        # Deliberately best-effort, but unlike a bare ``except:`` this lets
        # KeyboardInterrupt / SystemExit propagate.
        return None
def logps(mol):
    """Return the first mordred ``SLogP.SLogP`` value for ``mol``.

    A calculator holding only ``mordred.SLogP.SLogP`` is built and the first
    value of its result is returned.

    :param mol: molecule passed straight to the mordred calculator
        (presumably an RDKit ``Mol`` -- confirm against callers).
    :return: the first descriptor value, or ``None`` if the calculation
        raises.
    """
    try:
        calculator = mordred.Calculator(mordred.SLogP.SLogP)
        return list(calculator(mol).values())[0]
    except Exception:
        # Deliberately best-effort, but unlike a bare ``except:`` this lets
        # KeyboardInterrupt / SystemExit propagate.
        return None
def predefined_mordred(mol, desc_type="best", desc_names=False):
    """Evaluate (or just name) a predefined group of mordred descriptors.

    :param mol: molecule handed to the calculator; unused when
        ``desc_names`` is truthy.
    :param desc_type: which group to register: ``"best"``, ``"atom"``,
        ``"bond"``, ``"topological"``, ``"index"``, ``"ring"``,
        ``"estate"``, or ``"all"`` for every group except ``"best"``.
    :param desc_names: when truthy, return the descriptor names instead of
        calculating anything.
    :return: list of ``str(descriptor)`` names when ``desc_names`` is
        truthy, otherwise the ``_values`` of the calculator result.
    """
    calculator = mordred.Calculator()
    register = calculator.register

    def wanted(group):
        # A group is active when named directly or when "all" is requested.
        return desc_type in ("all", group)

    # Registration order is kept exactly as before.
    if desc_type == "best":
        register(mordred.SLogP)
        register(mordred.HydrogenBond.HBondAcceptor)
        register(mordred.HydrogenBond.HBondDonor)
        register(mordred.AtomCount.AtomCount("HeavyAtom"))
        register(mordred.TopoPSA.TopoPSA(True))
        register(mordred.RingCount.RingCount(None, False, False, None, None))
        register(mordred.BondCount.BondCount("any", False))

    if wanted("atom"):
        for atom_kind in ("X", "HeavyAtom"):
            register(mordred.AtomCount.AtomCount(atom_kind))
        register(mordred.Aromatic.AromaticAtomsCount)

    if wanted("bond"):
        register(mordred.HydrogenBond.HBondAcceptor)
        register(mordred.HydrogenBond.HBondDonor)
        register(mordred.RotatableBond.RotatableBondsCount)
        register(mordred.BondCount.BondCount("any", False))
        register(mordred.Aromatic.AromaticBondsCount)
        for bond_kind in ("heavy", "single", "double", "triple"):
            register(mordred.BondCount.BondCount(bond_kind, False))

    if wanted("topological"):
        register(mordred.McGowanVolume.McGowanVolume)
        for psa_flag in (True, False):
            register(mordred.TopoPSA.TopoPSA(psa_flag))
        register(mordred.MoeType.LabuteASA)
        register(mordred.Polarizability.APol)
        register(mordred.Polarizability.BPol)
        register(mordred.AcidBase.AcidicGroupCount)
        register(mordred.AcidBase.BasicGroupCount)
        register(
            mordred.EccentricConnectivityIndex.EccentricConnectivityIndex)
        for charge_kind in ("raw", "mean"):
            register(
                mordred.TopologicalCharge.TopologicalCharge(charge_kind, 1))

    if wanted("index"):
        register(mordred.SLogP)
        register(mordred.BertzCT.BertzCT)
        register(mordred.BalabanJ.BalabanJ)
        register(mordred.WienerIndex.WienerIndex(True))
        register(mordred.ZagrebIndex.ZagrebIndex(1, 1))
        register(mordred.ABCIndex)

    if wanted("ring"):
        ring_variants = (
            (False, None, None),
            (False, None, True),
            (False, True, None),
            (False, True, True),
            (False, False, None),
            (True, None, None),
        )
        for tail_args in ring_variants:
            register(mordred.RingCount.RingCount(None, False, *tail_args))

    if wanted("estate"):
        register(mordred.EState)

    if desc_names:
        # Names only: nothing is calculated in this mode.
        return [str(descriptor) for descriptor in calculator.descriptors]

    return calculator(mol)._values
import tkinter as tk from tkinter import filedialog, CENTER, S from random import randint from Model_Training import model from rdkit import Chem from rdkit.Chem import Draw from mordred import descriptors import mordred as m import numpy as np import pandas as pd from PIL import ImageTk, Image from pubchemprops.pubchemprops import get_second_layer_props import pubchempy as pcp testcalc = m.Calculator(descriptors) alldesc = dict( (descript.__str__(), descript) for descript in testcalc.descriptors) class Overview(tk.Tk): def __init__(self, *args, **kwargs): tk.Tk.__init__(self, *args, **kwargs) container = tk.Frame(self) self.attributes("-fullscreen", True) self.title("Chemical Melting Point Prediction Program") container.pack(side="top", fill="both", expand=True) container.grid_rowconfigure(0, weight=1) container.grid_columnconfigure(0, weight=1) self.frames = {} for F in (mainPage, predictionPage, examplesPage): page_name = F.__name__