def filter_props(self, mol):
    """Return True only if every configured descriptor of ``mol`` is in range.

    Nothing is cached: evaluation stops at the first descriptor whose value
    falls outside its allowed interval.

    Each value of ``self.descriptors`` is indexed as
    ``[descriptor_name, lower_bound, upper_bound]`` where ``descriptor_name``
    is an attribute of ``Descriptors``; both bounds are inclusive.

    Args:
        mol: Molecule handed to each descriptor function.

    Returns:
        bool: False as soon as one value is out of range, otherwise True
        (also True when no descriptors are configured).
    """
    for key in self.descriptors:
        spec = self.descriptors[key]
        # getattr() is the idiomatic lookup; no need to call the dunder.
        compute = getattr(Descriptors, spec[0])
        if not spec[1] <= compute(mol) <= spec[2]:
            return False
    # If all pass
    return True
def set_computables_from_mol(self, mol):
    """Derive and store identifiers and mass for ``mol`` on this object.

    Sets ``molecular_formula``, ``molecular_weight`` (exact mass), ``inchi``,
    ``inchikey`` and ``smiles`` (written with ``isomericSmiles=False``).

    Args:
        mol: Molecule to compute the properties from.

    Raises:
        SpectrumError: If any of the computations raises; the original
            exception is chained as the cause.
        AssertionError: If any computed value is None.
    """
    try:
        # warning comes up in pycharm (bug of pycharm)
        self.molecular_formula = Descriptors.rdMolDescriptors.CalcMolFormula(
            mol)
        self.molecular_weight = Descriptors.ExactMolWt(mol)
        self.inchi = inchi.MolToInchi(mol)
        self.inchikey = inchi.MolToInchiKey(mol)
        self.smiles = Chem.MolToSmiles(mol, isomericSmiles=False)
    except Exception as e:
        # e.args is a tuple; concatenating it to a str raised TypeError and
        # hid the real failure, so format the exception instead.
        raise SpectrumError(
            "Error occurred while computing properties: {}".format(e)) from e
    assert self.molecular_formula is not None, "molecular-formula can't be None"
    assert self.molecular_weight is not None, "molecular-weight can't be None"
    assert self.inchi is not None, "inchi can't be None"
    assert self.inchikey is not None, "inchikey can't be None"
    assert self.smiles is not None, "smiles can't be None"
def _reward(self):
    """Score the current state by how close its molecular weight is to target.

    The accepted window is ``self.target_weight`` plus or minus 25.

    Returns:
        ``-self.target_weight ** 2`` when ``self._state`` cannot be parsed
        as SMILES; ``1`` when the molecular weight lies inside the window
        (bounds inclusive); otherwise the negative distance from the weight
        to the nearest window edge.
    """
    mol = Chem.MolFromSmiles(self._state)
    target = self.target_weight
    if mol is None:
        return -target**2
    window_low = target - 25
    window_high = target + 25
    weight = Descriptors.MolWt(mol)
    if window_low <= weight <= window_high:
        return 1
    gap_to_edge = min(abs(window_low - weight), abs(window_high - weight))
    return -gap_to_edge
def calc_mol_weight(smiles):
    """Return an adjusted molecular weight for a SMILES string.

    If the weight or formal charge cannot be computed (for example when the
    SMILES does not parse), a weight of 100 and a charge of 0 are assumed.
    The weight is then corrected in three steps:

    * a negative net charge adds 6.941 per unit of charge (lithium
      counter-ions are often omitted in the compound database);
    * 40.078 is subtracted for every "Ca" substring and 24.305 for every
      "Mg" substring, since Ca and Mg are often used only to represent
      repeating units.

    Args:
        smiles: SMILES string of the compound.

    Returns:
        The corrected weight as a number.
    """
    mol = Chem.MolFromSmiles(smiles)
    try:
        wt = Descriptors.MolWt(mol)
        f_charge = Chem.rdmolops.GetFormalCharge(mol)
    except Exception:
        # Deliberate best-effort fallback, but no longer swallows
        # KeyboardInterrupt/SystemExit like the bare ``except:`` did.
        wt = 100
        f_charge = 0

    # lithium is often omitted in the compound database. add the weight of lithium
    if f_charge < 0:
        wt = wt - 6.941 * f_charge

    # Ca and Mg are often used just to represent the repeating units
    wt = wt - 40.078 * smiles.count("Ca")
    wt = wt - 24.305 * smiles.count("Mg")

    return wt
def calculate_properties_from_mol(self):
    """Compute static physico-chemical properties with RDKit and store them.

    A molecule is built from ``self.smiles`` and named after
    ``self.sequence``. The following attributes are then set on the
    instance (nothing is returned):

    * ``num_hdonors`` / ``num_hacceptors`` - hydrogen bond donor/acceptor
      counts
    * ``mol_weight`` - molecular weight
    * ``mol_logp`` - Crippen logP
    """
    # Generate molecule from sequence
    molecule = Chem.MolFromSmiles(self.smiles)
    molecule.SetProp("_Name", self.sequence)

    # Calculate the descriptors, in the same order as they are stored
    descriptor_table = (
        ("num_hdonors", Lipinski.NumHDonors),
        ("num_hacceptors", Lipinski.NumHAcceptors),
        ("mol_weight", Descriptors.MolWt),
        ("mol_logp", Crippen.MolLogP),
    )
    for attribute, compute in descriptor_table:
        setattr(self, attribute, compute(molecule))
def getDiscriptor(self):
    """Compute RDKit descriptors for each structure and write them to CSV.

    Changes the working directory to a hard-coded folder, reads the 'CAS'
    and 'canonical_smiles' columns of ``extChronicStrcture.csv`` (rows with
    a missing value are dropped), computes one descriptor row per structure
    and writes the table to ``Descriptors.csv`` without an index column.

    'AromProp' is the number of aromatic atoms divided by the number of
    atoms in the parsed molecule. The H-bond donor/acceptor counts of every
    row are also printed.
    """
    from rdkit.Chem import Crippen
    from rdkit import Chem
    import pandas as pd
    from rdkit.Chem import Descriptors, Lipinski
    import os
    os.chdir(r"G:\マイドライブ\Data\Meram Chronic Data")
    df = pd.read_csv('extChronicStrcture.csv', engine='python')
    df = df[['CAS', 'canonical_smiles']]
    df = df.dropna(how='any')
    columns = [
        'CAS', 'weight', 'logP', 'RotatableBonds', 'HeavyAtomCounts',
        'AromProp', 'TPSA', 'HDonor', 'HAcceptors', 'FractionCSP3',
        'AromaticCarbocycles', 'AromaticHeterocycles'
    ]
    # Compile the aromatic-atom pattern once instead of once per row.
    aromatic_atom = Chem.MolFromSmarts('[a]')
    # Collect plain rows and build the frame once: concatenating a growing
    # DataFrame inside the loop copies all previous rows on every iteration.
    rows = []
    for cas, smiles in zip(df['CAS'], df['canonical_smiles']):
        mol = Chem.MolFromSmiles(smiles)
        wt = Descriptors.MolWt(mol)
        rot = Lipinski.NumRotatableBonds(mol)
        heavy = Lipinski.HeavyAtomCount(mol)
        logp = Crippen.MolLogP(mol)
        aromaticHeavyatoms = len(mol.GetSubstructMatches(aromatic_atom))
        numAtoms = mol.GetNumAtoms()
        aromprop = float(aromaticHeavyatoms / numAtoms)
        TPSA = Descriptors.TPSA(mol)
        HDonors = Descriptors.NumHDonors(mol)
        HAcceptors = Descriptors.NumHAcceptors(mol)
        FractionCSP3 = Descriptors.FractionCSP3(mol)
        AromaticCarbocycles = Descriptors.NumAromaticCarbocycles(mol)
        AromaticHeterocycles = Descriptors.NumAromaticHeterocycles(mol)
        print(HDonors, HAcceptors)
        rows.append([
            cas, wt, logp, rot, heavy, aromprop, TPSA, HDonors, HAcceptors,
            FractionCSP3, AromaticCarbocycles, AromaticHeterocycles
        ])
    resultDf = pd.DataFrame(rows, columns=columns)
    resultDf.to_csv('Descriptors.csv', index=False)
def compute_descriptors(smiles):
    """Compute a fixed set of RDKit descriptors for a sequence of SMILES.

    Args:
        smiles: Iterable of SMILES strings.

    Returns:
        A 7-tuple of lists, each aligned with the input order: canonical
        SMILES, H-bond acceptor count, H-bond donor count, rotatable bond
        count, logP, TPSA and molecular weight.
    """
    mols = [Chem.MolFromSmiles(smi) for smi in smiles]
    canonical = [Chem.MolToSmiles(mol) for mol in mols]
    acceptors = [Descriptors.NumHAcceptors(mol) for mol in mols]
    donors = [Descriptors.NumHDonors(mol) for mol in mols]
    rotatable = [Descriptors.NumRotatableBonds(mol) for mol in mols]
    logp = [Descriptors.MolLogP(mol) for mol in mols]
    tpsa = [Descriptors.TPSA(mol) for mol in mols]
    weight = [Descriptors.MolWt(mol) for mol in mols]
    return canonical, acceptors, donors, rotatable, logp, tpsa, weight
def score(smiles, num_docking=3):
    """Dock a molecule with rDock and return its best (lowest) score.

    The SMILES is embedded in 3D, optimised with UFF and written to
    ``<md5>.sdf``; ``rbdock`` is then run through the shell unless its
    output file ``<md5>_out.sd`` already exists. Only molecules with a
    molecular weight below 500 are docked.

    Args:
        smiles: SMILES string; an empty string is treated as "no molecule".
        num_docking: Value passed to ``rbdock -n``.

    Returns:
        The minimum value found after a ``<SCORE.INTER>`` tag in the output
        file, or ``1e10`` when nothing was docked or any step failed.
    """
    smiles_md5 = hashlib.md5(smiles.encode('utf-8')).hexdigest()
    docking_result_file = '{}_out'.format(smiles_md5)
    sdf_name = '{}.sdf'.format(smiles_md5)
    score_name = '<SCORE.INTER>'  # <SCORE> or <SCORE.INTER>

    min_score = 1e10

    # Translation from SMILES to sdf
    mol = None if smiles == '' else Chem.MolFromSmiles(smiles)
    try:
        if mol is not None and Descriptors.MolWt(mol) < 500:
            mol = Chem.AddHs(mol)
            AllChem.EmbedMolecule(mol)
            AllChem.UFFOptimizeMolecule(mol, maxIters=200)
            fw = Chem.SDWriter(sdf_name)
            try:
                fw.write(mol)
            finally:
                # Always release the file handle, even if write() fails.
                fw.close()

            # rdock calculation
            cmd = '$RBT_ROOT/bin/rbdock -r cavity.prm '\
                  '-p $RBT_ROOT/data/scripts/dock.prm '\
                  '-i {} -o {} -T 1 -n {} > /dev/null'\
                  .format(sdf_name, docking_result_file, num_docking)
            path = docking_result_file + '.sd'
            if not os.path.exists(path):
                subprocess.call(cmd, shell=True)

            # find the minimum score of rdock from multiple docking results
            if os.path.exists(path):
                with open(path, 'r') as f:
                    next_line_is_score = False
                    # Stream the file instead of loading every line at once.
                    for line in f:
                        if next_line_is_score:
                            min_score = min(float(line), min_score)
                            next_line_is_score = False
                        if score_name in line:  # next line has score
                            next_line_is_score = True
    except Exception:
        # Deliberate best effort: any failure leaves the score found so far
        # (the 1e10 sentinel if none) as the result.
        pass
    return min_score
def run_filter(self, mol):
    """
    Run a Ghose filter for drug-likeliness.

    The molecule is copied and protonated first, because this
    implementation counts hydrogens towards the total number of atoms.

    To pass the filter a molecule must have:
        exact molecular weight between 160 and 500
        number of atoms (including hydrogens) between 20 and 70
        molar refractivity between 40 and 130
        logP between -0.4 and +5.6
    All bounds are inclusive. Checks run in the order above and stop at
    the first failure.

    Inputs:
    :param rdkit.Chem.rdchem.Mol object mol: An rdkit mol object to be tested
        if it passes the filters
    Returns:
    :returns: bool bool: True if the mol passes the filter; False if it fails
    """

    def outside(value, low, high):
        # True when value lies strictly outside the closed interval.
        return value < low or value > high

    protonated = Chem.AddHs(copy.deepcopy(mol))

    # ``or`` short-circuits, so later descriptors are only computed when all
    # earlier checks passed.
    return not (
        outside(Descriptors.ExactMolWt(protonated), 160, 500)
        or outside(protonated.GetNumAtoms(), 20, 70)
        or outside(Crippen.MolMR(protonated), 40, 130)
        or outside(Crippen.MolLogP(protonated), -0.4, 5.6)
    )
def calc_props_dude_extended(smiles):
    """Compute the extended DUD-E style property set for a SMILES string.

    Args:
        smiles: SMILES string of the molecule.

    Returns:
        ``(prop_dict, prop_array)`` where ``prop_dict`` maps 'mol_wg',
        'log_p', 'hba', 'hbd', 'ring_ct', 'rot_bnds', 'net_charge', 'tpsa'
        and 'stereo_cnts' to their values and ``prop_array`` lists the same
        values in that order. If anything fails, ``({}, [-10] * 9)`` is
        returned instead.
    """
    try:
        mol = Chem.MolFromSmiles(smiles)
        prop_dict = {
            # molweight
            'mol_wg': Descriptors.MolWt(mol),
            # logP
            'log_p': Chem.Crippen.MolLogP(mol),
            # HBA
            'hba': Chem.rdMolDescriptors.CalcNumLipinskiHBA(mol),
            # HBD
            'hbd': Chem.rdMolDescriptors.CalcNumLipinskiHBD(mol),
            # ring count
            'ring_ct': Chem.rdMolDescriptors.CalcNumRings(mol),
            # rotatable bonds
            'rot_bnds': Chem.rdMolDescriptors.CalcNumRotatableBonds(mol),
            # Formal (net) charge
            'net_charge': Chem.rdmolops.GetFormalCharge(mol),
            # Topological polar surface area
            'tpsa': Chem.rdMolDescriptors.CalcTPSA(mol),
            # Stereo centers
            'stereo_cnts': len(
                Chem.FindMolChiralCenters(mol,
                                          force=True,
                                          includeUnassigned=True)),
        }
        ordered_keys = ('mol_wg', 'log_p', 'hba', 'hbd', 'ring_ct',
                        'rot_bnds', 'net_charge', 'tpsa', 'stereo_cnts')
        prop_array = [prop_dict[key] for key in ordered_keys]
        return (prop_dict, prop_array)
    except Exception:
        # Best-effort sentinel result; unlike a bare ``except:`` this lets
        # KeyboardInterrupt and SystemExit propagate.
        return ({}, [-10, -10, -10, -10, -10, -10, -10, -10, -10])
def get_score_components_from_mol(this_mol):
    """Return the three score components of a molecule.

    Args:
        this_mol: Molecule to score.

    Returns:
        ``(logP, SA_score, cycle_score)`` where ``logP`` falls back to 0.0
        if it cannot be computed, ``SA_score`` is the negated
        ``sascorer.calculateScore`` value, and ``cycle_score`` is the
        negated amount by which the largest cycle of the cycle basis
        exceeds 6 members (0 when no cycle is larger than 6).
    """
    try:
        logP = Descriptors.MolLogP(this_mol)
    except Exception:
        # Narrowed from a bare ``except:`` so interrupts are not swallowed.
        logP = 0.0
    SA_score = -sascorer.calculateScore(this_mol)
    cycle_list = nx.cycle_basis(
        nx.Graph(rdmolops.GetAdjacencyMatrix(this_mol)))
    largest_cycle = max(len(cycle) for cycle in cycle_list) if cycle_list else 0
    cycle_score = -max(largest_cycle - 6, 0)
    return logP, SA_score, cycle_score
def zinc_logp(smile):
    """Print the largest logP and then all logP values of the given compounds.

    Args:
        smile: Sequence of compounds, each itself a sequence of string
            pieces that are concatenated into one SMILES string.

    Returns:
        None; the results are only printed.
    """
    compound = [''.join(pieces) for pieces in smile]
    logp_value = [
        Descriptors.MolLogP(Chem.MolFromSmiles(smi)) for smi in compound
    ]
    print(max(logp_value))
    print(logp_value)
def logp_calculation(new_compound):
    """Compute logP for the first entry of ``new_compound``.

    Args:
        new_compound: Sequence whose first element is the SMILES to score.

    Returns:
        ``(logp, valid_smile, all_smile)``. ``logp`` is -100 when the SMILES
        does not parse or logP cannot be computed. ``valid_smile`` contains
        ``new_compound`` itself when scoring succeeded and is empty
        otherwise. ``all_smile`` always contains the SMILES as a string.
    """
    # print() with a single argument behaves the same on Python 2 and 3.
    print(new_compound[0])
    valid_smile = []
    all_smile = []
    m = Chem.MolFromSmiles(str(new_compound[0]))
    try:
        if m is not None:
            logp = Descriptors.MolLogP(m)
            valid_smile.append(new_compound)
        else:
            logp = -100
    except Exception:
        # Narrowed from a bare ``except:`` so interrupts are not swallowed.
        logp = -100
    all_smile.append(str(new_compound[0]))
    return logp, valid_smile, all_smile
def get_mol_set(self, smiles, atsym=('H', 'C', 'N', 'O'), MaxNa=20):
    """Build the list of molecules that pass a set of simple filters.

    A SMILES is kept when it parses, can be embedded after adding hydrogens
    (``EmbedMolecule`` returns 0), contains only symbols from ``atsym``,
    has fewer than ``MaxNa`` entries in the species list returned by
    ``__convert_rdkitmol_to_nparr__``, has a zero sum of atomic formal
    charges and has at least one rotatable bond.

    Args:
        smiles: Iterable of SMILES strings.
        atsym: Allowed atomic symbols. The default is a tuple so that it is
            not a shared mutable object.
        MaxNa: Exclusive upper bound on the number of species entries.

    Returns:
        List of hydrogen-added, embedded molecules that passed.
    """
    mols = []
    for sm in smiles:
        mol = Chem.MolFromSmiles(sm)
        if not mol:
            continue
        mol = Chem.AddHs(mol)
        if AllChem.EmbedMolecule(mol) != 0:
            continue
        fc = sum(a.GetFormalCharge() for a in mol.GetAtoms())
        _, S = __convert_rdkitmol_to_nparr__(mol)
        if set(S).issubset(atsym) and len(S) < MaxNa and fc == 0:
            if Descriptors.NumRotatableBonds(mol) > 0:
                mols.append(mol)
    return mols
def simulation(chem_model, state):
    """Roll out a compound from ``state`` and score it.

    The compound is generated with ``chem_kn_simulation``,
    ``predict_smile`` and ``make_input_smile`` using the token vocabulary
    below. A parseable compound is scored as the sum of its negated SA
    score, its logP (-1000 if it cannot be computed) and its cycle penalty
    (negated excess of the largest basis cycle over 6 members); the sum
    ``s`` is squashed to ``s / (1 + |s|)``.

    Args:
        chem_model: Model handed to ``chem_kn_simulation``.
        state: Starting state handed to ``chem_kn_simulation``.

    Returns:
        ``(score, smiles)``; the score is ``-1000 / 1001`` when the
        generated SMILES cannot be parsed.
    """
    val = [
        '\n', '&', 'C', '(', ')', 'c', '1', '2', 'o', '=', 'O', 'N', '3', 'F',
        '[C@@H]', 'n', '-', '#', 'S', 'Cl', '[O-]', '[C@H]', '[NH+]', '[C@]',
        's', 'Br', '/', '[nH]', '[NH3+]', '4', '[NH2+]', '[C@@]', '[N+]',
        '[nH+]', '\\', '[S@]', '5', '[N-]', '[n+]', '[S@@]', '[S-]', '6', '7',
        'I', '[n-]', 'P', '[OH+]', '[NH-]', '[P@@H]', '[P@@]', '[PH2]',
        '[P@]', '[P+]', '[S+]', '[o+]', '[CH2-]', '[CH-]', '[SH+]', '[O+]',
        '[s+]', '[PH+]', '[PH]', '8', '[S@@+]'
    ]
    all_posible = chem_kn_simulation(chem_model, state, val)
    generate_smile = predict_smile(all_posible, val)
    new_compound = make_input_smile(generate_smile)
    try:
        m = Chem.MolFromSmiles(str(new_compound[0]))
    except Exception:
        m = None
    if m is not None:
        try:
            logp = Descriptors.MolLogP(m)
        except Exception:
            logp = -1000
        # Reuse the molecule parsed above instead of re-parsing the SMILES
        # for every descriptor.
        SA_score = -sascorer.calculateScore(m)
        cycle_list = nx.cycle_basis(nx.Graph(rdmolops.GetAdjacencyMatrix(m)))
        largest_cycle = (max(len(cycle) for cycle in cycle_list)
                         if cycle_list else 0)
        cycle_score = -max(largest_cycle - 6, 0)
        # The components are deliberately used without normalisation.
        score_one = SA_score + logp + cycle_score
        score = score_one / (1 + abs(score_one))
    else:
        score = -1000 / (1 + 1000)
    return score, new_compound[0]
def check_others(self, infile):
    """
    Keep only entries without radical electrons and without charged atoms.

    For every value in the "index" column of the index file, the first
    molecule of ``<index><self.suffix>`` (read from ``self.datadir``) is
    inspected. Entries with zero radical electrons and no atom carrying a
    non-zero formal charge are kept, and the filtered table is written to
    ``self.outdir`` as ``<infile stem>_rmrpc.csv``.

    The working directory is switched to ``self.datadir`` while running and
    is always set to ``self.olddir`` afterwards, even on failure.

    Args:
        infile: data index file (name relative to ``self.outdir``)
    """
    update_list = []
    os.chdir(self.datadir)
    try:
        data = pd.read_csv(os.path.join(self.outdir, infile))
        for i in list(data["index"]):
            mol = Chem.SDMolSupplier(str(i) + self.suffix)[0]
            if Descriptors.NumRadicalElectrons(mol) != 0:
                continue
            if all(atom.GetFormalCharge() == 0 for atom in mol.GetAtoms()):
                update_list.append(i)
        data = data[data["index"].isin(update_list)]
        data.to_csv(os.path.join(self.outdir,
                                 infile.split(".")[0] + "_rmrpc.csv"),
                    index=False)
    finally:
        # Restore the directory even if reading or writing raised.
        os.chdir(self.olddir)
def check_node_type(new_compound, SA_mean, SA_std, logP_mean, logP_std,
                    cycle_mean, cycle_std):
    """Score every valid compound with normalised SA, logP and cycle terms.

    A compound is valid when it parses as SMILES, is at most 81 characters
    long and its logP can be computed. Each component is standardised with
    the supplied mean and standard deviation and the three are summed.

    Args:
        new_compound: Sequence of SMILES strings.
        SA_mean, SA_std: Normalisation constants for the negated SA score.
        logP_mean, logP_std: Normalisation constants for logP.
        cycle_mean, cycle_std: Normalisation constants for the cycle score.

    Returns:
        ``(node_index, score, valid_compound)``: positions of the valid
        compounds in ``new_compound``, their scores, and the compounds.
    """
    node_index = []
    valid_compound = []
    score = []
    for i, compound in enumerate(new_compound):
        try:
            m = Chem.MolFromSmiles(str(compound))
        except Exception:
            # Previously ``m`` stayed unbound (first item) or kept the
            # molecule of the previous compound when parsing raised.
            m = None
        if m is None or len(compound) > 81:
            continue
        try:
            logp = Descriptors.MolLogP(m)
        except ValueError:
            # handle sanitization error: explicit valence for atom is
            # greater than permitted
            continue
        node_index.append(i)
        valid_compound.append(compound)
        # Reuse the parsed molecule instead of re-parsing the SMILES.
        SA_score = -sascorer.calculateScore(m)
        cycle_list = nx.cycle_basis(nx.Graph(rdmolops.GetAdjacencyMatrix(m)))
        largest_cycle = (max(len(cycle) for cycle in cycle_list)
                         if cycle_list else 0)
        cycle_score = -max(largest_cycle - 6, 0)
        SA_score_norm = (SA_score - SA_mean) / SA_std
        logp_norm = (logp - logP_mean) / logP_std
        cycle_score_norm = (cycle_score - cycle_mean) / cycle_std
        score.append(SA_score_norm + logp_norm + cycle_score_norm)
    return node_index, score, valid_compound
def generate_fingerprints_and_create_list(self):
    """Annotate ``self.predicted`` with similarity and drug-likeness columns.

    Adds the columns 'similarities' (highest Tanimoto similarity of each
    predicted molecule to any molecule in ``self.true_pos``, using Morgan
    fingerprints with radius 2 and 2048 bits), 'sa_score', 'qeds' and
    'logp', and stores the BRENK filter matches in ``self.brenk``.

    A shortlist mask (similarity < 0.2, SA score < 4, QED > 0.25,
    logP < 6, no BRENK match) is built at the end; within this function it
    is not used further.
    """
    # Fingerprints of predicted ligands and known ligands
    morgan = rdFingerprintGenerator.GetMorganGenerator(fpSize=2048, radius=2)
    predicted_fps = [
        morgan.GetFingerprint(mol) for mol in self.predicted['molecules']
    ]
    true_fps = [
        morgan.GetFingerprint(mol) for mol in self.true_pos['molecules']
    ]

    # Best similarity of every predicted ligand against the known ligands
    similarities = []
    for predicted_fp in predicted_fps:
        tanimoto_values = [
            DataStructs.TanimotoSimilarity(predicted_fp, known_fp)
            for known_fp in true_fps
        ]
        best = np.argmax(tanimoto_values)
        similarities.append(tanimoto_values[best])

    # module code is in:
    # https://github.com/rdkit/rdkit/tree/master/Contrib/SA_Score
    sa_score = [
        sascorer.calculateScore(mol)
        for mol in list(self.predicted['molecules'])
    ]

    # QED drug-likeness score, reference: https://doi.org/10.1038/nchem.1243
    qeds = [qed(mol) for mol in self.predicted['molecules']]

    logp = [Descriptors.MolLogP(mol) for mol in self.predicted['molecules']]

    # filter catalog usage instructions are here:
    # https://github.com/rdkit/rdkit/pull/536
    params = FilterCatalogParams()
    params.AddCatalog(FilterCatalogParams.FilterCatalogs.BRENK)
    catalog = FilterCatalog(params)
    self.brenk = np.array(
        [catalog.HasMatch(mol) for mol in self.predicted['molecules']])

    # Add the lists as columns of the 'predicted' DataFrame
    self.predicted['similarities'] = similarities
    self.predicted['sa_score'] = sa_score
    self.predicted['qeds'] = qeds
    self.predicted['logp'] = logp

    print(self.predicted['logp'] < 6)

    is_novel = self.predicted['similarities'] < 0.2
    is_synthesizable = self.predicted['sa_score'] < 4
    is_druglike = self.predicted['qeds'] > 0.25
    is_not_greasy = self.predicted['logp'] < 6
    shortlist_mask = (is_novel & is_synthesizable & is_druglike
                      & is_not_greasy & (~self.brenk))
def _make_compound_info(mol_object):
    """Build the compound-info dictionary for a molecule.

    Returns a dict with the keys 'smiles', 'inchikey', 'mass', 'exactmass',
    'formula', 'charge', 'fingerprints' and 'dblinks'. 'fingerprints' holds
    the MACCS and RDKit fingerprints, each as a mapping from the string
    form of every on-bit index to 1. 'dblinks' starts out empty.
    """
    smiles = AllChem.MolToSmiles(mol_object, True)
    inchikey = AllChem.InchiToInchiKey(AllChem.MolToInchi(mol_object))
    mass = Descriptors.MolWt(mol_object)
    exact_mass = AllChem.CalcExactMolWt(mol_object)
    formula = AllChem.CalcMolFormula(mol_object)
    charge = AllChem.GetFormalCharge(mol_object)
    maccs_bits = {
        str(bit): 1
        for bit in AllChem.GetMACCSKeysFingerprint(mol_object).GetOnBits()
    }
    rdkit_bits = {
        str(bit): 1
        for bit in AllChem.RDKFingerprint(mol_object).GetOnBits()
    }
    return {
        'smiles': smiles,
        'inchikey': inchikey,
        'mass': mass,
        'exactmass': exact_mass,
        'formula': formula,
        'charge': charge,
        'fingerprints': {
            'maccs': maccs_bits,
            'rdkit': rdkit_bits,
        },
        'dblinks': {},
    }
def make_fingerprints(length, verbose, mols, chosen=None):
    """Create the selected fingerprint object and apply it to ``mols``.

    Args:
        length: Bit/vector size passed to the fingerprint functions that
            accept one.
        verbose: When truthy, print the name of the fingerprint being
            computed.
        mols: Molecules handed to ``fingerprint.apply_fp``.
        chosen: 1 atom pair, 2 topological torsion, 3 Morgan (radius 3),
            4 EState, 5 Avalon, 6 Avalon with molecular weight appended,
            7 ErG, 8 RDKit, 9 RDKit (FingerprintMols); any other value
            selects MACCS keys.

    Returns:
        A one-element list holding the applied ``fingerprint`` object.
    """
    if chosen == 1:
        # The label used to read "&qfuot;Atom pair (1985)": a mangled HTML
        # quote entity that leaked into the name.
        calc = lambda x: GetHashedAtomPairFingerprintAsBitVect(x, nBits=length)
        label = "Atom pair (1985)"
    elif chosen == 2:
        calc = lambda x: GetHashedTopologicalTorsionFingerprintAsBitVect(
            x, nBits=length)
        label = "Topological torsion (1987)"
    elif chosen == 3:
        calc = lambda x: GetMorganFingerprintAsBitVect(x, 3, nBits=length)
        label = "Morgan circular "
    elif chosen == 4:
        calc = FingerprintMol
        label = "Estate (1995)"
    elif chosen == 5:
        calc = lambda x: GetAvalonFP(x, nBits=length)
        label = "Avalon bit based (2006)"
    elif chosen == 6:
        calc = lambda x: np.append(GetAvalonFP(x, nBits=length),
                                   Descriptors.MolWt(x))
        label = "Avalon+mol. weight"
    elif chosen == 7:
        calc = lambda x: GetErGFingerprint(x)
        label = "ErG fingerprint (2006)"
    elif chosen == 8:
        calc = lambda x: RDKFingerprint(x, fpSize=length)
        label = "RDKit fingerprint"
    elif chosen == 9:
        calc = lambda x: FingerprintMols.FingerprintMol(x)
        label = "RDKit fingerprint2"
    else:
        calc = lambda x: MACCSkeys.GenMACCSKeys(x)
        label = "RDKit MACCSkeys"

    fp_list = [fingerprint(calc, label)]
    for fp in fp_list:
        if verbose:
            print("doing", fp.name)
        fp.apply_fp(mols)
    return fp_list
def add_features_to_test_file(input, output):
    """Copy a CSV file, appending molecule feature columns to every row.

    The first line of ``input`` is treated as the header. For every later
    line the second comma-separated field is parsed as SMILES and the atom
    count, bond count, molecular weight, number of '=' characters in the
    SMILES and number of valence electrons are appended. The line number is
    printed every 10000 rows as a progress indicator.

    Args:
        input: Path of the CSV file to read.
        output: Path of the CSV file to write.
    """
    with open(input, 'r') as f:
        lines = f.read().splitlines()

    with open(output, 'w') as f:
        # write(), not writelines(): the argument is a single string.
        f.write(lines[0] +
                ',atom,bonds,molwt,double_bonds,valence_electrons\n')
        for line_no, row in enumerate(lines[1:], 1):
            smiles = row.split(',')[1]
            m = Chem.MolFromSmiles(smiles)
            fields = [
                row,
                str(m.GetNumAtoms()),
                str(m.GetNumBonds()),
                str(Descriptors.MolWt(m)),
                str(smiles.count('=')),
                str(Descriptors.NumValenceElectrons(m)),
            ]
            if line_no % 10000 == 0:
                # print() with one argument works on Python 2 and 3.
                print(line_no)
            f.write(','.join(fields) + '\n')
def fragment(mol, mode, quiet=False):
    """Return the biggest fragment of a (possibly multi-fragment) molecule.

    Args:
        mol: Molecule to inspect.
        mode: 'hac' to pick the fragment with the most heavy atoms, 'mw' to
            pick the one with the highest molecular weight.
        quiet: When false, the choice is reported through ``utils.log``.

    Returns:
        ``mol`` itself when it does not consist of several fragments;
        otherwise the chosen fragment with the properties of ``mol``
        (including its ``_Name``) copied onto it.

    Raises:
        ValueError: If ``mode`` is not recognised (only checked when the
            molecule has several fragments).
    """
    frags = Chem.GetMolFrags(mol, asMols=True)
    if len(frags) <= 1:
        return mol

    if mode == 'hac':
        def size_of(frag):
            return frag.GetNumHeavyAtoms()
        basis = "based on HAC"
    elif mode == 'mw':
        def size_of(frag):
            return Descriptors.MolWt(frag)
        basis = "based on MW"
    else:
        raise ValueError('Invalid fragment mode:', mode)

    # TODO - handle ties (max() keeps the first of equally large fragments,
    # which also guarantees a fragment is always selected).
    biggest_index, biggest_mol = max(enumerate(frags),
                                     key=lambda pair: size_of(pair[1]))
    if not quiet:
        utils.log("Chose fragment", biggest_index, "from", len(frags), basis)

    # copy the properties across
    for name in mol.GetPropNames():
        biggest_mol.SetProp(name, mol.GetProp(name))

    # _Name is a magical property that is not in the ones returned by
    # GetPropNames, so a membership test on that list could never succeed;
    # ask the molecule directly instead.
    if mol.HasProp('_Name'):
        biggest_mol.SetProp("_Name", mol.GetProp("_Name"))

    return biggest_mol
def gaussion_workers(chem_model, val, state, m):
    """Roll out a compound from ``state`` and return its normalised score.

    The score is the sum of the standardised negated SA score, logP (-1000
    if it cannot be computed) and cycle penalty, using the module-level
    ``SA_mean``/``SA_std``, ``logP_mean``/``logP_std`` and
    ``cycle_mean``/``cycle_std`` constants.

    Args:
        chem_model: Model handed to ``chem_kn_simulation``.
        val: Token vocabulary.
        state: Starting state for the rollout.
        m: Extra argument forwarded unchanged to ``chem_kn_simulation``.

    Returns:
        ``[score, smiles]``; the score is -1000 when the generated SMILES
        cannot be parsed.
    """
    all_posible = chem_kn_simulation(chem_model, state, val, m)
    generate_smile = predict_smile(all_posible, val)
    new_compound = make_input_smile(generate_smile)
    score = []
    # Use a dedicated local for the molecule rather than overwriting the
    # ``m`` parameter.
    try:
        mol = Chem.MolFromSmiles(str(new_compound[0]))
    except Exception:
        mol = None
    if mol is not None:
        try:
            logp = Descriptors.MolLogP(mol)
        except Exception:
            logp = -1000
        # Reuse the parsed molecule instead of re-parsing the SMILES.
        SA_score = -sascorer.calculateScore(mol)
        cycle_list = nx.cycle_basis(
            nx.Graph(rdmolops.GetAdjacencyMatrix(mol)))
        largest_cycle = (max(len(cycle) for cycle in cycle_list)
                         if cycle_list else 0)
        cycle_score = -max(largest_cycle - 6, 0)
        SA_score_norm = (SA_score - SA_mean) / SA_std
        logp_norm = (logp - logP_mean) / logP_std
        cycle_score_norm = (cycle_score - cycle_mean) / cycle_std
        score.append(SA_score_norm + logp_norm + cycle_score_norm)
    else:
        score.append(-1000)
    score.append(new_compound[0])
    return score
def calc_descriptors(rdmol):
    """Return six scalar descriptors plus the Morgan bit vector of a molecule.

    Returns:
        ``[logp, mwt, rtb, hbd, hba, tpsa, fingerprint]`` where
        ``fingerprint`` is a NumPy array of length ``N_BITS`` filled from a
        radius-2 Morgan bit vector (``useFeatures=False``).
    """
    bit_vector = Chem.GetMorganFingerprintAsBitVect(rdmol,
                                                    radius=2,
                                                    nBits=N_BITS,
                                                    useFeatures=False)
    fingerprint_array = np.zeros(N_BITS)
    # Fills fingerprint_array in place.
    DataStructs.ConvertToNumpyArray(bit_vector, fingerprint_array)

    return [
        Descriptors.MolLogP(rdmol),
        Descriptors.MolWt(rdmol),
        Descriptors.NumRotatableBonds(rdmol),
        Descriptors.NumHDonors(rdmol),
        Descriptors.NumHAcceptors(rdmol),
        Descriptors.TPSA(rdmol),
        fingerprint_array,
    ]
def check_smi(insmi):
    """Return True when a SMILES string passes all structural filters.

    A SMILES is rejected when it does not parse, has fewer than 10 atoms,
    contains an element outside ``allowed_elements``, contains an atom with
    an isotope label, has 5 or more atoms whose chiral tag differs from
    ``non_s``, matches any pattern in ``rules_smarts``, weighs more than
    750, has more than 7 rings, or has a ring with more than 8 atoms.

    Checks stop at the first failure.
    """
    mol = Chem.MolFromSmiles(insmi)
    if not mol:
        return False

    atoms = mol.GetAtoms()
    if len(atoms) < 10:
        return False

    stereo_atoms = 0
    for atom in atoms:
        if atom.GetSymbol() not in allowed_elements:
            return False
        if atom.GetIsotope():
            return False
        if atom.GetChiralTag() != non_s:
            stereo_atoms += 1
    if stereo_atoms >= 5:
        return False

    if any(len(mol.GetSubstructMatches(patt)) > 0 for patt in rules_smarts):
        return False

    # check MW
    if Descriptors.MolWt(mol) > 750:
        return False

    ring_info = mol.GetRingInfo()
    if ring_info.NumRings() > 7:
        return False
    ring_sizes = [len(ring) for ring in ring_info.AtomRings()]
    if ring_sizes and max(ring_sizes) > 8:
        return False

    return True
def HOF_examples(output_dir):
    """
    Prepare figure showing the value of d for all molecules used in the
    BioHOFs from: 10.1021/jacs.9b06589

    For each molecule the file ``<output_dir>/<name>_diam_result.csv`` is
    read when it exists (molecules without a result file are skipped) and
    the smallest 'diam2' value is plotted against the molecular weight of
    the hydrogen-added molecule. The figure is saved as
    ``HOF_examples.pdf`` in the current working directory.

    """
    molecules = (
        ('fluorescein',
         'C1=CC=C2C(=C1)C(=O)OC23C4=C(C=C(C=C4)O)OC5=C3C=CC(=C5)O'),
        ('hydrogen_peroxide', 'OO'),
        ('methanol', 'CO'),
        ('formaldehyde', 'C=O'),
        ('urea', 'C(=O)(N)N'),
    )

    fig, ax = plt.subplots(figsize=(8, 5))
    for name, smiles in molecules:
        file_stem = name.replace(' ', '_').replace('/', '__')
        out_file = f"{output_dir}/{file_stem}_diam_result.csv"
        if not os.path.exists(out_file):
            continue
        results = pd.read_csv(out_file)
        mid_diam = min(results['diam2'])
        MW = Descriptors.MolWt(Chem.AddHs(Chem.MolFromSmiles(smiles)))
        print(name, name, MW, mid_diam)
        ax.scatter(MW,
                   mid_diam,
                   c='#5499C7',
                   edgecolors='k',
                   marker='o',
                   alpha=1.0,
                   s=140)

    pfn.define_standard_plot(ax,
                             xtitle='molecular weight [g/mol]',
                             ytitle=r'$d$ [$\mathrm{\AA}$]',
                             xlim=(10, 500),
                             ylim=(2.5, 15))
    fig.tight_layout()
    fig.savefig("HOF_examples.pdf", dpi=720, bbox_inches='tight')
def cyt_C_perox_assay(output_dir):
    """
    Prepare figure showing the change in intermediate diameter for 3 peroxide
    molecules degraded by Cyt-C in ZIF-8 (One-Pot Synthesis of
    Protein-Embedded Metal–Organic Frameworks with Enhanced Biological
    Activities, DOI:10.1021/nl5026419)

    For each molecule the file ``<output_dir>/<name>_diam_result.csv``
    (spaces in the name replaced by underscores) is read when it exists and
    the smallest 'diam2' value is plotted against the molecular weight of
    the hydrogen-added molecule. A hatched band marks the range 4.0 to 6.6
    on the y axis. The figure is saved as ``cytC_comp.pdf`` in the current
    working directory.

    """
    molecules = (
        ('hydrogen peroxide', 'OO'),
        ('methyl ethyl ketone peroxide', 'CCC(C)(OO)OOC(C)(CC)OO'),
        ('tert-butyl hydroperoxide', 'CC(C)(C)OO'),
    )

    fig, ax = plt.subplots()
    for name, smiles in molecules:
        file_stem = name.replace(' ', '_').replace('/', '__')
        out_file = f"{output_dir}/{file_stem}_diam_result.csv"
        if not os.path.exists(out_file):
            continue
        results = pd.read_csv(out_file)
        mid_diam = min(results['diam2'])
        MW = Descriptors.MolWt(Chem.AddHs(Chem.MolFromSmiles(smiles)))
        print(name, name, MW, mid_diam)
        ax.scatter(MW,
                   mid_diam,
                   c='k',
                   edgecolors='k',
                   marker='o',
                   alpha=1.0,
                   s=100)

    ax.axhspan(ymin=4.0, ymax=6.6, facecolor='k', alpha=0.2, hatch="/")
    pfn.define_standard_plot(ax,
                             xtitle='molecular weight [g/mol]',
                             ytitle=r'$d$ [$\mathrm{\AA}$]',
                             xlim=(10, 250),
                             ylim=(2.5, 8))
    fig.tight_layout()
    fig.savefig("cytC_comp.pdf", dpi=720, bbox_inches='tight')
def getGeneralInfo(self, params=None):
    """Return basic identifiers and properties for the SMILES in ``params``.

    Args:
        params: Mapping that must contain the key "smi" holding a SMILES
            string. Defaults to an empty mapping.

    Returns:
        An empty list (after printing a message) when "smi" is missing;
        otherwise a dict with the keys "canonized_smiles", "inchi", "MW"
        and "LogP".
    """
    # A None default avoids sharing one mutable dict between calls.
    if params is None:
        params = {}
    if "smi" not in params:
        print("Parameter 'smi' not found in argument list")
        return []

    req_smiles = params["smi"]
    mol = Chem.MolFromSmiles(req_smiles)
    canonized_smiles = Chem.MolToSmiles(mol)
    inchi = Chem.inchi.MolToInchi(mol)
    inchiKey = Chem.inchi.InchiToInchiKey(inchi)
    MW = Descriptors.MolWt(mol)
    LogP = Crippen.MolLogP(mol)
    return {
        "canonized_smiles": canonized_smiles,
        # NOTE(review): the "inchi" field carries the InChIKey, not the
        # InChI string - kept as-is for callers; confirm this is intended.
        "inchi": inchiKey,
        "MW": MW,
        "LogP": LogP
    }
def _featurize(self, mol):
    """
    Calculate the exact molecular weight.

    Parameters
    ----------
    mol : RDKit Mol
        Molecule.

    Returns
    -------
    np.ndarray of length 1 containing the exact molecular weight.

    Raises
    ------
    ValueError
        If RDKit cannot be imported.

    """
    try:
        from rdkit.Chem import Descriptors
    except ModuleNotFoundError:
        raise ValueError("This class requires RDKit to be installed.")

    return np.asarray([Descriptors.ExactMolWt(mol)])
def main(name, argv):
    """Print the building blocks that satisfy weight and flexibility limits.

    Args:
        name: Program name, forwarded to ``print_usage`` on bad arguments.
        argv: ``[path, max_molecular_weight, max_rotatable_bonds]``.

    Every line of the input file is split on whitespace; the first field is
    a SMILES string and the second its identifier. Lines with fewer than
    two fields and lines whose SMILES cannot be parsed are skipped.
    Molecules within both limits are printed as
    ``<canonical SMILES>\\t<identifier>``.
    """
    if len(argv) != 3:
        print_usage(name)
        return

    MW = float(argv[1])
    RB = int(argv[2])

    # read molport building blocks
    with open(argv[0], 'r') as f:
        for line in f:
            line_s = line.split()
            if len(line_s) < 2:
                # Blank or incomplete line: previously an IndexError.
                continue
            mol = Chem.MolFromSmiles(line_s[0])
            if mol is None:
                continue
            if (Descriptors.MolWt(mol) <= MW and
                    rdMolDescriptors.CalcNumRotatableBonds(mol) <= RB):
                # print() with one argument works on Python 2 and 3.
                print(Chem.MolToSmiles(mol) + "\t" + line_s[1])
def cut_some_bonds(frag_weight, mol, seed):
    """Randomly cut bonds so that fragments weigh roughly ``frag_weight``.

    The number of fragments aimed for is the molecular weight divided by
    ``frag_weight``, rounded; one bond fewer than that is cut, limited by
    the number of cuttable bonds and never fewer than zero.

    Args:
        frag_weight: Target weight of one fragment.
        mol: Molecule to fragment.
        seed: NOTE(review): not used in this function - the shuffle draws
            from the global ``random`` state; confirm the caller seeds it.

    Returns:
        ``(smiles, name, {})`` for a molecule that is not fragmented,
        otherwise the result of ``fragment_on_bonds_and_label``.
    """
    cuttable_bonds = common.find_cuttable_bonds(mol)
    cut_bonds_indexes = [b.GetIdx() for b in cuttable_bonds]
    total_weight = Descriptors.MolWt(mol)
    nb_frags = round(total_weight / frag_weight)
    # Clamp at zero: when nb_frags rounds to 0 the count would be -1, and
    # the slice [0:-1] below would cut all bonds but one of a molecule that
    # is too small to be fragmented at all.
    max_cuts = max(0, min(len(cut_bonds_indexes), nb_frags - 1))
    random.shuffle(cut_bonds_indexes)
    to_cut = cut_bonds_indexes[0:max_cuts]
    if len(to_cut) == 0:
        # molecule too small: not fragmented
        # still, we output it so that input and output SMILES files can be
        # visualized side-by-side
        smi = Chem.MolToSmiles(mol)
        name = get_name(mol)
        dico = {}
        return (smi, name, dico)
    else:
        return fragment_on_bonds_and_label(mol, to_cut)
def phys_featurizer(s):
    """Return 195 RDKit descriptor values for a SMILES string.

    The descriptors are evaluated in the fixed order listed below, so the
    position of every value in the returned list is stable (index 0 is
    BertzCT, index 24 ExactMolWt, index 194 fr_urea).

    Args:
        s: SMILES string.

    Returns:
        List of descriptor values, one per name in the table.
    """
    descriptor_names = (
        # 0-12: complexity and connectivity indices
        "BertzCT", "Chi0", "Chi0n", "Chi0v", "Chi1", "Chi1n", "Chi1v",
        "Chi2n", "Chi2v", "Chi3n", "Chi3v", "Chi4n", "Chi4v",
        # 13-23
        "EState_VSA1", "EState_VSA10", "EState_VSA11", "EState_VSA2",
        "EState_VSA3", "EState_VSA4", "EState_VSA5", "EState_VSA6",
        "EState_VSA7", "EState_VSA8", "EState_VSA9",
        # 24-33
        "ExactMolWt", "FractionCSP3", "HallKierAlpha", "HeavyAtomCount",
        "HeavyAtomMolWt", "Ipc", "Kappa1", "Kappa2", "Kappa3", "LabuteASA",
        # 34-41
        "MaxAbsEStateIndex", "MaxAbsPartialCharge", "MaxEStateIndex",
        "MaxPartialCharge", "MinAbsEStateIndex", "MinAbsPartialCharge",
        "MinEStateIndex", "MinPartialCharge",
        # 42-46
        "MolLogP", "MolMR", "MolWt", "NHOHCount", "NOCount",
        # 47-61
        "NumAliphaticCarbocycles", "NumAliphaticHeterocycles",
        "NumAliphaticRings", "NumAromaticCarbocycles",
        "NumAromaticHeterocycles", "NumAromaticRings", "NumHAcceptors",
        "NumHDonors", "NumHeteroatoms", "NumRadicalElectrons",
        "NumRotatableBonds", "NumSaturatedCarbocycles",
        "NumSaturatedHeterocycles", "NumSaturatedRings",
        "NumValenceElectrons",
        # 62-75
        "PEOE_VSA1", "PEOE_VSA10", "PEOE_VSA11", "PEOE_VSA12", "PEOE_VSA13",
        "PEOE_VSA14", "PEOE_VSA2", "PEOE_VSA3", "PEOE_VSA4", "PEOE_VSA5",
        "PEOE_VSA6", "PEOE_VSA7", "PEOE_VSA8", "PEOE_VSA9",
        # 76
        "RingCount",
        # 77-86
        "SMR_VSA1", "SMR_VSA10", "SMR_VSA2", "SMR_VSA3", "SMR_VSA4",
        "SMR_VSA5", "SMR_VSA6", "SMR_VSA7", "SMR_VSA8", "SMR_VSA9",
        # 87-98
        "SlogP_VSA1", "SlogP_VSA10", "SlogP_VSA11", "SlogP_VSA12",
        "SlogP_VSA2", "SlogP_VSA3", "SlogP_VSA4", "SlogP_VSA5", "SlogP_VSA6",
        "SlogP_VSA7", "SlogP_VSA8", "SlogP_VSA9",
        # 99
        "TPSA",
        # 100-109
        "VSA_EState1", "VSA_EState10", "VSA_EState2", "VSA_EState3",
        "VSA_EState4", "VSA_EState5", "VSA_EState6", "VSA_EState7",
        "VSA_EState8", "VSA_EState9",
        # 110-194: fragment counts
        "fr_Al_COO", "fr_Al_OH", "fr_Al_OH_noTert", "fr_ArN", "fr_Ar_COO",
        "fr_Ar_N", "fr_Ar_NH", "fr_Ar_OH", "fr_COO", "fr_COO2", "fr_C_O",
        "fr_C_O_noCOO", "fr_C_S", "fr_HOCCN", "fr_Imine", "fr_NH0", "fr_NH1",
        "fr_NH2", "fr_N_O", "fr_Ndealkylation1", "fr_Ndealkylation2",
        "fr_Nhpyrrole", "fr_SH", "fr_aldehyde", "fr_alkyl_carbamate",
        "fr_alkyl_halide", "fr_allylic_oxid", "fr_amide", "fr_amidine",
        "fr_aniline", "fr_aryl_methyl", "fr_azide", "fr_azo", "fr_barbitur",
        "fr_benzene", "fr_benzodiazepine", "fr_bicyclic", "fr_diazo",
        "fr_dihydropyridine", "fr_epoxide", "fr_ester", "fr_ether",
        "fr_furan", "fr_guanido", "fr_halogen", "fr_hdrzine", "fr_hdrzone",
        "fr_imidazole", "fr_imide", "fr_isocyan", "fr_isothiocyan",
        "fr_ketone", "fr_ketone_Topliss", "fr_lactam", "fr_lactone",
        "fr_methoxy", "fr_morpholine", "fr_nitrile", "fr_nitro",
        "fr_nitro_arom", "fr_nitro_arom_nonortho", "fr_nitroso",
        "fr_oxazole", "fr_oxime", "fr_para_hydroxylation", "fr_phenol",
        "fr_phenol_noOrthoHbond", "fr_phos_acid", "fr_phos_ester",
        "fr_piperdine", "fr_piperzine", "fr_priamide", "fr_prisulfonamd",
        "fr_pyridine", "fr_quatN", "fr_sulfide", "fr_sulfonamd",
        "fr_sulfone", "fr_term_acetylene", "fr_tetrazole", "fr_thiazole",
        "fr_thiocyan", "fr_thiophene", "fr_unbrch_alkane", "fr_urea",
    )

    m = Chem.MolFromSmiles(s)
    phys_features = []
    # Featurization: look up and evaluate one descriptor at a time, in order
    for descriptor_name in descriptor_names:
        phys_features.append(getattr(Descriptors, descriptor_name)(m))
    return phys_features