def _split_sd_record(record):
    """Split one SD record into (molblock lines, data-tag lines).

    Returns ``(None, None)`` when the record has no ``M  END`` terminator.
    """
    for pos, line in enumerate(record):
        # Compare tokens rather than the raw line so the terminator is found
        # whatever the spacing between "M" and "END" or the line ending is.
        if line.split() == ["M", "END"]:
            return record[:pos + 1], record[pos + 1:]
    return None, None


def read_db_from_sd_3d(infile, gz=False):
    """Read the 3D molecules of an SD file into a dictionary.

    The file is split into records on ``$$$$`` delimiter lines. For each
    record the molblock is parsed with RDKit; when the molecule has a 3D
    conformer, hydrogens are added (with coordinates) and the data tags
    following the molblock are parsed with ``read_tags``.

    :param infile: path of the SD file
    :param gz: if True, the file is opened with ``gzip`` in text mode
    :return: ``(sub, failed)`` where ``sub`` maps the record position to a
        dict with keys ``"confs"`` (molecule with hydrogens), ``"props"``
        (result of ``read_tags``) and ``"pattern"`` (SMILES of that
        molecule), and ``failed`` lists the positions of records that could
        not be parsed or have no ``M  END`` terminator.
    """
    sub = {}
    failed = []
    if gz:
        with gzip.open(infile, mode="rt") as handle:
            content = handle.readlines()
    else:
        with open(infile, "r") as handle:
            content = handle.readlines()

    # Consecutive non-delimiter lines form one record.
    sd_blocks = [
        list(group)
        for is_delimiter, group in groupby(content, lambda x: x.strip() == "$$$$")
        if not is_delimiter
    ]
    del content  # the raw lines can be large; only the records are needed

    for i, record in enumerate(sd_blocks):
        mol_block_list, tags = _split_sd_record(record)
        if mol_block_list is None:
            # Previously an uncaught ValueError aborted the whole read.
            failed.append(i)
            continue
        mol = Chem.MolFromMolBlock(''.join(mol_block_list))
        if not mol:
            failed.append(i)
            continue
        # NOTE(review): molecules without 3D coordinates are skipped without
        # being reported in ``failed`` -- confirm this matches the callers'
        # expectation.
        if mol.GetConformer().Is3D():
            name = mol.GetProp("_Name")
            mol = Chem.AddHs(mol, addCoords=True)
            sub[i] = {
                "confs": mol,
                "props": read_tags(name, tags),
                "pattern": Chem.MolToSmiles(mol),
            }
    return sub, failed
def index(self, smiles):
    """Compute the index vector of a molecule given as SMILES.

    The vector holds, in order: heavy-atom count, molecular weight (rounded
    to one decimal, times ten, truncated to int), the first element returned
    by ``self.get_shortest_wiener``, the rotatable-bond count of the
    hydrogen-added molecule, the number of matches for four ring-topology
    SMARTS patterns, and finally the values of ``self.get_ring_info``.

    :param smiles: SMILES string of the molecule
    :return: ``numpy`` array with the index values
    """
    rd_mol: Mol = Chem.MolFromSmiles(smiles)
    py_mol = pybel.readstring('smi', smiles)

    topology_smarts = (
        '[x3]',          # bridged atoms
        '[x4]',          # spiro atoms
        '[R]!@[R]',      # linked rings
        '[R]!@*!@[R]',   # separated rings
    )

    values = [
        py_mol.OBMol.NumHvyAtoms(),
        int(round(py_mol.molwt, 1) * 10),
        self.get_shortest_wiener(rd_mol)[0],
        Chem.CalcNumRotatableBonds(Chem.AddHs(rd_mol)),
    ]
    for pattern in topology_smarts:
        values.append(len(pybel.Smarts(pattern).findall(py_mol)))
    values.extend(self.get_ring_info(py_mol))
    return np.array(values)
def to_mol(self, add_hs: bool = False, num_confs: int = 10) -> "Mol":
    """Convert widget value to RDKit molecule.

    The SMILES of the widget value is used when it is non-empty, otherwise
    the molblock. If Hydrogen atoms are added, conformers are embedded so
    that the Hydrogen atoms get coordinates.

    :param add_hs: Add Hydrogen atoms
    :type add_hs: bool
    :param num_confs: Number of conformers to generate
    :type num_confs: int
    :return: RDKit molecule object
    :rtype: Mol
    :raises ValueError: if the widget value is empty or cannot be parsed
    """
    if self.value.smiles != "":
        mol = Chem.MolFromSmiles(self.value.smiles)
    elif self.value.molblock != "":
        mol = Chem.MolFromMolBlock(self.value.molblock)
    else:
        raise ValueError("Cannot create Mol object: JSME value is empty")

    if mol is None:
        # Adjacent literals instead of backslash continuations: the latter
        # embedded the source indentation in the message text.
        raise ValueError(
            "Cannot convert JSME widget value to Mol object:"
            f"\n{self.value}"
            "\nPlease try clicking 'Save' button on widget or remove "
            "Hydrogen atoms from SMILES string and try again."
        )

    if add_hs is True:
        mol = Chem.AddHs(mol)
        # Calculate conformers after adding hydrogens
        Chem.EmbedMultipleConfs(mol, numConfs=num_confs)
    return mol
def hygrogen_ends(self, macro_mol):
    """
    Strip atoms flagged with the ``del`` property, then add hydrogens.

    Used for polymers, where the functional groups at the chain ends are
    replaced by hydrogen atoms.

    Parameters
    ----------
    macro_mol : :class:`.Polymer`
        The polymer being assembled. Its ``mol`` attribute is replaced.

    Returns
    -------
    None : :class:`NoneType`

    """
    # Collect indices from the highest down so that removing an atom never
    # shifts the index of one that is still to be removed.
    doomed = [
        atom.GetIdx()
        for atom in reversed(macro_mol.mol.GetAtoms())
        if atom.HasProp('del')
    ]

    editable = rdkit.EditableMol(macro_mol.mol)
    for atom_idx in doomed:
        editable.RemoveAtom(atom_idx)

    stripped = remake(editable.GetMol())
    macro_mol.mol = stripped
    macro_mol.mol = rdkit.AddHs(macro_mol.mol, addCoords=True)
def from_mol(self) -> dict:
    """Parse the submitted molecule block and return it as a dict.

    The parser is chosen from the ``extension`` value (mol/sdf/mdl, mol2 or
    pdb). Hydrogens are added when the ``protons`` flag is true, then the
    molecule is turned into a ``Params`` object and serialised with
    ``self.to_dict``.

    :raises exc.HTTPClientError: for any other extension
    """
    extension = self.get('extension')
    if extension in ('mol', 'sdf', 'mdl'):
        mol = Chem.MolFromMolBlock(
            self.get('block'), sanitize=True, removeHs=False, strictParsing=True
        )
    elif extension in ('mol2', ):
        mol = Chem.MolFromMol2Block(
            self.get('block'), sanitize=True, removeHs=False
        )
    elif extension in ('pdb', ):
        mol = Chem.MolFromPDBBlock(
            self.get('block'), sanitize=True, removeHs=False, proximityBonding=False
        )
    else:
        raise exc.HTTPClientError(f"Format {extension} not supported")

    wants_protons = self.get_bool('protons') is True
    if wants_protons:
        mol = AllChem.AddHs(mol)

    params = Params.from_mol(
        mol, self.name, generic=self.generic, atomnames=self.atomnames
    )
    return self.to_dict(params)
def GenerateSDFFromMols(mols, inp_type):
    """Write each molecule to its own SDF file after 3D embedding.

    Every molecule gets hydrogens, is embedded in 3D, optimised with MMFF
    and has its stereochemistry assigned from the 3D structure. It is then
    written to ``<inp_type>_Mol_<index>_.sdf`` in the current working
    directory.

    :param mols: iterable of RDKit molecules
    :param inp_type: prefix used in the generated file names
    :return: list of the written file names without the ``.sdf`` extension
    """
    files = []
    cwd = os.getcwd()
    for ind, m in enumerate(mols):
        m = AllChem.AddHs(m, addCoords=True)
        AllChem.EmbedMolecule(m)
        AllChem.MMFFOptimizeMolecule(m)
        Chem.rdmolops.AssignStereochemistryFrom3D(m)

        f = inp_type + '_Mol_' + str(ind) + "_.sdf"
        files.append(f[:-4])
        fullf = os.path.join(cwd, f)

        save3d = Chem.SDWriter(fullf)
        try:
            save3d.write(m)
        finally:
            # Close explicitly so the record is flushed to disk right away
            # instead of whenever the writer is garbage collected.
            save3d.close()
    return files
def _from_smiles_w_pdb(cls, pdb: Chem.Mol, smiles, generic, name):
    """Build an instance from a SMILES string, using a PDB residue as the naming template.

    The residue called ``name`` is extracted from ``pdb``; a fresh molecule is
    built from ``smiles``, protonated, embedded and MMFF-optimised, loaded via
    ``cls.load_mol`` and then renamed against the extracted residue.

    :param pdb: RDKit molecule read from a PDB file
    :param smiles: SMILES of the ligand; ``*`` atoms are treated as dummies
    :param generic: forwarded to ``cls.load_mol``
    :param name: residue name to extract from ``pdb``; also set as ``_Name``
    :return: the new instance, with ``CHI.data`` emptied
    """
    # Residue taken from the PDB: used only as the template for renaming.
    dodgy = Chem.SplitMolByPDBResidues(pdb, whiteList=[name])[name]
    AllChem.SanitizeMol(dodgy)
    # Molecule built from SMILES: this is the one that gets loaded.
    good = Chem.MolFromSmiles(smiles)
    good.SetProp('_Name', name)
    dummies = []
    # Temporarily turn dummy atoms (*) into atomic number 9 and remember their
    # indices; presumably so sanitisation/embedding/MMFF can handle them --
    # TODO confirm.
    for atom in good.GetAtoms():
        if atom.GetSymbol() == '*':
            atom.SetAtomicNum(9)
            dummies.append(atom.GetIdx())
    Chem.SanitizeMol(good)
    good = AllChem.AddHs(good)
    AllChem.EmbedMolecule(good)
    AllChem.ComputeGasteigerCharges(good)
    AllChem.MMFFOptimizeMolecule(good)
    # Restore the remembered atoms to dummies (atomic number 0).
    for d in dummies:
        good.GetAtomWithIdx(d).SetAtomicNum(0)
    self = cls.load_mol(good, generic=generic, name=name)
    # NOTE(review): move_aside/move_back bracket the renaming step; their
    # exact effect is defined outside this block -- verify before reordering.
    self.move_aside()
    self.rename_from_template(dodgy)
    self.move_back()
    self.convert_mol()
    #####
    warnings.warn('CHI DISABLED. - has issues with this mode' )  # todo correct this issue!
    self.CHI.data = []  # !!!!
    return self
def smile_to_pdb(smile, pdb_out, mol_name, method_3d='rdkit', iter_num=5000):
    """Generate a 3D structure from a SMILES string and write it as PDB.

    The structure is built and MMFF94-optimised with either OpenBabel or
    RDKit. For the RDKit path the residue name ``UNL`` in the written file
    is then replaced by ``mol_name``.

    :param smile: SMILES string of the molecule
    :param pdb_out: path of the PDB file to write
    :param mol_name: residue name to use in the PDB file (RDKit path)
    :param method_3d: ``'rdkit'`` (default) or ``'openbabel'``
    :param iter_num: maximum number of force-field optimisation steps
    :return: the formal charge of the molecule
    :raises ValueError: if ``method_3d`` is not a supported backend
    """
    # Fail early: an unknown backend used to fall through both branches and
    # only surfaced later as an unbound ``charge``.
    if method_3d not in ('openbabel', 'rdkit'):
        raise ValueError(
            "Unknown method_3d {!r}; expected 'openbabel' or 'rdkit'".format(
                method_3d))

    if method_3d == 'openbabel':
        from openbabel import pybel

        conf = pybel.readstring("smi", smile)
        # Get charge
        charge = conf.charge
        conf.make3D(forcefield='mmff94', steps=iter_num)
        conf.write(format='pdb', filename=pdb_out, overwrite=True)
    else:
        from rdkit.Chem import AllChem as Chem

        conf = Chem.MolFromSmiles(smile)
        conf = Chem.AddHs(conf)
        # Get charge
        charge = Chem.GetFormalCharge(conf)
        Chem.EmbedMolecule(conf)
        Chem.MMFFOptimizeMolecule(
            conf, mmffVariant='MMFF94', maxIters=iter_num)
        Chem.MolToPDBFile(conf, filename=pdb_out)

        # Change resname of pdb file to `mol_name`
        coor = pdb_manip.Coor(pdb_out)
        index_list = coor.get_index_selection(
            selec_dict={'res_name': ['UNL']})
        coor.change_index_pdb_field(
            index_list, change_dict={'res_name': mol_name})
        coor.write_pdb(pdb_out, check_file_out=False)

    return charge
def generateXYZ(smiPath, ogMonomer):
    """Build a 3D structure from a SMILES file and save it as an XYZ file.

    The first line of ``smiPath`` is read as SMILES, hydrogens are added, the
    molecule is embedded and UFF-optimised, and the coordinates are written
    to ``smiPath`` with its last four characters replaced by ``.xyz``. The
    same coordinates are echoed to standard output, and ``grabAndSpin`` is
    then run on the written file.

    ``ogMonomer`` is not used by the current implementation.

    Returns the path of the XYZ file.
    """
    import sys

    emit = sys.stdout.write  # behaves the same on Python 2 and 3

    with open(smiPath, "r") as smiFile:
        smiles = str(next(smiFile))

    mol1 = Chem.MolFromSmiles(smiles)
    # Add missing Hs - recommended for 3D structure generation
    mol = Chem.AddHs(mol1)
    # EmbedMolecule sets up a 3D representation of the molecule
    Chem.EmbedMolecule(mol)
    # You can run an FF geometry optimization now if you want
    Chem.UFFOptimizeMolecule(mol)

    xyzPath = smiPath[:-4] + ".xyz"
    # The context manager closes the file even when a later step raises.
    with open(xyzPath, 'w') as newFile:
        newFile.write(str(mol.GetNumAtoms()) + "\n" + "$Comment" + " \n")
        # Print the generated conformation in xyz format
        emit("%d\n" % mol.GetNumAtoms())
        emit("Generated by %s\n" % argv[0])
        for i in range(0, mol.GetNumAtoms()):
            atom = mol.GetAtomWithIdx(i).GetSymbol()
            coords = mol.GetConformer().GetAtomPosition(i)
            emit("%6s%12.4f%12.4f%12.4f\n"
                 % (atom, coords.x, coords.y, coords.z))
            newFile.write(str(atom) + " " + str(coords.x) + " "
                          + str(coords.y) + " " + str(coords.z) + "\n")

    emit(" Recusive spinning Dx\n")
    grabAndSpin(xyzPath)
    return xyzPath
def prepare_mol_2(mol, property_name="", do_charge=False):
    """Attach a per-atom weight property to a molecule.

    * ``do_charge`` true: Gasteiger charges are computed and validated with
      ``check_mol``; the property name becomes ``"_GasteigerCharge"``.
    * ``do_charge`` false and ``property_name`` given: hydrogens are removed
      and the newline-separated values of that molecule property are set on
      the atoms.
    * otherwise: hydrogens are added and the weight ``1 / n_at`` is stored
      under ``"equal_w"``.

    :param mol: RDKit molecule
    :param property_name: name of the molecule property holding the values
    :param do_charge: whether to use Gasteiger charges
    :return: ``(mol, property_name, err)``
    """
    err = 0
    if do_charge:
        Chem.ComputeGasteigerCharges(mol)
        property_name = "_GasteigerCharge"
        err = check_mol(mol, property_name, do_charge)
    else:
        # NOTE(review): the atom count is taken before hydrogens are
        # removed/added below -- confirm this is intended.
        n_at = mol.GetNumAtoms()
        if property_name:
            mol = Chem.RemoveHs(mol)
            list_prop = mol.GetPropsAsDict()
            # extracts the property according to the set name
            string_values = list_prop[property_name].split("\n")
            # Build a real list: on Python 3 ``map`` is lazy and
            # ``np.asarray(map(...))`` gives an unusable 0-d object array.
            w = np.asarray([float(value) for value in string_values])
        else:
            mol = Chem.AddHs(mol)
            w = np.ones((n_at, 1)) / n_at
            # same format as previous calculation
            w = np.asarray([float(value) for value in w])
            property_name = 'equal_w'
            err = 0
        for atom in range(n_at):
            mol.GetAtomWithIdx(atom).SetDoubleProp(property_name, w[atom])
    return mol, property_name, err
def CleanUp(InputFiles):
    """Regenerate 3D structures for the input files and save cleaned copies.

    For each input name (a trailing ``.sdf`` is stripped) the molecule is
    loaded with ``GenerateMolFromSDF``, given hydrogens, embedded in 3D,
    MMFF-optimised and has its stereochemistry assigned from the 3D
    structure. It is then written to ``<name>cleaned.sdf`` in the current
    working directory.

    :param InputFiles: iterable of input file names, with or without ``.sdf``
    :return: list of the cleaned file names without extension
    """
    CleanedInputFiles = []
    cwd = os.getcwd()
    for f in InputFiles:
        # check input file types
        if f.endswith('.sdf'):
            f = f[:-4]
        fullf = os.path.join(cwd, f + 'cleaned.sdf')

        m = GenerateMolFromSDF(f)
        m = AllChem.AddHs(m, addCoords=True)
        AllChem.EmbedMolecule(m)
        AllChem.MMFFOptimizeMolecule(m)
        Chem.rdmolops.AssignStereochemistryFrom3D(m)

        save3d = Chem.SDWriter(fullf)
        try:
            save3d.write(m)
        finally:
            # Close explicitly so the file is flushed before anything else
            # tries to read the cleaned structure.
            save3d.close()
        CleanedInputFiles.append(f + 'cleaned')
    return CleanedInputFiles
def hmol(input, output):
    """Read a mol file, add hydrogens with coordinates, and write the result.

    :param input: path of the mol file to read (existing hydrogens are kept)
    :param output: path of the mol file to write
    """
    Chem.MolToMolFile(
        AllChem.AddHs(
            Chem.MolFromMolFile(input, removeHs=False),
            addCoords=True,
        ),
        output,
    )
def smiles_to_pdb(smiles_string, name=None):
    """
    Convert a SMILES string to 3D and write ``<name>.mol`` and ``<name>.pdb``.

    Hydrogens are added by RDKit, the molecule is embedded with ETKDG and
    sanitised before both files are written.

    :param smiles_string: The hydrogen free smiles string
    :param name: The name of the molecule; used for the output file names.
        When omitted the user is prompted for it.
    :return: The name of the written pdb file
    :raises SyntaxError: if the smiles string contains the character ``H``
    """
    # Originally written by venkatakrishnan; rewritten and extended by Chris Ringrose
    if 'H' in smiles_string:
        raise SyntaxError(
            'Smiles string contains hydrogen atoms; try again.')

    m = AllChem.MolFromSmiles(smiles_string)
    if name is None:
        name = input('Please enter a name for the molecule:\n>')
    m.SetProp('_Name', name)

    mol_hydrogens = AllChem.AddHs(m)
    AllChem.EmbedMolecule(mol_hydrogens, AllChem.ETKDG())
    AllChem.SanitizeMol(mol_hydrogens)

    # The handle used to be created inline in the print() call and was
    # never closed; the context manager guarantees the flush.
    with open(f'{name}.mol', 'w+') as mol_file:
        print(AllChem.MolToMolBlock(mol_hydrogens), file=mol_file)
    AllChem.MolToPDBFile(mol_hydrogens, f'{name}.pdb')

    return f'{name}.pdb'
def make_entry(
    mol: rdkit.Mol,
    sa_scorer: SyntheticAccesibilityScorer,
):
    """Build one result row for a molecule.

    Hydrogens are added, the three scores are computed by calling
    ``sa_scorer.calculate_sa`` with each function in ``sa_scorer.sa_funcs``,
    and the first key of ``fg_names`` whose SMARTS matches the molecule is
    taken as the functional-group name.

    :param mol: molecule to score
    :param sa_scorer: scorer exposing ``calculate_sa`` and three ``sa_funcs``
    :return: ``(smiles, fg_name, sascore, scscore, rfmodel)``; ``fg_name`` is
        ``""`` when nothing matches or the lookup fails
    """
    # Ensure hydrogens are added to molecule.
    mol = rdkit.AddHs(mol)

    sascore, scscore, rfmodel = [
        sa_scorer.calculate_sa(mol, func) for func in sa_scorer.sa_funcs
    ]

    try:
        matching = [
            name
            for name in fg_names
            if len(mol.GetSubstructMatch(rdkit.MolFromSmarts(fg_names[name])))
            != 0
        ]
        fg_name = str(matching[0])
    except Exception:
        # Best effort: no match (IndexError) or a pattern that cannot be
        # evaluated leaves the name empty. Unlike the former bare ``except``
        # this lets KeyboardInterrupt and SystemExit propagate.
        fg_name = ""

    return (
        rdkit.MolToSmiles(mol),
        str(fg_name),
        float(sascore),
        float(scscore),
        float(rfmodel),
    )
def create_from_smiles(self, smiles_code):
    """Populate ``self.atoms`` and ``self.bonds`` from a SMILES string.

    The molecule is parsed, hydrogens are added and a 2D depiction is
    computed. One ``atom`` is created per RDKit atom at its 2D position,
    the structure is centred and scaled, and one ``bond`` is created per
    RDKit bond.
    """
    # Creating base rdkit molecule object
    m = AllChem.AddHs(Chem.MolFromSmiles(smiles_code), False, False)
    depiction_id = AllChem.Compute2DCoords(m, False, True)
    AllChem.WedgeMolBonds(m, m.GetConformer(depiction_id))

    # Getting all atomic coordinates and transforming
    for atom_idx in range(m.GetNumAtoms()):
        atomic_number = m.GetAtomWithIdx(atom_idx).GetAtomicNum()
        xy = np.array([
            m.GetConformer().GetAtomPosition(atom_idx).x,
            m.GetConformer().GetAtomPosition(atom_idx).y,
        ])
        self.atoms.append(atom(atomic_number, self.atom_radii, xy))

    self.calculate_center()
    self.scale(self.scale_factor)

    # Getting all bonds with types
    for rd_bond in m.GetBonds():
        begin_atom = self.atoms[rd_bond.GetBeginAtomIdx()]
        end_atom = self.atoms[rd_bond.GetEndAtomIdx()]
        order = rd_bond.GetBondTypeAsDouble()
        self.bonds.append(bond(begin_atom, end_atom, order, self.scale_factor))
def descriptor_calc(smiles, mol_name):
    '''
    Calculate descriptors for one molecule and store them in the global ``d``.

    Every descriptor in ``Descriptors.descList`` is evaluated on the parsed
    molecule. Ten 3D descriptors are then computed on a hydrogen-added,
    embedded and MMFF-optimised copy. Results go to ``d[mol_name]``.
    '''
    # read in molecules
    mol = Chem.MolFromSmiles(smiles)

    # make parent dictionary
    d[mol_name] = {}
    entry = d[mol_name]

    # calculate descriptors and store as child dictionary
    for name, desc in Descriptors.descList:
        entry[name] = desc(mol)

    m2 = AllChem.AddHs(mol)
    AllChem.EmbedMolecule(m2)
    AllChem.MMFFOptimizeMolecule(m2)

    # The key in the result dict equals the function name in Descriptors3D.
    shape_descriptors = (
        'Asphericity',
        'PMI1',
        'PMI2',
        'PMI3',
        'NPR1',
        'NPR2',
        'RadiusOfGyration',
        'InertialShapeFactor',
        'Eccentricity',
        'SpherocityIndex',
    )
    for key in shape_descriptors:
        entry[key] = getattr(Descriptors3D, key)(m2)
def run(self):
    """Protonate each ligand mol file and convert it to mol2 with antechamber.

    For every ``ProasisOut`` row belonging to this crystal/refinement the
    ligand ``.mol`` file is read, hydrogens are added and the result saved as
    ``*_h.mol``. antechamber is then run on it (sybyl atom types, bcc
    charges, net charge taken from the protonated molecule) to produce the
    ``.mol2`` file. The base names of both files are stored on the row.

    :raises Exception: with antechamber's stderr text if it wrote any
    """
    hit = ProasisHits.objects.get(crystal_name_id=self.crystal_id,
                                  refinement_id=self.refinement_id)
    proasis_out = ProasisOut.objects.filter(proasis=hit)

    for o in proasis_out:
        lig = o.ligand
        # Build all three paths from one stem. The previous
        # ``infile.replace('mol', 'mol2')`` rewrote every "mol" in the
        # path, including directory names.
        stem = os.path.join(o.root, o.start,
                            str(o.start + '_' + lig.replace(' ', '')))
        infile = stem + '.mol'
        h_file = stem + '_h.mol'
        outfile = stem + '.mol2'

        rd_mol = Chem.MolFromMolFile(infile, removeHs=False)
        h_rd_mol = AllChem.AddHs(rd_mol, addCoords=True)
        Chem.MolToMolFile(h_rd_mol, h_file)
        o.h_mol = h_file.split('/')[-1]

        # Re-read the written file to get the charge of what antechamber
        # will actually see.
        rd_mol = Chem.MolFromMolFile(h_file, removeHs=False)
        net_charge = AllChem.GetFormalCharge(rd_mol)

        # Argument list instead of a shell string: paths containing spaces
        # or shell metacharacters are passed through unchanged.
        command = ["antechamber", "-i", h_file, "-fi", "mdl",
                   "-o", outfile, "-fo", "mol2",
                   "-at", "sybyl", "-c", "bcc", "-nc", str(net_charge)]
        print(" ".join(command))
        process = subprocess.Popen(command, stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
        out, err = process.communicate()
        out = out.decode('ascii')
        if err:
            err = err.decode('ascii')
            raise Exception(err)
        print(out)
        print(err)

        o.mol2 = outfile.split('/')[-1]
        o.save()
def get_properties(self):
    """
    Derive general molecule properties from ``self.SMILES`` with RDKit.

    Sets ``self.logP``, ``self.logS`` and ``self.Synth_score``.

    Background for the properties referenced by this class:
    - synthetic accesibility:
        https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3225829/
        (1 = easy to make, 10 = harder)
    - logP (hydrophobicity):
        https://pubs.acs.org/doi/10.1021/ci990307l
        (smaller = more hydrophilic)
    - logS (aqueous solubility):
        https://github.com/PatWalters/solubility
        (smaller = less water soluble)
    - purchasable (ZINC purchasability):
        https://github.com/stevenbennett96/chemcost
        (True = has at least 3 vendors on ZINC)

    """
    print('>>> collect molecular properties using RDKit.')
    # logP and SA from RDKIT with SMILES:
    molecule = Chem.AddHs(Chem.MolFromSmiles(self.SMILES))
    molecule.Compute2DCoords()

    self.logP = Descriptors.MolLogP(molecule, includeHs=True)
    self.logS = rdkf.get_logSw(molecule)
    self.Synth_score = rdkf.get_SynthA_score(molecule)
def align_mcs(mols, num_confs,
              ref_sdf='/Users/tom/code_test_repository/arrow_testing/cdk2.sdf'):
    """Embed conformers and keep, per molecule, the one best aligned to a reference.

    For every molecule a deep copy gets hydrogens and up to ``num_confs``
    conformers. Each conformer is aligned to the first molecule of
    ``ref_sdf`` on the atoms of their maximum common substructure; the
    conformer with the lowest alignment score is canonicalised and written
    out as a mol block.

    :param mols: iterable of RDKit molecules (not modified)
    :param num_confs: number of conformers to request per molecule
    :param ref_sdf: SDF file whose first molecule is the alignment reference;
        defaults to the path that used to be hard-coded
    :return: ``pyarrow`` array with one mol block per input molecule
    :raises ValueError: if no conformer could be embedded for a molecule
    """
    ref_mol = AllChem.SDMolSupplier(ref_sdf, removeHs=False)[0]
    print(f'ref mol has atoms = {ref_mol.GetNumAtoms()}')

    mols_b = copy.deepcopy(mols)
    mol_blocks = []
    for mol in mols_b:
        mol = AllChem.AddHs(mol)
        # Embedding can yield fewer conformers than requested, so use the
        # ids that were actually created instead of range(num_confs).
        conf_ids = list(AllChem.EmbedMultipleConfs(mol, numConfs=num_confs))
        if not conf_ids:
            raise ValueError('no conformer could be embedded for a molecule')

        mcs = rdFMCS.FindMCS([mol, ref_mol])
        match = Chem.MolFromSmarts(mcs.smartsString)
        test_match_atoms = mol.GetSubstructMatch(match)
        ref_match_atoms = ref_mol.GetSubstructMatch(match)
        atom_map = [[probe_idx, ref_idx] for probe_idx, ref_idx
                    in zip(test_match_atoms, ref_match_atoms)]

        # Find alignments of all conformers of new drug to old drug:
        alignments_scores = [
            rdMolAlign.AlignMol(mol, ref_mol, prbCid=cid, atomMap=atom_map)
            for cid in conf_ids
        ]
        best_id = conf_ids[int(np.argmin(alignments_scores))]
        AllChem.CanonicalizeConformer(mol.GetConformer(best_id))
        # Export the selected conformer; without confId the default
        # conformer was written regardless of the alignment result.
        mol_blocks.append(Chem.MolToMolBlock(mol, confId=best_id))
    return pa.array(mol_blocks)
def smiles2conformers(smiles, N=10, optimize=True):
    """
    Convert smiles string to N conformers.

    Keyword Arguments:
        smiles (str) - smiles string for molecule
        N (int) - number of conformers to generate using the ETKDG algorithm
        optimize (bool) - flag for UFF optimization (default=True)

    Returns:
        mol (RDKit molecule ::class::) - contains N conformers, or None if
            the SMILES cannot be parsed or RDKit raises a RuntimeError

    """
    # Read SMILES and add Hs
    mol = rdkit.MolFromSmiles(smiles)
    if mol is None:
        print('RDKit error for', smiles)
        return None
    mol = rdkit.AddHs(mol)

    # try based on RuntimeError from RDKit
    try:
        # 2D to 3D with multiple conformers
        cids = rdkit.EmbedMultipleConfs(
            mol=mol,
            numConfs=N,
            useExpTorsionAnglePrefs=True,
            useBasicKnowledge=True,
        )
        # quick UFF optimize -- only when requested; the flag used to be
        # documented but ignored.
        if optimize:
            for cid in cids:
                rdkit.UFFOptimizeMolecule(mol, confId=cid)
    except RuntimeError:
        print('RDKit error for', smiles)
        return None

    return mol
def get_similar_compound(condon):
    """Return the KEGG compounds most similar to a query SMILES.

    The query image is saved, every SMILES in ``data//kegg_smiles2.txt`` is
    compared to the query by Dice similarity of radius-2 Morgan fingerprints
    (hydrogens added), and the best ``MaxLength`` entries are returned.

    :param condon: mapping with keys ``'smiles'`` (query) and ``'MaxLength'``
        (maximum number of results)
    :return: list of ``[kegg_id, name, smiles, score]`` lists sorted by
        descending score, where ``name`` is ``compound_dict[kegg_id][0]``.
        Empty if the query cannot be parsed or no results are requested.
    """
    com = condon['smiles']
    save_img(com, 'static//compound_img//smiles_img.png', 300, 300)
    output_num = condon['MaxLength']
    smiles_file_path = 'data//kegg_smiles2.txt'
    with open(smiles_file_path) as handle:
        f = handle.readlines()
    smiles_list = [x.split()[1] for x in f]
    output_num = min(output_num, len(smiles_list))

    mol1 = Chem.MolFromSmiles(com)
    if mol1 is None:
        print('input smiles not exist')
        return []
    if output_num <= 0:
        # Nothing requested or nothing to search; the selection loop below
        # would otherwise fail on min() of an empty list.
        return []

    # Fixed-size "best so far" slots: a new score replaces the current worst.
    top_idx = [0] * output_num
    top_score = [0] * output_num

    mol1 = AllChem.AddHs(mol1)
    fps1 = AllChem.GetMorganFingerprint(mol1, 2)
    for i, item in enumerate(smiles_list):
        mol2 = Chem.MolFromSmiles(item)
        if mol2 is None:
            continue
        mol2 = AllChem.AddHs(mol2)
        fps2 = AllChem.GetMorganFingerprint(mol2, 2)
        score = round(DataStructs.DiceSimilarity(fps1, fps2), 2)
        worst = min(top_score)
        if score > worst:
            min_idx = top_score.index(worst)
            top_idx[min_idx] = i
            top_score[min_idx] = score

    top_keggid = [f[i].split()[0] for i in top_idx]
    top_smiles = [f[i].split()[1] for i in top_idx]
    result = sorted(zip(top_keggid, top_smiles, top_score),
                    key=lambda x: x[2], reverse=True)
    for i in range(len(result)):
        result[i] = list(result[i])
        result[i].insert(1, compound_dict[result[i][0]][0])
    return result
def predict(mol, uncharged=True):
    """Run the base and acid predictors on a molecule.

    :param mol: RDKit molecule
    :param uncharged: if true, the molecule is first neutralised with
        ``rdMolStandardize.Uncharger``
    :return: ``(base_dict, acid_dict)`` from ``predict_base`` and
        ``predict_acid``, both called on the hydrogen-added molecule
    """
    if uncharged:
        mol = rdMolStandardize.Uncharger().uncharge(mol)
    mol = AllChem.AddHs(mol)
    return predict_base(mol), predict_acid(mol)
def _calc_3d_coord(mol):
    """Give ``mol`` a 3D conformer, in place.

    Coordinates are generated on a hydrogen-added copy (embedding with
    random start coordinates, then MMFF optimisation) and the positions of
    the original atoms are copied back as the only conformer of ``mol``.
    ``mol`` itself keeps its atoms unchanged, so no hydrogens are added to
    it.

    ``AddHs`` and ``RemoveHs`` return new molecules; the previous code
    discarded those return values, so the hydrogens were never used for the
    embedding.

    :param mol: RDKit molecule; its conformers are replaced
    :return: None. When no 3D structure can be made a warning is logged and
        ``mol`` is left without conformers.
    """
    with_hs = AllChem.AddHs(mol)
    AllChem.EmbedMolecule(with_hs, useRandomCoords=True)
    try:
        AllChem.MMFFOptimizeMolecule(with_hs)
    except ValueError:
        logging.warning("Unable to make 3d cords.")

    mol.RemoveAllConformers()
    if with_hs.GetNumConformers() == 0:
        return

    # AddHs appends the new hydrogens after the existing atoms, so the first
    # GetNumAtoms() positions belong to the atoms of ``mol``.
    source = with_hs.GetConformer()
    n_atoms = mol.GetNumAtoms()
    conformer = AllChem.Conformer(n_atoms)
    conformer.Set3D(True)
    for idx in range(n_atoms):
        conformer.SetAtomPosition(idx, source.GetAtomPosition(idx))
    mol.AddConformer(conformer, assignId=True)
def prepare_mol(mol, property_name, do_charge):
    """
    Sets atomic properties if they are specified in the sdf, otherwise
    computes them.

    * ``do_charge is False`` and ``property_name`` non-empty: the molecule is
      validated with ``check_mol``; on success hydrogens are removed and the
      newline-separated values of that molecule property are set on the atoms.
    * ``do_charge is False`` and ``property_name`` empty: hydrogens are added,
      every atom gets the weight ``1 / n_at`` under ``'equal_w'``, and the
      hydrogens are removed again.
    * ``do_charge is True``: Gasteiger charges are computed and validated;
      the property name becomes ``'_GasteigerCharge'``.

    :param mol: molecule to be analyzed (from rdkit supplier)
    :param property_name: name of the property to be used
    :param do_charge: if True, partial charge is computed
    :return: ``(mol, property_name, err)`` -- the (H depleted) molecule, the
        property name updated on the basis of the settings, and the error
        code from ``check_mol`` (0 if not set otherwise)

    Francesca Grisoni, 12/2016, v. alpha
    ETH Zurich
    """
    from rdkit.Chem import AllChem as Chem

    err = 0

    # partial charges
    if do_charge is False:
        # ``!=`` instead of ``is not``: identity comparison with a literal
        # is implementation dependent.
        if property_name != '':
            err = check_mol(mol, property_name, do_charge)
            if err == 0:
                # prepares molecule
                mol = Chem.RemoveHs(mol)
                n_at = mol.GetNumAtoms()
                # takes properties
                list_prop = mol.GetPropsAsDict()
                # extracts the property according to the set name
                string_values = list_prop[property_name].split("\n")
                # Real list: ``np.asarray(map(...))`` is a 0-d object array
                # on Python 3.
                w = np.asarray([float(value) for value in string_values])
        else:
            mol = Chem.AddHs(mol)
            n_at = mol.GetNumAtoms()
            w = np.ones((n_at, 1)) / n_at
            # same format as previous calculation
            w = np.asarray([float(value) for value in w])
            property_name = 'equal_w'
            err = 0

        # Only touch the atoms when the checks passed; otherwise ``n_at``
        # and ``w`` were never set and the error code is simply returned.
        if err == 0:
            # extract properties
            for atom in range(n_at):
                mol.GetAtomWithIdx(atom).SetDoubleProp(property_name, w[atom])
            mol = Chem.RemoveHs(mol)

    # Gasteiger-Marsili Charges
    elif (do_charge is True) and (err == 0):
        Chem.ComputeGasteigerCharges(mol)
        property_name = '_GasteigerCharge'
        err = check_mol(mol, property_name, do_charge)

    return mol, property_name, err
def gen_mol_blocks_from_confs(mols, num_confs):
    """Embed conformers for each molecule and return them as mol blocks.

    Works on a deep copy of ``mols``; each molecule gets hydrogens before
    embedding.

    :param mols: iterable of RDKit molecules (not modified)
    :param num_confs: number of conformers to request per molecule
    :return: flat list of mol blocks, one per generated conformer, grouped
        by molecule in input order
    """
    mols_b = copy.deepcopy(mols)
    mol_blocks = []
    for mol in mols_b:
        mol = AllChem.AddHs(mol)
        # Iterate the ids that were actually generated: embedding may yield
        # fewer than ``num_confs``, and asking for a missing id raises.
        conf_ids = AllChem.EmbedMultipleConfs(mol, numConfs=num_confs)
        mol_blocks.extend(
            Chem.MolToMolBlock(mol, confId=conf_id) for conf_id in conf_ids
        )
    return mol_blocks
def smiles2fps(self, smiles):
    """Return the Morgan bit fingerprint of a SMILES as a one-row array.

    The fingerprint uses radius 3 and ``self.state_size`` bits and is
    computed on the hydrogen-added molecule.
    """
    molecule = AllChem.AddHs(Chem.MolFromSmiles(smiles))
    bit_vect = AllChem.GetMorganFingerprintAsBitVect(
        molecule, 3, nBits=self.state_size
    )
    # ConvertToNumpyArray fills the target array in place.
    bits = np.zeros((1, ))
    DataStructs.ConvertToNumpyArray(bit_vect, bits)
    return np.array([bits])
def index(self, smiles):
    """Compute the index vector of a molecule given as SMILES.

    All values are taken from the hydrogen-added molecule: atom count,
    molecular weight (rounded to one decimal, times ten, truncated to int),
    the first element returned by ``self.get_shortest_wiener`` and the
    rotatable-bond count.

    :param smiles: SMILES string of the molecule
    :return: ``numpy`` array with the four index values
    """
    rdk_mol = Chem.MolFromSmiles(smiles)
    rdk_mol = Chem.AddHs(rdk_mol)

    atom_count = rdk_mol.GetNumAtoms()
    scaled_weight = int(round(Descriptors.MolWt(rdk_mol), 1) * 10)
    wiener = self.get_shortest_wiener(rdk_mol)[0]
    rotatable = Chem.CalcNumRotatableBonds(rdk_mol)

    # Ring information (pybel based) is deliberately not part of this index.
    return np.array([atom_count, scaled_weight, wiener, rotatable])
def from_smiles(cls, smiles: str, add_hs: bool = True, num_confs: int = 10):
    """Create an instance from a SMILES string.

    :param smiles: SMILES string to parse
    :param add_hs: if true, add hydrogens and embed conformers
    :param num_confs: number of conformers to request when ``add_hs`` is true
    :return: ``cls(mol=...)`` built from the resulting molecule
    """
    mol = Chem.MolFromSmiles(smiles)
    if not add_hs:
        return cls(mol=mol)

    mol = Chem.AddHs(mol)
    # Conformers are only generated once the hydrogens are present.
    Chem.EmbedMultipleConfs(mol, numConfs=num_confs)
    return cls(mol=mol)
def calculate_all_MW(molecules):
    """
    Print the molecular weight of every molecule in a {name: SMILES} dict.

    The weight is computed on the hydrogen-added molecule.

    """
    for m in molecules:
        smile = molecules[m]
        # Read SMILES and add Hs
        parsed = rdkit.MolFromSmiles(smile)
        MW = Descriptors.MolWt(rdkit.AddHs(parsed))
        print(m, '---', smile, '---', 'MW =', MW, 'g/mol')
def calculate_drug_similarity(input_dir='../data/DrugBank5.0_Approved_drugs/',
                              drug_dir='../data/DrugBank5.0_Approved_drugs/',
                              output_file='../data/output.csv'):
    """Write the pairwise Dice similarity between two sets of mol files.

    Every file matching ``drug_dir + '*'`` is compared with every file
    matching ``input_dir + '*'`` using radius-2 Morgan fingerprints of the
    hydrogen-added molecules. Identifiers are the file base names up to the
    first dot.

    :param input_dir: prefix of the query mol files
    :param drug_dir: prefix of the DrugBank mol files
    :param output_file: CSV path; columns are DrugBank ids, rows input ids
    """
    def _drug_id(path):
        # Base name up to the first dot.
        return os.path.basename(path).split('.')[0]

    def _fingerprint(path):
        # Radius-2 Morgan fingerprint of the hydrogen-added molecule.
        mol = AllChem.AddHs(Chem.MolFromMolFile(path))
        return AllChem.GetMorganFingerprint(mol, 2)

    drugbank_drugs = glob.glob(drug_dir + '*')
    input_drugs = glob.glob(input_dir + '*')

    # Parse and fingerprint each input file once instead of once per
    # DrugBank drug. Skipped when there is nothing to compare against.
    input_fps = []
    if drugbank_drugs:
        input_fps = [(_drug_id(path), _fingerprint(path))
                     for path in input_drugs]

    drug_similarity_info = {}
    for each_drug_id1 in drugbank_drugs:
        drugbank_id = _drug_id(each_drug_id1)
        fps = _fingerprint(each_drug_id1)
        scores = {}
        for input_drug_id, fps2 in input_fps:
            scores[input_drug_id] = DataStructs.DiceSimilarity(fps, fps2)
        drug_similarity_info[drugbank_id] = scores

    df = pd.DataFrame.from_dict(drug_similarity_info)
    df.to_csv(output_file)