def read_fragment(self, name=None, fmt=None, handle=None, zero_to_origin=True):
    """Read a molecular structure from a file or an open handle.

    Guess at the file type from the extension of name if the caller does
    not supply an explicit fmt. If a file handle is provided, read data
    from it. Otherwise open file name for reading.

    :param name: file to open
    :type name : str
    :param fmt: optional OpenBabel format code e.g. "xyz"
    :type fmt : str
    :param handle: optional object with a read() method; when given, the
        structure is parsed from its contents instead of opening name
    :param zero_to_origin: translate geometry to put atom 0 at origin
    :type zero_to_origin : bool
    :return: molecular fragment
    :rtype : Fragment
    :raises ValueError: if fmt is not given and name has no usable extension
    """
    if not fmt:
        try:
            fmt = name.rsplit(".", 1)[-1]
        except (IndexError, AttributeError):
            # name is None (or not string-like): there is nothing to guess
            # from, so fail here instead of handing fmt=None to pybel.
            msg = "No fmt given for {0} and unable to guess from file extension".format(repr(name))
            raise ValueError(msg)

    if handle is None:
        # Only the first structure in the file is used.
        molecule = next(pybel.readfile(fmt, name))
    else:
        molecule = pybel.readstring(fmt, handle.read())

    fragment = Fragment(molecule)
    if zero_to_origin:
        fragment.set_zero_to_origin()
    return fragment
def prepare(target):
    """Download and preprocess the 'actives' and 'decoys' files of a target.

    For each of the two types this:
      * builds the download address from URL, the lower-cased target and
        the type, and fetches it into Targets_test/<target>/<typ>/ unless
        the file is already there;
      * writes a '_7_RB.sdf' companion holding only the molecules for which
        NumRotors() is at most 7 (removed again if nothing qualifies),
        unless that file is already there;
      * passes the downloaded file through validate() and then clean().

    :param target: target name
    :return: target, unchanged
    """
    for typ in ('actives', 'decoys'):
        url = URL % (target.lower(), typ)
        d = os.path.join("Targets_test", target, typ)
        if not os.path.isdir(d):
            os.makedirs(d)
        molfn = os.path.join(d, os.path.basename(url))

        if not os.path.isfile(molfn):
            print("Downloading %s" % url)
            response = urllib2.urlopen(url)
            try:
                # Read everything before creating the local file, so a failed
                # download does not leave an empty file that later runs would
                # mistake for a finished one.
                data = response.read()
            finally:
                response.close()
            with open(molfn, 'wb') as out:
                out.write(data)

        # ---- subset with at most 7 rotors ----
        rb_fn = molfn.replace('.sdf.gz', '_7_RB.sdf')
        if not os.path.isfile(rb_fn):
            kept = 0
            rb_out = pybel.Outputfile('sdf', rb_fn)
            try:
                for mol in pybel.readfile('sdf', molfn):
                    if mol.OBMol.NumRotors() <= 7:
                        rb_out.write(mol)
                        kept += 1
            finally:
                rb_out.close()
            if not kept:
                # Best effort: drop the empty subset file.
                try:
                    os.remove(rb_fn)
                except OSError:
                    pass

        # NOTE(review): clean() treats its argument as a file name, so this
        # relies on validate() returning the path it wrote -- confirm.
        clean(validate(molfn, typ))
    return target
def clean(fn):
    """Write a copy of an SDF file without molecules present in the ligand set.

    The reference set is ../PDB/ligands_allgood.sdf relative to the
    directory of fn. Every molecule of fn whose InChIKey is not in that set
    is re-read through the rdk toolkit and written to fn with '.sdf'
    replaced by '_filtered.sdf'; a summary line is printed afterwards.
    Nothing is done when the ligand file is missing or the output file
    already exists.

    :param fn: path of the SDF file to filter
    :return: None
    """
    ligands = os.path.join(os.path.dirname(fn), "..", "PDB", "ligands_allgood.sdf")
    if not os.path.isfile(ligands):
        return
    ofn = fn.replace('.sdf', '_filtered.sdf')
    if os.path.isfile(ofn):
        # Output already produced; no need to parse the ligand file at all.
        return

    inchikeys = set(mol.write('inchikey')
                    for mol in pybel.readfile('sdf', ligands))
    good = 0
    total = 0
    out = rdk.Outputfile('sdf', ofn, overwrite=True)
    try:
        for mol in pybel.readfile('sdf', fn):
            total += 1
            if mol.write('inchikey') in inchikeys:
                continue
            try:
                out.write(rdk.readstring('mol', mol.write('mol')))
            except Exception:
                # Best effort: a molecule that cannot be converted or
                # written is skipped rather than aborting the whole file.
                continue
            good += 1
    finally:
        out.close()

    # An input without molecules would otherwise divide by zero.
    percent = good * 100. / total if total else 0.
    print("%s %% remaining mols (%s/%s)" % (percent, good, total))
def parse_decoy_files(decoyfilelist):
    """Parse files containing known decoys.

    Every molecule read from every file is wrapped in a ComparableMol, has
    calcdesc() called on it and is added to a set. A molecule for which any
    of these steps raises is reported on stdout and skipped.

    :param decoyfilelist: iterable of file names (each converted with str())
    :return: set of the ComparableMol objects that were built
    """
    decoy_set = set()
    for decoyfile in decoyfilelist:
        decoyfile = str(decoyfile)
        for mol in pybel.readfile(get_fileformat(decoyfile), decoyfile):
            try:
                cmol = ComparableMol(mol)
                cmol.calcdesc()
                decoy_set.add(cmol)
            except Exception as e:
                print(e)
    # Without this return the collected set was discarded.
    return decoy_set
def parse_query_files(filelist):
    """Parse files containing active ligands.

    Every molecule read from every file is wrapped in a ComparableMol, has
    calcdesc() called on it and is stored as a key with the value 0. A
    molecule for which any of these steps raises is reported on stdout and
    skipped.

    :param filelist: iterable of file names (each converted with str())
    :return: dict mapping each ComparableMol that was built to 0
    """
    query_dict = {}
    for queryfile in filelist:
        queryfile = str(queryfile)
        for mol in pybel.readfile(get_fileformat(queryfile), queryfile):
            try:
                cmol = ComparableMol(mol)
                cmol.calcdesc()
                query_dict[cmol] = 0
            except Exception as e:
                print(e)
    # Without this return the collected dict was discarded.
    return query_dict
def parse_db_files(filelist):
    """Yield molecules from the files where decoys are looked for.

    When filelist is a list, the files are visited in random order. Each
    molecule is wrapped in a ComparableMol; a molecule whose construction
    raises is reported on stdout and skipped.

    :param filelist: iterable of file names
    :return: generator of (ComparableMol, filecount, dbfile) tuples, where
        filecount is the number of files completed before dbfile
    """
    if isinstance(filelist, list):
        # Shuffle a private copy so the caller's list keeps its order.
        filelist = list(filelist)
        random.shuffle(filelist)

    for filecount, dbfile in enumerate(filelist):
        for mol in pybel.readfile(get_fileformat(dbfile), dbfile):
            try:
                cmol = ComparableMol(mol)
            except Exception as e:
                print(e)
                continue
            if cmol:
                yield cmol, filecount, dbfile
def validate(fn, typ):
    """Write the molecules of an SDF file that can be re-read through rdk.

    The output name is fn with '.sdf.gz' replaced by '_sanitized.sdf'. If
    that file does not exist yet, every molecule of fn is retitled to
    "<typ>_<original title>_<running index>", converted through the rdk
    toolkit and written out; molecules for which this raises are reported
    on stdout and skipped, and a summary line is printed. An existing
    output file is left as it is.

    :param fn: path of the input SDF file
    :param typ: prefix put in front of every molecule title
    :return: path of the sanitized SDF file
    """
    ofn = fn.replace('.sdf.gz', '_sanitized.sdf')
    if not os.path.isfile(ofn):
        good = 0
        total = 0
        out = rdk.Outputfile('sdf', ofn, overwrite=True)
        try:
            for n, mol in enumerate(pybel.readfile('sdf', fn)):
                mol.title = typ + "_" + mol.title + "_" + str(n)
                total += 1
                try:
                    out.write(rdk.readstring('mol', mol.write('mol')))
                    good += 1
                except Exception as e:
                    print(e)
        finally:
            out.close()
        # An input without molecules would otherwise divide by zero.
        percent = good * 100. / total if total else 0.
        print("%s %% good mols (%s/%s)" % (percent, good, total))
    # Return the output path so callers can chain further processing on it.
    return ofn
def read_fragment(self, name=None, fmt=None, handle=None, zero_to_origin=True):
    """Read a molecular structure from a file or an open handle.

    Guess at the file type from the extension of name if the caller does
    not supply an explicit fmt. If a file handle is provided, read data
    from it. Otherwise open file name for reading.

    :param name: file to open
    :type name : str
    :param fmt: optional OpenBabel format code e.g. "xyz"
    :type fmt : str
    :param handle: optional object with a read() method; when given, the
        structure is parsed from its contents instead of opening name
    :param zero_to_origin: translate geometry to put atom 0 at origin
    :type zero_to_origin : bool
    :return: molecular fragment
    :rtype : Fragment
    :raises ValueError: if fmt is not given and name has no usable extension
    """
    if not fmt:
        try:
            fmt = name.rsplit(".", 1)[-1]
        except (IndexError, AttributeError):
            # name is None (or not string-like): there is nothing to guess
            # from, so fail here instead of handing fmt=None to pybel.
            msg = "No fmt given for {0} and unable to guess from file extension".format(
                repr(name))
            raise ValueError(msg)

    if handle is None:
        # Only the first structure in the file is used.
        molecule = next(pybel.readfile(fmt, name))
    else:
        molecule = pybel.readstring(fmt, handle.read())

    fragment = Fragment(molecule)
    if zero_to_origin:
        fragment.set_zero_to_origin()
    return fragment
def get_coord_dict(format, inputmol):
    """Map atom index to coordinates for the first molecule of a file.

    The first molecule read from inputmol (in the given pybel format) has
    its hydrogens deleted before the mapping is built.

    :param format: pybel format code of the file
    :param inputmol: path of the file to read
    :return: dict of atom.idx -> atom.coords for the remaining atoms
    """
    reader = pybel.readfile(format, inputmol)
    first_mol = next(reader)
    first_mol.OBMol.DeleteHydrogens()

    coords_by_idx = {}
    for atom in first_mol:
        coords_by_idx[atom.idx] = atom.coords
    return coords_by_idx
elif n == 2: if idx in coord_map: pass else: coord_map.update({idx: ind}) elif n == 1: if idx in coord_map: pass else: coord_map.update({idx: ind}) else: pass if len(coord_map) == len(original_mol2): coord_conform = {} for index1, index2 in coord_map.items(): coord_conform.update({index1: docked_pdbqt.get(index2)}) mol2 = pybel.readfile('mol2', origanal_mol2_mol).next() mol2.OBMol.DeleteHydrogens() for atom in mol2: atom.OBAtom.SetVector( coord_conform.get(atom.idx)[0], coord_conform.get(atom.idx)[1], coord_conform.get(atom.idx)[2]) mol2.write('mol2', output_mol2, overwrite=True) else: print 'Lost coordinates in mapping' else: print 'Not equal number of atoms in molecules' print docked_pdbqt_mol
if n == 3: coord_map.update({idx:ind}) elif n == 2: if idx in coord_map: pass else: coord_map.update({idx:ind}) elif n == 1: if idx in coord_map: pass else: coord_map.update({idx:ind}) else: pass if len(coord_map) == len(original_mol2): coord_conform = {} for index1, index2 in coord_map.items(): coord_conform.update({index1:docked_pdbqt.get(index2)}) mol2 = pybel.readfile('mol2', origanal_mol2_mol).next() mol2.OBMol.DeleteHydrogens() for atom in mol2: atom.OBAtom.SetVector(coord_conform.get(atom.idx)[0], coord_conform.get(atom.idx)[1], coord_conform.get(atom.idx)[2]) mol2.write('mol2', output_mol2, overwrite=True) else: print 'Lost coordinates in mapping' else: print 'Not equal number of atoms in molecules' print docked_pdbqt_mol