def mp_helper(file_one, file_two, iteration_depth = 1):
    """
    Helper function for the multiprocessing library.

    Every molecule of ``file_two`` is merged against every molecule of
    ``file_one`` (both are read as 'smi' files) via ``merge``. The collected
    results and fragments are written to temporary files created inside the
    module-level ``temp_dir``.

    Returns a ``(molecule_file, fragment_file)`` tuple of temporary file
    names; an entry is ``None`` when nothing of that kind was produced.
    The files are created with ``delete=False`` and are not removed here.
    """
    results = []
    fragments = []
    for mol_one in pybel.readfile('smi', file_one):
        # file_two is read again for each molecule of file_one
        for mol_two in pybel.readfile('smi', file_two):
            result, fragment = merge(mol_two, mol_one, options, iteration_depth)
            if result:
                results.extend(result)
            if fragment:
                fragments.extend(fragment)

    def _dump(lines):
        # Write ``lines`` to a persistent temp file and return its name
        # (None when there is nothing to write).
        if not lines:
            return None
        handle = tempfile.NamedTemporaryFile(dir=temp_dir, delete=False)
        try:
            handle.writelines(lines)
        finally:
            # close even when writing fails so the descriptor is not leaked
            handle.close()
        return handle.name

    fragment_return = _dump(fragments)
    molecule_return = _dump(results)
    return molecule_return, fragment_return
def do_for_each(name, num_threads): _format = name[name.rfind(".")+1:] basename = name[:name.rfind(".")] out_name = basename + ".num" pdb_id = basename.split("_")[-1] pdb_name = "/home/xmluo/jlpeng/cMet/pdb_protein/%s_protein.pdb"%pdb_id global ligand,protein,outf,ap_iter ligand = pybel.readfile(_format, name).next() protein = pybel.readfile("pdb", pdb_name).next() outf = open(out_name, "w") #make_pairs() #print "after creating pair_queue" ap_iter = AtomPairs() threads = [] for i in xrange(num_threads): threads.append(AtomPairWalker()) print "len(threads)=%d"%len(threads) for t in threads: t.start() for t in threads: t.join() if t.isAlive(): print "thread %s is alive"%t.getName() else: print "thread %s is dead"%t.getName() outf.close()
def dock_ligands_to_receptors(docking_dir, worker_pool=False, exhaustiveness=None, chosen_receptor=None, restrict_box=True):
    """Dock every prepared ligand to the receptor found in each subdirectory.

    Args:
        docking_dir: directory whose immediate subdirectories are named after
            a receptor and hold ``<name>.pdb``, ``<name>.pdbqt`` and the
            ``*_prepared.pdbqt`` ligand files.
        worker_pool: ``False`` to dock serially; otherwise an object whose
            ``map_sync(func, iterable)`` method distributes the work.
        exhaustiveness: forwarded unchanged to ``dock_ligand_to_receptor``.
        chosen_receptor: when not ``None``, only this receptor is processed.
        restrict_box: when true, the first ligand whose path contains "CHEM"
            is docked first and a 20 x 20 x 20 box centred on its docked
            pose is used for the remaining ligands.
    """
    subdirs = glob.glob(os.path.join(docking_dir, '*/'))
    for subdir in subdirs:
        subdir = subdir.rstrip('/')
        receptor_name = os.path.basename(subdir)
        if chosen_receptor is not None and chosen_receptor != receptor_name:
            continue
        print("receptor name = %s" % receptor_name)
        receptor_filename = os.path.join(subdir, "%s.pdbqt" % receptor_name)
        if not os.path.exists(receptor_filename):
            continue
        print("Examining %s" % receptor_filename)

        receptor_pybel = next(pybel.readfile("pdb", os.path.join(subdir, "%s.pdb" % receptor_name)))
        protein_centroid, protein_range = get_molecule_data(receptor_pybel)
        # default search box: whole protein plus a 5.0 margin
        box_dims = protein_range + 5.0

        ligands = sorted(glob.glob(os.path.join(subdir, '*_prepared.pdbqt')))
        print("Num ligands = %d" % len(ligands))

        dock_ligand_to_receptor_partial = partial(dock_ligand_to_receptor,
                                                  receptor_filename=receptor_filename,
                                                  protein_centroid=protein_centroid,
                                                  box_dims=box_dims,
                                                  subdir=subdir,
                                                  exhaustiveness=exhaustiveness)

        if restrict_box:
            # NOTE(review): active_ligand stays "" when no path contains
            # "CHEM"; the docking call below is then made with "".
            active_ligand = ""
            for ligand in ligands:
                if "CHEM" in ligand:
                    active_ligand = ligand
                    break

            print("Docking to %s first to ascertain centroid and box dimensions" % active_ligand)
            out_pdb_qt = dock_ligand_to_receptor_partial(active_ligand)
            ligand_pybel = next(pybel.readfile("pdbqt", out_pdb_qt))
            ligand_centroid, _ = get_molecule_data(ligand_pybel)
            print("Protein centroid = %s" %(str(protein_centroid)))
            print("Ligand centroid = %s" %(str(ligand_centroid)))
            box_dims = np.array([20., 20., 20.])
            dock_ligand_to_receptor_partial = partial(dock_ligand_to_receptor,
                                                      receptor_filename=receptor_filename,
                                                      protein_centroid=ligand_centroid,
                                                      box_dims=box_dims,
                                                      subdir=subdir,
                                                      exhaustiveness=exhaustiveness)
            print("Finished docking to %s, docking to remainder of ligands now."
                  % active_ligand)

        if worker_pool is False:
            for ligand_file in ligands:
                a = time.time()
                # dock the ligand of this iteration (previously a stale
                # ``ligand`` variable was docked over and over)
                dock_ligand_to_receptor_partial(ligand_file)
                print("took %f seconds to dock single ligand." %(time.time() - a))
        else:
            print("parallelizing docking over worker pool")
            worker_pool.map_sync(dock_ligand_to_receptor_partial, ligands)
def helper_run(self, result):
    """Return ``rmsd_between`` the native ligand and a docking result.

    ``result`` must provide ``output().path`` pointing at a pdbqt file; the
    native ligand is read from ``LigPdbqt(self.tname).output().path``. Only
    the first molecule of each file is used.
    """
    result_pdbqt = result.output().path
    # builtin next() works on Python 2.6+ and 3; generator.next() is 2-only
    result_lig = next(pybel.readfile("pdbqt", result_pdbqt))
    native_lig = next(pybel.readfile("pdbqt", LigPdbqt(self.tname).output().path))
    result_rmsd = rmsd_between(native_lig, result_lig)
    return result_rmsd
def create_inchi(job, chemid, xyz_file=''):
    """Return the InChI string (written with the 'nostereo' option) of a species.

    Args:
        job: base path; used to build the default xyz file location.
        chemid: species identifier; default file is ``<job>xyz/<chemid>.xyz``.
        xyz_file: explicit xyz file to read instead of the default location.

    Exits the program (after logging an error) when the ``pybel`` name is
    not available.
    """
    if xyz_file == '':
        xyz_file = os.path.expanduser(job) + 'xyz/' + chemid + '.xyz'
    # The file is read exactly once, inside the guard; an earlier duplicate
    # read outside the try block made the NameError handler unreachable.
    try:
        obmol = list(pybel.readfile('xyz', xyz_file))[0]
    except NameError:
        logging.error('Pybel is not installed or loaded correctly.')
        sys.exit()
    return obmol.write("inchi", opt={'T': 'nostereo'}).split()[0]
def prepare_dsstox_dataset(root, name, dest=None, overwrite=False): """ This method bootstraps the analysis of DSSTox data. - Rename the compounds - Merge train/test - Generate 3D conformations - Save "master" and "saliviewer" tables - Redirects stdout/stderr to a "prepare.log" file """ if not dest: dest = root dataset_root = op.join(dest, name) dest_sdf = op.join(dataset_root, name + '.sdf') if op.exists(dest_sdf) and not overwrite: print '%s is already there and not overwriting requested' % dest_sdf return print 'Reading %s' % name train_mols = list(pybel.readfile('sdf', op.join(root, name + '_training.sdf'))) test_mols = list(pybel.readfile('sdf', op.join(root, name + '_prediction.sdf'))) print '\tCreating dataset root: %s' % dataset_root if not op.exists(dataset_root): os.makedirs(dataset_root) print '\tRenaming the compounds to keep track of the provenance' rename_mols_by_index(train_mols, name + '-train-') rename_mols_by_index(test_mols, name + '-test-') print '\tGenerating conformations' for mol in train_mols + test_mols: #Some molecules from mutagenicity produce segfault on make3D #See bug report at https://sourceforge.net/tracker/?func=detail&aid=3374324&group_id=40728&atid=428740 #Train 3988: OC(=O)[C@]1(C)CCC[C@]2(C1CC[C@]13C2CC[C@](C3)([C@]2(C1)OC2)O)C #Train 4205: CC(CCC[C@H]([C@H]1CC[C@@H]2[C@]1(C)CC[C@H]1[C@H]2CC2([C@@H]3[C@]1(C)CC[C@@H](C3)Br)S(=O)(=O)CCS2(=O)=O)C)C #This kind of fatal errors are worrying, is there any robust way of controlling them in python/java? 
Will need to create one if not any(name in mol.title for name in ('train-3988', 'train-4205')): try: print 'Conformation for %s' % mol.title mol.make3D() except Exception: print 'Error computing a 3D conformation for %s' % mol.title print '\tSaving compounds' save_mols(train_mols + test_mols, dest_sdf) master_table = op.join(dataset_root, name + '-master.csv') print '\tCreating \"master\" table: %s' % master_table create_master_table(dest_sdf, master_table) sali_table = op.join(dataset_root, name + '-saliviewer.csv') print '\tCreating \"saliviewer\" table: %s' % sali_table create_saliviewer_input(master_table, sali_table)
def pred_foo(tname):
    """Return ``rmsd_between`` the native ligand and the ``_pred_0`` sdf for ``tname``.

    The prediction is read from ``GEAUX_OUTPUT/<tname>/<tname>_pred_0.sdf``
    and the native ligand from ``VinaPath(tname).lig_sdf``. Best effort:
    any exception is printed and ``None`` is returned.
    """
    try:
        geaux_sdf = os.path.join(GEAUX_OUTPUT, tname, tname + '_pred_0.sdf')
        native_sdf = VinaPath(tname).lig_sdf
        # builtin next() works on Python 2.6+ and 3; generator.next() is 2-only
        geaux_lig = next(pybel.readfile("sdf", geaux_sdf))
        native_lig = next(pybel.readfile("sdf", native_sdf))
        result_rmsd = rmsd_between(native_lig, geaux_lig)
        return result_rmsd
    except Exception as e:
        print(e)
        return None
def __init__(self, filename, ext):
    """Load the first molecule of ``filename`` (format ``ext``) and set up the record.

    ``self.data`` is initialised with empty string fields, the cleaned file
    name is stored as ``self.name`` and pushed through ``self.update``, and
    formal charges are reset on the loaded molecule.
    """
    self.data = {'name':'','index':'', 'metal':'', 'topology':'',
                 'parent':'', 'atomic_info':'', 'bond_table':'',
                 'connectivity':'', 'connect_flag':'', 'connect_sym':''}
    name = os.path.split(filename)[-1]
    self.name = clean(name, ext)
    self.update(name=self.name)
    # The builtin next() behaves the same on Python 2.6+ and 3, so no
    # interpreter-version branch is needed.
    self.mol = next(pybel.readfile(ext, filename))
    self._reset_formal_charges()
def caculateRMSD(self):
    """Return ``dockedpose.rmsd`` between the predicted and crystal ligand.

    Both molecules are the first record of their pdbqt file (taken from the
    required vina task); hydrogen atoms are excluded from the coordinates.
    """
    vina_task = self.requires()
    predicted_pdbqt = vina_task.output().path
    # builtin next() works on Python 2.6+ and 3; generator.next() is 2-only
    predicted_mol = next(pybel.readfile('pdbqt', predicted_pdbqt))
    crystal_pdbqt = vina_task.lig_pdbqt
    crystal_mol = next(pybel.readfile('pdbqt', crystal_pdbqt))

    def rmsd(m1, m2):
        # compare heavy-atom coordinates only
        c1 = [a.coords for a in m1 if not a.OBAtom.IsHydrogen()]
        c2 = [a.coords for a in m2 if not a.OBAtom.IsHydrogen()]
        return dockedpose.rmsd(c1, c2)

    return rmsd(predicted_mol, crystal_mol)
def run(self):
    """Write a CSV with the atom counts of each ligand and protein (hydrogens removed).

    For every id from ``self.getSdfs()`` the first molecule of the astex sdf
    and pdb files is loaded; the counts are stored under "lig_sz" and
    "prt_sz" and saved to ``self.output().path``.
    """
    data = {}
    for sdf_id in self.getSdfs():
        path = Path(sdf_id)
        # builtin next() works on Python 2.6+ and 3; generator.next() is 2-only
        lig = next(pybel.readfile('sdf', path.astex_sdf()))
        lig.removeh()
        prt = next(pybel.readfile('pdb', path.astex_pdb()))
        prt.removeh()
        lig_sz = len(lig.atoms)
        prt_sz = len(prt.atoms)
        data[sdf_id] = {"lig_sz": lig_sz, "prt_sz": prt_sz}
    dset = pd.DataFrame(data)
    dset.to_csv(self.output().path)
def main(): file_list=GetFileList() dc_fn='dist_splited_train_0525.dat' dc=dist.UnformatedInput(dc_fn) pts=dist.PTS(dc) for i in range(len(file_list)): lig_fn=file_list[i][0] pro_fn=file_list[i][1] pdbid=file_list[i][2] if os.path.isfile(lig_fn): lig=pybel.readfile('sdf',lig_fn).next() pro=pybel.readfile('pdb',pro_fn).next() score,scorev=GetFinger(lig,pro,pts,pdbid) print score,scorev
def helper_geauxdock(tname, version="0.7"):
    """Return ``rmsd_between`` the native ligand and the GeauxDock result for ``tname``.

    The result file is ``<tname>_<version>_0.sdf`` inside the work directory
    of ``ModelPath(tname, version=version)``. Best effort: any exception is
    printed and ``None`` is returned.
    """
    try:
        native_sdf = VinaPath(tname).lig_sdf
        geaux_sdf = os.path.join(
            ModelPath(tname, version=version).work_dir,
            "{}_{}_0.sdf".format(tname, version))
        # builtin next() works on Python 2.6+ and 3; generator.next() is 2-only
        geaux_lig = next(pybel.readfile("sdf", geaux_sdf))
        native_lig = next(pybel.readfile("sdf", native_sdf))
        result_rmsd = rmsd_between(native_lig, geaux_lig)
        return result_rmsd
    except Exception as e:
        print(e)
        return None
def run(self):
    """Dump, per protein residue, its distances to the ligand atoms as JSON.

    The ligand (first molecule of ``mypath.sdf``, hydrogens removed) has its
    atom types translated from the 'INT' to the 'SYB' type table. One record
    with "dists", "atom_types" and "residue" is written per residue of the
    structure parsed from ``mypath.pdb``.
    """
    mypath = self.getPath()
    lig_ifn = mypath.sdf
    prt_ifn = mypath.pdb
    lig_ext = os.path.basename(lig_ifn).split('.')[-1]
    # builtin next() works on Python 2.6+ and 3; generator.next() is 2-only
    lig = next(pybel.readfile(lig_ext, lig_ifn))
    lig.removeh()

    parser = PDBParser(QUIET=True)
    structure = parser.get_structure('prt', prt_ifn)

    typetable = OBTypeTable()
    typetable.SetFromType('INT')
    typetable.SetToType('SYB')

    dat = []
    atom_types = [typetable.Translate(atom.type) for atom in lig.atoms]
    # NOTE(review): this relies on ``shuffle`` returning the shuffled list.
    # If it is ``random.shuffle`` (in place, returns None), atom_types
    # becomes None here - confirm which ``shuffle`` the file imports.
    atom_types = shuffle(atom_types)
    for residue in structure.get_residues():
        dists = residueDistances2LigandAtoms(residue, lig)
        dat.append({"dists": dists,
                    "atom_types": atom_types,
                    "residue": residue.get_resname()})

    to_write = json.dumps(dat, indent=4, separators=(',', ':'))
    with self.output().open('w') as ofs:
        ofs.write(to_write)
def parse_molfile(file_name):
    """Return the isotope composition of the first molecule in a mol file.

    Hydrogens are added before counting. The result is a list with one entry
    per distinct (symbol, atomic number, isotope) combination, in order of
    first appearance:

        [0]: isotope symbol of the element
        [1]: number of such atoms
        [2]: atomic number
        [3]: isotope number, 0 meaning natural abundance

    Example:
        ['12C', 9, 6, 0L]
        ['15N', 1, 7, 15L]
        ['16O', 2, 8, 0L]
        ['35Cl', 1, 17, 0L]
        ['1H', 10, 1, 0L]
    """
    mol = next(pybel.readfile('mol', file_name))
    mol.addh()
    # Count duplicates in a single pass; ``order`` keeps first-seen order.
    counts = {}
    order = []
    for atom in mol.atoms:
        symbel = el.get_isotope_info(atom.atomicnum, atom.isotope, 'isotope_symbol')
        key = (symbel, atom.atomicnum, atom.isotope)
        if key not in counts:
            counts[key] = 0
            order.append(key)
        counts[key] += 1
    return [[key[0], counts[key], key[1], key[2]] for key in order]
def main(input_ext, inputfile, output_ext, outputfilename, nconfs, rmsd_cutoff, energy_cutoff): ff = pybel._forcefields['mmff94'] outputfile = pybel.Outputfile(output_ext, outputfilename, overwrite=True) for i, mol in enumerate(pybel.readfile(input_ext, inputfile)): t = time.time() print "**Molecule %d\n..title = %s" % (i, mol.title) print "..number of rotatable bonds = %d" % mol.OBMol.NumRotors() mol.addh() ff.Setup(mol.OBMol) ff.DiverseConfGen(rmsd_cutoff, nconfs, energy_cutoff) ff.GetConformers(mol.OBMol) confdata = pybel.ob.toConformerData(mol.OBMol.GetData(pybel.ob.ConformerData)) energies = confdata.GetEnergies() N = mol.OBMol.NumConformers() assert N == len(energies) print "..generated %d conformers" u = time.time() data = [] for i in range(N): mol.OBMol.SetConformer(i) outputfile.write(mol) print "..(overall time = %.1fs writing results = %.1fs)" % (time.time() - t, time.time() -u) print "\n" outputfile.close()
def annotate_sdf_volume(insdf, outsdf):
    """Copy ``insdf`` to ``outsdf`` adding an "estimated_volume" data field.

    The field holds the value of ``estimate_volume(mol)`` for each molecule;
    ``outsdf`` is overwritten if it exists.
    """
    output = pybel.Outputfile("sdf", outsdf, overwrite=True)
    try:
        for mol in pybel.readfile("sdf", insdf):
            mol.data["estimated_volume"] = estimate_volume(mol)
            output.write(mol)
    finally:
        # close the writer even when a molecule fails
        output.close()
def recompta(program): #dictionaries for the number of conformers and the files num_conf = {x:0 for x in xrange(8)} total_files = {x:0 for x in xrange(8)} for hetid in hetids: #set of conformers for a specific ligand generated by a specific tool files_conformations = glob(path_to_conformers + program + '/' + hetid +'_*.sdf') for file in files_conformations: #list of conformers for a specific ligand mollist = list( pybel.readfile('sdf', file)) nmols = len(mollist) mol = mollist[0] enrotlist = [mol.OBMol.NumRotors() for mol in mollist] enrot = max(enrotlist) minenrot = min(enrotlist) #Error in case of there are conformers in the same file with different number of rotatable bonds if enrot != minenrot: print "!!!%s!!!" % file print "%s vs %s" %(enrot, minenrot) shutil.copy(file, "enrot_changing") for key in keys: if key == enrot: #addition of conformers for a specific number of rotatable bond. num_conf[key] += nmols total_files[key] += 1 print program, key, ':', num_conf[key] print program, key, ':', total_files[key] return num_conf, total_files, program
def fp_mds(fptype):
    """Plot a 2-component PCA map of the fingerprints in 'solubility.test.sdf'.

    Fingerprints of type ``fptype`` are turned into a 0/1 matrix, scaled and
    projected with ``pca``. A linear RBF interpolation of random (dummy)
    activities is drawn as an image with the molecules scattered on top, in
    the subplot that corresponds to the position of ``fptype`` among the
    last four entries of ``pybel.fps``.
    """
    all_bits = (mol.calcfp(fptype=fptype).bits
                for mol in pybel.readfile('sdf', 'solubility.test.sdf'))
    # molecules without any set bit are left out
    bit_lists = [bits for bits in all_bits if len(bits) > 0]

    highest_bit = max(max(bits) for bits in bit_lists)
    mat = np.zeros((len(bit_lists), highest_bit + 1), dtype=np.float32)
    for row, bits in enumerate(bit_lists):
        mat[row, bits] = 1.0
    mat = scale(mat)

    pcs = np.real(pca(mat, npc=2))
    activities = np.random.randn(mat.shape[0])  # dummy activity

    pc1 = pcs[:, 0]
    pc2 = pcs[:, 1]

    # Set up a regular grid of interpolation points
    grid_x = np.linspace(pc1.min(), pc1.max())
    grid_y = np.linspace(pc2.min(), pc2.max())
    xi, yi = np.meshgrid(grid_x, grid_y)

    # Interpolate
    rbf = scipy.interpolate.Rbf(pc1, pc2, activities, function='linear', smooth=0.1)
    zi = rbf(xi, yi)

    plt.subplot(2, 2, pybel.fps[-4:].index(fptype) + 1)
    plt.title('%s' % fptype)
    plt.imshow(zi, vmin=zi.min(), vmax=zi.max(), origin='lower',
               cmap='RdYlGn_r', aspect='auto',
               extent=[pc1.min(), pc1.max(), pc2.min(), pc2.max()])
    plt.scatter(pc1, pc2, c=activities, cmap='RdYlGn_r')
def main():
    """Write a tab-separated InChI / InChIKey table for the SDF read from stdin.

    The input is spooled to a temporary ``.sdf`` file so pybel can read it.
    Each molecule gets hydrogens and 3D coordinates before conversion. The
    table goes to ``args['outfile']``; the temporary file is always removed.
    """
    fa = sys.stdin.read()
    fa = fa.rstrip()
    with open(args['outfile'], 'w') as of:
        of.write('cid\t')
        labels = 'InChI\tInChIkey'
        of.write(labels + '\n')
        inputTemp = tempfile.NamedTemporaryFile(suffix='.sdf', delete=False)
        inputTempName = inputTemp.name
        try:
            try:
                inputTemp.write(fa)
            finally:
                inputTemp.close()
            for mol in pybel.readfile('sdf', inputTempName):
                myid = mol.title
                mol.addh()
                mol.make3D()
                conv = ob.OBConversion()
                conv.SetInAndOutFormats("sdf", "inchi")
                inchi = conv.WriteString(mol.OBMol)
                conv.SetInAndOutFormats("sdf", "inchikey")
                inchikey = conv.WriteString(mol.OBMol)
                # keep only what follows the "InChI=" prefix on the first line
                info = re.match(r"^InChI=(.*)\n", inchi).group(1)
                of.write(myid.strip() + '\t' + info + '\t' + inchikey + '\n')
        finally:
            # remove the spool file even when a conversion fails
            os.unlink(inputTempName)
def readfile(format, filename, opt=None, lazy=False):
    """Return a molecule reader for ``filename``.

    With ``lazy`` set, 'mol2' and 'sdf' files are handled by the module's
    own ``_filereader_mol2`` / ``_filereader_sdf``; every other case is
    delegated to ``pybel.readfile``.
    """
    if lazy:
        if format == 'mol2':
            return _filereader_mol2(filename, opt=opt)
        if format == 'sdf':
            return _filereader_sdf(filename, opt=opt)
    return pybel.readfile(format, filename, opt=opt)
def step(context):
    """Load the SD test file and store its SMILES strings in ``context.post_data``."""
    # pull the contents of our SD test file
    sdf_path = os.path.join(os.path.dirname(__file__), 'files/behave_sdf.sdf')
    mols = []
    for mol in readfile('sdf', sdf_path):
        # keep only the text before the first tab of the written smi record
        smiles = mol.write("smi").split("\t")[0]
        mols.append(smiles)
    print(len(mols))
    context.post_data["type"] = "Smiles"
    context.post_data["objects"] = mols
def step(context, action=None, projkey=None):
    """Assert that every InChI of the reference list has been registered.

    The registered compounds are fetched from the compound-batch API and
    their 'standardInchi' values compared with the InChIs read from
    'files/inchi-list.txt'; the step fails when any reference InChI is
    missing from the response.
    """
    from cbh_core_model.models import Project, CBHCompoundBatch
    from rdkit import Chem
    from rdkit.Chem import AllChem, inchi

    path = "/dev/cbh_compound_batches/"
    resp = context.api_client.get(
        path,
        format='json',
        data=context.post_data,
    )
    reg_cmpds = context.ser.deserialize(resp.content)["objects"]
    # get a list of inchis from the response
    reg_inchis = [cmpd['standardInchi'].strip() for cmpd in reg_cmpds]

    inchi_list_path = os.path.join(os.path.dirname(__file__), 'files/inchi-list.txt')
    inchis = []
    for mol in readfile('inchi', inchi_list_path):
        inchis.append(mol.write("inchi").split("\t")[0].strip())

    # set subtraction of the registered inchis from the hardcoded inchis
    print(len(inchis))
    print(len(reg_inchis))
    diff = list(set(inchis) - set(reg_inchis))
    print(len(diff))
    assert len(diff) == 0
def pocketSection(self):
    """Return the "PKT" section: cleaned PDB lines of atoms near the ligand.

    ``self.lig_path`` is either the path of an existing ligand file (format
    taken from its extension, first molecule used) or a ``pybel.Molecule``.
    A protein atom is kept when it lies closer than ``self.threshold`` to
    any ligand atom. The header carries the number of distinct residue
    numbers and ``self.title`` (or the ligand title's last path component
    when the title is empty).

    Raises:
        Exception: when ``self.lig_path`` is neither of the accepted inputs.
    """
    cleaned = self.__cleanedPdb()
    prt = pybel.readstring("pdb", cleaned)
    if isinstance(self.lig_path, str) and os.path.exists(self.lig_path):
        suffix = self.lig_path.split('.')[-1]
        # builtin next() works on Python 2.6+ and 3; generator.next() is 2-only
        lig = next(pybel.readfile(suffix, self.lig_path))
    elif isinstance(self.lig_path, pybel.Molecule):
        lig = self.lig_path
    else:
        raise Exception("Wrong input for ligand")

    pkt_lines = []
    residues = set()
    # the cleaned text and the parsed atoms are walked in lockstep
    for line, atom in zip(cleaned.split("\n")[:-1], prt.atoms):
        coords = atom.coords
        # generator form stops at the first ligand atom within range
        if any(euclidean(coords, a.coords) < self.threshold for a in lig.atoms):
            pkt_lines.append(line)
            res_num = int(line[22:26])
            residues.add(res_num)

    if self.title == "":
        start_pkt_line = "\nPKT %d 1000 %s\n" % (len(residues), lig.title.split('/')[-1])
    else:
        start_pkt_line = "\nPKT %d 1000 %s\n" % (len(residues), self.title)
    return start_pkt_line + "\n".join(pkt_lines) + "\nTER\n"
def aggregate(self):
    """Remove hydrogens, add <MOLID> and gather all ZINC ligands in one sdf.

    Every "ZINC*" file of the untarred directory is read; files that fail to
    load are reported on stderr and skipped. The output path is stored in
    ``self.aggregated_ofn``. A failure while writing is reported on stderr
    and stops the writing loop.
    """
    untarred_dir = self.requires().output().path
    mols = []
    for zinc in glob(os.path.join(untarred_dir, "ZINC*")):
        try:
            loaded = list(pybel.readfile('sdf', zinc))
        except Exception:
            print("WARNING: ", "Fail to load zinc ligand %s" % zinc, file=sys.stderr)
        else:
            mols.extend(loaded)

    ofn = os.path.join(self.subset_work_dir, self.ligand_code + '_1.sdf')
    self.aggregated_ofn = ofn
    ofs = pybel.Outputfile('sdf', ofn, overwrite=True)
    try:
        for mol in mols:
            mol.removeh()
            mol.data['MOLID'] = mol.title
            ofs.write(mol)
    except Exception as detail:
        print("WARNING:", detail, file=sys.stderr)
    finally:
        ofs.close()
def main( args ):
    """Write every input molecule as a labelled graph (t / v / e records).

    For each file of ``args.infile`` the format is ``args.format`` or, when
    that is not given, the file extension (which must be smi, sdf, inchi or
    mol). Per molecule a ``t # id <title>`` line is written to
    ``args.outfile``, followed by one ``v <index> <atomic number>`` line per
    atom and one ``e <begin> <end> <label>`` line per bond, where the label
    is a (aromatic), s, d or t.
    """
    for infile in args.infile:
        file_extension = args.format or os.path.splitext( infile )[-1].lstrip('.')
        if not args.format and file_extension not in ['smi', 'sdf', 'inchi', 'mol']:
            sys.exit('Could not guess the format from the file extension please specify with the --format option.')

        molecules = pybel.readfile(file_extension, infile)
        for mol in molecules:
            args.outfile.write( 't # id %s\n' % mol.title.strip() )
            for atom in openbabel.OBMolAtomIter( mol.OBMol):
                label = atom.GetAtomicNum()
                vertex_index = atom.GetIdx()
                args.outfile.write('v %s %s\n' % (vertex_index, label))

            for bond in openbabel.OBMolBondIter( mol.OBMol):
                src_index = bond.GetBeginAtomIdx()
                dest_index = bond.GetEndAtomIdx()
                assert(src_index > 0)
                assert(dest_index > 0)
                # NOTE(review): a bond matching none of the tests below keeps
                # the previous value of ``label`` (the last atom's atomic
                # number or the previous bond's label) - confirm whether such
                # bonds can occur and what they should be written as.
                if bond.IsAromatic():
                    label = 'a'
                elif bond.IsSingle():
                    label = 's'
                elif bond.IsDouble():
                    label = 'd'
                elif bond.IsTriple():
                    label = 't'
                args.outfile.write('e %s %s %s\n' % (src_index, dest_index, label))
def main():
    """Write a CSV of descriptors for the SDF read from stdin.

    The columns are "cid" followed by the names in the module-level
    ``properties``. The input is spooled to a temporary ``.sdf`` file so
    pybel can read it. Each molecule gets hydrogens and 3D coordinates
    before ``calcdesc`` is called. The table goes to ``args['outfile']``;
    the temporary file is always removed.
    """
    fa = sys.stdin.read()
    fa = fa.rstrip()
    with open(args['outfile'], 'w') as of:
        of.write('cid,')
        # str.join also copes with an empty ``properties`` list
        labels = ','.join(properties)
        of.write(labels + '\n')
        inputTemp = tempfile.NamedTemporaryFile(suffix='.sdf', delete=False)
        inputTempName = inputTemp.name
        try:
            try:
                inputTemp.write(fa)
            finally:
                inputTemp.close()
            for mol in pybel.readfile('sdf', inputTempName):
                myid = mol.title
                mol.addh()
                mol.make3D()
                desc = mol.calcdesc()
                info = ','.join(str(desc[thisdesc]) for thisdesc in properties)
                of.write(myid.strip() + ',' + info + '\n')
        finally:
            # remove the spool file even when a molecule fails
            os.unlink(inputTempName)
def getRingAtomsMulti(molfilePath):
    """Return a text report of the SSSR rings and per-atom ring data of a mol file.

    The report lists the ring count, each ring's size and zero-based member
    indices, the atom count and one line per atom.

    NOTE(review): the source layout was flattened; the return is assumed to
    follow the loop, so the report of the last molecule read is returned.
    """
    for m in pybel.readfile("mol", molfilePath):
        obmol = m.OBMol
        rings = obmol.GetSSSR()
        parts = []

        ring_total = sum(1 for _ in rings)
        parts.append("Ring count " + str(ring_total) + "\n")
        for ring in rings:
            parts.append("Ring size " + str(ring.Size()) + "\n")
            # ring members are reported zero-based
            parts.extend(str(member - 1) + "\n" for member in ring._path)

        parts.append("Atom count " + str(obmol.NumAtoms()) + "\n")
        parts.append("Index HowManyRings RingSize Hybridization Hydro_count Aromaticity AntiClockwise_chiral" + "\n")
        for index, a in enumerate(openbabel.OBMolAtomIter(obmol)):
            fields = [str(index),
                      str(a.MemberOfRingCount()),
                      str(a.MemberOfRingSize()),
                      str(a.GetHyb()),
                      str(a.ImplicitHydrogenCount())]
            fields.append("1" if a.IsAromatic() else "0")
            fields.append("1" if a.IsClockwise() else "0")
            parts.append(" ".join(fields) + "\n")

        outString = "".join(parts)
    return outString
def read_file(filename, name=None, format=None):
    """ Read a molecule from a file

    Note:
        Currently only reads the first conformation in a file

    Args:
        filename (str): path to file
        name (str): name to assign to molecule; when omitted in the local
            code path, the file's base name is used
        format (str): File format: pdb, sdf, mol2, bbll, etc. Defaults to
            the text after the last "." of ``filename``

    Returns:
        moldesign.Molecule: parsed result
    """
    # TODO: check for openbabel molecule name?
    if format is None:
        format = filename.split('.')[-1]

    if force_remote:
        with open(filename, 'r') as infile:
            mol = read_string(infile.read(), format, name=name)
        return mol
    else:
        # builtin next() works on Python 2.6+ and 3; generator.next() is 2-only
        pbmol = next(pb.readfile(format=format, filename=filename))
        if name is None:
            name = filename
        mol = pybel_to_mol(pbmol, name=os.path.basename(name))
        mol.filename = filename
        return mol
def readatoms(self, frame):
    """Return the atoms of frame number ``frame`` as a ``data.Atoms`` object.

    Raises:
        IndexError: when the file has no such frame.
        IOError: passed through unchanged.
        FileError: for every other failure while reading.
    """
    try:
        if self.info.num_frames <= frame:
            raise IndexError("Frame {} not found".format(frame))

        file_extension = os.path.splitext(self.path)[1][1:]
        mol_iter = pybel.readfile(file_extension.encode('utf8'),
                                  self.path.encode('utf8'))

        # get the correct frame
        try:
            for _ in range(frame):
                next(mol_iter)
            mol = next(mol_iter)
        except StopIteration:
            raise IndexError("Frame {} not found".format(frame))

        # read the atom information
        positions = [tuple(float(c) for c in atom.coords) for atom in mol.atoms]
        symbols = [core.elements.symbols[atom.atomicnum] for atom in mol.atoms]
        return data.Atoms(positions, None, symbols, self.info.volume)
    except (IOError, IndexError):
        raise
    except Exception as e:
        raise FileError("Cannot read atom data.", e)
def run():
    """Return the first prediction of the volume model for the input file, rounded to 2 decimals.

    The file named by ``sys.argv[1]`` is read with the format given by its
    extension. For every molecule fourteen descriptors and the counts of
    "[+]" and "[-]" SMARTS matches are appended to one flat feature tuple,
    which is passed to the model loaded from 'volume_model/volume.pkl'.
    """
    descriptor_names = ('TPSA', 'HBD', 'logP', 'MW', 'tbonds', 'nF', 'bonds',
                        'atoms', 'HBA1', 'HBA2', 'sbonds', 'dbonds', 'MR',
                        'abonds')
    charge_patterns = ("[+]", "[-]")

    path = sys.argv[1]
    inputfile = pybel.readfile(path.split(".")[-1], path)
    value = ()
    for mol in inputfile:
        descvalues = mol.calcdesc()
        for descriptor in descriptor_names:
            value = value + (descvalues.get(descriptor),)
        for pattern in charge_patterns:
            matches = pybel.Smarts(pattern).findall(mol)
            value = value + (len(matches),)

    model = joblib.load('volume_model/volume.pkl')
    for result in model.predict(value):
        return round(result, 2)
def get_dihedrals(fname, a1, a2, a3):
    """Print ``GetAngle`` of atoms a1, a2, a3 for every molecule in ``fname``.

    The file format is taken from the extension of ``fname``. Despite the
    function name, a three-atom angle is printed, not a dihedral.
    """
    file_format = os.path.splitext(fname)[1][1:]
    # read all the molecules from file
    for mol in pybel.readfile(file_format, fname):
        obmol = mol.OBMol
        first = obmol.GetAtom(a1)
        middle = obmol.GetAtom(a2)
        last = obmol.GetAtom(a3)
        print(obmol.GetAngle(first, middle, last))
import sys
import copy
import pybel
import openbabel
from correct_sdf import *

if __name__ == "__main__":
    # usage: <script> <nbo file> <xyz file>
    nbo_filename = sys.argv[1]
    xyz_filename = sys.argv[2]

    bonds, charges, total_charge = get_bonds_nbo(nbo_filename)
    # builtin next() works on Python 2.6+ and 3; generator.next() is 2-only
    mol = next(pybel.readfile("xyz", xyz_filename))
    # replace the bonds of the xyz structure by the ones from the NBO file
    mol = delete_bonds_from_mol(mol)
    mol_corrected = add_bonds_to_mol(mol, bonds)

    # sum of (atomic number - charges[i]) over all atoms
    total_charge_check = 0
    for i, atom in enumerate(mol):
        nuc = atom.OBAtom.GetAtomicNum()
        formal_charge = nuc - charges[i]
        total_charge_check += formal_charge
#   - removes all fields with ligprep info
#   - adds a field "id" with the vendor code
#####################################################################

import pybel, sys

infile = sys.argv[1]
outfile = sys.argv[2]

## remove "M CHG 0" error lines
nochg_file = infile.replace(".sdf", "_noCHG0.sdf")
if nochg_file == infile:
    # no ".sdf" in the name: never write the filtered copy over the input
    nochg_file = infile + "_noCHG0.sdf"
with open(infile) as src, open(nochg_file, "w") as out_nochg:
    for line in src:
        if "CHG 0" not in line:
            out_nochg.write(line)

## add the first line ID (the molecule title) into a field "id"
# open input file (without CHG 0 lines) and output sdf file for final output
input_sdf = pybel.readfile("sdf", nochg_file)
output_sdf = pybel.Outputfile("sdf", outfile)

# loop for each molecule:
# remove all ligprep info fields and add field "id" with the title
try:
    for mol in input_sdf:
        mol.data.clear()
        mol.data["id"] = mol.title
        output_sdf.write(mol)
finally:
    # close the writer even when a molecule fails
    output_sdf.close()
def remove_protonation( args ):
    """Copy ``args.input`` to ``args.output`` with every formal charge set to 0.

    Both files use the format ``args.iformat``; the output file is
    overwritten if it exists.
    """
    outfile = pybel.Outputfile(args.iformat, args.output, overwrite=True)
    try:
        for mol in pybel.readfile(args.iformat, args.input):
            # plain loop: the calls are made for their side effect only
            for atom in mol.atoms:
                atom.OBAtom.SetFormalCharge(0)
            outfile.write( mol )
    finally:
        # close the writer even when a molecule fails
        outfile.close()
# Remaining command-line options; help texts live in
# variables_for_actives_recovery.
parser.add_argument("-glide_ranking", required=True, help=variables_for_actives_recovery.glide_ranking_help)
parser.add_argument( "-simulations_report", required=True, help=variables_for_actives_recovery.simulations_report_help)
parser.add_argument("-output_file", default=variables_for_actives_recovery.output_default_name, help=variables_for_actives_recovery.output_file_help)
args = parser.parse_args()
print 'processing mol2 files'
# Titles of every molecule in the actives file (format taken from the text
# after the last "."); duplicates are kept in the "variants" list and
# collapsed in the "compounds" list.
format_actives_file = args.actives_file.split('.')[-1]
total_actives_variants = [ mol.title for mol in py.readfile(format_actives_file, args.actives_file) ]
total_actives_compounds = list(set(total_actives_variants))
# Same for the inactives file.
format_inactives_file = args.inactives_file.split('.')[-1]
total_inactives_variants = [ mol.title for mol in py.readfile(format_inactives_file, args.inactives_file) ]
total_inactives_compounds = list(set(total_inactives_variants))
# Totals are stored as floats.
total_compounds = float( len(total_actives_compounds) + len(total_inactives_compounds))
total_variants = float( len(total_actives_variants) + len(total_inactives_variants))
def smi_split(file=""):
    """Split a SMILES file into one ``<index>.smi`` file per molecule.

    Args:
        file: path of the SMILES file to split. When empty (the default),
            "zz.smi" is read, which is what every call used to read
            regardless of this argument.

    The output files are written to the current directory and numbered from 0.
    """
    source = file or "zz.smi"
    for i, mol in enumerate(pybel.readfile("smi", source)):
        mol.write("smi", "%s.smi" % i)
def _export_molecule(mol, handle, cid, images, write_mol):
    """Write ``<cid>.png`` and/or ``<cid>.sdf`` next to the file behind ``handle``."""
    if images:
        mol.draw(
            show=False,
            update=True,
            usecoords=True,
            filename=os.path.join(os.path.dirname(handle.name), cid + ".png"),
        )
    if write_mol:
        mol.write(
            "sdf",
            os.path.join(os.path.dirname(handle.name), cid + ".sdf"),
        )


def run(input_path, id_string, is_dir, inchi, images, write_mol, consisting_of_isoprens_required, ):
    """Sort the molecules of a mol file into output files by largest component length.

    For every molecule one line ``<id>\\t<ring count>[\\t<inchi>]`` is written:
      - length not a multiple of 5: to the "potential_terpenoids" handle;
      - multiple of 5 and above 40: to the "polyterpene" handle;
      - 0: nowhere;
      - any other multiple of 5: to the handle keyed by that length.
    With ``consisting_of_isoprens_required`` set, multiples of 5 that fail
    ``consisting_of_isoprens`` are skipped. For the multiples of 5 an image
    (``images``) and/or an sdf (``write_mol``) is written beside the
    respective output file; failures there are best effort.
    """
    print("Processing:\t%s" % input_path, id_string)
    file_handles = create_file_handles(id_string, is_dir)
    for mol in pybel.readfile("mol", input_path):
        g = mol_to_networkxgraph(mol)
        largest_component = get_largest_component_length(g)
        cid = mol.data[id_string]
        rings = len(mol.sssr)
        if inchi:
            out = "%s\t%s\t%s\n" % (cid, rings, mol.write("inchi").strip())
        else:
            out = "%s\t%s\n" % (cid, rings)

        if largest_component % 5 == 0:
            # a multiple of 5 carbon atoms
            if consisting_of_isoprens_required:
                if not consisting_of_isoprens(mol):
                    continue
            # all remaining atoms are none carbon atoms
            if largest_component > 40:
                file_handles["polyterpene"].write(out)
                try:
                    _export_molecule(mol, file_handles["polyterpene"], cid, images, write_mol)
                except Exception:
                    print("no image for %s" % cid)
            elif largest_component == 0:
                pass
            else:
                file_handles[largest_component].write(out)
                try:
                    _export_molecule(mol, file_handles[largest_component], cid, images, write_mol)
                except Exception:
                    # export is optional here; failures are ignored silently
                    pass
        else:
            # __not__ a multiple of 5 carbon atoms
            file_handles["potential_terpenoids"].write(out)
def sdfVox(name, activeMatrix, trans, d, l, f):
    """ Read in all molecules from the sdf file and voxelize each of them.

    For every molecule the nuclei are shifted by ``-trans``, an electron
    cloud read from ``<atom type>.txt`` (in ``cloudPath``) is placed around
    each atom, and the result is combined with ``activeMatrix`` through
    ``voxData``. The matrix is appended to ``d``, the molecule's
    'minimizedAffinity' (as float32) to ``l`` and the file name to ``f``.

    The working directory is switched between the module-level
    ``posesPath`` and ``cloudPath`` while running.
    """
    os.chdir(posesPath)
    molList = list(pybel.readfile('sdf', name))
    # NOTE(review): the source layout was flattened; the count is assumed to
    # be printed once, after all molecules were read.
    print(len(molList))

    # For every molecule from the sdf file, go through voxelization process
    for mol in molList:
        # Transforms the nuclei by the same transformations of the activesite
        coords = []  # nucleus xyz location
        aNum = []    # elements atomic number
        for atom in mol:
            aNum.append(atom.atomicnum)
            coords.append(atom.coords)

        transformedNuclei = [
            tuple([addRoundHundredth(xyz[0], -trans[0]),
                   addRoundHundredth(xyz[1], -trans[1]),
                   addRoundHundredth(xyz[2], -trans[2])])
            for xyz in coords
        ]

        # Places electron cloud around each ligand atom.
        os.chdir(cloudPath)
        transformedElectrons = []
        for atomic_number, nucleus in zip(aNum, transformedNuclei):
            # the cloud file is closed again once read (it used to leak)
            with open(getAtomType(atomic_number) + ".txt", 'r') as cloudFile:
                for line in cloudFile:
                    split = [x.strip() for x in line.split(',')]
                    transformedElectrons.append(tuple([
                        addRoundHundredth(nucleus[0], float(split[0])),
                        addRoundHundredth(nucleus[1], float(split[1])),
                        addRoundHundredth(nucleus[2], float(split[2])),
                        gNum(atomic_number)]))

        # Adds the ligand information into the protein active site matrix in
        # a manner that simulates the docked pose.
        # NOTE(review): activeMatrix is passed as is (no copy is made);
        # confirm that voxData does not modify it in place.
        dockedLigandMatrix = voxData(activeMatrix, transformedElectrons)

        # Append all voxelized values to hdf5 file
        molEnergy = mol.data['minimizedAffinity']
        outEnergy = np.asarray(molEnergy, dtype = np.float32)
        d.appendVal(dockedLigandMatrix)  # Appends matrix
        l.appendVal(outEnergy)           # Appends energy
        f.appendVal(np.string_(name))    # Appends file name
        os.chdir(posesPath)
#!/usr/bin/python import pybel, openbabel, glob, sys, os lis = sys.argv lis.pop(0) for x in sorted(lis): for mol in pybel.readfile("g09", x): for ring in mol.sssr: homaring = "HOMA.pl " fluring = "FLU.pl " homaring += x fluring += x # for ring in mol.sssr: for atom in list(ring._path): homaring += " " + str(atom) fluring += " " + str(atom) print homaring + " " + str(ring._path[0]) os.system(homaring + " " + str(ring._path[0])) os.system(fluring + " " + str(ring._path[0]))
def convert_input_molecule(argv):
    """Converts input into pybel molecule and returns it

    The first molecule of ``argv.input`` is read using ``argv.informat`` or,
    when that is not set, ``DEFAULT_INPUT``.
    """
    informat = argv.informat if argv.informat else DEFAULT_INPUT
    # builtin next() works on Python 2.6+ and 3; generator.next() is 2-only
    return next(pybel.readfile(informat, argv.input))
def get_dihedrals(fname, a1, a2, a3, a4):
    """Print ``GetTorsion(a1, a2, a3, a4)`` for every molecule in ``fname``.

    The file format is taken from the extension of ``fname``.
    """
    file_format = os.path.splitext(fname)[1][1:]
    # read all the molecules from file
    for mol in pybel.readfile(file_format, fname):
        torsion = mol.OBMol.GetTorsion(a1, a2, a3, a4)
        print("%f" % torsion)
def main(argv):
    """Draw the first molecule of the "mopout" file named by the last item of ``argv``.

    Note that the item is removed from ``argv``.
    """
    # builtin next() works on Python 2.6+ and 3; generator.next() is 2-only
    mol = next(pybel.readfile("mopout", argv.pop()))
    mol.draw()
def log2xyz(logfile):
    """Write the first molecule of a g09 log file as an xyz file.

    The output name is ``logfile`` with ".log" replaced by ".xyz"; an
    existing output file is overwritten. When ``logfile`` contains no
    ".log", ".xyz" is appended instead so the input is never overwritten.
    """
    # builtin next() works on Python 2.6+ and 3; generator.next() is 2-only
    mol = next(pybel.readfile("g09", logfile))
    xyzfile = logfile.replace(".log", ".xyz")
    if xyzfile == logfile:
        # nothing was replaced: writing would clobber the input file
        xyzfile = logfile + ".xyz"
    mol.write("xyz", xyzfile, overwrite=True)
def __default_load(self, fileName, fileExtension):
    """Read ``fileName`` with pybel.readfile and return all its molecules as a list."""
    reader = pybel.readfile(fileExtension, fileName)
    return list(reader)
import pybel

# Load and compile the SMARTS definitions once instead of re-reading the
# file for every molecule. A definition line has a ":" after its first
# character; the pattern is the text between its first and last quote.
patterns = []
with open("fp_noduplicate.smi", 'r') as maccsfile:
    for line_maccs in maccsfile:
        if line_maccs.find(":") > 0:
            line_maccs = line_maccs[line_maccs.find("'") + 1:line_maccs.rfind("'")]
            if len(line_maccs) > 0:
                patterns.append(pybel.Smarts(line_maccs))

inputfile = pybel.readfile("smi", "clearance_fixpka.smiles")
# One output line per molecule: title followed by the number of matches of
# every pattern, separated by single spaces.
with open("clearance_fingerprint.txt", 'w') as outfile:
    for mol in inputfile:
        outfile.write(mol.title)
        outfile.write(" ")
        for smarts in patterns:
            num = smarts.findall(mol)
            outfile.write(str(len(num)))
            outfile.write(" ")
        outfile.write("\n")
def get_coords(ac_mol2_file):
    """Return the coordinates of a mol2 file as an array with one [x, y, z] row per atom."""
    pmol = PandasMol2().read_mol2(ac_mol2_file)
    return np.array([[atom.x, atom.y, atom.z] for atom in pmol.df.itertuples()])


if __name__=="__main__":
    for idx, sdf_dataset in enumerate(DATA_SETS):
        logp_dataset = dict()
        database = pybel.readfile('sdf', sdf_dataset)
        # read the molecules in the sdf files
        for sd_record in database:
            mol_id = sd_record.data['MOLECULEID']
            file_path = mol2_file_path[idx] + mol_id+'.mol2'
            molecule_coords = get_coords(file_path)
            # molecule.data.keys() gives all the properties
            molecule = pybel.readstring("smi", sd_record.data['SMILES'])
            # add hydrogen
            molecule.OBMol.AddHydrogens()
            # minimize the energy
            molecule.make3D(forcefield="gaff", steps=STEPS)
            molecule.localopt(forcefield="gaff", steps=STEPS)
def assign(self, write=False, outfn='tmp.pdb'):
    """Assign a pharmacophore type to every atom of the structure in ``self.fn``.

    The file is read with the format given by its extension, every atom type
    is translated from the Open Babel internal table ("INT") to Sybyl ("SYB")
    and upper-cased, and a pharmacophore label is then chosen from the Sybyl
    type and the atom's neighbours.  Labels produced here are 'NU', 'A',
    'DA', 'N', 'P', 'AR', 'D', 'PL', 'H' and 'HA'.

    ``self.AtomIdx`` and ``self.AtomPharma`` must already exist; they are
    appended to / updated in place, so calling this twice adds every index
    to ``self.AtomIdx`` again.

    Parameters
    ----------
    write : bool
        When true, delete the atoms whose atomic number is not in the
        element list and write the remaining structure to *outfn* as PDB.
    outfn : str
        Output PDB file name, default 'tmp.pdb'. An existing file is
        overwritten.

    Returns
    -------
    AtomIdx : list
        Atom indices of the structure.
    AtomPharma : dict
        Maps atom index to ``[atomic number, pharmacophore type, coords]``.
    """
    # Nine elements are used in the study: C, N, O, F, P, S, Cl, Br, I
    elementint = [6, 7, 8, 9, 15, 16, 17, 35, 53]

    # suppress the logging information
    pybel.ob.obErrorLog.StopLogging()

    # table converting the OB internal atom type to Sybyl
    ttab = pybel.ob.OBTypeTable()
    ttab.SetFromType("INT")
    ttab.SetToType("SYB")

    # read in the molecule; next() works on both Python 2 and 3, whereas the
    # generator's .next() method only exists on Python 2
    __, ft = os.path.splitext(self.fn)
    mol = next(pybel.readfile(ft[1:], self.fn))

    # convert the atom type from internal to Sybyl and make it upper case
    for atom in mol.atoms:
        at = ttab.Translate(atom.OBAtom.GetType())
        at = at.upper()
        atom.OBAtom.SetType(at)

    # assign pharmacophore type
    for atom in mol.atoms:
        # append atom idx to the AtomIdx
        self.AtomIdx.append(atom.idx)
        at = atom.type

        # pharma type for element not in C,N,O,P,S,F,Cl,Br,I
        if atom.atomicnum not in elementint:
            p = 'NU'

        # pharma type for oxygen
        elif at in ['O.3', 'O.2', 'O.CO2']:
            p = 'A'
            nbrs = [i for i in ob.OBAtomAtomIter(atom.OBAtom)]
            if len(nbrs) == 2:
                for nbr in ob.OBAtomAtomIter(atom.OBAtom):
                    if nbr.GetAtomicNum() == 1:
                        p = 'DA'
            elif len(nbrs) == 1:
                nbr = list(nbrs)[0]
                # neighbour is C or P: count its terminal O/S neighbours
                # (the original comment calls this the "coo-" check)
                if nbr.GetAtomicNum() in [6, 15]:
                    c = 0
                    for nbr2 in ob.OBAtomAtomIter(nbr):
                        if nbr2.GetAtomicNum() in [8, 16]:
                            if len(list(ob.OBAtomAtomIter(nbr2))) == 1:
                                c += 1
                    if c >= 2:
                        p = 'N'
                # neighbour is S: count its terminal O neighbours
                elif nbr.GetAtomicNum() == 16:
                    c = 0
                    for nbr2 in ob.OBAtomAtomIter(nbr):
                        if nbr2.GetAtomicNum() == 8:
                            if len(list(ob.OBAtomAtomIter(nbr2))) == 1:
                                c += 1
                    if c >= 3:
                        p = 'N'

        # pharma type for nitrogen
        elif at == 'N.4':
            p = 'P'
        elif at == 'N.3':
            p = 'A'
            for nbr in ob.OBAtomAtomIter(atom.OBAtom):
                if nbr.GetAtomicNum() == 1:
                    p = 'DA'
                    break
        elif at == 'N.2':
            p = 'A'
            nbrs = [i for i in ob.OBAtomAtomIter(atom.OBAtom)]
            if len(nbrs) == 3:
                p = 'P'
            else:
                for nbr in ob.OBAtomAtomIter(atom.OBAtom):
                    if nbr.GetAtomicNum() == 1:
                        p = 'DA'
        elif at == 'N.1':
            p = 'A'
        elif at == 'N.AR':
            p = 'AR'
            nbrs = [i for i in ob.OBAtomAtomIter(atom.OBAtom)]
            if len(nbrs) == 3:
                for nbr in ob.OBAtomAtomIter(atom.OBAtom):
                    if nbr.GetAtomicNum() == 1:
                        p = 'D'
            elif len(nbrs) == 2:
                p = 'A'
        elif at == 'N.AM':
            p = 'PL'
            for nbr in ob.OBAtomAtomIter(atom.OBAtom):
                if nbr.GetAtomicNum() == 1:
                    p = 'D'
        elif at == 'N.PL3':
            p = 'A'
            for nbr in ob.OBAtomAtomIter(atom.OBAtom):
                # a 'C.CAT' neighbour wins over any hydrogen and ends the scan
                if nbr.GetType() == 'C.CAT':
                    p = 'P'
                    break
                elif nbr.GetAtomicNum() == 1:
                    p = 'DA'

        # pharma type for sulfur
        elif at in ['S.3', 'S.2', 'S.O', 'S.O2']:
            p = 'PL'
            nbrs = [i for i in ob.OBAtomAtomIter(atom.OBAtom)]
            if len(nbrs) == 1:
                p = 'A'
                nbr = nbrs[0]
                if nbr.GetAtomicNum() == 6:
                    nbrs2 = [i for i in ob.OBAtomAtomIter(nbr)]
                    if len(nbrs2) == 4:
                        p = 'N'
                    elif len(nbrs2) == 3:
                        # count terminal O/S neighbours of the carbon
                        c = 0
                        for nbr3 in ob.OBAtomAtomIter(nbr):
                            if nbr3.GetAtomicNum() in [8, 16]:
                                if len(list(ob.OBAtomAtomIter(nbr3))) == 1:
                                    c += 1
                        if c >= 2:
                            p = 'N'
            elif len(nbrs) == 2:
                p = 'A'
                for nbr in ob.OBAtomAtomIter(atom.OBAtom):
                    if nbr.GetAtomicNum() == 1:
                        p = 'DA'

        # pharma type for carbon
        elif at == 'C.AR':
            p = 'AR'
        elif at in ['C.1', 'C.2', 'C.3', 'C.CAT']:
            p = 'H'
            for nbr in ob.OBAtomAtomIter(atom.OBAtom):
                if nbr.GetAtomicNum() in [7, 8, 9, 15, 16]:
                    p = 'PL'
                    break

        # pharma type for P and halogen
        elif at == 'P.3':
            p = 'PL'
        elif at in ['F', 'CL', 'BR', 'I']:
            p = 'HA'

        # pharma type for general carbon not assigned above
        elif atom.atomicnum == 6:
            p = 'H'
        # pharma type for N,O,F,S,P,Cl,Br,I not assigned above; this branch
        # always matches here because the first test already handled every
        # element outside the list, so p is set on every path
        elif atom.atomicnum in elementint:
            p = 'PL'

        # AtomPharma dict with atomicnum, pharma type, and coords
        # the coords is for SADE only
        self.AtomPharma[atom.idx] = [atom.atomicnum, p, atom.coords]

    if write:
        # walk the indices backwards while deleting atoms
        for idx in self.AtomIdx[::-1]:
            if self.AtomPharma[idx][0] not in elementint:
                mol.OBMol.DeleteAtom(mol.OBMol.GetAtom(idx))
        output = pybel.Outputfile("pdb", outfn, overwrite=True)
        output.write(mol)
        output.close()

    return self.AtomIdx, self.AtomPharma
def extract_docking_poses(ligands_dict, no_checks=False, verbosity=0):
    """Read every ligand in *ligands_dict* and return them keyed by name.

    Each value of *ligands_dict* may be a ``str`` (read through
    ``generic_mol_read`` with the lower-cased file extension as format), an
    ``all_classes.Namespace`` (its ``format`` and ``data`` are used), a
    ``dict`` with a ``'molecule'`` entry (either an ``rdkit.Chem.Mol`` or
    something ``generic_mol_read`` accepts; ``'format'`` defaults to the
    extension of ``'molecule'``) or an ``rdkit.Chem.Mol``.

    When the primary read returns ``None`` and *no_checks* is true, the
    ligand is read with pybel instead and converted with
    ``mol_util.obmol_to_rwmol``.  If that read fails with ``OSError`` or
    ``StopIteration`` an error is printed and the ligand is left out of the
    result.

    :param dict ligands_dict: dict containing docking poses
    :param bool no_checks: ignore checks and try to go on
    :param int verbosity: verbosity level
    :rtype: dict
    :raises TypeError: a ligand value is of an unsupported type (the pybel
        fallback only supports ``str`` and ``all_classes.Namespace``)
    :raises SystemExit: the primary read failed and *no_checks* is false
    """
    os_util.local_print(
        'Entering extract_docking_poses(poses_data={}, verbosity={})'
        ''.format(ligands_dict, verbosity),
        msg_verbosity=os_util.verbosity_level.debug,
        current_verbosity=verbosity)

    os_util.local_print('{:=^50}\n{:<15} {:<20}'.format(
        ' Poses read ', 'Name', 'File'),
                        msg_verbosity=os_util.verbosity_level.default,
                        current_verbosity=verbosity)

    docking_mol_local = {}
    for each_name, each_mol in ligands_dict.items():
        if isinstance(each_mol, str):
            ligand_format = splitext(each_mol)[1].lower()
            docking_mol_rd = generic_mol_read(ligand_format, each_mol, verbosity=verbosity)
        elif isinstance(each_mol, all_classes.Namespace):
            docking_mol_rd = generic_mol_read(each_mol.format, each_mol.data, verbosity=verbosity)
        elif isinstance(each_mol, dict):
            if isinstance(each_mol['molecule'], rdkit.Chem.Mol):
                docking_mol_rd = each_mol['molecule']
            else:
                # setdefault also stores the derived format in the caller's dict
                ligand_format = each_mol.setdefault(
                    'format', os.path.splitext(each_mol['molecule'])[1])
                docking_mol_rd = generic_mol_read(ligand_format, each_mol['molecule'],
                                                  verbosity=verbosity)
        elif isinstance(each_mol, rdkit.Chem.Mol):
            docking_mol_rd = each_mol
        else:
            os_util.local_print(
                "Could not understand type {} (repr: {}) for your ligand {}"
                "".format(type(each_mol), repr(each_mol), each_name),
                current_verbosity=verbosity,
                msg_verbosity=os_util.verbosity_level.error)
            raise TypeError('Ligand must be str or all_classes.Namespace')

        if docking_mol_rd is not None:
            os_util.local_print("Read molecule {} from {}"
                                "".format(each_name, each_mol),
                                current_verbosity=verbosity,
                                msg_verbosity=os_util.verbosity_level.info)
            docking_mol_rd = mol_util.process_dummy_atoms(docking_mol_rd, verbosity=verbosity)
            docking_mol_local[each_name] = docking_mol_rd
            os_util.local_print('{:<15} {:<18}'.format(each_name, str(each_mol)),
                                msg_verbosity=os_util.verbosity_level.default,
                                current_verbosity=verbosity)
            os_util.local_print(
                'Read molecule {} (SMILES: {}) from file {}'
                ''.format(each_name, rdkit.Chem.MolToSmiles(docking_mol_rd), each_mol),
                msg_verbosity=os_util.verbosity_level.debug,
                current_verbosity=verbosity)
        elif no_checks:
            os_util.local_print(
                'Could not read data in {} using rdkit. Falling back to openbabel. It is strongly '
                'advised you to check your file and convert it to a valid mol2.'
                ''.format(str(each_mol)),
                msg_verbosity=os_util.verbosity_level.warning,
                current_verbosity=verbosity)
            # imported lazily: pybel is only needed on this fallback path
            import pybel
            if verbosity <= 3:
                pybel.ob.obErrorLog.SetOutputLevel(pybel.ob.obError)
            try:
                # isinstance() for consistency with the type dispatch above
                if isinstance(each_mol, str):
                    ligand_format = splitext(each_mol)[1].lstrip('.').lower()
                    docking_mol_ob = next(pybel.readfile(ligand_format, each_mol))
                elif isinstance(each_mol, all_classes.Namespace):
                    docking_mol_ob = pybel.readstring(each_mol.format, each_mol.data)
                else:
                    # NOTE(review): dict ligands end up here although the
                    # success branch below formats each_mol['comment'] for
                    # dicts -- confirm whether dicts should be readable by
                    # the fallback.
                    os_util.local_print(
                        "Could not understand type {} (repr: {}) for your ligand {}"
                        "".format(type(each_mol), repr(each_mol), each_name))
                    raise TypeError(
                        'Ligand must be str or all_classes.Namespace')
            except (OSError, StopIteration) as error_data:
                # no_checks is true in this branch, so the ligand is reported
                # and skipped; the former "if not no_checks: raise
                # SystemExit(1)" here could never fire and was removed.
                os_util.local_print(
                    'Could not read your ligand {} from {} using rdkit nor openbabel. Please '
                    'check/convert your ligand file. Openbabel error was: {}'
                    ''.format(each_name, str(each_mol), error_data),
                    msg_verbosity=os_util.verbosity_level.error,
                    current_verbosity=verbosity)
            else:
                # Convert to an rdkit molecule so that
                # mol_util.process_dummy_atoms can be applied
                docking_mol_rd = mol_util.process_dummy_atoms(
                    mol_util.obmol_to_rwmol(docking_mol_ob))
                docking_mol_local[each_name] = docking_mol_rd
                os_util.local_print(
                    '{:<15} {:<18}'
                    ''.format(
                        each_name,
                        each_mol['comment'] if isinstance(each_mol, dict) else each_mol),
                    msg_verbosity=os_util.verbosity_level.default,
                    current_verbosity=verbosity)
                os_util.local_print(
                    'Extracted molecule {} (SMILES: {}) using openbabel fallback from {}.'
                    ''.format(each_name, rdkit.Chem.MolToSmiles(docking_mol_rd), str(each_mol)),
                    msg_verbosity=os_util.verbosity_level.debug,
                    current_verbosity=verbosity)
        else:
            os_util.local_print(
                'Could not read data in {} using rdkit. Please, check your file and convert it to a '
                'valid mol2. (You can also use "no_checks" to enable reading using pybel)'
                ''.format(str(each_mol)),
                msg_verbosity=os_util.verbosity_level.error,
                current_verbosity=verbosity)
            raise SystemExit(-1)

    return docking_mol_local
def __init__(self):
    """Load the structure named by ``self.Filename``.

    ``self.Mol`` is the first molecule pybel reads from the file in "xyz"
    format and ``self.AwesomeMol`` is the result of
    ``ReadCoordzAwesome(self.Filename)``.

    NOTE(review): ``self.Filename`` is read but not set here -- presumably a
    class attribute or set by a subclass before this runs; confirm.
    """
    # next() instead of .next(): portable across Python 2 and 3.
    self.Mol = next(pybel.readfile("xyz", self.Filename))
    self.AwesomeMol = ReadCoordzAwesome(self.Filename)
def _empty_features(molcode):
    """Return the empty result with the arity used for *molcode*."""
    if molcode == 1:
        return [], [], []
    return [], []


def extract_features(miss_container,
                     id,
                     molcode=None,
                     db_path=Global_var.DB_GENERAL_PATH.value,
                     relative_central_mass=np.zeros((3, ))):
    '''Extract per-atom coordinates and features from one mol2 file.

    The file read is ``<db_path>/<id>/<id>.mol2`` when ``molcode == -1`` and
    ``<db_path>/<id>/<id>_ligand.mol2`` otherwise.  Only atoms with atomic
    number > 1 are used.

    param miss_container: passed through to ``get_probs``
    param id: the name of the complex code
    param molcode: 1 or -1, selecting ligand or protein handling; with 1 the
        charge is the atom's ``partialcharge``, with -1 atoms rejected by
        ``are_coordinates_acceptable`` are skipped and the charge comes from
        ``get_probs``
    param db_path: the path where the complex is located
    param relative_central_mass: reference point handed to
        ``are_coordinates_acceptable`` (used when ``molcode == -1``)
    return: ``(coords, features)``, plus ``central_mass`` as a third item
        when ``molcode == 1``.  Empty lists are returned when the file is
        missing, empty or cannot be read; ``np.array([0])`` placeholders are
        returned for coords and features when no atom is kept or the
        feature matrix cannot be assembled.
    '''
    atom_codes_init()
    mol_id = id if molcode == -1 else f"{id}_ligand"
    path_to_molecule = os.path.abspath("{}/{}/{}.mol2".format(
        db_path, id, mol_id))

    # Explicit check instead of assert: asserts are stripped under -O.
    if not os.path.isfile(path_to_molecule):
        logger.info(f"{mol_id} was excluded")
        return _empty_features(molcode)

    if is_file_empty(path_to_molecule):
        return _empty_features(molcode)

    try:
        molecule = next(pybel.readfile('mol2', path_to_molecule))
    except Warning:
        logger.info(f"{mol_id} was excluded")
        # Same arity as the other early exits, so callers that unpack three
        # values for molcode == 1 keep working.
        return _empty_features(molcode)

    # Computed once up front: it does not depend on the atom being visited,
    # and it must exist at the final return even when no atom is kept.
    central_mass = central_mass_compute(molecule) if molcode == 1 else None

    coords = []
    features = []
    heavy_atoms = []
    charges_db = read_json(id, db_path=db_path)

    for i, atom in enumerate(molecule):
        if atom.atomicnum <= 1:
            continue

        atomic_features = [getattr(atom, prop) for prop in NAMED_PROPS]
        # sys.maxsize is the "no charge available" sentinel
        charge = sys.maxsize
        if molcode == 1:
            charge = atom.partialcharge
        elif molcode == -1:
            if not are_coordinates_acceptable(
                    atom.coords, relative_central_mass, exclude_radius=10):
                continue
            charge = get_probs(miss_container, id, atom, charges_db)

        # if the charge is still the sentinel, the particular charge does
        # not exist in our json file: skip the atom
        if charge == sys.maxsize:
            continue

        atomic_features.append(charge)
        heavy_atoms.append(i)
        coords.append(atom.coords)
        features.append(
            np.concatenate((encode_num(atom.atomicnum), atomic_features)))

    coords = np.array(coords, dtype=np.float64)
    features = np.array(features, dtype=np.float64)

    assembled = None
    if features.shape[0] > 0 and coords.shape[0] > 0:
        try:
            # append the molcode column, then the rows of find_smarts()
            # that belong to the kept atoms
            assembled = np.hstack((features, molcode * np.ones(
                (len(features), 1))))
            assembled = np.hstack(
                [assembled, find_smarts(molecule)[heavy_atoms]])
        except Exception:
            # best effort, as before: any failure while assembling the
            # matrix falls back to the placeholders below
            assembled = None

    if assembled is None:
        coords = np.array([0])
        features = np.array([0])
    else:
        features = assembled

    if molcode == 1:
        return coords, features, central_mass
    return coords, features
import pybel
import chemml
from sklearn.externals import joblib

chemml.max_atoms = 30
data = chemml.pd.read_csv("test_pe.csv")  # load test data
C = []
# unique molecule names, in first-seen order
mols = list(dict.fromkeys(data['molecule_name']))
for mol_name in mols:
    try:
        m = next(pybel.readfile("xyz", "structures/" + mol_name + ".xyz"))
    except (IOError, StopIteration):
        # next() never returns None: a missing file raises IOError and a
        # file without molecules raises StopIteration, so those are the
        # cases to skip.  Drop the entry from the dataframe as well.
        data = data[data.molecule_name != mol_name]
        print("Error in loading molecule: " + str(mol_name) + ". Skipping...")
        continue
    C.append(chemml.CoulombMatrixEig(m))
kr = joblib.load('pe.model')  # load model
# NOTE(review): C holds one entry per unique molecule name while data has
# one row per CSV row -- this assignment presumably relies on the names in
# test_pe.csv being unique; confirm.
data['predicted-pe'] = kr.predict(C)
data.to_csv(r"predicted_pe.csv")
import pybel
import binascii

# Alternative using the openbabel bindings directly, kept for reference:
# import openbabel
#
# obConversion = openbabel.OBConversion()
# obConversion.SetInAndOutFormats("pdbqt", "mol2")
# obmol = openbabel.OBMol()
# obConversion.ReadFile(obmol, '/home/damjan/Documents/Docking/result data/results/ligands/ZINC00000226.pdbqt')  # Open Babel will uncompress automatically
# obmol.AddHydrogens()
# print mol.NumAtoms()
# print mol.NumBonds()
# print [method for method in dir(mol) if callable(getattr(mol, method))]

# Other input files that have been used with this script.  The first one
# used to be read as well, but its result was overwritten straight away by
# the read below, so only the effective read is kept.
# mol = next(pybel.readfile("pdbqt", 'workspace/DockingResultRepositoryAPI/main/temp/ligands/ZINC00000125.pdbqt'))
# mol = next(pybel.readfile("pdbqt", '/home/damjan/Documents/Docking/result data/results/ligands/ZINC00000226.pdbqt'))
# mol = next(pybel.readfile("pdbqt", '/home/damjan/Documents/Docking/result data/results/ligands/ZINC00000384.pdbqt'))

# next() instead of .next(): portable across Python 2 and 3.
mol = next(pybel.readfile("pdbqt", '../main/temp/ligands/ZINC00000125.pdbqt'))

# Hydrogens are added unconditionally
# (the guard "if not mol.OBMol.HasHydrogensAdded():" is disabled).
mol.OBMol.AddHydrogens()
descvalues = mol.calcdesc()

# In Python, the update method of a dictionary allows you
# to add the contents of one dictionary to another

# for key in descvalues.keys():
#     print key + ": "
#     print descvalues[key]

# for key in pybel.outformats.keys():
#     print key + ": " + pybel.outformats[key]
# Script fragment: attaches a per-drug atom count to a pickled data set.
# It uses cPickle, so it targets Python 2.  The fragment ends inside the
# `filter_dset` literal; the rest of that statement is outside this view.
import shelve
import os
import pybel
import cPickle
import pandas as pd
import shutil

# short alias for the DataFrame constructor
df = pd.DataFrame
from clustering import DSET_PATH

# load the pickled data set
with open(DSET_PATH, 'r') as f:
    dset = cPickle.load(f)

# read every record of the file (parsed as SDF) into memory
drugs = [_ for _ in pybel.readfile("sdf", "../dat/approved.txt")]
ids, sizes = [], []
for drug in drugs:
    drug_id = drug.data['DRUGBANK_ID']
    # remove hydrogens first, so the atom count below excludes them
    drug.removeh()
    sz = len(drug.atoms)
    sizes.append(sz)
    ids.append(drug_id)

# two rows (ids, sizes) transposed into one row per drug
sz_dset = df([ids, sizes]).T
sz_dset.columns = ['DRUGBANK_ID', 'HeavyAtomNum']
# no join key is given -- presumably this joins on the columns both frames
# share; confirm against the columns of dset
dset = dset.merge(sz_dset)

# column -> list-of-values accumulator
filter_dset = {
    'DRUGBANK_ID': [],
    'ProteinBoundLig': [],
    'HeavyAtomNum': [],
    'LigSize': [],
    'LigPath': [],
# NOTE: the first statement below is the tail of a definition that starts
# before this view; it is reproduced unchanged.
    return dr


if __name__ == '__main__':
    # Script entry point: runs an IRC calculation on the structure in
    # ts2.xyz with a MOPAC calculator.  Imports are local to this branch.
    import os
    import numpy as np
    import numpy.linalg as la
    from ase.calculators import mopac
    from ase import Atoms
    import pybel

    # every relative path below is resolved from ../test
    os.chdir('../test')
    MOPAC = os.path.join(os.getcwd(), 'MOPAC')
    # both environment variables point at the local MOPAC directory
    os.environ['MOPAC_LICENSE'] = MOPAC
    os.environ['LD_LIBRARY_PATH'] = MOPAC

    # calculator setup: custom command line (stderr discarded), PM3 method
    mopac_calc = mopac.MOPAC()
    mopac_calc.command = 'MOPAC/MOPAC2016.exe PREFIX.mop 2> /dev/null'
    mopac_calc.set(method='pm3')

    # build the ASE Atoms object from the first molecule in ts2.xyz
    mol = next(pybel.readfile('xyz', 'ts2.xyz'))
    atoms = Atoms(numbers=[a.atomicnum for a in mol.atoms],
                  positions=[a.coords for a in mol.atoms])
    # shift the coordinates so the centre of mass sits at the origin
    atoms.set_positions(atoms.positions - atoms.get_center_of_mass())
    atoms.set_calculator(mopac_calc)

    irccalc = IRC(atoms,
                  stride=0.15,
                  mw=True,
                  forward=True,
                  trajectory='ts1.traj')
    # run() is iterated to completion; the yielded values are not used
    for _ in irccalc.run():
        pass
# NOTE: the statements down to the `except` clause are the tail of a
# definition that starts before this view; they are reproduced unchanged.
            chargestate, mz, shortinchi, inchi, inputrecord[0], inputrecord[1])
        # success: status 0 plus the formatted output string
        return [0, outstring]
    except:
        # failure: status -1 plus the first two fields of the input record
        return [-1, inputrecord[0], inputrecord[1]]


cc = 0
#finp=open(infile,'r');
# output, error and names files are opened for writing here
fout = open(outfile, 'w')
ferr = open(outerror, 'w')
fnames = open(outnames, 'w')
dblist = []
totcount = 0
for mol in pybel.readfile('sdf', infile):
    totcount += 1
    #print(totcount);
    mol.addh()
    # SMILES output with newlines removed and tabs turned into spaces;
    # only the text before the first space is kept
    smi = mol.write('smi').replace('\n', '').replace('\t', ' ').split(' ')[0]
    # numeric part of the database id, e.g. the digits after 'YMDB'
    idx = int(mol.data['DATABASE_ID'].replace('YMDB', ''))
    if 'GENERIC_NAME' in mol.data:
        names = mol.data['GENERIC_NAME']
    else:
        names = 'N/A'
    print(smi, idx, names)
    #print(names);
    #print(idx);
    #> <>
def get_sdf_molecules(sdf_file, rmsd):
    """Load all molecules of *sdf_file* and build a derived file name.

    Returns a ``(molecules, name)`` pair: the list of pybel molecules read
    from *sdf_file*, and *sdf_file* with its extension replaced by
    ``_r<rmsd>.sdf``.
    """
    reader = pybel.readfile('sdf', sdf_file)
    root = os.path.splitext(sdf_file)[0]
    output_name = "".join([root, '_r', str(rmsd), '.sdf'])
    molecules = list(reader)
    return molecules, output_name
def al_exp_ins(org,
               ec,
               k,
               exp,
               neg=None,
               beta=1.0,
               kernel='rbf',
               degree=3,
               gamma=0.005,
               iterations=100,
               batch=1,
               C=1.0,
               initial=2,
               decf=False,
               random_seed=None,
               fp='FP4',
               simfp=fptr.integer_sim):
    """Build the training arrays for (*org*, *ec*) and run ``routines.dw_exp_ins``.

    Negatives come from the SDF file *neg* when given, otherwise from
    ``random_negatives(k)``.  Every molecule of the SDF file *exp* (looked up
    under ``CHEMPATH``) is added with its canonical SMILES and its integer
    ``label`` data field, and its SMILES is also collected in the list handed
    to ``routines.dw_exp_ins`` as ``excl``.

    *fp* selects the feature vectors: 'FP4' uses the second item stored with
    each positive/negative entry, 'FP2' rebuilds them from the SMILES with
    ``fptr.reconstruct_fp``.  Any other value raises ``IOError``.  The
    remaining keyword arguments are passed through to
    ``routines.dw_exp_ins``.

    NOTE(review): the result of ``routines.dw_exp_ins`` is bound to ``out``
    but not returned in the visible code -- confirm whether that is intended.
    """
    a = bi(org, ec)
    if neg is not None:
        a.add_from_sdf(neg, k, pos=False)
    else:
        a.random_negatives(k)

    suppl = pybel.readfile('sdf', os.path.join(CHEMPATH, exp))
    excl = []
    for mol in suppl:
        smi = mol.write('can').strip()
        cls = int(mol.data['label'])
        a.add_from_smiles(smi, k, cls)
        excl.append(smi)

    smiles_access = [t[0] for t in a.pos[k]] + [t[0] for t in a.neg[k]]
    # width of the fixed-size string field that holds the SMILES
    n = max([len(str(x)) for x in smiles_access])

    if fp == 'FP4':
        x_pos_array = np.vstack(tuple([t[1] for t in a.pos[k]]))
        x_neg_array = np.vstack(tuple([t[1] for t in a.neg[k]]))
    elif fp == 'FP2':
        x_pos_array = np.vstack(
            tuple([
                np.array(fptr.reconstruct_fp(t[0], fptype='FP2'))
                for t in a.pos[k]
            ]))
        x_neg_array = np.vstack(
            tuple([
                np.array(fptr.reconstruct_fp(t[0], fptype='FP2'))
                for t in a.neg[k]
            ]))
    else:
        raise IOError("Valid values for fp are FP2 and FP4.")

    # labels: +1 for every positive row, -1 for every negative row
    y_obj = [1] * x_pos_array.shape[0] + [-1] * x_neg_array.shape[0]
    x = np.vstack((x_pos_array, x_neg_array))
    # list(zip(...)): on Python 3 zip() is lazy and np.array() cannot build
    # the structured array from it directly
    y = np.array(list(zip(y_obj, smiles_access)),
                 dtype=[('label', 'i4'), ('smiles', '|S%s' % str(n))])

    outfile = "al_expins_%s_%s_beta%s_batch%s_%s_rseed%s" % (org, ec, str(
        beta).replace('.', ''), str(batch), kernel, str(random_seed))
    out = routines.dw_exp_ins(x,
                              y,
                              outfile,
                              smiles_access,
                              excl,
                              C=C,
                              gamma=gamma,
                              iterations=iterations,
                              batch=batch,
                              degree=degree,
                              kernel=kernel,
                              beta=beta,
                              decf=decf,
                              seed=random_seed,
                              simfp=simfp,
                              initial=initial)
def find_PiPi(pdb_file,
              lig_name,
              centroid_distance=5.0,
              dih_parallel=25,
              dih_tshape=80,
              verbose=1):
    """
    Count aromatic ring pairs between a ligand residue and the rest of a PDB.

    A pair of one aromatic ligand ring and one aromatic non-ligand ring is
    counted when the distance between the ring centres is below
    *centroid_distance* and the angle between the ring normals (as returned
    by ``vecAngle``) is below *dih_parallel* or above *dih_tshape*.

    :param pdb_file: path of the target file in PDB format.
    :param lig_name: ligand residue name; the first matching residue is used.
    :param centroid_distance: Max ring centroid distance
    :param dih_parallel: Max dihedral (parallel)
    :param dih_tshape: Min dihedral (T-shaped)
    :param verbose: when true, progress and results are printed
    :return: number of Pi-Pi interactions found, or -1 when no residue named
        *lig_name* exists
    """
    mol = next(pybel.readfile('pdb', pdb_file))
    if verbose:
        print("A total of %s residues" % mol.OBMol.NumResidues())

    # Locate the ligand residue.
    lig = None
    for residue in ob.OBResidueIter(mol.OBMol):
        if residue.GetName() == lig_name:
            lig = residue
            if verbose:
                print("Ligand residue name is:", lig.GetName())
            break
    if not lig:
        if verbose:
            print("No ligand residue %s found, please confirm." % lig_name)
        return -1

    lig_atoms = list(ob.OBResidueAtomIter(lig))

    # Number the rings so they can be told apart later.
    for ring_number, ring in enumerate(mol.sssr):
        ring.ring_id = ring_number

    # A ring belongs to the ligand as soon as one ligand atom is a member.
    lig_ring_ids = []
    lig_aromatic_rings = []
    for ring in mol.sssr:
        if not any(ring.IsMember(atom) for atom in lig_atoms):
            continue
        lig_ring_ids.append(ring.ring_id)
        if verbose:
            print("ligand ring_ID: ", ring.ring_id, end=' ')
        if ring.IsAromatic():
            if verbose:
                print("aromatic")
            lig_aromatic_rings.append(ring)
        elif verbose:
            print("saturated")

    # Every other ring is treated as a receptor ring.
    rec_rings = [ring for ring in mol.sssr if ring.ring_id not in lig_ring_ids]
    rec_aromatic_rings = [ring for ring in rec_rings if ring.IsAromatic()]
    if verbose:
        print("\nReceptor has ", len(rec_rings), " rings,", end=' ')
        print(" has ", len(rec_aromatic_rings), " aromatic rings.")

    # Output vectors filled in by findCenterAndNormal().
    lig_center, lig_normal, lig_normal_b = ob.vector3(), ob.vector3(), ob.vector3()
    rec_center, rec_normal, rec_normal_b = ob.vector3(), ob.vector3(), ob.vector3()

    count = 0
    for lig_ring in lig_aromatic_rings:
        lig_ring.findCenterAndNormal(lig_center, lig_normal, lig_normal_b)
        for rec_ring in rec_aromatic_rings:
            rec_ring.findCenterAndNormal(rec_center, rec_normal, rec_normal_b)
            dist = lig_center.distSq(rec_center)**0.5
            angle = vecAngle(lig_normal, rec_normal)
            close_enough = dist < centroid_distance
            parallel = angle < dih_parallel
            t_shaped = angle > dih_tshape
            if close_enough and (parallel or t_shaped):  # the criteria
                count += 1
                if verbose:
                    print(
                        "Pi-Pi ring pairs: %3s,%3s Angle(deg.): %5.2f  Distance(A): %.2f"
                        % (rec_ring.ring_id, lig_ring.ring_id, angle, dist))

    if verbose:
        print("Total Pi-Pi interactions:", count)
    return count
nargs='+', help='Types files to process') parser.add_argument('--filter', type=float, default=100.0, help='Filter out examples greater the specified value') parser.add_argument('--suffix', type=str, default='_wc', help='Suffix for new types files') args = parser.parse_args() centerinfo = dict() #first process all gninatypes files in current directory tree for ligfile in glob.glob('*/*_ligand.sdf'): mol = next(pybel.readfile('sdf', ligfile)) #calc center center = np.mean([a.coords for a in mol.atoms], axis=0) dir = ligfile.split('/')[0] for gtypes in glob.glob('%s/*.gninatypes' % dir): buf = open(gtypes, 'rb').read() n = len(buf) / 4 vals = np.array(struct.unpack('f' * n, buf)).reshape(n / 4, 4) lcenter = np.mean(vals, axis=0)[0:3] dist = np.linalg.norm(center - lcenter) centerinfo[gtypes] = dist for tfile in args.typefiles: fname, ext = os.path.splitext(tfile) outname = fname + args.suffix + ext out = open(outname, 'w')
def to_OBMol(file_list):
    """Return the ``OBMol`` of the first molecule of each "mopout" file.

    The result is a list in the order of *file_list*; an empty *file_list*
    gives an empty list.
    """
    # next() instead of .next(): portable across Python 2 and 3.
    return [next(pybel.readfile("mopout", filename)).OBMol
            for filename in file_list]