def pydock(wrkdir):
    """
    Calculate pyDock scoring.

    Works inside ``wrkdir``, which must contain ``receptor.pdb`` and
    ``ligand.pdb``. The receptor is rewritten as chain A and the ligand as
    chain B, then ``pyDock3 pydock setup`` and ``pyDock3 pydock dockser``
    are executed and the last line of ``pydock.ene`` is parsed.

    Returns a dict with the keys ``pyDock``, ``pyDock_elec``,
    ``pyDock_desolv``, ``pyDock_vdw`` and ``>TIME_pyDock`` (elapsed time as
    measured by ``timer``).

    Raises ValueError if either pyDock step returns a non-zero status.
    The caller's working directory is restored on every exit path.

    [1] T. M.-K. Cheng, T. L. Blundell, and J. Fernandez-Recio, "pyDock:
    Electrostatics and desolvation for effective scoring of rigid-body
    protein–protein docking", Proteins: Structure, Function, and
    Bioinformatics, vol. 68, no. 2, pp. 503-515, 2007.
    """
    time_start = timer()
    log.info("Getting pyDock scoring...")
    basepath = os.getcwd()
    os.chdir(wrkdir)
    try:
        rec = ppdx.Pdb('receptor.pdb')
        rec.set_chain('A')
        rec.write('receptorA.pdb')
        lig = ppdx.Pdb('ligand.pdb')
        lig.set_chain('B')
        lig.write('ligandB.pdb')

        # Mean ligand coordinates, written to pydock.rot below.
        xavg = np.mean([atom.x for atom in lig.atoms])
        yavg = np.mean([atom.y for atom in lig.atoms])
        zavg = np.mean([atom.z for atom in lig.atoms])

        with open("pydock.ini", 'w') as fp:
            fp.write(
                "[receptor]\n"
                "pdb = receptorA.pdb\n"
                "mol = A\n"
                "newmol = A\n"
                "\n"
                "[ligand]\n"
                "pdb = ligandB.pdb\n"
                "mol = B\n"
                "newmol = B\n"
            )

        pydock_exe = os.path.join(ppdx.PYDOCK, 'pyDock3')
        ret = ppdx.tools.execute("%s pydock setup >pydock_setup.out 2>&1" % (pydock_exe))
        if ret != 0:
            raise ValueError("pyDock setup failed!")

        # One pose only: presumably an identity rotation matrix followed by
        # the ligand centroid as translation -- confirm against pyDock docs.
        with open("pydock.rot", 'w') as fp:
            fp.write("1.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 %f %f %f 1\n" % (xavg, yavg, zavg))

        ret = ppdx.tools.execute("%s pydock dockser >pydock_dockser.out 2>&1" % (pydock_exe))
        if ret != 0:
            raise ValueError("pyDock dockser failed!")

        # The last line of the energy table holds the (single) scored pose.
        with open('pydock.ene', 'r') as fp:
            line = fp.readlines()[-1]
        _, elec, desolv, vdw, total, _ = line.split()
    finally:
        # Always go back, also when parsing or an external tool blows up.
        os.chdir(basepath)

    desc = {
        'pyDock': float(total),
        'pyDock_elec': float(elec),
        'pyDock_desolv': float(desolv),
        'pyDock_vdw': float(vdw),
    }
    time_end = timer()
    desc['>TIME_pyDock'] = time_end - time_start
    return desc
def charmify(fname, nsteps=100):
    """
    Run the CHARMM ``buildgen.inp`` script on the pdb file ``fname``.

    The output base name is the file name with its last extension removed
    (any other dots in the name are dropped as well) plus ``-chm``. If both
    ``<basename>.psf`` and ``<basename>.pdb`` already exist next to
    ``fname`` nothing is done.

    Otherwise the structure is prepared (``fix4charmm``, ``chain2segid``,
    occupancy and beta set to 1.0, hydrogens removed), split by chain into
    ``chain_<id>.pdb`` files, and CHARMM is executed in the directory of
    ``fname``.

    ``nsteps`` is forwarded to the CHARMM input as ``nsteps=<n>``
    (presumably the number of minimization steps -- see buildgen.inp).

    Raises ValueError if the CHARMM command returns a non-zero status.
    The caller's working directory is restored on every exit path.
    """
    wrkdir, name = os.path.split(fname)
    basename = ''.join(name.split('.')[0:-1]) + '-chm'
    psf_path = os.path.join(wrkdir, basename + '.psf')
    pdb_path = os.path.join(wrkdir, basename + '.pdb')
    if os.path.isfile(psf_path) and os.path.isfile(pdb_path):
        return

    log.info("Charmify-ing pdb %s" % (fname))
    basepath = os.getcwd()
    # os.path.split() yields '' for a bare file name and os.chdir('') raises,
    # so fall back to the current directory in that case.
    os.chdir(wrkdir or os.curdir)
    try:
        pdb = ppdx.Pdb(name)
        pdb.fix4charmm()
        pdb.chain2segid()
        pdb.set_occupancy(1.0)
        pdb.set_beta(1.0)
        pdb.remove_hydrogens()
        chains = pdb.split_by_chain()

        cmd = ppdx.CHARMM
        cmd += ' nc=%d ' % len(chains)
        # One pdb file per chain; chains are passed to CHARMM as c1=, c2=, ...
        for i, (ch, chain_pdb) in enumerate(chains.items(), start=1):
            chain_pdb.write("chain_%s.pdb" % (ch.lower()))
            cmd += 'c%d=%s ' % (i, ch)
        cmd += 'name=chain_ out=%s ' % (basename)
        cmd += 'nsteps=%d ' % (nsteps)
        cmd += 'ffpath=%s ' % (ppdx.FFPATH)
        cmd += '-i buildgen.inp >%s 2>&1' % (basename + '.out')

        ppdx.link_data('buildgen.inp')
        ppdx.link_data('disu.str')
        ret = ppdx.tools.execute(cmd)
    finally:
        os.chdir(basepath)

    if ret != 0:
        raise ValueError("Charmm failed while running < %s > in %s" % (cmd, wrkdir))
def split_complex(wrkdir, nchains):
    """
    Split the model-chm.pdb/cor/psf in wrkdir in a ligand-chm.pdb/cor/psf,
    receptor-chm.pdb/cor/psf, complex-chm.pdb/cor/psf.

    nchains is a tuple containing the number of chains in the receptor and
    the number of chains in the ligand. Assume the chains are called
    alphabetically (as done by Modeller): the first ``nchains[0]`` letters
    are the receptor, the following ones the ligand.

    Besides the ``*-chm`` files, ``receptor.pdb``, ``ligand.pdb`` and
    ``complex.pdb`` are written, plus ``receptorA.pdb``, ``ligandB.pdb``
    and ``complexAB.pdb`` where the receptor is chain/segid A and the
    ligand chain/segid B. Nothing is done when all of these already exist.

    Raises ValueError if the chain counts do not add up to the number of
    chains found in the model, or if a CHARMM extraction returns non-zero.
    The caller's working directory is restored on every exit path.
    """
    expected = (
        'ligand-chm.psf', 'receptor-chm.psf', 'complex-chm.psf',
        'ligand.pdb', 'receptor.pdb', 'complex.pdb',
        'ligandB.pdb', 'receptorA.pdb', 'complexAB.pdb',
    )
    basepath = os.getcwd()
    os.chdir(wrkdir)
    try:
        if all(os.path.isfile(f) for f in expected):
            return

        log.info('Splitting model-chm.psf in ligand, receptor and complex.')
        # We are already inside wrkdir: open the model by its bare name so
        # that a relative wrkdir is not applied twice.
        cpx = ppdx.Pdb('model-chm.pdb')
        cpx.segid2chain()
        nchains_tot = len(cpx.split_by_chain())
        lrec = nchains[0]
        llig = nchains[1]
        if llig + lrec != nchains_tot:
            raise ValueError('PDB %s contains %d chains, but ligand (%d) and receptor (%d) contain %d' % (wrkdir, nchains_tot, llig, lrec, llig+lrec))

        alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
        crec = alphabet[:lrec]
        clig = alphabet[lrec:nchains_tot]
        sele_rec = ' .or. '.join(['segid %s' % (c) for c in crec])
        sele_lig = ' .or. '.join(['segid %s' % (c) for c in clig])

        ppdx.link_data('extract.inp')
        # Same extraction script, once per partner (receptor first).
        for selection, outname in ((sele_rec, 'receptor-chm'), (sele_lig, 'ligand-chm')):
            cmd = '%s basename=%s sel="%s" outname=%s ffpath=%s -i extract.inp >%s 2>&1' % (ppdx.CHARMM, 'model-chm', selection, outname, ppdx.FFPATH, outname + '.out')
            ret = ppdx.tools.execute(cmd)
            if ret != 0:
                raise ValueError("Charmm failed.")

        # The complex is the unchanged model: just link it.
        for f in ['cor', 'pdb', 'psf']:
            if not os.path.isfile('complex-chm.' + f):
                os.symlink('model-chm.' + f, 'complex-chm.' + f)

        rec = ppdx.Pdb('receptor-chm.pdb')
        lig = ppdx.Pdb('ligand-chm.pdb')
        rec.make_standard()
        lig.make_standard()
        cpx = rec + lig
        rec.write('receptor.pdb')
        lig.write('ligand.pdb')
        cpx.write('complex.pdb')

        # Two-chain version: whole receptor as A, whole ligand as B.
        rec.set_chain('A')
        rec.set_segid('A')
        lig.set_chain('B')
        lig.set_segid('B')
        cpx = rec + lig
        rec.write('receptorA.pdb')
        lig.write('ligandB.pdb')
        cpx.write('complexAB.pdb')
    finally:
        os.chdir(basepath)
def rosetta1(target_sequence, template, wrkdir):
    """
    Build one model for the complex. Save it in the wrkdir folder.

    ``target_sequence`` is the sequence to model; ``/`` characters in it
    are removed before the alignment (the fasta file is written with the
    sequence as given). ``template`` is either a ``ppdx.Pdb`` object or a
    path, relative to the current directory, of the template pdb file.

    The target is aligned to the template sequence, threaded with
    Rosetta ``partial_thread`` and refined with the ``hybridize.xml``
    Rosetta script; ``model.pdb`` is a symlink to ``S_0001.pdb``.
    Failures of the Rosetta steps are only logged.

    Returns the elapsed time as measured by ``timer``, or None when
    ``wrkdir/model.pdb`` already exists and nothing is done.

    Raises ValueError if the template file cannot be found.
    The caller's working directory is restored on every exit path.
    """
    # Make working directory and enter it
    if os.path.isdir(wrkdir):
        if os.path.isfile(os.path.join(wrkdir, 'model.pdb')):
            return
    else:
        os.makedirs(wrkdir)

    time_start = timer()
    log.info("Creating model %s/model.pdb..." % (wrkdir))
    basepath = os.getcwd()
    os.chdir(wrkdir)
    try:
        # Create fasta file with target sequence
        target_sequence_file = 'target_sequence.fasta'
        with open(target_sequence_file, 'w') as fp:
            fp.write(">target\n%s\n" % (target_sequence))

        # Prepare template
        if not os.path.isfile('PDBID.pdb'):
            if isinstance(template, ppdx.Pdb):
                template.write('PDBID.pdb')
            else:
                template_path = os.path.join(basepath, template)
                if not os.path.isfile(template_path):
                    raise ValueError('Impossible to find template file %s' % (template))
                os.symlink(template_path, 'PDBID.pdb')

        # Remove / because Bio do not recognize them and rosetta thread neither...
        target_clean = target_sequence.replace('/', '')
        template_sequence = ppdx.Pdb("PDBID.pdb").get_sequence().replace('/', '')

        # Make alignment
        alignment = Bio.pairwise2.align.globalds(target_clean, template_sequence, Bio.SubsMat.MatrixInfo.blosum62, -10, -0.5)
        s1 = alignment[0][0]
        s2 = alignment[0][1]

        # Write sequence alignment (presumably the Grishin format read by
        # Rosetta partial_thread -- confirm against the Rosetta docs)
        with open('alignment.gri', 'w') as fp:
            fp.write("## target PDBID_thread\n")
            fp.write("# \n")
            fp.write("scores_from_program: 0\n")
            fp.write("0 " + s1 + "\n")
            fp.write("0 " + s2 + "\n")
            fp.write("--\n")

        log.info('Running rosetta thread...')
        rthread = "".join([
            ppdx.ROSETTABIN, "/partial_thread.static.linuxgccrelease ",
            " -database ", ppdx.ROSETTA, "/main/database",
            " -in:file:fasta ", target_sequence_file,
            " -in:file:alignment alignment.gri",
            " -in:file:template_pdb PDBID.pdb",
            " -ignore_unrecognized_res ",
            " >rosetta_thread.out 2>&1 ",
        ])
        ppdx.tools.execute(rthread)
        if not os.path.isfile('PDBID_thread.pdb'):
            log.error('Rosetta thread failed! Look at the output file rosetta_thread.out')

        log.info("Running Rosetta script...")
        ppdx.link_data('hybridize.xml')
        rscript = "".join([
            ppdx.ROSETTABIN, "/rosetta_scripts.static.linuxgccrelease ",
            " -database ", ppdx.ROSETTA, "/main/database ",
            " -in:file:fasta ", target_sequence_file,
            " -parser:protocol hybridize.xml ",
            " -default_max_cycles 200 ",
            " -dualspace ",
            " -restore_talaris_behavior ",
            " -score:set_weights pro_close 0 ",
            " >rosetta_script.out 2>&1 ",
        ])
        ppdx.tools.execute(rscript)
        if not os.path.isfile('S_0001.pdb'):
            log.error('Rosetta hybridize failed! Look at the output file rosetta_script.out')

        # Done! (lexists also catches a dangling link left by a failed run)
        if os.path.lexists('model.pdb'):
            os.remove('model.pdb')
        os.symlink("S_0001.pdb", "model.pdb")
    finally:
        os.chdir(basepath)

    log.info("Model %s/model.pdb created!" % (wrkdir))
    time_end = timer()
    return time_end - time_start
def enm_entropy(fname, K=50.0, cutoff=7.0, spring='constant'):
    """
    Given a pdb file, make the Elastic Network Model (ENM), build its
    hessian matrix, diagonalize it, and return its entropy.

    One node per residue is used: the CB atom, or the CA atom for GLY
    (hydrogens are removed first). Two nodes closer than ``cutoff`` are
    connected by a spring whose strength depends on their distance d:

    - ``'constant'``:    K / d**2
    - ``'exponential'``: K * exp(-(d / 7.0)**2)
    - ``'overR6'``:      K / d**4

    The returned value is the sum of ``1 + ln(sqrt(ev))`` over the Hessian
    eigenvalues ``ev`` larger than 1E-6. A warning is logged when the
    number of eigenvalues at or below that threshold is not 6.

    Returns NaN (after logging an error) for an unknown ``spring``.
    """
    pdb_all = ppdx.Pdb(fname)
    pdb_all.remove_hydrogens()
    nodes = [
        [atom.x, atom.y, atom.z] for atom in pdb_all.atoms
        if (atom.name == 'CB' or (atom.name == 'CA' and atom.resname == 'GLY'))
    ]

    def spring_constant(k, d):
        return k / (d * d)

    def spring_exponential(k, d, d0=7.0):
        return k * exp(-(d / d0)**2)

    def spring_overR6(k, d):
        return k / (d**4)

    if spring == 'constant':
        spr = spring_constant
    elif spring == 'exponential':
        spr = spring_exponential
    elif spring == 'overR6':
        spr = spring_overR6
    else:
        log.error("Unkown spring method <%s>. Available are: constant, exponential, overR6" % spring)
        return float('nan')

    natoms = len(nodes)
    log.info("Making ENM on %d residues" % (natoms))

    # Build the Hessian directly while scanning the pairs: every contact
    # adds the 3x3 block spr * (r_ij outer r_ij) to the two diagonal blocks
    # and subtracts it from the two off-diagonal ones.
    nfree = natoms * 3
    hess = np.zeros((nfree, nfree))
    for i in range(natoms):
        xi, yi, zi = nodes[i]
        bi = slice(3 * i, 3 * i + 3)
        for j in range(i + 1, natoms):
            dx = xi - nodes[j][0]
            dy = yi - nodes[j][1]
            dz = zi - nodes[j][2]
            d = sqrt(dx * dx + dy * dy + dz * dz)
            if d < cutoff:
                rij = np.array([dx, dy, dz])
                block = np.outer(spr(K, d) * rij, rij)
                bj = slice(3 * j, 3 * j + 3)
                hess[bi, bi] += block
                hess[bj, bj] += block
                hess[bi, bj] -= block
                hess[bj, bi] -= block

    # The Hessian is symmetric, so only the eigenvalues are requested.
    eival = np.linalg.eigvalsh(hess)

    S = 0
    skipped = 0
    toout = ""
    for ev in eival:
        if ev > 1E-6:
            S += 1 + np.log(sqrt(ev))
        else:
            skipped += 1
            toout += "%d %f\n" % (skipped, ev)
    if skipped != 6:
        # Message reports the threshold actually used above (1E-6).
        log.warning("Warning! There were %d skipped frequencees below 1E-6! Exactly 6 were expected!" % skipped)
        log.warning(toout)
    return S
def firedock(wrkdir):
    """
    Calculate FireDock scoring.

    Works inside ``wrkdir``, which must contain ``receptor.pdb`` and
    ``ligand.pdb``. Hydrogens are removed, both partners are processed by
    the ``reduce`` program shipped with FireDock, the FireDock parameter
    file is built and FireDock is run. The scores are read from the last
    line of ``firedock_build.out.ref`` (``|``-separated, fields 5 to 17).

    Returns a dict with ``FireDock`` and its ``FireDock_*`` components,
    plus ``>TIME_FireDock`` (elapsed time as measured by ``timer``). When
    the score field of that line reads ``glob``, a warning is logged and
    all scores are NaN.

    Raises ValueError if any of the external commands returns non-zero.
    The caller's working directory is restored on every exit path.
    """
    time_start = timer()
    log.info("Getting FireDock scoring...")
    basepath = os.getcwd()
    os.chdir(wrkdir)
    try:
        # Remove hydrogens
        pdb = ppdx.Pdb('ligand.pdb')
        pdb.remove_hydrogens()
        pdb.write('ligand_firedock_noh.pdb')
        pdb = ppdx.Pdb('receptor.pdb')
        pdb.remove_hydrogens()
        pdb.write('receptor_firedock_noh.pdb')

        # Reduce
        fd_reduce = "%s/PDBPreliminaries/reduce.2.21.030604 -DB %s/PDBPreliminaries/reduce_het_dict.txt -OH -HIS -NOADJust -NOROTMET" % (ppdx.FIREDOCK, ppdx.FIREDOCK)
        ret = ppdx.tools.execute(fd_reduce + ' receptor_firedock_noh.pdb >firedock_receptor.pdb 2>firedock_reduce_receptor.out')
        if ret != 0:
            raise ValueError("FireDock reduce receptor failed!")
        ret = ppdx.tools.execute(fd_reduce + ' ligand_firedock_noh.pdb >firedock_ligand.pdb 2>firedock_reduce_ligand.out')
        if ret != 0:
            raise ValueError("FireDock reduce ligand failed!")

        # Score
        with open('firedock.trans', 'w') as fp:
            fp.write("1 0.0 0.0 0.0 0.0 0.0 0.0")
        ret = ppdx.tools.execute("%s/buildFireDockParams.pl firedock_receptor.pdb firedock_ligand.pdb U U Default firedock.trans firedock_build.out 1 50 0.85 0 firedock_parameters.dat >firedock_build.err 2>&1" % ppdx.FIREDOCK)
        if ret != 0:
            raise ValueError("FireDock build param failed!")
        ret = ppdx.tools.execute("%s/runFireDock.pl firedock_parameters.dat >firedock.log 2>&1" % (ppdx.FIREDOCK))
        if ret != 0:
            raise ValueError("FireDock scoring failed!")

        # Get values
        with open('firedock_build.out.ref', 'r') as fp:
            data = fp.readlines()[-1].split('|')
        if data[5].strip() == 'glob':
            # Score field reads 'glob' instead of a number (looks like a
            # header line, i.e. no result row): report NaN everywhere.
            log.warning('Problem with FireDock in %s' % (wrkdir))
            data = [float('nan')] * 20

        # Column 5 is the global score, columns 6..17 its components.
        keys = (
            'FireDock', 'FireDock_aVdW', 'FireDock_rVdW', 'FireDock_ACE',
            'FireDock_inside', 'FireDock_aElec', 'FireDock_rElec',
            'FireDock_laElec', 'FireDock_lrElec', 'FireDock_hb',
            'FireDock_piS', 'FireDock_catpiS', 'FireDock_aliph',
        )
        desc = dict()
        for column, key in enumerate(keys, start=5):
            desc[key] = float(data[column])
    finally:
        # Always go back, also when the output cannot be read or parsed.
        os.chdir(basepath)

    time_end = timer()
    desc['>TIME_FireDock'] = time_end - time_start
    return desc