def findSurfaceAtoms(selection="all", cutoff=2.5, quiet=1):
    """
DESCRIPTION

    Selects the polymer atoms of 'selection' whose exposed surface area,
    as loaded into the B-factor column by get_area, is at least 'cutoff'
    A**2.

USAGE

    findSurfaceAtoms [ selection, [ cutoff ]]

RETURNS

    The name of the newly created selection (prefix "exposed_atm_").

SEE ALSO

    findSurfaceResidues
    """
    cutoff = float(cutoff)
    quiet = int(quiet)

    # All destructive work happens on a scratch copy of the polymer atoms.
    scratch = cmd.get_unused_name("_tmp")
    cmd.create(scratch, "(" + selection + ") and polymer", zoom=0)

    # dot_solvent is set on the scratch object only; get_area then stores
    # the per-atom area in b.
    cmd.set("dot_solvent", 1, scratch)
    cmd.get_area(selection=scratch, load_b=1)

    # Drop every atom below the exposure threshold (in A**2).
    below_threshold = scratch + " and b < " + str(cutoff)
    cmd.remove(below_threshold)

    # Map the surviving scratch atoms back onto the caller's selection.
    selName = cmd.get_unused_name("exposed_atm_")
    cmd.select(selName, "(" + selection + ") in " + scratch)
    cmd.delete(scratch)

    if not quiet:
        print("Exposed atoms are selected in: " + selName)

    return selName
def findSurfaceResidues(objSel="(all)", cutoff=2.5, doShow=False, verbose=False):
    """
    findSurfaceResidues
        finds those residues on the surface of a protein
        that have at least 'cutoff' exposed A**2 surface area.

    PARAMS
        objSel (string)
            the object or selection in which to find exposed residues
            DEFAULT: (all)

        cutoff (float)
            atoms whose area (as loaded into b by get_area) is below this
            value are discarded
            DEFAULT: 2.5 Ang**2

        doShow (boolean)
            show objSel as spheres, colored white with the exposed atoms red
            DEFAULT: False

        verbose (boolean)
            print a warning about dot_solvent and the selection name
            DEFAULT: False

    RETURNS
        (list: (chain, resv))
            A sorted Python list of (chain, residue number) tuples for the
            residues that keep at least one atom after the cutoff.

    SIDE EFFECTS
        Sets dot_solvent globally, and creates the selections
        "exposed_atm_<n>" and "exposed_res_<n>" (n is a random integer).
    """
    tmpObj = "__tmp"
    # Parenthesise objSel so compound selections combine correctly with "and".
    cmd.create(tmpObj, "(" + objSel + ") and polymer")
    if verbose != False:
        print("WARNING: I'm setting dot_solvent.  You may not care for this.")
    cmd.set("dot_solvent", 1)
    cmd.get_area(selection=tmpObj, load_b=1)

    # threshold on what one considers an "exposed" atom (in A**2):
    cmd.remove(tmpObj + " and b < " + str(cutoff))

    stored.tmp_dict = {}
    cmd.iterate(tmpObj, "stored.tmp_dict[(chain,resv)]=1")
    # sorted() works on both Python 2 lists and Python 3 dict views,
    # unlike the former keys().sort().
    exposed = sorted(stored.tmp_dict)

    randstr = str(random.randint(0, 10000))
    selName = "exposed_atm_" + randstr
    if verbose != False:
        print("Exposed residues are selected in: " + selName)
    cmd.select(selName, "(" + objSel + ") in " + tmpObj)
    selNameRes = "exposed_res_" + randstr
    cmd.select(selNameRes, "byres " + selName)

    if doShow != False:
        cmd.show_as("spheres", "(" + objSel + ") and poly")
        cmd.color("white", objSel)
        cmd.color("red", selName)

    cmd.delete(tmpObj)

    return exposed
def slowpacking(pdb):
    """Derive mean packing density of pdb as pd.Series.

    Clears the PyMOL session, loads ``pdb``, strips solvent and hydrogens,
    and treats atoms whose solvent-accessible area is 0 as "interior".
    For every (interior atom, any atom) pair the radius bin returned by
    ``euclid_step`` is counted, the counts are accumulated over ``RADS``
    and normalised by the sphere volume of each radius.

    Returns a pd.Series holding the cumulative counts (labels
    "<r>_correctcount") followed by the densities (labels "<r>_density").

    NOTE(review): if no atom has b = 0, N is 0 and the densities are
    inf/NaN; callers may want to check for that.
    """
    cmd.delete('all')
    cmd.load(pdb)
    cmd.remove('solvent')
    # Only heavy atoms
    cmd.remove('hydro')
    # Compute SAS per atom
    cmd.set('dot_solvent')
    cmd.get_area('all', load_b=1)
    # cmd.select returns the number of selected atoms.
    N = float(cmd.select('interior', 'b = 0'))

    internal_coords = [at.coord for at in cmd.get_model('interior').atom]
    all_coords = [at.coord for at in cmd.get_model('all').atom]

    # Count
    counts = pd.Series(0, index=RADS)
    for a, b in product(internal_coords, all_coords):
        # presumably euclid_step maps the a-b distance to a label in RADS,
        # or None when the pair should not be counted -- TODO confirm
        es = euclid_step(a, b)
        if es is not None:
            counts.loc[es] += 1
    counts = counts.cumsum()

    # Mean per center atom
    meancounts = counts / N

    # Normalize to density
    volumina = pd.Series(4 / 3.0 * sp.pi * (RADS ** 3), index=RADS)
    density = meancounts / volumina

    # Correct for center.  Use 4 / 3.0 here as well: a bare 4/3 is integer
    # division (== 1) on Python 2 and would disagree with `volumina`.
    density -= 1 / (4 / 3.0 * sp.pi * RADS ** 3)

    # Results
    counts.index = ["{}_correctcount".format(i) for i in counts.index]
    density.index = ["{}_density".format(i) for i in density.index]
    return pd.concat(([counts, density]))
def packing(pdb):
    """Derive mean packing density of pdb as pd.Series.

    Clears the PyMOL session, loads ``pdb``, strips solvent and hydrogens
    and selects the atoms with zero solvent-accessible area as "interior".
    Going from the largest to the smallest radius in ``RADS``, the atoms
    within that radius of the interior are counted, everything else is
    removed, and ``cmd.get_area('all')`` of the remainder is recorded.

    Returns a pd.Series with, per radius, the atom count
    ("<r>_rawcount"), count / area ("<r>_cv density") and the recorded
    area ("<r>_volume estimate").
    """
    cmd.delete('all')
    cmd.load(pdb)
    cmd.remove('solvent')
    # Only heavy atoms
    cmd.remove('hydro')
    # Compute SAS per atom
    cmd.set('dot_solvent', 1)
    cmd.get_area('all', load_b=1)
    cmd.select('interior', 'b = 0')

    counts = pd.Series(0, index=RADS)
    # Float dtype: this series receives the float areas from cmd.get_area;
    # writing them into an int series triggers dtype-upcast warnings.
    vest = pd.Series(0.0, index=RADS)

    # from biggest to smallest radius
    for r in RADS[::-1]:
        # Counting: cmd.select returns the number of selected atoms.
        counts.loc[r] = cmd.select('extended', 'interior extend {}'.format(r))
        cmd.remove('not extended')
        # area of what is left after trimming to this radius
        vest.loc[r] = cmd.get_area('all')

    # Results
    cvdens = counts / vest
    counts.index = ["{}_rawcount".format(i) for i in counts.index]
    vest.index = ["{}_volume estimate".format(i) for i in vest.index]
    cvdens.index = ["{}_cv density".format(i) for i in cvdens.index]
    return pd.concat(([counts, cvdens, vest]))
def interface_area(rec_file, lig_file):
    """Return the interface area between a receptor and a ligand structure.

    The solvent-accessible areas of the receptor, the ligand and the
    combined complex are measured; the interface area is half of
    |rec + lig - complex|.

    PARAMS
        rec_file  path of the receptor structure file
        lig_file  path of the ligand structure file

    RETURNS
        (float) the interface area

    NOTES
        The areas are read from the objects named 'rec' and 'lig.2', so the
        loaded files must produce objects with those names.
        NOTE(review): confirm these names against the callers' file names.
        The combined session is written to 'complexfile.pdb' in the current
        working directory, and dot_solvent / dot_density are changed
        globally.
    """
    cmd.set('dot_solvent', 1)
    cmd.set('dot_density', 3)

    cmd.load(rec_file)
    rec_area = cmd.get_area('rec')
    cmd.load(lig_file)
    lig_area = cmd.get_area('lig.2')

    # Round-trip through a file so that receptor and ligand end up in one
    # object whose area can be measured as a whole.
    cmd.save('complexfile.pdb')
    # The selection must be the string 'all'; the bare name `all` is the
    # Python builtin function.
    cmd.delete('all')
    cmd.load('complexfile.pdb')
    total_area = cmd.get_area('complexfile')

    # using sasa area
    area = (abs(rec_area + lig_area - total_area)) * 0.5
    return area
def findSurfaceResidues(objSel="(all)", cutoff=2.5, doShow=False, verbose=False):
    """
    findSurfaceResidues
        finds those residues on the surface of a protein
        that have at least 'cutoff' exposed A**2 surface area.

    PARAMS
        objSel (string)
            the object or selection in which to find exposed residues
            DEFAULT: (all)

        cutoff (float)
            your cutoff of what is exposed or not.
            DEFAULT: 2.5 Ang**2

        doShow (boolean)
            show objSel as spheres, colored white with the exposed atoms red
            DEFAULT: False

        verbose (boolean)
            print a warning about dot_solvent and the selection name
            DEFAULT: False

    RETURNS
        (list: (chain, resv ) )
            A sorted Python list of (chain, residue number) tuples
            corresponding to those residues w/more exposure than the cutoff.

    SIDE EFFECTS
        Sets dot_solvent globally, and creates the selections
        "exposed_atm_<n>" and "exposed_res_<n>" (n is a random integer).
    """
    tmpObj = "__tmp"
    # Parenthesise objSel so compound selections combine correctly with "and".
    cmd.create(tmpObj, "(" + objSel + ") and polymer")
    if verbose != False:
        print("WARNING: I'm setting dot_solvent.  You may not care for this.")
    cmd.set("dot_solvent", 1)
    cmd.get_area(selection=tmpObj, load_b=1)

    # threshold on what one considers an "exposed" atom (in A**2):
    cmd.remove(tmpObj + " and b < " + str(cutoff))

    stored.tmp_dict = {}
    cmd.iterate(tmpObj, "stored.tmp_dict[(chain,resv)]=1")
    # sorted() replaces keys().sort(), which only works on Python 2.
    exposed = sorted(stored.tmp_dict)

    randstr = str(random.randint(0, 10000))
    selName = "exposed_atm_" + randstr
    if verbose != False:
        print("Exposed residues are selected in: " + selName)
    cmd.select(selName, "(" + objSel + ") in " + tmpObj)
    selNameRes = "exposed_res_" + randstr
    cmd.select(selNameRes, "byres " + selName)

    if doShow != False:
        cmd.show_as("spheres", "(" + objSel + ") and poly")
        cmd.color("white", objSel)
        cmd.color("red", selName)

    cmd.delete(tmpObj)

    return exposed
def findSurfaceResidues(objSel="(all)", cutoff=2.5, doShow=False, verbose=True):
    """
    findSurfaceResidues
        finds those residues on the surface of a protein
        that have at least 'cutoff' exposed A**2 surface area.

    PARAMS
        objSel (string)
            the object or selection in which to find exposed residues
            DEFAULT: (all)

        cutoff (float)
            your cutoff of what is exposed or not.
            DEFAULT: 2.5 Ang**2

        doShow (boolean)
            show objSel as spheres, colored white with the exposed atoms red
            DEFAULT: False

        verbose (boolean)
            print a warning about dot_solvent and the selection name
            DEFAULT: True

    RETURNS
        (list: (chain, resv ) )
            A sorted Python list of (chain, residue number) tuples
            corresponding to those residues w/more exposure than the cutoff.
            The list is also printed, regardless of 'verbose'.

    SIDE EFFECTS
        Sets dot_solvent globally, and creates the selections
        "exposed_atm_<n>" and "exposed_res_<n>" (n is a random integer).
    """
    tmpObj = "__tmp"
    # Parenthesise objSel so compound selections combine correctly with "and".
    cmd.create(tmpObj, "(" + objSel + ") and polymer")
    if verbose != False:
        print("WARNING: I'm setting dot_solvent.  You may not care for this.")
    cmd.set("dot_solvent", 1)
    cmd.get_area(selection=tmpObj, load_b=1)

    # threshold on what one considers an "exposed" atom (in A**2):
    cmd.remove(tmpObj + " and b < " + str(cutoff))

    stored.tmp_dict = {}
    cmd.iterate(tmpObj, "stored.tmp_dict[(chain,resv)]=1")
    # sorted() replaces keys().sort(), which only works on Python 2.
    exposed = sorted(stored.tmp_dict)

    randstr = str(random.randint(0, 10000))
    selName = "exposed_atm_" + randstr
    if verbose != False:
        print("Exposed residues are selected in: " + selName)
    cmd.select(selName, "(" + objSel + ") in " + tmpObj)
    selNameRes = "exposed_res_" + randstr
    cmd.select(selNameRes, "byres " + selName)

    if doShow != False:
        cmd.show_as("spheres", "(" + objSel + ") and poly")
        cmd.color("white", objSel)
        cmd.color("red", selName)

    cmd.delete(tmpObj)

    print(exposed)
    return exposed
def surfaceatoms(molecule="NIL", show=True, verbose=True, cutoff=2.5):
    """
    surfaceatoms
        finds those residues on the surface of a protein
        that have at least 'cutoff' exposed A**2 surface area.

    PARAMS
        molecule (string)
            the object or selection in which to find exposed residues
            DEFAULT: (last molecule in pymol)

        show (boolean)
            restyle the molecule: cartoon for the rest, sticks for the
            exposed residues, small spheres for the exposed atoms
            DEFAULT: True

        verbose (boolean)
            print a warning about dot_solvent and the selection name
            DEFAULT: True

        cutoff (float)
            your cutoff of what is exposed or not.
            DEFAULT: 2.5 Ang**2

    RETURNS
        (list: (chain, resv ) )
            A sorted Python list of (chain, residue number) tuples
            corresponding to those residues w/more exposure than the cutoff.
            The list is also printed.

    SIDE EFFECTS
        Sets dot_solvent globally and creates the selections
        "<molecule>_atoms" and "<molecule>_resi".
    """
    if molecule == "NIL":
        assert len(cmd.get_names()) != 0, "Did you forget to load a molecule? There are no objects in pymol."
        molecule = cmd.get_names()[-1]

    tmpObj = "__tmp"
    cmd.create(tmpObj, "(%s and polymer) and not resn HOH" % molecule)
    if verbose != False:
        print("WARNING: I'm setting dot_solvent.  You may not care for this.")
    cmd.set("dot_solvent", 1)
    cmd.get_area(selection=tmpObj, load_b=1)

    # threshold on what one considers an "exposed" atom (in A**2):
    cmd.remove(tmpObj + " and b < " + str(cutoff))

    stored.tmp_dict = {}
    cmd.iterate(tmpObj, "stored.tmp_dict[(chain,resv)]=1")
    # sorted() replaces keys().sort(), which only works on Python 2.
    exposed = sorted(stored.tmp_dict)

    selName = "%s_atoms" % molecule
    cmd.select(selName, molecule + " in " + tmpObj)
    if verbose != False:
        print("Exposed residues are selected in: " + selName)
    selNameRes = "%s_resi" % molecule
    cmd.select(selNameRes, "byres " + selName)

    if show != False:
        cmd.hide("everything", molecule)
        cmd.show("cartoon", "%s and not %s and not resn HOH" % (molecule, selNameRes))
        cmd.show("sticks", "%s" % selNameRes)
        cmd.util.cbaw(selNameRes)
        cmd.disable(selNameRes)
        # sphere_scale is set per selection; altering vdw instead would
        # affect repeated runs.
        cmd.set('sphere_scale', '0.3', '%s' % (selName))
        cmd.show("spheres", "%s" % selName)
        cmd.util.cbao(selName)
        cmd.disable(selName)

    cmd.delete(tmpObj)
    print(exposed)
    return exposed
def calcRMSD_pymol(uf, bf):
    """
    Given two pdb files of the same protein, this function calculates the
    rmsd, asa for each and the molecular weight of each using Pymol.

    PARAMS
        uf, bf  paths of the two pdb files; the PyMOL object names are
                taken from the second element returned by getFileParts.

    RETURNS
        (rms, asa_u, asa_b, umass, bmass, urmsd, brmsd) where rms is the
        root mean square of the non-negative values in urmsd, and
        urmsd / brmsd are the B-values read back (via readPDB/getBvalues)
        from the structures saved after running colorbyrmsd.

    NOTES
        Runs 'colorbyrmsd.py' through cmd.do and polls until the B-values
        in the session change; if that script never changes them, this
        function does not return.  The PyMOL session is reinitialized at
        the end.
    """
    cmd.set("dot_solvent", 1)
    cmd.load(uf)
    cmd.load(bf)
    _, un, _ = getFileParts(uf)
    _, bn, _ = getFileParts(bf)
    asa_u = cmd.get_area(un)
    asa_b = cmd.get_area(bn)

    umass = cmd.get_model(un).get_mass()
    bmass = cmd.get_model(bn).get_mass()

    # Snapshot the B-values so that completion of colorbyrmsd (issued via
    # cmd.do) can be detected by a change.
    bv0 = []
    cmd.iterate('all', 'bv0.append(b)', space={'bv0': bv0})
    cmd.do('run colorbyrmsd.py; colorbyrmsd \'' + un + '\',\'' + bn + '\',guide = 0,doAlign=1, doPretty=1')
    while True:  # synchronization
        bv1 = []
        cmd.iterate('all', 'bv1.append(b)', space={'bv1': bv1})
        if bv0 != bv1:
            time.sleep(0.1)
            break
        # Back off between polls instead of spinning at full speed.
        time.sleep(0.05)

    # Reserve a unique temporary base name; the file itself is removed on
    # close and only its name is reused below.
    out_file = tempfile.NamedTemporaryFile(suffix='.pdb')
    out_file.close()
    tmp_pdb = out_file.name
    updb = tmp_pdb + 'u'
    bpdb = tmp_pdb + 'b'
    cmd.save(updb, un)
    cmd.save(bpdb, bn)
    (_, uR, _, _, _) = readPDB(updb)
    urmsd = getBvalues(uR)
    os.remove(updb)
    (_, bR, _, _, _) = readPDB(bpdb)
    brmsd = getBvalues(bR)
    os.remove(bpdb)

    # Negative values are excluded from the RMS.
    rms = np.sqrt(np.mean(
        np.array([v for V in urmsd for v in V if v >= 0])**2))

    cmd.reinitialize()
    # (A leftover pdb.set_trace() debugging breakpoint used to halt here.)
    return rms, asa_u, asa_b, umass, bmass, urmsd, brmsd
def testGetArea(self):
    """Check cmd.get_area on a glycine fragment under several settings."""
    tolerance = 1e-2
    cmd.fragment("gly")

    # Default settings: total area is returned, per-atom areas land in b.
    total = cmd.get_area(load_b=1)
    b_list = []
    cmd.iterate("elem O", "b_list.append(b)", space={"b_list": b_list})
    self.assertAlmostEqual(total, 82.505165, delta=tolerance)
    self.assertAlmostEqual(b_list[0], 15.47754, delta=tolerance)

    # With dot_solvent enabled.
    cmd.set("dot_solvent")
    self.assertAlmostEqual(cmd.get_area(), 200.145, delta=tolerance)

    # With dot_solvent enabled and dot_density raised to 4.
    cmd.set("dot_density", 4)
    self.assertAlmostEqual(cmd.get_area(), 200.888, delta=tolerance)
def markAccessible(model, chain, resi):
    """Highlight a residue's CA atom when the residue is solvent accessible.

    PARAMS
        model, chain, resi (strings)
            identify the residue; they are combined into the selection
            "<model> and chain <chain> and resi <resi>".

    When the area of that selection exceeds the threshold, the CA atom is
    shown as a red sphere and the area is printed.  Sets dot_solvent
    globally.  Returns None.
    """
    # this is an arbitrary threshold for solvent accessibility.
    # Average accessible surface areas of residues are ~70-200 A^2
    # (http://www.proteinsandproteomics.org/content/free/tables_1/table08.pdf)
    sasaThreshold = 12.0
    selStr = model + " and chain " + chain + " and resi " + resi
    cmd.set('dot_solvent', 1)
    sasa = cmd.get_area(selStr)
    if sasa > sasaThreshold:
        ca_atom = selStr + " and name ca"
        cmd.show("spheres", ca_atom)
        cmd.color("red", ca_atom)
        # Reuse the area computed above instead of measuring it again.
        # NOTE(review): the report is assumed to belong to the accessible
        # branch -- confirm against the original layout.
        print(selStr + " SASA: " + str(sasa))
    return
def get_SASA(pdbfile, pdb1='pdb1'):
    """Return per-residue SASA sums of selected atoms for a structure file.

    PARAMS
        pdbfile  path of the structure to load
        pdb1     PyMOL object name to load it under (deleted afterwards)

    RETURNS
        dict mapping (chain, resi, resn) to the summed area of atom N1 in
        residues named 'A' and atom N3 in residues named 'C'; every other
        residue is present with the value 0.

    NOTES
        Hydrogens are added (cmd.h_add) and the "ignore" flag is cleared
        before measuring.  dot_solvent, dot_density and solvent_radius are
        changed for the measurement and restored afterwards, also when an
        error occurs.  Prints a per-atom line for residue '653' and a
        summary line.
    """
    ## set
    cmd.load(pdbfile, pdb1)
    saved = dict((name, cmd.get(name))
                 for name in ("dot_solvent", "dot_density", "solvent_radius"))
    try:
        cmd.h_add()
        cmd.flag("ignore", "none")
        cmd.set("dot_solvent", 1)
        cmd.set("dot_density", 2)
        cmd.set("solvent_radius", 3)

        ## calculate
        area = cmd.get_area(pdb1, load_b=1)
        stored.r = []
        cmd.iterate(pdb1, 'stored.r.append((model,chain,resi,resn,name,b))')
        areas = {}
        for model, chain, idx, char, name, sasa in stored.r:
            if idx == '653':
                print(chain, idx, char, name, sasa)
            key = (chain, idx, char)
            base = areas.get(key, 0)
            if (char == 'A' and name == 'N1') or (char == 'C' and name == 'N3'):
                base += float(sasa)
            areas[key] = base
    finally:
        ## reset every setting that was touched, then drop the object
        for name, value in saved.items():
            cmd.set(name, value)
        cmd.delete(pdb1)

    print(pdbfile, area, sum(areas.values()))
    return areas
def compute_contact_surface(opts):
    """Write a per-frame ligand/protein contact-surface table to opts.output.

    For every frame from 2 up to cmd.count_frames(), the selections are
    refreshed with set_selections(opts, frame) and the areas of the
    "ligand", "protein" and "complex" selections are measured in that
    state.  The contact area is (ligand + protein - complex) / 2 and the
    ligand portion is the contact area as a percentage of the ligand area
    (NaN when the ligand area is 0).

    Frames are numbered from 1 in the output, i.e. frame 2 is reported as 1.
    NOTE(review): frame 1 is skipped -- presumably a reference structure;
    confirm.
    """
    with open(opts.output, "w") as fd:
        print(
            "{:5}{:>12}{:>12}{:>12}{:>12}{:>12}".format(
                "",
                "ligand",
                "protein",
                "complex",
                "contact",
                "ligand",
            ),
            file=fd,
        )
        print(
            "{:5}{:>12}{:>12}{:>12}{:>12}{:>12}".format(
                "frame",
                "area(Å\u00b2)",
                "area(Å\u00b2)",
                "area(Å\u00b2)",
                "area(Å\u00b2)",
                "portion(%)",
            ),
            file=fd,
        )

        for f in range(2, cmd.count_frames() + 1):
            print("Processing frame {}...".format(f - 1), flush=True)
            cmd.frame(f)
            set_selections(opts, f)
            ligand_area = cmd.get_area("ligand", f)
            protein_area = cmd.get_area("protein", f)
            complex_area = cmd.get_area("complex", f)
            contact_area = ((ligand_area + protein_area) - complex_area) / 2
            # An empty ligand selection yields area 0; report NaN instead
            # of aborting the whole run with ZeroDivisionError.
            if ligand_area:
                ligand_portion = (contact_area * 100) / ligand_area
            else:
                ligand_portion = float("nan")
            print(
                "{:5}{:12.4f}{:12.4f}{:12.4f}{:12.4f}{:12.1f}".format(
                    f - 1,
                    ligand_area,
                    protein_area,
                    complex_area,
                    contact_area,
                    ligand_portion,
                ),
                file=fd,
            )

    print("Output written to {}".format(opts.output))
def solventExposure(file_name, resi):
    """Return the largest per-atom area among the free cysteine sulfurs of 'resi'.

    The session is cleared, 'file_name' is loaded and the per-atom area is
    computed (with dot_solvent set) on a polymer-only copy.  The copy is
    then trimmed to sulfur atoms of CYS residues that are not bound to
    another CYS SG and that belong to residue 'resi'; the maximum b value
    of what remains is returned.
    """
    pymol.finish_launching()
    cmd.delete('all')
    cmd.load(file_name)

    scratch = "__tmp"
    cmd.create(scratch, "(all) and polymer")
    cmd.set("dot_solvent")
    cmd.get_area(selection=scratch, load_b=1)

    stored.list = []

    # Successively strip everything that is not a free cysteine sulfur.
    for unwanted in (" and not resn CYS",
                     " and not elem S",
                     " and CYS/SG and bound_to CYS/SG"):
        cmd.remove(scratch + unwanted)
    # Finally keep only the requested residue.
    cmd.remove(scratch + " and not resi " + resi)

    cmd.iterate(scratch, "stored.list.append((b))")
    return max(stored.list)
def resipick(selectedChain=False, cutoff=10, doShow=False, verbose=False):
    """Return the exposed residues of the loaded structure as a PDB string.

    PARAMS
        selectedChain (string or False)
            when given, only this chain is considered
            DEFAULT: False (all chains)

        cutoff (float)
            atoms whose area (loaded into b by get_area) is below this
            value are discarded
            DEFAULT: 10

        doShow (boolean)
            show the polymer as spheres and color the exposed residues
            yellow
            DEFAULT: False

        verbose
            unused

    RETURNS
        (string) cmd.get_pdbstr of the selection
        "byres (all) in <exposed atoms>".

    SIDE EFFECTS
        Sets dot_solvent globally and fills stored.tmp_dict with the
        (chain, resv) keys of the exposed atoms.  All temporary objects
        and the "exposed_res" selection are deleted before returning.
    """
    objSel = "(all)"

    # Obtain pdb information
    tmpObj = "__tmp"
    cmd.create(tmpObj, objSel + " and polymer")
    # This full copy is part of "(all)" and therefore of the selection
    # built below, so it is kept although nothing reads it directly.
    fullObj = "full_str"
    cmd.create(fullObj, objSel + " and polymer")
    cmd.set("dot_solvent")
    cmd.get_area(selection=tmpObj, load_b=1)

    # Remove unselected chains
    if selectedChain:
        cmd.remove(tmpObj + " and not chain " + selectedChain)

    # threshold on what one considers an "exposed" atom (in A**2):
    cmd.remove(tmpObj + " and b < " + str(cutoff))

    # Kept for its side effect on stored.tmp_dict.  The former
    # keys().sort() on it produced an unused list and raised
    # AttributeError on Python 3, so it was dropped.
    stored.tmp_dict = {}
    cmd.iterate(tmpObj, "stored.tmp_dict[(chain,resv)]=1")

    # create sels
    selResi = "exposed_res"
    cmd.select(selResi, "byres " + objSel + " in " + tmpObj)

    # export the exposed residues
    resiStr = cmd.get_pdbstr(selResi)

    if doShow != False:
        cmd.show_as("spheres", objSel + " and poly")
        cmd.color("yellow", selResi)

    cmd.delete(tmpObj)
    cmd.delete(fullObj)
    cmd.delete(selResi)

    return resiStr
def pymol_sasa(inputf, visualise=False, test=False):
    """Print the solvent-accessible surface area per amino-acid class.

    Loads 'inputf' as "MyProtein", enables dot_solvent with dot_density 4,
    creates the selections "hydrophobes", "nonhydrophobes", "pcharged" and
    "ncharged", and prints the areas of the hydrophobic, non-hydrophobic,
    negatively charged and positively charged groups (in that order).

    visualise: when True, color the groups and show them as dots.
    test:      when True, additionally print the total area next to the
               sum of the four group areas (the difference stems from
               residues outside the groups, e.g. ACE and NH2 caps).
    """
    # Load PDB input file
    cmd.load(inputf, "MyProtein")

    # Using dot_solvent "off" instead would give the molecular surface
    # area: the whole area of the residues without considering overlap
    # between residue surfaces, as SASA does.

    # Solvent accessible surface area (SASA)
    cmd.set('dot_solvent', value="on")
    # Highest sampling density; dot density ranges 1-4
    cmd.set('dot_density', value="4")

    # Select residues by type
    residue_groups = (
        ("hydrophobes", "resn ala+gly+val+ile+leu+phe+met+trp+pro"),
        ("nonhydrophobes", "resn ser+thr+cys+tyr+asn+gln"),
        ("pcharged", "resn lys+arg+his"),
        ("ncharged", "resn glu+asp"),
    )
    for group_name, group_sel in residue_groups:
        cmd.select(group_name, group_sel)

    if visualise == True:
        # Verify correct selection by visualisation
        group_colors = (
            ("grey", "hydrophobes"),
            ("green", "nonhydrophobes"),
            ("red", "ncharged"),
            ("blue", "pcharged"),
        )
        for color_name, group_name in group_colors:
            cmd.color(color_name, group_name)
        mysel = "hydrophobes or nonhydrophobes or ncharged or pcharged"
        cmd.hide("lines", "all")
        cmd.show("dots", mysel)

    # Work out SASA for each aminoacid-type group
    sasa_all = cmd.get_area("all")
    group_areas = [
        cmd.get_area(group_name)
        for group_name in ("hydrophobes", "nonhydrophobes",
                           "ncharged", "pcharged")
    ]
    sasa_hydrophobes, sasa_nonhydrophobes, sasa_ncharged, sasa_pcharged = group_areas
    print(sasa_hydrophobes, sasa_nonhydrophobes, sasa_ncharged, sasa_pcharged)

    if test == True:
        # Compare total surface area of protein to area of added selections
        sasa_mysel = sasa_hydrophobes + sasa_nonhydrophobes + sasa_ncharged + sasa_pcharged
        print(sasa_all, sasa_mysel)
def resipick(selectedChain=False, cutoff=10, doShow=False, verbose=False):
    '''Pick the epitope residues of the loaded PDB whose accessible surface
    area is at least cutoff (default 10) and return them as a PDB string.

    selectedChain: when given, only this chain is considered; its value is
                   always printed.
    doShow:        show the polymer as spheres and color the exposed
                   residues blue.
    verbose:       unused.

    Sets dot_solvent globally, fills stored.tmp_dict with the (chain, resv)
    keys of the exposed atoms, and leaves the "full_str" object and the
    "exposed_res" selection in the session.
    '''
    objSel = "(all)"

    # get the current pdb structure
    tmpObj = "__tmp"
    cmd.create(tmpObj, objSel + " and polymer")
    fullObj = "full_str"
    cmd.create(fullObj, objSel + " and polymer")
    cmd.set("dot_solvent")
    cmd.get_area(selection=tmpObj, load_b=1)
    print(selectedChain)

    # remove the chains that were not selected
    if selectedChain:
        cmd.remove(tmpObj + " and not chain " + selectedChain)

    # threshold on what one considers an "exposed" atom (in A**2):
    cmd.remove(tmpObj + " and b < " + str(cutoff))

    # Kept for its side effect on stored.tmp_dict.  The former
    # keys().sort() on it produced an unused list and raised
    # AttributeError on Python 3, so it was dropped.
    stored.tmp_dict = {}
    cmd.iterate(tmpObj, "stored.tmp_dict[(chain,resv)]=1")

    # create sels
    selResi = "exposed_res"
    cmd.select(selResi, "byres " + objSel + " in " + tmpObj)

    # export the sequence of the exposed_res selection
    resiStr = cmd.get_pdbstr(selResi)

    if doShow != False:
        cmd.show_as("spheres", objSel + " and poly")
        cmd.color("blue", selResi)

    cmd.delete(tmpObj)

    return resiStr
def findSurfaceResidues(objSel="(all)", cutoff=2.5, selName=0):
    """
    findSurfaceResidues
        finds those residues on the surface of a protein
        that have at least 'cutoff' exposed A**2 surface area.

    PARAMS
        objSel (string)
            the object or selection in which to find exposed residues
            DEFAULT: (all)

        cutoff (float)
            your cutoff of what is exposed or not.
            DEFAULT: 2.5 Ang**2

        selName
            name of the selection created for the exposed atoms; it is
            passed to cmd.select unchanged
            DEFAULT: 0

    RETURNS
        (list: (chain, resv ) )
            A sorted Python list of (chain, residue number) tuples
            corresponding to those residues w/more exposure than the cutoff.

    SIDE EFFECTS
        Sets dot_solvent globally.
    """
    tmpObj = "__tmp"
    # Parenthesise objSel so compound selections combine correctly with "and".
    cmd.create(tmpObj, "(" + objSel + ") and polymer")
    cmd.set("dot_solvent", 1)
    cmd.get_area(selection=tmpObj, load_b=1)

    # threshold on what one considers an "exposed" atom (in A**2):
    cmd.remove(tmpObj + " and b < " + str(cutoff))

    stored.tmp_dict = {}
    cmd.iterate(tmpObj, "stored.tmp_dict[(chain,resv)]=1")
    # sorted() replaces keys().sort(), which only works on Python 2.
    exposed = sorted(stored.tmp_dict)

    cmd.select(selName, "(" + objSel + ") in " + tmpObj)
    cmd.delete(tmpObj)

    return exposed
def findSurfaceResidues(objSel="(all)", cutoff=2.5, selName=0):
    """
    findSurfaceResidues
        collects the residues of a protein that own at least one atom
        with 'cutoff' or more exposed A**2 surface area.

    PARAMS
        objSel (string)
            object or selection to examine
            DEFAULT: (all)

        cutoff (float)
            minimum per-atom area for an atom to count as exposed
            DEFAULT: 2.5 Ang**2

        selName
            name handed to cmd.select for the selection of exposed atoms
            DEFAULT: 0

    RETURNS
        (list: (chain, resv))
            sorted list of (chain, residue number) tuples of the exposed
            residues.
    """
    scratch = "__tmp"
    polymer_only = objSel + " and polymer"
    cmd.create(scratch, polymer_only)

    # Per-atom areas end up in the b column of the scratch copy.
    cmd.set("dot_solvent")
    cmd.get_area(selection=scratch, load_b=1)

    # Discard every atom below the exposure threshold (in A**2).
    buried = scratch + " and b < " + str(cutoff)
    cmd.remove(buried)

    # One dictionary key per residue that still has atoms left.
    stored.tmp_dict = {}
    cmd.iterate(scratch, "stored.tmp_dict[(chain,resv)]=1")
    exposed = sorted(stored.tmp_dict.keys())

    cmd.select(selName, objSel + " in " + scratch)
    cmd.delete(scratch)

    return exposed
def area(pdbid, chain_onco, chain_peptide, cystein_resid):
    """Measure the area of a cysteine SG atom with and without a partner chain.

    Launches PyMOL quietly without a GUI ('-qc'), loads 'pdbid' and
    removes everything but the two given chains.  The area of atom SG of
    residue 'cystein_resid' in chain 'chain_onco' is measured, the partner
    chain is removed, and the same atom is measured again.

    Returns (area with both chains present, area with chain_onco alone).
    """
    # pymol_argv must be in place on __main__ before pymol is imported.
    import __main__
    __main__.pymol_argv = ['pymol', '-qc']
    import pymol
    from pymol import cmd, stored
    pymol.finish_launching()

    for setting_name in ('dot_solvent', 'dot_density'):
        cmd.set(setting_name, 3)

    cmd.load(pdbid)

    # The same atom is measured twice, so build its selection once.
    sg_atom = 'chain {} & resi {} & name SG'.format(chain_onco, cystein_resid)

    cmd.remove('! chain {}+{}'.format(chain_onco, chain_peptide))
    area_in_pair = cmd.get_area(sg_atom)

    cmd.remove('! chain {}'.format(chain_onco))
    area_alone = cmd.get_area(sg_atom)

    return area_in_pair, area_alone
def identifySA(pdb, chain, start, end):
    """Print the residue positions whose area grows when the chain is isolated.

    PARAMS
        pdb    identifier passed to cmd.fetch
        chain  selection expression combined as "<pdb> and <chain>"
        start, end
               residue range; positions start .. end-1 are examined

    For every position the side-chain/CA atoms (everything except backbone
    C, N and O) are measured twice: as part of the fetched structure
    ("sel_nanobody") and as part of an isolated copy ("obj_nanobody").
    Positions where the first area is more than 0.5 smaller than the
    second are printed as a list.

    The selections "sele<pos>" and "obj<pos>" are left in the session.
    """
    start = int(start)
    end = int(end)
    positions = range(start, end)

    # Fetch the protein
    cmd.fetch(pdb)

    # Create selection and object
    cmd.select("sel_nanobody", pdb + " and " + chain)
    cmd.create("obj_nanobody", pdb + " and " + chain)

    # Create selection for all residues
    for x in positions:
        cmd.select("sele" + str(x), "sel_nanobody and resi " + str(x) + " and not name c+n+o")

    # Find the surface areas of all the selections
    selArea = [cmd.get_area("sele" + str(x)) for x in positions]

    # Create the matching selections inside the isolated object
    for x in positions:
        cmd.select("obj" + str(x), "obj_nanobody and resi " + str(x) + " and not name c+n+o")

    # Find the surface areas within the isolated object
    objArea = [cmd.get_area("obj" + str(x)) for x in positions]

    # Pair the areas by position.  The former index loop ran over
    # range(0, end - 1) and reported index + 1, which is only correct
    # when start == 1.
    diffArea = [sel - obj for sel, obj in zip(selArea, objArea)]

    # If there is a difference in the area, report the residue position
    listPos = [pos for pos, diff in zip(positions, diffArea) if diff < -0.5]

    print(listPos)
def GetArea(identifier,solvent_area,dot_density=4,solvent_radius=1.4):
    """
    Configure PyMOL's area settings and return the area of a selection.

    Args:
        identifier: what to measure; passed to cmd.get_area
        solvent_area: if True, get the solvent-accessible area. Otherwise,
            just get the surface area
        dot_density: the density of dots, between 0 and 4
        solvent_radius: radius of the solvent, in Angstroms (def is water)
    Returns:
        the value of cmd.get_area(identifier)
    """
    area_settings = (
        ('dot_solvent', int(solvent_area)),
        ('dot_density', dot_density),
        ('solvent_radius', solvent_radius),
    )
    for setting_name, setting_value in area_settings:
        cmd.set(setting_name, setting_value)
    return cmd.get_area(identifier)
def SASA(app):
    """Plugin entry point: report the SASA of the selection named (sele).

    Asks for the solvent radius and the sampling density in two dialogs
    (parented to app.root), sets dot_solvent, dot_density and
    solvent_radius accordingly and prints the area of (sele).  Cancelling
    a dialog, a missing (sele) selection or a non-numeric entry ends the
    function without measuring.
    """
    # first check and make sure the user has made a selection
    if 'sele' not in cmd.get_names('selections'):
        tkMessageBox.showwarning(
            'SASA Utility',
            'No selection detected. This plugin requires a selection named (sele) to measure the surface area for.'
        )
        return

    # ask what size rolling ball the user would like to use for the solvent
    sol_rad = tkSimpleDialog.askstring(
        'SASA Utility',
        'Please enter the solvent radius (in Angstrom):',
        parent=app.root,
        initialvalue='1.4'
    )
    if sol_rad is None:
        return

    dot_dens = tkSimpleDialog.askstring(
        'SASA Utility',
        'Please enter the sampling density (1-4).\nHigher density is more accurate,\nbut slower for large selections:',
        parent=app.root,
        initialvalue='3'
    )
    if dot_dens is None:
        return

    # Both answers arrive as free text; reject anything that is not a
    # number instead of failing with a traceback.
    try:
        sol_rad = float(sol_rad)
        dot_dens = float(dot_dens)
    except ValueError:
        tkMessageBox.showwarning(
            'SASA Utility',
            'The solvent radius and the sampling density must be numbers.'
        )
        return

    if dot_dens > 4:
        dot_dens = 4
        print('Warning: selected sampling density greater than max allowed val (4). Defaulting to 4.')

    cmd.set('dot_solvent', 1)
    cmd.set('dot_density', dot_dens)
    cmd.set('solvent_radius', sol_rad)

    sasa_res = cmd.get_area('(sele)')

    # print out the results
    sasa_str = '%.2f' % sasa_res
    print('Solvent accessible surface area for the given selection is ' + sasa_str + ' square Angstrom')
def get_sasa(selection, state=-1, dot_density=5, quiet=1):
    '''
DESCRIPTION

    Get solvent accesible surface area

    The measurement is done on a temporary single-state copy of the
    selection, with dot_solvent (and, unless negative, dot_density) set
    on that copy only.

SEE ALSO

    get_area
    pymol.util.get_sasa (considered broken!)
    '''
    state = int(state)
    dot_density = int(dot_density)
    quiet = int(quiet)

    # A state below 1 means "use the current state".
    if state < 1:
        state = cmd.get_state()

    scratch = cmd.get_unused_name('_')
    cmd.create(scratch, selection, state, 1, zoom=0, quiet=1)

    cmd.set('dot_solvent', 1, scratch)
    if dot_density >= 0:
        cmd.set('dot_density', dot_density, scratch)

    sasa = cmd.get_area(scratch, quiet=quiet)

    cmd.delete(scratch)
    return sasa
def main():
    """Write per-residue areas for the .cif files of one input directory.

    Expects exactly one command-line argument, 'protein' or 'complex',
    which selects the module-level directory pair (in_dir2/out_dir2 for
    'protein', in_dir1/out_dir1 otherwise).  For each processed file a
    '<file>_SASA.txt' is written to the output directory with one
    'resi <n> <area>' line per CA atom found.

    Only the first two .cif files are processed (see `counter`).
    NOTE(review): that limit looks like a leftover from testing; confirm.
    """
    valid_modes = ('protein', 'complex')
    # Exactly one argument, and it must be a known mode.  The former test
    # (`len != 2 and (a != x or a != y)`) raised IndexError when no
    # argument was given and let invalid arguments through.
    if len(sys.argv) != 2 or sys.argv[1] not in valid_modes:
        print('Insufficient argument. (Can only be either protein or complex)')
        print(sys.argv[1:])
        return

    if sys.argv[1] == 'protein':
        in_dir, out_dir = in_dir2, out_dir2
    else:
        in_dir, out_dir = in_dir1, out_dir1

    counter = 2
    for fname in os.listdir(in_dir):
        if counter == 0:
            break
        if fname[-4:] != '.cif':
            continue
        print(fname)

        cmd.load(in_dir + fname)
        stored.residues = []
        cmd.iterate('name ca', 'stored.residues.append(resi)')

        # The context manager closes the file even if get_area fails.
        with open(out_dir + fname + '_SASA.txt', 'w+') as outfile:
            for i in stored.residues:
                outfile.write('resi %s' % i + ' ' + str(cmd.get_area('resi %s' % i)))
                outfile.write('\n')

        print('Finished calculating protein {}'.format(fname))
        cmd.reinitialize()
        counter -= 1
    return
# ================================================================ # PyMOL launch code # # === Provide arguments to PyMOL (first one must be "pymol") pymol_argv = ["pymol", "-q"] # # === Launch the PyMOL thread(s) try: import __builtin__ except ImportError: import builtins as __builtin__ import os, threading, __main__ threading.Thread(target=__builtin__.execfile, args=(os.environ['PYMOL_PATH'] + "/modules/launch_pymol.py", __main__.__dict__, __main__.__dict__)).start() # # === Wait until PyMOL is ready to receive commands e = threading.Event() while not hasattr(__main__, 'pymol'): e.wait(0.01) while not pymol._cmd.ready(): e.wait(0.01) # # PyMOL is now launched, you can now import "pymol" modules. # =============================================================== from pymol import cmd cmd.load("$PYMOL_PATH/test/dat/pept.pdb") print(" The surface area is: %8.3f" % cmd.get_area())
# NOTE(review): this script fragment appears truncated at both ends -- the
# objects 3u5b and 3u5c used below are not loaded in the visible part, and
# the final `clist` is computed but not used within it.
cmd.load("3u5d.pdb")
cmd.load("3u5e.pdb")

# Create ribosome complex
cmd.create("mycomp", "3u5b 3u5c 3u5d 3u5e")

with open("yeast_18S.sasa", "w") as fout18S, open("yeast_25S.sasa", "w") as fout25S:
    # First deal with A (N1)
    # Collect (chain, residue number) of every matching atom as integers;
    # this assumes numeric chain identifiers.
    stored.residues = []
    cmd.iterate("mycomp///A/N1", "stored.residues.append((int(chain), int(resi)))")
    stored.residues.sort()

    # 18S: residues of chain 2, one "<resi>\tA\t<area>" line each
    alist = [y for (x, y) in stored.residues if x == 2]
    for i in alist:
        outstr = "{0}\tA\t{1:.4f}\n".format(i, cmd.get_area("/mycomp//2/{0}/N1".format(i)))
        fout18S.write(outstr)

    # 25S: residues of chain 1
    alist = [y for (x, y) in stored.residues if x == 1]
    for i in alist:
        outstr = "{0}\tA\t{1:.4f}\n".format(i, cmd.get_area("/mycomp//1/{0}/N1".format(i)))
        fout25S.write(outstr)

    # Now deal with C (N3)
    stored.residues = []
    cmd.iterate("mycomp///C/N3", "stored.residues.append((int(chain), int(resi)))")
    stored.residues.sort()

    # 18S
    clist = [y for (x, y) in stored.residues if x == 2]
def interfaceResidues(cmpx, cA='c. A', cB='c. B', cutoff=1.0, selName="interface"):
    """
    interfaceResidues -- finds 'interface' residues between two chains in a complex.

    PARAMS
        cmpx
            The complex containing cA and cB

        cA
            The first chain in which we search for residues at an interface
            with cB

        cB
            The second chain in which we search for residues at an interface
            with cA

        cutoff
            The difference in area OVER which residues are considered
            interface residues.  Residues whose dASA from the complex to
            a single chain is greater than this cutoff are kept.  Zero
            keeps all residues.

        selName
            The name of the selection to return.

    RETURNS
        * A selection of interface residues is created and named
            depending on what you passed into selName
        * An array of values is returned where each value is:
            ( modelName, residueNumber, dASA )
          (one entry per residue, taken from the first atom of that
          residue whose |dASA| reaches the cutoff)

    NOTES
        If you have two chains that are not from the same PDB that you want
        to complex together, use the create command like:
            create myComplex, pdb1WithChainA or pdb2withChainX
        then pass myComplex to this script like:
            interfaceResidues myComplex, c. A, c. X

        This script calculates the area of the complex as a whole.  Then,
        it separates the two chains that you pass in through the arguments
        cA and cB, alone.  Once it has this, it calculates the difference
        and any residues ABOVE the cutoff are called interface residues.

        The user's dot_solvent setting is restored on exit, also when an
        error occurs.

    AUTHOR:
        Jason Vertrees, 2009.
    """
    # Save user's settings, before setting dot_solvent
    oldDS = cmd.get("dot_solvent")
    cmd.set("dot_solvent", 1)

    try:
        # set some string names for temporary objects/selections
        tempC, selName1 = "tempComplex", selName + "1"
        chA, chB = "chA", "chB"

        # operate on a new object & turn off the original
        cmd.create(tempC, cmpx)
        cmd.disable(cmpx)

        # remove cruft and irrelevant chains
        cmd.remove(tempC + " and not (polymer and (%s or %s))" % (cA, cB))

        # get the area of the complete complex
        cmd.get_area(tempC, load_b=1)
        # copy the areas from the loaded b to the q, field.
        cmd.alter(tempC, 'q=b')

        # extract the two chains and calc. the new area
        # note: the q fields are copied to the new objects
        # chA and chB
        cmd.extract(chA, tempC + " and (" + cA + ")")
        cmd.extract(chB, tempC + " and (" + cB + ")")
        cmd.get_area(chA, load_b=1)
        cmd.get_area(chB, load_b=1)

        # update the chain-only objects w/the difference
        cmd.alter("%s or %s" % (chA, chB), "b=b-q")

        # The calculations are done.  Now, all we need to
        # do is to determine which residues are over the cutoff
        # and save them.
        stored.r, rVal = [], []
        # A set gives constant-time "already recorded" checks.
        seen = set()
        threshold = float(cutoff)
        cmd.iterate('%s or %s' % (chA, chB), 'stored.r.append((model,resi,b))')

        cmd.enable(cmpx)
        cmd.select(selName1, 'none')
        for (model, resi, diff) in stored.r:
            if abs(diff) < threshold:
                continue
            key = resi + "-" + model
            if key in seen:
                continue
            seen.add(key)
            rVal.append((model, resi, diff))
            # expand the selection here; I chose to iterate over stored.r
            # instead of creating one large selection b/c if there are too
            # many residues PyMOL might crash on a very large selection.
            # This is pretty much guaranteed not to kill PyMOL; but, it
            # might take a little longer to run.
            cmd.select(selName1, selName1 + " or (%s and i. %s)" % (model, resi))

        # this is how you transfer a selection to another object.
        cmd.select(selName, cmpx + " in " + selName1)
        # clean up after ourselves
        cmd.delete(selName1)
        cmd.delete(chA)
        cmd.delete(chB)
        cmd.delete(tempC)
        # show the selection
        cmd.enable(selName)
    finally:
        # reset users settings
        cmd.set("dot_solvent", oldDS)

    return rVal
# PyMOL launch code import os,threading,__main__,__builtin__ os.environ['PYMOL_PATH'] = 'C:/Programme/DeLano Scientific/PyMOL' # # === Provide arguments to PyMOL (first one must be "pymol") pymol_argv = [ "pymol", "-q" ] # # === Launch the PyMOL thread(s) threading.Thread(target=__builtin__.execfile, args=(os.environ['PYMOL_PATH']+"/modules/launch_pymol.py", __main__.__dict__,__main__.__dict__)).start() # # === Wait until PyMOL is ready to receive commands e=threading.Event() while not hasattr(__main__,'pymol'): e.wait(0.01) while not pymol._cmd.ready(): e.wait(0.01) # # PyMOL is now launched, you can now import "pymol" modules. # =============================================================== from pymol import cmd cmd.load("$PYMOL_PATH/test/dat/pept.pdb") print " The surface area is: %8.3f"%cmd.get_area()
def findSurfaceResidues(file_name, objSel="(all)", cutoff=2.5, doShow=False,
                        verbose=False, only_cysteine=False):
    """
    findSurfaceResidues
        loads a structure file and finds those residues on the surface of the
        protein that have at least 'cutoff' exposed A**2 surface area.

        Every object currently in the session is deleted before `file_name`
        is loaded.  The areas are computed on a temporary copy ("__tmp") of
        the polymer atoms, which is deleted again before returning.

    PARAMS
        file_name (string)
            structure file handed to cmd.load

        objSel (string)
            the object or selection in which to find exposed residues
            DEFAULT: (all)

        cutoff (float)
            your cutoff of what is exposed or not.
            DEFAULT: 2.5 Ang**2

        doShow (boolean)
            accepted but not used by this function

        verbose (boolean)
            print a warning about dot_solvent and the name of the atom
            selection that is created

        only_cysteine (boolean)
            if true, only sulfur atoms of CYS residues are considered, and
            atoms matching "CYS/SG and bound_to CYS/SG" are discarded before
            the cutoff is applied

    RETURNS
        (list: resv)
            A Python list of residue numbers, ordered by (chain, resv),
            corresponding to those residues w/more exposure than the cutoff.

    SIDE EFFECTS
        dot_solvent is switched on and left on.  Two selections named
        "exposed_atm_<n>" and "exposed_res_<n>" (n random in 0..10000) are
        created.
    """
    pymol.finish_launching()
    cmd.delete('all')
    cmd.load(file_name)

    tmpObj = "__tmp"
    cmd.create(tmpObj, objSel + " and polymer")
    try:
        if verbose != False:
            print("WARNING: I'm setting dot_solvent.  You may not care for this.")
        cmd.set("dot_solvent")
        cmd.get_area(selection=tmpObj, load_b=1)

        if only_cysteine:
            cmd.remove(tmpObj + " and not resn CYS")
            cmd.remove(tmpObj + " and not elem S")
            cmd.remove(tmpObj + " and CYS/SG and bound_to CYS/SG")

        # threshold on what one considers an "exposed" atom (in A**2):
        cmd.remove(tmpObj + " and b < " + str(cutoff))

        stored.tmp_dict = {}
        if only_cysteine:
            cmd.iterate(tmpObj + " and resn CYS", "stored.tmp_dict[(chain,resv)]=1")
        else:
            cmd.iterate(tmpObj, "stored.tmp_dict[(chain,resv)]=1")
        # sorted() works on both Python 2 lists and Python 3 key views,
        # unlike calling .sort() on the result of .keys().
        exposed = sorted(stored.tmp_dict)

        randstr = str(random.randint(0, 10000))
        selName = "exposed_atm_" + randstr
        if verbose != False:
            print("Exposed residues are selected in: " + selName)
        cmd.select(selName, objSel + " in " + tmpObj)
        selNameRes = "exposed_res_" + randstr
        cmd.select(selNameRes, "byres " + selName)
    finally:
        # never leave the scratch object behind, even if PyMOL raised
        cmd.delete(tmpObj)

    # callers only get the residue numbers, not the chain identifiers
    return [resv for (_chain, resv) in exposed]
# Report, for every interaction listed in `interaction_data`, the area of the
# whole complex, of chain A alone and of chain B alone, plus the derived
# interface area.  dot_solvent is switched on before any area is computed.
cmd.set("dot_solvent", "on")

# The context manager closes the report even if PyMOL raises while one of the
# structures is being processed.
with open("interface_area.csv", "w") as output:
    output.write(
        "protein1\tprotein2\tPDB\tPDB_type\tiface_area\tcomplex_area\tP1_area\tP2_area\n"
    )

    # The first entry of interaction_data is skipped (presumably a header
    # row -- confirm against the code that fills interaction_data).
    for this_line in interaction_data[1:]:
        if this_line == "":
            continue

        this_info = this_line.split("\t")
        this_p1 = this_info[0]
        this_p2 = this_info[1]
        this_type = this_info[4]
        this_pdb = this_info[21]
        this_file = "%s%s" % (pdb_path[this_type], this_pdb)
        print(this_file)

        # "ca" keeps everything except chain B, "cb" everything except chain A
        cmd.load(this_file, "this_pdb")
        cmd.copy("ca", "this_pdb")
        cmd.remove("ca and chain B")
        cmd.copy("cb", "this_pdb")
        cmd.remove("cb and chain A")

        complex_area = cmd.get_area("this_pdb")
        chainA_area = cmd.get_area("ca")
        chainB_area = cmd.get_area("cb")
        # area lost on complex formation, halved
        interface_area = (chainA_area + chainB_area - complex_area) / 2

        output.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (
            this_p1, this_p2, this_pdb, this_type,
            interface_area, complex_area, chainA_area, chainB_area))

        cmd.delete("this_pdb")
        cmd.delete("ca")
        cmd.delete("cb")
def get_surface_area(file_name):
    """
    Load `file_name` into PyMOL and return the area of the selection 'resi 1'.

    The area is whatever cmd.get_area reports for 'resi 1' across the session
    after the file has been loaded.  Afterwards the atoms matching the
    selection `file_name[:-5]` are removed.
    """
    pymol.finish_launching()
    cmd.load(file_name)
    first_residue_area = cmd.get_area('resi 1')

    # NOTE(review): the clean-up selection is the file name minus its last
    # five characters -- presumably meant to be the name of the object that
    # cmd.load created; confirm for paths with directories or other
    # extension lengths.
    loaded_selection = file_name[:-5]
    cmd.remove(loaded_selection)

    return first_residue_area
def prune_grid(rna, score_file, outname, quantile=0.99, sasa_cutoff=20.0):
    """
    Keep the best-scoring, sufficiently buried grid points and write them out.

    `score_file` is a comma-separated table with the columns pred_MLP,
    pred_XGB, pred_RF, pred_LR, pred_Extra and x, y, z.  A grid point is kept
    when the mean of its five predictions exceeds the `quantile` quantile of
    all such means and when its area, measured as a pseudoatom placed next to
    `rna`, is below `sasa_cutoff`.  Kept points are accumulated in the PyMOL
    object "tmpPoint" (b = mean prediction, q = area, chain "ZZ", resi
    counting up from 1).

    The points are written as "<outname>_pruned_grid.xyz" / ".pdb" and
    "cavity_pruned_grid.sd"; then remove_isolated() is called and the same
    export is repeated as "<outname>_pruned_grid_clusters.xyz" / ".pdb",
    overwriting "cavity_pruned_grid.sd".
    """
    model_columns = ['pred_MLP', 'pred_XGB', 'pred_RF', 'pred_LR', 'pred_Extra']

    def _export(xyz_template, pdb_template):
        # Write the current contents of "tmpPoint" as an xyz table, as a PDB
        # of the complex with `rna`, and as an .sd file.
        xyz = cmd.get_coords('tmpPoint', 1)
        table = pd.DataFrame.from_records(xyz)
        table.insert(0, "element", "C")
        table.to_csv(xyz_template % (outname), index=False, header=False, sep=" ")

        cmd.create("complex", "%s tmpPoint" % rna)
        cmd.save(pdb_template % (outname), "complex")
        cmd.save("cavity_pruned_grid.sd", "tmpPoint")

    # make sure all atoms within an object occlude one another
    cmd.flag("ignore", "none")

    # use solvent-accessible surface with high sampling density
    cmd.set('dot_solvent', 1)
    cmd.set('dot_density', 3)
    cmd.set('solvent_radius', 2.0)

    scores = pd.read_csv(score_file, header=0, sep=",")
    cutoff = np.quantile(a=scores[model_columns].apply(np.mean, 'columns'),
                         q=quantile)

    next_resi = 1
    rows = zip(scores[model_columns].values, scores[['x', 'y', 'z']].values)
    for pred, pos in rows:
        mean_pred = np.mean(pred)
        if mean_pred > cutoff:
            xyz_pos = [pos[0], pos[1], pos[2]]

            # create tmp complex to measure the point in the context of rna
            cmd.pseudoatom("tmpPoint3", hetatm=1, name="C", resn="UNK", pos=xyz_pos)
            cmd.create("complextmp", "%s tmpPoint3" % rna)
            sasa = cmd.get_area('resn UNK and not polymer and complextmp')
            cmd.delete("complextmp tmpPoint3")

            # remove really highly exposed points
            if sasa < sasa_cutoff:
                cmd.pseudoatom("tmpPoint", hetatm=1, b=mean_pred, q=sasa,
                               name="C", resn="UNK", resi=next_resi,
                               chain="ZZ", pos=xyz_pos)
                print(pred, pos, mean_pred, cutoff, sasa)
                next_resi += 1

    # write out grid file and complex
    _export("%s_pruned_grid.xyz", "%s_pruned_grid.pdb")

    # remove isolated (helper defined elsewhere in the file)
    remove_isolated()

    # write out grid file and complex again, after the clean-up
    _export("%s_pruned_grid_clusters.xyz", "%s_pruned_grid_clusters.pdb")
def get_resi_stats(target_sel="all", residues=[], group_id="X", atom="NZ", atom_dist=8, resi_n_term=8, resi_c_term=8, verb=True):
    """
    Collect surface-area and neighbourhood statistics for a list of residues.

    Each entry of `residues` is a one-letter residue code followed by the
    residue number (e.g. "K10").  Entries whose `atom` is not found exactly
    once are reported and skipped.  For every remaining entry the function
    records:

    * MSA  - cmd.get_area of the residue with dot_solvent off
    * SASA - cmd.get_area of the residue with dot_solvent on
    * nr_atoms_around - number of atoms selected by
      "<atom> around <atom_dist> and not (<residue>)"
    * chain / chain_sec - strings of resi_n_term + resi_c_term + 1
      characters describing the CA atoms found in the sequence window around
      the residue: one-letter residue codes (via aa_3_1, "." when unknown or
      absent) and the `ss` values (an empty `ss` is written as "L")

    The records are written to "resi_stats_<target_sel>_<group_id>.csv"
    (semicolon separated).  In addition, for shells at 2, 4, ... 12 Angstrom
    around `atom`, every CA atom found in the shell is written to
    "resi_stats_weblogo_<target_sel>_<group_id>.txt" as a FASTA-style header
    followed by a 12-character line holding the one-letter code at position
    dist - 1.  The selections created along the way are collected in PyMOL
    groups named "Stats_...".

    Parameters
    ----------
    target_sel : str
        Selection the residues are looked up in.
    residues : list of str
        Residues to analyse; anything but a list is rejected.  The default
        list is never modified.
    group_id : str
        Tag used in selection, group and file names.
    atom : str
        Name of the atom the neighbourhood searches are centred on.
    atom_dist : number
        Distance used for nr_atoms_around.
    resi_n_term, resi_c_term : int
        Number of residues before / after the residue in the sequence window.
    verb : bool
        Not used.

    Returns
    -------
    None.  The function also returns early, after printing an error, when
    `residues` is not a list or an entry does not start with a letter; in
    the latter case no CSV is written.

    The dot_solvent and dot_density settings found on entry are restored on
    every exit path.
    """
    # The distance in angstrom to look for
    var_dist = 12

    if not isinstance(residues, list):
        print("\nERROR: The residues should be supplied as a list\n")
        return

    # Get current setting
    ini_setting = cmd.get("dot_solvent")
    ini_setting2 = cmd.get("dot_density")
    # Increasing dot_density makes calculation slower, but not a big
    # difference; it is deliberately left untouched here.

    # Make groups
    group = "Stats_%s_%s" % (target_sel, group_id)
    group_atom = "Stats_%s_%s_%s" % (atom, target_sel, group_id)
    group_chain = "Stats_%s_%s_%s" % ("chain", target_sel, group_id)
    group_3dweb = "Stats_%s_%s_%s" % ("3dweb", target_sel, group_id)

    # Make list for storing
    slist = []

    try:
        # Make file for writing; closed by the context manager on every path
        with open("resi_stats_weblogo_%s_%s.txt" % (target_sel, group_id), 'w') as wfileweblogo:
            for residue in residues:
                residue = residue.strip()

                # Check input *before* it is used for the residue-name lookup
                # and the integer conversion; otherwise a malformed entry
                # fails inside those instead of reaching this message.
                if not residue or residue[0].isdigit():
                    print("\nERROR: The residue should be in format of ex: K10\n")
                    return

                resn_1 = residue[0].upper()
                resn_3 = aa_1_3[resn_1]
                resi = int(residue[1:])

                # Do selection and group
                sel_str = "%s and resn %s and resi %s" % (target_sel, resn_3, resi)
                resn_resi = "%s%s" % (resn_1, resi)
                sel_str_text = "%s_%s_%s" % (target_sel, group_id, resn_resi)
                cmd.select(sel_str_text, sel_str)

                # Make quick test, to see if the atom is there
                sel_str_atom_test = "%s and name %s" % (sel_str_text, atom)
                test_str = "Test_nr_atoms"
                cmd.select(test_str, sel_str_atom_test)
                nr_test = cmd.count_atoms(test_str)
                if nr_test != 1:
                    print("\nERROR: The selection '%s', has only nr of atoms:%s. SKIPPING" % (sel_str_atom_test, nr_test))
                    continue

                # MSA = Molecular Surface Area
                cmd.set("dot_solvent", "off")
                MSA = cmd.get_area(sel_str)
                # SASA = Solvent Accessible Surface Area
                cmd.set("dot_solvent", "on")
                SASA = cmd.get_area(sel_str)

                # Get the chain residues; "." marks positions with no data
                chain = "." * (resi_n_term + resi_c_term + 1)
                chain_sec = "." * (resi_n_term + resi_c_term + 1)
                resi_sel_min = resi - resi_n_term
                if resi_sel_min < 1:
                    resi_sel_min = 1
                resi_sel_max = resi + resi_c_term
                resi_sel = "%i-%i" % (resi_sel_min, resi_sel_max)

                # Make selection
                sel_str_chain = "%s and resi %s and name CA" % (target_sel, resi_sel)
                sel_str_text_chain = "%s_%s_%s_%s" % ("chain", target_sel, group_id, resn_resi)
                cmd.select(sel_str_text_chain, sel_str_chain)

                # Get the chain info
                stored.list_chain = []
                expression_chain = "stored.list_chain.append([resi, resn, name, ss])"
                cmd.iterate(sel_str_text_chain, expression_chain)
                for chain_resi, chain_resn, chain_name, chain_ss in stored.list_chain:
                    # An empty ss value is written as 'L'
                    if chain_ss == '':
                        chain_ss = 'L'
                    chain_resi = int(chain_resi)
                    try:
                        chain_resn_1 = aa_3_1[chain_resn]
                    except KeyError:
                        chain_resn_1 = "."
                    # Position in the window; the central residue sits at
                    # index resi_n_term
                    index = resi_n_term - (resi - chain_resi)
                    # Replace in string for residue names
                    chain = chain[:index] + chain_resn_1 + chain[index + 1:]
                    # Replace in string for secondary structure
                    chain_sec = chain_sec[:index] + chain_ss + chain_sec[index + 1:]

                # Get number of neighbour atoms
                # Make selection for the centre atom
                sel_str_atom = "%s and name %s" % (sel_str_text, atom)
                sel_str_text_atom = "%s_%s_%s_%s" % (atom, target_sel, group_id, resn_resi)
                cmd.select(sel_str_text_atom, sel_str_atom)

                # Make selection around the atom for fixed distance, and count
                sel_str_atom_around = "%s around %s and not (%s)" % (sel_str_text_atom, atom_dist, sel_str)
                sel_str_text_atom_around = "%s_around_%s_%s_%s" % (atom, target_sel, group_id, resn_resi)
                cmd.select(sel_str_text_atom_around, sel_str_atom_around)

                # Count around
                stored.list = []
                expression = "stored.list.append([resi, resn, name])"
                cmd.iterate(sel_str_text_atom_around, expression)
                nr_atoms_around = len(stored.list)

                # Make selection around the atom for variable distance,
                # in steps of two Angstrom
                for dist in range(2, var_dist + 1, 2):
                    dist_pre = dist - 1

                    # Select for an angstrom shorter
                    sel_str_atom_3dweb_pre = "byres %s around %s" % (sel_str_text_atom, dist_pre)
                    sel_str_text_atom_3dweb_pre = "%s_3dweb_pre_%s_%s_%s_%s_%s" % (atom, target_sel, group_id, resn_resi, dist, dist_pre)
                    cmd.select(sel_str_text_atom_3dweb_pre, sel_str_atom_3dweb_pre)

                    # Select at distance
                    sel_str_atom_3dweb_post = "byres %s around %s" % (sel_str_text_atom, dist)
                    sel_str_text_atom_3dweb_post = "%s_3dweb_post_%s_%s_%s_%s_%s" % (atom, target_sel, group_id, resn_resi, dist, dist)
                    cmd.select(sel_str_text_atom_3dweb_post, sel_str_atom_3dweb_post)

                    # Make selection for unique residues within the shell; it
                    # is built from the two selection expressions, not from
                    # the two named selections created above
                    sel_str_text_atom_3dweb_sel = "%s_3dweb_sel_%s_%s_%s_%s" % (atom, target_sel, group_id, resn_resi, dist)
                    cmd.select(sel_str_text_atom_3dweb_sel, "(%s and not %s) and name CA" % (sel_str_atom_3dweb_post, sel_str_atom_3dweb_pre))

                    # delete the helper selections
                    cmd.delete(sel_str_text_atom_3dweb_pre)
                    cmd.delete(sel_str_text_atom_3dweb_post)

                    # Loop through selection
                    stored.list_3dweb = []
                    expression_3dweb = "stored.list_3dweb.append([resi, resn, name])"
                    cmd.iterate(sel_str_text_atom_3dweb_sel, expression_3dweb)
                    for web3d_resi, web3d_resn, web3d_name in stored.list_3dweb:
                        try:
                            web3d_resn_1 = aa_3_1[web3d_resn]
                        except KeyError:
                            web3d_resn_1 = "."
                        # Write http://weblogo.threeplusone.com/ file
                        FASTA_text = "> %s %s %s %s %s, dist=%s resi=%s resn=%s %s" % (target_sel, group_id, resi, resn_1, resn_3, dist, web3d_resi, web3d_resn_1, web3d_resn)
                        weblogo = "." * (var_dist)
                        weblogo = weblogo[:dist - 1] + web3d_resn_1 + weblogo[dist:]
                        # Write
                        wfileweblogo.write(FASTA_text + "\n")
                        wfileweblogo.write(weblogo + "\n")

                # Store info
                slist.append([target_sel, group_id, resn_resi, resn_1, resi, MSA, SASA, nr_atoms_around, chain, chain_sec])

            # Group selections
            cmd.group(group, "%s_%s_*" % (target_sel, group_id))
            cmd.select("%s_sel" % group, "%s_%s_*" % (target_sel, group_id))
            # Group around
            cmd.group(group_chain, "%s_%s_%s*" % ("chain", target_sel, group_id))
            cmd.group(group_atom, "%s_%s_%s_*" % (atom, target_sel, group_id))
            cmd.group(group_atom, "%s_around_%s_%s_*" % (atom, target_sel, group_id))
            cmd.group(group_3dweb, "%s_3dweb_sel_%s_%s_*" % (atom, target_sel, group_id))

            # Write output
            with open("resi_stats_%s_%s.csv" % (target_sel, group_id), 'w') as wfile:
                wfile.write("target_sel;group_id;resn_resi;resn;resi;MSA;SASA;nr_atoms_around;chain;chain_sec" + "\n")
                for row in slist:
                    wfile.write("%s;%s;%s;%s;%i;%3.0f;%3.0f;%i;%s;%s" % tuple(row) + "\n")
    finally:
        # Back to before, also on early return or error
        cmd.set("dot_solvent", ini_setting)
        cmd.set('dot_density', ini_setting2)
# Create ribosome complex
cmd.create("mycomp", "3u5b 3u5c 3u5d 3u5e")

with open("yeast_18S.sasa", "w") as fout18S, open("yeast_25S.sasa", "w") as fout25S:
    # First deal with A (N1): collect (chain, resi) of every N1 atom of an A
    stored.residues = []
    cmd.iterate("mycomp///A/N1", "stored.residues.append((int(chain), int(resi)))")
    stored.residues.sort()

    # 18S: chain 2, one "<resi>\tA\t<area>" line per residue
    alist = [resi for (chain_id, resi) in stored.residues if chain_id == 2]
    fout18S.writelines(
        "{0}\tA\t{1:.4f}\n".format(
            resi, cmd.get_area("/mycomp//2/{0}/N1".format(resi)))
        for resi in alist
    )

    # 25S: chain 1, same layout
    alist = [resi for (chain_id, resi) in stored.residues if chain_id == 1]
    fout25S.writelines(
        "{0}\tA\t{1:.4f}\n".format(
            resi, cmd.get_area("/mycomp//1/{0}/N1".format(resi)))
        for resi in alist
    )

    # Now deal with C (N3)
    stored.residues = []
    cmd.iterate("mycomp///C/N3", "stored.residues.append((int(chain), int(resi)))")
    stored.residues.sort()
('A', 12, 'CA', 9.998000144958496, -7.690999984741211, 28.7810001373291) (... """ # rename to .pdb such that I don't have to cmd.fetch(CODE) (reimport from PDB) head, tail = os.path.split(path_to_file) output_path = os.path.join(head, tail[3:7] + '.pdb') os.rename(path_to_file, output_path) # .ent -> .pdb # see https://github.com/dsw7/BridgingInteractions/tree/master/scalene-triangle/pymol-get-surface-example cmd.load(output_path) cmd.create( tmpObj, "({} and polymer and chain {}) and not resn HOH".format(CODE, CHAIN)) cmd.set("dot_solvent") cmd.get_area(selection=tmpObj, load_b=1) cmd.remove(tmpObj + " and b < " + str(SOLVENT_EXPOSED_CUTOFF)) cmd.show(selection=tmpObj, representation="dots") # show the exposed atoms stored.tmp_dict = {} cmd.iterate_state(state=-1, selection=tmpObj, expression=ITER_STATE_EXP) exposed = stored.tmp_dict.keys() exposed.sort() cmd.delete(tmpObj) cmd.delete('all') os.rename( output_path, path_to_file) # .pdb -> .ent such that file_pdb.clear() can del dir # compute closest surface coordinate if protein ends up having both bridge and metal # -------------------------------------------------------------------------- try:
s.show_message('Error', 'The sampling density is invalid.') raise SystemExit in_msg = 'Select the model number. \n\ Use 1 if there\'s only one model. Use 0 for all models.' model = int(s.show_inputdialog('Select the model', in_msg, '1')) cmd.delete('all') cmd.load(pdbpath, 'for_area') cmd.split_states('for_area') cmd.set('dot_solvent', 1) cmd.set('dot_density', density) if model != 0: # for a specific model try: area = cmd.get_area('resi ' + selection + ' and model for_area_' + '{:04d}'.format(model)) except: s.show_message('Error', 'The model number was wrong.') raise SystemExit print(area) message = 'The surface area for residue ' + selection + ' is ' + \ '{:.3f}'.format(area) + ' Angstroms^2' if area == 0: message += "\nYou'd also get a zero if the residue number is invalid." s.show_message('Finished', message) else: # for all
def calc_psa3d(self, obj_list=None, include_SandP: bool = True, atom_to_remove=None):
    """
    Calculate the 3D polar surface area (3D-PSA) over all states of PyMOL
    objects, e.g. the snapshots of an MD trajectory.
    (Contribution by Benjamin Schroeder)

    For every state of an object the atoms of residue LIG that are N, O
    (optionally S and P) or hydrogens neighbouring those elements are
    selected as "noh" and their area is taken from cmd.get_area.

    Parameters
    ----------
    obj_list: list, optional
        list of pymol objects (Default = all objects reported by
        cmd.get_names("objects"))
    include_SandP: bool, optional
        Set to False to exclude the S and P atoms from the calculation of the
        3D-PSA. (Default = True)
    atom_to_remove: str, optional
        Single atom name of the atom to remove from the selection
        (Default = None). Useful if you want to include only S or only P in
        the calculation of the 3D-PSA. Values that are not strings are
        ignored.

    Returns
    ----------
    obj_psa_dict: list
        [mean, standard deviation, median] of the per-state areas, each
        divided by 100 (the conversion from Angstrom-based to nm-based
        units).  An empty dict is returned when `obj_list` is empty.
        NOTE(review): the result is overwritten for every object, so only the
        last object of `obj_list` is reported -- confirm this is intended.
    """
    if obj_list is None:
        obj_list = cmd.get_names("objects")

    # The selection only depends on the arguments and the object name, so its
    # fixed parts are assembled once.
    if include_SandP:
        polar_part = "resn LIG and (elem N or elem O or elem S or elem P or (elem H and (neighbor elem N+O+S+P))) and "
    else:
        polar_part = "resn LIG and (elem N or elem O or (elem H and (neighbor elem N+O))) and "

    if isinstance(atom_to_remove, str):
        exclusion_part = " and not name {}".format(atom_to_remove)
    else:
        exclusion_part = ""

    obj_psa_dict = {}
    for obj in obj_list:
        cmd.frame(0)
        select_string = polar_part + obj + exclusion_part

        # one area value per state, states being numbered from 1
        psa = []
        for state in range(1, cmd.count_states(obj) + 1):
            cmd.select("noh", select_string)
            psa.append(float(cmd.get_area("noh", state=state)))

        # gather data
        obj_psa_dict = [
            np.mean(psa) / 100,
            np.std(psa) / 100,
            np.median(psa) / 100,
        ]

    return obj_psa_dict
def interfaceResidues(cmpx, cA='c. A', cB='c. B', cutoff=1.0, selName="interface"):
    """
    interfaceResidues -- finds 'interface' residues between two chains in a complex.

    PARAMS
        cmpx
            The complex containing cA and cB

        cA
            The first chain in which we search for residues at an interface
            with cB

        cB
            The second chain in which we search for residues at an interface
            with cA

        cutoff
            The difference in area OVER which residues are considered
            interface residues.  Residues whose dASA from the complex to
            a single chain is greater than this cutoff are kept.  Zero
            keeps all residues.  Converted with float() on entry.

        selName
            The name of the selection to return.

    RETURNS
        * A selection of interface residues is created and named
            depending on what you passed into selName
        * An array of values is returned where each value is:
            ( modelName, residueNumber, dASA )
          modelName is the name of the temporary per-chain object ("chA" or
          "chB").  Only the first atom of a residue that reaches the cutoff
          contributes an entry.

    NOTES
        If you have two chains that are not from the same PDB that you want
        to complex together, use the create command like:
            create myComplex, pdb1WithChainA or pdb2withChainX
        then pass myComplex to this script like:
            interfaceResidues myComplex, c. A, c. X

        This script calculates the area of the complex as a whole.  Then,
        it separates the two chains that you pass in through the arguments
        cA and cB, alone.  Once it has this, it calculates the difference
        and any residues ABOVE the cutoff are called interface residues.

        The temporary objects "tempComplex", "chA" and "chB" and the
        selection selName + "1" are created and deleted again.

    AUTHOR:
        Jason Vertrees, 2009.
    """
    # convert once, up front, so a bad cutoff fails before PyMOL is touched
    cutoff = float(cutoff)

    # Save user's settings, before setting dot_solvent
    oldDS = cmd.get("dot_solvent")
    cmd.set("dot_solvent", 1)

    try:
        # set some string names for temporary objects/selections
        tempC, selName1 = "tempComplex", selName + "1"
        chA, chB = "chA", "chB"

        # operate on a new object & turn off the original
        cmd.create(tempC, cmpx)
        cmd.disable(cmpx)

        # remove cruft and irrelevant chains
        cmd.remove(tempC + " and not (polymer and (%s or %s))" % (cA, cB))

        # get the area of the complete complex
        cmd.get_area(tempC, load_b=1)
        # copy the areas from the loaded b to the q, field.
        cmd.alter(tempC, 'q=b')

        # extract the two chains and calc. the new area
        # note: the q fields are copied to the new objects
        # chA and chB
        cmd.extract(chA, tempC + " and (" + cA + ")")
        cmd.extract(chB, tempC + " and (" + cB + ")")
        cmd.get_area(chA, load_b=1)
        cmd.get_area(chB, load_b=1)

        # update the chain-only objects w/the difference
        cmd.alter("%s or %s" % (chA, chB), "b=b-q")

        # The calculations are done.  Now, all we need to
        # do is to determine which residues are over the cutoff
        # and save them.
        stored.r, rVal = [], []
        seen = set()  # residues already reported; a set keeps lookups cheap
        cmd.iterate('%s or %s' % (chA, chB), 'stored.r.append((model,resi,b))')

        cmd.enable(cmpx)
        # start from an explicitly empty selection
        cmd.select(selName1, 'none')
        for (model, resi, diff) in stored.r:
            key = resi + "-" + model
            if abs(diff) >= cutoff and key not in seen:
                seen.add(key)
                rVal.append((model, resi, diff))
                # expand the selection here; I chose to iterate over stored.r instead of
                # creating one large selection b/c if there are too many residues PyMOL
                # might crash on a very large selection.  This is pretty much guaranteed
                # not to kill PyMOL; but, it might take a little longer to run.
                cmd.select(selName1, selName1 + " or (%s and i. %s)" % (model, resi))

        # this is how you transfer a selection to another object.
        cmd.select(selName, cmpx + " in " + selName1)

        # clean up after ourselves
        cmd.delete(selName1)
        cmd.delete(chA)
        cmd.delete(chB)
        cmd.delete(tempC)

        # show the selection
        cmd.enable(selName)
    finally:
        # reset users settings, even if one of the PyMOL calls failed
        cmd.set("dot_solvent", oldDS)

    return rVal
def get_resi_stats(target_sel="all", residues=[], group_id="X", atom="NZ", atom_dist=8, resi_n_term=8, resi_c_term=8, verb=True):
    """
    Collect surface-area and neighbourhood statistics for a list of residues.

    Each entry of `residues` is a one-letter residue code followed by the
    residue number (e.g. "K10").  Entries whose `atom` is not found exactly
    once are reported and skipped.  For every remaining entry the function
    records:

    * MSA  - cmd.get_area of the residue with dot_solvent off
    * SASA - cmd.get_area of the residue with dot_solvent on
    * nr_atoms_around - number of atoms selected by
      "<atom> around <atom_dist> and not (<residue>)"
    * chain / chain_sec - strings of resi_n_term + resi_c_term + 1
      characters describing the CA atoms found in the sequence window around
      the residue: one-letter residue codes (via aa_3_1, "." when unknown or
      absent) and the `ss` values (an empty `ss` is written as "L")

    The records are written to "resi_stats_<target_sel>_<group_id>.csv"
    (semicolon separated).  In addition, for shells at 2, 4, ... 12 Angstrom
    around `atom`, every CA atom found in the shell is written to
    "resi_stats_weblogo_<target_sel>_<group_id>.txt" as a FASTA-style header
    followed by a 12-character line holding the one-letter code at position
    dist - 1.  The selections created along the way are collected in PyMOL
    groups named "Stats_...".

    Parameters
    ----------
    target_sel : str
        Selection the residues are looked up in.
    residues : list of str
        Residues to analyse; anything but a list is rejected.  The default
        list is never modified.
    group_id : str
        Tag used in selection, group and file names.
    atom : str
        Name of the atom the neighbourhood searches are centred on.
    atom_dist : number
        Distance used for nr_atoms_around.
    resi_n_term, resi_c_term : int
        Number of residues before / after the residue in the sequence window.
    verb : bool
        Not used.

    Returns
    -------
    None.  The function also returns early, after printing an error, when
    `residues` is not a list or an entry does not start with a letter; in
    the latter case no CSV is written.

    The dot_solvent and dot_density settings found on entry are restored on
    every exit path.
    """
    # The distance in angstrom to look for
    var_dist = 12

    if not isinstance(residues, list):
        print("\nERROR: The residues should be supplied as a list\n")
        return

    # Get current setting
    ini_setting = cmd.get("dot_solvent")
    ini_setting2 = cmd.get("dot_density")
    # Increasing dot_density makes calculation slower, but not a big
    # difference; it is deliberately left untouched here.

    # Make groups
    group = "Stats_%s_%s" % (target_sel, group_id)
    group_atom = "Stats_%s_%s_%s" % (atom, target_sel, group_id)
    group_chain = "Stats_%s_%s_%s" % ("chain", target_sel, group_id)
    group_3dweb = "Stats_%s_%s_%s" % ("3dweb", target_sel, group_id)

    # Make list for storing
    slist = []

    try:
        # Make file for writing; closed by the context manager on every path
        with open("resi_stats_weblogo_%s_%s.txt" % (target_sel, group_id), 'w') as wfileweblogo:
            for residue in residues:
                residue = residue.strip()

                # Check input *before* it is used for the residue-name lookup
                # and the integer conversion; otherwise a malformed entry
                # fails inside those instead of reaching this message.
                if not residue or residue[0].isdigit():
                    print("\nERROR: The residue should be in format of ex: K10\n")
                    return

                resn_1 = residue[0].upper()
                resn_3 = aa_1_3[resn_1]
                resi = int(residue[1:])

                # Do selection and group
                sel_str = "%s and resn %s and resi %s" % (target_sel, resn_3, resi)
                resn_resi = "%s%s" % (resn_1, resi)
                sel_str_text = "%s_%s_%s" % (target_sel, group_id, resn_resi)
                cmd.select(sel_str_text, sel_str)

                # Make quick test, to see if the atom is there
                sel_str_atom_test = "%s and name %s" % (sel_str_text, atom)
                test_str = "Test_nr_atoms"
                cmd.select(test_str, sel_str_atom_test)
                nr_test = cmd.count_atoms(test_str)
                if nr_test != 1:
                    print("\nERROR: The selection '%s', has only nr of atoms:%s. SKIPPING" % (sel_str_atom_test, nr_test))
                    continue

                # MSA = Molecular Surface Area
                cmd.set("dot_solvent", "off")
                MSA = cmd.get_area(sel_str)
                # SASA = Solvent Accessible Surface Area
                cmd.set("dot_solvent", "on")
                SASA = cmd.get_area(sel_str)

                # Get the chain residues; "." marks positions with no data
                chain = "." * (resi_n_term + resi_c_term + 1)
                chain_sec = "." * (resi_n_term + resi_c_term + 1)
                resi_sel_min = resi - resi_n_term
                if resi_sel_min < 1:
                    resi_sel_min = 1
                resi_sel_max = resi + resi_c_term
                resi_sel = "%i-%i" % (resi_sel_min, resi_sel_max)

                # Make selection
                sel_str_chain = "%s and resi %s and name CA" % (target_sel, resi_sel)
                sel_str_text_chain = "%s_%s_%s_%s" % ("chain", target_sel, group_id, resn_resi)
                cmd.select(sel_str_text_chain, sel_str_chain)

                # Get the chain info
                stored.list_chain = []
                expression_chain = "stored.list_chain.append([resi, resn, name, ss])"
                cmd.iterate(sel_str_text_chain, expression_chain)
                for chain_resi, chain_resn, chain_name, chain_ss in stored.list_chain:
                    # An empty ss value is written as 'L'
                    if chain_ss == '':
                        chain_ss = 'L'
                    chain_resi = int(chain_resi)
                    try:
                        chain_resn_1 = aa_3_1[chain_resn]
                    except KeyError:
                        chain_resn_1 = "."
                    # Position in the window; the central residue sits at
                    # index resi_n_term
                    index = resi_n_term - (resi - chain_resi)
                    # Replace in string for residue names
                    chain = chain[:index] + chain_resn_1 + chain[index + 1:]
                    # Replace in string for secondary structure
                    chain_sec = chain_sec[:index] + chain_ss + chain_sec[index + 1:]

                # Get number of neighbour atoms
                # Make selection for the centre atom
                sel_str_atom = "%s and name %s" % (sel_str_text, atom)
                sel_str_text_atom = "%s_%s_%s_%s" % (atom, target_sel, group_id, resn_resi)
                cmd.select(sel_str_text_atom, sel_str_atom)

                # Make selection around the atom for fixed distance, and count
                sel_str_atom_around = "%s around %s and not (%s)" % (sel_str_text_atom, atom_dist, sel_str)
                sel_str_text_atom_around = "%s_around_%s_%s_%s" % (atom, target_sel, group_id, resn_resi)
                cmd.select(sel_str_text_atom_around, sel_str_atom_around)

                # Count around
                stored.list = []
                expression = "stored.list.append([resi, resn, name])"
                cmd.iterate(sel_str_text_atom_around, expression)
                nr_atoms_around = len(stored.list)

                # Make selection around the atom for variable distance,
                # in steps of two Angstrom
                for dist in range(2, var_dist + 1, 2):
                    dist_pre = dist - 1

                    # Select for an angstrom shorter
                    sel_str_atom_3dweb_pre = "byres %s around %s" % (sel_str_text_atom, dist_pre)
                    sel_str_text_atom_3dweb_pre = "%s_3dweb_pre_%s_%s_%s_%s_%s" % (atom, target_sel, group_id, resn_resi, dist, dist_pre)
                    cmd.select(sel_str_text_atom_3dweb_pre, sel_str_atom_3dweb_pre)

                    # Select at distance
                    sel_str_atom_3dweb_post = "byres %s around %s" % (sel_str_text_atom, dist)
                    sel_str_text_atom_3dweb_post = "%s_3dweb_post_%s_%s_%s_%s_%s" % (atom, target_sel, group_id, resn_resi, dist, dist)
                    cmd.select(sel_str_text_atom_3dweb_post, sel_str_atom_3dweb_post)

                    # Make selection for unique residues within the shell; it
                    # is built from the two selection expressions, not from
                    # the two named selections created above
                    sel_str_text_atom_3dweb_sel = "%s_3dweb_sel_%s_%s_%s_%s" % (atom, target_sel, group_id, resn_resi, dist)
                    cmd.select(sel_str_text_atom_3dweb_sel, "(%s and not %s) and name CA" % (sel_str_atom_3dweb_post, sel_str_atom_3dweb_pre))

                    # delete the helper selections
                    cmd.delete(sel_str_text_atom_3dweb_pre)
                    cmd.delete(sel_str_text_atom_3dweb_post)

                    # Loop through selection
                    stored.list_3dweb = []
                    expression_3dweb = "stored.list_3dweb.append([resi, resn, name])"
                    cmd.iterate(sel_str_text_atom_3dweb_sel, expression_3dweb)
                    for web3d_resi, web3d_resn, web3d_name in stored.list_3dweb:
                        try:
                            web3d_resn_1 = aa_3_1[web3d_resn]
                        except KeyError:
                            web3d_resn_1 = "."
                        # Write http://weblogo.threeplusone.com/ file
                        FASTA_text = "> %s %s %s %s %s, dist=%s resi=%s resn=%s %s" % (target_sel, group_id, resi, resn_1, resn_3, dist, web3d_resi, web3d_resn_1, web3d_resn)
                        weblogo = "." * (var_dist)
                        weblogo = weblogo[:dist - 1] + web3d_resn_1 + weblogo[dist:]
                        # Write
                        wfileweblogo.write(FASTA_text + "\n")
                        wfileweblogo.write(weblogo + "\n")

                # Store info
                slist.append([target_sel, group_id, resn_resi, resn_1, resi, MSA, SASA, nr_atoms_around, chain, chain_sec])

            # Group selections
            cmd.group(group, "%s_%s_*" % (target_sel, group_id))
            cmd.select("%s_sel" % group, "%s_%s_*" % (target_sel, group_id))
            # Group around
            cmd.group(group_chain, "%s_%s_%s*" % ("chain", target_sel, group_id))
            cmd.group(group_atom, "%s_%s_%s_*" % (atom, target_sel, group_id))
            cmd.group(group_atom, "%s_around_%s_%s_*" % (atom, target_sel, group_id))
            cmd.group(group_3dweb, "%s_3dweb_sel_%s_%s_*" % (atom, target_sel, group_id))

            # Write output
            with open("resi_stats_%s_%s.csv" % (target_sel, group_id), 'w') as wfile:
                wfile.write("target_sel;group_id;resn_resi;resn;resi;MSA;SASA;nr_atoms_around;chain;chain_sec" + "\n")
                for row in slist:
                    wfile.write("%s;%s;%s;%s;%i;%3.0f;%3.0f;%i;%s;%s" % tuple(row) + "\n")
    finally:
        # Back to before, also on early return or error
        cmd.set("dot_solvent", ini_setting)
        cmd.set('dot_density', ini_setting2)