def __init__(self, config, wsas_params, tmp_dir, nonstandard_residue_files,
             nonstandard_residue, ligand_topology, options=None,
             parameters=None):
    """Wrapper for freesasa.

    config: str
        Path to configuration file containing residue composition
        and atomic parameters - freesasa format.
    wsas_params, tmp_dir, nonstandard_residue_files, nonstandard_residue,
    ligand_topology:
        Passed through, together with ``config``, to
        ``self._update_sasa_config``; the value it returns is the
        configuration that is actually handed to the classifier.
    options: dict, optional
        Options to change how PDBs are parsed by freesasa. A falsy value
        (``None`` or an empty dict) selects ``_DEFAULT_OPTIONS``.
    parameters: dict, optional
        Parameters to alter how freesasa computes surface area. A falsy
        value selects ``_DEFAULT_PARAMETERS``.
    """
    # Loading several structures happens in two steps and the extended
    # config is not read during the first one, so warnings are hidden.
    freesasa.setVerbosity(1)

    effective_config = self._update_sasa_config(
        config,
        wsas_params,
        tmp_dir,
        nonstandard_residue_files,
        nonstandard_residue,
        ligand_topology,
    )
    # The classifier is given the configuration path as UTF-8 bytes.
    encoded_config = bytes(str(effective_config), 'utf-8')
    self.classifier = freesasa.Classifier(encoded_config)

    self.options = options if options else _DEFAULT_OPTIONS
    self.parameters = parameters if parameters else _DEFAULT_PARAMETERS
def freesasa_cb(prody_parsed, probe_radius=1.4):
    """Run a freesasa calculation on the backbone and CB atoms of a protein.

    prody_parsed:
        Object exposing ``select(selection_string)``; the returned selection
        must provide ``getCoords()``, ``getResnames()`` and ``getNames()``.
    probe_radius: float, optional
        Value passed to freesasa as the ``'probe-radius'`` parameter.

    Returns whatever ``freesasa.calcCoord`` returns for the selected atoms.

    NOTE(review): if ``select`` yields ``None`` (presumably when nothing
    matches) the ``getCoords()`` call below fails with AttributeError --
    confirm whether callers need a friendlier error.
    """
    cb_sele = prody_parsed.select(
        'protein and (backbone or name CB) and not element H D')

    # calcCoord takes one flat sequence x1, y1, z1, x2, ... rather than
    # one triple per atom.
    coords = [value for xyz in cb_sele.getCoords() for value in xyz]

    # A single default classifier serves every atom; building a new one
    # per atom (as was done before) is loop-invariant work.
    classifier = freesasa.Classifier()
    radii = [
        classifier.radius(residue_name, atom_name)
        for residue_name, atom_name in zip(cb_sele.getResnames(),
                                           cb_sele.getNames())
    ]

    return freesasa.calcCoord(
        coords, radii, freesasa.Parameters({'probe-radius': probe_radius}))
def _get_free_sasa(t, parameters=None, classifier=None, options=None):
    '''Run freesasa on a single-chain structure and wrap the result in a Row.

    t:
        Pair ``(key, structure)``. ``structure`` is read through the
        attributes ``num_chains``, ``num_models``, ``chains_per_model``,
        ``chain_name_list``, ``groups_per_chain``, ``sequence_index_list``,
        ``group_type_list``, ``group_list`` and ``x/y/z_coord_list``.
    parameters: optional
        Passed to ``freesasa.calc``; ``freesasa.Parameters()`` when falsy.
    classifier: optional
        Classifier used both to detect unknown atoms and to assign radii;
        ``freesasa.Classifier()`` when ``None``.
    options: optional
        Structure options; ``freesasa.Structure.defaultOptions`` when falsy.

    Returns ``Row([key, sasa_classes.totalArea])``.

    Raises ``Exception`` when the structure does not hold exactly one chain,
    or when an unknown atom is met and the halt-at-unknown option bit is set.

    NOTE(review): ``classifyResults`` and ``Row`` are resolved from module
    scope, and ``totalArea`` is read as an attribute (not called) -- confirm
    that this matches what ``classifyResults`` returns.
    '''
    key = t[0]
    structure = t[1]
    if structure.num_chains != 1:
        raise Exception(
            "This method can only be applied to single polyer chain.")

    freesasaStructure = freesasa.Structure()
    if classifier is None:
        classifier = freesasa.Classifier()
    optbitfield = freesasa.Structure._get_structure_options(
        options or freesasa.Structure.defaultOptions)

    # Running offsets into the flat per-chain / per-group / per-atom lists.
    # chainIndex used to be read without ever being assigned.
    chainIndex = 0
    groupIndex = 0
    atomIndex = 0

    for modelIndex in range(structure.num_models):
        print("model: " + str(modelIndex + 1))
        for _ in range(structure.chains_per_model[modelIndex]):
            chainName = structure.chain_name_list[chainIndex]

            for _ in range(structure.groups_per_chain[chainIndex]):
                seqIndex = structure.sequence_index_list[groupIndex]
                group = structure.group_list[
                    structure.group_type_list[groupIndex]]
                groupName = group["groupName"]
                atomNames = group["atomNameList"]

                for offset, name in enumerate(atomNames):
                    # Compare by value: identity against a string literal
                    # is not a reliable equality test.
                    if classifier.classify(groupName, name) == 'Unknown':
                        if optbitfield & freesasa.FREESASA_SKIP_UNKNOWN:
                            continue
                        if optbitfield & freesasa.FREESASA_HALT_AT_UNKNOWN:
                            raise Exception("Halting at unknown atom")

                    freesasaStructure.addAtom(
                        name, groupName, seqIndex, chainName,
                        structure.x_coord_list[atomIndex + offset],
                        structure.y_coord_list[atomIndex + offset],
                        structure.z_coord_list[atomIndex + offset])

                # Skipped atoms still occupy slots in the coordinate lists,
                # so advance by the full group size.
                atomIndex += len(atomNames)
                groupIndex += 1

            chainIndex += 1

    freesasaStructure.setRadiiWithClassifier(classifier)
    freesasaResult = freesasa.calc(freesasaStructure,
                                   parameters or freesasa.Parameters())

    sasa_classes = classifyResults(freesasaResult, freesasaStructure,
                                   classifier)
    return Row([key, sasa_classes.totalArea])
def _load_packaged_classifier(resource):
    """Build a freesasa Classifier from a config file shipped inside hydroid."""
    conffile = pkgutil.get_data('hydroid', resource)
    # The classifier is built from a file name, so the packaged bytes are
    # written to a temporary file; this works independently of how the
    # package was installed.
    temp = tempfile.NamedTemporaryFile(delete=False)
    try:
        try:
            temp.write(conffile)
        finally:
            temp.close()
        return freesasa.Classifier(temp.name)
    finally:
        # Remove the temporary file even when writing or parsing fails.
        os.remove(temp.name)


def _hsasa_selection(resid, chain_name, atom_names):
    """Return a freesasa selection command named after ``resid``."""
    # Negative residue numbers are prefixed with a backslash.
    escape = '\\' if resid < 0 else ''
    if len(chain_name) > 0:
        return '%d,(chain %s) and (resi %s%d) and (name %s)' % (
            resid, chain_name, escape, resid, atom_names)
    return '%d,(resi %s%d) and (name %s)' % (
        resid, escape, resid, atom_names)


def get_DNA_H_SASA(pdb_file,csvfileout,chain=None,resids=[],seq=None,probe_radius=1.4,slicen=100,vdw_set=None,Hcontrib=[1.0]*7,n_threads=1,verbose=False):
    """
    Wrapper around the FreeSASA library: calculates the solvent accessible
    surface area (SASA) of the atoms in pdb_file, extracts the SASA of the
    deoxyribose hydrogen atoms and sums it up for every nucleotide with
    the coefficients Hcontrib.

    pdb_file - structure file handed to freesasa.Structure.
    csvfileout - path of the CSV file to write.
    chain - name of the DNA chain of interest in pdb_file; if the chain has
        no name leave blank ('') (None is treated the same way).
    resids - a list of resids to calculate H-SASA values for.
    seq - sequence of the DNA strand, string or biopython Seq object.
    Hcontrib - coefficients for the individual SASA of deoxyribose hydrogens
        used when summing them into the H-SASA profile,
        order [H1' H2' H2'' H3' H4' H5' H5'']
    Note: chain, resids, seq, Hcontrib can also each be a list of two or
        more instances, to make the calculation for several chains, spans
        of resids or combinations of Hcontrib at once. In this case the
        number of elements in chain, resids, seq and Hcontrib should be the
        same, and the lists are iterated simultaneously (i.e. no
        combinations are tried). Chains should be of the same length.
    probe_radius - size of the probe to roll.
    slicen - number of slices per atom, controls precision of the calculation.
    vdw_set - selects the set of VdW radii:
        None - default radii of FREESASA are used
        charmm36-rmin - rmin from charmm36 forcefield
        amber10-rmin - rmin from AMBER10 forcefield
        (any other value also falls back to the FREESASA default)
    n_threads - passed to freesasa as 'n-threads'.
    verbose - when false, freesasa verbosity is set to freesasa.nowarnings.

    The mutable defaults of resids and Hcontrib are never modified in place.

    Return
    --------
    None. Writes the CSV file csvfileout with columns of H-SASA profiles
    along the sequence.
    """
    # 'basestring' only exists on Python 2; fall back to str elsewhere.
    try:
        string_types = basestring  # noqa: F821
    except NameError:
        string_types = str

    if chain is None:
        # The default used to crash in list(None); treat it as "no name".
        chain = ''
    chain = [chain] if isinstance(chain, string_types) else list(chain)

    if len(chain) > 1:
        assert len(chain) == len(resids)
        assert len(chain) == len(seq)
        assert len(chain) == len(Hcontrib)
    else:
        resids = [resids]
        seq = [seq]
        Hcontrib = [Hcontrib]

    if not verbose:
        freesasa.setVerbosity(freesasa.nowarnings)

    # Same order as Hcontrib. The third entry must be H2'' -- it used to be
    # a second H2', so H2'' was never counted and H2' was counted twice.
    hatoms = ["H1'", "H2'", "H2''", "H3'", "H4'", "H5'", "H5''"]

    vdw_configs = {
        'charmm36-rmin': 'pkgdata/charmm36_rmin.config',
        'amber10-rmin': 'pkgdata/amber10_rmin.config',
    }
    structure_options = {'hydrogen': True, 'hetatm': True}
    if vdw_set in vdw_configs:
        classifier = _load_packaged_classifier(vdw_configs[vdw_set])
        structure = freesasa.Structure(pdb_file, classifier,
                                       options=structure_options)
    else:
        structure = freesasa.Structure(pdb_file, options=structure_options)

    print("Launching FreeSASA calculation...")
    result = freesasa.calc(structure, freesasa.Parameters({
        'algorithm': freesasa.LeeRichards,
        'n-slices': slicen,
        'probe-radius': probe_radius,
        'n-threads': n_threads}))
    print("Calculation done")

    print("Extracting SASA values ...")
    all_hatoms = '+'.join(hatoms)
    res = dict()
    for i, (ch, rids, Hcont) in enumerate(zip(chain, resids, Hcontrib)):
        # Explicit dtype: a bare pd.Series() is object-typed on recent pandas.
        res[i] = pd.Series(dtype=float)
        if (np.array(Hcont) == 1.0).all():
            # Simplified procedure: with all coefficients equal to one, a
            # single selection per residue covers every hydrogen at once.
            sels = [_hsasa_selection(resid, ch, all_hatoms) for resid in rids]
            selections = freesasa.selectArea(sels, structure, result)
            res[i] = res[i].add(pd.Series(selections) * 1.0, fill_value=0)
        else:
            # Regular procedure: one pass per hydrogen, weighted by its
            # coefficient; hydrogens with a zero coefficient are skipped.
            for hat, hcont in zip(hatoms, Hcont):
                if hcont != 0:
                    sels = [_hsasa_selection(resid, ch, hat) for resid in rids]
                    selections = freesasa.selectArea(sels, structure, result)
                    res[i] = res[i].add(
                        pd.Series(selections) * float(hcont), fill_value=0)

    # Selections are named after the resid, so the index is turned back
    # into integers before sorting.
    for i in range(len(chain)):
        res[i].index = res[i].index.map(int)
        res[i] = res[i].sort_index()

    if len(chain) == 1:
        df = pd.DataFrame({
            'resid': res[0].index,
            'Site': ['%d%s' % (n, l)
                     for n, l in zip(range(1, 1 + len(seq[0])), seq[0])],
            'H-SASA': res[0].values})
    else:
        df = pd.DataFrame()
        for i in range(len(chain)):
            ndf = pd.DataFrame({
                'resid_%d' % i: res[i].index,
                'Site_%d' % i: ['%d%s' % (n, l)
                                for n, l in zip(range(1, 1 + len(seq[i])),
                                                seq[i])],
                'H-SASA_%d' % i: res[i].values})
            df = pd.concat([df, ndf], axis=1)

    print("Outputting H-SASA profile to %s" % csvfileout)
    df.to_csv(csvfileout)
def run_freesasa_custom(self, npoints, verbose=False):
    """Compute total areas for the whole PDB and for its first two parts.

    A classifier is built from ``self.database_classifier`` and the file at
    ``self.pdb_path`` is evaluated twice with the Lee-Richards algorithm
    (probe radius 1.4, ``npoints`` slices): once as a single structure and
    once per entry of ``fs.structureArray`` loaded with
    ``'separate-chains': True``.

    npoints:
        Value passed to freesasa as ``'n-slices'``.
    verbose:
        Accepted but not used by this method.

    Returns a list ``[whole, first, second]`` holding the ``totalArea()``
    of the whole structure and of the first two array entries.

    Raises IndexError when the structure array has fewer than two entries.
    """
    classifier = fs.Classifier(self.database_classifier)

    def lee_richards():
        # A fresh parameter object for every calculation.
        return fs.Parameters({
            'algorithm': fs.LeeRichards,
            'probe-radius': 1.4,
            'n-slices': npoints
        })

    whole_options = {
        # False skips HETATM records, True includes them.
        'hetatm': False,
        # False ignores hydrogens, True includes them.
        'hydrogen': True,
        # False uses only the first MODEL, True includes all MODELs.
        'join-models': False,
        # False guesses the radius of unknown atoms from the element,
        # True skips unknown atoms.
        'skip-unknown': False,
        # False sets the radius of atoms that cannot be guessed to 0,
        # True throws an exception on unknown atoms.
        'halt-at-unknown': False
    }
    whole = fs.Structure(self.pdb_path, classifier, whole_options)
    whole_area = fs.calc(whole, lee_richards()).totalArea()

    split_options = {
        'separate-chains': True,
        'hydrogen': True,
        'separate-models': False
    }
    parts = fs.structureArray(self.pdb_path, split_options, classifier)
    part_areas = [fs.calc(part, lee_richards()).totalArea() for part in parts]

    return [whole_area, part_areas[0], part_areas[1]]