def sa_calc(polymer_pdb, radius):
    """Compute the solvent accessible surface area (SASA) of one polymer.

    The file at ``polymer_pdb`` is parsed with ``Chem.MolFromMolFile``,
    hydrogens are added back with coordinates, and the result is written to
    ``out_dir + NAME + '_new.pdb'`` so that FreeSASA can load it. The total
    area is printed, then divided by the atom count of the molecule as
    parsed (before ``AddHs``) and appended to
    ``out_dir + 'Average surface area.txt'``.

    Relies on the module-level names ``out_dir`` and ``NAME``.

    :param polymer_pdb: path handed to ``Chem.MolFromMolFile``.
    :param radius: probe radius given to FreeSASA.
    :return: None; results are printed and appended to the text file.
    """
    parsed_mol = Chem.MolFromMolFile(polymer_pdb)

    # FreeSASA reads PDB, so round-trip through RDKit with explicit hydrogens.
    pdb_path = out_dir + NAME + '_new.pdb'
    Chem.MolToPDBFile(Chem.AddHs(parsed_mol, addCoords=True), pdb_path)

    # FreeSASA skips hydrogens and HETATM records unless told otherwise.
    structure_options = {
        'hetatm': True,
        'hydrogen': True,
        'join-models': False,
        'skip-unknown': False,
        'halt-at-unknown': False,
    }

    probe = freesasa.Parameters()
    probe.setProbeRadius(radius)

    structure = freesasa.Structure(pdb_path, options=structure_options)
    total_area = round(freesasa.calc(structure, probe).totalArea(), 4)
    print(f'Total SASA is {total_area} Å^2 when probe radius is {radius} Å.')

    per_atom_area = round(total_area / parsed_mol.GetNumAtoms(), 4)

    # The file record and the console message share the same tail.
    tail = str(per_atom_area) + ' Å^2 with the probe size of ' + str(radius) + 'Å.\n'
    with open(out_dir + 'Average surface area.txt', 'a+') as report:
        report.write(f'The normalized surface area of {NAME} is ' + tail)
    print('Nomalized solvent accessible surface area is ' + tail)
def freesasa_cb(prody_parsed, probe_radius=1.4):
    """Run FreeSASA on the backbone and C-beta atoms of a ProDy structure.

    Atoms are picked with the selection
    ``'protein and (backbone or name CB) and not element H D'``; their radii
    come from the default ``freesasa.Classifier``.

    :param prody_parsed: object exposing ``select()``; the returned selection
        must provide ``getCoords()``, ``getResnames()`` and ``getNames()``.
    :param probe_radius: probe radius forwarded as FreeSASA ``probe-radius``.
    :return: whatever ``freesasa.calcCoord`` returns for those atoms.
    """
    cb_sele = prody_parsed.select(
        'protein and (backbone or name CB) and not element H D')

    # calcCoord expects one flat x, y, z, x, y, z, ... sequence.
    coords = [value for atom_xyz in cb_sele.getCoords() for value in atom_xyz]

    # One classifier serves every atom; building a new one per atom (as the
    # previous code did) only repeated identical setup work.
    classifier = freesasa.Classifier()
    radii = [
        classifier.radius(resname, atom_name)
        for resname, atom_name in zip(cb_sele.getResnames(),
                                      cb_sele.getNames())
    ]

    return freesasa.calcCoord(
        coords, radii, freesasa.Parameters({'probe-radius': probe_radius}))
def test_freesasa_lookup(pdb_seq_object):
    """Compare FreeSASALookup output against stored reference arrays.

    A handful of metrics are checked with default FreeSASA parameters, then
    the ``total`` metric is checked again with custom Lee-Richards parameters.
    Reference arrays are loaded from ``FILE_DIR``.
    """
    shared_kwargs = dict(pdb_directory=TEST_DATA_DIR,
                         sifts_directory=TEST_DATA_DIR,
                         download_sifts=False)

    def assert_matches_reference(lookup, reference_file):
        res = lookup(pdb_seq_object)
        # To regenerate a reference after a deliberate change, np.save `res`
        # under the same name (without the .npy suffix) in FILE_DIR.
        expected = np.load(os.path.join(FILE_DIR, reference_file))
        np.testing.assert_almost_equal(expected, res)

    # Just test a few of the options
    for metric in ("total", "polar", "relativeTotal", "relativeMainChain"):
        assert_matches_reference(
            FreeSASALookup(metric=metric, **shared_kwargs),
            'reference_FreeSASA_{}_results.npy'.format(metric))

    # Test with some changed Parameters
    import freesasa
    custom_parameters = freesasa.Parameters({
        'algorithm': freesasa.LeeRichards,
        'n-slices': 100
    })
    assert_matches_reference(
        FreeSASALookup(metric='total',
                       freesasa_parameters=custom_parameters,
                       **shared_kwargs),
        'reference_FreeSASA_custom_params_results.npy')
def getAtomSASA(structure, classifier=None, probe_radius=1.4, mi=0, **kwargs):
    """Store the per-atom SASA in ``atom.xtra["sasa"]`` for every atom.

    A FreeSASA structure is built from ``structure`` with
    ``getFreeSASAStructureFromModel``, the areas are computed, and each value
    is written back to the atom found at
    ``structure[mi][chain][(' ', resnum, icode)][atom_name]``.

    :param structure: hierarchical structure indexable as
        ``structure[mi][chain_id][residue_id][atom_name]``.
    :param classifier: radius classifier; when None a ``Radius(**kwargs)`` is
        created.
    :param probe_radius: probe radius forwarded as FreeSASA ``probe-radius``.
    :param mi: model index used when writing the values back.
    :param kwargs: forwarded to ``Radius`` when no classifier is given.
    :return: None; ``structure`` is annotated in place.
    """
    if classifier is None:
        # initialize new classifier
        classifier = Radius(**kwargs)

    freesasa_structure = getFreeSASAStructureFromModel(structure, classifier=classifier)
    SASA = freesasa.calc(freesasa_structure, freesasa.Parameters({"probe-radius": probe_radius}))

    # Atom count and atom names must come from the FreeSASA structure: it is
    # the object that is indexed by atom number, whereas ``structure`` is the
    # hierarchical container that the results are written back into.
    for i in range(freesasa_structure.nAtoms()):
        sasa = SASA.atomArea(i)
        resi = freesasa_structure.residueNumber(i).strip()
        cid = freesasa_structure.chainLabel(i).strip()

        # A trailing non-digit on the residue number is an insertion code.
        if resi[-1].isdigit():
            ins = " "
        else:
            ins = resi[-1]
            resi = resi[:-1]

        aname = freesasa_structure.atomName(i).strip()
        structure[mi][cid][(' ', int(resi), ins)][aname].xtra["sasa"] = sasa
def sa_conformers(file_1, func_1, file_2, func_2, units, radius):
    """Build an AB polymer, embed conformers and average their SASA.

    Steps, in order:

    1. Build a linear ``AB`` polymer of ``units`` repeats from the two
       monomer files with stk and write ``<NAME>.mol``.
    2. Optimise it with ``stk.UFF`` and write ``<NAME>_opt.json`` and
       ``<NAME>_opt.mol``.
    3. Embed 10 ETKDG conformers and write each one as ``.mol`` and ``.pdb``
       into a per-run directory under a hard-coded base directory.
    4. Re-export every ``.mol`` in that directory through RDKit (with
       hydrogens) to ``*_new.pdb``, because FreeSASA cannot read the PDB
       files written by stk.
    5. Run FreeSASA on every ``*_new.pdb``, average the totals, divide by the
       atom count of ``<NAME>.mol`` as parsed by RDKit, append that value to
       ``Average surface area of conformers.txt`` in the base directory and
       write the per-conformer totals to a CSV in the run directory.

    :param file_1: path of the first monomer ``.mol`` file.
    :param func_1: functional groups of the first monomer, e.g. ``['diol']``
        or ``['bromine']``.
    :param file_2: path of the second monomer ``.mol`` file.
    :param func_2: functional groups of the second monomer, e.g.
        ``['dibromine']``, ``['difluorene']``, ``['bromine']`` or
        ``['iodine']``.
    :param units: number of repeat units, passed as ``n`` to ``stk.Linear``.
    :param radius: FreeSASA probe radius.
    :return: None; results are printed and written to disk.
    """
    # turn off cache
    stk.OPTIONS['cache'] = False
    # number of conformers
    N = 10

    name_1 = file_1.replace('.mol', '')
    unit_1 = stk.StructUnit2(file_1, func_1)
    name_2 = file_2.replace('.mol', '')
    unit_2 = stk.StructUnit2(file_2, func_2)

    # make polymer
    NAME = name_1 + '_' + name_2 + '_AB_poly'
    print(f'Creating polymer: {NAME}')
    polymer = stk.Polymer([unit_1, unit_2],
                          stk.Linear('AB', [0, 0], n=units, ends='h'))

    # write unoptimized structure; its atom count normalises the SASA later
    polymer.write(NAME + '.mol')
    mol_polymer = rdkit.MolFromMolFile(NAME + '.mol')

    print(f'Optimizing polymer {NAME} and saving {N} conformers')
    embedder = stk.UFF(use_cache=False)
    embedder.optimize(polymer, conformer=-1)
    # write optimized polymer to json and mol
    polymer.dump(NAME + '_opt.json')
    polymer.write(NAME + '_opt.mol')

    # make N conformers of the polymer molecule
    etkdg = rdkit.ETKDGv2()
    etkdg.randomSeed = 1000
    etkdg.verbose = True
    etkdg.maxIterations = 200000
    cids = rdkit.EmbedMultipleConfs(
        mol=polymer.mol,
        numConfs=N,
        params=etkdg
    )
    print(f'Made {len(cids)} conformers...')
    print('Warning! I have not implemented an optimization of the ETKDG cleaned polymers!')

    # iterate over conformers and save structure
    file_dir = '/home/fanyuzhao/Monomers/OH+F/dimer/conformers/'
    new_dir = file_dir + NAME + '_' + str(units) + '_' + str(radius) + '/'
    # Create the output directory once instead of re-checking per conformer.
    os.makedirs(new_dir, exist_ok=True)
    for cid in cids:
        conformer_stem = new_dir + NAME + '_' + str(cid) + '_opt'
        polymer.write(conformer_stem + '.mol', conformer=cid)
        polymer.write(conformer_stem + '.pdb', conformer=cid)
    print(f'Done! {N} ETKDG conformers of polymer written to {NAME}_{N}_opt.mol/pdb')

    # pdb file from stk can not be read in freesasa:
    # save a new pdb file from each mol file with rdkit instead
    for item in os.listdir(new_dir):
        if not item.endswith('.mol'):
            continue
        conformer_mol = rdkit.MolFromMolFile(os.path.join(new_dir, item))
        # hydrogens are removed when rdkit reads the file, so add them back
        with_hydrogens = rdkit.AddHs(conformer_mol, addCoords=True)
        rdkit.MolToPDBFile(with_hydrogens,
                           new_dir + item.replace('.mol', '') + '_new.pdb')

    # FreeSASA drops hydrogens and HETATM records by default
    options_with_Hs = {
        'hetatm': True,
        'hydrogen': True,
        'join-models': False,
        'skip-unknown': False,
        'halt-at-unknown': False,
    }
    # The probe parameters are the same for every conformer.
    para = freesasa.Parameters()
    para.setProbeRadius(radius)

    sa_list = []
    pdb_list = []
    for pdb in os.listdir(new_dir):
        if not pdb.endswith("_new.pdb"):
            continue
        free_struct = freesasa.Structure(os.path.join(new_dir, pdb),
                                         options=options_with_Hs)
        total = freesasa.calc(free_struct, para).totalArea()
        # keep 4 decimals
        sa_list.append(round(total, 4))
        pdb_list.append(pdb.replace('.pdb', ''))

    # average over conformers, then normalise by atom count
    sa_average = round(sum(sa_list) / len(sa_list), 4)
    atom_number = mol_polymer.GetNumAtoms()
    normalized_sa = round(sa_average / atom_number, 4)

    with open(file_dir + 'Average surface area of conformers.txt', 'a+') as Asa:
        Asa.write(f'The normalized surface area of {NAME}_{units} is '
                  + str(normalized_sa) + ' Å^2 with the probe size of '
                  + str(radius) + f'Å and chain length of {units}.\n')
    print('The avarage surface area of the conformers is ' + str(sa_average)
          + ' Å^2 with the probe size of ' + str(radius) + 'Å.')

    # Per-conformer table. The handle is managed by `with` so the rows are
    # flushed and the file is closed even if a write fails; the previous code
    # never closed it.
    table_path = new_dir + 'Solvent accessible surface area of ' + NAME + '.csv'
    with open(table_path, 'w') as table:
        table.write("Polymer_name, SASA\n")
        for polymer_name, sasa in zip(pdb_list, sa_list):
            table.write(polymer_name + "," + str(sasa) + "\n")

    print('Nomalized solvent accessible surface area is ' + str(normalized_sa)
          + ' Å^2 with the probe size of ' + str(radius) + 'Å.')
def openfile():
    """Ask for a PDB file, analyse its sequence and compute window accessibility.

    Opens a Tk file dialog, parses the chosen file with ``PDBParser``, prints
    sequence statistics from ``ProteinAnalysis``, then prints per-window
    flexibility / scale values and finally runs FreeSASA selections for
    sliding residue ranges.

    Results are published through module-level globals (``my_seq``, ``m``,
    ``length``, ``hydro``, ``flex``, ``sec``, ``acc``, ``access``,
    ``logacc`` and others declared below); nothing is returned.
    """
    global prob, probab, te
    global my_seq
    global anti
    global structure, structure_id, filename
    global antigenicity, hydro, flex, sec
    global m, a, c, b, length, j, k
    global hydroph, flexi, access
    anti = []
    sec = []
    probab = []
    from tkinter import filedialog
    root = Tk()
    root.filename = filedialog.askopenfilename(
        initialdir="/",
        title="Select file",
        filetypes=(("pdb files", "*.pdb"), ("pdb files", "*.pdb")))
    filename = root.filename
    print(filename)
    # NOTE(review): the structure id is fixed to "1e6j" whatever file was
    # picked -- confirm this is intended.
    structure_id = "1e6j"
    structure = PDBParser().get_structure(structure_id, root.filename)
    ppb = PPBuilder()
    # my_seq is overwritten on every pass, so only the last peptide's
    # sequence survives the loop.
    for pp in ppb.build_peptides(structure):
        my_seq = pp.get_sequence()  # type: Seq
        print(my_seq)
    for model in structure:
        for chain in model:
            print(chain)
    sequence = list(my_seq)
    m = ''.join(sequence)
    print(m)
    length = len(m)  # type: int
    print("Sequence consist of", length, "Amino Acids")
    from Bio.SeqUtils.ProtParam import ProteinAnalysis
    analysed_seq = ProteinAnalysis(m)
    print("Molecular weight = ", analysed_seq.molecular_weight())
    print("Amino Acid Count = ", analysed_seq.count_amino_acids())
    print("Secondary structure fraction =",
          analysed_seq.secondary_structure_fraction())
    # Per-residue scale handed to protein_scale below; presumably the
    # Kyte-Doolittle hydropathy scale -- verify.
    kd = {
        'A': 1.8,
        'R': -4.5,
        'N': -3.5,
        'D': -3.5,
        'C': 2.5,
        'Q': -3.5,
        'E': -3.5,
        'G': -0.4,
        'H': -3.2,
        'I': 4.5,
        'L': 3.8,
        'K': -3.9,
        'M': 1.9,
        'F': 2.8,
        'P': -1.6,
        'S': -0.8,
        'T': -0.7,
        'W': -0.9,
        'Y': -1.3,
        'V': 4.2
    }
    # The same two profiles are computed three times each and stored under
    # different global names (c/flex/flexi and b/hydro/hydroph).
    c = list(analysed_seq.flexibility())
    b = list(analysed_seq.protein_scale(kd, 10, 1.0))
    hydro = list(analysed_seq.protein_scale(kd, 10, 1.0))
    flex = list(analysed_seq.flexibility())
    hydroph = list(analysed_seq.protein_scale(kd, 10, 1.0))
    flexi = list(analysed_seq.flexibility())
    # Slide a 10-residue window over the sequence: the first slice is
    # m[0:10] and the loop body runs (length - 10) times.
    i = 1
    j = -1  # type: int
    k = 9
    while i <= (length - 10):
        print("Sequence is = ", m[j + 1:k + 1])
        print("Flexibility value = ", c[j + 1])
        print("Hydrophilicity value = ", b[j + 1])
        ana_seq = ''.join(m[j + 1:k + 1])
        analyze_seq = ProteinAnalysis(ana_seq)
        # For Secondary structure Analysis
        print("Secondary structure fraction =",
              analyze_seq.secondary_structure_fraction())
        a = list(analyze_seq.secondary_structure_fraction())
        # keep only the first of the returned fractions for this window
        a = a[0]
        sec.append(a)
        i += 1
        j += 1
        k += 1
    f = length
    r = 1
    y = 10
    global acc, logacc
    acc = []
    for i in range(0, f):
        # FreeSASA selection of the form "accessibility, resi <r>-<y>",
        # starting at 1-10 and shifted by one residue per iteration.
        str1 = "accessibility, resi "
        str2 = str(r) + "-" + str(y)
        saving = str1 + str2
        print(saving)
        r = r + 1
        y = y + 1
        # NOTE(review): "1e6j.pdb" is read from the working directory on
        # every iteration, independent of the file chosen in the dialog, and
        # this rebinds the global `structure` from the parsed Bio.PDB
        # structure to a FreeSASA one. Both SASA calculations below do not
        # depend on the loop variable -- confirm before hoisting.
        structure = freesasa.Structure("1e6j.pdb")
        resulta = freesasa.calc(structure)
        area_classes = freesasa.classifyResults(resulta, structure)
        print("Total : %.2f A2" % resulta.totalArea())
        for key in area_classes:
            print(key, ": %.2f A2" % area_classes[key])
        resulta = freesasa.calc(
            structure,
            freesasa.Parameters({
                'algorithm': freesasa.LeeRichards,
                'n-slices': 10
            }))
        selections = freesasa.selectArea(('alanine, resn ala', saving),
                                         structure, resulta)
        # Both selection results are appended, so acc grows by two entries
        # per iteration.
        for key in selections:
            print(key, ": %.2f A2" % selections[key])
            a = selections[key]
            acc.append(a)
    # Keep every other entry of acc; which of the two selections that is
    # depends on the iteration order of `selections` -- verify.
    l = acc[0::2]
    access = l
    print(acc)
    print(l)
    # NOTE(review): math.log raises ValueError for an area of 0.
    logacc = [math.log(y, 10) for y in l]
    print(logacc)
def calculate_sasa(pdbfile, chain, multichain=True, relative_type='sidechain'):
    """
    Compute per-residue SASA for one chain of a PDB file with FreeSASA.

    The relative value in the ``Ratio`` column is a percentage of the
    theoretical maximum from the "Wilke" table:

    * GLY always uses the main-chain area.
    * Residue types missing from the table use the side-chain area divided by
      every table entry in turn, averaged.
    * All other residues use the side-chain area when ``relative_type`` is
      ``'sidechain'`` and the total area otherwise.

    :param pdbfile: String of PDB file name.
    :param chain: String, identifier of the single chain to report. It is used
        both to pick the separated structure and as a key into the residue
        results, so a list of chains is not supported.
    :param multichain: Boolean. True to separate chains. This allows SASA
        calculation for a single unattached monomer. False if you want to
        calculate SASA for the structure 'as-is'.
    :param relative_type: ``'sidechain'`` to base ``Ratio`` on the side-chain
        area; any other value bases it on the total area.
    :return: Pandas Dataframe of residue number, types, and sasa values as columns.
    :raises ValueError: if ``multichain`` is True and ``chain`` is not among
        the chains found in ``pdbfile``.
    """
    import freesasa as fs

    dict_max_acc = {
        # Miller max acc: Miller et al. 1987 https://doi.org/10.1016/0022-2836(87)90038-6
        # Wilke: Tien et al. 2013 https://doi.org/10.1371/journal.pone.0080635
        # Sander: Sander & Rost 1994 https://doi.org/10.1002/prot.340200303
        "Miller": {
            "ALA": 113.0,
            "ARG": 241.0,
            "ASN": 158.0,
            "ASP": 151.0,
            "CYS": 140.0,
            "GLN": 189.0,
            "GLU": 183.0,
            "GLY": 85.0,
            "HIS": 194.0,
            "ILE": 182.0,
            "LEU": 180.0,
            "LYS": 211.0,
            "MET": 204.0,
            "PHE": 218.0,
            "PRO": 143.0,
            "SER": 122.0,
            "THR": 146.0,
            "TRP": 259.0,
            "TYR": 229.0,
            "VAL": 160.0,
        },
        "Wilke": {
            "ALA": 129.0,
            "ARG": 274.0,
            "ASN": 195.0,
            "ASP": 193.0,
            "CYS": 167.0,
            "GLN": 225.0,
            "GLU": 223.0,
            "GLY": 104.0,
            "HIS": 224.0,
            "ILE": 197.0,
            "LEU": 201.0,
            "LYS": 236.0,
            "MET": 224.0,
            "PHE": 240.0,
            "PRO": 159.0,
            "SER": 155.0,
            "THR": 172.0,
            "TRP": 285.0,
            "TYR": 263.0,
            "VAL": 174.0,
            "MSE": 224.0,
            "SEC": 167.0,
        },
        "Sander": {
            "ALA": 106.0,
            "ARG": 248.0,
            "ASN": 157.0,
            "ASP": 163.0,
            "CYS": 135.0,
            "GLN": 198.0,
            "GLU": 194.0,
            "GLY": 84.0,
            "HIS": 184.0,
            "ILE": 169.0,
            "LEU": 164.0,
            "LYS": 205.0,
            "MET": 188.0,
            "PHE": 197.0,
            "PRO": 136.0,
            "SER": 130.0,
            "THR": 142.0,
            "TRP": 227.0,
            "TYR": 222.0,
            "VAL": 142.0,
        },
    }
    theoreticalMaxASA = dict_max_acc["Wilke"]

    if not multichain:
        # Calculates SASA for unseparated chains.
        structure = fs.Structure(pdbfile)
    else:
        # Separate chains if multichain structure. This allows SASA
        # calculation for a single unattached monomer.
        structures = fs.structureArray(pdbfile, options={"separate-chains": True})
        # Every atom of a separated structure carries the same chain label, so
        # read it from atom 0; index 1 would fail on a one-atom chain.
        chains = [separated.chainLabel(0) for separated in structures]
        chain_position = chains.index(chain)
        structure = structures[chain_position]
        print("using {} separating chains {}".format(chain_position, chains))

    print("Number of atoms of {}: {}".format(pdbfile, structure.nAtoms()))
    result = fs.calc(structure, fs.Parameters({'algorithm': fs.ShrakeRupley, 'n-points': 10000}))
    res = result.residueAreas()

    residue = []
    resnum = []
    total = []
    apolar = []
    mainchain = []
    sidechain = []
    ratio = []
    for v in res[chain].values():
        residue.append(v.residueType)
        resnum.append(v.residueNumber)
        total.append(v.total)
        apolar.append(v.apolar)
        mainchain.append(v.mainChain)
        sidechain.append(v.sideChain)

        if v.residueType == 'GLY':
            ratio.append(100 * v.mainChain / theoreticalMaxASA[v.residueType])
        elif v.residueType not in theoreticalMaxASA:
            # If the residue is unknown but has a SASA, calculate the rSASA
            # dividing by each theoretical maxSASA and use the average.
            possibleSASA = [100 * v.sideChain / maxSASA
                            for maxSASA in theoreticalMaxASA.values()]
            ratio.append(np.average(possibleSASA))
        elif relative_type == 'sidechain':
            ratio.append(100 * v.sideChain / theoreticalMaxASA[v.residueType])
        else:
            ratio.append(100 * v.total / theoreticalMaxASA[v.residueType])

    df_sasa = pd.DataFrame({'Residue': residue,
                            'Residue_num': resnum,
                            'Chain': chain,
                            'Total': total,
                            'Apolar': apolar,
                            'Backbone': mainchain,
                            'Sidechain': sidechain,
                            'Ratio': ratio})

    area_class = fs.classifyResults(result, structure)
    print("Total : %.2f A2" % result.totalArea())
    for key in area_class:
        print(key, ": %.2f A2" % area_class[key])
    return df_sasa
import freesasa

# Per-atom SASA of 3lau.pdb (Lee-Richards, 100 slices), printed and saved to
# SASA.txt, followed by a per-class summary.
structure = freesasa.Structure("3lau.pdb")
result = freesasa.calc(
    structure,
    freesasa.Parameters({
        'algorithm': freesasa.LeeRichards,
        'n-slices': 100
    }))
print(result.nAtoms())

# `with` guarantees SASA.txt is flushed and closed, which the previous code
# never did.
with open('SASA.txt', 'w+') as savedData:
    # FreeSASA atom indices are 0-based: valid values are 0 .. nAtoms() - 1.
    # The previous range(1, nAtoms() + 1) skipped the first atom and asked
    # for an out-of-range index on the last iteration.
    for i in range(result.nAtoms()):
        details = '(' + structure.atomName(i) + ',' + str(
            result.atomArea(i)) + ' )'
        print(details)
        savedData.write(details + '\n')

area_classes = freesasa.classifyResults(result, structure)
print(area_classes)
print("Total : %.2f A2" % result.totalArea())
for key in area_classes:
    print(key, ": %.2f A2" % area_classes[key])
def _load_packaged_classifier(config_resource):
    """Build a FreeSASA classifier from a config shipped in the hydroid package."""
    conffile = pkgutil.get_data('hydroid', config_resource)
    # freesasa.Classifier takes a file name, so the packaged bytes are spilled
    # to a named temporary file that is removed again whatever happens.
    temp2 = tempfile.NamedTemporaryFile(delete=False)
    try:
        temp2.write(conffile)
        temp2.close()
        return freesasa.Classifier(temp2.name)
    finally:
        temp2.close()
        os.remove(temp2.name)


def get_DNA_H_SASA(pdb_file,csvfileout,chain=None,resids=[],seq=None,probe_radius=1.4,slicen=100,vdw_set=None,Hcontrib=[1.0]*7,n_threads=1,verbose=False):
    """
    Wrapper around the FreeSASA library: calculates the solvent accessible
    surface area (SASA) of the atoms in pdb_file, then extracts the SASA of
    the deoxyribose hydrogen atoms and sums it up for every nucleotide with
    coefficients Hcontrib.

    chain - name of the DNA chain of interest in pdb_file,
        if the chain has no name leave blank ('') or None
    resids - a list of resids to calculate H-SASA values.
    seq - sequence of the DNA strand, string or biopython Seq object.
    Hcontrib - coefficients for individual SASA of deoxyribose hydrogens for
        summing them up into H-SASA profile,
        order [H1' H2' H2'' H3' H4' H5' H5'']
    Note: chain, resids, seq, Hcontrib - can also be a list of two or more
        instances, to make the calculation for several chains, spans of resids
        or combinations of Hcontrib at once. In this case the number of
        elements in chain, resids, seq and Hcontrib should be the same, and
        the algorithm will iterate through all lists simultaneously (i.e. no
        combination will be tried). Chains should be of the same length.
    probe_radius - size of probe to roll.
    slicen - number of slices per atom, controls precision of the calculation.
    vdw_set - selects the set of VdW radii:
        None - default for FREESASA used
        charmm36-rmin - rmin from charmm36 forcefield
        amber10-rmin - rmin from AMBER10 forcefield
    n_threads - number of threads handed to FreeSASA.
    verbose - when False, FreeSASA warnings are silenced.

    The list defaults of resids and Hcontrib are only read, never mutated.

    Return
    --------
    None. Writes CSV file csvfileout with columns of H-SASA profiles along
    the sequence.
    """
    # `basestring` only exists on Python 2; fall back to `str` on Python 3.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    # The default None means "unnamed chain"; list(None) used to raise.
    if chain is None:
        chain = ''
    chain = [chain] if isinstance(chain, string_types) else list(chain)
    if len(chain) > 1:
        assert len(chain) == len(resids)
        assert len(chain) == len(seq)
        assert len(chain) == len(Hcontrib)
    else:
        resids = [resids]
        seq = [seq]
        Hcontrib = [Hcontrib]

    if not verbose:
        freesasa.setVerbosity(freesasa.nowarnings)

    # Same order as Hcontrib. The third entry is H2'' -- it was a second H2'
    # before, so H2'' was never counted and H2' was counted twice.
    hatoms = ["H1'", "H2'", "H2''", "H3'", "H4'", "H5'", "H5''"]
    all_hatoms = '+'.join(hatoms)

    packaged_configs = {
        'charmm36-rmin': 'pkgdata/charmm36_rmin.config',
        'amber10-rmin': 'pkgdata/amber10_rmin.config',
    }
    config_resource = (packaged_configs.get(vdw_set)
                       if isinstance(vdw_set, string_types) else None)
    structure_options = {'hydrogen': True, 'hetatm': True}
    if config_resource is not None:
        classifier = _load_packaged_classifier(config_resource)
        structure = freesasa.Structure(pdb_file, classifier, options=structure_options)
    else:
        structure = freesasa.Structure(pdb_file, options=structure_options)

    print("Launching FreeSASA calculation...")
    result = freesasa.calc(structure, freesasa.Parameters({
        'algorithm': freesasa.LeeRichards,
        'n-slices': slicen,
        'probe-radius': probe_radius,
        'n-threads': n_threads,
    }))
    print("Calculation done")

    def selection(resid, ch, names):
        # "<label>,<expression>": the label is the resid so the result can be
        # re-indexed by integer below; negative resids need a backslash.
        resi = '%s%d' % ('\\' if resid < 0 else '', resid)
        if len(ch) > 0:
            return '%d,(chain %s) and (resi %s) and (name %s)' % (resid, ch, resi, names)
        return '%d,(resi %s) and (name %s)' % (resid, resi, names)

    print("Extracting SASA values ...")
    res = []
    for ch, rids, Hcont in zip(chain, resids, Hcontrib):
        profile = pd.Series(dtype=float)
        if (np.array(Hcont) == 1.0).all():
            # simplified procedure, we can do it faster:
            # select all deoxyribose hydrogens of a residue at once
            sels = [selection(resid, ch, all_hatoms) for resid in rids]
            selections = freesasa.selectArea(sels, structure, result)
            profile = profile.add(pd.Series(selections) * 1.0, fill_value=0)
        else:
            # regular procedure: one weighted pass per hydrogen type
            for hat, hcont in zip(hatoms, Hcont):
                if hcont == 0:
                    continue
                sels = [selection(resid, ch, hat) for resid in rids]
                selections = freesasa.selectArea(sels, structure, result)
                profile = profile.add(pd.Series(selections) * float(hcont), fill_value=0)
        profile.index = profile.index.map(int)
        res.append(profile.sort_index())

    def sites(sequence):
        return ['%d%s' % (n, l) for n, l in enumerate(sequence, 1)]

    if len(chain) == 1:
        df = pd.DataFrame({'resid': res[0].index,
                           'Site': sites(seq[0]),
                           'H-SASA': res[0].values})
    else:
        df = pd.concat(
            [pd.DataFrame({'resid_%d' % i: res[i].index,
                           'Site_%d' % i: sites(seq[i]),
                           'H-SASA_%d' % i: res[i].values})
             for i in range(len(chain))],
            axis=1)

    print("Outputting H-SASA profile to %s" % csvfileout)
    df.to_csv(csvfileout)
def _get_free_sasa(t, parameters=None, classifier=None, options=None):
    '''Compute the solvent accessible surface area of a single-chain structure.

    Every atom of ``t[1]`` is added to a fresh ``freesasa.Structure``, radii
    are assigned with ``classifier`` and the area is calculated.

    :param t: pair ``(key, structure)``; ``structure`` exposes the flat
        per-model / per-chain / per-group lists read below.
    :param parameters: FreeSASA parameters; defaults to
        ``freesasa.Parameters()``.
    :param classifier: FreeSASA classifier; defaults to
        ``freesasa.Classifier()``.
    :param options: structure options; defaults to
        ``freesasa.Structure.defaultOptions``.
    :return: ``Row([key, <total area>])``.
    :raises Exception: if the structure does not have exactly one chain, or
        on an unknown atom when the halt-at-unknown option is set.
    '''
    key = t[0]
    structure = t[1]
    if structure.num_chains != 1:
        raise Exception(
            "This method can only be applied to single polyer chain.")

    freesasaStructure = freesasa.Structure()
    if classifier is None:
        classifier = freesasa.Classifier()
    optbitfield = freesasa.Structure._get_structure_options(
        options or freesasa.Structure.defaultOptions)

    # Running positions into the flat per-chain / per-group / per-atom lists.
    # chainIndex was previously read without ever being defined (NameError).
    chainIndex = 0
    groupIndex = 0
    atomIndex = 0
    for modelIndex in range(structure.num_models):
        print("model: " + str(modelIndex + 1))
        for _ in range(structure.chains_per_model[modelIndex]):
            chainName = structure.chain_name_list[chainIndex]
            for _ in range(structure.groups_per_chain[chainIndex]):
                seqIndex = structure.sequence_index_list[groupIndex]
                group = structure.group_list[structure.group_type_list[groupIndex]]
                groupName = group["groupName"]
                atomNames = group["atomNameList"]
                for offset, name in enumerate(atomNames):
                    # Compare by value: identity (`is`) on a str literal is
                    # not a reliable equality test.
                    if classifier.classify(groupName, name) == 'Unknown':
                        if optbitfield & freesasa.FREESASA_SKIP_UNKNOWN:
                            continue
                        if optbitfield & freesasa.FREESASA_HALT_AT_UNKNOWN:
                            raise Exception("Halting at unknown atom")
                    freesasaStructure.addAtom(
                        name, groupName, seqIndex, chainName,
                        structure.x_coord_list[atomIndex + offset],
                        structure.y_coord_list[atomIndex + offset],
                        structure.z_coord_list[atomIndex + offset])
                atomIndex += len(atomNames)
                groupIndex += 1
            chainIndex += 1

    freesasaStructure.setRadiiWithClassifier(classifier)
    freesasaResult = freesasa.calc(freesasaStructure, parameters or
                                   freesasa.Parameters())
    # NOTE(review): if classifyResults is freesasa.classifyResults it returns
    # a dict, which has no `totalArea` attribute -- confirm whether
    # freesasaResult.totalArea() is what should be returned here.
    sasa_classes = classifyResults(freesasaResult, freesasaStructure,
                                   classifier)
    return Row([key, sasa_classes.totalArea])
def run_freesasa_custom(self, npoints, verbose=False):
    """Return the SASA of the whole structure and of its first two chains.

    Radii come from the classifier file at ``self.database_classifier`` and
    the structure is read from ``self.pdb_path``. All areas are computed with
    Lee-Richards, a 1.4 probe radius and ``npoints`` slices.

    :param npoints: value used for the FreeSASA ``n-slices`` parameter.
    :param verbose: currently unused.
    :return: ``[area_whole, area_first_chain, area_second_chain]``.
    """
    classifier = fs.Classifier(self.database_classifier)

    def total_area(target):
        # Same settings for the whole structure and for each separate chain.
        return fs.calc(
            target,
            fs.Parameters({
                'algorithm': fs.LeeRichards,
                'probe-radius': 1.4,
                'n-slices': npoints
            })).totalArea()

    whole_options = {
        'hetatm': False,           # skip HETATM records
        'hydrogen': True,          # include hydrogens
        'join-models': False,      # only use the first MODEL
        'skip-unknown': False,     # guess radius of unknown atoms from element
        'halt-at-unknown': False,  # radius 0 when it cannot be guessed
    }
    whole_area = total_area(fs.Structure(self.pdb_path, classifier, whole_options))

    per_chain = fs.structureArray(self.pdb_path, {
        'separate-chains': True,
        'hydrogen': True,
        'separate-models': False
    }, classifier)
    chain_areas = [total_area(single_chain) for single_chain in per_chain]

    return [whole_area, chain_areas[0], chain_areas[1]]
def getAtomSASA(structure, classifier=None, probe_radius=1.4, mi=0, feature_name="sasa", binary=False, threshold=1.0, bonds=None, impute_hydrogens=False, include_hydrogens=False, **kwargs):
    """Annotate every atom of ``structure`` with its SASA.

    Values are stored in ``atom.xtra[feature_name]``. A FreeSASA structure is
    built with ``getFreeSASAStructureFromModel`` and each computed area is
    written back to the matching atom.

    :param structure: entity exposing ``get_level()`` and ``get_residues()``.
        When its level is ``"S"`` atoms are reached through ``structure[mi]``,
        otherwise through ``structure`` directly.
    :param classifier: radius classifier; a ``Radius(**kwargs)`` is created
        when None.
    :param probe_radius: probe radius forwarded as FreeSASA ``probe-radius``.
    :param mi: model index used for structure-level input.
    :param feature_name: key under which the value is stored in ``xtra``.
    :param binary: store ``int(sasa > threshold)`` instead of the area.
    :param threshold: cut-off used when ``binary`` is set.
    :param bonds: bond table indexed as
        ``bonds[resname][atom_name]['bonded_atoms']``; defaults to
        ``data.covalent_bond_data``.
    :param impute_hydrogens: copy the value of each hydrogen's first bonded
        atom onto the hydrogen.
    :param include_hydrogens: value of the FreeSASA ``hydrogen`` option.
    :param kwargs: forwarded to ``Radius`` when no classifier is given.
    :return: ``feature_name``.
    """
    if classifier is None:
        # initialize new classifier
        classifier = Radius(**kwargs)

    fs_structure = getFreeSASAStructureFromModel(
        structure,
        {
            'hydrogen': include_hydrogens,
            'hetatm': False,
            'join-models': False,
            'skip-unknown': False,
            'halt-at-unknown': False
        },
        classifier=classifier)
    areas = freesasa.calc(fs_structure, freesasa.Parameters({"probe-radius": probe_radius}))

    # get atom SASA
    for index in range(fs_structure.nAtoms()):
        value = areas.atomArea(index)
        residue_label = fs_structure.residueNumber(index).strip()
        chain_id = fs_structure.chainLabel(index).strip()

        # A trailing non-digit on the residue number is the insertion code.
        if residue_label[-1].isdigit():
            insertion = " "
        else:
            residue_label, insertion = residue_label[:-1], residue_label[-1]

        atom_name = fs_structure.atomName(index).strip()
        if binary:
            value = int(value > threshold)

        chains = structure[mi] if structure.get_level() == "S" else structure
        residue_id = (' ', int(residue_label), insertion)
        chains[chain_id][residue_id][atom_name].xtra[feature_name] = value

    if not impute_hydrogens:
        return feature_name

    # use parent atom as hydrogen sasa value if we include them
    if bonds is None:
        # use default bond data
        bonds = data.covalent_bond_data
    for residue in structure.get_residues():
        resname = residue.get_resname().strip()
        for atom in residue:
            if atom.element != 'H':
                continue
            hydrogen_name = atom.get_name().strip()
            parent_atom = bonds[resname][hydrogen_name]['bonded_atoms'][0]
            if (parent_atom in residue) and (feature_name in residue[parent_atom].xtra):
                atom.xtra[feature_name] = residue[parent_atom].xtra[feature_name]
    return feature_name