def writeESSAZscoresToPDB(self):
    """Write a PDB file with ESSA z-scores placed in the B-factor column.

    The z-scores stored in ``self._zscore`` are extended from ``self._ca``
    onto ``self._heavy`` with ``extendAtomicData``; the first element of
    its result is passed to ``writePDB`` as ``beta``.  The output name is
    built from ``self._title`` and ``self._enm`` followed by ``_zs``.
    """
    out_name = '{}_{}_zs'.format(self._title, self._enm)
    extended = extendAtomicData(self._zscore, self._ca, self._heavy)
    writePDB(out_name, self._heavy, beta=extended[0])
def writeDeformProfile(stiffness, pdb, filename='dp_out',
                       select='protein and name CA',
                       pdb_selstr='protein', loadToVMD=False):
    """Calculate deformability (plasticity) profile of molecule based on
    mechanical stiffness matrix (see [EB08]_).

    :arg stiffness: mechanical stiffness matrix
    :type stiffness: :class:`numpy.ndarray`

    :arg pdb: atoms the profile is written for; they are sliced with
        *pdb_selstr* and must provide ``getResindices``
    :type pdb: an atomic object accepted by ``sliceAtoms``

    :arg filename: prefix of the output files, default is ``'dp_out'``
    :type filename: str

    :arg select: selection that was used for building the model
    :type select: str

    :arg pdb_selstr: selection of atoms written to the PDB file and
        shown in VMD
    :type pdb_selstr: str

    :arg loadToVMD: whether to open the generated TCL script in VMD,
        default is **False**
    :type loadToVMD: bool

    Three files are written: :file:`<filename>_mean.txt` with the mean
    stiffness of each residue, a PDB file carrying the mean stiffness in
    the occupancy column, and :file:`<filename>.tcl` with the VMD commands
    that colour the structure by occupancy.
    """

    _, pdb = sliceAtoms(pdb, pdb_selstr)
    _, coords = sliceAtoms(pdb, select)
    meanStiff = np.mean(stiffness, axis=0)

    # mean value of Kij for each residue
    with open(filename + '_mean.txt', 'w') as out_mean:
        for nr_i, i in enumerate(meanStiff):
            out_mean.write("{} {}\n".format(nr_i, i))

    from collections import Counter
    # Number of atoms per residue index.  Built once instead of on every
    # loop iteration; the i-th count is matched to the i-th mean value.
    atom_counts = list(Counter(pdb.getResindices()).values())

    # Repeat each residue's rounded mean stiffness once per atom so the
    # list lines up with the atoms written to the PDB file.
    meanStiff_all = []
    for i in range(coords.numAtoms()):
        meanStiff_all.extend(atom_counts[i] * [round(meanStiff[i], 2)])

    writePDB(filename, pdb, occupancy=meanStiff_all)
    LOGGER.info('PDB file with deformability profile has been saved.')
    LOGGER.info('Creating TCL file.')
    with open(filename + '.tcl', 'w') as out_tcl:
        out_tcl.write('display resetview \nmol addrep 0 \ndisplay resetview \n')
        out_tcl.write('mol new {./'+filename+'.pdb} type {pdb} first 0 last -1 step 1 waitfor 1 \n')
        out_tcl.write('animate style Loop \ndisplay projection Orthographic \n')
        out_tcl.write('display depthcue off \ndisplay rendermode GLSL \naxes location Off \n')
        out_tcl.write('color Display Background white \n')
        out_tcl.write('mol modstyle 0 0 NewCartoon 0.300000 10.000000 4.100000 0 \n')
        out_tcl.write('mol modmaterial 0 0 Diffuse \nmol modcolor 0 0 Occupancy \n')
        out_tcl.write('menu colorscalebar on \n')

    if loadToVMD:
        from prody import pathVMD
        LOGGER.info('File will be loaded to VMD program.')
        os.system(pathVMD()+" -e "+str(filename)+".tcl")
def writePDB(self, filename=None, single=True, **kwargs):
    '''
    Write conformers in PDB format to a file.

    :arg filename: The name of the file. If it is None (default), the
        title returned by ``self.getTitle()`` will be used.
    :type filename: str

    :arg single: If it is True (default), the conformers are saved into
        a single PDB file.  Otherwise *filename* is used as a directory
        (created when missing) and each conformer is saved as a separate
        PDB file named after its label.
    :type single: bool

    Additional keyword arguments are accepted but not used here.
    '''

    if filename is None:
        filename = self.getTitle()

    # Single-file mode: one call, then done.
    if single:
        filename = writePDB(filename, self)
        LOGGER.info('PDB file saved as %s' % filename)
        return

    # Multi-file mode: *filename* names the target directory.
    direc = filename
    if not isdir(direc):
        mkdir(direc)
    else:
        LOGGER.warn('%s is not empty; will be flooded' % direc)

    LOGGER.info('Saving files ...')
    for index, label in enumerate(self.getLabels()):
        filename = '%s/%s' % (direc, label)
        writePDB(filename, self, csets=index)
    LOGGER.info('PDB files saved in %s ...' % direc)
def writeDeformProfile(stiffness, pdb, filename='dp_out',
                       select='protein and name CA',
                       pdb_selstr='protein', loadToVMD=False):
    """Calculate deformability (plasticity) profile of molecule based on
    mechanical stiffness matrix (see [EB08]_).

    :arg stiffness: mechanical stiffness matrix
    :type stiffness: :class:`~numpy.ndarray`

    :arg pdb: atoms the profile is written for; they are sliced with
        *pdb_selstr* and must provide ``getResindices``
    :type pdb: an atomic object accepted by ``sliceAtoms``

    :arg filename: prefix of the output files, default is ``'dp_out'``
    :type filename: str

    :arg select: selection that was used for building the model
    :type select: str

    :arg pdb_selstr: selection of atoms written to the PDB file and
        shown in VMD
    :type pdb_selstr: str

    :arg loadToVMD: whether to open the generated TCL script in VMD,
        default is **False**
    :type loadToVMD: bool

    Three files are written: :file:`<filename>_mean.txt` with the mean
    stiffness of each residue, a PDB file carrying the mean stiffness in
    the occupancy column, and :file:`<filename>.tcl` with the VMD commands
    that colour the structure by occupancy.
    """

    _, pdb = sliceAtoms(pdb, pdb_selstr)
    _, coords = sliceAtoms(pdb, select)
    meanStiff = np.mean(stiffness, axis=0)

    # mean value of Kij for each residue
    with open(filename + '_mean.txt', 'w') as out_mean:
        for nr_i, i in enumerate(meanStiff):
            out_mean.write("{} {}\n".format(nr_i, i))

    from collections import Counter
    # Number of atoms per residue index.  Built once instead of on every
    # loop iteration; the i-th count is matched to the i-th mean value.
    atom_counts = list(Counter(pdb.getResindices()).values())

    # Repeat each residue's rounded mean stiffness once per atom so the
    # list lines up with the atoms written to the PDB file.
    meanStiff_all = []
    for i in range(coords.numAtoms()):
        meanStiff_all.extend(atom_counts[i] * [round(meanStiff[i], 2)])

    writePDB(filename, pdb, occupancy=meanStiff_all)
    LOGGER.info('PDB file with deformability profile has been saved.')
    LOGGER.info('Creating TCL file.')
    with open(filename + '.tcl', 'w') as out_tcl:
        out_tcl.write('display resetview \nmol addrep 0 \ndisplay resetview \n')
        out_tcl.write('mol new {./'+filename+'.pdb} type {pdb} first 0 last -1 step 1 waitfor 1 \n')
        out_tcl.write('animate style Loop \ndisplay projection Orthographic \n')
        out_tcl.write('display depthcue off \ndisplay rendermode GLSL \naxes location Off \n')
        out_tcl.write('color Display Background white \n')
        out_tcl.write('mol modstyle 0 0 NewCartoon 0.300000 10.000000 4.100000 0 \n')
        out_tcl.write('mol modmaterial 0 0 Diffuse \nmol modcolor 0 0 Occupancy \n')
        out_tcl.write('menu colorscalebar on \n')

    if loadToVMD:
        from prody import pathVMD
        LOGGER.info('File will be loaded to VMD program.')
        os.system(pathVMD()+" -e "+str(filename)+".tcl")
def runManyStepsAlternating(self, n_steps, **kwargs):
    """Run steps in alternating directions until *n_steps* more steps
    have been taken or ``checkConvergence`` reports convergence.

    Each pass of the loop calls ``runStep`` twice: once from *structA*
    towards *structB* and once with the A and B arguments swapped.  Extra
    keyword arguments are forwarded to both calls.

    Afterwards the coordinate sets of ``ensembleA`` followed by those of
    ``ensembleB`` in reverse order are collected into one ensemble, which
    is written out according to ``self.outputPDB`` / ``self.outputDCD``.
    """
    timer = '_prody_runManySteps'
    LOGGER.timeit(timer)

    final_step = self.numSteps + n_steps
    while self.numSteps < final_step:
        # The same mode count is handed to both half-steps of this pass.
        n_modes = self.n_modes

        # A -> B
        self.runStep(structA=self.structA,
                     structB=self.structB,
                     reduceSelA=self.reduceSelA,
                     reduceSelB=self.reduceSelB,
                     alignSelA=self.alignSelA,
                     alignSelB=self.alignSelB,
                     n_modes=n_modes,
                     **kwargs)
        elapsed_min = (time.time() - LOGGER._times[timer]) / 60
        LOGGER.debug('Total time so far is %.2f minutes' % elapsed_min)

        # B -> A
        self.runStep(structA=self.structB,
                     structB=self.structA,
                     reduceSelA=self.reduceSelB,
                     reduceSelB=self.reduceSelA,
                     alignSelA=self.alignSelB,
                     alignSelB=self.alignSelA,
                     n_modes=n_modes,
                     **kwargs)
        elapsed_min = (time.time() - LOGGER._times[timer]) / 60
        LOGGER.debug('Total time so far is %.2f minutes' % elapsed_min)

        if self.checkConvergence():
            # That way the original objects are back to normal
            self.structA.setCoords(self.coordsA)
            self.structB.setCoords(self.coordsB)
            elapsed_hr = (time.time() - LOGGER._times[timer]) / 3600
            LOGGER.debug('Process completed in %.2f hours' % elapsed_hr)
            break

    combined = Ensemble('combined trajectory')
    combined.setAtoms(self.structA)
    for frame in self.ensembleA.getCoordsets():
        combined.addCoordset(frame)
    for frame in reversed(self.ensembleB.getCoordsets()):
        combined.addCoordset(frame)

    if self.outputPDB:
        writePDB(self.filename, combined)

    if self.outputDCD:
        writeDCD(self.filename, combined)
def writeCombinedPDB(self, filename):
    """Write the combined structure stored in ``self._combined`` to a
    PDB file.

    :arg filename: filename for the pdb file.
    :type filename: str

    :raises ValueError: if ``self._combined`` is **None**
    :raises TypeError: if ``writePDB`` raises :exc:`TypeError` for the
        stored structure
    """
    if self._combined is None:
        # The previous code tried to build the combined structure here
        # from a ``coords`` name that is not defined in this scope, which
        # could only end in a NameError.  Fail with an explicit message.
        raise ValueError('Combined structure not found. '
                         'Use combineMembraneProtein() function.')
    try:
        writePDB(filename, self._combined)
    except TypeError:
        # Raising a bare string is itself an error in Python 3; raise a
        # real exception that carries the intended message.
        raise TypeError("Membrane not found. Use buildMembrane() function.")
def writeCombinedPDB(self, filename):
    """Write the combined structure stored in ``self._combined`` to a
    PDB file.

    :arg filename: filename for the pdb file.
    :type filename: str

    :raises ValueError: if ``self._combined`` is **None**
    :raises TypeError: if ``writePDB`` raises :exc:`TypeError` for the
        stored structure
    """
    if self._combined is None:
        # The previous code tried to build the combined structure here
        # from a ``coords`` name that is not defined in this scope, which
        # could only end in a NameError.  Fail with an explicit message.
        raise ValueError('Combined structure not found. '
                         'Use combineMembraneProtein() function.')
    try:
        writePDB(filename, self._combined)
    except TypeError:
        # Raising a bare string is itself an error in Python 3; raise a
        # real exception that carries the intended message.
        raise TypeError("Membrane not found. Use buildMembrane() function.")
def alignPDBEnsemble(ensemble, suffix='_aligned', outdir='.', gzip=False):
    """Align PDB files using transformations from *ensemble*, which may be
    a :class:`.PDBEnsemble` or a :class:`.PDBConformation` instance. Label of
    the conformation (see :meth:`~.PDBConformation.getLabel`) will be used to
    determine the PDB structure and model number.  First four characters of
    the label is expected to be the PDB identifier and ending numbers to be the
    model number.  For example, the :class:`.Transformation` from conformation
    with label *2k39_ca_selection_'resnum_<_71'_m116* will be applied to 116th
    model of structure **2k39**.  After applicable transformations are made,
    structure will be written into *outdir* as :file:`2k39_aligned.pdb`.
    If *gzip* is **True**, output files will be compressed.  Return value is
    the output filename or list of filenames, in the order files are processed.
    Note that if multiple models from a file are aligned, that filename will
    appear in the list multiple times.  **None** is placed in the output for
    conformations whose PDB file is not found or whose model number is out
    of range.

    :raises TypeError: if *ensemble* is neither a :class:`.PDBEnsemble`
        nor a :class:`.PDBConformation`
    :raises ValueError: if a conformation has no transformation"""

    if not isinstance(ensemble, (PDBEnsemble, PDBConformation)):
        raise TypeError('ensemble must be a PDBEnsemble or PDBConformation')
    if isinstance(ensemble, PDBConformation):
        ensemble = [ensemble]
    ext = '.gz' if gzip else ''
    output = []
    # Multi-model structures are kept here so every model is transformed
    # on the same object before the file is written once at the end.
    pdbdict = {}
    for conf in ensemble:
        trans = conf.getTransformation()
        if trans is None:
            raise ValueError('transformations are not calculated, call '
                             '`superpose` or `iterpose`')
        label = conf.getLabel()

        pdb = label[:4]
        # Only call fetchPDB when the structure is not cached yet; the
        # previous ``dict.get(key, fetchPDB(...))`` evaluated it always.
        if pdb in pdbdict:
            filename = pdbdict[pdb]
        else:
            filename = fetchPDB(pdb)
        if filename is None:
            LOGGER.warning('PDB file for conformation {0} is not found.'
                           .format(label))
            output.append(None)
            continue
        LOGGER.info('Parsing PDB file {0} for conformation {1}.'
                    .format(pdb, label))

        # Model number: digits following the last 'm' in the label.
        acsi = None
        model = label.rfind('m')
        if model > 3:
            model = label[model+1:]
            if model.isdigit():
                acsi = int(model) - 1
            LOGGER.info('Applying transformation to model {0}.'
                        .format(model))

        # A cached entry is already a parsed structure, not a path.
        if isinstance(filename, str):
            ag = parsePDB(filename)
        else:
            ag = filename

        if acsi is not None:
            if acsi >= ag.numCoordsets():
                LOGGER.warn('Model number {0} for {1} is out of range.'
                            .format(model, pdb))
                output.append(None)
                continue
            ag.setACSIndex(acsi)
        trans.apply(ag)
        outfn = os.path.join(outdir, pdb + suffix + '.pdb' + ext)
        if ag.numCoordsets() > 1:
            pdbdict[pdb] = ag
        else:
            writePDB(outfn, ag)
        output.append(os.path.normpath(outfn))

    for pdb, ag in pdbdict.items():  # PY3K: OK
        writePDB(os.path.join(outdir, pdb + suffix + '.pdb' + ext), ag)
    if len(output) == 1:
        return output[0]
    else:
        return output
def alignPDBEnsemble(ensemble, suffix='_aligned', outdir='.', gzip=False):
    """Align PDB files using transformations from *ensemble*, which may be
    a :class:`.PDBEnsemble` or a :class:`.PDBConformation` instance. Label of
    the conformation (see :meth:`~.PDBConformation.getLabel`) will be used to
    determine the PDB structure and model number.  First four characters of
    the label is expected to be the PDB identifier and ending numbers to be the
    model number.  For example, the :class:`.Transformation` from conformation
    with label *2k39_ca_selection_'resnum_<_71'_m116* will be applied to 116th
    model of structure **2k39**.  After applicable transformations are made,
    structure will be written into *outdir* as :file:`2k39_aligned.pdb`.
    If ``gzip=True``, output files will be compressed.  Return value is
    the output filename or list of filenames, in the order files are processed.
    Note that if multiple models from a file are aligned, that filename will
    appear in the list multiple times.  **None** is placed in the output for
    conformations whose PDB file is not found or whose model number is out
    of range.

    :raises TypeError: if *ensemble* is neither a :class:`.PDBEnsemble`
        nor a :class:`.PDBConformation`
    :raises ValueError: if a conformation has no transformation"""

    if not isinstance(ensemble, (PDBEnsemble, PDBConformation)):
        raise TypeError('ensemble must be a PDBEnsemble or PDBConformation')
    if isinstance(ensemble, PDBConformation):
        ensemble = [ensemble]
    ext = '.gz' if gzip else ''
    output = []
    # Multi-model structures are kept here so every model is transformed
    # on the same object before the file is written once at the end.
    pdbdict = {}
    for conf in ensemble:
        trans = conf.getTransformation()
        if trans is None:
            raise ValueError('transformations are not calculated, call '
                             '`superpose` or `iterpose`')
        label = conf.getLabel()

        pdb = label[:4]
        # Only call fetchPDB when the structure is not cached yet; the
        # previous ``dict.get(key, fetchPDB(...))`` evaluated it always.
        if pdb in pdbdict:
            filename = pdbdict[pdb]
        else:
            filename = fetchPDB(pdb)
        if filename is None:
            LOGGER.warning(
                'PDB file for conformation {0} is not found.'.format(label))
            output.append(None)
            continue
        LOGGER.info('Parsing PDB file {0} for conformation {1}.'.format(
            pdb, label))

        # Model number: digits following the last 'm' in the label.
        acsi = None
        model = label.rfind('m')
        if model > 3:
            model = label[model + 1:]
            if model.isdigit():
                acsi = int(model) - 1
            LOGGER.info('Applying transformation to model {0}.'.format(model))

        # A cached entry is already a parsed structure, not a path.
        if isinstance(filename, str):
            ag = parsePDB(filename)
        else:
            ag = filename

        if acsi is not None:
            if acsi >= ag.numCoordsets():
                LOGGER.warn('Model number {0} for {1} is out of range.'.format(
                    model, pdb))
                output.append(None)
                continue
            ag.setACSIndex(acsi)
        trans.apply(ag)
        outfn = os.path.join(outdir, pdb + suffix + '.pdb' + ext)
        if ag.numCoordsets() > 1:
            pdbdict[pdb] = ag
        else:
            writePDB(outfn, ag)
        output.append(os.path.normpath(outfn))

    for pdb, ag in pdbdict.items():  # PY3K: OK
        writePDB(os.path.join(outdir, pdb + suffix + '.pdb' + ext), ag)
    if len(output) == 1:
        return output[0]
    else:
        return output
def _writeVMDstiffnessBody(out, out_txt, stiffness, coords_sel, coords,
                           resnum_list, indices0, indices1, k_range,
                           filename):
    """Write the TCL commands to *out* and the residue pairs to *out_txt*.
    Return the number of residue pairs found within *k_range*."""

    out.write('display rendermode GLSL \n')
    out.write('display projection orthographic\n')
    out.write('color Display Background white\n')
    out.write('display shadows on\n')
    out.write('display depthcue off\n')
    out.write('axes location off\n')
    out.write('stage location off\n')
    out.write('light 0 on\n')
    out.write('light 1 on\n')
    out.write('light 2 off\n')
    out.write('light 3 on\n')
    out.write('mol addrep 0\n')
    out.write('display resetview\n')
    out.write('mol new {./' + str(filename) + '.pdb} type {pdb} first 0 last -1 step 1 waitfor 1\n')
    out.write('mol modselect 0 0 protein\n')
    out.write('mol modstyle 0 0 NewCartoon 0.300000 10.000000 4.100000 0\n')
    out.write('mol modcolor 0 0 Structure\n')
    out.write('mol color Structure\n')
    out.write('mol representation NewCartoon 0.300000 10.000000 4.100000 0\n')
    out.write('mol selection protein\n')
    out.write('mol material Opaque\n')

    colors = ['blue', 'red', 'gray', 'orange', 'yellow', 'tan', 'silver', 'green',
              'white', 'pink', 'cyan', 'purple', 'lime', 'mauve', 'ochre', 'iceblue', 'black',
              'yellow2', 'yellow3', 'green2', 'green3', 'cyan2', 'cyan3', 'blue2', 'blue3', 'violet',
              'violet2', 'magenta', 'magenta2', 'red2', 'red3', 'orange2', 'orange3']*50

    color_nr = 1  # starting from red color in VMD
    first_resnum = resnum_list[0]
    pair_count = 0
    resnames = None  # fetched on first use, as before

    for r in range(indices0, indices1 + 1):
        nr_baza_col = []  # Resid of aa are here
        out.write("draw color " + str(colors[color_nr]) + "\n")

        for nr_i, i in enumerate(stiffness[r]):
            if k_range[0] < float(i) < k_range[1]:
                if resnames is None:
                    resnames = coords_sel.getResnames()
                nr_baza_col.append(nr_i + first_resnum)
                resid_r = str(resnames[r]) + str(r + first_resnum)
                resid_r2 = str(resnames[nr_i]) + str(nr_i + first_resnum)

                # One line per residue pair inside the requested range.
                out.write("draw line "+'{'+str(coords[r])[1:-1]+'} {'+
                          str(coords[nr_i])[1:-1]+'} width 3 style solid \n')
                out_txt.write(
                    str(resid_r) + '\t' + resid_r2 + '\t' + str(i) + '\n')
                pair_count += 1

        if nr_baza_col:
            # Add a VDW representation for residue r and its partners; the
            # colour only advances when the residue had at least one pair.
            resids = (str(r + first_resnum) + ' ' +
                      str(nr_baza_col)[1:-1].replace(',', ''))
            out.write('mol addrep 0\n')
            out.write('mol modselect '+str(color_nr+1)+' 0 protein and name CA and resid '+
                      resids+'\n')
            out.write('mol modcolor ' + str(color_nr + 1) + ' 0 ColorID ' +
                      str(color_nr) + '\n')
            out.write('mol modstyle ' + str(color_nr + 1) +
                      ' 0 VDW 0.600000 12.000000\n')
            out.write('mol color ColorID ' + str(color_nr) + '\n')
            out.write('mol representation VDW 1.000000 12.000000 \n')
            out.write('mol selection protein and name CA and resid '+
                      resids+'\n')
            out.write('mol material Opaque \n')
            color_nr = color_nr + 1

    out.write('mol addrep 0\n')
    return pair_count


def writeVMDstiffness(model, pdb, indices, k_range, filename='vmd_out',
                      selstr='protein and name CA', loadToVMD=True):
    """Returns three *filename* files: (1) PDB file with coordinates.
    (2) TCL file containing vmd commands for loading PDB file with accurate
    vmd representation. Pair of residues with selected *k_range* of
    effective spring constant are shown in VMD representation with
    solid line between them.
    If more than one residue will be selected in *indices*, different pair
    for each residue will be colored in the different colors.
    (3) TXT file contains pair of residues with effective spring constant in
    selected range *k_range*.

    The stiffness matrix is read from ``model.getStiffness()`` and must
    have been calculated beforehand.

    :arg model: this is an 3-dimensional NMA instance from a :class:`.ANM`
        calculations
    :type model: :class:`.ANM`

    :arg pdb: atoms providing a ``select`` method; the selection is used
        for residue numbers, residue names and coordinates
    :type pdb: an atomic object

    :arg indices: amino acid number.
    :type indices: ``[int, int]`` or ``[int]`` for one amino acid

    :arg k_range: effective force constant value.
    :type k_range: int or float, ``[int, int]``

    :arg filename: prefix of the output files, default is ``'vmd_out'``
    :type filename: str

    :arg selstr: a selection from :class:`.Select`
    :type selstr: str

    :arg loadToVMD: whether to open the TCL file in VMD, default is
        **True**
    :type loadToVMD: bool

    :returns: the (already closed) TCL file object when at least one
        residue pair was found, otherwise the string ``'None'``

    :raises TypeError: if *pdb* cannot be selected from, or *model* is
        not a 3-dimensional NMA instance
    :raises ValueError: if *model* has no modes or stiffness matrix, or
        *indices* does not hold one or two residue numbers
    """

    try:
        coords_sel = pdb.select(selstr)
        resnum_list = coords_sel.getResnums()
        coords = (coords_sel._getCoords() if hasattr(coords_sel, '_getCoords') else
                  coords_sel.getCoords())
    except AttributeError:
        # Residue numbers and names are required below, so anything that
        # cannot be selected from is rejected here (the previous fallback
        # referenced a variable that was not yet bound).
        raise TypeError('pdb must be a Numpy array or an object '
                        'with `getCoords` method')

    if not isinstance(model, NMA):
        raise TypeError('model must be an NMA instance')
    elif not model.is3d():
        raise TypeError('model must be a 3-dimensional NMA instance')
    elif len(model) == 0:
        raise ValueError('model must have normal modes calculated')
    elif model.getStiffness() is None:
        raise ValueError('model must have stiffness matrix calculated')
    elif len(indices) == 0:
        raise ValueError('indices cannot be an empty array')

    # Convert residue numbers to zero-based rows of the stiffness matrix.
    if len(indices) == 1:
        indices0 = indices[0] - resnum_list[0]
        indices1 = indices[0] - resnum_list[0]
    elif len(indices) == 2:
        indices0 = indices[0] - resnum_list[0]
        indices1 = indices[1] - resnum_list[0]
    else:
        raise ValueError('indices must contain one or two residue numbers')

    stiffness = model.getStiffness()

    out = openFile(addext(filename, '.tcl'), 'w')
    try:
        out_txt = openFile(addext(filename, '.txt'), 'w')
        try:
            writePDB(filename + '.pdb', pdb)
            LOGGER.info('Creating VMD file.')
            pair_count = _writeVMDstiffnessBody(
                out, out_txt, stiffness, coords_sel, coords, resnum_list,
                indices0, indices1, k_range, filename)
        finally:
            out_txt.close()
    finally:
        out.close()

    if (loadToVMD == True):
        from prody import pathVMD
        LOGGER.info('File will be loaded to VMD program.')
        os.system(pathVMD() + " -e " + str(filename) + ".tcl")

    if pair_count > 0:
        return out
    LOGGER.info('There is no residue pair in this Kij range.')
    # The string 'None' (not the None object) is kept for compatibility.
    return 'None'
def calcChainsNormDistFluct(coords, ch1, ch2, cutoff=10., percent=5, rangeAng=5,
                            filename='ch_ndf_out', loadToVMD=True):
    '''Calculate protein-protein interaction using getNormDistFluct() from
    :class:`.GNM` model. It is assigned to protein complex.

    :arg coords: atoms of the complex; must provide a ``select`` method.
    :type coords: an atomic object

    :arg ch1: first chain name
    :type ch1: 'A' or other letter as a string

    :arg ch2: second chain name
    :type ch2: string

    :arg cutoff: cutoff distance (Å) for pairwise interactions in
        Kirchhoff matrix, default is 10.0 Å
    :type cutoff: float

    :arg percent: percent of the highest and lowest results displayed
        in _VMD program, default is 5%
    :type percent: int or float

    :arg rangeAng: cutoff range of protein-protein interactions,
        default is 5 Å
    :type rangeAng: int or float

    :arg filename: prefix of the PDB, TCL and ``_pairs.txt`` output files
    :type filename: str

    :arg loadToVMD: whether to open the TCL file in VMD, default is
        **True**
    :type loadToVMD: bool

    :returns: the sub-block of the normalized distance fluctuation matrix
        that remains after the row and column deletions below
    :rtype: :class:`numpy.ndarray`

    UNDER PREPARATION.. problems with not complete structures
    '''

    sele1 = coords.select('name CA and same residue as exwithin '+str(rangeAng)
                          +' of chain '+str(ch1))
    sele2 = coords.select('name CA and same residue as exwithin '+str(rangeAng)
                          +' of chain '+str(ch2))
    num1 = len(list(set(sele1.getResnums())))
    num2 = len(list(set(sele2.getResnums())))

    LOGGER.info('Analized chains: {0}, {1}'.format(ch1, ch2))
    LOGGER.info(
        'Number of selected amino acids: chain {0}-{1}aa, chain {2}-{3}aa'.
        format(ch1, num2, ch2, num1))
    seleALL = sele1 + sele2
    seleALL_ca = seleALL.select('protein and name CA')

    from .gnm import GNM
    model = GNM('prot analysis')
    model.buildKirchhoff(seleALL_ca, cutoff)
    model.calcModes()

    ndf_matrix0 = model.getNormDistFluct(seleALL_ca)
    ndf_c1 = np.delete(ndf_matrix0, np.s_[0:num1], axis=0)  # rows
    ndf_matrix = np.delete(ndf_c1, np.s_[num1:(num1 + num2)], axis=1)

    # Thresholds: *percent* of the value span below the maximum and above
    # the minimum.
    perc = (np.amax(ndf_matrix) - np.min(ndf_matrix)) * percent * 0.01
    maxRange = np.amax(ndf_matrix) - perc
    minRange = np.min(ndf_matrix) + perc

    writePDB(filename, coords)

    # Parse the CA sub-selections once instead of once per residue pair.
    ca1 = sele1.select('protein and name CA')
    ca2 = sele2.select('protein and name CA')

    mmRange = {'minRange': minRange, 'maxRange': maxRange}
    ch = [ch2, ch1]
    color = ['1', '7']  # red-maxRange, red-minRange

    with open(filename + '.tcl', 'w') as out_tcl:
        out_tcl.write('display resetview \nmol addrep 0 \ndisplay resetview \n')
        out_tcl.write('mol new {./' + filename +
                      '.pdb} type {pdb} first 0 last -1 step 1 waitfor 1 \n')
        out_tcl.write('animate style Loop \ndisplay projection Orthographic \n')
        out_tcl.write(
            'display depthcue off \ndisplay rendermode GLSL \naxes location Off \n')
        out_tcl.write('color Display Background white \n')
        out_tcl.write(
            'mol modstyle 0 0 NewCartoon 0.300000 10.000000 4.100000 0 \n')
        out_tcl.write('mol modselect 0 0 protein \nmol modcolor 0 0 Chain \n')
        out_tcl.write('mol modmaterial 0 0 BrushedMetal\n')
        # NOTE(review): this lone 'm' ends up prefixed to the next command
        # written to the script; kept as found, confirm whether intended.
        out_tcl.write('m')

        with open(filename + '_pairs.txt', 'w') as out_pairs:
            for nr_j, j in enumerate(mmRange):
                if j == 'minRange':
                    x, y = np.where(ndf_matrix < mmRange[j])
                    out_pairs.write(j + ' (< ' + str(mmRange[j]) + ') \n')
                elif j == 'maxRange':
                    x, y = np.where(ndf_matrix > mmRange[j])
                    out_pairs.write(j + ' (> ' + str(mmRange[j]) + ') \n')

                vmd_ch_list = [[], []]
                if len(x) > 0:
                    resnames1, resnums1 = ca1.getResnames(), ca1.getResnums()
                    resnames2, resnums2 = ca2.getResnames(), ca2.getResnums()
                for xi, yi in zip(x, y):
                    out_pairs.write("{}{} {}{} {}\n".format(
                        resnames1[yi], resnums1[yi],
                        resnames2[xi], resnums2[xi],
                        ndf_matrix[xi, yi]))
                    vmd_ch_list[0].append(resnums1[yi])
                    vmd_ch_list[1].append(resnums2[xi])

                out_pairs.write('\n')

                # Unique residue numbers per chain, rendered as a
                # space-separated list for the VMD selection strings.
                unique = [list(set(nums)) for nums in vmd_ch_list]
                resid_strs = [str(u).replace(',', '')[1:-1] for u in unique]

                for k in range(len(vmd_ch_list)):
                    out_tcl.write('mol addrep 0\n')
                    out_tcl.write('mol modselect '+color[nr_j]+' 0 protein and name chain '+ch[k]+
                                  ' and resid '+resid_strs[k]+'\n')
                    out_tcl.write('mol modcolor ' + color[nr_j] + ' 0 ColorID ' +
                                  color[nr_j] + '\n')
                    out_tcl.write('mol modstyle ' + color[nr_j] +
                                  ' 0 CPK 1.000000 0.300000 12.000000 12.000000\n')
                    out_tcl.write('mol color ColorID ' + color[nr_j] + '\n')
                    out_tcl.write(
                        'mol representation CPK 1.000000 0.300000 12.000000 12.000000\n')
                    out_tcl.write('mol selection protein and chain '+ch[k]+' and resid '
                                  +resid_strs[k]+'\n')
                    out_tcl.write('mol material Opaque \n')

                # ``j`` is the current key; indexing ``mmRange.keys()`` is
                # not possible on Python 3.
                LOGGER.info('Finded residues in {0}: {1}'.format(
                    j, len(unique[1]) + len(unique[0])))
                LOGGER.info('chain {0} and resid {1}'.format(ch[0], resid_strs[0]))
                LOGGER.info('chain {0} and resid {1}'.format(ch[1], resid_strs[1])) 

        out_tcl.write('mol addrep 0\n')
        LOGGER.info('Created TCL file.')

    if (loadToVMD == True):
        from prody import pathVMD
        LOGGER.info('File will be loaded to VMD program.')
        os.system(pathVMD() + " -e " + str(filename) + ".tcl")

    return ndf_matrix
def scanPockets(self):
    """Generates ESSA z-scores for pockets and parses pocket features.
    It requires both Fpocket 3.0 and Pandas being installed in your system.

    The heavy atoms are written to :file:`<title>_pro.pdb`, ``fpocket`` is
    run on that file unless the :file:`<title>_pro_out` directory already
    exists, and the pocket PDB files found in its ``pockets`` folder are
    parsed.  Results are stored in ``self._df`` (pocket features) and
    ``self._df_zs`` (ESSA_max, ESSA_med and LHD z-scores).

    :returns: **None**; a warning is logged and nothing is computed when
        Fpocket or Pandas is not available
    """

    from os import getcwd
    from re import findall

    fpocket = which('fpocket')

    if fpocket is None:
        LOGGER.warning(
            'Fpocket (version >= 3.0) was not found, please install it.')
        return None

    try:
        from pandas import Index, DataFrame
    except ImportError as ie:
        LOGGER.warning(ie.__str__() + ' was found, please install it.')
        return None

    # (chain id, residue number) -> index used to look up z-scores
    rcr = {(i, j): k if self._rib else self._ri[k]
           for i, j, k in zip(self._ca.getChids(),
                              self._ca.getResnums(),
                              self._ca.getResindices())}

    writePDB('{}_pro'.format(self._title), self._heavy)

    direc = '{}_pro_out'.format(self._title)
    if not isdir(direc):
        system('fpocket -f {}_pro.pdb'.format(self._title))

    # Work inside the pockets folder, and always return to the starting
    # directory, even when reading or parsing a pocket file fails.
    start_dir = getcwd()
    chdir(direc + '/pockets')
    try:
        pocket_files = [x for x in listdir('.') if x.endswith('.pdb')]
        # Order by the integer that follows the first six characters of
        # the name part before '_'.
        pocket_files.sort(key=lambda x: int(x.partition('_')[0][6:]))
        ps = []
        for pocket_file in pocket_files:
            with open(pocket_file, 'r') as f:
                text = f.read()
            # Header lines of the form "<w> <w> - <feature>: <number>"
            matches = findall(r'(\w+\s\w+\s*-\s*)(.+):\s*([\d.-]+)(\n)', text)
            pairs = [(m[1].strip(), float(m[2])) for m in matches]
            fea, sco = list(zip(*pairs))
            ps.append(sco)
        pdbs = parsePDB(pocket_files)
    finally:
        chdir(start_dir)

    # ----- # ----- #

    ps = array(ps)

    # pocket number -> set of (chain id, residue number) lining the pocket
    pcn = {int(pdb.getTitle().partition('_')[0][6:]):
           set(zip(pdb.getChids().tolist(),
                   pdb.getResnums().tolist())) for pdb in pdbs}
    pi = {p: [rcr[x] for x in crn] for p, crn in pcn.items()}

    pzs_max = {k: max(self._zscore[v]) for k, v in pi.items()}
    pzs_med = {k: median(self._zscore[v]) for k, v in pi.items()}

    # ----- # ----- #

    indices = Index(range(1, ps.shape[0] + 1), name='Pocket #')

    # Feature names are taken from the last pocket file read.
    columns = Index(fea, name='Feature')

    self._df = DataFrame(index=indices, columns=columns, data=ps)

    # ----- # ----- #

    columns_zs = Index(['ESSA_max', 'ESSA_med', 'LHD'], name='Z-score')

    zps = c_[list(pzs_max.values())]
    zps = hstack((zps, c_[list(pzs_med.values())]))
    zps = hstack(
        (zps, zscore(self._df[['Local hydrophobic density Score']])))

    self._df_zs = DataFrame(index=indices, columns=columns_zs, data=zps)
def runStep(self, **kwargs):
    """Run a single step of adaptive ANM.

    Modes will be calculated for *structA* and the subset with
    a cumulative overlap above a threshold defined by *Fmin*
    is used for transitioning towards *structB*.

    By default this function uses values from initialisation but
    they can be over-ridden if desired. For example, in bi-directional
    adaptive ANM, we switch *structA* and *structB*,
    *alignSelA* and *alignSelB*, and *reduceSelA* and *reduceSelB*

    Recognised keyword arguments (each defaults to the attribute of the
    same name unless noted): *structA*, *structB*, *alignSel*,
    *alignSelA*, *alignSelB*, *reduceSel*, *reduceSelA*, *reduceSelB*,
    *Fmin*, *f*, *outputDCD*, *outputPDB*, *filename*, *mapping_func*,
    *seqid*, *coverage* (also accepted as *overlap*), *pwalign* (also
    accepted as *mapping*), *maxModes* and *trim*.

    The step updates the coordinates of the instance's structure that
    equals *structA*, appends to the corresponding ensemble and mode
    lists, increments ``self.numSteps`` and records the new RMSD in
    ``self.rmsds``.

    :raises TypeError: if *maxModes* is not an integer or float
    """

    structA = kwargs.pop('structA', self.structA)
    # Fixed: this used to pop 'structA' a second time, so a *structB*
    # passed by the caller was silently ignored.
    structB = kwargs.pop('structB', self.structB)

    alignSel = kwargs.pop('alignSel', self.alignSel)
    alignSelA = kwargs.pop('alignSelA', self.alignSelA)
    alignSelB = kwargs.pop('alignSelB', self.alignSelB)

    reduceSel = kwargs.pop('reduceSel', self.reduceSel)
    reduceSelA = kwargs.pop('reduceSelA', self.reduceSelA)
    reduceSelB = kwargs.pop('reduceSelB', self.reduceSelB)

    if reduceSelA is None:
        reduceSelA = reduceSel

    if reduceSelB is None:
        reduceSelB = reduceSel

    # NOTE(review): behaviour kept as found -- the branch is chosen by
    # *alignSelA*, so *alignSel* is only ever used for *alignSelB*.  The
    # outer test may have been meant to check *alignSel*; confirm.
    if alignSelA is None:
        alignSelA = reduceSelA
        if alignSelB is None:
            alignSelB = reduceSelB
    elif alignSelB is None:
        alignSelB = alignSel

    Fmin = kwargs.get('Fmin', self.Fmin)

    f = kwargs.get('f', self.f)

    outputDCD = kwargs.get('outputDCD', self.outputDCD)
    outputPDB = kwargs.get('outputPDB', self.outputPDB)
    filename = kwargs.get('filename', self.filename)

    LOGGER.info('\nStarting cycle {0} with initial structure {1}'.format(
        self.numSteps + 1, structA))

    if alignSelA is None:
        structA_sel = structA
    else:
        structA_sel = structA.select(alignSelA)

    if alignSelB is None:
        structB_sel = structB
    else:
        structB_sel = structB.select(alignSelB)

    mapping_func = kwargs.pop('mapping_func', self.mapping_func)
    seqid = kwargs.pop('seqid', self.seqid)
    coverage = kwargs.pop('overlap', self.coverage)
    coverage = kwargs.pop('coverage', coverage)
    pwalign = kwargs.pop('pwalign', self.pwalign)
    pwalign = kwargs.pop('mapping', pwalign)

    try:
        _, T = superpose(structA_sel, structB_sel)
        structA = applyTransformation(T, structA)
    except Exception:
        # Direct superposition failed; map structB onto structA first and
        # superpose onto the mapped atoms instead.
        structB_amap = sum(
            np.array(
                mapping_func(structB_sel,
                             structA_sel,
                             overlap=coverage,
                             seqid=seqid,
                             pwalign=pwalign))[:, 0])
        _, T = superpose(structA_sel, structB_amap)
        structA = applyTransformation(T, structA)

    # A maxModes below 1 is treated as a fraction of 3N; the result is
    # capped at 3N - 6.
    maxModes = kwargs.get('maxModes', self.maxModes)
    if not isinstance(maxModes, (int, float)):
        raise TypeError('maxModes should be an integer or float')
    if maxModes < 1:
        maxModes = int(maxModes * 3 * self.structA.numAtoms() - 6)
    if maxModes > 3 * self.structA.numAtoms() - 6:
        maxModes = 3 * self.structA.numAtoms() - 6

    if self.n_modes > maxModes:
        self.n_modes = maxModes

    trim = kwargs.pop('trim', self.trim)
    anmA, _ = calcENM(structA, n_modes=self.n_modes)

    if trim == 'slice':
        trim_anmA, _ = sliceModel(anmA, structA, reduceSelA)
    elif trim == 'reduce':
        trim_anmA, _ = reduceModel(anmA, structA, reduceSelA)
        trim_anmA.calcModes(n_modes=self.n_modes)
    else:
        trim_anmA = anmA

    coordsA = structA.getCoords()
    coordsA_sel = structA_sel.getCoords()
    coordsB_sel = structB_sel.getCoords()

    # Deformation vector from A to B on the aligned selections.
    defvec = coordsB_sel - coordsA_sel
    d = defvec.flatten()
    self.dList.append(d)

    if Fmin is None:
        if self.numSteps == 0 or self.resetFmin:
            Fmin = 0.  # Select the first mode only
        else:
            Fmin = 1 - np.sqrt(
                np.linalg.norm(self.dList[self.numSteps]) /
                np.linalg.norm(self.dList[0]))

    if Fmin > self.Fmin_max:
        Fmin = self.Fmin_max

    LOGGER.info(
        'Fmin is {:4.3f}, corresponding to a cumulative overlap of {:4.3f}'
        .format(Fmin, np.sqrt(Fmin)))

    # Project the deformation onto the modes and sort the modes by
    # decreasing absolute overlap.
    trim_d = sliceAtomicData(d, structA_sel, reduceSelA)
    overlaps = np.dot(trim_d, trim_anmA.getEigvecs())
    overlap_sorting_indices = list(
        reversed(list(np.argsort(abs(overlaps)))))
    overlaps = overlaps[overlap_sorting_indices]

    if trim == 'reduce':
        sliced_anmA, _ = sliceModel(anmA, structA, reduceSelA)
        modesetA = ModeSet(trim_anmA, overlap_sorting_indices)
        _, overlap_sorting_indices = matchModes(modesetA,
                                                sliced_anmA,
                                                index=True)

    modesetA = ModeSet(anmA, overlap_sorting_indices)

    normalised_overlaps = overlaps / np.linalg.norm(d)
    c_sq = np.cumsum(np.power(normalised_overlaps, 2), axis=0)

    modesCrossingFmin = np.where(c_sq <= Fmin)[0]
    numModes = len(modesCrossingFmin)
    if numModes == 0:
        # Always use at least the best-overlapping mode.
        numModes = 1
        modesCrossingFmin = [0]

    self.numModesList.append(numModes)

    if numModes == 1:
        LOGGER.info('Using 1 mode with overlap {0} (Mode {1})'.format(
            '{:4.3f}'.format(np.sqrt(c_sq[0])),
            modesetA.getIndices()[0] + 1))
    elif numModes < 11:
        LOGGER.info(
            'Using {0} modes with cumulative overlap {1} (Modes {2} and {3})'
            .format(
                numModes, '{:4.3f}'.format(np.sqrt(c_sq[numModes - 1])),
                ', '.join([
                    str(entry)
                    for entry in modesetA.getIndices()[:numModes - 1] + 1
                ]), str(modesetA.getIndices()[numModes - 1] + 1)))
    else:
        LOGGER.info(
            'Using {0} modes with cumulative overlap {1} (Modes {2}, ... and {3}) with max mode number {4} and min mode number {5}'
            .format(
                numModes, '{:4.3f}'.format(np.sqrt(c_sq[numModes - 1])),
                ', '.join([
                    str(entry)
                    for entry in modesetA.getIndices()[:10] + 1
                ]), str(modesetA.getIndices()[numModes - 1] + 1),
                np.max(modesetA.getIndices()[:numModes] + 1),
                np.min(modesetA.getIndices()[:numModes] + 1)))

    # When the selected modes come close to the end of the calculated
    # range, ask for ten times more modes next time (capped at 3N - 6).
    if np.max(modesetA.getIndices()[:numModes]) > self.n_modes - 5:
        self.n_modes *= 10

    if self.n_modes > 3 * self.structA.numAtoms() - 6:
        self.n_modes = 3 * self.structA.numAtoms() - 6

    v = np.sum(np.multiply(overlaps[:numModes],
                           modesetA.getEigvecs()[:, :numModes]),
               axis=1).reshape(coordsA.shape)

    trim_v = sliceAtomicData(v.reshape(-1), structA,
                             reduceSelA).reshape(-1, 3)
    s_min = sum(np.multiply(trim_v.flatten(), trim_d)) / sum(
        np.power(trim_v.flatten(), 2))

    new_coordsA = coordsA + f * s_min * v

    # Record the step on whichever side of the instance was moved.
    if structA == self.structA:
        self.anmA = anmA
        self.anmListA.append(modesetA)
        self.structA.setCoords(new_coordsA)
        self.ensembleA.addCoordset(new_coordsA)
        self.whichModesA.append(modesetA[modesCrossingFmin])
    elif structA == self.structB:
        self.anmB = anmA
        self.anmListB.append(modesetA)
        self.structB.setCoords(new_coordsA)
        self.ensembleB.addCoordset(new_coordsA)
        self.whichModesB.append(modesetA[modesCrossingFmin])

    new_coordsA_reduceSel = structA.select(reduceSelA).getCoords()
    coordsB_reduceSel = structB.select(reduceSelB).getCoords()
    rmsd = calcRMSD(new_coordsA_reduceSel, coordsB_reduceSel)

    LOGGER.info('Current RMSD is {:4.3f}\n'.format(rmsd))

    self.numSteps += 1

    self.rmsds.append(rmsd)

    if outputPDB:
        writePDB(filename + '_A', self.ensembleA)
        LOGGER.clear()
        writePDB(filename + '_B', self.ensembleB)
        LOGGER.clear()

    if outputDCD:
        writeDCD(filename + '_A', self.ensembleA)
        LOGGER.clear()
        writeDCD(filename + '_B', self.ensembleB)
        LOGGER.clear()

    return
def writeVMDstiffness(model, pdb, indices, k_range, filename='vmd_out',
                      selstr='protein and name CA', loadToVMD=True):
    """Write VMD visualisation files for residue pairs whose effective
    spring constant falls inside *k_range*.

    Three files are produced:

    (1) ``<filename>.pdb`` with the coordinates of *pdb*.
    (2) A TCL script (*filename* with ``.tcl`` extension) that loads the PDB
        file and draws a solid line between every selected residue pair.
        Each residue taken from *indices* gets its own drawing colour.
    (3) A TXT file (*filename* with ``.txt`` extension) with one
        tab-separated line per pair: first residue, second residue and the
        stiffness value.

    The spring constants are read from ``model.getStiffness()``, so the
    stiffness matrix must have been built beforehand.

    :arg model: 3-dimensional NMA instance with a stiffness matrix
    :type model: :class:`.ANM`

    :arg pdb: atomic object providing ``select``; the selection must
        provide ``getResnums``, ``getResnames`` and ``getCoords``
    :type pdb: :class:`.Atomic`

    :arg indices: residue number(s); one value selects a single residue,
        two values select the inclusive range between them
    :type indices: ``[int]`` or ``[int, int]``

    :arg k_range: lower and upper bound (both exclusive) of the effective
        force constant
    :type k_range: ``[float, float]``

    :arg filename: base name of the output files
    :type filename: str

    :arg selstr: selection string applied to *pdb*
    :type selstr: str

    :arg loadToVMD: when true, VMD is started with the generated script
    :type loadToVMD: bool

    :returns: the (already closed) TCL file object when at least one pair
        was found, otherwise the string ``'None'``

    :raises TypeError: if *pdb* cannot be selected from or *model* is not a
        3-dimensional NMA instance
    :raises ValueError: if *model* has no modes or stiffness matrix, or if
        *indices* does not hold one or two values
    """

    try:
        coords_sel = pdb.select(selstr)
        resnum_list = coords_sel.getResnums()
        coords = (coords_sel._getCoords() if hasattr(coords_sel, '_getCoords')
                  else coords_sel.getCoords())
    except AttributeError:
        # Residue numbers and names are needed below, so a bare coordinate
        # array (or a selection matching nothing) cannot be used here.
        raise TypeError('pdb must be an atomic object with a `select` '
                        'method and selstr must match at least one atom')

    if not isinstance(model, NMA):
        raise TypeError('model must be an NMA instance')
    elif not model.is3d():
        raise TypeError('model must be a 3-dimensional NMA instance')
    elif len(model) == 0:
        raise ValueError('model must have normal modes calculated')

    stiffness = model.getStiffness()
    if stiffness is None:
        raise ValueError('model must have stiffness matrix calculated')

    if len(indices) == 0:
        raise ValueError('indices cannot be an empty array')
    elif len(indices) > 2:
        raise ValueError('indices must contain one or two residue numbers')

    # Residue numbers are turned into row indices by subtracting the first
    # residue number of the selection.
    # NOTE(review): this assumes consecutive residue numbering - confirm.
    first_resnum = resnum_list[0]
    indices0 = indices[0] - first_resnum
    indices1 = indices[-1] - first_resnum

    resnames = coords_sel.getResnames()

    out = openFile(addext(filename, '.tcl'), 'w')
    out_txt = None
    n_pairs = 0
    try:
        out_txt = openFile(addext(filename, '.txt'), 'w')
        writePDB(filename + '.pdb', pdb)

        LOGGER.info('Creating VMD file.')

        out.write(''.join((
            'display rendermode GLSL \n',
            'display projection orthographic\n',
            'color Display Background white\n',
            'display shadows on\n',
            'display depthcue off\n',
            'axes location off\n',
            'stage location off\n',
            'light 0 on\n',
            'light 1 on\n',
            'light 2 off\n',
            'light 3 on\n',
            'mol addrep 0\n',
            'display resetview\n',
            'mol new {./' + str(filename) + '.pdb} type {pdb} first 0 '
            'last -1 step 1 waitfor 1\n',
            'mol modselect 0 0 protein\n',
            'mol modstyle 0 0 NewCartoon 0.300000 10.000000 4.100000 0\n',
            'mol modcolor 0 0 Structure\n',
            'mol color Structure\n',
            'mol representation NewCartoon 0.300000 10.000000 4.100000 0\n',
            'mol selection protein\n',
            'mol material Opaque\n',
        )))

        colors = ['blue', 'red', 'gray', 'orange', 'yellow', 'tan', 'silver',
                  'green', 'white', 'pink', 'cyan', 'purple', 'lime', 'mauve',
                  'ochre', 'iceblue', 'black', 'yellow2', 'yellow3', 'green2',
                  'green3', 'cyan2', 'cyan3', 'blue2', 'blue3', 'violet',
                  'violet2', 'magenta', 'magenta2', 'red2', 'red3', 'orange2',
                  'orange3'] * 50

        color_nr = 1  # starting from red color in VMD
        for r in range(indices0, indices1 + 1):
            partner_resnums = []  # residue numbers paired with residue r
            out.write("draw color " + str(colors[color_nr]) + "\n")
            resid_r = str(resnames[r]) + str(r + first_resnum)

            for nr_i, k_ij in enumerate(stiffness[r]):
                if not k_range[0] < float(k_ij) < k_range[1]:
                    continue
                partner_resnums.append(nr_i + first_resnum)
                resid_r2 = str(resnames[nr_i]) + str(nr_i + first_resnum)
                out.write("draw line " + '{' + str(coords[r])[1:-1] + '} {' +
                          str(coords[nr_i])[1:-1] +
                          '} width 3 style solid \n')
                out_txt.write(resid_r + '\t' + resid_r2 + '\t' +
                              str(k_ij) + '\n')
                n_pairs += 1

            if partner_resnums:
                # Join explicitly instead of slicing str(list): the repr of
                # NumPy integers is not guaranteed to be a bare number.
                resid_sel = ('protein and name CA and resid ' +
                             str(r + first_resnum) + ' ' +
                             ' '.join(str(n) for n in partner_resnums))
                rep = str(color_nr + 1)
                out.write('mol addrep 0\n')
                out.write('mol modselect ' + rep + ' 0 ' + resid_sel + '\n')
                out.write('mol modcolor ' + rep + ' 0 ColorID ' +
                          str(color_nr) + '\n')
                out.write('mol modstyle ' + rep + ' 0 VDW 0.600000 12.000000\n')
                out.write('mol color ColorID ' + str(color_nr) + '\n')
                out.write('mol representation VDW 1.000000 12.000000 \n')
                out.write('mol selection ' + resid_sel + '\n')
                out.write('mol material Opaque \n')
                color_nr = color_nr + 1

        out.write('mol addrep 0\n')
    finally:
        # Close both handles even when writing fails half-way.
        out.close()
        if out_txt is not None:
            out_txt.close()

    if loadToVMD:
        from prody import pathVMD
        LOGGER.info('File will be loaded to VMD program.')
        os.system(pathVMD() + " -e " + str(filename) + ".tcl")

    if n_pairs > 0:
        return out

    LOGGER.info('There is no residue pair in this Kij range.')
    return 'None'
def calcChainsNormDistFluct(coords, ch1, ch2, cutoff=10., percent=5, rangeAng=5,
                            filename='ch_ndf_out', loadToVMD=True):
    """Calculate protein-protein interaction using ``getNormDistFluct()``
    from a :class:`.GNM` model built on the interface of two chains.

    CA atoms of residues within *rangeAng* of each chain (excluding the
    chain itself) are selected, a GNM is built on them and the block of the
    normalized distance fluctuation matrix coupling the two groups is
    extracted. Pairs in the lowest and highest *percent* of the value range
    are written to ``<filename>_pairs.txt`` and highlighted in the TCL
    script ``<filename>.tcl``. *coords* is also written with
    ``writePDB(filename, coords)``.

    UNDER PREPARATION: problems with not complete structures.

    :arg coords: atomic object providing ``select``
    :type coords: :class:`.Atomic`

    :arg ch1: first chain name
    :type ch1: str

    :arg ch2: second chain name
    :type ch2: str

    :arg cutoff: cutoff distance (A) for pairwise interactions in the
        Kirchhoff matrix, default is 10.0 A
    :type cutoff: float

    :arg percent: percent of the highest and lowest results displayed in
        VMD, default is 5%
    :type percent: int or float

    :arg rangeAng: cutoff range of protein-protein interactions,
        default is 5 A
    :type rangeAng: int or float

    :arg filename: base name of the output files
    :type filename: str

    :arg loadToVMD: when true, VMD is started with the generated script
    :type loadToVMD: bool

    :returns: the inter-group block of the normalized distance fluctuation
        matrix
    """

    sele1 = coords.select('name CA and same residue as exwithin ' +
                          str(rangeAng) + ' of chain ' + str(ch1))
    sele2 = coords.select('name CA and same residue as exwithin ' +
                          str(rangeAng) + ' of chain ' + str(ch2))
    num1 = len(set(sele1.getResnums()))
    num2 = len(set(sele2.getResnums()))

    LOGGER.info('Analized chains: {0}, {1}'.format(ch1, ch2))
    # sele1 holds the residues *around* ch1, hence the crossed counts.
    LOGGER.info('Number of selected amino acids: chain {0}-{1}aa, chain {2}-{3}aa'
                .format(ch1, num2, ch2, num1))

    seleALL = sele1 + sele2
    seleALL_ca = seleALL.select('protein and name CA')

    from .gnm import GNM
    model = GNM('prot analysis')
    model.buildKirchhoff(seleALL_ca, cutoff)
    model.calcModes()

    ndf_matrix0 = model.getNormDistFluct(seleALL_ca)
    ndf_c1 = np.delete(ndf_matrix0, np.s_[0:num1], axis=0)  # rows
    ndf_matrix = np.delete(ndf_c1, np.s_[num1:(num1 + num2)], axis=1)

    ndf_max = np.amax(ndf_matrix)
    ndf_min = np.min(ndf_matrix)
    perc = (ndf_max - ndf_min) * percent * 0.01
    maxRange = ndf_max - perc
    minRange = ndf_min + perc

    writePDB(filename, coords)

    # Explicit order keeps the threshold/colour pairing independent of dict
    # iteration order: ColorID '1' goes with minRange, '7' with maxRange.
    thresholds = (('minRange', minRange), ('maxRange', maxRange))
    ch = [ch2, ch1]
    color = ['1', '7']

    # Loop-invariant lookups (columns of ndf_matrix index ca1, rows ca2).
    ca1 = sele1.select('protein and name CA')
    ca2 = sele2.select('protein and name CA')
    resnames1, resnums1 = ca1.getResnames(), ca1.getResnums()
    resnames2, resnums2 = ca2.getResnames(), ca2.getResnums()

    with open(filename + '.tcl', 'w') as out_tcl, \
            open(filename + '_pairs.txt', 'w') as out_pairs:
        out_tcl.write('display resetview \nmol addrep 0 \ndisplay resetview \n')
        out_tcl.write('mol new {./' + filename +
                      '.pdb} type {pdb} first 0 last -1 step 1 waitfor 1 \n')
        out_tcl.write('animate style Loop \ndisplay projection Orthographic \n')
        out_tcl.write('display depthcue off \ndisplay rendermode GLSL \naxes location Off \n')
        out_tcl.write('color Display Background white \n')
        out_tcl.write('mol modstyle 0 0 NewCartoon 0.300000 10.000000 4.100000 0 \n')
        out_tcl.write('mol modselect 0 0 protein \nmol modcolor 0 0 Chain \n')
        out_tcl.write('mol modmaterial 0 0 BrushedMetal\n')
        # NOTE(review): this lone 'm' is kept from the original; it is
        # emitted directly before the next 'mol addrep' line - confirm
        # whether a full command was intended here.
        out_tcl.write('m')

        for nr_j, (label, limit) in enumerate(thresholds):
            if label == 'minRange':
                x, y = np.where(ndf_matrix < limit)
                out_pairs.write(label + ' (< ' + str(limit) + ') \n')
            else:
                x, y = np.where(ndf_matrix > limit)
                out_pairs.write(label + ' (> ' + str(limit) + ') \n')

            vmd_ch_list = [[], []]
            for xi, yi in zip(x, y):
                out_pairs.write("{}{} {}{} {}\n".format(
                    resnames1[yi], resnums1[yi],
                    resnames2[xi], resnums2[xi],
                    ndf_matrix[xi, yi]))
                vmd_ch_list[0].append(resnums1[yi])
                vmd_ch_list[1].append(resnums2[xi])
            out_pairs.write('\n')

            # Unique residue numbers per chain, joined explicitly so the
            # text does not depend on the repr of NumPy integers.
            unique_resnums = [set(resnums) for resnums in vmd_ch_list]
            resid_text = [' '.join(str(n) for n in resnums)
                          for resnums in unique_resnums]

            for k in range(len(vmd_ch_list)):
                out_tcl.write('mol addrep 0\n')
                # NOTE(review): 'protein and name chain' differs from the
                # 'protein and chain' used for 'mol selection' below -
                # confirm which form is intended.
                out_tcl.write('mol modselect ' + color[nr_j] +
                              ' 0 protein and name chain ' + ch[k] +
                              ' and resid ' + resid_text[k] + '\n')
                out_tcl.write('mol modcolor ' + color[nr_j] + ' 0 ColorID ' +
                              color[nr_j] + '\n')
                out_tcl.write('mol modstyle ' + color[nr_j] +
                              ' 0 CPK 1.000000 0.300000 12.000000 12.000000\n')
                out_tcl.write('mol color ColorID ' + color[nr_j] + '\n')
                out_tcl.write('mol representation CPK 1.000000 0.300000 12.000000 12.000000\n')
                out_tcl.write('mol selection protein and chain ' + ch[k] +
                              ' and resid ' + resid_text[k] + '\n')
                out_tcl.write('mol material Opaque \n')

            LOGGER.info('Finded residues in {0}: {1}'.format(
                label, len(unique_resnums[1]) + len(unique_resnums[0])))
            LOGGER.info('chain {0} and resid {1}'.format(ch[0], resid_text[0]))
            LOGGER.info('chain {0} and resid {1}'.format(ch[1], resid_text[1]))

        out_tcl.write('mol addrep 0\n')
        LOGGER.info('Created TCL file.')

    if loadToVMD:
        from prody import pathVMD
        LOGGER.info('File will be loaded to VMD program.')
        os.system(pathVMD() + " -e " + str(filename) + ".tcl")

    return ndf_matrix
def renumber_InputAlign(alnfile, pdbid, refid, selection="protein",
                        outfile="renumbered.pdb", pdbfile="", newAA=None,
                        first=1):
    """Renumber an input PDB using an existing multiple alignment.

    - alnfile: alignment in .fasta format. Beware of weird characters in
      the sequence ids, e.g. "|".
    - pdbid: sequence id in the alignment file that corresponds to the
      input structure. Must have the same number of residues.
    - refid: sequence id of the reference sequence by which to renumber
      the pdbid sequence. pdbid must not align to any gaps in refid.
    - selection: atom selection(s) in the structure file to renumber,
      given as a comma separated string; each one is renumbered in turn.
    - outfile: output structure file.
    - pdbfile: original structure file. Although it has an empty default,
      an existing file is required.
    - newAA: comma separated list of unrepresented amino acids XXXYCA:
        XXX = three letter abbreviation as in pdbfile
        Y   = one letter code in the alignment
        CA  = atom to use as CA if different from "CA", e.g. C1 in PVL
              of 1JEN
    - first: passed on to ``renumber_aln``.

    Every problem with the inputs is reported with ``print`` and ends the
    program through ``SystemExit(1)``.
    """
    selections = selection.split(",")
    modified_selections = []

    if not os.path.exists(alnfile):
        print("ERROR, no such alignment: %s" % alnfile)
        raise SystemExit(1)
    aln = AlignIO.read(alnfile, "fasta", alphabet=IUPAC.protein)

    # Report every missing id before giving up.
    aln_ids = [x.id for x in aln]
    ids_ok = True
    if pdbid not in aln_ids:
        print("ERROR, no such sequence to renumber: %s" % pdbid)
        ids_ok = False
    if refid not in aln_ids:
        print("ERROR, no such sequence to renumber by: %s" % refid)
        ids_ok = False
    if not ids_ok:
        raise SystemExit(1)

    pdbSeqRec = seqbyname(aln, pdbid)
    if not pdbSeqRec:
        print("ERROR, bad pdbid name")
        raise SystemExit(1)
    refSeqRec = seqbyname(aln, refid)
    if not refSeqRec:
        print("ERROR, bad refid name")
        raise SystemExit(1)

    # An empty pdbfile used to slip through and crash later with a
    # NameError on `structure`; os.path.exists('') is False, so it is now
    # reported like any other missing file.
    if not os.path.exists(pdbfile):
        print("ERROR, no such pdb file: %s" % pdbfile)
        raise SystemExit(1)
    structure = parsePDB(pdbfile)
    updateAA(structure, newAA)

    renumber_aln(aln, refid, pdbid, first)
    for polymer in selections:
        currentSel = structure.select(
            "not hetero and protein and name CA and %s" % polymer)
        if currentSel:
            renumber_struct(structure, pdbSeqRec, polymer)
            modified_selections.append(polymer)
        else:
            print('ERROR: Selection \"%s\" has zero CA atoms' % polymer)

    if writePDB(outfile, structure):
        print("Wrote renumbered %s selections from %s to %s"
              % (str(modified_selections), pdbfile, outfile))
def renumber_noInputAlign(pdbfile, refseqfile, selection="protein",
                          outfile="renumbered.pdb", newAA=None, first=1):
    """Renumber a PDB file according to the reference sequence in
    *refseqfile*.

    The sequence of each selection is extracted from the structure and
    aligned with the reference sequence using needle from EMBOSS.

    - pdbfile: original structure file.
    - refseqfile: .fasta file containing the reference sequence by which
      to renumber.
    - selection: atom selection(s) in the structure file to renumber;
      either a comma separated string or an iterable of selection strings.
      Each one is renumbered in turn.
    - outfile: output structure file.
    - newAA: comma separated list of unrepresented amino acids XXXYCA:
        XXX = three letter abbreviation as in pdbfile
        Y   = one letter code in the alignment
        CA  = atom to use as CA if different from "CA", e.g. C1 in PVL
              of 1JEN
    - first: passed on to ``gpdb.renumber_aln``.

    A missing input file is reported with ``print`` and ends the program
    through ``SystemExit(1)``.
    """
    # A plain string used to be iterated character by character; split it
    # on commas instead and keep accepting ready-made lists.
    if isinstance(selection, str):
        selections = selection.split(",")
    else:
        selections = list(selection)

    tmp = tempfile.gettempdir()
    tmp_refseqfile = os.path.join(tmp, "refseq.fasta")
    id_match = re.search(r"\w+\.\w+", pdbfile)
    # Fall back to the bare file name when there is no "name.ext" part.
    pdbID = id_match.group(0) if id_match else os.path.basename(pdbfile)
    tmp_pdbseqfile = os.path.join(tmp, "%s.fasta" % pdbID)
    tmp_needle = os.path.join(tmp, "needle.out")

    # Check both inputs before anything is written to the temp directory.
    if not os.path.exists(refseqfile):
        print("ERROR, no such file: %s" % refseqfile)
        raise SystemExit(1)
    if not os.path.exists(pdbfile):
        print("ERROR, no such file: %s" % pdbfile)
        raise SystemExit(1)

    refseqRec = SeqIO.read(refseqfile, "fasta", alphabet=IUPAC.protein)
    refseqRec.id = "refseq"
    SeqIO.write(refseqRec, tmp_refseqfile, "fasta")

    try:
        structure = parsePDB("%s" % pdbfile)
        updateAA(structure, newAA)

        modified_selections = []
        for polymer in selections:
            currentSel = structure.select(
                "protein and name CA and %s" % polymer)
            if not currentSel:
                print('ERROR: Selection \"%s\" has zero CA atoms' % polymer)
                continue

            pdbseq_str = ''.join(
                [oneletter[i] for i in currentSel.getResnames()])
            pdbseqRec = SeqRecord(Seq(pdbseq_str, IUPAC.protein), id=pdbID)
            SeqIO.write(pdbseqRec, tmp_pdbseqfile, "fasta")

            needle_cli = NeedleCommandline(asequence=tmp_pdbseqfile,
                                           bsequence=tmp_refseqfile,
                                           gapopen=10, gapextend=0.5,
                                           outfile=tmp_needle)
            needle_cli()
            aln = AlignIO.read(tmp_needle, "emboss", alphabet=IUPAC.protein)
            # tmp_needle and tmp_pdbseqfile are left in the temp directory,
            # as before.

            gpdb.renumber_aln(aln, "refseq", pdbID, first)
            pdbRenSeq = gpdb.seqbyname(aln, pdbID)
            gpdb.renumber_struct(structure, pdbRenSeq, polymer)
            # Storing the per-residue numbers as a string annotation seems
            # to be the only way to keep them with the record.
            pdbRenSeq.annotations["resnum"] = str(
                pdbRenSeq.letter_annotations["resnum"])
            modified_selections.append(polymer)

        if writePDB(outfile, structure):
            print("Wrote renumbered %s selections from %s to %s"
                  % (str(modified_selections), pdbfile, outfile))
    finally:
        # Remove the temporary reference sequence even when a step fails.
        if os.path.exists(tmp_refseqfile):
            os.remove(tmp_refseqfile)