def execDSSP(pdb, outputname=None, outputdir=None, stderr=True): """Execute DSSP for given *pdb*. *pdb* can be a PDB identifier or a PDB file path. If *pdb* is a compressed file, it will be decompressed using Python :mod:`gzip` library. When no *outputname* is given, output name will be :file:`pdb.dssp`. :file:`.dssp` extension will be appended automatically to *outputname*. If :file:`outputdir` is given, DSSP output and uncompressed PDB file will be written into this folder. Upon successful execution of :command:`dssp pdb > out` command, output filename is returned. On Linux platforms, when *stderr* is false, standard error messages are suppressed, i.e. ``dssp pdb > outputname 2> /dev/null``. For more information on DSSP see http://swift.cmbi.ru.nl/gv/dssp/. If you benefited from DSSP, please consider citing [WK83]_. .. [WK83] Kabsch W, Sander C. Dictionary of protein secondary structure: pattern recognition of hydrogen-bonded and geometrical features. *Biopolymers* **1983** 22:2577-2637.""" dssp = which('mkdssp') if dssp is None: dssp = which('dssp') if dssp is None: raise EnvironmentError('command not found: dssp executable is not ' 'found in one of system paths') assert outputname is None or isinstance(outputname, str),\ 'outputname must be a string' assert outputdir is None or isinstance(outputdir, str),\ 'outputdir must be a string' if not os.path.isfile(pdb): pdb = fetchPDB(pdb, compressed=False) if pdb is None: raise ValueError('pdb is not a valid PDB identifier or filename') if os.path.splitext(pdb)[1] == '.gz': if outputdir is None: pdb = gunzip(pdb, os.path.splitext(pdb)[0]) else: pdb = gunzip(pdb, os.path.join(outputdir, os.path.split(os.path.splitext(pdb)[0])[1])) if outputdir is None: outputdir = '.' if outputname is None: out = os.path.join(outputdir, os.path.splitext(os.path.split(pdb)[1])[0] + '.dssp') else: out = os.path.join(outputdir, outputname + '.dssp') if not stderr and PLATFORM != 'Windows': status = os.system('{0} {1} > {2} 2> /dev/null'.format( dssp, pdb, out)) else: status = os.system('{0} {1} > {2}'.format(dssp, pdb, out)) if status == 0: return out
def showSequenceTree(hits): """Returns a plot that contains a dendrogram of the sequence similarities among the sequences in given hit list. :arg hits: A dictionary that contains hits that are obtained from a blast record object. :type hits: dict """ clustalw = which('clustalw') if clustalw is None: print( "The executable for clustalw does not exists, install or add clustalw to path." ) return try: from Bio import Phylo except: raise ImportError("Phylo is not installed properly.") with open("hits.fasta", "w") as inp: for z in hits: inp.write(">" + str(z) + "\n") inp.write(hits[z]['hseq']) inp.write("\n") cmd = clustalw + " hits.fasta" os.system(cmd) tree = Phylo.read("hits.dnd", "newick") try: import pylab except: raise ImportError("Pylab or matplotlib is not installed.") Phylo.draw(tree) return
def pathVMD(*path): """Returns VMD path, or set it to be a user specified *path*.""" if not path: path = SETTINGS.get('vmd', None) if isExecutable(path): return path else: LOGGER.warning('VMD path is not set by user, looking for it.') vmdbin = None vmddir = None if PLATFORM == 'Windows': if PY3K: import winreg else: import _winreg as winreg # PY3K: OK for vmdversion in ('1.8.7', '1.9', '1.9.1'): try: key = winreg.OpenKey( winreg.HKEY_LOCAL_MACHINE, 'Software\\University of Illinois\\VMD\\' + vmdversion) vmddir = winreg.QueryValueEx(key, 'VMDDIR')[0] vmdbin = join(vmddir, 'vmd.exe') except: pass try: key = winreg.OpenKey( winreg.HKEY_LOCAL_MACHINE, 'Software\\WOW6432node\\University of Illinois\\VMD\\' + vmdversion) vmddir = winreg.QueryValueEx(key, 'VMDDIR')[0] vmdbin = join(vmddir, 'vmd.exe') except: pass else: vmdbin = which('vmd') if False: pipe = os.popen('which vmd') vmdbin = pipe.next().strip() vmdfile = open(vmdbin) for line in vmdfile: if line.startswith('defaultvmddir='): vmddir = line.split('=')[1].replace('"', '') break vmdfile.close() if isExecutable(vmdbin): setVMDpath(vmdbin) return vmdbin elif len(path) == 1: path = path[0] if isExecutable(path): SETTINGS['vmd'] = path SETTINGS.save() LOGGER.info("VMD path is set to '{0}'.".format(path)) else: raise OSError('{0} is not executable.'.format(str(path))) else: raise ValueError('specify a single path string')
def pathVMD(*path): """Return VMD path, or set it to be a user specified *path*.""" if not path: path = SETTINGS.get('vmd', None) if isExecutable(path): return path else: LOGGER.warning('VMD path is not set by user, looking for it.') vmdbin = None vmddir = None if PLATFORM == 'Windows': if PY3K: import winreg else: import _winreg as winreg # PY3K: OK for vmdversion in ('1.8.7', '1.9', '1.9.1'): try: key = winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, 'Software\\University of Illinois\\VMD\\' + vmdversion) vmddir = winreg.QueryValueEx(key, 'VMDDIR')[0] vmdbin = join(vmddir, 'vmd.exe') except: pass try: key = winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, 'Software\\WOW6432node\\University of Illinois\\VMD\\' + vmdversion) vmddir = winreg.QueryValueEx(key, 'VMDDIR')[0] vmdbin = join(vmddir, 'vmd.exe') except: pass else: vmdbin = which('vmd') if False: pipe = os.popen('which vmd') vmdbin = pipe.next().strip() vmdfile = open(vmdbin) for line in vmdfile: if line.startswith('defaultvmddir='): vmddir = line.split('=')[1].replace('"', '') break vmdfile.close() if isExecutable(vmdbin): setVMDpath(vmdbin) return vmdbin elif len(path) == 1: path = path[0] if isExecutable(path): SETTINGS['vmd'] = path SETTINGS.save() LOGGER.info("VMD path is set to '{0}'.".format(path)) else: raise OSError('{0} is not executable.'.format(str(path))) else: raise ValueError('specify a single path string')
def getVMDpath(): """Return VMD path set by user or one identified automatically.""" path = SETTINGS.get("vmd", None) if isExecutable(path): return path else: LOGGER.warning("VMD path is not set by user, looking for it.") from types import StringType, UnicodeType vmdbin = None vmddir = None if PLATFORM == "Windows": import _winreg for vmdversion in ("1.8.7", "1.9", "1.9.1"): try: key = _winreg.OpenKey( _winreg.HKEY_LOCAL_MACHINE, "Software\\University of Illinois\\VMD\\" + vmdversion ) vmddir = _winreg.QueryValueEx(key, "VMDDIR")[0] vmdbin = os.path.join(vmddir, "vmd.exe") except: pass try: key = _winreg.OpenKey( _winreg.HKEY_LOCAL_MACHINE, "Software\\WOW6432node\\University of Illinois\\VMD\\" + vmdversion ) vmddir = _winreg.QueryValueEx(key, "VMDDIR")[0] vmdbin = os.path.join(vmddir, "vmd.exe") except: pass else: vmdbin = which("vmd") if False: pipe = os.popen("which vmd") vmdbin = pipe.next().strip() vmdfile = open(vmdbin) for line in vmdfile: if line.startswith("defaultvmddir="): vmddir = line.split("=")[1].replace('"', "") break vmdfile.close() if ( False and isinstance(vmdbin, (StringType, UnicodeType)) and isinstance(vmddir, (StringType, UnicodeType)) and os.path.isfile(vmdbin) and os.path.isdir(vmddir) ): pass # return vmdbin, vmddir if isExecutable(vmdbin): setVMDpath(vmdbin) return vmdbin
def getVMDpath(): """Return VMD path set by user or one identified automatically.""" path = SETTINGS.get('vmd', None) if isExecutable(path): return path else: LOGGER.warning('VMD path is not set by user, looking for it.') from types import StringType, UnicodeType vmdbin = None vmddir = None if PLATFORM == 'Windows': import _winreg for vmdversion in ('1.8.7', '1.9', '1.9.1'): try: key = _winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE, 'Software\\University of Illinois\\VMD\\' + vmdversion) vmddir = _winreg.QueryValueEx(key, 'VMDDIR')[0] vmdbin = os.path.join(vmddir, 'vmd.exe') except: pass try: key = _winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE, 'Software\\WOW6432node\\University of Illinois\\VMD\\' + vmdversion) vmddir = _winreg.QueryValueEx(key, 'VMDDIR')[0] vmdbin = os.path.join(vmddir, 'vmd.exe') except: pass else: vmdbin = which('vmd') if False: pipe = os.popen('which vmd') vmdbin = pipe.next().strip() vmdfile = open(vmdbin) for line in vmdfile: if line.startswith('defaultvmddir='): vmddir = line.split('=')[1].replace('"', '') break vmdfile.close() if False and \ isinstance(vmdbin, (StringType, UnicodeType)) and \ isinstance(vmddir, (StringType, UnicodeType)) and \ os.path.isfile(vmdbin) and os.path.isdir(vmddir): pass#return vmdbin, vmddir if isExecutable(vmdbin): setVMDpath(vmdbin) return vmdbin
def execSTRIDE(pdb, outputname=None, outputdir=None): """Execute STRIDE program for given *pdb*. *pdb* can be an identifier or a PDB file path. If *pdb* is a compressed file, it will be decompressed using Python :mod:`gzip` library. When no *outputname* is given, output name will be :file:`pdb.stride`. :file:`.stride` extension will be appended automatically to *outputname*. If :file:`outputdir` is given, STRIDE output and uncompressed PDB file will be written into this folder. Upon successful execution of :command:`stride pdb > out` command, output filename is returned. For more information on STRIDE see http://webclu.bio.wzw.tum.de/stride/. If you benefited from STRIDE, please consider citing [DF95]_. .. [DF95] Frishman D, Argos P. Knowledge-Based Protein Secondary Structure Assignment. *Proteins* **1995** 23:566-579.""" stride = which('stride') if stride is None: raise EnvironmentError('command not found: stride executable is not ' 'found in one of system paths') assert outputname is None or isinstance(outputname, str),\ 'outputname must be a string' assert outputdir is None or isinstance(outputdir, str),\ 'outputdir must be a string' if not os.path.isfile(pdb): pdb = fetchPDB(pdb, compressed=False) if pdb is None: raise ValueError('pdb is not a valid PDB identifier or filename') if os.path.splitext(pdb)[1] == '.gz': if outputdir is None: pdb = gunzip(pdb, os.path.splitext(pdb)[0]) else: pdb = gunzip( pdb, os.path.join(outputdir, os.path.split(os.path.splitext(pdb)[0])[1])) if outputdir is None: outputdir = '.' if outputname is None: out = os.path.join( outputdir, os.path.splitext(os.path.split(pdb)[1])[0] + '.stride') else: out = os.path.join(outputdir, outputname + '.stride') status = os.system('{0} {1} > {2}'.format(stride, pdb, out)) if status == 0: return out
def execSTRIDE(pdb, outputname=None, outputdir=None): """Execute STRIDE program for given *pdb*. *pdb* can be an identifier or a PDB file path. If *pdb* is a compressed file, it will be decompressed using Python :mod:`gzip` library. When no *outputname* is given, output name will be :file:`pdb.stride`. :file:`.stride` extension will be appended automatically to *outputname*. If :file:`outputdir` is given, STRIDE output and uncompressed PDB file will be written into this folder. Upon successful execution of :command:`stride pdb > out` command, output filename is returned. For more information on STRIDE see http://webclu.bio.wzw.tum.de/stride/. If you benefited from STRIDE, please consider citing [DF95]_. .. [DF95] Frishman D, Argos P. Knowledge-Based Protein Secondary Structure Assignment. *Proteins* **1995** 23:566-579.""" stride = which('stride') if stride is None: raise EnvironmentError('command not found: stride executable is not ' 'found in one of system paths') assert outputname is None or isinstance(outputname, str),\ 'outputname must be a string' assert outputdir is None or isinstance(outputdir, str),\ 'outputdir must be a string' if not os.path.isfile(pdb): pdb = fetchPDB(pdb, compressed=False) if pdb is None: raise ValueError('pdb is not a valid PDB identifier or filename') if os.path.splitext(pdb)[1] == '.gz': if outputdir is None: pdb = gunzip(pdb, os.path.splitext(pdb)[0]) else: pdb = gunzip(pdb, os.path.join(outputdir, os.path.split(os.path.splitext(pdb)[0])[1])) if outputdir is None: outputdir = '.' if outputname is None: out = os.path.join(outputdir, os.path.splitext(os.path.split(pdb)[1])[0] + '.stride') else: out = os.path.join(outputdir, outputname + '.stride') status = os.system('{0} {1} > {2}'.format(stride, pdb, out)) if status == 0: return out
def conf_opt_setup(name): namd2 = which('namd2') par = os.path.join('/usr/local/lib/vmd/plugins/noarch/tcl/readcharmmpar1.2', 'par_all27_prot_lipid_na.inp') dir_name = name[0:4] + '_opt' if os.path.exists(dir_name): shutil.rmtree(dir_name) os.makedirs(dir_name) conf = open('min.conf').read() for pdb in glob.glob(os.path.join(name[0:4] + '_ens', '*.pdb')): fn = os.path.splitext(os.path.split(pdb)[1])[0] pdb = os.path.join('..', pdb) out = open(os.path.join(dir_name, fn + '.conf'), 'w') out.write(conf.format( out=fn, pdb=pdb, par=par)) out.close()
class TestDSSPFunctions(unittest.TestCase): @dec.slow def setUp(self): """Setup the testing framework.""" self.pdbs = [DATA_FILES['dssp']] @dec.slow @unittest.skipIf(which('dssp') is None, 'dssp is not found') def testDSSPBridgePartners(self): """Check if the DSSP bridge-partners were correctly parsed and assigned.""" for pdb in self.pdbs: prot_ag = parseDatafile(pdb['file'], folder=TEMPDIR) dssp = execDSSP(pathDatafile(pdb['file']), outputdir=TEMPDIR, stderr=False) parseDSSP(dssp, prot_ag, parseall=True) # Map a dssp_resnum to its Residue object. dssp_dict = {} for chain in prot_ag.select("protein").getHierView(): for res in chain: dssp_resnum = res.getData("dssp_resnum")[0] dssp_dict[dssp_resnum] = res for res in dssp_dict.values(): bp1 = res.getData("dssp_bp1")[0] bp2 = res.getData("dssp_bp2")[0] if bp1 != 0: msg_ = "BP1 (dssp_resnum: %d) of %s is missing" % \ (bp1, str(res)) self.assertIn(bp1, dssp_dict, msg=msg_) if bp2 != 0: msg_ = "BP2 (dssp_resnum: %d) of %s is missing" % \ (bp2, str(res)) self.assertIn(bp2, dssp_dict, msg=msg_)
def buildMSA(sequences, title='Unknown', labels=None, **kwargs): """ Aligns sequences with clustalw or clustalw2 and returns the resulting MSA. :arg sequences: a file, MSA object or a list or array containing sequences as Atomic objects with :func:`getSequence` or Sequence objects or strings. If strings are used then labels must be provided using ``labels`` :type sequences: :class:`Atomic`, :class:`.MSA`, :class:`~numpy.ndarray`, str :arg title: the title for the MSA and it will be used as the prefix for output files. :type title: str :arg labels: a list of labels to go with the sequences :type labels: list :arg align: whether to align the sequences default True :type align: bool :arg method: alignment method, one of either biopython.align.globalms or clustalw(2). default 'clustalw' :type align: str """ align = kwargs.get('align', True) method = kwargs.pop('method', 'clustalw') # 1. check if sequences are in a fasta file and if not make one if isinstance(sequences, str): filename = sequences elif not isinstance(sequences, MSA): try: max_len = 0 for sequence in sequences: if isinstance(sequence, Atomic): if len(sequence.ca.copy()) > max_len: max_len = len(sequence.ca.copy()) elif isinstance(sequence, MSA): if len(sequence[0]) > max_len: max_len = len(sequence[0]) else: if len(sequence) > max_len: max_len = len(sequence) msa = [] fetched_labels = [] for i, sequence in enumerate(sequences): if isinstance(sequence, Atomic): strseq = sequence.ca.getSequence() label = sequence.getTitle() elif isinstance(sequence, Sequence): strseq = str(sequence) label = sequence.getLabel() elif isinstance(sequence, MSA): strseq = str(sequence[0]) label = sequence.getLabel(0) LOGGER.warn( 'Only the first sequence in the MSA at entry {0} is used.' .format(i)) elif isinstance(sequence, str): strseq = sequence label = str(i + 1) else: raise TypeError('sequences should be a list of strings, ' 'Atomic, or Sequence instances') strseq = strseq + '-' * (max_len - len(strseq)) msa.append(array(list(strseq))) fetched_labels.append(label) sequences = array(msa) except: raise TypeError('sequences should be iterable') # "if a list" is a pythonic way to check if a list is empty or not (or none) if not labels and fetched_labels: labels = fetched_labels label = [label.replace(' ', '_') for label in labels] # labels checkers are removed because they will be properly handled in MSA class initialization msa = MSA(msa=sequences, title=title, labels=labels) if align and 'clustal' in method: filename = writeMSA(title + '.fasta', msa) if align: # 2. find and run alignment method if 'biopython' in method: if len(sequences) == 2: msa, _, _ = alignTwoSequencesWithBiopython( sequences[0], sequences[1], **kwargs) else: raise ValueError( "Provide only two sequences or another method. \ Biopython pairwise alignment can only be used \ to build an MSA with two sequences.") elif 'clustalw' in method: clustalw = which('clustalw') if clustalw is None: if which('clustalw2') is not None: clustalw = which('clustalw2') else: raise EnvironmentError( "The executable for clustalw was not found, \ install clustalw or add it to the path." ) os.system('"%s" %s -OUTORDER=INPUT' % (clustalw, filename)) # 3. parse and return the new MSA msa = parseMSA(title + '.aln') else: alignTool = which(method) if alignTool is None: raise EnvironmentError("The executable for {0} was not found, \ install it or add it to the path.". format(alignTool)) os.system('"%s" %s -OUTORDER=INPUT' % (clustalw, filename)) # 3. parse and return the new MSA msa = parseMSA(title + '.aln') return msa
def buildMSA(sequences, title='Unknown', labels=None, **kwargs): """ Aligns sequences with clustalw or clustalw2 and returns the resulting MSA. :arg sequences: a file, MSA object or a list or array containing sequences as Atomic objects with :func:`getSequence` or Sequence objects or strings. If strings are used then labels must be provided using ``labels`` :type sequences: :class:`Atomic`, :class:`.MSA`, :class:`~numpy.ndarray`, str :arg title: the title for the MSA and it will be used as the prefix for output files. :type title: str :arg labels: a list of labels to go with the sequences :type labels: list :arg align: whether to align the sequences default True :type align: bool :arg method: alignment method, one of either biopython.align.globalms or clustalw(2). default 'clustalw' :type align: str """ align = kwargs.get('align', True) method = kwargs.pop('method', 'clustalw') # 1. check if sequences are in a fasta file and if not make one if isinstance(sequences, str): filename = sequences elif not isinstance(sequences, MSA): try: max_len = 0 for sequence in sequences: if isinstance(sequence, Atomic): if len(sequence.ca.copy()) > max_len: max_len = len(sequence.ca.copy()) elif isinstance(sequence, MSA): if len(sequence[0]) > max_len: max_len = len(sequence[0]) else: if len(sequence) > max_len: max_len = len(sequence) msa = [] fetched_labels = [] for i, sequence in enumerate(sequences): if isinstance(sequence, Atomic): strseq = sequence.ca.getSequence() label = sequence.getTitle() elif isinstance(sequence, Sequence): strseq = str(sequence) label = sequence.getLabel() elif isinstance(sequence, MSA): strseq = str(sequence[0]) label = sequence.getLabel(0) LOGGER.warn('Only the first sequence in the MSA at entry {0} is used.' .format(i)) elif isinstance(sequence, str): strseq = sequence label = str(i + 1) else: raise TypeError('sequences should be a list of strings, ' 'Atomic, or Sequence instances') strseq = strseq + '-'*(max_len - len(strseq)) msa.append(array(list(strseq))) fetched_labels.append(label) sequences = array(msa) except: raise TypeError('sequences should be iterable') # "if a list" is a pythonic way to check if a list is empty or not (or none) if not labels and fetched_labels: labels = fetched_labels label = [label.replace(' ','_') for label in labels] # labels checkers are removed because they will be properly handled in MSA class initialization msa = MSA(msa=sequences, title=title, labels=labels) if align and 'clustal' in method: filename = writeMSA(title + '.fasta', msa) if align: # 2. find and run alignment method if 'biopython' in method: if len(sequences) == 2: msa, _, _ = alignTwoSequencesWithBiopython(sequences[0], sequences[1], **kwargs) else: raise ValueError("Provide only two sequences or another method. \ Biopython pairwise alignment can only be used \ to build an MSA with two sequences.") elif 'clustalw' in method: clustalw = which('clustalw') if clustalw is None: if which('clustalw2') is not None: clustalw = which('clustalw2') else: raise EnvironmentError("The executable for clustalw was not found, \ install clustalw or add it to the path.") os.system('"%s" %s -OUTORDER=INPUT'%(clustalw, filename)) # 3. parse and return the new MSA msa = parseMSA(title + '.aln') else: alignTool = which(method) if alignTool is None: raise EnvironmentError("The executable for {0} was not found, \ install it or add it to the path.".format(alignTool)) os.system('"%s" %s -OUTORDER=INPUT'%(clustalw, filename)) # 3. parse and return the new MSA msa = parseMSA(title + '.aln') return msa
def buildMSA(sequences, title='Unknown', labels=None, **kwargs): """ Aligns sequences with clustalw or clustalw2 and returns the resulting MSA. :arg sequences: a file, MSA object or a list or array containing sequences as Atomic objects with :func:`getSequence` or Sequence objects or strings. If strings are used then labels must be provided using ``labels`` :type sequences: :class:`Atomic`, :class:`.MSA`, :class:`~numpy.ndarray`, str :arg title: the title for the MSA and it will be used as the prefix for output files. :type title: str :arg labels: a list of labels to go with the sequences :type labels: list :arg align: whether to do alignment with clustalw(2) default True :type align: bool """ align = kwargs.get('align', True) # 1. check if sequences are in a fasta file and if not make one if isinstance(sequences, str): filename = sequences elif not isinstance(sequences, MSA): try: max_len = 0 for sequence in sequences: if len(sequence) > max_len: max_len = len(sequence) msa = [] fetched_labels = [] for i, sequence in enumerate(sequences): if isinstance(sequence, Atomic): strseq = sequence.getSequence() label = sequence.getTitle() elif isinstance(sequence, Sequence): strseq = str(sequence) label = sequence.getLabel() elif isinstance(sequence, str): strseq = sequence label = str(i + 1) else: raise TypeError('sequences should be a list of strings, ' 'Atomic, or Sequence instances') strseq = strseq + '-' * (max_len - len(strseq)) msa.append(array(list(strseq))) fetched_labels.append(label) sequences = array(msa) except: raise TypeError('sequences should be iterable') # "if a list" is a pythonic way to check if a list is empty or not (or none) if not labels and fetched_labels: labels = fetched_labels # labels checkers are removed because they will be properly handled in MSA class initialization msa = MSA(msa=sequences, title=title, labels=labels) if align: filename = writeMSA(title + '.fasta', msa) if align: # 2. find and run alignment method clustalw = which('clustalw') if clustalw is None: if which('clustalw2') is not None: clustalw = which('clustalw2') else: raise EnvironmentError( "The executable for clustalw was not found, \ install clustalw or add it to the path." ) os.system('"%s" %s' % (clustalw, filename)) # 3. parse and return the new MSA msa = parseMSA(title + '.aln') return msa
from os.path import sep as dirsep import inspect import tempfile try: import unittest2 as unittest from unittest2 import TestCase, skipIf, skipUnless except ImportError: import unittest from unittest import TestCase, skipIf, skipUnless from prody.utilities import PLATFORM from prody import LOGGER from prody.utilities import which NOPRODYCMD = which('prody') is None WINDOWS = PLATFORM == 'Windows' try: import matplotlib matplotlib.use('Agg') except ImportError: MATPLOTLIB = False else: try: from matplotlib import pyplot except ImportError: MATPLOTLIB = False else: MATPLOTLIB = True
def scanPockets(self): 'Generates ESSA z-scores for pockets and parses pocket features. It requires both Fpocket 3.0 and Pandas being installed in your system.' from re import findall fpocket = which('fpocket') if fpocket is None: LOGGER.warning( 'Fpocket (version >= 3.0) was not found, please install it.') return None try: from pandas import Index, DataFrame except ImportError as ie: LOGGER.warning(ie.__str__() + ' was found, please install it.') return None rcr = {(i, j): k if self._rib else self._ri[k] for i, j, k in zip(self._ca.getChids(), self._ca.getResnums(), self._ca.getResindices())} writePDB('{}_pro'.format(self._title), self._heavy) direc = '{}_pro_out'.format(self._title) if not isdir(direc): system('fpocket -f {}_pro.pdb'.format(self._title)) chdir(direc + '/pockets') l = [x for x in listdir('.') if x.endswith('.pdb')] l.sort(key=lambda x: int(x.partition('_')[0][6:])) ps = [] for x in l: with open(x, 'r') as f: tmp0 = f.read() tmp1 = [(x[1].strip(), float(x[2])) for x in findall( r'(\w+\s\w+\s*-\s*)(.+):\s*([\d.-]+)(\n)', tmp0)] fea, sco = list(zip(*tmp1)) ps.append(sco) pdbs = parsePDB(l) chdir('../..') # ----- # ----- # ps = array(ps) pcn = { int(pdb.getTitle().partition('_')[0][6:]): set(zip(pdb.getChids().tolist(), pdb.getResnums().tolist())) for pdb in pdbs } pi = {p: [rcr[x] for x in crn] for p, crn in pcn.items()} pzs_max = {k: max(self._zscore[v]) for k, v in pi.items()} pzs_med = {k: median(self._zscore[v]) for k, v in pi.items()} # ----- # ----- # indices = Index(range(1, ps.shape[0] + 1), name='Pocket #') columns = Index(fea, name='Feature') self._df = DataFrame(index=indices, columns=columns, data=ps) # ----- # ----- # columns_zs = Index(['ESSA_max', 'ESSA_med', 'LHD'], name='Z-score') zps = c_[list(pzs_max.values())] zps = hstack((zps, c_[list(pzs_med.values())])) zps = hstack( (zps, zscore(self._df[['Local hydrophobic density Score']]))) self._df_zs = DataFrame(index=indices, columns=columns_zs, data=zps)