Example #1
0
def execDSSP(pdb, outputname=None, outputdir=None, stderr=True):
    """Execute DSSP for given *pdb*.  *pdb* can be a PDB identifier or a PDB
    file path.  If *pdb* is a compressed file, it will be decompressed using
    Python :mod:`gzip` library.  When no *outputname* is given, output name
    will be :file:`pdb.dssp`.  :file:`.dssp` extension will be appended
    automatically to *outputname*.  If :file:`outputdir` is given, DSSP
    output and uncompressed PDB file will be written into this folder.
    Upon successful execution of :command:`dssp pdb > out` command, output
    filename is returned.  **None** is returned when the output file cannot
    be created or the program exits with a non-zero status.  When *stderr*
    is false, standard error messages of the program are discarded.

    The program is started directly with an argument list (no shell), so
    paths containing spaces or shell metacharacters are handled correctly.

    :raises EnvironmentError: when neither ``mkdssp`` nor ``dssp`` is found
    :raises ValueError: when *pdb* is neither a file nor a retrievable
        identifier

    For more information on DSSP see http://swift.cmbi.ru.nl/gv/dssp/.
    If you benefited from DSSP, please consider citing [WK83]_.

    .. [WK83] Kabsch W, Sander C. Dictionary of protein secondary structure:
       pattern recognition of hydrogen-bonded and geometrical features.
       *Biopolymers* **1983** 22:2577-2637."""

    import subprocess

    # prefer the newer executable name, fall back to the classic one
    dssp = which('mkdssp')
    if dssp is None:
        dssp = which('dssp')
    if dssp is None:
        raise EnvironmentError('command not found: dssp executable is not '
                               'found in one of system paths')
    assert outputname is None or isinstance(outputname, str),\
        'outputname must be a string'
    assert outputdir is None or isinstance(outputdir, str),\
        'outputdir must be a string'
    if not os.path.isfile(pdb):
        pdb = fetchPDB(pdb, compressed=False)
    if pdb is None:
        raise ValueError('pdb is not a valid PDB identifier or filename')

    root, ext = os.path.splitext(pdb)
    if ext == '.gz':
        # decompress next to the input, or into outputdir when it is given
        if outputdir is None:
            pdb = gunzip(pdb, root)
        else:
            pdb = gunzip(pdb, os.path.join(outputdir,
                                           os.path.basename(root)))
    if outputdir is None:
        outputdir = '.'
    if outputname is None:
        outputname = os.path.splitext(os.path.basename(pdb))[0]
    out = os.path.join(outputdir, outputname + '.dssp')

    try:
        with open(out, 'w') as stdout:
            if stderr:
                status = subprocess.call([dssp, pdb], stdout=stdout)
            else:
                with open(os.devnull, 'w') as devnull:
                    status = subprocess.call([dssp, pdb], stdout=stdout,
                                             stderr=devnull)
    except (IOError, OSError):
        # keep the historical contract: a failed run yields None
        return None

    if status == 0:
        return out
Example #2
0
def showSequenceTree(hits):
    """Returns a plot that contains a dendrogram of the sequence similarities among
    the sequences in given hit list.

    The hit sequences are written to :file:`hits.fasta` in the current
    working directory, clustalw is run on that file, and the guide tree
    read from :file:`hits.dnd` is drawn with :mod:`Bio.Phylo`.  When the
    clustalw executable is not found, a message is printed and nothing is
    drawn.

    :arg hits: A dictionary that contains hits that are obtained from a blast record object.
        Each value must provide the hit sequence under the ``'hseq'`` key.
    :type hits: dict

    :raises ImportError: when Biopython's Phylo module or pylab cannot be
        imported
    """
    import subprocess

    clustalw = which('clustalw')
    if clustalw is None:
        print(
            "The executable for clustalw does not exists, install or add clustalw to path."
        )
        return
    try:
        from Bio import Phylo
    except ImportError:
        raise ImportError("Phylo is not installed properly.")
    with open("hits.fasta", "w") as inp:
        for z in hits:
            inp.write(">" + str(z) + "\n")
            inp.write(hits[z]['hseq'])
            inp.write("\n")
    # argument list instead of a shell string, so the executable path may
    # contain spaces
    subprocess.call([clustalw, "hits.fasta"])
    tree = Phylo.read("hits.dnd", "newick")
    try:
        # imported only to verify that a plotting backend is available
        import pylab
    except ImportError:
        raise ImportError("Pylab or matplotlib is not installed.")
    Phylo.draw(tree)
    return
Example #3
0
def pathVMD(*path):
    """Returns VMD path, or set it to be a user specified *path*.

    Called without arguments, the path stored in the settings is returned
    when it is executable.  Otherwise VMD is searched for (Windows registry
    on Windows, system path elsewhere); a located executable is stored via
    :func:`setVMDpath` and returned, and **None** is returned when nothing
    is found.

    Called with a single *path*, that path is saved in the settings.

    :raises OSError: when the given *path* is not executable
    :raises ValueError: when more than one argument is given"""

    if not path:
        path = SETTINGS.get('vmd', None)
        if isExecutable(path):
            return path

        LOGGER.warning('VMD path is not set by user, looking for it.')

        vmdbin = None
        if PLATFORM == 'Windows':
            if PY3K:
                import winreg
            else:
                import _winreg as winreg  # PY3K: OK
            # native and 32-bit-on-64-bit registry locations
            roots = ('Software\\University of Illinois\\VMD\\',
                     'Software\\WOW6432node\\University of Illinois\\VMD\\')
            # later matches override earlier ones, as before
            for vmdversion in ('1.8.7', '1.9', '1.9.1'):
                for root in roots:
                    try:
                        key = winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE,
                                             root + vmdversion)
                        vmddir = winreg.QueryValueEx(key, 'VMDDIR')[0]
                    except OSError:
                        # key or value is absent for this version
                        continue
                    vmdbin = join(vmddir, 'vmd.exe')
        else:
            vmdbin = which('vmd')
        if isExecutable(vmdbin):
            setVMDpath(vmdbin)
            return vmdbin
    elif len(path) == 1:
        path = path[0]
        if isExecutable(path):
            SETTINGS['vmd'] = path
            SETTINGS.save()
            LOGGER.info("VMD path is set to '{0}'.".format(path))
        else:
            raise OSError('{0} is not executable.'.format(str(path)))
    else:
        raise ValueError('specify a single path string')
Example #4
0
def pathVMD(*path):
    """Return VMD path, or set it to be a user specified *path*.

    With no argument, the configured path is returned if it is executable;
    otherwise the function looks for VMD (Windows registry entries for known
    versions, or the system path on other platforms), records a located
    executable with :func:`setVMDpath` and returns it.  **None** is returned
    when VMD cannot be located.

    With one argument, the given *path* is stored in the settings.

    :raises OSError: when the given *path* is not executable
    :raises ValueError: when more than one argument is given"""

    if len(path) > 1:
        raise ValueError('specify a single path string')

    if path:
        path = path[0]
        if not isExecutable(path):
            raise OSError('{0} is not executable.'.format(str(path)))
        SETTINGS['vmd'] = path
        SETTINGS.save()
        LOGGER.info("VMD path is set to '{0}'.".format(path))
        return

    path = SETTINGS.get('vmd', None)
    if isExecutable(path):
        return path

    LOGGER.warning('VMD path is not set by user, looking for it.')

    vmdbin = None
    if PLATFORM == 'Windows':
        if PY3K:
            import winreg
        else:
            import _winreg as winreg  # PY3K: OK
        # for every known version try the native key first, then the
        # WOW6432node key; the last readable entry wins
        keynames = []
        for vmdversion in ('1.8.7', '1.9', '1.9.1'):
            keynames.append('Software\\University of Illinois\\VMD\\' +
                            vmdversion)
            keynames.append(
                'Software\\WOW6432node\\University of Illinois\\VMD\\' +
                vmdversion)
        for keyname in keynames:
            try:
                key = winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, keyname)
                vmddir = winreg.QueryValueEx(key, 'VMDDIR')[0]
            except OSError:
                # registry key or value does not exist
                continue
            vmdbin = join(vmddir, 'vmd.exe')
    else:
        vmdbin = which('vmd')

    if isExecutable(vmdbin):
        setVMDpath(vmdbin)
        return vmdbin
Example #5
0
def getVMDpath():
    """Return VMD path set by user or one identified automatically.

    The path stored in the settings is returned when it is executable.
    Otherwise VMD is looked up in the Windows registry (on Windows) or on
    the system path; a located executable is stored with
    :func:`setVMDpath` and returned.  **None** is returned when VMD cannot
    be found."""

    path = SETTINGS.get("vmd", None)
    if isExecutable(path):
        return path

    LOGGER.warning("VMD path is not set by user, looking for it.")

    vmdbin = None
    if PLATFORM == "Windows":
        # the registry module was renamed in Python 3
        try:
            import winreg
        except ImportError:
            import _winreg as winreg

        roots = (
            "Software\\University of Illinois\\VMD\\",
            "Software\\WOW6432node\\University of Illinois\\VMD\\",
        )
        # later matches override earlier ones
        for vmdversion in ("1.8.7", "1.9", "1.9.1"):
            for root in roots:
                try:
                    key = winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, root + vmdversion)
                    vmddir = winreg.QueryValueEx(key, "VMDDIR")[0]
                except OSError:
                    # this version is not registered under this root
                    continue
                vmdbin = os.path.join(vmddir, "vmd.exe")
    else:
        vmdbin = which("vmd")

    if isExecutable(vmdbin):
        setVMDpath(vmdbin)
        return vmdbin
Example #6
0
def getVMDpath():
    """Return VMD path set by user or one identified automatically.

    When the configured path is not executable, the Windows registry (on
    Windows) or the system path (elsewhere) is searched.  A located
    executable is recorded with :func:`setVMDpath` before it is returned;
    **None** is returned when the search fails."""

    path = SETTINGS.get('vmd', None)
    if isExecutable(path):
        return path

    LOGGER.warning('VMD path is not set by user, looking for it.')

    vmdbin = None
    if PLATFORM == 'Windows':
        try:
            import winreg
        except ImportError:
            # Python 2 name of the registry module
            import _winreg as winreg
        for vmdversion in ('1.8.7', '1.9', '1.9.1'):
            # native location first, then the 32-bit view on 64-bit
            # systems; the last readable entry wins
            for prefix in ('Software\\University of Illinois\\VMD\\',
                           'Software\\WOW6432node\\University of Illinois'
                           '\\VMD\\'):
                try:
                    key = winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE,
                                         prefix + vmdversion)
                    vmddir = winreg.QueryValueEx(key, 'VMDDIR')[0]
                except OSError:
                    continue
                vmdbin = os.path.join(vmddir, 'vmd.exe')
    else:
        vmdbin = which('vmd')

    if isExecutable(vmdbin):
        setVMDpath(vmdbin)
        return vmdbin
Example #7
0
def execSTRIDE(pdb, outputname=None, outputdir=None):
    """Execute STRIDE program for given *pdb*.  *pdb* can be an identifier or
    a PDB file path.  If *pdb* is a compressed file, it will be decompressed
    using Python :mod:`gzip` library.  When no *outputname* is given, output
    name will be :file:`pdb.stride`.  :file:`.stride` extension will be
    appended automatically to *outputname*.  If :file:`outputdir` is given,
    STRIDE output and uncompressed PDB file will be written into this folder.
    Upon successful execution of :command:`stride pdb > out` command, output
    filename is returned.  **None** is returned when the output file cannot
    be created or STRIDE exits with a non-zero status.

    STRIDE is started directly with an argument list (no shell), so paths
    containing spaces or shell metacharacters are handled correctly.

    :raises EnvironmentError: when the ``stride`` executable is not found
    :raises ValueError: when *pdb* is neither a file nor a retrievable
        identifier

    For more information on STRIDE see http://webclu.bio.wzw.tum.de/stride/.
    If you benefited from STRIDE, please consider citing [DF95]_.

    .. [DF95] Frishman D, Argos P. Knowledge-Based Protein Secondary Structure
       Assignment. *Proteins* **1995** 23:566-579."""

    import subprocess

    stride = which('stride')
    if stride is None:
        raise EnvironmentError('command not found: stride executable is not '
                               'found in one of system paths')
    assert outputname is None or isinstance(outputname, str),\
        'outputname must be a string'
    assert outputdir is None or isinstance(outputdir, str),\
        'outputdir must be a string'
    if not os.path.isfile(pdb):
        pdb = fetchPDB(pdb, compressed=False)
    if pdb is None:
        raise ValueError('pdb is not a valid PDB identifier or filename')

    root, ext = os.path.splitext(pdb)
    if ext == '.gz':
        # decompress next to the input, or into outputdir when it is given
        if outputdir is None:
            pdb = gunzip(pdb, root)
        else:
            pdb = gunzip(pdb, os.path.join(outputdir,
                                           os.path.basename(root)))
    if outputdir is None:
        outputdir = '.'
    if outputname is None:
        outputname = os.path.splitext(os.path.basename(pdb))[0]
    out = os.path.join(outputdir, outputname + '.stride')

    try:
        with open(out, 'w') as stdout:
            status = subprocess.call([stride, pdb], stdout=stdout)
    except (IOError, OSError):
        # keep the historical contract: a failed run yields None
        return None

    if status == 0:
        return out
Example #8
0
def execSTRIDE(pdb, outputname=None, outputdir=None):
    """Execute STRIDE program for given *pdb*.  *pdb* can be an identifier or
    a PDB file path.  If *pdb* is a compressed file, it will be decompressed
    using Python :mod:`gzip` library.  When no *outputname* is given, output
    name will be :file:`pdb.stride`.  :file:`.stride` extension will be
    appended automatically to *outputname*.  If :file:`outputdir` is given,
    STRIDE output and uncompressed PDB file will be written into this folder.
    Upon successful execution of :command:`stride pdb > out` command, output
    filename is returned.  When the output file cannot be written or STRIDE
    reports a failure (non-zero exit status), **None** is returned.

    The executable is invoked without a shell; its standard output is
    redirected into the output file by this function.

    :raises EnvironmentError: when the ``stride`` executable is not found
    :raises ValueError: when *pdb* is neither a file nor a retrievable
        identifier

    For more information on STRIDE see http://webclu.bio.wzw.tum.de/stride/.
    If you benefited from STRIDE, please consider citing [DF95]_.

    .. [DF95] Frishman D, Argos P. Knowledge-Based Protein Secondary Structure
       Assignment. *Proteins* **1995** 23:566-579."""

    import subprocess

    stride = which('stride')
    if stride is None:
        raise EnvironmentError('command not found: stride executable is not '
                               'found in one of system paths')
    assert outputname is None or isinstance(outputname, str),\
        'outputname must be a string'
    assert outputdir is None or isinstance(outputdir, str),\
        'outputdir must be a string'
    if not os.path.isfile(pdb):
        pdb = fetchPDB(pdb, compressed=False)
    if pdb is None:
        raise ValueError('pdb is not a valid PDB identifier or filename')
    if os.path.splitext(pdb)[1] == '.gz':
        unzipped = os.path.splitext(pdb)[0]
        if outputdir is not None:
            # place the uncompressed copy into the requested folder
            unzipped = os.path.join(outputdir, os.path.split(unzipped)[1])
        pdb = gunzip(pdb, unzipped)
    if outputdir is None:
        outputdir = '.'
    if outputname is None:
        out = os.path.join(outputdir,
                           os.path.splitext(os.path.split(pdb)[1])[0] +
                           '.stride')
    else:
        out = os.path.join(outputdir, outputname + '.stride')

    try:
        with open(out, 'w') as stream:
            # argument list: no shell quoting issues with unusual paths
            status = subprocess.call([stride, pdb], stdout=stream)
    except (IOError, OSError):
        # a run that cannot be started or written counts as a failure
        return None
    if status == 0:
        return out
def conf_opt_setup(name):
  """Write one configuration file per PDB file of an ensemble.

  The first four characters of *name* are used as a prefix.  The folder
  ``<prefix>_opt`` is recreated from scratch (an existing one is deleted),
  the template is read from :file:`min.conf` in the current working
  directory, and for every ``*.pdb`` file in ``<prefix>_ens`` a
  ``<basename>.conf`` file is written into ``<prefix>_opt``.  The template
  is filled with :meth:`str.format` using the fields ``out`` (PDB file
  basename), ``pdb`` (PDB path relative to the output folder) and ``par``
  (parameter file path).

  :arg name: a name whose first four characters identify the ensemble
  :type name: str"""

  par = os.path.join('/usr/local/lib/vmd/plugins/noarch/tcl/readcharmmpar1.2', 'par_all27_prot_lipid_na.inp')

  prefix = name[0:4]
  dir_name = prefix + '_opt'

  # start from an empty output folder
  if os.path.exists(dir_name):
    shutil.rmtree(dir_name)

  os.makedirs(dir_name)

  with open('min.conf') as template:
    conf = template.read()

  for pdb in glob.glob(os.path.join(prefix + '_ens', '*.pdb')):
    fn = os.path.splitext(os.path.split(pdb)[1])[0]
    # configuration files live one level below the working directory
    pdb = os.path.join('..', pdb)
    with open(os.path.join(dir_name, fn + '.conf'), 'w') as out:
      out.write(conf.format(out=fn, pdb=pdb, par=par))
def conf_opt_setup(name):
  """Generate per-structure configuration files from :file:`min.conf`.

  ``name[0:4]`` selects the ensemble: PDB files are taken from the folder
  ``<name[0:4]>_ens`` and a configuration file named after each PDB file
  is written to ``<name[0:4]>_opt``.  An existing output folder is removed
  first.  The text of :file:`min.conf` (read from the current working
  directory) is used as a :meth:`str.format` template with the keyword
  fields ``out``, ``pdb`` and ``par``.

  :arg name: a name whose first four characters identify the ensemble
  :type name: str"""

  par = os.path.join('/usr/local/lib/vmd/plugins/noarch/tcl/readcharmmpar1.2', 'par_all27_prot_lipid_na.inp')

  dir_name = name[0:4] + '_opt'
  ens_pattern = os.path.join(name[0:4] + '_ens', '*.pdb')

  # discard results of a previous run
  if os.path.exists(dir_name):
    shutil.rmtree(dir_name)
  os.makedirs(dir_name)

  with open('min.conf') as inp:
    conf = inp.read()

  for pdb in glob.glob(ens_pattern):
    fn = os.path.splitext(os.path.basename(pdb))[0]
    # the PDB path is written relative to the output folder
    text = conf.format(out=fn, pdb=os.path.join('..', pdb), par=par)
    with open(os.path.join(dir_name, fn + '.conf'), 'w') as out:
      out.write(text)
Example #11
0
class TestDSSPFunctions(unittest.TestCase):
    """Tests for running DSSP and parsing its output."""

    @dec.slow
    def setUp(self):
        """Setup the testing framework."""

        self.pdbs = [DATA_FILES['dssp']]

    @dec.slow
    # execDSSP accepts either executable name, so skip only when both are
    # missing
    @unittest.skipIf(which('mkdssp') is None and which('dssp') is None,
                     'dssp is not found')
    def testDSSPBridgePartners(self):
        """Check if the DSSP bridge-partners were correctly parsed and
        assigned."""

        for pdb in self.pdbs:
            prot_ag = parseDatafile(pdb['file'], folder=TEMPDIR)
            dssp = execDSSP(pathDatafile(pdb['file']),
                            outputdir=TEMPDIR,
                            stderr=False)
            # execDSSP returns None when the program fails
            self.assertIsNotNone(dssp, 'DSSP execution failed')
            parseDSSP(dssp, prot_ag, parseall=True)

            # Map a dssp_resnum to its Residue object.
            dssp_dict = {}

            for chain in prot_ag.select("protein").getHierView():
                for res in chain:
                    dssp_resnum = res.getData("dssp_resnum")[0]
                    dssp_dict[dssp_resnum] = res

            for res in dssp_dict.values():
                bp1 = res.getData("dssp_bp1")[0]
                bp2 = res.getData("dssp_bp2")[0]

                # a value of 0 is treated as "no bridge partner"
                if bp1 != 0:
                    msg_ = "BP1 (dssp_resnum: %d) of %s is missing" % \
                        (bp1, str(res))
                    self.assertIn(bp1, dssp_dict, msg=msg_)

                if bp2 != 0:
                    msg_ = "BP2 (dssp_resnum: %d) of %s is missing" % \
                        (bp2, str(res))
                    self.assertIn(bp2, dssp_dict, msg=msg_)
Example #12
0
def buildMSA(sequences, title='Unknown', labels=None, **kwargs):
    """
    Aligns sequences with clustalw or clustalw2 and returns the resulting MSA.

    :arg sequences: a file, MSA object or a list or array containing sequences
       as Atomic objects with :func:`getSequence` or Sequence objects or strings. 
       If strings are used then labels must be provided using ``labels``
    :type sequences: :class:`Atomic`, :class:`.MSA`, 
        :class:`~numpy.ndarray`, str

    :arg title: the title for the MSA and it will be used as the prefix for output files.
    :type title: str

    :arg labels: a list of labels to go with the sequences
    :type labels: list

    :arg align: whether to align the sequences
        default True
    :type align: bool

    :arg method: alignment method, one of either biopython.align.globalms or clustalw(2).
        default 'clustalw'
    :type method: str

    :raises TypeError: when *sequences* is not iterable or contains
        unsupported items
    :raises ValueError: when the biopython method is used with a number of
        sequences other than two
    :raises EnvironmentError: when the alignment executable is not found
    """

    import subprocess

    align = kwargs.get('align', True)
    method = kwargs.pop('method', 'clustalw')
    use_biopython = 'biopython' in method

    # 1. check if sequences are in a fasta file and if not make one
    if isinstance(sequences, str):
        filename = sequences
        if not align:
            # nothing to run: return the content of the file as it is
            msa = parseMSA(filename)
    elif isinstance(sequences, MSA):
        msa = sequences
        if align and not use_biopython:
            filename = writeMSA(title + '.fasta', msa)
    else:
        try:
            strseqs = []
            fetched_labels = []
            for i, sequence in enumerate(sequences):
                if isinstance(sequence, Atomic):
                    strseq = sequence.ca.getSequence()
                    label = sequence.getTitle()
                elif isinstance(sequence, Sequence):
                    strseq = str(sequence)
                    label = sequence.getLabel()
                elif isinstance(sequence, MSA):
                    strseq = str(sequence[0])
                    label = sequence.getLabel(0)
                    LOGGER.warn(
                        'Only the first sequence in the MSA at entry {0} is used.'
                        .format(i))
                elif isinstance(sequence, str):
                    strseq = sequence
                    label = str(i + 1)
                else:
                    raise TypeError('sequences should be a list of strings, '
                                    'Atomic, or Sequence instances')
                strseqs.append(strseq)
                fetched_labels.append(label)
        except TypeError:
            # keep the specific message (unsupported item or non-iterable)
            raise
        except Exception:
            raise TypeError('sequences should be iterable')

        # pad every sequence with gaps to the length of the longest one
        max_len = max([len(strseq) for strseq in strseqs] or [0])
        sequences = array([array(list(strseq + '-' * (max_len - len(strseq))))
                           for strseq in strseqs])

        # "if a list" is a pythonic way to check if a list is empty or not (or none)
        if not labels and fetched_labels:
            labels = fetched_labels

        # spaces in labels are replaced so that each label stays one token
        # in the fasta header
        if labels:
            labels = [label.replace(' ', '_') for label in labels]
        # labels checkers are removed because they will be properly handled in MSA class initialization
        msa = MSA(msa=sequences, title=title, labels=labels)

        # every external program needs an input file
        if align and not use_biopython:
            filename = writeMSA(title + '.fasta', msa)

    if align:
        # 2. find and run alignment method
        if use_biopython:
            if len(sequences) == 2:
                msa, _, _ = alignTwoSequencesWithBiopython(
                    sequences[0], sequences[1], **kwargs)
            else:
                raise ValueError(
                    "Provide only two sequences or another method. "
                    "Biopython pairwise alignment can only be used "
                    "to build an MSA with two sequences.")
        else:
            if 'clustalw' in method:
                alignTool = which('clustalw')
                if alignTool is None:
                    alignTool = which('clustalw2')
                if alignTool is None:
                    raise EnvironmentError(
                        "The executable for clustalw was not found, "
                        "install clustalw or add it to the path.")
            else:
                alignTool = which(method)
                if alignTool is None:
                    raise EnvironmentError(
                        "The executable for {0} was not found, "
                        "install it or add it to the path.".format(method))

            # argument list instead of a shell string, so paths with
            # spaces work
            subprocess.call([alignTool, filename, '-OUTORDER=INPUT'])

            # 3. parse and return the new MSA
            # NOTE(review): the alignment is expected in title + '.aln';
            # for a user supplied file this presumably requires title to
            # match the file name prefix -- confirm
            msa = parseMSA(title + '.aln')

    return msa
Example #13
0
def buildMSA(sequences, title='Unknown', labels=None, **kwargs):
    """
    Aligns sequences with clustalw or clustalw2 and returns the resulting MSA.

    :arg sequences: a file, MSA object or a list or array containing sequences
       as Atomic objects with :func:`getSequence` or Sequence objects or strings. 
       If strings are used then labels must be provided using ``labels``
    :type sequences: :class:`Atomic`, :class:`.MSA`, 
        :class:`~numpy.ndarray`, str

    :arg title: the title for the MSA and it will be used as the prefix for output files.
    :type title: str

    :arg labels: a list of labels to go with the sequences
    :type labels: list

    :arg align: whether to align the sequences
        default True
    :type align: bool

    :arg method: alignment method, one of either biopython.align.globalms or clustalw(2).
        default 'clustalw'
    :type method: str

    :raises TypeError: when *sequences* is not iterable or contains
        unsupported items
    :raises ValueError: when the biopython method is used with a number of
        sequences other than two
    :raises EnvironmentError: when the alignment executable is not found
    """

    import subprocess

    align = kwargs.get('align', True)
    method = kwargs.pop('method', 'clustalw')
    # external programs read their input from a fasta file
    needs_file = align and 'biopython' not in method

    # 1. check if sequences are in a fasta file and if not make one
    if isinstance(sequences, str):
        filename = sequences
        if not align:
            # no alignment requested: return the file content unchanged
            msa = parseMSA(filename)
    elif isinstance(sequences, MSA):
        msa = sequences
        if needs_file:
            filename = writeMSA(title + '.fasta', msa)
    else:
        try:
            pairs = []
            for i, sequence in enumerate(sequences):
                if isinstance(sequence, Atomic):
                    pairs.append((sequence.ca.getSequence(),
                                  sequence.getTitle()))
                elif isinstance(sequence, Sequence):
                    pairs.append((str(sequence), sequence.getLabel()))
                elif isinstance(sequence, MSA):
                    pairs.append((str(sequence[0]), sequence.getLabel(0)))
                    LOGGER.warn('Only the first sequence in the MSA at entry {0} is used.'
                                .format(i))
                elif isinstance(sequence, str):
                    pairs.append((sequence, str(i + 1)))
                else:
                    raise TypeError('sequences should be a list of strings, '
                                    'Atomic, or Sequence instances')
        except TypeError:
            # the specific message is more useful than the generic one
            raise
        except Exception:
            raise TypeError('sequences should be iterable')

        # gap-pad all sequences to a common length
        max_len = max([len(strseq) for strseq, _ in pairs] or [0])
        sequences = array([array(list(strseq + '-'*(max_len - len(strseq))))
                           for strseq, _ in pairs])
        fetched_labels = [label for _, label in pairs]

        # "if a list" is a pythonic way to check if a list is empty or not (or none)
        if not labels and fetched_labels:
            labels = fetched_labels

        # replace spaces so that a label is a single token in fasta headers
        if labels:
            labels = [label.replace(' ','_') for label in labels]
        # labels checkers are removed because they will be properly handled in MSA class initialization
        msa = MSA(msa=sequences, title=title, labels=labels)

        if needs_file:
            filename = writeMSA(title + '.fasta', msa)

    if not align:
        return msa

    # 2. find and run alignment method
    if 'biopython' in method:
        if len(sequences) != 2:
            raise ValueError("Provide only two sequences or another method. "
                             "Biopython pairwise alignment can only be used "
                             "to build an MSA with two sequences.")
        msa, _, _ = alignTwoSequencesWithBiopython(sequences[0], sequences[1], **kwargs)
        return msa

    if 'clustalw' in method:
        alignTool = which('clustalw')
        if alignTool is None:
            alignTool = which('clustalw2')
        if alignTool is None:
            raise EnvironmentError("The executable for clustalw was not found, "
                                   "install clustalw or add it to the path.")
    else:
        alignTool = which(method)
        if alignTool is None:
            raise EnvironmentError("The executable for {0} was not found, "
                                   "install it or add it to the path.".format(method))

    # run without a shell so that unusual paths need no quoting
    subprocess.call([alignTool, filename, '-OUTORDER=INPUT'])

    # 3. parse and return the new MSA
    # NOTE(review): the result is read from title + '.aln'; with a user
    # supplied input file this presumably only works when title equals
    # the file name prefix -- confirm
    return parseMSA(title + '.aln')
Example #14
0
def buildMSA(sequences, title='Unknown', labels=None, **kwargs):
    """
    Aligns sequences with clustalw or clustalw2 and returns the resulting MSA.

    :arg sequences: a file, MSA object or a list or array containing sequences
       as Atomic objects with :func:`getSequence` or Sequence objects or strings. 
       If strings are used then labels must be provided using ``labels``
    :type sequences: :class:`Atomic`, :class:`.MSA`, 
        :class:`~numpy.ndarray`, str

    :arg title: the title for the MSA and it will be used as the prefix for output files.
    :type title: str

    :arg labels: a list of labels to go with the sequences
    :type labels: list

    :arg align: whether to do alignment with clustalw(2)
        default True
    :type align: bool

    :raises TypeError: when *sequences* is not iterable or contains
        unsupported items
    :raises EnvironmentError: when neither clustalw nor clustalw2 is found
    """

    import subprocess

    align = kwargs.get('align', True)
    # 1. check if sequences are in a fasta file and if not make one
    if isinstance(sequences, str):
        filename = sequences
        if not align:
            # nothing to run: return the content of the file as it is
            msa = parseMSA(filename)
    elif isinstance(sequences, MSA):
        msa = sequences
        if align:
            filename = writeMSA(title + '.fasta', msa)
    else:
        try:
            strseqs = []
            fetched_labels = []
            for i, sequence in enumerate(sequences):
                if isinstance(sequence, Atomic):
                    strseq = sequence.getSequence()
                    label = sequence.getTitle()
                elif isinstance(sequence, Sequence):
                    strseq = str(sequence)
                    label = sequence.getLabel()
                elif isinstance(sequence, str):
                    strseq = sequence
                    label = str(i + 1)
                else:
                    raise TypeError('sequences should be a list of strings, '
                                    'Atomic, or Sequence instances')
                strseqs.append(strseq)
                fetched_labels.append(label)
        except TypeError:
            # keep the specific message (unsupported item or non-iterable)
            raise
        except Exception:
            raise TypeError('sequences should be iterable')

        # pad to the longest sequence string; the length of the string is
        # used rather than len() of the input object
        max_len = max([len(strseq) for strseq in strseqs] or [0])
        sequences = array([array(list(strseq + '-' * (max_len - len(strseq))))
                           for strseq in strseqs])

        # "if a list" is a pythonic way to check if a list is empty or not (or none)
        if not labels and fetched_labels:
            labels = fetched_labels
        # labels checkers are removed because they will be properly handled in MSA class initialization
        msa = MSA(msa=sequences, title=title, labels=labels)

        if align:
            filename = writeMSA(title + '.fasta', msa)

    if align:
        # 2. find and run alignment method
        clustalw = which('clustalw')
        if clustalw is None:
            clustalw = which('clustalw2')
        if clustalw is None:
            raise EnvironmentError(
                "The executable for clustalw was not found, "
                "install clustalw or add it to the path.")

        # argument list instead of a shell string, so paths with spaces work
        subprocess.call([clustalw, filename])

        # 3. parse and return the new MSA
        # NOTE(review): the alignment is expected in title + '.aln'; for a
        # user supplied file this presumably requires title to match the
        # file name prefix -- confirm
        msa = parseMSA(title + '.aln')

    return msa
Example #15
0
from os.path import sep as dirsep
import inspect
import tempfile

try:
    import unittest2 as unittest
    from unittest2 import TestCase, skipIf, skipUnless
except ImportError:
    import unittest
    from unittest import TestCase, skipIf, skipUnless

from prody.utilities import PLATFORM
from prody import LOGGER

from prody.utilities import which
# True when ``which`` cannot locate a ``prody`` executable (it returns None
# in that case).  NOTE(review): presumably used to skip command-line tests;
# confirm against the test modules that import it.
NOPRODYCMD = which('prody') is None

# True when the PLATFORM value exported by prody.utilities is 'Windows'.
WINDOWS = PLATFORM == 'Windows'

# Probe for a usable matplotlib.  MATPLOTLIB ends up True only when the
# package imports, the 'Agg' backend can be selected, and pyplot imports;
# an ImportError raised by any of these steps marks it as unavailable.
MATPLOTLIB = True
try:
    import matplotlib
    # The backend is selected before pyplot is imported.
    matplotlib.use('Agg')
    from matplotlib import pyplot
except ImportError:
    MATPLOTLIB = False
Example #16
0
    def scanPockets(self):
        """Generate ESSA z-scores for pockets and parse pocket features.

        Requires both Fpocket 3.0 and Pandas to be installed on the system.

        ``self._heavy`` is written with :func:`writePDB` under the name
        :file:`<title>_pro`, Fpocket is run on :file:`<title>_pro.pdb`
        unless the folder :file:`<title>_pro_out` already exists, and the
        :file:`.pdb` files found in its :file:`pockets` subfolder are read
        for feature scores and parsed with :func:`parsePDB`.

        On success the pocket features are stored in ``self._df`` and the
        pocket z-scores (columns ``ESSA_max``, ``ESSA_med`` and ``LHD``) in
        ``self._df_zs``.  The method always returns **None**; a warning is
        logged and nothing is stored when Fpocket or Pandas is missing or
        when no pocket file is found.

        The current working directory is changed temporarily while pocket
        files are read and is restored afterwards, even if reading or
        parsing fails."""

        from os import getcwd
        from re import findall

        fpocket = which('fpocket')

        if fpocket is None:
            LOGGER.warning(
                'Fpocket (version >= 3.0) was not found, please install it.')
            return None

        try:
            from pandas import Index, DataFrame
        except ImportError as ie:
            LOGGER.warning(str(ie) + ' was found, please install it.')
            return None

        # (chain id, residue number) -> residue index.  The index reported by
        # self._ca is used directly when self._rib is true, otherwise it is
        # translated through self._ri.
        rcr = {(i, j): k if self._rib else self._ri[k]
               for i, j, k in zip(self._ca.getChids(), self._ca.getResnums(),
                                  self._ca.getResindices())}

        writePDB('{}_pro'.format(self._title), self._heavy)

        # An existing output folder is reused instead of running Fpocket again.
        direc = '{}_pro_out'.format(self._title)
        if not isdir(direc):
            system('fpocket -f {}_pro.pdb'.format(self._title))

        # Remember where we started so the working directory can be restored
        # exactly, whatever happens while the pocket files are processed.
        start_dir = getcwd()
        chdir(direc + '/pockets')
        try:
            pocket_files = [name for name in listdir('.')
                            if name.endswith('.pdb')]
            # File names look like 'pocket<N>_...': order them by <N>.
            pocket_files.sort(key=lambda name: int(name.partition('_')[0][6:]))

            if not pocket_files:
                # Without any pocket there are no feature names or scores to
                # tabulate; bail out instead of failing later on an unbound
                # name.
                LOGGER.warning('No pocket PDB files were found in '
                               '{}/pockets.'.format(direc))
                return None

            ps = []
            for name in pocket_files:
                with open(name, 'r') as f:
                    text = f.read()
                # Each match yields (prefix, feature name, score, newline).
                parsed = [(m[1].strip(), float(m[2])) for m in findall(
                    r'(\w+\s\w+\s*-\s*)(.+):\s*([\d.-]+)(\n)', text)]
                # Feature names are taken from the last file processed.
                fea, sco = list(zip(*parsed))
                ps.append(sco)
            pdbs = parsePDB(pocket_files)
        finally:
            chdir(start_dir)

        # ----- # ----- #

        ps = array(ps)

        # pocket number -> set of (chain id, residue number) pairs in it
        pcn = {
            int(pdb.getTitle().partition('_')[0][6:]):
            set(zip(pdb.getChids().tolist(),
                    pdb.getResnums().tolist()))
            for pdb in pdbs
        }
        # pocket number -> residue indices looked up through rcr
        pi = {p: [rcr[x] for x in crn] for p, crn in pcn.items()}

        pzs_max = {k: max(self._zscore[v]) for k, v in pi.items()}
        pzs_med = {k: median(self._zscore[v]) for k, v in pi.items()}

        # ----- # ----- #

        indices = Index(range(1, ps.shape[0] + 1), name='Pocket #')

        columns = Index(fea, name='Feature')

        self._df = DataFrame(index=indices, columns=columns, data=ps)

        # ----- # ----- #

        columns_zs = Index(['ESSA_max', 'ESSA_med', 'LHD'], name='Z-score')

        # NOTE(review): rows are taken from the dicts in iteration order, so
        # they line up with `indices` only when dicts preserve insertion
        # order (guaranteed from Python 3.7).
        zps = c_[list(pzs_max.values())]
        zps = hstack((zps, c_[list(pzs_med.values())]))
        zps = hstack(
            (zps, zscore(self._df[['Local hydrophobic density Score']])))

        self._df_zs = DataFrame(index=indices, columns=columns_zs, data=zps)
Example #17
0
from os.path import sep as dirsep
import inspect
import tempfile

try:
    import unittest2 as unittest
    from unittest2 import TestCase, skipIf, skipUnless
except ImportError:
    import unittest
    from unittest import TestCase, skipIf, skipUnless

from prody.utilities import PLATFORM
from prody import LOGGER

from prody.utilities import which
# True when ``which`` cannot locate a ``prody`` executable (it returns None
# in that case).  NOTE(review): presumably used to skip command-line tests;
# confirm against the test modules that import it.
NOPRODYCMD = which('prody') is None

# True when the PLATFORM value exported by prody.utilities is 'Windows'.
WINDOWS = PLATFORM == 'Windows'

# Probe for a usable matplotlib.  MATPLOTLIB ends up True only when the
# package imports, the 'Agg' backend can be selected, and pyplot imports;
# an ImportError raised by any of these steps marks it as unavailable.
MATPLOTLIB = True
try:
    import matplotlib
    # The backend is selected before pyplot is imported.
    matplotlib.use('Agg')
    from matplotlib import pyplot
except ImportError:
    MATPLOTLIB = False