예제 #1
0
파일: analysis.py 프로젝트: ielushuai/ProDy
def buildSCAMatrix(msa, turbo=True, **kwargs):
    """Returns the SCA matrix calculated for *msa*, which may be an
    :class:`.MSA` instance or a 2D Numpy character array.

    Implementation is case insensitive and handles ambiguous amino acids
    as follows:

      * **B** (Asx) count is allocated to *D* (Asp) and *N* (Asn)
      * **Z** (Glx) count is allocated to *E* (Glu) and *Q* (Gln)
      * **J** (Xle) count is allocated to *I* (Ile) and *L* (Leu)
      * **X** (Xaa) count is allocated to the twenty standard amino acids
      * Joint probability of observing a pair of ambiguous amino acids is
        allocated to all potential combinations, e.g. probability of **XX**
        is allocated to 400 combinations of standard amino acids, similarly
        probability of **XB** is allocated to 40 combinations of *D* and *N*
        with the standard amino acids.

    Selenocysteine (**U**, Sec) and pyrrolysine (**O**, Pyl) are considered
    as distinct amino acids.  All non-alphabet characters are considered as
    gaps.

    *turbo* is converted to a boolean and forwarded to ``msasca``.  Extra
    keyword arguments are accepted but not used by this function.  A warning
    is logged when *msa* holds fewer than 100 sequences."""

    alignment = getMSA(msa)
    n_sequences = alignment.shape[0]
    if n_sequences < 100:
        LOGGER.warning(
            'SCA performs the best with higher number of sequences, and '
            'minimal number of sequences is recommended as 100.')

    from .msatools import msasca
    LOGGER.timeit('_sca')
    n_columns = alignment.shape[1]
    # Square, zero-initialised float buffer handed to the msasca routine.
    buffer = zeros((n_columns, n_columns), float)
    result = msasca(alignment, buffer, turbo=bool(turbo))
    LOGGER.report('SCA matrix was calculated in %.2fs.', '_sca')
    return result
예제 #2
0
def pathPDBMirror(path=None, format=None):
    """Returns or specifies the PDB mirror path used by :func:`.fetchPDB`.

    Called without *path*, the stored mirror path is returned (as a
    ``(path, format)`` tuple when a format is stored); **None** is returned
    when no mirror is stored or the stored path is not a directory.  Called
    with an existing directory, its absolute path and *format* are saved in
    the settings.  To release the current mirror, pass an invalid path, e.g.
    ``path=''``.  If you are keeping a partial mirror, such as PDB files in
    :file:`/data/structures/divided/pdb/` folder, specify *format*, which is
    ``'pdb'`` in this case.

    :raises IOError: when *path* is not a directory and no mirror is
        currently stored"""

    if path is None:
        # Query mode: report whatever is stored in the settings.
        mirror = SETTINGS.get('pdb_mirror_path')
        layout = SETTINGS.get('pdb_mirror_format', None)
        if not mirror:
            return None
        if not isdir(mirror):
            LOGGER.warning(
                'PDB mirror path {0} is not a accessible.'.format(
                    repr(mirror)))
            return None
        return mirror if layout is None else (mirror, layout)

    if not isdir(path):
        # An invalid path releases whatever mirror is currently stored.
        released = SETTINGS.pop('pdb_mirror_path')
        if not released:
            raise IOError('{0} is not a valid path.'.format(repr(path)))
        LOGGER.info('PDB mirror {0} is released.'.format(repr(released)))
        SETTINGS.save()
        return None

    mirror = abspath(path)
    LOGGER.info('Local PDB mirror path is set: {0}'.format(repr(mirror)))
    SETTINGS['pdb_mirror_path'] = mirror
    SETTINGS['pdb_mirror_format'] = format
    SETTINGS.save()
예제 #3
0
def resetTicks(x, y=None):
    """Reset X (and Y) axis tick labels of the current figure using values
    in *x* (and *y*).

    Each current tick position is cast to an integer and used as an index
    into the corresponding array; the indexed values become the tick labels.
    Trailing ticks that fall outside of the array are left unlabeled.  Ticks
    in the current figure should not be fractional values for this function
    to work as expected.  *x* and *y* must support indexing with a list of
    integers (e.g. Numpy arrays); pass **None** to leave an axis untouched.

    Failures are not raised; a warning naming the affected axis is logged
    instead."""

    import matplotlib.pyplot as plt

    axes = (('xticks', x, plt.xticks), ('yticks', y, plt.yticks))
    for name, values, ticker in axes:
        if values is None:
            continue
        try:
            ticks = ticker()[0]
            indices = list(ticks.astype(int))
            # An index equal to len(values) is already out of range, so
            # drop every trailing tick that cannot be looked up.
            while indices and indices[-1] >= len(values):
                indices.pop()
            if indices:
                labels = list(values[indices])
                # Pad with empty labels so every tick position gets one.
                ticker(ticks, labels + [''] * (len(ticks) - len(labels)))
        except Exception:
            # Best effort: tick relabeling must never break plotting.
            LOGGER.warning('{0} could not be reset.'.format(name))
예제 #4
0
def alignCoordsets(atoms, weights=None):
    """Returns *atoms* after superposing coordinate sets onto its active
    coordinate set.  Transformations will be calculated for *atoms* and
    applied to its :class:`.AtomGroup`, when applicable.  Optionally,
    atomic *weights* can be passed for weighted superposition.

    When *atoms* has fewer than two coordinate sets, a warning is logged and
    *atoms* is returned unchanged.

    :raises TypeError: when *atoms* does not provide ``getACSIndex`` and
        ``numCoordsets`` methods"""

    try:
        acsi, n_csets = atoms.getACSIndex(), atoms.numCoordsets()
    except AttributeError:
        raise TypeError('atoms must have type Atomic, not {0}'
                        .format(type(atoms)))
    # This guard used to sit after the ``raise`` above and was unreachable.
    if n_csets < 2:
        LOGGER.warning('{0} contains fewer than two coordinate sets, '
                       'alignment was not performed.'.format(str(atoms)))
        return atoms

    try:
        ag = atoms.getAtomGroup()
    except AttributeError:
        # *atoms* has no parent group; transform it directly.
        ag = atoms
    agacsi = ag.getACSIndex()

    # Coordinates of the active set are the superposition target.
    tar = atoms._getCoords()
    for i in range(n_csets):
        if i == acsi:
            continue
        atoms.setACSIndex(i)
        ag.setACSIndex(i)
        calcTransformation(atoms, tar, weights).apply(ag)
    # Restore the active coordinate set indices changed by the loop.
    atoms.setACSIndex(acsi)
    ag.setACSIndex(agacsi)
    return atoms
예제 #5
0
def showContactMap(enm, *args, **kwargs):
    """Show the Kirchhoff matrix of *enm* using
    :func:`~matplotlib.pyplot.spy`.  Extra positional and keyword arguments
    are passed on to that function, and its return value is returned.
    **None** is returned, after logging a warning, when the Kirchhoff matrix
    is not set.

    .. plot::
       :context:
       :include-source:

       p38_gnm = GNM('p38')
       p38_gnm.buildKirchhoff( p38_structure )
       plt.figure(figsize=(4,4))
       showContactMap( p38_gnm )

    .. plot::
       :context:
       :nofigs:

       plt.close('all')"""

    import matplotlib.pyplot as plt
    if not isinstance(enm, GNMBase):
        raise TypeError('model argument must be an ENM instance')

    matrix = enm.getKirchhoff()
    if matrix is not None:
        image = plt.spy(matrix, *args, **kwargs)
        plt.title('{0:s} contact map'.format(enm.getTitle()))
        plt.xlabel('Residue index')
        plt.ylabel('Residue index')
        return image

    LOGGER.warning('kirchhoff matrix is not set')
    return None
예제 #6
0
def resetTicks(x, y=None):
    """Reset X (and Y) axis tick labels of the current figure using values
    in *x* (and *y*).

    Each current tick position is cast to an integer and used as an index
    into the corresponding array; the indexed values become the tick labels.
    Trailing ticks that fall outside of the array are left unlabeled.  Ticks
    in the current figure should not be fractional values for this function
    to work as expected.  *x* and *y* must support indexing with a list of
    integers (e.g. Numpy arrays); pass **None** to leave an axis untouched.

    Failures are not raised; a warning naming the affected axis is logged
    instead."""

    import matplotlib.pyplot as plt

    axes = (('xticks', x, plt.xticks), ('yticks', y, plt.yticks))
    for name, values, ticker in axes:
        if values is None:
            continue
        try:
            ticks = ticker()[0]
            indices = list(ticks.astype(int))
            # An index equal to len(values) is already out of range, so
            # drop every trailing tick that cannot be looked up.
            while indices and indices[-1] >= len(values):
                indices.pop()
            if indices:
                labels = list(values[indices])
                # Pad with empty labels so every tick position gets one.
                ticker(ticks, labels + [''] * (len(ticks) - len(labels)))
        except Exception:
            # Best effort: tick relabeling must never break plotting.
            LOGGER.warning('{0} could not be reset.'.format(name))
예제 #7
0
def pathVMD(*path):
    """Returns VMD path, or set it to be a user specified *path*.

    Called without arguments, the path stored in the settings is returned
    when it is executable.  Otherwise VMD is looked for (in the Windows
    registry on Windows, with ``which('vmd')`` elsewhere); an executable
    found this way is passed to ``setVMDpath`` and returned, and **None** is
    returned when nothing is found.

    Called with a single *path*, that path is saved in the settings.

    :raises OSError: when the given *path* is not executable
    :raises ValueError: when more than one argument is given"""

    if not path:
        path = SETTINGS.get('vmd', None)
        if isExecutable(path):
            return path

        LOGGER.warning('VMD path is not set by user, looking for it.')

        vmdbin = None
        if PLATFORM == 'Windows':
            if PY3K:
                import winreg
            else:
                import _winreg as winreg  # PY3K: OK
            registry_roots = (
                'Software\\University of Illinois\\VMD\\',
                'Software\\WOW6432node\\University of Illinois\\VMD\\',
            )
            # Later hits overwrite earlier ones, as in the original search.
            for vmdversion in ('1.8.7', '1.9', '1.9.1'):
                for root in registry_roots:
                    try:
                        with winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE,
                                            root + vmdversion) as key:
                            vmddir = winreg.QueryValueEx(key, 'VMDDIR')[0]
                    except OSError:
                        # Key or value is absent for this version.
                        continue
                    vmdbin = join(vmddir, 'vmd.exe')
        else:
            vmdbin = which('vmd')

        if isExecutable(vmdbin):
            setVMDpath(vmdbin)
            return vmdbin
    elif len(path) == 1:
        path = path[0]
        if isExecutable(path):
            SETTINGS['vmd'] = path
            SETTINGS.save()
            LOGGER.info("VMD path is set to '{0}'.".format(path))
        else:
            raise OSError('{0} is not executable.'.format(str(path)))
    else:
        raise ValueError('specify a single path string')
예제 #8
0
def loadPDBClusters(sqid=None):
    """Load previously fetched PDB sequence clusters from disk to memory.

    When *sqid* is **None**, data for every key of ``PDB_CLUSTERS`` is
    loaded; otherwise *sqid* must be an integer key of ``PDB_CLUSTERS``.  A
    cluster file that is missing on disk is requested with
    :func:`fetchPDBClusters` first.  File content is stored in
    ``PDB_CLUSTERS`` under the sequence identity key.  A warning is logged,
    at most once, when a cluster file is more than a week old.

    :raises ValueError: when *sqid* is not a key of ``PDB_CLUSTERS``"""

    import time

    global PDB_CLUSTERS_UPDATE_WARNING

    PDB_CLUSTERS_PATH = os.path.join(getPackagePath(), 'pdbclusters')
    if sqid is None:
        sqid_list = list(PDB_CLUSTERS)
        LOGGER.info('Loading all PDB sequence clusters.')
    else:
        # Kept as an assertion so the exception type seen by callers is
        # unchanged.
        assert isinstance(sqid, int), 'sqid must be an integer'
        if sqid not in PDB_CLUSTERS:
            raise ValueError('PDB cluster data is not available for sequence '
                             'identity {0}%, try one of {1}'.format(
                                 sqid, PDB_CLUSTERS_SQID_STR))
        LOGGER.info('Loading PDB sequence clusters for sequence identity '
                    '{0}.'.format(sqid))
        sqid_list = [sqid]

    for level in sqid_list:
        filename = os.path.join(PDB_CLUSTERS_PATH,
                                'bc-{0}.out.gz'.format(level))
        if not os.path.isfile(filename):
            fetchPDBClusters(level)

        if PDB_CLUSTERS_UPDATE_WARNING:
            # 604800 is the number of seconds in a week.
            diff = (time.time() - os.path.getmtime(filename)) / 604800.
            if diff > 1.:
                LOGGER.warning(
                    'PDB sequence clusters are {0:.1f} week(s) old,'
                    ' call `fetchPDBClusters` to receive updates.'.format(
                        diff))
                PDB_CLUSTERS_UPDATE_WARNING = False

        inp = openFile(filename)
        try:
            PDB_CLUSTERS[level] = inp.read()
        finally:
            # Release the handle even when reading fails.
            inp.close()
예제 #9
0
    def __add__(self, other):
        """Returns an :class:`.AtomMap` that points to the atoms of this
        instance followed by the atoms of *other*.  Order of pointed atoms
        is preserved.  Both operands must point to the same atom group; the
        result uses the active coordinate set index of this instance."""

        try:
            ag = other.getAtomGroup()
        except AttributeError:
            raise TypeError('unsupported operand type(s) for +: {0} and '
                            '{1}'.format(repr(type(self).__name__),
                                         repr(type(other).__name__)))

        if ag != self._ag:
            raise ValueError('AtomPointer instances must point to the same '
                             'AtomGroup instance')

        acsi = self.getACSIndex()
        if acsi != other.getACSIndex():
            LOGGER.warning('Active coordset indices of atoms are not the same.'
                           ' Result will have ACSI {0}.'.format(acsi))

        label = '({0}) + ({1})'.format(str(self), str(other))
        merged = concatenate([self._getIndices(), other._getIndices()])

        # Operands without a numDummies method contribute no dummies.
        n_dummies = 0
        for operand in (self, other):
            try:
                n_dummies += operand.numDummies()
            except AttributeError:
                pass

        return AtomMap(ag, merged, acsi, title=label, intarrays=True,
                       dummies=n_dummies)
예제 #10
0
    def getCoordsets(self, indices=None):
        """Returns coordinate sets at given *indices*. *indices* may be an
        integer, a list of integers or **None**. **None** returns all
        coordinate sets.

        When no atom subset is in effect and all frames are requested, the
        frames are read from the file in a single pass and the current frame
        position is restored afterwards.  If the file holds fewer complete
        frames than expected, the complete ones are returned and a warning is
        logged.  All other requests are delegated to
        :meth:`TrajFile.getCoordsets`.

        :raises ValueError: when the file is closed"""

        if self._closed:
            raise ValueError('I/O operation on closed file')
        if (self._indices is None
                and (indices is None or indices == slice(None))):
            nfi = self._nfi
            self.reset()
            # Frames with unit cell data carry 14 extra leading values.
            n_floats = self._n_floats + self._unitcell * 14
            n_atoms = self._n_atoms
            n_csets = self._n_csets
            raw = self._file.read(self._itemsize * n_floats * n_csets)
            # Drop a partially written trailing value of a truncated file.
            leftover = len(raw) % self._itemsize
            if leftover:
                raw = raw[:-leftover]
            # NOTE(review): binary-mode fromstring is deprecated in NumPy;
            # consider frombuffer(...).copy() when updating the imports.
            data = fromstring(raw, self._dtype)
            if len(data) < n_floats * n_csets:
                # Truncated file: keep only the complete frames.
                n_csets = len(data) // n_floats
                data = data[:n_csets * n_floats]
                LOGGER.warning('DCD is corrupt, {0} out of {1} frames '
                               'were parsed.'.format(n_csets, self._n_csets))
            data = data.reshape((n_csets, n_floats))
            if self._unitcell:
                data = data[:, 14:]
            # Each of the three blocks per frame has one leading and one
            # trailing value that is discarded.
            data = data.reshape((n_csets, 3, n_atoms + 2))
            data = data[:, :, 1:-1]
            data = data.transpose(0, 2, 1)
            self.goto(nfi)
            if self._astype is not None and self._astype != data.dtype:
                data = data.astype(self._astype)
            return data
        else:
            return TrajFile.getCoordsets(self, indices)
예제 #11
0
파일: analysis.py 프로젝트: prody/ProDy
def calcShannonEntropy(msa, ambiguity=True, omitgaps=True, **kwargs):
    """Returns Shannon entropy array calculated for *msa*, which may be
    an :class:`.MSA` instance or a 2D Numpy character array.  Implementation
    is case insensitive and handles ambiguous amino acids as follows:

      * **B** (Asx) count is allocated to *D* (Asp) and *N* (Asn)
      * **Z** (Glx) count is allocated to *E* (Glu) and *Q* (Gln)
      * **J** (Xle) count is allocated to *I* (Ile) and *L* (Leu)
      * **X** (Xaa) count is allocated to the twenty standard amino acids

    Selenocysteine (**U**, Sec) and pyrrolysine (**O**, Pyl) are considered
    as distinct amino acids.  When *ambiguity* is set **False**, all alphabet
    characters are considered as distinct types.

    All non-alphabet characters are considered as gaps, and they are handled
    in two ways:

      * non-existent, the probability of observing amino acids in a given
        column is adjusted, by default
      * as a distinct character with its own probability, when *omitgaps* is
        **False**

    A warning is logged when *msa* holds fewer than 100 sequences.  Extra
    keyword arguments are accepted but not used by this function."""

    msa = getMSA(msa)
    length = msa.shape[1]
    if msa.shape[0] < 100:
        # The message previously named SCA, a different calculation.
        LOGGER.warning(
            "Shannon entropy is estimated more reliably with higher number "
            "of sequences, and minimal number of sequences is recommended "
            "as 100."
        )
    # Output buffer with one entry per alignment column.
    entropy = empty(length, float)
    from .msatools import msaentropy

    return msaentropy(msa, entropy, ambiguity=bool(ambiguity),
                      omitgaps=bool(omitgaps))
예제 #12
0
def alignCoordsets(atoms, weights=None):
    """Return *atoms* after superposing coordinate sets onto its active
    coordinate set.  Transformations will be calculated for *atoms* and
    applied to its :class:`.AtomGroup`, when applicable.  Optionally,
    atomic *weights* can be passed for weighted superposition.

    When *atoms* has fewer than two coordinate sets, a warning is logged and
    *atoms* is returned unchanged.

    :raises TypeError: when *atoms* does not provide ``getACSIndex`` and
        ``numCoordsets`` methods"""

    try:
        acsi, n_csets = atoms.getACSIndex(), atoms.numCoordsets()
    except AttributeError:
        # A type object does not support the ``s`` format code, so a plain
        # replacement field is used here.
        raise TypeError('atoms must have type Atomic, not {0}'
                        .format(type(atoms)))
    # This guard used to sit after the ``raise`` above and was unreachable.
    if n_csets < 2:
        LOGGER.warning('{0:s} contains fewer than two coordinate sets, '
                       'alignment was not performed.'.format(str(atoms)))
        return atoms

    try:
        ag = atoms.getAtomGroup()
    except AttributeError:
        # *atoms* has no parent group; transform it directly.
        ag = atoms
    agacsi = ag.getACSIndex()

    # Coordinates of the active set are the superposition target.
    tar = atoms._getCoords()
    for i in range(n_csets):
        if i == acsi:
            continue
        atoms.setACSIndex(i)
        ag.setACSIndex(i)
        calcTransformation(atoms, tar, weights).apply(ag)
    # Restore the active coordinate set indices changed by the loop.
    atoms.setACSIndex(acsi)
    ag.setACSIndex(agacsi)
    return atoms
예제 #13
0
    def __and__(self, other):
        """Return a :class:`Selection` of the atoms that both this instance
        and *other* point to, or **None** when they share no atoms.  When
        *other* is this very instance, it is returned as is.

        If the active coordinate set indices of the operands differ, a
        warning is logged and index zero is used for the result.

        :raises TypeError: when *other* is not an :class:`AtomPointer`
        :raises ValueError: when operands belong to different atom groups"""

        if self is other:
            return self

        if not isinstance(other, AtomPointer):
            raise TypeError('other must be an AtomPointer')

        if self._ag != other.getAtomGroup():
            raise ValueError('both selections must be from the same AtomGroup')

        # This check used to run twice and log two warnings.
        acsi = self.getACSIndex()
        if acsi != other.getACSIndex():
            LOGGER.warning('Active coordinate set indices do not match, it '
                           'will be set to zero.')
            acsi = 0

        shared = set(self._getIndices()).intersection(other.getIndices())
        if shared:
            # np.unique needs a sequence; given a set it would produce a
            # one-element object array instead of sorted indices.
            indices = np.unique(list(shared))
            return Selection(self._ag, indices, '({0:s}) and ({1:s})'.format(
                                    self.getSelstr(), other.getSelstr()), acsi)
예제 #14
0
파일: analysis.py 프로젝트: tekpinar/ProDy
def calcShannonEntropy(msa, ambiguity=True, omitgaps=True, **kwargs):
    """Returns Shannon entropy array calculated for *msa*, which may be
    an :class:`.MSA` instance or a 2D Numpy character array.  Implementation
    is case insensitive and handles ambiguous amino acids as follows:

      * **B** (Asx) count is allocated to *D* (Asp) and *N* (Asn)
      * **Z** (Glx) count is allocated to *E* (Glu) and *Q* (Gln)
      * **J** (Xle) count is allocated to *I* (Ile) and *L* (Leu)
      * **X** (Xaa) count is allocated to the twenty standard amino acids

    Selenocysteine (**U**, Sec) and pyrrolysine (**O**, Pyl) are considered
    as distinct amino acids.  When *ambiguity* is set **False**, all alphabet
    characters are considered as distinct types.

    All non-alphabet characters are considered as gaps, and they are handled
    in two ways:

      * non-existent, the probability of observing amino acids in a given
        column is adjusted, by default
      * as a distinct character with its own probability, when *omitgaps* is
        **False**

    A warning is logged when *msa* holds fewer than 100 sequences.  Extra
    keyword arguments are accepted but not used by this function."""

    msa = getMSA(msa)
    length = msa.shape[1]
    if msa.shape[0] < 100:
        # The message previously named SCA, a different calculation.
        LOGGER.warning(
            'Shannon entropy is estimated more reliably with higher number '
            'of sequences, and minimal number of sequences is recommended '
            'as 100.')
    # Output buffer with one entry per alignment column.
    entropy = empty(length, float)
    from .msatools import msaentropy
    return msaentropy(msa,
                      entropy,
                      ambiguity=bool(ambiguity),
                      omitgaps=bool(omitgaps))
예제 #15
0
파일: pdbclusters.py 프로젝트: njekin/ProDy
def loadPDBClusters(sqid=None):
    """Load previously fetched PDB sequence clusters from disk to memory.

    When *sqid* is **None**, data for every key of ``PDB_CLUSTERS`` is
    loaded; otherwise *sqid* must be an integer key of ``PDB_CLUSTERS``.  A
    cluster file that is missing on disk is requested with
    :func:`fetchPDBClusters` first.  File content is stored in
    ``PDB_CLUSTERS`` under the sequence identity key.  A warning is logged,
    at most once, when a cluster file is more than a week old.

    :raises ValueError: when *sqid* is not a key of ``PDB_CLUSTERS``"""

    import time

    global PDB_CLUSTERS_UPDATE_WARNING

    PDB_CLUSTERS_PATH = os.path.join(getPackagePath(), 'pdbclusters')
    if sqid is None:
        sqid_list = list(PDB_CLUSTERS)
        LOGGER.info('Loading all PDB sequence clusters.')
    else:
        # Kept as an assertion so the exception type seen by callers is
        # unchanged.
        assert isinstance(sqid, int), 'sqid must be an integer'
        if sqid not in PDB_CLUSTERS:
            raise ValueError('PDB cluster data is not available for sequence '
                             'identity {0}%, try one of {1}'
                             .format(sqid, PDB_CLUSTERS_SQID_STR))
        LOGGER.info('Loading PDB sequence clusters for sequence identity '
                    '{0}.'.format(sqid))
        sqid_list = [sqid]

    for level in sqid_list:
        filename = os.path.join(PDB_CLUSTERS_PATH,
                                'bc-{0}.out.gz'.format(level))
        if not os.path.isfile(filename):
            fetchPDBClusters(level)

        if PDB_CLUSTERS_UPDATE_WARNING:
            # 604800 is the number of seconds in a week.
            diff = (time.time() - os.path.getmtime(filename)) / 604800.
            if diff > 1.:
                LOGGER.warning('PDB sequence clusters are {0:.1f} week(s) old,'
                               ' call `fetchPDBClusters` to receive updates.'
                               .format(diff))
                PDB_CLUSTERS_UPDATE_WARNING = False

        inp = openFile(filename)
        try:
            PDB_CLUSTERS[level] = inp.read()
        finally:
            # Release the handle even when reading fails.
            inp.close()
예제 #16
0
파일: localpdb.py 프로젝트: sixpi/ProDy
def pathPDBMirror(path=None, format=None):
    """Returns or specifies the PDB mirror path used by :func:`.fetchPDB`.

    Called without *path*, the stored mirror path is returned (as a
    ``(path, format)`` tuple when a format is stored); **None** is returned
    when no mirror is stored or the stored path is not a directory.  Called
    with an existing directory, its absolute path and *format* are saved in
    the settings.  To release the current mirror, pass an invalid path, e.g.
    ``path=''``.  If you are keeping a partial mirror, such as PDB files in
    :file:`/data/structures/divided/pdb/` folder, specify *format*, which is
    ``'pdb'`` in this case.

    :raises IOError: when *path* is not a directory and no mirror is
        currently stored"""

    if path is None:
        # Query mode: report whatever is stored in the settings.
        mirror = SETTINGS.get('pdb_mirror_path')
        layout = SETTINGS.get('pdb_mirror_format', None)
        if not mirror:
            return None
        if not isdir(mirror):
            LOGGER.warning('PDB mirror path {0} is not a accessible.'
                           .format(repr(mirror)))
            return None
        return mirror if layout is None else (mirror, layout)

    if not isdir(path):
        # An invalid path releases whatever mirror is currently stored.
        released = SETTINGS.pop('pdb_mirror_path')
        if not released:
            raise IOError('{0} is not a valid path.'.format(repr(path)))
        LOGGER.info('PDB mirror {0} is released.'
                    .format(repr(released)))
        SETTINGS.save()
        return None

    mirror = abspath(path)
    LOGGER.info('Local PDB mirror path is set: {0}'
                .format(repr(mirror)))
    SETTINGS['pdb_mirror_path'] = mirror
    SETTINGS['pdb_mirror_format'] = format
    SETTINGS.save()
예제 #17
0
    def __and__(self, other):
        """Returns a :class:`Selection` of the atoms that both this instance
        and *other* point to, or **None** when they share no atoms.  When
        *other* is this very instance, it is returned as is.

        If the active coordinate set indices of the operands differ, a
        warning is logged and index zero is used for the result.

        :raises TypeError: when *other* has no ``getAtomGroup`` method
        :raises ValueError: when operands belong to different atom groups"""

        if self is other:
            return self

        try:
            ag = other.getAtomGroup()
        except AttributeError:
            raise TypeError('other must be an AtomPointer')

        if self._ag != ag:
            raise ValueError('both selections must be from the same AtomGroup')

        # This check used to run twice and log two warnings.
        acsi = self.getACSIndex()
        if acsi != other.getACSIndex():
            LOGGER.warning('Active coordinate set indices do not match, it '
                           'will be set to zero.')
            acsi = 0

        shared = set(self._getIndices()).intersection(other.getIndices())
        if shared:
            # unique needs a sequence; given a set it would produce a
            # one-element object array instead of sorted indices.
            indices = unique(list(shared))
            # After sorting, the dummy index (if present) is the last one.
            if indices[-1] == atommap.DUMMY:
                indices = indices[:-1]
            return Selection(self._ag, indices, '({0}) and ({1})'
                             .format(self.getSelstr(), other.getSelstr()),
                             acsi)
예제 #18
0
파일: dcdfile.py 프로젝트: anindita85/ProDy
 def getCoordsets(self, indices=None):
     """Returns coordinate sets at given *indices*. *indices* may be an
     integer, a list of integers or ``None``. ``None`` returns all
     coordinate sets.

     When no atom subset is in effect and all frames are requested, the
     frames are read from the file in a single pass and the current frame
     position is restored afterwards.  If the file holds fewer complete
     frames than expected, the complete ones are returned and a warning is
     logged.  All other requests are delegated to
     :meth:`TrajFile.getCoordsets`.

     :raises ValueError: when the file is closed"""

     if self._closed:
         raise ValueError('I/O operation on closed file')
     if (self._indices is None and
         (indices is None or indices == slice(None))):
         nfi = self._nfi
         self.reset()
         # Frames with unit cell data carry 14 extra leading values.
         n_floats = self._n_floats + self._unitcell * 14
         n_atoms = self._n_atoms
         n_csets = self._n_csets
         # fromfile returns fewer items than requested on a short file.
         data = np.fromfile(self._file, self._dtype,
                            n_floats * n_csets)
         if len(data) < n_floats * n_csets:
             # Truncated file: keep only the complete frames.
             n_csets = len(data) // n_floats
             data = data[:n_csets * n_floats]
             LOGGER.warning('DCD is corrupt, {0:d} out of {1:d} frames '
                            'were parsed.'.format(n_csets, self._n_csets))
         data = data.reshape((n_csets, n_floats))
         if self._unitcell:
             data = data[:, 14:]
         # Each of the three blocks per frame has one leading and one
         # trailing value that is discarded.
         data = data.reshape((n_csets, 3, n_atoms+2))
         data = data[:, :, 1:-1]
         data = data.transpose(0, 2, 1)
         self.goto(nfi)
         if self._astype is not None and self._astype != data.dtype:
             data = data.astype(self._astype)
         return data
     else:
         return TrajFile.getCoordsets(self, indices)
예제 #19
0
파일: analysis.py 프로젝트: fongchun/ProDy
def buildSCAMatrix(msa, turbo=True, **kwargs):
    """Returns the SCA matrix calculated for *msa*, which may be an
    :class:`.MSA` instance or a 2D Numpy character array.

    Implementation is case insensitive and handles ambiguous amino acids
    as follows:

      * **B** (Asx) count is allocated to *D* (Asp) and *N* (Asn)
      * **Z** (Glx) count is allocated to *E* (Glu) and *Q* (Gln)
      * **J** (Xle) count is allocated to *I* (Ile) and *L* (Leu)
      * **X** (Xaa) count is allocated to the twenty standard amino acids
      * Joint probability of observing a pair of ambiguous amino acids is
        allocated to all potential combinations, e.g. probability of **XX**
        is allocated to 400 combinations of standard amino acids, similarly
        probability of **XB** is allocated to 40 combinations of *D* and *N*
        with the standard amino acids.

    Selenocysteine (**U**, Sec) and pyrrolysine (**O**, Pyl) are considered
    as distinct amino acids.  All non-alphabet characters are considered as
    gaps.

    *turbo* is converted to a boolean and forwarded to ``msasca``.  Extra
    keyword arguments are accepted but not used by this function.  A warning
    is logged when *msa* holds fewer than 100 sequences."""

    alignment = getMSA(msa)
    n_sequences = alignment.shape[0]
    if n_sequences < 100:
        LOGGER.warning('SCA performs the best with higher number of sequences, and '
                       'minimal number of sequences is recommended as 100.')

    from .msatools import msasca
    LOGGER.timeit('_sca')
    n_columns = alignment.shape[1]
    # Square, zero-initialised float buffer handed to the msasca routine.
    buffer = zeros((n_columns, n_columns), float)
    result = msasca(alignment, buffer, turbo=bool(turbo))
    LOGGER.report('SCA matrix was calculated in %.2fs.', '_sca')
    return result
예제 #20
0
    def getLigandResidueIndices(self):
        """Returns residue indices of the residues interacting with ligands.

        When no ligand is set, a warning is logged and **None** is
        returned."""

        if not self._lig:
            LOGGER.warning('No ligand provided.')
            return None
        return self._ligres_idx
예제 #21
0
    def getLigandResidueESSAZscores(self):
        """Returns ESSA z-scores of the residues interacting with ligands as
        a dictionary.  The keys are the corresponding chain ids and residue
        numbers of the ligands.  Each value comprises the indices of the
        residue ESSA z-scores in the profile and the corresponding scores as
        separate arrays.

        When no ligand is set, a warning is logged and **None** is
        returned."""

        if not self._lig:
            LOGGER.warning('No ligand provided.')
            return None
        return self._zs_lig
예제 #22
0
    def getLigandResidueCodes(self):
        """Returns chain ids and residue numbers of the residues interacting
        with ligands.

        When no ligand is set, a warning is logged and **None** is
        returned."""

        if not self._lig:
            LOGGER.warning('No ligand provided.')
            return None
        return self._ligres_code
예제 #23
0
파일: nmdfile.py 프로젝트: barettog1/ProDy
def pathVMD(*path):
    """Return VMD path, or set it to be a user specified *path*.

    Called without arguments, the path stored in the settings is returned
    when it is executable.  Otherwise VMD is looked for (in the Windows
    registry on Windows, with ``which('vmd')`` elsewhere); an executable
    found this way is passed to ``setVMDpath`` and returned, and **None** is
    returned when nothing is found.

    Called with a single *path*, that path is saved in the settings.

    :raises OSError: when the given *path* is not executable
    :raises ValueError: when more than one argument is given"""

    if not path:
        path = SETTINGS.get('vmd', None)
        if isExecutable(path):
            return path

        LOGGER.warning('VMD path is not set by user, looking for it.')

        vmdbin = None
        if PLATFORM == 'Windows':
            if PY3K:
                import winreg
            else:
                import _winreg as winreg  # PY3K: OK
            registry_roots = (
                'Software\\University of Illinois\\VMD\\',
                'Software\\WOW6432node\\University of Illinois\\VMD\\',
            )
            # Later hits overwrite earlier ones, as in the original search.
            for vmdversion in ('1.8.7', '1.9', '1.9.1'):
                for root in registry_roots:
                    try:
                        with winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE,
                                            root + vmdversion) as key:
                            vmddir = winreg.QueryValueEx(key, 'VMDDIR')[0]
                    except OSError:
                        # Key or value is absent for this version.
                        continue
                    vmdbin = join(vmddir, 'vmd.exe')
        else:
            vmdbin = which('vmd')

        if isExecutable(vmdbin):
            setVMDpath(vmdbin)
            return vmdbin
    elif len(path) == 1:
        path = path[0]
        if isExecutable(path):
            SETTINGS['vmd'] = path
            SETTINGS.save()
            LOGGER.info("VMD path is set to '{0}'.".format(path))
        else:
            raise OSError('{0} is not executable.'.format(str(path)))
    else:
        raise ValueError('specify a single path string')
예제 #24
0
    def saveLigandResidueESSAZscores(self):
        """Saves the dictionary of ESSA z-scores of the residues interacting
        with ligands to a pickle `.pkl` file.  The keys of the dictionary are
        the corresponding chain ids and residue numbers of the ligands.  Each
        value comprises the indices of the residue ESSA z-scores in the
        profile and the corresponding scores as separate arrays.

        The file is named ``<title>_ligres_gnm_zs.pkl``.  When no ligand is
        set, a warning is logged and nothing is written."""

        if self._lig:
            from pickle import dump

            filename = '{}_ligres_gnm_zs.pkl'.format(self._title)
            # The context manager closes the file even if pickling fails.
            with open(filename, 'wb') as out:
                dump(self._zs_lig, out)
        else:
            LOGGER.warning('No ligand provided.')
예제 #25
0
def getVMDpath():
    """Return VMD path set by user or one identified automatically.

    The path stored in the settings is returned when it is executable.
    Otherwise VMD is looked for (in the Windows registry on Windows, with
    ``which("vmd")`` elsewhere); an executable found this way is passed to
    ``setVMDpath`` and returned.  **None** is returned when nothing is
    found."""

    path = SETTINGS.get("vmd", None)
    if isExecutable(path):
        return path

    LOGGER.warning("VMD path is not set by user, looking for it.")

    vmdbin = None
    if PLATFORM == "Windows":
        # The registry module is named ``winreg`` on Python 3 and
        # ``_winreg`` on Python 2.
        try:
            import winreg
        except ImportError:
            import _winreg as winreg

        registry_roots = (
            "Software\\University of Illinois\\VMD\\",
            "Software\\WOW6432node\\University of Illinois\\VMD\\",
        )
        # Later hits overwrite earlier ones, as in the original search.
        for vmdversion in ("1.8.7", "1.9", "1.9.1"):
            for root in registry_roots:
                try:
                    with winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE,
                                        root + vmdversion) as key:
                        vmddir = winreg.QueryValueEx(key, "VMDDIR")[0]
                except OSError:
                    # Key or value is absent for this version.
                    continue
                vmdbin = os.path.join(vmddir, "vmd.exe")
    else:
        vmdbin = which("vmd")

    if isExecutable(vmdbin):
        setVMDpath(vmdbin)
        return vmdbin
예제 #26
0
def getPDBMirrorPath():
    """Return the local PDB mirror path stored in the settings.

    **None** is returned when no mirror path is stored, or when the stored
    path does not pass the ``isdir`` check; the latter case is also logged
    as a warning."""

    mirror = SETTINGS.get('pdb_mirror_path')
    if not mirror:
        return None
    if isdir(mirror):
        return mirror
    LOGGER.warning('PDB mirror path {0:s} is not a accessible.'
                   .format(repr(mirror)))
    return None
예제 #27
0
def getPDBLocalFolder():
    """Return the local PDB folder together with its layout flag.

    The result is a ``(folder, divided)`` tuple, where *divided* is the
    stored ``'pdb_local_divided'`` setting (**True** when it is not stored).
    **None** is returned when no folder is stored, or when the stored folder
    does not pass the ``isdir`` check; the latter case is also logged as a
    warning."""

    local = SETTINGS.get('pdb_local_folder')
    if not local:
        return None
    if not isdir(local):
        LOGGER.warning('PDB local folder {0:s} is not a accessible.'
                       .format(repr(local)))
        return None
    return local, SETTINGS.get('pdb_local_divided', True)
예제 #28
0
파일: clustenm.py 프로젝트: kaynakb/ProDy
    def _min_sim(self, coords):
        """Energy-minimize one conformer and, if enabled, heat and simulate it.

        :arg coords: coordinate set handed to :meth:`_prep_sim`; per the
            original note a (numAtoms, 3) array in Angstrom that OpenMM
            converts to nanometer
        :returns: ``(potential, positions)``: potential energy in kJ/mol and
            positions in Angstrom, truncated to the number of atoms in
            ``self._topology``.  When OpenMM fails, ``(nan, array of nan
            shaped like coords)`` is returned so the conformer can be
            discarded by the caller.
        :raises ImportError: when the OpenMM modules cannot be imported
        """

        try:
            from simtk.openmm.app import StateDataReporter
            from simtk.unit import kelvin, angstrom, kilojoule_per_mole, MOLAR_GAS_CONSTANT_R
        except ImportError:
            raise ImportError(
                'Please install PDBFixer and OpenMM in order to use ClustENM.')

        simulation = self._prep_sim(coords=coords)

        # NOTE: positions are not set here explicitly; the original note says
        # the Angstrom -> nanometer conversion is carried out automatically.

        try:
            simulation.minimizeEnergy(tolerance=self._tolerance *
                                      kilojoule_per_mole,
                                      maxIterations=self._maxIterations)
            if self._sim:
                # Heat the system up one step at a time until the target
                # temperature is reached.
                sdr = StateDataReporter(stdout, 1, step=True, temperature=True)
                sdr._initializeConstants(simulation)
                temp = 0.0

                # The instantaneous temperature is evaluated by hand from the
                # kinetic energy (T = 2 KE / (dof * R)) instead of relying on
                # the openmmtools module, whose conda installation may be
                # problematic.
                while temp < self._temp:
                    simulation.step(1)
                    ke = simulation.context.getState(
                        getEnergy=True).getKineticEnergy()
                    temp = (2 * ke / (sdr._dof * MOLAR_GAS_CONSTANT_R)
                            ).value_in_unit(kelvin)

                simulation.step(self._t_steps[self._cycle])

            pos = simulation.context.getState(getPositions=True).getPositions(
                asNumpy=True).value_in_unit(
                    angstrom)[:self._topology.getNumAtoms()]
            pot = simulation.context.getState(
                getEnergy=True).getPotentialEnergy().value_in_unit(
                    kilojoule_per_mole)

            return pot, pos

        except Exception as exc:
            # Only ordinary errors mark the conformer as failed;
            # KeyboardInterrupt and SystemExit are allowed to propagate
            # instead of being silently turned into a discarded conformer.
            LOGGER.warning(
                'OpenMM exception: ' + str(exc) +
                ' so the corresponding conformer will be discarded!')

            return np.nan, np.full_like(coords, np.nan)
예제 #29
0
    def saveLigandResidueCodes(self):
        """Write the ligand residue codes to ``<title>_ligand_rescodes.txt``.

        Every key of ``self._ligres_code`` is written on its own line,
        followed by each entry of its value, one per line.  When
        ``self._lig`` is false nothing is written and a warning is logged.
        """

        if not self._lig:
            LOGGER.warning('No ligand provided.')
            return

        target = '{}_ligand_rescodes.txt'.format(self._title)
        with open(target, 'w') as handle:
            for ligand, residues in self._ligres_code.items():
                handle.write(ligand + '\n')
                handle.writelines(code + '\n' for code in residues)
예제 #30
0
def fetchPDBClusters(sqid=None):
    """Download PDB sequence cluster files.

    PDB sequence clusters are results of the weekly clustering of protein
    chains in the PDB generated by blastclust, available at the FTP site
    ftp://resources.rcsb.org/sequence/clusters/

    *sqid* may be a single sequence identity level or a list-like of levels,
    each of which must be in ``PDB_CLUSTERS``; by default all levels are
    fetched.  Files are saved compressed into the :file:`pdbclusters` folder
    of the package path.  Per the original notes, this is about 10 Mb of
    downloads and less than 4 Mb once compressed.  Cluster data can be loaded
    using :func:`loadPDBClusters` and accessed using :func:`listPDBCluster`.

    :raises ValueError: when a requested level is not in ``PDB_CLUSTERS``"""

    if sqid is None:
        keys = list(PDB_CLUSTERS)
    elif isListLike(sqid):
        for level in sqid:
            if level not in PDB_CLUSTERS:
                raise ValueError('sqid must be one or more of ' +
                                 PDB_CLUSTERS_SQID_STR)
        keys = list(sqid)
    elif sqid in PDB_CLUSTERS:
        keys = [sqid]
    else:
        raise ValueError('sqid must be one or more of ' +
                         PDB_CLUSTERS_SQID_STR)

    PDB_CLUSTERS_PATH = os.path.join(getPackagePath(), 'pdbclusters')
    if not os.path.isdir(PDB_CLUSTERS_PATH):
        os.mkdir(PDB_CLUSTERS_PATH)

    total = len(keys)
    LOGGER.progress('Downloading sequence clusters', total,
                    '_prody_fetchPDBClusters')
    downloaded = 0
    for index, level in enumerate(keys):
        filename = 'bc-{0}.out'.format(level)
        url = 'ftp://resources.rcsb.org/sequence/clusters/' + filename
        try:
            source = openURL(url)
        except IOError:
            # A failed download is reported and skipped; the progress bar is
            # not advanced for it.
            LOGGER.warning('Clusters at {0}% sequence identity level could '
                           'not be downloaded.'.format(level))
            continue
        target = openFile(filename + '.gz', 'w', folder=PDB_CLUSTERS_PATH)
        target.write(source.read())
        source.close()
        target.close()
        downloaded += 1
        LOGGER.update(index, label='_prody_fetchPDBClusters')
    LOGGER.finish()

    if total == downloaded:
        LOGGER.info('All selected PDB clusters were downloaded successfully.')
    elif downloaded == 0:
        LOGGER.warn('PDB clusters could not be downloaded.')
예제 #31
0
    def getDeviations(self):
        """Return deviations of conformations from the reference coordinates.

        The result is ``self._getCoordsets() - self._getCoords()``.
        Conformations can be aligned beforehand with :meth:`superpose` or
        :meth:`iterpose`.  **None** is returned, after logging a warning,
        when the conformations or the reference coordinates are not Numpy
        arrays (i.e. not set)."""

        if not isinstance(self._confs, ndarray):
            LOGGER.warning('Conformations are not set.')
            deviations = None
        elif not isinstance(self._coords, ndarray):
            LOGGER.warning('Coordinates are not set.')
            deviations = None
        else:
            deviations = self._getCoordsets() - self._getCoords()
        return deviations
예제 #32
0
 def getDeviations(self):
     """Return deviations of conformations from the reference coordinates.

     The result is ``self._getCoordsets() - self._coords``.  The
     conformations may need to be aligned with :meth:`superpose` or
     :meth:`iterpose` beforehand.  **None** is returned, after logging a
     warning, when the conformations or the reference coordinates are not
     Numpy arrays (i.e. not set)."""

     if isinstance(self._confs, np.ndarray):
         if isinstance(self._coords, np.ndarray):
             return self._getCoordsets() - self._coords
         LOGGER.warning('Coordinates are not set.')
         return None
     LOGGER.warning('Conformations are not set.')
     return None
예제 #33
0
파일: ensemble.py 프로젝트: sixpi/ProDy
    def getDeviations(self):
        """Return deviations of conformations from the reference coordinates.

        Computed as ``self._getCoordsets() - self._getCoords()``; align the
        conformations first with :meth:`superpose` or :meth:`iterpose` if
        needed.  When the conformations or the reference coordinates are not
        Numpy arrays (i.e. not set), a warning is logged and **None** is
        returned."""

        required = (('_confs', 'Conformations are not set.'),
                    ('_coords', 'Coordinates are not set.'))
        for attribute, message in required:
            # Attributes are looked up one at a time, in this order.
            if not isinstance(getattr(self, attribute), ndarray):
                LOGGER.warning(message)
                return None

        return self._getCoordsets() - self._getCoords()
예제 #34
0
파일: analysis.py 프로젝트: ielushuai/ProDy
def buildDirectInfoMatrix(msa, seqid=.8, pseudo_weight=.5, refine=False,
                          **kwargs):
    """Return the direct information (DI) matrix calculated for *msa*.

    *msa* may be an :class:`.MSA` instance or a 2D Numpy character array.

    :arg seqid: sequences sharing this sequence identity or more with
        another sequence are regarded as similar when their weights are
        calculated (see :func:`.calcMeff`); passed on as ``theta = 1 - seqid``
    :arg pseudo_weight: weight for the pseudo count probability
    :arg refine: when true, the MSA is refined by the first sequence and the
        resulting matrix is smaller; sequences are not refined by default

    A warning is logged when *msa* has fewer than 250 sequences.  Extra
    keyword arguments are accepted but not used.
    """

    msa = getMSA(msa)
    from .msatools import msadipretest, msadirectinfo1, msadirectinfo2
    from numpy import matrix

    LOGGER.timeit('_di')
    n_sequences = msa.shape[0]
    if n_sequences < 250:
        LOGGER.warning(
            'DI performs the best with higher number of sequences, and '
            'minimal number of sequences is recommended as 250.')
    refine_flag = 1 if refine else 0

    # msadipretest reports the parameters that determine the matrix sizes
    length, q = msadipretest(msa, refine=refine_flag)
    size = length * q
    column = matrix(zeros((size, 1), float))
    row = matrix(zeros((1, size), float))
    # (size x size) zero matrix for msadirectinfo1 to fill in
    cmat = matrix.dot(column, row)
    prob = zeros((length, q + 1), float)

    # msadirectinfo1 returns the matrix to be inverted and the probabilities
    _meff, n, length, cmat, prob = msadirectinfo1(
        msa, cmat, prob,
        theta=1. - seqid,
        pseudocount_weight=pseudo_weight,
        refine=refine_flag,
        q=q + 1)

    cmat = cmat.I

    # final DI values are written by msadirectinfo2
    di = msadirectinfo2(n, length, cmat, prob,
                        zeros((length, length), float), q + 1)
    del prob, cmat
    LOGGER.report('DI matrix was calculated in %.2fs.', '_di')
    return di
예제 #35
0
파일: analysis.py 프로젝트: fongchun/ProDy
def calcTempFactors(modes, atoms):
    """Return temperature (β) factors calculated from *modes*.

    Square fluctuations of *modes* are scaled so that their sum equals the
    sum of the experimental B-factors obtained from ``atoms.getBetas()``.

    :raises TypeError: when the model of *modes* is not a ``GNMBase``
    :raises ValueError: when the model and *atoms* differ in atom count
    """

    model = modes.getModel()
    if not isinstance(model, GNMBase):
        raise TypeError('modes must come from GNM or ANM')
    if model.numAtoms() != atoms.numAtoms():
        raise ValueError('modes and atoms must have same number of nodes')

    fluctuations = calcSqFlucts(modes)
    betas = atoms.getBetas()
    # Warn when the experimental values are near zero or nearly uniform,
    # since the scaling below is then of little use.
    if betas.max() < 0.5 or betas.std() < 0.5:
        LOGGER.warning('Experimental B-factors are quite small or meaningless. The calculated B-factors may be incorrect.')
    scale = betas.sum() / fluctuations.sum()
    return fluctuations * scale
예제 #36
0
def calcTempFactors(modes, atoms):
    """Return temperature (β) factors for *modes* scaled to *atoms*.

    The square fluctuations calculated from *modes* are multiplied by the
    ratio of the summed experimental B-factors (``atoms.getBetas()``) to the
    summed square fluctuations.

    :raises TypeError: when the model of *modes* is not a ``GNMBase``
    :raises ValueError: when the model and *atoms* differ in atom count
    """

    enm = modes.getModel()
    if not isinstance(enm, GNMBase):
        raise TypeError('modes must come from GNM or ANM')
    if enm.numAtoms() != atoms.numAtoms():
        raise ValueError('modes and atoms must have same number of nodes')

    sq_flucts = calcSqFlucts(modes)
    exp_betas = atoms.getBetas()
    # Experimental values that are all tiny, or barely vary, make the
    # scaling questionable; the user is only warned about it.
    if exp_betas.max() < 0.5 or exp_betas.std() < 0.5:
        LOGGER.warning('Experimental B-factors are quite small or meaningless. The calculated B-factors may be incorrect.')
    return sq_flucts * (exp_betas.sum() / sq_flucts.sum())
예제 #37
0
def getWWPDBFTPServer():
    """Return name, host, and path of the currently set
    `wwPDB <http://www.wwpdb.org/>`_ FTP server.

    When no server is stored in the settings, a warning is logged and the
    default ``_WWPDB_RCSB`` entry is returned.  A stored path ending with
    ``data/structures/divided/pdb/`` is returned with that suffix removed;
    any other stored entry is returned unchanged."""

    server = SETTINGS.get('wwpdb_ftp', None)
    if server is not None:
        suffix = 'data/structures/divided/pdb/'
        path = server[2]
        if path.endswith(suffix):
            return (server[0], server[1], path[:-len(suffix)])
        return server

    LOGGER.warning('A wwPDB FTP server is not set, default FTP server '
                   'RCSB PDB is used. Use `setWWPDBFTPServer` function '
                   'to set a server close to your location.')
    return _WWPDB_RCSB
예제 #38
0
def showContactMap(enm, *args, **kwargs):
    """Show the Kirchhoff matrix of *enm* using
    :func:`~matplotlib.pyplot.spy`.

    Extra positional and keyword arguments are forwarded to ``spy``, whose
    return value is returned.  When the Kirchhoff matrix is not set, a
    warning is logged and **None** is returned.

    :raises TypeError: when *enm* is not a ``GNMBase`` instance"""

    from matplotlib import pyplot
    if not isinstance(enm, GNMBase):
        raise TypeError('model argument must be an ENM instance')

    matrix = enm.getKirchhoff()
    if matrix is None:
        LOGGER.warning('kirchhoff matrix is not set')
        return None

    image = pyplot.spy(matrix, *args, **kwargs)
    title = '{0} contact map'.format(enm.getTitle())
    pyplot.title(title)
    for set_label in (pyplot.xlabel, pyplot.ylabel):
        set_label('Residue index')
    if SETTINGS['auto_show']:
        showFigure()
    return image
예제 #39
0
def alignByEnsemble(atomics, ensemble):
    """Apply the transformations stored in *ensemble* to *atomics*.

    Transformations are matched to atomic objects by index, so *atomics* and
    *ensemble* must have the same number of members.

    :arg atomics: a set of :class:`.Atomic` objects to be aligned
    :type atomics: tuple, list, :class:`~numpy.ndarray`

    :arg ensemble: a :class:`.PDBEnsemble` or a :class:`.PDBConformation`
                   from which transformations can be extracted
    :type ensemble: :class:`.PDBEnsemble`, :class:`.PDBConformation`

    :returns: the result of applying each transformation, as a list, or the
              single result itself when there is exactly one member.  An
              item of *atomics* that is not an :class:`.Atomic` instance is
              reported with a warning and yields **None** in the output.

    :raises TypeError: when *atomics* is not list-like or *ensemble* has an
                       unsupported type
    :raises ValueError: when the lengths differ or a conformation has no
                        transformation
    """

    if not isListLike(atomics):
        raise TypeError('atomics must be list-like')

    if not isinstance(ensemble, (PDBEnsemble, PDBConformation)):
        raise TypeError('ensemble must be a PDBEnsemble or PDBConformation')
    # A single conformation is handled as an ensemble of one.
    conformations = ([ensemble] if isinstance(ensemble, PDBConformation)
                     else ensemble)

    if len(atomics) != len(conformations):
        raise ValueError('atomics and ensemble must have the same length')

    aligned = []
    for index, conformation in enumerate(conformations):
        transformation = conformation.getTransformation()
        if transformation is None:
            raise ValueError('transformations are not calculated, call '
                             '`superpose` or `iterpose`')

        target = atomics[index]
        if isinstance(target, Atomic):
            aligned.append(transformation.apply(target))
        else:
            LOGGER.warning(
                'No atomic object found for conformation {0}.'.format(index))
            aligned.append(None)

    return aligned[0] if len(aligned) == 1 else aligned
예제 #40
0
def showContactMap(enm, *args, **kwargs):
    """Plot the Kirchhoff matrix of *enm* with
    :func:`~matplotlib.pyplot.spy`.

    Positional and keyword arguments after *enm* are handed to ``spy`` and
    its return value is returned.  **None** is returned, after logging a
    warning, when the Kirchhoff matrix is not set.

    :raises TypeError: when *enm* is not a ``GNMBase`` instance"""

    import matplotlib.pyplot as pyplot
    if not isinstance(enm, GNMBase):
        raise TypeError('model argument must be an ENM instance')

    contacts = enm.getKirchhoff()
    if contacts is not None:
        figure = pyplot.spy(contacts, *args, **kwargs)
        pyplot.title('{0} contact map'.format(enm.getTitle()))
        axis_label = 'Residue index'
        pyplot.xlabel(axis_label)
        pyplot.ylabel(axis_label)
        if SETTINGS['auto_show']:
            showFigure()
    else:
        LOGGER.warning('kirchhoff matrix is not set')
        figure = None
    return figure
예제 #41
0
파일: pdbclusters.py 프로젝트: njekin/ProDy
def fetchPDBClusters(sqid=None):
    """Retrieve PDB sequence clusters.  PDB sequence clusters are results of
    the weekly clustering of protein chains in the PDB generated by blastclust.
    They are available at FTP site: ftp://resources.rcsb.org/sequence/clusters/

    *sqid* selects a single sequence identity level, which must be one of
    ``PDB_CLUSTERS``; by default all levels are downloaded.

    Per the original notes, this function will download about 10 Mb of data
    and the compressed files will be less than 4 Mb in size.  Files are saved
    compressed into the :file:`pdbclusters` folder of the package path.
    Cluster data can be loaded using :func:`loadPDBClusters` function and be
    accessed using :func:`listPDBCluster`.

    :raises ValueError: when *sqid* is not one of ``PDB_CLUSTERS``"""

    if sqid is not None:
        if sqid not in PDB_CLUSTERS:
            raise ValueError('sqid must be one of ' + PDB_CLUSTERS_SQID_STR)
        keys = [sqid]
    else:
        keys = list(PDB_CLUSTERS)

    PDB_CLUSTERS_PATH = os.path.join(getPackagePath(), 'pdbclusters')
    if not os.path.isdir(PDB_CLUSTERS_PATH):
        os.mkdir(PDB_CLUSTERS_PATH)
    # The progress total is the number of files actually requested, not the
    # number of known identity levels.
    LOGGER.progress('Downloading sequence clusters', len(keys),
                    '_prody_fetchPDBClusters')
    count = 0
    for i, x in enumerate(keys):
        filename = 'bc-{0}.out'.format(x)
        url = ('ftp://resources.rcsb.org/sequence/clusters/' + filename)
        try:
            inp = openURL(url)
        except IOError:
            # The identity level is filled into the message placeholder.
            LOGGER.warning('Clusters at {0}% sequence identity level could '
                           'not be downloaded.'.format(x))
            continue
        # Both handles are released even when opening the output or writing
        # to it fails.
        try:
            out = openFile(filename+'.gz', 'w', folder=PDB_CLUSTERS_PATH)
            try:
                out.write(inp.read())
            finally:
                out.close()
        finally:
            inp.close()
        count += 1
        LOGGER.update(i, '_prody_fetchPDBClusters')
    LOGGER.clear()
    # Success is judged against the requested files, so a single-level
    # download is reported as successful too.
    if len(keys) == count:
        LOGGER.info('All selected PDB clusters were downloaded successfully.')
    elif count == 0:
        LOGGER.warn('PDB clusters could not be downloaded.')
예제 #42
0
파일: analysis.py 프로젝트: prody/ProDy
def buildDirectInfoMatrix(msa, seqid=0.8, pseudo_weight=0.5, refine=False, **kwargs):
    """Return the direct information (DI) matrix calculated for *msa*.

    *msa* may be an :class:`.MSA` instance or a 2D Numpy character array.

    :arg seqid: sequences sharing this sequence identity or more with
        another sequence are regarded as similar when their weights are
        calculated (see :func:`.calcMeff`); passed on as ``theta = 1 - seqid``
    :arg pseudo_weight: weight for the pseudo count probability
    :arg refine: when true, the MSA is refined by the first sequence and the
        resulting matrix is smaller; sequences are not refined by default

    A warning is logged when *msa* has fewer than 250 sequences.  Extra
    keyword arguments are accepted but not used.
    """

    msa = getMSA(msa)
    from .msatools import msadipretest, msadirectinfo1, msadirectinfo2
    from numpy import matrix

    LOGGER.timeit("_di")
    too_few = msa.shape[0] < 250
    if too_few:
        LOGGER.warning(
            "DI performs the best with higher number of sequences, and "
            "minimal number of sequences is recommended as 250."
        )
    refine_flag = 1 if refine else 0

    # msadipretest reports the parameters that determine the matrix sizes
    length, q = msadipretest(msa, refine=refine_flag)
    states = q + 1
    side = length * q
    # (side x side) zero matrix for msadirectinfo1 to fill in
    cmat = matrix.dot(
        matrix(zeros((side, 1), float)),
        matrix(zeros((1, side), float)),
    )
    prob = zeros((length, states), float)

    # msadirectinfo1 returns the matrix to be inverted and the probabilities
    _meff, n, length, cmat, prob = msadirectinfo1(
        msa,
        cmat,
        prob,
        theta=1.0 - seqid,
        pseudocount_weight=pseudo_weight,
        refine=refine_flag,
        q=states,
    )

    cmat = cmat.I

    # final DI values are written by msadirectinfo2
    di = zeros((length, length), float)
    di = msadirectinfo2(n, length, cmat, prob, di, states)
    del prob, cmat
    LOGGER.report("DI matrix was calculated in %.2fs.", "_di")
    return di
예제 #43
0
    def _iterDonors(self):
        """Yield index pairs of donors whose atoms are both in the pointer.

        Donors are set with :meth:`setDonors`; a warning is logged when they
        are not set.  When the pointer covers at most half of the atom
        group, all donor pairs of the group are scanned; otherwise the
        ``_domap`` rows of the pointed atoms are walked."""

        if self._ag._donors is None:
            LOGGER.warning('donors are not set, use `AtomGroup.setDonors`')

        indices = self._getIndices()
        members = set(indices)
        if len(self) <= len(self._ag) / 2:
            for first, second in self._ag._iterDonors():
                if first in members and second in members:
                    yield first, second
            return

        for atom, partners in zip(indices, self._ag._domap[indices]):
            for partner in partners:
                # entries of -1 are skipped (presumably padding)
                if partner > -1 and partner in members:
                    yield atom, partner
            # presumably prevents yielding the same pair again from the
            # partner's own row
            members.remove(atom)
예제 #44
0
파일: pointer.py 프로젝트: SHZ66/ProDy
    def _iterBonds(self):
        """Yield index pairs of bonds whose atoms are both in the pointer.

        Bonds are set with :meth:`setBonds`; a warning is logged when they
        are not set.  When the pointer covers at most half of the atom
        group, all bonds of the group are scanned; otherwise the ``_bmap``
        rows of the pointed atoms are walked, provided ``any`` of ``_bmap``
        holds."""

        if self._ag._bonds is None:
            LOGGER.warning('bonds are not set, use `setBonds` or `inferBonds`')

        indices = self._getIndices()
        members = set(indices)
        if len(self) <= len(self._ag) / 2:
            for first, second in self._ag._iterBonds():
                if first in members and second in members:
                    yield first, second
        elif any(self._ag._bmap):
            for atom, partners in zip(indices, self._ag._bmap[indices]):
                for partner in partners:
                    # entries of -1 are skipped (presumably padding)
                    if partner > -1 and partner in members:
                        yield atom, partner
                # presumably prevents yielding the same bond again from the
                # partner's own row
                members.remove(atom)
예제 #45
0
파일: pointer.py 프로젝트: SHZ66/ProDy
    def _iterAngles(self):
        """Yield index triplets of angles whose atoms are all in the pointer.

        Angles are set with :meth:`setAngles`; a warning is logged when they
        are not set.  When the pointer covers at most a third of the atom
        group, all angles of the group are scanned; otherwise the
        ``_angmap`` rows of the pointed atoms are walked, provided ``any``
        of ``_angmap`` holds."""

        if self._ag._angles is None:
            LOGGER.warning('angles are not set, use `AtomGroup.setAngles`')

        indices = self._getIndices()
        members = set(indices)
        if len(self) <= len(self._ag) / 3:
            for first, second, third in self._ag._iterAngles():
                if first in members and second in members and third in members:
                    yield first, second, third
        elif any(self._ag._angmap):
            for atom, entries in zip(indices, self._ag._angmap[indices]):
                for second, third in entries:
                    # entries of -1 are skipped (presumably padding)
                    if (second > -1 and second in members
                            and third > -1 and third in members):
                        yield atom, second, third
                # presumably prevents yielding the same angle again from
                # another atom's row
                members.remove(atom)
예제 #46
0
    def _iterNBExclusions(self):
        """Yield index pairs of nbexclusions whose atoms are both in the
        pointer.

        Nbexclusions are set with :meth:`setNBExclusions`; a warning is
        logged when they are not set.  When the pointer covers at most half
        of the atom group, all nbexclusion pairs of the group are scanned;
        otherwise the ``_nbemap`` rows of the pointed atoms are walked."""

        if self._ag._nbexclusions is None:
            LOGGER.warning(
                'nbexclusions are not set, use `AtomGroup.setNBExclusions`')

        indices = self._getIndices()
        members = set(indices)
        if len(self) <= len(self._ag) / 2:
            for first, second in self._ag._iterNBExclusions():
                if first in members and second in members:
                    yield first, second
            return

        for atom, partners in zip(indices, self._ag._nbemap[indices]):
            for partner in partners:
                # entries of -1 are skipped (presumably padding)
                if partner > -1 and partner in members:
                    yield atom, partner
            # presumably prevents yielding the same pair again from the
            # partner's own row
            members.remove(atom)
예제 #47
0
파일: __init__.py 프로젝트: njekin/ProDy
def runTests(*mods, **kwargs):
    """Run the test modules named in *mods*, or all of them by default.

    Names are looked up in ``MODULES``.  When :mod:`nose` can be imported,
    tests are run through ``numpy.testing.Tester`` with the *label*
    (default ``'fast'``) and *verbose* (default 1) keyword arguments;
    otherwise :mod:`unittest` is used with the *verbose* (default 2),
    *descriptions* (default **True**) and *stream* (default ``sys.stderr``)
    keyword arguments.

    :raises ValueError: when a name in *mods* is not a key of ``MODULES``
    """

    if not mods:
        modules = MODULES.values() # PY3K: OK
    else:
        modules = []
        for name in mods:
            try:
                module = MODULES[name]
            except KeyError:
                raise ValueError(name + ' is not a valid test module name')
            modules.append(module)

    # nose is imported only to find out which runner is usable
    try:
        import nose
    except ImportError:
        nose_found = False
    else:
        nose_found = True

    if nose_found:
        from numpy.testing import Tester
        verbose = kwargs.get('verbose', 1)
        label = kwargs.get('label', 'fast')

        targets = modules if mods else ['prody.tests']
        for target in targets:
            Tester(target).test(label=label, verbose=verbose)
        return

    LOGGER.warning('Failed to import nose, using unittest for testing.')
    LOGGER.info('nose is available at http://readthedocs.org/docs/nose/')
    from sys import stderr

    verbosity = kwargs.get('verbose', 2)
    descriptions = kwargs.get('descriptions', True)
    stream = kwargs.get('stream', stderr)

    runner = unittest.TextTestRunner(stream, descriptions, verbosity)
    loader = unittest.defaultTestLoader
    for module in modules:
        runner.run(loader.loadTestsFromName(module))
예제 #48
0
파일: __init__.py 프로젝트: npabon/ProDy
def runTests(*mods, **kwargs):
    """Run the named test modules, or the whole test suite by default.

    Each name in *mods* must be a key of ``MODULES``.  With :mod:`nose`
    importable, ``numpy.testing.Tester`` runs the tests using the *label*
    (default ``'fast'``) and *verbose* (default 1) keyword arguments.
    Without it, a :class:`unittest.TextTestRunner` is used, configured by
    the *stream* (default ``sys.stderr``), *descriptions* (default **True**)
    and *verbose* (default 2) keyword arguments.

    :raises ValueError: when a name in *mods* is not a key of ``MODULES``
    """

    if mods:
        modules = []
        for mod in mods:
            try:
                selected = MODULES[mod]
            except KeyError:
                raise ValueError(mod + ' is not a valid test module name')
            modules.append(selected)
    else:
        modules = MODULES.values()  # PY3K: OK

    try:
        # imported only to decide between the two runners below
        import nose
    except ImportError:
        LOGGER.warning('Failed to import nose, using unittest for testing.')
        LOGGER.info('nose is available at http://readthedocs.org/docs/nose/')
        from sys import stderr

        runner = unittest.TextTestRunner(
            kwargs.get('stream', stderr),
            kwargs.get('descriptions', True),
            kwargs.get('verbose', 2))

        load = unittest.defaultTestLoader.loadTestsFromName
        for module in modules:
            runner.run(load(module))
    else:
        from numpy.testing import Tester
        options = {'label': kwargs.get('label', 'fast'),
                   'verbose': kwargs.get('verbose', 1)}

        if mods:
            for module in modules:
                Tester(module).test(**options)
        else:
            Tester('prody.tests').test(**options)
예제 #49
0
파일: pointer.py 프로젝트: SHZ66/ProDy
    def _iterDihedrals(self):
        """Yield index quadruples of dihedrals whose atoms are all in the
        pointer.

        Dihedrals are set with :meth:`setDihedrals`; a warning is logged
        when they are not set.  When the pointer covers at most a quarter of
        the atom group, all dihedrals of the group are scanned; otherwise
        the ``_dmap`` rows of the pointed atoms are walked, provided ``any``
        of ``_dmap`` holds."""

        if self._ag._dihedrals is None:
            LOGGER.warning(
                'dihedrals are not set, use `AtomGroup.setDihedrals`')

        indices = self._getIndices()
        members = set(indices)
        if len(self) <= len(self._ag) / 4:
            for first, second, third, fourth in self._ag._iterDihedrals():
                if (first in members and second in members
                        and third in members and fourth in members):
                    yield first, second, third, fourth
        elif any(self._ag._dmap):
            for atom, entries in zip(indices, self._ag._dmap[indices]):
                for second, third, fourth in entries:
                    # entries of -1 are skipped (presumably padding)
                    if (second > -1 and second in members
                            and third > -1 and third in members
                            and fourth > -1 and fourth in members):
                        yield atom, second, third, fourth
                # presumably prevents yielding the same dihedral again from
                # another atom's row
                members.remove(atom)
예제 #50
0
파일: pointer.py 프로젝트: SHZ66/ProDy
    def _iterCrossterms(self):
        """Yield index quadruples of crossterms whose atoms are all in the
        pointer.

        Crossterms are set with :meth:`setCrossterms`; a warning is logged
        when they are not set.  When the pointer covers at most a quarter of
        the atom group, all crossterms of the group are scanned; otherwise
        the ``_cmap`` rows of the pointed atoms are walked, provided ``any``
        of ``_cmap`` holds."""

        if self._ag._crossterms is None:
            LOGGER.warning(
                'crossterms are not set, use `AtomGroup.setCrossterms`')

        indices = self._getIndices()
        members = set(indices)
        if len(self) <= len(self._ag) / 4:
            for first, second, third, fourth in self._ag._iterCrossterms():
                if (first in members and second in members
                        and third in members and fourth in members):
                    yield first, second, third, fourth
        elif any(self._ag._cmap):
            for atom, entries in zip(indices, self._ag._cmap[indices]):
                for second, third, fourth in entries:
                    # entries of -1 are skipped (presumably padding)
                    if (second > -1 and second in members
                            and third > -1 and third in members
                            and fourth > -1 and fourth in members):
                        yield atom, second, third, fourth
                # presumably prevents yielding the same crossterm again from
                # another atom's row
                members.remove(atom)
예제 #51
0
    def getCoordsets(self, indices=None):
        """Return the coordinate sets at the given frame *indices*.

        :arg indices: **None** for all frames, an integer, a slice, or a
            list/array of integers.  Slices and lists are read in ascending
            order and lists are de-duplicated.
        :returns: array of shape ``(n_frames, n_selected_atoms, 3)``; when
            fewer frames than requested can be parsed, a warning is logged
            and only the parsed frames are returned
        :raises ValueError: when the file is closed
        :raises TypeError: when *indices* has an unsupported type

        The current frame position is restored before returning.
        """

        if self._closed:
            raise ValueError('I/O operation on closed file')
        if indices is None:
            indices = np.arange(self._n_csets)
        elif isinstance(indices, int):
            indices = np.array([indices])
        elif isinstance(indices, slice):
            indices = np.arange(*indices.indices(self._n_csets))
            indices.sort()
        elif isinstance(indices, (list, np.ndarray)):
            indices = np.unique(indices)
        else:
            raise TypeError('indices must be an integer or a list of integers')

        nfi = self._nfi
        self.reset()

        n_atoms = self.numSelected()
        coords = np.zeros((len(indices), n_atoms, 3), self._dtype)

        # Index of the frame the next read will return.  ``reset`` is
        # assumed to rewind to frame 0, so the first requested index must be
        # reached by skipping exactly that many frames.
        position = 0
        read_frame = self.nextCoordset
        for i, index in enumerate(indices):
            gap = index - position
            if gap > 0:
                self.skip(gap)
            xyz = read_frame()
            if xyz is None:
                LOGGER.warning('Expected {0} frames, but parsed {1}.'
                               .format(len(indices), i))
                self.goto(nfi)
                return coords[:i]
            coords[i] = xyz
            position = index + 1

        self.goto(nfi)
        return coords
예제 #52
0
파일: trajfile.py 프로젝트: fongchun/ProDy
    def getCoordsets(self, indices=None):
        """Return coordinate sets for the requested frame *indices*.

        :arg indices: **None** (all frames), an integer, a slice, or a
            list/array of integers.  Frames of a slice or list are read in
            ascending order; repeated list entries are read once.
        :returns: array of shape ``(n_frames, n_selected_atoms, 3)``,
            shortened to the frames actually parsed (with a warning) when
            the file ends early
        :raises ValueError: when the file is closed
        :raises TypeError: when *indices* has an unsupported type

        The frame position held before the call is restored afterwards.
        """

        if self._closed:
            raise ValueError('I/O operation on closed file')
        if indices is None:
            indices = np.arange(self._n_csets)
        elif isinstance(indices, int):
            indices = np.array([indices])
        elif isinstance(indices, slice):
            indices = np.arange(*indices.indices(self._n_csets))
            indices.sort()
        elif isinstance(indices, (list, np.ndarray)):
            indices = np.unique(indices)
        else:
            raise TypeError('indices must be an integer or a list of integers')

        nfi = self._nfi
        self.reset()

        n_atoms = self.numSelected()
        coords = np.zeros((len(indices), n_atoms, 3), self._dtype)

        # ``prev`` is the index of the last frame consumed.  Nothing has
        # been consumed after ``reset`` (assumed to rewind to frame 0), so
        # it starts at -1; starting at 0 made a first index of 1 or more
        # read one frame too early.
        prev = -1
        read_next = self.nextCoordset
        for i, index in enumerate(indices):
            diff = index - prev
            if diff > 1:
                self.skip(diff - 1)
            xyz = read_next()
            if xyz is None:
                LOGGER.warning('Expected {0} frames, but parsed {1}.'
                               .format(len(indices), i))
                self.goto(nfi)
                return coords[:i]
            coords[i] = xyz
            prev = index

        self.goto(nfi)
        return coords
예제 #53
0
def setWWPDBFTPServer(key):
    """Set the `wwPDB <http://www.wwpdb.org/>`_ FTP server used for
    downloading PDB structures when needed.  *key* is lower-cased and looked
    up in ``WWPDB_FTP_SERVERS``; the keywords listed for each server are:

    +---------------------------+-----------------------------+
    | wwPDB FTP server          | *Key* (case insensitive)    |
    +===========================+=============================+
    | RCSB PDB (USA) (default)  | RCSB, USA, US               |
    +---------------------------+-----------------------------+
    | PDBe (Europe)             | PDBe, Europe, Euro, EU      |
    +---------------------------+-----------------------------+
    | PDBj (Japan)              | PDBj, Japan, Jp             |
    +---------------------------+-----------------------------+

    A recognized server is stored in the settings, which are then saved.
    An unrecognized *key* only produces a warning.
    """

    selected = WWPDB_FTP_SERVERS.get(key.lower())
    if selected is None:
        LOGGER.warning('{0:s} is not a valid key.'.format(key))
        return
    SETTINGS['wwpdb_ftp'] = selected
    SETTINGS.save()
예제 #54
0
 def __add__(self, other):
     """Return an :class:`~.AtomMap` combining *self* and *other*, with the
     atoms of *self* first and the order of pointed atoms preserved.

     Both operands must point to the same atom group.  When their active
     coordinate set indices differ, a warning is logged and the index of
     *self* is used for the result.

     :raises TypeError: when *other* is not an ``AtomPointer``
     :raises ValueError: when the operands point to different atom groups
     """

     if not isinstance(other, AtomPointer):
         message = ('unsupported operand type(s) for +: {0:s} and {1:s}'
                    .format(repr(type(self).__name__),
                            repr(type(other).__name__)))
         raise TypeError(message)

     ag = self._ag
     if ag != other._ag:
         raise ValueError('AtomPointer instances must point to the same '
                          'AtomGroup instance')
     acsi = self.getACSIndex()
     if acsi != other.getACSIndex():
         LOGGER.warning('Active coordset indices of atoms are not the same.'
                        ' Result will have ACSI {0:d}.'.format(acsi))

     title = '({0:s}) + ({1:s})'.format(str(self), str(other))
     indices = np.concatenate([self._getIndices(), other._getIndices()])
     # mapping and dummy entries of *other* are shifted past those of *self*
     offset = len(self)

     if isinstance(self, AtomMap):
         own_mapping = self._getMapping()
         own_dummies = self._dummies
     else:
         own_mapping = np.arange(offset)
         own_dummies = np.array([])

     if isinstance(other, AtomMap):
         other_mapping = other._getMapping() + offset
         other_dummies = other._dummies + offset
     else:
         other_mapping = np.arange(offset, offset + len(other))
         other_dummies = np.array([])

     mapping = np.concatenate([own_mapping, other_mapping])
     dummies = np.concatenate([own_dummies, other_dummies])
     return AtomMap(ag, indices, mapping, dummies, title, acsi)
예제 #55
0
파일: gnm.py 프로젝트: prody/ProDy
    def _eigh(M, eigvals=None, turbo=True):
        """Return eigenvalues and eigenvectors of the symmetric matrix *M*.

        ``linalg``, ``dof``, ``n_modes`` and ``LOGGER`` are taken from the
        enclosing scope.

        :arg M: dense or SciPy sparse matrix to decompose
        :arg eigvals: optional ``(first, last)`` index range of the
            eigenvalues to calculate; only honored when ``linalg`` is SciPy
        :arg turbo: forwarded to ``linalg.eigh`` for dense matrices, but
            switched off whenever *eigvals* is given
        :returns: ``(values, vectors)``
        :raises ImportError: when *M* is sparse and ``scipy.sparse.linalg``
            cannot be imported
        """
        if linalg.__package__.startswith('scipy'):
            from scipy.sparse import issparse

            if eigvals:
                turbo = False
            if not issparse(M):
                values, vectors = linalg.eigh(M, turbo=turbo, eigvals=eigvals)
            else:
                try:
                    from scipy.sparse import linalg as scipy_sparse_la
                except ImportError:
                    raise ImportError('failed to import scipy.sparse.linalg, '
                                      'which is required for sparse matrix '
                                      'decomposition')
                if eigvals:
                    j = eigvals[0]
                    k = eigvals[-1] + 1
                else:
                    j = 0
                    k = dof

                # the sparse solver cannot return all eigenvalues, so the
                # request is shortened by one when it reaches ``dof``
                if k >= dof:
                    k -= 1
                    LOGGER.warning('Cannot calculate all eigenvalues for sparse matrices, thus '
                                   'the last eigenvalue is omitted. See scipy.sparse.linalg.eigsh '
                                   'for more information')
                values, vectors = scipy_sparse_la.eigsh(M, k=k, which='SA')
                values = values[j:k]
                vectors = vectors[:, j:k]
        else:
            # ``n_modes`` must only be read here.  Assigning to it anywhere
            # in this function would make it a local name, and this test
            # would then raise UnboundLocalError on every call.
            if n_modes is not None:
                LOGGER.info('Scipy is not found, all modes were calculated.')
            values, vectors = linalg.eigh(M)
        return values, vectors
예제 #56
0
def alignPDBEnsemble(ensemble, suffix='_aligned', outdir='.', gzip=False):
    """Align PDB files using transformations from *ensemble*, which may be
    a :class:`.PDBEnsemble` or a :class:`.PDBConformation` instance. Label of
    the conformation (see :meth:`~.PDBConformation.getLabel`) will be used to
    determine the PDB structure and model number.  First four characters of
    the label are expected to be the PDB identifier and ending numbers to be
    the model number.  For example, the :class:`.Transformation` from
    conformation with label *2k39_ca_selection_'resnum_<_71'_m116* will be
    applied to 116th model of structure **2k39**.  After applicable
    transformations are made, structure will be written into *outdir* as
    :file:`2k39_aligned.pdb`.  If *gzip* is **True**, output files will be
    compressed.  Return value is the output filename or list of filenames, in
    the order files are processed.  **None** takes the place of a filename
    when the PDB file is not found or the model number is out of range.
    Note that if multiple models from a file are aligned, that filename will
    appear in the list multiple times.

    :raises TypeError: when *ensemble* is of an unexpected type
    :raises ValueError: when a conformation has no transformation"""

    if not isinstance(ensemble, (PDBEnsemble, PDBConformation)):
        raise TypeError('ensemble must be a PDBEnsemble or PDBConformation')
    if isinstance(ensemble, PDBConformation):
        ensemble = [ensemble]
    ext = '.gz' if gzip else ''
    output = []
    # multi-model structures, kept in memory so that every model can be
    # transformed before the file is written once at the end
    pdbdict = {}
    for conf in ensemble:
        trans = conf.getTransformation()
        if trans is None:
            raise ValueError('transformations are not calculated, call '
                             '`superpose` or `iterpose`')
        label = conf.getLabel()

        pdb = label[:4]
        # look in the cache first, fetch only structures not seen before
        filename = pdbdict.get(pdb)
        if filename is None:
            filename = fetchPDB(pdb)
        if filename is None:
            LOGGER.warning('PDB file for conformation {0} is not found.'
                           .format(label))
            output.append(None)
            continue
        LOGGER.info('Parsing PDB file {0} for conformation {1}.'
                    .format(pdb, label))

        # model number is whatever follows the last 'm' in the label,
        # provided that it is past the PDB identifier and is numeric
        acsi = None
        model = label.rfind('m')
        if model > 3:
            model = label[model+1:]
            if model.isdigit():
                acsi = int(model) - 1
                LOGGER.info('Applying transformation to model {0}.'
                            .format(model))

        # cached entries are already parsed, others are file names
        if isinstance(filename, str):
            ag = parsePDB(filename)
        else:
            ag = filename

        if acsi is not None:
            if acsi >= ag.numCoordsets():
                LOGGER.warning('Model number {0} for {1} is out of range.'
                               .format(model, pdb))
                output.append(None)
                continue
            ag.setACSIndex(acsi)
        trans.apply(ag)
        outfn = os.path.join(outdir, pdb + suffix + '.pdb' + ext)
        if ag.numCoordsets() > 1:
            pdbdict[pdb] = ag
        else:
            writePDB(outfn, ag)
        output.append(os.path.normpath(outfn))

    for pdb, ag in pdbdict.items():  # PY3K: OK
        writePDB(os.path.join(outdir, pdb + suffix + '.pdb' + ext), ag)
    if len(output) == 1:
        return output[0]
    else:
        return output
예제 #57
0
파일: gnm.py 프로젝트: sixpi/ProDy
    def calcModes(self, n_modes=20, zeros=False, turbo=True, hinges=True):
        """Calculate normal modes.  This method uses :func:`scipy.linalg.eigh`
        function to diagonalize the Kirchhoff matrix. When Scipy is not found,
        :func:`numpy.linalg.eigh` is used.  A Kirchhoff matrix that is not a
        Numpy array is passed to the sparse solver in
        :mod:`scipy.sparse.linalg`.

        :arg n_modes: number of non-zero eigenvalues/vectors to calculate.
              If ``None`` is given, all modes will be calculated.
        :type n_modes: int or None, default is 20

        :arg zeros: If ``True``, modes with zero eigenvalues will be kept.
        :type zeros: bool, default is ``False``

        :arg turbo: Use a memory intensive, but faster way to calculate modes.
        :type turbo: bool, default is ``True``

        :arg hinges: Identify hinge sites after modes are computed.
        :type hinges: bool, default is ``True``

        :raises ValueError: when Kirchhoff matrix is not built or set
        """

        if self._kirchhoff is None:
            raise ValueError('Kirchhoff matrix is not built or set')
        assert n_modes is None or isinstance(n_modes, int) and n_modes > 0, \
            'n_modes must be a positive integer'
        assert isinstance(zeros, bool), 'zeros must be a boolean'
        assert isinstance(turbo, bool), 'turbo must be a boolean'
        linalg = importLA()
        start = time.time()
        shift = 0
        if linalg.__package__.startswith('scipy'):
            if n_modes is None or n_modes >= self._dof:
                # everything is requested, no index range is needed
                eigvals = None
                n_modes = self._dof
            else:
                # inclusive index range, one extra for the zero eigenvalue
                eigvals = (0, n_modes + shift)
            if eigvals:
                turbo = False
            if isinstance(self._kirchhoff, np.ndarray):
                values, vectors = linalg.eigh(self._kirchhoff, turbo=turbo,
                                              eigvals=eigvals)
            else:
                try:
                    from scipy.sparse import linalg as scipy_sparse_la
                except ImportError:
                    raise ImportError('failed to import scipy.sparse.linalg, '
                                      'which is required for sparse matrix '
                                      'decomposition')
                # NOTE(review): k is n_modes + 1 even when n_modes was set to
                # self._dof above -- confirm that the solver accepts it
                try:
                    values, vectors = (
                        scipy_sparse_la.eigsh(self._kirchhoff,
                                              k=n_modes + 1, which='SA'))
                except AttributeError:
                    # eigsh is missing, try the alternative function name;
                    # other solver errors are not masked by this fallback
                    values, vectors = (
                        scipy_sparse_la.eigen_symmetric(self._kirchhoff,
                                                        k=n_modes + 1,
                                                        which='SA'))
        else:
            if n_modes is not None:
                LOGGER.info('Scipy is not found, all modes are calculated.')
            values, vectors = linalg.eigh(self._kirchhoff)
        # exactly one eigenvalue below ZERO is expected, otherwise the
        # number of leading modes that are dropped is adjusted
        n_zeros = sum(values < ZERO)
        if n_zeros < 1:
            LOGGER.warning('Less than 1 zero eigenvalues are calculated.')
            shift = n_zeros - 1
        elif n_zeros > 1:
            LOGGER.warning('More than 1 zero eigenvalues are calculated.')
            shift = n_zeros - 1
        if zeros:
            # 1 + shift becomes 0, so nothing is dropped
            shift = -1
        self._eigvals = values[1+shift:]
        self._vars = 1 / self._eigvals
        self._trace = self._vars.sum()
        self._array = vectors[:, 1+shift:]
        self._n_modes = len(self._eigvals)
        if hinges:
            self.calcHinges()
        LOGGER.debug('{0} modes were calculated in {1:.2f}s.'
                     .format(self._n_modes, time.time()-start))
예제 #58
0
파일: anm.py 프로젝트: fongchun/ProDy
    def calcModes(self, n_modes=20, zeros=False, turbo=True):
        """Calculate normal modes.  This method uses :func:`scipy.linalg.eigh`
        function to diagonalize the Hessian matrix. When Scipy is not found,
        :func:`numpy.linalg.eigh` is used.  A Hessian matrix that is not a
        Numpy array is passed to the sparse solver in
        :mod:`scipy.sparse.linalg`.

        :arg n_modes: number of non-zero eigenvalues/vectors to calculate.
            If **None** or ``'all'`` is given, all modes will be calculated.
        :type n_modes: int or None, default is 20

        :arg zeros: If **True**, modes with zero eigenvalues will be kept.
        :type zeros: bool, default is **False**

        :arg turbo: Use a memory intensive, but faster way to calculate modes.
        :type turbo: bool, default is **True**

        :raises ValueError: when Hessian matrix is not built or set
        """

        if self._hessian is None:
            raise ValueError('Hessian matrix is not built or set')
        if str(n_modes).lower() == 'all':
            n_modes = None
        assert n_modes is None or isinstance(n_modes, int) and n_modes > 0, \
            'n_modes must be a positive integer'
        assert isinstance(zeros, bool), 'zeros must be a boolean'
        assert isinstance(turbo, bool), 'turbo must be a boolean'
        self._clear()
        linalg = importLA()
        LOGGER.timeit('_anm_calc_modes')
        # with six zero eigenvalues expected, 1 + shift leading modes
        # are dropped
        shift = 5
        if linalg.__package__.startswith('scipy'):
            if n_modes is None or n_modes >= self._dof:
                # everything is requested, no index range is needed
                eigvals = None
                n_modes = self._dof
            else:
                # inclusive index range that also covers the zero modes
                eigvals = (0, n_modes + shift)
            if eigvals:
                turbo = False
            if isinstance(self._hessian, np.ndarray):
                values, vectors = linalg.eigh(self._hessian, turbo=turbo,
                                              eigvals=eigvals)
            else:
                try:
                    from scipy.sparse import linalg as scipy_sparse_la
                except ImportError:
                    raise ImportError('failed to import scipy.sparse.linalg, '
                                      'which is required for sparse matrix '
                                      'decomposition')
                # NOTE(review): k is n_modes + 6 even when n_modes was set to
                # self._dof above -- confirm that the solver accepts it
                try:
                    values, vectors = (
                        scipy_sparse_la.eigsh(self._hessian, k=n_modes+6,
                                              which='SA'))
                except AttributeError:
                    # eigsh is missing, try the alternative function name;
                    # other solver errors are not masked by this fallback
                    values, vectors = (
                        scipy_sparse_la.eigen_symmetric(self._hessian,
                                                        k=n_modes+6,
                                                        which='SA'))

        else:
            if n_modes is not None:
                LOGGER.info('Scipy is not found, all modes are calculated.')
            values, vectors = np.linalg.eigh(self._hessian)
        n_zeros = sum(values < ZERO)

        if n_zeros < 6:
            LOGGER.warning('Less than 6 zero eigenvalues are calculated.')
            shift = n_zeros - 1
        elif n_zeros > 6:
            LOGGER.warning('More than 6 zero eigenvalues are calculated.')
            shift = n_zeros - 1
        if zeros:
            # 1 + shift becomes 0, so nothing is dropped
            shift = -1
        self._eigvals = values[1+shift:]
        self._vars = 1 / self._eigvals
        self._trace = self._vars.sum()

        # eigenvectors are sliced exactly like the eigenvalues, so that the
        # two always have matching lengths (also when shift is 0)
        self._array = vectors[:, 1+shift:].copy()
        self._n_modes = len(self._eigvals)
        LOGGER.report('{0} modes were calculated in %.2fs.'
                     .format(self._n_modes), label='_anm_calc_modes')