def _sqFluctProfile(modes, argname):
    """Return the fluctuation profile for *modes*, computing it with
    :func:`calcSqFlucts` when an NMA/ModeSet is given.  *argname* is only
    used to build the error message."""

    if isinstance(modes, (NMA, ModeSet)):
        return calcSqFlucts(modes)
    if isListLike(modes):
        return modes
    raise TypeError(
        '{0} should be a profile or an NMA or ModeSet object'.format(argname))


def calcSquareInnerProduct(modes1, modes2):
    """Returns the square inner product (SIP) of fluctuations [SK02]_.
    This function returns a single number.

    :arg modes1: a fluctuation profile, or a model from which the profile
        is obtained with :func:`calcSqFlucts`
    :type modes1: list-like, :class:`.NMA`, :class:`.ModeSet`

    :arg modes2: a fluctuation profile or model, as for *modes1*
    :type modes2: list-like, :class:`.NMA`, :class:`.ModeSet`

    :raises TypeError: if either argument is neither list-like nor an
        NMA/ModeSet instance

    .. [SK02] Kundu S, Melton JS, Sorensen DC, Phillips GN: Dynamics of
        proteins in crystals: comparison of experiment with simple models.
        Biophys J. 2002, 83: 723-732.
    """

    w1 = _sqFluctProfile(modes1, 'modes1')
    w2 = _sqFluctProfile(modes2, 'modes2')

    # (w1 . w2)^2 normalised by the squared lengths of both profiles.
    return np.dot(w1, w2)**2 / (np.dot(w1, w1) * np.dot(w2, w2))
# Source project: nffaruk/ProDy
    def setIndices(self, value):
        """Store *value* as the indices of this ensemble.

        :arg value: indices, one entry per atom of the ensemble
        :type value: list, :class:`~numpy.ndarray`

        :raises TypeError: if *value* is not list-like
        :raises ValueError: if the number of entries differs from the
            number of atoms of the ensemble
        """
        if not isListLike(value):
            raise TypeError('value must be a list or numpy.ndarray instance')

        # The array view is used for the length check only; the object
        # passed by the caller is what gets stored.
        n_indices = len(asarray(value))
        n_atoms = self._n_atoms
        if n_indices != n_atoms:
            message = ('length mismatch between this ensemble '
                       '(%d) and indices (%d)' % (n_atoms, n_indices))
            raise ValueError(message)

        self._indices = value
def fetchPDBClusters(sqid=None):
    """Retrieve PDB sequence clusters.  PDB sequence clusters are results of
    the weekly clustering of protein chains in the PDB generated by blastclust.
    They are available at FTP site:

    This function will download about 10 Mb of data and save it after
    compressing in your home directory in :file:`.prody/pdbclusters`.
    Compressed files will be less than 4 Mb in size.  Cluster data can
    be loaded using :func:`loadPDBClusters` function and be accessed
    using :func:`listPDBCluster`."""

    if sqid is not None:
        if isListLike(sqid):
            for s in sqid:
                if s not in PDB_CLUSTERS:
                    raise ValueError('sqid must be one or more of ' +
            keys = list(sqid)
            if sqid not in PDB_CLUSTERS:
                raise ValueError('sqid must be one or more of ' +
            keys = [sqid]
        keys = list(PDB_CLUSTERS)

    PDB_CLUSTERS_PATH = os.path.join(getPackagePath(), 'pdbclusters')
    if not os.path.isdir(PDB_CLUSTERS_PATH):
    LOGGER.progress('Downloading sequence clusters', len(keys),
    count = 0
    for i, x in enumerate(keys):
        filename = 'bc-{0}.out'.format(x)
        url = ('' + filename)
            inp = openURL(url)
        except IOError:
            LOGGER.warning('Clusters at {0}% sequence identity level could '
                           'not be downloaded.'.format(x))
            out = openFile(filename + '.gz', 'w', folder=PDB_CLUSTERS_PATH)
            count += 1
        LOGGER.update(i, label='_prody_fetchPDBClusters')
    if len(keys) == count:'All selected PDB clusters were downloaded successfully.')
    elif count == 0:
        LOGGER.warn('PDB clusters could not be downloaded.')
    def setApix(self, apix):
        """Set *apix* and recompute ``Lx``, ``Ly`` and ``Lz`` as the three
        *apix* components multiplied by ``NS``, ``NR`` and ``NC``.

        :arg apix: a single value applied to all three axes, or three values
        :type apix: number, list-like

        :raises TypeError: if *apix* is neither list-like nor a real number
        :raises ValueError: if a list-like *apix* does not hold 3 values
        """
        if not isListLike(apix):
            from numbers import Real

            # Building a 3-item list never fails, so the scalar has to be
            # validated explicitly for the documented TypeError to fire.
            if not isinstance(apix, Real):
                raise TypeError('apix must be a single value or list-like')
            apix = [apix, apix, apix]

        if len(apix) != 3:
            raise ValueError('apix must be a single value or 3 values')

        self._apix = apix
        self.Lx = apix[0] * self.NS
        self.Ly = apix[1] * self.NR
        self.Lz = apix[2] * self.NC
def alignByEnsemble(atomics, ensemble):
    """Align a set of :class:`.Atomic` objects using transformations from *ensemble*, 
    which may be a :class:`.PDBEnsemble` or a :class:`.PDBConformation` instance. 
    Transformations will be applied based on indices so *atomics* and *ensemble* must 
    have the same number of members.

    :arg atomics: a set of :class:`.Atomic` objects to be aligned
    :type atomics: tuple, list, :class:`~numpy.ndarray`

    :arg ensemble: a :class:`.PDBEnsemble` or a :class:`.PDBConformation` from which 
                   transformations can be extracted
    :type ensemble: :class:`.PDBEnsemble`, :class:`.PDBConformation`

    if not isListLike(atomics):
        raise TypeError('atomics must be list-like')

    if not isinstance(ensemble, (PDBEnsemble, PDBConformation)):
        raise TypeError('ensemble must be a PDBEnsemble or PDBConformation')
    if isinstance(ensemble, PDBConformation):
        ensemble = [ensemble]

    if len(atomics) != len(ensemble):
        raise ValueError('atomics and ensemble must have the same length')

    output = []
    for i, conf in enumerate(ensemble):
        trans = conf.getTransformation()
        if trans is None:
            raise ValueError('transformations are not calculated, call '
                             '`superpose` or `iterpose`')

        ag = atomics[i]
        if not isinstance(ag, Atomic):
                'No atomic object found for conformation {0}.'.format(i))


    if len(output) == 1:
        return output[0]
        return output
def sliceModelByMask(model, mask, norm=False):
    """Returns a part of the *model* indicated by *mask*.  Note that
    normal modes (eigenvectors) are not normalized unless *norm* is **True**.

    :arg model: NMA model instance to be sliced
    :type model: :class:`.NMA`

    :arg mask: an Integer array or a Boolean array where ``"True"`` indicates
        the parts being selected
    :type mask: list, :class:`~numpy.ndarray`

    :arg norm: whether to normalize eigenvectors, default **False**
    :type norm: bool

    :returns: :class:`.NMA`

    :raises TypeError: if *mask* is not list-like
    :raises ValueError: if a Boolean *mask* does not have one entry per
        atom, or an index in *mask* is outside ``[0, numAtoms)``"""

    if not isListLike(mask):
        raise TypeError(
            'mask must be either a list or a numpy.ndarray, not {0}'.format(
                type(mask).__name__))

    # Lists and tuples pass the check above but have no dtype/min/max.
    mask = np.asarray(mask)
    n_atoms = model.numAtoms()

    if mask.dtype == np.dtype('bool'):
        if len(mask) != n_atoms:
            raise ValueError('number of atoms in model and mask must be equal')
        which = mask
    else:
        which = np.zeros(n_atoms, dtype=bool)
        # An empty index list selects nothing; min()/max() would fail on it.
        if mask.size:
            if mask.min() < 0 or mask.max() >= n_atoms:
                raise ValueError('index in mask exceeds range')
            which[mask] = True

    array = model._getArray()

    nma = type(model)('{0} sliced'.format(model.getTitle()))
    if model.is3d():
        # three consecutive rows (x, y, z) per selected atom
        which = np.repeat(which, 3)

    # Boolean indexing copies, so the model's own array is left untouched.
    evecs = array[which, :]
    if norm:
        evecs = evecs / np.linalg.norm(evecs, axis=0)

    nma.setEigens(evecs, model.getEigvals())
    return nma
    def __init__(self, parsingDict, prog, title='unnamed', indices=None):
        """Wrap *parsingDict* and build one :class:`StarDataBlock` per entry.

        :arg parsingDict: dictionary holding the parsed data
        :arg prog: stored as ``_prog``
        :arg title: title of this object, default ``'unnamed'``
        :arg indices: optional selection of data blocks; each item is
            either a key or a list-like whose first two items are passed
            to :class:`StarDataBlock`.  When given, ``_dict`` is rebuilt
            to contain only the selected blocks.
        """
        self._title = title
        self._dict = parsingDict
        self._prog = prog
        self._indices = indices

        if indices is None:
            self.dataBlocks = [StarDataBlock(self, key) for key in self._dict]
        else:
            # The blocks are created while self._dict is still the full
            # parsing dictionary; it is replaced only afterwards.
            self.dataBlocks = []
            keys = []
            for idx in indices:
                if isListLike(idx):
                    keys.append(idx[0])
                    self.dataBlocks.append(StarDataBlock(self, idx[0], idx[1]))
                else:
                    # A plain key is used whole; idx[0] would be its
                    # first character.
                    keys.append(idx)
                    self.dataBlocks.append(StarDataBlock(self, idx))

            self._dict = OrderedDict()
            for key, block in zip(keys, self.dataBlocks):
                self._dict[key] = block._dict

        self.numDataBlocks = len(self.dataBlocks)
def fetchPDBs(*pdb, **kwargs):
    """Wrapper function to fetch multiple files from the PDB.
    If no format is given, it tries PDB then mmCIF then EMD.

    :arg pdb: one PDB identifier or filename, or a list of them.
        If needed, PDB files are downloaded using :func:`.fetchPDB()` function.

    :arg format: optional keyword; when given, it is used for every entry
        and no fallback formats are tried.  Remaining keyword arguments
        are passed on to :func:`.fetchPDB`.

    :returns: list with one :func:`.fetchPDB` result per entry; an item is
        **None** when every attempt for that entry returned **None**

    :raises ValueError: if no identifier or filename is given
    """

    n_pdb = len(pdb)
    if n_pdb == 0:
        raise ValueError('Please provide a PDB ID or filename')

    if n_pdb == 1:
        if isListLike(pdb[0]):
            pdb = pdb[0]
            n_pdb = len(pdb)

    # Pop once, before the loop: popping per entry would apply *format* to
    # the first entry only.
    fmt = kwargs.pop('format', None)

    fnames = []
    for p in pdb:
        if fmt is not None:
            filename = fetchPDB(p, format=fmt, **kwargs)
        else:
            filename = fetchPDB(p, **kwargs)

            if filename is None:
                filename = fetchPDB(p, format='cif', **kwargs)

            if filename is None:
                filename = fetchPDB(p, format='emd', **kwargs)

        fnames.append(filename)

    return fnames
def buildPDBEnsemble(atomics,
    """Builds a :class:`.PDBEnsemble` from a given reference structure and a list of structures 
    (:class:`.Atomic` instances). Note that the reference should be included in the list as well.

    :arg atomics: a list of :class:`.Atomic` instances
    :type atomics: list

    :arg ref: reference structure or the index to the reference in *atomics*. If **None**,
        then the first item in *atomics* will be considered as the reference. If it is a 
        :class:`.PDBEnsemble` instance, then *atomics* will be appended to the existing ensemble.
        Default is **None**
    :type ref: int, :class:`.Chain`, :class:`.Selection`, or :class:`.AtomGroup`

    :arg title: the title of the ensemble
    :type title: str

    :arg labels: labels of the conformations
    :type labels: list

    :arg degeneracy: whether only the active coordinate set (**True**) or all the coordinate sets 
        (**False**) of each structure should be added to the ensemble. Default is **True**
    :type degeneracy: bool

    :arg occupancy: minimal occupancy of columns (range from 0 to 1). Columns whose occupancy
        is below this value will be trimmed
    :type occupancy: float

    :arg atommaps: labels of *atomics* that were mapped and added into the ensemble. This is an 
        output argument
    :type atommaps: list

    :arg unmapped: labels of *atomics* that cannot be included in the ensemble. This is an 
        output argument
    :type unmapped: list

    :arg subset: a subset for selecting particular atoms from the input structures.
        Default is ``"all"``
    :type subset: str

    :arg superpose: if set to ``'iter'``, :func:`.PDBEnsemble.iterpose` will be used to 
        superpose the structures, otherwise conformations will be superposed with respect 
        to the reference specified by *ref* unless set to ``False``. Default is ``'iter'``
    :type superpose: str, bool

    occupancy = kwargs.pop('occupancy', None)
    degeneracy = kwargs.pop('degeneracy', True)
    subset = str(kwargs.get('subset', 'all')).lower()
    superpose = kwargs.pop('superpose', 'iter')
    superpose = kwargs.pop('iterpose', superpose)
    debug = kwargs.pop('debug', {})

    if 'mapping_func' in kwargs:
        raise DeprecationWarning(
            'mapping_func is deprecated. Please see release notes for '
            'more details:'
    start = time.time()

    if not isListLike(atomics):
        raise TypeError('atomics should be list-like')

    if len(atomics) == 1 and degeneracy is True:
        raise ValueError('atomics should have at least two items')

    if labels is not None:
        if len(labels) != len(atomics):
            raise TypeError('Labels and atomics must have the same lengths.')
        labels = []

        for atoms in atomics:
            if atoms is None:

    if ref is None:
        target = atomics[0]
    elif isinstance(ref, Integral):
        target = atomics[ref]
    elif isinstance(ref, PDBEnsemble):
        target = ref._atoms
        target = ref

    # initialize a PDBEnsemble with reference atoms and coordinates
    isrefset = False
    if isinstance(ref, PDBEnsemble):
        ensemble = ref
        # select the subset of reference beforehand for the sake of efficiency
        if subset != 'all':
            target =
        ensemble = PDBEnsemble(title)
        if isinstance(target, Atomic):
            isrefset = True
            ensemble._n_atoms = len(target)
            isrefset = False

    # build the ensemble
    if unmapped is None: unmapped = []
    if atommaps is None: atommaps = []

    LOGGER.progress('Building the ensemble...', len(atomics),
    for i, atoms in enumerate(atomics):
        if atoms is None:

                      'Mapping %s to the reference...' % atoms.getTitle(),
        except AttributeError:
            raise TypeError(
                'atomics must be a list of instances having the access to getHierView'

        if subset != 'all':
            atoms =

        # find the mapping of chains of atoms to those of target
        debug[labels[i]] = {}
        atommaps_ = alignChains(atoms,

        if len(atommaps_) == 0:

        # add the atommaps to the ensemble
        for atommap in atommaps_:
            lbl = pystr(labels[i])
            if len(atommaps_) > 1:
                chids = np.unique(atommap.getChids())
                strchids = ''.join(chids)
                lbl += '_%s' % strchids

            if not isrefset:
                isrefset = True


    if occupancy is not None:
        ensemble = trimPDBEnsemble(ensemble, occupancy=occupancy)

    if superpose == 'iter':
    elif superpose is not False:
        ensemble.superpose()'Ensemble ({0} conformations) were built in {1:.2f}s.'.format(
        time.time() - start))

    if unmapped:
        LOGGER.warn('{0} structures cannot be mapped.'.format(len(unmapped)))
    return ensemble
def trimModelByMask(model, mask):
    """Returns a part of the *model* indicated by *mask*. This method removes
    columns and rows in the connectivity matrix indicated by *mask* and fixes
    the diagonal sums. Normal modes need to be calculated again after the trim.

    :arg model: NMA model instance to be sliced
    :type model: :class:`.NMA`

    :arg mask: an Integer array or a Boolean array where ``"True"`` indicates
        the parts being selected
    :type mask: list, :class:`~numpy.ndarray`

    :returns: :class:`.NMA`

    :raises TypeError: if *mask* is not list-like or *model* is not a GNM,
        ANM or PCA instance
    :raises ValueError: if *mask* does not match the model size, or the
        model has no matrix"""

    if not isListLike(mask):
        raise TypeError(
            'mask must be either a list or a numpy.ndarray, not {0}'.format(
                type(mask).__name__))

    # Lists and tuples pass the check above but have no dtype/min/max.
    mask = np.asarray(mask)
    n_atoms = model.numAtoms()

    if mask.dtype == np.dtype('bool'):
        if len(mask) != n_atoms:
            raise ValueError('number of atoms in model and mask must be equal')
        which = mask
    else:
        which = np.zeros(n_atoms, dtype=bool)
        # An empty index list selects nothing; min()/max() would fail on it.
        if mask.size:
            if mask.min() < 0 or mask.max() >= n_atoms:
                raise ValueError('index in mask exceeds range')
            which[mask] = True

    if model.is3d():
        which = np.repeat(which, 3)

    if isinstance(model, GNM):
        matrix = model._kirchhoff
    elif isinstance(model, ANM):
        matrix = model._hessian
    elif isinstance(model, PCA):
        matrix = model._cov
    else:
        # Without this branch `matrix` would be unbound below.
        raise TypeError('model does not have a valid type derived from NMA')
    if matrix is None:
        raise ValueError('model matrix (Hessian/Kirchhoff/Covariance) is not '
                         'built')

    title = model.getTitle() + ' reduced'

    # Chained boolean indexing yields a copy, so the in-place diagonal
    # fixes below never touch the matrix owned by *model*.
    matrix = matrix[which, :][:, which]

    if isinstance(model, PCA):
        eda = PCA(title)
        # NOTE(review): the call storing the matrix is missing from this copy
        # of the source; restored as setCovariance -- confirm.
        eda.setCovariance(matrix)
        return eda

    if isinstance(model, GNM):
        gnm = GNM(title)
        # Each diagonal entry becomes minus the sum of the off-diagonal
        # entries of its column.
        I = np.eye(len(matrix), dtype=bool)
        matrix[I] = -(matrix.sum(axis=0) - np.diag(matrix))
        # NOTE(review): restored call, see above -- confirm.
        gnm.setKirchhoff(matrix)
        return gnm

    # ANM (or a subclass): keep the concrete type of the input model.
    anm = type(model)(title)

    n = len(matrix) // 3
    for i in range(n):
        rows = slice(i * 3, i * 3 + 3)
        # blocks[a, j, b] == matrix[3*i + a, 3*j + b]
        blocks = matrix[rows, :].reshape(3, n, 3)
        # Diagonal 3x3 block = minus the sum of the off-diagonal blocks of
        # this block row; off-diagonal blocks are never modified.
        matrix[rows, rows] = -(blocks.sum(axis=1) - matrix[rows, rows])

    # NOTE(review): restored call, see above -- confirm.
    anm.setHessian(matrix)
    if hasattr(anm, 'getMembrane'):
        anm._membrane = model.getMembrane()
        anm._combined = model.getCombined()
    return anm
def parsePDB(*pdb, **kwargs):
    """Returns an :class:`.AtomGroup` and/or dictionary containing header data
    parsed from a PDB file.

    This function extends :func:`.parsePDBStream`.

    See :ref:`parsepdb` for a detailed usage example.

    :arg pdb: one PDB identifier or filename, or a list of them.
        If needed, PDB files are downloaded using :func:`.fetchPDB()` function.
    You can also provide arguments that you would like passed on to fetchPDB().

    n_pdb = len(pdb)
    if n_pdb == 1:
        if isListLike(pdb[0]):
            pdb = pdb[0]
            n_pdb = len(pdb)

    if n_pdb == 1:
        return _parsePDB(pdb[0], **kwargs)
        results = []
        lstkwargs = {}
        for key in kwargs:
            argval = kwargs.get(key)
            if np.isscalar(argval):
                argval = [argval] * n_pdb
            lstkwargs[key] = argval

        start = time.time()
        LOGGER.progress('Retrieving {0} PDB structures...'.format(n_pdb),
                        n_pdb, '_prody_parsePDB')
        for i, p in enumerate(pdb):
            kwargs = {}
            for key in lstkwargs:
                kwargs[key] = lstkwargs[key][i]
            c = kwargs.get('chain', '')
                          'Retrieving {0}...'.format(p + c),
            result = _parsePDB(p, **kwargs)
            if not isinstance(result, tuple):
                if isinstance(result, dict):
                    result = (None, result)
                    result = (result, None)

        results = list(zip(*results))

        for i in reversed(range(len(results))):
            if all(j is None for j in results[i]):
        if len(results) == 1:
            results = results[0]
        results = list(results)

        model = kwargs.get('model')
        header = kwargs.get('header', False)
        if model != 0 and header:
            numPdbs = len(results[0])
            numPdbs = len(results)'{0} PDBs were parsed in {1:.2f}s.'.format(
            time.time() - start))

        return results
def parseBIRD(*ids, **kwargs):
    """Parse data from the Biologically Interesting Molecule Reference 
    Dictionary (BIRD) resource, which is updated every week. This includes 
    2 kinds of keys, which can be selected with the **keys** keyword argument.

    The chemical information is found in a single CIF file at 
    This data will be downloaded and extracted to :file:`.prody/bird-prd`.

    Biological function information is also found in a single CIF file at 
    This data will be downloaded and extracted to :file:`.prody/bird-family`.

    Individual compounds can be selected using **ids**. 
    If needed, BIRD files are downloaded using :func:`.fetchBIRDviaFTP` function.
    You can also provide arguments that you would like passed on to fetchBIRDviaFTP.

    :arg ids: one BIRD identifier (starting with PRD or FAM) or a list of them.
        If **None** is provided then all of them are returned.
    :type ids: str, tuple, list, :class:`~numpy.ndarray`, **None**

    :arg key: key specifying which data to fetch out of ``'prd'`` or ``'family'``
               default is ``'prd'``
    :type key: str

    Returns :class:`.StarDataBlock` object or list of them.
    key = kwargs.get('key', 'prd')
    if not isinstance(key, str):
        raise TypeError("key should be a string")

    if key[:3].lower() == 'prd':
        key = 'prd'
    elif key[:3].lower() == 'fam':
        key = 'family'
        raise ValueError("key should be 'prd' or 'fam'")

    n_ids = len(ids)
    if n_ids == 1:
        if isListLike(ids[0]):
            ids = ids[0]
            n_ids = len(ids)

    if n_ids == 1:
        ids = list(ids)

    BIRD_PATH = os.path.join(getPackagePath(), 'bird')
    filename = BIRD_PATH + '/{0}-all.cif.gz'.format(key)
    if not os.path.isfile(filename):
        fetchBIRDviaFTP(keys=key, **kwargs)

    data = parseSTAR(filename, shlex=True)
    ret = []
    for id in ids:
        except ValueError:
            except ValueError:
                LOGGER.warn('id {0} not found in {1} data '
                            'so appending None'.format(id, key))

    if n_ids == 1:
        return ret[0]

    return ret
def _pirField(kwargs, name, default, n_seqs, length=None):
    """Return the keyword *name* (or *default*) as one string per sequence.

    A single string is repeated *n_seqs* times; when *length* is given the
    string must have exactly that many characters.  A list-like of strings
    is returned as is after its length is checked against *n_seqs*."""

    value = kwargs.get(name, default)

    if isinstance(value, basestring):
        if length is None or len(value) == length:
            return [value] * n_seqs
    elif (isListLike(value) and len(value) > 0
            and isinstance(value[0], basestring)):
        # The first element is tested, not the container itself.
        if len(value) != n_seqs:
            raise ValueError('There should be an entry in {0} list for each '
                             'sequence in msa'.format(name))
        return value

    if length is None:
        raise TypeError('{0} should be a string or list of strings'
                        .format(name))
    raise TypeError('{0} should be a string of length {1} or list of them'
                    .format(name, length))


def writePIR(filename, msa, **kwargs):
    """A function to write PIR format alignments for use with MODELLER.

    :arg filename: The name of the file to be written including .ali
    :type filename: str

    :arg msa: a multiple sequence alignment in :class:`MSA` format
    :type msa: :class:`MSA` instance

    :arg chain_sep: chain separation character or list of them
        default is '/'
    :type chain_sep: str, list

    :arg types: a list of strings for field 1, PIR types (Sequence or StructureX)
        default is all Sequence
    :type types: list

    :arg labels: a list of strings for field 2, sequence labels
        default is to take them from msa
    :type labels: list

    :arg first_resnums: contents for field 3, residue number for the first residue.
        This should be a list of strings each having length 5,
        default is all 'FIRST'
    :type first_resnums: list

    :arg first_chains: contents for field 4, chain ID for the first residue
        This should be a list of strings each having length 1,
        default is all '@'
    :type first_chains: list

    :arg last_resnums: contents for field 5, residue number for the last residue.
        This should be a list of strings each having length 5,
        default is all 'LAST '
    :type last_resnums: list

    :arg last_chains: contents for field 6, chain ID for the last residue
        This should be a list of strings each having length 1,
        default is all ' '
    :type last_chains: list

    :arg protein_names: list of strings for field 7
        default is all ''
    :type protein_names: list

    :arg protein_sources: list of strings for field 8
        default is all ''
    :type protein_sources: list

    :arg resolutions: list of strings for field 9
        default is all ''
    :type resolutions: list

    :arg r_factors: list of strings for field 10
        default is all ''
    :type r_factors: list

    :raises TypeError: if a keyword is neither a string nor a list of strings
    :raises ValueError: if a list does not have one entry per sequence
    """
    n_seqs = msa.numSequences()

    # All arguments are validated before the file is opened, so a bad call
    # neither truncates an existing file nor leaks a handle.
    chain_sep = _pirField(kwargs, 'chain_sep', '/', n_seqs)
    types = _pirField(kwargs, 'types', 'Sequence', n_seqs)

    labels = kwargs.get('labels', None)
    if labels is None:
        # NOTE(review): the loop body is missing from this copy of the source;
        # restored with the getLabel() accessor -- confirm.
        labels = [sequence.getLabel() for sequence in msa]
    elif (isListLike(labels) and len(labels) > 0
            and isinstance(labels[0], basestring)):
        if len(labels) != n_seqs:
            raise ValueError('There should be an entry in labels list for '
                             'each sequence in msa')
    else:
        raise TypeError('labels should be a string or list of strings')

    first_resnums = _pirField(kwargs, 'first_resnums', 'FIRST', n_seqs, 5)
    first_chains = _pirField(kwargs, 'first_chains', '@', n_seqs, 1)
    last_resnums = _pirField(kwargs, 'last_resnums', 'LAST ', n_seqs, 5)
    last_chains = _pirField(kwargs, 'last_chains', ' ', n_seqs, 1)
    protein_names = _pirField(kwargs, 'protein_names', '', n_seqs)
    protein_sources = _pirField(kwargs, 'protein_sources', '', n_seqs)
    resolutions = _pirField(kwargs, 'resolutions', '', n_seqs)
    r_factors = _pirField(kwargs, 'r_factors', '', n_seqs)

    with open(filename, 'w') as msafile:
        for i, sequence in enumerate(msa):
            sequence = str(sequence).replace(chain_sep[i], '/')

            fields = [types[i], labels[i],
                      first_resnums[i], first_chains[i],
                      last_resnums[i], last_chains[i],
                      protein_names[i], protein_sources[i],
                      resolutions[i], r_factors[i]]
            msafile.write('>P1;' + labels[i] + '\n')
            # The ten-field description occupies its own line; the sequence
            # starts on the next one.
            msafile.write(':'.join(fields) + '\n')

            # Full 60-character lines first, then the remainder (possibly
            # empty) closed by the '*' terminator.  Integer division keeps
            # this valid for sequences shorter than 60 characters.
            n_full = len(sequence) // 60
            for j in range(n_full):
                msafile.write(sequence[j * 60:(j + 1) * 60] + '\n')
            msafile.write(sequence[n_full * 60:] + '*\n\n')

    def scanPockets(self):

        'Generates ESSA z-scores for pockets and parses pocket features. It requires both Fpocket 3.0 and Pandas being installed in your system.'
        from re import findall

        fpocket = which('fpocket')

        if fpocket is None:
            LOGGER.warning('Fpocket (version >= 3.0) was not found, please install it.')
            return None

            from pandas import Index, DataFrame
        except ImportError as ie:
            LOGGER.warning(ie.__str__() + ' was found, please install it.')
            return None

        rcr = {(i, j): k if self._rib else self._ri[k]
               for i, j, k in zip(self._ca.getChids(),

        writePDB('{}_pro'.format(self._title), self._heavy)

        direc = '{}_pro_out'.format(self._title)
        if not isdir(direc):
            system('fpocket -f {}_pro.pdb'.format(self._title))

        chdir(direc + '/pockets')
        l = [x for x in listdir('.') if x.endswith('.pdb')]
        l.sort(key=lambda x:int(x.partition('_')[0][6:]))

        ps = []
        for x in l:
            with open(x, 'r') as f:
                tmp0 =
                tmp1 = [(x[1].strip(), float(x[2])) for x in findall(r'(\w+\s\w+\s*-\s*)(.+):\s*([\d.-]+)(\n)', tmp0)]
            fea, sco = list(zip(*tmp1))
        pdbs = parsePDB(l)
        if not isListLike(pdbs):
            pdbs = [pdbs]

        # ----- # ----- #

        ps = array(ps)

        pcn = {int(pdb.getTitle().partition('_')[0][6:]):
                       pdb.getResnums().tolist())) for pdb in pdbs}
        pi = {p: [rcr[x] for x in crn] for p, crn in pcn.items()}

        pzs_max = {k: max(self._zscore[v]) for k, v in pi.items()}
        pzs_med = {k: median(self._zscore[v]) for k, v in pi.items()}

        # ----- # ----- #

        indices = Index(range(1, ps.shape[0] + 1), name='Pocket #')

        columns = Index(fea, name='Feature')

        self._df = DataFrame(index=indices, columns=columns, data=ps)

        # ----- # ----- #

        columns_zs = Index(['ESSA_max',

        zps = c_[list(pzs_max.values())]
        zps = hstack((zps, c_[list(pzs_med.values())]))
        zps = hstack((zps, zscore(self._df[['Local hydrophobic density Score']])))

        self._df_zs = DataFrame(index=indices, columns=columns_zs, data=zps)
# Example no. 22
def fetchBIRDviaFTP(**kwargs):
    """Retrieve the whole Biologically Interesting Molecule Reference 
    Dictionary (BIRD) resource, which is updated every week. This includes 
    2 kinds of keys, which can be selected with the **keys** keyword argument.

    The chemical information is found in a zipped (tar.gz) directory at, which 
    contains individual CIF files within it. This data will be downloaded 
    and extracted to :file:`.prody/bird-prd`.

    Biological function information is also found in a zipped (tar.gz) directory at, which 
    contains individual CIF files within it. This data will be downloaded 
    and extracted to :file:`.prody/bird-family`.

    :arg keys: keys specifying which data to fetch out of ``'prd'``, ``'family'`` or ``'both'``
               default is ``'both'``
    :type keys: str, tuple, list, :class:`~numpy.ndarray`

    The underlying data can be accessed using :func:`parseBIRD`."""

    BIRD_PATH = os.path.join(getPackagePath(), 'bird')

    keys = kwargs.get('keys', 'both')
    if isinstance(keys, str):
        if keys == 'both':
            keys = ['prd', 'family']
        elif keys[:3].lower() == 'prd':
            keys = ['prd']
        elif keys[:3].lower() == 'fam':
            keys = ['family']
            raise ValueError("keys should be 'both', 'prd' or 'fam'")

    elif isListLike(keys):
        keys = list(keys)
        raise TypeError("keys should be list-like or string")

    ftp_divided = 'pdb/data/bird/'
    ftp_pdbext = '.cif.gz'
    ftp_prefix = ''

    if not os.path.isdir(BIRD_PATH):

    LOGGER.progress('Downloading BIRD', len(keys),

    ftp_name, ftp_host, ftp_path = WWPDB_FTP_SERVERS[wwPDBServer() or 'us']
    LOGGER.debug('Connecting wwPDB FTP server {0}.'.format(ftp_name))

    from ftplib import FTP
        ftp = FTP(ftp_host)
    except Exception as error:
        raise type(error)('FTP connection problem, potential reason: '
                          'no internet connectivity')
        count = 0
        success = 0
        failure = 0
        filenames = []
        for i, x in enumerate(keys):
            data = []
            ftp_fn = ftp_prefix + '{0}-all'.format(x) + ftp_pdbext
                ftp.retrbinary('RETR ' + ftp_fn, data.append)
            except Exception as error:
                if ftp_fn in ftp.nlst():
                    LOGGER.warn('{0} download failed ({1}). It is '
                                'possible that you do not have rights to '
                                'download .gz files in the current network.'
                                .format(x, str(error)))
          '{0} download failed. {1} does not exist '
                                'on {2}.'.format(ftp_fn, x, ftp_host))
                failure += 1
                if len(data):
                    filename = BIRD_PATH + '/{0}-all.cif.gz'.format(x)

                    with open(filename, 'w+b') as outfile:
                        write = outfile.write
                        [write(block) for block in data]

                    success += 1
                    failure += 1
            count += 1
            LOGGER.update(i, label='_prody_fetchBIRD')

    LOGGER.debug('PDB download via FTP completed ({0} downloaded, '
                 '{1} failed).'.format(success, failure))
def reduceModelByMask(model, mask):
    """Returns NMA model reduced based on *mask*.

    :arg model: dynamics model
    :type model: :class:`.ANM`, :class:`.GNM`, or :class:`.PCA`

    :arg mask: an Integer array or a Boolean array where ``"True"`` indicates
        the parts being selected
    :type mask: list, :class:`~numpy.ndarray`

    :returns: :class:`.NMA`

    :raises TypeError: if *model* is not an :class:`.NMA` instance of one of
        the supported subclasses, or if *mask* is not list-like
    :raises ValueError: if a Boolean *mask* does not have one entry per atom,
        if an Integer *mask* is empty or holds an index outside
        ``[0, numAtoms)``, or if the matrix of *model* has not been built"""

    if not isinstance(model, NMA):
        raise TypeError('model must be an NMA instance, not {0}'.format(
            type(model)))

    if not isListLike(mask):
        raise TypeError(
            'mask must be either a list or a numpy.ndarray, not {0}'.format(
                type(mask)))

    # Plain lists are accepted by isListLike but have no ``dtype``/``min``.
    mask = np.asarray(mask)
    n_atoms = model.numAtoms()

    if mask.dtype == np.dtype('bool'):
        if len(mask) != n_atoms:
            raise ValueError('number of atoms in model and mask must be equal')
        system = mask
    else:
        if mask.size == 0:
            raise ValueError('mask must not be empty')
        if mask.min() < 0 or mask.max() >= n_atoms:
            raise ValueError('index in mask exceeds range')
        # Turn the index list into a per-atom Boolean selection.
        system = np.zeros(n_atoms, dtype=bool)
        system[mask] = True

    if isinstance(model, GNM):
        matrix = model._kirchhoff
    elif isinstance(model, ANM):
        matrix = model._hessian
    elif isinstance(model, PCA):
        matrix = model._cov
    else:
        raise TypeError('model does not have a valid type derived from NMA')
    if matrix is None:
        raise ValueError('model matrix (Hessian/Kirchhoff/Covariance) is not '
                         'built')

    if model.is3d():
        # One matrix row/column per coordinate, i.e. three per atom.
        system = np.repeat(system, 3)

    title = model.getTitle() + ' reduced'

    if isinstance(model, PCA):
        # A covariance matrix is reduced by taking the selected sub-block.
        ss = matrix[system, :][:, system]
        eda = PCA(title)
        eda.setCovariance(ss)
        return eda

    matrix = _reduceModel(matrix, system)

    if isinstance(model, GNM):
        gnm = GNM(title)
        gnm.setKirchhoff(matrix)
        return gnm

    # Only ANM can reach this point (see the type dispatch above).
    anm = ANM(title)
    anm.setHessian(matrix)
    return anm
    def __init__(self, starDict, key, indices=None):
        """Create a view on one data block of *starDict*.

        :arg starDict: container whose ``_dict`` maps block titles to block
            mappings and whose ``_prog`` is copied onto this instance

        :arg key: title of the block in ``starDict._dict``; when the lookup
            by title fails, *key* is used as a position among the blocks

        :arg indices: optional sequence of ``(name, idx)`` pairs selecting a
            subset of the block. For ``name == 'data'`` *idx* is a single
            data key; for any other name *idx* is an iterable of keys into
            that entry's ``'data'`` mapping. Default **None** takes the
            whole block.

        Sets ``loops``, ``numLoops``, ``numEntries`` and ``numFields``, and
        also ``data`` and ``fields`` when the block has a ``'data'`` or
        ``'fields'`` entry."""

        self._title = key
        self._prog = starDict._prog
        self._starDict = starDict

        if indices is None:
            try:
                self._dict = starDict._dict[key]
            except (KeyError, TypeError):
                # Positional access must yield the block mapping itself;
                # iterating the container directly would give its titles.
                self._dict = list(starDict._dict.values())[key]

            keys = list(self._dict.keys())
        else:
            keys = [idx[0] for idx in indices]
            self._dict = OrderedDict()
            self._dict['data'] = OrderedDict()
            self._dict['fields'] = OrderedDict()
            for idx in indices:
                name, selection = idx[0], idx[1]
                block = starDict._dict[self._title]
                if name == 'data':
                    self._dict['data'][selection] = block['data'][selection]
                    if 'fields' not in keys:
                        # No explicit field selection: keep the fields whose
                        # value matches the selected data key.
                        for k, v in block['fields'].items():
                            if v == selection:
                                self._dict['fields'][k] = v
                else:
                    entry = OrderedDict()
                    entry['fields'] = block[name]['fields']
                    entry['data'] = OrderedDict()
                    for id1 in selection:
                        entry['data'][id1] = block[name]['data'][id1]
                    self._dict[name] = entry

        # Every entry other than 'data' and 'fields' is exposed as a loop.
        reserved = ('data', 'fields')
        if indices is not None:
            self.loops = [
                StarLoop(self, name, idx) for (name, idx) in indices
                if name not in reserved
            ]
        else:
            self.loops = [
                StarLoop(self, name) for name in keys
                if name not in reserved
            ]
        self.numLoops = len(self.loops)

        has_data = 'data' in keys
        has_fields = 'fields' in keys

        if has_data or has_fields:
            # ``get`` keeps a block that lacks one of the two entries from
            # raising KeyError; the missing one becomes an empty array.
            self.data = np.array(
                list(self._dict.get('data', {}).values()))
            self.fields = np.array(
                list(self._dict.get('fields', {}).values()))

            if not isListLike(self.data):
                self.data = [self.data]

            if not isListLike(self.fields):
                self.fields = [self.fields]

            self.numEntries = len(self.data)
            # Fields are only counted when both entries were present.
            if has_data and has_fields:
                self.numFields = len(self.fields)
            else:
                self.numFields = 0
        else:
            self.numEntries = 0
            self.numFields = 0
# File: Project: prody/ProDy
# File: Project: nffaruk/ProDy
# File: Project: nffaruk/ProDy
# File: Project: prody/ProDy
def calcGoOverlap(*go_terms, **kwargs):
    """Calculate overlap between GO terms based on their distance
    in the graph. GO terms in different namespaces (molecular function,
    cellular component, and biological process) have undefined distances.

    Without *pairwise*, the first positional argument is compared against
    all remaining ones (list-like arguments are flattened into one list).

    :arg go_terms: a list of GO terms or GO IDs
    :type go_terms: list, tuple, `~numpy.ndarray`

    :arg pairwise: whether to calculate to a matrix of pairwise overlaps
        default is False
    :type pairwise: bool

    :arg distance: whether to return distances rather than calculating overlaps
        default is False
    :type distance: bool

    :arg operator: optional callable applied to the distance matrix of each
        (non-pairwise) comparison, default is **None**
    :type operator: callable

    :arg go: GO graph. Default behaviour is to parse it with :func:`.parseOBO`.
    :type go: `~goatools.obo_parser.GODag`

    :returns: the distances if *distance* is true, otherwise their
        reciprocals. Without *operator*, a trailing axis of length one is
        flattened and a single value is returned as a scalar.

    :raises TypeError: if the terms are neither IDs found in *go* nor
        objects with an ``id`` attribute
    """
    pairwise = kwargs.pop('pairwise', False)
    distance = kwargs.get('distance', False)
    operator = kwargs.get('operator', None)

    go = kwargs.get('go', None)
    if go is None:
        go = parseOBO(**kwargs)

    if not isListLike(go_terms):
        raise TypeError('please provide a list-like of go terms')

    if pairwise:
        inner_kwargs = dict(kwargs)
        # Reuse the graph instead of parsing it again for every pair.
        inner_kwargs['go'] = go
        # Collect raw distances here; the single inversion to overlaps
        # happens at the end of this call. Passing ``distance`` through
        # unchanged would invert the values twice.
        inner_kwargs['distance'] = True

        distances = np.zeros((len(go_terms), len(go_terms)))
        for i, go_terms_i in enumerate(go_terms):
            for j, go_terms_j in enumerate(go_terms):
                distances[i, j] = calcGoOverlap(
                    go_terms_i, go_terms_j, pairwise=False, **inner_kwargs)

    else:
        go_terms1 = go_terms[0]

        flattened_term_list = []
        for entry in go_terms[1:]:
            if isListLike(entry):
                flattened_term_list.extend(entry)
            else:
                flattened_term_list.append(entry)

        if not isListLike(go_terms1):
            go_terms1 = [go_terms1]

        if not isListLike(flattened_term_list):
            flattened_term_list = [flattened_term_list]

        try:
            # IDs: look them up in the graph.
            flattened_term_list = [go[term] for term in flattened_term_list]
            go_terms1 = [go[term] for term in go_terms1]
        except Exception:
            try:
                # Term objects: fall back to their IDs.
                flattened_term_list = [term.id
                                       for term in flattened_term_list]
                go_terms1 = [term.id for term in go_terms1]
            except Exception:
                raise TypeError('go_terms should contain go terms or IDs')

        # Normalise everything to ID strings. Rebinding the loop variable
        # in a ``for`` loop would leave the lists unchanged.
        flattened_term_list = [term if isinstance(term, str) else term.id
                               for term in flattened_term_list]
        go_terms1 = [term if isinstance(term, str) else term.id
                     for term in go_terms1]

        distances = np.zeros((len(go_terms1), len(flattened_term_list)))
        for i, go_id1 in enumerate(go_terms1):
            for j, go_id2 in enumerate(flattened_term_list):
                distances[i, j] = calcMinBranchLength(go_id1, go_id2, go)

        if operator is not None and isListLike(distances):
            distances = operator(distances)

    if operator is None:
        if distances.shape[-1] == 1:
            distances = distances.flatten()

        if distances.shape == (1,):
            distances = distances[0]

    if distance:
        return distances
    else:
        return 1. / distances