Example #1
    def _superpose(self, **kwargs):
        """Superpose conformations and update coordinates."""

        indices = self._indices
        weights = self._weights
        mobs = self._confs
        if indices is None:
            idx = False
            tar = self._coords
            movs = None
        else:
            idx = True
            if self._weights is not None:
                weights = weights[indices]
            tar = self._coords[indices]
            movs = self._confs

        linalg = importLA()
        svd = linalg.svd
        det = linalg.det

        if weights is None:
            tar_com = tar.mean(0)
            tar_org = (tar - tar_com)
            mob_org = zeros(tar_org.shape, dtype=mobs.dtype)
            tar_org = tar_org.T
        else:
            weights_sum = weights.sum()
            weights_dot = dot(weights.T, weights)
            tar_com = (tar * weights).sum(axis=0) / weights_sum
            tar_org = (tar - tar_com)
            mob_org = zeros(tar_org.shape, dtype=mobs.dtype)

        LOGGER.progress('Superposing ', len(mobs), '_prody_ensemble')
        for i, mob in enumerate(mobs):
            if idx:
                mob = mob[indices]
            if weights is None:
                mob_com = mob.mean(0)
                matrix = dot(tar_org, subtract(mob, mob_com, mob_org))
            else:
                mob_com = (mob * weights).sum(axis=0) / weights_sum
                subtract(mob, mob_com, mob_org)
                matrix = dot((tar_org * weights).T,
                             (mob_org * weights)) / weights_dot

            U, s, Vh = svd(matrix)
            Id = array([[1, 0, 0], [0, 1, 0], [0, 0, sign(det(matrix))]])
            rotation = dot(Vh.T, dot(Id, U.T))

            if movs is None:
                mobs[i] = dot(mob_org, rotation)
                add(mobs[i], tar_com, mobs[i])
            else:
                add(dot(movs[i], rotation),
                    (tar_com - dot(mob_com, rotation)), movs[i])
            LOGGER.update(i, '_prody_ensemble')
        LOGGER.clear()
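
The loop above is the Kabsch algorithm: each conformation is centered, an SVD of the 3x3 correlation matrix between the centered coordinate sets gives the optimal rotation, and the determinant check guards against improper rotations (reflections). A minimal self-contained NumPy sketch of that rotation step (the helper name is illustrative, not part of ProDy):

import numpy as np

def kabsch_rotation(mobile, target):
    # both inputs are (n_atoms, 3) arrays already centered at the origin
    matrix = np.dot(target.T, mobile)
    U, s, Vh = np.linalg.svd(matrix)
    # force a proper rotation (det = +1) instead of a reflection
    Id = np.diag([1.0, 1.0, np.sign(np.linalg.det(matrix))])
    return np.dot(Vh.T, np.dot(Id, U.T))

# a centered conformation is then superposed as:
#     np.dot(mobile_centered, rotation) + target_center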
Example #3
def calcEnsembleENMs(ensemble, model='gnm', trim='trim', n_modes=20, **kwargs):
    """Description"""

    if isinstance(ensemble, Conformation):
        conformation = ensemble
        ensemble = conformation.getEnsemble()
        index = conformation.getIndex()
        ensemble = ensemble[index:index+1]
    if model is GNM:
        model_type = 'GNM'
    elif model is ANM:
        model_type = 'ANM'
    else:
        model_type = str(model).strip().upper()

    atoms = ensemble.getAtoms()
    select = None
    if ensemble._indices is not None:
        select = atoms
        atoms = atoms.getAtomGroup()
        
    labels = ensemble.getLabels()

    verb = LOGGER.verbosity
    LOGGER.verbosity = 'info'
    ### ENMs ###
    ## ENM for every conf
    enms = []
    n_confs = ensemble.numConfs()

    str_modes = 'all' if n_modes is None else str(n_modes)
    LOGGER.progress('Calculating {0} {1} modes for {2} conformations...'
                    .format(str_modes, model_type, n_confs), n_confs)

    for i in range(n_confs):
        coords = ensemble.getCoordsets(i, selected=False)
        # keep *atoms* intact; use a separate variable for the ENM input so a
        # bare coordinate array does not clobber the Atomic instance
        if atoms is not None:
            atoms.setCoords(coords)
            nodes = atoms
        else:
            nodes = coords
        enm, _ = calcENM(nodes, select, model=model, trim=trim, 
                            n_modes=n_modes, title=labels[i], **kwargs)
        enms.append(enm)

        #lbl = labels[i] if labels[i] != '' else '%d-th conformation'%(i+1)
        LOGGER.update(i)
    
    LOGGER.update(n_confs, 'Finished.')
    LOGGER.verbosity = verb

    LOGGER.info('{0} {1} modes were calculated for each of the {2} conformations.'
                        .format(str_modes, model_type, n_confs))

    modeens = ModeEnsemble(title=ensemble.getTitle())
    modeens.addModeSet(enms, label=ensemble.getLabels())
    modeens.setAtoms(ensemble.getAtoms())
    return modeens
Example #4
def parsePDB(*pdb, **kwargs):
    """Returns an :class:`.AtomGroup` and/or dictionary containing header data
    parsed from a PDB file.

    This function extends :func:`.parsePDBStream`.

    See :ref:`parsepdb` for a detailed usage example.

    :arg pdb: one PDB identifier or filename, or a list of them.
        If needed, PDB files are downloaded using :func:`.fetchPDB()` function.
    
    You can also provide arguments that you would like passed on to fetchPDB().
    """
    n_pdb = len(pdb)
    if n_pdb == 1:
        return _parsePDB(pdb[0], **kwargs)
    else:
        verb = LOGGER.verbosity
        LOGGER.verbosity = 'info'

        results = []
        lstkwargs = {}
        for key in kwargs:
            argval = kwargs.get(key)
            if np.isscalar(argval):
                argval = [argval] * n_pdb
            lstkwargs[key] = argval

        LOGGER.progress('Retrieving {0} PDB structures...'.format(n_pdb),
                        n_pdb)
        for i, p in enumerate(pdb):
            kwargs = {}
            for key in lstkwargs:
                kwargs[key] = lstkwargs[key][i]
            LOGGER.update(i, 'Retrieving {0}...'.format(p))
            result = _parsePDB(p, **kwargs)
            if not isinstance(result, tuple):
                if isinstance(result, dict):
                    result = (None, result)
                else:
                    result = (result, None)
            results.append(result)

        results = list(zip(*results))

        LOGGER.update(n_pdb, '{0} PDB structures retrieved'.format(n_pdb))
        LOGGER.verbosity = verb

        for i in reversed(range(len(results))):
            if all(j is None for j in results[i]):
                results.pop(i)
        if len(results) == 1:
            results = results[0]

        return results
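
The kwargs handling above follows a simple broadcasting pattern: scalar keyword values are repeated once per PDB so that each structure can be parsed with its own argument set, while list-like values are passed through element by element. The same pattern in isolation (a sketch; broadcast_kwargs is a hypothetical helper, not a ProDy function):

import numpy as np

def broadcast_kwargs(n_items, **kwargs):
    # expand every scalar keyword value into a list of length n_items
    lstkwargs = {}
    for key, argval in kwargs.items():
        if np.isscalar(argval):
            argval = [argval] * n_items
        lstkwargs[key] = argval
    return lstkwargs

# broadcast_kwargs(3, model=1, chain=['A', 'B', 'C'])
# -> {'model': [1, 1, 1], 'chain': ['A', 'B', 'C']}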
Example #5
def fetchPDBClusters(sqid=None):
    """Retrieve PDB sequence clusters.  PDB sequence clusters are results of
    the weekly clustering of protein chains in the PDB generated by blastclust.
    They are available at FTP site: ftp://resources.rcsb.org/sequence/clusters/

    This function will download about 10 Mb of data and save it after
    compressing in your home directory in :file:`.prody/pdbclusters`.
    Compressed files will be less than 4 Mb in size.  Cluster data can
    be loaded using :func:`loadPDBClusters` function and be accessed
    using :func:`listPDBCluster`."""

    if sqid is not None:
        if isListLike(sqid):
            for s in sqid:
                if s not in PDB_CLUSTERS:
                    raise ValueError('sqid must be one or more of ' +
                                     PDB_CLUSTERS_SQID_STR)
            keys = list(sqid)
        else:
            if sqid not in PDB_CLUSTERS:
                raise ValueError('sqid must be one or more of ' +
                                 PDB_CLUSTERS_SQID_STR)
            keys = [sqid]
    else:
        keys = list(PDB_CLUSTERS)

    PDB_CLUSTERS_PATH = os.path.join(getPackagePath(), 'pdbclusters')
    if not os.path.isdir(PDB_CLUSTERS_PATH):
        os.mkdir(PDB_CLUSTERS_PATH)
    LOGGER.progress('Downloading sequence clusters', len(keys),
                    '_prody_fetchPDBClusters')
    count = 0
    for i, x in enumerate(keys):
        filename = 'bc-{0}.out'.format(x)
        url = ('ftp://resources.rcsb.org/sequence/clusters/' + filename)
        try:
            inp = openURL(url)
        except IOError:
            LOGGER.warning('Clusters at {0}% sequence identity level could '
                           'not be downloaded.'.format(x))
            continue
        else:
            out = openFile(filename + '.gz', 'w', folder=PDB_CLUSTERS_PATH)
            out.write(inp.read())
            inp.close()
            out.close()
            count += 1
        LOGGER.update(i, label='_prody_fetchPDBClusters')
    LOGGER.finish()
    if len(keys) == count:
        LOGGER.info('All selected PDB clusters were downloaded successfully.')
    elif count == 0:
        LOGGER.warn('PDB clusters could not be downloaded.')
Example #6
def fetchPDBClusters(sqid=None):
    """Retrieve PDB sequence clusters.  PDB sequence clusters are results of 
    the weekly clustering of protein chains in the PDB generated by blastclust. 
    They are available at FTP site: ftp://resources.rcsb.org/sequence/clusters/
    
    This function will download about 10 Mb of data and save it after 
    compressing in your home directory in :file:`.prody/pdbclusters`.
    Compressed files will be less than 4 Mb in size.  Cluster data can 
    be loaded using :func:`loadPDBClusters` function and be accessed 
    using :func:`listPDBCluster`."""
    
    if sqid is not None:
        if sqid not in PDB_CLUSTERS:
            raise ValueError('sqid must be one of ' + PDB_CLUSTERS_SQID_STR)
        keys = [sqid]
    else:
        keys = list(PDB_CLUSTERS)
    
    PDB_CLUSTERS_PATH = os.path.join(getPackagePath(), 'pdbclusters')
    if not os.path.isdir(PDB_CLUSTERS_PATH):
        os.mkdir(PDB_CLUSTERS_PATH)
    LOGGER.progress('Downloading sequence clusters', len(keys),
                    '_prody_fetchPDBClusters')
    count = 0
    for i, x in enumerate(keys):
        filename = 'bc-{0}.out'.format(x)
        url = ('ftp://resources.rcsb.org/sequence/clusters/' + filename)
        try:
            inp = openURL(url)
        except IOError:
            LOGGER.warning('Clusters at {0}% sequence identity level could '
                           'not be downloaded.'.format(x))
            continue
        else:
            out = openFile(filename+'.gz', 'w', folder=PDB_CLUSTERS_PATH) 
            out.write(inp.read())
            inp.close()
            out.close()
            count += 1
        LOGGER.update(i, '_prody_fetchPDBClusters')
    LOGGER.clear()
    if len(keys) == count:
        LOGGER.info('All PDB clusters were downloaded successfully.')
    elif count == 0:
        LOGGER.warn('PDB clusters could not be downloaded.')
Example #7
def parseChainsList(filename):
    """
    Parse a set of PDBs and extract chains based on a list in a text file.

    :arg filename: the name of the file to be read
    :type filename: str

    Returns: lists containing an :class:`.AtomGroup` for each PDB, 
    the headers for those PDBs, and the requested :class:`.Chain` objects
    """
    verb = LOGGER.verbosity
    LOGGER.verbosity = 'info'

    fi = open(filename, 'r')
    lines = fi.readlines()
    fi.close()

    pdb_ids = []
    ags = []
    headers = []
    chains = []
    num_lines = len(lines)
    LOGGER.progress('Starting', num_lines)
    for i, line in enumerate(lines):
        LOGGER.update(i, 'Parsing lines...')
        pdb_id = line.split()[0].split('_')[0]
        if pdb_id not in pdb_ids:
            pdb_ids.append(pdb_id)

            ag, header = parsePDB(pdb_id, compressed=False,
                                  subset=line.split()[0].split('_')[1], header=True)

            ags.append(ag)
            headers.append(header)
        else:
            # look up the structure parsed earlier for this PDB ID
            ag = ags[pdb_ids.index(pdb_id)]

        chains.append(ag.getHierView()[line.strip().split()[1]])

    LOGGER.verbosity = verb
    LOGGER.info('{0} PDBs have been parsed and {1} chains have been '
                'extracted.'.format(len(ags), len(chains)))

    return ags, headers, chains
Example #8
def parseChainsList(filename):
    """
    Parse a set of PDBs and extract chains based on a list in a text file.

    :arg filename: the name of the file to be read
    :type filename: str

    Returns: lists containing an :class:`.AtomGroup` for each PDB, 
    the headers for those PDBs, and the requested :class:`.Chain` objects
    """
    
    fi = open(filename,'r')
    lines = fi.readlines()
    fi.close()

    pdb_ids = []
    ags = []
    headers = []
    chains = []
    num_lines = len(lines)
    LOGGER.progress('Starting', num_lines, '_prody_parseChainsList')
    for i, line in enumerate(lines):
        LOGGER.update(i, 'Parsing lines...', label='_prody_parseChainsList')
        pdb_id = line.split()[0].split('_')[0]
        if pdb_id not in pdb_ids:
            pdb_ids.append(pdb_id)

            ag, header = parsePDB(pdb_id, compressed=False,
                                  subset=line.split()[0].split('_')[1], header=True)

            ags.append(ag)
            headers.append(header)
        else:
            # look up the structure parsed earlier for this PDB ID
            ag = ags[pdb_ids.index(pdb_id)]

        chains.append(ag.getHierView()[line.strip().split()[1]])

    LOGGER.finish()
    LOGGER.info('{0} PDBs have been parsed and {1} chains have been '
                'extracted.'.format(len(ags), len(chains)))

    return ags, headers, chains
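
Judging from how each line is tokenized in both versions above (line.split()[0].split('_') for the PDB ID and subset, line.strip().split()[1] for the chain ID), the list file is expected to hold one whitespace-separated pdbid_subset chain pair per line. A hypothetical input file might look like:

1ake_ca A
1ake_ca B
4ake_ca A

after which ags, headers, chains = parseChainsList('chains.txt') would return one AtomGroup and header per unique PDB and one Chain per line.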
Example #9
def calcMSF(coordsets):
    """Calculate mean square fluctuation(s) (MSF)."""

    try:
        ncsets = coordsets.numFrames()
    except AttributeError:
        try:
            coordsets = coordsets.getCoordsets()
        except AttributeError:
            pass
        try:
            ndim, shape = coordsets.ndim, coordsets.shape
        except AttributeError:
            raise TypeError('coordsets must be a Numpy array or a ProDy '
                            'object with `getCoordsets` method')
        if ndim != 3 or shape[0] == 1:
            raise ValueError('coordsets must contain multiple sets')
        msf = var(coordsets, 0).sum(1)
    else:
        nfi = coordsets.nextIndex()
        natoms = coordsets.numSelected()
        total = zeros((natoms, 3))
        sqsum = zeros((natoms, 3))

        LOGGER.progress('Evaluating {0} frames from {1}:'
                        .format(ncsets, str(coordsets)), ncsets,
                        '_prody_calcMSF')
        ncsets = 0
        coordsets.reset()
        for frame in coordsets:
            frame.superpose()
            coords = frame._getCoords()
            total += coords
            sqsum += coords ** 2
            ncsets += 1
            LOGGER.update(ncsets, label='_prody_calcMSF')
        LOGGER.finish()
        msf = (sqsum/ncsets - (total/ncsets)**2).sum(1)
        coordsets.goto(nfi)
    return msf
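
The streaming sums in the trajectory branch implement the usual variance identity, MSF_i = <x_i**2> - <x_i>**2, summed over x, y and z. A quick NumPy check of the equivalence on a toy array (a sketch, not ProDy code):

import numpy as np

coordsets = np.random.rand(10, 5, 3)          # 10 frames, 5 atoms
n = coordsets.shape[0]

total = coordsets.sum(axis=0)                 # running sum of coordinates
sqsum = (coordsets ** 2).sum(axis=0)          # running sum of squares
msf_stream = (sqsum / n - (total / n) ** 2).sum(axis=1)

# identical to the array branch: per-atom variance summed over x, y, z
assert np.allclose(msf_stream, np.var(coordsets, axis=0).sum(axis=1))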
Example #10
def calcMSF(coordsets):
    """Calculate mean square fluctuation(s) (MSF)."""

    try:
        ncsets = coordsets.numFrames()
    except AttributeError:
        try:
            coordsets = coordsets.getCoordsets()
        except AttributeError:
            pass
        try:
            ndim, shape = coordsets.ndim, coordsets.shape
        except AttributeError:
            raise TypeError('coordsets must be a Numpy array or a ProDy '
                            'object with `getCoordsets` method')
        if ndim != 3 or shape[0] == 1:
            raise ValueError('coordsets must contain multiple sets')
        msf = var(coordsets, 0).sum(1)
    else:
        nfi = coordsets.nextIndex()
        natoms = coordsets.numSelected()
        total = zeros((natoms, 3))
        sqsum = zeros((natoms, 3))

        LOGGER.progress(
            'Evaluating {0} frames from {1}:'.format(ncsets, str(coordsets)),
            ncsets, '_prody_calcMSF')
        ncsets = 0
        coordsets.reset()
        for frame in coordsets:
            frame.superpose()
            coords = frame._getCoords()
            total += coords
            sqsum += coords**2
            ncsets += 1
            LOGGER.update(ncsets, '_prody_calcMSF')
        msf = (sqsum / ncsets - (total / ncsets)**2).sum(1)
        LOGGER.clear()
        coordsets.goto(nfi)
    return msf
Example #11
def buildPDBEnsemble(PDBs, ref=None, title='Unknown', labels=None, 
                     mapping_func=mapOntoChain, unmapped=None, **kwargs):
    """Builds a PDB ensemble from a given reference structure and a list of PDB structures. 
    Note that the reference structure should be included in the list as well.

    :arg PDBs: A list of PDB structures
    :type PDBs: iterable

    :arg ref: Reference structure or the index to the reference in ``PDBs``. If **None**,
        then the first item in ``PDBs`` will be considered as the reference. 
        Default is **None**
    :type ref: int, :class:`.Chain`, :class:`.Selection`, or :class:`.AtomGroup`

    :arg title: The title of the ensemble
    :type title: str

    :arg labels: labels of the conformations
    :type labels: list

    :arg occupancy: Minimal occupancy of columns (range from 0 to 1). Columns whose occupancy
        is below this value will be trimmed.
    :type occupancy: float

    :arg unmapped: A list of PDB IDs that cannot be included in the ensemble. This is an 
        output argument. 
    :type unmapped: list

    :arg subset: A subset for selecting particular atoms from the input structures.
        Default is calpha
    :type subset: str
    """

    occupancy = kwargs.pop('occupancy', None)
    degeneracy = kwargs.pop('degeneracy', True)
    subset = str(kwargs.get('subset', 'calpha')).lower()
    superpose = kwargs.pop('superpose', True)

    if len(PDBs) < 2:
        raise ValueError('PDBs should have at least two items')

    if labels is not None:
        if len(labels) != len(PDBs):
            raise TypeError('Labels and PDBs must have the same length.')
    else:
        labels = []
        
        for pdb in PDBs:
            if pdb is None:
                labels.append(None)
            else:
                labels.append(pdb.getTitle())

    if ref is None:
        refpdb = PDBs[0]
    elif isinstance(ref, Integral):
        refpdb = PDBs[ref]
    else:
        refpdb = ref
        if refpdb not in PDBs:
            raise ValueError('ref should also be in PDBs')

    # obtain refchains from the hierarchical view of the reference PDB
    if subset != 'all':
        refpdb = refpdb.select(subset)
        
    try:
        refchains = list(refpdb.getHierView())
    except AttributeError:
        raise TypeError('refpdb must have getHierView')

    start = time.time()
    # obtain the atommap of all the chains combined.
    atoms = refchains[0]
    for i in range(1, len(refchains)):
        atoms += refchains[i]
    
    # initialize a PDBEnsemble with reference atoms and coordinates
    ensemble = PDBEnsemble(title)
    ensemble.setAtoms(atoms)
    ensemble.setCoords(atoms.getCoords())
    
    # build the ensemble
    if unmapped is None: unmapped = []

    LOGGER.progress('Building the ensemble...', len(PDBs), '_prody_buildPDBEnsemble')
    for i, pdb in enumerate(PDBs):
        if pdb is None:
            unmapped.append(labels[i])
            continue

        LOGGER.update(i, 'Mapping %s to the reference...'%pdb.getTitle(), 
                      label='_prody_buildPDBEnsemble')
        try:
            pdb.getHierView()
        except AttributeError:
            raise TypeError('PDBs must be a list of objects that support getHierView')
            
        if labels is None:
            lbl = pdb.getTitle()
        else:
            lbl = labels[i]

        atommaps = []
        # find the mapping of the pdb to each reference chain
        for chain in refchains:
            mappings = mapping_func(pdb, chain,
                                    index=i,
                                    **kwargs)
            if len(mappings) > 0:
                atommaps.append(mappings[0][0])
            else:
                break

        if len(atommaps) != len(refchains):
            unmapped.append(lbl)
            continue
        
        # combine the mappings of pdb to reference chains
        atommap = atommaps[0]
        for j in range(1, len(atommaps)):
            atommap += atommaps[j]
        
        # add the mappings to the ensemble
        ensemble.addCoordset(atommap, weights=atommap.getFlags('mapped'), 
                             label = lbl, degeneracy=degeneracy)

    LOGGER.finish()

    if occupancy is not None:
        ensemble = trimPDBEnsemble(ensemble, occupancy=occupancy)
    if superpose:
        ensemble.iterpose()
    
    LOGGER.info('Ensemble ({0} conformations) was built in {1:.2f}s.'
                     .format(ensemble.numConfs(), time.time()-start))

    if unmapped:
        LOGGER.warn('{0} structures could not be mapped.'.format(len(unmapped)))
    return ensemble
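
A hypothetical call to the function above, assuming the structures were fetched and parsed beforehand (the PDB identifiers are only placeholders):

pdbs = parsePDB(['1p38', '1zz2', '1r39'])     # list of AtomGroup instances
unmapped = []                                 # filled with labels that fail to map
ensemble = buildPDBEnsemble(pdbs, title='p38', occupancy=0.9,
                            unmapped=unmapped)
print(ensemble.numConfs(), unmapped)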
Example #12
def fetchBIRDviaFTP(**kwargs):
    """Retrieve the whole Biologically Interesting Molecule Reference 
    Dictionary (BIRD) resource, which is updated every week. This includes 
    2 kinds of keys, which can be selected with the **keys** keyword argument.

    The chemical information is found in a zipped (tar.gz) directory at 
    https://files.rcsb.org/pub/pdb/data/bird/prd/prd-all.cif.gz, which 
    contains individual CIF files within it. This data will be downloaded 
    and extracted to :file:`.prody/bird-prd`.

    Biological function information is also found in a zipped (tar.gz) directory at 
    https://files.rcsb.org/pub/pdb/data/bird/family/family-all.cif.gz, which 
    contains individual CIF files within it. This data will be downloaded 
    and extracted to :file:`.prody/bird-family`.

    :arg keys: keys specifying which data to fetch out of ``'prd'``, ``'family'`` or ``'both'``
               default is ``'both'``
    :type keys: str, tuple, list, :class:`~numpy.ndarray`

    The underlying data can be accessed using :func:`parseBIRD`."""

    BIRD_PATH = os.path.join(getPackagePath(), 'bird')

    keys = kwargs.get('keys', 'both')
    if isinstance(keys, str):
        if keys == 'both':
            keys = ['prd', 'family']
        elif keys[:3].lower() == 'prd':
            keys = ['prd']
        elif keys[:3].lower() == 'fam':
            keys = ['family']
        else:
            raise ValueError("keys should be 'both', 'prd' or 'fam'")

    elif isListLike(keys):
        keys = list(keys)
    else:
        raise TypeError("keys should be list-like or string")

    ftp_divided = 'pdb/data/bird/'
    ftp_pdbext = '.cif.gz'
    ftp_prefix = ''

    if not os.path.isdir(BIRD_PATH):
        os.mkdir(BIRD_PATH)

    LOGGER.progress('Downloading BIRD', len(keys),
                    '_prody_fetchBIRD')

    ftp_name, ftp_host, ftp_path = WWPDB_FTP_SERVERS[wwPDBServer() or 'us']
    LOGGER.debug('Connecting wwPDB FTP server {0}.'.format(ftp_name))

    from ftplib import FTP
    try:
        ftp = FTP(ftp_host)
    except Exception as error:
        raise type(error)('FTP connection problem, potential reason: '
                          'no internet connectivity')
    else:
        count = 0
        success = 0
        failure = 0
        filenames = []
        ftp.login('')
        for i, x in enumerate(keys):
            data = []
            ftp_fn = ftp_prefix + '{0}-all'.format(x) + ftp_pdbext
            try:
                ftp.cwd(ftp_path)
                ftp.cwd(ftp_divided)
                ftp.cwd(x)
                ftp.retrbinary('RETR ' + ftp_fn, data.append)
            except Exception as error:
                if ftp_fn in ftp.nlst():
                    LOGGER.warn('{0} download failed ({1}). It is '
                                'possible that you do not have rights to '
                                'download .gz files in the current network.'
                                .format(x, str(error)))
                else:
                    LOGGER.info('{0} download failed. {1} does not exist '
                                'on {2}.'.format(x, ftp_fn, ftp_host))
                failure += 1
                filenames.append(None)
            else:
                if len(data):
                    filename = BIRD_PATH + '/{0}-all.cif.gz'.format(x)

                    with open(filename, 'w+b') as outfile:
                        write = outfile.write
                        for block in data:
                            write(block)

                    success += 1
                else:
                    failure += 1
            count += 1
            LOGGER.update(i, label='_prody_fetchBIRD')
        LOGGER.finish()

    LOGGER.debug('PDB download via FTP completed ({0} downloaded, '
                 '{1} failed).'.format(success, failure))
Example #13
File: goa.py Project: nffaruk/ProDy
def queryGOA(*ids, **kwargs):
    """Query a GOA database by identifier.

    :arg ids: an identifier or a list-like of identifiers 
    :type ids: str, tuple, list, :class:`~numpy.ndarray`

    :arg database: name of the database of interest
        default is PDB. Others include UNIPROT and 
        common names of many organisms.
    :type database: str
    """
    database = kwargs.pop('database', 'PDB')

    gaf_dict = kwargs.pop('gaf_dict', None)
    if gaf_dict is None:
        gaf_dict = parseGAF(database=database, **kwargs)
        LOGGER.info('GAF parsing completed.')

    n_ids = len(ids)
    if n_ids == 1:
        if isListLike(ids[0]):
            ids = ids[0]
            n_ids = len(ids)

    if n_ids == 1:
        ids = list(ids)

    results = []
    unmapped = []
    LOGGER.progress('Querying GOA for {0} ids...'.format(n_ids), n_ids,
                    '_prody_queryGOA')
    for i, id in enumerate(ids):
        LOGGER.update(i,
                      'Querying GOA for id {0} of {1}...'.format(i + 1, n_ids),
                      label='_prody_queryGOA')
        if not isinstance(id, str):
            raise TypeError('each ID should be a string')

        id = id.upper()

        if database == 'PDB':
            if len(id) not in [4, 5, 6]:
                raise ValueError('PDB IDs should be strings of length 4 to 6')

            if len(id) == 5 and str.isalpha(id[-1]):
                id = id[:4] + '_' + id[-1]

        if id in gaf_dict:
            results.append(gaf_dict[id])
        else:
            results.append([])
            unmapped.append(id)

    rets = []
    LOGGER.progress(
        'Mapping GO terms back to GOA results for {0} ids...'.format(n_ids),
        n_ids, '_prody_mapGO')
    for i, result in enumerate(results):
        LOGGER.update(
            i,
            'Mapping GO terms back to GOA results id {0} of {1}...'.format(
                i + 1, n_ids),
            label='_prody_mapGO')
        rets.append(GOADictList(result, title=ids[i], **kwargs))

    if n_ids == 1:
        rets = rets[0]

    return rets
Example #14
def parsePDB(*pdb, **kwargs):
    """Returns an :class:`.AtomGroup` and/or dictionary containing header data
    parsed from a PDB file.

    This function extends :func:`.parsePDBStream`.

    See :ref:`parsepdb` for a detailed usage example.

    :arg pdb: one PDB identifier or filename, or a list of them.
        If needed, PDB files are downloaded using :func:`.fetchPDB()` function.
    
    You can also provide arguments that you would like passed on to fetchPDB().
    """

    n_pdb = len(pdb)
    if n_pdb == 1:
        if isListLike(pdb[0]):
            pdb = pdb[0]
            n_pdb = len(pdb)
            
    if n_pdb == 1:
        return _parsePDB(pdb[0], **kwargs)
    else:
        results = []
        lstkwargs = {}
        for key in kwargs:
            argval = kwargs.get(key)
            if np.isscalar(argval):
                argval = [argval]*n_pdb
            lstkwargs[key] = argval

        start = time.time()
        LOGGER.progress('Retrieving {0} PDB structures...'
                    .format(n_pdb), n_pdb, '_prody_parsePDB')
        for i, p in enumerate(pdb):
            kwargs = {}
            for key in lstkwargs:
                kwargs[key] = lstkwargs[key][i]
            c = kwargs.get('chain','')
            LOGGER.update(i, 'Retrieving {0}...'.format(p+c), 
                          label='_prody_parsePDB')
            result = _parsePDB(p, **kwargs)
            if not isinstance(result, tuple):
                if isinstance(result, dict):
                    result = (None, result)
                else:
                    result = (result, None)
            results.append(result)

        results = list(zip(*results))
        LOGGER.finish()
       
        for i in reversed(range(len(results))):
            if all(j is None for j in results[i]):
                results.pop(i)
        if len(results) == 1:
            results = results[0]
        results = list(results)

        model = kwargs.get('model')
        header = kwargs.get('header', False)
        if model != 0 and header:
            numPdbs = len(results[0])
        else:
            numPdbs = len(results)

        LOGGER.info('{0} PDBs were parsed in {1:.2f}s.'
                     .format(numPdbs, time.time()-start))

        return results
Example #15
def calcPerturbResponse(model, **kwargs):
    """This function implements the perturbation response scanning (PRS) method
    described in [CA09]_ and [IG14]_. It returns a PRS matrix, and effectiveness 
    and sensitivity profiles.
    
    Rows of the matrix are the average magnitude of the responses obtained by 
    perturbing the atom/node position at that row index, i.e. ``prs_matrix[i,j]`` 
    will give the response of residue/node *j* to perturbations in residue/node *i*. 
    
    PRS is performed using the covariance matrix from a *model*, e.g. 
    a :class:`.ANM` instance. To use an external matrix, please provide it to 
    a :class:`.PCA` instance using the :meth:`.PCA.setCovariance` method.

    When an *atoms* instance is given, the PRS matrix will be added as data, 
    which can be retrieved with ``atoms.getData('prs_matrix')``.  

    *model* and *atoms* must have the same number of atoms. *atoms* must be an
    :class:`.AtomGroup` instance. 

    .. [CA09] Atilgan C, Atilgan AR, Perturbation-Response Scanning
       Reveals Ligand Entry-Exit Mechanisms of Ferric Binding Protein.
       *PLoS Comput Biol* **2009** 5(10):e1000544.

    .. [IG14] General IJ, Liu Y, Blackburn ME, Mao W, Gierasch LM, Bahar I.
        ATPase subdomain IA is a mediator of interdomain allostery in Hsp70
        molecular chaperones. *PLoS Comput. Biol.* **2014** 10:e1003624.

    If *turbo* is **True** (default), then PRS is approximated by the limit of 
    large numbers of forces and no perturbation forces are explicitly applied. 
    If set to **False**, then each residue/node is perturbed *repeats* times (default 100) 
    with a random unit force vector as in ProDy v1.8 and earlier.
    """

    if not isinstance(model, (NMA, ModeSet, Mode)):
        raise TypeError('model must be an NMA, ModeSet, or Mode instance')

    if isinstance(model, NMA) and len(model) == 0:
        raise ValueError('model must have normal modes calculated')

    atoms = kwargs.get('atoms', None)
    suppress_diag = kwargs.get('suppress_diag', False)
    no_diag = kwargs.get('no_diag', suppress_diag)

    if atoms is not None:
        if isinstance(atoms, Selection):
            atoms = atoms.copy()
        if not isinstance(atoms, AtomGroup):
            raise TypeError('atoms must be an AtomGroup instance')
        elif atoms.numAtoms() != model.numAtoms():
            raise ValueError('model and atoms must have the same number of atoms')

    n_atoms = model.numAtoms()
    # LOGGER.timeit('_prody_prs_all')
    # LOGGER.info('Calculating covariance matrix')
    # LOGGER.timeit('_prody_cov')

    cov = model.getCovariance()

    turbo = kwargs.get('turbo', True)
    if turbo:
        if not model.is3d():
            prs_matrix = cov**2

        else:
            cov_squared = cov**2
            n_by_3n_cov_squared = np.zeros((n_atoms, 3 * n_atoms))
            prs_matrix = np.zeros((n_atoms, n_atoms))
            i3 = -3
            i3p3 = 0
            for i in range(n_atoms):
                i3 += 3
                i3p3 += 3
                n_by_3n_cov_squared[i, :] = (cov_squared[i3:i3p3, :]).sum(0)

            j3 = -3
            j3p3 = 0
            for j in range(n_atoms):
                j3 += 3
                j3p3 += 3
                prs_matrix[:, j] = (n_by_3n_cov_squared[:, j3:j3p3]).sum(1)
    else:
        repeats = kwargs.pop('repeats', 100)
        LOGGER.info(
            'Calculating perturbation response with {0} repeats'.format(
                repeats))
        LOGGER.timeit('_prody_prs_mat')

        response_matrix = np.zeros((n_atoms, n_atoms))
        LOGGER.progress('Calculating perturbation response', n_atoms,
                        '_prody_prs')
        i3 = -3
        i3p3 = 0
        for i in range(n_atoms):
            i3 += 3
            i3p3 += 3
            forces = np.random.rand(repeats * 3).reshape((repeats, 3))
            forces /= ((forces**2).sum(1)**0.5).reshape((repeats, 1))
            for force in forces:
                response_matrix[i] += (np.dot(cov[:, i3:i3p3],
                                              force)**2).reshape(
                                                  (n_atoms, 3)).sum(1)
            LOGGER.update(i, '_prody_prs')

        response_matrix /= repeats
        prs_matrix = response_matrix  # used below for normalization

        LOGGER.clear()
        LOGGER.report('Perturbation response matrix calculated in %.1fs.',
                      '_prody_prs_mat')

    norm_prs_matrix = np.zeros((n_atoms, n_atoms))
    self_dp = np.diag(prs_matrix)
    self_dp = self_dp.reshape(n_atoms, 1)
    re_self_dp = np.repeat(self_dp, n_atoms, axis=1)
    norm_prs_matrix = div0(prs_matrix, re_self_dp)

    if no_diag:
        # suppress the diagonal (self displacement) to facilitate
        # visualizing the response profile
        norm_prs_matrix = norm_prs_matrix - np.diag(np.diag(norm_prs_matrix))

    W = 1 - np.eye(n_atoms)
    effectiveness = np.average(norm_prs_matrix, weights=W, axis=1)
    sensitivity = np.average(norm_prs_matrix, weights=W, axis=0)

    # LOGGER.report('Perturbation response scanning completed in %.1fs.',
    #               '_prody_prs_all')

    if atoms is not None:
        try:
            ag = atoms.getAtomGroup()
            defdata = np.zeros(ag.numAtoms(), dtype=float)
            ag.setData('effectiveness', defdata.copy())
            ag.setData('sensitivity', defdata.copy())
        except AttributeError:
            pass
        atoms.setData('effectiveness', effectiveness)
        atoms.setData('sensitivity', sensitivity)

        #atoms.setData('prs_matrix', norm_prs_matrix)

    return norm_prs_matrix, effectiveness, sensitivity
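
For reference, the two block-summing loops in the turbo branch collapse each 3x3 super-element of cov**2; the same N x N reduction can be written as a single reshape-and-sum (a sketch, not the code used above):

import numpy as np

n_atoms = 4
cov = np.random.rand(3 * n_atoms, 3 * n_atoms)      # toy 3N x 3N covariance

# prs[i, j] = sum of the 3x3 block of cov**2 coupling atoms i and j,
# which is exactly what the i3/i3p3 and j3/j3p3 loops accumulate
prs = (cov ** 2).reshape(n_atoms, 3, n_atoms, 3).sum(axis=(1, 3))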
Example #16
def matchModes(*modesets, **kwargs):
    """Returns the matches of modes among *modesets*. Note that the first 
    modeset will be treated as the reference so that only the matching 
    of each modeset to the first modeset is guaranteed to be optimal.
    
    :arg index: if **True** then indices of modes will be returned instead of 
                :class:`Mode` instances
    :type index: bool

    :arg turbo: if **True** then the computation will be performed in parallel, 
                with the number of threads set to the number of CPUs. Assign a 
                number instead to specify how many threads to use. Note that if 
                writing a script, an ``if __name__ == '__main__'`` guard is 
                necessary to protect your code when multi-tasking. 
                See https://docs.python.org/2/library/multiprocessing.html for details.
                Default is **False**
    :type turbo: bool, int
    """

    index = kwargs.pop('index', False)
    turbo = kwargs.pop('turbo', False)

    n_worker = None
    if not isinstance(turbo, bool):
        n_worker = int(turbo)

    n_sets = len(modesets)
    if n_sets == 0:
        raise ValueError('at least one modeset should be given')

    modeset0 = modesets[0]
    if index:
        ret = [modeset0.getIndices()]
    else:
        ret = [modeset0]

    n_modes = len(modeset0)
    if n_sets == 1:
        return ret

    if turbo:
        from multiprocessing import Pool, cpu_count
        from math import ceil
        
        if not n_worker:
            n_worker = cpu_count()

        LOGGER.info('Matching {0} modes across {1} modesets with {2} threads...'
                        .format(n_modes, n_sets, n_worker))

        pool = Pool(n_worker)
        n_sets_per_worker = ceil((n_sets - 1) / n_worker)
        args = []
        for i in range(n_worker):
            start = i*n_sets_per_worker + 1
            end = (i+1)*n_sets_per_worker + 1
            subset = modesets[start:end]
            args.append((modeset0, subset, index))
        nested_ret = pool.map(_pairModes_wrapper, args)
        for entry in nested_ret:
            ret.extend(entry)

        pool.close()
        pool.join()
    else:
        LOGGER.progress('Matching {0} modes across {1} modesets...'
                        .format(n_modes, n_sets), n_sets, '_prody_matchModes')
        for i, modeset in enumerate(modesets):
            LOGGER.update(i, label='_prody_matchModes')
            if i > 0:
                _, reordered_modeset = pairModes(modeset0, modeset, index=index, **kwargs)
                ret.append(reordered_modeset)
        LOGGER.finish()
    
    return ret
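
The turbo branch hands out the non-reference modesets in contiguous ceil-sized chunks, one per worker; on a plain list the same slicing looks like this (a sketch with dummy data):

from math import ceil

modesets = list(range(10))                 # stand-ins; index 0 is the reference
n_worker = 4
chunk = ceil((len(modesets) - 1) / n_worker)

batches = [modesets[i * chunk + 1:(i + 1) * chunk + 1] for i in range(n_worker)]
# -> [[1, 2, 3], [4, 5, 6], [7, 8, 9], []]; empty batches are harmless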
Example #17
def buildPDBEnsemble(atomics,
                     ref=None,
                     title='Unknown',
                     labels=None,
                     atommaps=None,
                     unmapped=None,
                     **kwargs):
    """Builds a :class:`.PDBEnsemble` from a given reference structure and a list of structures 
    (:class:`.Atomic` instances). Note that the reference should be included in the list as well.

    :arg atomics: a list of :class:`.Atomic` instances
    :type atomics: list

    :arg ref: reference structure or the index to the reference in *atomics*. If **None**,
        then the first item in *atomics* will be considered as the reference. If it is a 
        :class:`.PDBEnsemble` instance, then *atomics* will be appended to the existing ensemble.
        Default is **None**
    :type ref: int, :class:`.Chain`, :class:`.Selection`, or :class:`.AtomGroup`

    :arg title: the title of the ensemble
    :type title: str

    :arg labels: labels of the conformations
    :type labels: list

    :arg degeneracy: whether only the active coordinate set (**True**) or all the coordinate sets 
        (**False**) of each structure should be added to the ensemble. Default is **True**
    :type degeneracy: bool

    :arg occupancy: minimal occupancy of columns (range from 0 to 1). Columns whose occupancy
        is below this value will be trimmed
    :type occupancy: float

    :arg atommaps: labels of *atomics* that were mapped and added into the ensemble. This is an 
        output argument
    :type atommaps: list

    :arg unmapped: labels of *atomics* that cannot be included in the ensemble. This is an 
        output argument
    :type unmapped: list

    :arg subset: a subset for selecting particular atoms from the input structures.
        Default is ``"all"``
    :type subset: str

    :arg superpose: if set to ``'iter'``, :func:`.PDBEnsemble.iterpose` will be used to 
        superpose the structures, otherwise conformations will be superposed with respect 
        to the reference specified by *ref* unless set to ``False``. Default is ``'iter'``
    :type superpose: str, bool
    """

    occupancy = kwargs.pop('occupancy', None)
    degeneracy = kwargs.pop('degeneracy', True)
    subset = str(kwargs.get('subset', 'all')).lower()
    superpose = kwargs.pop('superpose', 'iter')
    superpose = kwargs.pop('iterpose', superpose)
    debug = kwargs.pop('debug', {})

    if 'mapping_func' in kwargs:
        raise DeprecationWarning(
            'mapping_func is deprecated. Please see release notes for '
            'more details: http://prody.csb.pitt.edu/manual/release/v1.11_series.html'
        )
    start = time.time()

    if not isListLike(atomics):
        raise TypeError('atomics should be list-like')

    if len(atomics) == 1 and degeneracy is True:
        raise ValueError('atomics should have at least two items')

    if labels is not None:
        if len(labels) != len(atomics):
            raise TypeError('Labels and atomics must have the same length.')
    else:
        labels = []

        for atoms in atomics:
            if atoms is None:
                labels.append(None)
            else:
                labels.append(atoms.getTitle())

    if ref is None:
        target = atomics[0]
    elif isinstance(ref, Integral):
        target = atomics[ref]
    elif isinstance(ref, PDBEnsemble):
        target = ref._atoms
    else:
        target = ref

    # initialize a PDBEnsemble with reference atoms and coordinates
    isrefset = False
    if isinstance(ref, PDBEnsemble):
        ensemble = ref
    else:
        # select the subset of reference beforehand for the sake of efficiency
        if subset != 'all':
            target = target.select(subset)
        ensemble = PDBEnsemble(title)
        if isinstance(target, Atomic):
            ensemble.setAtoms(target)
            ensemble.setCoords(target.getCoords())
            isrefset = True
        else:
            ensemble._n_atoms = len(target)
            isrefset = False

    # build the ensemble
    if unmapped is None: unmapped = []
    if atommaps is None: atommaps = []

    LOGGER.progress('Building the ensemble...', len(atomics),
                    '_prody_buildPDBEnsemble')
    for i, atoms in enumerate(atomics):
        if atoms is None:
            unmapped.append(labels[i])
            continue

        LOGGER.update(i,
                      'Mapping %s to the reference...' % atoms.getTitle(),
                      label='_prody_buildPDBEnsemble')
        try:
            atoms.getHierView()
        except AttributeError:
            raise TypeError(
                'atomics must be a list of objects that support getHierView'
            )

        if subset != 'all':
            atoms = atoms.select(subset)

        # find the mapping of chains of atoms to those of target
        debug[labels[i]] = {}
        atommaps_ = alignChains(atoms,
                                target,
                                debug=debug[labels[i]],
                                **kwargs)

        if len(atommaps_) == 0:
            unmapped.append(labels[i])
            continue
        else:
            atommaps.extend(atommaps_)

        # add the atommaps to the ensemble
        for atommap in atommaps_:
            lbl = pystr(labels[i])
            if len(atommaps_) > 1:
                chids = np.unique(atommap.getChids())
                strchids = ''.join(chids)
                lbl += '_%s' % strchids
            ensemble.addCoordset(atommap,
                                 weights=atommap.getFlags('mapped'),
                                 label=lbl,
                                 degeneracy=degeneracy)

            if not isrefset:
                ensemble.setCoords(atommap.getCoords())
                isrefset = True

    LOGGER.finish()

    if occupancy is not None:
        ensemble = trimPDBEnsemble(ensemble, occupancy=occupancy)

    if superpose == 'iter':
        ensemble.iterpose()
    elif superpose is not False:
        ensemble.superpose()

    LOGGER.info('Ensemble ({0} conformations) was built in {1:.2f}s.'.format(
        ensemble.numConfs(),
        time.time() - start))

    if unmapped:
        LOGGER.warn('{0} structures could not be mapped.'.format(len(unmapped)))
    return ensemble
Example #18
    def scanResidues(self, n_modes=10, enm='gnm', cutoff=None):
        '''
        Scans residues to generate ESSA z-scores.

        :arg n_modes: Number of global modes.
        :type n_modes: int

        :arg enm: Type of elastic network model, 'gnm' or 'anm', default is 'gnm'.
        :type enm: str

        :arg cutoff: Cutoff distance (A) for pairwise interactions, default is 10 A for GNM and 15 A for ANM.
        :type cutoff: float
        '''

        self._n_modes = n_modes
        self._enm = enm
        self._cutoff = cutoff

        self._ensemble = ModeEnsemble('{}'.format(self._title))
        self._ensemble.setAtoms(self._ca)
        self._labels = ['ref']

        # --- reference model --- #

        self._reference()

        # --- perturbed models --- #

        LOGGER.progress(msg='', steps=(self._ca.numAtoms()))
        for i, j in enumerate(self._ca.getResindices()):
            LOGGER.update(step=i + 1, msg='scanning residue {}'.format(i + 1))
            self._perturbed(j)

        if self._lowmem:
            self._eigvals = array(self._eigvals)
            self._eigvecs = array(self._eigvecs)

        # --- ESSA computation part --- #

        if self._lowmem:
            denom = self._eigvals[0]
            num = self._eigvals[1:] - denom
        else:
            self._ensemble.setLabels(self._labels)
            self._ensemble.match()

            denom = self._ensemble[0].getEigvals()
            num = self._ensemble[1:].getEigvals() - denom

        eig_diff = num / denom * 100
        eig_diff_mean = mean(eig_diff, axis=1)

        self._zscore = zscore(eig_diff_mean)

        if self._lig:
            self._zs_lig = {}
            for k, v in self._ligres_idx.items():
                if self._rib:
                    self._zs_lig[k] = [array(v), self._zscore[v]]
                else:
                    vv = [self._ri[i] for i in v]
                    self._zs_lig[k] = [array(vv), self._zscore[vv]]
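
The ESSA score computed at the end is simply the z-scored mean percent shift of the global-mode eigenvalues caused by perturbing each residue. On dummy eigenvalues (a sketch; scipy.stats.zscore is assumed to be the zscore used above):

import numpy as np
from scipy.stats import zscore

n_residues, n_modes = 50, 10
denom = np.sort(np.random.rand(n_modes)) + 0.1        # reference eigenvalues
num = 0.05 * np.random.rand(n_residues, n_modes)      # perturbed minus reference

eig_diff = num / denom * 100                          # percent change per mode
essa_zscores = zscore(eig_diff.mean(axis=1))          # one z-score per residue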
Example #19
File: goa.py Project: prody/ProDy
def queryGOA(*ids, **kwargs):
    """Query a GOA database by identifier.

    :arg ids: an identifier or a list-like of identifiers 
    :type ids: str, tuple, list, :class:`~numpy.ndarray`

    :arg database: name of the database of interest
        default is PDB. Others include UNIPROT and 
        common names of many organisms.
    :type database: str
    """
    database = kwargs.pop('database', 'PDB')

    gaf_dict = kwargs.pop('gaf_dict', None)
    if gaf_dict is None:
        gaf_dict = parseGAF(database=database, **kwargs)
        LOGGER.info('GAF parsing completed.')

    n_ids = len(ids)
    if n_ids == 1:
        if isListLike(ids[0]):
            ids = ids[0]
            n_ids = len(ids)

    if n_ids == 1:
        ids = list(ids)

    results = []
    unmapped = []
    LOGGER.progress('Querying GOA for {0} ids...'
                    .format(n_ids), n_ids, '_prody_queryGOA')
    for i, id in enumerate(ids):
        LOGGER.update(i, 'Querying GOA for id {0} of {1}...'
                      .format(i+1, n_ids), label='_prody_queryGOA')
        if not isinstance(id, str):
            raise TypeError('each ID should be a string')

        id = id.upper()

        if database == 'PDB':
            if len(id) not in [4, 5, 6]:
                raise ValueError('PDB IDs should be strings of length 4 to 6')

            if len(id) == 5 and str.isalpha(id[-1]):
                id = id[:4] + '_' + id[-1]

        if id in gaf_dict:
            results.append(gaf_dict[id])
        else:
            results.append([])
            unmapped.append(id)

    rets = []
    LOGGER.progress('Mapping GO terms back to GOA results for {0} ids...'
                    .format(n_ids), n_ids, '_prody_mapGO')
    for i, result in enumerate(results):
        LOGGER.update(i, 'Mapping GO terms back to GOA results id {0} of {1}...'
                      .format(i+1, n_ids), label='_prody_mapGO')
        rets.append(GOADictList(result, title=ids[i], **kwargs))

    if n_ids == 1:
        rets = rets[0]

    return rets
Example #20
    def buildCovariance(self, coordsets, **kwargs):
        """Build a covariance matrix for *coordsets* using mean coordinates
        as the reference.  *coordsets* argument may be one of the following:

        * :class:`.Atomic`
        * :class:`.Ensemble`
        * :class:`.TrajBase`
        * :class:`numpy.ndarray` with shape ``(n_csets, n_atoms, 3)``

        For ensemble and trajectory objects, ``update_coords=True`` argument
        can be used to set the mean coordinates as the coordinates of the
        object.

        When *coordsets* is a trajectory object, such as :class:`.DCDFile`,
        covariance will be built by superposing frames onto the reference
        coordinate set (see :meth:`.Frame.superpose`).  If frames are already
        aligned, use ``aligned=True`` argument to skip this step.


        .. note::
           If *coordsets* is a :class:`.PDBEnsemble` instance, coordinates are
           treated specially.  Let's say **C**\_ij is the element of the
           covariance matrix that corresponds to atoms *i* and *j*.  This
           super element is divided by number of coordinate sets (PDB models or
           structures) in which both of these atoms are observed together."""

        if not isinstance(coordsets, (Ensemble, Atomic, TrajBase, np.ndarray)):
            raise TypeError('coordsets must be an Ensemble, Atomic, TrajBase, '
                            'or Numpy array instance')
        LOGGER.timeit('_prody_pca')
        mean = None
        weights = None
        ensemble = None
        if isinstance(coordsets, np.ndarray):
            if (coordsets.ndim != 3 or coordsets.shape[2] != 3 or
                    coordsets.dtype not in (np.float32, float)):
                raise ValueError('coordsets is not a valid coordinate array')
        elif isinstance(coordsets, Atomic):
            coordsets = coordsets._getCoordsets()
        elif isinstance(coordsets, Ensemble):
            ensemble = coordsets
            if isinstance(coordsets, PDBEnsemble):
                weights = coordsets.getWeights() > 0
            coordsets = coordsets._getCoordsets()

        update_coords = bool(kwargs.get('update_coords', False))

        if isinstance(coordsets, TrajBase):
            nfi = coordsets.nextIndex()
            coordsets.reset()
            n_atoms = coordsets.numSelected()
            dof = n_atoms * 3
            cov = np.zeros((dof, dof))
            #mean = coordsets._getCoords().flatten()
            n_confs = 0
            n_frames = len(coordsets)
            LOGGER.info('Covariance will be calculated using {0} frames.'
                        .format(n_frames))
            coordsum = np.zeros(dof)
            LOGGER.progress('Building covariance', n_frames, '_prody_pca')
            align = not kwargs.get('aligned', False)
            for frame in coordsets:
                if align:
                    frame.superpose()
                coords = frame._getCoords().flatten()
                coordsum += coords
                cov += np.outer(coords, coords)
                n_confs += 1
                LOGGER.update(n_confs, label='_prody_pca')
            LOGGER.finish()
            cov /= n_confs
            coordsum /= n_confs
            mean = coordsum
            cov -= np.outer(coordsum, coordsum)
            coordsets.goto(nfi)
            self._cov = cov
            if update_coords:
                coordsets.setCoords(mean.reshape((n_atoms, 3)))
        else:
            n_confs = coordsets.shape[0]
            if n_confs < 3:
                raise ValueError('coordsets must have at least 3 coordinate '
                                 'sets')
            n_atoms = coordsets.shape[1]
            if n_atoms < 3:
                raise ValueError('coordsets must have at least 3 atoms')
            dof = n_atoms * 3
            LOGGER.info('Covariance is calculated using {0} coordinate sets.'
                        .format(len(coordsets)))
            s = (n_confs, dof)
            if weights is None:
                if coordsets.dtype == float:
                    self._cov = np.cov(coordsets.reshape((n_confs, dof)).T,
                                       bias=1)
                else:
                    cov = np.zeros((dof, dof))
                    coordsets = coordsets.reshape((n_confs, dof))
                    mean = coordsets.mean(0)
                    LOGGER.progress('Building covariance', n_confs,
                                    '_prody_pca')
                    for i, coords in enumerate(coordsets.reshape(s)):
                        deviations = coords - mean
                        cov += np.outer(deviations, deviations)
                        LOGGER.update(i, label='_prody_pca')
                    LOGGER.finish()
                    cov /= n_confs
                    self._cov = cov
            else:
                # PDB ensemble case
                mean = np.zeros((n_atoms, 3))
                for i, coords in enumerate(coordsets):
                    mean += coords * weights[i]
                mean /= weights.sum(0)
                d_xyz = ((coordsets - mean) * weights).reshape(s)
                divide_by = weights.astype(float).repeat(3, axis=2).reshape(s)
                self._cov = np.dot(d_xyz.T, d_xyz) / np.dot(divide_by.T,
                                                            divide_by)
            if update_coords and ensemble is not None:
                if mean is None:
                    mean = coordsets.mean(0)
                ensemble.setCoords(mean)

        self._trace = self._cov.trace()
        self._dof = dof
        self._n_atoms = n_atoms
        LOGGER.report('Covariance matrix calculated in %.2fs.', '_prody_pca')
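
The trajectory branch accumulates E[x x^T] and E[x] frame by frame and then uses cov = E[x x^T] - E[x] E[x]^T, which agrees with numpy's biased covariance; a quick check on toy data (a sketch, not part of the method):

import numpy as np

frames = np.random.rand(20, 12)            # 20 frames, 4 atoms * 3 dof
cov = np.zeros((12, 12))
coordsum = np.zeros(12)
for coords in frames:
    coordsum += coords
    cov += np.outer(coords, coords)
cov /= len(frames)
coordsum /= len(frames)
cov -= np.outer(coordsum, coordsum)        # E[x x^T] - E[x] E[x]^T

assert np.allclose(cov, np.cov(frames.T, bias=1))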
Example #21
def calcPerturbResponse(model, atoms=None, repeats=100, **kwargs):
    """Returns a matrix of profiles from scanning of the response of the
    structure to random perturbations at specific atom (or node) positions.
    The function implements the perturbation response scanning (PRS) method
    described in [CA09]_.  Rows of the matrix are the average magnitude of the
    responses obtained by perturbing the atom/node position at that row index,
    i.e. ``prs_profile[i,j]`` will give the response of residue/node *j* to
    perturbations in residue/node *i*.  PRS is performed using the covariance
    matrix from *model*, e.g. :class:`.ANM` instance.  Each residue/node is
    perturbed *repeats* times with a random unit force vector.  When *atoms*
    instance is given, PRS profile for residues will be added as an attribute
    which then can be retrieved as ``atoms.getData('prs_profile')``.  *model*
    and *atoms* must have the same number of atoms. *atoms* must be an
    :class:`.AtomGroup` instance.

    .. [CA09] Atilgan C, Atilgan AR, Perturbation-Response Scanning
       Reveals Ligand Entry-Exit Mechanisms of Ferric Binding Protein.
       *PLoS Comput Biol* **2009** 5(10):e1000544.

    The PRS matrix can be calculated and saved as follows::

      prs_matrix = calcPerturbResponse(p38_anm, saveMatrix=True)
      
    The PRS matrix can also be saved later as follows::
    
      writeArray('prs_matrix.txt', prs_matrix, format='%8.6f', delimiter='\t')

    You can also control which operation is used for getting a single matrix
    from the repeated force application and whether to normalise the matrix
    at the end. If you do choose to normalise the matrix, you can still save
    the original matrix before normalisation as well.

    :arg operation: which operation to perform to get a single response matrix:
        the mean, variance, max or min of the set of repeats. Another operation 
        is to select elements from the matrix showing the biggest difference from 
        the square sum of the covariance matrix. The default is the mean.
        To obtain all response matrices, set operation=None without quotes.
        You can also ask for 'all' operations or provide a list containing
        any set of them.
    :type operation: str or list

    :arg noForce: whether to use the covariance matrix directly rather
        than applying forces. This appears to be equivalent when scanning for
        response magnitudes and will be much quicker. Default is True.
    :type noForce: bool

    :arg normMatrix: whether to normalise the single response matrix by
        dividing each row by its diagonal element. Default is False; we recommend True.
    :type normMatrix: bool

    :arg saveMatrix: whether to save the last matrix generated to a text file.
        Default is False
    :type saveMatrix: bool

    :arg saveOrig: whether to save the original matrix despite normalisation.
        This is the same as saveMatrix when not normalising. Default is False.
    :type saveOrig: bool

    :arg baseSaveName: The central part of the file name for saved
        matrices, which you can set. This is surrounded by underscores. 
        The beginning says orig or norm and the end says which operation 
        was used. Default is 'response_matrix'.
    :type baseSaveName: str

    :arg acceptDirection: select reference direction for forces to be accepted.
        Can be 'in' (towards center of atoms), 'out' (away from center),
        or 'all'. Default is 'all'; using other directions requires *atoms*.
    :type acceptDirection: str
    """
    noForce = kwargs.get('noForce', True)
    operationList = []
    if not noForce:
        operation = kwargs.get('operation', 'mea')

        if operation is not None:
            if isinstance(operation, str):
                if operation in ('all', 'all operations'):
                    operationList = ['var', 'mea', 'max', 'min', 'dif']
                else:
                    operationList = [operation.lower()[:3]]
            elif isinstance(operation, list):
                operationList = [op.lower()[:3] for op in operation]

            operationList = np.array(operationList)
            found_valid_operation = any(
                op in operationList for op in ('var', 'mea', 'max', 'min', 'dif'))

            if not found_valid_operation:
                raise ValueError('Operation should be mean, variance, max, min '
                                 'or difference (from covariance matrix) in quotes '
                                 'or a list containing a set of these, or None.')

    if not isinstance(model, NMA):
        raise TypeError('model must be an NMA instance')
    elif not model.is3d() and not noForce:
        raise TypeError('model must be a 3-dimensional NMA instance '
                        'for using PRS with force')
    elif len(model) == 0:
        raise ValueError('model must have normal modes calculated')

    if atoms is not None:
        if isinstance(atoms, Selection):
            atoms = atoms.copy()
        if not isinstance(atoms, AtomGroup):
            raise TypeError('atoms must be an AtomGroup instance')
        elif atoms.numAtoms() != model.numAtoms():
            raise ValueError('model and atoms must have the same number of atoms')

    n_atoms = model.numAtoms()
    LOGGER.timeit('_prody_prs_all')
    LOGGER.info('Calculating covariance matrix')
    LOGGER.timeit('_prody_cov')

    assert isinstance(repeats, int), 'repeats must be an integer'
    cov = calcCovariance(model)
    if cov is None:
        raise ValueError('model did not return a covariance matrix')

    LOGGER.clear()
    LOGGER.report('Covariance matrix calculated in %.1fs.', '_prody_cov')

    LOGGER.progress('Calculating perturbation response', n_atoms,
                    '_prody_prs_mat')
    matrix_dict = {}

    if noForce or 'dif' in operationList:
        if not model.is3d():
            n_by_n_cov_squared = cov**2

        else:
            cov_squared = cov**2
            n_by_3n_cov_squared = np.zeros((n_atoms, 3 * n_atoms))
            n_by_n_cov_squared = np.zeros((n_atoms, n_atoms))
            i3 = -3
            i3p3 = 0
            for i in range(n_atoms):
                i3 += 3
                i3p3 += 3
                n_by_3n_cov_squared[i, :] = (cov_squared[i3:i3p3, :]).sum(0)

            j3 = -3
            j3p3 = 0
            for j in range(n_atoms):
                j3 += 3
                j3p3 += 3
                n_by_n_cov_squared[:,
                                   j] = (n_by_3n_cov_squared[:,
                                                             j3:j3p3]).sum(1)

    if noForce:
        matrix_dict['noForce'] = n_by_n_cov_squared
        LOGGER.clear()
        LOGGER.report('Perturbation response matrix calculated in %.1fs.',
                      '_prody_prs_mat')

    else:

        acceptDirection = kwargs.get('acceptDirection', 'all')
        if acceptDirection != 'all':
            if atoms is None:
                acceptDirection = 'all'
                LOGGER.info('A specific direction for accepting forces was' \
                            ' provided without an atoms object. This' \
                            ' direction will be ignored and all forces will' \
                            ' be accepted.')
            else:
                coords = atoms.getCoords()
                atoms_center = coords.mean(axis=0)

        mag = kwargs.get('mag', 1)
        response_matrix = np.zeros((repeats, n_atoms, n_atoms))
        i3 = -3
        i3p3 = 0
        for i in range(n_atoms):
            i3 += 3
            i3p3 += 3
            forces = np.random.randn(repeats * 3).reshape((repeats, 3))
            forces /= ((forces**2).sum(1)**0.5).reshape((repeats, 1))
            forces *= mag  # scale each unit force vector to magnitude *mag*
            for n in range(repeats):
                force = forces[n]

                if acceptDirection in ('in', 'out'):
                    res_coords = atoms.getCoords()[i]
                    vec_to_center = atoms_center - res_coords
                    vec_to_center /= ((vec_to_center**2).sum()**0.5)
                    force_overlap = np.dot(force, vec_to_center)

                    if acceptDirection == 'in' and force_overlap < 0:
                        force *= -1

                    if acceptDirection == 'out' and force_overlap > 0:
                        force *= -1

                response_matrix[n, i, :] = (np.dot(cov[:, i3:i3p3],
                                                   force)**2).reshape(
                                                       (n_atoms, 3)).sum(1)
            LOGGER.update(i, '_prody_prs_mat')

        LOGGER.clear()
        LOGGER.report(
            'Perturbation response scanning matrix calculated in %.1fs.',
            '_prody_prs_mat')

        LOGGER.progress('Performing matrix combination operations', n_atoms, \
                        '_prody_prs_ops')

        if 'var' in operationList:
            matrix_dict['var'] = np.var(response_matrix, axis=0)

        if 'max' in operationList:
            matrix_dict['max'] = np.amax(response_matrix, axis=0)

        if 'mea' in operationList:
            matrix_dict['mea'] = np.mean(response_matrix, axis=0)

        if 'min' in operationList:
            matrix_dict['min'] = np.amin(response_matrix, axis=0)

        if 'dif' in operationList:
            matrix_dict['dif'] = np.max(abs(response_matrix - n_by_n_cov_squared),
                                        axis=0)

        LOGGER.report(
            'Perturbation response matrix operations completed in %.1fs.',
            '_prody_prs_ops')

        if operation is None:
            LOGGER.info('Operation is None so all {0} repeats are output.' \
                        ' This is not compatible with saving, normalizing' \
                        ' or mapping to atoms at present.'.format(repeats))
            return response_matrix

    if atoms is not None:
        atoms.setData('prs_profile', matrix_dict[list(matrix_dict.keys())[0]])
        if len(list(matrix_dict.keys())) > 1:
            LOGGER.info('Only one matrix can be added as data to atoms so' \
                        ' the first one was chosen. The operation that generated' \
                        ' it was {0} (first three letters).'.format(list(matrix_dict.keys())[0]))

    saveOrig = kwargs.get('saveOrig', False)
    saveMatrix = kwargs.get('saveMatrix', False)
    normMatrix = kwargs.get('normMatrix', False)
    suppressDiag = kwargs.get('suppressDiag', False)
    baseSaveName = kwargs.get('baseSaveName', 'response_matrix')

    if saveOrig or (saveMatrix and not normMatrix):
        # save the original PRS matrix for each operation
        for m in list(matrix_dict.keys()):
            np.savetxt('orig_{0}_{1}.txt'.format(baseSaveName,m), \
                       matrix_dict[m], delimiter='\t', fmt='%8.6f')

    if normMatrix:
        norm_PRS_mat = {}
        # calculate the normalized PRS matrix for each operation
        for m in list(matrix_dict.keys()):
            # use self displacement (the diagonal of the original matrix)
            # as the normalization factor
            self_dp = np.diag(matrix_dict[m])
            self_dp = self_dp.reshape(n_atoms, 1)
            norm_PRS_mat[m] = matrix_dict[m] / np.repeat(
                self_dp, n_atoms, axis=1)

            if suppressDiag:
                # suppress the diagonal (self displacement) to facilitate
                # visualizing the response profile
                norm_PRS_mat[m] = norm_PRS_mat[m] - np.diag(
                    np.diag(norm_PRS_mat[m]))

            if saveMatrix:
                np.savetxt('norm_{0}_{1}.txt'.format(baseSaveName,m), \
                           norm_PRS_mat[m], delimiter='\t', fmt='%8.6f')

    LOGGER.report('Perturbation response scanning completed in %.1fs.',
                  '_prody_prs_all')

    matrix_list = []
    for m in list(matrix_dict.keys()):
        if normMatrix:
            matrix_list.append(norm_PRS_mat[m])
        else:
            matrix_list.append(matrix_dict[m])
    matrix_array = array(matrix_list)

    returnFormat = kwargs.get('returnFormat', 'array')
    returnFormat = returnFormat.lower()

    if len(matrix_array) == 1:
        LOGGER.info('Output has been returned as a single matrix (an array).')
        return matrix_array.reshape(n_atoms, n_atoms)

    if returnFormat == 'both':
        LOGGER.info('You have requested return in both formats.' \
                    ' Array comes first.')
        return matrix_array, matrix_dict
    elif 'dict' in returnFormat:
        LOGGER.info('Output has been returned as a dictionary of matrices.')
        return matrix_dict
    else:
        LOGGER.info('Output has been returned as an array of matrices,' \
                    ' which you can split into individual matrices.')
        return matrix_array
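
A minimal usage sketch for the function above (the PDB ID and variable names
are illustrative; ProDy's :func:`.parsePDB` and :class:`.ANM` are assumed)::

    from prody import parsePDB, ANM

    calphas = parsePDB('1p38', subset='ca')
    anm = ANM('p38')
    anm.buildHessian(calphas)
    anm.calcModes()

    # the default noForce=True scans the covariance matrix directly;
    # normMatrix=True divides each row by its diagonal element
    prs_matrix = calcPerturbResponse(anm, atoms=calphas, normMatrix=True)
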
Example #22
0
    def buildCovariance(self, coordsets, **kwargs):
        """Build a covariance matrix for *coordsets* using mean coordinates
        as the reference.  *coordsets* argument may be one of the following:

        * :class:`.Atomic`
        * :class:`.Ensemble`
        * :class:`.TrajBase`
        * :class:`numpy.ndarray` with shape ``(n_csets, n_atoms, 3)``

        For ensemble and trajectory objects, ``update_coords=True`` argument
        can be used to set the mean coordinates as the coordinates of the
        object.

        When *coordsets* is a trajectory object, such as :class:`.DCDFile`,
        covariance will be built by superposing frames onto the reference
        coordinate set (see :meth:`.Frame.superpose`).  If frames are already
        aligned, use ``aligned=True`` argument to skip this step.


        .. note::
           If *coordsets* is a :class:`.PDBEnsemble` instance, coordinates are
           treated specially.  Let's say **C**\_ij is the element of the
           covariance matrix that corresponds to atoms *i* and *j*.  This
           super element is divided by number of coordinate sets (PDB models or
           structures) in which both of these atoms are observed together."""

        if not isinstance(coordsets, (Ensemble, Atomic, TrajBase, np.ndarray)):
            raise TypeError('coordsets must be an Ensemble, Atomic, TrajBase, '
                            'or Numpy array instance')
        LOGGER.timeit('_prody_pca')
        mean = None
        weights = None
        ensemble = None
        if isinstance(coordsets, np.ndarray):
            if (coordsets.ndim != 3 or coordsets.shape[2] != 3
                    or coordsets.dtype not in (np.float32, float)):
                raise ValueError('coordsets is not a valid coordinate array')
        elif isinstance(coordsets, Atomic):
            coordsets = coordsets._getCoordsets()
        elif isinstance(coordsets, Ensemble):
            ensemble = coordsets
            if isinstance(coordsets, PDBEnsemble):
                weights = coordsets.getWeights() > 0
            coordsets = coordsets._getCoordsets()

        update_coords = bool(kwargs.get('update_coords', False))

        if isinstance(coordsets, TrajBase):
            nfi = coordsets.nextIndex()
            coordsets.reset()
            n_atoms = coordsets.numSelected()
            dof = n_atoms * 3
            cov = np.zeros((dof, dof))
            #mean = coordsets._getCoords().flatten()
            n_confs = 0
            n_frames = len(coordsets)
            LOGGER.info(
                'Covariance will be calculated using {0} frames.'.format(
                    n_frames))
            coordsum = np.zeros(dof)
            LOGGER.progress('Building covariance', n_frames, '_prody_pca')
            align = not kwargs.get('aligned', False)
            for frame in coordsets:
                if align:
                    frame.superpose()
                coords = frame._getCoords().flatten()
                coordsum += coords
                cov += np.outer(coords, coords)
                n_confs += 1
                LOGGER.update(n_confs, label='_prody_pca')
            LOGGER.finish()
            cov /= n_confs
            coordsum /= n_confs
            mean = coordsum
            cov -= np.outer(coordsum, coordsum)
            coordsets.goto(nfi)
            self._cov = cov
            if update_coords:
                coordsets.setCoords(mean.reshape((n_atoms, 3)))
        else:
            n_confs = coordsets.shape[0]
            if n_confs < 3:
                raise ValueError('coordsets must have at least 3 coordinate '
                                 'sets')
            n_atoms = coordsets.shape[1]
            if n_atoms < 3:
                raise ValueError('coordsets must have at least 3 atoms')
            dof = n_atoms * 3
            LOGGER.info(
                'Covariance is calculated using {0} coordinate sets.'.format(
                    len(coordsets)))
            s = (n_confs, dof)
            if weights is None:
                if coordsets.dtype == float:
                    self._cov = np.cov(coordsets.reshape((n_confs, dof)).T,
                                       bias=1)
                else:
                    cov = np.zeros((dof, dof))
                    coordsets = coordsets.reshape((n_confs, dof))
                    mean = coordsets.mean(0)
                    LOGGER.progress('Building covariance', n_confs,
                                    '_prody_pca')
                    for i, coords in enumerate(coordsets):
                        deviations = coords - mean
                        cov += np.outer(deviations, deviations)
                        LOGGER.update(i, label='_prody_pca')
                    LOGGER.finish()
                    cov /= n_confs
                    self._cov = cov
            else:
                # PDB ensemble case
                mean = np.zeros((n_atoms, 3))
                for i, coords in enumerate(coordsets):
                    mean += coords * weights[i]
                mean /= weights.sum(0)
                d_xyz = ((coordsets - mean) * weights).reshape(s)
                divide_by = weights.astype(float).repeat(3, axis=2).reshape(s)
                self._cov = np.dot(d_xyz.T, d_xyz) / np.dot(
                    divide_by.T, divide_by)
            if update_coords and ensemble is not None:
                if mean is None:
                    mean = coordsets.mean(0)
                ensemble.setCoords(mean)

        self._trace = self._cov.trace()
        self._dof = dof
        self._n_atoms = n_atoms
        LOGGER.report('Covariance matrix calculated in %.2fs.', '_prody_pca')
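
The PDB-ensemble branch above averages each super-element over only the
conformations in which both atoms are observed. A minimal NumPy sketch of that
normalization (toy shapes; in the degenerate case of a pair of atoms never
observed together the denominator would be zero)::

    import numpy as np

    n_confs, n_atoms = 6, 4
    coordsets = np.random.randn(n_confs, n_atoms, 3)
    # weights[k, i, 0] is 1 if atom i is observed in conformation k, else 0
    weights = (np.random.rand(n_confs, n_atoms, 1) > 0.3).astype(int)

    mean = (coordsets * weights).sum(0) / weights.sum(0)
    s = (n_confs, n_atoms * 3)
    d_xyz = ((coordsets - mean) * weights).reshape(s)
    divide_by = weights.astype(float).repeat(3, axis=2).reshape(s)
    # element (a, b) of the denominator counts the conformations in which
    # both coordinates a and b are present
    cov = np.dot(d_xyz.T, d_xyz) / np.dot(divide_by.T, divide_by)
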
Example #23
0
def addPDBEnsemble(ensemble, PDBs, refpdb=None, labels=None, 
                   mapping_func=mapOntoChain, occupancy=None, unmapped=None, **kwargs):  
    """Adds extra structures to a given PDB ensemble. 

    :arg ensemble: the ensemble to which the PDBs are added
    :type ensemble: :class:`.PDBEnsemble`

    :arg refpdb: reference structure. If set to **None**, it will be set to ``ensemble.getAtoms()`` automatically
    :type refpdb: :class:`.Chain`, :class:`.Selection`, or :class:`.AtomGroup`

    :arg PDBs: A list of PDB structures
    :type PDBs: iterable

    :arg title: the title of the ensemble
    :type title: str

    :arg labels: labels of the conformations
    :type labels: list

    :arg seqid: minimal sequence identity (percent)
    :type seqid: int

    :arg coverage: minimal sequence overlap (percent)
    :type coverage: int

    :arg occupancy: minimal occupancy of columns (range from 0 to 1). Columns whose occupancy 
                    is below this value will be trimmed
    :type occupancy: float

    :arg unmapped: a list of PDB IDs that cannot be included in the ensemble. This is an 
                   output argument
    :type unmapped: list
    """

    degeneracy = kwargs.pop('degeneracy', True)
    subset = str(kwargs.get('subset', 'calpha')).lower()
    superpose = kwargs.pop('superpose', True)

    if labels is not None:
        if len(labels) != len(PDBs):
            raise TypeError('Labels and PDBs must have the same length.')
    else:
        labels = []
        
        for pdb in PDBs:
            if pdb is None:
                labels.append(None)
            else:
                labels.append(pdb.getTitle())

    # obtain refchains from the hierarchical view of the reference PDB
    if refpdb is None:
        refpdb = ensemble._atoms
    else:
        if subset != 'all':
            refpdb = refpdb.select(subset)

    refchains = list(refpdb.getHierView())

    start = time.time()

    # obtain the atommap of all the chains combined.
    atoms = refchains[0]
    for i in range(1, len(refchains)):
        atoms += refchains[i]
    
    # add the PDBs to the ensemble
    if unmapped is None: unmapped = []

    LOGGER.progress('Appending the ensemble...', len(PDBs), '_prody_addPDBEnsemble')
    for i, pdb in enumerate(PDBs):
        lbl = labels[i]
        if pdb is None:
            unmapped.append(labels[i])
            continue

        LOGGER.update(i, 'Mapping %s to the reference...'%pdb.getTitle(), 
                      label='_prody_addPDBEnsemble')
        if not isinstance(pdb, (Chain, Selection, AtomGroup)):
            raise TypeError('PDBs must be a list of Chain, Selection, or AtomGroup.')

        atommaps = []
        # find the mapping of the pdb to each reference chain
        for chain in refchains:
            mappings = mapping_func(pdb, chain,
                                    index=i,
                                    **kwargs)
            if len(mappings) > 0:
                atommaps.append(mappings[0][0])
            else:
                break

        if len(atommaps) != len(refchains):
            unmapped.append(lbl)
            continue
        
        # combine the mappings of pdb to reference chains
        atommap = atommaps[0]
        for j in range(1, len(atommaps)):
            atommap += atommaps[j]
        
        # add the mappings to the ensemble
        ensemble.addCoordset(atommap, weights=atommap.getFlags('mapped'), 
                             label=lbl, degeneracy=degeneracy)
    LOGGER.finish()

    if occupancy is not None:
        ensemble = trimPDBEnsemble(ensemble, occupancy=occupancy)
    if superpose:
        ensemble.iterpose()

    LOGGER.info('{0} PDBs were added to the ensemble in {1:.2f}s.'
                     .format(len(PDBs) - len(unmapped), time.time()-start))

    if unmapped:
        LOGGER.warn('{0} structures could not be mapped.'.format(len(unmapped)))

    return ensemble
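
A minimal usage sketch for the function above (PDB IDs are illustrative;
*ensemble* is assumed to be an existing :class:`.PDBEnsemble`, e.g. one built
with :func:`.buildPDBEnsemble` in the next example)::

    from prody import parsePDB

    new_pdbs = parsePDB('2ghg', '3h5v')   # a list of AtomGroups
    unmapped = []
    ensemble = addPDBEnsemble(ensemble, new_pdbs, unmapped=unmapped)
    if unmapped:
        print('could not map:', unmapped)
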
Example #24
0
def buildPDBEnsemble(PDBs,
                     ref=None,
                     title='Unknown',
                     labels=None,
                     mapping_func=mapOntoChain,
                     unmapped=None,
                     **kwargs):
    """Builds a PDB ensemble from a given reference structure and a list of PDB structures. 
    Note that the reference structure should be included in the list as well.

    :arg PDBs: A list of PDB structures
    :type PDBs: iterable

    :arg ref: Reference structure or the index to the reference in ``PDBs``. If **None**,
        then the first item in ``PDBs`` will be considered as the reference. 
        Default is **None**
    :type ref: int, :class:`.Chain`, :class:`.Selection`, or :class:`.AtomGroup`

    :arg title: The title of the ensemble
    :type title: str

    :arg labels: labels of the conformations
    :type labels: list

    :arg occupancy: Minimal occupancy of columns (range from 0 to 1). Columns whose occupancy
        is below this value will be trimmed.
    :type occupancy: float

    :arg unmapped: A list of PDB IDs that cannot be included in the ensemble. This is an 
        output argument. 
    :type unmapped: list

    :arg subset: A subset for selecting particular atoms from the input structures.
        Default is calpha
    :type subset: str

    :arg superpose: if set to ``'iter'``, :func:`.PDBEnsemble.iterpose` will be used to 
        superpose the structures, otherwise conformations will be superposed with respect 
        to the reference specified by ``ref``. Default is ``'iter'``
    :type superpose: str
    """

    occupancy = kwargs.pop('occupancy', None)
    degeneracy = kwargs.pop('degeneracy', True)
    subset = str(kwargs.get('subset', 'calpha')).lower()
    superpose = kwargs.pop('superpose', 'iter')
    superpose = kwargs.pop('iterpose', superpose)

    if len(PDBs) == 1:
        raise ValueError('PDBs should have at least two items')

    if labels is not None:
        if len(labels) != len(PDBs):
            raise TypeError('Labels and PDBs must have the same length.')
    else:
        labels = []

        for pdb in PDBs:
            if pdb is None:
                labels.append(None)
            else:
                labels.append(pdb.getTitle())

    if ref is None:
        refpdb = PDBs[0]
        refidx = 0
    elif isinstance(ref, Integral):
        refpdb = PDBs[ref]
        refidx = ref
    else:
        refpdb = ref
        if refpdb not in PDBs:
            raise ValueError('refpdb should also be in PDBs')
        refidx = PDBs.index(ref)

    # obtain refchains from the hierarchical view of the reference PDB
    if subset != 'all':
        refpdb = refpdb.select(subset)

    try:
        refchains = list(refpdb.getHierView())
    except AttributeError:
        raise TypeError('refpdb must have getHierView')

    start = time.time()
    # obtain the atommap of all the chains combined.
    atoms = refpdb

    # initialize a PDBEnsemble with reference atoms and coordinates
    ensemble = PDBEnsemble(title)
    ensemble.setAtoms(atoms)
    ensemble.setCoords(atoms.getCoords())

    # build the ensemble
    if unmapped is None: unmapped = []

    LOGGER.progress('Building the ensemble...', len(PDBs),
                    '_prody_buildPDBEnsemble')
    for i, pdb in enumerate(PDBs):
        if pdb is None:
            unmapped.append(labels[i])
            continue

        LOGGER.update(i,
                      'Mapping %s to the reference...' % pdb.getTitle(),
                      label='_prody_buildPDBEnsemble')
        try:
            pdb.getHierView()
        except AttributeError:
            raise TypeError(
                'PDBs must be a list of instances having the access to getHierView'
            )

        lbl = labels[i]

        atommaps = []
        # find the mapping of the pdb to each reference chain
        for chain in refchains:
            mappings = mapping_func(pdb, chain, index=i, **kwargs)
            if len(mappings) > 0:
                atommaps.append(mappings[0][0])
            else:
                break

        if len(atommaps) != len(refchains):
            unmapped.append(lbl)
            continue

        # combine the mappings of pdb to reference chains
        atommap = atommaps[0]
        for j in range(1, len(atommaps)):
            atommap += atommaps[j]

        # add the mappings to the ensemble
        ensemble.addCoordset(atommap,
                             weights=atommap.getFlags('mapped'),
                             label=lbl,
                             degeneracy=degeneracy)

    LOGGER.finish()

    if occupancy is not None:
        ensemble = trimPDBEnsemble(ensemble, occupancy=occupancy)

    if superpose != 'iter':
        ensemble.superpose(ref=refidx)
    else:
        ensemble.iterpose()

    LOGGER.info('Ensemble ({0} conformations) was built in {1:.2f}s.'.format(
        ensemble.numConfs(),
        time.time() - start))

    if unmapped:
        LOGGER.warn('{0} structures could not be mapped.'.format(len(unmapped)))
    return ensemble
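
A minimal usage sketch for the function above (PDB IDs are illustrative)::

    from prody import parsePDB

    pdbs = parsePDB('1p38', '1zz2', '1r39')
    ensemble = buildPDBEnsemble(pdbs, title='p38 ensemble')
    print(ensemble.numConfs(), 'conformations,', ensemble.numAtoms(), 'atoms')
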
Example #25
0
def calcPerturbResponse(model, atoms=None, repeats=100):
    """Return a matrix of profiles from scanning of the response of the
    structure to random perturbations at specific atom (or node) positions.
    The function implements the perturbation response scanning (PRS) method
    described in [CA09]_.  Rows of the matrix are the average magnitude of the
    responses obtained by perturbing the atom/node position at that row index,
    i.e. ``prs_profile[i,j]`` will give the response of residue/node *j* to
    perturbations in residue/node *i*.  PRS is performed using the covariance
    matrix from *model*, e.g. :class:`.ANM` instance.  Each residue/node is
    perturbed *repeats* times with a random unit force vector.  When *atoms*
    instance is given, PRS profile for residues will be added as an attribute
    which then can be retrieved as ``atoms.getData('prs_profile')``.  *model*
    and *atoms* must have the same number of atoms. *atoms* must be an
    :class:`.AtomGroup` instance.


    .. [CA09] Atilgan C, Atilgan AR, Perturbation-Response Scanning
       Reveals Ligand Entry-Exit Mechanisms of Ferric Binding Protein.
       *PLoS Comput Biol* **2009** 5(10):e1000544.

    The PRS matrix can be saved as follows::

      prs_matrix = calcPerturbResponse(p38_anm)
      writeArray('prs_matrix.txt', prs_matrix, format='%8.6f', delimiter='\t')
    """

    if not isinstance(model, NMA):
        raise TypeError('model must be an NMA instance')
    elif not model.is3d():
        raise TypeError('model must be a 3-dimensional NMA instance')
    elif len(model) == 0:
        raise ValueError('model must have normal modes calculated')
    if atoms is not None:
        if not isinstance(atoms, AtomGroup):
            raise TypeError('atoms must be an AtomGroup instance')
        elif atoms.numAtoms() != model.numAtoms():
            raise ValueError('model and atoms must have the same number atoms')

    assert isinstance(repeats, int), 'repeats must be an integer'
    cov = calcCovariance(model)
    if cov is None:
        raise ValueError('model did not return a covariance matrix')

    n_atoms = model.numAtoms()
    response_matrix = np.zeros((n_atoms, n_atoms))
    LOGGER.progress('Calculating perturbation response', n_atoms, '_prody_prs')
    i3 = -3
    i3p3 = 0
    for i in range(n_atoms):
        i3 += 3
        i3p3 += 3
        # randn (not rand) so that the normalized forces are distributed
        # uniformly over all directions
        forces = np.random.randn(repeats * 3).reshape((repeats, 3))
        forces /= ((forces**2).sum(1)**0.5).reshape((repeats, 1))
        for force in forces:
            response_matrix[i] += (
                np.dot(cov[:, i3:i3p3], force)
                ** 2).reshape((n_atoms, 3)).sum(1)
        LOGGER.update(i, '_prody_prs')

    response_matrix /= repeats
    LOGGER.clear()
    LOGGER.report('Perturbation response scanning completed in %.1fs.',
                  '_prody_prs')
    if atoms is not None:
        atoms.setData('prs_profile', response_matrix)

    # save the original PRS matrix
    np.savetxt('orig_PRS_matrix', response_matrix, delimiter='\t', fmt='%8.6f')
    # calculate the normalized PRS matrix, using self displacement (the
    # diagonal of the original matrix) as the normalization factor
    self_dp = np.diag(response_matrix)
    self_dp = self_dp.reshape(n_atoms, 1)
    norm_PRS_mat = response_matrix / np.repeat(self_dp, n_atoms, axis=1)
    # suppress the diagonal (self displacement) to facilitate
    # visualizing the response profile
    norm_PRS_mat = norm_PRS_mat - np.diag(np.diag(norm_PRS_mat))
    np.savetxt('norm_PRS_matrix', norm_PRS_mat, delimiter='\t', fmt='%8.6f')
    return response_matrix
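
A short sketch of the response computation in the loop above: a force *f*
applied at residue *i* gives displacements ΔR = C·F by linear response, and
the response of residue *j* is the squared magnitude of its 3-vector block
(toy sizes; *cov* stands in for the model covariance matrix)::

    import numpy as np

    n_atoms = 4
    rand = np.random.randn(3 * n_atoms, 3 * n_atoms)
    cov = np.dot(rand, rand.T)            # symmetric, positive semidefinite

    i = 2
    force = np.random.randn(3)
    force /= np.linalg.norm(force)        # random unit force vector

    dR = np.dot(cov[:, 3*i:3*i+3], force)          # displacements from C.F
    row_i = (dR ** 2).reshape(n_atoms, 3).sum(1)   # one row of the PRS matrix
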
Example #26
0
def parsePfamPDBs(query, data=[], **kwargs):
    """Returns a list of AtomGroups containing sections of chains that 
    correspond to a particular PFAM domain family. These are defined by 
    alignment start and end residue numbers.

    :arg query: UniProt ID or PDB ID
        If a PDB ID is provided the corresponding UniProt ID is used.
        If this returns multiple matches then start or end must also be provided.
        This query is also used for label refinement of the Pfam domain MSA.
    :type query: str

    :arg data: If given the data list from the Pfam mapping table will 
        be output through this argument.
    :type data: list

    :keyword start: Residue number for defining the start of the domain.
        The PFAM domain that starts closest to this will be selected. 
        Default is **1**
    :type start: int

    :keyword end: Residue number for defining the end of the domain.
        The PFAM domain that ends closest to this will be selected. 
    :type end: int
    """
    
    start = kwargs.pop('start', 1)
    end = kwargs.pop('end', None)

    if len(query) > 4 and query.startswith('PF'):
        pfam_acc = query
    else:
        pfam_matches = searchPfam(query)
        keys = list(pfam_matches.keys())

        if isinstance(start, Integral):
            start_diff = []
            for i, key in enumerate(pfam_matches):
                start_diff.append(int(pfam_matches[key]['locations'][0]['start']) - start)
            start_diff = np.array(start_diff)
            pfam_acc = keys[np.where(abs(start_diff) == min(abs(start_diff)))[0][0]]

        elif isinstance(end, Integral):
            end_diff = []
            for i, key in enumerate(pfam_matches):
                end_diff.append(int(pfam_matches[key]['locations'][0]['end']) - end)
            end_diff = np.array(end_diff)
            pfam_acc = keys[np.where(abs(end_diff) == min(abs(end_diff)))[0][0]]

        else:
            raise ValueError('Please provide an integer for start or end '
                             'when using a UniProt ID or PDB ID.')

    from ftplib import FTP
    from .uniprot import queryUniprot

    data_stream = BytesIO()
    ftp_host = 'ftp.ebi.ac.uk'
    ftp = FTP(ftp_host)
    ftp.login()
    ftp.cwd('pub/databases/Pfam/current_release')
    ftp.retrbinary('RETR pdbmap.gz', data_stream.write)
    ftp.quit()
    zip_data = data_stream.getvalue()
    data_stream.close()

    rawdata = gunzip(zip_data)
    if PY3K:
        rawdata = rawdata.decode()

    fields = ['PDB_ID', 'chain', 'nothing', 'PFAM_Name', 'PFAM_ACC', 
              'UniprotAcc', 'UniprotResnumRange']
    
    data_dicts = []
    for line in rawdata.split('\n'):
        if line.find(pfam_acc) != -1:
            data_dicts.append({})
            for j, entry in enumerate(line.strip().split('\t')):
                data_dicts[-1][fields[j]] = entry.strip(';')

    pdb_ids = [data_dict['PDB_ID'] for data_dict in data_dicts]
    chains = [data_dict['chain'] for data_dict in data_dicts]

    header = kwargs.pop('header', False)
    model = kwargs.get('model', None)
    results = parsePDB(*pdb_ids, chain=chains, header=True, **kwargs)

    ags, headers = results
    ags, headers = list(ags), list(headers)

    if model == 0:
        LOGGER.info('only header is requested and returned')
        return results

    if header:
        results = (ags, headers)
    else:
        results = ags

    LOGGER.progress('Extracting Pfam domains...', len(ags))
    comma_splitter = re.compile(r'\s*,\s*').split
    no_info = []
    for i, ag in enumerate(ags):
        LOGGER.update(i)
        data_dict = data_dicts[i]
        pfamRange = data_dict['UniprotResnumRange'].split('-')
        uniprotAcc = data_dict['UniprotAcc']
        try:
            uniData = queryUniprot(uniprotAcc)
        except:
            LOGGER.warn('No Uniprot record found for {0}'.format(data_dict['PDB_ID']))
            continue

        resrange = None
        found = False
        for key, value in uniData.items():
            if not key.startswith('dbReference'):
                continue
            try:
                pdbid = value['PDB']
            except:
                continue
            if pdbid != data_dict['PDB_ID']:
                continue
            pdbchains = value['chains']

            # example chain strings: "A=27-139, B=140-150" or "A/B=27-150"
            pdbchains = comma_splitter(pdbchains)
            for chain in pdbchains:
                chids, resrange = chain.split('=')
                chids = [chid.strip() for chid in chids.split('/')]
                if data_dict['chain'] in chids:
                    resrange = resrange.split('-')
                    found = True
                    break
            if found:
                break

        if found:
            this_header = headers[i]
            chain_accessions = [dbref.accession 
                                for dbref in this_header[data_dict['chain']].dbrefs]
            try:
                if len(chain_accessions) > 0:
                    right_part = np.where(np.array(chain_accessions) == 
                                        data_dict['UniprotAcc'])[0][0]
                else:
                    raise ValueError('There is no accession for a chain in the Header')
            except:
                LOGGER.warn('Could not map domains in {0}'
                            .format(data_dict['PDB_ID'] 
                            + data_dict['chain']))
                no_info.append(i)
                continue

            right_dbref = this_header[data_dict['chain']].dbrefs[right_part]
            chainStart = ag.select('chain {0}'.format(data_dict['chain'])
                                  ).getResnums()[0]
            missing = chainStart - right_dbref.first[0]
            partStart = ag.getResindices()[np.where(ag.getResnums() == 
                                           right_dbref.first[0] + missing)][0]
            pfStart, pfEnd = int(pfamRange[0]), int(pfamRange[1])
            uniStart, uniEnd = int(resrange[0]), int(resrange[1])

            resiStart = pfStart - uniStart + partStart - missing
            resiEnd = pfEnd - uniStart + partStart - missing
            ags[i] = ag.select('resindex {0} to {1}'.format(
                            resiStart, resiEnd)) 
        else:
            no_info.append(i)
    LOGGER.finish()

    for i in reversed(no_info):
        ags.pop(i)
        if header:
            headers.pop(i)

    if isinstance(data, list):
        data.extend(data_dicts)
    else:
        LOGGER.warn('data should be a list in order to get output')
    
    return results
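
A minimal usage sketch for the function above (the UniProt accession is
illustrative; network access to Pfam and the PDB is assumed)::

    data = []
    ags = parsePfamPDBs('P19491', data=data, start=1)
    if data:
        print(len(ags), 'domain structures for', data[0]['PFAM_ACC'])
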
Example #27
0
def parsePDB(*pdb, **kwargs):
    """Returns an :class:`.AtomGroup` and/or dictionary containing header data
    parsed from a PDB file.

    This function extends :func:`.parsePDBStream`.

    See :ref:`parsepdb` for a detailed usage example.

    :arg pdb: one PDB identifier or filename, or a list of them.
        If needed, PDB files are downloaded using :func:`.fetchPDB()` function.
    
    You can also provide arguments that you would like passed on to fetchPDB().
    """

    n_pdb = len(pdb)
    if n_pdb == 1:
        if isListLike(pdb[0]):
            pdb = pdb[0]
            n_pdb = len(pdb)

    if n_pdb == 1:
        return _parsePDB(pdb[0], **kwargs)
    else:
        results = []
        lstkwargs = {}
        for key in kwargs:
            argval = kwargs.get(key)
            if np.isscalar(argval):
                argval = [argval] * n_pdb
            lstkwargs[key] = argval

        start = time.time()
        LOGGER.progress('Retrieving {0} PDB structures...'.format(n_pdb),
                        n_pdb, '_prody_parsePDB')
        for i, p in enumerate(pdb):
            kwargs = {}
            for key in lstkwargs:
                kwargs[key] = lstkwargs[key][i]
            c = kwargs.get('chain', '')
            LOGGER.update(i,
                          'Retrieving {0}...'.format(p + c),
                          label='_prody_parsePDB')
            result = _parsePDB(p, **kwargs)
            if not isinstance(result, tuple):
                if isinstance(result, dict):
                    result = (None, result)
                else:
                    result = (result, None)
            results.append(result)

        results = list(zip(*results))
        LOGGER.finish()

        for i in reversed(range(len(results))):
            if all(j is None for j in results[i]):
                results.pop(i)
        if len(results) == 1:
            results = results[0]
        results = list(results)

        model = kwargs.get('model')
        header = kwargs.get('header', False)
        if model != 0 and header:
            numPdbs = len(results[0])
        else:
            numPdbs = len(results)

        LOGGER.info('{0} PDBs were parsed in {1:.2f}s.'.format(
            numPdbs,
            time.time() - start))

        return results
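
A minimal usage sketch of the multi-structure path above (PDB IDs are
illustrative)::

    ags = parsePDB('1p38', '1zz2')                # a list of AtomGroups
    ag, header = parsePDB('1p38', header=True)    # AtomGroup plus header dict
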
Example #28
0
File: pfam.py Project: uibcdf/ProDy
def parsePfamPDBs(query, data=[], **kwargs):
    """Returns a list of AtomGroups containing sections of chains that 
    correspond to a particular PFAM domain family. These are defined by 
    alignment start and end residue numbers.

    :arg query: UniProt ID or PDB ID
        If a PDB ID is provided the corresponding UniProt ID is used.
        If this returns multiple matches then start or end must also be provided.
        This query is also used for label refinement of the Pfam domain MSA.
    :type query: str

    :arg data: If given the data list from the Pfam mapping table will 
        be output through this argument.
    :type data: list

    :keyword start: Residue number for defining the start of the domain.
        The PFAM domain that starts closest to this will be selected. 
        Default is **1**
    :type start: int

    :keyword end: Residue number for defining the end of the domain.
        The PFAM domain that ends closest to this will be selected. 
    :type end: int
    """

    start = kwargs.pop('start', 1)
    end = kwargs.pop('end', None)

    if len(query) > 4 and query.startswith('PF'):
        pfam_acc = query
    else:
        pfam_matches = searchPfam(query)
        keys = list(pfam_matches.keys())

        if isinstance(start, Integral):
            start_diff = []
            for i, key in enumerate(pfam_matches):
                start_diff.append(
                    int(pfam_matches[key]['locations'][0]['start']) - start)
            start_diff = np.array(start_diff)
            pfam_acc = keys[np.where(
                abs(start_diff) == min(abs(start_diff)))[0][0]]

        elif isinstance(end, Integral):
            end_diff = []
            for i, key in enumerate(pfam_matches):
                end_diff.append(
                    int(pfam_matches[key]['locations'][0]['end']) - end)
            end_diff = np.array(end_diff)
            pfam_acc = keys[np.where(
                abs(end_diff) == min(abs(end_diff)))[0][0]]

        else:
            raise ValueError('Please provide an integer for start or end '
                             'when using a UniProt ID or PDB ID.')

    from ftplib import FTP
    from .uniprot import queryUniprot

    data_stream = BytesIO()
    ftp_host = 'ftp.ebi.ac.uk'
    ftp = FTP(ftp_host)
    ftp.login()
    ftp.cwd('pub/databases/Pfam/current_release')
    ftp.retrbinary('RETR pdbmap.gz', data_stream.write)
    ftp.quit()
    zip_data = data_stream.getvalue()
    data_stream.close()

    rawdata = gunzip(zip_data)
    if PY3K:
        rawdata = rawdata.decode()

    fields = [
        'PDB_ID', 'chain', 'nothing', 'PFAM_Name', 'PFAM_ACC', 'UniprotAcc',
        'UniprotResnumRange'
    ]

    data_dicts = []
    for line in rawdata.split('\n'):
        if line.find(pfam_acc) != -1:
            data_dicts.append({})
            for j, entry in enumerate(line.strip().split('\t')):
                data_dicts[-1][fields[j]] = entry.strip(';')

    pdb_ids = [data_dict['PDB_ID'] for data_dict in data_dicts]
    chains = [data_dict['chain'] for data_dict in data_dicts]

    header = kwargs.pop('header', False)
    model = kwargs.get('model', None)
    results = parsePDB(*pdb_ids, chain=chains, header=True, **kwargs)

    ags, headers = results
    ags, headers = list(ags), list(headers)

    if model == 0:
        LOGGER.info('only header is requested and returned')
        return results

    if header:
        results = (ags, headers)
    else:
        results = ags

    LOGGER.progress('Extracting Pfam domains...', len(ags))
    comma_splitter = re.compile(r'\s*,\s*').split
    no_info = []
    for i, ag in enumerate(ags):
        LOGGER.update(i)
        data_dict = data_dicts[i]
        pfamRange = data_dict['UniprotResnumRange'].split('-')
        uniprotAcc = data_dict['UniprotAcc']
        try:
            uniData = queryUniprot(uniprotAcc)
        except:
            LOGGER.warn('No Uniprot record found for {0}'.format(
                data_dict['PDB_ID']))
            continue

        resrange = None
        found = False
        for key, value in uniData.items():
            if not key.startswith('dbReference'):
                continue
            try:
                pdbid = value['PDB']
            except:
                continue
            if pdbid != data_dict['PDB_ID']:
                continue
            pdbchains = value['chains']

            # example chain strings: "A=27-139, B=140-150" or "A/B=27-150"
            pdbchains = comma_splitter(pdbchains)
            for chain in pdbchains:
                chids, resrange = chain.split('=')
                chids = [chid.strip() for chid in chids.split('/')]
                if data_dict['chain'] in chids:
                    resrange = resrange.split('-')
                    found = True
                    break
            if found:
                break

        if found:
            this_header = headers[i]
            chain_accessions = [
                dbref.accession for dbref in this_header[data_dict['chain']].dbrefs
            ]
            try:
                if len(chain_accessions) > 0:
                    right_part = np.where(
                        np.array(chain_accessions) ==
                        data_dict['UniprotAcc'])[0][0]
                else:
                    raise ValueError(
                        'There is no accession for a chain in the Header')
            except:
                LOGGER.warn(
                    'Could not map domains in {0}'.format(data_dict['PDB_ID'] +
                                                          data_dict['chain']))
                no_info.append(i)
                continue

            right_dbref = this_header[data_dict['chain']].dbrefs[right_part]
            chainStart = ag.select('chain {0}'.format(
                data_dict['chain'])).getResnums()[0]
            missing = chainStart - right_dbref.first[0]
            partStart = ag.getResindices()[np.where(
                ag.getResnums() == right_dbref.first[0] + missing)][0]
            pfStart, pfEnd = int(pfamRange[0]), int(pfamRange[1])
            uniStart, uniEnd = int(resrange[0]), int(resrange[1])

            resiStart = pfStart - uniStart + partStart - missing
            resiEnd = pfEnd - uniStart + partStart - missing
            ags[i] = ag.select('resindex {0} to {1}'.format(
                resiStart, resiEnd))
        else:
            no_info.append(i)
    LOGGER.finish()

    for i in reversed(no_info):
        ags.pop(i)
        if header:
            headers.pop(i)

    if isinstance(data, list):
        data.extend(data_dicts)
    else:
        LOGGER.warn('data should be a list in order to get output')

    return results
Example #29
0
def writeDCD(filename, trajectory, start=None, stop=None, step=None, 
             align=False):
    """Write 32-bit CHARMM format DCD file (also NAMD 2.1 and later).
    *trajectory can be an :class:`Trajectory`, :class:`DCDFile`, or 
    :class:`Ensemble` instance. *filename* is returned upon successful
    output of file."""
    
    if not isinstance(trajectory, (TrajBase, Ensemble, Atomic)):
        raise TypeError('{0:s} is not a valid type for trajectory'
                        .format(type(trajectory)))
    
    irange = range(*slice(start, stop, 
                          step).indices(trajectory.numCoordsets()))
    n_csets = len(irange)
    if n_csets == 0:
        raise ValueError('trajectory does not have any coordinate sets, or '
                         'no coordinate sets are selected')
    
    if isinstance(trajectory, Atomic):
        isEnsemble = False
        isAtomic = True
        n_atoms = trajectory.numAtoms()
    else:
        isEnsemble = True
        isAtomic = False
        n_atoms = trajectory.numSelected()
    if n_atoms == 0:
        raise ValueError('no atoms are selected in the trajectory')
    if isinstance(trajectory, TrajBase):
        isTrajectory = True
        unitcell = trajectory.hasUnitcell()
        nfi = trajectory.nextIndex() 
        trajectory.reset()
        pack_i_48 = pack('i', 48)
        if isinstance(trajectory, Trajectory):
            timestep = trajectory.getTimestep()[0]
            first_ts = trajectory.getFirstTimestep()[0]
            framefreq = trajectory.getFrameFreq()[0]
            n_fixed = trajectory.numFixed()[0]
        else:
            timestep = trajectory.getTimestep()
            first_ts = trajectory.getFirstTimestep()
            framefreq = trajectory.getFrameFreq()
            n_fixed = trajectory.numFixed()
    else:
        isTrajectory = False
        unitcell = False
        if isinstance(trajectory, Ensemble):
            frame = trajectory[0]
        else:
            frame = trajectory
            acsi = trajectory.getACSIndex()
        timestep = 1
        first_ts = 0
        framefreq = 1
        n_fixed = 0
        
    dcd = DCDFile(filename, mode='w')
    LOGGER.progress('Writing DCD', len(irange), '_prody_writeDCD')
    prev = -1
    uc = None
    time_ = time()
    for j, i in enumerate(irange):
        diff = i - prev
        if diff > 1:
            trajectory.skip(diff-1)
        prev = i
        if isTrajectory:
            frame = trajectory.next()
            if frame is None:
                break
            if unitcell:
                uc = frame._getUnitcell()
                uc[3:] = np.sin((PISQUARE/90) * (90-uc[3:]))
                uc = uc[[0,3,1,4,5,2]]
        elif isEnsemble:
            frame._index = i
        else:
            frame.setACSIndex(i) 
        if align:
            frame.superpose()
        if j == 0:
            dcd.write(frame._getCoords(), uc, timestep=timestep, 
                      firsttimestep=first_ts, framefreq=framefreq)
        else:
            dcd.write(frame._getCoords(), uc)
        LOGGER.update(i, '_prody_writeDCD')
    if isAtomic:
        trajectory.setACSIndex(acsi)
    j += 1
    LOGGER.clear()
    dcd.close()
    time_ = time() - time_ or 0.01
    dcd_size = 1.0 * (56 + (n_atoms * 3 + 6) * 4 ) * n_csets / (1024*1024)
    LOGGER.info('DCD file was written in {0:.2f} seconds.'.format(time_))
    LOGGER.info('{0:.2f} MB written at input rate {1:.2f} MB/s.'
                .format(dcd_size, dcd_size/time_))
    LOGGER.info('{0:d} coordinate sets written at output rate {1:d} frame/s.'
                .format(n_csets, int(n_csets/time_)))
    if j != n_csets:
        LOGGER.warn('{0:d} frames expected, {1:d} written.'
                    .format(n_csets, j))
    if isTrajectory:
        trajectory.goto(nfi)
    return filename
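
A minimal usage sketch for the function above, using the Atomic branch (the
PDB ID and file name are illustrative)::

    from prody import parsePDB

    ag = parsePDB('2k39')              # an NMR structure with many models
    writeDCD('2k39.dcd', ag, step=2)   # write every other coordinate set
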
Example #30
0
def buildPDBEnsemble(refpdb,
                     PDBs,
                     title='Unknown',
                     labels=None,
                     seqid=94,
                     coverage=85,
                     mapping_func=mapOntoChain,
                     occupancy=None,
                     unmapped=None,
                     **kwargs):
    """Builds a PDB ensemble from a given reference structure and a list of PDB structures. 
    Note that the reference structure should be included in the list as well.

    :arg refpdb: Reference structure
    :type refpdb: :class:`.Chain`, :class:`.Selection`, or :class:`.AtomGroup`

    :arg PDBs: A list of PDB structures
    :type PDBs: iterable

    :arg title: The title of the ensemble
    :type title: str

    :arg labels: labels of the conformations
    :type labels: list

    :arg seqid: Minimal sequence identity (percent)
    :type seqid: int

    :arg coverage: Minimal sequence overlap (percent)
    :type coverage: int

    :arg occupancy: Minimal occupancy of columns (range from 0 to 1). Columns whose occupancy
        is below this value will be trimmed.
    :type occupancy: float

    :arg unmapped: A list of PDB IDs that cannot be included in the ensemble. This is an 
        output argument. 
    :type unmapped: list
    """

    if labels is not None:
        if len(labels) != len(PDBs):
            raise ValueError('labels and PDBs must be the same length.')

    # obtain refchains from the hierarchical view of the reference PDB
    try:
        refchains = list(refpdb.getHierView())
    except AttributeError:
        raise TypeError('refpdb must have getHierView')

    # obtain the atommap of all the chains combined.
    atoms = refchains[0]
    for i in range(1, len(refchains)):
        atoms += refchains[i]

    # initialize a PDBEnsemble with reference atoms and coordinates
    ensemble = PDBEnsemble(title)
    ensemble.setAtoms(atoms)
    ensemble.setCoords(atoms.getCoords())

    # build the ensemble
    if unmapped is None: unmapped = []

    verb = LOGGER.verbosity
    LOGGER.verbosity = 'info'

    LOGGER.progress('Building the ensemble...', len(PDBs))
    for i, pdb in enumerate(PDBs):
        LOGGER.update(i, 'Mapping %s to the reference...' % pdb)
        try:
            pdb.getHierView()
        except AttributeError:
            raise TypeError(
                'PDBs must be a list of instances having the access to getHierView'
            )

        if labels is None:
            lbl = pdb.getTitle()
        else:
            lbl = labels[i]

        atommaps = []
        # find the mapping of the pdb to each reference chain
        for chain in refchains:
            mappings = mapping_func(pdb,
                                    chain,
                                    seqid=seqid,
                                    coverage=coverage,
                                    index=i,
                                    **kwargs)
            if len(mappings) > 0:
                atommaps.append(mappings[0][0])
            else:
                break

        if len(atommaps) != len(refchains):
            unmapped.append(lbl)
            continue

        # combine the mappings of pdb to reference chains
        atommap = atommaps[0]
        for j in range(1, len(atommaps)):
            atommap += atommaps[j]

        # add the mappings to the ensemble
        ensemble.addCoordset(atommap,
                             weights=atommap.getFlags('mapped'),
                             label=lbl)

    LOGGER.update(len(PDBs), 'Finished.')
    LOGGER.verbosity = verb

    if occupancy is not None:
        ensemble = trimPDBEnsemble(ensemble, occupancy=occupancy)
    ensemble.iterpose()

    return ensemble
Example #31
0
def matchModes(*modesets, **kwargs):
    """Returns the matches of modes among *modesets*. Note that the first 
    modeset will be treated as the reference so that only the matching 
    of each modeset to the first modeset is guaranteed to be optimal.
    
    :arg index: if **True** then indices of modes will be returned instead of 
                :class:`Mode` instances
    :type index: bool

    :arg turbo: if **True** then the computation will be performed in parallel, 
                using as many threads as there are CPUs. Assign a number to 
                specify the number of threads to use instead. Note that when 
                writing a script, an ``if __name__ == '__main__'`` guard is 
                necessary to protect your code when multiprocessing. 
                See https://docs.python.org/2/library/multiprocessing.html for details.
                Default is **False**
    :type turbo: bool, int
    """

    index = kwargs.pop('index', False)
    turbo = kwargs.pop('turbo', False)

    n_worker = None
    if not isinstance(turbo, bool):
        n_worker = int(turbo)

    n_sets = len(modesets)
    if n_sets == 0:
        raise ValueError('at least one modeset should be given')

    modeset0 = modesets[0]
    if index:
        ret = [modeset0.getIndices()]
    else:
        ret = [modeset0]

    n_modes = len(modeset0)
    if n_sets == 1:
        return ret

    if turbo:
        from multiprocessing import Pool, cpu_count
        from math import ceil
        
        if not n_worker:
            n_worker = cpu_count()

        LOGGER.info('Matching {0} modes across {1} modesets with {2} threads...'
                        .format(n_modes, n_sets, n_worker))

        pool = Pool(n_worker)
        n_sets_per_worker = ceil((n_sets - 1) / n_worker)
        args = []
        for i in range(n_worker):
            start = i*n_sets_per_worker + 1
            end = (i+1)*n_sets_per_worker + 1
            subset = modesets[start:end]
            args.append((modeset0, subset, index))
        nested_ret = pool.map(_pairModes_wrapper, args)
        for entry in nested_ret:
            ret.extend(entry)

        pool.close()
        pool.join()
    else:
        LOGGER.progress('Matching {0} modes across {1} modesets...'
                        .format(n_modes, n_sets), n_sets, '_prody_matchModes')
        for i, modeset in enumerate(modesets):
            LOGGER.update(i, label='_prody_matchModes')
            if i > 0:
                _, reordered_modeset = pairModes(modeset0, modeset, index=index, **kwargs)
                ret.append(reordered_modeset)
        LOGGER.finish()
    
    return ret
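
A minimal usage sketch for the function above (*gnm_a* and *gnm_b* are
assumed to be mode sets computed on the same atoms, e.g. sliced
:class:`.GNM` instances)::

    matched = matchModes(gnm_a[:10], gnm_b[:10])     # [reference, reordered]
    indices = matchModes(gnm_a[:10], gnm_b[:10], index=True)
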
Example #32
0
def calcEnsembleENMs(ensemble,
                     model='gnm',
                     trim='reduce',
                     n_modes=20,
                     **kwargs):
    """Description"""

    match = kwargs.pop('match', True)
    if isinstance(ensemble, Conformation):
        conformation = ensemble
        ensemble = conformation.getEnsemble()
        index = conformation.getIndex()
        ensemble = ensemble[index:index + 1]
    if model is GNM:
        model_type = 'GNM'
    elif model is ANM:
        model_type = 'ANM'
    else:
        model_type = str(model).strip().upper()

    start = time.time()

    atoms = ensemble.getAtoms()
    select = None
    if ensemble.isSelected():
        select = atoms
        atoms = ensemble.getAtoms(selected=False)

    labels = ensemble.getLabels()

    ### ENMs ###
    ## ENM for every conf
    enms = []
    n_confs = ensemble.numConfs()

    str_modes = 'all' if n_modes is None else str(n_modes)
    LOGGER.progress(
        'Calculating {0} {1} modes for {2} conformations...'.format(
            str_modes, model_type, n_confs), n_confs,
        '_prody_calcEnsembleENMs')

    for i in range(n_confs):
        LOGGER.update(i, label='_prody_calcEnsembleENMs')
        coords = ensemble.getCoordsets(i, selected=False)
        nodes = coords[0, :, :]
        if atoms is not None:
            atoms.setCoords(nodes)
            nodes = atoms
        enm, _ = calcENM(nodes,
                         select,
                         model=model,
                         trim=trim,
                         n_modes=n_modes,
                         title=labels[i],
                         **kwargs)
        enms.append(enm)

        #lbl = labels[i] if labels[i] != '' else '%d-th conformation'%(i+1)
    LOGGER.finish()

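    # truncate all mode sets to the smallest number of modes found, so every
    # conformation contributes a mode set of equal size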
    min_n_modes = ensemble.numAtoms() * 3
    for enm in enms:
        n_modes = enm.numModes()
        if n_modes < min_n_modes:
            min_n_modes = n_modes

    for i in range(len(enms)):
        n_modes = enms[i].numModes()
        if n_modes > min_n_modes:
            enms[i] = enms[i][:min_n_modes]
            LOGGER.warn(
                'the last {0} modes of {1} have been discarded because at least '
                'one conformation has only {2} modes'.format(
                    n_modes - min_n_modes, enms[i].getTitle(), min_n_modes))

    LOGGER.info(
        '{0} {1} modes were calculated for each of the {2} conformations in {3:.2f}s.'
        .format(str_modes, model_type, n_confs,
                time.time() - start))

    modeens = ModeEnsemble(title=ensemble.getTitle())
    modeens.addModeSet(enms,
                       weights=ensemble.getWeights(),
                       label=ensemble.getLabels())
    modeens.setAtoms(ensemble.getAtoms())

    if match:
        modeens.match()
    return modeens
Example #33
0
def calcPerturbResponse(model, atoms=None, repeats=100):
    """Returns a matrix of profiles from scanning of the response of the
    structure to random perturbations at specific atom (or node) positions.
    The function implements the perturbation response scanning (PRS) method
    described in [CA09]_.  Rows of the matrix are the average magnitude of the
    responses obtained by perturbing the atom/node position at that row index,
    i.e. ``prs_profile[i,j]`` will give the response of residue/node *j* to
    perturbations in residue/node *i*.  PRS is performed using the covariance
    matrix from *model*, e.g. :class:`.ANM` instance.  Each residue/node is
    perturbed *repeats* times with a random unit force vector.  When an *atoms*
    instance is given, the PRS profile for residues will be added as an attribute
    which can then be retrieved as ``atoms.getData('prs_profile')``.  *model*
    and *atoms* must have the same number of atoms. *atoms* must be an
    :class:`.AtomGroup` instance.


    .. [CA09] Atilgan C, Atilgan AR, Perturbation-Response Scanning
       Reveals Ligand Entry-Exit Mechanisms of Ferric Binding Protein.
       *PLoS Comput Biol* **2009** 5(10):e1000544.

    The PRS matrix can be saved as follows::

      prs_matrix = calcPerturbResponse(p38_anm)
      writeArray('prs_matrix.txt', prs_matrix, format='%8.6f', delimiter='\t')
    """

    if not isinstance(model, NMA):
        raise TypeError('model must be an NMA instance')
    elif not model.is3d():
        raise TypeError('model must be a 3-dimensional NMA instance')
    elif len(model) == 0:
        raise ValueError('model must have normal modes calculated')
    if atoms is not None:
        if not isinstance(atoms, AtomGroup):
            raise TypeError('atoms must be an AtomGroup instance')
        elif atoms.numAtoms() != model.numAtoms():
            raise ValueError('model and atoms must have the same number of atoms')

    assert isinstance(repeats, int), 'repeats must be an integer'
    cov = calcCovariance(model)
    if cov is None:
        raise ValueError('model did not return a covariance matrix')

    n_atoms = model.numAtoms()
    response_matrix = np.zeros((n_atoms, n_atoms))
    LOGGER.progress('Calculating perturbation response', n_atoms, '_prody_prs')
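    # cov[:, i3:i3p3] selects the three Cartesian columns (x, y, z) of
    # atom i in the 3N-by-3N covariance matrix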
    i3 = -3
    i3p3 = 0
    for i in range(n_atoms):
        i3 += 3
        i3p3 += 3
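        # draw *repeats* random forces and normalize each row to a unit vector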
        forces = np.random.rand(repeats * 3).reshape((repeats, 3))
        forces /= ((forces**2).sum(1)**0.5).reshape((repeats, 1))
        for force in forces:
            response_matrix[i] += (
                np.dot(cov[:, i3:i3p3], force)
                ** 2).reshape((n_atoms, 3)).sum(1)
        LOGGER.update(i, '_prody_prs')

    response_matrix /= repeats
    LOGGER.clear()
    LOGGER.report('Perturbation response scanning completed in %.1fs.',
                  '_prody_prs')
    if atoms is not None:
        atoms.setData('prs_profile', response_matrix)
    return response_matrix
Example #34
0
def writeDCD(filename,
             trajectory,
             start=None,
             stop=None,
             step=None,
             align=False):
    """Write 32-bit CHARMM format DCD file (also NAMD 2.1 and later).
    *trajectory* can be an :class:`Trajectory`, :class:`DCDFile`, or
    :class:`Ensemble` instance. *filename* is returned upon successful
    output of file."""
    if not filename.lower().endswith('.dcd'):
        filename += '.dcd'

    if not isinstance(trajectory, (TrajBase, Ensemble, Atomic)):
        raise TypeError('{0} is not a valid type for trajectory'.format(
            type(trajectory)))

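    # slice(start, stop, step).indices(n) resolves None values and negative
    # indices against the n available coordinate sets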
    irange = list(
        range(*slice(start, stop, step).indices(trajectory.numCoordsets())))
    n_csets = len(irange)
    if n_csets == 0:
        raise ValueError('trajectory does not have any coordinate sets, or '
                         'no coordinate sets are selected')

    if isinstance(trajectory, Atomic):
        isEnsemble = False
        isAtomic = True
        n_atoms = trajectory.numAtoms()
    else:
        isEnsemble = True
        isAtomic = False
        n_atoms = trajectory.numSelected()
    if n_atoms == 0:
        raise ValueError('no atoms are selected in the trajectory')
    if isinstance(trajectory, TrajBase):
        isTrajectory = True
        unitcell = trajectory.hasUnitcell()
        nfi = trajectory.nextIndex()
        trajectory.reset()
        pack_i_48 = pack('i', 48)
        if isinstance(trajectory, Trajectory):
            timestep = trajectory.getTimestep()[0]
            first_ts = trajectory.getFirstTimestep()[0]
            framefreq = trajectory.getFrameFreq()[0]
            n_fixed = trajectory.numFixed()[0]
        else:
            timestep = trajectory.getTimestep()
            first_ts = trajectory.getFirstTimestep()
            framefreq = trajectory.getFrameFreq()
            n_fixed = trajectory.numFixed()
    else:
        isTrajectory = False
        unitcell = False
        if isinstance(trajectory, Ensemble):
            frame = trajectory[0]
        else:
            frame = trajectory
            acsi = trajectory.getACSIndex()
        timestep = 1
        first_ts = 0
        framefreq = 1
        n_fixed = 0

    dcd = DCDFile(filename, mode='w')
    LOGGER.progress('Writing DCD', len(irange), '_prody_writeDCD')
    prev = -1
    uc = None
    time_ = time()
    for j, i in enumerate(irange):
        diff = i - prev
        prev = i
        if isTrajectory:
            if diff > 1:
                trajectory.skip(diff - 1)
            frame = next(trajectory)
            if frame is None:
                break
            if unitcell:
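                # convert the cell angles and reorder the box parameters into
                # the layout used by CHARMM DCD unit cell records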
                uc = frame._getUnitcell()
                uc[3:] = np.sin((PISQUARE / 90) * (90 - uc[3:]))
                uc = uc[[0, 3, 1, 4, 5, 2]]
        elif isEnsemble:
            frame._index = i
        else:
            frame.setACSIndex(i)
        if align:
            frame.superpose()
        if j == 0:
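            # the first frame also writes the DCD header fields (timestep,
            # first timestep, and frame frequency)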
            dcd.write(frame._getCoords(),
                      uc,
                      timestep=timestep,
                      firsttimestep=first_ts,
                      framefreq=framefreq)
        else:
            dcd.write(frame._getCoords(), uc)
        LOGGER.update(i, label='_prody_writeDCD')
    if isAtomic:
        trajectory.setACSIndex(acsi)
    j += 1
    LOGGER.finish()
    dcd.close()
    time_ = time() - time_ or 0.01
    dcd_size = 1.0 * (56 + (n_atoms * 3 + 6) * 4) * n_csets / (1024 * 1024)
    LOGGER.info('DCD file was written in {0:.2f} seconds.'.format(time_))
    LOGGER.info('{0:.2f} MB written at input rate {1:.2f} MB/s.'.format(
        dcd_size, dcd_size / time_))
    LOGGER.info(
        '{0} coordinate sets written at output rate {1} frame/s.'.format(
            n_csets, int(n_csets / time_)))
    if j != n_csets:
        LOGGER.warn('{0} frames expected, {1} written.'.format(
            n_csets, j))
    if isTrajectory:
        trajectory.goto(nfi)
    return filename
Example #35
0
def addPDBEnsemble(ensemble,
                   PDBs,
                   refpdb=None,
                   labels=None,
                   mapping_func=mapOntoChain,
                   occupancy=None,
                   unmapped=None,
                   **kwargs):
    """Adds extra structures to a given PDB ensemble. 

    :arg ensemble: the ensemble to which the PDBs are added
    :type ensemble: :class:`.PDBEnsemble`

    :arg refpdb: reference structure. If **None**, it will be set to ``ensemble.getAtoms()`` automatically
    :type refpdb: :class:`.Chain`, :class:`.Selection`, or :class:`.AtomGroup`

    :arg PDBs: A list of PDB structures
    :type PDBs: iterable

    :arg labels: labels of the conformations
    :type labels: list

    :arg seqid: minimal sequence identity (percent)
    :type seqid: int

    :arg coverage: minimal sequence overlap (percent)
    :type coverage: int

    :arg occupancy: minimal occupancy of columns (range from 0 to 1). Columns whose occupancy 
                    is below this value will be trimmed
    :type occupancy: float

    :arg unmapped: a list to which the labels of structures that could not be 
                   mapped are appended. This is an output argument
    :type unmapped: list
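
    A minimal usage sketch (assuming ``ens`` is an existing
    :class:`.PDBEnsemble` and ``pdbs`` is a list of structures parsed
    elsewhere, e.g. with :func:`.parsePDB`)::

        failed = []
        ens = addPDBEnsemble(ens, pdbs, occupancy=0.9, unmapped=failed)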
    """

    degeneracy = kwargs.pop('degeneracy', True)
    subset = str(kwargs.get('subset', 'calpha')).lower()
    superpose = kwargs.pop('superpose', True)

    if labels is not None:
        if len(labels) != len(PDBs):
            raise TypeError('Labels and PDBs must have the same length.')
    else:
        labels = []

        for pdb in PDBs:
            if pdb is None:
                labels.append(None)
            else:
                labels.append(pdb.getTitle())

    # obtain refchains from the hierarchical view of the reference PDB
    if refpdb is None:
        refpdb = ensemble._atoms
    else:
        if subset != 'all':
            refpdb = refpdb.select(subset)

    refchains = list(refpdb.getHierView())

    start = time.time()

    # obtain the atommap of all the chains combined.
    atoms = refchains[0]
    for i in range(1, len(refchains)):
        atoms += refchains[i]

    # add the PDBs to the ensemble
    if unmapped is None: unmapped = []

    LOGGER.progress('Appending the ensemble...', len(PDBs),
                    '_prody_addPDBEnsemble')
    for i, pdb in enumerate(PDBs):
        lbl = labels[i]
        if pdb is None:
            unmapped.append(labels[i])
            continue

        LOGGER.update(i,
                      'Mapping %s to the reference...' % pdb.getTitle(),
                      label='_prody_addPDBEnsemble')
        if not isinstance(pdb, (Chain, Selection, AtomGroup)):
            raise TypeError(
                'PDBs must be a list of Chain, Selection, or AtomGroup.')

        atommaps = []
        # find the mapping of the pdb to each reference chain
        for chain in refchains:
            mappings = mapping_func(pdb, chain, index=i, **kwargs)
            if len(mappings) > 0:
                atommaps.append(mappings[0][0])
            else:
                break

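        # the structure is kept only if every reference chain could be mapped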
        if len(atommaps) != len(refchains):
            unmapped.append(lbl)
            continue

        # combine the mappings of pdb to reference chains
        atommap = atommaps[0]
        for j in range(1, len(atommaps)):
            atommap += atommaps[j]

        # add the mappings to the ensemble
        ensemble.addCoordset(atommap,
                             weights=atommap.getFlags('mapped'),
                             label=lbl,
                             degeneracy=degeneracy)
    LOGGER.finish()

    if occupancy is not None:
        ensemble = trimPDBEnsemble(ensemble, occupancy=occupancy)
    if superpose:
        ensemble.iterpose()

    LOGGER.info('{0} PDBs were added to the ensemble in {1:.2f}s.'.format(
        len(PDBs) - len(unmapped),
        time.time() - start))

    if unmapped:
        LOGGER.warn('{0} structures could not be mapped.'.format(len(unmapped)))

    return ensemble
Example #36
0
def calcPerturbResponse(model, **kwargs):
    """Returns a matrix of profiles from scanning the response of the
    structure to random perturbations at specific atom (or node) positions.
    The function implements the perturbation response scanning (PRS) method
    described in [CA09]_.  Rows of the matrix are the average magnitude of the
    responses obtained by perturbing the atom/node position at that row index,
    i.e. ``prs_profile[i,j]`` will give the response of residue/node *j* to
    perturbations in residue/node *i*.  PRS is performed using the covariance
    matrix from *model*, e.g. :class:`.ANM` instance.

    When an *atoms* instance is given, the PRS matrix will be added as data, 
    which can be retrieved with ``atoms.getData('prs_matrix')``.  

    *model* and *atoms* must have the same number of atoms. *atoms* must be an
    :class:`.AtomGroup` instance. 

    .. [CA09] Atilgan C, Atilgan AR, Perturbation-Response Scanning
       Reveals Ligand Entry-Exit Mechanisms of Ferric Binding Protein.
       *PLoS Comput Biol* **2009** 5(10):e1000544.

    The PRS matrix can be calculated and saved as follows::

      prs_matrix = calcPerturbResponse(p38_anm, saveMatrix=True)
      
    The PRS matrix can also be saved later as follows::
    
      writeArray('prs_matrix.txt', prs_matrix, format='%8.6f', delimiter='\t')

    :arg saveMatrix: whether to save the matrix generated to a text file.
        Default is **False**
    :type saveMatrix: bool

    :arg saveName: the file name for the saved matrix.
        Default is ``'response_matrix.txt'``
    :type saveName: str

    :arg suppressDiag: whether to set the diagonal (self response) of the
        normalized matrix to zero to facilitate visualizing the response
        profile. Default is **False**
    :type suppressDiag: bool
    """

    if not isinstance(model, (NMA, ModeSet, Mode)):
        raise TypeError('model must be an NMA, ModeSet, or Mode instance')

    if isinstance(model, NMA) and len(model) == 0:
        raise ValueError('model must have normal modes calculated')

    atoms = kwargs.get('atoms', None)
    if atoms is not None:
        if isinstance(atoms, Selection):
            atoms = atoms.copy()
        if not isinstance(atoms, AtomGroup):
            raise TypeError('atoms must be an AtomGroup instance')
        elif atoms.numAtoms() != model.numAtoms():
            raise ValueError('model and atoms must have the same number of atoms')

    n_atoms = model.numAtoms()
    LOGGER.timeit('_prody_prs_all')
    LOGGER.info('Calculating covariance matrix')
    LOGGER.timeit('_prody_cov')

    cov = calcCovariance(model)
    if cov is None:
        raise ValueError('model did not return a covariance matrix')

    LOGGER.clear()
    LOGGER.report('Covariance matrix calculated in %.1fs.', '_prody_cov')

    LOGGER.progress('Calculating perturbation response', n_atoms,
                    '_prody_prs_mat')

    if not model.is3d():
        prs_matrix = cov**2

    else:
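        # for 3-D models, collapse the 3N-by-3N squared covariance matrix to
        # N-by-N by summing the x, y, z components: first over the three rows
        # of each perturbed atom, then over the three columns of each
        # responding atom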
        cov_squared = cov**2
        n_by_3n_cov_squared = np.zeros((n_atoms, 3 * n_atoms))
        prs_matrix = np.zeros((n_atoms, n_atoms))
        i3 = -3
        i3p3 = 0
        for i in range(n_atoms):
            i3 += 3
            i3p3 += 3
            n_by_3n_cov_squared[i, :] = (cov_squared[i3:i3p3, :]).sum(0)

        j3 = -3
        j3p3 = 0
        for j in range(n_atoms):
            j3 += 3
            j3p3 += 3
            prs_matrix[:, j] = (n_by_3n_cov_squared[:, j3:j3p3]).sum(1)

    LOGGER.clear()
    LOGGER.report('Perturbation response matrix calculated in %.1fs.',
                  '_prody_prs_mat')

    saveMatrix = kwargs.get('saveMatrix', False)
    suppressDiag = kwargs.get('suppressDiag', False)
    saveName = kwargs.get('saveName', 'response_matrix.txt')

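    # normalize each row by the self response (the diagonal element), so that
    # entries are relative to the perturbed residue's own displacement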
    self_dp = np.diag(prs_matrix)
    self_dp = self_dp.reshape(n_atoms, 1)
    norm_prs_matrix = prs_matrix / np.repeat(self_dp, n_atoms, axis=1)

    if suppressDiag:
        # suppress the diagonal (self displacement) to facilitate
        # visualizing the response profile
        norm_prs_matrix = norm_prs_matrix - np.diag(np.diag(norm_prs_matrix))

    if saveMatrix:
        np.savetxt(saveName, norm_prs_matrix, delimiter='\t', fmt='%8.6f')

    LOGGER.report('Perturbation response scanning completed in %.1fs.',
                  '_prody_prs_all')

    if atoms is not None:
        atoms.setData('prs_matrix', norm_prs_matrix)
        return atoms, norm_prs_matrix
    else:
        return norm_prs_matrix