Beispiel #1
0
def loadAtoms(filename):
    """Returns :class:`.AtomGroup` instance loaded from *filename* using
    :func:`numpy.load` function.  See also :func:`saveAtoms`."""

    LOGGER.timeit('_prody_loadatoms')
    attr_dict = load(filename)
    files = set(attr_dict.files)

    if not 'n_atoms' in files:
        raise ValueError('{0} is not a valid atomic data file'.format(
            repr(filename)))
    title = str(attr_dict['title'])

    if 'coordinates' in files:
        coords = attr_dict['coordinates']
        ag = AtomGroup(title)
        ag._n_csets = int(attr_dict['n_csets'])
        ag._coords = coords
    ag._n_atoms = int(attr_dict['n_atoms'])
    ag._setTimeStamp()
    if 'flagsts' in files:
        ag._flagsts = int(attr_dict['flagsts'])

    if 'bonds' in files and 'bmap' in files and 'numbonds' in files:
        ag._bonds = attr_dict['bonds']
        ag._bmap = attr_dict['bmap']
        ag._data['numbonds'] = attr_dict['numbonds']

    skip_flags = set()

    for label, data in attr_dict.items():
        if label in SKIPLOAD:
            continue
        if data.ndim == 1 and data.dtype == bool:
            if label in skip_flags:
                continue
            else:
                ag._setFlags(label, data)
                skip_flags.update(flags.ALIASES.get(label, [label]))
        else:
            ag.setData(label, data)

    for label in ['segindex', 'chindex', 'resindex']:
        if label in attr_dict:
            ag._data[label] = attr_dict[label]

    if ag.numCoordsets() > 0:
        ag._acsi = 0

    if 'cslabels' in files:
        ag.setCSLabels(list(attr_dict['cslabels']))

    LOGGER.report('Atom group was loaded in %.2fs.', '_prody_loadatoms')
    return ag
Beispiel #2
0
    def superpose(self, **kwargs):
        """Superpose the ensemble onto the reference coordinates obtained by 
        :meth:`getCoords`.
        """

        trans = kwargs.pop('trans', True)
        if self._coords is None:
            raise ValueError('coordinates are not set, use `setCoords`')
        if self._confs is None or len(self._confs) == 0:
            raise ValueError('conformations are not set, use `addCoordset`')
        LOGGER.timeit('_prody_ensemble')
        self._superpose(trans=trans)  # trans kwarg is used by PDBEnsemble
        LOGGER.report('Superposition completed in %.2f seconds.',
                      '_prody_ensemble')
Beispiel #3
0
    def superpose(self, **kwargs):
        """Superpose the ensemble onto the reference coordinates.
        
        :arg ref: index of the reference coordinate. If **None**, the average 
            coordinate will be assumed as the reference. Default is **None**
        :type ref: int
        """

        ref = kwargs.pop('ref', None)
        if self._coords is None:
            raise ValueError('coordinates are not set, use `setCoords`')
        if self._confs is None or len(self._confs) == 0:
            raise ValueError('conformations are not set, use `addCoordset`')
        LOGGER.timeit('_prody_ensemble')
        self._superpose(ref=ref)  # trans kwarg is used by PDBEnsemble
        LOGGER.report('Superposition completed in %.2f seconds.',
                      '_prody_ensemble')
Beispiel #4
0
    def iterpose(self, rmsd=0.0001):
        """Iteratively superpose the ensemble until convergence.  Initially,
        all conformations are aligned with the reference coordinates.  Then
        mean coordinates are calculated, and are set as the new reference
        coordinates.  This is repeated until reference coordinates do not
        change.  This is determined by the value of RMSD between the new and
        old reference coordinates.  Note that at the end of the iterative
        procedure the reference coordinate set will be average of conformations
        in the ensemble.

        :arg rmsd: change in reference coordinates to determine convergence,
            default is 0.0001 Å RMSD
        :type rmsd: float"""

        if self._coords is None:
            raise AttributeError('coordinates are not set, use `setCoords`')
        if self._confs is None or len(self._confs) == 0:
            raise AttributeError('conformations are not set, use'
                                 '`addCoordset`')
        LOGGER.info('Starting iterative superposition:')
        LOGGER.timeit('_prody_ensemble')
        rmsdif = 1
        step = 0
        weights = self._weights
        length = len(self)
        if weights is not None:
            if weights.ndim == 3:
                weightsum = weights.sum(axis=0)
                weightsum[weightsum == 0.] = 1.  # add pseudocount to avoid nan
            else:
                weightsum = length

        while rmsdif > rmsd:
            self._superpose()
            if weights is None:
                newxyz = self._confs.sum(0) / length
            else:
                newxyz = (self._confs * weights).sum(0) / weightsum
            rmsdif = getRMSD(self._coords, newxyz)
            self._coords = newxyz
            step += 1
            LOGGER.info('Step #{0}: RMSD difference = {1:.4e}'.format(
                step, rmsdif))
        LOGGER.report('Iterative superposition completed in %.2fs.',
                      '_prody_ensemble')
Beispiel #5
0
 def run(self, tmax=200, li=0.2, lf=0.01, ei=0.3,
         ef=0.05, Ti=0.1, Tf=2, c=0, calcC=False):
     LOGGER.info('Building coordinates from electron density map. This may take a while.')
     LOGGER.timeit('_prody_make_nodes')
     tmax = int(tmax * self.N)
     li = li * self.N
     if calcC:
         Ti = Ti * self.N
         Tf = Tf * self.N
     for t in range(1, tmax + 1):
         # calc the parameters
         tt = float(t) / tmax
         l = li * np.power(lf / li, tt)
         ep = ei * np.power(ef / ei, tt)
         if calcC:
             T = Ti * np.power(Tf / Ti, tt)
         else:
             T = -1
         self.runOnce(t, l, ep, T, c)
     LOGGER.report('{0} pseudoatoms were fitted in %.2fs.'.format(
         self.N), '_prody_make_nodes')
     return
Beispiel #6
0
def parseMMCIFStream(stream, **kwargs):
    """Returns an :class:`.AtomGroup` and/or a class:`.StarDict` 
    containing header data parsed from a stream of CIF lines.
    :arg stream: Anything that implements the method ``readlines``
        (e.g. :class:`file`, buffer, stdin)"""

    model = kwargs.get('model')
    subset = kwargs.get('subset')
    chain = kwargs.get('chain')
    altloc = kwargs.get('altloc', 'A')
    header = kwargs.get('header', False)

    if model is not None:
        if isinstance(model, int):
            if model < 0:
                raise ValueError('model must be greater than 0')
        else:
            raise TypeError('model must be an integer, {0} is invalid'
                            .format(str(model)))
    title_suffix = ''
    if subset:
        try:
            subset = _PDBSubsets[subset.lower()]
        except AttributeError:
            raise TypeError('subset must be a string')
        except KeyError:
            raise ValueError('{0} is not a valid subset'
                             .format(repr(subset)))
        title_suffix = '_' + subset
    if chain is not None:
        if not isinstance(chain, str):
            raise TypeError('chain must be a string')
        elif len(chain) == 0:
            raise ValueError('chain must not be an empty string')
        title_suffix = '_' + chain + title_suffix

    ag = None
    if 'ag' in kwargs:
        ag = kwargs['ag']
        if not isinstance(ag, AtomGroup):
            raise TypeError('ag must be an AtomGroup instance')
        n_csets = ag.numCoordsets()
    elif model != 0:
        ag = AtomGroup(str(kwargs.get('title', 'Unknown')) + title_suffix)
        n_csets = 0

    if model != 0:
        LOGGER.timeit()
        try:
            lines = stream.readlines()
        except AttributeError as err:
            try:
                lines = stream.read().split('\n')
            except AttributeError:
                raise err
        if not len(lines):
            raise ValueError('empty PDB file or stream')

        if header:
            ag, header = _parseMMCIFLines(ag, lines, model, chain, subset,
                                          altloc, header)
        else:
            ag = _parseMMCIFLines(ag, lines, model, chain, subset,
                                  altloc, header)

        if ag.numAtoms() > 0:
            LOGGER.report('{0} atoms and {1} coordinate set(s) were '
                          'parsed in %.2fs.'.format(ag.numAtoms(),
                                                    ag.numCoordsets() - n_csets))
        else:
            ag = None
            LOGGER.warn('Atomic data could not be parsed, please '
                        'check the input file.')
        if header:
            return ag, StarDict(*header, title=str(kwargs.get('title', 'Unknown')))
        return ag
Beispiel #7
0
def refineEnsemble(ensemble, lower=.5, upper=10., **kwargs):
    """Refine a :class:`.PDBEnsemble` based on RMSD criterions.
    
    :arg ensemble: the ensemble to be refined
    :type ensemble: :class:`.Ensemble`, :class:`.PDBEnsemble`

    :arg lower: the smallest allowed RMSD between two conformations with the exception of **protected** 
    :type lower: float

    :arg upper: the highest allowed RMSD between two conformations with the exception of **protected** 
    :type upper: float

    :keyword protected: a list of either the indices or labels of the conformations needed to be kept 
                        in the refined ensemble
    :type protected: list
    
    :arg ref: the index or label of the reference conformation which will also be kept.
        Default is 0
    :type ref: int or str
    """

    protected = kwargs.pop('protected', [])
    P = []
    if len(protected):
        labels = ensemble.getLabels()
        for p in protected:
            if isinstance(p, Integral):
                i = p
            else:
                if p in labels:
                    i = labels.index(p)
                else:
                    LOGGER.warn(
                        'could not find any conformation with the label %s in the ensemble'
                        % str(p))
            P.append(i)

    LOGGER.timeit('_prody_refineEnsemble')
    from numpy import argsort

    ### obtain reference index
    # rmsd = ensemble.getRMSDs()
    # ref_i = np.argmin(rmsd)
    ref_i = kwargs.pop('ref', 0)
    if isinstance(ref_i, Integral):
        pass
    elif isinstance(ref_i, str):
        labels = ensemble.getLabels()
        ref_i = labels.index(ref_i)
    else:
        LOGGER.warn(
            'could not find any conformation with the label %s in the ensemble'
            % str(ref_i))
    if not ref_i in P:
        P = [ref_i] + P

    ### calculate pairwise RMSDs ###
    RMSDs = ensemble.getRMSDs(pairwise=True)

    def getRefinedIndices(A):
        deg = A.sum(axis=0)
        sorted_indices = list(argsort(deg))
        # sorted_indices = P + [x for x in sorted_indices if x not in P]
        sorted_indices.remove(ref_i)
        sorted_indices.insert(0, ref_i)

        n_confs = ensemble.numConfs()
        isdel_temp = np.zeros(n_confs)
        for a in range(n_confs):
            i = sorted_indices[a]
            for b in range(n_confs):
                if a >= b:
                    continue
                j = sorted_indices[b]
                if isdel_temp[i] or isdel_temp[j]:
                    continue
                else:
                    if A[i, j]:
                        # isdel_temp[j] = 1
                        if not j in P:
                            isdel_temp[j] = 1
                        elif not i in P:
                            isdel_temp[i] = 1
        temp_list = isdel_temp.tolist()
        ind_list = []
        for i in range(n_confs):
            if not temp_list[i]:
                ind_list.append(i)
        return ind_list

    L = list(range(len(ensemble)))
    U = list(range(len(ensemble)))
    if lower is not None:
        A = RMSDs < lower
        L = getRefinedIndices(A)

    if upper is not None:
        B = RMSDs > upper
        U = getRefinedIndices(B)

    # find common indices from L and U
    I = list(set(L) - (set(L) - set(U)))

    # for p in P:
    # if p not in I:
    # I.append(p)
    I.sort()
    reens = ensemble[I]

    LOGGER.report('Ensemble was refined in %.2fs.', '_prody_refineEnsemble')
    LOGGER.info('%d conformations were removed from ensemble.' %
                (len(ensemble) - len(I)))

    return reens