def loadAtoms(filename):
    """Returns :class:`.AtomGroup` instance loaded from *filename* using
    :func:`numpy.load` function.  See also :func:`saveAtoms`.

    :arg filename: atomic data file to read; it must contain ``n_atoms``
        and ``title`` entries
    :returns: the :class:`.AtomGroup` rebuilt from the stored arrays
    :raises ValueError: when the file has no ``n_atoms`` entry

    Coordinates are optional: when the file has no ``coordinates`` entry
    the atom group is still created, just without coordinate sets."""

    LOGGER.timeit('_prody_loadatoms')
    attr_dict = load(filename)
    try:
        files = set(attr_dict.files)

        if 'n_atoms' not in files:
            raise ValueError('{0} is not a valid atomic data file'.format(
                repr(filename)))

        # Create the atom group unconditionally.  It used to be created only
        # when coordinates were stored, which left ``ag`` undefined (and the
        # function failing) for files saved without coordinates.
        ag = AtomGroup(str(attr_dict['title']))
        if 'coordinates' in files:
            ag._n_csets = int(attr_dict['n_csets'])
            ag._coords = attr_dict['coordinates']
        ag._n_atoms = int(attr_dict['n_atoms'])
        ag._setTimeStamp()
        if 'flagsts' in files:
            ag._flagsts = int(attr_dict['flagsts'])

        # Bond information is only usable when all three arrays are present.
        if 'bonds' in files and 'bmap' in files and 'numbonds' in files:
            ag._bonds = attr_dict['bonds']
            ag._bmap = attr_dict['bmap']
            ag._data['numbonds'] = attr_dict['numbonds']

        # Labels already covered by a previously restored flag (through its
        # aliases) are skipped so that each flag is set only once.
        skip_flags = set()

        for label, data in attr_dict.items():
            if label in SKIPLOAD:
                continue
            if data.ndim == 1 and data.dtype == bool:
                if label in skip_flags:
                    continue
                ag._setFlags(label, data)
                skip_flags.update(flags.ALIASES.get(label, [label]))
            else:
                ag.setData(label, data)

        # Index arrays are written straight into the data dictionary,
        # bypassing ``setData``.
        for label in ['segindex', 'chindex', 'resindex']:
            if label in files:
                ag._data[label] = attr_dict[label]

        if ag.numCoordsets() > 0:
            ag._acsi = 0

        if 'cslabels' in files:
            ag.setCSLabels(list(attr_dict['cslabels']))
    finally:
        # Release the underlying archive; every array needed has been read.
        attr_dict.close()

    LOGGER.report('Atom group was loaded in %.2fs.', '_prody_loadatoms')
    return ag
def superpose(self, **kwargs):
    """Superpose the ensemble onto the reference coordinates obtained by
    :meth:`getCoords`.

    :keyword trans: value forwarded to :meth:`_superpose` as its *trans*
        argument, default is **True**

    :raises ValueError: when reference coordinates or conformations have
        not been set
    """

    translate = kwargs.get('trans', True)

    if self._coords is None:
        raise ValueError('coordinates are not set, use `setCoords`')

    confs = self._confs
    if confs is None or not len(confs):
        raise ValueError('conformations are not set, use `addCoordset`')

    timer = '_prody_ensemble'
    LOGGER.timeit(timer)
    self._superpose(trans=translate)
    LOGGER.report('Superposition completed in %.2f seconds.', timer)
def superpose(self, **kwargs):
    """Superpose the ensemble onto the reference coordinates.

    :arg ref: index of the reference coordinate.  If **None**, the average
        coordinate will be assumed as the reference.  Default is **None**
    :type ref: int

    :raises ValueError: when reference coordinates or conformations have
        not been set
    """

    reference = kwargs.get('ref', None)

    coords_missing = self._coords is None
    if coords_missing:
        raise ValueError('coordinates are not set, use `setCoords`')

    conformations = self._confs
    if conformations is None or not len(conformations):
        raise ValueError('conformations are not set, use `addCoordset`')

    label = '_prody_ensemble'
    LOGGER.timeit(label)
    self._superpose(ref=reference)
    LOGGER.report('Superposition completed in %.2f seconds.', label)
def iterpose(self, rmsd=0.0001):
    """Iteratively superpose the ensemble until convergence.  Initially,
    all conformations are aligned with the reference coordinates.  Then
    mean coordinates are calculated, and are set as the new reference
    coordinates.  This is repeated until reference coordinates do not
    change.  This is determined by the value of RMSD between the new and
    old reference coordinates.  Note that at the end of the iterative
    procedure the reference coordinate set will be average of
    conformations in the ensemble.

    At least one superposition pass is always performed, whatever the
    value of *rmsd* is.

    :arg rmsd: change in reference coordinates to determine convergence,
        default is 0.0001 Å RMSD
    :type rmsd: float

    :raises AttributeError: when reference coordinates or conformations
        have not been set"""

    if self._coords is None:
        raise AttributeError('coordinates are not set, use `setCoords`')
    if self._confs is None or len(self._confs) == 0:
        # The two literals are concatenated; the trailing space keeps
        # "use" and "`addCoordset`" from running together.
        raise AttributeError('conformations are not set, use '
                             '`addCoordset`')

    LOGGER.info('Starting iterative superposition:')
    LOGGER.timeit('_prody_ensemble')

    weights = self._weights
    length = len(self)
    weightsum = None
    if weights is not None:
        if weights.ndim == 3:
            weightsum = weights.sum(axis=0)
            weightsum[weightsum == 0.] = 1.  # add pseudocount to avoid nan
        else:
            weightsum = length

    # ``None`` marks "no pass done yet" so that the first pass runs even
    # when *rmsd* is 1 or larger (the old initial value of 1 skipped the
    # loop entirely in that case).
    rmsdif = None
    step = 0
    while rmsdif is None or rmsdif > rmsd:
        self._superpose()
        if weights is None:
            newxyz = self._confs.sum(0) / length
        else:
            newxyz = (self._confs * weights).sum(0) / weightsum
        rmsdif = getRMSD(self._coords, newxyz)
        self._coords = newxyz
        step += 1
        LOGGER.info('Step #{0}: RMSD difference = {1:.4e}'.format(
            step, rmsdif))
    LOGGER.report('Iterative superposition completed in %.2fs.',
                  '_prody_ensemble')
def run(self, tmax=200, li=0.2, lf=0.01, ei=0.3, ef=0.05, Ti=0.1, Tf=2, c=0, calcC=False):
    """Fit the pseudoatoms by calling :meth:`runOnce` ``int(tmax * N)``
    times, where ``N`` is :attr:`N`.

    At step ``t`` the progress ``t / total`` is used to interpolate each
    parameter geometrically between its initial and final value, i.e.
    ``start * (end / start) ** progress``.

    :arg tmax: number of steps per pseudoatom; multiplied by ``N``
    :arg li: initial value of the second :meth:`runOnce` argument;
        multiplied by ``N`` before use
    :arg lf: final value of the second :meth:`runOnce` argument
    :arg ei: initial value of the third :meth:`runOnce` argument
    :arg ef: final value of the third :meth:`runOnce` argument
    :arg Ti: initial value of the fourth :meth:`runOnce` argument; only
        used, and multiplied by ``N``, when *calcC* is true
    :arg Tf: final value of the fourth :meth:`runOnce` argument; only
        used, and multiplied by ``N``, when *calcC* is true
    :arg c: passed unchanged as the last :meth:`runOnce` argument
    :arg calcC: when false, ``-1`` is passed instead of the interpolated
        fourth argument
    """

    LOGGER.info('Building coordinates from electron density map. This may take a while.')
    LOGGER.timeit('_prody_make_nodes')

    n_nodes = self.N
    n_steps = int(tmax * n_nodes)
    lambda_start = li * n_nodes
    if calcC:
        Ti = Ti * n_nodes
        Tf = Tf * n_nodes

    for step in range(1, n_steps + 1):
        # fraction of the schedule completed, in (0, 1]
        progress = float(step) / n_steps
        lam = lambda_start * np.power(lf / lambda_start, progress)
        eps = ei * np.power(ef / ei, progress)
        temp = Ti * np.power(Tf / Ti, progress) if calcC else -1
        self.runOnce(step, lam, eps, temp, c)

    LOGGER.report('{0} pseudoatoms were fitted in %.2fs.'.format(
        self.N), '_prody_make_nodes')
def parseMMCIFStream(stream, **kwargs):
    """Returns an :class:`.AtomGroup` and/or a :class:`.StarDict` containing
    header data parsed from a stream of CIF lines.

    :arg stream: Anything that implements the method ``readlines``
        (e.g. :class:`file`, buffer, stdin); objects that only implement
        ``read`` are accepted as well

    :keyword model: non-negative integer; atomic data are not parsed when
        it is ``0``, default is **None**
    :keyword subset: a key of the subset table, matched case-insensitively
    :keyword chain: non-empty chain identifier string
    :keyword altloc: alternate location passed to the line parser,
        default is ``'A'``
    :keyword header: when true, header data are returned as well,
        default is **False**
    :keyword ag: an existing :class:`.AtomGroup` to parse into
    :keyword title: title of the new atom group and of the header,
        default is ``'Unknown'``

    :returns: the atom group, or **None** when no atoms were parsed; when
        *header* is true a tuple of the atom group and a
        :class:`.StarDict`

    :raises TypeError: when *model*, *subset*, *chain* or *ag* has a wrong
        type
    :raises ValueError: when *model* is negative, *subset* is unknown,
        *chain* is empty, or the stream has no lines"""

    model = kwargs.get('model')
    subset = kwargs.get('subset')
    chain = kwargs.get('chain')
    altloc = kwargs.get('altloc', 'A')
    header = kwargs.get('header', False)

    if model is not None:
        if isinstance(model, int):
            if model < 0:
                # 0 is a valid value, so the message must not exclude it
                raise ValueError('model must be greater than or equal to 0')
        else:
            raise TypeError('model must be an integer, {0} is invalid'
                            .format(str(model)))

    title_suffix = ''
    if subset:
        try:
            subset = _PDBSubsets[subset.lower()]
        except AttributeError:
            raise TypeError('subset must be a string')
        except KeyError:
            raise ValueError('{0} is not a valid subset'
                             .format(repr(subset)))
        title_suffix = '_' + subset

    if chain is not None:
        if not isinstance(chain, str):
            raise TypeError('chain must be a string')
        elif len(chain) == 0:
            raise ValueError('chain must not be an empty string')
        title_suffix = '_' + chain + title_suffix

    ag = None
    if 'ag' in kwargs:
        ag = kwargs['ag']
        if not isinstance(ag, AtomGroup):
            raise TypeError('ag must be an AtomGroup instance')
        n_csets = ag.numCoordsets()
    elif model != 0:
        ag = AtomGroup(str(kwargs.get('title', 'Unknown')) + title_suffix)
        n_csets = 0

    if model != 0:
        LOGGER.timeit()
        try:
            lines = stream.readlines()
        except AttributeError as err:
            # fall back to ``read``; report the original error when the
            # object supports neither method
            try:
                lines = stream.read().split('\n')
            except AttributeError:
                raise err
        if not len(lines):
            raise ValueError('empty mmCIF file or stream')

        if header:
            ag, header = _parseMMCIFLines(ag, lines, model, chain, subset,
                                          altloc, header)
        else:
            ag = _parseMMCIFLines(ag, lines, model, chain, subset,
                                  altloc, header)

        if ag.numAtoms() > 0:
            LOGGER.report('{0} atoms and {1} coordinate set(s) were '
                          'parsed in %.2fs.'.format(ag.numAtoms(),
                                                    ag.numCoordsets() - n_csets))
        else:
            ag = None
            LOGGER.warn('Atomic data could not be parsed, please '
                        'check the input file.')

    # NOTE(review): with model=0 and a true *header* nothing has been parsed
    # at this point, so ``header`` is still the flag passed by the caller and
    # unpacking it below fails -- confirm how header-only parsing should work.
    if header:
        return ag, StarDict(*header, title=str(kwargs.get('title', 'Unknown')))
    return ag
def refineEnsemble(ensemble, lower=.5, upper=10., **kwargs):
    """Refine a :class:`.PDBEnsemble` based on RMSD criteria.

    :arg ensemble: the ensemble to be refined
    :type ensemble: :class:`.Ensemble`, :class:`.PDBEnsemble`

    :arg lower: the smallest allowed RMSD between two conformations with the
        exception of **protected**.  The criterion is skipped when **None**
    :type lower: float

    :arg upper: the highest allowed RMSD between two conformations with the
        exception of **protected**.  The criterion is skipped when **None**
    :type upper: float

    :keyword protected: a list of either the indices or labels of the
        conformations needed to be kept in the refined ensemble.  Labels
        that are not found are reported and ignored
    :type protected: list

    :arg ref: the index or label of the reference conformation which will
        also be kept.  Default is 0.  When the label cannot be found, or the
        value is neither an integer nor a string, a warning is logged and
        the default is used
    :type ref: int or str

    :returns: the ensemble indexed with the sorted indices of the
        conformations that satisfy both criteria
    """

    protected = kwargs.pop('protected', [])
    P = []
    if len(protected):
        labels = ensemble.getLabels()
        for p in protected:
            if isinstance(p, Integral):
                P.append(p)
            elif p in labels:
                P.append(labels.index(p))
            else:
                # Only warn: appending here would reuse the index left over
                # from the previous item (or fail when there is none).
                LOGGER.warn('could not find any conformation with the label %s '
                            'in the ensemble' % str(p))

    LOGGER.timeit('_prody_refineEnsemble')

    ### obtain reference index
    ref_i = kwargs.pop('ref', 0)
    if not isinstance(ref_i, Integral):
        labels = ensemble.getLabels()
        if isinstance(ref_i, str) and ref_i in labels:
            ref_i = labels.index(ref_i)
        else:
            LOGGER.warn('could not find any conformation with the label %s '
                        'in the ensemble, using conformation 0 as the '
                        'reference' % str(ref_i))
            ref_i = 0

    if ref_i not in P:
        P = [ref_i] + P
    keep = set(P)

    ### calculate pairwise RMSDs ###
    RMSDs = ensemble.getRMSDs(pairwise=True)

    def getRefinedIndices(A):
        """Return indices that survive removal of conflicting pairs, where
        ``A[i, j]`` is true when conformations *i* and *j* conflict."""

        # Visit conformations with fewer conflicts first, but always start
        # from the reference.
        deg = A.sum(axis=0)
        sorted_indices = list(np.argsort(deg))
        sorted_indices.remove(ref_i)
        sorted_indices.insert(0, ref_i)

        n_confs = ensemble.numConfs()
        isdel_temp = np.zeros(n_confs, dtype=bool)
        for a in range(n_confs):
            i = sorted_indices[a]
            for b in range(a + 1, n_confs):
                if isdel_temp[i]:
                    # a removed conformation cannot remove others
                    break
                j = sorted_indices[b]
                if isdel_temp[j] or not A[i, j]:
                    continue
                # Drop the later conformation unless it is protected; then
                # drop the earlier one unless that is protected as well.
                if j not in keep:
                    isdel_temp[j] = True
                elif i not in keep:
                    isdel_temp[i] = True

        return [i for i in range(n_confs) if not isdel_temp[i]]

    L = list(range(len(ensemble)))
    U = list(range(len(ensemble)))
    if lower is not None:
        L = getRefinedIndices(RMSDs < lower)

    if upper is not None:
        U = getRefinedIndices(RMSDs > upper)

    # keep the conformations that pass both criteria
    I = sorted(set(L) & set(U))
    reens = ensemble[I]

    LOGGER.report('Ensemble was refined in %.2fs.', '_prody_refineEnsemble')
    LOGGER.info('%d conformations were removed from ensemble.'
                % (len(ensemble) - len(I)))

    return reens