def calcMSF(coordsets):
    """Calculate mean square fluctuation(s) (MSF).

    *coordsets* is handled according to what it provides:

      * an object with a ``numFrames`` method is treated as a trajectory:
        it is reset, every frame is superposed with ``frame.superpose()``
        and running sums of the coordinates and their squares are
        accumulated frame by frame. The frame position reported by
        ``nextIndex()`` before the calculation is restored afterwards,
        also when an error interrupts the iteration;
      * an object with a ``getCoordsets`` method is replaced by the value
        that method returns;
      * anything else must expose ``ndim`` and ``shape`` (e.g. a Numpy
        array) and is used as is.

    :arg coordsets: trajectory-like object, object with ``getCoordsets``,
        or a 3-dimensional array whose first axis indexes coordinate sets
        (presumably ``(n_csets, n_atoms, 3)`` -- confirm against callers)

    :returns: variance over coordinate sets summed over the last axis,
        i.e. one value per atom

    :raises TypeError: when *coordsets* provides neither ``numFrames`` nor
        array-like ``ndim``/``shape`` attributes
    :raises ValueError: when the array is not 3-dimensional or contains a
        single coordinate set
    """

    try:
        ncsets = coordsets.numFrames()
    except AttributeError:
        # Not a trajectory: fall back to in-memory coordinate sets.
        try:
            coordsets = coordsets.getCoordsets()
        except AttributeError:
            # Already array-like (or invalid, which is detected below).
            pass
        try:
            ndim, shape = coordsets.ndim, coordsets.shape
        except AttributeError:
            # Only a missing attribute means "wrong type"; anything else
            # (e.g. KeyboardInterrupt) must not be masked as a TypeError.
            raise TypeError('coordsets must be a Numpy array or a ProDy '
                            'object with `getCoordsets` method')
        if ndim != 3 or shape[0] == 1:
            raise ValueError('coordsets must contain multiple sets')
        return var(coordsets, 0).sum(1)

    # Trajectory path: accumulate sum(x) and sum(x**2) frame by frame.
    nfi = coordsets.nextIndex()
    natoms = coordsets.numSelected()
    total = zeros((natoms, 3))
    sqsum = zeros((natoms, 3))

    LOGGER.progress(
        'Evaluating {0} frames from {1}:'.format(ncsets, str(coordsets)),
        ncsets, '_prody_calcMSF')

    # From here on ncsets counts the frames that were actually visited.
    ncsets = 0
    coordsets.reset()
    try:
        for frame in coordsets:
            frame.superpose()
            coords = frame._getCoords()
            total += coords
            sqsum += coords ** 2
            ncsets += 1
            LOGGER.update(ncsets, label='_prody_calcMSF')
    finally:
        # Leave the trajectory where the caller had it, even on failure.
        coordsets.goto(nfi)
    LOGGER.finish()

    # Var[x] = E[x**2] - E[x]**2, summed over the three components.
    return (sqsum / ncsets - (total / ncsets) ** 2).sum(1)
def _superpose(self, **kwargs):
    """Superpose conformations and update coordinates.

    Every conformation in ``self._confs`` is fitted onto the reference
    coordinates ``self._coords`` and overwritten in place with its
    superposed coordinates. When ``self._indices`` is set, the fit is
    computed on the indexed atoms only and the resulting transformation
    is applied to the whole conformation. When ``self._weights`` is set,
    centers and the correlation matrix are weighted.

    :arg ref: optional keyword; when given it is used to index the target
        coordinates (and weights) to define the target center instead of
        the mean over all fitted atoms
    """

    ref = kwargs.pop('ref', None)
    indices = self._indices
    weights = self._weights
    mobs = self._confs
    if indices is None:
        # No selection: fit on all atoms; movs=None marks this case below.
        idx = False
        tar = self._coords
        movs = None
    else:
        # Fit on the selected atoms, but transform whole conformations.
        idx = True
        if self._weights is not None:
            weights = weights[indices]
        tar = self._coords[indices]
        movs = self._confs

    # importLA is defined elsewhere; it must return a module-like object
    # providing svd and det.
    linalg = importLA()
    svd = linalg.svd
    det = linalg.det

    # assumes tar is (n_atoms, 3) and weights, when set, broadcasts
    # against it, e.g. (n_atoms, 1) -- TODO confirm
    if weights is None:
        if ref is None:
            tar_com = tar.mean(0)
        else:
            tar_com = tar[ref]

        tar_org = (tar - tar_com)
        # Scratch buffer reused for every centered mobile conformation.
        mob_org = zeros(tar_org.shape, dtype=mobs.dtype)
        # Transposed once here so the loop can call dot(tar_org, ...).
        tar_org = tar_org.T
    else:
        weights_sum = weights.sum()
        weights_dot = dot(weights.T, weights)
        if ref is None:
            tar_com = (tar * weights).sum(axis=0) / weights_sum
        else:
            tar_com = (tar[ref] * weights[ref]).sum(axis=0) / sum(
                weights[ref])
        tar_org = (tar - tar_com)
        # Scratch buffer reused for every centered mobile conformation.
        mob_org = zeros(tar_org.shape, dtype=mobs.dtype)

    LOGGER.progress('Superposing ', len(mobs), '_prody_ensemble')
    for i, mob in enumerate(mobs):
        if idx:
            mob = mob[indices]
        if weights is None:
            mob_com = mob.mean(0)
            # subtract() writes the centered coordinates into mob_org and
            # returns that same buffer.
            matrix = dot(tar_org, subtract(mob, mob_com, mob_org))
        else:
            mob_com = (mob * weights).sum(axis=0) / weights_sum
            subtract(mob, mob_com, mob_org)
            matrix = dot((tar_org * weights).T,
                         (mob_org * weights)) / weights_dot

        U, s, Vh = svd(matrix)
        # The sign of det(matrix) goes into the last diagonal entry,
        # presumably to exclude improper rotations (reflections) -- confirm.
        Id = array([[1, 0, 0], [0, 1, 0], [0, 0, sign(det(matrix))]])
        rotation = dot(Vh.T, dot(Id, U.T))

        if movs is None:
            # mob_org already holds the centered conformation: rotate it,
            # then shift onto the target center in place.
            mobs[i] = dot(mob_org, rotation)
            add(mobs[i], tar_com, mobs[i])
        else:
            # Rotate the full conformation and translate so the fitted
            # subset's center lands on the target center; the result is
            # written into movs[i].
            add(dot(movs[i], rotation),
                (tar_com - dot(mob_com, rotation)), movs[i])
        LOGGER.update(i + 1, label='_prody_ensemble')
    LOGGER.finish()
def buildPDBEnsemble(atomics, ref=None, title='Unknown', labels=None,
                     unmapped=None, **kwargs):
    """Build a :class:`.PDBEnsemble` by mapping each structure in *atomics*
    onto a reference structure. Note that the reference should be included
    in *atomics* as well.

    :arg atomics: a list of :class:`.Atomic` instances; **None** items are
        skipped and their labels are reported in *unmapped*
    :type atomics: list

    :arg ref: reference structure or the index of the reference in
        *atomics*. If **None**, the first item in *atomics* is used as the
        reference. If it is a :class:`.PDBEnsemble` instance, *atomics*
        are appended to that existing ensemble. Default is **None**
    :type ref: int, :class:`.Chain`, :class:`.Selection`,
        :class:`.AtomGroup`, or :class:`.PDBEnsemble`

    :arg title: the title of the ensemble
    :type title: str

    :arg labels: labels of the conformations; when **None**, the title of
        each structure is used
    :type labels: list

    :arg degeneracy: whether only the active coordinate set (**True**) or
        all the coordinate sets (**False**) of each structure should be
        added to the ensemble. Default is **True**
    :type degeneracy: bool

    :arg occupancy: minimal occupancy of columns (range from 0 to 1).
        Columns whose occupancy is below this value will be trimmed
    :type occupancy: float

    :arg unmapped: labels of *atomics* that cannot be included in the
        ensemble. This is an output argument
    :type unmapped: list

    :arg subset: a subset for selecting particular atoms from the input
        structures. Default is ``"all"``
    :type subset: str

    :arg superpose: if set to ``'iter'``, :func:`.PDBEnsemble.iterpose`
        will be used to superpose the structures, otherwise conformations
        will be superposed with respect to the reference specified by
        *ref* unless set to ``False``. Default is ``'iter'``
    :type superpose: str, bool

    :returns: the populated (and optionally trimmed and superposed)
        ensemble
    """

    # Options handled here are popped so that only the remaining keywords
    # reach alignChains; 'subset' is merely read and stays in kwargs.
    occupancy = kwargs.pop('occupancy', None)
    degeneracy = kwargs.pop('degeneracy', True)
    subset = str(kwargs.get('subset', 'all')).lower()
    # 'iterpose' takes precedence over 'superpose' when both are given.
    superpose = kwargs.pop('iterpose', kwargs.pop('superpose', 'iter'))
    debug = kwargs.pop('debug', {})

    if 'mapping_func' in kwargs:
        raise DeprecationWarning(
            'mapping_func is deprecated. Please see release notes for '
            'more details: http://prody.csb.pitt.edu/manual/release/v1.11_series.html'
        )

    start = time.time()

    if not isListLike(atomics):
        raise TypeError('atomics should be list-like')

    n_structures = len(atomics)
    if n_structures == 1 and degeneracy is True:
        raise ValueError('atomics should have at least two items')

    if labels is None:
        labels = [None if atoms is None else atoms.getTitle()
                  for atoms in atomics]
    elif len(labels) != n_structures:
        raise TypeError('Labels and atomics must have the same lengths.')

    # Resolve the mapping target and create (or reuse) the ensemble.
    if isinstance(ref, PDBEnsemble):
        # Appending to an existing ensemble: map onto its atoms.
        # NOTE(review): isrefset stays False here, so the coordinates of
        # the first mapped structure replace the ensemble's reference
        # coordinates below -- confirm this is intended.
        target = ref._atoms
        ensemble = ref
        isrefset = False
    else:
        if ref is None:
            target = atomics[0]
        elif isinstance(ref, Integral):
            target = atomics[ref]
        else:
            target = ref

        # Select the subset of the reference once, up front.
        if subset != 'all':
            target = target.select(subset)
        ensemble = PDBEnsemble(title)

        isrefset = isinstance(target, Atomic)
        if isrefset:
            ensemble.setAtoms(target)
            ensemble.setCoords(target.getCoords())
        else:
            ensemble._n_atoms = len(target)

    if unmapped is None:
        unmapped = []

    # Map every structure onto the target and add the resulting atom maps.
    LOGGER.progress('Building the ensemble...', n_structures,
                    '_prody_buildPDBEnsemble')
    for i, atoms in enumerate(atomics):
        label = labels[i]
        if atoms is None:
            unmapped.append(label)
            continue

        LOGGER.update(i, 'Mapping %s to the reference...' % atoms.getTitle(),
                      label='_prody_buildPDBEnsemble')
        try:
            atoms.getHierView()
        except AttributeError:
            raise TypeError(
                'atomics must be a list of instances having the access to getHierView'
            )

        if subset != 'all':
            atoms = atoms.select(subset)

        # Each structure gets its own debug dictionary, keyed by label.
        debug[label] = {}
        atommaps = alignChains(atoms, target, debug=debug[label], **kwargs)

        n_maps = len(atommaps)
        if n_maps == 0:
            unmapped.append(label)
            continue

        for atommap in atommaps:
            lbl = pystr(label)
            if n_maps > 1:
                # Disambiguate multiple mappings by their chain identifiers.
                lbl += '_%s' % ''.join(np.unique(atommap.getChids()))
            ensemble.addCoordset(atommap,
                                 weights=atommap.getFlags('mapped'),
                                 label=lbl, degeneracy=degeneracy)

            if not isrefset:
                # No reference coordinates yet: adopt the first mapping's.
                ensemble.setCoords(atommap.getCoords())
                isrefset = True

    LOGGER.finish()

    if occupancy is not None:
        ensemble = trimPDBEnsemble(ensemble, occupancy=occupancy)

    if superpose == 'iter':
        ensemble.iterpose()
    elif superpose is not False:
        ensemble.superpose()

    LOGGER.info('Ensemble ({0} conformations) were built in {1:.2f}s.'.format(
        ensemble.numConfs(), time.time() - start))

    if unmapped:
        LOGGER.warn('{0} structures cannot be mapped.'.format(len(unmapped)))
    return ensemble
def writeDCD(filename, trajectory, start=None, stop=None, step=None,
             align=False):
    """Write 32-bit CHARMM format DCD file (also NAMD 2.1 and later).

    *trajectory* can be an :class:`Trajectory`, :class:`DCDFile`, or
    :class:`Ensemble` instance; more precisely, any instance of
    ``TrajBase``, ``Ensemble`` or ``Atomic`` is accepted. *filename* is
    returned upon successful output of file.

    :arg filename: output path; ``'.dcd'`` is appended when missing
    :arg trajectory: source of the coordinate sets
    :arg start: index of the first coordinate set to write
    :arg stop: index at which to stop (exclusive)
    :arg step: stride between written coordinate sets
    :arg align: when true, ``superpose()`` is called on each frame before
        it is written

    :returns: the file name actually written to

    :raises TypeError: when *trajectory* is not of a supported type
    :raises ValueError: when no coordinate sets or no atoms are selected

    The output file is closed, and the trajectory's frame position (or the
    active coordinate set index of an ``Atomic``) is restored, even when
    writing fails part-way. A warning is logged when fewer frames than
    requested could be written.
    """

    if not filename.lower().endswith('.dcd'):
        filename += '.dcd'

    if not isinstance(trajectory, (TrajBase, Ensemble, Atomic)):
        raise TypeError('{0} is not a valid type for trajectory'
                        .format(type(trajectory)))

    # Resolve start/stop/step against the number of coordinate sets.
    irange = list(range(*slice(start, stop, step)
                        .indices(trajectory.numCoordsets())))
    n_csets = len(irange)
    if n_csets == 0:
        raise ValueError('trajectory does not have any coordinate sets, or '
                         'no coordinate sets are selected')

    isAtomic = isinstance(trajectory, Atomic)
    if isAtomic:
        n_atoms = trajectory.numAtoms()
    else:
        n_atoms = trajectory.numSelected()
    if n_atoms == 0:
        raise ValueError('no atoms are selected in the trajectory')

    isTrajectory = isinstance(trajectory, TrajBase)
    nfi = None    # frame position to restore (trajectories only)
    acsi = None   # active coordinate set index to restore (Atomic only)
    frame = None
    if isTrajectory:
        unitcell = trajectory.hasUnitcell()
        nfi = trajectory.nextIndex()
        trajectory.reset()
        if isinstance(trajectory, Trajectory):
            # Trajectory getters are indexed; the first entry is used.
            timestep = trajectory.getTimestep()[0]
            first_ts = trajectory.getFirstTimestep()[0]
            framefreq = trajectory.getFrameFreq()[0]
        else:
            timestep = trajectory.getTimestep()
            first_ts = trajectory.getFirstTimestep()
            framefreq = trajectory.getFrameFreq()
    else:
        unitcell = False
        if isinstance(trajectory, Ensemble):
            # One conformation object is reused; its index is moved below.
            frame = trajectory[0]
        else:
            frame = trajectory
            acsi = trajectory.getACSIndex()
        timestep = 1
        first_ts = 0
        framefreq = 1

    n_written = 0
    prev = -1
    uc = None
    dcd = None
    time_ = time()
    try:
        dcd = DCDFile(filename, mode='w')
        LOGGER.progress('Writing DCD', n_csets, '_prody_writeDCD')
        for i in irange:
            diff = i - prev
            prev = i
            if isTrajectory:
                if diff > 1:
                    trajectory.skip(diff - 1)
                frame = next(trajectory)
                if frame is None:
                    # Source ran out of frames before the selection did.
                    break
                if unitcell:
                    # NOTE(review): the slice assignment modifies the array
                    # returned by _getUnitcell() in place -- confirm that
                    # it is a copy and not the frame's own storage.
                    uc = frame._getUnitcell()
                    uc[3:] = np.sin((PISQUARE / 90) * (90 - uc[3:]))
                    uc = uc[[0, 3, 1, 4, 5, 2]]
            elif isAtomic:
                frame.setACSIndex(i)
            else:
                frame._index = i
            if align:
                frame.superpose()
            if n_written == 0:
                # Header information is passed with the first frame only.
                dcd.write(frame._getCoords(), uc, timestep=timestep,
                          firsttimestep=first_ts, framefreq=framefreq)
            else:
                dcd.write(frame._getCoords(), uc)
            # Count only frames that were really written, so an early
            # break is reported accurately in the summary below.
            n_written += 1
            LOGGER.update(n_written, label='_prody_writeDCD')
    finally:
        # Release the file and hand the source object back in the state
        # the caller left it, whether or not writing succeeded.
        if dcd is not None:
            dcd.close()
        if acsi is not None:
            trajectory.setACSIndex(acsi)
        if isTrajectory:
            trajectory.goto(nfi)
    LOGGER.finish()

    time_ = time() - time_ or 0.01
    dcd_size = 1.0 * (56 + (n_atoms * 3 + 6) * 4) * n_written / (1024 * 1024)
    LOGGER.info('DCD file was written in {0:.2f} seconds.'.format(time_))
    LOGGER.info('{0:.2f} MB written at input rate {1:.2f} MB/s.'.format(
        dcd_size, dcd_size / time_))
    LOGGER.info(
        '{0} coordinate sets written at output rate {1} frame/s.'.format(
            n_written, int(n_written / time_)))
    if n_written != n_csets:
        LOGGER.warn('Warning: {0} frames expected, {1} written.'.format(
            n_csets, n_written))
    return filename