def _getEigvecs(modes, row_norm=False, dummy_mode=False):
    """Return the eigenvectors of *modes* as a 2-D array, one mode per column.

    :arg modes: a :class:`Mode`, :class:`ModeSet` or :class:`NMA` instance,
        a :class:`numpy.ndarray` (used as is), a sequence of :class:`Mode`
        instances, or any other sequence accepted by :func:`numpy.array`

    :arg row_norm: if **True**, every row is divided by its norm so that the
        feature vectors are unit vectors; the reciprocal norms are computed
        with :func:`div0`
    :type row_norm: bool

    :arg dummy_mode: if **True**, a constant column scaled to unit norm is
        prepended, unless the first column is already constant, in which case
        a warning is logged and nothing is added
    :type dummy_mode: bool

    :returns: 2-D :class:`numpy.ndarray`

    :raises TypeError: if *modes* cannot be indexed or mixes :class:`Mode`
        instances with other objects
    """

    la = importLA()

    if isinstance(modes, (Mode, ModeSet, NMA)):
        model = modes._model
        if isinstance(model, MaskedGNM):
            # Switch the model's ``masked`` flag on only while the array is
            # fetched; ``finally`` guarantees the caller's setting is restored
            # even when getArray() raises.
            masked = model.masked
            model.masked = True
            try:
                V = modes.getArray()
            finally:
                model.masked = masked
        else:
            V = modes.getArray()
    elif isinstance(modes, np.ndarray):
        V = modes
    else:
        try:
            mode0 = modes[0]
            if isinstance(mode0, Mode):
                # Start from an empty (n, 0) float block, as before, so the
                # result dtype is promoted exactly like the incremental
                # stacking did; stacking once avoids quadratic copying.
                columns = [np.empty((len(mode0), 0))]
                for mode in modes:
                    # explicit raise instead of ``assert``: assertions are
                    # stripped when Python runs with -O
                    if not isinstance(mode, Mode):
                        raise TypeError('Modes should be a list of modes.')
                    columns.append(np.expand_dims(mode.getEigvec(), axis=1))
                V = np.hstack(columns)
            else:
                V = np.array(modes)
        except TypeError:
            raise TypeError('Modes should be a list of modes.')

    if V.ndim == 1:
        V = np.expand_dims(V, axis=1)

    # add a dummy zero mode to the modeset
    if dummy_mode:
        v0 = V[:, 0]
        if np.allclose(v0, np.mean(v0)):
            # the first column is already constant
            dummy_mode = False
            LOGGER.warn(
                'at least one zero mode is detected therefore dummy mode will NOT be added'
            )

    if dummy_mode:
        n, _ = V.shape
        v0 = np.ones((n, 1), dtype=V.dtype)
        v0 /= la.norm(v0)
        V = np.hstack((v0, V))
        LOGGER.debug('a dummy zero mode is added')

    # normalize the rows so that feature vectors are unit vectors
    if row_norm:
        norms = la.norm(V, axis=1)
        # Scale row i by 1/norms[i] through broadcasting; this is the same
        # product as multiplying by diag(1/norms) without allocating an
        # n-by-n matrix.
        V = np.multiply(V, div0(1., norms)[:, np.newaxis])

    return V
def _getEigvecs(modes, row_norm=False, remove_zero_rows=False):
    """Return the eigenvectors of *modes* as a 2-D array together with a row mask.

    :arg modes: a :class:`ModeSet` or :class:`NMA` instance, a single
        :class:`Mode`, a :class:`numpy.ndarray` (used as is), a sequence of
        :class:`Mode` instances, or any other sequence accepted by
        :func:`numpy.array`

    :arg row_norm: if **True**, every row is divided by its norm so that the
        feature vectors are unit vectors; the reciprocal norms are computed
        with :func:`div0`
    :type row_norm: bool

    :arg remove_zero_rows: if **True**, rows whose entries are all zero are
        dropped from the returned array
    :type remove_zero_rows: bool

    :returns: tuple ``(V, mask)`` where *V* is the 2-D eigenvector array
        (one mode per column) and *mask* is a boolean array with one entry per
        row of the array before any removal, **True** for rows that were kept

    :raises TypeError: if *modes* cannot be indexed or mixes :class:`Mode`
        instances with other objects
    """

    if isinstance(modes, (ModeSet, NMA)):
        V = modes.getEigvecs()
    elif isinstance(modes, Mode):
        V = modes.getEigvec()
    elif isinstance(modes, np.ndarray):
        V = modes
    else:
        try:
            mode0 = modes[0]
            if isinstance(mode0, Mode):
                # Start from an empty (n, 0) float block, as before, so the
                # result dtype is promoted exactly like the incremental
                # stacking did; stacking once avoids quadratic copying.
                columns = [np.empty((len(mode0), 0))]
                for mode in modes:
                    # explicit raise instead of ``assert``: assertions are
                    # stripped when Python runs with -O
                    if not isinstance(mode, Mode):
                        raise TypeError('Modes should be a list of modes.')
                    columns.append(np.expand_dims(mode.getEigvec(), axis=1))
                V = np.hstack(columns)
            else:
                V = np.array(modes)
        except TypeError:
            # The exception used to be constructed but never raised, which
            # left ``V`` unbound and surfaced as an UnboundLocalError below.
            raise TypeError('Modes should be a list of modes.')

    if V.ndim == 1:
        V = np.expand_dims(V, axis=1)

    # normalize the rows so that feature vectors are unit vectors
    if row_norm:
        la = importLA()
        norms = la.norm(V, axis=1)
        # Scale row i by 1/norms[i] through broadcasting; this is the same
        # product as multiplying by diag(1/norms) without allocating an
        # n-by-n matrix.
        V = np.multiply(V, div0(1., norms)[:, np.newaxis])

    # remove rows with all zeros
    m, _ = V.shape
    mask = np.ones(m, dtype=bool)
    if remove_zero_rows:
        mask = V.any(axis=1)
    # indexing with an all-True mask still returns a new array
    V = V[mask]

    return V, mask
def _getEigvecs(modes, row_norm=False, remove_zero_rows=False):
    """Return the eigenvectors of *modes* as a 2-D array together with a row mask.

    :arg modes: a :class:`ModeSet` or :class:`NMA` instance, a single
        :class:`Mode`, a :class:`numpy.ndarray` (used as is), a sequence of
        :class:`Mode` instances, or any other sequence accepted by
        :func:`numpy.array`

    :arg row_norm: if **True**, every row is divided by its norm so that the
        feature vectors are unit vectors; the reciprocal norms are computed
        with :func:`div0`
    :type row_norm: bool

    :arg remove_zero_rows: if **True**, rows whose entries are all zero are
        dropped from the returned array
    :type remove_zero_rows: bool

    :returns: tuple ``(V, mask)`` where *V* is the 2-D eigenvector array
        (one mode per column) and *mask* is a boolean array with one entry per
        row of the array before any removal, **True** for rows that were kept

    :raises TypeError: if *modes* cannot be indexed or mixes :class:`Mode`
        instances with other objects
    """

    if isinstance(modes, (ModeSet, NMA)):
        V = modes.getEigvecs()
    elif isinstance(modes, Mode):
        V = modes.getEigvec()
    elif isinstance(modes, np.ndarray):
        V = modes
    else:
        try:
            mode0 = modes[0]
            if isinstance(mode0, Mode):
                # Start from an empty (n, 0) float block, as before, so the
                # result dtype is promoted exactly like the incremental
                # stacking did; stacking once avoids quadratic copying.
                columns = [np.empty((len(mode0), 0))]
                for mode in modes:
                    # explicit raise instead of ``assert``: assertions are
                    # stripped when Python runs with -O
                    if not isinstance(mode, Mode):
                        raise TypeError('Modes should be a list of modes.')
                    columns.append(np.expand_dims(mode.getEigvec(), axis=1))
                V = np.hstack(columns)
            else:
                V = np.array(modes)
        except TypeError:
            # The exception used to be constructed but never raised, which
            # left ``V`` unbound and surfaced as an UnboundLocalError below.
            raise TypeError('Modes should be a list of modes.')

    if V.ndim == 1:
        V = np.expand_dims(V, axis=1)

    # normalize the rows so that feature vectors are unit vectors
    if row_norm:
        la = importLA()
        norms = la.norm(V, axis=1)
        # Scale row i by 1/norms[i] through broadcasting; this is the same
        # product as multiplying by diag(1/norms) without allocating an
        # n-by-n matrix.
        V = np.multiply(V, div0(1., norms)[:, np.newaxis])

    # remove rows with all zeros
    m, _ = V.shape
    mask = np.ones(m, dtype=bool)
    if remove_zero_rows:
        mask = V.any(axis=1)
    # indexing with an all-True mask still returns a new array
    V = V[mask]

    return V, mask
def calcPerturbResponse(model, **kwargs):
    """This function implements the perturbation response scanning (PRS) method
    described in [CA09]_ and [IG14]_. It returns a PRS matrix, and effectiveness
    and sensitivity profiles.

    Rows of the matrix are the average magnitude of the responses obtained by
    perturbing the atom/node position at that row index, i.e.
    ``prs_matrix[i,j]`` will give the response of residue/node *j* to
    perturbations in residue/node *i*. The returned matrix is normalized: every
    row is divided by its diagonal element.

    PRS is performed using the covariance matrix from a *model*, e.g. a
    :class:`.ANM` instance. To use an external matrix, please provide it to
    a :class:`.PCA` instance using the :meth:`.PCA.setCovariance`.

    When an *atoms* instance is given, the effectiveness and sensitivity
    profiles are added to it as data, which can be retrieved with
    ``atoms.getData('effectiveness')`` and ``atoms.getData('sensitivity')``.
    *model* and *atoms* must have the same number of atoms. *atoms* must be an
    :class:`.AtomGroup` instance; a :class:`.Selection` is replaced by the
    result of its ``copy()`` before this check.

    .. [CA09] Atilgan C, Atilgan AR, Perturbation-Response Scanning
       Reveals Ligand Entry-Exit Mechanisms of Ferric Binding Protein.
       *PLoS Comput Biol* **2009** 5(10):e1000544.

    .. [IG14] General IJ, Liu Y, Blackburn ME, Mao W, Gierasch LM, Bahar I.
        ATPase subdomain IA is a mediator of interdomain allostery in Hsp70
        molecular chaperones. *PLoS Comput. Biol.* **2014** 10:e1003624.

    If *turbo* is **True** (default), then PRS is approximated by the limit of
    large numbers of forces and no perturbation forces are explicitly applied.
    If set to **False**, then each residue/node is perturbed *repeats* times
    (default 100) with a random unit force vector as in ProDy v1.8 and earlier.

    :arg model: model providing the covariance matrix
    :type model: :class:`.NMA`, :class:`.ModeSet` or :class:`.Mode`

    :keyword atoms: optional atoms that receive the profiles, default **None**
    :keyword turbo: see above, default **True**
    :keyword repeats: number of random forces per node when *turbo* is
        **False**, default 100
    :keyword suppress_diag: if **True**, the diagonal of the returned matrix
        is set to zero, default **False**
    :keyword no_diag: alias of *suppress_diag*; takes precedence when both
        are given

    :returns: tuple ``(prs_matrix, effectiveness, sensitivity)``

    :raises TypeError: if *model* or *atoms* has the wrong type
    :raises ValueError: if *model* is an :class:`.NMA` without modes, or if
        *model* and *atoms* differ in number of atoms
    """

    if not isinstance(model, (NMA, ModeSet, Mode)):
        raise TypeError('model must be an NMA, ModeSet, or Mode instance')

    if isinstance(model, NMA) and len(model) == 0:
        raise ValueError('model must have normal modes calculated')

    atoms = kwargs.get('atoms', None)
    suppress_diag = kwargs.get('suppress_diag', False)
    no_diag = kwargs.get('no_diag', suppress_diag)

    if atoms is not None:
        if isinstance(atoms, Selection):
            atoms = atoms.copy()
        if not isinstance(atoms, AtomGroup):
            raise TypeError('atoms must be an AtomGroup instance')
        elif atoms.numAtoms() != model.numAtoms():
            raise ValueError('model and atoms must have the same number atoms')

    n_atoms = model.numAtoms()
    cov = model.getCovariance()

    turbo = kwargs.get('turbo', True)
    if turbo:
        if not model.is3d():
            prs_matrix = cov**2
        else:
            cov_squared = cov**2
            # Collapse every 3x3 block of the squared covariance into a
            # single number per pair of nodes.  The three rows of a block are
            # summed first and the three columns second, which is the order
            # the former explicit loops used.
            prs_matrix = (cov_squared.reshape(n_atoms, 3, n_atoms, 3)
                          .sum(axis=1).sum(axis=2).astype(float))
    else:
        # NOTE(review): this branch slices the covariance in blocks of three
        # columns and reshapes to (n_atoms, 3), so it presumably expects a
        # 3-d model -- confirm before using it with 1-d models.
        repeats = kwargs.pop('repeats', 100)
        LOGGER.info(
            'Calculating perturbation response with {0} repeats'.format(
                repeats))
        LOGGER.timeit('_prody_prs_mat')

        # Accumulate straight into ``prs_matrix``: the result of this branch
        # used to be stored under another name, leaving ``prs_matrix``
        # undefined for the normalization below.
        prs_matrix = np.zeros((n_atoms, n_atoms))
        LOGGER.progress('Calculating perturbation response', n_atoms,
                        '_prody_prs')
        for i in range(n_atoms):
            i3 = 3 * i
            # NOTE(review): numpy.random.rand samples [0, 1), so every force
            # component is non-negative -- confirm this is intended.
            forces = np.random.rand(repeats * 3).reshape((repeats, 3))
            forces /= ((forces**2).sum(1)**0.5).reshape((repeats, 1))
            for force in forces:
                prs_matrix[i] += (np.dot(cov[:, i3:i3 + 3], force)**2).reshape(
                    (n_atoms, 3)).sum(1)
            LOGGER.update(i, '_prody_prs')

        prs_matrix /= repeats

        LOGGER.clear()
        LOGGER.report('Perturbation response matrix calculated in %.1fs.',
                      '_prody_prs_mat')

    # normalize each row by its self displacement (the diagonal element)
    self_dp = np.diag(prs_matrix).reshape(n_atoms, 1)
    re_self_dp = np.repeat(self_dp, n_atoms, axis=1)
    norm_prs_matrix = div0(prs_matrix, re_self_dp)

    if no_diag:
        # suppress the diagonal (self displacement) to facilitate
        # visualizing the response profile
        norm_prs_matrix = norm_prs_matrix - np.diag(np.diag(norm_prs_matrix))

    # zero weights on the diagonal keep self responses out of both averages
    W = 1 - np.eye(n_atoms)
    effectiveness = np.average(norm_prs_matrix, weights=W, axis=1)
    sensitivity = np.average(norm_prs_matrix, weights=W, axis=0)

    if atoms is not None:
        try:
            # if *atoms* exposes a parent atom group, initialize the labels
            # there with zeros first
            ag = atoms.getAtomGroup()
            defdata = np.zeros(ag.numAtoms(), dtype=float)
            ag.setData('effectiveness', defdata.copy())
            ag.setData('sensitivity', defdata.copy())
        except AttributeError:
            pass
        atoms.setData('effectiveness', effectiveness)
        atoms.setData('sensitivity', sensitivity)

    return norm_prs_matrix, effectiveness, sensitivity
def calcCrossCorr(modes, n_cpu=1, norm=True):
    """Returns cross-correlations matrix.  For a 3-d model, cross-correlations
    matrix is an NxN matrix, where N is the number of atoms.  Each element of
    this matrix is the trace of the submatrix corresponding to a pair of atoms.
    Covariance matrix may be calculated using all modes or a subset of modes
    of an NMA instance.  For large systems, calculation of cross-correlations
    matrix may be time consuming.  Optionally, multiple processors may be
    employed to perform calculations by passing ``n_cpu=2`` or more.

    :arg modes: modes to use; a list is accepted when its first item
        provides ``is3d()``
    :type modes: :class:`Mode`, :class:`NMA`, :class:`ModeSet` or list

    :arg n_cpu: number of worker processes, capped at the number of CPUs
    :type n_cpu: int

    :arg norm: if **True** (default), element *(i, j)* is divided by the
        square root of the product of diagonal elements *i* and *j*
    :type norm: bool

    :raises TypeError: if *n_cpu* is not an integer or *modes* has an
        unsupported type
    :raises ValueError: if *n_cpu* is less than 1
    """

    if not isinstance(n_cpu, int):
        raise TypeError('n_cpu must be an integer')
    elif n_cpu < 1:
        raise ValueError('n_cpu must be equal to or greater than 1')

    if not isinstance(modes, (Mode, NMA, ModeSet)):
        if isinstance(modes, list):
            try:
                is3d = modes[0].is3d()
            except Exception:
                # ``Exception`` rather than a bare ``except`` so that
                # KeyboardInterrupt/SystemExit are not turned into TypeError
                raise TypeError(
                    'modes must be a list of Mode or Vector instances, '
                    'not {0}'.format(type(modes)))
        else:
            raise TypeError('modes must be a Mode, NMA, or ModeSet instance, '
                            'not {0}'.format(type(modes)))
    else:
        is3d = modes.is3d()

    if is3d:
        # NOTE(review): a plain list reaching this branch is used as the
        # model itself, and the private attributes read below look specific
        # to model objects -- confirm lists of 3-d modes are supported.
        model = modes
        if isinstance(modes, (Mode, ModeSet)):
            model = modes._model
            if isinstance(modes, (Mode)):
                indices = [modes.getIndex()]
                n_modes = 1
            else:
                indices = modes.getIndices()
                n_modes = len(modes)
        else:
            n_modes = len(modes)
            indices = np.arange(n_modes)

        array = model._getArray()
        n_atoms = model._n_atoms
        variances = model._vars

        if n_cpu == 1:
            s = (n_modes, n_atoms, 3)
            arvar = (array[:, indices] * variances[indices]).T.reshape(s)
            array = array[:, indices].T.reshape(s)
            covariance = np.tensordot(array.transpose(2, 0, 1),
                                      arvar.transpose(0, 2, 1),
                                      axes=([0, 1], [1, 0]))
        else:
            import multiprocessing

            n_cpu = min(multiprocessing.cpu_count(), n_cpu)
            queue = multiprocessing.Queue()
            # floor division: a float chunk size cannot be used for slicing
            size = n_modes // n_cpu
            workers = []
            for i in range(n_cpu):
                # Slice the indices selected above instead of reading a
                # ``modes.indices`` attribute, so every input type accepted
                # by the single-process path is handled the same way.  The
                # last worker also takes the remainder.
                if n_cpu - i == 1:
                    chunk = indices[i * size:]
                else:
                    chunk = indices[i * size:(i + 1) * size]
                process = multiprocessing.Process(
                    target=_crossCorrelations,
                    args=(queue, n_atoms, array, variances, chunk))
                process.start()
                workers.append(process)

            # Blocking get() replaces polling Queue.qsize(), which is only
            # approximate and not implemented on every platform.  As before,
            # one result per worker is expected.
            covariance = queue.get()
            for _ in range(n_cpu - 1):
                covariance += queue.get()
            # all results are consumed, so joining cannot block on the queue
            for process in workers:
                process.join()
    else:
        covariance = calcCovariance(modes)

    if norm:
        diag = np.power(covariance.diagonal(), 0.5)
        D = np.outer(diag, diag)
        covariance = div0(covariance, D)

    return covariance
def solveEig(M, n_modes=None, zeros=False, turbo=True, is3d=False):
    """Solve the eigenvalue problem for the symmetric matrix *M*.

    Eigenvalues smaller than the module-level ``ZERO`` threshold are counted
    as zero eigenvalues.  A warning is logged when their number differs from
    the expected one (6 if *is3d* is **True**, otherwise 1).

    :arg M: matrix to decompose, dense or :mod:`scipy.sparse`

    :arg n_modes: number of modes to return; **None** (default), or any value
        not smaller than the number of rows of *M*, requests all of them
    :type n_modes: int

    :arg zeros: if **True**, the first *n_modes* eigenpairs are returned,
        zero eigenvalues included, and the variances of the zero modes are
        set to 0; if **False** (default), the zero modes are skipped
    :type zeros: bool

    :arg turbo: forwarded to :func:`scipy.linalg.eigh` only when the
        installed SciPy predates the ``subset_by_index`` keyword
    :type turbo: bool

    :arg is3d: selects the expected number of zero eigenvalues
    :type is3d: bool

    :returns: tuple ``(eigvals, eigvecs, vars)`` where *vars* holds the
        reciprocals of the eigenvalues
    """

    linalg = importLA()
    dof = M.shape[0]

    expct_n_zeros = 6 if is3d else 1

    if n_modes is None or n_modes >= dof:
        index_range = None
        n_modes = dof
    else:
        # Ask for the expected zero modes on top of the requested ones, but
        # never beyond the last valid eigenvalue index.
        index_range = (0, min(n_modes + expct_n_zeros, dof) - 1)

    def _eigh(M, eigvals=None, turbo=True):
        """Return eigenvalues/eigenvectors of *M*, optionally restricted to
        the inclusive index range *eigvals* ``(first, last)``."""

        if linalg.__package__.startswith('scipy'):
            from scipy.sparse import issparse

            if eigvals:
                turbo = False
            if not issparse(M):
                try:
                    values, vectors = linalg.eigh(M, subset_by_index=eigvals)
                except TypeError:
                    # ``subset_by_index`` was introduced in SciPy 1.5
                    # together with the deprecation of ``eigvals``/``turbo``;
                    # older releases only know the legacy keywords.
                    values, vectors = linalg.eigh(M, turbo=turbo,
                                                  eigvals=eigvals)
            else:
                try:
                    from scipy.sparse import linalg as scipy_sparse_la
                except ImportError:
                    raise ImportError('failed to import scipy.sparse.linalg, '
                                      'which is required for sparse matrix '
                                      'decomposition')
                if eigvals:
                    j = eigvals[0]
                    k = eigvals[-1] + 1
                else:
                    j = 0
                    k = dof

                if k >= dof:
                    k -= 1
                    LOGGER.warning('Cannot calculate all eigenvalues for sparse matrices, thus '
                                   'the last eigenvalue is omitted. See scipy.sparse.linalg.eigsh '
                                   'for more information')
                values, vectors = scipy_sparse_la.eigsh(M, k=k, which='SA')
                values = values[j:k]
                vectors = vectors[:, j:k]
        else:
            # NumPy fallback: the full spectrum is always computed.  The
            # former code assigned ``n_modes`` here, which made the name
            # local to this helper and raised UnboundLocalError on the check
            # preceding the assignment.
            if eigvals is not None:
                LOGGER.info('Scipy is not found, all modes were calculated.')
            values, vectors = linalg.eigh(M)
            if eigvals is not None:
                # hand back the same subset the SciPy path would return
                first, last = eigvals
                values = values[first:last + 1]
                vectors = vectors[:, first:last + 1]
        return values, vectors

    def _calc_n_zero_modes(M):
        """Count the zero eigenvalues of *M* from its eigenvalues alone."""

        try:
            from scipy.sparse import issparse
        except ImportError:
            # without SciPy the matrix cannot be a scipy.sparse matrix
            dense = True
        else:
            dense = not issparse(M)

        if dense:
            w = linalg.eigvalsh(M)
        else:
            try:
                from scipy.sparse import linalg as scipy_sparse_la
            except ImportError:
                raise ImportError('failed to import scipy.sparse.linalg, '
                                  'which is required for sparse matrix '
                                  'decomposition')
            w, _ = scipy_sparse_la.eigsh(M, k=dof-1, which='SA')
        n_zeros = sum(w < ZERO)
        return n_zeros

    values, vectors = _eigh(M, index_range, turbo)
    n_zeros = sum(values < ZERO)

    if n_zeros < n_modes + expct_n_zeros:
        if n_zeros < expct_n_zeros:
            LOGGER.warning('Fewer than %d (%d) zero eigenvalues were calculated.'%(expct_n_zeros, n_zeros))
        elif n_zeros > expct_n_zeros:
            LOGGER.warning('More than %d (%d) zero eigenvalues were calculated.'%(expct_n_zeros, n_zeros))
    else:
        LOGGER.warning('More than %d zero eigenvalues were detected.'%expct_n_zeros)

    if not zeros:
        if n_zeros > expct_n_zeros:
            if n_zeros == n_modes + expct_n_zeros and n_modes != dof:
                # every eigenvalue of the partial spectrum is zero, so the
                # true count may be larger
                LOGGER.debug('Determing the number of zero eigenvalues...')
                # find the actual number of zero modes
                n_zeros = _calc_n_zero_modes(M)
                LOGGER.debug('%d zero eigenvalues detected.'%n_zeros)

            # Surplus zero modes displaced that many non-zero modes from the
            # first solve.  Fetch them only if such indices exist: clamping
            # the start index to dof-1, as was done before, re-solved and
            # appended a duplicate of the last eigenpair when the whole
            # spectrum was already available.
            start = n_modes + expct_n_zeros
            end = min(n_modes + n_zeros - 1, dof - 1)
            if start <= end:
                LOGGER.debug('Solving for additional eigenvalues...')
                values_, vectors_ = _eigh(M, eigvals=(start, end))
                values = np.concatenate((values, values_))
                vectors = np.hstack((vectors, vectors_))

        # final_n_modes may exceed len(eigvals) - no need to fix for the sake of the simplicity of the code
        final_n_modes = n_zeros + n_modes
        eigvals = values[n_zeros:final_n_modes]
        eigvecs = vectors[:, n_zeros:final_n_modes]
        variances = 1 / eigvals
    else:
        eigvals = values[:n_modes]
        eigvecs = vectors[:, :n_modes]
        variances = div0(1, values)
        variances[:n_zeros] = 0.
        variances = variances[:n_modes]

    return eigvals, eigvecs, variances
def calcCrossCorr(modes, n_cpu=1, norm=True):
    """Returns cross-correlations matrix.  For a 3-d model, cross-correlations
    matrix is an NxN matrix, where N is the number of atoms.  Each element of
    this matrix is the trace of the submatrix corresponding to a pair of atoms.
    Covariance matrix may be calculated using all modes or a subset of modes
    of an NMA instance.  For large systems, calculation of cross-correlations
    matrix may be time consuming.  Optionally, multiple processors may be
    employed to perform calculations by passing ``n_cpu=2`` or more.

    :arg modes: modes to use; a list is accepted when its first item
        provides ``is3d()``
    :type modes: :class:`Mode`, :class:`NMA`, :class:`ModeSet` or list

    :arg n_cpu: number of worker processes, capped at the number of CPUs
    :type n_cpu: int

    :arg norm: if **True** (default), element *(i, j)* is divided by the
        square root of the product of diagonal elements *i* and *j*
    :type norm: bool

    :raises TypeError: if *n_cpu* is not an integer or *modes* has an
        unsupported type
    :raises ValueError: if *n_cpu* is less than 1
    """

    if not isinstance(n_cpu, int):
        raise TypeError('n_cpu must be an integer')
    elif n_cpu < 1:
        raise ValueError('n_cpu must be equal to or greater than 1')

    if not isinstance(modes, (Mode, NMA, ModeSet)):
        if isinstance(modes, list):
            try:
                is3d = modes[0].is3d()
            except Exception:
                # ``Exception`` rather than a bare ``except`` so that
                # KeyboardInterrupt/SystemExit are not turned into TypeError
                raise TypeError('modes must be a list of Mode or Vector instances, '
                                'not {0}'.format(type(modes)))
        else:
            raise TypeError('modes must be a Mode, NMA, or ModeSet instance, '
                            'not {0}'.format(type(modes)))
    else:
        is3d = modes.is3d()

    if is3d:
        # NOTE(review): a plain list reaching this branch is used as the
        # model itself, and the private attributes read below look specific
        # to model objects -- confirm lists of 3-d modes are supported.
        model = modes
        if isinstance(modes, (Mode, ModeSet)):
            model = modes._model
            if isinstance(modes, (Mode)):
                indices = [modes.getIndex()]
                n_modes = 1
            else:
                indices = modes.getIndices()
                n_modes = len(modes)
        else:
            n_modes = len(modes)
            indices = np.arange(n_modes)

        array = model._getArray()
        n_atoms = model._n_atoms
        variances = model._vars

        if n_cpu == 1:
            s = (n_modes, n_atoms, 3)
            arvar = (array[:, indices]*variances[indices]).T.reshape(s)
            array = array[:, indices].T.reshape(s)
            covariance = np.tensordot(array.transpose(2, 0, 1),
                                      arvar.transpose(0, 2, 1),
                                      axes=([0, 1], [1, 0]))
        else:
            import multiprocessing

            n_cpu = min(multiprocessing.cpu_count(), n_cpu)
            queue = multiprocessing.Queue()
            # floor division: a float chunk size cannot be used for slicing
            size = n_modes // n_cpu
            workers = []
            for i in range(n_cpu):
                # Slice the indices selected above instead of reading a
                # ``modes.indices`` attribute, so every input type accepted
                # by the single-process path is handled the same way.  The
                # last worker also takes the remainder.
                if n_cpu - i == 1:
                    chunk = indices[i*size:]
                else:
                    chunk = indices[i*size:(i+1)*size]
                process = multiprocessing.Process(
                    target=_crossCorrelations,
                    args=(queue, n_atoms, array, variances, chunk))
                process.start()
                workers.append(process)

            # Blocking get() replaces polling Queue.qsize(), which is only
            # approximate and not implemented on every platform.  As before,
            # one result per worker is expected.
            covariance = queue.get()
            for _ in range(n_cpu - 1):
                covariance += queue.get()
            # all results are consumed, so joining cannot block on the queue
            for process in workers:
                process.join()
    else:
        covariance = calcCovariance(modes)

    if norm:
        diag = np.power(covariance.diagonal(), 0.5)
        D = np.outer(diag, diag)
        covariance = div0(covariance, D)

    return covariance