def test_1():
    """Compare msmbuilder.pca with sklearn.decomposition."""
    # Reference estimator: fitted on all trajectories stacked into one array.
    reference = PCAr()
    reference.fit(np.concatenate(trajs))

    # Estimator under test: fitted directly on the list of trajectories.
    model = PCA()
    model.fit(trajs)

    # The projection of the first trajectory must agree between the two.
    expected_first = reference.transform(trajs[0])
    actual_first = model.transform(trajs)[0]
    np.testing.assert_array_almost_equal(expected_first, actual_first)

    # Every fitted attribute must agree as well.
    fitted_attributes = (
        "components_",
        "explained_variance_",
        "mean_",
        "n_components_",
        "noise_variance_",
    )
    for attribute in fitted_attributes:
        np.testing.assert_array_almost_equal(
            getattr(model, attribute),
            getattr(reference, attribute),
        )
def test_generator():
    """Check that fitting works when the input is not a plain list."""
    indexed_trajs = dict(enumerate(trajs))

    # Reference estimator: fitted on all trajectories stacked into one array.
    reference = PCAr()
    reference.fit(np.concatenate(trajs))

    # On Python 3, dict.values() is a lazy view object rather than a list,
    # so this exercises fit() with a non-list iterable.
    model = PCA()
    model.fit(indexed_trajs.values())

    # The projection of the first trajectory must agree between the two.
    expected_first = reference.transform(trajs[0])
    actual_first = model.transform(trajs)[0]
    np.testing.assert_array_almost_equal(expected_first, actual_first)

    # Every fitted attribute must agree as well.
    fitted_attributes = (
        "components_",
        "explained_variance_",
        "mean_",
        "n_components_",
        "noise_variance_",
    )
    for attribute in fitted_attributes:
        np.testing.assert_array_almost_equal(
            getattr(model, attribute),
            getattr(reference, attribute),
        )
class SolventShellsAnalysis():
    """Do analysis on solvent shell results.

    The protocol is as follows:
        1. Normalize by shell volume
        2. Flatten to 2d (for compatibility with tICA, et al.)
        3. Remove zero-variance features

    Each derived representation is computed on first access and then
    cached on the instance, so repeated property reads are cheap.

    :param seqs: Sequences of counts. List of shape
                 (n_frames, n_solute, n_shells) arrays
    :param shell_w: Shell width (nm)
    """

    def __init__(self, seqs, shell_w):
        self._seqs3d_unnormed = seqs
        self.shell_w = shell_w

        # Lazily-filled caches backing the properties below.
        self._seqs3d = None
        self._seqs2d_unpruned = None
        self._seqs2d = None
        self._deleted = None

        # Fitted models and their transformed output; set by fit_tica()
        # and fit_pca().
        self.tica = None
        self.pca = None
        self.ticax = None
        self.pcax = None

    def _prune(self):
        """Run ``prune_all`` once and cache both of its results."""
        self._seqs2d, self._deleted = prune_all(self.seqs2d_unpruned)

    @property
    def seqs3d_unnormed(self):
        """Unnormalized (input) sequences"""
        return self._seqs3d_unnormed

    @property
    def seqs3d(self):
        """Normalized 3d sequences."""
        if self._seqs3d is None:
            self._seqs3d = [
                normalize(fp3d, self.shell_w)
                for fp3d in self.seqs3d_unnormed
            ]
        return self._seqs3d

    @property
    def seqs2d_unpruned(self):
        """Reshaped (2D) sequences."""
        if self._seqs2d_unpruned is None:
            self._seqs2d_unpruned = [reshape(fp3d) for fp3d in self.seqs3d]
        return self._seqs2d_unpruned

    @property
    def seqs2d(self):
        """Reshaped with zero-variance features removed.

        Input this to tICA, MSM, etc.
        """
        if self._seqs2d is None:
            self._prune()
        return self._seqs2d

    @property
    def deleted(self):
        """Which features (2d-indexing) we deleted."""
        if self._deleted is None:
            self._prune()
        return self._deleted

    def fit_tica(self, lag_time, n_components=10):
        """Fit tICA on the pruned 2d sequences and store the projection.

        The fitted model is stored in ``self.tica`` and the transformed
        sequences in ``self.ticax``.

        :param lag_time: Lag time passed through to ``tICA``
        :param n_components: Number of components to keep (default 10)
        """
        self.tica = tICA(n_components=n_components, lag_time=lag_time,
                         weighted_transform=True)
        self.tica.fit(self.seqs2d)
        self.ticax = self.tica.transform(self.seqs2d)

    def fit_pca(self, n_components=10):
        """Fit PCA on the pruned 2d sequences and store the projection.

        The fitted model is stored in ``self.pca`` and the transformed
        sequences in ``self.pcax``.

        :param n_components: Number of components to keep (default 10)
        """
        self.pca = PCA(n_components=n_components)
        self.pca.fit(self.seqs2d)
        self.pcax = self.pca.transform(self.seqs2d)
class SolventShellsAnalysis():
    """Do analysis on solvent shell results.

    The protocol is as follows:
        1. Normalize by shell volume
        2. Flatten to 2d (for compatibility with tICA, et al.)
        3. Remove zero-variance features

    Each derived representation is computed on first access and then
    cached on the instance, so repeated property reads are cheap.

    :param seqs: Sequences of counts. List of shape
                 (n_frames, n_solute, n_shells) arrays
    :param shell_w: Shell width (nm)
    """

    def __init__(self, seqs, shell_w):
        self._seqs3d_unnormed = seqs
        self.shell_w = shell_w

        # Lazily-filled caches backing the properties below.
        self._seqs3d = None
        self._seqs2d_unpruned = None
        self._seqs2d = None
        self._deleted = None

        # Fitted models and their transformed output; set by fit_tica()
        # and fit_pca().
        self.tica = None
        self.pca = None
        self.ticax = None
        self.pcax = None

    def _prune(self):
        """Run ``prune_all`` once and cache both of its results."""
        self._seqs2d, self._deleted = prune_all(self.seqs2d_unpruned)

    @property
    def seqs3d_unnormed(self):
        """Unnormalized (input) sequences"""
        return self._seqs3d_unnormed

    @property
    def seqs3d(self):
        """Normalized 3d sequences."""
        if self._seqs3d is None:
            self._seqs3d = [
                normalize(fp3d, self.shell_w)
                for fp3d in self.seqs3d_unnormed
            ]
        return self._seqs3d

    @property
    def seqs2d_unpruned(self):
        """Reshaped (2D) sequences."""
        if self._seqs2d_unpruned is None:
            self._seqs2d_unpruned = [reshape(fp3d) for fp3d in self.seqs3d]
        return self._seqs2d_unpruned

    @property
    def seqs2d(self):
        """Reshaped with zero-variance features removed.

        Input this to tICA, MSM, etc.
        """
        if self._seqs2d is None:
            self._prune()
        return self._seqs2d

    @property
    def deleted(self):
        """Which features (2d-indexing) we deleted."""
        if self._deleted is None:
            self._prune()
        return self._deleted

    def fit_tica(self, lag_time, n_components=10):
        """Fit tICA on the pruned 2d sequences and store the projection.

        The fitted model is stored in ``self.tica`` and the transformed
        sequences in ``self.ticax``.

        :param lag_time: Lag time passed through to ``tICA``
        :param n_components: Number of components to keep (default 10)
        """
        self.tica = tICA(n_components=n_components, lag_time=lag_time,
                         weighted_transform=True)
        self.tica.fit(self.seqs2d)
        self.ticax = self.tica.transform(self.seqs2d)

    def fit_pca(self, n_components=10):
        """Fit PCA on the pruned 2d sequences and store the projection.

        The fitted model is stored in ``self.pca`` and the transformed
        sequences in ``self.pcax``.

        :param n_components: Number of components to keep (default 10)
        """
        self.pca = PCA(n_components=n_components)
        self.pca.fit(self.seqs2d)
        self.pcax = self.pca.transform(self.seqs2d)