def test_singular_1():
    """Fit tICA on data whose third feature duplicates the first.

    The input has a perfectly redundant (linearly dependent) column. The
    test only checks that fitting succeeds and that the fitted components
    and eigenvalues are stored as float64.
    """
    base = np.random.randn(100, 2)
    # Append a copy of column 0 as an extra, fully redundant feature.
    data = np.hstack((base, base[:, :1]))

    model = tICA(n_components=1)
    model.fit([data])

    for fitted in (model.components_, model.eigenvalues_):
        assert fitted.dtype == np.float64
def test_singular_2():
    """Fit tICA on data that contains a feature column of all zeros.

    The test only checks that fitting succeeds and that the fitted
    components and eigenvalues are stored as float64.
    """
    n_frames = 100
    random_part = np.random.randn(n_frames, 2)
    # A constant-zero third feature carries no variance at all.
    dead_column = np.zeros((n_frames, 1))
    data = np.concatenate((random_part, dead_column), axis=1)

    model = tICA(n_components=1)
    model.fit([data])

    for fitted in (model.components_, model.eigenvalues_):
        assert fitted.dtype == np.float64
def test_score_1():
    """Check tICA.score against the eigenvalue sum for 1 to 4 components.

    For every component count, a model fitted with ``gamma=0`` must score
    its own training data at (approximately) the sum of its eigenvalues,
    and an independent random dataset must score strictly lower than the
    training data.
    """
    train = np.random.randn(100, 5)
    for n_components in range(1, 5):
        model = tICA(n_components=n_components, gamma=0)
        model.fit([train])

        # Score on the training set vs. the sum of the fitted eigenvalues.
        assert_approx_equal(model.score([train]), model.eigenvalues_.sum())

        # Fresh, unrelated data should not score as well as the fit data.
        held_out = np.random.randn(100, 5)
        assert model.score([held_out]) < model.score([train])
def test_1():
    """Compare the ``mixtape`` command line tools with the Python API.

    Inside a temporary directory, featurize ``frame0.h5`` with the DRID and
    dihedral featurizers through the command line, then run the tICA and
    KMeans commands on the resulting pickles. Each command must exit with
    status 0. The transformed outputs written by the command line are then
    compared against the results of calling ``tICA`` and ``KMeans``
    directly on the same featurized inputs.
    """
    fn = get_fn("frame0.h5")
    commands = (
        "mixtape DRIDFeaturizer --trjs {} --out a.pkl".format(fn),
        "mixtape DihedralFeaturizer --types phi psi --trjs {} --out b.pkl".format(fn),
        "mixtape tICA --inp a.pkl --out ticamodel.pkl --transformed tics.pkl",
        "mixtape KMeans --random_state 0 --n_init 1 --inp b.pkl --out kmeans.pkl --transformed labels.pkl",
    )

    with enter_temp_directory():
        # Run the pipeline stage by stage; every stage must succeed.
        for command in commands:
            assert os.system(command) == 0

        # Command line results vs. the same estimators used in-process.
        cli_labels = verboseload("labels.pkl")
        api_labels = KMeans(random_state=0, n_init=1).fit_predict(verboseload("b.pkl"))
        cli_tics = verboseload("tics.pkl")
        api_tics = tICA().fit_transform(verboseload("a.pkl"))

        eq(cli_labels[0], api_labels[0])
        eq(cli_tics[0], api_tics[0])
def test_1():
    """Verify that mixtape's tICA matches the msmbuilder implementation.

    Both implementations are trained on the same seeded random data with a
    lag of 1, and their internal accumulators as well as the final
    offset-correlation and covariance estimates must agree.
    """
    np.random.seed(42)
    X = np.random.randn(10, 3)

    # Reference implementation; it is handed its own copy of the data.
    reference = tICAr(lag=1)
    reference.train(prep_trajectory=np.copy(X))
    # NOTE: the two projections are computed but never compared.
    y1 = reference.project(prep_trajectory=np.copy(X), which=[0, 1])

    model = tICA(n_components=2, lag_time=1)
    y2 = model.fit_transform([np.copy(X)])[0]

    # Check the internal state of the two implementations pair by pair.
    internal_pairs = (
        (reference.corrs, model._outer_0_to_T_lagged),
        (reference.sum_t, model._sum_0_to_TminusTau),
        (reference.sum_t_dt, model._sum_tau_to_T),
        (reference.sum_all, model._sum_0_to_T),
    )
    for expected, actual in internal_pairs:
        np.testing.assert_array_almost_equal(expected, actual)

    # Finally compare the estimated matrices themselves.
    ref_offset_correlation, ref_covariance = reference.get_current_estimate()
    np.testing.assert_array_almost_equal(
        ref_offset_correlation, model.offset_correlation_)
    np.testing.assert_array_almost_equal(ref_covariance, model.covariance_)
def test_shape():
    """Check the shapes of the fitted attributes for 10 features, 3 components."""
    data = np.random.randn(100, 10)
    model = tICA(n_components=3).fit([data])

    # (attribute name, expected shape); eigenvectors_ is features x components,
    # components_ is its transpose-shaped counterpart.
    expected_shapes = (
        ("eigenvalues_", (3,)),
        ("eigenvectors_", (10, 3)),
        ("components_", (3, 10)),
    )
    for attribute, shape in expected_shapes:
        eq(getattr(model, attribute).shape, shape)
from glob import glob

import mdtraj as md
import numpy as np
from msmbuilder import Project
from sklearn.pipeline import Pipeline

#from mixtape.featurizer import DihedralFeaturizer
from mixtape.featurizer import ContactFeaturizer
from mixtape.ghmm import GaussianFusionHMM
from mixtape.tica import tICA
from mixtape.utils import verbosedump, verboseload

# Script: featurize every trajectory of an msmbuilder project with residue
# contacts, fit a 5-component tICA model incrementally, then project each
# trajectory onto the fitted model. Results are collected in `output`,
# keyed by trajectory index.

prj = Project.load_from("ProjectInfo.yaml")

# Alternative featurization, kept for reference:
#feat = DihedralFeaturizer(['phi', 'psi'], sincos=True)
feat = ContactFeaturizer(contacts='all', scheme='closest-heavy')
tica = tICA(n_components=5, gamma=0, lag_time=20)
paths = glob('*.lh5')
output = {}

# First pass: feed each featurized trajectory to the model.
for path in np.arange(prj.n_trajs):
    featurized_path = feat.partial_transform(prj.load_traj(path))
    try:
        tica.partial_fit(featurized_path)
    except Exception:
        # Best effort: a trajectory the model rejects is skipped, not fatal.
        # Catching Exception (rather than a bare `except:`) keeps Ctrl-C and
        # SystemExit working while the loop runs.
        print("skipping %s" % path)

# Second pass: project every trajectory (including any skipped above) onto
# the fitted model.
for path in np.arange(prj.n_trajs):
    featurized_path = feat.partial_transform(prj.load_traj(path))
    output[path] = tica.partial_transform(featurized_path)

# save output
# TODO: nothing is written to disk yet; `output` only lives in memory.
import pickle

from sklearn.pipeline import Pipeline

from mixtape.featurizer import ContactFeaturizer
from mixtape.tica import tICA

# Script: build an (unfitted) contact-featurizer -> tICA pipeline and pickle
# it to 'contact-tica-n5.pkl'.

pipeline = Pipeline([
    # 'closest-heavy' is a contact *scheme*, so pass it by keyword. Given
    # positionally it binds to the featurizer's first parameter instead.
    # NOTE(review): presumably that first parameter is `contacts` -- confirm
    # against ContactFeaturizer.__init__.
    ('featurizer', ContactFeaturizer(scheme='closest-heavy')),
    ('tica', tICA(n_components=5)),
])

# Pickle data is binary: the file must be opened with 'wb'. Text mode fails
# on Python 3 and can corrupt the stream through newline translation.
with open('contact-tica-n5.pkl', 'wb') as f:
    pickle.dump(pipeline, f)
# something, but reusing the `tolerance` parameter seems fine too. sparsecutoff = tol mask = (np.abs(x) > sparsecutoff) grid = np.ix_(mask, mask) Ak, Bk = A[grid], B[grid] # form the submatrices gevals, gevecs = scipy.linalg.eigh(Ak, Bk, eigvals=(Ak.shape[0]-2, Ak.shape[0]-1)) u = gevals[-1] v = np.zeros(length) v[mask] = gevecs[:, -1] return u, v if __name__ == '__main__': X = np.random.randn(1000, 10) X[:,0] += np.sin(np.arange(1000) / 100.0) X[:,1] += np.cos(np.arange(1000) / 100.0) tica = tICA(n_components=2).fit(X) print('tica eigenvector\n', tica.components_[0]) print('tica eigenvalue\n', tica.eigenvalues_[0]) print('\ntica eigenvector\n', tica.components_[1]) print('tica eigenvalue\n', tica.eigenvalues_[1]) print('\n\n') sptica = SparseTICA(n_components=2, rho=0.01, tolerance=1e-6, verbose=False) sptica.fit(X) print('sptica eigenvector\n', sptica.components_[0]) print('sptica eigenvalue\n', sptica.eigenvalues_[0]) print('\nsptica eigenvector\n', sptica.components_[1]) print('sptica eigenvalue\n', sptica.eigenvalues_[1])