Example #1
0
def test_singular_1():
    """Fit tICA on data whose third column duplicates the first.

    The duplicated column makes the input rank deficient; the test only
    requires that the fitted components and eigenvalues are float64.
    """
    model = tICA(n_components=1)

    # Two random columns, followed by a copy of column 0.
    base = np.random.randn(100, 2)
    repeated_column = base[:, 0, np.newaxis]
    data = np.hstack((base, repeated_column))

    model.fit([data])

    expected_dtype = np.float64
    assert model.components_.dtype == expected_dtype
    assert model.eigenvalues_.dtype == expected_dtype
Example #2
0
def test_singular_2():
    """Fit tICA on data whose third column is identically zero.

    The test only requires that the fitted components and eigenvalues
    are float64.
    """
    model = tICA(n_components=1)

    # Two random columns, followed by a column of zeros.
    base = np.random.randn(100, 2)
    zero_column = np.zeros((100, 1))
    data = np.hstack((base, zero_column))

    model.fit([data])

    expected_dtype = np.float64
    assert model.components_.dtype == expected_dtype
    assert model.eigenvalues_.dtype == expected_dtype
Example #3
0
def test_score_1():
    """Check tICA.score against the eigenvalue sum for 1..4 components.

    For every model (fitted with gamma=0) the score of the training data
    must approximately equal the sum of the model's eigenvalues, and a
    freshly drawn random data set must score strictly lower than the
    training data.
    """
    train = np.random.randn(100, 5)

    for n_components in range(1, 5):
        model = tICA(n_components=n_components, gamma=0)
        model.fit([train])

        train_score = model.score([train])
        eigenvalue_total = model.eigenvalues_.sum()
        assert_approx_equal(train_score, eigenvalue_total)

        # New random data, unrelated to what the model was fitted on.
        other = np.random.randn(100, 5)
        assert model.score([other]) < model.score([train])
Example #4
0
def test_1():
    """Compare the `mixtape` command line tools with the Python API.

    Featurizes "frame0.h5" with the DRID and dihedral featurizers from
    the command line, runs the tICA and KMeans commands on the results,
    and checks that the first transformed sequence of each matches what
    the corresponding Python estimator produces from the same pickled
    features.
    """
    fn = get_fn("frame0.h5")

    # Executed in order; each one must exit with status 0.
    commands = [
        "mixtape DRIDFeaturizer --trjs {} --out a.pkl".format(fn),
        "mixtape DihedralFeaturizer --types phi psi --trjs {} --out b.pkl".format(fn),
        "mixtape tICA --inp a.pkl --out ticamodel.pkl --transformed tics.pkl",
        "mixtape KMeans --random_state 0 --n_init 1 --inp b.pkl --out kmeans.pkl --transformed labels.pkl",
    ]

    with enter_temp_directory():
        for command in commands:
            assert os.system(command) == 0

        # Results written by the CLI next to results computed in-process.
        cli_labels = verboseload("labels.pkl")
        dihedrals = verboseload("b.pkl")
        api_labels = KMeans(random_state=0, n_init=1).fit_predict(dihedrals)

        cli_tics = verboseload("tics.pkl")
        drid = verboseload("a.pkl")
        api_tics = tICA().fit_transform(drid)

    eq(cli_labels[0], api_labels[0])
    eq(cli_tics[0], api_tics[0])
Example #5
0
def test_1():
    """Verify that tICA and the tICAr implementation give identical results.

    Both estimators are trained on copies of the same seeded random data
    with a lag of 1; their accumulated statistics and their final
    estimates are then compared element-wise.
    """
    np.random.seed(42)
    data = np.random.randn(10, 3)
    check = np.testing.assert_array_almost_equal

    reference = tICAr(lag=1)
    reference.train(prep_trajectory=np.copy(data))
    # NOTE: y1 and y2 are computed but never compared with each other.
    y1 = reference.project(prep_trajectory=np.copy(data), which=[0, 1])

    model = tICA(n_components=2, lag_time=1)
    y2 = model.fit_transform([np.copy(data)])[0]

    # Internal accumulators of the two implementations.
    check(reference.corrs, model._outer_0_to_T_lagged)
    check(reference.sum_t, model._sum_0_to_TminusTau)
    check(reference.sum_t_dt, model._sum_tau_to_T)
    check(reference.sum_all, model._sum_0_to_T)

    # Final estimates of the two implementations.
    ref_offset_correlation, ref_covariance = reference.get_current_estimate()
    check(ref_offset_correlation, model.offset_correlation_)
    check(ref_covariance, model.covariance_)
Example #6
0
def test_shape():
    """Check the shapes of the fitted tICA attributes.

    With 10 input features and 3 requested components, the eigenvalues
    must have shape (3,), the eigenvectors (10, 3) and the components
    (3, 10).
    """
    n_features, n_components = 10, 3

    estimator = tICA(n_components=n_components)
    data = np.random.randn(100, n_features)
    model = estimator.fit([data])

    eq(model.eigenvalues_.shape, (n_components,))
    eq(model.eigenvectors_.shape, (n_features, n_components))
    eq(model.components_.shape, (n_components, n_features))
Example #7
0
from sklearn.pipeline import Pipeline
#from mixtape.featurizer import DihedralFeaturizer
from mixtape.featurizer import ContactFeaturizer
from mixtape.tica import tICA
from mixtape.utils import verbosedump, verboseload
from glob import glob
import mdtraj as md
from msmbuilder import Project
import numpy as np
from mixtape.ghmm import GaussianFusionHMM

# Load the msmbuilder project whose trajectories are analysed below.
prj = Project.load_from("ProjectInfo.yaml")
# Alternative featurization: DihedralFeaturizer(['phi', 'psi'], sincos=True)
feat = ContactFeaturizer(contacts='all', scheme='closest-heavy')
tica = tICA(n_components=5, gamma=0, lag_time=20)
# NOTE(review): `paths` is not used by any statement visible here.
paths = glob('*.lh5')

# Maps trajectory index -> tICA-transformed features of that trajectory.
output = {}

# First pass: feed the featurized trajectories to the tICA model one by one.
# NOTE: despite its name, `path` is an integer trajectory index.
for path in np.arange(prj.n_trajs):
    featurized_path = feat.partial_transform(prj.load_traj(path))
    try:
        tica.partial_fit(featurized_path)
    except Exception:
        # Best effort: a trajectory that cannot be fitted is reported and
        # skipped. Catching Exception (rather than a bare except) lets
        # KeyboardInterrupt and SystemExit propagate.
        print("skipping %s" % path)

# Second pass: transform every trajectory with the fitted model.
# NOTE(review): trajectories skipped above are still transformed here.
for path in np.arange(prj.n_trajs):
    featurized_path = feat.partial_transform(prj.load_traj(path))
    output[path] = tica.partial_transform(featurized_path)

# save output
Example #8
0
from sklearn.pipeline import Pipeline
from mixtape.tica import tICA
from mixtape.featurizer import ContactFeaturizer
import pickle


pipeline = Pipeline([
    ('featurizer', ContactFeaturizer('closest-heavy')),
    ('tica', tICA(n_components=5)),
])

with open('contact-tica-n5.pkl', 'w') as f:
    pickle.dump(pipeline, f)
Example #9
0
    # something, but reusing the `tolerance` parameter seems fine too.
    sparsecutoff = tol

    mask = (np.abs(x) > sparsecutoff)
    grid = np.ix_(mask, mask)
    Ak, Bk = A[grid], B[grid]  # form the submatrices
    gevals, gevecs = scipy.linalg.eigh(Ak, Bk, eigvals=(Ak.shape[0]-2, Ak.shape[0]-1))
    u = gevals[-1]
    v = np.zeros(length)
    v[mask] = gevecs[:, -1]
    return u, v

if __name__ == '__main__':
    # Demo: Gaussian noise in 10 dimensions, with a sine added to
    # column 0 and a cosine added to column 1, analysed first with tICA
    # and then with SparseTICA.
    X = np.random.randn(1000, 10)
    phase = np.arange(1000) / 100.0
    X[:, 0] += np.sin(phase)
    X[:, 1] += np.cos(phase)

    dense_model = tICA(n_components=2).fit(X)
    first, second = 0, 1
    print('tica eigenvector\n', dense_model.components_[first])
    print('tica eigenvalue\n', dense_model.eigenvalues_[first])
    print('\ntica eigenvector\n', dense_model.components_[second])
    print('tica eigenvalue\n', dense_model.eigenvalues_[second])
    print('\n\n')

    sparse_model = SparseTICA(
        n_components=2,
        rho=0.01,
        tolerance=1e-6,
        verbose=False,
    )
    sparse_model.fit(X)
    print('sptica eigenvector\n', sparse_model.components_[first])
    print('sptica eigenvalue\n', sparse_model.eigenvalues_[first])
    print('\nsptica eigenvector\n', sparse_model.components_[second])
    print('sptica eigenvalue\n', sparse_model.eigenvalues_[second])