def test_timeshifted_split_wrong_args():
    data = [
        np.zeros(shape=(100, 3), dtype=np.float32),
        np.zeros(shape=(10, 3), dtype=np.float32)
    ]
    with assert_raises(ValueError):
        # negative chunksize
        list(timeshifted_split(data, lagtime=1, chunksize=-1))
    with assert_raises(ValueError):
        # too long lagtime
        list(timeshifted_split(data, lagtime=15))
    with assert_raises(ValueError):
        # too long lagtime
        list(timeshifted_split(data, lagtime=10))
    list(timeshifted_split(data, lagtime=9))  # sanity, this should not raise

def test_timeshifted_split_shuffle(lagtime, n_splits):
    x = np.arange(31, 5000)
    chunks = []
    chunks_lagged = []
    for chunk in timeshifted_split(x, lagtime=lagtime, n_splits=n_splits, shuffle=True):
        if lagtime > 0:
            chunks.append(chunk[0])
            chunks_lagged.append(chunk[1])
        else:
            chunks.append(chunk)
            chunks_lagged.append(chunk)
    chunks = np.concatenate(chunks)
    chunks_lagged = np.concatenate(chunks_lagged)
    np.testing.assert_equal(len(chunks), len(x) - lagtime)  # we lose lagtime many frames
    np.testing.assert_equal(len(chunks_lagged), len(x) - lagtime)  # we lose lagtime many frames
    np.testing.assert_equal(chunks + lagtime, chunks_lagged)  # since data is sequential this must hold
    # check whether everything combined is the full dataset
    all_data = np.concatenate((chunks, chunks_lagged))
    np.testing.assert_equal(len(np.setdiff1d(x, all_data)), 0)

def test_koopman_estimator_partial_fit(self):
    from deeptime.covariance import KoopmanWeightingEstimator
    est = KoopmanWeightingEstimator(lagtime=self.tau)
    est.lagtime = 1
    np.testing.assert_equal(est.lagtime, 1)
    est.lagtime = self.tau
    np.testing.assert_equal(est.lagtime, self.tau)

    data_lagged = timeshifted_split(self.data, lagtime=self.tau, n_splits=10)
    for traj in data_lagged:
        est.partial_fit(traj)
    m = est.fetch_model()
    np.testing.assert_allclose(m.weights_input, self.weight_obj.weights_input)
    np.testing.assert_allclose(m.const_weight_input, self.weight_obj.const_weight_input)
    # weights and transform are identical
    np.testing.assert_allclose(m.weights(self.data[0]), m.transform(self.data[0]))
    # dispatches to model
    np.testing.assert_allclose(m.weights(self.data[0]), est.transform(self.data[0]))

@classmethod
def setUpClass(cls):
    N_steps = 10000
    N_traj = 20
    lag = 1
    T = np.linalg.matrix_power(
        np.array([[0.7, 0.2, 0.1],
                  [0.1, 0.8, 0.1],
                  [0.1, 0.1, 0.8]]), lag)
    dtrajs = [generate(T, N_steps) for _ in range(N_traj)]
    p0 = np.zeros(3)
    p1 = np.zeros(3)
    trajs = []
    for dtraj in dtrajs:
        # one-hot encode the discrete trajectory
        traj = np.zeros((N_steps, T.shape[0]))
        traj[np.arange(len(dtraj)), dtraj] = 1.0
        trajs.append(traj)
        p0 += traj[:-lag, :].sum(axis=0)
        p1 += traj[lag:, :].sum(axis=0)
    estimator = VAMP(scaling=None, var_cutoff=1.0)
    cov = VAMP.covariance_estimator(lagtime=lag).fit(trajs).fetch_model()
    vamp = estimator.fit(cov).fetch_model()
    msm = estimate_markov_model(dtrajs, lag=lag, reversible=False)
    cls.trajs = trajs
    cls.dtrajs = dtrajs
    cls.trajs_timeshifted = list(timeshifted_split(cls.trajs, lagtime=lag, chunksize=5000))
    cls.lag = lag
    cls.msm = msm
    cls.vamp = vamp
    cls.estimator = estimator
    cls.p0 = p0 / p0.sum()
    cls.p1 = p1 / p1.sum()
    cls.atol = np.finfo(np.float32).eps * 1000.0

def test_dim_and_var_cutoff(full_rank_time_series, dim, var_cutoff, partial_fit):
    traj, ds = full_rank_time_series
    # basically dim should be ignored here since var_cutoff takes precedence whenever it is not None
    est = VAMP(lagtime=1, dim=dim, var_cutoff=var_cutoff)
    if partial_fit:
        for chunk in timeshifted_split(traj, lagtime=1, chunksize=15):
            est.partial_fit(chunk)
        est2 = VAMP(lagtime=1, dim=dim, var_cutoff=var_cutoff).fit(ds)
        np.testing.assert_array_almost_equal(est.fetch_model().operator, est2.fetch_model().operator,
                                             decimal=4)  # can fail on Windows with higher accuracy
    else:
        est.fit(ds)
    projection = est.transform(traj)
    np.testing.assert_equal(projection.shape[0], traj.shape[0])
    if var_cutoff is not None:
        if var_cutoff == 1.:
            # data is internally mean-free
            np.testing.assert_equal(projection.shape[1], traj.shape[1] - 1)
        else:
            np.testing.assert_array_less(projection.shape[1], traj.shape[1])
    else:
        if dim is None:
            # data is internally mean-free
            np.testing.assert_equal(projection.shape[1], traj.shape[1] - 1)
        else:
            np.testing.assert_equal(projection.shape[1], dim)

def test_timeshifted_split_nolag():
    x = np.arange(5000)
    splits = []
    for chunk in timeshifted_split(x, lagtime=0, n_splits=3):
        splits.append(chunk)
    np.testing.assert_equal(np.concatenate(splits), x)
    np.testing.assert_equal(len(splits), 3)
    for i in range(3):
        np.testing.assert_(len(splits[i]) > 0)

def test_timeshifted_split_chunksize(data):
    chunks = []
    chunks_lagged = []
    for X, Y in timeshifted_split(data, lagtime=1, chunksize=2):
        chunks.append(X)
        chunks_lagged.append(Y)
        np.testing.assert_(0 < len(X) <= 2)
        np.testing.assert_(0 < len(Y) <= 2)
    np.testing.assert_equal(np.concatenate(chunks), data[:-1])
    np.testing.assert_equal(np.concatenate(chunks_lagged), data[1:])

def test_timeshifted_split_nsplits(data):
    chunks = []
    chunks_lagged = []
    n = 0
    for X, Y in timeshifted_split(data, lagtime=1, n_splits=2):
        chunks.append(X)
        chunks_lagged.append(Y)
        n += 1
    np.testing.assert_equal(n, 2)
    np.testing.assert_equal(np.concatenate(chunks), data[:-1])
    np.testing.assert_equal(np.concatenate(chunks_lagged), data[1:])