def setUpClass(cls):
    """Build the shared fixtures: discrete trajectories, their one-hot
    encodings, a VAMP estimate, an MSM estimate and empirical occupancies.

    Stored on ``cls``: ``trajs``, ``dtrajs``, ``trajs_timeshifted``, ``lag``,
    ``msm``, ``vamp``, ``estimator``, ``p0``, ``p1`` and ``atol``.
    """
    lag = 1
    n_steps, n_traj = 10000, 20
    transition_matrix = np.linalg.matrix_power(
        np.array([[0.7, 0.2, 0.1], [0.1, 0.8, 0.1], [0.1, 0.1, 0.8]]), lag)
    n_states = transition_matrix.shape[0]

    dtrajs = [generate(transition_matrix, n_steps) for _ in range(n_traj)]

    def one_hot(dtraj):
        # Row i gets a 1.0 in the column given by dtraj[i]; all else is 0.
        encoded = np.zeros((n_steps, n_states))
        encoded[np.arange(len(dtraj)), dtraj] = 1.0
        return encoded

    trajs = [one_hot(dtraj) for dtraj in dtrajs]

    # Column sums over the first (all but the last `lag` frames) and the
    # time-shifted (all but the first `lag` frames) part of each trajectory.
    p0 = np.zeros(3)
    p1 = np.zeros(3)
    for traj in trajs:
        p0 += traj[:-lag, :].sum(axis=0)
        p1 += traj[lag:, :].sum(axis=0)

    estimator, vamp = estimate_vamp(
        trajs, lag=lag, scaling=None, dim=1.0, return_estimator=True)
    msm = estimate_markov_model(dtrajs, lag=lag, reversible=False)

    cls.trajs = trajs
    cls.dtrajs = dtrajs
    cls.trajs_timeshifted = list(
        timeshifted_split(cls.trajs, lagtime=lag, chunksize=5000))
    cls.lag = lag
    cls.msm = msm
    cls.vamp = vamp
    cls.estimator = estimator
    # Normalise the accumulated counts so each sums to one.
    cls.p0 = p0 / p0.sum()
    cls.p1 = p1 / p1.sum()
    cls.atol = np.finfo(np.float32).eps * 1000.0
def test_koopman_estimator_partial_fit(self):
    """Fit a ``KoopmanEstimator`` chunk-wise via ``partial_fit`` and check
    that ``u`` and ``u_const`` of the resulting model match ``self.weight_obj``.
    """
    from sktime.covariance.online_covariance import KoopmanEstimator

    estimator = KoopmanEstimator(lagtime=self.tau)
    for chunk in timeshifted_split(self.data, lagtime=self.tau, n_splits=10):
        estimator.partial_fit(chunk)
    model = estimator.fetch_model()

    for attr in ("u", "u_const"):
        np.testing.assert_allclose(
            getattr(model, attr), getattr(self.weight_obj, attr))
def test_fit_reset(self):
    """Compare a chunk-wise ``partial_fit`` model with a subsequent ``fit``
    on the same estimator and data.

    The two model copies must compare unequal with ``!=`` while their
    ``mean_0``, ``cov_00`` and ``cov_0t`` agree to the default precision of
    ``assert_array_almost_equal``.
    """
    lag = 100
    chunk = 40
    np.random.seed(0)
    data = np.random.randn(23000, 3)

    est = TICA(lagtime=lag, dim=1)

    # First pass: stream the data in (instantaneous, lagged) chunk pairs.
    for x_now, x_lagged in timeshifted_split(data, lagtime=lag, chunksize=chunk):
        est.partial_fit((x_now, x_lagged))
    streamed = est.fetch_model().copy()

    # Second pass: a single call to fit() on the full data set.
    est.fit(data)
    refitted = est.fetch_model().copy()

    assert streamed != refitted
    for attr in ("mean_0", "cov_00", "cov_0t"):
        np.testing.assert_array_almost_equal(
            getattr(streamed, attr), getattr(refitted, attr))
def fit(self, data, lagtime=None, weights=None, n_splits=None, column_selection=None):
    """Clear the running estimate and fit it to ``data`` chunk by chunk.

    Parameters
    ----------
    data
        Input time series. It is passed through ``ensure_timeseries_data``
        and afterwards treated as a sequence of trajectories.
    lagtime : optional
        Lag time. When given, it also overwrites ``self.lagtime``; when
        ``None``, ``self.lagtime`` is used instead.
    weights : optional
        One of:

        * ``None`` - no weights are passed on;
        * an object with a ``weights`` attribute - ``weights.weights(x)`` is
          evaluated for every chunk ``x`` (lagged estimation only);
        * per-frame weights whose length equals ``len(data[0])``.
    n_splits : int, optional
        Number of chunks. If ``None``, it is the length of the shortest
        trajectory floor-divided by 100 when that length is at least 1e4,
        and 1 otherwise.
    column_selection : ndarray(k, dtype=int) or None
        Indices of those columns that are to be computed. If None, all
        columns are computed.

    Returns
    -------
    self

    Raises
    ------
    ValueError
        If ``weights`` has no ``weights`` attribute and its length differs
        from the number of frames in ``data[0]``.
    """
    # TODO: consistent dtype
    data = ensure_timeseries_data(data)

    self._rc.clear()

    if n_splits is None:
        dlen = min(len(d) for d in data)
        n_splits = int(dlen // 100 if dlen >= 1e4 else 1)

    if lagtime is None:
        lagtime = self.lagtime
    else:
        self.lagtime = lagtime
    # Kept as an assertion so the exception type seen by callers is unchanged.
    assert lagtime is not None

    lazy_weights = False
    wsplit = itertools.repeat(None)

    if weights is not None:
        if hasattr(weights, 'weights'):
            # Weights are computed on the fly from each chunk.
            lazy_weights = True
        else:
            n_weights = len(np.atleast_1d(weights))
            n_frames = len(data[0])
            if n_weights != n_frames:
                # Use the lengths computed above: len(weights) itself raises
                # TypeError for scalar input and would hide this ValueError.
                raise ValueError(
                    "Weights have incompatible shape "
                    f"(#weights={n_weights} != {n_frames}=#frames)."
                )
            if isinstance(weights, np.ndarray):
                wsplit = np.array_split(weights, n_splits)
            # NOTE(review): weights of the right length that are not an
            # ndarray (e.g. a plain list) are not split here, so the lagged
            # branch below passes weights=None for them - confirm intent.

    if self.is_lagged:
        chunks = timeshifted_split(data, lagtime=lagtime, n_splits=n_splits)
        for (x, y), w in zip(chunks, wsplit):
            if lazy_weights:
                w = weights.weights(x)
            # The weight chunk may hold more entries than the chunk has
            # frames; keep only the first len(x) of them.
            if isinstance(w, np.ndarray):
                w = w[:len(x)]
            self.partial_fit((x, y), weights=w, column_selection=column_selection)
    else:
        # Every trajectory is passed on together with the full, unsplit weights.
        for x in data:
            self.partial_fit(x, weights=weights, column_selection=column_selection)

    return self