def position_based_fluids(n_burn_in=5000, initial_positions=None, n_jobs=None):
    r""" Creates a position based fluids (PBF) simulator. It was introduced in :footcite:`macklin2013position`.
    Up to numerics the simulation is deterministic.

    The simulation box has dimensions :math:`[-40, 40]\times [-25, 25]` and the initial positions of the particles
    are around the top boundary of the box. For simplicity of use, the default initial positions are fixed in this
    method and yield 972 particles. For custom positioning, provide `initial_positions` or use the simulator
    directly.

    The interaction distance is set to :math:`d = 1.5` and `n_burn_in` steps are performed to equilibrate the
    system before returning the simulator.

    For more details see :class:`PBFSimulator <deeptime.data.PBFSimulator>`.

    .. plot::

        import matplotlib.pyplot as plt
        import deeptime

        ftraj = deeptime.data.position_based_fluids(n_burn_in=150).run(300)
        f, axes = plt.subplots(3, 2, figsize=(15, 10))
        for i, ax in enumerate(axes.flat):
            ax.scatter(*(ftraj[i * 50].reshape(-1, 2).T))
            ax.set_title("t = {}".format(i * 50))
            ax.grid()
        f.suptitle(r'PBF dataset observations.')
        plt.show()

    Parameters
    ----------
    n_burn_in : int, default=5000
        Number of steps without any drift force to equilibrate the system.
    initial_positions : ndarray, optional, default=None
        Explicit initial positions of shape (N, 2); if None, the default grid of 972 particles is used.
    n_jobs : int or None, default=None
        Number of threads to use for simulation.

    Returns
    -------
    simulator : deeptime.data.PBFSimulator
        The PBF simulator.

    References
    ----------
    .. footbibliography::
    """
    from deeptime.util.parallel import handle_n_jobs
    n_jobs = handle_n_jobs(n_jobs)
    interaction_distance = 1.5
    if initial_positions is None:
        init_pos_x = np.arange(-24, 24, interaction_distance * .9).astype(np.float32)
        init_pos_y = np.arange(-12, 24, interaction_distance * .9).astype(np.float32)
        initial_positions = np.dstack(np.meshgrid(init_pos_x, init_pos_y)).reshape(-1, 2)
    domain = np.array([80, 50])
    pbf = PBFSimulator(domain_size=domain, initial_positions=initial_positions,
                       interaction_distance=interaction_distance, n_jobs=n_jobs)
    # equilibrate
    pbf.run(n_burn_in, 0)
    return pbf
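
# A minimal usage sketch (illustrative, not part of the documented API): equilibrate
# a small PBF system and collect a short trajectory. The burn-in of 150 steps is an
# assumption chosen for speed; the default of 5000 equilibrates more thoroughly.
def _example_position_based_fluids():
    simulator = position_based_fluids(n_burn_in=150, n_jobs=1)
    ftraj = simulator.run(300)  # flat frames: one (x, y) pair per particle
    return ftraj.reshape(len(ftraj), -1, 2)  # one row per frame, 972 particles in 2D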
def _transform_to_density_impl(domain_size, trajectory, n_grid_x=20, n_grid_y=10, n_jobs=None):
    trajectory = trajectory.reshape((len(trajectory), -1, 2))
    # evaluation grid spanning the simulation box, flattened to (n_grid_x * n_grid_y, 2)
    gridx = np.linspace(-domain_size[0] / 2, domain_size[0] / 2, num=n_grid_x).astype(np.float32)
    gridy = np.linspace(-domain_size[1] / 2, domain_size[1] / 2, num=n_grid_y).astype(np.float32)
    grid = np.meshgrid(gridx, gridy)
    kde_input = np.dstack(grid).reshape(-1, 2)

    traj_kde = np.empty((len(trajectory), len(kde_input)))
    import multiprocessing as mp
    with mp.Pool(processes=handle_n_jobs(n_jobs)) as pool:
        # one KDE evaluation per frame; results arrive out of order and are
        # reassembled via the frame index carried in each result
        args = [(t, trajectory[t], kde_input) for t in range(len(trajectory))]
        for result in pool.imap_unordered(_transform_to_density_impl_worker, args):
            traj_kde[result[0]] = result[1]
    return traj_kde
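
# Hypothetical sketch of the per-frame worker consumed by the pool above. The real
# _transform_to_density_impl_worker is defined elsewhere in the module; this version
# only illustrates the idea: evaluate a Gaussian KDE of the particle positions on the
# grid and normalize, matching the frame-wise normalization in transform_to_density.
def _example_density_worker(args):
    from scipy.stats import gaussian_kde
    t, frame, kde_input = args  # frame: (n_particles, 2), kde_input: (n_grid, 2)
    density = gaussian_kde(frame.T)(kde_input.T)
    return t, density / density.sum()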
def __init__(self, domain_size: np.ndarray, initial_positions: np.ndarray, interaction_distance: float,
             n_jobs=None, n_solver_iterations: int = 5, gravity: float = 10., epsilon: float = 10.,
             timestep: float = 0.016, rest_density: float = 1., tensile_instability_distance: float = .2,
             tensile_instability_k: float = 0.1):
    if np.atleast_1d(domain_size).ndim != 1 or domain_size.shape[0] != 2 or np.any(domain_size <= 0):
        raise ValueError("Invalid domain size: must be positive and 1-dimensional of length two.")
    if initial_positions.ndim != 2 or initial_positions.shape[1] != 2:
        raise ValueError("Initial positions must be a 2-dimensional numpy array of shape (N, 2), where N is "
                         "the number of particles.")
    if interaction_distance <= 0:
        raise ValueError("Interaction distance must be positive.")
    domain_size = domain_size.astype(np.float32, subok=False, copy=False)
    initial_positions = initial_positions.astype(np.float32, subok=False, copy=False)

    self._engine = bd.PBF(initial_positions, domain_size, interaction_distance, handle_n_jobs(n_jobs))
    self._engine.n_solver_iterations = n_solver_iterations
    self._engine.gravity = gravity
    self._engine.epsilon = epsilon
    self._engine.timestep = timestep
    self._engine.rest_density = rest_density
    self._engine.tensile_instability_distance = tensile_instability_distance
    self._engine.tensile_instability_k = tensile_instability_k
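
# Illustrative sketch: constructing the simulator directly with custom initial
# positions, as suggested in position_based_fluids. All numbers here are assumptions;
# particles start on a coarse grid in the upper half of a 20 x 20 box.
def _example_custom_pbf():
    xs = np.arange(-5., 5., 1., dtype=np.float32)
    ys = np.arange(0., 8., 1., dtype=np.float32)
    positions = np.dstack(np.meshgrid(xs, ys)).reshape(-1, 2)
    return PBFSimulator(domain_size=np.array([20., 20.]), initial_positions=positions,
                        interaction_distance=1.5, n_jobs=1)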
def transform_to_density(self, trajectory, n_grid_x=20, n_grid_y=10, n_jobs=None):
    r"""Transforms a two-dimensional PBF particle trajectory to a trajectory of densities by performing kernel
    density estimates (KDEs). Since no particles are lost along the way, the densities are normalized
    frame-wise.

    Parameters
    ----------
    trajectory : (T, n, 2) ndarray
        The input trajectory for n particles.
    n_grid_x : int, default=20
        Number of evaluation points of the simulation box in x direction.
    n_grid_y : int, default=10
        Number of evaluation points of the simulation box in y direction.
    n_jobs : int or None, default=None
        Number of jobs to use when transforming to densities.

    Returns
    -------
    trajectory : (T, n_grid_x * n_grid_y) ndarray
        Output trajectory.
    """
    n_jobs = handle_n_jobs(n_jobs)
    return _transform_to_density_impl(self.domain_size, trajectory, n_grid_x, n_grid_y, n_jobs)
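
# Usage sketch: turning a particle trajectory into a coarse density movie. Burn-in
# and trajectory lengths are illustrative and kept small for speed.
def _example_transform_to_density():
    simulator = position_based_fluids(n_burn_in=100, n_jobs=1)
    ftraj = simulator.run(50)
    # one normalized density per frame on a 20 x 10 grid -> (n_frames, 200)
    return simulator.transform_to_density(ftraj, n_grid_x=20, n_grid_y=10, n_jobs=1)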
def bickley_jet(n_particles: int, n_jobs: Optional[int] = None, seed: Optional[int] = None) -> BickleyJetDataset:
    r"""Simulates the Bickley jet for a number of particles.
    The implementation is based on :footcite:`hadjighasem2016spectral` with parameters

    .. math::

        \begin{aligned}
            U_0 &= 5.4138 \times 10^6\,\frac{\mathrm{m}}{\mathrm{day}},\\
            L_0 &= 1.77 \times 10^6\,\mathrm{m},\\
            r_0 &= 6.371 \times 10^6\,\mathrm{m},\\
            c &= (0.1446, 0.205, 0.461)^\top U_0,\\
            \mathrm{eps} &= (0.075, 0.15, 0.3)^\top,\\
            k &= (2, 4, 6)^\top \frac{1}{r_0},
        \end{aligned}

    in a domain :math:`\Omega = [0, 20] \times [-3, 3]`. The resulting dataset describes the temporal evolution
    of :code:`n_particles` over 401 timesteps in :math:`\Omega`. The domain is periodic in x-direction.

    The dataset offers methods to wrap the domain into three-dimensional space onto the surface of a cylinder

    .. math::

        \begin{pmatrix} x \\ y \end{pmatrix} \mapsto \begin{pmatrix}
            r \cdot \cos\left( 2\pi \frac{x}{20} \right) \\
            r \cdot \sin\left( 2\pi \frac{x}{20} \right) \\
            \frac{y}{3}
        \end{pmatrix},

    with the option to further discretize the three-dimensional dataspace via binning. In this way, the
    discontinuity introduced by the two-dimensional periodicity is resolved.

    .. plot::

        import matplotlib.pyplot as plt
        import deeptime

        n_particles = 1000
        dataset = deeptime.data.bickley_jet(n_particles, n_jobs=8)

        fig, axes = plt.subplots(2, 3, sharex=True, sharey=True, figsize=(16, 10))

        for t, ax in zip([0, 1, 2, 200, 300, 400], axes.flatten()):
            ax.scatter(*dataset[t].T, c=dataset[0, :, 0], s=50)
            ax.set_title(f"Particles at t={t}")

    Parameters
    ----------
    n_particles : int
        Number of particles which are propagated.
    n_jobs : int or None, default=None
        Number of threads to use for simulation.
    seed : int or None, optional, default=None
        Random seed used for initialization of particle positions at :math:`t=0`.

    Returns
    -------
    dataset : BickleyJetDataset
        Dataset over all the generated frames.

    See Also
    --------
    BickleyJet
        Underlying trajectory generator.

    Examples
    --------

    >>> import deeptime
    >>> dataset = deeptime.data.bickley_jet(n_particles=5, n_jobs=1)
    >>> # shape is 401 frames for 5 particles in two dimensions
    >>> print(dataset.data.shape)
    (401, 5, 2)

    >>> # returns a time-lagged dataset for first and last frame
    >>> endpoints = dataset.endpoints_dataset()
    >>> endpoints.data.shape
    (5, 2)

    >>> # maps the endpoints dataset onto a cylinder of radius 5
    >>> endpoints_3d = endpoints.to_3d(radius=5.)
    >>> endpoints_3d.data.shape
    (5, 3)

    >>> # bins the data uniformly with 10 bins per axis
    >>> endpoints_3d_clustered = endpoints_3d.cluster(n_bins=10)
    >>> # 5 particles and 10 * 10 * 10 bins
    >>> endpoints_3d_clustered.data.shape
    (5, 1000)

    References
    ----------
    .. footbibliography::
    """
    from deeptime.util.parallel import handle_n_jobs
    n_jobs = handle_n_jobs(n_jobs)
    simulator = BickleyJet(h=1e-2, n_steps=10)
    traj = simulator.generate(n_particles=n_particles, n_jobs=n_jobs, seed=seed)
    traj_reshaped = traj.transpose(1, 0, 2)
    return BickleyJetDataset(traj_reshaped)
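
# Sketch of the cylindrical embedding written out as code; the dataset's to_3d
# method provides this functionality, the helper below only mirrors the formula
# in the docstring above.
def _example_to_cylinder(xy, radius=1.):
    angle = 2. * np.pi * xy[..., 0] / 20.  # x is periodic on [0, 20]
    return np.stack((radius * np.cos(angle), radius * np.sin(angle), xy[..., 1] / 3.), axis=-1)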
def effective_count_matrix(dtrajs, lag, average='row', mact=1.0, n_jobs=None, callback=None):
    r""" Computes the statistically effective transition count matrix.

    Given a list of discrete trajectories, computes the effective number of statistically uncorrelated
    transition counts at the given lag time. First computes the full sliding-window counts
    :math:`c_{ij}(\tau)`. Then uses :func:`statistical_inefficiencies` to compute statistical inefficiencies
    :math:`I_{ij}(\tau)`. The number of effective counts in a row is then computed as

    .. math::

        c_i^{\mathrm{eff}}(\tau) = \sum_j I_{ij}(\tau) c_{ij}(\tau),

    and the effective transition counts are obtained by scaling the rows accordingly:

    .. math::

        c_{ij}^{\mathrm{eff}}(\tau) = \frac{c_i^{\mathrm{eff}}(\tau)}{c_i(\tau)} c_{ij}(\tau).

    This procedure is not yet published, but a manuscript is in preparation [1]_.

    Parameters
    ----------
    dtrajs : list of int-iterables
        Discrete trajectories.
    lag : int
        Lag time.
    average : str, default='row'
        Use one of 'row', 'all', 'none', with the following consequences:

        * 'none': the statistical inefficiency is applied separately to each transition count
          (not recommended)
        * 'row': the statistical inefficiency is averaged (weighted) by row (recommended)
        * 'all': the statistical inefficiency is averaged (weighted) over all transition counts
          (not recommended)
    mact : float, default=1.0
        Multiplier for the autocorrelation time. We tend to underestimate the autocorrelation time (and thus
        overestimate effective counts) because the autocorrelation function is truncated when it passes
        through 0 in order to avoid numerical instabilities. This is a purely heuristic factor trying to
        compensate this effect. This parameter might be removed in the future when a more robust estimation
        method of the autocorrelation time is used.
    n_jobs : int, default=None
        If None, uses all available logical cores, otherwise the function will be evaluated with as many
        processes as specified (must then be positive).
    callback : callable, default=None
        Will be called for every statistical inefficiency computed (number of nonzero elements in the count
        matrix). If n_jobs is greater than one, the callback will be invoked per finished batch.

    See also
    --------
    statistical_inefficiencies
        Used for computing the statistical inefficiencies of sliding-window transition counts.

    References
    ----------
    .. [1] Noe, F. and H. Wu: in preparation (2015)
    """
    from deeptime.util.parallel import handle_n_jobs
    n_jobs = handle_n_jobs(n_jobs)
    dtrajs = ensure_dtraj_list(dtrajs)
    return sparse.effective_counts.effective_count_matrix(dtrajs, lag, average=average, mact=mact,
                                                          n_jobs=n_jobs, callback=callback)
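
# Usage sketch with a toy discrete trajectory; state count, trajectory length, and
# lag are illustrative. The result is a sparse matrix of (fractional) effective counts.
def _example_effective_counts():
    dtraj = np.random.RandomState(42).randint(0, 3, size=1000)
    return effective_count_matrix([dtraj], lag=5, average='row')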
def vamp_score_cv(fit_fetch: Union[Estimator, Callable], trajs, lagtime=None, n=10, splitting_mode="sliding",
                  r=2, dim: Optional[int] = None, blocksplit: bool = True, random_state=None, n_jobs=1):
    r""" Scores the MSM using the variational approach for Markov processes and cross-validation.

    Implementation and ideas following :footcite:`noe2013variational` :footcite:`wu2020variational` and
    cross-validation :footcite:`mcgibbon2015variational`.

    Divides the data into training and test data, fits an MSM using the training data using the parameters
    of this estimator, and scores it using the test data. Currently only one way of splitting is implemented,
    where for each n, the data is randomly divided into two approximately equally large sets of discrete
    trajectory fragments with lengths of at least the lagtime.

    Currently only implemented using dense matrices - will be slow for large state spaces.

    Parameters
    ----------
    fit_fetch : callable or estimator
        Can be provided as callable for a custom fit and fetch method. Should be a function pointer or lambda
        which takes a list of discrete trajectories as input and yields a
        :class:`CovarianceKoopmanModel <deeptime.decomposition.CovarianceKoopmanModel>`. Or an estimator which
        yields this kind of model.
    trajs : list of array_like
        Input data.
    lagtime : int, optional, default=None
        Lag time, must be provided if blocksplitting is used, otherwise can be left None.
    n : int, optional, default=10
        Number of repetitions of the cross-validation. Use a large n to get solid means of the score.
    splitting_mode : str, optional, default="sliding"
        Can be one of "sliding" and "sample". In the former case the blocks may overlap, otherwise not.
    r : float or str, default=2
        Available scores are based on the variational approach for Markov processes
        :footcite:`noe2013variational` :footcite:`wu2020variational`, see
        :meth:`deeptime.decomposition.vamp_score` for available options.
    dim : int or None, optional, default=None
        The maximum number of eigenvalues or singular values used in the score. If set to None, all available
        eigenvalues will be used.
    blocksplit : bool, optional, default=True
        Whether to perform blocksplitting (see :meth:`blocksplit_dtrajs`) before evaluating folds. Defaults to
        `True`. In case no blocksplitting is performed, individual dtrajs are used for training and
        validation. This means that at least two dtrajs must be provided (`len(dtrajs) >= 2`), otherwise this
        method raises an exception.
    random_state : None or int or np.random.RandomState
        Random seed to use.
    n_jobs : int, optional, default=1
        Number of jobs for folds. In case n_jobs is 1, no parallelization.

    References
    ----------
footbibliography:: """ from deeptime.util.parallel import handle_n_jobs from deeptime.util.types import ensure_timeseries_data if blocksplit and lagtime is None: raise ValueError("In case blocksplit is used, please provide a lagtime.") n_jobs = handle_n_jobs(n_jobs) if isinstance(fit_fetch, Estimator): fit_fetch = _FitFetch(fit_fetch) ttrajs = ensure_timeseries_data(trajs) # ensure format if splitting_mode not in ('sliding', 'sample'): raise ValueError('vamp_score_cv currently only supports count modes "sliding" and "sample"') scores = np.empty((n,), float) sliding = splitting_mode == 'sliding' args = [(i, fit_fetch, ttrajs, r, dim, lagtime, blocksplit, sliding, random_state, n_jobs) for i in range(n)] if n_jobs > 1: from multiprocessing import get_context with joining(get_context("spawn").Pool(processes=n_jobs)) as pool: for result in pool.imap_unordered(_worker, args): fold, score = result scores[fold] = score else: for fold in range(n): _, score = _worker(args[fold]) scores[fold] = score return scores