예제 #1
0
def position_based_fluids(n_burn_in=5000, initial_positions=None, n_jobs=None):
    r"""Builds an equilibrated position based fluids (PBF) simulator :footcite:`macklin2013position`.

    Up to numerics the simulation is deterministic.

    The simulation box has dimensions :math:`[-40, 40]\times [-25, 25]`. Unless `initial_positions` is given,
    the particles are placed on a regular grid spanning :math:`[-24, 24) \times [-12, 24)` whose spacing is
    :math:`0.9\,d`, where :math:`d = 1.5` is the interaction distance. This default layout yields 972 particles.
    For a different box or interaction distance, please use the simulator directly.

    Before the simulator is returned, `n_burn_in` steps with zero drift are performed to equilibrate the system.

    For more details see :class:`PBFSimulator <deeptime.data.PBFSimulator>`.

    .. plot::

        import matplotlib.pyplot as plt
        import deeptime

        ftraj = deeptime.data.position_based_fluids(n_burn_in=150).run(300)
        f, axes = plt.subplots(3, 2, figsize=(15, 10))
        for i, ax in enumerate(axes.flat):
            ax.scatter(*(ftraj[i*50].reshape(-1, 2).T))
            ax.set_title("t = {}".format(i*50))
            ax.grid()
        f.suptitle(r'PBF dataset observations.')
        plt.show()

    Parameters
    ----------
    n_burn_in : int, default=5000
        Number of equilibration steps which are run without any drift force.
    initial_positions : ndarray, optional, default=None
        Explicit initial particle positions. If None, the default grid described above is used.
    n_jobs : int or None, default=None
        Number of threads to use for simulation.

    Returns
    -------
    simulator : deeptime.data.PBFSimulator
        The equilibrated PBF simulator.

    References
    ----------
    .. footbibliography::
    """
    from deeptime.util.parallel import handle_n_jobs

    n_jobs = handle_n_jobs(n_jobs)
    interaction_distance = 1.5

    if initial_positions is None:
        # Particles sit slightly closer together than the interaction distance.
        spacing = interaction_distance * .9
        xs = np.arange(-24, 24, spacing).astype(np.float32)
        ys = np.arange(-12, 24, spacing).astype(np.float32)
        grid_x, grid_y = np.meshgrid(xs, ys)
        # Pair up the coordinates and flatten the grid into a list of (x, y) rows.
        initial_positions = np.stack((grid_x, grid_y), axis=-1).reshape(-1, 2)

    simulator = PBFSimulator(
        domain_size=np.array([80, 50]),
        initial_positions=initial_positions,
        interaction_distance=interaction_distance,
        n_jobs=n_jobs,
    )
    # Burn-in phase: second argument is the drift, which is switched off here.
    simulator.run(n_burn_in, 0)
    return simulator
예제 #2
0
def _transform_to_density_impl(domain_size, trajectory, n_grid_x=20, n_grid_y=10, n_jobs=None):
    r"""Evaluates a per-frame density of a particle trajectory on a regular grid using a process pool.

    Parameters
    ----------
    domain_size : array_like
        Extent of the box in x (index 0) and y (index 1); the grid spans
        ``[-domain_size[i] / 2, domain_size[i] / 2]`` along each axis.
    trajectory : ndarray
        Particle trajectory with ``T`` frames; it is reshaped to ``(T, -1, 2)``.
    n_grid_x : int, default=20
        Number of grid points in x direction.
    n_grid_y : int, default=10
        Number of grid points in y direction.
    n_jobs : int or None, default=None
        Number of worker processes, resolved via ``handle_n_jobs``.

    Returns
    -------
    densities : (T, n_grid_x * n_grid_y) ndarray
        One row of grid evaluations per frame.
    """
    import multiprocessing as mp

    frames = trajectory.reshape((len(trajectory), -1, 2))

    width, height = domain_size[0], domain_size[1]
    xs = np.linspace(-width / 2, width / 2, num=n_grid_x).astype(np.float32)
    ys = np.linspace(-height / 2, height / 2, num=n_grid_y).astype(np.float32)
    # Flattened list of all (x, y) grid points at which each frame is evaluated.
    eval_points = np.dstack(np.meshgrid(xs, ys)).reshape(-1, 2)

    densities = np.empty((len(frames), len(eval_points)))

    with mp.Pool(processes=handle_n_jobs(n_jobs)) as pool:
        # Each job carries its frame index so that unordered results can be stored in the right row.
        jobs = [(index, frame, eval_points) for index, frame in enumerate(frames)]
        for outcome in pool.imap_unordered(_transform_to_density_impl_worker, jobs):
            frame_index, frame_density = outcome[0], outcome[1]
            densities[frame_index] = frame_density

    return densities
예제 #3
0
    def __init__(self, domain_size: np.ndarray, initial_positions: np.ndarray,
                 interaction_distance: float, n_jobs=None, n_solver_iterations: int = 5,
                 gravity: float = 10., epsilon: float = 10., timestep: float = 0.016, rest_density: float = 1.,
                 tensile_instability_distance: float = .2, tensile_instability_k: float = 0.1):
        r"""Validates the inputs, creates the simulation engine and configures it.

        Parameters
        ----------
        domain_size : (2,) array_like
            Size of the simulation domain, both entries must be positive.
        initial_positions : (N, 2) array_like
            Initial positions of the N particles.
        interaction_distance : float
            Interaction distance, must be positive.
        n_jobs : int or None, default=None
            Number of jobs, resolved via ``handle_n_jobs`` and handed to the engine.
        n_solver_iterations, gravity, epsilon, timestep, rest_density, \
        tensile_instability_distance, tensile_instability_k
            Each value is assigned to the engine attribute of the same name.

        Raises
        ------
        ValueError
            If `domain_size` is not a positive one-dimensional array of length two, if `initial_positions`
            is not of shape (N, 2), or if `interaction_distance` is not positive.
        """
        # Convert up front so that lists/tuples are validated (and rejected) with a ValueError
        # rather than failing with an AttributeError on `.shape` / `.ndim`.
        domain_size = np.asarray(domain_size)
        initial_positions = np.asarray(initial_positions)

        if domain_size.ndim != 1 or domain_size.shape[0] != 2 or np.any(domain_size <= 0):
            raise ValueError("Invalid domain size: must be positive and 1-dimensional of length two.")
        if initial_positions.ndim != 2 or initial_positions.shape[1] != 2:
            raise ValueError("initial positions must be a 2-dimensional numpy array of shape (N, 2), where N is "
                             "the number of particles.")
        if interaction_distance <= 0:
            raise ValueError("Interaction distance must be positive.")

        # The engine is fed single precision data; copies are only made if a conversion is required.
        domain_size = domain_size.astype(np.float32, subok=False, copy=False)
        initial_positions = initial_positions.astype(np.float32, subok=False, copy=False)

        self._engine = bd.PBF(initial_positions, domain_size, interaction_distance, handle_n_jobs(n_jobs))
        self._engine.n_solver_iterations = n_solver_iterations
        self._engine.gravity = gravity
        self._engine.epsilon = epsilon
        self._engine.timestep = timestep
        self._engine.rest_density = rest_density
        self._engine.tensile_instability_distance = tensile_instability_distance
        self._engine.tensile_instability_k = tensile_instability_k
예제 #4
0
    def transform_to_density(self, trajectory, n_grid_x=20, n_grid_y=10, n_jobs=None):
        r"""Converts a two-dimensional PBF particle trajectory into a trajectory of densities via KDEs.

        As it is known a priori that no particles get lost along the way, the densities are
        normalized frame-wise.

        Parameters
        ----------
        trajectory : (T, n, 2) ndarray
            The input trajectory for n particles.
        n_grid_x : int, default=20
            Number of evaluation points of the simulation box in x direction.
        n_grid_y : int, default=10
            Number of evaluation points of the simulation box in y direction.
        n_jobs : int or None, default=None
            Number of jobs to use when transforming to densities.

        Returns
        -------
        trajectory : (T, n_grid_x * n_grid_y) ndarray
            Output trajectory of densities.
        """
        resolved_jobs = handle_n_jobs(n_jobs)
        return _transform_to_density_impl(
            domain_size=self.domain_size,
            trajectory=trajectory,
            n_grid_x=n_grid_x,
            n_grid_y=n_grid_y,
            n_jobs=resolved_jobs,
        )
예제 #5
0
def bickley_jet(n_particles: int,
                n_jobs: Optional[int] = None,
                seed: Optional[int] = None) -> BickleyJetDataset:
    r"""Generates a Bickley jet dataset by propagating a number of particles.

    The implementation follows :footcite:`hadjighasem2016spectral` and uses the parameters

    .. math::

            \begin{aligned}
                U_0 &= 5.4138 \times \frac{10^6\mathrm{m}}{\mathrm{day}},\\
                L_0 &= 1.77 \times 10^6\,\mathrm{m},\\
                r_0 &= 6.371 \times 10^6\,\mathrm{m},\\
                c &= (0.1446, 0.205, 0.461)^\top U_0,\\
                \mathrm{eps} &= (0.075, 0.15, 0.3)^\top,\\
                k &= (2,4,6)^\top \frac{1}{r_0},
            \end{aligned}

    in a domain :math:`\Omega = [0, 20] \times [-3, 3]` which is periodic in x-direction. The resulting dataset
    contains the temporal evolution of :code:`n_particles` over 401 timesteps in :math:`\Omega`.
    The dataset offers methods to wrap the domain into three-dimensional
    space onto the surface of a cylinder

    .. math::

        \begin{pmatrix} x \\ y \end{pmatrix} \mapsto \begin{pmatrix}
            r\cdot \cos\left( 2\pi \frac{x}{20} \right) \\
            r\cdot \sin\left( 2\pi \frac{x}{20} \right) \\
            \frac{y}{3}
        \end{pmatrix},

    with the option to further discretize the three-dimensional dataspace via binning. This is a way of
    treating the discontinuity introduced by 2D periodicity.

    .. plot::

        import matplotlib.pyplot as plt
        import deeptime

        n_particles = 1000
        dataset = deeptime.data.bickley_jet(n_particles, n_jobs=8)

        fig, axes = plt.subplots(2, 3, sharex=True, sharey=True, figsize=(16, 10))

        for t, ax in zip([0, 1, 2, 200, 300, 400], axes.flatten()):
            ax.scatter(*dataset[t].T, c=dataset[0, :, 0], s=50)
            ax.set_title(f"Particles at t={t}")

    Parameters
    ----------
    n_particles : int
        Number of particles which are propagated.
    n_jobs : int or None, default=None
        Number of threads to use for simulation.
    seed : int or None, optional, default=None
        Random seed used for initialization of particle positions at :math:`t=0`.

    Returns
    -------
    dataset : BickleyJetDataset
        Dataset over all the generated frames.

    See Also
    --------
    BickleyJet
        Underlying trajectory generator.

    Examples
    --------

    >>> import deeptime
    >>> dataset = deeptime.data.bickley_jet(n_particles=5, n_jobs=1)
    >>> # shape is 401 frames for 5 particles in two dimensions
    >>> print(dataset.data.shape)
    (401, 5, 2)

    >>> # returns a timelagged dataset for first and last frame
    >>> endpoints = dataset.endpoints_dataset()
    >>> endpoints.data.shape
    (5, 2)

    >>> # maps the endpoints dataset onto a cylinder of radius 5
    >>> endpoints_3d = endpoints.to_3d(radius=5.)
    >>> endpoints_3d.data.shape
    (5, 3)

    >>> # bins the data uniformly with 10 bins per axis
    >>> endpoints_3d_clustered = endpoints_3d.cluster(n_bins=10)
    >>> # 5 particles and 10*10*10 bins
    >>> endpoints_3d_clustered.data.shape
    (5, 1000)

    References
    ----------
    .. footbibliography::
    """
    from deeptime.util.parallel import handle_n_jobs

    n_threads = handle_n_jobs(n_jobs)
    generator = BickleyJet(h=1e-2, n_steps=10)
    particle_trajectories = generator.generate(n_particles=n_particles, n_jobs=n_threads, seed=seed)
    # Swap the first two axes before wrapping; judging by the doctest above the dataset is frame-major,
    # so the generator presumably yields particle-major data.
    frames = particle_trajectories.transpose(1, 0, 2)
    return BickleyJetDataset(frames)
예제 #6
0
파일: api.py 프로젝트: thempel/scikit-time
def effective_count_matrix(dtrajs,
                           lag,
                           average='row',
                           mact=1.0,
                           n_jobs=None,
                           callback=None):
    r""" Computes the statistically effective transition count matrix.

    Given a list of discrete trajectories, this computes the effective number of statistically uncorrelated
    transition counts at the given lag time. First the full sliding-window counts :math:`c_{ij}(\tau)` are
    computed. Then :func:`statistical_inefficiencies` is used to obtain statistical inefficiencies
    :math:`I_{ij}(\tau)`. The number of effective counts in a row is then given by

    .. math::

        c_i^{\mathrm{eff}}(\tau) = \sum_j I_{ij}(\tau) c_{ij}(\tau)

    and the effective transition counts are obtained by scaling the rows accordingly:

    .. math::

        c_{ij}^{\mathrm{eff}}(\tau) = \frac{c_i^{\mathrm{eff}}(\tau)}{c_i(\tau)} c_{ij}(\tau)

    This procedure is not yet published, but a manuscript is in preparation [1]_.

    Parameters
    ----------
    dtrajs : list of int-iterables
        Discrete trajectories.
    lag : int
        Lag time.
    average : str, default='row'
        One of 'row', 'all', 'none', with the following consequences:

        * 'none': the statistical inefficiency is applied separately to each
          transition count (not recommended),
        * 'row': the statistical inefficiency is averaged (weighted) by row
          (recommended),
        * 'all': the statistical inefficiency is averaged (weighted) over all
          transition counts (not recommended).
    mact : float, default=1.0
        Multiplier for the autocorrelation time. The autocorrelation time tends to be
        underestimated (and the effective counts thus overestimated)
        because the autocorrelation function is truncated when it passes
        through 0 in order to avoid numerical instabilities.
        This is a purely heuristic factor trying to compensate for this effect.
        This parameter might be removed in the future when a more robust
        estimation method of the autocorrelation time is used.
    n_jobs : int, default=None
        If None, uses all available logical cores, otherwise the function will be evaluated with as
        many processes as specified (must then be positive).
    callback : callable, default=None
        Will be called for every statistical inefficiency computed (number of nonzero elements in count matrix).
        If n_jobs is greater than one, the callback will be invoked per finished batch.

    See also
    --------
    statistical_inefficiencies
        is used for computing the statistical inefficiencies of sliding window transition counts

    References
    ----------
    .. [1] Noe, F. and H. Wu: in preparation (2015)

    """
    from deeptime.util.parallel import handle_n_jobs

    n_workers = handle_n_jobs(n_jobs)
    dtraj_list = ensure_dtraj_list(dtrajs)
    # The actual computation lives in the sparse submodule; this function only normalizes its inputs.
    compute = sparse.effective_counts.effective_count_matrix
    return compute(dtraj_list, lag, average=average, mact=mact, n_jobs=n_workers, callback=callback)
예제 #7
0
def vamp_score_cv(fit_fetch: Union[Estimator, Callable], trajs, lagtime=None, n=10, splitting_mode="sliding", r=2,
                  dim: Optional[int] = None, blocksplit: bool = True, random_state=None, n_jobs=1):
    r""" Scores the MSM using the variational approach for Markov processes and cross-validation.

    Implementation and ideas following :footcite:`noe2013variational` :footcite:`wu2020variational` and
    cross-validation :footcite:`mcgibbon2015variational`.

    Divides the data into training and test data, fits a MSM using the training
    data using the parameters of this estimator, and scores it using the test
    data.
    Currently only one way of splitting is implemented, where for each n,
    the data is randomly divided into two approximately equally large sets of
    discrete trajectory fragments with lengths of at least the lagtime.

    Currently only implemented using dense matrices - will be slow for large state spaces.

    Parameters
    ----------
    fit_fetch : callable or estimator
        Can be provided as callable for a custom fit and fetch method. Should be a function pointer or lambda which
        takes a list of discrete trajectories as input and yields a
        :class:`CovarianceKoopmanModel <deeptime.decomposition.CovarianceKoopmanModel>`. Or an estimator which
        yields this kind of model.
    trajs : list of array_like
        Input data.
    lagtime : int, optional, default=None
        Lag time, must be provided if blocksplitting is used, otherwise can be left None.
    n : int, optional, default=10
        Number of repetitions of the cross-validation. Use large n to get solid means of the score.
    splitting_mode : str, optional, default="sliding"
        Can be one of "sliding" and "sample". In former case the blocks may overlap, otherwise not.
    r : float or str, default=2
        Available scores are based on the variational approach for Markov processes :footcite:`noe2013variational`
        :footcite:`wu2020variational`, see :meth:`deeptime.decomposition.vamp_score` for available options.
    dim : int or None, optional, default=None
        The maximum number of eigenvalues or singular values used in the score. If set to None,
        all available eigenvalues will be used.
    blocksplit : bool, optional, default=True
        Whether to perform blocksplitting (see :meth:`blocksplit_dtrajs` ) before evaluating folds. Defaults to `True`.
        In case no blocksplitting is performed, individual dtrajs are used for training and validation. This means that
        at least two dtrajs must be provided (`len(dtrajs) >= 2`), otherwise this method raises an exception.
    random_state : None or int or np.random.RandomState
        Random seed to use.
    n_jobs : int, optional, default=1
        Number of jobs for folds. In case n_jobs is 1, no parallelization.

    Returns
    -------
    scores : (n,) ndarray
        The score of each cross-validation repetition.

    Raises
    ------
    ValueError
        If blocksplitting is requested without a lagtime or if `splitting_mode` is not supported.

    References
    ----------
    .. footbibliography::
    """
    from deeptime.util.parallel import handle_n_jobs
    from deeptime.util.types import ensure_timeseries_data

    if blocksplit and lagtime is None:
        raise ValueError("In case blocksplit is used, please provide a lagtime.")

    n_jobs = handle_n_jobs(n_jobs)
    # Estimators get wrapped so that both kinds of input can be treated as a plain callable downstream.
    if isinstance(fit_fetch, Estimator):
        fit_fetch = _FitFetch(fit_fetch)

    ttrajs = ensure_timeseries_data(trajs)  # ensure format
    supported_modes = ('sliding', 'sample')
    if splitting_mode not in supported_modes:
        raise ValueError('vamp_score_cv currently only supports count modes "sliding" and "sample"')
    sliding = splitting_mode == 'sliding'

    scores = np.empty((n,), dtype=float)
    # One argument tuple per fold; the leading entry is the fold index.
    args = [(fold, fit_fetch, ttrajs, r, dim, lagtime, blocksplit, sliding, random_state, n_jobs)
            for fold in range(n)]

    if n_jobs > 1:
        from multiprocessing import get_context
        with joining(get_context("spawn").Pool(processes=n_jobs)) as pool:
            # Results arrive in arbitrary order, the worker reports which fold each score belongs to.
            for fold, score in pool.imap_unordered(_worker, args):
                scores[fold] = score
    else:
        # Serial evaluation: the position in the argument list determines the fold.
        for fold, fold_args in enumerate(args):
            _, score = _worker(fold_args)
            scores[fold] = score
    return scores