Ejemplo n.º 1
0
    def sample_indexes_by_cluster(self, clusters, nsample, replace=True):
        """Draw trajectory/time index samples for the requested clusters.

        Parameters
        ----------
        clusters : iterable of integers
            Indexes of the clusters to sample from.

        nsample : int
            Number of samples per cluster. If replace = False, fewer than nsample samples may be returned
            for a cluster that has fewer than nsample indexes available.

        replace : boolean, optional
            Whether the sample is drawn with or without replacement.

        Returns
        -------
        indexes : list of ndarray( (N, 2) )
            List of the sampled indices by cluster.
            Each element is an index array with one row per sample drawn for that cluster. Every row is a
            tuple (i, t), where i is the index of the trajectory and t is the time index within the trajectory.
        """

        # The state index catalogue is built lazily: an empty catalogue means
        # index_states has not been run on the discrete trajectories yet.
        # NOTE(review): len() is kept instead of a truthiness test because the
        # catalogue is fancy-indexed below and may be an array - confirm.
        catalogue_is_empty = len(self._index_states) == 0
        if catalogue_is_empty:
            self._index_states = index_states(self.dtrajs)

        # Restrict the catalogue to the requested clusters before sampling.
        selected = self._index_states[clusters]
        return sample_indexes_by_state(selected, nsample, replace=replace)
Ejemplo n.º 2
0
 def test_sample_by_state_replace(self):
     """Sampling with replacement returns 5 frames per state, each belonging to that state."""
     dtraj = [0, 1, 2, 3, 2, 1, 0]
     state_frames = dt.index_states(dtraj)
     samples = dt.sample_indexes_by_state(state_frames, 5)
     for state in range(4):
         drawn = samples[state]
         # With replacement, the full sample count is expected for every state.
         assert drawn.shape[0] == 5
         # Column 1 is used as the time index into dtraj; each sampled frame
         # must carry the state it was sampled for.
         for row in range(drawn.shape[0]):
             assert dtraj[drawn[row, 1]] == state
 def test_sample_by_state_replace(self):
     """Sampling with replacement returns 5 frames per state, each belonging to that state."""
     dtraj = [0, 1, 2, 3, 2, 1, 0]
     catalogue = dt.index_states(dtraj)
     sampled = dt.sample_indexes_by_state(catalogue, 5)
     for state in range(4):
         frames = sampled[state]
         n_frames = frames.shape[0]
         # With replacement, the full sample count is expected for every state.
         assert n_frames == 5
         # Column 1 is used as the time index into dtraj; each sampled frame
         # must carry the state it was sampled for.
         for row in range(n_frames):
             time_index = frames[row, 1]
             assert dtraj[time_index] == state
Ejemplo n.º 4
0
 def test_sample_by_state_replace_subset(self):
     """Sampling a subset of states returns 5 frames per subset entry, each in that state."""
     dtraj = [0, 1, 2, 3, 2, 1, 0]
     subset = [1, 2]
     state_frames = dt.index_states(dtraj)
     samples = dt.sample_indexes_by_state(state_frames, 5, subset=subset)
     # The result is indexed by position within the subset, not by state id.
     for position, state in enumerate(subset):
         drawn = samples[position]
         assert drawn.shape[0] == 5
         # Column 1 is used as the time index into dtraj.
         for row in range(drawn.shape[0]):
             assert dtraj[drawn[row, 1]] == state
 def test_sample_by_state_replace_subset(self):
     """Sampling a subset of states returns 5 frames per subset entry, each in that state."""
     dtraj = [0, 1, 2, 3, 2, 1, 0]
     catalogue = dt.index_states(dtraj)
     subset = [1, 2]
     sampled = dt.sample_indexes_by_state(catalogue, 5, subset=subset)
     # The result is indexed by position within the subset, not by state id.
     for position, expected_state in enumerate(subset):
         frames = sampled[position]
         n_frames = frames.shape[0]
         assert n_frames == 5
         # Column 1 is used as the time index into dtraj.
         for row in range(n_frames):
             time_index = frames[row, 1]
             assert dtraj[time_index] == expected_state
Ejemplo n.º 6
0
    def sample_by_state(self, nsample, subset=None, replace=True):
        """Generates samples of the connected states.

        For each state in the active set of states, generates nsample samples with trajectory/time indexes.
        This information can be used in order to generate a trajectory of length nsample * nconnected using
        :func:`pyemma.coordinates.save_traj` or nconnected trajectories of length nsample each using
        :func:`pyemma.coordinates.save_trajs`

        Parameters
        ----------
        nsample : int
            Number of samples per state. If replace = False, the number of returned samples per state could be
            smaller if fewer than nsample indexes are available for a state.
        subset : ndarray((n)), optional, default = None
            Array of states to be indexed. By default all states in the connected set will be used.
        replace : boolean, optional
            Whether the sample is drawn with or without replacement.

        Returns
        -------
        indexes : list of ndarray( (N, 2) )
            List of trajectory/time index arrays with an array for each state.
            Within each index array, each row consists of a tuple (i, t), where i is
            the index of the trajectory and t is the time index within the trajectory.

        See also
        --------
        pyemma.coordinates.save_traj
            in order to save the sampled frames sequentially in a trajectory file with molecular structures
        pyemma.coordinates.save_trajs
            in order to save the sampled frames in nconnected trajectory files with molecular structures

        """
        # Guard call; presumably fails when the model has not been estimated yet - confirm.
        self._check_is_estimated()

        # Imported locally, as in the original implementation.
        import pyemma.util.discrete_trajectories as dt

        # Sample from the trajectory/time indexes of the active (connected) states.
        active_indexes = self.active_state_indexes
        sampled = dt.sample_indexes_by_state(active_indexes, nsample, subset=subset, replace=replace)
        return sampled