def _handle_slice(self, idx):
    """Resolve a ``[frames, dims]`` index against ``self._source.data``.

    Parameters
    ----------
    idx : index expression
        Normalised through ``np.index_exp``. One component selects frames
        (all dimensions are taken), two components select frames and
        dimensions.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(frames), len(dims))`` whose dtype is
        ``self._source.output_type()``.

    Raises
    ------
    IndexError
        If the index expression has more than two components.
    """
    key = np.index_exp[idx]
    frames = dims = None
    if isinstance(key, (tuple, list)):
        n_axes = len(key)
        if n_axes > 2:
            raise IndexError("Slice was more than two-dimensional, not supported.")
        if n_axes == 2:
            frames, dims = key
        elif n_axes == 1:
            frames, dims = key[0], slice(None, None, None)

    # Cumulative trajectory lengths; the last entry is the total frame count.
    traj_ends = np.cumsum(self._source.trajectory_lengths())

    # Explicit lists/arrays of frames are used as given; anything else is
    # expanded by _get_indices against the total number of frames.
    if not isinstance(frames, (list, np.ndarray)):
        frames = self._get_indices(frames, traj_ends[-1])
    dims = self._get_indices(dims, self._source.ndim)

    out = np.empty((len(frames), len(dims)), dtype=self._source.output_type())

    from pyemma.coordinates.clustering import UniformTimeClustering
    for row, frame in enumerate(frames):
        # Translate the linear frame number into (trajectory, frame) indices.
        itraj, local = UniformTimeClustering._idx_to_traj_idx(frame, traj_ends)
        out[row, :] = self._source.data[itraj][local, dims]
    return out
def setUpClass(cls):
    """Create the fixture shared by the tests of this class.

    Sets on ``cls``:

    * ``dtraj_dir`` -- a fresh temporary directory (``tempfile.mkdtemp``),
    * ``T`` -- total number of frames (5 components x 1000 samples),
    * ``X`` -- ``(T, 2)`` array sampled from a five-component Gaussian
      mixture, one component per consecutive block of 1000 frames,
    * ``km``, ``rs``, ``rt`` -- k-means, regular-space and uniform-time
      clusterings estimated on ``X``,
    * ``cl`` -- list holding the three estimators.

    Returns
    -------
    cls
    """
    from pyemma.coordinates.clustering import (
        KmeansClustering,
        RegularSpaceClustering,
        UniformTimeClustering,
    )

    cls.dtraj_dir = tempfile.mkdtemp()

    # generate Gaussian mixture
    means = [
        np.array([-3, 0]),
        np.array([-1, 1]),
        np.array([0, 0]),
        np.array([1, -1]),
        np.array([4, 2]),
    ]
    widths = [
        np.array([0.3, 2]),
        np.array([0.3, 2]),
        np.array([0.3, 2]),
        np.array([0.3, 2]),
        np.array([0.3, 2]),
    ]

    # continuous trajectory
    nsample = 1000
    cls.T = len(means) * nsample
    cls.X = np.zeros((cls.T, 2))
    for i, (mean, width) in enumerate(zip(means, widths)):
        segment = slice(i * nsample, (i + 1) * nsample)
        # Draw one independent value per frame. A bare randn() returns a
        # single scalar that would be broadcast over the whole segment,
        # collapsing every component to one repeated point.
        cls.X[segment, 0] = width[0] * np.random.randn(nsample) + mean[0]
        cls.X[segment, 1] = width[1] * np.random.randn(nsample) + mean[1]

    # cluster in different ways
    cls.km = KmeansClustering(n_clusters=100).estimate(cls.X)
    cls.rs = RegularSpaceClustering(dmin=0.5).estimate(cls.X)
    cls.rt = UniformTimeClustering(n_clusters=100).estimate(cls.X)
    cls.cl = [cls.km, cls.rs, cls.rt]
    return cls
def _get_itraj_random_accessible(self, itrajs, frames, dims):
    """Fetch selected frames and dimensions through the source iterator.

    Parameters
    ----------
    itrajs
        Trajectory selection, expanded by ``self._get_indices`` against
        ``self._source.ntraj``.
    frames
        Frame selection, expanded against the summed length of the
        selected trajectories.
    dims
        Dimension selection, expanded against ``self._source.ndim``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(frames), len(dims))`` with dtype
        ``self._source.output_type()``; row ``i`` belongs to ``frames[i]``.

    Raises
    ------
    IndexError
        If a requested dimension is not smaller than ``self._source.ndim``.
    """
    itrajs = self._get_indices(itrajs, self._source.ntraj)
    frames = self._get_indices(frames, sum(self._source.trajectory_lengths()[itrajs]))
    dims = self._get_indices(dims, self._source.ndim)

    nframes = len(frames)
    ndims = len(dims)

    # Valid dimension indices are 0 .. ndim - 1, so ndim itself is out of range.
    if max(dims) >= self._source.ndim:
        raise IndexError("Data only has %s dimensions, wanted to slice by dimension %s."
                         % (self._source.ndim, max(dims)))

    cumsum = np.cumsum(self._source.trajectory_lengths()[itrajs])

    from pyemma.coordinates.clustering import UniformTimeClustering
    # One (trajectory, frame) row per requested frame.
    ra = np.array([
        self._map_to_absolute_traj_idx(UniformTimeClustering._idx_to_traj_idx(x, cumsum), itrajs)
        for x in frames
    ])

    # Sort by trajectory, then by frame within the trajectory. ``indices``
    # remembers which output row each sorted entry belongs to.
    indices = np.lexsort((ra[:, 1], ra[:, 0]))
    ra = ra[indices]

    data = np.empty((nframes, ndims), dtype=self._source.output_type())

    curr = 0
    for X in self._source.iterator(stride=ra, lag=0, chunk=0, return_trajindex=False):
        L = len(X)
        # Keep only the requested dimensions so each chunk matches the
        # (nframes, ndims) output buffer.
        data[indices[curr:curr + L]] = X[:, dims]
        curr += L
    return data
def _handle_slice(self, idx):
    """Resolve a ``[frames, dims]`` index by streaming from the source iterator.

    Parameters
    ----------
    idx : index expression
        Normalised through ``np.index_exp``. One component selects frames
        (all dimensions are taken), two components select frames and
        dimensions.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(frames), len(dims))`` with dtype
        ``self._source.output_type()``, rows in the order the frames were
        requested.

    Raises
    ------
    IndexError
        If the index expression has more than two components.
    """
    key = np.index_exp[idx]
    frames = dims = None
    if isinstance(key, (tuple, list)):
        n_axes = len(key)
        if n_axes > 2:
            raise IndexError("Slice was more than two-dimensional, not supported.")
        if n_axes == 2:
            frames, dims = key
        elif n_axes == 1:
            frames, dims = key[0], slice(None, None, None)

    # Cumulative trajectory lengths; the last entry is the total frame count.
    traj_ends = np.cumsum(self._source.trajectory_lengths())
    frames = self._get_indices(frames, traj_ends[-1])
    dims = self._get_indices(dims, self._source.ndim)

    # Frames are read in ascending order. The double argsort yields the rank
    # of each requested frame, which restores the caller's order at the end.
    restore_order = frames.argsort().argsort()
    ascending = np.sort(frames)

    from pyemma.coordinates.clustering import UniformTimeClustering
    ra_stride = np.array([
        UniformTimeClustering._idx_to_traj_idx(frame, traj_ends)
        for frame in ascending
    ])

    out = np.empty((len(frames), len(dims)), dtype=self._source.output_type())

    filled = 0
    chunks = self._source.iterator(stride=ra_stride, lag=0, chunk=0, return_trajindex=False)
    for chunk in chunks:
        stop = filled + len(chunk)
        out[filled:stop, :] = chunk[:, dims]
        filled = stop
    return out[restore_order]
def _get_itraj_random_accessible(self, itrajs, frames, dims):
    """Fetch selected frames and dimensions directly from ``self._source.data``.

    Parameters
    ----------
    itrajs
        Trajectory selection, expanded by ``self._get_indices`` against
        ``self._source.ntraj``.
    frames
        Frame selection, expanded against the summed length of the
        selected trajectories.
    dims
        Dimension selection, expanded against ``self._source.ndim``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(frames), len(dims))`` with dtype
        ``self._source.output_type()``; row ``i`` belongs to ``frames[i]``.

    Raises
    ------
    IndexError
        If a requested dimension is not smaller than ``self._source.ndim``.
    """
    itrajs = self._get_indices(itrajs, self._source.ntraj)
    frames = self._get_indices(frames, sum(self._source.trajectory_lengths()[itrajs]))
    dims = self._get_indices(dims, self._source.ndim)

    nframes = len(frames)
    ndims = len(dims)

    # Valid dimension indices are 0 .. ndim - 1, so ndim itself is out of range.
    if max(dims) >= self._source.ndim:
        raise IndexError("Data only has %s dimensions, wanted to slice by dimension %s."
                         % (self._source.ndim, max(dims)))

    cumsum = np.cumsum(self._source.trajectory_lengths()[itrajs])
    data = np.empty((nframes, ndims), dtype=self._source.output_type())

    from pyemma.coordinates.clustering import UniformTimeClustering
    for i, x in enumerate(frames):
        # Translate the frame number within the selected trajectories into a
        # (trajectory, frame) pair usable on self._source.data.
        traj, idx = self._map_to_absolute_traj_idx(
            UniformTimeClustering._idx_to_traj_idx(x, cumsum), itrajs)
        data[i, :] = self._source.data[traj][idx, dims]
    return data