def get_output(self, dimensions=slice(0, None), stride=1, skip=0, chunk=0):
    """Map all input data through this transformer and return it as arrays.

    Parameters
    ----------
    dimensions : int, slice, or list-like of ints, default=all
        indices of the output dimensions to keep.
    stride : int, default=1
        only take every n'th frame.
    skip : int, default=0
        initially skip n frames of each trajectory.
    chunk : int, default=0
        how many frames to process at once.

    Returns
    -------
    trajs : list of ndarray(T_i, d)
        one mapped array per input trajectory (T_i frames, d kept
        dimensions), or None if memory allocation failed.
    """
    # Normalize 'dimensions' to a column indexer and count the surviving
    # output dimensions.
    if isinstance(dimensions, int):
        ndim = 1
        dimensions = slice(dimensions, dimensions + 1)
    elif isinstance(dimensions, (list, np.ndarray, tuple, slice)):
        if hasattr(dimensions, 'ndim') and dimensions.ndim > 1:
            raise ValueError('dimension indices can\'t have more than one dimension')
        # index a dummy vector of length self.ndim to count selected dims
        ndim = len(np.zeros(self.ndim)[dimensions])
    else:
        raise ValueError('unsupported type (%s) of "dimensions"' % type(dimensions))

    assert ndim > 0, "ndim was zero in %s" % self.__class__.__name__

    # create iterator
    if self.in_memory and not self._mapping_to_mem_active:
        # data has already been mapped into memory: iterate the cached result
        from pyemma.coordinates.data.data_in_memory import DataInMemory
        assert self._Y is not None
        it = DataInMemory(self._Y)._create_iterator(skip=skip, chunk=chunk,
                                                    stride=stride,
                                                    return_trajindex=True)
    else:
        it = self._create_iterator(skip=skip, chunk=chunk, stride=stride,
                                   return_trajindex=True)

    with it:
        # allocate memory
        try:
            # TODO: avoid having a copy here, if Y is already filled
            trajs = [np.empty((l, ndim), dtype=self.output_type())
                     for l in it.trajectory_lengths()]
        except MemoryError:
            self._logger.exception("Could not allocate enough memory to map all data."
                                   " Consider using a larger stride.")
            return

        if self._logger_is_active(self._loglevel_DEBUG):
            self._logger.debug("get_output(): dimensions=%s" % str(dimensions))
            self._logger.debug("get_output(): created output trajs with shapes: %s"
                               % [x.shape for x in trajs])

        # fetch data
        # FIX: use self._logger consistently (this call alone used self.logger)
        self._logger.debug("nchunks :%s, chunksize=%s" % (it._n_chunks, it.chunksize))
        self._progress_register(it._n_chunks,
                                description='getting output of %s' % self.__class__.__name__,
                                stage=1)
        for itraj, chunk in it:
            L = len(chunk)
            if L > 0:
                # write the selected columns of this chunk into its slot
                trajs[itraj][it.pos:it.pos + L, :] = chunk[:, dimensions]
            # update progress
            self._progress_update(1, stage=1)

    return trajs
def get_output(self, dimensions=slice(0, None), stride=1, skip=0, chunk=None):
    """Maps all input data of this transformer and returns it as an array or list of arrays

    Parameters
    ----------
    dimensions : list-like of indexes or slice, default=all
        indices of dimensions you like to keep.
    stride : int, default=1
        only take every n'th frame.
    skip : int, default=0
        initially skip n frames of each file.
    chunk : int, default=None
        How many frames to process at once. If not given obtain the chunk size
        from the source.

    Returns
    -------
    output : list of ndarray(T_i, d)
        the mapped data, where T is the number of time steps of the input data,
        or if stride > 1, floor(T_in / stride). d is the output dimension of
        this transformer. If the input consists of a list of trajectories, Y
        will also be a corresponding list of trajectories.
    """
    # Normalize 'dimensions' to a column indexer and count the surviving
    # output dimensions.
    if isinstance(dimensions, int):
        ndim = 1
        dimensions = slice(dimensions, dimensions + 1)
    elif isinstance(dimensions, (list, np.ndarray, tuple, slice)):
        if hasattr(dimensions, 'ndim') and dimensions.ndim > 1:
            raise ValueError(
                'dimension indices can\'t have more than one dimension')
        # index a dummy vector of length self.ndim to count selected dims
        ndim = len(np.zeros(self.ndim)[dimensions])
    else:
        raise ValueError('unsupported type (%s) of "dimensions"' % type(dimensions))

    assert ndim > 0, "ndim was zero in %s" % self.__class__.__name__

    # fall back to the source's chunk size when the caller gave none
    if chunk is None:
        chunk = self.chunksize

    # create iterator
    if self.in_memory and not self._mapping_to_mem_active:
        # data has already been mapped into memory: iterate the cached result
        from pyemma.coordinates.data.data_in_memory import DataInMemory
        assert self._Y is not None
        it = DataInMemory(self._Y)._create_iterator(skip=skip, chunk=chunk,
                                                    stride=stride,
                                                    return_trajindex=True)
    else:
        it = self._create_iterator(skip=skip, chunk=chunk, stride=stride,
                                   return_trajindex=True)

    with it:
        # allocate memory
        try:
            # TODO: avoid having a copy here, if Y is already filled
            trajs = [
                np.empty((l, ndim), dtype=self.output_type())
                for l in it.trajectory_lengths()
            ]
        except MemoryError:
            self._logger.exception("Could not allocate enough memory to map all data."
                                   " Consider using a larger stride.")
            return

        if self._logger_is_active(self._loglevel_DEBUG):
            self._logger.debug("get_output(): dimensions=%s" % str(dimensions))
            self._logger.debug(
                "get_output(): created output trajs with shapes: %s"
                % [x.shape for x in trajs])

        # fetch data
        # FIX: use self._logger consistently (this call alone used self.logger)
        self._logger.debug("nchunks :%s, chunksize=%s" % (it.n_chunks, it.chunksize))
        self._progress_register(it.n_chunks,
                                description='getting output of %s' % self.__class__.__name__,
                                stage=1)
        for itraj, chunk in it:
            L = len(chunk)
            if L > 0:
                # write the selected columns of this chunk into its slot
                trajs[itraj][it.pos:it.pos + L, :] = chunk[:, dimensions]
            # update progress
            self._progress_update(1, stage=1)

    return trajs