Ejemplo n.º 1
0
    def get_output(self, dimensions=slice(0, None), stride=1, skip=0, chunk=0):
        if isinstance(dimensions, int):
            ndim = 1
            dimensions = slice(dimensions, dimensions + 1)
        elif isinstance(dimensions, (list, np.ndarray, tuple, slice)):
            if hasattr(dimensions, 'ndim') and dimensions.ndim > 1:
                raise ValueError('dimension indices can\'t have more than one dimension')
            ndim = len(np.zeros(self.ndim)[dimensions])
        else:
            raise ValueError('unsupported type (%s) of "dimensions"' % type(dimensions))

        assert ndim > 0, "ndim was zero in %s" % self.__class__.__name__

        # create iterator
        if self.in_memory and not self._mapping_to_mem_active:
            from pyemma.coordinates.data.data_in_memory import DataInMemory
            assert self._Y is not None
            it = DataInMemory(self._Y)._create_iterator(skip=skip, chunk=chunk,
                                                        stride=stride, return_trajindex=True)
        else:
            it = self._create_iterator(skip=skip, chunk=chunk, stride=stride, return_trajindex=True)

        with it:
            # allocate memory
            try:
                # TODO: avoid having a copy here, if Y is already filled
                trajs = [np.empty((l, ndim), dtype=self.output_type())
                         for l in it.trajectory_lengths()]
            except MemoryError:
                self._logger.exception("Could not allocate enough memory to map all data."
                                       " Consider using a larger stride.")
                return

            if self._logger_is_active(self._loglevel_DEBUG):
                self._logger.debug("get_output(): dimensions=%s" % str(dimensions))
                self._logger.debug("get_output(): created output trajs with shapes: %s"
                                   % [x.shape for x in trajs])
            # fetch data
            self.logger.debug("nchunks :%s, chunksize=%s" % (it._n_chunks, it.chunksize))
            self._progress_register(it._n_chunks,
                                    description='getting output of %s' % self.__class__.__name__,
                                    stage=1)
            for itraj, chunk in it:
                L = len(chunk)
                if L > 0:
                    trajs[itraj][it.pos:it.pos + L, :] = chunk[:, dimensions]

                # update progress
                self._progress_update(1, stage=1)

        return trajs
Ejemplo n.º 2
0
    def get_output(self,
                   dimensions=slice(0, None),
                   stride=1,
                   skip=0,
                   chunk=None):
        """Maps all input data of this transformer and returns it as an array or list of arrays

        Parameters
        ----------
        dimensions : list-like of indexes or slice, default=all
           indices of dimensions you like to keep.
        stride : int, default=1
           only take every n'th frame.
        skip : int, default=0
            initially skip n frames of each file.
        chunk: int, default=None
            How many frames to process at once. If not given obtain the chunk size
            from the source.

        Returns
        -------
        output : list of ndarray(T_i, d)
           the mapped data, where T is the number of time steps of the input data, or if stride > 1,
           floor(T_in / stride). d is the output dimension of this transformer.
           If the input consists of a list of trajectories, Y will also be a corresponding list of trajectories

        """
        if isinstance(dimensions, int):
            ndim = 1
            dimensions = slice(dimensions, dimensions + 1)
        elif isinstance(dimensions, (list, np.ndarray, tuple, slice)):
            if hasattr(dimensions, 'ndim') and dimensions.ndim > 1:
                raise ValueError(
                    'dimension indices can\'t have more than one dimension')
            ndim = len(np.zeros(self.ndim)[dimensions])
        else:
            raise ValueError('unsupported type (%s) of "dimensions"' %
                             type(dimensions))

        assert ndim > 0, "ndim was zero in %s" % self.__class__.__name__

        if chunk is None:
            chunk = self.chunksize

        # create iterator
        if self.in_memory and not self._mapping_to_mem_active:
            from pyemma.coordinates.data.data_in_memory import DataInMemory
            assert self._Y is not None
            it = DataInMemory(self._Y)._create_iterator(skip=skip,
                                                        chunk=chunk,
                                                        stride=stride,
                                                        return_trajindex=True)
        else:
            it = self._create_iterator(skip=skip,
                                       chunk=chunk,
                                       stride=stride,
                                       return_trajindex=True)

        with it:
            # allocate memory
            try:
                # TODO: avoid having a copy here, if Y is already filled
                trajs = [
                    np.empty((l, ndim), dtype=self.output_type())
                    for l in it.trajectory_lengths()
                ]
            except MemoryError:
                self._logger.exception(
                    "Could not allocate enough memory to map all data."
                    " Consider using a larger stride.")
                return

            if self._logger_is_active(self._loglevel_DEBUG):
                self._logger.debug("get_output(): dimensions=%s" %
                                   str(dimensions))
                self._logger.debug(
                    "get_output(): created output trajs with shapes: %s" %
                    [x.shape for x in trajs])
            # fetch data
            self.logger.debug("nchunks :%s, chunksize=%s" %
                              (it.n_chunks, it.chunksize))
            self._progress_register(it.n_chunks,
                                    description='getting output of %s' %
                                    self.__class__.__name__,
                                    stage=1)
            for itraj, chunk in it:
                L = len(chunk)
                if L > 0:
                    trajs[itraj][it.pos:it.pos + L, :] = chunk[:, dimensions]

                # update progress
                self._progress_update(1, stage=1)

        return trajs