def _estimate(self, dtrajs):
    ### PREPARE AND CHECK DATA
    # TODO: Currently only discrete trajectories are implemented. For a general class this needs to be changed.
    dtrajs = _types.ensure_dtraj_list(dtrajs)

    # check trajectory lengths
    if self._estimated:
        # if dtrajs has changed, unset the _estimated flag to re-compute every derived quantity.
        assert hasattr(self, '_last_dtrajs_input_hash')
        current_hash = _hash_dtrajs(dtrajs)
        if self._last_dtrajs_input_hash != current_hash:
            self.logger.warning("estimating from new data, discarding all previously computed models.")
            self._estimated = False
            self._last_dtrajs_input_hash = current_hash
    else:
        self._last_dtrajs_input_hash = _hash_dtrajs(dtrajs)

    self._trajlengths = np.fromiter((len(traj) for traj in dtrajs), dtype=int, count=len(dtrajs))
    maxlength = np.max(self._trajlengths)

    # set lag times by data if not yet set
    if self._lags is None:
        maxlag = 0.5 * np.sum(self._trajlengths) / float(len(self._trajlengths))
        self._lags = _generate_lags(maxlag, 1.5)

    # check if some lag times are forbidden.
    if np.max(self._lags) >= maxlength:
        Ifit = np.where(self._lags < maxlength)[0]
        Inofit = np.where(self._lags >= maxlength)[0]
        self.logger.warning('Ignoring lag times that exceed the longest trajectory: %s', self._lags[Inofit])
        self._lags = self._lags[Ifit]

    ### RUN ESTIMATION
    if self._estimated:
        # we already ran an estimation, determine which lag times we still need to compute
        # TODO: this will re-evaluate problematic lag times, won't it?
        lags = sorted(list(set(self._lags).difference(self._last_lags)))
        if len(lags) == 0:
            self.logger.info("All lag times already estimated.")
            return self
        assert lags
        self.logger.info("Running estimation for not yet estimated lag times: %s", lags)
    else:
        lags = self._lags

    # construct all parameter sets for the estimator
    param_sets = tuple(param_grid({'lag': lags}))

    # run estimation on all lag times
    pg = ProgressReporter()
    with pg.context():
        models, estimators = estimate_param_scan(self.estimator, dtrajs, param_sets, failfast=False,
                                                 return_estimators=True, n_jobs=self.n_jobs,
                                                 progress_reporter=pg, return_exceptions=True)
    self._estimators = estimators

    self._postprocess_results(models)
    return self
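# The helper ``_generate_lags(maxlag, 1.5)`` used above is not shown in this section. A
# minimal sketch of a geometric lag generator with that interface (an assumption for
# illustration only; the real pyemma helper may cap or round the lags differently):

import numpy as np

def _generate_lags_sketch(maxlag, multiplier=1.5):
    """Return integer lag times starting at 1 and growing by ``multiplier`` up to ``maxlag``."""
    lags = [1]
    while lags[-1] * multiplier <= maxlag:
        # round up so consecutive lags are always distinct integers
        lags.append(int(np.ceil(lags[-1] * multiplier)))
    return np.array(lags, dtype=int)

# e.g. _generate_lags_sketch(50) -> array([ 1,  2,  3,  5,  8, 12, 18, 27, 41])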
class TestProgress(unittest.TestCase):

    def setUp(self):
        self.pg = ProgressReporter()
        self.pg._progress_register(100, "test")

    def test_config_override(self):
        self.pg.show_progress = True
        with settings(show_progress_bars=False):
            assert not self.pg.show_progress

    def test_config_2(self):
        self.pg.show_progress = False
        with settings(show_progress_bars=True):
            assert not self.pg.show_progress
def test_force_finish(self):
    import warnings
    worker = ProgressReporter()
    worker._progress_register(100)

    # intentionally overshoot the registered amount of work
    with warnings.catch_warnings(record=True) as cm:
        warnings.simplefilter("always")  # make sure the overshoot warning is not filtered out
        worker._progress_update(101)

    self.assertIn("more work than registered", str(cm[0].message))
    worker._progress_force_finish()
def test_ctx3(self):
    pg = ProgressReporter()
    assert pg.show_progress
    pg.register(100, stage='test')
    pg.register(40, stage='test2')
    pg.register(25, stage='test3')
    try:
        with pg.context(stage=('test', 'test3')):
            pg.update(50, stage='test')
            pg.update(2, stage='test3')
            raise Exception()
    except Exception:
        assert pg.num_registered == 1
        assert 'test2' in pg.registered_stages
def _estimate(self, iterable, **kw):
    partial_fit = 'partial' in kw
    it = iterable.iterator(return_trajindex=False, chunk=self.chunksize,
                           stride=self.stride, skip=self.skip)

    from pyemma._base.progress import ProgressReporter
    pg = ProgressReporter()
    pg.register(it.n_chunks, "calc mean+cov", 0)

    with it, pg.context():
        self._init_covar(partial_fit, it.n_chunks)
        for chunk in it:
            self._covar.add(chunk)
            pg.update(1)

    self.cov = self._covar.cov_XX(bessel=True)
    self.mu = self._covar.mean_X()

    self._model.update_model_params(mean=self._covar.mean_X())
    if not partial_fit:
        self._diagonalize()

    return self._model
def test_mute_progress(self):
    """ switching mute on shall turn off progress bars """
    from pyemma._base.progress import ProgressReporter
    import mock
    rp = ProgressReporter()
    self.config_inst.mute = True
    with mock.patch('pyemma.config', self.config_inst):
        assert not rp.show_progress
def test_ctx4(self):
    pg = ProgressReporter()
    pg.register(100, 'test')
    pg.register(40, 'test2')
    try:
        with pg.context():
            pg.update(50, stage='all')
            raise Exception()
    except Exception:
        assert pg.num_registered == 0
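# The context-manager tests above exercise the public ProgressReporter API. A minimal
# usage sketch of the register/update/context pattern, using only calls that appear in
# the tests and estimators of this section:

from pyemma._base.progress import ProgressReporter

def process_items_sketch(items):
    pg = ProgressReporter()
    pg.register(len(items), description='processing', stage='work')
    with pg.context(stage='work'):  # the stage is unregistered on exit, even on error
        for item in items:
            # ... do the actual work on ``item`` here ...
            pg.update(1, stage='work')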
def test_callback(self):
    self.has_been_called = 0

    def call_back(stage, progressbar, *args, **kw):
        self.has_been_called += 1
        assert isinstance(stage, int)
        assert isinstance(progressbar, ProgressBar)

    amount_of_work = 100
    worker = ProgressReporter()
    worker._progress_register(amount_of_work, description="hard working", stage=0)
    worker.register_progress_callback(call_back, stage=0)
    for _ in range(amount_of_work):
        worker._progress_update(1, stage=0)
    self.assertEqual(self.has_been_called, amount_of_work)
def filenames(self, filename_list):
    if isinstance(filename_list, str):
        filename_list = [filename_list]

    uniq = set(filename_list)
    if len(uniq) != len(filename_list):
        self.logger.warning("duplicate files/arrays detected")
        filename_list = list(uniq)

    from pyemma.coordinates.data.data_in_memory import DataInMemory

    if self._is_reader:
        if isinstance(self, DataInMemory):
            import warnings
            warnings.warn('filenames are not being used for DataInMemory')
            return

        self._ntraj = len(filename_list)
        if self._ntraj == 0:
            raise ValueError("empty file list")

        # validate files
        for f in filename_list:
            try:
                stat = os.stat(f)
            except EnvironmentError:
                self.logger.exception('Error during access of file "%s"' % f)
                raise ValueError('could not read file "%s"' % f)

            if not os.path.isfile(f):  # can be true for symlinks to directories
                raise ValueError('"%s" is not a valid file' % f)

            if stat.st_size == 0:
                raise ValueError('file "%s" is empty' % f)

        # number of trajectories/data sets
        self._filenames = filename_list
        # determine len and dim via cache lookup
        lengths = []
        offsets = []
        ndims = []
        # avoid cyclic imports
        from pyemma.coordinates.data.util.traj_info_cache import TrajectoryInfoCache
        from pyemma._base.progress import ProgressReporter
        pg = ProgressReporter()
        pg.register(len(filename_list), 'Obtaining file info')
        with pg.context():
            for filename in filename_list:
                if config.use_trajectory_lengths_cache:
                    info = TrajectoryInfoCache.instance()[filename, self]
                else:
                    info = self._get_traj_info(filename)
                # nested data set support.
                if hasattr(info, 'children'):
                    lengths.append(info.length)
                    offsets.append(info.offsets)
                    ndims.append(info.ndim)
                    for c in info.children:
                        lengths.append(c.length)
                        offsets.append(c.offsets)
                        ndims.append(c.ndim)
                else:
                    lengths.append(info.length)
                    offsets.append(info.offsets)
                    ndims.append(info.ndim)
                if len(filename_list) > 3:
                    pg.update(1)

        # ensure all trajs have the same dimension
        if not np.unique(ndims).size == 1:
            # group files by their dimensions to give the user an indicator
            ndims = np.array(ndims)
            filename_list = np.asarray(filename_list)
            sort_inds = np.argsort(ndims)
            import itertools, operator
            res = {}
            for dim, files in itertools.groupby(zip(ndims[sort_inds], filename_list[sort_inds]),
                                                operator.itemgetter(0)):
                res[dim] = list(f[1] for f in files)

            raise ValueError("Input data has different dimensions ({dims})!"
                             " Files grouped by dimensions: {groups}".format(dims=res.keys(), groups=res))

        self._ndim = ndims[0]
        self._lengths = lengths
        self._offsets = offsets

    else:
        # propagate this until we finally have a reader
        self.data_producer.filenames = filename_list
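# For reference, the dimension-grouping used in the error branch above can be
# reproduced standalone (hypothetical file names, for illustration only):

import itertools, operator
import numpy as np

ndims = np.array([3, 2, 3])
files = np.array(['a.xtc', 'b.xtc', 'c.xtc'])
order = np.argsort(ndims)
groups = {dim: [f for _, f in pairs]
          for dim, pairs in itertools.groupby(zip(ndims[order], files[order]),
                                              operator.itemgetter(0))}
# groups == {2: ['b.xtc'], 3: ['a.xtc', 'c.xtc']}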
def write_to_csv(self, filename=None, extension='.dat', overwrite=False,
                 stride=1, chunksize=None, **kw):
    """ write all data to csv with numpy.savetxt

    Parameters
    ----------
    filename : str, optional
        filename string, which may contain placeholders {itraj} and {stride}:

        * itraj will be replaced by the trajectory index
        * stride is the stride argument of this method

        If filename is not given, the filenames are obtained from the data
        source of this iterator.
    extension : str, optional, default='.dat'
        filename extension of created files
    overwrite : bool, optional, default=False
        shall existing files be overwritten? If a file exists, this method will raise.
    stride : int
        omit every n'th frame
    chunksize: int, default=None
        how many frames to process at once
    kw : dict, optional
        named arguments passed into numpy.savetxt (header, delimiter etc.)

    Example
    -------
    Assume you want to save features calculated by some FeatureReader to ASCII:

    >>> import numpy as np, pyemma
    >>> import os
    >>> from pyemma.util.files import TemporaryDirectory
    >>> from pyemma.util.contexts import settings
    >>> data = [np.random.random((10,3))] * 3
    >>> reader = pyemma.coordinates.source(data)
    >>> filename = "distances_{itraj}.dat"
    >>> with TemporaryDirectory() as td, settings(show_progress_bars=False):
    ...    out = os.path.join(td, filename)
    ...    reader.write_to_csv(out, header='', delimiter=';')
    ...    print(sorted(os.listdir(td)))
    ['distances_0.dat', 'distances_1.dat', 'distances_2.dat']
    """
    import os
    if not filename:
        assert hasattr(self, 'filenames')
        # raise RuntimeError("could not determine filenames")
        filenames = []
        for f in self.filenames:
            base, _ = os.path.splitext(f)
            filenames.append(base + extension)
    elif isinstance(filename, str):
        filename = filename.replace('{stride}', str(stride))
        filenames = [filename.replace('{itraj}', str(itraj)) for itraj
                     in range(self.number_of_trajectories())]
    else:
        raise TypeError("filename should be str or None")
    self.logger.debug("write_to_csv, filenames=%s" % filenames)

    # check files before starting to write
    import errno
    for f in filenames:
        try:
            os.stat(f)
            # the file exists; raise EEXIST so the handler below can decide
            raise OSError(errno.EEXIST, 'file already exists', f)
        except OSError as e:
            if e.errno == errno.EEXIST:
                if overwrite:
                    continue
            elif e.errno == errno.ENOENT:
                continue
            raise

    f = None
    from pyemma._base.progress import ProgressReporter
    pg = ProgressReporter()
    it = self.iterator(stride, chunk=chunksize, return_trajindex=False)
    pg.register(it.n_chunks, "saving to csv")
    with it, pg.context():
        oldtraj = -1
        for X in it:
            if oldtraj != it.current_trajindex:
                if f is not None:
                    f.close()
                fn = filenames[it.current_trajindex]
                self.logger.debug("opening file %s for writing csv." % fn)
                f = open(fn, 'wb')
                oldtraj = it.current_trajindex
            np.savetxt(f, X, **kw)
            f.flush()
            pg.update(1, 0)
    if f is not None:
        f.close()
def write_to_hdf5(self, filename, group='/', data_set_prefix='', overwrite=False,
                  stride=1, chunksize=None, h5_opt=None):
    """ writes all data of this Iterable to a given HDF5 file.
    This is equivalent to writing the result of
    :func:`pyemma.coordinates.data._base.DataSource.get_output` to a file.

    Parameters
    ----------
    filename: str
        file name of the output HDF5 file
    group: str, default='/'
        write all trajectories to this HDF5 group. The group name may not already exist in the file.
    data_set_prefix: str, default=None
        data set name prefix, will be postfixed with the index of the trajectory.
    overwrite: bool, default=False
        if group and data sets already exist, shall we overwrite data?
    stride: int, default=1
        stride argument to iterator
    chunksize: int, default=None
        how many frames to process at once
    h5_opt: dict
        optional parameters for h5py.create_dataset

    Notes
    -----
    You can pass the following via h5_opt to enable compression/filters/shuffling etc:

    chunks
        (Tuple) Chunk shape, or True to enable auto-chunking.
    maxshape
        (Tuple) Make the dataset resizable up to this shape. Use None for
        axes you want to be unlimited.
    compression
        (String or int) Compression strategy. Legal values are 'gzip', 'szip', 'lzf'.
        If an integer in range(10), this indicates gzip compression level.
        Otherwise, an integer indicates the number of a dynamically loaded compression filter.
    compression_opts
        Compression settings. This is an integer for gzip, 2-tuple for szip, etc. If
        specifying a dynamically loaded compression filter number, this must be a tuple of values.
    scaleoffset
        (Integer) Enable scale/offset filter for (usually) lossy compression of
        integer or floating-point data. For integer data, the value of scaleoffset
        is the number of bits to retain (pass 0 to let HDF5 determine the minimum
        number of bits necessary for lossless compression). For floating point data,
        scaleoffset is the number of digits after the decimal place to retain; stored
        values thus have absolute error less than 0.5 * 10**(-scaleoffset).
    shuffle
        (T/F) Enable shuffle filter. Only effective in combination with chunks.
    fletcher32
        (T/F) Enable fletcher32 error detection. Not permitted in conjunction
        with the scale/offset filter.
    fillvalue
        (Scalar) Use this value for uninitialized parts of the dataset.
    track_times
        (T/F) Enable dataset creation timestamps.
    """
    if h5_opt is None:
        h5_opt = {}
    import h5py
    from pyemma._base.progress import ProgressReporter
    pg = ProgressReporter()
    it = self.iterator(stride=stride, chunk=chunksize, return_trajindex=True)
    pg.register(it.n_chunks, 'writing output')
    with h5py.File(filename, mode='a') as f, it, pg.context():
        if group not in f:
            g = f.create_group(group)
        elif group == '/':  # root always exists.
            g = f[group]
        elif group in f and overwrite:
            self.logger.info('overwriting group "{}"'.format(group))
            del f[group]
            g = f.create_group(group)
        else:
            raise ValueError('Given group "{}" already exists. Choose another one.'.format(group))

        # check output data sets
        data_sets = {}
        for itraj in np.arange(self.ntraj):
            template = '{prefix}_{index}' if data_set_prefix else '{index}'
            ds_name = template.format(prefix=data_set_prefix, index='{:04d}'.format(itraj))
            # group can be re-used, e.g. it was empty before; now check if we will overwrite something
            if ds_name in g:
                if not overwrite:
                    raise ValueError('Refusing to overwrite data in group "{}".'.format(group))
            else:
                data_sets[itraj] = g.require_dataset(
                    ds_name,
                    shape=(self.trajectory_length(itraj=itraj, stride=stride), self.ndim),
                    dtype=self.output_type(), **h5_opt)

        for itraj, X in it:
            ds = data_sets[itraj]
            ds[it.pos:it.pos + len(X)] = X
            pg.update(1)
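# A hedged usage sketch of the HDF5 writer above, relying only on parameters listed in
# its docstring; the data, file name and compression settings are illustrative:

import numpy as np
import pyemma

data = [np.random.random((10, 3))] * 3
reader = pyemma.coordinates.source(data)

# write every trajectory as a gzip-compressed data set into one HDF5 group
reader.write_to_hdf5('output.h5', group='/features',
                     h5_opt=dict(compression='gzip', compression_opts=4, shuffle=True))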
def get_output(self, dimensions=slice(0, None), stride=1, skip=0, chunk=None):
    """Maps all input data of this transformer and returns it as an array or list of arrays

    Parameters
    ----------
    dimensions : list-like of indexes or slice, default=all
        indices of dimensions you like to keep.
    stride : int, default=1
        only take every n'th frame.
    skip : int, default=0
        initially skip n frames of each file.
    chunk: int, default=None
        How many frames to process at once. If not given, obtain the chunk size
        from the source.

    Returns
    -------
    output : list of ndarray(T_i, d)
        the mapped data, where T is the number of time steps of the input data, or
        if stride > 1, floor(T_in / stride). d is the output dimension of this transformer.
        If the input consists of a list of trajectories, Y will also be a
        corresponding list of trajectories

    """
    if isinstance(dimensions, int):
        ndim = 1
        dimensions = slice(dimensions, dimensions + 1)
    elif isinstance(dimensions, (list, np.ndarray, tuple, slice)):
        if hasattr(dimensions, 'ndim') and dimensions.ndim > 1:
            raise ValueError('dimension indices can\'t have more than one dimension')
        ndim = len(np.zeros(self.ndim)[dimensions])
    else:
        raise ValueError('unsupported type (%s) of "dimensions"' % type(dimensions))

    assert ndim > 0, "ndim was zero in %s" % self.__class__.__name__

    if chunk is None:
        chunk = self.chunksize

    # create iterator
    if self.in_memory and not self._mapping_to_mem_active:
        from pyemma.coordinates.data.data_in_memory import DataInMemory
        assert self._Y is not None
        it = DataInMemory(self._Y)._create_iterator(skip=skip, chunk=chunk,
                                                    stride=stride, return_trajindex=True)
    else:
        it = self._create_iterator(skip=skip, chunk=chunk, stride=stride, return_trajindex=True)

    with it:
        # allocate memory
        try:
            from pyemma import config
            if config.coordinates_check_output:
                trajs = [np.full((l, ndim), np.nan, dtype=self.output_type())
                         for l in it.trajectory_lengths()]
            else:
                # TODO: avoid having a copy here, if Y is already filled
                trajs = [np.empty((l, ndim), dtype=self.output_type())
                         for l in it.trajectory_lengths()]
        except MemoryError:
            self.logger.exception("Could not allocate enough memory to map all data."
                                  " Consider using a larger stride.")
            return

        if self._logger_is_active(self._loglevel_DEBUG):
            self.logger.debug("get_output(): dimensions=%s" % str(dimensions))
            self.logger.debug("get_output(): created output trajs with shapes: %s"
                              % [x.shape for x in trajs])
            self.logger.debug("nchunks: %s, chunksize=%s" % (it.n_chunks, it.chunksize))
        # fetch data
        from pyemma._base.progress import ProgressReporter
        pg = ProgressReporter()
        pg.register(it.n_chunks, description='getting output of %s' % self.__class__.__name__)
        with pg.context(), it:
            for itraj, chunk in it:
                i = slice(it.pos, it.pos + len(chunk))
                assert i.stop - i.start > 0
                trajs[itraj][i, :] = chunk[:, dimensions]
                pg.update(1)

    if config.coordinates_check_output:
        for i, t in enumerate(trajs):
            finite = self._chunk_finite(t)
            if not np.all(finite):
                # determine position
                frames = np.where(np.logical_not(finite))
                if not len(frames):
                    raise RuntimeError('nothing got assigned for traj {}'.format(i))
                raise RuntimeError('unassigned sections in traj {i} in range [{frames}]'.format(frames=frames, i=i))

    return trajs
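# A short usage sketch of ``get_output``, following the same in-memory source pattern
# as the csv example above (values are illustrative):

import numpy as np
import pyemma

data = [np.random.random((100, 3))] * 2
reader = pyemma.coordinates.source(data)

# keep only the first two dimensions and take every 5th frame
out = reader.get_output(dimensions=[0, 1], stride=5)
print(len(out), out[0].shape)  # 2 trajectories, each of shape (20, 2)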
def frames_from_files(files, top, frames, chunksize=1000, stride=1, verbose=False,
                      copy_not_join=None, reader=None):
    """
    Constructs a Trajectory object out of given frames collected from files (or a given reader).

    :param files: source files
    :param top: topology file
    :param frames: indices
    :param chunksize:
    :param stride:
    :param verbose:
    :param copy_not_join: not used
    :param reader: if a reader is given, ignore files and top param!
    :return: mdtraj.Trajectory consisting of the given frame indices.
    """
    # Enforce topology to be a md.Topology object
    if reader is None:
        top = _enforce_top(top)
    else:
        if not reader.number_of_trajectories():
            raise ValueError("need at least one trajectory file in reader.")
        if isinstance(reader, FragmentedTrajectoryReader):
            top = reader._readers[0][0].featurizer.topology
        elif isinstance(reader, FeatureReader):
            top = reader.featurizer.topology
        else:
            raise ValueError("unsupported reader (only md readers).")

    stride = int(stride)
    frames = np.array(frames)

    # only one file, so we expect frames to be a one dimensional array
    if isinstance(files, str):
        files = [files]
        if frames.ndim == 1:
            # insert a constant column for the file index
            frames = np.insert(np.atleast_2d(frames), 0, np.zeros_like(frames), axis=0).T

    if stride != 1:
        frames[:, 1] *= int(stride)
        if verbose:
            log.info('A stride value of %u was parsed, '
                     'interpreting "indexes" accordingly.' % stride)

    # sort by file and frame index
    sort_inds = np.lexsort((frames[:, 1], frames[:, 0]))
    sorted_inds = frames[sort_inds]
    assert len(sorted_inds) == len(frames)

    file_inds_unique = np.unique(sorted_inds[:, 0])
    # construct reader
    if reader is None:
        # filter out files we would never read, because no indices point to them
        reader = source(np.array(files)[file_inds_unique].tolist(), top=top)
        # re-map indices to reflect the filtered files:
        for itraj, c in zip(file_inds_unique, itertools.count(0)):
            mask = sorted_inds[:, 0] == itraj
            sorted_inds[mask, 0] = c

        inds_to_check = np.arange(len(file_inds_unique))
    else:
        inds_to_check = file_inds_unique

    # sanity check of indices
    for itraj in inds_to_check:
        inds_by_traj = sorted_inds[sorted_inds[:, 0] == itraj]
        largest_ind_in_traj = np.max(inds_by_traj)
        length = reader.trajectory_length(itraj)
        if length < largest_ind_in_traj:
            raise ValueError("largest specified index (%i * stride %i = %i) "
                             "is larger than trajectory length '%s' = %i" %
                             (largest_ind_in_traj / stride, stride, largest_ind_in_traj,
                              reader.filenames[itraj], length))

    # we want the FeatureReader to return mdtraj.Trajectory objects
    if isinstance(reader, FeatureReader):
        reader._return_traj_obj = True
    elif isinstance(reader, FragmentedTrajectoryReader):
        for file in reader.filenames_flat:
            r = reader.reader_by_filename(file)
            if isinstance(r, FeatureReader):
                r = [r]
            for _r in r:
                _r._return_traj_obj = True

    it = reader.iterator(chunk=chunksize, stride=sorted_inds, return_trajindex=False)
    reporter = ProgressReporter()
    reporter._progress_register(it._n_chunks, description="collecting frames")
    collected_frames = []
    with it:
        for x in it:
            collected_frames.append(x)
            reporter._progress_update(1)
        reporter._progress_force_finish()

    dest = _preallocate_empty_trajectory(top, len(frames))
    i = 0
    for chunk in collected_frames:
        _copy_traj_attributes(dest, chunk, i)
        i += len(chunk)
    # restore the user-requested frame order
    dest = dest.slice(sort_inds.argsort(), copy=False)
    return dest
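# A hedged example of how ``frames_from_files`` could be called; the trajectory and
# topology paths below are placeholders, not files shipped with pyemma:

import numpy as np

files = ['traj_a.xtc', 'traj_b.xtc']   # hypothetical trajectory files
top = 'structure.pdb'                  # hypothetical topology

# pairs of (file index, frame index): frame 10 of traj_a and frame 5 of traj_b
frames = np.array([[0, 10], [1, 5]])

traj = frames_from_files(files, top, frames, chunksize=1000)
# ``traj`` is an mdtraj.Trajectory holding the two selected frames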
def count_lagged(self, lag, count_mode='sliding', mincount_connectivity='1/n',
                 show_progress=True, n_jobs=None, name='', core_set=None,
                 milestoning_method='last_core'):
    r""" Counts transitions at given lag time

    Parameters
    ----------
    lag : int
        lagtime in trajectory steps

    count_mode : str, optional, default='sliding'
        mode to obtain count matrices from discrete trajectories. Should be one of:

        * 'sliding' : A trajectory of length T will have :math:`T-\tau` counts at time indexes

          .. math::

             (0 \rightarrow \tau), (1 \rightarrow \tau+1), ..., (T-\tau-1 \rightarrow T-1)

        * 'effective' : Uses an estimate of the transition counts that are
          statistically uncorrelated. Recommended when used with a Bayesian MSM.

        * 'sample' : A trajectory of length T will have :math:`T / \tau` counts at time indexes

          .. math::

             (0 \rightarrow \tau), (\tau \rightarrow 2 \tau), ..., (((T/\tau)-1) \tau \rightarrow T)

    show_progress: bool, default=True
        show the progress for the expensive effective count mode computation.

    n_jobs: int or None

    """
    # store lag time
    self._lag = lag

    # Compute count matrix
    count_mode = count_mode.lower()
    if core_set is not None and count_mode in ('sliding', 'sample'):
        if milestoning_method == 'last_core':
            # assign -1 frames to the last visited core
            for d in self._dtrajs:
                assert d[0] != -1
                while -1 in d:
                    mask = (d == -1)
                    d[mask] = d[np.roll(mask, -1)]
            self._C = msmest.count_matrix(self._dtrajs, lag, sliding=count_mode == 'sliding')
        else:
            raise NotImplementedError('Milestoning method {} not implemented.'.format(milestoning_method))
    elif count_mode == 'sliding':
        self._C = msmest.count_matrix(self._dtrajs, lag, sliding=True)
    elif count_mode == 'sample':
        self._C = msmest.count_matrix(self._dtrajs, lag, sliding=False)
    elif count_mode == 'effective':
        if core_set is not None:
            raise RuntimeError('Cannot estimate core set MSM with effective counting.')
        from pyemma.util.reflection import getargspec_no_self
        argspec = getargspec_no_self(msmest.effective_count_matrix)
        kw = {}
        from pyemma.util.contexts import nullcontext
        ctx = nullcontext()
        if 'callback' in argspec.args:  # msmtools effective cmatrix ready for multiprocessing?
            from pyemma._base.progress import ProgressReporter
            from pyemma._base.parallel import get_n_jobs

            kw['n_jobs'] = get_n_jobs() if n_jobs is None else n_jobs

            if show_progress:
                pg = ProgressReporter()
                # this is a fast operation
                C_temp = msmest.count_matrix(self._dtrajs, lag, sliding=True)
                pg.register(C_temp.nnz, '{}: compute stat. inefficiencies'.format(name), stage=0)
                del C_temp
                kw['callback'] = pg.update
                ctx = pg.context(stage=0)

        with ctx:
            self._C = msmest.effective_count_matrix(self._dtrajs, lag, **kw)
    else:
        raise ValueError('Count mode ' + count_mode + ' is unknown.')

    # store mincount_connectivity
    if mincount_connectivity == '1/n':
        mincount_connectivity = 1.0 / np.shape(self._C)[0]
    self._mincount_connectivity = mincount_connectivity

    # Compute reversibly connected sets
    if self._mincount_connectivity > 0:
        self._connected_sets = \
            self._compute_connected_sets(self._C, mincount_connectivity=self._mincount_connectivity)
    else:
        self._connected_sets = msmest.connected_sets(self._C)

    # set sizes and count matrices on reversibly connected sets
    self._connected_set_sizes = np.zeros((len(self._connected_sets)))
    self._C_sub = np.empty((len(self._connected_sets)), dtype=object)
    for i in range(len(self._connected_sets)):
        # set size
        self._connected_set_sizes[i] = len(self._connected_sets[i])
        # submatrix
        # self._C_sub[i] = submatrix(self._C, self._connected_sets[i])

    # largest connected set
    self._lcs = self._connected_sets[0]

    # if lcs has no counts, make lcs empty
    if submatrix(self._C, self._lcs).sum() == 0:
        self._lcs = np.array([], dtype=int)

    # mapping from full to lcs
    self._full2lcs = -1 * np.ones((self._nstates), dtype=int)
    self._full2lcs[self._lcs] = np.arange(len(self._lcs))

    # remember that this function was called
    self._counted_at_lag = True
def count_lagged(self, lag, count_mode='sliding', mincount_connectivity='1/n', show_progress=True):
    r""" Counts transitions at given lag time

    Parameters
    ----------
    lag : int
        lagtime in trajectory steps

    count_mode : str, optional, default='sliding'
        mode to obtain count matrices from discrete trajectories. Should be one of:

        * 'sliding' : A trajectory of length T will have :math:`T-\tau` counts at time indexes

          .. math::

             (0 \rightarrow \tau), (1 \rightarrow \tau+1), ..., (T-\tau-1 \rightarrow T-1)

        * 'effective' : Uses an estimate of the transition counts that are
          statistically uncorrelated. Recommended when used with a Bayesian MSM.

        * 'sample' : A trajectory of length T will have :math:`T / \tau` counts at time indexes

          .. math::

             (0 \rightarrow \tau), (\tau \rightarrow 2 \tau), ..., (((T/\tau)-1) \tau \rightarrow T)

    show_progress: bool, default=True
        show the progress for the expensive effective count mode computation.

    """
    # store lag time
    self._lag = lag

    # Compute count matrix
    count_mode = count_mode.lower()
    if count_mode == 'sliding':
        self._C = msmest.count_matrix(self._dtrajs, lag, sliding=True)
    elif count_mode == 'sample':
        self._C = msmest.count_matrix(self._dtrajs, lag, sliding=False)
    elif count_mode == 'effective':
        from pyemma.util.reflection import getargspec_no_self
        argspec = getargspec_no_self(msmest.effective_count_matrix)
        kw = {}
        if show_progress and 'callback' in argspec.args:
            from pyemma._base.progress import ProgressReporter
            from pyemma._base.parallel import get_n_jobs
            pg = ProgressReporter()
            # this is a fast operation
            C_temp = msmest.count_matrix(self._dtrajs, lag, sliding=True)
            pg.register(C_temp.nnz, 'compute statistical inefficiencies')
            del C_temp
            callback = lambda: pg.update(1)
            kw['callback'] = callback
            kw['n_jobs'] = get_n_jobs()

        self._C = msmest.effective_count_matrix(self._dtrajs, lag, **kw)
    else:
        raise ValueError('Count mode ' + count_mode + ' is unknown.')

    # store mincount_connectivity
    if mincount_connectivity == '1/n':
        mincount_connectivity = 1.0 / np.shape(self._C)[0]
    self._mincount_connectivity = mincount_connectivity

    # Compute reversibly connected sets
    if self._mincount_connectivity > 0:
        self._connected_sets = \
            self._compute_connected_sets(self._C, mincount_connectivity=self._mincount_connectivity)
    else:
        self._connected_sets = msmest.connected_sets(self._C)

    # set sizes and count matrices on reversibly connected sets
    self._connected_set_sizes = np.zeros((len(self._connected_sets)))
    self._C_sub = np.empty((len(self._connected_sets)), dtype=object)
    for i in range(len(self._connected_sets)):
        # set size
        self._connected_set_sizes[i] = len(self._connected_sets[i])
        # submatrix
        # self._C_sub[i] = submatrix(self._C, self._connected_sets[i])

    # largest connected set
    self._lcs = self._connected_sets[0]

    # if lcs has no counts, make lcs empty
    if submatrix(self._C, self._lcs).sum() == 0:
        self._lcs = np.array([], dtype=int)

    # mapping from full to lcs
    self._full2lcs = -1 * np.ones((self._nstates), dtype=int)
    self._full2lcs[self._lcs] = np.arange(len(self._lcs))

    # remember that this function was called
    self._counted_at_lag = True
def _estimate(self, iterable, partial_fit=False):
    indim = iterable.dimension()
    if not indim:
        raise ValueError("zero dimension from data source!")

    if not any(iterable.trajectory_lengths(stride=self.stride, skip=self.lag + self.skip) > 0):
        if partial_fit:
            self.logger.warning("Could not use data passed to partial_fit(), "
                                "because no single data set [longest=%i] is longer than lag+skip [%i]",
                                max(iterable.trajectory_lengths(self.stride, skip=self.skip)),
                                self.lag + self.skip)
            return self
        else:
            raise ValueError("No single dataset [longest=%i] is longer than"
                             " lag+skip [%i]." % (max(iterable.trajectory_lengths(self.stride, skip=self.skip)),
                                                  self.lag + self.skip))

    self.logger.debug("will use %s total frames for %s",
                      iterable.trajectory_lengths(self.stride, skip=self.skip), self.name)

    chunksize = 0 if partial_fit else iterable.chunksize
    it = iterable.iterator(lag=self.lag, return_trajindex=False, stride=self.stride,
                           skip=self.skip, chunk=chunksize)
    # iterator over input weights
    if hasattr(self.weights, 'iterator'):
        if hasattr(self.weights, '_transform_array'):
            self.weights.data_producer = iterable
        it_weights = self.weights.iterator(lag=0, return_trajindex=False,
                                           stride=self.stride, skip=self.skip, chunk=chunksize)
        if it_weights.number_of_trajectories() != iterable.number_of_trajectories():
            raise ValueError("number of weight arrays did not match number of input data sets. {} vs. {}"
                             .format(it_weights.number_of_trajectories(),
                                     iterable.number_of_trajectories()))
    else:
        # if we only have a scalar, repeat it.
        import itertools
        it_weights = itertools.repeat(self.weights)

    # TODO: we could possibly optimize the case lag > 0 and c0t=False using skip.
    # Assess how much iterator hassle this would be.
    # self.skipped = 0
    pg = ProgressReporter()
    pg.register(it.n_chunks, 'calculate covariances', stage=0)
    with it, pg.context(stage=0):
        self._init_covar(partial_fit, it.n_chunks)
        for data, weight in zip(it, it_weights):
            if self.lag != 0:
                X, Y = data
            else:
                X, Y = data, None

            if weight is not None:
                if isinstance(weight, np.ndarray):
                    weight = weight.squeeze()[:len(X)]
                    # TODO: if the weight is exactly zero it makes no sense to add the chunk
                    # to the running moments. However, doing so leads to wrong results...
                    # if np.all(np.abs(weight) < np.finfo(np.float).eps):
                    #     # print("skip")
                    #     self.skipped += len(X)
                    #     continue
            if self.remove_constant_mean is not None:
                X = X - self.remove_constant_mean[np.newaxis, :]
                if Y is not None:
                    Y = Y - self.remove_constant_mean[np.newaxis, :]

            try:
                self._rc.add(X, Y, weights=weight)
            except MemoryError:
                raise MemoryError('Covariance matrix does not fit into memory. '
                                  'Input is too high-dimensional ({} dimensions).'.format(X.shape[1]))
            pg.update(1, stage=0)

    if partial_fit:
        if '_rc' not in self.__serialize_fields:
            self.__serialize_fields.append('_rc')
    else:
        if '_rc' in self.__serialize_fields:
            self.__serialize_fields.remove('_rc')
    return self
def test_below_threshold(self):
    # should not raise
    pg = ProgressReporter()
    pg.register(2)
    pg.update(1)
    pg.set_description('dummy')