Example #1
0
    def inverse_transform(self, sequences):
        """Transform a list of sequences from internal indexing into
        labels

        Parameters
        ----------
        sequences : list
            List of sequences, each of which is one-dimensional array of
            integers in ``0, ..., n_states_ - 1``.

        Returns
        -------
        sequences : list
            List of sequences, each of which is one-dimensional array
            of labels.
        """
        sequences = list_of_1d(sequences)
        inverse_mapping = {v: k for k, v in self.mapping_.items()}
        f = np.vectorize(inverse_mapping.get)

        result = []
        for y in sequences:
            uq = np.unique(y)
            if not np.all(np.logical_and(0 <= uq, uq < self.n_states_)):
                raise ValueError('sequence must be between 0 and n_states-1')

            result.append(f(y))
        return result
Example #2
0
    def fit(self, sequences, y=None):
        sequences = list_of_1d(sequences)
        raw_counts, mapping = _transition_counts(sequences, self.lag_time)

        if self.ergodic_cutoff >= 1:
            self.countsmat_, mapping2 = _strongly_connected_subgraph(
                raw_counts, self.ergodic_cutoff, self.verbose)
            self.mapping_ = _dict_compose(mapping, mapping2)
        else:
            self.countsmat_ = raw_counts
            self.mapping_ = mapping

        self.n_states_ = self.countsmat_.shape[0]
        fit_method_map = {
            True: self._fit_reversible,
            False: self._fit_non_reversible}

        try:
            fit_method = fit_method_map[self.reversible]
            self.all_transmats_ = fit_method(self.countsmat_)
        except KeyError:
            raise ValueError('reversible_type must be one of %s: %s' % (
                ', '.join(fit_method_map.keys()), self.reversible_type))

        return self
Example #3
0
    def transform(self, sequences, mode='clip'):
        r"""Transform a list of sequences to internal indexing

        Recall that `sequences` can be arbitrary labels, whereas ``transmat_``
        and ``countsmat_`` are indexed with integers between 0 and
        ``n_states - 1``. This methods maps a set of sequences from the labels
        onto this internal indexing.

        Parameters
        ----------
        sequences : list of array-like
            List of sequences, or a single sequence. Each sequence should be a
            1D iterable of state labels. Labels can be integers, strings, or
            other orderable objects.
        mode : {'clip', 'fill'}
            Method by which to treat labels in `sequences` which do not have
            a corresponding index. This can be due, for example, to the ergodic
            trimming step.

           ``clip``
               Unmapped labels are removed during transform. If they occur
               at the beginning or end of a sequence, the resulting transformed
               sequence will be shorted. If they occur in the middle of a
               sequence, that sequence will be broken into two (or more)
               sequences. (Default)
           ``fill``
               Unmapped labels will be replaced with NaN, to signal missing
               data. [The use of NaN to signal missing data is not fantastic,
               but it's consistent with current behavior of the ``pandas``
               library.]

        Returns
        -------
        mapped_sequences : list
            List of sequences in internal indexing
        """
        if mode not in ['clip', 'fill']:
            raise ValueError('mode must be one of ["clip", "fill"]: %s' % mode)
        sequences = list_of_1d(sequences)

        f = np.vectorize(lambda k: self.mapping_.get(k, np.nan),
                         otypes=[np.float])

        result = []
        for y in sequences:
            a = f(y)
            if mode == 'fill':
                if np.all(np.mod(a, 1) == 0):
                    result.append(a.astype(int))
                else:
                    result.append(a)
            elif mode == 'clip':
                result.extend([a[s].astype(int) for s in
                               np.ma.clump_unmasked(np.ma.masked_invalid(a))])
            else:
                raise RuntimeError()

        return result
Example #4
0
    def fit(self, sequences, y=None):
        """Estimate model parameters.

        Parameters
        ----------
        sequences : list of array-like
            List of sequences, or a single sequence. Each sequence should be a
            1D iterable of state labels. Labels can be integers, strings, or
            other orderable objects.

        Returns
        -------
        self

        Notes
        -----
        `None` and `NaN` are recognized immediately as invalid labels.
        Therefore, transition counts from or to a sequence item which is NaN or
        None will not be counted. The mapping_ attribute will not include the
        NaN or None.
        """
        sequences = list_of_1d(sequences)
        # step 1. count the number of transitions
        raw_counts, mapping = _transition_counts(sequences, self.lag_time)

        if self.ergodic_cutoff >= 1:
            # step 2. restrict the counts to the maximal strongly ergodic
            # subgraph
            self.countsmat_, mapping2 = _strongly_connected_subgraph(raw_counts, self.ergodic_cutoff, self.verbose)
            self.mapping_ = _dict_compose(mapping, mapping2)
        else:
            # no ergodic trimming.
            self.countsmat_ = raw_counts
            self.mapping_ = mapping

        self.n_states_ = self.countsmat_.shape[0]

        # use a dict like a switch statement: dispatch to different
        # transition matrix estimators depending on the value of
        # self.reversible_type
        fit_method_map = {"mle": self._fit_mle, "transpose": self._fit_transpose, "none": self._fit_asymetric}

        try:
            # pull out the appropriate method
            fit_method = fit_method_map[str(self.reversible_type).lower()]
            # step 3. estimate transition matrix
            self.transmat_, self.populations_ = fit_method(self.countsmat_)
        except KeyError:
            raise ValueError(
                "reversible_type must be one of %s: %s" % (", ".join(fit_method_map.keys()), self.reversible_type)
            )

        self._is_dirty = True
        return self