def inverse_transform(self, sequences):
    """Map sequences of internal state indices back to their labels.

    Parameters
    ----------
    sequences : list
        List of sequences, each of which is a one-dimensional array of
        integers in ``0, ..., n_states_ - 1``.

    Returns
    -------
    sequences : list
        List of sequences, each of which is a one-dimensional array of
        labels.

    Raises
    ------
    ValueError
        If a sequence contains a value that does not satisfy
        ``0 <= value < n_states_``.
    """
    sequences = list_of_1d(sequences)

    # ``mapping_`` goes label -> index; flip it to go index -> label.
    index_to_label = dict(
        (index, label) for label, index in self.mapping_.items())
    lookup = np.vectorize(index_to_label.get)

    labelled = []
    for seq in sequences:
        # Only the distinct values need to be range-checked.
        distinct = np.unique(seq)
        in_range = np.logical_and(0 <= distinct, distinct < self.n_states_)
        if not np.all(in_range):
            raise ValueError('sequence must be between 0 and n_states-1')
        labelled.append(lookup(seq))
    return labelled
def fit(self, sequences, y=None):
    """Count transitions in `sequences` and estimate ``all_transmats_``.

    Parameters
    ----------
    sequences : list of array-like
        Input sequences; normalised with ``list_of_1d`` before counting.
    y : ignored
        Not used by this method.

    Returns
    -------
    self

    Raises
    ------
    ValueError
        If ``self.reversible`` is neither ``True`` nor ``False``.

    Notes
    -----
    Sets ``countsmat_``, ``mapping_``, ``n_states_`` and
    ``all_transmats_`` on the instance.
    """
    sequences = list_of_1d(sequences)
    raw_counts, mapping = _transition_counts(sequences, self.lag_time)

    if self.ergodic_cutoff >= 1:
        # Trim the counts with _strongly_connected_subgraph and compose
        # the two label mappings so mapping_ still starts from the
        # original labels.
        self.countsmat_, mapping2 = _strongly_connected_subgraph(
            raw_counts, self.ergodic_cutoff, self.verbose)
        self.mapping_ = _dict_compose(mapping, mapping2)
    else:
        self.countsmat_ = raw_counts
        self.mapping_ = mapping

    self.n_states_ = self.countsmat_.shape[0]

    fit_method_map = {
        True: self._fit_reversible,
        False: self._fit_non_reversible}

    # Only the lookup is guarded: a KeyError raised inside the estimator
    # itself must not be reported as an invalid ``reversible`` value.
    try:
        fit_method = fit_method_map[self.reversible]
    except KeyError:
        # The keys are booleans, so they need str() before joining.
        raise ValueError('reversible must be one of %s: %s' % (
            ', '.join(str(key) for key in fit_method_map),
            self.reversible))

    self.all_transmats_ = fit_method(self.countsmat_)
    return self
def transform(self, sequences, mode='clip'):
    r"""Transform a list of sequences to internal indexing

    Recall that `sequences` can be arbitrary labels, whereas ``transmat_``
    and ``countsmat_`` are indexed with integers between 0 and
    ``n_states - 1``. This method maps a set of sequences from the labels
    onto this internal indexing.

    Parameters
    ----------
    sequences : list of array-like
        List of sequences, or a single sequence. Each sequence should be a
        1D iterable of state labels. Labels can be integers, strings, or
        other orderable objects.
    mode : {'clip', 'fill'}
        Method by which to treat labels in `sequences` which do not have
        a corresponding index. This can be due, for example, to the
        ergodic trimming step.

        ``clip``
            Unmapped labels are removed during transform. If they occur
            at the beginning or end of a sequence, the resulting
            transformed sequence will be shortened. If they occur in the
            middle of a sequence, that sequence will be broken into two
            (or more) sequences. (Default)
        ``fill``
            Unmapped labels will be replaced with NaN, to signal missing
            data. [The use of NaN to signal missing data is not fantastic,
            but it's consistent with current behavior of the ``pandas``
            library.]

    Returns
    -------
    mapped_sequences : list
        List of sequences in internal indexing. In ``fill`` mode a
        sequence is returned as an integer array when every label was
        mapped, and as a float array containing NaN otherwise.

    Raises
    ------
    ValueError
        If `mode` is not ``'clip'`` or ``'fill'``.
    """
    if mode not in ['clip', 'fill']:
        raise ValueError('mode must be one of ["clip", "fill"]: %s' % mode)
    sequences = list_of_1d(sequences)

    # Builtin ``float`` replaces the ``np.float`` alias, which was
    # deprecated in NumPy 1.20 and removed in 1.24. A float output type is
    # needed so that unmapped labels can be represented as NaN.
    f = np.vectorize(lambda k: self.mapping_.get(k, np.nan),
                     otypes=[float])

    result = []
    for y in sequences:
        a = f(y)
        if mode == 'fill':
            # All values integral means no NaN was introduced, so the
            # sequence can safely be returned as integers.
            if np.all(np.mod(a, 1) == 0):
                result.append(a.astype(int))
            else:
                result.append(a)
        else:
            # mode == 'clip': split the sequence at every run of NaN and
            # keep each contiguous mapped stretch as its own sequence.
            result.extend(
                [a[s].astype(int) for s in
                 np.ma.clump_unmasked(np.ma.masked_invalid(a))])

    return result
def fit(self, sequences, y=None):
    """Estimate model parameters.

    Parameters
    ----------
    sequences : list of array-like
        List of sequences, or a single sequence. Each sequence should be a
        1D iterable of state labels. Labels can be integers, strings, or
        other orderable objects.
    y : ignored
        Not used by this method.

    Returns
    -------
    self

    Raises
    ------
    ValueError
        If ``reversible_type`` (compared case-insensitively, as a string)
        is not one of ``'mle'``, ``'transpose'`` or ``'none'``.

    Notes
    -----
    `None` and `NaN` are recognized immediately as invalid labels.
    Therefore, transition counts from or to a sequence item which is NaN or
    None will not be counted. The mapping_ attribute will not include the
    NaN or None.

    Sets ``countsmat_``, ``mapping_``, ``n_states_``, ``transmat_`` and
    ``populations_`` on the instance, and marks it dirty via
    ``_is_dirty``.
    """
    sequences = list_of_1d(sequences)

    # step 1. count the number of transitions
    raw_counts, mapping = _transition_counts(sequences, self.lag_time)

    if self.ergodic_cutoff >= 1:
        # step 2. restrict the counts to the maximal strongly ergodic
        # subgraph
        self.countsmat_, mapping2 = _strongly_connected_subgraph(
            raw_counts, self.ergodic_cutoff, self.verbose)
        self.mapping_ = _dict_compose(mapping, mapping2)
    else:
        # no ergodic trimming.
        self.countsmat_ = raw_counts
        self.mapping_ = mapping

    self.n_states_ = self.countsmat_.shape[0]

    # use a dict like a switch statement: dispatch to different
    # transition matrix estimators depending on the value of
    # self.reversible_type
    fit_method_map = {
        "mle": self._fit_mle,
        "transpose": self._fit_transpose,
        "none": self._fit_asymetric}

    # Only the lookup is guarded: a KeyError raised inside an estimator
    # must surface as-is rather than being reported as an invalid
    # ``reversible_type``.
    try:
        fit_method = fit_method_map[str(self.reversible_type).lower()]
    except KeyError:
        raise ValueError(
            "reversible_type must be one of %s: %s"
            % (", ".join(fit_method_map.keys()), self.reversible_type)
        )

    # step 3. estimate transition matrix
    self.transmat_, self.populations_ = fit_method(self.countsmat_)
    self._is_dirty = True
    return self