def fit(self, sequences, y=None):
    """Estimate the model from a collection of state sequences.

    Parameters
    ----------
    sequences : list of array-like
        Input sequences; normalised with ``list_of_1d`` before the
        transitions are counted at ``self.lag_time``.
    y : None
        Unused by this method.

    Returns
    -------
    self

    Raises
    ------
    ValueError
        If ``self.reversible`` is not a key of the estimator dispatch
        table (``True`` or ``False``).
    """
    sequences = list_of_1d(sequences)
    raw_counts, mapping = _transition_counts(sequences, self.lag_time)

    if self.ergodic_cutoff >= 1:
        # Trim the counts with _strongly_connected_subgraph and chain the
        # two label mappings so mapping_ refers to the trimmed matrix.
        self.countsmat_, mapping2 = _strongly_connected_subgraph(
            raw_counts, self.ergodic_cutoff, self.verbose)
        self.mapping_ = _dict_compose(mapping, mapping2)
    else:
        self.countsmat_ = raw_counts
        self.mapping_ = mapping
    self.n_states_ = self.countsmat_.shape[0]

    fit_method_map = {
        True: self._fit_reversible,
        False: self._fit_non_reversible}

    # Only the lookup is guarded: a KeyError raised *inside* the estimator
    # must not be misreported as an invalid `reversible` setting.
    try:
        fit_method = fit_method_map[self.reversible]
    except (KeyError, TypeError):
        # The keys are booleans, so they must be stringified before
        # joining; the value reported is the one actually looked up.
        raise ValueError('reversible must be one of %s: %s' % (
            ', '.join(str(key) for key in fit_method_map),
            self.reversible))

    self.all_transmats_ = fit_method(self.countsmat_)
    return self
def score_ll(self, sequences):
    r"""Log-likelihood of ``sequences`` under the fitted model.

    Parameters
    ----------
    sequences : list of array-like
        List of sequences, or a single sequence. Each sequence should be a
        1D iterable of state labels. Labels can be integers, strings, or
        other orderable objects.

    Returns
    -------
    loglikelihood : float
        The natural log of the likelihood, computed as
        :math:`\sum_{ij} C_{ij} \log(P_{ij})`
        where C is a matrix of counts computed from the input sequences.
        ``-np.inf`` is returned when the sequences contain a label that is
        not a key of ``self.mapping_``.
    """
    # NOTE(review): unlike fit(), this neither calls list_of_1d() nor passes
    # self.lag_time to _transition_counts -- confirm that is intentional.
    counts, seq_mapping = _transition_counts(sequences)

    # Any label the model has never seen makes the sequences impossible.
    if not set(seq_mapping).issubset(set(self.mapping_)):
        return -np.inf

    index_to_label = {index: label for label, index in seq_mapping.items()}
    # maps indices in counts to indices in transmat
    counts_to_transmat = _dict_compose(index_to_label, self.mapping_)
    indices = [counts_to_transmat[i] for i in sorted(counts_to_transmat)]

    selector = np.ix_(indices, indices)
    log_probabilities = np.log(self.transmat_[selector])
    # nansum skips the NaN produced where a zero count meets log(0) = -inf.
    return np.nansum(log_probabilities * counts)
def _map_eigenvectors(self, V, other_mapping):
    """Re-index the rows of ``V`` from this model's states to another mapping.

    Parameters
    ----------
    V : array, 2D
        Matrix whose rows are indexed by the values of ``self.mapping_``.
    other_mapping : dict
        Mapping whose values are the destination row indices.
        Presumably label -> index, like ``self.mapping_`` -- confirm
        against callers.

    Returns
    -------
    mapped_V : np.ndarray, shape (len(other_mapping), V.shape[1])
        Rows of ``V`` copied to their destination index. Rows that receive
        no source row stay zero; if the two mappings have nothing in
        common the whole matrix is zero.
    """
    self_inverse_mapping = {v: k for k, v in self.mapping_.items()}
    # source row index (self) -> destination row index (other)
    transform_mapping = _dict_compose(self_inverse_mapping, other_mapping)

    mapped_V = np.zeros((len(other_mapping), V.shape[1]))
    if not transform_mapping:
        # Nothing to copy; zip(*...) below would fail to unpack on an
        # empty mapping.
        return mapped_V

    source_indices, dest_indices = zip(*transform_mapping.items())
    mapped_V[list(dest_indices), :] = np.take(
        V, list(source_indices), axis=0)
    return mapped_V
def fit(self, sequences, y=None):
    """Estimate model parameters.

    Parameters
    ----------
    sequences : list of array-like
        List of sequences, or a single sequence. Each sequence should be a
        1D iterable of state labels. Labels can be integers, strings, or
        other orderable objects.
    y : None
        Unused by this method.

    Returns
    -------
    self

    Raises
    ------
    ValueError
        If ``str(self.reversible_type).lower()`` is not one of ``"mle"``,
        ``"transpose"`` or ``"none"``.

    Notes
    -----
    `None` and `NaN` are recognized immediately as invalid labels.
    Therefore, transition counts from or to a sequence item which is NaN or
    None will not be counted. The mapping_ attribute will not include the
    NaN or None.
    """
    sequences = list_of_1d(sequences)

    # step 1. count the number of transitions
    raw_counts, mapping = _transition_counts(sequences, self.lag_time)

    if self.ergodic_cutoff >= 1:
        # step 2. restrict the counts to the maximal strongly ergodic
        # subgraph
        self.countsmat_, mapping2 = _strongly_connected_subgraph(
            raw_counts, self.ergodic_cutoff, self.verbose)
        self.mapping_ = _dict_compose(mapping, mapping2)
    else:
        # no ergodic trimming.
        self.countsmat_ = raw_counts
        self.mapping_ = mapping
    self.n_states_ = self.countsmat_.shape[0]

    # use a dict like a switch statement: dispatch to different
    # transition matrix estimators depending on the value of
    # self.reversible_type
    fit_method_map = {
        "mle": self._fit_mle,
        "transpose": self._fit_transpose,
        "none": self._fit_asymetric,
    }

    # Only the lookup is guarded, so that a KeyError raised *inside* an
    # estimator is not misreported as an invalid reversible_type.
    try:
        fit_method = fit_method_map[str(self.reversible_type).lower()]
    except KeyError:
        raise ValueError(
            "reversible_type must be one of %s: %s"
            % (", ".join(fit_method_map.keys()), self.reversible_type)
        )

    # step 3. estimate transition matrix
    self.transmat_, self.populations_ = fit_method(self.countsmat_)
    self._is_dirty = True
    return self