def fit(self):
    """
    Fit the model to the observations currently held in state.

    Runs the biased random walk over the transition matrix built from the
    counted observations, then stores the per-feature-value stationary
    probabilities and the per-feature relevance totals on the instance.

    :raises CBRWFitError: if get_mode of the observation counts is 0,
        i.e. nothing has been added yet
    :return: self, so calls can be chained
    """
    # refuse to fit before any observation has been counted
    if get_mode(self._counter.n_obs) == 0:
        raise CBRWFitError('must add observations before calling fit method')

    # run the biased random walk and flatten its result to one dimension
    walk_matrix = self._compute_biased_transition_matrix()
    pi = random_walk(walk_matrix, **self.rw_params).ravel()

    value_probs = {}
    relevance_totals = defaultdict(int)
    for feature, position in iteritems(self._counter.index):
        weight = pi[position]
        value_probs[feature] = weight
        # a feature's relevance is the total probability of its values
        relevance_totals[get_feature_name(feature)] += weight

    # The relevance scores serve as weights. The paper sum-normalizes them,
    # but that step should be redundant here because sum(pi) = 1 by
    # definition, so the totals are stored as-is.
    self._stationary_prob = value_probs
    self._feature_relevance = dict(relevance_totals)
    return self
def _compute_biases(self):
    """
    Compute the random-walk bias for every counted feature value.

    For each feature, the bias of a value is the average of two terms:
    ``1 - count / mode`` for that value, and ``1 - mode / n_obs`` for the
    feature as a whole, where ``mode`` is ``get_mode`` of the feature's
    value counts.

    :return: dict mapping each feature value key to its bias
    """
    biases = {}
    for feature_name, value_counts in iteritems(self._counter.counts):
        mode = get_mode(value_counts)
        # feature-level term, shared by every value of this feature
        base = 1 - (mode / self._counter.n_obs[feature_name])
        for feature_val, count in iteritems(value_counts):
            biases[feature_val] = (1 - (count / mode) + base) / 2
    return biases
def _compute_biases(self) -> Dict[obs_item_type, float]:
    """
    Compute the random-walk bias for every counted feature value.

    For each feature, the bias of a value is the average of two terms:
    ``1 - count / mode`` for that value, and ``1 - mode / n_obs`` for the
    feature as a whole, where ``mode`` is ``get_mode`` of the feature's
    value counts.

    :return: dict mapping each feature value key to its bias
    """
    biases = {}  # type: Dict[obs_item_type, float]
    for name, counts in self._counter.counts.items():
        mode = get_mode(counts)
        # feature-level term, shared by every value of this feature
        base = 1 - (mode / self._counter.n_obs[name])
        biases.update({
            value: (1 - (count / mode) + base) / 2
            for value, count in counts.items()
        })
    return biases
def test_get_mode(self):
    # Table-driven check: each case pairs an input counter with the value
    # cnt.get_mode is expected to return for it.
    table = {
        'empty counter': {'counter': Counter(), 'expected': 0},
        'unique mode': {'counter': self.c1, 'expected': 3},
        'nonunique mode': {'counter': self.c2, 'expected': 2},
    }
    for test_name in table:
        case = table[test_name]
        actual = cnt.get_mode(case['counter'])
        # the case name doubles as the failure message
        self.assertEqual(actual, case['expected'], test_name)
def fit(self) -> CBRW:
    """
    Compute model based on current observations in state.

    Runs the biased random walk over the transition matrix built from the
    counted observations, assigns each feature value its stationary
    probability, and stores sum-normalized per-feature relevance weights.

    :raises CBRWFitError: if no observations have been added; if building
        the transition matrix or running the random walk raises ValueError
        (the original error is attached as the cause); or if the feature
        relevance totals sum to less than EPS
    :return: self, so calls can be chained
    """
    # check number of observations added
    n_observed = get_mode(self._counter.n_obs)
    if n_observed == 0:
        raise CBRWFitError(
            'must add observations before calling fit method')

    # execute biased random walk
    try:
        pi = random_walk(self._compute_biased_transition_matrix(),
                         **self.rw_params).ravel()
    except ValueError as err:
        # chain explicitly so the traceback shows the ValueError as the
        # direct cause rather than as an error raised while handling it
        raise CBRWFitError(err) from err

    # allocate probability by feature
    stationary_prob = {}
    feature_relevance = defaultdict(int)
    for feature, idx in self._counter.index.items():
        prob = pi[idx]
        stationary_prob[feature] = prob
        # a feature's relevance is the total probability of its values
        feature_relevance[get_feature_name(feature)] += prob

    # sum normalize feature_relevance; guard against dividing by a
    # (near-)zero total
    feature_rel_sum = sum(feature_relevance.values())
    if feature_rel_sum < EPS:
        raise CBRWFitError('feature weights sum approximately zero')
    feature_relevance = {
        key: val / feature_rel_sum
        for key, val in feature_relevance.items()
    }

    self._stationary_prob = stationary_prob
    self._feature_relevance = feature_relevance
    return self