Code example #1
    def fit(self):
        """
        Compute model based on current observations in state
        """
        # check number of observations added
        n_observed = get_mode(self._counter.n_obs)
        if n_observed == 0:
            raise CBRWFitError('must add observations before calling fit method')

        # execute biased random walk
        transition_matrix = self._compute_biased_transition_matrix()
        pi = random_walk(transition_matrix, **self.rw_params).ravel()

        stationary_prob = {}
        feature_relevance = defaultdict(int)

        for feature, idx in iteritems(self._counter.index):
            prob = pi[idx]
            stationary_prob[feature] = prob
            feature_relevance[get_feature_name(feature)] += prob
        # feature relevance scores are to be used as weights; accordingly the paper
        # normalizes them to sum to 1, however this sum normalization should not be
        # necessary since sum(pi) = 1 by definition
        self._stationary_prob = stationary_prob
        self._feature_relevance = dict(feature_relevance)
        return self
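For context, here is a minimal sketch of how a fit method like this is typically driven end to end. The package path coupled_biased_random_walks, the CBRW class name, and the add_observations method are assumptions about the surrounding API rather than code shown above:

    from coupled_biased_random_walks import CBRW  # assumed import path

    observations = [
        {'color': 'red',  'shape': 'circle'},
        {'color': 'red',  'shape': 'square'},
        {'color': 'blue', 'shape': 'circle'},
    ]

    detector = CBRW()
    detector.add_observations(observations)  # populates detector._counter
    detector.fit()                           # runs the biased random walk
    # stationary probability per (feature, value) and relevance per feature
    print(detector._stationary_prob)
    print(detector._feature_relevance)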
Code example #2
 def _compute_biases(self):
     """
     Computes bias for random walk for each feature tuple
     """
     bias_dict = {}
     for feature_name, value_counts in iteritems(self._counter.counts):
         mode = get_mode(value_counts)
         base = 1 - (mode / self._counter.n_obs[feature_name])
         bias_dict.update({feature_val: (1 - (count / mode) + base) / 2
                           for feature_val, count in iteritems(value_counts)})
     return bias_dict
Code example #3
 def _compute_biases(self) -> Dict[obs_item_type, float]:
     """
     Computes bias for random walk for each feature tuple
     """
     bias_dict = {}  # type: Dict[obs_item_type, float]
     for feature_name, value_counts in self._counter.counts.items():
         mode = get_mode(value_counts)
         base = 1 - (mode / self._counter.n_obs[feature_name])
         for feature_val, count in value_counts.items():
             bias = (1 - (count / mode) + base) / 2
             bias_dict[feature_val] = bias
     return bias_dict
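To make the bias formula concrete, here is a small worked example with the counter structures built inline instead of read from self._counter (the values are chosen purely for illustration):

    from collections import Counter

    # counts of each observed value for a single feature, e.g. 'color'
    value_counts = Counter({('color', 'red'): 3, ('color', 'blue'): 1})
    n_obs = 4                          # total observations of this feature
    mode = max(value_counts.values())  # 3

    base = 1 - (mode / n_obs)          # 1 - 3/4 = 0.25
    for feature_val, count in value_counts.items():
        bias = (1 - (count / mode) + base) / 2
        print(feature_val, bias)
    # ('color', 'red'):  (1 - 3/3 + 0.25) / 2 = 0.125   (common value, low bias)
    # ('color', 'blue'): (1 - 1/3 + 0.25) / 2 ~= 0.458  (rare value, high bias)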
Code example #4
 def test_get_mode(self):
     table = {
         'empty counter': {
             'counter': Counter(),
             'expected': 0
         },
         'unique mode': {
             'counter': self.c1,
             'expected': 3
         },
         'nonunique mode': {
             'counter': self.c2,
             'expected': 2
         },
     }
     for test_name, test in table.items():
         mode = cnt.get_mode(test['counter'])
         self.assertEqual(mode, test['expected'], test_name)
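The table of cases above pins down the expected behaviour of get_mode: the count of the most common element, with 0 for an empty counter. A minimal sketch consistent with those expectations follows; the library's actual implementation may differ in detail:

    from collections import Counter

    def get_mode(counter: Counter) -> int:
        """Return the largest count in the counter, or 0 if it is empty."""
        if not counter:
            return 0
        return counter.most_common(1)[0][1]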
Code example #5
    def fit(self) -> CBRW:
        """
        Compute model based on current observations in state
        """
        # check number of observations added
        n_observed = get_mode(self._counter.n_obs)
        if n_observed == 0:
            raise CBRWFitError(
                'must add observations before calling fit method')

        # execute biased random walk
        try:
            pi = random_walk(self._compute_biased_transition_matrix(),
                             **self.rw_params).ravel()
        except ValueError as err:
            raise CBRWFitError(err)

        # allocate probability by feature
        stationary_prob = {}
        feature_relevance = defaultdict(int)
        for feature, idx in self._counter.index.items():
            prob = pi[idx]
            stationary_prob[feature] = prob
            feature_relevance[get_feature_name(feature)] += prob

        # sum normalize feature_relevance
        feature_rel_sum = sum(feature_relevance.values())
        if feature_rel_sum < EPS:
            raise CBRWFitError('feature weights sum approximately zero')
        feature_relevance = {
            key: val / feature_rel_sum
            for key, val in feature_relevance.items()
        }

        self._stationary_prob = stationary_prob
        self._feature_relevance = feature_relevance
        return self
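The comment in code example #1 notes that the feature relevance scores are meant to be used as weights. Below is a hedged sketch of how a fitted model could score a single observation as the relevance-weighted sum of the stationary probabilities of its values; the (feature_name, value) tuple keys and the handling of unseen values are assumptions, and the library's real scoring method may differ:

    def score_observation(observation, stationary_prob, feature_relevance):
        """Relevance-weighted sum of the stationary probabilities of values."""
        score = 0.0
        for feature_name, value in observation.items():
            item = (feature_name, value)
            # unseen values contribute nothing in this simple sketch
            score += feature_relevance.get(feature_name, 0.0) * \
                     stationary_prob.get(item, 0.0)
        return score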