Esempio n. 1
0
    def _fit(self, classif, X, a, r, p):
        """
        Fit the binary oracle of a single tree node on the observations routed to it.

        Parameters
        ----------
        classif : int
            Index of the node. Selects both ``self.tree.node_comparisons[classif]``
            and the slot ``self._oracles[classif]`` that is fitted or replaced.
        X : array (n_samples, n_features)
            Matrix of covariates; rows are selected with a boolean mask built from ``a``.
        a : array (n_samples)
            Arm chosen for each observation.
        r : array (n_samples)
            Reward observed for each observation; compared against 0.5 here.
        p : array (n_samples)
            Per-observation score of the chosen arm, used as the denominator
            of the sample weights.

        Notes
        -----
        Nothing is returned. ``self._oracles[classif]`` is fitted in place, or
        replaced by ``_RandomPredictor`` (no observations reach the node),
        ``_OnePredictor`` (every label is 1) or ``_ZeroPredictor`` (every label is 0).
        """
        comparison = self.tree.node_comparisons[classif]

        # Keep only the observations whose arm is listed in comparison[0].
        # np.isin replaces np.in1d, which NumPy deprecated in 2.0.
        in_node = np.isin(a, comparison[0])
        X_node = X[in_node, :]
        a_node = a[in_node]
        r_node = r[in_node]
        p_node = p[in_node]

        high_reward = r_node >= .5

        # Base label: 1 when the chosen arm is listed in comparison[2], else 0.
        y = np.isin(a_node, comparison[2]).astype('uint8')

        # The base label is inverted wherever the reward is at least one half.
        y_node = y.copy()
        y_node[high_reward] = 1 - y[high_reward]

        # Weight is |r - 0.5| / p, then rescaled so the weights sum to the
        # number of observations in the node.
        w_node = np.abs(r_node - .5) / p_node
        w_node = w_node * w_node.shape[0] / np.sum(w_node)

        n_obs = y_node.shape[0]
        n_pos = y_node.sum()

        # A classifier cannot be fitted on zero rows or on a single class, so
        # those cases get a stand-in predictor instead.
        if n_obs == 0:
            self._oracles[classif] = _RandomPredictor()
        elif n_pos == n_obs:
            self._oracles[classif] = _OnePredictor()
        elif n_pos == 0:
            self._oracles[classif] = _ZeroPredictor()
        else:
            self._oracles[classif].fit(X_node, y_node, sample_weight=w_node)
Esempio n. 2
0
    def fit(self, X, a, r, p):
        """
        Fits the Offset Tree estimator to partially-labeled data collected from a different policy.

        One copy of ``self.base_algorithm`` is created per tree node
        (``self.nchoices - 1`` in total) and each is fitted on the observations
        whose chosen arm belongs to that node. A node that receives no
        observations, or whose labels are all equal, gets a
        ``_RandomPredictor``, ``_OnePredictor`` or ``_ZeroPredictor`` instead.

        Parameters
        ----------
        X : array (n_samples, n_features)
            Matrix of covariates for the available data.
        a : array (n_samples), int type
            Arms or actions that were chosen for each observations.
        r : array (n_samples), {0,1}
            Rewards that were observed for the chosen actions. Must be binary rewards 0/1.
        p : array (n_samples)
            Reward estimates for the actions that were chosen by the policy.
            They are multiplied by ``self.c`` and floored at ``self.pmin`` when
            those are set, and then used as the denominator of the sample
            weights (NOTE(review): this usage suggests propensity-like scores
            - confirm against the class documentation).
        """
        X, a, r = _check_fit_input(X, a, r)
        p = _check_1d_inp(p)
        assert p.shape[0] == X.shape[0], "'p' must have one entry per row of 'X'."

        if self.c is not None:
            p = self.c * p
        if self.pmin is not None:
            # Flooring the scores bounds the size of the weights computed below.
            p = np.clip(p, a_min=self.pmin, a_max=None)

        # One independent copy of the base classifier per tree node.
        self._oracles = [
            deepcopy(self.base_algorithm) for _ in range(self.nchoices - 1)
        ]
        for classif in range(len(self._oracles)):
            comparison = self.tree.node_comparisons[classif]

            # Keep only the observations whose arm is listed in comparison[0].
            # np.isin replaces np.in1d, which NumPy deprecated in 2.0.
            in_node = np.isin(a, comparison[0])
            X_node = X[in_node, :]
            a_node = a[in_node]
            r_node = r[in_node]
            p_node = p[in_node]

            high_reward = r_node >= .5

            # Base label: 1 when the chosen arm is listed in comparison[2].
            y = np.isin(a_node, comparison[2]).astype('uint8')

            # The base label is inverted wherever the reward is at least 0.5.
            y_node = y.copy()
            y_node[high_reward] = 1 - y[high_reward]

            # Weight is |r - 0.5| / p, rescaled so the weights sum to the
            # number of observations in the node.
            w_node = np.abs(r_node - .5) / p_node
            w_node = w_node * w_node.shape[0] / np.sum(w_node)

            n_obs = y_node.shape[0]
            n_pos = y_node.sum()

            # A classifier cannot be fitted on zero rows or on a single class.
            if n_obs == 0:
                self._oracles[classif] = _RandomPredictor()
            elif n_pos == n_obs:
                self._oracles[classif] = _OnePredictor()
            elif n_pos == 0:
                self._oracles[classif] = _ZeroPredictor()
            else:
                self._oracles[classif].fit(X_node,
                                           y_node,
                                           sample_weight=w_node)