def _fit(self, classif, X, a, r, p):
    """
    Fit (or replace) the binary oracle stored at one node of the tree.

    Parameters
    ----------
    classif : int
        Index of the node. Selects ``self.tree.node_comparisons[classif]``
        and the entry of ``self._oracles`` that is fitted or replaced.
    X : array (n_samples, n_features)
        Matrix of covariates.
    a : array (n_samples), int type
        Arms or actions that were chosen for each observation.
        Assumed to be 1-d (it is used as a row mask for ``X``).
    r : array (n_samples)
        Rewards observed for the chosen actions.
    p : array (n_samples)
        Per-observation divisors of the sample weights, as already
        pre-processed by the caller.

    Notes
    -----
    Only rows whose arm is listed in ``node_comparisons[classif][0]`` are
    used. The binary target is 1 when the arm is listed in
    ``node_comparisons[classif][2]``, and it is flipped for rows with
    reward >= 0.5. Each row is weighted by ``|r - 0.5| / p``, rescaled so
    that the weights sum to the number of rows in the node.

    The result is written into ``self._oracles[classif]``; nothing is
    returned.
    """
    node_arms = self.tree.node_comparisons[classif]

    # np.isin replaces np.in1d, which is deprecated as of NumPy 2.0.
    in_node = np.isin(a, node_arms[0])
    a_node = a[in_node]
    r_node = r[in_node]

    # No data reached this node: nothing to learn from.
    if a_node.shape[0] == 0:
        self._oracles[classif] = _RandomPredictor()
        return

    # Label is "arm listed in entry 2", inverted where the reward is >= 0.5.
    reward_high = r_node >= .5
    y_node = (np.isin(a_node, node_arms[2]) ^ reward_high).astype('uint8')

    # A single-class target cannot be fed to a classifier; use a constant
    # predictor instead. Checked before touching X/p so that no work is
    # wasted on weights that would be discarded.
    n_positive = y_node.sum()
    if n_positive == y_node.shape[0]:
        self._oracles[classif] = _OnePredictor()
        return
    if n_positive == 0:
        self._oracles[classif] = _ZeroPredictor()
        return

    # |r - .5| equals (.5 - r) below one half and (r - .5) from one half up.
    w_node = np.abs(r_node - .5) / p[in_node]
    # Rescale so the weights average to one.
    w_node = w_node * w_node.shape[0] / np.sum(w_node)

    self._oracles[classif].fit(X[in_node, :], y_node, sample_weight=w_node)
def fit(self, X, a, r, p):
    """
    Fits the Offset Tree estimator to partially-labeled data collected from a
    different policy.

    Parameters
    ----------
    X : array (n_samples, n_features)
        Matrix of covariates for the available data.
    a : array (n_samples), int type
        Arms or actions that were chosen for each observations.
    r : array (n_samples), {0,1}
        Rewards that were observed for the chosen actions. Must be binary
        rewards 0/1.
    p : array (n_samples)
        Values that the data-collecting policy associated with each chosen
        action (originally documented as its reward estimates). They are
        used as divisors of the sample weights, after being multiplied by
        ``self.c`` (when set) and clipped from below at ``self.pmin``
        (when set).

    Notes
    -----
    Replaces ``self._oracles`` with ``nchoices - 1`` fresh copies of
    ``self.base_algorithm`` and fits each of them through ``self._fit``.
    """
    X, a, r = _check_fit_input(X, a, r)
    p = _check_1d_inp(p)
    assert p.shape[0] == X.shape[0], "'p' must have one entry per row of 'X'."

    if self.c is not None:
        p = self.c * p
    if self.pmin is not None:
        # Lower bound only: keeps the divisors away from zero.
        p = np.clip(p, a_min=self.pmin, a_max=None)

    # Independent copies, so fitting one node never affects another.
    self._oracles = [
        deepcopy(self.base_algorithm) for _ in range(self.nchoices - 1)
    ]

    # Per-node logic lives in `_fit`; delegating keeps a single
    # implementation instead of a duplicated copy of it here.
    for classif in range(len(self._oracles)):
        self._fit(classif, X, a, r, p)