def _generate_leaf_node_predictions(self, data: xgb.DMatrix) -> np.ndarray:
    """Return the leaf node each sample lands in, for each tree of the model.

    Calls ``self.model.predict`` with ``pred_leaf=True`` and ``ntree_limit``
    set to ``self.model.best_iteration + 1``. A 1d result is reshaped into a
    single-column 2d array so callers always receive a 2d array.

    Parameters
    ----------
    data : xgb.DMatrix
        Data to generate predictions on.

    Returns
    -------
    np.ndarray
        Leaf node predictions; one row per sample and one column per tree.

    """
    check_type(data, [xgb.DMatrix], "data")

    # Only use trees up to and including the model's best iteration.
    tree_limit = self.model.best_iteration + 1

    # Each record gives the leaf node of a sample in each tree.
    leaves = self.model.predict(
        data=data,
        pred_leaf=True,
        ntree_limit=tree_limit,
    )

    if leaves.ndim != 1:
        return leaves

    # predict returned a flat array: expose it as (n_rows, 1) instead.
    return leaves.reshape(data.num_row(), 1)
def xgbooster_predict_proba(booster: xgb.Booster, d_x: xgb.DMatrix) -> np.ndarray:
    """Simulate the `predict_proba` interface from sklearn.

    This function will only work as expected if `booster` has been trained
    using the `binary:logistic` loss, so that `booster.predict` yields the
    probability of the positive class.

    Parameters
    ----------
    booster : xgboost.Booster
        The trained booster
    d_x : xgboost.DMatrix
        The dataset

    Returns
    -------
    y_proba_pred : numpy.ndarray
        The probabilistic predictions. The shape of the array is (n_row, 2):
        column 0 holds ``1 - score`` and column 1 holds the score itself.

    """
    positive = booster.predict(d_x)
    negative = 1 - positive

    # Both columns are overwritten below, so the buffer needs no zero fill.
    probabilities = np.empty((d_x.num_row(), 2))
    probabilities[:, 1] = positive
    probabilities[:, 0] = negative
    return probabilities
def dmatrix_to_numpy(dmatrix: xgb.DMatrix) -> np.ndarray:
    """Convert DMatrix to 2d numpy array

    The DMatrix is saved in its binary format to a temporary file, which is
    then read back by the stream parser matching the installed XGBoost
    version. The temporary file is removed before returning.

    Parameters
    ----------
    dmatrix : xgb.DMatrix
        DMatrix to convert

    Returns
    -------
    np.ndarray
        2d numpy array with the corresponding DMatrix feature values

    Raises
    ------
    InvalidInput
        Input is not a valid DMatrix

    """
    if not isinstance(dmatrix, xgb.DMatrix):
        raise InvalidInput("Type error: input parameter is not DMatrix")

    # Pick the parser that matches the installed XGBoost version.
    if version.parse(xgb.__version__) < version.parse('1.0.0'):
        stream_parser = DMatrixStreamParserV0_80
    else:
        stream_parser = DMatrixStreamParserV1_0_0

    # We set delete=False to avoid permissions error: XGBoost writes to the
    # file by name, so it must not be removed when the handle is closed.
    # The file is created *before* entering the try block so that the cleanup
    # in `finally` never refers to an unbound `fp` if creation itself fails
    # (which would otherwise mask the real error with an UnboundLocalError).
    fp = tempfile.NamedTemporaryFile(delete=False)
    try:
        with fp:
            dmatrix.save_binary(fp.name)
            result = stream_parser(fp, dmatrix.num_row(), dmatrix.num_col()).parse()
    finally:
        # Parsing finished (or failed) and the handle is closed; removal is
        # best effort, so a failure to delete must not hide the outcome.
        with suppress(OSError):
            os.remove(fp.name)

    return result