def _get_tree_from_booster(booster: xgboost.core.Booster):
    """
    Return the text dump of the booster's first tree as a list of lines.

    :param xgboost.core.Booster booster: Booster whose ``get_dump()`` result
        is read; only the first entry of that dump is used
    :return: list of str, one per line of the dump, with tab characters
        removed and the final split element discarded
    """
    first_tree_dump = booster.get_dump()[0]

    # Tabs never contain a newline, so stripping them per line after the
    # split yields the same text as stripping them before it.
    lines = [line.replace("\t", "") for line in first_tree_dump.split("\n")]

    # The last split element is dropped unconditionally; it is expected to be
    # the empty string that follows the dump's trailing newline.
    return lines[:-1]
async def _pickle_artifact(model: xgb.core.Booster, args: argparse.Namespace) -> str:
    """
    Persist the model as a text dump and as a bz2-compressed joblib pickle.

    Two files are written, both named after ``args.train_datetime``:

    * ``<train_datetime>_dump.model.raw.txt`` - text dump of the model,
      written to a relative path (i.e. resolved against the current working
      directory);
    * ``<train_datetime>_model.pkl.bz2`` - the pickled model, written next to
      this source file.

    The coroutine contains no ``await``; all file I/O runs synchronously.

    :param xgb.core.Booster model: Trained XGBoost model to persist
    :param argparse.Namespace args: Namespace providing ``train_datetime``
    :return: str path to the pickled binary artifact
    """
    compression = 'bz2'
    stamp = args.train_datetime

    # Human-readable dump of the model, alongside the binary artifact.
    model.dump_model('{}_dump.model.raw.txt'.format(stamp))

    module_dir = os.path.dirname(os.path.abspath(__file__))
    artifact_name = '{}_model.pkl.{}'.format(stamp, compression)
    path = os.path.join(module_dir, artifact_name)

    with open(path, 'wb') as sink:
        # (method, level) tuple: bz2 at compression level 3.
        joblib.dump(model, sink, compress=(compression, 3))

    _logger.info('saved model: %s', path)
    return path
async def evaluate(model: xgb.core.Booster,
                   data: Tuple[xgb.DMatrix, xgb.DMatrix],
                   args: argparse.Namespace):
    """
    Score the model on the test set, then cross validate on the training set.

    The macro-averaged precision of the most confident class per test row is
    logged first. Afterwards ``xgb.cv`` is run on the training data; its
    print callback reports progress per boosting round and the result is
    logged.

    The coroutine contains no ``await``; everything runs synchronously.

    :param xgb.core.Booster model: Trained XGBoost model
    :param Tuple[xgb.DMatrix, xgb.DMatrix] data: ``(train, test)`` matrices;
        the test matrix must carry labels
    :param argparse.Namespace args: Namespace providing ``booster_params``
    :return: None
    """
    train_matrix = data[0]
    test_matrix = data[1]

    y_pred = model.predict(test_matrix)
    _logger.info('y_pred.shape: {}'.format(y_pred.shape))

    # ------------- extract most confident predictions -------------------
    # Reading shape[1] requires the prediction to have at least two
    # dimensions (one row per sample, one column per class).
    n_rows = y_pred.shape[0]
    n_classes = y_pred.shape[1]
    probabilities = y_pred.reshape(n_rows, n_classes)

    # Index of the highest-probability class for every test row.
    classes = probabilities.argmax(axis=1).tolist()

    true_labels = test_matrix.get_label()
    y_pred_precision_score = precision_score(true_labels, classes, average='macro')
    _logger.info('y_pred_precision_score: %s', y_pred_precision_score)

    _logger.info('running cross validation')
    booster_params = args.booster_params
    # NOTE(review): print_evaluation / early_stop are the function-style
    # callbacks of older XGBoost releases - confirm they exist in the pinned
    # version before upgrading.
    cv_callbacks = [
        xgb.callback.print_evaluation(show_stdv=False),
        xgb.callback.early_stop(3),
    ]
    cv_result = xgb.cv(
        booster_params,
        train_matrix,
        num_boost_round=10,
        nfold=5,
        metrics={EVAL_METRIC},
        seed=0,
        callbacks=cv_callbacks,
    )
    _logger.info('evaluate.cv_result: %s', cv_result)
def evaluate_XGBoost_model(regressor: xgb.core.Booster,
                           X_test: pd.DataFrame,
                           parameters: Dict) -> pd.DataFrame:
    """
    Predict with a trained booster and, in training mode, report ROC/AUC.

    Predictions are made on the columns of ``X_test`` named by
    ``regressor.feature_names``. When ``parameters['isTrain']`` is truthy the
    target column is read from ``X_test``, a ROC curve is plotted and saved
    to ``data/07_model_output/ROC_plot_XGB<today>.png``, and the AUC is
    logged. Outside training mode no metric is computed or logged.

    :param xgb.core.Booster regressor: Trained booster; its ``feature_names``
        and ``best_ntree_limit`` attributes are used
    :param pd.DataFrame X_test: Feature frame; must also contain the target
        column when ``isTrain`` is truthy
    :param Dict parameters: Must provide the keys ``'target'``,
        ``'id_name'`` and ``'isTrain'``
    :return: pd.DataFrame with columns ``'ID'`` and ``'y_pred'``
    :raises KeyError: if a required key is missing from ``parameters``
    """
    target_name = parameters['target']
    output_id = parameters['id_name']
    use_features = regressor.feature_names
    is_train = parameters['isTrain']

    xgb_test = xgb.DMatrix(X_test[use_features], feature_names=use_features)
    # NOTE(review): ntree_limit / best_ntree_limit belong to the older
    # XGBoost prediction API (newer releases use iteration_range) - confirm
    # against the pinned XGBoost version before upgrading.
    y_pred = regressor.predict(xgb_test,
                               ntree_limit=regressor.best_ntree_limit)
    print('y predicted on XGBoost!')

    if is_train:
        y_test = X_test[target_name]
        fpr, tpr, _ = roc_curve(y_test, y_pred)

        try:
            plt.plot([0, 1], [0, 1], linestyle='--', label='No Skill')
            plt.plot(fpr, tpr, marker='.', label='XGBM')
            plt.xlabel('False Positive Rate')
            plt.ylabel('True Positive Rate')
            plt.legend(loc='lower right')

            output_date = datetime.date.today()
            filepath_ = 'data/07_model_output/ROC_plot_XGB' + str(
                output_date) + '.png'
            single_plot_writer = MatplotlibLocalWriter(filepath=filepath_)
            single_plot_writer.save(plt)
        finally:
            # pyplot state is global: always clear the figure so a failed
            # save cannot leak these curves into the next plot.
            plt.clf()

        # The AUC only exists in training mode, so it is computed (once) and
        # logged inside this branch.
        score = roc_auc_score(y_test, y_pred)
        logger = logging.getLogger(__name__)
        logger.info('XGBoost AUC is %.3f.', score)

    # NOTE(review): 'ID' receives parameters['id_name'] as-is. If that value
    # is a column name rather than the ID values themselves, pandas will
    # repeat that single value on every row - confirm whether
    # X_test[output_id] was intended.
    output = pd.DataFrame({'ID': output_id, 'y_pred': y_pred})
    return output
def predict_df(model: xgb.core.Booster, df: pd.DataFrame):
    """
    Predict on a data frame and return the arg-max index of each prediction.

    :param xgb.core.Booster model: Booster used for prediction
    :param pd.DataFrame df: Input rows, converted to an ``xgb.DMatrix``
    :return: numpy array holding ``np.argmax`` of every element obtained by
        iterating over the model's prediction output
    """
    dmatrix = xgb.DMatrix(df)
    raw_predictions = model.predict(dmatrix)

    # np.argmax is applied to each prediction separately (not with axis=1),
    # so the result is defined by iteration over the prediction output.
    winners = list(map(np.argmax, raw_predictions))
    return np.asarray(winners)