コード例 #1
0
def _get_tree_from_booster(booster: xgboost.core.Booster):
    """Return the first entry of the booster's text dump as a list of lines.

    Only element 0 of ``booster.get_dump()`` is used. Tab characters are
    stripped from the text, which is then split on newlines.
    """
    first_dump = booster.get_dump()[0]
    lines = first_dump.replace("\t", "").split("\n")
    # Drop the piece after the final newline; the original author notes that
    # this last element is empty.
    return lines[:-1]
コード例 #2
0
async def _pickle_artifact(model: xgb.core.Booster,
                           args: argparse.Namespace) -> str:
    """
    Write a text dump of the model and a bz2 compressed joblib pickle of it.

    The text dump is written to ``<train_datetime>_dump.model.raw.txt`` (a
    relative path); the pickle is written to the directory containing this
    source file as ``<train_datetime>_model.pkl.bz2``.

    :param xgb.core.Booster model:  Model to serialize
    :param argparse.Namespace args: Namespace providing ``train_datetime``,
                                    used as the prefix of both file names

    :return:                        str path to the pickled binary artifact
    """
    stamp = args.train_datetime

    # Text dump of the model.
    raw_dump_name = '{}_dump.model.raw.txt'.format(stamp)
    model.dump_model(raw_dump_name)

    # Compressed pickle, stored alongside this module.
    compressor = 'bz2'
    module_dir = os.path.dirname(os.path.abspath(__file__))
    artifact_name = '{}_model.pkl.{}'.format(stamp, compressor)
    path = os.path.join(module_dir, artifact_name)

    with open(path, 'wb') as artifact:
        # (compressor, 3) selects the compression method and level 3.
        joblib.dump(model, artifact, compress=(compressor, 3))

    _logger.info('saved model: %s' % path)
    return path
コード例 #3
0
async def evaluate(model: xgb.core.Booster, data: Tuple[xgb.DMatrix,
                                                        xgb.DMatrix],
                   args: argparse.Namespace):
    """
    Score the model on the test matrix, then run cross validation on the
    training matrix. Both results are written to the module logger.

    :param xgb.core.Booster model:                  Trained model to score
    :param Tuple[xgb.DMatrix, xgb.DMatrix] data:    (train, test) matrices; the test
                                                        labels are read via get_label()
    :param argparse.Namespace args:                 Namespace providing ``booster_params``,
                                                        which is passed to xgb.cv

    :return:                                        None
    """
    dtrain, dtest = data[0], data[1]

    y_pred = model.predict(dtest)
    _logger.info('y_pred.shape: {}'.format(y_pred.shape))

    # ------------- pick the highest-scoring class per row -----------------------------------------
    # The prediction is reshaped to (shape[0], shape[1]), i.e. the shape it
    # already reports; reading shape[1] means y_pred must have a second axis.
    n_rows = y_pred.shape[0]
    n_cols = y_pred.shape[1]
    probabilities = y_pred.reshape(n_rows, n_cols)
    # index of the largest value in every row, as a plain Python list
    classes = np.argmax(probabilities, axis=1).tolist()

    true_labels = dtest.get_label()
    y_pred_precision_score = precision_score(true_labels,
                                             classes,
                                             average='macro')
    _logger.info('y_pred_precision_score: %s' % y_pred_precision_score)

    _logger.info('running cross validation')

    booster_params = args.booster_params
    eval_metrics = {EVAL_METRIC}
    cv_callbacks = [
        xgb.callback.print_evaluation(show_stdv=False),
        xgb.callback.early_stop(3),
    ]
    cv_result = xgb.cv(booster_params,
                       dtrain,
                       num_boost_round=10,
                       nfold=5,
                       metrics=eval_metrics,
                       seed=0,
                       callbacks=cv_callbacks)
    _logger.info('evaluate.cv_result: %s' % cv_result)
コード例 #4
0
ファイル: nodes.py プロジェクト: 8-u8/kedro_trial
def evaluate_XGBoost_model(regressor: xgb.core.Booster, X_test: pd.DataFrame,
                           parameters: Dict) -> pd.DataFrame:
    """Predict with a trained booster and, in training mode, report ROC/AUC.

    The columns named by ``regressor.feature_names`` are selected from
    ``X_test`` and scored with the booster, limited to
    ``regressor.best_ntree_limit`` trees.

    When ``parameters['isTrain']`` is truthy, the true labels are read from
    ``X_test[parameters['target']]``, a ROC curve is plotted and saved under
    ``data/07_model_output/`` with today's date in the file name, and the
    AUC is logged.

    Args:
        regressor: Trained booster used for prediction.
        X_test: Frame holding the feature columns (and the target column
            when ``isTrain`` is set).
        parameters: Mapping with the keys ``'target'``, ``'id_name'`` and
            ``'isTrain'``.

    Returns:
        A DataFrame with the columns ``'ID'`` and ``'y_pred'``.
    """
    target_name = parameters['target']
    output_id = parameters['id_name']
    use_features = regressor.feature_names
    is_train = parameters['isTrain']

    xgb_test = xgb.DMatrix(X_test[use_features],
                           feature_names=use_features)
    y_pred = regressor.predict(xgb_test,
                               ntree_limit=regressor.best_ntree_limit)
    print('y predicted on XGBoost!')

    if is_train:
        y_test = X_test[target_name]
        print(type(y_pred))

        # ROC curve against the diagonal "no skill" baseline.
        fpr, tpr, _ = roc_curve(y_test, y_pred)
        plt.plot([0, 1], [0, 1], linestyle='--', label='No Skill')
        plt.plot(fpr, tpr, marker='.', label='XGBM')
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.legend(loc='lower right')

        output_date = datetime.date.today()
        filepath_ = 'data/07_model_output/ROC_plot_XGB' + str(
            output_date) + '.png'
        single_plot_writer = MatplotlibLocalWriter(filepath=filepath_)
        single_plot_writer.save(plt)
        # Clear the current figure so later plots do not draw on top of it.
        plt.clf()

        # The AUC is computed once; the inputs do not change between the
        # plot and the log line.
        score = roc_auc_score(y_test, y_pred)
        logger = logging.getLogger(__name__)
        logger.info('XGBoost AUC is %.3f.', score)

    # NOTE(review): 'ID' is filled with the value of parameters['id_name']
    # itself, not with a column read from X_test. If 'id_name' is meant to be
    # a column name, this repeats that name on every row -- confirm intent.
    output = pd.DataFrame({'ID': output_id, 'y_pred': y_pred})
    return output
コード例 #5
0
ファイル: test_xgboost_impl.py プロジェクト: aarnphm/BentoML
def predict_df(model: xgb.core.Booster, df: pd.DataFrame):
    """Predict on ``df`` and return the argmax of each prediction row.

    The frame is wrapped in an ``xgb.DMatrix`` and passed to
    ``model.predict``; ``np.argmax`` is then applied to every element
    obtained by iterating over the result.
    """
    predictions = model.predict(xgb.DMatrix(df))
    winners = list(map(np.argmax, predictions))
    return np.asarray(winners)