import logging
import os
import pickle as pkl

import numpy as np
import xgboost as xgb

# Assumed import path for the checkpointing helpers exercised below; adjust to
# your package layout (these snippets follow a SageMaker XGBoost container
# style codebase).
from sagemaker_xgboost_container import checkpointing


def test_train_zero_or_negative_rounds(tmpdir, caplog):

    X_train = np.random.random(size=(100, 5))
    y_train = np.random.random(size=(100, 1))
    dtrain = xgb.DMatrix(X_train, label=y_train)

    X_test = np.random.random(size=(100, 5))
    y_test = np.random.random(size=(100, 1))
    dtest = xgb.DMatrix(X_test, label=y_test)

    params = {"objective": "binary:logistic"}

    train_args = dict(params=params,
                      dtrain=dtrain,
                      num_boost_round=0,
                      evals=[(dtrain, 'train'), (dtest, 'test')])
    checkpoint_dir = os.path.join(tmpdir, "test_checkpoints")

    bst = checkpointing.train(train_args, checkpoint_dir)
    assert isinstance(bst, xgb.Booster)
    assert not os.listdir(checkpoint_dir)

    train_args["num_boost_round"] = -1
    bst = checkpointing.train(train_args, checkpoint_dir)
    assert isinstance(bst, xgb.Booster)
    assert not os.listdir(checkpoint_dir)


def test_train(tmpdir, caplog):

    X_train = np.random.random(size=(100, 5))
    y_train = np.random.random(size=(100, 1))
    dtrain = xgb.DMatrix(X_train, label=y_train)

    X_test = np.random.random(size=(100, 5))
    y_test = np.random.random(size=(100, 1))
    dtest = xgb.DMatrix(X_test, label=y_test)

    params = {"objective": "binary:logistic"}

    train_args = dict(
        params=params,
        dtrain=dtrain,
        num_boost_round=20,
        evals=[(dtrain, 'train'), (dtest, 'test')]
    )
    checkpoint_dir = os.path.join(tmpdir, "test_checkpoints")

    checkpointing.train(train_args, checkpoint_dir)

    # check that original train_args was not modified
    assert "callbacks" not in train_args
    assert "xgb_model" not in train_args
    assert "verbose_eval" not in train_args
    assert train_args["params"] == params
    assert train_args["dtrain"] is dtrain
    assert train_args["num_boost_round"] == 20
    # check that checkpoints were saved
    expected_files = [
        "xgboost-checkpoint.15",
        "xgboost-checkpoint.16",
        "xgboost-checkpoint.17",
        "xgboost-checkpoint.18",
        "xgboost-checkpoint.19"]
    assert sorted(os.listdir(checkpoint_dir)) == expected_files

    train_args["num_boost_round"] = 30
    checkpointing.train(train_args, checkpoint_dir)

    assert "callbacks" not in train_args
    assert "xgb_model" not in train_args
    assert "verbose_eval" not in train_args
    assert train_args["num_boost_round"] == 30

    assert "Checkpoint loaded from" in caplog.text
    assert "Resuming from iteration 20" in caplog.text

    expected_files.extend(
        ["xgboost-checkpoint.25",
         "xgboost-checkpoint.26",
         "xgboost-checkpoint.27",
         "xgboost-checkpoint.28",
         "xgboost-checkpoint.29"])
    assert sorted(os.listdir(checkpoint_dir)) == expected_files
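
# The assertions above pin down the contract of checkpointing.train(): the
# caller's train_args dict is left untouched, only the most recent checkpoints
# written during a run are kept on disk, and a later call resumes from the
# newest "xgboost-checkpoint.N" file. The following is a minimal sketch of a
# wrapper with that contract, not the package's actual implementation; the
# _SaveCheckpoint and _train_sketch names are illustrative, and the callback
# API assumes xgboost >= 1.3.
import glob


class _SaveCheckpoint(xgb.callback.TrainingCallback):
    def __init__(self, checkpoint_dir, start_iteration=0, max_to_keep=5):
        super().__init__()
        self.checkpoint_dir = checkpoint_dir
        self.start_iteration = start_iteration
        self.max_to_keep = max_to_keep

    def after_iteration(self, model, epoch, evals_log):
        iteration = self.start_iteration + epoch
        model.save_model(os.path.join(
            self.checkpoint_dir, "xgboost-checkpoint.{}".format(iteration)))
        # Prune checkpoints written by this run, keeping the newest max_to_keep.
        stale_iteration = iteration - self.max_to_keep
        if stale_iteration >= self.start_iteration:
            stale = os.path.join(self.checkpoint_dir,
                                 "xgboost-checkpoint.{}".format(stale_iteration))
            if os.path.exists(stale):
                os.remove(stale)
        return False  # never request early stopping


def _train_sketch(train_args, checkpoint_dir):
    os.makedirs(checkpoint_dir, exist_ok=True)
    args = dict(train_args)  # copy, so the caller's dict is not modified
    start_iteration = 0
    existing = sorted(
        glob.glob(os.path.join(checkpoint_dir, "xgboost-checkpoint.*")),
        key=lambda p: int(p.rsplit(".", 1)[-1]))
    if existing:
        latest = existing[-1]
        start_iteration = int(latest.rsplit(".", 1)[-1]) + 1
        args["xgb_model"] = latest  # resume boosting from the saved model
        logging.info("Checkpoint loaded from %s", latest)
        logging.info("Resuming from iteration %d", start_iteration)
    args["num_boost_round"] = max(train_args["num_boost_round"] - start_iteration, 0)
    args["callbacks"] = [_SaveCheckpoint(checkpoint_dir, start_iteration)]
    return xgb.train(**args)
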


def _xgb_train(params, dtrain, evals, num_boost_round, model_dir, is_master,
               checkpoint_path):
    """Run xgb train on arguments given with rabit initialized.

    This is our rabit execution function.

    :param args_dict: Argument dictionary used to run xgb.train().
    :param is_master: True if current node is master host in distributed training,
                        or is running single node training job.
                        Note that rabit_run will include this argument.
    """

    logging.info("params: {}, num_boost_round: {}, checkpoint_path: {}".format(
        params, num_boost_round, checkpoint_path))

    train_args = dict(params=params,
                      dtrain=dtrain,
                      num_boost_round=num_boost_round,
                      evals=evals)

    booster = checkpointing.train(train_args, checkpoint_path)

    if is_master:
        model_location = os.path.join(model_dir, 'xgboost-model')
        with open(model_location, 'wb') as f:
            pkl.dump(booster, f)
        logging.info("Stored trained model at {}".format(model_location))