def test_train_zero_or_negative_rounds(tmpdir, caplog):
    X_train = np.random.random(size=(100, 5))
    y_train = np.random.random(size=(100, 1))
    dtrain = xgb.DMatrix(X_train, label=y_train)

    X_test = np.random.random(size=(100, 5))
    y_test = np.random.random(size=(100, 1))
    dtest = xgb.DMatrix(X_test, label=y_test)

    params = {"objective": "binary:logistic"}

    train_args = dict(
        params=params,
        dtrain=dtrain,
        num_boost_round=0,
        evals=[(dtrain, 'train'), (dtest, 'test')])

    checkpoint_dir = os.path.join(tmpdir, "test_checkpoints")

    # Zero boosting rounds: a Booster is still returned, but no checkpoints are written.
    bst = checkpointing.train(train_args, checkpoint_dir)
    assert isinstance(bst, xgb.Booster)
    assert not os.listdir(checkpoint_dir)

    # Negative boosting rounds behave the same way.
    train_args["num_boost_round"] = -1
    bst = checkpointing.train(train_args, checkpoint_dir)
    assert isinstance(bst, xgb.Booster)
    assert not os.listdir(checkpoint_dir)
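
# Illustrative sketch (not part of the module under test): with num_boost_round <= 0,
# XGBoost's training loop performs zero boosting iterations, so a checkpoint callback
# never fires, yet xgb.train() still returns a Booster. That is the behavior the test
# above relies on. The helper name below is hypothetical.
def _sketch_zero_round_training(dtrain):
    params = {"objective": "binary:logistic"}
    bst = xgb.train(params, dtrain, num_boost_round=0)  # no boosting rounds are run
    assert isinstance(bst, xgb.Booster)                 # a model object is still returned
    return bst
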
def test_train(tmpdir, caplog):
    X_train = np.random.random(size=(100, 5))
    y_train = np.random.random(size=(100, 1))
    dtrain = xgb.DMatrix(X_train, label=y_train)

    X_test = np.random.random(size=(100, 5))
    y_test = np.random.random(size=(100, 1))
    dtest = xgb.DMatrix(X_test, label=y_test)

    params = {"objective": "binary:logistic"}

    train_args = dict(
        params=params,
        dtrain=dtrain,
        num_boost_round=20,
        evals=[(dtrain, 'train'), (dtest, 'test')])

    checkpoint_dir = os.path.join(tmpdir, "test_checkpoints")

    checkpointing.train(train_args, checkpoint_dir)

    # Check that the original train_args was not modified.
    assert "callbacks" not in train_args
    assert "xgb_model" not in train_args
    assert "verbose_eval" not in train_args
    assert train_args["params"] == params
    assert train_args["dtrain"] is dtrain
    assert train_args["num_boost_round"] == 20

    # Check that checkpoints were saved (only the most recent ones are retained).
    expected_files = [
        "xgboost-checkpoint.15",
        "xgboost-checkpoint.16",
        "xgboost-checkpoint.17",
        "xgboost-checkpoint.18",
        "xgboost-checkpoint.19"]
    assert sorted(os.listdir(checkpoint_dir)) == expected_files

    # Resume training from the latest checkpoint for 10 more rounds.
    train_args["num_boost_round"] = 30
    checkpointing.train(train_args, checkpoint_dir)

    assert "callbacks" not in train_args
    assert "xgb_model" not in train_args
    assert "verbose_eval" not in train_args
    assert train_args["num_boost_round"] == 30

    assert "Checkpoint loaded from" in caplog.text
    assert "Resuming from iteration 20" in caplog.text

    expected_files.extend([
        "xgboost-checkpoint.25",
        "xgboost-checkpoint.26",
        "xgboost-checkpoint.27",
        "xgboost-checkpoint.28",
        "xgboost-checkpoint.29"])
    assert sorted(os.listdir(checkpoint_dir)) == expected_files
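
# Illustrative sketch (assumption, not the actual checkpointing module): the behavior the
# test above exercises can be built from two pieces — (1) load the newest
# "xgboost-checkpoint.N" file from checkpoint_dir and resume at iteration N + 1, and
# (2) attach a callback that saves a checkpoint after every boosting iteration. The real
# module also appears to retain only the most recent checkpoints (see expected_files
# above); that pruning is omitted here. _load_latest_checkpoint and _SaveCheckpoint are
# hypothetical names.
class _SaveCheckpoint(xgb.callback.TrainingCallback):
    def __init__(self, checkpoint_dir, start_iteration=0):
        self.checkpoint_dir = checkpoint_dir
        self.start_iteration = start_iteration

    def after_iteration(self, model, epoch, evals_log):
        # Save one file per global iteration, e.g. "xgboost-checkpoint.19".
        iteration = self.start_iteration + epoch
        path = os.path.join(self.checkpoint_dir, "xgboost-checkpoint.{}".format(iteration))
        model.save_model(path)
        return False  # returning False tells XGBoost to keep training


def _train_with_checkpoints_sketch(train_args, checkpoint_dir):
    # start_iteration is the first iteration to run after loading, e.g. 20 when
    # "xgboost-checkpoint.19" is the newest file, or 0 when the directory is empty.
    xgb_model, start_iteration = _load_latest_checkpoint(checkpoint_dir)  # hypothetical helper
    args = dict(train_args)  # copy so the caller's dict is not modified
    args["num_boost_round"] -= start_iteration  # only run the remaining rounds
    args["xgb_model"] = xgb_model
    args["callbacks"] = [_SaveCheckpoint(checkpoint_dir, start_iteration)]
    return xgb.train(**args)
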
def _xgb_train(params, dtrain, evals, num_boost_round, model_dir, is_master, checkpoint_path):
    """Run xgb.train() on the given arguments with rabit initialized.

    This is our rabit execution function.

    :param params: Booster parameters passed through to xgb.train().
    :param dtrain: Training data as an xgb.DMatrix.
    :param evals: List of (DMatrix, name) pairs to evaluate during training.
    :param num_boost_round: Total number of boosting rounds to run.
    :param model_dir: Directory where the trained model is saved on the master host.
    :param is_master: True if the current node is the master host in distributed training,
        or is running a single node training job. Note that rabit_run will include this argument.
    :param checkpoint_path: Directory used to save and load checkpoints.
    """
    logging.info("params: {}, num_boost_round: {}, checkpoint_path: {}".format(
        params, num_boost_round, checkpoint_path))

    train_args = dict(
        params=params,
        dtrain=dtrain,
        num_boost_round=num_boost_round,
        evals=evals)
    booster = checkpointing.train(train_args, checkpoint_path)

    if is_master:
        model_location = model_dir + '/xgboost-model'
        with open(model_location, 'wb') as f:
            pkl.dump(booster, f)
        logging.info("Stored trained model at {}".format(model_location))
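
# Illustrative single-node invocation (assumption): in the container, _xgb_train is handed
# to a rabit execution wrapper that supplies is_master; outside that wrapper it can be
# exercised directly as below. The directory paths and hyperparameters are hypothetical.
def _run_single_node_sketch(dtrain, dval):
    params = {"objective": "binary:logistic", "max_depth": 3}
    _xgb_train(
        params=params,
        dtrain=dtrain,
        evals=[(dtrain, 'train'), (dval, 'validation')],
        num_boost_round=100,
        model_dir="/opt/ml/model",              # hypothetical path
        is_master=True,                         # a single-node job acts as its own master
        checkpoint_path="/opt/ml/checkpoints")  # hypothetical path
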