Example #1
0
def test_dist_training_misconfigured_num_machines():
    """Data-parallel config with two train files and a machine count of 3 must fail.

    Expects ``generate_context_files`` to raise ``RuntimeError`` with a
    message that refers to ``num_machines=3``.
    """
    train_files = [
        "gs://lightgbm-test/regression.train1",
        "gs://lightgbm-test/regression.train2",
    ]
    config = EXAMPLE_CONFIG.copy()
    config["tree_learner"] = "data"
    config["train_data"] = ",".join(train_files)

    with pytest.raises(RuntimeError) as excinfo:
        lightgbm.generate_context_files(config, EXMAPLE_CONFIG_FILE_NAME, 3)

    message = str(excinfo.value)
    assert "field in the config should be equal to the num_machines=3 config value." in message
Example #2
0
def test_dist_training_misconfigured_input_files():
    """Feature-parallel config with more than one train file must fail.

    Expects ``generate_context_files`` to raise ``RuntimeError`` complaining
    that ``train_data`` lists more than one file.
    """
    train_files = [
        "gs://lightgbm-test/regression.train1",
        "gs://lightgbm-test/regression.train2",
    ]
    config = EXAMPLE_CONFIG.copy()
    config["tree_learner"] = "feature"
    config["train_data"] = ",".join(train_files)

    with pytest.raises(RuntimeError) as excinfo:
        lightgbm.generate_context_files(config, EXMAPLE_CONFIG_FILE_NAME, 2)

    message = str(excinfo.value)
    assert "train_data has more than one file specified" in message
Example #3
0
def test_entrypoint_content_dist_data_parallel_no_weight_files():
    """Check the generated entrypoint script for two-machine data-parallel training.

    ``GCSStorage.exists`` is patched so that every path except those ending
    in ``.weight`` is reported as existing. The entrypoint script produced by
    ``generate_context_files`` is then compared verbatim against the expected
    shell script, in which each rank copies its own train file.
    """
    config = EXAMPLE_CONFIG.copy()
    config["tree_learner"] = "data"
    config["train_data"] = ",".join(["gs://lightgbm-test/regression.train1",
                            "gs://lightgbm-test/regression.train2"])
    with patch('fairing.cloud.storage.GCSStorage.exists', new=lambda bucket,path: not path.endswith(".weight")):
        output_map = lightgbm.generate_context_files(
            config, EXMAPLE_CONFIG_FILE_NAME, 2)
    entrypoint_file_in_docker = posixpath.join(constants.DEFAULT_DEST_PREFIX, 'entrypoint.sh')
    # output_map maps local file -> destination path; find the local file
    # whose destination is the entrypoint script.
    entrypoint_file = None
    for k, v in output_map.items():
        if v == entrypoint_file_in_docker:
            entrypoint_file = k
    # Fail with a clear message instead of a TypeError from open(None).
    assert entrypoint_file is not None, "entrypoint.sh missing from output map"
    # Use a context manager so the file handle is closed deterministically.
    with open(entrypoint_file, "r") as entrypoint:
        actual = entrypoint.read()
    expected = """#!/bin/sh
set -e
RANK=`python lightgbm_dist_training_init.py config.conf mlist.txt`
case $RANK in
	0)
		gsutil cp -r gs://lightgbm-test/regression.train1 /app/train_data
		;;
	1)
		gsutil cp -r gs://lightgbm-test/regression.train2 /app/train_data
		;;
esac
gsutil cp -r gs://lightgbm-test/regression.test {0}/regression.test
echo 'All files are copied!'
lightgbm config={0}/config.conf
gsutil cp -r {0}/model.txt gs://lightgbm-test/model.txt
""".format(posixpath.realpath(constants.DEFAULT_DEST_PREFIX))
    print(actual)
    assert expected == actual
Example #4
0
def test_input_file_not_found():
    """A RuntimeError about a missing remote file is raised when nothing exists.

    ``GCSStorage.exists`` is patched to always return ``False`` while
    ``generate_context_files`` runs.
    """
    with pytest.raises(RuntimeError) as excinfo, patch(
            'fairing.cloud.storage.GCSStorage.exists',
            new=lambda x, y: False):
        lightgbm.generate_context_files(
            EXAMPLE_CONFIG, EXMAPLE_CONFIG_FILE_NAME, 1)

    err_msg = str(excinfo.value)
    assert "Remote file " in err_msg
    assert "does't exist" in err_msg
Example #5
0
def test_context_files_list():
    """Check the destination paths (map values) of the generated context files."""
    output_map = lightgbm.generate_context_files(EXAMPLE_CONFIG,
                                                 EXMAPLE_CONFIG_FILE_NAME)
    expected_names = ('config.conf.original', 'config.conf', 'entrypoint.sh')
    expected = sorted(
        posixpath.join(constants.DEFAULT_DEST_PREFIX, name)
        for name in expected_names
    )
    # Order of the map is irrelevant; compare sorted destinations.
    assert sorted(output_map.values()) == expected
Example #6
0
def test_context_files_list():
    """Check the destination paths (map values) of the generated context files.

    ``GCSStorage.exists`` is replaced by a mock while the files are generated.
    """
    with patch('fairing.cloud.storage.GCSStorage.exists'):
        output_map = lightgbm.generate_context_files(
            EXAMPLE_CONFIG, EXMAPLE_CONFIG_FILE_NAME, 1)
    expected_names = (
        'config.conf.original',
        'config.conf',
        'entrypoint.sh',
        'utils.py',
    )
    expected = sorted(
        posixpath.join(constants.DEFAULT_DEST_PREFIX, name)
        for name in expected_names
    )
    # Order of the map is irrelevant; compare sorted destinations.
    assert expected == sorted(output_map.values())
Example #7
0
def test_entrypoint_content():
    """Check the generated entrypoint script against the expected shell script.

    The entrypoint file is located in the map returned by
    ``generate_context_files`` via its destination path, read from disk, and
    compared verbatim with the expected content.
    """
    output_map = lightgbm.generate_context_files(EXAMPLE_CONFIG,
                                                 EXMAPLE_CONFIG_FILE_NAME)
    entrypoint_file_in_docker = posixpath.join(constants.DEFAULT_DEST_PREFIX,
                                               'entrypoint.sh')
    # output_map maps local file -> destination path; find the local file
    # whose destination is the entrypoint script.
    entrypoint_file = None
    for k, v in output_map.items():
        if v == entrypoint_file_in_docker:
            entrypoint_file = k
    # Fail with a clear message instead of a TypeError from open(None).
    assert entrypoint_file is not None, "entrypoint.sh missing from output map"
    # Use a context manager so the file handle is closed deterministically.
    with open(entrypoint_file, "r") as entrypoint:
        actual = entrypoint.read()
    expected = """#!/bin/sh
set -e
gsutil cp gs://lightgbm-test/regression.train {0}/regression.train
gsutil cp gs://lightgbm-test/regression.test {0}/regression.test
echo 'All files are copied!'
lightgbm config={0}/config.conf
gsutil cp {0}/model.txt gs://lightgbm-test/model.txt
""".format(posixpath.realpath(constants.DEFAULT_DEST_PREFIX))
    print(actual)
    assert actual == expected
Example #8
0
def test_entrypoint_content_no_weight_file():
    """Check the generated entrypoint script when no ``.weight`` files exist.

    ``GCSStorage.exists`` is patched so that every path except those ending
    in ``.weight`` is reported as existing. The entrypoint script is then
    read from disk and compared verbatim with the expected content.
    """
    with patch('fairing.cloud.storage.GCSStorage.exists', new=lambda bucket,path: not path.endswith(".weight")):
        output_map = lightgbm.generate_context_files(
            EXAMPLE_CONFIG, EXMAPLE_CONFIG_FILE_NAME, 1)
    entrypoint_file_in_docker = posixpath.join(constants.DEFAULT_DEST_PREFIX, 'entrypoint.sh')
    # output_map maps local file -> destination path; find the local file
    # whose destination is the entrypoint script.
    entrypoint_file = None
    for k, v in output_map.items():
        if v == entrypoint_file_in_docker:
            entrypoint_file = k
    # Fail with a clear message instead of a TypeError from open(None).
    assert entrypoint_file is not None, "entrypoint.sh missing from output map"
    # Use a context manager so the file handle is closed deterministically.
    with open(entrypoint_file, "r") as entrypoint:
        actual = entrypoint.read()
    expected = """#!/bin/sh
set -e
gsutil cp -r gs://lightgbm-test/regression.train {0}/regression.train
gsutil cp -r gs://lightgbm-test/regression.test {0}/regression.test
echo 'All files are copied!'
lightgbm config={0}/config.conf
gsutil cp -r {0}/model.txt gs://lightgbm-test/model.txt
""".format(posixpath.realpath(constants.DEFAULT_DEST_PREFIX))
    print(actual)
    assert expected == actual
Example #9
0
def test_final_config():
    """Check the generated ``config.conf`` against the expected content.

    The config file is located in the map returned by
    ``generate_context_files`` via its destination path, read from disk, and
    compared verbatim with the expected key=value lines.
    """
    output_map = lightgbm.generate_context_files(EXAMPLE_CONFIG,
                                                 EXMAPLE_CONFIG_FILE_NAME)
    config_file_in_docker = posixpath.join(constants.DEFAULT_DEST_PREFIX,
                                           'config.conf')
    # output_map maps local file -> destination path; find the local file
    # whose destination is the final config.
    config_file_local = None
    for k, v in output_map.items():
        if v == config_file_in_docker:
            config_file_local = k
    # Fail with a clear message instead of a TypeError from open(None).
    assert config_file_local is not None, "config.conf missing from output map"
    # Use a context manager so the file handle is closed deterministically.
    with open(config_file_local, "r") as config_file:
        actual = config_file.read()
    expected = """task=train
boosting_type=gbdt
objective=regression
n_estimators=10
is_training_metric=true
valid_data={0}/regression.test
train_data={0}/regression.train
verbose=1
model_output={0}/model.txt
""".format(posixpath.realpath(constants.DEFAULT_DEST_PREFIX))
    print(actual)
    assert actual == expected