Esempio n. 1
0
def test_optimize_criteo(tmpdir):
    """Run the scaling-criteo "01-Download-Convert" notebook on random data.

    A small randomly generated Criteo-style frame is written to
    ``<tmpdir>/input/day_0`` as a header-less, tab-separated file, and the
    notebook is executed with ``INPUT_DATA_DIR`` / ``OUTPUT_DATA_DIR``
    pointing at sub-directories of ``tmpdir``.

    The environment variables are applied only for the duration of the
    notebook run and restored afterwards, so paths into this test's
    temporary directory do not leak into tests that run later in the same
    process.
    """
    # Function-scope import keeps this block self-contained.
    from unittest.mock import patch

    input_path = str(tmpdir.mkdir("input"))
    output_path = str(tmpdir.mkdir("output"))
    _get_random_criteo_data(1000).to_csv(
        os.path.join(input_path, "day_0"),
        sep="\t",
        header=False,
    )

    # Presumably read by the notebook to locate its data; verify against
    # the notebook source if these names change.
    notebook_env = {
        "INPUT_DATA_DIR": input_path,
        "OUTPUT_DATA_DIR": output_path,
    }

    # patch.dict restores os.environ on exit, even if the notebook fails.
    with patch.dict(os.environ, notebook_env), get_cuda_cluster() as cuda_cluster:
        scheduler_address = cuda_cluster.scheduler_address

        def _nb_modify(line):
            # Use cuda_cluster "fixture" address rather than allowing notebook
            # to deploy a LocalCUDACluster within the subprocess
            line = line.replace("download_criteo = True",
                                "download_criteo = False")
            line = line.replace("cluster = None",
                                f"cluster = '{scheduler_address}'")
            return line

        # Separate path components, matching the other notebook tests.
        notebook_path = os.path.join(
            dirname(TEST_PATH),
            "examples",
            "scaling-criteo",
            "01-Download-Convert.ipynb",
        )
        _run_notebook(tmpdir, notebook_path, _nb_modify)
Esempio n. 2
0
def test_multigpu_dask_example(tmpdir):
    """Run the ``multi-gpu_dask.ipynb`` example notebook on a toy dataset.

    The notebook source is rewritten line by line so that it connects to the
    already-running test cluster and uses smaller dataset and partition
    settings. ``BASE_DIR`` is set to ``tmpdir`` only while the notebook runs
    and is restored afterwards, so the temporary path does not leak into
    tests that run later in the same process.
    """
    # Function-scope import keeps this block self-contained.
    from unittest.mock import patch

    with get_cuda_cluster() as cuda_cluster:
        scheduler_address = cuda_cluster.scheduler_address

        def _nb_modify(line):
            # Use cuda_cluster "fixture" address rather than allowing notebook
            # to deploy a LocalCUDACluster within the subprocess
            line = line.replace("cluster = None", f"cluster = '{scheduler_address}'")
            # Use a much smaller "toy" dataset
            line = line.replace("write_count = 25", "write_count = 4")
            line = line.replace('freq = "1s"', 'freq = "1h"')
            # Use smaller partitions for smaller dataset
            line = line.replace("part_mem_fraction=0.1", "part_size=1_000_000")
            line = line.replace("out_files_per_proc=8", "out_files_per_proc=1")
            return line

        notebook_path = os.path.join(dirname(TEST_PATH), "examples", "multi-gpu_dask.ipynb")

        # patch.dict restores os.environ on exit, even if the notebook fails.
        with patch.dict(os.environ, {"BASE_DIR": str(tmpdir)}):
            _run_notebook(tmpdir, notebook_path, _nb_modify)