Code example #1
def _benchmark_algo(
    benchmark,
    name,
    dataset_name,
    n_samples=10000,
    n_features=100,
    input_type='numpy',
    data_kwargs={},
    algo_args={},
):
    """Simplest benchmark wrapper to time algorithm 'name' on dataset
    'dataset_name'"""
    algo = algorithms.algorithm_by_name(name)
    data = datagen.gen_data(
        dataset_name,
        input_type,
        n_samples=n_samples,
        n_features=n_features,
        **data_kwargs
    )

    def _benchmark_inner():
        algo.run_cuml(data, **algo_args)

    benchmark(_benchmark_inner)
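
For context, a wrapper like this is normally driven from a test that receives the "benchmark" fixture provided by pytest-benchmark. A minimal sketch of that wiring follows; the test name and the parametrized algorithm/dataset pairs are hypothetical, not taken from the cuml test suite.

import pytest


# Hypothetical test driving the wrapper above: pytest-benchmark's `benchmark`
# fixture is passed straight through, and a couple of algorithm/dataset pairs
# are swept via parametrize.
@pytest.mark.parametrize("name,dataset_name", [
    ("LogisticRegression", "classification"),
    ("KMeans", "blobs"),
])
def test_algo_speed(benchmark, name, dataset_name):
    _benchmark_algo(benchmark, name, dataset_name,
                    n_samples=1000, n_features=20)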
Code example #2
File: test_benchmark.py  Project: rapidsai/cuml
def test_fil_input_types(input_type):
    pair = algorithms.algorithm_by_name('FIL')

    if not has_xgboost():
        pytest.xfail()

    runner = AccuracyComparisonRunner(
        [20], [5], dataset_name='classification', test_fraction=0.5,
        input_type=input_type)
    results = runner.run(pair, run_cpu=False)[0]
    assert results["cuml_acc"] is not None
Code example #3
File: test_benchmark.py  Project: teju85/cuml
def test_run_variations():
    algo = algorithms.algorithm_by_name("LogisticRegression")

    res = run_variations(
        [algo],
        dataset_name="classification",
        bench_rows=[100, 200],
        bench_dims=[10, 20],
    )
    assert res.shape[0] == 4
    assert (res.n_samples == 100).sum() == 2
    assert (res.n_features == 20).sum() == 2
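
The assertions follow from run_variations sweeping the Cartesian product of bench_rows and bench_dims: 2 row counts x 2 feature counts = 4 result rows, of which two use 100 samples and two use 20 features. A small standalone illustration of that grid:

from itertools import product

# The (n_samples, n_features) grid the test above expects run_variations to sweep.
combos = list(product([100, 200], [10, 20]))
assert len(combos) == 4                             # res.shape[0] == 4
assert sum(rows == 100 for rows, _ in combos) == 2  # (res.n_samples == 100).sum() == 2
assert sum(dims == 20 for _, dims in combos) == 2   # (res.n_features == 20).sum() == 2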
Code example #4
File: test_benchmark.py  Project: teju85/cuml
def test_real_algos_runner(algo_name):
    pair = algorithms.algorithm_by_name(algo_name)

    if (algo_name == 'UMAP' and not has_umap()) or \
       (algo_name == 'FIL' and not has_xgboost()):
        pytest.xfail()

    runner = AccuracyComparisonRunner([20], [5],
                                      dataset_name='classification',
                                      test_fraction=0.20)
    results = runner.run(pair)[0]
    print(results)
    assert results["cuml_acc"] is not None
Code example #5
File: ci_benchmark.py  Project: st071300/cuML
        algos = algos.union(preprocessing_algo_names)
        algos.remove('preprocessing')
    invalidAlgoNames = (algos - allAlgoNames)
    if invalidAlgoNames:
        raise ValueError("Invalid algo name(s): %s" % invalidAlgoNames)

    bench_to_run = bench_config[args.benchmark]

    default_args = dict(run_cpu=True, n_reps=args.n_reps)
    all_results = []
    for cfg_in in bench_to_run:
        if (algos is None) or ("ALL" in algos) or \
           (cfg_in["algo_name"] in algos):
            # Pass an actual algo object instead of an algo_name string
            cfg = cfg_in.copy()
            algo = algorithms.algorithm_by_name(cfg_in["algo_name"])
            cfg["algos"] = [algo]
            alg_name = cfg["algo_name"]
            if alg_name.startswith('Sparse'):
                if alg_name.startswith('SparseCSR'):
                    input_type = 'scipy-sparse-csr'
                elif alg_name.startswith('SparseCSC'):
                    input_type = 'scipy-sparse-csc'
            else:
                input_type = 'numpy'
            del cfg["algo_name"]
            res = run_variations(**{**default_args, **cfg},
                                 input_type=input_type)
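
From the loop above one can infer the rough shape of a bench_config entry: a dict carrying an "algo_name" key plus keyword arguments for run_variations (the "algo_name" key is looked up, removed, and replaced by an "algos" list before the rest is forwarded). The entry below is a hypothetical illustration, not copied from ci_benchmark.py; the dataset_name, bench_rows, and bench_dims values are assumptions.

# Hypothetical bench_config entry, shaped the way the loop above consumes it.
example_cfg = {
    "algo_name": "LogisticRegression",
    "dataset_name": "classification",  # assumed run_variations kwarg
    "bench_rows": [10000],             # assumed run_variations kwarg
    "bench_dims": [100],               # assumed run_variations kwarg
}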
Code example #6
    if args.num_features > 0:
        bench_dims = [args.num_features]
    if args.default_size:
        bench_rows = [0]
        bench_dims = [0]

    param_override_list = extract_param_overrides(args.param_sweep)
    cuml_param_override_list = extract_param_overrides(args.cuml_param_sweep)
    cpu_param_override_list = extract_param_overrides(args.cpu_param_sweep)
    dataset_param_override_list = extract_param_overrides(
        args.dataset_param_sweep)

    if args.algorithms:
        algos_to_run = []
        for name in args.algorithms:
            algo = algorithms.algorithm_by_name(name)
            if not algo:
                raise ValueError("No %s 'algorithm' found" % name)
            algos_to_run.append(algo)
    else:
        # Run all by default
        algos_to_run = algorithms.all_algorithms()

    results_df = runners.run_variations(
        algos_to_run,
        dataset_name=args.dataset,
        bench_rows=bench_rows,
        bench_dims=bench_dims,
        input_type=args.input_type,
        test_fraction=args.test_split,
        param_override_list=param_override_list,
Code example #7
File: auto_nvtx_bench.py  Project: rapidsai/cuml
        args.dataset_type = params['dataset_type']
    if 'n_samples' in params:
        args.n_samples = params['n_samples']
    if 'n_features' in params:
        args.n_features = params['n_features']
    if 'dataset_format' in params:
        args.dataset_format = params['dataset_format']
    if 'data_kwargs' in params:
        args.data_kwargs = params['data_kwargs']
    if 'setup_kwargs' in params:
        args.setup_kwargs = params['setup_kwargs']
    if 'training_kwargs' in params:
        args.training_kwargs = params['training_kwargs']
    if 'inference_kwargs' in params:
        args.inference_kwargs = params['inference_kwargs']


if len(args.json):
    parse_json(args)

dataset = datagen.gen_data(args.dataset_type,
                           args.dataset_format,
                           n_samples=args.n_samples,
                           n_features=args.n_features,
                           **args.data_kwargs)

algo = algorithms.algorithm_by_name(args.algo_name)
cuml_setup = setup_bench('cuml', algo, 'inference', dataset, args.setup_kwargs,
                         args.training_kwargs)
algo.run_cuml(dataset, bench_args=args.inference_kwargs, **cuml_setup)
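
The keys handled by parse_json above indicate what such a parameter file can contain. The sketch below only assumes the file is plain JSON read from the path in args.json (the excerpt does not show how parse_json loads it); the file name and the values are hypothetical.

import json

# Keys taken from the parse_json handler above; the values are illustrative.
params = {
    "dataset_type": "classification",
    "n_samples": 10000,
    "n_features": 100,
    "dataset_format": "numpy",
    "data_kwargs": {},
    "setup_kwargs": {},
    "training_kwargs": {},
    "inference_kwargs": {},
}

with open("nvtx_bench_params.json", "w") as f:
    json.dump(params, f, indent=2)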
Code example #8
def _benchmark_algo(
    benchmarker,
    algo_name,
    bench_step,
    dataset,
    setup_kwargs={},
    training_kwargs={},
    inference_kwargs={},
    client=None
):
    """
    Benchmark utility

    Parameters
    ----------
    benchmarker :
        Pytest benchmark fixture; wraps the code that should be benchmarked
    algo_name :
        Algorithm/model name, as listed in the algorithms.py file
    bench_step :
        Either 'training' or 'inference'; describes the algorithm/model
        step to be benchmarked
    dataset :
        Tuple with the data and a dictionary that describes how it was built.
        The dictionary can later be used during the NVTX benchmark.
    setup_kwargs :
        Algorithm/model setup kwargs
    training_kwargs :
        Algorithm/model training kwargs
    inference_kwargs :
        Algorithm/model inference kwargs
    client :
        Dask client used in MNMG settings
    """

    # Get data and dict describing how it was built
    dataset, data_kwargs = dataset

    # The presence of a Dask client signifies MNMG mode
    MNMG_mode = client is not None

    # Distribute data in MNMG settings
    if MNMG_mode:
        # Add the client to the setup kwargs used by model instantiation
        setup_kwargs['client'] = client
        # Exception: data is scattered by the MNMG DBSCAN model itself
        if algo_name != 'MNMG.DBSCAN':
            # Distribute data
            dataset = [distribute(client, d) for d in dataset]

    # Search AlgorithmPair instance by name
    algo = algorithms.algorithm_by_name(algo_name)
    # Setup the AlgorithmPair and the model to be ready for benchmark on GPU
    cuml_setup = setup_bench('cuml', algo, bench_step, dataset,
                             setup_kwargs, training_kwargs)

    # Pytest benchmark
    if bench_step == 'training':
        benchmarker(algo.run_cuml, dataset, bench_args=training_kwargs,
                    **cuml_setup)
    elif bench_step == 'inference':
        benchmarker(algo.run_cuml, dataset, bench_args=inference_kwargs,
                    **cuml_setup)

    # CPU benchmark and NVTX benchmark (only in SG mode)
    if not MNMG_mode:
        # Check that the cuML model has a CPU equivalent
        if algo.cpu_class:
            # Convert the dataset to a NumPy array
            cpu_dataset = datagen._convert_to_numpy(dataset)
            # Setup the AlgorithmPair and the model
            # to be ready for benchmark on CPU
            cpu_setup = setup_bench('cpu', algo, bench_step, cpu_dataset,
                                    setup_kwargs, training_kwargs)
            # CPU benchmark
            cpu_bench(algo, bench_step, cpu_dataset, inference_kwargs,
                      cpu_setup)

        # NVTX benchmark performs both the training and inference at once
        # but only when bench_step == 'inference'
        if bench_step == 'inference':
            # NVTX benchmark
            nvtx_profiling(algo_name, data_kwargs, setup_kwargs,
                           training_kwargs, inference_kwargs)
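
A minimal sketch of how this utility might be driven in single-GPU mode: the test name, the pytest-benchmark "benchmark" fixture, and the way the (data, data_kwargs) tuple is built below are illustrative assumptions, not code from the cuml test files.

import pytest


# Hypothetical single-GPU usage of _benchmark_algo. The dataset argument is a
# (data, data_kwargs) tuple; data_kwargs is the dict later reused by the NVTX
# profiling step.
@pytest.mark.parametrize("bench_step", ["training", "inference"])
def test_logistic_regression_bench(benchmark, bench_step):
    data_kwargs = {"dataset_type": "classification",
                   "n_samples": 10000,
                   "n_features": 100,
                   "dataset_format": "numpy"}
    data = datagen.gen_data("classification", "numpy",
                            n_samples=10000, n_features=100)
    _benchmark_algo(benchmark, "LogisticRegression", bench_step,
                    (data, data_kwargs))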