Beispiel #1
0
def _benchmark_algo(
    benchmark,
    name,
    dataset_name,
    n_samples=10000,
    n_features=100,
    input_type='numpy',
    data_kwargs={},
    algo_args={},
):
    """Benchmark the cuML run of algorithm ``name`` on ``dataset_name``.

    The algorithm is looked up with ``algorithms.algorithm_by_name`` and the
    data is generated once with ``datagen.gen_data``; only the call to
    ``run_cuml`` is handed to the ``benchmark`` callable.

    Parameters
    ----------
    benchmark : callable
        Called with a zero-argument function that performs one run.
    name
        Algorithm identifier passed to ``algorithms.algorithm_by_name``.
    dataset_name, input_type, n_samples, n_features
        Forwarded to ``datagen.gen_data``.
    data_kwargs : dict
        Extra keyword arguments for ``datagen.gen_data``.
    algo_args : dict
        Keyword arguments forwarded to ``run_cuml``.
    """
    algorithm = algorithms.algorithm_by_name(name)

    # Collect the generator keywords first so the call site stays short.
    generator_kwargs = dict(
        n_samples=n_samples,
        n_features=n_features,
        **data_kwargs
    )
    dataset = datagen.gen_data(dataset_name, input_type, **generator_kwargs)

    def _single_run():
        # Data generation happens outside so only the run itself is measured.
        algorithm.run_cuml(dataset, **algo_args)

    benchmark(_single_run)
Beispiel #2
0
    def _run_one_size(
        self,
        algo_pair,
        n_samples,
        n_features,
        param_overrides={},
        cuml_param_overrides={},
        cpu_param_overrides={},
        run_cpu=True,
    ):
        """Time one cuML run (and optionally one CPU run) at a single size.

        Parameters
        ----------
        algo_pair
            Object exposing ``run_cuml``, ``run_cpu`` and ``cpu_class``.
        n_samples, n_features
            Size arguments forwarded to ``datagen.gen_data``.
        param_overrides : dict
            Keyword arguments passed to both the cuML and the CPU run.
        cuml_param_overrides : dict
            Extra keyword arguments passed only to the cuML run.
        cpu_param_overrides : dict
            Extra keyword arguments passed only to the CPU run.
        run_cpu : bool
            When false, or when ``algo_pair.cpu_class`` is None, the CPU run
            is skipped and ``cpu_time`` is reported as 0.0.

        Returns
        -------
        dict
            ``cu_time``, ``cpu_time``, ``speedup`` (cpu_time / cu_time),
            ``n_samples`` and ``n_features``, plus every entry of
            ``param_overrides`` and ``cuml_param_overrides``.
        """
        # NOTE: the dict defaults are shared between calls; they are only
        # read here, never mutated.
        data = datagen.gen_data(self.dataset_name, self.input_type, n_samples,
                                n_features)
        print("data type: ", data[0].__class__)

        # perf_counter is monotonic and high-resolution, unlike the
        # wall-clock time.time(), so short runs are measured reliably.
        cu_start = time.perf_counter()
        algo_pair.run_cuml(data, **param_overrides, **cuml_param_overrides)
        cu_elapsed = time.perf_counter() - cu_start

        if run_cpu and algo_pair.cpu_class is not None:
            cpu_start = time.perf_counter()
            # CPU-specific overrides were previously accepted but dropped;
            # forward them the same way the cuML-specific ones are.
            algo_pair.run_cpu(data, **param_overrides, **cpu_param_overrides)
            cpu_elapsed = time.perf_counter() - cpu_start
        else:
            cpu_elapsed = 0.0

        return dict(cu_time=cu_elapsed,
                    cpu_time=cpu_elapsed,
                    speedup=cpu_elapsed / float(cu_elapsed),
                    n_samples=n_samples,
                    n_features=n_features,
                    **param_overrides,
                    **cuml_param_overrides)
Beispiel #3
0
def test_training_data_to_numpy(input_type):
    """Check that ``_training_data_to_numpy`` yields NumPy arrays.

    Data is generated in the requested ``input_type`` and both converted
    outputs must be ``np.ndarray`` instances.
    """
    features, labels, *_ = datagen.gen_data(
        'blobs', input_type, n_samples=100, n_features=10
    )
    features_np, labels_np = _training_data_to_numpy(features, labels)
    for converted in (features_np, labels_np):
        assert isinstance(converted, np.ndarray)
Beispiel #4
0
def test_data_generator_split():
    """Check train/test shapes produced with ``test_fraction=0.20``.

    With ``n_samples=100`` and ``n_features=10`` the training features are
    expected to be (100, 10) and the test features (25, 10).
    """
    split = datagen.gen_data(
        'blobs', 'numpy', n_samples=100, n_features=10, test_fraction=0.20
    )
    train_features, _train_labels, test_features, _test_labels = split
    assert train_features.shape == (100, 10)
    assert test_features.shape == (25, 10)
Beispiel #5
0
def test_data_generator_types(input_type):
    """Check that generated features have the container type requested.

    Each known ``input_type`` maps to a predicate on the first generated
    element; an unknown ``input_type`` fails the test.
    """
    features, *_ = datagen.gen_data(
        'blobs', input_type, n_samples=100, n_features=10
    )

    # Lambdas keep the type lookups lazy: only the predicate for the
    # requested input type is ever evaluated.
    type_checks = {
        'numpy': lambda obj: isinstance(obj, np.ndarray),
        'cudf': lambda obj: isinstance(obj, cudf.DataFrame),
        'pandas': lambda obj: isinstance(obj, pd.DataFrame),
        'gpuarray': lambda obj: cuda.is_cuda_array(obj),
    }
    matches_expected = type_checks.get(input_type)

    assert matches_expected is not None
    assert matches_expected(features)
Beispiel #6
0
    def _run_one_size(
        self,
        algo_pair,
        n_samples,
        n_features,
        param_overrides={},
        cuml_param_overrides={},
        cpu_param_overrides={},
        run_cpu=True,
        verbose=False,
    ):
        """Benchmark ``algo_pair`` at one data size and report the speedup.

        Setup (``setup_cuml`` / ``setup_cpu``) runs once per backend; its
        returned mapping is forwarded to every timed run. The reported time
        for each backend is the minimum over the timings collected by a
        ``BenchmarkTimer(self.n_reps)``.

        Parameters
        ----------
        algo_pair
            Object exposing ``setup_cuml``, ``run_cuml``, ``setup_cpu``,
            ``run_cpu``, ``cpu_class`` and ``name``.
        n_samples, n_features
            Size arguments forwarded to ``datagen.gen_data``.
        param_overrides : dict
            Keyword arguments passed to both backends.
        cuml_param_overrides : dict
            Extra keyword arguments passed only to the cuML setup/run.
        cpu_param_overrides : dict
            Extra keyword arguments passed only to the CPU setup/run.
        run_cpu : bool
            When false, or when ``algo_pair.cpu_class`` is None, the CPU
            side is skipped and ``cpu_time`` is reported as 0.0.
        verbose : bool
            Print the computed speedup when true.

        Returns
        -------
        dict
            ``cu_time``, ``cpu_time``, ``speedup`` (cpu_time / cu_time),
            ``n_samples`` and ``n_features``, plus every entry of
            ``param_overrides`` and ``cuml_param_overrides``.
        """
        # NOTE: the dict defaults are shared between calls; they are only
        # read here, never mutated.
        data = datagen.gen_data(self.dataset_name, self.input_type, n_samples,
                                n_features)

        cuml_setup_overrides = algo_pair.setup_cuml(data, **param_overrides,
                                                    **cuml_param_overrides)

        cuml_timer = BenchmarkTimer(self.n_reps)
        for _ in cuml_timer.benchmark_runs():
            algo_pair.run_cuml(data, **param_overrides, **cuml_param_overrides,
                               **cuml_setup_overrides)
        cu_elapsed = np.min(cuml_timer.timings)

        if run_cpu and algo_pair.cpu_class is not None:
            # CPU-specific overrides were previously accepted but dropped;
            # forward them to setup and run like the cuML-specific ones.
            cpu_setup_overrides = algo_pair.setup_cpu(data, **param_overrides,
                                                      **cpu_param_overrides)

            cpu_timer = BenchmarkTimer(self.n_reps)
            for _ in cpu_timer.benchmark_runs():
                algo_pair.run_cpu(data, **param_overrides,
                                  **cpu_param_overrides,
                                  **cpu_setup_overrides)
            cpu_elapsed = np.min(cpu_timer.timings)
        else:
            cpu_elapsed = 0.0

        speedup = cpu_elapsed / float(cu_elapsed)
        if verbose:
            print("%s Speedup (n_samples=%s, n_features=%s) = %s" %
                  (algo_pair.name, n_samples, n_features, speedup))

        return dict(cu_time=cu_elapsed,
                    cpu_time=cpu_elapsed,
                    speedup=speedup,
                    n_samples=n_samples,
                    n_features=n_features,
                    **param_overrides,
                    **cuml_param_overrides)
Beispiel #7
0
def test_data_generators(dataset):
    """Check that ``dataset`` generates a NumPy feature array of 100 rows."""
    features = datagen.gen_data(
        dataset, "numpy", n_samples=100, n_features=10
    )[0]
    assert isinstance(features, np.ndarray)
    row_count = features.shape[0]
    assert row_count == 100
Beispiel #8
0
    def _run_one_size(
        self,
        algo_pair,
        n_samples,
        n_features,
        param_overrides={},
        cuml_param_overrides={},
        cpu_param_overrides={},
        run_cpu=True,
        verbose=False,
    ):
        """Benchmark ``algo_pair`` at one data size, with optional accuracy.

        Setup runs once per backend and its returned mapping is merged into
        the keyword arguments of every timed run. The reported time for
        each backend is the minimum over the timings collected by a
        ``BenchmarkTimer(self.n_reps)``. When ``algo_pair.accuracy_function``
        is truthy, the model returned by the last timed run is scored on
        ``data[2:]``, which is unpacked as ``(X_test, y_test)``.

        Parameters
        ----------
        algo_pair
            Object exposing ``setup_cuml``, ``run_cuml``, ``setup_cpu``,
            ``run_cpu``, ``cpu_class``, ``accuracy_function``,
            ``cuml_data_prep_hook`` and ``cpu_data_prep_hook``.
        n_samples, n_features
            Size arguments forwarded to ``datagen.gen_data``.
        param_overrides : dict
            Keyword arguments passed to both backends.
        cuml_param_overrides : dict
            Extra keyword arguments passed only to the cuML setup/run.
        cpu_param_overrides : dict
            Extra keyword arguments passed only to the CPU setup/run.
        run_cpu : bool
            When false, or when ``algo_pair.cpu_class`` is None, the CPU
            side is skipped; ``cpu_time`` and ``cpu_acc`` are then 0.0.
        verbose : bool
            Accepted for interface compatibility; not used by this method.

        Returns
        -------
        dict
            ``cu_time``, ``cpu_time``, ``cuml_acc``, ``cpu_acc``,
            ``speedup`` (cpu_time / cu_time), ``n_samples`` and
            ``n_features``, plus every entry of ``param_overrides`` and
            ``cuml_param_overrides``.
        """
        # NOTE: the dict defaults are shared between calls; they are only
        # read here, never mutated.
        data = datagen.gen_data(
            self.dataset_name,
            self.input_type,
            n_samples,
            n_features,
            test_fraction=self.test_fraction,
        )

        def _score(model, prep_hook):
            # Score ``model`` on the test split, optionally pre-processed.
            if prep_hook is not None:
                X_test, y_test = prep_hook(data[2:])
            else:
                X_test, y_test = data[2:]

            # Estimators without ``predict`` are scored on ``transform``.
            if hasattr(model, "predict"):
                y_pred = model.predict(X_test)
            else:
                y_pred = model.transform(X_test)
            return algo_pair.accuracy_function(y_test, np.asarray(y_pred))

        cuml_kwargs = {**param_overrides, **cuml_param_overrides}
        cuml_setup = algo_pair.setup_cuml(data, **cuml_kwargs)

        cuml_timer = BenchmarkTimer(self.n_reps)
        for _ in cuml_timer.benchmark_runs():
            cuml_model = algo_pair.run_cuml(
                data, **{**cuml_kwargs, **cuml_setup})
        cu_elapsed = np.min(cuml_timer.timings)

        if algo_pair.accuracy_function:
            cuml_accuracy = _score(cuml_model, algo_pair.cuml_data_prep_hook)
        else:
            cuml_accuracy = 0.0

        cpu_accuracy = 0.0
        if run_cpu and algo_pair.cpu_class is not None:
            # CPU-specific overrides were previously accepted but dropped.
            # Merge them the same way as the cuML path so later dicts win
            # on key collisions instead of raising.
            cpu_kwargs = {**param_overrides, **cpu_param_overrides}
            cpu_setup = algo_pair.setup_cpu(data, **cpu_kwargs)

            cpu_timer = BenchmarkTimer(self.n_reps)
            for _ in cpu_timer.benchmark_runs():
                cpu_model = algo_pair.run_cpu(
                    data, **{**cpu_kwargs, **cpu_setup})
            cpu_elapsed = np.min(cpu_timer.timings)

            if algo_pair.accuracy_function:
                cpu_accuracy = _score(cpu_model, algo_pair.cpu_data_prep_hook)
        else:
            cpu_elapsed = 0.0

        return dict(cu_time=cu_elapsed,
                    cpu_time=cpu_elapsed,
                    cuml_acc=cuml_accuracy,
                    cpu_acc=cpu_accuracy,
                    speedup=cpu_elapsed / float(cu_elapsed),
                    n_samples=n_samples,
                    n_features=n_features,
                    **param_overrides,
                    **cuml_param_overrides)
Beispiel #9
0
    def _run_one_size(
        self,
        algo_pair,
        n_samples,
        n_features,
        param_overrides={},
        cuml_param_overrides={},
        cpu_param_overrides={},
        dataset_param_overrides={},
        run_cpu=True,
        verbose=False,
    ):
        """Benchmark ``algo_pair`` on one generated dataset size.

        Each backend is set up once, then run under a
        ``BenchmarkTimer(self.n_reps)``; the minimum recorded timing is
        reported. If ``run_cpu`` is true but ``algo_pair.cpu_class`` is
        None, a warning is issued and the CPU time is reported as 0.0.

        Returns a dict with ``cu_time``, ``cpu_time``, ``speedup``
        (cpu_time / cu_time), ``n_samples`` and ``n_features``, plus every
        entry of the four override dicts.
        """
        dataset = datagen.gen_data(
            self.dataset_name,
            self.input_type,
            n_samples,
            n_features,
            **dataset_param_overrides
        )

        # GPU side: one setup call, then the timed repetitions.
        gpu_setup = algo_pair.setup_cuml(
            dataset, **param_overrides, **cuml_param_overrides
        )
        gpu_timer = BenchmarkTimer(self.n_reps)
        for _ in gpu_timer.benchmark_runs():
            algo_pair.run_cuml(
                dataset,
                **param_overrides,
                **cuml_param_overrides,
                **gpu_setup
            )
        cu_elapsed = np.min(gpu_timer.timings)

        # CPU side: stays at 0.0 unless a CPU implementation is timed.
        cpu_elapsed = 0.0
        if run_cpu and algo_pair.cpu_class is not None:
            host_setup = algo_pair.setup_cpu(
                dataset, **param_overrides, **cpu_param_overrides
            )
            host_timer = BenchmarkTimer(self.n_reps)
            for _ in host_timer.benchmark_runs():
                algo_pair.run_cpu(
                    dataset,
                    **param_overrides,
                    **cpu_param_overrides,
                    **host_setup
                )
            cpu_elapsed = np.min(host_timer.timings)
        elif run_cpu:
            warnings.warn("run_cpu argument is set to True but no CPU "
                          "implementation was provided. It's possible "
                          "an additional library is needed but one could "
                          "not be found. Benchmark will be executed with "
                          "run_cpu=False")

        speedup = cpu_elapsed / float(cu_elapsed)
        if verbose:
            report_fields = (algo_pair.name, n_samples, n_features,
                             cpu_elapsed, cu_elapsed, speedup)
            print(
                "%s (n_samples=%s, n_features=%s) [cpu=%s, gpu=%s, speedup=%s]"
                % report_fields
            )

        summary = dict(
            cu_time=cu_elapsed,
            cpu_time=cpu_elapsed,
            speedup=speedup,
            n_samples=n_samples,
            n_features=n_features,
            **param_overrides,
            **cuml_param_overrides,
            **cpu_param_overrides,
            **dataset_param_overrides
        )
        return summary
Beispiel #10
0
        args.dataset_type = params['dataset_type']
    if 'n_samples' in params:
        args.n_samples = params['n_samples']
    if 'n_features' in params:
        args.n_features = params['n_features']
    if 'dataset_format' in params:
        args.dataset_format = params['dataset_format']
    if 'data_kwargs' in params:
        args.data_kwargs = params['data_kwargs']
    if 'setup_kwargs' in params:
        args.setup_kwargs = params['setup_kwargs']
    if 'training_kwargs' in params:
        args.training_kwargs = params['training_kwargs']
    if 'inference_kwargs' in params:
        args.inference_kwargs = params['inference_kwargs']


# Script body: generate one dataset, set up the chosen algorithm for
# inference, and run its cuML implementation once.

# Only call parse_json when args.json is non-empty.
# NOTE(review): parse_json presumably overwrites attributes on `args`
# (dataset_type, n_samples, data_kwargs, ...) in place -- confirm against
# its full definition.
if len(args.json):
    parse_json(args)

# Generate the benchmark data; args.data_kwargs is expanded as extra
# keyword arguments for the generator.
dataset = datagen.gen_data(args.dataset_type,
                           args.dataset_format,
                           n_samples=args.n_samples,
                           n_features=args.n_features,
                           **args.data_kwargs)

# Look up the algorithm by name, then run its setup. The value returned by
# setup_bench must be a mapping because it is expanded with ** below.
algo = algorithms.algorithm_by_name(args.algo_name)
cuml_setup = setup_bench('cuml', algo, 'inference', dataset, args.setup_kwargs,
                         args.training_kwargs)
# Run the cuML implementation with the inference arguments and setup output.
algo.run_cuml(dataset, bench_args=args.inference_kwargs, **cuml_setup)
Beispiel #11
0
    def _run_one_size(
        self,
        algo_pair,
        n_samples,
        n_features,
        param_overrides={},
        cuml_param_overrides={},
        cpu_param_overrides={},
        run_cpu=True,
    ):
        """Time one cuML run (and optionally one CPU run) and score them.

        The data is generated with ``test_fraction=self.test_fraction``;
        ``data[2:]`` is unpacked as ``(X_test, y_test)``. When
        ``algo_pair.accuracy_function`` is truthy, each returned model is
        scored on that split.

        Parameters
        ----------
        algo_pair
            Object exposing ``run_cuml``, ``run_cpu``, ``cpu_class`` and
            ``accuracy_function``.
        n_samples, n_features
            Size arguments forwarded to ``datagen.gen_data``.
        param_overrides : dict
            Keyword arguments passed to both backends.
        cuml_param_overrides : dict
            Extra keyword arguments passed only to the cuML run.
        cpu_param_overrides : dict
            Extra keyword arguments passed only to the CPU run.
        run_cpu : bool
            When false, or when ``algo_pair.cpu_class`` is None, the CPU
            run is skipped; ``cpu_time`` and ``cpu_acc`` are then 0.0.

        Returns
        -------
        dict
            ``cu_time``, ``cpu_time``, ``cuml_acc``, ``cpu_acc``,
            ``speedup`` (cpu_time / cu_time), ``n_samples`` and
            ``n_features``, plus every entry of ``param_overrides`` and
            ``cuml_param_overrides``.
        """
        # NOTE: the dict defaults are shared between calls; they are only
        # read here, never mutated.
        data = datagen.gen_data(
            self.dataset_name,
            self.input_type,
            n_samples,
            n_features,
            test_fraction=self.test_fraction,
        )
        X_test, y_test = data[2:]

        def _score(model):
            # Estimators without ``predict`` are scored on ``transform``.
            if hasattr(model, 'predict'):
                y_pred = model.predict(X_test)
            else:
                y_pred = model.transform(X_test)
            return algo_pair.accuracy_function(y_test, np.asarray(y_pred))

        # perf_counter is monotonic and high-resolution, unlike the
        # wall-clock time.time(), so short runs are measured reliably.
        cu_start = time.perf_counter()
        cuml_model = algo_pair.run_cuml(
            data, **{
                **param_overrides,
                **cuml_param_overrides
            })
        cu_elapsed = time.perf_counter() - cu_start

        if algo_pair.accuracy_function:
            cuml_accuracy = _score(cuml_model)
        else:
            cuml_accuracy = 0.0

        cpu_accuracy = 0.0
        if run_cpu and algo_pair.cpu_class is not None:
            cpu_start = time.perf_counter()
            # CPU-specific overrides were previously accepted but dropped;
            # merge them the same way the cuML-specific ones are merged.
            cpu_model = algo_pair.run_cpu(
                data, **{
                    **param_overrides,
                    **cpu_param_overrides
                })
            cpu_elapsed = time.perf_counter() - cpu_start

            if algo_pair.accuracy_function:
                cpu_accuracy = _score(cpu_model)
        else:
            cpu_elapsed = 0.0

        return dict(cu_time=cu_elapsed,
                    cpu_time=cpu_elapsed,
                    cuml_acc=cuml_accuracy,
                    cpu_acc=cpu_accuracy,
                    speedup=cpu_elapsed / float(cu_elapsed),
                    n_samples=n_samples,
                    n_features=n_features,
                    **param_overrides,
                    **cuml_param_overrides)