Example no. 1
import argparse

import bench
import numpy as np
from daal4py import ridge_regression_training, ridge_regression_prediction
from daal4py.sklearn._utils import getFPType

parser = argparse.ArgumentParser(description='daal4py ridge regression '
                                 'benchmark')
parser.add_argument('--no-fit-intercept',
                    dest='fit_intercept',
                    default=True,
                    action='store_false',
                    help="Don't fit intercept (assume data already centered)")
parser.add_argument('--alpha',
                    type=float,
                    default=1.0,
                    help='Regularization strength')
params = bench.parse_args(parser, prefix='daal4py')

# Generate random data
X_train, X_test, y_train, y_test = bench.load_data(
    params,
    generated_data=['X_train', 'y_train'],
    add_dtype=True,
    label_2d=params.file_X_train is not None)


# Create our regression objects
def test_fit(X, y):
    regr_train = ridge_regression_training(fptype=getFPType(X),
                                           ridgeParameters=np.array(
                                               [[params.alpha]]),
                                           interceptFlag=params.fit_intercept)
    return regr_train.compute(X, y)
Example no. 2
                    help='Minimum number of samples required to split a node')
parser.add_argument('--max-leaf-nodes',
                    type=int,
                    default=None,
                    help='Maximum leaf nodes per tree')
parser.add_argument('--min-impurity-decrease',
                    type=float,
                    default=0.,
                    help='Minimum impurity decrease required to split a node')
parser.add_argument('--no-bootstrap',
                    dest='bootstrap',
                    default=True,
                    action='store_false',
                    help="Don't use bootstrap sampling")

params = bench.parse_args(parser)

from sklearn.ensemble import RandomForestClassifier

# Load and convert data
X_train, X_test, y_train, y_test = bench.load_data(params)

# Create our random forest classifier
clf = RandomForestClassifier(
    criterion=params.criterion,
    n_estimators=params.num_trees,
    max_depth=params.max_depth,
    max_features=params.max_features,
    min_samples_split=params.min_samples_split,
    max_leaf_nodes=params.max_leaf_nodes,
    min_impurity_decrease=params.min_impurity_decrease,
Example no. 3
                    choices=['daal', 'full', 'correlation'],
                    default='daal',
                    help='SVD solver to use')
parser.add_argument('--n-components',
                    type=int,
                    default=None,
                    help='Number of components to find')
parser.add_argument('--whiten',
                    action='store_true',
                    default=False,
                    help='Perform whitening')
parser.add_argument('--write-results',
                    action='store_true',
                    default=False,
                    help='Write results to disk for verification')
params = parse_args(parser, size=(10000, 1000))

# Load data
X_train, X_test, _, _ = load_data(params,
                                  generated_data=['X_train'],
                                  add_dtype=True)

# If the number of components is not given, fall back to a heuristic:
# roughly one third of min(n_samples, n_features), capped at the number
# of features.
if params.n_components is None:
    p, n = X_train.shape
    params.n_components = min((n, (2 + min((n, p))) // 3))


# Define how to do our scikit-learn PCA using DAAL...
def pca_fit_daal(X, n_components, method):

    if n_components < 1:
Example no. 4
                    '--filet',
                    '--fileT',
                    '--tol',
                    required=True,
                    type=str,
                    help='File with the absolute threshold, in NPY format')
parser.add_argument('-m',
                    '--data-multiplier',
                    default=100,
                    type=int,
                    help='Data multiplier')
parser.add_argument('--maxiter',
                    type=int,
                    default=100,
                    help='Maximum number of iterations')
params = parse_args(parser, loop_types=('fit', 'predict'), prefix='daal4py')

# Load generated data
X = np.load(params.filex)
X_init = np.load(params.filei)
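# Replicate the rows --data-multiplier times to enlarge the workload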
X_mult = np.vstack((X, ) * params.data_multiplier)
tol = np.load(params.filet)

params.size = size_str(X.shape)
params.n_clusters = X_init.shape[0]
params.dtype = X.dtype


# Define functions to time
def test_fit(X, X_init):
    algorithm = kmeans(fptype=getFPType(X),
Example no. 5
                        action='store_false',
                        default=True,
                        help="Don't fit intercept")
    parser.add_argument('--multiclass',
                        default='auto',
                        choices=('auto', 'ovr', 'multinomial'),
                        help='How to treat multi-class data. '
                        '"auto" picks "ovr" for binary classification, and '
                        '"multinomial" otherwise.')
    parser.add_argument('--solver',
                        default='lbfgs',
                        choices=('lbfgs', 'newton-cg', 'saga'),
                        help='Solver to use.')
    parser.add_argument('--maxiter',
                        type=int,
                        default=100,
                        help='Maximum iterations for the iterative solver')
    parser.add_argument('-C',
                        dest='C',
                        type=float,
                        default=1.0,
                        help='Regularization parameter')
    parser.add_argument('--tol',
                        type=float,
                        default=None,
                        help='Tolerance for solver. If solver == "newton-cg", '
                        'then the default is 1e-3. Otherwise, the default '
                        'is 1e-10.')
    params = bench.parse_args(parser, loop_types=('fit', 'predict'))
    bench.run_with_context(params, main)
Example no. 6
def test_parse_args():
    command = "http://127.0.0.1:5000"
    args = parse_args(command.split())
    assert args == {
        'concurrency': 1,
        'total_requests': 1,
        'urls': ['http://127.0.0.1:5000'],
        'timeout': None,
        'method': 'GET',
        'keep_alive': False,
        'auth': None,
        'data': None,
        'json': None,
        'headers': None,
        'cookies': None
    }

    command = "-k http://127.0.0.1:5000"
    args = parse_args(command.split())
    assert args == {
        'concurrency': 1,
        'total_requests': 1,
        'urls': ['http://127.0.0.1:5000'],
        'timeout': None,
        'method': 'GET',
        'keep_alive': True,
        'auth': None,
        'data': None,
        'json': None,
        'headers': None,
        'cookies': None
    }

    command = "-c 10 -n 100 http://127.0.0.1:5000"
    args = parse_args(command.split())
    assert args == {
        'concurrency': 10,
        'total_requests': 100,
        'urls': ['http://127.0.0.1:5000'],
        'timeout': None,
        'method': 'GET',
        'keep_alive': False,
        'auth': None,
        'data': None,
        'json': None,
        'headers': None,
        'cookies': None
    }

    command = "-m POST -c 10 -n 100 http://127.0.0.1:5000"
    args = parse_args(command.split())
    assert args == {
        'concurrency': 10,
        'total_requests': 100,
        'urls': ['http://127.0.0.1:5000'],
        'timeout': None,
        'method': 'POST',
        'keep_alive': False,
        'auth': None,
        'data': None,
        'json': None,
        'headers': None,
        'cookies': None
    }

    command = "-m POST -d test -c 10 -n 100 http://127.0.0.1:5000"
    args = parse_args(command.split())
    assert args == {
        'concurrency': 10,
        'total_requests': 100,
        'urls': ['http://127.0.0.1:5000'],
        'timeout': None,
        'method': 'POST',
        'keep_alive': False,
        'auth': None,
        'data': 'test',
        'json': None,
        'headers': None,
        'cookies': None
    }

    command = "-m POST -j {'test':'test_json'} -c 10 -n 100 http://127.0.0.1:5000"
    args = parse_args(command.split())
    assert args == {
        'concurrency': 10,
        'total_requests': 100,
        'urls': ['http://127.0.0.1:5000'],
        'timeout': None,
        'method': 'POST',
        'keep_alive': False,
        'auth': None,
        'data': None,
        'json': {
            'test': 'test_json'
        },
        'headers': None,
        'cookies': None
    }

    command = "-m PUT -c 10 -n 100 http://127.0.0.1:5000"
    args = parse_args(command.split())
    assert args == {
        'concurrency': 10,
        'total_requests': 100,
        'urls': ['http://127.0.0.1:5000'],
        'timeout': None,
        'method': 'PUT',
        'keep_alive': False,
        'auth': None,
        'data': None,
        'json': None,
        'headers': None,
        'cookies': None
    }

    command = "-m DELETE -c 10 -n 100 http://127.0.0.1:5000"
    args = parse_args(command.split())
    assert args == {
        'concurrency': 10,
        'total_requests': 100,
        'urls': ['http://127.0.0.1:5000'],
        'timeout': None,
        'method': 'DELETE',
        'keep_alive': False,
        'auth': None,
        'data': None,
        'json': None,
        'headers': None,
        'cookies': None
    }

    command = "-a Basic:nikan:wrong_pass http://127.0.0.1:5000"
    args = parse_args(command.split())
    from requests.auth import HTTPBasicAuth
    assert args == {
        'concurrency': 1,
        'total_requests': 1,
        'urls': ['http://127.0.0.1:5000'],
        'timeout': None,
        'method': 'GET',
        'keep_alive': False,
        'auth': HTTPBasicAuth('nikan', 'wrong_pass'),
        'data': None,
        'json': None,
        'headers': None,
        'cookies': None
    }

    command = "-a Digest:nikan:wrong_pass http://127.0.0.1:5000"
    args = parse_args(command.split())
    from requests.auth import HTTPDigestAuth
    assert args == {
        'concurrency': 1,
        'total_requests': 1,
        'urls': ['http://127.0.0.1:5000'],
        'timeout': None,
        'method': 'GET',
        'keep_alive': False,
        'auth': HTTPDigestAuth('nikan', 'wrong_pass'),
        'data': None,
        'json': None,
        'headers': None,
        'cookies': None
    }

    command = "-H {'user-agent':'hahah'} -C {'a':'1'} http://127.0.0.1:5000"
    args = parse_args(command.split())
    assert args == {
        'concurrency': 1,
        'total_requests': 1,
        'urls': ['http://127.0.0.1:5000'],
        'timeout': None,
        'method': 'GET',
        'keep_alive': False,
        'auth': None,
        'data': None,
        'json': None,
        'headers': {
            'user-agent': 'hahah'
        },
        'cookies': {
            'a': '1'
        }
    }

    command = "-f correct_file"
    args = parse_args(command.split())
    assert args == {
        'concurrency': 1,
        'total_requests': 5,
        'urls': [
            'http://example.com/', 'http://example.com/1',
            'http://example.com/2', 'http://example.com/3',
            'http://example.com/4'
        ],
        'timeout': None,
        'method': 'GET',
        'keep_alive': False,
        'auth': None,
        'data': None,
        'json': None,
        'headers': None,
        'cookies': None
    }
Example no. 7
# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT

import argparse
from bench import measure_function_time, parse_args, load_data, print_output
from sklearn.cluster import DBSCAN

parser = argparse.ArgumentParser(description='scikit-learn DBSCAN benchmark')
parser.add_argument('-e', '--eps', '--epsilon', type=float, default=10.,
                    help='Radius of neighborhood of a point')
parser.add_argument('-m', '--min-samples', default=5, type=int,
                    help='The minimum number of samples required in a '
                    'neighborhood to consider a point a core point')
params = parse_args(parser, n_jobs_supported=True)

# Load generated data
X, _, _, _ = load_data(params, add_dtype=True)

# Create our clustering object
dbscan = DBSCAN(eps=params.eps, n_jobs=params.n_jobs,
                min_samples=params.min_samples, metric='euclidean',
                algorithm='auto')

# N.B. algorithm='auto' will select DAAL's brute force method when running
# daal4py-patched scikit-learn, and probably 'kd_tree' when running unpatched
# scikit-learn.

columns = ('batch', 'arch', 'prefix', 'function', 'threads', 'dtype', 'size',
           'n_clusters', 'time')
Example no. 8
def main():
    parser = argparse.ArgumentParser(description='daal4py SVC benchmark with '
                                     'linear kernel')
    parser.add_argument('-x',
                        '--filex',
                        '--fileX',
                        type=argparse.FileType('r'),
                        required=True,
                        help='Input file with features, in NPY format')
    parser.add_argument('-y',
                        '--filey',
                        '--fileY',
                        type=argparse.FileType('r'),
                        required=True,
                        help='Input file with labels, in NPY format')
    parser.add_argument('-C',
                        dest='C',
                        type=float,
                        default=0.01,
                        help='SVM slack parameter')
    parser.add_argument('--kernel',
                        choices=('linear', ),
                        default='linear',
                        help='SVM kernel function')
    parser.add_argument('--maxiter',
                        type=int,
                        default=2000,
                        help='Maximum iterations for the iterative solver. '
                        '-1 means no limit.')
    parser.add_argument('--max-cache-size',
                        type=int,
                        default=64,
                        help='Maximum cache size, in gigabytes, for SVM.')
    parser.add_argument('--tau',
                        type=float,
                        default=1e-12,
                        help='Tau parameter for working set selection scheme')
    parser.add_argument('--tol', type=float, default=1e-16, help='Tolerance')
    parser.add_argument('--no-shrinking',
                        action='store_false',
                        default=True,
                        dest='shrinking',
                        help="Don't use shrinking heuristic")
    params = parse_args(parser,
                        loop_types=('fit', 'predict'),
                        prefix='daal4py')

    # Load data and cast to float64
    X_train = np.load(params.filex.name).astype('f8')
    y_train = np.load(params.filey.name).astype('f8')

    cache_size_bytes = get_optimal_cache_size(X_train.shape[0],
                                              max_cache=params.max_cache_size)
    params.cache_size_mb = cache_size_bytes / 2**20
    params.cache_size_bytes = cache_size_bytes
    params.n_classes = np.unique(y_train).size

    # daal4py's SVM expects class labels encoded as -1/+1 and passed as a
    # 2D column vector
    y_train[y_train == 0] = -1
    y_train = y_train[:, np.newaxis]

    columns = ('batch', 'arch', 'prefix', 'function', 'threads', 'dtype',
               'size', 'kernel', 'cache_size_mb', 'C', 'sv_len', 'n_classes',
               'accuracy', 'time')
    params.size = size_str(X_train.shape)
    params.dtype = X_train.dtype

    print_header(columns, params)

    # Time fit and predict
    fit_time, res = time_mean_min(test_fit,
                                  X_train,
                                  y_train,
                                  params,
                                  outer_loops=params.fit_outer_loops,
                                  inner_loops=params.fit_inner_loops)
    res, support, indices, n_support = res
    params.sv_len = support.shape[0]
    print_row(columns, params, function='SVM.fit', time=fit_time)

    predict_time, yp = time_mean_min(test_predict,
                                     X_train,
                                     res,
                                     params,
                                     outer_loops=params.predict_outer_loops,
                                     inner_loops=params.predict_inner_loops)
    print_row(columns,
              params,
              function='SVM.predict',
              time=predict_time,
              accuracy=f'{100*accuracy_score(yp, y_train):.3}')
Example no. 9
                    default='daal',
                    help='SVD solver to use')
parser.add_argument('--n-components',
                    type=int,
                    default=None,
                    help='Number of components to find')
parser.add_argument('--whiten',
                    action='store_true',
                    default=False,
                    help='Perform whitening')
parser.add_argument('--write-results',
                    action='store_true',
                    default=False,
                    help='Write results to disk for verification')
params = parse_args(parser,
                    size=(10000, 1000),
                    dtypes=('f8', 'f4'),
                    loop_types=('fit', 'transform'))

# Generate random data
p, n = params.shape
X = np.random.rand(*params.shape).astype(params.dtype)
Xp = np.random.rand(*params.shape).astype(params.dtype)

# Heuristic default: roughly one third of min(n_samples, n_features),
# capped at the number of features
if not params.n_components:
    params.n_components = min((n, (2 + min((n, p))) // 3))


# Define how to do our scikit-learn PCA using DAAL...
def pca_fit_daal(X, n_components):

    if n_components < 1:
Example no. 10
import argparse
from bench import parse_args, time_mean_min, print_header, print_row
import daal4py
from daal4py.sklearn.utils import getFPType
import numpy as np

parser = argparse.ArgumentParser(description='daal4py pairwise distances '
                                 'benchmark')
parser.add_argument('--metrics',
                    nargs='*',
                    default=['cosine', 'correlation'],
                    choices=('cosine', 'correlation'),
                    help='Metrics to test for pairwise_distances')
params = parse_args(parser,
                    size=(1000, 150000),
                    dtypes=('f8', 'f4'),
                    prefix='daal4py')

# Generate random data
X = np.random.rand(*params.shape).astype(params.dtype)

columns = ('batch', 'arch', 'prefix', 'function', 'threads', 'dtype', 'size',
           'time')
print_header(columns, params)

for metric in params.metrics:
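    # daal4py provides a separate algorithm class per metric
    # (cosine_distance, correlation_distance); look it up by name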
    pairwise_distances = getattr(daal4py, f'{metric}_distance')

    def test_distances(pairwise_distances, X):
        algorithm = pairwise_distances(fptype=getFPType(X))
        return algorithm.compute(X)
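
    # A minimal sketch of the timing step that would typically follow,
    # reusing the time_mean_min and print_row helpers imported above
    # (the reported function name is illustrative):
    time, _ = time_mean_min(test_distances, pairwise_distances, X,
                            outer_loops=params.outer_loops,
                            inner_loops=params.inner_loops)
    print_row(columns, params, function=f'{metric}_distance', time=time)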
Example no. 11
def main():
    parser = argparse.ArgumentParser(description='daal4py SVC benchmark')
    parser.add_argument('-C',
                        dest='C',
                        type=float,
                        default=1.0,
                        help='SVM regularization parameter')
    parser.add_argument('--kernel',
                        choices=('linear', 'rbf'),
                        default='linear',
                        help='SVM kernel function')
    parser.add_argument('--gamma',
                        type=float,
                        default=None,
                        help='Parameter for kernel="rbf"')
    parser.add_argument('--maxiter',
                        type=int,
                        default=100000,
                        help='Maximum iterations for the iterative solver. ')
    parser.add_argument('--max-cache-size',
                        type=int,
                        default=8,
                        help='Maximum cache size, in gigabytes, for SVM.')
    parser.add_argument('--tau',
                        type=float,
                        default=1e-12,
                        help='Tau parameter for working set selection scheme')
    parser.add_argument('--tol', type=float, default=1e-3, help='Tolerance')
    parser.add_argument('--no-shrinking',
                        action='store_false',
                        default=True,
                        dest='shrinking',
                        help="Don't use shrinking heuristic")
    params = parse_args(parser, prefix='daal4py')

    # Load data
    X_train, X_test, y_train, y_test = load_data(params,
                                                 add_dtype=True,
                                                 label_2d=True)

    # Default gamma: 1 / n_features (scikit-learn's gamma='auto' convention)
    if params.gamma is None:
        params.gamma = 1 / X_train.shape[1]

    cache_size_bytes = get_optimal_cache_size(X_train.shape[0],
                                              max_cache=params.max_cache_size)
    params.cache_size_mb = cache_size_bytes / 2**20
    params.cache_size_bytes = cache_size_bytes
    params.n_classes = np.unique(y_train).size

    columns = ('batch', 'arch', 'prefix', 'function', 'threads', 'dtype',
               'size', 'kernel', 'cache_size_mb', 'C', 'sv_len', 'n_classes',
               'accuracy', 'time')

    # Time fit and predict
    fit_time, res = measure_function_time(test_fit,
                                          X_train,
                                          y_train,
                                          params,
                                          params=params)
    res, support, indices, n_support = res
    params.sv_len = support.shape[0]

    yp = test_predict(X_train, res, params)
    train_acc = 100 * accuracy_score(yp, y_train)

    predict_time, yp = measure_function_time(test_predict,
                                             X_test,
                                             res,
                                             params,
                                             params=params)

    test_acc = 100 * accuracy_score(yp, y_test)

    print_output(library='daal4py',
                 algorithm='svc',
                 stages=['training', 'prediction'],
                 columns=columns,
                 params=params,
                 functions=['SVM.fit', 'SVM.predict'],
                 times=[fit_time, predict_time],
                 accuracy_type='accuracy[%]',
                 accuracies=[train_acc, test_acc],
                 data=[X_train, X_test])
Example no. 12
# ===============================================================================

import argparse

import bench
from cuml import LinearRegression


parser = argparse.ArgumentParser(description='cuML linear regression '
                                             'benchmark')
parser.add_argument('--no-fit-intercept', dest='fit_intercept', default=True,
                    action='store_false',
                    help="Don't fit intercept (assume data already centered)")
parser.add_argument('--solver', default='eig', choices=('eig', 'svd'),
                    help='Solver used for training')
params = bench.parse_args(parser, prefix='cuml')

# Load data
X_train, X_test, y_train, y_test = bench.load_data(
    params, generated_data=['X_train', 'y_train'])

# Create our regression object
regr = LinearRegression(fit_intercept=params.fit_intercept,
                        algorithm=params.solver)

# Time fit
fit_time, _ = bench.measure_function_time(regr.fit, X_train, y_train, params=params)

# Time predict
predict_time, yp = bench.measure_function_time(regr.predict, X_test, params=params)
Example no. 13
import argparse

from bench import load_data, parse_args
from daal4py import linear_regression_training, linear_regression_prediction
from daal4py.sklearn._utils import getFPType

parser = argparse.ArgumentParser(description='daal4py linear regression '
                                 'benchmark')
parser.add_argument('--no-fit-intercept',
                    dest='fit_intercept',
                    default=True,
                    action='store_false',
                    help="Don't fit intercept (assume data already centered)")
parser.add_argument('--method',
                    default='normEqDense',
                    choices=('normEqDense', 'qrDense'),
                    help='Training method used by DAAL. "normEqDense" selects '
                    'the normal equations method, while "qrDense" selects '
                    'the method based on QR decomposition.')
params = parse_args(parser, size=(1000000, 50), prefix='daal4py')

# Generate random data
X_train, X_test, y_train, y_test = load_data(
    params,
    generated_data=['X_train', 'y_train'],
    add_dtype=True,
    label_2d=params.file_X_train is not None)


# Create our regression objects
def test_fit(X, y):
    regr_train = linear_regression_training(fptype=getFPType(X),
                                            method=params.method,
                                            interceptFlag=params.fit_intercept)
    return regr_train.compute(X, y)
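

# A minimal sketch (illustrative, not from the original file) of how the
# imported linear_regression_prediction would be applied to the training
# result returned by test_fit:
def test_predict(X, training_result):
    regr_predict = linear_regression_prediction(fptype=getFPType(X))
    return regr_predict.compute(X, training_result.model)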
Example no. 14
#
# SPDX-License-Identifier: MIT

import argparse
from bench import parse_args, time_mean_min, print_header, print_row
import numpy as np
from sklearn.linear_model import Ridge

parser = argparse.ArgumentParser(description='scikit-learn ridge regression '
                                             'benchmark')
parser.add_argument('--no-fit-intercept', dest='fit_intercept', default=True,
                    action='store_false',
                    help="Don't fit intercept (assume data already centered)")
parser.add_argument('--solver', default='auto',
                    help='Solver used for training')
params = parse_args(parser, size=(1000000, 50), dtypes=('f8', 'f4'),
                    loop_types=('fit', 'predict'))

# Generate random data
X = np.random.rand(*params.shape).astype(params.dtype)
Xp = np.random.rand(*params.shape).astype(params.dtype)
y = np.random.rand(*params.shape).astype(params.dtype)

# Create our regression object
regr = Ridge(fit_intercept=params.fit_intercept,
             solver=params.solver)

columns = ('batch', 'arch', 'prefix', 'function', 'threads', 'dtype', 'size',
           'time')

print_header(columns, params)
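
# A minimal sketch (illustrative) of the timing loop that would typically
# follow, reusing the time_mean_min and print_row helpers imported above:
fit_time, _ = time_mean_min(regr.fit, X, y,
                            outer_loops=params.fit_outer_loops,
                            inner_loops=params.fit_inner_loops)
print_row(columns, params, function='Ridge.fit', time=fit_time)

predict_time, _ = time_mean_min(regr.predict, Xp,
                                outer_loops=params.predict_outer_loops,
                                inner_loops=params.predict_inner_loops)
print_row(columns, params, function='Ridge.predict', time=predict_time)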
Example no. 15
                    action='store_false',
                    help="Don't fit intercept (assume data already centered)")
parser.add_argument('--alpha',
                    dest='alpha',
                    type=float,
                    default=1.0,
                    help='Regularization parameter')
parser.add_argument('--maxiter',
                    type=int,
                    default=1000,
                    help='Maximum iterations for the iterative solver')
parser.add_argument('--tol',
                    type=float,
                    default=0.0,
                    help='Tolerance for solver.')
params = parse_args(parser)

# Load data
X_train, X_test, y_train, y_test = load_data(params)

# Create our regression object
regr = Lasso(fit_intercept=params.fit_intercept,
             alpha=params.alpha,
             tol=params.tol,
             max_iter=params.maxiter,
             copy_X=False)

columns = ('batch', 'arch', 'prefix', 'function', 'threads', 'dtype', 'size',
           'time')

# Time fit
Example no. 16
        'if it is not None')
    parser.add_argument('--min-impurity-decrease',
                        type=float,
                        default=0.,
                        help='Minimum impurity decrease required to split a node')
    parser.add_argument('--no-bootstrap',
                        dest='bootstrap',
                        default=True,
                        action='store_false',
                        help="Don't use bootstrap sampling")

    parser.add_argument('--use-sklearn-class',
                        action='store_true',
                        help='Force use of '
                        'sklearn.ensemble.RandomForestRegressor')
    params = parse_args(parser, prefix='daal4py')

    # Load data
    X_train, X_test, y_train, y_test = load_data(params,
                                                 add_dtype=True,
                                                 label_2d=True)

    columns = ('batch', 'arch', 'prefix', 'function', 'threads', 'dtype',
               'size', 'num_trees', 'time')
    if isinstance(params.max_features, float):
        params.max_features = int(X_train.shape[1] * params.max_features)

    # Time fit and predict
    fit_time, res = measure_function_time(
        df_regr_fit,
        X_train,
Example no. 17
                    help='Initial clusters')
parser.add_argument('-t',
                    '--tol',
                    type=float,
                    default=0.,
                    help='Absolute threshold')
parser.add_argument('--maxiter',
                    type=int,
                    default=100,
                    help='Maximum number of iterations')
parser.add_argument('--samples-per-batch',
                    type=int,
                    default=32768,
                    help='Number of samples per batch')
parser.add_argument('--n-clusters', type=int, help='Number of clusters')
params = parse_args(parser, prefix='cuml', loop_types=('fit', 'predict'))

# Load and convert generated data
X_train, X_test, _, _ = load_data(params)

if params.filei == 'k-means++':
    X_init = 'k-means++'
# Load initial centroids from specified path
elif params.filei is not None:
    X_init = np.load(params.filei).astype(params.dtype)
    params.n_clusters = X_init.shape[0]
# or choose random centroids from training data
else:
    np.random.seed(params.seed)
    centroids_idx = np.random.randint(0,
                                      X_train.shape[0],
Example no. 18
# Copyright (C) 2017-2019 Intel Corporation
#
# SPDX-License-Identifier: MIT

import argparse
from bench import parse_args, time_mean_min, print_header, print_row
import numpy as np
from sklearn.metrics.pairwise import pairwise_distances

parser = argparse.ArgumentParser(description='scikit-learn pairwise distances '
                                 'benchmark')
parser.add_argument('--metrics',
                    nargs='*',
                    default=['cosine', 'correlation'],
                    help='Metrics to test for pairwise_distances')
params = parse_args(parser, size=(1000, 150000), dtypes=('f8', 'f4'))

# Generate random data
X = np.random.rand(*params.shape).astype(params.dtype)

columns = ('batch', 'arch', 'prefix', 'function', 'threads', 'dtype', 'size',
           'time')
print_header(columns, params)

for metric in params.metrics:
    time, _ = time_mean_min(pairwise_distances,
                            X,
                            metric=metric,
                            n_jobs=params.n_jobs,
                            outer_loops=params.outer_loops,
                            inner_loops=params.inner_loops)
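
    # A print_row call along these lines would typically follow
    # (the reported function name is illustrative):
    print_row(columns, params, function=f'pairwise_distances ({metric})',
              time=time)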