import argparse

import numpy as np
from daal4py import ridge_regression_training, ridge_regression_prediction
from daal4py.sklearn._utils import getFPType

import bench

parser = argparse.ArgumentParser(description='daal4py ridge regression '
                                             'benchmark')
parser.add_argument('--no-fit-intercept', dest='fit_intercept', default=True,
                    action='store_false',
                    help="Don't fit intercept (assume data already centered)")
parser.add_argument('--alpha', type=float, default=1.0,
                    help='Regularization strength')
params = bench.parse_args(parser, prefix='daal4py')

# Load data (generated randomly if no input files are given)
X_train, X_test, y_train, y_test = bench.load_data(
    params, generated_data=['X_train', 'y_train'], add_dtype=True,
    label_2d=True if params.file_X_train is not None else False)


# Create our regression objects
def test_fit(X, y):
    regr_train = ridge_regression_training(
        fptype=getFPType(X),
        ridgeParameters=np.array([[params.alpha]]),
        interceptFlag=params.fit_intercept)
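    # [Hedged completion, not in the excerpt] The other daal4py regression
    # benchmarks return the computed training result here, so this one
    # presumably does the same:
    return regr_train.compute(X, y)


# [Hedged sketch] Prediction would then reuse the trained model through
# ridge_regression_prediction; the helper name and the `.model` attribute
# follow the usual daal4py result API and are assumptions for this file:
def test_predict(X, training_result):
    regr_predict = ridge_regression_prediction(fptype=getFPType(X))
    return regr_predict.compute(X, training_result.model)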
                    help='Minimum samples number for node splitting')
parser.add_argument('--max-leaf-nodes', type=int, default=None,
                    help='Maximum leaf nodes per tree')
parser.add_argument('--min-impurity-decrease', type=float, default=0.,
                    help='Needed impurity decrease for node splitting')
parser.add_argument('--no-bootstrap', dest='bootstrap', default=True,
                    action='store_false',
                    help="Don't use bootstrapping")
params = bench.parse_args(parser)

from sklearn.ensemble import RandomForestClassifier

# Load and convert data
X_train, X_test, y_train, y_test = bench.load_data(params)

# Create our random forest classifier
clf = RandomForestClassifier(
    criterion=params.criterion,
    n_estimators=params.num_trees,
    max_depth=params.max_depth,
    max_features=params.max_features,
    min_samples_split=params.min_samples_split,
    max_leaf_nodes=params.max_leaf_nodes,
    min_impurity_decrease=params.min_impurity_decrease,
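    # [Hedged completion, not in the excerpt] the constructor is presumably
    # closed with the remaining parsed options; the exact argument list
    # (seed/n_jobs handling) is an assumption:
    bootstrap=params.bootstrap,
    random_state=params.seed,
    n_jobs=params.n_jobs)

# [Hedged sketch] Fit and predict would then be timed with the bench helpers,
# as in the cuML linear regression excerpt further below:
fit_time, _ = bench.measure_function_time(clf.fit, X_train, y_train,
                                          params=params)
predict_time, y_pred = bench.measure_function_time(clf.predict, X_test,
                                                   params=params)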
                    choices=['daal', 'full', 'correlation'], default='daal',
                    help='SVD solver to use')
parser.add_argument('--n-components', type=int, default=None,
                    help='Number of components to find')
parser.add_argument('--whiten', action='store_true', default=False,
                    help='Perform whitening')
parser.add_argument('--write-results', action='store_true', default=False,
                    help='Write results to disk for verification')
params = parse_args(parser, size=(10000, 1000))

# Load data
X_train, X_test, _, _ = load_data(params, generated_data=['X_train'],
                                  add_dtype=True)

if params.n_components is None:
    p, n = X_train.shape
    params.n_components = min((n, (2 + min((n, p))) // 3))


# Define how to do our scikit-learn PCA using DAAL...
def pca_fit_daal(X, n_components, method):
    if n_components < 1:
                    '--filet', '--fileT', '--tol', required=True, type=str,
                    help='Input file with absolute threshold, in NPY format')
parser.add_argument('-m', '--data-multiplier', default=100, type=int,
                    help='Data multiplier')
parser.add_argument('--maxiter', type=int, default=100,
                    help='Maximum number of iterations')
params = parse_args(parser, loop_types=('fit', 'predict'), prefix='daal4py')

# Load generated data
X = np.load(params.filex)
X_init = np.load(params.filei)
X_mult = np.vstack((X, ) * params.data_multiplier)
tol = np.load(params.filet)
params.size = size_str(X.shape)
params.n_clusters = X_init.shape[0]
params.dtype = X.dtype


# Define functions to time
def test_fit(X, X_init):
    algorithm = kmeans(fptype=getFPType(X),
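                       # [Hedged completion, not in the excerpt] the remaining
                       # keyword arguments presumably wire up the options and
                       # data loaded above; the names follow daal4py's k-means
                       # interface and are an assumption for this file:
                       nClusters=params.n_clusters,
                       maxIterations=params.maxiter,
                       accuracyThreshold=float(tol))
    return algorithm.compute(X, X_init)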
                    action='store_false', default=True,
                    help="Don't fit intercept")
parser.add_argument('--multiclass', default='auto',
                    choices=('auto', 'ovr', 'multinomial'),
                    help='How to treat multi-class data. '
                         '"auto" picks "ovr" for binary classification, and '
                         '"multinomial" otherwise.')
parser.add_argument('--solver', default='lbfgs',
                    choices=('lbfgs', 'newton-cg', 'saga'),
                    help='Solver to use.')
parser.add_argument('--maxiter', type=int, default=100,
                    help='Maximum iterations for the iterative solver')
parser.add_argument('-C', dest='C', type=float, default=1.0,
                    help='Regularization parameter')
parser.add_argument('--tol', type=float, default=None,
                    help='Tolerance for solver. If solver == "newton-cg", '
                         'then the default is 1e-3. Otherwise, the default '
                         'is 1e-10.')
params = bench.parse_args(parser, loop_types=('fit', 'predict'))
bench.run_with_context(params, main)
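# [Hedged sketch] main() is not shown in the excerpt (in the full script it
# would be defined before the run_with_context call above). A minimal version,
# assuming the usual bench helpers, scikit-learn's LogisticRegression, and a
# `fit_intercept` destination for the truncated first option, might be:
def main():
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import accuracy_score

    X_train, X_test, y_train, y_test = bench.load_data(params)

    # Apply the per-solver tolerance default described in the help text
    tol = params.tol
    if tol is None:
        tol = 1e-3 if params.solver == 'newton-cg' else 1e-10

    clf = LogisticRegression(C=params.C, tol=tol, max_iter=params.maxiter,
                             solver=params.solver,
                             multi_class=params.multiclass,
                             fit_intercept=params.fit_intercept)

    fit_time, _ = bench.measure_function_time(clf.fit, X_train, y_train,
                                              params=params)
    predict_time, y_pred = bench.measure_function_time(clf.predict, X_test,
                                                       params=params)
    test_acc = 100 * accuracy_score(y_test, y_pred)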
def test_parse_args():
    command = "http://127.0.0.1:5000"
    args = parse_args(command.split())
    assert args == {
        'concurrency': 1, 'total_requests': 1,
        'urls': ['http://127.0.0.1:5000'], 'timeout': None, 'method': 'GET',
        'keep_alive': False, 'auth': None, 'data': None, 'json': None,
        'headers': None, 'cookies': None
    }

    command = "-k http://127.0.0.1:5000"
    args = parse_args(command.split())
    assert args == {
        'concurrency': 1, 'total_requests': 1,
        'urls': ['http://127.0.0.1:5000'], 'timeout': None, 'method': 'GET',
        'keep_alive': True, 'auth': None, 'data': None, 'json': None,
        'headers': None, 'cookies': None
    }

    command = "-c 10 -n 100 http://127.0.0.1:5000"
    args = parse_args(command.split())
    assert args == {
        'concurrency': 10, 'total_requests': 100,
        'urls': ['http://127.0.0.1:5000'], 'timeout': None, 'method': 'GET',
        'keep_alive': False, 'auth': None, 'data': None, 'json': None,
        'headers': None, 'cookies': None
    }

    command = "-m POST -c 10 -n 100 http://127.0.0.1:5000"
    args = parse_args(command.split())
    assert args == {
        'concurrency': 10, 'total_requests': 100,
        'urls': ['http://127.0.0.1:5000'], 'timeout': None, 'method': 'POST',
        'keep_alive': False, 'auth': None, 'data': None, 'json': None,
        'headers': None, 'cookies': None
    }

    command = "-m POST -d test -c 10 -n 100 http://127.0.0.1:5000"
    args = parse_args(command.split())
    assert args == {
        'concurrency': 10, 'total_requests': 100,
        'urls': ['http://127.0.0.1:5000'], 'timeout': None, 'method': 'POST',
        'keep_alive': False, 'auth': None, 'data': 'test', 'json': None,
        'headers': None, 'cookies': None
    }

    command = "-m POST -j {'test':'test_json'} -c 10 -n 100 http://127.0.0.1:5000"
    args = parse_args(command.split())
    assert args == {
        'concurrency': 10, 'total_requests': 100,
        'urls': ['http://127.0.0.1:5000'], 'timeout': None, 'method': 'POST',
        'keep_alive': False, 'auth': None, 'data': None,
        'json': {'test': 'test_json'}, 'headers': None, 'cookies': None
    }

    command = "-m PUT -c 10 -n 100 http://127.0.0.1:5000"
    args = parse_args(command.split())
    assert args == {
        'concurrency': 10, 'total_requests': 100,
        'urls': ['http://127.0.0.1:5000'], 'timeout': None, 'method': 'PUT',
        'keep_alive': False, 'auth': None, 'data': None, 'json': None,
        'headers': None, 'cookies': None
    }

    command = "-m DELETE -c 10 -n 100 http://127.0.0.1:5000"
    args = parse_args(command.split())
    assert args == {
        'concurrency': 10, 'total_requests': 100,
        'urls': ['http://127.0.0.1:5000'], 'timeout': None, 'method': 'DELETE',
        'keep_alive': False, 'auth': None, 'data': None, 'json': None,
        'headers': None, 'cookies': None
    }

    command = "-a Basic:nikan:wrong_pass http://127.0.0.1:5000"
    args = parse_args(command.split())
    from requests.auth import HTTPBasicAuth
    assert args == {
        'concurrency': 1, 'total_requests': 1,
        'urls': ['http://127.0.0.1:5000'], 'timeout': None, 'method': 'GET',
        'keep_alive': False, 'auth': HTTPBasicAuth('nikan', 'wrong_pass'),
        'data': None, 'json': None, 'headers': None, 'cookies': None
    }

    command = "-a Digest:nikan:wrong_pass http://127.0.0.1:5000"
    args = parse_args(command.split())
    from requests.auth import HTTPDigestAuth
    assert args == {
        'concurrency': 1, 'total_requests': 1,
        'urls': ['http://127.0.0.1:5000'], 'timeout': None, 'method': 'GET',
        'keep_alive': False, 'auth': HTTPDigestAuth('nikan', 'wrong_pass'),
        'data': None, 'json': None, 'headers': None, 'cookies': None
    }

    command = "-H {'user-agent':'hahah'} -C {'a':'1'} http://127.0.0.1:5000"
    args = parse_args(command.split())
    assert args == {
        'concurrency': 1, 'total_requests': 1,
        'urls': ['http://127.0.0.1:5000'], 'timeout': None, 'method': 'GET',
        'keep_alive': False, 'auth': None, 'data': None, 'json': None,
        'headers': {'user-agent': 'hahah'}, 'cookies': {'a': '1'}
    }

    command = "-f correct_file"
    args = parse_args(command.split())
    assert args == {
        'concurrency': 1, 'total_requests': 5,
        'urls': [
            'http://example.com/', 'http://example.com/1',
            'http://example.com/2', 'http://example.com/3',
            'http://example.com/4'
        ],
        'timeout': None, 'method': 'GET', 'keep_alive': False, 'auth': None,
        'data': None, 'json': None, 'headers': None, 'cookies': None
    }
# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT

import argparse
from bench import measure_function_time, parse_args, load_data, print_output
from sklearn.cluster import DBSCAN

parser = argparse.ArgumentParser(description='scikit-learn DBSCAN benchmark')
parser.add_argument('-e', '--eps', '--epsilon', type=float, default=10.,
                    help='Radius of neighborhood of a point')
parser.add_argument('-m', '--min-samples', default=5, type=int,
                    help='The minimum number of samples required in a '
                         'neighborhood to consider a point a core point')
params = parse_args(parser, n_jobs_supported=True)

# Load generated data
X, _, _, _ = load_data(params, add_dtype=True)

# Create our clustering object
dbscan = DBSCAN(eps=params.eps, n_jobs=params.n_jobs,
                min_samples=params.min_samples, metric='euclidean',
                algorithm='auto')

# N.B. algorithm='auto' will select DAAL's brute force method when running
# daal4py-patched scikit-learn, and probably 'kd_tree' when running unpatched
# scikit-learn.

columns = ('batch', 'arch', 'prefix', 'function', 'threads', 'dtype', 'size',
           'n_clusters', 'time')
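# [Hedged sketch] The timing/reporting part of this script is not in the
# excerpt; based on the bench helpers imported above it presumably measures
# fit and reports via print_output. The cluster count and the accuracy-related
# arguments shown here are assumptions, not source code:
time, _ = measure_function_time(dbscan.fit, X, params=params)
labels = dbscan.labels_
params.n_clusters = len(set(labels)) - (1 if -1 in labels else 0)

print_output(library='sklearn', algorithm='dbscan', stages=['training'],
             columns=columns, params=params, functions=['DBSCAN'],
             times=[time], accuracy_type=None, accuracies=[None], data=[X])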
def main():
    parser = argparse.ArgumentParser(description='daal4py SVC benchmark with '
                                                 'linear kernel')
    parser.add_argument('-x', '--filex', '--fileX',
                        type=argparse.FileType('r'), required=True,
                        help='Input file with features, in NPY format')
    parser.add_argument('-y', '--filey', '--fileY',
                        type=argparse.FileType('r'), required=True,
                        help='Input file with labels, in NPY format')
    parser.add_argument('-C', dest='C', type=float, default=0.01,
                        help='SVM slack parameter')
    parser.add_argument('--kernel', choices=('linear', ), default='linear',
                        help='SVM kernel function')
    parser.add_argument('--maxiter', type=int, default=2000,
                        help='Maximum iterations for the iterative solver. '
                             '-1 means no limit.')
    parser.add_argument('--max-cache-size', type=int, default=64,
                        help='Maximum cache size, in gigabytes, for SVM.')
    parser.add_argument('--tau', type=float, default=1e-12,
                        help='Tau parameter for working set selection scheme')
    parser.add_argument('--tol', type=float, default=1e-16, help='Tolerance')
    parser.add_argument('--no-shrinking', action='store_false', default=True,
                        dest='shrinking',
                        help="Don't use shrinking heuristic")
    params = parse_args(parser, loop_types=('fit', 'predict'),
                        prefix='daal4py')

    # Load data and cast to float64
    X_train = np.load(params.filex.name).astype('f8')
    y_train = np.load(params.filey.name).astype('f8')

    cache_size_bytes = get_optimal_cache_size(X_train.shape[0],
                                              max_cache=params.max_cache_size)
    params.cache_size_mb = cache_size_bytes / 2**20
    params.cache_size_bytes = cache_size_bytes
    params.n_classes = np.unique(y_train).size

    # This is necessary for DAAL
    y_train[y_train == 0] = -1
    y_train = y_train[:, np.newaxis]

    columns = ('batch', 'arch', 'prefix', 'function', 'threads', 'dtype',
               'size', 'kernel', 'cache_size_mb', 'C', 'sv_len', 'n_classes',
               'accuracy', 'time')
    params.size = size_str(X_train.shape)
    params.dtype = X_train.dtype

    print_header(columns, params)

    # Time fit and predict
    fit_time, res = time_mean_min(test_fit, X_train, y_train, params,
                                  outer_loops=params.fit_outer_loops,
                                  inner_loops=params.fit_inner_loops)
    res, support, indices, n_support = res
    params.sv_len = support.shape[0]
    print_row(columns, params, function='SVM.fit', time=fit_time)

    predict_time, yp = time_mean_min(test_predict, X_train, res, params,
                                     outer_loops=params.predict_outer_loops,
                                     inner_loops=params.predict_inner_loops)
    print_row(columns, params, function='SVM.predict', time=predict_time,
              accuracy=f'{100*accuracy_score(yp, y_train):.3}')
                    default='daal', help='SVD solver to use')
parser.add_argument('--n-components', type=int, default=None,
                    help='Number of components to find')
parser.add_argument('--whiten', action='store_true', default=False,
                    help='Perform whitening')
parser.add_argument('--write-results', action='store_true', default=False,
                    help='Write results to disk for verification')
params = parse_args(parser, size=(10000, 1000), dtypes=('f8', 'f4'),
                    loop_types=('fit', 'transform'))

# Generate random data
p, n = params.shape
X = np.random.rand(*params.shape).astype(params.dtype)
Xp = np.random.rand(*params.shape).astype(params.dtype)

if not params.n_components:
    params.n_components = min((n, (2 + min((n, p))) // 3))


# Define how to do our scikit-learn PCA using DAAL...
def pca_fit_daal(X, n_components):
    if n_components < 1:
import argparse
from bench import parse_args, time_mean_min, print_header, print_row
import daal4py
from daal4py.sklearn.utils import getFPType
import numpy as np

parser = argparse.ArgumentParser(description='daal4py pairwise distances '
                                             'benchmark')
parser.add_argument('--metrics', nargs='*', default=['cosine', 'correlation'],
                    choices=('cosine', 'correlation'),
                    help='Metrics to test for pairwise_distances')
params = parse_args(parser, size=(1000, 150000), dtypes=('f8', 'f4'),
                    prefix='daal4py')

# Generate random data
X = np.random.rand(*params.shape).astype(params.dtype)

columns = ('batch', 'arch', 'prefix', 'function', 'threads', 'dtype', 'size',
           'time')
print_header(columns, params)

for metric in params.metrics:
    pairwise_distances = getattr(daal4py, f'{metric}_distance')

    def test_distances(pairwise_distances, X):
        algorithm = pairwise_distances(fptype=getFPType(X))
        return algorithm.compute(X)
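    # [Hedged sketch] The excerpt stops here; the loop body presumably times
    # the distance computation and prints a row, mirroring the scikit-learn
    # variant of this benchmark below. The reported function name is an
    # assumption:
    time, _ = time_mean_min(test_distances, pairwise_distances, X,
                            outer_loops=params.outer_loops,
                            inner_loops=params.inner_loops)
    print_row(columns, params, function=f'{metric.capitalize()}_distance',
              time=time)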
def main():
    parser = argparse.ArgumentParser(description='daal4py SVC benchmark with '
                                                 'linear kernel')
    parser.add_argument('-C', dest='C', type=float, default=1.0,
                        help='SVM regularization parameter')
    parser.add_argument('--kernel', choices=('linear', 'rbf'),
                        default='linear', help='SVM kernel function')
    parser.add_argument('--gamma', type=float, default=None,
                        help='Parameter for kernel="rbf"')
    parser.add_argument('--maxiter', type=int, default=100000,
                        help='Maximum iterations for the iterative solver.')
    parser.add_argument('--max-cache-size', type=int, default=8,
                        help='Maximum cache size, in gigabytes, for SVM.')
    parser.add_argument('--tau', type=float, default=1e-12,
                        help='Tau parameter for working set selection scheme')
    parser.add_argument('--tol', type=float, default=1e-3, help='Tolerance')
    parser.add_argument('--no-shrinking', action='store_false', default=True,
                        dest='shrinking',
                        help="Don't use shrinking heuristic")
    params = parse_args(parser, prefix='daal4py')

    # Load data
    X_train, X_test, y_train, y_test = load_data(params, add_dtype=True,
                                                 label_2d=True)

    if params.gamma is None:
        params.gamma = 1 / X_train.shape[1]

    cache_size_bytes = get_optimal_cache_size(X_train.shape[0],
                                              max_cache=params.max_cache_size)
    params.cache_size_mb = cache_size_bytes / 2**20
    params.cache_size_bytes = cache_size_bytes
    params.n_classes = np.unique(y_train).size

    columns = ('batch', 'arch', 'prefix', 'function', 'threads', 'dtype',
               'size', 'kernel', 'cache_size_mb', 'C', 'sv_len', 'n_classes',
               'accuracy', 'time')

    # Time fit and predict
    fit_time, res = measure_function_time(test_fit, X_train, y_train, params,
                                          params=params)
    res, support, indices, n_support = res
    params.sv_len = support.shape[0]

    yp = test_predict(X_train, res, params)
    train_acc = 100 * accuracy_score(yp, y_train)

    predict_time, yp = measure_function_time(test_predict, X_test, res,
                                             params, params=params)
    # Test accuracy must be computed against the test labels, not y_train
    test_acc = 100 * accuracy_score(yp, y_test)

    print_output(library='daal4py', algorithm='svc',
                 stages=['training', 'prediction'], columns=columns,
                 params=params, functions=['SVM.fit', 'SVM.predict'],
                 times=[fit_time, predict_time], accuracy_type='accuracy[%]',
                 accuracies=[train_acc, test_acc], data=[X_train, X_test])
# ===============================================================================

import argparse
import bench
from cuml import LinearRegression

parser = argparse.ArgumentParser(description='cuML linear regression '
                                             'benchmark')
parser.add_argument('--no-fit-intercept', dest='fit_intercept', default=True,
                    action='store_false',
                    help="Don't fit intercept (assume data already centered)")
parser.add_argument('--solver', default='eig', choices=('eig', 'svd'),
                    help='Solver used for training')
params = bench.parse_args(parser, prefix='cuml')

# Load data
X_train, X_test, y_train, y_test = bench.load_data(
    params, generated_data=['X_train', 'y_train'])

# Create our regression object
regr = LinearRegression(fit_intercept=params.fit_intercept,
                        algorithm=params.solver)

# Time fit
fit_time, _ = bench.measure_function_time(regr.fit, X_train, y_train,
                                          params=params)

# Time predict
predict_time, yp = bench.measure_function_time(regr.predict, X_test,
                                               params=params)
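# [Hedged sketch] The reporting step is not included in the excerpt; by
# analogy with the daal4py SVC benchmark it would typically end with
# bench.print_output. The RMSE metric, the column list, and the assumption
# that yp/y_test behave like NumPy arrays (cuML may return device objects)
# are guesses, not source code:
import numpy as np

test_rmse = float(np.sqrt(np.mean((np.asarray(yp) - np.asarray(y_test)) ** 2)))
bench.print_output(library='cuml', algorithm='linear_regression',
                   stages=['training', 'prediction'],
                   columns=('batch', 'arch', 'prefix', 'function', 'threads',
                            'dtype', 'size', 'time'),
                   params=params, functions=['Linear.fit', 'Linear.predict'],
                   times=[fit_time, predict_time], accuracy_type='rmse',
                   accuracies=[None, test_rmse], data=[X_train, X_test])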
from daal4py import linear_regression_training, linear_regression_prediction

parser = argparse.ArgumentParser(description='daal4py linear regression '
                                             'benchmark')
parser.add_argument('--no-fit-intercept', dest='fit_intercept', default=True,
                    action='store_false',
                    help="Don't fit intercept (assume data already centered)")
parser.add_argument('--method', default='normEqDense',
                    choices=('normEqDense', 'qrDense'),
                    help='Training method used by DAAL. "normEqDense" selects '
                         'the normal equations method, while "qrDense" selects '
                         'the method based on QR decomposition.')
params = parse_args(parser, size=(1000000, 50), prefix='daal4py')

# Load data (generated randomly if no input files are given)
X_train, X_test, y_train, y_test = load_data(
    params, generated_data=['X_train', 'y_train'], add_dtype=True,
    label_2d=True if params.file_X_train is not None else False)


# Create our regression objects
def test_fit(X, y):
    regr_train = linear_regression_training(fptype=getFPType(X),
                                            method=params.method,
                                            interceptFlag=params.fit_intercept)
    return regr_train.compute(X, y)
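# [Hedged sketch] A matching prediction helper is not shown in the excerpt;
# with the usual daal4py API it would look roughly like this (the helper name
# and the `.model` attribute on the training result are assumptions):
def test_predict(X, training_result):
    regr_predict = linear_regression_prediction(fptype=getFPType(X))
    return regr_predict.compute(X, training_result.model)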
#
# SPDX-License-Identifier: MIT

import argparse
from bench import parse_args, time_mean_min, print_header, print_row
import numpy as np
from sklearn.linear_model import Ridge

parser = argparse.ArgumentParser(description='scikit-learn ridge regression '
                                             'benchmark')
parser.add_argument('--no-fit-intercept', dest='fit_intercept', default=True,
                    action='store_false',
                    help="Don't fit intercept (assume data already centered)")
parser.add_argument('--solver', default='auto',
                    help='Solver used for training')
params = parse_args(parser, size=(1000000, 50), dtypes=('f8', 'f4'),
                    loop_types=('fit', 'predict'))

# Generate random data
X = np.random.rand(*params.shape).astype(params.dtype)
Xp = np.random.rand(*params.shape).astype(params.dtype)
y = np.random.rand(*params.shape).astype(params.dtype)

# Create our regression object
regr = Ridge(fit_intercept=params.fit_intercept, solver=params.solver)

columns = ('batch', 'arch', 'prefix', 'function', 'threads', 'dtype', 'size',
           'time')
print_header(columns, params)
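# [Hedged sketch] The timing loop is not part of the excerpt; following the
# pairwise-distances benchmark below, fit and predict would typically be
# timed with time_mean_min and reported with print_row (the per-loop
# attributes come from loop_types=('fit', 'predict') above):
fit_time, _ = time_mean_min(regr.fit, X, y,
                            outer_loops=params.fit_outer_loops,
                            inner_loops=params.fit_inner_loops)
print_row(columns, params, function='Ridge.fit', time=fit_time)

predict_time, _ = time_mean_min(regr.predict, Xp,
                                outer_loops=params.predict_outer_loops,
                                inner_loops=params.predict_inner_loops)
print_row(columns, params, function='Ridge.predict', time=predict_time)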
                    action='store_false',
                    help="Don't fit intercept (assume data already centered)")
parser.add_argument('--alpha', dest='alpha', type=float, default=1.0,
                    help='Regularization parameter')
parser.add_argument('--maxiter', type=int, default=1000,
                    help='Maximum iterations for the iterative solver')
parser.add_argument('--tol', type=float, default=0.0,
                    help='Tolerance for solver.')
params = parse_args(parser)

# Load data
X_train, X_test, y_train, y_test = load_data(params)

# Create our regression object
regr = Lasso(fit_intercept=params.fit_intercept, alpha=params.alpha,
             tol=params.tol, max_iter=params.maxiter, copy_X=False)

columns = ('batch', 'arch', 'prefix', 'function', 'threads', 'dtype', 'size',
           'time')

# Time fit
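# [Hedged sketch] The excerpt ends at the timing step; following the cuML
# linear regression excerpt above, it presumably continues along these lines
# (measure_function_time/print_output are assumed to be imported from bench,
# and the accuracy-related arguments are assumptions):
fit_time, _ = measure_function_time(regr.fit, X_train, y_train, params=params)

# Time predict
predict_time, yp = measure_function_time(regr.predict, X_test, params=params)

print_output(library='sklearn', algorithm='lasso',
             stages=['training', 'prediction'], columns=columns,
             params=params, functions=['Lasso.fit', 'Lasso.predict'],
             times=[fit_time, predict_time], accuracy_type=None,
             accuracies=[None, None], data=[X_train, X_test])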
                    'if it is not None')
parser.add_argument('--min-impurity-decrease', type=float, default=0.,
                    help='Needed impurity decrease for node splitting')
parser.add_argument('--no-bootstrap', dest='bootstrap', default=True,
                    action='store_false',
                    help="Don't use bootstrapping")
parser.add_argument('--use-sklearn-class', action='store_true',
                    help='Force use of '
                         'sklearn.ensemble.RandomForestRegressor')
params = parse_args(parser, prefix='daal4py')

# Load data
X_train, X_test, y_train, y_test = load_data(params, add_dtype=True,
                                             label_2d=True)

columns = ('batch', 'arch', 'prefix', 'function', 'threads', 'dtype', 'size',
           'num_trees', 'time')

if isinstance(params.max_features, float):
    params.max_features = int(X_train.shape[1] * params.max_features)

# Time fit and predict
fit_time, res = measure_function_time(
    df_regr_fit, X_train,
                    help='Initial clusters')
parser.add_argument('-t', '--tol', type=float, default=0.,
                    help='Absolute threshold')
parser.add_argument('--maxiter', type=int, default=100,
                    help='Maximum number of iterations')
parser.add_argument('--samples-per-batch', type=int, default=32768,
                    help='Maximum number of samples per batch')
parser.add_argument('--n-clusters', type=int, help='Number of clusters')
params = parse_args(parser, prefix='cuml', loop_types=('fit', 'predict'))

# Load and convert generated data
X_train, X_test, _, _ = load_data(params)

if params.filei == 'k-means++':
    X_init = 'k-means++'
# Load initial centroids from specified path
elif params.filei is not None:
    X_init = np.load(params.filei).astype(params.dtype)
    params.n_clusters = X_init.shape[0]
# or choose random centroids from training data
else:
    np.random.seed(params.seed)
    centroids_idx = np.random.randint(0, X_train.shape[0],
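                                      # [Hedged completion, not in the excerpt]
                                      # presumably params.n_clusters rows are
                                      # drawn and used as initial centroids
                                      # (assuming X_train indexes like an
                                      # ndarray):
                                      size=params.n_clusters)
    X_init = X_train[centroids_idx]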
# Copyright (C) 2017-2019 Intel Corporation
#
# SPDX-License-Identifier: MIT

import argparse
from bench import parse_args, time_mean_min, print_header, print_row
import numpy as np
from sklearn.metrics.pairwise import pairwise_distances

parser = argparse.ArgumentParser(description='scikit-learn pairwise distances '
                                             'benchmark')
parser.add_argument('--metrics', nargs='*', default=['cosine', 'correlation'],
                    help='Metrics to test for pairwise_distances')
params = parse_args(parser, size=(1000, 150000), dtypes=('f8', 'f4'))

# Generate random data
X = np.random.rand(*params.shape).astype(params.dtype)

columns = ('batch', 'arch', 'prefix', 'function', 'threads', 'dtype', 'size',
           'time')
print_header(columns, params)

for metric in params.metrics:
    time, _ = time_mean_min(pairwise_distances, X, metric=metric,
                            n_jobs=params.n_jobs,
                            outer_loops=params.outer_loops,
                            inner_loops=params.inner_loops)
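    # [Hedged sketch] The excerpt ends here; each timed metric would
    # presumably be reported with the print_row helper imported above
    # (the reported function name is an assumption):
    print_row(columns, params, function=f'{metric.capitalize()}_distances',
              time=time)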