def test_predict(X, X_init):
    """Assign points to clusters: K-Means with zero iterations."""
    algorithm = kmeans(fptype=getFPType(X),
                       nClusters=params.n_clusters,
                       maxIterations=0,
                       assignFlag=True,
                       accuracyThreshold=0.0)
    return algorithm.compute(X, X_init)


columns = ('batch', 'arch', 'prefix', 'function', 'threads', 'dtype',
           'size', 'n_clusters', 'time')
print_header(columns, params)

# Time fit
fit_time, _ = time_mean_min(test_fit, X, X_init,
                            outer_loops=params.fit_outer_loops,
                            inner_loops=params.fit_inner_loops)
print_row(columns, params, function='KMeans.fit', time=fit_time)

# Time predict
predict_time, _ = time_mean_min(test_predict, X, X_init,
                                outer_loops=params.predict_outer_loops,
                                inner_loops=params.predict_inner_loops)
print_row(columns, params, function='KMeans.predict', time=predict_time)
except ImportError: from sklearn.ensemble import RandomForestRegressor # Load data X = np.load(params.filex.name) y = np.load(params.filey.name) # Create our random forest regressor regr = RandomForestRegressor(n_estimators=params.num_trees, max_depth=params.max_depth, max_features=params.max_features, random_state=params.seed) columns = ('batch', 'arch', 'prefix', 'function', 'threads', 'dtype', 'size', 'num_trees', 'time') params.size = size_str(X.shape) params.dtype = X.dtype print_header(columns, params) # Time fit and predict fit_time, _ = time_mean_min(regr.fit, X, y, outer_loops=params.fit_outer_loops, inner_loops=params.fit_inner_loops) print_row(columns, params, function='df_regr.fit', time=fit_time) predict_time, y_pred = time_mean_min(regr.predict, X, outer_loops=params.predict_outer_loops, inner_loops=params.predict_inner_loops) print_row(columns, params, function='df_regr.predict', time=predict_time)
columns = ('batch', 'arch', 'prefix', 'function', 'threads', 'dtype', 'size', 'solver', 'C', 'multiclass', 'n_classes', 'accuracy', 'time') params.size = size_str(X.shape) params.dtype = X.dtype print_header(columns, params) # Time fit and predict fit_time, res = time_mean_min(test_fit, X, y, penalty='l2', C=params.C, verbose=params.verbose, fit_intercept=params.fit_intercept, tol=params.tol, max_iter=params.maxiter, solver=params.solver, outer_loops=params.fit_outer_loops, inner_loops=params.fit_inner_loops) beta, intercept, solver_result, params.multiclass = res print_row(columns, params, function='LogReg.fit', time=fit_time) predict_time, yp = time_mean_min(test_predict, X, beta, intercept=intercept, multi_class=params.multiclass, outer_loops=params.predict_outer_loops,
def main():
    """Run the daal4py linear-kernel SVC benchmark from the command line."""
    parser = argparse.ArgumentParser(description='daal4py SVC benchmark with '
                                     'linear kernel')
    parser.add_argument('-x', '--filex', '--fileX',
                        type=argparse.FileType('r'), required=True,
                        help='Input file with features, in NPY format')
    parser.add_argument('-y', '--filey', '--fileY',
                        type=argparse.FileType('r'), required=True,
                        help='Input file with labels, in NPY format')
    parser.add_argument('-C', dest='C', type=float, default=0.01,
                        help='SVM slack parameter')
    parser.add_argument('--kernel', choices=('linear', ), default='linear',
                        help='SVM kernel function')
    parser.add_argument('--maxiter', type=int, default=2000,
                        help='Maximum iterations for the iterative solver. '
                             '-1 means no limit.')
    parser.add_argument('--max-cache-size', type=int, default=64,
                        help='Maximum cache size, in gigabytes, for SVM.')
    parser.add_argument('--tau', type=float, default=1e-12,
                        help='Tau parameter for working set selection scheme')
    parser.add_argument('--tol', type=float, default=1e-16, help='Tolerance')
    parser.add_argument('--no-shrinking', action='store_false', default=True,
                        dest='shrinking',
                        help="Don't use shrinking heuristic")
    params = parse_args(parser, loop_types=('fit', 'predict'),
                        prefix='daal4py')

    # Load data and cast to float64
    X_train = np.load(params.filex.name).astype('f8')
    y_train = np.load(params.filey.name).astype('f8')

    cache_size_bytes = get_optimal_cache_size(X_train.shape[0],
                                              max_cache=params.max_cache_size)
    params.cache_size_mb = cache_size_bytes / 2**20
    params.cache_size_bytes = cache_size_bytes
    params.n_classes = np.unique(y_train).size

    # This is necessary for daal
    y_train[y_train == 0] = -1
    y_train = y_train[:, np.newaxis]

    columns = ('batch', 'arch', 'prefix', 'function', 'threads', 'dtype',
               'size', 'kernel', 'cache_size_mb', 'C', 'sv_len', 'n_classes',
               'accuracy', 'time')
    params.size = size_str(X_train.shape)
    params.dtype = X_train.dtype
    print_header(columns, params)

    # Time fit and predict
    fit_time, res = time_mean_min(test_fit, X_train, y_train, params,
                                  outer_loops=params.fit_outer_loops,
                                  inner_loops=params.fit_inner_loops)
    res, support, indices, n_support = res
    params.sv_len = support.shape[0]
    print_row(columns, params, function='SVM.fit', time=fit_time)

    predict_time, yp = time_mean_min(test_predict, X_train, res, params,
                                     outer_loops=params.predict_outer_loops,
                                     inner_loops=params.predict_inner_loops)
    print_row(columns, params, function='SVM.predict', time=predict_time,
              accuracy=f'{100*accuracy_score(yp, y_train):.3}')
return pca_transform_daal(pca_result, Xp, params.n_components, X.shape[0], eigenvalues, eigenvectors, whiten=params.whiten) columns = ('batch', 'arch', 'prefix', 'function', 'threads', 'dtype', 'size', 'svd_solver', 'n_components', 'whiten', 'time') print_header(columns, params) # Time fit fit_time, res = time_mean_min(test_fit, X, outer_loops=params.fit_outer_loops, inner_loops=params.fit_inner_loops) print_row(columns, params, function='PCA.fit', time=fit_time) # Time transform transform_time, tr = time_mean_min(test_transform, Xp, *res[:3], outer_loops=params.transform_outer_loops, inner_loops=params.transform_inner_loops) print_row(columns, params, function='PCA.transform', time=transform_time) if params.write_results: np.save('pca_daal4py_X.npy', X) np.save('pca_daal4py_Xp.npy', Xp) np.save('pca_daal4py_eigvals.npy', res[1])
# The number of clusters is dictated by the supplied initial centroids.
n_clusters = X_init.shape[0]

# Create our clustering object
kmeans = KMeans(n_clusters=n_clusters, n_jobs=params.n_jobs, tol=1e-16,
                max_iter=params.maxiter, n_init=1, init=X_init)

columns = ('batch', 'arch', 'prefix', 'function', 'threads', 'dtype',
           'size', 'n_clusters', 'time')
params.size = size_str(X.shape)
params.n_clusters = n_clusters
params.dtype = X.dtype
print_header(columns, params)

# Time fit
fit_time, _ = time_mean_min(kmeans.fit, X,
                            outer_loops=params.fit_outer_loops,
                            inner_loops=params.fit_inner_loops)
print_row(columns, params, function='KMeans.fit', time=fit_time)

# Time predict
predict_time, _ = time_mean_min(kmeans.predict, X,
                                outer_loops=params.predict_outer_loops,
                                inner_loops=params.predict_inner_loops)
print_row(columns, params, function='KMeans.predict', time=predict_time)
parser.add_argument('--metrics', nargs='*',
                    default=['cosine', 'correlation'],
                    choices=('cosine', 'correlation'),
                    help='Metrics to test for pairwise_distances')
params = parse_args(parser, size=(1000, 150000), dtypes=('f8', 'f4'),
                    prefix='daal4py')

# Generate random data
X = np.random.rand(*params.shape).astype(params.dtype)

columns = ('batch', 'arch', 'prefix', 'function', 'threads', 'dtype',
           'size', 'time')
print_header(columns, params)


def test_distances(pairwise_distances, X):
    """Compute the full pairwise-distance matrix of X with a daal4py algo.

    The distance constructor is passed in explicitly, so the timed callable
    never closes over a loop variable.
    """
    algorithm = pairwise_distances(fptype=getFPType(X))
    return algorithm.compute(X)


# FIX: the helper used to be re-defined on every loop iteration; defining it
# once outside the loop avoids the needless per-iteration function creation
# while keeping behavior identical.
for metric in params.metrics:
    pairwise_distances = getattr(daal4py, f'{metric}_distance')
    time, _ = time_mean_min(test_distances, pairwise_distances, X,
                            outer_loops=params.outer_loops,
                            inner_loops=params.inner_loops)
    print_row(columns, params, function=metric.capitalize(), time=time)
# Generate random data
p, n = params.shape
X = np.random.rand(*params.shape).astype(params.dtype)
Xp = np.random.rand(*params.shape).astype(params.dtype)

if not params.n_components:
    # Default: (2 + min(n, p)) // 3 components, capped at n.
    params.n_components = min((n, (2 + min((n, p))) // 3))

# Create our PCA object
pca = PCA(svd_solver=params.svd_solver, whiten=params.whiten,
          n_components=params.n_components)

columns = ('batch', 'arch', 'prefix', 'function', 'threads', 'dtype',
           'size', 'svd_solver', 'n_components', 'whiten', 'time')
print_header(columns, params)

# Time fit
fit_time, _ = time_mean_min(pca.fit, X,
                            outer_loops=params.fit_outer_loops,
                            inner_loops=params.fit_inner_loops)
print_row(columns, params, function='PCA.fit', time=fit_time)

# Time transform
transform_time, _ = time_mean_min(pca.transform, Xp,
                                  outer_loops=params.transform_outer_loops,
                                  inner_loops=params.transform_inner_loops)
print_row(columns, params, function='PCA.transform', time=transform_time)
# Load data
X = np.load(params.filex.name)
y = np.load(params.filey.name)[:, np.newaxis]

columns = ('batch', 'arch', 'prefix', 'function', 'threads', 'dtype',
           'size', 'num_trees', 'time')
params.size = size_str(X.shape)
params.dtype = X.dtype
print_header(columns, params)

# Time fit and predict
fit_time, res = time_mean_min(df_regr_fit, X, y,
                              n_trees=params.num_trees,
                              seed=params.seed,
                              n_features_per_node=params.max_features,
                              max_depth=params.max_depth,
                              outer_loops=params.fit_outer_loops,
                              inner_loops=params.fit_inner_loops)
print_row(columns, params, function='df_regr.fit', time=fit_time)

predict_time, yp = time_mean_min(df_regr_predict, X, res,
                                 outer_loops=params.predict_outer_loops,
                                 inner_loops=params.predict_inner_loops)
print_row(columns, params, function='df_regr.predict', time=predict_time)
# SPDX-License-Identifier: MIT

import argparse

import numpy as np
from sklearn.metrics.pairwise import pairwise_distances

from bench import parse_args, time_mean_min, print_header, print_row

parser = argparse.ArgumentParser(description='scikit-learn pairwise distances '
                                 'benchmark')
parser.add_argument('--metrics', nargs='*',
                    default=['cosine', 'correlation'],
                    help='Metrics to test for pairwise_distances')
params = parse_args(parser, size=(1000, 150000), dtypes=('f8', 'f4'))

# Generate random data
X = np.random.rand(*params.shape).astype(params.dtype)

columns = ('batch', 'arch', 'prefix', 'function', 'threads', 'dtype',
           'size', 'time')
print_header(columns, params)

# Time each requested metric on the same random matrix.
for metric in params.metrics:
    time, _ = time_mean_min(pairwise_distances, X, metric=metric,
                            n_jobs=params.n_jobs,
                            outer_loops=params.outer_loops,
                            inner_loops=params.inner_loops)
    print_row(columns, params, function=metric.capitalize(), time=time)
columns = ('batch', 'arch', 'prefix', 'function', 'threads', 'dtype',
           'size', 'num_trees', 'n_classes', 'accuracy', 'time')
params.n_classes = len(np.unique(y))
params.size = size_str(X.shape)
params.dtype = X.dtype
print_header(columns, params)

# Time fit and predict
fit_time, res = time_mean_min(df_clsf_fit, X, y, params.n_classes,
                              n_trees=params.num_trees,
                              seed=params.seed,
                              n_features_per_node=params.max_features,
                              max_depth=params.max_depth,
                              verbose=params.verbose,
                              outer_loops=params.fit_outer_loops,
                              inner_loops=params.fit_inner_loops)
print_row(columns, params, function='df_clsf.fit', time=fit_time)

predict_time, yp = time_mean_min(df_clsf_predict, X, res, params.n_classes,
                                 verbose=params.verbose,
                                 outer_loops=params.predict_outer_loops,
                                 inner_loops=params.predict_inner_loops)

acc = 100 * accuracy_score(yp, y)
regr_train = linear_regression_training(fptype=getFPType(X), method=params.method, interceptFlag=params.fit_intercept) return regr_train.compute(X, y) def test_predict(Xp, model): regr_predict = linear_regression_prediction(fptype=getFPType(X)) return regr_predict.compute(Xp, model) columns = ('batch', 'arch', 'prefix', 'function', 'threads', 'dtype', 'size', 'method', 'time') print_header(columns, params) # Time fit fit_time, res = time_mean_min(test_fit, X, y, outer_loops=params.fit_outer_loops, inner_loops=params.fit_inner_loops) print_row(columns, params, function='Linear.fit', time=fit_time) # Time predict predict_time, yp = time_mean_min(test_predict, Xp, res.model, outer_loops=params.predict_outer_loops, inner_loops=params.predict_inner_loops) print_row(columns, params, function='Linear.predict', time=predict_time)