예제 #1
0

def test_predict(X, X_init):
    """Compute a ``kmeans`` result for ``X`` starting from ``X_init``.

    The algorithm is built with ``maxIterations=0`` and ``assignFlag=True``;
    presumably this restricts the run to assigning samples to the given
    centroids -- confirm against the daal4py documentation.

    Returns whatever ``compute`` returns for ``(X, X_init)``.
    """
    settings = {
        'fptype': getFPType(X),
        'nClusters': params.n_clusters,
        'maxIterations': 0,
        'assignFlag': True,
        'accuracyThreshold': 0.0,
    }
    return kmeans(**settings).compute(X, X_init)


# Column layout shared by the header and by every result row printed below.
columns = (
    'batch', 'arch', 'prefix', 'function', 'threads', 'dtype', 'size',
    'n_clusters', 'time',
)
print_header(columns, params)

# Time fit: only the first value returned by time_mean_min is reported.
fit_loops = {
    'outer_loops': params.fit_outer_loops,
    'inner_loops': params.fit_inner_loops,
}
fit_time, _ = time_mean_min(test_fit, X, X_init, **fit_loops)
print_row(columns, params, function='KMeans.fit', time=fit_time)

# Time predict: same inputs, but with the predict-specific loop counts.
predict_loops = {
    'outer_loops': params.predict_outer_loops,
    'inner_loops': params.predict_inner_loops,
}
predict_time, _ = time_mean_min(test_predict, X, X_init, **predict_loops)
print_row(columns, params, function='KMeans.predict', time=predict_time)
예제 #2
0
    except ImportError:
        from sklearn.ensemble import RandomForestRegressor

# Load the feature and target arrays from the paths held in params
X = np.load(params.filex.name)
y = np.load(params.filey.name)

# Create our random forest regressor from the benchmark parameters
forest_settings = {
    'n_estimators': params.num_trees,
    'max_depth': params.max_depth,
    'max_features': params.max_features,
    'random_state': params.seed,
}
regr = RandomForestRegressor(**forest_settings)

# Column layout shared by the header and by every result row
columns = (
    'batch', 'arch', 'prefix', 'function', 'threads', 'dtype', 'size',
    'num_trees', 'time',
)
# Record the data description on params so it can be printed with the rows
params.size = size_str(X.shape)
params.dtype = X.dtype

print_header(columns, params)

# Time fit on the full data set
fit_time, _ = time_mean_min(
    regr.fit, X, y,
    outer_loops=params.fit_outer_loops,
    inner_loops=params.fit_inner_loops,
)
print_row(columns, params, function='df_regr.fit', time=fit_time)

# Time predict on the same samples that were used for fitting
predict_time, y_pred = time_mean_min(
    regr.predict, X,
    outer_loops=params.predict_outer_loops,
    inner_loops=params.predict_inner_loops,
)
print_row(columns, params, function='df_regr.predict', time=predict_time)
예제 #3
0
    columns = ('batch', 'arch', 'prefix', 'function', 'threads', 'dtype',
               'size', 'solver', 'C', 'multiclass', 'n_classes', 'accuracy',
               'time')
    params.size = size_str(X.shape)
    params.dtype = X.dtype

    print_header(columns, params)

    # Time fit and predict
    fit_time, res = time_mean_min(test_fit,
                                  X,
                                  y,
                                  penalty='l2',
                                  C=params.C,
                                  verbose=params.verbose,
                                  fit_intercept=params.fit_intercept,
                                  tol=params.tol,
                                  max_iter=params.maxiter,
                                  solver=params.solver,
                                  outer_loops=params.fit_outer_loops,
                                  inner_loops=params.fit_inner_loops)

    beta, intercept, solver_result, params.multiclass = res
    print_row(columns, params, function='LogReg.fit', time=fit_time)

    predict_time, yp = time_mean_min(test_predict,
                                     X,
                                     beta,
                                     intercept=intercept,
                                     multi_class=params.multiclass,
                                     outer_loops=params.predict_outer_loops,
예제 #4
0
def _build_parser():
    """Create the argument parser with the SVC-specific benchmark options."""
    parser = argparse.ArgumentParser(
        description='daal4py SVC benchmark with linear kernel')

    # Input data files
    parser.add_argument(
        '-x', '--filex', '--fileX',
        type=argparse.FileType('r'),
        required=True,
        help='Input file with features, in NPY format')
    parser.add_argument(
        '-y', '--filey', '--fileY',
        type=argparse.FileType('r'),
        required=True,
        help='Input file with labels, in NPY format')

    # Solver settings
    parser.add_argument(
        '-C',
        dest='C',
        type=float,
        default=0.01,
        help='SVM slack parameter')
    parser.add_argument(
        '--kernel',
        choices=('linear', ),
        default='linear',
        help='SVM kernel function')
    parser.add_argument(
        '--maxiter',
        type=int,
        default=2000,
        help='Maximum iterations for the iterative solver. '
        '-1 means no limit.')
    parser.add_argument(
        '--max-cache-size',
        type=int,
        default=64,
        help='Maximum cache size, in gigabytes, for SVM.')
    parser.add_argument(
        '--tau',
        type=float,
        default=1e-12,
        help='Tau parameter for working set selection scheme')
    parser.add_argument('--tol', type=float, default=1e-16, help='Tolerance')
    # The flag stores False into ``shrinking``; without it the value is True.
    parser.add_argument(
        '--no-shrinking',
        action='store_false',
        default=True,
        dest='shrinking',
        help="Don't use shrinking heuristic")
    return parser


def main():
    """Benchmark fitting and prediction of the daal4py linear-kernel SVM.

    Parses the command line, loads the feature and label arrays, times
    ``test_fit`` and ``test_predict`` through ``time_mean_min`` and prints a
    header followed by one result row per timed function.
    """
    params = parse_args(_build_parser(),
                        loop_types=('fit', 'predict'),
                        prefix='daal4py')

    # Load data and cast to float64
    X_train = np.load(params.filex.name).astype('f8')
    y_train = np.load(params.filey.name).astype('f8')

    # Derive the cache size from the sample count, capped by the CLI option
    n_samples = X_train.shape[0]
    cache_size_bytes = get_optimal_cache_size(
        n_samples, max_cache=params.max_cache_size)
    params.cache_size_mb = cache_size_bytes / 2**20
    params.cache_size_bytes = cache_size_bytes
    # Count the classes before the labels are rewritten below
    params.n_classes = np.unique(y_train).size

    # This is necessary for daal: zero labels become -1 and the labels gain
    # a trailing axis
    zero_labels = y_train == 0
    y_train[zero_labels] = -1
    y_train = y_train[:, np.newaxis]

    columns = (
        'batch', 'arch', 'prefix', 'function', 'threads', 'dtype',
        'size', 'kernel', 'cache_size_mb', 'C', 'sv_len', 'n_classes',
        'accuracy', 'time',
    )
    params.size = size_str(X_train.shape)
    params.dtype = X_train.dtype

    print_header(columns, params)

    # Time fit; the second returned value is unpacked into four items, of
    # which only the first two are used here.
    fit_time, fit_output = time_mean_min(
        test_fit, X_train, y_train, params,
        outer_loops=params.fit_outer_loops,
        inner_loops=params.fit_inner_loops,
    )
    res, support, _indices, _n_support = fit_output
    params.sv_len = support.shape[0]
    print_row(columns, params, function='SVM.fit', time=fit_time)

    # Time predict on the training samples and score against their labels
    predict_time, yp = time_mean_min(
        test_predict, X_train, res, params,
        outer_loops=params.predict_outer_loops,
        inner_loops=params.predict_inner_loops,
    )
    # The '.3' format spec keeps three significant digits of the percentage
    accuracy_percent = 100 * accuracy_score(yp, y_train)
    print_row(columns,
              params,
              function='SVM.predict',
              time=predict_time,
              accuracy=f'{accuracy_percent:.3}')
예제 #5
0
    return pca_transform_daal(pca_result,
                              Xp,
                              params.n_components,
                              X.shape[0],
                              eigenvalues,
                              eigenvectors,
                              whiten=params.whiten)


# Column layout shared by the header and by every result row printed below.
columns = (
    'batch', 'arch', 'prefix', 'function', 'threads', 'dtype', 'size',
    'svd_solver', 'n_components', 'whiten', 'time',
)
print_header(columns, params)

# Time fit; ``res`` is kept because the transform step and the optional
# result dump below both read from it.
fit_time, res = time_mean_min(
    test_fit, X,
    outer_loops=params.fit_outer_loops,
    inner_loops=params.fit_inner_loops,
)
print_row(columns, params, function='PCA.fit', time=fit_time)

# Time transform: up to the first three fit outputs are forwarded as
# positional arguments after ``Xp``.
transform_time, tr = time_mean_min(
    test_transform, Xp, *res[:3],
    outer_loops=params.transform_outer_loops,
    inner_loops=params.transform_inner_loops,
)
print_row(columns, params, function='PCA.transform', time=transform_time)

# Optionally dump the inputs and the second fit output to NPY files.
if params.write_results:
    for out_name, out_data in (('pca_daal4py_X.npy', X),
                               ('pca_daal4py_Xp.npy', Xp),
                               ('pca_daal4py_eigvals.npy', res[1])):
        np.save(out_name, out_data)
예제 #6
0
# One cluster per row of the initial-centroid array.
n_clusters = X_init.shape[0]

# Create our clustering object; a single initialisation is run because the
# starting centroids are supplied explicitly through ``init``.
kmeans = KMeans(
    init=X_init,
    n_init=1,
    n_clusters=n_clusters,
    max_iter=params.maxiter,
    tol=1e-16,
    n_jobs=params.n_jobs,
)

# Column layout shared by the header and by every result row.
columns = (
    'batch', 'arch', 'prefix', 'function', 'threads', 'dtype', 'size',
    'n_clusters', 'time',
)
# Record the data description on params so it can be printed with the rows.
params.size = size_str(X.shape)
params.n_clusters = n_clusters
params.dtype = X.dtype
print_header(columns, params)

# Time fit
fit_time, _ = time_mean_min(
    kmeans.fit, X,
    outer_loops=params.fit_outer_loops,
    inner_loops=params.fit_inner_loops,
)
print_row(columns, params, function='KMeans.fit', time=fit_time)

# Time predict on the same samples that were used for fitting
predict_time, _ = time_mean_min(
    kmeans.predict, X,
    outer_loops=params.predict_outer_loops,
    inner_loops=params.predict_inner_loops,
)
print_row(columns, params, function='KMeans.predict', time=predict_time)
예제 #7
0
# Metrics are restricted to the two distance algorithms looked up on the
# daal4py module in the loop below.
parser.add_argument('--metrics',
                    nargs='*',
                    default=['cosine', 'correlation'],
                    choices=('cosine', 'correlation'),
                    help='Metrics to test for pairwise_distances')
params = parse_args(parser,
                    size=(1000, 150000),
                    dtypes=('f8', 'f4'),
                    prefix='daal4py')

# Generate random data
X = np.random.rand(*params.shape).astype(params.dtype)

columns = ('batch', 'arch', 'prefix', 'function', 'threads', 'dtype', 'size',
           'time')
print_header(columns, params)


def test_distances(pairwise_distances, X):
    """Instantiate the given distance algorithm for ``X`` and run it.

    ``pairwise_distances`` is the algorithm factory (called with the
    ``fptype`` derived from ``X``); the value returned by its ``compute``
    method is passed through unchanged.

    Defined once, outside the metric loop: the function does not depend on
    the loop variable, so re-creating it on every iteration was needless.
    """
    algorithm = pairwise_distances(fptype=getFPType(X))
    return algorithm.compute(X)


for metric in params.metrics:
    # Resolve e.g. 'cosine' to the ``cosine_distance`` attribute of daal4py.
    pairwise_distances = getattr(daal4py, f'{metric}_distance')

    time, _ = time_mean_min(test_distances,
                            pairwise_distances,
                            X,
                            outer_loops=params.outer_loops,
                            inner_loops=params.inner_loops)
    print_row(columns, params, function=metric.capitalize(), time=time)
예제 #8
0
# Generate random data: X is used for fitting, Xp for the transform step.
# The two draws are kept in this order so the random stream is consumed
# exactly as before.
p, n = params.shape
X = np.random.rand(*params.shape).astype(params.dtype)
Xp = np.random.rand(*params.shape).astype(params.dtype)

# Fall back to a component count derived from the data shape when none
# (or zero) was requested.
if not params.n_components:
    smaller_dim = min(n, p)
    params.n_components = min(n, (2 + smaller_dim) // 3)

# Create our PCA object
pca = PCA(
    n_components=params.n_components,
    svd_solver=params.svd_solver,
    whiten=params.whiten,
)

# Column layout shared by the header and by every result row.
columns = (
    'batch', 'arch', 'prefix', 'function', 'threads', 'dtype', 'size',
    'svd_solver', 'n_components', 'whiten', 'time',
)
print_header(columns, params)

# Time fit
fit_time, _ = time_mean_min(
    pca.fit, X,
    outer_loops=params.fit_outer_loops,
    inner_loops=params.fit_inner_loops,
)
print_row(columns, params, function='PCA.fit', time=fit_time)

# Time transform on the second random array
transform_time, _ = time_mean_min(
    pca.transform, Xp,
    outer_loops=params.transform_outer_loops,
    inner_loops=params.transform_inner_loops,
)
print_row(columns, params, function='PCA.transform', time=transform_time)
예제 #9
0
    # Load data
    X = np.load(params.filex.name)
    y = np.load(params.filey.name)[:, np.newaxis]

    columns = ('batch', 'arch', 'prefix', 'function', 'threads', 'dtype',
               'size', 'num_trees', 'time')
    params.size = size_str(X.shape)
    params.dtype = X.dtype

    print_header(columns, params)

    # Time fit and predict
    fit_time, res = time_mean_min(df_regr_fit,
                                  X,
                                  y,
                                  n_trees=params.num_trees,
                                  seed=params.seed,
                                  n_features_per_node=params.max_features,
                                  max_depth=params.max_depth,
                                  outer_loops=params.fit_outer_loops,
                                  inner_loops=params.fit_inner_loops)
    print_row(columns, params, function='df_regr.fit', time=fit_time)

    predict_time, yp = time_mean_min(df_regr_predict,
                                     X,
                                     res,
                                     outer_loops=params.predict_outer_loops,
                                     inner_loops=params.predict_inner_loops)
    print_row(columns, params, function='df_regr.predict', time=predict_time)
예제 #10
0
# SPDX-License-Identifier: MIT

import argparse
from bench import parse_args, time_mean_min, print_header, print_row
import numpy as np
from sklearn.metrics.pairwise import pairwise_distances

parser = argparse.ArgumentParser(
    description='scikit-learn pairwise distances benchmark')
# Any number of metric names may follow --metrics; they are handed to
# pairwise_distances unchanged.
parser.add_argument(
    '--metrics',
    nargs='*',
    default=['cosine', 'correlation'],
    help='Metrics to test for pairwise_distances',
)
params = parse_args(parser, size=(1000, 150000), dtypes=('f8', 'f4'))

# Generate random data
X = np.random.rand(*params.shape).astype(params.dtype)

# Column layout shared by the header and by every result row.
columns = (
    'batch', 'arch', 'prefix', 'function', 'threads', 'dtype', 'size',
    'time',
)
print_header(columns, params)

# One timed run and one result row per requested metric.
for metric in params.metrics:
    time, _ = time_mean_min(
        pairwise_distances, X,
        metric=metric,
        n_jobs=params.n_jobs,
        outer_loops=params.outer_loops,
        inner_loops=params.inner_loops,
    )
    print_row(columns, params, function=metric.capitalize(), time=time)
예제 #11
0
    columns = ('batch', 'arch', 'prefix', 'function', 'threads', 'dtype',
               'size', 'num_trees', 'n_classes', 'accuracy', 'time')
    params.n_classes = len(np.unique(y))
    params.size = size_str(X.shape)
    params.dtype = X.dtype

    print_header(columns, params)

    # Time fit and predict
    fit_time, res = time_mean_min(df_clsf_fit,
                                  X,
                                  y,
                                  params.n_classes,
                                  n_trees=params.num_trees,
                                  seed=params.seed,
                                  n_features_per_node=params.max_features,
                                  max_depth=params.max_depth,
                                  verbose=params.verbose,
                                  outer_loops=params.fit_outer_loops,
                                  inner_loops=params.fit_inner_loops)
    print_row(columns, params, function='df_clsf.fit', time=fit_time)

    predict_time, yp = time_mean_min(df_clsf_predict,
                                     X,
                                     res,
                                     params.n_classes,
                                     verbose=params.verbose,
                                     outer_loops=params.predict_outer_loops,
                                     inner_loops=params.predict_inner_loops)
    acc = 100 * accuracy_score(yp, y)
예제 #12
0
    regr_train = linear_regression_training(fptype=getFPType(X),
                                            method=params.method,
                                            interceptFlag=params.fit_intercept)
    return regr_train.compute(X, y)


def test_predict(Xp, model):
    """Run linear-regression prediction for ``Xp`` with a trained ``model``.

    The algorithm's floating-point type is derived from ``Xp``, the data
    actually passed to ``compute``, rather than from the module-level ``X``,
    so the function no longer depends on a global having a matching dtype.

    Returns whatever ``compute`` returns for ``(Xp, model)``.
    """
    regr_predict = linear_regression_prediction(fptype=getFPType(Xp))
    return regr_predict.compute(Xp, model)


# Column layout shared by the header and by every result row printed below.
columns = (
    'batch', 'arch', 'prefix', 'function', 'threads', 'dtype', 'size',
    'method', 'time',
)
print_header(columns, params)

# Time fit; ``res`` is kept because its ``model`` attribute feeds predict.
fit_time, res = time_mean_min(
    test_fit, X, y,
    outer_loops=params.fit_outer_loops,
    inner_loops=params.fit_inner_loops,
)
print_row(columns, params, function='Linear.fit', time=fit_time)

# Time predict using the model produced by the fit step
predict_time, yp = time_mean_min(
    test_predict, Xp, res.model,
    outer_loops=params.predict_outer_loops,
    inner_loops=params.predict_inner_loops,
)
print_row(columns, params, function='Linear.predict', time=predict_time)