예제 #1
0
def main():
    """Drive the two-class linear-kernel SVC benchmark from the command line.

    Parses the benchmark arguments, loads the training arrays from the paths
    behind ``args.fileX`` / ``args.fileY`` (cast to float64), assembles the
    CSV metadata prefix and the SVC parameter dictionary, and passes
    everything to ``_bench``.

    When ``args.verbose`` is set, diagnostic lines prefixed with ``@`` are
    written to stderr. When ``args.header`` is set, the CSV header line is
    printed to stdout before the benchmark runs.
    """
    parser = argparse.ArgumentParser(
        prog="svm-linear.py",
        description="sklearn two-class SVC benchmark for linear kernel",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    args = getArguments(parser)
    numThreads, daal_version = bench.prepare_benchmark(args)

    # The output is laid out so the .csv file can go directly into Excel.
    # However, that requires adding timing in sklearn/daal4sklearn/svm.py
    # around the pydaal compute and post-processing steps.
    if args.verbose:
        run_summary = ''.join([
            '@ {fit_samples: ', str(args.fit_samples),
            ', fit_repetitions: ', str(args.fit_repetitions),
            ', predict_samples: ', str(args.predict_samples),
            ', predict_repetitions: ', str(args.predict_repetitions),
            ', pyDAAL: ', str(daal_version), '}',
        ])
        print(run_summary, file=sys.stderr)

    # Load both arrays from disk and cast them to float64.
    X_train = np.load(args.fileX.name).astype('f8')
    y_train = np.load(args.fileY.name).astype('f8')

    n_rows, n_features = X_train.shape
    cache_size_bytes = getOptimalCacheSize(n_rows)
    bytes_per_mb = 1024**2
    cache_size_mb = cache_size_bytes / bytes_per_mb

    # Leading CSV columns shared by every result row of this run.
    numeric_fields = (numThreads, n_rows, n_features, int(cache_size_mb))
    meta_info = ",".join(
        [args.prefix, 'SVM'] + [str(field) for field in numeric_fields])

    svc_params_dict = {
        'C': 0.01,
        'kernel': 'linear',
        'max_iter': 2000,
        'cache_size': cache_size_mb,
        'tol': 1e-16,
        'shrinking': True
    }

    if args.verbose:
        print("@ {}".format(svc_params_dict), file=sys.stderr)

    if args.header:
        csv_header = 'prefix_ID,function,threads,rows,features,cache-size-MB,fit,predict,accuracy,sv-len,classes'
        print(csv_header)

    _bench(meta_info, X_train, y_train,
           args.fit_samples, args.fit_repetitions,
           args.predict_samples, args.predict_repetitions,
           svc_params_dict)
예제 #2
0
                               type=int,
                               dest="num_threads",
                               default=0,
                               help="Number of threads for DAAL to use")

        args = argParser.parse_args()

        return args

    argParser = argparse.ArgumentParser(
        prog="df_clsf_bench.py",
        description="Execute RandomForest classification",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    args = getArguments(argParser)
    num_threads, daal_version = bench.prepare_benchmark(args)

    import sklearn
    try:
        from daal4py.sklearn.ensemble import RandomForestClassifier as rfClassifier
    except ImportError:
        from sklearn.ensemble import RandomForestClassifier as rfClassifier

    if args.use_sklearn_class:
        from sklearn.ensemble import RandomForestClassifier as rfClassifier

    import timeit

    if args.fileX is None or args.fileY is None:
        argParser.error(
            "Please specify data for the algorithm to train on. Use --fileX and --fileY or --generate options."
예제 #3
0
from __future__ import print_function
import numpy as np
import timeit
from numpy.random import rand
from sklearn import linear_model
from args import getArguments, coreString
import sklearn
import bench

import argparse
# Build the command-line parser for the ridge benchmark and parse the
# arguments once at import time; the names below are module-level settings.
argParser = argparse.ArgumentParser(
    prog="ridge.py",
    description="sklearn ridge regression benchmark",
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
args = getArguments(argParser)

# Number of timed repetitions: '?' means no explicit iteration count was
# given, in which case 10 repetitions are used.
if args.iteration != '?':
    REP = args.iteration
else:
    REP = 10

core_number, daal_version = bench.prepare_benchmark(args)


def st_time(func):
    """Decorate *func* so each call is timed over ``REP`` runs.

    The returned wrapper invokes ``func`` ``REP`` times with the same
    arguments (``REP`` is looked up in module scope at call time), prints the
    smallest wall-clock duration measured with ``timeit.default_timer``, and
    returns the result of the last invocation.

    The wrapper keeps the decorated function's ``__name__``, ``__doc__`` and
    other metadata via ``functools.wraps``.

    Raises:
        ValueError: if ``REP`` is less than 1, because there are no timings
            to take the minimum of.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    import functools

    @functools.wraps(func)
    def st_func(*args, **keyArgs):
        times = []
        for _ in range(REP):
            t1 = timeit.default_timer()
            r = func(*args, **keyArgs)
            t2 = timeit.default_timer()
            times.append(t2 - t1)
        # Report the best (minimum) time across all repetitions.
        print(min(times))
        return r
    return st_func

예제 #4
0
def main():
    """Drive the linear-kernel SVC benchmark from the command line.

    Parses the benchmark arguments, loads the training arrays from the paths
    behind ``args.fileX`` / ``args.fileY`` (cast to float64), counts the
    distinct labels, rewrites label ``0`` as ``-1``, turns the labels into a
    column, and passes everything to ``bench``.

    When ``args.verbose`` is set, diagnostic lines prefixed with ``@`` are
    written to stderr. When ``args.header`` is set, the CSV header line is
    printed to stdout before the benchmark runs.
    """
    parser = argparse.ArgumentParser(
        prog="svm-linear.py",
        description="SVC benchmark for linear kernel",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    args = getArguments(parser)
    num_threads, daal_version = prepare_benchmark(args)

    # The output is laid out so the .csv file can go directly into Excel.
    # However, that requires adding timing in sklearn/daal4sklearn/svm.py
    # around the pydaal compute and post-processing steps.
    if args.verbose:
        run_summary = ''.join([
            '@ {fit_samples: ', str(args.fit_samples),
            ', fit_repetitions: ', str(args.fit_repetitions),
            ', predict_samples: ', str(args.predict_samples),
            ', predict_repetitions: ', str(args.predict_repetitions),
            ', pyDAAL: ', str(daal_version), '}',
        ])
        print(run_summary, file=sys.stderr)

    # Load both arrays from disk and cast them to float64.
    X_train = np.load(args.fileX.name).astype('f8')
    y_train = np.load(args.fileY.name).astype('f8')

    # Count the classes before the labels are rewritten below.
    n_classes = np.unique(y_train).size
    zero_labels = (y_train == 0)
    y_train[zero_labels] = -1
    # Add a trailing axis so the labels become a column.
    y_train = y_train[:, np.newaxis]

    n_rows, n_features = X_train.shape
    cache_size = getOptimalCacheSize(n_rows)

    # Leading CSV columns shared by every result row of this run.
    # NOTE(review): the header below labels this column "cache-size-MB", but
    # the value is written exactly as returned by getOptimalCacheSize with no
    # unit conversion -- confirm the units.
    numeric_fields = (num_threads, n_rows, n_features, int(cache_size))
    meta_info = ",".join(
        [args.prefix, 'SVM'] + [str(field) for field in numeric_fields])

    svc_params_dict = {
        'C': 0.01,
        'maxIterations': 2000,
        'tau': 1e-12,
        'cacheSize': cache_size,
        'accuracyThreshold': 1e-16,
        'doShrinking': True,
    }

    if args.verbose:
        print("@ {}".format(svc_params_dict), file=sys.stderr)

    if args.header:
        csv_header = 'prefix_ID,function,threads,rows,features,cache-size-MB,fit,predict,accuracy,sv_len,classes'
        print(csv_header)

    bench(meta_info, X_train, y_train,
          args.fit_samples, args.fit_repetitions,
          args.predict_samples, args.predict_repetitions,
          n_classes, cache_size,
          accuracy_threshold=1e-16,
          max_iterations=2000)