def main():
    """Run the sklearn two-class SVC benchmark with a linear kernel.

    Loads X/y from the .npy files named on the command line, builds a
    CSV meta-info prefix, and delegates timing to _bench.
    """
    parser = argparse.ArgumentParser(
        prog="svm-linear.py",
        description="sklearn two-class SVC benchmark for linear kernel",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    args = getArguments(parser)
    numThreads, daal_version = bench.prepare_benchmark(args)

    # This is so the .csv file can go directly into excel. However, it
    # requires adding timing in sklearn/daal4sklearn/svm.py around pydaal
    # compute and post-processing.
    if args.verbose:
        pieces = ('@ {fit_samples: ', str(args.fit_samples),
                  ', fit_repetitions: ', str(args.fit_repetitions),
                  ', predict_samples: ', str(args.predict_samples),
                  ', predict_repetitions: ', str(args.predict_repetitions),
                  ', pyDAAL: ', str(daal_version), '}')
        print(''.join(pieces), file=sys.stderr)

    # Load data and cast to float64.
    X_train = np.load(args.fileX.name).astype('f8')
    y_train = np.load(args.fileY.name).astype('f8')

    rows, features = X_train.shape
    cache_size_bytes = getOptimalCacheSize(X_train.shape[0])
    cache_size_mb = cache_size_bytes / 1024**2

    # CSV prefix columns: prefix, algo, threads, rows, features, cache MB.
    meta_info = ",".join((args.prefix, 'SVM', str(numThreads), str(rows),
                          str(features), str(int(cache_size_mb))))

    # Parameters forwarded to sklearn's SVC.
    svc_params_dict = {
        'C': 0.01,
        'kernel': 'linear',
        'max_iter': 2000,
        'cache_size': cache_size_mb,
        'tol': 1e-16,
        'shrinking': True
    }
    if args.verbose:
        print("@ {}".format(svc_params_dict), file=sys.stderr)

    if args.header:
        print(
            'prefix_ID,function,threads,rows,features,cache-size-MB,fit,predict,accuracy,sv-len,classes'
        )

    _bench(meta_info, X_train, y_train, args.fit_samples,
           args.fit_repetitions, args.predict_samples,
           args.predict_repetitions, svc_params_dict)
type=int, dest="num_threads", default=0, help="Number of threads for DAAL to use") args = argParser.parse_args() return args argParser = argparse.ArgumentParser( prog="df_clsf_bench.py", description="Execute RandomForest classification", formatter_class=argparse.ArgumentDefaultsHelpFormatter) args = getArguments(argParser) num_threads, daal_version = bench.prepare_benchmark(args) import sklearn try: from daal4py.sklearn.ensemble import RandomForestClassifier as rfClassifier except ImportError: from sklearn.ensemble import RandomForestClassifier as rfClassifier if args.use_sklearn_class: from sklearn.ensemble import RandomForestClassifier as rfClassifier import timeit if args.fileX is None or args.fileY is None: argParser.error( "Please specify data for the algorithm to train on. Use --fileX and --fileY or --generate options."
from __future__ import print_function import numpy as np import timeit from numpy.random import rand from sklearn import linear_model from args import getArguments, coreString import sklearn import bench import argparse argParser = argparse.ArgumentParser(prog="ridge.py", description="sklearn ridge regression benchmark", formatter_class=argparse.ArgumentDefaultsHelpFormatter) args = getArguments(argParser) REP = args.iteration if args.iteration != '?' else 10 core_number, daal_version = bench.prepare_benchmark(args) def st_time(func): def st_func(*args, **keyArgs): times = [] for n in range(REP): t1 = timeit.default_timer() r = func(*args, **keyArgs) t2 = timeit.default_timer() times.append(t2-t1) print (min(times)) return r return st_func
def main():
    """Run the two-class linear-kernel SVC benchmark (pyDAAL variant).

    Loads X/y from the .npy files named on the command line, remaps the
    labels for the daal solver, and delegates timing to bench(...).
    """
    parser = argparse.ArgumentParser(
        prog="svm-linear.py",
        description="SVC benchmark for linear kernel",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    args = getArguments(parser)
    num_threads, daal_version = prepare_benchmark(args)

    # This is so the .csv file can go directly into excel. However, it
    # requires adding timing in sklearn/daal4sklearn/svm.py around pydaal
    # compute and post-processing.
    if args.verbose:
        parts = ('@ {fit_samples: ', str(args.fit_samples),
                 ', fit_repetitions: ', str(args.fit_repetitions),
                 ', predict_samples: ', str(args.predict_samples),
                 ', predict_repetitions: ', str(args.predict_repetitions),
                 ', pyDAAL: ', str(daal_version), '}')
        print(''.join(parts), file=sys.stderr)

    # Load data and cast to float64.
    X_train = np.load(args.fileX.name).astype('f8')
    y_train = np.load(args.fileY.name).astype('f8')

    # Map label 0 to -1 and reshape y into a column vector
    # (presumably the layout the daal solver expects — verify against bench).
    n_classes = np.unique(y_train).size
    y_train[y_train == 0] = -1
    y_train = y_train[:, np.newaxis]

    rows, features = X_train.shape
    cache_size = getOptimalCacheSize(X_train.shape[0])

    # CSV prefix columns: prefix, algo, threads, rows, features, cache size.
    meta_info = ",".join((args.prefix, 'SVM', str(num_threads), str(rows),
                          str(features), str(int(cache_size))))

    # Parameters mirroring daal's two-class SVM training settings.
    svc_params_dict = {
        'C': 0.01,
        'maxIterations': 2000,
        'tau': 1e-12,
        'cacheSize': cache_size,
        'accuracyThreshold': 1e-16,
        'doShrinking': True,
    }
    if args.verbose:
        print("@ {}".format(svc_params_dict), file=sys.stderr)

    if args.header:
        print(
            'prefix_ID,function,threads,rows,features,cache-size-MB,fit,predict,accuracy,sv_len,classes'
        )

    bench(meta_info, X_train, y_train, args.fit_samples,
          args.fit_repetitions, args.predict_samples,
          args.predict_repetitions, n_classes, cache_size,
          accuracy_threshold=1e-16, max_iterations=2000)