def main():
    """Benchmark sklearn LogisticRegression: timed fit/predict plus quality metrics."""
    from sklearn.linear_model import LogisticRegression

    # Load generated data
    X_train, X_test, y_train, y_test = bench.load_data(params)
    params.n_classes = len(np.unique(y_train))

    # Resolve the 'auto' multiclass strategy from the observed class count.
    if params.multiclass == 'auto':
        params.multiclass = 'ovr' if params.n_classes == 2 else 'multinomial'

    # Solver-dependent default tolerance when none was given on the command line.
    if not params.tol:
        params.tol = 1e-3 if params.solver == 'newton-cg' else 1e-10

    # Create our classifier object
    clf = LogisticRegression(
        penalty='l2',
        C=params.C,
        n_jobs=params.n_jobs,
        fit_intercept=params.fit_intercept,
        verbose=params.verbose,
        tol=params.tol,
        max_iter=params.maxiter,
        solver=params.solver,
        multi_class=params.multiclass,
    )

    # Timed training.
    fit_time, _ = bench.measure_function_time(clf.fit, X_train, y_train,
                                              params=params)

    # Training-set quality (these predictions are not timed).
    labels_train = clf.predict(X_train)
    proba_train = clf.predict_proba(X_train)
    train_acc = bench.accuracy_score(y_train, labels_train)
    train_log_loss = bench.log_loss(y_train, proba_train)
    train_roc_auc = bench.roc_auc_score(y_train, proba_train)

    # Timed prediction on the test set, then its quality metrics.
    predict_time, labels_test = bench.measure_function_time(clf.predict, X_test,
                                                            params=params)
    proba_test = clf.predict_proba(X_test)
    test_acc = bench.accuracy_score(y_test, labels_test)
    test_log_loss = bench.log_loss(y_test, proba_test)
    test_roc_auc = bench.roc_auc_score(y_test, proba_test)

    bench.print_output(
        library='sklearn',
        algorithm='logistic_regression',
        stages=['training', 'prediction'],
        params=params,
        functions=['LogReg.fit', 'LogReg.predict'],
        times=[fit_time, predict_time],
        metric_type=['accuracy', 'log_loss', 'roc_auc'],
        metrics=[
            [train_acc, test_acc],
            [train_log_loss, test_log_loss],
            [train_roc_auc, test_roc_auc],
        ],
        data=[X_train, X_test],
        alg_instance=clf,
    )
def main():
    """Benchmark sklearn RandomForestClassifier: timed fit/predict plus quality metrics."""
    from sklearn.ensemble import RandomForestClassifier

    # Load and convert data
    X_train, X_test, y_train, y_test = bench.load_data(params)

    # Create our random forest classifier
    clf = RandomForestClassifier(
        criterion=params.criterion,
        n_estimators=params.num_trees,
        max_depth=params.max_depth,
        max_features=params.max_features,
        min_samples_split=params.min_samples_split,
        max_leaf_nodes=params.max_leaf_nodes,
        min_impurity_decrease=params.min_impurity_decrease,
        bootstrap=params.bootstrap,
        random_state=params.seed,
        n_jobs=params.n_jobs,
    )

    params.n_classes = len(np.unique(y_train))

    # Timed training.
    fit_time, _ = bench.measure_function_time(clf.fit, X_train, y_train,
                                              params=params)

    # Training-set quality (these predictions are not timed).
    labels = clf.predict(X_train)
    probabilities = clf.predict_proba(X_train)
    train_acc = bench.accuracy_score(y_train, labels)
    train_log_loss = bench.log_loss(y_train, probabilities)
    train_roc_auc = bench.roc_auc_score(y_train, probabilities)

    # Timed prediction on the test set, then its quality metrics.
    predict_time, labels = bench.measure_function_time(clf.predict, X_test,
                                                       params=params)
    probabilities = clf.predict_proba(X_test)
    test_acc = bench.accuracy_score(y_test, labels)
    test_log_loss = bench.log_loss(y_test, probabilities)
    test_roc_auc = bench.roc_auc_score(y_test, probabilities)

    bench.print_output(
        library='sklearn',
        algorithm='df_clsf',
        stages=['training', 'prediction'],
        params=params,
        functions=['df_clsf.fit', 'df_clsf.predict'],
        times=[fit_time, predict_time],
        metric_type=['accuracy', 'log_loss', 'roc_auc'],
        metrics=[
            [train_acc, test_acc],
            [train_log_loss, test_log_loss],
            [train_roc_auc, test_roc_auc],
        ],
        data=[X_train, X_test],
        alg_instance=clf,
    )
def main():
    """Benchmark sklearn KNeighborsClassifier for classification or raw neighbor search."""
    from sklearn.neighbors import KNeighborsClassifier

    # Load generated data
    X_train, X_test, y_train, y_test = bench.load_data(params)
    params.n_classes = len(np.unique(y_train))

    # Create classification object
    knn_clsf = KNeighborsClassifier(
        n_neighbors=params.n_neighbors,
        weights=params.weights,
        algorithm=params.method,
        metric=params.metric,
        n_jobs=params.n_jobs,
    )

    # Measure time and accuracy on fitting
    train_time, _ = bench.measure_function_time(knn_clsf.fit, X_train, y_train,
                                                params=params)

    if params.task == 'classification':
        # Training-set quality (untimed predictions).
        labels = knn_clsf.predict(X_train)
        probabilities = knn_clsf.predict_proba(X_train)
        train_acc = bench.accuracy_score(y_train, labels)
        train_log_loss = bench.log_loss(y_train, probabilities)
        train_roc_auc = bench.roc_auc_score(y_train, probabilities)

        # Timed prediction on the test set, then its quality metrics.
        predict_time, labels = bench.measure_function_time(knn_clsf.predict,
                                                           X_test, params=params)
        probabilities = knn_clsf.predict_proba(X_test)
        test_acc = bench.accuracy_score(y_test, labels)
        test_log_loss = bench.log_loss(y_test, probabilities)
        test_roc_auc = bench.roc_auc_score(y_test, probabilities)

        bench.print_output(
            library='sklearn',
            algorithm=knn_clsf._fit_method + '_knn_classification',
            stages=['training', 'prediction'],
            params=params,
            functions=['knn_clsf.fit', 'knn_clsf.predict'],
            times=[train_time, predict_time],
            metric_type=['accuracy', 'log_loss', 'roc_auc'],
            metrics=[
                [train_acc, test_acc],
                [train_log_loss, test_log_loss],
                [train_roc_auc, test_roc_auc],
            ],
            data=[X_train, X_test],
            alg_instance=knn_clsf,
        )
    else:
        # Search task: time only the neighbor lookup; no quality metrics apply.
        predict_time, _ = bench.measure_function_time(knn_clsf.kneighbors,
                                                      X_test, params=params)
        bench.print_output(
            library='sklearn',
            algorithm=knn_clsf._fit_method + '_knn_search',
            stages=['training', 'search'],
            params=params,
            functions=['knn_clsf.fit', 'knn_clsf.kneighbors'],
            times=[train_time, predict_time],
            metric_type=None,
            metrics=[],
            data=[X_train, X_test],
            alg_instance=knn_clsf,
        )
def metric_call(x, y):
    """Return the accuracy of predictions *y* against labels *x*, as a percentage."""
    return bench.accuracy_score(x, y) * 100
def main():
    """Benchmark sklearn SVC: timed fit, then either predict or predict_proba.

    When ``params.probability`` is set the probability path is timed and
    scored with log-loss / ROC-AUC; otherwise plain prediction is timed and
    scored with accuracy.
    """
    from sklearn.svm import SVC

    X_train, X_test, y_train, y_test = bench.load_data(params)
    # Labels must be a contiguous 1-D array for SVC.
    y_train = np.asfortranarray(y_train).ravel()

    # Default gamma = 1 / n_features when not provided.
    if params.gamma is None:
        params.gamma = 1.0 / X_train.shape[1]

    cache_size_bytes = bench.get_optimal_cache_size(
        X_train.shape[0], max_cache=params.max_cache_size)
    params.cache_size_mb = cache_size_bytes / 1024**2

    params.n_classes = len(np.unique(y_train))

    clf = SVC(C=params.C, kernel=params.kernel,
              cache_size=params.cache_size_mb, tol=params.tol,
              gamma=params.gamma, probability=params.probability,
              random_state=43, degree=params.degree)

    # Timed training; record the number of support vectors for reporting.
    fit_time, _ = bench.measure_function_time(clf.fit, X_train, y_train,
                                              params=params)
    params.sv_len = clf.support_.shape[0]

    if params.probability:
        state_predict = 'predict_proba'
        clf_predict = clf.predict_proba
        # Accuracy is not meaningful for the probability path.
        train_acc = None
        test_acc = None

        predict_train_time, y_pred = bench.measure_function_time(
            clf_predict, X_train, params=params)
        train_log_loss = bench.log_loss(y_train, y_pred)
        train_roc_auc = bench.roc_auc_score(y_train, y_pred)

        _, y_pred = bench.measure_function_time(
            clf_predict, X_test, params=params)
        test_log_loss = bench.log_loss(y_test, y_pred)
        test_roc_auc = bench.roc_auc_score(y_test, y_pred)
    else:
        state_predict = 'prediction'
        clf_predict = clf.predict
        # Probabilistic metrics are not available without predict_proba.
        train_log_loss = None
        test_log_loss = None
        train_roc_auc = None
        test_roc_auc = None

        predict_train_time, y_pred = bench.measure_function_time(
            clf_predict, X_train, params=params)
        train_acc = bench.accuracy_score(y_train, y_pred)

        _, y_pred = bench.measure_function_time(
            clf_predict, X_test, params=params)
        test_acc = bench.accuracy_score(y_test, y_pred)

    bench.print_output(
        library='sklearn',
        algorithm='SVC',
        stages=['training', state_predict],
        params=params,
        functions=['SVM.fit', f'SVM.{state_predict}'],
        times=[fit_time, predict_train_time],
        metric_type=['accuracy', 'log_loss', 'roc_auc', 'n_sv'],
        metrics=[
            [train_acc, test_acc],
            [train_log_loss, test_log_loss],
            [train_roc_auc, test_roc_auc],
            [int(clf.n_support_.sum()), int(clf.n_support_.sum())],
        ],
        # BUG FIX: was data=[X_train, X_train]; the second stage's metrics are
        # computed on X_test, and every sibling benchmark reports
        # [X_train, X_test] here.
        data=[X_train, X_test],
        alg_instance=clf,
    )
xgb_params.update({'nthread': params.threads}) if 'OMP_NUM_THREADS' in os.environ.keys(): xgb_params['nthread'] = int(os.environ['OMP_NUM_THREADS']) columns = ('batch', 'arch', 'prefix', 'function', 'threads', 'dtype', 'size', 'num_trees') if params.objective.startswith('reg'): task = 'regression' metric_name, metric_func = 'rmse', rmse_score columns += ('rmse', 'time') else: task = 'classification' metric_name = 'accuracy[%]' metric_func = lambda y1, y2: 100 * accuracy_score(y1, y2) columns += ('n_classes', 'accuracy', 'time') if 'cudf' in str(type(y_train)): params.n_classes = y_train[y_train.columns[0]].nunique() else: params.n_classes = len(np.unique(y_train)) if params.n_classes > 2: xgb_params['num_class'] = params.n_classes dtrain = xgb.DMatrix(X_train, y_train) dtest = xgb.DMatrix(X_test, y_test) fit_time, booster = measure_function_time(xgb.train, xgb_params, dtrain, params.n_estimators,
# Body of a cuml SVC benchmark (enclosing function/setup not visible here).
# Assumes X_train/X_test/y_train/y_test and params are already defined — TODO confirm.

# Default gamma = 1 / n_features when not provided.
if params.gamma is None:
    params.gamma = 1.0 / X_train.shape[1]

cache_size_bytes = bench.get_optimal_cache_size(X_train.shape[0],
                                                max_cache=params.max_cache_size)
params.cache_size_mb = cache_size_bytes / 1024**2

# Labels appear to be a cudf-style DataFrame here (column access + nunique).
params.n_classes = y_train[y_train.columns[0]].nunique()

# Create our C-SVM classifier
clf = SVC(C=params.C, kernel=params.kernel, max_iter=params.maxiter,
          cache_size=params.cache_size_mb, tol=params.tol, gamma=params.gamma)

# Time fit and predict
fit_time, _ = bench.measure_function_time(clf.fit, X_train, y_train,
                                          params=params)
# Number of support vectors, recorded for reporting.
params.sv_len = clf.support_.shape[0]

# Timed prediction is done on the *training* set; the test-set prediction
# below is untimed and used only for accuracy.
predict_time, y_pred = bench.measure_function_time(
    clf.predict, X_train, params=params)
train_acc = 100 * bench.accuracy_score(y_pred, y_train)

y_pred = clf.predict(X_test)
test_acc = 100 * bench.accuracy_score(y_pred, y_test)

# NOTE(review): data=[X_train, X_train] looks suspicious — test accuracy is
# computed on X_test, so the second entry is probably meant to be X_test.
# Verify against bench.print_output's contract before changing.
bench.print_output(library='cuml', algorithm='svc',
                   stages=['training', 'prediction'], params=params,
                   functions=['SVM.fit', 'SVM.predict'],
                   times=[fit_time, predict_time], accuracy_type='accuracy[%]',
                   accuracies=[train_acc, test_acc], data=[X_train, X_train],
                   alg_instance=clf)
max_iter=params.maxiter, solver=params.solver, outer_loops=params.fit_outer_loops, inner_loops=params.fit_inner_loops) beta, intercept, solver_result, params.multiclass = res print_row(columns, params, function='LogReg.fit', time=fit_time) predict_time, yp = time_mean_min(test_predict, X, beta, intercept=intercept, multi_class=params.multiclass, outer_loops=params.predict_outer_loops, inner_loops=params.predict_inner_loops) y_pred = np.argmax(yp, axis=1) acc = 100 * accuracy_score(y_pred, y) print_row(columns, params, function='LogReg.predict', time=predict_time, accuracy=acc) if params.verbose: print() print("@ Number of iterations: {}".format(solver_result.nit)) print("@ fit coefficients:") print("@ {}".format(beta.tolist())) print("@ fit intercept:") print("@ {}".format(intercept.tolist()))
# Load generated data X_train, X_test, y_train, y_test = load_data(params) params.n_classes = len(np.unique(y_train)) # Create classification object knn_clsf = KNeighborsClassifier(n_neighbors=params.n_neighbors, weights=params.weights, algorithm=params.method, metric=params.metric) knn_clsf.fit(X_train, y_train) # Time predict time, yp = measure_function_time(knn_clsf.predict, X_test, params=params) acc = 100 * accuracy_score(yp, y_test) columns = ('batch', 'arch', 'prefix', 'function', 'threads', 'dtype', 'size', 'n_neighbors', 'n_classes', 'time') print_output(library='sklearn', algorithm='knn_classification', stages=['prediction'], columns=columns, params=params, functions=['knn_clsf.predict'], times=[time], accuracies=[acc], accuracy_type='accuracy[%]', data=[X_test], alg_instance=knn_clsf)
def main():
    """Benchmark daal4py SVC: parse CLI args, time fit/predict, print a report.

    Relies on project helpers (parse_args, load_data, get_optimal_cache_size,
    measure_function_time, test_fit, test_predict, accuracy_score,
    print_output) imported at file level.
    """
    parser = argparse.ArgumentParser(description='daal4py SVC benchmark with '
                                                 'linear kernel')
    parser.add_argument('-C', dest='C', type=float, default=1.0,
                        help='SVM regularization parameter')
    parser.add_argument('--kernel', choices=('linear', 'rbf'),
                        default='linear', help='SVM kernel function')
    parser.add_argument('--gamma', type=float, default=None,
                        help='Parameter for kernel="rbf"')
    parser.add_argument('--maxiter', type=int, default=100000,
                        help='Maximum iterations for the iterative solver. ')
    parser.add_argument('--max-cache-size', type=int, default=8,
                        help='Maximum cache size, in gigabytes, for SVM.')
    parser.add_argument('--tau', type=float, default=1e-12,
                        help='Tau parameter for working set selection scheme')
    parser.add_argument('--tol', type=float, default=1e-3,
                        help='Tolerance')
    parser.add_argument('--no-shrinking', action='store_false',
                        default=True, dest='shrinking',
                        help="Don't use shrinking heuristic")
    params = parse_args(parser, prefix='daal4py')

    # Load data
    X_train, X_test, y_train, y_test = load_data(params, add_dtype=True,
                                                 label_2d=True)

    # Default gamma = 1 / n_features when not provided.
    if params.gamma is None:
        params.gamma = 1 / X_train.shape[1]

    cache_size_bytes = get_optimal_cache_size(X_train.shape[0],
                                              max_cache=params.max_cache_size)
    params.cache_size_mb = cache_size_bytes / 2**20
    params.cache_size_bytes = cache_size_bytes
    params.n_classes = np.unique(y_train).size

    # Column layout for the report rows.
    columns = ('batch', 'arch', 'prefix', 'function', 'threads', 'dtype',
               'size', 'kernel', 'cache_size_mb', 'C', 'sv_len', 'n_classes',
               'accuracy', 'time')

    # Time fit and predict
    fit_time, res = measure_function_time(test_fit, X_train, y_train, params,
                                          params=params)
    res, support, indices, n_support = res
    params.sv_len = support.shape[0]

    # Training accuracy (untimed prediction on the training set).
    yp = test_predict(X_train, res, params)
    train_acc = 100 * accuracy_score(yp, y_train)

    predict_time, yp = measure_function_time(test_predict, X_test, res, params,
                                             params=params)
    # BUG FIX: test accuracy must be scored against the *test* labels;
    # the original compared test-set predictions to y_train.
    test_acc = 100 * accuracy_score(yp, y_test)

    print_output(library='daal4py', algorithm='svc',
                 stages=['training', 'prediction'], columns=columns,
                 params=params, functions=['SVM.fit', 'SVM.predict'],
                 times=[fit_time, predict_time], accuracy_type='accuracy[%]',
                 accuracies=[train_acc, test_acc], data=[X_train, X_test])
# Time fit and predict fit_time, res = measure_function_time( df_clsf_fit, X_train, y_train, params.n_classes, n_trees=params.num_trees, n_features_per_node=params.max_features, max_depth=params.max_depth, min_impurity=params.min_impurity_decrease, bootstrap=params.bootstrap, seed=params.seed, params=params) yp = df_clsf_predict(X_train, res, params.n_classes) train_acc = 100 * accuracy_score(yp, y_train) predict_time, yp = measure_function_time(df_clsf_predict, X_test, res, params.n_classes, params=params) test_acc = 100 * accuracy_score(yp, y_test) print_output(library='daal4py', algorithm='decision_forest_classification', stages=['training', 'prediction'], columns=columns, params=params, functions=['df_clsf.fit', 'df_clsf.predict'], times=[fit_time, predict_time],
print_header(columns, params) # Time fit and predict fit_time, res = time_mean_min(df_clsf_fit, X, y, params.n_classes, n_trees=params.num_trees, seed=params.seed, n_features_per_node=params.max_features, max_depth=params.max_depth, verbose=params.verbose, outer_loops=params.fit_outer_loops, inner_loops=params.fit_inner_loops) print_row(columns, params, function='df_clsf.fit', time=fit_time) predict_time, yp = time_mean_min(df_clsf_predict, X, res, params.n_classes, verbose=params.verbose, outer_loops=params.predict_outer_loops, inner_loops=params.predict_inner_loops) acc = 100 * accuracy_score(yp, y) print_row(columns, params, function='df_clsf.predict', time=predict_time, accuracy=acc)