def main(readcsv=read_csv, method='defaultDense'):
    """Train a two-class SVM on the bundled dense CSV data and predict.

    The first 20 columns of each CSV file are read as features and column
    20 as the label. A linear kernel object is shared between the training
    and the prediction algorithm.

    Parameters
    ----------
    readcsv : callable
        Called as ``readcsv(path, column_range)`` to load a column slice.
    method : str
        Accepted for interface compatibility; not used in the body.

    Returns
    -------
    tuple
        ``(prediction_result, test_labels)``.
    """
    train_path = "./data/batch/svm_two_class_train_dense.csv"
    test_path = "./data/batch/svm_two_class_test_dense.csv"

    feature_cols = range(20)
    label_cols = range(20, 21)

    # The kernel object is bound to a name so it stays alive while the
    # training algorithm that references it is created and used.
    kern = d4p.kernel_function_linear()
    trainer = d4p.svm_training(doShrinking=True, kernel=kern,
                               cacheSize=600000000)

    # Training set: features and labels come from the same file.
    train_x = readcsv(train_path, feature_cols)
    train_y = readcsv(train_path, label_cols)
    fitted = trainer.compute(train_x, train_y)

    # Prediction uses the same kernel and the model produced above.
    predictor = d4p.svm_prediction(kernel=kern)
    pdata = readcsv(test_path, feature_cols)
    plabels = readcsv(test_path, label_cols)
    predict_result = predictor.compute(pdata, fitted.model)

    # One prediction value per test observation is expected.
    expected_shape = (pdata.shape[0], 1)
    assert (predict_result.prediction.shape == expected_shape)

    return (predict_result, plabels)
def compute(train_indep_data, train_dep_data, test_indep_data,
            method='defaultDense'):
    """Fit a linear-kernel SVM and classify the test observations.

    Parameters
    ----------
    train_indep_data, train_dep_data
        Training features and labels, passed unchanged to the training
        algorithm's ``compute``.
    test_indep_data
        Features to predict on.
    method : str
        Forwarded as ``method`` to ``d4p.svm_training``.

    Returns
    -------
    tuple
        ``(predict_labels, decision_result)``: labels are ``1`` where the
        decision value is ``>= 0`` and ``-1`` otherwise; ``decision_result``
        is the raw ``prediction`` table of the prediction result.
    """
    # Linear kernel shared by training and prediction.
    linear_kernel = d4p.kernel_function_linear(
        fptype='float',
        method='defaultDense',
        k=1.0,
        b=0.0,
    )

    trainer = d4p.svm_training(
        fptype='float',
        method=method,
        kernel=linear_kernel,
        C=1.0,
        accuracyThreshold=1e-3,
        tau=1e-8,
        cacheSize=600000000,
    )
    model = trainer.compute(train_indep_data, train_dep_data).model

    predictor = d4p.svm_prediction(fptype='float', kernel=linear_kernel)
    decision_result = predictor.compute(test_indep_data, model).prediction

    # Threshold the decision values at zero to obtain {-1, 1} labels.
    predict_labels = np.where(decision_result >= 0, 1, -1)
    return predict_labels, decision_result
def test_predict(X, training_result, params):
    """Predict with a trained linear-kernel SVM model.

    Parameters
    ----------
    X
        Observations to classify; also used to pick the floating-point
        type via ``getFPType``.
    training_result
        Object whose ``model`` attribute is passed to the predictor.
    params
        Must provide ``n_classes``; for more than two classes ``maxiter``
        and ``tol`` are read as well.

    Returns
    -------
    For two classes, a boolean array that is True where the raveled
    prediction is greater than zero; otherwise the raveled prediction
    itself.
    """
    fptype = getFPType(X)
    binary = params.n_classes == 2

    base_predictor = svm_prediction(
        fptype=fptype,
        method='defaultDense',
        kernel=kernel_function_linear(fptype=fptype),
    )

    if binary:
        predictor = base_predictor
    else:
        # Multi-class: wrap the binary predictor in a one-against-one,
        # vote-based classifier.
        predictor = multi_class_classifier_prediction(
            nClasses=params.n_classes,
            fptype=fptype,
            maxIterations=params.maxiter,
            accuracyThreshold=params.tol,
            pmethod='voteBased',
            tmethod='oneAgainstOne',
            prediction=base_predictor,
        )

    flat_prediction = predictor.compute(X, training_result.model).prediction.ravel()
    if binary:
        return np.greater(flat_prediction, 0)
    return flat_prediction
def daal_kernel(name, fptype, gamma=1.0):
    """Return a daal4py kernel-function object for ``name``.

    ``'linear'`` selects ``kernel_function_linear``. Any other value is
    treated as an RBF request and selects ``kernel_function_rbf``; no
    validation of ``name`` is performed.

    Parameters
    ----------
    name : str
        Kernel name.
    fptype
        Forwarded as ``fptype`` to the kernel constructor.
    gamma : float
        Only used on the RBF path, where it is converted to ``sigma``.
    """
    wants_linear = name == 'linear'
    if wants_linear:
        return kernel_function_linear(fptype=fptype)

    # sigma is chosen so that 1 / (2 * sigma**2) == gamma.
    rbf_sigma = np.sqrt(0.5 / gamma)
    return kernel_function_rbf(fptype=fptype, sigma=rbf_sigma)
def _daal4py_kf(kernel, X_fptype, gamma=1.0): if kernel == 'rbf': sigma_value = np.sqrt(0.5/gamma) kf = daal4py.kernel_function_rbf(fptype=X_fptype, sigma=sigma_value) elif kernel == 'linear': kf = daal4py.kernel_function_linear(fptype=X_fptype) else: raise ValueError("_daal4py_fit received unexpected kernel specifiction {}.".format(kernel)) return kf
def test_fit(X, y, params):
    """Train a linear-kernel SVM and summarise its support vectors.

    Note that ``y`` is relabelled IN PLACE before training: for two classes
    every ``0`` becomes ``-1``; otherwise every ``-1`` becomes ``0``.

    Parameters
    ----------
    X
        Training observations; also used to pick the floating-point type
        via ``getFPType``.
    y
        Label array (must support boolean-mask assignment and ``take``).
    params
        Must provide ``n_classes``, ``C``, ``maxiter``, ``tau``,
        ``cache_size_bytes``, ``tol`` and ``shrinking``.

    Returns
    -------
    tuple
        ``(training_result, support, indices, n_support_)`` where
        ``support`` is whatever ``construct_dual_coefs`` returns
        (presumably support-vector row indices -- defined elsewhere),
        ``indices`` is ``y.take(support, axis=0)`` and ``n_support_`` is an
        int32 array with one count per class label.
    """
    fptype = getFPType(X)
    kf = kernel_function_linear(fptype=fptype)
    is_binary = params.n_classes == 2

    if is_binary:
        y[y == 0] = -1
    else:
        y[y == -1] = 0

    svm_train = svm_training(
        fptype=fptype,
        C=params.C,
        maxIterations=params.maxiter,
        tau=params.tau,
        cacheSize=params.cache_size_bytes,
        accuracyThreshold=params.tol,
        doShrinking=params.shrinking,
        kernel=kf,
    )

    if is_binary:
        clf = svm_train
    else:
        clf = multi_class_classifier_training(
            fptype=fptype,
            nClasses=params.n_classes,
            accuracyThreshold=params.tol,
            method='oneAgainstOne',
            maxIterations=params.maxiter,
            training=svm_train,
        )

    training_result = clf.compute(X, y)

    support = construct_dual_coefs(training_result.model, params.n_classes, X, y)
    indices = y.take(support, axis=0)

    # Labels actually present after the relabelling above. In the
    # multi-class case -1 was remapped to 0, so the first class is 0;
    # counting -1 there would always yield zero and leave class 0 uncounted.
    if is_binary:
        class_labels = [-1, 1]
    else:
        class_labels = list(range(params.n_classes))

    n_support_ = np.array(
        [np.sum(indices == label) for label in class_labels],
        dtype=np.int32,
    )

    return training_result, support, indices, n_support_
def bench(meta_info, X_train, y_train, fit_samples, fit_repetitions,
          predict_samples, predict_repetitions, classes, cache_size,
          accuracy_threshold=1e-16, max_iterations=2000):
    """Time SVM training and prediction and print one CSV result line.

    ``y_train`` is relabelled IN PLACE first: for two classes every ``0``
    becomes ``-1``; otherwise every ``-1`` becomes ``0``.

    Training is timed ``fit_samples`` times, each sample running
    ``fit_repetitions`` full fits (algorithm construction, ``compute``,
    dual-coefficient construction and support counting are all inside the
    timed region). Prediction on ``X_train`` is timed ``predict_samples``
    times with ``predict_repetitions`` ``compute`` calls per sample;
    predictor construction is outside the timed region.

    The printed line contains ``meta_info``, the best per-repetition fit
    and predict times, the training-set accuracy in percent, the number of
    entries in ``support`` and the length of ``n_support_``.
    """
    two_class = classes == 2
    kf = kernel_function_linear(fptype='double')

    if two_class:
        y_train[y_train == 0] = -1
    else:
        y_train[y_train == -1] = 0

    # ---- training -------------------------------------------------------
    fit_times = []
    for _sample in range(fit_samples):
        fit_begin = time()
        for __ in range(fit_repetitions):
            svm_train = svm_training(
                fptype='double',
                C=0.01,
                maxIterations=max_iterations,
                tau=1e-12,
                cacheSize=cache_size,
                accuracyThreshold=accuracy_threshold,
                doShrinking=True,
                kernel=kf,
            )
            clf = svm_train if two_class else multi_class_classifier_training(
                nClasses=classes,
                fptype='double',
                accuracyThreshold=accuracy_threshold,
                method='oneAgainstOne',
                maxIterations=max_iterations,
                training=svm_train,
            )
            training_result = clf.compute(X_train, y_train)

            support = construct_dual_coefs(training_result.model, classes,
                                           X_train, y_train)
            indices = y_train.take(support, axis=0)

            # NOTE: in the multi-class case the first counted label is -1
            # even though -1 was remapped to 0 above; only the length of
            # n_support_ is reported below.
            if two_class:
                counted_labels = [-1, 1]
            else:
                counted_labels = [-1] + list(range(1, classes))
            n_support_ = np.array(
                [np.sum(indices == label) for label in counted_labels],
                dtype=np.int32,
            )
        fit_end = time()
        fit_times.append(fit_end - fit_begin)

    # ---- prediction -----------------------------------------------------
    predict_times = []
    for _sample in range(predict_samples):
        svm_predict = svm_prediction(fptype='double', method='defaultDense',
                                     kernel=kf)
        prdct = svm_predict if two_class else multi_class_classifier_prediction(
            nClasses=classes,
            fptype='double',
            maxIterations=max_iterations,
            accuracyThreshold=accuracy_threshold,
            pmethod='voteBased',
            tmethod='oneAgainstOne',
            prediction=svm_predict,
        )

        predict_begin = time()
        for __ in range(predict_repetitions):
            res = prdct.compute(X_train, training_result.model)
        predict_end = time()
        predict_times.append(predict_end - predict_begin)

    # ---- accuracy and report --------------------------------------------
    y_predict = res.prediction.ravel()
    if two_class:
        # Compare as booleans: "positive class" vs "not positive class".
        y_predict = np.greater(y_predict, 0)
        y_train = np.greater(y_train, 0)

    line_template = "{meta_info},{fit_t:0.6g},{pred_t:0.6g},{acc:0.3f},{sv_len},{cl}"
    print(line_template.format(
        meta_info=meta_info,
        fit_t=min(fit_times) / fit_repetitions,
        pred_t=min(predict_times) / predict_repetitions,
        acc=100 * accuracy_score(y_train.ravel(), y_predict),
        sv_len=support.shape[0],
        cl=n_support_.shape[0],
    ))