def main(readcsv=read_csv, method='defaultDense'):
    """Train a multi-class SVM classifier and predict labels for a test set.

    Args:
        readcsv: Callable invoked as ``readcsv(path, columns)`` to load the
            selected columns of a CSV file.
        method: Not used by this function; the SVM solvers below are always
            created with ``method='thunder'``.

    Returns:
        Tuple ``(pred_result, pred_labels)``: the result object returned by
        the prediction algorithm and the labels read from the test file.
    """
    nFeatures = 20
    nClasses = 5

    # Training file layout: nFeatures feature columns, then one label column.
    train_file = 'data/batch/svm_multi_class_train_dense.csv'
    train_data = readcsv(train_file, range(nFeatures))
    train_labels = readcsv(train_file, range(nFeatures, nFeatures + 1))

    # Create and configure the multi-class training algorithm.
    algorithm = d4p.multi_class_classifier_training(
        nClasses=nClasses,
        training=d4p.svm_training(method='thunder'),
        prediction=d4p.svm_prediction())

    # Pass data to training. The training result provides the model.
    train_result = algorithm.compute(train_data, train_labels)
    assert train_result.model.NumberOfFeatures == nFeatures
    assert isinstance(train_result.model.TwoClassClassifierModel(0),
                      d4p.svm_model)

    # Prediction stage: the test file has the same column layout.
    pred_file = 'data/batch/svm_multi_class_test_dense.csv'
    pred_data = readcsv(pred_file, range(nFeatures))
    pred_labels = readcsv(pred_file, range(nFeatures, nFeatures + 1))

    # Create an algorithm object to predict multi-class SVM values.
    algorithm = d4p.multi_class_classifier_prediction(
        nClasses,
        training=d4p.svm_training(method='thunder'),
        prediction=d4p.svm_prediction())

    # Pass data to prediction. The prediction result provides the labels.
    pred_result = algorithm.compute(pred_data, train_result.model)

    # One predicted label per *test* observation: the check must use the
    # data set that was actually passed to prediction, not the training set.
    assert pred_result.prediction.shape == (pred_data.shape[0], 1)

    return (pred_result, pred_labels)
def compute(train_indep_data, train_dep_data, test_indep_data, method='defaultDense'):
    """Fit a linear-kernel SVM and classify the test observations.

    Args:
        train_indep_data: Training features passed to the SVM trainer.
        train_dep_data: Training labels passed to the SVM trainer.
        test_indep_data: Features to run prediction on.
        method: Value forwarded as ``method`` to ``d4p.svm_training``.

    Returns:
        Tuple ``(predict_labels, decision_result)``: ``decision_result`` is
        the ``prediction`` attribute of the prediction result, and
        ``predict_labels`` is 1 where it is >= 0 and -1 elsewhere.
    """
    # Linear kernel shared by the training and the prediction stage.
    linear_kernel = d4p.kernel_function_linear(
        fptype='float',
        method='defaultDense',
        k=1.0,
        b=0.0,
    )

    trainer = d4p.svm_training(
        fptype='float',
        method=method,
        kernel=linear_kernel,
        C=1.0,
        accuracyThreshold=1e-3,
        tau=1e-8,
        cacheSize=600000000,
    )
    fitted_model = trainer.compute(train_indep_data, train_dep_data).model

    # Run prediction with the freshly trained model.
    predictor = d4p.svm_prediction(fptype='float', kernel=linear_kernel)
    decision_result = predictor.compute(test_indep_data, fitted_model).prediction

    # Threshold the decision values at zero to obtain the +1 / -1 labels.
    predict_labels = np.where(decision_result >= 0, 1, -1)
    return predict_labels, decision_result
def main(readcsv=read_csv, method='defaultDense'):
    """Train a two-class SVM with a linear kernel and predict on a test file.

    Args:
        readcsv: Callable invoked as ``readcsv(path, columns)`` to load the
            selected columns of a CSV file.
        method: Not referenced in the body of this function.

    Returns:
        Tuple ``(predict_result, plabels)``: the result object returned by
        the prediction algorithm and the labels read from the test file.
    """
    # Input files: 20 feature columns followed by one label column.
    n_features = 20
    feature_cols = range(n_features)
    label_cols = range(n_features, n_features + 1)
    train_path = "./data/batch/svm_two_class_train_dense.csv"
    test_path = "./data/batch/svm_two_class_test_dense.csv"

    # Linear kernel (with an enlarged cache size for training). The kernel
    # object is bound to a name so that it stays alive while the training
    # algorithm that references it is created and used.
    kern = d4p.kernel_function_linear()
    trainer = d4p.svm_training(doShrinking=True, kernel=kern, cacheSize=600000000)

    # Load the training set and fit the model.
    train_features = readcsv(train_path, feature_cols)
    train_targets = readcsv(train_path, label_cols)
    fitted = trainer.compute(train_features, train_targets)

    # Prediction uses the same kernel object as training.
    predictor = d4p.svm_prediction(kernel=kern)

    # Load the test set (same number of features as the training set).
    pdata = readcsv(test_path, feature_cols)
    plabels = readcsv(test_path, label_cols)

    # Predict using the model produced by the training stage above.
    predict_result = predictor.compute(pdata, fitted.model)

    # The prediction holds one value per test observation.
    expected_shape = (pdata.shape[0], 1)
    assert predict_result.prediction.shape == expected_shape

    return (predict_result, plabels)
def _daal4py_svm(fptype, C, accuracyThreshold, tau, maxIterations, cacheSize,
                 doShrinking, kernel, nClasses=2):
    """Build the daal4py SVM training algorithm for the given class count.

    The underlying two-class trainer is always created with
    ``method='thunder'``. For ``nClasses == 2`` that trainer is returned
    directly; otherwise it is wrapped in a one-against-one multi-class
    classifier training algorithm.

    Returns:
        The configured (not yet computed) training algorithm object.
    """
    binary_trainer = daal4py.svm_training(
        method='thunder',
        fptype=fptype,
        C=C,
        accuracyThreshold=accuracyThreshold,
        tau=tau,
        maxIterations=maxIterations,
        cacheSize=cacheSize,
        doShrinking=doShrinking,
        kernel=kernel,
    )

    # The binary problem needs no wrapper.
    if nClasses == 2:
        return binary_trainer

    return daal4py.multi_class_classifier_training(
        nClasses=nClasses,
        fptype=fptype,
        method='oneAgainstOne',
        training=binary_trainer,
    )
def _daal4py_svm_compatibility(fptype, C, accuracyThreshold, tau,
                               maxIterations, cacheSize, doShrinking, kernel,
                               nClasses=2):
    """Build the daal4py SVM training algorithm, choosing the solver by version.

    The two-class trainer uses ``method='thunder'`` when
    ``daal_check_version(((2020, 'P', 2), (2021, 'B', 108)))`` is truthy and
    ``method='boser'`` otherwise. For ``nClasses == 2`` that trainer is
    returned directly; otherwise it is wrapped in a one-against-one
    multi-class classifier training algorithm.

    Returns:
        The configured (not yet computed) training algorithm object.
    """
    if daal_check_version(((2020, 'P', 2), (2021, 'B', 108))):
        solver = 'thunder'
    else:
        solver = 'boser'

    binary_trainer = daal4py.svm_training(
        method=solver,
        fptype=fptype,
        C=C,
        accuracyThreshold=accuracyThreshold,
        tau=tau,
        maxIterations=maxIterations,
        cacheSize=cacheSize,
        doShrinking=doShrinking,
        kernel=kernel,
    )

    # The binary problem needs no wrapper.
    if nClasses == 2:
        return binary_trainer

    return daal4py.multi_class_classifier_training(
        nClasses=nClasses,
        fptype=fptype,
        method='oneAgainstOne',
        training=binary_trainer,
    )
def test_fit(X, y, params):
    """Train a linear-kernel SVM and collect support-vector statistics.

    Args:
        X: Training features.
        y: Training labels. NOTE: modified in place -- for two classes the
            label 0 is rewritten to -1, otherwise the label -1 is rewritten
            to 0.
        params: Object providing ``n_classes``, ``C``, ``maxiter``, ``tau``,
            ``cache_size_bytes``, ``tol`` and ``shrinking``.

    Returns:
        Tuple ``(training_result, support, indices, n_support_)`` where
        ``support`` is the value returned by ``construct_dual_coefs``,
        ``indices`` are the labels taken from ``y`` at ``support``, and
        ``n_support_`` is an int32 array with the number of those labels
        equal to each class label.
    """
    fptype = getFPType(X)
    kf = kernel_function_linear(fptype=fptype)

    # Label convention after this step: {-1, 1} for the binary problem,
    # {0, ..., n_classes - 1} for the multi-class problem.
    if params.n_classes == 2:
        y[y == 0] = -1
    else:
        y[y == -1] = 0

    svm_train = svm_training(fptype=fptype,
                             C=params.C,
                             maxIterations=params.maxiter,
                             tau=params.tau,
                             cacheSize=params.cache_size_bytes,
                             accuracyThreshold=params.tol,
                             doShrinking=params.shrinking,
                             kernel=kf)

    if params.n_classes == 2:
        clf = svm_train
    else:
        clf = multi_class_classifier_training(fptype=fptype,
                                              nClasses=params.n_classes,
                                              accuracyThreshold=params.tol,
                                              method='oneAgainstOne',
                                              maxIterations=params.maxiter,
                                              training=svm_train)

    training_result = clf.compute(X, y)

    support = construct_dual_coefs(training_result.model, params.n_classes,
                                   X, y)
    indices = y.take(support, axis=0)

    if params.n_classes == 2:
        n_support_ = np.array([np.sum(indices == -1),
                               np.sum(indices == 1)], dtype=np.int32)
    else:
        # Labels -1 were remapped to 0 above, so the multi-class labels are
        # 0 .. n_classes - 1. Counting label -1 here would always yield 0
        # and leave class 0 uncounted.
        n_support_ = np.array([np.sum(indices == c)
                               for c in range(params.n_classes)],
                              dtype=np.int32)

    return training_result, support, indices, n_support_
def _daal4py_fit(self, X, y_inp, kernel):
    """Fit the SVM with daal4py and store scikit-learn style fitted attributes.

    Reads ``self.C``, ``self.tol``, ``self.max_iter``, ``self.cache_size``,
    ``self.shrinking``, ``self._gamma`` and ``self.classes_``.

    Sets ``self.daal_model_``, ``self.support_``, ``self.support_vectors_``,
    ``self.dual_coef_``, ``self.intercept_``, ``self.n_support_``,
    ``self.probA_`` and ``self.probB_``.

    Args:
        X: Training features; indexed by row with the support indices.
        y_inp: Training labels; converted with ``make2d`` before use.
        kernel: Kernel specification forwarded to ``_daal4py_kf``.

    Returns:
        None.

    Raises:
        ValueError: If ``self.C`` is not strictly positive.
    """

    if self.C <= 0:
        raise ValueError("C <= 0")

    y = make2d(y_inp)
    num_classes = len(self.classes_)

    if num_classes == 2:
        # Intel(R) DAAL requires binary classes to be 1 and -1. sklearn normalizes
        # the classes to 0 and 1, so we temporarily replace the 0s with -1s.
        # The copy keeps the replacement from touching the array y was made from.
        y = y.copy()
        y[y == 0] = -1

    X_fptype = getFPType(X)

    kf = _daal4py_kf(kernel, X_fptype, gamma=self._gamma)

    # A non-positive max_iter falls back to 2**30 iterations.
    # cache_size is scaled by 1024 * 1024 (presumably MB -> bytes; confirm).
    svm_train = daal4py.svm_training(
        fptype=X_fptype,
        C=float(self.C),
        accuracyThreshold=float(self.tol),
        tau=1e-12,
        maxIterations=int(self.max_iter if self.max_iter > 0 else 2**30),
        cacheSize=int(self.cache_size * 1024 * 1024),
        doShrinking=bool(self.shrinking),
        # shrinkingStep=,
        kernel=kf)

    # Two classes use the SVM trainer directly; more classes wrap it in a
    # one-against-one multi-class trainer.
    if num_classes == 2:
        algo = svm_train
    else:
        algo = daal4py.multi_class_classifier_training(
            nClasses=num_classes,
            fptype=X_fptype,
            accuracyThreshold=float(self.tol),
            method='oneAgainstOne',
            maxIterations=int(self.max_iter if self.max_iter > 0 else 2**30),
            training=svm_train,
            #prediction=svm_predict
        )

    res = algo.compute(X, y)
    model = res.model

    self.daal_model_ = model

    if num_classes == 2:
        # binary
        two_class_sv_ind_ = model.SupportIndices
        two_class_sv_ind_ = two_class_sv_ind_.ravel()

        # support indexes need permutation to arrange them into the same layout as that of Scikit-Learn
        # The structured array lets argsort order by label first, then by index.
        tmp = np.empty(two_class_sv_ind_.shape, dtype=np.dtype([('label', y.dtype), ('ind', two_class_sv_ind_.dtype)]))
        tmp['label'][:] = y[two_class_sv_ind_].ravel()
        tmp['ind'][:] = two_class_sv_ind_
        perm = np.argsort(tmp, order=['label', 'ind'])
        del tmp

        self.support_ = two_class_sv_ind_[perm]
        self.support_vectors_ = X[self.support_]

        # Coefficients are transposed, then reordered with the same permutation
        # as the support indices.
        self.dual_coef_ = model.ClassificationCoefficients.T
        self.dual_coef_ = self.dual_coef_[:, perm]
        self.intercept_ = np.array([model.Bias])

    else:
        # multi-class
        intercepts = []
        coefs = []
        num_models = model.NumberOfTwoClassClassifierModels  # not used below
        sv_ind_by_clf = []
        label_indexes = []

        # Visit the pairwise models in the order (i1, i2) with i2 < i1;
        # model_id counts the two-class models in that order.
        model_id = 0
        for i1 in range(num_classes):
            label_indexes.append(np.where(y == i1)[0])
            for i2 in range(i1):
                svm_model = model.TwoClassClassifierModel(model_id)

                # Indices correspond to input features with label i1 followed by input features with label i2
                two_class_sv_ind_ = svm_model.SupportIndices
                # Map these indexes to indexes of the training data
                sv_ind = np.take(
                    np.hstack((label_indexes[i1], label_indexes[i2])),
                    two_class_sv_ind_.ravel())
                sv_ind_by_clf.append(sv_ind)

                # svs_ = getArrayFromNumericTable(svm_model.getSupportVectors())
                # assert np.array_equal(svs_, X[sv_ind])

                # Bias and coefficients are stored with flipped sign
                # (presumably to match scikit-learn's sign convention; confirm).
                intercepts.append(-svm_model.Bias)
                coefs.append(-svm_model.ClassificationCoefficients)
                model_id += 1

        # permute solutions to lexicographic ordering
        to_lex_perm = map_to_lexicographic(num_classes)
        sv_ind_by_clf = permute_list(sv_ind_by_clf, to_lex_perm)
        sv_coef_by_clf = permute_list(coefs, to_lex_perm)
        intercepts = permute_list(intercepts, to_lex_perm)

        self.dual_coef_, self.support_ = extract_dual_coef(
            num_classes,  # number of classes
            sv_ind_by_clf,  # support vector indexes by two-class classifiers
            sv_coef_by_clf,  # classification coefficients by two-class classifiers
            y.squeeze().astype(np.intp, copy=False)  # integer labels
        )
        self.support_vectors_ = X[self.support_]
        self.intercept_ = np.array(intercepts)

    # Labels of the support vectors, used to count support vectors per class.
    indices = y.take(self.support_, axis=0)
    if num_classes == 2:
        # Binary labels are -1 / 1 at this point (see the replacement above).
        self.n_support_ = np.array(
            [np.sum(indices == -1), np.sum(indices == 1)], dtype=np.int32)
    else:
        # Only the position i of each class is compared against the labels.
        self.n_support_ = np.array(
            [np.sum(indices == i) for i, c in enumerate(self.classes_)],
            dtype=np.int32)

    # Both probability arrays are set to empty arrays.
    self.probA_ = np.empty(0)
    self.probB_ = np.empty(0)

    return
def bench(meta_info, X_train, y_train, fit_samples, fit_repetitions,
          predict_samples, predict_repetitions, classes, cache_size,
          accuracy_threshold=1e-16, max_iterations=2000):
    """Time linear-kernel SVM training and prediction and print one CSV row.

    Args:
        meta_info: Value printed as the first field of the output row.
        X_train: Training features; also used as the prediction input.
        y_train: Training labels. NOTE: modified in place -- for two classes
            the label 0 is rewritten to -1, otherwise the label -1 is
            rewritten to 0.
        fit_samples: Number of timed training samples.
        fit_repetitions: Training runs inside each timed sample.
        predict_samples: Number of timed prediction samples.
        predict_repetitions: Prediction runs inside each timed sample.
        classes: Number of classes.
        cache_size: Value forwarded as ``cacheSize`` to ``svm_training``.
        accuracy_threshold: Value forwarded as ``accuracyThreshold``.
        max_iterations: Value forwarded as ``maxIterations``.

    Returns:
        None. Prints ``meta_info``, the best per-repetition fit time, the
        best per-repetition predict time, the accuracy in percent on the
        training data, the number of support vectors, and the number of
        entries in the per-class support count.
    """
    kf = kernel_function_linear(fptype='double')

    # Label convention after this step: {-1, 1} for the binary problem,
    # {0, ..., classes - 1} for the multi-class problem.
    if classes == 2:
        y_train[y_train == 0] = -1
    else:
        y_train[y_train == -1] = 0

    fit_times = []
    for _ in range(fit_samples):
        start = time()
        for __ in range(fit_repetitions):
            svm_train = svm_training(fptype='double',
                                     C=0.01,
                                     maxIterations=max_iterations,
                                     tau=1e-12,
                                     cacheSize=cache_size,
                                     accuracyThreshold=accuracy_threshold,
                                     doShrinking=True,
                                     kernel=kf)

            if classes == 2:
                clf = svm_train
            else:
                clf = multi_class_classifier_training(
                    nClasses=classes,
                    fptype='double',
                    accuracyThreshold=accuracy_threshold,
                    method='oneAgainstOne',
                    maxIterations=max_iterations,
                    training=svm_train)

            training_result = clf.compute(X_train, y_train)

            # The support-vector bookkeeping is part of the timed region.
            support = construct_dual_coefs(training_result.model, classes,
                                           X_train, y_train)
            indices = y_train.take(support, axis=0)
            if classes == 2:
                n_support_ = np.array(
                    [np.sum(indices == -1), np.sum(indices == 1)],
                    dtype=np.int32)
            else:
                # Labels -1 were remapped to 0 above, so the multi-class
                # labels are 0 .. classes - 1. Counting label -1 would always
                # yield 0 and leave class 0 uncounted.
                n_support_ = np.array(
                    [np.sum(indices == c) for c in range(classes)],
                    dtype=np.int32)
        stop = time()
        fit_times.append(stop - start)

    predict_times = []
    for _ in range(predict_samples):
        # Building the prediction algorithm is kept outside the timed region.
        svm_predict = svm_prediction(fptype='double',
                                     method='defaultDense',
                                     kernel=kf)
        if classes == 2:
            prdct = svm_predict
        else:
            prdct = multi_class_classifier_prediction(
                nClasses=classes,
                fptype='double',
                maxIterations=max_iterations,
                accuracyThreshold=accuracy_threshold,
                pmethod='voteBased',
                tmethod='oneAgainstOne',
                prediction=svm_predict)

        start = time()
        for __ in range(predict_repetitions):
            res = prdct.compute(X_train, training_result.model)
        stop = time()
        predict_times.append(stop - start)

    if classes == 2:
        # Compare on booleans: positive decision value vs. positive label.
        y_predict = np.greater(res.prediction.ravel(), 0)
        y_train = np.greater(y_train, 0)
    else:
        y_predict = res.prediction.ravel()

    print("{meta_info},{fit_t:0.6g},{pred_t:0.6g},{acc:0.3f},{sv_len},{cl}".
          format(meta_info=meta_info,
                 fit_t=min(fit_times) / fit_repetitions,
                 pred_t=min(predict_times) / predict_repetitions,
                 acc=100 * accuracy_score(y_train.ravel(), y_predict),
                 sv_len=support.shape[0],
                 cl=n_support_.shape[0]))