def main(readcsv=read_csv, method='defaultDense'):
    """Train a 5-class GBT classifier with d4p and validate it on the test CSV.

    The training and prediction algorithms are configured differently
    depending on the linked DAAL version: from 2020.0 onwards variable
    importance and class probabilities are requested as well.

    Args:
        readcsv: Callable invoked as ``readcsv(path, columns, t=dtype)`` to
            load the selected CSV columns.
        method: Not used by this function.

    Returns:
        Tuple ``(train_result, predict_result, plabels)`` where ``plabels``
        holds the labels read from the test file.

    Raises:
        AssertionError: If the number of nonzero entries of
            ``prediction - labels`` divided by the number of test rows is
            not below 0.022.
    """
    train_path = "./data/batch/df_classification_train.csv"
    test_path = "./data/batch/df_classification_test.csv"
    class_count = 5
    feature_cols = range(3)   # the first three columns are the features
    label_cols = range(3, 4)  # the fourth column is the label

    # Older library versions expose a smaller interface, so find out
    # which version is linked before configuring the algorithms.
    from daal4py import __daal_link_version__ as dv
    daal_version = tuple(map(int, (dv[0:4], dv[4:8])))
    extended_interface = daal_version >= (2020, 0)

    # Configure a training object (5 classes)
    train_kwargs = {
        'nClasses': class_count,
        'maxIterations': 200,
        'minObservationsInLeafNode': 8,
        'featuresPerNode': 3,
    }
    if extended_interface:
        train_kwargs['varImportance'] = 'weight|totalCover|cover|totalGain|gain'
    train_algo = d4p.gbt_classification_training(**train_kwargs)

    # Read the training data and fit the model
    data = readcsv(train_path, feature_cols, t=np.float32)
    labels = readcsv(train_path, label_cols, t=np.float32)
    train_result = train_algo.compute(data, labels)

    # Configure the prediction object
    predict_kwargs = {'nClasses': class_count}
    if extended_interface:
        predict_kwargs['resultsToEvaluate'] = \
            "computeClassLabels|computeClassProbabilities"
    predict_algo = d4p.gbt_classification_prediction(**predict_kwargs)

    # Predict on the test data (same number of features) with the trained model
    pdata = readcsv(test_path, feature_cols, t=np.float32)
    predict_result = predict_algo.compute(pdata, train_result.model)

    # Compare the predictions with the expected test labels
    plabels = readcsv(test_path, label_cols, t=np.float32)
    assert (
        np.count_nonzero(predict_result.prediction - plabels) / pdata.shape[0]
        < 0.022
    )

    return (train_result, predict_result, plabels)
def main():
    """Train a 5-class GBT classifier with d4p and predict on the test CSV.

    Returns:
        Tuple ``(train_result, predict_result, plabels)`` where ``plabels``
        holds the labels read from the test file.

    Raises:
        AssertionError: If the prediction does not have shape
            ``(number of test rows, 1)``.
    """
    train_path = "./data/batch/df_classification_train.csv"
    test_path = "./data/batch/df_classification_test.csv"
    feature_cols = range(3)   # the first three columns are the features
    label_cols = range(3, 4)  # the fourth column is the label

    # Configure a training object (5 classes)
    train_kwargs = {
        'nClasses': 5,
        'maxIterations': 40,
        'minObservationsInLeafNode': 8,
        'featuresPerNode': 3,
    }
    train_algo = d4p.gbt_classification_training(**train_kwargs)

    # Read the training data and fit the model
    data = read_csv(train_path, feature_cols)
    labels = read_csv(train_path, label_cols)
    train_result = train_algo.compute(data, labels)

    # Configure the prediction object
    predict_algo = d4p.gbt_classification_prediction(5)

    # Read the test data (same number of features) and its labels
    pdata = read_csv(test_path, feature_cols)
    plabels = read_csv(test_path, label_cols)

    # Predict using the model from the training above
    predict_result = predict_algo.compute(pdata, train_result.model)

    # One predicted value is expected per test row
    expected_shape = (pdata.shape[0], 1)
    assert predict_result.prediction.shape == expected_shape

    return (train_result, predict_result, plabels)
def main(readcsv=read_csv, method='defaultDense'):
    """Train a 5-class GBT classifier with d4p and validate it on the test CSV.

    Args:
        readcsv: Callable invoked as ``readcsv(path, columns, t=dtype)`` to
            load the selected CSV columns.
        method: Not used by this function.

    Returns:
        Tuple ``(train_result, predict_result, plabels)`` where ``plabels``
        holds the labels read from the test file.

    Raises:
        AssertionError: If the number of nonzero entries of
            ``prediction - labels`` divided by the number of test rows is
            not below 0.022.
    """
    train_path = "./data/batch/df_classification_train.csv"
    test_path = "./data/batch/df_classification_test.csv"
    feature_cols = range(3)   # the first three columns are the features
    label_cols = range(3, 4)  # the fourth column is the label

    # Configure a training object (5 classes)
    train_kwargs = {
        'nClasses': 5,
        'maxIterations': 200,
        'minObservationsInLeafNode': 8,
        'featuresPerNode': 3,
    }
    train_algo = d4p.gbt_classification_training(**train_kwargs)

    # Read the training data and fit the model
    data = readcsv(train_path, feature_cols, t=np.float32)
    labels = readcsv(train_path, label_cols, t=np.float32)
    train_result = train_algo.compute(data, labels)

    # Configure the prediction object
    predict_algo = d4p.gbt_classification_prediction(5)

    # Predict on the test data (same number of features) with the trained model
    pdata = readcsv(test_path, feature_cols, t=np.float32)
    predict_result = predict_algo.compute(pdata, train_result.model)

    # Compare the predictions with the expected test labels
    plabels = readcsv(test_path, label_cols, t=np.float32)
    assert (
        np.count_nonzero(predict_result.prediction - plabels) / pdata.shape[0]
        < 0.022
    )

    return (train_result, predict_result, plabels)
def main(readcsv=pd_read_csv, method='defaultDense'):
    """Train a LightGBM model, convert it to daal4py and compare predictions.

    Args:
        readcsv: Callable invoked as ``readcsv(path, columns, t=dtype)`` to
            load the selected CSV columns.
        method: Not used by this function.

    Returns:
        Tuple ``(lgb_prediction, lgb_errors_count, daal_prediction,
        daal_errors_count, y_test)`` with the daal4py prediction and the
        test labels flattened by ``np.ravel``.

    Raises:
        AssertionError: If the two models disagree on the number of
            mispredicted test rows.
    """
    # Path to data
    train_path = "./data/batch/df_classification_train.csv"
    test_path = "./data/batch/df_classification_test.csv"
    feature_cols = range(3)   # the first three columns are the features
    label_cols = range(3, 4)  # the fourth column is the label

    # Data reading
    X_train = readcsv(train_path, feature_cols, t=np.float32)
    y_train = readcsv(train_path, label_cols, t=np.float32)
    X_test = readcsv(test_path, feature_cols, t=np.float32)
    y_test = readcsv(test_path, label_cols, t=np.float32)

    # Dataset creation: the labels are reshaped to one value per training row
    flat_train_labels = np.array(y_train).reshape(X_train.shape[0])
    lgb_train = lgb.Dataset(X_train, flat_train_labels, free_raw_data=False)

    # Training parameters
    params = {
        'max_bin': 256,
        'scale_pos_weight': 2,
        'lambda_l2': 1,
        'alpha': 0.9,
        'max_depth': 8,
        'num_leaves': 2**8,
        'verbose': -1,
        'objective': 'multiclass',
        'learning_rate': 0.3,
        'num_class': 5,
    }

    # Training
    lgb_model = lgb.train(params, lgb_train,
                          valid_sets=lgb_train, verbose_eval=False)

    # LightGBM prediction: pick the highest-scoring class for every row
    lgb_scores = lgb_model.predict(X_test)
    lgb_prediction = np.argmax(lgb_scores, axis=1)
    lgb_errors_count = np.count_nonzero(lgb_prediction - np.ravel(y_test))

    # Conversion to daal4py
    daal_model = d4p.get_gbt_model_from_lightgbm(lgb_model)

    # daal4py prediction
    daal_predict_algo = d4p.gbt_classification_prediction(
        nClasses=params["num_class"],
        resultsToEvaluate="computeClassLabels",
        fptype='float',
    )
    daal_prediction = daal_predict_algo.compute(X_test, daal_model)
    daal_errors_count = np.count_nonzero(daal_prediction.prediction - y_test)

    # Both models must mispredict the same number of rows
    assert np.absolute(lgb_errors_count - daal_errors_count) == 0

    return (
        lgb_prediction,
        lgb_errors_count,
        np.ravel(daal_prediction.prediction),
        daal_errors_count,
        np.ravel(y_test),
    )
def main(readcsv=pd_read_csv, method='defaultDense'):
    """Train an XGBoost model, convert it to daal4py and compare predictions.

    Args:
        readcsv: Callable invoked as ``readcsv(path, columns, t=dtype)`` to
            load the selected CSV columns.
        method: Not used by this function.

    Returns:
        Tuple ``(xgb_prediction, xgb_errors_count, daal_prediction,
        daal_errors_count, y_test)`` with the daal4py prediction and the
        test labels flattened by ``np.ravel``.

    Raises:
        AssertionError: If the two models disagree on the number of
            mispredicted test rows.
    """
    # Path to data
    train_path = "./data/batch/df_classification_train.csv"
    test_path = "./data/batch/df_classification_test.csv"
    feature_cols = range(3)   # the first three columns are the features
    label_cols = range(3, 4)  # the fourth column is the label

    # Data reading
    X_train = readcsv(train_path, feature_cols, t=np.float32)
    y_train = readcsv(train_path, label_cols, t=np.float32)
    X_test = readcsv(test_path, feature_cols, t=np.float32)
    y_test = readcsv(test_path, label_cols, t=np.float32)

    # Dataset creation
    xgb_train = xgb.DMatrix(X_train, label=np.array(y_train))
    xgb_test = xgb.DMatrix(X_test, label=np.array(y_test))

    # Training parameters
    params = {
        'max_bin': 256,
        'scale_pos_weight': 2,
        'lambda_l2': 1,
        'alpha': 0.9,
        'max_depth': 8,
        'num_leaves': 2**8,
        'verbosity': 0,
        'objective': 'multi:softmax',
        'learning_rate': 0.3,
        'num_class': 5,
    }

    # Training
    xgb_model = xgb.train(params, xgb_train, num_boost_round=100)

    # XGBoost prediction
    xgb_prediction = xgb_model.predict(xgb_test)
    xgb_errors_count = np.count_nonzero(xgb_prediction - np.ravel(y_test))

    # Conversion to daal4py
    daal_model = d4p.get_gbt_model_from_xgboost(xgb_model)

    # daal4py prediction
    daal_predict_algo = d4p.gbt_classification_prediction(
        nClasses=params["num_class"],
        resultsToEvaluate="computeClassLabels",
        fptype='float',
    )
    daal_prediction = daal_predict_algo.compute(X_test, daal_model)
    daal_errors_count = np.count_nonzero(daal_prediction.prediction - y_test)

    # Both models must mispredict the same number of rows
    assert np.absolute(xgb_errors_count - daal_errors_count) == 0

    return (
        xgb_prediction,
        xgb_errors_count,
        np.ravel(daal_prediction.prediction),
        daal_errors_count,
        np.ravel(y_test),
    )
def main(readcsv=pd_read_csv, method='defaultDense'):
    """Train a CatBoost model, convert it to daal4py and compare predictions.

    Args:
        readcsv: Callable invoked as ``readcsv(path, columns, t=dtype)`` to
            load the selected CSV columns.
        method: Not used by this function.

    Returns:
        Tuple ``(cb_prediction, cb_errors_count, daal_prediction,
        daal_errors_count, y_test)`` with the daal4py prediction and the
        test labels flattened by ``np.ravel``.

    Raises:
        AssertionError: If the two models disagree on the number of
            mispredicted test rows.
    """
    # Path to data
    train_path = "./data/batch/df_classification_train.csv"
    test_path = "./data/batch/df_classification_test.csv"
    feature_cols = range(3)   # the first three columns are the features
    label_cols = range(3, 4)  # the fourth column is the label

    # Data reading
    X_train = readcsv(train_path, feature_cols, t=np.float32)
    y_train = readcsv(train_path, label_cols, t=np.float32)
    X_test = readcsv(test_path, feature_cols, t=np.float32)
    y_test = readcsv(test_path, label_cols, t=np.float32)

    # Dataset creation
    cb_train = cb.Pool(X_train, label=np.array(y_train))
    cb_test = cb.Pool(X_test, label=np.array(y_test))

    # Training parameters
    params = {
        'reg_lambda': 1,
        'max_depth': 8,
        'num_leaves': 2**8,
        'verbose': 0,
        'objective': 'MultiClass',
        'learning_rate': 0.3,
        'n_estimators': 100,
        'classes_count': 5,
    }

    # Training
    cb_model = cb.CatBoost(params)
    cb_model.fit(cb_train)

    # CatBoost prediction: take the first row of the transposed class output
    cb_classes = cb_model.predict(cb_test, prediction_type='Class')
    cb_prediction = cb_classes.T[0]
    cb_errors_count = np.count_nonzero(cb_prediction - np.ravel(y_test))

    # Conversion to daal4py
    daal_model = d4p.get_gbt_model_from_catboost(cb_model)

    # daal4py prediction
    daal_predict_algo = d4p.gbt_classification_prediction(
        nClasses=params['classes_count'],
        resultsToEvaluate="computeClassLabels",
        fptype='float',
    )
    daal_prediction = daal_predict_algo.compute(X_test, daal_model)
    daal_errors_count = np.count_nonzero(daal_prediction.prediction - y_test)

    # Both models must mispredict the same number of rows
    assert np.absolute(cb_errors_count - daal_errors_count) == 0

    return (
        cb_prediction,
        cb_errors_count,
        np.ravel(daal_prediction.prediction),
        daal_errors_count,
        np.ravel(y_test),
    )
def _predict(self, X, resultsToEvaluate):
    """Run the fitted GBT model on ``X`` and return labels or probabilities.

    Args:
        X: Input samples; validated with ``check_array`` as single or
            double precision and required to have ``self.n_features_``
            columns.
        resultsToEvaluate: ``"computeClassLabels"`` to get decoded class
            labels; any other value returns the probabilities result.

    Returns:
        For ``"computeClassLabels"``, a 1-D array of labels taken from
        ``self.classes_``. Otherwise ``predict_result.probabilities``, or a
        single column of ones when the model was fitted on one class only.

    Raises:
        ValueError: If the number of columns of ``X`` differs from
            ``self.n_features_`` or ``daal_model_`` has not been set.
    """
    # Check that fit has been called
    check_is_fitted(self, ['n_features_', 'n_classes_'])

    # Input validation
    X = check_array(X, dtype=[np.single, np.double])
    if X.shape[1] != self.n_features_:
        raise ValueError('Shape of input is different from what was seen in `fit`')

    want_labels = resultsToEvaluate == "computeClassLabels"

    # Trivial case: only one class was seen, so no model evaluation is needed.
    if self.n_classes_ == 1:
        if want_labels:
            return np.full(X.shape[0], self.classes_[0])
        # Probability callers must not receive class labels: the single
        # class has probability 1 for every sample.
        return np.ones((X.shape[0], 1), dtype=X.dtype)

    if not hasattr(self, 'daal_model_'):
        raise ValueError(("The class {} instance does not have 'daal_model_' attribute set. "
                          "Call 'fit' with appropriate arguments before using this method.").format(type(self).__name__))

    # Define type of data
    fptype = getFPType(X)

    # Prediction
    if daal_version < (2020, 1):
        # NOTE(review): this branch does not forward resultsToEvaluate, so
        # a probabilities request may not be honoured here -- confirm.
        predict_algo = d4p.gbt_classification_prediction(
            fptype=fptype,
            nClasses=self.n_classes_)
    else:
        predict_algo = d4p.gbt_classification_prediction(
            fptype=fptype,
            nClasses=self.n_classes_,
            resultsToEvaluate=resultsToEvaluate)
    predict_result = predict_algo.compute(X, self.daal_model_)

    if not want_labels:
        return predict_result.probabilities

    # Decode labels: map the predicted class indices back to self.classes_
    le = preprocessing.LabelEncoder()
    le.classes_ = self.classes_
    encoded = predict_result.prediction.ravel().astype(np.int64, copy=False)
    return le.inverse_transform(encoded)
train_metric = None if not X_train.equals(X_test): y_train_pred = model_lgbm.predict(X_train) train_metric = metric_func(y_train, y_train_pred) t_lgbm_pred, y_test_pred = bench.measure_function_time(model_lgbm.predict, X_test, params=params) test_metric_lgbm = metric_func(y_test, y_test_pred) t_trans, model_daal = bench.measure_function_time( daal4py.get_gbt_model_from_lightgbm, model_lgbm, params=params) if hasattr(params, 'n_classes'): predict_algo = daal4py.gbt_classification_prediction( nClasses=params.n_classes, resultsToEvaluate='computeClassLabels', fptype='float') t_daal_pred, daal_pred = bench.measure_function_time(predict_algo.compute, X_test, model_daal, params=params) test_metric_daal = metric_func(y_test, daal_pred.prediction) else: predict_algo = daal4py.gbt_regression_prediction() t_daal_pred, daal_pred = bench.measure_function_time(predict_algo.compute, X_test, model_daal, params=params) test_metric_daal = metric_func(y_test, daal_pred.prediction) utils.print_output(