def get_classifier(self, traindata, kf):
    """Pick the best base classifier for *traindata* via k-fold cross-validation (DOAO).

    Five candidate families are scored on each of the ``kf`` folds:
    0 = SVM, 1 = KNN (k tuned over {3, 5, 7}), 2 = logistic regression,
    3 = CART decision tree, 4 = ThunderGBM.

    Args:
        traindata: training set handed to ``fac.to_kfold`` for splitting.
        kf: number of cross-validation folds.

    Returns:
        tuple: ``(fun_best, bestK)`` where ``fun_best`` is the index of the
        candidate with the highest mean fold accuracy and ``bestK`` is the
        last fold's best KNN neighbour count (only meaningful when
        ``fun_best`` selects KNN).
    """
    x_tr, x_te, y_tr, y_te = fac.to_kfold(traindata, kf)
    bestK = 0
    acc = [[] for _ in range(kf)]  # acc[i] collects one accuracy per candidate, in id order
    for i in range(kf):
        # Candidate 0: SVM
        print('test00')
        clf_svm = SVC()
        clf_svm.fit(x_tr[i], y_tr[i].ravel())
        label_svm = clf_svm.predict(x_te[i])
        acc[i].append(fac.get_acc(y_te[i], label_svm)[0])

        # Candidate 1: KNN -- keep the best accuracy over the candidate k values
        print('test01')
        acc_k = []
        aux_k = [3, 5, 7]
        for k in aux_k:
            clf_knn = KNN_GPU(k=k)
            clf_knn.fit(x_tr[i], y_tr[i])
            label_knn = clf_knn.predict(x_te[i])
            acc_k.append(fac.get_acc(y_te[i], label_knn)[0])
        acc[i].append(max(acc_k))
        bestK = aux_k[acc_k.index(max(acc_k))]  # overwritten each fold; last fold wins

        # Candidate 2: logistic regression
        print('test02')
        clf_lr = LogisticRegression()
        clf_lr.fit(x_tr[i], y_tr[i])
        label_lr = clf_lr.predict(x_te[i])
        acc[i].append(fac.get_acc(y_te[i], label_lr)[0])

        # Candidate 3: CART decision tree
        # (the original comment said "XgBoost" but this is a DecisionTreeClassifier)
        print('test03')
        clf_cart = DecisionTreeClassifier()
        clf_cart.fit(x_tr[i], y_tr[i])
        label_cart = clf_cart.predict(x_te[i])
        acc[i].append(fac.get_acc(y_te[i], label_cart)[0])

        # Candidate 4: ThunderGBM
        # (the original comment said "RF" but this is a TGBMClassifier)
        print('test04')
        clf_tgb = TGBMClassifier()
        clf_tgb.fit(x_tr[i], y_tr[i])
        label_tgb = clf_tgb.predict(x_te[i])
        acc[i].append(fac.get_acc(y_te[i], label_tgb)[0])
        print('DOAO round', i, 'end')

    acc = np.array(acc)
    acc_mean = acc.mean(axis=0)  # mean CV accuracy per candidate family
    fun_best = np.argmax(acc_mean)
    return fun_best, bestK
def get_classifier(self, train, kf):
    """Pick the best base classifier for *train* via k-fold cross-validation (DECOC).

    Candidates scored per fold: 0 = SVM, 1 = KNN (k tuned over {3, 5, 7}),
    2 = logistic regression, 3 = CART decision tree, 4 = ThunderGBM.

    Args:
        train: training set handed to ``fac.to_kfold`` for splitting.
        kf: number of cross-validation folds.

    Returns:
        tuple: ``(fun_best, bestK)`` — index of the candidate with the
        highest mean fold accuracy, and the last fold's best KNN
        neighbour count (meaningful only when KNN wins).
    """
    x_tr, x_te, y_tr, y_te = fac.to_kfold(train, kf)
    bestK = 0
    acc = [[] for _ in range(kf)]  # acc[i] collects one accuracy per candidate, in id order
    for i in range(kf):
        # Candidate 0: SVM
        clf_svm = SVC()
        clf_svm.fit(x_tr[i], y_tr[i].ravel())
        label_svm = clf_svm.predict(x_te[i])
        acc[i].append(fac.get_acc(y_te[i], label_svm)[0])

        # Candidate 1: KNN -- keep the best accuracy over the candidate k values
        acc_k = []
        aux_k = [3, 5, 7]
        for k in aux_k:
            clf_knn = KNN_GPU(k=k)
            clf_knn.fit(x_tr[i], y_tr[i])
            label_knn = clf_knn.predict(x_te[i])
            acc_k.append(fac.get_acc(y_te[i], label_knn)[0])
        acc[i].append(max(acc_k))
        bestK = aux_k[acc_k.index(max(acc_k))]  # overwritten each fold; last fold wins

        # Candidate 2: logistic regression
        clf_lr = LogisticRegression()
        clf_lr.fit(x_tr[i], y_tr[i])
        label_lr = clf_lr.predict(x_te[i])
        acc[i].append(fac.get_acc(y_te[i], label_lr)[0])

        # Candidate 3: CART decision tree
        clf_cart = DecisionTreeClassifier()
        clf_cart.fit(x_tr[i], y_tr[i])
        label_cart = clf_cart.predict(x_te[i])
        acc[i].append(fac.get_acc(y_te[i], label_cart)[0])

        # Candidate 4: ThunderGBM
        # (the original comment said "RF" but this is a TGBMClassifier)
        clf_tgb = TGBMClassifier()
        clf_tgb.fit(x_tr[i], y_tr[i].ravel())
        label_tgb = clf_tgb.predict(x_te[i])
        acc[i].append(fac.get_acc(y_te[i], label_tgb)[0])
        print('DECOC round', i, 'end')

    acc = np.array(acc)
    acc_mean = acc.mean(axis=0)  # mean CV accuracy per candidate family
    fun_best = np.argmax(acc_mean)
    return fun_best, bestK
def fun_predict(self, x_te, C, D, L):
    """Predict labels for *x_te* by majority vote over all DOAO pair classifiers.

    Args:
        x_te: test feature matrix.
        C: pair ``(cf, ck)`` where ``cf[i]`` is the classifier-type id of
            pair i (0 SVM, 1 KNN, 2 LR, 3 CART, 4 ThunderGBM) and ``ck[i]``
            is the tuned neighbour count used when ``cf[i] == 1``.
        D: per-pair training sets; the last column of each is the label.
        L: per-pair label couples used to map each base classifier's output
            back to the original class labels.

    Returns:
        Majority-vote label per test sample (``scipy.stats.mode`` result).
    """
    print('func_predict')
    num = len(D)        # number of class-pair classifiers
    cf = C[0]           # classifier-type id per pair
    ck = C[1]           # tuned k per pair (KNN only)
    allpre = np.zeros((len(x_te), num))  # one prediction column per pair
    for i in range(num):
        train = D[i]
        traindata = train[:, 0:-1]
        trainlabel = train[:, -1]
        if cf[i] == 0:
            # svm
            print('SVM predict')
            clf_svm = SVC()
            clf_svm.fit(traindata, trainlabel.ravel())
            label_svm = clf_svm.predict(x_te)
            allpre[:, i] = label_svm
        elif cf[i] == 1:
            # knn
            clf_knn = KNN_GPU(k=ck[i])
            clf_knn.fit(traindata, trainlabel)
            label_knn = clf_knn.predict(x_te)
            allpre[:, i] = label_knn
        elif cf[i] == 2:
            # LR
            print('LR predict')
            clf_lr = LogisticRegression()
            clf_lr.fit(traindata, trainlabel.ravel())
            label_LR = clf_lr.predict(x_te)
            allpre[:, i] = label_LR
        elif cf[i] == 3:
            # CART (local name says "xgb" but this is a DecisionTreeClassifier)
            print('CART predict')
            clf_xgb = DecisionTreeClassifier()
            clf_xgb.fit(traindata, trainlabel)
            label_xgb = clf_xgb.predict(x_te)
            allpre[:, i] = label_xgb
        elif cf[i] == 4:
            # Rf (local name says "rf" but this is a TGBMClassifier)
            print('RF predict')
            clf_rf = TGBMClassifier()
            clf_rf.fit(traindata, trainlabel.ravel())
            label_rf = clf_rf.predict(x_te)
            allpre[:, i] = label_rf
        else:
            print('error !!!! DOAO.fun_predict')
        # Map the pair classifier's output back to the pair's original labels:
        # a prediction of 0 becomes L[i][0], anything else becomes L[i][1].
        # NOTE(review): assumes the pair data in D is relabelled to 0/1 so the
        # base classifiers emit 0/1 -- confirm against the DOAO training code.
        label = L[i]
        for j in range(len(x_te)):
            allpre[j, i] = label[0] if allpre[j, i] == 0 else label[1]
        # print('predict end for')
    # Majority vote across the pair classifiers for each sample.
    pre = mode(allpre, axis=1)[0]
    return pre
def funcPreEDOVO(self, x_test, y_test, C, D):
    """Predict *x_test* with every trained DECOC base classifier.

    Args:
        x_test: test feature matrix.
        y_test: test labels; only its length is used, to size the output.
        C: array whose row i describes classifier i — ``C[i, 0]`` is the
            type id (0 SVM, 1 KNN, 2 LR, 3 CART, 4 ThunderGBM) and
            ``C[i][1]`` the tuned KNN neighbour count.
        D: per-classifier training sets; the last column of each is the label.

    Returns:
        numpy.ndarray: ``(len(y_test), numC)`` matrix, one prediction
        column per base classifier.
    """
    numC = np.asarray(C).shape[0]
    num_set = len(y_test)
    allpre = np.zeros([num_set, numC])
    for i in range(numC):
        train = D[i]
        traindata = np.array(train[:, 0:-1])
        trainlabel = np.array(train[:, -1], dtype='int64')
        if C[i, 0] == 0:
            print('test0')
            # SVM
            clf_svm = SVC()
            clf_svm.fit(traindata, trainlabel.ravel())
            allpre[:, i] = clf_svm.predict(x_test)
        elif C[i, 0] == 1:
            # KNN with the neighbour count tuned during training
            clf_knn = KNN_GPU(k=C[i][1])
            clf_knn.fit(traindata, trainlabel)
            allpre[:, i] = clf_knn.predict(x_test).ravel()
        elif C[i, 0] == 2:
            print('test2')
            # Logistic regression
            clf_lr = LogisticRegression()
            clf_lr.fit(traindata, trainlabel)
            allpre[:, i] = clf_lr.predict(x_test)
        elif C[i, 0] == 3:
            print('test3')
            # CART decision tree
            clf_cart = DecisionTreeClassifier()
            clf_cart.fit(traindata, trainlabel)
            allpre[:, i] = clf_cart.predict(x_test)
        elif C[i, 0] == 4:
            print('test4')
            # ThunderGBM (original comment said "RandomForest" and the local
            # was named clf_ada; it is a TGBMClassifier)
            clf_tgb = TGBMClassifier()
            clf_tgb.fit(traindata, trainlabel.ravel())
            allpre[:, i] = clf_tgb.predict(x_test)
        else:
            print('error !!!! DECOC.funcPreEDOVO')
    return allpre
def objective(hyperparams):
    """Hyperopt objective: train a ThunderGBM model with the sampled
    hyperparameters and return the negated validation AUC as the loss.

    BUG FIX: the original first line was ``hyperparams =
    self.hyperparams.copy()``, which threw away the values hyperopt
    sampled — every trial evaluated the identical configuration.  The
    sampled values must be merged over the model defaults instead.
    """
    hyperparams = {**self.hyperparams, **hyperparams}
    # Cap the number of boosting iterations to keep each trial fast.
    # NOTE(review): confirm 'iterations' is a parameter TGBMClassifier
    # accepts (the defaults use 'n_trees').
    hyperparams['iterations'] = 300
    model = TGBMClassifier(**{**self.params, **hyperparams})
    model.fit(X_train, y_train)
    pred = model.predict(X_eval)
    if self.is_multi_label:
        # NOTE(review): indexing pred[:, 1] assumes predict returns a 2-D
        # array in the multi-label case -- confirm against ThunderGBM.
        score = roc_auc_score(y_eval, pred[:, 1])
    else:
        score = roc_auc_score(y_eval, pred)
    # hyperopt minimises, so negate the AUC.
    return {'loss': -score, 'status': STATUS_OK}
def epoch_train(self, dataloader, run_num, is_multi_label=None, info=None):
    """One training round of the ThunderGBM model.

    Behaviour depends on ``run_num``:
      * explore_params_round — run Bayesian hyperparameter search on the
        train/validation split and publish the result into ``info['tgb']``;
      * all_data_round — retrain on all training indices.
    In 'bagging' mode the hyperparameters are taken from ``info['tgb']``
    with a fresh random seed, and the round is forced to the exploration
    round.

    Args:
        dataloader: dict with 'X' (DataFrame), 'y' (array), index lists
            ('train_idxs', 'val_idxs', 'all_train_idxs') and 'cat_cols'.
        run_num: which round to execute (compared against the model's
            round constants).
        is_multi_label: whether y is a multi-label indicator matrix.
        info: shared mutable dict used to pass hyperparameters between
            rounds/models; read and written here.
    """
    self.is_multi_label = is_multi_label
    X, y, train_idxs, cat = dataloader['X'], dataloader['y'], dataloader[
        'train_idxs'], dataloader['cat_cols']
    train_x, train_y = X.loc[train_idxs], y[train_idxs]
    if info['mode'] == 'bagging':
        # Reuse previously-found hyperparams, vary only the seed,
        # and force the exploration branch below.
        self.hyperparams = info['tgb'].copy()
        self.hyperparams['random_seed'] = np.random.randint(0, 2020)
        run_num = self.explore_params_round
    if run_num == self.explore_params_round:
        print('tgb explore_params_round')
        X, y, val_idxs = dataloader['X'], dataloader['y'], dataloader[
            'val_idxs']
        val_x, val_y = X.loc[val_idxs], y[val_idxs]
        self.bayes_opt(train_x, val_x, train_y, val_y, cat)
        #self.early_stop_opt(train_x, val_x, train_y, val_y, cat)
        # Publish the tuned hyperparameters for other consumers of `info`.
        info['tgb'] = self.hyperparams.copy()
    if run_num == self.all_data_round:
        print('tgb all data round')
        # Final round: train on every available training row.
        all_train_idxs = dataloader['all_train_idxs']
        train_x = X.loc[all_train_idxs]
        train_y = y[all_train_idxs]
    if self.is_multi_label:
        # One binary one-vs-rest model per class column.
        for cls in range(self.num_class):
            cls_y = train_y[:, cls]
            self.models[cls] = TGBMClassifier(**{
                **self.params,
                **self.hyperparams
            })
            self.models[cls].fit(train_x, cls_y)
    else:
        # Single multi-class model; labels converted from one-hot.
        self._model = TGBMClassifier(**{**self.params, **self.hyperparams})
        self._model.fit(train_x, ohe2cat(train_y))
def create_classifer(self, index=0):
    """Build and return a fresh ThunderGBM classifier with 10 trees.

    The ``index`` argument is accepted for interface compatibility but is
    not used.  (The method name's "classifer" spelling is kept because
    callers depend on it.)
    """
    model = TGBMClassifier(n_trees=10)
    return model
class TGBModel(MetaModel):
    """ThunderGBM-backed model wrapper.

    Implements the MetaModel round protocol: hyperparameter exploration
    (``bayes_opt``), full-data training, validation scoring and prediction.
    For multi-label targets it trains one binary model per class column;
    otherwise a single multi-class model.
    """

    def __init__(self):
        super(TGBModel, self).__init__()
        # Round-scheduling constants used by the runner.
        self.max_run = 2
        self.all_data_round = 1
        self.explore_params_round = 0
        self.not_gain_threhlod = 3  # (sic) early-stop threshold name kept as-is
        self.patience = 3
        self.is_init = False
        self.name = 'tgb'
        self.type = 'tree'
        self._model = None  # single multi-class model (non-multi-label path)
        # Fixed constructor params passed to every TGBMClassifier.
        self.params = {
            'objective': 'multi:softprob',
        }
        # Tunable hyperparameters; updated in place by bayes_opt.
        self.hyperparams = {
            "learning_rate": 0.02,
            'num_class': None,
            'n_trees': 1000,
            'depth': 6,
            'column_sampling_rate': 0.8
        }
        self.is_multi_label = None
        self.num_class = None
        self.models = {}  # per-class models for the multi-label path

    def init_model(self, num_class, **kwargs):
        """Record the class count; actual models are built lazily in epoch_train."""
        self.is_init = True
        self.num_class = num_class

    @timeit
    def epoch_train(self, dataloader, run_num, is_multi_label=None, info=None):
        """One training round: explore hyperparams, or retrain on all data,
        then fit the model(s) on the selected training rows.

        In 'bagging' mode hyperparams come from ``info['tgb']`` with a fresh
        random seed and the exploration branch is forced.
        """
        self.is_multi_label = is_multi_label
        X, y, train_idxs, cat = dataloader['X'], dataloader['y'], dataloader[
            'train_idxs'], dataloader['cat_cols']
        train_x, train_y = X.loc[train_idxs], y[train_idxs]
        if info['mode'] == 'bagging':
            self.hyperparams = info['tgb'].copy()
            self.hyperparams['random_seed'] = np.random.randint(0, 2020)
            run_num = self.explore_params_round
        if run_num == self.explore_params_round:
            print('tgb explore_params_round')
            X, y, val_idxs = dataloader['X'], dataloader['y'], dataloader[
                'val_idxs']
            val_x, val_y = X.loc[val_idxs], y[val_idxs]
            self.bayes_opt(train_x, val_x, train_y, val_y, cat)
            #self.early_stop_opt(train_x, val_x, train_y, val_y, cat)
            # Publish tuned hyperparameters for other consumers of `info`.
            info['tgb'] = self.hyperparams.copy()
        if run_num == self.all_data_round:
            print('tgb all data round')
            # Final round: train on every available training row.
            all_train_idxs = dataloader['all_train_idxs']
            train_x = X.loc[all_train_idxs]
            train_y = y[all_train_idxs]
        if self.is_multi_label:
            # One binary model per class column.
            for cls in range(self.num_class):
                cls_y = train_y[:, cls]
                self.models[cls] = TGBMClassifier(**{
                    **self.params,
                    **self.hyperparams
                })
                self.models[cls].fit(train_x, cls_y)
        else:
            # Single multi-class model; labels converted from one-hot.
            self._model = TGBMClassifier(**{**self.params, **self.hyperparams})
            self._model.fit(train_x, ohe2cat(train_y))

    @timeit
    def epoch_valid(self, dataloader):
        """Score the trained model(s) on the validation split.

        Returns:
            float: ROC-AUC on the validation rows.
        """
        X, y, val_idxs = dataloader['X'], dataloader['y'], dataloader[
            'val_idxs']
        val_x, val_y = X.loc[val_idxs], y[val_idxs]
        if not self.is_multi_label:
            preds = self._model.predict_proba(val_x)
        else:
            # Stack the positive-class probability of each per-class model.
            all_preds = []
            for cls in range(y.shape[1]):
                preds = self.models[cls].predict_proba(val_x)
                all_preds.append(preds[:, 1])
            preds = np.stack(all_preds, axis=1)
        valid_auc = roc_auc_score(val_y, preds)
        return valid_auc

    @timeit
    def predict(self, dataloader):
        """Return class probabilities for the test rows.

        Multi-label: an (n_samples, num_class) matrix of per-class positive
        probabilities; otherwise the model's predict_proba output.
        """
        X, test_idxs = dataloader['X'], dataloader['test_idxs']
        test_x = X.loc[test_idxs]
        if not self.is_multi_label:
            return self._model.predict_proba(test_x)
        else:
            all_preds = []
            for cls in range(self.num_class):
                preds = self.models[cls].predict_proba(test_x)
                all_preds.append(preds[:, 1])
            return np.stack(all_preds, axis=1)

    @timeit
    def bayes_opt(self, X_train, X_eval, y_train, y_eval, categories):
        """Tune hyperparameters with hyperopt TPE (10 trials) and update
        ``self.hyperparams`` in place with the best configuration found.

        ``categories`` is accepted but not used in this implementation.
        """
        if self.is_multi_label:
            # NOTE(review): only column 1 of the label matrix is used here --
            # confirm this is the intended target for the search.
            y_train = y_train[:, 1]
            y_eval = y_eval[:, 1]
        else:
            y_train = ohe2cat(y_train)
        # Search space; note "lambda_tgbm" is sampled under the hyperopt
        # label 'l2_leaf_reg'.
        space = {
            "learning_rate":
            hp.loguniform("learning_rate", np.log(0.01), np.log(0.1)),
            "depth": hp.choice("depth", [4, 6, 8, 10, 12]),
            "lambda_tgbm": hp.uniform('l2_leaf_reg', 0.1, 2),
        }

        def objective(hyperparams):
            """Trial objective: negated validation AUC.

            NOTE(review): the next line overwrites the sampled `hyperparams`
            with a copy of the defaults, so every trial evaluates the same
            configuration -- the sampled values should be merged in instead.
            """
            hyperparams = self.hyperparams.copy()
            hyperparams['iterations'] = 300
            model = TGBMClassifier(**{**self.params, **hyperparams})
            model.fit(X_train, y_train)
            pred = model.predict(X_eval)
            if self.is_multi_label:
                score = roc_auc_score(y_eval, pred[:, 1])
            else:
                score = roc_auc_score(y_eval, pred)
            return {'loss': -score, 'status': STATUS_OK}

        trials = Trials()
        best = hyperopt.fmin(fn=objective,
                             space=space,
                             trials=trials,
                             algo=tpe.suggest,
                             max_evals=10,
                             verbose=1,
                             rstate=np.random.RandomState(1))
        # Resolve hp.choice indices back to values and fold into defaults.
        self.hyperparams.update(space_eval(space, best))
        log("auc = {}, hyperparams: {}".format(
            -trials.best_trial['result']['loss'], self.hyperparams))
import sys

sys.path.append("../")
from thundergbm import TGBMClassifier
from sklearn.datasets import load_digits
from sklearn.metrics import accuracy_score

if __name__ == '__main__':
    # Smoke test: fit ThunderGBM on the digits dataset and report
    # accuracy on the same data it was trained on.
    features, labels = load_digits(return_X_y=True)
    model = TGBMClassifier()
    model.fit(features, labels)
    predicted = model.predict(features)
    train_accuracy = accuracy_score(labels, predicted)
    print(train_accuracy)