def KNeighbors_en(self, iter):
    # Bagging ensemble of KNeighborsClassifier base estimators
    n_estimators = 10
    model = BC(KNeighborsClassifier(n_neighbors=iter, weights='uniform',
                                    algorithm='auto', n_jobs=-1),
               max_samples=0.5, n_estimators=n_estimators, n_jobs=-1)
    model.fit(self.x_train, self.y_train)
    predict = model.predict(self.x_test)
    print("Ensemble KNeighbors classification: accuracy is",
          metrics.accuracy_score(self.y_test, predict))
    return predict
def make_model(self, n_estimators, n_jobs, verbose=1):
    model1 = RF(n_estimators=1, criterion='entropy', bootstrap=False,
                class_weight='balanced_subsample')
    self.model = BC(base_estimator=model1, n_estimators=n_estimators,
                    max_features=1., verbose=verbose)
def __init__(self, criterion_, max_depth_, random_state_, n_estimators_, n_jobs_):
    # initialize the decision-tree base estimator
    self.tree = DTC(criterion=criterion_, max_depth=max_depth_,
                    random_state=random_state_)
    # initialize the BaggingClassifier, with n_estimators
    # being the only adjustable ensemble value
    self.bag = BC(base_estimator=self.tree, n_estimators=n_estimators_,
                  n_jobs=n_jobs_, random_state=random_state_)
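# A minimal usage sketch of the constructor above (assumptions: the wrapper
# class is named BaggedTree here for illustration, and the toy data below is
# not from the original project):
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=500, n_features=20, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

clf = BaggedTree(criterion_='entropy', max_depth_=5, random_state_=0,
                 n_estimators_=25, n_jobs_=-1)  # hypothetical class name
clf.bag.fit(X_train, y_train)
print(clf.bag.score(X_test, y_test))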
                          skip_header=1)
X = training_data[:, :1000]
Y = training_data[:, 1000]

# Various classifiers
dtc_min_samples_leaf = DTC(min_samples_leaf=15)
etc = ETC()
gbc = GBC()
rfc = RFC()
dtc_max_depth = DTC(max_depth=8)
nb = BernoulliNB()
svc = SVC()
lr = LR()
abc = ABC()
bc = BC()

'''
inv_doc_freq = np.zeros(1000)
for i in range(len(inv_doc_freq)):
    total = sum(X[:, i])
    if total == 0:
        inv_doc_freq[i] = 0
    else:
        inv_doc_freq[i] = math.log(N / sum(X[:, i]))
'''

# Data normalization
for i in range(len(X)):
    max_freq = max(X[i])
    if max_freq == 0:
        pass
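# The normalization loop above is truncated in the source. A common completion,
# assuming the intent is to scale each row by its maximum term frequency while
# skipping all-zero rows (illustrative, not from the original):
for i in range(len(X)):
    max_freq = max(X[i])
    if max_freq == 0:
        continue
    X[i] = X[i] / max_freq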
from sklearn.ensemble import AdaBoostClassifier as ABC

model = ABC(n_estimators=100, random_state=42, learning_rate=.80).fit(X_train, y_train)
prediction = model.predict(X_test)
score = accuracy_score(y_test, prediction)
print(score)

# Bagging Classifier
# ------------------

# In[22]:

from sklearn.ensemble import BaggingClassifier as BC

model = BC(n_estimators=100, random_state=42).fit(X_train, y_train)
prediction = model.predict(X_test)
score = accuracy_score(y_test, prediction)
print(score)

# Extra Trees Classifier
# ----------------------

# In[23]:

from sklearn.ensemble import ExtraTreesClassifier as XTC

model = XTC(n_estimators=100, random_state=42, criterion='entropy',
            max_depth=20).fit(X_train, y_train)
def __init__(self, matrixdatabase):
    self._matrix_database = matrixdatabase
    self._has_fit = False
    self._bc = BC(n_estimators=10)
print(X_all.shape)

# one hot encode
y_s = np.zeros((y_all.shape[0], 2))
y_s[:, 1] = (y_all == 1).reshape(y_all.shape[0], )
y_s[:, 0] = (y_all == 0).reshape(y_all.shape[0], )
pos_weight = np.sum(y_s[:, 0]) / np.sum(y_s[:, 1])

X_train, X_test, y_train, y_test = train_test_split(
    X_all, y_all, test_size=0.30, stratify=y_all, random_state=42)
del X_all

clf = BC(**bc_params)
clf.fit(X_train, y_train.ravel())
#clf = RandomForestClassifier(min_samples_leaf=15, class_weight='balanced')  # {0:1, 1:pos_weight}
#clf.fit(X_train, y_train.ravel())
#print('oob_score: ', clf.oob_score_)
# svm = SVC(probability=True, class_weight='balanced')
# svm.fit(X_train, y_train)

preds_tr = clf.predict_proba(X_train)
print("Predictions_tr:")
print(preds_tr[0:20])
print(np.median(preds_tr[:, 1]))
roc_auc_tr = metrics.roc_auc_score(y_train.ravel(), preds_tr[:, 1])
print('ROC AUC_tr:', roc_auc_tr)
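# bc_params is defined elsewhere in this script. A plausible example of such a
# parameter dict, assuming a shallow decision-tree base estimator (the values
# below are illustrative, not the original configuration):
from sklearn.tree import DecisionTreeClassifier

bc_params = {'base_estimator': DecisionTreeClassifier(max_depth=5,
                                                      class_weight='balanced'),
             'n_estimators': 50,
             'max_samples': 0.5,
             'n_jobs': -1,
             'random_state': 42}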
import pandas as pd
from sklearn.ensemble import BaggingClassifier as BC
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

# DTLZ STUFFS
data = pd.read_csv("features with R2.csv")
inputs_train = data[data.keys()[1:11]]
outputs_train = data[data.keys()[11:]]
targetnames = ["SVM", "GPR", "NN", "EN"]
target_train = outputs_train.idxmax(axis=1)
num_classes = len(targetnames)

clf = BC()
clf = clf.fit(inputs_train, target_train)

# WFG STUFFS
data_wfg = pd.read_csv("WFG features with R2.csv")

predictions = clf.predict(x_test)
con_mat = confusion_matrix(target_test, predictions)
cost_mat = output_test
cost_mat = -cost_mat.sub(cost_mat.max(axis=1), axis=0)
print(con_mat)
num_samples, num_classes = cost_mat.shape
cost = 0
lenp = len(predictions)
for index in range(num_samples):
    cost += cost_mat.iloc[index][predictions[index]]
# print('Cost independent classification', cost/lenp)
cic = cost / lenp
def Find_Optimal_Features(ML_DATA):
    # Creates all combinations of features
    lst = list(itertools.product([0, 1], repeat=8))
    lst.remove((0, 0, 0, 0, 0, 0, 0, 0))
    for i in range(len(lst)):
        lst[i] = list(lst[i])
    outs = []
    for i in range(8):
        outs.append([])
    # Classifiers put into an array for easier access and expandability
    clfs = [
        DTC(), KNN(), BC(ETC()), LSVC(max_iter=10_000),
        RFC(), SVC(), SGDC(), MLP(max_iter=10_000)
    ]
    tot = len(lst)
    y = []
    # Trains all models on all feature sets and gets scores
    for i, item in enumerate(lst[:15]):
        X_train, ys_train, ya_train, X_test, ys_test, ya_test = ML_DATA.Make_Set(
            item, 0.25)
        for clf in clfs:
            clf.fit(X_train, ys_train)
        scores = []
        for j, clf in enumerate(clfs):
            scores.append(clf.score(X_test, ys_test))
            outs[j].append(scores[j])
        PBR(i + 1, tot, name="Test Set", end="\r")
        y = ys_test
    print()
    temp = Counter(y)
    total = len(y)
    rand_ratio = 0
    for item in temp.values():
        rand_ratio += item / total
    rand_ratio = (rand_ratio / 8) * 100
    print("Random Choice :", rand_ratio)
    final = np.zeros(8)
    final_top = np.zeros(8)
    final_btop = np.zeros(8)
    names = [
        "Decision Tree", "KNN", "Bag Extra Tree", "Linear SVC",
        "Random Forest", "SVC", "SGDC", "MLP"
    ]
    # Prints all results
    for k, name in enumerate(names):
        o = RAS(outs[k], 10)
        final_top += np.asarray(lst[o[0]])
        f = np.zeros(8)
        count = 0
        best = outs[k][0]
        print(name + ":")
        for i, item in enumerate(o):
            if i == 0:
                best = outs[k][item]
            print("\t\t{:2}) {:7.4f}% {}".format(i, outs[k][item] * 100, lst[item]))
            if outs[k][item] == best:
                count += 1
                f += np.asarray(lst[item])
                final += np.asarray(lst[item])
        final_btop += f / count
        print()
    print(final)
    print(final_top)
    print(final_btop)
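# RAS and PBR are project helpers not shown here (PBR appears to print a
# progress bar). A plausible stand-in for RAS, assuming it returns the indices
# of the top-n scores, best first (illustrative only):
import numpy as np

def RAS(scores, n):
    # indices of the n highest scores, highest first
    return list(np.argsort(scores)[::-1][:n])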
print(con_mat)
lent = len(target_test)
lenp = len(predictions)
cost_mat = output_test
cost_mat = -cost_mat.sub(cost_mat.max(axis=1), axis=0)
cost = classifier.cost_loss(x_test, cost_mat)
# print(lent == lenp, 'Cost based classification', cost/lenp)
cbc = cost / lenp
"""
classifier.optmize_weights(x_t2, o_t2)
w = classifier.weights
cost = classifier.weightedcost(w, x_test, cost_mat)
print(lent == lenp, 'Weighted Votes Cost based classification', cost)
"""

################################################

num_classes = len(targetnames)
clf = BC()
clf = clf.fit(x_train, target_train)
predictions = clf.predict(x_test)
con_mat = confusion_matrix(target_test, predictions)
cost_mat = output_test
cost_mat = -cost_mat.sub(cost_mat.max(axis=1), axis=0)
print(con_mat)
num_samples, num_classes = cost_mat.shape
cost = 0
lenp = len(predictions)
for index in range(num_samples):
    cost += cost_mat.iloc[index][predictions[index]]
# print('Cost independent classification', cost/lenp)
cic = cost / lenp
classes = targetnames
cost_mat = output_train
def gogo_bagged_svm(fxpath, mpath, spath):
    transform = True
    svc_params = {'penalty': 'l2',
                  'loss': 'l2',
                  'dual': False,
                  'C': 33.0,
                  'intercept_scaling': 1e4,
                  'class_weight': 'auto',
                  'random_state': 42}
    bc_params = {'base_estimator': LinearSVC(**svc_params),
                 'n_estimators': 96,
                 'max_samples': 0.1,
                 'max_features': 0.8,
                 'oob_score': False,
                 # if you have tons of memory (i.e. 32gb ram + 32gb swap)
                 # increasing this parameter may help performance. else,
                 # increasing it may cause "out of memory" errors.
                 'n_jobs': 1,
                 #'n_jobs': 8,
                 'verbose': 1,
                 'random_state': 42}
    '''
    lr_params = {'C': 1e6,  # tr(-3,3,7),
                 'penalty': 'l2',
                 'class_weight': 'auto',
                 'intercept_scaling': 1e6}  # tr(-1,6,7)
    '''
    preds = []
    kpca_fname = '%s/kpca_rbf_{0}_{1}.pkl' % mpath
    s_fname = '%s/kpca_linear_svm{0}_{1}_preds.csv' % spath
    for i in range(7):
        if i < 5:
            nbreed = 1
            sbreed = 'dog'
            nsubject = i + 1
        else:
            nbreed = 2
            sbreed = 'human'
            nsubject = 1 + abs(5 - i)
        print('breed%d.subject%d..' % (nbreed, nsubject))
        X_ictal = load_features(fxpath, nbreed, nsubject, 1)
        X_inter = load_features(fxpath, nbreed, nsubject, 2)
        X_train = vstack((X_inter, X_ictal))
        Y = [0 for x in X_inter] + [1 for x in X_ictal]
        wi = 1.0 / len(X_inter) * 1000
        wp = 1.0 / len(X_ictal) * 1000
        W = array([wp if y else wi for y in Y])
        del X_inter, X_ictal; gc.collect()
        with open(kpca_fname.format(sbreed, nsubject), 'rb') as f:
            kpca = pickle.load(f)
        if transform:
            X_train = kpca_preprocess_features(X_train)
            X_train = kpca_incremental_transform(kpca, X_train)
            gc.collect()
        X_test = load_features(fxpath, nbreed, nsubject, 3)
        if transform:
            X_test = kpca_preprocess_features(X_test)
            X_test = kpca_incremental_transform(kpca, X_test)
            gc.collect()
        bc = BC(**bc_params)
        bc.fit(X_train, Y)
        #print 'oob_score: ', bc.oob_score_
        subject_preds = bc.predict_proba(X_test)[:, 1]
        preds.append(subject_preds)
        subject_preds = pd.DataFrame(subject_preds)
        subject_preds.to_csv(s_fname.format(sbreed, nsubject), index=False, header=None)
        del X_train, X_test; gc.collect()
        sys.stdout.flush()
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier as DTC
from sklearn.ensemble import BaggingClassifier as BC, RandomForestClassifier as RFC
from sklearn.metrics import accuracy_score

digits = load_digits()

# Show the first 36 digit images with their labels
fig = plt.figure()
fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05)
for i in range(36):
    ax = fig.add_subplot(6, 6, i + 1, xticks=[], yticks=[])
    ax.imshow(digits.images[i], cmap='binary', interpolation='nearest')
    ax.text(0, 0.05, str(digits.target[i]), transform=ax.transAxes)

xtr, xte, ytr, yte = train_test_split(digits.data, digits.target,
                                      test_size=0.2, random_state=0)

names = ['DTC', 'BC', 'RFC']
models = [
    DTC(),
    BC(n_estimators=300, max_samples=0.8, random_state=0),
    RFC(n_estimators=300, random_state=0)
]
for name, model in zip(names, models):
    print('Prediction accuracy of model {0}:'.format(name), end='\t')
    model.fit(xtr, ytr)
    ypred = model.predict(xte)
    print(accuracy_score(yte, ypred))

plt.show()
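# Earlier snippets reference bagging's out-of-bag estimate via commented-out
# oob_score lines. A minimal sketch of enabling it on the digits data above
# (the bootstrap/oob settings here are illustrative, not from the original):
bc_oob = BC(n_estimators=300, max_samples=0.8, bootstrap=True,
            oob_score=True, random_state=0)
bc_oob.fit(xtr, ytr)
print('OOB score:', bc_oob.oob_score_)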