def classify(feats, cantidad):
    lbp_params = ((1, 1, 2, 2, 5), (5, 10, 8, 15, 6))
    har_params = ((1, 1, 1, 2, 5), (1, 10, 20, 11, 8))
    gab1_params = (1, 2, 5, 10)
    gab2_params = (1, 2, 5, 10)
    params_landmarks = (1, 5, 8)
    labels_image = main.generate_labels(lbp_params[0], har_params[0],
                                        gab1_params, gab2_params)
    labels_landmarks = main.generate_labels_landmarks(labels_image[-1] + 1, 6,
                                                      params_landmarks, (), (1))
    labels = np.concatenate([labels_image, labels_landmarks], axis=0)
    # labels = labels_image
    print(labels)
    print('Removing features with low variance')
    rem_var_index = lib_pat.delete_zero_variance_features2(feats, labels, 0.1)
    np.save('rem_var_index.npy', rem_var_index)
    feats, labels = feats[:, rem_var_index], labels[:, rem_var_index]
    print('Separating Features...')
    X_tr, X_te, y_tr, y_te = lib_pat.hold_out(feats, cantidad)
    print('Reducing features by transformation')
    X_tr, X_te = main.reduction_routine(feats, labels, .99, cantidad)
    print('Final reduction (for no colinear features)')
    X_tr, X_te = lib_pat.dim_red_auto_PCA(X_tr, X_te, ratio=.9)
    np.save("X_tr_" + str(cantidad), X_tr)
    np.save("X_te_" + str(cantidad), X_te)
    np.save("y_tr_" + str(cantidad), y_tr)
    np.save("y_te_" + str(cantidad), y_te)
    print('Classification via LDA solver=svd')
    k1, k1_score = lib_pat.classification_LDA(X_tr, X_te, y_tr, y_te,
                                              solver='svd')
    print('Classification via MLP')
    # NOTE: this reuses classification_LDA with its default solver
    k2, k2_score = lib_pat.classification_LDA(X_tr, X_te, y_tr, y_te)
    print('Classification via NN')
    k3, k3_score = classification.training_and_classification_NN(
        X_tr, X_te, y_tr, y_te)
    np.savetxt('k1', CM(y_te, k1), fmt='%2i', delimiter=',')
    np.savetxt('k2', CM(y_te, k2), fmt='%2i', delimiter=',')
    np.savetxt('k3', CM(y_te, k3), fmt='%2i', delimiter=',')
def plot_confusion_matrix(self, label_test, fn_test):
    # `mapping` (class id -> name) is expected at module level
    fn_preds = self.clf.predict(fn_test)
    acc = accuracy_score(label_test, fn_preds)
    cm_ = CM(label_test, fn_preds)
    # row-normalize so each true-label row sums to 1
    # (np.float is removed in recent NumPy; plain float works everywhere)
    cm = normalize(cm_.astype(float), axis=1, norm='l1')
    fig = pl.figure()
    ax = fig.add_subplot(111)
    cax = ax.matshow(cm)
    fig.colorbar(cax)
    for x in range(len(cm)):
        for y in range(len(cm)):
            ax.annotate("%.3f(%d)" % (cm[x][y], cm_[x][y]),
                        xy=(y, x),
                        horizontalalignment='center',
                        verticalalignment='center',
                        fontsize=10)
    cm_cls = np.unique(np.hstack((label_test, fn_preds)))
    cls = [mapping[c] for c in cm_cls]
    pl.yticks(range(len(cls)), cls)
    pl.ylabel('True label')
    pl.xticks(range(len(cls)), cls)
    pl.xlabel('Predicted label')
    pl.title('Mn Confusion matrix (%.3f)' % acc)
    pl.show()
def plot_confusion_matrix(Z_true, Z_pred, normalize=True, ndecimals=2,
                          title="Confusion Matrix", savename=None):
    """
    Make and plot the confusion matrix of a model using
    sklearn.metrics.confusion_matrix.

    Arguments:
        Z_true (array): true observations
        Z_pred (array): predictions
        normalize (bool, optional): whether to normalize the confusion matrix,
            defaults to True
        ndecimals (int, optional): decimals shown per cell when normalized,
            defaults to 2
        title (str, optional): title of plot, defaults to "Confusion Matrix"
        savename (str, optional): plot is saved under this name if provided,
            defaults to None
    """
    c = CM(Z_true, Z_pred)
    if normalize:
        c = c / np.sum(c)
    fig, ax = plt.subplots(figsize=(5, 4.5))
    vmax = 1 if normalize else c.max()
    im = ax.matshow(c, vmin=0, vmax=vmax, cmap="autumn_r")
    plt.colorbar(im)
    s = "{:0." + str(ndecimals) + "f}" if normalize else "{:d}"
    for (i, j), z in np.ndenumerate(c):
        ax.text(j, i, s.format(z), ha="center", va="center", fontsize=16)
    ax.set_xlabel("Predicted value", fontsize=12)
    ax.xaxis.set_label_position("top")
    ax.set_ylabel("True value", fontsize=12)
    fig.suptitle(title, fontsize=16)
    fig.subplots_adjust(top=0.84)
    if savename is not None:
        plt.savefig(f"Figures/{savename}.png", dpi=300)
    plt.show()
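# --- Hedged usage sketch (not from the source): calling the plotting helper
# --- above on synthetic labels. Assumes numpy/matplotlib and the CM alias
# --- are already imported, as in the surrounding snippets.
y_true_demo = np.array([0, 0, 1, 1, 2, 2, 2, 0])
y_pred_demo = np.array([0, 1, 1, 1, 2, 0, 2, 0])
plot_confusion_matrix(y_true_demo, y_pred_demo, normalize=False,
                      title="Toy Confusion Matrix")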
def update(self, pred_label, gt_label):
    """Update per instance

    Args:
        pred_label (np.ndarray): (num_points,)
        gt_label (np.ndarray): (num_points,)
    """
    # refer to sklearn.metrics.confusion_matrix
    confusion_matrix = CM(gt_label, pred_label, labels=self.labels)
    self.confusion_matrix += confusion_matrix
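# --- Hedged companion sketch (not from the source): once update() has
# --- accumulated self.confusion_matrix, per-class scores can be read off its
# --- rows and columns. The helper name below is illustrative only.
def per_class_scores(confusion_matrix):
    tp = np.diag(confusion_matrix).astype(float)
    fp = confusion_matrix.sum(axis=0) - tp   # predicted as class c, but wrong
    fn = confusion_matrix.sum(axis=1) - tp   # truly class c, but missed
    accuracy = tp / np.maximum(tp + fn, 1)   # per-class recall/accuracy
    iou = tp / np.maximum(tp + fp + fn, 1)   # intersection over union
    return accuracy, iou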
def landmark_classifier(feats, cantidad, iterations, separate_ratio):
    params_landmarks = (1, 5, 8)
    labels_landmarks = main.generate_labels_landmarks(0, 1, params_landmarks,
                                                      (), (1))
    print(labels_landmarks)
    print('Removing features with low variance')
    feats, labels = lib_pat.delete_zero_variance_features(
        feats, labels_landmarks, 0.05)
    lda_scores = []
    mlp_scores = []
    for i in range(iterations):
        print('Classification Nº {}/{}'.format(i + 1, iterations))
        print('Separating Features...')
        X_tr, X_te, y_tr, y_te, sep_list = lib_pat.separate_train_test(
            feats, separate_ratio, cantidad)
        print('Reducing features by transformation')
        X_tr, X_te = main.reduction_routine(feats, labels, separate_ratio,
                                            .99, cantidad, sep_list)
        print('Final reduction (for no colinear features)')
        X_tr, X_te = lib_pat.dim_red_auto_PCA(X_tr, X_te, ratio=.9)
        print('Classification via LDA solver=svd')
        k1, k1_score = lib_pat.classification_LDA(X_tr, X_te, y_tr, y_te,
                                                  solver='svd')
        lda_scores.append(k1_score)
        print('Classification via MLP')
        # NOTE: this reuses classification_LDA with its default solver
        k2, k2_score = lib_pat.classification_LDA(X_tr, X_te, y_tr, y_te)
        mlp_scores.append(k2_score)
        np.savetxt('k1', CM(y_te, k1), fmt='%2i', delimiter=',')
        np.savetxt('k2', CM(y_te, k2), fmt='%2i', delimiter=',')
    lda_mean = sum(lda_scores) / float(len(lda_scores))
    print('LDA mean accuracy:', lda_mean)
    mlp_mean = sum(mlp_scores) / float(len(mlp_scores))
    print('MLP mean accuracy:', mlp_mean)
def plot_confusion_matrix(cm, title='Confusion Matrix', cmap=plt.cm.binary):
    # `labels` is expected as a module-level list of class names
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    xlocations = np.array(range(len(labels)))
    plt.xticks(xlocations, labels, rotation=90)
    plt.yticks(xlocations, labels)
    plt.ylabel('True label')
    plt.xlabel('Predicted label')


cm = CM(ytest, ypred)
np.set_printoptions(precision=2)
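# --- Hedged usage sketch (not from the source): with the module-level
# --- `labels` list defined (as done later in this file), the matrix computed
# --- above can be rendered directly.
plot_confusion_matrix(cm, title='Confusion Matrix')
plt.show()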
def classify_trained(cantidad):
    X_tr = np.load("X_tr_" + str(cantidad) + ".npy")
    X_te = np.load("X_te_" + str(cantidad) + ".npy")
    y_tr = np.load("y_tr_" + str(cantidad) + ".npy")
    y_te = np.load("y_te_" + str(cantidad) + ".npy")
    print('Classification via LDA solver=svd')
    k1, k1_score = lib_pat.classification_LDA(X_tr, X_te, y_tr, y_te,
                                              solver='svd')
    print('Classification via MLP')
    # NOTE: this reuses classification_LDA with its default solver
    k2, k2_score = lib_pat.classification_LDA(X_tr, X_te, y_tr, y_te)
    print('Classification via NN')
    k3, k3_score = classification.training_and_classification_NN(
        X_tr, X_te, y_tr, y_te)
    np.savetxt('k1', CM(y_te, k1), fmt='%2i', delimiter=',')
    np.savetxt('k2', CM(y_te, k2), fmt='%2i', delimiter=',')
    np.savetxt('k3', CM(y_te, k3), fmt='%2i', delimiter=',')
def plot_confusion_matrix(test_label, pred):
    mapping = {
        1: 'co2', 2: 'humidity', 3: 'pressure', 4: 'rmt', 5: 'status',
        6: 'stpt', 7: 'flow', 8: 'HW sup', 9: 'HW ret', 10: 'CW sup',
        11: 'CW ret', 12: 'SAT', 13: 'RAT', 17: 'MAT', 18: 'C enter',
        19: 'C leave', 21: 'occu', 30: 'pos', 31: 'power', 32: 'ctrl',
        33: 'fan spd', 34: 'timer'
    }
    cm_ = CM(test_label, pred)
    # row-normalize so each true-label row sums to 1
    # (np.float is removed in recent NumPy; plain float works everywhere)
    cm = normalize(cm_.astype(float), axis=1, norm='l1')
    fig = pl.figure()
    ax = fig.add_subplot(111)
    cax = ax.matshow(cm, cmap=Color.YlOrBr)
    fig.colorbar(cax)
    for x in range(len(cm)):
        for y in range(len(cm)):
            ax.annotate("%.3f(%d)" % (cm[x][y], cm_[x][y]),
                        xy=(y, x),
                        horizontalalignment='center',
                        verticalalignment='center',
                        fontsize=9)
    cm_cls = np.unique(np.hstack((test_label, pred)))
    cls = [mapping[c] for c in cm_cls]
    pl.yticks(range(len(cls)), cls)
    pl.ylabel('True label')
    pl.xticks(range(len(cls)), cls)
    pl.xlabel('Predicted label')
    pl.title('Confusion Matrix (%.3f)' % ACC(pred, test_label))
    pl.show()
def update(self, pred_label, gt_label):
    """Update per instance

    Args:
        pred_label (np.ndarray): (num_points,)
        gt_label (np.ndarray): (num_points,)
    """
    # map the ignore label (-100) to the extra class index num_classes so
    # that sklearn.metrics.confusion_matrix can bin it
    gt_label[gt_label == -100] = self.num_classes
    confusion_matrix = CM(gt_label.flatten(), pred_label.flatten(),
                          labels=self.labels)
    self.confusion_matrix += confusion_matrix
def __init__(self, y_true, y_pred, labels=None, sample_weight=None,
             normalize=None):
    self.normalize = normalize
    self.y_pred = y_pred
    Metrics.__init__(self, sample_weight=sample_weight, y_true=y_true,
                     labels=labels)
    self.value = CM(y_true=self.y_true, y_pred=self.y_pred,
                    labels=self.labels, sample_weight=self.sample_weight,
                    normalize=self.normalize)
def evaluate_3way(X_test, y_test, model):
    test_y_prob = model.predict(X_test)
    test_y_pred = np.argmax(test_y_prob, axis=1)
    test_y_true = np.argmax(y_test, axis=1)
    # accuracy
    loss, acc = model.evaluate(X_test, y_test)
    # precision, recall, specificity, and f1_score
    p = precision_score(test_y_true, test_y_pred, average="macro")
    r = recall_score(test_y_true, test_y_pred, average="macro")
    f1 = f1_score(test_y_true, test_y_pred, average="macro")
    # sss is assumed to be imblearn.metrics.sensitivity_specificity_support
    sen, spe, _ = sss(test_y_true, test_y_pred, average="macro")
    print("Test accuracy:", acc)
    print("Test confusion matrix: \n", CM(test_y_true, test_y_pred))
    print("Precision: ", p)
    print("Recall: ", r)
    print("Specificity: ", spe)
    print("f1_score: ", f1)
def evaluate_performance(X_test, y_test, model, name):
    test_y_prob = model.predict(X_test)
    print("test_y_prob", test_y_prob)
    test_y_pred = np.argmax(test_y_prob, axis=1)
    test_y_true = np.argmax(y_test, axis=1)
    # accuracy
    loss, acc = model.evaluate(X_test, y_test)
    # binary precision, recall, specificity, and f1_score
    p = precision_score(test_y_true, test_y_pred)
    r = recall_score(test_y_true, test_y_pred)
    f1 = f1_score(test_y_true, test_y_pred)
    # sss is assumed to be imblearn.metrics.sensitivity_specificity_support
    sen, spe, _ = sss(test_y_true, test_y_pred, average="binary")
    # print results
    print("Test accuracy:", acc)
    print("Test confusion matrix: \n", CM(test_y_true, test_y_pred))
    print("Precision: ", p)
    print("Recall: ", r)
    print("Specificity: ", spe)
    print("f1_score: ", f1)
def RunModel(model, data, columns, Predict):
    # `train` and `test` are module-level split fractions
    X = data[columns]
    Y = data[Predict]
    X_train, X_test, y_train, y_test = train_test_split(
        X, Y, train_size=train, test_size=test, random_state=42)
    Model = model
    Model.fit(X_train, y_train)
    prediction = Model.predict(X_test)
    mse = MSE(y_test, prediction)
    r2 = R2(y_test, prediction)
    mae = MAE(y_test, prediction)
    acc = AS(y_test, prediction)
    con_met = CM(y_test, prediction)
    return mse, r2, mae, acc, con_met
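# --- Hedged usage sketch (not from the source): every name below is
# --- illustrative. Assumes pandas/numpy and LogisticRegression are imported,
# --- and that the metric aliases inside RunModel (MSE, R2, MAE, AS, CM) come
# --- from sklearn.metrics as in the other snippets in this file.
train, test = 0.7, 0.3  # module-level split fractions read by RunModel
demo_df = pd.DataFrame({'feat_a': np.random.randn(100),
                        'feat_b': np.random.randn(100),
                        'label': np.random.randint(0, 2, 100)})
mse, r2, mae, acc, con_met = RunModel(LogisticRegression(max_iter=1000),
                                      demo_df, ['feat_a', 'feat_b'], 'label')
print(acc)
print(con_met)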
def evaluate_binary(X_test, y_test, model, name):
    test_y_prob = model.predict(X_test)
    test_y_pred = np.argmax(test_y_prob, axis=1)
    test_y_true = np.argmax(y_test, axis=1)
    # accuracy
    loss, acc = model.evaluate(X_test, y_test)
    # AUC from the positive-class probability
    pos_prob = test_y_prob[:, 1]
    auc_score = roc_auc_score(test_y_true, pos_prob)
    # precision, recall, specificity, and f1_score
    p = precision_score(test_y_true, test_y_pred)
    r = recall_score(test_y_true, test_y_pred)
    f1 = f1_score(test_y_true, test_y_pred)
    # sss is assumed to be imblearn.metrics.sensitivity_specificity_support
    sen, spe, _ = sss(test_y_true, test_y_pred, average="binary")
    # print results
    print("Test accuracy:", acc)
    print("Test AUC is: ", auc_score)
    print("Test confusion matrix: \n", CM(test_y_true, test_y_pred))
    print("Precision: ", p)
    print("Recall: ", r)
    print("Specificity: ", spe)
    print("f1_score: ", f1)
    # plot and save the ROC curve against a no-skill baseline
    fpr, tpr, thresholds = roc_curve(test_y_true, pos_prob)
    ns_probs = [0 for _ in range(len(test_y_prob))]
    ns_fpr, ns_tpr, _ = roc_curve(test_y_true, ns_probs)
    plt.axis([0, 1, 0, 1])
    plt.plot(fpr, tpr, marker='.', color='darkorange',
             label='Model AUC (area = {:.2f})'.format(auc_score))
    plt.plot(ns_fpr, ns_tpr, color='royalblue', linestyle='--')
    plt.legend()
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.savefig(name, dpi=300, bbox_inches='tight')
    plt.show()
""" sklearn 使用朴素贝叶斯分类器 """ #### 1、高斯朴素贝叶斯算法 import numpy as np import matplotlib.pyplot as plt from sklearn.naive_bayes import GaussianNB from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split from sklearn.metrics import confusion_matrix as CM test_size=0.3 digits=load_digits() x,y=digits.data,digits.target train_X,test_X,train_Y,test_Y=train_test_split(x,y,test_size=test_size) print(train_X[:10]) print(train_Y[:10]) gnb=GaussianNB().fit(train_X,train_Y) acc_score=gnb.score(test_X,test_Y) print(acc_score) pred_Y=gnb.predict(test_X) # print(pred_Y) prob=gnb.predict_proba(test_X) # print(prob) # print(prob[1,:].sum()) print(CM(test_Y,pred_Y))
# Assumed imports for this snippet (the aliases follow the rest of the file):
# from sklearn.preprocessing import StandardScaler as SS
# from sklearn.model_selection import train_test_split as TTS
# from sklearn.neighbors import KNeighborsClassifier as KNC
# from sklearn.metrics import classification_report as CR, confusion_matrix as CM
scaler = SS()
scaler.fit(df.drop('TARGET CLASS', axis=1))
scaled = scaler.transform(df.drop('TARGET CLASS', axis=1))
df_scale = pd.DataFrame(scaled, columns=df.columns[:-1])
print(df_scale.head())

# SPLIT DATA INTO TRAINING AND TESTING
X_train, X_test, y_train, y_test = TTS(df_scale, df['TARGET CLASS'],
                                       test_size=0.3, random_state=101)

# KNN
model = KNC(n_neighbors=1)
model.fit(X_train, y_train)
pred = model.predict(X_test)
print(CR(y_test, pred))
print(CM(y_test, pred))

# CHOOSE K VALUE (ELBOW METHOD)
error_rate = []
for i in range(1, 40):
    model = KNC(n_neighbors=i)
    model.fit(X_train, y_train)
    pred_i = model.predict(X_test)
    error_rate.append(np.mean(y_test != pred_i))
sns.lineplot(x=np.arange(1, 40), y=np.array(error_rate))
plt.show()

# RERUN WITH NEW K
model = KNC(n_neighbors=37)
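# --- Hedged completion (not in the source): fitting and scoring the re-run
# --- model so the "RERUN WITH NEW K" step actually produces a report.
model.fit(X_train, y_train)
pred = model.predict(X_test)
print(CR(y_test, pred))
print(CM(y_test, pred))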
def evaluate_model(self):
    """
    Evaluate the model. The model is restored from models_path/model_name;
    if it cannot be loaded, exits. Evaluation uses the testing data. Once
    the whole dataset has been forwarded, the classification report and
    confusion matrix are computed.
    """
    # Initialize the variables
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    # Load variables
    if self.SAVE:
        try:
            modelname = self.MODELS_PATH + self.name
            saver = tf.train.import_meta_graph(modelname + ".meta")
            self.saver.restore(sess, modelname)
        except Exception:
            print("Failed to restore model. Exiting.")
            exit()

    #### TESTING ####
    Y_true = []
    Y_pred = []
    testing_time = time.time()
    testing_acc = 0
    testing_loss = 0
    tophonetic = np.vectorize(lambda t: sorted(self.labels)[t])
    for batch_id in range(self.nb_batch_test):
        batch_time = time.time()
        # Get batch
        batch_X = self.X_test[batch_id]
        batch_Y = self.Y_test[batch_id]
        lengths = self.lengths_test[batch_id]
        # Get loss and accuracy
        loss, acc, predictions = sess.run(
            fetches=[self.loss, self.acc, self.predictions],
            feed_dict={
                self.X_: batch_X,
                self.Y_: batch_Y,
                self.seq_lengths: lengths
            })
        # Update global variables
        testing_acc += acc
        testing_loss += loss
        for i in range(self.batchsize):
            true = batch_Y[i, :lengths[i]]
            true = np.argmax(true, axis=1)
            Y_true += list(true)
            pred = predictions[i, :lengths[i]]
            pred = np.argmax(pred, axis=1)
            Y_pred += list(pred)
    testing_time = time.time() - testing_time
    testing_acc /= self.nb_batch_test
    testing_loss /= self.nb_batch_test
    self.logger.write_log(
        "\n\nAccuracy:\t%.2f%%\nLoss:\t\t%s\nTime:\t\t%.2fs\n" %
        (100 * testing_acc, testing_loss, testing_time))
    Y_true = tophonetic(Y_true)
    Y_pred = tophonetic(Y_pred)
    # Classification Report (CR)
    self.logger.write_log(CR(Y_true, Y_pred))
    # Confusion Matrix (CM)
    mat = CM(Y_true, Y_pred)
    # header line
    CONFMAT = "\t" + "\t".join([lbl[:5] for lbl in sorted(self.labels)]) + "\n"
    for i, phonetic in enumerate(sorted(self.labels)):
        CONFMAT += phonetic[:5] + "\t" + "\t".join(
            map(str, mat[i].tolist() + [np.sum(mat[i])])) + "\n\n"
    # footer line, sums
    CONFMAT += "\t" + "\t".join(map(str, np.sum(mat, axis=0).tolist()))
    self.logger.write_log(CONFMAT)
plt.scatter(X_[:, 0], X_[:, 1], c=y_, cmap="rainbow", s=30)
plt.show()

clf_lo = LogiR().fit(X_, y_)
prob = clf_lo.predict_proba(X_)

# Put the samples and their probabilities into one DataFrame
prob = pd.DataFrame(prob)
prob.columns = ["0", "1"]
for i in range(prob.shape[0]):
    if prob.loc[i, "1"] > 0.5:
        prob.loc[i, "pred"] = 1
    else:
        prob.loc[i, "pred"] = 0
prob["y_true"] = y_
prob = prob.sort_values(by="1", ascending=False)

cm = CM(prob.loc[:, "y_true"], prob.loc[:, "pred"], labels=[1, 0])
# Try computing precision and recall by hand?
precision = P(prob.loc[:, "y_true"], prob.loc[:, "pred"], labels=[1, 0])
recall = R(prob.loc[:, "y_true"], prob.loc[:, "pred"], labels=[1, 0])

# Lower the decision threshold to 0.4 and recompute
for i in range(prob.shape[0]):
    if prob.loc[i, "1"] > 0.4:
        prob.loc[i, "pred"] = 1
    else:
        prob.loc[i, "pred"] = 0
cm2 = CM(prob.loc[:, "y_true"], prob.loc[:, "pred"], labels=[1, 0])
precision2 = P(prob.loc[:, "y_true"], prob.loc[:, "pred"], labels=[1, 0])
recall2 = R(prob.loc[:, "y_true"], prob.loc[:, "pred"], labels=[1, 0])
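# --- Hedged generalization (not in the source): the two manual thresholds
# --- above extend naturally to a sweep, and a vectorized comparison replaces
# --- the row-by-row loop.
for t in (0.5, 0.4, 0.3):
    pred_t = (prob["1"] > t).astype(int)
    print(t,
          P(prob["y_true"], pred_t, labels=[1, 0]),
          R(prob["y_true"], pred_t, labels=[1, 0]))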
features_train, features_test, labels_train, labels_test = TTS(
    features, labels, test_size=0.3, random_state=0)

# fitting logistic regression to the training set
from sklearn.linear_model import LogisticRegression as lg
classifier = lg(random_state=0)
classifier.fit(features_train, labels_train)

# predicting the test set result
labels_pred = classifier.predict(features_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix as CM
cm = CM(labels_test, labels_pred)

affair = df["affair"].value_counts(normalize=True) * 100

# score of the above model
Score = classifier.score(features_test, labels_test)
print("accuracy of model is ", Score * 100, "%")

# Predict the probability of an affair for a random woman not present in
# the dataset. She's a 25-year-old teacher who graduated college, has been
# married for 3 years, has 1 child, rates herself as strongly religious,
# rates her marriage as fair, and her husband is a farmer.
pred_affair = classifier.predict(
# MODEL
deep_model = estimator.DNNClassifier(
    hidden_units=[20, 20, 20, 20],
    feature_columns=feat_cols,
    n_classes=3,
    optimizer=tf.train.GradientDescentOptimizer(learning_rate=0.001))

# INPUT FUNCTION
input_func = estimator.inputs.numpy_input_fn(x={'x': scaled_x_train},
                                             y=y_train,
                                             shuffle=True,
                                             batch_size=50,
                                             num_epochs=100)

# TRAINING
deep_model.train(input_fn=input_func, steps=500)

# EVALUATION
input_func_eval = estimator.inputs.numpy_input_fn(x={'x': scaled_x_test},
                                                  shuffle=False)
preds = list(deep_model.predict(input_fn=input_func_eval))
predictions = [p['class_ids'][0] for p in preds]
print(CR(y_test, predictions))
print(CM(y_test, predictions))
        c, hash_bucket_size=n)
    feat = tf.feature_column.embedding_column(cat, dimension=n)
    fcols.append(feat)

# INPUT FUNCTION
input_func_train = tf.estimator.inputs.pandas_input_fn(x=X_train,
                                                       y=y_train,
                                                       batch_size=1000,
                                                       num_epochs=100,
                                                       shuffle=True)

# MODEL
model = tf.estimator.DNNClassifier(hidden_units=[10, 10, 10, 10],
                                   feature_columns=fcols)

# TRAINING
model.train(input_fn=input_func_train, steps=None)

# EVALUATION
input_func_eval = tf.estimator.inputs.pandas_input_fn(x=X_test,
                                                      shuffle=False,
                                                      num_epochs=1)
preds = model.predict(input_fn=input_func_eval)
lpreds = list(preds)
cpreds = [pred['class_ids'][0] for pred in lpreds]
print(CM(y_true=y_test, y_pred=cpreds))
print(CR(y_true=y_test, y_pred=cpreds))
    result, test_size=0.3, random_state=0)

rfc = RandomForestClassifier(n_estimators=100)
rfc = rfc.fit(Xtrain, ytrain)
ypred = rfc.predict(Xtest)
score = rfc.score(Xtest, ytest)
r = recall_score(ytest, ypred, average='micro')
r_a = recall_score(ytest, ypred, average='macro')
print("score :", score)
print("recall_score micro", r)
print("recall_score macro", r_a)
cmd = CM(ytest, ypred)
print(cmd)
print(cmd[32:45, ...])
# feature_importance = rfc.feature_importances_
# print(feature_importance)
# print(sorted(zip(map(lambda x: round(x, 4), rfc.feature_importances_), names)))
# rfc_c = cross_val_score(rfc, x, y, cv=10)
# plt.plot(range(1, 11), rfc_c, label="RandomForest")
# plt.show()

labels = list(range(1, 33))
labels.append(50)


def plot_confusion_matrix(cm, title='Confusion Matrix', cmap=plt.cm.binary):
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
for i, lam in enumerate(lam_list):
    S = np.load(folder + "\\" + "lam" + lam + "\\" + r"l21S.npk",
                allow_pickle=True)
    predictions = list(map(binary_error, np.linalg.norm(S, axis=1)))
    print("lambda:", lam)
    print("precision",
          precision(bi_y, predictions, labels=["o", "m"], pos_label="o"))
    print("recall",
          recall(bi_y, predictions, labels=["o", "m"], pos_label="o"))
    print("f1",
          f1_score(bi_y, predictions, labels=["o", "m"], pos_label="o"))
    lams.append(lam)
    precisions.append(
        precision(bi_y, predictions, labels=["o", "m"], pos_label="o"))
    recalls.append(
        recall(bi_y, predictions, labels=["o", "m"], pos_label="o"))
    f1s.append(
        f1_score(bi_y, predictions, labels=["o", "m"], pos_label="o"))
    print(CM(bi_y, predictions))
    print("------------")

print(len(lams), len(recalls), len(f1s), len(precisions))
d = {
    "lambda": list(map(float, lams)),
    "precision": precisions,
    "recall": recalls,
    "f1": f1s
}
data = pd.DataFrame(d)
print(data)
result = data.sort_values(by=["lambda"], ascending=True)
print(result)
l = list(range(len(lams)))
        common = Counter(k_nearest_Labels).most_common(1)
        # finding the most frequently occurring label among the neighbours
        labels.append(common[0][0])
    return np.array(labels)


result = Knn(3, X_Features_Train, Y_Feature_Train, X_Features_Test)
print("Accuracy is: ", end="")
print((np.sum(result == Y_Feature_Test) / len(Y_Feature_Test)) * 100)  # accuracy in percent

print("Predicting result on the following input data: ", end="")
p = np.array([[6.7, 3.3, 5.7, 2.1]])
print(p)
prediction = Knn(3, X_Features_Train, Y_Feature_Train, p)
if prediction[0] == 0:
    print("Model predicted a Setosa")
elif prediction[0] == 1:
    print("Model predicted a Versicolor")
elif prediction[0] == 2:
    print("Model predicted a Virginica")
else:
    print("Not able to predict")

confusionMatrix = CM(Y_Feature_Test, result)
print("confusionMatrix is: ")
print(confusionMatrix)
    1: 15
}  # Note: only class 1's weight is written here; class 0 implicitly keeps weight 1
).fit(Xtrain, Ytrain)
result = clf.predict(Xtest)
score = clf.score(Xtest, Ytest)
recall = recall_score(Ytest, result)
auc = roc_auc_score(Ytest, clf.decision_function(Xtest))
print("testing accuracy %f, recall is %f, auc is %f" % (score, recall, auc))
print(datetime.datetime.fromtimestamp(time() - times).strftime("%M:%S:%f"))
valuec = pd.Series(Ytest).value_counts()

# Check the model's specificity
from sklearn.metrics import confusion_matrix as CM
cm = CM(Ytest, result, labels=(1, 0))

irange = np.linspace(0.01, 0.05, 10)
for i in irange:
    times = time()
    clf = SVC(kernel="linear",
              gamma="auto",
              cache_size=5000,
              class_weight={1: 1 + i}).fit(Xtrain, Ytrain)
    result = clf.predict(Xtest)
    score = clf.score(Xtest, Ytest)
    recall = recall_score(Ytest, result)
    auc = roc_auc_score(Ytest, clf.decision_function(Xtest))
    print("under ratio 1:%f testing accuracy %f, recall is %f, auc is %f" %
X_tr, X_te, y_tr, y_te = lib_pat.separate_train_test(feats, 0.8, cantidad)
print('Reducing features by transformation')
X_tr, X_te = reduction_routine(feats, labels, .99, cantidad)
print('Final reduction (for no colinear features)')
X_tr, X_te = lib_pat.dim_red_auto_PCA(X_tr, X_te, ratio=.9)
# print('Classification via KNN 9')
# k1 = lib_pat.classification_knn(X_tr, X_te, y_tr, y_te, 9)
# print('Classification via SVC linear')
# k2 = lib_pat.classification_SVM(X_tr, X_te, y_tr, y_te, kernel='linear')
# print('Classification via SVC poli')
# k3 = lib_pat.classification_SVM(X_tr, X_te, y_tr, y_te, kernel='poly', degree=3)
print('Classification via LDA solver=svd')
k4 = lib_pat.classification_LDA(X_tr, X_te, y_tr, y_te, solver='svd')
print('Classification via MLP')
# NOTE: this reuses classification_LDA with its default solver
k5 = lib_pat.classification_LDA(X_tr, X_te, y_tr, y_te)
# np.savetxt('k1', CM(y_te, k1), fmt='%2i', delimiter=',')
# np.savetxt('k2', CM(y_te, k2), fmt='%2i', delimiter=',')
# np.savetxt('k3', CM(y_te, k3), fmt='%2i', delimiter=',')
np.savetxt('k4', CM(y_te, k4), fmt='%2i', delimiter=',')
np.savetxt('k5', CM(y_te, k5), fmt='%2i', delimiter=',')
quit()
from sklearn.linear_model import LogisticRegression as LR
import pandas as pd

# https://www.bilibili.com/video/BV1P7411P78r?p=209
digits = load_digits()
X, y = digits.data, digits.target
Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, y, test_size=0.3,
                                                random_state=420)
gnb = GaussianNB().fit(Xtrain, Ytrain)
acc_score = gnb.score(Xtest, Ytest)
Y_pred = gnb.predict(Xtest)
prob = gnb.predict_proba(Xtest)
cm = CM(Ytest, Y_pred)

h = .02
names = ["Multinomial", "Gaussian", "Bernoulli", "Complement"]
classifiers = [MultinomialNB(), GaussianNB(), BernoulliNB(), ComplementNB()]
X, y = make_classification(n_features=2,
                           n_redundant=0,
                           n_informative=2,
                           random_state=1,
                           n_clusters_per_class=1)
rng = np.random.RandomState(2)
X += 2 * rng.uniform(size=X.shape)
linearly_separable = (X, y)
datasets = [
    make_moons(noise=0.3, random_state=0),
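# --- Hedged, self-contained sketch (not from the source): comparing the four
# --- naive Bayes variants named above on the digits data. MultinomialNB,
# --- BernoulliNB, and ComplementNB require non-negative features, which the
# --- digits pixel intensities satisfy.
from sklearn.naive_bayes import MultinomialNB, GaussianNB, BernoulliNB, ComplementNB
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

Xd, yd = load_digits(return_X_y=True)
Xd_tr, Xd_te, yd_tr, yd_te = train_test_split(Xd, yd, test_size=0.3,
                                              random_state=420)
for nb_name, nb_clf in zip(["Multinomial", "Gaussian", "Bernoulli", "Complement"],
                           [MultinomialNB(), GaussianNB(), BernoulliNB(),
                            ComplementNB()]):
    print(nb_name, nb_clf.fit(Xd_tr, yd_tr).score(Xd_te, yd_te))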
# plt.show()

# DUMMY VARIABLES
final = pd.get_dummies(data=df, columns=['purpose'], drop_first=True)
print(final.head())

# SPLIT DATA
X_train, X_test, y_train, y_test = TTS(final.drop('not.fully.paid', axis=1),
                                       final['not.fully.paid'],
                                       test_size=0.3,
                                       random_state=101)

# DECISION TREE CLASSIFIER
tree = DTC()
tree.fit(X_train, y_train)

# PREDICT
tpred = tree.predict(X_test)
print(CR(y_test, tpred))
print(CM(y_test, tpred))

# RANDOM FOREST CLASSIFIER
forest = RFC(n_estimators=500)
forest.fit(X_train, y_train)
fpred = forest.predict(X_test)
print(CR(y_test, fpred))
print(CM(y_test, fpred))

# RANDOM FOREST PERFORMED BETTER OVERALL - BUT THE FALSE NEGATIVES INCREASED
# COMPARED TO A SINGLE TREE
# sns.kdeplot(iris[['sepal_width', 'sepal_length']][iris['species'] == "setosa"])
# plt.show()

# SPLIT DATA
X_train, X_test, y_train, y_test = TTS(iris.drop('species', axis=1),
                                       iris['species'],
                                       test_size=0.3,
                                       random_state=101)

# TRAIN MODEL
model = SVC()
model.fit(X_train, y_train)
pred = model.predict(X_test)
print(CR(y_test, pred), CM(y_test, pred))
print(model)

# GRID SEARCH - "THIS IS NOT NECESSARY, THE MODEL IS PERFECT"
param_grid = {
    'C': list(np.arange(0.1, 10, 0.1)),
    'gamma': [1, 0.1, 0.001, 0.0001]
}
grid = GSCV(SVC(), param_grid, verbose=3, n_jobs=4)
grid.fit(X_train, y_train)
print(grid.best_params_)
print(grid.best_estimator_)
gpred = grid.predict(X_test)
steps = [('over', over), ('under', under), ('model', model)]
pipeline = Pipeline(steps=steps)
cv = RepeatedStratifiedKFold(n_splits=2, n_repeats=1, random_state=1)
scores_over = cross_val_score(pipeline, X, y, scoring='recall', cv=cv,
                              n_jobs=-1)
print(f"k={k}\n")
print(f"mean recall: {np.mean(scores_over)}\n")
print(scores_over)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
pipeline.fit(X_train, y_train)
yhat_test = pipeline.predict(X_test)
yhat_test_proba = pipeline.predict_proba(X_test)[:, 1]
# `labels` is keyword-only in recent sklearn releases
confusion_matrix = CM(y_test, yhat_test, labels=np.unique(y_train))

precision_ls, recall_ls, threshold_ls = precision_recall_curve(
    y_test, yhat_test_proba)
plt.figure(figsize=(10, 10))
threshold_ls = np.append(threshold_ls, 1)
plt.plot(threshold_ls, precision_ls)
plt.plot(threshold_ls, recall_ls)
plt.legend(["precision", "recall"])

tree1 = DecisionTreeClassifier(max_depth=3,
                               min_samples_leaf=30,
                               class_weight="balanced")
tree1.fit(X_train, y_train)
fig = plt.figure(figsize=(25, 20))
_ = tree.plot_tree(tree1,