def train_n_plot(training, training_label, test, test_label, Dataframe, ax, a): # NN model_NN, X_train, X_test, y_train, y_test, acc_NN = ( NN_tensorflow.NN_clf(training, training_label, test, test_label, Dataframe)) y_pred_0n = model_NN.predict_proba(X_test[y_test == 0])[:, 1] y_pred_1n = model_NN.predict_proba(X_test[y_test == 1])[:, 1] fpr_NN, tpr_NN, area_NN = roc_curve(y_pred_0n, y_pred_1n) # Fisher clf, acc_lda = fisher.fisher_LDA(training, training_label, test, test_label, Dataframe) y_pred_0l = clf.predict_proba(test[test_label==0])[:,1] y_pred_1l = clf.predict_proba(test[test_label==1])[:,1] fpr_fis, tpr_fis, area_fis = roc_curve(y_pred_0l, y_pred_1l) # SVM model_svm, acc_svm = SVM.SVM_clf(training, training_label, test, test_label, Dataframe) y_pred_0s = model_svm.predict_proba(test[test_label == 0])[:, 1] y_pred_1s = model_svm.predict_proba(test[test_label == 1])[:, 1] fpr_svm, tpr_svm, area_svm = roc_curve(y_pred_0s, y_pred_1s) # XGboost model, acc_xgb = xgboost_dec_tree.xgb_clf(training, training_label, test, test_label) y_pred_0x = model.predict_proba(test[test_label == 0])[:, 1] y_pred_1x = model.predict_proba(test[test_label == 1])[:, 1] fpr_XG, tpr_XG, area_XG = roc_curve(y_pred_0x, y_pred_1x) # ============================================================================= # Prediction plot (2x2) of all models # ============================================================================= bins = 80 figp, ((axp1, axp2), (axp3, axp4)) = plt.subplots(2, 2, figsize=(12,12), sharex='col', sharey='row') mpl.rcParams['font.size'] = 16 # LDA _ = axp1.hist(y_pred_0l, bins=bins, label='Signals', edgecolor='darkgreen', facecolor='green', alpha=0.5) _ = axp1.hist(y_pred_1l, bins=bins, label='Afterpulses', edgecolor='darkred', facecolor='red', alpha=0.5) axp1.grid(True, color='black', linestyle='--', linewidth=0.5, alpha=0.25) axp1.legend() axp1.set(#xlabel='prediction', ylabel='Frequency', yscale='log', title='LDA') # SVM _ = axp2.hist(y_pred_0s, bins=bins, label='Signals', edgecolor='darkgreen', facecolor='green', alpha=0.5) _ = axp2.hist(y_pred_1s, bins=bins, label='Afterpulses', edgecolor='darkred', facecolor='red', alpha=0.5) axp2.grid(True, color='black', linestyle='--', linewidth=0.5, alpha=0.25) axp2.legend() axp2.set(#xlabel='prediction', #ylabel='Frequency', yscale='log', title='SVM') # NN _ = axp3.hist(y_pred_0n, bins=bins, label='Signals', edgecolor='darkgreen', facecolor='green', alpha=0.5) _ = axp3.hist(y_pred_1n, bins=bins, label='Afterpulses', edgecolor='darkred', facecolor='red', alpha=0.5) axp3.grid(True, color='black', linestyle='--', linewidth=0.5, alpha=0.25) axp3.legend() axp3.set(xlabel='prediction', ylabel='Frequency', yscale='log', title='NN') # XGB _ = axp4.hist(y_pred_0x, bins=bins, label='Signals', edgecolor='darkgreen', facecolor='green', alpha=0.5) _ = axp4.hist(y_pred_1x, bins=bins, label='Afterpulses', edgecolor='darkred', facecolor='red', alpha=0.5) axp4.grid(True, color='black', linestyle='--', linewidth=0.5, alpha=0.25) axp4.legend() axp4.set(xlabel='prediction', #ylabel='Frequency', yscale='log', title='XGB') figp.savefig("prediction_models.pdf") # ============================================================================= # Adding to ROC curve # ============================================================================= round_n = 4 if opt: ax.plot(fpr_fis, tpr_fis, label="Optimized Fisher's discriminant with area = " + str(round(area_fis,round_n)), linestyle='-.', color='green') ax.plot(fpr_svm, tpr_svm, marker='x', label="Optimized SVM classifier with area = " + str(round(area_svm,round_n)), linestyle='-', color='darkred') ax.plot(fpr_NN, tpr_NN, label="Optimized neural network classifier with area = " + str(round(area_NN,round_n)), linestyle='--', color='darkorange', marker='*') ax.plot(fpr_XG, tpr_XG, label="Optimized XGBoost classifier with area = " + str(round(area_XG,round_n)), linestyle=':', color='darkblue') # this is an inset axes over the main axes a.plot(fpr_fis, tpr_fis, linestyle='-.', color='green') a.plot(fpr_svm, tpr_svm, linestyle='-', color='darkred', marker='x') a.plot(fpr_NN, tpr_NN, linestyle='-', color='darkorange', marker='*') a.plot(fpr_XG, tpr_XG, linestyle=':', color='darkblue',) else: ax.plot(fpr_fis, tpr_fis, label="Fisher's discriminant with area = " + str(round(area_fis,round_n)), linestyle='-', color='green') ax.plot(fpr_svm, tpr_svm, label="SVM classifier with area = " + str(round(area_svm,round_n)), linestyle='-', color='darkred') ax.plot(fpr_NN, tpr_NN, label="Neural network classifier with area = " + str(round(area_NN,round_n)), linestyle='--', color='darkorange') ax.plot(fpr_XG, tpr_XG, label="XGBoost classifier with area = " + str(round(area_XG,round_n)), linestyle='-', color='darkblue') # this is an inset axes over the main axes a.plot(fpr_fis, tpr_fis, linestyle='-', color='green') a.plot(fpr_svm, tpr_svm, linestyle='-', color='darkred') a.plot(fpr_NN, tpr_NN, linestyle='--', color='darkorange') a.plot(fpr_XG, tpr_XG, linestyle='-', color='darkblue') print() print('Ratio of signals in test:', str(round( len(y_test[y_test == 0])/len(y_test)*100, 5)) + "%") print() # Make predictions for test data and printing accuracies print('[ACCURACY] LDA') print(str(round(acc_lda*100,3)) + "%") print() print('[ACCURACY] SVM') print(str(round(acc_svm*100,3)) + "%") print() print('[ACCURACY] NN') print(str(round(acc_NN*100,3)) + "%") print() print('[ACCURACY] XGboost') print(str(round(acc_xgb*100,3)) + "%") print() print("Separation in standard deviation:") print("LDA: ", (np.mean(y_pred_1l) - np.mean(y_pred_0l)) / np.sqrt(np.std(y_pred_1l)**2 + np.std(y_pred_0l)**2)) print("SVM: ", (np.mean(y_pred_1s) - np.mean(y_pred_0s)) / np.sqrt(np.std(y_pred_1s)**2 + np.std(y_pred_0s)**2)) print("NN: ", (np.mean(y_pred_1n) - np.mean(y_pred_0n)) / np.sqrt(np.std(y_pred_1n)**2 + np.std(y_pred_0n)**2)) print("XGB: ", (np.mean(y_pred_1x) - np.mean(y_pred_0x)) / np.sqrt(np.std(y_pred_1x)**2 + np.std(y_pred_0x)**2)) print() print("AUC:") print("LDA: ", area_fis) print("SVM: ", area_svm) print("NN: ", area_NN) print("XGB: ", area_XG) print() print("Afterpulse discriminated at 99.9% correct classifying signals [%]:") print("LDA: ", min(tpr_fis[fpr_fis>0.001])*100) print("SVM: ", min(tpr_svm[fpr_svm>0.001])*100) print("NN: ", min(tpr_NN[fpr_NN>0.001])*100) print("XGB: ", min(tpr_XG[fpr_XG>0.001])*100) print() if not opt: def get_weights(arb_model): _coef = [abs(i) for i in list(arb_model.coef_[0])] _wei = [i / sum(_coef) for i in _coef] return _wei clf_wei = get_weights(clf) svm_wei = get_weights(model_svm) figi, axi = plt.subplots(figsize=(16,10)) width = 0.5 df = pd.DataFrame(dict(graph=list(training.keys()), XGB=list(model.feature_importances_), LDA=clf_wei, SVM=svm_wei)) df = df.iloc[::-1] corr_start, energy_end = 0-width*10, len(df)*2 corr_form, form_energy = 7*width, len(df)*2-25*width axi.axhspan(corr_start, corr_form, facecolor='purple', alpha=0.4) axi.axhspan(corr_form, form_energy, facecolor='yellow', alpha=0.4) axi.axhspan(form_energy, energy_end, facecolor='cyan', alpha=0.4) axi.text(max(df.XGB) - 0.20, (corr_form-corr_start)/4, "Correlation parameters", color='purple', alpha=0.6, fontsize=18) axi.text(max(df.XGB) - 0.20, (form_energy-corr_form)/1.5, "Shape of pulse parameters", color='orange', alpha=0.8, fontsize=18) axi.text(max(df.XGB) - 0.20, energy_end - (energy_end-form_energy)/1.5, "Energy of pulse parameters", color='blue', alpha=0.6, fontsize=18) ind = np.arange(len(df))*2 def format_func(value, tick_number): return str(round(value*100)) + "%" axi.xaxis.set_major_formatter(plt.FuncFormatter(format_func)) axi.barh(ind + width, df.LDA, width, label='LDA', alpha=0.8, color='darkgreen', edgecolor='darkgreen'); axi.barh(ind + 2*width, df.SVM, width, label='SVM', alpha=0.9, color='darkred', edgecolor='darkred'); axi.barh(ind, df.XGB, width, label='XGB', alpha=0.8, color='darkblue', edgecolor='darkblue'); axi.set(yticks=ind + width, yticklabels=df.graph, ylim=[2*width - 2, len(df)*2], xlabel='Parameter importance') axi.legend(loc=(0.45, 0.32 ), framealpha=0.2, title="Models", fontsize=14, ) for i in range(len(ind)): axi.text(list(df.XGB)[i]+0.001, ind[i]-0.21, str(round(list(df.XGB)[i]*100,3))+"%", color='k', fontweight='bold', fontsize=9) for i in range(len(ind)): axi.text(list(df.LDA)[i]+0.001, ind[i]-0.21+width, str(round(list(df.LDA)[i]*100,3))+"%", color='k', fontweight='bold', fontsize=9) for i in range(len(ind)): axi.text(list(df.SVM)[i]+0.001, ind[i]-0.21+width*2, str(round(list(df.SVM)[i]*100,3))+"%", color='k', fontweight='bold', fontsize=9) figi.savefig("Feature_importance2.pdf") return ax, a