def get_performance_report(dataset='credit card fraud',
                           model_nr='rf',
                           model_type='synthpop_so',
                           nr_normal_training=(213224,),
                           nr_fraud_training=(0,),
                           nr_synthetic_fraud_training=(0,),
                           test_size=0.25):
    """Evaluate pickled models on the test set and plot their confusion matrices.

    The three ``nr_*`` sequences are read in lockstep: entry ``i`` of each one
    identifies one stored model. For every model the function unpickles it,
    predicts on the module-level ``X_test``, rounds the predictions to 0/1,
    builds a ``classification_report`` against the module-level ``y_test`` and
    hands a confusion-matrix file name to ``cm_analysis``.

    Args:
        dataset: Dataset folder name used inside the model and figure paths.
        model_nr: Model identifier; first component of the pickle file name.
        model_type: Second component of the pickle file name.
        nr_normal_training: Per-model count of class-0 training rows. Its
            length determines how many models are evaluated.
        nr_fraud_training: Per-model count of real class-1 training rows.
        nr_synthetic_fraud_training: Per-model count of synthetic class-1
            training rows.
        test_size: Test split fraction; appears in the file name after ``_ts``.

    Returns:
        dict mapping each model file name (including the ``.pkl`` suffix) to
        the dict produced by ``classification_report(..., output_dict=True)``.

    Raises:
        IndexError: If ``nr_fraud_training`` or ``nr_synthetic_fraud_training``
            has fewer entries than ``nr_normal_training``. Raised before any
            model is loaded so no partial output is produced.
    """
    n_models = len(nr_normal_training)
    if (len(nr_fraud_training) < n_models
            or len(nr_synthetic_fraud_training) < n_models):
        raise IndexError(
            'nr_fraud_training and nr_synthetic_fraud_training need at least '
            'as many entries as nr_normal_training (%d)' % n_models)

    base_dir = '4) performance improvement/' + dataset + '/synthetic only/'
    report_dict = {}  # report on recall, precision, etc.

    # zip() stops after n_models items, so surplus entries in the two other
    # sequences are ignored.
    for n_normal, n_fraud, n_synthetic in zip(nr_normal_training,
                                              nr_fraud_training,
                                              nr_synthetic_fraud_training):
        model_name = (f'{model_nr}_{model_type}_{n_normal}_{n_fraud}_'
                      f'{n_synthetic}_ts{test_size}.pkl')

        # NOTE: pickle.load executes arbitrary code from the file; only load
        # model files that this project produced itself.
        with open(base_dir + 'models/' + model_name, 'rb') as file:
            model = pickle.load(file)

        # round to be 0 or 1
        model_predictions = [int(round(x)) for x in model.predict(X_test)]

        report_dict[model_name] = classification_report(
            y_test, model_predictions,
            labels=[0, 1],
            target_names=['class0', 'class1'],
            digits=2,
            output_dict=True)

        # plot confusion matrix
        # NOTE: model_name still carries '.pkl', so the figure file ends in
        # '.pkl.png'; the title always says 'RF model' regardless of model_nr.
        # Both are kept so existing output names stay stable.
        cm_analysis(
            y_test, model_predictions,
            filename=(base_dir + 'figures/confusion matrices/cm_'
                      + model_name + '.png'),
            labels=[0, 1],
            ymap=['class0', 'class1'],
            title=(f'RF model trained on\n#class0: {n_normal}'
                   f' #class1: {n_fraud}\n #synthetic class1: {n_synthetic}'))
        plt.close()

    return report_dict
# Performance figures for the RF models on the credit card fraud data.
_fraud_fig_dir = '1) classification algorithms/assess model performance/credit card fraud/figures'

# get_model_performance runs first; the figure that is current afterwards is
# saved under a PRcurve_* name and then closed. Unbalanced model first.
for _kind, _model, _X_eval, _y_eval in (
        ('unbalanced', unbalanced_model, X_test_unbalanced, y_test_unbalanced),
        ('balanced', balanced_model, X_test_balanced, y_test_balanced),
):
    get_model_performance(_model, _kind, _X_eval, _y_eval, 'RF',
                          'fraud dataset')
    plt.savefig(f'{_fraud_fig_dir}/PRcurve_rf_{_kind}_fraud.png')
    plt.close()

# Confusion matrices: balanced data first, then unbalanced.
for _kind, _y_eval, _y_hat in (
        ('balanced', y_test_balanced, balanced_predictions),
        ('unbalanced', y_test_unbalanced, unbalanced_predictions),
):
    cm_analysis(
        _y_eval,
        _y_hat,
        filename=f'{_fraud_fig_dir}/cm_rf_{_kind}_fraud.png',
        labels=[0, 1],
        ymap=['normal', 'fraud'],
        title=f'RF performance on {_kind} data\nfraud dataset')
# Load the pickled random-forest models for the customer churn data.
# unbalanced model
path = '1) classification algorithms/random forest/customer churn/model_forest_unbalanced_churn.pkl'
with open(path, 'rb') as file:
    unbalanced_model = pickle.load(file)

# balanced model
path = '1) classification algorithms/random forest/customer churn/model_forest_balanced_churn.pkl'
with open(path, 'rb') as file:
    balanced_model = pickle.load(file)

# Predict labels and round every prediction to an int.
unbalanced_predictions = [
    int(round(x)) for x in unbalanced_model.predict(X_test_unbalanced)
]
balanced_predictions = [
    int(round(x)) for x in balanced_model.predict(X_test_balanced)
]

_churn_fig_dir = '1) classification algorithms/assess model performance/customer churn/figures'

# Report the model performance, then save and close the figure that is
# current afterwards. Unbalanced model first.
for _kind, _model, _X_eval, _y_eval in (
        ('unbalanced', unbalanced_model, X_test_unbalanced, y_test_unbalanced),
        ('balanced', balanced_model, X_test_balanced, y_test_balanced),
):
    get_model_performance(_model, _kind, _X_eval, _y_eval, 'RF',
                          'churn dataset')
    plt.savefig(f'{_churn_fig_dir}/PRcurve_rf_{_kind}_churn.png')
    plt.close()

# Confusion matrices: balanced data first, then unbalanced.
for _kind, _y_eval, _y_hat in (
        ('balanced', y_test_balanced, balanced_predictions),
        ('unbalanced', y_test_unbalanced, unbalanced_predictions),
):
    cm_analysis(
        _y_eval,
        _y_hat,
        filename=f'{_churn_fig_dir}/cm_rf_{_kind}_churn.png',
        labels=[0, 1],
        ymap=['normal', 'churn'],
        title=f'RF performance on {_kind} data\nchurn dataset')
# Performance figures for the RF models on the bioresponse data.
_bio_fig_dir = '1) classification algorithms/assess model performance/bioresponse/figures'

# get_model_performance runs first; the figure that is current afterwards is
# saved under a PRcurve_* name and then closed. Unbalanced model first.
for _kind, _model, _X_eval, _y_eval in (
        ('unbalanced', unbalanced_model, X_test_unbalanced, y_test_unbalanced),
        ('balanced', balanced_model, X_test_balanced, y_test_balanced),
):
    get_model_performance(_model, _kind, _X_eval, _y_eval, 'RF',
                          'bioresponse dataset')
    plt.savefig(f'{_bio_fig_dir}/PRcurve_rf_{_kind}_bio.png')
    plt.close()

# Confusion matrices: balanced data first, then unbalanced.
for _kind, _y_eval, _y_hat in (
        ('balanced', y_test_balanced, balanced_predictions),
        ('unbalanced', y_test_unbalanced, unbalanced_predictions),
):
    cm_analysis(
        _y_eval,
        _y_hat,
        filename=f'{_bio_fig_dir}/cm_rf_{_kind}_bio.png',
        labels=[0, 1],
        ymap=['class0', 'class1'],
        title=f'RF performance on {_kind} data\nbioresponse dataset')
# Performance figures for the NN models on the bioresponse data ("short" run,
# per the file names).
#
# Save and close the figure that is current at this point. The call that drew
# it is not part of this chunk; presumably it is the matching
# get_model_performance call for the unbalanced model -- verify against the
# preceding lines.
plt.savefig(
    '1) classification algorithms/assess model performance/bioresponse/figures/PRcurve_nn_unbalanced_bio_short.png'
)
plt.close()

# Same sequence for the balanced model: report, save the current figure, close.
get_model_performance(balanced_model, 'balanced', X_test_balanced,
                      y_test_balanced, 'NN', 'bioresponse dataset')
plt.savefig(
    '1) classification algorithms/assess model performance/bioresponse/figures/PRcurve_nn_balanced_bio_short.png'
)
plt.close()
print('PR curve ready')  # progress message

# Confusion matrices: balanced data first, then unbalanced.
# NOTE(review): ymap pairs labels [0, 1] with ['response', 'no response'];
# if label 1 means "response" in this dataset the two names are swapped --
# confirm against cm_analysis and the dataset encoding.
cm_analysis(
    y_test_balanced,
    balanced_predictions,
    filename=
    '1) classification algorithms/assess model performance/bioresponse/figures/cm_nn_balanced_bio_short.png',
    labels=[0, 1],
    ymap=['response', 'no response'],
    title='NN performance on balanced data\nbioresponse dataset')
cm_analysis(
    y_test_unbalanced,
    unbalanced_predictions,
    filename=
    '1) classification algorithms/assess model performance/bioresponse/figures/cm_nn_unbalanced_bio_short.png',
    labels=[0, 1],
    ymap=['response', 'no response'],
    title='NN performance on unbalanced data\nbioresponse dataset')
print('confusion matrix ready')  # progress message