def plot_save(dist, label, model_name):
    """Plot the distribution of ``dist``, save it as a PNG and log mean/std.

    The histogram is drawn with ``sns.distplot`` (8 bins, ``norm`` fit,
    no KDE), written below ``PLOT_FOLDER`` and one row
    ``[model_name, label, mean, std]`` is handed to ``save_report_to_csv``
    for ``acc_validation_report.csv`` below ``REPORT_FOLDER``.

    Args:
        dist: Values to plot; also passed to ``np.mean`` and ``np.std``.
        label: X-axis label; also embedded in the PNG file name.
        model_name: Model identifier. Used verbatim in the file name and the
            report row; shown in the title with underscores replaced by
            spaces and upper-cased.
    """
    # Clear the current figure before drawing.
    plt.clf()

    sns.distplot(dist, fit=norm, kde=False, bins=8)
    plt.xlabel(label)
    plt.ylabel('Frequency')

    display_name = model_name.replace('_', ' ').upper()
    plt.title('Classifier Accuracy:' + display_name)

    png_path = PLOT_FOLDER + "pred_%s_%s.png" % (label, model_name)
    plt.savefig(png_path)
    # Clear the figure again once it has been saved.
    plt.clf()

    summary_row = [model_name, label, np.mean(dist), np.std(dist)]
    save_report_to_csv(REPORT_FOLDER + 'acc_validation_report.csv', summary_row)
def plot_confusion_matrix(confusion_matrix_array):
    """Report a 2x2 confusion matrix to CSV and save it as a heatmap PNG.

    The four cells are appended (row-major) to
    ``<validation model name>_confusion_report.csv`` below ``REPORT_FOLDER``
    and the matrix is rendered as an annotated heatmap whose x axis is
    labelled 'Predicted' and whose y axis is labelled 'Real'.

    The heatmap is drawn on its own freshly created figure, which is closed
    after saving. Earlier code obtained the figure through ``df_cm.plot()``,
    which also drew a line plot of the matrix on the figure that was then
    used for the heatmap, and the figure was never closed.

    Args:
        confusion_matrix_array: 2x2 indexable matrix; cells ``[0][0]``,
            ``[0][1]``, ``[1][0]`` and ``[1][1]`` are read.
    """
    print ('###### Start Confusion Matrix ####')
    print (confusion_matrix_array)
    save_report_to_csv(
        REPORT_FOLDER + get_model_name_by_file(VALIDATION_FILE) + '_confusion_report.csv',
        [
            'MultinomialNB',
            get_model_name_by_file(MODEL_FILE),
            confusion_matrix_array[0][0],
            confusion_matrix_array[0][1],
            confusion_matrix_array[1][0],
            confusion_matrix_array[1][1],
        ])
    print ('###### End Confusion Matrix ####')

    df_cm = pd.DataFrame(confusion_matrix_array, range(2), range(2))

    # Bare model name: MODEL_FILE without its suffix and folder prefix.
    model_name = MODEL_FILE.replace('.politics_ben.skl', '')
    model_name = model_name.replace(SKL_FOLDER, '')

    # A dedicated figure/axes pair keeps unrelated artists off the heatmap.
    fig, ax = plt.subplots()
    try:
        sn.heatmap(df_cm, annot=True, fmt='g', ax=ax,
                   annot_kws={"size": 16})  # annotation font size

        # Labels, title and ticks.
        ax.set_xlabel('Predicted')
        ax.set_ylabel('Real')
        ax.yaxis.set_ticklabels(['Non Political', 'Political'])
        ax.xaxis.set_ticklabels(['Non Political', 'Political'])
        ax.set_title(model_name.replace('_', ' ').upper())

        fig.savefig(
            PLOT_FOLDER + 'confusion_matrix_publica_' + model_name + '.png',
            dpi=400)
    finally:
        # Release the figure even if drawing or saving fails.
        plt.close(fig)
def plot_save(dist, label):
    """Plot the CNN accuracy distribution, save it and log its mean/std.

    Draws ``dist`` with ``sns.distplot`` (8 bins, ``norm`` fit, no KDE),
    saves the figure below ``PLOT_FOLDER`` and passes the row
    ``['CNN', '<label> <model>', mean, std]`` to ``save_report_to_csv`` for
    ``acc_validation_report.csv`` below ``REPORT_FOLDER``. The model name is
    ``H5_FILE`` with ``'.h5'`` and ``H5_FOLDER`` removed.

    Args:
        dist: Values to plot; also passed to ``np.mean`` and ``np.std``.
        label: X-axis label; also used in the file name and the report row.
    """
    sns.distplot(dist, fit=norm, kde=False, bins=8)
    plt.xlabel(label)
    plt.ylabel('Frequency')

    # Strip the extension first, then the folder prefix.
    run_name = H5_FILE.replace('.h5', '').replace(H5_FOLDER, '')
    run_name_upper = run_name.upper()

    plt.title('Accuracy of CNN classifier: (%s)' % run_name_upper)
    plt.savefig(PLOT_FOLDER + "pred_%s_%s_CNN.png" % (label, run_name_upper))
    # Clear the figure once it has been saved.
    plt.clf()

    summary_row = ['CNN', label + ' ' + run_name, np.mean(dist), np.std(dist)]
    save_report_to_csv(REPORT_FOLDER + 'acc_validation_report.csv', summary_row)
def _mean_and_spread(values):
    """Return ``(mean, 2 * std)`` of a sequence of per-fold scores."""
    scores = np.array(values)
    return scores.mean(), scores.std() * 2


def train_CNN(X, y, inp_dim, model, weights, epochs=EPOCHS, batch_size=BATCH_SIZE):
    """Train and evaluate ``model`` with K-fold cross-validation.

    For every fold the model is (re)initialised according to
    ``INITIALIZE_WEIGHTS_WITH``, trained batch by batch for ``epochs`` epochs
    and scored on the held-out split. Weighted, macro and per-class
    precision/recall/F1 are appended to ``CNN_training_report.csv`` below
    ``REPORT_FOLDER``; weighted and micro averages are printed.

    Args:
        X: 2-D array of samples (``X.shape[1]`` is taken as the sentence
            length); indexed with the fold index arrays.
        y: Array of labels aligned with ``X``.
        inp_dim: Unused; kept for interface compatibility.
        model: Model exposing ``layers``, ``train_on_batch`` and
            ``predict_on_batch``.
        weights: Weights loaded into ``model.layers[0]`` when
            ``INITIALIZE_WEIGHTS_WITH == "word2vec"``.
        epochs: Number of passes over each training split.
        batch_size: Batch size forwarded to ``batch_gen``.

    Returns:
        None. Also returns early (after printing ``"ERROR!"``) when
        ``INITIALIZE_WEIGHTS_WITH`` is neither ``"word2vec"`` nor ``"random"``.
    """
    cv_object = KFold(n_splits=NO_OF_FOLDS, shuffle=True, random_state=42)
    print(cv_object)

    sentence_len = X.shape[1]
    # Loop-invariant: number of distinct classes, used for class weights
    # and one-hot encoding of every batch.
    num_classes = len(set(tx_class))

    # Per-fold scores.
    weighted_p, weighted_r, weighted_f1 = [], [], []
    macro_p, macro_r, macro_f1 = [], [], []
    p_class, r_class, f1_class = [], [], []
    # Running sums of the micro-averaged scores (divided by fold count below).
    micro_p, micro_r, micro_f1 = 0., 0., 0.

    for train_index, test_index in cv_object.split(X):
        if INITIALIZE_WEIGHTS_WITH == "word2vec":
            model.layers[0].set_weights([weights])
        elif INITIALIZE_WEIGHTS_WITH == "random":
            shuffle_weights(model)
        else:
            print("ERROR!")
            return

        X_train, y_train = X[train_index], y[train_index]
        X_test, y_test = X[test_index], y[test_index]

        # Glue the labels on as a last column so batches carry samples and
        # labels together; they are split again inside the batch loop.
        y_train = y_train.reshape((len(y_train), 1))
        X_temp = np.hstack((X_train, y_train))

        for epoch in range(epochs):
            for X_batch in batch_gen(X_temp, batch_size):
                x = X_batch[:, :sentence_len]
                y_temp = X_batch[:, sentence_len]

                class_weights = None
                if SCALE_LOSS_FUN:
                    # Weight of a class = its share of the current batch.
                    class_weights = {
                        cw: np.where(y_temp == cw)[0].shape[0] / float(len(y_temp))
                        for cw in range(num_classes)
                    }

                try:
                    y_temp = np_utils.to_categorical(y_temp, num_classes=num_classes)
                except Exception as e:
                    # Best effort, as before: report the offending batch and
                    # let train_on_batch see the labels unchanged.
                    print(e)
                    print(y_temp)

                model.train_on_batch(x, y_temp, class_weight=class_weights)

        y_pred = model.predict_on_batch(X_test)
        y_pred = np.argmax(y_pred, axis=1)

        weighted_p.append(precision_score(y_test, y_pred, average='weighted'))
        weighted_r.append(recall_score(y_test, y_pred, average='weighted'))
        weighted_f1.append(f1_score(y_test, y_pred, average='weighted'))

        micro_p += precision_score(y_test, y_pred, average='micro')
        micro_r += recall_score(y_test, y_pred, average='micro')
        micro_f1 += f1_score(y_test, y_pred, average='micro')

        p_class.append(precision_score(y_test, y_pred, average=None))
        r_class.append(recall_score(y_test, y_pred, average=None))
        f1_class.append(f1_score(y_test, y_pred, average=None))

        macro_p.append(precision_score(y_test, y_pred, average='macro'))
        macro_r.append(recall_score(y_test, y_pred, average='macro'))
        macro_f1.append(f1_score(y_test, y_pred, average='macro'))

    weighted_p_mean, weighted_p_spread = _mean_and_spread(weighted_p)
    weighted_r_mean, weighted_r_spread = _mean_and_spread(weighted_r)
    weighted_f1_mean, weighted_f1_spread = _mean_and_spread(weighted_f1)
    macro_p_mean, macro_p_spread = _mean_and_spread(macro_p)
    macro_r_mean, macro_r_spread = _mean_and_spread(macro_r)
    macro_f1_mean, macro_f1_spread = _mean_and_spread(macro_f1)

    # NOTE(review): the values printed under this heading are the *weighted*
    # averages; the message text is kept unchanged for log compatibility.
    print("macro results are")
    print("average precision is %f" % weighted_p_mean)
    print("average recall is %f" % weighted_r_mean)
    print("average f1 is %f" % weighted_f1_mean)

    # One row per fold, one column per class.
    p_by_class = np.array(p_class)
    r_by_class = np.array(r_class)
    f1_by_class = np.array(f1_class)

    save_report_to_csv(
        REPORT_FOLDER + 'CNN_training_report.csv',
        [
            'CNN',
            get_model_name_by_file(POLITICS_FILE),
            # weighted scores
            weighted_p_mean, weighted_p_spread,
            weighted_r_mean, weighted_r_spread,
            weighted_f1_mean, weighted_f1_spread,
            # macro scores
            macro_p_mean, macro_p_spread,
            macro_r_mean, macro_r_spread,
            macro_f1_mean, macro_f1_spread,
            # by class scores (classes 0 and 1, averaged over folds)
            p_by_class[:, 0].mean(), p_by_class[:, 1].mean(),
            r_by_class[:, 0].mean(), r_by_class[:, 1].mean(),
            f1_by_class[:, 0].mean(), f1_by_class[:, 1].mean(),
        ])

    print("micro results are")
    print("average precision is %f" % (micro_p / NO_OF_FOLDS))
    print("average recall is %f" % (micro_r / NO_OF_FOLDS))
    print("average f1 is %f" % (micro_f1 / NO_OF_FOLDS))
# NOTE(review): these statements appear to be the tail of a definition whose
# header is not visible here; y_true, y_pred, p, r, f1, s and the *_macro /
# *_auc / ff1 / recall / precision names are defined outside this span.
# Score the predictions against the true labels.
accuracy = accuracy_score (y_true, y_pred)
# Append one row to the validation report. Indices 0 and 1 of p, r, f1 and s
# are presumably the per-class values for the two classes -- confirm against
# the code that computes them.
save_report_to_csv (REPORT_FOLDER + 'validation_report.csv', [
    'MultinomialNB',
    get_model_name_by_file(MODEL_FILE),
    get_model_name_by_file(VALIDATION_FILE),
    accuracy,
    p[0], p[1],
    r[0], r[1],
    f1[0], f1[1],
    s[0], s[1],
    f1_macro, recall_macro, precision_macro,
    mean_auc, std_auc,
    ff1, recall, precision ])
print ('Confusion Matrix')
def classification_model(X, Y, model_type=None):
    """Fit a classifier, cross-validate it and append its scores to a CSV.

    When ``load_hiperparameters(POLITICS_FILE)`` yields nothing, the model is
    a ``GridSearchCV`` over ``param_grid[model_type]`` and the best estimator
    found is passed to ``save_hiperparameters``; otherwise the stored
    parameters are applied directly to ``get_model(model_type)``.

    Weighted and macro precision/recall/F1 are computed in a single
    cross-validation pass; per-class scores come from the cross-validated
    predictions. One row is appended to ``<model_type>_training_report.csv``
    below ``REPORT_FOLDER``.

    Args:
        X: Feature matrix.
        Y: Labels aligned with ``X``.
        model_type: Key into ``param_grid`` and argument to ``get_model``;
            it is concatenated into the report file name, so it must be a
            string in practice despite the ``None`` default.

    Returns:
        The model fitted on the full (shuffled) data.
    """
    # Local import: the block previously used only cross_val_score /
    # cross_val_predict from the same scikit-learn module.
    from sklearn.model_selection import cross_validate

    X, Y = shuffle(X, Y, random_state=SEED)
    print("Model Type:", model_type)

    params = load_hiperparameters (POLITICS_FILE)
    # Single source of truth for "no stored hyperparameters". The earlier
    # code chose the search branch with `not params` but gated saving on
    # `params is None`, so an empty result searched without ever saving.
    needs_search = not params

    if needs_search:
        model = GridSearchCV(estimator=get_model(model_type),
                             param_grid=param_grid[model_type],
                             n_jobs=-1, verbose=3)
    else:
        model = get_model(model_type)
        model.set_params(**params)

    model.fit(X, Y)
    predictions = cross_val_predict(model, X, Y, cv=NO_OF_FOLDS)

    if needs_search:
        # Persisting the search result is best effort: a failure is
        # reported but does not abort the evaluation.
        try:
            print('\n Best estimator:')
            print(model.best_estimator_)
            save_hiperparameters (POLITICS_FILE, model.best_estimator_)
            print('\n Best hyperparameters:')
            print(model.best_params_)
        except Exception as error:
            print (error)
            print ('Nothind to do!')

    # One cross-validation pass for all six metrics instead of six passes.
    cv_scores = cross_validate(
        model, X, Y, cv=NO_OF_FOLDS,
        scoring=['precision_weighted', 'recall_weighted', 'f1_weighted',
                 'f1_macro', 'recall_macro', 'precision_macro'])

    scores1 = cv_scores['test_precision_weighted']
    print("Precision(avg): %0.3f (+/- %0.3f)" % (scores1.mean(), scores1.std() * 2))
    precision_score_mean = scores1.mean()
    precision_score_std = scores1.std() * 2

    scores2 = cv_scores['test_recall_weighted']
    print("Recall(avg): %0.3f (+/- %0.3f)" % (scores2.mean(), scores2.std() * 2))
    recall_score_mean = scores2.mean()
    recall_score_std = scores2.std() * 2

    scores3 = cv_scores['test_f1_weighted']
    print("F1-score(avg): %0.3f (+/- %0.3f)" % (scores3.mean(), scores3.std() * 2))
    f1_score_mean = scores3.mean()
    f1_score_std = scores3.std() * 2

    # Per-class metrics from the cross-validated predictions.
    f1_class = f1_score(Y, predictions, average=None)
    r_class = recall_score(Y, predictions, average=None)
    p_class = precision_score(Y, predictions, average=None)

    f1_macro = cv_scores['test_f1_macro']
    r_macro = cv_scores['test_recall_macro']
    p_macro = cv_scores['test_precision_macro']

    print (f1_class, r_class, p_class)

    # Column order is kept exactly as before (macro and per-class groups
    # are F1, recall, precision) so existing report files stay aligned.
    save_report_to_csv (REPORT_FOLDER + model_type + '_training_report.csv', [
        model_type,
        get_model_name_by_file(POLITICS_FILE),
        # weighted scores
        precision_score_mean, precision_score_std,
        recall_score_mean, recall_score_std,
        f1_score_mean, f1_score_std,
        # macro scores
        f1_macro.mean(), f1_macro.std() * 2,
        r_macro.mean(), r_macro.std() * 2,
        p_macro.mean(), p_macro.std() * 2,
        # by class
        f1_class[0], f1_class[1],
        r_class[0], r_class[1],
        p_class[0], p_class[1],
    ])

    return model