def run_classifier(files: Tuple[str, str, str], method: str, method_class: Base, model_file: str, lower_case: bool) -> None:
    "Inherit classes from classifiers.py and apply the predict/accuracy methods"
    # Only the train and test splits are consumed here; the dev split is
    # unpacked but unused.  model_file is kept for interface compatibility
    # with the other runners — it is not read in this function.
    train, _dev, test = files
    predictions = method_class.predict(train, test, lower_case)
    method_class.accuracy(predictions)

    # Render the normalized confusion matrix and store it under Plots/
    make_dirs("Plots")
    figure, axis = plot_confusion_matrix(predictions['truth'],
                                         predictions['pred'],
                                         normalize=True)
    axis.set_title("Normalized Confusion Matrix: {}".format(method.title()))
    figure.tight_layout()
    figure.savefig("Plots/{}.png".format(method))
if not os.path.exists(directory): os.makedirs(directory) os.makedirs(directory + "/normalized") os.makedirs(directory + "/unnormalized") model.save(directory + "/" + file_name + ".model") i += 1 # print(model.evaluate(x_test, y_test, batch_size=batch_size)) (cnf_mat, acc) = gue.manual_verification_100( model, (x_test, y_test), batch_size=b_size) plt.figure(figsize=(10, 10), dpi=100) plotter.plot_confusion_matrix( cnf_mat, classes=range(16), normalize=True, title='Normalized confusion matrix') plt.savefig(directory + "/normalized/" + file_name + "_normalized_" + str(acc) + ".png") plt.figure(figsize=(10, 10), dpi=100) plotter.plot_confusion_matrix( cnf_mat.astype(int), classes=range(16), normalize=False, title='Non-Normalized confusion matrix') plt.savefig(directory + "/unnormalized/" + file_name + "_" + str(acc) +
def run(args):
    """Executes the main process of the script.

    Loads the DIDA and Not-DIDA publications, classifies them with the
    strict and doublon converters over a range of cluster counts, then
    saves the confusion-matrix plots, the logs and the F1-score curves.

    Parameters
    ----------
    args : ArgumentParser
        The arguments of the command typed by the user
    """
    global CONFIG
    CONFIG = exh.load_json("config/{0}.json".format(args.CONFIG))

    print("Loading publications")
    dida_data = exh.load_json(FILENAME_TEMPLATE.format(CONFIG['DIDA_DOCS']))
    notdida_data = exh.load_json(
        FILENAME_TEMPLATE.format(CONFIG['NOTDIDA_DOCS']))
    # DIDA publications first, then Not-DIDA — must line up with y_true below
    docs = [deepcopy(dida_data), deepcopy(notdida_data)]
    display.display_ok("Loading publications done")

    data_directory = DIRECTORY + '/' + CONFIG['ALL_CLUSTERS_DIRECTORY']
    Ndw = exh.load_json(data_directory + "/ndw.json")
    W = exh.load_json(data_directory + "/W.json")

    # Real labels of each publication: 1 for DIDA, 0 for Not-DIDA
    y_true = np.append(np.ones(len(dida_data)), np.zeros(len(notdida_data)))

    strict_result, doublon_result = classification(docs, Ndw, W,
                                                   data_directory, y_true)

    # Same plot + log sequence for both converters
    for result, prefix, key in ((strict_result, "strict_", "strict"),
                                (doublon_result, "doublon_", "doublon")):
        plt.plot_confusion_matrix(result, len(dida_data), len(notdida_data),
                                  prefix, "n_clusters", "Number of clusters",
                                  DIRECTORY, step=1000)
        exh.save_to_log(result, key, "n_clusters", LOG_FILENAME.format(key))

    scores = [strict_result['score'], doublon_result['score']]
    classifiers_names = ["Strict converter", "Doublon converter"]
    plt.plot_lines(strict_result['n_clusters'], scores, classifiers_names,
                   FSCORE_FILENAME, "Number of clusters", "F1-score",
                   step=1000)
def run(args):
    """Executes the main process of the script.

    Loads the DIDA and Not-DIDA publications, trains the three n-gram
    classifiers over a range of thresholds, and saves the confusion-matrix
    plots, the logs and the F1-score curves.

    Parameters
    ----------
    args : ArgumentParser
        The arguments of the command typed by the user
    """
    global CONFIG
    CONFIG = exh.load_json("config/{0}.json".format(args.CONFIG))
    exh.create_directory(DIRECTORY)

    print("Loading publications")
    # Load DIDA publications
    dida_data = exh.load_json(FILENAME_TEMPLATE.format(CONFIG['DIDA_DOCS']))
    # Load Not-DIDA publications
    notdida_data = exh.load_json(
        FILENAME_TEMPLATE.format(CONFIG['NOTDIDA_DOCS']))
    display.display_ok("Loading publications done")

    n = CONFIG['NGRAMS']
    csv_files = csv_filenames(n)

    # Real labels of each publication: 1 for DIDA, 0 for Not-DIDA
    y_true = np.append(np.ones(len(dida_data)), np.zeros(len(notdida_data)))

    # DIDA publications first, then Not-DIDA — must line up with y_true
    data = deepcopy(dida_data)
    data.extend(deepcopy(notdida_data))

    scores = []
    classifiers_names = []

    # (classifier class, display name, file/log key) — the same train/plot/
    # log sequence previously appeared three times verbatim; it is now
    # factored into _train_and_report.
    classifier_specs = (
        (StrictClassifier, "Strict Classifier", "strict"),
        (SplitWeightedClassifier, "Split Weighted Classifier", "splitweighted"),
        (WeightedClassifier, "Weighted Classifier", "weighted"),
    )

    results = None
    for classifier_class, name, key in classifier_specs:
        results = _train_and_report(classifier_class, name, key, data,
                                    csv_files, y_true, len(dida_data),
                                    len(notdida_data), scores,
                                    classifiers_names)

    # All classifiers share the same threshold axis, so the last results'
    # thresholds are used for the combined F1-score plot (as before).
    plt.plot_lines(results['threshold'], scores, classifiers_names,
                   FSCORE_FILENAME, "Threshold", "F1-score")

    display.display_info("Results saved in {0}".format(DIRECTORY))


def _train_and_report(classifier_class, name, key, data, csv_files, y_true,
                      n_dida, n_notdida, scores, classifiers_names):
    """Train one classifier, plot its confusion matrix and log its results.

    Appends the classifier's F1-scores to ``scores`` and its display name
    to ``classifiers_names`` (both mutated in place), and returns the
    ``results`` dict produced by ``train``.
    """
    print("{0} training".format(name))
    results = train(classifier_class, deepcopy(data), csv_files, y_true)
    plt.plot_confusion_matrix(results, n_dida, n_notdida, key + '_',
                              "threshold", "Threshold", DIRECTORY)
    scores.append(results['score'])
    exh.save_to_log(results, key, "threshold", LOG_FILENAME.format(key))
    classifiers_names.append(name)
    display.display_ok("{0} training done".format(name))
    return results