def plot(method):
    """
    Plots one line per embedding of 'method' (score per epoch), saves the
    figure below config.RESULT_DIR/<method>/ and shows it.

    Reads the module-level 'data', 'num_epochs', 'metric', 'metric_pretty',
    'name_of_classifier' and 'number_of_figures', and increments the global
    'fig_number' so every call draws into a fresh figure.

    :param method: Method whose results should be plotted (key into 'data')
    """
    global fig_number

    x = list(range(1, 1 + num_epochs))
    colors = config.COLORS
    line_style = '-'

    fig = plt.figure(fig_number, (10, 6))
    fig_number += 1
    fig_title = method + " " + config.EMBEDDINGS_KEY + " " + metric_pretty

    # FigureCanvasBase.set_window_title was deprecated in Matplotlib 3.4 and
    # removed in 3.6; the figure manager owns the window title. The manager
    # can be missing for figures without a GUI window, so guard the call.
    manager = getattr(fig.canvas, "manager", None)
    if manager is not None:
        manager.set_window_title(fig_title)

    ax = plt.subplot(111)

    embeddings = sorted_by_suffix(data[method].keys())
    for i, embedding in enumerate(embeddings):
        # Each embedding gets a color (cycling when there are more
        # embeddings than colors)
        color = colors[i % len(colors)]
        epoch_results = data[method][embedding][name_of_classifier][:num_epochs]
        scores = [epoch[metric] for epoch in epoch_results]
        ax.plot(x, scores, line_style, color=color)

    plt.xticks(x)
    plt.xlabel('Epoch')
    plt.ylabel('Score')

    # Legend entries, colored exactly like the plotted lines
    color_patches = [
        mpatches.Patch(color=colors[i % len(colors)], label=embedding)
        for i, embedding in enumerate(embeddings)
    ]

    # Hide every second epoch tick label (starting with the first) to
    # reduce clutter on the x axis
    for label in ax.xaxis.get_ticklabels()[::2]:
        label.set_visible(False)

    # Shrink current axis by 20% to make room for the legend on the right
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
    ax.legend(handles=color_patches, loc='upper right',
              bbox_to_anchor=(1.3, 1.0))

    save_path = path.join(config.RESULT_DIR, method,
                          fig_title.replace(' ', '_'))
    print("Saving to", save_path)
    plt.savefig(save_path)

    # Only the last figure blocks, so all figures stay open together
    plt.show(block=fig_number == number_of_figures + 1)
def load_results(self, method, embedding, classifier, results_dir):
    """
    Loads results for all epochs of a method + embedding + classifier
    combination

    Every sub-directory of 'results_dir' is treated as one epoch, visited in
    the order given by sorted_by_suffix. Inside each epoch directory the file
    named after the lower-cased classifier is read; each non-blank line
    contributes "<metric> <value>" (first two whitespace-separated fields).
    The parsed metrics are handed to self.add_epoch_results, once per epoch.

    :param method: Method the results belong to
    :param embedding: Embedding/param the results belong to
    :param classifier: Classifier name; its lower-cased form is the file name
    :param results_dir: Directory containing one sub-directory per epoch
    :raises IndexError: if a non-blank line has fewer than two fields
    :raises ValueError: if the second field of a line is not a float
    """
    epoch_dirs = sorted_by_suffix([
        d for d in listdir(results_dir)
        if path.isdir(path.join(results_dir, d))
    ])
    for epoch_dir in epoch_dirs:
        file_path = path.join(results_dir, epoch_dir, classifier.lower())
        epoch_results = {}
        with open(file_path) as f:
            for line in f:
                fields = line.split()
                if not fields:
                    # Blank lines (e.g. a trailing newline at the end of the
                    # file) carry no metric and must not crash the loader
                    continue
                epoch_results[fields[0]] = float(fields[1])
        self.add_epoch_results(method, embedding, classifier, epoch_results)
def print_table(method):
    """
    Creates a table corresponding to combined_plot for each 'method' (binary,
    ternary, agg_ternary) and prints it as LaTeX.

    One column per embedding (ordered by sorted_by_suffix), one row per
    epoch. Every cell equal to the overall maximum score is wrapped in
    bold().

    PS! Works only for one classifier!

    :param method: Method
    """
    print("% " + method + ": " + ", ".join(data[method].keys()))

    # Resolve the column order once. Header cells and value cells must use
    # the same order, otherwise scores end up below the wrong embedding.
    embeddings = list(sorted_by_suffix(data[method].keys()))

    num_columns = len(data[method]) + 2
    column_setup = ("{" + col_align + " " + col_align + "*{"
                    + str(num_columns - 2) + "}{|" + col_align + "}}")
    prefix = ("\\begin{table}[H]\n\t\\centering\n\t\\begin{tabular}"
              + column_setup + "\n")

    # Create header
    header = ("\\multicolumn{" + str(num_columns) + "}{" + col_align + "}{"
              + bold(config.EMBEDDINGS_KEY) + "} \\\\\n")
    header_values = ("& & "
                     + " & ".join(e.split("-")[-1] for e in embeddings)
                     + " \\\\\n")
    hline = "\\hhline{~*{" + str(num_columns - 1) + "}{|-}}\n"
    epochs_header = ("\\parbox[t]{2mm}{\\multirow{" + str(num_epochs)
                     + "}{*}{\\rotatebox[origin=c]{90}{\\textbf{Epochs}}}}\n")

    table = prefix + header + header_values + hline + epochs_header

    # Collect all rounded scores first so the maximum is known before any
    # cell is formatted
    rows = [
        [
            round(data[method][embedding][name_of_classifier][epoch][metric],
                  specificity)
            for embedding in embeddings
        ]
        for epoch in range(num_epochs)
    ]
    all_values = [value for row in rows for value in row]
    max_value = max(all_values) if all_values else None

    for epoch, row in enumerate(rows):
        # Bold per cell instead of a global str.replace on the finished
        # table, which could also hit matching text in headers or caption
        cells = [
            bold(str(value)) if value == max_value else str(value)
            for value in row
        ]
        table += "& " + " & ".join([str(epoch + 1)] + cells) + " \\\\\n"

    caption = (metric_pretty + " scores for " + method + " method on "
               + config.EMBEDDINGS_KEY + " embeddings.")
    caption = caption.replace("_", "\\_")
    postfix = "\t\\end{tabular}\n\t\\caption{" + caption + "}\n\\end{table}\n"
    table += postfix

    print(table)
def __init__(self, methods: List[str], embeddings: List[str], classifiers: List[str], num_epochs: int):
    """
    Builds a ResultsData instance and immediately loads the stored results
    for every requested method / embedding / classifier combination.

    :param methods: List of methods to collect results from
        ("binary", "ternary", "agg_ternary", ...)
    :param embeddings: List of embeddings/params to collect results from
        ("TextBlob", "AFINN", "windowsize=1", ...). If this equals the string
        "all", every sub-directory of config.RESULT_DIR/<method> is used.
    :param classifiers: Names of classifiers to collect results from
        ("SVM c=1", "lexicon classifier", ...)
    :param num_epochs: The minimum number of epochs a method+embedding
        combination must have been trained for.
    """
    # Layout: { method: { embedding: { classifier_name: [ { metric: value } ] } } }
    self.data = {}
    self.methods = sorted(methods)
    self.embeddings = sorted_by_suffix(embeddings)
    self.classifiers = sorted(classifiers)
    self.num_epochs = num_epochs

    # Load results!
    for method in methods:
        if embeddings == "all":
            # Discover the embeddings from the directories on disk
            method_path = path.join(config.RESULT_DIR, method)
            candidates = sorted_by_suffix(os.listdir(method_path))
            selected = [
                name for name in candidates
                if os.path.isdir(os.path.join(method_path, name))
            ]
        else:
            selected = embeddings

        for embedding in selected:
            relative_dir = path.join(method, embedding)
            results_dir = path.join(config.RESULT_DIR, relative_dir)

            # Combinations rejected by filter_tweet are not loaded
            if self.filter_tweet(method, embedding, results_dir):
                for classifier in sorted(classifiers):
                    self.load_results(method, embedding, classifier,
                                      results_dir)
def test_all_epochs(embeddings_dir, results_dir):
    """
    Runs train_and_test.main() once for every entry of 'embeddings_dir'.

    The entries are visited in the order returned by sorted_by_suffix
    (presumably numeric suffix order, so that '7' comes before '18').
    Before each run the train_and_test module is reconfigured; the results
    go to a sub-directory of 'results_dir' named after the entry.

    :param embeddings_dir: Directory holding the embeddings files
    :param results_dir: Directory below which per-entry results are written
    """
    for epoch_file in sorted_by_suffix(listdir(embeddings_dir)):
        logger.info(epoch_file)

        # Point train_and_test at this embeddings file and silence it
        train_and_test.classifiers = classifiers()
        train_and_test.baselines = []
        train_and_test.embedding_file = path.join(embeddings_dir, epoch_file)
        train_and_test.verbose = -2
        train_and_test.quiet = True
        train_and_test.results_dir = path.join(results_dir, epoch_file)

        train_and_test.main()
def get_dir_names(prefix):
    """
    Returns the names of all directories directly inside RESULTS_DIR whose
    name starts with 'prefix', ordered by sorted_by_suffix.

    :param prefix: Required start of the directory name
    """
    matching = []
    for entry in listdir(RESULTS_DIR):
        if not path.isdir(path.join(RESULTS_DIR, entry)):
            continue
        if entry.startswith(prefix):
            matching.append(entry)
    return sorted_by_suffix(matching)