def load_quizbowl(folds=c.BUZZER_INPUT_FOLDS) \
        -> Tuple[Dict[str, int], Dict[str, list]]:
    """Load the option vocabulary and the processed guesses of each fold.

    Both results are cached on disk as pickles. The option list lives at
    ``bc.OPTIONS_DIR``; the guesses of a fold live at
    ``<bc.GUESSES_DIR>/<fold>_processed.pickle``. Whatever is missing is
    computed and written back to those paths.

    Args:
        folds: iterable of fold names to load.

    Returns:
        A pair ``(option2id, guesses_by_fold)``. ``option2id`` maps every
        option to its index in the cached option list; ``guesses_by_fold``
        maps every fold name to the list of non-``None`` results produced by
        ``_process_question``.
    """
    log.info('Loading data')
    question_db = QuestionDatabase()
    quizbowl_db = QuizBowlDataset(bc.MIN_ANSWERS, guesser_train=True,
                                  buzzer_train=True)
    all_questions = question_db.all_questions()

    if not os.path.isfile(bc.OPTIONS_DIR):
        log.info('Loading the set of options')
        # set() removes duplicates; the pickled list then fixes the index of
        # every option for all later runs that load the same file.
        id2option = list(set(quizbowl_db.training_data()[1]))
        with open(safe_path(bc.OPTIONS_DIR), 'wb') as outfile:
            pickle.dump(id2option, outfile)
    else:
        with open(safe_path(bc.OPTIONS_DIR), 'rb') as infile:
            id2option = pickle.load(infile)
    option2id = {o: i for i, o in enumerate(id2option)}
    log.info('Number of options {0}'.format(len(id2option)))

    guesses_by_fold = dict()
    for fold in folds:
        save_dir = '%s_processed.pickle' % (os.path.join(bc.GUESSES_DIR, fold))
        if os.path.isfile(save_dir):
            # Cached result available: skip the expensive processing below.
            with open(safe_path(save_dir), 'rb') as infile:
                guesses_by_fold[fold] = pickle.load(infile)
            log.info('Loading {0} guesses'.format(fold))
            continue

        log.info('Processing {0} guesses'.format(fold))
        guesses = AbstractGuesser.load_guesses(bc.GUESSES_DIR, folds=[fold])
        worker = partial(_process_question, option2id, all_questions)
        inputs = guesses.groupby('qnum')
        processed = _multiprocess(worker, inputs, info='df data', multi=True)
        # The worker returns None for entries that have to be dropped.
        guesses_by_fold[fold] = [x for x in processed if x is not None]
        log.info('Kept {0} processed entries for fold {1}'.format(
            len(guesses_by_fold[fold]), fold))
        with open(safe_path(save_dir), 'wb') as outfile:
            pickle.dump(guesses_by_fold[fold], outfile)
        log.info('Processed {0} guesses saved to {1}'.format(fold, save_dir))

    return option2id, guesses_by_fold
def load_qbml(self, dir, pkl_dir):
    """Collect the bonus questions of every ``*.qbml`` file and pickle them.

    Args:
        dir: path prefix; files matching ``dir + '*.qbml'`` are parsed.
        pkl_dir: path of the pickle file the collected questions are
            written to.

    Returns:
        The list of non-``None`` results of ``self._process_question`` over
        all ``QUESTION`` elements whose ``KIND`` attribute is ``BONUS``.
    """
    bonus_questions = []
    for qbml_path in tqdm(glob.glob(dir + '*.qbml')):
        with open(qbml_path) as handle:
            soup = BeautifulSoup(handle.read(), 'xml')

        # (ID, title-cased first child) for every bonus question in the file.
        candidates = []
        for question in soup.find_all('QUESTION'):
            if question.attrs['KIND'] == 'BONUS':
                first_child = next(question.children)
                candidates.append((question.attrs['ID'], first_child.title()))

        processed = _multiprocess(self._process_question, candidates,
                                  progress=False)
        bonus_questions.extend(
            item for item in processed if item is not None)

    with open(pkl_dir, 'wb') as handle:
        pickle.dump(bonus_questions, handle)
    return bonus_questions
def buzzer2vwexpo(guesses_df: pd.DataFrame,
                  buzzes: Dict[int, List[List[float]]],
                  fold: str) -> None:
    """Given buzzing positions, generate vw_pred, vw_meta, buzz and final files.

    TODO: will be deprecated after the VW stuff is removed from the pipeline.

    Args:
        guesses_df: pd.DataFrame of guesses; it is grouped by ``qnum``.
        buzzes: dictionary of qnum -> buzzing position.
        fold: string indicating the data fold; it is substituted into the
            ``c.PRED_TARGET``, ``c.META_TARGET``, ``c.EXPO_BUZZ`` and
            ``c.EXPO_FINAL`` path templates.

    When no question yields a result, the four files are still created and
    the buzz/final files contain only their header line.
    """
    warnings.warn(
        "buzzer2vwexpo will be deprecated after VW stuff is completely "
        "removed from the pipeline",
        DeprecationWarning)

    inputs = guesses_df.groupby('qnum')
    worker = partial(_buzzer2vwexpo, buzzes)
    result = _multiprocess(worker, inputs, info='buzzer2vwexpo')
    result = [x for x in result if x is not None]

    if result:
        # Transpose [(buzz, pred, meta, final), ...] into four columns.
        buzzf, predf, metaf, finalf = (list(col) for col in zip(*result))
    else:
        # zip(*[]) yields nothing, so the unpacking above would raise.
        buzzf, predf, metaf, finalf = [], [], [], []

    with codecs.open(safe_path(c.PRED_TARGET.format(fold)), 'w', 'utf-8') as pred_file, \
            codecs.open(safe_path(c.META_TARGET.format(fold)), 'w', 'utf-8') as meta_file, \
            codecs.open(safe_path(c.EXPO_BUZZ.format(fold)), 'w', 'utf-8') as buzz_file, \
            codecs.open(safe_path(c.EXPO_FINAL.format(fold)), 'w', 'utf-8') as final_file:

        buzz_file.write('question|sentence|word|page|evidence|final|weight\n')
        final_file.write('question,answer\n')

        log.info('\n\n[buzzer2vwexpo] writing to files')

        # Each per-question entry is a list of rows; flatten before joining.
        buzz_template = '|'.join(['{}'] * 7)
        buzz_out = '\n'.join(
            buzz_template.format(*r)
            for r in itertools.chain.from_iterable(buzzf))
        buzz_file.write(buzz_out)
        log.info('buzz file written')

        final_out = '\n'.join(
            '{0},{1}'.format(*r)
            for r in itertools.chain.from_iterable(finalf))
        final_file.write(final_out)
        log.info('final file written')

        pred_out = '\n'.join(
            '{0} {1}_{2}_{3}'.format(*r)
            for r in itertools.chain.from_iterable(predf))
        pred_file.write(pred_out)
        log.info('vw_pred file written')

        meta_out = '\n'.join(
            '{0} {1} {2} {3}'.format(*r)
            for r in itertools.chain.from_iterable(metaf))
        meta_file.write(meta_out)
        log.info('vw_meta file written')
def main(folds, model_name):
    """Compute per-fold performance statistics for one model and report them.

    For every fold the guesses are loaded and grouped by ``qnum``, the
    pickled buzzes are read from ``bc.BUZZES_DIR.format(fold, model_name)``,
    and ``get_his_stats`` and ``get_protobowl`` are run. The collected
    variables are finally handed to ``report``.

    Args:
        folds: iterable of fold names to process.
        model_name: name substituted into the ``bc.BUZZES_DIR`` template.
    """
    all_questions = QuestionDatabase().all_questions()
    answers = {k: v.page for k, v in all_questions.items()}
    question_texts = {k: v.text for k, v in all_questions.items()}
    protobowl_ids = {
        k: v.protobowl for k, v in all_questions.items() if v.protobowl != ""
    }
    protobowl_df = load_protobowl().groupby("qid")

    save_dir = "output/summary/new_performance/"
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(save_dir, exist_ok=True)

    # feature -> fold -> value
    variables = defaultdict(dict)
    for fold in folds:
        guesses_df = AbstractGuesser.load_guesses(bc.GUESSES_DIR, folds=[fold])
        questions = guesses_df.groupby("qnum")

        buzzes_dir = bc.BUZZES_DIR.format(fold, model_name)
        with open(buzzes_dir, "rb") as infile:
            buzzes = pickle.load(infile)
        log.info("Buzzes loaded from {}.".format(buzzes_dir))

        # qnum -> n_guessers * length
        top_guesses = _multiprocess(_get_top_guesses, questions,
                                    info="Top guesses", multi=True)
        top_guesses = {k: v for k, v in top_guesses}

        inputs = [top_guesses, buzzes, answers, variables, fold, save_dir]

        # NOTE: get_eop_stats and get_hyper_search accept the same inputs
        # but are currently not run.
        get_his_stats(*inputs)

        # get_protobowl takes its arguments as one list.
        p_inputs = [question_texts, protobowl_ids, protobowl_df,
                    questions] + inputs
        get_protobowl(p_inputs)

    # Freeze the nested default dicts into plain dicts before reporting.
    variables = {key: dict(value) for key, value in variables.items()}

    report(variables, save_dir, folds)
def create_batches(self):
    """Rebuild ``self.batches`` from ``self.dataset``.

    Every dataset item is run through ``self._process_example``, whose
    results are unpacked as ``(example, padded_length)`` pairs. Examples
    with the same padded length are bucketed together, and each bucket is
    cut into ``Batch`` objects of at most ``self.batch_size`` examples.
    """
    self.batches = []
    processed = _multiprocess(self._process_example, self.dataset,
                              info="creat batches", multi=False)

    # padded_length -> examples sharing that length
    buckets = defaultdict(list)
    for example, padded_length in processed:
        buckets[padded_length].append(example)

    for examples in buckets.values():
        for start in range(0, len(examples), self.batch_size):
            # Transpose the slice of 5-tuples into five parallel columns.
            qids, answers, mask, vecs, results = zip(
                *examples[start:start + self.batch_size])
            self.batches.append(Batch(qids, answers, mask, vecs, results))
def load_qbml(self, dir, pkl_dir):
    """Collect the bonus questions of every ``*.qbml`` file and pickle them.

    Args:
        dir: path prefix; files matching ``dir + "*.qbml"`` are parsed.
        pkl_dir: path of the pickle file the collected questions are
            written to.

    Returns:
        The list of non-``None`` results of ``self._process_question`` over
        all ``QUESTION`` elements whose ``KIND`` attribute is ``BONUS``.
    """
    bonus_questions = []
    for qbml_path in tqdm(glob.glob(dir + "*.qbml")):
        with open(qbml_path) as handle:
            soup = BeautifulSoup(handle.read(), "xml")

        # (ID, title-cased first child) for every bonus question in the file.
        candidates = []
        for question in soup.find_all("QUESTION"):
            if question.attrs["KIND"] == "BONUS":
                first_child = next(question.children)
                candidates.append((question.attrs["ID"], first_child.title()))

        processed = _multiprocess(self._process_question, candidates,
                                  progress=False)
        bonus_questions.extend(
            item for item in processed if item is not None)

    with open(pkl_dir, "wb") as handle:
        pickle.dump(bonus_questions, handle)
    return bonus_questions
def get_eop_stats(top_guesses, buzzes, answers, variables, fold, save_dir):
    """Aggregate the end-of-pipeline statistics of one fold.

    Args:
        top_guesses: mapping whose items are fed to ``_get_eop_stats``.
        buzzes, answers: bound as the leading arguments of ``_get_eop_stats``.
        variables: nested mapping that receives the result under
            ``["eop_stats"][fold]``; skipped when ``None``.
        fold: fold name, used for logging and as the key in ``variables``.
        save_dir: not used by this function.

    Returns:
        Mapping from stat key to its aggregate: the mean of the collected
        values for keys in ``EOP_STAT_KEYS_0`` (0 when none were collected)
        and a ``guesser -> count`` dict for keys in ``EOP_STAT_KEYS_1``.
    """
    log.info("[{}] End-of-pipelin reporting".format(fold))
    worker = partial(_get_eop_stats, buzzes, answers)
    # qnum -> key -> int
    per_question = dict(_multiprocess(worker, top_guesses.items(),
                                      info="End-of-pipeline stats",
                                      multi=True))

    # key -> collected values; entries equal to -1 are skipped
    _eop_stats = defaultdict(list)
    all_keys = EOP_STAT_KEYS_0 + EOP_STAT_KEYS_1
    for stat in per_question.values():
        for key in all_keys:
            observed = stat[key]
            if observed != -1:
                _eop_stats[key].append(observed)

    report_lines = []
    for key in EOP_STAT_KEYS_0:
        samples = _eop_stats[key]
        mean = sum(samples) / len(samples) if samples else 0
        _eop_stats[key] = mean
        report_lines.append("{0} {1:.3f}".format(key, mean))

    for key in EOP_STAT_KEYS_1:
        samples = _eop_stats[key]
        per_guesser = dict()
        _eop_stats[key] = per_guesser
        line = key
        # Collected values are compared against the positions in GUESSERS.
        for idx, guesser in enumerate(GUESSERS):
            hits = samples.count(idx)
            line += " {0} {1}".format(guesser, hits)
            per_guesser[guesser] = hits
        report_lines.append(line)

    # Textual summary; assembled but not used further in this function.
    eop_output = "".join(line + "\n" for line in report_lines)

    if variables is not None:
        variables["eop_stats"][fold] = _eop_stats

    return _eop_stats
def main(folds, model_name):
    """Compute per-fold performance statistics for one model and report them.

    For every fold the guesses are loaded and grouped by ``qnum``, the
    pickled buzzes are read from ``bc.BUZZES_DIR.format(fold, model_name)``,
    and ``get_his_stats`` and ``get_protobowl`` are run. The collected
    variables are finally handed to ``report``.

    Args:
        folds: iterable of fold names to process.
        model_name: name substituted into the ``bc.BUZZES_DIR`` template.
    """
    all_questions = QuestionDatabase().all_questions()
    answers = {k: v.page for k, v in all_questions.items()}
    question_texts = {k: v.text for k, v in all_questions.items()}
    protobowl_ids = {
        k: v.protobowl for k, v in all_questions.items() if v.protobowl != ''
    }
    protobowl_df = load_protobowl().groupby('qid')

    save_dir = 'output/summary/new_performance/'
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(save_dir, exist_ok=True)

    # feature -> fold -> value
    variables = defaultdict(dict)
    for fold in folds:
        guesses_df = AbstractGuesser.load_guesses(bc.GUESSES_DIR, folds=[fold])
        questions = guesses_df.groupby('qnum')

        buzzes_dir = bc.BUZZES_DIR.format(fold, model_name)
        with open(buzzes_dir, 'rb') as infile:
            buzzes = pickle.load(infile)
        log.info('Buzzes loaded from {}.'.format(buzzes_dir))

        # qnum -> n_guessers * length
        top_guesses = _multiprocess(_get_top_guesses, questions,
                                    info='Top guesses', multi=True)
        top_guesses = {k: v for k, v in top_guesses}

        inputs = [top_guesses, buzzes, answers, variables, fold, save_dir]

        # NOTE: get_eop_stats and get_hyper_search accept the same inputs
        # but are currently not run.
        get_his_stats(*inputs)

        # get_protobowl takes its arguments as one list.
        p_inputs = [question_texts, protobowl_ids, protobowl_df,
                    questions] + inputs
        get_protobowl(p_inputs)

    # Freeze the nested default dicts into plain dicts before reporting.
    variables = {key: dict(value) for key, value in variables.items()}

    report(variables, save_dir, folds)
def get_eop_stats(top_guesses, buzzes, answers, variables, fold, save_dir):
    """Aggregate the end-of-pipeline statistics of one fold.

    Args:
        top_guesses: mapping whose items are fed to ``_get_eop_stats``.
        buzzes, answers: bound as the leading arguments of ``_get_eop_stats``.
        variables: nested mapping that receives the result under
            ``['eop_stats'][fold]``; skipped when ``None``.
        fold: fold name, used for logging and as the key in ``variables``.
        save_dir: not used by this function.

    Returns:
        Mapping from stat key to its aggregate: the mean of the collected
        values for keys in ``EOP_STAT_KEYS_0`` (0 when none were collected)
        and a ``guesser -> count`` dict for keys in ``EOP_STAT_KEYS_1``.
    """
    log.info('[{}] End-of-pipelin reporting'.format(fold))
    worker = partial(_get_eop_stats, buzzes, answers)
    # qnum -> key -> int
    per_question = dict(_multiprocess(worker, top_guesses.items(),
                                      info='End-of-pipeline stats',
                                      multi=True))

    # key -> collected values; entries equal to -1 are skipped
    _eop_stats = defaultdict(list)
    all_keys = EOP_STAT_KEYS_0 + EOP_STAT_KEYS_1
    for stat in per_question.values():
        for key in all_keys:
            observed = stat[key]
            if observed != -1:
                _eop_stats[key].append(observed)

    report_lines = []
    for key in EOP_STAT_KEYS_0:
        samples = _eop_stats[key]
        mean = sum(samples) / len(samples) if samples else 0
        _eop_stats[key] = mean
        report_lines.append('{0} {1:.3f}'.format(key, mean))

    for key in EOP_STAT_KEYS_1:
        samples = _eop_stats[key]
        per_guesser = dict()
        _eop_stats[key] = per_guesser
        line = key
        # Collected values are compared against the positions in GUESSERS.
        for idx, guesser in enumerate(GUESSERS):
            hits = samples.count(idx)
            line += ' {0} {1}'.format(guesser, hits)
            per_guesser[guesser] = hits
        report_lines.append(line)

    # Textual summary; assembled but not used further in this function.
    eop_output = ''.join(line + '\n' for line in report_lines)

    if variables is not None:
        variables['eop_stats'][fold] = _eop_stats

    return _eop_stats
def report_ultimate():
    """Print protobowl statistics of ``ultimate_buzzer`` on the dev fold.

    Buzzes are produced by running ``ultimate_buzzer`` over a
    ``QuestionIterator`` of the ``c.BUZZER_DEV_FOLD`` guesses. The
    statistics from ``get_protobowl`` are then computed once per threshold,
    each time keeping only the protobowl rows whose ``user_answers`` exceeds
    the threshold.
    """
    all_questions = QuestionDatabase().all_questions()
    answers = {}
    question_texts = {}
    protobowl_ids = {}
    for qid, question in all_questions.items():
        answers[qid] = question.page
        question_texts[qid] = question.text
        if question.protobowl != '':
            protobowl_ids[qid] = question.protobowl

    protobowl_df, user_count = load_protobowl()

    dev_fold = c.BUZZER_DEV_FOLD
    guesses_df = AbstractGuesser.load_guesses(bc.GUESSES_DIR, folds=[dev_fold])
    questions = guesses_df.groupby('qnum')
    top_guesses = dict(_multiprocess(_get_top_guesses, questions,
                                     info='Top guesses', multi=True))

    option2id, all_guesses = load_quizbowl()
    test_iter = QuestionIterator(all_guesses[c.BUZZER_DEV_FOLD], option2id,
                                 batch_size=128)
    buzzes = ultimate_buzzer(test_iter)

    save_dir = 'output/summary/new_performance/'
    # Trailing arguments shared by every get_protobowl call below.
    inputs = [top_guesses, buzzes, answers, None, c.BUZZER_DEV_FOLD, save_dir]

    threshold_stats = []
    for threshold in (1, 10, 50, 100, 500, 1000, 2000):
        filtered = protobowl_df[protobowl_df.user_answers > threshold]
        p_inputs = [question_texts, protobowl_ids, filtered.groupby('qid'),
                    questions] + inputs
        pstats = get_protobowl(p_inputs)
        threshold_stats.append(pstats)
        print('ultimate', threshold, pstats)

    rewards = [stats['reward'] for stats in threshold_stats]
    print('ultimate', rewards)
def report(buzzes_dir):
    """Print and pickle protobowl statistics for a pickled set of buzzes.

    The statistics from ``get_protobowl`` are computed once per threshold,
    each time keeping only the protobowl rows whose ``user_answers`` exceeds
    the threshold. The list of results is pickled to
    ``buzzes_dir + '.pstats'``.

    Args:
        buzzes_dir: path of the pickle file holding the buzzes.
    """
    all_questions = QuestionDatabase().all_questions()
    answers = {}
    question_texts = {}
    protobowl_ids = {}
    for qid, question in all_questions.items():
        answers[qid] = question.page
        question_texts[qid] = question.text
        if question.protobowl != '':
            protobowl_ids[qid] = question.protobowl

    protobowl_df, user_count = load_protobowl()

    dev_fold = c.BUZZER_DEV_FOLD
    guesses_df = AbstractGuesser.load_guesses(bc.GUESSES_DIR, folds=[dev_fold])
    questions = guesses_df.groupby('qnum')
    top_guesses = dict(_multiprocess(_get_top_guesses, questions,
                                     info='Top guesses', multi=True))

    with open(buzzes_dir, 'rb') as infile:
        buzzes = pickle.load(infile)

    save_dir = 'output/summary/new_performance/'
    # Trailing arguments shared by every get_protobowl call below.
    inputs = [top_guesses, buzzes, answers, None, c.BUZZER_DEV_FOLD, save_dir]

    threshold_stats = []
    for threshold in (1, 10, 50, 100, 500, 1000, 2000):
        filtered = protobowl_df[protobowl_df.user_answers > threshold]
        p_inputs = [question_texts, protobowl_ids, filtered.groupby('qid'),
                    questions] + inputs
        pstats = get_protobowl(p_inputs)
        threshold_stats.append(pstats)
        print(threshold, pstats)

    with open(buzzes_dir + '.pstats', 'wb') as f:
        pickle.dump(threshold_stats, f)

    rewards = [stats['reward'] for stats in threshold_stats]
    print(rewards)
def get_his_stats(top_guesses, buzzes, answers, variables, fold, save_dir):
    """Aggregate the histogram statistics of one fold and plot them.

    Two charts are drawn: a line chart of the ``HISTO_KEYS_0`` series and a
    stacked area chart of the buzz/wait series. Each chart is saved as a PDF
    under ``save_dir``, or shown interactively when ``save_dir`` is ``None``.

    Args:
        top_guesses: mapping whose items are fed to ``_get_his_stats``.
        buzzes, answers: bound as the leading arguments of ``_get_his_stats``.
        variables: nested mapping that receives the stats and the two chart
            paths under ``"his_stats"``, ``"his_lines"`` and
            ``"his_stacked"``; skipped when ``None``. The recorded paths are
            ``None`` when nothing was saved.
        fold: fold name, used in log messages, titles and file names.
        save_dir: directory for the PDF charts, or ``None`` to show them.

    Returns:
        Dict mapping each key of ``HISTO_KEYS_0 + HISTO_KEYS_1`` to a list
        with one mean per entry of ``HISTO_RATIOS`` (0 where no value was
        collected).
    """
    log.info("[{}] Histogram reporting".format(fold))
    worker = partial(_get_his_stats, buzzes, answers)
    his_stats = _multiprocess(worker, top_guesses.items(),
                              info="Histogram stats", multi=True)
    # qnum -> key -> list(int)
    his_stats = {k: v for k, v in his_stats}

    histo_keys = HISTO_KEYS_0 + HISTO_KEYS_1
    n_ratios = len(HISTO_RATIOS)

    # key -> one bucket of collected values per ratio; -1 entries are skipped
    collected = {key: [[] for _ in range(n_ratios)] for key in histo_keys}
    for stats in his_stats.values():
        for key in histo_keys:
            for i in range(n_ratios):
                if stats[key][i] != -1:
                    collected[key][i].append(stats[key][i])

    # key -> list(mean per ratio)
    _his_stats = {
        key: [sum(bucket) / len(bucket) if bucket else 0
              for bucket in buckets]
        for key, buckets in collected.items()
    }

    # Stay defined when the charts are shown instead of saved; previously
    # the assignments into `variables` below raised in that case.
    his_lines_dir = None
    his_stacked_dir = None

    ##### plot lines #####
    _, ax = plt.subplots()
    lines = [
        plt.plot(HISTO_RATIOS, _his_stats[k], LINE_STYLES[k], label=k)[0]
        for k in HISTO_KEYS_0
    ]
    ax.set_xticks(HISTO_RATIOS)
    plt.legend(handles=lines)
    plt.title("{} histogram lines chart".format(fold))
    if save_dir is not None:
        his_lines_dir = os.path.join(save_dir,
                                     "his_{}_lines.pdf".format(fold))
        plt.savefig(his_lines_dir, bbox_inches="tight")
    else:
        plt.show()
    plt.close()

    ##### plot stacked area chart #####
    stacked_series = [
        ("buzz_correct", "c"),
        ("buzz_miss", "y"),
        ("buzz_wrong", "r"),
        ("buzz_impossible", "k"),
        ("wait_wrong", "m"),
        ("wait_correct", "g"),
        ("wait_impossible", "w"),
    ]
    # Empty line plots carry the labels that plt.legend() picks up below.
    for label, color in stacked_series:
        plt.plot([], [], color=color, alpha=0.5, label=label)
    plt.stackplot(
        list(range(n_ratios)),
        *[_his_stats[label] for label, _ in stacked_series],
        colors=[color for _, color in stacked_series],
        alpha=0.5,
    )
    plt.legend()
    plt.title("{} stacked area chart".format(fold))
    if save_dir is not None:
        his_stacked_dir = os.path.join(save_dir,
                                       "his_{}_stacked.pdf".format(fold))
        plt.savefig(his_stacked_dir, bbox_inches="tight")
    else:
        plt.show()
    plt.close()

    if variables is not None:
        variables["his_stats"][fold] = _his_stats
        variables["his_lines"][fold] = his_lines_dir
        variables["his_stacked"][fold] = his_stacked_dir

    return _his_stats
def get_his_stats(top_guesses, buzzes, answers, variables, fold, save_dir):
    """Aggregate the histogram statistics of one fold and plot them.

    Two charts are drawn: a line chart of the ``HISTO_KEYS_0`` series and a
    stacked area chart of the buzz/wait series. Each chart is saved as a PDF
    under ``save_dir``, or shown interactively when ``save_dir`` is ``None``.

    Args:
        top_guesses: mapping whose items are fed to ``_get_his_stats``.
        buzzes, answers: bound as the leading arguments of ``_get_his_stats``.
        variables: nested mapping that receives the stats and the two chart
            paths under ``'his_stats'``, ``'his_lines'`` and
            ``'his_stacked'``; skipped when ``None``. The recorded paths are
            ``None`` when nothing was saved.
        fold: fold name, used in log messages, titles and file names.
        save_dir: directory for the PDF charts, or ``None`` to show them.

    Returns:
        Dict mapping each key of ``HISTO_KEYS_0 + HISTO_KEYS_1`` to a list
        with one mean per entry of ``HISTO_RATIOS`` (0 where no value was
        collected).
    """
    log.info('[{}] Histogram reporting'.format(fold))
    worker = partial(_get_his_stats, buzzes, answers)
    his_stats = _multiprocess(worker, top_guesses.items(),
                              info='Histogram stats', multi=True)
    # qnum -> key -> list(int)
    his_stats = {k: v for k, v in his_stats}

    histo_keys = HISTO_KEYS_0 + HISTO_KEYS_1
    n_ratios = len(HISTO_RATIOS)

    # key -> one bucket of collected values per ratio; -1 entries are skipped
    collected = {key: [[] for _ in range(n_ratios)] for key in histo_keys}
    for stats in his_stats.values():
        for key in histo_keys:
            for i in range(n_ratios):
                if stats[key][i] != -1:
                    collected[key][i].append(stats[key][i])

    # key -> list(mean per ratio)
    _his_stats = {
        key: [sum(bucket) / len(bucket) if bucket else 0
              for bucket in buckets]
        for key, buckets in collected.items()
    }

    # Stay defined when the charts are shown instead of saved; previously
    # the assignments into `variables` below raised in that case.
    his_lines_dir = None
    his_stacked_dir = None

    ##### plot lines #####
    _, ax = plt.subplots()
    lines = [
        plt.plot(HISTO_RATIOS, _his_stats[k], LINE_STYLES[k], label=k)[0]
        for k in HISTO_KEYS_0
    ]
    ax.set_xticks(HISTO_RATIOS)
    plt.legend(handles=lines)
    plt.title('{} histogram lines chart'.format(fold))
    if save_dir is not None:
        his_lines_dir = os.path.join(save_dir,
                                     'his_{}_lines.pdf'.format(fold))
        plt.savefig(his_lines_dir, bbox_inches='tight')
    else:
        plt.show()
    plt.close()

    ##### plot stacked area chart #####
    stacked_series = [
        ('buzz_correct', 'c'),
        ('buzz_miss', 'y'),
        ('buzz_wrong', 'r'),
        ('buzz_impossible', 'k'),
        ('wait_wrong', 'm'),
        ('wait_correct', 'g'),
        ('wait_impossible', 'w'),
    ]
    # Empty line plots carry the labels that plt.legend() picks up below.
    for label, color in stacked_series:
        plt.plot([], [], color=color, alpha=0.5, label=label)
    plt.stackplot(
        list(range(n_ratios)),
        *[_his_stats[label] for label, _ in stacked_series],
        colors=[color for _, color in stacked_series],
        alpha=0.5,
    )
    plt.legend()
    plt.title('{} stacked area chart'.format(fold))
    if save_dir is not None:
        his_stacked_dir = os.path.join(save_dir,
                                       'his_{}_stacked.pdf'.format(fold))
        plt.savefig(his_stacked_dir, bbox_inches='tight')
    else:
        plt.show()
    plt.close()

    if variables is not None:
        variables['his_stats'][fold] = _his_stats
        variables['his_lines'][fold] = his_lines_dir
        variables['his_stacked'][fold] = his_stacked_dir

    return _his_stats
def generate(buzzes, answers, guesses_df, fold, checkpoint_dir=None,
             plot_dir=None, multiprocessing=True):
    """Compute, print and return end-of-pipeline and histogram reports.

    Args:
        buzzes, answers: bound as the leading arguments of the
            ``end_of_pipeline`` and ``histogram`` workers.
        guesses_df: guesses; grouped by ``qnum`` before processing.
        fold: not used by this function.
        checkpoint_dir: when given, path the intermediate results are
            pickled to.
        plot_dir: when given, path the histogram line plot is saved to as
            PNG.
        multiprocessing: forwarded as ``multi`` to ``_multiprocess``.

    Returns:
        ``(eop_output, his_output)``: the two reports as newline-terminated
        text. Every report line is also printed.
    """
    questions = guesses_df.groupby('qnum')

    # qnum -> n_guessers * length
    top_guesses = dict(_multiprocess(get_top_guesses, questions,
                                     info='Top guesses',
                                     multi=multiprocessing))

    ############# end-of-pipeline stats #############
    # qnum -> key -> int
    eop_stats = dict(_multiprocess(partial(end_of_pipeline, buzzes, answers),
                                   top_guesses.items(),
                                   info='End-of-pipeline stats',
                                   multi=multiprocessing))

    # key -> collected values; entries equal to -1 are skipped
    _eop_stats = defaultdict(list)
    eop_keys = EOP_STAT_KEYS_0 + EOP_STAT_KEYS_1
    for stat in eop_stats.values():
        for key in eop_keys:
            observed = stat[key]
            if observed != -1:
                _eop_stats[key].append(observed)

    eop_lines = []
    for key in EOP_STAT_KEYS_0:
        samples = _eop_stats[key]
        mean = sum(samples) / len(samples) if samples else 0
        line = "{0} {1:.3f}".format(key, mean)
        eop_lines.append(line)
        print(line)

    for key in EOP_STAT_KEYS_1:
        samples = _eop_stats[key]
        line = key
        # Collected values are compared against the positions in GUESSERS.
        for idx, guesser in enumerate(GUESSERS):
            line += " {0} {1}".format(guesser, samples.count(idx))
        eop_lines.append(line)
        print(line)

    eop_output = "".join(line + '\n' for line in eop_lines)

    ############# histogram stats #############
    # qnum -> key -> list(int)
    his_stats = dict(_multiprocess(partial(histogram, buzzes, answers),
                                   top_guesses.items(),
                                   info='Histogram stats',
                                   multi=multiprocessing))

    # key -> one bucket per ratio; entries equal to -1 are skipped
    _his_stats = defaultdict(lambda: [[] for _ in HISTO_RATIOS])
    for stats in his_stats.values():
        for key in HISTO_KEYS:
            for i, _ in enumerate(HISTO_RATIOS):
                observed = stats[key][i]
                if observed != -1:
                    _his_stats[key][i].append(observed)

    # Collapse every bucket to its mean (0 for an empty bucket).
    for key in HISTO_KEYS:
        buckets = _his_stats[key]
        for i, _ in enumerate(HISTO_RATIOS):
            bucket = buckets[i]
            buckets[i] = sum(bucket) / len(bucket) if bucket else 0
    _his_stats = dict(_his_stats)

    his_lines = []
    for i, ratio in enumerate(HISTO_RATIOS):
        line = "{}:".format(ratio) + "".join(
            " {0} {1:.2f}".format(key, _his_stats[key][i])
            for key in HISTO_KEYS)
        his_lines.append(line)
        print(line)
    his_output = "".join(line + '\n' for line in his_lines)

    if plot_dir is not None:
        handles = [
            plt.plot(HISTO_RATIOS, series, LINE_STYLES[key], label=key)[0]
            for key, series in _his_stats.items()
        ]
        plt.legend(handles=handles)
        plt.savefig(plot_dir, dpi=200, format='png')
        plt.clf()

    if checkpoint_dir is not None:
        checkpoint = {
            'buzzes': buzzes,
            'top_guesses': top_guesses,
            'eop_keys': EOP_STAT_KEYS_0 + EOP_STAT_KEYS_1,
            'his_keys': HISTO_KEYS,
            'eop_stats': eop_stats,
            'his_stats': his_stats,
            '_his_stats': _his_stats,
        }
        with open(checkpoint_dir, 'wb') as outfile:
            pickle.dump(checkpoint, outfile)

    return eop_output, his_output
def get_his_stats(top_guesses, buzzes, answers, variables, fold, save_dir):
    """Aggregate the histogram statistics of one fold and plot them.

    Two charts are drawn: a line chart of the ``HISTO_KEYS_0`` series and a
    stacked area chart of the buzz/wait series. Each chart is saved as a PDF
    under ``save_dir``, or shown interactively when ``save_dir`` is ``None``.

    Args:
        top_guesses: mapping whose items are fed to ``_get_his_stats``.
        buzzes, answers: bound as the leading arguments of ``_get_his_stats``.
        variables: nested mapping that receives the stats and the two chart
            paths under ``'his_stats'``, ``'his_lines'`` and
            ``'his_stacked'``; skipped when ``None``. The recorded paths are
            ``None`` when nothing was saved.
        fold: fold name, used in log messages, titles and file names.
        save_dir: directory for the PDF charts, or ``None`` to show them.

    Returns:
        Dict mapping each key of ``HISTO_KEYS_0 + HISTO_KEYS_1`` to a list
        with one mean per entry of ``HISTO_RATIOS`` (0 where no value was
        collected).
    """
    log.info('[{}] Histogram reporting'.format(fold))
    worker = partial(_get_his_stats, buzzes, answers)
    his_stats = _multiprocess(worker, top_guesses.items(),
                              info='Histogram stats', multi=True)
    # qnum -> key -> list(int)
    his_stats = {k: v for k, v in his_stats}

    histo_keys = HISTO_KEYS_0 + HISTO_KEYS_1
    n_ratios = len(HISTO_RATIOS)

    # key -> one bucket of collected values per ratio; -1 entries are skipped
    collected = {key: [[] for _ in range(n_ratios)] for key in histo_keys}
    for stats in his_stats.values():
        for key in histo_keys:
            for i in range(n_ratios):
                if stats[key][i] != -1:
                    collected[key][i].append(stats[key][i])

    # key -> list(mean per ratio)
    _his_stats = {
        key: [sum(bucket) / len(bucket) if bucket else 0
              for bucket in buckets]
        for key, buckets in collected.items()
    }

    # Stay defined when the charts are shown instead of saved; previously
    # the assignments into `variables` below raised in that case.
    his_lines_dir = None
    his_stacked_dir = None

    ##### plot lines #####
    _, ax = plt.subplots()
    lines = [
        plt.plot(HISTO_RATIOS, _his_stats[k], LINE_STYLES[k], label=k)[0]
        for k in HISTO_KEYS_0
    ]
    ax.set_xticks(HISTO_RATIOS)
    plt.legend(handles=lines)
    plt.title('{} histogram lines chart'.format(fold))
    if save_dir is not None:
        his_lines_dir = os.path.join(save_dir,
                                     'his_{}_lines.pdf'.format(fold))
        plt.savefig(his_lines_dir, bbox_inches='tight')
    else:
        plt.show()
    plt.close()

    ##### plot stacked area chart #####
    stacked_series = [
        ('buzz_correct', 'c'),
        ('buzz_miss', 'y'),
        ('buzz_wrong', 'r'),
        ('buzz_impossible', 'k'),
        ('wait_wrong', 'm'),
        ('wait_correct', 'g'),
        ('wait_impossible', 'w'),
    ]
    # Empty line plots carry the labels that plt.legend() picks up below.
    for label, color in stacked_series:
        plt.plot([], [], color=color, alpha=0.5, label=label)
    plt.stackplot(
        list(range(n_ratios)),
        *[_his_stats[label] for label, _ in stacked_series],
        colors=[color for _, color in stacked_series],
        alpha=0.5,
    )
    plt.legend()
    plt.title('{} stacked area chart'.format(fold))
    if save_dir is not None:
        his_stacked_dir = os.path.join(save_dir,
                                       'his_{}_stacked.pdf'.format(fold))
        plt.savefig(his_stacked_dir, bbox_inches='tight')
    else:
        plt.show()
    plt.close()

    if variables is not None:
        variables['his_stats'][fold] = _his_stats
        variables['his_lines'][fold] = his_lines_dir
        variables['his_stacked'][fold] = his_stacked_dir

    return _his_stats