def dataset_stat_latex_print(dataset_name): """ Print the avg precision, recall and F1 score in latex format to console. """ # get results txt_results = TextBasedResults() txt_results.load(dataset_name) txt_results.print_results() # package results elist = extractor_list_filter(txt_results.text_eval_results.keys()) extractor_slugs = tuple([e.SLUG for e in elist]) result_list = [] for e in extractor_slugs: result_tuple = ( get_extractor_cls(e).NAME, txt_results.precision_statistics(e)[0], txt_results.recall_statistics(e)[0], txt_results.f1score_statistics(e)[0], ) result_list.append(result_tuple) result_list.sort(key=lambda i: i[3]) result_list.reverse() for r in result_list: print "\\texttt{%s} & %.4f & %.4f & %.4f \\\\ \\hline" % r
def dataset_stat_latex_print(dataset_name): ''' Print the avg precision, recall and F1 score in latex format to console. ''' # get results txt_results = TextBasedResults() txt_results.load(dataset_name) txt_results.print_results() #package results elist = extractor_list_filter(txt_results.text_eval_results.keys()) extractor_slugs = tuple([e.SLUG for e in elist]) result_list = [] for e in extractor_slugs: result_tuple = ( get_extractor_cls(e).NAME, txt_results.precision_statistics(e)[0], txt_results.recall_statistics(e)[0], txt_results.f1score_statistics(e)[0], ) result_list.append(result_tuple) result_list.sort(key=lambda i: i[3]) result_list.reverse() for r in result_list: print '\\texttt{%s} & %.4f & %.4f & %.4f \\\\ \\hline' % r
def dataset_stat_plot(dataset_name, img_name):
    """Plot avg precision, recall and F1 bar charts for a dataset and save to img_name."""
    # load the stored evaluation results for this dataset
    txt_results = TextBasedResults()
    txt_results.load(dataset_name)
    txt_results.print_results()
    # one (title, [(statistics, slug), ...]) entry per metric
    slugs = tuple(ext.SLUG for ext in
                  extractor_list_filter(txt_results.text_eval_results.keys()))
    metrics = (
        ("Precision", [(txt_results.precision_statistics(s), s) for s in slugs]),
        ("Recall", [(txt_results.recall_statistics(s), s) for s in slugs]),
        ("F1 score", [(txt_results.f1score_statistics(s), s) for s in slugs]),
    )
    colors = ("b", "c", "m")
    for plot_idx, (title, stats) in enumerate(metrics):
        ind = np.arange(len(slugs))  # x locations for the bar groups
        width = 0.6                  # width of each bar
        # order bars best-average first
        stats.sort(key=lambda entry: entry[0][0])
        stats.reverse()
        avgs = [entry[0][0] for entry in stats]
        stddevs = [entry[0][1] for entry in stats]
        # draw this metric's subplot
        plt.subplot(3, 1, plot_idx + 1)
        plt.grid(True, alpha=0.5)
        bars = plt.bar(ind, avgs, width, color=colors[plot_idx], ecolor="g",
                       yerr=stddevs, linewidth=0.5, alpha=0.8)
        # labels and titles
        names = [get_extractor_cls(entry[1]).NAME for entry in stats]
        plt.title(title)
        plt.xticks(ind + width / 2.0, names, size="xx-small", rotation="vertical")
        plt.legend((bars[0],), ("avg",), fancybox=True,
                   prop=dict(size="x-small"), loc=4)  # lower right
        # annotate each bar with its average value
        for bar in bars:
            height = bar.get_height()
            plt.text(bar.get_x() + bar.get_width() / 2.25,
                     bar.get_height() + 0.01,
                     "%1.2f" % height,
                     ha="center", va="bottom", size="x-small")
    # spacing between the stacked subplots
    plt.subplots_adjust(wspace=0.5, hspace=0.9)
    # stretch the figure vertically so three subplots fit comfortably
    fig = plt.gcf()
    w, h = fig.get_size_inches()
    fig.set_size_inches(w, h * 1.6)
    # output
    out_path = os.path.join(settings.PATH_LOCAL_DATA, "plot-output", img_name)
    plt.savefig(out_path)
class TestTextBasedResults(unittest2.TestCase):

    def setUp(self):
        """Seed extractor 'e1' with degenerate and valid results."""
        self.results = TextBasedResults('e1')
        # Result(precision, recall, f1_score, id)
        self.results.add_result(Result(0, 0, float('inf'), None))
        for _ in range(2):
            self.results.add_result(
                Result(float('inf'), 0, float('nan'), None))
        for _ in range(2):
            self.results.add_result(
                Result(0, float('inf'), float('nan'), None))
        self.results.add_result(
            Result(float('inf'), float('inf'), float('nan'), None))
        for _ in range(4):
            self.results.add_result(Result(0.2, 0.2, 0.2, None))
        self.results.dataset_len = 12

    def tearDown(self):
        # wipe accumulated results so each test starts clean
        self.results.text_eval_results['e1'] = []

    def test_results_contents(self):
        contents = self.results.result_contents('e1')
        # expected tallies for the fixture seeded in setUp
        for attr, expected in [('fail', 2), ('succ', 4), ('rel_empty', 2),
                               ('ret_empty', 2), ('rel_ret_empty', 1),
                               ('missmatch', 1)]:
            self.assertEqual(getattr(contents, attr), expected)

    def test_result_filter(self):
        # only the four valid results survive filtering
        filtered = self.results.filtered_results('e1')
        self.assertEqual(len(filtered), 4)

    def test_precision_statistics(self):
        avg, std = self.results.precision_statistics('e1')
        self.assertEqual((avg, std), (0.2, 0.))

    def test_recall_statistics(self):
        avg, std = self.results.recall_statistics('e1')
        self.assertEqual((avg, std), (0.2, 0.))

    def test_f1score_statistics(self):
        avg, std = self.results.f1score_statistics('e1')
        self.assertEqual((avg, std), (0.2, 0.))

    def test_add_bad_result(self):
        """Inconsistent precision/recall/f1 combinations must be rejected."""
        r = TextBasedResults('e2')
        bad = [
            Result(2, 1, 1, None),
            Result(float('inf'), float('inf'), 1, None),
            Result(float('inf'), 0, 1, None),
            Result(0, 0, 1, None),
        ]
        for result in bad:
            with self.assertRaises(AssertionError):
                r.add_result(result)

    def test_add_good_result(self):
        r = TextBasedResults('e3')
        try:
            r.add_result(Result(0.2, 0.2, 0.2, None))
        except AssertionError:
            self.fail()
def dataset_stat_plot(dataset_name, img_name):
    '''Bar-chart the avg precision, recall and F1 of every extractor on a dataset.'''
    # fetch stored evaluation results
    results = TextBasedResults()
    results.load(dataset_name)
    results.print_results()
    # package results: (title, [(statistics, slug), ...]) per metric
    slugs = tuple(e.SLUG for e in
                  extractor_list_filter(results.text_eval_results.keys()))
    packaged = (
        ('Precision', [(results.precision_statistics(s), s) for s in slugs]),
        ('Recall', [(results.recall_statistics(s), s) for s in slugs]),
        ('F1 score', [(results.f1score_statistics(s), s) for s in slugs]),
    )
    palette = ('b', 'c', 'm')
    for subplot_no, (title, data) in enumerate(packaged):
        xs = np.arange(len(slugs))  # x locations for the groups
        bar_width = 0.6             # the width of the bars
        # best average first
        data.sort(key=lambda item: item[0][0])
        data.reverse()
        means = [item[0][0] for item in data]
        devs = [item[0][1] for item in data]
        # plot this metric
        plt.subplot(3, 1, subplot_no + 1)
        plt.grid(True, alpha=0.5)
        rects = plt.bar(xs, means, bar_width, color=palette[subplot_no],
                        ecolor='g', yerr=devs, linewidth=0.5, alpha=0.8)
        # axis labels, title and legend
        labels = [get_extractor_cls(item[1]).NAME for item in data]
        plt.title(title)
        plt.xticks(xs + bar_width / 2., labels,
                   size='xx-small', rotation='vertical')
        plt.legend(
            (rects[0], ),
            ('avg', ),
            fancybox=True,
            prop=dict(size='x-small'),
            loc=4  # lower right
        )
        # write each bar's average above it
        for rect in rects:
            value = rect.get_height()
            plt.text(rect.get_x() + rect.get_width() / 2.25,
                     rect.get_height() + 0.01,
                     '%1.2f' % value,
                     ha='center', va='bottom', size='x-small')
    # subplots adjusting
    plt.subplots_adjust(wspace=0.5, hspace=0.9)
    # adjust figure height for the three stacked charts
    fig = plt.gcf()
    width_in, height_in = fig.get_size_inches()
    fig.set_size_inches(width_in, height_in * 1.6)
    # output
    plt.savefig(os.path.join(settings.PATH_LOCAL_DATA, 'plot-output', img_name))
class TestTextBasedResults(unittest2.TestCase):

    def setUp(self):
        '''Populate extractor 'e1' with a fixed mix of results.'''
        self.results = TextBasedResults('e1')
        # seed rows as (precision, recall, f1_score); id is always None
        seed = [
            (0, 0, float('inf')),
            (float('inf'), 0, float('nan')),
            (float('inf'), 0, float('nan')),
            (0, float('inf'), float('nan')),
            (0, float('inf'), float('nan')),
            (float('inf'), float('inf'), float('nan')),
            (0.2, 0.2, 0.2),
            (0.2, 0.2, 0.2),
            (0.2, 0.2, 0.2),
            (0.2, 0.2, 0.2),
        ]
        for precision, recall, f1 in seed:
            self.results.add_result(Result(precision, recall, f1, None))
        self.results.dataset_len = 12

    def tearDown(self):
        # reset accumulated 'e1' results between tests
        self.results.text_eval_results['e1'] = []

    def test_results_contents(self):
        contents = self.results.result_contents('e1')
        self.assertEqual(contents.fail, 2)
        self.assertEqual(contents.succ, 4)
        self.assertEqual(contents.rel_empty, 2)
        self.assertEqual(contents.ret_empty, 2)
        self.assertEqual(contents.rel_ret_empty, 1)
        self.assertEqual(contents.missmatch, 1)

    def test_result_filter(self):
        # filtering keeps only the four well-formed results
        self.assertEqual(len(self.results.filtered_results('e1')), 4)

    def test_precision_statistics(self):
        avg, std = self.results.precision_statistics('e1')
        self.assertEqual(avg, 0.2)
        self.assertEqual(std, 0.)

    def test_recall_statistics(self):
        avg, std = self.results.recall_statistics('e1')
        self.assertEqual(avg, 0.2)
        self.assertEqual(std, 0.)

    def test_f1score_statistics(self):
        avg, std = self.results.f1score_statistics('e1')
        self.assertEqual(avg, 0.2)
        self.assertEqual(std, 0.)

    def test_add_bad_result(self):
        '''Each malformed precision/recall/f1 combination must raise.'''
        r = TextBasedResults('e2')
        for bad in (Result(2, 1, 1, None),
                    Result(float('inf'), float('inf'), 1, None),
                    Result(float('inf'), 0, 1, None),
                    Result(0, 0, 1, None)):
            with self.assertRaises(AssertionError):
                r.add_result(bad)

    def test_add_good_result(self):
        r = TextBasedResults('e3')
        try:
            r.add_result(Result(0.2, 0.2, 0.2, None))
        except AssertionError:
            self.fail()