def get_batch_final_score(reference_file, result_dir):
    """Score every not-yet-evaluated result file in ``result_dir``.

    Each file name returned by ``os.listdir(result_dir)`` that is not in the
    collection returned by ``get_evaluted_files()`` is passed to
    ``compute_m1`` together with ``reference_file``. The final score of a
    file is the arithmetic mean of the ``'Bleu_4'``, ``'CIDEr'``,
    ``'METEOR'`` and ``'ROUGE_L'`` entries of the returned mapping. One
    ``logging.info`` record is emitted per scored file, and the best-scoring
    file is printed at the end.

    Args:
        reference_file: Reference path forwarded unchanged to ``compute_m1``.
        result_dir: Directory whose entries are the prediction files to score.

    Returns:
        None. Results are only logged and printed.
    """
    files = sorted(os.listdir(result_dir))
    print('totally {0} result json files'.format(len(files)))

    evaluated_files = get_evaluted_files()
    json_suffix = '.json'

    # Keep the file name next to its score so the best file can be reported
    # correctly no matter where skipped files fall in the sorted listing.
    scored_files = []
    final_scores = []

    for f in files:
        if f in evaluated_files:
            print('{0} has been caculated and recorded'.format(f))
            continue

        # os.path.join works whether or not result_dir ends with a separator.
        score = compute_m1(os.path.join(result_dir, f), reference_file)
        final_score = (score['Bleu_4'] + score['CIDEr'] +
                       score['METEOR'] + score['ROUGE_L']) / 4.0
        scored_files.append(f)
        final_scores.append(final_score)

        # Remove the literal ".json" suffix. str.rstrip('.json') would strip
        # any trailing run of the characters '.', 'j', 's', 'o', 'n' instead.
        stem = f[:-len(json_suffix)] if f.endswith(json_suffix) else f
        # The step label is the second '-'-separated field; fall back to the
        # whole stem for names without a '-' rather than raising IndexError.
        fields = stem.split('-')
        step = fields[1] if len(fields) > 1 else stem

        logging.info('{0} steps, final_score {1:.5f} {2:.5f} {3:.5f} {4:.5f} {5:.5f}'.format(
            step, final_score, score['Bleu_4'], score['CIDEr'],
            score['METEOR'], score['ROUGE_L']))

    if not final_scores:
        # Nothing new was scored; max() on an empty list would raise.
        print('no new result files to evaluate')
        return

    best_score = max(final_scores)
    # list.index returns the first occurrence, so ties resolve to the
    # earliest file in sorted order.
    best_file = scored_files[final_scores.index(best_score)]
    print('max score {0} was achieved by file: {1}'.format(best_score, best_file))
def test_wrongname(self):
    """Check that ``compute_m1`` reports ``error == 1`` for wrong_name.json."""
    predictions_path = "data/wrong_name.json"
    references_path = "data/id_to_words.json"
    result = compute_m1(json_predictions_file=predictions_path,
                        reference_file=references_path)
    error_flag = result['error']
    self.assertEqual(error_flag, 1)
def test_moredatanumber(self):
    """Check that ``compute_m1`` reports ``error == 1`` for more_data_number.json."""
    predictions_path = "data/more_data_number.json"
    references_path = "data/id_to_words.json"
    result = compute_m1(json_predictions_file=predictions_path,
                        reference_file=references_path)
    error_flag = result['error']
    self.assertEqual(error_flag, 1)
def test_keyerror(self):
    """Check that ``compute_m1`` reports ``error == 1`` for key_error.json."""
    predictions_path = "data/key_error.json"
    references_path = "data/id_to_words.json"
    result = compute_m1(json_predictions_file=predictions_path,
                        reference_file=references_path)
    error_flag = result['error']
    self.assertEqual(error_flag, 1)
def test_nulldata(self):
    """Check that ``compute_m1`` reports ``error == 1`` for has_null_data.json."""
    predictions_path = "data/has_null_data.json"
    references_path = "data/id_to_words.json"
    result = compute_m1(json_predictions_file=predictions_path,
                        reference_file=references_path)
    error_flag = result['error']
    self.assertEqual(error_flag, 1)
def test_rightdata(self):
    """Check that ``compute_m1`` reports ``error == 0`` for id_to_test_caption.json."""
    predictions_path = "data/id_to_test_caption.json"
    references_path = "data/id_to_words.json"
    result = compute_m1(json_predictions_file=predictions_path,
                        reference_file=references_path)
    error_flag = result['error']
    self.assertEqual(error_flag, 0)