def evaluate_file(self, f):
    """
    Evaluates the given file and returns the result.

    Args:
        f: Mapping that provides the entries "gt_path", "pdf_path" and
            "tool_path".

    Returns:
        A dict that always holds the given `f` under "file". If the
        evaluation could not be performed, exactly one of the flags
        "missing_gt_file", "missing_pdf_file" or "missing_tool_file" is
        set to True and nothing else is added. Otherwise the dict also
        holds "evaluation_result", "num_ops" and "prev_num_ops".

    Raises:
        KeyError: If one of the three path entries is missing from `f`.
    """
    gt_path = f["gt_path"]
    pdf_path = f["pdf_path"]
    tool_path = f["tool_path"]

    res = {"file": f}

    # Don't proceed, if the gt file doesn't exist.
    if self.is_missing_or_empty(gt_path):
        res["missing_gt_file"] = True
        return res

    # Don't proceed, if the pdf file doesn't exist.
    if self.is_missing_or_empty(pdf_path):
        res["missing_pdf_file"] = True
        return res

    # Read and format the groundtruth file.
    gt = self.format_gt_file(gt_path)

    # Don't proceed, if the gt is empty. Check this before touching the
    # tool output, so that the tool file isn't read and formatted in vain.
    if not gt:
        res["missing_gt_file"] = True
        return res

    # Read and format the tool's output.
    tool_output = self.format_tool_file(tool_path)

    # Don't proceed, if the tool output is empty.
    if not tool_output:
        res["missing_tool_file"] = True
        return res

    # Compute evaluation result.
    res["evaluation_result"] = self.evaluate_strings(gt, tool_output)
    res["num_ops"] = count_num_ops(res["evaluation_result"], self.args.junk)
    # NOTE(review): presumably the number of operations of a previous run;
    # confirm against deserialize().
    res["prev_num_ops"] = self.deserialize(res)

    # Trigger the event here to interact with user immediately, even in
    # case of parallel processing.
    self.handle_evaluation_result(res)

    return res
def assert_equal(self, input1, input2, expected):
    """
    Asserts that the diff of the two inputs yields the expected counts.

    The inputs are passed to doc_diff.doc_diff(), the resulting phrases
    are passed to doc_diff.count_num_ops(), and the counts, converted to a
    plain dict, are compared to `expected` via assertDictEqual().

    Args:
        input1: The first argument handed to doc_diff.doc_diff().
        input2: The second argument handed to doc_diff.doc_diff().
        expected: The dict the computed counts must be equal to.
    """
    phrases = doc_diff.doc_diff(input1, input2)
    # Convert to a plain dict, because assertDictEqual() is given a dict to
    # compare against `expected`.
    op_counts = dict(doc_diff.count_num_ops(phrases))
    self.assertDictEqual(op_counts, expected)