Example #1
0
 def evaluate_file(self, f):
     """
     Evaluate one file entry and return a dict describing the outcome.

     `f` must provide the keys "gt_path", "pdf_path" and "tool_path".
     The returned dict always holds `f` under "file". If a precondition
     fails, exactly one of the flags "missing_gt_file", "missing_pdf_file"
     or "missing_tool_file" is set to True and nothing else is computed.
     Otherwise the dict also carries "evaluation_result", "num_ops" and
     "prev_num_ops".
     """
     gt_file, pdf_file, tool_file = f["gt_path"], f["pdf_path"], f["tool_path"]

     outcome = {"file": f}

     # Bail out as soon as one of the required input files is missing or
     # empty on disk (groundtruth first, then the pdf).
     required_files = (
         (gt_file, "missing_gt_file"),
         (pdf_file, "missing_pdf_file"),
     )
     for path, flag in required_files:
         if self.is_missing_or_empty(path):
             outcome[flag] = True
             return outcome

     # Read and format both sides before inspecting either of them.
     reference = self.format_gt_file(gt_file)
     produced = self.format_tool_file(tool_file)

     # Bail out if formatting left nothing to compare (groundtruth first,
     # then the tool output).
     formatted_inputs = (
         (reference, "missing_gt_file"),
         (produced, "missing_tool_file"),
     )
     for content, flag in formatted_inputs:
         if not content:
             outcome[flag] = True
             return outcome

     # Compute the evaluation result and the values derived from it.
     outcome["evaluation_result"] = self.evaluate_strings(reference, produced)
     outcome["num_ops"] = count_num_ops(
         outcome["evaluation_result"], self.args.junk
     )
     outcome["prev_num_ops"] = self.deserialize(outcome)

     # Fire the event right here so the user gets feedback immediately,
     # even in case of parallel processing.
     self.handle_evaluation_result(outcome)

     return outcome
 def assert_equal(self, input1, input2, expected):
     """
     Assert that diffing `input1` against `input2` yields the operation
     counts given in `expected`.

     The diff is produced by `doc_diff.doc_diff`, its operations are
     counted by `doc_diff.count_num_ops`, and the counts are converted to
     a plain dict before being compared with `expected`.
     """
     actual_counts = dict(
         doc_diff.count_num_ops(doc_diff.doc_diff(input1, input2))
     )
     self.assertDictEqual(actual_counts, expected)