Esempio n. 1
0
 def run(self):
     """Perform the evaluation.

     Iterates over every article supplied by ``self._reader``, ranks it,
     and evaluates the generated terms against the article's MEDLINE MeSH
     headings, once against all headings and once against the major
     headings only. The two result sets are merged and stored under the
     article's ``set_id``. After the reader is exhausted, the collected
     results are passed to ``self.output`` and ``self.output_metadata`` is
     called.

     An article contributes no result when it fails
     ``self.include_article``, when ``self.graph_and_rank`` raises
     ``CouldNotRank``, or when it has no gold-standard MeSH headings.
     """
     logging.info("Starting workflow %r run", self)
     all_results = {}
     evaluator = self.create_evaluator()
     for count, each_article in enumerate(self._reader, 1):
         logging.info("Working on article %d: %r", count, each_article)
         if not self.include_article(each_article):
             logging.log(
                 ULTRADEBUG, "Skipping article %r due to exclusion "
                 " criteria.", each_article)
             continue
         try:
             ranked_article = self.graph_and_rank(each_article)
         except CouldNotRank:
             # Deliberate best-effort: unrankable articles are left out.
             continue
         converted_terms = self.convert(ranked_article)
         cut_terms = converted_terms.terms_higher_than_or_equal_to(
             self._ranking_cutoff)
         # Build the score list once and guard against an empty selection:
         # indexing [-1]/[0] on an empty list would raise IndexError and
         # abort the whole run just for a debug message.
         scores = [x[1] for x in cut_terms]
         if scores:
             logging.debug(
                 "Lowest-ranking term is term #%d out of %d"
                 " (score=%1.5f, highest score=%1.5f)", len(cut_terms),
                 len(converted_terms), scores[-1], scores[0])
         else:
             logging.debug(
                 "No terms reached the ranking cutoff for article %r",
                 each_article)
         medline_record_mesh_terms = ExpressionList().from_medline(
             each_article.set_id.article_record().mesh_headings)
         flat_medline = medline_record_mesh_terms.flatten()
         flattened_terms = self.flatten_generated_terms(
             flat_medline, cut_terms)
         flattened_terms = self.limit_length(flat_medline, flattened_terms)
         if len(flat_medline) == 0:
             # logging.warn is a deprecated alias (removed in Python 3.13).
             logging.warning(
                 "No gold standard available for article %r. "
                 "Omitting it from the result set.", each_article)
             continue
         eval_result = self.perform_evaluation(each_article, evaluator,
                                               flat_medline,
                                               flattened_terms)
         flattened_major_headings = \
             medline_record_mesh_terms.major_headings()
         logging.debug("Original headings: %r Major headings: %r",
                       medline_record_mesh_terms, flattened_major_headings)
         mh_result_temp = self.perform_evaluation(each_article, evaluator,
                                                  flattened_major_headings,
                                                  flattened_terms)
         mh_result = NamedResultSet("mh_", mh_result_temp)
         # Compute the total recall, too
         total_recall = self.compute_total_recall(flat_medline,
                                                  converted_terms)
         eval_result.add(total_recall)
         # Unify the result sets
         all_results[each_article.set_id] = eval_result | mh_result
     logging.info("Writing out results.")
     self.output(all_results)
     self.output_metadata()
Esempio n. 2
0
 def run(self):
     """Perform the evaluation.

     For each article yielded by ``self._reader``: rank it, convert the
     ranking into terms, and evaluate those terms against the article's
     MEDLINE MeSH headings (all headings, then major headings only). The
     merged result set is stored under the article's ``set_id``. When the
     reader is exhausted the results are passed to ``self.output`` and
     ``self.output_metadata`` is called.

     Articles are skipped without storing a result when
     ``self.include_article`` rejects them, when ``self.graph_and_rank``
     raises ``CouldNotRank``, or when no gold-standard MeSH headings are
     available.
     """
     logging.info("Starting workflow %r run", self)
     all_results = {}
     evaluator = self.create_evaluator()
     for count, each_article in enumerate(self._reader, 1):
         logging.info("Working on article %d: %r", count, each_article)
         if not self.include_article(each_article):
             logging.log(ULTRADEBUG, "Skipping article %r due to exclusion "
                         " criteria.", each_article)
             continue
         try:
             ranked_article = self.graph_and_rank(each_article)
         except CouldNotRank:
             # Deliberate best-effort: unrankable articles are left out.
             continue
         converted_terms = self.convert(ranked_article)
         cut_terms = converted_terms.terms_higher_than_or_equal_to(
             self._ranking_cutoff)
         # Compute the scores once; an empty selection must not be indexed,
         # otherwise a debug statement would crash the run with IndexError.
         scores = [x[1] for x in cut_terms]
         if scores:
             logging.debug("Lowest-ranking term is term #%d out of %d"
                           " (score=%1.5f, highest score=%1.5f)",
                           len(cut_terms), len(converted_terms),
                           scores[-1], scores[0])
         else:
             logging.debug("No terms reached the ranking cutoff for "
                           "article %r", each_article)
         medline_record_mesh_terms = ExpressionList().from_medline(
             each_article.set_id.article_record().mesh_headings)
         flat_medline = medline_record_mesh_terms.flatten()
         flattened_terms = self.flatten_generated_terms(flat_medline,
                                                        cut_terms)
         flattened_terms = self.limit_length(flat_medline, flattened_terms)
         if len(flat_medline) == 0:
             # logging.warn is a deprecated alias (removed in Python 3.13).
             logging.warning("No gold standard available for article %r. "
                             "Omitting it from the result set.",
                             each_article)
             continue
         eval_result = self.perform_evaluation(each_article,
                                               evaluator,
                                               flat_medline,
                                               flattened_terms)
         flattened_major_headings = \
             medline_record_mesh_terms.major_headings()
         logging.debug("Original headings: %r Major headings: %r",
                       medline_record_mesh_terms,
                       flattened_major_headings)
         mh_result_temp = self.perform_evaluation(each_article, evaluator,
                                                  flattened_major_headings,
                                                  flattened_terms)
         mh_result = NamedResultSet("mh_", mh_result_temp)
         # Compute the total recall, too
         total_recall = self.compute_total_recall(flat_medline,
                                                  converted_terms)
         eval_result.add(total_recall)
         # Unify the result sets
         all_results[each_article.set_id] = eval_result | mh_result
     logging.info("Writing out results.")
     self.output(all_results)
     self.output_metadata()
Esempio n. 3
0
 def process_article(self, each_article):
     """Evaluate a single article and record its results.

     Ranks the article, converts the ranking into terms, and evaluates the
     terms selected by ``terms_higher_than_or_equal_to(self._ranking_cutoff)``
     against the article record's ``'MH'`` entry, once against all headings
     and once against the major headings only. The merged result set is
     stored in ``self.all_results`` under ``each_article.set_id``.

     Nothing is stored, and the method simply returns, when the article
     fails ``self.include_article``, when ``self.graph_and_rank`` raises
     ``CouldNotRank``, when the article record cannot be obtained or
     parsed, or when no gold-standard headings are available.
     """
     if not self.include_article(each_article):
         logging.log(ULTRADEBUG, "Skipping article %r due to exclusion "
                     " criteria.", each_article)
         return
     try:
         ranked_article = self.graph_and_rank(each_article)
     except CouldNotRank:
         # Deliberate best-effort: unrankable articles are left out.
         return
     logging.debug("Ranked article: %r", ranked_article)
     converted_terms = self.convert(ranked_article)
     logging.debug("Converted terms: %r", converted_terms)
     cut_terms = converted_terms.terms_higher_than_or_equal_to(
         self._ranking_cutoff)
     logging.debug("Cut terms: %r", cut_terms)
     try:
         medline_record_mesh_terms = ExpressionList().from_medline(
             each_article.set_id.article_record()['MH'])
     except Exception:
         # Catch Exception rather than using a bare except so that
         # KeyboardInterrupt and SystemExit still propagate.
         logging.warning("Could not obtain an article record for %r. "
                         "Skipping.", each_article)
         return
     flat_medline = medline_record_mesh_terms.flatten()
     flattened_terms = self.flatten_generated_terms(flat_medline,
                                                    cut_terms)
     flattened_terms = self.limit_length(flat_medline, flattened_terms)
     if len(flat_medline) == 0:
         # logging.warn is a deprecated alias (removed in Python 3.13).
         logging.warning("No gold standard available for article %r. "
                         "Omitting it from the result set.", each_article)
         return
     eval_result = self.perform_evaluation(each_article,
                                           self.evaluator,
                                           flat_medline,
                                           flattened_terms)
     flattened_major_headings = \
         medline_record_mesh_terms.major_headings()
     logging.debug("Flattened MeSH terms: %r", flat_medline)
     logging.debug("Flattened generated terms: %r", flattened_terms)
     mh_result_temp = self.perform_evaluation(each_article, self.evaluator,
                                              flattened_major_headings,
                                              flattened_terms)
     mh_result = NamedResultSet("major_", mh_result_temp)
     # Compute the total recall, too
     total_recall = self.compute_total_recall(flat_medline,
                                              converted_terms)
     eval_result.add(total_recall)
     # Unify the result sets
     self.all_results[each_article.set_id] = eval_result | mh_result
Esempio n. 4
0
 def process_article(self, each_article):
     """Evaluate a single article and record its results.

     The article is ranked, the ranking is converted into terms, and the
     terms selected by ``terms_higher_than_or_equal_to(self._ranking_cutoff)``
     are evaluated against the article record's ``'MH'`` entry: first
     against all headings, then against the major headings only. Both
     result sets are merged and stored in ``self.all_results`` under
     ``each_article.set_id``.

     The method returns without storing anything when the article fails
     ``self.include_article``, when ``self.graph_and_rank`` raises
     ``CouldNotRank``, when the article record cannot be obtained or
     parsed, or when no gold-standard headings are available.
     """
     if not self.include_article(each_article):
         logging.log(ULTRADEBUG, "Skipping article %r due to exclusion "
                     " criteria.", each_article)
         return
     try:
         ranked_article = self.graph_and_rank(each_article)
     except CouldNotRank:
         # Deliberate best-effort: unrankable articles are left out.
         return
     logging.debug("Ranked article: %r", ranked_article)
     converted_terms = self.convert(ranked_article)
     logging.debug("Converted terms: %r", converted_terms)
     cut_terms = converted_terms.terms_higher_than_or_equal_to(
         self._ranking_cutoff)
     logging.debug("Cut terms: %r", cut_terms)
     try:
         medline_record_mesh_terms = ExpressionList().from_medline(
             each_article.set_id.article_record()['MH'])
     except Exception:
         # A bare except would also trap KeyboardInterrupt and SystemExit;
         # Exception keeps the best-effort skip while letting those through.
         logging.warning(
             "Could not obtain an article record for %r. "
             "Skipping.", each_article)
         return
     flat_medline = medline_record_mesh_terms.flatten()
     flattened_terms = self.flatten_generated_terms(flat_medline, cut_terms)
     flattened_terms = self.limit_length(flat_medline, flattened_terms)
     if len(flat_medline) == 0:
         # logging.warn is a deprecated alias (removed in Python 3.13).
         logging.warning(
             "No gold standard available for article %r. "
             "Omitting it from the result set.", each_article)
         return
     eval_result = self.perform_evaluation(each_article, self.evaluator,
                                           flat_medline, flattened_terms)
     flattened_major_headings = \
         medline_record_mesh_terms.major_headings()
     logging.debug("Flattened MeSH terms: %r", flat_medline)
     logging.debug("Flattened generated terms: %r", flattened_terms)
     mh_result_temp = self.perform_evaluation(each_article, self.evaluator,
                                              flattened_major_headings,
                                              flattened_terms)
     mh_result = NamedResultSet("major_", mh_result_temp)
     # Compute the total recall, too
     total_recall = self.compute_total_recall(flat_medline, converted_terms)
     eval_result.add(total_recall)
     # Unify the result sets
     self.all_results[each_article.set_id] = eval_result | mh_result
Esempio n. 5
0
 def as_ExpressionList(self):
     """Return an ExpressionList built from item 0 of every entry in self."""
     first_items = []
     for entry in self:
         # Only the first element of each entry is kept.
         first_items.append(entry[0])
     return ExpressionList(first_items)