Example #1
0
 def get_summary_block(self, text):
     """Extract the summary section from ``text``.

     HTML comments are removed via ``html_comment_pattern``; matches of the
     start/end patterns are then replaced with literal sentinel strings and
     ``str_find_between_regex`` (called with ``case=False``) is asked for
     the text between the two sentinels.

     If that first extraction yields a falsy result, a second attempt is
     made on the same marked-up text after replacing matches of
     ``summary_start_pattern_with_the`` with the bare start sentinel.

     Args:
         text: Raw text to search for a summary section.

     Returns:
         The result of ``str_find_between_regex`` from the first attempt if
         it is truthy, otherwise the result of the retry.
     """
     start, end = "summary_starts_here ", "summary_ends_here "

     marked = html_comment_pattern.sub("", text)
     marked = summary_start_pattern.sub(start, marked)
     if start not in marked:
         # The plain start pattern matched nothing; try the "with the"
         # variant instead. NOTE(review): presumably that pattern consumes
         # a leading "The", which is why it is re-inserted here - confirm.
         marked = summary_start_pattern_with_the.sub(start + " The ", marked)
     marked = summary_end_pattern.sub(end, marked)

     summary = str_find_between_regex(marked, start=start, end=end, case=False)
     if not summary:
         # Retry against the marked-up text. The previous code reused the
         # (empty) extraction result as the retry input, so the fallback
         # could never find anything and would fail outright on ``None``.
         retry_text = summary_start_pattern_with_the.sub(start, marked)
         summary = str_find_between_regex(
             retry_text, start=start, end=end, case=False
         )
     return summary
Example #2
0
 def get_infobox(self, text):
     """Return the infobox portion of ``text``.

     Matches of ``infobox_start_pattern`` and ``infobox_end_pattern`` are
     replaced with the literal markers ``"infobox_start"`` and
     ``"infobox_end"``; the marked-up text is then handed to
     ``str_find_between_regex`` (with ``case=False``) to pull out what lies
     between the two markers.

     Args:
         text: Raw text that may contain an infobox.

     Returns:
         Whatever ``str_find_between_regex`` returns for the marked-up text.
     """
     begin_marker = "infobox_start"
     finish_marker = "infobox_end"

     # Tag the opening first, then the closing, so both markers are present
     # before the extraction helper runs.
     with_begin = infobox_start_pattern.sub(begin_marker, text)
     with_both = infobox_end_pattern.sub(finish_marker, with_begin)

     return str_find_between_regex(
         with_both, start=begin_marker, end=finish_marker, case=False
     )