def get_summary_block(self, text):
    """Return the summary portion of ``text``.

    Matches of ``html_comment_pattern`` are removed first.  The start and
    end of the summary are then replaced with sentinel markers and the
    text between the markers is extracted with
    ``str_find_between_regex`` (case-insensitive).

    Args:
        text: Raw source text to search.

    Returns:
        Whatever ``str_find_between_regex`` yields for the marked-up text;
        a falsy value when no summary could be located.
        NOTE(review): the helper's exact "not found" value (empty string
        vs ``None``) is not visible here -- confirm against its definition.
    """
    text = html_comment_pattern.sub("", text)
    start, end = "summary_starts_here ", "summary_ends_here "

    marked = summary_start_pattern.sub(start, text)
    if start not in marked:
        # The primary start pattern matched nothing; fall back to the
        # "with the" variant and re-insert the article after the marker.
        marked = summary_start_pattern_with_the.sub(start + " The ", marked)
    marked = summary_end_pattern.sub(end, marked)

    summary = str_find_between_regex(marked, start=start, end=end, case=False)
    if not summary:
        # Retry on the marked-up text, not on the empty extraction result:
        # substituting into the falsy result could never produce a summary
        # (and would raise TypeError if the helper returned None).
        marked = summary_start_pattern_with_the.sub(start, marked)
        summary = str_find_between_regex(marked, start=start, end=end, case=False)
    return summary
def get_infobox(self, text):
    """Return the infobox portion of ``text``.

    Matches of ``infobox_start_pattern`` and ``infobox_end_pattern`` are
    replaced with sentinel markers, and the text between those markers is
    extracted with ``str_find_between_regex`` (case-insensitive).

    Args:
        text: Raw source text to search.

    Returns:
        The value produced by ``str_find_between_regex`` for the marked-up
        text.
    """
    begin_marker, finish_marker = "infobox_start", "infobox_end"

    # Tag the opening boundary first, then the closing one, so the helper
    # can slice on two fixed sentinel strings.
    with_begin = infobox_start_pattern.sub(begin_marker, text)
    with_both = infobox_end_pattern.sub(finish_marker, with_begin)

    return str_find_between_regex(
        with_both, start=begin_marker, end=finish_marker, case=False
    )