def load_page(self) -> None:
    """Load the page to be archived and break it up into threads.

    Resets ``header``, ``threads``, ``archives`` and ``archived_threads``
    on this instance, then fills ``header`` and ``threads`` from the text
    returned by ``self.get()``. One ``DiscussionThread`` is appended to
    ``threads`` for every section yielded by ``extract_sections()``.
    """
    self.header = ''
    self.threads = []
    self.archives = {}
    self.archived_threads = 0

    # Exclude unsupported headings (h1, h3, etc):
    # adding the marker will make them ignored by extract_sections()
    text = self.get()
    marker = findmarker(text)
    text = re.sub(r'^((=|={3,})[^=])', marker + r'\1', text, flags=re.M)

    # Find threads, avoid archiving categories or interwiki
    header, threads, footer = extract_sections(text, self.site)
    header = header.replace(marker, '')
    if header and footer:
        # Both parts present: separate them by a blank line and end the
        # combined header with a blank line as well.
        self.header = '\n\n'.join((header.rstrip(), footer, ''))
    else:
        self.header = header + footer

    for thread_heading, thread_content in threads:
        cur_thread = DiscussionThread(thread_heading.strip('= '),
                                      self.timestripper)
        # Remove the heading line. A slice is used rather than
        # ``_, *lines = ...`` because splitlines() returns an empty list
        # for empty content, which would make the unpacking raise
        # ValueError; the slice simply yields no lines in that case.
        lines = thread_content.replace(marker, '').splitlines()[1:]
        for line in lines:
            cur_thread.feed_line(line)
        self.threads.append(cur_thread)

    # This extra info is not desirable when run under the unittest
    # framework, which may be run either directly or via setup.py
    if pywikibot.calledModuleName() not in ['archivebot_tests', 'setup']:
        pywikibot.output('{} thread(s) found on {}'.format(
            len(self.threads), self))
def load_page(self):
    """Load the page to be archived and break it up into threads.

    The instance state (``header``, ``threads``, ``archives``,
    ``archived_threads``) is reset first. The text returned by
    ``self.get()`` is then split with ``extract_sections()``; the parts
    before and after the sections are stored in ``header`` and each
    section becomes a ``DiscussionThread`` appended to ``threads``.
    """
    # Start from a clean state.
    self.header = ''
    self.threads = []
    self.archives = {}
    self.archived_threads = 0

    # Exclude non-thread headings: every line starting with '===' gets
    # the marker put in front of it before the text is split.
    page_text = self.get()
    marker = findmarker(page_text)
    marked_text = re.sub(r'^===', marker + r'===', page_text, flags=re.M)

    # Find threads, avoid archiving categories or interwiki
    top, sections, bottom = extract_sections(marked_text, self.site)
    top = top.replace(marker, '')
    if not (top and bottom):
        # At least one of the two parts is empty: plain concatenation.
        self.header = top + bottom
    else:
        self.header = '\n\n'.join((top.rstrip(), bottom, ''))

    for heading, content in sections:
        thread = DiscussionThread(heading.strip('= '), self.now,
                                  self.timestripper)
        content_lines = content.replace(marker, '').splitlines()
        # The first line is the heading line and is not fed to the thread.
        for content_line in content_lines[1:]:
            thread.feed_line(content_line)
        self.threads.append(thread)

    # This extra info is not desirable when run under the unittest
    # framework, which may be run either directly or via setup.py
    caller = pywikibot.calledModuleName()
    run_by_tests = caller in ['archivebot_tests', 'setup']
    if not run_by_tests:
        pywikibot.output(u'%d Threads found on %s'
                         % (len(self.threads), self))