def load_page(self):
    """Load the page to be archived and break it up into threads.

    Populates self.header, self.threads, self.archives and
    self.archived_threads from the page's current wikitext.
    """
    self.header = ''
    self.threads = []
    self.archives = {}
    self.archived_threads = 0
    # Exclude non-thread headings
    text = self.get()
    # findmarker() returns a token not present in the text; presumably it
    # is stripped back out after sectioning — TODO confirm in findmarker()
    marker = findmarker(text)
    # Prefix level-3 headings with the marker so extract_sections() does
    # not treat them as top-level thread boundaries.
    text = re.sub(r'^===', marker + r'===', text, flags=re.M)
    # Find threads, avoid archiving categories or interwiki
    header, threads, footer = extract_sections(text, self.site)
    header = header.replace(marker, '')
    if header and footer:
        # Keep a blank line between header and footer and end with '\n\n'.
        self.header = '\n\n'.join((header.rstrip(), footer, ''))
    else:
        self.header = header + footer
    for thread_heading, thread_content in threads:
        cur_thread = DiscussionThread(thread_heading.strip('= '), self.now,
                                      self.timestripper)
        lines = thread_content.replace(marker, '').splitlines()
        lines = lines[1:]  # remove heading line
        for line in lines:
            cur_thread.feed_line(line)
        self.threads.append(cur_thread)
    # This extra info is not desirable when run under the unittest
    # framework, which may be run either directly or via setup.py
    if pywikibot.calledModuleName() not in ['archivebot_tests', 'setup']:
        pywikibot.output(u'%d Threads found on %s'
                         % (len(self.threads), self))
def load_page(self) -> None:
    """Load the page to be archived and break it up into threads.

    Populates self.header, self.threads, self.archives and
    self.archived_threads from the page's current wikitext.
    """
    self.header = ''
    self.threads = []
    self.archives = {}
    self.archived_threads = 0
    # Exclude unsupported headings (h1, h3, etc):
    # adding the marker will make them ignored by extract_sections()
    text = self.get()
    marker = findmarker(text)
    # Matches '=' (h1) or '===' and deeper (h3+) at line start; the marker
    # is re-inserted before them and stripped again below.
    text = re.sub(r'^((=|={3,})[^=])', marker + r'\1', text, flags=re.M)
    # Find threads, avoid archiving categories or interwiki
    header, threads, footer = extract_sections(text, self.site)
    header = header.replace(marker, '')
    if header and footer:
        # Keep a blank line between header and footer and end with '\n\n'.
        self.header = '\n\n'.join((header.rstrip(), footer, ''))
    else:
        self.header = header + footer
    for thread_heading, thread_content in threads:
        cur_thread = DiscussionThread(thread_heading.strip('= '),
                                      self.timestripper)
        # remove heading line
        _, *lines = thread_content.replace(marker, '').splitlines()
        for line in lines:
            cur_thread.feed_line(line)
        self.threads.append(cur_thread)
    # This extra info is not desirable when run under the unittest
    # framework, which may be run either directly or via setup.py
    if pywikibot.calledModuleName() not in ['archivebot_tests', 'setup']:
        pywikibot.output('{} thread(s) found on {}'.format(
            len(self.threads), self))
def load_page(self):
    """Load the page to be archived and break it up into threads.

    Lines before the first level-2 heading form the header; every
    level-2 heading starts a new DiscussionThread.
    """
    self.header = ''
    self.threads = []
    self.archives = {}
    self.archived_threads = 0
    in_threads = False  # still reading the page header while False
    current = None
    header_lines = []
    for page_line in self.get().split('\n'):
        heading = re.search('^== *([^=].*?) *== *$', page_line)
        if heading:
            # A new level-2 heading closes the previous thread.
            in_threads = True
            if current:
                self.threads.append(current)
            current = DiscussionThread(heading.group(1), self.now,
                                       self.timestripper)
        elif in_threads:
            current.feed_line(page_line)
        else:
            header_lines.append(page_line + '\n')
    self.header = ''.join(header_lines)
    if current:
        self.threads.append(current)
    # This extra info is not desirable when run under the unittest
    # framework, which may be run either directly or via setup.py
    if pywikibot.calledModuleName() not in ['archivebot_tests', 'setup']:
        pywikibot.output(u'%d Threads found on %s'
                         % (len(self.threads), self))
def fixSelfInterwiki(self, text):
    """
    Interwiki links to the site itself are displayed like local links.

    Remove their language code prefix.

    @param text: page wikitext to clean up
    @return: text with same-language interwiki prefixes removed
    """
    # Bug fix: the original used the Python 2-only '<>' operator, which is
    # a SyntaxError on Python 3; '!=' is the portable spelling.
    if not self.talkpage and pywikibot.calledModuleName() != 'interwiki':
        interwikiR = re.compile(r'\[\[%s\s?:([^\[\]\n]*)\]\]'
                                % self.site.lang)
        text = interwikiR.sub(r'[[\1]]', text)
    return text
def fixSelfInterwiki(self, text):
    """
    Interwiki links to the site itself are displayed like local links.

    Remove their language code prefix.
    """
    # Guard clause: leave talk pages and interwiki-bot runs untouched.
    if self.talkpage or pywikibot.calledModuleName() == 'interwiki':
        return text
    own_lang_link = re.compile(r'\[\[%s\s?:([^\[\]\n]*)\]\]'
                               % self.site.lang)
    return own_lang_link.sub(r'[[\1]]', text)
def _module_hash(module=None) -> str: """Convert called module name to a hash.""" if module is None: module = pywikibot.calledModuleName() module = module.encode() if blake2b: hashobj = blake2b(module, digest_size=2) else: hashobj = md5(module) return hashobj.hexdigest()[:4] # slice for Python 3.5
def test_default_user_agent(self):
    """Config defined format string test."""
    agent = http.user_agent()
    self.assertTrue(agent.startswith(pywikibot.calledModuleName()))
    self.assertIn('Pywikibot/' + pywikibot.__release__, agent)
    # No whitespace and no empty or dangling comment parentheses allowed.
    for fragment in (' ', '()', '(;', ';)'):
        self.assertNotIn(fragment, agent)
    self.assertIn('httplib2/', agent)
    self.assertIn('Python/' + str(sys.version_info[0]), agent)
def test_default_user_agent(self):
    """Config defined format string test."""
    agent = http.user_agent()
    self.assertTrue(agent.startswith(pywikibot.calledModuleName()))
    self.assertIn('Pywikibot/' + pywikibot.__release__, agent)
    # No whitespace and no empty or dangling comment parentheses allowed.
    for fragment in (' ', '()', '(;', ';)'):
        self.assertNotIn(fragment, agent)
    self.assertIn('requests/', agent)
    self.assertIn('Python/' + str(PYTHON_VERSION[0]), agent)
def fixSelfInterwiki(self, text: str) -> str:
    """
    Interwiki links to the site itself are displayed like local links.

    Remove their language code prefix.
    """
    # Guard clause: leave talk pages and interwiki-bot runs untouched.
    if self.talkpage or pywikibot.calledModuleName() == 'interwiki':
        return text
    own_link = re.compile(r'\[\[(?: *:)? *{} *: *([^\[\]\n]*)\]\]'
                          .format(self.site.code))
    return own_link.sub(r'[[\1]]', text)
def test_user_agent(self):
    """Test different variants of user agents."""
    x = self.get_site()
    # Fake a logged-in user; asserts below are order-dependent because
    # the same site object is re-used with mutated credentials.
    x._userinfo = {'name': 'foo'}
    x._username = ('foo', None)
    self.assertEqual('Pywikibot/' + pywikibot.__version__,
                     user_agent(x, format_string='{pwb}'))
    self.assertEqual(x.family.name,
                     user_agent(x, format_string='{family}'))
    self.assertEqual(x.code, user_agent(x, format_string='{lang}'))
    self.assertEqual(x.family.name + ' ' + x.code,
                     user_agent(x, format_string='{family} {lang}'))
    self.assertEqual(x.username(),
                     user_agent(x, format_string='{username}'))
    x._userinfo = {'name': '!'}
    x._username = ('!', None)
    self.assertEqual('!', user_agent(x, format_string='{username}'))
    x._userinfo = {'name': 'foo bar'}
    x._username = ('foo bar', None)
    # Spaces in user names are replaced by underscores.
    self.assertEqual('foo_bar', user_agent(x, format_string='{username}'))
    old_config = '{script}/{version} Pywikibot/2.0 (User:{username})'
    # Populate pywikibot.version.cache used below.
    pywikibot.version.getversiondict()
    script_value = (pywikibot.calledModuleName() + '/'
                    + pywikibot.version.cache['rev'])
    self.assertEqual(script_value + ' Pywikibot/2.0 (User:foo_bar)',
                     user_agent(x, format_string=old_config))
    x._userinfo = {'name': '⁂'}
    x._username = ('⁂', None)
    # Non-ASCII user names are percent-encoded.
    self.assertEqual('%E2%81%82',
                     user_agent(x, format_string='{username}'))
    x._userinfo = {'name': '127.0.0.1'}
    x._username = (None, None)
    # Anonymous (IP) users leave the {username} placeholder empty.
    self.assertEqual('Foo', user_agent(x, format_string='Foo {username}'))
    self.assertEqual('Foo (' + x.family.name + ':' + x.code + ')',
                     user_agent(x, format_string='Foo ({script_comments})'))
def load_page(self):
    """Load the page to be archived and break it up into threads.

    Thread headings are detected on a copy of the text in which
    non-wikitext regions are blanked, so headings inside e.g. <nowiki>
    or comments are not treated as thread boundaries.
    """
    self.header = ''
    self.threads = []
    self.archives = {}
    self.archived_threads = 0
    text = self.get()
    # Replace text in following exceptions by spaces, but don't change line
    # numbers and character positions
    exceptions = ['comment', 'code', 'pre', 'source', 'nowiki']
    exc_regexes = _get_regexes(exceptions, self.site)
    stripped_text = text
    for regex in exc_regexes:
        for match in re.finditer(regex, stripped_text):
            before = stripped_text[:match.start()]
            restricted = stripped_text[match.start():match.end()]
            after = stripped_text[match.end():]
            # Blank everything except newlines so positions stay aligned
            # with the original text.
            restricted = re.sub(r'[^\n]', ' ', restricted)
            stripped_text = before + restricted + after
    # Find thread headers in stripped text and return their line numbers
    stripped_lines = stripped_text.split('\n')
    thread_headers = []
    for line_number, line in enumerate(stripped_lines, start=1):
        if re.search(r'^== *[^=].*? *== *$', line):
            thread_headers.append(line_number)
    # Fill self by original thread headers on returned line numbers
    lines = text.split('\n')
    found = False  # Reading header
    cur_thread = None
    for line_number, line in enumerate(lines, start=1):
        if line_number in thread_headers:
            # Re-match against the original (unstripped) line to get the
            # real heading text.
            thread_header = re.search('^== *([^=].*?) *== *$', line)
            found = True  # Reading threads now
            if cur_thread:
                self.threads.append(cur_thread)
            cur_thread = DiscussionThread(thread_header.group(1), self.now,
                                          self.timestripper)
        else:
            if found:
                cur_thread.feed_line(line)
            else:
                self.header += line + '\n'
    if cur_thread:
        self.threads.append(cur_thread)
    # This extra info is not desirable when run under the unittest
    # framework, which may be run either directly or via setup.py
    if pywikibot.calledModuleName() not in ['archivebot_tests', 'setup']:
        pywikibot.output(u'%d Threads found on %s'
                         % (len(self.threads), self))
def test_user_agent(self):
    """Test different variants of user agents."""
    x = self.get_site()
    # Fake a logged-in user; asserts below are order-dependent because
    # the same site object is re-used with mutated credentials.
    x._userinfo = {'name': 'foo'}
    x._username = ('foo', None)
    self.assertEqual('Pywikibot/' + pywikibot.__release__,
                     user_agent(x, format_string='{pwb}'))
    self.assertEqual(x.family.name,
                     user_agent(x, format_string='{family}'))
    self.assertEqual(x.code, user_agent(x, format_string='{lang}'))
    self.assertEqual(x.family.name + ' ' + x.code,
                     user_agent(x, format_string='{family} {lang}'))
    self.assertEqual(x.username(),
                     user_agent(x, format_string='{username}'))
    x._userinfo = {'name': u'!'}
    x._username = (u'!', None)
    self.assertEqual('!', user_agent(x, format_string='{username}'))
    x._userinfo = {'name': u'foo bar'}
    x._username = (u'foo bar', None)
    # Spaces in user names are replaced by underscores.
    self.assertEqual('foo_bar', user_agent(x, format_string='{username}'))
    old_config = '{script}/{version} Pywikibot/2.0 (User:{username})'
    # Populate pywikibot.version.cache used below.
    pywikibot.version.getversiondict()
    script_value = pywikibot.calledModuleName() + '/' + \
        pywikibot.version.cache['rev']
    self.assertEqual(script_value + ' Pywikibot/2.0 (User:foo_bar)',
                     user_agent(x, format_string=old_config))
    x._userinfo = {'name': u'⁂'}
    x._username = (u'⁂', None)
    # Non-ASCII user names are percent-encoded.
    self.assertEqual('%E2%81%82',
                     user_agent(x, format_string='{username}'))
    x._userinfo = {'name': u'127.0.0.1'}
    x._username = (None, None)
    # Anonymous (IP) users leave the {username} placeholder empty.
    self.assertEqual('Foo', user_agent(x, format_string='Foo {username}'))
    self.assertEqual('Foo (' + x.family.name + ':' + x.code + ')',
                     user_agent(x, format_string='Foo ({script_comments})'))
def putSpacesInLists(self, text):
    """
    For better readability of bullet list and enumeration wiki source code,
    puts a space between the * or # and the text.

    NOTE: This space is recommended in the syntax help on the English,
    German, and French Wikipedia. It might be that it is not wanted on
    other wikis. If there are any complaints, please file a bug report.
    """
    exceptions = ['comment', 'math', 'nowiki', 'pre', 'source', 'timeline']
    if not (self.redirect or self.template) and \
       pywikibot.calledModuleName() != 'capitalize_redirects':
        # Bug fix: the replacement template must be a raw string; '\g' in
        # a plain string is an invalid escape sequence (SyntaxWarning and
        # eventually an error in modern Python). The resulting value is
        # unchanged.
        text = pywikibot.replaceExcept(
            text,
            r'(?m)^(?P<bullet>[:;]*(\*+|#+)[:;\*#]*)(?P<char>[^\s\*#:;].+?)',
            r'\g<bullet> \g<char>', exceptions)
    return text
def test_cosmetic_changes_hook(self):
    """Test summary result of Page._cosmetic_changes_hook."""
    test_page = pywikibot.Page(self.site, 'Test')
    test_page.text = 'Some content with spaces.'
    # Verify the cosmetic-changes preconditions first.
    config.cosmetic_changes_mylang_only = False
    self.assertFalse(test_page.isTalkPage())
    self.assertNotIn(pywikibot.calledModuleName(),
                     config.cosmetic_changes_deny_script)
    self.assertFalse(config.cosmetic_changes_mylang_only)
    if test_page.content_model != 'wikitext':
        self.skipTest('Wrong content model {!r} for cosmetic_changes'
                      .format(test_page.content_model))
    edit_summary = 'Working on Test page at site {}'.format(self.site)
    result = test_page._cosmetic_changes_hook(edit_summary)
    self.assertEqual(result, edit_summary + '; cosmetic changes')
def putSpacesInLists(self, text):
    """
    For better readability of bullet list and enumeration wiki source code,
    puts a space between the * or # and the text.

    NOTE: This space is recommended in the syntax help on the English,
    German, and French Wikipedia. It might be that it is not wanted on
    other wikis. If there are any complaints, please file a bug report.
    """
    exceptions = ['comment', 'math', 'nowiki', 'pre', 'source', 'template',
                  'timeline']
    if not (self.redirect or self.template) and \
       pywikibot.calledModuleName() != 'capitalize_redirects':
        # Bug fix: the replacement template must be a raw string; '\g' in
        # a plain string is an invalid escape sequence (SyntaxWarning and
        # eventually an error in modern Python). The resulting value is
        # unchanged.
        text = pywikibot.replaceExcept(
            text,
            r'(?m)^(?P<bullet>[:;]*(\*+|#+)[:;\*#]*)(?P<char>[^\s\*#:;].+?)',
            r'\g<bullet> \g<char>', exceptions)
    return text
_logger = "comm.http" # global variables # The OpenSSL error code for # certificate verify failed # cf. `openssl errstr 14090086` SSL_CERT_VERIFY_FAILED = ":14090086:" # the User-agent: header. The default is # '<script>/<revision> Pywikibot/2.0', where '<script>' is the currently # executing script and version is the Git revision of Pywikibot. USER_AGENT_FORMAT = '{script}/r{version[rev]} Pywikibot/2.0' useragent = USER_AGENT_FORMAT.format(script=pywikibot.calledModuleName(), version=pywikibot.version.getversiondict()) numthreads = 1 threads = [] connection_pool = threadedhttp.ConnectionPool() http_queue = Queue.Queue() cookie_jar = threadedhttp.LockableCookieJar( config.datafilepath("pywikibot.lwp")) try: cookie_jar.load() except (IOError, cookielib.LoadError): pywikibot.debug(u"Loading cookies failed.", _logger) else: pywikibot.debug(u"Loaded cookies from file.", _logger)
from pywikibot.exceptions import Server504Error
import pywikibot
import cookielib
import threadedhttp
import pywikibot.version

_logger = "comm.http"

# global variables

# the User-agent: header. The default is
# '<script>/<revision> Pywikipediabot/2.0', where '<script>' is the currently
# executing script and version is the SVN revision of Pywikipediabot.
USER_AGENT_FORMAT = '{script}/r{version[rev]} Pywikipediabot/2.0'
useragent = USER_AGENT_FORMAT.format(script=pywikibot.calledModuleName(),
                                     version=pywikibot.version.getversiondict())

# Worker-pool state for asynchronous HTTP requests.
numthreads = 1
threads = []

connection_pool = threadedhttp.ConnectionPool()
http_queue = Queue.Queue()

# Shared cookie jar persisted to the user data directory; loading may
# fail harmlessly on first run (file does not exist yet).
cookie_jar = threadedhttp.LockableCookieJar(
    config.datafilepath("pywikibot.lwp"))
try:
    cookie_jar.load()
except (IOError, cookielib.LoadError):
    pywikibot.debug(u"Loading cookies failed.", _logger)
else:
    pywikibot.debug(u"Loaded cookies from file.", _logger)
from pywikibot.exceptions import Server504Error
import pywikibot
import cookielib
import threadedhttp
import pywikibot.version

_logger = "comm.http"

# global variables

# the User-agent: header. The default is
# '<script>/<revision> Pywikipediabot/2.0', where '<script>' is the currently
# executing script and version is the SVN revision of Pywikipediabot.
USER_AGENT_FORMAT = '{script}/r{version[rev]} Pywikipediabot/2.0'
useragent = USER_AGENT_FORMAT.format(
    script=pywikibot.calledModuleName(),
    version=pywikibot.version.getversiondict())

# Worker-pool state for asynchronous HTTP requests.
numthreads = 1
threads = []

connection_pool = threadedhttp.ConnectionPool()
http_queue = Queue.Queue()

# Shared cookie jar persisted to the user data directory; loading may
# fail harmlessly on first run (file does not exist yet).
cookie_jar = threadedhttp.LockableCookieJar(
    config.datafilepath("pywikibot.lwp"))
try:
    cookie_jar.load()
except (IOError, cookielib.LoadError):
    pywikibot.debug(u"Loading cookies failed.", _logger)
else:
    pywikibot.debug(u"Loaded cookies from file.", _logger)