예제 #1
0
    def load_page(self):
        """Fetch the page text and split it into a header and threads.

        The header (joined with the footer, if both exist) is stored in
        ``self.header``; one ``DiscussionThread`` per section returned by
        ``extract_sections()`` is appended to ``self.threads``.
        """
        self.header = ''
        self.threads = []
        self.archives = {}
        self.archived_threads = 0

        # Exclude non-thread headings: lines starting with '===' get a
        # marker in front of them; the marker is removed again below.
        page_text = self.get()
        marker = findmarker(page_text)
        page_text = re.sub(r'^===', marker + r'===', page_text, flags=re.M)

        # Find threads, avoid archiving categories or interwiki
        header, sections, footer = extract_sections(page_text, self.site)
        header = header.replace(marker, '')
        if not (header and footer):
            self.header = header + footer
        else:
            self.header = '\n\n'.join((header.rstrip(), footer, ''))

        for heading, content in sections:
            thread = DiscussionThread(heading.strip('= '), self.now,
                                      self.timestripper)
            # The first line is the heading line, so it is skipped.
            for text_line in content.replace(marker, '').splitlines()[1:]:
                thread.feed_line(text_line)
            self.threads.append(thread)

        # This extra info is not desirable when run under the unittest
        # framework, which may be run either directly or via setup.py
        if pywikibot.calledModuleName() in ['archivebot_tests', 'setup']:
            return
        pywikibot.output(u'%d Threads found on %s' %
                         (len(self.threads), self))
예제 #2
0
    def load_page(self) -> None:
        """Load the page to be archived and break it up into threads.

        The header (joined with the footer, if both exist) is stored in
        ``self.header``; one ``DiscussionThread`` per section returned by
        ``extract_sections()`` is appended to ``self.threads``.
        """
        self.header = ''
        self.threads = []
        self.archives = {}
        self.archived_threads = 0

        # Exclude unsupported headings (h1, h3, etc):
        # adding the marker will make them ignored by extract_sections()
        text = self.get()
        marker = findmarker(text)
        text = re.sub(r'^((=|={3,})[^=])', marker + r'\1', text, flags=re.M)

        # Find threads, avoid archiving categories or interwiki
        header, threads, footer = extract_sections(text, self.site)
        header = header.replace(marker, '')
        if header and footer:
            self.header = '\n\n'.join((header.rstrip(), footer, ''))
        else:
            self.header = header + footer
        for thread_heading, thread_content in threads:
            cur_thread = DiscussionThread(thread_heading.strip('= '),
                                          self.timestripper)
            # Remove the heading line.  Slicing instead of unpacking keeps
            # this from raising ValueError when the content has no lines.
            lines = thread_content.replace(marker, '').splitlines()[1:]
            for line in lines:
                cur_thread.feed_line(line)
            self.threads.append(cur_thread)

        # This extra info is not desirable when run under the unittest
        # framework, which may be run either directly or via setup.py
        if pywikibot.calledModuleName() not in ['archivebot_tests', 'setup']:
            pywikibot.output('{} thread(s) found on {}'.format(
                len(self.threads), self))
예제 #3
0
 def load_page(self):
     """Read the page line by line and group the lines into threads.

     Every line of the form ``== title ==`` starts a new
     ``DiscussionThread``; lines before the first such heading are
     collected in ``self.header``.
     """
     self.header = ''
     self.threads = []
     self.archives = {}
     self.archived_threads = 0
     in_threads = False  # still reading the header
     current = None
     for raw_line in self.get().split('\n'):
         heading = re.search('^== *([^=].*?) *== *$', raw_line)
         if not heading:
             if in_threads:
                 current.feed_line(raw_line)
             else:
                 self.header += raw_line + '\n'
             continue
         in_threads = True  # reading threads from now on
         if current:
             self.threads.append(current)
         current = DiscussionThread(heading.group(1), self.now,
                                    self.timestripper)
     if current:
         self.threads.append(current)
     # This extra info is not desirable when run under the unittest
     # framework, which may be run either directly or via setup.py
     if pywikibot.calledModuleName() in ['archivebot_tests', 'setup']:
         return
     pywikibot.output(u'%d Threads found on %s'
                      % (len(self.threads), self))
예제 #4
0
 def fixSelfInterwiki(self, text):
     """
     Remove the language code prefix from interwiki links to this site.

     Interwiki links to the site itself are displayed like local links.
     The text is returned unchanged for talk pages and when the calling
     module is 'interwiki'.
     """
     # '!=' replaces the '<>' operator, which is a syntax error in Python 3.
     if not self.talkpage and pywikibot.calledModuleName() != 'interwiki':
         interwikiR = re.compile(r'\[\[%s\s?:([^\[\]\n]*)\]\]' % self.site.lang)
         text = interwikiR.sub(r'[[\1]]', text)
     return text
예제 #5
0
 def fixSelfInterwiki(self, text):
     """
     Remove the language code prefix from interwiki links to this site.

     Interwiki links to the site itself are displayed like local links.
     The text is returned unchanged for talk pages and when the calling
     module is 'interwiki'.
     """
     if self.talkpage or pywikibot.calledModuleName() == 'interwiki':
         return text
     pattern = r'\[\[%s\s?:([^\[\]\n]*)\]\]' % self.site.lang
     return re.compile(pattern).sub(r'[[\1]]', text)
예제 #6
0
 def _module_hash(module=None) -> str:
     """Convert called module name to a hash."""
     if module is None:
         module = pywikibot.calledModuleName()
     module = module.encode()
     if blake2b:
         hashobj = blake2b(module, digest_size=2)
     else:
         hashobj = md5(module)
     return hashobj.hexdigest()[:4]  # slice for Python 3.5
예제 #7
0
 def test_default_user_agent(self):
     """Test the default user agent (config defined format string)."""
     agent = http.user_agent()
     self.assertTrue(agent.startswith(pywikibot.calledModuleName()))
     self.assertIn('Pywikibot/' + pywikibot.__release__, http.user_agent())
     # None of these fragments of an incompletely filled format string
     # may appear in the result.
     for fragment in ('  ', '()', '(;', ';)'):
         self.assertNotIn(fragment, http.user_agent())
     self.assertIn('httplib2/', http.user_agent())
     python_major = str(sys.version_info[0])
     self.assertIn('Python/' + python_major, http.user_agent())
예제 #8
0
 def test_default_user_agent(self):
     """Test the default user agent (config defined format string)."""
     agent = http.user_agent()
     self.assertTrue(agent.startswith(pywikibot.calledModuleName()))
     self.assertIn('Pywikibot/' + pywikibot.__release__, http.user_agent())
     # None of these fragments of an incompletely filled format string
     # may appear in the result.
     for fragment in ('  ', '()', '(;', ';)'):
         self.assertNotIn(fragment, http.user_agent())
     self.assertIn('requests/', http.user_agent())
     python_major = str(PYTHON_VERSION[0])
     self.assertIn('Python/' + python_major, http.user_agent())
 def test_default_user_agent(self):
     """Test the default user agent (config defined format string)."""
     agent = http.user_agent()
     self.assertTrue(agent.startswith(pywikibot.calledModuleName()))
     self.assertIn('Pywikibot/' + pywikibot.__release__, http.user_agent())
     # None of these fragments of an incompletely filled format string
     # may appear in the result.
     for fragment in ('  ', '()', '(;', ';)'):
         self.assertNotIn(fragment, http.user_agent())
     self.assertIn('requests/', http.user_agent())
     python_major = str(PYTHON_VERSION[0])
     self.assertIn('Python/' + python_major, http.user_agent())
예제 #10
0
 def test_default_user_agent(self):
     """Test the default user agent (config defined format string)."""
     agent = http.user_agent()
     self.assertTrue(agent.startswith(pywikibot.calledModuleName()))
     self.assertIn('Pywikibot/' + pywikibot.__release__, http.user_agent())
     # None of these fragments of an incompletely filled format string
     # may appear in the result.
     for fragment in ('  ', '()', '(;', ';)'):
         self.assertNotIn(fragment, http.user_agent())
     self.assertIn('httplib2/', http.user_agent())
     python_major = str(sys.version_info[0])
     self.assertIn('Python/' + python_major, http.user_agent())
예제 #11
0
    def fixSelfInterwiki(self, text: str) -> str:
        """
        Remove the site code prefix from interwiki links to this site.

        Interwiki links to the site itself are displayed like local links.
        The text is returned unchanged for talk pages and when the calling
        module is 'interwiki'.
        """
        if self.talkpage or pywikibot.calledModuleName() == 'interwiki':
            return text
        pattern = (r'\[\[(?: *:)? *{} *: *([^\[\]\n]*)\]\]'
                   .format(self.site.code))
        return re.compile(pattern).sub(r'[[\1]]', text)
예제 #12
0
    def test_user_agent(self):
        """Test different variants of user agents."""
        site = self.get_site()

        def set_user(name, logged_in=True):
            """Overwrite the user data stored on the site object."""
            site._userinfo = {'name': name}
            site._username = (name if logged_in else None, None)

        def agent(fmt):
            """Return the user agent of the site for a format string."""
            return user_agent(site, format_string=fmt)

        set_user('foo')

        self.assertEqual('Pywikibot/' + pywikibot.__version__,
                         agent('{pwb}'))

        self.assertEqual(site.family.name, agent('{family}'))
        self.assertEqual(site.code, agent('{lang}'))
        self.assertEqual(site.family.name + ' ' + site.code,
                         agent('{family} {lang}'))

        self.assertEqual(site.username(), agent('{username}'))

        set_user('!')
        self.assertEqual('!', agent('{username}'))

        # A blank in the user name shows up as an underscore.
        set_user('foo bar')
        self.assertEqual('foo_bar', agent('{username}'))

        old_config = '{script}/{version} Pywikibot/2.0 (User:{username})'

        pywikibot.version.getversiondict()
        script_value = '/'.join((pywikibot.calledModuleName(),
                                 pywikibot.version.cache['rev']))

        self.assertEqual(script_value + ' Pywikibot/2.0 (User:foo_bar)',
                         agent(old_config))

        # A non-ASCII user name shows up percent-encoded.
        set_user('⁂')
        self.assertEqual('%E2%81%82', agent('{username}'))

        # No user name is set on the site object.
        set_user('127.0.0.1', logged_in=False)

        self.assertEqual('Foo', agent('Foo {username}'))
        self.assertEqual('Foo (' + site.family.name + ':' + site.code + ')',
                         agent('Foo ({script_comments})'))
예제 #13
0
    def load_page(self):
        """Load the page to be archived and break it up into threads.

        Text inside comment, code, pre, source and nowiki regions is
        replaced by spaces (line breaks and character positions are kept)
        before looking for ``== heading ==`` lines, so a heading inside
        such a region does not start a thread.  Lines before the first
        heading are collected in ``self.header``.
        """
        self.header = ''
        self.threads = []
        self.archives = {}
        self.archived_threads = 0
        text = self.get()

        # Replace text in following exceptions by spaces, but don't change line
        # numbers and character positions
        exceptions = ['comment', 'code', 'pre', 'source', 'nowiki']
        exc_regexes = _get_regexes(exceptions, self.site)
        stripped_text = text
        for regex in exc_regexes:
            for match in re.finditer(regex, stripped_text):
                before = stripped_text[:match.start()]
                restricted = stripped_text[match.start():match.end()]
                after = stripped_text[match.end():]
                restricted = re.sub(r'[^\n]', ' ', restricted)
                stripped_text = before + restricted + after

        # Find thread headers in stripped text and remember, per line number,
        # where the title sits within the line
        header_regex = re.compile(r'^== *([^=].*?) *== *$')
        title_spans = {}
        for line_number, line in enumerate(stripped_text.split('\n'),
                                           start=1):
            stripped_header = header_regex.search(line)
            if stripped_header:
                title_spans[line_number] = stripped_header.span(1)

        # Fill self by original thread headers on returned line numbers
        found = False  # Reading header
        cur_thread = None
        for line_number, line in enumerate(text.split('\n'), start=1):
            span = title_spans.get(line_number)
            if span is not None:
                thread_header = header_regex.search(line)
                if thread_header:
                    title = thread_header.group(1)
                else:
                    # The original line only looks like a header once the
                    # excepted text is blanked, e.g. '== T ==<!-- c -->'.
                    # Positions are unchanged by the blanking, so the span
                    # found in the stripped line gives the title.
                    title = line[span[0]:span[1]]
                found = True  # Reading threads now
                if cur_thread:
                    self.threads.append(cur_thread)
                cur_thread = DiscussionThread(title, self.now,
                                              self.timestripper)
            else:
                if found:
                    cur_thread.feed_line(line)
                else:
                    self.header += line + '\n'
        if cur_thread:
            self.threads.append(cur_thread)
        # This extra info is not desirable when run under the unittest
        # framework, which may be run either directly or via setup.py
        if pywikibot.calledModuleName() not in ['archivebot_tests', 'setup']:
            pywikibot.output(u'%d Threads found on %s' %
                             (len(self.threads), self))
    def test_user_agent(self):
        """Test different variants of user agents."""
        site = self.get_site()

        def set_user(name, logged_in=True):
            """Overwrite the user data stored on the site object."""
            site._userinfo = {'name': name}
            site._username = (name if logged_in else None, None)

        def agent(fmt):
            """Return the user agent of the site for a format string."""
            return user_agent(site, format_string=fmt)

        set_user('foo')

        self.assertEqual('Pywikibot/' + pywikibot.__release__,
                         agent('{pwb}'))

        self.assertEqual(site.family.name, agent('{family}'))
        self.assertEqual(site.code, agent('{lang}'))
        self.assertEqual(site.family.name + ' ' + site.code,
                         agent('{family} {lang}'))

        self.assertEqual(site.username(), agent('{username}'))

        set_user(u'!')
        self.assertEqual('!', agent('{username}'))

        # A blank in the user name shows up as an underscore.
        set_user(u'foo bar')
        self.assertEqual('foo_bar', agent('{username}'))

        old_config = '{script}/{version} Pywikibot/2.0 (User:{username})'

        pywikibot.version.getversiondict()
        script_value = (pywikibot.calledModuleName() + '/'
                        + pywikibot.version.cache['rev'])

        self.assertEqual(script_value + ' Pywikibot/2.0 (User:foo_bar)',
                         agent(old_config))

        # A non-ASCII user name shows up percent-encoded.
        set_user(u'⁂')
        self.assertEqual('%E2%81%82', agent('{username}'))

        # No user name is set on the site object.
        set_user(u'127.0.0.1', logged_in=False)

        self.assertEqual('Foo', agent('Foo {username}'))
        self.assertEqual('Foo (' + site.family.name + ':' + site.code + ')',
                         agent('Foo ({script_comments})'))
예제 #15
0
    def putSpacesInLists(self, text):
        """
        Put a space between the list markers (* or #) and the item text.

        This is meant to make bullet lists and enumerations easier to read
        in the wiki source code.  The text is returned unchanged for
        redirects and templates, and when the calling module is
        'capitalize_redirects'.

        NOTE: This space is recommended in the syntax help on the English,
        German, and French Wikipedia. It might be that it is not wanted on
        other wikis. If there are any complaints, please file a bug report.
        """
        exceptions = ['comment', 'math', 'nowiki', 'pre', 'source', 'timeline']
        if not (self.redirect or self.template) and \
           pywikibot.calledModuleName() != 'capitalize_redirects':
            text = pywikibot.replaceExcept(
                text,
                r'(?m)^(?P<bullet>[:;]*(\*+|#+)[:;\*#]*)(?P<char>[^\s\*#:;].+?)',
                # Raw string: '\g' is an invalid escape sequence in a normal
                # string literal; the resulting value is the same.
                r'\g<bullet> \g<char>',
                exceptions)
        return text
예제 #16
0
    def test_cosmetic_changes_hook(self):
        """Check the summary returned by Page._cosmetic_changes_hook."""
        test_page = pywikibot.Page(self.site, 'Test')
        test_page.text = 'Some    content    with    spaces.'
        # check cc settings
        config.cosmetic_changes_mylang_only = False
        self.assertFalse(test_page.isTalkPage())
        caller = pywikibot.calledModuleName()
        self.assertNotIn(caller, config.cosmetic_changes_deny_script)
        self.assertFalse(config.cosmetic_changes_mylang_only)

        if test_page.content_model != 'wikitext':
            reason = ('Wrong content model {!r} for cosmetic_changes'
                      .format(test_page.content_model))
            self.skipTest(reason)

        summary = 'Working on Test page at site {}'.format(self.site)
        returned = test_page._cosmetic_changes_hook(summary)
        self.assertEqual(returned, summary + '; cosmetic changes')
예제 #17
0
    def putSpacesInLists(self, text):
        """
        Put a space between the list markers (* or #) and the item text.

        This is meant to make bullet lists and enumerations easier to read
        in the wiki source code.  The text is returned unchanged for
        redirects and templates, and when the calling module is
        'capitalize_redirects'.

        NOTE: This space is recommended in the syntax help on the English,
        German, and French Wikipedia. It might be that it is not wanted on
        other wikis. If there are any complaints, please file a bug report.
        """
        exceptions = ['comment', 'math', 'nowiki', 'pre', 'source', 'template',
                      'timeline']
        if not (self.redirect or self.template) and \
           pywikibot.calledModuleName() != 'capitalize_redirects':
            text = pywikibot.replaceExcept(
                text,
                r'(?m)^(?P<bullet>[:;]*(\*+|#+)[:;\*#]*)(?P<char>[^\s\*#:;].+?)',
                # Raw string: '\g' is an invalid escape sequence in a normal
                # string literal; the resulting value is the same.
                r'\g<bullet> \g<char>',
                exceptions)
        return text
예제 #18
0
파일: http.py 프로젝트: dtbinh/code
# Name passed to pywikibot.debug() for the messages logged below.
_logger = "comm.http"


# global variables

# The OpenSSL error code for
#   certificate verify failed
# cf. `openssl errstr 14090086`
SSL_CERT_VERIFY_FAILED = ":14090086:"

# The User-agent: header. With the format below the default reads
# '<script>/r<revision> Pywikibot/2.0', where '<script>' is the currently
# executing script (pywikibot.calledModuleName()) and '<revision>' is the
# 'rev' entry of pywikibot.version.getversiondict(), i.e. the Git revision
# of Pywikibot.
USER_AGENT_FORMAT = '{script}/r{version[rev]} Pywikibot/2.0'
# Evaluated once, when this module is imported.
useragent = USER_AGENT_FORMAT.format(script=pywikibot.calledModuleName(),
                                     version=pywikibot.version.getversiondict())
numthreads = 1
threads = []

connection_pool = threadedhttp.ConnectionPool()
http_queue = Queue.Queue()

# Cookie jar created for the path config.datafilepath("pywikibot.lwp").
# A failure to load it is only reported as a debug message.
cookie_jar = threadedhttp.LockableCookieJar(
    config.datafilepath("pywikibot.lwp"))
try:
    cookie_jar.load()
except (IOError, cookielib.LoadError):
    pywikibot.debug(u"Loading cookies failed.", _logger)
else:
    pywikibot.debug(u"Loaded cookies from file.", _logger)
예제 #19
0
from pywikibot.exceptions import Server504Error
import pywikibot
import cookielib
import threadedhttp
import pywikibot.version

# Name passed to pywikibot.debug() for the messages logged below.
_logger = "comm.http"


# global variables

# The User-agent: header. With the format below the default reads
# '<script>/r<revision> Pywikipediabot/2.0', where '<script>' is the
# currently executing script (pywikibot.calledModuleName()) and '<revision>'
# is the 'rev' entry of pywikibot.version.getversiondict(), i.e. the SVN
# revision of Pywikipediabot.
USER_AGENT_FORMAT = '{script}/r{version[rev]} Pywikipediabot/2.0'
# Evaluated once, when this module is imported.
useragent = USER_AGENT_FORMAT.format(script=pywikibot.calledModuleName(),
                                     version=pywikibot.version.getversiondict())
numthreads = 1
threads = []

connection_pool = threadedhttp.ConnectionPool()
http_queue = Queue.Queue()

# Cookie jar created for the path config.datafilepath("pywikibot.lwp").
# A failure to load it is only reported as a debug message.
cookie_jar = threadedhttp.LockableCookieJar(
                 config.datafilepath("pywikibot.lwp"))
try:
    cookie_jar.load()
except (IOError, cookielib.LoadError):
    pywikibot.debug(u"Loading cookies failed.", _logger)
else:
    pywikibot.debug(u"Loaded cookies from file.", _logger)
예제 #20
0
from pywikibot.exceptions import Server504Error
import pywikibot
import cookielib
import threadedhttp
import pywikibot.version

# Name passed to pywikibot.debug() for the messages logged below.
_logger = "comm.http"

# global variables

# The User-agent: header. With the format below the default reads
# '<script>/r<revision> Pywikipediabot/2.0', where '<script>' is the
# currently executing script (pywikibot.calledModuleName()) and '<revision>'
# is the 'rev' entry of pywikibot.version.getversiondict(), i.e. the SVN
# revision of Pywikipediabot.
USER_AGENT_FORMAT = '{script}/r{version[rev]} Pywikipediabot/2.0'
# Evaluated once, when this module is imported.
useragent = USER_AGENT_FORMAT.format(
    script=pywikibot.calledModuleName(),
    version=pywikibot.version.getversiondict())
numthreads = 1
threads = []

connection_pool = threadedhttp.ConnectionPool()
http_queue = Queue.Queue()

# Cookie jar created for the path config.datafilepath("pywikibot.lwp").
# A failure to load it is only reported as a debug message.
cookie_jar = threadedhttp.LockableCookieJar(
    config.datafilepath("pywikibot.lwp"))
try:
    cookie_jar.load()
except (IOError, cookielib.LoadError):
    pywikibot.debug(u"Loading cookies failed.", _logger)
else:
    pywikibot.debug(u"Loaded cookies from file.", _logger)