コード例 #1
0
    def load_page(self):
        """Read the page text and split it into a header and threads.

        Resets ``header``, ``threads``, ``archives`` and
        ``archived_threads``, then refills ``header`` and ``threads`` from
        the sections that ``extract_sections()`` reports for the page text.
        """
        self.header = ''
        self.threads = []
        self.archives = {}
        self.archived_threads = 0

        # Prefix every line that starts with '===' with a marker so that
        # these headings are not treated as thread headings.
        page_text = self.get()
        marker = findmarker(page_text)
        page_text = re.sub(r'^===', marker + r'===', page_text, flags=re.M)

        # Split the text; per the original note this also keeps categories
        # and interwiki links from being archived.
        header, threads, footer = extract_sections(page_text, self.site)
        header = header.replace(marker, '')
        if not (header and footer):
            self.header = header + footer
        else:
            self.header = '\n\n'.join((header.rstrip(), footer, ''))

        for thread_heading, thread_content in threads:
            title = thread_heading.strip('= ')
            thread = DiscussionThread(title, self.now, self.timestripper)
            body_lines = thread_content.replace(marker, '').splitlines()
            # The first line is the heading line; skip it.
            for body_line in body_lines[1:]:
                thread.feed_line(body_line)
            self.threads.append(thread)

        # This extra info is not desirable when run under the unittest
        # framework, which may be run either directly or via setup.py
        if pywikibot.calledModuleName() in ('archivebot_tests', 'setup'):
            return
        pywikibot.output(u'%d Threads found on %s'
                         % (len(self.threads), self))
コード例 #2
0
ファイル: archivebot.py プロジェクト: dvorapa/pywikibot
    def load_page(self) -> None:
        """Load the page to be archived and break it up into threads.

        Resets the header, the thread list, the archive mapping and the
        archived-thread counter, then fills ``self.header`` and
        ``self.threads`` from the sections returned by
        ``extract_sections()``.
        """
        self.header = ''
        self.threads = []
        self.archives = {}
        self.archived_threads = 0

        # Exclude unsupported headings (h1, h3, etc):
        # adding the marker will make them ignored by extract_sections()
        text = self.get()
        marker = findmarker(text)
        text = re.sub(r'^((=|={3,})[^=])', marker + r'\1', text, flags=re.M)

        # Find threads, avoid archiving categories or interwiki
        header, threads, footer = extract_sections(text, self.site)
        header = header.replace(marker, '')
        if header and footer:
            self.header = '\n\n'.join((header.rstrip(), footer, ''))
        else:
            self.header = header + footer
        for thread_heading, thread_content in threads:
            cur_thread = DiscussionThread(thread_heading.strip('= '),
                                          self.timestripper)
            # Remove the heading line. Slicing is used instead of
            # star-unpacking so that an empty thread content yields no
            # lines rather than raising ValueError.
            lines = thread_content.replace(marker, '').splitlines()[1:]
            for line in lines:
                cur_thread.feed_line(line)
            self.threads.append(cur_thread)

        # This extra info is not desirable when run under the unittest
        # framework, which may be run either directly or via setup.py
        if pywikibot.calledModuleName() not in ['archivebot_tests', 'setup']:
            pywikibot.output('{} thread(s) found on {}'.format(
                len(self.threads), self))
コード例 #3
0
ファイル: archivebot.py プロジェクト: MKaras93/pywikibot
 def load_page(self):
     """Read the page text and sort its lines into header and threads.

     Lines before the first ``== title ==`` heading are appended to
     ``self.header``. Each heading opens a new DiscussionThread, and all
     following lines are fed to that thread until the next heading.
     """
     self.header = ''
     self.threads = []
     self.archives = {}
     self.archived_threads = 0

     heading_re = re.compile('^== *([^=].*?) *== *$')
     thread = None
     in_threads = False  # still collecting header lines
     for text_line in self.get().split('\n'):
         heading = heading_re.search(text_line)
         if heading:
             in_threads = True  # from now on lines belong to threads
             if thread:
                 self.threads.append(thread)
             thread = DiscussionThread(heading.group(1), self.now,
                                       self.timestripper)
             continue
         if in_threads:
             thread.feed_line(text_line)
         else:
             self.header += text_line + '\n'
     # Store the thread that was still open at the end of the text.
     if thread:
         self.threads.append(thread)

     # This extra info is not desirable when run under the unittest
     # framework, which may be run either directly or via setup.py
     if pywikibot.calledModuleName() in ('archivebot_tests', 'setup'):
         return
     pywikibot.output(u'%d Threads found on %s'
                      % (len(self.threads), self))
コード例 #4
0
 def fixSelfInterwiki(self, text):
     """Remove the language code prefix from interwiki links to this site.

     Interwiki links to the site itself are displayed like local links,
     so ``[[<lang>:Title]]`` is rewritten to ``[[Title]]``, where
     ``<lang>`` is ``self.site.lang``.

     The text is returned unchanged when ``self.talkpage`` is true or
     when the calling module is 'interwiki'.

     :param text: wikitext to process
     :return: the processed wikitext
     """
     # '!=' replaces the Python 2 only '<>' operator, which is a syntax
     # error on Python 3.
     if not self.talkpage and pywikibot.calledModuleName() != 'interwiki':
         interwikiR = re.compile(r'\[\[%s\s?:([^\[\]\n]*)\]\]'
                                 % self.site.lang)
         text = interwikiR.sub(r'[[\1]]', text)
     return text
コード例 #5
0
 def fixSelfInterwiki(self, text):
     """Remove the language code prefix from interwiki links to this site.

     Such links are displayed like local links, so ``[[<lang>:Title]]``
     becomes ``[[Title]]``, with ``<lang>`` taken from ``self.site.lang``.
     Nothing is changed when ``self.talkpage`` is true or when the
     calling module is 'interwiki'.
     """
     if self.talkpage or pywikibot.calledModuleName() == 'interwiki':
         return text
     own_prefix_link = r'\[\[%s\s?:([^\[\]\n]*)\]\]' % self.site.lang
     return re.sub(own_prefix_link, r'[[\1]]', text)
コード例 #6
0
ファイル: throttle.py プロジェクト: dvorapa/pywikibot
 def _module_hash(module=None) -> str:
     """Return a four-character hex digest derived from a module name.

     :param module: module name to hash; when None, the name reported by
         ``pywikibot.calledModuleName()`` is used
     :return: the first four hex characters of the digest
     """
     name = pywikibot.calledModuleName() if module is None else module
     data = name.encode()
     # Use blake2b when that name is truthy, otherwise fall back to md5.
     digest = blake2b(data, digest_size=2) if blake2b else md5(data)
     return digest.hexdigest()[:4]  # slice for Python 3.5
コード例 #7
0
 def test_default_user_agent(self):
     """Check the user agent produced from the default format string."""
     agent = http.user_agent()
     self.assertTrue(agent.startswith(pywikibot.calledModuleName()))

     release_token = 'Pywikibot/' + pywikibot.__release__
     self.assertIn(release_token, http.user_agent())

     # No doubled spaces and no empty or dangling comment parts.
     for unwanted in ('  ', '()', '(;', ';)'):
         self.assertNotIn(unwanted, http.user_agent())

     self.assertIn('httplib2/', http.user_agent())
     python_token = 'Python/' + str(sys.version_info[0])
     self.assertIn(python_token, http.user_agent())
コード例 #8
0
 def test_default_user_agent(self):
     """Check the user agent produced from the default format string."""
     agent = http.user_agent()
     self.assertTrue(agent.startswith(pywikibot.calledModuleName()))

     release_token = 'Pywikibot/' + pywikibot.__release__
     self.assertIn(release_token, http.user_agent())

     # No doubled spaces and no empty or dangling comment parts.
     for unwanted in ('  ', '()', '(;', ';)'):
         self.assertNotIn(unwanted, http.user_agent())

     self.assertIn('requests/', http.user_agent())
     python_token = 'Python/' + str(PYTHON_VERSION[0])
     self.assertIn(python_token, http.user_agent())
コード例 #9
0
 def test_default_user_agent(self):
     """Verify the user agent built from the configured format string."""
     script_name_first = http.user_agent().startswith(
         pywikibot.calledModuleName())
     self.assertTrue(script_name_first)

     self.assertIn('Pywikibot/' + pywikibot.__release__, http.user_agent())

     # The agent must not contain doubled spaces or malformed comments.
     malformed_parts = ['  ', '()', '(;', ';)']
     for part in malformed_parts:
         self.assertNotIn(part, http.user_agent())

     self.assertIn('requests/', http.user_agent())
     major_version = str(PYTHON_VERSION[0])
     self.assertIn('Python/' + major_version, http.user_agent())
コード例 #10
0
ファイル: http_tests.py プロジェクト: skamithi/pywikibot-core
 def test_default_user_agent(self):
     """Verify the user agent built from the configured format string."""
     script_name_first = http.user_agent().startswith(
         pywikibot.calledModuleName())
     self.assertTrue(script_name_first)

     self.assertIn('Pywikibot/' + pywikibot.__release__, http.user_agent())

     # The agent must not contain doubled spaces or malformed comments.
     malformed_parts = ['  ', '()', '(;', ';)']
     for part in malformed_parts:
         self.assertNotIn(part, http.user_agent())

     self.assertIn('httplib2/', http.user_agent())
     major_version = str(sys.version_info[0])
     self.assertIn('Python/' + major_version, http.user_agent())
コード例 #11
0
ファイル: cosmetic_changes.py プロジェクト: CCXXXI/pywikibot
    def fixSelfInterwiki(self, text: str) -> str:
        """
        Strip the site code prefix from interwiki links to this site.

        Such links are displayed like local links, so the prefix given by
        ``self.site.code`` (optionally preceded by a colon) is removed.
        Nothing is changed when ``self.talkpage`` is true or when the
        calling module is 'interwiki'.
        """
        if self.talkpage or pywikibot.calledModuleName() == 'interwiki':
            return text
        own_prefix_link = r'\[\[(?: *:)? *{} *: *([^\[\]\n]*)\]\]'.format(
            self.site.code)
        return re.sub(own_prefix_link, r'[[\1]]', text)
コード例 #12
0
    def test_user_agent(self):
        """Exercise user_agent() with several format strings and users."""
        site = self.get_site()

        def agent(fmt):
            """Return the user agent of the test site for format *fmt*."""
            return user_agent(site, format_string=fmt)

        site._userinfo = {'name': 'foo'}
        site._username = ('foo', None)

        # Single placeholders
        self.assertEqual('Pywikibot/' + pywikibot.__version__, agent('{pwb}'))
        self.assertEqual(site.family.name, agent('{family}'))
        self.assertEqual(site.code, agent('{lang}'))
        self.assertEqual(site.family.name + ' ' + site.code,
                         agent('{family} {lang}'))
        self.assertEqual(site.username(), agent('{username}'))

        site._userinfo = {'name': '!'}
        site._username = ('!', None)
        self.assertEqual('!', agent('{username}'))

        # A space in the user name shows up as an underscore.
        site._userinfo = {'name': 'foo bar'}
        site._username = ('foo bar', None)
        self.assertEqual('foo_bar', agent('{username}'))

        old_config = '{script}/{version} Pywikibot/2.0 (User:{username})'

        pywikibot.version.getversiondict()
        script_value = '/'.join((pywikibot.calledModuleName(),
                                 pywikibot.version.cache['rev']))
        self.assertEqual(script_value + ' Pywikibot/2.0 (User:foo_bar)',
                         agent(old_config))

        # A non-ASCII user name is expected percent-encoded.
        site._userinfo = {'name': '⁂'}
        site._username = ('⁂', None)
        self.assertEqual('%E2%81%82', agent('{username}'))

        site._userinfo = {'name': '127.0.0.1'}
        site._username = (None, None)
        self.assertEqual('Foo', agent('Foo {username}'))
        self.assertEqual('Foo (' + site.family.name + ':' + site.code + ')',
                         agent('Foo ({script_comments})'))
コード例 #13
0
ファイル: archivebot.py プロジェクト: NanakiPL/pywikibot
    def load_page(self):
        """Load the page to be archived and break it up into threads.

        Thread headings (``== title ==`` lines) are detected on a copy of
        the text in which every match of the exception regexes (comment,
        code, pre, source, nowiki) is blanked out with spaces, so headings
        inside such regions are ignored. Lines before the first heading are
        appended to ``self.header``; every later line is fed to the thread
        opened by the preceding heading.
        """
        self.header = ''
        self.threads = []
        self.archives = {}
        self.archived_threads = 0
        text = self.get()

        # Replace text in following exceptions by spaces, but don't change line
        # numbers and character positions
        exceptions = ['comment', 'code', 'pre', 'source', 'nowiki']
        exc_regexes = _get_regexes(exceptions, self.site)
        stripped_text = text
        for regex in exc_regexes:
            # Blank every character except newlines. The replacement has
            # the same length as the match, so positions stay aligned.
            stripped_text = re.sub(
                regex,
                lambda match: re.sub(r'[^\n]', ' ', match.group()),
                stripped_text)

        heading_re = re.compile(r'^== *([^=].*?) *== *$')
        found = False  # Reading header
        cur_thread = None
        # Newlines are preserved by the blanking, so both texts split into
        # the same number of lines and can be walked in parallel.
        for line, stripped_line in zip(text.split('\n'),
                                       stripped_text.split('\n')):
            stripped_match = heading_re.search(stripped_line)
            if stripped_match:
                # Prefer the title from the original line. If that line only
                # looks like a heading after blanking (e.g. a comment after
                # the closing '=='), fall back to the blanked line instead
                # of calling .group() on None.
                thread_header = heading_re.search(line) or stripped_match
                found = True  # Reading threads now
                if cur_thread:
                    self.threads.append(cur_thread)
                cur_thread = DiscussionThread(thread_header.group(1), self.now,
                                              self.timestripper)
            elif found:
                cur_thread.feed_line(line)
            else:
                self.header += line + '\n'
        if cur_thread:
            self.threads.append(cur_thread)
        # This extra info is not desirable when run under the unittest
        # framework, which may be run either directly or via setup.py
        if pywikibot.calledModuleName() not in ['archivebot_tests', 'setup']:
            pywikibot.output(u'%d Threads found on %s' %
                             (len(self.threads), self))
コード例 #14
0
    def test_user_agent(self):
        """Check user_agent() output for various formats and user names."""
        site = self.get_site()

        def set_user(info_name, username):
            """Overwrite the user data stored on the test site."""
            site._userinfo = {'name': info_name}
            site._username = (username, None)

        set_user('foo', 'foo')

        expected = 'Pywikibot/' + pywikibot.__release__
        self.assertEqual(expected, user_agent(site, format_string='{pwb}'))

        expected = site.family.name
        self.assertEqual(expected, user_agent(site, format_string='{family}'))
        expected = site.code
        self.assertEqual(expected, user_agent(site, format_string='{lang}'))
        expected = site.family.name + ' ' + site.code
        self.assertEqual(expected,
                         user_agent(site, format_string='{family} {lang}'))

        expected = site.username()
        self.assertEqual(expected,
                         user_agent(site, format_string='{username}'))

        set_user(u'!', u'!')
        self.assertEqual('!', user_agent(site, format_string='{username}'))

        # A space in the user name shows up as an underscore.
        set_user(u'foo bar', u'foo bar')
        self.assertEqual('foo_bar',
                         user_agent(site, format_string='{username}'))

        old_config = '{script}/{version} Pywikibot/2.0 (User:{username})'

        pywikibot.version.getversiondict()
        script_value = (pywikibot.calledModuleName() + '/'
                        + pywikibot.version.cache['rev'])
        self.assertEqual(script_value + ' Pywikibot/2.0 (User:foo_bar)',
                         user_agent(site, format_string=old_config))

        # A non-ASCII user name is expected percent-encoded.
        set_user(u'⁂', u'⁂')
        self.assertEqual('%E2%81%82',
                         user_agent(site, format_string='{username}'))

        set_user(u'127.0.0.1', None)
        self.assertEqual('Foo',
                         user_agent(site, format_string='Foo {username}'))
        expected = 'Foo (' + site.family.name + ':' + site.code + ')'
        self.assertEqual(
            expected,
            user_agent(site, format_string='Foo ({script_comments})'))
コード例 #15
0
    def putSpacesInLists(self, text):
        """
        Put a space between the list markers (* or #) and the item text.

        This improves the readability of bullet list and enumeration wiki
        source code. The text is returned unchanged when ``self.redirect``
        or ``self.template`` is true, or when the calling module is
        'capitalize_redirects'.

        NOTE: This space is recommended in the syntax help on the English,
        German, and French Wikipedia. It might be that it is not wanted on
        other wikis. If there are any complaints, please file a bug report.

        :param text: wikitext to process
        :return: the processed wikitext
        """
        exceptions = ['comment', 'math', 'nowiki', 'pre', 'source', 'timeline']
        if not (self.redirect or self.template) and \
           pywikibot.calledModuleName() != 'capitalize_redirects':
            # The replacement is a raw string: '\g' is not a valid escape
            # sequence in a normal string literal and triggers a warning.
            text = pywikibot.replaceExcept(
                text,
                r'(?m)^(?P<bullet>[:;]*(\*+|#+)[:;\*#]*)(?P<char>[^\s\*#:;].+?)',
                r'\g<bullet> \g<char>',
                exceptions)
        return text
コード例 #16
0
    def test_cosmetic_changes_hook(self):
        """Check the summary returned by Page._cosmetic_changes_hook."""
        test_page = pywikibot.Page(self.site, 'Test')
        test_page.text = 'Some    content    with    spaces.'

        # Check the settings that are in effect for the hook call below.
        config.cosmetic_changes_mylang_only = False
        self.assertFalse(test_page.isTalkPage())
        denied_scripts = config.cosmetic_changes_deny_script
        self.assertNotIn(pywikibot.calledModuleName(), denied_scripts)
        self.assertFalse(config.cosmetic_changes_mylang_only)

        if test_page.content_model != 'wikitext':
            reason = 'Wrong content model {!r} for cosmetic_changes'.format(
                test_page.content_model)
            self.skipTest(reason)

        base_summary = 'Working on Test page at site {}'.format(self.site)
        new_summary = test_page._cosmetic_changes_hook(base_summary)
        self.assertEqual(new_summary, base_summary + '; cosmetic changes')
コード例 #17
0
    def putSpacesInLists(self, text):
        """
        Put a space between the list markers (* or #) and the item text.

        This improves the readability of bullet list and enumeration wiki
        source code. The text is returned unchanged when ``self.redirect``
        or ``self.template`` is true, or when the calling module is
        'capitalize_redirects'.

        NOTE: This space is recommended in the syntax help on the English,
        German, and French Wikipedia. It might be that it is not wanted on
        other wikis. If there are any complaints, please file a bug report.

        :param text: wikitext to process
        :return: the processed wikitext
        """
        exceptions = ['comment', 'math', 'nowiki', 'pre', 'source', 'template',
                      'timeline']
        if not (self.redirect or self.template) and \
           pywikibot.calledModuleName() != 'capitalize_redirects':
            # The replacement is a raw string: '\g' is not a valid escape
            # sequence in a normal string literal and triggers a warning.
            text = pywikibot.replaceExcept(
                text,
                r'(?m)^(?P<bullet>[:;]*(\*+|#+)[:;\*#]*)(?P<char>[^\s\*#:;].+?)',
                r'\g<bullet> \g<char>',
                exceptions)
        return text
コード例 #18
0
ファイル: http.py プロジェクト: dtbinh/code
# Logger name passed to pywikibot.debug() by the statements below.
_logger = "comm.http"


# global variables

# The OpenSSL error code for
#   certificate verify failed
# cf. `openssl errstr 14090086`
SSL_CERT_VERIFY_FAILED = ":14090086:"

# The User-agent: header. The default is
# '<script>/r<revision> Pywikibot/2.0', where '<script>' is the currently
# executing script and '<revision>' is the Git revision of Pywikibot.
USER_AGENT_FORMAT = '{script}/r{version[rev]} Pywikibot/2.0'
# The header value is formatted once, when this module is executed.
useragent = USER_AGENT_FORMAT.format(script=pywikibot.calledModuleName(),
                                     version=pywikibot.version.getversiondict())
# NOTE(review): presumably the number of worker threads and the list that
# holds them; their use is not visible in this part of the file - confirm.
numthreads = 1
threads = []

connection_pool = threadedhttp.ConnectionPool()
http_queue = Queue.Queue()

# The cookie jar uses the path that config.datafilepath() returns for
# "pywikibot.lwp".
cookie_jar = threadedhttp.LockableCookieJar(
    config.datafilepath("pywikibot.lwp"))
# Loading the cookies is best effort: a failure is only reported through
# pywikibot.debug() and does not stop the module from loading.
try:
    cookie_jar.load()
except (IOError, cookielib.LoadError):
    pywikibot.debug(u"Loading cookies failed.", _logger)
else:
    pywikibot.debug(u"Loaded cookies from file.", _logger)
コード例 #19
0
ファイル: http.py プロジェクト: edgarskos/pywikipedia-rewrite
from pywikibot.exceptions import Server504Error
import pywikibot
import cookielib
import threadedhttp
import pywikibot.version

# Logger name passed to pywikibot.debug() by the statements below.
_logger = "comm.http"


# global variables

# The User-agent: header. The default is
# '<script>/r<revision> Pywikipediabot/2.0', where '<script>' is the currently
# executing script and '<revision>' is the SVN revision of Pywikipediabot.
USER_AGENT_FORMAT = '{script}/r{version[rev]} Pywikipediabot/2.0'
# The header value is formatted once, when this module is executed.
useragent = USER_AGENT_FORMAT.format(script=pywikibot.calledModuleName(),
                                     version=pywikibot.version.getversiondict())
# NOTE(review): presumably the number of worker threads and the list that
# holds them; their use is not visible in this part of the file - confirm.
numthreads = 1
threads = []

connection_pool = threadedhttp.ConnectionPool()
http_queue = Queue.Queue()

# The cookie jar uses the path that config.datafilepath() returns for
# "pywikibot.lwp".
cookie_jar = threadedhttp.LockableCookieJar(
                 config.datafilepath("pywikibot.lwp"))
# Loading the cookies is best effort: a failure is only reported through
# pywikibot.debug() and does not stop the module from loading.
try:
    cookie_jar.load()
except (IOError, cookielib.LoadError):
    pywikibot.debug(u"Loading cookies failed.", _logger)
else:
    pywikibot.debug(u"Loaded cookies from file.", _logger)
コード例 #20
0
from pywikibot.exceptions import Server504Error
import pywikibot
import cookielib
import threadedhttp
import pywikibot.version

# Logger name passed to pywikibot.debug() by the statements below.
_logger = "comm.http"

# global variables

# The User-agent: header. The default is
# '<script>/r<revision> Pywikipediabot/2.0', where '<script>' is the currently
# executing script and '<revision>' is the SVN revision of Pywikipediabot.
USER_AGENT_FORMAT = '{script}/r{version[rev]} Pywikipediabot/2.0'
# The header value is formatted once, when this module is executed.
useragent = USER_AGENT_FORMAT.format(
    script=pywikibot.calledModuleName(),
    version=pywikibot.version.getversiondict())
# NOTE(review): presumably the number of worker threads and the list that
# holds them; their use is not visible in this part of the file - confirm.
numthreads = 1
threads = []

connection_pool = threadedhttp.ConnectionPool()
http_queue = Queue.Queue()

# The cookie jar uses the path that config.datafilepath() returns for
# "pywikibot.lwp".
cookie_jar = threadedhttp.LockableCookieJar(
    config.datafilepath("pywikibot.lwp"))
# Loading the cookies is best effort: a failure is only reported through
# pywikibot.debug() and does not stop the module from loading.
try:
    cookie_jar.load()
except (IOError, cookielib.LoadError):
    pywikibot.debug(u"Loading cookies failed.", _logger)
else:
    pywikibot.debug(u"Loaded cookies from file.", _logger)