def read_file_content(self, file_url=None):
        """Return name of temp file in which remote file is saved."""
        if not file_url:
            file_url = self.url
            pywikibot.warning("file_url is not given. "
                              "Set to self.url by default.")
        pywikibot.output(u'Reading file %s' % file_url)
        resume = False
        rlen = 0
        _contents = None
        dt = 15
        uo = URLopener()
        retrieved = False

        while not retrieved:
            if resume:
                pywikibot.output(u"Resume download...")
                uo.addheader('Range', 'bytes=%s-' % rlen)

            infile = uo.open(file_url)

            if 'text/html' in infile.info().getheader('Content-Type'):
                pywikibot.output(u"Couldn't download the image: "
                                 "the requested URL was not found on server.")
                return

            content_len = infile.info().getheader('Content-Length')
            accept_ranges = infile.info().getheader('Accept-Ranges') == 'bytes'

            if resume:
                _contents += infile.read()
            else:
                _contents = infile.read()

            infile.close()
            retrieved = True

            if content_len:
                rlen = len(_contents)
                content_len = int(content_len)
                if rlen < content_len:
                    retrieved = False
                    pywikibot.output(
                        u"Connection closed at byte %s (%s left)" %
                        (rlen, content_len))
                    if accept_ranges and rlen > 0:
                        resume = True
                    pywikibot.output(u"Sleeping for %d seconds..." % dt)
                    time.sleep(dt)
                    if dt <= 60:
                        dt += 15
                    elif dt < 360:
                        dt += 60
            else:
                pywikibot.log(
                    u"WARNING: length check of retrieved data not possible.")
        handle, tempname = tempfile.mkstemp()
        with os.fdopen(handle, "wb") as t:
            t.write(_contents)
        return tempname
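A hypothetical call, assuming this method belongs to an upload bot object whose self.url is already set (the bot variable and URL below are illustrative, not taken from the example above):

    tempname = bot.read_file_content('https://upload.example.org/photo.jpg')
    if tempname:
        pywikibot.output('Remote file saved to %s' % tempname)
        os.remove(tempname)  # remove the temporary copy once it has been used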
Example #2
    def _command(self, file_name, text, jump_index=None):
        """Return editor selected in user-config.py."""
        if jump_index:
            # Some editors make it possible to mark occurrences of substrings,
            # or to jump to the line of the first occurrence.
            # TODO: Find a better solution than hardcoding these, e.g. a config
            # option.
            line = text[:jump_index].count('\n')
            column = jump_index - (text[:jump_index].rfind('\n') + 1)
        else:
            line = column = 0
        # Linux editors. We use startswith() because some users might use
        # parameters.
        if config.editor.startswith('kate'):
            command = ['-l', '%i' % (line + 1), '-c', '%i' % (column + 1)]
        elif config.editor.startswith('gedit'):
            command = ['+%i' % (line + 1)]  # seems not to support columns
        elif config.editor.startswith('emacs'):
            command = ['+%i' % (line + 1)]  # seems not to support columns
        elif config.editor.startswith('jedit'):
            command = ['+line:%i' % (line + 1)]  # seems not to support columns
        elif config.editor.startswith('vim'):
            command = ['+%i' % (line + 1)]  # seems not to support columns
        elif config.editor.startswith('nano'):
            command = ['+%i,%i' % (line + 1, column + 1)]
        # Windows editors
        elif config.editor.lower().endswith('notepad++.exe'):
            command = ['-n%i' % (line + 1)]  # seems not to support columns
        else:
            command = []

        # See T102465 for problems relating to using config.editor unparsed.
        command = [config.editor] + command + [file_name]
        pywikibot.log('Running editor: %s' % TextEditor._concat(command))
        return command
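A hypothetical way to run the returned command list; the editor instance and the use of subprocess here are assumptions for illustration, not the Pywikibot call site:

    import subprocess

    text = 'Example page text'
    command = editor._command('/tmp/page.wiki', text, jump_index=0)
    subprocess.run(command)  # blocks until the external editor exits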
Example #3
def _call_cmd(args, lib='djvulibre') -> tuple:
    """
    Tiny wrapper around subprocess.Popen().

    @param args: same as Popen()
    @type args: str or typing.Sequence[string]

    @param lib: library to be logged in logging messages
    @type lib: str

    @return: returns a tuple (res, stdoutdata), where
        res is False if dp.returncode != 0, else True
    """
    if not isinstance(args, str):
        # upcast any param in sequence args to str
        cmd = ' '.join(str(a) for a in args)
    else:
        cmd = args

    dp = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdoutdata, stderrdata = dp.communicate()

    if dp.returncode != 0:
        pywikibot.error('{0} error; {1}'.format(lib, cmd))
        pywikibot.error('{0}'.format(stderrdata))
        return (False, stdoutdata)

    pywikibot.log('SUCCESS: {0} (PID: {1})'.format(cmd, dp.pid))

    return (True, stdoutdata)
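A hypothetical call, assuming the djvulibre command-line tools are installed; the file name and djvused arguments are illustrative only:

    ok, data = _call_cmd(['djvused', '-e', 'n', 'sample.djvu'])
    if ok:
        pywikibot.output('Number of pages: {}'.format(data.decode().strip()))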
Example #4
    def checkMultiplicity(self):
        """Count running processes for site and set process_multiplicity."""
        global pid
        mysite = self.mysite
        pywikibot.debug('Checking multiplicity: pid = {pid}'.format(pid=pid),
                        _logger)
        with self.lock:
            processes = []
            my_pid = pid or 1  # start at 1 if global pid not yet set
            count = 1
            # open throttle.log
            try:
                f = open(self.ctrlfilename, 'r')
            except IOError:
                if pid:
                    raise
            else:
                now = time.time()
                for line in f.readlines():
                    # parse line; format is "pid timestamp site"
                    try:
                        line = line.split(' ')
                        this_pid = int(line[0])
                        ptime = int(line[1].split('.')[0])
                        this_site = line[2].rstrip()
                    except (IndexError, ValueError):
                        # Sometimes the file gets corrupted; ignore that line
                        continue
                    if now - ptime > self.releasepid:
                        continue  # process has expired, drop from file
                    if now - ptime <= self.dropdelay \
                       and this_site == mysite \
                       and this_pid != pid:
                        count += 1
                    if this_site != self.mysite or this_pid != pid:
                        processes.append({
                            'pid': this_pid,
                            'time': ptime,
                            'site': this_site
                        })
                    if not pid and this_pid >= my_pid:
                        my_pid = this_pid + 1  # next unused process id
                f.close()

            if not pid:
                pid = my_pid
            self.checktime = time.time()
            processes.append({
                'pid': pid,
                'time': self.checktime,
                'site': mysite
            })
            processes.sort(key=lambda p: (p['pid'], p['site']))
            with suppress(IOError), open(self.ctrlfilename, 'w') as f:
                for p in processes:
                    f.write(FORMAT_LINE.format_map(p))
            self.process_multiplicity = count
            pywikibot.log(
                'Found {} {} processes running, including this one.'.format(
                    count, mysite))
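FORMAT_LINE is defined elsewhere in the throttle module; a minimal definition compatible with the "pid timestamp site" parsing above (an assumption, not the verbatim Pywikibot constant) would be:

    FORMAT_LINE = '{pid} {time} {site}\n'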
Example #5
def _call_cmd(args, lib='djvulibre'):
    """
    Tiny wrapper around subprocess.Popen().

    @param args: same as Popen()
    @type args: sequence or string

    @param lib: library to be logged in logging messages
    @type lib: string

    @return: returns a tuple (res, stdoutdata), where
        res is False if dp.returncode != 0, else True
    """
    if not isinstance(args, StringTypes):
        # upcast if any param in sequence args is not in StringTypes
        args = [str(a) if not isinstance(a, StringTypes) else a for a in args]
        cmd = ' '.join(args)
    else:
        cmd = args

    dp = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdoutdata, stderrdata = dp.communicate()

    if dp.returncode != 0:
        pywikibot.error('{0} error; {1}'.format(lib, cmd))
        pywikibot.error('{0}'.format(stderrdata))
        return (False, stdoutdata)

    pywikibot.log('SUCCESS: {0} (PID: {1})'.format(cmd, dp.pid))

    return (True, stdoutdata)
Example #6
    def login(self, retry=False, autocreate=False):
        """
        Attempt to log into the server.

        @param retry: infinitely retry if the API returns an unknown error
        @type retry: bool

        @param autocreate: if true, allow auto-creation of the account
                           using unified login
        @type autocreate: bool

        @raises NoUsername: Username is not recognised by the site.
        """
        if not self.password:
            # First check that the username exists,
            # to avoid asking for a password that will not work.
            if not autocreate:
                self.check_user_exists()

            # As we don't want the password to appear on the screen, we set
            # password = True
            self.password = pywikibot.input(
                'Password for user %(name)s on %(site)s (no characters will '
                'be shown):' % {
                    'name': self.login_name,
                    'site': self.site
                },
                password=True)

        pywikibot.output('Logging in to %(site)s as %(name)s' % {
            'name': self.login_name,
            'site': self.site
        })
        try:
            cookiedata = self.getCookie()
        except pywikibot.data.api.APIError as e:
            pywikibot.error('Login failed (%s).' % e.code)
            if e.code == 'NotExists':
                raise NoUsername("Username '%s' does not exist on %s" %
                                 (self.login_name, self.site))
            elif e.code == 'Illegal':
                raise NoUsername("Username '%s' is invalid on %s" %
                                 (self.login_name, self.site))
            elif e.code == 'readapidenied':
                raise NoUsername(
                    'Username "{0}" does not have read permissions on '
                    '{1}'.format(self.login_name, self.site))
            elif e.code == 'Failed':
                raise NoUsername(
                    'Username "{0}" does not have read permissions on '
                    '{1}\n.{2}'.format(self.login_name, self.site, e.info))
            # TODO: investigate other unhandled API codes (bug T75539)
            if retry:
                self.password = None
                return self.login(retry=True)
            else:
                return False
        self.storecookiedata(cookiedata)
        pywikibot.log('Should be logged in now')
        return True
Example #7
    def lag(self, lagtime):
        """Seize the throttle lock due to server lag.

        This will prevent any thread from accessing this site.

        """
        started = time.time()
        self.lock.acquire()
        try:
            # start at 1/2 the current server lag time
            # wait at least 5 seconds but not more than 120 seconds
            delay = min(max(5, lagtime//2), 120)
            # account for any time we waited while acquiring the lock
            wait = delay - (time.time() - started)
            if wait > 0:
                if wait > config.noisysleep:
                    pywikibot.output(
                        u"Sleeping for %(wait).1f seconds, %(now)s"
                        % {'wait': wait,
                           'now': time.strftime("%Y-%m-%d %H:%M:%S",
                                                time.localtime())
                        } )
                else:
                    pywikibot.log(
                        u"Sleeping for %(wait).1f seconds, %(now)s"
                        % {'wait': wait,
                           'now': time.strftime("%Y-%m-%d %H:%M:%S",
                                                time.localtime())
                        } )
                time.sleep(wait)
        finally:
            self.lock.release()
Example #8
 def __iter__(self):
     """Yield pages."""
     # TODO: start yielding before all referring pages have been found
     refs = [
         page for page in self.disambPage.getReferences(
             follow_redirects=False,
             withTemplateInclusion=False,
             namespaces=0 if self.main_only else None
         )
     ]
     pywikibot.output(u"Found %d references." % len(refs))
     # Remove ignorables
     if self.disambPage.site.family.name in ignore_title and \
        self.disambPage.site.lang in ignore_title[self.disambPage.site.family.name]:
         for ig in ignore_title[self.disambPage.site.family.name
                                ][self.disambPage.site.lang]:
             for i in range(len(refs) - 1, -1, -1):
                 if re.match(ig, refs[i].title()):
                     pywikibot.log(u'Ignoring page %s'
                                   % refs[i].title())
                     del refs[i]
                 elif self.primaryIgnoreManager.isIgnored(refs[i]):
                     del refs[i]
     if len(refs) < self.minimum:
         pywikibot.output(u"Found only %d pages to work on; skipping."
                          % len(refs))
         return
     pywikibot.output(u"Will work on %d pages." % len(refs))
     for ref in refs:
         yield ref
Example #9
    def get_redirects_from_dump(self, alsoGetPageTitles=False):
        """
        Extract redirects from dump.

        Load a local XML dump file, look at all pages which have the
        redirect flag set, and find out where they're pointing at. Return
        a dictionary where the redirect names are the keys and the redirect
        targets are the values.
        """
        xmlFilename = self.xmlFilename
        redict = {}
        # open xml dump and read page titles out of it
        dump = xmlreader.XmlDump(xmlFilename)
        redirR = self.site.redirectRegex()
        readPagesCount = 0
        if alsoGetPageTitles:
            pageTitles = set()
        for entry in dump.parse():
            readPagesCount += 1
            # always print status message after 10000 pages
            if readPagesCount % 10000 == 0:
                pywikibot.output(u'{0:d} pages read...'.format(readPagesCount))
            if len(self.namespaces) > 0:
                if pywikibot.Page(self.site, entry.title).namespace() \
                        not in self.namespaces:
                    continue
            if alsoGetPageTitles:
                pageTitles.add(space_to_underscore(pywikibot.Link(entry.title, self.site)))

            m = redirR.match(entry.text)
            if m:
                target = m.group(1)
                # There might be redirects to another wiki. Ignore these.
                target_link = pywikibot.Link(target, self.site)
                try:
                    target_link.parse()
                except pywikibot.SiteDefinitionError as e:
                    pywikibot.log(e)
                    pywikibot.output(
                        u'NOTE: Ignoring {0} which is a redirect ({1}) to an '
                        u'unknown site.'.format(entry.title, target))
                    target_link = None
                else:
                    if target_link.site != self.site:
                        pywikibot.output(
                            u'NOTE: Ignoring {0} which is a redirect to '
                            u'another site {1}.'.format(entry.title, target_link.site))
                        target_link = None
                # if the redirect does not link to another wiki
                if target_link and target_link.title:
                    source = pywikibot.Link(entry.title, self.site)
                    if target_link.anchor:
                        pywikibot.output(
                            u'HINT: {0!s} is a redirect with a pipelink.'.format(entry.title))
                    redict[space_to_underscore(source)] = (
                        space_to_underscore(target_link))
        if alsoGetPageTitles:
            return redict, pageTitles
        else:
            return redict
Example #10
    def change(self, text):
        """
        Given a wiki source code text, return the cleaned up version.
        """
        oldText = text
        if self.site.sitename() == u'commons:commons' and self.namespace == 6:
            text = self.commonsfiledesc(text)
        text = self.fixSelfInterwiki(text)
        text = self.standardizePageFooter(text)
        text = self.fixSyntaxSave(text)
        text = self.cleanUpLinks(text)
        text = self.cleanUpSectionHeaders(text)
        text = self.putSpacesInLists(text)
        text = self.translateAndCapitalizeNamespaces(text)
##        text = self.translateMagicWords(text)
        text = self.replaceDeprecatedTemplates(text)
##        text = self.resolveHtmlEntities(text)
        text = self.validXhtml(text)
        text = self.removeUselessSpaces(text)
        text = self.removeNonBreakingSpaceBeforePercent(text)

        text = self.fixHtml(text)
        text = self.fixReferences(text)
        text = self.fixStyle(text)
        text = self.fixTypo(text)
        if self.site.lang in ['ckb', 'fa']:
            text = self.fixArabicLetters(text)
        try:
            text = isbn.hyphenateIsbnNumbers(text)
        except isbn.InvalidIsbnException as error:
            pywikibot.log(u"ISBN error: %s" % error)
            pass
        if self.debug:
            pywikibot.showDiff(oldText, text)
        return text
Example #11
def _call_cmd(args, lib='djvulibre'):
    """
    Tiny wrapper around subprocess.Popen().

    @param args: same as Popen()
    @type args: sequence or string

    @param lib: library to be logged in logging messages
    @type lib: string

    @return: returns a tuple (res, stdoutdata), where
        res is False if dp.returncode != 0, else True
    """
    if not isinstance(args, StringTypes):
        # upcast if any param in sequence args is not in StringTypes
        args = [str(a) if not isinstance(a, StringTypes) else a for a in args]
        cmd = ' '.join(args)
    else:
        cmd = args

    dp = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdoutdata, stderrdata = dp.communicate()

    if dp.returncode != 0:
        pywikibot.error('{0} error; {1}'.format(lib, cmd))
        pywikibot.error('{0}'.format(stderrdata))
        return (False, stdoutdata)

    pywikibot.log('SUCCESS: {0} (PID: {1})'.format(cmd, dp.pid))

    return (True, stdoutdata)
Example #12
    def tearDown(self):
        """Tear down test."""
        super(TestLoggingMixin, self).tearDown()

        if hasattr(self, "_outcomeForDoCleanups"):
            # Python 3 unittest & nose
            outcome = self._outcomeForDoCleanups
        elif hasattr(self, "_outcome"):
            # Python 3.4 nose
            outcome = self._outcome
        elif hasattr(self, "_resultForDoCleanups"):
            # Python 2 unittest & nose
            outcome = self._resultForDoCleanups
        else:
            return

        if len(outcome.errors) > self._previous_errors:
            status = " NOT OK: ERROR"
        # nose 3.4 doesn't have failures
        elif hasattr(outcome, "failures") and len(outcome.failures) > self._previous_failures:
            status = " NOT OK: FAILURE"
        else:
            status = " OK"

        log("END " + self._log_prefix + "." + self._testMethodName + status)
Example #13
    def login(self, retry=False):
        if not self.password:
            # As we don't want the password to appear on the screen, we set
            # password = True
            self.password = pywikibot.input(
                u'Password for user %(name)s on %(site)s (no characters will '
                u'be shown):' % {'name': self.username, 'site': self.site},
                password=True)
#        self.password = self.password.encode(self.site.encoding())

        pywikibot.output(u"Logging in to %(site)s as %(name)s"
                         % {'name': self.username, 'site': self.site})
        try:
            cookiedata = self.getCookie()
        except pywikibot.data.api.APIError as e:
            pywikibot.error(u"Login failed (%s)." % e.code)
            if retry:
                self.password = None
                return self.login(retry=True)
            else:
                return False
        self.storecookiedata(cookiedata)
        pywikibot.log(u"Should be logged in now")
##        # Show a warning according to the local bot policy
##   FIXME: disabled due to recursion; need to move this to the Site object after
##   login
##        if not self.botAllowed():
##            logger.error(
##                u"Username '%(name)s' is not listed on [[%(page)s]]."
##                 % {'name': self.username,
##                    'page': botList[self.site.family.name][self.site.code]})
##            logger.error(
##"Please make sure you are allowed to use the robot before actually using it!")
##            return False
        return True
Example #14
def validate_options(options, site):
    """
    Validate the options and return bool.

    @param options: options to validate
    @type options: dict

    @rtype: bool
    """
    pywikibot.log('Options:')
    required_keys = ['editnotice_template']
    has_keys = list()
    for key, value in options.items():
        pywikibot.log('-%s = %s' % (key, value))
        if key in required_keys:
            has_keys.append(key)
        if key in ('subject_only', 'talk_only', 'to_subject', 'to_talk'):
            pass
        elif key == 'editnotice_template':
            if isinstance(value, str):
                editnotice_page = pywikibot.Page(site, 'Template:%s' % value)
                if not editnotice_page.exists():
                    return False
            else:
                return False
    if sorted(has_keys) != sorted(required_keys):
        return False
    options['editnotice_page'] = editnotice_page
    options.pop('editnotice_template')
    return True
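A hypothetical invocation; the site and template name are illustrative:

    site = pywikibot.Site('en', 'wikipedia')
    options = {'editnotice_template': 'Editnotice demo'}
    if not validate_options(options, site):
        pywikibot.error('Invalid options.')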
Example #15
 def isbn_execute(text):
     """Hyphenate ISBN numbers and catch 'InvalidIsbnException'."""
     try:
         return isbn.hyphenateIsbnNumbers(text)
     except isbn.InvalidIsbnException as error:
         pywikibot.log(u"ISBN error: %s" % error)
         return None
Example #16
    def findCommonscatLink(self, page=None):
        """Find CommonsCat template on interwiki pages.

        In Pywikibot >=2.0, page.interwiki() now returns Link objects,
        not Page objects

        @rtype: unicode, name of a valid commons category
        """
        for ipageLink in page.langlinks():
            ipage = pywikibot.page.Page(ipageLink)
            pywikibot.log('Looking for template on ' + ipage.title())
            try:
                if (not ipage.exists() or ipage.isRedirectPage()
                        or ipage.isDisambig()):
                    continue
                commonscatLink = self.getCommonscatLink(ipage)
                if not commonscatLink:
                    continue
                (currentTemplate, possibleCommonscat, linkText,
                 Note) = commonscatLink
                checkedCommonscat = self.checkCommonscatLink(
                    possibleCommonscat)
                if (checkedCommonscat != ''):
                    pywikibot.output(
                        'Found link for {} at [[{}:{}]] to {}.'.format(
                            page.title(), ipage.site.code, ipage.title(),
                            checkedCommonscat))
                    return checkedCommonscat
            except pywikibot.BadTitle:
                # The interwiki was incorrect
                return ''
        return ''
Example #17
    def findCommonscatLink(self, page) -> str:
        """Find CommonsCat template on interwiki pages.

        :return: name of a valid commons category
        """
        for ipageLink in page.langlinks():
            ipage = pywikibot.page.Page(ipageLink)
            pywikibot.log('Looking for template on ' + ipage.title())
            try:  # T291783
                ipage_exists = ipage.exists()
            except InvalidTitleError:
                pywikibot.exception()
                continue

            if (not ipage_exists or ipage.isRedirectPage()
                    or ipage.isDisambig()):
                continue

            commonscatLink = self.getCommonscatLink(ipage)
            if not commonscatLink:
                continue

            checkedCommonscat = self.checkCommonscatLink(commonscatLink[1])
            if checkedCommonscat:
                pywikibot.output(
                    'Found link for {} at [[{}:{}]] to {}.'.format(
                        page.title(), ipage.site.code, ipage.title(),
                        checkedCommonscat))
                return checkedCommonscat
        return ''
Example #18
def _flush():
    for i in threads:
        http_queue.put(None)
    pywikibot.log(u'Waiting for threads to finish... ')
    for i in threads:
        i.join()
    pywikibot.log(u"All threads finished.")
Example #19
    def tearDown(self):
        """Tear down test."""
        super(TestLoggingMixin, self).tearDown()

        if hasattr(self, '_outcomeForDoCleanups'):
            # Python 3 unittest & nose
            outcome = self._outcomeForDoCleanups
        elif hasattr(self, '_outcome'):
            # Python 3.4 nose
            outcome = self._outcome
        elif hasattr(self, '_resultForDoCleanups'):
            # Python 2 unittest & nose
            outcome = self._resultForDoCleanups
        else:
            return

        if len(outcome.errors) > self._previous_errors:
            status = ' NOT OK: ERROR'
        # nose 3.4 doesn't have failures
        elif (hasattr(outcome, 'failures')
              and len(outcome.failures) > self._previous_failures):
            status = ' NOT OK: FAILURE'
        else:
            status = ' OK'

        log('END ' + self._log_prefix + '.' + self._testMethodName + status)
Example #20
    def findCommonscatLink(self, page=None):
        """Find CommonsCat template on interwiki pages.

        In Pywikibot 2.0, page.interwiki() now returns Link objects,
        not Page objects

        @rtype: unicode, name of a valid commons category
        """
        for ipageLink in page.langlinks():
            ipage = pywikibot.page.Page(ipageLink)
            pywikibot.log("Looking for template on %s" % (ipage.title()))
            try:
                if (not ipage.exists() or ipage.isRedirectPage() or
                        ipage.isDisambig()):
                    continue
                commonscatLink = self.getCommonscatLink(ipage)
                if not commonscatLink:
                    continue
                (currentTemplate,
                 possibleCommonscat, linkText, Note) = commonscatLink
                checkedCommonscat = self.checkCommonscatLink(possibleCommonscat)
                if (checkedCommonscat != u''):
                    pywikibot.output(
                        u"Found link for %s at [[%s:%s]] to %s."
                        % (page.title(), ipage.site.code,
                           ipage.title(), checkedCommonscat))
                    return checkedCommonscat
            except pywikibot.BadTitle:
                # The interwiki was incorrect
                return u''
        return u''
Example #21
 def _replace_rt_template_files(
         self, tpl: mwparserfromhell.nodes.Template) -> None:
     # Written for [[:cs:Template:Železniční trať]].
     for param in tpl.params:
         param_value = HTML_COMMENT.sub("", str(param.value)).strip()
         if param.name.matches("typ"):
             if param_value[:2] == "ex":
                 current_name = "exl" + param_value[2:]
             else:
                 current_name = "l" + param_value
         else:
             current_name = param_value
         try:
             current_icon = BSiconPage(self.current_page.site,
                                       name=current_name)
             current_icon.title()
         except (pywikibot.exceptions.Error, ValueError):
             continue
         new_icon = self.opt.bsicons_map.get(current_icon, None)
         if not new_icon:
             continue
         if param.name.matches("typ"):
             if new_icon.name[:3] == "exl":
                 replacement = "ex" + new_icon.name[3:]
             elif new_icon.name[:1] == "l":
                 replacement = new_icon.name[1:]
             else:
                 pywikibot.log(f"{new_icon} cannot be used in |typ=.")
                 continue
         else:
             replacement = new_icon.name
         param.value = str(param.value).replace(param_value, replacement)
         self.current_page.replacements.add(
             Replacement(current_icon, new_icon))
Example #22
def validate_options(options, site):
    """
    Validate the options and return bool.

    @param options: options to validate
    @type options: dict

    @rtype: bool
    """
    pywikibot.log('Options:')
    required_keys = ['editnotice_template']
    has_keys = list()
    for key, value in options.items():
        pywikibot.log('-{} = {}'.format(key, value))
        if key in required_keys:
            has_keys.append(key)
        if key == 'editnotice_template':
            if not isinstance(value, str):
                return False
            options[key] = '{{' + value + '}}'
            editnotice_page = pywikibot.Page(site, value, ns=10)
            if not editnotice_page.exists():
                return False
    if sorted(has_keys) != sorted(required_keys):
        return False
    options['editnotice_page'] = editnotice_page
    return True
Example #23
    def get_redirects_from_dump(self, alsoGetPageTitles=False) -> Tuple[
            Dict[str, str], Set[str]]:
        """
        Extract redirects from dump.

        Load a local XML dump file, look at all pages which have the
        redirect flag set, and find out where they're pointing at. Return
        a dictionary where the redirect names are the keys and the redirect
        targets are the values.
        """
        xmlFilename = self.opt.xml
        redict = {}
        # open xml dump and read page titles out of it
        dump = xmlreader.XmlDump(xmlFilename)
        redirR = self.site.redirect_regex
        readPagesCount = 0
        pageTitles = set()
        for entry in dump.parse():
            readPagesCount += 1
            # always print status message after 10000 pages
            if readPagesCount % 10000 == 0:
                pywikibot.output('{} pages read...'.format(readPagesCount))
            if self.opt.namespaces:
                if pywikibot.Page(self.site, entry.title).namespace() \
                        not in self.opt.namespaces:
                    continue
            if alsoGetPageTitles:
                pageTitles.add(space_to_underscore(pywikibot.Link(entry.title,
                                                                  self.site)))

            m = redirR.match(entry.text)
            if m:
                target = m.group(1)
                # There might be redirects to another wiki. Ignore these.
                target_link = pywikibot.Link(target, self.site)
                try:
                    target_link.parse()
                except SiteDefinitionError as e:
                    pywikibot.log(e)
                    pywikibot.output(
                        'NOTE: Ignoring {} which is a redirect ({}) to an '
                        'unknown site.'.format(entry.title, target))
                    target_link = None
                else:
                    if target_link.site != self.site:
                        pywikibot.output(
                            'NOTE: Ignoring {} which is a redirect to '
                            'another site {}.'
                            .format(entry.title, target_link.site))
                        target_link = None
                # if the redirect does not link to another wiki
                if target_link and target_link.title:
                    source = pywikibot.Link(entry.title, self.site)
                    if target_link.anchor:
                        pywikibot.output(
                            'HINT: {} is a redirect with a pipelink.'
                            .format(entry.title))
                    redict[space_to_underscore(source)] = (
                        space_to_underscore(target_link))
        return redict, pageTitles
Example #24
    def change(self, text):
        """
        Given a wiki source code text, return the cleaned up version.
        """
        oldText = text
        if self.site.sitename() == u'commons:commons' and self.namespace == 6:
            text = self.commonsfiledesc(text)
        text = self.fixSelfInterwiki(text)
        text = self.standardizePageFooter(text)
        text = self.fixSyntaxSave(text)
        text = self.cleanUpLinks(text)
        text = self.cleanUpSectionHeaders(text)
        text = self.putSpacesInLists(text)
        text = self.translateAndCapitalizeNamespaces(text)
##        text = self.translateMagicWords(text)
        text = self.replaceDeprecatedTemplates(text)
##        text = self.resolveHtmlEntities(text)
        text = self.validXhtml(text)
        text = self.removeUselessSpaces(text)
        text = self.removeNonBreakingSpaceBeforePercent(text)

        text = self.fixHtml(text)
        text = self.fixReferences(text)
        text = self.fixStyle(text)
        text = self.fixTypo(text)
        if self.site.lang in ['ckb', 'fa']:
            text = self.fixArabicLetters(text)
        try:
            text = isbn.hyphenateIsbnNumbers(text)
        except isbn.InvalidIsbnException as error:
            pywikibot.log(u"ISBN error: %s" % error)
            pass
        if self.debug:
            pywikibot.showDiff(oldText, text)
        return text
Example #25
    def encoding(self):
        """Detect the response encoding."""
        if hasattr(self, '_encoding'):
            return self._encoding

        if self.charset is None and self.header_encoding is None:
            pywikibot.log("Http response doesn't contain a charset.")
            charset = 'latin1'
        else:
            charset = self.charset

        _encoding = UnicodeError()
        if self.header_encoding is not None \
           and (charset is None
                or codecs.lookup(self.header_encoding)
                != codecs.lookup(charset)):
            if charset:
                pywikibot.warning(
                    'Encoding "{}" requested but "{}" received in the '
                    'header.'.format(charset, self.header_encoding))

            # TODO: Buffer decoded content, weakref does remove it too
            #       early (directly after this method)
            _encoding = self._try_decode(self.header_encoding)

        if charset and isinstance(_encoding, Exception):
            _encoding = self._try_decode(charset)

        if isinstance(_encoding, Exception):
            raise _encoding
        else:
            self._encoding = _encoding
        return self._encoding
Example #26
 def isbn_execute(text):
     """Hyphenate ISBN numbers and catch 'InvalidIsbnException'."""
     try:
         return isbn.hyphenateIsbnNumbers(text)
     except isbn.InvalidIsbnException as error:
         pywikibot.log(u"ISBN error: %s" % error)
         return None
Example #27
    def read_file_content(self, file_url=None):
        """Return name of temp file in which remote file is saved."""
        if not file_url:
            file_url = self.url
            pywikibot.warning("file_url is not given. "
                              "Set to self.url by default.")
        pywikibot.output(u'Reading file %s' % file_url)
        resume = False
        rlen = 0
        _contents = None
        dt = 15
        uo = URLopener()
        retrieved = False

        while not retrieved:
            if resume:
                pywikibot.output(u"Resume download...")
                uo.addheader('Range', 'bytes=%s-' % rlen)

            infile = uo.open(file_url)

            if 'text/html' in infile.info().getheader('Content-Type'):
                pywikibot.output(u"Couldn't download the image: "
                                 "the requested URL was not found on server.")
                return

            content_len = infile.info().getheader('Content-Length')
            accept_ranges = infile.info().getheader('Accept-Ranges') == 'bytes'

            if resume:
                _contents += infile.read()
            else:
                _contents = infile.read()

            infile.close()
            retrieved = True

            if content_len:
                rlen = len(_contents)
                content_len = int(content_len)
                if rlen < content_len:
                    retrieved = False
                    pywikibot.output(
                        u"Connection closed at byte %s (%s left)"
                        % (rlen, content_len))
                    if accept_ranges and rlen > 0:
                        resume = True
                    pywikibot.output(u"Sleeping for %d seconds..." % dt)
                    time.sleep(dt)
                    if dt <= 60:
                        dt += 15
                    elif dt < 360:
                        dt += 60
            else:
                pywikibot.log(
                    u"WARNING: length check of retrieved data not possible.")
        handle, tempname = tempfile.mkstemp()
        with os.fdopen(handle, "wb") as t:
            t.write(_contents)
        return tempname
Example #28
    def __getitem__(self, key):
        """Get token value for the given key."""
        if self.site.user() is None:
            self.site.login()

        user_tokens = self._tokens.setdefault(self.site.user(), {})
        # always preload all for users without tokens
        failed_cache_key = (self.site.user(), key)

        # redirect old tokens to be compatible with older MW version
        # https://www.mediawiki.org/wiki/MediaWiki_1.37/Deprecation_of_legacy_API_token_parameters
        if self.site.mw_version >= '1.24wmf19' \
           and key in {'edit', 'delete', 'protect', 'move', 'block', 'unblock',
                       'email', 'import', 'options'}:
            log('Token {!r} was replaced by {!r}'.format(key, 'csrf'))
            key = 'csrf'

        try:
            key = self.site.validate_tokens([key])[0]
        except IndexError:
            raise Error("Requested token '{}' is invalid on {} wiki.".format(
                key, self.site))

        if (key not in user_tokens
                and failed_cache_key not in self.failed_cache):
            self.load_tokens([key], all=False if user_tokens else None)

        if key in user_tokens:
            return user_tokens[key]
        # token not allowed for self.site.user() on self.site
        self.failed_cache.add(failed_cache_key)
        # to be changed back to a plain KeyError?
        raise Error(
            "Action '{}' is not allowed for user {} on {} wiki.".format(
                key, self.site.user(), self.site))
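A hypothetical lookup through this mapping; exposing it as site.tokens is an assumption about how the token wallet is attached to the site object:

    csrf_token = site.tokens['csrf']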
Example #29
    def tearDown(self):
        """Tear down test."""
        super(TestLoggingMixin, self).tearDown()

        if hasattr(self, '_outcomeForDoCleanups'):
            # Python 3 unittest & nose
            outcome = self._outcomeForDoCleanups
        elif hasattr(self, '_outcome'):
            # Python 3.4 nose
            outcome = self._outcome
        elif hasattr(self, '_resultForDoCleanups'):
            # Python 2 unittest & nose
            outcome = self._resultForDoCleanups
        else:
            return

        if len(outcome.errors) > self._previous_errors:
            status = ' NOT OK: ERROR'
        # nose 3.4 doesn't have failures
        elif (hasattr(outcome, 'failures') and
                len(outcome.failures) > self._previous_failures):
            status = ' NOT OK: FAILURE'
        else:
            status = ' OK'

        log('END ' + self._log_prefix + '.' + self._testMethodName + status)
Example #30
    def _command(self, file_name, text, jump_index=None):
        """Return editor selected in user-config.py."""
        if jump_index:
            # Some editors make it possible to mark occurrences of substrings,
            # or to jump to the line of the first occurrence.
            # TODO: Find a better solution than hardcoding these, e.g. a config
            # option.
            line = text[:jump_index].count('\n')
            column = jump_index - (text[:jump_index].rfind('\n') + 1)
        else:
            line = column = 0
        # Linux editors. We use startswith() because some users might use
        # parameters.
        if config.editor.startswith('kate'):
            command = ['-l', '%i' % (line + 1), '-c', '%i' % (column + 1)]
        elif config.editor.startswith('gedit'):
            command = ['+%i' % (line + 1)]  # seems not to support columns
        elif config.editor.startswith('emacs'):
            command = ['+%i' % (line + 1)]  # seems not to support columns
        elif config.editor.startswith('jedit'):
            command = ['+line:%i' % (line + 1)]  # seems not to support columns
        elif config.editor.startswith('vim'):
            command = ['+%i' % (line + 1)]  # seems not to support columns
        elif config.editor.startswith('nano'):
            command = ['+%i,%i' % (line + 1, column + 1)]
        # Windows editors
        elif config.editor.lower().endswith('notepad++.exe'):
            command = ['-n%i' % (line + 1)]  # seems not to support columns
        else:
            command = []

        # See T102465 for problems relating to using config.editor unparsed.
        command = [config.editor] + command + [file_name]
        pywikibot.log(u'Running editor: %s' % TextEditor._concat(command))
        return command
Example #31
 def __iter__(self):
     """Yield pages."""
     # TODO: start yielding before all referring pages have been found
     refs = [
         page for page in self.disambPage.getReferences(
             withTemplateInclusion=False,
             namespaces=0 if self.main_only else None)
     ]
     pywikibot.output(u"Found %d references." % len(refs))
     # Remove ignorables
     if self.disambPage.site.family.name in ignore_title and \
        self.disambPage.site.lang in ignore_title[
            self.disambPage.site.family.name]:
         for ig in ignore_title[self.disambPage.site.family.name][
                 self.disambPage.site.lang]:
             for i in range(len(refs) - 1, -1, -1):
                 if re.match(ig, refs[i].title()):
                     pywikibot.log(u'Ignoring page %s' % refs[i].title())
                     del refs[i]
                 elif self.primaryIgnoreManager.isIgnored(refs[i]):
                     del refs[i]
     if len(refs) < self.minimum:
         pywikibot.output(u"Found only %d pages to work on; skipping." %
                          len(refs))
         return
     pywikibot.output(u"Will work on %d pages." % len(refs))
     for ref in refs:
         yield ref
Example #32
    def command(self, tempFilename, text, jumpIndex=None):
        """Return editor selected in user-config.py."""
        command = config.editor
        if jumpIndex:
            # Some editors make it possible to mark occurrences of substrings,
            # or to jump to the line of the first occurrence.
            # TODO: Find a better solution than hardcoding these, e.g. a config
            # option.
            line = text[:jumpIndex].count('\n')
            column = jumpIndex - (text[:jumpIndex].rfind('\n') + 1)
        else:
            line = column = 0
        # Linux editors. We use startswith() because some users might use
        # parameters.
        if config.editor.startswith('kate'):
            command += " -l %i -c %i" % (line + 1, column + 1)
        elif config.editor.startswith('gedit'):
            command += " +%i" % (line + 1)  # seems not to support columns
        elif config.editor.startswith('emacs'):
            command += " +%i" % (line + 1)  # seems not to support columns
        elif config.editor.startswith('jedit'):
            command += " +line:%i" % (line + 1)  # seems not to support columns
        elif config.editor.startswith('vim'):
            command += " +%i" % (line + 1)  # seems not to support columns
        elif config.editor.startswith('nano'):
            command += " +%i,%i" % (line + 1, column + 1)
        # Windows editors
        elif config.editor.lower().endswith('notepad++.exe'):
            command += " -n%i" % (line + 1)  # seems not to support columns

        command += ' %s' % tempFilename
        pywikibot.log(u'Running editor: %s' % command)
        return command
Example #33
def _flush():
    for i in threads:
        http_queue.put(None)
    pywikibot.log(u'Waiting for threads to finish... ')
    for i in threads:
        i.join()
    pywikibot.log(u"All threads finished.")
Example #34
    def command(self, tempFilename, text, jumpIndex=None):
        """Return editor selected in user-config.py."""
        command = config.editor
        if jumpIndex:
            # Some editors make it possible to mark occurrences of substrings,
            # or to jump to the line of the first occurrence.
            # TODO: Find a better solution than hardcoding these, e.g. a config
            # option.
            line = text[:jumpIndex].count('\n')
            column = jumpIndex - (text[:jumpIndex].rfind('\n') + 1)
        else:
            line = column = 0
        # Linux editors. We use startswith() because some users might use
        # parameters.
        if config.editor.startswith('kate'):
            command += " -l %i -c %i" % (line + 1, column + 1)
        elif config.editor.startswith('gedit'):
            command += " +%i" % (line + 1)  # seems not to support columns
        elif config.editor.startswith('emacs'):
            command += " +%i" % (line + 1)  # seems not to support columns
        elif config.editor.startswith('jedit'):
            command += " +line:%i" % (line + 1)  # seems not to support columns
        elif config.editor.startswith('vim'):
            command += " +%i" % (line + 1)  # seems not to support columns
        elif config.editor.startswith('nano'):
            command += " +%i,%i" % (line + 1, column + 1)
        # Windows editors
        elif config.editor.lower().endswith('notepad++.exe'):
            command += " -n%i" % (line + 1)  # seems not to support columns

        command += ' %s' % tempFilename
        pywikibot.log(u'Running editor: %s' % command)
        return command
Example #35
    def __init__(self, fromurl, **kwargs):
        """
        Initializer.

        :raises pywikibot.exceptions.ServerError: a server error occurred
            while loading the site
        :raises Timeout: a timeout occurred while loading the site
        :raises RuntimeError: Version not found or version less than 1.23
        """
        if fromurl.endswith('$1'):
            fromurl = fromurl[:-2]

        r = fetch(fromurl, **kwargs)
        check_response(r)

        if fromurl != r.url:
            pywikibot.log('{} redirected to {}'.format(fromurl, r.url))
            fromurl = r.url

        self.fromurl = fromurl

        data = r.text

        wp = WikiHTMLPageParser(fromurl)
        wp.feed(data)

        self.version = wp.version
        self.server = wp.server
        self.scriptpath = wp.scriptpath
        self.articlepath = None

        if self.api:
            try:
                self._parse_site()
            except (ServerError, RequestException):
                raise
            except Exception as e:
                pywikibot.log('MW detection failed: {!r}'.format(e))

            if not self.version:
                self._fetch_old_version()

        if not self.api:
            raise RuntimeError('Unsupported url: {}'.format(self.fromurl))

        if not self.version or self.version < MIN_VERSION:
            raise RuntimeError('Unsupported version: {}'.format(self.version))

        if not self.articlepath:
            if self.private_wiki:
                if self.api != self.fromurl and self.private_wiki:
                    self.articlepath = self.fromurl.rsplit('/', 1)[0] + '/$1'
                else:
                    raise RuntimeError(
                        'Unable to determine articlepath because the wiki is '
                        'private. Use the Main Page URL instead of the API.')
            else:
                raise RuntimeError('Unable to determine articlepath: '
                                   '{}'.format(self.fromurl))
Example #36
    def sauvegarder(self):
        """
        Save to a database.
        """
        pywikibot.log(u'# Saving to the database for language "%s".' % self.langue)

        for q in self.nouveau:
            self.req_bdd(q, 'insert')
Example #37
    def __init__(self, generator, **kwargs):
        """- generator : Page generator."""
        self.availableOptions.update({
            'ignorepdf': False,  # boolean
            'limit': None,  # int, stop after n modified pages
            'summary': None,
        })

        super(ReferencesRobot, self).__init__(**kwargs)
        self.generator = generator
        self.site = pywikibot.Site()
        self._user_agent = comms.http.get_fake_user_agent()
        pywikibot.log('Using fake user agent: {0}'.format(self._user_agent))
        # Check
        manual = 'mw:Manual:Pywikibot/refLinks'
        code = None
        for alt in [self.site.code] + i18n._altlang(self.site.code):
            if alt in localized_msg:
                code = alt
                break
        if code:
            manual += '/%s' % code
        if self.getOption('summary') is None:
            self.msg = i18n.twtranslate(self.site, 'reflinks-msg', locals())
        else:
            self.msg = self.getOption('summary')
        self.stopPage = pywikibot.Page(self.site,
                                       i18n.translate(self.site, stopPage))

        local = i18n.translate(self.site, badtitles)
        if local:
            bad = '(' + globalbadtitles + '|' + local + ')'
        else:
            bad = globalbadtitles
        self.titleBlackList = re.compile(bad, re.I | re.S | re.X)
        self.norefbot = noreferences.NoReferencesBot(None, verbose=False)
        self.deduplicator = DuplicateReferences()
        try:
            self.stopPageRevId = self.stopPage.latest_revision_id
        except pywikibot.NoPage:
            pywikibot.output(u'The stop page %s does not exist'
                             % self.stopPage.title(asLink=True))
            raise

        # Regex to grasp content-type meta HTML tag in HTML source
        self.META_CONTENT = re.compile(br'(?i)<meta[^>]*content\-type[^>]*>')
        # Extract the encoding from a charset property (from content-type !)
        self.CHARSET = re.compile(r'(?i)charset\s*=\s*(?P<enc>[^\'",;>/]*)')
        # Extract html title from page
        self.TITLE = re.compile(r'(?is)(?<=<title>).*?(?=</title>)')
        # Matches content inside <script>/<style>/HTML comments
        self.NON_HTML = re.compile(
            br'(?is)<script[^>]*>.*?</script>|<style[^>]*>.*?</style>|'
            br'<!--.*?-->|<!\[CDATA\[.*?\]\]>')

        # Authorized mime types for HTML pages
        self.MIME = re.compile(
            r'application/(?:xhtml\+xml|xml)|text/(?:ht|x)ml')
Example #38
    def __init__(self, generator, **kwargs):
        """- generator : Page generator."""
        self.availableOptions.update({
            'ignorepdf': False,  # boolean
            'limit': None,  # int, stop after n modified pages
            'summary': None,
        })

        super(ReferencesRobot, self).__init__(**kwargs)
        self.generator = generator
        self.site = pywikibot.Site()
        self._user_agent = comms.http.get_fake_user_agent()
        pywikibot.log('Using fake user agent: {0}'.format(self._user_agent))
        # Check
        manual = 'mw:Manual:Pywikibot/refLinks'
        code = None
        for alt in [self.site.code] + i18n._altlang(self.site.code):
            if alt in localized_msg:
                code = alt
                break
        if code:
            manual += '/%s' % code
        if self.getOption('summary') is None:
            self.msg = i18n.twtranslate(self.site, 'reflinks-msg', locals())
        else:
            self.msg = self.getOption('summary')
        self.stopPage = pywikibot.Page(self.site,
                                       i18n.translate(self.site, stopPage))

        local = i18n.translate(self.site, badtitles)
        if local:
            bad = '(' + globalbadtitles + '|' + local + ')'
        else:
            bad = globalbadtitles
        self.titleBlackList = re.compile(bad, re.I | re.S | re.X)
        self.norefbot = noreferences.NoReferencesBot(None, verbose=False)
        self.deduplicator = DuplicateReferences()
        try:
            self.stopPageRevId = self.stopPage.latest_revision_id
        except pywikibot.NoPage:
            pywikibot.output(u'The stop page %s does not exist' %
                             self.stopPage.title(asLink=True))
            raise

        # Regex to grasp content-type meta HTML tag in HTML source
        self.META_CONTENT = re.compile(br'(?i)<meta[^>]*content\-type[^>]*>')
        # Extract the encoding from a charset property (from content-type !)
        self.CHARSET = re.compile(r'(?i)charset\s*=\s*(?P<enc>[^\'",;>/]*)')
        # Extract html title from page
        self.TITLE = re.compile(r'(?is)(?<=<title>).*?(?=</title>)')
        # Matches content inside <script>/<style>/HTML comments
        self.NON_HTML = re.compile(
            br'(?is)<script[^>]*>.*?</script>|<style[^>]*>.*?</style>|'
            br'<!--.*?-->|<!\[CDATA\[.*?\]\]>')

        # Authorized mime types for HTML pages
        self.MIME = re.compile(
            r'application/(?:xhtml\+xml|xml)|text/(?:ht|x)ml')
Example #39
    def login(self, retry=False):
        """
        Attempt to log into the server.

        @param retry: infinitely retry if the API returns an unknown error
        @type retry: bool

        @raises NoUsername: Username is not recognised by the site.
        """
        if not self.password:
            # First check that the username exists,
            # to avoid asking for a password that will not work.
            self.check_user_exists()

            # As we don't want the password to appear on the screen, we set
            # password = True
            self.password = pywikibot.input(
                u'Password for user %(name)s on %(site)s (no characters will '
                u'be shown):' % {
                    'name': self.username,
                    'site': self.site
                },
                password=True)

        pywikibot.output(u"Logging in to %(site)s as %(name)s" % {
            'name': self.username,
            'site': self.site
        })
        try:
            cookiedata = self.getCookie()
        except pywikibot.data.api.APIError as e:
            pywikibot.error(u"Login failed (%s)." % e.code)
            if e.code == 'NotExists':
                raise NoUsername(u"Username '%s' does not exist on %s" %
                                 (self.username, self.site))
            elif e.code == 'Illegal':
                raise NoUsername(u"Username '%s' is invalid on %s" %
                                 (self.username, self.site))
            # TODO: investigate other unhandled API codes (bug 73539)
            if retry:
                self.password = None
                return self.login(retry=True)
            else:
                return False
        self.storecookiedata(cookiedata)
        pywikibot.log(u"Should be logged in now")
        #        # Show a warning according to the local bot policy
        #   FIXME: disabled due to recursion; need to move this to the Site object after
        #   login
        #        if not self.botAllowed():
        #            logger.error(
        #                u"Username '%(name)s' is not listed on [[%(page)s]]."
        #                 % {'name': self.username,
        #                    'page': botList[self.site.family.name][self.site.code]})
        #            logger.error(
        # "Please make sure you are allowed to use the robot before actually using it!")
        #            return False
        return True
Example #40
0
    def login(self, retry=False, autocreate=False):
        """
        Attempt to log into the server.

        @see: U{https://www.mediawiki.org/wiki/API:Login}

        @param retry: infinitely retry if the API returns an unknown error
        @type retry: bool

        @param autocreate: if true, allow auto-creation of the account
                           using unified login
        @type autocreate: bool

        @raises pywikibot.exceptions.NoUsername: Username is not recognised by
            the site.
        """
        if not self.password:
            # First check that the username exists,
            # to avoid asking for a password that will not work.
            if not autocreate:
                self.check_user_exists()

            # As we don't want the password to appear on the screen, we set
            # password = True
            self.password = pywikibot.input(
                'Password for user %(name)s on %(site)s (no characters will '
                'be shown):' % {
                    'name': self.login_name,
                    'site': self.site
                },
                password=True)

        pywikibot.output('Logging in to %(site)s as %(name)s' % {
            'name': self.login_name,
            'site': self.site
        })
        try:
            cookiedata = self.getCookie()
        except pywikibot.data.api.APIError as e:
            error_code = e.code
            pywikibot.error('Login failed ({}).'.format(error_code))
            if error_code in self._api_error:
                error_msg = 'Username "{}" {} on {}'.format(
                    self.login_name, self._api_error[error_code], self.site)
                if error_code in ('Failed', 'FAIL'):
                    error_msg += '.\n{}'.format(e.info)
                raise NoUsername(error_msg)

            # TODO: investigate other unhandled API codes (bug T75539)
            if retry:
                self.password = None
                return self.login(retry=True)
            else:
                return False
        self.storecookiedata(cookiedata)
        pywikibot.log('Should be logged in now')
        return True
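Callers normally do not instantiate a login manager themselves; Site.login() creates one internally. A minimal, hedged usage sketch (the site and family codes are placeholders):

import pywikibot

site = pywikibot.Site('en', 'wikipedia')
site.login()  # builds a login manager internally; prompts for a password if none is stored
pywikibot.log('Logged in as {}'.format(site.user()))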
Example #41
0
    def login(self, retry=False):
        """
        Attempt to log into the server.

        @param retry: infinitely retry if the API returns an unknown error
        @type retry: bool

        @raises NoUsername: Username is not recognised by the site.
        """
        if not self.password:
            # First check that the username exists,
            # to avoid asking for a password that will not work.
            self.check_user_exists()

            # As we don't want the password to appear on the screen, we set
            # password = True
            self.password = pywikibot.input(
                u'Password for user %(name)s on %(site)s (no characters will '
                u'be shown):' % {'name': self.login_name, 'site': self.site},
                password=True)

        pywikibot.output(u"Logging in to %(site)s as %(name)s"
                         % {'name': self.login_name, 'site': self.site})
        try:
            cookiedata = self.getCookie()
        except pywikibot.data.api.APIError as e:
            pywikibot.error(u"Login failed (%s)." % e.code)
            if e.code == 'NotExists':
                raise NoUsername(u"Username '%s' does not exist on %s"
                                 % (self.login_name, self.site))
            elif e.code == 'Illegal':
                raise NoUsername(u"Username '%s' is invalid on %s"
                                 % (self.login_name, self.site))
            elif e.code == 'readapidenied':
                raise NoUsername(
                    'Username "{0}" does not have read permissions on '
                    '{1}'.format(self.login_name, self.site))
            # TODO: investigate other unhandled API codes (bug T75539)
            if retry:
                self.password = None
                return self.login(retry=True)
            else:
                return False
        self.storecookiedata(cookiedata)
        pywikibot.log(u"Should be logged in now")
#        # Show a warning according to the local bot policy
#   FIXME: disabled due to recursion; need to move this to the Site object after
#   login
#        if not self.botAllowed():
#            logger.error(
#                u"Username '%(name)s' is not listed on [[%(page)s]]."
#                 % {'name': self.username,
#                    'page': botList[self.site.family.name][self.site.code]})
#            logger.error(
# "Please make sure you are allowed to use the robot before actually using it!")
#            return False
        return True
Example #42
0
def _decide_encoding(response, charset) -> Optional[str]:
    """Detect the response encoding."""
    def _try_decode(content, encoding):
        """Helper function to try decoding."""
        if encoding is None:
            return None

        try:
            content.decode(encoding)
        except LookupError:
            pywikibot.warning(
                'Unknown or invalid encoding {!r}'.format(encoding))
        except UnicodeDecodeError as e:
            pywikibot.warning('{} found in {}'.format(e, content))
        else:
            return encoding

        return None  # let chardet do the job

    header_encoding = _get_encoding_from_response_headers(response)
    if header_encoding is None:
        pywikibot.log('Http response does not contain a charset.')

    if charset is None:
        charset = response.request.headers.get('accept-charset')

    # No charset requested, or in request headers or response headers.
    # Defaults to latin1.
    if charset is None and header_encoding is None:
        return _try_decode(response.content, 'latin1')

    if charset is None and header_encoding is not None:
        return _try_decode(response.content, header_encoding)

    if charset is not None and header_encoding is None:
        return _try_decode(response.content, charset)

    # Both charset and header_encoding are available.
    try:
        header_codecs = codecs.lookup(header_encoding)
    except LookupError:
        header_codecs = None

    try:
        charset_codecs = codecs.lookup(charset)
    except LookupError:
        charset_codecs = None

    if header_codecs and charset_codecs and header_codecs != charset_codecs:
        pywikibot.warning('Encoding "{}" requested but "{}" received in the '
                          'response header.'.format(charset, header_encoding))

    _encoding = _try_decode(response.content, header_encoding) \
        or _try_decode(response.content, charset)

    return _encoding
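The decision tree above boils down to: prefer the response-header charset, fall back to the requested charset, and otherwise defer to a detector. A condensed, self-contained sketch of that idea (pick_encoding is an illustrative name, not part of pywikibot):

import codecs
from typing import Optional


def pick_encoding(content: bytes, requested: Optional[str],
                  from_header: Optional[str]) -> Optional[str]:
    """Return the first candidate encoding that exists and decodes content."""
    for candidate in (from_header, requested):
        if not candidate:
            continue
        try:
            codecs.lookup(candidate)   # unknown codec -> LookupError
            content.decode(candidate)  # wrong bytes -> UnicodeDecodeError
        except (LookupError, UnicodeDecodeError):
            continue
        return candidate
    return None  # let a detector such as chardet decide


assert pick_encoding('café'.encode('utf-8'), 'latin1', 'utf-8') == 'utf-8'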
Example #43
0
 def save_wikipage(self, page_text, page_name, summary="Bot: Update der Ergebnisliste"):
     try:
         article = pywikibot.Page(self.site, page_name)
         updater = ArticleUpdater(article)
         if not updater.save_text(page_text, summary):
             pywikibot.log("Result page has not changed, skipping update ...")
     except pywikibot.Error:
         with tempfile.NamedTemporaryFile(delete=False) as dump_file:
             dump_file.write(page_name.encode('utf-8'))
             pywikibot.error("Could not update result page, page dumped to {}"
                             .format(dump_file.name), exc_info=True)
Example #44
0
 def vider_base(self):
     """
     Empty the associated database (to remove delisted entries).
     """
     pywikibot.log(u"## Clearing the old database")
     req = u'TRUNCATE TABLE %s' % self.nom_base
     try:
         self.curseur.execute(req)
     except MySQLdb.Error as e:
         pywikibot.warning(u"Truncate error %d: %s" % (e.args[0], e.args[1]))
def main():
    countrycode = u''
    lang = u''
    skip_wd = False
    add_template = False
    conn = None
    cursor = None
    # Connect database, we need that
    (conn, cursor) = connect_to_monuments_database()
    (conn2, cursor2) = connect_to_commons_database()

    # FIXME add option to only run based on list usage, not category membership
    for arg in pywikibot.handleArgs():
        option, sep, value = arg.partition(':')
        if option == '-countrycode':
            countrycode = value
        elif option == '-langcode':
            lang = value
        elif option == '-skip_wd':
            skip_wd = True
        elif option == '-add_template':
            add_template = True
        else:
            raise Exception(
                u'Bad parameters. Expected "-countrycode", "-langcode", '
                u'"-skip_wd", "-add_template" or pywikibot args. '
                u'Found "{}"'.format(option))

    if countrycode and lang:
        if not mconfig.countries.get((countrycode, lang)):
            pywikibot.warning(
                u'I have no config for countrycode "{0}" '
                u'in language "{1}"'.format(countrycode, lang))
            return False
        pywikibot.log(
            u'Working on countrycode "{0}" in language "{1}"'.format(
                countrycode, lang))
        processCountry(mconfig.countries.get((countrycode, lang)),
                       add_template, conn, cursor, conn2, cursor2)
    elif countrycode or lang:
        raise Exception(u'The "countrycode" and "langcode" arguments must '
                        u'be used together.')
    else:
        statistics = []
        for (countrycode, lang), countryconfig in mconfig.filtered_countries(
                skip_wd=skip_wd):
            pywikibot.log(
                u'Working on countrycode "{0}" in language "{1}"'.format(
                    countrycode, lang))
            statistics.append(
                processCountry(
                    countryconfig, add_template, conn, cursor, conn2, cursor2))
        make_statistics(statistics)

    close_database_connection(conn, cursor)
Example #46
0
    def read_file_content(self):
        """Return name of temp file in which remote file is saved."""
        pywikibot.output(u'Reading file %s' % self.url)
        resume = False
        dt = 15
        uo = urllib.URLopener()
        retrieved = False

        while not retrieved:
            if resume:
                pywikibot.output(u"Resume download...")
                uo.addheader('Range', 'bytes=%s-' % rlen)

            infile = uo.open(self.url)

            if 'text/html' in infile.info().getheader('Content-Type'):
                print("Couldn't download the image: "
                      "the requested URL was not found on server.")
                return

            content_len = infile.info().getheader('Content-Length')
            accept_ranges = infile.info().getheader('Accept-Ranges') == 'bytes'

            if resume:
                _contents += infile.read()
            else:
                _contents = infile.read()

            infile.close()
            retrieved = True

            if content_len:
                rlen = len(_contents)
                content_len = int(content_len)
                if rlen < content_len:
                    retrieved = False
                    pywikibot.output(
                        u"Connection closed at byte %s (%s left)" %
                        (rlen, content_len))
                    if accept_ranges and rlen > 0:
                        resume = True
                    pywikibot.output(u"Sleeping for %d seconds..." % dt)
                    time.sleep(dt)
                    if dt <= 60:
                        dt += 15
                    elif dt < 360:
                        dt += 60
            else:
                pywikibot.log(
                    u"WARNING: Length check of retrieved data not possible.")
        handle, tempname = tempfile.mkstemp()
        t = os.fdopen(handle, "wb")
        t.write(_contents)
        t.close()
        return tempname
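The same resume-via-Range idea, sketched with the requests library for comparison (the function name, chunk size and timeout are illustrative choices, not part of the original script):

import os

import requests


def download_with_resume(url, dest):
    """Append to dest, resuming from its current size via a Range header."""
    start = os.path.getsize(dest) if os.path.exists(dest) else 0
    headers = {'Range': 'bytes={}-'.format(start)} if start else {}
    with requests.get(url, headers=headers, stream=True, timeout=30) as resp:
        resp.raise_for_status()
        with open(dest, 'ab') as fh:
            for chunk in resp.iter_content(chunk_size=64 * 1024):
                fh.write(chunk)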
Example #47
0
    def read_file_content(self):
        """Return name of temp file in which remote file is saved."""
        pywikibot.output(u'Reading file %s' % self.url)
        resume = False
        dt = 15
        uo = urllib.URLopener()
        retrieved = False

        while not retrieved:
            if resume:
                pywikibot.output(u"Resume download...")
                uo.addheader('Range', 'bytes=%s-' % rlen)

            infile = uo.open(self.url)

            if 'text/html' in infile.info().getheader('Content-Type'):
                print("Couldn't download the image: "
                      "the requested URL was not found on server.")
                return

            content_len = infile.info().getheader('Content-Length')
            accept_ranges = infile.info().getheader('Accept-Ranges') == 'bytes'

            if resume:
                _contents += infile.read()
            else:
                _contents = infile.read()

            infile.close()
            retrieved = True

            if content_len:
                rlen = len(_contents)
                content_len = int(content_len)
                if rlen < content_len:
                    retrieved = False
                    pywikibot.output(
                        u"Connection closed at byte %s (%s left)"
                         % (rlen, content_len))
                    if accept_ranges and rlen > 0:
                        resume = True
                    pywikibot.output(u"Sleeping for %d seconds..." % dt)
                    time.sleep(dt)
                    if dt <= 60:
                        dt += 15
                    elif dt < 360:
                        dt += 60
            else:
                pywikibot.log(
                    u"WARNING: Length check of retrieved data not possible.")
        handle, tempname = tempfile.mkstemp()
        t = os.fdopen(handle, "wb")
        t.write(_contents)
        t.close()
        return tempname
Example #48
0
    def encoding(self):
        """Detect the response encoding."""
        pos = self.response_headers['content-type'].find('charset=')
        if pos >= 0:
            pos += len('charset=')
            encoding = self.response_headers['content-type'][pos:]
        else:
            encoding = 'ascii'
            # Don't warn, many pages don't contain one
            pywikibot.log(u"Http response doesn't contain a charset.")

        return encoding
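For comparison, the standard library parses such headers more robustly than a plain find(); a small standalone sketch (the header value is an example, this is not pywikibot code):

from email.message import Message

msg = Message()
msg['content-type'] = 'text/html; charset=UTF-8'
print(msg.get_content_charset('ascii'))  # prints: utf-8 ('ascii' is the fallback)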
Example #49
0
    def encoding(self):
        """Detect the response encoding."""
        pos = self.response_headers['content-type'].find('charset=')
        if pos >= 0:
            pos += len('charset=')
            encoding = self.response_headers['content-type'][pos:]
        else:
            encoding = 'ascii'
            # Don't warn, many pages don't contain one
            pywikibot.log(u"Http response doesn't contain a charset.")

        return encoding
Example #50
0
def match_name(name, typ, wd, limit=75):
    """
    Check if there is an item matching the name.

    Given a plaintext name (first or last) this checks if there is
    a unique matching entity of the right name type. Search results are
    stored in 'matchedNames' for later look-up.

    @param name: The name to search for
    @type name: basestring
    @param typ: The name type (either 'lastName' or 'firstName')
    @type typ: basestring
    @param wd: The running WikidataStuff instance
    @type wd: WikidataStuff (WD)
    @param limit: Number of hits before skipping (defaults to 75,
        ignored if onLabs)
    @type limit: int
    @return: A matching item, if any
    @rtype: pywikibot.ItemPage, or None
    """
    global matchedNames
    prop = {
        'lastName': ('Q101352', ),
        'firstName': ('Q12308941', 'Q11879590', 'Q202444')
    }

    # Skip any empty values
    if not name.strip():
        return

    # Check if already looked up
    if name in matchedNames[typ]:
        return matchedNames[typ][name]

    # search for potential matches
    matches = None
    props = prop[typ]
    if wd.onLabs:
        matches = match_name_on_labs(name, props, wd)
    else:
        matches = match_name_off_labs(name, props, wd, limit)

    # get rid of duplicates then check for uniqueness
    matches = list(set(matches))
    if len(matches) == 1:
        item = wd.bypassRedirect(matches[0])
        matchedNames[typ][name] = item  # store for later reuse
        return item
    elif len(matches) > 1:
        pywikibot.log('Possible duplicates: {}'.format(matches))

    # getting here means no hits so store that for later reuse
    matchedNames[typ][name] = None
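The caching is the subtle part: misses are stored as None so repeated failures do not trigger new searches. A tiny, self-contained sketch of that pattern (the names and lookups are illustrative, not pywikibot code):

matchedNames = {'firstName': {}, 'lastName': {}}


def cached_match(name, typ, lookup):
    """Return the cached result for (typ, name), computing it once via lookup()."""
    if name in matchedNames[typ]:
        return matchedNames[typ][name]
    result = lookup(name)
    matchedNames[typ][name] = result  # misses (None) are cached as well
    return result


print(cached_match('Andersson', 'lastName', str.upper))        # computed once
print(cached_match('Andersson', 'lastName', lambda n: 1 / 0))  # served from cache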
Example #51
0
    def run(self):
        """The main bot function that does all the work.

        For readability it is split into several helper functions:
        - _movecat()
        - _movetalk()
        - _hist()
        - _change()
        - _delete()
        """
        # can_move_* determines if the page can be moved safely (target
        # doesn't exist but source does), move_items determines if the
        # items (pages/subcategories) of the category could be moved into
        # a new (non existent) category.
        can_move_page = CategoryMoveRobot.check_move("category page", self.oldcat, self.newcat)
        can_move_talk = CategoryMoveRobot.check_move("category talk page", self.oldtalk, self.newtalk)
        if not self.newcat:  # delete
            move_items = True
        else:
            move_items = not self.newcat.exists() or not self.move_together
            if not self.allow_split:
                can_move_page = can_move_page and move_items
                can_move_talk = can_move_talk and move_items
        if self.newcat and self.move_oldcat:
            if self.can_move_cats:
                if can_move_page:
                    oldcattitle = self.oldcat.title()
                    self.newcat = self.oldcat.move(self.newcat.title(), reason=self.comment, movetalkpage=can_move_talk)
                    self._strip_cfd_templates()
                    self.oldcat = pywikibot.Category(self.oldcat.site, oldcattitle)
            else:
                if can_move_page:
                    self._movecat()
                if can_move_talk:
                    self._movetalk()
                if self.wikibase:
                    self._update_wikibase_item()
            if self.history and can_move_page:
                self._hist()

        if move_items:
            self._change(pagegenerators.CategorizedPageGenerator(self.oldcat))
            if not self.pagesonly:
                self._change(pagegenerators.SubCategoriesPageGenerator(self.oldcat))
        else:
            pywikibot.log("Didn't move pages/subcategories, because the " "category page hasn't been moved.")
        if (
            self.oldcat.isEmptyCategory()
            and self.delete_oldcat
            and ((self.newcat and self.move_oldcat) or not self.newcat)
        ):
            self._delete(can_move_page, can_move_talk)
Example #52
0
 def _parse_section(self, section: str) -> None:
     """Parse a section of a page."""
     cfd_page = None
     cfd_prefix = cfd_suffix = ''
     for line in section.splitlines():
         assert self.mode is not None  # for mypy
         instruction = Instruction(
             mode=self.mode,
             bot_options=BotOptions(),
         )
         line_results = self._parse_line(line)
         instruction['bot_options']['old_cat'] = line_results['old_cat']
         instruction['bot_options']['new_cats'] = line_results['new_cats']
         if line_results['cfd_page']:
             cfd_prefix = line_results['prefix']
             cfd_suffix = line_results['suffix']
         cfd_page = line_results['cfd_page'] or cfd_page
         if not (cfd_page and instruction['bot_options']['old_cat']):
             continue
         prefix = line_results['prefix'] + cfd_prefix
         suffix = line_results['suffix'] or cfd_suffix
         if 'NO BOT' in prefix:
             pywikibot.log('Bot disabled for: {}'.format(line))
             continue
         cfd = cfd_page.find_discussion(line_results['old_cat'])
         instruction['cfd_page'] = cfd
         if self.mode == 'merge':
             instruction['redirect'] = 'REDIRECT' in prefix
         elif self.mode == 'move':
             instruction['noredirect'] = 'REDIRECT' not in prefix
         elif self.mode == 'retain':
             nc_matches = re.findall(r'\b(no consensus) (?:for|to) (\w+)\b',
                                     suffix,
                                     flags=re.I)
             not_matches = re.findall(r'\b(not )(\w+)\b',
                                      suffix,
                                      flags=re.I)
             if nc_matches:
                 instruction['result'] = nc_matches[0][0]
                 instruction['action'] = nc_matches[0][1]
             elif not_matches:
                 instruction['result'] = ''.join(not_matches[0])
                 instruction['action'] = re.sub(r'ed$', 'e',
                                                not_matches[0][1])
             elif 'keep' in suffix.lower():
                 instruction['result'] = 'keep'
                 instruction['action'] = 'delete'
             else:
                 instruction['result'] = cfd.get_result()
                 instruction['action'] = cfd.get_action(
                     instruction['bot_options']['old_cat'])
         self.instructions.append(instruction)
Example #53
0
    def _parse_pre_117(self, data):
        """Parse HTML."""
        if not self.REwgEnableApi.search(data):
            pywikibot.log("wgEnableApi is not enabled in HTML of %s" % self.fromurl)
        try:
            self.version = MediaWikiVersion(self.REwgVersion.search(data).group(1))
        except AttributeError:
            pass

        self.server = self.REwgServer.search(data).groups()[0]
        self.scriptpath = self.REwgScriptPath.search(data).groups()[0]
        self.articlepath = self.REwgArticlePath.search(data).groups()[0]
        self.lang = self.REwgContentLanguage.search(data).groups()[0]
Example #54
0
def load_config(page: pywikibot.Page, **kwargs: Any) -> ConfigJSONObject:
    """Load JSON config from the page."""
    if page.isRedirectPage():
        pywikibot.log(f"{page!r} is a redirect.")
        page = page.getRedirectTarget()
    _empty = jsoncfg.loads_config("{}")
    if not page.exists():
        pywikibot.log(f"{page!r} does not exist.")
        return _empty
    try:
        return jsoncfg.loads_config(page.get(**kwargs).strip())
    except pywikibot.exceptions.PageRelatedError:
        return _empty
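A hedged usage sketch; the page title is a placeholder and jsoncfg is whatever config parser the surrounding module already imports:

import pywikibot

site = pywikibot.Site('en', 'wikipedia')
config = load_config(pywikibot.Page(site, 'User:ExampleBot/config.json'))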
Example #55
0
    def run(self):
        NB_AJOUTS = 0
        RETRAITS = True
        connus = BeBot.charger_bdd(self.db, self.nom_base, champs=u'page')
        connus = map(self.normaliser_page, connus)
        self.total_avant = len(connus)
        ordre_cats = [u'AdQ', u'BA', u'?']
        for cat in self.cat_qualite:
            categorie = pywikibot.Category(self.site, cat)
            cpg = pagegenerators.CategorizedPageGenerator(categorie, recurse=False)
            try:
                i = self.categories_de_qualite[self.langue].index(cat)
            except (KeyError, ValueError):
                i = 2
            cattoa = ordre_cats[i]

            for p in pagegenerators.DuplicateFilterPageGenerator(cpg):
                if NB_AJOUTS < 2000:
                    if p.namespace() == 0:
                        page = p
                    elif p.namespace() == 1:  # For EN:GA and IT:FA
                        page = p.toggleTalkPage()
                    else:
                        continue
                    if page.isRedirectPage():
                        page = page.getRedirectTarget()
                    title = page.title()
                    if title not in connus:  # Compare against the database contents
                        infos = self.get_infos(page, cattoa)
                        NB_AJOUTS += 1
                        if infos is not None:
                            self.nouveau.append(infos)
                    else:
                        connus.remove(title)
                        self.connaitdeja.append(
                            {'page': title,
                             'label': cattoa})  # These will not be added
                else:
                    pywikibot.output("Addition limit reached at " + p.title())
                    RETRAITS = False
                    break

        # Remove the entries that have disappeared
        if RETRAITS:
            pywikibot.output('Removals: ' + str(connus))
            for c in connus:
                self.req_bdd(c, 'delete')
        self.connus = len(connus)

        pywikibot.log(u"Total: %i added; %i already known; %i removed."
                      % (len(self.nouveau), len(self.connaitdeja), len(connus)))
Example #56
0
def match_name(name, typ, wd, limit=75):
    """
    Check if there is an item matching the name.

    Given a plaintext name (first or last) this checks if there is
    a unique matching entity of the right name type. Search results are
    stored in 'matchedNames' for later look-up.

    @param name: The name to search for
    @type name: basestring
    @param typ: The name type (either 'lastName' or 'firstName')
    @type typ: basestring
    @param wd: The running WikidataStuff instance
    @type wd: WikidataStuff (WD)
    @param limit: Number of hits before skipping (defaults to 75,
        ignored if onLabs)
    @type limit: int
    @return: A matching item, if any
    @rtype: pywikibot.ItemPage, or None
    """
    global matchedNames
    prop = {'lastName': ('Q101352',),
            'firstName': ('Q12308941', 'Q11879590', 'Q202444')}

    # Skip any empty values
    if not name.strip():
        return

    # Check if already looked up
    if name in matchedNames[typ]:
        return matchedNames[typ][name]

    # search for potential matches
    matches = None
    props = prop[typ]
    if wd.onLabs:
        matches = match_name_on_labs(name, props, wd)
    else:
        matches = match_name_off_labs(name, props, wd, limit)

    # get rid of duplicates then check for uniqueness
    matches = list(set(matches))
    if len(matches) == 1:
        item = wd.bypassRedirect(matches[0])
        matchedNames[typ][name] = item  # store for later reuse
        return item
    elif len(matches) > 1:
        pywikibot.log('Possible duplicates: {}'.format(matches))

    # getting here means no hits so store that for later reuse
    matchedNames[typ][name] = None
Example #57
0
    def login(self, retry=False, autocreate=False):
        """
        Attempt to log into the server.

        @see: U{https://www.mediawiki.org/wiki/API:Login}

        @param retry: infinitely retry if the API returns an unknown error
        @type retry: bool

        @param autocreate: if true, allow auto-creation of the account
                           using unified login
        @type autocreate: bool

        @raises NoUsername: Username is not recognised by the site.
        """
        if not self.password:
            # First check that the username exists,
            # to avoid asking for a password that will not work.
            if not autocreate:
                self.check_user_exists()

            # As we don't want the password to appear on the screen, we set
            # password = True
            self.password = pywikibot.input(
                'Password for user %(name)s on %(site)s (no characters will '
                'be shown):' % {'name': self.login_name, 'site': self.site},
                password=True)

        pywikibot.output('Logging in to %(site)s as %(name)s'
                         % {'name': self.login_name, 'site': self.site})
        try:
            cookiedata = self.getCookie()
        except pywikibot.data.api.APIError as e:
            error_code = e.code
            pywikibot.error('Login failed ({}).'.format(error_code))
            if error_code in self._api_error:
                error_msg = 'Username "{}" {} on {}'.format(
                    self.login_name, self._api_error[error_code], self.site)
                if error_code == 'Failed':
                    error_msg += '.\n{}'.format(e.info)
                raise NoUsername(error_msg)

            # TODO: investigate other unhandled API codes (bug T75539)
            if retry:
                self.password = None
                return self.login(retry=True)
            else:
                return False
        self.storecookiedata(cookiedata)
        pywikibot.log('Should be logged in now')
        return True
def main():
    """The main loop."""
    # First find out what to work on

    countrycode = ''
    lang = ''
    full_update = True
    skip_wd = False
    days_back = 2  # Default 2 days. Runs every night so can miss one night.
    conn = None
    cursor = None
    (conn, cursor) = connect_to_monuments_database()

    for arg in pywikibot.handleArgs():
        option, sep, value = arg.partition(':')
        if option == '-countrycode':
            countrycode = value
        elif option == '-langcode':
            lang = value
        elif option == '-daysback':
            days_back = int(value)
        elif option == '-fullupdate':  # does nothing since already default
            full_update = True
        elif option == '-skip_wd':
            skip_wd = True
        else:
            raise Exception(
                'Bad parameters. Expected "-countrycode", "-langcode", '
                '"-daysback", "-fullupdate", "-skip_wd" or pywikibot args. '
                'Found "{}"'.format(option))

    if countrycode and lang:
        if not mconfig.countries.get((countrycode, lang)):
            pywikibot.warning(
                'I have no config for countrycode "{0}" '
                'in language "{1}"'.format(
                    countrycode, lang))
            return False

        pywikibot.log(
            'Working on countrycode "{0}" in language "{1}"'.format(
                countrycode, lang))
        try:
            countryconfig = mconfig.countries.get((countrycode, lang))
            process_country(countryconfig, conn, cursor, full_update,
                            days_back)
        except Exception as e:
            pywikibot.error(
                'Unknown error occurred when processing country '
                '{0} in lang {1}\n{2}'.format(countrycode, lang, str(e)))
Example #59
0
    def checkCommonscatLink(self, name=''):
        """Return the name of a valid commons category.

        If the page is a redirect this function tries to follow it.
        If the page doesn't exist the function will return an empty string.

        """
        pywikibot.log('getCommonscat: ' + name)
        try:
            commonsSite = self.site.image_repository()
            # This can throw a pywikibot.BadTitle
            commonsPage = pywikibot.Page(commonsSite, 'Category:' + name)

            if not commonsPage.exists():
                pywikibot.output('Commons category does not exist. '
                                 'Examining deletion log...')
                logpages = commonsSite.logevents(logtype='delete',
                                                 page=commonsPage)
                for logitem in logpages:
                    loguser = logitem.user()
                    logcomment = logitem.comment()
                    # Some logic to extract the target page.
                    regex = (r'moved to \[\[\:?Category:'
                             r'(?P<newcat1>[^\|\}]+)(\|[^\}]+)?\]\]|'
                             r'Robot: Changing Category:(.+) '
                             r'to Category:(?P<newcat2>.+)')
                    m = re.search(regex, logcomment, flags=re.I)
                    if m:
                        if m.group('newcat1'):
                            return self.checkCommonscatLink(m.group('newcat1'))
                        elif m.group('newcat2'):
                            return self.checkCommonscatLink(m.group('newcat2'))
                    else:
                        pywikibot.output(
                            "getCommonscat: {} deleted by {}. Couldn't find "
                            'move target in "{}"'.format(
                                commonsPage, loguser, logcomment))
                        return ''
                return ''
            elif commonsPage.isRedirectPage():
                pywikibot.log('getCommonscat: The category is a redirect')
                return self.checkCommonscatLink(
                    commonsPage.getRedirectTarget().title(with_ns=False))
            elif (pywikibot.Page(commonsPage.site,
                                 'Template:Category redirect')
                  in commonsPage.templates()):
                pywikibot.log(
                    'getCommonscat: The category is a category redirect')
                for template in commonsPage.templatesWithParams():
                    if (template[0].title(with_ns=False) == 'Category redirect'
                            and len(template[1]) > 0):
                        return self.checkCommonscatLink(template[1][0])
            elif commonsPage.isDisambig():
                pywikibot.log('getCommonscat: The category is disambiguation')
                return ''
            else:
                return commonsPage.title(with_ns=False)
        except pywikibot.BadTitle:
            # Funky title so not correct
            return ''
def main():
    """The main loop."""
    # First find out what to work on

    countrycode = u''
    lang = u''
    skip_wd = False
    conn = None
    cursor = None
    (conn, cursor) = connect_to_monuments_database()

    for arg in pywikibot.handleArgs():
        option, sep, value = arg.partition(':')
        if option == '-countrycode':
            countrycode = value
        elif option == '-langcode':
            lang = value
        elif option == u'-skip_wd':
            skip_wd = True
        else:
            raise Exception(
                u'Bad parameters. Expected "-countrycode", "-langcode", '
                u'"-skip_wd" or pywikibot args. Found "{}"'.format(option))

    query = u"""TRUNCATE TABLE `id_dump`"""
    cursor.execute(query)

    if countrycode and lang:
        if not mconfig.countries.get((countrycode, lang)):
            pywikibot.warning(
                u'I have no config for countrycode "%s" in language "%s"' % (countrycode, lang))
            return False
        pywikibot.log(
            u'Working on countrycode "%s" in language "%s"' % (countrycode, lang))
        processCountry(
            mconfig.countries.get((countrycode, lang)), conn, cursor)
    elif countrycode or lang:
        raise Exception(u'The "countrycode" and "langcode" arguments must '
                        u'be used together.')
    else:
        for (countrycode, lang), countryconfig in mconfig.filtered_countries(
                skip_wd=skip_wd):
            pywikibot.log(
                u'Working on countrycode "%s" in language "%s"' % (countrycode, lang))
            processCountry(countryconfig, conn, cursor)

    close_database_connection(conn, cursor)