def read_file_content(self, file_url=None):
    """Return name of temp file in which remote file is saved.

    Downloads ``file_url`` (falling back to ``self.url``) into a
    temporary file, resuming with an HTTP Range request whenever the
    server closes the connection before ``Content-Length`` bytes have
    arrived. Returns the temp file name, or None when the server
    answered with an HTML page (i.e. the file was not found).
    """
    if not file_url:
        file_url = self.url
        pywikibot.warning("file_url is not given. "
                          "Set to self.url by default.")
    pywikibot.output(u'Reading file %s' % file_url)
    resume = False
    rlen = 0  # bytes received so far
    _contents = None
    dt = 15  # back-off delay in seconds, grows up to 360
    uo = URLopener()
    retrieved = False
    while not retrieved:
        if resume:
            pywikibot.output(u"Resume download...")
            # NOTE(review): addheader accumulates; repeated resumes add
            # multiple Range headers — presumably tolerated, verify.
            uo.addheader('Range', 'bytes=%s-' % rlen)
        infile = uo.open(file_url)
        # An HTML content type means an error page, not the raw file.
        if 'text/html' in infile.info().getheader('Content-Type'):
            pywikibot.output(u"Couldn't download the image: "
                             "the requested URL was not found on server.")
            return
        content_len = infile.info().getheader('Content-Length')
        accept_ranges = infile.info().getheader('Accept-Ranges') == 'bytes'
        if resume:
            # Append the remainder delivered by the Range request.
            _contents += infile.read()
        else:
            _contents = infile.read()
        infile.close()
        retrieved = True
        if content_len:
            rlen = len(_contents)
            content_len = int(content_len)
            if rlen < content_len:
                # Short read: retry, resuming only if the server
                # advertises byte-range support.
                retrieved = False
                pywikibot.output(
                    u"Connection closed at byte %s (%s left)"
                    % (rlen, content_len))
                if accept_ranges and rlen > 0:
                    resume = True
                pywikibot.output(u"Sleeping for %d seconds..." % dt)
                time.sleep(dt)
                if dt <= 60:
                    dt += 15
                elif dt < 360:
                    dt += 60
        else:
            # No Content-Length header: accept whatever we got.
            pywikibot.log(
                u"WARNING: length check of retrieved data not possible.")
    # Persist the downloaded bytes to a named temp file for the caller.
    handle, tempname = tempfile.mkstemp()
    with os.fdopen(handle, "wb") as t:
        t.write(_contents)
    return tempname
def _command(self, file_name, text, jump_index=None):
    """Return editor selected in user-config.py."""
    if jump_index:
        # Some editors make it possible to mark occurrences of substrings,
        # or to jump to the line of the first occurrence.
        # TODO: Find a better solution than hardcoding these, e.g. a config
        # option.
        # Convert the flat character offset into 0-based line/column.
        line = text[:jump_index].count('\n')
        column = jump_index - (text[:jump_index].rfind('\n') + 1)
    else:
        line = column = 0
    editor = config.editor
    # Linux editors. We use startswith() because some users might use
    # parameters.
    if editor.startswith('kate'):
        extra = ['-l', '%i' % (line + 1), '-c', '%i' % (column + 1)]
    elif editor.startswith(('gedit', 'emacs', 'vim')):
        extra = ['+%i' % (line + 1)]  # these do not support columns
    elif editor.startswith('jedit'):
        extra = ['+line:%i' % (line + 1)]  # seems not to support columns
    elif editor.startswith('nano'):
        extra = ['+%i,%i' % (line + 1, column + 1)]
    # Windows editors
    elif editor.lower().endswith('notepad++.exe'):
        extra = ['-n%i' % (line + 1)]  # seems not to support columns
    else:
        extra = []
    # See T102465 for problems relating to using config.editor unparsed.
    command = [editor] + extra + [file_name]
    pywikibot.log('Running editor: %s' % TextEditor._concat(command))
    return command
def _call_cmd(args, lib='djvulibre') -> tuple:
    """
    Tiny wrapper around subprocess.Popen().

    @param args: same as Popen()
    @type args: str or typing.Sequence[string]
    @param lib: library to be logged in logging messages
    @type lib: str
    @return: returns a tuple (res, stdoutdata), where res is True if
        dp.returncode != 0 else False
    """
    # Keep a printable form of the command for the log messages.
    cmd = args if isinstance(args, str) else ' '.join(str(a) for a in args)
    proc = subprocess.Popen(args,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    stdoutdata, stderrdata = proc.communicate()
    if proc.returncode != 0:
        pywikibot.error('{0} error; {1}'.format(lib, cmd))
        pywikibot.error('{0}'.format(stderrdata))
        return (False, stdoutdata)
    pywikibot.log('SUCCESS: {0} (PID: {1})'.format(cmd, proc.pid))
    return (True, stdoutdata)
def checkMultiplicity(self):
    """Count running processes for site and set process_multiplicity.

    Parses the shared throttle log file, counts other live processes
    working on the same site, assigns this process a unique pid if it
    does not have one yet, and rewrites the file with the surviving
    entries plus our own.
    """
    global pid
    mysite = self.mysite
    pywikibot.debug('Checking multiplicity: pid = {pid}'.format(pid=pid),
                    _logger)
    with self.lock:
        processes = []
        my_pid = pid or 1  # start at 1 if global pid not yet set
        count = 1
        # open throttle.log
        try:
            f = open(self.ctrlfilename, 'r')
        except IOError:
            # A missing file is fine on first run; once we have a pid
            # the file is expected to exist.
            if pid:
                raise
        else:
            # FIX: close the file even if parsing below raises
            # (the original leaked the handle on any parse error).
            with f:
                now = time.time()
                for line in f.readlines():
                    # parse line; format is "pid timestamp site"
                    try:
                        line = line.split(' ')
                        this_pid = int(line[0])
                        ptime = int(line[1].split('.')[0])
                        this_site = line[2].rstrip()
                    except (IndexError, ValueError):
                        # Sometimes the file gets corrupted; ignore line
                        continue
                    if now - ptime > self.releasepid:
                        continue  # process has expired, drop from file
                    if now - ptime <= self.dropdelay \
                            and this_site == mysite \
                            and this_pid != pid:
                        count += 1
                    if this_site != self.mysite or this_pid != pid:
                        processes.append({'pid': this_pid,
                                          'time': ptime,
                                          'site': this_site})
                    if not pid and this_pid >= my_pid:
                        my_pid = this_pid + 1  # next unused process id
        if not pid:
            pid = my_pid
        self.checktime = time.time()
        processes.append({'pid': pid,
                          'time': self.checktime,
                          'site': mysite})
        processes.sort(key=lambda p: (p['pid'], p['site']))
        with suppress(IOError), open(self.ctrlfilename, 'w') as f:
            for p in processes:
                f.write(FORMAT_LINE.format_map(p))
        self.process_multiplicity = count
        pywikibot.log(
            'Found {} {} processes running, including this one.'.format(
                count, mysite))
def _call_cmd(args, lib='djvulibre'):
    """
    Tiny wrapper around subprocess.Popen().

    @param args: same as Popen()
    @type args: sequence or string
    @param lib: library to be logged in logging messages
    @type lib: string
    @return: returns a tuple (res, stdoutdata), where res is True if
        dp.returncode != 0 else False
    """
    # FIX: docstring previously documented nonexistent parameters
    # ('library', 'log'); it now matches the actual signature.
    if not isinstance(args, StringTypes):
        # upcast if any param in sequence args is not in StringTypes
        args = [str(a) if not isinstance(a, StringTypes) else a
                for a in args]
        cmd = ' '.join(args)
    else:
        cmd = args
    dp = subprocess.Popen(args,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE)
    stdoutdata, stderrdata = dp.communicate()
    if dp.returncode != 0:
        pywikibot.error('{0} error; {1}'.format(lib, cmd))
        pywikibot.error('{0}'.format(stderrdata))
        return (False, stdoutdata)
    pywikibot.log('SUCCESS: {0} (PID: {1})'.format(cmd, dp.pid))
    return (True, stdoutdata)
def login(self, retry=False, autocreate=False):
    """
    Attempt to log into the server.

    @param retry: infinitely retry if the API returns an unknown error
    @type retry: bool
    @param autocreate: if true, allow auto-creation of the account
        using unified login
    @type autocreate: bool
    @raises NoUsername: Username is not recognised by the site.
    """
    if not self.password:
        # First check that the username exists,
        # to avoid asking for a password that will not work.
        if not autocreate:
            self.check_user_exists()
        # As we don't want the password to appear on the screen, we set
        # password = True
        self.password = pywikibot.input(
            'Password for user %(name)s on %(site)s (no characters will '
            'be shown):' % {'name': self.login_name, 'site': self.site},
            password=True)
    pywikibot.output('Logging in to %(site)s as %(name)s'
                     % {'name': self.login_name, 'site': self.site})
    try:
        cookiedata = self.getCookie()
    except pywikibot.data.api.APIError as e:
        # Map known API error codes onto NoUsername with a
        # human-readable explanation.
        pywikibot.error('Login failed (%s).' % e.code)
        if e.code == 'NotExists':
            raise NoUsername("Username '%s' does not exist on %s"
                             % (self.login_name, self.site))
        elif e.code == 'Illegal':
            raise NoUsername("Username '%s' is invalid on %s"
                             % (self.login_name, self.site))
        elif e.code == 'readapidenied':
            raise NoUsername(
                'Username "{0}" does not have read permissions on '
                '{1}'.format(self.login_name, self.site))
        elif e.code == 'Failed':
            # NOTE(review): message text duplicates the readapidenied
            # wording — possibly a copy-paste slip; confirm intent.
            raise NoUsername(
                'Username "{0}" does not have read permissions on '
                '{1}\n.{2}'.format(self.login_name, self.site, e.info))
        # TODO: investigate other unhandled API codes (bug T75539)
        if retry:
            # Reset the cached password and retry once recursively;
            # retry=True means this can recurse until success.
            self.password = None
            return self.login(retry=True)
        else:
            return False
    self.storecookiedata(cookiedata)
    pywikibot.log('Should be logged in now')
    return True
def lag(self, lagtime):
    """Seize the throttle lock due to server lag.

    This will prevent any thread from accessing this site.
    """
    started = time.time()
    with self.lock:
        # start at 1/2 the current server lag time
        # wait at least 5 seconds but not more than 120 seconds
        delay = min(max(5, lagtime // 2), 120)
        # account for any time we waited while acquiring the lock
        wait = delay - (time.time() - started)
        if wait > 0:
            message = (u"Sleeping for %(wait).1f seconds, %(now)s"
                       % {'wait': wait,
                          'now': time.strftime("%Y-%m-%d %H:%M:%S",
                                               time.localtime())})
            # Loud output only for waits above the noise threshold.
            if wait > config.noisysleep:
                pywikibot.output(message)
            else:
                pywikibot.log(message)
            time.sleep(wait)
def __iter__(self): """Yield pages.""" # TODO: start yielding before all referring pages have been found refs = [ page for page in self.disambPage.getReferences( follow_redirects=False, withTemplateInclusion=False, namespaces=0 if self.main_only else None ) ] pywikibot.output(u"Found %d references." % len(refs)) # Remove ignorables if self.disambPage.site.family.name in ignore_title and \ self.disambPage.site.lang in ignore_title[self.disambPage.site.family.name]: for ig in ignore_title[self.disambPage.site.family.name ][self.disambPage.site.lang]: for i in range(len(refs) - 1, -1, -1): if re.match(ig, refs[i].title()): pywikibot.log(u'Ignoring page %s' % refs[i].title()) del refs[i] elif self.primaryIgnoreManager.isIgnored(refs[i]): del refs[i] if len(refs) < self.minimum: pywikibot.output(u"Found only %d pages to work on; skipping." % len(refs)) return pywikibot.output(u"Will work on %d pages." % len(refs)) for ref in refs: yield ref
def get_redirects_from_dump(self, alsoGetPageTitles=False):
    """
    Extract redirects from dump.

    Load a local XML dump file, look at all pages which have the
    redirect flag set, and find out where they're pointing at. Return
    a dictionary where the redirect names are the keys and the redirect
    targets are the values.

    When ``alsoGetPageTitles`` is true, additionally return the set of
    all page titles seen in the dump (underscored).
    """
    xmlFilename = self.xmlFilename
    redict = {}
    # open xml dump and read page titles out of it
    dump = xmlreader.XmlDump(xmlFilename)
    redirR = self.site.redirectRegex()
    readPagesCount = 0
    if alsoGetPageTitles:
        pageTitles = set()
    for entry in dump.parse():
        readPagesCount += 1
        # always print status message after 10000 pages
        if readPagesCount % 10000 == 0:
            pywikibot.output(u'{0:d} pages read...'.format(readPagesCount))
        # Skip pages outside the configured namespaces, if any.
        if len(self.namespaces) > 0:
            if pywikibot.Page(self.site, entry.title).namespace() \
                    not in self.namespaces:
                continue
        if alsoGetPageTitles:
            pageTitles.add(space_to_underscore(pywikibot.Link(entry.title,
                                                              self.site)))
        m = redirR.match(entry.text)
        if m:
            target = m.group(1)
            # There might be redirects to another wiki. Ignore these.
            target_link = pywikibot.Link(target, self.site)
            try:
                target_link.parse()
            except pywikibot.SiteDefinitionError as e:
                pywikibot.log(e)
                pywikibot.output(
                    u'NOTE: Ignoring {0} which is a redirect ({1}) to an '
                    u'unknown site.'.format(entry.title, target))
                target_link = None
            else:
                if target_link.site != self.site:
                    pywikibot.output(
                        u'NOTE: Ignoring {0} which is a redirect to '
                        u'another site {1}.'.format(entry.title,
                                                    target_link.site))
                    target_link = None
            # if the redirect does not link to another wiki
            if target_link and target_link.title:
                source = pywikibot.Link(entry.title, self.site)
                if target_link.anchor:
                    # Redirects with a section anchor are flagged but
                    # still recorded.
                    pywikibot.output(
                        u'HINT: {0!s} is a redirect with a pipelink.'
                        .format(entry.title))
                redict[space_to_underscore(source)] = (
                    space_to_underscore(target_link))
    if alsoGetPageTitles:
        return redict, pageTitles
    else:
        return redict
def change(self, text):
    """
    Given a wiki source code text, return the cleaned up version.

    Runs the full chain of cosmetic fix-ups over ``text`` and returns
    the result; a diff is shown when ``self.debug`` is set.
    """
    oldText = text
    # Commons file description pages get an extra normalization pass.
    if self.site.sitename() == u'commons:commons' and self.namespace == 6:
        text = self.commonsfiledesc(text)
    text = self.fixSelfInterwiki(text)
    text = self.standardizePageFooter(text)
    text = self.fixSyntaxSave(text)
    text = self.cleanUpLinks(text)
    text = self.cleanUpSectionHeaders(text)
    text = self.putSpacesInLists(text)
    text = self.translateAndCapitalizeNamespaces(text)
##        text = self.translateMagicWords(text)
    text = self.replaceDeprecatedTemplates(text)
##        text = self.resolveHtmlEntities(text)
    text = self.validXhtml(text)
    text = self.removeUselessSpaces(text)
    text = self.removeNonBreakingSpaceBeforePercent(text)
    text = self.fixHtml(text)
    text = self.fixReferences(text)
    text = self.fixStyle(text)
    text = self.fixTypo(text)
    if self.site.lang in ['ckb', 'fa']:
        text = self.fixArabicLetters(text)
    try:
        text = isbn.hyphenateIsbnNumbers(text)
    except isbn.InvalidIsbnException as error:
        # Invalid ISBNs are logged and left unchanged.
        # FIX: removed the redundant 'pass' that followed the log call.
        pywikibot.log(u"ISBN error: %s" % error)
    if self.debug:
        pywikibot.showDiff(oldText, text)
    return text
def tearDown(self):
    """Tear down test."""
    super(TestLoggingMixin, self).tearDown()
    # Probe the framework-specific attribute holding the test outcome:
    # Python 3 unittest & nose, Python 3.4 nose, Python 2 unittest & nose.
    for attr in ("_outcomeForDoCleanups", "_outcome",
                 "_resultForDoCleanups"):
        if hasattr(self, attr):
            outcome = getattr(self, attr)
            break
    else:
        return
    if len(outcome.errors) > self._previous_errors:
        status = " NOT OK: ERROR"
    elif (hasattr(outcome, "failures")  # nose 3.4 doesn't have failures
          and len(outcome.failures) > self._previous_failures):
        status = " NOT OK: FAILURE"
    else:
        status = " OK"
    log("END " + self._log_prefix + "." + self._testMethodName + status)
def login(self, retry=False):
    """Attempt to log into the server.

    Prompts for the password when none is cached, fetches login
    cookies, and stores them. Returns True on success, False on
    failure (unless ``retry`` triggers one recursive re-attempt).
    """
    if not self.password:
        # As we don't want the password to appear on the screen, we set
        # password = True
        self.password = pywikibot.input(
            u'Password for user %(name)s on %(site)s (no characters will '
            u'be shown):' % {'name': self.username,
                             'site': self.site},
            password=True)
#        self.password = self.password.encode(self.site.encoding())
    pywikibot.output(u"Logging in to %(site)s as %(name)s"
                     % {'name': self.username, 'site': self.site})
    try:
        cookiedata = self.getCookie()
    except pywikibot.data.api.APIError as e:
        pywikibot.error(u"Login failed (%s)." % e.code)
        if retry:
            # Clear the cached password and re-prompt once more.
            self.password = None
            return self.login(retry=True)
        else:
            return False
    self.storecookiedata(cookiedata)
    pywikibot.log(u"Should be logged in now")
##        # Show a warning according to the local bot policy
##   FIXME: disabled due to recursion; need to move this to the Site object after
##   login
##        if not self.botAllowed():
##            logger.error(
##                u"Username '%(name)s' is not listed on [[%(page)s]]."
##                 % {'name': self.username,
##                    'page': botList[self.site.family.name][self.site.code]})
##             logger.error(
##"Please make sure you are allowed to use the robot before actually using it!")
##             return False
    return True
def validate_options(options, site):
    """
    Validate the options and return bool.

    @param options: options to validate
    @type options: dict
    @rtype: bool
    """
    pywikibot.log('Options:')
    required_keys = ['editnotice_template']
    has_keys = []
    for key, value in options.items():
        pywikibot.log('-%s = %s' % (key, value))
        if key in required_keys:
            has_keys.append(key)
        if key in ('subject_only', 'talk_only', 'to_subject', 'to_talk'):
            pass
        elif key == 'editnotice_template':
            # FIX: the original tested isinstance(key, str) — the key is
            # always a str, so the guard never rejected a non-string
            # template VALUE. Test the value instead.
            if isinstance(value, str):
                editnotice_page = pywikibot.Page(site,
                                                 'Template:%s' % value)
                if not editnotice_page.exists():
                    return False
            else:
                return False
    if sorted(has_keys) != sorted(required_keys):
        return False
    options['editnotice_page'] = editnotice_page
    options.pop('editnotice_template')
    return True
def isbn_execute(text):
    """Hyphenate ISBN numbers and catch 'InvalidIsbnException'."""
    try:
        hyphenated = isbn.hyphenateIsbnNumbers(text)
    except isbn.InvalidIsbnException as error:
        # Invalid ISBNs are logged; the caller receives None.
        pywikibot.log(u"ISBN error: %s" % error)
        return None
    return hyphenated
def findCommonscatLink(self, page=None):
    """Find CommonsCat template on interwiki pages.

    In Pywikibot >=2.0, page.interwiki() now returns Link objects,
    not Page objects

    NOTE(review): the default ``page=None`` would raise AttributeError
    on ``page.langlinks()`` — callers apparently always pass a page;
    confirm before relying on the default.

    @rtype: unicode, name of a valid commons category
    """
    for ipageLink in page.langlinks():
        ipage = pywikibot.page.Page(ipageLink)
        pywikibot.log('Looking for template on ' + ipage.title())
        try:
            # Skip interwiki targets that cannot hold a usable template.
            if (not ipage.exists() or ipage.isRedirectPage()
                    or ipage.isDisambig()):
                continue
            commonscatLink = self.getCommonscatLink(ipage)
            if not commonscatLink:
                continue
            (currentTemplate,
             possibleCommonscat, linkText, Note) = commonscatLink
            checkedCommonscat = self.checkCommonscatLink(
                possibleCommonscat)
            if (checkedCommonscat != ''):
                pywikibot.output(
                    'Found link for {} at [[{}:{}]] to {}.'.format(
                        page.title(), ipage.site.code,
                        ipage.title(), checkedCommonscat))
                return checkedCommonscat
        except pywikibot.BadTitle:
            # The interwiki was incorrect
            return ''
    return ''
def findCommonscatLink(self, page) -> str:
    """Find CommonsCat template on interwiki pages.

    :return: name of a valid commons category
    """
    for lang_link in page.langlinks():
        interwiki_page = pywikibot.page.Page(lang_link)
        pywikibot.log('Looking for template on ' + interwiki_page.title())
        try:
            # T291783
            page_exists = interwiki_page.exists()
        except InvalidTitleError:
            pywikibot.exception()
            continue
        # Only plain, existing articles can carry a usable template.
        if (not page_exists
                or interwiki_page.isRedirectPage()
                or interwiki_page.isDisambig()):
            continue
        template_link = self.getCommonscatLink(interwiki_page)
        if not template_link:
            continue
        category = self.checkCommonscatLink(template_link[1])
        if category:
            pywikibot.output(
                'Found link for {} at [[{}:{}]] to {}.'.format(
                    page.title(), interwiki_page.site.code,
                    interwiki_page.title(), category))
            return category
    return ''
def _flush():
    # Post one sentinel per worker so every thread exits its loop.
    for _ in threads:
        http_queue.put(None)
    pywikibot.log(u'Waiting for threads to finish... ')
    for worker in threads:
        worker.join()
    pywikibot.log(u"All threads finished.")
def tearDown(self):
    """Tear down test.

    Logs an END line for the test, with a status derived from the
    framework-specific outcome object (error / failure / OK).
    """
    super(TestLoggingMixin, self).tearDown()
    # Locate the outcome object across unittest/nose versions.
    if hasattr(self, '_outcomeForDoCleanups'):
        # Python 3 unittest & nose
        outcome = self._outcomeForDoCleanups
    elif hasattr(self, '_outcome'):
        # Python 3.4 nose
        outcome = self._outcome
    elif hasattr(self, '_resultForDoCleanups'):
        # Python 2 unittest & nose
        outcome = self._resultForDoCleanups
    else:
        return
    # Compare against counts captured in setUp to see whether THIS
    # test added errors/failures.
    if len(outcome.errors) > self._previous_errors:
        status = ' NOT OK: ERROR'
    # nose 3.4 doesn't have failures
    elif (hasattr(outcome, 'failures')
          and len(outcome.failures) > self._previous_failures):
        status = ' NOT OK: FAILURE'
    else:
        status = ' OK'
    log('END ' + self._log_prefix + '.' + self._testMethodName + status)
def findCommonscatLink(self, page=None):
    """Find CommonsCat template on interwiki pages.

    In Pywikibot 2.0, page.interwiki() now returns Link objects,
    not Page objects

    NOTE(review): the default ``page=None`` would fail at
    ``page.langlinks()``; callers appear to always pass a page.

    @rtype: unicode, name of a valid commons category
    """
    for ipageLink in page.langlinks():
        ipage = pywikibot.page.Page(ipageLink)
        pywikibot.log("Looking for template on %s" % (ipage.title()))
        try:
            # Skip pages that cannot carry a usable template.
            if (not ipage.exists() or ipage.isRedirectPage()
                    or ipage.isDisambig()):
                continue
            commonscatLink = self.getCommonscatLink(ipage)
            if not commonscatLink:
                continue
            (currentTemplate,
             possibleCommonscat, linkText, Note) = commonscatLink
            checkedCommonscat = self.checkCommonscatLink(possibleCommonscat)
            if (checkedCommonscat != u''):
                pywikibot.output(
                    u"Found link for %s at [[%s:%s]] to %s."
                    % (page.title(), ipage.site.code,
                       ipage.title(), checkedCommonscat))
                return checkedCommonscat
        except pywikibot.BadTitle:
            # The interwiki was incorrect
            return u''
    return u''
def _replace_rt_template_files(
        self, tpl: mwparserfromhell.nodes.Template) -> None:
    """Replace BSicon names inside a railway-line template.

    For each template parameter, resolve the icon it refers to
    (|typ= values are abbreviated and need an 'l'/'exl' prefix added),
    look the icon up in the replacement map, and substitute the new
    name back into the parameter, recording the replacement.
    """
    # Written for [[:cs:Template:Železniční trať]].
    for param in tpl.params:
        # Parameter value with HTML comments stripped.
        param_value = HTML_COMMENT.sub("", str(param.value)).strip()
        if param.name.matches("typ"):
            # |typ= stores the icon name without its leading 'l';
            # 'ex…' values map to 'exl…' icons.
            if param_value[:2] == "ex":
                current_name = "exl" + param_value[2:]
            else:
                current_name = "l" + param_value
        else:
            current_name = param_value
        try:
            current_icon = BSiconPage(self.current_page.site,
                                      name=current_name)
            # title() raises on invalid page titles; used as validation.
            current_icon.title()
        except (pywikibot.exceptions.Error, ValueError):
            continue
        new_icon = self.opt.bsicons_map.get(current_icon, None)
        if not new_icon:
            continue
        if param.name.matches("typ"):
            # Reverse the prefix transformation for |typ= values.
            if new_icon.name[:3] == "exl":
                replacement = "ex" + new_icon.name[3:]
            elif new_icon.name[:1] == "l":
                replacement = new_icon.name[1:]
            else:
                # Icons without an 'l'/'exl' prefix cannot be
                # expressed in |typ= at all.
                pywikibot.log(f"{new_icon} cannot be used in |typ=.")
                continue
        else:
            replacement = new_icon.name
        # Substitute within the raw value to preserve surrounding
        # whitespace/comments.
        param.value = str(param.value).replace(param_value, replacement)
        self.current_page.replacements.add(
            Replacement(current_icon, new_icon))
def validate_options(options, site):
    """
    Validate the options and return bool.

    @param options: options to validate
    @type options: dict
    @rtype: bool
    """
    pywikibot.log('Options:')
    required_keys = ['editnotice_template']
    has_keys = []
    for key, value in options.items():
        pywikibot.log('-{} = {}'.format(key, value))
        if key in required_keys:
            has_keys.append(key)
        if key == 'editnotice_template':
            # FIX: the original tested isinstance(key, str) — the key is
            # always a str, so a non-string template VALUE was never
            # rejected. Test the value instead.
            if not isinstance(value, str):
                return False
            options[key] = '{{' + value + '}}'
            editnotice_page = pywikibot.Page(site, value, ns=10)
            if not editnotice_page.exists():
                return False
    if sorted(has_keys) != sorted(required_keys):
        return False
    options['editnotice_page'] = editnotice_page
    return True
def get_redirects_from_dump(self, alsoGetPageTitles=False) -> Tuple[
        Dict[str, str], Set[str]]:
    """
    Extract redirects from dump.

    Load a local XML dump file, look at all pages which have the
    redirect flag set, and find out where they're pointing at. Return
    a dictionary where the redirect names are the keys and the redirect
    targets are the values.

    The second element of the returned tuple holds all page titles
    seen in the dump (underscored); it is only populated when
    ``alsoGetPageTitles`` is true.
    """
    xmlFilename = self.opt.xml
    redict = {}
    # open xml dump and read page titles out of it
    dump = xmlreader.XmlDump(xmlFilename)
    redirR = self.site.redirect_regex
    readPagesCount = 0
    pageTitles = set()
    for entry in dump.parse():
        readPagesCount += 1
        # always print status message after 10000 pages
        if readPagesCount % 10000 == 0:
            pywikibot.output('{} pages read...'.format(readPagesCount))
        # Skip pages outside the configured namespaces, if any.
        if self.opt.namespaces:
            if pywikibot.Page(self.site, entry.title).namespace() \
                    not in self.opt.namespaces:
                continue
        if alsoGetPageTitles:
            pageTitles.add(space_to_underscore(pywikibot.Link(entry.title,
                                                              self.site)))
        m = redirR.match(entry.text)
        if m:
            target = m.group(1)
            # There might be redirects to another wiki. Ignore these.
            target_link = pywikibot.Link(target, self.site)
            try:
                target_link.parse()
            except SiteDefinitionError as e:
                pywikibot.log(e)
                pywikibot.output(
                    'NOTE: Ignoring {} which is a redirect ({}) to an '
                    'unknown site.'.format(entry.title, target))
                target_link = None
            else:
                if target_link.site != self.site:
                    pywikibot.output(
                        'NOTE: Ignoring {} which is a redirect to '
                        'another site {}.'
                        .format(entry.title, target_link.site))
                    target_link = None
            # if the redirect does not link to another wiki
            if target_link and target_link.title:
                source = pywikibot.Link(entry.title, self.site)
                if target_link.anchor:
                    # Redirects with a section anchor are flagged but
                    # still recorded.
                    pywikibot.output(
                        'HINT: {} is a redirect with a pipelink.'
                        .format(entry.title))
                redict[space_to_underscore(source)] = (
                    space_to_underscore(target_link))
    return redict, pageTitles
def change(self, text):
    """
    Given a wiki source code text, return the cleaned up version.

    NOTE(review): Python 2 `except X, e` syntax — this variant predates
    Python 3. Also note the function falls off the end without
    `return text`, so it returns None despite the docstring; the
    sibling implementation ends with showDiff + return. Confirm
    whether lines were lost here.
    """
    oldText = text
    # Commons file description pages get an extra normalization pass.
    if self.site.sitename() == u'commons:commons' and self.namespace == 6:
        text = self.commonsfiledesc(text)
    text = self.fixSelfInterwiki(text)
    text = self.standardizePageFooter(text)
    text = self.fixSyntaxSave(text)
    text = self.cleanUpLinks(text)
    text = self.cleanUpSectionHeaders(text)
    text = self.putSpacesInLists(text)
    text = self.translateAndCapitalizeNamespaces(text)
##        text = self.translateMagicWords(text)
    text = self.replaceDeprecatedTemplates(text)
##        text = self.resolveHtmlEntities(text)
    text = self.validXhtml(text)
    text = self.removeUselessSpaces(text)
    text = self.removeNonBreakingSpaceBeforePercent(text)
    text = self.fixHtml(text)
    text = self.fixReferences(text)
    text = self.fixStyle(text)
    text = self.fixTypo(text)
    if self.site.lang in ['ckb', 'fa']:
        text = self.fixArabicLetters(text)
    try:
        text = isbn.hyphenateIsbnNumbers(text)
    except isbn.InvalidIsbnException, error:
        # Invalid ISBNs are logged and left unchanged.
        pywikibot.log(u"ISBN error: %s" % error)
        pass
def encoding(self):
    """Detect the response encoding.

    Resolution order: a previously cached result; the header-declared
    encoding when it differs from the requested charset; finally the
    requested charset itself (defaulting to latin1 when neither the
    response nor the request specified one). Raises the decode error
    if every candidate fails.
    """
    if hasattr(self, '_encoding'):
        return self._encoding
    if self.charset is None and self.header_encoding is None:
        pywikibot.log("Http response doesn't contain a charset.")
        charset = 'latin1'
    else:
        charset = self.charset
    # Sentinel: an Exception instance means "no successful decode yet";
    # _try_decode returns either a codec name or an Exception.
    _encoding = UnicodeError()
    if self.header_encoding is not None \
            and (charset is None
                 or codecs.lookup(self.header_encoding)
                 != codecs.lookup(charset)):
        if charset:
            pywikibot.warning(
                'Encoding "{}" requested but "{}" received in the '
                'header.'.format(charset, self.header_encoding))
        # TODO: Buffer decoded content, weakref does remove it too
        # early (directly after this method)
        _encoding = self._try_decode(self.header_encoding)
    if charset and isinstance(_encoding, Exception):
        # Header decode failed (or wasn't attempted): try the
        # requested charset.
        _encoding = self._try_decode(charset)
    if isinstance(_encoding, Exception):
        raise _encoding
    else:
        self._encoding = _encoding
    return self._encoding
def __getitem__(self, key):
    """Get token value for the given key.

    Logs the user in if needed, maps legacy token names to 'csrf' on
    modern MediaWiki, lazily loads missing tokens, and caches
    (user, key) pairs that failed so they are not re-fetched.
    """
    if self.site.user() is None:
        self.site.login()
    user_tokens = self._tokens.setdefault(self.site.user(), {})
    # always preload all for users without tokens
    failed_cache_key = (self.site.user(), key)
    # redirect old tokens to be compatible with older MW version
    # https://www.mediawiki.org/wiki/MediaWiki_1.37/Deprecation_of_legacy_API_token_parameters
    if self.site.mw_version >= '1.24wmf19' \
            and key in {'edit', 'delete', 'protect', 'move', 'block',
                        'unblock', 'email', 'import', 'options'}:
        log('Token {!r} was replaced by {!r}'.format(key, 'csrf'))
        key = 'csrf'
    try:
        key = self.site.validate_tokens([key])[0]
    except IndexError:
        # validate_tokens returned an empty list: unknown token type.
        raise Error(
            "Requested token '{}' is invalid on {} wiki.".format(
                key, self.site))
    if (key not in user_tokens
            and failed_cache_key not in self.failed_cache):
        # Preload everything on the first request for this user,
        # otherwise fetch just this token.
        self.load_tokens([key], all=False if user_tokens else None)
    if key in user_tokens:
        return user_tokens[key]
    # token not allowed for self.site.user() on self.site
    self.failed_cache.add(failed_cache_key)
    # to be changed back to a plain KeyError?
    raise Error(
        "Action '{}' is not allowed for user {} on {} wiki.".format(
            key, self.site.user(), self.site))
def _command(self, file_name, text, jump_index=None):
    """Return editor selected in user-config.py.

    Builds the argument list (editor, editor-specific cursor flags,
    file name) for launching the configured external editor, jumping
    to the character offset ``jump_index`` in ``text`` when supported.
    """
    if jump_index:
        # Some editors make it possible to mark occurrences of substrings,
        # or to jump to the line of the first occurrence.
        # TODO: Find a better solution than hardcoding these, e.g. a config
        # option.
        # Derive 0-based line/column from the flat offset.
        line = text[:jump_index].count('\n')
        column = jump_index - (text[:jump_index].rfind('\n') + 1)
    else:
        line = column = 0
    # Linux editors. We use startswith() because some users might use
    # parameters.
    if config.editor.startswith('kate'):
        command = ['-l', '%i' % (line + 1), '-c', '%i' % (column + 1)]
    elif config.editor.startswith('gedit'):
        command = ['+%i' % (line + 1)]  # seems not to support columns
    elif config.editor.startswith('emacs'):
        command = ['+%i' % (line + 1)]  # seems not to support columns
    elif config.editor.startswith('jedit'):
        command = ['+line:%i' % (line + 1)]  # seems not to support columns
    elif config.editor.startswith('vim'):
        command = ['+%i' % (line + 1)]  # seems not to support columns
    elif config.editor.startswith('nano'):
        command = ['+%i,%i' % (line + 1, column + 1)]
    # Windows editors
    elif config.editor.lower().endswith('notepad++.exe'):
        command = ['-n%i' % (line + 1)]  # seems not to support columns
    else:
        command = []
    # See T102465 for problems relating to using config.editor unparsed.
    command = [config.editor] + command + [file_name]
    pywikibot.log(u'Running editor: %s' % TextEditor._concat(command))
    return command
def __iter__(self): """Yield pages.""" # TODO: start yielding before all referring pages have been found refs = [ page for page in self.disambPage.getReferences( withTemplateInclusion=False, namespaces=0 if self.main_only else None) ] pywikibot.output(u"Found %d references." % len(refs)) # Remove ignorables if self.disambPage.site.family.name in ignore_title and \ self.disambPage.site.lang in ignore_title[ self.disambPage.site.family.name]: for ig in ignore_title[self.disambPage.site.family.name][ self.disambPage.site.lang]: for i in range(len(refs) - 1, -1, -1): if re.match(ig, refs[i].title()): pywikibot.log(u'Ignoring page %s' % refs[i].title()) del refs[i] elif self.primaryIgnoreManager.isIgnored(refs[i]): del refs[i] if len(refs) < self.minimum: pywikibot.output(u"Found only %d pages to work on; skipping." % len(refs)) return pywikibot.output(u"Will work on %d pages." % len(refs)) for ref in refs: yield ref
def command(self, tempFilename, text, jumpIndex=None):
    """Return editor selected in user-config.py.

    Builds a single shell command string (editor, cursor flags, file
    name) for launching the configured external editor at the
    character offset ``jumpIndex`` in ``text``.

    NOTE(review): tempFilename is appended unquoted — paths containing
    spaces would break; confirm how callers invoke this string.
    """
    command = config.editor
    if jumpIndex:
        # Some editors make it possible to mark occurrences of substrings,
        # or to jump to the line of the first occurrence.
        # TODO: Find a better solution than hardcoding these, e.g. a config
        # option.
        line = text[:jumpIndex].count('\n')
        column = jumpIndex - (text[:jumpIndex].rfind('\n') + 1)
    else:
        line = column = 0
    # Linux editors. We use startswith() because some users might use
    # parameters.
    if config.editor.startswith('kate'):
        command += " -l %i -c %i" % (line + 1, column + 1)
    elif config.editor.startswith('gedit'):
        command += " +%i" % (line + 1)  # seems not to support columns
    elif config.editor.startswith('emacs'):
        command += " +%i" % (line + 1)  # seems not to support columns
    elif config.editor.startswith('jedit'):
        command += " +line:%i" % (line + 1)  # seems not to support columns
    elif config.editor.startswith('vim'):
        command += " +%i" % (line + 1)  # seems not to support columns
    elif config.editor.startswith('nano'):
        command += " +%i,%i" % (line + 1, column + 1)
    # Windows editors
    elif config.editor.lower().endswith('notepad++.exe'):
        command += " -n%i" % (line + 1)  # seems not to support columns
    command += ' %s' % tempFilename
    pywikibot.log(u'Running editor: %s' % command)
    return command
def command(self, tempFilename, text, jumpIndex=None):
    """Return editor selected in user-config.py."""
    if jumpIndex:
        # Some editors make it possible to mark occurrences of substrings,
        # or to jump to the line of the first occurrence.
        # TODO: Find a better solution than hardcoding these, e.g. a config
        # option.
        line = text[:jumpIndex].count('\n')
        column = jumpIndex - (text[:jumpIndex].rfind('\n') + 1)
    else:
        line = column = 0
    editor = config.editor
    # Linux editors. We use startswith() because some users might use
    # parameters.
    if editor.startswith('kate'):
        cursor_args = " -l %i -c %i" % (line + 1, column + 1)
    elif editor.startswith(('gedit', 'emacs', 'vim')):
        cursor_args = " +%i" % (line + 1)  # these don't support columns
    elif editor.startswith('jedit'):
        cursor_args = " +line:%i" % (line + 1)  # no column support
    elif editor.startswith('nano'):
        cursor_args = " +%i,%i" % (line + 1, column + 1)
    # Windows editors
    elif editor.lower().endswith('notepad++.exe'):
        cursor_args = " -n%i" % (line + 1)  # no column support
    else:
        cursor_args = ""
    command = editor + cursor_args + ' %s' % tempFilename
    pywikibot.log(u'Running editor: %s' % command)
    return command
def __init__(self, fromurl, **kwargs):
    """
    Initializer.

    :raises pywikibot.exceptions.ServerError: a server error occurred
        while loading the site
    :raises Timeout: a timeout occurred while loading the site
    :raises RuntimeError: Version not found or version less than 1.23
    """
    if fromurl.endswith('$1'):
        # Strip a trailing articlepath placeholder from the URL.
        fromurl = fromurl[:-2]
    r = fetch(fromurl, **kwargs)
    check_response(r)
    if fromurl != r.url:
        pywikibot.log('{} redirected to {}'.format(fromurl, r.url))
        fromurl = r.url
    self.fromurl = fromurl
    data = r.text
    # Scrape version/server/scriptpath hints from the HTML.
    wp = WikiHTMLPageParser(fromurl)
    wp.feed(data)
    self.version = wp.version
    self.server = wp.server
    self.scriptpath = wp.scriptpath
    self.articlepath = None
    if self.api:
        try:
            self._parse_site()
        except (ServerError, RequestException):
            raise
        except Exception as e:
            # Fall through to the legacy version probe below.
            pywikibot.log('MW detection failed: {!r}'.format(e))
        if not self.version:
            self._fetch_old_version()
    if not self.api:
        raise RuntimeError('Unsupported url: {}'.format(self.fromurl))
    if not self.version or self.version < MIN_VERSION:
        raise RuntimeError('Unsupported version: {}'.format(self.version))
    if not self.articlepath:
        if self.private_wiki:
            # FIX: dropped the redundant 'and self.private_wiki' — this
            # branch is already guarded by the condition above.
            if self.api != self.fromurl:
                # Guess the article path from the page URL we were given.
                self.articlepath = self.fromurl.rsplit('/', 1)[0] + '/$1'
            else:
                raise RuntimeError(
                    'Unable to determine articlepath because the wiki is '
                    'private. Use the Main Page URL instead of the API.')
        else:
            raise RuntimeError('Unable to determine articlepath: '
                               '{}'.format(self.fromurl))
def sauvegarder(self):
    """Persist every newly collected entry into the database."""
    pywikibot.log(u'# Sauvegarde dans la base pour la langue "%s".'
                  % self.langue)
    # Each pending entry becomes one INSERT via the shared request helper.
    for entry in self.nouveau:
        self.req_bdd(entry, 'insert')
def __init__(self, generator, **kwargs):
    """- generator : Page generator."""
    # Bot options accepted on top of the base class defaults.
    self.availableOptions.update({
        'ignorepdf': False,  # boolean
        'limit': None,  # int, stop after n modified pages
        'summary': None,
    })
    super(ReferencesRobot, self).__init__(**kwargs)
    self.generator = generator
    self.site = pywikibot.Site()
    # A fake user agent is used for fetching the referenced web pages.
    self._user_agent = comms.http.get_fake_user_agent()
    pywikibot.log('Using fake user agent: {0}'.format(self._user_agent))
    # Check
    # Link the edit summary to a localized manual page when one exists.
    manual = 'mw:Manual:Pywikibot/refLinks'
    code = None
    for alt in [self.site.code] + i18n._altlang(self.site.code):
        if alt in localized_msg:
            code = alt
            break
    if code:
        manual += '/%s' % code
    if self.getOption('summary') is None:
        self.msg = i18n.twtranslate(self.site, 'reflinks-msg', locals())
    else:
        self.msg = self.getOption('summary')
    # The bot halts when this page is edited (emergency stop mechanism).
    self.stopPage = pywikibot.Page(self.site,
                                   i18n.translate(self.site, stopPage))
    # Titles matching this blacklist are never used as link text.
    local = i18n.translate(self.site, badtitles)
    if local:
        bad = '(' + globalbadtitles + '|' + local + ')'
    else:
        bad = globalbadtitles
    self.titleBlackList = re.compile(bad, re.I | re.S | re.X)
    self.norefbot = noreferences.NoReferencesBot(None, verbose=False)
    self.deduplicator = DuplicateReferences()
    try:
        self.stopPageRevId = self.stopPage.latest_revision_id
    except pywikibot.NoPage:
        pywikibot.output(u'The stop page %s does not exist'
                         % self.stopPage.title(asLink=True))
        raise
    # Regex to grasp content-type meta HTML tag in HTML source
    self.META_CONTENT = re.compile(br'(?i)<meta[^>]*content\-type[^>]*>')
    # Extract the encoding from a charset property (from content-type !)
    self.CHARSET = re.compile(r'(?i)charset\s*=\s*(?P<enc>[^\'",;>/]*)')
    # Extract html title from page
    self.TITLE = re.compile(r'(?is)(?<=<title>).*?(?=</title>)')
    # Matches content inside <script>/<style>/HTML comments
    self.NON_HTML = re.compile(
        br'(?is)<script[^>]*>.*?</script>|<style[^>]*>.*?</style>|'
        br'<!--.*?-->|<!\[CDATA\[.*?\]\]>')
    # Authorized mime types for HTML pages
    self.MIME = re.compile(
        r'application/(?:xhtml\+xml|xml)|text/(?:ht|x)ml')
def login(self, retry=False):
    """
    Attempt to log into the server.

    @param retry: infinitely retry if the API returns an unknown error
    @type retry: bool

    @raises NoUsername: Username is not recognised by the site.
    """
    if not self.password:
        # First check that the username exists,
        # to avoid asking for a password that will not work.
        self.check_user_exists()
        # As we don't want the password to appear on the screen, we set
        # password = True
        self.password = pywikibot.input(
            u'Password for user %(name)s on %(site)s (no characters will '
            u'be shown):' % {'name': self.username, 'site': self.site},
            password=True)
    pywikibot.output(u"Logging in to %(site)s as %(name)s"
                     % {'name': self.username, 'site': self.site})
    try:
        cookiedata = self.getCookie()
    except pywikibot.data.api.APIError as e:
        pywikibot.error(u"Login failed (%s)." % e.code)
        # Known error codes are fatal: re-raise as NoUsername.
        if e.code == 'NotExists':
            raise NoUsername(u"Username '%s' does not exist on %s"
                             % (self.username, self.site))
        elif e.code == 'Illegal':
            raise NoUsername(u"Username '%s' is invalid on %s"
                             % (self.username, self.site))
        # TODO: investigate other unhandled API codes (bug 73539)
        if retry:
            # Unknown error: clear the password so it is re-prompted and
            # retry indefinitely (recursive call keeps retry=True).
            self.password = None
            return self.login(retry=True)
        else:
            return False
    self.storecookiedata(cookiedata)
    pywikibot.log(u"Should be logged in now")
    #
    # Show a warning according to the local bot policy
    # FIXME: disabled due to recursion; need to move this to the Site object after
    # login
    # if not self.botAllowed():
    #     logger.error(
    #         u"Username '%(name)s' is not listed on [[%(page)s]]."
    #         % {'name': self.username,
    #            'page': botList[self.site.family.name][self.site.code]})
    #     logger.error(
    #         "Please make sure you are allowed to use the robot before actually using it!")
    #     return False
    return True
def login(self, retry=False, autocreate=False):
    """
    Attempt to log into the server.

    @see: U{https://www.mediawiki.org/wiki/API:Login}

    @param retry: infinitely retry if the API returns an unknown error
    @type retry: bool
    @param autocreate: if true, allow auto-creation of the account
        using unified login
    @type autocreate: bool

    @raises pywikibot.exceptions.NoUsername: Username is not recognised
        by the site.
    """
    if not self.password:
        # First check that the username exists,
        # to avoid asking for a password that will not work.
        # Skipped for autocreate: the account may not exist yet.
        if not autocreate:
            self.check_user_exists()
        # As we don't want the password to appear on the screen, we set
        # password = True
        self.password = pywikibot.input(
            'Password for user %(name)s on %(site)s (no characters will '
            'be shown):' % {'name': self.login_name, 'site': self.site},
            password=True)
    pywikibot.output('Logging in to %(site)s as %(name)s'
                     % {'name': self.login_name, 'site': self.site})
    try:
        cookiedata = self.getCookie()
    except pywikibot.data.api.APIError as e:
        error_code = e.code
        pywikibot.error('Login failed ({}).'.format(error_code))
        if error_code in self._api_error:
            # Known fatal codes are mapped to a descriptive message.
            error_msg = 'Username "{}" {} on {}'.format(
                self.login_name, self._api_error[error_code], self.site)
            if error_code in ('Failed', 'FAIL'):
                error_msg += '\n.{}'.format(e.info)
            raise NoUsername(error_msg)
        # TODO: investigate other unhandled API codes (bug T75539)
        if retry:
            # Unknown error: drop the password so it is re-prompted and
            # retry indefinitely.
            self.password = None
            return self.login(retry=True)
        else:
            return False
    self.storecookiedata(cookiedata)
    pywikibot.log('Should be logged in now')
    return True
def login(self, retry=False):
    """
    Attempt to log into the server.

    @param retry: infinitely retry if the API returns an unknown error
    @type retry: bool

    @raises NoUsername: Username is not recognised by the site.
    """
    if not self.password:
        # First check that the username exists,
        # to avoid asking for a password that will not work.
        self.check_user_exists()
        # As we don't want the password to appear on the screen, we set
        # password = True
        self.password = pywikibot.input(
            u'Password for user %(name)s on %(site)s (no characters will '
            u'be shown):' % {'name': self.login_name, 'site': self.site},
            password=True)
    pywikibot.output(u"Logging in to %(site)s as %(name)s"
                     % {'name': self.login_name, 'site': self.site})
    try:
        cookiedata = self.getCookie()
    except pywikibot.data.api.APIError as e:
        pywikibot.error(u"Login failed (%s)." % e.code)
        # Known error codes are fatal: re-raise as NoUsername.
        if e.code == 'NotExists':
            raise NoUsername(u"Username '%s' does not exist on %s"
                             % (self.login_name, self.site))
        elif e.code == 'Illegal':
            raise NoUsername(u"Username '%s' is invalid on %s"
                             % (self.login_name, self.site))
        elif e.code == 'readapidenied':
            raise NoUsername(
                'Username "{0}" does not have read permissions on '
                '{1}'.format(self.login_name, self.site))
        # TODO: investigate other unhandled API codes (bug T75539)
        if retry:
            # Unknown error: clear the password so it is re-prompted and
            # retry indefinitely.
            self.password = None
            return self.login(retry=True)
        else:
            return False
    self.storecookiedata(cookiedata)
    pywikibot.log(u"Should be logged in now")
    #
    # Show a warning according to the local bot policy
    # FIXME: disabled due to recursion; need to move this to the Site object after
    # login
    # if not self.botAllowed():
    #     logger.error(
    #         u"Username '%(name)s' is not listed on [[%(page)s]]."
    #         % {'name': self.username,
    #            'page': botList[self.site.family.name][self.site.code]})
    #     logger.error(
    #         "Please make sure you are allowed to use the robot before actually using it!")
    #     return False
    return True
def _decide_encoding(response, charset) -> Optional[str]:
    """Detect the response encoding.

    Candidates are, in order of preference: the encoding announced in
    the response headers, then the *charset* argument (or the request's
    accept-charset header), then latin1. A candidate is only returned
    if the response body actually decodes with it.
    """
    def _verified(content, encoding):
        """Return *encoding* if *content* decodes with it, else None."""
        if encoding is None:
            return None
        try:
            content.decode(encoding)
        except LookupError:
            pywikibot.warning(
                'Unknown or invalid encoding {!r}'.format(encoding))
        except UnicodeDecodeError as e:
            pywikibot.warning('{} found in {}'.format(e, content))
        else:
            return encoding
        return None  # let chardet do the job

    header_encoding = _get_encoding_from_response_headers(response)
    if header_encoding is None:
        pywikibot.log('Http response does not contain a charset.')

    if charset is None:
        charset = response.request.headers.get('accept-charset')

    if header_encoding is None or charset is None:
        # At most one candidate is known; fall back to latin1 when
        # neither is available.
        return _verified(response.content,
                         header_encoding or charset or 'latin1')

    # Both candidates are known; warn when they name different codecs.
    try:
        header_codecs = codecs.lookup(header_encoding)
    except LookupError:
        header_codecs = None
    try:
        charset_codecs = codecs.lookup(charset)
    except LookupError:
        charset_codecs = None
    if header_codecs and charset_codecs and header_codecs != charset_codecs:
        pywikibot.warning(
            'Encoding "{}" requested but "{}" received in the '
            'response header.'.format(charset, header_encoding))

    return (_verified(response.content, header_encoding)
            or _verified(response.content, charset))
def save_wikipage(self, page_text, page_name,
                  summary="Bot: Update der Ergebnisliste"):
    """Save *page_text* to the wiki page *page_name*.

    On any pywikibot error the page name is written to a temp file and
    the failure is logged with the traceback.
    """
    try:
        target = pywikibot.Page(self.site, page_name)
        if not ArticleUpdater(target).save_text(page_text, summary):
            pywikibot.log("Result page has not changed, skipping update ...")
    except pywikibot.Error:
        # NOTE(review): only the page *name* is dumped, not the text —
        # presumably the text was intended; confirm with the author.
        with tempfile.NamedTemporaryFile(delete=False) as dump_file:
            dump_file.write(page_name.encode('utf-8'))
        pywikibot.error(
            "Could not update result page, page dumped to {}".format(
                dump_file.name),
            exc_info=True)
def vider_base(self):
    """Empty the associated database table (drops demoted entries)."""
    pywikibot.log(u"## Vidage de l'ancienne base")
    req = u'TRUNCATE TABLE %s' % self.nom_base
    try:
        self.curseur.execute(req)
    # Fix: replaced the Python-2-only "except MySQLdb.Error, e:" form
    # with "as e", which is valid on Python 2.6+ and Python 3.
    except MySQLdb.Error as e:
        pywikibot.warning(u"Truncate error %d: %s"
                          % (e.args[0], e.args[1]))
def main():
    """Parse command-line options and process one or all countries."""
    countrycode = u''
    lang = u''
    skip_wd = False
    add_template = False
    conn = None
    cursor = None
    # Connect database, we need that
    (conn, cursor) = connect_to_monuments_database()
    (conn2, cursor2) = connect_to_commons_database()
    # FIXME add option to only run based on list usage, not category membership
    for arg in pywikibot.handleArgs():
        # Arguments have the form "-option:value".
        option, sep, value = arg.partition(':')
        if option == '-countrycode':
            countrycode = value
        elif option == '-langcode':
            lang = value
        elif option == '-skip_wd':
            skip_wd = True
        elif option == '-add_template':
            add_template = True
        else:
            raise Exception(
                u'Bad parameters. Expected "-countrycode", "-langcode", '
                u'"-skip_wd", "-add_template" or pywikibot args. '
                u'Found "{}"'.format(option))
    if countrycode and lang:
        # Single-country mode: both options must name a configured pair.
        if not mconfig.countries.get((countrycode, lang)):
            pywikibot.warning(
                u'I have no config for countrycode "{0}" '
                u'in language "{1}"'.format(countrycode, lang))
            return False
        pywikibot.log(
            u'Working on countrycode "{0}" in language "{1}"'.format(
                countrycode, lang))
        processCountry(mconfig.countries.get((countrycode, lang)),
                       add_template, conn, cursor, conn2, cursor2)
    elif countrycode or lang:
        raise Exception(u'The "countrycode" and "langcode" arguments must '
                        u'be used together.')
    else:
        # No country given: iterate over every configured country.
        statistics = []
        for (countrycode, lang), countryconfig in mconfig.filtered_countries(
                skip_wd=skip_wd):
            pywikibot.log(
                u'Working on countrycode "{0}" in language "{1}"'.format(
                    countrycode, lang))
            statistics.append(
                processCountry(
                    countryconfig, add_template, conn, cursor, conn2,
                    cursor2))
        make_statistics(statistics)
    close_database_connection(conn, cursor)
def read_file_content(self):
    """Return name of temp file in which remote file is saved.

    Downloads self.url, retrying with HTTP Range requests and a
    growing back-off delay when the connection closes early. Returns
    None when the server answers with an HTML (error) page.
    """
    pywikibot.output(u'Reading file %s' % self.url)
    resume = False
    rlen = 0          # fix: bytes received so far, defined before use
    _contents = None  # fix: accumulated payload, defined before use
    dt = 15           # back-off delay in seconds, grows up to 360
    uo = urllib.URLopener()
    retrieved = False
    while not retrieved:
        if resume:
            pywikibot.output(u"Resume download...")
            uo.addheader('Range', 'bytes=%s-' % rlen)
        infile = uo.open(self.url)
        # An HTML answer means an error page, not the requested file.
        if 'text/html' in infile.info().getheader('Content-Type'):
            # Fix: replaced the Python-2-only print statement with
            # pywikibot.output, matching the rest of this function.
            pywikibot.output(
                "Couldn't download the image: "
                "the requested URL was not found on server.")
            return
        content_len = infile.info().getheader('Content-Length')
        accept_ranges = infile.info().getheader('Accept-Ranges') == 'bytes'
        if resume:
            _contents += infile.read()
        else:
            _contents = infile.read()
        infile.close()
        retrieved = True
        if content_len:
            rlen = len(_contents)
            content_len = int(content_len)
            if rlen < content_len:
                # Short read: retry, resuming where possible.
                retrieved = False
                pywikibot.output(
                    u"Connection closed at byte %s (%s left)"
                    % (rlen, content_len))
                if accept_ranges and rlen > 0:
                    resume = True
                pywikibot.output(u"Sleeping for %d seconds..." % dt)
                time.sleep(dt)
                if dt <= 60:
                    dt += 15
                elif dt < 360:
                    dt += 60
        else:
            pywikibot.log(
                u"WARNING: No check length to retrieved data is possible.")
    handle, tempname = tempfile.mkstemp()
    # Fix: a context manager guarantees the temp file descriptor is
    # closed even if the write fails.
    with os.fdopen(handle, "wb") as t:
        t.write(_contents)
    return tempname
def encoding(self):
    """Detect the response encoding.

    Returns whatever follows 'charset=' in the content-type header,
    or 'ascii' when the header carries no charset.
    """
    content_type = self.response_headers['content-type']
    _, marker, declared = content_type.partition('charset=')
    if marker:
        return declared
    # Don't warn, many pages don't contain one
    pywikibot.log(u"Http response doesn't contain a charset.")
    return 'ascii'
def match_name(name, typ, wd, limit=75):
    """
    Check if there is an item matching the name.

    Given a plaintext name (first or last) this checks if there is
    a unique matching entity of the right name type.

    Search results are stored in 'matchedNames' for later look-up.

    @param name: The name to search for
    @type name: basestring
    @param typ: The name type (either 'lastName' or 'firstName')
    @type typ: basestring
    @param wd: The running WikidataStuff instance
    @type wd: WikidataStuff (WD)
    @param limit: Number of hits before skipping (defaults to 75,
        ignored if onLabs)
    @type limit: int
    @return: A matching item, if any
    @rtype: pywikibot.ItemPage, or None
    """
    global matchedNames
    # Wikidata "instance of" items that qualify as each name type.
    prop = {
        'lastName': ('Q101352', ),
        'firstName': ('Q12308941', 'Q11879590', 'Q202444')
    }
    # Skip any empty values
    if not name.strip():
        return
    # Check if already looked up
    if name in matchedNames[typ]:
        return matchedNames[typ][name]
    # search for potential matches
    matches = None
    props = prop[typ]
    # Labs has direct database access; off-labs search is capped at
    # `limit` hits.
    if wd.onLabs:
        matches = match_name_on_labs(name, props, wd)
    else:
        matches = match_name_off_labs(name, props, wd, limit)
    # get rid of duplicates then check for uniqueness
    matches = list(set(matches))
    if len(matches) == 1:
        item = wd.bypassRedirect(matches[0])
        matchedNames[typ][name] = item  # store for later reuse
        return item
    elif len(matches) > 1:
        pywikibot.log('Possible duplicates: {}'.format(matches))
    # getting here means no hits so store that for later reuse
    matchedNames[typ][name] = None
def run(self): """The main bot function that does all the work. For readability it is split into several helper functions: - _movecat() - _movetalk() - _hist() - _change() - _delete() """ # can_move_* determines if the page can be moved safely (target # doesn't exist but source does), move_items determines if the # items (pages/subcategories) of the category could be moved into # a new (non existent) category. can_move_page = CategoryMoveRobot.check_move("category page", self.oldcat, self.newcat) can_move_talk = CategoryMoveRobot.check_move("category talk page", self.oldtalk, self.newtalk) if not self.newcat: # delete move_items = True else: move_items = not self.newcat.exists() or not self.move_together if not self.allow_split: can_move_page = can_move_page and move_items can_move_talk = can_move_talk and move_items if self.newcat and self.move_oldcat: if self.can_move_cats: if can_move_page: oldcattitle = self.oldcat.title() self.newcat = self.oldcat.move(self.newcat.title(), reason=self.comment, movetalkpage=can_move_talk) self._strip_cfd_templates() self.oldcat = pywikibot.Category(self.oldcat.site, oldcattitle) else: if can_move_page: self._movecat() if can_move_talk: self._movetalk() if self.wikibase: self._update_wikibase_item() if self.history and can_move_page: self._hist() if move_items: self._change(pagegenerators.CategorizedPageGenerator(self.oldcat)) if not self.pagesonly: self._change(pagegenerators.SubCategoriesPageGenerator(self.oldcat)) else: pywikibot.log("Didn't move pages/subcategories, because the " "category page hasn't been moved.") if ( self.oldcat.isEmptyCategory() and self.delete_oldcat and ((self.newcat and self.move_oldcat) or not self.newcat) ): self._delete(can_move_page, can_move_talk)
def _parse_section(self, section: str) -> None:
    """Parse a section of a page.

    Each line of the section is turned into an Instruction and
    appended to self.instructions. The CFD page and its prefix/suffix
    carry over between lines until a new one is found.
    """
    cfd_page = None
    cfd_prefix = cfd_suffix = ''
    for line in section.splitlines():
        assert self.mode is not None  # for mypy
        instruction = Instruction(
            mode=self.mode,
            bot_options=BotOptions(),
        )
        line_results = self._parse_line(line)
        instruction['bot_options']['old_cat'] = line_results['old_cat']
        instruction['bot_options']['new_cats'] = line_results['new_cats']
        if line_results['cfd_page']:
            # A line carrying its own CFD page resets the sticky
            # prefix/suffix used by following lines.
            cfd_prefix = line_results['prefix']
            cfd_suffix = line_results['suffix']
        cfd_page = line_results['cfd_page'] or cfd_page
        # A line without a CFD context or an old category is skipped.
        if not (cfd_page and instruction['bot_options']['old_cat']):
            continue
        prefix = line_results['prefix'] + cfd_prefix
        suffix = line_results['suffix'] or cfd_suffix
        if 'NO BOT' in prefix:
            pywikibot.log('Bot disabled for: {}'.format(line))
            continue
        cfd = cfd_page.find_discussion(line_results['old_cat'])
        instruction['cfd_page'] = cfd
        if self.mode == 'merge':
            instruction['redirect'] = 'REDIRECT' in prefix
        elif self.mode == 'move':
            instruction['noredirect'] = 'REDIRECT' not in prefix
        elif self.mode == 'retain':
            # Extract the closing result/action from the suffix text,
            # e.g. "no consensus to delete" or "not merged".
            nc_matches = re.findall(r'\b(no consensus) (?:for|to) (\w+)\b',
                                    suffix, flags=re.I)
            not_matches = re.findall(r'\b(not )(\w+)\b', suffix,
                                     flags=re.I)
            if nc_matches:
                instruction['result'] = nc_matches[0][0]
                instruction['action'] = nc_matches[0][1]
            elif not_matches:
                instruction['result'] = ''.join(not_matches[0])
                # Turn a past-tense verb back into its base form,
                # e.g. "deleted" -> "delete".
                instruction['action'] = re.sub(r'ed$', 'e',
                                               not_matches[0][1])
            elif 'keep' in suffix.lower():
                instruction['result'] = 'keep'
                instruction['action'] = 'delete'
            else:
                # No textual hint; ask the discussion itself.
                instruction['result'] = cfd.get_result()
                instruction['action'] = cfd.get_action(
                    instruction['bot_options']['old_cat'])
        self.instructions.append(instruction)
def _parse_pre_117(self, data):
    """Parse HTML.

    Scrape site configuration (version, server, scriptpath,
    articlepath, content language) from the wg* JavaScript variables
    embedded in the HTML of pre-1.17 MediaWiki pages.
    """
    if not self.REwgEnableApi.search(data):
        pywikibot.log("wgEnableApi is not enabled in HTML of %s"
                      % self.fromurl)
    try:
        self.version = MediaWikiVersion(
            self.REwgVersion.search(data).group(1))
    except AttributeError:
        # wgVersion not present in the HTML; leave self.version as is.
        pass
    # NOTE(review): unlike the version lookup above, these searches are
    # unguarded — a page missing any of these variables raises
    # AttributeError here. Presumably intentional (the caller treats it
    # as detection failure); confirm.
    self.server = self.REwgServer.search(data).groups()[0]
    self.scriptpath = self.REwgScriptPath.search(data).groups()[0]
    self.articlepath = self.REwgArticlePath.search(data).groups()[0]
    self.lang = self.REwgContentLanguage.search(data).groups()[0]
def load_config(page: pywikibot.Page, **kwargs: Any) -> ConfigJSONObject:
    """Load JSON config from the page.

    Redirects are followed once; a missing or unreadable page yields
    an empty config object.
    """
    if page.isRedirectPage():
        pywikibot.log(f"{page!r} is a redirect.")
        page = page.getRedirectTarget()
    empty_config = jsoncfg.loads_config("{}")
    if not page.exists():
        pywikibot.log(f"{page!r} does not exist.")
        return empty_config
    try:
        return jsoncfg.loads_config(page.get(**kwargs).strip())
    except pywikibot.exceptions.PageRelatedError:
        return empty_config
def run(self):
    """Collect quality articles per category and sync the database.

    Pages found in the quality categories but not yet in the database
    are queued for insertion; known pages are kept; database entries
    no longer found on the wiki are deleted (unless the scan was cut
    short by the additions limit).
    """
    NB_AJOUTS = 0   # number of pages queued for insertion
    RETRAITS = True  # allowed to delete vanished entries?
    connus = BeBot.charger_bdd(self.db, self.nom_base, champs=u'page')
    # Fix: materialize as a list so len()/remove() work on Python 3,
    # where map() returns a lazy iterator.
    connus = list(map(self.normaliser_page, connus))
    self.total_avant = len(connus)
    # Label per position in self.categories_de_qualite[self.langue];
    # unknown categories fall back to '?'.
    ordre_cats = [u'AdQ', u'BA', u'?']
    for cat in self.cat_qualite:
        categorie = pywikibot.Category(self.site, cat)
        cpg = pagegenerators.CategorizedPageGenerator(categorie,
                                                      recurse=False)
        # Fix: replaced the bare `except:` with the specific exceptions
        # this lookup can raise (missing language key / unknown cat).
        try:
            i = self.categories_de_qualite[self.langue].index(cat)
        except (KeyError, ValueError):
            i = 2
        cattoa = ordre_cats[i]
        for p in pagegenerators.DuplicateFilterPageGenerator(cpg):
            if NB_AJOUTS < 2000:
                if p.namespace() == 0:
                    page = p
                elif p.namespace() == 1:
                    # For EN:GA and IT:FA the category sits on the talk
                    # page; work on the subject page instead.
                    page = p.toggleTalkPage()
                else:
                    continue
                if page.isRedirectPage():
                    page = page.getRedirectTarget()
                title = page.title()
                # Compare against the database content.
                if title not in connus:
                    infos = self.get_infos(page, cattoa)
                    NB_AJOUTS += 1
                    if infos is not None:
                        self.nouveau.append(infos)
                else:
                    # Still present on the wiki: keep it and drop it
                    # from the deletion candidates.
                    connus.remove(title)
                    self.connaitdeja.append(
                        {'page': title,
                         'label': cattoa})  # they will not be re-added
            else:
                pywikibot.output("Limite d'ajouts atteinte avec "
                                 + p.title())
                # Partial scan: deleting remaining entries would be
                # wrong, so disable removals.
                RETRAITS = False
                break
    # Remove entries that have disappeared from the wiki.
    if RETRAITS:
        pywikibot.output('Retraits : ' + str(connus))
        for c in connus:
            self.req_bdd(c, 'delete')
    self.connus = len(connus)
    pywikibot.log(
        u"Total: %i ajouts ; %i déjà connus ; %i retraits."
        % (len(self.nouveau), len(self.connaitdeja), len(connus)))
def match_name(name, typ, wd, limit=75):
    """
    Check if there is an item matching the name.

    Given a plaintext name (first or last) this checks if there is
    a unique matching entity of the right name type.

    Search results are stored in 'matchedNames' for later look-up.

    @param name: The name to search for
    @type name: basestring
    @param typ: The name type (either 'lastName' or 'firstName')
    @type typ: basestring
    @param wd: The running WikidataStuff instance
    @type wd: WikidataStuff (WD)
    @param limit: Number of hits before skipping (defaults to 75,
        ignored if onLabs)
    @type limit: int
    @return: A matching item, if any
    @rtype: pywikibot.ItemPage, or None
    """
    global matchedNames
    # Wikidata classes that qualify as each name type.
    type_items = {'lastName': ('Q101352',),
                  'firstName': ('Q12308941', 'Q11879590', 'Q202444')}
    # Blank names can never match anything.
    if not name.strip():
        return
    # Serve repeated look-ups from the cache.
    if name in matchedNames[typ]:
        return matchedNames[typ][name]
    # Query for candidate items; Labs has direct database access,
    # otherwise the search is capped at `limit` hits.
    props = type_items[typ]
    if wd.onLabs:
        candidates = match_name_on_labs(name, props, wd)
    else:
        candidates = match_name_off_labs(name, props, wd, limit)
    # Deduplicate, then require exactly one hit.
    unique_matches = list(set(candidates))
    if len(unique_matches) == 1:
        item = wd.bypassRedirect(unique_matches[0])
        matchedNames[typ][name] = item  # store for later reuse
        return item
    if len(unique_matches) > 1:
        pywikibot.log('Possible duplicates: {}'.format(unique_matches))
    # No unique hit; cache the miss for later reuse as well.
    matchedNames[typ][name] = None
def login(self, retry=False, autocreate=False):
    """
    Attempt to log into the server.

    @see: U{https://www.mediawiki.org/wiki/API:Login}

    @param retry: infinitely retry if the API returns an unknown error
    @type retry: bool
    @param autocreate: if true, allow auto-creation of the account
        using unified login
    @type autocreate: bool

    @raises NoUsername: Username is not recognised by the site.
    """
    if not self.password:
        # First check that the username exists,
        # to avoid asking for a password that will not work.
        # Skipped for autocreate: the account may not exist yet.
        if not autocreate:
            self.check_user_exists()
        # As we don't want the password to appear on the screen, we set
        # password = True
        self.password = pywikibot.input(
            'Password for user %(name)s on %(site)s (no characters will '
            'be shown):' % {'name': self.login_name, 'site': self.site},
            password=True)
    pywikibot.output('Logging in to %(site)s as %(name)s'
                     % {'name': self.login_name, 'site': self.site})
    try:
        cookiedata = self.getCookie()
    except pywikibot.data.api.APIError as e:
        error_code = e.code
        pywikibot.error('Login failed ({}).'.format(error_code))
        if error_code in self._api_error:
            # Known fatal codes are mapped to a descriptive message.
            error_msg = 'Username "{}" {} on {}'.format(
                self.login_name, self._api_error[error_code], self.site)
            if error_code == 'Failed':
                error_msg += '\n.{}'.format(e.info)
            raise NoUsername(error_msg)
        # TODO: investigate other unhandled API codes (bug T75539)
        if retry:
            # Unknown error: drop the password so it is re-prompted and
            # retry indefinitely.
            self.password = None
            return self.login(retry=True)
        else:
            return False
    self.storecookiedata(cookiedata)
    pywikibot.log('Should be logged in now')
    return True
def main():
    """The main loop.

    Parse the command-line options and run either a single configured
    country/language pair or (not shown in the single-country branch)
    report an error for inconsistent arguments.
    """
    # First find out what to work on
    countrycode = ''
    lang = ''
    full_update = True
    skip_wd = False
    days_back = 2  # Default 2 days. Runs every night so can miss one night.
    conn = None
    cursor = None
    (conn, cursor) = connect_to_monuments_database()
    for arg in pywikibot.handleArgs():
        # Arguments have the form "-option:value".
        option, sep, value = arg.partition(':')
        if option == '-countrycode':
            countrycode = value
        elif option == '-langcode':
            lang = value
        elif option == '-daysback':
            days_back = int(value)
        elif option == '-fullupdate':
            # does nothing since already default
            full_update = True
        elif option == '-skip_wd':
            skip_wd = True
        else:
            raise Exception(
                'Bad parameters. Expected "-countrycode", "-langcode", '
                '"-daysback", "-fullupdate", "-skip_wd" or pywikibot args. '
                'Found "{}"'.format(option))
    if countrycode and lang:
        if not mconfig.countries.get((countrycode, lang)):
            pywikibot.warning(
                'I have no config for countrycode "{0}" '
                'in language "{1}"'.format(
                    countrycode, lang))
            return False
        pywikibot.log(
            'Working on countrycode "{0}" in language "{1}"'.format(
                countrycode, lang))
        try:
            countryconfig = mconfig.countries.get((countrycode, lang))
            process_country(countryconfig, conn, cursor, full_update,
                            days_back)
        # Fix: replaced the Python-2-only "except Exception, e:" form
        # with "as e", which is valid on Python 2.6+ and Python 3.
        # Broad catch is deliberate: one failing country must only be
        # logged, not abort the run.
        except Exception as e:
            pywikibot.error(
                'Unknown error occurred when processing country '
                '{0} in lang {1}\n{2}'.format(countrycode, lang, str(e)))
def checkCommonscatLink(self, name=''):
    """Return the name of a valid commons category.

    If the page is a redirect this function tries to follow it.
    If the page doesn't exists the function will return an empty
    string
    """
    pywikibot.log('getCommonscat: ' + name)
    try:
        commonsSite = self.site.image_repository()
        # This can throw a pywikibot.BadTitle
        commonsPage = pywikibot.Page(commonsSite, 'Category:' + name)
        if not commonsPage.exists():
            pywikibot.output('Commons category does not exist. '
                             'Examining deletion log...')
            logpages = commonsSite.logevents(logtype='delete',
                                             page=commonsPage)
            for logitem in logpages:
                loguser = logitem.user()
                logcomment = logitem.comment()
                # Some logic to extract the target page.
                # Two known deletion-summary styles name the move
                # target: a wikilink, or "Robot: Changing ..." text.
                regex = (r'moved to \[\[\:?Category:'
                         r'(?P<newcat1>[^\|\}]+)(\|[^\}]+)?\]\]|'
                         r'Robot: Changing Category:(.+) '
                         r'to Category:(?P<newcat2>.+)')
                m = re.search(regex, logcomment, flags=re.I)
                if m:
                    # Recurse on the move target found in the log.
                    if m.group('newcat1'):
                        return self.checkCommonscatLink(m.group('newcat1'))
                    elif m.group('newcat2'):
                        return self.checkCommonscatLink(m.group('newcat2'))
                else:
                    pywikibot.output(
                        "getCommonscat: {} deleted by {}. Couldn't find "
                        'move target in "{}"'.format(
                            commonsPage, loguser, logcomment))
                    return ''
            return ''
        elif commonsPage.isRedirectPage():
            pywikibot.log('getCommonscat: The category is a redirect')
            # Follow hard redirects recursively.
            return self.checkCommonscatLink(
                commonsPage.getRedirectTarget().title(with_ns=False))
        elif (pywikibot.Page(commonsPage.site, 'Template:Category redirect')
              in commonsPage.templates()):
            pywikibot.log(
                'getCommonscat: The category is a category redirect')
            # Soft redirect: the target is the template's first
            # parameter.
            for template in commonsPage.templatesWithParams():
                if (template[0].title(with_ns=False) == 'Category redirect'
                        and len(template[1]) > 0):
                    return self.checkCommonscatLink(template[1][0])
        elif commonsPage.isDisambig():
            pywikibot.log('getCommonscat: The category is disambiguation')
            return ''
        else:
            return commonsPage.title(with_ns=False)
    except pywikibot.BadTitle:
        # Funky title so not correct
        return ''
def main():
    """The main loop.

    Parse the command-line options, truncate the `id_dump` table and
    process either a single configured country/language pair or every
    configured country.
    """
    countrycode = u''
    lang = u''
    skip_wd = False
    conn = None
    cursor = None
    # A database connection is required for every code path below.
    (conn, cursor) = connect_to_monuments_database()
    for arg in pywikibot.handleArgs():
        # Arguments have the form "-option:value".
        option, _, value = arg.partition(':')
        if option == '-countrycode':
            countrycode = value
        elif option == '-langcode':
            lang = value
        elif option == u'-skip_wd':
            skip_wd = True
        else:
            raise Exception(
                u'Bad parameters. Expected "-countrycode", "-langcode", '
                u'"-skip_wd" or pywikibot args. Found "{}"'.format(option))
    cursor.execute(u"""TRUNCATE table `id_dump`""")
    if countrycode and lang:
        # Single-country mode.
        if not mconfig.countries.get((countrycode, lang)):
            pywikibot.warning(
                u'I have no config for countrycode "%s" in language "%s"'
                % (countrycode, lang))
            return False
        pywikibot.log(
            u'Working on countrycode "%s" in language "%s"'
            % (countrycode, lang))
        processCountry(
            mconfig.countries.get((countrycode, lang)), conn, cursor)
    elif countrycode or lang:
        raise Exception(u'The "countrycode" and "langcode" arguments must '
                        u'be used together.')
    else:
        # No country given: iterate over all configured countries.
        for (countrycode, lang), countryconfig in mconfig.filtered_countries(
                skip_wd=skip_wd):
            pywikibot.log(
                u'Working on countrycode "%s" in language "%s"'
                % (countrycode, lang))
            processCountry(countryconfig, conn, cursor)
    close_database_connection(conn, cursor)