def disambcategory(self):
    """Return Category in which disambig pages are listed.

    With a data repository, the category item is looked up in the family
    file by the repository code and its sitelink for this site gives the
    category title. Without one, the title is built from the family file
    entry for this site's own code.

    Raises Error if the family file has no matching entry or the
    repository item has no sitelink for this site.
    """
    if not self.has_data_repository:
        # fallback for non WM sites
        try:
            cat_title = '{}:{}'.format(
                Namespace.CATEGORY, self.family.disambcatname[self.code])
        except KeyError:
            raise Error(
                'No disambiguation category name found in '
                '{site.family.name}_family for {site}'.format(site=self))
        return pywikibot.Category(pywikibot.Link(cat_title, self))

    data_repo = self.data_repository()
    repo_label = data_repo.family.name
    try:
        item_id = self.family.disambcatname[data_repo.code]
    except KeyError:
        raise Error(
            'No {repo} qualifier found for disambiguation category '
            'name in {fam}_family file'.format(repo=repo_label,
                                               fam=self.family.name))

    item_page = pywikibot.ItemPage(data_repo, item_id)
    try:
        cat_title = item_page.getSitelink(self)
    except pywikibot.NoPage:
        raise Error(
            'No disambiguation category name found in {repo} '
            'for {site}'.format(repo=repo_label, site=self))
    return pywikibot.Category(pywikibot.Link(cat_title, self))
def __getitem__(self, key):
    """Get token value for the given key.

    Logs in first if no user is set. Legacy token names are mapped to
    'csrf' on wikis at or above version 1.24wmf19. Raises Error if the
    token name is invalid on the wiki or not allowed for the user.
    """
    site = self.site
    if site.user() is None:
        site.login()

    cached = self._tokens.setdefault(site.user(), {})
    # remembered under the originally requested name, before any mapping
    miss_marker = (site.user(), key)

    # redirect old tokens to be compatible with older MW version
    # https://www.mediawiki.org/wiki/MediaWiki_1.37/Deprecation_of_legacy_API_token_parameters
    legacy_names = {'edit', 'delete', 'protect', 'move', 'block',
                    'unblock', 'email', 'import', 'options'}
    if site.mw_version >= '1.24wmf19' and key in legacy_names:
        log('Token {!r} was replaced by {!r}'.format(key, 'csrf'))
        key = 'csrf'

    try:
        key = site.validate_tokens([key])[0]
    except IndexError:
        raise Error("Requested token '{}' is invalid on {} wiki.".format(
            key, site))

    if key not in cached:
        if miss_marker not in self.failed_cache:
            # always preload all for users without tokens
            self.load_tokens([key], all=False if cached else None)

        if key not in cached:
            # token not allowed for self.site.user() on self.site
            self.failed_cache.add(miss_marker)
            # to be changed back to a plain KeyError?
            raise Error(
                "Action '{}' is not allowed for user {} on {} wiki.".format(
                    key, site.user(), site))

    return cached[key]
def __init__(self, endpoint=None, entity_url=None, repo=None,
             max_retries=None, retry_wait=None):
    """
    Create endpoint.

    @param endpoint: SPARQL endpoint URL
    @type endpoint: string
    @param entity_url: URL prefix for any entities returned in a query.
    @type entity_url: string
    @param repo: The Wikibase site which we want to run queries on. If
        provided this overrides any value in endpoint and entity_url.
        Defaults to Wikidata.
    @type repo: pywikibot.site.DataSite
    @param max_retries: (optional) Maximum number of times to retry after
        errors, defaults to config.max_retries.
    @type max_retries: int
    @param retry_wait: (optional) Minimum time in seconds to wait after an
        error, defaults to config.retry_wait seconds (doubles each retry
        until max of 120 seconds is reached).
    @type retry_wait: float
    """
    # default to Wikidata
    if not (repo or endpoint):
        repo = Site('wikidata', 'wikidata')

    if not repo:
        # explicit endpoint: the entity prefix cannot be derived
        if not entity_url:
            raise Error('If initialised with an endpoint the entity_url '
                        'must be provided.')
        self.endpoint = endpoint
        self.entity_url = entity_url
    else:
        try:
            self.endpoint = repo.sparql_endpoint
            self.entity_url = repo.concept_base_uri
        except NotImplementedError:
            raise NotImplementedError(
                'Wiki version must be 1.28-wmf.23 or newer to '
                'automatically extract the sparql endpoint. '
                'Please provide the endpoint and entity_url '
                'parameters instead of a repo.')
        if not self.endpoint:
            raise Error(
                'The site {0} does not provide a sparql endpoint.'.format(
                    repo))

    self.last_response = None

    self.max_retries = (config.max_retries if max_retries is None
                        else max_retries)
    self.retry_wait = (config.retry_wait if retry_wait is None
                       else retry_wait)
def __getitem__(self, key):
    """Get token value for the given key.

    Logs in first if no user is set. Raises Error if the token name is
    invalid on the wiki or not allowed for the current user.
    """
    if self.site.user() is None:
        self.site.login()

    tokens_for_user = self._tokens.setdefault(self.site.user(), {})
    # failures are remembered under the name as originally requested
    failure_id = (self.site.user(), key)

    try:
        key = self.site.validate_tokens([key])[0]
    except IndexError:
        raise Error("Requested token '{}' is invalid on {} wiki.".format(
            key, self.site))

    already_failed = failure_id in self.failed_cache
    if not (key in tokens_for_user or already_failed):
        # always preload all for users without tokens
        preload_all = False if tokens_for_user else None
        self.load_tokens([key], all=preload_all)

    if key not in tokens_for_user:
        # token not allowed for self.site.user() on self.site
        self.failed_cache.add(failure_id)
        # to be changed back to a plain KeyError?
        raise Error(
            "Action '{}' is not allowed for user {} on {} wiki.".format(
                key, self.site.user(), self.site))

    return tokens_for_user[key]
def __init__(self, **kwargs):
    """
    Constructor: kwargs are used to create a Request object;
    see that object's documentation for values. 'action'='query' is assumed.

    The first of 'generator', 'list', 'prop' or 'meta' found in kwargs
    names the query module. Error is raised for any other action or when
    none of these arguments is given.
    """
    requested_action = kwargs.get("action", "query")
    if requested_action != "query":
        raise Error("%s: 'action' must be 'query', not %s"
                    % (self.__class__.__name__, requested_action))
    kwargs["action"] = "query"

    if "site" not in kwargs:
        kwargs["site"] = pywikibot.Site()
    self.site = kwargs["site"]

    # make sure request type is valid, and get limit key if any
    given = [modtype for modtype in ("generator", "list", "prop", "meta")
             if modtype in kwargs]
    if not given:
        raise Error("%s: No query module name found in arguments."
                    % self.__class__.__name__)
    self.module = kwargs[given[0]]

    kwargs["indexpageids"] = ""  # always ask for list of pageids
    self.request = Request(**kwargs)

    self.prefix = None
    self.api_limit = None
    self.update_limit()  # sets self.prefix
    uses_generator = "generator" in kwargs
    if self.api_limit is not None and uses_generator:
        self.prefix = "g" + self.prefix
    self.limit = None
    self.query_limit = self.api_limit

    # name of the "query" subelement key to look for when iterating
    self.resultkey = "pages" if uses_generator else self.module

    # usually the query-continue key is the same as the querymodule,
    # but not always
    # API can return more than one query-continue key, if multiple
    # properties are requested by the query, e.g.
    # "query-continue":{
    #     "langlinks":{"llcontinue":"12188973|pt"},
    #     "templates":{"tlcontinue":"310820|828|Namespace_detect"}}
    # self.continuekey is a list
    self.continuekey = self.module.split('|')
def __init__(self, apidata):
    """Initialize object from a logevent dict returned by MW API

    Raises Error if the class expects a log type and the entry reports
    a different one.
    """
    self.data = LogDict(apidata)
    wanted = self._expectedType
    if wanted is None:
        # no expectation set: accept any log type
        return
    if wanted != self.type():
        raise Error("Wrong log type! Expecting %s, received %s instead."
                    % (wanted, self.type()))
def treat(self, page):
    """Process one ProofreadPage page.

    With the ocr option, the page body is replaced by the OCR result and
    the page is skipped if there is none. An existing page is only saved
    when both the ocr and force options are set.

    :param page: page to be treated.
    :type page: ProofreadPage
    :raises pywikibot.exceptions.Error: Page must be a ProofreadPage object
    """
    if not isinstance(page, ProofreadPage):
        raise Error('Page {} must be a ProofreadPage object.'.format(page))

    if page.exists():
        old_text = page.text
    else:
        old_text = ''

    if self.opt.ocr:
        ocr_body = self._get_ocr(page)
        if ocr_body is None:
            pywikibot.output('No OCR found. Skipping {}'.format(
                page.title(as_link=True)))
            return
        page.body = ocr_body

    if not page.exists() or (self.opt.ocr and self.opt.force):
        self.userPut(page, old_text, page.text,
                     summary=self.opt.summary,
                     show_diff=self.opt.showdiff)
    else:
        pywikibot.output(
            'Page {} already exists, not adding!'.format(page))
def __init__(self, apidata, site):
    """Initialize object from a logevent dict returned by MW API.

    Raises Error if the class expects a log type and the entry reports
    a different one.
    """
    super().__init__(apidata)
    self.site = site

    wanted = self._expected_type
    if wanted is None:
        # no expectation set: accept any log type
        return
    if wanted != self.type():
        raise Error('Wrong log type! Expecting %s, received %s instead.'
                    % (wanted, self.type()))
def _getBlockDetails(self):
    """Return the 'block' entry of the log data.

    Raises Error if the key is missing on an 'unblock' action; for any
    other action the original KeyError is re-raised.
    """
    try:
        details = self.data['block']
    except KeyError:
        if self.action() != 'unblock':
            raise
        # No 'block' key means this is an unblocking log entry
        raise Error(
            "action='unblock': this log entry has no block details such as flags, duration, or expiry!"
        )
    return details
def _createFromData(self, logdata):
    """
    Check for logtype from data, and create the correct LogEntry.

    Only the 'type' lookup is guarded, so a KeyError raised while picking
    or building the entry class is not misreported as a missing 'type'
    key.

    @param logdata: log entry data; must provide a 'type' key
    @return: instance of the entry class chosen for the log type
    @raise Error: logdata has no 'type' key
    """
    try:
        logtype = logdata['type']
    except KeyError:
        # logdata is not a string: format it instead of concatenating,
        # otherwise a TypeError would hide the Error raised below
        pywikibot.debug(u"API log entry received:\n%r" % (logdata,),
                        _logger)
        raise Error("Log entry has no 'type' key")
    return LogEntryFactory._getEntryClass(logtype)(logdata)
def __init__(self, apidata: Dict[str, Any],
             site: 'pywikibot.site.BaseSite') -> None:
    """Initialize object from a logevent dict returned by MW API.

    :param apidata: log event data, passed on to the parent initializer
    :param site: site stored on the instance as ``self.site``
    :raises Error: the class expects a log type and the entry reports a
        different one
    """
    super().__init__(apidata)
    self.site = site

    wanted = self._expected_type
    mismatch = wanted is not None and wanted != self.type()
    if mismatch:
        message = 'Wrong log type! Expecting {}, received {} instead.'
        raise Error(message.format(wanted, self.type()))
def _params(self):
    """
    Additional data for some log entry types.

    Returns the 'params' entry when present, otherwise the entry stored
    under the expected log type.

    @rtype: dict or None
    @raise Error: neither key is present in the log data
    """
    if 'params' in self.data:
        return self.data['params']
    # try old mw style preceding mw 1.19
    try:
        return self.data[self._expectedType]
    except KeyError:
        # self.type must be called; formatting the bound method would
        # put its repr into the message instead of the log type
        raise Error("action='%s': this log entry has no params details "
                    "for type %s." % (self.action(), self.type()))
def _createFromData(self, logdata: dict):
    """
    Check for logtype from data, and creates the correct LogEntry.

    @param logdata: log entry data
    @rtype: LogEntry
    @raise Error: logdata has no 'type' key
    """
    try:
        logtype = logdata['type']
    except KeyError:
        pywikibot.debug('API log entry received:\n{0}'.format(logdata),
                        _logger)
        raise Error("Log entry has no 'type' key")
    else:
        entry_class = LogEntryFactory.get_entry_class(logtype)
        return entry_class(logdata, self._site)
def _create_from_data(self, logdata: Dict[str, Any]) -> LogEntry:
    """
    Check for logtype from data, and creates the correct LogEntry.

    :param logdata: log entry data
    :raises Error: logdata has no 'type' key
    """
    try:
        logtype = logdata['type']
    except KeyError:
        dump = 'API log entry received:\n{}'.format(logdata)
        pywikibot.debug(dump, _logger)
        raise Error("Log entry has no 'type' key")

    build_entry = LogEntryFactory.get_entry_class(logtype)
    return build_entry(logdata, self._site)
def __init__(self, **kwargs):
    """Initializer.

    The speedy deletion category comes from the translated title when
    one exists for the site, otherwise from the repository item.

    :keyword site: the site to work on
    :type site: pywikibot.APISite
    :raises Error: no category could be found either way
    """
    super().__init__(**kwargs)

    translated_title = i18n.translate(self.site, self.csd_cat_title)
    if translated_title is not None:
        self.csd_cat = pywikibot.Category(self.site, translated_title)
    else:
        self.csd_cat = self.site.page_from_repository(self.csd_cat_item)
        if self.csd_cat is None:
            raise Error(
                'No category for speedy deletion found for {}'.format(
                    self.site))

    self.saved_progress = None
def _modules(self):
    """Query api on self.site for paraminfo on querymodule=self.module

    Paraminfo already held in self.__modules is reused; the API is only
    queried when at least one of the '|'-separated module names is not
    there yet. Returns a dict mapping each requested module name to its
    paraminfo. Raises Error if the API reports a module as missing.
    """
    wanted = self.module.split('|')
    if not set(wanted).issubset(self.__modules.keys()):
        paramreq = CachedRequest(expiry=config.API_config_expiry,
                                 site=self.site,
                                 action="paraminfo",
                                 querymodules=self.module)
        data = paramreq.submit()
        assert "paraminfo" in data
        assert "querymodules" in data["paraminfo"]
        answers = data["paraminfo"]["querymodules"]
        # one answer is expected per requested module
        assert len(answers) == 1 + self.module.count("|")
        for info in answers:
            assert info["name"] in self.module
            if "missing" in info:
                raise Error("Invalid query module name '%s'."
                            % self.module)
            self.__modules[info["name"]] = info

    return dict((name, self.__modules[name]) for name in wanted)
def _assert_len(len_oq, len_cq, title):
    """Raise Error unless both lengths are equal and at least 2."""
    if len_oq == len_cq and len_oq >= 2 and len_cq >= 2:
        return
    raise Error('ProofreadPage {}: invalid format'.format(title))
def _assert_len(len_oq: int, len_cq: int, title: str) -> None:
    """Check the two counts of a ProofreadPage.

    :param len_oq: first count to compare
    :param len_cq: second count to compare
    :param title: page title used in the error message
    :raises Error: the counts differ or either one is below 2
    """
    counts_differ = len_oq != len_cq
    too_few = min(len_oq, len_cq) < 2
    if counts_differ or too_few:
        raise Error('ProofreadPage {}: invalid format'.format(title))
def Site(code=None, fam=None, user=None, sysop=None, interface=None, url=None):
    """A factory method to obtain a Site object.

    Site objects are cached and reused by this method.

    By default rely on config settings. These defaults may all be overridden
    using the method parameters.

    @param code: language code (override config.mylang)
    @type code: string
    @param fam: family name or object (override config.family)
    @type fam: string or Family
    @param user: bot user name to use on this site (override config.usernames)
    @type user: unicode
    @param sysop: sysop user to use on this site (override config.sysopnames)
    @type sysop: unicode
    @param interface: site class or name of class in pywikibot.site
        (override config.site_interface)
    @type interface: subclass of L{pywikibot.site.BaseSite} or string
    @param url: Instead of code and fam, does try to get a Site based on the
        URL. Still requires that the family supporting that URL exists.
    @type url: string
    """
    # Either code and fam or only url
    assert (not url or (not code and not fam))
    _logger = "wiki"

    if not url:
        # Fallback to config defaults
        code = code or config.mylang
        fam = fam or config.family
    elif url in _url_cache:
        cached = _url_cache[url]
        if not cached:
            # a previous lookup already failed for this URL
            raise Error("Unknown URL '{0}'.".format(url))
        code = cached[0]
        fam = cached[1]
    else:
        # Iterate through all families and look, which does apply to
        # the given URL
        for fam in config.family_files:
            family = pywikibot.family.Family.load(fam)
            code = family.from_url(url)
            if code:
                _url_cache[url] = (code, fam)
                break
        else:
            _url_cache[url] = None
            # TODO: As soon as AutoFamily is ready, try and use an
            #       AutoFamily
            raise Error("Unknown URL '{0}'.".format(url))

    interface = interface or config.site_interface

    # config.usernames is initialised with a dict for each family name
    family_name = str(fam)
    if family_name in config.usernames:
        if not user:
            user = (config.usernames[family_name].get(code)
                    or config.usernames[family_name].get('*'))
        if not sysop:
            sysop = (config.sysopnames[family_name].get(code)
                     or config.sysopnames[family_name].get('*'))

    if not isinstance(interface, type):
        # If it isnt a class, assume it is a string
        try:
            tmp = __import__('pywikibot.site', fromlist=[interface])
            interface = getattr(tmp, interface)
        except ImportError:
            raise ValueError("Invalid interface name '%(interface)s'"
                             % locals())

    if not issubclass(interface, pywikibot.site.BaseSite):
        warning('Site called with interface=%s' % interface.__name__)

    user = pywikibot.tools.normalize_username(user)
    key = '%s:%s:%s:%s' % (interface.__name__, fam, code, user)
    needs_new = key not in _sites or not isinstance(_sites[key], interface)
    if needs_new:
        _sites[key] = interface(code=code, fam=fam, user=user, sysop=sysop)
        debug(u"Instantiated %s object '%s'"
              % (interface.__name__, _sites[key]), _logger)

        if _sites[key].code != code:
            warn('Site %s instantiated using different code "%s"'
                 % (_sites[key], code), UserWarning, 2)

    return _sites[key]
def main(*args):
    """
    Process command line arguments and perform task.

    If args is an empty list, sys.argv is used.

    For every page delivered by the generator, the protection templates
    found in the text are compared with the page's real edit protection
    (and move protection with -move). Templates that do not match are
    removed or replaced, and the change is saved after confirmation.

    @param args: command line arguments
    @type args: str
    """
    # Loading the comments
    global categoryToCheck, project_inserted
    # always, define a generator to understand if the user sets one,
    # defining what's genFactory
    always = False
    generator = False
    show = False
    moveBlockCheck = False
    protectedpages = False
    protectType = 'edit'
    namespace = 0

    # Process global args and prepare generator args parser
    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()

    # Process local args
    for arg in local_args:
        option, sep, value = arg.partition(':')
        if option == '-always':
            always = True
        elif option == '-move':
            moveBlockCheck = True
        elif option == '-show':
            show = True
        elif option in ('-protectedpages', '-moveprotected'):
            protectedpages = True
            if option == '-moveprotected':
                protectType = 'move'
            if value:
                namespace = int(value)
        else:
            genFactory.handle_arg(arg)

    if config.mylang not in project_inserted:
        pywikibot.output('Your project is not supported by this script.\n'
                         'You have to edit the script and add it!')
        return

    site = pywikibot.Site()

    if protectedpages:
        generator = site.protectedpages(namespace=namespace, type=protectType)
    # Take the right templates to use, the category and the comment
    TSP = i18n.translate(site, templateSemiProtection)
    TTP = i18n.translate(site, templateTotalProtection)
    TSMP = i18n.translate(site, templateSemiMoveProtection)
    TTMP = i18n.translate(site, templateTotalMoveProtection)
    TNR = i18n.translate(site, templateNoRegex)
    TU = i18n.translate(site, templateUnique)

    categories = i18n.translate(site, categoryToCheck)
    commentUsed = i18n.twtranslate(site, 'blockpageschecker-summary')
    if not generator:
        generator = genFactory.getCombinedGenerator()
    if not generator:
        generator = []
        pywikibot.output('Loading categories...')
        # Define the category if no other generator has been set
        for CAT in categories:
            cat = pywikibot.Category(site, CAT)
            # Define the generator
            gen = pagegenerators.CategorizedPageGenerator(cat)
            for pageCat in gen:
                generator.append(pageCat)
        pywikibot.output('Categories loaded, start!')
    # Main Loop
    if not genFactory.nopreload:
        generator = pagegenerators.PreloadingGenerator(generator,
                                                       groupsize=60)
    for page in generator:
        pagename = page.title(as_link=True)
        pywikibot.output('Loading {}...'.format(pagename))
        try:
            text = page.text
        except NoPageError:
            pywikibot.output("{} doesn't exist! Skipping...".format(pagename))
            continue
        except IsRedirectPageError:
            pywikibot.output('{} is a redirect! Skipping...'.format(pagename))
            if show:
                showQuest(page)
            continue
        # FIXME: This check does not work :
        # PreloadingGenerator cannot set correctly page.editRestriction
        # (see bug T57322)
        # if not page.has_permission():
        #    pywikibot.output(
        #        "{} is sysop-protected : this account can't edit "
        #        "it! Skipping...".format(pagename))
        #    continue
        restrictions = page.protection()
        try:
            editRestr = restrictions['edit']
        except KeyError:
            editRestr = None
        if not page.has_permission():
            pywikibot.output('{} is protected: '
                             "this account can't edit it! Skipping..."
                             .format(pagename))
            continue

        # Understand, according to the template in the page, what should be
        # the protection and compare it with what there really is.
        TemplateInThePage = understandBlock(text, TTP, TSP, TSMP, TTMP, TU)
        # Only to see if the text is the same or not...
        oldtext = text
        # keep track of the changes for each step (edit then move)
        changes = -1

        if not editRestr:
            # page is not edit-protected
            # Deleting the template because the page doesn't need it.
            if not (TTP or TSP):
                raise Error('This script is not localized to use it on \n{}. '
                            'Missing "templateSemiProtection" or'
                            '"templateTotalProtection"'.format(site.sitename))

            if TU:
                replaceToPerform = '|'.join(TTP + TSP + TU)
            else:
                replaceToPerform = '|'.join(TTP + TSP)
            # only the pattern is formatted; the replacement and the text
            # are arguments of re.subn, not of str.format
            text, changes = re.subn(
                '<noinclude>({})</noinclude>'.format(replaceToPerform),
                '', text)
            if changes == 0:
                text, changes = re.subn('({})'.format(replaceToPerform),
                                        '', text)
            msg = 'The page is editable for all'
            if not moveBlockCheck:
                msg += ', deleting the template..'
            pywikibot.output(msg + '.')

        elif editRestr[0] == 'sysop':
            # total edit protection
            if (TemplateInThePage[0] == 'sysop-total' and TTP) or \
               (TemplateInThePage[0] == 'unique' and TU):
                msg = 'The page is protected to the sysop'
                if not moveBlockCheck:
                    msg += ', skipping...'
                pywikibot.output(msg)
            else:
                if not TNR or TU and not TNR[4] or not (TU or TNR[1]):
                    raise Error(
                        'This script is not localized to use it on \n{}. '
                        'Missing "templateNoRegex"'.format(site.sitename))

                pywikibot.output('The page is protected to the sysop, but the '
                                 'template seems not correct. Fixing...')
                if TU:
                    text, changes = re.subn(TemplateInThePage[1], TNR[4], text)
                else:
                    text, changes = re.subn(TemplateInThePage[1], TNR[1], text)

        elif TSP or TU:
            # implicitly editRestr[0] = 'autoconfirmed', edit-Semi-protection
            if TemplateInThePage[0] == 'autoconfirmed-total' or \
               TemplateInThePage[0] == 'unique':
                msg = 'The page is editable only for the autoconfirmed users'
                if not moveBlockCheck:
                    msg += ', skipping...'
                pywikibot.output(msg)
            else:
                if not TNR or TU and not TNR[4] or not (TU or TNR[1]):
                    raise Error(
                        'This script is not localized to use it on \n{}. '
                        'Missing "templateNoRegex"'.format(site.sitename))
                pywikibot.output('The page is editable only for the '
                                 'autoconfirmed users, but the template '
                                 'seems not correct. Fixing...')
                if TU:
                    text, changes = re.subn(TemplateInThePage[1], TNR[4], text)
                else:
                    text, changes = re.subn(TemplateInThePage[1], TNR[0], text)

        if changes == 0:
            # We tried to fix edit-protection templates, but it did not work.
            pywikibot.warning('No edit-protection template could be found')

        if moveBlockCheck and changes > -1:
            # checking move protection now
            try:
                moveRestr = restrictions['move']
            except KeyError:
                moveRestr = False
            changes = -1

            if not moveRestr:
                pywikibot.output('The page is movable for all, deleting the '
                                 'template...')
                # Deleting the template because the page doesn't need it.
                if TU:
                    replaceToPerform = '|'.join(TSMP + TTMP + TU)
                else:
                    replaceToPerform = '|'.join(TSMP + TTMP)
                # same argument placement as for the edit templates above
                text, changes = re.subn(
                    '<noinclude>({})</noinclude>'.format(replaceToPerform),
                    '', text)
                if changes == 0:
                    text, changes = re.subn('({})'.format(replaceToPerform),
                                            '', text)
            elif moveRestr[0] == 'sysop':
                # move-total-protection
                if (TemplateInThePage[0] == 'sysop-move' and TTMP) or \
                   (TemplateInThePage[0] == 'unique' and TU):
                    pywikibot.output('The page is protected from moving to '
                                     'the sysop, skipping...')
                    if TU:
                        # no changes needed, better to revert the old text.
                        text = oldtext
                else:
                    pywikibot.output('The page is protected from moving to '
                                     'the sysop, but the template seems not '
                                     'correct. Fixing...')
                    if TU:
                        text, changes = re.subn(TemplateInThePage[1], TNR[4],
                                                text)
                    else:
                        text, changes = re.subn(TemplateInThePage[1], TNR[3],
                                                text)

            elif TSMP or TU:
                # implicitly moveRestr[0] = 'autoconfirmed',
                # move-semi-protection
                if TemplateInThePage[0] == 'autoconfirmed-move' or \
                   TemplateInThePage[0] == 'unique':
                    pywikibot.output('The page is movable only for the '
                                     'autoconfirmed users, skipping...')
                    if TU:
                        # no changes needed, better to revert the old text.
                        text = oldtext
                else:
                    pywikibot.output('The page is movable only for the '
                                     'autoconfirmed users, but the template '
                                     'seems not correct. Fixing...')
                    if TU:
                        text, changes = re.subn(TemplateInThePage[1], TNR[4],
                                                text)
                    else:
                        text, changes = re.subn(TemplateInThePage[1], TNR[2],
                                                text)

            if changes == 0:
                # We tried to fix move-protection templates but it did not work
                pywikibot.warning('No move-protection template could be found')

        if oldtext != text:
            # Ok, asking if the change has to be performed and do it if yes.
            pywikibot.output(
                color_format('\n\n>>> {lightpurple}{0}{default} <<<',
                             page.title()))
            pywikibot.showDiff(oldtext, text)
            if not always:
                choice = pywikibot.input_choice(
                    'Do you want to accept these '
                    'changes?',
                    [('Yes', 'y'), ('No', 'n'), ('All', 'a')], 'n')
                if choice == 'a':
                    always = True
            if always or choice == 'y':
                save_page(page, text, commentUsed)
def remove_templates(self):
    """Understand if the page is blocked has the right template.

    This is a generator used as a coroutine. In each cycle it receives a
    (text, restrictions) pair at the bare yield, compares the protection
    templates in text with the given restrictions, and then yields
    (text, msg_type) with the possibly changed text. A cycle whose text
    is None is skipped.

    Raises Error if the template lists needed for the site are missing.
    """

    def understand_block():
        """Understand if the page is blocked has the right template."""
        kinds = 'sysop-total', 'autoconfirmed-total', 'unique'
        for kind, regexes in zip(kinds, (TTP, TSP, TU)):
            if not regexes:
                continue

            for regex in regexes:
                if re.findall(regex, text):
                    return ParsedTemplate(kind, regex, 'modifying')

        if TSMP and TTMP and TTP != TTMP and TSP != TSMP:
            move_kinds = (('sysop-move', TTMP),
                          ('autoconfirmed-move', TSMP))
            for kind, regexes in move_kinds:
                for regex in regexes:
                    if re.findall(regex, text):
                        return ParsedTemplate(kind, regex, 'modifying')

        # If editable means that we have no regex, won't change anything
        # with this regex
        return ParsedTemplate('editable', r'\A', 'adding')

    TSP = i18n.translate(self.site, templateSemiProtection)
    TTP = i18n.translate(self.site, templateTotalProtection)
    TSMP = i18n.translate(self.site, templateSemiMoveProtection)
    TTMP = i18n.translate(self.site, templateTotalMoveProtection)
    TNR = i18n.translate(self.site, templateNoRegex)
    TU = i18n.translate(self.site, templateUnique)

    while True:
        text, restrictions = yield
        if text is None:
            continue

        # Understand, according to the template in the page, what should
        # be the protection and compare it with what there really is.
        parsed = understand_block()
        # Only to see if the text is the same or not...
        oldtext = text
        # keep track of the changes for each step (edit then move)
        changes = -1

        msg_type = None  # type: Optional[str]
        edit_restr = restrictions.get('edit')
        if not edit_restr:
            # page is not edit-protected
            # Deleting the template because the page doesn't need it.
            if not (TTP or TSP):
                raise Error(
                    'This script is not localized to use it on {}.\n'
                    'Missing "templateSemiProtection" or'
                    '"templateTotalProtection"'.format(self.site.sitename))

            regexes = TTP + TSP
            if TU:
                regexes = regexes + TU
            alternatives = '|'.join(regexes)
            text, changes = re.subn(
                '<noinclude>({})</noinclude>'.format(alternatives),
                '', text)
            if not changes:
                text, changes = re.subn('({})'.format(alternatives),
                                        '', text)
            msg = 'The page is editable for all'
            if not self.opt.move:
                msg += ', deleting the template..'
            pywikibot.output(msg + '.')
            msg_type = 'deleting'

        elif edit_restr[0] == 'sysop':
            # total edit protection
            if (parsed.blocktype == 'sysop-total' and TTP) \
               or (parsed.blocktype == 'unique' and TU):
                msg = 'The page is protected to the sysop'
                if not self.opt.move:
                    msg += ', skipping...'
                pywikibot.output(msg)
            else:
                if not TNR or TU and not TNR[4] or not (TU or TNR[1]):
                    raise Error(
                        'This script is not localized to use it on \n{}. '
                        'Missing "templateNoRegex"'.format(
                            self.site.sitename))

                pywikibot.output(
                    'The page is protected to the sysop, but the template '
                    'seems not correct. Fixing...')
                new_template = TNR[4] if TU else TNR[1]
                text, changes = re.subn(parsed.regex, new_template, text)
                msg_type = parsed.msgtype

        elif TSP or TU:
            # implicitly
            # editRestr[0] = 'autoconfirmed', edit-Semi-protection
            if parsed.blocktype in ('autoconfirmed-total', 'unique'):
                msg = ('The page is editable only for the autoconfirmed '
                       'users')
                if not self.opt.move:
                    msg += ', skipping...'
                pywikibot.output(msg)
            else:
                if not TNR or TU and not TNR[4] or not (TU or TNR[1]):
                    raise Error(
                        'This script is not localized to use it on \n'
                        '{}. Missing "templateNoRegex"'.format(
                            self.site.sitename))
                pywikibot.output(
                    'The page is editable only for the autoconfirmed '
                    'users, but the template seems not correct. Fixing...')
                new_template = TNR[4] if TU else TNR[0]
                text, changes = re.subn(parsed.regex, new_template, text)
                msg_type = parsed.msgtype

        if not changes:
            # We tried to fix edit-protection templates, but it did
            # not work.
            pywikibot.warning('No edit-protection template could be found')

        if self.opt.move and changes > -1:
            # checking move protection now
            move_restr = restrictions.get('move')
            changes = -1

            if not move_restr:
                pywikibot.output('The page is movable for all, deleting '
                                 'the template...')
                # Deleting the template because the page doesn't need it.
                regexes = TSMP + TTMP
                if TU:
                    regexes = regexes + TU
                alternatives = '|'.join(regexes)
                text, changes = re.subn(
                    '<noinclude>({})</noinclude>'.format(alternatives),
                    '', text)
                if not changes:
                    text, changes = re.subn(
                        '({})'.format(alternatives), '', text)
                msg_type = 'deleting'

            elif move_restr[0] == 'sysop':
                # move-total-protection
                if (parsed.blocktype == 'sysop-move' and TTMP) \
                   or (parsed.blocktype == 'unique' and TU):
                    pywikibot.output('The page is protected from moving '
                                     'to the sysop, skipping...')
                    if TU:
                        # no changes needed, better to revert the old text.
                        text = oldtext
                else:
                    pywikibot.output(
                        'The page is protected from moving to the sysop, '
                        'but the template seems not correct. Fixing...')
                    new_template = TNR[4] if TU else TNR[3]
                    text, changes = re.subn(parsed.regex, new_template,
                                            text)
                    msg_type = parsed.msgtype

            elif TSMP or TU:
                # implicitly
                # moveRestr[0] = 'autoconfirmed', move-semi-protection
                if parsed.blocktype in ('autoconfirmed-move', 'unique'):
                    pywikibot.output('The page is movable only for the '
                                     'autoconfirmed users, skipping...')
                    if TU:
                        # no changes needed, better to revert the old text.
                        text = oldtext
                else:
                    pywikibot.output(
                        'The page is movable only for the autoconfirmed '
                        'users, but the template seems not correct. '
                        'Fixing...')
                    new_template = TNR[4] if TU else TNR[2]
                    text, changes = re.subn(parsed.regex, new_template,
                                            text)
                    msg_type = parsed.msgtype

            if not changes:
                # We tried to fix move-protection templates
                # but it did not work
                pywikibot.warning(
                    'No move-protection template could be found')

        yield text, msg_type
def _get_page_mappings(self):
    """Associate label and number for each page linked to the index.

    The parsed index page is searched for page links and the mappings
    between page numbers, pages and labels are rebuilt from scratch. If
    no link with a page quality class is found, the page is purged and
    parsed once more.

    Raises ValueError if no suitable link is found even after purging,
    and Error if a linked page is not among self._all_page_links.
    """
    # Clean cache, if any.
    for cache_name in ('_page_from_numbers', '_numbers_from_page',
                       '_page_numbers_from_label', '_pages_from_label',
                       '_labels_from_page_number', '_labels_from_page'):
        setattr(self, cache_name, {})
    if hasattr(self, '_parsed_text'):
        del self._parsed_text

    self._parsed_text = self._get_parsed_page()
    self._soup = _bs4_soup(self._parsed_text)

    # Search for attribute "prp-pagequality" in tags:
    # Existing pages:
    # <a href="/wiki/Page:xxx.djvu/n"
    #    title="Page:xxx.djvu/n">m
    #    class="quality1 prp-pagequality-1"
    # </a>
    # Non-existing pages:
    # <a href="/w/index.php?title=xxx&action=edit&redlink=1"
    #    class="new"
    #    title="Page:xxx.djvu/n (page does not exist)">m
    # </a>

    # Do not search for "new" here, to avoid to skip purging if links
    # to non-existing pages are present.
    quality_only = {'class': re.compile('prp-pagequality')}
    quality_or_new = {'class': re.compile('prp-pagequality|new')}

    # Try to purge or raise ValueError.
    if not self._soup.find_all('a', attrs=quality_only):
        self.purge()
        del self._parsed_text
        self._parsed_text = self._get_parsed_page()
        self._soup = _bs4_soup(self._parsed_text)
        if not self._soup.find_all('a', attrs=quality_or_new):
            raise ValueError(
                'Missing class="qualityN prp-pagequality-N" or '
                'class="new" in: {}.'.format(self))

    # Search for attribute "prp-pagequality" or "new" in tags:
    counter = 0
    for anchor in self._soup.find_all('a', attrs=quality_or_new):
        label = anchor.text.lstrip('0')  # Label is not converted to int.
        css_classes = anchor.get('class')
        href = anchor.get('href')

        if 'new' not in css_classes:
            title = anchor.get('title')  # existing page
        else:
            title = self._parse_redlink(href)  # non-existing page
            if title is None:  # title not conforming to required format
                continue

        try:
            page = ProofreadPage(self.site, title)
            page.index = self  # set index property for page
            counter += 1
        except ValueError:
            # title is not in site.proofread_page_ns; do not consider it
            continue

        if page not in self._all_page_links:
            raise Error('Page {} not recognised.'.format(page))

        # In order to avoid to fetch other Page:title links outside
        # the Pages section of the Index page; these should hopefully be
        # the first ones, so if they start repeating, we are done.
        if page in self._labels_from_page:
            break

        # Sanity check if WS site use page convention name/number.
        if page._num is not None:
            assert counter == int(page._num), (
                'Page number {} not recognised as page {}.'.format(
                    counter, title))

        # Mapping: numbers <-> pages.
        self._page_from_numbers[counter] = page
        self._numbers_from_page[page] = counter
        # Mapping: numbers/pages as keys, labels as values.
        self._labels_from_page_number[counter] = label
        self._labels_from_page[page] = label
        # Reverse mapping: labels as keys, numbers/pages as values.
        self._page_numbers_from_label.setdefault(label, set()).add(counter)
        self._pages_from_label.setdefault(label, set()).add(page)

    # Sanity check: all links to Page: ns must have been considered.
    assert set(self._labels_from_page) == set(self._all_page_links)

    # Info cached.
    self._cached = True
def remove_templates(self):
    """Generator which fixes the protection templates of page texts.

    The generator is driven with ``send()``: at the bare ``yield`` it
    receives a ``(text, restrictions)`` tuple and at the following
    ``yield`` it hands back ``(text, msg_type)``. If the received text
    is None the item is skipped and the generator waits for the next
    tuple without producing a result.

    ``restrictions`` is read with ``.get('edit')`` and ``.get('move')``;
    the first item of a found entry is compared with ``'sysop'``.
    ``msg_type`` is None when nothing was done, ``'deleting'`` when
    templates were removed, otherwise the ``msgtype`` of the template
    detected in the text.

    Error is raised when the localized templates needed for the
    requested fix are missing.
    """
    def understand_block():
        """Detect which protection template is present in the text.

        Return a ParsedTemplate built from the block type, the regex
        that matched and the message type. When nothing matches, the
        block type is 'editable' and the regex only matches the start
        of the text.
        """
        edit_groups = (('sysop-total', ttp),
                       ('autoconfirmed-total', tsp),
                       ('unique', tu))
        for blocktype, template in edit_groups:
            if not template:
                continue
            for catch_regex in template:
                if re.search(catch_regex, text):
                    return ParsedTemplate(blocktype, catch_regex,
                                          'modifying')

        # Move templates are only looked at when they are localized and
        # differ from the edit templates.
        if tsmp and ttmp and ttp != ttmp and tsp != tsmp:
            for catch_regex in ttmp:
                if re.search(catch_regex, text):
                    return ParsedTemplate('sysop-move', catch_regex,
                                          'modifying')
            for catch_regex in tsmp:
                if re.search(catch_regex, text):
                    return ParsedTemplate('autoconfirmed-move',
                                          catch_regex, 'modifying')

        # If editable means that we have no regex, won't change anything
        # with this regex
        return ParsedTemplate('editable', r'\A', 'adding')

    def join_patterns(*groups):
        """Join the regexes of all non-empty groups with '|'.

        Groups which are None or empty are ignored, so a partially
        localized site does not fail on concatenation.
        """
        return '|'.join(regex
                        for group in groups if group
                        for regex in group)

    def strip_templates(pattern, content):
        """Remove templates matching pattern; return (content, count).

        A match wrapped in <noinclude> tags is tried first, the bare
        template afterwards. An empty pattern removes nothing because
        it would otherwise match at every position.
        """
        if not pattern:
            return content, 0
        content, count = re.subn(
            '<noinclude>({})</noinclude>'.format(pattern), '', content)
        if not count:
            content, count = re.subn('({})'.format(pattern), '', content)
        return content, count

    def no_regex_template(index):
        """Return the replacement template after validating it.

        The unique template (index 4) takes precedence when the site
        has one; otherwise the entry actually used for the substitution
        is the one checked.
        """
        if tu:
            index = 4
        if not tnr or not tnr[index]:
            raise Error(
                'This script is not localized to use it on \n{}. '
                'Missing "template_no_regex"'.format(
                    self.site.sitename))
        return tnr[index]

    tsp = i18n.translate(self.site, template_semi_protection)
    ttp = i18n.translate(self.site, template_total_protection)
    tsmp = i18n.translate(self.site, template_semi_move_protection)
    ttmp = i18n.translate(self.site, template_total_move_protection)
    tnr = i18n.translate(self.site, template_no_regex)
    tu = i18n.translate(self.site, template_unique)

    while True:
        text, restrictions = yield
        if text is None:
            continue

        # Understand, according to the template in the page, what should
        # be the protection and compare it with what there really is.
        template_in_page = understand_block()
        # Only to see if the text is the same or not...
        oldtext = text
        # keep track of the changes for each step (edit then move);
        # -1 means that no substitution was attempted
        changes = -1

        msg_type = None  # type: Optional[str]
        edit_restriction = restrictions.get('edit')
        if not edit_restriction:
            # page is not edit-protected
            # Deleting the template because the page doesn't need it.
            if not (ttp or tsp):
                raise Error(
                    'This script is not localized to use it on {}.\n'
                    'Missing "template_semi_protection" or '
                    '"template_total_protection"'.format(
                        self.site.sitename))

            text, changes = strip_templates(
                join_patterns(ttp, tsp, tu), text)

            msg = 'The page is editable for all'
            if not self.opt.move:
                msg += ', deleting the template..'
            pywikibot.output(msg + '.')
            msg_type = 'deleting'

        elif edit_restriction[0] == 'sysop':
            # total edit protection
            if template_in_page.blocktype == 'sysop-total' and ttp \
               or template_in_page.blocktype == 'unique' and tu:
                msg = 'The page is protected to the sysop'
                if not self.opt.move:
                    msg += ', skipping...'
                pywikibot.output(msg)
            else:
                replacement = no_regex_template(1)
                pywikibot.output(
                    'The page is protected to the sysop, but the template '
                    'seems not correct. Fixing...')
                text, changes = re.subn(template_in_page.regex,
                                        replacement, text)
                msg_type = template_in_page.msgtype

        elif tsp or tu:
            # implicitly edit semi-protection
            if template_in_page.blocktype in ('autoconfirmed-total',
                                              'unique'):
                msg = ('The page is editable only for the autoconfirmed '
                       'users')
                if not self.opt.move:
                    msg += ', skipping...'
                pywikibot.output(msg)
            else:
                replacement = no_regex_template(0)
                pywikibot.output(
                    'The page is editable only for the autoconfirmed '
                    'users, but the template seems not correct. Fixing...')
                text, changes = re.subn(template_in_page.regex,
                                        replacement, text)
                msg_type = template_in_page.msgtype

        if not changes:
            # We tried to fix edit-protection templates, but it did
            # not work.
            pywikibot.warning('No edit-protection template could be found')

        if self.opt.move and changes > -1:
            # checking move protection now
            move_restriction = restrictions.get('move')
            changes = -1

            if not move_restriction:
                pywikibot.output('The page is movable for all, deleting '
                                 'the template...')
                # Deleting the template because the page doesn't need it.
                text, changes = strip_templates(
                    join_patterns(tsmp, ttmp, tu), text)
                msg_type = 'deleting'

            elif move_restriction[0] == 'sysop':
                # move-total-protection
                if template_in_page.blocktype == 'sysop-move' and ttmp \
                   or template_in_page.blocktype == 'unique' and tu:
                    pywikibot.output('The page is protected from moving '
                                     'to the sysop, skipping...')
                    if tu:
                        # no changes needed, better to revert the old text.
                        text = oldtext
                else:
                    pywikibot.output(
                        'The page is protected from moving to the sysop, '
                        'but the template seems not correct. Fixing...')
                    # NOTE(review): tnr is not validated in the move
                    # branches, unlike the edit branches above.
                    if tu:
                        text, changes = re.subn(template_in_page.regex,
                                                tnr[4], text)
                    else:
                        text, changes = re.subn(template_in_page.regex,
                                                tnr[3], text)
                    msg_type = template_in_page.msgtype

            elif tsmp or tu:
                # implicitly move semi-protection
                if template_in_page.blocktype in ('autoconfirmed-move',
                                                  'unique'):
                    pywikibot.output('The page is movable only for the '
                                     'autoconfirmed users, skipping...')
                    if tu:
                        # no changes needed, better to revert the old text.
                        text = oldtext
                else:
                    pywikibot.output(
                        'The page is movable only for the autoconfirmed '
                        'users, but the template seems not correct. '
                        'Fixing...')
                    if tu:
                        text, changes = re.subn(template_in_page.regex,
                                                tnr[4], text)
                    else:
                        text, changes = re.subn(template_in_page.regex,
                                                tnr[2], text)
                    msg_type = template_in_page.msgtype

            if not changes:
                # We tried to fix move-protection templates
                # but it did not work
                pywikibot.warning(
                    'No move-protection template could be found')

        yield text, msg_type