def main(*args):
    """Process command line arguments and invoke the location-fixing bot."""
    bot_options = {}
    # Process global arguments to determine the desired site.
    remaining_args = pywikibot.handle_args(args)
    # The factory processes the command line arguments that are shared
    # with other scripts and that determine which pages to work on.
    factory = pywikibot.pagegenerators.GeneratorFactory()
    api_params = {}
    for option in remaining_args:
        # Let the pagegenerators machinery consume its own options first.
        if factory.handleArg(option):
            continue
        if option.startswith("-aistart:"):
            api_params = {'gaistart': option[len("-aistart:"):]}
    # Preloading downloads multiple pages from the wiki simultaneously.
    gen = factory.getCombinedGenerator(preload=True)
    if not gen:
        # Fall back to iterating over this bot's own Geograph uploads.
        gen = PreloadingGenerator(
            GeographBotUploads(site=pywikibot.Site(),
                               parameters=api_params))
    if not gen:
        pywikibot.bot.suggest_help(missing_generator=True)
        return False
    # Pass generator and private options to the bot and run it.
    FixLocationBot(gen, **bot_options).run()
    return True
def featuredWithInterwiki(self, fromsite, task):
    """Read featured articles and find the corresponding pages.

    Find corresponding pages on other sites, place the template and
    remember the page in the cache dict.
    """
    tosite = self.site
    # Make sure the nested cache dict for this language pair exists.
    per_source = self.cache.setdefault(fromsite.code, {})
    cc = per_source.setdefault(tosite.code, {})
    # A throw-away dict effectively disables caching for this source.
    if self.getOption('nocache') is True or \
       fromsite.code in self.getOption('nocache'):
        cc = {}
    articles = self.featuredArticles(fromsite, task, cc)
    if self.getOption('count'):
        next(articles, None)
        return  # count only, we are ready here
    for source in PreloadingGenerator(articles):
        if source.isRedirectPage():
            source = source.getRedirectTarget()
        if not source.exists():
            pywikibot.output(u"source page doesn't exist: %s" % source)
            continue
        for dest in self.findTranslated(source, tosite):
            self.add_template(source, dest, task, fromsite)
            cc[source.title()] = dest.title()
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    factory = GeneratorFactory()
    bot_kwargs = {}
    # handle_args strips the global options and returns the rest.
    for option in pywikibot.handle_args(args):
        if option == '-always':
            bot_kwargs['always'] = True
        else:
            factory.handleArg(option)
    gen = factory.getCombinedGenerator()
    if not gen:
        pywikibot.bot.suggest_help(missing_generator=True)
        return False
    SelflinkBot(PreloadingGenerator(gen), **bot_kwargs).run()
    return True
def generator(self):
    """Yield preloaded pages found by each searchable typo rule."""
    searchable = (rule for rule in self.typoRules if rule.canSearch())
    for rule in searchable:
        pywikibot.output('Query: "%s"' % rule.query)
        # Remember which rule produced the pages that follow.
        self.current_rule = rule
        yield from PreloadingGenerator(rule.querySearch())
def main(*args: str) -> None:
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    :param args: command line arguments
    """
    filename = 'dict.txt'
    options = {}
    r_options = {}
    for arg in pywikibot.handle_args(args):
        name, _, value = arg.partition(':')
        option = name.partition('-')[2]
        # reader options
        if option in ('begin', 'end', 'titlestart', 'titleend', 'title'):
            r_options[option] = value
        elif option == 'file':
            filename = value
        elif option in ('include', 'notitle', 'textonly'):
            r_options[option] = True
        # bot options
        elif option in ('appendbottom', 'appendtop'):
            options['append'] = (option[len('append'):], value)
        elif option in ('force', 'minor', 'autosummary', 'showdiff'):
            options[option] = True
        elif option == 'noredirect':
            options['redirect'] = False
        elif option in ('nocontent', 'summary'):
            options[option] = value
        else:
            pywikibot.output('Disregarding unknown argument {}.'.format(name))
    # Without -showdiff the bot runs unattended.
    options['always'] = 'showdiff' not in options
    while not os.path.isfile(filename):
        pywikibot.output("\nFile '{}' does not exist. ".format(filename))
        answer = pywikibot.input('Please enter the file name [q to quit]:')
        if answer == 'q':
            # User quit: show help text from the top of this file.
            pywikibot.bot.suggest_help(missing_parameters=['-file'])
            return
        filename = answer
    site = pywikibot.Site()
    generator = PreloadingGenerator(
        PageFromFileReader(filename, site=site, **r_options))
    PageFromFileRobot(generator=generator, site=site, **options).run()
def _handle_recent(self, value):
    """Return a generator over recently touched Geograph images.

    Chains images newly arrived on Commons with images whose Geograph
    source changed since the cutoff; *value* is the look-back in days.
    """
    days_back = int(value)
    starttime = datetime.now(timezone.utc) - timedelta(days=days_back)
    # Start one extra day early to be safe around boundaries.
    earlystart = starttime - timedelta(days=1)
    api_params = {'gcmend': earlystart.astimezone(timezone.utc)}
    commons_new = PreloadingGenerator(
        NewGeographImages(site=pywikibot.Site(), parameters=api_params))
    geograph_changed = ModifiedGeographs(modified_since=starttime,
                                         submitted_before=earlystart)
    return chain(commons_new, geograph_changed)
def generator(self):
    """Yield preloaded mainspace pages matching each rule's query."""
    with_query = (rule for rule in self.typoRules
                  if rule.query is not None)
    for rule in with_query:
        pywikibot.output('Query: "{}"'.format(rule.query))
        # Remember which rule produced the pages that follow.
        self.current_rule = rule
        yield from PreloadingGenerator(
            self.site.search(rule.query, namespaces=[0]))
def treat_page(self):
    """Propagate the page's birth year into references on linking pages.

    Requires exactly one birthdate category on the current page; then
    searches for wikilinks to the page followed by "(* <year>)" and
    rewrites them through ``replace_pattern``, adding ``self.year``.
    """
    page = self.current_page
    cat_titles = (cat.title(with_ns=False, with_section=False,
                            allow_interwiki=False, insite=self.site)
                  for cat in textlib.getCategoryLinks(page.text,
                                                      site=self.site))
    found = [m for m in map(self.categoryR.fullmatch, cat_titles) if m]
    if not found:
        pywikibot.output('No birthdate category found')
        return
    if len(found) > 1:
        pywikibot.output('Multiple birthdate categories found')
        return
    birth_date = found[0].group(1)
    search_query = ''.join([
        'linksto:"%s"' % page.title(),
        r' insource:/\[\[[^\[\]]+\]\]',
        r' +\(\* *\[*%s\]*\)/' % birth_date,
        ' -intitle:"Seznam"',
    ])
    # Match a link to this page (or one of its redirects) followed by
    # the birth year in parentheses.
    linked = chain([page], page.backlinks(followRedirects=False,
                                          filterRedirects=True,
                                          namespaces=[0]))
    alternatives = '|'.join(re.escape(p.title()) for p in linked)
    regex = re.compile(
        r'\[\[((?:%s)(?:\|[^\[\]]+)?)\]\]' % alternatives
        + r' +\(\* *(\[\[)?(%s)(\]\])?\)' % birth_date)
    for ref_page in PreloadingGenerator(
            SearchPageGenerator(search_query, namespaces=[0],
                                site=self.site)):
        # todo: multiple matches
        match = regex.search(ref_page.text)
        if not match:
            continue
        inside, left, year1, right = match.groups('')
        replacement = replace_pattern.format(inside=inside, left=left,
                                             right=right, year1=year1,
                                             year2=self.year)
        new_text = (ref_page.text[:match.start()] + replacement
                    + ref_page.text[match.end():])
        self.userPut(ref_page, ref_page.text, new_text,
                     summary='doplnění data úmrtí')
def test_low_step(self):
    """Test PreloadingGenerator with a step below the page count."""
    links = list(self.site.pagelinks(self.get_mainpage(), total=20))
    seen = 0
    for page in PreloadingGenerator(links, step=10):
        self.assertIsInstance(page, pywikibot.Page)
        self.assertIsInstance(page.exists(), bool)
        if page.exists():
            # Exactly one revision was preloaded, with its text.
            self.assertEqual(len(page._revisions), 1)
            self.assertIsNotNone(page._revisions[page._revid].text)
            self.assertFalse(hasattr(page, '_pageprops'))
        seen += 1
    # Every link must come back out of the generator.
    self.assertEqual(len(links), seen)
def treat(self, page):
    """Rebuild *page* from cached typo entries that are still present."""
    fmt = self.helper.pattern
    for entry in PreloadingGenerator(self.line_iterator(page)):
        key = entry.title()
        if not entry.exists():
            # Drop cached strings for pages that vanished.
            self.cache.pop(key)
            continue
        # Follow redirect chains to the final target.
        while entry.isRedirectPage():
            entry = entry.getRedirectTarget()
        title = entry.title()
        text = self.helper.remove_disabled_parts(entry.text)
        # Keep only the cached strings that still occur in the text.
        self.put.extend(fmt.format('[[%s]]' % title, string)
                        for string in self.cache.pop(key)
                        if string in text)
    page.text = '\n'.join(self.put)
    page.save(summary='odstranění vyřešených překlepů', minor=True,
              botflag=True, apply_cosmetic_changes=False)
def treat_page_and_item(self, page, item):
    """Harvest a description for *item* from pages that link to it."""
    lang = self.site.lang
    if lang in item.descriptions:
        return
    title = item.getSitelink(self.site)
    query = r'linksto:"%s" insource:/\* *%s/' % (
        title, re.escape('[[' + title))
    regex = self.get_regex_for_title(re.escape(title))
    candidates = PreloadingGenerator(
        SearchPageGenerator(query, namespaces=[0]))
    for source in candidates:
        # todo: first polish text
        found = regex.search(source.text)
        if not found:
            continue
        # Unless -allpages is set, only harvest from disambiguations.
        if not (self.opt['allpages'] or source.isDisambig()):
            continue
        description = self.parse_description(found.group(2))
        if not self.validate_description(description):
            continue
        summary = self.get_summary(source, description)
        item.descriptions[lang] = description.strip()
        if self.user_edit_entity(item, summary=summary):
            break
def main():
    """Process command line arguments and invoke SelflinkBot (compat)."""
    factory = GeneratorFactory()
    bot_kwargs = {}
    # handleArgs consumes the global options and returns the rest.
    for option in pywikibot.handleArgs():
        if option == '-always':
            bot_kwargs['always'] = True
        else:
            factory.handleArg(option)
    gen = factory.getCombinedGenerator()
    if not gen:
        pywikibot.showHelp()
        return
    SelflinkBot(PreloadingGenerator(gen), **bot_kwargs).run()
def treat_page(self):
    """Propagate the page's birth year into references on linking pages.

    Requires exactly one birthdate category on the current page; then
    searches for wikilinks to the page followed by "(* <year>)" and lets
    ``self.replace_callback`` rewrite every match.
    """
    page = self.current_page
    cats = textlib.getCategoryLinks(page.text, site=self.site)
    names = [cat.title(with_ns=False, with_section=False,
                       allow_interwiki=False, insite=self.site)
             for cat in cats]
    hits = [m for m in (self.categoryR.fullmatch(n) for n in names) if m]
    if not hits:
        pywikibot.output('No birthdate category found')
        return
    if len(hits) > 1:
        pywikibot.output('Multiple birthdate categories found')
        return
    birth_date = hits[0].group(1)
    search_query = ('linksto:"%s"' % page.title()  # todo: sanitize?
                    + r' insource:/\[\[[^\[\]]+\]\]'
                    + r' +\(\* *\[*%s\]*\)/' % birth_date
                    + ' -intitle:"Seznam"')
    # Match a link to this page (or one of its redirects) followed by
    # the birth year in parentheses.
    backlinks = page.backlinks(followRedirects=False,
                               filterRedirects=True, namespaces=[0])
    alternatives = '|'.join(re.escape(p.title())
                            for p in chain([page], backlinks))
    regex = re.compile(
        r'\[\[((?:%s)(?:\|[^\[\]]+)?)\]\]' % alternatives
        + r' +\(\* *(\[\[)?(%s)(\]\])?\)' % birth_date)
    search = SearchPageGenerator(search_query, namespaces=[0],
                                 site=self.site)
    for ref_page in PreloadingGenerator(search):
        new_text, count = regex.subn(self.replace_callback, ref_page.text)
        if count:
            self.userPut(ref_page, ref_page.text, new_text,
                         summary='doplnění data úmrtí')
def RatingGenerator(pages, step=50):
    """
    Generate pages with assessment ratings.

    Each page is annotated with a ``_rating`` attribute taken from the
    assessment on its talk page and then yielded.  Note *pages* is
    iterated twice, so it must not be a one-shot generator.

    :param pages: pages to annotate and yield
    :param step: number of talk pages to preload per request
    """
    # Preload talk page contents in bulk to speed up processing.
    # Note: since pywikibot's PreloadingGenerator doesn't guarantee
    # order, we'll have to exhaust it and map title to talkpage.
    tp_map = {
        talkpage.title(withNamespace=False): talkpage
        for talkpage in PreloadingGenerator(TalkPageGenerator(pages),
                                            step=step)}

    # Iterate and set the rating.  Any way of failing to obtain an
    # assessment (no preloaded talk page, talk page missing, talk page
    # is a redirect) maps to the same 'na' rating, so the previously
    # duplicated handlers are collapsed into one tuple except-clause.
    for page in pages:
        try:
            talkpage = tp_map[page.title()]
            page._rating = page.get_assessment(talkpage.get())
        except (KeyError, pywikibot.NoPage, pywikibot.IsRedirectPage):
            page._rating = 'na'
        yield page
def main(*args):
    """
    Process command line arguments and invoke bot.

    Parses rename and rights log events for the configured date window,
    then runs UserGroupsMassMessageListUpdater over the enabled pages
    from the on-wiki JSON configuration.

    @param args: command line arguments
    @type args: list of unicode
    """
    # Default to processing yesterday only.
    options = {
        'end_date': date.today() - timedelta(days=1),
        'start_date': date.today() - timedelta(days=1),
    }
    # Process global arguments.
    local_args = pywikibot.handle_args(args)
    site = pywikibot.Site()
    site.login()
    # Parse command line arguments.
    for arg in local_args:
        arg, _, value = arg.partition(':')
        arg = arg[1:]
        if arg in ('config', 'end_date', 'start_date'):
            if not value:
                value = pywikibot.input(
                    'Please enter a value for {}'.format(arg),
                    default=None)
            options[arg] = value
        else:
            options[arg] = True
    if not validate_options(options):
        pywikibot.bot.suggest_help(
            additional_text='The specified options are invalid.')
        return False
    config = pywikibot.Page(site, options.pop('config'))
    config = get_json_from_page(config)
    if not validate_config(config, site):
        pywikibot.bot.suggest_help(
            additional_text='The specified configuration is invalid.')
        return False
    options['config'] = config
    meta = pywikibot.Site('meta', 'meta')
    # Meta rights log titles carry an "@dbname" suffix for this wiki.
    suffix = '@{}'.format(site.dbName())
    start = datetime.datetime.combine(options.pop('start_date'), time.min)
    end = datetime.datetime.combine(options.pop('end_date'), time.max)
    # Parse rename logs into a list of dict.
    if options.pop('rename', None):
        renames = []  # idiom: literal instead of list()
        if options.get('meta'):
            rename_events = meta.logevents(logtype='gblrename',
                                           start=start, end=end,
                                           reverse=True)
        else:
            rename_events = site.logevents(logtype='renameuser',
                                           start=start, end=end,
                                           reverse=True)
        for rename in rename_events:
            try:
                renames.append({
                    'olduser': pywikibot.User(
                        site, rename.data['params']['olduser']),
                    'newuser': pywikibot.User(
                        site, rename.data['params']['newuser']),
                    'timestamp': rename.timestamp(),
                })
            except KeyError:
                # Log entries with suppressed params lack these keys.
                continue
        options['renames'] = sorted(renames, key=itemgetter('timestamp'))
    # Parse rights logs into a list of dict.
    group_changes = []  # idiom: literal instead of list()
    rights_events = site.logevents(logtype='rights', start=start, end=end,
                                   reverse=True)
    if options.pop('meta', None):
        meta_rights_events = set()
        for log_event in meta.logevents(logtype='rights', start=start,
                                        end=end, reverse=True):
            try:
                if log_event.page().title().endswith(suffix):
                    meta_rights_events.add(log_event)
            except KeyError:
                continue
        rights_events = chain(rights_events, meta_rights_events)
    for log_event in rights_events:
        try:
            new_groups = set(log_event.newgroups)
            old_groups = set(log_event.oldgroups)
            group_changes.append({
                'user': pywikibot.User(
                    site,
                    re.sub(r'{}$'.format(suffix), '',
                           log_event.page().title()),
                ),
                'added': new_groups - old_groups,
                'removed': old_groups - new_groups,
                'timestamp': log_event.timestamp(),
            })
        except KeyError:
            # Suppressed log entries lack the groups information.
            continue
    options['group_changes'] = sorted(group_changes,
                                      key=itemgetter('timestamp'))
    # Generate pages and invoke the bot.  Iterating values() avoids the
    # double dict lookup of "config[key][...] for key in config.keys()".
    gen = (conf['page'] for conf in config.values() if conf['enabled'])
    gen = PreloadingGenerator(gen)
    UserGroupsMassMessageListUpdater(gen, site=site, **options).run()
    return True
def featuredWithInterwiki(self, fromsite, task): """Place or remove the Link_GA/FA template on/from a page""" def compile_link(site, templates): """compile one link template list""" findtemplate = '(%s)' % '|'.join(templates) return re.compile(ur"\{\{%s\|%s\}\}" % (findtemplate.replace(u' ', u'[ _]'), site.code), re.IGNORECASE) quiet = self.getOption('quiet') tosite = self.site if not fromsite.lang in self.cache: self.cache[fromsite.lang] = {} if not tosite.lang in self.cache[fromsite.lang]: self.cache[fromsite.lang][tosite.lang] = {} cc = self.cache[fromsite.lang][tosite.lang] if self.getOption('nocache') is True or \ fromsite.code in self.getOption('nocache'): cc = {} add_tl, remove_tl = self.getTemplateList(tosite.code, task) re_Link_add = compile_link(fromsite, add_tl) re_Link_remove = compile_link(fromsite, remove_tl) gen = self.featuredArticles(fromsite, task, cc) gen = PreloadingGenerator(gen) pairs = [] for a in gen: if a.isRedirectPage(): a = a.getRedirectTarget() if not a.exists(): pywikibot.output(u"source page doesn't exist: %s" % a.title()) continue atrans = self.findTranslated(a, tosite) if not atrans: continue text = atrans.get() m1 = add_tl and re_Link_add.search(text) m2 = remove_tl and re_Link_remove.search(text) changed = False if add_tl: if m1: pywikibot.output(u"(already added)") else: # insert just before interwiki if (not interactive or pywikibot.input( u'Connecting %s -> %s. Proceed? 
[Y/N]' % (a.title(), atrans.title())) in ['Y', 'y']): if self.getOption('side'): # Placing {{Link FA|xx}} right next to # corresponding interwiki text = (text[:m1.end()] + u" {{%s|%s}}" % (add_tl[0], fromsite.code) + text[m1.end():]) else: # Moving {{Link FA|xx}} to top of interwikis iw = pywikibot.getLanguageLinks(text, self.site) text = pywikibot.removeLanguageLinks(text, self.site) text += u"\r\n{{%s|%s}}\r\n" % (add_tl[0], fromsite.code) text = pywikibot.replaceLanguageLinks(text, iw, self.site) changed = True if remove_tl: if m2: if (not interactive or pywikibot.input( u'Connecting %s -> %s. Proceed? [Y/N]' % (a.title(), atrans.title())) in ['Y', 'y']): text = re.sub(re_Link_add, '', text) changed = True elif task == 'former': pywikibot.output(u"(already removed)") cc[a.title()] = atrans.title() if changed: comment = i18n.twtranslate(self.site, 'featured-' + task, {'page': unicode(a)}) try: atrans.put(text, comment) except pywikibot.LockedPage: pywikibot.output(u'Page %s is locked!' % atrans.title()) except pywikibot.PageNotSaved, e: pywikibot.output(u"Page not saved")
def update_subscribers(self):
    '''
    Update the list of subscribers based on the current configuration
    '''
    # Mark every currently active user of this wiki as not yet seen...
    reset_query = r"""UPDATE {} SET seen=0 WHERE lang=%(lang)s AND active=1""".format(config.regulars_table)
    # ...and afterwards deactivate whoever stayed unseen, since that
    # means the user no longer transcludes one of our templates.
    inactive_query = r"""UPDATE {} SET active=0 WHERE lang=%(lang)s AND seen=0""".format(config.regulars_table)

    ## Connect to the database
    sbdb = db.SuggestBotDatabase()
    if not sbdb.connect():
        logging.error("Unable to connect to the suggestbot database")
        return(False)
    (dbconn, dbcursor) = sbdb.getConnection()

    ## Reset the `seen` bit for all active users
    dbcursor.execute(reset_query, {'lang': self._lang})
    dbconn.commit()
    logging.info('number of rows with updated seen-values: {}'.format(dbcursor.rowcount))

    # Pages we skip when following links to our templates.
    ignorePages = set()
    for page_title in config.template_stoplist[self._lang]:
        ignorePages.add(pywikibot.Page(self._site, page_title))

    # Config templates for this language Wikipedia.
    configTemplates = config.config_templates[self._lang]
    configPages = set()

    # Splits a page title into username + subpage-name.
    subpageSplitRe = re.compile(r'(?P<username>[^/]+)(?P<subname>/.*)')

    # Userbox first, as settings in the config template take priority.
    for temp_nick in ['userbox', 'config']:
        configPage = pywikibot.Page(self._site, configTemplates[temp_nick])
        configPages.add(configPage.title().strip().lower())

        # Redirects to the config template may belong to users who
        # changed usernames; write them all to the warnings file.
        warningsList = list(configPage.getReferences(
            onlyTemplateInclusion=True, redirectsOnly=True))
        if len(warningsList) > 0:
            logging.info('writing {n} pages that are redirects to warnings file.'.format(n=len(warningsList)))
            with codecs.open(config.userlist_warnings, 'a', 'utf-8') as warningsFile:
                warningsFile.write("The following pages are redirects:\n")
                for page in warningsList:
                    warningsFile.write(page.title())
                    warningsFile.write("\n")

        # From here on warningsList holds (pywikibot.Page, description)
        # tuples for pages whose configuration has errors.
        warningsList = []

        # Walk all transclusions, preloading 10 pages at a time.
        for page in PreloadingGenerator(
                configPage.getReferences(onlyTemplateInclusion=True,
                                         redirectsOnly=False),
                step=10):
            # Skip our own pages.
            if page in ignorePages:
                continue

            logging.info('now processing {}'.format(page.title()))

            # The page must be in user or user talk space, otherwise
            # we cannot tell which user it belongs to.
            if page.namespace() not in [2, 3]:
                warningsList.append((page, "namespace not user or user talk"))
                continue

            # Split the namespace-less title on the first "/" in case
            # it is a subpage.
            page_title = page.title(withNamespace=False, withSection=False)
            subpageTitle = None
            username = ''
            matchObj = subpageSplitRe.match(page_title)
            if matchObj:
                # It is a subpage; remember its full title.
                subpageTitle = page.title()
                username = matchObj.group('username')
                logging.info('found subpage {subtitle} of user {username}'.format(
                    subtitle=matchObj.group('subname'), username=username))
            else:
                username = page_title

            subscriber = Subscriber(self._lang, username, site=self._site)

            # Check the timestamp of the user's last contribution and
            # set the retired bit if the user is no longer active.
            lastEditTuple = None
            try:
                lastEditTuple = next(subscriber.contributions(total=5))
            except StopIteration:
                pass  # user has made no edits, so there is no tuple
            except KeyError:
                # Guard against an old pywikibot bug that raised
                # KeyError when a revision's comment was deleted.
                pass
            if lastEditTuple is not None:
                lastEditTime = lastEditTuple[2]
                logging.info('user last edited at {}'.format(lastEditTime))
                timeSinceLastEdit = datetime.utcnow() - lastEditTime
                if timeSinceLastEdit.days >= config.retired_days:
                    subscriber._retired = 1

            # NOTE: Don't skip the template checking when the user is
            # retired.  Doing so would store default values for our
            # users, and since the page text is already fetched this
            # processing is cheap.
            parsed_page = mwp.parse(page.get(), skip_style_tags=True)
            for template in parsed_page.filter_templates(recursive=True):
                template_name = template.name.strip().lower()
                if not template_name in configPages:
                    continue

                # A userbox template found on a subpage means the user
                # keeps userboxes there; post to user talk instead.
                if subpageTitle is not None and template_name \
                   == configTemplates['userbox'].strip().lower():
                    subpageTitle = None

                for param in template.params:
                    ## showkey is True for key/value pairs; translate
                    ## the key (e.g. Norwegian -> English)
                    if param.showkey:
                        translatedKey = self._translate_key(
                            param.name.strip().lower())
                    else:
                        translatedKey = self._translate_key(
                            param.value.strip().lower())
                    if translatedKey is None:
                        warningsList.append((page, "unaccepted parameter"))
                        continue
                    if param.showkey:
                        # parameter is OK, use it:
                        subscriber.useParam(translatedKey,
                                            param.value.strip().lower())
                    else:
                        ## Note: This works because the methods behave
                        ## sensibly if the value evaluates to False
                        subscriber.useParam(translatedKey, "")

            # Always updating this ensures that we capture users who
            # return and do not specify where they want it posted.
            subscriber._page_title = subpageTitle

            ## FIXME: if we've gone through all the templates on a page
            ## and not found SuggestBot's template, we have a parsing
            ## error.  In that case, we shouldn't update the database?
            logging.info('updating database for this user')
            subscriber.update(sbdb)

        if len(warningsList) > 0:
            logging.info("writing {n} users that have errors to warnings file".format(n=len(warningsList)))
            warningFilename = "{base}.{lang}".format(
                base=config.userlist_warnings, lang=self._lang)
            with codecs.open(warningFilename, 'a', 'utf-8') as warningsFile:
                warningsFile.write("The following users had errors in their configuration:\n")
                for (page, reason) in warningsList:
                    warningsFile.write(page.title())
                    warningsFile.write(" - %s" % (reason,))
                    warningsFile.write("\n")

    # Deactivate the users we did not see during this run.
    dbcursor.execute(inactive_query, {'lang': self._lang})
    dbconn.commit()
    logging.info("number of users set as inactive: {}".format(dbcursor.rowcount))
    sbdb.disconnect()
    return()
def get_popquals(lang, titles, do_tasks=False):
    '''
    Get popularity and quality data for the given list of article titles.
    If do_tasks is set, also get task recommendations.

    :param titles: Article titles to retrieve data for
    :type titles: list (of str)
    :param do_tasks: Should we get recommendations for specific tasks?
    :type do_tasks: bool
    '''
    site = pywikibot.Site(lang)
    # Wrap the titles in Page objects.
    pages = [sup.Page(site, title) for title in titles]

    def prettify(rating):
        # Start/Stub are capitalised words; other classes are acronyms.
        if rating in ['start', 'stub']:
            return rating.capitalize()
        return rating.upper()

    # One pooled HTTP session for all pageview requests.
    http_session = requests.Session()

    # Dictionaries with popularity and quality data, one per page.
    result = []
    for page in PreloadingGenerator(
            sup.PredictionGenerator(site, sup.RatingGenerator(pages))):
        task_suggestions = page.get_suggestions()
        # Page data we will return, with some defaults pre-filled.
        pdata = {
            'title': page.title(),
            'pop': 'High',
            'popcount': round(page.get_views(http_session=http_session)),
            'qual': page.get_rating(),
            'pred': 'NA',
            'predclass': page.get_prediction(),
            'work': ['{0}:{1}'.format(k, v)
                     for k, v in task_suggestions.items()],
            'pred-numeric': -1
        }
        pdata['predclass'] = prettify(pdata['predclass'])
        pdata['qual'] = prettify(pdata['qual'])

        # Downgrade popularity when below the thresholds.
        if pdata['popcount'] <= config.pop_thresh_low:
            pdata['pop'] = 'Low'
        elif pdata['popcount'] <= config.pop_thresh_med:
            pdata['pop'] = 'Medium'

        # Derive the coarse quality level from assessment/prediction.
        if pdata['qual'] in ['FA', 'A', 'GA'] \
           or pdata['predclass'] in ['FA', 'GA']:
            pdata['pred'], pdata['pred-numeric'] = 'High', 3
        elif pdata['predclass'] in ['B', 'C']:
            pdata['pred'], pdata['pred-numeric'] = 'Medium', 2
        else:
            pdata['pred'], pdata['pred-numeric'] = 'Low', 1

        result.append(pdata)
    return result