def main(rootpage, saveto):
    wptools = WikiProjectTools()
    bot = pywikibot.Site('en', 'wikipedia')
    projects = []
    output = 'These WikiProjects are not in any WikiProject meta-categories:\n\n'

    # Generating category whitelist
    wpcats = WikiProjectCategories()
    tree = wpcats.generate()
    whitelist = list(treegen(tree))  # Run through a simple generator function to produce a flat list
    whitelist = tuple(set(whitelist))  # De-duplicating and making into a tuple

    page = pywikibot.Page(bot, rootpage + '/All')
    contents = mwph.parse(page.text)
    contents = contents.filter_templates()
    for t in contents:
        if t.name.strip() == "WikiProject directory entry":
            project = str(t.get('project').value).strip().replace(' ', '_')
            # Give me a list of all the categories, as long as it's on the whitelist
            query = wptools.query('wiki', "select distinct cl_to from categorylinks join page on categorylinks.cl_from=page.page_id where page_namespace in (4, 14) and page_title = {0} and cl_to in {1};".format('"' + project + '"', whitelist), None)
            if len(query) == 0:  # If page is in none of the whitelisted categories
                output += "# [[Wikipedia:{0}|{0}]]\n".format(project.replace('_', ' '))

    page = pywikibot.Page(bot, saveto)
    page.text = output
    page.save('Updating', minor=False)
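# A minimal sketch of the treegen helper called above; it is not defined in this
# file, so this is an assumption based on the comment "Run through a simple
# generator function to produce a flat list". It walks the nested dict returned
# by WikiProjectCategories.generate() (see build_cat_tree below, whose leaves
# are None) and yields every category name.
def treegen(tree):
    for category, subtree in tree.items():
        yield category
        if subtree:  # build_cat_tree returns None once max_depth is reached
            yield from treegen(subtree)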
def main():
    wptools = WikiProjectTools()
    bot = pywikibot.Site('en', 'wikipedia')
    q = ('select page_title from page where page_namespace = 0 '
         'and page_is_redirect = 0 and page_title not in '
         '(select page_title from page join page_props on pp_page = page_id '
         'where page_namespace = 0 and pp_propname = "wikibase_item") '
         'order by page_id;')
    no_wikidata = [x[0].decode('utf-8') for x in wptools.query('wiki', q, None)]
    total_count = len(no_wikidata)  # Capturing this before truncating list
    no_wikidata = no_wikidata[:100]
    page = pywikibot.Page(bot, 'User:Reports_bot/No_Wikidata_item')
    content = "'''Total Articles Missing From Wikidata:''' " + str(total_count) + "\n\n"
    for title in no_wikidata:
        content += "* [[" + title.replace('_', ' ') + \
                   "]] ([https://www.wikidata.org/w/index.php?search=" + \
                   quote(title) + \
                   "&title=Special%3ASearch&fulltext=1 Search on Wikidata])\n"
    page.text = content
    page.save("Updating list", minor=False)
def __init__(self):
    self.wptools = WikiProjectTools()
    q = ('create table if not exists notifications '
         '(n_id int(11) NOT NULL auto_increment, '
         'n_project VARCHAR(255) character set utf8 collate utf8_unicode_ci, '
         'n_variant VARCHAR(255) character set utf8 collate utf8_unicode_ci, '
         'n_content TEXT character set utf8 collate utf8_unicode_ci, '
         'primary key (n_id)) '
         'engine=innodb character set=utf8;')
    # self.wptools.query('index', q, None)
    self.bot = pywikibot.Site('en', 'wikipedia', user='******')

    # Recognized notification variants
    # A variant that is not any of these kinds will cause an error
    # variantname --> template parameter name
    date = datetime.datetime.utcnow().strftime('%d %B %Y')
    self.contentwrapper = '<div style="max-width:500px; padding-bottom:2.5em;">'
    self.recognizedvariants = {
        'newmember': 'notification_when_a_new_member_joins',
        'newdiscussion': 'notification_when_a_new_discussion_topic_is_posted',
    }
    self.varianttext = {
        'newmember': '==New member report for ' + date + '==\n'
                     + self.contentwrapper
                     + 'The following users joined the WikiProject in the past day:\n',
        'newdiscussion': '==New discussion report for ' + date + '==\n'
                         + self.contentwrapper
                         + 'New discussions that are of interest to the WikiProject:\n',
    }
def main(rootpage):
    bot = pywikibot.Site('en', 'wikipedia')
    wptools = WikiProjectTools()
    config = json.loads(wptools.query('index', 'select json from config;', None)[0][0])
    postto = []

    # In this loop, *project* is a dictionary of configurations
    for project in config['projects']:
        if 'suggestbot' in project:  # Is the key even defined?
            if project['suggestbot'] == True and project['type'] == 'Category':
                postto.append(project['name'])
                page = pywikibot.Page(bot, rootpage + '/SuggestFarm/' + project['name'][10:])
                page.text = "{{{{User:SuggestBot/suggest|Category:{0}}}}}".format(project['source'])
                page.save("Requesting latest recommendations from SuggestBot", minor=False)

    print("Sleeping for 30 minutes.")
    time.sleep(1800)  # Sleeping 30 minutes to wait for SuggestBot to do its thing

    # In this loop, *project* is a string (the name of the project)
    for project in postto:
        page = pywikibot.Page(bot, rootpage + '/SuggestFarm/' + project[10:])
        # Isolating the table from the output
        table = page.text.split('{|', 1)[1]
        table = table.split('|}', 1)[0]
        table = '{|\n' + table + '\n|}'
        # Saving table to WikiProject
        page = pywikibot.Page(bot, project + '/Edit articles')
        page.text = '===Edit articles===\n{{WPX last updated|' + project + '/Edit articles' + '}}\n\n' + table
        page.save("Updating list", minor=False, asynchronous=True)  # 'async' became a reserved word in Python 3.7
def main():
    print("Loading...")
    wptools = WikiProjectTools()
    query = wptools.query('index', 'select pi_page, pi_project from projectindex;', None)
    pages = {}
    for row in query:
        pi_page = row[0]
        pi_project = row[1]
        try:
            pages[pi_project].append(pi_page)
        except KeyError:
            pages[pi_project] = [pi_page]

    # Compare!
    intersect_counts = {}
    regex = re.compile('/.*')
    for wikiproject_x in pages.keys():  # lol WikiProject X
        print("Working on: " + wikiproject_x)
        intersect_counts[wikiproject_x] = {}
        for wikiproject_y in pages.keys():
            if wikiproject_x == wikiproject_y:
                continue  # Don't compare a project to itself
            test1 = re.sub(regex, '', wikiproject_x)
            test2 = re.sub(regex, '', wikiproject_y)
            if test1 == test2:
                continue  # Filters out comparisons where one is a subpage of another
            s = set(pages[wikiproject_x])
            intersect_counts[wikiproject_x][wikiproject_y] = len(
                [n for n in pages[wikiproject_y] if n in s])

    bot = pywikibot.Site('en', 'wikipedia')
    print("Sorting and saving...")
    for project in intersect_counts.keys():
        # Sorts from highest to lowest
        ordered = sorted(intersect_counts[project].items(),
                         key=operator.itemgetter(1),
                         reverse=True)
        saveto = 'Wikipedia:Related_WikiProjects/' + project[10:]
        page = pywikibot.Page(bot, saveto)
        draft = '{{WPX header|color={{{1|#37f}}}|Related WikiProjects<noinclude>: [[' \
                + project.replace('_', ' ') + '|]]</noinclude>}}\n'
        draft += '{{WPX list start|intro={{WPX last updated|' + saveto + '}}}}\n'
        for x in range(0, min(10, len(ordered))):  # Top ten, guarded in case fewer than ten projects intersect
            if ordered[x][1] > 0:
                draft += "{{WPX block|color={{{1|#37f}}}|" \
                         + "largetext='''[[{0}|]]''' ([[Wikipedia:Related WikiProjects/{1}|view related]])|".format(ordered[x][0].replace('_', ' '), ordered[x][0].replace('_', ' ')[10:]) \
                         + "smalltext={0} articles in common}}}}\n".format(str(ordered[x][1]))
        draft += '{{WPX list end|more=' + saveto + '}}'
        if page.text != draft:
            page.text = draft
            page.save('Updating', minor=False, asynchronous=True)
def __init__(self, viewdump=None):
    print("Initializing the Priority Predictor")
    self.wptools = WikiProjectTools()
    if viewdump is None:  # If a dumped JSON file of pageviews is not specified
        self.dump = getviewdump(self.wptools, 'en', days=30)
    else:
        with open(viewdump, 'r') as f:
            self.dump = json.load(f)  # Load pageviews from a dumped JSON file
def build_cat_tree(cat_name, max_depth=5):
    if max_depth == 0:
        return None
    wptools = WikiProjectTools()
    query = wptools.query(
        'wiki',
        'select distinct page.page_title from categorylinks '
        'join page on categorylinks.cl_from=page.page_id '
        'where page_namespace = 14 and cl_to = "{0}" '
        'and page_title like "%\_WikiProjects" '
        'and page_title not like "Inactive_%";'.format(cat_name),
        None)
    retval = {}
    for row in query:
        category = row[0].decode('utf-8')
        retval[category] = build_cat_tree(category, max_depth=max_depth - 1)
    return retval
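# Example usage (category names here are hypothetical): starting from a root
# meta-category, build a nested dict of "... WikiProjects" subcategories, two
# levels deep. Leaves are None once max_depth is exhausted.
#
#   tree = build_cat_tree('WikiProjects_by_area', max_depth=2)
#   # -> {'Arts_WikiProjects': {'Music_WikiProjects': None, ...}, ...}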
def __init__(self):
    self.bot = pywikibot.Site('en', 'wikipedia')
    self.wptools = WikiProjectTools()
    self.projects = []
    self.predictorseed = {}
    self.unknownquality = {}
    self.unknownpriority = {}
    self.config = self.wptools.query('index', 'select json from config;', None)
    self.config = json.loads(self.config[0][0])
    for entry in self.config['projects']:
        if 'assessment_tools' in entry \
           and 'at_category' in entry \
           and 'at_unknown_quality' in entry \
           and 'at_unknown_priority' in entry:
            project = entry['name'][10:]  # Normalizing title
            self.projects.append(project)
            self.predictorseed[project] = entry['at_category'].replace(' ', '_')
            self.unknownquality[project] = entry['at_unknown_quality'].replace(' ', '_')
            self.unknownpriority[project] = entry['at_unknown_priority'].replace(' ', '_')
def main():
    wptools = WikiProjectTools()
    bot = pwb.Site('en', 'wikipedia', user='******')

    # Generate list of WikiProjects with eponymous categories
    q = ('select page_title from page where page_namespace = 14 '
         'and page_title in (select page_title from page where '
         'page_namespace = 4 and page_title like "WikiProject_%" '
         'and page_is_redirect = 0);')
    pairs = [row[0].decode('utf-8') for row in wptools.query('wiki', q, None)]

    for pair in pairs:
        # Load WikiProject page
        project_page = pwb.Page(bot, 'Wikipedia:' + pair)
        # Preserve only categories that aren't in the style "X WikiProjects"
        preserve = [c for c in pwb.textlib.getCategoryLinks(project_page.text)
                    if str(c)[-15:] != ' WikiProjects]]']
        # Check for presence of removable categories; otherwise, don't bother
        if preserve != pwb.textlib.getCategoryLinks(project_page.text):
            # Load WikiProject category
            project_cat = pwb.Page(bot, 'Category:' + pair)
            # List categories to add to project category
            page_cats = [c for c in pwb.textlib.getCategoryLinks(project_page.text)
                         if str(c)[-15:] == ' WikiProjects]]']
            cat_cats = [c for c in pwb.textlib.getCategoryLinks(project_cat.text)
                        if str(c)[-15:] == ' WikiProjects]]']
            to_add = list(set(page_cats) - set(cat_cats))
            # Make changes and save page
            project_cat.text = pwb.textlib.replaceCategoryLinks(project_cat.text, to_add, addOnly=True)
            project_page.text = pwb.textlib.replaceCategoryLinks(project_page.text, preserve)
            summary = "WikiProject category migration. See [[User:Harej bot/WikiProject category migration]]."
            project_page.save(summary, minor=False)
            project_cat.save(summary, minor=False)
def __init__(self):
    self.wptools = WikiProjectTools()
    self.wpn = WikiProjectNotifications()
def __init__(self):
    self.wptools = WikiProjectTools()
    self.bot = pywikibot.Site('en', 'wikipedia')
def main(rootpage):
    d = WikiProjectDirectory()
    wptools = WikiProjectTools()
    wpcats = WikiProjectCategories()
    tree = wpcats.generate()
    bot = pywikibot.Site('en', 'wikipedia')
    directories = {}
    directoryrow = {}
    projects = []

    # Generate directoryrows and projects lists based on the /All directory:
    page = pywikibot.Page(bot, rootpage + '/All')
    contents = mwph.parse(page.text)
    contents = contents.filter_templates()
    for t in contents:
        if t.name.strip() == "WikiProject directory entry":
            name = str(t.get('project').value).strip().replace(' ', '_')
            projects.append(name)
            directoryrow[name] = str(t) + "\n"

    # The rest of this stuff is copied from directory.py
    index_primary = sorted([key for key in tree.keys()])
    index_secondary = {}
    indextext = "'''[[{0}/All|All WikiProjects]]'''\n\n".format(rootpage)
    for firstlevel in tree.keys():
        directories[firstlevel] = "={0}=\n".format(firstlevel.replace('_', ' '))
        directories[firstlevel] += d.listpull(wptools, projects, directoryrow, firstlevel)  # For immediate subcats of WikiProjects_by_area
        directories[firstlevel] += d.treeiterator(wptools, tree[firstlevel], projects, directoryrow, firstlevel)  # For descendants of those immediate subcats.
        index_secondary[firstlevel] = sorted([key for key in tree[firstlevel].keys()])

    # Updating the directory index
    for firstlevel in index_primary:
        firstlevel_normalized = firstlevel.replace('_', ' ')
        indextext += ";[[{0}/{1}|{1}]]".format(rootpage, firstlevel_normalized)
        if len(tree[firstlevel]) > 0:
            indextext += " : "
            for secondlevel in index_secondary[firstlevel]:
                indextext += "[[{0}/{1}#{2}|{2}]] – ".format(rootpage, firstlevel_normalized, secondlevel.replace('_', ' '))
            indextext = indextext[:-3]  # Truncates trailing dash and is also a cute smiley face
        indextext += "\n\n"
    saveindex = pywikibot.Page(bot, 'Template:WikiProject directory index')
    saveindex.text = indextext
    saveindex.save('Updating', minor=False, asynchronous=True)

    # Generate directories and save!
    for directory in directories.keys():
        contents = directories[directory]
        page = pywikibot.Page(bot, rootpage + "/" + directory)
        if contents != page.text:  # Checking to see if a change was made to cut down on API save queries
            oldcontents = page.text
            page.text = contents
            page.save('Updating', minor=False, asynchronous=True)
def main():
    # This is used for Aaron Halfaker's API wrapper...
    loginfile = configparser.ConfigParser()
    loginfile.read([os.path.expanduser('~/.wiki.ini')])
    username = loginfile.get('wiki', 'username')
    password = loginfile.get('wiki', 'password')

    # ...And this is for Pywikibot
    bot = pywikibot.Site('en', 'wikipedia')
    wptools = WikiProjectTools()

    now = datetime.datetime.utcnow()
    now = now.strftime('%Y%m%d%H%M%S')  # Converts timestamp to MediaWiki format

    # Pulling timestamp of the last time the script was run
    query = wptools.query('index', 'select lu_timestamp from lastupdated where lu_key = "new_discussions";', None)
    lastupdated = query[0][0]

    # Polling for newest talk page posts in the last thirty minutes
    query = wptools.query('wiki', 'select distinct recentchanges.rc_this_oldid, page.page_id, recentchanges.rc_title, recentchanges.rc_comment, recentchanges.rc_timestamp, page.page_namespace from recentchanges join page on recentchanges.rc_namespace = page.page_namespace and recentchanges.rc_title = page.page_title join categorylinks on page.page_id=categorylinks.cl_from where rc_timestamp >= {0} and rc_timestamp < {1} and rc_comment like "% new section" and rc_deleted = 0 and cl_to like "%_articles" and page_namespace not in (0, 2, 6, 8, 10, 12, 14, 100, 108, 118) order by rc_timestamp asc;'.format(lastupdated, now), None)

    # Cleaning up output
    namespace = {
        1: 'Talk:',
        3: 'User_talk:',
        4: 'Wikipedia:',
        5: 'Wikipedia_talk:',
        7: 'File_talk:',
        9: 'MediaWiki_talk:',
        11: 'Template_talk:',
        13: 'Help_talk:',
        15: 'Category_talk:',
        101: 'Portal_talk:',
        109: 'Book_talk:',
        119: 'Draft_talk:',
        447: 'Education_Program_talk:',
        711: 'TimedText_talk:',
        829: 'Module_talk:',
        2600: 'Topic:'
    }
    output = []
    for row in query:
        rc_id = row[0]
        page_id = row[1]
        rc_title = row[2].decode('utf-8')
        rc_comment = row[3].decode('utf-8')
        rc_comment = rc_comment[3:]  # Truncate beginning part of the edit summary
        rc_comment = rc_comment[:-15]  # Truncate end of the edit summary
        rc_timestamp = row[4].decode('utf-8')
        rc_timestamp = datetime.datetime.strptime(rc_timestamp, '%Y%m%d%H%M%S')
        rc_timestamp = rc_timestamp.strftime('%H:%M, %d %B %Y (UTC)')
        page_namespace = row[5]
        page_namespace = namespace[page_namespace]
        session = api.Session("https://en.wikipedia.org/w/api.php", user_agent='WPX Revert Checker')
        session.login(username, password)
        # Check if revision has been reverted
        reverted = reverts.api.check(session, rc_id, page_id, 3, None, 172800, None)
        if reverted is None:
            entry = {'title': (page_namespace + rc_title),
                     'section': rc_comment,
                     'timestamp': rc_timestamp}
            output.append(entry)

    # Loading list of WikiProjects signed up to get lists of new discussions
    config = json.loads(wptools.query('index', 'select json from config;', None)[0][0])
    if config['defaults']['new_discussions'] == False:  # i.e. if New Discussions is an opt-in system
        whitelist = []  # Whitelisted WikiProjects for new discussion lists
        for project in config['projects']:
            try:
                project['new_discussions']
            except KeyError:
                continue
            else:
                if project['new_discussions'] == True:
                    whitelist.append(project['name'])
    else:
        whitelist = None
    # A whitelist of [] is one where there is a whitelist, but it's just empty.
    # A whitelist of None is for situations where the need for a whitelist has been obviated.
    # Generating list of WikiProjects for each thread
    for thread in output:
        query = wptools.query('index', 'select distinct pi_project from projectindex where pi_page = %s;', (thread['title'],))  # Parameter must be passed as a tuple
        thread['wikiprojects'] = []
        for row in query:
            wikiproject = row[0].replace('_', ' ')
            if (whitelist is None) or (wikiproject in whitelist):
                thread['wikiprojects'].append(wikiproject)
        for wikiproject in thread['wikiprojects']:
            saveto = wikiproject + '/Discussions'
            page = pywikibot.Page(bot, saveto)
            intro_garbage = '{{WPX header|Discussions|color={{{1|#37f}}}}}\n'
            intro_garbage += '{{{{WPX action box|color={{{{{{2|#086}}}}}}|title=Have a question?|content={{{{Clickable button 2|url=//en.wikipedia.org/wiki/Wikipedia_talk:{0}?action=edit&section=new|Ask the WikiProject|class=mw-ui-progressive mw-ui-block}}}}\n\n{{{{Clickable button 2|Wikipedia talk:{0}|View Other Discussions|class=mw-ui-block}}}}}}}}\n'.format(wikiproject[10:].replace(' ', '_'))
            intro_garbage += '{{{{WPX list start|intro={{{{WPX last updated|{0}}}}}}}}}\n\n'.format(saveto)
            draft = '<noinclude><div style="padding-bottom:1em;">{{{{Clickable button 2|{0}|Return to WikiProject|class=mw-ui-neutral}}}}</div>\n</noinclude>'.format(wikiproject) + intro_garbage
            submission = '{{{{WPX new discussion|color={{{{{{1|#37f}}}}}}|title={0}|section={1}|timestamp={2}}}}}\n'.format(thread['title'].replace('_', ' '), thread['section'], thread['timestamp'])
            notification = "* '''[[{0}#{1}|{1}]]''' on {0}".format(thread['title'].replace('_', ' '), thread['section'])
            queue_notification(wikiproject[10:].replace(' ', '_'), notification)
            index = mwparserfromhell.parse(page.text)
            index = index.filter_templates()
            templatelist = []
            for i in index:
                if i.name.strip() == "WPX new discussion":
                    templatelist.append(str(i))
            templatelist = templatelist[:14]  # Sayonara, old threads!
            page.text = draft + submission
            if len(templatelist) > 3:
                templatelist[2] += "<noinclude>"  # Anything after the third item will not be transcluded
                templatelist[len(templatelist) - 1] += "</noinclude>"
            for i in templatelist:
                page.text += i + "\n"
            page.text += "{{{{WPX list end|more={0}}}}}".format(saveto.replace(' ', '_'))
            page.save('New discussion on [[{0}]]'.format(thread['title'].replace('_', ' ')), minor=False)

    # Update the Last Updated field with new timestamp
    wptools.query('index', 'update lastupdated set lu_timestamp = {0} where lu_key = "new_discussions";'.format(now), None)
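# A minimal sketch of queue_notification, which is called above but not defined
# in this file. Assumption: it stages a row in the "notifications" table whose
# schema appears in WikiProjectNotifications.__init__ above (n_project,
# n_variant, n_content), via the same wptools.query interface; the 'variant'
# default mirrors the recognizedvariants keys defined there.
def queue_notification(project, content, variant='newdiscussion'):
    wptools = WikiProjectTools()
    wptools.query('index',
                  'insert into notifications (n_project, n_variant, n_content) '
                  'values (%s, %s, %s);',
                  (project, variant, content))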
def main(self, rootpage):
    # Initializing...
    bot = pywikibot.Site('en', 'wikipedia')
    wptools = WikiProjectTools()
    config = json.loads(wptools.query('index', 'select json from config;', None)[0][0])

    # Get list of people who opted out
    optout = pywikibot.Page(bot, 'User:Reports bot/Opt-out')
    blacklist = []
    regexes = [re.findall(r'\[\[User:(.*?)\|', optout.text, re.I),
               re.findall(r'\{\{user\|(.*?)\}\}', optout.text, re.I),
               re.findall(r'\[\[:User:(.*?)\]', optout.text, re.I),
               re.findall(r'\[\[:User talk:(.*?)\]', optout.text, re.I)]
    for results in regexes:
        for user in results:
            blacklist.append(user)

    # Bots are to be excluded
    for result in wptools.query('wiki', "select user_name from user_groups left join user on user_id = ug_user where ug_group = 'bot';", None):
        blacklist.append(result[0].decode('utf-8'))

    # List of projects we are working on
    # Methodology: List from Project Index + List from Formal Definition, minus duplicates
    # This will cover all of our bases.
    articles = {}
    counter = 0
    while True:  # I am a bad man for doing this
        query = wptools.query('index', 'select pi_page, pi_project from projectindex where pi_id > {0} and pi_id <= {1};'.format(counter, counter + 1000000), None)
        if len(query) == 0:
            break
        for pair in query:
            # Normalizing by getting rid of namespace
            page = pair[0]
            page = page.replace('Draft_talk:', '')
            page = page.replace('Talk:', '')
            proj = pair[1][10:]  # Normalizing by getting rid of "Wikipedia:"
            try:
                articles[proj].append(page)
            except KeyError:
                articles[proj] = [page]
        counter += 1000000

    projects = [project for project in articles.keys()]
    q = ('select distinct page.page_title from page '
         'join categorylinks on page.page_id = categorylinks.cl_from '
         'left join redirect on page.page_id = redirect.rd_from '
         'where page_namespace = 4 '
         'and page_title not like "%/%" '
         'and rd_title is null '
         'and (cl_to in '
         '(select page.page_title from page '
         'where page_namespace = 14 and '
         'page_title like "%\_WikiProjects" '
         'and page_title not like "%\_for\_WikiProjects" '
         'and page_title not like "%\_of\_WikiProjects") '
         'or page_title like "WikiProject\_%");')
    formaldefinition = wptools.query('wiki', q, None)  # http://quarry.wmflabs.org/query/3509
    for row in formaldefinition:
        row = row[0].decode('utf-8')
        if row not in projects:
            projects.append(row)
    projects.sort()

    directories = {'All': ''}  # All projects, plus subdirectories to be defined below.
    directoryrow = {}

    # Alright! Let's run some reports!
    for project in projects:
        # Seeding directory row and profile page
        if project not in articles:
            articles[project] = []
        project_normalized = project.replace('_', ' ')

        # List of active project participants (less blacklist)
        wp_editors = []
        start_date = time.strftime('%Y%m%d000000', time.gmtime(time.time() - (60 * 60 * 24 * 90)))  # 90 days
        end_date = time.strftime('%Y%m%d000000', time.gmtime(time.time()))  # Today
        query = "select rev_user_text from page left join revision on page_id = rev_page where (page_namespace = 4 OR page_namespace = 5) and (page_title like \"{0}/%%\" OR page_title = \"{0}\") and rev_timestamp > {1} and rev_timestamp < {2} group by rev_user_text HAVING count(*) > 1;".format(project, start_date, end_date)
        for result in wptools.query('wiki', query, None):
            if result[0] is not None:
                user = result[0].decode('utf-8')
                if user not in blacklist:
                    wp_editors.append(user)
        wp_editors.sort()

        # List of active subject area editors (less blacklist)
        start_date = time.strftime('%Y%m%d000000', time.gmtime(time.time() - (60 * 60 * 24 * 30)))  # 30 days
        end_date = time.strftime('%Y%m%d000000', time.gmtime(time.time()))  # Today

        if len(articles[project]) > 0:
            subject_editors = []
            packages = []
            for i in range(0, len(articles[project]), 10000):
                packages.append(articles[project][i:i + 10000])

            counter = 0
            for package in packages:
                counter += 1
                if len(package) > 1:
                    query_builder = 'select rev_user_text from page left join revision on page_id = rev_page where page_namespace in (0, 1, 118, 119) and page_title in {0} and rev_timestamp > {1} and rev_timestamp < {2} order by rev_user_text;'.format(tuple(package), start_date, end_date)
                else:
                    query_builder = 'select rev_user_text from page left join revision on page_id = rev_page where page_namespace in (0, 1, 118, 119) and page_title = "{0}" and rev_timestamp > {1} and rev_timestamp < {2} order by rev_user_text;'.format(package[0], start_date, end_date)
                for result in wptools.query('wiki', query_builder, None):
                    if result[0] is not None:
                        subject_editors.append(result[0].decode('utf-8'))

            subject_editors = dict(Counter(subject_editors))  # Convert the list to a dictionary with username as key and edit count as value
            subject_editors_filtered = []
            for user in subject_editors.keys():
                if user not in blacklist:
                    if subject_editors[user] > 4:
                        subject_editors_filtered.append(user)
            subject_editors = subject_editors_filtered  # And now assigned back.
            subject_editors.sort()
        else:
            subject_editors = []

        # Generate and Save Profile Page
        wp_editors_formatted = ""
        subject_editors_formatted = ""
        if len(wp_editors) > 0:
            for editor in wp_editors:
                wp_editors_formatted += "\n* [[User:{0}|{0}]] ([[User talk:{0}|talk]])".format(editor)
        else:
            wp_editors_formatted = ""
        if len(subject_editors) > 0 and len(subject_editors) < 3200:
            for editor in subject_editors:
                subject_editors_formatted += "\n* [[User:{0}|{0}]] ([[User talk:{0}|talk]])".format(editor)
        else:
            subject_editors_formatted = ""
        profilepage = "{{{{WikiProject description page | project = {0} | list_of_active_wikiproject_participants = {1} | list_of_active_subject_area_editors = {2}}}}}".format(project_normalized, wp_editors_formatted, subject_editors_formatted)
        page = pywikibot.Page(bot, rootpage + '/Description/' + project_normalized)
        if profilepage != page.text:  # Checking to see if a change was made to cut down on API queries
            page.text = profilepage
            page.save('Updating', minor=False, asynchronous=True)

        # Construct directory entry
        directoryrow[project] = "{{{{WikiProject directory entry | project = {0} | number_of_articles = {1} | wp_editors = {2} | scope_editors = {3}}}}}\n".format(project_normalized, len(articles[project]), len(wp_editors), len(subject_editors))

    # Assign directory entry to relevant directory pages ("All entries" and relevant subdirectory pages)
    for entry in sorted(directoryrow.items(), key=operator.itemgetter(1)):  # Sorting into alphabetical order
        directories['All'] += entry[1]
    directories['All'] = "{{WikiProject directory top}}\n" + directories['All'] + "|}"

    wpcats = WikiProjectCategories()
    tree = wpcats.generate()
    index_primary = sorted([key for key in tree.keys()])
    index_secondary = {}
    indextext = "'''[[{0}/All|All WikiProjects]]'''\n\n".format(rootpage)
    for firstlevel in tree.keys():
        directories[firstlevel] = "={0}=\n".format(firstlevel.replace('_', ' '))
        directories[firstlevel] += self.listpull(wptools, projects, directoryrow, firstlevel)  # For immediate subcats of WikiProjects_by_area
        directories[firstlevel] += self.treeiterator(wptools, tree[firstlevel], projects, directoryrow, firstlevel)  # For descendants of those immediate subcats.
        index_secondary[firstlevel] = sorted([key for key in tree[firstlevel].keys()])

    # Updating the directory index
    for firstlevel in index_primary:
        firstlevel_normalized = firstlevel.replace('_', ' ')
        indextext += ";[[{0}/{1}|{1}]]".format(rootpage, firstlevel_normalized)
        if len(tree[firstlevel]) > 0:
            indextext += " : "
            for secondlevel in index_secondary[firstlevel]:
                indextext += "[[{0}/{1}#{2}|{2}]] – ".format(rootpage, firstlevel_normalized, secondlevel.replace('_', ' '))
            indextext = indextext[:-3]  # Truncates trailing dash and is also a cute smiley face
        indextext += "\n\n"
    saveindex = pywikibot.Page(bot, 'Template:WikiProject directory index')
    saveindex.text = indextext
    saveindex.save('Updating', minor=False, asynchronous=True)

    # Generate directories and save!
    for directory in directories.keys():
        contents = directories[directory]
        page = pywikibot.Page(bot, rootpage + "/" + directory)
        if contents != page.text:  # Checking to see if a change was made to cut down on API save queries
            oldcontents = page.text
            page.text = contents
            page.save('Updating', minor=False, asynchronous=True)

            # Cleanup of obsolete description pages and "Related WikiProjects" pages
            # (kept inside the branch above so that oldcontents is always bound)
            if directory == 'All':
                oldcontents = mwph.parse(oldcontents)
                oldcontents = oldcontents.filter_templates()
                oldprojectlist = []
                for t in oldcontents:
                    if t.name.strip() == "WikiProject directory entry":
                        oldprojectlist.append(str(t.get('project').value))
                for oldproject in oldprojectlist:
                    oldproject = oldproject.strip().replace(' ', '_')  # Normalizing
                    if oldproject not in projects:
                        deletethis = pywikibot.Page(bot, rootpage + '/Description/' + oldproject)
                        deletethis.text = "{{db-g6|rationale=A bot has automatically tagged this page as obsolete. This means that the WikiProject described on this page has been deleted or made into a redirect}}\n"
                        deletethis.save('Nominating page for deletion', minor=False, asynchronous=True)
                        deletethis = pywikibot.Page(bot, 'Wikipedia:Related WikiProjects/' + oldproject)
                        if deletethis.text != "":
                            deletethis.text = "{{db-g6|rationale=A bot has automatically tagged this page as obsolete. This means that the WikiProject described on this page has been deleted or made into a redirect}}\n"
                            deletethis.save('Nominating page for deletion', minor=False, asynchronous=True)
def go(self):
    wptools = WikiProjectTools()

    # Get list of WikiProjects that also have a self-named category
    output = 'This report highlights discrepancies in WikiProject categorization between WikiProjects and their self-named categories.\n\n'
    query = 'select page_title from page left join redirect on page.page_id = redirect.rd_from where page_title like "WikiProject\_%" and page_namespace = 4 and page_title in (select page_title from page where page_title like "WikiProject\_%" and page_namespace = 14) and rd_title is null;'
    for row in wptools.query('wiki', query, None):
        project = row[0].decode('utf-8')
        cl_projectspace = []   # Read as "category links, Wikipedia namespace"
        cl_categoryspace = []  # Read as "category links, Category namespace"

        for match in wptools.query('wiki', 'select cl_to from categorylinks join page on categorylinks.cl_from=page.page_id where page_namespace = 4 and page_title = "{0}" and cl_to like "%\_WikiProjects" and cl_to not like "Active\_%" and cl_to not like "Semi-active\_%" and cl_to not like "Inactive\_%" and cl_to not like "Defunct\_%";'.format(project), None):
            cl_projectspace.append(match[0].decode('utf-8').replace('_', ' '))

        for match in wptools.query('wiki', 'select cl_to from categorylinks join page on categorylinks.cl_from=page.page_id where page_namespace = 14 and page_title = "{0}" and cl_to like "%\_WikiProjects" and cl_to not like "Active\_%" and cl_to not like "Semi-active\_%" and cl_to not like "Inactive\_%" and cl_to not like "Defunct\_%";'.format(project), None):
            cl_categoryspace.append(match[0].decode('utf-8').replace('_', ' '))

        cl_projectspace.sort()
        cl_categoryspace.sort()

        if cl_projectspace == cl_categoryspace:
            continue  # Don't bother generating a report if both category lists match perfectly

        both = list(set(cl_projectspace).intersection(cl_categoryspace))
        project = project.replace('_', ' ')
        output += "* '''{0}'''\n".format(project)

        output += "** [[Wikipedia:{0}]]: ".format(project)
        for entry in cl_projectspace:
            if entry in both:
                output += "<span style='color: #999'>{0}</span> – ".format(entry)
            else:
                output += "<span style='color: #FF0000'>{0}</span> – ".format(entry)
        output = output[:-2] + "\n"  # Truncate trailing endash and add line break

        output += "** [[:Category:{0}]]: ".format(project)
        for entry in cl_categoryspace:
            if entry in both:
                output += "<span style='color: #999'>{0}</span> –".format(entry)
            else:
                output += "<span style='color: #FF0000'>{0}</span> –".format(entry)
        output = output[:-2] + "\n"  # Truncate trailing endash and add line break

    return output
def prepare(self, saveto):
    wptools = WikiProjectTools()
    bot = pywikibot.Site('en', 'wikipedia')

    # Retrieve list of WikiProjects
    projects = []
    for row in wptools.query('index', 'select distinct pi_project from projectindex;', None):
        projects.append(row[0])

    runtime = datetime.datetime.utcnow().strftime('%H:%M, %d %B %Y (UTC)')
    q = ('select distinct page.page_title from page '
         'join categorylinks on page.page_id = categorylinks.cl_from '
         'left join redirect on page.page_id = redirect.rd_from '
         'where page_namespace = 4 '
         'and page_title not like "%/%" '
         'and rd_title is null '
         'and (cl_to in '
         '(select page.page_title from page '
         'where page_namespace = 14 and '
         'page_title like "%\_WikiProjects" '
         'and page_title not like "%\_for\_WikiProjects" '
         'and page_title not like "%\_of\_WikiProjects") '
         'or page_title like "WikiProject\_%");')
    formaldefinition = wptools.query('wiki', q, None)  # http://quarry.wmflabs.org/query/3509
    for row in formaldefinition:
        row = row[0].decode('utf-8')
        if row not in projects:
            projects.append(row)
    projects.sort()

    packages = [projects[i:i + 50] for i in range(0, len(projects), 50)]

    report = {}
    for package in packages:
        url = "https://en.wikipedia.org/w/api.php?action=query&format=json&prop=info&inprop=watchers&titles="
        for title in package:
            url += title + "|"
        url = url[:-1]  # Truncate trailing pipe
        apiquery = requests.get(url)
        apiquery = apiquery.json()
        for pagedata in apiquery['query']['pages'].values():
            if 'watchers' in pagedata:
                if pagedata['watchers'] > 29:  # Only report pages with at least 30 watchers
                    report[pagedata['title']] = pagedata['watchers']

    report = sorted(report.items(), key=operator.itemgetter(1), reverse=True)

    contents = 'List of WikiProjects by number of watchers of its main page and talk page. A WikiProject not appearing on this list has fewer than 30 watchers. Data as of <onlyinclude>' + runtime + '</onlyinclude>'
    contents += '\n\n{| class="wikitable sortable plainlinks"\n|-\n! No.\n! WikiProject\n! Watchers\n'
    counter = 0
    for pair in report:
        counter += 1
        contents += "|-\n| {0}\n| [[{1}]]\n| {2}\n".format(str(counter), pair[0], pair[1])
    contents += "|}"

    page = pywikibot.Page(bot, saveto)
    page.text = contents
    page.save("Updating report", minor=False)