Example #1
def main(rootpage, saveto):
    wptools = WikiProjectTools()
    bot = pywikibot.Site('en', 'wikipedia')
    projects = []
    output = 'These WikiProjects are not in any WikiProject meta-categories:\n\n'

    # Generating category whitelist
    wpcats = WikiProjectCategories()
    tree = wpcats.generate()
    whitelist = list(treegen(tree))  # Run through a simple generator function to produce a flat list
    whitelist = tuple(set(whitelist))  # De-duplicating and making into a tuple

    page = pywikibot.Page(bot, rootpage + '/All')
    contents = mwph.parse(page.text)
    contents = contents.filter_templates()
    for t in contents:
        if t.name.strip() == "WikiProject directory entry":
            project = str(t.get('project').value).strip().replace(' ', '_')

            # Give me a list of all the categories, as long as it's on the whitelist
            query = wptools.query('wiki', "select distinct cl_to from categorylinks join page on categorylinks.cl_from=page.page_id where page_namespace in (4, 14) and page_title = {0} and cl_to in {1};".format('"' + project + '"', whitelist), None)
            if len(query) == 0:  # If page is in none of the whitelisted categories
                output += "# [[Wikipedia:{0}|{0}]]\n".format(project.replace('_', ' '))

    page = pywikibot.Page(bot, saveto)
    page.text = output
    page.save('Updating', minor=False)
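The treegen helper used above is not shown in this excerpt. A minimal sketch of what it plausibly does, assuming the category tree is a nested dict of dicts (the shape build_cat_tree in Example #7 produces, with None for pruned leaves):

def treegen(tree):
    # Hypothetical sketch: depth-first walk over a nested dict of category
    # names, yielding every key so the caller can flatten the tree.
    for category, subtree in tree.items():
        yield category
        if subtree:  # leaves may be None or empty dicts
            yield from treegen(subtree)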
Example #2
def main():
    wptools = WikiProjectTools()
    bot = pywikibot.Site('en', 'wikipedia')

    q = ('select page_title from page where page_namespace = 0 '
         'and page_is_redirect = 0 and page_title not in '
         '(select page_title from page join page_props on pp_page = page_id '
         'where page_namespace = 0 and pp_propname = "wikibase_item") '
         'order by page_id;')
    no_wikidata = [
        x[0].decode('utf-8') for x in wptools.query('wiki', q, None)
    ]

    total_count = len(no_wikidata)  # Capturing this before truncating list
    no_wikidata = no_wikidata[:100]

    page = pywikibot.Page(bot, 'User:Reports_bot/No_Wikidata_item')

    content = "'''Total Articles Missing From Wikidata:''' " + str(
        total_count) + "\n\n"
    for title in no_wikidata:
        content += "* [[" + title.replace('_', ' ') + \
                   "]] ([https://www.wikidata.org/w/index.php?search=" + \
                   quote(title) + \
                   "&title=Special%3ASearch&fulltext=1 Search on Wikidata])\n"

    page.text = content
    page.save("Updating list", minor=False)
Example #3
    def __init__(self):
        self.wptools = WikiProjectTools()
        q = (
            'create table if not exists notifications '
            '(n_id int(11) NOT NULL auto_increment, '
            'n_project VARCHAR(255) character set utf8 collate utf8_unicode_ci, '
            'n_variant VARCHAR(255) character set utf8 collate utf8_unicode_ci, '
            'n_content TEXT character set utf8 collate utf8_unicode_ci, '
            'primary key (n_id)) '
            'engine=innodb character set=utf8;')
        #self.wptools.query('index', q, None)
        self.bot = pywikibot.Site('en',
                                  'wikipedia',
                                  user='******')

        # Recognized notification variants
        # A variant that is not any of these kinds will cause an error
        # variantname --> template parameter name

        date = datetime.datetime.utcnow().strftime('%d %B %Y')
        self.contentwrapper = '<div style="max-width:500px; padding-bottom:2.5em;">'
        self.recognizedvariants = {
            'newmember': 'notification_when_a_new_member_joins',
            'newdiscussion': 'notification_when_a_new_discussion_topic_is_posted'
        }
        self.varianttext = {
            'newmember': ('==New member report for ' + date + '==\n'
                          + self.contentwrapper
                          + 'The following users joined the WikiProject in the past day:\n'),
            'newdiscussion': ('==New discussion report for ' + date + '==\n'
                              + self.contentwrapper
                              + 'New discussions that are of interest to the WikiProject:\n')
        }
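self.contentwrapper opens a <div> that nothing in this excerpt closes, so a later step presumably appends the notification body and the closing tag. A hypothetical sketch of that step:

    def render(self, variant, content):
        # Hypothetical: build the finished talk-page section for one variant.
        # Unrecognized variants raise, as the comment above promises.
        if variant not in self.recognizedvariants:
            raise ValueError('Unrecognized notification variant: ' + variant)
        return self.varianttext[variant] + content + '</div>'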
Example #4
def main(rootpage):
    bot = pywikibot.Site('en', 'wikipedia')
    wptools = WikiProjectTools()
    config = json.loads(wptools.query('index', 'select json from config;', None)[0][0])

    postto = []
    # In this loop, *project* is a dictionary of configurations
    for project in config['projects']:
        if 'suggestbot' in project:  # Is the key even defined?
            if project['suggestbot'] == True and project['type'] == 'Category':
                postto.append(project['name'])
                page = pywikibot.Page(bot, rootpage + '/SuggestFarm/' + project['name'][10:])
                page.text = "{{{{User:SuggestBot/suggest|Category:{0}}}}}".format(project['source'])
                page.save("Requesting latest recommendations from SuggestBot", minor=False)

    print("Sleeping for 30 minutes.")
    time.sleep(1800)  # Sleeping 30 minutes to wait for SuggestBot to do its thing

    # In this loop, *project* is a string (the name of the project)
    for project in postto:
        page = pywikibot.Page(bot, rootpage + '/SuggestFarm/' + project[10:])
        # Isolating the table from the output
        table = page.text.split('{|', 1)[1]
        table = table.split('|}', 1)[0]
        table = '{|\n' + table + '\n|}'

        # Saving table to WikiProject
        page = pywikibot.Page(bot, project + '/Edit articles')
        page.text = '===Edit articles===\n{{WPX last updated|' + project + '/Edit articles' + '}}\n\n' + table
        page.save("Updating list", minor=False, async=True)
Example #5
def main():

    print("Loading...")
    wptools = WikiProjectTools()
    query = wptools.query('index',
                          'select pi_page, pi_project from projectindex;',
                          None)

    pages = {}
    for row in query:
        pi_page = row[0]
        pi_project = row[1]
        try:
            pages[pi_project].append(pi_page)
        except KeyError:
            pages[pi_project] = [pi_page]

    # Compare!
    intersect_counts = {}
    regex = re.compile('/.*')
    for wikiproject_x in pages.keys():  # lol WikiProject X
        print("Working on: " + wikiproject_x)
        intersect_counts[wikiproject_x] = {}
        for wikiproject_y in pages.keys():
            if wikiproject_x == wikiproject_y:
                continue  # Don't compare a project to itself

            test1 = re.sub(regex, '', wikiproject_x)
            test2 = re.sub(regex, '', wikiproject_y)
            if test1 == test2:
                continue  # Filters out comparisons where one is a subpage of another

            s = set(pages[wikiproject_x])
            intersect_counts[wikiproject_x][wikiproject_y] = len(
                [n for n in pages[wikiproject_y] if n in s])

    bot = pywikibot.Site('en', 'wikipedia')

    print("Sorting and saving...")
    for project in intersect_counts.keys():
        # Sorts from highest to lowest
        ordered = sorted(intersect_counts[project].items(),
                         key=operator.itemgetter(1),
                         reverse=True)
        saveto = 'Wikipedia:Related_WikiProjects/' + project[10:]
        page = pywikibot.Page(bot, saveto)
        draft = '{{WPX header|color={{{1|#37f}}}|Related WikiProjects<noinclude>: [[' \
                + project.replace('_', ' ') + '|]]</noinclude>}}\n'
        draft += '{{WPX list start|intro={{WPX last updated|' + saveto + '}}}}\n'
        for x in range(min(10, len(ordered))):  # Guard against projects with fewer than ten neighbors
            if ordered[x][1] > 0:
                draft += "{{WPX block|color={{{1|#37f}}}|" \
                         + "largetext='''[[{0}|]]''' ([[Wikipedia:Related WikiProjects/{1}|view related]])|".format(ordered[x][0].replace('_', ' '), ordered[x][0].replace('_', ' ')[10:]) \
                         + "smalltext={0} articles in common}}}}\n".format(str(ordered[x][1]))
        draft += '{{WPX list end|more=' + saveto + '}}'
        if page.text != draft:
            page.text = draft
            page.save('Updating', minor=False, asynchronous=True)
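The counting step above is linear in pages[wikiproject_y] with O(1) set lookups. An equivalent, terser form of the same statement, since set.intersection accepts any iterable:

s = set(pages[wikiproject_x])
intersect_counts[wikiproject_x][wikiproject_y] = len(s.intersection(pages[wikiproject_y]))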
Example #6
    def __init__(self, viewdump=None):
        print("Initializing the Priority Predictor")
        self.wptools = WikiProjectTools()

        if viewdump is None:  # If a dumped JSON file of pageviews is not specified
            self.dump = getviewdump(self.wptools, 'en', days=30)
        else:
            with open(viewdump, 'r') as f:
                self.dump = json.load(f)  # Load pageviews from a dumped JSON file
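getviewdump is not shown in this excerpt. A hypothetical sketch built on the public Wikimedia pageviews REST API; the real helper may read a database dump instead, and titles_of_interest is an assumed placeholder for however the caller chooses articles:

import datetime
from urllib.parse import quote

import requests

def getviewdump(wptools, lang, days=30):
    # Hypothetical: map article titles to their total views over `days` days.
    end = datetime.datetime.utcnow()
    start = end - datetime.timedelta(days=days)
    url = ('https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/'
           '{0}.wikipedia/all-access/user/{1}/daily/{2}/{3}')
    dump = {}
    for title in titles_of_interest(wptools):  # assumed helper, not real
        r = requests.get(url.format(lang, quote(title, safe=''),
                                    start.strftime('%Y%m%d00'),
                                    end.strftime('%Y%m%d00')))
        dump[title] = sum(item['views'] for item in r.json().get('items', []))
    return dump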
Example #7
def build_cat_tree(cat_name, max_depth=5):
    if max_depth == 0:
        return None
    wptools = WikiProjectTools()
    query = wptools.query(
        'wiki',
        'select distinct page.page_title from categorylinks join page on categorylinks.cl_from=page.page_id where page_namespace = 14 and cl_to = "{0}" and page_title like "%\_WikiProjects" and page_title not like "Inactive_%";'
        .format(cat_name), None)
    retval = {}
    for row in query:
        category = row[0].decode('utf-8')
        retval[category] = build_cat_tree(category, max_depth=max_depth - 1)
    return retval
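A typical invocation, assuming the root category the directory scripts use elsewhere. Note that each recursive call constructs a fresh WikiProjectTools (and with it a fresh database connection); hoisting the instance out of the function would be an easy optimization.

tree = build_cat_tree('WikiProjects_by_area')
# -> {'Arts_WikiProjects': {'Music_WikiProjects': {...}, ...}, ...}  (shape only, not real data)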
Example #8
    def __init__(self):
        self.bot = pywikibot.Site('en', 'wikipedia')
        self.wptools = WikiProjectTools()
        self.projects = []
        self.predictorseed = {}
        self.unknownquality = {}
        self.unknownpriority = {}

        self.config = self.wptools.query('index', 'select json from config;', None)
        self.config = json.loads(self.config[0][0])

        for entry in self.config['projects']:
            if 'assessment_tools' in entry \
            and 'at_category' in entry \
            and 'at_unknown_quality' in entry \
            and 'at_unknown_priority' in entry:
                project = entry['name'][10:]  # Normalizing title
                self.projects.append(project)
                self.predictorseed[project] = entry['at_category'].replace(' ', '_')
                self.unknownquality[project] = entry['at_unknown_quality'].replace(' ', '_')
                self.unknownpriority[project] = entry['at_unknown_priority'].replace(' ', '_')
Example #9
def main():
    wptools = WikiProjectTools()
    bot = pwb.Site('en', 'wikipedia', user='******')

    # Generate list of WikiProjects with eponymous categories
    q = ('select page_title from page where page_namespace = 14 '
         'and page_title in (select page_title from page where '
         'page_namespace = 4 and page_title like "WikiProject_%" '
         'and page_is_redirect = 0);')

    pairs = [row[0].decode('utf-8') for row in wptools.query('wiki', q, None)]

    for pair in pairs:
        # Load WikiProject page
        project_page = pwb.Page(bot, 'Wikipedia:' + pair)

        # Preserve only categories that aren't in the style "X WikiProjects"
        preserve = [c for c in pwb.textlib.getCategoryLinks(project_page.text)
                    if str(c)[-15:] != ' WikiProjects]]']

        # Check for presence of removable categories; otherwise, don't bother
        if preserve != pwb.textlib.getCategoryLinks(project_page.text):

            # Load WikiProject category
            project_cat = pwb.Page(bot, 'Category:' + pair)
    
            # List categories to add to project category
            page_cats = [c for c in pwb.textlib.getCategoryLinks(project_page.text)
                         if str(c)[-15:] == ' WikiProjects]]']
            cat_cats = [c for c in pwb.textlib.getCategoryLinks(project_cat.text)
                        if str(c)[-15:] == ' WikiProjects]]']
            to_add = list(set(page_cats) - set(cat_cats))

            # Make changes and save page
            project_cat.text = pwb.textlib.replaceCategoryLinks(project_cat.text, to_add, addOnly=True)
            project_page.text = pwb.textlib.replaceCategoryLinks(project_page.text, preserve)
            summary = "WikiProject category migration. See [[User:Harej bot/WikiProject category migration]]."
            project_page.save(summary, minor=False)
            project_cat.save(summary, minor=False)
Example #10
    def __init__(self):
        self.wptools = WikiProjectTools()
        self.wpn = WikiProjectNotifications()
Example #11
    def __init__(self):
        self.wptools = WikiProjectTools()
        self.bot = pywikibot.Site('en', 'wikipedia')
Example #12
def main(rootpage):
    d = WikiProjectDirectory()
    wptools = WikiProjectTools()
    wpcats = WikiProjectCategories()
    tree = wpcats.generate()
    bot = pywikibot.Site('en', 'wikipedia')
    directories = {}
    directoryrow = {}
    projects = []

    # Generate directoryrows and projects lists based on the /All directory:
    page = pywikibot.Page(bot, rootpage + '/All')
    contents = mwph.parse(page.text)
    contents = contents.filter_templates()
    for t in contents:
        if t.name.strip() == "WikiProject directory entry":
            name = str(t.get('project').value).strip().replace(' ', '_')
            projects.append(name)
            directoryrow[name] = str(t) + "\n"

    # The rest of this stuff is copied from directory.py
    index_primary = sorted(tree.keys())
    index_secondary = {}
    indextext = "'''[[{0}/All|All WikiProjects]]'''\n\n".format(rootpage)
    for firstlevel in tree.keys():
        directories[firstlevel] = "={0}=\n".format(firstlevel.replace(
            '_', ' '))
        directories[firstlevel] += d.listpull(
            wptools, projects, directoryrow,
            firstlevel)  # For immediate subcats of WikiProjects_by_area
        directories[firstlevel] += d.treeiterator(
            wptools, tree[firstlevel], projects, directoryrow,
            firstlevel)  # For descendants of those immediate subcats.
        index_secondary[firstlevel] = sorted(tree[firstlevel].keys())

    # Updating the directory index
    for firstlevel in index_primary:
        firstlevel_normalized = firstlevel.replace('_', ' ')
        indextext += ";[[{0}/{1}|{1}]]".format(rootpage, firstlevel_normalized)
        if len(tree[firstlevel]) > 0:
            indextext += " : "
            for secondlevel in index_secondary[firstlevel]:
                indextext += "[[{0}/{1}#{2}|{2}]] – ".format(
                    rootpage, firstlevel_normalized,
                    secondlevel.replace('_', ' '))
            indextext = indextext[:-3]  # Truncates trailing dash and is also a cute smiley face
        indextext += "\n\n"
    saveindex = pywikibot.Page(bot, 'Template:WikiProject directory index')
    saveindex.text = indextext
    saveindex.save('Updating', minor=False, asynchronous=True)

    # Generate directories and save!
    for directory in directories.keys():
        contents = directories[directory]
        page = pywikibot.Page(bot, rootpage + "/" + directory)
        if contents != page.text:  # Checking to see if a change was made to cut down on API save queries
            oldcontents = page.text
            page.text = contents
            page.save('Updating', minor=False, asynchronous=True)
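d.listpull and d.treeiterator come from directory.py and are not shown here. A hypothetical sketch of how treeiterator plausibly recurses, assuming listpull renders the directory rows for a single category:

    def treeiterator(self, wptools, tree, projects, directoryrow, parent):
        # Hypothetical: depth-first walk of the category subtree, emitting a
        # heading plus listpull() output for each descendant category.
        text = ''
        for category, subtree in tree.items():
            text += '=={0}==\n'.format(category.replace('_', ' '))
            text += self.listpull(wptools, projects, directoryrow, category)
            if subtree:
                text += self.treeiterator(wptools, subtree, projects,
                                          directoryrow, category)
        return text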
Example #13
def main():
    # This is used for Aaron Halfaker's API wrapper...
    loginfile = configparser.ConfigParser()
    loginfile.read([os.path.expanduser('~/.wiki.ini')])
    username = loginfile.get('wiki', 'username')
    password = loginfile.get('wiki', 'password')

    # ...And this is for Pywikibot
    bot = pywikibot.Site('en', 'wikipedia')

    wptools = WikiProjectTools()

    now = datetime.datetime.utcnow()
    now = now.strftime('%Y%m%d%H%M%S')  # Convert timestamp to MediaWiki format

    # Pulling timestamp of the last time the script was run
    query = wptools.query(
        'index',
        'select lu_timestamp from lastupdated where lu_key = "new_discussions";',
        None)
    lastupdated = query[0][0]

    # Polling for newest talk page posts in the last thirty minutes
    query = wptools.query(
        'wiki',
        'select distinct recentchanges.rc_this_oldid, page.page_id, recentchanges.rc_title, recentchanges.rc_comment, recentchanges.rc_timestamp, page.page_namespace from recentchanges join page on recentchanges.rc_namespace = page.page_namespace and recentchanges.rc_title = page.page_title join categorylinks on page.page_id=categorylinks.cl_from where rc_timestamp >= {0} and rc_timestamp < {1} and rc_comment like "% new section" and rc_deleted = 0 and cl_to like "%_articles" and page_namespace not in (0, 2, 6, 8, 10, 12, 14, 100, 108, 118) order by rc_timestamp asc;'
        .format(lastupdated, now), None)

    # Cleaning up output
    namespace = {
        1: 'Talk:',
        3: 'User_talk:',
        4: 'Wikipedia:',
        5: 'Wikipedia_talk:',
        7: 'File_talk:',
        9: 'MediaWiki_talk:',
        11: 'Template_talk:',
        13: 'Help_talk:',
        15: 'Category_talk:',
        101: 'Portal_talk:',
        109: 'Book_talk:',
        119: 'Draft_talk:',
        447: 'Education_Program_talk:',
        711: 'TimedText_talk:',
        829: 'Module_talk:',
        2600: 'Topic:'
    }

    output = []
    for row in query:
        rc_id = row[0]
        page_id = row[1]
        rc_title = row[2].decode('utf-8')
        rc_comment = row[3].decode('utf-8')
        rc_comment = rc_comment[3:]    # Strip the leading "/* "
        rc_comment = rc_comment[:-15]  # Strip the trailing " */ new section"
        rc_timestamp = row[4].decode('utf-8')
        rc_timestamp = datetime.datetime.strptime(rc_timestamp, '%Y%m%d%H%M%S')
        rc_timestamp = rc_timestamp.strftime('%H:%M, %d %B %Y (UTC)')
        page_namespace = row[5]
        page_namespace = namespace[page_namespace]

        session = api.Session("https://en.wikipedia.org/w/api.php",
                              user_agent='WPX Revert Checker')
        session.login(username, password)

        # Check if revision has been reverted
        reverted = reverts.api.check(session, rc_id, page_id, 3, None, 172800,
                                     None)
        if reverted is None:
            entry = {
                'title': (page_namespace + rc_title),
                'section': rc_comment,
                'timestamp': rc_timestamp
            }
            output.append(entry)

    # Loading list of WikiProjects signed up to get lists of new discussions
    config = json.loads(
        wptools.query('index', 'select json from config;', None)[0][0])

    # i.e. if New Discussions is an opt-in system
    if config['defaults']['new_discussions'] == False:
        whitelist = []  # Whitelisted WikiProjects for new discussion lists
        for project in config['projects']:
            try:
                project['new_discussions']
            except KeyError:
                continue
            else:
                if project['new_discussions'] == True:
                    whitelist.append(project['name'])
    else:
        whitelist = None

    # A whitelist of [] is one where there is a whitelist, but it's just empty.
    # A whitelist of None is for situations where the need for a whitelist has been obviated.

    # Generating list of WikiProjects for each thread
    for thread in output:
        query = wptools.query(
            'index',
            'select distinct pi_project from projectindex where pi_page = %s;',
            (thread['title'],))  # One-element tuple, not a parenthesized string
        thread['wikiprojects'] = []
        for row in query:
            wikiproject = row[0].replace('_', ' ')
            if (whitelist is None) or (wikiproject in whitelist):
                thread['wikiprojects'].append(wikiproject)
        for wikiproject in thread['wikiprojects']:
            saveto = wikiproject + '/Discussions'
            page = pywikibot.Page(bot, saveto)
            intro_garbage = '{{WPX header|Discussions|color={{{1|#37f}}}}}\n'
            intro_garbage += '{{{{WPX action box|color={{{{{{2|#086}}}}}}|title=Have a question?|content={{{{Clickable button 2|url=//en.wikipedia.org/wiki/Wikipedia_talk:{0}?action=edit&section=new|Ask the WikiProject|class=mw-ui-progressive mw-ui-block}}}}\n\n{{{{Clickable button 2|Wikipedia talk:{0}|View Other Discussions|class=mw-ui-block}}}}}}}}\n'.format(
                wikiproject[10:].replace(' ', '_'))
            intro_garbage += '{{{{WPX list start|intro={{{{WPX last updated|{0}}}}}}}}}\n\n'.format(
                saveto)
            draft = '<noinclude><div style="padding-bottom:1em;">{{{{Clickable button 2|{0}|Return to WikiProject|class=mw-ui-neutral}}}}</div>\n</noinclude>'.format(
                wikiproject) + intro_garbage
            submission = '{{{{WPX new discussion|color={{{{{{1|#37f}}}}}}|title={0}|section={1}|timestamp={2}}}}}\n'.format(
                thread['title'].replace('_', ' '), thread['section'],
                thread['timestamp'])

            notification = "* '''[[{0}#{1}|{1}]] on {0}".format(
                thread['title'].replace('_', ' '), thread['section'])
            queue_notification(wikiproject[10:].replace(' ', '_'),
                               notification)

            index = mwparserfromhell.parse(page.text)
            index = index.filter_templates()
            templatelist = []
            for i in index:
                if i.name == "WPX new discussion":
                    templatelist.append(str(i))
            templatelist = templatelist[:14]  # Sayonara, old threads!
            page.text = draft + submission
            if len(templatelist) > 3:
                # Anything after the third item will not be transcluded
                templatelist[2] += "<noinclude>"
                templatelist[-1] += "</noinclude>"
            for i in templatelist:
                page.text += i + "\n"
            page.text += "{{{{WPX list end|more={0}}}}}".format(
                saveto.replace(' ', '_'))
            page.save('New discussion on [[{0}]]'.format(
                thread['title'].replace('_', ' ')),
                      minor=False)

    # Update the Last Updated field with new timestamp
    wptools.query(
        'index',
        'update lastupdated set lu_timestamp = {0} where lu_key = "new_discussions";'
        .format(now), None)
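queue_notification is not shown in this excerpt. A plausible sketch, assuming it stages rows in the notifications table whose schema appears in Example #3:

def queue_notification(project, content):
    # Hypothetical: stage a 'newdiscussion' notification for later delivery.
    wptools = WikiProjectTools()
    wptools.query(
        'index',
        'insert into notifications (n_project, n_variant, n_content) '
        'values (%s, %s, %s);',
        (project, 'newdiscussion', content))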
Example #14
    def main(self, rootpage):
        # Initializing...
        bot = pywikibot.Site('en', 'wikipedia')
        wptools = WikiProjectTools()
        config = json.loads(
            wptools.query('index', 'select json from config;', None)[0][0])

        # Get list of people who opted out
        optout = pywikibot.Page(bot, 'User:Reports bot/Opt-out')
        blacklist = []
        regexes = [
            re.findall(r'\[\[User:(.*?)\|', optout.text, re.I),
            re.findall(r'\{\{user\|(.*?)\}\}', optout.text, re.I),
            re.findall(r'\[\[:User:(.*?)\]', optout.text, re.I),
            re.findall(r'\[\[:User talk:(.*?)\]', optout.text, re.I)
        ]
        for results in regexes:
            for user in results:
                blacklist.append(user)
        # Bots are to be excluded
        for result in wptools.query(
                'wiki',
                "select user_name from user_groups left join user on user_id = ug_user where ug_group = 'bot';",
                None):
            blacklist.append(result[0].decode('utf-8'))

        # List of projects we are working on
        # Methodology: List from Project Index + List from Formal Definition, minus duplicates
        # This will cover all of our bases.
        articles = {}
        counter = 0
        while True:  # I am a bad man for doing this
            query = wptools.query(
                'index',
                'select pi_page, pi_project from projectindex where pi_id > {0} and pi_id <= {1};'
                .format(counter, counter + 1000000), None)
            if len(query) == 0:
                break
            for pair in query:
                # Normalizing by getting rid of namespace
                page = pair[0]
                page = page.replace('Draft_talk:', '')
                page = page.replace('Talk:', '')
                proj = pair[1][10:]  # Normalizing by getting rid of "Wikipedia:"
                try:
                    articles[proj].append(page)
                except KeyError:
                    articles[proj] = [page]
            counter += 1000000

        projects = list(articles.keys())

        q = ('select distinct page.page_title from page '
             'join categorylinks on page.page_id = categorylinks.cl_from '
             'left join redirect on page.page_id = redirect.rd_from '
             'where page_namespace = 4 '
             'and page_title not like "%/%" '
             'and rd_title is null '
             'and (cl_to in '
             '(select page.page_title from page '
             'where page_namespace = 14 and '
             'page_title like "%\_WikiProjects" '
             'and page_title not like "%\_for\_WikiProjects" '
             'and page_title not like "%\_of\_WikiProjects") '
             'or page_title like "WikiProject\_%");')
        formaldefinition = wptools.query(
            'wiki', q, None)  # http://quarry.wmflabs.org/query/3509
        for row in formaldefinition:
            row = row[0].decode('utf-8')
            if row not in projects:
                projects.append(row)
        projects.sort()

        # All projects, plus subdirectories to be defined below.
        directories = {'All': ''}
        directoryrow = {}

        # Alright! Let's run some reports!
        for project in projects:

            # Seeding directory row and profile page
            if project not in articles:
                articles[project] = []
            project_normalized = project.replace('_', ' ')

            # List of active project participants (less blacklist)
            wp_editors = []
            start_date = time.strftime(
                '%Y%m%d000000',
                time.gmtime(time.time() - (60 * 60 * 24 * 90)))  # 90 days
            end_date = time.strftime('%Y%m%d000000',
                                     time.gmtime(time.time()))  # Today
            query = "select rev_user_text from page left join revision on page_id = rev_page where (page_namespace = 4 OR page_namespace = 5) and (page_title like \"{0}/%%\" OR page_title = \"{0}\") and rev_timestamp > {1} and rev_timestamp < {2} group by rev_user_text HAVING count(*) > 1;".format(
                project, start_date, end_date)
            for result in wptools.query('wiki', query, None):
                if result[0] is not None:
                    user = result[0].decode('utf-8')
                    if user not in blacklist:
                        wp_editors.append(user)
            wp_editors.sort()

            # List of active subject area editors (less blacklist)
            start_date = time.strftime(
                '%Y%m%d000000',
                time.gmtime(time.time() - (60 * 60 * 24 * 30)))  # 30 days
            end_date = time.strftime('%Y%m%d000000',
                                     time.gmtime(time.time()))  # Today

            if len(articles[project]) > 0:
                subject_editors = []
                packages = []
                for i in range(0, len(articles[project]), 10000):
                    packages.append(articles[project][i:i + 10000])

                counter = 0
                for package in packages:
                    counter += 1
                    if len(package) > 1:
                        query_builder = 'select rev_user_text from page left join revision on page_id = rev_page where page_namespace in (0, 1, 118, 119) and page_title in {0} and rev_timestamp > {1} and rev_timestamp < {2} order by rev_user_text;'.format(
                            tuple(package), start_date, end_date)
                    else:
                        query_builder = 'select rev_user_text from page left join revision on page_id = rev_page where page_namespace in (0, 1, 118, 119) and page_title = "{0}" and rev_timestamp > {1} and rev_timestamp < {2} order by rev_user_text;'.format(
                            package[0], start_date, end_date)

                    for result in wptools.query('wiki', query_builder, None):
                        if result[0] is not None:
                            subject_editors.append(result[0].decode('utf-8'))

                # Convert the list to a dict of username -> edit count
                subject_editors = dict(Counter(subject_editors))
                subject_editors_filtered = []
                for user in subject_editors.keys():
                    if user not in blacklist:
                        if subject_editors[user] > 4:
                            subject_editors_filtered.append(user)
                subject_editors = subject_editors_filtered  # And now assigned back.
                subject_editors.sort()

            else:
                subject_editors = []

            # Generate and Save Profile Page
            wp_editors_formatted = ""
            subject_editors_formatted = ""
            if len(wp_editors) > 0:
                for editor in wp_editors:
                    wp_editors_formatted += "\n* [[User:{0}|{0}]] ([[User talk:{0}|talk]])".format(
                        editor)
            else:
                wp_editors_formatted = ""
            if len(subject_editors) > 0 and len(subject_editors) < 3200:
                for editor in subject_editors:
                    subject_editors_formatted += "\n* [[User:{0}|{0}]] ([[User talk:{0}|talk]])".format(
                        editor)
            else:
                subject_editors_formatted = ""

            profilepage = "{{{{WikiProject description page | project = {0} | list_of_active_wikiproject_participants = {1} | list_of_active_subject_area_editors = {2}}}}}".format(
                project_normalized, wp_editors_formatted,
                subject_editors_formatted)
            page = pywikibot.Page(
                bot, rootpage + '/Description/' + project_normalized)
            if profilepage != page.text:  # Checking to see if a change was made to cut down on API queries
                page.text = profilepage
                page.save('Updating', minor=False, asynchronous=True)

            # Construct directory entry
            directoryrow[project] = (
                "{{{{WikiProject directory entry | project = {0} | number_of_articles = {1} "
                "| wp_editors = {2} | scope_editors = {3}}}}}\n".format(
                    project_normalized, len(articles[project]),
                    len(wp_editors), len(subject_editors)))

        # Assign directory entry to relevant directory pages ("All entries" and relevant subdirectory pages)
        for entry in sorted(
                directoryrow.items(),
                key=operator.itemgetter(1)):  # Sorting into alphabetical order
            directories['All'] += entry[1]
        directories['All'] = "{{WikiProject directory top}}\n" + directories[
            'All'] + "|}"

        wpcats = WikiProjectCategories()
        tree = wpcats.generate()
        index_primary = sorted(tree.keys())
        index_secondary = {}
        indextext = "'''[[{0}/All|All WikiProjects]]'''\n\n".format(rootpage)
        for firstlevel in tree.keys():
            directories[firstlevel] = "={0}=\n".format(
                firstlevel.replace('_', ' '))
            directories[firstlevel] += self.listpull(
                wptools, projects, directoryrow,
                firstlevel)  # For immediate subcats of WikiProjects_by_area
            directories[firstlevel] += self.treeiterator(
                wptools, tree[firstlevel], projects, directoryrow,
                firstlevel)  # For descendants of those immediate subcats.
            index_secondary[firstlevel] = sorted(tree[firstlevel].keys())

        # Updating the directory index
        for firstlevel in index_primary:
            firstlevel_normalized = firstlevel.replace('_', ' ')
            indextext += ";[[{0}/{1}|{1}]]".format(rootpage,
                                                   firstlevel_normalized)
            if len(tree[firstlevel]) > 0:
                indextext += " : "
                for secondlevel in index_secondary[firstlevel]:
                    indextext += "[[{0}/{1}#{2}|{2}]] – ".format(
                        rootpage, firstlevel_normalized,
                        secondlevel.replace('_', ' '))
                indextext = indextext[:-3]  # Truncates trailing dash and is also a cute smiley face
            indextext += "\n\n"
        saveindex = pywikibot.Page(bot, 'Template:WikiProject directory index')
        saveindex.text = indextext
        saveindex.save('Updating', minor=False, asynchronous=True)

        # Generate directories and save!
        for directory in directories.keys():
            contents = directories[directory]
            page = pywikibot.Page(bot, rootpage + "/" + directory)
            if contents != page.text:  # Checking to see if a change was made to cut down on API save queries
                oldcontents = page.text
                page.text = contents
                page.save('Updating', minor=False, asynchronous=True)
                # Cleanup of obsolete description pages and "Related WikiProjects" pages
                if directory == 'All':
                    oldcontents = mwph.parse(oldcontents)
                    oldcontents = oldcontents.filter_templates()
                    oldprojectlist = []
                    for t in oldcontents:
                        if t.name.strip() == "WikiProject directory entry":
                            oldprojectlist.append(str(t.get('project').value))
                    for oldproject in oldprojectlist:
                        oldproject = oldproject.strip().replace(
                            ' ', '_')  # Normalizing
                        if oldproject not in projects:
                            deletethis = pywikibot.Page(
                                bot, rootpage + '/Description/' + oldproject)
                            deletethis.text = "{{db-g6|rationale=A bot has automatically tagged this page as obsolete. This means that the WikiProject described on this page has been deleted or made into a redirect}}\n"
                            deletethis.save('Nominating page for deletion',
                                            minor=False,
                                            asynchronous=True)
                            deletethis = pywikibot.Page(
                                bot,
                                'Wikipedia:Related WikiProjects/' + oldproject)
                            if deletethis.text != "":
                                deletethis.text = "{{db-g6|rationale=A bot has automatically tagged this page as obsolete. This means that the WikiProject described on this page has been deleted or made into a redirect}}\n"
                                deletethis.save('Nominating page for deletion',
                                                minor=False,
                                                asynchronous=True)
Example #15
    def go(self):
        wptools = WikiProjectTools()

        # Get list of WikiProjects that also have a self-named category

        output = 'This report highlights discrepancies in WikiProject categorization between WikiProjects and their self-named categories.\n\n'
        query = 'select page_title from page left join redirect on page.page_id = redirect.rd_from where page_title like "WikiProject\_%" and page_namespace = 4 and page_title in (select page_title from page where page_title like "WikiProject\_%" and page_namespace = 14) and rd_title is null;'

        for row in wptools.query('wiki', query, None):
            project = row[0].decode('utf-8')

            cl_projectspace = []   # read as "category links, Wikipedia namespace"
            cl_categoryspace = []  # read as "category links, Category namespace"

            for match in wptools.query(
                    'wiki',
                    'select cl_to from categorylinks join page on categorylinks.cl_from=page.page_id where page_namespace = 4 and page_title = "{0}" and cl_to like "%\_WikiProjects" and cl_to not like "Active\_%" and cl_to not like "Semi-active\_%" and cl_to not like "Inactive\_%" and cl_to not like "Defunct\_%";'
                    .format(project), None):
                cl_projectspace.append(match[0].decode('utf-8').replace(
                    '_', ' '))

            for match in wptools.query(
                    'wiki',
                    'select cl_to from categorylinks join page on categorylinks.cl_from=page.page_id where page_namespace = 14 and page_title = "{0}" and cl_to like "%\_WikiProjects" and cl_to not like "Active\_%" and cl_to not like "Semi-active\_%" and cl_to not like "Inactive\_%" and cl_to not like "Defunct\_%";'
                    .format(project), None):
                cl_categoryspace.append(match[0].decode('utf-8').replace(
                    '_', ' '))

            cl_projectspace.sort()
            cl_categoryspace.sort()

            if cl_projectspace == cl_categoryspace:
                continue  # Don't bother generating a report if both category lists match perfectly

            both = list(set(cl_projectspace).intersection(cl_categoryspace))

            project = project.replace('_', ' ')

            output += "* '''{0}'''\n".format(project)
            output += "** [[Wikipedia:{0}]]: ".format(project)
            for entry in cl_projectspace:
                if entry in both:
                    output += "<span style='color: #999'>{0}</span> – ".format(
                        entry)
                else:
                    output += "<span style='color: #FF0000'>{0}</span> – ".format(
                        entry)

            output = output[:-2] + "\n"  # Truncate trailing endash and add line break

            output += "** [[:Category:{0}]]: ".format(project)
            for entry in cl_categoryspace:
                if entry in both:
                    output += "<span style='color: #999'>{0}</span> –".format(
                        entry)
                else:
                    output += "<span style='color: #FF0000'>{0}</span> –".format(
                        entry)

            output = output[:-2] + "\n"  # Truncate trailing endash and add line break

        return output
Example #16
    def prepare(self, saveto):
        wptools = WikiProjectTools()
        bot = pywikibot.Site('en', 'wikipedia')

        # Retrieve list of WikiProjects
        projects = []
        for row in wptools.query(
                'index', 'select distinct pi_project from projectindex;',
                None):
            projects.append(row[0])

        runtime = datetime.datetime.utcnow().strftime('%H:%M, %d %B %Y (UTC)')
        q = ('select distinct page.page_title from page '
             'join categorylinks on page.page_id = categorylinks.cl_from '
             'left join redirect on page.page_id = redirect.rd_from '
             'where page_namespace = 4 '
             'and page_title not like "%/%" '
             'and rd_title is null '
             'and (cl_to in '
             '(select page.page_title from page '
             'where page_namespace = 14 and '
             'page_title like "%\_WikiProjects" '
             'and page_title not like "%\_for\_WikiProjects" '
             'and page_title not like "%\_of\_WikiProjects") '
             'or page_title like "WikiProject\_%");')
        formaldefinition = wptools.query(
            'wiki', q, None)  # http://quarry.wmflabs.org/query/3509
        for row in formaldefinition:
            row = row[0].decode('utf-8')
            if row not in projects:
                projects.append(row)

        projects.sort()
        packages = [projects[i:i + 50] for i in range(0, len(projects), 50)]

        report = {}
        for package in packages:
            url = "https://en.wikipedia.org/w/api.php?action=query&format=json&prop=info&inprop=watchers&titles="
            for title in package:
                url += title + "|"
            url = url[:-1]  # Truncate trailing pipe
            apiquery = requests.get(url)
            apiquery = apiquery.json()
            for pagedata in apiquery['query']['pages'].values():
                if 'watchers' in pagedata:
                    if pagedata['watchers'] > 29:  # Only report pages with at least 30 watchers
                        report[pagedata['title']] = pagedata['watchers']

        report = sorted(report.items(),
                        key=operator.itemgetter(1),
                        reverse=True)

        contents = 'List of WikiProjects by number of watchers of their main page and talk page. A WikiProject not appearing on this list has fewer than 30 watchers. Data as of <onlyinclude>' + runtime + '</onlyinclude>'
        contents += '\n\n{| class="wikitable sortable plainlinks"\n|-\n! No.\n! WikiProject\n! Watchers\n'

        counter = 0
        for pair in report:
            counter += 1
            contents += "|-\n| {0}\n| [[{1}]]\n| {2}\n".format(
                str(counter), pair[0], pair[1])

        contents += "|}"

        page = pywikibot.Page(bot, saveto)
        page.text = contents
        page.save("Updating report", minor=False)