import pywikibot
from urllib.parse import quote

# WikiProjectTools is this codebase's local database helper; the import path
# is assumed here.
from wikiprojecttools import WikiProjectTools


def main():
    wptools = WikiProjectTools()
    bot = pywikibot.Site('en', 'wikipedia')
    q = ('select page_title from page where page_namespace = 0 '
         'and page_is_redirect = 0 and page_id not in '
         '(select page_id from page join page_props on pp_page = page_id '
         'where page_namespace = 0 and pp_propname = "wikibase_item") '
         'order by page_id;')
    no_wikidata = [x[0].decode('utf-8') for x in wptools.query('wiki', q, None)]
    total_count = len(no_wikidata)  # Capturing this before truncating the list
    no_wikidata = no_wikidata[:100]
    page = pywikibot.Page(bot, 'User:Reports_bot/No_Wikidata_item')
    content = "'''Total Articles Missing From Wikidata:''' " + str(total_count) + "\n\n"
    for title in no_wikidata:
        content += ("* [[" + title.replace('_', ' ')
                    + "]] ([https://www.wikidata.org/w/index.php?search="
                    + quote(title)
                    + "&title=Special%3ASearch&fulltext=1 Search on Wikidata])\n")
    page.text = content
    page.save("Updating list", minor=False, quiet=True)
def prepare(self, saveto):
    wptools = WikiProjectTools()
    bot = pywikibot.Site('en', 'wikipedia')

    # Retrieve list of WikiProjects
    projects = []
    for row in wptools.query('index', 'select distinct pi_project from projectindex;', None):
        projects.append(row[0])

    runtime = datetime.datetime.utcnow().strftime('%H:%M, %d %B %Y (UTC)')
    q = ('select distinct page.page_title from page '
         'join categorylinks on page.page_id = categorylinks.cl_from '
         'left join redirect on page.page_id = redirect.rd_from '
         'where page_namespace = 4 '
         'and page_title not like "%/%" '
         'and rd_title is null '
         'and (cl_to in '
         '(select page.page_title from page '
         'where page_namespace = 14 and '
         'page_title like "%\\_WikiProjects" '
         'and page_title not like "%\\_for\\_WikiProjects" '
         'and page_title not like "%\\_of\\_WikiProjects") '
         'or page_title like "WikiProject\\_%");')
    formaldefinition = wptools.query('wiki', q, None)  # http://quarry.wmflabs.org/query/3509
    for row in formaldefinition:
        row = "Wikipedia:" + row[0].decode('utf-8')
        if row not in projects:
            projects.append(row)
    projects.sort()

    # Query the API for watcher counts in batches of 50 titles
    packages = [projects[i:i + 50] for i in range(0, len(projects), 50)]
    report = {}
    for package in packages:
        url = "https://en.wikipedia.org/w/api.php?action=query&format=json&prop=info&inprop=watchers&titles="
        for title in package:
            url += quote(title) + "|"
        url = url[:-1]  # Truncate trailing pipe
        apiquery = requests.get(url)
        apiquery = apiquery.json()
        for pagedata in apiquery['query']['pages'].values():
            if 'watchers' in pagedata:
                if pagedata['watchers'] > 29:  # Only report pages with at least 30 watchers
                    report[pagedata['title']] = pagedata['watchers']

    report = sorted(report.items(), key=operator.itemgetter(1), reverse=True)
    contents = ('List of WikiProjects by number of watchers of their main pages and talk pages. '
                'A WikiProject not appearing on this list has fewer than 30 watchers. '
                'Data as of <onlyinclude>' + runtime + '</onlyinclude>')
    contents += '\n\n{| class="wikitable sortable plainlinks"\n|-\n! No.\n! WikiProject\n! Watchers\n'
    counter = 0
    for pair in report:
        counter += 1
        contents += "|-\n| {0}\n| [[{1}]]\n| {2}\n".format(str(counter), pair[0], pair[1])
    contents += "|}"

    page = pywikibot.Page(bot, saveto)
    page.text = contents
    page.save("Updating report", minor=False, quiet=True)
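# As a design note, the hand-built URL above can be avoided by letting
# requests encode the query parameters. A minimal sketch of the same watcher
# lookup (the function name is illustrative, not part of this codebase):
def watcher_counts(titles):
    r = requests.get(
        'https://en.wikipedia.org/w/api.php',
        params={'action': 'query', 'format': 'json', 'prop': 'info',
                'inprop': 'watchers', 'titles': '|'.join(titles)})
    # Pages without a visible watcher count simply lack the 'watchers' key.
    return {p['title']: p['watchers']
            for p in r.json()['query']['pages'].values() if 'watchers' in p}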
def main(): wptools = WikiProjectTools() bot = pywikibot.Site("en", "wikipedia") q = ( "select page_title from page where page_namespace = 0 " "and page_is_redirect = 0 and page_title not in " "(select page_title from page join page_props on pp_page = page_id " 'where page_namespace = 0 and pp_propname = "wikibase_item") ' "order by page_id;" ) no_wikidata = [x[0].decode("utf-8") for x in wptools.query("wiki", q, None)] total_count = len(no_wikidata) # Capturing this before truncating list no_wikidata = no_wikidata[:100] page = pywikibot.Page(bot, "User:Reports_bot/No_Wikidata_item") content = "'''Total Articles Missing From Wikidata:''' " + str(total_count) + "\n\n" for title in no_wikidata: content += ( "* [[" + title.replace("_", " ") + "]] ([https://www.wikidata.org/w/index.php?search=" + quote(title) + "&title=Special%3ASearch&fulltext=1 Search on Wikidata])\n" ) page.text = content page.save("Updating list", minor=False)
def main(rootpage, saveto):
    wptools = WikiProjectTools()
    bot = pywikibot.Site('en', 'wikipedia')
    output = 'These WikiProjects are not in any WikiProject meta-categories:\n\n'

    # Generating category whitelist
    wpcats = WikiProjectCategories()
    tree = wpcats.generate()
    whitelist = list(treegen(tree))  # Run through a simple generator function (sketched below) to produce a flat list
    whitelist = tuple(set(whitelist))  # De-duplicating and converting to a tuple

    page = pywikibot.Page(bot, rootpage + '/All')
    contents = mwph.parse(page.text)
    contents = contents.filter_templates()
    for t in contents:
        if t.name.strip() == "WikiProject directory entry small":
            project = str(t.get('project').value).strip().replace(' ', '_')
            # Give me a list of all the categories, as long as they are on the whitelist
            query = wptools.query(
                'wiki',
                'select distinct cl_to from categorylinks '
                'join page on categorylinks.cl_from = page.page_id '
                'where page_namespace in (4, 14) '
                'and page_title = {0} and cl_to in {1};'.format('"' + project + '"', whitelist),
                None)
            if len(query) == 0:  # If the page is in none of the whitelisted categories
                output += "# [[Wikipedia:{0}|{0}]]\n".format(project.replace('_', ' '))

    page = pywikibot.Page(bot, saveto)
    page.text = output
    page.save('Updating', minor=False)
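# treegen() is called above but is not defined in this section. A minimal
# sketch consistent with its use here: it flattens the nested dict-of-dicts
# produced by WikiProjectCategories.generate() (see build_cat_tree later in
# this section) into a flat sequence of category names.
def treegen(tree):
    for category, subtree in tree.items():
        yield category
        if subtree:  # Leaf nodes map to None
            yield from treegen(subtree)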
def main(rootpage):
    bot = pywikibot.Site('en', 'wikipedia')
    wptools = WikiProjectTools()
    config = json.loads(wptools.query('index', 'select json from config;', None)[0][0])

    postto = []
    # In this loop, *project* is a dictionary of configurations
    for project in config['projects']:
        if 'suggestbot' in project:  # Is the key even defined?
            if project['suggestbot'] == True and project['type'] == 'Category':
                postto.append(project['name'])
                page = pywikibot.Page(bot, rootpage + '/SuggestFarm/' + project['name'][10:])
                page.text = "{{{{User:SuggestBot/suggest|Category:{0}}}}}".format(project['source'])
                page.save("Requesting latest recommendations from SuggestBot", minor=False)

    print("Sleeping for 30 minutes.")
    time.sleep(1800)  # Sleeping 30 minutes to wait for SuggestBot to do its thing

    # In this loop, *project* is a string (the name of the project)
    for project in postto:
        page = pywikibot.Page(bot, rootpage + '/SuggestFarm/' + project[10:])
        # Isolating the table from the output
        table = page.text.split('{|', 1)[1]
        table = table.split('|}', 1)[0]
        table = '{|\n' + table + '\n|}'
        # Saving table to the WikiProject
        page = pywikibot.Page(bot, project + '/Edit articles')
        page.text = ('===Edit articles===\n{{WPX last updated|' + project
                     + '/Edit articles' + '}}\n\n' + table)
        # *asynchronous* replaces the old *async* keyword argument, which
        # collides with the async keyword in Python 3.7+
        page.save("Updating list", minor=False, asynchronous=True)
def main(rootpage, saveto):
    wptools = WikiProjectTools()
    bot = pywikibot.Site('en', 'wikipedia')
    output = 'These WikiProjects are not in any WikiProject meta-categories:\n\n'

    # Generating category whitelist
    wpcats = WikiProjectCategories()
    tree = wpcats.generate()
    whitelist = list(treegen(tree))  # Run through a simple generator function to produce a flat list
    whitelist = tuple(set(whitelist))  # De-duplicating and converting to a tuple

    page = pywikibot.Page(bot, rootpage + '/All')
    contents = mwph.parse(page.text)
    contents = contents.filter_templates()
    for t in contents:
        if t.name.strip() == "WikiProject directory entry":
            project = str(t.get('project').value).strip().replace(' ', '_')
            # Give me a list of all the categories, as long as they are on the whitelist
            query = wptools.query(
                'wiki',
                'select distinct cl_to from categorylinks '
                'join page on categorylinks.cl_from = page.page_id '
                'where page_namespace in (4, 14) '
                'and page_title = {0} and cl_to in {1};'.format('"' + project + '"', whitelist),
                None)
            if len(query) == 0:  # If the page is in none of the whitelisted categories
                output += "# [[Wikipedia:{0}|{0}]]\n".format(project.replace('_', ' '))

    page = pywikibot.Page(bot, saveto)
    page.text = output
    page.save('Updating', minor=False)
def main(): print("Loading...") wptools = WikiProjectTools() query = wptools.query('index', 'select pi_page, pi_project from projectindex;', None) pages = {} for row in query: pi_page = row[0] pi_project = row[1] try: pages[pi_project].append(pi_page) except KeyError: pages[pi_project] = [pi_page] # Compare! intersect_counts = {} regex = re.compile('/.*') for wikiproject_x in pages.keys(): # lol WikiProject X print("Working on: " + wikiproject_x) intersect_counts[wikiproject_x] = {} for wikiproject_y in pages.keys(): if wikiproject_x == wikiproject_y: continue # Don't compare a project to itself test1 = re.sub(regex, '', wikiproject_x) test2 = re.sub(regex, '', wikiproject_y) if test1 == test2: continue # Filters out comparisons where one is a subpage of another s = set(pages[wikiproject_x]) intersect_counts[wikiproject_x][wikiproject_y] = len( [n for n in pages[wikiproject_y] if n in s]) bot = pywikibot.Site('en', 'wikipedia') print("Sorting and saving...") for project in intersect_counts.keys(): # Sorts from highest to lowest ordered = sorted(intersect_counts[project].items(), key=operator.itemgetter(1), reverse=True) saveto = 'Wikipedia:Related_WikiProjects/' + project[10:] page = pywikibot.Page(bot, saveto) draft = '{{WPX header|color={{{1|#37f}}}|Related WikiProjects<noinclude>: [[' \ + project.replace('_', ' ') + '|]]</noinclude>}}\n' draft += '{{WPX list start|intro={{WPX last updated|' + saveto + '}}}}\n' for x in range(0, 10): if ordered[x][1] > 0: draft += "{{WPX block|color={{{1|#37f}}}|" \ + "largetext='''[[{0}|]]''' ([[Wikipedia:Related WikiProjects/{1}|view related]])|".format(ordered[x][0].replace('_', ' '), ordered[x][0].replace('_', ' ')[10:]) \ + "smalltext={0} articles in common}}}}\n".format(str(ordered[x][1])) draft += '{{WPX list end|more=' + saveto + '}}' if page.text != draft: page.text = draft page.save('Updating', minor=False, async=True)
def build_cat_tree(cat_name, max_depth=5):
    # Recursively builds a nested dictionary of WikiProject category names,
    # stopping after max_depth levels (leaves map to None).
    if max_depth == 0:
        return None
    wptools = WikiProjectTools()
    query = wptools.query(
        'wiki',
        'select distinct page.page_title from categorylinks '
        'join page on categorylinks.cl_from = page.page_id '
        'where page_namespace = 14 and cl_to = "{0}" '
        'and page_title like "%\\_WikiProjects" '
        'and page_title not like "Inactive_%";'.format(cat_name),
        None)
    retval = {}
    for row in query:
        category = row[0].decode('utf-8')
        retval[category] = build_cat_tree(category, max_depth=max_depth - 1)
    return retval
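# Example use: build the WikiProject category tree two levels deep, starting
# from the root category referred to elsewhere in these scripts:
tree = build_cat_tree('WikiProjects_by_area', max_depth=2)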
def __init__(self, viewdump=None):
    print("Initializing the Priority Predictor")
    self.wptools = WikiProjectTools()
    if viewdump is None:  # If a dumped JSON file of pageviews is not specified
        self.dump = getviewdump(self.wptools, 'en', days=30)
    else:
        with open(viewdump, 'r') as f:
            self.dump = json.load(f)  # Load pageviews from a dumped JSON file
def now(self):
    bot = pywikibot.Site('en', 'wikipedia')

    # Exports the contents of the wikiproject.json page.
    page = pywikibot.Page(bot, 'Wikipedia:WikiProject X/wikiproject.json')
    output = page.text

    # We now have the JSON blob, in string format.
    try:
        output = json.loads(output)
    except ValueError as ack:  # If JSON is invalid
        self.stopthepresses(bot, str(ack))

    # At this point, we have valid JSON at our disposal. But does it comply with the schema?
    schema = list(output['schema'].keys())
    for setting in output['defaults']:
        if setting not in schema:
            self.stopthepresses(bot, 'Invalid setting {0} in default configuration.'.format(setting))
    for entry in output['projects']:
        for setting in entry:
            if setting not in schema:
                self.stopthepresses(bot, 'Invalid setting {0} in project entry {1}'.format(setting, entry))

    # If the script hasn't been killed yet, save to the database.
    output = json.dumps(output)
    wptools = WikiProjectTools()
    wptools.query('index', 'create table config_draft (json mediumtext character set utf8 collate utf8_unicode_ci) engine=innodb character set=utf8;', None)
    wptools.query('index', 'insert into config_draft (json) values (%s);', (str(output),))
    wptools.query('index', 'drop table if exists config', None)
    wptools.query('index', 'rename table config_draft to config', None)
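# For illustration, a minimal wikiproject.json blob that would pass the
# validation above. The keys are drawn from how the config is used across
# these scripts; the values are placeholders, not the live configuration:
example_config = {
    "schema": {"name": None, "type": None, "source": None,
               "suggestbot": None, "new_discussions": None},
    "defaults": {"new_discussions": False},
    "projects": [
        {"name": "Wikipedia:WikiProject Women in Technology",
         "type": "Category",
         "source": "Women in technology",
         "suggestbot": True,
         "new_discussions": True}
    ]
}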
def main(): print("Loading...") wptools = WikiProjectTools() query = wptools.query('index', 'select pi_page, pi_project from projectindex;', None) pages = {} for row in query: pi_page = row[0] pi_project = row[1] try: pages[pi_project].append(pi_page) except KeyError: pages[pi_project] = [pi_page] # Compare! intersect_counts = {} regex = re.compile('/.*') for wikiproject_x in pages.keys(): # lol WikiProject X print("Working on: " + wikiproject_x) intersect_counts[wikiproject_x] = {} for wikiproject_y in pages.keys(): if wikiproject_x == wikiproject_y: continue # Don't compare a project to itself test1 = re.sub(regex, '', wikiproject_x) test2 = re.sub(regex, '', wikiproject_y) if test1 == test2: continue # Filters out comparisons where one is a subpage of another s = set(pages[wikiproject_x]) intersect_counts[wikiproject_x][wikiproject_y] = len([n for n in pages[wikiproject_y] if n in s]) bot = pywikibot.Site('en', 'wikipedia') print("Sorting and saving...") for project in intersect_counts.keys(): # Sorts from highest to lowest ordered = sorted(intersect_counts[project].items(), key=operator.itemgetter(1), reverse=True) saveto = 'Wikipedia:Related_WikiProjects/' + project[10:] page = pywikibot.Page(bot, saveto) draft = '{{WPX header|color={{{1|#37f}}}|Related WikiProjects<noinclude>: [[' \ + project.replace('_', ' ') + '|]]</noinclude>}}\n' draft += '{{WPX list start|intro={{WPX last updated|' + saveto + '}}}}\n' for x in range(0, 10): if ordered[x][1] > 0: draft += "{{WPX block|color={{{1|#37f}}}|" \ + "largetext='''[[{0}|]]''' ([[Wikipedia:Related WikiProjects/{1}|view related]])|".format(ordered[x][0].replace('_', ' '), ordered[x][0].replace('_', ' ')[10:]) \ + "smalltext={0} articles in common}}}}\n".format(str(ordered[x][1])) draft += '{{WPX list end|more=' + saveto + '}}' if page.text != draft: page.text = draft page.save('Updating', minor=False, async=True)
def build_cat_tree(cat_name, max_depth=5): if max_depth == 0: return None wptools = WikiProjectTools() query = wptools.query( 'wiki', 'select distinct page.page_title from categorylinks join page on categorylinks.cl_from=page.page_id where page_namespace = 14 and cl_to = "{0}" and page_title like "%\_WikiProjects" and page_title not like "Inactive_%";' .format(cat_name), None) retval = {} for row in query: category = row[0].decode('utf-8') retval[category] = build_cat_tree(category, max_depth=max_depth - 1) return retval
def go(self):
    wptools = WikiProjectTools()

    # Get list of WikiProjects that also have a self-named category
    output = ('This report highlights discrepancies in WikiProject categorization '
              'between WikiProjects and their self-named categories.\n\n')
    query = ('select page_title from page '
             'left join redirect on page.page_id = redirect.rd_from '
             'where page_title like "WikiProject\\_%" and page_namespace = 4 '
             'and page_title in (select page_title from page '
             'where page_title like "WikiProject\\_%" and page_namespace = 14) '
             'and rd_title is null;')
    for row in wptools.query('wiki', query, None):
        project = row[0].decode('utf-8')
        cl_projectspace = []   # read as "category links, Wikipedia namespace"
        cl_categoryspace = []  # read as "category links, Category namespace"
        for match in wptools.query(
                'wiki',
                'select cl_to from categorylinks '
                'join page on categorylinks.cl_from = page.page_id '
                'where page_namespace = 4 and page_title = "{0}" '
                'and cl_to like "%\\_WikiProjects" and cl_to not like "Active\\_%" '
                'and cl_to not like "Semi-active\\_%" and cl_to not like "Inactive\\_%" '
                'and cl_to not like "Defunct\\_%";'.format(project),
                None):
            cl_projectspace.append(match[0].decode('utf-8').replace('_', ' '))
        for match in wptools.query(
                'wiki',
                'select cl_to from categorylinks '
                'join page on categorylinks.cl_from = page.page_id '
                'where page_namespace = 14 and page_title = "{0}" '
                'and cl_to like "%\\_WikiProjects" and cl_to not like "Active\\_%" '
                'and cl_to not like "Semi-active\\_%" and cl_to not like "Inactive\\_%" '
                'and cl_to not like "Defunct\\_%";'.format(project),
                None):
            cl_categoryspace.append(match[0].decode('utf-8').replace('_', ' '))
        cl_projectspace.sort()
        cl_categoryspace.sort()

        if cl_projectspace == cl_categoryspace:
            continue  # Don't bother generating a report if both category lists match perfectly

        both = list(set(cl_projectspace).intersection(cl_categoryspace))
        project = project.replace('_', ' ')
        output += "* '''{0}'''\n".format(project)
        output += "** [[Wikipedia:{0}]]: ".format(project)
        for entry in cl_projectspace:
            if entry in both:
                output += "<span style='color: #999'>{0}</span> – ".format(entry)
            else:
                output += "<span style='color: #FF0000'>{0}</span> – ".format(entry)
        output = output[:-2] + "\n"  # Truncate trailing en dash and add a line break
        output += "** [[:Category:{0}]]: ".format(project)
        for entry in cl_categoryspace:
            if entry in both:
                output += "<span style='color: #999'>{0}</span> – ".format(entry)
            else:
                output += "<span style='color: #FF0000'>{0}</span> – ".format(entry)
        output = output[:-2] + "\n"  # Truncate trailing en dash and add a line break

    return output
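# Nothing in this section saves the wikitext that go() returns; a caller
# would need to publish it along these lines (the report title here is an
# assumption, not taken from the codebase):
def publish(self):
    bot = pywikibot.Site('en', 'wikipedia')
    page = pywikibot.Page(bot, 'User:Reports bot/WikiProject category alignment')
    page.text = self.go()
    page.save('Updating report', minor=False)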
def main(): print("Loading...") wptools = WikiProjectTools() query = wptools.query('index', 'select pi_page, pi_project from projectindex;', None) pages = {} for row in query: pi_page = row[0] pi_project = row[1] try: pages[pi_project].append(pi_page) except KeyError: pages[pi_project] = [pi_page] # Compare! intersect_counts = {} regex = re.compile('/.*') for wikiproject_x in pages.keys(): # lol WikiProject X print("Working on: " + wikiproject_x) intersect_counts[wikiproject_x] = {} for wikiproject_y in pages.keys(): if wikiproject_x == wikiproject_y: continue # Don't compare a project to itself test1 = re.sub(regex, '', wikiproject_x) test2 = re.sub(regex, '', wikiproject_y) if test1 == test2: continue # Filters out comparisons where one is a subpage of another s = set(pages[wikiproject_x]) intersect_counts[wikiproject_x][wikiproject_y] = len([n for n in pages[wikiproject_y] if n in s]) bot = pywikibot.Site('en', 'wikipedia') print("Sorting and saving...") for project in intersect_counts.keys(): # Sorts from highest to lowest ordered = sorted(intersect_counts[project].items(), key=operator.itemgetter(1), reverse=True) saveto = 'Wikipedia:Related_WikiProjects/' + project[10:] page = pywikibot.Page(bot, saveto) draft = '' for x in range(0, 10): if ordered[x][1] > 0: draft += "* '''[[{0}|{1}]]''': {2} articles in common\n".format(ordered[x][0], ordered[x][0][10:].replace('_', ' '), str(ordered[x][1])) if page.text != draft: page.text = draft page.save('Updating', minor=False, async=True)
def main():
    wptools = WikiProjectTools()
    bot = pwb.Site("en", "wikipedia", user="******")  # username redacted in source

    # Generate list of WikiProjects with eponymous categories
    q = ("select page_title from page where page_namespace = 14 "
         "and page_title in (select page_title from page where "
         'page_namespace = 4 and page_title like "WikiProject_%" '
         "and page_is_redirect = 0);")
    pairs = [row[0].decode("utf-8") for row in wptools.query("wiki", q, None)]

    for pair in pairs:
        # Load WikiProject page
        project_page = pwb.Page(bot, "Wikipedia:" + pair)

        # Preserve only categories that aren't in the style "X WikiProjects"
        preserve = [c for c in pwb.textlib.getCategoryLinks(project_page.text)
                    if str(c)[-15:] != " WikiProjects]]"]

        # Check for presence of removable categories; otherwise, don't bother
        if preserve != pwb.textlib.getCategoryLinks(project_page.text):
            # Load WikiProject category
            project_cat = pwb.Page(bot, "Category:" + pair)

            # List categories to add to the project category
            page_cats = [c for c in pwb.textlib.getCategoryLinks(project_page.text)
                         if str(c)[-15:] == " WikiProjects]]"]
            cat_cats = [c for c in pwb.textlib.getCategoryLinks(project_cat.text)
                        if str(c)[-15:] == " WikiProjects]]"]
            to_add = list(set(page_cats) - set(cat_cats))

            # Make changes and save the pages
            project_cat.text = pwb.textlib.replaceCategoryLinks(project_cat.text, to_add, addOnly=True)
            project_page.text = pwb.textlib.replaceCategoryLinks(project_page.text, preserve)
            summary = "WikiProject category migration. See [[User:Harej bot/WikiProject category migration]]."
            project_page.save(summary, minor=False)
            project_cat.save(summary, minor=False)
class WikidataMagic:

    def __init__(self):
        self.wptools = WikiProjectTools()
        self.bot = pywikibot.Site('en', 'wikipedia')

    def entitydata(self, item):
        url = 'https://www.wikidata.org/wiki/Special:EntityData/' + item + '.json'
        r = requests.get(url)
        return r.json()

    def wikidataquery(self, query):
        url = 'https://wdq.wmflabs.org/api?q=' + query
        r = requests.get(url)
        return ['Q' + str(item) for item in r.json()['items']]

    def missing_from_enwiki(self, total_item_list):
        q = ("select pp_value from page_props "
             "where pp_propname = 'wikibase_item' and pp_value in {0}")
        q = q.format(tuple(total_item_list))
        on_enwiki = [item[0].decode('utf-8')
                     for item in self.wptools.query('wiki', q, None)
                     if item[0] is not None]
        return list(set(total_item_list) - set(on_enwiki))

    def missing_articles_report(self):
        config = self.wptools.query('index', 'select json from config;', None)
        config = json.loads(config[0][0])
        for entry in config['projects']:
            if 'wikidata_missing_articles' in entry:
                wikiproject = entry['name']  # e.g. "Wikipedia:WikiProject Something"
                wdq_query = entry['wikidata_missing_articles']

                # Coming up with the list of Wikidata items for missing articles
                items_for_report = self.wikidataquery(wdq_query)
                items_for_report = self.missing_from_enwiki(items_for_report)
                items_for_report = items_for_report[:100]  # Truncate list

                # Generate the report itself!
                save_to = wikiproject + "/Tasks/Wikidata Missing Article Report"
                content = ("{{WPX list start|title=From Wikidata|"
                           "intro=Automatically generated list of missing articles"
                           "<br />{{WPX last updated|" + save_to + "}}}}\n"
                           "{{#invoke:<includeonly>random|list|limit=3"
                           "</includeonly><noinclude>list|unbulleted</noinclude>\n")
                for item in items_for_report:
                    data = self.entitydata(item)
                    data = data['entities'][item]
                    if 'labels' in data and 'en' in data['labels']:
                        label = "[[" + data['labels']['en']['value'] + "]]"
                    else:
                        label = "No English title available"
                    if 'descriptions' in data and 'en' in data['descriptions']:
                        description = data['descriptions']['en']['value']
                    else:
                        description = "No English description available"
                    content += ("| {{WPX block|largetext='''" + label
                                + "'''|smalltext=" + description
                                + "<br />([[d:" + item
                                + "|More information on Wikidata]])"
                                + "|color={{{1|#37f}}}}}\n")

                # Wrap up the report and save
                content += "}}\n{{WPX list end|more=" + save_to + "}}"
                page = pywikibot.Page(self.bot, save_to)
                page.text = content
                page.save("Updating task list", minor=False, asynchronous=True)
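# Hypothetical entry point, assuming this class lives in a standalone script.
# Note that the WikiDataQuery service (wdq.wmflabs.org) queried above has
# since been retired, so wikidataquery() would need porting to a live backend.
if __name__ == '__main__':
    WikidataMagic().missing_articles_report()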
class WikiProjectNotifications:

    def __init__(self):
        self.wptools = WikiProjectTools()
        q = ('create table if not exists notifications '
             '(n_id int(11) NOT NULL auto_increment, '
             'n_project VARCHAR(255) character set utf8 collate utf8_unicode_ci, '
             'n_variant VARCHAR(255) character set utf8 collate utf8_unicode_ci, '
             'n_content TEXT character set utf8 collate utf8_unicode_ci, '
             'primary key (n_id)) '
             'engine=innodb character set=utf8;')
        # self.wptools.query('index', q, None)
        self.bot = pywikibot.Site('en', 'wikipedia', user='******')  # username redacted in source

        # Recognized notification variants
        # A variant that is not any of these kinds will cause an error
        # variantname --> template parameter name
        date = datetime.datetime.utcnow().strftime('%d %B %Y')
        self.contentwrapper = '<div style="max-width:500px; padding-bottom:2.5em;">'
        self.recognizedvariants = {
            'newmember': 'notification_when_a_new_member_joins',
            'newdiscussion': 'notification_when_a_new_discussion_topic_is_posted'}
        self.varianttext = {
            'newmember': '==New member report for ' + date + '==\n'
                         + self.contentwrapper
                         + 'The following users joined the WikiProject in the past day:\n',
            'newdiscussion': '==New discussion report for ' + date + '==\n'
                             + self.contentwrapper
                             + 'New discussions that are of interest to the WikiProject:\n'}

    def active_user(self, username):
        '''
        Determines if a username meets a basic threshold of activity.
        Takes string *username*, returns boolean.
        The threshold is one edit in the recentchanges tables
        (i.e. in the past 30 days).
        '''
        q = 'select count(*) from recentchanges_userindex where rc_user_text = "{0}"'.format(
            username.replace('_', ' '))
        if self.wptools.query('wiki', q, None)[0][0] > 0:
            return True
        else:
            return False

    def post(self, project, variant, content):
        '''
        Adds an item to the WikiProject Notification Center,
        to be included in the next update.
        '''
        if variant in self.recognizedvariants:
            q = 'insert into notifications (n_project, n_variant, n_content) values ("{0}", "{1}", "{2}");'
            q = q.format(project, variant, content)
            self.wptools.query('index', q, None)
        else:
            raise NotificationVariantError(variant)  # defined after this class

    def findsubscribers(self):
        '''
        Generates a dictionary of WikiProjects with notification centers
        and corresponding report subscribers.
        '''
        q = ('select page_title from templatelinks '
             'join page on page_id = tl_from and page_namespace = tl_from_namespace '
             'where page_namespace = 2 and tl_namespace = 10 '
             'and tl_title = "WikiProjectCard";')
        output = {}
        for row in self.wptools.query('wiki', q, None):
            title = row[0].decode('utf-8')
            components = title.split('/')  # e.g. ['Harej', 'WikiProjectCards', 'WikiProject_Women_in_Technology']
            username = components[0]

            # No notifications for inactive users
            if self.active_user(username) == False:
                continue

            # Carrying on...
            # The remainder of this method, and most of the method that
            # delivers the queued notifications, were redacted in the source:
            title = "User: "******"New notification", minor=False, asynchronous=True)

        # Deleting old records now that notifications have been sent out
        if len(id_to_delete) > 0:
            if len(id_to_delete) == 1:
                self.wptools.query(
                    'index',
                    'delete from notifications where n_id = {0};'.format(id_to_delete[0]),
                    None)
            else:
                self.wptools.query(
                    'index',
                    'delete from notifications where n_id in {0};'.format(tuple(id_to_delete)),
                    None)
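# NotificationVariantError is raised by post() above but is not defined in
# this section; a minimal definition consistent with that use:
class NotificationVariantError(Exception):
    def __init__(self, variant):
        super().__init__('Unrecognized notification variant: ' + str(variant))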
def main(self, rootpage):
    # Initializing...
    bot = pywikibot.Site('en', 'wikipedia')
    wptools = WikiProjectTools()
    config = json.loads(wptools.query('index', 'select json from config;', None)[0][0])

    # Get list of people who opted out
    optout = pywikibot.Page(bot, 'User:Reports bot/Opt-out')
    blacklist = []
    regexes = [re.findall(r'\[\[User:(.*?)\|', optout.text, re.I),
               re.findall(r'\{\{user\|(.*?)\}\}', optout.text, re.I),
               re.findall(r'\[\[:User:(.*?)\]', optout.text, re.I),
               re.findall(r'\[\[:User talk:(.*?)\]', optout.text, re.I)]
    for results in regexes:
        for user in results:
            blacklist.append(user)

    # Bots are to be excluded
    for result in wptools.query('wiki', "select user_name from user_groups left join user on user_id = ug_user where ug_group = 'bot';", None):
        blacklist.append(result[0].decode('utf-8'))

    # List of projects we are working on
    # Methodology: List from Project Index + List from Formal Definition, minus duplicates
    # This will cover all of our bases.
    articles = {}
    counter = 0
    while True:  # I am a bad man for doing this
        query = wptools.query('index', 'select pi_page, pi_project from projectindex where pi_id > {0} and pi_id <= {1};'.format(counter, counter + 1000000), None)
        if len(query) == 0:
            break
        for pair in query:
            # Normalizing by getting rid of the namespace prefix
            page = pair[0]
            page = page.replace('Draft_talk:', '')
            page = page.replace('Talk:', '')
            proj = pair[1][10:]  # Normalizing by getting rid of "Wikipedia:"
            try:
                articles[proj].append(page)
            except KeyError:
                articles[proj] = [page]
        counter += 1000000

    projects = [project for project in articles.keys()]

    q = ('select distinct page.page_title from page '
         'join categorylinks on page.page_id = categorylinks.cl_from '
         'left join redirect on page.page_id = redirect.rd_from '
         'where page_namespace = 4 '
         'and page_title not like "%/%" '
         'and rd_title is null '
         'and (cl_to in '
         '(select page.page_title from page '
         'where page_namespace = 14 and '
         'page_title like "%\\_WikiProjects" '
         'and page_title not like "%\\_for\\_WikiProjects" '
         'and page_title not like "%\\_of\\_WikiProjects") '
         'or page_title like "WikiProject\\_%");')
    formaldefinition = wptools.query('wiki', q, None)  # http://quarry.wmflabs.org/query/3509
    for row in formaldefinition:
        row = row[0].decode('utf-8')
        if row not in projects:
            projects.append(row)
    projects.sort()

    directories = {'All': ''}  # All projects, plus subdirectories to be defined below.
    directoryrow = {}

    # Alright! Let's run some reports!
    for project in projects:
        # Seeding directory row and profile page
        if project not in articles:
            articles[project] = []
        project_normalized = project.replace('_', ' ')

        # List of active project participants (less the blacklist)
        wp_editors = []
        start_date = time.strftime('%Y%m%d000000', time.gmtime(time.time() - (60 * 60 * 24 * 90)))  # 90 days
        end_date = time.strftime('%Y%m%d000000', time.gmtime(time.time()))  # Today
        query = ('select rev_user_text from page left join revision on page_id = rev_page '
                 'where (page_namespace = 4 OR page_namespace = 5) '
                 'and (page_title like "{0}/%%" OR page_title = "{0}") '
                 'and rev_timestamp > {1} and rev_timestamp < {2} '
                 'group by rev_user_text HAVING count(*) > 1;').format(project, start_date, end_date)
        for result in wptools.query('wiki', query, None):
            if result[0] is not None:
                user = result[0].decode('utf-8')
                if user not in blacklist:
                    wp_editors.append(user)
        wp_editors.sort()

        # List of active subject area editors (less the blacklist)
        start_date = time.strftime('%Y%m%d000000', time.gmtime(time.time() - (60 * 60 * 24 * 30)))  # 30 days
        end_date = time.strftime('%Y%m%d000000', time.gmtime(time.time()))  # Today
        if len(articles[project]) > 0:
            subject_editors = []
            packages = []
            for i in range(0, len(articles[project]), 10000):
                packages.append(articles[project][i:i + 10000])
            counter = 0
            for package in packages:
                counter += 1
                if len(package) > 1:
                    query_builder = ('select rev_user_text from page left join revision on page_id = rev_page '
                                     'where page_namespace in (0, 1, 118, 119) and page_title in {0} '
                                     'and rev_timestamp > {1} and rev_timestamp < {2} '
                                     'order by rev_user_text;').format(tuple(package), start_date, end_date)
                else:
                    query_builder = ('select rev_user_text from page left join revision on page_id = rev_page '
                                     'where page_namespace in (0, 1, 118, 119) and page_title = "{0}" '
                                     'and rev_timestamp > {1} and rev_timestamp < {2} '
                                     'order by rev_user_text;').format(package[0], start_date, end_date)
                for result in wptools.query('wiki', query_builder, None):
                    if result[0] is not None:
                        subject_editors.append(result[0].decode('utf-8'))

            # Convert the list to a dictionary with username as key and edit count as value
            subject_editors = dict(Counter(subject_editors))
            subject_editors_filtered = []
            for user in subject_editors.keys():
                if user not in blacklist:
                    if subject_editors[user] > 4:
                        subject_editors_filtered.append(user)
            subject_editors = subject_editors_filtered  # And now assigned back.
            subject_editors.sort()
        else:
            subject_editors = []

        # Generate and save the profile page
        wp_editors_formatted = ""
        subject_editors_formatted = ""
        if len(wp_editors) > 0:
            for editor in wp_editors:
                wp_editors_formatted += "\n* [[User:{0}|{0}]] ([[User talk:{0}|talk]])".format(editor)
        else:
            wp_editors_formatted = ""
        if len(subject_editors) > 0 and len(subject_editors) < 3200:
            for editor in subject_editors:
                subject_editors_formatted += "\n* [[User:{0}|{0}]] ([[User talk:{0}|talk]])".format(editor)
        else:
            subject_editors_formatted = ""
        profilepage = "{{{{WikiProject description page | project = {0} | list_of_active_wikiproject_participants = {1} | list_of_active_subject_area_editors = {2}}}}}".format(project_normalized, wp_editors_formatted, subject_editors_formatted)
        page = pywikibot.Page(bot, rootpage + '/Description/' + project_normalized)
        if profilepage != page.text:  # Checking if a change was made, to cut down on API queries
            page.text = profilepage
            page.save('Updating', minor=False, asynchronous=True, quiet=True)

        # Construct directory entry
        directoryrow[project] = "{{{{WikiProject directory entry | project = {0} | number_of_articles = {1} | wp_editors = {2} | scope_editors = {3}}}}}\n".format(project_normalized, len(articles[project]), len(wp_editors), len(subject_editors))

    # Assign directory entries to the relevant directory pages ("All entries" and the subdirectory pages)
    for entry in sorted(directoryrow.items(), key=operator.itemgetter(1)):  # Sorting into alphabetical order
        directories['All'] += entry[1]
    directories['All'] = "{{WikiProject directory top}}\n" + directories['All'] + "|}"

    wpcats = WikiProjectCategories()
    tree = wpcats.generate()
    index_primary = sorted([key for key in tree.keys()])
    index_secondary = {}
    indextext = "'''[[{0}/All|All WikiProjects]]'''\n\n".format(rootpage)
    for firstlevel in tree.keys():
        directories[firstlevel] = "={0}=\n".format(firstlevel.replace('_', ' '))
        directories[firstlevel] += self.listpull(wptools, projects, directoryrow, firstlevel)  # For immediate subcats of WikiProjects_by_area
        directories[firstlevel] += self.treeiterator(wptools, tree[firstlevel], projects, directoryrow, firstlevel)  # For descendants of those immediate subcats (sketched after this method)
        index_secondary[firstlevel] = sorted([key for key in tree[firstlevel].keys()])

    # Updating the directory index
    for firstlevel in index_primary:
        firstlevel_normalized = firstlevel.replace('_', ' ')
        indextext += ";[[{0}/{1}|{1}]]".format(rootpage, firstlevel_normalized)
        if len(tree[firstlevel]) > 0:
            indextext += " : "
            for secondlevel in index_secondary[firstlevel]:
                indextext += "[[{0}/{1}#{2}|{2}]] – ".format(rootpage, firstlevel_normalized, secondlevel.replace('_', ' '))
            indextext = indextext[:-3]  # Truncates the trailing dash (and is also a cute smiley face)
        indextext += "\n\n"
    saveindex = pywikibot.Page(bot, 'Template:WikiProject directory index')
    saveindex.text = indextext
    saveindex.save('Updating', minor=False, asynchronous=True, quiet=True)

    # Generate directories and save!
    for directory in directories.keys():
        contents = directories[directory]
        page = pywikibot.Page(bot, rootpage + "/" + directory)
        if contents != page.text:  # Checking if a change was made, to cut down on API save queries
            oldcontents = page.text
            page.text = contents
            page.save('Updating', minor=False, asynchronous=True, quiet=True)

            # Cleanup of obsolete description pages and "Related WikiProjects" pages
            if directory == 'All':
                oldcontents = mwph.parse(oldcontents)
                oldcontents = oldcontents.filter_templates()
                oldprojectlist = []
                for t in oldcontents:
                    if t.name.strip() == "WikiProject directory entry":
                        oldprojectlist.append(str(t.get('project').value))
                for oldproject in oldprojectlist:
                    oldproject = oldproject.strip().replace(' ', '_')  # Normalizing
                    if oldproject not in projects:
                        deletethis = pywikibot.Page(bot, rootpage + '/Description/' + oldproject)
                        deletethis.text = "{{db-g6|rationale=A bot has automatically tagged this page as obsolete. This means that the WikiProject described on this page has been deleted or made into a redirect}}\n"
                        deletethis.save('Nominating page for deletion', minor=False, asynchronous=True, quiet=True)
                        deletethis = pywikibot.Page(bot, 'Wikipedia:Related WikiProjects/' + oldproject)
                        if deletethis.text != "":
                            deletethis.text = "{{db-g6|rationale=A bot has automatically tagged this page as obsolete. This means that the WikiProject described on this page has been deleted or made into a redirect}}\n"
                            deletethis.save('Nominating page for deletion', minor=False, asynchronous=True, quiet=True)
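# listpull() and treeiterator() are called above but are not defined in this
# section. A sketch of treeiterator consistent with its call sites: it walks
# one branch of the category tree depth-first, delegating each category to
# listpull, which (per its use above) returns the directory rows for the
# projects found in that category. The heading format is a guess.
def treeiterator(self, wptools, tree, projects, directoryrow, parent):
    text = ''
    for subcategory, subtree in tree.items():
        text += '=={0}==\n'.format(subcategory.replace('_', ' '))
        text += self.listpull(wptools, projects, directoryrow, subcategory)
        if subtree:  # Leaf nodes map to None
            text += self.treeiterator(wptools, subtree, projects,
                                      directoryrow, subcategory)
    return text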
class WikiProjectMembers:

    def __init__(self):
        self.wptools = WikiProjectTools()
        self.wpn = WikiProjectNotifications()

    def queue_notification(self, project, username):
        '''Queue a new member notification'''
        # The exact wikitext was redacted in the source; a minimal
        # reconstruction of the bullet line:
        content = "* User:" + username
        self.wpn.post(project, "newmember", content)

    def run(self):
        bot = pywikibot.Site('en', 'wikipedia')
        q = ('select page_title from templatelinks '
             'join page on page_id = tl_from and page_namespace = tl_from_namespace '
             'where page_namespace = 2 and tl_namespace = 10 '
             'and tl_title = "WikiProjectCard";')

        # Generate list of WikiProjects and members through the WikiProjectCard system
        members = {}
        for row in self.wptools.query('wiki', q, None):
            title = row[0].decode('utf-8')
            components = title.split('/')  # e.g. ['Harej', 'WikiProjectCards', 'WikiProject_Women_in_Technology']
            # The rest of this loop was redacted in the source; the code below
            # implies it maps each WikiProject to its member usernames,
            # roughly as follows (reconstruction assumed):
            members.setdefault(components[2], []).append(components[0])

        for wikiproject in members.keys():  # loop header implied by the redacted span
            return_to_wikiproject = "{{{{Clickable button 2|Wikipedia:{0}|Return to WikiProject|class=mw-ui-neutral}}}}<span class='wp-formsGadget mw-ui-button mw-ui-progressive' data-mode='create' data-type='Join'>Join WikiProject</span>".format(wikiproject)
            lua_garbage = "{{#invoke:<includeonly>random|list|limit=3</includeonly><noinclude>list|unbulleted</noinclude>|"
            active = "<noinclude>" + return_to_wikiproject + "\n\n<div style='padding-top:1.5em; padding-bottom:2em;'>Our WikiProject members are below. Those who have not edited Wikipedia in over a month are moved to the [[Wikipedia:{0}/Members/Inactive|inactive members list]].</div>\n\n</noinclude>".format(wikiproject) + lua_garbage
            inactive = "<noinclude>" + return_to_wikiproject + "\n\n<div style='padding-top:1.5em; padding-bottom:2em;'>These are our members who have not edited in a while. Once they edit again, they will be moved back to the [[Wikipedia:{0}/Members|active members list]].</div>\n\n</noinclude>".format(wikiproject) + lua_garbage
            for member in members[wikiproject]:
                # Transclude the member's WikiProjectCard (the member name was
                # redacted in the source; reconstruction assumed):
                addition = "{{User:" + member + "/WikiProjectCards/" + wikiproject + "<includeonly>|mode=compact</includeonly>}}|"
                if self.wpn.active_user(member):
                    active += addition
                else:
                    inactive += addition
            active = active[:-1] + "}}"  # removing the trailing pipe and closing off the module
            inactive += "}}"

            # Generate the old list to prepare a diff
            page_active = pywikibot.Page(bot, "Wikipedia:" + wikiproject + "/Members")
            page_inactive = pywikibot.Page(bot, "Wikipedia:" + wikiproject + "/Members/Inactive")
            oldnames = []
            for text in [page_active.text, page_inactive.text]:
                contents = mwparserfromhell.parse(text)
                contents = contents.filter_templates()
                for t in contents:
                    if t.name[:5] == "User:":
                        # i.e. grab the username from the template
                        oldnames.append(str(t.name).split("/")[0][5:])
            newnames = list(set(members[wikiproject]) - set(oldnames))
            newnames.sort()
            print(newnames)

            # Anyone in the *newnames* set is a new user. Queue the notification!
            for member in newnames:
                self.queue_notification(wikiproject, member)

            # Now, save the pages.
            page_active.text = active
            page_active.save("Updating member list", minor=False, asynchronous=True)
            page_inactive.text = inactive
            page_inactive.save("Updating member list", minor=False, asynchronous=True)
class WikiProjectAssess:

    def __init__(self):
        self.bot = pywikibot.Site("en", "wikipedia")
        self.wptools = WikiProjectTools()
        self.projects = []
        self.predictorseed = {}
        self.unknownquality = {}
        self.unknownpriority = {}
        self.config = self.wptools.query("index", "select json from config;", None)
        self.config = json.loads(self.config[0][0])
        for entry in self.config["projects"]:
            if ("assessment_tools" in entry
                    and "at_category" in entry
                    and "at_unknown_quality" in entry
                    and "at_unknown_priority" in entry):
                project = entry["name"][10:]  # Normalizing title
                self.projects.append(project)
                self.predictorseed[project] = entry["at_category"].replace(" ", "_")
                self.unknownquality[project] = entry["at_unknown_quality"].replace(" ", "_")
                self.unknownpriority[project] = entry["at_unknown_priority"].replace(" ", "_")

    def qualitypredictor(self, pagetitles):
        """
        Makes a query to ORES that predicts the quality of an article.
        Takes list *pagetitles* as input.
        Returns a list of tuples (title, prediction).
        Input MUST be a list. If there is only one title, enter it as [title].
        """
        output = []

        # Split into packages of 50 titles each
        packages = [pagetitles[i:i + 50] for i in range(0, len(pagetitles), 50)]
        for package in packages:
            if len(package) > 1:
                q = ("select page_title, page_latest from page "
                     "where page_namespace = 0 and page_title in {0} "
                     "order by page_title limit 100;").format(tuple(package))
            else:
                q = ("select page_title, page_latest from page "
                     "where page_namespace = 0 "
                     'and page_title = "{0}";').format(package[0])
            revision_ids = {str(row[1]): row[0].decode("utf-8")
                            for row in self.wptools.query("wiki", q, None)}
            api_input = [rev_id for rev_id in revision_ids.keys()]

            # This is the original ORES endpoint used by the script; the
            # service has since moved to ores.wikimedia.org and the wp10
            # model has been renamed.
            api_url = "http://ores.wmflabs.org/scores/enwiki/wp10/?revids="
            for rev_id in api_input:
                api_url += rev_id + "|"
            api_url = api_url[:-1]  # Truncating the extra vertical pipe
            query = requests.get(api_url)
            query = query.json()
            for rev_id, result in query.items():
                pair = (revision_ids[rev_id], result["prediction"])
                output.append(pair)
        return output

    def qualitylist(self):
        for wikiproject, category in self.unknownquality.items():
            save_to = "User:Reports bot/" + wikiproject + "/Assessment/Assess for quality"
            q = ("select page_title from categorylinks "
                 "join page on cl_from = page_id "
                 'where cl_to = "{0}";').format(category.replace(" ", "_"))
            to_process = [row[0].decode("utf-8") for row in self.wptools.query("wiki", q, None)]
            to_process = self.qualitypredictor(to_process)

            contents = ("{{WPX list start|color={{{2|#086}}}<includeonly>|constrained=yes</includeonly>|title=Assess for quality"
                        "|intro=Determine the quality of these articles<br />"
                        "{{WPX last updated|" + save_to + "}}}}<br />\n\n"
                        "{{#invoke:<includeonly>random|list|limit=3"
                        "</includeonly><noinclude>list|unbulleted</noinclude>|")
            for pair in to_process:
                article = pair[0].replace("_", " ")
                prediction = pair[1]
                contents += ("{{WPX block|color={{{1|#37f}}}|largetext=<b>[["
                             + article + "]]</b> "
                             + "([[Talk:" + article + "|talk]])|smalltext="
                             + "Predicted class: " + prediction + "}}|")
            contents = contents[:-1] + "}}\n{{WPX list end|more=" + save_to + "}}"
            page = pywikibot.Page(self.bot, save_to)
            page.text = contents
            page.save("Updating listing", minor=False, asynchronous=True)

    def scopepredictor(self):
        for wikiproject, category in self.predictorseed.items():
            category_recs = []
            article_recs = []

            # This query produces a list of pages that belong to categories
            # that have been tagged by the WikiProject
            q = ("select page_namespace, page_title from page "
                 "join categorylinks on categorylinks.cl_from = page.page_id "
                 "where page_namespace in (0, 14) "
                 "and cl_to in ( "
                 "select page.page_title from page "
                 "join categorylinks on categorylinks.cl_from = page.page_id "
                 "where page_namespace = 15 "
                 'and cl_to = "{0}");').format(category)
            for row in self.wptools.query("wiki", q, None):
                ns = row[0]
                page = row[1].decode("utf-8")
                if ns == 0:
                    article_recs.append(page)
                elif ns == 14:
                    category_recs.append(page)

            # Filter against these lists:
            q = ("select pi_page from projectindex "
                 'where pi_project = "Wikipedia:{0}";').format(wikiproject.replace(" ", "_"))
            article_filter = [row[0].replace("Talk:", "")
                              for row in self.wptools.query("index", q, None)
                              if row[0].startswith("Talk")]
            q = ("select page_title from page "
                 "join categorylinks on cl_from = page_id "
                 "where page_namespace = 15 "
                 'and cl_to = "{0}";').format(category)
            category_filter = [row[0].decode("utf-8") for row in self.wptools.query("wiki", q, None)]

            # Now do the filtering...
            category_recs = list(set(category_recs) - set(category_filter))
            article_recs = list(set(article_recs) - set(article_filter))

            # Unite them together...
            recommendations = ([":Category:" + name for name in category_recs]
                               + [name for name in article_recs])

            # And lop it off at 100!
            recommendations = recommendations[:100]

            # Class prediction
            predicted_class = self.qualitypredictor(
                [page for page in recommendations if page.startswith(":Category:") == False]
            ) + [(page, "Category") for page in recommendations if page.startswith(":Category:") == True]
            predicted_class = {pair[0]: pair[1] for pair in predicted_class}

            save_to = "User:Reports bot/" + wikiproject + "/Assessment/Not tagged"
            contents = ("{{WPX list start|color={{{2|#086}}}<includeonly>|constrained=yes</includeonly>|title=Not tagged by the WikiProject|"
                        "intro=These pages are potentially in the WikiProject's"
                        " scope.<br />{{WPX last updated|" + save_to + "}}}}"
                        "<br />\n\n"
                        "{{#invoke:<includeonly>random|list|limit=3"
                        "</includeonly><noinclude>list|unbulleted</noinclude>|")
            for recommendation in recommendations:
                contents += ("{{WPX block|color={{{1|#37f}}}|largetext=<b>[["
                             + recommendation.replace("_", " ")
                             + "]]</b> ([[Talk:" + recommendation
                             + "|talk]])|smalltext=Predicted class: "
                             + predicted_class[recommendation] + "}}|")
            contents = contents.replace("Talk::Category:", "Category talk:")
            contents = contents[:-1] + "}}\n{{WPX list end|more=" + save_to + "}}"
            page = pywikibot.Page(self.bot, save_to)
            page.text = contents
            page.save("Updating listing", minor=False, asynchronous=True)
def main(): # This is used for Aaron Halfaker's API wrapper... loginfile = configparser.ConfigParser() loginfile.read([os.path.expanduser('~/.wiki.ini')]) username = loginfile.get('wiki', 'username') password = loginfile.get('wiki', 'password') # ...And this is for Pywikibot bot = pywikibot.Site('en', 'wikipedia') wptools = WikiProjectTools() now = datetime.datetime.utcnow() now = now.strftime('%Y%m%d%H%M%S') # converts timestamp to MediaWiki format # Pulling timestamp of the last time the script was run query = wptools.query('index', 'select lu_timestamp from lastupdated where lu_key = "new_discussions";', None) lastupdated = query[0][0] # Polling for newest talk page posts in the last thirty minutes query = wptools.query('wiki', 'select distinct recentchanges.rc_this_oldid, page.page_id, recentchanges.rc_title, recentchanges.rc_comment, recentchanges.rc_timestamp, page.page_namespace from recentchanges join page on recentchanges.rc_namespace = page.page_namespace and recentchanges.rc_title = page.page_title join categorylinks on page.page_id=categorylinks.cl_from where rc_timestamp >= {0} and rc_timestamp < {1} and rc_comment like "% new section" and rc_deleted = 0 and cl_to like "%_articles" and page_namespace not in (0, 2, 6, 8, 10, 12, 14, 100, 108, 118) order by rc_timestamp asc;'.format(lastupdated, now), None) # Cleaning up output namespace = {1: 'Talk:', 3: 'User_talk:', 4: 'Wikipedia:', 5: 'Wikipedia_talk:', 7: 'File_talk:', 9: 'MediaWiki_talk:', 11: 'Template_talk:', 13: 'Help_talk:', 15: 'Category_talk:', 101: 'Portal_talk:', 109: 'Book_talk:', 119: 'Draft_talk:', 447: 'Education_Program_talk:', 711: 'TimedText_talk:', 829: 'Module_talk:', 2600: 'Topic:'} output = [] for row in query: rc_id = row[0] page_id = row[1] rc_title = row[2].decode('utf-8') rc_comment = row[3].decode('utf-8') rc_comment = rc_comment[3:] # Truncate beginning part of the edit summary rc_comment = rc_comment[:-15] # Truncate end of the edit summary rc_timestamp = row[4].decode('utf-8') rc_timestamp = datetime.datetime.strptime(rc_timestamp, '%Y%m%d%H%M%S') rc_timestamp = rc_timestamp.strftime('%H:%M, %d %B %Y (UTC)') page_namespace = row[5] page_namespace = namespace[page_namespace] session = api.Session("https://en.wikipedia.org/w/api.php", user_agent='WPX Revert Checker') session.login(username, password) # Check if revision has been reverted reverted = reverts.api.check(session, rc_id, page_id, 3, None, 172800, None) if reverted is None: entry = {'title': (page_namespace + rc_title), 'section': rc_comment, 'timestamp': rc_timestamp} output.append(entry) # Loading list of WikiProjects signed up to get lists of new discussions config = json.loads(wptools.query('index', 'select json from config;', None)[0][0]) if config['defaults']['new_discussions'] == False: # i.e. if New Discussions is an opt-in system whitelist = [] # Whitelisted WikiProjects for new discussion lists for project in config['projects']: try: project['new_discussions'] except KeyError: continue else: if project['new_discussions'] == True: whitelist.append(project['name']) else: whitelist = None # A whitelist of [] is one where there is a whitelist, but it's just empty. # A whitelist of None is for situations where the need for a whitelist has been obviated. 
    # Generating a list of WikiProjects for each thread
    for thread in output:
        query = wptools.query('index', 'select distinct pi_project from projectindex where pi_page = %s;', (thread['title'],))
        thread['wikiprojects'] = []
        for row in query:
            wikiproject = row[0].replace('_', ' ')
            if (whitelist is None) or (wikiproject in whitelist):
                thread['wikiprojects'].append(wikiproject)
        for wikiproject in thread['wikiprojects']:
            saveto = wikiproject + '/Discussions'
            page = pywikibot.Page(bot, saveto)
            intro_garbage = '{{WPX header|Discussions|color={{{1|#37f}}}}}\n'
            intro_garbage += '{{{{WPX action box|color={{{{{{2|#086}}}}}}|title=Have a question?|content={{{{Clickable button 2|url=//en.wikipedia.org/wiki/Wikipedia_talk:{0}?action=edit&section=new|Ask the WikiProject|class=mw-ui-progressive mw-ui-block}}}}\n\n{{{{Clickable button 2|Wikipedia talk:{0}|View Other Discussions|class=mw-ui-block}}}}}}}}\n'.format(wikiproject[10:].replace(' ', '_'))
            intro_garbage += '{{{{WPX list start|intro={{{{WPX last updated|{0}}}}}}}}}\n\n'.format(saveto)
            draft = '<noinclude><div style="padding-bottom:1em;">{{{{Clickable button 2|{0}|Return to WikiProject|class=mw-ui-neutral}}}}</div>\n</noinclude>'.format(wikiproject) + intro_garbage
            submission = '{{{{WPX new discussion|color={{{{{{1|#37f}}}}}}|title={0}|section={1}|timestamp={2}}}}}\n'.format(thread['title'].replace('_', ' '), thread['section'], thread['timestamp'])
            notification = "* '''[[{0}#{1}|{1}]]''' on {0}".format(thread['title'].replace('_', ' '), thread['section'])
            queue_notification(wikiproject[10:].replace(' ', '_'), notification)

            index = mwparserfromhell.parse(page.text)
            index = index.filter_templates()
            templatelist = []
            for i in index:
                if i.name == "WPX new discussion":
                    templatelist.append(str(i))
            templatelist = templatelist[:14]  # Sayonara, old threads!

            page.text = draft + submission
            if len(templatelist) > 3:
                templatelist[2] += "<noinclude>"  # Anything after the third item will not be transcluded
                templatelist[len(templatelist) - 1] += "</noinclude>"
            for i in templatelist:
                page.text += i + "\n"
            page.text += "{{{{WPX list end|more={0}}}}}".format(saveto.replace(' ', '_'))
            page.save('New discussion on [[{0}]]'.format(thread['title'].replace('_', ' ')), minor=False)

    # Update the Last Updated field with the new timestamp
    wptools.query('index', 'update lastupdated set lu_timestamp = {0} where lu_key = "new_discussions";'.format(now), None)
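# A minimal sketch of how this script might be invoked when run directly,
# e.g. from a cron job. The __main__ guard is an assumption on my part and
# not part of the original file:
if __name__ == '__main__':
    main()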
def main(rootpage):
    d = WikiProjectDirectory()
    wptools = WikiProjectTools()
    wpcats = WikiProjectCategories()
    tree = wpcats.generate()
    bot = pywikibot.Site('en', 'wikipedia')
    directories = {}
    directoryrow = {}
    projects = []

    # Generate the directoryrow and projects lists based on the /All directory:
    page = pywikibot.Page(bot, rootpage + '/All')
    contents = mwph.parse(page.text)
    contents = contents.filter_templates()
    for t in contents:
        if t.name.strip() == "WikiProject directory entry":
            name = str(t.get('project').value).strip().replace(' ', '_')
            projects.append(name)
            directoryrow[name] = str(t) + "\n"

    # The rest of this is copied from directory.py
    index_primary = sorted(tree.keys())
    index_secondary = {}
    indextext = "'''[[{0}/All|All WikiProjects]]'''\n\n".format(rootpage)
    for firstlevel in tree.keys():
        directories[firstlevel] = "={0}=\n".format(firstlevel.replace('_', ' '))
        directories[firstlevel] += d.listpull(wptools, projects, directoryrow, firstlevel)  # For immediate subcats of WikiProjects_by_area
        directories[firstlevel] += d.treeiterator(wptools, tree[firstlevel], projects, directoryrow, firstlevel)  # For descendants of those immediate subcats
        index_secondary[firstlevel] = sorted(tree[firstlevel].keys())

    # Updating the directory index
    for firstlevel in index_primary:
        firstlevel_normalized = firstlevel.replace('_', ' ')
        indextext += ";[[{0}/{1}|{1}]]".format(rootpage, firstlevel_normalized)
        if len(tree[firstlevel]) > 0:
            indextext += " : "
            for secondlevel in index_secondary[firstlevel]:
                indextext += "[[{0}/{1}#{2}|{2}]] – ".format(rootpage, firstlevel_normalized, secondlevel.replace('_', ' '))
            indextext = indextext[:-3]  # Truncates the trailing dash (and is also a cute smiley face)
        indextext += "\n\n"
    saveindex = pywikibot.Page(bot, 'Template:WikiProject directory index')
    saveindex.text = indextext
    saveindex.save('Updating', minor=False, asynchronous=True)

    # Generate directories and save!
    for directory in directories.keys():
        contents = directories[directory]
        page = pywikibot.Page(bot, rootpage + "/" + directory)
        if contents != page.text:  # Only save if something changed, to cut down on API save queries
            page.text = contents
            page.save('Updating', minor=False, asynchronous=True)
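# A minimal sketch of how this entry point might be driven. The root page
# name below is a hypothetical example, not taken from the original file:
if __name__ == '__main__':
    main('Wikipedia:WikiProject Directory')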
class WikiProjectAssess:
    def __init__(self):
        self.bot = pywikibot.Site('en', 'wikipedia')
        self.wptools = WikiProjectTools()
        self.projects = []
        self.predictorseed = {}
        self.unknownquality = {}
        self.unknownpriority = {}

        self.config = self.wptools.query('index', 'select json from config;', None)
        self.config = json.loads(self.config[0][0])
        for entry in self.config['projects']:
            if 'assessment_tools' in entry \
            and 'at_category' in entry \
            and 'at_unknown_quality' in entry \
            and 'at_unknown_priority' in entry:
                project = entry['name'][10:]  # Normalizing title
                self.projects.append(project)
                self.predictorseed[project] = entry['at_category'].replace(' ', '_')
                self.unknownquality[project] = entry['at_unknown_quality'].replace(' ', '_')
                self.unknownpriority[project] = entry['at_unknown_priority'].replace(' ', '_')

    def qualitypredictor(self, pagetitles):
        '''
        Makes a query to ORES that predicts the quality of an article.
        Takes list *pagetitles* as input.
        Returns a list of tuples (title, prediction).
        Input MUST be a list. If there is only one title, enter it as [title].
        '''
        output = []

        # Split into packages of up to 50 titles each
        packages = [pagetitles[i:i+50] for i in range(0, len(pagetitles), 50)]
        for package in packages:
            if len(package) > 1:
                q = ('select page_title, page_latest from page '
                     'where page_namespace = 0 and page_title in {0} '
                     'order by page_title limit 100;').format(tuple(package))
            else:
                q = ('select page_title, page_latest from page '
                     'where page_namespace = 0 '
                     'and page_title = "{0}";').format(package[0])
            revision_ids = {str(row[1]): row[0].decode('utf-8')
                            for row in self.wptools.query('wiki', q, None)}

            # Batch the revision IDs into a single ORES request
            api_url = "http://ores.wmflabs.org/scores/enwiki/wp10/?revids="
            api_url += "|".join(revision_ids.keys())
            query = requests.get(api_url).json()
            for rev_id, result in query.items():
                output.append((revision_ids[rev_id], result['prediction']))
        return output

    def qualitylist(self):
        for wikiproject, category in self.unknownquality.items():
            save_to = "User:Reports bot/" + wikiproject + "/Assessment/Assess for quality"
            q = ('select page_title from categorylinks '
                 'join page on cl_from = page_id '
                 'where cl_to = "{0}";').format(category.replace(' ', '_'))
            to_process = [row[0].decode('utf-8')
                          for row in self.wptools.query('wiki', q, None)]
            to_process = self.qualitypredictor(to_process)
            contents = ("{{WPX list start|color={{{2|#086}}}<includeonly>|constrained=yes</includeonly>|title=Assess for quality"
                        "|intro=Determine the quality of these articles<br />"
                        "{{WPX last updated|" + save_to + "}}}}<br />\n\n"
                        "{{#invoke:<includeonly>random|list|limit=3"
                        "</includeonly><noinclude>list|unbulleted</noinclude>|")
            for pair in to_process:
                article = pair[0].replace("_", " ")
                prediction = pair[1]
                contents += ("{{WPX block|color={{{1|#37f}}}|largetext=<b>[[" + article + "]]</b> "
                             "([[Talk:" + article + "|talk]])|smalltext="
                             "Predicted class: " + prediction + "}}|")
            contents = contents[:-1] + "}}\n{{WPX list end|more=" + save_to + "}}"
            page = pywikibot.Page(self.bot, save_to)
            page.text = contents
            page.save("Updating listing", minor=False, asynchronous=True)

    def scopepredictor(self):
        for wikiproject, category in self.predictorseed.items():
            category_recs = []
            article_recs = []

            # This query produces a list of pages that belong to categories
            # that have been tagged by the WikiProject
            q = ('select page_namespace, page_title from page '
                 'join categorylinks on categorylinks.cl_from = page.page_id '
                 'where page_namespace in (0, 14) '
                 'and cl_to in ( '
                 'select page.page_title from page '
                 'join categorylinks on categorylinks.cl_from = page.page_id '
                 'where page_namespace = 15 '
                 'and cl_to = "{0}");').format(category)
            for row in self.wptools.query('wiki', q, None):
                ns = row[0]
                page = row[1].decode('utf-8')
                if ns == 0:
                    article_recs.append(page)
                elif ns == 14:
                    category_recs.append(page)

            # Filter against these lists:
            q = ('select pi_page from projectindex '
                 'where pi_project = "Wikipedia:{0}";').format(wikiproject.replace(' ', '_'))
            article_filter = [row[0].replace('Talk:', '')
                              for row in self.wptools.query('index', q, None)
                              if row[0].startswith('Talk')]
            q = ('select page_title from page '
                 'join categorylinks on cl_from = page_id '
                 'where page_namespace = 15 '
                 'and cl_to = "{0}";').format(category)
            category_filter = [row[0].decode('utf-8')
                               for row in self.wptools.query('wiki', q, None)]

            # Now do the filtering...
            category_recs = list(set(category_recs) - set(category_filter))
            article_recs = list(set(article_recs) - set(article_filter))

            # Unite them together...
            recommendations = [':Category:' + name for name in category_recs] + article_recs

            # And lop it off at 100!
            recommendations = recommendations[:100]

            # Class prediction: articles go through ORES; categories get a fixed label
            predicted_class = self.qualitypredictor(
                [page for page in recommendations if not page.startswith(':Category:')]
            ) + [(page, 'Category') for page in recommendations if page.startswith(':Category:')]
            predicted_class = {pair[0]: pair[1] for pair in predicted_class}

            save_to = "User:Reports bot/" + wikiproject + "/Assessment/Not tagged"
            contents = ("{{WPX list start|color={{{2|#086}}}<includeonly>|constrained=yes</includeonly>|title=Not tagged by the WikiProject|"
                        "intro=These pages are potentially in the WikiProject's"
                        " scope.<br />{{WPX last updated|" + save_to + "}}}}"
                        "<br />\n\n"
                        "{{#invoke:<includeonly>random|list|limit=3"
                        "</includeonly><noinclude>list|unbulleted</noinclude>|")
            for recommendation in recommendations:
                contents += ("{{WPX block|color={{{1|#37f}}}|largetext=<b>[["
                             + recommendation.replace('_', ' ')
                             + "]]</b> ([[Talk:" + recommendation
                             + "|talk]])|smalltext=Predicted class: "
                             + predicted_class[recommendation] + "}}|")
            contents = contents.replace("Talk::Category:", "Category talk:")
            contents = contents[:-1] + "}}\n{{WPX list end|more=" + save_to + "}}"
            page = pywikibot.Page(self.bot, save_to)
            page.text = contents
            page.save("Updating listing", minor=False, asynchronous=True)
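# A minimal usage sketch for the class above, assuming it runs as a
# standalone script; the __main__ guard and the run order are assumptions,
# not part of the original file:
if __name__ == '__main__':
    assessor = WikiProjectAssess()
    assessor.qualitylist()     # "Assess for quality" listings, via ORES predictions
    assessor.scopepredictor()  # "Not tagged" listings, seeded from tagged categories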
def main(self, rootpage):
    # Initializing...
    bot = pywikibot.Site('en', 'wikipedia')
    wptools = WikiProjectTools()
    config = json.loads(wptools.query('index', 'select json from config;', None)[0][0])

    # Get the list of people who opted out
    optout = pywikibot.Page(bot, 'User:Reports bot/Opt-out')
    blacklist = []
    regexes = [re.findall(r'\[\[User:(.*?)\|', optout.text, re.I),
               re.findall(r'\{\{user\|(.*?)\}\}', optout.text, re.I),
               re.findall(r'\[\[:User:(.*?)\]', optout.text, re.I),
               re.findall(r'\[\[:User talk:(.*?)\]', optout.text, re.I)]
    for results in regexes:
        for user in results:
            blacklist.append(user)

    # Bots are to be excluded
    for result in wptools.query('wiki', "select user_name from user_groups left join user on user_id = ug_user where ug_group = 'bot';", None):
        blacklist.append(result[0].decode('utf-8'))

    # List of projects we are working on
    # Methodology: list from Project Index + list from Formal Definition, minus duplicates
    # This will cover all of our bases.
    articles = {}
    counter = 0
    while True:  # I am a bad man for doing this
        query = wptools.query('index', 'select pi_page, pi_project from projectindex where pi_id > {0} and pi_id <= {1};'.format(counter, counter + 1000000), None)
        if len(query) == 0:
            break
        for pair in query:
            # Normalizing by getting rid of the namespace prefix
            page = pair[0]
            page = page.replace('Draft_talk:', '')
            page = page.replace('Talk:', '')
            proj = pair[1][10:]  # Normalizing by getting rid of "Wikipedia:"
            try:
                articles[proj].append(page)
            except KeyError:
                articles[proj] = [page]
        counter += 1000000

    projects = [project for project in articles.keys()]
    q = ('select distinct page.page_title from page '
         'join categorylinks on page.page_id = categorylinks.cl_from '
         'left join redirect on page.page_id = redirect.rd_from '
         'where page_namespace = 4 '
         'and page_title not like "%/%" '
         'and rd_title is null '
         'and (cl_to in '
         '(select page.page_title from page '
         'where page_namespace = 14 and '
         'page_title like "%\_WikiProjects" '
         'and page_title not like "%\_for\_WikiProjects" '
         'and page_title not like "%\_of\_WikiProjects") '
         'or page_title like "WikiProject\_%");')
    formaldefinition = wptools.query('wiki', q, None)  # http://quarry.wmflabs.org/query/3509
    for row in formaldefinition:
        row = row[0].decode('utf-8')
        if row not in projects:
            projects.append(row)
    projects.sort()

    directories = {'All': ''}  # All projects, plus subdirectories to be defined below
    directoryrow = {}

    # Alright! Let's run some reports!
    for project in projects:
        # Seeding the directory row and profile page
        if project not in articles:
            articles[project] = []
        project_normalized = project.replace('_', ' ')

        # List of active project participants (less blacklist)
        wp_editors = []
        start_date = time.strftime('%Y%m%d000000', time.gmtime(time.time() - (60 * 60 * 24 * 90)))  # 90 days ago
        end_date = time.strftime('%Y%m%d000000', time.gmtime(time.time()))  # Today
        query = "select rev_user_text from page left join revision on page_id = rev_page where (page_namespace = 4 OR page_namespace = 5) and (page_title like \"{0}/%%\" OR page_title = \"{0}\") and rev_timestamp > {1} and rev_timestamp < {2} group by rev_user_text HAVING count(*) > 1;".format(project, start_date, end_date)
        for result in wptools.query('wiki', query, None):
            if result[0] is not None:
                user = result[0].decode('utf-8')
                if user not in blacklist:
                    wp_editors.append(user)
        wp_editors.sort()

        # List of active subject area editors (less blacklist)
        start_date = time.strftime('%Y%m%d000000', time.gmtime(time.time() - (60 * 60 * 24 * 30)))  # 30 days ago
        end_date = time.strftime('%Y%m%d000000', time.gmtime(time.time()))  # Today

        if len(articles[project]) > 0:
            subject_editors = []
            packages = []
            for i in range(0, len(articles[project]), 10000):
                packages.append(articles[project][i:i + 10000])

            for package in packages:
                if len(package) > 1:
                    query_builder = 'select rev_user_text from page left join revision on page_id = rev_page where page_namespace in (0, 1, 118, 119) and page_title in {0} and rev_timestamp > {1} and rev_timestamp < {2} order by rev_user_text;'.format(tuple(package), start_date, end_date)
                else:
                    query_builder = 'select rev_user_text from page left join revision on page_id = rev_page where page_namespace in (0, 1, 118, 119) and page_title = "{0}" and rev_timestamp > {1} and rev_timestamp < {2} order by rev_user_text;'.format(package[0], start_date, end_date)
                for result in wptools.query('wiki', query_builder, None):
                    if result[0] is not None:
                        subject_editors.append(result[0].decode('utf-8'))

            # Convert the list to a dictionary with username as key and edit count as value
            subject_editors = dict(Counter(subject_editors))
            subject_editors_filtered = []
            for user in subject_editors.keys():
                if user not in blacklist:
                    if subject_editors[user] > 4:
                        subject_editors_filtered.append(user)
            subject_editors = subject_editors_filtered  # And now assigned back
            subject_editors.sort()
        else:
            subject_editors = []

        # Generate and save profile page
        wp_editors_formatted = ""
        subject_editors_formatted = ""
        if len(wp_editors) > 0:
            for editor in wp_editors:
                wp_editors_formatted += "\n* [[User:{0}|{0}]] ([[User talk:{0}|talk]])".format(editor)
        if len(subject_editors) > 0 and len(subject_editors) < 3200:
            for editor in subject_editors:
                subject_editors_formatted += "\n* [[User:{0}|{0}]] ([[User talk:{0}|talk]])".format(editor)

        profilepage = "{{{{WikiProject description page | project = {0} | list_of_active_wikiproject_participants = {1} | list_of_active_subject_area_editors = {2}}}}}".format(project_normalized, wp_editors_formatted, subject_editors_formatted)
        page = pywikibot.Page(bot, rootpage + '/Description/' + project_normalized)
        if profilepage != page.text:  # Only save if something changed, to cut down on API queries
            page.text = profilepage
            page.save('Updating', minor=False, asynchronous=True)

        # Construct directory entry
        directoryrow[project] = "{{{{WikiProject directory entry | project = {0} | number_of_articles = {1} | wp_editors = {2} | scope_editors = {3}}}}}\n".format(project_normalized, len(articles[project]), len(wp_editors), len(subject_editors))

    # Assign directory entries to the relevant directory pages
    # ("All entries" plus the relevant subdirectory pages)
    for entry in sorted(directoryrow.items(), key=operator.itemgetter(1)):  # Sorting into alphabetical order
        directories['All'] += entry[1]
    directories['All'] = "{{WikiProject directory top}}\n" + directories['All'] + "|}"

    wpcats = WikiProjectCategories()
    tree = wpcats.generate()
    index_primary = sorted(tree.keys())
    index_secondary = {}
    indextext = "'''[[{0}/All|All WikiProjects]]'''\n\n".format(rootpage)
    for firstlevel in tree.keys():
        directories[firstlevel] = "={0}=\n".format(firstlevel.replace('_', ' '))
        directories[firstlevel] += self.listpull(wptools, projects, directoryrow, firstlevel)  # For immediate subcats of WikiProjects_by_area
        directories[firstlevel] += self.treeiterator(wptools, tree[firstlevel], projects, directoryrow, firstlevel)  # For descendants of those immediate subcats
        index_secondary[firstlevel] = sorted(tree[firstlevel].keys())

    # Updating the directory index
    for firstlevel in index_primary:
        firstlevel_normalized = firstlevel.replace('_', ' ')
        indextext += ";[[{0}/{1}|{1}]]".format(rootpage, firstlevel_normalized)
        if len(tree[firstlevel]) > 0:
            indextext += " : "
            for secondlevel in index_secondary[firstlevel]:
                indextext += "[[{0}/{1}#{2}|{2}]] – ".format(rootpage, firstlevel_normalized, secondlevel.replace('_', ' '))
            indextext = indextext[:-3]  # Truncates the trailing dash (and is also a cute smiley face)
        indextext += "\n\n"
    saveindex = pywikibot.Page(bot, 'Template:WikiProject directory index')
    saveindex.text = indextext
    saveindex.save('Updating', minor=False, asynchronous=True)

    # Generate directories and save!
    for directory in directories.keys():
        contents = directories[directory]
        page = pywikibot.Page(bot, rootpage + "/" + directory)
        if contents != page.text:  # Only save if something changed, to cut down on API save queries
            oldcontents = page.text
            page.text = contents
            page.save('Updating', minor=False, asynchronous=True)

            # Cleanup of obsolete description pages and "Related WikiProjects" pages
            if directory == 'All':
                oldcontents = mwph.parse(oldcontents)
                oldcontents = oldcontents.filter_templates()
                oldprojectlist = []
                for t in oldcontents:
                    if t.name.strip() == "WikiProject directory entry":
                        oldprojectlist.append(str(t.get('project').value))
                for oldproject in oldprojectlist:
                    oldproject = oldproject.strip().replace(' ', '_')  # Normalizing
                    if oldproject not in projects:
                        deletethis = pywikibot.Page(bot, rootpage + '/Description/' + oldproject)
                        deletethis.text = "{{db-g6|rationale=A bot has automatically tagged this page as obsolete. This means that the WikiProject described on this page has been deleted or made into a redirect}}\n"
                        deletethis.save('Nominating page for deletion', minor=False, asynchronous=True)
                        deletethis = pywikibot.Page(bot, 'Wikipedia:Related WikiProjects/' + oldproject)
                        if deletethis.text != "":
                            deletethis.text = "{{db-g6|rationale=A bot has automatically tagged this page as obsolete. This means that the WikiProject described on this page has been deleted or made into a redirect}}\n"
                            deletethis.save('Nominating page for deletion', minor=False, asynchronous=True)
class PriorityPredictor:
    def __init__(self, viewdump=None):
        print("Initializing the Priority Predictor")
        self.wptools = WikiProjectTools()
        if viewdump is None:  # If a dumped JSON file of pageviews is not specified
            self.dump = getviewdump(self.wptools, 'en', days=30)
        else:
            with open(viewdump, 'r') as f:
                self.dump = json.load(f)  # Load pageviews from a dumped JSON file

    def loadproject(self, wikiproject, unknownpriority):
        self.projectcat = unknownpriority.replace("Unknown-", "")
        self.project = wikiproject
        self.score = {}  # Unsorted dictionary "article: value"; allows for easily looking up scores later

        # We need all the articles for a WikiProject, since the system works
        # by comparing stats for an article to the others.
        print("Preparing Priority Predictor for: " + self.project)
        self.articles = []  # List of strings (article titles)
        pageviews = []      # List of tuples (article title, log of view count)
        linkcount = []      # List of tuples (article title, log of link count)
        for row in self.wptools.query('index', 'select pi_page from projectindex where pi_project = "Wikipedia:{0}";'.format(self.project), None):
            if row[0].startswith("Talk:"):
                article = row[0][5:]  # Stripping out "Talk:"
                self.articles.append(article)
                pageviews.append((article, log(getpageviews(self.dump, article) + 1)))

        # Inbound link count
        # This is batched, thus broken out of the loop
        print("Getting inbound link count...")
        packages = []
        for i in range(0, len(self.articles), 10000):
            packages.append(self.articles[i:i+10000])
        for package in packages:
            for item in getlinkcount(self.wptools, package):
                linkcount.append(item)

        # "Internal Clout"
        # This measures, within a group of articles, the number of links to each other
        # Works amazingly well as a metric
        print("Measuring internal clout...")
        internalclout = getinternalclout(self.wptools, self.articles, self.articles)

        # SOPV, Second-Order Page Views
        # Calculates the page views of the articles linking to the article being assessed
        print("Measuring second-order page views...")
        sopv = getsopv(self.wptools, self.dump, self.articles)

        # Sorting...
        pageviews = sorted(pageviews, key=operator.itemgetter(1), reverse=True)
        linkcount = sorted(linkcount, key=operator.itemgetter(1), reverse=True)
        internalclout = sorted(internalclout, key=operator.itemgetter(1), reverse=True)
        sopv = sorted(sopv, key=operator.itemgetter(1), reverse=True)

        # Converting to dictionaries to weight the factors and add them all together
        print("Preparing weighted scores...")
        pageviews_weighted = {}
        linkcount_weighted = {}
        internalclout_weighted = {}
        sopv_weighted = {}

        # Weights assigned to the different factors
        self.weight_pageviews = 1
        self.weight_linkcount = 1
        self.weight_internalclout = 1
        self.weight_sopv = 1

        for article, count in pageviews:
            pageviews_weighted[article] = count * self.weight_pageviews
        for article, count in linkcount:
            linkcount_weighted[article] = count * self.weight_linkcount
        for article, count in internalclout:
            internalclout_weighted[article] = count * self.weight_internalclout
        for article, count in sopv:
            sopv_weighted[article] = count * self.weight_sopv

        for article in self.articles:
            if article in internalclout_weighted and article in pageviews_weighted \
            and article in linkcount_weighted and article in sopv_weighted:
                # Only the internal-clout and SOPV factors feed the score vector
                self.score[article] = [internalclout_weighted[article], sopv_weighted[article]]

        # Multiclass classification
        print("Making calculations...")
        self.threshold = {}
        self.scorelist = {}
        q = 'select page_title from categorylinks join page on cl_from = page_id where cl_type = "page" and cl_to = "{0}";'
        for priority in ['Top-', 'High-', 'Mid-', 'Low-']:
            prioritycategory = priority + self.projectcat
            self.scorelist[priority] = [(row[0].decode('utf-8'), self.score[row[0].decode('utf-8')])
                                        for row in self.wptools.query('wiki', q.format(prioritycategory), None)
                                        if row[0].decode('utf-8') in self.score]

        X = np.array([x[1] for x in self.scorelist['Top-']]
                     + [x[1] for x in self.scorelist['High-']]
                     + [x[1] for x in self.scorelist['Mid-']]
                     + [x[1] for x in self.scorelist['Low-']])
        y = np.array([0 for x in self.scorelist['Top-']]
                     + [1 for x in self.scorelist['High-']]
                     + [2 for x in self.scorelist['Mid-']]
                     + [3 for x in self.scorelist['Low-']])
        model = OneVsRestClassifier(LinearSVC(random_state=0)).fit(X, y)
        print(list(model.predict(X)))
        print(model.score(X, y))
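# A minimal usage sketch for the class above. The project name and the
# "Unknown-importance" category below are hypothetical examples; the
# __main__ guard is likewise an assumption, not part of the original file:
if __name__ == '__main__':
    pp = PriorityPredictor()  # or PriorityPredictor(viewdump='views.json') to reuse a saved dump
    pp.loadproject('WikiProject Example', 'Unknown-importance_Example_articles')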
class WikiProjectNotifications:
    def __init__(self):
        self.wptools = WikiProjectTools()
        q = ('create table if not exists notifications '
             '(n_id int(11) NOT NULL auto_increment, '
             'n_project VARCHAR(255) character set utf8 collate utf8_unicode_ci, '
             'n_variant VARCHAR(255) character set utf8 collate utf8_unicode_ci, '
             'n_content TEXT character set utf8 collate utf8_unicode_ci, '
             'primary key (n_id)) '
             'engine=innodb character set=utf8;')
        #self.wptools.query('index', q, None)
        self.bot = pywikibot.Site('en', 'wikipedia', user='******')

        # Recognized notification variants
        # A variant that is not any of these kinds will cause an error
        # variantname --> template parameter name
        date = datetime.datetime.utcnow().strftime('%d %B %Y')
        self.contentwrapper = '<div style="max-width:500px; padding-bottom:2.5em;">'
        self.recognizedvariants = {
            'newmember': 'notification_when_a_new_member_joins',
            'newdiscussion': 'notification_when_a_new_discussion_topic_is_posted'}
        self.varianttext = {
            'newmember': '==New member report for ' + date + '==\n'
                         + self.contentwrapper
                         + 'The following users joined the WikiProject in the past day:\n',
            'newdiscussion': '==New discussion report for ' + date + '==\n'
                             + self.contentwrapper
                             + 'New discussions that are of interest to the WikiProject:\n'}

    def active_user(self, username):
        '''
        Determines if a username meets a basic threshold of activity.
        Takes string *username*, returns boolean.
        The threshold is one edit in the recentchanges tables (i.e. in the past 30 days).
        '''
        q = 'select count(*) from recentchanges_userindex where rc_user_text = "{0}"'.format(username.replace('_', ' '))
        return self.wptools.query('wiki', q, None)[0][0] > 0

    def post(self, project, variant, content):
        '''
        Adds an item to the WikiProject Notification Center,
        to be included in the next update.
        '''
        if variant in self.recognizedvariants:
            q = 'insert into notifications (n_project, n_variant, n_content) values ("{0}", "{1}", "{2}");'
            q = q.format(project, variant, content)
            self.wptools.query('index', q, None)
        else:
            raise NotificationVariantError(variant)

    def findsubscribers(self):
        '''
        Generates a dictionary of WikiProjects with notification centers
        and corresponding report subscribers.
        '''
        q = ('select page_title from templatelinks '
             'join page on page_id = tl_from and page_namespace = tl_from_namespace '
             'where page_namespace = 2 and tl_namespace = 10 '
             'and tl_title = "WikiProjectCard";')
        output = {}
        for row in self.wptools.query('wiki', q, None):
            title = row[0].decode('utf-8')
            components = title.split('/')  # e.g. ['Harej', 'WikiProjectCards', 'WikiProject_Women_in_Technology']
            username = components[0]

            # No notifications for inactive users
            if not self.active_user(username):
                continue

            # Carrying on...
            # NOTE: the remainder of this method was redacted in the source
            # (a credential scrubber replaced a span of code with "******");
            # the surviving fragment is preserved verbatim below.
            title = "User: "******"New notification", minor=False, async=True, quiet=True)

        # Deleting old records now that notifications have been sent out
        if len(id_to_delete) > 0:
            if len(id_to_delete) == 1:
                self.wptools.query('index', 'delete from notifications where n_id = {0};'.format(id_to_delete[0]), None)
            else:
                self.wptools.query('index', 'delete from notifications where n_id in {0};'.format(tuple(id_to_delete)), None)
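# A minimal sketch of queueing a notification with the class above. The
# project name and message text are hypothetical, and the __main__ guard is
# an assumption, not part of the original file:
if __name__ == '__main__':
    wpn = WikiProjectNotifications()
    # Queue a "new member" item; it is stored in the notifications table
    # and delivered on the next findsubscribers() run.
    wpn.post('WikiProject_Example', 'newmember',
             "* [[User:Example|Example]] ([[User talk:Example|talk]])")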
def __init__(self):
    self.wptools = WikiProjectTools()
    self.bot = pywikibot.Site('en', 'wikipedia')