コード例 #1
0
def main(*args):
    """Process command line arguments and run the bot."""
    options = {}
    # Let pywikibot consume the global options first.
    local_args = pywikibot.handle_args(args)

    # The generator factory handles page-selection arguments that are
    # shared with other scripts.
    genFactory = pywikibot.pagegenerators.GeneratorFactory()

    extraparams = {}
    for argument in local_args:
        # Page generator options are consumed by the factory.
        if genFactory.handleArg(argument):
            continue
        if argument.startswith('-aistart:'):
            extraparams = {'gaistart': argument[len('-aistart:'):]}

    # Preloading fetches several pages per API request.
    gen = genFactory.getCombinedGenerator(preload=True)
    if not gen:
        # Fall back to the Geograph upload stream.
        gen = PreloadingGenerator(
            GeographBotUploads(site=pywikibot.Site(), parameters=extraparams))
    if not gen:
        pywikibot.bot.suggest_help(missing_generator=True)
        return False
    # Pass the generator and private options to the bot.
    bot = FixLocationBot(gen, **options)
    bot.run()
    return True
コード例 #2
0
    def featuredWithInterwiki(self, fromsite, task):
        """Read featured articles and find the corresponding pages.

        Find corresponding pages on other sites, place the template and
        remember the page in the cache dict.

        :param fromsite: site the featured articles are read from
        :param task: task name passed through to featuredArticles and
            add_template
        """
        tosite = self.site
        # Make sure the nested cache dict has slots for this site pair.
        if fromsite.code not in self.cache:
            self.cache[fromsite.code] = {}
        if tosite.code not in self.cache[fromsite.code]:
            self.cache[fromsite.code][tosite.code] = {}
        cc = self.cache[fromsite.code][tosite.code]
        if self.getOption('nocache') is True or \
           fromsite.code in self.getOption('nocache'):
            # Rebind cc to a throwaway dict: cached entries are ignored
            # and new entries are NOT persisted into self.cache.
            cc = {}

        gen = self.featuredArticles(fromsite, task, cc)
        if self.getOption('count'):
            # Advance the generator once for its counting side effect.
            next(gen, None)
            return  # count only, we are ready here
        gen = PreloadingGenerator(gen)

        for source in gen:
            if source.isRedirectPage():
                source = source.getRedirectTarget()

            if not source.exists():
                pywikibot.output(u"source page doesn't exist: %s" % source)
                continue

            for dest in self.findTranslated(source, tosite):
                self.add_template(source, dest, task, fromsite)
                # Remember the mapping so future runs can skip this page.
                cc[source.title()] = dest.title()
コード例 #3
0
ファイル: selflink.py プロジェクト: djff/pywikibot-core
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    # Strip global options; the remainder selects pages and bot flags.
    local_args = pywikibot.handle_args(args)
    genFactory = GeneratorFactory()
    botArgs = {}

    for argument in local_args:
        if argument != '-always':
            genFactory.handleArg(argument)
        else:
            botArgs['always'] = True

    gen = genFactory.getCombinedGenerator()
    if gen:
        bot = SelflinkBot(PreloadingGenerator(gen), **botArgs)
        bot.run()
        return True

    # No page generator could be built from the arguments.
    pywikibot.bot.suggest_help(missing_generator=True)
    return False
コード例 #4
0
    def generator(self):
        """Yield preloaded pages for every searchable typo rule."""
        searchable = (rule for rule in self.typoRules if rule.canSearch())
        for rule in searchable:
            pywikibot.output('Query: "%s"' % rule.query)
            self.current_rule = rule
            yield from PreloadingGenerator(rule.querySearch())
コード例 #5
0
def main(*args: str) -> None:
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    :param args: command line arguments
    """
    filename = 'dict.txt'
    options = {}
    r_options = {}

    # Option names grouped by destination and kind.
    reader_value_opts = ('begin', 'end', 'titlestart', 'titleend', 'title')
    reader_flag_opts = ('include', 'notitle', 'textonly')
    bot_flag_opts = ('force', 'minor', 'autosummary', 'showdiff')
    bot_value_opts = ('nocontent', 'summary')

    for raw_arg in pywikibot.handle_args(args):
        name, _, value = raw_arg.partition(':')
        option = name.partition('-')[2]
        if option in reader_value_opts:
            r_options[option] = value
        elif option == 'file':
            filename = value
        elif option in reader_flag_opts:
            r_options[option] = True
        elif option == 'appendbottom':
            options['append'] = ('bottom', value)
        elif option == 'appendtop':
            options['append'] = ('top', value)
        elif option in bot_flag_opts:
            options[option] = True
        elif option == 'noredirect':
            options['redirect'] = False
        elif option in bot_value_opts:
            options[option] = value
        else:
            pywikibot.output('Disregarding unknown argument {}.'.format(name))

    # Only ask for confirmation when a diff was requested.
    options['always'] = 'showdiff' not in options

    failed_filename = False
    while not os.path.isfile(filename):
        pywikibot.output("\nFile '{}' does not exist. ".format(filename))
        _input = pywikibot.input('Please enter the file name [q to quit]:')
        if _input == 'q':
            failed_filename = True
            break
        filename = _input

    if failed_filename:
        # The user quit before a readable file was found; show help
        # text from the top of this file.
        pywikibot.bot.suggest_help(missing_parameters=['-file'])
        return

    site = pywikibot.Site()
    reader = PreloadingGenerator(
        PageFromFileReader(filename, site=site, **r_options))
    PageFromFileRobot(generator=reader, site=site, **options).run()
コード例 #6
0
 def _handle_recent(self, value):
     """Chain images new on Commons with ones changed on Geograph.

     :param value: number of days to look back, as a string
     """
     days_back = int(value)
     starttime = datetime.now(timezone.utc) - timedelta(days=days_back)
     earlystart = starttime - timedelta(days=1)
     commons_params = {'gcmend': earlystart.astimezone(timezone.utc)}
     new_on_commons = PreloadingGenerator(
         NewGeographImages(site=pywikibot.Site(), parameters=commons_params))
     changed_on_geograph = ModifiedGeographs(modified_since=starttime,
                                             submitted_before=earlystart)
     return chain(new_on_commons, changed_on_geograph)
コード例 #7
0
    def generator(self):
        """Yield preloaded search results for every rule with a query."""
        for rule in self.typoRules:
            query = rule.query
            if query is None:
                continue
            pywikibot.output('Query: "{}"'.format(query))
            self.current_rule = rule
            yield from PreloadingGenerator(
                self.site.search(query, namespaces=[0]))
コード例 #8
0
 def treat_page(self):
     """Propagate a death date to pages linking here with a birth date."""
     page = self.current_page
     categories = textlib.getCategoryLinks(page.text, site=self.site)
     titles = map(
         lambda cat: cat.title(with_ns=False,
                               with_section=False,
                               allow_interwiki=False,
                               insite=self.site), categories)
     # Keep only category titles that fully match the birthdate regex.
     matches = list(filter(bool, map(self.categoryR.fullmatch, titles)))
     if not matches:
         pywikibot.output('No birthdate category found')
         return
     fullmatch = matches.pop()
     if matches:
         # More than one candidate category is ambiguous; bail out.
         pywikibot.output('Multiple birthdate categories found')
         return
     birth_date = fullmatch.group(1)
     # CirrusSearch query: pages linking here whose wikitext contains
     # "[[...]] (* <birth date>)" and that are not list articles.
     search_query = 'linksto:"%s"' % page.title()
     search_query += r' insource:/\[\[[^\[\]]+\]\]'
     search_query += r' +\(\* *\[*%s\]*\)/' % birth_date
     search_query += ' -intitle:"Seznam"'
     # The link may target this page directly or any redirect to it.
     pattern = r'\[\[((?:%s)(?:\|[^\[\]]+)?)\]\]' % '|'.join(
         map(
             lambda p: re.escape(p.title()),
             chain([page],
                   page.backlinks(followRedirects=False,
                                  filterRedirects=True,
                                  namespaces=[0]))))
     pattern += r' +\(\* *(\[\[)?(%s)(\]\])?\)' % birth_date
     regex = re.compile(pattern)
     for ref_page in PreloadingGenerator(
             SearchPageGenerator(search_query,
                                 namespaces=[0],
                                 site=self.site)):
         text = ref_page.text
         # todo: multiple matches
         match = regex.search(text)
         if not match:
             continue
         # Groups in pattern order: link body, optional "[[", the birth
         # date, optional "]]"; '' default for unmatched optionals.
         inside, left, year1, right = match.groups('')
         new_text = text[:match.start()]
         # NOTE(review): replace_pattern is presumably a module-level
         # template with {inside}/{left}/{right}/{year1}/{year2} slots;
         # confirm against the enclosing module.
         new_text += replace_pattern.format(inside=inside,
                                            left=left,
                                            right=right,
                                            year1=year1,
                                            year2=self.year)
         new_text += text[match.end():]
         self.userPut(ref_page,
                      ref_page.text,
                      new_text,
                      summary='doplnění data úmrtí')
コード例 #9
0
 def test_low_step(self):
     """Test PreloadingGenerator with a list of pages."""
     main_page = self.get_mainpage()
     expected = list(self.site.pagelinks(main_page, total=20))
     seen = 0
     for preloaded in PreloadingGenerator(expected, step=10):
         self.assertIsInstance(preloaded, pywikibot.Page)
         self.assertIsInstance(preloaded.exists(), bool)
         if preloaded.exists():
             # Preloading should fetch exactly one revision with text
             # and no page properties.
             self.assertEqual(len(preloaded._revisions), 1)
             self.assertIsNotNone(preloaded._revisions[preloaded._revid].text)
             self.assertFalse(hasattr(preloaded, '_pageprops'))
         seen += 1
     self.assertEqual(len(expected), seen)
コード例 #10
0
    def treat(self, page):
        """Rewrite *page*, keeping only typo entries that are unresolved."""
        pattern = self.helper.pattern
        for entry in PreloadingGenerator(self.line_iterator(page)):
            key = title = entry.title()
            if not entry.exists():
                # Deleted page: drop its cached strings entirely.
                self.cache.pop(key)
                continue
            # Follow redirect chains; report under the final target title.
            while entry.isRedirectPage():
                entry = entry.getRedirectTarget()
                title = entry.title()
            text = self.helper.remove_disabled_parts(entry.text)
            # Keep only cached strings still present in the page text;
            # popping the key consumes the cache entry either way.
            for string in self.cache.pop(key):
                if string in text:
                    self.put.append(pattern.format('[[%s]]' % title, string))

        page.text = '\n'.join(self.put)
        page.save(summary='odstranění vyřešených překlepů', minor=True,
                  botflag=True, apply_cosmetic_changes=False)
コード例 #11
0
 def treat_page_and_item(self, page, item):
     """Add a missing item description parsed from a linking page."""
     # Nothing to do when a description in this language already exists.
     if self.site.lang in item.descriptions:
         return
     title = item.getSitelink(self.site)
     # CirrusSearch: pages that link to the title from a list-style
     # line beginning "* [[<title>".
     search_query = r'linksto:"%s" insource:/\* *%s/' % (
         title, re.escape('[[' + title))
     regex = self.get_regex_for_title(re.escape(title))
     for ref_page in PreloadingGenerator(
             SearchPageGenerator(search_query, namespaces=[0])):
         # todo: first polish text
         match = regex.search(ref_page.text)
         if not match:
             continue
         # Unless -allpages is set, only harvest from disambiguations.
         if not self.opt['allpages'] and not ref_page.isDisambig():
             continue
         desc = self.parse_description(match.group(2))
         if not self.validate_description(desc):
             continue
         summary = self.get_summary(ref_page, desc)
         item.descriptions[self.site.lang] = desc.strip()
         # Stop after the first description the user accepts/saves.
         if self.user_edit_entity(item, summary=summary):
             break
コード例 #12
0
ファイル: selflink.py プロジェクト: ImanYZ/ExpertIdeas
def main():
    """Process command line arguments and run the self-link bot."""
    # Global options are stripped here; the remainder selects pages
    # and bot flags.
    local_args = pywikibot.handleArgs()
    genFactory = GeneratorFactory()
    botArgs = {}

    for argument in local_args:
        if argument != '-always':
            genFactory.handleArg(argument)
        else:
            botArgs['always'] = True

    gen = genFactory.getCombinedGenerator()
    if not gen:
        # No pages selected: show the usage help and stop.
        pywikibot.showHelp()
        return

    SelflinkBot(PreloadingGenerator(gen), **botArgs).run()
コード例 #13
0
 def treat_page(self):
     """Propagate a death date to pages linking here with a birth date."""
     page = self.current_page
     categories = textlib.getCategoryLinks(page.text, site=self.site)
     titles = (cat.title(with_ns=False,
                         with_section=False,
                         allow_interwiki=False,
                         insite=self.site) for cat in categories)
     # Keep only category titles that fully match the birthdate regex.
     matches = list(filter(bool, map(self.categoryR.fullmatch, titles)))
     if not matches:
         pywikibot.output('No birthdate category found')
         return
     fullmatch = matches.pop()
     if matches:
         # More than one candidate category is ambiguous; bail out.
         pywikibot.output('Multiple birthdate categories found')
         return
     birth_date = fullmatch.group(1)
     # CirrusSearch query: pages linking here whose wikitext contains
     # "[[...]] (* <birth date>)" and that are not list articles.
     search_query = 'linksto:"%s"' % page.title()  # todo: sanitize?
     search_query += r' insource:/\[\[[^\[\]]+\]\]'
     search_query += r' +\(\* *\[*%s\]*\)/' % birth_date
     search_query += ' -intitle:"Seznam"'
     # The link may target this page directly or any redirect to it.
     pattern = r'\[\[((?:%s)(?:\|[^\[\]]+)?)\]\]' % '|'.join(
         re.escape(p.title())
         for p in chain([page],
                        page.backlinks(followRedirects=False,
                                       filterRedirects=True,
                                       namespaces=[0])))
     pattern += r' +\(\* *(\[\[)?(%s)(\]\])?\)' % birth_date
     regex = re.compile(pattern)
     for ref_page in PreloadingGenerator(
             SearchPageGenerator(search_query,
                                 namespaces=[0],
                                 site=self.site)):
         # NOTE(review): replace_callback is defined elsewhere on the
         # bot; presumably it inserts the death year — confirm.
         new_text, num = regex.subn(self.replace_callback, ref_page.text)
         if num:
             self.userPut(ref_page,
                          ref_page.text,
                          new_text,
                          summary='doplnění data úmrtí')
コード例 #14
0
ファイル: page.py プロジェクト: sema0703/suggestbot
def RatingGenerator(pages, step=50):
    '''
    Generate pages with assessment ratings.

    Each yielded page gets a ``_rating`` attribute set from its talk
    page assessment, or ``'na'`` when no rating can be determined.

    :param pages: article pages to annotate
    :param step: number of talk pages to preload per request
    '''

    # Preload talk page contents in bulk to speed up processing
    # Note: since pywikibot's PreloadingGenerator doesn't guarantee
    #       order, we'll have to exhaust it and map title to talkpage.
    tp_map = {}
    for talkpage in PreloadingGenerator(TalkPageGenerator(pages), step=step):
        tp_map[talkpage.title(withNamespace=False)] = talkpage

    # iterate and set the rating
    for page in pages:
        try:
            talkpage = tp_map[page.title()]
            page._rating = page.get_assessment(talkpage.get())
        except (KeyError, pywikibot.NoPage, pywikibot.IsRedirectPage):
            # Missing talk page, deleted page or redirect: no rating.
            # (Collapsed from three identical except blocks.)
            page._rating = 'na'
        yield page
コード例 #15
0
def main(*args):
    """
    Process command line arguments and invoke bot.

    @param args: command line arguments
    @type args: list of unicode
    """
    # Default to working on yesterday only.
    options = {
        'end_date': date.today() - timedelta(days=1),
        'start_date': date.today() - timedelta(days=1),
    }
    # Process global arguments
    local_args = pywikibot.handle_args(args)
    site = pywikibot.Site()
    site.login()
    # Parse command line arguments
    for arg in local_args:
        arg, _, value = arg.partition(':')
        arg = arg[1:]  # strip the leading '-'
        if arg in ('config', 'end_date', 'start_date'):
            if not value:
                value = pywikibot.input(
                    'Please enter a value for {}'.format(arg), default=None)
            options[arg] = value
        else:
            # All other options are treated as boolean flags.
            options[arg] = True
    if not validate_options(options):
        pywikibot.bot.suggest_help(
            additional_text='The specified options are invalid.')
        return False
    config = pywikibot.Page(site, options.pop('config'))
    config = get_json_from_page(config)
    if not validate_config(config, site):
        pywikibot.bot.suggest_help(
            additional_text='The specified configuration is invalid.')
        return False
    options['config'] = config

    meta = pywikibot.Site('meta', 'meta')
    # Meta log titles for this wiki's users end with "@dbname".
    suffix = '@{}'.format(site.dbName())
    start = datetime.datetime.combine(options.pop('start_date'), time.min)
    end = datetime.datetime.combine(options.pop('end_date'), time.max)
    # Parse rename logs into a list of dict.
    if options.pop('rename', None):
        renames = []
        if options.get('meta'):
            rename_events = meta.logevents(logtype='gblrename',
                                           start=start,
                                           end=end,
                                           reverse=True)
        else:
            rename_events = site.logevents(logtype='renameuser',
                                           start=start,
                                           end=end,
                                           reverse=True)
        for rename in rename_events:
            try:
                renames.append({
                    'olduser':
                    pywikibot.User(site, rename.data['params']['olduser']),
                    'newuser':
                    pywikibot.User(site, rename.data['params']['newuser']),
                    'timestamp':
                    rename.timestamp(),
                })
            except KeyError:
                # Malformed or partially hidden log entry; skip it.
                continue
        options['renames'] = sorted(renames, key=itemgetter('timestamp'))

    # Parse rights logs into a list of dict.
    group_changes = []
    rights_events = site.logevents(logtype='rights',
                                   start=start,
                                   end=end,
                                   reverse=True)
    if options.pop('meta', None):
        meta_rights_events = set()
        for log_event in meta.logevents(logtype='rights',
                                        start=start,
                                        end=end,
                                        reverse=True):
            try:
                # Only keep meta events that concern this wiki's users.
                if log_event.page().title().endswith(suffix):
                    meta_rights_events.add(log_event)
            except KeyError:
                continue
        rights_events = chain(rights_events, meta_rights_events)
    for log_event in rights_events:
        try:
            new_groups = set(log_event.newgroups)
            old_groups = set(log_event.oldgroups)
            group_changes.append({
                'user':
                pywikibot.User(
                    site,
                    re.sub(r'{}$'.format(suffix), '',
                           log_event.page().title()),
                ),
                'added':
                new_groups - old_groups,
                'removed':
                old_groups - new_groups,
                'timestamp':
                log_event.timestamp(),
            })
        except KeyError:
            # Entry lacks oldgroups/newgroups (e.g. suppressed); skip.
            continue
    options['group_changes'] = sorted(group_changes,
                                      key=itemgetter('timestamp'))

    # Generate pages and invoke the bot.
    gen = (config[key]['page'] for key in config
           if config[key]['enabled'])
    gen = PreloadingGenerator(gen)
    UserGroupsMassMessageListUpdater(gen, site=site, **options).run()
    return True
コード例 #16
0
    def featuredWithInterwiki(self, fromsite, task):
        """Place or remove the Link_GA/FA template on/from a page"""

        def compile_link(site, templates):
            """compile one link template list"""
            findtemplate = '(%s)' % '|'.join(templates)
            return re.compile(ur"\{\{%s\|%s\}\}"
                              % (findtemplate.replace(u' ', u'[ _]'),
                                 site.code), re.IGNORECASE)
            
        quiet = self.getOption('quiet')
        tosite = self.site
        if not fromsite.lang in self.cache:
            self.cache[fromsite.lang] = {}
        if not tosite.lang in self.cache[fromsite.lang]:
            self.cache[fromsite.lang][tosite.lang] = {}
        cc = self.cache[fromsite.lang][tosite.lang]
        if self.getOption('nocache') is True or \
           fromsite.code in self.getOption('nocache'):
            cc = {}
        add_tl, remove_tl = self.getTemplateList(tosite.code, task)
        re_Link_add = compile_link(fromsite, add_tl)
        re_Link_remove = compile_link(fromsite, remove_tl)
        gen = self.featuredArticles(fromsite, task, cc)
        gen = PreloadingGenerator(gen)
        pairs = []
        for a in gen:
            if a.isRedirectPage():
                a = a.getRedirectTarget()

            if not a.exists():
                pywikibot.output(u"source page doesn't exist: %s"
                                 % a.title())
                continue

            atrans = self.findTranslated(a, tosite)
            if not atrans:
                continue

            text = atrans.get()
            m1 = add_tl and re_Link_add.search(text)
            m2 = remove_tl and re_Link_remove.search(text)
            changed = False
            if add_tl:
                if m1:
                    pywikibot.output(u"(already added)")
                else:
                    # insert just before interwiki
                    if (not interactive or
                        pywikibot.input(
                            u'Connecting %s -> %s. Proceed? [Y/N]'
                            % (a.title(), atrans.title())) in ['Y', 'y']):
                        if self.getOption('side'):
                            # Placing {{Link FA|xx}} right next to
                            # corresponding interwiki
                            text = (text[:m1.end()] +
                                    u" {{%s|%s}}" % (add_tl[0], fromsite.code) +
                                    text[m1.end():])
                        else:
                            # Moving {{Link FA|xx}} to top of interwikis
                            iw = pywikibot.getLanguageLinks(text, self.site)
                            text = pywikibot.removeLanguageLinks(text, self.site)
                            text += u"\r\n{{%s|%s}}\r\n" % (add_tl[0],
                                                            fromsite.code)
                            text = pywikibot.replaceLanguageLinks(text,
                                                                  iw, self.site)
                        changed = True
            if remove_tl:
                if m2:
                    if (not interactive or
                        pywikibot.input(
                            u'Connecting %s -> %s. Proceed? [Y/N]'
                            % (a.title(), atrans.title())) in ['Y', 'y']):
                        text = re.sub(re_Link_add, '', text)
                        changed = True
                elif task == 'former':
                    pywikibot.output(u"(already removed)")
            cc[a.title()] = atrans.title()
            if changed:
                comment = i18n.twtranslate(self.site, 'featured-' + task,
                                           {'page': unicode(a)})
                try:
                    atrans.put(text, comment)
                except pywikibot.LockedPage:
                    pywikibot.output(u'Page %s is locked!'
                                     % atrans.title())
                except pywikibot.PageNotSaved, e:
                    pywikibot.output(u"Page not saved")
コード例 #17
0
ファイル: subscribers.py プロジェクト: sema0703/suggestbot
    def update_subscribers(self):
        '''
        Update the list of subscribers based on the current configuration.

        Scan every page transcluding the configuration templates, parse
        each subscriber's settings and write them to the SuggestBot
        database.  Users no longer transcluding a template are marked
        inactive; redirects and malformed configurations are appended
        to a warnings file.

        Returns False if the database connection fails, otherwise an
        empty tuple on completion.
        '''
        # reset all seen-values of users of the current wiki,
        # and who are currently active
        reset_query = r"""UPDATE {}
                          SET seen=0
                          WHERE lang=%(lang)s
                          AND active=1""".format(config.regulars_table)

        # query to set all unseen users as inactive, because it means
        # they no longer use the template
        inactive_query = r"""UPDATE {}
                             SET active=0
                             WHERE lang=%(lang)s
                             AND seen=0""".format(config.regulars_table)

        ## Connect to the database
        sbdb = db.SuggestBotDatabase()
        if not sbdb.connect():
            logging.error("Unable to connect to the suggestbot database")
            return(False)

        (dbconn, dbcursor) = sbdb.getConnection()

        ## Reset the `seen` bit for all active users
        dbcursor.execute(reset_query,
                         {'lang': self._lang})
        dbconn.commit()
        logging.info('number of rows with updated seen-values: {}'.format(dbcursor.rowcount))

        # Build the set of pages that we'll ignore when we find links to
        # our templates.
        ignorePages = set()
        for page_title in config.template_stoplist[self._lang]:
            ignorePages.add(pywikibot.Page(self._site, page_title))

        # Grab the config templates for this language Wikipedia
        configTemplates = config.config_templates[self._lang]
        configPages = set()

        # Regular expression for splitting into username + subpage-name.
        subpageSplitRe = re.compile(r'(?P<username>[^/]+)(?P<subname>/.*)')

        # Loop over them, userbox first as any settings in the config template
        # is to take priority.
        for temp_nick in ['userbox', 'config']:
            configPage = pywikibot.Page(self._site,
                                        configTemplates[temp_nick])
            configPages.add(configPage.title().strip().lower())

            # Grab all links to the config template that are redirects
            warningsList = list(configPage.getReferences(
                onlyTemplateInclusion=True,
                redirectsOnly=True))

            # Output all of them to a file so we know which users might
            # have changed usernames.
            if len(warningsList) > 0:
                logging.info('writing {n} pages that are redirects to warnings file.'.format(n=len(warningsList)))

                with codecs.open(config.userlist_warnings, 'a',
                                 'utf-8') as warningsFile:
                    warningsFile.write("The following pages are redirects:\n")
                    for page in warningsList:
                        warningsFile.write(page.title())
                        warningsFile.write("\n")

            # warningsList is now used as a list of pages that contain errors
            # that need fixing.  Values are tuples where the first item is the
            # pywikibot.Page object, and the second is a short description of
            # the problem.
            warningsList = []

            # For each page, that we're preloading 10 of at a time to
            # speed things up:
            for page in PreloadingGenerator(
                    configPage.getReferences(
                        onlyTemplateInclusion=True,
                        redirectsOnly=False),
                    step=10):
                # Is this one of our own pages?
                if page in ignorePages:
                    continue

                logging.info('now processing {}'.format(page.title()))

                #   figure out what user this page belongs to
                #   1: check that the page namespace is user or user talk
                if page.namespace() not in [2, 3]:
                    warningsList.append((page,
                                         "namespace not user or user talk"))
                    continue

                #   2: fetch the title without namespace
                page_title = page.title(withNamespace=False,
                                        withSection=False)

                # split the page title on first "/" in case it's a subpage.
                subpageTitle = None
                username = ''
                matchObj = subpageSplitRe.match(page_title)
                if matchObj:
                    # we have a subpage
                    # store subpage title in user object
                    subpageTitle = page.title()
                    username = matchObj.group('username')
                    logging.info('found subpage {subtitle} of user {username}'.format(
                        subtitle=matchObj.group('subname'), username=username))
                else:
                    username = page_title

                subscriber = Subscriber(self._lang, username, site=self._site)

                # check the timestamp of the user's last contribution,
                # set the retired bit if the user's no longer active.
                lastEditTuple = None
                try:
                    lastEditTuple = next(subscriber.contributions(total=5))
                except StopIteration:
                    # User apparently has made no edits, so there's no tuple
                    pass
                except KeyError:
                    # pywikibot had a bug that made it fail with a KeyError
                    # if a revision's comment was deleted.  That's fixed now,
                    # but we'll capture the exception just in case something
                    # else goes wrong and triggers it.
                    pass

                if lastEditTuple is not None:
                    lastEditTime = lastEditTuple[2]
                    logging.info('user last edited at {}'.format(lastEditTime))
                    # NOTE(review): datetime.utcnow() is naive; assumes
                    # lastEditTime is also naive UTC -- confirm.
                    timeSinceLastEdit = datetime.utcnow() - lastEditTime
                    if timeSinceLastEdit.days >= config.retired_days:
                        subscriber._retired = 1

                # NOTE: Don't add "if not subscriber.retired:" to skip
                # the template checking if the user is retired.  Don't do that.
                # It'll lead to us storing default values for our users in
                # the database, and since we've already fetched the page text,
                # this is cheap processing.

                parsed_page = mwp.parse(page.get(), skip_style_tags=True)
                #   call page.templatesWithParams()
                for template in parsed_page.filter_templates(recursive=True):
                    ## logging.info('checking template {}'.format(template.name))
                    template_name = template.name.strip().lower()
                    if not template_name in configPages:
                        continue

                    ## logging.info('checking parameters to known template {}'.format(template_name))

                    # This accounts for the case where a user has a subpage for
                    # their userboxes.  We'll post to their user talk page.
                    if subpageTitle is not None and template_name \
                       == configTemplates['userbox'].strip().lower():
                        subpageTitle = None

                    # for each parameter...
                    for param in template.params:
                        ## True if this is a key/value pair
                        if param.showkey:
                            # translate the key (e.g. Norwegian -> English)
                            translatedKey = self._translate_key(
                                param.name.strip().lower())
                        else:
                             translatedKey = self._translate_key(
                                 param.value.strip().lower())

                        if translatedKey is None:
                            warningsList.append((page, "unaccepted parameter"))
                            continue

                        ## logging.info("using parameter {} with value {}".format(translatedKey, param.value))

                        if param.showkey:
                            # parameter is OK, use it:
                            subscriber.useParam(translatedKey, param.value.strip().lower())
                        else:
                            ## Note: This works because the methods behave
                            ## sensibly if the value evaluates to False
                            subscriber.useParam(translatedKey, "")

                # Always updating this ensures that we capture users who return
                # and do not specify where they want it posted.
                subscriber._page_title = subpageTitle

                ## FIXME: if we've gone through all the templates on a page
                ## and not found SuggestBot's template, we have a parsing error.
                ## In that case, we shouldn't update the database?

                logging.info('updating database for this user')

                # update or store values for this user
                subscriber.update(sbdb)

            if len(warningsList) > 0:
                logging.info("writing {n} users that have errors to warnings file".format(n=len(warningsList)))

                warningFilename = "{base}.{lang}".format(
                    base=config.userlist_warnings,
                    lang=self._lang)
                with codecs.open(warningFilename, 'a', 'utf-8') as \
                        warningsFile:
                    warningsFile.write("The following users had errors in their configuration:\n")
                    for (page, reason) in warningsList:
                        warningsFile.write(page.title())
                        warningsFile.write(" - %s" % (reason,))
                        warningsFile.write("\n")

        # Any user still unseen no longer transcludes a template: mark
        # them inactive.
        dbcursor.execute(inactive_query,
                         {'lang': self._lang})
        dbconn.commit()
        logging.info("number of users set as inactive: {}".format(dbcursor.rowcount))
        sbdb.disconnect()
        return()
コード例 #18
0
def get_popquals(lang, titles, do_tasks=False):
    '''
    Get popularity and quality data for the given list of article titles.
    If do_tasks is set, also get task recommendations.

    :param lang: Language code of the Wikipedia to retrieve data from
    :type lang: str

    :param titles: Article titles to retrieve data for
    :type titles: list (of str)

    :param do_tasks: Should we get recommendations for specific tasks?
    :type do_tasks: bool
    '''

    def format_class(assessment):
        # Start/Stub-class ratings are capitalized words; every other
        # assessment class (FA, GA, A, B, C, ...) is an acronym.
        if assessment in ('start', 'stub'):
            return assessment.capitalize()
        return assessment.upper()

    site = pywikibot.Site(lang)

    # Make our titles into Page objects
    pages = [sup.Page(site, title) for title in titles]

    # List of dictionaries with popularity and quality data
    result = []

    # Create HTTP session to pool pageview HTTP requests
    http_session = requests.Session()

    for page in PreloadingGenerator(
            sup.PredictionGenerator(site, sup.RatingGenerator(pages))):

        # Only fetch task suggestions when the caller asked for them.
        # NOTE: previously `do_tasks` was ignored and suggestions were
        # always fetched, contradicting the documented contract.
        task_suggestions = page.get_suggestions() if do_tasks else {}

        # Page data we'll return, with some defaults
        pdata = {
            'title': page.title(),
            'pop': 'High',
            'popcount': round(page.get_views(http_session=http_session)),
            'qual': format_class(page.get_rating()),
            'pred': 'NA',
            'predclass': format_class(page.get_prediction()),
            'work': ['{0}:{1}'.format(k, v) \
                     for k, v in task_suggestions.items()],
            'pred-numeric': -1
            }

        # Set medium/low popularity if below thresholds
        if pdata['popcount'] <= config.pop_thresh_low:
            pdata['pop'] = 'Low'
        elif pdata['popcount'] <= config.pop_thresh_med:
            pdata['pop'] = 'Medium'

        # Set high/medium/low quality based on assessment rating,
        # falling back to the predicted class when the rating is absent
        if pdata['qual'] in ['FA', 'A', 'GA'] \
           or pdata['predclass'] in ['FA', 'GA']:
            pdata['pred'] = 'High'
            pdata['pred-numeric'] = 3
        elif pdata['predclass'] in ['B', 'C']:
            pdata['pred'] = 'Medium'
            pdata['pred-numeric'] = 2
        else:
            pdata['pred'] = 'Low'
            pdata['pred-numeric'] = 1

        result.append(pdata)

    return result