Esempio n. 1
0
    def __init__(self, **kwargs):
        """Initializer.

        Registers the options understood by this bot, lets the parent
        class process the given keyword arguments and derives the working
        flags from the resulting option values.

        @param kwargs: bot options passed on to the parent initializer
        """
        self.available_options.update({
            'all': False,
            'catnames': None,
            'exist': False,
            'forward': False,
            'keepparent': False,
            'nodate': False,
            'summary': None,
        })
        super().__init__(**kwargs)
        self.skipdates = self.opt.nodate
        self.checkbackward = not self.opt.forward
        # Either flag on its own must clear checkbroken; combining them
        # with 'and' would make a lone -exist or a lone -forward have no
        # effect on this setting.
        self.checkbroken = not (self.opt.forward or self.opt.exist)
        self.removeparent = not self.opt.keepparent
        self.main = not self.opt.all
        self.tocheck = DequeGenerator()

        self.workingcatname = self.opt.catnames
        self._setup_menubar()
Esempio n. 2
0
                pywikibot.output(u"Page does not exist.")
            ctoshow += 500
        else:
            pywikibot.output(u"Not understood.")


try:
    checked = {}
    skipdates = False
    checkforward = True
    checkbackward = True
    checkbroken = True
    removeparent = True
    main = True
    workingcatname = ''
    tocheck = DequeGenerator()
    for arg in pywikibot.handle_args():
        if arg.startswith('-nodate'):
            skipdates = True
        elif arg.startswith('-forward'):
            checkbackward = False
            checkbroken = False
        elif arg.startswith('-exist'):
            checkbroken = False
        elif arg.startswith('-keepparent'):
            removeparent = False
        elif arg.startswith('-all'):
            main = False
        elif not workingcatname:
            workingcatname = arg
Esempio n. 3
0
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    The first argument without a leading dash is taken as the name of the
    working category; every argument with a leading dash is passed to the
    bot as a boolean option. The pages listed in the category's exclusion
    file, the articles of its direct subcategories and its own articles
    are marked as already checked before the bot is started.

    @param args: command line arguments
    @type args: str
    """
    global workingcat, parentcats
    global checked, tocheck
    global excludefile

    # A set, because the bot registers seen pages with checked.add().
    checked = set()
    # Still assigned for code reading the module level name; the bot
    # queues pages on its own deque (bot.tocheck).
    tocheck = DequeGenerator()

    workingcatname = ''

    options = {}
    for arg in pywikibot.handle_args(args):
        if not arg.startswith('-'):
            if not workingcatname:
                workingcatname = arg
            else:
                pywikibot.warning('Working category "{}" is already given.'
                                  .format(workingcatname))
        else:
            options[arg[1:]] = True

    if not workingcatname:
        pywikibot.bot.suggest_help(missing_parameters=['working category'])
        return

    mysite = pywikibot.Site()
    # The bot reads the edit summary and the category name from its
    # options, so they have to be handed over explicitly.
    options['summary'] = i18n.twtranslate(mysite, 'makecat-create',
                                          {'cat': workingcatname})
    options['catnames'] = workingcatname

    bot = MakeCatBot(site=mysite, **options)

    workingcat = pywikibot.Category(mysite, '{0}{1}'
                                            .format(mysite.namespaces.CATEGORY,
                                                    workingcatname))
    filename = pywikibot.config.datafilepath(
        'category',
        workingcatname.encode('ascii', 'xmlcharrefreplace').decode('ascii') +
        '_exclude.txt')
    try:
        with codecs.open(filename, 'r', encoding=mysite.encoding()) as f:
            for line in f:
                # remove leading and trailing spaces, LF and CR
                title = line.strip()
                if title:
                    checked.add(pywikibot.Page(mysite, title))
    except IOError:
        # File does not exist yet; it is created by the open call below
        pass

    # Append mode creates a missing file and never truncates an existing
    # exclusion list.
    excludefile = codecs.open(filename, 'a', encoding=mysite.encoding())
    try:
        # Get parent categories in order to `removeparent`.
        # Materialized as a list: membership is tested repeatedly and the
        # lookup has to happen inside this try block.
        try:
            parentcats = list(workingcat.categories())
        except pywikibot.Error:
            parentcats = []

        # Do not include articles already in subcats; only checking
        # direct subcats
        subcatlist = list(workingcat.subcategories())
        if subcatlist:
            for cat in pagegenerators.PreloadingGenerator(subcatlist):
                checked.update(cat.articles())

        # Fetch articles in category, and mark as already checked (seen)
        # If category is empty, ask user if they want to look for pages
        # in a different category.
        articles = list(workingcat.articles(content=True))
        if not articles:
            pywikibot.output('Category {} does not exist or is empty. '
                             'Which page to start with?'
                             .format(workingcatname))
            answer = pywikibot.input(
                '(Default is [[{}]]):'.format(workingcatname))
            if not answer:
                answer = workingcatname
            pywikibot.output(answer)
            articles = [pywikibot.Page(mysite, answer)]

        for pl in articles:
            checked.add(pl)
            bot.include(pl)

        # The bot's generator property iterates its own deque and
        # skip_page() honours checkbroken, so the framework loop replaces
        # the former hand written one.
        bot.run()
    finally:
        excludefile.close()
Esempio n. 4
0
class MakeCatBot(SingleSiteBot, NoRedirectPageBot):

    """Bot tries to find new articles for a given category."""

    def __init__(self, **kwargs):
        """Initializer.

        Registers the options understood by this bot, lets the parent
        class process the given keyword arguments and derives the working
        flags used by the other methods from the resulting option values.

        @param kwargs: bot options passed on to the parent initializer
        """
        self.available_options.update({
            'all': False,
            'catnames': None,
            'exist': False,
            'forward': False,
            'keepparent': False,
            'nodate': False,
            'summary': None,
        })
        super().__init__(**kwargs)
        # needcheck() rejects pages for which autoFormat() finds a format
        self.skipdates = self.opt.nodate
        # checklinks() also follows references to the page when set
        self.checkbackward = not self.opt.forward
        # skip_page() drops non-existing pages when this is False. Either
        # flag on its own must clear it; combining them with 'and' would
        # make a lone -exist or a lone -forward have no effect here.
        self.checkbroken = not (self.opt.forward or self.opt.exist)
        # change_category() replaces a parent category when set
        self.removeparent = not self.opt.keepparent
        # needcheck() restricts candidates to namespace 0 when set
        self.main = not self.opt.all
        self.tocheck = DequeGenerator()

        self.workingcatname = self.opt.catnames
        self._setup_menubar()

    @classmethod
    def _setup_menubar(cls):
        """Create the two choice lists offered by treat_page.

        The lists are stored on the class in option_bar, keyed by the
        shortcut which selects them ('e' for the extended and 'r' for the
        reduced list). The reduced list becomes the active one.
        """
        common = [('yes', 'y'), ('no', 'n'), ('ignore', 'i')]
        help_choice = ('help', 'h')

        reduced = common + [('extend', 'e'), help_choice]
        full = common + [
            ('more', 'm'),
            ('sort key', 'k'),
            ('skip', 's'),
            ('check', 'c'),
            ('other', 'o'),
            ('list', 'l'),
            ('reduce', 'r'),
            help_choice,
        ]

        cls.option_bar = {'e': full, 'r': reduced}
        cls.treat_options = reduced

    @property
    def generator(self):
        """Preloading generator over the tocheck deque, used by run()."""
        pending = self.tocheck
        return pagegenerators.DequePreloadingGenerator(pending)

    @staticmethod
    def highlight_title(page, condition=True):
        """Print the title of the page in colour unless condition is false.

        @param page: page whose title is shown
        @param condition: nothing is printed if this is false
        """
        if not condition:
            return
        banner = color_format('\n>>> {lightpurple}{0}{default} <<<',
                              page.title())
        pywikibot.output(banner)

    @staticmethod
    def print_dot(condition=True):
        """Print one dot without a line break unless condition is false.

        @param condition: nothing is printed if this is false
        """
        if not condition:
            return
        pywikibot.output('.', newline=False)

    def needcheck(self, page):
        """Tell whether the given page is a candidate for processing.

        @param page: page to be verified
        @return: False if the page is outside namespace 0 while only that
            namespace is wanted, if it is already in the checked
            collection, or if dates are skipped and autoFormat()
            recognizes the page; True otherwise
        @rtype: bool
        """
        global checked
        if self.main and page.namespace() != 0:
            return False
        if page in checked:
            return False
        if self.skipdates and page.autoFormat()[0] is not None:
            return False
        return True

    def change_category(self, page, categories):
        """Replace a parent category of page by the working category.

        Nothing is changed unless parent categories are to be removed.
        Only the first entry of categories that is one of the parent
        categories is replaced.

        @param page: page to be changed
        @param categories: categories the page currently is in
        @return: True if a replacement was requested, False otherwise
        @rtype: bool
        """
        global workingcat, parentcats
        if not self.removeparent:
            return False
        for category in categories:
            if category in parentcats:
                # change_category() takes the category to be replaced
                # first, followed by its replacement.
                page.change_category(category, workingcat,
                                     summary=self.opt.summary)
                return True
        return False

    def include(self, page, checklinks=True, realinclude=True, linkterm=None):
        """Include the current page to the working category.

        @param page: page to be added
        @param checklinks: whether the links of the page are queued for
            checking afterwards; always done for redirect pages that are
            really included
        @param realinclude: if false the page itself is left unchanged
        @param linkterm: sort key used for the category link of this page
        """
        global workingcat
        if linkterm:
            # Use a category object of its own: setting the sort key on
            # the shared working category would apply it to every page
            # included later as well.
            actualworkingcat = pywikibot.Category(
                workingcat.site, workingcat.title(), sort_key=linkterm)
        else:
            actualworkingcat = workingcat

        if realinclude and page.exists():
            if page.isRedirectPage():
                checklinks = True
            else:
                cats = list(page.categories())
                # Append the category link only if the page is not in the
                # working category yet and no parent category was replaced.
                if workingcat not in cats \
                   and not self.change_category(page, cats):
                    newtext = textlib.replaceCategoryLinks(
                        page.text, cats + [actualworkingcat],
                        site=page.site)
                    page.put(newtext, summary=self.opt.summary)

        if checklinks:
            self.checklinks(page)

    def checklinks(self, page):
        """Queue the pages connected with page which still need a check.

        The pages linked from page are inspected, followed by the pages
        referring to it if backward checking is enabled. Every candidate
        accepted by needcheck() is appended to the tocheck deque and
        registered as checked. A dot is printed for every 25th candidate.

        @param page: page whose links are inspected
        """
        global checked
        pywikibot.output('\nChecking links for "{}"...'
                         .format(page.title()), newline=False)
        candidates = page.linkedPages()
        if self.checkbackward:
            candidates = chain(candidates, page.getReferences())
        for count, candidate in enumerate(candidates):
            self.print_dot(count % 25 == 0)
            if not self.needcheck(candidate):
                continue
            self.tocheck.append(candidate)
            checked.add(candidate)

    def init_page(self, page):
        """Queue the target of a redirect page for checking.

        The parent implementation is called first. If page is a redirect
        and its target passes needcheck(), the target is appended to the
        tocheck deque and registered as checked.

        @param page: page about to be processed
        """
        global checked
        super().init_page(page)
        if not page.isRedirectPage():
            return
        target = page.getRedirectTarget()
        if self.needcheck(target):
            self.tocheck.append(target)
            checked.add(target)

    def skip_page(self, page):
        """Decide whether page is left out.

        A non-existing page is skipped with a warning when broken pages
        are not to be checked; otherwise the decision is left to the
        parent implementation.

        @param page: page to be verified
        @return: True if the page is to be skipped
        """
        if self.checkbroken or page.exists():
            return super().skip_page(page)
        pywikibot.warning('Page {page} does not exist on {page.site}. '
                          'Skipping.'.format(page=page))
        return True

    def treat_page(self):
        """Work on current page and ask to add article to category."""
        global checked
        global excludefile
        pl = self.current_page
        ctoshow = 500
        pywikibot.output('')
        pywikibot.output('== {} =='.format(pl.title()))
        while True:
            answer = pywikibot.input_choice(
                'Add to category {}?'.format(self.workingcatname),
                self.treat_options, default='i')
            if answer == 'y':
                self.include(pl)
                break
            if answer == 'c':
                self.include(pl, realinclude=False)
                break
            if answer == 'k':
                if pl.exists() and not pl.isRedirectPage():
                    linkterm = pywikibot.input(
                        'In what manner should it be alphabetized?')
                    self.include(pl, linkterm=linkterm)
                    break
                self.include(pl)
                break
            elif answer == 'n':
                excludefile.write('%s\n' % pl.title())
                break
            elif answer == 'i':
                break
            if answer in 'er':
                self.treat_options = self.option_bar[answer]
            elif answer == 'h':
                pywikibot.output("""
[y]es:      Add the page and check links')
[n]o:       Never add the page, saved to exclusion list
[i]gnore:   Neither do not add the page not check links
[m]ore:     show more content of the page starting from the beginning
sort [k]ey: Add with sort key like [[Category|Title]]
[s]kip:     Add the page, but skip checking links
[c]heck:    Do not add the page, but do check links
[o]ther:    Add another page
[l]ist:     Show a list of the pages to check
[e]xtend:   A more extended option list
[r]educe:   Reduce option list
[q]uit:     Save exclusion list and exit this script
""")
            elif answer == 'o':
                pagetitle = pywikibot.input('Specify page to add:')
                page = pywikibot.Page(pywikibot.Site(), pagetitle)
                if page not in checked:
                    self.include(page)
            elif answer == 's':
                if not pl.exists():
                    pywikibot.output('Page does not exist; not added.')
                elif pl.isRedirectPage():
                    pywikibot.output(
                        'Redirect page. Will be included normally.')
                    self.include(pl, realinclude=False)
                else:
                    self.include(pl, checklinks=False)
                break
            elif answer == 'l':
                length = len(self.tocheck)
                pywikibot.output('Number of pages still to check: {}'
                                 .format(length))
                if length:
                    pywikibot.output('Pages to be checked:')
                    pywikibot.output(
                        fill(' - '.join(page.title()
                                        for page in self.tocheck)))
                self.highlight_title(page)
            elif answer == 'm':
                self.highlight_title(pl, ctoshow > 500)
                if pl.exists():
                    pywikibot.output(pl.text[0:ctoshow])
                else:
                    pywikibot.output('Page does not exist.')
                ctoshow += 500