def __init__(self, **kwargs):
    """Initializer.

    Registers the script specific options, lets the parent class parse
    the given keyword options and derives the working flags from them.

    @param kwargs: bot options; recognized keys are 'all', 'catnames',
        'exist', 'forward', 'keepparent', 'nodate' and 'summary'
    """
    self.available_options.update({
        'all': False,
        'catnames': None,
        'exist': False,
        'forward': False,
        'keepparent': False,
        'nodate': False,
        'summary': None,
    })
    super().__init__(**kwargs)

    self.skipdates = self.opt.nodate
    self.checkbackward = not self.opt.forward
    # Either -forward or -exist alone must disable the handling of
    # non-existing ("broken") pages; -forward implies -exist.
    self.checkbroken = not (self.opt.forward or self.opt.exist)
    self.removeparent = not self.opt.keepparent
    self.main = not self.opt.all
    self.tocheck = DequeGenerator()
    self.workingcatname = self.opt.catnames
    self._setup_menubar()
pywikibot.output(u"Page does not exist.") ctoshow += 500 else: pywikibot.output(u"Not understood.") try: checked = {} skipdates = False checkforward = True checkbackward = True checkbroken = True removeparent = True main = True workingcatname = '' tocheck = DequeGenerator() for arg in pywikibot.handle_args(): if arg.startswith('-nodate'): skipdates = True elif arg.startswith('-forward'): checkbackward = False checkbroken = False elif arg.startswith('-exist'): checkbroken = False elif arg.startswith('-keepparent'): removeparent = False elif arg.startswith('-all'): main = False elif not workingcatname: workingcatname = arg
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    The first non-option argument is taken as the working category name;
    every argument starting with '-' is passed to the bot as a boolean
    option. The module globals workingcat, parentcats, checked, tocheck
    and excludefile are (re)bound here because MakeCatBot reads them.

    @param args: command line arguments
    @type args: str
    """
    global workingcat, parentcats
    global checked, tocheck
    global excludefile

    # MakeCatBot records seen pages with ``checked.add(...)`` and tests
    # them with ``in``, so this has to be a set, not a dict.
    checked = set()
    tocheck = DequeGenerator()

    workingcatname = ''

    options = {}
    local_args = pywikibot.handle_args(args)
    for arg in local_args:
        if arg.startswith('-'):
            options[arg[1:]] = True
        elif not workingcatname:
            workingcatname = arg
        else:
            pywikibot.warning('Working category "{}" is already given.'
                              .format(workingcatname))

    if not workingcatname:
        pywikibot.bot.suggest_help(missing_parameters=['working category'])
        return

    mysite = pywikibot.Site()
    summary = i18n.twtranslate(mysite, 'makecat-create',
                               {'cat': workingcatname})

    # The bot reads the category name (for its prompt) and the edit summary
    # from its options; put them into the dict to avoid duplicate keywords.
    options['catnames'] = workingcatname
    options['summary'] = summary
    bot = MakeCatBot(site=mysite, **options)
    # Keep the module-level name pointing at the queue the bot really fills.
    tocheck = bot.tocheck

    workingcat = pywikibot.Category(mysite, '{0}{1}'
                                    .format(mysite.namespaces.CATEGORY,
                                            workingcatname))
    filename = pywikibot.config.datafilepath(
        'category',
        workingcatname.encode('ascii', 'xmlcharrefreplace').decode('ascii')
        + '_exclude.txt')
    try:
        with codecs.open(filename, 'r', encoding=mysite.encoding()) as f:
            for line in f:
                # remove leading and trailing spaces, LF and CR
                line = line.strip()
                if not line:
                    continue
                checked.add(pywikibot.Page(mysite, line))

        excludefile = codecs.open(filename, 'a', encoding=mysite.encoding())
    except IOError:
        # File does not exist
        excludefile = codecs.open(filename, 'w', encoding=mysite.encoding())

    try:
        # Get parent categories in order to `removeparent`.
        # Materialize them: the bot performs repeated membership tests,
        # which would exhaust a lazy iterable (assumes categories() may
        # return a generator), and any API error should surface here
        # where it is handled.
        try:
            parentcats = list(workingcat.categories())
        except pywikibot.Error:
            parentcats = []

        # Do not include articles already in subcats; only checking direct
        # subcats
        subcatlist = list(workingcat.subcategories())
        if subcatlist:
            for cat in pagegenerators.PreloadingGenerator(subcatlist):
                checked.update(cat.articles())

        # Fetch articles in category, and mark as already checked (seen)
        # If category is empty, ask user if they want to look for pages
        # in a different category.
        articles = list(workingcat.articles(content=True))
        if not articles:
            pywikibot.output('Category {} does not exist or is empty. '
                             'Which page to start with?'
                             .format(workingcatname))
            answer = pywikibot.input(
                '(Default is [[{}]]):'.format(workingcatname))
            if not answer:
                answer = workingcatname
            pywikibot.output('' + answer)
            articles = [pywikibot.Page(mysite, answer)]

        for pl in articles:
            checked.add(pl)
            bot.include(pl)

        # run() walks bot.generator and applies skip_page()/treat_page(),
        # which replaces the former manual loop over the queue.
        bot.run()
    finally:
        # Make sure the exclusion list is flushed and released.
        excludefile.close()
class MakeCatBot(SingleSiteBot, NoRedirectPageBot):

    """Bot tries to find new articles for a given category.

    The bot relies on the module globals ``workingcat``, ``parentcats``,
    ``checked`` and ``excludefile`` which must be set up before pages are
    included or treated.
    """

    def __init__(self, **kwargs):
        """Initializer.

        Registers the script specific options, lets the parent class parse
        the given keyword options and derives the working flags from them.

        @param kwargs: bot options; recognized keys are 'all', 'catnames',
            'exist', 'forward', 'keepparent', 'nodate' and 'summary'
        """
        self.available_options.update({
            'all': False,
            'catnames': None,
            'exist': False,
            'forward': False,
            'keepparent': False,
            'nodate': False,
            'summary': None,
        })
        super().__init__(**kwargs)

        self.skipdates = self.opt.nodate
        self.checkbackward = not self.opt.forward
        # Either -forward or -exist alone must disable the handling of
        # non-existing ("broken") pages; -forward implies -exist.
        self.checkbroken = not (self.opt.forward or self.opt.exist)
        self.removeparent = not self.opt.keepparent
        self.main = not self.opt.all
        self.tocheck = DequeGenerator()
        self.workingcatname = self.opt.catnames
        self._setup_menubar()

    @classmethod
    def _setup_menubar(cls):
        """Setup treat_page option bar.

        Defines the reduced ('r') and the extended ('e') choice lists and
        selects the reduced one as the initial menu.
        """
        small = [
            ('yes', 'y'), ('no', 'n'), ('ignore', 'i'),
            ('extend', 'e'), ('help', 'h')]
        extended = small[:3] + [
            ('more', 'm'), ('sort key', 'k'), ('skip', 's'), ('check', 'c'),
            ('other', 'o'), ('list', 'l'), ('reduce', 'r'), ('help', 'h')]
        cls.option_bar = {'e': extended, 'r': small}
        cls.treat_options = cls.option_bar['r']

    @property
    def generator(self):
        """Generator property used by run()."""
        return pagegenerators.DequePreloadingGenerator(self.tocheck)

    @staticmethod
    def highlight_title(page, condition=True):
        """Highlight a page title if condition is True."""
        if condition:
            pywikibot.output(
                color_format('\n>>> {lightpurple}{0}{default} <<<',
                             page.title()))

    @staticmethod
    def print_dot(condition=True):
        """Print a single dot if condition is True."""
        if condition:
            pywikibot.output('.', newline=False)

    def needcheck(self, page):
        """Verify whether the current page may be processed.

        @param page: the page to verify
        @return: False if the page is outside the main namespace while only
            main namespace pages are wanted, if it was already checked, or
            if date pages are skipped and the page title is a date;
            True otherwise
        """
        global checked
        if self.main and page.namespace() != 0:
            return False
        if page in checked:
            return False
        if self.skipdates and page.autoFormat()[0] is not None:
            return False
        return True

    def change_category(self, page, categories):
        """Change the category of page.

        If parent categories are to be removed and the page is in one of
        them, that parent category is replaced by the working category.

        @param page: the page to change
        @param categories: the categories the page currently belongs to
        @return: True if a parent category was replaced, False otherwise
        """
        global workingcat, parentcats
        if not self.removeparent:
            return False

        for category in categories:
            if category in parentcats:
                # Page.change_category expects the old category first and
                # the replacement second.
                page.change_category(category, workingcat,
                                     summary=self.opt.summary)
                return True
        return False

    def include(self, page, checklinks=True, realinclude=True, linkterm=None):
        """Include the current page to the working category.

        @param page: the page to include
        @param checklinks: whether linked pages are queued for checking;
            always done for redirect pages that are really included
        @param realinclude: whether the page itself is added to the category
        @param linkterm: optional sort key used for this page only
        """
        global workingcat, parentcats
        global checked
        if realinclude and page.exists():
            if page.isRedirectPage():
                checklinks = True
            else:
                cats = list(page.categories())
                if workingcat not in cats \
                   and not self.change_category(page, cats):
                    # Apply the sort key only while rendering this page's
                    # category links; leaving it set on the shared category
                    # object would leak it into all following pages.
                    previous_key = getattr(workingcat, 'sortKey', None)
                    if linkterm:
                        workingcat.sortKey = linkterm
                    try:
                        newtext = textlib.replaceCategoryLinks(
                            page.text, cats + [workingcat], site=page.site)
                    finally:
                        if linkterm:
                            workingcat.sortKey = previous_key
                    page.put(newtext, summary=self.opt.summary)

        if checklinks:
            self.checklinks(page)

    def checklinks(self, page):
        """Check whether the page has to be added to the tocheck deque.

        Pages linked from the given page, and unless only forward links
        are wanted also pages referring to it, are queued and marked as
        checked if needcheck() accepts them.
        """
        global checked
        pywikibot.output('\nChecking links for "{}"...'
                         .format(page.title()), newline=False)
        generators = [page.linkedPages()]
        if self.checkbackward:
            generators.append(page.getReferences())
        for i, linked_page in enumerate(chain(*generators)):
            # progress indicator: one dot per 25 links
            self.print_dot(not i % 25)
            if self.needcheck(linked_page):
                self.tocheck.append(linked_page)
                checked.add(linked_page)

    def init_page(self, page):
        """Add redirect targets to check list."""
        global checked
        super().init_page(page)
        if page.isRedirectPage():
            newpage = page.getRedirectTarget()
            if self.needcheck(newpage):
                self.tocheck.append(newpage)
                checked.add(newpage)

    def skip_page(self, page):
        """Check whether the page is to be skipped.

        Non-existing pages are skipped unless broken pages are checked.
        """
        if not self.checkbroken and not page.exists():
            pywikibot.warning('Page {page} does not exist on {page.site}. '
                              'Skipping.'.format(page=page))
            return True
        return super().skip_page(page)

    def treat_page(self):
        """Work on current page and ask to add article to category.

        Repeats the prompt until an answer is given which finishes the
        current page ('y', 'n', 'i', 'c', 'k' or 's'); all other answers
        only show information or change the menu.
        """
        global checked
        global excludefile
        pl = self.current_page
        ctoshow = 500
        pywikibot.output('')
        pywikibot.output('== {} =='.format(pl.title()))
        while True:
            answer = pywikibot.input_choice(
                'Add to category {}?'.format(self.workingcatname),
                self.treat_options, default='i')

            # answers finishing the current page
            if answer == 'y':
                self.include(pl)
                break
            if answer == 'c':
                self.include(pl, realinclude=False)
                break
            if answer == 'k':
                if pl.exists() and not pl.isRedirectPage():
                    linkterm = pywikibot.input(
                        'In what manner should it be alphabetized?')
                    self.include(pl, linkterm=linkterm)
                else:
                    self.include(pl)
                break
            if answer == 'n':
                excludefile.write('%s\n' % pl.title())
                break
            if answer == 'i':
                break
            if answer == 's':
                if not pl.exists():
                    pywikibot.output('Page does not exist; not added.')
                elif pl.isRedirectPage():
                    pywikibot.output(
                        'Redirect page. Will be included normally.')
                    self.include(pl, realinclude=False)
                else:
                    self.include(pl, checklinks=False)
                break

            # answers keeping the prompt open
            if answer in ('e', 'r'):
                self.treat_options = self.option_bar[answer]
            elif answer == 'h':
                pywikibot.output("""
[y]es:      Add the page and check links
[n]o:       Never add the page, saved to exclusion list
[i]gnore:   Neither add the page nor check links
[m]ore:     show more content of the page starting from the beginning
sort [k]ey: Add with sort key like [[Category|Title]]
[s]kip:     Add the page, but skip checking links
[c]heck:    Do not add the page, but do check links
[o]ther:    Add another page
[l]ist:     Show a list of the pages to check
[e]xtend:   A more extended option list
[r]educe:   Reduce option list
[q]uit:     Save exclusion list and exit this script
""")
            elif answer == 'o':
                pagetitle = pywikibot.input('Specify page to add:')
                page = pywikibot.Page(pywikibot.Site(), pagetitle)
                if page not in checked:
                    self.include(page)
            elif answer == 'l':
                length = len(self.tocheck)
                pywikibot.output('Number of pages still to check: {}'
                                 .format(length))
                if length:
                    pywikibot.output('Pages to be checked:')
                    pywikibot.output(
                        fill(' - '.join(page.title()
                                        for page in self.tocheck)))
                # show again which page the prompt refers to
                self.highlight_title(pl)
            elif answer == 'm':
                self.highlight_title(pl, ctoshow > 500)
                if pl.exists():
                    pywikibot.output(pl.text[0:ctoshow])
                else:
                    pywikibot.output('Page does not exist.')
                ctoshow += 500