def test_NamespaceFilterPageGenerator(self):
    """NamespaceFilterPageGenerator keeps only pages in the given namespaces."""
    self.assertFunction("NamespaceFilterPageGenerator")
    # Each case: namespace filter argument -> expected surviving page count.
    cases = ((0, 3), (1, 4), (10, 6), ((1, 10), 10))
    for namespaces, expected in cases:
        source = pagegenerators.PagesFromTitlesGenerator(self.titles, self.site)
        filtered = pagegenerators.NamespaceFilterPageGenerator(source, namespaces)
        self.assertEqual(len(tuple(filtered)), expected)
def test_NamespaceFilterPageGenerator(self):
    """NamespaceFilterPageGenerator keeps only pages in the given namespaces."""
    self.assertFunction("NamespaceFilterPageGenerator")
    # FIX: assertTrue(len(...) == n) gave an unhelpful "False is not true"
    # failure message; assertEqual reports both values on failure.
    # Each case: namespace filter argument -> expected surviving page count.
    for namespaces, expected in ((0, 3), (1, 4), (10, 6), ((1, 10), 10)):
        gen = pagegenerators.PagesFromTitlesGenerator(self.titles)
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
        self.assertEqual(len(tuple(gen)), expected)
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: str
    """
    options = {}
    gen = None
    local_args = pywikibot.handle_args(args)
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    gen_factory = pagegenerators.GeneratorFactory(positional_arg_name='page')
    for arg in local_args:
        option, sep, value = arg.partition(':')
        if option == '-xml':
            filename = value or pywikibot.input(
                "Please enter the XML dump's filename:")
            gen = TableXmlDumpPageGenerator(filename)
        elif option == '-auto':
            # '-auto' is deprecated; keep accepting it as '-always'.
            issue_deprecation_warning(
                'The usage of "-auto"', '-always',
                1, ArgumentDeprecationWarning, since='20170205')
            options['always'] = True
        elif option in ['-always', '-quiet', '-skipwarning']:
            # Boolean flags: strip the leading '-' to form the option key.
            options[option[1:]] = True
        else:
            if option in ('-sql', '-mysqlquery'):
                if option == '-sql':
                    # '-sql' is deprecated in favor of '-mysqlquery'.
                    issue_deprecation_warning(
                        'The usage of "-sql"', '-mysqlquery',
                        1, ArgumentDeprecationWarning, since='20170205')
                # Default query: pages whose wikitext still contains a raw
                # HTML <table>.
                query = value or """
SELECT page_namespace, page_title
FROM page JOIN text ON (page_id = old_id)
WHERE old_text LIKE '%<table%'
"""
                arg = '-mysqlquery:' + query
            # Everything else (including the rewritten query arg) goes to
            # the shared generator factory.
            gen_factory.handleArg(arg)
    if gen:
        # Apply any -namespace restrictions collected by the factory to
        # the locally built XML-dump generator.
        gen = pagegenerators.NamespaceFilterPageGenerator(
            gen, gen_factory.namespaces)
    else:
        gen = gen_factory.getCombinedGenerator()
    if gen:
        if not gen_factory.nopreload:
            gen = pagegenerators.PreloadingGenerator(gen)
        bot = Table2WikiRobot(generator=gen, **options)
        bot.run()
    else:
        suggest_help(missing_generator=True)
def __init__(self, configurl=u'https://raw.githubusercontent.com/wikimedia/labs-tools-heritage/master/erfgoedbot/monuments_config/nl_nl.json'):
    """
    Grab generator based on search to work on.

    Downloads the heritage-bot monuments configuration from *configurl*,
    connects to the wiki and repository it names, and builds a generator
    over the pages transcluding the configured row template.
    """
    # Remote JSON config drives everything below (language, project,
    # field mapping, row template, namespaces).
    # NOTE(review): no raise_for_status()/error handling — a failed
    # download surfaces as a JSON decode error; confirm that is acceptable.
    page = requests.get(configurl)
    self.monumentsconfig = page.json()
    self.site = pywikibot.Site(self.monumentsconfig.get(u'lang'),
                               self.monumentsconfig.get(u'project'))
    self.repo = self.site.data_repository()
    # Source field whose destination is the Wikidata item ('wd_item'),
    # if the config declares one; otherwise stays None.
    self.wikidatafield = None
    for field in self.monumentsconfig.get('fields'):
        if field.get(u'dest') and field.get(u'dest') == 'wd_item':
            self.wikidatafield = field.get(u'source')
    # Hard-coded property/designation pair — presumably the Dutch
    # Rijksmonument identifiers; TODO confirm before reusing this class
    # for configs other than nl_nl.
    self.property = u'P359'
    self.designation = u'Q916333'
    self.monuments = self.getMonumentsOnWikidata(self.property,
                                                 self.designation)
    # Build the work generator: pages transcluding the configured row
    # template, restricted to the configured namespaces.
    row_template = pywikibot.Page(
        self.site, '{0}:{1}'.format(self.site.namespace(10),
                                    self.monumentsconfig.get('rowTemplate')))
    trans_gen = pagegenerators.ReferringPageGenerator(row_template,
                                                     onlyTemplateInclusion=True)
    self.generator = pagegenerators.NamespaceFilterPageGenerator(
        trans_gen, self.monumentsconfig.get('namespaces'), site=self.site)
def getOorlogsmonumentenDataGenerator():
    """
    Yield one metadata dict per war-monument table row.

    Generator to parse
    https://nl.wikipedia.org/w/index.php?title=Speciaal:VerwijzingenNaarHier/Sjabloon:Tabelrij_oorlogsmonument_Nederland&namespace=0&limit=500

    Walks every main-namespace page transcluding the row template and,
    for each transclusion, yields a dict mapping parameter name to its
    raw value.
    """
    site = pywikibot.Site('nl', 'wikipedia')
    row_template = pywikibot.Page(
        site, 'Template:Tabelrij oorlogsmonument Nederland')
    trans_gen = pagegenerators.ReferringPageGenerator(
        row_template, onlyTemplateInclusion=True)
    filtered_gen = pagegenerators.NamespaceFilterPageGenerator(
        trans_gen, [0], site=site)
    for page in filtered_gen:
        # FIX: was the Python 2 statement `print page.title()`, a syntax
        # error on Python 3 (which the `with_ns=` keyword below requires).
        print(page.title())
        templates = page.templatesWithParams()
        for (template, params) in templates:
            if template.title(
                    with_ns=False) == u'Tabelrij oorlogsmonument Nederland':
                metadata = {}
                for param in params:
                    # Template parameters arrive as 'name=value' strings.
                    (field, _, value) = param.partition(u'=')
                    # Remove leading or trailing spaces
                    field = field.strip()
                    metadata[field] = value
                yield metadata
def __init__(self, pageToUnlink, namespaces, always):
    """Initialize the unlink bot.

    @param pageToUnlink: page whose incoming wikilinks should be removed
    @param namespaces: namespaces to restrict the work to ([] = all)
    @param always: if True, never ask before saving a change
    """
    self.pageToUnlink = pageToUnlink
    # Work queue: every page that links to the target page, optionally
    # namespace-filtered, preloaded in batches.
    gen = pagegenerators.ReferringPageGenerator(pageToUnlink)
    if namespaces != []:
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
    self.generator = pagegenerators.PreloadingGenerator(gen)
    linktrail = pywikibot.getSite().linktrail()
    # The regular expression which finds links. Results consist of four
    # groups:
    #
    # group title is the target page title, that is, everything
    # before | or ].
    #
    # group section is the page section.
    # It'll include the # to make life easier for us.
    #
    # group label is the alternative link title, that's everything
    # between | and ].
    #
    # group linktrail is the link trail, that's letters after ]] which are
    # part of the word.
    # note that the definition of 'letter' varies from language to language.
    self.linkR = re.compile(r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?(\|(?P<label>[^\]]*))?\]\](?P<linktrail>%s)' % linktrail)
    self.always = always
    # Set to True by the processing loop when the run is finished.
    self.done = False
    # Localized edit summary for the unlink edits.
    self.comment = i18n.twtranslate(pywikibot.getSite(), 'unlink-unlinking',
                                    self.pageToUnlink.title())
def main():
    """Process command line arguments and fix redirect links on the pages."""
    featured = False
    gen = None
    # Process global args and prepare generator args parser
    local_args = pywikibot.handleArgs()
    genFactory = pagegenerators.GeneratorFactory()
    for arg in local_args:
        if arg == '-featured':
            featured = True
        else:
            genFactory.handleArg(arg)
    mysite = pywikibot.Site()
    # Community consensus on nl.wikipedia forbids bots fixing redirects.
    if mysite.sitename() == 'wikipedia:nl':
        pywikibot.output(
            u'\03{lightred}There is consensus on the Dutch Wikipedia that bots should not be used to fix redirects.\03{default}'
        )
        sys.exit()
    if featured:
        # Work on the main-namespace pages linked from the local
        # "featured articles" list page.
        featuredList = i18n.translate(mysite, featured_articles)
        ref = pywikibot.Page(pywikibot.Site(), featuredList)
        gen = pagegenerators.ReferringPageGenerator(ref)
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, [0])
    if not gen:
        gen = genFactory.getCombinedGenerator()
    if gen:
        for page in pagegenerators.PreloadingGenerator(gen):
            workon(page)
    else:
        pywikibot.showHelp('fixing_redirects')
def main():
    """Walk the disambiguation category and process pages linking into it."""
    global mysite, linktrail, page
    # Leftover command-line words form the start-at title; "!" (which
    # sorts before everything) means start from the beginning.
    start = []
    for arg in pywikibot.handleArgs():
        start.append(arg)
    if start:
        start = " ".join(start)
    else:
        start = "!"
    mysite = pywikibot.Site()
    linktrail = mysite.linktrail()
    try:
        generator = pagegenerators.CategorizedPageGenerator(
            mysite.disambcategory(), start=start)
    except pywikibot.NoPage:
        pywikibot.output(
            "The bot does not know the disambiguation category for your wiki.")
        raise
    # only work on articles
    generator = pagegenerators.NamespaceFilterPageGenerator(generator, [0])
    generator = pagegenerators.PreloadingGenerator(generator)
    # Collect (page, linked-pages) pairs and flush each time roughly 50
    # linked pages have accumulated, so they can be preloaded in one batch
    # before workon() runs.
    pagestodo = []
    pagestoload = []
    for page in generator:
        if page.isRedirectPage():
            continue
        linked = page.linkedPages()
        pagestodo.append((page, linked))
        pagestoload += linked
        if len(pagestoload) > 49:
            pagestoload = pagegenerators.PreloadingGenerator(pagestoload)
            for page, links in pagestodo:
                workon(page, links)
            pagestoload = []
            pagestodo = []
def __init__(self, pageToUnlink, **kwargs):
    """Initialize the unlink bot.

    @param pageToUnlink: page whose incoming wikilinks should be removed
    @keyword namespaces: namespaces to restrict the work to
        ([] = all namespaces, the default)
    """
    self.availableOptions.update({
        'namespaces': [],
        # Which namespaces should be processed?
        # default to [] which means all namespaces will be processed
    })
    super(UnlinkBot, self).__init__(**kwargs)
    self.pageToUnlink = pageToUnlink
    linktrail = self.pageToUnlink.site.linktrail()
    # Work queue: every page that links to the target page, optionally
    # namespace-filtered, preloaded in batches.
    gen = pagegenerators.ReferringPageGenerator(pageToUnlink)
    if self.getOption('namespaces') != []:
        gen = pagegenerators.NamespaceFilterPageGenerator(
            gen, self.getOption('namespaces'))
    self.generator = pagegenerators.PreloadingGenerator(gen)
    # The regular expression which finds links. Results consist of four
    # groups:
    #
    # group title is the target page title, that is, everything
    # before | or ].
    #
    # group section is the page section.
    # It'll include the # to make life easier for us.
    #
    # group label is the alternative link title, that's everything
    # between | and ].
    #
    # group linktrail is the link trail, that's letters after ]] which are
    # part of the word.
    # note that the definition of 'letter' varies from language to language.
    self.linkR = re.compile(r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?(\|(?P<label>[^\]]*))?\]\](?P<linktrail>%s)' % linktrail)
    # Localized edit summary for the unlink edits.
    self.comment = i18n.twtranslate(self.pageToUnlink.site,
                                    'unlink-unlinking',
                                    self.pageToUnlink.title())
def main():
    """Process command line arguments and run the no-references bot."""
    # page generator
    gen = None
    # This temporary array is used to read the page title if one single
    # page to work on is specified by the arguments.
    pageTitle = []
    # Which namespaces should be processed?
    # default to [] which means all namespaces will be processed
    namespaces = []
    # Never ask before changing a page
    always = False
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()
    for arg in pywikibot.handleArgs():
        if arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
            gen = XmlDumpNoReferencesPageGenerator(xmlFilename)
        elif arg.startswith('-namespace:'):
            try:
                namespaces.append(int(arg[11:]))
            except ValueError:
                # Non-numeric namespace names are kept as strings.
                namespaces.append(arg[11:])
        elif arg == '-always':
            always = True
        else:
            if not genFactory.handleArg(arg):
                pageTitle.append(arg)
    if pageTitle:
        page = pywikibot.Page(pywikibot.getSite(), ' '.join(pageTitle))
        gen = iter([page])
    if not gen:
        gen = genFactory.getCombinedGenerator()
    if not gen:
        # No generator given: fall back to the wiki's maintenance category.
        site = pywikibot.getSite()
        try:
            cat = maintenance_category[site.family.name][site.lang]
        except KeyError:
            # FIX: was a bare `except:` which also swallowed
            # KeyboardInterrupt/SystemExit and hid real errors; only the
            # missing-mapping lookup above is expected to fail here.
            pass
        else:
            if not namespaces:
                namespaces = [0]
            cat = catlib.Category(site, "%s:%s"
                                  % (site.category_namespace(), cat))
            gen = pagegenerators.CategorizedPageGenerator(cat)
    if not gen:
        pywikibot.showHelp('noreferences')
    else:
        if namespaces:
            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = NoReferencesBot(preloadingGen, always)
        bot.run()
def get_template_generator(lng, tpl):
    """Create a generator of articles linking to template.

    Returns preloaded main-namespace pages (with page props) that
    transclude the template *tpl* on the *lng* Wikipedia.
    """
    site = pwb.Site(lng, "wikipedia")
    template_page = pwb.Page(site, "{}:{}".format(site.namespace(10), tpl))
    transclusions = pg.ReferringPageGenerator(template_page,
                                              onlyTemplateInclusion=True)
    articles_only = pg.NamespaceFilterPageGenerator(transclusions,
                                                    namespaces=[0])
    return site.preloadpages(articles_only, pageprops=True)
def test_QualityFilterPageGenerator(self):
    """Test QualityFilterPageGenerator page counts on the fixture titles."""
    site = self.site
    # Quality level 0 keeps 7 of the fixture pages.
    gen = pagegenerators.PagesFromTitlesGenerator(self.titles, site)
    gen = pagegenerators.QualityFilterPageGenerator(gen, [0])
    self.assertEqual(len(tuple(gen)), 7)
    gen = pagegenerators.PagesFromTitlesGenerator(self.titles, site)
    # NOTE(review): this filtered generator is rebound on the next line
    # before ever being consumed, so the [4] filter is never asserted —
    # confirm whether an assertEqual on it was intended here.
    gen = pagegenerators.NamespaceFilterPageGenerator(gen, [4])
    gen = pagegenerators.PagesFromTitlesGenerator(self.titles, site)
    # Unfiltered: all 10 fixture titles come through.
    self.assertEqual(len(tuple(gen)), 10)
def getCategoryContent(catname):
    """Return a generator over the main-namespace articles of *catname*.

    Pages are preloaded in batches; anything outside namespace 0 is
    filtered out.
    """
    category = pywikibot.Category(site, catname)
    members = list(category.articles(False))
    # Preload page contents, then keep only articles (namespace 0).
    preloaded = pagegenerators.PreloadingGenerator(members)
    return pagegenerators.NamespaceFilterPageGenerator(preloaded, [0])
def list_template_usage(row_template_name):
    """Return a generator of main space pages transcluding a given template.

    Pages come from the German Wikipedia and are preloaded in batches.
    """
    site = wikipedia.getSite('de', 'wikipedia')
    template_title = u'%s:%s' % (site.namespace(10), row_template_name)
    row_template = wikipedia.Page(site, template_title)
    referring = pagegenerators.ReferringPageGenerator(
        row_template, onlyTemplateInclusion=True)
    articles_only = pagegenerators.NamespaceFilterPageGenerator(referring, [0])
    return pagegenerators.PreloadingGenerator(articles_only)
def generate_transclusions(site, template, namespaces=[]): pywikibot.output(u'Fetching template transclusions...') transclusion_page = pywikibot.Page(site, template, ns=10) gen = pagegenerators.ReferringPageGenerator(transclusion_page, onlyTemplateInclusion=True) if namespaces: gen = pagegenerators.NamespaceFilterPageGenerator( gen, namespaces, site) for page in gen: yield page
def test_NamespaceFilterPageGenerator(self):
    """NamespaceFilterPageGenerator keeps only pages in the given namespaces."""
    self.assertFunction("NamespaceFilterPageGenerator")
    site = self.site
    # (namespace argument, expected number of surviving pages);
    # includes numeric ids, tuples of ids, and namespace names.
    expectations = [
        (0, 3),
        (1, 4),
        (10, 6),
        ((1, 10), 10),
        (('Talk', 'Template'), 10),
    ]
    for namespaces, expected in expectations:
        pages = pagegenerators.PagesFromTitlesGenerator(self.titles, site)
        filtered = pagegenerators.NamespaceFilterPageGenerator(
            pages, namespaces, site)
        self.assertEqual(len(tuple(filtered)), expected)
def main():
    """Run the RKD images importer over File pages using {{RKDimages}}."""
    commonssite = pywikibot.Site(u'commons', u'commons')
    templatepage = pywikibot.Page(commonssite, title=u'Template:RKDimages')
    # File-namespace (6) pages transcluding the template, preloaded.
    referring = pagegenerators.ReferringPageGenerator(
        templatepage, onlyTemplateInclusion=True)
    file_pages = pagegenerators.NamespaceFilterPageGenerator(referring, 6)
    gen = pagegenerators.PreloadingGenerator(file_pages)
    rkdimagesImporter = RKDImagesImporter(gen)
    rkdimagesImporter.run()
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    options = {}
    generator = None
    checkcurrent = False
    # -checkcurrent is restricted to the category namespace (14).
    ns = []
    ns.append(14)
    # Process global args and prepare generator args parser
    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()
    for arg in local_args:
        if arg.startswith('-summary'):
            if len(arg) == 8:
                # Bare '-summary': prompt interactively.
                options['summary'] = pywikibot.input(
                    u'What summary do you want to use?')
            else:
                options['summary'] = arg[9:]
        elif arg.startswith('-checkcurrent'):
            checkcurrent = True
        elif arg == '-always':
            options['always'] = True
        else:
            genFactory.handleArg(arg)
    if checkcurrent:
        # Work on every category transcluding the wiki's primary
        # Commonscat template.
        primaryCommonscat, commonscatAlternatives = \
            CommonscatBot.getCommonscatTemplate(
                pywikibot.Site().code)
        generator = pagegenerators.NamespaceFilterPageGenerator(
            pagegenerators.ReferringPageGenerator(
                pywikibot.Page(pywikibot.Site(),
                               u'Template:' + primaryCommonscat),
                onlyTemplateInclusion=True), ns)
    if not generator:
        generator = genFactory.getCombinedGenerator()
    if generator:
        pregenerator = pagegenerators.PreloadingGenerator(generator)
        bot = CommonscatBot(pregenerator, **options)
        bot.run()
    else:
        pywikibot.showHelp()
def _templates_generator(self):
    """Yield templates transcluding the source template, plus their redirects.

    A '/doc' subpage is mapped back to its parent template when the parent
    exists; the source template itself is skipped entirely.
    """
    referring = pagegenerators.ReferringPageGenerator(
        self.source_template, onlyTemplateInclusion=True)
    for page in pagegenerators.NamespaceFilterPageGenerator(referring, [10]):
        template = page
        # Documentation subpages stand in for their parent template.
        if template.title().endswith("/doc"):
            parent = pywikibot.Page(self.site, template.title()[:-4])
            if parent.exists():
                template = parent
        if template == self.source_template:
            continue
        yield template
        for redirect in template.getReferences(redirectsOnly=True,
                                               withTemplateInclusion=False):
            yield redirect
def __init__(self, lang, noclaims, templateclaims):
    """
    Initialize the bot.

    Arguments:
    * lang - The language code of the Wikipedia we're working on
    * noclaims - The title of the page on Wikidata with the list of pages to work on
    * templateclaims - The title of the page on Wikipedia with the template claims
    """
    self.lang = lang
    self.site = pywikibot.Site(self.lang, u'wikipedia')
    self.repo = self.site.data_repository()
    # Work queue: main-namespace (0) pages from the no-claims list,
    # preloaded in batches.
    self.generator = pg.PreloadingGenerator(
        pg.NamespaceFilterPageGenerator(
            self.getNoclaimGenerator(noclaims), 0))
    self.templateclaims = templateclaims
    # Claims data parsed from the on-wiki template-claims page.
    self.templates = self.getTemplateClaims(templateclaims)
def main():
    """Process command line arguments and run the ISBN bot."""
    # page generator
    gen = None
    # This temporary array is used to read the page title if one single
    # page to work on is specified by the arguments.
    pageTitle = []
    # Which namespaces should be processed?
    # default to [] which means all namespaces will be processed
    namespaces = []
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()
    # Never ask before changing a page
    always = False
    # Flags forwarded to IsbnBot; presumably -to13 converts ISBN-10 to
    # ISBN-13 and -format re-hyphenates — confirm against IsbnBot.
    to13 = False
    format = False
    for arg in pywikibot.handleArgs():
        if arg.startswith('-namespace:'):
            try:
                namespaces.append(int(arg[11:]))
            except ValueError:
                # Non-numeric namespace names are kept as strings.
                namespaces.append(arg[11:])
        elif arg == '-always':
            always = True
        elif arg == '-to13':
            to13 = True
        elif arg == '-format':
            format = True
        else:
            if not genFactory.handleArg(arg):
                pageTitle.append(arg)
    site = pywikibot.getSite()
    site.login()
    if pageTitle:
        gen = iter(
            [pywikibot.Page(pywikibot.Link(t, site)) for t in pageTitle])
    if not gen:
        gen = genFactory.getCombinedGenerator()
    if not gen:
        pywikibot.showHelp('isbn')
    else:
        if namespaces != []:
            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = IsbnBot(preloadingGen, to13=to13, format=format, always=always)
        bot.run()
def list_template_usage(site_obj, tmpl_name):
    """
    Takes Site object and template name and returns a generator.

    The function expects a Site object (pywikibot.Site()) and a template
    name (String). It creates a list of all pages using that template and
    returns them as a generator. The generator will load 50 pages at a
    time for iteration.
    """
    template_title = "{}:{}".format(site_obj.namespace(10), tmpl_name)
    template_page = pywikibot.Page(site_obj, template_title)
    transclusions = pg.ReferringPageGenerator(template_page,
                                              onlyTemplateInclusion=True)
    main_ns_only = pg.NamespaceFilterPageGenerator(transclusions,
                                                   namespaces=[0])
    return site_obj.preloadpages(main_ns_only, pageprops=True)
def template_dict_generator(templates, namespaces):
    """Yield (template name, transcluding pages) pairs.

    @param templates: iterable of template names (without namespace prefix)
    @param namespaces: namespaces to restrict to; falsy means no filter
    """
    mysite = pywikibot.Site()
    # The names of the templates are the keys, and lists of pages
    # transcluding templates are the values.
    mytpl = mysite.getNamespaceIndex(mysite.template_namespace())
    for template in templates:
        gen = pagegenerators.ReferringPageGenerator(
            pywikibot.Page(mysite, template, ns=mytpl),
            onlyTemplateInclusion=True)
        if namespaces:
            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
        # FIX(idiom): materialize the generator directly instead of the
        # manual append loop — identical result, less code.
        yield template, list(gen)
def getPageGenerator(self):
    """Build the generator of candidate pages.

    Either the hash-based generator (when the 'use_hash' option is set)
    or the deduplicated, preloaded File-namespace pages tagged with any
    NowCommons template.
    """
    if self.getOption('use_hash'):
        return self.useHashGenerator()
    tagged_templates = [pywikibot.Page(self.site, title, ns=10)
                        for title in self.ncTemplates()]
    referrers = [pg.ReferringPageGenerator(template,
                                           followRedirects=True,
                                           onlyTemplateInclusion=True)
                 for template in tagged_templates]
    gen = pg.CombinedPageGenerator(referrers)
    gen = pg.NamespaceFilterPageGenerator(gen, [6])
    gen = pg.DuplicateFilterPageGenerator(gen)
    return pg.PreloadingGenerator(gen)
def main():
    """
    Parse the command line arguments and get a pagegenerator to work on.

    Iterate through all the pages.
    """
    summary = None
    generator = None
    checkcurrent = False
    always = False
    # -checkcurrent is restricted to the category namespace (14).
    ns = []
    ns.append(14)
    # Process global args and prepare generator args parser
    local_args = pywikibot.handleArgs()
    genFactory = pagegenerators.GeneratorFactory()
    for arg in local_args:
        if arg.startswith('-summary'):
            if len(arg) == 8:
                # Bare '-summary': prompt interactively.
                summary = pywikibot.input(u'What summary do you want to use?')
            else:
                summary = arg[9:]
        elif arg.startswith('-checkcurrent'):
            checkcurrent = True
        elif arg == '-always':
            always = True
        else:
            genFactory.handleArg(arg)
    if checkcurrent:
        # Work on every category transcluding the wiki's primary
        # Commonscat template.
        primaryCommonscat, commonscatAlternatives = \
            CommonscatBot.getCommonscatTemplate(
                pywikibot.Site().code)
        generator = pagegenerators.NamespaceFilterPageGenerator(
            pagegenerators.ReferringPageGenerator(
                pywikibot.Page(pywikibot.Site(),
                               u'Template:' + primaryCommonscat),
                onlyTemplateInclusion=True), ns)
    if not generator:
        generator = genFactory.getCombinedGenerator()
    if not generator:
        raise add_text.NoEnoughData(u'You have to specify the generator you '
                                    u'want to use for the script!')
    pregenerator = pagegenerators.PreloadingGenerator(generator)
    bot = CommonscatBot(pregenerator, always, summary)
    bot.run()
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    local_args = pywikibot.handle_args(args)
    generator = None
    # First leftover argument is the start-at title; "!" sorts first.
    start = local_args[0] if local_args else '!'
    mysite = pywikibot.Site()
    try:
        mysite.disambcategory()
    except pywikibot.Error as e:
        # Wiki has no known disambiguation category; report and fall
        # through to the help message below.
        pywikibot.output(e)
    else:
        generator = pagegenerators.CategorizedPageGenerator(
            mysite.disambcategory(), start=start)
    if not generator:
        pywikibot.showHelp()
        return
    # only work on articles
    generator = pagegenerators.NamespaceFilterPageGenerator(generator, [0])
    generator = pagegenerators.PreloadingGenerator(generator)
    # Collect (page, linked-pages) pairs and flush each time roughly 50
    # linked pages have accumulated, so they can be preloaded in one batch
    # before workon() runs.
    pagestodo = []
    pagestoload = []
    for page in generator:
        if page.isRedirectPage():
            continue
        linked = page.linkedPages()
        pagestodo.append((page, linked))
        pagestoload += linked
        if len(pagestoload) > 49:
            pagestoload = pagegenerators.PreloadingGenerator(pagestoload)
            for page, links in pagestodo:
                workon(page, links)
            pagestoload = []
            pagestodo = []
def crawlerCat(category, delay, subcat=False, checkTalk=False):
    """Remove the template from the articles of *category*, recursively.

    @param category: title of the category to crawl
    @param delay: delay value passed through to removeTemplate
    @param subcat: number of subcategory levels still to recurse into
        (False/0 disables recursion)
    @param checkTalk: also check the talk pages (passed to removeTemplate)
    @return: accumulated log text
    """
    log = u''
    cat = pywikibot.Category(site, category)
    pagesInCat = list(cat.articles(False))
    pagesList = pagegenerators.PreloadingGenerator(
        pagesInCat)  # Preload the pages contained in the category
    pagesList = pagegenerators.NamespaceFilterPageGenerator(
        pagesList, [0])  # Keep only articles (namespace 0)
    log += removeTemplate(pagesList, cat.title(withNamespace=False),
                          delay, checkTalk)
    if subcat:
        # One recursion level is consumed for this whole batch of
        # subcategories.
        subcat -= 1
        subcategories = list(cat.subcategories())
        for subc in subcategories:
            log += crawlerCat(subc.title(withNamespace=False),
                              delay, subcat, checkTalk)
    return log
def main(*args): ''' Main and only loop ''' # Process global args and prepare generator args parser local_args = pywikibot.handle_args(args) genFactory = pagegenerators.GeneratorFactory() for arg in local_args: genFactory.handleArg(arg) gen = genFactory.getCombinedGenerator(preload=True) if gen: generator = pagegenerators.NamespaceFilterPageGenerator(gen, 14) skiptemplates = [ u'Interwiki from wikidata', u'On Wikidata', u'Countries of Europe', u'VN' ] for page in generator: pywikibot.output(u'Working on %s' % (page.title(), )) if not page.exists(): pywikibot.output(u'Page does not exist') continue text = page.get() foundtemplate = False for skiptemplate in skiptemplates: if u'{{%s' % (skiptemplate.lower(), ) in text.lower(): foundtemplate = True if foundtemplate: pywikibot.output(u'Already has the template') continue newtext = u'{{Interwiki from wikidata}}\n' + text summary = u'Adding {{Interwiki from wikidata}} to get links to Wikipedia articles' pywikibot.showDiff(text, newtext) page.put(newtext, summary=summary) else: pywikibot.bot.suggest_help(missing_generator=True) return False
def main():
    """Walk the disambiguation category and process pages linking into it."""
    local_args = pywikibot.handleArgs()
    generator = None
    # First leftover argument is the start-at title; "!" sorts first.
    start = local_args[0] if local_args else '!'
    mysite = pywikibot.Site()
    try:
        mysite.disambcategory()
    except pywikibot.Error as e:
        # Wiki has no known disambiguation category; report and fall
        # through to the help message below.
        pywikibot.output(e)
    else:
        generator = pagegenerators.CategorizedPageGenerator(
            mysite.disambcategory(), start=start)
    if not generator:
        pywikibot.showHelp()
        return
    # only work on articles
    generator = pagegenerators.NamespaceFilterPageGenerator(generator, [0])
    generator = pagegenerators.PreloadingGenerator(generator)
    # Collect (page, linked-pages) pairs and flush each time roughly 50
    # linked pages have accumulated, so they can be preloaded in one batch
    # before workon() runs.
    pagestodo = []
    pagestoload = []
    for page in generator:
        if page.isRedirectPage():
            continue
        linked = page.linkedPages()
        pagestodo.append((page, linked))
        pagestoload += linked
        if len(pagestoload) > 49:
            pagestoload = pagegenerators.PreloadingGenerator(pagestoload)
            for page, links in pagestodo:
                workon(page, links)
            pagestoload = []
            pagestodo = []
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    quietMode = False  # use -quiet to get less output
    # if the -file argument is used, page titles are stored in this array.
    # otherwise it will only contain one page.
    articles = []
    # if -file is not used, this temporary array is used to read the page title.
    page_title = []
    # Which namespaces should be processed?
    # default to [] which means all namespaces will be processed
    namespaces = []
    xmlfilename = None
    gen = None
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()
    for arg in pywikibot.handle_args(args):
        if arg.startswith('-xml'):
            if len(arg) == 4:
                # Bare '-xml': prompt for the dump filename.
                xmlfilename = pywikibot.input(
                    u'Please enter the XML dump\'s filename:')
            else:
                xmlfilename = arg[5:]
            gen = TableXmlDumpPageGenerator(xmlfilename)
        elif arg == '-sql':
            # Pages whose wikitext still contains a raw HTML <table>.
            query = u"""
SELECT page_namespace, page_title
FROM page JOIN text ON (page_id = old_id)
WHERE old_text LIKE '%<table%'
LIMIT 200"""
            gen = pagegenerators.MySQLPageGenerator(query)
        elif arg.startswith('-namespace:'):
            try:
                namespaces.append(int(arg[11:]))
            except ValueError:
                # Non-numeric namespace names are kept as strings.
                namespaces.append(arg[11:])
        elif arg.startswith('-skip:'):
            # Drop everything before the named title from the work list.
            articles = articles[articles.index(arg[6:]):]
        elif arg.startswith('-auto'):
            config.table2wikiAskOnlyWarnings = True
            config.table2wikiSkipWarnings = True
            pywikibot.output('Automatic mode!\n')
        elif arg.startswith('-quiet'):
            quietMode = True
        else:
            if not genFactory.handleArg(arg):
                page_title.append(arg)
    # if the page is given as a command line argument,
    # connect the title's parts with spaces
    if page_title != []:
        page_title = ' '.join(page_title)
        page = pywikibot.Page(pywikibot.Site(), page_title)
        gen = iter([page])
    if not gen:
        gen = genFactory.getCombinedGenerator()
    if gen:
        if namespaces != []:
            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = Table2WikiRobot(preloadingGen, quietMode)
        bot.run()
    else:
        pywikibot.showHelp('table2wiki')