def countTemplates(self, templates, namespaces):
    """Count how many pages transclude each template and print a report.

    @param templates: iterable of template names (without namespace prefix)
    @param namespaces: namespace numbers to restrict to; empty means all
    @return: dict mapping template name -> number of transclusions
    """
    site = wikipedia.getSite()
    prefix = site.template_namespace() + ':'
    report = [u'Number of transclusions per template',
              u'------------------------------------']
    # Template names are the keys; transclusion counts are the values.
    counts = {}
    grand_total = 0
    for name in templates:
        referrers = pagegenerators.ReferringPageGenerator(
            wikipedia.Page(site, prefix + name),
            onlyTemplateInclusion=True)
        if namespaces:
            referrers = pagegenerators.NamespaceFilterPageGenerator(
                referrers, namespaces)
        # Exhaust the generator, counting pages as they stream by.
        num = sum(1 for _ in referrers)
        counts[name] = num
        report.append(u'%s: %d' % (name, num))
        grand_total += num
    for line in report:
        wikipedia.output(line, toStdout=True)
    wikipedia.output(u'TOTAL: %d' % grand_total, toStdout=True)
    wikipedia.output(u'Report generated on %s'
                     % datetime.datetime.utcnow().isoformat(),
                     toStdout=True)
    return counts
def main():
    """Parse command-line args, build a page generator and run the bot.

    Falls back to the per-wiki maintenance category when no generator or
    page title was given on the command line.
    """
    #page generator
    gen = None
    # This temporary array is used to read the page title if one single
    # page to work on is specified by the arguments.
    pageTitle = []
    # Which namespaces should be processed?
    # default to [] which means all namespaces will be processed
    namespaces = []
    # Never ask before changing a page
    always = False
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()
    for arg in pywikibot.handleArgs():
        if arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
            gen = XmlDumpNoReferencesPageGenerator(xmlFilename)
        elif arg.startswith('-namespace:'):
            try:
                namespaces.append(int(arg[11:]))
            except ValueError:
                # Non-numeric namespace names are passed through verbatim.
                namespaces.append(arg[11:])
        elif arg == '-always':
            always = True
        else:
            if not genFactory.handleArg(arg):
                pageTitle.append(arg)
    if pageTitle:
        page = pywikibot.Page(pywikibot.getSite(), ' '.join(pageTitle))
        gen = iter([page])
    if not gen:
        gen = genFactory.getCombinedGenerator()
    if not gen:
        site = pywikibot.getSite()
        try:
            cat = maintenance_category[site.family.name][site.lang]
        # Was a bare `except:`, which silently hid every error (even
        # NameError/KeyboardInterrupt); only a missing mapping is expected.
        except KeyError:
            pass
        else:
            import catlib
            if not namespaces:
                namespaces = [0]
            cat = catlib.Category(site,
                                  "%s:%s" % (site.category_namespace(), cat))
            gen = pagegenerators.CategorizedPageGenerator(cat)
    if not gen:
        pywikibot.showHelp('noreferences')
    else:
        if namespaces:
            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = NoReferencesBot(preloadingGen, always)
        bot.run()
def main(): global mysite, linktrail, page start = [] for arg in wikipedia.handleArgs(): start.append(arg) if start: start = " ".join(start) else: start = "!" mysite = wikipedia.getSite() linktrail = mysite.linktrail() try: generator = pagegenerators.CategorizedPageGenerator( mysite.disambcategory(), start=start) except wikipedia.NoPage: print "The bot does not know the disambiguation category for your wiki." raise # only work on articles generator = pagegenerators.NamespaceFilterPageGenerator(generator, [0]) generator = pagegenerators.PreloadingGenerator(generator) pagestodo = [] pagestoload = [] for page in generator: if page.isRedirectPage(): continue linked = page.linkedPages() pagestodo.append((page, linked)) pagestoload += linked if len(pagestoload) > 49: wikipedia.getall(mysite, pagestoload) for page, links in pagestodo: workon(page, links) pagestoload = [] pagestodo = []
def listTemplates(self, templates, namespaces):
    """List every page transcluding each template and print a report.

    @param templates: iterable of template names (without namespace prefix)
    @param namespaces: namespace numbers to restrict to; empty means all
    @return: dict mapping template name -> list of transcluding Pages
    """
    site = wikipedia.getSite()
    total = 0
    # Template names are the keys; lists of transcluding pages the values.
    pages_by_template = {}
    report = [u'List of pages transcluding templates:']
    report.extend(u'* %s' % name for name in templates)
    report.append(u'------------------------------------')
    for name in templates:
        transcluders = []
        referrers = pagegenerators.ReferringPageGenerator(
            wikipedia.Page(site, site.template_namespace() + ':' + name),
            onlyTemplateInclusion=True)
        if namespaces:
            referrers = pagegenerators.NamespaceFilterPageGenerator(
                referrers, namespaces)
        for transcluder in referrers:
            report.append(u'%s' % transcluder.title())
            total += 1
            transcluders.append(transcluder)
        pages_by_template[name] = transcluders
    report.append(u'Total page count: %d' % total)
    for line in report:
        wikipedia.output(line, toStdout=True)
    wikipedia.output(u'Report generated on %s'
                     % datetime.datetime.utcnow().isoformat(),
                     toStdout=True)
    return pages_by_template
def __init__(self, pageToUnlink, namespaces, always):
    """Set up the unlink bot.

    @param pageToUnlink: the page whose incoming wiki-links will be removed
    @param namespaces: list of namespace numbers to restrict the referring
        pages to; [] means all namespaces
    @param always: if True, never ask before changing a page
    """
    self.pageToUnlink = pageToUnlink
    gen = pagegenerators.ReferringPageGenerator(pageToUnlink)
    if namespaces != []:
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
    self.generator = pagegenerators.PreloadingGenerator(gen)
    linktrail = pywikibot.getSite().linktrail()
    # The regular expression which finds links. Results consist of four
    # groups:
    #
    # group title is the target page title, that is, everything
    # before | or ].
    #
    # group section is the page section.
    # It'll include the # to make life easier for us.
    #
    # group label is the alternative link title, that's everything
    # between | and ].
    #
    # group linktrail is the link trail, that's letters after ]] which are
    # part of the word.
    # note that the definition of 'letter' varies from language to language.
    self.linkR = re.compile(r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?(\|(?P<label>[^\]]*))?\]\](?P<linktrail>%s)' % linktrail)
    self.always = always
    # Set to True once the user aborts; checked by the run loop.
    self.done = False
    # Localized edit summary for the unlinking edits.
    self.comment = i18n.twtranslate(pywikibot.getSite(), 'unlink-unlinking',
                                    self.pageToUnlink.title())
def main():
    """Locate and geo-tag Rijksmonument images on Wikimedia Commons."""
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    # Connect database, we need that
    conn = cursor = None
    (conn, cursor) = connectDatabase()
    genFactory = pagegenerators.GeneratorFactory()
    for arg in wikipedia.handleArgs():
        genFactory.handleArg(arg)
    generator = genFactory.getCombinedGenerator()
    if not generator:
        # No generator on the command line: fall back to the images that
        # still lack a location template.
        generator = getRijksmonumentWithoutLocation()
    # Preload, restricted to the File namespace (6) so we only see images.
    image_gen = pagegenerators.PreloadingGenerator(
        pagegenerators.NamespaceFilterPageGenerator(generator, [6]))
    for image in image_gen:
        template = locateImage(image, conn, cursor)
        if template:
            addLocation(image, template)
def main():
    """Fix redirect links, either on featured articles or generated pages."""
    featured = False
    gen = None
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()
    for arg in pywikibot.handleArgs():
        if arg == '-featured':
            featured = True
        else:
            genFactory.handleArg(arg)
    mysite = pywikibot.getSite()
    # The Dutch Wikipedia forbids this kind of bot edit entirely.
    if mysite.sitename() == 'wikipedia:nl':
        pywikibot.output(
            u'\03{lightred}There is consensus on the Dutch Wikipedia that bots should not be used to fix redirects.\03{default}'
        )
        sys.exit()
    if featured:
        featuredList = pywikibot.translate(mysite, featured_articles)
        ref = pywikibot.Page(pywikibot.getSite(), featuredList)
        gen = pagegenerators.NamespaceFilterPageGenerator(
            pagegenerators.ReferringPageGenerator(ref), [0])
    if not gen:
        gen = genFactory.getCombinedGenerator()
    if not gen:
        pywikibot.showHelp('fixing_redirects')
        return
    for page in pagegenerators.PreloadingGenerator(gen):
        workon(page)
def main():
    """Parse command-line args, build a page generator and run SelflinkBot."""
    #page generator
    gen = None
    # This temporary array is used to read the page title if one single
    # page to work on is specified by the arguments.
    pageTitle = []
    # Which namespaces should be processed?
    # default to [] which means all namespaces will be processed
    namespaces = []
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()
    always = False
    for arg in pywikibot.handleArgs():
        if arg.startswith('-xml'):
            if len(arg) == 4:
                # Bare '-xml' given: prompt for the dump filename.
                xmlFilename = pywikibot.input(
                    u'Please enter the XML dump\'s filename:')
            else:
                xmlFilename = arg[5:]
            gen = XmlDumpSelflinkPageGenerator(xmlFilename)
        elif arg == '-sql':
            # NOT WORKING YET
            query = """
SELECT page_namespace, page_title
FROM page JOIN pagelinks JOIN text ON (page_id = pl_from AND page_id = old_id)
WHERE pl_title = page_title
AND pl_namespace = page_namespace
AND page_namespace = 0
AND (old_text LIKE concat('%[[', page_title, ']]%')
    OR old_text LIKE concat('%[[', page_title, '|%'))
LIMIT 100"""
            gen = pagegenerators.MySQLPageGenerator(query)
        elif arg.startswith('-namespace:'):
            try:
                namespaces.append(int(arg[11:]))
            except ValueError:
                # Non-numeric namespace names are passed through verbatim.
                namespaces.append(arg[11:])
        elif arg == '-always':
            always = True
        else:
            if not genFactory.handleArg(arg):
                pageTitle.append(arg)
    if pageTitle:
        # A single page title was given as loose command-line words.
        page = pywikibot.Page(pywikibot.getSite(), ' '.join(pageTitle))
        gen = iter([page])
    if not gen:
        gen = genFactory.getCombinedGenerator()
    if not gen:
        pywikibot.showHelp('selflink')
    else:
        if namespaces != []:
            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = SelflinkBot(preloadingGen, always)
        bot.run()
def generateTransclusions(Site, template, namespaces=None):
    """Yield the pages transcluding the given template.

    @param Site: the wiki site to query
    @param template: title of the template page (defaults to namespace 10)
    @param namespaces: optional list of namespace numbers to restrict the
        result to; None or [] means all namespaces

    Note: the default was a mutable list literal (``namespaces=[]``), a
    classic shared-default pitfall; ``None`` is the safe, equivalent default
    since the value is only tested for truthiness.
    """
    if namespaces is None:
        namespaces = []
    pywikibot.output(u'Fetching template transclusions...')
    transclusionPage = pywikibot.Page(Site, template, defaultNamespace=10)
    gen = pagegenerators.ReferringPageGenerator(transclusionPage,
                                                onlyTemplateInclusion=True)
    if namespaces:
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces,
                                                          Site)
    for page in gen:
        yield page
def main():
    """Parse command-line args and run the bot on the selected pages.

    BUGFIX: ``template`` was initialised to ``None`` but the
    ``-template:`` branch called ``template.append(...)``, which raised
    AttributeError; it is now initialised to a list.  The misspelled
    user prompts ("chage") are also corrected.
    """
    summary_commandline, gen = None, None
    # Must be a list: the -template: branch appends to it.
    template = []
    exceptions, PageTitles, namespaces = [], [], []
    cat = ''
    autoText, autoTitle = False, False
    genFactory = pagegenerators.GeneratorFactory()
    arg = False  #------if you dont want to work with arguments leave it False if you want change it to True---
    if arg == False:
        for arg in wikipedia.handleArgs():
            if arg == '-autotitle':
                autoTitle = True
            elif arg == '-autotext':
                autoText = True
            elif arg.startswith('-page:'):
                if len(arg) == 6:
                    PageTitles.append(wikipedia.input(
                        u'Which page do you want to change?'))
                else:
                    PageTitles.append(arg[6:])
            elif arg.startswith('-cat:'):
                if len(arg) == 5:
                    cat = wikipedia.input(
                        u'Which Category do you want to change?')
                else:
                    cat = 'Category:' + arg[5:]
            elif arg.startswith('-template:'):
                if len(arg) == 10:
                    template.append(wikipedia.input(
                        u'Which Template do you want to change?'))
                else:
                    template.append('Template:' + arg[10:])
            elif arg.startswith('-except:'):
                exceptions.append(arg[8:])
            elif arg.startswith('-namespace:'):
                namespaces.append(int(arg[11:]))
            elif arg.startswith('-ns:'):
                namespaces.append(int(arg[4:]))
            elif arg.startswith('-summary:'):
                wikipedia.setAction(arg[9:])
                summary_commandline = True
            else:
                generator = genFactory.handleArg(arg)
                if generator:
                    gen = generator
    else:
        PageTitles = [raw_input(u'Page:> ').decode('utf-8')]
    if cat != '':
        facatfalist = facatlist(cat)
        if facatfalist != False:
            run(facatfalist)
    if PageTitles:
        pages = [wikipedia.Page(faSite, PageTitle) for PageTitle in PageTitles]
        gen = iter(pages)
    if not gen:
        wikipedia.stopme()
        sys.exit()
    if namespaces != []:
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
    preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber=60)  #---number of pages that you want load at same time
    run(preloadingGen)
def main():
    """Parse command-line args, run add_text on the pages, update the cache.

    BUGFIX: when namespaces were given, the generator was wrapped in
    PreloadingGenerator *before* NamespaceFilterPageGenerator, so pages
    outside the target namespaces were fetched from the wiki and then
    discarded.  Filtering now happens first, matching every other script
    in this codebase.
    """
    summary_commandline, gen, template = None, None, None
    namespaces, PageTitles, exceptions = [], [], []
    encat = ''
    autoText, autoTitle = False, False
    recentcat, newcat = False, False
    genFactory = pagegenerators.GeneratorFactory()
    for arg in wikipedia.handleArgs():
        if arg == '-autotitle':
            autoTitle = True
        elif arg == '-autotext':
            autoText = True
        elif arg.startswith('-except:'):
            exceptions.append(arg[8:])
        elif arg.startswith('-start'):
            firstPageTitle = arg[7:]
            if not firstPageTitle:
                firstPageTitle = wikipedia.input(
                    u'At which page do you want to start?')
            firstPageTitle = wikipedia.Page(
                fasite, firstPageTitle).title(withNamespace=False)
            gen = pagegenerators.AllpagesPageGenerator(firstPageTitle, 0,
                                                       includeredirects=True)
        elif arg.startswith('-template:'):
            template = arg[10:]
        elif arg.startswith('-namespace:'):
            namespaces.append(int(arg[11:]))
        elif arg.startswith('-summary:'):
            wikipedia.setAction(arg[9:])
            summary_commandline = True
        else:
            generator = genFactory.handleArg(arg)
            if generator:
                gen = generator
    if not gen:
        wikipedia.stopme()
        sys.exit()
    # Filter first, then preload: only pages we will actually process
    # get fetched from the wiki.
    if namespaces != []:
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
    preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber=60)
    _cache, last_timestamp = get_cache()
    add_text(preloadingGen)
    # Crude day number (month=30 days, year=365) used to age out the cache.
    now = str(datetime.now())
    todaynum = int(now.split('-')[2].split(' ')[0]) + int(
        now.split('-')[1]) * 30 + (int(now.split('-')[0]) - 2000) * 365
    if last_timestamp + 3 < todaynum:
        put_cache(_cache, todaynum)
    else:
        put_cache({}, 0)
def getPageGenerator(self):
    """Return the page generator: hash-based when use_hash is set,
    otherwise files (namespace 6) transcluding the NowCommons template."""
    if use_hash:
        return self.useHashGenerator()
    referring = pagegenerators.ReferringPageGenerator(
        self.nowCommonsTemplate,
        followRedirects=True,
        onlyTemplateInclusion=True)
    return pagegenerators.NamespaceFilterPageGenerator(referring, [6])
def crawlerLink(pagename, apres):
    """Run modification() on every main-namespace page linking to pagename.

    If *apres* is given, pages are skipped until the page titled *apres*
    has been encountered; modification starts from the next page on.
    """
    started = False
    #pagename = unicode(arg[len('-links:'):], 'utf-8')
    target = wikipedia.Page(site, pagename)
    referrers = pagegenerators.NamespaceFilterPageGenerator(
        pagegenerators.ReferringPageGenerator(target), [0])
    for linking in pagegenerators.PreloadingGenerator(referrers, 100):
        #print(linking.title().encode(config.console_encoding, 'replace'))
        if not apres or apres == u'' or started:
            modification(linking.title())
            #crawlerLink(linking.title())
        elif linking.title() == apres:
            started = True
def main(): """ Process command line arguments and invoke bot. """ #page generator gen = None # This temporary array is used to read the page title if one single # page to work on is specified by the arguments. pageTitle = [] # Which namespaces should be processed? # default to [] which means all namespaces will be processed namespaces = [] # This factory is responsible for processing command line arguments # that are also used by other scripts and that determine on which pages # to work on. genFactory = pagegenerators.GeneratorFactory() # Never ask before changing a page always = False to13 = False format = False for arg in pywikibot.handleArgs(): if arg.startswith('-namespace:'): try: namespaces.append(int(arg[11:])) except ValueError: namespaces.append(arg[11:]) elif arg == '-always': always = True elif arg == '-to13': to13 = True elif arg == '-format': format = True else: if not genFactory.handleArg(arg): pageTitle.append(arg) site = pywikibot.getSite() if pageTitle: gen = iter([pywikibot.Page(site, t) for t in pageTitle]) if not gen: gen = genFactory.getCombinedGenerator() if not gen: pywikibot.showHelp('isbn') else: if namespaces != []: gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces) preloadingGen = pagegenerators.PreloadingGenerator(gen) bot = IsbnBot(preloadingGen, to13=to13, format=format, always=always) bot.run()
def countRefs(self, templates, namespaces):
    """Print a report with the number of transclusions of each template.

    @param templates: iterable of template names (without namespace prefix)
    @param namespaces: namespace numbers to restrict to; empty means all
    """
    site = wikipedia.getSite()
    prefix = site.template_namespace() + ':'
    report = [u'Number of transclusions per template',
              u'------------------------------------']
    for name in templates:
        referrers = pagegenerators.ReferringPageGenerator(
            wikipedia.Page(site, prefix + name),
            onlyTemplateInclusion=True)
        if namespaces:
            referrers = pagegenerators.NamespaceFilterPageGenerator(
                referrers, namespaces)
        # Exhaust the generator, counting pages as they stream by.
        total = sum(1 for _ in referrers)
        report.append(u'%s: %d' % (name, total))
    for line in report:
        wikipedia.output(line)
def crawlerCatLink(pagename, apres):
    """For every article of a category, run modification() on all
    main-namespace pages linking to it.

    If *apres* is given, linked pages are skipped until the page titled
    *apres* has been encountered; modification starts from the next one.
    """
    started = False
    category = catlib.Category(site, pagename)
    members = category.articlesList(False)
    for member in pagegenerators.PreloadingGenerator(members, 100):
        article = wikipedia.Page(site, member.title())
        referrers = pagegenerators.NamespaceFilterPageGenerator(
            pagegenerators.ReferringPageGenerator(article), [0])
        for linking in pagegenerators.PreloadingGenerator(referrers, 100):
            #print(linking.title().encode(config.console_encoding, 'replace'))
            if not apres or apres == u'' or started:
                modification(linking.title())
                #crawlerLink(linking.title())
            elif linking.title() == apres:
                started = True
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    summary = None
    generator = None
    always = False
    # Restrict -checkcurrent to the Category namespace (14).
    ns = []
    ns.append(14)
    # Process global args and prepare generator args parser
    genFactory = pagegenerators.GeneratorFactory()
    for arg in pywikibot.handleArgs(*args):
        if arg.startswith('-summary'):
            if len(arg) == 8:
                # Bare '-summary' given: prompt the user.
                summary = pywikibot.input(u'What summary do you want to use?')
            else:
                summary = arg[9:]
        elif arg.startswith('-checkcurrent'):
            # Work on all categories that transclude the local variant of
            # the {{Commonscat}} template.
            primaryCommonscat, commonscatAlternatives = \
                CommonscatBot.getCommonscatTemplate(
                    pywikibot.getSite().language())
            generator = pagegenerators.NamespaceFilterPageGenerator(
                pagegenerators.ReferringPageGenerator(
                    pywikibot.Page(pywikibot.getSite(),
                                   u'Template:' + primaryCommonscat),
                    onlyTemplateInclusion=True), ns)
        elif arg == '-always':
            always = True
        else:
            genFactory.handleArg(arg)
    if not generator:
        generator = genFactory.getCombinedGenerator()
    if generator:
        pregenerator = pagegenerators.PreloadingGenerator(generator)
        bot = CommonscatBot(pregenerator, always, summary)
        bot.run()
    else:
        pywikibot.showHelp()
def template_dict_generator(templates, namespaces):
    """Yield (template, pages) pairs.

    For each template name, *pages* is the list of pages transcluding it,
    optionally restricted to the given namespaces.
    """
    site = pywikibot.getSite()
    # The names of the templates are the keys, and lists of pages
    # transcluding templates are the values.
    tmpl_ns = site.getNamespaceIndex(site.template_namespace())
    for name in templates:
        referrers = pg.ReferringPageGenerator(
            pywikibot.Page(site, name, defaultNamespace=tmpl_ns),
            onlyTemplateInclusion=True)
        if namespaces:
            referrers = pg.NamespaceFilterPageGenerator(referrers, namespaces)
        yield name, list(referrers)
def getPageGenerator(self):
    """Return the page generator: hash-based when use_hash is set,
    otherwise a preloaded, de-duplicated stream of files (namespace 6)
    transcluding any of the NowCommons templates."""
    if use_hash:
        return self.useHashGenerator()
    template_pages = [pywikibot.Page(self.site, title, defaultNamespace=10)
                      for title in self.ncTemplates()]
    referrer_gens = [pg.ReferringPageGenerator(t, followRedirects=True,
                                               onlyTemplateInclusion=True)
                     for t in template_pages]
    combined = pg.CombinedPageGenerator(referrer_gens)
    combined = pg.NamespaceFilterPageGenerator(combined, [6])
    # A file may transclude several of the templates; report it once.
    combined = pg.DuplicateFilterPageGenerator(combined)
    return pg.PreloadingGenerator(combined)
def main():
    """Parse command-line args, build a page generator and run the bot."""
    #page generator
    gen = None
    # This temporary array is used to read the page title if one single
    # page to work on is specified by the arguments.
    pageTitle = []
    # Which namespaces should be processed?
    # default to [] which means all namespaces will be processed
    namespaces = []
    # Never ask before changing a page
    always = False
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()
    for arg in wikipedia.handleArgs():
        if arg.startswith('-xml'):
            if len(arg) == 4:
                # Bare '-xml' given: prompt for the dump filename.
                xmlFilename = wikipedia.input(
                    u'Please enter the XML dump\'s filename:')
            else:
                xmlFilename = arg[5:]
            gen = XmlDumpNoReferencesPageGenerator(xmlFilename)
        elif arg.startswith('-namespace:'):
            try:
                namespaces.append(int(arg[11:]))
            except ValueError:
                # Non-numeric namespace names are passed through verbatim.
                namespaces.append(arg[11:])
        elif arg == '-always':
            always = True
        else:
            if not genFactory.handleArg(arg):
                pageTitle.append(arg)
    if pageTitle:
        # A single page title was given as loose command-line words.
        page = wikipedia.Page(wikipedia.getSite(), ' '.join(pageTitle))
        gen = iter([page])
    if not gen:
        gen = genFactory.getCombinedGenerator()
    if not gen:
        wikipedia.showHelp('noreferences')
    else:
        if namespaces != []:
            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = NoReferencesBot(preloadingGen, always)
        bot.run()
def crawlerCat(category, recursif, apres):
    """Run modification() on the members of a category.

    If *apres* is given, pages are skipped until the page titled *apres*
    has been encountered.  With recursif=True, all pages of all
    subcategories are additionally processed (without the *apres* logic).
    """
    # String flag: becomes u'True' once the *apres* page has been seen.
    modifier = u'False'
    cat = catlib.Category(site, category)
    pages = cat.articlesList(False)
    # NOTE(review): `ns` is not defined in this function — presumably a
    # module-level global holding the namespace filter; confirm before use.
    gen = pagegenerators.NamespaceFilterPageGenerator(pages, [ns])
    for Page in pagegenerators.PreloadingGenerator(gen, 100):
        if not apres or apres == u'' or modifier == u'True':
            modification(Page.title())
            #crawlerLink(Page.title())
        elif Page.title() == apres:
            modifier = u'True'
    if recursif == True:
        subcat = cat.subcategories(recurse=True)
        for subcategory in subcat:
            pages = subcategory.articlesList(False)
            for Page in pagegenerators.PreloadingGenerator(pages, 100):
                modification(Page.title())
def main():
    '''
    Parse the command line arguments and get a pagegenerator to work on.
    Iterate through all the pages.
    '''
    summary = None
    generator = None
    checkcurrent = False
    always = False
    # Restrict -checkcurrent to the Category namespace (14).
    ns = []
    ns.append(14)
    # Load a lot of default generators
    genFactory = pagegenerators.GeneratorFactory()
    for arg in pywikibot.handleArgs():
        if arg.startswith('-summary'):
            if len(arg) == 8:
                # Bare '-summary' given: prompt the user.
                summary = pywikibot.input(u'What summary do you want to use?')
            else:
                summary = arg[9:]
        elif arg.startswith('-checkcurrent'):
            checkcurrent = True
            # Work on all categories that transclude the local variant of
            # the {{Commonscat}} template.
            primaryCommonscat, commonscatAlternatives = \
                CommonscatBot.getCommonscatTemplate(
                    pywikibot.getSite().language())
            generator = pagegenerators.NamespaceFilterPageGenerator(
                pagegenerators.ReferringPageGenerator(
                    pywikibot.Page(pywikibot.getSite(),
                                   u'Template:' + primaryCommonscat),
                    onlyTemplateInclusion=True), ns)
        elif arg == '-always':
            always = True
        else:
            genFactory.handleArg(arg)
    if not generator:
        generator = genFactory.getCombinedGenerator()
    if not generator:
        raise add_text.NoEnoughData(
            u'You have to specify the generator you want to use for the script!'
        )
    pregenerator = pagegenerators.PreloadingGenerator(generator)
    bot = CommonscatBot(pregenerator, always, summary)
    bot.run()
def main():
    """Fix redirect links on featured articles, a namespace, or a generator."""
    start = '!'
    featured = False
    namespace = None
    gen = None
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()
    for arg in wikipedia.handleArgs():
        if arg == '-featured':
            featured = True
        elif arg.startswith('-namespace'):
            if len(arg) == 10:
                # Bare '-namespace' given: prompt the user.
                namespace = int(wikipedia.input(
                    u'Which namespace should be processed?'))
            else:
                namespace = int(arg[11:])
        else:
            genFactory.handleArg(arg)
    gen = genFactory.getCombinedGenerator()
    mysite = wikipedia.getSite()
    # The Dutch Wikipedia forbids this kind of bot edit entirely.
    if mysite.sitename() == 'wikipedia:nl':
        wikipedia.output(u'\03{lightred}There is consensus on the Dutch Wikipedia that bots should not be used to fix redirects.\03{default}')
        sys.exit()
    linktrail = mysite.linktrail()
    if featured:
        featuredList = wikipedia.translate(mysite, featured_articles)
        ref = wikipedia.Page(wikipedia.getSite(), featuredList)
        gen = pagegenerators.ReferringPageGenerator(ref)
        # only work on articles
        generator = pagegenerators.NamespaceFilterPageGenerator(gen, [0])
        for page in generator:
            workon(page)
    elif namespace is not None:
        for page in pagegenerators.AllpagesPageGenerator(
                start=start, namespace=namespace, includeredirects=False):
            workon(page)
    elif gen:
        for page in pagegenerators.PreloadingGenerator(gen):
            workon(page)
    else:
        wikipedia.showHelp('fixing_redirects')
def main():
    '''
    Parse the command line arguments and get a pagegenerator to work on.
    Iterate through all the pages.
    '''
    summary = None; generator = None; checkcurrent = False; always = False
    # Restrict -checkcurrent to the Category namespace (14).
    ns = []
    ns.append(14)
    # Load a lot of default generators
    genFactory = pagegenerators.GeneratorFactory()
    for arg in wikipedia.handleArgs():
        if arg.startswith('-summary'):
            if len(arg) == 8:
                # Bare '-summary' given: prompt the user.
                summary = wikipedia.input(u'What summary do you want to use?')
            else:
                summary = arg[9:]
        elif arg.startswith('-checkcurrent'):
            checkcurrent = True
            # Work on all categories that transclude the local variant of
            # the {{Commonscat}} template.
            primaryCommonscat, commonscatAlternatives = getCommonscatTemplate(wikipedia.getSite().language())
            generator = pagegenerators.NamespaceFilterPageGenerator(pagegenerators.ReferringPageGenerator(wikipedia.Page(wikipedia.getSite(), u'Template:' + primaryCommonscat), onlyTemplateInclusion=True), ns)
        elif arg == '-always':
            always = True
        else:
            genFactory.handleArg(arg)
    if not generator:
        generator = genFactory.getCombinedGenerator()
    if not generator:
        raise add_text.NoEnoughData('You have to specify the generator you want to use for the script!')
    pregenerator = pagegenerators.PreloadingGenerator(generator)
    for page in pregenerator:
        # Skip anything that cannot meaningfully carry a {{Commonscat}}.
        if not page.exists():
            wikipedia.output(u'Page %s does not exist. Skipping.' % page.aslink())
        elif page.isRedirectPage():
            wikipedia.output(u'Page %s is a redirect. Skipping.' % page.aslink())
        elif page.isCategoryRedirect():
            wikipedia.output(u'Page %s is a category redirect. Skipping.' % page.aslink())
        elif page.isDisambig():
            wikipedia.output(u'Page %s is a disambiguation. Skipping.' % page.aslink())
        else:
            # addCommonscat may flip `always` when the user chooses "always".
            (status, always) = addCommonscat(page, summary, always)
def main():
    """Either split out one big category, or intersect the categories of
    every category transcluding {{Intersect categories}} on Commons."""
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    bigcategory = u''
    target = u''
    generator = None
    for arg in wikipedia.handleArgs():
        if arg.startswith('-page'):
            if len(arg) == 5:
                # Bare '-page' given: prompt the user.
                generator = [
                    wikipedia.Page(
                        wikipedia.getSite(),
                        wikipedia.input(u'What page do you want to use?'))
                ]
            else:
                generator = [wikipedia.Page(wikipedia.getSite(), arg[6:])]
        elif arg.startswith('-bigcat'):
            if len(arg) == 7:
                bigcategory = wikipedia.input(
                    u'What category do you want to split out?')
            else:
                bigcategory = arg[8:]
        elif arg.startswith('-target'):
            if len(arg) == 7:
                target = wikipedia.input(
                    u'What category is the target category?')
            else:
                target = arg[8:]
    if not bigcategory == u'':
        splitOutCategory(bigcategory, target)
    else:
        if not generator:
            # Default: all categories (namespace 14) transcluding the
            # {{Intersect categories}} template.
            generator = pagegenerators.NamespaceFilterPageGenerator(
                pagegenerators.ReferringPageGenerator(
                    wikipedia.Page(wikipedia.getSite(),
                                   u'Template:Intersect categories'),
                    onlyTemplateInclusion=True), [14])
        for cat in generator:
            intersectCategories(cat)
def main():
    """Download every image produced by the command-line page generator
    from Wikimedia Commons into the target directory."""
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    genFactory = pagegenerators.GeneratorFactory()
    target = u'/Users/hay/tmp/wlm/'
    for arg in wikipedia.handleArgs():
        if arg.startswith('-target:'):
            target = arg[len('-target:'):]
        else:
            genFactory.handleArg(arg)
    generator = genFactory.getCombinedGenerator()
    if not generator:
        return
    # Get a preloading generator with only images
    image_gen = pagegenerators.PreloadingGenerator(
        pagegenerators.NamespaceFilterPageGenerator(generator, [6]))
    for image in image_gen:
        imagepage = wikipedia.ImagePage(image.site(), image.title())
        downloadFile(imagepage, target)
def main():
    """Categorize the Commons images produced by the command-line
    page generator, using the monuments database connection."""
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    # Connect database, we need that
    conn = cursor = None
    (conn, cursor) = connectDatabase()
    genFactory = pagegenerators.GeneratorFactory()
    for arg in wikipedia.handleArgs():
        genFactory.handleArg(arg)
    generator = genFactory.getCombinedGenerator()
    if not generator:
        return
    # Get a preloading generator with only images
    image_gen = pagegenerators.PreloadingGenerator(
        pagegenerators.NamespaceFilterPageGenerator(generator, [6]))
    for image in image_gen:
        categorizeImage(image, conn, cursor)
def main(*args):
    """Parse command-line args and run TemplateRobot on the pages that
    transclude the given templates (replace, subst or remove them)."""
    templateNames = []
    # Maps old template name -> replacement name (None for subst/remove).
    templates = {}
    subst = False
    remove = False
    namespaces = []
    editSummary = ''
    addedCat = ''
    acceptAll = False
    genFactory = pagegenerators.GeneratorFactory()
    # If xmlfilename is None, references will be loaded from the live wiki.
    xmlfilename = None
    user = None
    skip = False
    timestamp = None
    # read command line parameters
    for arg in pywikibot.handleArgs():
        if arg == '-remove':
            remove = True
        elif arg == '-subst':
            subst = True
        elif arg == ('-always'):
            acceptAll = True
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                # Bare '-xml' given: prompt for the dump filename.
                xmlfilename = pywikibot.input(u'Please enter the XML dump\'s filename: ')
            else:
                xmlfilename = arg[5:]
        elif arg.startswith('-namespace:'):
            try:
                namespaces.append(int(arg[len('-namespace:'):]))
            except ValueError:
                # Non-numeric namespace names are passed through verbatim.
                namespaces.append(arg[len('-namespace:'):])
        elif arg.startswith('-category:'):
            addedCat = arg[len('-category:'):]
        elif arg.startswith('-summary:'):
            editSummary = arg[len('-summary:'):]
        # NOTE(review): the following line appears corrupted/redacted in the
        # source (the ****** runs). It most likely was three separate
        # branches handling -user:, -skipuser: and -timestamp:; restore from
        # the upstream template.py before running this script.
        elif arg.startswith('-user:'******'-user:'******'-skipuser:'******'-skipuser:'******'-timestamp:'):
            timestamp = arg[len('-timestamp:'):]
        else:
            if not genFactory.handleArg(arg):
                templateNames.append(pywikibot.Page(pywikibot.getSite(), arg, defaultNamespace=10).titleWithoutNamespace())
    if subst or remove:
        # No replacement targets needed; every template maps to None.
        for templateName in templateNames:
            templates[templateName] = None
    else:
        # Templates come in (old, new) pairs on the command line.
        try:
            for i in range(0, len(templateNames), 2):
                templates[templateNames[i]] = templateNames[i + 1]
        except IndexError:
            pywikibot.output(u'Unless using -subst or -remove, you must give an even number of template names.')
            return
    oldTemplates = []
    ns = pywikibot.getSite().template_namespace()
    for templateName in templates.keys():
        oldTemplate = pywikibot.Page(pywikibot.getSite(), templateName, defaultNamespace=10)
        oldTemplates.append(oldTemplate)
    if xmlfilename:
        gen = XmlDumpTemplatePageGenerator(oldTemplates, xmlfilename)
    else:
        gen = genFactory.getCombinedGenerator()
    if not gen:
        # Default: all pages transcluding any of the old templates,
        # de-duplicated (a page may use several of them).
        gens = []
        gens = [pagegenerators.ReferringPageGenerator(t, onlyTemplateInclusion = True) for t
                in oldTemplates]
        gen = pagegenerators.CombinedPageGenerator(gens)
        gen = pagegenerators.DuplicateFilterPageGenerator(gen)
    if namespaces:
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
    if user:
        gen = UserEditFilterGenerator(gen, user, timestamp, skip)
    preloadingGen = pagegenerators.PreloadingGenerator(gen)
    bot = TemplateRobot(preloadingGen, templates, subst, remove, editSummary, acceptAll, addedCat)
    bot.run()
def main():
    """Parse command-line args and run the weblink checker, waiting for
    the background link-check threads to finish on shutdown."""
    gen = None
    singlePageTitle = []
    # Which namespaces should be processed?
    # default to [] which means all namespaces will be processed
    namespaces = []
    # HTTP status codes that should not be reported as dead links.
    HTTPignore = []
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()
    global day
    day = 7
    for arg in pywikibot.handleArgs():
        if arg == '-talk':
            config.report_dead_links_on_talk = True
        elif arg == '-notalk':
            config.report_dead_links_on_talk = False
        elif arg.startswith('-namespace:'):
            try:
                namespaces.append(int(arg[11:]))
            except ValueError:
                # Non-numeric namespace names are passed through verbatim.
                namespaces.append(arg[11:])
        elif arg == '-repeat':
            gen = RepeatPageGenerator()
        elif arg.startswith('-ignore:'):
            HTTPignore.append(int(arg[8:]))
        elif arg.startswith('-day:'):
            day = int(arg[5:])
        else:
            if not genFactory.handleArg(arg):
                singlePageTitle.append(arg)
    if singlePageTitle:
        # A single page title was given as loose command-line words.
        singlePageTitle = ' '.join(singlePageTitle)
        page = pywikibot.Page(pywikibot.getSite(), singlePageTitle)
        gen = iter([page])
    if not gen:
        gen = genFactory.getCombinedGenerator()
    if gen:
        if namespaces != []:
            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
        # fetch at least 240 pages simultaneously from the wiki, but more if
        # a high thread number is set.
        pageNumber = max(240, config.max_external_links * 2)
        gen = pagegenerators.PreloadingGenerator(gen, pageNumber = pageNumber)
        gen = pagegenerators.RedirectFilterPageGenerator(gen)
        bot = WeblinkCheckerRobot(gen, HTTPignore)
        try:
            bot.run()
        finally:
            waitTime = 0
            # Don't wait longer than 30 seconds for threads to finish.
            while countLinkCheckThreads() > 0 and waitTime < 30:
                try:
                    pywikibot.output(
                        u"Waiting for remaining %i threads to finish, please wait..."
                        % countLinkCheckThreads())
                    # wait 1 second
                    time.sleep(1)
                    waitTime += 1
                except KeyboardInterrupt:
                    pywikibot.output(u'Interrupted.')
                    break
            if countLinkCheckThreads() > 0:
                pywikibot.output(u'Remaining %i threads will be killed.'
                                 % countLinkCheckThreads())
                # Threads will die automatically because they are daemonic.
            if bot.history.reportThread:
                bot.history.reportThread.shutdown()
                # wait until the report thread is shut down; the user can
                # interrupt it by pressing CTRL-C.
                try:
                    while bot.history.reportThread.isAlive():
                        time.sleep(0.1)
                except KeyboardInterrupt:
                    pywikibot.output(u'Report thread interrupted.')
                    bot.history.reportThread.kill()
            pywikibot.output(u'Saving history...')
            bot.history.save()
    else:
        pywikibot.showHelp()
def main(): quietMode = False # use -quiet to get less output # if the -file argument is used, page titles are stored in this array. # otherwise it will only contain one page. articles = [] # if -file is not used, this temporary array is used to read the page title. page_title = [] # Which namespaces should be processed? # default to [] which means all namespaces will be processed namespaces = [] xmlfilename = None gen = None # This factory is responsible for processing command line arguments # that are also used by other scripts and that determine on which pages # to work on. genFactory = pagegenerators.GeneratorFactory() for arg in pywikibot.handleArgs(): if arg.startswith('-xml'): if len(arg) == 4: xmlfilename = pywikibot.input( u'Please enter the XML dump\'s filename:') else: xmlfilename = arg[5:] gen = TableXmlDumpPageGenerator(xmlfilename) elif arg == '-sql': query = u""" SELECT page_namespace, page_title FROM page JOIN text ON (page_id = old_id) WHERE old_text LIKE '%<table%' LIMIT 200""" gen = pagegenerators.MySQLPageGenerator(query) elif arg.startswith('-namespace:'): try: namespaces.append(int(arg[11:])) except ValueError: namespaces.append(arg[11:]) elif arg.startswith('-skip:'): articles = articles[articles.index(arg[6:]):] elif arg.startswith('-auto'): config.table2wikiAskOnlyWarnings = True config.table2wikiSkipWarnings = True print "Automatic mode!\n" elif arg.startswith('-quiet'): quietMode = True else: if not genFactory.handleArg(arg): page_title.append(arg) # if the page is given as a command line argument, # connect the title's parts with spaces if page_title != []: page_title = ' '.join(page_title) page = pywikibot.Page(pywikibot.getSite(), page_title) gen = iter([page]) if not gen: gen = genFactory.getCombinedGenerator() if gen: if namespaces != []: gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces) preloadingGen = pagegenerators.PreloadingGenerator(gen) bot = Table2WikiRobot(preloadingGen, quietMode) bot.run() else: 
pywikibot.showHelp('table2wiki')