def crawlerLink(pagename):
    """Run modification() on the title of every page linking to *pagename*."""
    target = wikipedia.Page(site, pagename)
    referring = pagegenerators.ReferringPageGenerator(target)
    # Preload in batches of 100 to cut down on API round trips.
    for linking_page in pagegenerators.PreloadingGenerator(referring, 100):
        modification(linking_page.title())
def main():
    """Handle command line arguments and run workon() over the chosen pages.

    Refuses to run on nl.wikipedia (local consensus against redirect bots).
    With -featured, works on pages referring to the featured-articles list;
    otherwise uses the standard generator factory arguments.
    """
    featured = False
    gen = None
    # Factory handling the page-selection arguments shared by all scripts.
    factory = pagegenerators.GeneratorFactory()
    for arg in pywikibot.handleArgs():
        if arg == '-featured':
            featured = True
        else:
            factory.handleArg(arg)
    mysite = pywikibot.getSite()
    if mysite.sitename() == 'wikipedia:nl':
        pywikibot.output(
            u'\03{lightred}There is consensus on the Dutch Wikipedia that bots should not be used to fix redirects.\03{default}'
        )
        sys.exit()
    if featured:
        featuredList = pywikibot.translate(mysite, featured_articles)
        ref = pywikibot.Page(pywikibot.getSite(), featuredList)
        gen = pagegenerators.NamespaceFilterPageGenerator(
            pagegenerators.ReferringPageGenerator(ref), [0])
    if gen is None:
        gen = factory.getCombinedGenerator()
    if gen:
        for page in pagegenerators.PreloadingGenerator(gen):
            workon(page)
    else:
        pywikibot.showHelp('fixing_redirects')
def countTemplates(self, templates, namespaces):
    """Count transclusions of each template and print a report.

    @param templates: template names (without namespace prefix)
    @param namespaces: optional namespace numbers to restrict the count to
    @return: dict mapping template name -> number of transclusions
    """
    mysite = wikipedia.getSite()
    tpl_prefix = mysite.template_namespace() + ':'
    report = [u'Number of transclusions per template',
              u'------------------------------------']
    total = 0
    counts = {}
    for template in templates:
        gen = pagegenerators.ReferringPageGenerator(
            wikipedia.Page(mysite, tpl_prefix + template),
            onlyTemplateInclusion=True)
        if namespaces:
            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
        # Count by exhausting the generator.
        n = sum(1 for _ in gen)
        counts[template] = n
        report.append(u'%s: %d' % (template, n))
        total += n
    for line in report:
        wikipedia.output(line, toStdout=True)
    wikipedia.output(u'TOTAL: %d' % total, toStdout=True)
    wikipedia.output(u'Report generated on %s'
                     % datetime.datetime.utcnow().isoformat(), toStdout=True)
    return counts
def __init__(self, pageToUnlink, namespaces, always):
    """Set up the unlink bot for *pageToUnlink*.

    Builds a preloading generator over all pages referring to the target
    (namespace-filtered when *namespaces* is non-empty) and compiles the
    wikilink-matching regular expression.
    """
    self.pageToUnlink = pageToUnlink
    gen = pagegenerators.ReferringPageGenerator(pageToUnlink)
    if namespaces != []:
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
    self.generator = pagegenerators.PreloadingGenerator(gen)
    linktrail = pywikibot.getSite().linktrail()
    # Link regex groups:
    #   title     - the target page title (everything before | or ])
    #   section   - the page section, including the leading # for convenience
    #   label     - the alternative link title (text between | and ])
    #   linktrail - letters after ]] that are part of the word; what counts
    #               as a 'letter' varies from language to language.
    self.linkR = re.compile(
        r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?(\|(?P<label>[^\]]*))?\]\](?P<linktrail>%s)'
        % linktrail)
    self.always = always
    self.done = False
    self.comment = i18n.twtranslate(pywikibot.getSite(), 'unlink-unlinking',
                                    self.pageToUnlink.title())
def main():
    """Parse command line arguments and run the harvesting bot.

    Expects a -template:<name> argument plus an even number of remaining
    positional arguments forming (template parameter, property) pairs.

    @raise ValueError: if no template was given or the positional
        arguments cannot be paired up.
    """
    genFactory = pagegenerators.GeneratorFactory()
    commandline_arguments = list()
    templateTitle = u''
    for arg in pywikibot.handleArgs():
        if arg.startswith('-template'):
            if len(arg) == 9:
                templateTitle = pywikibot.input(
                    u'Please enter the template to work on:')
            else:
                templateTitle = arg[10:]
        elif genFactory.handleArg(arg):
            continue
        else:
            commandline_arguments.append(arg)
    if len(commandline_arguments) % 2 or not templateTitle:
        # Was a bare ``raise ValueError  # or something.``; give the
        # operator an actionable message instead.
        raise ValueError(
            'Expected a -template argument and an even number of '
            'field/property arguments.')
    # Pair every field name with the property name that follows it.
    fields = dict(zip(commandline_arguments[0::2],
                      commandline_arguments[1::2]))
    if templateTitle:
        gen = pagegenerators.ReferringPageGenerator(
            pywikibot.Page(pywikibot.getSite(),
                           "Template:%s" % templateTitle),
            onlyTemplateInclusion=True)
    else:
        gen = genFactory.getCombinedGenerator()
    if not gen:
        # TODO: Build a transcluding generator based on templateTitle
        return
    bot = HarvestRobot(gen, templateTitle, fields)
    bot.run()
def listTemplates(self, templates, namespaces):
    """Print every page transcluding each template and return the mapping.

    @param templates: template names (without namespace prefix)
    @param namespaces: optional namespace numbers to restrict the listing to
    @return: dict mapping template name -> list of transcluding Page objects
    """
    mysite = wikipedia.getSite()
    total = 0
    transclusions = {}
    report = [u'List of pages transcluding templates:']
    report.extend(u'* %s' % tpl for tpl in templates)
    report.append(u'------------------------------------')
    for template in templates:
        pages = []
        gen = pagegenerators.ReferringPageGenerator(
            wikipedia.Page(mysite,
                           mysite.template_namespace() + ':' + template),
            onlyTemplateInclusion=True)
        if namespaces:
            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
        for page in gen:
            report.append(u'%s' % page.title())
            total += 1
            pages.append(page)
        transclusions[template] = pages
    report.append(u'Total page count: %d' % total)
    for line in report:
        wikipedia.output(line, toStdout=True)
    wikipedia.output(u'Report generated on %s'
                     % datetime.datetime.utcnow().isoformat(), toStdout=True)
    return transclusions
def __init__(self):
    '''Constructor of SubsterBot(), initialize needed vars.

    Sets the process timezone, resolves the on-wiki template/config pages,
    imports the post-processing code and (optionally) an on-wiki
    configuration executed via exec().
    '''
    pywikibot.output(
        u'\03{lightgreen}* Initialization of bot:\03{default}')
    basic.AutoBasicBot.__init__(self)
    # Modification of timezone to be in sync with the wiki.
    os.environ['TZ'] = 'Europe/Amsterdam'
    if hasattr(time, "tzset"):
        time.tzset()
        pywikibot.output(u'Setting process TimeZone (TZ): %s'
                         % str(time.tzname))  # ('CET', 'CEST')
    else:
        # e.g. windows doesn't have that attribute
        pywikibot.warning(
            u'This operating system has NO SUPPORT for setting TimeZone by '
            u'code! Before running this script, please set the TimeZone '
            u'manually to one approriate for use with the Wikipedia '
            u'language and region you intend to.')
    # init constants
    self._bot_config = bot_config
    # Convert e.g. namespaces to the correct content language.
    self._bot_config['TemplateName'] = pywikibot.Page(
        self.site, self._bot_config['TemplateName']).title()
    # NOTE(review): non-raw pattern string; '\{' is passed through
    # literally, but r'\{\{' would avoid invalid-escape warnings — confirm.
    self._template_regex = re.compile(
        '\{\{' + self._bot_config['TemplateName'] + '(.*?)\}\}', re.S)
    # TODO: implement proper error handling template/output for wikidata
    # see: https://bugzilla.wikimedia.org/show_bug.cgi?id=60225
    # see: https://www.wikidata.org/wiki/Template:Exchange_Rate_Data
    #if self.site.is_data_repository():
    #    self._bot_config['VerboseMessage'] = self._bot_config['data_VerboseMessage']
    # init constants
    self._userListPage = pywikibot.Page(self.site,
                                        self._bot_config['TemplateName'])
    self._ConfCSSpostprocPage = pywikibot.Page(
        self.site, self._bot_config['ConfCSSpostproc'])
    self._ConfCSSconfigPage = pywikibot.Page(
        self.site, self._bot_config['ConfCSSconfig'])
    self.pagegen = pagegenerators.ReferringPageGenerator(
        self._userListPage, onlyTemplateInclusion=True)
    self._code = self._ConfCSSpostprocPage.get()
    pywikibot.output(
        u'Imported postproc %s rev %s from %s'
        % ((self._ConfCSSpostprocPage.title(asLink=True), )
           + self._ConfCSSpostprocPage.getVersionHistory(revCount=1)[0][:2]))
    self._flagenable = {}
    if self._ConfCSSconfigPage.exists():
        # SECURITY: executes wiki page content; the page is expected to
        # define ``bot_config_wiki`` — only safe if that page is
        # edit-protected on the wiki.
        exec(self._ConfCSSconfigPage.get())
        # with variable: bot_config_wiki
        self._flagenable = bot_config_wiki['flagenable']
        pywikibot.output(
            u'Imported config %s rev %s from %s'
            % ((self._ConfCSSconfigPage.title(asLink=True), )
               + self._ConfCSSconfigPage.getVersionHistory(revCount=1)[0][:2]))
def generateTransclusions(Site, template, namespaces=None):
    """Yield the pages transcluding *template* on *Site*.

    @param Site: the wiki site to query
    @param template: template title (resolved in namespace 10)
    @param namespaces: optional list of namespace numbers to filter on;
        the default was a shared mutable list ([]) — None is the safe
        sentinel and is backward compatible (both are falsy).
    """
    pywikibot.output(u'Fetching template transclusions...')
    transclusionPage = pywikibot.Page(Site, template, defaultNamespace=10)
    gen = pagegenerators.ReferringPageGenerator(transclusionPage,
                                                onlyTemplateInclusion=True)
    if namespaces:
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces,
                                                          Site)
    for page in gen:
        yield page
def getPageGenerator(self):
    """Return the candidate-page generator: hash-based when the module-level
    use_hash flag is set, otherwise the file-namespace (6) pages
    transcluding the NowCommons template."""
    if use_hash:
        return self.useHashGenerator()
    referring = pagegenerators.ReferringPageGenerator(
        self.nowCommonsTemplate,
        followRedirects=True,
        onlyTemplateInclusion=True)
    return pagegenerators.NamespaceFilterPageGenerator(referring, [6])
def modification(PageHS):
    # Redirect-bypass helper: *PageHS* is the title of a redirect page.
    # On every page linking to it, rewrites [[PageHS]] as
    # [[<target>|PageHS]] and [[PageHS|label]] as [[<target>|label]], saves
    # the result, and finally deletes the redirect for subpage-like titles.
    page = Page(site, PageHS)
    if page.exists():
        if page.namespace() != 0 and page.title(
        ) != u'Utilisateur:JackBot/test':
            return
        else:
            try:
                PageEnd = page.getRedirectTarget()
            except wikipedia.NoPage:
                print "NoPage"
                return
    # NOTE(review): if the page does not exist, PageEnd is never bound and
    # the replacement loops below would raise NameError — confirm intended.
    gen2 = pagegenerators.ReferringPageGenerator(page)
    for PageCourante in pagegenerators.PreloadingGenerator(gen2, 100):
        print(PageCourante.title().encode(config.console_encoding, 'replace'))
        try:
            PageBegin = PageCourante.get()
        except wikipedia.NoPage:
            print "NoPage"
            return
        except wikipedia.IsRedirectPage:
            print "Redirect page"
            return
        except wikipedia.LockedPage:
            print "Locked/protected page"
            return
        except wikipedia.ServerError:
            print "ServerError"
            return
        except wikipedia.NoSuchSite:
            print "NoSuchSite"
            return
        except wikipedia.InvalidTitle:
            print "InvalidTitle"
            return
        PageTemp = PageBegin
        # Replace each plain link [[PageHS]] with [[<target>|PageHS]]:
        # keep '[[', insert the target title plus '|', keep the original
        # title as the visible label (the +2/-2 offsets preserve the
        # surrounding bracket pairs).
        while PageTemp.find(u'[[' + PageHS + u']]') != -1:
            PageTemp = PageTemp[
                0:PageTemp.find(u'[[' + PageHS + u']]') + 2] + PageEnd.title(
                ) + u'|' + PageHS + PageTemp[
                    PageTemp.find(u'[[' + PageHS + u']]')
                    + len(u'[[' + PageHS + u']]') - 2:len(PageTemp)]
        # Replace each piped link [[PageHS|label]] with [[<target>|label]]:
        # the -1 offset keeps the '|' so the existing label is reused.
        while PageTemp.find(u'[[' + PageHS + u'|') != -1:
            PageTemp = PageTemp[
                0:PageTemp.find(u'[[' + PageHS + u'|') + 2] + PageEnd.title(
                ) + PageTemp[PageTemp.find(u'[[' + PageHS + u'|')
                             + len(u'[[' + PageHS + u'|') - 1:len(PageTemp)]
        if PageTemp != PageBegin:
            sauvegarde(PageCourante, PageTemp)
    # Only redirects that look like subpages or dashed compounds get deleted.
    if PageHS.find(u'/') != -1 or PageHS.find(u' - ') != -1:
        page.delete(u'Suppression après gestion des pages liées', u'',
                    throttle=True)
def crawlerLink(pagename, apres):
    """Run modification() on main-namespace pages linking to *pagename*.

    @param pagename: title whose referring pages are processed
    @param apres: optional resume title — when non-empty, pages are skipped
        until that title is seen, and processing starts with the pages
        after it.
    """
    # Was a stringly-typed flag (u'False'/u'True'); use a real boolean.
    # Also renamed the loop variable, which shadowed the Page class.
    resume_point_reached = False
    page = wikipedia.Page(site, pagename)
    gen = pagegenerators.ReferringPageGenerator(page)
    gen = pagegenerators.NamespaceFilterPageGenerator(gen, [0])
    for linked in pagegenerators.PreloadingGenerator(gen, 100):
        if not apres or apres == u'' or resume_point_reached:
            modification(linked.title())
        elif linked.title() == apres:
            resume_point_reached = True
def main():
    """Parse arguments and run the archiving bot for each configured project."""
    gen = None
    acceptall = False
    test = False
    linkingPageTitle = 'Gebruiker:Erwin/Bot/Archiveerlinks'
    # Projects to check: family -> list of language codes.
    projects = {'wikipedia': ['nl']}
    linkingPageTitles = {
        'wikipedia': {'nl': 'Gebruiker:Erwin/Bot/Archiveerlinks'},
        'wikisource': {'nl': 'Gebruiker:Erwin85/Bot/Archiveerlinks'},
    }
    for arg in wikipedia.handleArgs():
        if arg == '-always':
            acceptall = True
        elif arg.startswith('-project'):
            # Override defined projects; use: -project:family:code
            if len(arg) == 8:
                project = [wikipedia.input(u'Family?'),
                           wikipedia.input(u'Code?')]
            else:
                project = re.split(r'\:', arg[9:])
            projects = {project[0]: [project[1]]}
        elif arg == '-test':
            test = True
            wikipedia.output(u'Using test settings.')
            projects = {'wikipedia': ['nl']}
    for family, langs in projects.iteritems():
        for lang in langs:
            if test:
                linkingPageTitle = 'Gebruiker:Erwin/Bot/Archiveerlinkstest'
            else:
                linkingPageTitle = linkingPageTitles[family][lang]
            wikipedia.output(u'\n>> %s:%s<<\n' % (family, lang))
            referredPage = wikipedia.Page(
                wikipedia.getSite(code=lang, fam=family), linkingPageTitle)
            gen = pagegenerators.ReferringPageGenerator(referredPage)
            preloadingGen = pagegenerators.PreloadingGenerator(gen,
                                                               pageNumber=40)
            bot = ArchivingRobot(preloadingGen, time.time(),
                                 wikipedia.getSite(code=lang, fam=family),
                                 linkingPageTitle, acceptall)
            bot.run()
def countRefs(self, templates, namespaces):
    """Print, for each template, how many pages transclude it
    (optionally restricted to *namespaces*)."""
    mysite = wikipedia.getSite()
    prefix = mysite.template_namespace() + ':'
    report = [u'Number of transclusions per template',
              u'------------------------------------']
    for template in templates:
        gen = pagegenerators.ReferringPageGenerator(
            wikipedia.Page(mysite, prefix + template),
            onlyTemplateInclusion=True)
        if namespaces:
            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
        # Count by exhausting the generator.
        transclusions = sum(1 for _ in gen)
        report.append(u'%s: %d' % (template, transclusions))
    for line in report:
        wikipedia.output(line)
def crawlerCatLink(pagename, apres):
    """Run modification() on main-namespace pages linking to any article in
    category *pagename*.

    @param pagename: category title whose articles' referring pages are
        processed
    @param apres: optional resume title — when non-empty, pages are skipped
        until that title is seen, and processing starts with the pages
        after it.
    """
    # Was a stringly-typed flag (u'False'/u'True'); use a real boolean.
    # Also renamed the loop variable, which shadowed the Page class.
    resume_point_reached = False
    cat = catlib.Category(site, pagename)
    pages = cat.articlesList(False)
    for article in pagegenerators.PreloadingGenerator(pages, 100):
        page = wikipedia.Page(site, article.title())
        gen = pagegenerators.ReferringPageGenerator(page)
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, [0])
        for linked in pagegenerators.PreloadingGenerator(gen, 100):
            if not apres or apres == u'' or resume_point_reached:
                modification(linked.title())
            elif linked.title() == apres:
                resume_point_reached = True
def findimages():
    """Walk pages transcluding {{Commons ok}} on enwiki and call moveimage()
    for the ones in the image namespace (6)."""
    wikien = wikipedia.getSite(code='en', fam='wikipedia')
    commons = wikipedia.getSite(code='commons', fam='commons')
    transclusionPage = wikipedia.Page(wikien, 'Template:Commons ok')
    gen = pagegenerators.ReferringPageGenerator(transclusionPage,
                                                onlyTemplateInclusion=True)
    # Alternative page source, kept for reference:
    # category = catlib.Category(wikien, 'Copy to Wikimedia Commons')
    # gen = pagegenerators.CategorizedPageGenerator(category, recurse=True)
    for page in gen:
        if page.namespace() != 6:
            print('%s is not in the image namespace.' % (str(page)))
        else:
            print(page)
            moveimage(page)
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    summary = None
    generator = None
    always = False
    ns = [14]  # category namespace
    # Process global args and prepare generator args parser
    genFactory = pagegenerators.GeneratorFactory()
    for arg in pywikibot.handleArgs(*args):
        if arg.startswith('-summary'):
            if len(arg) == 8:
                summary = pywikibot.input(u'What summary do you want to use?')
            else:
                summary = arg[9:]
        elif arg.startswith('-checkcurrent'):
            primaryCommonscat, commonscatAlternatives = \
                CommonscatBot.getCommonscatTemplate(
                    pywikibot.getSite().language())
            referring = pagegenerators.ReferringPageGenerator(
                pywikibot.Page(pywikibot.getSite(),
                               u'Template:' + primaryCommonscat),
                onlyTemplateInclusion=True)
            generator = pagegenerators.NamespaceFilterPageGenerator(
                referring, ns)
        elif arg == '-always':
            always = True
        else:
            genFactory.handleArg(arg)
    if not generator:
        generator = genFactory.getCombinedGenerator()
    if generator:
        pregenerator = pagegenerators.PreloadingGenerator(generator)
        bot = CommonscatBot(pregenerator, always, summary)
        bot.run()
    else:
        pywikibot.showHelp()
def template_dict_generator(templates, namespaces):
    """Yield (template name, list of transcluding pages) pairs, one per
    template, optionally restricted to *namespaces*."""
    mysite = pywikibot.getSite()
    # Resolve the template namespace index once, outside the loop.
    tpl_ns = mysite.getNamespaceIndex(mysite.template_namespace())
    for template in templates:
        gen = pg.ReferringPageGenerator(
            pywikibot.Page(mysite, template, defaultNamespace=tpl_ns),
            onlyTemplateInclusion=True)
        if namespaces:
            gen = pg.NamespaceFilterPageGenerator(gen, namespaces)
        yield template, list(gen)
def getPageGenerator(self):
    """Build the candidate-page generator: the hash-based one when the
    module-level use_hash flag is set, otherwise all unique file-namespace
    (6) pages transcluding any NowCommons template, preloaded."""
    if use_hash:
        return self.useHashGenerator()
    template_pages = [pywikibot.Page(self.site, title, defaultNamespace=10)
                      for title in self.ncTemplates()]
    referrers = [pg.ReferringPageGenerator(tpl, followRedirects=True,
                                           onlyTemplateInclusion=True)
                 for tpl in template_pages]
    gen = pg.CombinedPageGenerator(referrers)
    gen = pg.NamespaceFilterPageGenerator(gen, [6])
    gen = pg.DuplicateFilterPageGenerator(gen)
    return pg.PreloadingGenerator(gen)
def main():
    '''
    Parse the command line arguments and get a pagegenerator to work on.
    Iterate through all the pages.
    '''
    summary = None
    generator = None
    checkcurrent = False
    always = False
    ns = [14]  # category namespace
    # Load a lot of default generators
    genFactory = pagegenerators.GeneratorFactory()
    for arg in pywikibot.handleArgs():
        if arg.startswith('-summary'):
            if len(arg) == 8:
                summary = pywikibot.input(u'What summary do you want to use?')
            else:
                summary = arg[9:]
        elif arg.startswith('-checkcurrent'):
            checkcurrent = True
            primaryCommonscat, commonscatAlternatives = \
                CommonscatBot.getCommonscatTemplate(
                    pywikibot.getSite().language())
            referring = pagegenerators.ReferringPageGenerator(
                pywikibot.Page(pywikibot.getSite(),
                               u'Template:' + primaryCommonscat),
                onlyTemplateInclusion=True)
            generator = pagegenerators.NamespaceFilterPageGenerator(
                referring, ns)
        elif arg == '-always':
            always = True
        else:
            genFactory.handleArg(arg)
    if not generator:
        generator = genFactory.getCombinedGenerator()
    if not generator:
        raise add_text.NoEnoughData(
            u'You have to specify the generator you want to use for the script!'
        )
    pregenerator = pagegenerators.PreloadingGenerator(generator)
    bot = CommonscatBot(pregenerator, always, summary)
    bot.run()
def __iter__(self):
    """Yield pages referring to self.disambPage, but only when at least
    self.minimum of them exist (otherwise skip with a message)."""
    gen = pagegenerators.ReferringPageGenerator(
        self.disambPage, followRedirects=False, withTemplateInclusion=False)
    gen = pagegenerators.PageTitleFilterPageGenerator(gen, ignore_title)
    refs = list(gen)
    if len(refs) < self.minimum:
        pywikibot.output(u"Found only %d pages to work on; skipping."
                         % len(refs))
        return
    pywikibot.output(u"Will work on %d pages." % len(refs))
    for ref in refs:
        yield ref
def modification(PageHS):
    # Template-cleanup helper: *PageHS* is a literal template invocation
    # (e.g. u'{...}'); every occurrence of it is removed from all pages
    # referring to it, and the changed pages are saved via sauvegarde().
    page = Page(site, PageHS)
    if page.exists():
        if page.namespace() != 10 and page.title(
        ) != u'Utilisateur:JackBot/test':
            return
        else:
            if PageHS.find(u'=') != -1:
                NouveauModele = u'langue|' + PageHS[1:len(PageHS) - 1] + u'}}'
    # NOTE(review): NouveauModele is only bound when PageHS contains '=',
    # and the bound value is never empty — so the ``NouveauModele == u''``
    # branch below can never be taken when bound, and raises NameError when
    # unbound. The two branches look swapped; as written, both remove the
    # occurrence. Confirm the intended replacement before changing this.
    gen = pagegenerators.ReferringPageGenerator(page, 0)
    for PageCourante in pagegenerators.PreloadingGenerator(gen, 100):
        print(PageCourante.title().encode(config.console_encoding, 'replace'))
        try:
            PageBegin = PageCourante.get()
        except wikipedia.NoPage:
            print "NoPage"
            return
        except wikipedia.IsRedirectPage:
            print "Redirect page"
            return
        except wikipedia.LockedPage:
            print "Locked/protected page"
            return
        except wikipedia.ServerError:
            print "ServerError"
            return
        except wikipedia.NoSuchSite:
            print "NoSuchSite"
            return
        except wikipedia.InvalidTitle:
            print "InvalidTitle"
            return
        PageTemp = PageBegin
        # Remove every occurrence of PageHS by splicing the text around it.
        while PageTemp.find(PageHS) != -1:
            if NouveauModele == u'':
                PageTemp = PageTemp[0:PageTemp.find(
                    PageHS)] + NouveauModele + PageTemp[
                        PageTemp.find(PageHS) + len(PageHS):len(PageTemp)]
            else:
                PageTemp = PageTemp[0:PageTemp.find(PageHS)] + PageTemp[
                    PageTemp.find(PageHS) + len(PageHS):len(PageTemp)]
        if PageTemp != PageBegin:
            sauvegarde(PageCourante.title(), PageTemp)
def main():
    """Handle command line arguments and run workon() over the chosen pages.

    Refuses to run on nl.wikipedia (local consensus against redirect bots).
    Works either on pages referring to the featured-articles list
    (-featured), on a whole namespace (-namespace), or on the pages
    selected by the standard generator factory arguments.
    """
    start = '!'
    featured = False
    namespace = None
    gen = None
    # Factory handling the page-selection arguments shared by all scripts.
    genFactory = pagegenerators.GeneratorFactory()
    for arg in wikipedia.handleArgs():
        if arg == '-featured':
            featured = True
        elif arg.startswith('-namespace'):
            if len(arg) == 10:
                namespace = int(wikipedia.input(
                    u'Which namespace should be processed?'))
            else:
                namespace = int(arg[11:])
        else:
            genFactory.handleArg(arg)
    gen = genFactory.getCombinedGenerator()
    mysite = wikipedia.getSite()
    if mysite.sitename() == 'wikipedia:nl':
        wikipedia.output(u'\03{lightred}There is consensus on the Dutch Wikipedia that bots should not be used to fix redirects.\03{default}')
        sys.exit()
    linktrail = mysite.linktrail()
    if featured:
        featuredList = wikipedia.translate(mysite, featured_articles)
        ref = wikipedia.Page(wikipedia.getSite(), featuredList)
        for page in pagegenerators.NamespaceFilterPageGenerator(
                pagegenerators.ReferringPageGenerator(ref), [0]):
            workon(page)
    elif namespace is not None:
        for page in pagegenerators.AllpagesPageGenerator(
                start=start, namespace=namespace, includeredirects=False):
            workon(page)
    elif gen:
        for page in pagegenerators.PreloadingGenerator(gen):
            workon(page)
    else:
        wikipedia.showHelp('fixing_redirects')
def main():
    '''
    Parse the command line arguments and get a pagegenerator to work on.
    Iterate through all the pages.
    '''
    summary = None
    generator = None
    checkcurrent = False
    always = False
    ns = [14]  # category namespace
    # Load a lot of default generators
    genFactory = pagegenerators.GeneratorFactory()
    for arg in wikipedia.handleArgs():
        if arg.startswith('-summary'):
            if len(arg) == 8:
                summary = wikipedia.input(u'What summary do you want to use?')
            else:
                summary = arg[9:]
        elif arg.startswith('-checkcurrent'):
            checkcurrent = True
            primaryCommonscat, commonscatAlternatives = getCommonscatTemplate(
                wikipedia.getSite().language())
            generator = pagegenerators.NamespaceFilterPageGenerator(
                pagegenerators.ReferringPageGenerator(
                    wikipedia.Page(wikipedia.getSite(),
                                   u'Template:' + primaryCommonscat),
                    onlyTemplateInclusion=True), ns)
        elif arg == '-always':
            always = True
        else:
            genFactory.handleArg(arg)
    if not generator:
        generator = genFactory.getCombinedGenerator()
    if not generator:
        raise add_text.NoEnoughData(
            'You have to specify the generator you want to use for the script!')
    for page in pagegenerators.PreloadingGenerator(generator):
        # Only work on plain, existing content pages.
        if not page.exists():
            wikipedia.output(u'Page %s does not exist. Skipping.'
                             % page.aslink())
        elif page.isRedirectPage():
            wikipedia.output(u'Page %s is a redirect. Skipping.'
                             % page.aslink())
        elif page.isCategoryRedirect():
            wikipedia.output(u'Page %s is a category redirect. Skipping.'
                             % page.aslink())
        elif page.isDisambig():
            wikipedia.output(u'Page %s is a disambiguation. Skipping.'
                             % page.aslink())
        else:
            (status, always) = addCommonscat(page, summary, always)
def createPageGenerator(self, firstPageTitle):
    """Return a preloading generator of misspelling pages.

    Uses the wiki's misspelling category when one is configured for the
    current language, otherwise falls back to pages transcluding the
    misspelling template (in which case -start is not supported).
    """
    lang = pywikibot.getSite().lang
    if lang in self.misspellingCategory:
        cat = catlib.Category(pywikibot.getSite(),
                              self.misspellingCategory[lang])
        generator = pagegenerators.CategorizedPageGenerator(
            cat, recurse=True, start=firstPageTitle)
    else:
        template = pywikibot.Page(
            pywikibot.getSite(),
            'Template:%s' % self.misspellingTemplate[lang])
        generator = pagegenerators.ReferringPageGenerator(
            template, onlyTemplateInclusion=True)
        if firstPageTitle:
            pywikibot.output(
                u'-start parameter unsupported on this wiki because there is no category for misspellings.')
    return pagegenerators.PreloadingGenerator(generator)
def main():
    """Parse arguments; either split out one big category (-bigcat/-target)
    or intersect the categories tagged with {{Intersect categories}}."""
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    bigcategory = u''
    target = u''
    generator = None
    for arg in wikipedia.handleArgs():
        if arg.startswith('-page'):
            if len(arg) == 5:
                title = wikipedia.input(u'What page do you want to use?')
            else:
                title = arg[6:]
            generator = [wikipedia.Page(wikipedia.getSite(), title)]
        elif arg.startswith('-bigcat'):
            if len(arg) == 7:
                bigcategory = wikipedia.input(
                    u'What category do you want to split out?')
            else:
                bigcategory = arg[8:]
        elif arg.startswith('-target'):
            if len(arg) == 7:
                target = wikipedia.input(
                    u'What category is the target category?')
            else:
                target = arg[8:]
    if bigcategory != u'':
        splitOutCategory(bigcategory, target)
    else:
        if generator is None:
            # Default: all category pages (namespace 14) transcluding the
            # intersect-categories template.
            generator = pagegenerators.NamespaceFilterPageGenerator(
                pagegenerators.ReferringPageGenerator(
                    wikipedia.Page(wikipedia.getSite(),
                                   u'Template:Intersect categories'),
                    onlyTemplateInclusion=True), [14])
        for cat in generator:
            intersectCategories(cat)
def generator(self):
    """Choose and return a page generator according to the options stored
    on this object: new pages, a referring page, a linked page, a category,
    a text file, or an allpages run starting at self.__start."""
    if self.__workonnew:
        if not self.__number:
            self.__number = config.special_page_limit
        return pagegenerators.NewpagesPageGenerator(number=self.__number)
    if self.__refpagetitle:
        refpage = wikipedia.Page(wikipedia.getSite(), self.__refpagetitle)
        return pagegenerators.ReferringPageGenerator(refpage)
    if self.__linkpagetitle:
        linkpage = wikipedia.Page(wikipedia.getSite(), self.__linkpagetitle)
        return pagegenerators.LinkedPageGenerator(linkpage)
    if self.__catname:
        cat = catlib.Category(wikipedia.getSite(),
                              'Category:%s' % self.__catname)
        if self.__start:
            return pagegenerators.CategorizedPageGenerator(
                cat, recurse=self.__catrecurse, start=self.__start)
        return pagegenerators.CategorizedPageGenerator(
            cat, recurse=self.__catrecurse)
    if self.__textfile:
        return pagegenerators.TextfilePageGenerator(self.__textfile)
    # Fallback: walk all pages, starting at self.__start (default '!').
    if not self.__start:
        self.__start = '!'
    namespace = wikipedia.Page(wikipedia.getSite(),
                               self.__start).namespace()
    start = wikipedia.Page(wikipedia.getSite(),
                           self.__start).titleWithoutNamespace()
    return pagegenerators.AllpagesPageGenerator(start, namespace)
action = 'pages' elif arg == ('categories'): action = 'categories' elif arg.startswith('-start:'): start = wikipedia.Page(wikipedia.getSite(), arg[7:]) gen = pagegenerators.AllpagesPageGenerator( start.titleWithoutNamespace(), namespace=start.namespace(), includeredirects=False) elif arg.startswith('-cat:'): cat = catlib.Category(wikipedia.getSite(), 'Category:%s' % arg[5:]) gen = pagegenerators.CategorizedPageGenerator(cat) elif arg.startswith('-ref:'): ref = wikipedia.Page(wikipedia.getSite(), arg[5:]) gen = pagegenerators.ReferringPageGenerator(ref) elif arg.startswith('-link:'): link = wikipedia.Page(wikipedia.getSite(), arg[6:]) gen = pagegenerators.LinkedPageGenerator(link) elif arg.startswith('-page:'): singlepage = wikipedia.Page(wikipedia.getSite(), arg[6:]) gen = iter([singlepage]) #else: #bug if action == 'pages': preloadingGen = pagegenerators.PreloadingGenerator(gen) bot = CommonsLinkBot(preloadingGen, acceptall=False) bot.pages() elif action == 'categories': preloadingGen = pagegenerators.PreloadingGenerator(gen)
def main():
    """Command line entry point of the deletion bot.

    Parses the arguments into a page generator (single page, category,
    linked pages, referring pages, images on a page, or a text file) and
    runs DeletionRobot with the chosen summary; -undelete restores pages
    instead of deleting them.
    """
    pageName = ''
    singlePage = ''  # NOTE(review): never reassigned or read below — unused?
    summary = ''
    always = False
    doSinglePage = False
    doCategory = False
    deleteSubcategories = True
    doRef = False
    doLinks = False
    doImages = False
    undelete = False
    fileName = ''
    gen = None
    # read command line parameters
    for arg in pywikibot.handleArgs():
        if arg == '-always':
            always = True
        elif arg.startswith('-file'):
            if len(arg) == len('-file'):
                fileName = pywikibot.input(
                    u'Enter name of file to delete pages from:')
            else:
                fileName = arg[len('-file:'):]
        elif arg.startswith('-summary'):
            if len(arg) == len('-summary'):
                summary = pywikibot.input(u'Enter a reason for the deletion:')
            else:
                summary = arg[len('-summary:'):]
        elif arg.startswith('-cat'):
            doCategory = True
            if len(arg) == len('-cat'):
                pageName = pywikibot.input(
                    u'Enter the category to delete from:')
            else:
                pageName = arg[len('-cat:'):]
        elif arg.startswith('-nosubcats'):
            deleteSubcategories = False
        elif arg.startswith('-links'):
            doLinks = True
            if len(arg) == len('-links'):
                pageName = pywikibot.input(u'Enter the page to delete from:')
            else:
                pageName = arg[len('-links:'):]
        elif arg.startswith('-ref'):
            doRef = True
            if len(arg) == len('-ref'):
                pageName = pywikibot.input(u'Enter the page to delete from:')
            else:
                pageName = arg[len('-ref:'):]
        elif arg.startswith('-page'):
            doSinglePage = True
            if len(arg) == len('-page'):
                pageName = pywikibot.input(u'Enter the page to delete:')
            else:
                pageName = arg[len('-page:'):]
        elif arg.startswith('-images'):
            doImages = True
            if len(arg) == len('-images'):
                pageName = pywikibot.input(
                    u'Enter the page with the images to delete:')
            else:
                # NOTE(review): the other options slice past the ':' (e.g.
                # len('-page:')); this one slices only len('-images'),
                # leaving a leading ':' in pageName — confirm intended.
                pageName = arg[len('-images'):]
        elif arg.startswith('-undelete'):
            undelete = True
    mysite = pywikibot.getSite()
    if doSinglePage:
        if not summary:
            summary = pywikibot.input(u'Enter a reason for the deletion:')
        page = pywikibot.Page(mysite, pageName)
        gen = iter([page])
    elif doCategory:
        if not summary:
            summary = pywikibot.translate(mysite, msg_delete_category) \
                % pageName
        ns = mysite.category_namespace()
        categoryPage = catlib.Category(mysite, ns + ':' + pageName)
        gen = pagegenerators.CategorizedPageGenerator(
            categoryPage, recurse=deleteSubcategories)
    elif doLinks:
        if not summary:
            summary = pywikibot.translate(mysite, msg_delete_links) % pageName
        pywikibot.setAction(summary)
        linksPage = pywikibot.Page(mysite, pageName)
        gen = pagegenerators.LinkedPageGenerator(linksPage)
    elif doRef:
        if not summary:
            summary = pywikibot.translate(mysite, msg_delete_ref) % pageName
        refPage = pywikibot.Page(mysite, pageName)
        gen = pagegenerators.ReferringPageGenerator(refPage)
    elif fileName:
        if not summary:
            summary = pywikibot.translate(mysite, msg_simple_delete)
        gen = pagegenerators.TextfilePageGenerator(fileName)
    elif doImages:
        if not summary:
            summary = pywikibot.translate(mysite, msg_delete_images)
        gen = pagegenerators.ImagesPageGenerator(
            pywikibot.Page(mysite, pageName))
    if gen:
        pywikibot.setAction(summary)
        # We are just deleting pages, so we have no need of using a
        # preloading page generator to actually get the text of those pages.
        bot = DeletionRobot(gen, summary, always, undelete)
        bot.run()
    else:
        pywikibot.showHelp(u'delete')
def main(*args):
    """Entry point of the template replacement bot.

    Parses the command line into template mappings and options, builds a
    generator of pages transcluding the old templates (or from an XML
    dump), and runs TemplateRobot over it.
    """
    templateNames = []
    templates = {}
    subst = False
    remove = False
    namespaces = []
    editSummary = ''
    addedCat = ''
    acceptAll = False
    genFactory = pagegenerators.GeneratorFactory()
    # If xmlfilename is None, references will be loaded from the live wiki.
    xmlfilename = None
    user = None
    skip = False
    timestamp = None
    # read command line parameters
    for arg in pywikibot.handleArgs():
        if arg == '-remove':
            remove = True
        elif arg == '-subst':
            subst = True
        elif arg == ('-always'):
            acceptAll = True
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlfilename = pywikibot.input(
                    u'Please enter the XML dump\'s filename: ')
            else:
                xmlfilename = arg[5:]
        elif arg.startswith('-namespace:'):
            try:
                namespaces.append(int(arg[len('-namespace:'):]))
            except ValueError:
                namespaces.append(arg[len('-namespace:'):])
        elif arg.startswith('-category:'):
            addedCat = arg[len('-category:'):]
        elif arg.startswith('-summary:'):
            editSummary = arg[len('-summary:'):]
        # NOTE: the -user/-skipuser/-timestamp branches were reconstructed;
        # the original lines were corrupted ('******'-scrubbed) in this
        # copy. They follow the variables declared above.
        elif arg.startswith('-user:'):
            user = arg[len('-user:'):]
        elif arg.startswith('-skipuser:'):
            user = arg[len('-skipuser:'):]
            skip = True
        elif arg.startswith('-timestamp:'):
            timestamp = arg[len('-timestamp:'):]
        else:
            if not genFactory.handleArg(arg):
                templateNames.append(
                    pywikibot.Page(pywikibot.getSite(), arg,
                                   defaultNamespace=10
                                   ).titleWithoutNamespace())
    if subst or remove:
        # No replacement mapping needed when substituting or removing.
        for templateName in templateNames:
            templates[templateName] = None
    else:
        try:
            # Template names come in (old, new) pairs.
            for i in range(0, len(templateNames), 2):
                templates[templateNames[i]] = templateNames[i + 1]
        except IndexError:
            pywikibot.output(
                u'Unless using -subst or -remove, you must give an even number of template names.')
            return
    oldTemplates = []
    for templateName in templates.keys():
        oldTemplate = pywikibot.Page(pywikibot.getSite(), templateName,
                                     defaultNamespace=10)
        oldTemplates.append(oldTemplate)
    if xmlfilename:
        gen = XmlDumpTemplatePageGenerator(oldTemplates, xmlfilename)
    else:
        gen = genFactory.getCombinedGenerator()
    if not gen:
        # Fall back to the pages transcluding any of the old templates,
        # combined and de-duplicated.
        gens = [pagegenerators.ReferringPageGenerator(
                    t, onlyTemplateInclusion=True)
                for t in oldTemplates]
        gen = pagegenerators.CombinedPageGenerator(gens)
        gen = pagegenerators.DuplicateFilterPageGenerator(gen)
    if namespaces:
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
    if user:
        gen = UserEditFilterGenerator(gen, user, timestamp, skip)
    preloadingGen = pagegenerators.PreloadingGenerator(gen)
    bot = TemplateRobot(preloadingGen, templates, subst, remove, editSummary,
                        acceptAll, addedCat)
    bot.run()
import sys, os sys.path.append("/home/rk/py/pywikibot/compat") import time, re import wikipedia as pywikibot import pagegenerators import logging # output debug messages DEBUG = False site = pywikibot.Site("et", "wikipedia") tpl = pywikibot.Page(site, u'Mall:EestiAsula') gen = pagegenerators.ReferringPageGenerator(tpl, followRedirects=False, withTemplateInclusion=True, onlyTemplateInclusion=True) generator = pagegenerators.PreloadingGenerator(gen, pageNumber=50) claims_rules = { ##### main type deprecated 107: 'q618123', # GDN = geo 17: 'q191', # Country = Estonia #TODO: is in the administrative unit #TODO: administrative unit type #instance of << town Q3957 << alev << alevik << village Q532 } def editclaim(article, pid, value):