def get_batch_of_files(current_site, batches_of, api_step, start_from):
    start_getting_allfiles = time.clock()
    # print u'Requesting {0} files'.format(batches_of)  # debug
    batch_of_files_generator = pagegenerators.AllpagesPageGenerator(
        site=current_site, start=start_from, namespace=6,
        includeredirects=False, total=batches_of, step=api_step,
        content=False)
    batch_of_files = []
    for page in batch_of_files_generator:
        batch_of_files.append(page)
    # The generator is lazy: pages are only fetched by the loop above,
    # so stop the clock after it, not right after the constructor.
    done_getting_allfiles = time.clock()
    # print u'Got {0} pages in {1}s'.format(len(batch_of_files), done_getting_allfiles - start_getting_allfiles)  # debug
    return batch_of_files
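# A possible driver for the helper above, paging through the whole
# File: namespace batch by batch. This is only a sketch: the Site
# object, the batch sizes, and the restart-past-the-last-title trick
# are illustrative assumptions, and title()/generator parameter names
# vary across pywikibot versions.
import pywikibot
from pywikibot import pagegenerators

site = pywikibot.Site()
start_from = u'!'  # lexicographically first title
while True:
    batch = get_batch_of_files(site, batches_of=500, api_step=50,
                               start_from=start_from)
    if not batch:
        break  # namespace exhausted
    for page in batch:
        pass  # process each File: page here
    # AllpagesPageGenerator's start is inclusive; appending a character
    # resumes strictly after the last title already seen.
    start_from = batch[-1].title(with_ns=False) + u'!'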
def main(*args):
    gen = pagegenerators.AllpagesPageGenerator(start="Icon-ffxiv-",
                                               namespace=6)
    for page in gen:
        # Allpages is alphabetical, so the first title without the
        # prefix marks the end of the "Icon-ffxiv-" block.
        if "-ffxiv-" not in page.title():
            break
        kill(page)
def main():
    source = pywikibot.getSite('fr', 'vikidia')
    pagesList = pagegenerators.AllpagesPageGenerator(
        namespace=0, includeredirects=False, site=source, start=u"")
    for page in pagesList:
        print page.title()
        inter(page)
def custom_generator(self):
    end = self.getOption('end')
    for page in pagegenerators.AllpagesPageGenerator(
            start=self.getOption('start'), namespace=120,
            site=self.repo, total=self.getOption('total')):
        yield page
        if page.title(with_ns=False) == end:
            break
def custom_generator(self):
    end = self.opt['end']
    for page in pagegenerators.AllpagesPageGenerator(
            start=self.opt['start'], namespace=120,
            site=self.repo, total=self.opt['total']):
        yield page
        if page.title(with_ns=False) == end:
            break
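# The two variants above implement the same inclusive-end pattern and
# differ only in the bot options API (getOption() in older pywikibot,
# the opt mapping in newer releases). A standalone sketch of the
# pattern, with an assumed site and Wikibase-style example titles:
import pywikibot
from pywikibot import pagegenerators

def pages_until(site, start, end, namespace=120, total=None):
    """Yield pages from `start` up to and including `end`."""
    for page in pagegenerators.AllpagesPageGenerator(
            start=start, namespace=namespace, site=site, total=total):
        yield page
        if page.title(with_ns=False) == end:
            break  # `end` itself has already been yielded

site = pywikibot.Site()
for page in pages_until(site, 'Q1', 'Q100'):
    pywikibot.output(page.title())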
def main():
    singlepage = []
    gen = None
    start = None
    action = None
    for arg in pywikibot.handleArgs():
        if arg == 'pages':
            action = 'pages'
        elif arg == 'categories':
            action = 'categories'
        elif arg.startswith('-start:'):
            start = pywikibot.Page(pywikibot.Site(), arg[7:])
            gen = pagegenerators.AllpagesPageGenerator(
                start.title(withNamespace=False),
                namespace=start.namespace(),
                includeredirects=False)
        elif arg.startswith('-cat:'):
            cat = pywikibot.Category(pywikibot.Site(),
                                     'Category:%s' % arg[5:])
            gen = pagegenerators.CategorizedPageGenerator(cat)
        elif arg.startswith('-ref:'):
            ref = pywikibot.Page(pywikibot.Site(), arg[5:])
            gen = pagegenerators.ReferringPageGenerator(ref)
        elif arg.startswith('-link:'):
            link = pywikibot.Page(pywikibot.Site(), arg[6:])
            gen = pagegenerators.LinkedPageGenerator(link)
        elif arg.startswith('-page:'):
            singlepage = pywikibot.Page(pywikibot.Site(), arg[6:])
            gen = iter([singlepage])
        # else: bug

    if action == 'pages':
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = CommonsLinkBot(preloadingGen, acceptall=False)
        bot.pages()
    elif action == 'categories':
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = CommonsLinkBot(preloadingGen, acceptall=False)
        bot.categories()
    else:
        pywikibot.showHelp(u'commons_link')
        choice = 'y'
        if not acceptAll:
            choice = pywikibot.inputChoice(
                u'Do you want to accept these changes?',
                ['Yes', 'No', 'All', 'Quit'],
                ['y', 'N', 'a', 'q'], 'N')
        if choice == 'q':
            exit()
        if choice == 'a':
            acceptAll = True
            choice = 'y'
        if choice == 'y':
            self.page.put(newText, editSummary)

    def run(self):
        self.addZeroSection()


# main code
if __name__ == '__main__':
    common.login(username)
    talkPages = pagegenerators.AllpagesPageGenerator(
        namespace=1, includeredirects=False, site=common.getWikiSite())
    # talkPages = [pywikibot.Page(site=dxdCommonLibrary.getWikiSite(), title=u'Обговорення користувача:RLuts')]
    talkPages = pagegenerators.PreloadingGenerator(talkPages)
    # do work
    for page in talkPages:
        leadSectionFormatter = LeadSectionFormatter(page)
        leadSectionFormatter.run()
def main():
    site = pywikibot.getSite('pl', 'wikinews')
    lista_stron = pagegenerators.AllpagesPageGenerator(site=site)
    re_cytat = re.compile('{{[cC]ytat\|(.*?)}}', re.DOTALL)
    re_tresc = re.compile(
        '(.*?)($|\|2=|\|3=|\|4=|\|5=|\|[0-9]*px\|[0-9]*px\|)', re.DOTALL)
    re_autor = re.compile('(4=|\|[0-9]*px\|[0-9]*px\|)(.*?)($|\|)', re.DOTALL)
    re_zrodlo = re.compile('5=(.*)', re.DOTALL)
    re_cytatlewy = re.compile('{{[cC]ytatLewy\|(.*?)}}', re.DOTALL)
    re_cytatprawy = re.compile('{{[cC]ytatPrawy\|(.*?)}}', re.DOTALL)

    for a in lista_stron:
        log = ''
        try:
            strona = a.get()
        except pywikibot.IsRedirectPage:
            log += '\n*[[%s]] - przekierowanie' % a.title()  # redirect
        except pywikibot.Error:
            print('[[%s]] - błąd' % a.title())  # error
            log += '\n*[[%s]] - błąd' % a.title()
        else:
            # {{Cytat|...}} quotes: extract text, author and source.
            for b in re.findall(re_cytat, strona):
                final = ''
                s_tresc = re.search(re_tresc, b)
                s_autor = re.search(re_autor, b)
                s_zrodlo = re.search(re_zrodlo, b)
                print(b)
                print('\n\n')
                if s_tresc:
                    final += "\n'''treść''': %s" % s_tresc.group(1)
                if s_autor:
                    final += "\n:'''autor''': %s" % s_autor.group(2)
                    print("\n:'''autor''': %s" % s_autor.group(2))
                if s_zrodlo:
                    final += "\n:'''źródło''': %s" % s_zrodlo.group(1)
                final += "\n:'''link''': [[%s]]<br/><br/>" % a.title()
                with open('output/wikinews.txt', 'a') as f:
                    f.write(final.encode('utf-8'))
            # {{CytatLewy|...}} and {{CytatPrawy|...}} carry only the text.
            for c in re.findall(re_cytatlewy, strona):
                final_l = "\n'''treść''': %s" % c
                final_l += "\n:'''link''': [[%s]]<br/><br/>" % a.title()
                with open('output/wikinews_lewy.txt', 'a') as f:
                    f.write(final_l.encode('utf-8'))
            for d in re.findall(re_cytatprawy, strona):
                final_p = "\n'''treść''': %s" % d
                final_p += "\n:'''link''': [[%s]]<br/><br/>" % a.title()
                with open('output/wikinews_prawy.txt', 'a') as f:
                    f.write(final_p.encode('utf-8'))
        with open('log/wikinews.txt', 'a') as f:
            f.write(log.encode('utf-8'))
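# To see what the three inner regexes extract, here is a self-contained
# check against a made-up {{Cytat}} invocation. The sample string and
# the printed values are assumptions for illustration, not data taken
# from pl.wikinews.
import re

re_cytat = re.compile('{{[cC]ytat\|(.*?)}}', re.DOTALL)
re_tresc = re.compile('(.*?)($|\|2=|\|3=|\|4=|\|5=|\|[0-9]*px\|[0-9]*px\|)',
                      re.DOTALL)
re_autor = re.compile('(4=|\|[0-9]*px\|[0-9]*px\|)(.*?)($|\|)', re.DOTALL)
re_zrodlo = re.compile('5=(.*)', re.DOTALL)

sample = u'{{Cytat|Ala ma kota|4=Jan Kowalski|5=gazeta.pl}}'
quote = re.findall(re_cytat, sample)[0]
print(re.search(re_tresc, quote).group(1))   # -> Ala ma kota
print(re.search(re_autor, quote).group(2))   # -> Jan Kowalski
print(re.search(re_zrodlo, quote).group(1))  # -> gazeta.pl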
from lib.googlegeocode import GoogleGeocode

settings = ConfigParser.ConfigParser()
settings.read('../../configs/settings.ini')

with open('config.json') as config_file:
    config = json.load(config_file)

motorway_regex = '[A-Z]?-?\d+\s*(\((\w|\s)+\))'
border_regex = '.*border (crossing|checkpoint)'

geonames = GeoNames(settings.get('vendor', 'geonames_username'), './.cache')
google_geocode = GoogleGeocode('./.cache')

site = pywikibot.Site()
gen = pagegenerators.AllpagesPageGenerator(site=site)
disamb_cat = pywikibot.Category(site, 'Disambiguation')
disamb_pages = [article.title() for article in disamb_cat.articles()]

count = 0
for page in gen:
    if not page.isRedirectPage() and page.title() not in disamb_pages:
        print '#%d. %s' % (count + 1, page.title().encode('utf-8'))
        print 'http://' + settings.get('general', 'domain') + '/en/' \
            + page.title(asUrl=True)
        # Uncomment to resume from a specific point
        #if count < 3055:
        #    count += 1
        #    continue
def newPages(all=False):
    global nbrModif, nbrTotal
    log = u''
    # BUGFIX
    bugfixPage = pywikibot.Page(site, u"Utilisateur:LinedBot")
    bugfixPage.save('')
    # END OF FIX
    homonCat = pywikibot.Category(site, u"Homonymie")
    ebaucheCat = pywikibot.Category(site, u"Ébauche")
    ebaucheCat = set(ebaucheCat.subcategories(recurse=3))
    hiddenCat = pywikibot.Category(site, u"Catégorie cachée")
    hiddenCat = set(hiddenCat.subcategories())
    portalCat = pywikibot.Category(site, u"Liste d'articles")
    portalCat = set(portalCat.subcategories())
    ignoreCat = pywikibot.Category(site, u"Page ignorée par les robots")
    concoursCat = pywikibot.Category(site, u"Article VikiConcours")
    deadendPagesList = list(pagegenerators.DeadendPagesPageGenerator(site=site))
    lonelyPagesList = list(pagegenerators.LonelyPagesPageGenerator(site=site))
    if all:
        pagesList = pagegenerators.AllpagesPageGenerator(
            namespace=0, includeredirects=False, site=site)
    else:
        pagesList = pagegenerators.NewpagesPageGenerator(total=50, site=site)
    for page in pagesList:
        try:
            pageTemp = page.get()
        except pywikibot.NoPage:
            pywikibot.output(u"Page %s does not exist; skipping."
                             % page.title(asLink=True))
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"Page %s is a redirect; skipping."
                             % page.title(asLink=True))
        except pywikibot.LockedPage:
            pywikibot.output(u"Page %s is locked; skipping."
                             % page.title(asLink=True))
        else:
            # Only process the page if it is neither a disambiguation
            # page nor a VikiConcours entry.
            pageCat = list(page.categories())  # materialize: reused below
            if homonCat not in pageCat and concoursCat not in pageCat:
                # Only handle banner updates if the page is not ignored.
                jobList = []
                if ignoreCat not in pageCat:
                    # Orphan job: no other page links here.
                    if page in lonelyPagesList:
                        jobList.append(u'orphelin')
                    # Categorize job: no (direct) real category.
                    realCat = set(pageCat) - hiddenCat - ebaucheCat
                    if not realCat:
                        jobList.append(u'catégoriser')
                    # Portal job: the page belongs to no portal.
                    if not set(pageCat) & portalCat:
                        jobList.append(u'portail')
                    # Dead-end job: the page links to no other page.
                    if page in deadendPagesList:
                        jobList.append(u'impasse')
                    """
                    # Source job: over 2000 bytes and no external link.
                    if len(pageTemp) > 2000 and len(list(page.extlinks())) == 0:
                        jobList.append(u'sourcer')
                    """
                else:
                    print u'Skipping [[' + page.title() + u']], page in ignore list.'
                pageTemp, oldJobList = removeBanner(pageTemp)
                jobList = updateJobList(oldJobList, jobList)
                job = u''
                # Symmetric difference of the two lists: the elements
                # present in exactly one of them, i.e. (A-B)+(B-A).
                diff = list(set(oldJobList).symmetric_difference(set(jobList)))
                if diff:
                    nbrTotal += 1
                    if len(jobList) > 0:
                        job = u','.join(jobList)
                        banner = u'{{Maintenance|job=' + job + u'|date=~~~~~}}\n\n'
                        pageTemp = banner + pageTemp
                        summary = u'[[VD:Robot|Robot]] : Mise à jour du bandeau de maintenance.'
                    else:
                        summary = u'[[VD:Robot|Robot]] : Retrait du bandeau de maintenance.'
                    c = callback.Callback()
                    page.text = pageTemp
                    page.save(summary, callback=c)
                    if c.error is None:
                        nbrModif += 1
                        log += (u'*{{Utilisateur:LinedBot/ExtLinker|' + page.title()
                                + u'}} : Mise à jour du bandeau {{m|maintenance}}'
                                + u' avec les paramètres suivants : ' + job + u'\n')
    return log
paraules = {
    # Add each word as a regexp and, next to it, the plain link target
    # (see the example). Any number of entries may be added.
    u' ([Pp]ag(e|es))( |\.|\,)': u'Page',
}

def main(page, paraula):
    if page.title() == paraules[paraula]:
        print(u"Skipping %s because it is the link page" % str(page))
        return
    substitucio = ' [[' + paraules[paraula] + '|\\1]]\\3'
    print(u"Page: %s" % page)
    text = page.text
    comptador = re.search(paraula, text)
    if comptador:
        print("======= EDITING PAGE %s! =======" % page)
        noutext = re.sub(paraula, substitucio, text)
        page.text = noutext
        page.save(u'Bot: Adding links for %s' % paraules[paraula])
    else:
        print("No links added")
    return

if __name__ == '__main__':
    allpages = pg.AllpagesPageGenerator(site=pwb.Site(), start="!",
                                        namespace=0, includeredirects=True)
    pages = pg.PreloadingGenerator(allpages, pageNumber=100)
    for page in pages:
        for paraula in paraules:
            main(page, paraula)
    print("\nFinished!")