def standardizePageFooter(self, text):
    """
    Standardize page footer.

    Makes sure that interwiki links and categories are put
    into the correct position and into the right order. This
    combines the old instances of standardizeInterwiki
    and standardizeCategories.

    The page footer consists of the following parts
    in that sequence:
    1. categories
    2. additional information depending on the local site policy
    3. interwiki
    """
    categories = []
    interwiki_links = []

    # get categories
    if not self.template:
        categories = textlib.getCategoryLinks(text, site=self.site)

    if not self.talkpage:
        subpage = False
        if self.template:
            try:
                tmpl, loc = moved_links[self.site.code]
                del tmpl
            except KeyError:
                loc = None
            if loc is not None and loc in self.title:
                subpage = True

        # get interwiki
        interwiki_links = textlib.getLanguageLinks(
            text, insite=self.site, template_subpage=subpage)

        # remove interwiki
        text = textlib.removeLanguageLinks(text, site=self.site)

    # add categories, main to top
    if categories:
        # TODO: Sort categories in alphabetic order, e.g. using
        # categories.sort()? (T100265)
        # TODO: Get main categories from Wikidata?
        main = pywikibot.Category(self.site, 'Category:' + self.title,
                                  sort_key=' ')
        if main in categories:
            categories.pop(categories.index(main))
            categories.insert(0, main)
        text = textlib.replaceCategoryLinks(text, categories, site=self.site)

    # add interwiki
    if interwiki_links:
        text = textlib.replaceLanguageLinks(text, interwiki_links,
                                            site=self.site,
                                            template=self.template,
                                            template_subpage=subpage)

    return text
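The method above enforces the footer order categories → local extras → interwiki. A minimal standalone sketch of the same contract, using only the textlib helpers the method itself calls (the site and page title here are examples, not part of the original):

import pywikibot
from pywikibot import textlib

site = pywikibot.Site('en', 'wikipedia')  # example site
page = pywikibot.Page(site, 'Sandbox')    # example page
text = page.text

cats = textlib.getCategoryLinks(text, site=site)      # part 1: categories
links = textlib.getLanguageLinks(text, insite=site)   # part 3: interwiki
text = textlib.removeLanguageLinks(text, site=site)
text = textlib.replaceCategoryLinks(text, cats, site=site)   # categories first
text = textlib.replaceLanguageLinks(text, links, site=site)  # interwiki last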
def CAT(site, name, hide):
    name = site.namespace(14) + ':' + name
    cat = pywikibot.Category(site, name)
    yield from cat.articles(endsort=hide)
    if hide:
        yield from cat.articles(startFrom=chr(ord(hide) + 1))
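The two-call split yields members sorted up to the hide character and then resumes at the following character, so entries filed under the hidden sort letter are skipped. A hypothetical call (site and category name are examples):

import pywikibot

site = pywikibot.Site('en', 'wikipedia')            # example site
for article in CAT(site, 'Example category', 'B'):  # 'B' is the hide letter
    print(article.title())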
import mwparserfromhell as mwp
import pywikibot as pwb

pwb.config.put_throttle = 0
site = pwb.Site()
try:
    for page in pwb.Category(site, "Formes de verbes en français").articles():
        page_text = page.text
        if "pron-rimes" not in page_text:
            parsed_text = mwp.parse(page_text)
            templates = parsed_text.filter_templates()
            fr_pron_templates = list(
                filter(lambda t: t.name == "pron" and t.params[1] == "fr",
                       templates))
            if not fr_pron_templates or not fr_pron_templates[0].params:
                continue
            fr_pron = fr_pron_templates[0].params[0]
            lang_sections = parsed_text.get_sections(levels=[2])
            if page_text.count("{{langue|") != len(lang_sections):
                print("Mauvais nombre de sections : " + page.title())
                continue
            fr_section = list(
                filter(lambda s: s.filter_headings()[0].title.filter_templates()[0] == "{{langue|fr}}",
                       lang_sections))[0]
            if "{{S|prononciation}}" in fr_section:
                level_3_sections = fr_section.get_sections(levels=[3])
def encatlist(encat):
    count = 0
    listenpageTitle = []
    encat = encat.replace(u'[[', u'').replace(u']]', u'').replace(
        u'Category:', u'').replace(u'category:', u'').strip()
    language = 'en'
    encat = pywikibot.Category(pywikibot.Site(language), encat)
    listacategory = [encat]

    for enpageTitle in listacategory:
        try:
            fapages = pagefafinder(enpageTitle)
            if fapages is not False:
                for pages, profix_fa in fapages:
                    if profix_fa == '14':
                        pages = u'Category:' + unicode(pages, 'UTF-8')
                    elif profix_fa == '12':
                        pages = u'Help:' + unicode(pages, 'UTF-8')
                    elif profix_fa == '10':
                        pages = u'Template:' + unicode(pages, 'UTF-8')
                    elif profix_fa == '6':
                        pages = u'File:' + unicode(pages, 'UTF-8')
                    elif profix_fa == '4':
                        pages = u'Wikipedia:' + unicode(pages, 'UTF-8')
                    elif profix_fa == '100':
                        pages = u'Portal:' + unicode(pages, 'UTF-8')
                    elif profix_fa in [
                            '1', '2', '3', '5', '7', '8', '9', '11', '13',
                            '15', '101', '103', '118', '119', '446', '447',
                            '828', '829']:
                        continue
                    else:
                        pages = unicode(pages, 'UTF-8')
                    pywikibot.output(u'\03{lightgreen}Adding ' + pages +
                                     u' to fapage lists\03{default}')
                    listenpageTitle.append(pages)
        except:
            try:
                enpageTitle = unicode(
                    str(enpageTitle), 'UTF-8').split(u'|')[0].split(
                        u']]')[0].replace(u'[[', u'').strip()
            except:
                enpageTitle = enpageTitle.split(u'|')[0].split(
                    u']]')[0].replace(u'[[', u'').strip()
            cat = pywikibot.Category(pywikibot.Site(language), enpageTitle)
            gent = pagegenerators.CategorizedPageGenerator(cat)
            for pagework in gent:
                count += 1
                try:
                    link = str(pagework).split(u'|')[0].split(
                        u']]')[0].replace(u'[[', u'').strip()
                except:
                    pagework = unicode(str(pagework), 'UTF-8')
                    link = pagework.split(u'|')[0].split(u']]')[0].replace(
                        u'[[', u'').strip()
                pywikibot.output(link)
                fapagetitle = englishdictionry(link, en_site, fa_site)
                if fapagetitle is False:
                    continue
                else:
                    pywikibot.output(u'\03{lightgreen}Adding ' + fapagetitle +
                                     u' to fapage lists\03{default}')
                    listenpageTitle.append(fapagetitle)

    if listenpageTitle == []:
        return False, False
    return listenpageTitle, listacategory
import pywikibot
from pywikibot import pagegenerators
# use this rather than "re" to avoid the
# "look-behind requires fixed-width pattern" error
import regex as re

site = pywikibot.Site('zh', 'wikipedia')
cat = pywikibot.Category(site, 'Category:連結格式不正確的條目')
gen = pagegenerators.CategorizedPageGenerator(cat, recurse=True)

# raw-string prefixes added so the backslash escapes survive unchanged
ilh = r'(?<!\{\{(Advtranslation|Plant\-translation|Translate|Translating|Translation[ _]+WIP|Translation|Trans|Tran|Voltranslation|Wptranslation|正在翻(譯|译)|(翻)?(譯|译)(中)?)[^\}]*)\[\[\:(w|aa|ab|ace|ady|af|ak|als|am|an|ang|ar|arc|arz|as|ast|av|ay|az|azb|ba|bar|bat-smg|bcl|be|be-tarask|be-x-old|bg|bh|bi|bjn|bm|bn|bo|bpy|br|bs|bug|bxr|ca|cbk-zam|cdo|ce|ceb|ch|cho|chr|chy|ckb|co|cr|crh|cs|csb|cu|cv|cy|da|de|diq|dsb|dv|dz|ee|egl|eml|el|en|eo|es|et|eu|ext|fa|ff|fi|fiu-vro|fj|fo|fr|frp|frr|fur|fy|ga|gag|gan|gd|gl|glk|gn|gom|got|gsw|als|gu|gv|ha|hak|haw|he|hi|hif|ho|hr|hsb|ht|hu|hy|hz|ia|id|ie|ig|ii|ik|ilo|io|is|it|iu|ja|jp|jam|jbo|jv|ka|kaa|kab|kbd|kg|ki|kj|kk|kl|km|kn|ko|koi|kr|krc|ks|ksh|ku|kv|kw|ky|la|lad|lb|lbe|lez|lg|li|lij|lmo|ln|lo|lrc|lt|ltg|lv|lzh|zh-classical|mai|map-bms|mdf|mg|mh|mhr|mi|min|mk|ml|mn|mo|mr|mrj|ms|mt|mus|mwl|my|myv|mzn|na|nah|nan|zh-min-nan|nap|nb|no|nds|nds-nl|ne|ne|new|ng|nl|nn|no|nov|nrm|nso|nv|ny|oc|olo|om|or|os|pa|pag|pam|pap|pcd|pdc|pfl|pi|pih|pl|pms|pnb|pnt|ps|pt|qu|rm|rmy|rn|ro|roa-rup|roa-tara|ru|rue|rup|rw|sa|sah|sc|scn|sco|sd|se|sg|sgs|sh|si|simple|sk|sl|sm|sn|so|sq|sr|srn|ss|st|stq|su|sv|sw|szl|ta|tcy|te|tet|tg|th|ti|tk|tl|tn|to|tpi|tr|ts|tt|tum|tw|ty|tyv|udm|ug|uk|ur|uz|ve|vec|vep|vi|vls|vo|vro|wa|war|wo|wuu|xal|xh|xmf|yi|yo|yue|zh-yue|za|zea|zu)\:(?!(wiktionary|wikt|wikinews|n|wikibooks|b|wikiquote|q|wikisource|s|oldwikisource|species|wikispecies|wikiversity|v|betawikiversity|wikimedia|foundation|wmf|wikivoyage|voy|commons|c|meta|metawikipedia|m|strategy|incubator|mediawikiwiki|mw|mediawiki|quality|otrswiki|otrs|ticket|phabricator|bugzilla|mediazilla|phab|nost|testwiki|wikidata|d|outreach|outreachwiki|toollabs|wikitech|dbdump|download|gerrit|mail|mailarchive|rev|spcom|sulutil|svn|tools|tswiki|wm2016|wm2017|wmania|User|Wikipedia|MediaWiki|File|Image|WP|Project|Template|Help|Special|U|利用者)\:)|(?<=\r|\n)(\=){2,}.*\[\[.*\]\].*(\=){2,}|\[\[(JP|JA|EN)\:\:'
ls_t = r'\{\{Link style\|time=\d{4}\-\d{2}\-\d{2}T\d{2}\:\d{2}\:\d{2}\+00:00\}\}'

count = 0
tot_num = len(list(cat.articles(namespaces=0, recurse=True)))
print(tot_num)
for page in gen:
    count += 1
    art_txt = page.text
    ilh_num = len(re.findall(ilh, art_txt, re.I))
    if (ilh_num > 0):
        continue
    new_art_txt = re.sub(ls_t + '\n', '', art_txt, flags=re.I)
    new_art_txt = re.sub(ls_t, '', new_art_txt, flags=re.I)
    page.text = new_art_txt
    percentage = 100 * count / tot_num
    print(format(percentage, '0.3f'), '%:', page.title(), 'has', ilh_num,
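The import comment above is the whole point of using the third-party regex module: the giant pattern starts with a variable-width look-behind, which the stdlib re rejects. A tiny self-contained demonstration (the pattern here is a made-up miniature, not the script's):

import re
import regex

pattern = r'(?<!\{\{Translation[^\}]*)\[\[:en:'  # variable-width look-behind
try:
    re.compile(pattern)
except re.error as err:
    print('re:', err)  # look-behind requires fixed-width pattern
print(regex.search(pattern, '{{Translation}} [[:en:Foo]]'))  # regex accepts it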
    match = CSD_TEMPLATE.search(self.text)
    return match.group(1) if match else None

def get_csd_rev(self):
    csd_revs = (rev for rev in self.page_object.revisions()
                if CSD_SUMMARY.search(rev.comment))
    try:
        return next(csd_revs)
    except StopIteration:
        return None

site = pywikibot.Site()
rules = AlexNewArtBotResult(rulesName)
cat = pywikibot.Category(site, catName)

# Find scores for each article in the category
articles = [Article(page) for page in cat.articles(namespaces=(0))]
articles.sort(key=lambda a: a.score, reverse=True)
articles = articles[:NUM_ARTICLES]

# Upload to the wiki
content = ""
content += "== CSD alerts =="
now = datetime.datetime.utcnow()
for each_article in articles:
    csd_rev = each_article.get_csd_rev()
    if csd_rev:
        deletion_delta = now - csd_rev.timestamp
        age_in_hours = float(deletion_delta.total_seconds()) / 3600
config_page = pywikibot.Page(site, config_page_name)
cfg = config_page.text
cfg = json.loads(cfg)
print(json.dumps(cfg, indent=4, ensure_ascii=False))

if not cfg['enable']:
    exit('disabled\n')

reported_pages = set()
for page in pywikibot.Page(site, cfg['np_page']).linkedPages(namespaces=[0]):
    reported_pages.add(page.title())
# print(reported_pages)

afd_pages = set()
for page in pywikibot.Category(site, cfg['afd_category']).members(namespaces=[0]):
    afd_pages.add(page.title())
# print(afd_pages)

othertext = ''
text_dict = {}
for cate in pywikibot.Category(site, cfg['np_category']).members():
    if cate.namespace().id != 14:
        othertext += '# [[:{}]]\n'.format(cate.title())
        continue
    m = re.search(r'^Category:自(\d+)年(\d+)月主題關注度不足的條目$', cate.title())
    key = None
    if m:
        key = int(m.group(1)) * 100 + int(m.group(2))
    text_dict[key] = '=== [[:{}]] ===\n'.format(cate.title())
    for page in cate.members():
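The key computation packs year and month into one sortable integer, so the per-month sections order chronologically. A quick check with an example title:

import re

m = re.search(r'^Category:自(\d+)年(\d+)月主題關注度不足的條目$',
              'Category:自2021年3月主題關注度不足的條目')
key = int(m.group(1)) * 100 + int(m.group(2))
print(key)  # 202103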
def importPadsCheckbox():
    category = pywikibot.Category(site, catnameimport)
    gen = pagegenerators.CategorizedPageGenerator(category)
    print('Reading [[Category:%s]]' % (catnameimport))
    # pages = [pywikibot.Page(site, 'Usuari:TeixidoraBot/Sandbox3')]
    # for page in pages:
    for page in gen:
        print('\n== %s ==\n' % (page.title()))
        # get pads to import
        padnotes = page.text.split('{{Esdeveniment pr/padnotes')[1:]
        padnumber = 0
        pads = []
        for padnote in padnotes:
            padnote = padnote.split('}}')[0]
            try:
                padnumber += 1
                padurl = re.findall(r'(?im)notes pad url\s*=\s*(https?://[^\s\|]+)', padnote)[0]
                importornot = re.findall(r'(?im)bot import\s*=\s*Si', padnote) and True or False
                pads.append([padnumber, padurl, importornot])
            except:
                pass
        print('Pads found:\n%s' % (pads))

        contentall = ''
        padurls = []
        for padnumber, padurl, importornot in pads:
            print('Padurl: %s' % (padurl))
            if importornot:
                # get pad content
                padurls.append(padurl)
                content = getPadContent(url=padurl)
                content = removeNoImport(content=content)
                contentall += '\n' + content
                # create redirect from apunts to apunts/01
                """if padnumber == 1:
                    redapuntstitle = '%s/apunts' % (page.title())
                    redapuntspage = pywikibot.Page(site, redapuntstitle)
                    if not redapuntspage.exists():
                        redapuntspage.text = '#REDIRECT [[%s/apunts/01]]' % (page.title())
                        redapuntspage.save('BOT - Redirect')"""
                # save pad and params
                apuntstitle = '%s/apunts/%02d' % (page.title(), padnumber)
                apuntspage = pywikibot.Page(site, apuntstitle)
                if not apuntspage.exists() or \
                   (apuntspage.exists() and len(apuntspage.text) < 5) or \
                   '/Sandbox' in apuntstitle or \
                   getPageHistoryAuthors(page=apuntspage) == [botname]:
                    # import labels into template parameters and remove them from content
                    content = importLabels(padurl=padurl, page=page, content=content)
                    # import pad into apunts
                    importPad(padurl=padurl, content=content, apuntspage=apuntspage)
                    # remove checkbox
                    switchCheckbox(oldvalue='Si', newvalue='Fet', page=page, apuntspage=apuntspage)
                    print('Imported correctly')
                else:
                    log(log='[[%s|Apunts page]] exists, skipping. [%s Pad] not imported.' % (apuntstitle, padurl))
                    switchCheckbox(oldvalue='Si', newvalue='Fet', page=page, apuntspage=apuntspage)
            else:
                print('Not set to import, skipping...')
        # import keywords
        if padurls:
            importKeywords(padurls=padurls, page=page, content=contentall)
def addCategory(self, catName):
    cat = pywikibot.Category(self.site, "Catégorie:" + catName)
    for page in cat.articles(recurse=True):
        if page.userName() != 'CoalémosBot':
            self.pages.append(page)
def main():
    # page generator
    gen = None
    # This temporary array is used to read the page title if one single
    # page to work on is specified by the arguments.
    pageTitle = []
    # Which namespaces should be processed?
    # default to [] which means all namespaces will be processed
    namespaces = []
    # Never ask before changing a page
    always = False
    # No verbose output
    verbose = True

    # Process global args and prepare generator args parser
    local_args = pywikibot.handleArgs()
    genFactory = pagegenerators.GeneratorFactory()

    for arg in local_args:
        if arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
            gen = XmlDumpNoReferencesPageGenerator(xmlFilename)
        elif arg.startswith('-namespace:'):
            try:
                namespaces.append(int(arg[11:]))
            except ValueError:
                namespaces.append(arg[11:])
        elif arg == '-always':
            always = True
        elif arg == '-quiet':
            verbose = False
        else:
            if not genFactory.handleArg(arg):
                pageTitle.append(arg)

    if pageTitle:
        page = pywikibot.Page(pywikibot.Site(), ' '.join(pageTitle))
        gen = iter([page])
    if not gen:
        gen = genFactory.getCombinedGenerator()
    if not gen:
        site = pywikibot.Site()
        try:
            cat = maintenance_category[site.family.name][site.lang]
        except:
            pass
        else:
            if not namespaces:
                namespaces = [0]
            cat = pywikibot.Category(
                site, "%s:%s" % (site.category_namespace(), cat))
            gen = pagegenerators.CategorizedPageGenerator(cat)
    if not gen:
        pywikibot.showHelp('noreferences')
    else:
        if namespaces:
            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = NoReferencesBot(preloadingGen, always, verbose)
        bot.run()
def __init__(self, **kwargs):
    """Initializer."""
    super(InformationBot, self).__init__(**kwargs)
    lang_tmp_cat = pywikibot.Category(self.site, self.lang_tmp_cat)
    self.lang_tmps = lang_tmp_cat.articles(namespaces=[10])
def add_dates(site, category_name, template_names):
    print('Розчищаємо', category_name)
    cat = pywikibot.Category(site, 'Категорія:' + category_name)
    for page in pagegenerators.PreloadingGenerator(cat.articles(), 10):
        fix_page(site, page)
def newCategoryGenerator(bot, category):
    cat = pywikibot.Category(bot.site, category)
    return pagegenerators.CategorizedPageGenerator(cat)
def test_init(self):
    """Test the category's __init__ for one condition that can't be dry."""
    site = self.get_site()
    with self.assertRaisesRegex(ValueError, self.NOCATEGORYNAMESPACE_RE):
        pywikibot.Category(site, 'Wikipedia:Test')
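Outside the test suite, the same behaviour can be seen directly: constructing a Category from a title that is not in the category namespace raises ValueError. A quick interactive sketch (the site is an example):

import pywikibot

site = pywikibot.Site('en', 'wikipedia')  # any site works here
try:
    pywikibot.Category(site, 'Wikipedia:Test')
except ValueError as err:
    print(err)  # the title is not in the category namespace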
def do_cat(cat):
    cat = pywikibot.Category(site, cat)
    gen = cat.articles(namespaces=[0], content=True)
    for page in gen:
        do_page(page, cat)
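A hypothetical call site for do_cat(), assuming the global site and a do_page() callback as the snippet does; the category name is an example. content=True makes the generator prefetch page text, so do_page() can read page.text without extra API round-trips:

site = pywikibot.Site('en', 'wikipedia')  # example site

def do_page(page, cat):
    print(page.title(), 'is in', cat.title())

do_cat('Category:Living people')  # example category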
def main():
    config.use_mwparserfromhell = False
    locale.setlocale(locale.LC_ALL, 'fr_FR.utf-8')
    db = False
    global test
    global dry
    dry = False  # Enable only for tests
    test = False  # Enable only to test the script on a single page
    ns_test = False
    recurse_test = False

    for arg in pywikibot.handleArgs():
        if arg == "-dry":
            dry = True
            pywikibot.output(u'(dry is ON)')
        elif arg[0:6] == "-test:":
            test = True
            titre_page_test = arg[6:]
        elif arg[0:4] == "-ns:":
            ns_test = True
            namespaces_test_value = [int(i) for i in arg[4:].split(',')]
        elif arg[0:9] == "-recurse:":
            recurse_test = True
            recurse_test_value = bool(arg[9:])

    comment_modele = u"%(nombre_articles)i articles) (Bot: Mise à jour de la liste des articles récents (%(precision_pages)s)"

    site = pywikibot.Site()
    titre_modele = u"Articles récents"
    modele = pywikibot.Page(site, titre_modele, ns=10)
    gen = pagegenerators.ReferringPageGenerator(modele,
                                                onlyTemplateInclusion=True)

    matchDebut1 = u"<!-- Ce tableau est créé automatiquement par un robot. Articles Récents DEBUT -->"
    matchFin1 = u"\n<!-- Ce tableau est créé automatiquement par un robot. Articles Récents FIN -->"
    matchDebut2 = u"<!-- Ce tableau est créé automatiquement par un robot. Articles Récents Liste DEBUT -->"
    matchFin2 = u"\n<!-- Ce tableau est créé automatiquement par un robot. Articles Récents Liste FIN -->"

    if test:
        pywikibot.output(u'(test is ON)')
        gen = [pywikibot.Page(site, titre_page_test)]
    if ns_test:
        pywikibot.output(u'(ns_test is ON)')

    for main_page in gen:
        try:
            comment = comment_modele
            pywikibot.output(u"\n========================\nTraitement de %s\n========================"
                             % main_page.title())
            text = main_page.get()

            #####################
            # Gather information about the page
            #####################
            templates = textlib.extract_templates_and_params_regex(text)
            template_in_use = None
            for tuple in templates:
                if tuple[0] != u'Articles récents':
                    continue
                else:
                    template_in_use = tuple[1]
                    break
            if not template_in_use:
                pywikibot.output(u"Aucun modèle {{Articles récents}} détecté sur la page %s"
                                 % main_page.title())
                continue

            titre_categorie = check_and_return_parameter(template_in_use,
                                                         u'catégorie')
            if not titre_categorie:
                continue
            cat = pywikibot.Category(site, titre_categorie)

            nbMax = check_and_return_parameter(template_in_use, 'nbMax', 10)
            try:
                nbMax = int(nbMax)
            except:
                pywikibot.output(u'Erreur : nbMax incorrect')
                continue

            namespaces = check_and_return_parameter(template_in_use,
                                                    'namespaces', '0')
            namespaces = namespaces.split(',')
            try:
                namespaces = [int(k) for k in namespaces]
            except:
                pywikibot.output(u'Erreur : des namespaces spécifiés ne sont pas des entiers')
                continue

            recurse = check_and_return_parameter(template_in_use,
                                                 'recurse', '0')
            if recurse.lower().strip() in ('oui', '1'):
                recurse = True
            else:
                recurse = False

            delai_creation = check_and_return_parameter(template_in_use,
                                                        'delai', '0')
            try:
                delai_creation = int(delai_creation)
            except:
                pywikibot.output(u'Erreur : delai incorrect')
                continue

            format_date = check_and_return_parameter(template_in_use,
                                                     u'date') or None
            if format_date:
                try:
                    test_date = datetime.datetime.now()
                    test_date.strftime(format_date)
                except:
                    format_date = None
                    pywikibot.output(u'Erreur : format de date incorrect')

            puce = check_and_return_parameter(template_in_use, 'puces', '#')

            listeRecents = text[(text.index(matchDebut1)
                                 + len(matchDebut1)):text.index(matchFin1)]
            # Drop the first (empty) element of the list
            listeRecents = listeRecents.split('\n%s ' % puce)[1:]
            listeRecents_old = [page for page in listeRecents]

            listeRecents = list()
            dico_dates_presentes = {}
            for recent in listeRecents_old:
                r = re.search(u"(\[\[.*\]\]) ?(\(.+\))?", recent)
                if r:
                    listeRecents.append(r.group(1))
                    if r.group(2):
                        dico_dates_presentes[r.group(1)] = r.group(2)[1:-1]
                else:
                    pass

            text = re.sub(re.compile(u"%s.*%s" % (matchDebut2, matchFin2),
                                     re.S),
                          u"%s%s" % (matchDebut2, matchFin2), text)

            #####################
            # In case there is no new page but one or more pages
            # have been deleted
            exception_maj = False
            # Used to refine the edit summary
            precisions_comment = u""

            pywikibot.output('stade 0')
            #####################
            # Check the recent pages currently listed (in case of deletion)
            #####################
            for titre_article in listeRecents:
                try:
                    # Strip the brackets: [[…]].
                    page = pywikibot.Page(site,
                                          re.sub(u"\[\[(.*)\]\]", "\\1",
                                                 titre_article))
                    # If the page still exists and is not a redirect,
                    # keep it in the list…
                    page.get()
                    if format_date and not dico_dates_presentes.has_key(titre_article) \
                       and find_date(page, cat):
                        # A date was found where there was none before.
                        exception_maj = True
                        dico_dates_presentes[titre_article] = \
                            find_date(page, cat).strftime(format_date)
                except pywikibot.NoPage:
                    pywikibot.output(u"La page %s n'existe plus."
                                     % page.title(asLink=True))
                    pywikibot.output(u"Suppression de la page %s de la liste listeRecents"
                                     % page.title(asLink=True))
                    precisions_comment += (u"; - %s" % titre_article)
                    listeRecents.remove(titre_article)
                    # Force the page update, even if no new recent
                    # article is found.
                    exception_maj = True
                except pywikibot.IsRedirectPage:
                    pywikibot.output(u"La page %s n'est plus qu'une redirection."
                                     % page.title(asLink=True))
                    try:
                        nouvelle_page = page.getRedirectTarget()
                        pywikibot.output(u"Modification du titre la page %s (renommée en %s)"
                                         % (page.title(asLink=True),
                                            nouvelle_page.title(asLink=True, withSection=False)))
                        precisions_comment += (u"; - %s ; + %s"
                                               % (titre_article,
                                                  nouvelle_page.title(asLink=True, withSection=False)))
                        if not nouvelle_page.title(asLink=True, withSection=False) in listeRecents:
                            listeRecents[listeRecents.index(titre_article)] = \
                                nouvelle_page.title(asLink=True, withSection=False)
                        else:
                            pywikibot.output(u"La page destination était déjà présente dans la liste")
                            listeRecents.pop(listeRecents.index(titre_article))
                        # Force the page update, even if no new recent
                        # article is found.
                        exception_maj = True
                    except:
                        pywikibot.output(u"an error occured (CircularRedirect?)")
                # except KeyboardInterrupt:
                #     pywikibot.stopme()
                except:
                    try:
                        pywikibot.output(u"Erreur inconnue lors du traitement de la page %s"
                                         % page.title(asLink=True))
                    except:
                        pywikibot.output(u"Erreur inconnue lors du traitement d'une page")
                else:
                    # No error: move on to the next page
                    continue

            if precisions_comment:
                precisions_comment = precisions_comment[2:]  # Strip the leading '; '
            #####################

            #####################
            # Look for new articles
            #####################
            precisions_comment2 = u""

            # Fetch the bot's last update of the page
            db = _mysql.connect(host='tools-db', db='s51245__totoazero',
                                read_default_file="/data/project/totoazero/replica.my.cnf")
            results = db.query('SELECT last FROM maj_articles_recents WHERE page="%s"'
                               % main_page.title().replace('"', '\\"').encode('utf-8'))
            results = db.store_result()
            result = results.fetch_row(maxrows=0)
            pywikibot.output(("last check was " + str(result)))

            if result:
                first_passage = False
                t = result[0][0]
                timestamp = pywikibot.Timestamp.strptime(t, "%Y-%m-%d %H:%M:%S")
                # Only build the list of articles added to the category
                # after the last edit of the page containing the
                # {{Articles récents}} template.
                #list_new.extend([page for page in site.categorymembers(cat, starttime=timestamp, sortby='timestamp', namespaces=[0])])
                list_new = [page for page in cat.articles(starttime=timestamp,
                                                          sortby='timestamp',
                                                          namespaces=namespaces,
                                                          recurse=recurse)]
                list_new.reverse()
            else:
                # new page, first run of the bot
                first_passage = True
                timestamp = main_page.editTime()
                if delai_creation > 0:
                    timestamp -= datetime.timedelta(hours=delai_creation)
                # Build the first list, to avoid leaving the page
                # empty if possible.
                list_new = [page for page in cat.newest_pages(total=nbMax)]
                # TODO: better?
                #list_new = [page for page in cat.articles(sortby='timestamp', namespaces=namespaces, recurse=recurse)]

            pywikibot.output('stade 2')
            now = datetime.datetime.now()

            # NB: exception_maj may have been set to True if an article
            # was removed from the category.
            if len(list_new) == 0 and not exception_maj:
                # No point going further if there is no new article.
                end_page(main_page, now, first_passage)
                continue

            # Page list for the SQL query against the frwiki_p database
            list_new_str = '("'
            list_new_str += '", "'.join([page.title(asLink=False, underscore=True).replace('"', '\\"')
                                         for page in list_new])
            list_new_str += '")'
            pywikibot.output(list_new_str)

            # Only works with ns 0 pages for now
            frwiki_p = _mysql.connect(host='frwiki.analytics.db.svc.eqiad.wmflabs',
                                      db='frwiki_p',
                                      read_default_file="/data/project/totoazero/replica.my.cnf")
            pywikibot.output('SELECT page_title, page_id FROM page where page_title IN %s AND page_namespace=0'
                             % list_new_str.encode('utf-8'))
            results = frwiki_p.query('SELECT page_title, page_id FROM page where page_title IN %s AND page_namespace=0'
                                     % list_new_str.encode('utf-8'))
            results = frwiki_p.store_result()
            result = results.fetch_row(maxrows=0)
            pywikibot.output(result)

            dico_result = {}
            for tuple in result:
                title = tuple[0]
                id = tuple[1]
                dico_result[title] = id
            pywikibot.output(dico_result)

            dico_timestamp = {}
            pywikibot.output('stade 3')

            frwiki_p = _mysql.connect(host='frwiki.analytics.db.svc.eqiad.wmflabs',
                                      db='frwiki_p',
                                      read_default_file="/data/project/totoazero/replica.my.cnf")
            for key in dico_result:
                id = dico_result[key]
                pywikibot.output('SELECT cl_from, cl_timestamp FROM categorylinks WHERE cl_from = %s AND cl_to = "%s"'
                                 % (id.encode('utf-8'),
                                    cat.title(asLink=False, underscore=True, withNamespace=False).encode('utf-8')))
                results = frwiki_p.query('SELECT cl_from, cl_timestamp FROM categorylinks WHERE cl_from = %s AND cl_to = "%s"'
                                         % (id.encode('utf-8'),
                                            cat.title(asLink=False, underscore=True, withNamespace=False).encode('utf-8')))
                results = frwiki_p.store_result()
                result = results.fetch_row(maxrows=0)
                if result:
                    dico_timestamp[key.decode('utf-8')] = pywikibot.Timestamp.strptime(
                        result[0][1], "%Y-%m-%d %H:%M:%S")
                else:
                    pywikibot.output(u"pas de date trouvée pour %s"
                                     % key.decode('utf-8'))
            pywikibot.output(dico_timestamp)

            # Turn the new pages into titles: needed further down!
            list_new = [page.title(asLink=True) for page in list_new]

            # Fetch info about the category.
            # NB: if this slows the script down, the cat_info['pages'] item,
            # i.e. the number of pages contained in the category, should be
            # replaceable by len(listeCategorie) + len(list_new).
            cat_info = site.categoryinfo(cat)
            pywikibot.output(cat_info)

            pywikibot.output('stade 4')
            list_new_old = list()
            list_new_old.extend(list_new)

            pywikibot.output('delai_creation is %s' % delai_creation)
            #for titre_page in list_new_old:
            #    print titre_page

            for titre_page in list_new_old:
                # NB: titre_page has the form [[Page name]]
                pywikibot.output("----------")
                pywikibot.output(u"Page récemment ajoutée : %s" % titre_page)
                if not titre_page in listeRecents:
                    if delai_creation:
                        # Required delay (in hours) since the article was
                        # created, beyond which an article recently added
                        # to the category must not appear in the list.
                        # Example: delai_creation = 24
                        # => the bot only lists articles created less
                        # than 24h ago.
                        page = pywikibot.Page(site, titre_page[2:-2])
                        # NB: date_creation and date_plus_petite_requise
                        # are pywikibot.Timestamp objects
                        date_creation = page.getVersionHistory()[-1][1]
                        pywikibot.output(date_creation)
                        if delai_creation > 0:
                            date_plus_petite_requise = pywikibot.Timestamp.now() \
                                - datetime.timedelta(hours=delai_creation)
                        elif delai_creation == -1:
                            # 'timestamp' was set above to the date of the
                            # bot's last edit to the page.
                            date_plus_petite_requise = timestamp
                        pywikibot.output(date_plus_petite_requise)
                        if date_plus_petite_requise > date_creation:
                            pywikibot.output(u"Vérification du délai : Non")
                            pywikibot.output(u"La page ne satisfait pas le délai depuis la création imposé.")
                            list_new.remove(titre_page)
                            continue
                        else:
                            pywikibot.output(u"Vérification du délai : OK")
                    precisions_comment2 += (u"; + %s" % titre_page)
                else:
                    # If the article is already in listeRecents there is
                    # no point adding it again.
                    list_new.remove(titre_page)
                    pywikibot.output(u"L'article était déjà présent sur la page.")

            # Check again that list_new still contains at least one page.
            if len(list_new) == 0 and not exception_maj:
                # No point going further if there is no new article.
                pywikibot.output('Nothing left.')
                continue

            # Check again that list_new still contains at least one page.
            if len(list_new) == 0 and not exception_maj:
                # No point going further if there is no new article.
                end_page(main_page, now, first_passage)
                continue

            if precisions_comment:
                # If precisions_comment already holds info (page deletions)
                precisions_comment += precisions_comment2
            else:
                precisions_comment = precisions_comment2[2:]  # Strip the leading '; '

            pywikibot.output('stade 5')

            # Complete the edit summary
            comment = comment % {'nombre_articles': cat_info['pages'],
                                 'precision_pages': precisions_comment}

            #####################
            # Build the list of recent articles
            #####################
            liste_nouveaux_recents = list()
            liste_nouveaux_recents.extend(list_new)

            # If the number of new articles is strictly below the maximum
            # number of recent articles that must be displayed.
            if len(liste_nouveaux_recents) < nbMax:
                i = 0
                while len(liste_nouveaux_recents) != nbMax:
                    if len(listeRecents) < i + 1:
                        # In case listeRecents does not contain
                        # enough elements.
                        break
                    liste_nouveaux_recents.append(listeRecents[i])
                    i += 1
                    if i == len(listeRecents):  # Could cause a length error
                        break
            elif len(liste_nouveaux_recents) > nbMax:
                liste_nouveaux_recents = liste_nouveaux_recents[0:(nbMax - 1)]

            # liste_nouveaux_recents now contains exactly nbMax
            # recent articles

            pywikibot.output('stade 6')
            liste_nouveaux_recents_string = u"<!-- Ce tableau est créé automatiquement par un robot. Articles Récents DEBUT -->"
            for titre_article in liste_nouveaux_recents:
                liste_nouveaux_recents_string += u'\n%s %s' % (puce, titre_article)
                if format_date and dico_timestamp.has_key(titre_article[2:-2].replace(' ', '_')):
                    pywikibot.output('stade 6-1')
                    pywikibot.output(dico_timestamp[titre_article[2:-2].replace(' ', '_')].strftime(format_date))
                    try:
                        liste_nouveaux_recents_string += (
                            ' (' + dico_timestamp[titre_article[2:-2].replace(' ', '_')].strftime(format_date).decode('utf-8') + ')')
                    except:
                        try:
                            liste_nouveaux_recents_string += (
                                ' (' + dico_timestamp[titre_article[2:-2].replace(' ', '_')].strftime(format_date) + ')')
                        except:
                            raise Exception("erreur au stade 6-1")
                elif dico_dates_presentes.has_key(titre_article):
                    pywikibot.output('stade 6-2')
                    pywikibot.output(dico_dates_presentes[titre_article])
                    try:
                        liste_nouveaux_recents_string += (
                            ' (' + dico_dates_presentes[titre_article] + ')')
                    except:  # UnicodeEncodeError:
                        try:
                            liste_nouveaux_recents_string += (
                                ' (' + dico_dates_presentes[titre_article].decode('utf-8') + ')')
                        except:
                            raise Exception("erreur au stade 6-2")
            liste_nouveaux_recents_string += u"\n<!-- Ce tableau est créé automatiquement par un robot. Articles Récents FIN -->"
            #####################

            #####################
            # Update the page content
            #####################
            new_text = text
            pywikibot.output('stade 7')

            # Update the list of recent articles (listeRecents)
            new_text = re.sub(re.compile(u'%s.*%s' % (matchDebut1, matchFin1),
                                         re.S),
                              liste_nouveaux_recents_string, new_text)
            pywikibot.output(new_text)
            pywikibot.output(u'Commentaire: %s' % comment)

            if not dry:
                main_page.put(new_text, comment=comment)
                end_page(main_page, now, first_passage)
            else:
                pywikibot.showDiff(main_page.get(), new_text)
            #####################
        except Exception, myexception:
            pywikibot.output("Erreur lors du traitement de la page %s"
                             % main_page.title(asLink=True))
            _errorhandler.handle(myexception, level='warning',
                                 addtags={'page': main_page.title(asLink=True)})
def run(self):
    """Run the bot."""
    # validate L10N
    self.template_list = self.site.category_redirects()
    if not self.template_list:
        pywikibot.warning('No redirect templates defined for {}'.format(
            self.site))
        return
    if not self.get_cat():
        pywikibot.warning('No redirect category found for {}'.format(
            self.site))
        return

    self.user = self.site.user()  # invokes login()
    self.newredirs = []

    localtime = time.localtime()
    today = '{:04d}-{:02d}-{:02d}'.format(*localtime[:3])
    self.datafile = pywikibot.config.datafilepath(
        '{}-catmovebot-data'.format(self.site.dbName()))
    try:
        with open(self.datafile, 'rb') as inp:
            self.record = pickle.load(inp)
    except IOError:
        self.record = {}
    if self.record:
        with open(self.datafile + '.bak', 'wb') as f:
            pickle.dump(self.record, f, protocol=config.pickle_protocol)
    # regex to match soft category redirects
    # TODO: enhance and use textlib.MultiTemplateMatchBuilder
    # note that any templates containing optional "category:" are
    # incorrect and will be fixed by the bot
    template_regex = re.compile(
        r"""{{\s*(?:%(prefix)s\s*:\s*)?  # optional "template:"
                 (?:%(template)s)\s*\|   # catredir template name
                 (\s*%(catns)s\s*:\s*)?  # optional "category:"
                 ([^|}]+)                # redirect target cat
                 (?:\|[^|}]*)*}}         # optional arguments 2+, ignored
         """ % {'prefix': self.site.namespace(10).lower(),
                'template': '|'.join(item.replace(' ', '[ _]+')
                                     for item in self.template_list),
                'catns': self.site.namespace(14)},
        re.I | re.X)

    self.check_hard_redirect()

    comment = i18n.twtranslate(self.site, self.move_comment)
    counts = {}
    nonemptypages = []
    redircat = self.cat
    pywikibot.output('\nChecking {} category redirect pages'.format(
        redircat.categoryinfo['subcats']))
    catpages = set()
    for cat in redircat.subcategories():
        catpages.add(cat)
        cat_title = cat.title(with_ns=False)
        if 'category redirect' in cat_title:
            message = i18n.twtranslate(
                self.site, 'category_redirect-log-ignoring',
                {'oldcat': cat.title(as_link=True, textlink=True)})
            self.log_text.append(message)
            continue
        if hasattr(cat, '_catinfo'):
            # skip empty categories that don't return a "categoryinfo" key
            catdata = cat.categoryinfo
            if 'size' in catdata and int(catdata['size']):
                # save those categories that have contents
                nonemptypages.append(cat)
        if cat_title not in self.record:
            # make sure every redirect has a self.record entry
            self.record[cat_title] = {today: None}
            with suppress(Error):
                self.newredirs.append('*# {} → {}'.format(
                    cat.title(as_link=True, textlink=True),
                    cat.getCategoryRedirectTarget().title(as_link=True,
                                                          textlink=True)))
        # do a null edit on cat
        with suppress(Exception):
            cat.save()

    # delete self.record entries for non-existent categories
    for cat_name in list(self.record):
        if pywikibot.Category(self.site,
                              self.catprefix + cat_name) not in catpages:
            del self.record[cat_name]

    pywikibot.output('\nMoving pages out of {} redirected categories.'
                     .format(len(nonemptypages)))
    for cat in pagegenerators.PreloadingGenerator(nonemptypages):
        try:
            if not cat.isCategoryRedirect():
                message = i18n.twtranslate(
                    self.site, 'category_redirect-log-false-positive',
                    {'oldcat': cat.title(as_link=True, textlink=True)})
                self.log_text.append(message)
                continue
        except Error:
            message = i18n.twtranslate(
                self.site, 'category_redirect-log-not-loaded',
                {'oldcat': cat.title(as_link=True, textlink=True)})
            self.log_text.append(message)
            continue

        cat_title = cat.title(with_ns=False)
        if not self.readyToEdit(cat):
            counts[cat_title] = None
            message = i18n.twtranslate(
                self.site, 'category_redirect-log-skipping',
                {'oldcat': cat.title(as_link=True, textlink=True)})
            self.log_text.append(message)
            continue

        dest = cat.getCategoryRedirectTarget()
        if not dest.exists():
            message = i18n.twtranslate(
                self.site, 'category_redirect-problem-redirects',
                {'oldcat': cat.title(as_link=True, textlink=True),
                 'redpage': dest.title(as_link=True, textlink=True)})
            self.problems.append(message)
            # do a null edit on cat to update any special redirect
            # categories this wiki might maintain
            with suppress(Exception):
                cat.save()
            continue

        if dest.isCategoryRedirect():
            double = dest.getCategoryRedirectTarget()
            if double in (dest, cat):
                message = i18n.twtranslate(
                    self.site, 'category_redirect-log-loop',
                    {'oldcat': dest.title(as_link=True, textlink=True)})
                self.log_text.append(message)
                # do a null edit on cat
                with suppress(Exception):
                    cat.save()
            else:
                message = i18n.twtranslate(
                    self.site, 'category_redirect-log-double',
                    {'oldcat': cat.title(as_link=True, textlink=True),
                     'newcat': dest.title(as_link=True, textlink=True),
                     'targetcat': double.title(as_link=True, textlink=True)})
                self.log_text.append(message)
                oldtext = cat.text
                # remove the old redirect from the old text,
                # leaving behind any non-redirect text
                oldtext = template_regex.sub('', oldtext)
                newtext = ('{{%(redirtemp)s|%(ncat)s}}'
                           % {'redirtemp': self.template_list[0],
                              'ncat': double.title(with_ns=False)})
                newtext = newtext + oldtext.strip()
                try:
                    cat.text = newtext
                    cat.save(i18n.twtranslate(self.site,
                                              self.dbl_redir_comment))
                except Error as e:
                    message = i18n.twtranslate(
                        self.site, 'category_redirect-log-failed',
                        {'error': e})
                    self.log_text.append(message)
            continue

        found, moved = self.move_contents(cat_title,
                                          dest.title(with_ns=False),
                                          editSummary=comment)
        if found is None:
            message = i18n.twtranslate(
                self.site, 'category_redirect-log-move-error',
                {'oldcat': cat.title(as_link=True, textlink=True)})
            self.log_text.append(message)
        elif found:
            self.record[cat_title][today] = found
            message = i18n.twtranslate(
                self.site, 'category_redirect-log-moved',
                {'oldcat': cat.title(as_link=True, textlink=True),
                 'found': found, 'moved': moved})
            self.log_text.append(message)
        counts[cat_title] = found
        # do a null edit on cat
        with suppress(Exception):
            cat.save()

    self.teardown()
import pywikibot
from pywikibot import pagegenerators

siteC = pywikibot.Site(u'commons', u'commons')
siteC.login()
category = pywikibot.Category(siteC, u'Images from Wiki Loves Africa 2021')
gen = pagegenerators.CategorizedPageGenerator(category)

cmp_sound = 0
cmp_video = 0
for file in gen:
    title = file.title()
    text = file.text
    print('Analyze: ' + file.title())
    # str.endswith handles extensions of different lengths ('.webm' is five
    # characters, so the original title[-4:] comparison could never match it)
    if title.endswith(('.webm', '.ogv')):
        if '[[Category:Videos from Wiki Loves Africa 2021]]' not in text:
            newtext = text + '\r\n' + '[[Category:Videos from Wiki Loves Africa 2021]]'
            file.text = newtext
            file.save(u"Add category video WLA")
            print(file.title() + ' - WLA ***************************')
            cmp_video = cmp_video + 1  # count videos (the two counters were swapped)
    elif title.endswith(('.ogg', '.wav')):
        if '[[Category:Audio from Wiki Loves Africa 2021]]' not in text:
            newtext = text + '\r\n' + '[[Category:Audio from Wiki Loves Africa 2021]]'
            file.text = newtext
            file.save(u"Add category sound WLA")
            print(file.title() + ' - WLA ***************************')
            cmp_sound = cmp_sound + 1  # count audio files
        checkbroken = False
    elif arg.startswith('-keepparent'):
        removeparent = False
    elif arg.startswith('-all'):
        main = False
    elif not workingcatname:
        workingcatname = arg

if not workingcatname:
    pywikibot.bot.suggest_help(missing_parameters=['working category'])
    sys.exit(0)

mysite = pywikibot.Site()
summary = i18n.twtranslate(mysite, 'makecat-create', {'cat': workingcatname})
workingcat = pywikibot.Category(mysite,
                                u'%s:%s' % (mysite.namespaces.CATEGORY,
                                            workingcatname))
filename = pywikibot.config.datafilepath(
    'category',
    workingcatname.encode('ascii', 'xmlcharrefreplace') + '_exclude.txt')
try:
    f = codecs.open(filename, 'r', encoding=mysite.encoding())
    for line in f.readlines():
        # remove trailing newlines and carriage returns
        try:
            while line[-1] in ['\n', '\r']:
                line = line[:-1]
        except IndexError:
            pass
        pl = pywikibot.Page(mysite, line)
        checked[pl] = pl
    f.close()
'''
Author: Fae, http://j.mp/faewm
Permissions: CC-BY-SA-4.0
'''
import pywikibot, sys, re, string, time
from sys import argv
from time import sleep
from colorama import Fore, Back, Style, init

init()
site = pywikibot.getSite('commons', 'commons')
print Fore.GREEN + NOTICE, Fore.WHITE

catname = u"Category:Images from the Canadian Copyright Collection at the British Library"
category = pywikibot.Category(site, catname)
pairs = []
count = 0
for image in category.members():
    if image.namespace() != ":File:":
        continue
    if not re.search("\(HS85-10-.{4,}\).jpg", image.title()):
        continue
    pair = []
    for ext in ['tif', 'tiff']:
        tiff = re.sub("\.jpg$", " original." + ext, image.title())
        tim = pywikibot.ImagePage(site, tiff)
        if tim.exists():
            pair = [image, tim]
            break
    if pair == []:
def main():
    site = pywikibot.Site('wikiapiary', 'wikiapiary')
    catname = 'Category:Website'
    cat = pywikibot.Category(site, catname)
    gen = pagegenerators.CategorizedPageGenerator(cat, start='Spyropedia')
    pre = pagegenerators.PreloadingGenerator(gen)
    for page in pre:
        if page.isRedirectPage():
            continue
        wtitle = page.title()
        wtext = page.text
        #if not wtitle.startswith('5'):
        #    continue
        if re.search('Internet Archive', wtext):
            #print('It has IA parameter')
            pass
        else:
            print('\n', '#' * 50, '\n', wtitle, '\n', '#' * 50)
            print('https://wikiapiary.com/wiki/%s' % (re.sub(' ', '_', wtitle)))
            print('Missing IA parameter')
            if re.search(r'(?i)API URL=http', wtext):
                apiurl = re.findall(r'(?i)API URL=(http[^\n]+?)\n', wtext)[0]
                print('API:', apiurl)
            else:
                print('No API found in WikiApiary, skipping')
                continue
            indexurl = 'index.php'.join(apiurl.rsplit('api.php', 1))
            urliasearch = 'https://archive.org/search.php?query=originalurl:"%s" OR originalurl:"%s"' % (
                apiurl, indexurl)
            f = urllib.request.urlopen(urliasearch)
            raw = f.read().decode('utf-8')
            if re.search(r'(?i)Your search did not match any items', raw):
                print('No dumps found at Internet Archive')
            else:
                itemidentifier = re.findall(
                    r'<a href="/details/([^ ]+?)" title=', raw)[0]
                itemurl = 'https://archive.org/details/%s' % (itemidentifier)
                print('Item found:', itemurl)
                metaurl = 'https://archive.org/download/%s/%s_files.xml' % (
                    itemidentifier, itemidentifier)
                g = urllib.request.urlopen(metaurl)
                raw2 = g.read().decode('utf-8')
                raw2 = raw2.split('</file>')
                itemfiles = []
                for raw2_ in raw2:
                    try:
                        x = re.findall(
                            r'(?im)<file name="[^ ]+-(\d{8})-[^ ]+" source="original">',
                            raw2_)[0]
                        y = re.findall(r'(?im)<size>(\d+)</size>', raw2_)[0]
                        itemfiles.append([int(x), int(y)])
                    except:
                        pass
                itemfiles.sort(reverse=True)
                print(itemfiles)
                itemdate = str(itemfiles[0][0])[0:4] + '/' + \
                    str(itemfiles[0][0])[4:6] + '/' + str(itemfiles[0][0])[6:8]
                itemsize = itemfiles[0][1]
                iaparams = """|Internet Archive identifier=%s
|Internet Archive URL=%s
|Internet Archive added date=%s 00:00:00
|Internet Archive file size=%s""" % (itemidentifier, itemurl, itemdate, itemsize)
                newtext = page.text
                newtext = re.sub(r'(?im)\n\}\}', '\n%s\n}}' % (iaparams), newtext)
                if page.text != newtext:
                    pywikibot.showDiff(page.text, newtext)
                    page.text = newtext
                    page.save('BOT - Adding dump details: %s, %s, %s bytes'
                              % (itemidentifier, itemdate, itemsize),
                              botflag=True)
def main(*args):
    """
    Process command line arguments and perform task.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    # Loading the comments
    global categoryToCheck, project_inserted
    # always, define a generator to understand if the user sets one,
    # defining what's genFactory
    always = False
    generator = False
    show = False
    moveBlockCheck = False
    protectedpages = False
    protectType = 'edit'
    namespace = 0

    # To prevent Infinite loops
    errorCount = 0

    # Process global args and prepare generator args parser
    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()

    # Process local args
    for arg in local_args:
        if arg == '-always':
            always = True
        elif arg == '-move':
            moveBlockCheck = True
        elif arg == '-show':
            show = True
        elif arg.startswith('-protectedpages'):
            protectedpages = True
            if len(arg) > 15:
                namespace = int(arg[16:])
        elif arg.startswith('-moveprotected'):
            protectedpages = True
            protectType = 'move'
            if len(arg) > 14:
                namespace = int(arg[15:])
        else:
            genFactory.handleArg(arg)

    if config.mylang not in project_inserted:
        pywikibot.output(u"Your project is not supported by this script.\n"
                         u"You have to edit the script and add it!")
        return

    site = pywikibot.Site()

    if protectedpages:
        generator = site.protectedpages(namespace=namespace, type=protectType)
    # Take the right templates to use, the category and the comment
    TSP = i18n.translate(site, templateSemiProtection)
    TTP = i18n.translate(site, templateTotalProtection)
    TSMP = i18n.translate(site, templateSemiMoveProtection)
    TTMP = i18n.translate(site, templateTotalMoveProtection)
    TNR = i18n.translate(site, templateNoRegex)
    TU = i18n.translate(site, templateUnique)

    categories = i18n.translate(site, categoryToCheck)
    commentUsed = i18n.twtranslate(site, 'blockpageschecker-summary')
    if not generator:
        generator = genFactory.getCombinedGenerator()
    if not generator:
        generator = list()
        pywikibot.output(u'Loading categories...')
        # Define the category if no other generator has been set
        for CAT in categories:
            cat = pywikibot.Category(site, CAT)
            # Define the generator
            gen = pagegenerators.CategorizedPageGenerator(cat)
            for pageCat in gen:
                generator.append(pageCat)
        pywikibot.output(u'Categories loaded, start!')
    # Main Loop
    preloadingGen = pagegenerators.PreloadingGenerator(generator, step=60)
    for page in preloadingGen:
        pagename = page.title(asLink=True)
        pywikibot.output('Loading %s...' % pagename)
        try:
            text = page.text
        except pywikibot.NoPage:
            pywikibot.output("%s doesn't exist! Skipping..." % pagename)
            continue
        except pywikibot.IsRedirectPage:
            pywikibot.output("%s is a redirect! Skipping..." % pagename)
            if show:
                showQuest(page)
            continue
        # FIXME: This check does not work :
        # PreloadingGenerator cannot set correctly page.editRestriction
        # (see bug 55322)
        # if not page.canBeEdited():
        #     pywikibot.output("%s is sysop-protected : this account can't edit "
        #                      "it! Skipping..." % pagename)
        #     continue
        restrictions = page.protection()
        try:
            editRestr = restrictions['edit']
        except KeyError:
            editRestr = None
        if not page.canBeEdited():
            pywikibot.output(u"%s is protected: "
                             u"this account can't edit it! Skipping..."
                             % pagename)
            continue

        # Understand, according to the template in the page, what should be
        # the protection and compare it with what there really is.
        TemplateInThePage = understandBlock(text, TTP, TSP, TSMP, TTMP, TU)
        # Only to see if the text is the same or not...
        oldtext = text
        # keep track of the changes for each step (edit then move)
        changes = -1

        if not editRestr:
            # page is not edit-protected
            # Deleting the template because the page doesn't need it.
            if TU:
                replaceToPerform = u'|'.join(TTP + TSP + TU)
            else:
                replaceToPerform = u'|'.join(TTP + TSP)
            text, changes = re.subn('<noinclude>(%s)</noinclude>'
                                    % replaceToPerform, '', text)
            if changes == 0:
                text, changes = re.subn('(%s)' % replaceToPerform, '', text)
            msg = u'The page is editable for all'
            if not moveBlockCheck:
                msg += u', deleting the template..'
            pywikibot.output(u'%s.' % msg)

        elif editRestr[0] == 'sysop':
            # total edit protection
            if (TemplateInThePage[0] == 'sysop-total' and TTP) or \
               (TemplateInThePage[0] == 'unique' and TU):
                msg = 'The page is protected to the sysop'
                if not moveBlockCheck:
                    msg += ', skipping...'
                pywikibot.output(msg)
            else:
                pywikibot.output(u'The page is protected to the sysop, but '
                                 u'the template seems not correct. Fixing...')
                if TU:
                    text, changes = re.subn(TemplateInThePage[1], TNR[4], text)
                else:
                    text, changes = re.subn(TemplateInThePage[1], TNR[1], text)

        elif TSP or TU:
            # implicitly editRestr[0] = 'autoconfirmed', edit-Semi-protection
            if TemplateInThePage[0] == 'autoconfirmed-total' or \
               TemplateInThePage[0] == 'unique':
                msg = 'The page is editable only for the autoconfirmed users'
                if not moveBlockCheck:
                    msg += ', skipping...'
                pywikibot.output(msg)
            else:
                pywikibot.output(u'The page is editable only for the '
                                 u'autoconfirmed users, but the template '
                                 u'seems not correct. Fixing...')
                if TU:
                    text, changes = re.subn(TemplateInThePage[1], TNR[4], text)
                else:
                    text, changes = re.subn(TemplateInThePage[1], TNR[0], text)

        if changes == 0:
            # We tried to fix edit-protection templates, but it did not work.
            pywikibot.warning('No edit-protection template could be found')

        if moveBlockCheck and changes > -1:
            # checking move protection now
            try:
                moveRestr = restrictions['move']
            except KeyError:
                moveRestr = False
            changes = -1

            if not moveRestr:
                pywikibot.output(u'The page is movable for all, deleting the '
                                 u'template...')
                # Deleting the template because the page doesn't need it.
                if TU:
                    replaceToPerform = u'|'.join(TSMP + TTMP + TU)
                else:
                    replaceToPerform = u'|'.join(TSMP + TTMP)
                text, changes = re.subn('<noinclude>(%s)</noinclude>'
                                        % replaceToPerform, '', text)
                if changes == 0:
                    text, changes = re.subn('(%s)' % replaceToPerform, '',
                                            text)
            elif moveRestr[0] == 'sysop':
                # move-total-protection
                if (TemplateInThePage[0] == 'sysop-move' and TTMP) or \
                   (TemplateInThePage[0] == 'unique' and TU):
                    pywikibot.output(u'The page is protected from moving to '
                                     u'the sysop, skipping...')
                    if TU:
                        # no changes needed, better to revert the old text.
                        text = oldtext
                else:
                    pywikibot.output(u'The page is protected from moving to '
                                     u'the sysop, but the template seems not '
                                     u'correct. Fixing...')
                    if TU:
                        text, changes = re.subn(TemplateInThePage[1], TNR[4],
                                                text)
                    else:
                        text, changes = re.subn(TemplateInThePage[1], TNR[3],
                                                text)
            elif TSMP or TU:
                # implicitly moveRestr[0] = 'autoconfirmed',
                # move-semi-protection
                if TemplateInThePage[0] == 'autoconfirmed-move' or \
                   TemplateInThePage[0] == 'unique':
                    pywikibot.output(u'The page is movable only for the '
                                     u'autoconfirmed users, skipping...')
                    if TU:
                        # no changes needed, better to revert the old text.
                        text = oldtext
                else:
                    pywikibot.output(u'The page is movable only for the '
                                     u'autoconfirmed users, but the template '
                                     u'seems not correct. Fixing...')
                    if TU:
                        text, changes = re.subn(TemplateInThePage[1], TNR[4],
                                                text)
                    else:
                        text, changes = re.subn(TemplateInThePage[1], TNR[2],
                                                text)

            if changes == 0:
                # We tried to fix move-protection templates,
                # but it did not work
                pywikibot.warning('No move-protection template could be found')

        if oldtext != text:
            # Ok, asking if the change has to be performed and do it if yes.
            pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                             % page.title())
            pywikibot.showDiff(oldtext, text)
            if not always:
                choice = pywikibot.input_choice(u'Do you want to accept these '
                                                u'changes?',
                                                [('Yes', 'y'), ('No', 'n'),
                                                 ('All', 'a')], 'n')
                if choice == 'a':
                    always = True
            if always or choice == 'y':
                while True:
                    try:
                        page.put(text, commentUsed, force=True)
                    except pywikibot.EditConflict:
                        pywikibot.output(u'Edit conflict! skip!')
                        break
                    except pywikibot.ServerError:
                        # Sometimes there is this error that's quite annoying
                        # because can block the whole process for nothing.
                        errorCount += 1
                        if errorCount < 5:
                            pywikibot.output(u'Server Error! Wait..')
                            time.sleep(3)
                            continue
                        else:
                            # Prevent Infinite Loops
                            raise pywikibot.ServerError(u'Fifth Server Error!')
                    except pywikibot.SpamfilterError as e:
                        pywikibot.output(u'Cannot change %s because of '
                                         u'blacklist entry %s'
                                         % (page.title(), e.url))
                        break
                    except pywikibot.LockedPage:
                        pywikibot.output(u'The page is still protected. '
                                         u'Skipping...')
                        break
                    except pywikibot.PageNotSaved as error:
                        pywikibot.output(u'Error putting page: %s'
                                         % (error.args,))
                        break
                    else:
                        # Break only if the errors are one after the other
                        errorCount = 0
                        break
def main():
    summary_commandline, gen, template = None, None, None
    namespaces, PageTitles, exceptions = [], [], []
    encat, newcatfile = '', ''
    autoText, autoTitle = False, False
    recentcat, newcat = False, False
    genFactory = pagegenerators.GeneratorFactory()
    for arg in pywikibot.handleArgs():
        if arg == '-autotitle':
            autoTitle = True
        elif arg == '-autotext':
            autoText = True
        elif arg.startswith('-page'):
            if len(arg) == 5:
                PageTitles.append(
                    pywikibot.input(u'Which page do you want to change?'))
            else:
                PageTitles.append(arg[6:])
            break
        elif arg.startswith('-except:'):
            exceptions.append(arg[8:])
        elif arg.startswith('-template:'):
            template = arg[10:]
        elif arg.startswith('-facat:'):
            facat = arg.replace(u'Category:', u'').replace(
                u'category:', u'').replace(u'زمرو:', u'')
            encat = englishdictionry(u'زمرو:' + facat[7:], fa_site,
                                     en_site).replace(u'Category:', u'').replace(
                                         u'category:', u'')
            break
        elif arg.startswith('-encat:'):
            encat = arg[7:].replace(u'Category:', u'').replace(
                u'category:', u'').replace(u'زمرو:', u'')
            break
        elif arg.startswith('-newcatfile:'):
            newcatfile = arg[12:]
            break
        elif arg.startswith('-recentcat'):
            arg = arg.replace(':', '')
            if len(arg) == 10:
                genfa = pagegenerators.RecentchangesPageGenerator()
            else:
                genfa = pagegenerators.RecentchangesPageGenerator(
                    number=int(arg[10:]))
            genfa = pagegenerators.DuplicateFilterPageGenerator(genfa)
            genfa = pagegenerators.NamespaceFilterPageGenerator(genfa, [14])
            preloadingGen = pagegenerators.PreloadingGenerator(genfa, 60)
            recentcat = True
            break
        elif arg.startswith('-newcat'):
            arg = arg.replace(':', '')
            if len(arg) == 7:
                genfa = pagegenerators.NewpagesPageGenerator(step=100,
                                                             namespaces=14)
            else:
                genfa = pagegenerators.NewpagesPageGenerator(step=int(arg[7:]),
                                                             namespaces=14)
            preloadingGen = pagegenerators.PreloadingGenerator(genfa, 60)
            newcat = True
            break
        elif arg.startswith('-namespace:'):
            namespaces.append(int(arg[11:]))
        elif arg.startswith('-summary:'):
            pywikibot.setAction(arg[9:])
            summary_commandline = True
        else:
            generator = genFactory.handleArg(arg)
            if generator:
                gen = genFactory.getCombinedGenerator(gen)

    if encat != '':
        encatfalist, encatlists = encatlist(encat)
        if encatlists:
            for encat in encatlists:
                encat = englishdictionry(encat, en_site, fa_site)
                if encat:
                    run([encat])
        if encatfalist is not False:
            run(encatfalist)
    if PageTitles:
        pages = [pywikibot.Page(fa_site, PageTitle)
                 for PageTitle in PageTitles]
        gen = iter(pages)
    if recentcat:
        for workpage in preloadingGen:
            workpage = workpage.title()
            cat = pywikibot.Category(fa_site, workpage)
            gent = pagegenerators.CategorizedPageGenerator(cat)
            run(gent)
        pywikibot.stopme()
        sys.exit()
    if newcat:
        for workpage in preloadingGen:
            workpage = workpage.title()
            workpage = englishdictionry(workpage, fa_site, en_site)
            if workpage is not False:
                encatfalist, encatlists = encatlist(workpage)
                if encatlists:
                    for encat in encatlists:
                        encat = englishdictionry(encat, en_site, fa_site)
                        if encat:
                            run([encat])
                if encatfalist is not False:
                    run(encatfalist)
        pywikibot.stopme()
        sys.exit()
    if newcatfile:
        text2 = codecs.open(newcatfile, 'r', 'utf8')
        text = text2.read()
        linken = re.findall(ur'\[\[.*?\]\]', text, re.S)
        if linken:
            for workpage in linken:
                pywikibot.output(u'\03{lightblue}Working on --- Link '
                                 + workpage + u' at the newcatfile\03{default}')
                workpage = workpage.split(u'|')[0].replace(
                    u'[[', u'').replace(u']]', u'').strip()
                workpage = englishdictionry(workpage, fa_site, en_site)
                if workpage is not False:
                    encatfalist, encatlists = encatlist(workpage)
                    workpage = englishdictionry(workpage, fa_site, en_site)
                    if encatlists:
                        run(encatlists)
                    if encatfalist is not False:
                        run(encatfalist)
        pywikibot.stopme()
        sys.exit()
    if not gen:
        pywikibot.stopme()
        sys.exit()
    if namespaces != []:
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
    preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber=60)
    run(preloadingGen)
def run(self):
    """Run the bot."""
    # validate L10N
    self.template_list = self.site.category_redirects()
    if not self.template_list:
        pywikibot.warning(u"No redirect templates defined for %s" % self.site)
        return
    if not self.get_cat():
        pywikibot.warning(u"No redirect category found for %s" % self.site)
        return

    user = self.site.user()  # invokes login()
    newredirs = []

    localtime = time.localtime()
    today = '%04d-%02d-%02d' % localtime[:3]
    edit_request_page = pywikibot.Page(
        self.site, u"User:%s/category edit requests" % user)
    datafile = pywikibot.config.datafilepath("%s-catmovebot-data"
                                             % self.site.dbName())
    try:
        with open(datafile, "rb") as inp:
            record = cPickle.load(inp)
    except IOError:
        record = {}
    if record:
        with open(datafile + ".bak", "wb") as f:
            cPickle.dump(record, f, protocol=config.pickle_protocol)
    # regex to match soft category redirects
    # TODO: enhance and use textlib._MultiTemplateMatchBuilder
    # note that any templates containing optional "category:" are
    # incorrect and will be fixed by the bot
    template_regex = re.compile(
        r"""{{\s*(?:%(prefix)s\s*:\s*)?  # optional "template:"
                 (?:%(template)s)\s*\|   # catredir template name
                 (\s*%(catns)s\s*:\s*)?  # optional "category:"
                 ([^|}]+)                # redirect target cat
                 (?:\|[^|}]*)*}}         # optional arguments 2+, ignored
         """ % {'prefix': self.site.namespace(10).lower(),
                'template': "|".join(item.replace(" ", "[ _]+")
                                     for item in self.template_list),
                'catns': self.site.namespace(14)},
        re.I | re.X)

    self.check_hard_redirect()

    comment = i18n.twtranslate(self.site, self.move_comment)
    counts = {}
    nonemptypages = []
    redircat = self.cat
    pywikibot.output(u"\nChecking %d category redirect pages"
                     % redircat.categoryinfo['subcats'])
    catpages = set()
    for cat in redircat.subcategories():
        catpages.add(cat)
        cat_title = cat.title(withNamespace=False)
        if "category redirect" in cat_title:
            self.log_text.append(u"* Ignoring %s"
                                 % cat.title(asLink=True, textlink=True))
            continue
        if hasattr(cat, "_catinfo"):
            # skip empty categories that don't return a "categoryinfo" key
            catdata = cat.categoryinfo
            if "size" in catdata and int(catdata['size']):
                # save those categories that have contents
                nonemptypages.append(cat)
        if cat_title not in record:
            # make sure every redirect has a record entry
            record[cat_title] = {today: None}
            try:
                newredirs.append("*# %s -> %s"
                                 % (cat.title(asLink=True, textlink=True),
                                    cat.getCategoryRedirectTarget().title(
                                        asLink=True, textlink=True)))
            except pywikibot.Error:
                pass
        # do a null edit on cat
        try:
            cat.save()
        except Exception:
            pass

    # delete record entries for non-existent categories
    for cat_name in record.keys():
        if pywikibot.Category(self.site,
                              self.catprefix + cat_name) not in catpages:
            del record[cat_name]

    pywikibot.output(u"\nMoving pages out of %s redirected categories."
                     % len(nonemptypages))
    for cat in pagegenerators.PreloadingGenerator(nonemptypages):
        try:
            if not cat.isCategoryRedirect():
                self.log_text.append(u"* False positive: %s"
                                     % cat.title(asLink=True, textlink=True))
                continue
        except pywikibot.Error:
            self.log_text.append(u"* Could not load %s; ignoring"
                                 % cat.title(asLink=True, textlink=True))
            continue
        cat_title = cat.title(withNamespace=False)
        if not self.readyToEdit(cat):
            counts[cat_title] = None
            self.log_text.append(u"* Skipping %s; in cooldown period."
                                 % cat.title(asLink=True, textlink=True))
            continue
        dest = cat.getCategoryRedirectTarget()
        if not dest.exists():
            self.problems.append("# %s redirects to %s"
                                 % (cat.title(asLink=True, textlink=True),
                                    dest.title(asLink=True, textlink=True)))
            # do a null edit on cat to update any special redirect
            # categories this wiki might maintain
            try:
                cat.save()
            except Exception:
                pass
            continue
        if dest.isCategoryRedirect():
            double = dest.getCategoryRedirectTarget()
            if double == dest or double == cat:
                self.log_text.append(u"* Redirect loop from %s"
                                     % dest.title(asLink=True, textlink=True))
                # do a null edit on cat
                try:
                    cat.save()
                except Exception:
                    pass
            else:
                self.log_text.append(
                    u"* Fixed double-redirect: %s -> %s -> %s"
                    % (cat.title(asLink=True, textlink=True),
                       dest.title(asLink=True, textlink=True),
                       double.title(asLink=True, textlink=True)))
                oldtext = cat.text
                # remove the old redirect from the old text,
                # leaving behind any non-redirect text
                oldtext = template_regex.sub("", oldtext)
                newtext = (u"{{%(redirtemp)s|%(ncat)s}}"
                           % {'redirtemp': self.template_list[0],
                              'ncat': double.title(withNamespace=False)})
                newtext = newtext + oldtext.strip()
                try:
                    cat.text = newtext
                    cat.save(i18n.twtranslate(self.site,
                                              self.dbl_redir_comment))
                except pywikibot.Error as e:
                    self.log_text.append("** Failed: %s" % e)
            continue

        found, moved = self.move_contents(cat_title,
                                          dest.title(withNamespace=False),
                                          editSummary=comment)
        if found is None:
            self.log_text.append(u"* [[:%s%s]]: error in move_contents"
                                 % (self.catprefix, cat_title))
        elif found:
            record[cat_title][today] = found
            self.log_text.append(u"* [[:%s%s]]: %d found, %d moved"
                                 % (self.catprefix, cat_title, found, moved))
        counts[cat_title] = found
        # do a null edit on cat
        try:
            cat.save()
        except Exception:
            pass

    with open(datafile, "wb") as f:
        cPickle.dump(record, f, protocol=config.pickle_protocol)

    self.log_text.sort()
    self.problems.sort()
    newredirs.sort()
    comment = i18n.twtranslate(self.site, self.maint_comment)
    self.log_page.text = (u"\n== %i-%02i-%02iT%02i:%02i:%02iZ ==\n"
                          % time.gmtime()[:6]
                          + u'\n'.join(self.log_text)
                          + u'\n* New redirects since last report:\n'
                          + u'\n'.join(newredirs)
                          + u'\n' + u'\n'.join(self.problems)
                          + u'\n' + self.get_log_text())
    self.log_page.save(comment)
    if self.edit_requests:
        edit_request_page.text = (self.edit_request_text
                                  % {'itemlist': u"\n" + u"\n".join(
                                      (self.edit_request_item % item)
                                      for item in self.edit_requests)})
        edit_request_page.save(comment)
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: str
    """
    global workingcat, parentcats
    global checked, tocheck
    global excludefile

    checked = {}
    tocheck = DequeGenerator()

    workingcatname = ''
    options = {}
    local_args = pywikibot.handle_args(args)
    for arg in local_args:
        option = arg[1:]
        if not arg.startswith('-'):
            if not workingcatname:
                workingcatname = arg
            else:
                pywikibot.warning('Working category "{}" is already given.'
                                  .format(workingcatname))
        else:
            options[option] = True

    if not workingcatname:
        pywikibot.bot.suggest_help(missing_parameters=['working category'])
        return

    mysite = pywikibot.Site()
    summary = i18n.twtranslate(mysite, 'makecat-create',
                               {'cat': workingcatname})

    bot = MakeCatBot(site=mysite, **options)

    workingcat = pywikibot.Category(
        mysite, '{0}{1}'.format(mysite.namespaces.CATEGORY, workingcatname))
    filename = pywikibot.config.datafilepath(
        'category',
        workingcatname.encode('ascii', 'xmlcharrefreplace').decode('ascii')
        + '_exclude.txt')
    try:
        with codecs.open(filename, 'r', encoding=mysite.encoding()) as f:
            for line in f.readlines():
                # remove leading and trailing spaces, LF and CR
                line = line.strip()
                if not line:
                    continue
                pl = pywikibot.Page(mysite, line)
                checked[pl] = pl
        excludefile = codecs.open(filename, 'a', encoding=mysite.encoding())
    except IOError:
        # file does not exist yet; create it
        excludefile = codecs.open(filename, 'w', encoding=mysite.encoding())

    # Get the parent categories so that the -removeparent option can be
    # handled later
    try:
        parentcats = workingcat.categories()
    except pywikibot.Error:
        parentcats = []

    # Do not include articles that are already in subcategories; only
    # direct subcategories are checked
    subcatlist = list(workingcat.subcategories())
    if subcatlist:
        subcatlist = pagegenerators.PreloadingGenerator(subcatlist)
        for cat in subcatlist:
            artlist = list(cat.articles())
            for page in artlist:
                checked[page] = page

    # Fetch the articles in the category and mark them as already checked
    # (seen). If the category is empty, ask the user for a page to start
    # with instead.
    articles = list(workingcat.articles(content=True))
    if not articles:
        pywikibot.output('Category {} does not exist or is empty. '
                         'Which page to start with?'
                         .format(workingcatname))
        answer = pywikibot.input('(Default is [[{}]]):'
                                 .format(workingcatname))
        if not answer:
            answer = workingcatname
        pywikibot.output(answer)
        pl = pywikibot.Page(mysite, answer)
        articles = [pl]

    for pl in articles:
        checked[pl] = pl
        bot.include(pl, summary=summary)

    gen = pagegenerators.DequePreloadingGenerator(tocheck)
    for page in gen:
        if bot.checkbroken or page.exists():
            bot.asktoadd(page, summary)
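The exclude file above is how rejected titles survive between runs: they are read back into `checked` at startup and appended as the user rejects more pages. A minimal sketch of that round trip, with an illustrative file name and encoding in place of the datafilepath/site values:

import codecs

filename = 'Example_exclude.txt'  # illustrative; real path comes from datafilepath()
checked = {}
try:
    with codecs.open(filename, 'r', encoding='utf-8') as f:
        for line in f:
            title = line.strip()
            if title:
                checked[title] = True  # rejected in an earlier run
    excludefile = codecs.open(filename, 'a', encoding='utf-8')
except IOError:
    # first run: create the file
    excludefile = codecs.open(filename, 'w', encoding='utf-8')

excludefile.write('Some rejected title\n')
excludefile.close()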
import os
import sys

sys.path.append(".")

if os.path.exists('pywikibot.lwp'):
    os.remove('pywikibot.lwp')

import pywikibot

import azurlane.load_src
import azurlane.weapon
import azurlane.wiki

equip_stats_srcs = azurlane.load_src.load_sharecfg('equip_data_statistics')

site = pywikibot.Site('azurlane')  # The site we want to run our bot on
site.login()
category = pywikibot.Category(site, 'Submarine Torpedo')
for page in category.articles():
    equip_wikis = page.text.split('|-|')
    edit_message = 'Bot: set armor modifiers:'
    edited = False
    for equip_wiki_index, equip_wiki in enumerate(equip_wikis):
        equip_id = int(azurlane.wiki.get_template_value(equip_wiki,
                                                        'BaseID'))
        equip = equip_stats_srcs[equip_id]
        weapon_id = equip['weapon_id'][1]
        weapon = azurlane.weapon.WeaponStats(weapon_id)
        prev_armor_type = ['CoefMax', 'Coef', 'PatternSpread', 'Spread']
        for armor_type_index, armor_type in enumerate(
                ['ArmorModL', 'ArmorModM', 'ArmorModH']):
            old_value = azurlane.wiki.get_template_value(equip_wiki,
                                                         armor_type)
            new_value = str(int(round(
                weapon.armor_modifiers[armor_type_index] * 100)))
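The inner loop above converts the game data's fractional armor multipliers into the integer percentages the wiki template stores. A self-contained illustration of that conversion, with made-up modifier values:

# light / medium / heavy multipliers; values are illustrative only
armor_modifiers = [0.8, 1.0, 1.3]
for name, mod in zip(['ArmorModL', 'ArmorModM', 'ArmorModH'],
                     armor_modifiers):
    # 0.8 -> '80', 1.0 -> '100', 1.3 -> '130'
    print(name, str(int(round(mod * 100))))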
import pywikibot

import azurlane.load_src
from equipment_page_map import equipment_page_map

equip_stats_srcs = azurlane.load_src.load_sharecfg('equip_data_statistics')
upgrade_srcs = azurlane.load_src.load_sharecfg('equip_upgrade_data')

upgrades = []
for upgrade_data in upgrade_srcs.values():
    output_id = upgrade_data['target_id']
    source_id = upgrade_data['upgrade_from']
    upgrades.append((source_id, output_id))

site = pywikibot.Site('azurlane')  # The site we want to run our bot on
site.login()
category = pywikibot.Category(site, 'Equipment')


def equipment_box(equip_id):
    equip = equip_stats_srcs[equip_id]
    link = equipment_page_map[equip['id']]
    name = link.split('#')[0]
    result = '{{EquipmentBox|%d|%s|%s|%s}} ' % (equip['rarity'], name,
                                                link, equip['icon'])
    if link == name:
        result += '[[%s]]' % name
    else:
        result += '[[%s|%s]]' % (link, name)
    return result


for page in category.articles():
    equip_wikis = page.text.split('|-|')
    edit_message = 'Bot: remove extraneous template arguments'
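The link handling in equipment_box() distinguishes plain pages from pages mapped to a '#section' anchor. A minimal sketch of just that branch, with an illustrative link value:

link = 'Quadruple 533mm Torpedo#Type A'  # hypothetical mapped link
name = link.split('#')[0]
if link == name:
    text = '[[%s]]' % name             # plain wikilink
else:
    text = '[[%s|%s]]' % (link, name)  # piped link hiding the anchor
print(text)  # [[Quadruple 533mm Torpedo#Type A|Quadruple 533mm Torpedo]]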
parser.add_argument('-v', '--version', action="version",
                    version="%(prog)s " + version)
parser.add_argument('-p', '--start', type=str, metavar="START",
                    help='format: YYYYMMDD - start date for counting '
                         'page views',
                    default='20190101')
parser.add_argument('-q', '--end', type=str, metavar="END",
                    help='format: YYYYMMDD - end date for counting '
                         'page views',
                    default='20190701')
args = parser.parse_args()

# Create the site object from the command-line language and family
# arguments; the defaults target the German-language edition of Wikipedia.
site = pywikibot.Site(args.lang, args.site)

# decide what to do
if args.category:
    # Given the (German) name of a category, extract statistics
    # for all articles belonging to that category.
    page = pywikibot.Category(site, args.category)
    # check whether this really is a category page
    if not page.is_categorypage():
        sys.exit(args.category + " is not a category page")
    for i, a in enumerate(page.articles(namespaces=[0])):
        stats = get_page_stats(args.start, args.end, end_date, a)
        print_stats(i, a.title(), stats, args.sep)

if args.languages:
    # Given the (German) name of an article, extract statistics
    # for all available language editions.
    # page = pywikibot.Page(site, args.languages)
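For reference, a self-contained sketch of the category branch above run against the German Wikipedia; get_page_stats and print_stats are defined elsewhere in the script, so only the page iteration is shown, and the category name is illustrative:

import pywikibot

site = pywikibot.Site('de', 'wikipedia')
cat = pywikibot.Category(site, 'Physik')  # hypothetical category
if not cat.is_categorypage():
    raise SystemExit('Physik is not a category page')
# iterate only over main-namespace articles, as the script does
for i, article in enumerate(cat.articles(namespaces=[0])):
    print(i, article.title())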