def check_page(self, pagename):
    """Check one page."""
    pywikibot.output("\nChecking %s" % pagename)
    sys.stdout.flush()
    page1 = Page(self.original, pagename)
    txt1 = page1.text
    if self.options.dest_namespace:
        dest_ns = int(self.options.dest_namespace)
    else:
        dest_ns = None
    for site in self.sites:
        if dest_ns is not None:
            page2 = Page(site, page1.title(withNamespace=False), dest_ns)
            pywikibot.output("\nCross namespace, new title: %s"
                             % page2.title())
        else:
            page2 = Page(site, pagename)
        if page2.exists():
            txt2 = page2.text
        else:
            txt2 = ''
        if str(site) in config.replicate_replace:
            txt_new = multiple_replace(txt1,
                                       config.replicate_replace[str(site)])
            if txt1 != txt_new:
                pywikibot.output(
                    'NOTE: text replaced using config.sync_replace')
                pywikibot.output('%s %s %s' % (txt1, txt_new, txt2))
                txt1 = txt_new
        if txt1 != txt2:
            pywikibot.output("\n %s DIFFERS" % site)
            self.differences[site].append(pagename)
            if self.options.replace:
                page2.text = txt1
                page2.save(self.put_message(site))
        else:
            sys.stdout.write('.')
            sys.stdout.flush()

def task(self):
    for idx, item in enumerate(self.get_list()):
        lemma = Page(self.wiki, item["title"])
        temp_text = lemma.text
        for cat in self.cat_list:
            # raw f-string and re.escape: category names may contain
            # regex metacharacters
            temp_text = re.sub(rf"\[\[Kategorie:{re.escape(cat)}\]\]\n?",
                               "", temp_text)
        lemma.text = temp_text
        lemma.save("remove categories")
    return True

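# A quick sanity check of the category-removal pattern used above, on a
# made-up sample; re.escape keeps category names containing regex
# metacharacters (e.g. "RE:Fertig") from being misread as patterns.
import re

sample = "Text\n[[Kategorie:RE:Fertig]]\nMore text\n"
cleaned = re.sub(rf"\[\[Kategorie:{re.escape('RE:Fertig')}\]\]\n?", "", sample)
assert cleaned == "Text\nMore text\n"
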
def process(day):
    """
    one day bot processing

    arguments:
    day -- python date format
    """
    if params.verbose:
        print("processing Journal des recréations ({day})"
              .format(day=format_date(day)))
    start = to_date(day)
    end = to_date(day + ONE_DAY)
    result = "\n== {} ==\n".format(format_date(day))
    comment = ''
    for i, page in enumerate(creation_log(start, end), 1):
        gras = ''
        if params.verbose:
            print(i, page["timestamp"])
        dl = deletelog(page["title"])
        if dl:
            page_pas = Page(Site(),
                            "Discussion:" + page["title"] + "/Suppression")
            if page_pas.exists() and re.search(r'\{\{\ ?Article supprimé',
                                               page_pas.get(), re.I):
                comment += u' - %s (malgré [[%s|PàS]])' \
                           % (page["title"], page_pas.title())
                gras = "'''"
            r = ("* {g}{{{{a-court|{title}}}}} <small>([[{pas}|PàS]])</small>"
                 " supprimé le {date} recréé par {{{{u|{user}}}}}{g} \n"
                 .format(title=wiki_param(page["title"]),
                         pas=page_pas.title(),
                         user=wiki_param(page["user"]),
                         date=format_date(from_date(dl["timestamp"])),
                         g=gras))
            if params.verbose:
                print(r)
            result += r
    page = Page(Site(), params.prefix + "/" + format_date(day, skip_day=True))
    try:
        result = page.get() + result
    except NoPage:
        pass
    page.put(result,
             comment="Journal des recréations ({day})"
                     .format(day=format_date(day)) + comment)

def task(self):
    try:
        with open("page_redirects.json") as json_file:
            lemma_dict = json.load(json_file)
    except IOError:
        exit(1)
    for lemma in lemma_dict["rows"]:
        lemma_page = Page(self.wiki, lemma[0])
        lemma_page.delete("unnötige Weiterleitung", prompt=False, mark=True)

def load_Proximos_Eventos(self):
    from pywikibot import Page
    self.page_proximos = Page(self.site, "Próximos Eventos")
    self.proximos = []
    for line in self.page_proximos.text.split('\n'):
        if line.startswith("*'''"):
            try:
                self.proximos.append(Evento(line))
            except Exception:  # a bare except would also swallow SystemExit
                print(f"Falha ao tentar parsear linha da página"
                      f" 'Próximos Eventos':\n===\n{line}\n===")

def getFilesFromPage(siteSrc, nbPages, iTitles):
    (i, title) = iTitles
    # materialize the generator once; len(list(...)) would otherwise exhaust
    # it before mapTitle() gets to see any page
    pages = list(Page(siteSrc, title).imagelinks())
    nbFiles = len(pages)
    if nbFiles > 0:
        log("%i/%i Process %s : %i files found"
            % (i + 1, nbPages, title, nbFiles))
    else:
        log("%i/%i Process %s : no files found" % (i + 1, nbPages, title))
    return mapTitle(pages)

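# Why the materialized list matters above: imagelinks() yields pages lazily,
# and a generator can only be consumed once. Minimal illustration:
gen = (x for x in range(3))
assert len(list(gen)) == 3
assert list(gen) == []  # already exhausted; a second pass yields nothing
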
def _print_author(self):
    self.logger.info("Print author register.")
    overview = ["{|class =\"wikitable sortable\" style=\"text-align:right;\""
                "\n!Autor\n!Artikel\n!colspan=\"2\"|Erschließungsgrad"]
    for register in self.registers.author:
        if register.author.last_name:
            self.logger.debug(register)
            self.save_if_changed(
                Page(self.wiki,
                     f"Paulys Realencyclopädie der classischen "
                     f"Altertumswissenschaft/Register/{register.author.name}"),
                register.get_register_str(),
                "Register aktualisiert")
            overview.append(register.overview_line)
    overview.append("|}")
    self.save_if_changed(
        Page(self.wiki,
             "Paulys Realencyclopädie der classischen "
             "Altertumswissenschaft/Register/Autorenübersicht"),
        "\n".join(overview),
        "Register aktualisiert")

def check_page(self, pagename):
    """Check one page."""
    pywikibot.output('\nChecking ' + pagename)
    page1 = Page(self.original, pagename)
    txt1 = page1.text
    if self.options.dest_namespace:
        dest_ns = int(self.options.dest_namespace)
    else:
        dest_ns = None
    for site in self.sites:
        if dest_ns is not None:
            page2 = Page(site, page1.title(with_ns=False), dest_ns)
            pywikibot.output('\nCross namespace, new title: '
                             + page2.title())
        else:
            page2 = Page(site, pagename)
        txt2 = page2.text
        if str(site) in config.replicate_replace:
            txt_new = multiple_replace(txt1,
                                       config.replicate_replace[str(site)])
            if txt1 != txt_new:
                pywikibot.output(
                    'NOTE: text replaced using config.sync_replace')
                pywikibot.output('{0} {1} {2}'.format(txt1, txt_new, txt2))
                txt1 = txt_new
        if txt1 != txt2:
            pywikibot.output('\n {0} DIFFERS'.format(site))
            self.differences[site].append(pagename)
            if self.options.replace:
                page2.text = txt1
                page2.save(self.put_message(site))
        else:
            pywikibot.stdout('.', newline=False)

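# multiple_replace is assumed to behave like the helper of the same name in
# Pywikibot's replicate_wiki.py: apply every key -> value substitution from a
# mapping to the text. A minimal sketch under that assumption:
def multiple_replace(text, word_dict):
    """Replace every occurrence of each dict key in text with its value."""
    for key, value in word_dict.items():
        text = text.replace(key, value)
    return text
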
def extract_game_page_from_league_table_file(league_table_file_page):
    league_table_file_name = league_table_file_page.title()

    season = _get_word_after(league_table_file_name, "עונת").replace("-", "/")
    fixture_number = f'מחזור {_get_word_after(league_table_file_name, "מחזור")}'

    season_games = maccabi_games.get_games_by_season(season)
    game = [g for g in season_games if g.fixture == fixture_number]
    if len(game) != 1:
        raise RuntimeError(
            f"Expected exactly one matching game, found {len(game)}: {game}")

    game_page_name = generate_page_name_from_game(game[0])
    return Page(site, game_page_name)

def getPageSrcDstFromTitle(src, dst, pageTitle):
    p = Page(src, pageTitle)
    ns = p.namespace()
    # default, so title is always defined whatever the namespace
    title = pageTitle
    # specific case for "Project pages"
    # TODO : use an option !
    if ns.id == 4 or ns.id == 102:
        if ns.subpages:
            subPage = pageTitle.split("/", 1)
            if len(subPage) > 1:
                title = subPage[1]
    newPage = Page(dst, title)
    if newPage.site != dst:
        newPage = Page(dst, newPage.titleWithoutNamespace(), ns.id)
    return (p, newPage, ns)

def make_magazines(self, dictionary_of_magazines_by_year):
    for idx_year, year in enumerate(dictionary_of_magazines_by_year):
        magazines = dictionary_of_magazines_by_year[year]
        self.logger.debug(f"make_mag_year {idx_year + 1}/"
                          f"{len(dictionary_of_magazines_by_year)}")
        for idx_mag, magazine in enumerate(magazines):
            self.logger.debug(
                f"make_mag_mag {idx_mag + 1}/{len(magazines)}"
                f" ... issue:{year}/{magazine}")
            if year == "1986" and magazine == "31":
                self.logger.warning(
                    "There is magazine 1986, 31, this is special,"
                    " no creating here")
                continue
            if self.debug:
                lemma = Page(self.wiki, "Benutzer:THEbotIT/Test")
            else:
                lemma = Page(
                    self.wiki,
                    f"Die Gartenlaube ({year})/Heft {int(magazine):d}")
            new_text = self.make_magazine(year, magazine)
            if new_text:
                if hash(new_text.strip()) != hash(lemma.text.strip()):
                    self.logger.debug(
                        f"Print [[Die Gartenlaube ({year})/Heft {magazine}]].")
                    if lemma.text != '':
                        lemma.text = new_text
                        lemma.save("Automatische Aktualisierung des Heftes",
                                   botflag=True)
                    else:
                        lemma.text = new_text
                        lemma.save("automatische Hefterstellung",
                                   botflag=True)
                else:
                    self.logger.debug(
                        f"Keine Änderung im Text ({year}/{magazine}).")

def extract_coach_tenures(name):
    """
    Extract a coach's tenures from Wikipedia.

    Arguments:
    - name (name of coach)

    Returns:
    - list(dict)
    """
    logging.info('Looking for coach %s' % name)
    page_name = get_page_name_from_coach_name_wiki(name)

    # If we can't find a wikipedia page, return immediately
    if not page_name:
        return []
    logging.debug('Looking up %s as http://en.wikipedia.org/wiki/%s'
                  % (name, page_name))

    # Extract page content from wikipedia and narrow it down to the templates
    p = Page(Site('en', 'wikipedia'), page_name)
    if p.isRedirectPage():
        p = p.getRedirectTarget()
    content = p.get()
    parsed = mwparserfromhell.parse(content)
    templates = parsed.filter_templates()

    # Extract teams and years from the template
    teams, years = None, None
    for t in templates:
        for param in t.params:
            if "coach_teams" in param.name:
                teams = parse_coach_teams_and_positions_from_wiki(param)
            if "coach_years" in param.name:
                years = parse_coach_years_from_wiki(param)

    # If we were not able to extract information from the page,
    # log & return empty
    if not teams or not years:
        logging.warning(
            'ISSUE DETECTED: %s is valid page but no information extracted'
            % name)
        return []

    # merge each (teams, years) pair of dicts; dict unpacking replaces the
    # Python-2-only `t[0].items() + t[1].items()`
    tenures = [{**t[0], **t[1]} for t in zip(teams, years)]
    for tenure in tenures:
        tenure['name'] = name
    return tenures

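# A self-contained miniature of the mwparserfromhell calls used above, run
# against an inline infobox instead of a live Wikipedia page (template name
# and values are made up):
import mwparserfromhell

wikitext = "{{Infobox|coach_years=1990-1995|coach_teams=Example State}}"
for template in mwparserfromhell.parse(wikitext).filter_templates():
    for param in template.params:
        if "coach_years" in param.name:
            print(param.value)  # -> 1990-1995
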
def make_sure_league_table_file_is_on_game_page(league_table_file_page):
    game_page = extract_game_page_from_league_table_file(
        league_table_file_page)
    if not game_page.exists():
        raise RuntimeError(
            f"Could not find this game page: {game_page.title()}. "
            f"Created from this league table:"
            f" {league_table_file_page.title()}")

    parsed_mw_text = mwparserfromhell.parse(game_page.text)
    # filter_templates takes the name as the `matches` keyword; passed
    # positionally it would be interpreted as the `recursive` flag
    football_game_template = parsed_mw_text.filter_templates(
        matches=football_games_template_name)[0]

    table_arg_dont_exist = not football_game_template.has(
        league_table_file_argument_name)
    # Contains just \n or spaces:
    empty_table_arg_exist = football_game_template.has(
        league_table_file_argument_name) and not football_game_template.get(
            league_table_file_argument_name).value.strip()

    if table_arg_dont_exist or empty_table_arg_exist:
        logger.info(
            f"Adding league table file to the page: {game_page.title()}")
        football_game_template.add(
            league_table_file_argument_name,
            league_table_file_page.title(with_ns=False))
        game_page.text = str(parsed_mw_text)
        game_page.save(
            summary="MaccabiBot - Updating league tables files"
                    " to the relevant game pages",
            botflag=True)
    else:
        # The current league table is a File (ns=6)
        current_league_table_file = Page(
            site,
            str(football_game_template.get(
                league_table_file_argument_name).value).strip(),
            ns=6)
        if current_league_table_file != league_table_file_page:
            logger.warning(
                f"Found an existing league table which is different from"
                f" what we have. Current: {current_league_table_file},"
                f" We have: {league_table_file_page}")
        else:
            logger.info(f"Page: {game_page.title()} has an existing league"
                        f" table and it's a good one!")

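# The has()/get()/add() accessors used above, demonstrated on a made-up
# template ("Game" and "table" are placeholder names):
import mwparserfromhell

code = mwparserfromhell.parse("{{Game|table=}}")
template = code.filter_templates(matches="Game")[0]
assert template.has("table")
assert not str(template.get("table").value).strip()  # present but empty
template.add("table", "Example.png")  # overwrites the empty value
assert str(code) == "{{Game|table=Example.png}}"
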
def getCategories(self, article):
    baseDir = "articleCategoriesCache/"
    if not os.path.exists(baseDir):
        os.makedirs(baseDir)
    fname = baseDir + article
    if os.path.isfile(fname):
        lines = []
        try:
            with codecs.open(fname, encoding='utf-8') as f:
                lines = [line.strip() for line in f.readlines()]
            #print "utf8 encoding"
        except UnicodeError:
            with codecs.open(fname) as f:
                lines = [line.strip() for line in f.readlines()]
            #print "ascii encoding"
        lines = self.filterCategories(lines)
        if lines != []:
            #print "get Cat Cache:", lines
            return lines

    site = Site("en")
    page = Page(site, article.decode("utf8"))
    #print article
    #print page.get()
    #print page.get(get_redirect = True)
    #print "redirect?", page.isRedirectPage()
    if page.isRedirectPage():
        page = page.getRedirectTarget()
    #print [cat.title() for cat in page.categories()]
    cats = sorted([cat.title() for cat in page.categories()
                   if not cat.isHiddenCategory()])
    #print "downloaded cats1: ", cats
    cats = self.filterCategories(cats)
    #print "downloaded cats2: ", cats
    text = ""
    for cat in cats:
        text += cat + "\n"
    try:
        with codecs.open(fname, "a+") as f:
            f.write(text)
    except UnicodeError:
        with codecs.open(fname, "a+") as f:
            f.write(text.encode('utf-8'))
    return cats

def task(self):  # pragma: no cover
    error_task = ERROTask(wiki=self.wiki, debug=False, logger=self.logger)
    for lemma in self.search_pages():
        page = Page(self.wiki, lemma["title"])
        temp_text = page.text
        try:
            temp_text = self.convert_all(temp_text)
            page.text = temp_text
            re_page = RePage(page)
            if not self.debug:
                re_page.save("Entfernen veralteter Vorlagen.")
        except (ReDatenException, ValueError):
            error = traceback.format_exc().splitlines()[-1]
            error_task.task(lemma["title"], error)
    error_task.finish_task()
    if self.search_pages():
        return False
    return True

def get_names_current_coaches_and_coordinators():
    """
    Gets the names of the current D1 coaches and coordinators.

    Params:
    - None

    Returns:
    - list[str(), str()]
    """
    text = Page(Site('en', 'wikipedia'),
                'List_of_current_NCAA_Division_I_FBS_football_coaches').get()
    parsed = mwparserfromhell.parse(text)
    nameps = filter(lambda x: 'sortname' == x.name, parsed.filter_templates())
    results = []
    for n in nameps:
        # str() replaces the Python-2-only unicode() of the original
        results.append(' '.join([str(p.value) for p in n.params[:2]]))
    return results

def main():
    page = Page(site, "Star Wars Wiki:Apêndice de Tradução de obras/JSON")
    fixes = {}
    with open("bot/user-fixes.py") as f:
        # Can't import it normally because of naming and undefined variable
        exec(f.read())
    if 'obras' not in fixes:
        raise Exception('Obras dictionary not found')
    fixes_json = json.dumps(fixes['obras'], ensure_ascii=False, indent=4)
    new_text = "<pre>{}</pre>".format(fixes_json.replace('\\\\1', '$1'))
    showDiff(page.text, new_text)
    if get_user_yes_or_no("Salvar?"):
        page.text = new_text
        page.save("([[User:Thales César|Thales]]) 2.2 Atualizado"
                  " com novas informações")

def task(self):
    lemma_list = self._search()
    for idx, lemma in enumerate(lemma_list):
        page = Page(self.wiki, title='Index:{}'.format(lemma['title']))
        self.logger.info('{}/{}:{}'.format(idx, len(lemma_list), page))
        match = regex_picture.search(page.text)
        if match:
            self.logger.info(match.group(1))
            temp = re.sub(r'\|\d{2,3}px', '', match.group(1))
            if not re.search('thumb', match.group(1)):
                temp = temp + '|thumb'
            self.logger.info(temp)
            if temp == match.group(1):
                self.logger.info('nothing to do here.')
                continue
            temp = '|BILD=[[{}]]'.format(temp)
            temp_text = regex_picture.sub(temp, page.text)
            page.text = temp_text
            page.save(botflag=True, summary='set thumb as parameter')
    return True

def history(self, fertig: Tuple[int, int], korrigiert: Tuple[int, int],
            unkorrigiert: Tuple[int, int]):
    page = Page(self.wiki, "Benutzer:THEbotIT/" + self.bot_name)
    temp_text = page.text
    composed_text = "".join([
        "|-\n",
        "|", self.timestamp.start_of_run.strftime("%Y%m%d-%H%M"),
        "||", str(unkorrigiert[1]),
        "||", str(unkorrigiert[0]),
        "||", str(int(unkorrigiert[0] / unkorrigiert[1])),
        "||", str(korrigiert[1]),
        "||", str(korrigiert[0]),
        "||", str(int(korrigiert[0] / korrigiert[1])),
        "||", str(fertig[1]),
        "||", str(fertig[0]),
        "||", str(int(fertig[0] / fertig[1])),
        "\n<!--new line-->"
    ])
    temp_text = re.sub("<!--new line-->", composed_text, temp_text)
    page.text = temp_text
    page.save("RE Statistik aktualisiert", botflag=True)

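# The "<!--new line-->" marker above is the table's insertion point: each run
# replaces the marker with a fresh row that itself ends in a new marker, so
# rows accumulate. A minimal demonstration with dummy values:
import re

table_text = "{|\n|-\n|20240101-1200||10\n<!--new line-->\n|}"
new_row = "|-\n|20240102-1200||12\n<!--new line-->"
print(re.sub("<!--new line-->", new_row, table_text))
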
def process(day):
    """
    one day bot processing

    arguments:
    day -- python date format
    """
    if params.verbose:
        print("processing Journal des recréations ({day})".format(
            day=format_date(day)))
    start = to_date(day)
    end = to_date(day + ONE_DAY)
    result = "\n== {} ==\n".format(format_date(day))
    for i, page in enumerate(creation_log(start, end), 1):
        if params.verbose:
            print(i, page["timestamp"])
        dl = deletelog(page["title"])
        if dl:
            r = ("* {{{{a-court|{title}}}}} <small>([[{pas}|PàS]])</small>"
                 " supprimé le {date} recréé par {{{{u|{user}}}}} \n"
                 .format(title=wiki_param(page["title"]),
                         pas=wiki_param("Discussion:" + page["title"]
                                        + "/Suppression"),
                         user=wiki_param(page["user"]),
                         date=format_date(from_date(dl["timestamp"]))))
            if params.verbose:
                print(r)
            result += r
    page = Page(Site(), params.prefix + "/" + format_date(day, skip_day=True))
    try:
        result = page.get() + result
    except NoPage:
        pass
    page.put(
        result,
        comment="Journal des recréations ({day})".format(
            day=format_date(day)))

def load_Eventos_Regulares(self):
    from pywikibot import Page
    self.page_regulares = Page(self.site, "Eventos Regulares")
    self.regulares = []
    comment = False
    # defined up front: an event line may appear before any section heading
    recorrencia = None
    for line in self.page_regulares.text.split('\n'):
        line = line.strip()
        if comment:
            if line.endswith("-->"):
                comment = False
            else:
                # TODO: store the comments' content here
                continue
        if line.startswith("<!--"):
            comment = True
            # The comment may open and close on the same line,
            # so we have to check again:
            if line.endswith("-->"):
                comment = False
        elif line.startswith("==") and line.endswith("=="):
            if "Semanais" in line:
                recorrencia = "Semanal"
            elif "Quinzenais" in line:
                recorrencia = "Quinzenal"
            elif "Mensais" in line:
                recorrencia = "Mensal"
            else:
                recorrencia = None
        elif line.startswith("*'''"):
            try:
                self.regulares.append(Evento(line, recorrencia))
            except Exception:
                msg = "Falha ao tentar parsear linha da página "
                msg += "'Eventos Regulares':\n===\n{}\n===".format(line)
                print(msg)
                tb.print_exc()

def save_page(page_name: str, text: str, sheet_name: str,
              template_name: str) -> None:
    '''Actually write the wiki page.'''
    if template_name is None:
        template_name = page_name

    # Append a footer marking this as a bot-edited page, then update
    sheet_url = get_sheet_url(sheet_name)
    footer = '\n\n{{bot/編集の注意|template_name = %s | url = %s}}' \
        % (template_name, sheet_url)
    text += footer

    # Do nothing if the page is unchanged
    page = Page(site, page_name)
    if page.text == text:
        return
    page.text = text
    if args.debug:
        print(page.text)
    else:
        page.save()

def generate_overviews(self):
    """Create page on wikis with overview of bot results."""
    for site in self.sites:
        sync_overview_page = Page(site,
                                  'User:%s/sync.py overview' % site.user())
        output = "== Pages that differ from original ==\n\n"
        if self.differences[site]:
            output += "".join('* [[:%s]]\n' % l
                              for l in self.differences[site])
        else:
            output += "All important pages are the same"
        output += "\n\n== Admins from original that are missing here ==\n\n"
        if self.user_diff[site]:
            output += "".join('* %s\n' % l.replace('_', ' ')
                              for l in self.user_diff[site])
        else:
            output += "All users from original are also present on this wiki"
        pywikibot.output(output)
        sync_overview_page.text = output
        sync_overview_page.save(self.put_message(site))

def load_Proximos_Eventos(self):
    """
    Load the "Próximos Eventos" page from the wiki, parse each `Evento`
    (one per line) and append them to the `proximos` list.
    """
    from pywikibot import Page
    self.page_proximos = Page(self.site, "Próximos Eventos")
    self.proximos = []
    # exclude comments
    patt = r'(<!--)(\n|.)+?(-->)'
    events = re.sub(patt, '', self.page_proximos.text)
    for line in events.split('\n'):
        if line.startswith("*'''"):
            try:
                self.proximos.append(Evento(line))
            except Exception:
                msg = "Falha ao tentar parsear linha da página "
                msg += "'Próximos Eventos':\n===\n{}\n===".format(line)
                print(msg)
                tb.print_exc()

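# Quick check of the comment-stripping pattern above on a made-up snippet;
# the non-greedy (\n|.)+? lets the pattern span line breaks without merging
# two adjacent comments into a single match:
import re

patt = r'(<!--)(\n|.)+?(-->)'
text = "*'''Evento A'''\n<!-- oculto\nnota -->\n*'''Evento B'''"
print(re.sub(patt, '', text))  # both event lines survive, the comment is gone
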
def test_all(self):
    site1 = Site('en')
    site2 = Site('de')
    pn = ['page1', 'page2', 'page3']
    sites = [site1, site2]
    pages = [Page(s, '%s-%s' % (p, s.dbName())) for p in pn for s in sites]

    m = PerWikiMapper(2)
    m.add('Foo.jpg', pages[0])
    for index in [1, 2, 3]:
        m.add('Bar.jpg', pages[index])
    m.add('Baz.jpg', pages[1])
    m.add('Quux.jpg', pages[1])

    file_list = []
    for page, files in m.files_per_page():
        file_list.append(page.title() + '>' + '|'.join(files))

    expected = [
        'Page1-enwiki>Foo.jpg',
        'Page2-enwiki>Bar.jpg',
        'Page1-dewiki>Bar.jpg|Baz.jpg|Quux.jpg',
        'Page2-dewiki>Bar.jpg'
    ]
    self.assertEqual(sorted(file_list), sorted(expected))

def _create_current_projects_template(self):
    """Create a current projects template with the new projects."""
    page_name = self._make_year_title(
        self._config["year_pages"]["current_projects_template"])
    page = Page(self._site, page_name)
    if page.exists() and not self._overwrite:
        logging.warning(
            "Page '{}' already exists. It will not be created.".format(
                page.title()))
        return
    project_format = "[[{ns}:{{proj}}|{{proj}}]]".format(
        ns=self._config["project_namespace"])
    delimiter = "''' · '''"
    template_data = {}
    for program in self._programs:
        projects = set()
        for strategy in program.get('strategies'):
            # projects sorted by id to get thematic grouping
            projects.update(strategy.get("projects"))
        template_data[program.get('name')] = delimiter.join([
            project_format.format(proj=self._projects[project])
            for project in sorted(projects)
        ])
    template = Template("Aktuella projekt/layout")
    template.add_parameter("år", self._year)
    template.add_parameter("access", template_data["Tillgång"])
    template.add_parameter("use", template_data["Användning"])
    template.add_parameter("community", template_data["Gemenskapen"])
    template.add_parameter("enabling", template_data["Möjliggörande"])
    page.text = template.multiline_string() + \
        "\n<noinclude>{{Dokumentation}}</noinclude>"
    logging.info("Writing to page '{}'.".format(page.title()))
    logging.debug(page.text)
    self._write_page(page)

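# Template above is not mwparserfromhell's Template class; judging only from
# the add_parameter()/multiline_string() calls, it is a small project-local
# helper, roughly like this hypothetical sketch:
class Template:
    def __init__(self, name):
        self.name = name
        self.parameters = []

    def add_parameter(self, key, value):
        # parameters keep insertion order
        self.parameters.append((key, value))

    def multiline_string(self):
        # one "|key = value" line per parameter, wikitext style
        lines = ["{{" + self.name]
        lines.extend("|{} = {}".format(k, v) for k, v in self.parameters)
        lines.append("}}")
        return "\n".join(lines)
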
def generate_overviews(self):
    """Create page on wikis with overview of bot results."""
    for site in self.sites:
        sync_overview_page = Page(
            site, 'User:{0}/sync.py overview'.format(site.user()))
        output = '== Pages that differ from original ==\n\n'
        if self.differences[site]:
            output += ''.join('* [[:{}]]\n'.format(page_title)
                              for page_title in self.differences[site])
        else:
            output += 'All important pages are the same'
        output += (
            '\n\n== Admins from original that are missing here ==\n\n')
        if self.user_diff[site]:
            output += ''.join('* {}\n'.format(user_name.replace('_', ' '))
                              for user_name in self.user_diff[site])
        else:
            output += (
                'All users from original are also present on this wiki')
        pywikibot.output(output)
        sync_overview_page.text = output
        sync_overview_page.save(self.put_message(site))

def send_log_to_wiki(self):
    wiki_log_page = f"Benutzer:THEbotIT/Logs/{self.bot_name}"
    page = Page(self.wiki, wiki_log_page)
    page.text += self.logger.create_wiki_log_lines()
    page.save(f"Update of Bot {self.bot_name}", botflag=True)

def current_page(self):
    """Patch current_page to return any page."""
    return Page(self.site, 'Main', ns=4)

def setUpClass(cls):
    """Setup class for all tests."""
    super(TestCosmeticChanges, cls).setUpClass()
    cls.cct = CosmeticChangesToolkit(Page(cls.site, 'Test'))