def get_prplatform(page: Page):
    """Return the ``prplatform`` value of the page's first 'Infobox Team' template.

    The page text is parsed with mwparserfromhell. Only the first template
    whose name matches 'Infobox Team' is considered.

    Returns:
        The stripped parameter value, or ``None`` when the page has no such
        template or that template has no ``prplatform`` parameter.
    """
    wikicode = mwparserfromhell.parse(page.text())
    for tpl in wikicode.filter_templates():
        if tpl.name.matches('Infobox Team'):
            # The first matching infobox decides the result either way.
            if tpl.has('prplatform'):
                return tpl.get('prplatform').value.strip()
            return None
    return None
def process(self, article):
    """Write the target of every image link found in ``article`` to ``self.file``.

    A link counts as an image link when its target (the text before any
    ``|``) ends in ``.png``, ``.jpg``, ``.jpeg``, ``.gif`` or ``.svg``. Each
    target is written on its own line, namespace prefix included.

    Args:
        article: Title of the page to scan; it is loaded through
            ``Page(self.client, article)``.
    """
    page = Page(self.client, article)
    wikitext = page.text()
    # The extensions are grouped and the dot escaped so every listed
    # extension is recognised. The lookahead accepts either a caption
    # separator or the closing brackets without consuming them.
    image_link = re.compile(
        r"\[\[([^\]|]+?\.(?:png|jpe?g|gif|svg))\s*(?=\||\]\])")
    # finditer reports every link on the page, not only the first one.
    for found in image_link.finditer(wikitext):
        self.file.write(found.group(1) + "\n")
def process(self, page_name):
    """Add ``TEMPLATE`` to the page called ``page_name`` and save the edit.

    The template goes on its own line before the existing text when
    ``BEGINNING`` is truthy, otherwise after it. The edit summary is the
    translated "scripts.addtemplate.summary" string formatted with the
    template.
    """
    target = Page(self.client, page_name)
    current = target.text()
    if BEGINNING:
        pieces = (TEMPLATE, current)
    else:
        pieces = (current, TEMPLATE)
    updated = pieces[0] + "\n" + pieces[1]
    summary = self.lang.t("scripts.addtemplate.summary").format(
        template=TEMPLATE)
    target.edit(updated, summary=summary)
def preview(self, article: Page):
    """Return a short preview of the article text.

    Every line of the text is hard-wrapped at 80 characters. The first 15
    resulting rows are joined with ``os.linesep``.
    """
    width = 80
    max_rows = 15
    rows = []
    for raw in article.text().splitlines():
        pieces = [raw[start:start + width]
                  for start in range(0, len(raw), width)]
        # An empty source line still occupies one (empty) row.
        rows.extend(pieces or [raw])
    return os.linesep.join(rows[:max_rows])
def do_recategorize(self, article: Page, additions, removals, replaces):
    """Apply category additions, removals and replacements to ``article``.

    Does nothing when ``self.query`` is empty. Otherwise the page text is
    rewritten and saved as a minor bot edit. The edit summary is the
    translated "scripts.recategorize.reason" string followed by a compact
    description of the changes.

    Args:
        article: Page to edit; its ``text()`` is read and ``edit()`` called.
        additions: Link targets to append, each as a new ``[[target]]`` line.
        removals: Link targets whose ``[[target]]`` / ``[[target|key]]``
            links are deleted.
        replaces: Strings of the form ``"old%new"``; both halves are category
            names without the ``self.category_name`` prefix.

    Raises:
        ValueError: If an entry of ``replaces`` does not contain exactly one
            ``%``.
    """
    if self.query == "":
        return
    print("Guardando...")
    link_prefix = "[[" + self.category_name
    # Optional "|sort key" followed by the closing brackets. Stopping at the
    # first "]" keeps the match inside one link, so text and other links
    # later on the same line are left alone.
    link_tail = r'(?:\|[^\]]*)?\]\]'
    wikitext = article.text().replace("[[Category:", link_prefix)
    for removal in removals:
        # Names are escaped so characters such as "(" or "+" match literally.
        pattern = r'\[\[' + re.escape(removal) + link_tail
        wikitext = re.sub(pattern, '', wikitext)
    for addition in additions:
        wikitext = wikitext + "\n[[{}]]".format(addition)
    for replace in replaces:
        search, rep = replace.split("%")
        pattern = r'\[\[' + re.escape(self.category_name + search) + link_tail
        new_link = link_prefix + rep + ']]'
        # A callable replacement inserts the text verbatim, so backslashes
        # in the new name are not read as group references.
        wikitext = re.sub(pattern, lambda _m, link=new_link: link, wikitext)
    plainadditions = ",".join(
        "+" + ",".join(name.split(':')[1:]) for name in additions)
    plainremovals = ",".join(
        "-" + ",".join(name.split(':')[1:]) for name in removals)
    plainreplaces = ",".join("=>".join(pair.split('%')) for pair in replaces)
    query = plainadditions + "; " + plainremovals + "; " + plainreplaces
    article.edit(wikitext,
                 summary=self.lang.t("scripts.recategorize.reason") + query,
                 minor=True,
                 bot=True)
class TestPageApiArgs(unittest.TestCase):
    """Checks the API arguments ``Page`` sends through a mocked site."""

    def setUp(self):
        """Patch ``mwclient.client.Site`` and build the page under test."""
        title = 'Some page'
        self.page_text = 'Hello world'
        site_cls = mock.patch('mwclient.client.Site').start()
        self.site = site_cls()
        # Canned GET response that is in place while the Page is constructed.
        self.site.get.return_value = {
            'query': {'pages': {'1': {'title': title}}}
        }
        self.site.rights = ['read']
        self.page = Page(self.site, title)
        # Canned GET response for every request made by the tests themselves.
        revision = {'*': 'Hello world', 'timestamp': '2014-08-29T22:25:15Z'}
        page_info = {
            'ns': 0,
            'pageid': 2,
            'revisions': [revision],
            'title': title,
        }
        self.site.get.return_value = {'query': {'pages': {'2': page_info}}}

    def get_last_api_call_args(self, http_method='POST'):
        """Return the keyword arguments of the last ``site.get``/``site.post`` call.

        The first positional argument of the call is left out; any further
        positional arguments are merged into the returned dict.
        """
        if http_method == 'GET':
            recorded = self.site.get.call_args
        else:
            recorded = self.site.post.call_args
        positional, keyword = recorded
        action, extra = positional[0], positional[1:]
        keyword.update(extra)
        return keyword

    def tearDown(self):
        """Stop every patch started in ``setUp``."""
        mock.patch.stopall()

    def test_get_page_text(self):
        """page.text() returns the canned text and sends the expected GET arguments."""
        text = self.page.text()
        sent = self.get_last_api_call_args(http_method='GET')

        assert text == self.page_text
        assert sent == {
            'prop': 'revisions',
            'rvdir': 'older',
            'titles': self.page.page_title,
            'rvprop': 'content|timestamp',
            'rvlimit': '1'
        }

    def test_get_page_text_cached(self):
        """A second page.text() call is served from the cache unless cache=False."""
        self.page.revisions = mock.Mock(return_value=iter([]))
        self.page.text()
        self.page.text()
        # The second call hit the cache, so revisions ran only once.
        assert self.page.revisions.call_count == 1
        self.page.text(cache=False)
        # With the cache explicitly disabled, revisions runs again.
        assert self.page.revisions.call_count == 2

    def test_get_section_text(self):
        """The 'rvsection' parameter is sent to the API."""
        self.page.text(section=0)
        sent = self.get_last_api_call_args(http_method='GET')

        assert sent['rvsection'] == '0'

    def test_get_text_expanded(self):
        """The 'rvexpandtemplates' parameter is sent to the API."""
        self.page.text(expandtemplates=True)
        sent = self.get_last_api_call_args(http_method='GET')

        assert sent['rvexpandtemplates'] == '1'

    def test_assertuser_true(self):
        """assert=user is sent when force_login=True."""
        self.site.blocked = False
        self.site.rights = ['read', 'edit']
        self.site.logged_in = True
        self.site.force_login = True
        self.site.api.return_value = {'edit': {'result': 'Ok'}}
        self.page.save('Some text')
        sent = self.get_last_api_call_args()

        assert sent['assert'] == 'user'

    def test_assertuser_false(self):
        """assert=user is not sent when force_login=False."""
        self.site.blocked = False
        self.site.rights = ['read', 'edit']
        self.site.logged_in = False
        self.site.force_login = False
        self.site.api.return_value = {'edit': {'result': 'Ok'}}
        self.page.save('Some text')
        sent = self.get_last_api_call_args()

        assert 'assert' not in sent

    def test_handle_edit_error_assertuserfailed(self):
        """An 'assertuserfailed' APIError raises AssertUserFailedError."""
        api_error = APIError(
            'assertuserfailed',
            'Assertion that the user is logged in failed',
            'See https://en.wikipedia.org/w/api.php for API usage')

        with pytest.raises(AssertUserFailedError):
            self.page.handle_edit_error(api_error, 'n/a')

    def test_handle_edit_error_protected(self):
        """A 'protectedpage' APIError raises ProtectedPageError with code and message."""
        message = 'The "editprotected" right is required to edit this page'
        api_error = APIError(
            'protectedpage',
            message,
            'See https://en.wikipedia.org/w/api.php for API usage')

        with pytest.raises(ProtectedPageError) as pp_error:
            self.page.handle_edit_error(api_error, 'n/a')

        assert pp_error.value.code == 'protectedpage'
        assert str(pp_error.value) == message
class TestPageApiArgs(unittest.TestCase):
    """Checks the API arguments ``Page`` sends through a mocked site."""

    def setUp(self):
        """Patch ``mwclient.client.Site`` and build the page under test."""
        title = 'Some page'
        self.page_text = 'Hello world'
        site_cls = mock.patch('mwclient.client.Site').start()
        self.site = site_cls()
        # Canned API response that is in place while the Page is constructed.
        self.site.api.return_value = {
            'query': {'pages': {'1': {'title': title}}}
        }
        self.site.rights = ['read']
        self.page = Page(self.site, title)
        # Canned API response for every request made by the tests themselves.
        revision = {'*': 'Hello world', 'timestamp': '2014-08-29T22:25:15Z'}
        page_info = {
            'ns': 0,
            'pageid': 2,
            'revisions': [revision],
            'title': title
        }
        self.site.api.return_value = {'query': {'pages': {'2': page_info}}}

    def get_last_api_call_args(self):
        """Return the keyword arguments of the last ``site.api`` call.

        The first positional argument of the call is left out; any further
        positional arguments are merged into the returned dict.
        """
        positional, keyword = self.site.api.call_args
        action, extra = positional[0], positional[1:]
        keyword.update(extra)
        return keyword

    def tearDown(self):
        """Stop every patch started in ``setUp``."""
        mock.patch.stopall()

    def test_get_page_text(self):
        """page.text() returns the canned text and sends the expected arguments."""
        text = self.page.text()
        sent = self.get_last_api_call_args()

        assert text == self.page_text
        assert sent == {
            'prop': 'revisions',
            'rvdir': 'older',
            'titles': self.page.page_title,
            'rvprop': 'content|timestamp',
            'rvlimit': '1'
        }

    def test_get_page_text_cached(self):
        """A second page.text() call is served from the cache unless cache=False."""
        self.page.revisions = mock.Mock(return_value=iter([]))
        self.page.text()
        self.page.text()
        # The second call hit the cache, so revisions ran only once.
        assert self.page.revisions.call_count == 1
        self.page.text(cache=False)
        # With the cache explicitly disabled, revisions runs again.
        assert self.page.revisions.call_count == 2

    def test_get_section_text(self):
        """The 'rvsection' parameter is sent to the API."""
        self.page.text(section=0)
        sent = self.get_last_api_call_args()

        assert sent['rvsection'] == '0'

    def test_get_text_expanded(self):
        """The 'rvexpandtemplates' parameter is sent to the API."""
        self.page.text(expandtemplates=True)
        sent = self.get_last_api_call_args()

        assert sent['rvexpandtemplates'] == '1'

    def test_get_text_expanded_deprecated(self):
        """page.get_expanded() also sends 'rvexpandtemplates' to the API."""
        self.page.get_expanded()
        sent = self.get_last_api_call_args()

        assert sent['rvexpandtemplates'] == '1'
class TestPageApiArgs(unittest.TestCase):
    """Checks the API arguments ``Page`` sends through a mocked site."""

    def setUp(self):
        """Patch ``mwclient.client.Site`` and build the page under test."""
        title = 'Some page'
        self.page_text = 'Hello world'
        site_cls = mock.patch('mwclient.client.Site').start()
        self.site = site_cls()
        # Canned GET response that is in place while the Page is constructed.
        self.site.get.return_value = {
            'query': {'pages': {'1': {'title': title}}}
        }
        self.site.rights = ['read']
        self.site.api_limit = 500
        self.site.version = (1, 32, 0)
        self.page = Page(self.site, title)
        # Canned GET response for every request made by the tests themselves.
        revision = {'*': 'Hello world', 'timestamp': '2014-08-29T22:25:15Z'}
        page_info = {
            'ns': 0,
            'pageid': 2,
            'revisions': [revision],
            'title': title,
        }
        self.site.get.return_value = {'query': {'pages': {'2': page_info}}}

    def get_last_api_call_args(self, http_method='POST'):
        """Return the keyword arguments of the last ``site.get``/``site.post`` call.

        The first positional argument of the call is left out; any further
        positional arguments are merged into the returned dict.
        """
        if http_method == 'GET':
            recorded = self.site.get.call_args
        else:
            recorded = self.site.post.call_args
        positional, keyword = recorded
        action, extra = positional[0], positional[1:]
        keyword.update(extra)
        return keyword

    def tearDown(self):
        """Stop every patch started in ``setUp``."""
        mock.patch.stopall()

    def test_get_page_text(self):
        """page.text() returns the canned text and sends the expected GET arguments."""
        text = self.page.text()
        sent = self.get_last_api_call_args(http_method='GET')

        assert text == self.page_text
        assert sent == {
            'prop': 'revisions',
            'rvdir': 'older',
            'titles': self.page.page_title,
            'rvprop': 'content|timestamp',
            'rvlimit': '1',
            'rvslots': 'main',
        }

    def test_get_page_text_cached(self):
        """A second page.text() call is served from the cache unless cache=False."""
        self.page.revisions = mock.Mock(return_value=iter([]))
        self.page.text()
        self.page.text()
        # The second call hit the cache, so revisions ran only once.
        assert self.page.revisions.call_count == 1
        self.page.text(cache=False)
        # With the cache explicitly disabled, revisions runs again.
        assert self.page.revisions.call_count == 2

    def test_get_section_text(self):
        """The 'rvsection' parameter is sent to the API."""
        self.page.text(section=0)
        sent = self.get_last_api_call_args(http_method='GET')

        assert sent['rvsection'] == '0'

    def test_get_text_expanded(self):
        """Expansion calls site.expandtemplates once and sends no 'rvexpandtemplates'."""
        self.page.text(expandtemplates=True)
        sent = self.get_last_api_call_args(http_method='GET')

        assert self.site.expandtemplates.call_count == 1
        assert sent.get('rvexpandtemplates') is None

    def test_assertuser_true(self):
        """assert=user is sent when force_login=True."""
        self.site.blocked = False
        self.site.rights = ['read', 'edit']
        self.site.logged_in = True
        self.site.force_login = True
        self.site.api.return_value = {'edit': {'result': 'Ok'}}
        self.page.save('Some text')
        sent = self.get_last_api_call_args()

        assert sent['assert'] == 'user'

    def test_assertuser_false(self):
        """assert=user is not sent when force_login=False."""
        self.site.blocked = False
        self.site.rights = ['read', 'edit']
        self.site.logged_in = False
        self.site.force_login = False
        self.site.api.return_value = {'edit': {'result': 'Ok'}}
        self.page.save('Some text')
        sent = self.get_last_api_call_args()

        assert 'assert' not in sent

    def test_handle_edit_error_assertuserfailed(self):
        """An 'assertuserfailed' APIError raises AssertUserFailedError."""
        api_error = APIError(
            'assertuserfailed',
            'Assertion that the user is logged in failed',
            'See https://en.wikipedia.org/w/api.php for API usage')

        with pytest.raises(AssertUserFailedError):
            self.page.handle_edit_error(api_error, 'n/a')

    def test_handle_edit_error_protected(self):
        """A 'protectedpage' APIError raises ProtectedPageError with code and message."""
        message = 'The "editprotected" right is required to edit this page'
        api_error = APIError(
            'protectedpage',
            message,
            'See https://en.wikipedia.org/w/api.php for API usage')

        with pytest.raises(ProtectedPageError) as pp_error:
            self.page.handle_edit_error(api_error, 'n/a')

        assert pp_error.value.code == 'protectedpage'
        assert str(pp_error.value) == message

    def test_get_page_categories(self):
        """page.categories() yields the canned categories and sends the expected GET arguments."""
        births = {
            "pageid": 1009371,
            "ns": 14,
            "title": "Category:1879 births",
        }
        deaths = {
            "pageid": 1005547,
            "ns": 14,
            "title": "Category:1955 deaths",
        }
        self.site.get.return_value = {
            "batchcomplete": "",
            "query": {"pages": {"1009371": births, "1005547": deaths}},
        }

        cats = list(self.page.categories())
        sent = self.get_last_api_call_args(http_method='GET')

        assert {
            'generator': 'categories',
            'titles': self.page.page_title,
            'iiprop': 'timestamp|user|comment|url|size|sha1|metadata|archivename',
            'inprop': 'protection',
            'prop': 'info|imageinfo',
            'gcllimit': repr(self.page.site.api_limit),
        } == sent
        assert {cat.name for cat in cats} == {
            'Category:1879 births',
            'Category:1955 deaths',
        }
class TestPageApiArgs(unittest.TestCase):
    """Checks the API arguments ``Page`` sends through a mocked site."""

    def setUp(self):
        """Patch ``mwclient.client.Site`` and build the page under test."""
        title = 'Some page'
        self.page_text = 'Hello world'
        site_cls = mock.patch('mwclient.client.Site').start()
        self.site = site_cls()
        # Canned API response that is in place while the Page is constructed.
        self.site.api.return_value = {
            'query': {'pages': {'1': {'title': title}}}
        }
        self.site.rights = ['read']
        self.page = Page(self.site, title)
        # Canned API response for every request made by the tests themselves.
        revision = {'*': 'Hello world', 'timestamp': '2014-08-29T22:25:15Z'}
        page_info = {
            'ns': 0,
            'pageid': 2,
            'revisions': [revision],
            'title': title
        }
        self.site.api.return_value = {'query': {'pages': {'2': page_info}}}

    def get_last_api_call_args(self):
        """Return the keyword arguments of the last ``site.api`` call.

        The first positional argument of the call is left out; any further
        positional arguments are merged into the returned dict.
        """
        positional, keyword = self.site.api.call_args
        action, extra = positional[0], positional[1:]
        keyword.update(extra)
        return keyword

    def tearDown(self):
        """Stop every patch started in ``setUp``."""
        mock.patch.stopall()

    def test_get_page_text(self):
        """page.text() returns the canned text and sends the expected arguments."""
        text = self.page.text()
        sent = self.get_last_api_call_args()

        assert text == self.page_text
        assert sent == {
            'prop': 'revisions',
            'rvdir': 'older',
            'titles': self.page.page_title,
            'rvprop': 'content|timestamp',
            'rvlimit': '1'
        }

    def test_get_page_text_cached(self):
        """A second page.text() call is served from the cache unless cache=False."""
        self.page.revisions = mock.Mock(return_value=iter([]))
        self.page.text()
        self.page.text()
        # The second call hit the cache, so revisions ran only once.
        assert self.page.revisions.call_count == 1
        self.page.text(cache=False)
        # With the cache explicitly disabled, revisions runs again.
        assert self.page.revisions.call_count == 2

    def test_get_section_text(self):
        """The 'rvsection' parameter is sent to the API."""
        self.page.text(section=0)
        sent = self.get_last_api_call_args()

        assert sent['rvsection'] == '0'

    def test_get_text_expanded(self):
        """The 'rvexpandtemplates' parameter is sent to the API."""
        self.page.text(expandtemplates=True)
        sent = self.get_last_api_call_args()

        assert sent['rvexpandtemplates'] == '1'

    def test_get_text_expanded_deprecated(self):
        """page.get_expanded() also sends 'rvexpandtemplates' to the API."""
        self.page.get_expanded()
        sent = self.get_last_api_call_args()

        assert sent['rvexpandtemplates'] == '1'
class TestPageApiArgs(unittest.TestCase):
    """Checks the API arguments ``Page`` sends through a mocked site."""

    def setUp(self):
        """Patch ``mwclient.client.Site`` and build the page under test."""
        title = "Some page"
        self.page_text = "Hello world"
        site_cls = mock.patch("mwclient.client.Site").start()
        self.site = site_cls()
        # Canned API response that is in place while the Page is constructed.
        self.site.api.return_value = {
            "query": {"pages": {"1": {"title": title}}}
        }
        self.site.rights = ["read"]
        self.page = Page(self.site, title)
        # Canned API response for every request made by the tests themselves.
        revision = {"*": "Hello world", "timestamp": "2014-08-29T22:25:15Z"}
        page_info = {
            "ns": 0,
            "pageid": 2,
            "revisions": [revision],
            "title": title,
        }
        self.site.api.return_value = {"query": {"pages": {"2": page_info}}}

    def get_last_api_call_args(self):
        """Return the keyword arguments of the last ``site.api`` call.

        The first positional argument of the call is left out; any further
        positional arguments are merged into the returned dict.
        """
        positional, keyword = self.site.api.call_args
        action, extra = positional[0], positional[1:]
        keyword.update(extra)
        return keyword

    def tearDown(self):
        """Stop every patch started in ``setUp``."""
        mock.patch.stopall()

    def test_get_page_text(self):
        """page.text() returns the canned text and sends the expected arguments."""
        text = self.page.text()
        sent = self.get_last_api_call_args()

        assert text == self.page_text
        assert sent == {
            "prop": "revisions",
            "rvdir": "older",
            "titles": self.page.page_title,
            "rvprop": "content|timestamp",
            "rvlimit": "1",
        }

    def test_get_page_text_cached(self):
        """A second page.text() call is served from the cache unless cache=False."""
        self.page.revisions = mock.Mock(return_value=iter([]))
        self.page.text()
        self.page.text()
        # The second call hit the cache, so revisions ran only once.
        assert self.page.revisions.call_count == 1
        self.page.text(cache=False)
        # With the cache explicitly disabled, revisions runs again.
        assert self.page.revisions.call_count == 2

    def test_get_section_text(self):
        """The 'rvsection' parameter is sent to the API."""
        self.page.text(section=0)
        sent = self.get_last_api_call_args()

        assert sent["rvsection"] == "0"

    def test_get_text_expanded(self):
        """The 'rvexpandtemplates' parameter is sent to the API."""
        self.page.text(expandtemplates=True)
        sent = self.get_last_api_call_args()

        assert sent["rvexpandtemplates"] == "1"

    def test_get_text_expanded_deprecated(self):
        """page.get_expanded() also sends 'rvexpandtemplates' to the API."""
        self.page.get_expanded()
        sent = self.get_last_api_call_args()

        assert sent["rvexpandtemplates"] == "1"
class InterwikiTask(object):
    """Tracks and edits the interlanguage links of one wiki page.

    The task keeps a working copy of the page text (``wikitext``) and of its
    language links (``interwiki_links``, language code -> list of titles).
    Links are written as ``{{lang|title}}`` for interwikis flagged ``fake``
    and as ``[[lang:title]]`` otherwise. Changes accumulate in memory until
    ``save_changes`` writes them back.
    """

    def __init__(self, page: Page, client: Site, interwikis: List[Interwiki], logger: Logger, lang: Lang):
        """Load the page text and its current language links.

        Args:
            page: Page to work on; ``text()`` and ``langlinks()`` are read.
            client: Site object, stored as ``self.client``.
            interwikis: Known interwiki definitions; their ``language``,
                ``fake``, ``skip`` and ``api`` attributes are used.
            logger: Logger receiving progress messages.
            lang: Translation provider; ``lang.t(key)`` returns a template.
        """
        self.page = page
        self.wikitext = self.page.text()
        self.client = client
        self.interwiki_links = self._group_langlinks(page)
        self.interwikis = interwikis
        self.logger = logger
        self.edited = False
        self.created = []
        self.deleted = []
        self.lang = lang

    @staticmethod
    def _group_langlinks(page):
        """Return the page's language links grouped as ``{language: [titles]}``."""
        grouped = dict()
        for language, link in page.langlinks():
            grouped.setdefault(language, []).append(link)
        return grouped

    def interwiki_exists(self, idioma):
        """Return True when the page already links to language ``idioma``."""
        return idioma in self.interwiki_links

    def __fetch_interwiki(self, idioma):
        """Return the interwiki definition for ``idioma``, or None if unknown."""
        for iw in self.interwikis:
            if iw.language == idioma:
                return iw
        return None

    def get_interwiki_wikicode(self, idioma, articulo):
        """Return the wikitext that links to ``articulo`` in language ``idioma``."""
        interwiki = self.__fetch_interwiki(idioma)
        if interwiki is not None and interwiki.fake:
            return "{{" + idioma + "|" + articulo + "}}"
        return "[[" + idioma + ":" + articulo + "]]"

    def remove_interwiki(self, language, reason, article):
        """Remove the link to ``article`` in ``language`` from the working text."""
        self.logger.info(
            self.lang.t("interwiki.removing").format(reason=reason,
                                                     lang=language,
                                                     src=self.page.name,
                                                     dest=article))
        self.edited = True
        self.wikitext = self.wikitext.replace(
            self.get_interwiki_wikicode(language, article), "")
        self.deleted.append(language)
        if language in self.interwiki_links:
            del self.interwiki_links[language]

    def create_interwiki(self, language, article):
        """Append a link to ``article`` in ``language`` to the working text."""
        self.logger.info(
            self.lang.t("interwiki.creating").format(lang=language,
                                                     src=self.page.name,
                                                     dest=article))
        self.edited = True
        self.wikitext = self.wikitext + "\n" + self.get_interwiki_wikicode(
            language, article)
        self.created.append(language)

    def locate_article_on_interwiki(self, language, article, existed_earlier=False):
        """Check whether ``article`` exists on the wiki for ``language``.

        The article URL is requested over HTTP. A 404 removes the link if it
        existed earlier; a 200 creates the link if it did not. A language
        with no interwiki definition has its link removed as obsolete, and an
        interwiki flagged ``skip`` is not checked at all.

        Returns:
            ``article`` when the link is (still) valid, otherwise None. On a
            request failure or any other status code nothing is changed and
            the previous state is reported.
        """
        interwiki = self.__fetch_interwiki(language)
        if interwiki is not None and interwiki.skip:
            return None
        if interwiki is None:
            self.logger.debug(
                self.lang.t("interwiki.dbg_obsolete").format(
                    lang=language, src=self.page.name, dest=article))
            self.remove_interwiki(language,
                                  self.lang.t("interwiki.obsolete"), article)
            return None
        url = "http://" + interwiki.api + "/wiki/" + quote(article)
        try:
            response = requests.get(url)
        except requests.RequestException:
            # Request failure: don't change stuff. Only requests' own errors
            # are handled so that KeyboardInterrupt and programming errors
            # are not silently swallowed.
            return article if existed_earlier else None
        if response.status_code == 404:
            self.logger.debug(
                self.lang.t("interwiki.dbg_not_found").format(
                    lang=language, src=self.page.name, dest=article))
            if existed_earlier:
                self.remove_interwiki(language,
                                      self.lang.t("interwiki.not_found"),
                                      article)
            return None
        elif response.status_code == 200:
            self.logger.debug(
                self.lang.t("interwiki.dbg_found").format(lang=language,
                                                          src=self.page.name,
                                                          dest=article))
            if not existed_earlier:
                self.create_interwiki(language, article)
            return article
        else:
            # Some other status: don't change stuff.
            return article if existed_earlier else None

    def missing_interwikis(self):
        """Return the language codes of known interwikis the page lacks.

        Regular interwikis are looked up in ``interwiki_links``. Interwikis
        flagged ``fake`` are searched for in the working text as a
        ``{{lang|...}}`` template that ends its line.
        """
        missing = []
        for interwiki in self.interwikis:
            if interwiki.fake:
                # The template may end with a newline or sit at the very end
                # of the text, which is where create_interwiki appends it.
                pattern = (r"\{\{" + re.escape(interwiki.language) +
                           r"\|[^}]+\}\}(?:\n|\Z)")
                found = re.search(pattern, self.wikitext) is not None
            else:
                found = interwiki.language in self.interwiki_links
            if not found:
                missing.append(interwiki.language)
        return missing

    def clean_broken_interwikis(self):
        """Re-check every existing language link and drop the broken ones."""
        self.logger.debug(
            self.lang.t("interwiki.cleanup").format(name=self.page.name))
        # Iterate over a snapshot: remove_interwiki deletes entries from
        # interwiki_links while the check runs.
        for lang, articles in list(self.interwiki_links.items()):
            for article in articles:
                self.locate_article_on_interwiki(lang,
                                                 article,
                                                 existed_earlier=True)

    def generate_summary(self):
        """Return the edit summary listing the added and removed languages."""
        tasks_done = []
        if self.created:
            tasks_done.append(
                self.lang.t("interwiki.reason_added") +
                ", ".join(self.created))
        if self.deleted:
            tasks_done.append(
                self.lang.t("interwiki.reason_removed") +
                ", ".join(self.deleted))
        return self.lang.t("interwiki.reason_header") + "; ".join(tasks_done)

    def save_changes(self):
        """Save pending edits, then reload the page text and language links.

        The page is edited only when something changed. The bookkeeping lists
        are reset and the page re-read either way.
        """
        if self.edited:
            self.logger.info(
                self.lang.t("interwiki.saving").format(name=self.page.name))
            self.page.edit(self.wikitext, summary=self.generate_summary())
        self.created = []
        self.deleted = []
        self.edited = False
        self.page = Page(self.page.site, self.page.name)
        self.wikitext = self.page.text()
        self.interwiki_links = self._group_langlinks(self.page)
class TestPageApiArgs(unittest.TestCase):
    """Checks the API arguments ``Page`` sends through a mocked site."""

    def setUp(self):
        """Patch ``mwclient.client.Site`` and build the page under test."""
        title = 'Some page'
        self.page_text = 'Hello world'
        site_cls = mock.patch('mwclient.client.Site').start()
        self.site = site_cls()
        # Canned GET response that is in place while the Page is constructed.
        self.site.get.return_value = {
            'query': {'pages': {'1': {'title': title}}}
        }
        self.site.rights = ['read']
        self.page = Page(self.site, title)
        # Canned GET response for every request made by the tests themselves.
        revision = {'*': 'Hello world', 'timestamp': '2014-08-29T22:25:15Z'}
        page_info = {
            'ns': 0,
            'pageid': 2,
            'revisions': [revision],
            'title': title
        }
        self.site.get.return_value = {'query': {'pages': {'2': page_info}}}

    def get_last_api_call_args(self, http_method='POST'):
        """Return the keyword arguments of the last ``site.get``/``site.post`` call.

        The first positional argument of the call is left out; any further
        positional arguments are merged into the returned dict.
        """
        if http_method == 'GET':
            recorded = self.site.get.call_args
        else:
            recorded = self.site.post.call_args
        positional, keyword = recorded
        action, extra = positional[0], positional[1:]
        keyword.update(extra)
        return keyword

    def tearDown(self):
        """Stop every patch started in ``setUp``."""
        mock.patch.stopall()

    def test_get_page_text(self):
        """page.text() returns the canned text and sends the expected GET arguments."""
        text = self.page.text()
        sent = self.get_last_api_call_args(http_method='GET')

        assert text == self.page_text
        assert sent == {
            'prop': 'revisions',
            'rvdir': 'older',
            'titles': self.page.page_title,
            'rvprop': 'content|timestamp',
            'rvlimit': '1'
        }

    def test_get_page_text_cached(self):
        """A second page.text() call is served from the cache unless cache=False."""
        self.page.revisions = mock.Mock(return_value=iter([]))
        self.page.text()
        self.page.text()
        # The second call hit the cache, so revisions ran only once.
        assert self.page.revisions.call_count == 1
        self.page.text(cache=False)
        # With the cache explicitly disabled, revisions runs again.
        assert self.page.revisions.call_count == 2

    def test_get_section_text(self):
        """The 'rvsection' parameter is sent to the API."""
        self.page.text(section=0)
        sent = self.get_last_api_call_args(http_method='GET')

        assert sent['rvsection'] == '0'

    def test_get_text_expanded(self):
        """The 'rvexpandtemplates' parameter is sent to the API."""
        self.page.text(expandtemplates=True)
        sent = self.get_last_api_call_args(http_method='GET')

        assert sent['rvexpandtemplates'] == '1'

    def test_assertuser_true(self):
        """assert=user is sent when force_login=True."""
        self.site.blocked = False
        self.site.rights = ['read', 'edit']
        self.site.logged_in = True
        self.site.force_login = True
        self.site.api.return_value = {
            'edit': {'result': 'Ok'}
        }
        self.page.save('Some text')
        sent = self.get_last_api_call_args()

        assert sent['assert'] == 'user'

    def test_assertuser_false(self):
        """assert=user is not sent when force_login=False."""
        self.site.blocked = False
        self.site.rights = ['read', 'edit']
        self.site.logged_in = False
        self.site.force_login = False
        self.site.api.return_value = {
            'edit': {'result': 'Ok'}
        }
        self.page.save('Some text')
        sent = self.get_last_api_call_args()

        assert 'assert' not in sent

    def test_handle_edit_error_assertuserfailed(self):
        """An 'assertuserfailed' APIError raises AssertUserFailedError."""
        api_error = APIError(
            'assertuserfailed',
            'Assertion that the user is logged in failed',
            'See https://en.wikipedia.org/w/api.php for API usage')

        with pytest.raises(AssertUserFailedError):
            self.page.handle_edit_error(api_error, 'n/a')

    def test_handle_edit_error_protected(self):
        """A 'protectedpage' APIError raises ProtectedPageError with code and message."""
        message = 'The "editprotected" right is required to edit this page'
        api_error = APIError(
            'protectedpage',
            message,
            'See https://en.wikipedia.org/w/api.php for API usage')

        with pytest.raises(ProtectedPageError) as pp_error:
            self.page.handle_edit_error(api_error, 'n/a')

        assert pp_error.value.code == 'protectedpage'
        assert str(pp_error.value) == message