def task(self):  # pragma: no cover
    """Rename the template parameter ``PND`` to ``GND`` on the pages found by PetScan.

    The PetScan query selects pages that use the template "ADBDaten" and are
    in the category "ADB:Ohne GND-Link". Every returned page is saved, whether
    or not a replacement took place.

    :return: always ``True``
    """
    pnd_parameter = re.compile(r"\n\|PND=")
    petscan = PetScan()
    petscan.add_yes_template("ADBDaten")
    petscan.add_positive_category("ADB:Ohne GND-Link")
    for entry in petscan.run():
        title = entry["title"]
        page = Page(self.wiki, title)
        # subn leaves the text untouched and reports 0 when nothing matches
        updated_text, replacements = pnd_parameter.subn("\n|GND=", page.text)
        if replacements:
            self.logger.info(f"change {title}")
        page.text = updated_text
        page.save("PND -> GND", botflag=True)
    return True
Exemple #2
0
    def _prepare_searcher(self) -> PetScan:
        """Build the PetScan query for pages that use the template "REDaten".

        In debug mode only namespace 2 is searched. Otherwise the query covers
        namespace 0, the union of the three RE status categories, sorted by
        date in descending order, with a timeout value of 120.

        :return: the configured (not yet executed) searcher
        """
        petscan = PetScan()
        petscan.add_yes_template("REDaten")

        if self.debug:
            petscan.add_namespace(2)
            return petscan

        petscan.add_namespace(0)
        for category in ("RE:Fertig", "RE:Korrigiert", "RE:Platzhalter"):
            petscan.add_positive_category(category)
        petscan.set_logic_union()
        petscan.set_sort_criteria("date")
        petscan.set_sortorder_decending()
        petscan.set_timeout(120)
        return petscan
Exemple #3
0
class FixReStructure(OneTimeBot):
    """One-time bot that inserts ``{{REAutor|OFF}}`` into RE pages lacking a REAutor template.

    The pages are taken from a PetScan query: category "RE:Verweisung",
    template "REDaten" present, template "REAutor" absent.
    """
    bot_name = '20180125_FixReStructure'

    def __init__(self, wiki, debug):
        """Set up the PetScan searcher and a five hour timeout value."""
        OneTimeBot.__init__(self, wiki, debug)
        self.searcher = PetScan()
        self.timeout = timedelta(hours=5)

    def get_lemmas(self):
        """Yield a ``Page`` for every hit of the PetScan query, largest pages first."""
        self.searcher.add_positive_category("RE:Verweisung")
        self.searcher.add_no_template("REAutor")
        self.searcher.add_yes_template("REDaten")
        self.searcher.set_sort_criteria("size")
        self.searcher.set_sortorder_decending()
        for lemma in self.searcher.run():
            yield Page(self.wiki, lemma['title'])

    @staticmethod
    def process_text(text):
        """Return ``text`` with ``{{REAutor|OFF}}`` inserted.

        If the text contains a "== Anmerkungen" heading, the template is put
        directly in front of it (replacing the whitespace before the heading).
        Otherwise it is appended to the end. Trailing whitespace is stripped in
        both cases.

        :param text: wiki text of the page
        :return: the modified wiki text
        """
        # Raw string: "\s" inside a normal string literal is an invalid escape
        # sequence and is rejected by newer Python versions.
        regex_anmerkungen = re.compile(r"\s*== Anmerkungen")
        if regex_anmerkungen.search(text):
            return regex_anmerkungen.sub("\n{{REAutor|OFF}}\n== Anmerkungen",
                                         text).rstrip()
        return text.rstrip() + "\n{{REAutor|OFF}}"

    def task(self):
        """Process all pages and save the ones whose text changed.

        :return: ``False`` when the watchdog stopped the run early, ``True``
                 when every page was handled
        """
        for idx, page in enumerate(self.get_lemmas()):
            self.logger.info(f"{idx}/{page}")
            pre_text = page.text
            page.text = self.process_text(pre_text)
            if pre_text != page.text:
                page.save(
                    "Inserted a REAutor statement for a correct structure")
            if self._watchdog():
                self.logger.warning("Enough for the day, don't run to long.")
                return False
        return True
class FixReStructure(OneTimeBot):
    """One-time bot that inserts ``{{REAutor|OFF}}`` into RE pages lacking a REAutor template.

    The pages are taken from a PetScan query: category "RE:Verweisung",
    template "REDaten" present, template "REAutor" absent.
    """
    bot_name = '20180125_FixReStructure'

    def __init__(self, wiki, debug):
        """Set up the PetScan searcher and a five hour timeout value."""
        OneTimeBot.__init__(self, wiki, debug)
        self.searcher = PetScan()
        self.timeout = timedelta(hours=5)

    def get_lemmas(self):
        """Yield a ``Page`` for every hit of the PetScan query, largest pages first."""
        self.searcher.add_positive_category("RE:Verweisung")
        self.searcher.add_no_template("REAutor")
        self.searcher.add_yes_template("REDaten")
        self.searcher.set_sort_criteria("size")
        self.searcher.set_sortorder_decending()
        for lemma in self.searcher.run():
            yield Page(self.wiki, lemma['title'])

    @staticmethod
    def process_text(text):
        """Return ``text`` with ``{{REAutor|OFF}}`` inserted.

        If the text contains a "== Anmerkungen" heading, the template is put
        directly in front of it (replacing the whitespace before the heading).
        Otherwise it is appended to the end. Trailing whitespace is stripped in
        both cases.

        :param text: wiki text of the page
        :return: the modified wiki text
        """
        # Raw string: "\s" inside a normal string literal is an invalid escape
        # sequence and is rejected by newer Python versions.
        regex_anmerkungen = re.compile(r"\s*== Anmerkungen")
        if regex_anmerkungen.search(text):
            return regex_anmerkungen.sub("\n{{REAutor|OFF}}\n== Anmerkungen", text).rstrip()
        return text.rstrip() + "\n{{REAutor|OFF}}"

    def task(self):
        """Process all pages and save the ones whose text changed.

        :return: ``False`` when the watchdog stopped the run early, ``True``
                 when every page was handled
        """
        for idx, page in enumerate(self.get_lemmas()):
            self.logger.info(f"{idx}/{page}")
            pre_text = page.text
            page.text = self.process_text(pre_text)
            if pre_text != page.text:
                page.save("Inserted a REAutor statement for a correct structure")
            if self._watchdog():
                self.logger.warning("Enough for the day, don't run to long.")
                return False
        return True
# -*- coding: utf-8 -*-
__author__ = 'eso'
import sys
sys.path.append('../../')
from tools.petscan import PetScan
import re
import requests
import pywikibot

# Find every page in the namespaces "Seite" and 0 that uses the template
# "Sperrschrift" and rename the template to "SperrSchrift".
searcher_catscan = PetScan()
for namespace in ('Seite', 0):
    searcher_catscan.add_namespace(namespace)
searcher_catscan.add_yes_template('Sperrschrift')
sites = searcher_catscan.run()
site = pywikibot.Site()

for lemma in sites:
    entry = lemma['a']
    if entry['nstext'] == '(Article)':
        # main namespace: the title carries no namespace prefix
        full_title = entry['title']
    else:
        full_title = ':'.join((entry['nstext'], entry['title']))
    page = pywikibot.Page(site, full_title)
    test_for_fit = re.search('Sperrschrift', page.text)
    if not test_for_fit:
        continue
    page.text = re.sub('Sperrschrift', 'SperrSchrift', page.text)
    page.save(
        summary='bot edit: Vereinheitlichung der Vorlage Sperrschrift zu SperrSchrift',
        botflag=True,
    )
# -*- coding: utf-8 -*-
__author__ = 'eso'
import sys
sys.path.append('../../')
from tools.petscan import PetScan
import re
import requests
import pywikibot

# Find every page in the namespaces "Seite" and 0 that uses the template
# "Sperrschrift" and rename the template to "SperrSchrift".
searcher_catscan = PetScan()
for namespace in ('Seite', 0):
    searcher_catscan.add_namespace(namespace)
searcher_catscan.add_yes_template('Sperrschrift')
sites = searcher_catscan.run()
site = pywikibot.Site()

for lemma in sites:
    entry = lemma['a']
    if entry['nstext'] == '(Article)':
        # main namespace: the title carries no namespace prefix
        full_title = entry['title']
    else:
        full_title = ':'.join((entry['nstext'], entry['title']))
    page = pywikibot.Page(site, full_title)
    test_for_fit = re.search('Sperrschrift', page.text)
    if not test_for_fit:
        continue
    page.text = re.sub('Sperrschrift', 'SperrSchrift', page.text)
    page.save(
        summary='bot edit: Vereinheitlichung der Vorlage Sperrschrift zu SperrSchrift',
        botflag=True,
    )
Exemple #7
0
class TestCatScan(TestCase):
    """Unit tests for the option handling and URL construction of ``PetScan``."""

    def setUp(self):
        # every test starts with a fresh, unconfigured searcher
        self.petscan = PetScan()

    def test_add_options(self):
        self.petscan.add_options({"max_age": "45"})
        self.petscan.add_options({"smaller": "300"})
        self.assertDictEqual({"smaller": "300", "max_age": "45"}, self.petscan.options)

    def test_add_categoy(self):
        # the optional second argument is appended to the name after a "|"
        self.petscan.add_positive_category("pos1")
        self.petscan.add_positive_category("pos2")
        self.petscan.add_positive_category("pos3", 2)
        self.petscan.add_negative_category("neg1")
        self.petscan.add_negative_category("neg2")
        self.petscan.add_negative_category("neg3", 3)
        self.assertEqual(["pos1", "pos2", "pos3|2"], self.petscan.categories["positive"])
        self.assertEqual(["neg1", "neg2", "neg3|3"], self.petscan.categories["negative"])

    def test_add_namespace(self):
        # namespaces may be given as number, as name or as a list of both
        self.petscan.add_namespace(0)
        self.petscan.add_namespace("Datei")
        self.petscan.add_namespace([2, "Vorlage"])
        self.assertDictEqual({"ns[0]": "1", "ns[2]": "1", "ns[6]": "1", "ns[10]": "1"}, self.petscan.options)

    def test_activate_redirects(self):
        self.petscan.activate_redirects()
        self.assertDictEqual({"show_redirects": "yes"}, self.petscan.options)

    def test_deactivate_redirects(self):
        self.petscan.deactivate_redirects()
        self.assertDictEqual({"show_redirects": "no"}, self.petscan.options)

    def test_last_change_before(self):
        self.petscan.last_change_before(datetime(year=1234, month=1, day=1, hour=2, minute=2, second=42))
        self.assertDictEqual({"before": "12340101020242"}, self.petscan.options)

    def test_last_change_after(self):
        self.petscan.last_change_after(datetime(year=1234, month=1, day=1, hour=2, minute=2, second=42))
        self.assertDictEqual({"after": "12340101020242"}, self.petscan.options)

    def test_max_age(self):
        self.petscan.max_age(1234)
        self.assertDictEqual({"max_age": "1234"}, self.petscan.options)

    def test_only_new(self):
        self.petscan.only_new()
        self.assertDictEqual({"only_new": "1"}, self.petscan.options)

    def test_smaller_then(self):
        self.petscan.smaller_then(42)
        self.assertDictEqual({"smaller": "42"}, self.petscan.options)

    def test_larger_then(self):
        self.petscan.larger_then(42)
        self.assertDictEqual({"larger": "42"}, self.petscan.options)

    def test_get_wikidata(self):
        self.petscan.get_wikidata_items()
        self.assertDictEqual({"wikidata_item": "any"}, self.petscan.options)

    def test_get_Pages_with_wikidata(self):
        self.petscan.get_pages_with_wd_items()
        self.assertDictEqual({"wikidata_item": "with"}, self.petscan.options)

    def test_get_Pages_without_wikidata(self):
        self.petscan.get_pages_without_wd_items()
        self.assertDictEqual({"wikidata_item": "without"}, self.petscan.options)

    def test_set_or(self):
        self.petscan.set_logic_union()
        self.assertDictEqual({"combination": "union"}, self.petscan.options)

    def test_set_regex(self):
        self.petscan.set_regex_filter("abc")
        self.assertDictEqual({"regexp_filter": "abc"}, self.petscan.options)

    def test_set_last_edits(self):
        self.petscan.set_last_edit_bots(True)
        self.petscan.set_last_edit_anons(False)
        self.petscan.set_last_edit_flagged()
        self.assertDictEqual({"edits[bots]": "yes", "edits[anons]": "no", "edits[flagged]": "yes"}, self.petscan.options)

    def test_construct_cat_string(self):
        # entries are joined with CRLF, spaces are turned into "+"
        self.petscan.add_positive_category("pos 1")
        self.petscan.add_positive_category("pos2")
        self.petscan.add_negative_category("neg1")
        self.petscan.add_negative_category("neg 2")
        self.petscan.add_negative_category("neg3")
        self.assertEqual("pos+1\r\npos2", self.petscan._construct_list_argument(self.petscan.categories["positive"]))
        self.assertEqual("neg1\r\nneg+2\r\nneg3",
                         self.petscan._construct_list_argument(self.petscan.categories["negative"]))

    def test_construct_templates(self):
        self.petscan.add_yes_template("yes1")
        self.petscan.add_yes_template("yes2")
        self.petscan.add_any_template("any1")
        self.petscan.add_any_template("any2")
        self.petscan.add_any_template("any3")
        self.petscan.add_no_template("no1")
        self.petscan.add_no_template("no2")
        self.assertEqual(str(self.petscan),
                         "https://petscan.wmflabs.org/?language=de&project=wikisource&templates_yes=yes1%0D%0Ayes2&templates_any=any1%0D%0Aany2%0D%0Aany3&templates_no=no1%0D%0Ano2")

    def test_construct_outlinks(self):
        self.petscan.add_yes_outlink("yes1")
        self.petscan.add_yes_outlink("yes2")
        self.petscan.add_any_outlink("any1")
        self.petscan.add_any_outlink("any2")
        self.petscan.add_any_outlink("any3")
        self.petscan.add_no_outlink("no1")
        self.petscan.add_no_outlink("no2")
        self.assertEqual(str(self.petscan),
                         "https://petscan.wmflabs.org/?language=de&project=wikisource&outlinks_yes=yes1%0D%0Ayes2&outlinks_any=any1%0D%0Aany2%0D%0Aany3&outlinks_no=no1%0D%0Ano2")

    def test_construct_links_to(self):
        self.petscan.add_yes_links_to("yes1")
        self.petscan.add_yes_links_to("yes2")
        self.petscan.add_any_links_to("any1")
        self.petscan.add_any_links_to("any2")
        self.petscan.add_any_links_to("any3")
        self.petscan.add_no_links_to("no1")
        self.petscan.add_no_links_to("no2")
        self.assertEqual(str(self.petscan),
                         "https://petscan.wmflabs.org/?language=de&project=wikisource&links_to_all=yes1%0D%0Ayes2&links_to_any=any1%0D%0Aany2%0D%0Aany3&links_to_no=no1%0D%0Ano2")

    def test_construct_options(self):
        self.petscan.options = {"max_age": "1234",
                                 "get_q": "1",
                                 "show_redirects": "yes"}
        # assertIn reports the searched string and the URL on failure,
        # unlike comparing a boolean against True
        query_url = str(self.petscan)
        self.assertIn("&max_age=1234", query_url)
        self.assertIn("&get_q=1", query_url)
        self.assertIn("&show_redirects=yes", query_url)

    def test_construct_string(self):
        self.petscan.set_language("en")
        self.petscan.set_project("wikipedia")
        # only a positive category
        self.petscan.add_positive_category("test")
        self.assertEqual(str(self.petscan),
                         "https://petscan.wmflabs.org/?language=en&project=wikipedia&categories=test")
        # only a negative category
        self.petscan.categories = {"positive": [], "negative": []}
        self.petscan.add_negative_category("test")
        self.assertEqual(str(self.petscan),
                         "https://petscan.wmflabs.org/?language=en&project=wikipedia&negcats=test")
        # only an option
        self.petscan.categories = {"positive": [], "negative": []}
        self.petscan.add_options({"max_age": "10"})
        self.assertEqual(str(self.petscan),
                         "https://petscan.wmflabs.org/?language=en&project=wikipedia&max_age=10")

    def test_do_positive(self):
        # a successful request returns the list of page entries of the answer
        with requests_mock.mock() as mock:
            mock.get("https://petscan.wmflabs.org/"
                     "?language=de&project=wikisource&format=json&doit=1",
                     text='{"n": "result","a": {"querytime_sec": 1.572163,'
                          '"query": "https://petscan.wmflabs.org/?language=de'
                          '&project=wikisource&categories=Autoren&get_q=1'
                          '&show_redirects=no&ns[0]=1&max_age=48'
                          '&format=json&doit=1"},'
                          '"*": [{"n": "combination",'
                          '"a": {"type": "subset",'
                          '"*": [{"id": 3279,'
                          '"len": 10197,'
                          '"n": "page",'
                          '"namespace": 0,'
                          '"nstext": "",'
                          '"q": "Q60644",'
                          '"title": "Friedrich_Rückert",'
                          '"touched": "20161024211701"}]}}]}')
            self.assertEqual(self.petscan.run(), [{"id": 3279,
                                                   "len": 10197,
                                                   "n": "page",
                                                   "namespace": 0,
                                                   "nstext": "",
                                                   "q": "Q60644",
                                                   "title": "Friedrich_Rückert",
                                                   "touched": "20161024211701"}])

    def test_do_negative(self):
        # a 404 answer must surface as a ConnectionError
        with requests_mock.mock() as mock:
            mock.get("https://petscan.wmflabs.org/"
                     "?language=de&project=wikisource&format=json&doit=1",
                     status_code=404)
            with self.assertRaises(ConnectionError):
                self.petscan.run()
Exemple #8
0
class AuthorList(CanonicalBot):
    """Bot that regenerates the wiki page "Liste der Autoren".

    It queries PetScan for main-namespace pages in the category "Autoren"
    that use the template "Personendaten", stores the values extracted from
    that template in ``self.data`` and renders all stored authors as a
    sortable wikitable.
    """
    # pylint: disable=bare-except,too-many-branches,broad-except

    _space_regex = re.compile(r"\s+")

    # Optional template parameters, each described by:
    # (key in the author dict, template parameter, collapse whitespace?,
    #  wording used in the warning when the parameter cannot be read)
    _OPTIONAL_PARAMETERS = (
        ("birth", "GEBURTSDATUM", True, "a birthdate"),
        ("death", "STERBEDATUM", True, "a deathdate"),
        ("description", "KURZBESCHREIBUNG", False, "a description"),
        ("synonyms", "ALTERNATIVNAMEN", False, "synonyms"),
    )

    def __init__(self, wiki, debug):
        CanonicalBot.__init__(self, wiki, debug)
        self.searcher = PetScan()
        self.repo = self.wiki.data_repository()  # this is a DataSite object
        self.string_list = []
        # matches a date that is only a wikidata property reference
        self.match_property = re.compile(r"\{\{#property:P(\d{1,4})\}\}")
        self.number_to_month = {
            1: "Januar",
            2: "Februar",
            3: "März",
            4: "April",
            5: "Mai",
            6: "Juni",
            7: "Juli",
            8: "August",
            9: "September",
            10: "Oktober",
            11: "November",
            12: "Dezember"
        }

    def __enter__(self):
        """Enter the bot context; on the first day of a month the stored data is reset."""
        CanonicalBot.__enter__(self)
        if self.timestamp.start_of_run.day == 1:
            self.data.assign_dict({})
            self.logger.warning(
                "The data is thrown away. It is the first of the month")
        return self

    def task(self):
        """Update the database and overwrite the list page if its content changed.

        :return: always ``True``
        """
        lemma_list = self._run_searcher()
        self._build_database(lemma_list)
        if self.debug:
            dump = Page(self.wiki, f"Benutzer:THEbotIT/{self.bot_name}")
        else:
            dump = Page(self.wiki, "Liste der Autoren")
        old_text = dump.text
        new_text = self._convert_to_table()
        if new_text[150:] != old_text[150:]:  # compare all but the date
            dump.text = new_text
            dump.save("Die Liste wurde auf den aktuellen Stand gebracht.",
                      botflag=True)
        else:
            self.logger.info("Heute gab es keine Änderungen, "
                             "daher wird die Seite nicht überschrieben.")
        return True

    def _run_searcher(self):
        """Configure and run the PetScan query for the author pages.

        The "after" restriction is set to five days ago (at midnight) in debug
        mode, to the timestamp of the last run if that run was successful and
        data exists, and is left unset otherwise.

        :return: the result of ``self.searcher.run()``
        """
        # was the last run successful
        if self.debug:
            five_days_ago = datetime.now() - timedelta(days=5)
            self.searcher.last_change_after(
                datetime(year=five_days_ago.year,
                         month=five_days_ago.month,
                         day=five_days_ago.day))
        elif self.last_run_successful and self.data:
            start_of_search = self.create_timestamp_for_search()
            self.searcher.last_change_after(start_of_search)
            self.logger.info(
                f"The date {start_of_search.strftime('%d.%m.%Y')} "
                f"is set to the argument \"after\".")
        else:
            self.logger.warning(
                "There was no timestamp found of the last run, "
                "so the argument \"after\" is not set.")
        self.searcher.add_namespace(0)  # search in main namespace
        self.searcher.add_positive_category("Autoren")
        self.searcher.add_yes_template("Personendaten")
        self.searcher.get_wikidata_items()

        self.logger.debug(self.searcher)

        entries_to_search = self.searcher.run()
        return entries_to_search

    def _strip_spaces(self, raw_string: str):
        """Return ``raw_string`` stripped, with every whitespace run collapsed to one space."""
        # Pattern.sub takes the replacement first and the subject second.
        return self._space_regex.sub(" ", raw_string.strip())

    def _build_database(self, lemma_list):
        """Refresh the entry in ``self.data`` for every author in ``lemma_list``.

        An existing entry is deleted first. It is only re-created when a
        "Personendaten" block is found on the page and can be evaluated.
        Problems with a single author are logged and do not stop the loop.

        :param lemma_list: PetScan result entries; the keys "id" and "title"
                           are read, "q" is optional
        """
        for idx, author in enumerate(lemma_list):
            self.logger.debug(f"{idx + 1}/{len(lemma_list)} {author['title']}")
            # delete preexisting data of this author
            # NOTE(review): the entry is deleted under str(id) but stored under
            # the raw id below - presumably the keys become strings when the
            # data is persisted; verify against the data container.
            try:
                del self.data[str(author["id"])]
            except KeyError:
                if self.last_run_successful:
                    self.logger.info(
                        f"Can't delete old entry of [[{author['title']}]]")

            dict_author = {"title": author["title"]}
            # extract the Personendaten-block from the wikisource page
            page = Page(self.wiki, author["title"])
            try:
                personendaten = re.search(
                    r"\{\{Personendaten(?:.|\n)*?\n\}\}\n", page.text)
                if personendaten is None:
                    self.logger.error(
                        f"No valid block \"Personendaten\" was found for "
                        f"[[{author['title']}]].")
                    continue
                self._fill_author_dict(
                    dict_author, TemplateHandler(personendaten.group()), author)
                self.data.update({author["id"]: dict_author})
            except Exception as exception:
                self.logger.exception("Exception not catched: ",
                                      exc_info=exception)
                self.logger.error(f"author {author['title']} have a problem")

    def _fill_author_dict(self, dict_author, template_extractor, author):
        """Copy the values of the Personendaten template into ``dict_author``."""
        title = author["title"]
        # name and first name are mandatory: a failure here aborts this author
        dict_author["name"] = self._strip_spaces(
            template_extractor.get_parameter("NACHNAME")["value"])
        dict_author["first_name"] = self._strip_spaces(
            template_extractor.get_parameter("VORNAMEN")["value"])

        # optional parameters fall back to an empty string
        for key, parameter, collapse, label in self._OPTIONAL_PARAMETERS:
            try:
                value = template_extractor.get_parameter(parameter)["value"]
                dict_author[key] = \
                    self._strip_spaces(value) if collapse else value
            except Exception:
                dict_author[key] = ""
                self.logger.warning(
                    f"Templatehandler couldn't find {label} for: "
                    f"[[{title}]]")

        try:
            dict_author["sortkey"] = \
                template_extractor.get_parameter("SORTIERUNG")["value"]
            if dict_author["sortkey"] == "":
                raise ValueError
        except Exception:
            self.logger.debug(f"there is no sortkey for [[{title}]].")
            # make a dummy key
            if not dict_author["name"]:
                dict_author["sortkey"] = dict_author["first_name"]
                self.logger.warning("Author has no last name.")
            elif not dict_author["first_name"]:
                dict_author["sortkey"] = dict_author["name"]
                self.logger.warning("Author has no last first_name.")
            else:
                dict_author["sortkey"] = \
                    dict_author["name"] + ", " + dict_author["first_name"]

        try:
            dict_author["wikidata"] = author["q"]
        except KeyError:
            self.logger.warning(
                f"The autor [[{title}]] has no wikidata_item")

    @staticmethod
    def _sort_author_list(list_authors):
        """Sort ``list_authors`` in place by sort key (index 0), ties by birth sort key (index 5).

        One stable sort on the key pair gives the same order as sorting by
        sort key and then re-sorting every run of equal sort keys by birth
        date, and it also works when such a run reaches the end of the list.
        """
        list_authors.sort(key=lambda x: (x[0], x[5]))

    def _convert_to_table(self):
        """Render all authors in ``self.data`` as wiki text.

        The lines are appended to ``self.string_list``.

        :return: the lines of ``self.string_list`` joined by newlines
        """
        # pylint: disable=too-many-locals
        # make a list of lists
        self.logger.info("Start compiling.")
        list_authors = []
        for key in self.data:
            author_dict = self.data[key]
            list_author = [
                author_dict["sortkey"],  # 0
                author_dict["title"].replace("_", " "),  # 1
                author_dict["name"],  # 2
                author_dict["first_name"],  # 3
            ]

            for event in ["birth", "death"]:
                list_author.append(
                    self._handle_birth_and_death(event, author_dict))  # 4,6
                try:
                    list_author.append(str(DateConversion(
                        list_author[-1])))  # 5,7
                except ValueError:
                    self.logger.error(
                        f"Can´t compile sort key for {author_dict['title']}: "
                        f"{event}/{author_dict[event]}")
                    list_author.append("!-00-00")  # 5,7
            list_author.append(author_dict["description"])  # 8
            list_authors.append(list_author)

        # sorting the list
        self.logger.info("Start sorting.")
        self._sort_author_list(list_authors)

        self.logger.info("Start printing.")
        start_of_run = self.timestamp.start_of_run
        self.string_list.append(
            f"Diese Liste der Autoren enthält alle {len(self.data)}<ref>Stand: "
            f"{start_of_run.day}.{start_of_run.month}.{start_of_run.year}, "
            f"{self.timestamp.start_of_run.strftime('%H:%M')} (UTC)</ref> Autoren, "
            f"zu denen in Wikisource eine Autorenseite existiert.")
        self.string_list.append(
            "Die Liste kann mit den Buttons neben den Spaltenüberschriften"
            " nach der jeweiligen Spalte sortiert werden.")
        self.string_list.append("<!--")
        self.string_list.append(
            "Diese Liste wurde durch ein Computerprogramm erstellt, "
            "das die Daten verwendet, "
            "die aus den Infoboxen auf den Autorenseiten stammen.")
        self.string_list.append(
            "Sollten daher Fehler vorhanden sein, "
            "sollten diese jeweils dort korrigiert werden.")
        self.string_list.append("-->")
        self.string_list.append("{|class=\"wikitable sortable\"")
        self.string_list.append("!style=\"width:20%\"| Name")
        self.string_list.append(
            "!data-sort-type=\"text\" style=\"width:15%\"| Geb.-datum")
        self.string_list.append(
            "!data-sort-type=\"text\" style=\"width:15%\"| Tod.-datum")
        self.string_list.append(
            "!class=\"unsortable\" style=\"width:50%\"| Beschreibung")
        for list_author in list_authors:
            aut_sort, aut_page, aut_sur, aut_pre, birth_str, \
                birth_sort, death_str, death_sort, description = \
                list_author
            self.string_list.append("|-")
            # link text: "surname, first name" or whichever part exists
            if aut_sur and aut_pre:
                self.string_list.append(f"|data-sort-value=\"{aut_sort}\"|"
                                        f"[[{aut_page}|{aut_sur}, {aut_pre}]]")
            elif aut_pre:
                self.string_list.append(
                    f"|data-sort-value=\"{aut_sort}\"|[[{aut_page}|{aut_pre}]]"
                )
            else:
                self.string_list.append(
                    f"|data-sort-value=\"{aut_sort}\"|[[{aut_page}|{aut_sur}]]"
                )
            self.string_list.append(
                f"|data-sort-value=\"{birth_sort}\"|{birth_str}")
            self.string_list.append(
                f"|data-sort-value=\"{death_sort}\"|{death_str}")
            self.string_list.append(f"|{description}")
        self.string_list.append("|}")
        self.string_list.append('')
        self.string_list.append("== Anmerkungen ==")
        self.string_list.append("<references/>")
        self.string_list.append('')
        self.string_list.append("{{SORTIERUNG:Autoren #Liste der}}")
        self.string_list.append("[[Kategorie:Listen]]")
        self.string_list.append("[[Kategorie:Autoren|!]]")

        return "\n".join(self.string_list)

    def _handle_birth_and_death(self, event, author_dict):
        """Return the display string for the birth or death date of an author.

        The value stored in ``author_dict`` is used unless it is empty or only
        a wikidata property reference. In that case the date is looked up at
        wikidata (P569 for birth, P570 for death) and formatted according to
        its precision.

        :param event: "birth" or "death"
        :param author_dict: entry of ``self.data`` for one author
        :return: the date string, or ``''`` if no usable date was found
        """
        stored_date = author_dict[event]
        if stored_date != '' and not self.match_property.search(stored_date):
            return stored_date  # 4,6

        self.logger.debug(
            f"No valid entry in {event} for "
            f"[[{author_dict['title']}]] ... Fallback to wikidata")
        try:
            item = ItemPage(self.repo, author_dict["wikidata"])
            property_label = "P569" if event == "birth" else "P570"
            claim = item.text["claims"][property_label][0]
            date_from_data = claim.getTarget()
            if date_from_data.precision < 7:
                self.logger.error(
                    f"Precison is to low for [[{author_dict['title']}]]")
                # nothing printable: return explicitly instead of relying on
                # a later exception to reach the fallback
                return ''  # 4,6
            if date_from_data.precision < 8:
                # only the century is known
                date_from_data = int(
                    ceil(float(date_from_data.year) / 100.0) * 100)
                if date_from_data < 1000:
                    date_from_data = str(date_from_data)[0:1] + ". Jh."
                else:
                    date_from_data = str(date_from_data)[0:2] + ". Jh."
            elif date_from_data.precision < 10:
                date_from_data = str(date_from_data.year)
            elif date_from_data.precision < 11:
                date_from_data = self.number_to_month[date_from_data.month] + " " + \
                    str(date_from_data.year)
            else:
                date_from_data = f"{date_from_data.day}. " \
                    f"{self.number_to_month[date_from_data.month]} " \
                    f"{date_from_data.year}"
            # a minus sign marks a year before the common era
            if "-" in date_from_data:
                date_from_data = date_from_data.replace("-", "") + " v. Chr."
            self.logger.debug(
                f"Found {date_from_data} @ wikidata for {event}")
            return date_from_data  # 4,6
        except Exception:
            self.logger.debug("Wasn't able to ge any data from wikidata")
            return ''  # 4,6
Exemple #9
0
class AuthorList(CanonicalBot):
    # pylint: disable=bare-except,too-many-branches,broad-except
    def __init__(self, wiki, debug):
        """Create the bot together with its search helper and lookup tables.

        :param wiki: passed through unchanged to ``CanonicalBot.__init__``
        :param debug: passed through unchanged to ``CanonicalBot.__init__``
        """
        CanonicalBot.__init__(self, wiki, debug)
        self.searcher = PetScan()
        # according to the original author this is a DataSite object
        self.repo = self.wiki.data_repository()
        # buffer for the lines of the generated wiki page
        self.string_list = []
        # recognises wikitext of the form {{#property:P569}}
        self.match_property = re.compile(r"\{\{#property:P(\d{1,4})\}\}")
        german_months = ("Januar", "Februar", "März", "April", "Mai", "Juni",
                         "Juli", "August", "September", "Oktober", "November",
                         "Dezember")
        # month number (1-12) -> German month name
        self.number_to_month = dict(enumerate(german_months, start=1))

    def __enter__(self):
        """Enter the bot context and reset the stored data on the 1st of a month.

        :return: the bot itself, so it can be used in a ``with`` statement
        """
        CanonicalBot.__enter__(self)
        first_of_month = self.timestamp.start_of_run.day == 1
        if first_of_month:
            # replace the stored data with an empty dictionary
            self.data.assign_dict({})
            self.logger.warning("The data is thrown away. It is the first of the month")
        return self

    def task(self):
        """Search the authors, refresh the database and publish the list page.

        In debug mode the result goes to a page in the bot's user space,
        otherwise to "Liste der Autoren".  The page is only saved when its
        content differs from the existing text.

        :return: always ``True``
        """
        self._build_database(self._run_searcher())
        target_title = f"Benutzer:THEbotIT/{self.bot_name}" if self.debug \
            else "Liste der Autoren"
        dump = Page(self.wiki, target_title)
        previous_text = dump.text
        fresh_text = self._convert_to_table()
        # the first 150 characters are skipped; per the original author this
        # compares everything but the date
        if fresh_text[150:] == previous_text[150:]:
            self.logger.info("Heute gab es keine Änderungen, "
                             "daher wird die Seite nicht überschrieben.")
            return True
        dump.text = fresh_text
        dump.save("Die Liste wurde auf den aktuellen Stand gebracht.", botflag=True)
        return True

    def _run_searcher(self):
        """Configure ``self.searcher`` and return the result of its ``run()``.

        The "after" filter is chosen as follows:

        * debug mode: midnight five days before now,
        * last run successful and data present: the value of
          ``self.create_timestamp_for_search()``,
        * otherwise: no filter at all.

        :return: whatever ``self.searcher.run()`` returns
        """
        if self.debug:
            reference_day = datetime.now() - timedelta(days=5)
            # cut the time of day off by rebuilding the datetime from its date parts
            midnight = datetime(year=reference_day.year,
                                month=reference_day.month,
                                day=reference_day.day)
            self.searcher.last_change_after(midnight)
        elif self.last_run_successful and self.data:
            start_of_search = self.create_timestamp_for_search()
            self.searcher.last_change_after(start_of_search)
            self.logger.info(f"The date {start_of_search.strftime('%d.%m.%Y')} "
                             f"is set to the argument \"after\".")
        else:
            self.logger.warning("There was no timestamp found of the last run, "
                                "so the argument \"after\" is not set.")

        # restrict the search to author pages in the main namespace
        self.searcher.add_namespace(0)
        self.searcher.add_positive_category("Autoren")
        self.searcher.add_yes_template("Personendaten")
        self.searcher.get_wikidata_items()

        self.logger.debug(self.searcher)

        return self.searcher.run()

    _space_regex = re.compile(r"\s+")

    def _strip_spaces(self, raw_string: str):
        return self._space_regex.subn(raw_string.strip(), " ")[0]

    def _build_database(self, lemma_list):
        # pylint: disable=too-many-statements
        for idx, author in enumerate(lemma_list):
            self.logger.debug(f"{idx + 1}/{len(lemma_list)} {author['title']}")
            # delete preexisting data of this author
            try:
                del self.data[str(author["id"])]
            except KeyError:
                if self.last_run_successful:
                    self.logger.info(f"Can't delete old entry of [[{author['title']}]]")

            dict_author = {"title": author["title"]}
            # extract the Personendaten-block form the wikisource page
            page = Page(self.wiki, author["title"])
            try:
                try:
                    personendaten = re.search(r"\{\{Personendaten(?:.|\n)*?\n\}\}\n",
                                              page.text).group()
                except AttributeError:
                    self.logger.error(f"No valid block \"Personendaten\" was found for "
                                      f"[[{author['title']}]].")
                    personendaten = None
                if personendaten:
                    # personendaten = re.sub('<ref.*?>.*?<\/ref>|<ref.*?\/>', '', personendaten)
                    # personendaten = re.sub('\{\{CRef|.*?(?:\{\{.*?\}\})?}}', '', personendaten)
                    template_extractor = TemplateHandler(personendaten)
                    dict_author.update({"name": self._strip_spaces(
                        template_extractor.get_parameter("NACHNAME")["value"])})
                    dict_author.update({"first_name": self._strip_spaces(
                        template_extractor.get_parameter("VORNAMEN")["value"])})
                    try:
                        dict_author.update({"birth": self._strip_spaces(
                            template_extractor.get_parameter("GEBURTSDATUM")["value"])})
                    except Exception:
                        dict_author.update({"birth": ""})
                        self.logger.warning(f"Templatehandler couldn't find a birthdate for: "
                                            f"[[{author['title']}]]")
                    try:
                        dict_author.update({"death": self._strip_spaces(
                            template_extractor.get_parameter("STERBEDATUM")["value"])})
                    except Exception:
                        dict_author.update({"death": ""})
                        self.logger.warning(f"Templatehandler couldn't find a deathdate for: "
                                            f"[[{author['title']}]]")
                    try:
                        dict_author.update(
                            {"description":
                             template_extractor.get_parameter("KURZBESCHREIBUNG")["value"]})
                    except Exception:
                        dict_author.update({"description": ""})
                        self.logger.warning(
                            f"Templatehandler couldn't find a description for: "
                            f"[[{author['title']}]]")
                    try:
                        dict_author.update(
                            {"synonyms":
                             template_extractor.get_parameter("ALTERNATIVNAMEN")["value"]})
                    except Exception:
                        dict_author.update({"synonyms": ""})
                        self.logger.warning(f"Templatehandler couldn't find synonyms for: "
                                            f"[[{author['title']}]]")
                    try:
                        dict_author.update(
                            {"sortkey": template_extractor.get_parameter("SORTIERUNG")["value"]})
                        if dict_author["sortkey"] == "":
                            raise ValueError
                    except Exception:
                        self.logger.debug(f"there is no sortkey for [[{author['title']}]].")
                        # make a dummy key
                        if not dict_author["name"]:
                            dict_author["sortkey"] = dict_author["first_name"]
                            self.logger.warning("Author has no last name.")
                        elif not dict_author["first_name"]:
                            dict_author["sortkey"] = dict_author["name"]
                            self.logger.warning("Author has no last first_name.")
                        else:
                            dict_author["sortkey"] = \
                                dict_author["name"] + ", " + dict_author["first_name"]
                    try:
                        dict_author.update({"wikidata": author["q"]})
                    except KeyError:
                        self.logger.warning(f"The autor [[{author['title']}]] has no wikidata_item")
                    self.data.update({author["id"]: dict_author})
            except Exception as exception:
                self.logger.exception("Exception not catched: ", exc_info=exception)
                self.logger.error(f"author {author['title']} have a problem")

    @staticmethod
    def _sort_author_list(list_authors):
        list_authors.sort(key=lambda x: x[0])
        for i in range(len(list_authors) - 1):
            if list_authors[i][0] == list_authors[i + 1][0]:
                equal_count = 2
                while True:
                    if i + equal_count <= len(list_authors):
                        if list_authors[i][0] != list_authors[i + equal_count][0]:
                            break
                        equal_count += 1
                temp_list = list_authors[i:i + equal_count]
                temp_list.sort(key=lambda x: x[5])  # sort by birth date
                list_authors[i:i + equal_count] = temp_list

    def _convert_to_table(self):
        # pylint: disable=too-many-locals
        # make a list of lists
        self.logger.info("Start compiling.")
        list_authors = []
        for key in self.data:
            author_dict = self.data[key]
            list_author = list()
            list_author.append(author_dict["sortkey"])  # 0
            list_author.append(author_dict["title"].replace("_", " "))  # 1
            list_author.append(author_dict["name"])  # 2
            list_author.append(author_dict["first_name"])  # 3

            for event in ["birth", "death"]:
                list_author.append(self._handle_birth_and_death(event, author_dict))  # 4,6
                try:
                    list_author.append(str(DateConversion(list_author[-1])))  # 5,7
                except ValueError:
                    self.logger.error(f"Can´t compile sort key for {author_dict['title']}: "
                                      f"{event}/{author_dict[event]}")
                    list_author.append("!-00-00")  # 5,7
            list_author.append(author_dict["description"])  # 8
            list_authors.append(list_author)

        # sorting the list
        self.logger.info("Start sorting.")
        self._sort_author_list(list_authors)

        self.logger.info("Start printing.")
        start_of_run = self.timestamp.start_of_run
        self.string_list.append(f"Diese Liste der Autoren enthält alle {len(self.data)}<ref>Stand: "
                                f"{start_of_run.day}.{start_of_run.month}.{start_of_run.year}, "
                                f"{self.timestamp.start_of_run.strftime('%H:%M')} (UTC)</ref> Autoren, "
                                f"zu denen in Wikisource eine Autorenseite existiert.")
        self.string_list.append("Die Liste kann mit den Buttons neben den Spaltenüberschriften"
                                " nach der jeweiligen Spalte sortiert werden.")
        self.string_list.append("<!--")
        self.string_list.append("Diese Liste wurde durch ein Computerprogramm erstellt, "
                                "das die Daten verwendet, "
                                "die aus den Infoboxen auf den Autorenseiten stammen.")
        self.string_list.append("Sollten daher Fehler vorhanden sein, "
                                "sollten diese jeweils dort korrigiert werden.")
        self.string_list.append("-->")
        self.string_list.append("{|class=\"wikitable sortable\"")
        self.string_list.append("!style=\"width:20%\"| Name")
        self.string_list.append("!data-sort-type=\"text\" style=\"width:15%\"| Geb.-datum")
        self.string_list.append("!data-sort-type=\"text\" style=\"width:15%\"| Tod.-datum")
        self.string_list.append("!class=\"unsortable\" style=\"width:50%\"| Beschreibung")
        for list_author in list_authors:
            aut_sort, aut_page, aut_sur, aut_pre, birth_str, \
                birth_sort, death_str, death_sort, description = \
                list_author
            self.string_list.append("|-")
            if aut_sur and aut_pre:
                self.string_list.append(f"|data-sort-value=\"{aut_sort}\"|"
                                        f"[[{aut_page}|{aut_sur}, {aut_pre}]]")
            elif aut_pre:
                self.string_list.append(f"|data-sort-value=\"{aut_sort}\"|[[{aut_page}|{aut_pre}]]")
            else:
                self.string_list.append(f"|data-sort-value=\"{aut_sort}\"|[[{aut_page}|{aut_sur}]]")
            self.string_list.append(f"|data-sort-value=\"{birth_sort}\"|{birth_str}")
            self.string_list.append(f"|data-sort-value=\"{death_sort}\"|{death_str}")
            self.string_list.append(f"|{description}")
        self.string_list.append("|}")
        self.string_list.append('')
        self.string_list.append("== Anmerkungen ==")
        self.string_list.append("<references/>")
        self.string_list.append('')
        self.string_list.append("{{SORTIERUNG:Autoren #Liste der}}")
        self.string_list.append("[[Kategorie:Listen]]")
        self.string_list.append("[[Kategorie:Autoren|!]]")

        return "\n".join(self.string_list)

    def _handle_birth_and_death(self, event, author_dict):
        if author_dict[event] == '' or self.match_property.search(author_dict[event]):
            self.logger.debug(f"No valid entry in {event} for "
                              f"[[{author_dict['title']}]] ... Fallback to wikidata")
            try:
                item = ItemPage(self.repo, author_dict["wikidata"])
                if event == "birth":
                    property_label = "P569"
                else:
                    property_label = "P570"
                claim = item.text["claims"][property_label][0]
                date_from_data = claim.getTarget()
                if date_from_data.precision < 7:
                    self.logger.error(f"Precison is to low for [[{author_dict['title']}]]")
                elif date_from_data.precision < 8:
                    date_from_data = int(ceil(float(date_from_data.year) / 100.0) * 100)
                    if date_from_data < 1000:
                        date_from_data = str(date_from_data)[0:1] + ". Jh."
                    else:
                        date_from_data = str(date_from_data)[0:2] + ". Jh."
                elif date_from_data.precision < 10:
                    date_from_data = str(date_from_data.year)
                elif date_from_data.precision < 11:
                    date_from_data = self.number_to_month[date_from_data.month] + " " + \
                        str(date_from_data.year)
                else:
                    date_from_data = f"{date_from_data.day}. " \
                        f"{self.number_to_month[date_from_data.month]} " \
                        f"{date_from_data.year}"
                if re.search("-", date_from_data):
                    date_from_data = date_from_data.replace("-", "") + " v. Chr."
                self.logger.debug(f"Found {date_from_data} @ wikidata for {event}")
                return date_from_data  # 4,6
            except Exception:
                self.logger.debug("Wasn't able to ge any data from wikidata")
                return ''  # 4,6
        else:
            return author_dict[event]  # 4,6
# -*- coding: utf-8 -*-
__author__ = 'eso'
import sys
sys.path.append('../../')
import re
from pywikibot import Page, Site
from tools.petscan import PetScan

# One-off maintenance script: on every page that uses the template "Biel",
# rewrite the listed {{Biel|<id>}} calls to {{Bielefeld|<id>}}.
wiki = Site()

searcher = PetScan()
searcher.add_yes_template('Biel')
lemma_list = searcher.run()

# only these ids are migrated; other {{Biel|...}} calls stay untouched
BIEL_IDS = ('1240647', '590504', '1732676', '548435', '32920')

for idx, lemma in enumerate(lemma_list):
    print(idx, len(lemma_list), lemma['title'])
    link_page = Page(wiki, lemma['title'])
    temp_text = link_page.text
    for biel_id in BIEL_IDS:
        # plain substring replacement: the targets are literal text, so no
        # regular expression (and no escaping of the braces) is needed
        temp_text = temp_text.replace('{{Biel|' + biel_id + '}}',
                                      '{{Bielefeld|' + biel_id + '}}')
    # save only pages that actually changed
    if link_page.text != temp_text:
        link_page.text = temp_text
        link_page.save(botflag=True, summary='Biel -> Bielefeld')

Exemple #11
0
class TestCatScan(TestCase):
    """Tests for the option handling and the URL construction of ``PetScan``."""

    def setUp(self):
        # every test works on a fresh searcher
        self.petscan = PetScan()

    def test_add_options(self):
        # options added one after the other end up in the same dictionary
        for option in ({"max_age": "45"}, {"smaller": "300"}):
            self.petscan.add_options(option)
        expected = {"smaller": "300", "max_age": "45"}
        self.assertDictEqual(expected, self.petscan.options)

    def test_add_categoy(self):
        # a depth argument is appended to the category name with "|"
        for name in ("pos1", "pos2"):
            self.petscan.add_positive_category(name)
        self.petscan.add_positive_category("pos3", 2)
        for name in ("neg1", "neg2"):
            self.petscan.add_negative_category(name)
        self.petscan.add_negative_category("neg3", 3)
        expected_positive = ["pos1", "pos2", "pos3|2"]
        expected_negative = ["neg1", "neg2", "neg3|3"]
        self.assertEqual(expected_positive, self.petscan.categories["positive"])
        self.assertEqual(expected_negative, self.petscan.categories["negative"])

    def test_add_namespace(self):
        # a single number and a list of numbers are both accepted
        self.petscan.add_namespace(0)
        self.petscan.add_namespace([2, 10])
        expected = {"ns[0]": "1", "ns[2]": "1", "ns[10]": "1"}
        self.assertDictEqual(expected, self.petscan.options)

    def test_activate_redirects(self):
        self.petscan.activate_redirects()
        expected = {"show_redirects": "yes"}
        self.assertDictEqual(expected, self.petscan.options)

    def test_deactivate_redirects(self):
        self.petscan.deactivate_redirects()
        expected = {"show_redirects": "no"}
        self.assertDictEqual(expected, self.petscan.options)

    def test_last_change_before(self):
        # the timestamp is expected as YYYYMMDDhhmmss
        moment = datetime(year=1234, month=1, day=1, hour=2, minute=2, second=42)
        self.petscan.last_change_before(moment)
        expected = {"before": "12340101020242"}
        self.assertDictEqual(expected, self.petscan.options)

    def test_last_change_after(self):
        # the timestamp is expected as YYYYMMDDhhmmss
        moment = datetime(year=1234, month=1, day=1, hour=2, minute=2, second=42)
        self.petscan.last_change_after(moment)
        expected = {"after": "12340101020242"}
        self.assertDictEqual(expected, self.petscan.options)

    def test_max_age(self):
        self.petscan.max_age(1234)
        expected = {"max_age": "1234"}
        self.assertDictEqual(expected, self.petscan.options)

    def test_only_new(self):
        self.petscan.only_new()
        expected = {"only_new": "1"}
        self.assertDictEqual(expected, self.petscan.options)

    def test_smaller_then(self):
        self.petscan.smaller_then(42)
        expected = {"smaller": "42"}
        self.assertDictEqual(expected, self.petscan.options)

    def test_larger_then(self):
        self.petscan.larger_then(42)
        expected = {"larger": "42"}
        self.assertDictEqual(expected, self.petscan.options)

    def test_get_wikidata(self):
        self.petscan.get_wikidata_items()
        expected = {"wikidata_item": "any"}
        self.assertDictEqual(expected, self.petscan.options)

    def test_get_Pages_with_wikidata(self):
        self.petscan.get_pages_with_wd_items()
        expected = {"wikidata_item": "with"}
        self.assertDictEqual(expected, self.petscan.options)

    def test_get_Pages_without_wikidata(self):
        self.petscan.get_pages_without_wd_items()
        expected = {"wikidata_item": "without"}
        self.assertDictEqual(expected, self.petscan.options)

    def test_set_or(self):
        self.petscan.set_logic_union()
        expected = {"combination": "union"}
        self.assertDictEqual(expected, self.petscan.options)

    def test_set_regex(self):
        self.petscan.set_regex_filter("abc")
        expected = {"regexp_filter": "abc"}
        self.assertDictEqual(expected, self.petscan.options)

    def test_set_last_edits(self):
        # True, False and the default argument are covered
        self.petscan.set_last_edit_bots(True)
        self.petscan.set_last_edit_anons(False)
        self.petscan.set_last_edit_flagged()
        expected = {
            "edits[bots]": "yes",
            "edits[anons]": "no",
            "edits[flagged]": "yes",
        }
        self.assertDictEqual(expected, self.petscan.options)

    def test_construct_cat_string(self):
        # blanks become "+", the entries are joined by CR LF
        for name in ("pos 1", "pos2"):
            self.petscan.add_positive_category(name)
        for name in ("neg1", "neg 2", "neg3"):
            self.petscan.add_negative_category(name)
        positive_argument = self.petscan._construct_list_argument(
            self.petscan.categories["positive"])
        self.assertEqual("pos+1\r\npos2", positive_argument)
        negative_argument = self.petscan._construct_list_argument(
            self.petscan.categories["negative"])
        self.assertEqual("neg1\r\nneg+2\r\nneg3", negative_argument)

    def test_construct_templates(self):
        for name in ("yes1", "yes2"):
            self.petscan.add_yes_template(name)
        for name in ("any1", "any2", "any3"):
            self.petscan.add_any_template(name)
        for name in ("no1", "no2"):
            self.petscan.add_no_template(name)
        expected_url = ("https://petscan.wmflabs.org/?language=de"
                        "&project=wikisource"
                        "&templates_yes=yes1%0D%0Ayes2"
                        "&templates_any=any1%0D%0Aany2%0D%0Aany3"
                        "&templates_no=no1%0D%0Ano2")
        self.assertEqual(str(self.petscan), expected_url)

    def test_construct_outlinks(self):
        for name in ("yes1", "yes2"):
            self.petscan.add_yes_outlink(name)
        for name in ("any1", "any2", "any3"):
            self.petscan.add_any_outlink(name)
        for name in ("no1", "no2"):
            self.petscan.add_no_outlink(name)
        expected_url = ("https://petscan.wmflabs.org/?language=de"
                        "&project=wikisource"
                        "&outlinks_yes=yes1%0D%0Ayes2"
                        "&outlinks_any=any1%0D%0Aany2%0D%0Aany3"
                        "&outlinks_no=no1%0D%0Ano2")
        self.assertEqual(str(self.petscan), expected_url)

    def test_construct_links_to(self):
        for name in ("yes1", "yes2"):
            self.petscan.add_yes_links_to(name)
        for name in ("any1", "any2", "any3"):
            self.petscan.add_any_links_to(name)
        for name in ("no1", "no2"):
            self.petscan.add_no_links_to(name)
        expected_url = ("https://petscan.wmflabs.org/?language=de"
                        "&project=wikisource"
                        "&links_to_all=yes1%0D%0Ayes2"
                        "&links_to_any=any1%0D%0Aany2%0D%0Aany3"
                        "&links_to_no=no1%0D%0Ano2")
        self.assertEqual(str(self.petscan), expected_url)

    def test_construct_options(self):
        # the order of the options in the URL is not checked, only their presence
        self.petscan.options = {
            "max_age": "1234",
            "get_q": "1",
            "show_redirects": "yes",
        }
        for fragment in ("&max_age=1234", "&get_q=1", "&show_redirects=yes"):
            self.assertEqual(fragment in str(self.petscan), True)

    def test_construct_string(self):
        base_url = "https://petscan.wmflabs.org/?language=en&project=wikipedia"
        self.petscan.set_language("en")
        self.petscan.set_project("wikipedia")

        # only a positive category
        self.petscan.add_positive_category("test")
        self.assertEqual(str(self.petscan), base_url + "&categories=test")

        # only a negative category
        self.petscan.categories = {"positive": [], "negative": []}
        self.petscan.add_negative_category("test")
        self.assertEqual(str(self.petscan), base_url + "&negcats=test")

        # only an option
        self.petscan.categories = {"positive": [], "negative": []}
        self.petscan.add_options({"max_age": "10"})
        self.assertEqual(str(self.petscan), base_url + "&max_age=10")

    def test_do_positive(self):
        # a successful request returns the list of pages found in the answer
        request_url = ("https://petscan.wmflabs.org/"
                       "?language=de&project=wikisource&format=json&doit=1")
        answer = ('{"n": "result","a": {"querytime_sec": 1.572163,'
                  '"query": "https://petscan.wmflabs.org/?language=de'
                  '&project=wikisource&categories=Autoren&get_q=1'
                  '&show_redirects=no&ns[0]=1&max_age=48'
                  '&format=json&doit=1"},'
                  '"*": [{"n": "combination",'
                  '"a": {"type": "subset",'
                  '"*": [{"id": 3279,'
                  '"len": 10197,'
                  '"n": "page",'
                  '"namespace": 0,'
                  '"nstext": "",'
                  '"q": "Q60644",'
                  '"title": "Friedrich_Rückert",'
                  '"touched": "20161024211701"}]}}]}')
        expected_pages = [{
            "id": 3279,
            "len": 10197,
            "n": "page",
            "namespace": 0,
            "nstext": "",
            "q": "Q60644",
            "title": "Friedrich_Rückert",
            "touched": "20161024211701",
        }]
        with requests_mock.mock() as fake_http:
            fake_http.get(request_url, text=answer)
            self.assertEqual(self.petscan.run(), expected_pages)

    def test_do_negative(self):
        # a 404 answer has to surface as PetScanException
        request_url = ("https://petscan.wmflabs.org/"
                       "?language=de&project=wikisource&format=json&doit=1")
        with requests_mock.mock() as fake_http:
            fake_http.get(request_url, status_code=404)
            with self.assertRaises(PetScanException):
                self.petscan.run()
Exemple #12
0

def substitute_sperrsatz(template):
    """Build the replacement text for one matched ``Sperrsatz`` template.

    The matched template gets the title 'SperrSchrift' and the additional
    parameter ``satz=1`` appended after its existing parameters.

    :param template: match object; ``group(0)`` is the complete template text
    :return: the rewritten template as returned by
        ``get_str(str_complex=False)``
    """
    template_handler = TemplateHandler(template.group(0))
    template_handler.set_title('SperrSchrift')
    parameter_list = template_handler.get_parameterlist()
    parameter_list.append(dict(key='satz', value='1'))
    template_handler.update_parameters(parameter_list)
    return template_handler.get_str(str_complex=False)


searcher_catscan = PetScan()
searcher_catscan.add_namespace(0)
searcher_catscan.add_namespace('Seite')
searcher_catscan.add_namespace('Index')
searcher_catscan.add_yes_template('Sperrsatz')
sites = searcher_catscan.run()
site = pywikibot.Site()

for lemma in sites:
    if lemma['a']['nstext'] == '(Article)':
        page = pywikibot.Page(site, lemma['a']['title'])
    else:
        page = pywikibot.Page(site,
                              lemma['a']['nstext'] + ':' + lemma['a']['title'])
    test_for_fit = re.search('Sperrsatz', page.text)
    if test_for_fit:
        try:
            page.text = re.sub('\{\{Sperrsatz(?:\{\{.*?\}\}|.)*?\}\}',
                               substitute_sperrsatz, page.text)
            page.save(