class TestCatScan(TestCase):
    """Unit tests for the ``PetScan`` query builder.

    Most tests call one builder method on a fresh ``PetScan`` instance and
    compare the resulting ``options`` / ``categories`` state or the URL
    produced by ``str(petscan)``.  The two ``test_do_*`` tests exercise
    ``run()`` against a mocked HTTP endpoint.
    """

    def setUp(self):
        # every test starts from an untouched query object
        self.petscan = PetScan()

    def test_add_options(self):
        self.petscan.add_options({"max_age": "45"})
        self.petscan.add_options({"smaller": "300"})
        self.assertDictEqual({"smaller": "300", "max_age": "45"},
                             self.petscan.options)

    def test_add_categoy(self):
        self.petscan.add_positive_category("pos1")
        self.petscan.add_positive_category("pos2")
        self.petscan.add_positive_category("pos3", 2)
        self.petscan.add_negative_category("neg1")
        self.petscan.add_negative_category("neg2")
        self.petscan.add_negative_category("neg3", 3)
        # the optional second argument is rendered as a "|<number>" suffix
        self.assertEqual(["pos1", "pos2", "pos3|2"],
                         self.petscan.categories["positive"])
        self.assertEqual(["neg1", "neg2", "neg3|3"],
                         self.petscan.categories["negative"])

    def test_add_namespace(self):
        self.petscan.add_namespace(0)
        self.petscan.add_namespace("Datei")
        self.petscan.add_namespace([2, "Vorlage"])
        # named namespaces are expected to end up as numeric ns[...] options
        self.assertDictEqual(
            {"ns[0]": "1", "ns[2]": "1", "ns[6]": "1", "ns[10]": "1"},
            self.petscan.options)

    def test_activate_redirects(self):
        self.petscan.activate_redirects()
        self.assertDictEqual({"show_redirects": "yes"}, self.petscan.options)

    def test_deactivate_redirects(self):
        self.petscan.deactivate_redirects()
        self.assertDictEqual({"show_redirects": "no"}, self.petscan.options)

    def test_last_change_before(self):
        self.petscan.last_change_before(
            datetime(year=1234, month=1, day=1, hour=2, minute=2, second=42))
        self.assertDictEqual({"before": "12340101020242"},
                             self.petscan.options)

    def test_last_change_after(self):
        self.petscan.last_change_after(
            datetime(year=1234, month=1, day=1, hour=2, minute=2, second=42))
        self.assertDictEqual({"after": "12340101020242"},
                             self.petscan.options)

    def test_max_age(self):
        self.petscan.max_age(1234)
        self.assertDictEqual({"max_age": "1234"}, self.petscan.options)

    def test_only_new(self):
        self.petscan.only_new()
        self.assertDictEqual({"only_new": "1"}, self.petscan.options)

    def test_smaller_then(self):
        self.petscan.smaller_then(42)
        self.assertDictEqual({"smaller": "42"}, self.petscan.options)

    def test_larger_then(self):
        self.petscan.larger_then(42)
        self.assertDictEqual({"larger": "42"}, self.petscan.options)

    def test_get_wikidata(self):
        self.petscan.get_wikidata_items()
        self.assertDictEqual({"wikidata_item": "any"}, self.petscan.options)

    def test_get_Pages_with_wikidata(self):
        self.petscan.get_pages_with_wd_items()
        self.assertDictEqual({"wikidata_item": "with"}, self.petscan.options)

    def test_get_Pages_without_wikidata(self):
        self.petscan.get_pages_without_wd_items()
        self.assertDictEqual({"wikidata_item": "without"},
                             self.petscan.options)

    def test_set_or(self):
        self.petscan.set_logic_union()
        self.assertDictEqual({"combination": "union"}, self.petscan.options)

    def test_set_regex(self):
        self.petscan.set_regex_filter("abc")
        self.assertDictEqual({"regexp_filter": "abc"}, self.petscan.options)

    def test_set_last_edits(self):
        self.petscan.set_last_edit_bots(True)
        self.petscan.set_last_edit_anons(False)
        self.petscan.set_last_edit_flagged()
        self.assertDictEqual(
            {"edits[bots]": "yes", "edits[anons]": "no",
             "edits[flagged]": "yes"},
            self.petscan.options)

    def test_construct_cat_string(self):
        self.petscan.add_positive_category("pos 1")
        self.petscan.add_positive_category("pos2")
        self.petscan.add_negative_category("neg1")
        self.petscan.add_negative_category("neg 2")
        self.petscan.add_negative_category("neg3")
        # entries are joined with CRLF and blanks become "+"
        self.assertEqual(
            "pos+1\r\npos2",
            self.petscan._construct_list_argument(
                self.petscan.categories["positive"]))
        self.assertEqual(
            "neg1\r\nneg+2\r\nneg3",
            self.petscan._construct_list_argument(
                self.petscan.categories["negative"]))

    def test_construct_templates(self):
        self.petscan.add_yes_template("yes1")
        self.petscan.add_yes_template("yes2")
        self.petscan.add_any_template("any1")
        self.petscan.add_any_template("any2")
        self.petscan.add_any_template("any3")
        self.petscan.add_no_template("no1")
        self.petscan.add_no_template("no2")
        self.assertEqual(
            str(self.petscan),
            "https://petscan.wmflabs.org/?language=de"
            "&project=wikisource"
            "&templates_yes=yes1%0D%0Ayes2"
            "&templates_any=any1%0D%0Aany2%0D%0Aany3"
            "&templates_no=no1%0D%0Ano2")

    def test_construct_outlinks(self):
        self.petscan.add_yes_outlink("yes1")
        self.petscan.add_yes_outlink("yes2")
        self.petscan.add_any_outlink("any1")
        self.petscan.add_any_outlink("any2")
        self.petscan.add_any_outlink("any3")
        self.petscan.add_no_outlink("no1")
        self.petscan.add_no_outlink("no2")
        self.assertEqual(
            str(self.petscan),
            "https://petscan.wmflabs.org/?language=de"
            "&project=wikisource"
            "&outlinks_yes=yes1%0D%0Ayes2"
            "&outlinks_any=any1%0D%0Aany2%0D%0Aany3"
            "&outlinks_no=no1%0D%0Ano2")

    def test_construct_links_to(self):
        self.petscan.add_yes_links_to("yes1")
        self.petscan.add_yes_links_to("yes2")
        self.petscan.add_any_links_to("any1")
        self.petscan.add_any_links_to("any2")
        self.petscan.add_any_links_to("any3")
        self.petscan.add_no_links_to("no1")
        self.petscan.add_no_links_to("no2")
        self.assertEqual(
            str(self.petscan),
            "https://petscan.wmflabs.org/?language=de"
            "&project=wikisource"
            "&links_to_all=yes1%0D%0Ayes2"
            "&links_to_any=any1%0D%0Aany2%0D%0Aany3"
            "&links_to_no=no1%0D%0Ano2")

    def test_construct_options(self):
        self.petscan.options = {"max_age": "1234",
                                "get_q": "1",
                                "show_redirects": "yes"}
        # Build the URL once; assertIn prints the URL when a fragment is
        # missing, which assertEqual(<bool>, True) would not.
        url = str(self.petscan)
        self.assertIn("&max_age=1234", url)
        self.assertIn("&get_q=1", url)
        self.assertIn("&show_redirects=yes", url)

    def test_construct_string(self):
        self.petscan.set_language("en")
        self.petscan.set_project("wikipedia")

        # only a positive category
        self.petscan.add_positive_category("test")
        self.assertEqual(
            str(self.petscan),
            "https://petscan.wmflabs.org/"
            "?language=en&project=wikipedia&categories=test")

        # only a negative category
        self.petscan.categories = {"positive": [], "negative": []}
        self.petscan.add_negative_category("test")
        self.assertEqual(
            str(self.petscan),
            "https://petscan.wmflabs.org/"
            "?language=en&project=wikipedia&negcats=test")

        # only an option
        self.petscan.categories = {"positive": [], "negative": []}
        self.petscan.add_options({"max_age": "10"})
        self.assertEqual(
            str(self.petscan),
            "https://petscan.wmflabs.org/"
            "?language=en&project=wikipedia&max_age=10")

    def test_do_positive(self):
        with requests_mock.mock() as mock:
            # canned JSON answer for the default (de / wikisource) query
            mock.get("https://petscan.wmflabs.org/"
                     "?language=de&project=wikisource&format=json&doit=1",
                     text='{"n": "result","a": {"querytime_sec": 1.572163,'
                          '"query": "https://petscan.wmflabs.org/?language=de'
                          '&project=wikisource&categories=Autoren&get_q=1'
                          '&show_redirects=no&ns[0]=1&max_age=48'
                          '&format=json&doit=1"},'
                          '"*": [{"n": "combination",'
                          '"a": {"type": "subset",'
                          '"*": [{"id": 3279,'
                          '"len": 10197,'
                          '"n": "page",'
                          '"namespace": 0,'
                          '"nstext": "",'
                          '"q": "Q60644",'
                          '"title": "Friedrich_Rückert",'
                          '"touched": "20161024211701"}]}}]}')
            # run() is expected to return only the innermost list of pages
            self.assertEqual(self.petscan.run(),
                             [{"id": 3279,
                               "len": 10197,
                               "n": "page",
                               "namespace": 0,
                               "nstext": "",
                               "q": "Q60644",
                               "title": "Friedrich_Rückert",
                               "touched": "20161024211701"}])

    def test_do_negative(self):
        with requests_mock.mock() as mock:
            mock.get("https://petscan.wmflabs.org/"
                     "?language=de&project=wikisource&format=json&doit=1",
                     status_code=404)
            # a non-success HTTP status must surface as ConnectionError
            with self.assertRaises(ConnectionError):
                self.petscan.run()
class AuthorList(CanonicalBot):  # pylint: disable=bare-except,too-many-branches,broad-except
    """Bot that rebuilds the wiki page "Liste der Autoren".

    A run searches (via ``PetScan``) for main-namespace pages in the category
    "Autoren" that use the template "Personendaten", copies the template
    parameters of every hit into ``self.data`` and finally renders all stored
    authors as one sortable wikitable.
    """

    # runs of whitespace (including line breaks) inside template values
    _space_regex = re.compile(r"\s+")

    def __init__(self, wiki, debug):
        CanonicalBot.__init__(self, wiki, debug)
        self.searcher = PetScan()
        self.repo = self.wiki.data_repository()  # this is a DataSite object
        self.string_list = []
        # matches values that only reference a wikidata property,
        # e.g. "{{#property:P569}}"
        self.match_property = re.compile(r"\{\{#property:P(\d{1,4})\}\}")
        self.number_to_month = {1: "Januar", 2: "Februar", 3: "März",
                                4: "April", 5: "Mai", 6: "Juni",
                                7: "Juli", 8: "August", 9: "September",
                                10: "Oktober", 11: "November", 12: "Dezember"}

    def __enter__(self):
        CanonicalBot.__enter__(self)
        # on the first day of a month the stored data is discarded, so that
        # run starts with an empty database
        if self.timestamp.start_of_run.day == 1:
            self.data.assign_dict({})
            self.logger.warning("The data is thrown away. It is the first of the month")
        return self

    def task(self):
        """Search, refresh the database and save the table if it changed.

        :return: always ``True``
        """
        lemma_list = self._run_searcher()
        self._build_database(lemma_list)
        if self.debug:
            dump = Page(self.wiki, f"Benutzer:THEbotIT/{self.bot_name}")
        else:
            dump = Page(self.wiki, "Liste der Autoren")
        old_text = dump.text
        new_text = self._convert_to_table()
        if new_text[150:] != old_text[150:]:  # compare all but the date
            dump.text = new_text
            dump.save("Die Liste wurde auf den aktuellen Stand gebracht.", botflag=True)
        else:
            self.logger.info("Heute gab es keine Änderungen, "
                             "daher wird die Seite nicht überschrieben.")
        return True

    def _run_searcher(self):
        """Configure the PetScan query and return the list of hits."""
        # was the last run successful
        if self.debug:  # if False
            # debug runs only look at the last five days (from midnight)
            five_days_ago = datetime.now() - timedelta(days=5)
            self.searcher.last_change_after(datetime(year=five_days_ago.year,
                                                     month=five_days_ago.month,
                                                     day=five_days_ago.day))
        elif self.last_run_successful and self.data:
            start_of_search = self.create_timestamp_for_search()
            self.searcher.last_change_after(start_of_search)
            self.logger.info(f"The date {start_of_search.strftime('%d.%m.%Y')} "
                             f"is set to the argument \"after\".")
        else:
            self.logger.warning("There was no timestamp found of the last run, "
                                "so the argument \"after\" is not set.")
        self.searcher.add_namespace(0)  # search in main namespace
        self.searcher.add_positive_category("Autoren")
        self.searcher.add_yes_template("Personendaten")
        self.searcher.get_wikidata_items()
        self.logger.debug(self.searcher)
        return self.searcher.run()

    def _strip_spaces(self, raw_string: str):
        """Return ``raw_string`` trimmed, with whitespace runs collapsed to one blank."""
        # Pattern.sub takes (replacement, string); the previous call had the
        # two swapped, so nothing was collapsed and the raw text was parsed
        # as a replacement template.
        return self._space_regex.sub(" ", raw_string.strip())

    def _read_optional_parameter(self, extractor, parameter, missing_label,
                                 title, strip=False):
        """Read one optional template parameter, falling back to ``""``.

        :param extractor: handler of the "Personendaten" block
        :param parameter: name of the template parameter
        :param missing_label: text used in the warning, e.g. "a birthdate"
        :param title: page title, only used for the warning
        :param strip: normalise whitespace of the value when ``True``
        """
        try:
            value = extractor.get_parameter(parameter)["value"]
            return self._strip_spaces(value) if strip else value
        except Exception:
            self.logger.warning(f"Templatehandler couldn't find {missing_label} for: "
                                f"[[{title}]]")
            return ""

    def _build_database(self, lemma_list):
        """Refresh ``self.data`` with the template data of every hit.

        :param lemma_list: search hits; each needs the keys "id" and "title",
            "q" (wikidata item) is optional
        """
        # pylint: disable=too-many-statements
        for idx, author in enumerate(lemma_list):
            title = author["title"]
            self.logger.debug(f"{idx + 1}/{len(lemma_list)} {title}")
            # delete preexisting data of this author
            # NOTE(review): deletion uses str(id) while the insert below uses
            # the raw id - presumably persisted keys come back as strings;
            # confirm against the data store.
            try:
                del self.data[str(author["id"])]
            except KeyError:
                if self.last_run_successful:
                    self.logger.info(f"Can't delete old entry of [[{title}]]")
            dict_author = {"title": title}
            # extract the Personendaten-block from the wikisource page
            page = Page(self.wiki, title)
            try:
                try:
                    personendaten = re.search(r"\{\{Personendaten(?:.|\n)*?\n\}\}\n",
                                              page.text).group()
                except AttributeError:
                    self.logger.error(f"No valid block \"Personendaten\" was found for "
                                      f"[[{title}]].")
                    personendaten = None
                if personendaten:
                    template_extractor = TemplateHandler(personendaten)
                    # name and first name are mandatory: a failure here is
                    # handled by the outer except and the author is skipped
                    dict_author["name"] = self._strip_spaces(
                        template_extractor.get_parameter("NACHNAME")["value"])
                    dict_author["first_name"] = self._strip_spaces(
                        template_extractor.get_parameter("VORNAMEN")["value"])
                    dict_author["birth"] = self._read_optional_parameter(
                        template_extractor, "GEBURTSDATUM", "a birthdate", title, strip=True)
                    dict_author["death"] = self._read_optional_parameter(
                        template_extractor, "STERBEDATUM", "a deathdate", title, strip=True)
                    dict_author["description"] = self._read_optional_parameter(
                        template_extractor, "KURZBESCHREIBUNG", "a description", title)
                    dict_author["synonyms"] = self._read_optional_parameter(
                        template_extractor, "ALTERNATIVNAMEN", "synonyms", title)
                    try:
                        sortkey = template_extractor.get_parameter("SORTIERUNG")["value"]
                    except Exception:
                        sortkey = ""
                    if sortkey == "":
                        self.logger.debug(f"there is no sortkey for [[{title}]].")
                        # make a dummy key
                        if not dict_author["name"]:
                            sortkey = dict_author["first_name"]
                            self.logger.warning("Author has no last name.")
                        elif not dict_author["first_name"]:
                            sortkey = dict_author["name"]
                            self.logger.warning("Author has no last first_name.")
                        else:
                            sortkey = dict_author["name"] + ", " + dict_author["first_name"]
                    dict_author["sortkey"] = sortkey
                    try:
                        dict_author["wikidata"] = author["q"]
                    except KeyError:
                        self.logger.warning(f"The autor [[{title}]] has no wikidata_item")
                    self.data.update({author["id"]: dict_author})
            except Exception as exception:
                # one broken author page must not abort the whole run
                self.logger.exception("Exception not catched: ", exc_info=exception)
                self.logger.error(f"author {title} have a problem")

    @staticmethod
    def _sort_author_list(list_authors):
        """Sort the rows in place by sort key (index 0), ties by birth sort key (index 5).

        One stable sort on the pair replaces the former manual scan for runs
        of equal keys, which indexed one past the end of the list whenever
        the last rows shared a sort key.
        """
        list_authors.sort(key=lambda row: (row[0], row[5]))

    def _convert_to_table(self):
        """Render all stored authors as wikitext and return it as one string."""
        # pylint: disable=too-many-locals
        # make a list of lists
        self.logger.info("Start compiling.")
        list_authors = []
        for key in self.data:
            author_dict = self.data[key]
            list_author = [author_dict["sortkey"],  # 0
                           author_dict["title"].replace("_", " "),  # 1
                           author_dict["name"],  # 2
                           author_dict["first_name"]]  # 3
            for event in ["birth", "death"]:
                date_text = self._handle_birth_and_death(event, author_dict)
                list_author.append(date_text)  # 4,6
                try:
                    list_author.append(str(DateConversion(date_text)))  # 5,7
                except ValueError:
                    self.logger.error(f"Can´t compile sort key for {author_dict['title']}: "
                                      f"{event}/{author_dict[event]}")
                    list_author.append("!-00-00")  # 5,7
            list_author.append(author_dict["description"])  # 8
            list_authors.append(list_author)

        # sorting the list
        self.logger.info("Start sorting.")
        self._sort_author_list(list_authors)

        self.logger.info("Start printing.")
        # start from an empty buffer, otherwise a second call would emit the
        # page twice
        self.string_list = []
        add_line = self.string_list.append
        start_of_run = self.timestamp.start_of_run
        add_line(f"Diese Liste der Autoren enthält alle {len(self.data)}<ref>Stand: "
                 f"{start_of_run.day}.{start_of_run.month}.{start_of_run.year}, "
                 f"{self.timestamp.start_of_run.strftime('%H:%M')} (UTC)</ref> Autoren, "
                 f"zu denen in Wikisource eine Autorenseite existiert.")
        add_line("Die Liste kann mit den Buttons neben den Spaltenüberschriften"
                 " nach der jeweiligen Spalte sortiert werden.")
        add_line("<!--")
        add_line("Diese Liste wurde durch ein Computerprogramm erstellt, "
                 "das die Daten verwendet, "
                 "die aus den Infoboxen auf den Autorenseiten stammen.")
        add_line("Sollten daher Fehler vorhanden sein, "
                 "sollten diese jeweils dort korrigiert werden.")
        add_line("-->")
        add_line("{|class=\"wikitable sortable\"")
        add_line("!style=\"width:20%\"| Name")
        add_line("!data-sort-type=\"text\" style=\"width:15%\"| Geb.-datum")
        add_line("!data-sort-type=\"text\" style=\"width:15%\"| Tod.-datum")
        add_line("!class=\"unsortable\" style=\"width:50%\"| Beschreibung")
        for list_author in list_authors:
            aut_sort, aut_page, aut_sur, aut_pre, birth_str, \
                birth_sort, death_str, death_sort, description = list_author
            add_line("|-")
            # link text: "surname, first name" or whichever of both exists
            if aut_sur and aut_pre:
                add_line(f"|data-sort-value=\"{aut_sort}\"|"
                         f"[[{aut_page}|{aut_sur}, {aut_pre}]]")
            elif aut_pre:
                add_line(f"|data-sort-value=\"{aut_sort}\"|[[{aut_page}|{aut_pre}]]")
            else:
                add_line(f"|data-sort-value=\"{aut_sort}\"|[[{aut_page}|{aut_sur}]]")
            add_line(f"|data-sort-value=\"{birth_sort}\"|{birth_str}")
            add_line(f"|data-sort-value=\"{death_sort}\"|{death_str}")
            add_line(f"|{description}")
        add_line("|}")
        add_line('')
        add_line("== Anmerkungen ==")
        add_line("<references/>")
        add_line('')
        add_line("{{SORTIERUNG:Autoren #Liste der}}")
        add_line("[[Kategorie:Listen]]")
        add_line("[[Kategorie:Autoren|!]]")

        return "\n".join(self.string_list)

    def _handle_birth_and_death(self, event, author_dict):
        """Return the display text for the birth or death date of one author.

        The value stored from the author page is used as it is.  Only when it
        is empty or a ``{{#property:...}}`` reference the date is looked up at
        wikidata (P569 for birth, P570 for death).

        :param event: "birth" or "death"
        :param author_dict: stored data of one author
        :return: the date text, ``''`` when no usable date could be determined
        """
        stored_value = author_dict[event]
        if stored_value != '' and not self.match_property.search(stored_value):
            return stored_value  # 4,6

        self.logger.debug(f"No valid entry in {event} for "
                          f"[[{author_dict['title']}]] ... Fallback to wikidata")
        try:
            item = ItemPage(self.repo, author_dict["wikidata"])
            property_label = "P569" if event == "birth" else "P570"
            claim = item.text["claims"][property_label][0]
            target = claim.getTarget()
            # NOTE(review): the precision thresholds presumably follow the
            # wikidata scale (7 century, 9 year, 10 month, 11 day) - confirm.
            if target.precision < 7:
                self.logger.error(f"Precison is to low for [[{author_dict['title']}]]")
                return ''  # 4,6
            if target.precision < 8:
                # count the century on the absolute year so that negative
                # (BC) years give e.g. "4. Jh. v. Chr."
                century = int(ceil(abs(target.year) / 100.0))
                date_from_data = f"{century}. Jh."
                if target.year < 0:
                    date_from_data += " v. Chr."
            elif target.precision < 10:
                date_from_data = str(target.year)
            elif target.precision < 11:
                date_from_data = self.number_to_month[target.month] + " " + \
                    str(target.year)
            else:
                date_from_data = f"{target.day}. " \
                                 f"{self.number_to_month[target.month]} " \
                                 f"{target.year}"
            # a minus sign can only stem from a negative year
            if "-" in date_from_data:
                date_from_data = date_from_data.replace("-", "") + " v. Chr."
            self.logger.debug(f"Found {date_from_data} @ wikidata for {event}")
            return date_from_data  # 4,6
        except Exception:
            # best effort: any problem with the lookup results in an empty cell
            self.logger.debug("Wasn't able to ge any data from wikidata")
            return ''  # 4,6
class AuthorList(CanonicalBot):  # pylint: disable=bare-except,too-many-branches,broad-except
    """Bot that rebuilds the wiki page "Liste der Autoren".

    A run searches (via ``PetScan``) for main-namespace pages in the category
    "Autoren" that use the template "Personendaten", copies the template
    parameters of every hit into ``self.data`` and finally renders all stored
    authors as one sortable wikitable.
    """

    # runs of whitespace (including line breaks) inside template values
    _space_regex = re.compile(r"\s+")

    def __init__(self, wiki, debug):
        CanonicalBot.__init__(self, wiki, debug)
        self.searcher = PetScan()
        self.repo = self.wiki.data_repository()  # this is a DataSite object
        self.string_list = []
        # matches values that only reference a wikidata property,
        # e.g. "{{#property:P569}}"
        self.match_property = re.compile(r"\{\{#property:P(\d{1,4})\}\}")
        self.number_to_month = {
            1: "Januar",
            2: "Februar",
            3: "März",
            4: "April",
            5: "Mai",
            6: "Juni",
            7: "Juli",
            8: "August",
            9: "September",
            10: "Oktober",
            11: "November",
            12: "Dezember",
        }

    def __enter__(self):
        CanonicalBot.__enter__(self)
        # on the first day of a month the stored data is discarded, so that
        # run starts with an empty database
        if self.timestamp.start_of_run.day == 1:
            self.data.assign_dict({})
            self.logger.warning(
                "The data is thrown away. It is the first of the month")
        return self

    def task(self):
        """Search, refresh the database and save the table if it changed.

        :return: always ``True``
        """
        lemma_list = self._run_searcher()
        self._build_database(lemma_list)
        if self.debug:
            dump = Page(self.wiki, f"Benutzer:THEbotIT/{self.bot_name}")
        else:
            dump = Page(self.wiki, "Liste der Autoren")
        old_text = dump.text
        new_text = self._convert_to_table()
        if new_text[150:] != old_text[150:]:  # compare all but the date
            dump.text = new_text
            dump.save("Die Liste wurde auf den aktuellen Stand gebracht.",
                      botflag=True)
        else:
            self.logger.info("Heute gab es keine Änderungen, "
                             "daher wird die Seite nicht überschrieben.")
        return True

    def _run_searcher(self):
        """Configure the PetScan query and return the list of hits."""
        # was the last run successful
        if self.debug:  # if False
            # debug runs only look at the last five days (from midnight)
            five_days_ago = datetime.now() - timedelta(days=5)
            self.searcher.last_change_after(
                datetime(year=five_days_ago.year,
                         month=five_days_ago.month,
                         day=five_days_ago.day))
        elif self.last_run_successful and self.data:
            start_of_search = self.create_timestamp_for_search()
            self.searcher.last_change_after(start_of_search)
            self.logger.info(
                f"The date {start_of_search.strftime('%d.%m.%Y')} "
                f"is set to the argument \"after\".")
        else:
            self.logger.warning(
                "There was no timestamp found of the last run, "
                "so the argument \"after\" is not set.")
        self.searcher.add_namespace(0)  # search in main namespace
        self.searcher.add_positive_category("Autoren")
        self.searcher.add_yes_template("Personendaten")
        self.searcher.get_wikidata_items()
        self.logger.debug(self.searcher)
        return self.searcher.run()

    def _strip_spaces(self, raw_string: str):
        """Return ``raw_string`` trimmed, with whitespace runs collapsed to one blank."""
        # Pattern.sub takes (replacement, string); the previous call had the
        # two swapped, so nothing was collapsed and the raw text was parsed
        # as a replacement template.
        return self._space_regex.sub(" ", raw_string.strip())

    def _read_optional_parameter(self, extractor, parameter, missing_label,
                                 title, strip=False):
        """Read one optional template parameter, falling back to ``""``.

        :param extractor: handler of the "Personendaten" block
        :param parameter: name of the template parameter
        :param missing_label: text used in the warning, e.g. "a birthdate"
        :param title: page title, only used for the warning
        :param strip: normalise whitespace of the value when ``True``
        """
        try:
            value = extractor.get_parameter(parameter)["value"]
            return self._strip_spaces(value) if strip else value
        except Exception:
            self.logger.warning(
                f"Templatehandler couldn't find {missing_label} for: "
                f"[[{title}]]")
            return ""

    def _build_database(self, lemma_list):
        """Refresh ``self.data`` with the template data of every hit.

        :param lemma_list: search hits; each needs the keys "id" and "title",
            "q" (wikidata item) is optional
        """
        # pylint: disable=too-many-statements
        for idx, author in enumerate(lemma_list):
            title = author["title"]
            self.logger.debug(f"{idx + 1}/{len(lemma_list)} {title}")
            # delete preexisting data of this author
            # NOTE(review): deletion uses str(id) while the insert below uses
            # the raw id - presumably persisted keys come back as strings;
            # confirm against the data store.
            try:
                del self.data[str(author["id"])]
            except KeyError:
                if self.last_run_successful:
                    self.logger.info(
                        f"Can't delete old entry of [[{title}]]")
            dict_author = {"title": title}
            # extract the Personendaten-block from the wikisource page
            page = Page(self.wiki, title)
            try:
                try:
                    personendaten = re.search(
                        r"\{\{Personendaten(?:.|\n)*?\n\}\}\n",
                        page.text).group()
                except AttributeError:
                    self.logger.error(
                        f"No valid block \"Personendaten\" was found for "
                        f"[[{title}]].")
                    personendaten = None
                if personendaten:
                    template_extractor = TemplateHandler(personendaten)
                    # name and first name are mandatory: a failure here is
                    # handled by the outer except and the author is skipped
                    dict_author["name"] = self._strip_spaces(
                        template_extractor.get_parameter("NACHNAME")["value"])
                    dict_author["first_name"] = self._strip_spaces(
                        template_extractor.get_parameter("VORNAMEN")["value"])
                    dict_author["birth"] = self._read_optional_parameter(
                        template_extractor, "GEBURTSDATUM", "a birthdate",
                        title, strip=True)
                    dict_author["death"] = self._read_optional_parameter(
                        template_extractor, "STERBEDATUM", "a deathdate",
                        title, strip=True)
                    dict_author["description"] = self._read_optional_parameter(
                        template_extractor, "KURZBESCHREIBUNG",
                        "a description", title)
                    dict_author["synonyms"] = self._read_optional_parameter(
                        template_extractor, "ALTERNATIVNAMEN", "synonyms",
                        title)
                    try:
                        sortkey = template_extractor.get_parameter(
                            "SORTIERUNG")["value"]
                    except Exception:
                        sortkey = ""
                    if sortkey == "":
                        self.logger.debug(
                            f"there is no sortkey for [[{title}]].")
                        # make a dummy key
                        if not dict_author["name"]:
                            sortkey = dict_author["first_name"]
                            self.logger.warning("Author has no last name.")
                        elif not dict_author["first_name"]:
                            sortkey = dict_author["name"]
                            self.logger.warning(
                                "Author has no last first_name.")
                        else:
                            sortkey = dict_author["name"] + ", " + \
                                dict_author["first_name"]
                    dict_author["sortkey"] = sortkey
                    try:
                        dict_author["wikidata"] = author["q"]
                    except KeyError:
                        self.logger.warning(
                            f"The autor [[{title}]] has no wikidata_item")
                    self.data.update({author["id"]: dict_author})
            except Exception as exception:
                # one broken author page must not abort the whole run
                self.logger.exception("Exception not catched: ",
                                      exc_info=exception)
                self.logger.error(f"author {title} have a problem")

    @staticmethod
    def _sort_author_list(list_authors):
        """Sort the rows in place by sort key (index 0), ties by birth sort key (index 5).

        One stable sort on the pair replaces the former manual scan for runs
        of equal keys, which indexed one past the end of the list whenever
        the last rows shared a sort key.
        """
        list_authors.sort(key=lambda row: (row[0], row[5]))

    def _convert_to_table(self):
        """Render all stored authors as wikitext and return it as one string."""
        # pylint: disable=too-many-locals
        # make a list of lists
        self.logger.info("Start compiling.")
        list_authors = []
        for key in self.data:
            author_dict = self.data[key]
            list_author = [
                author_dict["sortkey"],  # 0
                author_dict["title"].replace("_", " "),  # 1
                author_dict["name"],  # 2
                author_dict["first_name"],  # 3
            ]
            for event in ["birth", "death"]:
                date_text = self._handle_birth_and_death(event, author_dict)
                list_author.append(date_text)  # 4,6
                try:
                    list_author.append(str(DateConversion(date_text)))  # 5,7
                except ValueError:
                    self.logger.error(
                        f"Can´t compile sort key for {author_dict['title']}: "
                        f"{event}/{author_dict[event]}")
                    list_author.append("!-00-00")  # 5,7
            list_author.append(author_dict["description"])  # 8
            list_authors.append(list_author)

        # sorting the list
        self.logger.info("Start sorting.")
        self._sort_author_list(list_authors)

        self.logger.info("Start printing.")
        # start from an empty buffer, otherwise a second call would emit the
        # page twice
        self.string_list = []
        add_line = self.string_list.append
        start_of_run = self.timestamp.start_of_run
        add_line(
            f"Diese Liste der Autoren enthält alle {len(self.data)}<ref>Stand: "
            f"{start_of_run.day}.{start_of_run.month}.{start_of_run.year}, "
            f"{self.timestamp.start_of_run.strftime('%H:%M')} (UTC)</ref> Autoren, "
            f"zu denen in Wikisource eine Autorenseite existiert.")
        add_line(
            "Die Liste kann mit den Buttons neben den Spaltenüberschriften"
            " nach der jeweiligen Spalte sortiert werden.")
        add_line("<!--")
        add_line(
            "Diese Liste wurde durch ein Computerprogramm erstellt, "
            "das die Daten verwendet, "
            "die aus den Infoboxen auf den Autorenseiten stammen.")
        add_line(
            "Sollten daher Fehler vorhanden sein, "
            "sollten diese jeweils dort korrigiert werden.")
        add_line("-->")
        add_line("{|class=\"wikitable sortable\"")
        add_line("!style=\"width:20%\"| Name")
        add_line("!data-sort-type=\"text\" style=\"width:15%\"| Geb.-datum")
        add_line("!data-sort-type=\"text\" style=\"width:15%\"| Tod.-datum")
        add_line("!class=\"unsortable\" style=\"width:50%\"| Beschreibung")
        for list_author in list_authors:
            aut_sort, aut_page, aut_sur, aut_pre, birth_str, \
                birth_sort, death_str, death_sort, description = list_author
            add_line("|-")
            # link text: "surname, first name" or whichever of both exists
            if aut_sur and aut_pre:
                add_line(f"|data-sort-value=\"{aut_sort}\"|"
                         f"[[{aut_page}|{aut_sur}, {aut_pre}]]")
            elif aut_pre:
                add_line(
                    f"|data-sort-value=\"{aut_sort}\"|[[{aut_page}|{aut_pre}]]")
            else:
                add_line(
                    f"|data-sort-value=\"{aut_sort}\"|[[{aut_page}|{aut_sur}]]")
            add_line(f"|data-sort-value=\"{birth_sort}\"|{birth_str}")
            add_line(f"|data-sort-value=\"{death_sort}\"|{death_str}")
            add_line(f"|{description}")
        add_line("|}")
        add_line('')
        add_line("== Anmerkungen ==")
        add_line("<references/>")
        add_line('')
        add_line("{{SORTIERUNG:Autoren #Liste der}}")
        add_line("[[Kategorie:Listen]]")
        add_line("[[Kategorie:Autoren|!]]")

        return "\n".join(self.string_list)

    def _handle_birth_and_death(self, event, author_dict):
        """Return the display text for the birth or death date of one author.

        The value stored from the author page is used as it is.  Only when it
        is empty or a ``{{#property:...}}`` reference the date is looked up at
        wikidata (P569 for birth, P570 for death).

        :param event: "birth" or "death"
        :param author_dict: stored data of one author
        :return: the date text, ``''`` when no usable date could be determined
        """
        stored_value = author_dict[event]
        if stored_value != '' and not self.match_property.search(stored_value):
            return stored_value  # 4,6

        self.logger.debug(
            f"No valid entry in {event} for "
            f"[[{author_dict['title']}]] ... Fallback to wikidata")
        try:
            item = ItemPage(self.repo, author_dict["wikidata"])
            property_label = "P569" if event == "birth" else "P570"
            claim = item.text["claims"][property_label][0]
            target = claim.getTarget()
            # NOTE(review): the precision thresholds presumably follow the
            # wikidata scale (7 century, 9 year, 10 month, 11 day) - confirm.
            if target.precision < 7:
                self.logger.error(
                    f"Precison is to low for [[{author_dict['title']}]]")
                return ''  # 4,6
            if target.precision < 8:
                # count the century on the absolute year so that negative
                # (BC) years give e.g. "4. Jh. v. Chr."
                century = int(ceil(abs(target.year) / 100.0))
                date_from_data = f"{century}. Jh."
                if target.year < 0:
                    date_from_data += " v. Chr."
            elif target.precision < 10:
                date_from_data = str(target.year)
            elif target.precision < 11:
                date_from_data = self.number_to_month[target.month] + " " + \
                    str(target.year)
            else:
                date_from_data = f"{target.day}. " \
                                 f"{self.number_to_month[target.month]} " \
                                 f"{target.year}"
            # a minus sign can only stem from a negative year
            if "-" in date_from_data:
                date_from_data = date_from_data.replace("-", "") + " v. Chr."
            self.logger.debug(
                f"Found {date_from_data} @ wikidata for {event}")
            return date_from_data  # 4,6
        except Exception:
            # best effort: any problem with the lookup results in an empty cell
            self.logger.debug("Wasn't able to ge any data from wikidata")
            return ''  # 4,6
class TestCatScan(TestCase):
    """Unit tests for the ``PetScan`` query builder.

    Most tests call one builder method on a fresh ``PetScan`` instance and
    compare the resulting ``options`` / ``categories`` state or the URL
    produced by ``str(petscan)``.  The two ``test_do_*`` tests exercise
    ``run()`` against a mocked HTTP endpoint.
    """

    def setUp(self):
        # every test starts from an untouched query object
        self.petscan = PetScan()

    def test_add_options(self):
        self.petscan.add_options({"max_age": "45"})
        self.petscan.add_options({"smaller": "300"})
        self.assertDictEqual({
            "smaller": "300",
            "max_age": "45"
        }, self.petscan.options)

    def test_add_categoy(self):
        self.petscan.add_positive_category("pos1")
        self.petscan.add_positive_category("pos2")
        self.petscan.add_positive_category("pos3", 2)
        self.petscan.add_negative_category("neg1")
        self.petscan.add_negative_category("neg2")
        self.petscan.add_negative_category("neg3", 3)
        # the optional second argument is rendered as a "|<number>" suffix
        self.assertEqual(["pos1", "pos2", "pos3|2"],
                         self.petscan.categories["positive"])
        self.assertEqual(["neg1", "neg2", "neg3|3"],
                         self.petscan.categories["negative"])

    def test_add_namespace(self):
        # a single number and a list of numbers are both accepted
        self.petscan.add_namespace(0)
        self.petscan.add_namespace([2, 10])
        self.assertDictEqual({
            "ns[0]": "1",
            "ns[2]": "1",
            "ns[10]": "1"
        }, self.petscan.options)

    def test_activate_redirects(self):
        self.petscan.activate_redirects()
        self.assertDictEqual({"show_redirects": "yes"}, self.petscan.options)

    def test_deactivate_redirects(self):
        self.petscan.deactivate_redirects()
        self.assertDictEqual({"show_redirects": "no"}, self.petscan.options)

    def test_last_change_before(self):
        self.petscan.last_change_before(
            datetime(year=1234, month=1, day=1, hour=2, minute=2, second=42))
        self.assertDictEqual({"before": "12340101020242"},
                             self.petscan.options)

    def test_last_change_after(self):
        self.petscan.last_change_after(
            datetime(year=1234, month=1, day=1, hour=2, minute=2, second=42))
        self.assertDictEqual({"after": "12340101020242"},
                             self.petscan.options)

    def test_max_age(self):
        self.petscan.max_age(1234)
        self.assertDictEqual({"max_age": "1234"}, self.petscan.options)

    def test_only_new(self):
        self.petscan.only_new()
        self.assertDictEqual({"only_new": "1"}, self.petscan.options)

    def test_smaller_then(self):
        self.petscan.smaller_then(42)
        self.assertDictEqual({"smaller": "42"}, self.petscan.options)

    def test_larger_then(self):
        self.petscan.larger_then(42)
        self.assertDictEqual({"larger": "42"}, self.petscan.options)

    def test_get_wikidata(self):
        self.petscan.get_wikidata_items()
        self.assertDictEqual({"wikidata_item": "any"}, self.petscan.options)

    def test_get_Pages_with_wikidata(self):
        self.petscan.get_pages_with_wd_items()
        self.assertDictEqual({"wikidata_item": "with"}, self.petscan.options)

    def test_get_Pages_without_wikidata(self):
        self.petscan.get_pages_without_wd_items()
        self.assertDictEqual({"wikidata_item": "without"},
                             self.petscan.options)

    def test_set_or(self):
        self.petscan.set_logic_union()
        self.assertDictEqual({"combination": "union"}, self.petscan.options)

    def test_set_regex(self):
        self.petscan.set_regex_filter("abc")
        self.assertDictEqual({"regexp_filter": "abc"}, self.petscan.options)

    def test_set_last_edits(self):
        self.petscan.set_last_edit_bots(True)
        self.petscan.set_last_edit_anons(False)
        self.petscan.set_last_edit_flagged()
        self.assertDictEqual(
            {
                "edits[bots]": "yes",
                "edits[anons]": "no",
                "edits[flagged]": "yes"
            }, self.petscan.options)

    def test_construct_cat_string(self):
        self.petscan.add_positive_category("pos 1")
        self.petscan.add_positive_category("pos2")
        self.petscan.add_negative_category("neg1")
        self.petscan.add_negative_category("neg 2")
        self.petscan.add_negative_category("neg3")
        # entries are joined with CRLF and blanks become "+"
        self.assertEqual(
            "pos+1\r\npos2",
            self.petscan._construct_list_argument(
                self.petscan.categories["positive"]))
        self.assertEqual(
            "neg1\r\nneg+2\r\nneg3",
            self.petscan._construct_list_argument(
                self.petscan.categories["negative"]))

    def test_construct_templates(self):
        self.petscan.add_yes_template("yes1")
        self.petscan.add_yes_template("yes2")
        self.petscan.add_any_template("any1")
        self.petscan.add_any_template("any2")
        self.petscan.add_any_template("any3")
        self.petscan.add_no_template("no1")
        self.petscan.add_no_template("no2")
        self.assertEqual(
            str(self.petscan),
            "https://petscan.wmflabs.org/?language=de"
            "&project=wikisource"
            "&templates_yes=yes1%0D%0Ayes2"
            "&templates_any=any1%0D%0Aany2%0D%0Aany3"
            "&templates_no=no1%0D%0Ano2")

    def test_construct_outlinks(self):
        self.petscan.add_yes_outlink("yes1")
        self.petscan.add_yes_outlink("yes2")
        self.petscan.add_any_outlink("any1")
        self.petscan.add_any_outlink("any2")
        self.petscan.add_any_outlink("any3")
        self.petscan.add_no_outlink("no1")
        self.petscan.add_no_outlink("no2")
        self.assertEqual(
            str(self.petscan),
            "https://petscan.wmflabs.org/?language=de"
            "&project=wikisource"
            "&outlinks_yes=yes1%0D%0Ayes2"
            "&outlinks_any=any1%0D%0Aany2%0D%0Aany3"
            "&outlinks_no=no1%0D%0Ano2")

    def test_construct_links_to(self):
        self.petscan.add_yes_links_to("yes1")
        self.petscan.add_yes_links_to("yes2")
        self.petscan.add_any_links_to("any1")
        self.petscan.add_any_links_to("any2")
        self.petscan.add_any_links_to("any3")
        self.petscan.add_no_links_to("no1")
        self.petscan.add_no_links_to("no2")
        self.assertEqual(
            str(self.petscan),
            "https://petscan.wmflabs.org/?language=de"
            "&project=wikisource"
            "&links_to_all=yes1%0D%0Ayes2"
            "&links_to_any=any1%0D%0Aany2%0D%0Aany3"
            "&links_to_no=no1%0D%0Ano2")

    def test_construct_options(self):
        self.petscan.options = {
            "max_age": "1234",
            "get_q": "1",
            "show_redirects": "yes"
        }
        # Build the URL once; assertIn prints the URL when a fragment is
        # missing, which assertEqual(<bool>, True) would not.
        url = str(self.petscan)
        self.assertIn("&max_age=1234", url)
        self.assertIn("&get_q=1", url)
        self.assertIn("&show_redirects=yes", url)

    def test_construct_string(self):
        self.petscan.set_language("en")
        self.petscan.set_project("wikipedia")

        # only a positive category
        self.petscan.add_positive_category("test")
        self.assertEqual(
            str(self.petscan),
            "https://petscan.wmflabs.org/?language=en&project=wikipedia&categories=test"
        )

        # only a negative category
        self.petscan.categories = {"positive": [], "negative": []}
        self.petscan.add_negative_category("test")
        self.assertEqual(
            str(self.petscan),
            "https://petscan.wmflabs.org/?language=en&project=wikipedia&negcats=test"
        )

        # only an option
        self.petscan.categories = {"positive": [], "negative": []}
        self.petscan.add_options({"max_age": "10"})
        self.assertEqual(
            str(self.petscan),
            "https://petscan.wmflabs.org/?language=en&project=wikipedia&max_age=10"
        )

    def test_do_positive(self):
        with requests_mock.mock() as mock:
            # canned JSON answer for the default (de / wikisource) query
            mock.get(
                "https://petscan.wmflabs.org/"
                "?language=de&project=wikisource&format=json&doit=1",
                text='{"n": "result","a": {"querytime_sec": 1.572163,'
                '"query": "https://petscan.wmflabs.org/?language=de'
                '&project=wikisource&categories=Autoren&get_q=1'
                '&show_redirects=no&ns[0]=1&max_age=48'
                '&format=json&doit=1"},'
                '"*": [{"n": "combination",'
                '"a": {"type": "subset",'
                '"*": [{"id": 3279,'
                '"len": 10197,'
                '"n": "page",'
                '"namespace": 0,'
                '"nstext": "",'
                '"q": "Q60644",'
                '"title": "Friedrich_Rückert",'
                '"touched": "20161024211701"}]}}]}')
            # run() is expected to return only the innermost list of pages
            self.assertEqual(self.petscan.run(), [{
                "id": 3279,
                "len": 10197,
                "n": "page",
                "namespace": 0,
                "nstext": "",
                "q": "Q60644",
                "title": "Friedrich_Rückert",
                "touched": "20161024211701"
            }])

    def test_do_negative(self):
        with requests_mock.mock() as mock:
            mock.get(
                "https://petscan.wmflabs.org/"
                "?language=de&project=wikisource&format=json&doit=1",
                status_code=404)
            # a non-success HTTP status must surface as PetScanException
            with self.assertRaises(PetScanException):
                self.petscan.run()