def test_second_equal(self):
     """Expect a second ``=`` inside an argument to stay part of the value."""
     raw_argument = "BILD=Der Todesgang des armenischen Volkes.pdf{{!}}page=276"
     expected = [
         test_dict_argument_1,
         {"key": "BILD", "value": "Der Todesgang des armenischen Volkes.pdf{{!}}page=276"},
     ]
     template = "{{" + "|".join((test_title_test, test_string_argument_1, raw_argument)) + "}}"
     parser = TemplateHandler(template)
     self.assertEqual(expected, parser.get_parameterlist())
Exemple #2
0
    def from_text(cls, article_text):
        """
        Build an Article from a text that consists of exactly one RE article.

        The text has to begin with a single start template (``RE_DATEN`` or
        ``RE_ABSCHNITT``) and to end with a single ``RE_AUTHOR`` template.
        The stripped text between both templates becomes the article text.

        :param article_text: text that represent a valid ReArticle
        :rtype: Article
        :raises ReDatenException: if the number of start or author templates
            is not one, if they are in the wrong order, if there is text
            before the start or after the author template, or if
            TemplateHandler rejects one of the two templates
        """
        finder = TemplateFinder(article_text)
        daten_hits = finder.get_positions(RE_DATEN)
        abschnitt_hits = finder.get_positions(RE_ABSCHNITT)
        # both kinds of start template together must occur exactly once
        if len(daten_hits) + len(abschnitt_hits) != 1:
            raise ReDatenException(
                "Article has the wrong structure. There must one start template"
            )
        # exactly one of the two lists is non-empty at this point
        start_hit = (daten_hits or abschnitt_hits)[0]
        author_hits = finder.get_positions(RE_AUTHOR)
        # the closing author template must occur exactly once as well
        if len(author_hits) != 1:
            raise ReDatenException(
                "Article has the wrong structure. There must one stop template"
            )
        author_hit = author_hits[0]
        # the start template has to come before the author template
        if start_hit["pos"][0] > author_hit["pos"][0]:
            raise ReDatenException(
                "Article has the wrong structure. Wrong order of templates.")
        # nothing may precede the start template ...
        if start_hit["pos"][0] != 0:
            raise ReDatenException(
                "Article has the wrong structure. There is text in front of the article."
            )
        # ... and nothing may follow the author template
        if author_hit["pos"][1] != len(article_text):
            raise ReDatenException(
                "Article has the wrong structure. There is text after the article."
            )
        try:
            re_start = TemplateHandler(start_hit["text"])
        except TemplateHandlerException as error:
            raise ReDatenException(
                "Start-Template has the wrong structure.") from error
        try:
            re_author = TemplateHandler(author_hit["text"])
        except TemplateHandlerException as error:
            raise ReDatenException(
                "Author-Template has the wrong structure.") from error
        properties_dict = cls._extract_properties(re_start.parameters)
        author_name = re_author.parameters[0]["value"]
        try:
            author_issue = re_author.parameters[1]["value"]
        except IndexError:
            # a missing second author parameter yields an empty issue
            author_issue = ""
        # the article body is everything between the two templates
        body = article_text[start_hit["pos"][1]:author_hit["pos"][0]].strip()
        return Article(article_type=re_start.title,
                       re_daten_properties=properties_dict,
                       text=body,
                       author=(author_name, author_issue))
Exemple #3
0
 def test_set_title(self):
     """After set_title the simple string form starts with the new title."""
     parser = TemplateHandler(test_string_12_simple)
     parser.set_title(test_title_test)
     expected = "{{" + "|".join(
         (test_title_test, test_string_argument_1, test_string_argument_2)) + "}}"
     self.assertEqual(expected, parser.get_str(str_complex=False))
 def test_update_parameters(self):
     """update_parameters makes get_str emit the new arguments 3, 4 and 5."""
     parser = TemplateHandler(test_string_12_simple)
     # before the update the handler answers with arguments 1 and 2
     for key, expected_parameter in (("1", test_dict_argument_1),
                                     ("2", test_dict_argument_2)):
         self.assertEqual(expected_parameter, parser.get_parameter(key))
     parser.update_parameters(
         [test_dict_argument_3, test_dict_argument_4, test_dict_argument_5])
     expected = "{{" + "|".join(
         (test_title, test_string_argument_3, test_string_argument_4,
          test_string_argument_5)) + "}}"
     self.assertEqual(expected, parser.get_str(str_complex=False))
Exemple #5
0
    def convert_re_nachtrag(template: str):
        """
        Rebuild the given template text as a ``REDaten`` template.

        :param template: text of the template to convert
        :return: string form of a template titled ``REDaten`` that carries
            the parameters of the input followed by ``NACHTRAG=ON``
        """
        parameters = TemplateHandler(template).get_parameterlist()
        parameters.append({"key": "NACHTRAG", "value": "ON"})

        re_daten = TemplateHandler()
        re_daten.update_parameters(parameters)
        re_daten.set_title("REDaten")
        return re_daten.get_str()
Exemple #6
0
 def test_without_key(self):
     """Parse a template whose first argument is given without a key."""
     template = "{{" + "|".join(
         (test_title, test_string_argument_1_no_key, test_string_argument_2)) + "}}"
     expected = [test_dict_argument_1_no_key, test_dict_argument_2]
     parser = TemplateHandler(template)
     self.assertEqual(expected, parser.get_parameterlist())
Exemple #7
0
    def convert_re_platzhalter(self, template: str):
        """
        Rebuild the given template text as a ``REDaten`` template.

        :param template: text of the template to convert
        :return: string form of a template titled ``REDaten`` whose parameters
            are the input parameters after ``_gemeinfrei_todesjahr`` processed them
        """
        parameters = TemplateHandler(template).get_parameterlist()
        parameters = self._gemeinfrei_todesjahr(parameters)

        re_daten = TemplateHandler()
        re_daten.update_parameters(parameters)
        re_daten.set_title("REDaten")
        return re_daten.get_str()
Exemple #8
0
 def _get_pre_text(self):
     """
     Render the properties of this object as a template titled ``RE_DATEN``.

     :return: complex string form (``str_complex=True``) of the template,
         one key/value parameter per entry of ``self._properties``
     """
     pre_template = TemplateHandler()
     pre_template.title = RE_DATEN
     parameters = [
         {"key": re_property.name, "value": re_property.value_to_string()}
         for re_property in self._properties
     ]
     pre_template.update_parameters(parameters)
     return pre_template.get_str(str_complex=True)
Exemple #9
0
    def convert_re_nachtrag_platzhalter(self, template: str):
        """
        Rebuild the given template text as a ``REDaten`` template.

        ``NACHTRAG=ON`` is appended to the input parameters first, then the
        list is handed to ``_gemeinfrei_todesjahr``.

        :param template: text of the template to convert
        :return: string form of the resulting template titled ``REDaten``
        """
        parameters = TemplateHandler(template).get_parameterlist()
        parameters.append({"key": "NACHTRAG", "value": "ON"})
        parameters = self._gemeinfrei_todesjahr(parameters)

        re_daten = TemplateHandler()
        re_daten.update_parameters(parameters)
        re_daten.set_title("REDaten")
        return re_daten.get_str()
Exemple #10
0
 def test_second_equal(self):
     """Expect a second ``=`` inside an argument to stay part of the value."""
     raw_argument = "BILD=Der Todesgang des armenischen Volkes.pdf{{!}}page=276"
     expected = [
         test_dict_argument_1,
         {"key": "BILD", "value": "Der Todesgang des armenischen Volkes.pdf{{!}}page=276"},
     ]
     template = "{{" + "|".join(
         (test_title_test, test_string_argument_1, raw_argument)) + "}}"
     parser = TemplateHandler(template)
     self.assertEqual(expected, parser.get_parameterlist())
Exemple #11
0
def substitute_sperrsatz(template):
    """
    Rewrite a matched template as ``SperrSchrift`` with ``satz=1`` appended.

    :param template: match object; ``group(0)`` is parsed as the template text
    :return: simple string form (``str_complex=False``) of the new template
    """
    sperr_handler = TemplateHandler(template.group(0))
    sperr_handler.set_title('SperrSchrift')
    arguments = sperr_handler.get_parameterlist()
    arguments.append({'key': 'satz', 'value': '1'})
    sperr_handler.update_parameters(arguments)
    return sperr_handler.get_str(str_complex=False)
Exemple #12
0
 def test_get_str(self):
     """get_str renders both the simple and the complex string form."""
     parser = TemplateHandler()
     parser.set_title(test_title)
     parser.update_parameters(test_list_12)
     for expected, complex_form in ((test_string_12_simple, False),
                                    (test_string_12_complex, True)):
         self.assertEqual(expected, parser.get_str(str_complex=complex_form))
    def test_link_with_text(self):
        """A ``|`` inside ``[[link|text]]`` must not split the argument."""
        # first with an explicit key in front of the link ...
        keyed_argument = "2 = [[link|text for link]] more"
        keyed_template = "{{" + "|".join(
            (test_title, test_string_argument_1_no_key, keyed_argument)) + "}}"
        keyed_expected = [
            test_dict_argument_1_no_key,
            {"key": "2", "value": "[[link|text for link]] more"},
        ]
        parser = TemplateHandler(keyed_template)
        self.assertEqual(keyed_expected, parser.get_parameterlist())

        del parser

        # ... then with the link as a plain argument without key
        plain_argument = "[[link|text for link]] more"
        plain_template = "{{" + "|".join(
            (test_title, test_string_argument_1_no_key, plain_argument)) + "}}"
        plain_expected = [
            test_dict_argument_1_no_key,
            {"key": None, "value": "[[link|text for link]] more"},
        ]
        parser = TemplateHandler(plain_template)
        self.assertEqual(plain_expected, parser.get_parameterlist())
    def convert_re_platzhalter(self, template: str):
        """
        Rebuild the given template text as a ``REDaten`` template.

        :param template: text of the template to convert
        :return: string form of a template titled ``REDaten`` whose parameters
            are the input parameters after ``_gemeinfrei_todesjahr`` processed them
        """
        parameters = TemplateHandler(template).get_parameterlist()
        parameters = self._gemeinfrei_todesjahr(parameters)

        re_daten = TemplateHandler()
        re_daten.update_parameters(parameters)
        re_daten.set_title("REDaten")
        return re_daten.get_str()
    def convert_re_nachtrag(template: str):
        """
        Rebuild the given template text as a ``REDaten`` template.

        :param template: text of the template to convert
        :return: string form of a template titled ``REDaten`` that carries
            the parameters of the input followed by ``NACHTRAG=ON``
        """
        parameters = TemplateHandler(template).get_parameterlist()
        parameters.append({"key": "NACHTRAG", "value": "ON"})

        re_daten = TemplateHandler()
        re_daten.update_parameters(parameters)
        re_daten.set_title("REDaten")
        return re_daten.get_str()
    def convert_re_nachtrag_platzhalter(self, template: str):
        """
        Rebuild the given template text as a ``REDaten`` template.

        ``NACHTRAG=ON`` is appended to the input parameters first, then the
        list is handed to ``_gemeinfrei_todesjahr``.

        :param template: text of the template to convert
        :return: string form of the resulting template titled ``REDaten``
        """
        parameters = TemplateHandler(template).get_parameterlist()
        parameters.append({"key": "NACHTRAG", "value": "ON"})
        parameters = self._gemeinfrei_todesjahr(parameters)

        re_daten = TemplateHandler()
        re_daten.update_parameters(parameters)
        re_daten.set_title("REDaten")
        return re_daten.get_str()
    def test_bug_authorlist(self):
        """
        Regression inputs whose values contain ref tags, nested templates,
        wiki links and external links with ``=`` in the URL.
        """
        # each entry: (raw argument text, parameters expected after argument 1)
        cases = [
            (
                "STERBEDATUM = 2. Januar < ref name = \"adp\" / > oder 31. Januar < ref > 49. Jahres - Bericht d.Schles.Ges.für vaterländische Cultur, S. 317, Nekrolog {{GBS|hP1DAAAAIAAJ|PA317}} < / ref > 1871",
                [{"key": "STERBEDATUM", "value": "2. Januar < ref name = \"adp\" / > oder 31. Januar < ref > 49. Jahres - Bericht d.Schles.Ges.für vaterländische Cultur, S. 317, Nekrolog {{GBS|hP1DAAAAIAAJ|PA317}} < / ref > 1871"}],
            ),
            (
                "GEBURTSDATUM=1783 < ref name = \"EB\" >  Encyclopaedia Britannica.  11. Auflage(1911), Bd. 1, S.[[:en:Page:EB1911 - Volume 01. djvu / 792 | 748]] {{an | englisch, im Artikel}} < / ref >",
                [{"key": "GEBURTSDATUM", "value": "1783 < ref name = \"EB\" >  Encyclopaedia Britannica.  11. Auflage(1911), Bd. 1, S.[[:en:Page:EB1911 - Volume 01. djvu / 792 | 748]] {{an | englisch, im Artikel}} < / ref >"}],
            ),
            (
                "GEBURTSORT=Klein Flottbek (heute zu [[Hamburg]])|STERBEDATUM=28. Oktober 1929|STERBEORT=[[Rom]]",
                [
                    {"key": "GEBURTSORT", "value": "Klein Flottbek (heute zu [[Hamburg]])"},
                    {"key": "STERBEDATUM", "value": "28. Oktober 1929"},
                    {"key": "STERBEORT", "value": "[[Rom]]"},
                ],
            ),
            (
                "ALTERNATIVNAMEN = Carl Biedermann; Friedrich Karl Biedermann; Karl Friedrich 4[Pseudonym]|SONSTIGES=[http://gso.gbv.de/DB=1.28/REL?PPN=004072189&RELTYPE=TT Martin Opitz im VD 17]",
                [
                    {"key": "ALTERNATIVNAMEN", "value": "Carl Biedermann; Friedrich Karl Biedermann; Karl Friedrich 4[Pseudonym]"},
                    {"key": "SONSTIGES", "value": "[http://gso.gbv.de/DB=1.28/REL?PPN=004072189&RELTYPE=TT Martin Opitz im VD 17]"},
                ],
            ),
            (
                "SONSTIGES=Pächter der [[w:Harste|Domäne Harste]], Vater von [[w:Karl Henrici|Karl Henrici]]<ref>""Zeitschrift des Vereins für Hamburgische Geschichte."" Band 42. 1953, S. 135 [http://books.google.de/books?id=1XISAAAAIAAJ&q=%2B%22henrici%22+%2B%221885%22+%2B%22harste%22&dq=%2B%22henrici%22+%2B%221885%22+%2B%22harste%22 Google]</ref>",
                [{"key": "SONSTIGES", "value": "Pächter der [[w:Harste|Domäne Harste]], Vater von [[w:Karl Henrici|Karl Henrici]]<ref>""Zeitschrift des Vereins für Hamburgische Geschichte."" Band 42. 1953, S. 135 [http://books.google.de/books?id=1XISAAAAIAAJ&q=%2B%22henrici%22+%2B%221885%22+%2B%22harste%22&dq=%2B%22henrici%22+%2B%221885%22+%2B%22harste%22 Google]</ref>"}],
            ),
        ]
        for raw_argument, expected_tail in cases:
            template = "{{" + "|".join(
                (test_title_test, test_string_argument_1, raw_argument)) + "}}"
            expected = [test_dict_argument_1] + expected_tail
            parsed = TemplateHandler(template).get_parameterlist()
            self.assertEqual(expected, parsed)
Exemple #18
0
    def test_link_with_text(self):
        """A ``|`` inside ``[[link|text]]`` must not split the argument."""
        # first with an explicit key in front of the link ...
        keyed_argument = "2 = [[link|text for link]] more"
        keyed_template = "{{" + "|".join(
            (test_title, test_string_argument_1_no_key, keyed_argument)) + "}}"
        keyed_expected = [
            test_dict_argument_1_no_key,
            {"key": "2", "value": "[[link|text for link]] more"},
        ]
        parser = TemplateHandler(keyed_template)
        self.assertEqual(keyed_expected, parser.get_parameterlist())

        del parser

        # ... then with the link as a plain argument without key
        plain_argument = "[[link|text for link]] more"
        plain_template = "{{" + "|".join(
            (test_title, test_string_argument_1_no_key, plain_argument)) + "}}"
        plain_expected = [
            test_dict_argument_1_no_key,
            {"key": None, "value": "[[link|text for link]] more"},
        ]
        parser = TemplateHandler(plain_template)
        self.assertEqual(plain_expected, parser.get_parameterlist())
Exemple #19
0
 def _get_pre_text(self):
     """
     Render the properties of this object as a template titled ``RE_DATEN``.

     :return: complex string form (``str_complex=True``) of the template,
         one key/value parameter per entry of ``self._properties``
     """
     pre_template = TemplateHandler()
     pre_template.title = RE_DATEN
     parameters = [
         {"key": re_property.name, "value": re_property.value_to_string()}
         for re_property in self._properties
     ]
     pre_template.update_parameters(parameters)
     return pre_template.get_str(str_complex=True)
Exemple #20
0
    def test_template_in_template(self):
        """A nested ``{{...|...}}`` must be kept whole as one argument value."""
        # nested template as first argument without key
        nested_plain = "{{otherTemplate|other_argument}}"
        template = "{{" + "|".join(
            (test_title, nested_plain, test_string_argument_2)) + "}}"
        expected = [
            {"key": None, "value": "{{otherTemplate|other_argument}}"},
            test_dict_argument_2,
        ]
        parser = TemplateHandler(template)
        self.assertListEqual(expected, parser.get_parameterlist())
        del parser

        # nested template as the only argument
        nested_only = "{{Kapitaelchen|Test}}"
        template = "{{" + "|".join((test_title_sperr, nested_only)) + "}}"
        expected = [{"key": None, "value": "{{Kapitaelchen|Test}}"}]
        parser = TemplateHandler(template)
        self.assertListEqual(expected, parser.get_parameterlist())
        del parser

        # nested template behind an explicit key
        nested_keyed = "1={{otherTemplate|other_argument}}"
        template = "{{" + "|".join(
            (test_title, nested_keyed, test_string_argument_2)) + "}}"
        expected = [
            {"key": "1", "value": "{{otherTemplate|other_argument}}"},
            test_dict_argument_2,
        ]
        parser = TemplateHandler(template)
        self.assertListEqual(expected, parser.get_parameterlist())
Exemple #21
0
 def test_update_parameters(self):
     """update_parameters makes get_str emit the new arguments 3, 4 and 5."""
     parser = TemplateHandler(test_string_12_simple)
     # before the update the handler answers with arguments 1 and 2
     for key, expected_parameter in (("1", test_dict_argument_1),
                                     ("2", test_dict_argument_2)):
         self.assertEqual(expected_parameter, parser.get_parameter(key))
     parser.update_parameters(
         [test_dict_argument_3, test_dict_argument_4, test_dict_argument_5])
     expected = "{{" + "|".join(
         (test_title, test_string_argument_3, test_string_argument_4,
          test_string_argument_5)) + "}}"
     self.assertEqual(expected, parser.get_str(str_complex=False))
    def test_template_in_template(self):
        """A nested ``{{...|...}}`` must be kept whole as one argument value."""
        # nested template as first argument without key
        nested_plain = "{{otherTemplate|other_argument}}"
        template = "{{" + "|".join(
            (test_title, nested_plain, test_string_argument_2)) + "}}"
        expected = [
            {"key": None, "value": "{{otherTemplate|other_argument}}"},
            test_dict_argument_2,
        ]
        parser = TemplateHandler(template)
        self.assertListEqual(expected, parser.get_parameterlist())
        del parser

        # nested template as the only argument
        nested_only = "{{Kapitaelchen|Test}}"
        template = "{{" + "|".join((test_title_sperr, nested_only)) + "}}"
        expected = [{"key": None, "value": "{{Kapitaelchen|Test}}"}]
        parser = TemplateHandler(template)
        self.assertListEqual(expected, parser.get_parameterlist())
        del parser

        # nested template behind an explicit key
        nested_keyed = "1={{otherTemplate|other_argument}}"
        template = "{{" + "|".join(
            (test_title, nested_keyed, test_string_argument_2)) + "}}"
        expected = [
            {"key": "1", "value": "{{otherTemplate|other_argument}}"},
            test_dict_argument_2,
        ]
        parser = TemplateHandler(template)
        self.assertListEqual(expected, parser.get_parameterlist())
 def test_get_parameter(self):
     """get_parameter returns the matching dict for the keys "1" and "2"."""
     parser = TemplateHandler(test_string_12_complex)
     for key, expected in (("1", test_dict_argument_1),
                           ("2", test_dict_argument_2)):
         self.assertEqual(expected, parser.get_parameter(key))
Exemple #24
0
 def test_get_parameter(self):
     """get_parameter returns the matching dict for the keys "1" and "2"."""
     parser = TemplateHandler(test_string_12_complex)
     for key, expected in (("1", test_dict_argument_1),
                           ("2", test_dict_argument_2)):
         self.assertEqual(expected, parser.get_parameter(key))
Exemple #25
0
 def test_template_from_page(self):
     """Parsing the complex string form yields the expected parameter list."""
     parser = TemplateHandler(test_string_12_complex)
     parsed = parser.get_parameterlist()
     self.assertEqual(test_list_12, parsed)
Exemple #26
0
    def test_bug_authorlist(self):
        """
        Regression inputs whose values contain ref tags, nested templates,
        wiki links and external links with ``=`` in the URL.
        """
        # each entry: (raw argument text, parameters expected after argument 1)
        cases = [
            (
                "STERBEDATUM = 2. Januar < ref name = \"adp\" / > oder 31. Januar "
                "< ref > 49. Jahres - Bericht d.Schles.Ges.für vaterländische Cultur, S. 317, "
                "Nekrolog {{GBS|hP1DAAAAIAAJ|PA317}} < / ref > 1871",
                [{
                    "key": "STERBEDATUM",
                    "value": "2. Januar < ref name = \"adp\" / > oder 31. Januar "
                             "< ref > 49. Jahres - Bericht d.Schles.Ges.für vaterländische Cultur, S. 317, "
                             "Nekrolog {{GBS|hP1DAAAAIAAJ|PA317}} < / ref > 1871",
                }],
            ),
            (
                "GEBURTSDATUM=1783 < ref name = \"EB\" >  Encyclopaedia Britannica.  "
                "11. Auflage(1911), Bd. 1, S.[[:en:Page:EB1911 - Volume 01. djvu / 792 | 748]] "
                "{{an | englisch, im Artikel}} < / ref >",
                [{
                    "key": "GEBURTSDATUM",
                    "value": "1783 < ref name = \"EB\" >  Encyclopaedia Britannica.  "
                             "11. Auflage(1911), Bd. 1, S.[[:en:Page:EB1911 - Volume 01. djvu / 792 | 748]] "
                             "{{an | englisch, im Artikel}} < / ref >",
                }],
            ),
            (
                "GEBURTSORT=Klein Flottbek (heute zu [[Hamburg]])"
                "|STERBEDATUM=28. Oktober 1929|STERBEORT=[[Rom]]",
                [
                    {"key": "GEBURTSORT", "value": "Klein Flottbek (heute zu [[Hamburg]])"},
                    {"key": "STERBEDATUM", "value": "28. Oktober 1929"},
                    {"key": "STERBEORT", "value": "[[Rom]]"},
                ],
            ),
            (
                "ALTERNATIVNAMEN = Carl Biedermann; Friedrich Karl Biedermann; "
                "Karl Friedrich 4[Pseudonym]"
                "|SONSTIGES=[http://gso.gbv.de/DB=1.28/REL?PPN=004072189&RELTYPE=TT "
                "Martin Opitz im VD 17]",
                [
                    {
                        "key": "ALTERNATIVNAMEN",
                        "value": "Carl Biedermann; Friedrich Karl Biedermann; Karl Friedrich 4[Pseudonym]",
                    },
                    {
                        "key": "SONSTIGES",
                        "value": "[http://gso.gbv.de/DB=1.28/REL?PPN=004072189&RELTYPE=TT Martin Opitz im VD 17]",
                    },
                ],
            ),
            (
                "SONSTIGES=Pächter der [[w:Harste|Domäne Harste]], "
                "Vater von [[w:Karl Henrici|Karl Henrici]]<ref>Zeitschrift des Vereins für "
                "Hamburgische Geschichte."" Band 42. 1953, S. 135 "
                "[http://books.google.de/books?id=1XISAAAAIAAJ Google]</ref>",
                [{
                    "key": "SONSTIGES",
                    "value": "Pächter der [[w:Harste|Domäne Harste]], "
                             "Vater von [[w:Karl Henrici|Karl Henrici]]<ref>"
                             "Zeitschrift des Vereins für "
                             "Hamburgische Geschichte."
                             " Band 42. 1953, S. 135 "
                             "[http://books.google.de/books?id=1XISAAAAIAAJ Google]</ref>",
                }],
            ),
        ]
        for raw_argument, expected_tail in cases:
            template = "{{" + "|".join(
                (test_title_test, test_string_argument_1, raw_argument)) + "}}"
            expected = [test_dict_argument_1] + expected_tail
            parsed = TemplateHandler(template).get_parameterlist()
            self.assertEqual(expected, parsed)
 def test_template_from_page(self):
     """Parsing the complex string form yields the expected parameter list."""
     parser = TemplateHandler(test_string_12_complex)
     parsed = parser.get_parameterlist()
     self.assertEqual(test_list_12, parsed)
 def test_without_key(self):
     """Parse a template whose first argument is given without a key."""
     template = "{{" + "|".join(
         (test_title, test_string_argument_1_no_key, test_string_argument_2)) + "}}"
     expected = [test_dict_argument_1_no_key, test_dict_argument_2]
     parser = TemplateHandler(template)
     self.assertEqual(expected, parser.get_parameterlist())
Exemple #29
0
    elif number < 1000:
        for members in range(digits - 3):
            number_str = "0" + number_str
    return number_str


# Site handle used for every page object created below.
site = pywikibot.Site()

# Walk the even numbers 6, 8, ..., 294. ``idx`` is the position in that
# sequence and indexes ``titles``, which is defined outside this excerpt.
for idx, i in enumerate(range(6, 296, 2)):
    #for idx, i in enumerate(range(120, 144, 2)):
    # Skip every number below 206.
    # NOTE(review): looks like a resume point from an earlier run - confirm.
    if i < 206:
        continue
    page = pywikibot.Page(site,
                          'Orbis sensualium pictus/{}'.format(titles[idx]))
    print(i, titles[idx])
    handler = TemplateHandler()

    # The two scan pages numbered i and i + 1; the number goes through
    # add_zeros(..., 3) before it is put into the page name.
    first_page = proofreadpage.ProofreadPage(
        site, "Seite:OrbisPictus {}.jpg".format(add_zeros(i, 3)))
    second_page = proofreadpage.ProofreadPage(
        site, "Seite:OrbisPictus {}.jpg".format(add_zeros(i + 1, 3)))

    status_1 = first_page.status
    status_2 = second_page.status

    # Combined status: "Fertig" only if both pages are "Fertig",
    # "Unkorrigiert" as soon as one page is, "Korrigiert" otherwise.
    if status_1 == "Fertig" and status_2 == "Fertig":
        status = "Fertig"
    elif status_1 == "Unkorrigiert" or status_2 == "Unkorrigiert":
        status = "Unkorrigiert"
    else:
        status = "Korrigiert"
    'Venus Urania. Ueber die Natur der Liebe, über ihre Veredelung und Verschönerung/Zweyter Theil',
    'Venus Urania. Ueber die Natur der Liebe, über ihre Veredelung und Verschönerung/Zweyter Theil/Achtes Buch',
    'Venus Urania. Ueber die Natur der Liebe, über ihre Veredelung und Verschönerung/Zweyter Theil/Eilftes Buch',
    'Venus Urania. Ueber die Natur der Liebe, über ihre Veredelung und Verschönerung/Zweyter Theil/Neuntes Buch',
    'Venus Urania. Ueber die Natur der Liebe, über ihre Veredelung und Verschönerung/Zweyter Theil/Siebentes Buch',
    'Venus Urania. Ueber die Natur der Liebe, über ihre Veredelung und Verschönerung/Zweyter Theil/Zehntes Buch',
    'Venus Urania. Ueber die Natur der Liebe, über ihre Veredelung und Verschönerung/Zweyter Theil/Zwölftes Buch'
]

# Site handle for the page lookup below.
wiki = pywikibot.Site()

lemma_dummy = 'Venus Urania. Ueber die Natur der Liebe, über ihre Veredelung und Verschönerung/Erster Theil/Erstes Buch'
page = pywikibot.Page(wiki, title='Benutzer:THEbotIT/Test2')
# Cut the Textdaten template out of the page text; the alternation lets the
# match run across nested {{...}} templates. The pattern is a raw string
# because ``\{`` and ``\}`` are invalid escape sequences in a normal literal
# (SyntaxWarning on current Python); the pattern value itself is unchanged.
# NOTE: re.search returns None when the page has no Textdaten template, in
# which case .group() raises AttributeError.
template_textdaten = re.search(r'\{\{Textdaten(?:\{\{.*?\}\}|.)*?\}\}',
                               page.text, re.DOTALL).group()
list_textdaten = TemplateHandler(template_textdaten).get_parameterlist()

# Two empty handlers, both titled 'Navigation2'.
template_navigation1 = TemplateHandler()
template_navigation1.set_title('Navigation2')
template_navigation2 = TemplateHandler()
template_navigation2.set_title('Navigation2')

# Parameter list for the navigation templates; KAPITEL takes the value of
# the fourth Textdaten parameter (index 3).
list_navigation = [
    {'key': 'AUTOR', 'value': '[[Basilius von Ramdohr]]'},
    {
        'key': 'ARTIKEL',
        'value': '[[Venus Urania. Ueber die Natur der Liebe, über ihre Veredelung und Verschönerung|Venus Urania]]',
    },
    {'key': 'KAPITEL', 'value': list_textdaten[3]['value']},
]
 def test_bug_no_arguments(self):
     """A template that consists only of its title yields an empty parameter list."""
     template_without_arguments = TemplateHandler("{{just_this}}")
     parsed_parameters = template_without_arguments.get_parameterlist()
     self.assertListEqual([], parsed_parameters)
Exemple #32
0
    def _build_database(self, lemma_list):
        """
        Refresh the entry in ``self.data`` for every author in ``lemma_list``.

        For each author the entry stored under ``str(author["id"])`` is deleted, the
        ``Personendaten`` template is searched in the text of the author's page and, if one
        is found, a new entry is assembled from its parameters and stored under
        ``author["id"]``. An exception raised while the page of a single author is evaluated
        is logged and the loop carries on with the next author.

        :param lemma_list: sized iterable of dicts. ``title`` and ``id`` are read from every
            item, ``q`` (stored as ``wikidata``) is optional.
        """
        for idx, author in enumerate(lemma_list):
            title = author["title"]
            self.logger.debug(f"{idx + 1}/{len(lemma_list)} {title}")
            # delete preexisting data of this author
            # NOTE(review): the old entry is deleted under the str key, the new one is stored
            # under the raw id - presumably the keys become strings when the data is
            # persisted; verify against the storage of self.data.
            try:
                del self.data[str(author["id"])]
            except KeyError:
                if self.last_run_successful:
                    self.logger.info(f"Can't delete old entry of [[{title}]]")

            # the page object is created outside of the try block, an error here is not swallowed
            page = Page(self.wiki, title)
            try:
                personendaten = self._find_personendaten(page, title)
                if personendaten:
                    dict_author = self._author_from_template(TemplateHandler(personendaten),
                                                             author)
                    self.data.update({author["id"]: dict_author})
            except Exception as exception:
                self.logger.exception("Exception not catched: ", exc_info=exception)
                self.logger.error(f"author {title} have a problem")

    def _find_personendaten(self, page, title):
        """
        Extract the Personendaten-block from the text of a page.

        :param page: object whose ``text`` attribute is searched
        :param title: title of the author, only used for the log message
        :return: the text of the block, ``None`` (after logging an error) if nothing matched
        """
        try:
            # .group() on a failed search raises the AttributeError handled below
            return re.search(r"\{\{Personendaten(?:.|\n)*?\n\}\}\n", page.text).group()
        except AttributeError:
            self.logger.error(f"No valid block \"Personendaten\" was found for [[{title}]].")
            return None

    def _author_from_template(self, template_extractor, author):
        """
        Assemble the database entry of one author from its Personendaten template.

        ``NACHNAME`` and ``VORNAMEN`` are read without a fallback, a failure there is passed
        on to the caller. All other parameters fall back to a default.

        :param template_extractor: handler that wraps the Personendaten template
        :param author: dict of the author, ``title`` is required, ``q`` is optional
        :return: dict with the keys ``title``, ``name``, ``first_name``, ``birth``, ``death``,
            ``description``, ``synonyms``, ``sortkey`` and, if available, ``wikidata``
        """
        title = author["title"]
        dict_author = {
            "title": title,
            "name": self._strip_spaces(template_extractor.get_parameter("NACHNAME")["value"]),
            "first_name":
                self._strip_spaces(template_extractor.get_parameter("VORNAMEN")["value"]),
        }
        dict_author["birth"] = self._optional_parameter(
            template_extractor, "GEBURTSDATUM",
            f"Templatehandler couldn't find a birthdate for: [[{title}]]", strip=True)
        dict_author["death"] = self._optional_parameter(
            template_extractor, "STERBEDATUM",
            f"Templatehandler couldn't find a deathdate for: [[{title}]]", strip=True)
        dict_author["description"] = self._optional_parameter(
            template_extractor, "KURZBESCHREIBUNG",
            f"Templatehandler couldn't find a description for: [[{title}]]")
        dict_author["synonyms"] = self._optional_parameter(
            template_extractor, "ALTERNATIVNAMEN",
            f"Templatehandler couldn't find synonyms for: [[{title}]]")
        dict_author["sortkey"] = self._author_sortkey(template_extractor, dict_author, title)
        try:
            dict_author.update({"wikidata": author["q"]})
        except KeyError:
            self.logger.warning(f"The autor [[{title}]] has no wikidata_item")
        return dict_author

    def _optional_parameter(self, template_extractor, key, warning, strip=False):
        """
        Read a template parameter that is allowed to be missing.

        :param template_extractor: handler that provides ``get_parameter``
        :param key: name of the template parameter
        :param warning: message that is logged as warning if the parameter can't be read
        :param strip: pass the value through ``self._strip_spaces`` before it is returned
        :return: the value of the parameter, ``""`` if reading it failed
        """
        try:
            value = template_extractor.get_parameter(key)["value"]
            return self._strip_spaces(value) if strip else value
        except Exception:
            # kept as broad as the inline code was, the exception type for a missing
            # parameter is not visible from here
            self.logger.warning(warning)
            return ""

    def _author_sortkey(self, template_extractor, dict_author, title):
        """
        Determine the sortkey of an author.

        The parameter ``SORTIERUNG`` is used if it can be read and isn't empty. Otherwise a
        dummy key is built from ``dict_author["name"]`` and ``dict_author["first_name"]``.

        :param template_extractor: handler that provides ``get_parameter``
        :param dict_author: entry of the author, ``name`` and ``first_name`` must be set
        :param title: title of the author, only used for the log message
        :return: the sortkey
        """
        try:
            sortkey = template_extractor.get_parameter("SORTIERUNG")["value"]
        except Exception:
            # an unreadable parameter is handled like an empty one
            sortkey = ""
        if sortkey != "":
            return sortkey
        self.logger.debug(f"there is no sortkey for [[{title}]].")
        # make a dummy key
        if not dict_author["name"]:
            self.logger.warning("Author has no last name.")
            return dict_author["first_name"]
        if not dict_author["first_name"]:
            self.logger.warning("Author has no last first_name.")
            return dict_author["name"]
        return dict_author["name"] + ", " + dict_author["first_name"]
 def test_set_title(self):
     """After set_title the simple string form carries the new title before arguments 1 and 2."""
     expected_parts = [test_title_test, test_string_argument_1, test_string_argument_2]
     expected = "{{" + "|".join(expected_parts) + "}}"
     template = TemplateHandler(test_string_12_simple)
     template.set_title(test_title_test)
     self.assertEqual(expected, template.get_str(str_complex=False))
Exemple #34
0
 def test_bug_no_arguments(self):
     """A template that consists only of its title yields an empty parameter list."""
     template_without_arguments = TemplateHandler("{{just_this}}")
     parsed_parameters = template_without_arguments.get_parameterlist()
     self.assertListEqual([], parsed_parameters)
Exemple #35
0
    def _build_database(self, lemma_list):
        """
        Refresh the entry in ``self.data`` for every author in ``lemma_list``.

        For each author the entry stored under ``str(author["id"])`` is deleted, the
        ``Personendaten`` template is searched in the text of the author's page and, if one
        is found, a new entry is assembled from its parameters and stored under
        ``author["id"]``. An exception raised while the page of a single author is evaluated
        is logged and the loop carries on with the next author.

        :param lemma_list: sized iterable of dicts. ``title`` and ``id`` are read from every
            item, ``q`` (stored as ``wikidata``) is optional.
        """
        for idx, author in enumerate(lemma_list):
            title = author["title"]
            self.logger.debug(f"{idx + 1}/{len(lemma_list)} {title}")
            # delete preexisting data of this author
            # NOTE(review): the old entry is deleted under the str key, the new one is stored
            # under the raw id - presumably the keys become strings when the data is
            # persisted; verify against the storage of self.data.
            try:
                del self.data[str(author["id"])]
            except KeyError:
                if self.last_run_successful:
                    self.logger.info(f"Can't delete old entry of [[{title}]]")

            # the page object is created outside of the try block, an error here is not swallowed
            page = Page(self.wiki, title)
            try:
                personendaten = self._find_personendaten(page, title)
                if personendaten:
                    dict_author = self._author_from_template(TemplateHandler(personendaten),
                                                             author)
                    self.data.update({author["id"]: dict_author})
            except Exception as exception:
                self.logger.exception("Exception not catched: ", exc_info=exception)
                self.logger.error(f"author {title} have a problem")

    def _find_personendaten(self, page, title):
        """
        Extract the Personendaten-block from the text of a page.

        :param page: object whose ``text`` attribute is searched
        :param title: title of the author, only used for the log message
        :return: the text of the block, ``None`` (after logging an error) if nothing matched
        """
        try:
            # .group() on a failed search raises the AttributeError handled below
            return re.search(r"\{\{Personendaten(?:.|\n)*?\n\}\}\n", page.text).group()
        except AttributeError:
            self.logger.error(f"No valid block \"Personendaten\" was found for [[{title}]].")
            return None

    def _author_from_template(self, template_extractor, author):
        """
        Assemble the database entry of one author from its Personendaten template.

        ``NACHNAME`` and ``VORNAMEN`` are read without a fallback, a failure there is passed
        on to the caller. All other parameters fall back to a default.

        :param template_extractor: handler that wraps the Personendaten template
        :param author: dict of the author, ``title`` is required, ``q`` is optional
        :return: dict with the keys ``title``, ``name``, ``first_name``, ``birth``, ``death``,
            ``description``, ``synonyms``, ``sortkey`` and, if available, ``wikidata``
        """
        title = author["title"]
        dict_author = {
            "title": title,
            "name": self._strip_spaces(template_extractor.get_parameter("NACHNAME")["value"]),
            "first_name":
                self._strip_spaces(template_extractor.get_parameter("VORNAMEN")["value"]),
        }
        dict_author["birth"] = self._optional_parameter(
            template_extractor, "GEBURTSDATUM",
            f"Templatehandler couldn't find a birthdate for: [[{title}]]", strip=True)
        dict_author["death"] = self._optional_parameter(
            template_extractor, "STERBEDATUM",
            f"Templatehandler couldn't find a deathdate for: [[{title}]]", strip=True)
        dict_author["description"] = self._optional_parameter(
            template_extractor, "KURZBESCHREIBUNG",
            f"Templatehandler couldn't find a description for: [[{title}]]")
        dict_author["synonyms"] = self._optional_parameter(
            template_extractor, "ALTERNATIVNAMEN",
            f"Templatehandler couldn't find synonyms for: [[{title}]]")
        dict_author["sortkey"] = self._author_sortkey(template_extractor, dict_author, title)
        try:
            dict_author.update({"wikidata": author["q"]})
        except KeyError:
            self.logger.warning(f"The autor [[{title}]] has no wikidata_item")
        return dict_author

    def _optional_parameter(self, template_extractor, key, warning, strip=False):
        """
        Read a template parameter that is allowed to be missing.

        :param template_extractor: handler that provides ``get_parameter``
        :param key: name of the template parameter
        :param warning: message that is logged as warning if the parameter can't be read
        :param strip: pass the value through ``self._strip_spaces`` before it is returned
        :return: the value of the parameter, ``""`` if reading it failed
        """
        try:
            value = template_extractor.get_parameter(key)["value"]
            return self._strip_spaces(value) if strip else value
        except Exception:
            # kept as broad as the inline code was, the exception type for a missing
            # parameter is not visible from here
            self.logger.warning(warning)
            return ""

    def _author_sortkey(self, template_extractor, dict_author, title):
        """
        Determine the sortkey of an author.

        The parameter ``SORTIERUNG`` is used if it can be read and isn't empty. Otherwise a
        dummy key is built from ``dict_author["name"]`` and ``dict_author["first_name"]``.

        :param template_extractor: handler that provides ``get_parameter``
        :param dict_author: entry of the author, ``name`` and ``first_name`` must be set
        :param title: title of the author, only used for the log message
        :return: the sortkey
        """
        try:
            sortkey = template_extractor.get_parameter("SORTIERUNG")["value"]
        except Exception:
            # an unreadable parameter is handled like an empty one
            sortkey = ""
        if sortkey != "":
            return sortkey
        self.logger.debug(f"there is no sortkey for [[{title}]].")
        # make a dummy key
        if not dict_author["name"]:
            self.logger.warning("Author has no last name.")
            return dict_author["first_name"]
        if not dict_author["first_name"]:
            self.logger.warning("Author has no last first_name.")
            return dict_author["name"]
        return dict_author["name"] + ", " + dict_author["first_name"]
        for members in range(digits-2):
            number_str = "0" + number_str
    elif number < 1000:
        for members in range(digits-3):
            number_str = "0" + number_str
    return number_str

site = pywikibot.Site()

# Walk over the even scan numbers 6, 8, ..., 294; ``idx`` selects the title that
# belongs to the scans ``i`` and ``i + 1``.
for idx, i in enumerate(range(6, 296, 2)):
    # scans below 206 are skipped
    if i < 206:
        continue
    page = pywikibot.Page(
        site, 'Orbis sensualium pictus/{}'.format(titles[idx]))
    print(i, titles[idx])
    handler = TemplateHandler()

    # the two proofread pages are named after the zero-padded scan numbers
    first_page = proofreadpage.ProofreadPage(
        site, "Seite:OrbisPictus {}.jpg".format(add_zeros(i, 3)))
    second_page = proofreadpage.ProofreadPage(
        site, "Seite:OrbisPictus {}.jpg".format(add_zeros(i + 1, 3)))

    status_1 = first_page.status
    status_2 = second_page.status

    # Combined status of both pages: "Fertig" only if both are "Fertig",
    # "Unkorrigiert" as soon as one of them is, "Korrigiert" otherwise.
    if status_1 == status_2 == "Fertig":
        status = "Fertig"
    elif "Unkorrigiert" in (status_1, status_2):
        status = "Unkorrigiert"
    else:
        status = "Korrigiert"

    print(status_1, status_2, status)
 def test_get_str(self):
     """A handler filled via set_title/update_parameters renders the simple and the complex form."""
     template = TemplateHandler()
     template.set_title(test_title)
     template.update_parameters(test_list_12)
     simple_form = template.get_str(str_complex=False)
     self.assertEqual(test_string_12_simple, simple_form)
     complex_form = template.get_str(str_complex=True)
     self.assertEqual(test_string_12_complex, complex_form)