Exemple #1
0
def test_level_setter_does_not_overwrite_title():
    """Changing ``level`` rewrites the heading markers and leaves the title template intact."""
    section = Section('={{t}}=\nb')
    title_template = section.templates[0]
    # Re-assigning the level the section already has is expected to be a no-op.
    section.level = 1
    section.level = 2
    assert '=={{t}}==\nb' == section.string
    assert '{{t}}' == title_template.string
def find_section_definitions(word: str, section: wtp.Section,
                             locale: str) -> List[Definitions]:
    """Collect the definitions of *word* listed in *section*.

    Every kept definition is appended to the result.  When a definition has
    sub-definitions, they follow it as a single tuple; sub-sub-definitions
    are nested the same way, as a tuple placed right after their parent
    sub-definition.
    """
    definitions: List[Definitions] = []

    # French verb form sections are never searched for definitions.
    if locale == "fr":
        heading = section.title.strip()
        if heading.startswith("{{S|verbe|fr|flexion"):
            return definitions

    # Spanish entries use definition lists, which the parser handles poorly:
    # rewrite the section contents so they become numbered lists.
    if locale == "es":
        definition_lists = section.get_lists(pattern="[:;]")
        if definition_lists:
            joined = "".join(
                definition_list.string for definition_list in definition_lists)
            section.contents = re.sub(r";[0-9]+[ |:]+", "# ", joined)
            section.contents = re.sub(r":;[a-z]:+[\s]+", "## ",
                                      section.contents)

    for a_list in section.get_lists(pattern=section_patterns[locale]):
        for idx, code in enumerate(a_list.items):
            # Drop items matching an ignore pattern, unless the word is
            # explicitly white-listed for this locale.
            if word not in words_to_keep[locale]:
                lowered = code.lower()
                if any(ignore_me in lowered
                       for ignore_me in definitions_to_ignore[locale]):
                    continue

            # Transform and clean the Wikicode.
            definition = process_templates(word, clean(code), locale)
            if not definition:
                continue
            # [SV] Also skip almost empty definitions.
            if locale == "sv" and len(definition) < 2:
                continue

            definitions.append(definition)

            # Gather the sub-definitions attached to this item, if any.
            nested: List[SubDefinitions] = []
            for sublist in a_list.sublists(i=idx,
                                           pattern=sublist_patterns[locale]):
                for sub_idx, subcode in enumerate(sublist.items):
                    nested.append(
                        process_templates(word, clean(subcode), locale))
                    deeper: List[str] = [
                        process_templates(word, clean(subsubcode), locale)
                        for subsublist in sublist.sublists(
                            i=sub_idx, pattern=sublist_patterns[locale])
                        for subsubcode in subsublist.items
                    ]
                    if deeper:
                        nested.append(tuple(deeper))
            if nested:
                definitions.append(tuple(nested))

    return definitions
 def test_level_setter_does_not_overwrite_title(self):
     """Changing ``level`` rewrites the heading markers and leaves the title template intact."""
     section = Section('={{t}}=\nb')
     title_template = section.templates[0]
     # Re-assigning the level the section already has is expected to be a no-op.
     section.level = 1
     section.level = 2
     self.assertEqual('=={{t}}==\nb', section.string)
     self.assertEqual('{{t}}', title_template.string)
    def test_unbalanced_equalsigns_in_title(self):
        """Check level and title when a heading's equals signs are unbalanced."""
        # (wikitext, expected level, expected title)
        cases = (
            ('====== ==   \n', 2, '==== '),
            ('== ======   \n', 2, ' ===='),
            ('========  \n', 3, '=='),
        )
        for wikitext, expected_level, expected_title in cases:
            section = Section(wikitext)
            self.assertEqual(expected_level, section.level)
            self.assertEqual(expected_title, section.title)
Exemple #5
0
def test_unbalanced_equalsigns_in_title():
    """Check level and title when a heading's equals signs are unbalanced."""
    # (wikitext, expected level, expected title)
    cases = (
        ('====== ==   \n', 2, '==== '),
        ('== ======   \n', 2, ' ===='),
        ('========  \n', 3, '=='),
    )
    for wikitext, expected_level, expected_title in cases:
        section = Section(wikitext)
        assert expected_level == section.level
        assert expected_title == section.title
Exemple #6
0
def find_section_definitions(word: str, section: wtp.Section,
                             locale: str) -> List[Definitions]:
    """Find definitions in the given *section*, with their possible sub-definitions.

    NOTE(review): the visible excerpt stops after the Spanish list rewrite;
    nothing is collected or returned past the early exits shown below.
    """
    definitions: List[Definitions] = []

    # Do not look for definitions in French verb form sections.
    if locale == "fr" and section.title.strip().startswith(
            "{{S|verbe|fr|flexion"):
        return definitions
    # Likewise, skip Spanish sections whose title starts with
    # "Forma adjetiva" or "Forma verbal".
    if locale == "es" and section.title.strip().startswith(
        ("Forma adjetiva", "Forma verbal")):
        return definitions

    # Spanish entries use definition lists, which the parser does not
    # support well: replace them with numbered lists.
    if locale == "es":
        if lists := section.get_lists(pattern="[:;]"):
            # The section contents are replaced by the concatenated list
            # markup, with ";<digits>" markers rewritten to "# " ...
            sec = "".join(a_list.string for a_list in lists)
            section.contents = re.sub(r";[0-9]+[ |:]+", "# ", sec)
            # ... and ":;<letter>:" markers rewritten to "## ".
            section.contents = re.sub(r":;[\s]*[a-z]:+[\s]+", "## ",
                                      section.contents)
Exemple #7
0
def find_section_definitions(word: str, section: wtp.Section,
                             locale: str) -> List[Definitions]:
    """Return the definitions of *word* listed in *section*.

    A definition that has sub-definitions is immediately followed by a tuple
    holding them; inside that tuple, a sub-definition that has its own
    sub-items is in turn followed by a tuple of those.
    """

    def _render(wikicode):
        """Clean *wikicode* and process its templates for the current word and locale."""
        return process_templates(word, clean(wikicode), locale)

    found: List[Definitions] = []

    # French verb form sections hold no definitions worth extracting.
    skipped_title_prefix = "{{S|verbe|fr|flexion"
    if locale == "fr" and section.title.strip().startswith(
            skipped_title_prefix):
        return found

    for a_list in section.get_lists(pattern=section_patterns[locale]):
        for idx, code in enumerate(a_list.items):
            # Ignore some patterns, except for explicitly kept words.
            if word not in words_to_keep[locale] and any(
                    ignored in code.lower()
                    for ignored in definitions_to_ignore[locale]):
                continue

            definition = _render(code)
            # Keep only non-empty definitions; for [SV], also require at
            # least two characters.
            keep = bool(definition) and not (locale == "sv"
                                             and len(definition) < 2)
            if not keep:
                continue
            found.append(definition)

            # Collect the sub-definitions (and their own sub-items).
            children: List[SubDefinitions] = []
            for sublist in a_list.sublists(i=idx,
                                           pattern=sublist_patterns[locale]):
                for child_idx, subcode in enumerate(sublist.items):
                    children.append(_render(subcode))
                    grandchildren: List[str] = []
                    for subsublist in sublist.sublists(
                            i=child_idx, pattern=sublist_patterns[locale]):
                        grandchildren.extend(
                            _render(subsubcode)
                            for subsubcode in subsublist.items)
                    if grandchildren:
                        children.append(tuple(grandchildren))
            if children:
                found.append(tuple(children))

    return found
Exemple #8
0
def find_etymology(word: str, locale: str, parsed_section: wtp.Section) -> str:
    """Return the first non-empty etymology found in *parsed_section*.

    Some locales get dedicated handling; the others (and "en" when none of
    its items yields text) go through the generic list scan at the end.
    An empty string is returned when that scan finds nothing.
    """
    etyl: str

    if locale == "ca":
        # The whole section contents are the etymology.
        return process_templates(word, clean(parsed_section.contents), locale)

    if locale == "en":
        for item in parsed_section.get_lists(pattern=("", ))[0].items:
            # Skip the section heading and the PIE root template.
            if item.lstrip().startswith(("===Etymology", "{{PIE root")):
                continue
            etyl = process_templates(word, clean(item), locale)
            if etyl:
                return etyl

    if locale == "es":
        etyl = parsed_section.get_lists(pattern=("", ))[0].items[1]
        return process_templates(word, clean(etyl), locale)

    if locale == "pt":
        has_template_title = (
            parsed_section.title.strip() == "{{etimologia|pt}}")
        try:
            if has_template_title:
                etyl = parsed_section.get_lists()[0].items[0]
            else:
                # "Etimologia" title section
                etyl = parsed_section.get_lists(pattern=("^:", ))[0].items[0]
        except IndexError:
            # Nothing at the expected place: use the second item of the
            # first list matched with the empty pattern.
            etyl = parsed_section.get_lists(pattern=("", ))[0].items[1]
        return process_templates(word, clean(etyl), locale)

    # Generic case: the first list item that is not ignored and that yields
    # some text wins.
    for a_list in parsed_section.get_lists():
        for etymology in a_list.items:
            if any(ignore_me in etymology.lower()
                   for ignore_me in definitions_to_ignore[locale]):
                continue
            etyl = process_templates(word, clean(etymology), locale)
            if etyl:
                return etyl
    return ""
Exemple #9
0
def find_etymology(word: str, locale: str,
                   parsed_section: wtp.Section) -> List[Definitions]:
    """Find the etymology of *word* in *parsed_section*.

    NOTE(review): the visible excerpt only handles the "ca", "no" and "en"
    locales; nothing is returned for any other locale in the code shown.
    """
    definitions: List[Definitions] = []
    etyl: str

    # For "ca" and "no", the whole section contents form the single entry.
    if locale in {"ca", "no"}:
        definitions.append(
            process_templates(word, clean(parsed_section.contents), locale))
        return definitions

    elif locale == "en":
        # Take the items of the first list matched with the empty pattern,
        # leaving out the section heading and the PIE root template.
        items = [
            item for item in parsed_section.get_lists(pattern=("", ))[0].items
            if not item.lstrip().startswith(("===Etymology", "{{PIE root"))
        ]
        for item in items:
            # Keep only the items that still have text once processed.
            if etyl := process_templates(word, clean(item), locale):
                definitions.append(etyl)
        return definitions
 def test_template_at_the_start(self):
     """A template opening the section text is exposed through ``templates``."""
     first_template = Section('{{t}}').templates[0]
     self.assertEqual(first_template.string, '{{t}}')
 def test_section_heading_tabs(self):
     """Tabs inside and after the heading are kept in ``string`` and ``title``."""
     wikitext = '=\tt\t=\t'
     section = Section(wikitext)
     self.assertEqual(section.string, '=\tt\t=\t')
     self.assertEqual(section.title, '\tt\t')
     self.assertEqual(section.contents, '')
 def test_set_lead_contents(self):
     """Assigning ``contents`` on a heading-less section replaces its whole string."""
     lead = Section('lead')
     lead.contents = 'newlead'
     self.assertEqual('newlead', lead.string)
 def test_set_level(self):
     """Lowering ``level`` from 3 to 2 shortens the heading markers on both sides."""
     section = Section('=== t ===\ntext')
     section.level = 2
     self.assertEqual('== t ==\ntext', section.string)
def find_etymology(word: str, locale: str,
                   parsed_section: wtp.Section) -> List[Definitions]:
    """Collect the etymology entries of *word* found in *parsed_section*.

    The result is a list of processed texts.  In the generic case (locales
    without dedicated handling), an entry that has sub-items is followed by
    a tuple holding those sub-items.
    """
    definitions: List[Definitions] = []
    etyl: str

    if locale in ("ca", "no"):
        # The whole section contents form the single entry.
        return [
            process_templates(word, clean(parsed_section.contents), locale)
        ]

    if locale == "en":
        for item in parsed_section.get_lists(pattern=("", ))[0].items:
            # Skip the section heading and the PIE root template.
            if item.lstrip().startswith(("===Etymology", "{{PIE root")):
                continue
            etyl = process_templates(word, clean(item), locale)
            if etyl:
                definitions.append(etyl)
        return definitions

    if locale in ("es", "it"):
        # The first item is left out; the others are stripped before use.
        for item in parsed_section.get_lists(pattern=("", ))[0].items[1:]:
            etyl = process_templates(word, clean(item.strip()), locale)
            if etyl:
                definitions.append(etyl)
        return definitions

    if locale == "pt":
        has_template_title = (
            parsed_section.title.strip() == "{{etimologia|pt}}")
        try:
            if has_template_title:
                etyl = parsed_section.get_lists()[0].items[0]
            else:
                # "Etimologia" title section
                etyl = parsed_section.get_lists(pattern=("^:", ))[0].items[0]
        except IndexError:
            # Nothing at the expected place: use the second item of the
            # first list matched with the empty pattern.
            etyl = parsed_section.get_lists(pattern=("", ))[0].items[1]
        definitions.append(process_templates(word, clean(etyl), locale))
        return definitions

    # Generic case: keep every list item that is not ignored, followed by a
    # tuple of its sub-items when it has some.
    for a_list in parsed_section.get_lists():
        for idx, section_item in enumerate(a_list.items):
            if any(ignore_me in section_item.lower()
                   for ignore_me in definitions_to_ignore[locale]):
                continue
            definitions.append(
                process_templates(word, clean(section_item), locale))
            subdefinitions: List[SubDefinitions] = [
                process_templates(word, clean(subcode), locale)
                for sublist in a_list.sublists(i=idx)
                for subcode in sublist.items
            ]
            if subdefinitions:
                definitions.append(tuple(subdefinitions))

    return definitions
Exemple #15
0
def test_trailing_space_setter():
    """Setting ``contents`` keeps the heading line, trailing space included."""
    section = Section('=t= \no')
    section.contents = 'n'
    assert '=t= \nn' == section.string
 def test_set_title(self):
     """A title assigned to a section is read back unchanged, spaces included."""
     section = Section('== section ==\ntext.')
     new_title = ' newtitle '
     section.title = new_title
     self.assertEqual(' newtitle ', section.title)
Exemple #17
0
def test_set_contents():
    """Contents assigned to a section are read back unchanged, spaces included."""
    section = Section('== title ==\ntext.')
    section.contents = ' newcontents '
    assert ' newcontents ' == section.contents
Exemple #18
0
def test_set_level():
    """Lowering ``level`` from 3 to 2 shortens the heading markers on both sides."""
    section = Section('=== t ===\ntext')
    section.level = 2
    assert '== t ==\ntext' == section.string
Exemple #19
0
def test_set_lead_contents():
    """Assigning ``contents`` on a heading-less section replaces its whole string."""
    lead = Section('lead')
    lead.contents = 'newlead'
    assert 'newlead' == lead.string
 def test_nolevel7(self):
     """Seven equals signs per side give level 6; the extra pair stays in the title."""
     section = Section('======= h6 =======\n')
     self.assertEqual(6, section.level)
     self.assertEqual('= h6 =', section.title)
Exemple #21
0
def test_template_at_the_start():
    """A template opening the section text is exposed through ``templates``."""
    first_template = Section('{{t}}').templates[0]
    assert first_template.string == '{{t}}'
Exemple #22
0
def test_setting_lead_section_contents():
    """Assigning ``contents`` on a two-line heading-less section replaces all of it."""
    lead = Section('a\nb')
    lead.contents = 'c'
    assert 'c' == lead.string
 def test_trailing_space_setter(self):
     """Setting ``contents`` keeps the heading line, trailing space included."""
     section = Section('=t= \no')
     section.contents = 'n'
     self.assertEqual('=t= \nn', section.string)
 def test_del_title(self):
     """Deleting the title removes the heading line; deleting it again is harmless."""
     section = Section('== section ==\ntext.')
     del section.title
     self.assertEqual('text.', section.string)
     self.assertIsNone(section.title)
     # A second deletion must neither change anything nor raise.
     del section.title
 def test_setting_lead_section_contents(self):
     """Assigning ``contents`` on a two-line heading-less section replaces all of it."""
     lead = Section('a\nb')
     lead.contents = 'c'
     self.assertEqual('c', lead.string)
 def test_lead_set_title(self):
     """Assign a title to a section built from heading-less text.

     NOTE(review): no assertion follows the assignment, so as written this
     only passes when the assignment does not raise - confirm that this is
     the intended check.
     """
     lead = Section('lead text')
     lead.title = ' newtitle '
 def test_level6(self):
     """Six equals signs per side give level 6, even when the title is ``' == '``."""
     section = Section('====== == ======\n')
     self.assertEqual(6, section.level)
     self.assertEqual(' == ', section.title)
 def test_set_contents(self):
     """Contents assigned to a section are read back unchanged, spaces included."""
     section = Section('== title ==\ntext.')
     section.contents = ' newcontents '
     self.assertEqual(' newcontents ', section.contents)
 def test_leadsection(self):
     """Text that starts without a heading has level 0 and no title."""
     section = Section('lead text. \n== section ==\ntext.')
     self.assertEqual(0, section.level)
     self.assertIsNone(section.title)
Exemple #30
0
def test_section_heading_tabs():
    """Tabs inside and after the heading are kept in ``string`` and ``title``."""
    wikitext = '=\tt\t=\t'
    section = Section(wikitext)
    assert section.string == '=\tt\t=\t'
    assert section.title == '\tt\t'
    assert section.contents == ''