def find_section_definitions(word: str, section: wtp.Section,
                             locale: str) -> List[Definitions]:
    """Collect the definitions of *section* together with nested ones.

    Each kept definition is appended to the result as returned by
    ``process_templates()``. When the definition has sub-definitions, they
    follow it as one tuple; inside that tuple a sub-definition may itself be
    followed by a tuple holding its sub-sub-definitions.

    For the "es" locale the contents of *section* are rewritten in place
    before the lists are read.
    """
    found: List[Definitions] = []

    # French verb-form (flexion) sections are not searched at all.
    if locale == "fr":
        title = section.title.strip()
        if title.startswith("{{S|verbe|fr|flexion"):
            return found

    # Spanish entries rely on definition lists (";" and ":"), which the
    # parser supports poorly: turn them into numbered lists first.
    if locale == "es":
        definition_lists = section.get_lists(pattern="[:;]")
        if definition_lists:
            raw = "".join(entry.string for entry in definition_lists)
            section.contents = re.sub(r";[0-9]+[ |:]+", "# ", raw)
            section.contents = re.sub(r":;[a-z]:+[\s]+", "## ",
                                      section.contents)

    for wiki_list in section.get_lists(pattern=section_patterns[locale]):
        for position, raw_item in enumerate(wiki_list.items):
            # Drop items carrying an ignored marker, unless the word is
            # explicitly listed as one to keep.
            if word not in words_to_keep[locale]:
                markers = definitions_to_ignore[locale]
                lowered = raw_item.lower()
                if any(marker in lowered for marker in markers):
                    continue

            # Clean the Wikicode and expand its templates.
            text = process_templates(word, clean(raw_item), locale)
            if not text:
                continue
            # [SV] Nearly empty definitions are dropped as well.
            if locale == "sv" and len(text) < 2:
                continue
            found.append(text)

            # Nested levels are kept unfiltered, right after their parent.
            nested: List[SubDefinitions] = []
            for child_list in wiki_list.sublists(
                    i=position, pattern=sublist_patterns[locale]):
                for child_position, child in enumerate(child_list.items):
                    nested.append(
                        process_templates(word, clean(child), locale))
                    deepest: List[str] = [
                        process_templates(word, clean(leaf), locale)
                        for leaf_list in child_list.sublists(
                            i=child_position,
                            pattern=sublist_patterns[locale])
                        for leaf in leaf_list.items
                    ]
                    if deepest:
                        nested.append(tuple(deepest))
            if nested:
                found.append(tuple(nested))

    return found
Exemple #2
0
def find_section_definitions(word: str, section: wtp.Section,
                             locale: str) -> List[Definitions]:
    """Skip inflected-form sections and normalise Spanish definition lists.

    An empty list is returned for French verb-form sections and for Spanish
    "Forma adjetiva" / "Forma verbal" sections. For any other Spanish
    section the contents of *section* are rewritten in place.

    NOTE(review): this excerpt stops right after the Spanish rewrite; no
    list walking and no final ``return`` are visible, so apart from the two
    early exits the function returns ``None`` as written -- confirm against
    the complete source.
    """
    definitions: List[Definitions] = []

    if locale == "fr":
        # French verb-form (flexion) sections are not searched.
        if section.title.strip().startswith("{{S|verbe|fr|flexion"):
            return definitions
    elif locale == "es":
        # Neither are Spanish adjective-form and verb-form sections.
        inflected_titles = ("Forma adjetiva", "Forma verbal")
        if section.title.strip().startswith(inflected_titles):
            return definitions

        # Spanish entries rely on definition lists (";" and ":"), which the
        # parser supports poorly: turn them into numbered lists.
        definition_lists = section.get_lists(pattern="[:;]")
        if definition_lists:
            raw = "".join(entry.string for entry in definition_lists)
            section.contents = re.sub(r";[0-9]+[ |:]+", "# ", raw)
            section.contents = re.sub(r":;[\s]*[a-z]:+[\s]+", "## ",
                                      section.contents)
Exemple #3
0
def find_section_definitions(word: str, section: wtp.Section,
                             locale: str) -> List[Definitions]:
    """Gather the definitions listed in *section*, nested levels included.

    The result alternates between a definition (as returned by
    ``process_templates()``) and, when that definition has sub-definitions,
    one tuple holding them. Within that tuple a sub-definition may likewise
    be followed by a tuple of its sub-sub-definitions.
    """
    collected: List[Definitions] = []

    # French verb-form (flexion) sections are not searched at all.
    is_french = locale == "fr"
    if is_french and section.title.strip().startswith(
            "{{S|verbe|fr|flexion"):
        return collected

    for top_list in section.get_lists(pattern=section_patterns[locale]):
        for top_index, top_code in enumerate(top_list.items):
            # An item carrying an ignored marker is dropped, unless the
            # word is explicitly listed as one to keep.
            if word not in words_to_keep[locale]:
                ignored = definitions_to_ignore[locale]
                haystack = top_code.lower()
                if any(needle in haystack for needle in ignored):
                    continue

            # Clean the Wikicode and expand its templates.
            top_text = process_templates(word, clean(top_code), locale)
            # Empty results are skipped; [SV] so are nearly empty ones.
            too_short = locale == "sv" and len(top_text) < 2
            if not top_text or too_short:
                continue
            collected.append(top_text)

            # Lower levels are kept unfiltered, right after their parent.
            lower_levels: List[SubDefinitions] = []
            for mid_list in top_list.sublists(
                    i=top_index, pattern=sublist_patterns[locale]):
                for mid_index, mid_code in enumerate(mid_list.items):
                    lower_levels.append(
                        process_templates(word, clean(mid_code), locale))
                    bottom: List[str] = [
                        process_templates(word, clean(low_code), locale)
                        for low_list in mid_list.sublists(
                            i=mid_index, pattern=sublist_patterns[locale])
                        for low_code in low_list.items
                    ]
                    if bottom:
                        lower_levels.append(tuple(bottom))
            if lower_levels:
                collected.append(tuple(lower_levels))

    return collected
Exemple #4
0
def find_etymology(word: str, locale: str, parsed_section: wtp.Section) -> str:
    """Return the etymology text found in *parsed_section*.

    The "ca", "es" and "pt" locales each read one fixed place of the
    section. The "en" locale returns the first non-empty line and, failing
    that, continues with the generic scan used by every other locale: the
    first non-empty, non-ignored list item. An empty string is returned
    when that scan finds nothing.
    """
    if locale == "ca":
        # The whole section body is the etymology.
        return process_templates(word, clean(parsed_section.contents), locale)

    if locale == "es":
        # With the catch-all pattern, the second item is the one kept.
        second = parsed_section.get_lists(pattern=("", ))[0].items[1]
        return process_templates(word, clean(second), locale)

    if locale == "pt":
        # A "{{etimologia|pt}}" title uses the default list patterns; any
        # other title (plain "Etimologia") looks for ":" lists.
        if parsed_section.title.strip() == "{{etimologia|pt}}":
            lookup = {}
        else:
            lookup = {"pattern": ("^:", )}
        try:
            raw = parsed_section.get_lists(**lookup)[0].items[0]
        except IndexError:
            # No such list: use the second item of the catch-all listing.
            raw = parsed_section.get_lists(pattern=("", ))[0].items[1]
        return process_templates(word, clean(raw), locale)

    if locale == "en":
        unwanted_starts = ("===Etymology", "{{PIE root")
        candidates = [
            line
            for line in parsed_section.get_lists(pattern=("", ))[0].items
            if not line.lstrip().startswith(unwanted_starts)
        ]
        for line in candidates:
            text = process_templates(word, clean(line), locale)
            if text:
                return text
        # Nothing usable: carry on with the generic scan below.

    for wiki_list in parsed_section.get_lists():
        for entry in wiki_list.items:
            lowered = entry.lower()
            if any(marker in lowered
                   for marker in definitions_to_ignore[locale]):
                continue
            text = process_templates(word, clean(entry), locale)
            if text:
                return text
    return ""
Exemple #5
0
def find_etymology(word: str, locale: str,
                   parsed_section: wtp.Section) -> List[Definitions]:
    """Return the etymology entries of *parsed_section* for some locales.

    For "ca" and "no" the whole section body becomes the single entry. For
    "en" every non-empty line is kept, except those starting with
    "===Etymology" or "{{PIE root".

    NOTE(review): this excerpt handles those locales only; any other locale
    reaches the end of the function and gets ``None`` as written -- confirm
    against the complete source.
    """
    found: List[Definitions] = []

    if locale in {"ca", "no"}:
        whole = process_templates(word, clean(parsed_section.contents),
                                  locale)
        found.append(whole)
        return found

    if locale == "en":
        unwanted_starts = ("===Etymology", "{{PIE root")
        lines = parsed_section.get_lists(pattern=("", ))[0].items
        kept = [
            line for line in lines
            if not line.lstrip().startswith(unwanted_starts)
        ]
        for line in kept:
            text = process_templates(word, clean(line), locale)
            if text:
                found.append(text)
        return found
def find_etymology(word: str, locale: str,
                   parsed_section: wtp.Section) -> List[Definitions]:
    """Return the etymology entries found in *parsed_section*.

    Locale-specific handling:

    - "ca", "no": the whole section body is the single entry;
    - "en": every non-empty line, except those starting with
      "===Etymology" or "{{PIE root";
    - "es", "it": every non-empty line after the first one;
    - "pt": one entry, read from a place that depends on the section title.

    Any other locale keeps every non-ignored list item, each followed by a
    tuple of its sub-items when it has some.
    """
    found: List[Definitions] = []

    def keep_non_empty(raw_lines: List[str]) -> None:
        # Append the processed form of each line, dropping empty results.
        for raw_line in raw_lines:
            text = process_templates(word, clean(raw_line), locale)
            if text:
                found.append(text)

    if locale in ("ca", "no"):
        whole = process_templates(word, clean(parsed_section.contents),
                                  locale)
        found.append(whole)
        return found

    if locale == "en":
        unwanted_starts = ("===Etymology", "{{PIE root")
        lines = parsed_section.get_lists(pattern=("", ))[0].items
        keep_non_empty([
            line for line in lines
            if not line.lstrip().startswith(unwanted_starts)
        ])
        return found

    if locale in ("es", "it"):
        lines = parsed_section.get_lists(pattern=("", ))[0].items[1:]
        keep_non_empty([line.strip() for line in lines])
        return found

    if locale == "pt":
        # A "{{etimologia|pt}}" title uses the default list patterns; any
        # other title (plain "Etimologia") looks for ":" lists.
        if parsed_section.title.strip() == "{{etimologia|pt}}":
            lookup = {}
        else:
            lookup = {"pattern": ("^:", )}
        try:
            raw = parsed_section.get_lists(**lookup)[0].items[0]
        except IndexError:
            # No such list: use the second item of the catch-all listing.
            raw = parsed_section.get_lists(pattern=("", ))[0].items[1]
        found.append(process_templates(word, clean(raw), locale))
        return found

    for wiki_list in parsed_section.get_lists():
        for position, entry in enumerate(wiki_list.items):
            lowered = entry.lower()
            if any(marker in lowered
                   for marker in definitions_to_ignore[locale]):
                continue
            found.append(process_templates(word, clean(entry), locale))

            # Sub-items are kept unfiltered, right after their parent.
            nested: List[SubDefinitions] = [
                process_templates(word, clean(child), locale)
                for child_list in wiki_list.sublists(i=position)
                for child in child_list.items
            ]
            if nested:
                found.append(tuple(nested))

    return found