Example #1
0
from kiyuna.utils.pickle import load  # noqa: E402 isort:skip
from kiyuna.utils.message import Renderer, message  # noqa: E402 isort:skip
from kiyuna.utils.message import green  # noqa: E402 isort:skip


def remove_em(od: OrderedDict) -> OrderedDict:
    """Return a copy of ``od`` with emphasis markup stripped from each value.

    Every run of two or more consecutive apostrophes is deleted, which
    covers the three emphasis forms:
        ''italics''
        '''bold'''
        '''''both'''''

    Keys and their order are kept; the input mapping is not modified.
    A single apostrophe (as in "it's") is left alone.
    """
    quote_run = re.compile(r"'{2,}")
    return OrderedDict(
        (name, quote_run.sub("", text)) for name, text in od.items()
    )


if __name__ == "__main__":
    # Fetch the object stored under "infobox" and strip its emphasis markup.
    infobox = load("infobox")
    res = remove_em(infobox)

    with Renderer("knock26") as out:
        # res carries the same keys, in the same order, as infobox,
        # so the cleaned value can be looked up by key.
        for key, src in infobox.items():
            dst = res[key]
            if src != dst:
                # Show the original next to the highlighted cleaned value.
                out.result(key, (src, green(dst)))
                continue
            # Unchanged entries are only counted, not displayed.
            out.cnt += 1
        # Warn when the clean-up did not alter a single value.
        if infobox == res:
            message("変化なし", type="warning")
Example #2
0
def exec_findall(wiki: str, pattern: str) -> Iterator[Tuple[str, Group]]:
    """Lazily yield ``(line, match)`` for every hit of ``pattern`` in ``wiki``.

    The text is split on line feeds and each line is searched on its own
    with ``findall``, so no match spans more than one line.  ``match`` is
    whatever ``findall`` produces for the pattern: a string when it has at
    most one capture group, otherwise a tuple of the captured groups.
    """
    find_in = re.compile(pattern).findall
    for row in wiki.split("\n"):
        yield from ((row, hit) for hit in find_in(row))


if __name__ == "__main__":
    # Script entry: print the media file names found in the text stored
    # under "UK" (presumably the wiki markup of the UK article).
    wiki = load("UK")

    # The pattern has two capture groups, so findall() hands back one
    # (stem, extension) tuple per hit.
    pat = (
        # Global inline flags must open the pattern: a mid-pattern "(?i)" has
        # been deprecated since Python 3.6 and raises re.error from 3.11 on.
        # Placed here it keeps the whole pattern case-insensitive, as before.
        r"(?i)"
        r"(?:\s=\s)?"  # consume the " = " of infobox ("基礎情報 国") lines
        # Adding '/' to this [^...] class would also pick up the file names
        # that appear inside <ref>.
        r"([^:=]+)"
        r"\.(png|gif|jpg|jpeg|xcf|pdf|mid|ogg|svg|djvu)")
    with Renderer("knock24") as out:
        for line, filename in exec_findall(wiki, pat):
            fname = ".".join(filename)  # re-attach the extension to the stem
            if "/" not in fname:  # drop the URL-like hits coming from <ref>
                out.result(trunc(line), green(fname))
    # NOTE: places where file names show up in the markup
    # - Wikipedia images
    #     - [[ファイル:Uk topo en.jpg|thumb|200px|イギリスの地形図]]
    # - country infobox (基礎情報 国)
    #     - |国旗画像 = Flag of the United Kingdom.svg
    # - <gallery>
    #     - Stonehenge2007 07 30.jpg|[[ストーンヘンジ]]
    # - <ref>
    #     - <ref>[http://warp.da.ndl.go.jp/.../country.pdf
Example #3
0
        ^{{基礎情報\s国
        (?P<Infobox_body>.+?)
        ^}}$
        """,
        wiki,
        flags=re.VERBOSE | re.DOTALL | re.MULTILINE,
    ).group("Infobox_body")
    reg = re.compile(r"(.+?)\s*=\s*(.+)", re.DOTALL)
    od = OrderedDict(
        reg.search(line.strip()).groups() for line in infobox.split("\n|")
        if line)
    dump(od, "infobox")

    with Renderer("knock25") as out:
        for k, v in od.items():
            out.result(k, green(v))

    assert od == OrderedDict(
        reg.search(line.strip()).groups()
        for line in extract_infobox(wiki).lstrip("基礎情報 国").split("\n|")
        if line)

    assert od == OrderedDict(
        re.findall(
            r"""
            \|                  # |
            (?P<Key>.+?)        # 略名
            \s*                 # _
            =                   # =
            \s*                 # _
            (?P<Value>.+?)      # イギリス