from kiyuna.utils.pickle import load  # noqa: E402 isort:skip
from kiyuna.utils.message import Renderer, message  # noqa: E402 isort:skip
from kiyuna.utils.message import green  # noqa: E402 isort:skip


def remove_em(od: OrderedDict) -> OrderedDict:
    """Return a new mapping whose values have emphasis quote runs removed.

    Every run of two or more consecutive apostrophes is deleted from each
    value, which covers the wiki emphasis markers:

        ''italics''   '''bold'''   '''''both'''''

    Keys and their order are kept; the input mapping is not modified.
    """
    quote_run = re.compile(r"'{2,}")
    return OrderedDict(
        (name, quote_run.sub("", text)) for name, text in od.items()
    )


if __name__ == "__main__":
    infobox = load("infobox")
    res = remove_em(infobox)

    with Renderer("knock26") as out:
        # `res` has the same keys in the same order as `infobox`, so the
        # cleaned value can be looked up by key.
        for key, src in infobox.items():
            dst = res[key]
            if src != dst:
                # Show the original next to the (highlighted) cleaned value.
                out.result(key, (src, green(dst)))
            else:
                # Unchanged entry: only bump the renderer's counter.
                out.cnt += 1

    # Warn when the substitution did not alter a single value.
    if infobox == res:
        message("変化なし", type="warning")
def exec_findall(wiki: str, pattern: str) -> Iterator[Tuple[str, Group]]:
    """Yield ``(line, match)`` for every regex match found in *wiki*.

    The text is split on ``"\\n"`` and *pattern* is applied to each line
    with ``findall``, so a line that contains several matches is yielded
    several times.

    Args:
        wiki: The full text to scan.
        pattern: Regular expression source; it is compiled once per call.

    Yields:
        ``(line, match)`` pairs, where ``match`` is whatever ``findall``
        produces for the pattern: a string when the pattern has at most one
        capture group, otherwise a tuple with one string per group.
    """
    reg = re.compile(pattern)
    for line in wiki.split("\n"):
        for match in reg.findall(line):
            yield line, match


# Places where media file names show up in the article markup:
#   - inline images
#       [[ファイル:Uk topo en.jpg|thumb|200px|イギリスの地形図]]
#   - country infobox fields
#       |国旗画像 = Flag of the United Kingdom.svg
#   - <gallery> entries
#       Stonehenge2007 07 30.jpg|[[ストーンヘンジ]]
#   - <ref> links (filtered out by the caller because they contain "/")
#       <ref>[http://warp.da.ndl.go.jp/.../country.pdf
MEDIA_FILE_PATTERN = (
    # The global case-insensitive flag has to lead the pattern: placing it
    # mid-pattern is rejected with re.error on Python 3.11+.
    r"(?i)"
    r"(?:\s=\s)?"  # skip the " = " separator of country-infobox fields
    r"([^:=]+)"  # file stem; adding '/' to the class would also pick up file names inside <ref>
    r"\.(png|gif|jpg|jpeg|xcf|pdf|mid|ogg|svg|djvu)"
)


if __name__ == "__main__":
    wiki = load("UK")
    pat = MEDIA_FILE_PATTERN

    with Renderer("knock24") as out:
        for line, filename in exec_findall(wiki, pat):
            # `filename` is the (stem, extension) pair captured by the pattern.
            fname = ".".join(filename)
            if "/" not in fname:  # names containing "/" come from <ref> URLs
                out.result(trunc(line), green(fname))
^{{基礎情報\s国 (?P<Infobox_body>.+?) ^}}$ """, wiki, flags=re.VERBOSE | re.DOTALL | re.MULTILINE, ).group("Infobox_body") reg = re.compile(r"(.+?)\s*=\s*(.+)", re.DOTALL) od = OrderedDict( reg.search(line.strip()).groups() for line in infobox.split("\n|") if line) dump(od, "infobox") with Renderer("knock25") as out: for k, v in od.items(): out.result(k, green(v)) assert od == OrderedDict( reg.search(line.strip()).groups() for line in extract_infobox(wiki).lstrip("基礎情報 国").split("\n|") if line) assert od == OrderedDict( re.findall( r""" \| # | (?P<Key>.+?) # 略名 \s* # _ = # = \s* # _ (?P<Value>.+?) # イギリス