예제 #1
0
파일: q28.py 프로젝트: simaki/nlp100
import re

from path import project_path
from q21 import load_uk
from q25 import read_info
from q25 import info_to_dict
from q26 import rm_emph
from q27 import rm_link


def rm_tags(text):
    """Return *text* with simple HTML/XML-style tags stripped out.

    Two substitutions are applied in order, each replacing matches with
    the empty string:

    1. the literal line-break tag ``<br />``;
    2. either a paired element ``<name ...>content</name>`` whose content
       contains no ``<`` (the element is dropped together with its
       content), or a self-closing tag ``<name ... />``.
    """
    tag_patterns = (
        r"<br />",
        r"<([^>\s]+)(\s[^>]+)*(>[^<]*</\1>|\s*/>)",
    )
    stripped = text
    for tag_pattern in tag_patterns:
        stripped = re.sub(tag_pattern, r"", stripped)
    return stripped


if __name__ == "__main__":
    # Script entry point for Q28: build the infobox dictionary from cleaned
    # markup and dump it to output/wiki/28.txt as "key value" lines.
    print("\rSolving Q28 ... ", end="")

    # Cleaning order (innermost call first): emphasis markup, then links,
    # then tags, before splitting the result into fields.
    fields = info_to_dict(rm_tags(rm_link(rm_emph(read_info(load_uk())))))

    # The article text may contain non-ASCII characters, so write UTF-8
    # explicitly instead of relying on the platform's default encoding.
    with open(project_path / "output/wiki/28.txt", "w", encoding="utf-8") as f:
        f.write("\n".join("{} {}".format(k, v) for k, v in fields.items()))

    print("Done.")
예제 #2
0
import re

from path import project_path
from q21 import load_uk

if __name__ == "__main__":
    # Script entry point for Q24: collect every file name referenced as
    # "[[ファイル:<name>|" in the article and write one name per line.
    print("\rSolving Q24 ... ", end="")

    # Non-greedy capture stops at the first "|" following the file name.
    file_names = re.findall(r"\[\[ファイル:(.+?)\|", load_uk())

    # The pattern itself matches Japanese markup, so the captured names can
    # be non-ASCII; write UTF-8 explicitly instead of relying on the
    # platform's default encoding.
    with open(project_path / "output/wiki/24.txt", "w", encoding="utf-8") as f:
        f.write("\n".join(file_names))

    print("Done.")
예제 #3
0
파일: q25.py 프로젝트: simaki/nlp100
    return re.findall(r"{{基礎情報 国\|((?:[^{{}}]*{{[^{{}}]*}})*[^{{}}]*)}}", text)[0]


def info_to_dict(info):
    """Parse ``|``-separated ``key = value`` markup into a dictionary.

    Pipes that sit inside ``{{...}}`` or ``[[...]]`` are part of a value,
    not field separators.  They are temporarily replaced by the marker
    ``<PIPE>``, the text is split on the remaining pipes, and the marker
    is turned back into ``|`` in each fragment.

    Args:
        info: Markup text made of ``key = value`` fields separated by ``|``.

    Returns:
        A dict mapping each field name (a run of non-whitespace characters
        before ``=``) to its value (the rest of that line after ``=``).
        When a name occurs more than once, the last occurrence wins.
        Empty or whitespace-only fragments (empty input, a leading or
        trailing ``|``) are ignored.

    Raises:
        IndexError: If a non-blank fragment contains no ``key = value``
            pair.
    """
    masked = info

    # Mask pipes inside {{...}} first, then inside [[...]].
    nested_pipe_patterns = (
        r"({{[^}\|]+)\|([^}]+}})",
        r"(\[\[[^\]\|]+)\|([^\]]+\]\])",
    )
    for pattern in nested_pipe_patterns:
        # Each pass masks at most one pipe per construct, so repeat until
        # the text stops changing.  The substitution is computed once per
        # pass and reused for both the comparison and the update.
        while True:
            replaced = re.sub(pattern, r"\1<PIPE>\2", masked)
            if replaced == masked:
                break
            masked = replaced

    result = {}
    for fragment in re.split(r"\|", masked):
        fragment = fragment.replace("<PIPE>", "|")
        if not fragment.strip():
            # Nothing between two separators (or empty input): not a field.
            continue
        found = re.search(r"(\S+)\s*=\s*(.+)", fragment)
        if found is None:
            # Same exception type the bare ``findall(...)[0]`` lookup used
            # to raise, but with a message that identifies the fragment.
            raise IndexError(
                "no 'key = value' pair in fragment: {!r}".format(fragment)
            )
        key, value = found.groups()
        result[key] = value
    return result


if __name__ == "__main__":
    # Script entry point for Q25: extract the infobox fields of the article
    # and dump them to output/wiki/25.txt as "key value" lines.
    print("\rSolving Q25 ... ", end="")

    fields = info_to_dict(read_info(load_uk()))

    # The article text may contain non-ASCII characters, so write UTF-8
    # explicitly instead of relying on the platform's default encoding.
    with open(project_path / "output/wiki/25.txt", "w", encoding="utf-8") as f:
        f.write("\n".join("{} {}".format(k, v) for k, v in fields.items()))

    print("Done.")
예제 #4
0
import requests

from path import project_path
from q21 import load_uk
from q25 import read_info
from q25 import info_to_dict


if __name__ == "__main__":
    # Script entry point for Q29: look up the URL of the image named in the
    # infobox field "国旗画像" through the MediaWiki API and write that URL
    # to output/wiki/29.txt.
    print("\rSolving Q29 ... ", end="")

    url = "https://en.wikipedia.org/w/api.php"
    params = {
        "action": "query",
        "prop": "imageinfo",
        "iiprop": ["url"],
        "format": "json",
        "titles": "File:{}".format(info_to_dict(read_info(load_uk()))["国旗画像"]),
    }
    # A timeout keeps the script from hanging forever on a stalled
    # connection; raise_for_status surfaces HTTP errors instead of failing
    # later while decoding an error page as JSON.
    response = requests.get(url=url, params=params, timeout=30)
    response.raise_for_status()

    # Exactly one title is queried, so "pages" holds a single entry.  Take
    # it by position rather than by a hard-coded page id, which would raise
    # KeyError as soon as the id differs.
    pages = response.json()["query"]["pages"]
    image_url = next(iter(pages.values()))["imageinfo"][0]["url"]

    # URLs may carry non-ASCII characters; write UTF-8 explicitly instead
    # of relying on the platform's default encoding.
    with open(project_path / "output/wiki/29.txt", "w", encoding="utf-8") as f:
        f.write(image_url)

    print("Done.")
예제 #5
0
import re

from path import project_path
from q21 import load_uk
from q25 import read_info
from q25 import info_to_dict


def rm_emph(text):
    """Return *text* with quote-based emphasis markers removed.

    A run of two or more apostrophes, followed by text containing no
    apostrophe, followed by the same run of apostrophes is replaced by the
    enclosed text alone.
    """
    emphasis = re.compile(r"('{2,})([^']+)\1")

    def keep_inner(match):
        # Group 2 is the text between the matching apostrophe runs.
        return match.group(2)

    return emphasis.sub(keep_inner, text)


if __name__ == "__main__":
    # Script entry point for Q26: strip emphasis markup from the infobox
    # text, parse it into fields and dump them to output/wiki/26.txt as
    # "key value" lines.
    print("\rSolving Q26 ... ", end="")

    fields = info_to_dict(rm_emph(read_info(load_uk())))

    # The article text may contain non-ASCII characters, so write UTF-8
    # explicitly instead of relying on the platform's default encoding.
    with open(project_path / "output/wiki/26.txt", "w", encoding="utf-8") as f:
        f.write("\n".join("{} {}".format(k, v) for k, v in fields.items()))

    print("Done.")
예제 #6
0
import re

from path import project_path
from q21 import load_uk

if __name__ == "__main__":
    # Script entry point for Q22: collect the names used in
    # "[[Category:<name>]]" links and write one name per line.
    print("\rSolving Q22 ... ", end="")

    # The capture stops before any "|"; an optional "|..." suffix inside
    # the link is matched by the non-capturing group and discarded.
    categories = re.findall(r"\[\[Category:([^\|]+?)(?:|\|.+)\]\]", load_uk())

    # Category names may contain non-ASCII characters, so write UTF-8
    # explicitly instead of relying on the platform's default encoding.
    with open(project_path / "output/wiki/22.txt", "w", encoding="utf-8") as f:
        f.write("\n".join(categories))

    print("Done.")
예제 #7
0
파일: q23.py 프로젝트: simaki/nlp100
import re

from path import project_path
from q21 import load_uk

if __name__ == "__main__":
    # Script entry point for Q23: find section headings delimited by
    # matching runs of "=" and write "<marker length> <heading text>" lines.
    print("\rSolving Q23 ... ", end="")

    # Group 1 is the run of two or more "=" (the backreference requires the
    # same run after the heading); group 2 is the text in between.
    headings = re.findall(r"(={2,})\s?([^=]+)\s?\1", load_uk())

    # Heading text may contain non-ASCII characters, so write UTF-8
    # explicitly instead of relying on the platform's default encoding.
    with open(project_path / "output/wiki/23.txt", "w", encoding="utf-8") as f:
        f.write(
            "\n".join(
                "{} {}".format(len(marker), title) for marker, title in headings
            )
        )

    print("Done.")