Beispiel #1
0
import re
import sample20

# sample20.f is a project-local helper; its result is split on newlines here,
# so it is presumably the article text as a single string — confirm in sample20.
lines = sample20.f('イギリス').split('\n')

# Raw string: "\|" is not a valid Python string escape, so the non-raw form
# triggers an invalid-escape warning on modern Python. Compiled once, outside
# the loop, because the pattern never changes between iterations.
file_pattern = re.compile(r'(File|ファイル):(.*?)\|')

for i in lines:
    file_line = file_pattern.search(i)
    if file_line is not None:
        # Group 2 is the text between the "File:"/"ファイル:" prefix and the
        # first following "|".
        print(file_line.group(2))
Beispiel #2
0
import re
import sample20

temp_dict = {}

# Raw strings throughout: "\|" and "\s" are not valid Python string escapes,
# so the non-raw forms trigger invalid-escape warnings on modern Python.
# In a raw string the regex engine itself interprets "\n" as a newline, so
# the patterns match exactly what they matched before.
record_separator = re.compile(r'\n[\|}]')
# re.S lets "." cross newlines, so a value spanning several lines is kept whole.
field_pattern = re.compile(r'^(.*?)\s=\s(.*)', re.S)
quote_run = re.compile(r"'+")

# Split on every newline that is immediately followed by "|" or "}".
# sample20.f is a project-local helper; presumably it returns the article
# text as one string — confirm in sample20.
lines = record_separator.split(sample20.f('イギリス'))

for i in lines:
    temp_line = field_pattern.search(i)
    if temp_line is not None:
        # Key: text before the first " = "; value: the rest, with every run
        # of apostrophes removed.
        temp_dict[temp_line.group(1)] = quote_run.sub(r"", temp_line.group(2))

# Output is ordered by the field value (x[1]), not by the field name.
for k, v in sorted(temp_dict.items(), key=lambda x: x[1]):
    print(k, v)
Beispiel #3
0
        else:
            ret_dict[k] = v
    return ret_dict


def remove_markup(str):
    """Return ``str`` with several wiki-style markup constructs stripped.

    The substitutions are applied in this fixed order:

    1. Runs of two to five apostrophes are removed.
    2. ``[[...]]`` is replaced by the text after its last ``|`` segment
       (or by its whole content when there is no ``|``).
    3. ``{{a|b|c}}`` is replaced by ``c`` followed by a single space.
    4. ``<...>`` spans are removed.
    5. Remaining ``[...]`` spans are removed.

    The parameter keeps the name ``str`` for caller compatibility, although
    it shadows the builtin inside this function.
    """
    # (pattern, replacement) pairs; order matters because later rules run
    # on the output of earlier ones.
    substitutions = (
        (r"'{2,5}", r""),
        (r"\[{2}([^|\]]+?\|)*(.+?)\]{2}", r"\2"),
        (r"\{{2}.+?\|.+?\|(.+?)\}{2}", r"\1 "),
        (r"<.*?>", r""),
        (r"\[.*?\]", r""),
    )
    cleaned = str
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned


temp_dict = {}
# sample20.f is a project-local helper; its result is split on newlines here,
# so it is presumably the article text as a single string — confirm in sample20.
lines = sample20.f(u"イギリス").split("\n")

# Raw string: "\|" and "\s" are not valid Python string escapes, so the
# non-raw form triggers invalid-escape warnings on modern Python. Compiled
# once, outside the loop, because the pattern never changes.
field_pattern = re.compile(r"^\|(.*?)\s=\s(.*)")

for line in lines:
    temp_line = field_pattern.search(line)
    if temp_line is not None:
        # Key: text between the leading "|" and the first " = ";
        # value: the rest of the line with markup stripped.
        temp_dict[temp_line.group(1)] = remove_markup(temp_line.group(2))

# Endpoint and query parameters for an image-info lookup; the request itself
# is not part of the code visible here.
url = "https://en.wikipedia.org/w/api.php"
payload = {
    "action": "query",
    # Raises KeyError if no "国旗画像" field was parsed above.
    "titles": "File:{}".format(temp_dict[u"国旗画像"]),
    "prop": "imageinfo",
    "format": "json",
    "iiprop": "url",
}
Beispiel #4
0
import re
import sample20


def remove_markup(str):
    """Return ``str`` with several wiki-style markup constructs stripped.

    The substitutions are applied in this fixed order:

    1. Runs of two to five apostrophes are removed.
    2. ``[[...]]`` is replaced by the text after its last ``|`` segment
       (or by its whole content when there is no ``|``).
    3. ``{{a|b|c}}`` is replaced by ``c`` followed by a single space.
    4. ``<...>`` spans are removed.
    5. Remaining ``[...]`` spans are removed.

    The parameter keeps the name ``str`` for caller compatibility, although
    it shadows the builtin inside this function.
    """
    # (pattern, replacement) pairs; order matters because later rules run
    # on the output of earlier ones.
    substitutions = (
        (r"'{2,5}", r""),
        (r"\[{2}([^|\]]+?\|)*(.+?)\]{2}", r"\2"),
        (r"\{{2}.+?\|.+?\|(.+?)\}{2}", r"\1 "),
        (r"<.*?>", r""),
        (r"\[.*?\]", r""),
    )
    cleaned = str
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned


temp_dict = {}
# sample20.f is a project-local helper; its result is split on newlines here,
# so it is presumably the article text as a single string — confirm in sample20.
lines = sample20.f("日本").split("\n")

# Raw string: "\|" and "\s" are not valid Python string escapes, so the
# non-raw form triggers invalid-escape warnings on modern Python. Compiled
# once, outside the loop, because the pattern never changes.
field_pattern = re.compile(r"^\|(.*?)\s=\s(.*)")

for i in lines:
    temp_line = field_pattern.search(i)
    if temp_line is not None:
        # Key: text between the leading "|" and the first " = ";
        # value: the rest of the line with markup stripped.
        temp_dict[temp_line.group(1)] = remove_markup(temp_line.group(2))

# Output is ordered by the field name (x[0]).
for k, v in sorted(temp_dict.items(), key=lambda x: x[0]):
    print(k, v)