# Print the name of every media file referenced in the text returned by
# sample20.f('イギリス').
# NOTE(review): sample20.f presumably returns the wiki markup of the named
# article -- confirm against sample20.
import re

import sample20

# Raw string: "\|" inside a plain literal is an invalid escape sequence
# (SyntaxWarning on Python 3.12+).  Group 2 is the file name, i.e. the text
# between the "File:" / "ファイル:" prefix and the next "|".
MEDIA_FILE_PATTERN = re.compile(r'(File|ファイル):(.*?)\|')

lines = sample20.f('イギリス').split('\n')
for line in lines:
    # finditer instead of search: a line that holds several file references
    # must report all of them, not only the first one.
    for file_match in MEDIA_FILE_PATTERN.finditer(line):
        print(file_match.group(2))
# Collect "name = value" fields from the text returned by
# sample20.f('イギリス'), strip emphasis markup from the values, and print the
# fields ordered by value.
# NOTE(review): sample20.f presumably returns the wiki markup of the named
# article -- confirm against sample20.
import re

import sample20

# All patterns are raw strings: "\|" and "\s" inside plain literals are
# invalid escape sequences (SyntaxWarning on Python 3.12+).

# A field boundary is a newline immediately followed by "|" or "}".
FIELD_BOUNDARY = re.compile(r'\n[\|}]')
# "name = value" anchored at the start of a chunk; re.S lets the value (and
# the whitespace around "=") span line breaks inside the chunk.
FIELD_PATTERN = re.compile(r'^(.*?)\s=\s(.*)', re.S)
# Emphasis markup is a run of 2 to 5 apostrophes.  A lone apostrophe is
# ordinary text and must survive, so "'+" would be too greedy here.
EMPHASIS_PATTERN = re.compile(r"'{2,5}")

temp_dict = {}
lines = FIELD_BOUNDARY.split(sample20.f('イギリス'))
for chunk in lines:
    temp_line = FIELD_PATTERN.search(chunk)
    if temp_line is not None:
        temp_dict[temp_line.group(1)] = EMPHASIS_PATTERN.sub(
            '', temp_line.group(2))

# Output is ordered by field value (x[1]), not by field name.
for k, v in sorted(temp_dict.items(), key=lambda x: x[1]):
    print(k, v)
else: ret_dict[k] = v return ret_dict def remove_markup(str): str = re.sub(r"'{2,5}", r"", str) str = re.sub(r"\[{2}([^|\]]+?\|)*(.+?)\]{2}", r"\2", str) str = re.sub(r"\{{2}.+?\|.+?\|(.+?)\}{2}", r"\1 ", str) str = re.sub(r"<.*?>", r"", str) str = re.sub(r"\[.*?\]", r"", str) return str temp_dict = {} lines = sample20.f(u"イギリス").split("\n") for line in lines: temp_line = re.search("^\|(.*?)\s=\s(.*)", line) if temp_line is not None: temp_dict[temp_line.group(1)] = remove_markup(temp_line.group(2)) url = "https://en.wikipedia.org/w/api.php" payload = { "action": "query", "titles": "File:{}".format(temp_dict[u"国旗画像"]), "prop": "imageinfo", "format": "json", "iiprop": "url" }
import re

import sample20

# Ordered (pattern, replacement) rules applied by remove_markup().  The order
# matters: double-bracket links are unwrapped before the final rule deletes
# whatever single-bracket spans are left.
_MARKUP_RULES = (
    # Runs of 2 to 5 apostrophes (emphasis markup) are dropped.
    (re.compile(r"'{2,5}"), r""),
    # [[...]] is replaced by the text after its last "|" (or by its whole
    # content when there is no "|").
    (re.compile(r"\[{2}([^|\]]+?\|)*(.+?)\]{2}"), r"\2"),
    # {{a|b|rest}} is replaced by "rest" followed by one space.
    (re.compile(r"\{{2}.+?\|.+?\|(.+?)\}{2}"), r"\1 "),
    # Anything between "<" and the next ">" is dropped.
    (re.compile(r"<.*?>"), r""),
    # Anything between "[" and the next "]" is dropped, brackets included.
    (re.compile(r"\[.*?\]"), r""),
)

# Raw string: "\|" and "\s" inside a plain literal are invalid escape
# sequences (SyntaxWarning on Python 3.12+).  Matches "|name = value" lines.
_FIELD_PATTERN = re.compile(r"^\|(.*?)\s=\s(.*)")


def remove_markup(str):
    """Return *str* with the markup covered by ``_MARKUP_RULES`` removed.

    The rules are applied one after another, each on the result of the
    previous one.

    Args:
        str: Text to clean.  The parameter name shadows the builtin; it is
            kept so that existing keyword callers keep working.

    Returns:
        The cleaned text.
    """
    text = str  # work on a local name so the builtin is not shadowed further
    for pattern, replacement in _MARKUP_RULES:
        text = pattern.sub(replacement, text)
    return text


# Collect the "|name = value" fields of the text returned by
# sample20.f("日本"), clean each value, and print the fields ordered by name.
# NOTE(review): sample20.f presumably returns the wiki markup of the named
# article -- confirm against sample20.
temp_dict = {}
lines = sample20.f("日本").split("\n")
for line in lines:
    temp_line = _FIELD_PATTERN.search(line)
    if temp_line is not None:
        temp_dict[temp_line.group(1)] = remove_markup(temp_line.group(2))

for k, v in sorted(temp_dict.items(), key=lambda x: x[0]):
    print(k, v)