import re

from ch3_module import ch3_extract_json

# Patterns are raw strings (so `\|` and `\s` reach the regex engine without
# triggering invalid-escape warnings) and are compiled once, outside the loop.
_EMPHASIS_RE = re.compile(r"'{2,5}")
_INTERNAL_LINK_RE = re.compile(r"\[{2}([^|\]]+?\|)*(.+?)\]{2}")
_FIELD_LINE_RE = re.compile(r"^\|(.*?)\s=\s(.*)")


def remove_markup(str):
    """Strip emphasis quotes and internal-link brackets from a wiki string.

    Runs of two to five apostrophes are deleted, and every ``[[...]]`` link
    is replaced by the text captured after its last pipe-terminated segment
    (``[[A|B]]`` becomes ``B``, ``[[A]]`` becomes ``A``).

    Args:
        str: The text to clean. The name shadows the builtin; it is kept
            unchanged so existing keyword callers keep working.

    Returns:
        The text with both substitutions applied.
    """
    text = _EMPHASIS_RE.sub("", str)
    return _INTERNAL_LINK_RE.sub(r"\2", text)


# Collect "|name = value" lines into a dict, cleaning each value.
# NOTE(review): extract_json presumably returns the article's wiki text as a
# single string -- confirm against ch3_module.
temp_dict = {}
lines = ch3_extract_json.extract_json("イギリス").split("\n")
for line in lines:
    category_line = _FIELD_LINE_RE.search(line)
    if category_line is not None:
        temp_dict[category_line.group(1)] = remove_markup(
            category_line.group(2))

# Print the collected fields ordered by field name.
for k, v in sorted(temp_dict.items(), key=lambda x: x[0]):
    print(k, v)
import re

from ch3_module import ch3_extract_json

# Raw string so that `\|` is passed to the regex engine as an escaped pipe
# instead of being an invalid string escape. Group 2 is the text between
# "File:" / "ファイル:" and the next "|".
_MEDIA_FILE_RE = re.compile(r"(File|ファイル):(.*?)\|")

# NOTE(review): extract_json presumably returns the article's wiki text as a
# single string -- confirm against ch3_module.
lines = ch3_extract_json.extract_json('イギリス').split('\n')
for line in lines:
    # finditer reports every file reference on the line; a single search()
    # would silently drop all but the first one.
    for file_line in _MEDIA_FILE_RE.finditer(line):
        print(file_line.group(2))
import re

from ch3_module import ch3_extract_json

# Raw string so that `\s` is a regex whitespace class rather than an invalid
# string escape. re.S lets "." cross newlines, so a value that spans several
# physical lines is captured whole.
_TEMPLATE_FIELD_RE = re.compile(r"^(.*?)\s=\s(.*)", re.S)

temp_dict = {}
# Split wherever a newline is followed by "|" or "}". The delimiter itself is
# consumed, so each chunk starts directly with the text that followed it.
# NOTE(review): extract_json presumably returns the article's wiki text as a
# single string -- confirm against ch3_module.
lines = re.split(r"\n[\|}]", ch3_extract_json.extract_json("イギリス"))
for line in lines:
    temp_line = _TEMPLATE_FIELD_RE.search(line)
    if temp_line is not None:
        temp_dict[temp_line.group(1)] = temp_line.group(2)

# Print the collected pairs ordered by value (not by field name).
for k, v in sorted(temp_dict.items(), key=lambda x: x[1]):
    print(k, v)