import re
from mymodule import extract_from_json

# Matches from the first "[[ファイル:" on a line up to a "]]" that closes the
# line, i.e. only lines that END with a file link are reported.
FILE_LINK = re.compile(r'\[\[ファイル:.+\]\]$')

# extract_from_json is a project helper; its string result is split on "\n"
# so each wiki-markup line is inspected on its own.
article_text = extract_from_json(u"イギリス")
for row in article_text.split("\n"):
    found = FILE_LINK.search(row)
    if found is None:
        continue
    # Print only the matched link markup, not the whole line.
    print(found.group())
import re


def parse_fields(text):
    """Return a dict of ``name -> value`` template fields found in *text*.

    The text is cut at every newline that is followed by ``|`` or ``}``.
    A chunk of the form ``<name> = <value>`` (one whitespace character on
    each side of the ``=``) becomes one entry; chunks without that shape
    are ignored.  Values may span several lines.  Runs of two to five
    apostrophes (wiki emphasis markup) are stripped from the values.
    """
    fields = {}
    for chunk in re.split(r"\n[\|}]", text):
        # Raw string: "\s" in a normal string literal is an invalid escape
        # sequence (a SyntaxWarning on recent Python versions).
        # re.S lets the value part run across embedded newlines.
        found = re.search(r"^(.*?)\s=\s(.*)", chunk, re.S)
        if found is not None:
            fields[found.group(1)] = re.sub(r"'{2,5}", "", found.group(2))
    return fields


def main():
    """Print the fields of the イギリス article, ordered by field value."""
    # Imported here so parse_fields can be imported without the
    # project-local helper module being available.
    from mymodule import extract_from_json

    temp_dict = parse_fields(extract_from_json(u"イギリス"))
    # Same approach as 25.py; see the Python 3 reference.
    for k, v in sorted(temp_dict.items(), key=lambda x: x[1]):
        print(k, v)


if __name__ == "__main__":
    main()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# 20.py
import json


def iter_article_texts(json_lines, title):
    """Yield the ``"text"`` of every article in *json_lines* titled *title*.

    *json_lines* is any iterable of strings, each holding one JSON object
    with at least ``"title"`` and ``"text"`` keys (an open text file works).
    Whitespace-only lines are skipped instead of being handed to the JSON
    parser.
    """
    for raw in json_lines:
        if not raw.strip():
            continue
        article = json.loads(raw)
        if article["title"] == title:
            yield article["text"]


def find_category_lines(text):
    """Return the lines of *text* that contain the word ``Category``."""
    return [line for line in text.split("\n") if "Category" in line]


def main():
    """Print the イギリス article, a separator, then its Category lines."""
    # Imported here so the helpers above can be imported without the
    # project-local helper module being available.
    from mymodule import extract_from_json

    # Decode explicitly as UTF-8; relying on the locale default breaks
    # on platforms whose default encoding is not UTF-8.
    with open("../language100_another/jawiki-country.json",
              encoding="utf-8") as f:
        # Read the file one line (one JSON document) at a time.
        for text in iter_article_texts(f, u"イギリス"):
            print(text)

    print("======================================================================")

    # The article text is a single string: split it so the loop visits
    # lines rather than individual characters, and test for containment
    # because a whole line is never exactly equal to "Category".
    for line in find_category_lines(extract_from_json(u"イギリス")):
        print(line)


if __name__ == "__main__":
    main()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# 22.py
import json
import re

# Raw string: "\[" and "\]" in a normal string literal are invalid escape
# sequences.  The group is non-greedy so that two links on the same line
# are captured separately instead of being merged into one match.
CATEGORY_LINK = re.compile(r"\[\[Category:(.*?)\]\]")


def extract_categories(text):
    """Return the content of every ``[[Category:...]]`` link in *text*.

    The returned strings are whatever sits between ``Category:`` and the
    closing ``]]`` (so a ``|sortkey`` suffix, if present, is kept).
    Because ``.`` does not match a newline, a link never spans lines.
    """
    return CATEGORY_LINK.findall(text)


def main():
    """Print each category link content of the イギリス article."""
    # Imported here so extract_categories can be imported without the
    # project-local helper module being available.
    from mymodule import extract_from_json

    for name in extract_categories(extract_from_json(u'イギリス')):
        print(name)


if __name__ == "__main__":
    main()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# 27.py
import re


def remove_markup(text):
    """Return *text* with emphasis, internal-link and ``<br/>`` markup removed.

    Three substitutions are applied in order:

    1. runs of two to five apostrophes (emphasis) are deleted;
    2. ``[[...]]`` links keep only their trailing label,
       e.g. ``[[A|B]]`` -> ``B`` and ``[[A]]`` -> ``A``;
    3. ``<br/>`` / ``<br />`` tags are deleted.
    """
    # remove emphasis
    text = re.sub(r"'{2,5}", r"", text)
    # remove link
    text = re.sub(r"\[{2}([^\]]+?\|)*(.*?)\]{2}", r"\2", text)
    # remove br
    text = re.sub(r"<br\s?/>", r"", text)
    return text


def parse_fields(text):
    """Return a dict of ``name -> value`` template fields found in *text*.

    The text is cut at every newline that is followed by ``|`` or ``}``.
    A chunk of the form ``<name> = <value>`` becomes one entry, with the
    value passed through :func:`remove_markup`; other chunks are ignored.
    """
    fields = {}
    for chunk in re.split(r'\n[\|}]', text):
        # Raw string: "\s" in a normal string literal is an invalid escape
        # sequence.  re.S lets the value run across embedded newlines.
        found = re.search(r'^(.*?)\s=\s(.*)$', chunk, re.S)
        if found is not None:
            fields[found.group(1)] = remove_markup(found.group(2))
    return fields


def main():
    """Print the cleaned fields of the イギリス article, ordered by value."""
    # Imported here so the helpers above can be imported without the
    # project-local helper module being available.
    from mymodule import extract_from_json

    temp_dict = parse_fields(extract_from_json(u'イギリス'))
    for k, v in sorted(temp_dict.items(), key=lambda x: x[1]):
        print(k, v)


if __name__ == "__main__":
    main()