# Print every field of the "基礎情報 国" template found in the article text
# returned by p20.get_britain() (presumably the article on Britain -- confirm
# against p20), one "name value" pair per line.
import json
import re

from p20 import get_britain

# Matches the whole "{{基礎情報 国 ...}}" template.  Group 1 is the repeated
# "|name = value" part; a value may hold plain text and/or nested "{{...}}"
# templates, each optionally preceded by "*" characters.
# NOTE(review): a repeated capture group keeps only its last repetition, so
# the code below sees every field only if group 1 matches in a single
# repetition -- confirm against the real data.
TEMPLATE_PATTERN = r'{{基礎情報\s国(\|[^{}]+\=([^{}]+|((\**)?{{[^{}]+}}[^{}]*)+)+)+}}'

# Matches a field name together with the whitespace and "=" that follow it.
FIELD_NAME_PATTERN = r'[^\|]+\s\='

# The context manager closes the file even when get_britain() raises.
# Newlines are dropped so the article becomes one long line.
with open("jawiki-country.json", encoding='utf-8') as fp:
    text = get_britain(fp).replace('\n', '')

# findall() yields one tuple of groups per match; keep group 1 of the first
# match.  Raises IndexError when the template is not present in the text.
basic = re.findall(TEMPLATE_PATTERN, text)[0][0]

# Splitting on the same pattern used to find the names gives the text before
# the first name followed by one value per name; drop that leading piece so
# names and values line up.
field_names = re.findall(FIELD_NAME_PATTERN, basic)
field_values = re.split(FIELD_NAME_PATTERN, basic)[1:]

info = {}
for var, arg in zip(field_names, field_values):
    # Strip the " =" from the name, and the trailing "|" that introduces the
    # next field from the value.
    info[re.sub(r'\s\=\s?', '', var)] = re.sub(r'\|$', '', arg)

for key, value in info.items():
    print(key, value)
# Print the category name from every line containing "Category:" in the text
# returned by p20.get_britain() (presumably the article on Britain -- confirm
# against p20), with ASCII punctuation removed.
import json
import re
import string

from p20 import get_britain

# Deletion table for every character in string.punctuation.  Interpolating
# string.punctuation into a regex character class is fragile: its "\]" is
# read as an escaped bracket, so a literal backslash would never be removed.
PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)

# The context manager closes the file even when get_britain() raises.
with open("jawiki-country.json", encoding='utf-8') as fp:
    text = get_britain(fp)

# "." does not match a newline, so each match is confined to a single line.
for line in re.findall('(.+Category:.+)', text):
    # Keep what follows the last ":" on the line, then drop the punctuation
    # (brackets, "|", "*", ...) left over from the wiki markup.
    print(line.split(':')[-1].translate(PUNCTUATION_TABLE))