Beispiel #1
0
import json
import re
from p20 import get_britain



# Extract the fields of the "基礎情報 国" infobox template from the article
# returned by get_britain() and print each field name with its value.
#
# All patterns are raw strings so that regex escapes such as \s and \| are
# passed to the re module verbatim instead of being parsed as (invalid)
# Python string escapes.

# Matches the whole infobox template; group 1 is the first capture group
# (the "|name = value" part).
INFOBOX_RE = re.compile(r'{{基礎情報\s国(\|[^{}]+\=([^{}]+|((\**)?{{[^{}]+}}[^{}]*)+)+)+}}')
# Matches a field name together with the whitespace and "=" that follow it.
FIELD_NAME_RE = re.compile(r'[^\|]+\s\=')
# Matches the " =" (plus one optional whitespace) that is removed from a name.
NAME_SUFFIX_RE = re.compile(r'\s\=\s?')
# Matches the "|" separator left at the end of a value.
TRAILING_PIPE_RE = re.compile(r'\|$')

# The context manager closes the file even if get_britain() raises.
# NOTE(review): get_britain comes from p20; it is assumed to return the
# article text as a str (the result is used with str.replace) - confirm.
with open("jawiki-country.json", encoding='utf-8') as fp:
    text = get_britain(fp).replace('\n', '')  # article text with newlines removed

# findall() returns one tuple of groups per match; [0][0] is group 1 of the
# first match.  An IndexError is raised when nothing matches.
basic = INFOBOX_RE.findall(text)[0][0]

# The same pattern yields the names (findall) and the text between them
# (split).  The first element of split() precedes the first name, so it is
# skipped to keep names and values aligned.
info = {}
for var, arg in zip(FIELD_NAME_RE.findall(basic), FIELD_NAME_RE.split(basic)[1:]):
    info[NAME_SUFFIX_RE.sub('', var)] = TRAILING_PIPE_RE.sub('', arg)

for key, value in info.items():
    print(key, value)
Beispiel #2
0
import json
import re
import string
from p20 import get_britain

# Print the category name from every line of the article that contains
# "Category:", with ASCII punctuation removed.

# The context manager closes the file even if get_britain() raises.
# NOTE(review): get_britain comes from p20; it is assumed to return the
# article text as a str - confirm.
with open("jawiki-country.json", encoding='utf-8') as fp:
    text = get_britain(fp)

# Translation table that deletes every character in string.punctuation.
# A regex character class built by pasting string.punctuation between "[" and
# "]" is fragile: the backslash it contains escapes the following "]" and is
# therefore never removed itself.  A translation table has no such pitfall
# and is built only once.
PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)

# "." does not match a newline, so every match is a single line.
for line in re.findall('(.+Category:.+)', text):
    # The category name is whatever follows the last ":" on the line.
    print(line.split(':')[-1].translate(PUNCTUATION_TABLE))