Esempio n. 1
0
import re
from mymodule import extract_from_json

# Pattern for a "[[ファイル:...]]" (file) link that runs up to the end of a line.
file_link = re.compile(r'\[\[ファイル:.+\]\]$')

article_text = extract_from_json(u"イギリス")
for row in article_text.split("\n"):
    found = file_link.search(row)
    if found is None:
        continue
    # Print only the matched span, i.e. from "[[ファイル:" to the end of the line.
    print(found.group())
Esempio n. 2
0
import re
from mymodule import extract_from_json

# Collect "name = value" fields from the article text and print them.
# The patterns are raw strings so that "\s" reaches the regex engine as written
# instead of relying on Python tolerating an invalid string escape.
field_pattern = re.compile(r"^(.*?)\s=\s(.*)", re.S)
emphasis_pattern = re.compile(r"'{2,5}")

temp_dict = {}
# Split wherever a newline is immediately followed by "|" or "}".
lines = re.split(r"\n[\|}]", extract_from_json(u"イギリス"))

for line in lines:
    # re.S lets the value part (group 2) span several physical lines.
    temp_line = field_pattern.search(line)
    if temp_line is not None:
        # Drop runs of 2-5 apostrophes (emphasis markup) from the value.
        temp_dict[temp_line.group(1)] = emphasis_pattern.sub(
            "", temp_line.group(2))

# As in 25.py -- refer to Python 3.
# Entries are ordered by their value (x[1]), not by their key.
for k, v in sorted(temp_dict.items(), key=lambda x: x[1]):
    print(k, v)
Esempio n. 3
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# 20.py

import json
from mymodule import extract_from_json

# Print the text of every article in the dump whose title is "イギリス".
with open("../language100_another/jawiki-country.json") as f:
    # Each line of the file is parsed as one standalone JSON document.
    for article_json in f:
        article_dict = json.loads(article_json)
        if article_dict["title"] == u"イギリス":
            print(article_dict["text"])

print("======================================================================")

# Print the lines of the article that mention "Category".
# NOTE(review): the sibling scripts treat the result of extract_from_json as a
# single string and split it on "\n"; iterating it directly would walk single
# characters, so a comparison against "Category" could never succeed.
lines = extract_from_json(u"イギリス").split("\n")
for line in lines:
    if "Category" in line:
        print(line)
Esempio n. 4
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# 22.py

import json
import re
from mymodule import extract_from_json

# Raw string so the bracket escapes are regex escapes, not (invalid) string
# escapes; compiled once instead of being passed to re.search on every line.
category_pattern = re.compile(r"\[\[Category:(.*)\]\]")

lines = extract_from_json(u'イギリス').split('\n')

for line in lines:
    category_line = category_pattern.search(line)
    if category_line is not None:
        # group(1) is everything between "[[Category:" and the last "]]" on
        # the line, so a trailing "|sortkey" part, if present, is included.
        print(category_line.group(1))
Esempio n. 5
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# 27.py


def remove_markup(text):
    """Return *text* with a few MediaWiki markup constructs stripped.

    Three substitutions are applied in order:

    1. Runs of two to five apostrophes (emphasis markup) are deleted.
    2. Internal links are replaced by their display text, e.g.
       ``[[target|label]]`` becomes ``label`` and ``[[target]]`` becomes
       ``target``.
    3. Line-break tags are deleted. ``<br>``, ``<br/>`` and ``<br />`` are all
       recognised (any amount of whitespace before the optional slash).
    """
    # remove emphasis
    text = re.sub(r"'{2,5}", r"", text)
    # remove link: group 1 swallows the leading "...|" segments, group 2 is
    # what remains before the closing "]]".
    text = re.sub(r"\[{2}([^\]]+?\|)*(.*?)\]{2}", r"\2", text)
    # remove br: the slash is optional so a bare "<br>" is stripped as well.
    text = re.sub(r"<br\s*/?>", r"", text)
    return text


import re
from mymodule import extract_from_json

# Collect "name = value" fields from the article text, strip markup from each
# value with remove_markup, and print the result.
# The pattern is a raw string so that "\s" is a regex escape rather than an
# invalid string escape.
field_pattern = re.compile(r'^(.*?)\s=\s(.*)$', re.S)

temp_dict = {}
# Split wherever a newline is immediately followed by "|" or "}".
lines = re.split(r'\n[\|}]', extract_from_json(u'イギリス'))

for line in lines:
    # re.S lets the value part (group 2) span several physical lines.
    temp_line = field_pattern.search(line)
    if temp_line is not None:
        temp_dict[temp_line.group(1)] = remove_markup(temp_line.group(2))

# Entries are ordered by their cleaned value (x[1]), not by their key.
for k, v in sorted(temp_dict.items(), key=lambda x: x[1]):
    print(k, v)