Exemple #1
0
#!/bin/python
# -*- coding: utf-8 -*-

from gct import get_country_text

# Load the text returned for the イギリス entry and break it into lines.
# NOTE(review): get_country_text is presumably a lookup into the JSON dump
# by article title -- confirm against the gct module.
article = get_country_text("jawiki-country.json", u'イギリス')
lines = article.split("\n")

# Emit only the lines that open with a category link.
category_prefix = "[[Category:"
for line in lines:
    if not line.startswith(category_prefix):
        continue
    print(line)
Exemple #2
0
#!/bin/python
# -*- coding: utf-8 -*-

from gct import get_country_text

# Keep only the text that precedes the first "\n}}\n" and split it into lines.
# NOTE(review): that marker presumably closes the leading infobox template --
# confirm against the data.
article = get_country_text("jawiki-country.json", u"イギリス")
lines = article.strip().split("\n}}\n")[0].split("\n")

# Template fields are the lines starting with "|"; drop that marker.
tps = [line.lstrip("|") for line in lines if line.startswith("|")]

# Map each field name to its value, ignoring anything from "<ref" onwards.
# partition() splits on the FIRST " = " only, so a value that itself contains
# " = " stays intact, and a line without the separator is skipped instead of
# raising ValueError while the dict is being built.
d = {}
for line in tps:
    name, sep, value = line.split("<ref")[0].partition(" = ")
    if not sep:
        continue
    d[name] = value

print(d)
Exemple #3
0
def flat_json(res_json):
    """Flatten a nested JSON-like dict into a single-level dict.

    Nested dicts (directly, or as elements of a list) are merged into the
    result recursively; the key that held the nested container is dropped.
    When the same key occurs more than once, the value met last wins.

    List elements that are not dicts cannot be flattened, so they are kept
    together as a list under the original key instead of raising
    AttributeError.  A list with no such elements (including an empty list)
    contributes no entry for its own key.

    Args:
        res_json: A dict whose values may be scalars, dicts or lists.

    Returns:
        A new dict holding the flattened key/value pairs.
    """
    flatted_json = {}
    for k, v in res_json.items():
        if isinstance(v, dict):
            flatted_json.update(flat_json(v))
        elif isinstance(v, list):
            leftovers = []
            for e in v:
                if isinstance(e, dict):
                    flatted_json.update(flat_json(e))
                else:
                    # Scalars (or nested lists) have no keys to merge.
                    leftovers.append(e)
            if leftovers:
                flatted_json[k] = leftovers
        else:
            flatted_json[k] = v
    return flatted_json



country_text = get_country_text("jawiki-country.json", u"イギリス")
tps_val = get_template_value(country_text)

# Ordered (pattern, replacement) pairs; each rule runs on the output of the
# previous one, so the order is significant.
markup_rules = (
    (r"'{2,5}", r""),                # runs of 2-5 apostrophes
    (r"\[{2}(.*?)\]{2}", r"\1"),     # [[...]] -> inner text (any "|" is kept)
    (r"\{{2}(.*?)\}{2}", r"\1"),     # {{...}} -> inner text
    (r"\(.*?\)", r""),               # parenthesised spans
    (r"<.*?>", r""),                 # angle-bracket tags
    (r"\[.*?\]", r""),               # remaining single-bracket spans
    (r"lang\|\w{2}\|", r""),         # "lang|xx|" prefixes
)


def _strip_markup(raw):
    """Apply every rule in markup_rules to raw, in order, and return the result."""
    cleaned = raw
    for pattern, replacement in markup_rules:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned


# Same keys as tps_val, with the markup removed from each value.
d = {field: _strip_markup(raw) for field, raw in tps_val.items()}

url = "https://en.wikipedia.org/w/api.php"