コード例 #1
0
def maru():
    """Parse the country infobox template of the article into a dictionary.

    The article text is obtained from ``read20.file_reading()``. The body of
    the ``{{基礎情報 国 ... }}`` template is split into ``|key = value``
    fields, and each value is cleaned of wiki markup: emphasis quotes,
    internal-link brackets, template braces and bracketed external links.

    Returns:
        dict: Field name -> cleaned field value. The dictionary is empty
        when the text contains no such template.
    """
    text = read20.file_reading()
    template_dictionary = {}

    basic_information = re.search(r'^\{\{基礎情報\s国\n(.*)\n\}\}$', text,
                                  re.MULTILINE | re.DOTALL)
    if basic_information is None:
        # No infobox in the article: there is nothing to parse.
        return template_dictionary

    # Patterns are compiled once, outside the per-field loop.
    field_pattern = re.compile(r'^\|(.+?)\s*=\s*(.*)', re.DOTALL)
    # Wiki emphasis is written with 2 (italic), 3 (bold) or 5 (bold italic)
    # quotes, so runs of up to five quotes are removed in one go.
    emphasis_pattern = re.compile(r"'{2,5}")
    internal_link_pattern = re.compile(r'\[\[.+\]\]')
    template_pattern = re.compile(r'\{\{.+\}\}')
    external_link_pattern = re.compile(r'\[https?:[^]]*\]')

    # A new field starts at every newline that is directly followed by "|",
    # which keeps multi-line values attached to their field.
    for line in re.split(r'\n(?=\|)', basic_information.group(1)):
        data = field_pattern.match(line)
        if data is None:
            continue

        key = re.sub(r'\s$', '', data.group(1).replace('|', ''))

        value = emphasis_pattern.sub('', data.group(2))
        if internal_link_pattern.search(value):
            # Drop link brackets together with the "#" and "|" separators.
            value = re.sub(r'[\[\]#|]', '', value)
        if template_pattern.search(value):
            value = re.sub(r'[{}]', '', value)
        # NOTE: this runs after the bracket stripping above, so a bracketed
        # external link is only removed from values without internal links.
        value = external_link_pattern.sub('', value)

        template_dictionary[key] = value
    return template_dictionary
コード例 #2
0
# Extract the article's category names (by name, not as whole lines).

# Open question: should only the category name itself be retrieved?

import read20
import re

# Fetch the article and break it into a list of lines in a single step.
text = read20.file_reading().split('\n')

# The string literal below holds a disabled earlier draft of the extractor;
# it is a bare expression and is never executed.
"""
def CategoryName(line):
    name = re.match(r'\[\[Category:(.*)\]\]', line)
    name_group = name.group(1).split('|')
    return name_group[0]
"""


def category_name(line):
    """Return the category name from a line containing a ``Category:`` link.

    The name is everything after ``Category:`` up to the sort-key separator
    ``|`` or the closing ``]]``. Matching on "anything but a delimiter"
    rather than ``\\w+`` keeps names that contain punctuation or spaces
    (for example the middle dot U+30FB) intact.

    Args:
        line: One line of wiki markup, e.g. ``[[Category:Foo|*]]``.

    Returns:
        str: The category name with surrounding whitespace removed.

    Raises:
        ValueError: If ``line`` has no ``Category:`` followed by a name.
    """
    name = re.search(r'(?<=Category:)[^|\]\n]+', line)
    if name is None:
        raise ValueError('no category name found in: {!r}'.format(line))
    return name.group().strip()


# Walk the article line by line; only lines that begin with a
# [[Category:...]] link carry a category name to print.
for line in text:
    if re.match(r'\[\[Category:.+\]\]', line) is None:
        continue
    print(category_name(line))
# NOTE: \w roughly corresponds to [ぁ-んァ-ン一-龥0-9a-zA-Z_]; it does NOT match "・" (U+30FB) or spaces.