Ejemplo n.º 1
0
def remove_Mediawiki_markup():
    """Build a dict of basic-info fields with markup removed from the values.

    The text returned by ``extract_json()`` is split on newlines. Every line
    that ``info_pattern`` matches at its start yields one entry: the key is
    capture group 1 and the value is capture group 2 after four successive
    regex substitutions (see the inline comments for their order).

    Returns:
        dict: field name -> cleaned value. When the same field name appears
        on several lines, the last one wins.
    """
    basic_info = {}
    for line in extract_json().split('\n'):
        # Match once and reuse the result for the filter, the key and the
        # value instead of running the same regex three times per line.
        found = info_pattern.match(line)
        if found is None:
            continue

        # The substitutions are order-dependent; keep them innermost-first.
        value = emp_pattern.sub('', found.group(2))    # drop emp_pattern matches
        value = link_pattern.sub(r'\2', value)         # keep only group 2 of each link match
        value = mediawiki_pattern2.sub(r'\1', value)   # keep only group 1 of each match
        value = mediawiki_pattern1.sub(r'', value)     # drop remaining pattern1 matches

        basic_info[found.group(1)] = value
    return basic_info
Ejemplo n.º 2
0
def extract_basic_info():
    """Collect the basic-info fields found in the text from ``extract_json()``.

    The text is split on newlines and each line is tested against
    ``info_pattern`` anchored at the start of the line. For every matching
    line, capture group 1 becomes the key and capture group 2 the value.

    Returns:
        dict: field name -> raw field value. When the same field name
        appears on several lines, the last one wins.
    """
    # Run the regex once per line and reuse the match object, rather than
    # matching for the filter and then searching again for key and value.
    matches = (info_pattern.match(line) for line in extract_json().split('\n'))
    return {found.group(1): found.group(2)
            for found in matches if found is not None}
Ejemplo n.º 3
0
def remove_emp_markup():
    """Collect the basic-info fields with ``emp_pattern`` matches removed.

    The text returned by ``extract_json()`` is split on newlines. For every
    line that ``info_pattern`` matches at its start, capture group 1 becomes
    the key and capture group 2, with every ``emp_pattern`` match deleted,
    becomes the value (presumably this strips emphasis markup; confirm
    against the definition of ``emp_pattern``).

    Returns:
        dict: field name -> value without ``emp_pattern`` matches. When the
        same field name appears on several lines, the last one wins.
    """
    # Run the regex once per line and reuse the match object, rather than
    # matching for the filter and then searching again for key and value.
    matches = (info_pattern.match(line) for line in extract_json().split('\n'))
    return {found.group(1): emp_pattern.sub('', found.group(2))
            for found in matches if found is not None}
Ejemplo n.º 4
0
# coding: utf-8

import re
from k20 import extract_json

if __name__ == '__main__':
    # A heading line consists of a run of '=', a lazily matched title, and a
    # closing run of '=' that reaches the end of the line.
    section_pattern = re.compile(r'^(=+)(.*?)(=+)$')

    uk_data = extract_json().split('\n')

    # Match each line once and reuse the match object for both the title
    # (group 2) and the length of the opening '=' run (group 1).
    headings = (section_pattern.match(line) for line in uk_data)

    # Print one "title:count" line per heading, in source order.
    print('\n'.join('{}:{}'.format(found.group(2), len(found.group(1)))
                    for found in headings if found is not None))