import re

from k20 import extract_json


def remove_Mediawiki_markup():
    # Clean each basic-information value inside-out: strip emphasis markup,
    # replace internal links with their display text (group 2), keep group 1
    # of mediawiki_pattern2 matches, then delete whatever mediawiki_pattern1 matches.
    uk_data = extract_json().split('\n')
    basic_info = {info_pattern.search(line).group(1):
                  mediawiki_pattern1.sub(
                      '',
                      mediawiki_pattern2.sub(
                          r'\1',
                          link_pattern.sub(
                              r'\2',
                              emp_pattern.sub('', info_pattern.search(line).group(2)))))
                  for line in uk_data if info_pattern.match(line)}
    return basic_info
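# link_pattern, mediawiki_pattern1, and mediawiki_pattern2 are not defined in
# this excerpt. The definitions below are only a plausible sketch of what they
# could look like given how they are used above, not the original regexes.
link_pattern = re.compile(r'\[\[([^|\]]*\|)?([^\]]*?)\]\]')                # [[target|text]]: text in group 2
mediawiki_pattern2 = re.compile(r'\{\{[^|}]+\|(?:[^|}]+\|)*([^|}]+)\}\}')  # {{lang|en|text}}: last field in group 1
mediawiki_pattern1 = re.compile(r'<.*?>|\{\{.*?\}\}|\[http[^\]]*\]')       # leftover tags, templates, external links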
def extract_basic_info():
    """Return the basic-information template as a {field: raw value} dict."""
    uk_data = extract_json().split('\n')
    basic_info = {info_pattern.search(line).group(1): info_pattern.search(line).group(2)
                  for line in uk_data if info_pattern.match(line)}
    return basic_info
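# info_pattern is not defined in this excerpt either; a sketch of what it
# might be (an assumption): one "|field = value" line of the basic-information
# template, with the field name in group 1 and the raw value in group 2.
info_pattern = re.compile(r'^\|(.+?)\s*=\s*(.*)$')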
def remove_emp_markup():
    """Like extract_basic_info(), but with emphasis markup stripped from each value."""
    uk_data = extract_json().split('\n')
    basic_info = {info_pattern.search(line).group(1):
                  emp_pattern.sub('', info_pattern.search(line).group(2))
                  for line in uk_data if info_pattern.match(line)}
    return basic_info
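# emp_pattern is assumed as well: a sketch that matches runs of MediaWiki
# emphasis quotes ('' italic, ''' bold, ''''' both) so sub('') strips them.
emp_pattern = re.compile(r"'{2,5}")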
# coding: utf-8
import re

from k20 import extract_json

if __name__ == '__main__':
    # A heading line looks like "==Section name==": the number of '=' characters
    # gives the section level, the text between them is the section name.
    section_pattern = re.compile(r'^(=+)(.*?)(=+)$')
    uk_data = extract_json().split('\n')
    print('\n'.join(['{}:{}'.format(section_pattern.search(line).group(2),
                                    len(section_pattern.search(line).group(1)))
                     for line in uk_data if section_pattern.match(line)]))
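# A minimal usage sketch for the helpers above (assumption: they live in, or
# are imported into, the same module): print each cleaned infobox field.
def print_clean_basic_info():
    for field, value in remove_Mediawiki_markup().items():
        print('{}: {}'.format(field, value))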