def main():
    """Print each basic-info field with its markup-cleaned value.

    Loads the data, extracts the basic-info mapping, and for every entry
    prints the key followed by the value after it has been passed through
    ``remove_emphasis`` and then ``remove_internal_link``. Both key and
    value are UTF-8 encoded before being formatted.

    Returns:
        0, as the script's exit status.
    """
    basic_info = extract_basic_info(load_data())

    for field, raw in basic_info.items():
        print('key = {}'.format(field.encode('utf8')))
        # Emphasis is removed first, then internal links.
        cleaned = remove_internal_link(remove_emphasis(raw))
        print('value = {}\n'.format(cleaned.encode('utf8')))

    return 0
def main():
    """Print each basic-info field with emphasis markup removed.

    Loads the data, extracts the basic-info mapping, and for every entry
    prints the key followed by the value after it has been passed through
    ``remove_emphasis``. Both key and value are UTF-8 encoded before being
    formatted.

    Returns:
        0, as the script's exit status.
    """
    basic_info = extract_basic_info(load_data())

    # Disabled manual spot checks for remove_emphasis:
    # print(remove_emphasis("'''''aiueo'''''"))
    # print(remove_emphasis("''''aiueo''''"))
    # print(remove_emphasis("'''aiueo'''"))
    # print(remove_emphasis("''aiueo''"))
    # print(remove_emphasis("'aiueo'"))

    for field, raw in basic_info.items():
        print('key = {}'.format(field.encode('utf8')))
        stripped = remove_emphasis(raw)
        print('value = {}\n'.format(stripped.encode('utf8')))

    return 0
def main():
    """Print the URL of the flag image named in the basic info.

    Loads the data, extracts the basic-info mapping, and queries the
    Japanese Wikipedia API (``action=query``, ``prop=imageinfo``,
    ``iiprop=url``) for the file named by the flag-image field of that
    mapping. The URL of the first ``imageinfo`` entry in the JSON response
    is printed.

    Returns:
        0, as the script's exit status.

    Raises:
        KeyError: if the flag-image field is missing from the basic info or
            the response does not have the expected structure.
        IOError: if the HTTP request fails (propagated from ``urlopen``).
    """
    data = load_data()
    info = extract_basic_info(data)

    param = {
        'action': 'query',
        'format': 'json',
        'iiprop': 'url',
        'prop': 'imageinfo',
        'titles': 'Image:{}'.format(info[u'国旗画像']),
    }
    url = u'http://ja.wikipedia.org/w/api.php?' + urllib.urlencode(param)

    # Open the connection *before* entering ``try``: if urlopen itself fails,
    # its error must propagate instead of being masked by a NameError raised
    # from ``r.close()`` on a name that was never bound.
    r = urllib.urlopen(url)
    try:
        response = json.loads(r.read().decode('utf8'))
    finally:
        r.close()

    # NOTE(review): the page is looked up under the fixed key "-1"; this
    # presumably relies on the file having no local page id on this wiki.
    # Confirm against the API response if other wikis/files are queried.
    print(response[u'query'][u'pages'][u'-1'][u'imageinfo'][0][u'url'])

    return 0