Example #1
0
def main():
    """Print the target of every ``File:`` / ``ファイル:`` link in the text.

    Each captured name is the run of non-``|`` characters between the
    prefix and the next ``|``. Names are printed UTF-8 encoded, one per
    line.

    Returns:
        0, always.
    """
    text = load_data()

    # NOTE: ``[\[\[]?`` is a character class, so it matches at most one
    # optional ``[``; it does not require a literal ``[[`` before the prefix.
    media_link = re.compile(r'[\[\[]?(?:ファイル|File):(?P<name>[^|]+)\|')
    for hit in media_link.finditer(text):
        file_name = hit.group('name')
        print(file_name.encode('utf8'))

    return 0
Example #2
0
def main():
    """Dump every entry produced by ``extract_basic_info`` as key/value lines.

    For each item a ``key = ...`` line is printed, followed by a
    ``value = ...`` line and a blank line. Both parts are UTF-8 encoded
    before formatting.

    Returns:
        0, always.
    """
    fields = extract_basic_info(load_data())

    for name, content in fields.items():
        key_line = 'key = {}'.format(name.encode('utf8'))
        print(key_line)
        # The trailing "\n" in the template yields a blank separator line.
        value_line = 'value = {}\n'.format(content.encode('utf8'))
        print(value_line)
    return 0
Example #3
0
def main():
    """List the media file names referenced in the loaded text.

    A name is whatever follows ``File:`` or ``ファイル:`` up to (but not
    including) the next ``|``. Every name is written to stdout as UTF-8
    bytes on its own line.

    Returns:
        0, always.
    """
    source = load_data()
    # The leading ``[\[\[]?`` only ever consumes a single optional ``[``.
    pattern = r'[\[\[]?(?:ファイル|File):(?P<name>[^|]+)\|'

    # Lazily pull the named group out of each match, in document order.
    names = (found.group('name') for found in re.finditer(pattern, source))
    for name in names:
        print(name.encode('utf8'))

    return 0
Example #4
0
def main():
    """Print the name of every ``[[Category:...]]`` link in the loaded text.

    Only the category name is printed; an optional ``|...`` part before
    the closing ``]]`` is matched but not included. Output is UTF-8
    encoded, one name per line.

    Returns:
        0, always.
    """
    text = load_data()

    category_link = re.compile(r'\[\[Category:(?P<name>.+?)(\|.*)?\]\]')
    for hit in category_link.finditer(text):
        category = hit.group('name')
        print(category.encode('utf8'))

    return 0
Example #5
0
def main():
    """Print every ``[[Category:...]]`` declaration found in the loaded text.

    The whole matched span (brackets included) is printed, UTF-8 encoded,
    one match per line.

    Returns:
        0, always.
    """
    text = load_data()

    # ``.`` does not cross newlines (no DOTALL flag), so a match stays on
    # one line; ``.+`` is greedy and runs to the last ``]]`` on that line.
    declaration = re.compile(r'\[\[Category:.+\]\]')
    for hit in declaration.finditer(text):
        whole_match = hit.group(0)
        print(whole_match.encode('utf8'))

    return 0
Example #6
0
def main():
    """Show the basic-info entries extracted from the loaded data.

    Each entry is written as two lines, ``key = <key>`` and
    ``value = <value>``, with an empty line after the value. Keys and
    values are encoded to UTF-8 before being formatted.

    Returns:
        0, always.
    """
    raw = load_data()
    entries = extract_basic_info(raw)

    key_template = 'key = {}'
    value_template = 'value = {}\n'  # newline gives the blank separator
    for field, text in entries.items():
        print(key_template.format(field.encode('utf8')))
        print(value_template.format(text.encode('utf8')))
    return 0
Example #7
0
def main():
    """Extract and print category names from ``[[Category:...]]`` links.

    The ``name`` group captures the text after ``Category:``; a trailing
    ``|...`` part, if present, is consumed by a separate group and not
    printed. Names are emitted as UTF-8 bytes, one per line.

    Returns:
        0, always.
    """
    source = load_data()
    pattern = r'\[\[Category:(?P<name>.+?)(\|.*)?\]\]'

    # Lazily pull the named group out of each match, in document order.
    names = (found.group('name') for found in re.finditer(pattern, source))
    for name in names:
        print(name.encode('utf8'))

    return 0
Example #8
0
def main():
    """Print each category declaration contained in the loaded text.

    A declaration is the full text matched by ``[[Category:...]]``; it is
    printed unchanged apart from being UTF-8 encoded.

    Returns:
        0, always.
    """
    source = load_data()
    pattern = r'\[\[Category:.+\]\]'

    # group(0) is the entire match, the same as calling group() bare.
    matches = (found.group(0) for found in re.finditer(pattern, source))
    for declaration in matches:
        print(declaration.encode('utf8'))

    return 0
Example #9
0
def main():
    """Print the basic-info entries with emphasis and internal links removed.

    Keys are printed as-is. Each value is passed through
    ``remove_emphasis`` and then ``remove_internal_link`` before being
    printed. Both are UTF-8 encoded, and a blank line follows every value.

    Returns:
        0, always.
    """
    fields = extract_basic_info(load_data())

    for name, raw_value in fields.items():
        print('key = {}'.format(name.encode('utf8')))
        # Order matters: emphasis is stripped first, then internal links.
        cleaned = remove_internal_link(remove_emphasis(raw_value))
        print('value = {}\n'.format(cleaned.encode('utf8')))
    return 0
Example #10
0
def main():
    """Print each section heading of the loaded text with its level.

    A heading is text enclosed by two identical runs of two or more ``=``.
    The output line is ``<title>,<level>``: the title is stripped of
    surrounding whitespace and UTF-8 encoded, and the level is the length
    of the ``=`` run minus one.

    Returns:
        0, always.
    """
    text = load_data()

    # ``(?P=sep)`` forces the closing run of ``=`` to equal the opening one.
    heading = re.compile(r'(?P<sep>==+)(?P<title>.+?)(?P=sep)')

    for hit in heading.finditer(text):
        title = hit.group('title').strip().encode('utf8')
        level = len(hit.group('sep')) - 1
        print('{},{}'.format(title, level))

    return 0
Example #11
0
def main():
    """List section titles and their levels found in the loaded text.

    Sections are recognised as ``==Title==``-style spans whose opening and
    closing ``=`` runs are identical. For each one, ``<title>,<level>`` is
    printed, where level is the number of ``=`` in the run minus one.

    Returns:
        0, always.
    """
    source = load_data()
    pattern = r'(?P<sep>==+)(?P<title>.+?)(?P=sep)'
    line_template = '{},{}'

    for found in re.finditer(pattern, source):
        marker = found.group('sep')
        raw_title = found.group('title')
        # Titles are trimmed before encoding.
        encoded_title = raw_title.strip().encode('utf8')
        print(line_template.format(encoded_title, len(marker) - 1))

    return 0
Example #12
0
def main():
    """Print the basic-info entries with emphasis markup removed from values.

    Every entry is written as a ``key = ...`` line followed by a
    ``value = ...`` line and a blank line. Values go through
    ``remove_emphasis`` first; keys and values are UTF-8 encoded.

    Returns:
        0, always.
    """
    fields = extract_basic_info(load_data())

    # Manual checks for remove_emphasis (kept disabled):
    # print(remove_emphasis("'''''aiueo'''''"))
    # print(remove_emphasis("''''aiueo''''"))
    # print(remove_emphasis("'''aiueo'''"))
    # print(remove_emphasis("''aiueo''"))
    # print(remove_emphasis("'aiueo'"))

    for name, raw_value in fields.items():
        print('key = {}'.format(name.encode('utf8')))
        plain = remove_emphasis(raw_value)
        print('value = {}\n'.format(plain.encode('utf8')))
    return 0
Example #13
0
def main():
    """Show each basic-info key with its value after ``remove_emphasis``.

    Output per entry: ``key = <key>``, then ``value = <value>`` followed by
    an empty line. Both parts are encoded to UTF-8 before formatting.

    Returns:
        0, always.
    """
    raw = load_data()
    entries = extract_basic_info(raw)

    # Sample inputs for checking remove_emphasis by hand (disabled):
    # print(remove_emphasis("'''''aiueo'''''"))
    # print(remove_emphasis("''''aiueo''''"))
    # print(remove_emphasis("'''aiueo'''"))
    # print(remove_emphasis("''aiueo''"))
    # print(remove_emphasis("'aiueo'"))

    key_template = 'key = {}'
    value_template = 'value = {}\n'  # newline gives the blank separator
    for field, text in entries.items():
        print(key_template.format(field.encode('utf8')))
        print(value_template.format(remove_emphasis(text).encode('utf8')))
    return 0
Example #14
0
def main():
    """Print the URL of the flag image named in the basic info.

    The file name stored under the ``国旗画像`` key of the extracted basic
    info is sent to the Japanese Wikipedia API (``action=query``,
    ``prop=imageinfo``, ``iiprop=url``), and the ``url`` of the first
    ``imageinfo`` entry in the JSON response is printed.

    Returns:
        0, always.

    Raises:
        IOError: if the HTTP request cannot be opened or read.
        KeyError: if the basic info has no ``国旗画像`` entry, or the
            response lacks the expected keys.
    """
    info = extract_basic_info(load_data())

    query = {
        'action': 'query',
        'format': 'json',
        'iiprop': 'url',
        'prop': 'imageinfo',
        'titles': 'Image:{}'.format(info[u'国旗画像'])
    }
    url = u'http://ja.wikipedia.org/w/api.php?' + urllib.urlencode(query)

    # Open the connection *before* entering ``try``: if urlopen() raised
    # inside it, the ``finally`` clause would touch an unbound name and
    # replace the real I/O error with a NameError.
    response = urllib.urlopen(url)
    try:
        payload = json.loads(response.read().decode('utf8'))
    finally:
        response.close()

    # NOTE(review): the lookup assumes the API lists the file under the
    # page key '-1' -- confirm this holds for every title queried.
    print(payload[u'query'][u'pages'][u'-1'][u'imageinfo'][0][u'url'])
    return 0