Ejemplo n.º 1
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import re
import tweet

# Load the tweets to scan.
# NOTE(review): presumably an iterable of strings, since each item is passed
# to a compiled pattern's search() in the loop below -- confirm against
# tweet.load_tweets.
tweets = tweet.load_tweets()

# Japanese postal code: three digits, an ASCII hyphen, four digits.
# Each digit may be ASCII (0-9) or full-width (U+FF10-U+FF19). The full-width
# range is spelled with escapes so it cannot be silently folded into a second
# ASCII "0-9" range by text normalization.
re_postal_code = re.compile('[0-9\uff10-\uff19]{3}-[0-9\uff10-\uff19]{4}')

# Any of the 47 prefecture names, including the 都/道/府/県 suffix.
re_prefecture = re.compile(
    '(?:北海道|東京都|(?:大阪|京都)府|(?:青森|岩手|宮城|秋田|山形|福島|茨城|栃木|群馬|埼玉|千葉|神奈川|新潟|富山|石川|福井|山梨|長野|岐阜|静岡|愛知|三重|滋賀|兵庫|奈良|和歌山|鳥取|島根|岡山|広島|山口|徳島|香川|愛媛|高知|福岡|佐賀|長崎|熊本|大分|宮崎|鹿児島|沖縄)県)'
)

# Shortest run of kanji/kana that ends in a municipality suffix (市区郡町村字).
# Character ranges:
#   U+4E00-U+9FA5  kanji
#   U+3041-U+3093  hiragana, including small kana
#   U+30A1-U+30F6  katakana, including small kana and ヶ
# The wider kana ranges keep names such as 茅ヶ崎市 in one piece; with the
# narrower あ-ん / ア-ン ranges the match would start after the ヶ.
re_address = re.compile(
    '([\u4e00-\u9fa5\u3041-\u3093\u30a1-\u30f6]+?[市区郡町村字])'
)

# Distinct values found so far, grouped by kind. Each inner dict is used like
# a set: the matched text is the key and the value is a placeholder.
address_dict = {
    'postal_code': {},
    'prefecture': {},
    'address': {},
}

# Scan every tweet and record each distinct postal code that is found.
# NOTE(review): the loop variable `tweet` rebinds the name of the imported
# `tweet` module for the rest of the script.
# NOTE(review): this listing stops at the dangling `if prefecture:` below; the
# remainder of the loop body is not visible here.
for tweet in tweets:
    # search() returns only the first postal code in the tweet, if any.
    postal_code_matched = re_postal_code.search(tweet)
    if postal_code_matched:
        postal_code = postal_code_matched.group(0)
        if postal_code:
            # The inner dict is used like a set: the matched text is the key
            # and True is just a placeholder value.
            address_dict['postal_code'][postal_code] = True

    # Same lookup for the first prefecture name in the tweet.
    prefecture_matched = re_prefecture.search(tweet)
    if prefecture_matched:
        prefecture = prefecture_matched.group(0)
        if prefecture:
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import re
import tweet

# Load the tweets to scan.
# NOTE(review): presumably an iterable of strings, since each item is passed
# to a compiled pattern's search() below -- confirm against tweet.load_tweets.
tweets = tweet.load_tweets()

# A mention: "@" followed by one or more ASCII letters, digits or underscores.
re_user_name = re.compile('@[0-9a-zA-Z_]+')

# Print the first mention found in each tweet; tweets without one are skipped.
# The loop variable is deliberately not called `tweet`, so the imported
# `tweet` module is not rebound by the loop.
for text in tweets:
    matched = re_user_name.search(text)
    if matched:
        print(matched.group(0))