Ejemplo n.º 1
0
def text_to_word(text, conv):
    """Create a ``Word`` for ``text`` with a cleaned-up reading.

    ``kaki`` receives ``text`` unchanged.  ``yomi`` starts out as
    ``conv.do(text)`` and then goes through three clean-up passes
    (described inline).  The resulting pair is printed before returning.

    Args:
        text: Written form of the word.
        conv: Object exposing ``do(str)``; its result is used as the raw
            reading.

    Returns:
        The populated ``Word`` instance.
    """
    word = Word()
    word.kaki = text

    reading = conv.do(text)

    # Collapse a run of three or more identical characters into one.
    # e.g. おはよ!!! -> おはよ!
    reading = re.sub(r'(.)\1{2,}', r'\1', reading)

    # When the string starts with non-bracket text, drop everything from
    # the first opening bracket onwards.
    # e.g. ちょん↑ぱぁ!(しょうり) -> ちょん↑ぱぁ!
    reading = re.sub(r'^([^()()「」]+)[((「].*$', r'\1', reading)

    # Keep only hiragana and a few punctuation marks.
    # e.g. ちょん↑ぱぁ! -> ちょんぱぁ
    kept_runs = re.findall(r'[ぁ-ん、。ー]+', reading)
    word.yomi = "".join(kept_runs)

    print(f"kaki: {word.kaki}, yomi: {word.yomi}")
    return word
Ejemplo n.º 2
0
def main():
    """Fetch recent tweets of ``ACCOUNT`` and store them with their words.

    For each tweet returned by ``api.user_timeline`` the text is passed
    through ``filter_text`` together with the tweet's first hashtag (or an
    empty string when there is none).  Tweets whose filtered text is empty
    are skipped.  For the remaining tweets, every line accepted by
    ``find_word`` is stored as a ``Word`` (written form plus the converter's
    reading) via ``find_or_add_word``, and the tweet itself is stored via
    ``find_or_add_tweet``.

    Relies on names defined outside this function: the Twitter credentials,
    ``ACCOUNT``, ``COUNT``, ``session`` and the helper functions.
    """
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_secret)
    api = tweepy.API(auth)

    # NOTE(review): presumably `kks` is pykakasi's kakasi class, in which case
    # these modes convert katakana (K) and kanji (J) to hiragana (H) - confirm.
    kakasi = kks()
    kakasi.setMode('K', 'H')
    kakasi.setMode('J', 'H')
    conv = kakasi.getConverter()

    results = api.user_timeline(screen_name=ACCOUNT, count=COUNT)

    for r in results:
        hashtag_tags = r.entities['hashtags']
        # Only the first hashtag, if any, is handed to filter_text.
        hashtag = hashtag_tags[0]['text'] if hashtag_tags else ""

        text = filter_text(r.text, hashtag)

        if not text:
            # Nothing usable is left after filtering.  The original guard was
            # a bare `pass`, which fell through to the code below; skip the
            # tweet instead so empty (or None) text is never processed.
            continue

        for line in text.splitlines():
            if find_word(line):
                word = Word()
                word.kaki = line
                word.yomi = conv.do(line)
                print(f"kaki: {word.kaki}, yomi: {word.yomi}")
                find_or_add_word(session, word)

        tweet = Tweet()
        tweet.twitterId = r.id
        tweet.text = text
        find_or_add_tweet(session, tweet)
Ejemplo n.º 3
0
import unittest
from analysis import save_word_from_tweet
from models import find_or_add_tweet, Word, Tweet, session
# NOTE: running this file directly fails with ImportError, so the only way
# to use it is to run it from an interactive interpreter session.

# Each entry: (tweet text, hashtags, expected yomi, expected kaki, id offset)
sample = [("チョン↑パァ!(勝利)", [], "ちょんぱぁ", "チョン↑パァ!(勝利)", 10),
          ("ばにしぇだよ〜wwwww", [], "ばにしぇだよ", "ばにしぇだよ〜wwwww", 20),
          ("任せてほ↑しい", [], "まかせてほしい", "任せてほ↑しい", 30)]

for text, hashtags, yomi, kaki, n in sample:
    # Store five tweets with the same text under distinct twitter ids.
    # NOTE(review): presumably save_word_from_tweet looks at stored tweets
    # - confirm why five copies are required.
    for i in range(1, 6):
        tweet = Tweet()
        tweet.text = text
        tweet.twitterId = 200 + n + i
        find_or_add_tweet(session, tweet)

    # Expected result, taken from the sample table.
    word = Word()
    word.yomi = yomi
    word.kaki = kaki

    # Actual result produced by the code under test.
    tmp = save_word_from_tweet(text, hashtags)

    # The labels were previously swapped: `tmp` is the generated value and
    # `word` is the expected one.
    print("generated: ", tmp.yomi, " => ", tmp.kaki)
    print("expected:  ", word.yomi, " => ", word.kaki)
    print(word.yomi == tmp.yomi and word.kaki == tmp.kaki)