def preprocess(text):
    """Pick a seed morpheme from *text*.

    The text is split with ``api.sentences`` and every sentence is analysed
    with ``api.morph``. Within a sentence the seed is the first morpheme
    whose ``pos`` contains the noun tag (u'名詞'); when the sentence has no
    such morpheme, the morpheme at index 1 is used (or the only morpheme if
    the sentence has just one).

    Returns:
        The seed morpheme (an element of the ``morphs`` list returned by
        ``api.morph``) chosen for the last sentence that produced any
        morphemes, or None when no sentence produced one.
    """
    # TODO: for now use the first noun of the first sentence, or the first
    # morpheme when there is no noun.
    # NOTE(review): as written the seed of the LAST sentence wins and the
    # fallback index is 1, not 0 -- both are kept unchanged; confirm which
    # behaviour the TODO above really wants.
    seed = None
    for sentence in api.sentences(text)['sentences']:
        morphs = api.morph(sentence)['morphs']
        if not morphs:
            # Nothing to choose from; keep what an earlier sentence gave us.
            continue
        noun = next((m for m in morphs if u'名詞' in m['pos']), None)
        if noun is not None:
            seed = noun
        else:
            # A one-morpheme sentence has no index 1, so use its only entry.
            seed = morphs[1] if len(morphs) > 1 else morphs[0]
    return seed
def twitter_based(text):
    """Build a reply for *text* out of a tweet found via its seed word.

    ``generate_seed(text)`` supplies a morpheme whose ``norm_surface`` is
    used as the query for ``api.search_tweets`` (``limit=1``). The first
    sentence of the first returned tweet is handed to ``postprocess``.

    Returns:
        Whatever ``postprocess`` returns, or None when no seed is available,
        the search has no hits, or the tweet contains no sentence.
    """
    seed = generate_seed(text)
    if not seed:
        return None

    # TODO: for now use the first tweet of the search results.
    tweet_example = api.search_tweets(seed['norm_surface'], limit=1)
    # Return None when the search came back empty.
    if tweet_example['count'] == 0 or not tweet_example['texts']:
        return None

    # TODO: for now use the leading sentence.
    sentences = api.sentences(tweet_example['texts'][0])['sentences']
    if not sentences:
        # Indexing [0] on an empty split would raise IndexError.
        return None

    # Morpheme-sequence rewriting is delegated to postprocess.
    return postprocess(sentences[0])
def generate_seed(text):
    """Choose the morpheme used as the seed for *text*.

    Each sentence from ``api.sentences`` is analysed with ``api.morph``.
    Within a sentence the first morpheme whose ``pos`` contains u'固有' or
    u'名詞' is taken; if the sentence has none, the morpheme at index 1 is
    used (or the only morpheme when the sentence has just one).

    Returns:
        The morpheme chosen for the last sentence that produced any
        morphemes, or None when no sentence produced one.
    """
    seed = None
    for sentence in api.sentences(text)['sentences']:
        morphs = api.morph(sentence)['morphs']
        if not morphs:
            # Skip sentences without morphemes instead of indexing into them.
            continue
        for morph in morphs:
            pos = morph['pos']
            if u'固有' in pos or u'名詞' in pos:
                seed = morph
                break
        else:
            # No matching morpheme in this sentence: index 1 is the existing
            # fallback; guard the single-morpheme case against IndexError.
            seed = morphs[1] if len(morphs) > 1 else morphs[0]
    return seed
def scenario_based(text):
    """Return one scenario-triggered response for *text*, chosen at random.

    Every sentence of *text* (``api.sentences``) is turned into a query of
    ``surface:pos`` strings built from its morphemes (``api.morph``), and the
    query is sent to ``api.trigger`` together with the module-level
    ``scenario_file``. All returned ``texts`` are pooled.

    Returns:
        A random element of the pooled texts, or None when the pool is empty.
    """
    candidates = []
    for sentence in api.sentences(text)['sentences']:
        query = [
            u'{}:{}'.format(m['surface'], m['pos'])
            for m in api.morph(sentence)['morphs']
        ]
        candidates.extend(api.trigger(scenario_file, query)['texts'])

    if not candidates:
        return None
    return candidates[random.randint(0, len(candidates) - 1)]
# -*- coding: utf-8 -*- import api api = api.API('https://52.68.75.108', 'secret', 'js2015cps') print print '文分割' s = '日本語文字列を文単位で分割する。複数文を渡すと、文ごとに区切ってくれる。' for sentence in api.sentences(s)['sentences']: print sentence print print 'ツイート検索' print '=' * 20 for text in api.search_tweets('検索')['texts']: print text print print 'リプライ検索' print '=' * 20 for text in api.search_reply('検索')['texts']: print text print print 'マルコフ連鎖' print '=' * 20 seed = {'norm_surface': "今日", 'pos': "名詞"} for morph in api.markov_chain(seed)['morphs']: print morph,
if __name__ == "__main__": api = get_api() reps = api.get_reply() print json.dumps(reps, ensure_ascii=False, indent=4) for rep in reps["replies"]: print "======================================================" if reps["grade"] == 0: scenario_file = "scenario_c09.txt" elif reps["grade"] == 1: scenario_file = "scenario_c09.txt" else: scenario_file = "scenario_c09.txt" sent = api.sentences(rep["text"]) text = [] for s in sent["sentences"]: print "-------------------------------------------------------" print s morphs = api.morph(s) print json.dumps(morphs, ensure_ascii=False, indent=4) query = list() for morph in morphs["morphs"]: query.append(u"{}:{}".format(morph["surface"], morph["pos"])) print json.dumps(query, ensure_ascii=False, indent=4) texts = api.trigger(scenario_file, query) print json.dumps(texts, ensure_ascii=False, indent=4) for t in texts["texts"]: text.append(t) print t