def triple_classifier(tweet):
    """Classify the sentiment of one tweet and return its integer label.

    Label meanings (translated from the original docstring):
        0 neutral, 1 positive, 2 angry, 3 anxious, 4 sad, 5 disgusted,
        6 other negative.

    The decision runs in up to three stages:
      1. ``emoticon`` is consulted first; any result other than ``MIDDLE``
         settles the label without looking at the word scores.
      2. Otherwise the tokens are scored against ``step1_score``; unless the
         index-0 product is <= the index-1 product the tweet is ``MIDDLE``.
      3. Otherwise the tokens are scored against ``step2_score``; an index-0
         win yields ``POSITIVE`` and everything else is delegated to
         ``flow_psychology_classfiy`` (a 0 from it is reported as 6).

    Args:
        tweet: mapping whose ``'text'`` entry is either a ``unicode`` object
            or a UTF-8 encoded byte string.

    Returns:
        The integer sentiment label.
    """
    raw_text = tweet['text']
    if isinstance(raw_text, unicode):
        text = raw_text
    else:
        text = raw_text.decode('utf-8')
    encoded = text.encode('utf-8')

    emoticon_label = emoticon(encoded)
    if emoticon_label != MIDDLE:
        # NOTE(review): the tokens are computed here exactly as before, but
        # this variant never returns them; kept so the call sequence is
        # unchanged.
        keywords_list = [
            piece.decode('utf-8', 'ignore') for piece in cut(cut_str, encoded)
        ]
        if emoticon_label == POSITIVE:
            return emoticon_label
        lexicon_label = flow_psychology_classfiy(encoded)
        return 6 if lexicon_label == 0 else lexicon_label

    if text == u'':
        # Nothing to score: keep the initial label.
        return 0

    tokens = [
        piece.decode('utf-8', 'ignore') for piece in cut(cut_str, encoded)
    ]

    # Step 1: running products of the per-token scores, one per column.
    first_bow = dictionary_1.doc2bow(tokens)
    column0 = 1
    column1 = 1
    for pair in first_bow:
        token_id, count = pair[0], pair[1]
        weights = step1_score[token_id]
        column0 = column0 * (weights[0] ** count)
        column1 = column1 * (weights[1] ** count)
    if not (column0 <= column1):
        return MIDDLE

    # Step 2: same product scheme against the second dictionary/score table.
    second_bow = dictionary_2.doc2bow(tokens)
    positive_product = 1
    other_product = 1
    for pair in second_bow:
        token_id, count = pair[0], pair[1]
        weights = step2_score[token_id]
        positive_product = positive_product * (weights[0] ** count)
        other_product = other_product * (weights[1] ** count)
    if positive_product > other_product:
        return POSITIVE

    lexicon_label = flow_psychology_classfiy(encoded)
    if lexicon_label == 0:
        # The lexicon classifier found nothing specific: "other negative".
        return 6
    return lexicon_label
def triple_classifier(tweet):
    """Classify the sentiment of one tweet and return its integer label.

    Label meanings (translated from the original docstring):
        0 neutral, 1 positive, 2 angry, 3 anxious, 4 sad, 5 disgusted,
        6 other negative.

    Decision order:
      1. ``emoticon(text)``: a non-zero result decides the label. ``HAPPY``
         is returned directly; otherwise ``flow_psychology_classfiy`` is
         asked and, if it answers 0, the emoticon label is used instead.
      2. Step-1 word scores: unless the index-0 product is <= the index-1
         product the label is 0.
      3. Step-2 word scores (three columns): a strict index-0 win gives
         ``HAPPY``; otherwise ``flow_psychology_classfiy`` decides, and when
         it answers 0 a strict index-1 win gives ``SAD``, a strict index-2
         win gives ``ANGRY`` and anything else gives 6.

    Args:
        tweet: mapping with a ``"text"`` entry that is passed unchanged to
            ``emoticon``, ``cut`` and ``flow_psychology_classfiy``.

    Returns:
        The integer sentiment label.
    """
    text = tweet["text"]

    emoticon_label = emoticon(text)
    if emoticon_label != 0:
        # NOTE(review): the tokens are computed here exactly as before, but
        # this variant never returns them; kept so the call sequence is
        # unchanged.
        keywords_list = [
            piece.decode("utf-8", "ignore") for piece in cut(cut_str, text)
        ]
        if emoticon_label == HAPPY:
            return emoticon_label
        lexicon_label = flow_psychology_classfiy(text)
        return emoticon_label if lexicon_label == 0 else lexicon_label

    if text == "":
        # Nothing to score: keep the initial label.
        return 0

    tokens = [piece.decode("utf-8", "ignore") for piece in cut(cut_str, text)]

    # Step 1: running products of the per-token scores, one per column.
    first_bow = dictionary_1.doc2bow(tokens)
    column0 = 1
    column1 = 1
    for pair in first_bow:
        token_id, count = pair[0], pair[1]
        weights = step1_score[token_id]
        column0 = column0 * (weights[0] ** count)
        column1 = column1 * (weights[1] ** count)
    if not (column0 <= column1):
        return 0

    # Step 2: three-column products against the second dictionary.
    second_bow = dictionary_2.doc2bow(tokens)
    happy_product = 1
    sad_product = 1
    angry_product = 1
    for pair in second_bow:
        token_id, count = pair[0], pair[1]
        weights = step2_score[token_id]
        happy_product = happy_product * (weights[0] ** count)
        sad_product = sad_product * (weights[1] ** count)
        angry_product = angry_product * (weights[2] ** count)
    if happy_product > sad_product and happy_product > angry_product:
        return HAPPY

    lexicon_label = flow_psychology_classfiy(text)
    if lexicon_label == 0:
        # Fall back to whichever negative column strictly dominates.
        if sad_product > happy_product and sad_product > angry_product:
            return SAD
        if angry_product > sad_product and angry_product > happy_product:
            return ANGRY
        return 6
    return lexicon_label
def triple_classifier(tweet):
    """Classify the sentiment of one tweet and return its label and tokens.

    Label meanings (translated from the original docstring):
        0 neutral, 1 positive, 2 angry, 3 anxious, 4 sad, 5 disgusted,
        6 other negative.

    Decision order:
      1. ``emoticon(text)``: a non-zero result decides the label. ``HAPPY``
         is used directly; otherwise ``flow_psychology_classfiy`` is asked
         and, if it answers 0, the emoticon label is used instead.
      2. Step-1 word scores: unless the index-0 product is <= the index-1
         product the label is 0.
      3. Step-2 word scores (three columns): a strict index-0 win gives
         ``HAPPY``; otherwise ``flow_psychology_classfiy`` decides, and when
         it answers 0 a strict index-1 win gives ``SAD``, a strict index-2
         win gives ``ANGRY`` and anything else gives 6.

    Classification is best-effort: if any helper raises an ``Exception`` the
    error is logged and whatever label/tokens were determined so far are
    returned. ``KeyboardInterrupt`` and ``SystemExit`` are no longer
    swallowed.

    Args:
        tweet: mapping with a ``'text_ch'`` entry. The value is passed as-is
            to ``emoticon`` and ``flow_psychology_classfiy`` and UTF-8
            encoded before being handed to ``cut``.

    Returns:
        A ``(sentiment, keywords_list)`` tuple: the integer label and the
        list of decoded tokens (empty if no tokenisation happened).

    Raises:
        KeyError: if ``tweet`` has no ``'text_ch'`` entry (the lookup happens
            outside the guarded section, as before).
    """
    import logging

    sentiment = 0
    keywords_list = []
    text = tweet['text_ch']

    try:
        emoticon_sentiment = emoticon(text)
        if emoticon_sentiment != 0:
            entries = cut(cut_str, text.encode('utf-8'))
            keywords_list = [e.decode('utf-8', 'ignore') for e in entries]
            if emoticon_sentiment == HAPPY:
                sentiment = emoticon_sentiment
            else:
                sentiment = flow_psychology_classfiy(text)
                if sentiment == 0:
                    # Lexicon found nothing: trust the emoticon label.
                    sentiment = emoticon_sentiment
        elif text != u'':
            entries = cut(cut_str, text.encode('utf-8'))
            keywords_list = [e.decode('utf-8', 'ignore') for e in entries]

            # Step 1: running products of the per-token scores, per column.
            bow = dictionary_1.doc2bow(keywords_list)
            s = [1, 1]
            for pair in bow:
                s[0] = s[0] * (step1_score[pair[0]][0] ** pair[1])
                s[1] = s[1] * (step1_score[pair[0]][1] ** pair[1])

            if s[0] <= s[1]:
                # Step 2: three-column products against the second table.
                bow = dictionary_2.doc2bow(keywords_list)
                s = [1, 1, 1]
                for pair in bow:
                    s[0] = s[0] * (step2_score[pair[0]][0] ** pair[1])
                    s[1] = s[1] * (step2_score[pair[0]][1] ** pair[1])
                    s[2] = s[2] * (step2_score[pair[0]][2] ** pair[1])

                if s[0] > s[1] and s[0] > s[2]:
                    sentiment = HAPPY
                else:
                    sentiment = flow_psychology_classfiy(text)
                    if sentiment == 0:
                        # Fall back to whichever negative column strictly
                        # dominates; ties end up as "other negative".
                        if s[1] > s[0] and s[1] > s[2]:
                            sentiment = SAD
                        elif s[2] > s[1] and s[2] > s[0]:
                            sentiment = ANGRY
                        else:
                            sentiment = 6
            else:
                sentiment = 0
    except Exception:
        # Best-effort: keep returning the partial result, but leave a trace
        # instead of silently discarding the failure.
        logging.getLogger(__name__).exception(
            "triple_classifier failed; returning partial result"
        )

    return sentiment, keywords_list