def test():
    """Run three sample comments through region and sentiment detection.

    Builds a small DataFrame with a ``text`` and a ``src`` column, passes it
    through ``Region.region_detect`` and ``Sentiment.sentiment_detect``, and
    prints the intermediate and final results, followed by the record
    returned by ``output_record`` for the source "北京".
    """
    sentiment = Sentiment()
    region = Region(
        "/Users/aaronhe/Documents/NutStore/Aaron He/FDU/Big-Data-Communication/Stereotype-Analysis-in-NetEase-News-Comments/Dict/region_dict/region.txt"
    )

    # Build the input data: one [comment text, source] pair per row.
    column_names = ["text", "src"]
    sample_rows = [
        ["潮汕人很帅,湖北人挺会做生意的!", "上海"],
        ["老铁牛逼!", "重庆"],
        ["我觉得很好吃啊", "北京"],
    ]
    frame = pd.DataFrame(sample_rows, columns=column_names)
    print(frame.head())

    # Batch-add the region fields to the DataFrame.
    frame = region.region_detect(frame, on=["text"])

    region_columns = ["region_1", "region_2", "region_3"]
    detected = sentiment.sentiment_detect(
        frame,
        on=["text"],
        srcs=["src"],
        dists=region_columns,
    )
    print(detected)

    beijing_record = sentiment.output_record(src="北京")
    print(beijing_record)
def main():
    """Run sentiment detection over one date's selected comments and save it.

    The date tag is taken from the first command-line argument. The function
    loads ``<date>_select_comments.p`` from ``./new_data``, runs
    ``Sentiment.sentiment_detect`` on it, fetches the table returned by
    ``Sentiment.table_record``, writes both results back to ``./new_data`` as
    ``<date>_sentiment.p`` and ``<date>_senti_freq.p``, and prints them.

    Raises:
        SystemExit: if no date argument was given on the command line.
    """
    # Fail with a usage hint instead of a bare IndexError when the date
    # argument is missing.
    if len(sys.argv) < 2:
        prog = sys.argv[0] if sys.argv else "script"
        raise SystemExit("Usage: %s <date>" % prog)

    # Load the data.
    date = sys.argv[1]
    path_prefix = "./new_data"
    df = pd.read_pickle(
        os.path.join(path_prefix, "%s_select_comments.p" % date))

    # Load the model and run detection.
    s = Sentiment()
    df = s.sentiment_detect(
        df,
        on=["content"],
        srcs=["province"],
        dists=["region_1", "region_2", "region_3"],
    )
    df_freq = s.table_record()

    # Save the results.
    df.to_pickle(os.path.join(path_prefix, "%s_sentiment.p" % date))
    df_freq.to_pickle(os.path.join(path_prefix, "%s_senti_freq.p" % date))

    print(df)
    print(df_freq)