コード例 #1
0
def test():
    """Smoke-run region detection and sentiment detection on three sample rows.

    Builds a small DataFrame with ``text`` and ``src`` columns, passes it
    through ``Region.region_detect`` and then ``Sentiment.sentiment_detect``,
    and prints the intermediate frame, the detection result, and the output
    of ``Sentiment.output_record`` for the source "北京".
    """
    sentiment = Sentiment()
    region_dict_path = "/Users/aaronhe/Documents/NutStore/Aaron He/FDU/Big-Data-Communication/Stereotype-Analysis-in-NetEase-News-Comments/Dict/region_dict/region.txt"
    region = Region(region_dict_path)

    # Build the input data: each row is [comment text, source label].
    samples = [
        ["潮汕人很帅,湖北人挺会做生意的!", "上海"],
        ["老铁牛逼!", "重庆"],
        ["我觉得很好吃啊", "北京"],
    ]
    frame = pd.DataFrame(samples, columns=["text", "src"])
    print(frame.head())

    # Region detection over the "text" column.
    # NOTE(review): presumably this is what adds the region_1..region_3
    # columns consumed below -- confirm against Region.region_detect.
    frame = region.region_detect(frame, on=["text"])

    # Sentiment detection, using "src" as the source column and the three
    # region columns as the targets.
    detected = sentiment.sentiment_detect(
        frame,
        on=["text"],
        srcs=["src"],
        dists=["region_1", "region_2", "region_3"],
    )
    print(detected)

    summary = sentiment.output_record(src="北京")
    print(summary)
コード例 #2
0
def main():
    """Run sentiment detection on one date's selected comments and save results.

    The date tag is taken from the first command-line argument. The input is
    read from ``./new_data/<date>_select_comments.p``; two pickles are written
    to the same directory: ``<date>_sentiment.p`` (the object returned by
    ``Sentiment.sentiment_detect``) and ``<date>_senti_freq.p`` (the object
    returned by ``Sentiment.table_record``). Both results are also printed.

    Raises:
        SystemExit: if no date argument was given on the command line.
    """
    # Fail with a usage hint rather than a bare IndexError when the date
    # argument is missing.
    if len(sys.argv) < 2:
        prog = sys.argv[0] if sys.argv else "main"
        sys.exit("usage: %s <date>" % prog)

    # Data loading
    date = sys.argv[1]
    path_prefix = "./new_data"
    # NOTE(review): unpickling can execute arbitrary code; this is only safe
    # as long as the files under path_prefix come from a trusted source.
    df = pd.read_pickle(
        os.path.join(path_prefix, "%s_select_comments.p" % date))

    # Model loading and detection
    s = Sentiment()
    df = s.sentiment_detect(df,
                            on=["content"],
                            srcs=["province"],
                            dists=["region_1", "region_2", "region_3"])
    df_freq = s.table_record()

    # Save results
    df.to_pickle(os.path.join(path_prefix, "%s_sentiment.p" % date))
    df_freq.to_pickle(os.path.join(path_prefix, "%s_senti_freq.p" % date))
    print(df)
    print(df_freq)