Esempio n. 1
0
def predict_sentiment_file():
    """Predict labels for the sentiment test file and print the accuracy.

    Reads ``test.tsv`` from the hard-coded sentiment data directory, passes
    the ``text`` column to ``NLPPredictor.predict_rest``, stores the ``"data"``
    entry of the response in a new ``predict_label`` column and writes the
    table (tab-separated, no header, no index) to ``test_predict.tsv`` in the
    same directory. Finally prints the accuracy and the number of rows.

    Accuracy compares positions 1 and 2 of every row, so the input file is
    presumably two columns wide with the gold label in the second column and
    the appended prediction landing in the third -- TODO confirm against the
    data files.
    """
    model_path = "/home/kuan.li/msnlp/output_models/sentiment"
    pretrain_type = "bert"
    task_name = "text_classification"
    data_dir = "/home/kuan.li/msnlp/data/sentiment"
    test_file = os.path.join(data_dir, "test.tsv")
    output_file = os.path.join(data_dir, "test_predict.tsv")

    test_set = pd.read_table(test_file, sep='\t')
    texts = {"text": test_set.text.tolist()}

    predictor = NLPPredictor(task_name, model_path=model_path, pretrain_type=pretrain_type)
    pres = predictor.predict_rest(texts)
    test_set["predict_label"] = pres["data"]
    test_set.to_csv(path_or_buf=output_file, sep='\t', header=False, index=False)
    print("file saved.")

    total = len(test_set)
    # Positional comparison: row[1] vs. row[2] (see docstring).
    num_correct = sum(1 for row in test_set.values if row[1] == row[2])
    # Guard the division: an empty test file would otherwise raise
    # ZeroDivisionError after the output file has already been written.
    accuracy = float(num_correct) / total if total else 0.0
    print("Accuracy:{}".format(accuracy))
    print("total lines:{}".format(total))
Esempio n. 2
0
def predict_nextintent_file(topn=1):
    """Predict top-n labels for the next-intent test file and print accuracies.

    Reads ``test.tsv`` from the hard-coded next-intent data directory, passes
    the ``text`` column to ``NLPPredictor.predict_rest_topn``, stores the
    ``"data"`` entry of the response in a new ``predict_label`` column and
    writes the table (tab-separated, no header, no index) to
    ``test_predict.tsv`` in the same directory. Then prints the top-1 through
    top-``topn`` accuracy and the number of rows.

    Args:
        topn: Number of candidates requested from the predictor; also the
            largest k for which a top-k accuracy is printed.

    A row counts as correct at rank k when the value at position 1 is among
    the first k items of the value at position 2. This presumably means the
    gold label is the second input column and each prediction is a ranked
    sequence of labels -- TODO confirm against the predictor and data files.
    """
    model_path = "/home/kuan.li/msnlp/output_models/nextintent"
    pretrain_type = "bert"
    task_name = "text_classification"
    data_dir = "/home/kuan.li/msnlp/data/nextintent"
    test_file = os.path.join(data_dir, "test.tsv")
    output_file = os.path.join(data_dir, "test_predict.tsv")

    test_set = pd.read_table(test_file, sep='\t')
    texts = {"text": test_set.text.tolist()}

    predictor = NLPPredictor(task_name, model_path=model_path, pretrain_type=pretrain_type)
    pres = predictor.predict_rest_topn(texts, topn)
    # Predictions are stored exactly as returned by the predictor.
    test_set["predict_label"] = pres["data"]
    test_set.to_csv(path_or_buf=output_file, sep='\t', header=False, index=False)
    print("file saved.")

    # Both values are loop-invariant, so compute them once.
    total = len(test_set)
    rows = test_set.values
    for i in range(topn):
        num_correct = 0
        for row in rows:
            if row[1] in set(row[2][:i + 1]):
                num_correct += 1
        # Guard the division: an empty test file would otherwise raise
        # ZeroDivisionError after the output file has already been written.
        acc = float(num_correct) / total if total else 0.0
        print("top_{topN} Accuracy:{acc}".format(topN=i + 1, acc=acc))
    print("total lines:{}".format(total))
Esempio n. 3
0
def test_toutiao():
    """Run the text-classification predictor over the toutiao dataset directory.

    Builds an ``NLPPredictor`` from the hard-coded model path and hands the
    hard-coded data directory to its ``predict_testdataset`` method. The
    function returns nothing.
    """
    dataset_dir = "/home/dingnan.jin/projects/nlpsdk/data/classification/toutiao"
    checkpoint_dir = "/home/dingnan.jin/projects/nlpsdk/testout/toutiao"
    model_name = "bert-base-chinese"  # assigned but not referenced below

    classifier = NLPPredictor(
        "text_classification",
        model_path=checkpoint_dir,
        pretrain_type="bert",
    )
    classifier.predict_testdataset(dataset_dir)
Esempio n. 4
0
def predict_tnews_topn(topn=1):
    """Print the top-n predictions of the tnews model for four sample texts.

    Args:
        topn: Forwarded unchanged as the second argument of
            ``NLPPredictor.predict_rest_topn``.
    """
    samples = [
        "以色列暗杀叙利亚7名“国家英雄”的行为该如何定性?",
        "小学生高校食堂写作业:不会的问大学生,他们也做错,只能靠自己",
        "芜湖有哪些上市公司?",
        "赵本山在东北的地位如何?最近怎么没有消息了呢?",
    ]
    classifier = NLPPredictor(
        "text_classification",
        model_path="/home/kuan.li/msnlp/output_models/tnews",
        pretrain_type="bert",
    )
    response = classifier.predict_rest_topn({"text": samples}, topn)
    print(response)
Esempio n. 5
0
def predict_xma():
    """Print the predictions of the xma model for four sample texts.

    The samples are wrapped in a ``{"text": [...]}`` payload and sent through
    ``NLPPredictor.predict_rest``; the raw response is printed.
    """
    samples = [
        "几点到几点上班",
        "贷20000利息多少",
        "我不用了,怎么解绑",
        "立即还款里显示我没借过钱 我怎么还",
    ]
    classifier = NLPPredictor(
        "text_classification",
        model_path="/home/dingnan.jin/projects/nlpsdk/testout/xma",
        pretrain_type="bert",
    )
    response = classifier.predict_rest({"text": samples})
    print(response)
Esempio n. 6
0
def predict_sst():
    """Print the predictions of the SST-2 model for four sample sentences.

    The samples are wrapped in a ``{"text": [...]}`` payload and sent through
    ``NLPPredictor.predict_rest``; the raw response is printed. Leading and
    trailing whitespace in the samples is part of the input and is kept as is.
    """
    samples = [
        "  it 's just incredibly dull",
        "it 's a charming and often affecting journey . ",
        "unflinchingly bleak and desperate",
        "allows us to hope that nolan is poised to embark a major career as a commercial yet inventive filmmaker .",
    ]
    classifier = NLPPredictor(
        "text_classification",
        model_path="/home/dingnan.jin/projects/nlpsdk/testout/SST-2",
        pretrain_type="bert",
    )
    response = classifier.predict_rest({"text": samples})
    print(response)
Esempio n. 7
0
def predict_nextintent_topn(topn=1):
    """Print the top-n predictions of the next-intent model for four samples.

    Args:
        topn: Forwarded unchanged as the second argument of
            ``NLPPredictor.predict_rest_topn``.
    """
    samples = [
        "放款未到账我要举报±贷款到账时间咨询§到银监会举报±投诉咨询§",
        "可以二次申请贷款吗?±二次申请§",
        "提现失败怎么办?±提现失败§贷款什么时候能到账?±贷款到账时间咨询§贷款审核需要多久?±审核时间要多久§?",
        "额度没变是不是意味着审核失败?±借款成功额度变化咨询§贷款审核状态查不了啊±审核是否通过查询§",
    ]
    # NOTE(review): the original author listed four labels here, presumably
    # the expected prediction for each sample in order -- confirm:
    #   1. loan arrival-time inquiry
    #   2. withdrawal failed
    #   3. withdrawal failed
    #   4. query whether the review has passed
    classifier = NLPPredictor(
        "text_classification",
        model_path="/home/kuan.li/msnlp/output_models/nextintent",
        pretrain_type="bert",
    )
    response = classifier.predict_rest_topn({"text": samples}, topn)
    print(response)