Code example #1
#!/usr/bin/python
# -*- coding: utf-8 -*-
from yujv_process import process_line
from load_mysql import load_dataset
from GBDT_cidai import dafu_prediction
# Data matching: find a disease name in the latest question, falling back to
# earlier questions in the conversation, then to an explicit user prompt.
disease_list = load_dataset("疾病库")  # load the disease-name lexicon
question_list = []  # conversation history, most recent question last

line = question_list[-1]  # latest question (assumes history is non-empty)
line = process_line(line)  # tokenize / preprocess the sentence

# BUG FIX: the original did `line2 = line.append(i)`, but list.append returns
# None, so dafu_prediction was called with None. It also ran the "not found"
# fallback once per non-matching token instead of once overall.
if any(token in disease_list for token in line):
    # The latest question already mentions a known disease.
    leibie, dafu = dafu_prediction(line)
    print(leibie + '\n' + dafu)
else:
    # Scan earlier questions, newest first, for a disease mention.
    found = False
    for prev_question in reversed(question_list[:-1]):
        prev_tokens = process_line(prev_question)
        for token in prev_tokens:
            if token in disease_list:
                # Carry the disease name into the current question's tokens.
                line.append(token)
                leibie, dafu = dafu_prediction(line)
                print(leibie + '\n' + dafu)
                found = True
                break
        if found:
            break
    if not found:
        # No disease name anywhere in the history: ask the user directly.
        print("不好意思没有找到您所说的疾病名称")
        disease_name = input("请输入您描述的疾病名称:")
        line.append(disease_name)
        leibie, dafu = dafu_prediction(line)
        print(leibie + '\n' + dafu)
Code example #2
File: GBDT_cidai.py  Project: wutonghua/fenlei
#!/usr/bin/python
# -*- coding: utf-8 -*-
#导入常用的函数包
import random
from sklearn.model_selection import train_test_split
from preprocess import preprocess
from preprocess import preprocess1
from classifierGBDT import TextClassifier
from load_mysql import load_dataset
from load_mysql import processing_null
from cos import ComputerNearestNeighbor
# Load the four category tables (cause, diagnosis, symptom, treatment).
df_bingyin_list, df_zhenduan_list, df_zhengzhuang_list, df_zhiliao_list = (
    load_dataset(table)
    for table in ('bingyin', 'zhenduan', 'zhengzhuang', 'zhiliao')
)

# Strip null markers from each category and cap it at 1000 rows.
df_bingyin_word = processing_null(df_bingyin_list)[0:1000]
df_zhenduan_word = processing_null(df_zhenduan_list)[0:1000]
df_zhengzhuang_word = processing_null(df_zhengzhuang_list)[0:1000]
df_zhiliao_word = processing_null(df_zhiliao_list)[0:1000]

# Convert each cleaned frame to a plain list of rows.
bingyin, zhenduan, zhengzhuang, zhiliao = (
    frame.values.tolist()
    for frame in (df_bingyin_word, df_zhenduan_word,
                  df_zhengzhuang_word, df_zhiliao_word)
)

# Accumulator: the per-category rows are gathered into one sentence list.
sentences = []
Code example #3
from load_mysql import load_dataset
from load_mysql import processing_null
import pandas as pd
import gensim
import jieba
from random import shuffle
import multiprocessing
# Load the stop-word list: one word per line, tab-separated, UTF-8,
# quoting=3 (QUOTE_NONE) so quote characters are kept literally.
stopwords = pd.read_csv('data/stopwords.txt',
                        index_col=False,
                        quoting=3,
                        sep="\t",
                        names=['stopword'],
                        encoding='utf-8')['stopword'].values

# Load each category table, drop null markers, keep at most 1000 rows.
df_bingyin = processing_null(load_dataset('bingyin'))[0:1000]
df_zhenduan = processing_null(load_dataset('zhenduan'))[0:1000]
df_zhengzhuang = processing_null(load_dataset('zhengzhuang'))[0:1000]
df_zhiliao = processing_null(load_dataset('zhiliao'))[0:1000]

# Stack the four categories row-wise into a single frame.
frames = [df_bingyin, df_zhenduan, df_zhengzhuang, df_zhiliao]
df = pd.concat(frames, axis=0, join='outer')