def SplitData(self, index_load: str = "false"):
    """Split ``self.xys_c_end`` into train/test arrays and cache them on ``self``.

    Args:
        index_load: Selects where the split comes from.
            ``"false"`` (default) resets ``self.train_index_list`` and
            ``self.test_index_list`` to empty lists and delegates the split to
            ``Kc.SplitGroup``, passing both lists along (presumably so that
            ``Kc.SplitGroup`` can record the chosen row indices in them --
            confirm against ``Kc.SplitGroup``).
            ``"true"`` calls ``self.SplitData_Load()`` and then slices the data
            with the row indices held in ``self.train_index_list`` /
            ``self.test_index_list``; column 0 is taken as the label and the
            remaining columns as the features.

    Returns:
        The tuple ``(x_train, y_train, x_test, y_test)``. Both label arrays are
        cast to ``np.float64``. The same four arrays are also stored on
        ``self`` under the same names.

    Raises:
        ValueError: If ``index_load`` is neither ``"false"`` nor ``"true"``.
            Previously such a value skipped both branches and surfaced as an
            ``UnboundLocalError`` a few lines later.
    """
    # Validate first so a typo fails loudly before any state on self is touched.
    if index_load not in ("false", "true"):
        raise ValueError(
            'index_load must be "false" or "true", got {!r}'.format(index_load))

    xys_dd = self.xys_c_end.values

    if index_load == "false":
        # Fresh split: start from empty index lists and let Kc.SplitGroup work.
        self.train_index_list = []
        self.test_index_list = []
        x_train, y_train, x_test, y_test = Kc.SplitGroup(
            xys_dd,
            train_index_list=self.train_index_list,
            test_index_list=self.test_index_list)
    else:
        # Re-use a stored split: column 0 holds the label, the rest are features.
        self.SplitData_Load()
        y_test = xys_dd[self.test_index_list, 0]
        x_test = xys_dd[self.test_index_list, 1:]
        y_train = xys_dd[self.train_index_list, 0]
        x_train = xys_dd[self.train_index_list, 1:]

    # NOTE(review): the casts are applied to both branches; the original
    # indentation was lost, so confirm they were not meant for "true" only.
    y_train = y_train.astype(np.float64)
    y_test = y_test.astype(np.float64)

    self.x_train = x_train
    self.y_train = y_train
    self.x_test = x_test
    self.y_test = y_test
    return x_train, y_train, x_test, y_test
def main():
    """Train an LSTM/Conv1D classifier on word2vec vectors of the illness-history text.

    Steps:
        1. Read ``save_CNN_text_2_datas.csv`` and turn it into a text/label frame
           with ``Kc.DataFrameToXY``; rows labelled ``"-1"`` are discarded.
        2. Segment every ``HistoryOfPastIllness`` text with THULAC, train a
           Word2Vec model on the token lists and save it.
        3. Represent each document by the mean of its word vectors, split the
           data with ``Kc.SplitGroup`` and train the Keras model.
        4. Plot the training curves, then write the history and a copy of the
           script source into ``<path_net>/result/CNN_text_2``.

    Returns:
        ``0`` once everything has been written.
    """
    # Cluster location of the same data:
    # path_net = os.path.join('/', 'public', 'home', 'liqi', 'data', 'analysis', 'transcribe_CNN')
    path_net = os.path.join('Z:\\', 'datas', 'analysis')
    result_dir = os.path.join(path_net, 'result', 'CNN_text_2')

    datas = pd.read_csv(os.path.join(path_net, "save_CNN_text_2_datas.csv"))
    text_xys = Kc.DataFrameToXY(
        datas, ["HistoryOfPastIllness"], "MajorDiagnosisCoding",
        manualFilePath=os.path.join(
            path_net, r"CodingToClass_1to16 - new 多分类.csv"))
    text_xys = text_xys[text_xys["MajorDiagnosisCoding"] != "-1"]

    # Vectorise the text.
    thu1 = thulac.thulac(seg_only=True)  # default mode
    tokenised_docs = []
    # Iterate over the values themselves. The previous label lookup with
    # range(len(...)) ran on a filtered frame whose index has gaps, so dropped
    # labels became "" and rows with labels >= len(...) were never read, which
    # misaligned texts and labels.
    for raw_text in text_xys["HistoryOfPastIllness"]:
        if not isinstance(raw_text, str):
            raw_text = ""  # missing value -> empty document
        # Word segmentation. With text=True THULAC is documented to return one
        # space-separated string, so split it into the token list Word2Vec
        # expects (a plain string would be consumed character by character).
        segmented = thu1.cut(raw_text, text=True)
        tokenised_docs.append(segmented.split())

    model = gensim.models.word2vec.Word2Vec(tokenised_docs,
                                            min_count=1,
                                            iter=20)
    model.save(os.path.join(path_net, r'model_text_word2vec.model'))

    # A Word2Vec model has no ``docvecs`` (that attribute belongs to Doc2Vec),
    # so build one vector per document by averaging its word vectors.
    # min_count=1 keeps every token in the vocabulary; documents without any
    # token get a zero vector.
    # NOTE(review): averaging is a deliberate replacement for the former
    # ``model.docvecs.vectors_docs`` access -- confirm it is the wanted
    # document representation.
    vector_size = model.wv.vector_size
    text_x_vds = []
    for tokens in tokenised_docs:
        if tokens:
            doc_vec = np.mean([model.wv[token] for token in tokens], axis=0)
        else:
            doc_vec = np.zeros(vector_size, dtype=np.float32)
        text_x_vds.append(doc_vec)

    text_xys.insert(1, 'HistoryOfPastIllness_vec', text_x_vds)
    text_xys.drop(['HistoryOfPastIllness'], axis=1, inplace=True)

    text_xys_np = text_xys.values
    x_train, y_train, x_test, y_test = Kc.SplitGroup(text_xys_np)
    # Classes that never occur were removed, only classes 1 and 2 remain;
    # shift them down to 0 and 1.
    y_train = y_train.astype(np.float64) - 1
    y_test = y_test.astype(np.float64) - 1

    # Each row holds a single cell containing the document vector; unpack it
    # into a dense 2-D array and append a channel axis for the sequence layers.
    x_train = np.array([row[0].tolist() for row in x_train.tolist()])
    x_test = np.array([row[0].tolist() for row in x_test.tolist()])
    x_train = x_train.reshape(x_train.shape[0], x_train.shape[1], 1)
    x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], 1)

    # Labels are floats; the Dense layer needs an integer unit count.
    numclasses = int(y_train.max()) + 1
    onesharp = len(x_train[0])

    # Make sure the output directory exists before the long training run, so
    # that neither the figure nor the history write can fail on a missing path.
    os.makedirs(result_dir, exist_ok=True)

    # Build the network.
    print("搭建神经网络")
    model = tf.keras.Sequential([
        layers.LSTM(16, return_sequences=True, input_shape=(onesharp, 1)),
        layers.Conv1D(32, 5, padding='same', activation=tf.nn.relu),
        layers.Conv1D(64, 5, padding='same', activation=tf.nn.relu),
        layers.MaxPooling1D(5),
        layers.Dropout(0.25),
        layers.LSTM(64, return_sequences=True),
        layers.Conv1D(128, 5, padding='same', activation=tf.nn.relu),
        layers.Conv1D(64, 5, padding='same', activation=tf.nn.relu),
        layers.MaxPooling1D(5),
        layers.Dropout(0.25),
        layers.LSTM(64, return_sequences=True),
        layers.Conv1D(32, 5, padding='same', activation=tf.nn.relu),
        layers.GlobalAveragePooling1D(),
        layers.Dropout(0.5),
        layers.Dense(numclasses, activation=tf.nn.softmax)
    ])
    model.compile(optimizer=tf.keras.optimizers.Adam(0.001),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    # summary() prints by itself and returns None, so it is not wrapped in print().
    model.summary()

    print("训练神经网络")
    metrics = Kc.Metrics(validation_data=(x_test, y_test))
    history = model.fit(
        x_train,
        y_train,
        batch_size=400,
        validation_data=(x_test, y_test),
        epochs=500,
        callbacks=[
            metrics,
            TensorBoard(log_dir=os.path.join(
                'logs', "模型名-{}".format(int(time.time()))))
        ])

    nowtime = str(int(time.time()))
    try:
        plt.plot(history.history['accuracy'])
        plt.plot(history.history['val_accuracy'])
        plt.plot(metrics.val_f1s)
        plt.plot(metrics.val_recalls)
        plt.plot(metrics.val_precisions)
        plt.legend([
            'training', 'validation', 'val_f1', 'val_recall', 'val_precision'
        ], loc='upper left')
        plt.savefig(os.path.join(result_dir, nowtime + '_cnn_result' + '.png'))
        plt.show()
    except Exception as err:
        # Plotting is best effort (e.g. no display available); report the cause
        # instead of hiding it, but let KeyboardInterrupt/SystemExit through.
        print("无法画图")
        print(repr(err))
    finally:
        # The training history is written whatever happened while plotting.
        with open(os.path.join(result_dir, nowtime + '_history.txt'),
                  'w+') as f:
            f.write(str(history.history))

    # Keep a snapshot of the script next to its results. Read and write with
    # the same encoding so the copy stays valid UTF-8 source on any platform.
    with open('cnn_text.py', 'r', encoding='utf-8') as src:
        source_code = src.read()
    with open(os.path.join(result_dir, nowtime + '_code' + '.py'),
              'w+', encoding='utf-8') as dst:
        dst.write(source_code)

    print('结束')
    return 0
def main():
    """Train the text + numeric "VGG16x" model and store every artefact of the run.

    Steps:
        1. Load the pre-processed ``xys_c_end.csv``, drop its leading index
           column, parse the stringified ``HistoryOfPastIllness_vec`` cells and
           set every non-zero ``MajorDiagnosisCoding`` to 1.
        2. Pass the frame through ``Generate_TLCPD`` and split it with
           ``Kc.SplitGroup``; the raw split is saved as ``.npy`` files.
        3. Separate the text vector (column 13) from the numeric features
           (columns 0-11), build the model with
           ``DefineModel_textAndNumerics_VGG16x`` and train it.
        4. Plot the curves and store the history, the model, the test inputs,
           the predictions and a copy of the script source.

    Returns:
        ``0`` once everything has been written.
    """
    nowtime = str(int(time.time()))
    py_filename = "cnn_mix_vgg16x_preSet"
    path_net = os.path.join('/', 'public', 'home', 'liqi', 'data', 'analysis', 'transcribe_CNN')
    # Windows location of the same data:
    # path_net = os.path.join('Z:', 'datas', 'analysis')
    result_dir = os.path.join(path_net, 'result', py_filename)
    model_dir = os.path.join(path_net, 'model', py_filename)
    run_dir = os.path.join(result_dir, nowtime + r'_' + py_filename)

    # Create every output directory up front, parents before children. The
    # per-run directory used to be created only after training, although the
    # split is saved into it right after loading.
    Kc.CreateDir(result_dir)
    Kc.CreateDir(model_dir)
    Kc.CreateDir(run_dir)

    # xys_c_end.csv is expected to exist already; the original kept a
    # commented-out PreprocessData(path_net) -> to_csv step at this point.
    xys_c_end = pd.read_csv(os.path.join(path_net, r'xys_c_end.csv'))
    xys_c_end.drop(xys_c_end.columns[[0]], axis=1, inplace=True)  # drop the extra index column
    # The vector cells are strings; whitespace that follows a digit is turned
    # into commas so the text parses as a Python literal.
    # NOTE(review): eval() executes whatever the CSV contains. That is only
    # acceptable while the file is produced by this project; consider
    # ast.literal_eval if the file can come from anywhere else.
    xys_c_end['HistoryOfPastIllness_vec'] = xys_c_end['HistoryOfPastIllness_vec'].apply(
        lambda x: eval(re.sub(r'(?<=\d)\s+', ',', x, flags=re.S)))
    # Collapse the labels to two classes: 0 stays 0, everything else becomes 1.
    xys_c_end.loc[xys_c_end['MajorDiagnosisCoding'] != 0, 'MajorDiagnosisCoding'] = 1
    xys_c_end = Generate_TLCPD(xys_c_end.copy())

    # Convert to a numpy array and split.
    xys_dd = xys_c_end.values
    x_train, y_train, x_test, y_test = Kc.SplitGroup(xys_dd)
    y_train = y_train.astype(np.float64)
    y_test = y_test.astype(np.float64)

    np.save(os.path.join(run_dir, 'x_train.npy'), x_train)
    np.save(os.path.join(run_dir, 'x_test.npy'), x_test)
    np.save(os.path.join(run_dir, 'y_train.npy'), y_train)
    # y_test is not modified afterwards, so this single save replaces the
    # second, identical write the script used to do after training.
    np.save(os.path.join(run_dir, 'y_test.npy'), y_test)

    def _with_channel_axis(batch):
        """Reshape a 2-D (samples, length) array to (samples, length, 1)."""
        return batch.reshape(batch.shape[0], batch.shape[1], 1)

    # Separate the two inputs: the text vector sits in column 13 and the
    # numeric features in columns 0-11.
    # NOTE(review): column 12 feeds neither input -- confirm this is intended.
    x_train_text = _with_channel_axis(np.array([row[13] for row in x_train]))
    x_train_numerics = _with_channel_axis(
        np.array([row[0:12] for row in x_train], dtype=np.float64))
    x_test_text = _with_channel_axis(np.array([row[13] for row in x_test]))
    x_test_numerics = _with_channel_axis(
        np.array([row[0:12] for row in x_test], dtype=np.float64))

    # Labels are floats; use an integer class count for the model definition.
    numclasses = int(y_train.max()) + 1

    # Build the network.
    print("搭建神经网络")
    model = DefineModel_textAndNumerics_VGG16x(
        [x_train_text.shape[1], x_train_numerics.shape[1]], numclasses)
    model.compile(optimizer=tf.keras.optimizers.Adam(0.001),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    # summary() prints by itself and returns None, so it is not wrapped in print().
    model.summary()

    print("训练神经网络")
    metrics1 = Kc.Mertics_f1s_recalls_precisions(
        validation_data=([x_test_text, x_test_numerics], y_test))
    history = model.fit([x_train_text, x_train_numerics],
                        y_train,
                        batch_size=400,
                        validation_data=([x_test_text, x_test_numerics], y_test),
                        epochs=300,
                        callbacks=[metrics1])
    y_test_result = model.predict([x_test_text, x_test_numerics])

    try:
        plt.plot(history.history['accuracy'])
        plt.plot(history.history['val_accuracy'])
        plt.plot(metrics1.val_f1s)
        plt.plot(metrics1.val_recalls)
        plt.plot(metrics1.val_precisions)
        plt.legend(['training', 'validation', 'val_f1', 'val_recall', 'val_precision'],
                   loc='upper left')
        plt.savefig(os.path.join(result_dir, nowtime + '_cnn_result' + '.png'))
        plt.ion()
        plt.show()
    except Exception as err:
        # Plotting is best effort (e.g. no display on the cluster); report the
        # cause instead of hiding it, but let KeyboardInterrupt/SystemExit through.
        print("无法画图")
        print(repr(err))
    finally:
        # History and model are stored whatever happened while plotting.
        with open(os.path.join(result_dir, nowtime + '_history.txt'), 'w+') as f:
            f.write(str(history.history))
        model.save(os.path.join(model_dir, nowtime + r'_' + py_filename + '.model'))

    np.save(os.path.join(run_dir, 'x_test_text.npy'), x_test_text)
    np.save(os.path.join(run_dir, 'x_test_numeric.npy'), x_test_numerics)
    np.save(os.path.join(run_dir, 'y_test_result.npy'), y_test_result)

    # Keep a snapshot of the script next to its results. Read and write with
    # the same encoding so the copy stays valid UTF-8 source on any platform.
    with open(py_filename + '.py', 'r', encoding='utf-8') as src:
        source_code = src.read()
    with open(os.path.join(result_dir, nowtime + '_code' + '.py'),
              'w+', encoding='utf-8') as dst:
        dst.write(source_code)

    print('结束')
    return 0
def main():
    """Train the text + numeric ``cnn_mix_1`` model and store its results.

    Steps:
        1. Load the pre-processed ``xys_c_end.csv`` and parse the stringified
           ``HistoryOfPastIllness_vec`` cells.
        2. Split the data with ``Kc.SplitGroup`` and separate the text vector
           (column 13) from the numeric features (columns 0-11); both inputs
           are reshaped to ``(samples, 1, length)``.
        3. Build the model with ``DefineModel_textAndNumerics`` and train it.
        4. Plot the curves and store the history, the model and a copy of the
           script source under ``<path_net>/result/cnn_mix_1`` and
           ``<path_net>/model/cnn_mix_1``.

    Returns:
        ``0`` once everything has been written.
    """
    path_net = os.path.join('/', 'public', 'home', 'liqi', 'data', 'analysis',
                            'transcribe_CNN')
    # Windows location of the same data:
    # path_net = os.path.join('Z:', 'datas', 'analysis')
    result_dir = os.path.join(path_net, 'result', 'cnn_mix_1')
    model_dir = os.path.join(path_net, 'model', 'cnn_mix_1')

    # xys_c_end.csv is expected to exist already; the original kept a
    # commented-out PreprocessData(path_net) -> to_csv step at this point.
    xys_c_end = pd.read_csv(os.path.join(path_net, r'xys_c_end.csv'))
    # NOTE(review): no leading index column is dropped here; if the CSV was
    # written with its index, the column positions used below shift by one --
    # confirm against the file.
    # The vector cells are strings; whitespace that follows a digit is turned
    # into commas so the text parses as a Python literal.
    # NOTE(review): eval() executes whatever the CSV contains. That is only
    # acceptable while the file is produced by this project; consider
    # ast.literal_eval if the file can come from anywhere else.
    xys_c_end['HistoryOfPastIllness_vec'] = xys_c_end[
        'HistoryOfPastIllness_vec'].apply(
            lambda x: eval(re.sub(r'(?<=\d)\s+', ',', x, flags=re.S)))

    # Convert to a numpy array and split.
    xys_dd = xys_c_end.values
    x_train, y_train, x_test, y_test = Kc.SplitGroup(xys_dd)
    y_train = y_train.astype(np.float64)
    y_test = y_test.astype(np.float64)

    def _as_single_step(batch):
        """Reshape a 2-D (samples, length) array to (samples, 1, length)."""
        return batch.reshape(batch.shape[0], 1, batch.shape[1])

    # Separate the two inputs: the text vector sits in column 13 and the
    # numeric features in columns 0-11.
    # NOTE(review): column 12 feeds neither input -- confirm this is intended.
    x_train_text = _as_single_step(np.array([row[13] for row in x_train]))
    x_train_numerics = _as_single_step(
        np.array([row[0:12] for row in x_train], dtype=np.float64))
    x_test_text = _as_single_step(np.array([row[13] for row in x_test]))
    x_test_numerics = _as_single_step(
        np.array([row[0:12] for row in x_test], dtype=np.float64))

    # Labels are floats; use an integer class count for the model definition.
    numclasses = int(y_train.max()) + 1

    # Make sure both output directories exist before the long training run.
    # Previously a missing directory made savefig fail silently and then let
    # the history write crash, losing the result of the whole run.
    os.makedirs(result_dir, exist_ok=True)
    os.makedirs(model_dir, exist_ok=True)

    # Build the network.
    print("搭建神经网络")
    model = DefineModel_textAndNumerics(
        [x_train_text.shape[2], x_train_numerics.shape[2]], numclasses)
    model.compile(optimizer=tf.keras.optimizers.Adam(0.001),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    # summary() prints by itself and returns None, so it is not wrapped in print().
    model.summary()

    print("训练神经网络")
    metrics = Kc.Mertics_f1s_recalls_precisions(
        validation_data=([x_test_text, x_test_numerics], y_test))
    history = model.fit([x_train_text, x_train_numerics],
                        y_train,
                        batch_size=200,
                        validation_data=([x_test_text, x_test_numerics], y_test),
                        epochs=700,
                        callbacks=[metrics])

    nowtime = str(int(time.time()))
    try:
        plt.plot(history.history['accuracy'])
        plt.plot(history.history['val_accuracy'])
        plt.plot(metrics.val_f1s)
        plt.plot(metrics.val_recalls)
        plt.plot(metrics.val_precisions)
        plt.legend([
            'training', 'validation', 'val_f1', 'val_recall', 'val_precision'
        ], loc='upper left')
        plt.savefig(os.path.join(result_dir, nowtime + '_cnn_result' + '.png'))
        plt.show()
    except Exception as err:
        # Plotting is best effort (e.g. no display on the cluster); report the
        # cause instead of hiding it, but let KeyboardInterrupt/SystemExit through.
        print("无法画图")
        print(repr(err))
    finally:
        # History and model are stored whatever happened while plotting.
        with open(os.path.join(result_dir, nowtime + '_history.txt'),
                  'w+') as f:
            f.write(str(history.history))
        model.save(os.path.join(model_dir, nowtime + r'_cnn_mix_1.model'))

    # Keep a snapshot of the script next to its results. Read and write with
    # the same encoding so the copy stays valid UTF-8 source on any platform.
    # NOTE(review): the snapshot is taken from 'cnn_text.py' although the
    # results belong to cnn_mix_1 -- looks like a copy-paste leftover; confirm
    # which file should be archived.
    with open('cnn_text.py', 'r', encoding='utf-8') as src:
        source_code = src.read()
    with open(os.path.join(result_dir, nowtime + '_code' + '.py'),
              'w+', encoding='utf-8') as dst:
        dst.write(source_code)

    print('结束')
    return 0