# Collect sex and age markers from every EHR text file and write them, one
# output file per record ID, into the EHRbase folder.
import math
import os
import os.path
import shutil
import sys
import codecs
import re


def record_id(path):
    """Return the part of the file name before its first underscore.

    Returns '' when the base name contains no underscore preceded by at
    least one character.
    """
    match = re.match(r'(.+?)_', os.path.basename(path))
    return match.group(1) if match else ''


def collect_demographics(lines):
    """Return the space-joined sex/age markers found in *lines*.

    'M' is emitted for a line containing '男' and 'W' for a line containing
    '女'.  Scanning stops at the first line containing '岁'; that line, with
    every '岁' removed, is emitted as the last marker (its trailing newline,
    if any, is kept).
    """
    markers = []
    for line in lines:
        if '男' in line:
            markers.append('M')
        if '女' in line:
            markers.append('W')
        if '岁' in line:
            # The age text is stored verbatim.  The earlier int() conversion
            # only fed an unused variable and crashed on non-numeric lines.
            markers.append(line.replace('岁', ''))
            break
    return ' '.join(markers)


def main():
    """Process every file listed by EMRdef.txttq for the EHR-all folder."""
    # Imported here so the helpers above can be imported without the
    # project-local EMRdef module being installed.
    import EMRdef

    # presumably txttq returns the paths of the .txt files in the folder
    for path in EMRdef.txttq(r'D:\DeepLearning ER\EHR-all'):
        # errors="ignore" drops bytes that cannot be decoded.
        with open(path, 'r', errors='ignore') as handle:
            summary = collect_demographics(handle)
        EMRdef.text_create(r'D:\DeepLearning ER\EHRbase', '.txt',
                           record_id(path), summary)


if __name__ == '__main__':
    main()
# Match every line of each EHRzhzd2 text file against a catalog of names and
# write the matched catalog entries, one per line, into the EHRzhzd3 folder.
import os
import os.path
import shutil
import codecs
import re

import pandas as pd


def load_catalog(path):
    """Return the lines of the text file at *path* with newlines removed."""
    with open(path, errors='ignore') as handle:
        return [entry.replace('\n', '') for entry in handle]


def match_diseases(lines, catalog, similarity):
    """Return the catalog entries matched by *lines*, in encounter order.

    A catalog entry matches a line when both consist of exactly the same set
    of characters, or when ``similarity(line, entry)`` lies strictly between
    0.6 and 1.  Newlines are removed from each line before comparison.
    Entries may appear more than once in the result.
    """
    matched = []
    for raw in lines:
        text = raw.replace('\n', '')
        characters = set(text)
        for entry in catalog:
            if characters == set(entry):
                matched.append(entry)
                continue
            # Score once per pair instead of twice.
            if 0.6 < similarity(text, entry) < 1:
                matched.append(entry)
    return matched


def main():
    """Process every file listed by EMRdef.txttq for the EHRzhzd2 folder."""
    # Imported here so the helpers above can be imported without the
    # project-local EMRdef module being installed.
    import EMRdef

    catalog = load_catalog(r'C:\Users\Administrator\Desktop\JBML.txt')
    for path in EMRdef.txttq(r'D:\DeepLearning ER\EHRzhzd2'):
        # errors="ignore" drops bytes that cannot be decoded.
        with open(path, 'r', errors='ignore') as handle:
            matched = match_diseases(handle, catalog, EMRdef.SBS)
        name = os.path.splitext(os.path.basename(path))[0]
        # presumably delre removes repeated entries — confirm in EMRdef
        output = '\n'.join(EMRdef.delre(matched))
        EMRdef.text_create(r'D:\DeepLearning ER\EHRzhzd3', '.txt', name, output)


if __name__ == '__main__':
    main()
# -*- coding: UTF-8 -*-
# Copy every EHR1 text file that mentions a keyword into the EHRxiaochuan
# folder.
import re
import os
import os.path
import shutil

# The original condition had a second, empty search string (which matches
# every line) behind a syntax error; only the real keyword is kept.
KEYWORDS = ('吸入',)


def mentions_keyword(lines, keywords=KEYWORDS):
    """Return True when any line contains any of *keywords*."""
    return any(keyword in line for line in lines for keyword in keywords)


def main():
    """Process every file listed by EMRdef.txttq for the EHR1 folder."""
    # Imported here so the helper above can be imported without the
    # project-local EMRdef module being installed.
    import EMRdef

    for path in EMRdef.txttq(r'D:\DeepLearning ER\EHR1'):
        # errors="ignore" drops bytes that cannot be decoded.
        with open(path, 'r', errors='ignore') as handle:
            lines = handle.readlines()
        if not mentions_keyword(lines):
            continue
        name = os.path.splitext(os.path.basename(path))[0]
        # Written once per file rather than once per matching line.
        # NOTE(review): assumes text_create overwrites an existing file.
        EMRdef.text_create(r'D:\DeepLearning ER\EHRxiaochuan', '.txt', name,
                           ''.join(lines))


if __name__ == '__main__':
    main()
# Extract the admission-diagnosis line of every EHR text file and write it,
# split into one item per line, into the EHRryzd folder.
import math
import os
import os.path
import shutil
import sys
import codecs
import re


def record_id(path):
    """Return the part of the file name before its first underscore.

    Returns '' when the base name contains no underscore preceded by at
    least one character.
    """
    match = re.match(r'(.+?)_', os.path.basename(path))
    return match.group(1) if match else ''


def format_admission_diagnosis(line):
    """Return the reformatted diagnosis text, or None for other lines.

    Spaces are removed first.  The line qualifies only when '入院诊断:' is
    found within its first six characters.  For a qualifying line every
    'h'/'H' becomes '小时', colons are removed, the text is broken into
    lines at ';', '。' and ',', and numbering such as '1、' is removed.
    """
    line = line.replace(' ', '')
    if line.find('入院诊断:', 0, 6) == -1:
        return None
    line = re.sub(r'h|H', '小时', line)
    # Only the colon is removed; the '入院诊断' label itself stays.
    line = line.replace(':', '')
    line = '\n'.join(re.split(r';|。|,', line))
    return re.sub(r'\d+、', '', line)


def main():
    """Process every file listed by EMRdef.txttq for the EHR folder."""
    # Imported here so the helpers above can be imported without the
    # project-local EMRdef module being installed.
    import EMRdef

    for path in EMRdef.txttq(r'D:\DeepLearning ER\EHR'):
        name = record_id(path)
        # errors="ignore" drops bytes that cannot be decoded.
        with open(path, 'r', errors='ignore') as handle:
            for line in handle:
                diagnosis = format_admission_diagnosis(line)
                if diagnosis is not None:
                    # One call per qualifying line, as before.
                    EMRdef.text_create(r'D:\DeepLearning ER\EHRryzd', '.txt',
                                       name, diagnosis)


if __name__ == '__main__':
    main()
# Extract the content that follows the diagnosis heading of every EHR text
# file and write it into the EHRzhzd folder for the next processing step.
import time
import math
import os
import os.path
import shutil
import sys
import codecs
import re

DIAGNOSIS_MARKERS = ('初步诊断', '最后诊断')


def record_id(path):
    """Return the part of the file name before its first underscore.

    Returns '' when the base name contains no underscore preceded by at
    least one character.
    """
    match = re.match(r'(.+?)_', os.path.basename(path))
    return match.group(1) if match else ''


def lines_after_diagnosis(lines):
    """Return the lines that follow the first line containing a marker.

    The marker line itself is excluded.  An empty list is returned when no
    line contains a marker.  Order and repeated lines are preserved; the
    earlier set difference scrambled the order and dropped any later line
    equal to an earlier one.
    """
    for index, line in enumerate(lines):
        if any(marker in line for marker in DIAGNOSIS_MARKERS):
            return list(lines[index + 1:])
    return []


def main():
    """Process every file listed by EMRdef.txttq for the EHR-all folder."""
    # Imported here so the helpers above can be imported without the
    # project-local EMRdef module being installed.
    import EMRdef

    for path in EMRdef.txttq(r'D:\DeepLearning ER\EHR-all'):
        # errors="ignore" drops bytes that cannot be decoded.
        with open(path, 'r', errors='ignore') as handle:
            remaining = lines_after_diagnosis(handle.readlines())
        EMRdef.text_create(r'D:\DeepLearning ER\EHRzhzd', '.txt',
                           record_id(path), '\n'.join(remaining))


if __name__ == '__main__':
    main()
line = re.sub('\n', '', line) ds_cs.append(line) ryzd = [] output = [] for emrtxt in emrtxts: f = open(emrtxt, 'r', errors="ignore") #中文加入errors emrpath = os.path.basename(emrtxt) emrpath = os.path.splitext(emrpath)[0] #提取目录 pattern = r'\s*\d+、+\s?(.*)' c = re.compile(pattern) for line in f.readlines(): line1 = line.strip('\n') line2 = ''.join(line1) line2 = line2.strip() line3 = c.findall(line2) line3 = ''.join(line3) line4 = str(line3) out = line4 out = re.sub(r'右侧|两侧|双侧|左侧|右|左|双', '', out) out = re.sub(r'肺肺', '肺', out) out = re.sub('(.*?)', '', out) out = re.sub(r'很高危|极高危', '', out) out = re.sub(r'\?|?', '', out) out = re.sub(r',|.|;|', '') for ds in ds_cs: if EMRdef.SBS(out, ds) > 0.8: output.append(out) output = EMRdef.delre(output) output1 = '\n'.join(output) EMRdef.text_create(r'D:\DeepLearning ER', '.txt', emrpath, output1)
# -*- coding: UTF-8 -*-
# Copy every EHR1 text file that mentions the keyword into the EHR2 folder.
import re
import os
import os.path
import shutil

KEYWORD = '吸入'


def mentions_keyword(lines, keyword=KEYWORD):
    """Return True when any line contains *keyword*."""
    return any(keyword in line for line in lines)


def main():
    """Process every file listed by EMRdef.txttq for the EHR1 folder."""
    # Imported here so the helper above can be imported without the
    # project-local EMRdef module being installed.
    import EMRdef

    for path in EMRdef.txttq(r'D:\DeepLearning ER\EHR1'):
        # errors="ignore" drops bytes that cannot be decoded.
        with open(path, 'r', errors='ignore') as handle:
            lines = handle.readlines()
        if not mentions_keyword(lines):
            continue
        name = os.path.splitext(os.path.basename(path))[0]
        # NOTE(review): the original passed `f_out` before ever assigning it
        # (NameError).  Writing the whole file text is an assumption about
        # the intended content — confirm.
        EMRdef.text_create(r'D:\DeepLearning ER\EHR2', '.txt', name,
                           ''.join(lines))


if __name__ == '__main__':
    main()
# -*- coding: UTF-8 -*-
# This file extracts all indicators according to the indicator parameters.
import os
import EMRdef
import re

# Step 1: split the text of every EHRzlgc file on punctuation and write the
# pieces, one per line, into the EHRzlgc3 folder.
emrtxt2s = EMRdef.txttq(r'D:\DeepLearning ER\EHRzlgc')  # list the txt files
pattern2 = r'、|;|:|、|:|,'  # punctuation used as split points
for emrtxt2 in emrtxt2s:
    # errors="ignore" drops bytes that cannot be decoded.
    with open(emrtxt2, 'r', errors="ignore") as f2:
        f2_end = re.split(pattern2, f2.read())
    f2_out = "\n".join(f2_end)
    emrpath2 = os.path.splitext(os.path.basename(emrtxt2))[0]
    EMRdef.text_create(r'D:\DeepLearning ER\EHRzlgc3', '.txt', emrpath2, f2_out)

# Step 2: extract paragraphs according to the lab-test indicators.
# NOTE(review): this part is cut off in the reviewed chunk, so `b` and `f`
# are left open in case the unseen continuation still reads from them.
b = open(r'D:\python\EMR\hyzb.txt', 'r', errors="ignore")
emrtxts = EMRdef.txttq(r'D:\DeepLearning ER\EHRzlgc3')  # list the txt files
# Punctuation to strip.  The parentheses are escaped (ASCII and full-width
# forms both listed): a bare `(|)` is a capturing group that matches the
# empty string rather than the parenthesis characters.
pattern = r',|;|\'|`|\[|\]|<|>|\?|"|\{|\}|!|@|#|\$|%|\^|&|=|,|。|:|;|‘|’|【|】|\(|\)|(|)|·|!|、|…'
brl = b.readlines()
for emrtxt in emrtxts:
    f = open(emrtxt, 'r', errors="ignore")  # undecodable bytes are dropped
    emrpath = os.path.basename(emrtxt)
    emrpath = os.path.splitext(emrpath)[0]  # file name without extension
    test_out = []
    adult = []
    adult_c = []
for emrtxt in emrtxts: f = open(emrtxt, 'r', errors="ignore") #中文加入errors emrtxt = os.path.basename(emrtxt) emrtxt_str = re.findall(r'(^.+?)\_', emrtxt) #提取ID emrtxt = "".join(emrtxt_str) #转成str out = [] for line in f.readlines(): line = re.sub(' ', '', line) line = re.sub('\n', '', line) line = ''.join(line) if line == '男' or line == '男性': out.append('M') elif line == '女' or line == '女性': out.append('W') output = ' '.join(out) EMRdef.text_create(r'D:\DeepLearning ER\EHRsex', '.txt', emrtxt, output) out = [] for line in f.readlines(): if line.find('岁') > -1: line = re.sub('岁', '', line) line = ''.join(line) out.append(line) break ''' se = int(line) if se <=20: a = 'Child' elif se <=40: a = 'Younth' elif se <= 60: a = 'Mid'
# Scan every txt file of the EHR folder, take each line containing the
# keyword '诊疗计划', split it on punctuation and write the result into a
# new file named after the record ID.
# NOTE(review): the output folder is named EHRzhusu although the keyword
# searched for is '诊疗计划' — confirm which one is intended.
import os
import os.path
import shutil
import sys
import codecs
import re


def record_id(path):
    """Return the part of the file name before its first underscore.

    Returns '' when the base name contains no underscore preceded by at
    least one character.
    """
    match = re.match(r'(.+?)_', os.path.basename(path))
    return match.group(1) if match else ''


def split_plan_line(line):
    """Return *line* broken at punctuation, or None without the keyword.

    Spaces are removed first.  A line containing '诊疗计划' is split at
    '。', ':', '、' and ',' and the pieces are joined with newlines.
    """
    line = line.replace(' ', '')
    if '诊疗计划' not in line:
        return None
    return '\n'.join(re.split(r'。|:|、|,', line))


def main():
    """Process every file listed by EMRdef.txttq for the EHR folder."""
    # Imported here so the helpers above can be imported without the
    # project-local EMRdef module being installed.
    import EMRdef

    for path in EMRdef.txttq(r'D:\DeepLearning ER\EHR'):
        name = record_id(path)
        # errors="ignore" drops bytes that cannot be decoded.
        with open(path, 'r', errors='ignore') as handle:
            for line in handle:
                pieces = split_plan_line(line)
                if pieces is not None:
                    # One call per matching line, as before.
                    EMRdef.text_create(r'D:\DeepLearning ER\EHRzhusu', '.txt',
                                       name, pieces)


if __name__ == '__main__':
    main()
# Scan every txt file of the EHR folder, take each line containing the
# keyword '体格检查', split it on punctuation and write the result into a
# new file named after the record ID in the EHRtigejiancha folder.
import os
import os.path
import shutil
import sys
import codecs
import re


def record_id(path):
    """Return the part of the file name before its first underscore.

    Returns '' when the base name contains no underscore preceded by at
    least one character.
    """
    match = re.match(r'(.+?)_', os.path.basename(path))
    return match.group(1) if match else ''


def split_exam_line(line):
    """Return *line* broken at punctuation, or None without the keyword.

    Spaces are removed first.  For a line containing '体格检查' the label
    '体格检查:' is removed, the text is split at '。', ':', '“', '”', ';'
    and ',', and the pieces are joined with newlines.
    """
    line = line.replace(' ', '')
    if '体格检查' not in line:
        return None
    line = line.replace('体格检查:', '')
    return '\n'.join(re.split(r'。|:|“|”|;|,', line))


def main():
    """Process every file listed by EMRdef.txttq for the EHR folder."""
    # Imported here so the helpers above can be imported without the
    # project-local EMRdef module being installed.
    import EMRdef

    for path in EMRdef.txttq(r'D:\DeepLearning ER\EHR'):
        name = record_id(path)
        # errors="ignore" drops bytes that cannot be decoded.
        with open(path, 'r', errors='ignore') as handle:
            for line in handle:
                pieces = split_exam_line(line)
                if pieces is not None:
                    # One call per matching line, as before.
                    EMRdef.text_create(r'D:\DeepLearning ER\EHRtigejiancha',
                                       '.txt', name, pieces)


if __name__ == '__main__':
    main()
# -*- coding: UTF-8 -*-
# Scan every txt file of the EHR folder and copy the files in which some
# line contains the keyword into the EHRxiaochuan folder, named after the
# record ID.
import os
import os.path
import shutil
import sys
import codecs
import re

# NOTE(review): the keyword is an empty string in the source, which matches
# every line, so every non-empty file is copied.  It looks like the intended
# keyword was lost — confirm and fill it in.
KEYWORD = ''


def record_id(path):
    """Return the part of the file name before its first underscore.

    Returns '' when the base name contains no underscore preceded by at
    least one character.
    """
    match = re.match(r'(.+?)_', os.path.basename(path))
    return match.group(1) if match else ''


def contains_keyword(lines, keyword=KEYWORD):
    """Return True when any line, with spaces removed, contains *keyword*."""
    return any(keyword in line.replace(' ', '') for line in lines)


def main():
    """Process every file listed by EMRdef.txttq for the EHR folder."""
    # Imported here so the helpers above can be imported without the
    # project-local EMRdef module being installed.
    import EMRdef

    for path in EMRdef.txttq(r'D:\DeepLearning ER\EHR'):
        # errors="ignore" drops bytes that cannot be decoded.
        with open(path, 'r', errors='ignore') as handle:
            lines = handle.readlines()
        if contains_keyword(lines):
            # Written once per file instead of once per matching line.
            # NOTE(review): assumes text_create overwrites an existing file.
            EMRdef.text_create(r'D:\DeepLearning ER\EHRxiaochuan', '.txt',
                               record_id(path), ''.join(lines))


if __name__ == '__main__':
    main()
# For every EHRzlgc4 text file, derive a label from each line and rewrite the
# line through EMRdef.rre; the rewritten lines are written into EHRzlgc5.
import string
import re

# Punctuation to strip.  The parentheses are escaped (ASCII and full-width
# forms both listed): a bare `(|)` is a capturing group, which makes
# re.split return None entries that "".join cannot handle.
PUNCTUATION = re.compile(
    r',|;|\'|`|\[|\]|<|>|\?|"|\{|\}|!|@|#|\$|%|\^|&|=|>|,|。|:|<|;|‘|’|【|】|\(|\)|(|)|·|!|\*|\/|…'
)


def strip_punctuation(text):
    """Return *text* without the listed punctuation and without spaces."""
    return PUNCTUATION.sub('', text).replace(' ', '')


def main():
    """Process every file listed by EMRdef.txttq for the EHRzlgc4 folder."""
    # The visible part of this script does not import these names, so they
    # are imported here.
    import os
    import EMRdef

    for path in EMRdef.txttq(r'D:\DeepLearning ER\EHRzlgc4'):
        name = os.path.splitext(os.path.basename(path))[0]
        rewritten = []
        # errors="ignore" drops bytes that cannot be decoded.
        with open(path, 'r', errors='ignore') as handle:
            for original in handle:
                condensed = (original.replace(' ', '')
                             .replace('.', '')
                             .replace('×', ''))
                # presumably tq_bnum yields the text before the first number
                # of the line — confirm in EMRdef
                label = strip_punctuation(''.join(EMRdef.tq_bnum(condensed)))
                if label:
                    # Lines that yield no label are skipped.
                    # NOTE(review): rre is assumed to replace `label` with
                    # `label + ':'` once in the original line — confirm.
                    rewritten.append(EMRdef.rre(original, label, label + ':', 1))
        EMRdef.text_create(r'D:\DeepLearning ER\EHRzlgc5', '.txt', name,
                           ''.join(rewritten))


if __name__ == '__main__':
    main()