if '2、' in line: str12 = str(line.split('、')) out.append(str12) if '3、' in line: str13 = str(line.split('、')) out.append(str13) if '4、' in line: str14 = str(line.split('、')) out.append(str14) if '5、' in line: str15 = str(line.split('、')) out.append(str15) if '6、' in line: str16 = str(line.split('、')) out.append(str16) if '7、' in line: str17 = str(line.split('、')) out.append(str17) if '8、' in line: str18 = str(line.split('、')) out.append(str18) if '9、' in line: str19 = str(line.split('、')) out.append(str19) if '10、' in line: str20 = str(line.split('、')) out.append(str20) out=EMRdef.delre(out) line = ''.join(out) EMRdef.text_create(r'D:\DeepLearning ER\EHRzhzd2','.txt',emrpath,line)
line4 = str(line3) line = line4 line = re.sub('\n', '', line) line = re.sub(' ', '', line) line = re.sub(r'\?|?', '', line) line = re.sub(r'\,|\.|;', '', line) out = line out = re.sub(r'右侧|两侧|双侧|左侧|右|左|双', '', out) out = re.sub(r'肺肺', '肺', out) out = re.sub('(.*?)', '', out) out = re.sub(r'很高危|极高危', '', out) line = out line_re.append(line) while '' in line_re: line_re.remove('') output = EMRdef.delre(line_re) output1 = '\n'.join(output) EMRdef.text_create(r'D:\DeepLearning ER\EHRzhzd2', '.txt', emrpath, output1) ryzd.append(output) #导入关联规则 import orangecontrib.associate.fpgrowth as oaf def dealRules(rules): returnRules = [] for i in rules: temStr = '' for j in i[0]: #处理第一个frozenset temStr = temStr + j + '&'
# Extract keywords from the "diagnosis and treatment course" EMR text files
# and write the collected result to a new file.
import codecs
import math
import os
import os.path
import re
import shutil
import sys
import time

import EMRdef

# Raw string: the original non-raw literal relied on "\D" and "\E" being
# unrecognised escapes that Python passes through (with a warning on newer
# versions).  The path value itself is unchanged.
emrtxts = EMRdef.txttq(r'D:\DeepLearning ER\EHRzlgc4')  # list the txt files

a_out = []

# Punctuation to strip.  The parentheses are escaped on purpose: written as a
# bare "(|)" they form a capturing group that matches the empty string, and
# re.split() then splices group captures (None when the group did not take
# part in the match) into its result, which breaks the following "".join().
# NOTE(review): several alternatives appear twice (",", ";", "<", ">", "!");
# possibly full-width variants were lost in an encoding round-trip - confirm.
pattern = r',|;|\'|`|\[|\]|<|>|\?|"|\{|\}|!|@|#|\$|%|\^|&|=|>|,|。|:|<|;|‘|’|【|】|\(|\)|·|!|\*|\/|…'
punctuation_re = re.compile(pattern)

# Characters removed from every raw line before keyword extraction.
strip_table = str.maketrans('', '', ' .×')

for emrtxt in emrtxts:
    # Record ID: the part of the file name before the first underscore.
    # It is computed as in the original but not used further in this script.
    emr_id = "".join(re.findall(r'(^.+?)\_', os.path.basename(emrtxt)))

    # errors="ignore" so undecodable bytes in the Chinese text are skipped;
    # the context manager closes each file (the original leaked the handle).
    with open(emrtxt, 'r', errors="ignore") as f:
        for line in f:
            line = line.translate(strip_table)
            a = EMRdef.tq_bnum(line)
            a_end = "".join(a)
            # Removing every match is what split-then-join was meant to do.
            a_end = punctuation_re.sub('', a_end)
            a_end = a_end.replace(' ', '')
            a_out.append(a_end)

# Assumed to run once after all files are processed (the original
# indentation was lost) - TODO confirm.
adult_a = EMRdef.delre(a_out)
EMRdef.text_save(r'D:\python\EMR\hyxm.txt', adult_a)
import re

# Raw string: the original u'...' literal relied on "\D" and "\E" being
# unrecognised escapes; the path value is unchanged.
emrtxts = EMRdef.txttq(r'D:\DeepLearning ER\EHRryzd')  # list the txt files

# Respiratory-disease term list, one term per line.  Newlines are stripped
# once here instead of once per input line, and blank entries are dropped:
# an empty term is "found" in every string, so a single blank line in the
# file used to make every EMR line count as a match.
with open(r'D:\python\EMR\hxjbml.txt', errors="ignore") as hxjb:
    hxjbdic = [term for term in (raw.replace('\n', '') for raw in hxjb) if term]

# Collapses any text preceding "肺炎" on a line into the bare term.
pneumonia_re = re.compile(r'(.+?)肺炎')

ryzd = []
for emrtxt in emrtxts:
    emrpath = os.path.splitext(os.path.basename(emrtxt))[0]
    line_out = []
    # errors="ignore" so undecodable bytes in the Chinese text are skipped;
    # the context manager closes each file (the original leaked the handle).
    with open(emrtxt, 'r', errors="ignore") as f:
        for line in f:
            line = line.replace('\n', '')
            line = pneumonia_re.sub('肺炎', line)
            # As in the original, the line is appended once per matching term.
            for hxjbc in hxjbdic:
                if hxjbc in line:
                    line_out.append(line)
    line_output = EMRdef.delre(line_out)
    # NOTE(review): line_output is computed but the unprocessed line_out is
    # what gets appended; kept as-is - confirm which one was intended.
    ryzd.append(line_out)

import orangecontrib.associate.fpgrowth as oaf

often = dict(oaf.frequent_itemsets(ryzd, .01))  # frequent itemsets
rules = oaf.association_rules(often, .01, frozenset({'肺炎'}))  # confidence threshold is set here
rules = list(rules)
print(rules)
#-*- coding: UTF-8 -*-
# This script extracts drug administration routes.
import os
import re

import EMRdef

# Alternation of punctuation marks intended for splitting text into tokens.
pattern = r',|;|\'|`|\[|\]|<|>|\?|"|\{|\}|!|@|#|\$|\^|&|=|,|。|:|;|‘|’|【|】|·|!|、|…'

# Raw strings: the original non-raw literals relied on "\p", "\E" and "\9"
# being unrecognised escapes that Python passes through (with a warning on
# newer versions).  The path values are unchanged.
# The context manager closes the file; the original leaked the handle.
with open(r'D:\python\EMR\967ywml.txt', 'r', errors="ignore") as b:
    # NOTE(review): the original also ran re.sub('', '', bl), which is a
    # no-op as written; possibly it once targeted an invisible character
    # that was lost - confirm.
    adult = [bl.replace('\n', '') for bl in b]

adult_c = EMRdef.delre(adult)
EMRdef.text_save(r'D:\python\EMR\967yw.txt', adult_c)