# Collect numbered-list entries from every EMR text file whose score against
# some reference entry exceeds 0.8, then save everything collected to a
# single file.
#
# `ds` and `emrtxts` are defined earlier in the script (not visible in this
# fragment).  EMRdef.SBS is presumably a string-similarity score and
# EMRdef.delre presumably removes repeated entries -- TODO confirm against
# EMRdef.

# Reference entries with embedded newlines removed.
ds_cs = [line.replace('\n', '') for line in ds]
ryzd = []
output = []

# Captures the text that follows a leading "<digits>、" list marker.
# Compiled once instead of once per input file.
pattern = r'\s*\d+、+\s?(.*)'
c = re.compile(pattern)

for emrtxt in emrtxts:
    # File name without directory or extension.
    emrpath = os.path.basename(emrtxt)
    emrpath = os.path.splitext(emrpath)[0]

    # errors="ignore" drops undecodable bytes (the inputs contain Chinese
    # text).  The context manager closes the handle, which the original
    # never did.
    with open(emrtxt, 'r', errors="ignore") as f:
        for line in f:
            # Text after the list marker; empty when the line has no marker.
            out = ''.join(c.findall(line.strip()))
            # Drop laterality words (right / left / both sides).
            out = re.sub(r'右侧|两侧|双侧|左侧|右|左|双', '', out)
            # Collapse a doubled '肺' into one.
            out = re.sub(r'肺肺', '肺', out)
            # NOTE(review): this pattern only ever matches the empty string,
            # so the call leaves `out` unchanged.  It was presumably meant to
            # strip parenthesised text -- confirm the intended pattern.
            out = re.sub('(.*?)', '', out)
            # Drop risk-grade words.
            out = re.sub(r'很高危|极高危', '', out)

            # A dedicated loop name keeps `ds` intact; the original reused
            # `ds` here and overwrote the source list.  One append per
            # matching reference entry, as before.
            for ds_c in ds_cs:
                if EMRdef.SBS(out, ds_c) > 0.8:
                    output.append(out)

# NOTE(review): the original text is collapsed onto one line; these three
# statements are assumed to run once after all files have been read.
output = EMRdef.delre(output)
# NOTE(review): entries are joined with no separator -- confirm that this is
# what EMRdef.text_save expects.
output1 = ''.join(output)
# Raw string: same runtime path as before, without the invalid escape
# sequences (\p, \E, \j) of the non-raw literal.
EMRdef.text_save(r'D:\python\EMR\jbml.txt', output1)
# For every EMR text file, collect the numbered-list entries that score
# above the threshold against some reference entry and write them to a
# per-file output.
#
# `ds` and `emrtxts` are defined earlier in the script (not visible in this
# fragment).  EMRdef.SBS is presumably a string-similarity score and
# EMRdef.delre presumably removes repeated entries -- TODO confirm against
# EMRdef.

# Reference entries with embedded newlines removed.  The original created
# `ds_c` but appended to `ds_cs`; the list is now built under the name the
# rest of the block reads.
ds_cs = [line.replace('\n', '') for line in ds]
ryzd = []

# Captures the text that follows a leading "<digits>、" list marker.
# Compiled once instead of once per input file.
pattern = r'\s*\d+、+\s?(.*)'
c = re.compile(pattern)

for emrtxt in emrtxts:
    # File name without directory or extension; used as the output name.
    emrpath = os.path.basename(emrtxt)
    emrpath = os.path.splitext(emrpath)[0]
    output = []

    # errors="ignore" drops undecodable bytes (the inputs contain Chinese
    # text).  The handle is closed before the output file is written.
    with open(emrtxt, 'r', errors="ignore") as f:
        for line in f:
            # Text after the list marker; empty when the line has no marker.
            out = ''.join(c.findall(line.strip()))
            # Drop laterality words (right / left / both sides).
            out = re.sub(r'右侧|两侧|双侧|左侧|右|左|双', '', out)
            # Collapse a doubled '肺' into one.
            out = re.sub(r'肺肺', '肺', out)
            # NOTE(review): this pattern only ever matches the empty string,
            # so the call leaves `out` unchanged.  It was presumably meant to
            # strip parenthesised text -- confirm the intended pattern.
            out = re.sub('(.*?)', '', out)

            for ds_c in ds_cs:
                # The original condition was cut off after the call (no
                # comparison, no colon).  NOTE(review): the 0.8 threshold is
                # taken from the sibling version of this script -- confirm.
                if EMRdef.SBS(out, ds_c) > 0.8:
                    output.append(out)

    output = EMRdef.delre(output)
    output1 = '\n'.join(output)
    EMRdef.text_create(r'D:\DeepLearning ER\EHRzhzd2', '.txt', emrpath, output1)
import time import math import os import sys import os, os.path,shutil import codecs import EMRdef import re import pandas as pd emrtxts = EMRdef.txttq(u'D:\DeepLearning ER\EHRzhzd2')#txt目录提取 dis = open(r'C:\Users\Administrator\Desktop\JBML.txt',errors='ignore') ds=dis.readlines() ds_cs = [] for line in ds: line = re.sub('\n','',line) ds_cs.append(line) for emrtxt in emrtxts: out = [] f = open(emrtxt,'r',errors="ignore")#中文加入errors emrpath = os.path.basename(emrtxt) emrpath = os.path.splitext(emrpath)[0]#提取目录 lines = f.readlines() for line in lines: line = re.sub('\n','',line) for ds_c in ds_cs: if set(line) == set(ds_c): out.append(ds_c) elif EMRdef.SBS(line,ds_c)>0.8 and SBS(line,ds) <1:
import codecs
import os  # os.path is used below; no import of it is visible in this fragment
import re

import pandas as pd

import EMRdef

# Match every line of each EMR text file against the reference entries read
# from JBML.txt and write the matching (normalised) reference entries to a
# per-file output under EHRzhzd3.
#
# EMRdef.SBS is presumably a string-similarity score and EMRdef.delre
# presumably removes repeated entries -- TODO confirm against EMRdef.

# Raw string: same runtime path as the original non-raw literal, without its
# invalid escape sequences (\D, \E).
# Presumably lists the .txt files under the directory -- TODO confirm.
emrtxts = EMRdef.txttq(r'D:\DeepLearning ER\EHRzhzd2')

# The reference file is closed as soon as it has been read.
with open(r'C:\Users\Administrator\Desktop\JBML.txt', errors='ignore') as dis:
    ds = dis.readlines()

# Reference entries with embedded newlines removed.
ds_cs = [line.replace('\n', '') for line in ds]

# Normalise each reference entry once, not once per input line: drop the
# acute/chronic qualifier, then rewrite '阻塞性肺疾病' to '慢性'.  The character
# set is precomputed for the set-equality test below.
ds_norm = []
for ds_c in ds_cs:
    ds_c = re.sub(r'急性|慢性', '', ds_c)
    ds_c = re.sub(r'阻塞性肺疾病', '慢性', ds_c)
    ds_norm.append((ds_c, set(ds_c)))

for emrtxt in emrtxts:
    out = []
    # File name without directory or extension; used as the output name.
    emrpath = os.path.basename(emrtxt)
    emrpath = os.path.splitext(emrpath)[0]

    # errors="ignore" drops undecodable bytes (the inputs contain Chinese
    # text).  The context manager closes the handle, which the original
    # never did.
    with open(emrtxt, 'r', errors="ignore") as f:
        for line in f:
            line = line.replace('\n', '')
            line = re.sub(r'急性|慢性', '', line)
            line_chars = set(line)
            for ds_c, ds_c_chars in ds_norm:
                # Keep the entry when it uses exactly the same characters as
                # the line, or when the score lies strictly between 0.6
                # and 1.  The chained comparison calls SBS once, and only
                # when the character sets differ.
                if line_chars == ds_c_chars or 0.6 < EMRdef.SBS(line, ds_c) < 1:
                    out.append(ds_c)

    out = EMRdef.delre(out)
    output = '\n'.join(out)
    EMRdef.text_create(r'D:\DeepLearning ER\EHRzhzd3', '.txt', emrpath, output)
import time import math import os import sys import os, os.path,shutil import codecs import EMRdef import re import pandas as pd emrtxts = EMRdef.txttq(u'D:\DeepLearning ER\EHRzhzd2')#txt目录提取 dis = open(r'C:\Users\Administrator\Desktop\JBML.txt',errors='ignore') ds=dis.readlines() ds_cs = [] for line in ds: line = re.sub('\n','',line) ds_cs.append(line) for emrtxt in emrtxts: out = [] f = open(emrtxt,'r',errors="ignore")#中文加入errors emrpath = os.path.basename(emrtxt) emrpath = os.path.splitext(emrpath)[0]#提取目录 lines = f.readlines() for line in lines: line = re.sub('\n','',line) for ds_c in ds_cs: if set(line) == set(ds_c): out.append(ds_c) elif EMRdef.SBS(line,ds)>0.8 and SBS(line,dic) <1:
# For every EMR text file, collect the numbered-list entries that score
# above the threshold against some reference entry and write them to a
# per-file output.
#
# `ds` and `emrtxts` are defined earlier in the script (not visible in this
# fragment).  EMRdef.SBS is presumably a string-similarity score and
# EMRdef.delre presumably removes repeated entries -- TODO confirm against
# EMRdef.

# Reference entries with embedded newlines removed.  The original created
# `ds_c` but appended to `ds_cs`; the list is now built under the name the
# rest of the block reads.
ds_cs = [line.replace('\n', '') for line in ds]
ryzd = []

# Captures the text that follows a leading "<digits>、" list marker.
# Compiled once instead of once per input file.
pattern = r'\s*\d+、+\s?(.*)'
c = re.compile(pattern)

for emrtxt in emrtxts:
    # File name without directory or extension; used as the output name.
    emrpath = os.path.basename(emrtxt)
    emrpath = os.path.splitext(emrpath)[0]
    output = []

    # errors="ignore" drops undecodable bytes (the inputs contain Chinese
    # text).  The handle is closed before the output file is written.
    with open(emrtxt, 'r', errors="ignore") as f:
        for line in f:
            # Text after the list marker; empty when the line has no marker.
            out = ''.join(c.findall(line.strip()))
            # Drop laterality words (right / left / both sides).
            out = re.sub(r'右侧|两侧|双侧|左侧|右|左|双', '', out)
            # Collapse a doubled '肺' into one.
            out = re.sub(r'肺肺', '肺', out)
            # NOTE(review): this pattern only ever matches the empty string,
            # so the call leaves `out` unchanged.  It was presumably meant to
            # strip parenthesised text -- confirm the intended pattern.
            out = re.sub('(.*?)', '', out)

            for ds_c in ds_cs:
                # The original called EMRdef.SBS() with no arguments and had
                # no comparison or colon.  NOTE(review): the arguments and
                # the 0.8 threshold are taken from the sibling version of
                # this script -- confirm.
                if EMRdef.SBS(out, ds_c) > 0.8:
                    output.append(out)

    output = EMRdef.delre(output)
    output1 = '\n'.join(output)
    EMRdef.text_create(r'D:\DeepLearning ER\EHRzhzd2', '.txt', emrpath, output1)
line = re.sub(r'\,|\.|;','',line) out = line out= re.sub(r'右侧|两侧|双侧|左侧|右|左|双','',out) out = re.sub(r'肺肺','肺',out) out = re.sub('(.*?)', '', out) out = re.sub(r'很高危|极高危', '', out) line = out line_re.append(line) while '' in line_re: line_re.remove('') for line in line_re: for dic in dics: dic=re.sub('\n','',dic) if set(line) == set(dic): output.append(dic) elif EMRdef.SBS(line,dic)>0.8 and EMRdef.SBS(line,dic) <1: output.append(dic) output=EMRdef.delre(output) #output1='\n'.join(output) #EMRdef.text_create(r'D:\DeepLearning ER\EHRzhzd2','.txt',emrpath,output1) ryzd.append(output) #导入关联规则 import orangecontrib.associate.fpgrowth as oaf def dealRules(rules): returnRules = [] for i in rules: temStr = ''; for j in i[0]: #处理第一个frozenset
# For every EMR text file, collect the numbered-list entries that score
# above the threshold against some reference entry and write them to a
# per-file output.
#
# `ds` and `emrtxts` are defined earlier in the script (not visible in this
# fragment).  EMRdef.SBS is presumably a string-similarity score and
# EMRdef.delre presumably removes repeated entries -- TODO confirm against
# EMRdef.

# Reference entries with embedded newlines removed.  The original created
# `ds_c` but appended to `ds_cs`; the list is now built under the name the
# rest of the block reads.
ds_cs = [line.replace('\n', '') for line in ds]
ryzd = []

# Captures the text that follows a leading "<digits>、" list marker.
# Compiled once instead of once per input file.
pattern = r'\s*\d+、+\s?(.*)'
c = re.compile(pattern)

for emrtxt in emrtxts:
    # File name without directory or extension; used as the output name.
    emrpath = os.path.basename(emrtxt)
    emrpath = os.path.splitext(emrpath)[0]
    output = []

    # errors="ignore" drops undecodable bytes (the inputs contain Chinese
    # text).  The handle is closed before the output file is written.
    with open(emrtxt, 'r', errors="ignore") as f:
        for line in f:
            # Text after the list marker; empty when the line has no marker.
            out = ''.join(c.findall(line.strip()))
            # Drop laterality words (right / left / both sides).
            out = re.sub(r'右侧|两侧|双侧|左侧|右|左|双', '', out)
            # Collapse a doubled '肺' into one.
            out = re.sub(r'肺肺', '肺', out)
            # NOTE(review): this pattern only ever matches the empty string,
            # so the call leaves `out` unchanged.  It was presumably meant to
            # strip parenthesised text -- confirm the intended pattern.
            out = re.sub('(.*?)', '', out)

            for ds_c in ds_cs:
                # The original called EMRdef.SBS(out) with one argument and
                # had no comparison or colon.  NOTE(review): the second
                # argument and the 0.8 threshold are taken from the sibling
                # version of this script -- confirm.
                if EMRdef.SBS(out, ds_c) > 0.8:
                    output.append(out)

    output = EMRdef.delre(output)
    output1 = '\n'.join(output)
    EMRdef.text_create(r'D:\DeepLearning ER\EHRzhzd2', '.txt', emrpath, output1)
import time import math import os import sys import os, os.path,shutil import codecs import EMRdef import re import pandas as pd emrtxts = EMRdef.txttq(u'D:\DeepLearning ER\EHRzhzd2')#txt目录提取 dis = open(r'C:\Users\Administrator\Desktop\JBML.txt',errors='ignore') ds=dis.readlines() ds_cs = [] for line in ds: line = re.sub('\n','',line) ds_cs.append(line) for emrtxt in emrtxts: out = [] f = open(emrtxt,'r',errors="ignore")#中文加入errors emrpath = os.path.basename(emrtxt) emrpath = os.path.splitext(emrpath)[0]#提取目录 lines = f.readlines() for line in lines: line = re.sub('\n','',line) for ds_c in ds_cs: if set(line) == set(ds_c): out.append(ds_c) elif EMRdef.SBS(line,ds_c)>0.8 and EMRSBS(line,ds_c) <1: