コード例 #1
0
import time
import math
import os
import sys
import os, os.path, shutil
import codecs
import EMRdef
import re
import pandas as pd
# Flatten every multi-line record into one space-separated line and export it.
emrtxts = EMRdef.txttq(r'D:\DeepLearning ER\EHRzhzd4')  # txt directory extraction
ryzd = []
for emrtxt in emrtxts:
    # errors="ignore" skips bytes that cannot be decoded; the handle is
    # closed as soon as the lines are read.
    with open(emrtxt, 'r', errors="ignore") as f:
        lines = f.readlines()
    emrpath = os.path.basename(emrtxt)
    emrpath = os.path.splitext(emrpath)[0]  # file name without its extension
    if len(lines) > 1:  # only records with more than one line are exported
        lines = ''.join(lines)
        output = re.sub('\n', ' ', lines)
        EMRdef.text_create(r'D:\DeepLearning ER\EHRzhzd6', '.txt', emrpath,
                           output)
'''
        out = re.split('',output)
        ryzd.append(out)
'''
#导入关联规则
import orangecontrib.associate.fpgrowth as oaf


def dealRules(rules):
コード例 #2
0
#-*- coding: UTF-8 -*-

#本文件用于提取目标目录中的所有txt,并提取关键词所在行到指定目录,并提取关键词新建文件
import time
import math
import os
import sys
import os, os.path, shutil
import codecs
import EMRdef
import re

# Export the admission-diagnosis line of every record to its own file.
emrtxts = EMRdef.txttq(r'D:\DeepLearning ER\EHR')  # txt directory extraction
zljhs = []
for emrtxt in emrtxts:
    # errors="ignore" skips undecodable bytes; read everything, then close.
    with open(emrtxt, 'r', errors="ignore") as f:
        emr_lines = f.readlines()
    emrtxt = os.path.basename(emrtxt)
    emrtxt_str = re.findall(r'(^.+?)\_', emrtxt)  # ID = text before the first underscore
    emrtxt = "".join(emrtxt_str)  # empty string when the name has no underscore

    # NOTE(review): the bare '.' matches any character, not a literal dot;
    # the pattern is not used in the visible code - confirm before using it.
    pattern = r',|.|,|。|;|;'
    for line in emr_lines:
        line = re.sub(' ', '', line)  # drop spaces
        # The marker must start within the first six characters of the line.
        if line.find(u'入院诊断:', 0, 6) > -1:
            line = re.sub(r'h|H', '小时', line)
            line = re.sub(r'入院诊断:', '', line)
            EMRdef.text_create(r'D:\DeepLearning ER\EHRryzd', '.txt', emrtxt,
                               line)
コード例 #3
0
#-*- coding: UTF-8 -*-
#根据给药方式和剂量剂型分词
import os
import EMRdef
import string
import re

# Tokenize each record line by administration route / dosage form.
emrtxts = EMRdef.txttq(r'D:\DeepLearning ER\EHRzlgc4')  # txt directory extraction
# Punctuation to strip. The parentheses are escaped: an unescaped "(|)" is a
# capturing group, which makes re.split() return None entries and breaks the
# join below.
pattern = r',|;|\'|`|\[|\]|<|>|\?|"|\{|\}|!|@|#|\$|%|\^|&|=|>|,|。|:|<|;|‘|’|【|】|\(|\)|·|!|\*|\/|…'
for emrtxt in emrtxts:
    # errors="ignore" skips undecodable bytes; read everything, then close.
    with open(emrtxt, 'r', errors="ignore") as f:
        emr_lines = f.readlines()
    emrpath = os.path.basename(emrtxt)
    emrpath = os.path.splitext(emrpath)[0]  # file name without its extension
    f_end = []
    for line in emr_lines:
        c = line  # untouched copy of the line, passed to EMRdef.rre below
        line = re.sub(' ', '', line)  # drop spaces
        line = re.sub(r'\.', '', line)  # drop dots
        line = re.sub('×', '', line)  # drop multiplication signs
        a = EMRdef.tq_bnum(line)
        a_end = "".join(a)
        a_end = re.split(pattern, a_end)
        a_end = "".join(a_end)  # punctuation removed
        a_end = re.sub(' ', '', a_end)
        if a_end == '':
            # NOTE(review): a_end becomes an int here although it is a str
            # everywhere else - confirm this sentinel is intended.
            a_end = 1
        else:
            acb = EMRdef.rre(c, a_end, a_end + ':', 1)
コード例 #4
0
#-*- coding: UTF-8 -*- 

import re
import EMRdef
import os, os.path,shutil
# Copy every record that mentions one of the two drug names to the output folder.
emrtxts = EMRdef.txttq(r'D:\DeepLearning ER\EHR1')  # txt directory extraction
pattern = r',|;|\'|`|\[|\]|<|>|\?|"|\{|\}|!|@|#|\$|\^|&|=|,|。|:|;|‘|’|【|】|·|!|、|…'  # punctuation used for splitting
for emrtxt in emrtxts:
    # errors="ignore" skips undecodable bytes; read everything, then close.
    with open(emrtxt, 'r', errors="ignore") as f:
        f_out = f.readlines()
    emrpath = os.path.basename(emrtxt)
    emrpath = os.path.splitext(emrpath)[0]
    # Export the whole record exactly once, however many lines match.
    if any('都保' in line or '舒利迭' in line for line in f_out):
        f_out = ''.join(f_out)
        EMRdef.text_create(r'D:\DeepLearning ER\EHRxiaochuan', '.txt', emrpath, f_out)
#EMRdef.text_save(emrtxt,f_end)
コード例 #5
0
import time
import math
import os
import sys
import os, os.path,shutil
import codecs 
import EMRdef
import re
emrtxts = EMRdef.txttq(u'D:\DeepLearning ER\EHR-all')#txt目录提取
for emrtxt in emrtxts:
    f = open(emrtxt,'r',errors="ignore")#中文加入errors
    emrtxt = os.path.basename(emrtxt)
    emrtxt_str = re.findall(r'(^.+?)\_',emrtxt)#提取ID
    emrtxt = "".join(emrtxt_str)#转成str
    out = []
    for line in f.readlines():
        if line.find('男')>-1:
            out.append('男')
        elif line.find('女')>-1:
            out.append('女')
        if line.find(‘岁')
コード例 #6
0
import time
import math
import os
import sys
import os, os.path,shutil
import codecs 
import EMRdef
import re
import pandas as pd
# Append the content of the same-named file from EHRsex to each diagnosis
# record and export the merged text.
emrtxts = EMRdef.txttq(r'D:\DeepLearning ER\EHRzhzd5')  # txt directory extraction
emrtxt2s = EMRdef.txttq(r'D:\DeepLearning ER\EHRsex')
ryzd = []
# Resolve the extension-less names once instead of once per diagnosis record.
sex_files = [(os.path.splitext(os.path.basename(path))[0], path)
             for path in emrtxt2s]
for emrtxt in emrtxts:
    # errors="ignore" skips undecodable bytes.
    with open(emrtxt, 'r', errors="ignore") as f:
        line = f.read()
    emrpath = os.path.basename(emrtxt)
    emrpath = os.path.splitext(emrpath)[0]  # file name without its extension
    lines = re.sub(' ', '', line)
    lines = re.split('\n', lines)
    for emrpath2, emrtxt2 in sex_files:
        if emrpath != emrpath2:
            continue
        # Only the matching file is opened and read.
        with open(emrtxt2, 'r', errors="ignore") as f2:
            lines2 = f2.read()
        lines.append(lines2)
        out = '\n'.join(lines)
        EMRdef.text_create(r'D:\DeepLearning ER\EHRzhzd6', '.txt', emrpath, out)
コード例 #7
0
import time
import math
import os
import sys
import os, os.path, shutil
import codecs
import EMRdef
import re
import pandas as pd
# Build one list per diagnosis record (its lines plus the content of the
# same-named file from EHRsex) and collect the lists in ryzd.
emrtxts = EMRdef.txttq(r'D:\DeepLearning ER\EHRzhzd5')  # txt directory extraction
emrtxt2s = EMRdef.txttq(r'D:\DeepLearning ER\EHRsex')
ryzd = []
# Resolve the extension-less names once instead of once per diagnosis record.
sex_files = [(os.path.splitext(os.path.basename(path))[0], path)
             for path in emrtxt2s]
for emrtxt in emrtxts:
    # errors="ignore" skips undecodable bytes.
    with open(emrtxt, 'r', errors="ignore") as f:
        lines = f.read()
    emrpath = os.path.basename(emrtxt)
    emrpath = os.path.splitext(emrpath)[0]  # file name without its extension
    lines = re.sub(' ', '', lines)
    lines = re.split('\n', lines)  # split the text that was just read
    for emrpath2, emrtxt2 in sex_files:
        if emrpath == emrpath2:
            # Only the matching file is opened and read.
            with open(emrtxt2, 'r', errors="ignore") as f2:
                lines2 = f2.read()
            lines.append(lines2)
    ryzd.append(lines)
コード例 #8
0
#-*- coding: UTF-8 -*-

#本文件用于提取目标目录中的所有txt,并提取关键词所在行到指定目录,
# 并提取关键词新建文件,关键词 诊疗过程
import time
import math
import os
import sys
import os, os.path, shutil
import codecs
import EMRdef
import re

emrtxts = EMRdef.txttq(r'D:\DeepLearning ER\EHRzlgc4')  # txt directory extraction
a_out = []
# Punctuation to strip. The parentheses are escaped: an unescaped "(|)" is a
# capturing group, which makes re.split() return None entries and breaks the
# join below.
pattern = r',|;|\'|`|\[|\]|<|>|\?|"|\{|\}|!|@|#|\$|%|\^|&|=|>|,|。|:|<|;|‘|’|【|】|\(|\)|·|!|\*|\/|…'
for emrtxt in emrtxts:
    # errors="ignore" skips undecodable bytes; read everything, then close.
    with open(emrtxt, 'r', errors="ignore") as f:
        emr_lines = f.readlines()
    emrtxt = os.path.basename(emrtxt)
    emrtxt_str = re.findall(r'(^.+?)\_', emrtxt)  # ID = text before the first underscore
    emrtxt = "".join(emrtxt_str)  # empty string when the name has no underscore
    for line in emr_lines:
        line = re.sub(' ', '', line)  # drop spaces
        line = re.sub(r'\.', '', line)  # drop dots
        line = re.sub('×', '', line)  # drop multiplication signs
        a = EMRdef.tq_bnum(line)
        a_end = "".join(a)
        a_end = re.split(pattern, a_end)
        a_end = "".join(a_end)  # punctuation removed
        a_end = re.sub(' ', '', a_end)
コード例 #9
0
#根据词典提取
#-*- coding: UTF-8 -*-
#本文件用于根据指标参数提取所有指标
import os
import EMRdef
import re
#根据句号分词

# Split every record on the punctuation in pattern2 and export one piece per line.
emrtxt2s = EMRdef.txttq(r'D:\DeepLearning ER\EHRzlgc')  # txt directory extraction
pattern2 = r'、|;|:|、|:|,'  # punctuation used as split points
for emrtxt2 in emrtxt2s:
    # errors="ignore" skips undecodable bytes; the handle closes after the read.
    with open(emrtxt2, 'r', errors="ignore") as f2:
        f2_end = re.split(pattern2, f2.read())
    f2_out = "\n".join(f2_end)
    emrpath2 = os.path.basename(emrtxt2)
    emrpath2 = os.path.splitext(emrpath2)[0]  # file name without its extension
    EMRdef.text_create(r'D:\DeepLearning ER\EHRzlgc3', '.txt', emrpath2,
                       f2_out)
#EMRdef.text_save(emrtxt,f_end)
'''----------------------------------------------------------------------------------------------------------------------------------------------'''

# Extract paragraphs according to the lab-test indicator list.
with open(r'D:\python\EMR\hyzb.txt', 'r', errors="ignore") as b:
    brl = b.readlines()
emrtxts = EMRdef.txttq(r'D:\DeepLearning ER\EHRzlgc3')  # txt directory extraction
# NOTE(review): the unescaped "(|)" is a capturing group that matches the
# empty string, not literal parentheses; re.split() with this pattern returns
# None entries. Confirm how the pattern is used before relying on it.
pattern = r',|;|\'|`|\[|\]|<|>|\?|"|\{|\}|!|@|#|\$|%|\^|&|=|,|。|:|;|‘|’|【|】|(|)|·|!|、|…'
for emrtxt in emrtxts:
    # Left open on purpose: the handle is not read within the visible code.
    f = open(emrtxt, 'r', errors="ignore")
    emrpath = os.path.basename(emrtxt)
    emrpath = os.path.splitext(emrpath)[0]  # file name without its extension
    test_out = []
コード例 #10
0
ファイル: SBSteat.py プロジェクト: cyc19950621/python
        C.append(i)
    return C


import re

# Match every cleaned line of the input file against the ICD-10 term list and
# save the matching terms.
with open(r'D:\DeepLearning ER\Z1006014.txt', 'r', errors='ignore') as f:
    lines = f.readlines()
with open(r'C:\Users\Administrator\Desktop\ICD-10.txt', 'r', errors='ignore') as g:
    dics = g.readlines()
line_re = []
out = []
for line in lines:
    line = re.sub('\n', '', line)
    line = re.sub(' ', '', line)
    # Remove ASCII and full-width question marks; a bare "?" alternative is
    # not a valid regular expression.
    line = re.sub(r'[??]', '', line)
    line = re.sub(r'\,|\.|;', '', line)
    if line:  # lines that end up empty are dropped
        line_re.append(line)
# Strip the newline from each dictionary entry once, not once per input line.
dic_terms = [re.sub('\n', '', dic) for dic in dics]
for line in line_re:
    for dic in dic_terms:
        if set(line) == set(dic):  # same set of characters
            out.append(dic)
        else:
            score = SBS(line, dic)  # computed once per pair
            if 0.8 < score < 1:
                out.append(dic)

import EMRdef
out = EMRdef.delre(out)
EMRdef.dic_save(r'D:\DeepLearning ER\JBML.txt', out)
コード例 #11
0
ファイル: wenjianhebing.py プロジェクト: cyc19950621/python
import time
import math
import os
import sys
import os, os.path, shutil
import codecs
import EMRdef
import re
# Pair files with the same base name from the two folders and concatenate
# their lines (second folder first).
emrtxts = EMRdef.txttq(r'D:\DeepLearning ER\EHRryzd')  # txt directory extraction
emrtxt2s = EMRdef.txttq(r'D:\DeepLearning ER\EHRzlgc4')  # txt directory extraction
out = []
for emrtxt in emrtxts:
    emrpath = os.path.basename(emrtxt)
    emrpath = os.path.splitext(emrpath)[0]
    for emrtxt2 in emrtxt2s:
        emrpath2 = os.path.basename(emrtxt2)
        emrpath2 = os.path.splitext(emrpath2)[0]
        if emrpath == emrpath2:
            # errors="ignore" skips undecodable bytes; both handles are
            # closed as soon as their lines are read.
            with open(emrtxt, 'r', errors="ignore") as f:
                a = f.readlines()
            with open(emrtxt2, 'r', errors="ignore") as f2:
                b = f2.readlines()
            c = b + a
コード例 #12
0
#-*- coding: UTF-8 -*- 

#本文件用于提取目标目录中的所有txt,并提取关键词所在行到指定目录,
# 并提取关键词新建文件,关键词 主诉
import os
import sys
import os, os.path,shutil
import codecs 
import EMRdef
import re

#关键词提取 关键词为诊疗计划
# Export every line that mentions the treatment-plan keyword, split on punctuation.
emrtxts = EMRdef.txttq(r'D:\DeepLearning ER\EHR')  # txt directory extraction
pattern2 = r'。|:|、|,'  # punctuation used as split points
zljhs = []
for emrtxt in emrtxts:
    # errors="ignore" skips undecodable bytes; read everything, then close.
    with open(emrtxt, 'r', errors="ignore") as f:
        emr_lines = f.readlines()
    emrtxt = os.path.basename(emrtxt)
    emrtxt_str = re.findall(r'(^.+?)\_', emrtxt)  # ID = text before the first underscore
    emrtxt = "".join(emrtxt_str)  # empty string when the name has no underscore
    for line in emr_lines:
        line = re.sub(' ', '', line)  # drop spaces
        if line.find(u'诊疗计划') > -1:
            f2_end = re.split(pattern2, line)
            f2_out = "\n".join(f2_end)  # one piece per output line
            EMRdef.text_create(r'D:\DeepLearning ER\EHRzhusu', '.txt', emrtxt, f2_out)
#EMRdef.text_save('D:\python\EMR\zljh.txt',zljhs)'''
'''------------------------------------------------------------------------------------------------------------'''
コード例 #13
0
#-*- coding: UTF-8 -*-
#根据给药方式和剂量剂型分词
import os
import EMRdef
import string
import re

# Tokenize each record line by administration route / dosage form.
emrtxts = EMRdef.txttq(r'D:\DeepLearning ER\EHRzlgc4')  # txt directory extraction
# Punctuation to strip. The parentheses are escaped: an unescaped "(|)" is a
# capturing group, which makes re.split() return None entries and breaks the
# join below.
pattern = r',|;|\'|`|\[|\]|<|>|\?|"|\{|\}|!|@|#|\$|%|\^|&|=|>|,|。|:|<|;|‘|’|【|】|\(|\)|·|!|\*|\/|…'
hyjg = []
for emrtxt in emrtxts:
    # errors="ignore" skips undecodable bytes; read everything, then close.
    with open(emrtxt, 'r', errors="ignore") as f:
        emr_lines = f.readlines()
    emrpath = os.path.basename(emrtxt)
    emrpath = os.path.splitext(emrpath)[0]  # file name without its extension
    f_end = []
    for line in emr_lines:
        c = line  # untouched copy of the line, passed to EMRdef.rre below
        line = re.sub(' ', '', line)  # drop spaces
        line = re.sub(r'\.', '', line)  # drop dots
        line = re.sub('×', '', line)  # drop multiplication signs
        a = EMRdef.tq_bnum(line)
        a_end = "".join(a)
        a_end = re.split(pattern, a_end)
        a_end = "".join(a_end)  # punctuation removed
        a_end = re.sub(' ', '', a_end)
        if a_end == '':
            # NOTE(review): a_end becomes an int here although it is a str
            # everywhere else - confirm this sentinel is intended.
            a_end = 1
        else:
            acb = EMRdef.rre(c, a_end, a_end + ':', 1)
コード例 #14
0
#-*- coding: UTF-8 -*- 
#本文件用于提取给药方式
import os
import EMRdef
import re
pattern = r',|;|\'|`|\[|\]|<|>|\?|"|\{|\}|!|@|#|\$|\^|&|=|,|。|:|;|‘|’|【|】|·|!|、|…'  # punctuation used for splitting
# errors="ignore" skips undecodable bytes; the handle closes after the read.
with open(r'D:\python\EMR\967ywml.txt', 'r', errors="ignore") as b:
    brl = b.readlines()
# One entry per input line, newline removed.
adult = [re.sub('\n', '', bl) for bl in brl]
adult_c = EMRdef.delre(adult)
EMRdef.text_save(r'D:\python\EMR\967yw.txt', adult_c)
コード例 #15
0
#-*- coding: UTF-8 -*-

#
#提取最后诊断之后的内容 并进入下一步处理
import time
import math
import os
import sys
import os, os.path, shutil
import codecs
import EMRdef
import re
# Keep only the lines that follow the first diagnosis marker of each record.
emrtxts = EMRdef.txttq(r'D:\DeepLearning ER\EHR-all')  # txt directory extraction
for emrtxt in emrtxts:
    # errors="ignore" skips undecodable bytes; read everything, then close.
    with open(emrtxt, 'r', errors="ignore") as f:
        temp = f.readlines()
    emrtxt = os.path.basename(emrtxt)
    emrtxt_str = re.findall(r'(^.+?)\_', emrtxt)  # ID = text before the first underscore
    emrtxt = "".join(emrtxt_str)  # empty string when the name has no underscore
    # NOTE(review): the bare '.' matches any character, not a literal dot;
    # the pattern is not used in the visible code - confirm before using it.
    pattern = r',|.|,|。|;|;'
    tem_del = []  # lines up to and including the marker line
    for line in temp:
        tem_del.append(line)
        if line.find(u'初步诊断') > -1 or line.find(u'最后诊断') > -1:
            break
    # Slice instead of a set difference so the remaining lines keep their
    # order and repeated lines are not lost. Empty when no marker was found.
    temp = temp[len(tem_del):]
コード例 #16
0
import time
import math
import os
import sys
import os, os.path,shutil
import codecs 
import EMRdef
import re
import pandas as pd
emrtxts = EMRdef.txttq(r'D:\DeepLearning ER\EHRzhzd')  # txt directory extraction
# Load the ICD-10 term list; the handle closes after the read.
with open(r'C:\Users\Administrator\Desktop\ICD-10.txt', errors='ignore') as dis:
    ds = dis.readlines()
ds_cs = [re.sub('\n', '', line) for line in ds]  # terms without newlines
ryzd = []
output = []
# Captures the text that follows a "<digits>、" marker; compiled once.
pattern = r'\s*\d+、+\s?(.*)'
c = re.compile(pattern)
for emrtxt in emrtxts:
    # errors="ignore" skips undecodable bytes; read everything, then close.
    with open(emrtxt, 'r', errors="ignore") as f:
        emr_lines = f.readlines()
    emrpath = os.path.basename(emrtxt)
    emrpath = os.path.splitext(emrpath)[0]  # file name without its extension
    for line in emr_lines:
        line1 = line.strip('\n')
        line2 = line1.strip()
        line3 = c.findall(line2)
        line3 = ''.join(line3)  # empty string when the line has no marker
        line4 = str(line3)
コード例 #17
0
import time
import math
import os
import sys
import os, os.path, shutil
import codecs
import EMRdef
import re
import pandas as pd

emrtxts = EMRdef.txttq(r'D:\DeepLearning ER\EHRzhzd')  # txt directory extraction
# Load the JBML term list; the handle closes after the read.
with open(r'C:\Users\Administrator\Desktop\JBML.txt', errors='ignore') as g:
    dics = g.readlines()
ryzd = []
output = []
line_re = []
# Captures the text that follows a "<digits>、" marker; compiled once.
pattern = r'\s*\d+、+\s?(.*)'
c = re.compile(pattern)
for emrtxt in emrtxts:
    # errors="ignore" skips undecodable bytes; read everything, then close.
    with open(emrtxt, 'r', errors="ignore") as f:
        emr_lines = f.readlines()
    emrpath = os.path.basename(emrtxt)
    emrpath = os.path.splitext(emrpath)[0]  # file name without its extension
    for line in emr_lines:
        line1 = line.strip('\n')
        line2 = line1.strip()
        line3 = c.findall(line2)
        line3 = ''.join(line3)  # empty string when the line has no marker
        line4 = str(line3)
        line = line4
        line = re.sub('\n', '', line)
コード例 #18
0
import time
import math
import os
import sys
import os, os.path,shutil
import codecs 
import EMRdef
import re
import pandas as pd
emrtxts = EMRdef.txttq(r'D:\DeepLearning ER\EHRzhzd')  # txt directory extraction
# Load the ICD-10 term list; the handle closes after the read.
with open(r'C:\Users\Administrator\Desktop\ICD-10.txt', errors='ignore') as dis:
    ds = dis.readlines()
ds_cs = [re.sub('\n', '', line) for line in ds]  # terms without newlines
ryzd = []
output = []
# Captures the text that follows a "<digits>、" marker; compiled once.
pattern = r'\s*\d+、+\s?(.*)'
c = re.compile(pattern)
for emrtxt in emrtxts:
    # errors="ignore" skips undecodable bytes; read everything, then close.
    with open(emrtxt, 'r', errors="ignore") as f:
        emr_lines = f.readlines()
    emrpath = os.path.basename(emrtxt)
    emrpath = os.path.splitext(emrpath)[0]  # file name without its extension
    for line in emr_lines:
        line1 = line.strip('\n')
        line2 = line1.strip()
        line3 = c.findall(line2)
        line3 = ''.join(line3)  # empty string when the line has no marker
        line4 = str(line3)
コード例 #19
0
import time
import math
import os
import sys
import os, os.path, shutil
import codecs
import EMRdef
import re
emrtxts = EMRdef.txttq(u'D:\DeepLearning ER\EHRzhzd')  #txt目录提取
for emrtxt in emrtxts:
    f = open(emrtxt, 'r', errors="ignore")  #中文加入errors
    emrpath = os.path.basename(emrtxt)
    emrpath = os.path.splitext(emrpath)[0]  #提取目录
    pattern = r'\s*\d+、+\s?(.*)'
    c = re.compile(pattern)
    output = []
    for line in f.readlines():
        line1 = line.strip('\n')
        line2 = ''.join(line1)
        line2 = line2.strip()
        line3 = c.findall(line2)
        line3 = ''.join(line3)
        line4 = str(line3)
        out = line4
        if out.find('肺')>-1 or out.find('呼吸')>-1 or out.find('气管')>-1 or out.find('呼吸')>-1 \
        or out.find('筛窦')>-1 or out.find('上额窦')>-1 or out.find('胸腔')>-1 or out.find('鼻')>-1 \
        or out.find('蝶窦')>-1  or out.find('蝶窦')>-1 :
            output.append(out)
            output = EMRdef.delre(output)
            output1 = '\n'.join(output)
            EMRdef.text_create(r'D:\DeepLearning ER\EHRzhzd2', '.txt', emrpath,
コード例 #20
0
        C.append(i)
    return C


import re

# Match every cleaned line of the input file against the ICD-10 term list and
# print the matching terms.
with open(r'D:\DeepLearning ER\Z1006014.txt', 'r', errors='ignore') as f:
    lines = f.readlines()
with open(r'C:\Users\Administrator\Desktop\ICD-10.txt', 'r', errors='ignore') as g:
    dics = g.readlines()
line_re = []
out = []
for line in lines:
    line = re.sub('\n', '', line)
    line = re.sub(' ', '', line)
    # Remove ASCII and full-width question marks; a bare "?" alternative is
    # not a valid regular expression.
    line = re.sub(r'[??]', '', line)
    line = re.sub(r'\,|\.|;', '', line)
    if line:  # lines that end up empty are dropped
        line_re.append(line)
# Strip the newline from each dictionary entry once, not once per input line.
dic_terms = [re.sub('\n', '', dic) for dic in dics]
for line in line_re:
    for dic in dic_terms:
        if set(line) == set(dic):  # same set of characters
            out.append(dic)
        elif SBS(line, dic) > 0.9:
            out.append(dic)

import EMRdef
out = EMRdef.delre(out)
print(out)
コード例 #21
0
# -*- coding:utf-8 -*-
import time
import math
import os
import sys
import os, os.path,shutil
import codecs 
import EMRdef
import re
emrtxts = EMRdef.txttq(u'D:\DeepLearning ER\EHR-all')#txt目录提取
for emrtxt in emrtxts:
    f = open(emrtxt,'r',errors="ignore")#中文加入errors
    emrtxt = os.path.basename(emrtxt)
    emrtxt_str = re.findall(r'(^.+?)\_',emrtxt)#提取ID
    emrtxt = "".join(emrtxt_str)#转成str
    out = []
    for line in f.readlines():
        if line.find(r'男')>-1:
            out.append('M')
        elif line.find(r'女')>-1:
            out.append('W')
        if line.find('岁')>-1:
            line = re.sub('岁','',line)
            line = ''.join(line)

    output = ' '.join(out)
    EMRdef.text_create(r'D:\DeepLearning ER\EHRbase','.txt' ,emrtxt,output)
                    se = int(line)
            if se <=20:
                a = 'Child'
            elif se <=40:
コード例 #22
0
#本文件用于数据清洗
import time
import math
import os
import sys
import os, os.path, shutil
import codecs
import EMRdef
import re
import pandas as pd
import numpy as np
#设置pandas参数
np.set_printoptions(threshold=np.inf)

# Collect every record line that contains a term from the respiratory-disease list.
emrtxts = EMRdef.txttq(r'D:\DeepLearning ER\EHRryzd')  # txt directory extraction
with open(r'D:\python\EMR\hxjbml.txt', errors="ignore") as hxjb:  # respiratory disease list
    hxjbdic = hxjb.readlines()
# Strip newlines once and skip blank entries: an empty term is found in every
# line and would make the filter accept everything.
hxjb_terms = [term for term in (re.sub('\n', '', raw) for raw in hxjbdic) if term]
line_out = []
for emrtxt in emrtxts:
    # errors="ignore" skips undecodable bytes; read everything, then close.
    with open(emrtxt, 'r', errors="ignore") as f:
        emr_lines = f.readlines()
    emrpath = os.path.basename(emrtxt)
    emrpath = os.path.splitext(emrpath)[0]
    for line in emr_lines:
        line = re.sub('\n', '', line)
        # Collapse whatever precedes each "肺炎" into the bare word.
        line = re.sub(r'(.+?)肺炎', '肺炎', line)
        for hxjbc in hxjb_terms:
            if line.find(hxjbc) > -1:
                line_out.append(line)  # appended once per matching term
line_output = EMRdef.delre(line_out)
コード例 #23
0
import time
import math
import os
import sys
import os, os.path, shutil
import codecs
import EMRdef
import re
import pandas as pd
emrtxts = EMRdef.txttq(r'D:\DeepLearning ER\EHRzhzd')  # txt directory extraction
# Load the ICD-10 term list; the handle closes after the read.
with open(r'C:\Users\Administrator\Desktop\ICD-10.txt', errors='ignore') as dis:
    ds = dis.readlines()
ds_cs = [re.sub('\n', '', line) for line in ds]  # terms without newlines
ryzd = []
output = []
# Captures the text that follows a "<digits>、" marker; compiled once.
pattern = r'\s*\d+、+\s?(.*)'
c = re.compile(pattern)
for emrtxt in emrtxts:
    # errors="ignore" skips undecodable bytes; read everything, then close.
    with open(emrtxt, 'r', errors="ignore") as f:
        emr_lines = f.readlines()
    emrpath = os.path.basename(emrtxt)
    emrpath = os.path.splitext(emrpath)[0]  # file name without its extension
    for line in emr_lines:
        line1 = line.strip('\n')
        line2 = line1.strip()
        line3 = c.findall(line2)
        line3 = ''.join(line3)  # empty string when the line has no marker
        line4 = str(line3)
コード例 #24
0
import time
import math
import os
import sys
import os, os.path,shutil
import codecs 
import EMRdef
import re
import pandas as pd
emrtxts = EMRdef.txttq(r'D:\DeepLearning ER\EHRzhzd')  # txt directory extraction
# Load the JBML term list; the handle closes after the read.
with open(r'C:\Users\Administrator\Desktop\JBML.txt', errors='ignore') as g:
    dics = g.readlines()
ryzd = []
output = []
line_re = []
# Captures the text that follows a "<digits>、" marker; compiled once.
pattern = r'\s*\d+、+\s?(.*)'
c = re.compile(pattern)
for emrtxt in emrtxts:
    # errors="ignore" skips undecodable bytes; read everything, then close.
    with open(emrtxt, 'r', errors="ignore") as f:
        emr_lines = f.readlines()
    emrpath = os.path.basename(emrtxt)
    emrpath = os.path.splitext(emrpath)[0]  # file name without its extension
    for line in emr_lines:
        line1 = line.strip('\n')
        line2 = line1.strip()
        line3 = c.findall(line2)
        line3 = ''.join(line3)  # empty string when the line has no marker
        line4 = str(line3)
        line = line4
        line = re.sub('\n', '', line)
        line = re.sub(' ', '', line)
コード例 #25
0
import time
import math
import os
import sys
import os, os.path, shutil
import codecs
import EMRdef
import re
import pandas as pd
emrtxts = EMRdef.txttq(r'D:\DeepLearning ER\EHRzhzd2')  # txt directory extraction
# Load the ICD-10 term list; the handle closes after the read.
with open(r'C:\Users\Administrator\Desktop\ICD-10.txt', errors='ignore') as dis:
    ds = dis.readlines()
ryzd = []
ds_cs = [re.sub('\n', '', line) for line in ds]  # terms without newlines
for emrtxt in emrtxts:
    out = []
    f = open(emrtxt, 'r', errors="ignore")  #中文加入errors
    emrpath = os.path.basename(emrtxt)
    emrpath = os.path.splitext(emrpath)[0]  #提取目录
    lines = f.readlines()
    for line in lines:
        line = re.sub('\n', '', line)
        for ds_c in ds_cs:
            if line.find(ds_c) > -1:
                out.append(d)
            if set(line) == set(ds_c):
                out.append(ds_c)
            elif EMRdef.SBS(line, ds_c) > 0.6 and EMRdef.SBS(line, ds_c) < 1:
コード例 #26
0
import time
import math
import os
import sys
import os, os.path,shutil
import codecs 
import EMRdef
import re
import pandas as pd
emrtxts = EMRdef.txttq(r'D:\DeepLearning ER\EHRzhzd2')  # txt directory extraction
# Load the JBML term list; the handle closes after the read.
with open(r'C:\Users\Administrator\Desktop\JBML.txt', errors='ignore') as dis:
    ds = dis.readlines()
ds_cs = [re.sub('\n', '', line) for line in ds]  # terms without newlines
for emrtxt in emrtxts:
    out = []
    f = open(emrtxt,'r',errors="ignore")#中文加入errors
    emrpath = os.path.basename(emrtxt)
    emrpath = os.path.splitext(emrpath)[0]#提取目录 
    lines = f.readlines()
    for line in lines:
        line = re.sub('\n','',line)
        for ds_c in ds_cs:
            if set(line) == set(ds_c):
                out.append(ds_c)
            elif EMRdef.SBS(line,dic)>0.8  and SBS(line,dic) <1:


コード例 #27
0
import time
import math
import os
import sys
import os, os.path, shutil
import codecs
import EMRdef
import re
import pandas as pd
emrtxts = EMRdef.txttq(u'D:\DeepLearning ER\EHRzhzd4')  # txt directory extraction
for emrtxt in emrtxts:
    out = []
    # errors="ignore" was added so undecodable bytes in the Chinese text are skipped.
    # NOTE(review): the handle is not closed within the visible code.
    f = open(emrtxt, 'r', errors="ignore")
    emrpath = os.path.basename(emrtxt)
    emrpath = os.path.splitext(emrpath)[0]  # file name without its extension
コード例 #28
0
#-*- coding: UTF-8 -*- 

#本文件用于提取目标目录中的所有txt,并提取关键词所在行到指定目录,
# 并提取关键词新建文件,关键词 主诉
import os
import sys
import os, os.path,shutil
import codecs 
import EMRdef
import re

#关键词提取 关键词为诊疗计划
# Copy every record that mentions the inhaler keyword to the output folder.
emrtxts = EMRdef.txttq(r'D:\DeepLearning ER\EHR1')  # txt directory extraction
pattern2 = r'。|:|、|,'  # punctuation used as split points
zljhs = []
for emrtxt in emrtxts:
    # errors="ignore" skips undecodable bytes; read everything, then close.
    with open(emrtxt, 'r', errors="ignore") as f:
        a_out = f.readlines()
    emrtxt = os.path.basename(emrtxt)
    emrtxt_str = re.findall(r'(^.+?)\_', emrtxt)  # ID = text before the first underscore
    emrtxt = "".join(emrtxt_str)  # empty string when the name has no underscore
    # Spaces are removed before the keyword test; the whole record is
    # exported exactly once, however many lines match.
    if any(u'吸入剂' in re.sub(' ', '', line) for line in a_out):
        f2_out = "".join(a_out)
        EMRdef.text_create(r'D:\DeepLearning ER\EHRxiaochuan', '.txt', emrtxt, f2_out)
#EMRdef.text_save('D:\python\EMR\zljh.txt',zljhs)'''
'''------------------------------------------------------------------------------------------------------------'''