# Match numbered diagnosis lines from each EMR text file against a list of
# reference terms and save the de-duplicated hits.
# NOTE(review): `ds`, `ds_cs`, `emrtxts` and the `re` / `os` / `EMRdef` imports
# are not defined in this excerpt; they are expected to come from earlier code.

# Load the reference terms, dropping the newline that ends each line.
ds_cs.extend(line.replace('\n', '') for line in ds)
ryzd = []
output = []

# Numbered list item such as "1、xxx": capture the text after the marker.
pattern = r'\s*\d+、+\s?(.*)'
c = re.compile(pattern)
# Laterality words (right side / both sides / left side / right / left / both).
side_re = re.compile(r'右侧|两侧|双侧|左侧|右|左|双')
# Parenthesised remarks. The original pattern '(.*?)' was an empty-matching
# group and therefore removed nothing; match literal ASCII or fullwidth
# parentheses instead.
paren_re = re.compile(r'[((].*?[))]')
# Risk grading suffixes ("very high risk" / "extremely high risk").
risk_re = re.compile(r'很高危|极高危')
# Question marks (ASCII or fullwidth), optionally preceded by a slash. The
# original pattern r'/?|?' raised re.error ("nothing to repeat").
qmark_re = re.compile(r'/?[??]')

for emrtxt in emrtxts:
    # File name without directory or extension (currently unused below).
    emrpath = os.path.basename(emrtxt)
    emrpath = os.path.splitext(emrpath)[0]
    # errors="ignore" skips undecodable bytes in the Chinese text; the
    # context manager makes sure the handle is closed for every file.
    with open(emrtxt, 'r', errors="ignore") as f:
        for line in f:
            # Text after the "N、" marker, or '' when the line is not a
            # numbered item.
            out = ''.join(c.findall(line.strip()))
            out = side_re.sub('', out)
            # Collapse a doubled "肺" into a single character.
            out = out.replace('肺肺', '肺')
            out = paren_re.sub('', out)
            out = risk_re.sub('', out)
            out = qmark_re.sub('', out)
            # One append per reference term scoring above the threshold;
            # EMRdef.delre is applied to the collected list afterwards.
            for term in ds_cs:
                if EMRdef.SBS(out, term) > 0.8:
                    output.append(out)
output = EMRdef.delre(output)
# The original referenced `output1` without ever building it.
output1 = '\n'.join(output)
EMRdef.text_save(r'D:\python\EMR\jbml.txt', output1)
Exemple #2
0
#-*- coding: UTF-8 -*-
#根据给药方式和剂量剂型分词
import os
import EMRdef
import string
import re

# Keep the entries of quanbu.txt that also occur in tc.txt and save them.

# Punctuation to strip.
# NOTE(review): this pattern is not used anywhere in this script, and the bare
# "(|)" alternative is an empty capturing group rather than literal parentheses.
pattern = r',|;|\*|`|\[|\]|<|>|\?|"|\{|\}|!|@|#|\$|%|\^|&|=|,|。|:|;|‘|’|\+|\-|【|】| \)|\( |(|)|·|!|、|…'

# Dictionary of dose + dosage form + route of administration.
with open(r'C:\Users\Administrator\Desktop\quanbu.txt', 'r',
          errors="ignore") as c:
    crl = c.readlines()
# Dictionary of doses.
with open(r'C:\Users\Administrator\Desktop\tc.txt', 'r',
          errors="ignore") as b:
    brl = b.readlines()

# Count how often each newline-stripped entry occurs in the dose dictionary,
# so every candidate needs one lookup instead of a scan over all of `brl`.
dose_counts = {}
for bl in brl:
    bl = bl.replace('\n', '')
    dose_counts[bl] = dose_counts.get(bl, 0) + 1

# Same result as the pairwise comparison: each entry is appended once per
# matching dose line, in the order of `crl`.
test_out = []
for cl in crl:
    cl = cl.replace('\n', '')
    test_out.extend([cl] * dose_counts.get(cl, 0))

adult_a = EMRdef.delre(test_out)
EMRdef.text_save(r'C:\Users\Administrator\Desktop\output.txt', adult_a)
Exemple #3
0
# Match numbered diagnosis lines from each EMR text file against a list of
# reference terms and save the de-duplicated hits.
# NOTE(review): `ds`, `emrtxts` and the `re` / `os` / `EMRdef` imports are not
# defined in this excerpt; they are expected to come from earlier code.

# Reference terms, without the newline that ends each line.
ds_cs = [line.replace('\n', '') for line in ds]
ryzd = []
output = []

# Numbered list item such as "1、xxx": capture the text after the marker.
pattern = r'\s*\d+、+\s?(.*)'
c = re.compile(pattern)
# Laterality words (right side / both sides / left side / right / left / both).
side_re = re.compile(r'右侧|两侧|双侧|左侧|右|左|双')
# Parenthesised remarks. The original pattern '(.*?)' was an empty-matching
# group and therefore removed nothing; match literal ASCII or fullwidth
# parentheses instead.
paren_re = re.compile(r'[((].*?[))]')
# Risk grading suffixes ("very high risk" / "extremely high risk").
risk_re = re.compile(r'很高危|极高危')

for emrtxt in emrtxts:
    # File name without directory or extension (currently unused below).
    emrpath = os.path.basename(emrtxt)
    emrpath = os.path.splitext(emrpath)[0]
    # errors="ignore" skips undecodable bytes in the Chinese text; the
    # context manager makes sure the handle is closed for every file.
    with open(emrtxt, 'r', errors="ignore") as f:
        for line in f:
            # Text after the "N、" marker, or '' when the line is not a
            # numbered item.
            out = ''.join(c.findall(line.strip()))
            out = side_re.sub('', out)
            # Collapse a doubled "肺" into a single character.
            out = out.replace('肺肺', '肺')
            out = paren_re.sub('', out)
            out = risk_re.sub('', out)
            # One append per reference term scoring above the threshold;
            # EMRdef.delre is applied to the collected list afterwards.
            for term in ds_cs:
                if EMRdef.SBS(out, term) > 0.8:
                    output.append(out)
output = EMRdef.delre(output)
output1 = '\n'.join(output)
# The original passed `adult_c`, a name this script never defines, while the
# freshly built `output1` went unused; save the computed result instead.
EMRdef.text_save(r'D:\python\EMR\jbml.txt', output1)
Exemple #4
0
# Match numbered diagnosis lines from each EMR text file against a list of
# reference terms and save the de-duplicated hits.
# NOTE(review): `ds`, `emrtxts` and the `re` / `os` / `EMRdef` imports are not
# defined in this excerpt; they are expected to come from earlier code.

# Reference terms, without the newline that ends each line.
ds_cs = [line.replace('\n', '') for line in ds]
ryzd = []
output = []

# Numbered list item such as "1、xxx": capture the text after the marker.
pattern = r'\s*\d+、+\s?(.*)'
c = re.compile(pattern)
# Laterality words (right side / both sides / left side / right / left / both).
side_re = re.compile(r'右侧|两侧|双侧|左侧|右|左|双')
# Parenthesised remarks. The original pattern '(.*?)' was an empty-matching
# group and therefore removed nothing; match literal ASCII or fullwidth
# parentheses instead.
paren_re = re.compile(r'[((].*?[))]')
# Risk grading suffixes ("very high risk" / "extremely high risk").
risk_re = re.compile(r'很高危|极高危')

for emrtxt in emrtxts:
    # File name without directory or extension (currently unused below).
    emrpath = os.path.basename(emrtxt)
    emrpath = os.path.splitext(emrpath)[0]
    # errors="ignore" skips undecodable bytes in the Chinese text; the
    # context manager makes sure the handle is closed for every file.
    with open(emrtxt, 'r', errors="ignore") as f:
        for line in f:
            # Text after the "N、" marker, or '' when the line is not a
            # numbered item.
            out = ''.join(c.findall(line.strip()))
            out = side_re.sub('', out)
            # Collapse a doubled "肺" into a single character.
            out = out.replace('肺肺', '肺')
            out = paren_re.sub('', out)
            out = risk_re.sub('', out)
            # One append per reference term scoring above the threshold;
            # EMRdef.delre is applied to the collected list afterwards.
            for term in ds_cs:
                if EMRdef.SBS(out, term) > 0.8:
                    output.append(out)
output = EMRdef.delre(output)
output1 = '\n'.join(output)
# The original passed `adult_c`, a name this script never defines, while the
# freshly built `output1` went unused; save the computed result instead.
EMRdef.text_save(r'D:\python\EMR\967yw.txt', output1)
Exemple #5
0
import time
import math
import os
import sys
import os, os.path, shutil
import codecs
import EMRdef
import re

# Run every line of every EMR text file through EMRdef.tq_bnum, strip
# punctuation from the result and save the de-duplicated strings.

# Raw string: same path value, without invalid "\D" / "\E" escape sequences.
# Presumably lists the txt files under this directory; confirm in EMRdef.txttq.
emrtxts = EMRdef.txttq(r'D:\DeepLearning ER\EHRzlgc4')
a_out = []
# Punctuation to strip. The original alternation contained a bare "(|)", which
# is an empty capturing group: re.split then produced None entries whenever a
# real punctuation mark matched, and "".join raised TypeError. Parentheses are
# now matched literally (ASCII and fullwidth).
pattern = r',|;|\'|`|\[|\]|<|>|\?|"|\{|\}|!|@|#|\$|%|\^|&|=|>|,|。|:|<|;|‘|’|【|】|\(|\)|(|)|·|!|\*|\/|…'
punct_re = re.compile(pattern)
for emrtxt in emrtxts:
    # errors="ignore" skips undecodable bytes in the Chinese text; the
    # context manager makes sure the handle is closed for every file.
    with open(emrtxt, 'r', errors="ignore") as f:
        lines = f.readlines()
    emrtxt = os.path.basename(emrtxt)
    # ID = file-name prefix before the first underscore (currently unused).
    emrtxt_str = re.findall(r'(^.+?)\_', emrtxt)
    emrtxt = "".join(emrtxt_str)  # convert to str
    for line in lines:
        line = line.replace(' ', '')  # remove spaces
        line = line.replace('.', '')  # remove '.'
        line = line.replace('×', '')  # remove the multiplication sign
        a = EMRdef.tq_bnum(line)
        a_end = "".join(a)  # convert to str
        # Deleting the matches equals the original split-then-join, minus
        # the None entries.
        a_end = punct_re.sub('', a_end)
        a_end = a_end.replace(' ', '')  # remove spaces
        a_out.append(a_end)
adult_a = EMRdef.delre(a_out)
EMRdef.text_save(r'D:\python\EMR\hyxm.txt', adult_a)