def synyi_id_icd10(file_name, code_file):
    """Keep only the rows whose first column is a standard diagnosis name.

    Reads ``synyi_code匹配<code_file>text_code.csv`` from the
    ``内部质量同步表/<file_name>`` directory under the module-level ``path``
    and writes the surviving rows to ``synyi_code_icd_<code_file>.csv`` in
    the same directory. A row survives when its first comma-separated field
    appears among the values returned by ``get_standard_ICD("系统对照表")``.

    Args:
        file_name: Name of the sub-directory holding the input/output files.
        code_file: Either ``'去重'`` or ``'不去重'``; selects which pair of
            files is processed.

    Raises:
        ValueError: If ``code_file`` is not one of the two supported values.
    """
    if code_file not in ('去重', '不去重'):
        # Previously this surfaced later as an UnboundLocalError on ``file``.
        raise ValueError(
            "code_file must be '去重' or '不去重', got %r" % (code_file,))

    # Backslashes are escaped explicitly; the resulting paths are identical
    # to the ones the original literals produced.
    base_dir = path + '\\内部质量同步表\\' + file_name
    src_path = base_dir + '\\synyi_code匹配' + code_file + 'text_code.csv'
    dst_path = base_dir + '\\synyi_code_icd_' + code_file + '.csv'

    # Flatten every value list of the lookup table into one set so that the
    # per-row membership test is O(1).
    # NOTE(review): assumes the table's values are iterables of hashable
    # items (strings) - confirm against get_standard_ICD.
    dic = get_standard_ICD("系统对照表")
    standard_names = set()
    for names in dic.values():
        standard_names.update(names)

    # ``with`` guarantees both handles are closed even if a row fails.
    with codecs.open(src_path, 'r', encoding='utf-8') as src, \
            codecs.open(dst_path, 'w', encoding='utf-8') as dst:
        for line in src:
            line = line.strip()
            if line.split(',')[0] in standard_names:
                dst.write(line + '\n')
def true_false_diag_code(file_name, code_file):
    """Split ``synyi_code_icd_<code_file>.csv`` into matching / mismatching rows.

    For every row, the first comma-separated field is looked up in the list
    that ``get_standard_ICD("系统对照表")`` stores under the row's second
    field. Rows whose first field is found go to
    ``<code_file>synyi_icd表中code和诊断描述匹配对的实例.csv``; all other rows
    go to ``<code_file>synyi_icd表中code和诊断描述匹配错误的实例.csv``. All
    files live in ``内部质量同步表/<file_name>`` under the module-level
    ``path``.

    Args:
        file_name: Name of the sub-directory holding the input/output files.
        code_file: Either ``'去重'`` or ``'不去重'``; selects the file set.

    Raises:
        ValueError: If ``code_file`` is not one of the two supported values.
        KeyError: If a row's second field is not a key of the lookup table
            (unchanged from the original behaviour).
        IndexError: If a row has fewer than two comma-separated fields
            (unchanged from the original behaviour).
    """
    if code_file not in ('去重', '不去重'):
        # Previously this surfaced later as an UnboundLocalError on ``file``.
        raise ValueError(
            "code_file must be '去重' or '不去重', got %r" % (code_file,))

    # Backslashes are escaped explicitly; the resulting paths are identical
    # to the ones the original literals produced.
    base_dir = path + '\\内部质量同步表\\' + file_name
    src_path = base_dir + '\\synyi_code_icd_' + code_file + '.csv'
    true_path = (base_dir + '\\' + code_file
                 + 'synyi_icd表中code和诊断描述匹配对的实例.csv')
    false_path = (base_dir + '\\' + code_file
                  + 'synyi_icd表中code和诊断描述匹配错误的实例.csv')

    dic = get_standard_ICD("系统对照表")

    # ``with`` closes all three handles even when a lookup raises mid-file.
    with codecs.open(src_path, 'r', encoding='utf-8') as src, \
            codecs.open(true_path, 'w', encoding='utf-8') as out_true, \
            codecs.open(false_path, 'w', encoding='utf-8') as out_false:
        for line in src:
            line = line.strip()
            fields = line.split(',')
            cause = fields[0]
            icd = fields[1]
            target = out_true if cause in dic[icd] else out_false
            target.write(line + '\n')
def sample_code(file_name):
    """Write one ``code,result`` row per diagnosis code of the code-only table.

    Reads the ``诊断编码`` column of
    ``只有code的数据没有包含在去重数据中的数量.csv`` (directory
    ``内部质量同步表/<file_name>`` under the module-level ``path``). For each
    code, the upper-cased code is looked up in
    ``get_standard_ICD("系统对照表")``; every entry of the matching list is
    passed to ``post_new`` and the LAST result is kept. When the code is
    unknown, or its list is empty, the placeholder ``'[]'`` is used instead.
    The row written to ``sample/抽样只有诊断编码.csv`` is the original code
    followed by ``str(post_name(<kept value>))``.

    Args:
        file_name: Name of the sub-directory holding the input/output files.
    """
    # Backslashes are escaped explicitly; the resulting paths are identical
    # to the ones the original literals produced.
    base_dir = path + '\\内部质量同步表\\' + file_name
    table = pd.read_csv(
        base_dir + '\\只有code的数据没有包含在去重数据中的数量.csv',
        encoding='utf-8')
    codes = table['诊断编码'].tolist()

    dic = get_standard_ICD("系统对照表")

    # The original never closed this handle, so buffered rows could be lost;
    # ``with`` flushes and closes it on every exit path.
    with codecs.open(base_dir + '\\sample\\抽样只有诊断编码.csv', 'w',
                     encoding='utf-8') as file_out:
        for code in codes:
            # Reset per row: with an empty lookup list the original reused
            # the previous row's value (or hit an unbound name on row one).
            standard_synyi = '[]'
            key = code.upper()
            if key in dic:
                # NOTE(review): post_new is called for every entry but only
                # the last result survives - confirm this is intended.
                for standard_name in dic[key]:
                    standard_synyi = post_new(standard_name)
            # NOTE(review): post_name is not defined in the visible code;
            # confirm it is a distinct helper and not a typo for post_new.
            file_out.write(code + ',' + str(post_name(standard_synyi)) + '\n')
def nlp_code(file_name):
    """Run the NLP coder on a random sample and dump a comparison table.

    Loads diagnosis names and codes via ``get_CDC_ICD(file_name)``, drops
    every pair in which either element equals ``'^'``, draws up to 100
    random rows and calls ``post`` on each sampled diagnosis name. For each
    sampled row one line is written to ``使用nlp算法生成编码.csv`` (directory
    ``用旧算法跑全部数据/<file_name>`` under the module-level ``path``):

        name, code, standard names for code, algorithm result,
        standard names for algorithm result

    Standard names come from ``get_standard_ICD("系统对照表")``. A 7-character
    algorithm result is looked up by its first five characters; any other
    result is looked up as-is, except ``'0'``, which is never looked up.
    When the algorithm result has no standard names the row ends with
    ``<result>,,`` (two trailing commas, as the original branches wrote).

    An earlier, commented-out variant of this function processed only the
    rows listed in ``编码匹配.csv``; it has been removed as dead code.

    Args:
        file_name: Name of the data set / sub-directory to process.
    """
    cause_original, icd_original, _ = get_CDC_ICD(file_name)

    # Keep only rows where both the name and the code are present
    # ('^' is the value the loader uses for rows to be discarded here).
    cause = []
    icd = []
    for name, code in zip(cause_original, icd_original):
        if name != '^' and code != '^':
            cause.append(name)
            icd.append(code)

    # Randomly sample 100 rows; clamp so that fewer than 100 usable rows no
    # longer raises ValueError from random.sample.
    sample_size = min(100, len(cause))
    random_list = random.sample(range(0, len(cause)), sample_size)

    dic = get_standard_ICD("系统对照表")

    # Backslashes are escaped explicitly; the resulting path is identical
    # to the one the original literals produced.
    out_path = (path + '\\用旧算法跑全部数据\\' + file_name
                + '\\使用nlp算法生成编码.csv')

    # The original never closed this handle; ``with`` flushes and closes it
    # even if ``post`` fails part-way through the sample.
    with codecs.open(out_path, 'w', encoding='utf-8') as out:
        out.write('诊断名称,诊断编码,诊断编码对应的标准诊断名称,算法结果,算法结果对应的标准诊断名称' + '\n')
        for item in random_list:
            name = cause[item]
            cdc_code = icd[item]
            code = post(name)

            # A code missing from the table used to fall through to the
            # final ``else`` and raise KeyError; it now yields an empty field.
            cdc_standard = str(dic[cdc_code]) if cdc_code in dic else ''

            if code == '0':
                lookup_key = None          # '0' is never looked up
            elif len(code) == 7:
                lookup_key = code[0:5]     # 7-char results match on a 5-char prefix
            else:
                lookup_key = code

            if lookup_key is not None and lookup_key in dic:
                tail = code + ',' + str(dic[lookup_key])
            else:
                tail = code + ',,'

            # NOTE: str() of the name lists contains commas itself; the
            # row layout is kept exactly as the original produced it.
            out.write(name + ',' + cdc_code + ',' + cdc_standard + ','
                      + tail + '\n')
def synyi_id(file_name, code_file):
    """Resolve synyi codes for each CDC diagnosis name and its ICD code.

    Reads ``<code_file>.csv`` from ``内部质量同步表/<file_name>`` under the
    module-level ``path`` and, for every row, writes to
    ``synyi_code匹配<code_file>.csv`` in the same directory:

        CDC name, CDC code, post_new(CDC name), post_new(standard name)

    The standard name comes from ``get_standard_ICD("系统对照表")`` keyed by
    the upper-cased CDC code; ``post_new`` is called on every entry of the
    matching list and the LAST result is kept. When the code is unknown, or
    its list is empty, the placeholder ``'[]'`` is written instead.

    An earlier, commented-out variant that built the name/code lists from
    ``get_CDC_ICD`` with name-based de-duplication has been removed as dead
    code.

    Args:
        file_name: Name of the sub-directory holding the input/output files.
        code_file: Base name (without ``.csv``) of the input table; it is
            also embedded in the output file name.
    """
    # Backslashes are escaped explicitly; the resulting paths are identical
    # to the ones the original literals produced.
    base_dir = path + '\\内部质量同步表\\' + file_name
    table = pd.read_csv(base_dir + '\\' + code_file + '.csv',
                        encoding='utf-8')
    cause = table['CDC诊断名称'].tolist()
    icd = table['CDC诊断编码'].tolist()

    dic = get_standard_ICD("系统对照表")

    # The original never closed this handle, so buffered rows could be lost;
    # ``with`` flushes and closes it on every exit path.
    with codecs.open(base_dir + '\\synyi_code匹配' + code_file + '.csv', 'w',
                     encoding='utf-8') as out:
        out.write(
            'CDC中诊断名称,CDC中诊断编码,CDC诊断名称对应的synyi_code,CDC诊断编码对应的标准诊断名称对应的synyi_code' + '\n')
        for name, code in zip(cause, icd):
            cdc_synyi = post_new(name)

            # Reset per row: with an empty lookup list the original reused
            # the previous row's value (or hit an unbound name on row one).
            standard_synyi = '[]'
            key = code.upper()
            if key in dic:
                # NOTE(review): post_new is called for every entry but only
                # the last result survives - confirm this is intended.
                for standard_name in dic[key]:
                    standard_synyi = post_new(standard_name)

            out.write(name + ',' + code + ',' + cdc_synyi + ','
                      + standard_synyi + '\n')
import os
import codecs
from read_file import get_standard_ICD

# Module-level script: loads the table of rows whose CDC code and algorithm
# result differ, de-duplicates it, and prepares the lookup data for writing
# a comparison table.

# Working directory; used as the prefix for every data file path.
path = os.getcwd()

# NOTE(review): ``pd`` is not imported in the visible portion of the file -
# presumably pandas, imported elsewhere; confirm.
file = pd.read_csv(path + '\编码不同.csv', encoding='utf-8')
# Drop rows that repeat the same (name, code, algorithm result) triple,
# keeping the first occurrence.
file = file.drop_duplicates(subset=['诊断名称', '诊断编码', '算法结果'], keep='first')
# NOTE(review): this handle is opened for writing, but no active write or
# close is visible in this portion - the writer loop below is commented out.
file_out = codecs.open(path + '\编码不同的数据对照表V2.csv', 'w', encoding='utf-8')
# Column values as plain lists, index-aligned with each other.
cause = file['诊断名称'].tolist()
code_cdc = file['诊断编码'].tolist()
code_nlp = file['算法结果'].tolist()
# Lookup table; the disabled code below indexes it by diagnosis code.
dic = get_standard_ICD('系统对照表')

# Disabled writer: emits one comparison row per entry, leaving a field empty
# when the corresponding code is missing from ``dic``.
# file_out.write('CDC诊断名称,CDC诊断编码,CDC诊断编码对应的标准诊断名称,算法结果,算法结果对应的标准诊断名称' + '\n')
# for index in range(len(cause)):
#     if len(code_nlp[index]) == 7 and code_cdc[index] in dic.keys() and code_nlp[index][0:5] in dic.keys():
#         file_out.write(cause[index] + ',' + code_cdc[index] + ',' + str(dic[code_cdc[index]]) + ',' + code_nlp[index] + ',' + str(dic[code_nlp[index][0:5]]) + '\n')
#     elif len(code_nlp[index]) == 7 and code_cdc[index] in dic.keys() and code_nlp[index][0:5] not in dic.keys():
#         file_out.write(cause[index] + ',' + code_cdc[index] + ',' + str(dic[code_cdc[index]]) + ',' + code_nlp[index] + ',' + '\n')
#     elif code_cdc[index] in dic.keys() and code_nlp[index] not in dic.keys() and len(code_nlp[index]) != 7:
#         file_out.write(cause[index] + ',' + code_cdc[index] + ',' + str(dic[code_cdc[index]]) + ',' + code_nlp[index] + ',' + '\n')
#     elif code_cdc[index] not in dic.keys() and code_nlp[index] in dic.keys():
#         file_out.write(cause[index] + ',' + code_cdc[index] + ',' + ',' + code_nlp[index] + ',' + str(dic[code_nlp[index]]) + '\n')
#     elif code_cdc[index] not in dic.keys() and code_nlp[index] not in dic.keys():
#         file_out.write(cause[index] + ',' + code_cdc[index] + ',' + ',' + code_nlp[index] + ',' + '\n')
#     else:
#         file_out.write(cause[index] + ',' + code_cdc[index] + ',' + str(dic[code_cdc[index]]) + ',' + code_nlp[index] + ',' + str(dic[code_nlp[index]]) + '\n')