コード例 #1
0
ファイル: new_algo.py プロジェクト: xiaoqing8/data_process_py
def synyi_id_icd10(file_name, code_file):
    """Copy the rows of a synyi_code match file whose first field is a known name.

    The input ``synyi_code匹配<code_file>text_code.csv`` and the output
    ``synyi_code_icd_<code_file>.csv`` both live in the ``内部质量同步表``
    sub-folder for ``file_name`` under the module-level ``path``.

    A row is kept when its first comma-separated field appears among the
    values of the mapping returned by ``get_standard_ICD("系统对照表")``.

    Args:
        file_name: Name of the dataset folder below ``内部质量同步表``.
        code_file: Either ``'去重'`` or ``'不去重'``; selects which pair of
            input/output files is used.

    Raises:
        ValueError: If ``code_file`` is not one of the two supported values.
    """
    if code_file not in ('去重', '不去重'):
        # Fail early instead of hitting an unbound local further down.
        raise ValueError(
            "code_file must be '去重' or '不去重', got %r" % (code_file,))

    # Backslashes are escaped explicitly; the resulting paths are unchanged.
    base_dir = path + '\\内部质量同步表\\' + file_name
    in_path = base_dir + '\\synyi_code匹配' + code_file + 'text_code.csv'
    out_path = base_dir + '\\synyi_code_icd_' + code_file + '.csv'

    with codecs.open(in_path, 'r', encoding='utf-8') as src:
        # Flatten every value list of the mapping into one set so the
        # per-line membership test is O(1).
        # assumes the values are iterables of hashable items (strings) -- TODO confirm
        known_names = set()
        for names in get_standard_ICD("系统对照表").values():
            known_names.update(names)

        with codecs.open(out_path, 'w', encoding='utf-8') as dst:
            for line in src:
                line = line.strip()
                if line.split(',')[0] in known_names:
                    dst.write(line + '\n')
コード例 #2
0
ファイル: new_algo.py プロジェクト: xiaoqing8/data_process_py
def true_false_diag_code(file_name, code_file):
    """Split ``synyi_code_icd_<code_file>.csv`` into matching and mismatching rows.

    Each input row is read as ``<cause>,<icd>,...``. The row is written to
    ``<code_file>synyi_icd表中code和诊断描述匹配对的实例.csv`` when ``cause`` is
    contained in the entry for ``icd`` in the mapping returned by
    ``get_standard_ICD("系统对照表")``. Otherwise it is written to
    ``<code_file>synyi_icd表中code和诊断描述匹配错误的实例.csv``.

    All files live in the ``内部质量同步表`` sub-folder for ``file_name``
    under the module-level ``path``.

    Args:
        file_name: Name of the dataset folder below ``内部质量同步表``.
        code_file: Either ``'去重'`` or ``'不去重'``; selects the input file
            and the prefix of the two output files.

    Raises:
        ValueError: If ``code_file`` is not one of the two supported values.
        IndexError: If a row has fewer than two comma-separated fields.
    """
    if code_file not in ('去重', '不去重'):
        # Fail early instead of hitting an unbound local further down.
        raise ValueError(
            "code_file must be '去重' or '不去重', got %r" % (code_file,))

    # Backslashes are escaped explicitly; the resulting paths are unchanged.
    base_dir = path + '\\内部质量同步表\\' + file_name
    in_path = base_dir + '\\synyi_code_icd_' + code_file + '.csv'
    true_path = (base_dir + '\\' + code_file +
                 'synyi_icd表中code和诊断描述匹配对的实例.csv')
    false_path = (base_dir + '\\' + code_file +
                  'synyi_icd表中code和诊断描述匹配错误的实例.csv')

    with codecs.open(in_path, 'r', encoding='utf-8') as src, \
            codecs.open(true_path, 'w', encoding='utf-8') as matched, \
            codecs.open(false_path, 'w', encoding='utf-8') as mismatched:
        dic = get_standard_ICD("系统对照表")

        for line in src:
            line = line.strip()
            fields = line.split(',')
            cause, icd = fields[0], fields[1]
            # A code absent from the table cannot match any name, so the row
            # is reported as a mismatch rather than aborting with KeyError.
            if cause in dic.get(icd, ()):
                matched.write(line + '\n')
            else:
                mismatched.write(line + '\n')
コード例 #3
0
ファイル: new_algo.py プロジェクト: xiaoqing8/data_process_py
def sample_code(file_name):
    """Write one ``<code>,<result>`` row per diagnosis code of the code-only table.

    Reads the ``诊断编码`` column of ``只有code的数据没有包含在去重数据中的数量.csv``
    and writes ``sample/抽样只有诊断编码.csv``. Both files are in the
    ``内部质量同步表`` sub-folder for ``file_name`` under the module-level
    ``path``.

    For every code, the upper-cased code is looked up in the mapping returned
    by ``get_standard_ICD("系统对照表")``. ``post_new`` is called for each name
    found, and the result for the last name is kept. When the code is unknown
    or has no names, the literal ``'[]'`` is used. The value is passed
    through ``post_name`` and stringified before being written.

    Args:
        file_name: Name of the dataset folder below ``内部质量同步表``.
    """
    # Backslashes are escaped explicitly; the resulting paths are unchanged.
    base_dir = path + '\\内部质量同步表\\' + file_name
    table = pd.read_csv(base_dir + '\\只有code的数据没有包含在去重数据中的数量.csv',
                        encoding='utf-8')
    codes = table['诊断编码'].tolist()

    out_path = base_dir + '\\sample\\抽样只有诊断编码.csv'
    # The context manager guarantees the rows are flushed and the file is
    # closed, even if one of the remote lookups raises.
    with codecs.open(out_path, 'w', encoding='utf-8') as file_out:
        dic = get_standard_ICD("系统对照表")

        for code in codes:
            # Reset for every code so an empty name list can neither reuse the
            # previous code's result nor leave the variable unbound.
            standard_synyi = '[]'
            for name in dic.get(code.upper(), ()):
                # Only the result for the last name survives the loop.
                standard_synyi = post_new(name)
            # NOTE(review): post_name is not defined in the visible code;
            # confirm it is intended and not a typo for post_new.
            file_out.write(code + ',' + str(post_name(standard_synyi)) + '\n')
コード例 #4
0
ファイル: old_algo.py プロジェクト: xiaoqing8/data_process_py
def nlp_code(file_name, sample_size=100):
    """Run ``post`` on a random sample of the dataset and write a comparison CSV.

    Name/code pairs come from ``get_CDC_ICD(file_name)``. Pairs in which
    either value is ``'^'`` are dropped. For each sampled pair the output row
    holds: diagnosis name, dataset code, the table entry for that code, the
    code returned by ``post`` and the table entry for that code. Entries are
    looked up in the mapping returned by ``get_standard_ICD("系统对照表")``.
    A 7-character result is looked up by its first five characters.

    The output is ``使用nlp算法生成编码.csv`` in the ``用旧算法跑全部数据``
    sub-folder for ``file_name`` under the module-level ``path``.

    Args:
        file_name: Dataset name passed to ``get_CDC_ICD`` and used as the
            output sub-folder.
        sample_size: Maximum number of pairs to sample (default 100). When
            fewer usable pairs exist, all of them are used.
    """
    cause_original, icd_original, _ = get_CDC_ICD(file_name)
    pairs = [(name, code)
             for name, code in zip(cause_original, icd_original)
             if name != '^' and code != '^']

    # random.sample raises ValueError when asked for more items than exist,
    # so the requested size is capped at the number of usable pairs.
    random_list = random.sample(range(len(pairs)),
                                min(sample_size, len(pairs)))

    dic = get_standard_ICD("系统对照表")

    # Backslashes are escaped explicitly; the resulting path is unchanged.
    out_path = (path + '\\用旧算法跑全部数据\\' + file_name +
                '\\使用nlp算法生成编码.csv')
    with codecs.open(out_path, 'w', encoding='utf-8') as out:
        out.write('诊断名称,诊断编码,诊断编码对应的标准诊断名称,算法结果,算法结果对应的标准诊断名称' + '\n')
        for item in random_list:
            cause, icd = pairs[item]
            code = post(cause)

            # A dataset code missing from the table yields an empty field
            # instead of a KeyError.
            icd_names = str(dic[icd]) if icd in dic else ''

            lookup = code[0:5] if len(code) == 7 else code
            if code != '0' and lookup in dic:
                code_names = ',' + str(dic[lookup])
            else:
                # Keeps the ',,' tail the existing no-match rows already use.
                # NOTE(review): this gives six fields against a five-column
                # header -- confirm whether the extra comma is wanted.
                code_names = ',,'

            out.write(cause + ',' + icd + ',' + icd_names + ',' + code +
                      code_names + '\n')
コード例 #5
0
ファイル: new_algo.py プロジェクト: xiaoqing8/data_process_py
def synyi_id(file_name, code_file):
    """Write the ``post_new`` results for each CDC name and for its code's table names.

    Reads the ``CDC诊断名称`` and ``CDC诊断编码`` columns of ``<code_file>.csv``
    and writes ``synyi_code匹配<code_file>.csv``. Both files are in the
    ``内部质量同步表`` sub-folder for ``file_name`` under the module-level
    ``path``.

    Each output row holds the CDC name, the CDC code, ``post_new`` of the CDC
    name, and ``post_new`` of the last name listed for the upper-cased code
    in the mapping returned by ``get_standard_ICD("系统对照表")``. The last
    field is the literal ``'[]'`` when the code is unknown or has no names.

    Args:
        file_name: Name of the dataset folder below ``内部质量同步表``.
        code_file: Base name (without ``.csv``) of the input table; also
            appended to the output file name.
    """
    # Backslashes are escaped explicitly; the resulting paths are unchanged.
    base_dir = path + '\\内部质量同步表\\' + file_name
    table = pd.read_csv(base_dir + '\\' + code_file + '.csv',
                        encoding='utf-8')
    cause = table['CDC诊断名称'].tolist()
    icd = table['CDC诊断编码'].tolist()

    dic = get_standard_ICD("系统对照表")

    out_path = base_dir + '\\synyi_code匹配' + code_file + '.csv'
    # The context manager guarantees the rows are flushed and the file is
    # closed, even if one of the lookups raises.
    with codecs.open(out_path, 'w', encoding='utf-8') as out:
        out.write(
            'CDC中诊断名称,CDC中诊断编码,CDC诊断名称对应的synyi_code,CDC诊断编码对应的标准诊断名称对应的synyi_code'
            + '\n')

        for name, code in zip(cause, icd):
            cdc_synyi = post_new(name)
            # Reset for every row so an empty name list can neither reuse the
            # previous row's result nor leave the variable unbound.
            standard_synyi = '[]'
            for standard_name in dic.get(code.upper(), ()):
                # Only the result for the last name survives the loop.
                standard_synyi = post_new(standard_name)
            out.write(name + ',' + code + ',' + cdc_synyi + ',' +
                      standard_synyi + '\n')
コード例 #6
0
import codecs
import os

import pandas as pd

from read_file import get_standard_ICD

# Working directory that holds the input table and receives the output file.
path = os.getcwd()

# Load the table and keep only the first row for each
# (诊断名称, 诊断编码, 算法结果) combination.
# Backslashes in the paths are escaped explicitly; the paths are unchanged.
file = pd.read_csv(path + '\\编码不同.csv', encoding='utf-8')
file = file.drop_duplicates(subset=['诊断名称', '诊断编码', '算法结果'], keep='first')

# NOTE(review): opening in 'w' mode truncates any existing file, and nothing
# in the visible code writes to or closes this handle -- confirm it is
# closed further down.
file_out = codecs.open(path + '\\编码不同的数据对照表V2.csv', 'w', encoding='utf-8')


# Column values as plain lists, in row order.
cause = file['诊断名称'].tolist()
code_cdc = file['诊断编码'].tolist()
code_nlp = file['算法结果'].tolist()

# Mapping returned by the project helper for the "系统对照表" table.
dic = get_standard_ICD('系统对照表')

# file_out.write('CDC诊断名称,CDC诊断编码,CDC诊断编码对应的标准诊断名称,算法结果,算法结果对应的标准诊断名称' + '\n')
# for index in range(len(cause)):
#     if len(code_nlp[index]) == 7 and code_cdc[index] in dic.keys() and code_nlp[index][0:5] in dic.keys():
#         file_out.write(cause[index] + ',' + code_cdc[index] + ',' + str(dic[code_cdc[index]]) + ',' + code_nlp[index] + ',' + str(dic[code_nlp[index][0:5]]) + '\n')
#     elif len(code_nlp[index]) == 7 and code_cdc[index] in dic.keys() and code_nlp[index][0:5] not in dic.keys():
#         file_out.write(cause[index] + ',' + code_cdc[index] + ',' + str(dic[code_cdc[index]]) + ',' + code_nlp[index] + ',' + '\n')
#     elif code_cdc[index] in dic.keys() and code_nlp[index] not in dic.keys() and len(code_nlp[index]) != 7:
#         file_out.write(cause[index] + ',' + code_cdc[index] + ',' + str(dic[code_cdc[index]]) + ',' + code_nlp[index] + ',' + '\n')
#     elif code_cdc[index] not in dic.keys() and code_nlp[index] in dic.keys():
#         file_out.write(cause[index] + ',' + code_cdc[index] + ',' + ',' + code_nlp[index] + ',' + str(dic[code_nlp[index]]) + '\n')
#     elif code_cdc[index] not in dic.keys() and code_nlp[index] not in dic.keys():
#         file_out.write(cause[index] + ',' + code_cdc[index] + ',' + ',' + code_nlp[index] + ',' + '\n')
#     else:
#         file_out.write(cause[index] + ',' + code_cdc[index] + ',' + str(dic[code_cdc[index]]) + ',' + code_nlp[index] + ',' + str(dic[code_nlp[index]]) + '\n')