Exemple #1
0
def convert_icd_9_to_10(s):
    """Best-effort conversion of an ICD-9 code to its ICD-10 counterpart.

    The code is looked up in ``_icd_mapping_9_to_10`` directly; if that
    yields nothing, the ``alt_code`` of its parent and then of its
    grandparent (as returned by ``icd9.search``) are tried in turn.

    Args:
        s: ICD-9 code to convert.

    Returns:
        The mapped value converted to ``str``. If no mapping is found, or
        any lookup raises, a warning is emitted and ``s`` is returned
        unchanged.
    """
    try:
        mapped = _icd_mapping_9_to_10.get(s)
        if not mapped:
            # Search once and walk upwards: parent first, then grandparent.
            node = icd9.search(s)
            for _ in range(2):
                node = node.parent
                mapped = _icd_mapping_9_to_10.get(node.alt_code)
                if mapped:
                    break
    except Exception:
        # Unknown code or a missing ancestor (e.g. ``None.parent``): treat
        # it as a failed conversion. ``Exception`` rather than a bare
        # ``except`` so KeyboardInterrupt/SystemExit still propagate.
        mapped = None

    if not mapped:
        # Without this check, str(None) would leak out as the code "None".
        warnings.warn('Conversion failed: ' + str(s))
        return s
    return str(mapped)
Exemple #2
0
def map_icd_hierarchy(s, version=9):
    """Return the hierarchy path of an ICD code as a list.

    Args:
        s: ICD code; converted to ``str`` before lookup.
        version: Target ICD version, ``9`` or ``10``.

    Returns:
        For version 9: the code's ``alt_code`` followed by its
        ``ancestors()``, reversed (presumably root-first; confirm against
        the icd9 library).
        For version 10: ``[chapter, block, first three characters of the
        code, code]``. If ``s`` is only known as an ICD-9 code, it is first
        converted with ``convert_icd_9_to_10``; when that yields no ICD-10
        code the version 9 path is returned instead.

    Raises:
        Exception: if ``s`` is found in neither ICD-9 nor ICD-10, if
            ``version`` is not 9 or 10, or if version 9 is requested for a
            code that is not found in ICD-9.
    """
    s = str(s)
    code9 = icd9.search(s)
    code10 = icd10.find(s)
    if code9 is None and code10 is None:
        raise Exception("Invalid ICD code", s)
    if version not in (9, 10):
        raise Exception("Invalid ICD version", s)

    def icd9_path():
        # Single definition of the ICD-9 hierarchy used by every fallback.
        return list(reversed([code9.alt_code] + code9.ancestors()))

    if version == 9:
        if code9 is None:
            raise Exception("Invalid ICD version", s)
        return icd9_path()

    if code10 is not None:
        return [code10.chapter, code10.block, code10.code[:3], code10.code]

    # Only an ICD-9 match exists (code9 cannot be None here): try to
    # translate it to ICD-10 before falling back to the ICD-9 path.
    try:
        converted = icd10.find(convert_icd_9_to_10(code9.alt_code))
        if converted is not None:
            return [converted.chapter, converted.block,
                    converted.code[:3], converted.code]
    except Exception:
        # Best effort: warn and fall back. Not a bare ``except`` so that
        # KeyboardInterrupt/SystemExit are not swallowed.
        warnings.warn('Conversion failed: ' + str(s))
    return icd9_path()
Exemple #3
0
 def get_comorbidites_x(self, hadm_id):
     """Build a two-element 0/1 indicator list for one admission.

     Each code in ``self.comorbidites[hadm_id]`` is looked up with
     ``icd.search`` and its ``parent`` chain is followed until a node
     whose ``short_desc`` matches one of the two tracked descriptions.

     Returns:
         ``[infectious, skin]`` where each entry is 1 if at least one code
         falls under the corresponding description, otherwise 0.
     """
     slot_by_description = {
         'Infectious And Parasitic Diseases': 0,
         'Diseases Of The Skin And Subcutaneous Tissue': 1,
     }
     flags = [0, 0]
     for code in self.comorbidites[hadm_id]:
         node = icd.search(code)
         # Climb towards the root until a tracked description is hit.
         while node is not None:
             slot = slot_by_description.get(node.short_desc)
             if slot is not None:
                 flags[slot] = 1
                 break
             node = node.parent
     return flags
Exemple #4
0
def _ccs_parse_row(line):
    """Split one CSV line into ``(raw_icd9_code, [cat1, cat2, cat3, cat4])``.

    The first and last character of each used field are dropped
    (presumably the surrounding quotes) and the remainder is stripped.
    """
    tokens = line.strip().split(',')
    raw_code = tokens[0][1:-1].strip()
    categories = [tokens[col][1:-1].strip() for col in (1, 3, 5, 7)]
    return raw_code, categories


def _ccs_lookup_description(raw_code):
    """Return a description for ``raw_code`` or ``None`` if nothing matches.

    ``search`` is tried on the full code and then on successively shorter
    non-empty prefixes; the first hit wins.
    """
    for end in range(len(raw_code), 0, -1):
        node = search(raw_code[:end])
        if node is not None:
            fields = str(node).split(':')
            # Third field is preferred; fall back to the second when the
            # third is the literal text 'None'.
            return fields[1] if fields[2] == 'None' else fields[2]
    return None


def _ccs_write_lines(path, lines):
    """Write every item of ``lines`` to ``path``, one per line."""
    with open(path, 'w') as outfd:
        outfd.writelines(line + '\n' for line in lines)


def ccs_kg_building(graph_file, out_file):
    """Build a parent/child knowledge graph from a CCS multi-level CSV file.

    Every data row of ``graph_file`` contributes one ICD-9 entity
    (``D_<code>``) and up to four category entities (``A_L<level>_<cat>``).
    An ``A_ROOT`` entity is added last. For each row, edges are created
    along the chain root -> level 1 -> ... -> deepest non-empty level ->
    ICD-9 code, once per direction (relation 0 = is_parent_of,
    relation 1 = is_child_of).

    The unique edges are shuffled with ``np.random.permutation`` and split
    into 20% test, 10% validation and the remainder train.

    Args:
        graph_file: Path of the input CSV; its first line is skipped as a
            header.
        out_file: Prefix prepended to every output file name.

    Files written (``out_file`` + name): ``relation2id``, ``entity2id``,
    ``train_file``, ``valid_file``, ``test_file`` and ``code2desc.pickle``
    (a dict from ``D_<code>`` to the description found via ``search``).
    The number of entries of that dict is printed at the end.
    """
    test_ratio = 0.2
    validation_ratio = 0.1
    relation = {
        'is_parent_of': 0,
        'is_child_of': 1
    }

    # Parse the input once and reuse the rows for both passes. The second
    # pass used to re-open the file through an undefined name (``infile``).
    with open(graph_file, 'r') as infd:
        infd.readline()  # header
        # Blank lines (e.g. a trailing newline) carry no data; skip them.
        rows = [_ccs_parse_row(line) for line in infd if line.strip()]

    # Pass 1: assign ids in first-seen order and collect descriptions.
    entities = {}
    code2desc = {}
    for raw_code, categories in rows:
        icd_key = 'D_' + raw_code
        entities.setdefault(icd_key, len(entities))
        for level, category in enumerate(categories, start=1):
            # Level 1 is always registered; deeper levels only if present.
            if level == 1 or category:
                entities.setdefault('A_L%d_%s' % (level, category),
                                    len(entities))

        description = _ccs_lookup_description(raw_code)
        if description is not None:
            code2desc[icd_key] = description

    entities['A_ROOT'] = len(entities)
    root_id = entities['A_ROOT']

    # Pass 2: link consecutive members of each row's hierarchy chain.
    edges = set()
    for raw_code, categories in rows:
        depth = 1
        for level, category in enumerate(categories, start=1):
            if category:
                depth = level  # ends as the deepest non-empty level
        chain = [root_id]
        chain.extend(entities['A_L%d_%s' % (level, categories[level - 1])]
                     for level in range(1, depth + 1))
        chain.append(entities['D_' + raw_code])
        for parent, child in zip(chain, chain[1:]):
            edges.add(str(parent) + ',' + str(child) + ',0')
            edges.add(str(child) + ',' + str(parent) + ',1')

    _ccs_write_lines(out_file + 'relation2id',
                     (str(v) + ',' + k for k, v in relation.items()))
    _ccs_write_lines(out_file + 'entity2id',
                     (str(v) + ',' + k for k, v in entities.items()))

    edges_ls = list(edges)
    data_size = len(edges_ls)
    order = np.random.permutation(data_size)
    n_test = int(test_ratio * data_size)
    n_valid = int(validation_ratio * data_size)

    splits = (
        ('train_file', order[n_test + n_valid:]),
        ('valid_file', order[n_test:n_test + n_valid]),
        ('test_file', order[:n_test]),
    )
    for name, indices in splits:
        _ccs_write_lines(out_file + name, (edges_ls[i] for i in indices))

    with open(out_file + 'code2desc.pickle', 'wb') as handle:
        pickle.dump(code2desc, handle, protocol=pickle.HIGHEST_PROTOCOL)
    print(len(code2desc))