Example #1
0
def ana_patient():
    """Collect the patient ids of each evaluation split and save them as JSON.

    Reads the first comma-separated field of every non-header row of the three
    hard-coded evaluation master CSV files, prints the size of each id set
    together with its overlap with the ``case2`` set, and writes the sorted id
    lists to ``test_patient_dict.json`` under ``args.file_dir``.
    """
    def get_patients(task):
        """Return the set of ids found in the first column of a master file.

        ``task`` selects the file: ``'task2'`` or ``'case1'``; any other value
        falls back to the task-1 ``case2`` master file.
        """
        if task == 'task2':
            master_file = '/home/yin/contestdata2/DII_sepsis2_task2_evaluation/sepsis2_task2_evaluation_master.csv'
        elif task == 'case1':
            master_file = '/home/yin/contestdata2/DII_sepsis2_task1_evaluation/sepsis2_task1_evaluation_case1_master.csv'
        else:
            master_file = '/home/yin/contestdata2/DII_sepsis2_task1_evaluation/sepsis2_task1_evaluation_case2_master.csv'
        pids = set()
        # The context manager closes the handle even when parsing raises.
        with open(master_file) as handle:
            next(handle, None)  # the first row is the column header
            for line in handle:
                if not line.strip():
                    # A blank row carries no patient id.
                    continue
                # Drop the line terminator first so a single-column row does
                # not yield an id with a trailing newline.
                pids.add(line.rstrip('\r\n').split(',')[0])
        return pids

    pids_case1 = get_patients('case1')
    pids_case2 = get_patients('case2')
    pids_task2 = get_patients('task2')

    # Single-argument print calls produce the same text on Python 2 and 3.
    print('case1 {} {}'.format(len(pids_case1), len(pids_case1 & pids_case2)))
    print('case2 {}'.format(len(pids_case2)))
    print('task2 {} {}'.format(len(pids_task2), len(pids_task2 & pids_case2)))
    print(pids_task2 & pids_case2)

    # NOTE(review): the case2 ids are stored under the 'task1' key; kept
    # as-is because consumers of the JSON file are not visible from here.
    test_patient_dict = {
        'case1': sorted(pids_case1),
        'task1': sorted(pids_case2),
        'task2': sorted(pids_task2),
    }
    py_op.mywritejson(os.path.join(args.file_dir, 'test_patient_dict.json'), test_patient_dict)
Example #2
0
def compare_sepsis():
    """Restrict the sepsis label and time files to patients that have a label.

    Loads ``sepsis_label_dict.json`` and ``patient_label_dict.json`` from
    ``args.result_dir``, keeps only the sepsis entries whose key also appears
    in the patient labels, collapses every non-zero label to 1, and overwrites
    ``sepsis_label_dict.json``.  ``sepsis_time_dict.json`` is then filtered to
    the same keys and overwritten as well.  Counts are printed along the way.
    """
    def in_result_dir(file_name):
        return os.path.join(args.result_dir, file_name)

    print('reading')
    sepsis_labels = py_op.myreadjson(in_result_dir('sepsis_label_dict.json'))
    print('reading')
    patient_labels = py_op.myreadjson(in_result_dir('patient_label_dict.json'))

    # The overlap size is reported twice, exactly as before.
    for _ in range(2):
        print(len(set(sepsis_labels) & set(patient_labels)))

    binary_labels = {
        patient: (0 if label == 0 else 1)
        for patient, label in sepsis_labels.items()
        if patient in patient_labels
    }
    print(len(binary_labels))
    print(sum(binary_labels.values()))
    py_op.mywritejson(in_result_dir('sepsis_label_dict.json'), binary_labels)

    all_times = py_op.myreadjson(in_result_dir('sepsis_time_dict.json'))
    kept_times = {}
    for patient, onset in all_times.items():
        if patient in binary_labels:
            kept_times[patient] = onset
    print(len(kept_times))
    py_op.mywritejson(in_result_dir('sepsis_time_dict.json'), kept_times)
Example #3
0
def gen_patient_time_dict_dii():
    """Record the largest time value seen for every patient in the vital file.

    Reads ``args.vital_file`` as comma-separated text, skips the header row,
    takes the first two fields of each row as ``(patient, time)`` and keeps the
    maximum ``float(time)`` per patient.  The running maximum starts at 0, so
    a patient whose times are all negative is recorded as 0.  The mapping is
    written to ``patient_time_dict.json`` under ``args.result_dir``.
    """
    patient_time_dict = dict()
    # The context manager closes the handle even when a row fails to parse.
    with open(args.vital_file) as vital_handle:
        next(vital_handle, None)  # skip the header row
        for line in vital_handle:
            line = line.strip()
            if not line:
                # Blank rows (e.g. a trailing newline) carry no measurement.
                continue
            patient, time = line.split(',')[:2]
            patient_time_dict[patient] = max(patient_time_dict.get(patient, 0),
                                             float(time))
    py_op.mywritejson(os.path.join(args.result_dir, 'patient_time_dict.json'),
                      patient_time_dict)
Example #4
0
def gen_patient_label_dict():
    """Build the patient -> label mapping from the label CSV file.

    Reads ``args.label_file``, skips the header row, and for every remaining
    row maps the first field (patient id, kept as a string) to the last field
    parsed with ``int``.  The mapping is written to
    ``patient_label_dict.json`` under ``args.result_dir``.
    """
    patient_label_dict = dict()
    # The context manager closes the handle even when a row fails to parse.
    with open(args.label_file) as label_handle:
        next(label_handle, None)  # skip the header row
        for line in label_handle:
            line = line.strip()
            if not line:
                # Blank rows would otherwise fail in int('').
                continue
            data = line.split(',')
            patient_label_dict[data[0]] = int(data[-1])
    py_op.mywritejson(os.path.join(args.result_dir, 'patient_label_dict.json'),
                      patient_label_dict)
Example #5
0
def gen_feature_index():
    """Write the feature name <-> column index tables from the vital header.

    Only the first line of ``args.vital_file`` is read.  Double quotes are
    removed, the line is split on commas and the first two columns are
    dropped; the remaining names, in order, form ``index_feature_list``.  Both
    the list and its inverse mapping are written as JSON under
    ``args.result_dir``.

    Raises:
        ValueError: if the vital file is empty and so has no header row.
    """
    # Only the header is needed, so read one line and close the file at once.
    with open(args.vital_file) as vital_handle:
        header = vital_handle.readline()
    if not header:
        raise ValueError(
            'vital file {} is empty: no header row to index'.format(
                args.vital_file))

    index_feature_list = header.replace('"', '').strip().split(',')[2:]
    feature_index_dict = {f: i for i, f in enumerate(index_feature_list)}

    py_op.mywritejson(os.path.join(args.result_dir, 'feature_index_dict.json'),
                      feature_index_dict)
    py_op.mywritejson(os.path.join(args.result_dir, 'index_feature_list.json'),
                      index_feature_list)
Example #6
0
def gen_patient_time_dict():
    """Record the largest converted time seen for every patient.

    Reads ``args.vital_file`` as comma-separated text, skips the header row,
    takes the first two fields of each row as ``(patient, time)``, converts
    the time with ``time_to_min`` and keeps the maximum per patient (the
    running maximum starts at 0).  The line number is printed every 10000
    lines as a progress indicator.  The mapping is written to
    ``patient_time_dict.json`` under ``args.result_dir``.
    """
    patient_time_dict = dict()
    # The context manager closes the handle even when a row fails to parse.
    with open(args.vital_file) as vital_handle:
        for i_line, line in enumerate(vital_handle):
            if i_line % 10000 == 0:
                print(i_line)
            line = line.strip()
            if i_line == 0 or not line:
                # Row 0 is the header; blank rows carry no measurement.
                continue
            patient, time = line.split(',')[:2]
            patient_time_dict[patient] = max(patient_time_dict.get(patient, 0),
                                             time_to_min(time))
    py_op.mywritejson(os.path.join(args.result_dir, 'patient_time_dict.json'),
                      patient_time_dict)
Example #7
0
def test_all():
    """Score every entry of ``../data/test_clean`` and delete low scorers.

    Scores already stored in ``../data/result/result.json`` are reused; other
    entries are scored with ``measures.compute_pred_clean_psnr``.  An entry is
    deleted from ``test_clean`` when its cached score is below 0.85 or its
    freshly computed score is below 0.88.  The score table is then sorted with
    ``py_op.mysorteddict`` and written back to ``result.json``.

    NOTE(review): the cached and fresh thresholds differ (0.85 vs 0.88); this
    is kept as found -- confirm whether it is intentional.
    """
    import shutil  # local import: only this function needs it

    test_clean = '../data/test_clean'
    result_json = '../data/result/result.json'

    def remove_entry(name):
        """Delete one entry of ``test_clean``, whether file or directory."""
        # Filesystem calls instead of a shell ``rm -r`` string, so names with
        # spaces or shell metacharacters are handled literally.
        path = os.path.join(test_clean, name)
        try:
            if os.path.isdir(path) and not os.path.islink(path):
                shutil.rmtree(path)
            elif os.path.lexists(path):
                os.remove(path)
        except OSError as err:
            # The shell command's failures were ignored too; stay best-effort
            # but make the failure visible.
            print('could not remove {}: {}'.format(path, err))

    try:
        pred_dict = py_op.myreadjson(result_json)
    except Exception:
        # No usable cache (missing or unreadable file): start from scratch.
        # Unlike a bare ``except``, this lets KeyboardInterrupt and
        # SystemExit propagate.
        pred_dict = dict()

    for pred_clean in os.listdir(test_clean):
        if pred_clean in pred_dict:
            if pred_dict[pred_clean] < 0.85:
                remove_entry(pred_clean)
            continue
        result = measures.compute_pred_clean_psnr(pred_clean, '../data/AI/testB', '../data/result')
        if result < 0.88:
            remove_entry(pred_clean)
        pred_dict[pred_clean] = result

    pred_dict = py_op.mysorteddict(pred_dict, key=lambda s: pred_dict[s])
    py_op.mywritejson(result_json, pred_dict)
Example #8
0
def gen_patient_label_dict():
    """Build the patient -> label mapping from a float-formatted label file.

    Reads ``args.label_file``, skips the header row, and for every remaining
    row normalizes the first field to an integer string (``'12.0'`` becomes
    ``'12'``) and parses the last field as ``int(float(...))``.  The mapping
    is written to ``patient_label_dict.json`` under ``args.result_dir`` and
    the numbers of positive and negative samples are printed.
    """
    patient_label_dict = dict()
    # The context manager closes the handle even when a row fails to parse.
    with open(args.label_file) as label_handle:
        next(label_handle, None)  # skip the header row
        for line in label_handle:
            line = line.strip()
            if not line:
                # Blank rows would otherwise fail in float('').
                continue
            data = line.split(',')
            patient = str(int(float(data[0])))
            patient_label_dict[patient] = int(float(data[-1]))
    py_op.mywritejson(os.path.join(args.result_dir, 'patient_label_dict.json'),
                      patient_label_dict)

    # Summed once and reused for both reports.
    n_positive = sum(patient_label_dict.values())
    print('There are {:d} positive samples.'.format(n_positive))
    print('There are {:d} negtive samples.'.format(
        len(patient_label_dict) - n_positive))
Example #9
0
def split_data():
    """Split the stored patient list 80/20 into train and validation files.

    The order of ``patient_list.json`` is kept: the first 80% (rounded down)
    go to ``train.json`` and the rest to ``valid.json``, all under
    ``args.result_dir``.  Afterwards the label sum of each part and the size
    of the training part are printed.
    """
    labels = py_op.myreadjson(
        os.path.join(args.result_dir, 'patient_label_dict.json'))
    ordered_patients = py_op.myreadjson(
        os.path.join(args.result_dir, 'patient_list.json'))

    cut = int(len(ordered_patients) * 0.8)
    patient_train, patient_valid = ordered_patients[:cut], ordered_patients[cut:]

    for file_name, subset in (('train.json', patient_train),
                              ('valid.json', patient_valid)):
        py_op.mywritejson(os.path.join(args.result_dir, file_name), subset)

    print(sum(labels[pid] for pid in patient_train))
    print(sum(labels[pid] for pid in patient_valid))
    # Every training id was looked up above, so this equals the label count.
    print(len(patient_train))
def gen_patient_master_dict(master_list):
    """Encode each patient's master record as a 43-character 0/1 string.

    For every non-header row of ``args.master_file`` the first field is the
    patient id.  Each following field ``d`` at position ``i`` is combined into
    the token ``str(i) + d``, and the character at that token's index in
    ``master_list`` is set to ``'1'``.  The mapping is written to
    ``patient_master_dict.json`` under ``args.result_dir``.

    Args:
        master_list: list of ``str(i) + value`` tokens; a token's position
            selects the character to set.  Its indices must stay below 43.

    Raises:
        ValueError: if a row holds a token that is not in ``master_list``.
    """
    patient_master_dict = dict()
    # The context manager closes the handle even when a row fails to parse.
    with open(args.master_file) as master_handle:
        next(master_handle, None)  # skip the header row
        for line in master_handle:
            line = line.strip()
            if not line:
                # A blank row would otherwise create an entry keyed by ''.
                continue
            data = line.split(',')
            patient = data[0]
            feature = ['0'] * 43
            for i, d in enumerate(data[1:]):
                feature[master_list.index(str(i) + d)] = '1'
            patient_master_dict[patient] = ''.join(feature)
    py_op.mywritejson(
        os.path.join(args.result_dir, 'patient_master_dict.json'),
        patient_master_dict)
Example #11
0
def gen_normal_range_order():
    """Translate each feature's normal range into value-order terms.

    For every feature of ``feature_value_order_dict.json`` whose name does not
    contain ``'time'``, the values are scanned in ascending numeric order.
    The order of the first value above the lower bound is recorded, followed
    by the order of the first value above the upper bound.  A feature may thus
    end up with zero, one or two entries.  The result is printed and written
    to ``feature_normal_range_order_dict.json`` under ``args.result_dir``.
    """
    def load(file_name):
        return py_op.myreadjson(os.path.join(args.result_dir, file_name))

    feature_value_order_dict = load('feature_value_order_dict.json')
    load('index_feature_list.json')  # still read, although its content is unused
    vital_normal_range_dict = load('vital_normal_range_dict.json')

    feature_normal_range_order_dict = {}
    for feature, value_orders in feature_value_order_dict.items():
        if 'time' in feature:
            continue
        normal_range = vital_normal_range_dict[feature]
        ascending = sorted(value_orders.keys(), key=float)
        crossings = feature_normal_range_order_dict[feature] = []
        for raw in ascending:
            number = float(raw)
            # First value beyond the lower bound.
            if number > normal_range[0] and not crossings:
                crossings.append(value_orders[raw])
            # First value beyond the upper bound ends the scan.
            if number > normal_range[1] and len(crossings) == 1:
                crossings.append(value_orders[raw])
                break
    print(feature_normal_range_order_dict)
    py_op.mywritejson(os.path.join(args.result_dir, 'feature_normal_range_order_dict.json'), feature_normal_range_order_dict)
Example #12
0
def get_cases():
    """Rebuild the sepsis label and time files from the onset-time records.

    Every key of the existing ``sepsis_label_dict.json`` is reset to label 0.
    Each ``icustay_id,hours`` row of ``../data/sepsis_onset_time.csv`` is then
    resolved to its admission through ``../data/icustays.csv``; that admission
    gets label 1 and an onset time of ``time_to_min(intime) + 60 * hours``.
    Both dictionaries are written back under ``args.result_dir``.

    Admission keys are normalized to integer strings.  The dictionaries come
    from JSON, whose object keys are strings, so writing under the numeric
    ``hadm_id`` would add a second entry instead of updating the existing one.

    The alternative source ``../data/sepsis3_cases.csv`` used to be opened by
    a loop that broke out immediately; that dead loop is gone, so the file is
    no longer required.
    """
    sepsis_label_dict = py_op.myreadjson(os.path.join(args.result_dir, 'sepsis_label_dict.json'))
    print(len(sepsis_label_dict))

    icu_file = '../data/icustays.csv'
    print('reading icustays.csv')
    icu_data = pd.read_csv(icu_file)
    icu_adm_dict = dict()
    icu_intime_dict = dict()
    # Column-wise iteration avoids one ``.loc`` lookup per cell.
    for icu, adm, intime in zip(icu_data['icustay_id'],
                                icu_data['hadm_id'],
                                icu_data['intime']):
        icu_adm_dict[icu] = adm
        icu_intime_dict[icu] = time_to_min(intime)

    sepsis_label_dict = {k: 0 for k in sepsis_label_dict}
    sepsis_time_dict = py_op.myreadjson(os.path.join(args.result_dir, 'sepsis_time_dict.json'))

    # The context manager closes the handle even when a row fails to parse.
    with open('../data/sepsis_onset_time.csv') as onset_handle:
        for line in onset_handle:
            line = line.strip()
            if not line:
                # Blank rows (e.g. a trailing newline) carry no record.
                continue
            icustay_id, h = line.split(',')
            icu = int(icustay_id)
            # Same string form as the keys loaded from JSON.
            adm = str(int(icu_adm_dict[icu]))
            sepsis_label_dict[adm] = 1
            sepsis_time_dict[adm] = icu_intime_dict[icu] + 60 * int(h)

    py_op.mywritejson(os.path.join(args.result_dir, 'sepsis_label_dict.json'), sepsis_label_dict)
    py_op.mywritejson(os.path.join(args.result_dir, 'sepsis_time_dict.json'), sepsis_time_dict)
Example #13
0
def gen_sepsis_label_dict():
    """Derive per-admission sepsis labels from infection times and SOFA scores.

    Steps, in order:

    1. Read ``../data/sepsis3.csv`` and record a converted suspected-infection
       time for every admission that has one; admissions that additionally
       have ``excluded == 0`` are the sepsis candidates.
    2. Read ``../data/icustays.csv`` and ``../data/sofa.csv``.  The loop that
       would group SOFA rows by admission is disabled by an immediate
       ``break``; the grouping is instead loaded from
       ``../result/adm_sofa_dict.json``.
    3. For every admission in that grouping with an infection time, start with
       label 0.  A candidate becomes label 1 when, among SOFA entries whose
       end time lies between ``48 * 60`` before and ``24 * 60`` after the
       infection time, a later score is at least 2 and at least 2 above the
       first score in that window.
    4. Write ``sepsis_time_dict.json`` and ``sepsis_label_dict.json`` under
       ``args.result_dir`` and print the positive and negative counts.

    NOTE(review): the window arithmetic presumes ``time_to_min`` returns
    minutes -- confirm against its definition.
    """
    sepsis_label_dict = dict()

    sepsis_file = '../data/sepsis3.csv'
    print('reading sepsis3.csv')
    sepsis_data = pd.read_csv(sepsis_file)
    sepsis_infection_dict = dict()  # admission id (str) -> infection time
    sepsis_set = set()  # admissions with an infection time and excluded == 0
    for iline in range(len(sepsis_data)):
        adm = sepsis_data.loc[iline, 'hadm_id']
        adm = str(adm)
        excluded = sepsis_data.loc[iline, 'excluded']
        suspected_infection_time_poe = sepsis_data.loc[
            iline, 'suspected_infection_time_poe']
        # Only values whose string form is longer than 5 characters count as
        # a time; presumably this filters missing cells such as 'nan'.
        if len(str(suspected_infection_time_poe)) > 5:
            sepsis_infection_dict[adm] = time_to_min(
                suspected_infection_time_poe)
            if excluded == 0:
                sepsis_set.add(adm)
    print('Infection No: {:d}'.format(len(sepsis_infection_dict)))
    print('Sepsis No: {:d}'.format(len(sepsis_set)))

    icu_file = '../data/icustays.csv'
    print('reading icustays.csv')
    icu_data = pd.read_csv(icu_file)
    icu_adm_dict = dict()  # icustay_id -> hadm_id; only used by the disabled loop below
    for iline in range(len(icu_data)):
        icu = icu_data.loc[iline, 'icustay_id']
        adm = icu_data.loc[iline, 'hadm_id']
        icu_adm_dict[icu] = adm

    sofa_file = '../data/sofa.csv'
    print('reading sofa.csv')
    sofa_data = pd.read_csv(sofa_file)

    print('mapping sofa to adm')
    adm_sofa_dict = dict()
    for iline in range(len(sofa_data)):
        # Disabled: the unconditional break skips the whole grouping pass, so
        # nothing below it in this loop ever runs.
        break
        if iline and iline % 10000 == 0:
            print('mapping sofa to adm', iline, len(sofa_data))
        icu = sofa_data.loc[iline, 'icustay_id']
        sofa = sofa_data.loc[iline, 'sofa_24hours']
        starttime = sofa_data.loc[iline, 'starttime']
        endtime = sofa_data.loc[iline, 'endtime']
        adm = icu_adm_dict[icu]
        adm_sofa_dict[adm] = adm_sofa_dict.get(
            adm, []) + [[sofa, starttime, endtime]]
    # The grouping is taken from a previously saved file instead; it replaces
    # the (empty) dictionary built above.
    adm_sofa_dict = py_op.myreadjson('../result/adm_sofa_dict.json')

    print('set sepsis label')
    pos_num = 0
    for iline, (adm, sofa_list) in enumerate(adm_sofa_dict.items()):
        if iline and iline % 10000 == 0:
            print('set sepsis label', iline, len(adm_sofa_dict))
        # Admissions without an infection time get no label at all.
        if adm in sepsis_infection_dict:
            sepsis_label_dict[adm] = [0, sepsis_infection_dict[adm]]
        else:
            continue
        # Non-candidates keep the label 0 assigned above.
        if adm not in sepsis_set:
            continue

        # '' marks "no in-window score seen yet"; the first in-window score
        # becomes the baseline.
        sofa_init = ''
        for sofa in sofa_list:
            # Each entry is [score, starttime, endtime]; only the end time is
            # used for the window test.
            starttime = sofa[1]
            endtime = sofa[2]
            time = time_to_min(endtime)
            sofa = int(sofa[0])
            if time - sepsis_infection_dict[
                    adm] >= -48 * 60 and time - sepsis_infection_dict[
                        adm] <= 24 * 60:
                if sofa_init == '':
                    sofa_init = sofa
                elif sofa - sofa_init >= 2 and sofa >= 2:
                    sepsis_label_dict[adm] = [1, sepsis_infection_dict[adm]]
                    # The stored time becomes the later of the infection time
                    # and the end time of the triggering SOFA entry.
                    sepsis_infection_dict[adm] = max(
                        time, sepsis_infection_dict[adm])
                    pos_num += 1
                    break

    print('writing sepsis_label_dict')
    # The time file holds every admission with an infection time, including
    # those that received no label above.
    py_op.mywritejson(os.path.join(args.result_dir, 'sepsis_time_dict.json'),
                      sepsis_infection_dict)
    # Only the 0/1 label (first element of each entry) is persisted.
    py_op.mywritejson(os.path.join(args.result_dir, 'sepsis_label_dict.json'),
                      {k: v[0]
                       for k, v in sepsis_label_dict.items()})

    print('There are {:d} positive samples.'.format(pos_num))
    print('There are {:d} negtive samples.'.format(
        len(sepsis_label_dict) - pos_num))
def gen_feature_order_dict():
    '''
    Generate the order of value for each feature.

    Every column of ``args.vital_file`` after the first is collected as a
    list of floats (empty cells are skipped).  Columns listed together in a
    group of ``similar.json`` are merged into the column with the smallest
    index of that group.  Each distinct value of a column is then mapped to
    the midpoint of the first and last positions it occupies in the sorted
    value list, divided by the number of values.  The result is written to
    ``feature_value_order_dict.json`` under ``args.file_dir``.
    '''

    feature_value_order_dict = dict()

    # vital information
    vital_dict = {}  # column name -> list of observed values
    # The context manager closes the handle even when a row fails to parse.
    with open(args.vital_file) as vital_handle:
        for i_line, line in enumerate(vital_handle):
            if i_line % 10000 == 0:
                # Single-argument print: same output on Python 2 and 3.
                print(i_line)
            if i_line == 0:
                # Commas inside double-quoted names would be mistaken for
                # separators, so they become ';' before the split.  The
                # quote characters themselves are kept.
                chars = []
                in_quotes = False
                for c in line:
                    if c == '"':
                        in_quotes = not in_quotes
                    chars.append(';' if in_quotes and c == ',' else c)
                col_list = ''.join(chars).strip().split(',')[1:]
                for col in col_list:
                    vital_dict[col] = []
                continue
            if not line.strip():
                # Blank rows (e.g. a trailing newline) carry no measurement.
                continue
            ctt_list = line.strip().split(',')[1:]
            assert len(ctt_list) == len(col_list), \
                'line {}: expected {} fields, found {}'.format(
                    i_line, len(col_list), len(ctt_list))
            for col, ctt in zip(col_list, ctt_list):
                if len(ctt):
                    vital_dict[col].append(float(ctt))

    # add group info
    groups = py_op.myreadjson(os.path.join(args.file_dir, 'similar.json'))
    feature_index_dict = py_op.myreadjson(
        os.path.join(args.file_dir, 'feature_index_dict.json'))
    index_feature_list = py_op.myreadjson(
        os.path.join(args.file_dir, 'index_feature_list.json'))
    for g in groups:
        if not g:
            continue
        mg = min(g)  # the smallest index receives the merged values
        for k in g:
            if k != mg:
                kf = index_feature_list[k]
                mf = index_feature_list[mg]
                # Move the values across; the merged column is dropped.
                vital_dict[mf].extend(vital_dict.pop(kf))
    print('features {}'.format(len(vital_dict)))

    for col in col_list:
        if col not in vital_dict:
            # Column was merged into another one above.
            continue
        value_list = sorted(vital_dict[col])
        value_order_dict = dict()
        value_minorder_dict = dict()  # value -> first index in value_list
        value_maxorder_dict = dict()  # value -> last index in value_list
        for i_value, value in enumerate(value_list):
            if value not in value_minorder_dict:
                value_minorder_dict[value] = i_value
            if value == value_list[-1]:
                # The largest value runs to the end of the list.
                value_maxorder_dict[value] = len(value_list) - 1
                break
            if value != value_list[i_value + 1]:
                value_maxorder_dict[value] = i_value
        for value in value_maxorder_dict:
            # Midpoint of the value's index range, scaled by the list length.
            value_order_dict[value] = (
                value_maxorder_dict[value] +
                value_minorder_dict[value]) / 2.0 / len(value_list)
        feature_value_order_dict[col] = value_order_dict
    py_op.mywritejson(
        os.path.join(args.file_dir, 'feature_value_order_dict.json'),
        feature_value_order_dict)
Example #15
0
def gen_feature_order_dict():
    '''
    Generate the order of value for each feature.

    Every column of ``args.vital_file`` after the first is collected as a
    list of strings (empty cells are skipped); the first collected column is
    replaced by ``str(time_to_min(...))`` of its cell.  The number of values
    per column is written to ``feature_count_dict.json``.  Each distinct
    value string is then mapped to the midpoint of the first and last
    positions it occupies in the numerically sorted value list, divided by
    the number of values.  The result is written to
    ``feature_value_order_dict.json``.  Both files go under
    ``args.result_dir``.
    '''

    feature_value_order_dict = dict()

    # vital information
    vital_dict = {}  # column name -> list of observed value strings
    # The context manager closes the handle even when a row fails to parse.
    with open(args.vital_file) as vital_handle:
        for i_line, raw_line in enumerate(vital_handle):
            if i_line % 10000 == 0:
                print(i_line)
            if i_line == 0:
                # Protect commas inside double-quoted names BEFORE the quotes
                # are removed.  Stripping the quotes first left nothing for
                # this pass to detect, so such names were split in two.
                chars = []
                in_quotes = False
                for c in raw_line:
                    if c == '"':
                        in_quotes = not in_quotes
                    chars.append(';' if in_quotes and c == ',' else c)
                header = ''.join(chars).replace('"', '').strip()
                col_list = header.split(',')[1:]
                for col in col_list:
                    vital_dict[col] = []
                continue
            line = raw_line.strip().replace('"', '')
            if not line:
                # Blank rows (e.g. a trailing newline) carry no measurement.
                continue
            ctt_list = line.strip().split(',')[1:]
            ctt_list[0] = str(time_to_min(ctt_list[0]))
            assert len(ctt_list) == len(col_list), \
                'line {}: expected {} fields, found {}'.format(
                    i_line, len(col_list), len(ctt_list))
            for col, ctt in zip(col_list, ctt_list):
                if len(ctt):
                    vital_dict[col].append(ctt)

    feature_count_dict = {k: len(v) for k, v in vital_dict.items()}
    py_op.mywritejson(os.path.join(args.result_dir, 'feature_count_dict.json'), feature_count_dict)

    for col in col_list:
        if col not in vital_dict:
            continue
        value_list = sorted(vital_dict[col], key=lambda s: float(s))
        value_order_dict = dict()
        value_minorder_dict = dict()  # value -> first index in value_list
        value_maxorder_dict = dict()  # value -> last index in value_list
        for i_value, value in enumerate(value_list):
            if value not in value_minorder_dict:
                value_minorder_dict[value] = i_value
            if value == value_list[-1]:
                # The largest value runs to the end of the list.
                value_maxorder_dict[value] = len(value_list) - 1
                break
            if value != value_list[i_value + 1]:
                value_maxorder_dict[value] = i_value
        for value in value_maxorder_dict:
            # Midpoint of the value's index range, scaled by the list length.
            value_order_dict[value] = (value_maxorder_dict[value] + value_minorder_dict[value]) / 2.0 / len(value_list)
        feature_value_order_dict[col] = value_order_dict
    py_op.mywritejson(os.path.join(args.result_dir, 'feature_value_order_dict.json'), feature_value_order_dict)