예제 #1
0
def get_prescribe_df(no):
    '''
    Build the prescription time-series frame for one patient.

    Rows are the medication codes stored under ``metadata/usecol`` (relabelled
    through ``get_index_name_map()`` before returning); columns are the
    periods returned by ``get_timeseries_column()``.  For every prescription
    record whose code is in the row set, the cells from the record's date up
    to ``_times`` periods later are set to 1.  All other cells stay NaN.

    Args:
        no: patient number used to filter the ``data`` table.

    Returns:
        pandas.DataFrame with one row per medication and one column per period.

    Raises:
        KeyError: if a matching record's date is not a time-series column.
    '''
    global prescribe_output_path

    # Check for the metadata with a short-lived handle so that no read handle
    # is left open while set_prescribe_row() runs.
    with pd.HDFStore(prescribe_output_path, mode='r') as store_pres:
        has_usecol = '/metadata/usecol' in store_pres.keys()
    if not has_usecol:
        set_prescribe_row()

    col_list = get_timeseries_column()
    # The context manager guarantees the file is closed even if a select fails.
    with pd.HDFStore(prescribe_output_path, mode='r') as store_pres:
        use_prescribe_values = store_pres.select('metadata/usecol').col.values
        # target patient dataframe
        target_df = store_pres.select('data', where='no=={}'.format(no))

    # create empty dataframe
    result_df = pd.DataFrame(columns=col_list, index=use_prescribe_values)

    for _, _medi_code, _date, _times in target_df.values:
        if _medi_code in use_prescribe_values:
            row_index = result_df.index.get_loc(_medi_code)
            start_index = result_df.columns.get_loc(_date)
            end_index = start_index + _times + 1
            # Single positional assignment on the frame itself; the previous
            # chained form (.loc[...].iloc[...] = 1) could write to a
            # temporary copy and leave result_df unchanged.
            result_df.iloc[row_index, start_index:end_index] = 1

    index_name_dict = get_index_name_map()
    result_df.index = result_df.index.map(index_name_dict.get)
    return result_df
예제 #2
0
def get_demo_df(no):
    '''
    Build a frame that repeats one patient's demographic series over time.

    The rows are the index of ``get_demographic_series(no)`` and the columns
    are the periods from ``get_timeseries_column()``; every column holds the
    same demographic values.

    Args:
        no: patient number passed to ``get_demographic_series``.

    Returns:
        pandas.DataFrame with the demographic series copied into each column.
    '''
    demo_values = get_demographic_series(no)
    demo_frame = pd.DataFrame(
        index=demo_values.index,
        columns=get_timeseries_column(),
    )

    # Copy the same series into every period column.
    for period in demo_frame.columns:
        demo_frame[period] = demo_values

    return demo_frame
예제 #3
0
def get_patient_timeseries_label(no,label_name):
    '''
    Return one patient's label values laid out on the time-series axis.

    Reads the table ``data/<label_name>`` from the HDF5 file at
    ``PREP_OUTPUT_DIR + SAMPLE_PATIENT_PATH``, keeps the rows whose ``no``
    column equals ``no`` and writes each row's label into a Series indexed by
    ``get_timeseries_column()``.  Each row is unpacked as three values, the
    second used as the date key and the third as the label.

    Side effect: the module-level ``PREP_OUTPUT_DIR`` is replaced by the
    value returned from ``check_directory``.

    Args:
        no: patient number to select.
        label_name: name of the table under ``data/`` to read.

    Returns:
        pandas.Series indexed by the time-series columns.
    '''
    global  PREP_OUTPUT_DIR,  SAMPLE_PATIENT_PATH
    # syntax checking existence for directory
    PREP_OUTPUT_DIR = check_directory(PREP_OUTPUT_DIR)
    output_path = PREP_OUTPUT_DIR + SAMPLE_PATIENT_PATH

    # Open the store in a context manager so the file handle is always
    # released; the previous inline HDFStore(...) was never closed.
    with pd.HDFStore(output_path, mode='r') as store:
        x = store.select('data/{}'.format(label_name))

    result_series = pd.Series(index=get_timeseries_column())

    for _, x_date, x_label in x[x.no == no].values:
        result_series[x_date] = x_label

    return result_series
예제 #4
0
def get_labtest_df(no):
    '''
    Build the lab-test time-series frame for one patient.

    Rows are the lab names stored under ``metadata/usecol``; columns are the
    periods returned by ``get_timeseries_column()``.  For every table under
    the ``data`` node, the patient's results are averaged per (no, date) and
    written into the matching cell.  Cells without a result stay NaN; no
    interpolation or forward filling is applied.

    Args:
        no: patient number used to filter each ``data/<lab_name>`` table.

    Returns:
        pandas.DataFrame with one row per lab test and one column per period.

    Raises:
        KeyError: if a table under ``data`` is not listed in ``metadata/usecol``.
    '''
    global labtest_output_path

    # Check for the metadata with a short-lived handle so the file is closed
    # before set_labtest_row() runs.
    with pd.HDFStore(labtest_output_path, mode='r') as store_lab:
        has_usecol = '/metadata/usecol' in store_lab.keys()
    if not has_usecol:
        set_labtest_row()

    col_list = get_timeseries_column()
    # The context manager closes the store even if a select below raises.
    with pd.HDFStore(labtest_output_path, mode='r') as store_lab:
        # create empty dataframe
        use_labtest_values = store_lab.select('metadata/usecol').col.values
        result_df = pd.DataFrame(columns=col_list, index=use_labtest_values)

        lab_node = store_lab.get_node('data')
        for lab_name in lab_node._v_children.keys():
            # Raises KeyError for a lab that is not a row of result_df, as the
            # original row lookup did.
            row_index = result_df.index.get_loc(lab_name)
            target_df = store_lab.select('data/{}'.format(lab_name),
                                         where='no=={}'.format(no))
            # When a test was taken more than once on the same date key,
            # store the mean of the results.
            target_df = target_df.groupby(['no', 'date']).mean().reset_index()

            for _, _date, _result in target_df.values:
                try:
                    col_index = result_df.columns.get_loc(_date)
                except KeyError:
                    # Date outside the time-series columns: nothing to fill
                    # (the previous chained assignment had no effect on
                    # result_df in this case either).
                    continue
                # Assign on the frame itself; the chained form
                # (.loc[lab].loc[date] = v) could write to a temporary copy.
                result_df.iloc[row_index, col_index] = _result

    return result_df
예제 #5
0
def get_diagnosis_df(no):
    '''
    Build the diagnosis time-series frame for one patient.

    Rows are the KCD codes stored under ``metadata/usecol`` (relabelled
    through ``get_index_name_map()`` before returning); columns are the
    periods returned by ``get_timeseries_column()``.  Every diagnosis date is
    marked with 1.  When two consecutive records of the same code are at most
    ``DIAG_TIME_INTERVAL`` apart (as measured by ``get_time_interval``), the
    periods between them are marked with 1 as well.  Other cells stay NaN.

    Args:
        no: patient number used to filter the ``data`` table.

    Returns:
        pandas.DataFrame with one row per diagnosis and one column per period.
    '''
    global diagnosis_output_path, KCD_USE_COLS, DIAG_TIME_INTERVAL

    # Check for the metadata with a short-lived handle so that no read handle
    # is left open while set_diagnosis_row() runs.
    with pd.HDFStore(diagnosis_output_path, mode='r') as store_diag:
        has_usecol = '/metadata/usecol' in store_diag.keys()
    if not has_usecol:
        set_diagnosis_row()

    col_list = get_timeseries_column()
    # The context manager guarantees the file is closed even if a select fails.
    with pd.HDFStore(diagnosis_output_path, mode='r') as store_diag:
        use_diagnosis_values = store_diag.select('metadata/usecol').col.values
        # target patient dataframe
        target_df = store_diag.select('data', where='no=={}'.format(no))

    # create empty dataframe
    result_df = pd.DataFrame(columns=col_list, index=use_diagnosis_values)
    # Sort so that records of the same code are adjacent and in date order.
    target_df = target_df.sort_values(['KCD_code', 'date'], axis=0)

    KCD_code_i = KCD_USE_COLS.index('KCD_code')
    date_i = KCD_USE_COLS.index('date')

    _prev_value = None
    for value in target_df.values:
        _, curr_code, curr_date = value
        if curr_code in use_diagnosis_values:

            if _prev_value is not None and _prev_value[KCD_code_i] == curr_code:
                prev_date = _prev_value[date_i]

                if get_time_interval(prev_date,
                                     curr_date) <= DIAG_TIME_INTERVAL:
                    start_index = result_df.columns.get_loc(prev_date)
                    end_index = result_df.columns.get_loc(curr_date)
                    # NOTE(review): this row is looked up through
                    # int(value[KCD_code_i]) while the rest of the loop uses
                    # curr_code; kept as-is, confirm the two always agree.
                    gap_row = result_df.index.get_loc(int(value[KCD_code_i]))
                    # Assign on the frame itself; the chained form
                    # (.loc[...].iloc[...] = 1) could write to a temporary copy.
                    result_df.iloc[gap_row, start_index:end_index] = 1

            try:
                date_index = result_df.columns.get_loc(curr_date)
            except KeyError:
                # Date outside the time-series columns: nothing to mark (the
                # previous chained assignment had no effect on result_df in
                # this case either).
                pass
            else:
                result_df.iloc[result_df.index.get_loc(curr_code),
                               date_index] = 1
        # The previous record is tracked even when its code is not selected.
        _prev_value = value

    index_name_dict = get_index_name_map()
    result_df.index = result_df.index.map(index_name_dict.get)
    return result_df