def get_prescribe_df(no):
    '''
    Build the prescription time-series matrix for one patient.

    Rows are the tracked medication codes read from ``metadata/usecol`` of the
    prescription store; columns are the values returned by
    ``get_timeseries_column()``.  For every prescription row of the patient
    whose code is tracked, the cells from the prescription date through
    ``_times`` further columns (inclusive) are set to 1.  Every other cell
    stays NaN.

    Parameters
    ----------
    no : patient number, substituted into the ``no==...`` store query.

    Returns
    -------
    pandas.DataFrame
        The matrix described above, with each row label replaced by
        ``get_index_name_map().get(label)`` (``None`` when there is no entry).

    Raises
    ------
    KeyError
        If the date of a tracked prescription is not a time-series column.
    '''
    global prescribe_output_path, MEDI_USE_COLS

    # Close the store before set_prescribe_row() runs so that helper never has
    # to touch a file this function still holds open.
    # NOTE(review): presumably set_prescribe_row() creates 'metadata/usecol';
    # it is defined elsewhere - confirm.
    with pd.HDFStore(prescribe_output_path, mode='r') as store_pres:
        has_usecol = '/metadata/usecol' in store_pres.keys()
    if not has_usecol:
        set_prescribe_row()

    col_list = get_timeseries_column()

    # Read everything that is needed, then release the file handle.
    with pd.HDFStore(prescribe_output_path, mode='r') as store_pres:
        use_prescribe_values = store_pres.select('metadata/usecol').col.values
        target_df = store_pres.select('data', where='no=={}'.format(no))

    # Empty (all-NaN) frame: one row per tracked code, one column per time step.
    result_df = pd.DataFrame(columns=col_list, index=use_prescribe_values)
    timeline = result_df.columns

    for _, _medi_code, _date, _times in target_df.values:
        if _medi_code not in use_prescribe_values:
            continue
        row_pos = result_df.index.get_loc(_medi_code)
        start_index = timeline.get_loc(_date)
        end_index = start_index + _times + 1
        # One positional write straight into the frame.  The former
        # ``result_df.loc[code].iloc[a:b] = 1`` assigned into an intermediate
        # row object and only reached the frame when that row was a view.
        result_df.iloc[row_pos, start_index:end_index] = 1
    del target_df

    index_name_dict = get_index_name_map()
    result_df.index = result_df.index.map(index_name_dict.get)
    return result_df
def get_demo_df(no):
    '''
    Repeat one patient's demographic values across every time-series column.

    The result is indexed like the object returned by
    ``get_demographic_series(no)`` and has one column per entry of
    ``get_timeseries_column()``; each column is assigned that same
    demographic series.

    Parameters
    ----------
    no : patient number, forwarded to ``get_demographic_series``.

    Returns
    -------
    pandas.DataFrame
    '''
    demographics = get_demographic_series(no)
    timeline = get_timeseries_column()
    demo_frame = pd.DataFrame(index=demographics.index, columns=timeline)
    # Column-by-column assignment keeps pandas' index alignment in play.
    for time_col in demo_frame.columns:
        demo_frame[time_col] = demographics
    return demo_frame
def get_patient_timeseries_label(no, label_name):
    '''
    Return one patient's label values laid out on the time-series axis.

    Reads the table ``data/<label_name>`` from the HDF5 file at
    ``PREP_OUTPUT_DIR + SAMPLE_PATIENT_PATH``, keeps the rows whose ``no``
    equals the given patient number, and writes each row's label into a
    Series indexed by ``get_timeseries_column()``.

    Side effect: the module-level ``PREP_OUTPUT_DIR`` is reassigned to the
    value returned by ``check_directory(PREP_OUTPUT_DIR)``.

    Parameters
    ----------
    no : patient number to filter on.
    label_name : name of the table under ``data/`` to read.

    Returns
    -------
    pandas.Series
        Positions for which the patient has no row keep the Series' initial
        missing value.
    '''
    global PREP_OUTPUT_DIR, SAMPLE_PATIENT_PATH

    # check_directory() is defined elsewhere; its result replaces the global.
    PREP_OUTPUT_DIR = check_directory(PREP_OUTPUT_DIR)
    output_path = PREP_OUTPUT_DIR + SAMPLE_PATIENT_PATH

    # The context manager closes the file handle; previously the store was
    # opened inline and never closed.
    with pd.HDFStore(output_path, mode='r') as store:
        label_df = store.select('data/{}'.format(label_name))

    result_series = pd.Series(index=get_timeseries_column())
    # Each row is unpacked positionally as (<ignored>, date, label).
    for _, x_date, x_label in label_df[label_df.no == no].values:
        result_series[x_date] = x_label
    return result_series
def get_labtest_df(no):
    '''
    Build the lab-test time-series matrix for one patient.

    Rows are the lab names read from ``metadata/usecol`` of the lab-test
    store; columns are the values returned by ``get_timeseries_column()``.
    For every table found under the store's ``data`` node, the patient's rows
    are averaged per ``(no, date)`` and the mean is written into the cell for
    that lab and date.  Cells without a measurement stay NaN; no interpolation
    or forward fill is applied.

    Parameters
    ----------
    no : patient number, substituted into the ``no==...`` store query.

    Returns
    -------
    pandas.DataFrame
        The matrix described above, with row labels left as stored.

    Raises
    ------
    KeyError
        If a table under ``data`` has a name that is not one of the tracked
        row labels.
    '''
    global labtest_output_path

    # Check for the metadata table with the store closed again before
    # set_labtest_row() runs.
    # NOTE(review): presumably set_labtest_row() creates 'metadata/usecol';
    # it is defined elsewhere - confirm.
    with pd.HDFStore(labtest_output_path, mode='r') as store_lab:
        has_usecol = '/metadata/usecol' in store_lab.keys()
    if not has_usecol:
        set_labtest_row()

    col_list = get_timeseries_column()

    # The context manager closes the store even when a query below raises.
    with pd.HDFStore(labtest_output_path, mode='r') as store_lab:
        use_labtest_values = store_lab.select('metadata/usecol').col.values
        # Empty (all-NaN) frame: one row per lab, one column per time step.
        result_df = pd.DataFrame(columns=col_list, index=use_labtest_values)
        timeline = result_df.columns

        lab_node = store_lab.get_node('data')
        for lab_name in lab_node._v_children.keys():
            # Raises KeyError for a lab that is not a tracked row, exactly as
            # the former ``result_df.loc[lab_name]`` lookup did.
            row_pos = result_df.index.get_loc(lab_name)

            target_df = store_lab.select('data/{}'.format(lab_name),
                                         where='no=={}'.format(no))
            # Several results sharing one date value are stored as their mean
            # (the original note describes this as "more than once in the
            # same month").
            target_df = target_df.groupby(['no', 'date']).mean().reset_index()

            for _, _date, _result in target_df.values:
                # Dates outside the timeline were silently dropped by the old
                # chained assignment; keep ignoring them.
                if _date not in timeline:
                    continue
                # One positional write straight into the frame.  The former
                # ``result_df.loc[lab].loc[date] = value`` assigned into an
                # intermediate row object and only reached the frame when
                # that row was a view.
                result_df.iloc[row_pos, timeline.get_loc(_date)] = _result

    return result_df
def get_diagnosis_df(no):
    '''
    Build the diagnosis time-series matrix for one patient.

    Rows are the tracked KCD codes read from ``metadata/usecol`` of the
    diagnosis store; columns are the values returned by
    ``get_timeseries_column()``.  The patient's rows are sorted by
    ``KCD_code`` then ``date``.  Each tracked diagnosis sets its own
    (code, date) cell to 1.  When the previous row has the same code and
    ``get_time_interval(prev_date, curr_date) <= DIAG_TIME_INTERVAL``, the
    cells from the previous date up to the current date are set to 1 as well.
    Every other cell stays NaN.

    Parameters
    ----------
    no : patient number, substituted into the ``no==...`` store query.

    Returns
    -------
    pandas.DataFrame
        The matrix described above, with each row label replaced by
        ``get_index_name_map().get(label)`` (``None`` when there is no entry).

    Raises
    ------
    KeyError
        If a gap has to be filled and one of its two dates is not a
        time-series column.
    '''
    global diagnosis_output_path, KCD_USE_COLS, DIAG_TIME_INTERVAL

    # Close the store before set_diagnosis_row() runs so that helper never has
    # to touch a file this function still holds open.
    # NOTE(review): presumably set_diagnosis_row() creates 'metadata/usecol';
    # it is defined elsewhere - confirm.
    with pd.HDFStore(diagnosis_output_path, mode='r') as store_diag:
        has_usecol = '/metadata/usecol' in store_diag.keys()
    if not has_usecol:
        set_diagnosis_row()

    col_list = get_timeseries_column()

    # Read everything that is needed, then release the file handle.
    with pd.HDFStore(diagnosis_output_path, mode='r') as store_diag:
        use_diagnosis_values = store_diag.select('metadata/usecol').col.values
        target_df = store_diag.select('data', where='no=={}'.format(no))

    # Empty (all-NaN) frame: one row per tracked code, one column per time step.
    result_df = pd.DataFrame(columns=col_list, index=use_diagnosis_values)
    timeline = result_df.columns

    # Sorting makes rows with the same code adjacent and in date order.
    target_df = target_df.sort_values(['KCD_code', 'date'], axis=0)

    # NOTE(review): the current row is unpacked positionally while the previous
    # row is read through KCD_USE_COLS positions; both assume the stored column
    # order is (no, KCD_code, date) - confirm.
    KCD_code_i = KCD_USE_COLS.index('KCD_code')
    date_i = KCD_USE_COLS.index('date')

    prev_value = None
    for value in target_df.values:
        _, curr_code, curr_date = value
        if curr_code not in use_diagnosis_values:
            continue

        if prev_value is not None and prev_value[KCD_code_i] == curr_code:
            prev_date = prev_value[date_i]
            if get_time_interval(prev_date, curr_date) <= DIAG_TIME_INTERVAL:
                start_index = timeline.get_loc(prev_date)
                end_index = timeline.get_loc(curr_date)
                fill_row = result_df.index.get_loc(int(value[KCD_code_i]))
                # Fill the gap between two close visits with one positional
                # write straight into the frame.
                result_df.iloc[fill_row, start_index:end_index] = 1

        # Mark the visit itself.  Dates outside the timeline were silently
        # dropped by the old chained assignment; keep ignoring them.
        if curr_date in timeline:
            row_pos = result_df.index.get_loc(curr_code)
            result_df.iloc[row_pos, timeline.get_loc(curr_date)] = 1
        prev_value = value
    del target_df

    index_name_dict = get_index_name_map()
    result_df.index = result_df.index.map(index_name_dict.get)
    return result_df