コード例 #1
0
# Load the survey export (comma separated, Windows-1252 encoded).
data1 = pd.read_csv("Data_analysis_Data_IER.csv", sep=",", encoding='cp1252')

# Split the data into one sample per survey year.
data1.sort_values(by=['year'], inplace=True)
# Select on the year value instead of fixed row positions: the previous
# iloc[:94] / iloc[95:] split silently skipped the row at position 94 and
# would break as soon as the number of rows per year changed.
# NOTE(review): assumes 'year' holds the numeric values 2019 and 2020 --
# confirm against the CSV.
df_1_t = data1[data1['year'] == 2019]  # all the samples of 2019
# Sorting without inplace=True yields a new frame, so nothing is mutated
# through a slice of data1 (avoids pandas' chained-assignment warnings).
df_2 = data1[data1['year'] == 2020].sort_values(by=['living'])  # 2020, sorted by living situation
# NOTE: the sample sizes of 2019 and 2020 are not equalised; the earlier idea
# of deleting three random 2020 entries is intentionally not applied.
# drop(57) removes the row whose index *label* is 57 (the sample with the NaN
# value), then the 2019 sample is sorted by living situation.
df_1 = df_1_t.drop(57).sort_values(by=['living'])

# Build "table 1" over the full data set, print it and export it to CSV.
columns = ['year', 'gender', 'bmi', 'living']
mytable = TableOne(data1, columns=columns, pval=False)
print(mytable.tabulate(tablefmt="fancy_grid"))
mytable.to_csv('mytable.csv')

# 2019: split into "moved out" vs. "living with parents" and take the mean
# of the attitu_2 column for each group.
grouped1 = df_1.groupby('living')
Moved_out_2019 = grouped1.get_group("Moved_out")
mean1 = Moved_out_2019["attitu_2"].mean()
Parents_2019 = grouped1.get_group("Living_with_parents")
mean2 = Parents_2019["attitu_2"].mean()

# 2020: same split and the same attitu_2 means.
grouped2 = df_2.groupby('living')
Moved_out_2020 = grouped2.get_group("Moved_out")
mean3 = Moved_out_2020["attitu_2"].mean()
Parents_2020 = grouped2.get_group("Living_with_parents")
mean4 = Parents_2020["attitu_2"].mean()
コード例 #2
0
!pip install tableone
from tableone import TableOne

# Replace missing diabetes_flag values with 0. The result is assigned back to
# the column instead of calling fillna(inplace=True) on the selected Series:
# the chained in-place form operates on a temporary object, raises a
# FutureWarning on recent pandas and does nothing under Copy-on-Write.
df_day1['diabetes_flag'] = df_day1['diabetes_flag'].fillna(value=0)

# Summary table of the listed columns, grouped by hospital discharge status,
# with p-values; printed in GitHub-flavoured markdown.
categorical = ['gender', 'ethnicity', 'diabetes_flag',
               'surgery']
groupby = 'hospitaldischargestatus'
columns = ['gender', 'age', 'ethnicity', 'diabetes_flag',
           'apachescore', 'Glucose_mean', 'delta_glucose_mean',
           'surgery']
glu_table = TableOne(df_day1, groupby=groupby,
                     columns=columns, categorical=categorical,
                     pval=True)
print(glu_table.tabulate(tablefmt="github"))

# Heading for the counts below (the closing tag was previously a second
# opening <H4>, leaving the heading element unterminated).
display(HTML('<H4>Brain problem counts</H4>'))

# Count the rows per brain_problem label once, then report each count and
# their sum. The total covers only the five labels listed here, not every row
# of df_day1.
brain_problem_counts = {
    label: int((df_day1["brain_problem"] == label).sum())
    for label in ("AIS", "HEM", "Other", "TBI", "SZ")
}
for label, count in brain_problem_counts.items():
    print(f'{label}: {count}')
print(f'Total: {sum(brain_problem_counts.values())}')

"""## Modeling"""
コード例 #3
0
# Split the participants by survey year, then by gender within each year.
participants_2019 = data[data['year'].isin([2019])]

participants_2019_male = participants_2019[participants_2019['gender'].isin(['Male'])]
participants_2019_female = participants_2019[participants_2019['gender'].isin(['Female'])]

participants_2020 = data[data['year'].isin([2020])]

participants_2020_male = participants_2020[participants_2020['gender'].isin(['Male'])]
participants_2020_female = participants_2020[participants_2020['gender'].isin(['Female'])]

# CREATING TABLE ONE
# One table per year, grouped by gender, printed as LaTeX.
columns = ['gender', 'bmi', 'living']
categorical = ['living']
groupby = 'gender'
mytable = TableOne(participants_2019, columns=columns, categorical=categorical, groupby=groupby)
print(mytable.tabulate(tablefmt="latex"))
mytable_2 = TableOne(participants_2020, columns=columns, categorical=categorical, groupby=groupby)
print(mytable_2.tabulate(tablefmt="latex"))

# FILTERING NECESSARY DATA AND CALCULATING WEEKEND/WEEKDAY AVERAGES
# All seven app columns are used for the row filter; only five of them go
# into the "weekdays" average.
# NOTE(review): presumably these are per-day step counts from the app and
# columns 3 and 4 are the weekend days -- confirm against the questionnaire.
all_app_columns = ['stap_app_1_aantal', 'stap_app_2_aantal', 'stap_app_3_aantal',
                   'stap_app_4_aantal', 'stap_app_5_aantal', 'stap_app_6_aantal',
                   'stap_app_7_aantal']
weekday_app_columns = ['stap_app_1_aantal', 'stap_app_2_aantal', 'stap_app_6_aantal',
                       'stap_app_7_aantal', 'stap_app_5_aantal']

# Overall 2019 weekdays app
# Keep only rows where none of the seven app columns equals 'nan'.
# NOTE(review): isin(['nan']) matches the literal string 'nan'; real missing
# values (NaN) are presumably not removed here and are instead skipped by
# mean() below (skipna defaults to True) -- confirm this is intended.
temp = participants_2019[~participants_2019[all_app_columns].isin(['nan']).any(axis=1)]
# .copy() makes the subset an independent frame, so adding the 'avg' column
# does not trigger SettingWithCopyWarning.
participants_2019_weekdays_app = temp[weekday_app_columns].copy()
temp_avg = participants_2019_weekdays_app.mean(axis=1)  # row-wise mean over the five columns
participants_2019_weekdays_app['avg'] = temp_avg

# Overall 2020 weekdays app
# Same filter and row-wise average as for 2019.
temp = participants_2020[~participants_2020[all_app_columns].isin(['nan']).any(axis=1)]
participants_2020_weekdays_app = temp[weekday_app_columns].copy()
temp_avg = participants_2020_weekdays_app.mean(axis=1)
participants_2020_weekdays_app['avg'] = temp_avg