# save_dist = df_dist # else: # save_dist = pd.concat([save_dist,df_dist],axis=1, join='inner') # outputFile = "output/Dist_tables/%s_Dist.csv" %file_name # os.makedirs(os.path.dirname(outputFile), exist_ok=True) # if os.path.exists(outputFile) == False: # with open(outputFile, 'w') as f: # save_dist.to_csv(f) # elif os.path.exists(outputFile) == True: # with open(outputFile, 'a') as f: # save_dist.to_csv(f) # ##################### END ######################## for c in range(0, len(col)): func.dist_Plot(combined_df, col[c], ctrl_var) elif ctrl_var in col: list_value = list(Counter(combined_df[ctrl_var]).keys()) if len(list_value) < 6: for i_value in list_value: ctrl_combined_df = combined_df[combined_df[ctrl_var] == i_value] for c in range(0, len(col)): if ctrl_var != col[c]: func.dist_Plot(ctrl_combined_df, col[c], str(ctrl_var + '_' + str(i_value))) else: logger.error( "Sorry, control variable has too many different values! Please choose categorical variable as control" )
# ---------------------------------------------------------------------------
# Reporting / plotting driver for one `year` of data.
# Every step is switched on by its own flag in the `input` configuration
# mapping. The explicit `== True` comparisons are kept on purpose so that
# only values equal to True enable a step (a truthy string would not).
# NOTE(review): nesting was reconstructed from data flow because the
# original indentation was lost -- confirm against the upstream file.
# ---------------------------------------------------------------------------

# Optional data-quality reports on the raw frame.
if input['check_missing'] == True:
    func.check_missing(df, col, year)

if input['data_description'] == True:
    func.data_describe(df, col, year)

# Correlation matrix, pie chart and distribution plot: one pass per
# configured age range, each pass working on the frame returned by
# func.groupAgeRange for that range.
loop = input['age_range']
for i in loop:
    df_avg = func.groupAgeRange(df_vektis, i, 0)

    if input['correlation_matrix'] == True:
        func.corr_Matrix(df_avg, i, year)

    if input['pie_chart'] == True:
        func.pie_Chart(df_avg, i, year)

    if input['distribution_plot'] == True:
        func.dist_Plot(df_avg, 'SUM', i, year)

# Stacked-area plot: the column-wise mean of each grouped frame for the
# values 0..89 becomes one column of df_stack, which is then transposed,
# passed through func.merge and handed to the plotting helper.
if input['stacked_area'] == True:
    loop = list(range(90))
    df_stack = pd.DataFrame()

    for i in loop:
        # The accumulator frame is passed as the third argument here
        # (the per-range loop above passes 0 instead).
        df_avg = func.groupAgeRange(df_vektis, i, df_stack)
        df_stack[i] = df_avg.mean(axis=0, skipna=True)

    df_stack_trans = df_stack.transpose()
    df_stack_trans = func.merge(df_stack_trans)
    func.stacked_Plot(df_stack_trans, year)
    print('Stacked Area plot is done')
### Separate features to numerical and categorical ### numFea = [] catFea = [] for c in col: if len(Counter(df[c].dropna())) > 20: numFea.append(c) else: catFea.append(c) ### Function for distribution plot ### if inputYAML['distribution_plot'] == True: if inputYAML['distribution_feature'] == 'ALL': for f in numFea: try: func.dist_Plot(df[numFea], f, file_name) except: if f not in catFea: logger.error(f, " -- Data type does not support numerical distribution plot") for f in catFea: try: func.cate_Dist(df[catFea], f, file_name) except: if f not in numFea: logger.error(f, " -- Data type does not support categorical distribution plot") else: for f in inputYAML['distribution_feature']: if f in numFea: try: