예제 #1
0
            #         save_dist = df_dist
            #     else:
            #         save_dist = pd.concat([save_dist,df_dist],axis=1, join='inner')

            # outputFile = "output/Dist_tables/%s_Dist.csv" %file_name
            # os.makedirs(os.path.dirname(outputFile), exist_ok=True)

            # if os.path.exists(outputFile) == False:
            #     with open(outputFile, 'w') as f:
            #         save_dist.to_csv(f)
            # elif os.path.exists(outputFile) == True:
            #     with open(outputFile, 'a') as f:
            #         save_dist.to_csv(f)
            # ##################### END  ########################
            # One distribution plot per entry of col; ctrl_var is forwarded
            # unchanged as the third argument.
            for column in col:
                func.dist_Plot(combined_df, column, ctrl_var)

        elif ctrl_var in col:
            list_value = list(Counter(combined_df[ctrl_var]).keys())
            if len(list_value) < 6:
                for i_value in list_value:
                    ctrl_combined_df = combined_df[combined_df[ctrl_var] ==
                                                   i_value]
                    for c in range(0, len(col)):
                        if ctrl_var != col[c]:
                            func.dist_Plot(ctrl_combined_df, col[c],
                                           str(ctrl_var + '_' + str(i_value)))
            else:
                logger.error(
                    "Sorry, control variable has too many different values! Please choose categorical variable as control"
                )
예제 #2
0
# Optional reports on the raw frame, each switched on by its own config flag.
if input['check_missing'] == True:
    func.check_missing(df, col, year)
if input['data_description'] == True:
    func.data_describe(df, col, year)

# Per-age-range figures: each configured age range gets its own grouped frame,
# which feeds whichever of the three plots are enabled.
loop = input['age_range']
for i in loop:
    df_avg = func.groupAgeRange(df_vektis, i, 0)

    if input['correlation_matrix'] == True:
        func.corr_Matrix(df_avg, i, year)

    if input['pie_chart'] == True:
        func.pie_Chart(df_avg, i, year)

    if input['distribution_plot'] == True:
        func.dist_Plot(df_avg, 'SUM', i, year)

# Stacked-area figure: one column of per-column means for each value 0..89.
if input['stacked_area'] == True:
    loop = list(range(90))
    df_stack = pd.DataFrame()
    for i in loop:
        df_avg = func.groupAgeRange(df_vektis, i, df_stack)
        df_stack[i] = df_avg.mean(axis=0, skipna=True)
        # Rebuilt on every pass; only the value from the final pass is plotted.
        df_stack_trans = func.merge(df_stack.transpose())
    func.stacked_Plot(df_stack_trans, year)
    print('Stacked Area plot is done')
예제 #3
0
            ### Split the columns into numerical and categorical features ###
            # A column with more than 20 distinct non-missing values goes to
            # numFea; every other column goes to catFea.
            numFea = []
            catFea = []
            for feature in col:
                distinct_count = len(Counter(df[feature].dropna()))
                target = numFea if distinct_count > 20 else catFea
                target.append(feature)

            ### Function for distribution plot ###
            if inputYAML['distribution_plot'] == True:
                if inputYAML['distribution_feature'] == 'ALL':
                    # Numerical features: one distribution plot each.  A failure
                    # on one feature is logged and the loop moves on to the next.
                    for f in numFea:
                        try:
                            func.dist_Plot(df[numFea], f, file_name)
                        # Exception rather than a bare except, so that
                        # KeyboardInterrupt / SystemExit still propagate.
                        except Exception:
                            if f not in catFea:
                                # Logger methods take the format string first and
                                # its arguments after it; passing the feature name
                                # as the format string made the formatting fail.
                                logger.error("%s -- Data type does not support numerical distribution plot", f)

                    # Categorical features: same pattern with the categorical
                    # plotting helper.
                    for f in catFea:
                        try:
                            func.cate_Dist(df[catFea], f, file_name)
                        except Exception:
                            if f not in numFea:
                                logger.error("%s -- Data type does not support categorical distribution plot", f)

                else:
                    for f in inputYAML['distribution_feature']:
                        if f in numFea:
                            try: