コード例 #1
0
### Add new age column ###
df_vektis['AGE'] = age

# NOTE: every switch below is deliberately compared with `== True`, so only a
# boolean True (or the integer 1) enables a step; other truthy values, such as
# a non-empty string, do not.

### Optional basic info about the data ###
if input['check_missing'] == True:
    func.check_missing(df, col, year)
if input['data_description'] == True:
    func.data_describe(df, col, year)

### Three plots, produced once per configured age range ###
for i in input['age_range']:
    # The literal 0 fills the argument slot that receives `df_stack` in the
    # stacked-area section below.
    df_avg = func.groupAgeRange(df_vektis, i, 0)

    if input['correlation_matrix'] == True:
        func.corr_Matrix(df_avg, i, year)

    if input['pie_chart'] == True:
        func.pie_Chart(df_avg, i, year)

    if input['distribution_plot'] == True:
        func.dist_Plot(df_avg, 'SUM', i, year)

### Only for the Stack plot ###
if input['stacked_area'] == True:
    df_stack = pd.DataFrame()
    # i runs over 0..89; iterate the range directly instead of building a list.
    for i in range(90):
        df_avg = func.groupAgeRange(df_vektis, i, df_stack)
        # One column per i: the column-wise mean of df_avg, NaNs skipped.
        df_stack[i] = df_avg.mean(axis=0, skipna=True)
    # Transpose once, after the table is complete. The loop only ever hands
    # `df_stack` (never the transpose) to groupAgeRange, so the 89 intermediate
    # transposes computed previously were discarded work.
    df_stack_trans = df_stack.transpose()
コード例 #2
0
ファイル: model.py プロジェクト: sunchang0124/DataSharing
for i, file in enumerate(input['taskName']):

    ###############################
    # 1.Overview on combined data #
    ###############################
    # The settings read below are indexed by task position: entry i of each
    # one belongs to the task named `file`.

    ### Missing-value check ###
    if input['check_missing'][i] == True:
        func.check_missing(combined_df, col, file)

    ### Correlation matrix over the selected columns ###
    if input['correlation_matrix'][i] == True:
        func.corr_Matrix(combined_df[col], file)

    ### Cat-Num plot, one call per configured feature ###
    if input["Cat_Num_plot"][i] == True:
        # An empty feature list simply yields no iterations.
        for feature in input["Cat_Num_feature"][i]:
            print(feature)
            func.plot_catNum(combined_df, feature, file)

    ### Function for Box plot ###
    # NOTE(review): unlike the settings read earlier in this loop, "Box_plot"
    # and "Box_plot_feature" are read WITHOUT the per-task index [i]. If they
    # are per-task lists like the others, `BoxPlot == True` compares a whole
    # list with True and is never satisfied — confirm against the config schema.
    BoxPlot = input["Box_plot"]
    if BoxPlot == True:
        BoxPlot_feature = input["Box_plot_feature"]
        # Proceed only when at least one feature is configured.
        if len(BoxPlot_feature) > 0:
コード例 #3
0
        except:
            logger.error("Some of your selected_features and excluded_features are not in the dataset")
        
        else:
            ### Report missing values when requested ###
            if inputYAML['check_missing'] == True:
                func.check_missing(df, col, file_name)

            ### Basic description of the dataset when requested ###
            if inputYAML['data_description'] == True:
                func.data_describe(df, col, file_name)

            ### Correlation matrix, restricted to the selected columns ###
            if inputYAML['correlation_matrix'] == True:
                func.corr_Matrix(df[col], file_name)

            ### Split features into numerical and categorical ###
            # A feature with more than 20 distinct non-null values is treated
            # as numerical; anything else is treated as categorical.
            numFea, catFea = [], []
            for feature in col:
                n_distinct = len(Counter(df[feature].dropna()))
                bucket = numFea if n_distinct > 20 else catFea
                bucket.append(feature)

            ### Function for distribution plot ###
            if inputYAML['distribution_plot'] == True:
                if inputYAML['distribution_feature'] == 'ALL':
                    for f in numFea:
                        try: