コード例 #1
0
    # Convert one raw value `i` and append it to the `age` list. The loop
    # header that binds `i` is above this excerpt.
    # NOTE(review): the checks use exact type equality, so subclasses of
    # str/float/int (e.g. bool) match no branch and append nothing to `age`.
    if type(i) == str:
        try:
            # Strings that parse directly as a number are stored as floats.
            age.append(float(i))
        except:
            # Fallback: drop the last character and parse the remainder
            # (presumably a trailing non-numeric marker — TODO confirm).
            # NOTE(review): the bare `except` catches every exception, not
            # only the parse failure; a failure of this second parse is not
            # handled here.
            age.append(float(i[:-1]))
    elif type(i) == float:
        # Floats are appended unchanged.
        age.append(i)
    elif type(i) == int:
        # Ints are appended unchanged (they are not converted to float).
        age.append(i)

### Store the collected ages as the AGE column ###
df_vektis['AGE'] = age

### Optional overview reports, each switched on by its flag in `input` ###
# Pairs are (flag key in `input`, name of the reporting function on `func`).
for flag, report_name in (('check_missing', 'check_missing'),
                          ('data_description', 'data_describe')):
    # Explicit comparison kept on purpose: only a value equal to True
    # enables the report.
    if input[flag] == True:
        getattr(func, report_name)(df, col, year)

### For three plots ###
# Every entry of input['age_range'] is processed on its own: the entry is
# passed to func.groupAgeRange and each enabled plot is drawn from the result.
loop = input['age_range']
for i in loop:
    # The third argument is a literal 0; its meaning is defined by
    # func.groupAgeRange, which is not visible in this excerpt.
    df_avg = func.groupAgeRange(df_vektis, i, 0)

    # Plot switches are compared with `== True`, so only a value equal to
    # True enables the corresponding plot.
    if input['correlation_matrix'] == True:
        func.corr_Matrix(df_avg, i, year)

    if input['pie_chart'] == True:
        func.pie_Chart(df_avg, i, year)

    # The body of the following branch lies beyond the end of this excerpt.
    if input['distribution_plot'] == True:
コード例 #2
0
ファイル: model.py プロジェクト: sunchang0124/DataSharing
# Sort every column into numFea or catFea by its number of distinct values
# after dropna(): more than 10 goes to numFea, everything else to catFea.
for c in col:
    n_distinct = len(Counter(combined_df[c].dropna()))
    target = numFea if n_distinct > 10 else catFea
    target.append(c)

# Run the requested overview steps once per task. The per-task settings in
# `input` are lists indexed in parallel with input['taskName'], so the same
# position `i` selects the options that belong to task `file`.
for i, file in enumerate(input['taskName']):

    ###############################
    # 1.Overview on combined data #
    ###############################
    # Flags are compared with `== True` on purpose: only a value equal to
    # True enables a step.

    ### For getting some basic info ###
    checkMissing = input['check_missing'][i]
    if checkMissing == True:
        func.check_missing(combined_df, col, file)

    ### Function for correlation matrix ###
    CorrMatrix = input['correlation_matrix'][i]
    if CorrMatrix == True:
        func.corr_Matrix(combined_df[col], file)

    ### Function for Cat-Num plot ###
    CN_plot = input["Cat_Num_plot"][i]
    if CN_plot == True:
        CN_feature = input["Cat_Num_feature"][i]
        # No emptiness check needed: iterating an empty collection simply
        # draws nothing.
        for f in CN_feature:
            print(f)
            func.plot_catNum(combined_df, f, file)
コード例 #3
0
            # Resolve the working column set `col` from the two feature
            # settings. The matching `try:` line and the assignment of
            # `selected_features` are above this excerpt.
            excluded_features = inputYAML['excluded_features']
            if selected_features == "ALL":
                # `excluded_features` is compared with False explicitly; any
                # other value is handed to df.drop as the columns to remove.
                if excluded_features == False: 
                    col = df.columns
                else:
                    col = df.drop(excluded_features, axis=1).columns
            else:
                # An explicit selection is used as-is; it is not checked
                # against df in this branch.
                col = selected_features

        # NOTE(review): the bare `except` turns every exception raised in the
        # try body above into this single log message, whatever its cause.
        except:
            logger.error("Some of your selected_features and excluded_features are not in the dataset")
        
        # Runs only when the preceding try body finished without raising,
        # i.e. `col` has been assigned.
        else:
            # Each optional step is enabled by a flag in inputYAML that is
            # compared with `== True`, so only a value equal to True turns
            # the step on.

            ### Check missing values in the dataset ###
            if inputYAML['check_missing'] == True:
                func.check_missing(df, col, file_name)

            ### Get the basic description about the dataset ###
            if inputYAML['data_description'] == True:
                func.data_describe(df, col, file_name)

            ### Function for correlation matrix ###
            if inputYAML['correlation_matrix'] == True:
                func.corr_Matrix(df[col], file_name)

            ### Separate features to numerical and categorical ###
            numFea = []
            catFea = []
            # A column with more than 20 distinct values after dropna() is
            # added to numFea. The rest of this loop lies beyond the end of
            # the excerpt.
            for c in col:
                if len(Counter(df[c].dropna())) > 20:
                    numFea.append(c)