예제 #1
0
    # Read the 'control_var' entry of the YAML config (not used in this section).
    ctrl_var = inputYAML['control_var']

    ### 1. Overview of the combined data ###
    ################################
    ### Check for missing values ###
    ################################
    # Runs only when the 'check_missing' config entry compares equal to True.
    checkMissing = inputYAML['check_missing']
    if checkMissing == True:
        func.check_missing(combined_df, col, file_name)

    #####################################
    ### Basic information on the data ###
    #####################################
    # Runs only when the 'basic_Information' config entry compares equal to True.
    basicInfo = inputYAML['basic_Information']
    if basicInfo == True:
        func.data_describe(combined_df, col, file_name)

    ##########################
    ### Correlation matrix ###
    ##########################
    # combined_df is indexed by `col` before being passed on; the call is made
    # only when the 'correlation_matrix' config entry compares equal to True.
    CorrMatrix = inputYAML['correlation_matrix']
    if CorrMatrix == True:
        func.corr_Matrix(combined_df[col], file_name)

    # Delete output/<file_name>_Corr.csv if it exists. This runs regardless of
    # the 'correlation_matrix' setting.
    # NOTE(review): presumably this is an intermediate file written by
    # func.corr_Matrix -- confirm before changing.
    existFile = "output/%s_Corr.csv" % file_name
    if os.path.exists(existFile):
        os.remove(existFile)

    ######################################
    ### Function for distribution plot ###
    ######################################
예제 #2
0
            age.append(float(i))
        except:
            age.append(float(i[:-1]))
    elif type(i) == float:
        age.append(i)
    elif type(i) == int:
        age.append(i)

### Add new age column ###
# `age` is the list filled by the preceding loop; store it as column 'AGE'.
df_vektis['AGE'] = age

### Optional overview reports, switched on by config flags ###
# NOTE: `input` is subscripted with setting names here, so it is a config
# object that shadows the builtin input().
# NOTE(review): these two calls use `df`, whereas the rest of this section
# works on `df_vektis` -- confirm that is intended.
if input['check_missing'] == True:
    func.check_missing(df, col, year)
if input['data_description'] == True:
    func.data_describe(df, col, year)

### Per-age-range outputs: correlation matrix, pie chart, distribution plot ###
# For every entry of the 'age_range' config value, func.groupAgeRange is
# called on df_vektis and its result is handed to each output whose config
# flag compares equal to True.
loop = input['age_range']
for i in loop:
    df_avg = func.groupAgeRange(df_vektis, i, 0)

    if input['correlation_matrix'] == True:
        func.corr_Matrix(df_avg, i, year)

    if input['pie_chart'] == True:
        func.pie_Chart(df_avg, i, year)

    # The distribution plot is requested for the 'SUM' column.
    if input['distribution_plot'] == True:
        func.dist_Plot(df_avg, 'SUM', i, year)
예제 #3
0
                else:
                    col = df.drop(excluded_features, axis=1).columns
            else:
                col = selected_features

        except:
            logger.error("Some of your selected_features and excluded_features are not in the dataset")
        
        else:
            ### Check missing values in the dataset ###
            # Each step below runs only when its config flag compares equal to True.
            if inputYAML['check_missing'] == True:
                func.check_missing(df, col, file_name)

            ### Get the basic description about the dataset ###
            if inputYAML['data_description'] == True:
                func.data_describe(df, col, file_name)

            ### Function for correlation matrix ###
            # Only df[col] (the selected columns) is passed on.
            if inputYAML['correlation_matrix'] == True:
                func.corr_Matrix(df[col], file_name)

            ### Separate features into numerical and categorical ###
            # Heuristic: a column with more than 20 distinct values remaining
            # after dropna() is treated as numerical; every other column
            # (20 or fewer distinct values) is treated as categorical.
            numFea = []
            catFea = []
            for c in col:
                if len(Counter(df[c].dropna())) > 20:
                    numFea.append(c)
                else:
                    catFea.append(c)

            ### Function for distribution plot ###