ctrl_var = inputYAML['control_var']

### 1.Overview on combined data ###
# Every step below is switched on by a flag read from inputYAML. The flags
# are compared with True explicitly, so a truthy non-boolean value such as
# the string "yes" does not enable a step.

#############################
### For checking missings ###
#############################
checkMissing = inputYAML['check_missing']
if checkMissing == True:
    func.check_missing(combined_df, col, file_name)

###################################
### For getting some basic info ###
###################################
basicInfo = inputYAML['basic_Information']
if basicInfo == True:
    func.data_describe(combined_df, col, file_name)

#######################################
### Function for correlation matrix ###
#######################################
CorrMatrix = inputYAML['correlation_matrix']
if CorrMatrix == True:
    # Only the selected columns are passed to the correlation step.
    func.corr_Matrix(combined_df[col], file_name)

# Remove the correlation CSV for this file_name if it is present on disk.
# NOTE(review): the original indentation was lost; this cleanup is placed at
# top level (it is already guarded by os.path.exists) -- confirm that it was
# not meant to be nested under the CorrMatrix check above.
existFile = "output/%s_Corr.csv" % file_name
if os.path.exists(existFile):
    os.remove(existFile)

######################################
### Function for distribution plot ###
######################################
age.append(float(i)) except: age.append(float(i[:-1])) elif type(i) == float: age.append(i) elif type(i) == int: age.append(i) ### Add new age column ### df_vektis['AGE'] = age ### For getting some basic info ### if input['check_missing'] == True: func.check_missing(df, col, year) if input['data_description'] == True: func.data_describe(df, col, year) ### For three plots ### loop = input['age_range'] for i in loop: df_avg = func.groupAgeRange(df_vektis, i, 0) if input['correlation_matrix'] == True: func.corr_Matrix(df_avg, i, year) if input['pie_chart'] == True: func.pie_Chart(df_avg, i, year) if input['distribution_plot'] == True: func.dist_Plot(df_avg, 'SUM', i, year)
else: col = df.drop(excluded_features, axis=1).columns else: col = selected_features except: logger.error("Some of your selected_features and excluded_features are not in the dataset") else: ### Check missing values in the dataset ### if inputYAML['check_missing'] == True: func.check_missing(df, col, file_name) ### Get the basic description about the dataset ### if inputYAML['data_description'] == True: func.data_describe(df, col, file_name) ### Function for correlation matrix ### if inputYAML['correlation_matrix'] == True: func.corr_Matrix(df[col], file_name) ### Separate features to numerical and categorical ### numFea = [] catFea = [] for c in col: if len(Counter(df[c].dropna())) > 20: numFea.append(c) else: catFea.append(c) ### Function for distribution plot ###