if type(i) == str: try: age.append(float(i)) except: age.append(float(i[:-1])) elif type(i) == float: age.append(i) elif type(i) == int: age.append(i) ### Add new age column ### df_vektis['AGE'] = age ### For getting some basic info ### if input['check_missing'] == True: func.check_missing(df, col, year) if input['data_description'] == True: func.data_describe(df, col, year) ### For three plots ### loop = input['age_range'] for i in loop: df_avg = func.groupAgeRange(df_vektis, i, 0) if input['correlation_matrix'] == True: func.corr_Matrix(df_avg, i, year) if input['pie_chart'] == True: func.pie_Chart(df_avg, i, year) if input['distribution_plot'] == True:
# Split the selected columns by how many distinct non-missing values they hold:
# more than 10 -> numFea, otherwise -> catFea.
for c in col:
    distinct_values = set(combined_df[c].dropna())
    target = numFea if len(distinct_values) > 10 else catFea
    target.append(c)

# Every per-task option in `input` is indexed in parallel with input['taskName'];
# position `i` selects the settings that belong to task name `file`.
for i, file in enumerate(input['taskName']):

    ###############################
    # 1.Overview on combined data #
    ###############################

    # Optional step: func.check_missing on the selected columns.
    if input['check_missing'][i] == True:
        func.check_missing(combined_df, col, file)

    # Optional step: func.corr_Matrix on the selected columns only.
    if input['correlation_matrix'][i] == True:
        func.corr_Matrix(combined_df[col], file)

    # Optional step: one func.plot_catNum call per configured entry.
    # The feature list is only read when the plot is switched on.
    if input["Cat_Num_plot"][i] == True:
        cat_num_features = input["Cat_Num_feature"][i]
        if len(cat_num_features) > 0:
            for feature in cat_num_features:
                print(feature)
                func.plot_catNum(combined_df, feature, file)
excluded_features = inputYAML['excluded_features'] if selected_features == "ALL": if excluded_features == False: col = df.columns else: col = df.drop(excluded_features, axis=1).columns else: col = selected_features except: logger.error("Some of your selected_features and excluded_features are not in the dataset") else: ### Check missing values in the dataset ### if inputYAML['check_missing'] == True: func.check_missing(df, col, file_name) ### Get the basic description about the dataset ### if inputYAML['data_description'] == True: func.data_describe(df, col, file_name) ### Function for correlation matrix ### if inputYAML['correlation_matrix'] == True: func.corr_Matrix(df[col], file_name) ### Separate features to numerical and categorical ### numFea = [] catFea = [] for c in col: if len(Counter(df[c].dropna())) > 20: numFea.append(c)