def get_classes(df):
    """Return the columns of ``df`` that are listed as features.

    The feature list comes from ``parse.get_features_list(df)``; per the
    original comment it excludes the Index column and every column that
    does not contain exclusively numeric values.

    Args:
        df: Iterable of column names (iterating it yields the names).

    Returns:
        A list of the retained column names, in the iteration order of ``df``.
    """
    feat_list = parse.get_features_list(df)
    # Iterate over df (not feat_list) so the result follows df's column order
    return [feature for feature in df if feature in feat_list]
def my_scatter_plot(df):
    """Display a scatter figure with one marker trace per listed feature.

    Each column of ``df`` that appears in ``parse.get_features_list(df)`` is
    added as a ``go.Scatter`` trace (markers only, opacity 0.8) whose y values
    are the column's values. The figure is then titled and shown.

    Args:
        df: Data set whose iteration yields column names and whose
            ``df[name]`` yields that column's values.

    Returns:
        None. The figure is displayed via ``fig.show()``.
    """
    fig = go.Figure()
    selected = parse.get_features_list(df)
    for column in df:
        # Only the columns present in the feature list are plotted
        # (per the original comment: not the Index, not non-numeric columns)
        if column in selected:
            trace = go.Scatter(y=df[column],
                               name=column,
                               opacity=0.8,
                               mode='markers')
            fig.add_trace(trace)
    fig.update_layout(title="Normalized grades for each class",
                      xaxis_title="Index",
                      yaxis_title="Normalized grades")
    fig.show()
def plot_hist(df):
    """Display one histogram subplot per listed feature, overlaid per house.

    For every column of ``df`` present in ``parse.get_features_list(df)``,
    one subplot is drawn containing a semi-transparent 25-bin histogram for
    each house in ``cst.houses``, coloured with ``cst.houses_colors[house]``.
    Subplots are filled left to right, top to bottom, four per row.

    The grid always has at least 4 rows (the historical 4x4 layout) and grows
    by extra rows when more than 16 features must be plotted, instead of
    failing with an IndexError on the 17th feature.

    Args:
        df: Data set whose iteration yields column names.

    Returns:
        None. The figure is displayed via ``plt.show()``.
    """
    n_cols = 4
    feat_list = parse.get_features_list(df)
    # Keep df's column order so subplot positions match the previous layout
    features = [feature for feature in df if feature in feat_list]
    # Ceiling division without importing math; never fewer than 4 rows so the
    # layout is unchanged for 16 features or fewer (and axs stays 2-D)
    n_rows = max(4, -(-len(features) // n_cols))
    _, axs = plt.subplots(n_rows, n_cols)
    for position, feature in enumerate(features):
        row, col = divmod(position, n_cols)
        ax = axs[row, col]
        for house in cst.houses:
            ax.hist(get_feature_per_house(df, house, feature),
                    bins=25,
                    alpha=0.5,
                    color=cst.houses_colors[house])
        ax.set_title(feature)
    plt.tight_layout()
    plt.show()
def my_histogram(df):
    """Display a bar chart of the standard deviation of each listed feature.

    For every column of ``df`` present in ``parse.get_features_list(df)``,
    the count, mean and standard deviation are computed with the project's
    ``describe`` helpers. One bar per feature is drawn, coloured with
    ``cst.colors`` and labelled with the feature name.

    Args:
        df: Data set whose iteration yields column names and whose
            ``df[name]`` yields that column's values.

    Returns:
        None. The chart is displayed via ``plt.show()``.
    """
    selected = parse.get_features_list(df)
    labels = []
    deviations = []
    for column in df:
        if column not in selected:
            continue
        count = describe.get_count(df[column])
        # The std helper takes the count and the mean as explicit inputs
        mean = describe.get_mean(count, df[column])
        deviations.append(describe.get_std(count, mean, df[column]))
        labels.append(column)
    positions = range(len(labels))
    plt.bar(positions, deviations, color=cst.colors)
    plt.xticks(positions, labels, rotation=-45, fontsize=6, ha="left")
    plt.title(
        "Standard deviation between the student's grades for each feature \n(less std means that the student's grades are homogeneous)"
    )
    plt.show()
def test_describe(df, describe, print_describe=True):
    """Compare a describe table against the one computed by ``df.describe()``.

    The reference table is ``df.describe()`` restricted to the columns from
    the first entry of ``parse.get_features_list(df)`` onwards. For each
    listed feature and each statistic of the reference table, the value in
    ``describe`` is compared to the reference with ``np.isclose``; every
    mismatch is reported through ``warn_diff`` and counted.

    Args:
        df: Data set providing ``describe()`` and iteration over column names.
        describe: Table of our own results, indexed as
            ``describe[feature][statistic]``.
        print_describe: When exactly ``True``, both tables are printed with
            ``tabulate`` after the comparison.

    Returns:
        The number of statistics that differ from the reference.
    """
    feat_list = parse.get_features_list(df)
    # Reference values come from the library implementation, so they can be
    # used to check the values produced by our own functions
    control_values = df.describe().loc[:, feat_list[0]:]
    mismatches = 0
    for feature in (name for name in df if name in feat_list):
        reference = control_values[feature]
        for stat in reference.keys():
            ours = describe[feature][stat]
            expected = reference[stat]
            if np.isclose(ours, expected):
                continue
            # Each significant difference is counted and reported as a warning
            mismatches += 1
            warn_diff(feature, stat, ours, expected)
    if print_describe is True:
        for table in (describe, control_values):
            print(tabulate(table,
                           headers="keys",
                           tablefmt="fancy_grid",
                           floatfmt=".6f"))
    return mismatches