def prepare_data():
    """Return five body-measurement columns for a mixed male/female sample.

    Draws 500 random rows from the male ANSUR II loader, then 500 from the
    female loader, stacks the two samples (male rows first) and keeps only
    the selected measurement columns.
    """
    selected_columns = [
        'elbowrestheight',
        'wristcircumference',
        'anklecircumference',
        'buttockheight',
        'crotchheight',
    ]

    male_sample = load_ansur_ii_male_data().sample(500)
    female_sample = load_ansur_ii_female_data().sample(500)
    combined = pd.concat([male_sample, female_sample])

    return combined[selected_columns]
def prepare_data():
    """Return a mixed male/female sample without the label/derived columns.

    Samples 500 random rows from each ANSUR II loader (male first), stacks
    them, and drops the columns listed in ``excluded_columns``.
    """
    sampled_frames = [
        load_ansur_ii_male_data().sample(500),
        load_ansur_ii_female_data().sample(500),
    ]
    excluded_columns = [
        'Branch',
        'Gender',
        'Component',
        "weight_kg",
        "stature_m",
        "BMI_class",
        "Height_class",
    ]
    return pd.concat(sampled_frames).drop(columns=excluded_columns)
def prepare_data():
    """Build a head-measurement sample with two synthetic columns.

    Draws 500 random rows from each of the male and female ANSUR II
    loaders, stacks them, keeps four head-measurement columns and appends:

    * ``n_hairs`` -- one normal draw per row (mean 100000, sd 10);
    * ``measurement_error`` -- one normal draw per row (mean 0.1,
      sd 1.387893e-17), i.e. constant up to floating-point rounding.

    Returns:
        A new frame holding the four measurement columns plus the two
        synthetic ones. It is an independent copy, so the added columns
        never touch the concatenated source frame.
    """
    ansur_ii_male = load_ansur_ii_male_data().sample(500)
    ansur_ii_female = load_ansur_ii_female_data().sample(500)
    df = pd.concat([ansur_ii_male, ansur_ii_female])

    cols = [
        'headbreadth',
        'headcircumference',
        'headlength',
        'tragiontopofhead',
    ]
    # Explicit copy: assigning new columns to a bare column selection can
    # raise SettingWithCopyWarning.
    head_df = df[cols].copy()

    # `size` is required here: without it np.random.normal returns a single
    # scalar, which pandas broadcasts to every row and yields a constant
    # (zero-variance) column instead of per-row noise.
    n_rows = len(head_df)
    head_df["n_hairs"] = np.random.normal(100000, 10, size=n_rows)
    head_df["measurement_error"] = np.random.normal(
        0.1, 1.387893e-17, size=n_rows
    )
    return head_df
# NOTE(review): this line is whitespace-collapsed and holds three things:
#   1. The tail of a function whose `def` header is not visible here
#      (presumably fitting_tsne_to_ansur_data, which the driver below calls
#      -- confirm). The visible part calls m.fit_transform(df_numeric),
#      prints the shape of the result and returns it.
#   2. tsne_visualization_of_dimensionality(df, tsne_features): writes
#      columns 0 and 1 of tsne_features into df["x"] and df["y"] (this
#      mutates the caller's frame), then draws and shows three seaborn
#      scatterplots of x vs. y, coloured by "Component", "Branch" and
#      "Gender" respectively.
#   3. A module-level driver: loads the male and female ANSUR II frames
#      (no sampling), concatenates them, computes the t-SNE features and
#      passes both to the visualization function.
# Restore the original line breaks and indentation before running this code.
# Fit and transform the t-SNE model on the numeric dataset tsne_features = m.fit_transform(df_numeric) print(tsne_features.shape) return tsne_features def tsne_visualization_of_dimensionality(df, tsne_features): df["x"] = tsne_features[:, 0] df["y"] = tsne_features[:, 1] # Color the points according to Army Component sns.scatterplot(x="x", y="y", hue="Component", data=df) # Show the plot plt.show() # Color the points by Army Branch sns.scatterplot(x="x", y="y", hue="Branch", data=df) plt.show() # Color the points by Gender sns.scatterplot(x="x", y="y", hue="Gender", data=df) plt.show() ansur_ii_male = load_ansur_ii_male_data() ansur_ii_female = load_ansur_ii_female_data() df = pd.concat([ansur_ii_male, ansur_ii_female]) tsne_features = fitting_tsne_to_ansur_data(df) tsne_visualization_of_dimensionality(df, tsne_features)
def accuracy_after_dimensionality_reduction(ansur_df):
    """Fit an SVC on a single feature and print test/train accuracy.

    Only the 'neckcircumferencebase' column of ``ansur_df`` is used as the
    feature matrix; the 'Gender' column is the target. 30% of the rows are
    held out for testing. The scores are printed; nothing is returned.
    """
    features = ansur_df[["neckcircumferencebase"]]
    target = ansur_df["Gender"]

    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.3
    )

    classifier = SVC()
    classifier.fit(X_train, y_train)

    # Score the training split first, then the held-out split.
    train_predictions = classifier.predict(X_train)
    accuracy_train = accuracy_score(y_train, train_predictions)
    test_predictions = classifier.predict(X_test)
    accuracy_test = accuracy_score(y_test, test_predictions)

    report = "{0:.1%} accuracy on test set vs. {1:.1%} on training set"
    print(report.format(accuracy_test, accuracy_train))


# Driver: compare accuracy on the full feature set against the single-feature
# model, using 500 random rows from each gender.
ansur_ii_male = load_ansur_ii_male_data().sample(500)
ansur_ii_female = load_ansur_ii_female_data().sample(500)
df = pd.concat([ansur_ii_male, ansur_ii_female])

non_numeric = [
    'Branch',
    'Component',
    "weight_kg",
    "stature_m",
    "BMI",
    "BMI_class",
    "Height_class",
]
df.drop(columns=non_numeric, inplace=True)

accuracy_with_large_dimension(df)
accuracy_after_dimensionality_reduction(df)
def prepare_data():
    """Return 500 random male rows stacked on top of 500 random female rows.

    Each ANSUR II loader is called once, male first, and sampled
    independently; all columns are kept.
    """
    loaders = (load_ansur_ii_male_data, load_ansur_ii_female_data)
    sampled = [loader().sample(n=500) for loader in loaders]
    return pd.concat(sampled)