예제 #1
0
def prepare_data():
    """Return five selected measurement columns for a 500 + 500 random sample.

    Draws 500 random rows from the male loader and 500 from the female
    loader, stacks them, and keeps only the columns listed below.
    """
    selected_columns = [
        'elbowrestheight', 'wristcircumference', 'anklecircumference',
        'buttockheight', 'crotchheight'
    ]
    # Male rows are sampled first, then female rows, and stacked in that order.
    sampled_frames = [
        load_ansur_ii_male_data().sample(500),
        load_ansur_ii_female_data().sample(500),
    ]
    return pd.concat(sampled_frames)[selected_columns]
def prepare_data():
    """Return a 500 + 500 random sample with a fixed set of columns removed.

    Draws 500 random rows from the male loader and 500 from the female
    loader, stacks them, and drops the columns listed below.
    """
    dropped_columns = [
        'Branch', 'Gender', 'Component', "weight_kg", "stature_m", "BMI_class",
        "Height_class"
    ]
    male_sample = load_ansur_ii_male_data().sample(500)
    female_sample = load_ansur_ii_female_data().sample(500)
    combined = pd.concat([male_sample, female_sample])
    # drop() returns a new frame; `combined` itself is left unchanged.
    return combined.drop(columns=dropped_columns)
def prepare_data():
    """Return head measurements plus two synthetic noise columns.

    Draws 500 random rows from the male loader and 500 from the female
    loader, stacks them, keeps four head-measurement columns, and appends
    two randomly generated columns:

    * ``n_hairs`` -- normal draws with mean 100000 and standard deviation 10
    * ``measurement_error`` -- normal draws with mean 0.1 and standard
      deviation 1.387893e-17 (effectively constant)

    Returns:
        A new DataFrame with the four measurement columns and the two
        synthetic columns.
    """
    ansur_ii_male = load_ansur_ii_male_data().sample(500)
    ansur_ii_female = load_ansur_ii_female_data().sample(500)
    df = pd.concat([ansur_ii_male, ansur_ii_female])
    cols = [
        'headbreadth', 'headcircumference', 'headlength', 'tragiontopofhead'
    ]
    # Take an explicit copy: adding columns to the bare selection df[cols]
    # can raise SettingWithCopyWarning.
    head_df = df[cols].copy()

    # Draw one value per row. Without `size`, np.random.normal returns a
    # single scalar that pandas broadcasts to every row, so the requested
    # standard deviation would have no effect on the column.
    n_rows = len(head_df)
    head_df["n_hairs"] = np.random.normal(100000, 10, size=n_rows)
    head_df["measurement_error"] = np.random.normal(
        0.1, 1.387893e-17, size=n_rows)
    return head_df
    # Fit and transform the t-SNE model on the numeric dataset
    tsne_features = m.fit_transform(df_numeric)
    print(tsne_features.shape)

    return tsne_features


def tsne_visualization_of_dimensionality(df, tsne_features):
    """Scatter-plot the first two embedding columns, colored three ways.

    Writes columns 0 and 1 of ``tsne_features`` into ``df`` as ``"x"`` and
    ``"y"`` (this modifies the caller's frame in place), then shows one
    scatter plot per categorical column: ``Component``, ``Branch`` and
    ``Gender``, in that order.

    Args:
        df: frame that contains the ``Component``, ``Branch`` and ``Gender``
            columns and receives the ``x`` / ``y`` columns.
        tsne_features: 2-D array indexed as ``[:, 0]`` and ``[:, 1]``.
    """
    # Attach the embedding coordinates to the frame.
    for column_index, axis_name in enumerate(("x", "y")):
        df[axis_name] = tsne_features[:, column_index]

    # One figure per grouping variable, each shown before the next is drawn.
    for hue_column in ("Component", "Branch", "Gender"):
        sns.scatterplot(x="x", y="y", hue=hue_column, data=df)
        plt.show()


# Load the full male and female tables (no sampling here) and stack them.
ansur_ii_male, ansur_ii_female = (
    load_ansur_ii_male_data(),
    load_ansur_ii_female_data(),
)
df = pd.concat(objs=[ansur_ii_male, ansur_ii_female])

# Compute the embedding, then plot it colored by the categorical columns.
tsne_features = fitting_tsne_to_ansur_data(df)
tsne_visualization_of_dimensionality(df=df, tsne_features=tsne_features)
예제 #5
0
def accuracy_after_dimensionality_reduction(ansur_df):
    """Fit an SVC on a single feature and print its train/test accuracy.

    Uses only the ``neckcircumferencebase`` column to predict ``Gender``,
    holding out 30% of the rows for testing. The split is not seeded, so
    the printed numbers vary between runs.

    Args:
        ansur_df: frame containing the ``neckcircumferencebase`` and
            ``Gender`` columns.
    """
    # Double brackets keep the single feature as a 2-D frame.
    features = ansur_df[["neckcircumferencebase"]]
    target = ansur_df["Gender"]

    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.3)

    classifier = SVC()
    classifier.fit(X_train, y_train)

    # Score the training rows first, then the held-out rows.
    train_predictions = classifier.predict(X_train)
    accuracy_train = accuracy_score(y_train, train_predictions)
    test_predictions = classifier.predict(X_test)
    accuracy_test = accuracy_score(y_test, test_predictions)

    report = "{0:.1%} accuracy on test set vs. {1:.1%} on training set".format(
        accuracy_test, accuracy_train)
    print(report)


# Draw 500 random rows from each table.
ansur_ii_male = load_ansur_ii_male_data().sample(n=500)
ansur_ii_female = load_ansur_ii_female_data().sample(n=500)

# Columns removed before modelling. 'Gender' is deliberately kept because
# accuracy_after_dimensionality_reduction reads it as the target.
non_numeric = [
    'Branch', 'Component', "weight_kg", "stature_m", "BMI", "BMI_class",
    "Height_class"
]

df = pd.concat([ansur_ii_male, ansur_ii_female]).drop(columns=non_numeric)

# Run both evaluations on the same prepared frame.
accuracy_with_large_dimension(df)
accuracy_after_dimensionality_reduction(df)
def prepare_data():
    """Return 500 random male rows stacked on top of 500 random female rows.

    All columns from the two loaders are kept.
    """
    male_sample = load_ansur_ii_male_data().sample(500)
    female_sample = load_ansur_ii_female_data().sample(500)
    return pd.concat([male_sample, female_sample])