Beispiel #1
0
def stratified_plot_scree_plot():
    """Draw the scree plot for the stratified sample.

    Queries ``db["stratified_sampled"]`` with an empty filter and ``_id``
    excluded, passes the result through ``label_categorical_data`` and
    ``pca_reduction``, then hands the ``"pca_squared_loadings"`` entry of
    the reduction result to ``plot_scree``. Two progress messages are
    printed along the way.

    NOTE(review): ``db`` is presumably a MongoDB database handle -- confirm.
    """
    everything = {}
    without_id = {"_id": False}
    records = db["stratified_sampled"].find(everything, without_id)

    encoded = label_categorical_data(records, categoric_meta_)
    reduction = pca_reduction(encoded, categories)
    print("Done with pca")

    # Pull the loadings out before announcing the plot, so a missing key
    # surfaces ahead of the second progress message.
    loadings = reduction["pca_squared_loadings"]
    print("plotting scree plot")
    plot_scree(loadings, categories)
Beispiel #2
0
def do_pca():
    """Run the PCA reduction for each sampling type and store the result.

    For every entry in the local list of sampling types (currently only
    ``"stratified"``), the ``<type>_sampled`` collection of ``db`` is read
    with ``_id`` excluded, passed through ``label_categorical_data`` and
    ``pca_reduction``, and the ``"features"`` entry of the reduction result
    is handed to ``insert_pca_to_mongo`` together with the sampling type.
    """
    sample_types = ["stratified"]
    # The loop variable is deliberately not called ``type`` so the builtin
    # of that name stays usable inside this function.
    for sample_type in sample_types:
        dataset = db[sample_type + "_sampled"].find({}, {"_id": False})
        labelled_dataset = label_categorical_data(dataset, categoric_meta_)
        pca_data = pca_reduction(labelled_dataset, categories)
        top_three_features = pca_data["features"]
        insert_pca_to_mongo(top_three_features, sample_type)
Beispiel #3
0
def plot_k_means_elbow():
    """Prepare the full data collection and pass it to ``elbow``.

    Reads ``data_collection`` with ``_id`` excluded, runs the result through
    ``format_dataset`` and ``label_categorical_data``, converts the labelled
    data to a NumPy array, and calls ``elbow`` on that array. A progress
    message is printed before or after each stage.
    """
    print("K-means optimization using elbow method")
    without_id = {"_id": False}
    raw_records = data_collection.find({}, without_id)
    print("Read data formatting it.")

    formatted = format_dataset(raw_records)
    print("Finished formatting data")

    labelled = label_categorical_data(formatted, categoric_meta_)
    feature_matrix = np.array(labelled)
    print("Labelled Data done ! ")

    elbow(feature_matrix)
Beispiel #4
0
def do_mds():
    """Run the MDS reduction for each sampling type and MDS variant.

    For each sampling type (``"random"``, ``"stratified"``) the
    ``<type>_pca_reduced_data`` collection of ``db`` is read with ``_id``
    excluded and passed through ``label_categorical_data``. The labelled
    data is then given to ``mds_reduction`` once per MDS variant
    (``"euclidean"``, ``"correlation"``), and each result is stored via
    ``insert_mds_to_mongo`` as a list, along with the variant and the
    sampling type.
    """
    mds_types = ["euclidean", "correlation"]
    sample_types = ["random", "stratified"]

    # The loop variable is deliberately not called ``type`` so the builtin
    # of that name stays usable inside this function.
    for sample_type in sample_types:
        dataset = db[sample_type + "_pca_reduced_data"].find(
            {}, {"_id": False})
        # Labelled once per sampling type and reused for every MDS variant.
        labelled_dataset = label_categorical_data(dataset, categoric_meta_)
        for mds_type in mds_types:
            mds_scaled_dataset = mds_reduction(labelled_dataset, mds_type)
            insert_mds_to_mongo(
                list(mds_scaled_dataset), mds_type, sample_type)
Beispiel #5
0
def random_plot_scree_plot():
    """Draw the scree plot for the random sample.

    Queries ``db["random_sampled"]`` with an empty filter and ``_id``
    excluded, passes the result through ``label_categorical_data`` and
    ``pca_reduction``, then hands the ``"pca_squared_loadings"`` entry of
    the reduction result to ``plot_scree``.
    """
    without_id = {"_id": False}
    records = db["random_sampled"].find({}, without_id)

    encoded = label_categorical_data(records, categoric_meta_)
    reduction = pca_reduction(encoded, categories)

    loadings = reduction["pca_squared_loadings"]
    plot_scree(loadings, categories)
Beispiel #6
0
def plot_pca():
    """Draw the PCA plot for the stratified sample.

    Queries ``db["stratified_sampled"]`` with an empty filter and ``_id``
    excluded, passes the result through ``label_categorical_data``, and
    gives the labelled data to ``draw_pca_plot``.
    """
    without_id = {"_id": False}
    records = db["stratified_sampled"].find({}, without_id)

    encoded = label_categorical_data(records, categoric_meta_)
    draw_pca_plot(encoded)