def stratified_plot_scree_plot():
    """Draw the scree plot for the ``stratified_sampled`` collection.

    Queries ``db["stratified_sampled"]`` with an empty filter and the
    ``_id`` field switched off in the projection, labels the result with
    ``label_categorical_data``, runs ``pca_reduction`` on it and hands the
    ``"pca_squared_loadings"`` entry of the result to ``plot_scree``.
    Progress messages are printed along the way.
    """
    without_id = {"_id": False}
    records = db["stratified_sampled"].find({}, without_id)
    labelled = label_categorical_data(records, categoric_meta_)
    reduction = pca_reduction(labelled, categories)
    print("Done with pca")

    squared_loadings = reduction["pca_squared_loadings"]
    print("plotting scree plot")
    plot_scree(squared_loadings, categories)
def do_pca():
    """Run the PCA reduction for each configured sample type and store it.

    For every sample type, the ``<sample_type>_sampled`` collection is read
    (empty filter, ``_id`` switched off in the projection), labelled with
    ``label_categorical_data`` and reduced with ``pca_reduction``. The
    ``"features"`` entry of the reduction result is then passed to
    ``insert_pca_to_mongo`` together with the sample type.
    """
    # Only the stratified sample is processed here.
    sample_types = ["stratified"]
    # The loop variable is deliberately not called ``type`` so the builtin
    # of the same name is not shadowed inside this function.
    for sample_type in sample_types:
        dataset = db[sample_type + "_sampled"].find({}, {"_id": False})
        labelled_dataset = label_categorical_data(dataset, categoric_meta_)
        pca_data = pca_reduction(labelled_dataset, categories)
        top_three_features = pca_data["features"]
        insert_pca_to_mongo(top_three_features, sample_type)
def plot_k_means_elbow():
    """Prepare the full data collection and pass it to ``elbow``.

    Steps, each announced with a progress message:

    1. query ``data_collection`` (empty filter, ``_id`` switched off),
    2. run the result through ``format_dataset``,
    3. label it with ``label_categorical_data`` and wrap it in ``np.array``,
    4. call ``elbow`` on the resulting array.
    """
    print("K-means optimization using elbow method")
    raw_records = data_collection.find({}, {"_id": False})

    print("Read data formatting it.")
    formatted_records = format_dataset(raw_records)
    print("Finished formatting data")

    labelled_records = label_categorical_data(formatted_records, categoric_meta_)
    labelled_matrix = np.array(labelled_records)
    print("Labelled Data done ! ")

    elbow(labelled_matrix)
def do_mds():
    """Run the MDS reduction for every sample type / metric pair and store it.

    For each sample type (``"random"``, ``"stratified"``) the
    ``<sample_type>_pca_reduced_data`` collection is read (empty filter,
    ``_id`` switched off in the projection) and labelled with
    ``label_categorical_data``. For each MDS type (``"euclidean"``,
    ``"correlation"``) the labelled data is passed to ``mds_reduction`` and
    the result, converted to a ``list``, is handed to
    ``insert_mds_to_mongo`` along with the MDS type and the sample type.
    """
    mds_types = ("euclidean", "correlation")
    sample_types = ("random", "stratified")
    # The loop variable is deliberately not called ``type`` so the builtin
    # of the same name is not shadowed inside this function.
    for sample_type in sample_types:
        dataset = db[sample_type + "_pca_reduced_data"].find({}, {"_id": False})
        # Labelled once per sample type and reused for every MDS type below.
        # NOTE(review): this assumes label_categorical_data returns a
        # re-iterable container rather than a one-shot iterator -- confirm.
        labelled_dataset = label_categorical_data(dataset, categoric_meta_)
        for mds_type in mds_types:
            mds_scaled_dataset = mds_reduction(labelled_dataset, mds_type)
            insert_mds_to_mongo(list(mds_scaled_dataset), mds_type, sample_type)
def random_plot_scree_plot():
    """Draw the scree plot for the ``random_sampled`` collection.

    Queries ``db["random_sampled"]`` with an empty filter and the ``_id``
    field switched off in the projection, labels the result with
    ``label_categorical_data``, runs ``pca_reduction`` on it and hands the
    ``"pca_squared_loadings"`` entry of the result to ``plot_scree``.
    """
    without_id = {"_id": False}
    records = db["random_sampled"].find({}, without_id)
    labelled = label_categorical_data(records, categoric_meta_)
    reduction = pca_reduction(labelled, categories)
    plot_scree(reduction["pca_squared_loadings"], categories)
def plot_pca():
    """Label the ``stratified_sampled`` collection and draw its PCA plot.

    Queries ``db["stratified_sampled"]`` with an empty filter and the
    ``_id`` field switched off in the projection, labels the result with
    ``label_categorical_data`` and passes it to ``draw_pca_plot``.
    """
    records = db["stratified_sampled"].find({}, {"_id": False})
    labelled = label_categorical_data(records, categoric_meta_)
    draw_pca_plot(labelled)