def dist_sqrt_area_rand_triangle(data):
    # Square root of the area of triangles spanned by three randomly sampled vertices,
    # binned into a histogram descriptor.
    mesh = data["poly_data"]
    verts_list = FeatureExtractor.generate_random_ints(0, len(mesh.points) - 1,
                                                       (FeatureExtractor.number_vertices_sampled, 3))
    triangle_areas = PSBDataset._get_cell_areas(mesh.points, verts_list)
    sqrt_areas = np.sqrt(triangle_areas)
    del verts_list
    return {"hist_sqrt_area_rand_three_verts": FeatureExtractor.make_bins(sqrt_areas, FeatureExtractor.number_bins)}
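# --- Illustrative sketch (not part of the pipeline above) ---
# A minimal, self-contained NumPy version of the same descriptor idea, assuming only a
# (N, 3) vertex array: sample three vertex indices per triangle and take the square root
# of half the cross-product norm. The function name, `n_samples`, and the toy point cloud
# below are placeholders, not names from FeatureExtractor.
import numpy as np

def sqrt_area_random_triangles(points, n_samples=1000, rng=None):
    rng = np.random.default_rng() if rng is None else rng
    idx = rng.integers(0, len(points), size=(n_samples, 3))
    a, b, c = points[idx[:, 0]], points[idx[:, 1]], points[idx[:, 2]]
    # Triangle area = 0.5 * |(b - a) x (c - a)|
    areas = 0.5 * np.linalg.norm(np.cross(b - a, c - a), axis=1)
    return np.sqrt(areas)

# Usage example on random points, binned like the descriptor above.
pts = np.random.default_rng(0).random((500, 3))
hist, edges = np.histogram(sqrt_area_random_triangles(pts), bins=20)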
def initialise_everything():
    print('''This procedure can take several hours to finish. The program will now run:
    - Normalisation pipeline over the shape database. (~3hrs)
    - Feature extraction over the shape database. (~2hrs)\n
    Are you sure you want to continue (y/n)?\n
    ''')
    choice = input(">> ")
    if choice == "n" or choice == "no":
        return

    with open('config.json') as f:
        data = json.load(f)
    path_psb = data["DATA_PATH_PSB"]
    path_normed = data["DATA_PATH_NORMED"]
    path_feature = data["FEATURE_DATA_FILE"]
    db = PSBDataset()

    if len(os.listdir(path_psb)) == 0:
        print("No valid dataset found.\nPoint to a valid dataset.")
        return
    else:
        prompt_for_class_files(path_psb)
        choice = input(
            "Do you wish to go back to the menu to change the current classification settings? (y/n)\n>> "
        )
        if choice == "n":
            return

    if not os.path.isfile(path_normed):
        print("No valid normalised dataset found.\nRunning normalisation.")
        norm = Normalizer(db)
        norm.run_full_pipeline()

    if not os.path.isfile(path_feature):
        print("No valid feature file found.\nRunning feature extraction.")
        FE = FeatureExtractor(db)
        FE.run_full_pipeline()
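# --- Illustrative sketch (not part of the original code) ---
# initialise_everything() reads three keys from config.json. A hedged sketch of writing a
# minimal config with exactly those keys; the paths used here are placeholders.
import json

example_config = {
    "DATA_PATH_PSB": "data/psb_raw",          # placeholder path to the raw PSB shapes
    "DATA_PATH_NORMED": "data/psb_normed",    # placeholder path for normalised shapes
    "FEATURE_DATA_FILE": "data/features.jsonl"  # placeholder path for the feature file
}
with open("config.json", "w") as f:
    json.dump(example_config, f, indent=2)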
def surface_area(data):
    mesh = data["poly_data"]
    cell_ids = PSBDataset._get_cells(mesh)
    cell_areas = PSBDataset._get_cell_areas(mesh.points, cell_ids)
    return {"scalar_surface_area": sum(cell_areas)}
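# --- Illustrative sanity check (assumption: for illustration only, not used by the pipeline) ---
# PyVista exposes the total surface area of a PolyData mesh as the `.area` property, which
# should agree with summing per-cell areas as surface_area() does. On a unit sphere the value
# approaches the analytic 4 * pi * r^2 as the resolution increases.
import numpy as np
import pyvista as pv

sphere = pv.Sphere(radius=1.0, theta_resolution=64, phi_resolution=64)
print(sphere.area)            # surface area from PyVista
print(4 * np.pi * 1.0 ** 2)   # analytic reference value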
from reader import PSBDataset, DataSet
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from helper.config import DEBUG, DATA_PATH_PSB, DATA_PATH_NORMED, DATA_PATH_DEBUG, CLASS_FILE
import pyvista as pv
import itertools
from collections import Counter
import pandas as pd
import scipy

if __name__ == "__main__":
    origFaceareas = []
    normedFaceareas = []

    origDB = PSBDataset(DATA_PATH_PSB, class_file_path=CLASS_FILE)
    origDB.read()
    origDB.load_files_in_memory()
    normedDB = PSBDataset(DATA_PATH_NORMED, class_file_path=CLASS_FILE)
    normedDB.read()
    normedDB.load_files_in_memory()

    origFaceareas = [DataSet.get_only_cell_areas(mesh.get('data').get('vertices'), mesh.get('data').get('faces'))
                     for mesh in origDB.full_data]
    origFaceareas = list(itertools.chain(*origFaceareas))
    origFaceareas_sub = pd.DataFrame(origFaceareas, columns=['fa'])
    origFaceareas_sub.to_csv("bitstats_origFaceAreas.csv")

    normedFaceareas = [DataSet.get_only_cell_areas(mesh.get('data').get('vertices'), mesh.get('data').get('faces'))
                       for mesh in normedDB.full_data]
    normedFaceareas = list(itertools.chain(*normedFaceareas))
    normedFaceareas = pd.DataFrame(normedFaceareas, columns=['fa'])
    normedFaceareas.to_csv("bitstats_normedFaceAreas.csv")
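# --- Illustrative follow-up sketch (not part of the original script) ---
# Once the two CSVs above exist, the face-area distributions can be compared visually.
# Assumes the column name 'fa' and the file names written by the script above.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

orig = pd.read_csv("bitstats_origFaceAreas.csv")
normed = pd.read_csv("bitstats_normedFaceAreas.csv")

fig, axes = plt.subplots(1, 2, figsize=(10, 4), sharey=True)
sns.histplot(orig["fa"], bins=50, ax=axes[0]).set_title("Original face areas")
sns.histplot(normed["fa"], bins=50, ax=axes[1]).set_title("Normalised face areas")
plt.tight_layout()
plt.show()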
                 show_edges=True)
plotter.show_bounds(all_edges=True)
plotter.subplot(0, 1)
plotter.add_text("Normalized", font_size=30)
plotter.add_mesh(pv.PolyData(normed_data_item["data"]["vertices"], normed_data_item["data"]["faces"]),
                 show_edges=True)
plotter.show_bounds(all_edges=True)
plotter.show()
print(
    "======================================= Done! ==========================================="
)

print("=" * 10 + "Testing full pipeline for dataset reader" + "=" * 10)
dataset = PSBDataset(DATA_PATH_DEBUG, class_file_path=CLASS_FILE)
dataset.run_full_pipeline()
dataset.compute_shape_statistics()
dataset.detect_outliers()
dataset.convert_all_to_polydata()
dataset.save_statistics("./trash", "stats_test.csv")
print(
    "======================================= Done! ==========================================="
)

print("=" * 10 + "Testing full pipeline for normalizer" + "=" * 10)
init_dataset = PSBDataset(DATA_PATH_DEBUG, class_file_path=CLASS_FILE)
norm = Normalizer(init_dataset)
norm.target_path = DATA_PATH_NORMED_SUBSET
normed_data = norm.run_full_pipeline()
print(
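# --- Self-contained sketch of the side-by-side comparison pattern above ---
# The snippet above is truncated (it starts mid-call); this illustrative version uses two toy
# spheres in place of the original/normalised dataset items, purely to show the subplot layout.
import pyvista as pv

plotter = pv.Plotter(shape=(1, 2))
plotter.subplot(0, 0)
plotter.add_text("Original", font_size=30)
plotter.add_mesh(pv.Sphere(radius=2.0), show_edges=True)
plotter.show_bounds(all_edges=True)
plotter.subplot(0, 1)
plotter.add_text("Normalized", font_size=30)
plotter.add_mesh(pv.Sphere(radius=1.0), show_edges=True)
plotter.show_bounds(all_edges=True)
plotter.show()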
    convex_hull_volume,
    rectangularity,
    compactness,
    sphericity,
    diameter,
    aabb_volume,
    surface_area,
    eccentricity,
    cube_root_volume_four_rand_verts,
    angle_three_rand_verts,
    dist_two_rand_verts,
    dist_bar_vert,
    dist_sqrt_area_rand_triangle,
    gaussian_curvature,
    mean_curvature,
    skeleton_singleton_features,
]

if __name__ == "__main__":
    FE = FeatureExtractor(PSBDataset(DATA_PATH_NORMED, class_file_path=CLASS_FILE,
                                     class_file_path_coarse=CLASS_FILE_COARSE))
    params = list(zip(FE.reader.full_data, len(FE.reader.full_data) * [pipeline]))
    results = mp.Pool(9).imap(FE.mono_run_pipeline_debug, tqdm(params))
    with jsonlines.open("computed_features.jsonl", mode="w", flush=True) as writer:
        for item in results:
            writer.write(item)
    # plt.imshow(results[0][0])
    # plt.show()
    # print(len(results))
    # print(img.max())
    # print(img.min())
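# --- Illustrative sketch (not part of the original script) ---
# Reading computed_features.jsonl back for inspection. Assumes each line written above is a
# flat dict of feature values per shape; the DataFrame shape/column checks are just examples.
import jsonlines
import pandas as pd

with jsonlines.open("computed_features.jsonl", mode="r") as reader:
    features = pd.DataFrame(list(reader))

print(features.shape)                  # (number of shapes, number of feature columns)
print(features.columns.tolist()[:10])  # first few feature names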
def construct_descriptor_string_scientific(ndarray):
    textstr_1 = "Mean " + f"({ndarray.mean():.2e})"
    textstr_2 = "Median " + f"({np.median(ndarray):.2e})"
    textstr_3 = "Std.dev " + f"({np.std(ndarray):.2e})"
    textstr_4 = "Std.err " + f"({scipy.stats.sem(ndarray):.2e})"
    stat_string = ", ".join([textstr_1, textstr_2, textstr_3, textstr_4])
    return stat_string


if __name__ == "__main__":
    origFaceareas = []
    normedFaceareas = []

    if not pathlib.Path("stats/orig_stats.csv").exists():
        origDB = PSBDataset(DATA_PATH_PSB, class_file_path=CLASS_FILE)
        origDB.run_full_pipeline()
        origDB.compute_shape_statistics()
        origDB.save_statistics(stats_path="stats", stats_filename="orig_stats.csv")

    if not pathlib.Path("stats/norm_stats.csv").exists():
        normedDB = PSBDataset(DATA_PATH_NORMED, class_file_path=CLASS_FILE)
        normedDB.run_full_pipeline()
        normedDB.compute_shape_statistics()
        normedDB.save_statistics(stats_path="stats", stats_filename="norm_stats.csv")

    orig_stats = pd.read_csv("stats/orig_stats.csv")
    norm_stats = pd.read_csv("stats/norm_stats.csv")
    orig_stats_cleansed = remove_outlier(orig_stats, "cell_area_mean")
    norm_stats_cleansed = remove_outlier(norm_stats, "cell_area_mean")
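# --- Usage example (illustrative; assumes construct_descriptor_string_scientific and its
# numpy/scipy imports from this file are in scope; the sample data is a placeholder) ---
import numpy as np

sample = np.random.default_rng(0).normal(loc=1e-3, scale=1e-4, size=1000)
print(construct_descriptor_string_scientific(sample))
# -> "Mean (...), Median (...), Std.dev (...), Std.err (...)" with values in scientific notation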