def dist_sqrt_area_rand_triangle(data):
    """D3 shape descriptor: histogram of the square root of the area of
    triangles spanned by randomly sampled vertex triples."""
    mesh = data["poly_data"]
    # Sample random triples of vertex indices, one triple per triangle.
    verts_list = FeatureExtractor.generate_random_ints(0, len(mesh.points) - 1, (FeatureExtractor.number_vertices_sampled, 3))
    triangle_areas = PSBDataset._get_cell_areas(mesh.points, verts_list)
    sqrt_areas = np.sqrt(triangle_areas)
    del verts_list
    return {"hist_sqrt_area_rand_three_verts": FeatureExtractor.make_bins(sqrt_areas, FeatureExtractor.number_bins)}
Example #2
def initialise_everything():
    print('''This procedure can take several hours to finish.
    The program will now run:
     - Normalisation pipeline over the shape database. (~3hrs)
     - Feature extraction over the shape database. (~2hrs)

     Are you sure you want to continue (y/n)?
    ''')
    choice = input(">> ")
    if choice.lower() in ("n", "no"):
        return

    with open('config.json') as f:
        data = json.load(f)
    path_psd = data["DATA_PATH_PSB"]
    path_normed = data["DATA_PATH_NORMED"]
    path_feature = data["FEATURE_DATA_FILE"]
    db = PSBDataset()
    if not os.path.isdir(path_psd) or not os.listdir(path_psd):
        print("No valid dataset found.\nPoint to a valid dataset.")
        return
    else:
        prompt_for_class_files(path_psd)
        choice = input(
            "Do you wish to go back to the menu to change the current classification settings? (y/n)\n>> "
        )
        if choice.lower() in ("y", "yes"):
            return
    if not os.path.isfile(path_normed):
        print("No valid normalised dataset found.\nRunning normalisation.")
        norm = Normalizer(db)
        norm.run_full_pipeline()
    if not os.path.isfile(path_feature):
        print("No valid feature file found.\nRun feature extraction.")
        FE = FeatureExtractor(db)
        FE.run_full_pipeline()
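`initialise_everything` expects a `config.json` beside the script containing the three keys read above; the paths in this minimal example are placeholders:

{
    "DATA_PATH_PSB": "data/psb_raw",
    "DATA_PATH_NORMED": "data/psb_normed",
    "FEATURE_DATA_FILE": "data/features.jsonl"
}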
Example #3
def surface_area(data):
    mesh = data["poly_data"]
    # Total surface area = sum of the areas of all mesh cells (triangles).
    cell_ids = PSBDataset._get_cells(mesh)
    cell_areas = PSBDataset._get_cell_areas(mesh.points, cell_ids)
    return {"scalar_surface_area": sum(cell_areas)}
Example #4
from reader import PSBDataset, DataSet
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from helper.config import DEBUG, DATA_PATH_PSB, DATA_PATH_NORMED, DATA_PATH_NORMED_SUBSET, DATA_PATH_DEBUG, CLASS_FILE
import pyvista as pv
import itertools
from collections import Counter
import pandas as pd
import scipy

if __name__ == "__main__":
    origFaceareas = []
    normedFaceareas = []

    origDB = PSBDataset(DATA_PATH_PSB, class_file_path=CLASS_FILE)
    origDB.read()
    origDB.load_files_in_memory()

    normedDB = PSBDataset(DATA_PATH_NORMED, class_file_path=CLASS_FILE)
    normedDB.read()
    normedDB.load_files_in_memory()

    origFaceareas = [DataSet.get_only_cell_areas(mesh.get('data').get('vertices'), mesh.get('data').get('faces')) for mesh in origDB.full_data]
    origFaceareas = list(itertools.chain(*origFaceareas))
    origFaceareas_sub = pd.DataFrame(origFaceareas, columns=['fa'])
    origFaceareas_sub.to_csv("bitstats_origFaceAreas.csv")
    normedFaceareas = [DataSet.get_only_cell_areas(mesh.get('data').get('vertices'), mesh.get('data').get('faces')) for mesh in normedDB.full_data]
    normedFaceareas = list(itertools.chain(*normedFaceareas))
    normedFaceareas_sub = pd.DataFrame(normedFaceareas, columns=['fa'])
    normedFaceareas_sub.to_csv("bitstats_normedFaceAreas.csv")
    # Side-by-side comparison of one original and one normalised mesh.
    orig_data_item = origDB.full_data[0]
    normed_data_item = normedDB.full_data[0]
    plotter = pv.Plotter(shape=(1, 2))
    plotter.subplot(0, 0)
    plotter.add_text("Original", font_size=30)
    plotter.add_mesh(pv.PolyData(orig_data_item["data"]["vertices"],
                                 orig_data_item["data"]["faces"]),
                     show_edges=True)
    plotter.show_bounds(all_edges=True)
    plotter.subplot(0, 1)
    plotter.add_text("Normalized", font_size=30)
    plotter.add_mesh(pv.PolyData(normed_data_item["data"]["vertices"],
                                 normed_data_item["data"]["faces"]),
                     show_edges=True)
    plotter.show_bounds(all_edges=True)
    plotter.show()

    print(
        "======================================= Done! ==========================================="
    )
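The two CSV exports above make it easy to compare face-area distributions before and after normalisation. A minimal sketch using the `seaborn`, `matplotlib`, and `pandas` imports from this file (assumes seaborn >= 0.11 for `histplot`):

orig_fa = pd.read_csv("bitstats_origFaceAreas.csv")["fa"]
normed_fa = pd.read_csv("bitstats_normedFaceAreas.csv")["fa"]
fig, axes = plt.subplots(1, 2, figsize=(10, 4), sharey=True)
sns.histplot(orig_fa, ax=axes[0]).set_title("Original face areas")
sns.histplot(normed_fa, ax=axes[1]).set_title("Normalised face areas")
plt.show()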

    print("=" * 10 + "Testing full pipeline for dataset reader" + "=" * 10)
    dataset = PSBDataset(DATA_PATH_DEBUG, class_file_path=CLASS_FILE)
    dataset.run_full_pipeline()
    dataset.compute_shape_statistics()
    dataset.detect_outliers()
    dataset.convert_all_to_polydata()
    dataset.save_statistics("./trash", "stats_test.csv")
    print(
        "======================================= Done! ==========================================="
    )

    print("=" * 10 + "Testing full pipeline for normalizer" + "=" * 10)
    init_dataset = PSBDataset(DATA_PATH_DEBUG, class_file_path=CLASS_FILE)
    norm = Normalizer(init_dataset)
    norm.target_path = DATA_PATH_NORMED_SUBSET
    normed_data = norm.run_full_pipeline()
    print(
        "======================================= Done! ==========================================="
    )

pipeline = [
    convex_hull_volume,
    rectangularity,
    compactness,
    sphericity,
    diameter,
    aabb_volume,
    surface_area,
    eccentricity,
    cube_root_volume_four_rand_verts,
    angle_three_rand_verts,
    dist_two_rand_verts,
    dist_bar_vert,
    dist_sqrt_area_rand_triangle,
    gaussian_curvature,
    mean_curvature,
    skeleton_singleton_features,
]
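Each entry in `pipeline` is a feature function that maps the per-mesh `data` dict to a dict of named features (as in `surface_area` above). A hypothetical sketch of how such a list can be folded over a single mesh; the project's actual driver, `FeatureExtractor.mono_run_pipeline_debug`, is not shown in this excerpt:

def run_pipeline_sketch(data, pipeline):
    # Apply every feature function and merge the resulting feature dicts.
    features = {}
    for feature_fn in pipeline:
        features.update(feature_fn(data))
    return features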

if __name__ == "__main__":
    FE = FeatureExtractor(PSBDataset(DATA_PATH_NORMED, class_file_path=CLASS_FILE, class_file_path_coarse=CLASS_FILE_COARSE))
    params = list(zip(FE.reader.full_data, len(FE.reader.full_data) * [pipeline]))
    with mp.Pool(9) as pool:
        results = pool.imap(FE.mono_run_pipeline_debug, tqdm(params))
        with jsonlines.open("computed_features.jsonl", mode="w", flush=True) as writer:
            for item in results:
                writer.write(item)
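To pull the exported features back in for analysis, pandas can read the JSON Lines file directly (a minimal sketch, assuming one feature dict per line as written above):

import pandas as pd

features = pd.read_json("computed_features.jsonl", lines=True)
print(features.columns)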
def construct_descriptor_string_scientific(ndarray):
    # Summarise an array as "Mean (...), Median (...), Std.dev (...), Std.err (...)"
    # in scientific notation.
    textstr_1 = f"Mean ({ndarray.mean():.2e})"
    textstr_2 = f"Median ({np.median(ndarray):.2e})"
    textstr_3 = f"Std.dev ({np.std(ndarray):.2e})"
    textstr_4 = f"Std.err ({scipy.stats.sem(ndarray):.2e})"
    return ", ".join([textstr_1, textstr_2, textstr_3, textstr_4])
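A quick usage sketch (assumes `numpy` is imported as `np` and `scipy.stats` is available, as the function above requires):

import numpy as np
import scipy.stats

rng = np.random.default_rng(0)
sample = rng.normal(loc=1.0, scale=0.1, size=1000)
# Prints something like: Mean (1.00e+00), Median (1.00e+00), Std.dev (9.9e-02), Std.err (3.1e-03)
print(construct_descriptor_string_scientific(sample))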


if __name__ == "__main__":
    origFaceareas = []
    normedFaceareas = []

    if not pathlib.Path("stats/orig_stats.csv").exists():
        origDB = PSBDataset(DATA_PATH_PSB, class_file_path=CLASS_FILE)
        origDB.run_full_pipeline()
        origDB.compute_shape_statistics()
        origDB.save_statistics(stats_path="stats", stats_filename="orig_stats.csv")

    if not pathlib.Path("stats/norm_stats.csv").exists():
        normedDB = PSBDataset(DATA_PATH_NORMED, class_file_path=CLASS_FILE)
        normedDB.run_full_pipeline()
        normedDB.compute_shape_statistics()
        normedDB.save_statistics(stats_path="stats", stats_filename="norm_stats.csv")

    orig_stats = pd.read_csv("stats/orig_stats.csv")
    norm_stats = pd.read_csv("stats/norm_stats.csv")

    orig_stats_cleansed = remove_outlier(orig_stats, "cell_area_mean")
    norm_stats_cleansed = remove_outlier(norm_stats, "cell_area_mean")
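`remove_outlier` is used here but not defined in this excerpt; a minimal IQR-based stand-in with the same call signature (the project's actual criterion may differ):

import pandas as pd

def remove_outlier(df, column):
    # Keep rows whose value lies within 1.5 * IQR of the quartiles,
    # a common rule of thumb for univariate outlier removal.
    q1, q3 = df[column].quantile(0.25), df[column].quantile(0.75)
    iqr = q3 - q1
    mask = df[column].between(q1 - 1.5 * iqr, q3 + 1.5 * iqr)
    return df[mask]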