def modify_pcpds(pcpds):
    """Interactively apply an optional modification (rotation or noise) to a pcpds object.

    Prompts the user for a modification choice, applies it via `modifiers`,
    and regenerates the persistence diagram if anything changed.

    Returns:
        tuple: (pcpds, modifications) where `modifications` is a list of
        human-readable strings describing what was applied (empty if none).
    """
    modifications = []
    print("What modification would you like to apply to the pcpds object?")
    print("[0] None, [1] Rotation, [2] Add Noise.")
    choice = menu.get_int_input()
    # '==' instead of 'is': identity comparison on ints is a CPython
    # small-int caching accident, not a guarantee.
    if choice == 0:
        pass
    elif choice == 1:
        print("Rotation theta for X axis:")
        x = menu.get_float_input()
        print("Rotation theta for Y axis:")
        y = menu.get_float_input()
        print("Rotation theta for Z axis:")
        z = menu.get_float_input()
        pcpds = modifiers.rotate_section(pcpds, x, y, z)
        modifications.append("Rotated by: X-theta; " + str(x) +
                             " Y-theta; " + str(y) + " Z-theta; " + str(z))
    elif choice == 2:
        print("Add the Sigma value for noise:")
        sigma = menu.get_float_input()
        pcpds = modifiers.add_noise(pcpds, sigma)
        # BUG FIX: list.append takes exactly one argument; the original
        # passed two ("Noise Applied. Sigma:", sigma) -> TypeError.
        modifications.append("Noise Applied. Sigma: " + str(sigma))
    else:
        print("Invalid option.")
    if len(modifications) > 0:
        # The stored filtration callable is re-run so the persistence
        # diagram reflects the modified point cloud.
        print("Regenerating Persistance Diagram for altered pcpds...")
        pcpds = pcpds.get_filtration_used()(pcpds)
    return pcpds, modifications
def generate_files():
    """Prompt for a LAS file and a partition count, then generate the pcpds collection files.

    Side effects: creates a folder for the file, creates a collection via
    PCPDS_Manager, and writes pcpds objects through ProcessLas.input_las.
    """
    print("What LAS file would you like to use?")
    filename = menu.get_filename_input()
    # Typo fix in the prompt: "manny" -> "many".
    print("How many partitions would you like?")
    partition = menu.get_int_input()
    las_obj = ProcessLas(filename, partition)
    # Called for its side effect (folder creation). The original bound the
    # result to dir_name and then immediately overwrote it below.
    file_manager.make_folder(filename)
    pfm = PCPDS_Manager()
    dir_name = pfm.generate_collection(filename, partition)
    # TODO: Add multithreading option to input_las?
    las_obj.input_las(dir_name)
    print("File generation complete.")
def process_run():
    """Generate a persistence diagram for every pcpds object in a user-chosen collection.

    Asks for a collection directory (re-prompting until valid), a filtration
    method, and whether to use multiprocessing, then runs
    `generate_persistence_diagram` over every file in the collection.
    """
    pcpds_manager = PCPDS_Manager()
    # Ask for the directory and re-prompt until it resolves to a collection.
    print("Enter the Collection of pcpds objects you wish to generate persistance diagrams for.")
    collection = menu.get_input("Directory: ")
    pcpds_manager.get_path_manager().set_cur_dir(collection)
    valid = pcpds_manager.get_collection_dir()
    while not valid:
        print("Invalid collection name:",
              pcpds_manager.get_path_manager().get_cur_dir(),
              "try again.", valid)
        collection = menu.get_input("Directory: ")
        pcpds_manager.get_path_manager().set_cur_dir(collection)
        valid = pcpds_manager.get_collection_dir()
    print("Valid Directory Confirmed:",
          pcpds_manager.get_path_manager().get_full_cur_dir())

    # Loop for choosing the filtration method.
    print("Choose a filtration method: [0] Rips, [1] Upper Star, [2] Lower Star.")
    choice = menu.get_int_input()
    while not 0 <= choice <= 2:
        print("Please enter a valid number between 0-2.")
        choice = menu.get_int_input()

    # Select the filtration function ('==' instead of 'is': int identity
    # is a CPython caching accident). Renamed from `filter` to avoid
    # shadowing the builtin.
    filtration = None
    if choice == 0:
        filtration = Filtration.get_rips_diagram
    elif choice == 1:
        filtration = Filtration.get_upper_star
    elif choice == 2:
        filtration = Filtration.get_lower_star

    # BUG FIX: this prompt string literal was broken across a physical line
    # break (a syntax error); re-joined into a single literal.
    print("Would you like to use multi-processing to attempt to speed things up? [0] No. [1] Yes.")
    print("Please do note that using multiprocessing only speeds up the generation of persistance diagrams with larger point clouds.")
    multiproc = menu.get_int_input()
    if multiproc:
        for file in os.listdir(
                pcpds_manager.get_path_manager().get_full_cur_dir_var(collection)):
            # NOTE(review): start() immediately followed by join() runs the
            # workers strictly one at a time - there is no real parallelism
            # here; kept as-is to preserve behavior.
            process = multiprocessing.Process(
                target=generate_persistence_diagram,
                args=(pcpds_manager, file, filtration))
            process.start()
            process.join()
            process.terminate()
    else:
        print("NOT MULTIPROCESSING:")
        # Process the point clouds into persistance diagrams sequentially.
        files = os.listdir(
            pcpds_manager.get_path_manager().get_full_cur_dir_var(collection))
        for done, file in enumerate(files):
            menu.progress(done, len(files),
                          "Generating persistance diagram for:" + str(file))
            generate_persistence_diagram(pcpds_manager, file, filtration)
        menu.progress(1, 1, "Generating persistance diagrams completed.")
def compute_bottle_neck_dist():
    """Compute bottleneck distances against a pre-filtrated collection and save results as .xls.

    Lists available collections, validates that the chosen one already has
    persistence diagrams, lets the user pick and optionally modify a pcpds
    object, then writes the n closest bottleneck-distance matches to an
    Excel sheet under "results".
    """
    pcpds_manager = PCPDS_Manager()
    print("Collections:")
    collections = os.listdir(
        pcpds_manager.get_path_manager().get_collections_path())
    collections.sort()
    collections_string = ""
    for directory in collections:
        collections_string += directory + " \t"
    print(collections_string)
    print("Please enter a collection that has already been filtrated:")
    collection = menu.get_input("Directory: ")
    pcpds_manager.get_path_manager().set_cur_dir(collection)
    valid = pcpds_manager.get_collection_dir()
    while True:
        # If not a valid directory, keep asking until one resolves.
        while not valid:
            if not pcpds_manager.get_collection_dir():
                print("Invalid collection name:",
                      pcpds_manager.get_path_manager().get_cur_dir(),
                      "try again.")
                collection = menu.get_input("Directory: ")
                pcpds_manager.get_path_manager().set_cur_dir(collection)
                valid = pcpds_manager.get_collection_dir()
        # Load only the first pcpds object in the directory to check whether
        # the collection has been filtrated (has a persistence diagram).
        pcpds_temp = None
        for file in os.listdir(
                pcpds_manager.get_path_manager().get_full_cur_dir_var(
                    collection)):
            file_path = os.path.join(
                pcpds_manager.get_path_manager().get_full_cur_dir(), file)
            pcpds_temp = file_manager.load(file_path)
            break
        if pcpds_temp is not None:
            if pcpds_temp.get_persistance_diagram() is not None:
                print("Valid Directory Chosen:", valid)
                break
            else:
                valid = False
                print(
                    "\nNo persistance diagram present for files @ collection:",
                    pcpds_manager.get_path_manager().get_full_cur_dir() + ".\n")
                print(
                    "Please Either enter a directory that has been filtrated for persistance diagrams or run 'generate_persistance_diagrams.py' on the collection."
                )
        else:
            print("Problem loading pcpds file, it loaded as None.")

    # Typo fix in the prompt: "manny" -> "many".
    print("Ready to process, how many n_nearest results would you like?")
    # TODO: Validate that n_results is a valid number for the current dataset.
    n_results = menu.get_int_input()

    # Choose a pcpds object and optionally apply a modifier to it.
    pcpds = choose_pcpds(pcpds_manager)
    print("PCPDS Selected:", pcpds.get_cellID())
    pcpds, mods = modify_pcpds(pcpds)

    # Calculate the closest n matching bottleneck distances.
    closest_matches = bottleneck_distances.search_distances(
        n_results, pcpds.get_persistance_diagram(), valid)

    wb = Workbook()
    excel_sheet = wb.add_sheet('Bottle_Neck_Distance_Comparison')
    excel_sheet.write(0, 0, "Closest_" + str(n_results) + "_BD_Matches")
    excel_sheet.write(0, 1, "Bottle_Neck_Distance")
    excel_sheet.write(0, 2, "Cell_ID_Compared_Against:")
    excel_sheet.write(1, 2, pcpds.get_cellID())
    if len(mods) > 0:
        excel_sheet.write(0, 3, str(pcpds.get_cellID()) + " Modifications")
        # `row` instead of `iter`: don't shadow the builtin.
        row = 1
        for mod in mods:
            excel_sheet.write(row, 3, mod)
            row += 1
    row = 1
    for idx in closest_matches:
        # Write results to the .xls file; idx[0][:-5] strips the file
        # extension from the match name.
        excel_sheet.write(row, 0, idx[0][:-5])
        excel_sheet.write(row, 1, idx[1])
        row += 1

    # Adds a tag to make the file name more unique to avoid mindlessly
    # overwriting data.
    # NOTE(review): ':' in file names is invalid on Windows - consider '-'.
    file_end_tag = str(pcpds.get_cellID())
    if len(mods) > 0:
        file_end_tag += ":" + mods[0]
    wb.save(
        os.path.join("results",
                     pcpds_manager.get_path_manager().get_cur_dir()) + "-" +
        file_end_tag + ":" + pcpds.get_filtration_used_name() + '.xls')
    print("Results saved as Excel file.")
valid = pm.get_collection_dir() while (not valid): print("Invalid results dir name:", pm.get_path_manager().get_cur_dir(), "try again.") collection = menu.get_input("Directory: ") pm.get_path_manager().set_cur_dir(collection) valid = pm.get_collection_dir() # Load in all files from directory file_names = fm.find_files(pm.get_collection_dir(), ".xls") print("DIR:", pm.get_collection_dir(), "\n") print("Ready to process, how manny n_nearest results would you like?") # Takes in the n_nearest you want to include from those files n_nearest = menu.get_int_input() + 1 print("Mixing into a single result file:", file_names, "\n") # Generate new results file wb = Workbook() excel_sheet = wb.add_sheet('Compiled results') iter = 0 for file_name in file_names: if ".~lock" in file_name or 'compiled_results' in file_name: print("\nSkipping irrelevant file:", file_name) else: try: # To open Workbook rb = xlrd.open_workbook(
def process_run():
    """Generate persistence diagrams for all pcpds objects in a chosen collection, timing the run.

    Variant of the processing loop that reports total elapsed time; supports
    an (effectively serial) multiprocessing mode or a plain loop.
    """
    pcpds_manager = PCPDS_Manager()
    # Ask for the directory and re-prompt until it resolves to a collection.
    # (Typo fix: "diagramsfor" -> "diagrams for".)
    print(
        "Enter the Collection of pcpds objects you wish to generate persistance diagrams for."
    )
    collection = menu.get_input("Directory: ")
    pcpds_manager.get_path_manager().set_cur_dir(collection)
    valid = pcpds_manager.get_collection_dir()
    while not valid:
        print("Invalid collection name:",
              pcpds_manager.get_path_manager().get_cur_dir(), "try again.",
              valid)
        collection = menu.get_input("Directory: ")
        pcpds_manager.get_path_manager().set_cur_dir(collection)
        valid = pcpds_manager.get_collection_dir()
    print("Valid Directory Confirmed:",
          pcpds_manager.get_path_manager().get_full_cur_dir())

    # Loop for choosing the filtration method.
    print(
        "Choose a filtration method: [0] Rips, [1] Upper Star, [2] Lower Star."
    )
    choice = menu.get_int_input()
    while not 0 <= choice <= 2:
        print("Please enter a valid number between 0-2.")
        choice = menu.get_int_input()

    # Select the filtration function ('==' instead of 'is' on ints; renamed
    # from `filter` to avoid shadowing the builtin).
    filtration = None
    if choice == 0:
        filtration = Filtration.get_rips_diagram
    elif choice == 1:
        filtration = Filtration.get_upper_star
    elif choice == 2:
        filtration = Filtration.get_lower_star

    # Start timer
    start_time = time.time()
    print(
        "Would you like to use multi-processing to attempt to speed things up? [0] No. [1] Yes."
    )
    multiproc = menu.get_int_input()
    if multiproc:
        for file in os.listdir(
                pcpds_manager.get_path_manager().get_full_cur_dir_var(
                    collection)):
            # NOTE(review): start()/join() per file runs workers one at a
            # time - no actual parallelism; kept to preserve behavior.
            process = multiprocessing.Process(
                target=generate_persistence_diagram,
                args=(pcpds_manager, file, filtration))
            process.start()
            process.join()
            process.terminate()
    else:
        # Process the point clouds into persistance diagrams sequentially.
        for file in os.listdir(
                pcpds_manager.get_path_manager().get_full_cur_dir_var(
                    collection)):
            generate_persistence_diagram(pcpds_manager, file, filtration)
    print("Finished filtrating persistance diagrams for files in: ",
          str(time.time() - start_time))
def pool_run():
    """Generate persistence diagrams for a collection using a multiprocessing pool, in batches.

    Same directory/filtration selection as process_run, but dispatches files
    to a multiprocessing.Pool in fixed-size batches.
    """
    pcpds_manager = PCPDS_Manager()
    # Ask for the directory and re-prompt until it resolves to a collection.
    # (Typo fix: "diagramsfor" -> "diagrams for".)
    print(
        "Enter the Collection of pcpds objects you wish to generate persistance diagrams for."
    )
    collection = menu.get_input("Directory: ")
    pcpds_manager.get_path_manager().set_cur_dir(collection)
    valid = pcpds_manager.get_collection_dir()
    while not valid:
        print("Invalid collection name:",
              pcpds_manager.get_path_manager().get_cur_dir(), "try again.",
              valid)
        collection = menu.get_input("Directory: ")
        pcpds_manager.get_path_manager().set_cur_dir(collection)
        valid = pcpds_manager.get_collection_dir()
    print("Valid Directory Confirmed:",
          pcpds_manager.get_path_manager().get_full_cur_dir())

    # Loop for choosing the filtration method.
    print(
        "Choose a filtration method: [0] Rips, [1] Upper Star, [2] Lower Star."
    )
    choice = menu.get_int_input()
    while not 0 <= choice <= 2:
        print("Please enter a valid number between 0-2.")
        choice = menu.get_int_input()

    # Select the filtration function ('==' instead of 'is' on ints; renamed
    # from `filter` to avoid shadowing the builtin).
    filtration = None
    if choice == 0:
        filtration = Filtration.get_rips_diagram
    elif choice == 1:
        filtration = Filtration.get_upper_star
    elif choice == 2:
        filtration = Filtration.get_lower_star

    # Start timer
    start_time = time.time()
    # TODO: Add filter for '.json' objects as it will have problems on macs otherwise?
    # TODO: set to the number of items we think the cpu should handle at a time based on total cpu count.
    pool_size = 10
    batch = []
    pool = multiprocessing.Pool()
    for file in os.listdir(
            pcpds_manager.get_path_manager().get_full_cur_dir_var(collection)):
        # Build a batch of files before dispatching to the pool.
        batch.append(file)
        if len(batch) >= pool_size:
            # BUG FIX: the original called the undefined name `args(...)`
            # (NameError), and Pool.map cannot pass extra fixed arguments.
            # starmap with full argument tuples does what was intended.
            pool.starmap(generate_persistence_diagram,
                         [(pcpds_manager, f, filtration) for f in batch])
            # Empty the batch for the next set.
            batch.clear()
    # BUG FIX: files left over in a partial final batch were never
    # processed, despite the comment saying they should be.
    if batch:
        pool.starmap(generate_persistence_diagram,
                     [(pcpds_manager, f, filtration) for f in batch])
    pool.close()
    pool.join()
    print("Finished filtrating persistance diagrams for files in: ",
          str(time.time() - start_time))
def input_las(self, path):
    """Load a LAS file, partition its point cloud into a grid, and save one pcpds object per cell.

    Args:
        path: Directory the generated pcpds files are saved into.

    Side effects: reads `self.filename + '.las'`, prompts the user about
    multiprocessing, and writes pcpds files via file_manager.save.
    """
    # Load data; `File` is the laspy LAS reader.
    in_file = File(self.filename + '.las', mode='r')
    # Import coordinates (raw integer LAS axes).
    x_vals = in_file.X
    y_vals = in_file.Y
    z_vals = in_file.Z
    coords, grid_dimensions = self.__format_data(x_vals, y_vals, z_vals)

    # Dictionary of point-cloud coordinates keyed by grid cell id. The
    # 'idx' placeholder entry is discarded after processing.
    points = {'idx': 'coords[c]'}

    print("\nWould you like to use multi-processing to attempt to speed things up? [0] No. [1] Yes.")
    print("Please do note that using multiprocessing only speeds up this process with larger data sets.")
    multiproc = menu.get_int_input()

    # Start timer
    start_time = time.time()
    if multiproc:
        print("Multithreading")
        # Split the coordinate list into one chunk per available core.
        cores = multiprocessing.cpu_count()
        # BUG FIX: round() can yield 0 for small inputs, making the range
        # step 0 (ValueError); clamp the chunk size to at least 1.
        chunk_size = max(1, round(len(coords) / cores))
        chunks = [coords[i:i + chunk_size]
                  for i in range(0, len(coords), chunk_size)]
        # A manager dict lets worker processes write results back.
        manager = multiprocessing.Manager()
        points = manager.dict()
        # One worker process per chunk; the `with` block waits for all
        # submitted work to finish before continuing.
        with concurrent.futures.ProcessPoolExecutor() as executor:
            for chunk in chunks:
                executor.submit(self.split_pointcloud, chunk, points)
    else:
        print("Not multi threading.")
        self.split_pointcloud(coords, points, count=True)

    # BUG FIX: in the multiprocessing branch `points` is a fresh manager
    # dict that never had the 'idx' placeholder, so an unconditional
    # pop('idx') raised KeyError; pop with a default is safe in both paths.
    points.pop('idx', None)
    menu.progress(1, 1, "Processing points completed.")
    print("\n")
    print("Processing points completed in: ", str(time.time() - start_time))

    # Create a pcpds object for each grid cell id, store its point cloud,
    # and save it to disk.
    individual_dimensions = (grid_dimensions[0] / self.partition,
                             grid_dimensions[1] / self.partition,
                             grid_dimensions[2] / self.partition)
    tracker = 0
    for id in points:
        temp = pcpds(id, individual_dimensions)
        temp.set_point_cloud(points[id])
        file_manager.save(temp, path, id)
        # Keeps track of the PCPDS objects being generated.
        menu.progress(tracker, len(points),
                      "Processing PCPDS object for idx: " + str(id))
        tracker = tracker + 1
    menu.progress(1, 1, "Processing PCPDS files completed.")
    print("\n")