def main():
    number_of_data = 400

    # Create the las object and calculate the corresponding values.
    filename = 'tiny'
    partition = 70
    las_obj = ProcessLas(filename, partition)
    pfm = PCPDS_Manager()
    dir_name = pfm.generate_collection(filename, partition)
    print('Dir: ' + str(dir_name))
    las_obj.input_las(dir_name)
    datafile = open("bdripson70partitions.txt", "a")

    n_results = 4  # menu.get_n_result_input()

    for n in range(number_of_data):
        # Generate a random idx value for a pcpds object, then validate it;
        # if the file does not exist, roll again.
        # TODO: Validate the idx from random_grid is valid, else run random_grid again.
        random_idx = str(las_obj.random_grid())
        random_pcpds = None
        first = True
        while (not pfm.get_path_manager().validate_file(
                os.path.join(dir_name, random_idx + ".json")) or first):
            random_idx = str(las_obj.random_grid())
            print("Attempting RANDOM ID:", random_idx)
            first = False

        # Grab the pcpds object that was generated.
        random_pcpds = pfm.get_pcpds(random_idx)

        # Calculate bottleneck distances and record the n_results closest matches.
        closest_matches = bottleneck_distances.search_distances(
            n_results, random_pcpds.get_persistance_diagram(), dir_name)
        datafile.write(str(random_idx))
        datafile.write(":")
        for idx in closest_matches:
            datafile.write(str(idx))
            print(idx)
            datafile.write(",")
        datafile.write('\n')
        menu.progress(n, number_of_data,
                      ("Processing random grid: " + str(random_idx) + "..."))
    datafile.close()
    print("Job done.")
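# A minimal sketch of reading back the "idx:match1,match2,..." lines that
# main() appends to the results file above; read_results is an illustrative
# helper, not part of the codebase.
def read_results(path="bdripson70partitions.txt"):
    results = {}
    with open(path) as f:
        for line in f:
            idx, _, matches = line.strip().partition(":")
            # The writer leaves a trailing comma, so drop empty fields.
            results[idx] = [m for m in matches.split(",") if m]
    return results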
def split_pointcloud(self, coords, points, count=False):
    # Bin each coordinate into its grid cell; callers may pass a slice of the
    # full coordinate list when splitting the work across CPUs.
    for c, _ in enumerate(coords):
        x = math.floor(coords[c][0] * self.partition)
        y = math.floor(coords[c][1] * self.partition)
        x = str(x).zfill(self.leading_zeros)
        y = str(y).zfill(self.leading_zeros)
        z = str(1).zfill(self.leading_zeros)
        idx = int('1' + x + y + z)
        # Make a dictionary entry for each idx; if it already exists,
        # stack the coordinate onto the existing ones.
        try:
            points[idx]
        except KeyError:
            points[idx] = coords[c]
        else:
            points[idx] = np.vstack((points[idx], coords[c]))
        # Keep track of the progress of dividing up the points.
        if count:
            menu.progress(c, len(coords), ("Processing point: " + str(idx) + "..."))
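# A minimal sketch of the composite cell-ID scheme used above: a leading '1'
# sentinel digit followed by three zero-padded fields of `leading_zeros`
# digits each. encode_idx/decode_idx are illustrative helpers, not part of
# the codebase, and assume each field fits within `leading_zeros` digits.
def encode_idx(x, y, z, leading_zeros):
    return int('1' + str(x).zfill(leading_zeros)
                   + str(y).zfill(leading_zeros)
                   + str(z).zfill(leading_zeros))

def decode_idx(idx, leading_zeros):
    s = str(idx)[1:]  # drop the '1' sentinel that protects leading zeros
    w = leading_zeros
    return int(s[:w]), int(s[w:2 * w]), int(s[2 * w:])

assert decode_idx(encode_idx(12, 3, 1, 3), 3) == (12, 3, 1)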
def process_run():
    pcpds_manager = PCPDS_Manager()

    # Ask for the directory holding the collection of pcpds objects.
    print("Enter the collection of pcpds objects you wish to generate persistence diagrams for.")
    collection = menu.get_input("Directory: ")
    pcpds_manager.get_path_manager().set_cur_dir(collection)
    valid = pcpds_manager.get_collection_dir()
    while not valid:
        print("Invalid collection name:",
              pcpds_manager.get_path_manager().get_cur_dir(), "try again.")
        collection = menu.get_input("Directory: ")
        pcpds_manager.get_path_manager().set_cur_dir(collection)
        valid = pcpds_manager.get_collection_dir()

    # Verify the directory.
    print("Valid Directory Confirmed:",
          pcpds_manager.get_path_manager().get_full_cur_dir())

    # Loop until a valid filtration method is chosen.
    print("Choose a filtration method: [0] Rips, [1] Upper Star, [2] Lower Star.")
    choice = menu.get_int_input()
    while not 0 <= choice <= 2:
        print("Please enter a valid number between 0-2.")
        choice = menu.get_int_input()

    # Select the filter function to be used.
    filter = None
    if choice == 0:
        filter = Filtration.get_rips_diagram
    elif choice == 1:
        filter = Filtration.get_upper_star
    elif choice == 2:
        filter = Filtration.get_lower_star

    print("Would you like to use multi-processing to attempt to speed things up? [0] No. [1] Yes.")
    print("Please note that multiprocessing only speeds up the generation of persistence diagrams for larger point clouds.")
    multiproc = menu.get_int_input()

    if multiproc:
        for file in os.listdir(
                pcpds_manager.get_path_manager().get_full_cur_dir_var(collection)):
            # NOTE: starting and immediately joining each process runs them
            # one at a time, so this branch is effectively sequential.
            process = multiprocessing.Process(
                target=generate_persistence_diagram,
                args=(pcpds_manager, file, filter))
            process.start()
            process.join()
            process.terminate()
    else:
        print("NOT MULTIPROCESSING:")
        # Process the point clouds into persistence diagrams sequentially.
        files = os.listdir(
            pcpds_manager.get_path_manager().get_full_cur_dir_var(collection))
        for iter, file in enumerate(files):
            menu.progress(iter, len(files),
                          ("Generating persistence diagram for: " + str(file)))
            generate_persistence_diagram(pcpds_manager, file, filter)
        menu.progress(1, 1, "Generating persistence diagrams completed.")
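# A sketch of a genuinely parallel variant of the multiprocessing branch
# above, using the process pool hinted at by the commented-out executor line
# in the original. It assumes generate_persistence_diagram and its arguments
# can be pickled; process_run_pooled is illustrative, not part of the codebase.
def process_run_pooled(pcpds_manager, collection, filter):
    files = os.listdir(
        pcpds_manager.get_path_manager().get_full_cur_dir_var(collection))
    with concurrent.futures.ProcessPoolExecutor() as executor:
        futures = [executor.submit(generate_persistence_diagram,
                                   pcpds_manager, file, filter)
                   for file in files]
        for done, future in enumerate(concurrent.futures.as_completed(futures)):
            future.result()  # re-raises any exception from the worker
            menu.progress(done, len(files), "Generating persistence diagrams...")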
def main():
    pfm = PCPDS_Manager()
    number_of_data = 200  # Max 256 when saving to excel
    num_partitions_to_slide = 3
    # The filtration method is needed for filtering the new point clouds later.
    filt_method = None
    leading_zeros = 0
    dir_name = ""
    pfm.get_path_manager().set_cur_dir("")
    valid = False

    print("Please enter a collection that has already been filtered:")
    # If it is not a valid directory, say so and ask again.
    while not valid:
        if not pfm.get_collection_dir():
            print("Invalid collection name:",
                  pfm.get_path_manager().get_cur_dir(), "try again.")
        dir_name = menu.get_input("Directory: ")
        pfm.get_path_manager().set_cur_dir(dir_name)
        valid = pfm.get_collection_dir()
        if not valid:
            continue

        # Check the first pcpds object in this directory for a persistence diagram.
        pcpds_temp = None
        for file in os.listdir(pfm.get_path_manager().get_full_cur_dir_var(dir_name)):
            file_path = os.path.join(pfm.get_path_manager().get_full_cur_dir(), file)
            pcpds_temp = file_manager.load(file_path)
            break
        if pcpds_temp is not None:
            if pcpds_temp.get_persistance_diagram() is not None:
                print("Valid Directory Chosen:", valid)
                # Store the filtration method used to form the persistence
                # diagram; it is reused for the slid point clouds below.
                filt_method = pcpds_temp.get_filtration_used()
                break
            else:
                valid = False
                print("\nNo persistence diagram present for files @ collection:",
                      pfm.get_path_manager().get_full_cur_dir() + ".\n")
                print("Please either enter a directory that has been filtrated for "
                      "persistence diagrams or run 'generate_persistance_diagrams.py' "
                      "on the collection.")
        else:
            print("Problem loading pcpds file, it loaded as None.")

    wb = Workbook()
    excel_sheet = wb.add_sheet('Sheet 2')

    # Derive leading_zeros from the digit count of a random idx's cell_ID
    # (one sentinel digit, then three equal-width fields).
    tmp_cellID = pfm.get_random_pcpds().get_cellID()
    leading_zeros = int((len(str(tmp_cellID)) - 1) / 3)
    print("LEADING ZEROS:", leading_zeros)

    for n in range(number_of_data):
        pcpds = None
        valid_idx = False
        while not valid_idx:
            # Grab a random pcpds from the currently selected directory.
            pcpds = pfm.get_random_pcpds()
            (X, Y, Z) = pcpds.get_xyz()
            print("XYZ of random pcpds: X:", X, "Y:", Y, "Z:", Z)
            # Skip cells on a lower bound to avoid negative neighbour indices.
            if X < 1 or Y < 1:
                print("Invalid XYZ")
                continue
            slide_left_X = pfm.gen_idx(X - 1, Y, leading_zeros)
            slide_right_X = pfm.gen_idx(X + 1, Y, leading_zeros)
            slide_up_Y = pfm.gen_idx(X, Y + 1, leading_zeros)
            slide_down_Y = pfm.gen_idx(X, Y - 1, leading_zeros)
            slide_left_down = pfm.gen_idx(X - 1, Y - 1, leading_zeros)
            slide_right_down = pfm.gen_idx(X + 1, Y - 1, leading_zeros)
            slide_right_up = pfm.gen_idx(X + 1, Y + 1, leading_zeros)
            slide_left_up = pfm.gen_idx(X - 1, Y + 1, leading_zeros)
            path_manager = pfm.get_path_manager()
            # Only accept an idx whose four axis-aligned neighbours exist.
            if (path_manager.validate_file(os.path.join(
                        pfm.get_collection_dir(), str(slide_left_X) + ".json"))
                    and path_manager.validate_file(os.path.join(
                        pfm.get_collection_dir(), str(slide_right_X) + ".json"))
                    and path_manager.validate_file(os.path.join(
                        pfm.get_collection_dir(), str(slide_up_Y) + ".json"))
                    and path_manager.validate_file(os.path.join(
                        pfm.get_collection_dir(), str(slide_down_Y) + ".json"))):
                valid_idx = True

        # Get the random pcpds's details.
        idx = pcpds.get_cellID()
        print("Random IDX chosen:", str(idx))
        (dimX, dimY, dimZ) = pcpds.get_dimensions()
        bounds = pcpds.get_bounds()
        # Grab the persistence diagram of the random idx.
        test_pd = pcpds.get_persistance_diagram()
        # TODO: Change how validation of these slid idx values is done?
        slide_left_X = pfm.get_pcpds(slide_left_X)
        slide_right_X = pfm.get_pcpds(slide_right_X)
        slide_up_Y = pfm.get_pcpds(slide_up_Y)
        slide_down_Y = pfm.get_pcpds(slide_down_Y)

        num_slides = 10
        num_directions = 4
        excel_sheet.write(0, n, str(idx))

        # Apply the transform to the point cloud and generate a persistence
        # diagram to compare for bottleneck distances.
        print("num_slides * num_partitions_to_slide:",
              num_slides * num_partitions_to_slide)
        for overlay in range(1, num_slides * num_partitions_to_slide):
            # Left
            bounds_left_X = menu.transform(bounds, dimX, -1, True, overlay, num_slides)
            left_X_pcpds = menu.within_point_cloud(pcpds, slide_left_X, bounds_left_X)
            # Right
            bounds_right_X = menu.transform(bounds, dimX, 1, True, overlay, num_slides)
            right_X_pcpds = menu.within_point_cloud(pcpds, slide_right_X, bounds_right_X)
            # Up
            bounds_up_Y = menu.transform(bounds, dimY, 1, False, overlay, num_slides)
            up_Y_pcpds = menu.within_point_cloud(pcpds, slide_up_Y, bounds_up_Y)
            # Down
            bounds_down_Y = menu.transform(bounds, dimY, -1, False, overlay, num_slides)
            down_Y_pcpds = menu.within_point_cloud(pcpds, slide_down_Y, bounds_down_Y)

            overlay_avg = -1
            num_dir = 0
            total = 0
            try:
                left_X_pcpds = filt_method(left_X_pcpds)
                left_X_pd = left_X_pcpds.get_persistance_diagram()
                total += bottleneck_distances.get_distances(left_X_pd, test_pd)
                num_dir += 1
            except Exception:
                print("ERROR LEFT")
            try:
                right_X_pcpds = filt_method(right_X_pcpds)
                right_X_pd = right_X_pcpds.get_persistance_diagram()
                total += bottleneck_distances.get_distances(right_X_pd, test_pd)
                num_dir += 1
            except Exception:
                print("ERROR RIGHT")
            try:
                up_Y_pcpds = filt_method(up_Y_pcpds)
                up_Y_pd = up_Y_pcpds.get_persistance_diagram()
                total += bottleneck_distances.get_distances(up_Y_pd, test_pd)
                num_dir += 1
            except Exception:
                print("ERROR UP")
            try:
                down_Y_pcpds = filt_method(down_Y_pcpds)
                down_Y_pd = down_Y_pcpds.get_persistance_diagram()
                total += bottleneck_distances.get_distances(down_Y_pd, test_pd)
                num_dir += 1
            except Exception:
                print("ERROR DOWN")

            # Average over however many directions succeeded; -1 marks failure.
            overlay_avg = total / num_dir if num_dir != 0 else -1
            excel_sheet.write(overlay, n, str(overlay_avg))
        menu.progress(n, number_of_data,
                      ("Processing random grid: " + str(idx) + "..."))
    menu.progress(1, 1, ("Processing complete."))

    # Write the results to a .xls file.
    wb.save(dir_name + '.xls')
    print("Job done.")
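# The four near-identical try blocks above could collapse into one loop; a
# sketch, assuming filt_method, test_pd, and the four slid pcpds objects as
# defined in main(). average_direction_distance is illustrative only.
def average_direction_distance(pcpds_by_dir, filt_method, test_pd):
    total, hits = 0, 0
    for name, slid in pcpds_by_dir.items():
        try:
            pd = filt_method(slid).get_persistance_diagram()
            total += bottleneck_distances.get_distances(pd, test_pd)
            hits += 1
        except Exception:
            print("ERROR", name)
    # Mirror the -1 sentinel used above when no direction succeeds.
    return total / hits if hits else -1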
            else:
                valid = False
                print("\nNo persistence diagram present for files @ collection:",
                      pcpds_manager.get_path_manager().get_full_cur_dir() + ".\n")
                print("Please either enter a directory that has been filtrated for "
                      "persistence diagrams or run 'generate_persistance_diagrams.py' "
                      "on the collection.")
        else:
            print("Problem loading pcpds file, it loaded as None.")

    # Go through the pcpds objects in the dir & check that their filtration
    # method is the same.
    iter = 0
    filtration_name = None
    files = file_manager.find_files(pcpds_manager.get_collection_dir(), '.json')
    menu.progress(iter, 1, "Checking for filtration mismatches...")
    for file in files:
        pcpds = file_manager.load(
            os.path.join(pcpds_manager.get_collection_dir(), file))
        if filtration_name is None:
            filtration_name = pcpds.get_filtration_used_name()
        if filtration_name not in pcpds.get_filtration_used_name():
            print("INVALID PCPDS:", pcpds.get_cellID(),
                  "Mismatched Filtration:", pcpds.get_filtration_used_name())
            # Store the results in the other workbook.
            for n in range(1, n_nearest):
                val = sheet.cell_value(n, 1)
                excel_sheet.write(n - 1, iter, val)

            # Check if the idx of the closest bottleneck distance is the same
            # as the one it was compared against.
            closest_idx = int(sheet.cell_value(1, 0))
            searched_idx = int(sheet.cell_value(1, 2))
            if closest_idx == searched_idx:
                excel_sheet.write(n_nearest - 1, iter, "yes")
            else:
                excel_sheet.write(n_nearest - 1, iter, "no")
            iter = iter + 1

            # Stop short of the 256-column cap of the .xls format.
            if iter > 199:
                print("\nLimiting results folder to excel file capacity of "
                      "255 columns for results.")
                break
        except Exception:
            print("\nERROR Invalid file:", file_name)
        menu.progress(iter, len(file_names), "Processed: " + file_name)

    wb.save(os.path.join(pm.get_collection_dir(), 'compiled_results.xls'))
    menu.progress(1, 1, "Finished compiling result data.\n")

    # smaller_25 lower star results
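# The fragment above begins mid-loop; the per-file `sheet` it reads from is
# presumably opened with xlrd, roughly as below. open_result_sheet is an
# illustrative helper and the single-sheet layout is an assumption.
import xlrd

def open_result_sheet(path):
    book = xlrd.open_workbook(path)
    return book.sheet_by_index(0)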
def input_las(self, path):
    # Load the data and put the list of tuples in an array.
    in_file = File(self.filename + '.las', mode='r')

    # Import coordinates and convert them to a workable float32 type.
    x_vals = in_file.X
    y_vals = in_file.Y
    z_vals = in_file.Z
    coords, grid_dimensions = self.__format_data(x_vals, y_vals, z_vals)

    # Dictionary of point cloud coordinates, keyed by grid idx.
    # The placeholder entry is popped once processing completes.
    points = {'idx': 'coords[c]'}

    print("\nWould you like to use multi-processing to attempt to speed things up? [0] No. [1] Yes.")
    print("Please note that multiprocessing only speeds up this process with larger data sets.")
    multiproc = menu.get_int_input()

    # Start the timer.
    start_time = time.time()
    if multiproc:
        print("Multiprocessing")
        # Split up the list by the number of CPU cores available.
        cores = multiprocessing.cpu_count()
        coords_split_amount = round(len(coords) / cores)
        chunks = [coords[x:x + coords_split_amount]
                  for x in range(0, len(coords), coords_split_amount)]

        # Set up the manager so worker processes can write back to the points dict.
        manager = multiprocessing.Manager()
        points = manager.dict()

        # Set up a pool of processes, one per CPU by default; the pool waits
        # for all submitted chunks when the with-block exits.
        # TODO: Implement menu.progress here?
        # TODO: Instead of passing in points, generate them in the method &
        # concatenate them together after passing them back?
        with concurrent.futures.ProcessPoolExecutor() as executor:
            for chunk in chunks:
                executor.submit(self.split_pointcloud, chunk, points)
    else:
        print("Not multiprocessing.")
        self.split_pointcloud(coords, points, count=True)
        points.pop('idx')

    menu.progress(1, 1, ("Processing points completed."))
    print("\n")
    print("Processing points completed in:", str(time.time() - start_time))

    # Create a pcpds object for each idx and store its respective
    # point cloud in it before saving the file.
    tracker = 0
    individual_dimensions = (grid_dimensions[0] / self.partition,
                             grid_dimensions[1] / self.partition,
                             grid_dimensions[2] / self.partition)
    for id in points:
        temp = pcpds(id, individual_dimensions)
        temp.set_point_cloud(points[id])
        file_manager.save(temp, path, id)
        # Keep track of the PCPDS objects being generated.
        menu.progress(tracker, len(points),
                      ("Processing PCPDS object for idx: " + str(id)))
        tracker = tracker + 1
    menu.progress(1, 1, ("Processing PCPDS files completed."))
    print("\n")
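# A sketch of the approach floated in the TODO above: each worker returns its
# own idx->points dict and the chunks are merged afterwards, instead of
# sharing a manager dict. merge_chunk_dicts is illustrative and assumes the
# numpy-as-np import already used by split_pointcloud.
def merge_chunk_dicts(chunk_dicts):
    merged = {}
    for chunk in chunk_dicts:
        for idx, pts in chunk.items():
            # Stack coordinates for cells that appear in several chunks.
            merged[idx] = pts if idx not in merged else np.vstack((merged[idx], pts))
    return merged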
def main():
    pfm = PCPDS_Manager()
    number_of_data = 400

    print("Please enter a collection that has already been filtered:")
    # TODO: list collections

    collection = menu.get_input("Directory: ")
    pfm.get_path_manager().set_cur_dir(collection)
    valid = pfm.get_collection_dir()

    # If it is not a valid directory, say so and ask again.
    while not valid:
        if not pfm.get_collection_dir():
            print("Invalid collection name:",
                  pfm.get_path_manager().get_cur_dir(), "try again.")
            collection = menu.get_input("Directory: ")
            pfm.get_path_manager().set_cur_dir(collection)
            valid = pfm.get_collection_dir()

        # Check the first pcpds object in this directory for a persistence diagram.
        pcpds_temp = None
        for file in os.listdir(
                pfm.get_path_manager().get_full_cur_dir_var(collection)):
            file_path = os.path.join(pfm.get_path_manager().get_full_cur_dir(), file)
            pcpds_temp = file_manager.load(file_path)
            break
        if pcpds_temp is not None:
            if pcpds_temp.get_persistance_diagram() is not None:
                print("Valid Directory Chosen:", valid)
                break
            else:
                valid = False
                print("\nNo persistence diagram present for files @ collection:",
                      pfm.get_path_manager().get_full_cur_dir() + ".\n")
                print("Please either enter a directory that has been filtrated for "
                      "persistence diagrams or run 'generate_persistance_diagrams.py' "
                      "on the collection.")
        else:
            print("Problem loading pcpds file, it loaded as None.")

    cur_dir = pfm.get_path_manager().get_full_cur_dir()
    wb = Workbook()
    excel_sheet = wb.add_sheet('Sheet 1')

    # NOTE: las_obj and dir_name below are assumed to be defined at module
    # scope; they are not created in this function.
    for n in range(number_of_data):
        # Find a random valid index whose four slide neighbours also exist.
        test_idx = file_manager.get_random_file(cur_dir, '.json')[:-5]
        valid_idx = False
        while not valid_idx:
            # Find a valid center pcpds.
            test_idx = file_manager.get_random_file(cur_dir, '.json')[:-5]
            while not pfm.get_path_manager().validate_file(
                    os.path.join(cur_dir, test_idx + ".json")):
                test_idx = file_manager.get_random_file(cur_dir, '.json')[:-5]
            test_pcpds = pfm.get_random_pcpds(test_idx)
            (X, Y, Z) = test_pcpds.get_xyz(str(test_idx))

            # Find the slide-direction pcpds indices.
            slide_left_X = las_obj.find_index(X - 1, Y)
            slide_right_X = las_obj.find_index(X + 1, Y)
            slide_up_Y = las_obj.find_index(X, Y + 1)
            slide_down_Y = las_obj.find_index(X, Y - 1)
            if (pfm.get_path_manager().validate_file(
                        os.path.join(dir_name, str(slide_left_X) + ".json"))
                    and pfm.get_path_manager().validate_file(
                        os.path.join(dir_name, str(slide_right_X) + ".json"))
                    and pfm.get_path_manager().validate_file(
                        os.path.join(dir_name, str(slide_up_Y) + ".json"))
                    and pfm.get_path_manager().validate_file(
                        os.path.join(dir_name, str(slide_down_Y) + ".json"))):
                valid_idx = True
        print("VALID RANDOM ID:", test_idx)

        # Get the random pcpds's details.
        print('COORDINATES: X:' + str(X) + ' Y:' + str(Y) + ' Z:' + str(Z))
        (dimX, dimY, dimZ) = test_pcpds.get_dimensions()
        bounds = test_pcpds.get_bounds(str(test_idx))
        test_pcpds = filtration.get_rips_diagram(test_pcpds)
        test_pd = test_pcpds.get_persistance_diagram()

        results = [0] * 11
        num_dir = 4
        slide_left_X = pfm.get_pcpds(slide_left_X)
        slide_right_X = pfm.get_pcpds(slide_right_X)
        slide_up_Y = pfm.get_pcpds(slide_up_Y)
        slide_down_Y = pfm.get_pcpds(slide_down_Y)

        # Slide the frame 10% further in each direction per overlay step.
        for overlay in range(1, 10):
            # Left
            bounds_left_X = menu.transform(bounds, dimX, -1, True, overlay)
            left_X_pcpds = menu.within_point_cloud(test_pcpds, slide_left_X,
                                                   bounds_left_X)
            left_X_pcpds = filtration.get_rips_diagram(left_X_pcpds)
            left_X_pd = left_X_pcpds.get_persistance_diagram()
            # Right
            bounds_right_X = menu.transform(bounds, dimX, 1, True, overlay)
            right_X_pcpds = menu.within_point_cloud(test_pcpds, slide_right_X,
                                                    bounds_right_X)
            right_X_pcpds = filtration.get_rips_diagram(right_X_pcpds)
            right_X_pd = right_X_pcpds.get_persistance_diagram()

            # Up
            bounds_up_Y = menu.transform(bounds, dimY, 1, False, overlay)
            up_Y_pcpds = menu.within_point_cloud(test_pcpds, slide_up_Y,
                                                 bounds_up_Y)
            up_Y_pcpds = filtration.get_rips_diagram(up_Y_pcpds)
            up_Y_pd = up_Y_pcpds.get_persistance_diagram()

            # Down
            bounds_down_Y = menu.transform(bounds, dimY, -1, False, overlay)
            down_Y_pcpds = menu.within_point_cloud(test_pcpds, slide_down_Y,
                                                   bounds_down_Y)
            down_Y_pcpds = filtration.get_rips_diagram(down_Y_pcpds)
            down_Y_pd = down_Y_pcpds.get_persistance_diagram()

            # Average bottleneck distance over the four directions at this
            # overlay percentage.
            results[overlay - 1] = bottleneck_distances.get_distances(left_X_pd, test_pd)
            results[overlay - 1] += bottleneck_distances.get_distances(right_X_pd, test_pd)
            results[overlay - 1] += bottleneck_distances.get_distances(up_Y_pd, test_pd)
            results[overlay - 1] = (results[overlay - 1]
                                    + bottleneck_distances.get_distances(down_Y_pd, test_pd)) / num_dir

        # Write the results row to the .xls file.
        num = 1
        excel_sheet.write(n, 0, str(test_idx))
        for overlay_avg in results:
            excel_sheet.write(n, num, str(overlay_avg))
            num = num + 1
        wb.save(dir_name + '.xls')
        menu.progress(n, number_of_data,
                      ("Processing random grid: " + str(test_idx) + "..."))
    print("Job done.")