def process_block_queue(lock, block_queue, dst_queue, full_image_name,
                        assess_quality, stretch_params, white_balance, tds,
                        im_metadata):
    '''
    Function run by each process. Will process blocks placed in the
    block_queue until the 'STOP' command is reached.

    Args:
        lock: lock shared by the workers; held while a block is read from
            disk so that only one worker reads at a time.
        block_queue: queue of (x, y, read_size_x, read_size_y) tuples,
            terminated by the string 'STOP'.
        dst_queue: queue that receives one
            (quality_score, pixel_counts, x, y, classified_block) tuple per
            block, followed by a single None once this worker has finished.
        full_image_name: path of the image opened with gdal.
        assess_quality: when true, score each block with pp.calc_q_score;
            otherwise the score is fixed at 1.
        stretch_params: (lower, upper, wb_reference, bp_reference).
        white_balance: when true, apply pp.white_balance after rescaling.
        tds: training dataset, passed through to classify_image.
        im_metadata: sequence whose first element is the image type; the
            whole sequence is also passed to classify_image.
    '''
    # Parse input arguments
    lower, upper, wb_reference, bp_reference = stretch_params
    # Use the builtin float: the np.float alias no longer exists in current
    # NumPy releases and was only ever a synonym for it.
    wb_reference = np.array(wb_reference, dtype=float)
    bp_reference = np.array(bp_reference, dtype=float)
    image_type = im_metadata[0]

    for block_indices in iter(block_queue.get, 'STOP'):
        x, y, read_size_x, read_size_y = block_indices

        # Load block data with gdal (offset and block size). The context
        # manager guarantees the lock is released even if the read raises,
        # so one failing worker cannot block the others.
        with lock:
            src_ds = gdal.Open(full_image_name, gdal.GA_ReadOnly)
            image_data = src_ds.ReadAsArray(x, y, read_size_x, read_size_y)
            # Dropping the reference closes the dataset.
            src_ds = None

        # Restructure raster for panchromatic images:
        if image_data.ndim == 2:
            image_data = np.reshape(image_data,
                                    (1, read_size_y, read_size_x))

        # Calculate the quality score on an arbitrary band
        if assess_quality:
            quality_score = pp.calc_q_score(image_data[0])
        else:
            quality_score = 1.

        # Apply correction to block based on earlier histogram analysis.
        # Converts image to 8 bit by rescaling lower -> 1 and upper -> 255
        image_data = pp.rescale_band(image_data, lower, upper)
        if white_balance:
            # Applies a white balance correction
            image_data = pp.white_balance(image_data, wb_reference,
                                          np.amax(wb_reference))

        # Segment image
        segmented_blocks = segment_image(image_data, image_type=image_type)

        # Classify image
        classified_block = classify_image(image_data, segmented_blocks, tds,
                                          im_metadata, wb_reference,
                                          bp_reference)

        # Count the pixels of each class in this block.
        pixel_counts_block = utils.count_features(classified_block)

        # Pass the data back to the main thread for writing
        dst_queue.put(
            (quality_score, pixel_counts_block, x, y, classified_block))

    # Tell the consumer that this worker has no more blocks to deliver.
    dst_queue.put(None)
# OSSP Process
def process(input_filename, training_dataset, output_filepath=False,
            quality_control=False, debug_flag=False, verbose=False):
    '''
    Classify every subimage of a segmented image file with a random forest.

    Reads the image bands and watershed labels from the h5 file
    input_filename, fits a RandomForestClassifier to training_dataset,
    predicts a class for each watershed region of each subimage, and saves
    the compiled result as '<name>_classified_image.png' and
    '<name>_classified.h5' in output_filepath.

    Args:
        input_filename: path to the h5 file holding the 'original_N' bands,
            the 'watershed' labels, the 'dimensions' dataset and the
            'Image Type' / 'Image Date' attributes.
        training_dataset: indexable as (label_vector, feature_matrix).
        output_filepath: output directory; when left as False (or None) the
            directory of input_filename is used.
        quality_control: when true, run test_training on the training data
            and ask on the console whether to continue.
        debug_flag: when true, skip the first 100 subimages and display each
            remaining result interactively.
        verbose: when true, print progress and timing information.

    Returns:
        (output_filename, pixel_counts). pixel_counts is
        [snow, gray ice, melt ponds, open water, shadow], or np.ones(5) when
        the input image contains no data.

    Raises:
        ValueError: if the image contains no data and its type is not one of
            'wv02_ms', 'srgb' or 'pan'.
    '''
    # Parse the training_dataset input
    label_vector = training_dataset[0]
    training_feature_matrix = training_dataset[1]

    # Set the output filepath to the input one if no path was provided
    if output_filepath is False or output_filepath is None:
        output_filepath = os.path.dirname(input_filename)

    # Strip the 13-character suffix from the input filename.
    # NOTE(review): the previous comment named '_watersheds.h5', which is 14
    # characters long; a 13-character slice matches e.g. '_segmented.h5'.
    # Confirm which suffix the callers actually pass.
    output_filename = os.path.split(input_filename)[1][:-13]
    classified_h5_path = os.path.join(output_filepath,
                                      output_filename + '_classified.h5')

    # Loading the image data. The context manager closes the input file on
    # every exit path, including the early return for empty images.
    input_image = []  # [subimage:row:column:band]
    with h5py.File(input_filename, 'r') as input_file:
        band_1 = input_file['original_1'][:]
        watershed_image = input_file['watershed'][:]
        watershed_dimensions = input_file['dimensions'][:]
        num_x_subimages = watershed_dimensions[0]
        num_y_subimages = watershed_dimensions[1]
        im_type = input_file.attrs.get('Image Type')
        im_date = input_file.attrs.get('Image Date')

        # Method for assessing the quality of the training dataset.
        if quality_control:
            test_training(label_vector, training_feature_matrix)
            aa = raw_input("Continue? ")
            if aa == 'n':
                quit()

        # If there is no information in this image file, save a dummy
        # classified image and exit. This can often happen depending on the
        # original image dimensions and the amount it was split.
        if np.sum(band_1) == 0:
            band_shape = np.shape(band_1)
            # np.zeros takes the shape as ONE tuple; passing the dimensions
            # as separate positional arguments makes the second one the
            # dtype and raises a TypeError.
            if im_type == 'wv02_ms':
                empty_bands = np.zeros((band_shape[0], band_shape[1], 8))
                empty_image = utils.compile_subimages(
                    empty_bands, num_x_subimages, num_y_subimages, 8)
            elif im_type == 'srgb':
                empty_bands = np.zeros((band_shape[0], band_shape[1], 3))
                empty_image = utils.compile_subimages(
                    empty_bands, num_x_subimages, num_y_subimages, 3)
            elif im_type == 'pan':
                empty_image = np.zeros(band_shape)
            else:
                raise ValueError("Unsupported image type: %r" % (im_type,))

            # Open the output only once the data exists, and close it even
            # if a write fails.
            with h5py.File(classified_h5_path, 'w') as outfile:
                outfile.create_dataset('classified', data=empty_image,
                                       compression='gzip',
                                       compression_opts=9)
                outfile.create_dataset('original', data=empty_image,
                                       compression='gzip',
                                       compression_opts=9)
            # return a 1x5 array with values of one for the pixel counts
            return output_filename, np.ones(5)

        # Adds the remaining bands if they exist
        if im_type in ('wv02_ms', 'srgb'):
            if verbose:
                print("Reconstructing multispectral image... ")
            last_band = 8 if im_type == 'wv02_ms' else 3
            bands = [band_1]
            for band_num in range(2, last_band + 1):
                bands.append(input_file['original_%d' % band_num][:])
            for sub_idx in range(len(band_1)):
                input_image.append(
                    create_composite([band[sub_idx] for band in bands]))
            if verbose:
                print("Done. ")
        elif im_type == 'pan':
            input_image = band_1

    # Constructing the random forest tree based on the training data set
    # and labels
    rfc = RandomForestClassifier()
    rfc.fit(training_feature_matrix, label_vector)

    # Using the random forest tree to classify the input image.
    # Runs an analysis on each subimage in the watershed_image list
    num_subimages = len(watershed_image)
    if verbose:
        start_time = time.clock()
        prog1 = 0
        prog2 = 10
        print("Predicting Image...")

    classified_image = []
    for subimage in range(num_subimages):
        if verbose:
            ## Progress tracker
            if int(float(prog1) / float(num_subimages) * 100) == prog2:
                print("%s Percent" % prog2)
                prog2 += 10
            prog1 += 1
            subimage_start_time = time.clock()

        # In debug mode the first 100 subimages are filled with zeros
        # instead of being classified.
        if debug_flag and subimage < 100:
            classified_image.append(
                np.zeros(np.shape(input_image[subimage])[0:2]))
            continue

        cur_image = np.copy(input_image[subimage])
        cur_ws = np.copy(watershed_image[subimage])

        ## Skip any subimages that contain no data, and set the
        ## classification values to 0
        if np.amax(cur_image) < 2:
            classified_image.append(np.zeros(np.shape(cur_image)[0:2]))
            continue

        # If the entire image is a single watershed, we have to handle the
        # neighboring region calculation specially. This assigns the
        # neighboring region values to be the same as the internal values.
        if np.amax(cur_ws) == 1 and im_type == 'pan':
            entropy_image = entropy(bytescale(cur_image), disk(4))
            mean_value = np.average(cur_image)
            std_value = np.std(cur_image)
            max_value = np.amax(cur_image)
            mean_entropy = np.average(entropy_image)
            features = [
                mean_value,             # Average pixel value
                np.median(cur_image),   # Pixel median
                np.amin(cur_image),     # Pixel min
                max_value,              # Pixel max
                std_value,              # Standard deviation
                # Size of superpixel.
                # NOTE(review): len() of the array is its first-axis length
                # (row count), not the pixel count -- confirm this matches
                # how the training features were computed.
                len(cur_image),
                mean_entropy,           # Entropy
                mean_value,             # "Neighbor" average
                std_value,              # "Neighbor" std
                max_value,              # "Neighbor" max
                mean_entropy,           # "Neighbor" entropy
                int(im_date),           # Date
            ]
            input_features = np.array(features).reshape(1, -1)
            ws_pred = rfc.predict(input_features)
            classified_image.append(
                plotPrediction(ws_pred, cur_ws, cur_image))
            continue

        # We need the superpixel labels to start at 0. This shifts the
        # entire label image down so that the first label is 0, if it
        # isn't already.
        if np.amin(cur_ws) > 0:
            cur_ws -= np.amin(cur_ws)

        if im_type == 'wv02_ms':
            input_feature_matrix = feature_calculations.analyze_ms_image(
                cur_image, cur_ws)
        elif im_type == 'srgb':
            entropy_image = entropy(bytescale(cur_image[:, :, 0]), disk(4))
            input_feature_matrix = feature_calculations.analyze_srgb_image(
                cur_image, cur_ws, entropy_image)
        elif im_type == 'pan':
            entropy_image = entropy(bytescale(cur_image), disk(4))
            input_feature_matrix = feature_calculations.analyze_pan_image(
                cur_image, cur_ws, entropy_image, im_date)

        input_feature_matrix = np.array(input_feature_matrix)

        # Predict the classification based on each input feature list
        ws_pred = rfc.predict(input_feature_matrix)

        # Create the classified image by replacing watershed id's with
        # classification values. If there is more than one band, we have to
        # select one (using 2 for no particular reason).
        if im_type == 'pan':
            classified_image.append(
                plotPrediction(ws_pred, cur_ws, cur_image))
        else:
            classified_image.append(
                plotPrediction(ws_pred, cur_ws, cur_image[:, :, 2]))

        if verbose:
            subimage_time = time.clock() - subimage_start_time
            print(str(subimage + 1) + "/" + str(num_subimages) +
                  " Time: " + str(subimage_time))

        ## Display one classification result at a time if the --debug flag
        ## was input.
        if debug_flag:
            if im_type == 'pan':
                display_image(cur_image, cur_ws,
                              classified_image[subimage], 1)
            else:
                display_image(cur_image[:, :, 1], cur_ws,
                              classified_image[subimage], 1)
            (sum_snow, sum_gray_ice, sum_melt_ponds, sum_open_water,
             sum_shadow) = utils.count_features(classified_image[subimage])
            print("Number Snow: %i" % (sum_snow))
            print("Number Pond: %i" % (sum_melt_ponds))
            print("Number Gray Ice: %i" % (sum_gray_ice))
            print("Number Water: %i" % (sum_open_water))
            print("Number Shadow: %i" % (sum_shadow))
            aa = raw_input("Another? ")
            if aa == 'n':
                quit()

    if verbose:
        elapsed_time = time.clock() - start_time
        print("Done. ")
        print("Time elapsed: {0}".format(elapsed_time))

    compiled_classified = utils.compile_subimages(
        classified_image, num_x_subimages, num_y_subimages, 1)
    if im_type == 'wv02_ms':
        compiled_original = utils.compile_subimages(
            input_image, num_x_subimages, num_y_subimages, 8)
    elif im_type == 'srgb':
        compiled_original = utils.compile_subimages(
            input_image, num_x_subimages, num_y_subimages, 3)
    elif im_type == 'pan':
        compiled_original = utils.compile_subimages(
            input_image, num_x_subimages, num_y_subimages, 1)

    if verbose:
        print("Saving...")

    classified_image_path = os.path.join(
        output_filepath, output_filename + '_classified_image.png')
    utils.save_color(compiled_classified, classified_image_path)

    with h5py.File(classified_h5_path, 'w') as outfile:
        outfile.create_dataset('classified', data=compiled_classified,
                               compression='gzip', compression_opts=9)
        outfile.create_dataset('original', data=compiled_original,
                               compression='gzip', compression_opts=9)

    #### Count the number of pixels that were in each classification
    #### category.
    (sum_snow, sum_gray_ice, sum_melt_ponds, sum_open_water,
     sum_shadow) = utils.count_features(compiled_classified)
    pixel_counts = [
        sum_snow, sum_gray_ice, sum_melt_ponds, sum_open_water, sum_shadow
    ]

    if verbose:
        print("Done.")

    return output_filename, pixel_counts
def process_split(filename, training_set, save_path):
    '''
    Segment and classify one image split, reusing an existing result if any.

    Args:
        filename: path to the raw split; its directory is the base directory
            and results are looked up / written in '<base>/processed'.
        training_set: training dataset handed to classify_image.
        save_path: location passed to utils.save_results.

    Returns:
        (image_name, pixel_counts) when a classification exists or was
        produced. Returns None when no segmented file was found after
        segmentation, in which case classification is skipped.
    '''
    tds = training_set

    # base is the directory containing the raw splits
    base, fname = os.path.split(filename)
    split_name = os.path.splitext(fname)[0]
    # output_dir is the location where the segmented and classified images
    # are saved.
    output_dir = os.path.join(base, 'processed')

    # Pause for a brief time before starting the first process. This
    # prevents overlapping text output by staggering the start time of
    # these processes.
    time.sleep(round(random.random(), 1))

    # If the classified image already exists, don't reclassify. Open the
    # existing classification, so that we can collect and return the pixel
    # count data from that image.
    if os.path.isdir(output_dir):
        for completed_name in os.listdir(output_dir):
            if not (split_name in completed_name
                    and 'classified.h5' in completed_name):
                continue
            print("Already classified: %s" % fname)
            try:
                # The context manager closes the handle even when reading a
                # corrupted file raises part-way through.
                with h5py.File(os.path.join(output_dir, completed_name),
                               'r') as classified_file:
                    classified_image = classified_file['classified'][:]
            except IOError:
                print("Corrupted file (reclassifying): %s" % completed_name)
                continue
            pixel_counts = list(utils.count_features(classified_image))
            return split_name, pixel_counts

    seg_time = time.clock()
    # Segment the image. color_check is hard-wired to True; the original
    # author left a note that it could instead be enabled for a random
    # subset of the splits.
    print("Segmenting image: %s" % fname)
    segment_image(base, fname, color_check=True)
    print("Segment finished: %s: %f" % (fname, time.clock() - seg_time))

    segment = split_name + '_segmented.h5'
    segment_path = os.path.join(output_dir, segment)
    if os.path.isfile(segment_path):
        # Classify the split image
        class_time = time.clock()
        print("Classifying image: %s" % segment)
        image_name, pixel_counts = classify_image(segment_path, tds)
        print("Classification finished: %s: %f"
              % (segment, time.clock() - class_time))

        # Save the results to a common file
        utils.save_results("classification_results_raw", save_path,
                           image_name, pixel_counts)

        # Remove the segmented image (the raw split itself is kept).
        print("Cleaning up segments: Removed " + segment)
        os.remove(segment_path)

        return image_name, pixel_counts
    else:
        print("Skipped classification of: %s" % segment)
def main():
    '''
    Command line entry point: segment and classify each input image.

    Parses the command line, builds a task list for the input file or
    directory, and for every incomplete task preprocesses, segments and
    classifies the image (split by split when splitting was requested).
    Each result is written as '<image>_classified.tif' together with an
    '<image>_md.csv' file holding the quality score and pixel counts, and
    optionally a colour png.

    Raises:
        IOError: if the input path is neither a file nor a directory, or
            if gdal cannot open the source image for its metadata.
    '''
    # Set Up Arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("input_dir",
                        help='''directory path containing date directories of
                        images to be processed''')
    parser.add_argument("image_type", type=str,
                        choices=["srgb", "wv02_ms", "pan"],
                        help="image type: 'srgb', 'wv02_ms', 'pan'")
    parser.add_argument("training_dataset",
                        help="training data file")
    parser.add_argument("--training_label", type=str, default=None,
                        help="name of training classification list")
    parser.add_argument("-o", "--output_dir", type=str, default=None,
                        help="directory to place output results.")
    parser.add_argument(
        "-s", "--splits", metavar='int', type=int, default=1,
        help="number of subdividing splits to preform on raw image")
    parser.add_argument("-p", "--parallel", metavar='int', type=int,
                        default=1,
                        help='''number of processing threads to create.''')
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="display text information and progress")
    parser.add_argument("-e", "--extended_output", action="store_true",
                        help='''Save additional data:
                        1) classified image (png)
                        2) classified results (csv)
                        ''')
    parser.add_argument(
        "-c", "--nostretch", action="store_false",
        help="Do not apply a histogram stretch image correction to input.")

    # Parse Arguments
    args = parser.parse_args()

    # System filepath that contains the directories or files for batch
    # processing
    user_input = args.input_dir
    if os.path.isdir(user_input):
        src_dir = user_input
        src_file = ''
    elif os.path.isfile(user_input):
        src_dir, src_file = os.path.split(user_input)
    else:
        raise IOError('Invalid input')

    # Image type, choices are 'srgb', 'pan', or 'wv02_ms'
    image_type = args.image_type
    # File with the training data
    tds_file = args.training_dataset
    # Default tds label is the image type
    if args.training_label is None:
        tds_label = image_type
    else:
        tds_label = args.training_label

    # Default output directory (if not provided)
    if args.output_dir is None:
        dst_dir = os.path.join(src_dir, 'classified')
    else:
        dst_dir = args.output_dir
    if not os.path.isdir(dst_dir):
        os.makedirs(dst_dir)

    num_splits = args.splits
    num_threads = args.parallel
    verbose = args.verbose
    extended_output = args.extended_output
    # store_false: the value is True unless --nostretch was given.
    stretch = args.nostretch

    # For Ames OIB Processing:
    assess_quality = True

    # Directory where temporary files are saved
    if num_splits > 1:
        working_dir = os.path.join(src_dir, 'splits')
    else:
        working_dir = None

    # Prepare a list of images to be processed based on the user input:
    # a list of task objects based on the files in the input directory.
    # Each task is an image to process, and has a subtask for each split
    # of that image.
    task_list = utils.create_task_list(os.path.join(src_dir, src_file),
                                       dst_dir, num_splits)

    # Load Training Data
    tds = utils.load_tds(tds_file, tds_label)

    for task in task_list:
        # Skip this task if it is already marked as complete
        if task.is_complete():
            continue

        # Default quality score until this value is calculated. It is reset
        # for every task so that an image whose preprocessing step is
        # skipped below does not report the previous image's score.
        quality_score = 1.

        # If the image has not yet been split or if no splitting was
        # requested, proceed to the preprocessing step.
        image_name = task.get_id()
        if not task.is_split() or num_splits == 1:
            image_data, im_info = prepare_image(src_dir, image_name,
                                                image_type,
                                                output_path=working_dir,
                                                number_of_splits=num_splits,
                                                apply_correction=stretch,
                                                verbose=verbose)
            if assess_quality:
                if verbose:
                    print("Calculating image quality score...")
                # Calculate the quality score for this image:
                quality_score = utils.calc_q_score(image_data[1])

            block_dims = im_info[0]
            image_date = im_info[1]

        pixel_counts = [0, 0, 0, 0, 0]
        classified_image = []

        # Loop until all subtasks are complete.
        # Breaks when task.get_next_subtask() returns None (all subtasks
        # complete) or if the task is complete.
        while True:
            if task.is_complete():
                break
            elif task.has_subtask():
                subtask = task.get_next_subtask()
                if subtask is None:
                    break
                # If there is a subtask, the image data is stored in a
                # split on the drive.
                image_data = os.path.join(working_dir, subtask) + '.h5'
                with h5py.File(image_data, 'r') as f:
                    block_dims = f.attrs.get("Block Dimensions")
                    image_date = f.attrs.get("Image Date")
            else:
                subtask = task.get_id()

            # Segment image
            seg_time = time.clock()
            if verbose:
                print("Segmenting image: %s" % subtask)
            image_data, segmented_blocks = segment_image(
                image_data, image_type=image_type, threads=num_threads,
                verbose=verbose)
            if verbose:
                print("Segment finished: %s: %f"
                      % (subtask, time.clock() - seg_time))

            # Classify image
            class_time = time.clock()
            if verbose:
                print("Classifying image: %s" % subtask)
            classified_blocks = classify_image(image_data, segmented_blocks,
                                               tds,
                                               [image_type, image_date],
                                               threads=num_threads,
                                               verbose=verbose)
            if verbose:
                print("Classification finished: %s: %f"
                      % (subtask, time.clock() - class_time))

            # Hold onto the output of this subtask
            clsf_split = utils.compile_subimages(classified_blocks,
                                                 block_dims[0],
                                                 block_dims[1])

            # Save the results to the temp folder if there is more than 1
            # split
            if num_splits > 1:
                with h5py.File(
                        os.path.join(working_dir,
                                     subtask + '_classified.h5'),
                        'w') as f:
                    f.create_dataset('classified', data=clsf_split,
                                     compression='gzip', compression_opts=3)

            # Add the pixel counts from this classified split to the
            # running total.
            pixel_counts_split = utils.count_features(clsf_split)
            for i in range(len(pixel_counts)):
                pixel_counts[i] += pixel_counts_split[i]

            # Mark this subtask as complete. This sets task.complete to
            # True if there are no subtasks.
            task.update_subtask(subtask)

        # Compile the split images back into a single image. The split ids
        # are sorted and turned into the filename of each split saved on
        # the drive.
        if num_splits > 1:
            if verbose:
                print("Recompiling: %s" % task.get_id())
            # Deliberately not named task_list: that is the list the
            # enclosing for loop is iterating over.
            split_ids = task.get_tasklist()
            split_ids.sort()
            clsf_splits = [
                os.path.join(working_dir, split_id + "_classified.h5")
                for split_id in split_ids
            ]
            classified_image = utils.stitch(clsf_splits)
        else:
            classified_image = clsf_split

        # Open input file to read metadata/projection
        src_path = os.path.join(src_dir, image_name)
        src_ds = gdal.Open(src_path)
        if src_ds is None:
            # Without exceptions enabled gdal.Open signals failure by
            # returning None; fail here with a clear message.
            raise IOError('Unable to open source image: %s' % src_path)
        input_xsize = src_ds.RasterXSize
        input_ysize = src_ds.RasterYSize

        # Trim output image to correct size
        classified_image = classified_image[:input_ysize, :input_xsize]

        # Save the classified image output as a geotiff
        fileformat = "GTiff"
        image_name = os.path.splitext(image_name)[0]
        dst_filename = os.path.join(dst_dir, image_name + '_classified.tif')
        driver = gdal.GetDriverByName(fileformat)
        dst_ds = driver.Create(dst_filename, xsize=input_xsize,
                               ysize=input_ysize, bands=1,
                               eType=gdal.GDT_Byte,
                               options=["TILED=YES", "COMPRESS=LZW"])

        # Transfer the input geotransform and projection
        dst_ds.SetGeoTransform(src_ds.GetGeoTransform())
        dst_ds.SetProjection(src_ds.GetProjection())

        # Write information to output
        dst_ds.GetRasterBand(1).WriteArray(classified_image)

        # Close dataset and write to disk
        dst_ds = None
        src_ds = None

        # Write extra data (total pixel counts and quality score) to csv.
        # Binary mode is the Python 2 convention for the csv module.
        output_csv = os.path.join(dst_dir, image_name + '_md.csv')
        with open(output_csv, "wb") as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow([
                "Quality Score", "White Ice", "Gray Ice", "Melt Ponds",
                "Open Water"
            ])
            writer.writerow([
                quality_score, pixel_counts[0], pixel_counts[1],
                pixel_counts[2], pixel_counts[3]
            ])

        # Save color image for viewing
        if extended_output:
            utils.save_color(classified_image,
                             os.path.join(dst_dir, image_name + '.png'))

        # Remove temp folders
        # NOTE(review): the nesting of this cleanup and of the final print
        # was reconstructed from a whitespace-collapsed source; confirm
        # they are meant to run once per task.
        if working_dir is not None:
            if os.path.isdir(working_dir):
                shutil.rmtree(working_dir)

        if verbose:
            print("Done")
def classify_image(input_image, watershed_data, training_dataset, meta_data,
                   threads=2, quality_control=False, debug_flag=False,
                   verbose=False):
    '''
    Run a random forest classification.

    Input:
        input_image: preprocessed image data (preprocess.py); a mapping from
            band number (1..num_bands) to the list of blocks of that band.
        watershed_data: image objects created with the segmentation
            algorithm (segment.py), one per block.
        training_dataset: tuple of training data in the form
            (label_vector, attribute_matrix)
        meta_data: [im_type, im_date]
        threads: number of worker processes to start.
        quality_control: when true, run test_training on the training data
            and ask on the console whether to continue.
        debug_flag: accepted for interface compatibility; not used here.
        verbose: when true, show a tqdm progress bar (if tqdm is installed)
            and print a message once processing has finished.
    Returns:
        List with one classified block per entry of watershed_data, in
        block order. Entries for blocks no worker reported remain None.
    '''
    #### Prepare Data and Variables
    num_blocks = len(input_image[1])
    num_bands = len(input_image.keys())
    image_type = meta_data[0]
    image_date = meta_data[1]

    ## Restructure the input data.
    # We are creating a single list where each element of the list is one
    # block (old: subimage) of the image and is a stack of all bands.
    image_data = []  # [block:row:column:band]
    for blk in range(num_blocks):
        image_data.append(
            utils.create_composite(
                [input_image[b][blk] for b in range(1, num_bands + 1)]))
    input_image = None

    ## Parse training_dataset input
    label_vector = training_dataset[0]
    training_feature_matrix = training_dataset[1]

    # Method for assessing the quality of the training dataset.
    if quality_control:
        test_training(label_vector, training_feature_matrix)
        aa = raw_input("Continue? ")
        if aa == 'n':
            quit()

    #### Construct the random forest decision tree using the training data
    rfc = RandomForestClassifier()
    rfc.fit(training_feature_matrix, label_vector)

    #### Classify each image block
    # Define multiprocessing-safe queues containing data to process
    clsf_block_queue = Queue()
    num_blocks = len(watershed_data)
    im_block_queue = construct_block_queue(image_data, watershed_data,
                                           num_blocks)

    # Define the number of worker processes to create
    NUMBER_OF_PROCESSES = threads
    block_procs = [
        Process(target=process_block_helper,
                args=(im_block_queue, clsf_block_queue, image_type,
                      image_date, rfc))
        for _ in range(NUMBER_OF_PROCESSES)
    ]

    # Start the worker processes.
    for proc in block_procs:
        # Add a stop command to the end of the queue for each of the
        # processes started. This will signal for the process to stop.
        im_block_queue.put('STOP')
        proc.start()

    # Display a progress bar
    if verbose:
        try:
            from tqdm import tqdm
        except ImportError:
            print("Install tqdm to display progress bar.")
            verbose = False
        else:
            pbar = tqdm(total=num_blocks, unit='block')

    # Each process adds its classification results to clsf_block_queue and
    # adds None once there is nothing left for it to process. The loop runs
    # until every process has reported None.
    # The output list is pre-sized because blocks arrive out of order.
    clsf_block_list = [None for _ in range(num_blocks)]
    finished_threads = 0
    while finished_threads < NUMBER_OF_PROCESSES:
        # A blocking get waits for the next result without spinning the
        # CPU, unlike polling queue.empty() in a tight loop.
        val = clsf_block_queue.get()
        if val is None:
            finished_threads += 1
            continue
        block_num = val[0]
        segmnt_data = val[1]
        clsf_block_list[block_num] = segmnt_data
        if verbose:
            pbar.update()

    # Close the progress bar
    if verbose:
        pbar.close()
        print("Finished Processing. Closing threads...")

    # Join all of the processes back together
    for proc in block_procs:
        proc.join()

    return clsf_block_list