Exemple #1
0
def process_block_queue(lock, block_queue, dst_queue, full_image_name,
                        assess_quality, stretch_params, white_balance, tds,
                        im_metadata):
    '''
    Worker loop run by each process.

    Pulls block descriptors from block_queue until the 'STOP' sentinel is
    read. Each block is loaded from the source image, rescaled, optionally
    white balanced, segmented and classified, and the result is pushed onto
    dst_queue. A final None is put on dst_queue once 'STOP' has been read.

    Input:
        lock: lock guarding access to the source image file; used as a
            context manager.
        block_queue: queue of (x, y, read_size_x, read_size_y) tuples,
            terminated by the string 'STOP'.
        dst_queue: queue receiving one
            (quality_score, pixel_counts_block, x, y, classified_block)
            tuple per block, followed by None.
        full_image_name: path of the image opened with gdal.
        assess_quality: when truthy, a quality score is calculated from the
            first band of each block; otherwise the score is 1.
        stretch_params: (lower, upper, wb_reference, bp_reference).
        white_balance: when truthy, a white balance correction is applied.
        tds: training dataset handed through to classify_image.
        im_metadata: sequence whose first element is the image type.
    Raises:
        IOError: if gdal cannot open full_image_name.
    '''
    # Parse input arguments
    lower, upper, wb_reference, bp_reference = stretch_params
    # np.float was only an alias of the builtin float (removed in NumPy 1.24),
    # so dtype=float requests exactly the same array type.
    wb_reference = np.array(wb_reference, dtype=float)
    bp_reference = np.array(bp_reference, dtype=float)
    image_type = im_metadata[0]

    for block_indices in iter(block_queue.get, 'STOP'):

        x, y, read_size_x, read_size_y = block_indices
        # Load block data with gdal (offset and block size). The context
        # manager guarantees the lock is released even if the read fails,
        # so one failing worker cannot stall all of the others.
        with lock:
            src_ds = gdal.Open(full_image_name, gdal.GA_ReadOnly)
            if src_ds is None:
                raise IOError('Unable to open image: %s' % full_image_name)
            image_data = src_ds.ReadAsArray(x, y, read_size_x, read_size_y)
            # Dropping the reference closes the dataset
            src_ds = None

        # Restructure raster for panchromatic images:
        if image_data.ndim == 2:
            image_data = np.reshape(image_data, (1, read_size_y, read_size_x))

        # Calculate the quality score on an arbitrary band
        if assess_quality:
            quality_score = pp.calc_q_score(image_data[0])
        else:
            quality_score = 1.

        # Apply correction to block based on earlier histogram analysis
        # Converts image to 8 bit by rescaling lower -> 1 and upper -> 255
        image_data = pp.rescale_band(image_data, lower, upper)
        if white_balance:
            # Applies a white balance correction
            image_data = pp.white_balance(image_data, wb_reference,
                                          np.amax(wb_reference))

        # Segment image
        segmented_blocks = segment_image(image_data, image_type=image_type)

        # Classify image
        classified_block = classify_image(image_data, segmented_blocks, tds,
                                          im_metadata, wb_reference,
                                          bp_reference)

        # Count the pixels of each class found in this block
        pixel_counts_block = utils.count_features(classified_block)

        # Pass the data back to the main thread for writing
        dst_queue.put(
            (quality_score, pixel_counts_block, x, y, classified_block))

    # Signal that this worker has finished
    dst_queue.put(None)
Exemple #2
0
# OSSP Process
Exemple #3
0
def process(input_filename,
            training_dataset,
            output_filepath=False,
            quality_control=False,
            debug_flag=False,
            verbose=False):
    '''
    Classify every subimage of a segmented (watershed) image file with a
    random forest and save the result.

    Input:
        input_filename: path of the hdf5 file holding the 'original_N' band
            datasets, the 'watershed' and 'dimensions' datasets and the
            'Image Type' / 'Image Date' attributes.
        training_dataset: indexable as (label_vector, feature_matrix).
        output_filepath: directory for the output files. Defaults to the
            directory of input_filename when left as False (or None).
        quality_control: when truthy, runs test_training on the training
            data and asks whether to continue.
        debug_flag: when truthy, skips the first 100 subimages and displays
            each remaining classification interactively.
        verbose: when truthy, prints progress and timing information.
    Returns:
        (output_filename, pixel_counts). pixel_counts holds the five values
        from utils.count_features, or np.ones(5) when the input image
        contains no data.
    Raises:
        ValueError: if the 'Image Type' attribute is not one of
            'wv02_ms', 'srgb' or 'pan'.

    NOTE: raw_input and time.clock are Python 2 era names; this function
    targets the same interpreter as the rest of the file.
    '''

    # Parse the training_dataset input
    label_vector = training_dataset[0]
    training_feature_matrix = training_dataset[1]

    # Set the output filepath to the input one if there is no path argument provided
    if output_filepath is False or output_filepath is None:
        output_filepath = os.path.dirname(input_filename)
    # Strips the trailing 13 characters (the watershed suffix) from the name.
    # NOTE(review): the original comment named '_watersheds.h5', which is 14
    # characters long - confirm which suffix the segmenter writes.
    output_filename = os.path.split(input_filename)[1][:-13]
    classified_h5_path = os.path.join(output_filepath,
                                      output_filename + '_classified.h5')

    # Loading the image data
    input_file = h5py.File(input_filename, 'r')

    input_image = []  # [subimage:row:column:band]
    band_1 = input_file['original_1'][:]
    watershed_image = input_file['watershed'][:]
    watershed_dimensions = input_file['dimensions'][:]
    num_x_subimages = watershed_dimensions[0]
    num_y_subimages = watershed_dimensions[1]

    im_type = input_file.attrs.get('Image Type')
    im_date = input_file.attrs.get('Image Date')

    # Use this to read files created before the image type attribute change
    # im_type = 'wv02_ms'

    # Every later step depends on the band count, so fail here with a clear
    # message instead of an unrelated NameError/IndexError further down.
    if im_type == 'wv02_ms':
        num_bands = 8
    elif im_type == 'srgb':
        num_bands = 3
    elif im_type == 'pan':
        num_bands = 1
    else:
        input_file.close()
        raise ValueError("Unsupported image type: %r" % (im_type,))

    # Method for assessing the quality of the training dataset.
    if quality_control:
        test_training(label_vector, training_feature_matrix)
        aa = raw_input("Continue? ")
        if aa == 'n':
            quit()

    # If there is no information in this image file, save a dummy classified image and exit
    # This can often happen depending on the original image dimensions and the amount it was split
    if np.sum(band_1) == 0:
        # Nothing else is read from the input on this path
        input_file.close()

        if im_type == 'pan':
            empty_image = np.zeros(np.shape(band_1))
        else:
            # np.zeros takes the shape as a single tuple; passing the
            # dimensions as separate arguments raises a TypeError.
            # NOTE(review): dimensions are kept exactly as originally
            # written (first two axes of band_1 plus the band count) -
            # confirm this is the layout utils.compile_subimages expects.
            empty_bands = np.zeros(
                (np.shape(band_1)[0], np.shape(band_1)[1], num_bands))
            empty_image = utils.compile_subimages(empty_bands,
                                                  num_x_subimages,
                                                  num_y_subimages, num_bands)

        with h5py.File(classified_h5_path, 'w') as outfile:
            outfile.create_dataset('classified',
                                   data=empty_image,
                                   compression='gzip',
                                   compression_opts=9)
            outfile.create_dataset('original',
                                   data=empty_image,
                                   compression='gzip',
                                   compression_opts=9)
        # return a 1x5 array with values of one for the pixel counts
        return output_filename, np.ones(5)

    # Adds the remaining bands if they exist
    if im_type == 'pan':
        input_image = band_1
    else:
        if verbose: print("Reconstructing multispectral image... ")
        bands = [band_1]
        for band_number in range(2, num_bands + 1):
            bands.append(input_file['original_%d' % band_number][:])

        for i in range(len(band_1)):
            input_image.append(create_composite([band[i] for band in bands]))
        if verbose: print("Done. ")

    input_file.close()

    # Constructing the random forest tree based on the training data set and labels
    rfc = RandomForestClassifier()
    rfc.fit(training_feature_matrix, label_vector)

    # Using the random forest tree to classify the input image.
    # Runs an analysis on each subimage in the watershed_image list
    num_subimages = len(watershed_image)

    if verbose:
        start_time = time.clock()
        prog1 = 0
        prog2 = 10
        print("Predicting Image...")

    classified_image = []
    for subimage in range(num_subimages):

        if verbose:
            ## Progress tracker
            if int(float(prog1) / float(num_subimages) * 100) == prog2:
                print("%s Percent" % prog2)
                prog2 += 10
            prog1 += 1
            subimage_start_time = time.clock()

        # In debug mode the first 100 subimages are filled with zeros
        # instead of being classified.
        if debug_flag and subimage < 100:
            classified_image.append(
                np.zeros(np.shape(input_image[subimage])[0:2]))
            continue

        cur_image = np.copy(input_image[subimage])
        cur_ws = np.copy(watershed_image[subimage])

        ## Skip any subimages that contain no data, and set the classification values to 0
        if np.amax(cur_image) < 2:
            classified_image.append(np.zeros(np.shape(cur_image)[0:2]))
            continue

        # If the entire image is a single watershed, we have to handle the neighboring region
        #   calculation specially. This assigns the neighboring regions values to be the same
        #   as the internal values.
        if np.amax(cur_ws) == 1 and im_type == 'pan':
            entropy_image = entropy(bytescale(cur_image), disk(4))
            pixel_mean = np.average(cur_image)
            pixel_std = np.std(cur_image)
            pixel_max = np.amax(cur_image)
            entropy_mean = np.average(entropy_image)

            features = [
                pixel_mean,  # Average pixel value
                np.median(cur_image),  # Pixel median
                np.amin(cur_image),  # Pixel min
                pixel_max,  # Pixel max
                pixel_std,  # Standard deviation
                len(cur_image),  # "Size of superpixel" (len() of the block)
                entropy_mean,  # Average entropy
                pixel_mean,  # "Neighbor" average
                pixel_std,  # "Neighbor" std
                pixel_max,  # "Neighbor" max
                entropy_mean,  # "Neighbor" entropy
                int(im_date),  # Date
            ]

            input_features = np.array(features).reshape(1, -1)
            ws_pred = rfc.predict(input_features)
            classified_image.append(plotPrediction(ws_pred, cur_ws, cur_image))
            continue

        # We need the superpixel labels to start at 0. This shifts the entire label image down so that
        # the first label is 0, if it isn't already.
        if np.amin(cur_ws) > 0:
            cur_ws -= np.amin(cur_ws)

        if im_type == 'wv02_ms':
            input_feature_matrix = feature_calculations.analyze_ms_image(
                cur_image, cur_ws)
        elif im_type == 'srgb':
            entropy_image = entropy(bytescale(cur_image[:, :, 0]), disk(4))
            input_feature_matrix = feature_calculations.analyze_srgb_image(
                cur_image, cur_ws, entropy_image)
        else:  # 'pan'
            entropy_image = entropy(bytescale(cur_image), disk(4))
            input_feature_matrix = feature_calculations.analyze_pan_image(
                cur_image, cur_ws, entropy_image, im_date)

        input_feature_matrix = np.array(input_feature_matrix)

        # Predict the classification based on each input feature list
        ws_pred = rfc.predict(input_feature_matrix)

        # Create the classified image by replacing watershed id's with classification values.
        # If there is more than one band, we have to select one (using 2 for no particular reason).
        if im_type == 'pan':
            classified_image.append(plotPrediction(ws_pred, cur_ws, cur_image))
        else:
            classified_image.append(
                plotPrediction(ws_pred, cur_ws, cur_image[:, :, 2]))

        if verbose:
            subimage_time = time.clock() - subimage_start_time
            print(str(subimage + 1) + "/" + str(num_subimages) + " Time: " +
                  str(subimage_time))

        ## Display one classification result at a time if the --debug flag was input.
        if debug_flag:

            if im_type == 'pan':
                display_image(cur_image, cur_ws, classified_image[subimage], 1)
            else:
                display_image(cur_image[:, :, 1], cur_ws,
                              classified_image[subimage], 1)

            sum_snow, sum_gray_ice, sum_melt_ponds, sum_open_water, sum_shadow = utils.count_features(
                classified_image[subimage])
            print("Number Snow: %i" % (sum_snow))
            print("Number Pond: %i" % (sum_melt_ponds))
            print("Number Gray Ice: %i" % (sum_gray_ice))
            print("Number Water: %i" % (sum_open_water))
            print("Number Shadow: %i" % (sum_shadow))

            aa = raw_input("Another? ")
            if aa == 'n':
                quit()

    if verbose:
        elapsed_time = time.clock() - start_time
        print("Done. ")
        print("Time elapsed: {0}".format(elapsed_time))

    compiled_classified = utils.compile_subimages(classified_image,
                                                  num_x_subimages,
                                                  num_y_subimages, 1)
    compiled_original = utils.compile_subimages(input_image, num_x_subimages,
                                                num_y_subimages, num_bands)

    if verbose: print("Saving...")

    classified_image_path = os.path.join(
        output_filepath, output_filename + '_classified_image.png')
    utils.save_color(compiled_classified, classified_image_path)

    with h5py.File(classified_h5_path, 'w') as outfile:
        outfile.create_dataset('classified',
                               data=compiled_classified,
                               compression='gzip',
                               compression_opts=9)
        outfile.create_dataset('original',
                               data=compiled_original,
                               compression='gzip',
                               compression_opts=9)

    #### Count the number of pixels that were in each classification category.
    sum_snow, sum_gray_ice, sum_melt_ponds, sum_open_water, sum_shadow = utils.count_features(
        compiled_classified)
    pixel_counts = [
        sum_snow, sum_gray_ice, sum_melt_ponds, sum_open_water, sum_shadow
    ]

    if verbose: print("Done.")

    return output_filename, pixel_counts
Exemple #4
0
def process_split(filename, training_set, save_path):
    '''
    Segment and classify a single image split, reusing an existing
    classification when one is found.

    Input:
        filename: path of the raw split. Output is looked for, and written
            to, a 'processed' directory next to it.
        training_set: training data handed through to classify_image.
        save_path: location handed to utils.save_results.
    Returns:
        (image_name, pixel_counts) when the split was classified or an
        existing classification could be read; None when no segmented file
        was produced and classification was skipped.

    NOTE: time.clock is a Python 2 era name; this function targets the same
    interpreter as the rest of the file.
    '''
    tds = training_set

    # base is the directory containing the raw splits
    base, fname = os.path.split(filename)
    split_name = os.path.splitext(fname)[0]

    # output_dir is the location where the segmented and classified images are saved.
    output_dir = os.path.join(base, 'processed')

    # Pause for a brief time before starting the first process. This prevents overlapping text output
    # by staggering the start time of these processes.
    time.sleep(round(random.random(), 1))

    # If the classified image already exists, don't reclassify. Open the existing classification,
    # so that we can collect and return the pixel count data from that image.
    if os.path.isdir(output_dir):
        for completed_name in os.listdir(output_dir):
            if split_name in completed_name and 'classified.h5' in completed_name:
                print("Already classified: %s" % fname)
                try:
                    # The context manager closes the file even when reading
                    # the dataset fails part-way through.
                    with h5py.File(os.path.join(output_dir, completed_name),
                                   'r') as classified_file:
                        classified_image = classified_file['classified'][:]
                    pixel_counts = list(utils.count_features(classified_image))
                    return split_name, pixel_counts
                except IOError:
                    # Fall through and classify the split again
                    print("Corrupted file (reclassifying): %s" % completed_name)

    seg_time = time.clock()

    # Segment the image
    print("Segmenting image: %s" % fname)
    segment_image(base, fname, color_check=True)
    print("Segment finished: %s: %f" % (fname, time.clock() - seg_time))

    segment = split_name + '_segmented.h5'
    segment_path = os.path.join(output_dir, segment)
    if not os.path.isfile(segment_path):
        print("Skipped classification of: %s" % segment)
        return None

    # Classify the split image
    class_time = time.clock()
    print("Classifying image: %s" % segment)
    image_name, pixel_counts = classify_image(segment_path, tds)
    print("Classification finished: %s: %f" % (segment, time.clock() - class_time))

    # Save the results to a common file
    utils.save_results("classification_results_raw", save_path, image_name, pixel_counts)

    # Remove the segmented image
    print("Cleaning up segments: Removed " + segment)
    os.remove(segment_path)
    return image_name, pixel_counts
Exemple #5
0
def main():
    '''
    Command line entry point: segment and classify every image found at the
    input path.

    For each task from utils.create_task_list the image is preprocessed
    (and split when requested), every split is segmented and classified,
    and the classified raster is written to the output directory as a
    GeoTIFF that reuses the geotransform and projection of the input. A csv
    with the quality score and pixel counts is written next to it, plus a
    color png when --extended_output is given.

    Raises:
        IOError: if the input path is neither a directory nor a file, or if
            gdal cannot open an input image.

    NOTE: time.clock and the "wb" csv mode are Python 2 era idioms; this
    function targets the same interpreter as the rest of the file.
    '''
    # Set Up Arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("input_dir",
                        help='''directory path containing date directories of 
                        images to be processed''')
    parser.add_argument("image_type",
                        type=str,
                        choices=["srgb", "wv02_ms", "pan"],
                        help="image type: 'srgb', 'wv02_ms', 'pan'")
    parser.add_argument("training_dataset", help="training data file")
    parser.add_argument("--training_label",
                        type=str,
                        default=None,
                        help="name of training classification list")
    parser.add_argument("-o",
                        "--output_dir",
                        type=str,
                        default=None,
                        help="directory to place output results.")
    parser.add_argument(
        "-s",
        "--splits",
        metavar='int',
        type=int,
        default=1,
        help="number of subdividing splits to preform on raw image")
    parser.add_argument("-p",
                        "--parallel",
                        metavar='int',
                        type=int,
                        default=1,
                        help='''number of processing threads to create.''')
    parser.add_argument("-v",
                        "--verbose",
                        action="store_true",
                        help="display text information and progress")
    parser.add_argument("-e",
                        "--extended_output",
                        action="store_true",
                        help='''Save additional data:
                                    1) classified image (png)
                                    2) classified results (csv)
                        ''')
    parser.add_argument(
        "-c",
        "--nostretch",
        action="store_false",
        help="Do not apply a histogram stretch image correction to input.")

    # Parse Arguments
    args = parser.parse_args()

    # System filepath that contains the directories or files for batch processing
    user_input = args.input_dir
    if os.path.isdir(user_input):
        src_dir = user_input
        src_file = ''
    elif os.path.isfile(user_input):
        src_dir, src_file = os.path.split(user_input)
    else:
        raise IOError('Invalid input')
    # Image type, choices are 'srgb', 'pan', or 'wv02_ms'
    image_type = args.image_type
    # File with the training data
    tds_file = args.training_dataset
    # Default tds label is the image type
    if args.training_label is None:
        tds_label = image_type
    else:
        tds_label = args.training_label
    # Default output directory (if not provided)
    if args.output_dir is None:
        dst_dir = os.path.join(src_dir, 'classified')
    else:
        dst_dir = args.output_dir
    if not os.path.isdir(dst_dir):
        os.makedirs(dst_dir)

    num_splits = args.splits
    num_threads = args.parallel
    verbose = args.verbose
    extended_output = args.extended_output
    # --nostretch is a store_false flag: the value is True unless -c is given
    stretch = args.nostretch

    # For Ames OIB Processing:
    assess_quality = True

    # Directory where temporary files are saved
    if num_splits > 1:
        working_dir = os.path.join(src_dir, 'splits')
    else:
        working_dir = None

    # Prepare a list of images to be processed based on the user input
    #   list of task objects based on the files in the input directory.
    #   Each task is an image to process, and has a subtask for each split
    #   of that image.
    task_list = utils.create_task_list(os.path.join(src_dir, src_file),
                                       dst_dir, num_splits)
    # Load Training Data
    tds = utils.load_tds(tds_file, tds_label)

    for task in task_list:

        # Skip this task if it is already marked as complete
        if task.is_complete():
            continue

        # Default quality score until this value is calculated. Reset for
        # every task so that an image whose preprocessing is skipped below
        # does not report the score of the previously processed image.
        quality_score = 1.

        # If the image has not yet been split or if no splitting was requested,
        # proceed to the preprocessing step.
        image_name = task.get_id()
        if not task.is_split() or num_splits == 1:
            image_data, im_info = prepare_image(src_dir,
                                                image_name,
                                                image_type,
                                                output_path=working_dir,
                                                number_of_splits=num_splits,
                                                apply_correction=stretch,
                                                verbose=verbose)

            if assess_quality:
                if verbose:
                    print("Calculating image quality score...")
                # Calculate the quality score for this image:
                quality_score = utils.calc_q_score(image_data[1])

            block_dims = im_info[0]
            image_date = im_info[1]

        pixel_counts = [0, 0, 0, 0, 0]
        # Loop until all subtasks are complete.
        # Breaks when task.get_next_subtask() returns None (all subtasks complete)
        #   or if the task is complete.
        while True:

            if task.is_complete():
                break
            elif task.has_subtask():
                subtask = task.get_next_subtask()

                if subtask is None:
                    break
                # If there is a subtask, the image data is stored in a split on the
                #   drive. Subtask == {} when there are no subtasks.
                image_data = os.path.join(working_dir, subtask) + '.h5'
                with h5py.File(image_data, 'r') as f:
                    block_dims = f.attrs.get("Block Dimensions")
                    image_date = f.attrs.get("Image Date")
            else:
                subtask = task.get_id()

            # Segment image
            seg_time = time.clock()
            if verbose:
                print("Segmenting image: %s" % subtask)
            image_data, segmented_blocks = segment_image(image_data,
                                                         image_type=image_type,
                                                         threads=num_threads,
                                                         verbose=verbose)
            if verbose:
                print("Segment finished: %s: %f" %
                      (subtask, time.clock() - seg_time))

            # Classify image
            class_time = time.clock()
            if verbose:
                print("Classifying image: %s" % subtask)
            classified_blocks = classify_image(image_data,
                                               segmented_blocks,
                                               tds, [image_type, image_date],
                                               threads=num_threads,
                                               verbose=verbose)
            if verbose:
                print("Classification finished: %s: %f" %
                      (subtask, time.clock() - class_time))

            # Hold onto the output of this subtask
            clsf_split = utils.compile_subimages(classified_blocks,
                                                 block_dims[0], block_dims[1])

            # Save the results to the temp folder if there is more than 1 split
            if num_splits > 1:
                with h5py.File(
                        os.path.join(working_dir, subtask + '_classified.h5'),
                        'w') as f:
                    f.create_dataset('classified',
                                     data=clsf_split,
                                     compression='gzip',
                                     compression_opts=3)

            # Add the pixel counts from this classified split to the
            #   running total.
            pixel_counts_split = utils.count_features(clsf_split)
            pixel_counts = [
                total + count
                for total, count in zip(pixel_counts, pixel_counts_split)
            ]

            # Mark this subtask as complete. This sets task.complete to True
            #   if there are no subtasks.
            task.update_subtask(subtask)

        # Create a sorted list of the subtasks. Then create the correct filename
        #   for each split saved on the drive.
        # Compile the split images back into a single image
        if num_splits > 1:
            if verbose:
                print("Recompiling: %s" % task.get_id())
            # Named separately from task_list so the list being iterated by
            # the enclosing loop is not rebound.
            split_ids = task.get_tasklist()
            split_ids.sort()
            clsf_splits = [
                os.path.join(working_dir, split_id + "_classified.h5")
                for split_id in split_ids
            ]
            classified_image = utils.stitch(clsf_splits)
        else:
            classified_image = clsf_split

        # Open input file to read metadata/projection
        src_path = os.path.join(src_dir, image_name)
        src_ds = gdal.Open(src_path)
        if src_ds is None:
            raise IOError('Unable to open image: %s' % src_path)

        input_xsize = src_ds.RasterXSize
        input_ysize = src_ds.RasterYSize

        # Trim output image to correct size
        classified_image = classified_image[:input_ysize, :input_xsize]

        # Save the classified image output as a geotiff
        fileformat = "GTiff"
        image_name = os.path.splitext(image_name)[0]
        dst_filename = os.path.join(dst_dir, image_name + '_classified.tif')
        driver = gdal.GetDriverByName(fileformat)
        dst_ds = driver.Create(dst_filename,
                               xsize=input_xsize,
                               ysize=input_ysize,
                               bands=1,
                               eType=gdal.GDT_Byte,
                               options=["TILED=YES", "COMPRESS=LZW"])

        # Transfer the input geotransform and projection
        dst_ds.SetGeoTransform(src_ds.GetGeoTransform())
        dst_ds.SetProjection(src_ds.GetProjection())

        # Write information to output
        dst_ds.GetRasterBand(1).WriteArray(classified_image)

        # Close dataset and write to disk
        dst_ds = None
        src_ds = None

        # Write extra data (total pixel counts and quality score) to a csv
        output_csv = os.path.join(dst_dir, image_name + '_md.csv')
        with open(output_csv, "wb") as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow([
                "Quality Score", "White Ice", "Gray Ice", "Melt Ponds",
                "Open Water"
            ])
            writer.writerow([
                quality_score, pixel_counts[0], pixel_counts[1],
                pixel_counts[2], pixel_counts[3]
            ])

        # Save color image for viewing
        if extended_output:
            utils.save_color(classified_image,
                             os.path.join(dst_dir, image_name + '.png'))

        # Remove temp folders
        if working_dir is not None and os.path.isdir(working_dir):
            shutil.rmtree(working_dir)

        if verbose:
            print("Done")
Exemple #6
0
def classify_image(input_image,
                   watershed_data,
                   training_dataset,
                   meta_data,
                   threads=2,
                   quality_control=False,
                   debug_flag=False,
                   verbose=False):
    '''
    Run a random forest classification.
    Input:
        input_image: preprocessed image data (preprocess.py). Indexed by
            band number starting at 1; each band is indexable by block
            number.
        watershed_data: Image objects created with the segmentation
            algorithm (segment.py), one entry per block.
        training_dataset: Tuple of training data in the form:
            (label_vector, attribute_matrix)
        meta_data: [im_type, im_date]
        threads: number of worker processes to start.
        quality_control: if truthy, run test_training() on the training
            data and ask for confirmation before continuing.
        debug_flag: currently unused; kept for interface compatibility.
        verbose: if True, display a tqdm progress bar (when tqdm is
            installed) and a completion message.
    Returns:
        List of classified blocks, indexed by block number. Entries for
        which no result was received are left as None.
    '''

    #### Prepare Data and Variables
    num_image_blocks = len(input_image[1])
    num_bands = len(input_image)
    image_type = meta_data[0]
    image_date = meta_data[1]

    ## Restructure the input data.
    # We are creating a single list where each element of the list is one
    #   block (old: subimage) of the image and is a stack of all bands.
    image_data = [
        utils.create_composite(
            [input_image[b][blk] for b in range(1, num_bands + 1)])
        for blk in range(num_image_blocks)
    ]
    # Drop the local reference to the per-band data; only the composites
    #   are needed from here on.
    input_image = None

    ## Parse training_dataset input
    label_vector = training_dataset[0]
    training_feature_matrix = training_dataset[1]

    # Method for assessing the quality of the training dataset.
    if quality_control:
        test_training(label_vector, training_feature_matrix)
        # raw_input only exists on Python 2; fall back to input on Python 3.
        try:
            prompt = raw_input
        except NameError:
            prompt = input
        if prompt("Continue? ") == 'n':
            raise SystemExit()

    #### Construct the random forest decision tree using the training data set
    rfc = RandomForestClassifier()
    rfc.fit(training_feature_matrix, label_vector)

    #### Classify each image block
    # Define multiprocessing-safe queues containing data to process
    clsf_block_queue = Queue()
    num_blocks = len(watershed_data)
    im_block_queue = construct_block_queue(image_data, watershed_data,
                                           num_blocks)

    # Define the number of worker processes to create
    num_processes = threads
    block_procs = [
        Process(target=process_block_helper,
                args=(im_block_queue, clsf_block_queue, image_type, image_date,
                      rfc)) for _ in range(num_processes)
    ]

    # Start the worker processes.
    for proc in block_procs:
        # Add a stop command to the end of the queue for each of the
        #   processes started. This will signal for the process to stop.
        im_block_queue.put('STOP')
        proc.start()

    # Display a progress bar (only if requested and tqdm is available)
    pbar = None
    if verbose:
        try:
            from tqdm import tqdm
        except ImportError:
            print("Install tqdm to display progress bar.")
        else:
            pbar = tqdm(total=num_blocks, unit='block')

    # Each worker is expected to put (block_num, classified_data) on
    #   clsf_block_queue for every block it finishes, and None once it has
    #   no more work. Keep collecting until every worker has reported None.

    # Initialize the output dataset as an empty list of length = input dataset
    #   This needs to be initialized since blocks will be added non-sequentially
    clsf_block_list = [None] * num_blocks
    finished_workers = 0
    while finished_workers < num_processes:
        # Blocking get: waits for the next result instead of spinning on
        #   Queue.empty(), which burns CPU while the workers are busy.
        val = clsf_block_queue.get()
        if val is None:
            finished_workers += 1
            continue
        block_num = val[0]
        clsf_block_list[block_num] = val[1]
        if pbar is not None:
            pbar.update()

    # Close the progress bar
    if pbar is not None:
        pbar.close()
        print("Finished Processing. Closing threads...")

    # Join all of the processes back together
    for proc in block_procs:
        proc.join()

    return clsf_block_list