def generate_neurons_blocks(input_filename,
                            output_filename,
                            num_processes=multiprocessing.cpu_count(),
                            block_shape=None,
                            num_blocks=None,
                            half_window_shape=None,
                            half_border_shape=None,
                            use_drmaa=False,
                            num_drmaa_cores=16,
                            debug=False,
                            **parameters):
    """
        Splits an image stack into blocks, processes each block in its own
        process, and merges the neurons found in every block.

        The stack is optionally trimmed by a border, divided into blocks that
        are each extended by a window, and every windowed block is handed to
        this module's ``main`` in a separate Python process (either local
        subprocesses or DRMAA jobs). Afterwards the neurons stored by each
        block are filtered (more than half of the mask must lie inside the
        non-windowed block) and merged into the ``neurons`` dataset of the
        output group.

        Args:
            input_filename(str):            HDF5 file path followed by the
                                            internal path of the image stack.

            output_filename(str):           HDF5 file path followed by the
                                            internal path of the group where
                                            results are stored.

            num_processes(int):             maximum number of local
                                            subprocesses running at once (not
                                            used when ``use_drmaa`` is set).

            block_shape(sequence of ints):  size of a block along each
                                            dimension (-1 if undefined for a
                                            dimension).

            num_blocks(sequence of ints):   number of blocks along each
                                            dimension (-1 if undefined for a
                                            dimension).

            half_window_shape(sequence):    amount each block is extended on
                                            either side. Defaults to half of
                                            the block shape.

            half_border_shape(sequence):    amount trimmed from either side of
                                            the stack. The first entry must be
                                            0. Defaults to no trimming.

            use_drmaa(bool):                whether to submit the block jobs
                                            through DRMAA.

            num_drmaa_cores(int):           number of cores requested for each
                                            DRMAA job.

            debug(bool):                    stored in the block configuration
                                            and enables the debug recorder
                                            used while merging.

            **parameters(dict):             written to the block configuration
                                            file; ``generate_neurons`` must
                                            provide ``preprocess_data`` and
                                            ``postprocess_data`` entries.
    """

    # TODO: Move function into new module with its own command line interface.
    # TODO: Heavy refactoring required on this function.

    # Extract and validate file extensions.

    # Parse input filename and validate that the name is acceptable
    input_filename_ext, input_dataset_name = hdf5.serializers.split_hdf5_path(
        input_filename)

    # Parse output filename and validate that the name is acceptable
    output_filename_ext, output_group_name = hdf5.serializers.split_hdf5_path(
        output_filename)

    # Directory where individual block runs will be stored.
    intermediate_output_dir = output_filename_ext.rsplit(
        os.path.splitext(output_filename_ext)[1], 1)[0] + "_blocks"

    # Read the input data.
    original_images_shape_array = None
    with h5py.File(input_filename_ext, "r") as input_file_handle:
        original_images_shape_array = numpy.array(
            input_file_handle[input_dataset_name].shape)

    # Get the amount of the border to slice
    half_border_shape_array = None
    if half_border_shape is None:
        half_border_shape_array = numpy.zeros(
            len(original_images_shape_array), dtype=int)
    else:
        # The border (not the window) must match the dimensionality of the
        # image stack.
        assert (len(half_border_shape) == len(original_images_shape_array))

        half_border_shape_array = numpy.array(half_border_shape)

        # Should be of type integer
        assert (issubclass(half_border_shape_array.dtype.type, numpy.integer))

        # Should not cut along temporal portion.
        # Maybe replace with a warning.
        assert (half_border_shape[0] == 0)

    # TODO: Refactor to expanded_numpy.
    # Cuts boundaries from original_images_shape
    original_images_pared_shape_array = original_images_shape_array - \
        2*half_border_shape_array

    # At least one of them must be specified. If not some mixture of both.
    assert ((block_shape is not None) or (num_blocks is not None))

    # Size of the block to use by pixels
    block_shape_array = None
    block_shape_array_undefined = None
    if block_shape is None:
        block_shape_array = -numpy.ones(
            original_images_pared_shape_array.shape, dtype=int)
        block_shape_array_undefined = numpy.ones(
            original_images_pared_shape_array.shape, dtype=bool)
    else:
        # Should have the same number of values in each
        assert (len(original_images_pared_shape_array) == len(block_shape))

        block_shape_array = numpy.array(block_shape, dtype=int)

        # Should be of type integer
        assert issubclass(block_shape_array.dtype.type, numpy.integer)

        block_shape_array_undefined = (block_shape_array == -1)

    # Number of blocks along each dimension
    num_blocks_array = None
    num_blocks_array_undefined = None
    if num_blocks is None:
        num_blocks_array = - \
            numpy.ones(original_images_pared_shape_array.shape, dtype=int)
        num_blocks_array_undefined = numpy.ones(
            original_images_pared_shape_array.shape, dtype=bool)
    else:
        # Should have the same number of values in each
        assert (len(original_images_pared_shape_array) == len(num_blocks))

        num_blocks_array = numpy.array(num_blocks, dtype=int)

        # Should be of type integer
        assert issubclass(num_blocks_array.dtype.type, numpy.integer)

        num_blocks_array_undefined = (num_blocks_array == -1)

    # Want to ensure that both aren't defined.
    # NOTE(review): as written this only fails when *every* dimension has both
    # values defined; confirm whether `.any()` was intended.
    assert ~(~block_shape_array_undefined & ~num_blocks_array_undefined).all()

    # If both are undefined, then the block should span that dimension
    missing_both = (block_shape_array_undefined & num_blocks_array_undefined)
    block_shape_array[missing_both] = original_images_pared_shape_array[
        missing_both]
    num_blocks_array[missing_both] = 1
    # Thus, we have resolved these values and can continue.
    block_shape_array_undefined[missing_both] = False
    num_blocks_array_undefined[missing_both] = False

    # Replace undefined values in block_shape_array
    missing_block_shape_array, block_shape_array_remainder = divmod(
        original_images_pared_shape_array[block_shape_array_undefined],
        num_blocks_array[block_shape_array_undefined])
    # Block shape must be well defined.
    assert (block_shape_array_remainder == 0).all()
    missing_block_shape_array = missing_block_shape_array.astype(int)
    block_shape_array[block_shape_array_undefined] = missing_block_shape_array

    # Replace undefined values in num_blocks_array
    missing_num_blocks_array, num_blocks_array_remainder = divmod(
        original_images_pared_shape_array[num_blocks_array_undefined],
        block_shape_array[num_blocks_array_undefined])
    # Allow some blocks to be smaller
    missing_num_blocks_array += (num_blocks_array_remainder != 0).astype(int)
    num_blocks_array[num_blocks_array_undefined] = missing_num_blocks_array

    # Get the overlap window
    half_window_shape_array = None
    if half_window_shape is None:
        half_window_shape_array = block_shape_array / 2.0
    else:
        assert (
            len(half_window_shape) == len(original_images_pared_shape_array))

        half_window_shape_array = numpy.array(half_window_shape)

        assert issubclass(half_window_shape_array.dtype.type, numpy.integer)

    # Want to make our window size is at least as large as the one used for
    # the f0 calculation.
    if "extract_f0" in parameters["generate_neurons"]["preprocess_data"]:
        assert (parameters["generate_neurons"]["preprocess_data"]["extract_f0"]
                ["half_window_size"] <= half_window_shape_array[0])

    # Estimate bounds for each slice. Uses typical python [begin, end) for the
    # indices.
    estimated_bounds = numpy.zeros(
        tuple(num_blocks_array),
        dtype=(int, original_images_pared_shape_array.shape + (2, )))

    for each_block_indices in iters.index_generator(*num_blocks_array):
        for each_dim, each_block_dim_index in enumerate(each_block_indices):
            estimated_lower_bound = each_block_dim_index * block_shape_array[
                each_dim]
            estimated_upper_bound = (each_block_dim_index +
                                     1) * block_shape_array[each_dim]

            estimated_bounds[each_block_indices][each_dim] = numpy.array(
                [estimated_lower_bound, estimated_upper_bound])

    original_images_pared_slices = numpy.zeros(
        estimated_bounds.shape[:-2],
        dtype=[("actual", int, estimated_bounds.shape[-2:]),
               ("windowed", int, estimated_bounds.shape[-2:]),
               ("windowed_stack_selection", int, estimated_bounds.shape[-2:]),
               ("windowed_block_selection", int, estimated_bounds.shape[-2:])])

    # Get the slice that is within bounds
    original_images_pared_slices["actual"] = estimated_bounds
    original_images_pared_slices["actual"][..., 0] = numpy.where(
        0 < original_images_pared_slices["actual"][..., 0],
        original_images_pared_slices["actual"][..., 0], 0)
    original_images_pared_slices["actual"][..., 1] = numpy.where(
        original_images_pared_slices["actual"][..., 1] <
        original_images_pared_shape_array,
        original_images_pared_slices["actual"][..., 1],
        original_images_pared_shape_array)

    # Gets the defined half_window_size.
    window_addition = numpy.zeros(estimated_bounds.shape, dtype=int)
    window_addition[..., 0] = -half_window_shape_array
    window_addition[..., 1] = half_window_shape_array

    # Get the slice with a window added.
    original_images_pared_slices[
        "windowed"] = estimated_bounds + window_addition
    original_images_pared_slices["windowed"][..., 0] = numpy.where(
        0 < original_images_pared_slices["windowed"][..., 0],
        original_images_pared_slices["windowed"][..., 0], 0)
    original_images_pared_slices["windowed"][..., 1] = numpy.where(
        original_images_pared_slices["windowed"][..., 1] <
        original_images_pared_shape_array,
        original_images_pared_slices["windowed"][..., 1],
        original_images_pared_shape_array)

    # Get the slice information to get the windowed block from the original
    # image stack.
    original_images_pared_slices[
        "windowed_stack_selection"] = original_images_pared_slices["windowed"]
    original_images_pared_slices[
        "windowed_stack_selection"] += xnumpy.expand_view(
            half_border_shape_array, reps_after=2)

    # Get slice information for the portion within
    # `original_images_pared_slices["windowed"]`, which corresponds to
    # `original_images_pared_slices["actual"]`.
    # The lower bound is already 0 as the array was zero initialized.
    original_images_pared_slices["windowed_block_selection"][..., 1] = (
        original_images_pared_slices["actual"][..., 1] -
        original_images_pared_slices["actual"][..., 0])
    original_images_pared_slices[
        "windowed_block_selection"][:] += xnumpy.expand_view(
            original_images_pared_slices["actual"][..., 0] -
            original_images_pared_slices["windowed"][..., 0],
            reps_after=2)

    # Get a directory for intermediate results.
    try:
        os.mkdir(intermediate_output_dir)
    except OSError:
        # If it already exists, that is fine. Any other failure (e.g.
        # permissions, missing parent) should not be hidden.
        if not os.path.isdir(intermediate_output_dir):
            raise

    intermediate_config = intermediate_output_dir + "/" + "config.json"

    # Overwrite the config file always
    with open(intermediate_config, "w") as fid:
        json.dump(
            dict(list(parameters.items()) + list({"debug": debug}.items())),
            fid,
            indent=4,
            separators=(",", " : "))
        fid.write("\n")

    # Construct an HDF5 file for each block
    input_filename_block = []
    output_filename_block = []
    stdout_filename_block = []
    stderr_filename_block = []
    with h5py.File(output_filename_ext, "a") as output_file_handle:
        # Create a new output directory if doesn't exists.
        output_file_handle.require_group(output_group_name)

        output_group = output_file_handle[output_group_name]

        if "original_images" not in output_group:
            if input_filename_ext == output_filename_ext:
                output_group["original_images"] = h5py.SoftLink(
                    input_dataset_name)
            else:
                output_group["original_images"] = h5py.ExternalLink(
                    input_filename_ext, "/" + input_dataset_name)

        output_group.require_group("blocks")

        output_group_blocks = output_group["blocks"]

        input_file_handle = None
        try:
            # Skipping using region refs.
            input_file_handle = h5py.File(input_filename_ext, "r")
        except IOError:
            # File is already open
            input_file_handle = output_file_handle

        for i, i_str, sequential_block_i in iters.filled_stringify_enumerate(
                original_images_pared_slices.flat):
            intermediate_basename_i = intermediate_output_dir + "/" + i_str

            # Hold redirected stdout and stderr for each subprocess.
            stdout_filename_block.append(intermediate_basename_i + os.extsep +
                                         "out")
            stderr_filename_block.append(intermediate_basename_i + os.extsep +
                                         "err")

            # Ensure that the blocks are corrected to deal with trimming of
            # the image stack. Must be done after the calculation of
            # original_images_pared_slices["windowed_block_selection"].
            sequential_block_i_windowed = sequential_block_i[
                "windowed_stack_selection"]
            slice_i = tuple(
                slice(_1, _2, 1) for _1, _2 in sequential_block_i_windowed)

            if i_str not in output_group_blocks:
                output_group_blocks[i_str] = []
                output_group_blocks[i_str].attrs[
                    "filename"] = input_file_handle.filename
                output_group_blocks[i_str].attrs[
                    "dataset"] = input_dataset_name
                output_group_blocks[i_str].attrs["slice"] = str(slice_i)

            block_i = output_group_blocks[i_str]

            with h5py.File(intermediate_basename_i + os.extsep + "h5",
                           "a") as each_block_file_handle:
                # Link each block file back to the block entry in the output
                # file using a path relative to the intermediate directory.
                if "original_images" not in each_block_file_handle:
                    each_block_file_handle[
                        "original_images"] = h5py.ExternalLink(
                            os.path.relpath(block_i.file.filename,
                                            intermediate_output_dir),
                            block_i.name)

                input_filename_block.append(each_block_file_handle.filename +
                                            "/" + "original_images")
                output_filename_block.append(each_block_file_handle.filename +
                                             "/")

        if input_file_handle != output_file_handle:
            input_file_handle.close()

    cur_module_dirpath = os.path.dirname(os.path.dirname(nanshe.__file__))
    cur_module_filepath = os.path.splitext(os.path.abspath(__file__))[0]
    cur_module_name = os.path.relpath(cur_module_filepath, cur_module_dirpath)
    cur_module_name = cur_module_name.replace(os.path.sep, ".")
    cur_module_filepath += os.extsep + "py"

    import sys

    python = sys.executable

    # Each block is run by a fresh interpreter that imports this module's
    # `main` after placing the package directory first on its path.
    executable_run = ""
    executable_run += "from sys import argv, path, exit; "
    executable_run += "path[:] = [\"%s\"] + [_ for _ in path if _ != \"%s\"]; " % \
                      (cur_module_dirpath, cur_module_dirpath,)
    executable_run += "from %s import main; exit(main(*argv))" % \
                      (cur_module_name,)

    # Argument pack layout:
    # (python, "-c", code, config, input, output, stdout file, stderr file)
    block_process_args_gen = iters.izip(itertools.repeat(python),
                                        itertools.repeat("-c"),
                                        itertools.repeat(executable_run),
                                        itertools.repeat(intermediate_config),
                                        input_filename_block,
                                        output_filename_block,
                                        stdout_filename_block,
                                        stderr_filename_block)

    if use_drmaa:
        # Attempt to import drmaa.
        # If it fails to import, either the user has no intent in using it or
        # forgot to install it. If it imports, but fails to find symbols,
        # then the user has not set DRMAA_LIBRARY_PATH or
        # does not have libdrmaa.so.
        try:
            import drmaa
        except ImportError:
            # python-drmaa is not installed.
            logger.error(
                "Was not able to import drmaa. " +
                "If this is meant to be run using the OpenGrid submission " +
                "system, then drmaa needs to be installed via pip or " +
                "easy_install.")
            raise
        except RuntimeError:
            # The drmaa library was not specified, but python-drmaa is
            # installed.
            logger.error(
                "Was able to import drmaa. " +
                "However, the drmaa library could not be found. Please " +
                "either specify the location of libdrmaa.so using the " +
                "DRMAA_LIBRARY_PATH environment variable or disable/remove " +
                "use_drmaa from the config file.")
            raise

        s = drmaa.Session()
        s.initialize()

        ready_processes = []
        for each_arg_pack in block_process_args_gen:
            ready_processes.append((each_arg_pack, s.createJobTemplate()))
            # NOTE(review): index 3 of the argument pack is the config file,
            # not a block ".h5" file; confirm the intended job name source.
            ready_processes[-1][1].jobName = os.path.basename(
                os.path.splitext(cur_module_filepath)
                [0]) + "-" + os.path.basename(
                    os.path.dirname(each_arg_pack[3].split(".h5")[0])
                ) + "-" + os.path.basename(each_arg_pack[3].split(".h5")[0])
            ready_processes[-1][1].remoteCommand = each_arg_pack[0]
            ready_processes[-1][1].args = each_arg_pack[1:-2]
            ready_processes[-1][1].jobEnvironment = os.environ
            ready_processes[-1][1].inputPath = "localhost:" + os.devnull
            ready_processes[-1][
                1].outputPath = "localhost:" + each_arg_pack[-2]
            ready_processes[-1][1].errorPath = "localhost:" + each_arg_pack[-1]
            ready_processes[-1][1].workingDirectory = os.getcwd()
            ready_processes[-1][1].nativeSpecification = "-pe batch " + str(
                num_drmaa_cores)

        running_processes = []
        for each_arg_pack, each_process_template in ready_processes:
            each_process_id = s.runJob(each_process_template)
            running_processes.append(
                (each_arg_pack, each_process_id, each_process_template))
            logger.info("Started new process ( \"" + " ".join(each_arg_pack) +
                        "\" ).")

        start_queue_time = time.time()
        logger.info("Waiting for queued jobs to complete.")

        for each_arg_pack, each_process_id, each_process_template in running_processes:
            each_process_status = s.wait(each_process_id)

            if not each_process_status.hasExited:
                raise RuntimeError("The process (\"" +
                                   " ".join(each_arg_pack) +
                                   "\") has exited prematurely.")

            logger.info("Finished process ( \"" + " ".join(each_arg_pack) +
                        "\" ).")
            s.deleteJobTemplate(each_process_template)

        s.exit()

        end_queue_time = time.time()
        diff_queue_time = end_queue_time - start_queue_time

        logger.info("Run time for queued jobs to complete is \"" +
                    str(diff_queue_time) + " s\".")
    else:
        # TODO: Refactor into a separate class (have it return futures somehow)
        running_processes = []
        pool_tasks_empty = False
        while (not pool_tasks_empty) or len(running_processes):
            # Fill the pool until it is full or there is no work left.
            while (not pool_tasks_empty) and (len(running_processes) <
                                              num_processes):
                try:
                    each_arg_pack = next(block_process_args_gen)
                except StopIteration:
                    pool_tasks_empty = True
                    break

                each_arg_pack, each_stdout_filename, each_stderr_filename = \
                    each_arg_pack[:-2], each_arg_pack[-2], each_arg_pack[-1]

                # The child receives its own copies of the descriptors; so,
                # the parent's handles can be closed once it has started.
                with open(each_stdout_filename, "w") as each_stdout_file, \
                        open(each_stderr_filename, "w") as each_stderr_file:
                    each_process = subprocess.Popen(each_arg_pack,
                                                    stdout=each_stdout_file,
                                                    stderr=each_stderr_file)

                running_processes.append((
                    each_arg_pack,
                    each_process,
                ))

                logger.info("Started new process ( \"" +
                            " ".join(each_arg_pack) + "\" ).")

            # Wait for a slot to free up (or for everything to finish once
            # no work remains).
            while ((not pool_tasks_empty) and (len(running_processes) >= num_processes)) or \
                    (pool_tasks_empty and len(running_processes)):
                time.sleep(1)

                i = 0
                while i < len(running_processes):
                    if running_processes[i][1].poll() is not None:
                        logger.info("Finished process ( \"" +
                                    " ".join(running_processes[i][0]) +
                                    "\" ).")
                        del running_processes[i]
                    else:
                        time.sleep(1)
                        i += 1

    start_time = time.time()
    logger.info("Starting merge over all blocks.")

    with h5py.File(output_filename_ext, "a") as output_file_handle:
        output_group = output_file_handle[output_group_name]

        new_neurons_set = segment.get_empty_neuron(shape=tuple(
            original_images_shape_array[1:]), dtype=float)

        for i, i_str, (output_filename_block_i,
                       sequential_block_i) in iters.filled_stringify_enumerate(
                           iters.izip(output_filename_block,
                                      original_images_pared_slices.flat)):
            # Where the windowed block sits in the full frame (all neurons
            # along the first axis are kept).
            windowed_slice_i = tuple(
                slice(_1, _2, 1)
                for _1, _2 in [(None, None)] +
                sequential_block_i["windowed_stack_selection"].tolist()[1:])
            # Where the non-windowed block sits in the windowed block.
            window_trimmed_i = tuple(
                slice(_1, _2, 1) for _1, _2 in
                sequential_block_i["windowed_block_selection"].tolist())
            output_filename_block_i = output_filename_block_i.rstrip("/")

            with h5py.File(output_filename_block_i,
                           "r") as each_block_file_handle:
                if "neurons" in each_block_file_handle:
                    neurons_block_i_smaller = hdf5.serializers.read_numpy_structured_array_from_HDF5(
                        each_block_file_handle, "/neurons")

                    neurons_block_i_windowed_count = numpy.squeeze(
                        numpy.apply_over_axes(
                            numpy.sum,
                            neurons_block_i_smaller["mask"].astype(float),
                            tuple(
                                iters.irange(
                                    1,
                                    neurons_block_i_smaller["mask"].ndim))))

                    if neurons_block_i_windowed_count.shape == tuple():
                        neurons_block_i_windowed_count = numpy.array(
                            [neurons_block_i_windowed_count])

                    neurons_block_i_non_windowed_count = numpy.squeeze(
                        numpy.apply_over_axes(
                            numpy.sum, neurons_block_i_smaller["mask"]
                            [window_trimmed_i].astype(float),
                            tuple(
                                iters.irange(
                                    1,
                                    neurons_block_i_smaller["mask"].ndim))))

                    if neurons_block_i_non_windowed_count.shape == tuple():
                        neurons_block_i_non_windowed_count = numpy.array(
                            [neurons_block_i_non_windowed_count])

                    if len(neurons_block_i_non_windowed_count):
                        # Find ones that are inside the margins by more than
                        # half
                        neurons_block_i_acceptance = (
                            (neurons_block_i_non_windowed_count /
                             neurons_block_i_windowed_count) > 0.5)

                        logger.info(
                            "Accepted the following neurons %s from block %s."
                            % (str(neurons_block_i_acceptance.nonzero()
                                   [0].tolist()), i_str))

                        # Take a subset of our previous neurons that are within
                        # the margins by half
                        neurons_block_i_accepted = neurons_block_i_smaller[
                            neurons_block_i_acceptance]

                        neurons_block_i = numpy.zeros(
                            neurons_block_i_accepted.shape,
                            dtype=new_neurons_set.dtype)
                        neurons_block_i["mask"][
                            windowed_slice_i] = neurons_block_i_accepted[
                                "mask"]
                        neurons_block_i["contour"][
                            windowed_slice_i] = neurons_block_i_accepted[
                                "contour"]
                        neurons_block_i["image"][
                            windowed_slice_i] = neurons_block_i_accepted[
                                "image"]

                        # Copy other properties
                        neurons_block_i["area"] = neurons_block_i_accepted[
                            "area"]
                        neurons_block_i["max_F"] = neurons_block_i_accepted[
                            "max_F"]
                        neurons_block_i[
                            "gaussian_mean"] = neurons_block_i_accepted[
                                "gaussian_mean"]
                        neurons_block_i[
                            "gaussian_cov"] = neurons_block_i_accepted[
                                "gaussian_cov"]
                        # TODO: Correct centroid to larger block position.
                        neurons_block_i["centroid"] = neurons_block_i_accepted[
                            "centroid"]
                        neurons_block_i["centroid"] += sequential_block_i[
                            "windowed_stack_selection"][1:, 0]

                        array_debug_recorder = hdf5.record.generate_HDF5_array_recorder(
                            output_group,
                            group_name="debug",
                            enable=debug,
                            overwrite_group=False,
                            recorder_constructor=hdf5.record.
                            HDF5EnumeratedArrayRecorder)

                        segment.merge_neuron_sets.recorders.array_debug_recorder = array_debug_recorder
                        new_neurons_set = segment.merge_neuron_sets(
                            new_neurons_set, neurons_block_i,
                            **parameters["generate_neurons"]
                            ["postprocess_data"]["merge_neuron_sets"])
                    else:
                        logger.info(
                            "Accepted the following neurons %s from block %s."
                            % (str([]), i_str))
                else:
                    logger.info(
                        "No neurons accepted as none were found for block"
                        " %s." % i_str)

        hdf5.serializers.create_numpy_structured_array_in_HDF5(
            output_group, "neurons", new_neurons_set, overwrite=True)

        if "parameters" not in output_group["neurons"].attrs:
            output_group["neurons"].attrs["parameters"] = repr(
                dict(
                    list(parameters.items()) +
                    [("block_shape", block_shape), ("num_blocks", num_blocks),
                     ("half_window_shape", half_window_shape),
                     ("half_border_shape", half_border_shape),
                     ("use_drmaa", use_drmaa),
                     ("num_drmaa_cores", num_drmaa_cores), ("debug", debug)]))

    logger.info("Finished merge over all blocks.")
    end_time = time.time()

    diff_time = end_time - start_time

    logger.info("Run time for merge over all blocks is \"" + str(diff_time) +
                " s\".")
def main(*argv):
    """
        Entry point for running a registration job from command line style
        arguments.

        Reads the configuration file, registers the input dataset, copies the
        registered frames (and optionally the shifts) into the output file
        together with the attributes of the raw data, and removes the
        intermediate results file.

        Args:
            argv(list):     arguments (includes command line call).

        Returns:
            int:            exit code (0 if success)
    """

    # Deferred import as argparse is only needed when main is actually run.
    import argparse

    parser = argparse.ArgumentParser(
        description="Parses input from the command line "
                    "for a registration job."
    )
    parser.add_argument(
        "config_filename",
        metavar="CONFIG_FILE",
        type=str,
        help="JSON file that provides configuration options "
             "for how to import TIFF(s)."
    )
    parser.add_argument(
        "input_filenames",
        metavar="INPUT_FILE",
        type=str,
        nargs=1,
        help="HDF5 file to import (this should include a "
             "path to where the internal dataset should be "
             "stored)."
    )
    parser.add_argument(
        "output_filenames",
        metavar="OUTPUT_FILE",
        type=str,
        nargs=1,
        help="HDF5 file to export (this should include a "
             "path to where the internal dataset should be "
             "stored)."
    )

    # The first argument is the command line call itself; so, it is dropped.
    options = parser.parse_args(list(argv)[1:])

    parameters = xjson.read_parameters(options.config_filename)

    # Split every name into its (file path, internal HDF5 path) pair.
    input_components = [
        hdf5.serializers.split_hdf5_path(each_name)
        for each_name in options.input_filenames
    ]
    output_components = [
        hdf5.serializers.split_hdf5_path(each_name)
        for each_name in options.output_filenames
    ]

    for (input_path, input_name), (output_path, output_name) in iters.izip(
            input_components, output_components):
        output_parent_name = os.path.dirname(output_name)

        with h5py.File(input_path, "r") as input_file:
            with h5py.File(output_path, "a") as output_file:
                data = input_file[input_name]
                result_filename = registration.register_mean_offsets(
                    data, to_truncate=True, **parameters
                )

                with h5py.File(result_filename, "r") as result_file:
                    result_file.copy(
                        "reg_frames",
                        output_file[output_parent_name],
                        name=output_name
                    )

                    if parameters.get("include_shift", False):
                        result_file.copy(
                            "space_shift",
                            output_file[output_parent_name],
                            name=output_name + "_shift"
                        )

                    # Carry the attributes of the raw data over to the result.
                    output = output_file[output_name]
                    for each_attr_name in data.attrs:
                        output.attrs[each_attr_name] = data.attrs[
                            each_attr_name
                        ]

                # The results file is always removed. Its directory is only
                # removed when neither the input nor the output file is
                # stored there.
                os.remove(result_filename)
                protected_dirnames = set([
                    os.path.dirname(os.path.abspath(input_file.filename)),
                    os.path.dirname(os.path.abspath(output_file.filename)),
                ])
                result_dirname = os.path.dirname(result_filename)
                if result_dirname not in protected_dirnames:
                    os.rmdir(result_dirname)

    return 0
def main(*argv):
    """
        C-like main for a registration job.

        Parses the given arguments, registers the requested dataset, and
        stores the registered frames (plus the shifts, when the configuration
        asks for them) and the raw data's attributes in the output file.

        Args:
            argv(list):     arguments (includes command line call).

        Returns:
            int:            exit code (0 if success)
    """

    # Imported here as it is only required when main is run.
    import argparse

    hdf5_path_hint = ("(this should include a path to where the internal "
                      "dataset should be stored).")

    parser = argparse.ArgumentParser(
        description="Parses input from the command line for a registration job."
    )
    parser.add_argument("config_filename",
                        metavar="CONFIG_FILE",
                        type=str,
                        help="JSON file that provides configuration options "
                             "for how to import TIFF(s).")
    parser.add_argument("input_filenames",
                        metavar="INPUT_FILE",
                        type=str,
                        nargs=1,
                        help="HDF5 file to import " + hdf5_path_hint)
    parser.add_argument("output_filenames",
                        metavar="OUTPUT_FILE",
                        type=str,
                        nargs=1,
                        help="HDF5 file to export " + hdf5_path_hint)

    # Skip the leading entry as it is the command line call.
    cli_args = list(argv)
    parsed = parser.parse_args(cli_args[1:])

    config = xjson.read_parameters(parsed.config_filename)

    # Each entry is split into the file path and the path inside that file.
    all_input_parts = []
    for each_filename in parsed.input_filenames:
        all_input_parts.append(
            hdf5.serializers.split_hdf5_path(each_filename))

    all_output_parts = []
    for each_filename in parsed.output_filenames:
        all_output_parts.append(
            hdf5.serializers.split_hdf5_path(each_filename))

    for input_parts, output_parts in iters.izip(all_input_parts,
                                                all_output_parts):
        output_dataset_name = output_parts[1]

        with h5py.File(input_parts[0], "r") as input_file, \
                h5py.File(output_parts[0], "a") as output_file:
            data = input_file[input_parts[1]]
            result_filename = registration.register_mean_offsets(
                data, to_truncate=True, **config)

            with h5py.File(result_filename, "r") as result_file:
                result_file.copy(
                    "reg_frames",
                    output_file[os.path.dirname(output_dataset_name)],
                    name=output_dataset_name)

                if config.get("include_shift", False):
                    result_file.copy(
                        "space_shift",
                        output_file[os.path.dirname(output_dataset_name)],
                        name=output_dataset_name + "_shift")

                # Mirror every attribute of the raw data on the final result.
                output = output_file[output_dataset_name]
                for each_attr_name in data.attrs:
                    output.attrs[each_attr_name] = data.attrs[each_attr_name]

            # Drop the results file. Only drop its directory too if it does
            # not hold our input or output file.
            os.remove(result_filename)
            in_out_dirnames = set()
            for each_file in (input_file, output_file):
                in_out_dirnames.add(
                    os.path.dirname(os.path.abspath(each_file.filename)))

            result_dirname = os.path.dirname(result_filename)
            if result_dirname not in in_out_dirnames:
                os.rmdir(result_dirname)

    return 0
def register_mean_offsets(frames2reg,
                          max_iters=-1,
                          block_frame_length=-1,
                          include_shift=False,
                          to_truncate=False,
                          float_type=numpy.dtype(float).type):
    """
        This algorithm registers the given image stack against its mean
        projection. This is done by computing translations needed to put each
        frame in alignment. Then the translation is performed and new
        translations are computed. This is repeated until no further
        improvement can be made.

        The code for translations can be found in find_mean_offsets.

        Notes:
            Adapted from code provided by Wenzhi Sun with speed improvements
            provided by Uri Dubin.

            Temporary HDF5 storage is used when ``frames2reg`` is an
            ``h5py.Dataset`` (stored next to its file) or when
            ``block_frame_length`` differs from the number of frames (stored
            in a new temporary directory). In that case the result is written
            to a ``results.h5`` file in that directory and its filename is
            returned instead of arrays.

        Args:
            frames2reg(numpy.ndarray):           Image stack to register (time
                                                 is the first dimension uses
                                                 C-order tyx or tzyx).

            max_iters(int):                      Number of iterations to allow
                                                 before forcing termination if
                                                 stable point is not found yet.
                                                 Set to -1 if no limit.
                                                 (Default -1)

            block_frame_length(int):             Number of frames to work with
                                                 at a time. By default all.
                                                 (Default -1)

            include_shift(bool):                 Whether to return the shifts
                                                 used, as well. (Default False)

            to_truncate(bool):                   Whether to truncate the frames
                                                 to remove all masked portions.
                                                 (Default False)

            float_type(type):                    Type of float to use for
                                                 calculation. (Default
                                                 numpy.float64).

        Returns:
            One of the following.

            * Without temporary storage and ``include_shift`` unset, the
              registered frames (a masked array, or a plain array if
              ``to_truncate`` is set).
            * Without temporary storage and ``include_shift`` set, a tuple of
              the registered frames and the integer shift of each frame.
            * With temporary storage, the filename (str) of an HDF5 file
              containing ``reg_frames`` (and ``space_shift`` if
              ``include_shift`` is set).

        Examples:
            >>> a = numpy.zeros((5, 3, 4)); a[:,0] = 1; a[2,0] = 0; a[2,2] = 1
            >>> reg, shift = register_mean_offsets(a, include_shift=True)
            >>> shift.tolist()
            [[0, 0], [0, 0], [1, 0], [0, 0], [0, 0]]
            >>> numpy.ma.getmaskarray(reg)[2].all(axis=1).tolist()
            [True, False, False]
            >>> numpy.ma.getmaskarray(reg)[2].any(axis=1).tolist()
            [True, False, False]
            >>> bool(numpy.ma.getmaskarray(reg)[[0, 1, 3, 4]].any())
            False
            >>> reg.filled(0)[0].tolist()
            [[1.0, 1.0, 1.0, 1.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]]
    """

    float_type = numpy.dtype(float_type).type

    # Must be of type float and must be at least 32-bit (smallest complex type
    # uses two 32-bit floats).
    assert issubclass(float_type, numpy.floating)
    assert numpy.dtype(float_type).itemsize >= 4

    # Sadly, there is no easier way to map the two types; so, this is it.
    float_complex_mapping = {
        numpy.float32: numpy.complex64,
        numpy.float64: numpy.complex128
    }
    # The extended precision types are not provided on every platform; so,
    # only reference them when they exist.
    if hasattr(numpy, "float128") and hasattr(numpy, "complex256"):
        float_complex_mapping[numpy.float128] = numpy.complex256

    complex_type = float_complex_mapping[float_type]

    if block_frame_length == -1:
        block_frame_length = len(frames2reg)

    # Decide whether temporaries live on disk and, if so, where.
    tempdir_name = ""
    temporaries_filename = ""
    if isinstance(frames2reg, h5py.Dataset):
        tempdir_name, temporaries_filename = os.path.split(
            os.path.abspath(frames2reg.file.filename)
        )

        temporaries_filename = os.path.splitext(temporaries_filename)[0]
        temporaries_filename += "_".join(
            [
                frames2reg.name.replace("/", "_"),
                "temporaries.h5"
            ]
        )
        temporaries_filename = os.path.join(
            tempdir_name,
            temporaries_filename
        )
    elif (block_frame_length != len(frames2reg)):
        tempdir_name = tempfile.mkdtemp()
        temporaries_filename = os.path.join(tempdir_name, "temporaries.h5")

    frames2reg_fft = None
    space_shift = None
    this_space_shift = None
    if tempdir_name:
        temporaries_file = h5py.File(temporaries_filename, "w")

        frames2reg_fft = temporaries_file.create_dataset(
            "frames2reg_fft", shape=frames2reg.shape, dtype=complex_type
        )
        space_shift = temporaries_file.create_dataset(
            "space_shift",
            shape=(len(frames2reg), len(frames2reg.shape)-1),
            dtype=int
        )
        this_space_shift = temporaries_file.create_dataset(
            "this_space_shift",
            shape=space_shift.shape,
            dtype=space_shift.dtype
        )
    else:
        frames2reg_fft = numpy.empty(frames2reg.shape, dtype=complex_type)
        space_shift = numpy.zeros(
            (len(frames2reg), len(frames2reg.shape)-1),
            dtype=int
        )
        this_space_shift = numpy.empty_like(space_shift)

    # Transform every frame over its spatial axes (time is left alone).
    for range_ij in iters.subrange(0, len(frames2reg), block_frame_length):
        frames2reg_fft[range_ij] = fft.fftn(
            frames2reg[range_ij], axes=range(1, len(frames2reg.shape))
        )

    template_fft = numpy.empty(frames2reg.shape[1:], dtype=complex_type)

    this_space_shift_mean = numpy.empty(
        this_space_shift.shape[1:],
        dtype=this_space_shift.dtype
    )

    # Repeat shift calculation until there is no further adjustment.
    num_iters = 0
    squared_magnitude_delta_space_shift = 1.0
    while (squared_magnitude_delta_space_shift != 0.0):
        squared_magnitude_delta_space_shift = 0.0

        # The template is the mean of the frames with their current shifts
        # applied.
        template_fft[:] = 0
        for range_ij in iters.subrange(0, len(frames2reg), block_frame_length):
            template_fft += translate_fourier(
                frames2reg_fft[range_ij] / len(frames2reg),
                space_shift[range_ij]
            ).sum(axis=0)

        for range_ij in iters.subrange(0, len(frames2reg), block_frame_length):
            this_space_shift[range_ij] = find_offsets(
                frames2reg_fft[range_ij], template_fft
            )

        # Remove global shifts.
        this_space_shift_mean[...] = 0
        for range_ij in iters.subrange(0, len(frames2reg), block_frame_length):
            this_space_shift_mean += this_space_shift[range_ij].sum(axis=0)
        this_space_shift_mean[...] = numpy.round(
            this_space_shift_mean.astype(float_type) / len(this_space_shift)
        ).astype(this_space_shift_mean.dtype)
        for range_ij in iters.subrange(0, len(frames2reg), block_frame_length):
            this_space_shift[range_ij] = xnumpy.find_relative_offsets(
                this_space_shift[range_ij],
                this_space_shift_mean
            )

        # Find the shortest roll possible (i.e. if it is going over halfway
        # switch direction so it will go less than half).
        # Note all indices by definition were positive semi-definite and upper
        # bounded by the shape. This change will make them bound by
        # the half shape, but with either sign.
        for range_ij in iters.subrange(0, len(frames2reg), block_frame_length):
            this_space_shift[range_ij] = xnumpy.find_shortest_wraparound(
                this_space_shift[range_ij],
                frames2reg_fft.shape[1:]
            )

        # NOTE(review): numpy.dot(d, d.T).sum() adds up every entry of the
        # Gram matrix, which equals the squared norm of the summed deltas in
        # the block rather than the sum of each frame's squared delta; confirm
        # this is the intended convergence measure.
        for range_ij in iters.subrange(0, len(frames2reg), block_frame_length):
            delta_space_shift_ij = this_space_shift[range_ij] - \
                                   space_shift[range_ij]
            squared_magnitude_delta_space_shift += numpy.dot(
                delta_space_shift_ij, delta_space_shift_ij.T
            ).sum()

        for range_ij in iters.subrange(0, len(frames2reg), block_frame_length):
            space_shift[range_ij] = this_space_shift[range_ij]

        num_iters += 1
        logger.info(
            "Completed iteration, %i, " % num_iters +
            "where the L_2 norm squared of the relative shift was, %f." %
            squared_magnitude_delta_space_shift
        )

        if (max_iters != -1) and (num_iters >= max_iters):
            logger.info("Hit maximum number of iterations.")
            break

    reg_frames_shape = frames2reg.shape
    if to_truncate:
        # Determine the largest shift in each direction so that everything
        # that wrapped around in any frame can be cut away.
        space_shift_max = numpy.zeros(
            space_shift.shape[1:], dtype=space_shift.dtype
        )
        space_shift_min = numpy.zeros(
            space_shift.shape[1:], dtype=space_shift.dtype
        )
        for range_ij in iters.subrange(0, len(frames2reg), block_frame_length):
            numpy.maximum(
                space_shift_max,
                space_shift[range_ij].max(axis=0),
                out=space_shift_max
            )
            numpy.minimum(
                space_shift_min,
                space_shift[range_ij].min(axis=0),
                out=space_shift_min
            )
        reg_frames_shape = numpy.asarray(reg_frames_shape)
        reg_frames_shape[1:] -= space_shift_max
        reg_frames_shape[1:] += space_shift_min
        reg_frames_shape = tuple(reg_frames_shape)

        space_shift_max = tuple(space_shift_max)
        # A stop of 0 would select nothing; so, use None (i.e. to the end).
        space_shift_min = space_shift_min.astype(object)
        space_shift_min[space_shift_min == 0] = None
        space_shift_min = tuple(space_shift_min)
        reg_frames_slice = tuple(
            slice(_1, _2) for _1, _2 in iters.izip(
                space_shift_max, space_shift_min
            )
        )

    # Adjust the registered frames using the translations found.
    # Mask rolled values.
    reg_frames = None
    if tempdir_name:
        if to_truncate:
            reg_frames = temporaries_file.create_dataset(
                "reg_frames",
                shape=reg_frames_shape,
                dtype=frames2reg.dtype,
                chunks=True
            )
        else:
            reg_frames = temporaries_file.create_group("reg_frames")
            reg_frames = hdf5.serializers.HDF5MaskedDataset(
                reg_frames, shape=frames2reg.shape, dtype=frames2reg.dtype
            )
    else:
        if to_truncate:
            reg_frames = numpy.empty(reg_frames_shape, dtype=frames2reg.dtype)
        else:
            reg_frames = numpy.ma.empty_like(frames2reg)
            reg_frames.mask = numpy.ma.getmaskarray(reg_frames)
            reg_frames.set_fill_value(reg_frames.dtype.type(0))

    for range_ij in iters.subrange(0, len(frames2reg), block_frame_length):
        for k in range_ij:
            if to_truncate:
                reg_frames[k] = xnumpy.roll(
                    frames2reg[k], space_shift[k]
                )[reg_frames_slice]
            else:
                reg_frames[k] = xnumpy.roll(
                    frames2reg[k], space_shift[k], to_mask=True
                )

    result = None
    if tempdir_name:
        # Move what the caller needs into a results file and discard the
        # remaining temporaries.
        results_filename = os.path.join(tempdir_name, "results.h5")
        results_file = h5py.File(results_filename, "w")
        if to_truncate:
            temporaries_file.copy(reg_frames.name, results_file)
        else:
            temporaries_file.copy(reg_frames.group, results_file)
        if include_shift:
            temporaries_file.copy(space_shift.name, results_file)
        results_file.close()
        results_file = None

        # Release all references into the temporaries file before closing
        # and removing it.
        frames2reg_fft = None
        reg_frames = None
        space_shift = None
        this_space_shift = None
        temporaries_file.close()
        os.remove(temporaries_filename)
        temporaries_filename = ""

        result = results_filename
    else:
        result = reg_frames
        if include_shift:
            result = (reg_frames, space_shift)

    return(result)
def register_mean_offsets(frames2reg,
                          max_iters=-1,
                          block_frame_length=-1,
                          include_shift=False,
                          to_truncate=False,
                          float_type=numpy.dtype(float).type):
    """
        This algorithm registers the given image stack against its mean
        projection. This is done by computing translations needed to put each
        frame in alignment. Then the translation is performed and new
        translations are computed. This is repeated until no further
        improvement can be made (i.e. no frame's shift changes between two
        consecutive iterations) or ``max_iters`` is reached.

        The per-frame translations are computed by ``find_offsets``.

        Notes:
            Adapted from code provided by Wenzhi Sun with speed improvements
            provided by Uri Dubin.

            If ``frames2reg`` is an ``h5py.Dataset`` or if
            ``block_frame_length`` is smaller than the number of frames, all
            intermediates are kept in a temporary HDF5 file (which is removed
            at the end) and the results are written to a ``results.h5`` file.

        Args:
            frames2reg(numpy.ndarray):           Image stack to register (time
                                                 is the first dimension uses
                                                 C-order tyx or tzyx).

            max_iters(int):                      Number of iterations to allow
                                                 before forcing termination if
                                                 stable point is not found yet.
                                                 Set to -1 if no limit.
                                                 (Default -1)

            block_frame_length(int):             Number of frames to work with
                                                 at a time. By default all.
                                                 (Default -1)

            include_shift(bool):                 Whether to return the shifts
                                                 used, as well. (Default False)

            to_truncate(bool):                   Whether to truncate the frames
                                                 to remove all masked portions.
                                                 (Default False)

            float_type(type):                    Type of float to use for
                                                 calculation. Must be a
                                                 floating type of at least 32
                                                 bits. (Default numpy.float64).

        Returns:
            (numpy.ndarray or tuple or str):     When working in memory, the
                                                 registered frames (a masked
                                                 array, or a plain array if
                                                 ``to_truncate`` is set); if
                                                 ``include_shift`` is set, a
                                                 tuple of the registered frames
                                                 and the shift applied to each
                                                 frame. When working through
                                                 HDF5 temporaries, the path of
                                                 the HDF5 results file holding
                                                 the same data.

        Examples:
            >>> a = numpy.zeros((5, 3, 4)); a[:,0] = 1; a[2,0] = 0; a[2,2] = 1
            >>> a
            array([[[ 1.,  1.,  1.,  1.],
                    [ 0.,  0.,  0.,  0.],
                    [ 0.,  0.,  0.,  0.]],
            <BLANKLINE>
                   [[ 1.,  1.,  1.,  1.],
                    [ 0.,  0.,  0.,  0.],
                    [ 0.,  0.,  0.,  0.]],
            <BLANKLINE>
                   [[ 0.,  0.,  0.,  0.],
                    [ 0.,  0.,  0.,  0.],
                    [ 1.,  1.,  1.,  1.]],
            <BLANKLINE>
                   [[ 1.,  1.,  1.,  1.],
                    [ 0.,  0.,  0.,  0.],
                    [ 0.,  0.,  0.,  0.]],
            <BLANKLINE>
                   [[ 1.,  1.,  1.,  1.],
                    [ 0.,  0.,  0.,  0.],
                    [ 0.,  0.,  0.,  0.]]])

            >>> register_mean_offsets(a, include_shift=True)
            (masked_array(data =
             [[[1.0 1.0 1.0 1.0]
              [0.0 0.0 0.0 0.0]
              [0.0 0.0 0.0 0.0]]
            <BLANKLINE>
             [[1.0 1.0 1.0 1.0]
              [0.0 0.0 0.0 0.0]
              [0.0 0.0 0.0 0.0]]
            <BLANKLINE>
             [[-- -- -- --]
              [0.0 0.0 0.0 0.0]
              [0.0 0.0 0.0 0.0]]
            <BLANKLINE>
             [[1.0 1.0 1.0 1.0]
              [0.0 0.0 0.0 0.0]
              [0.0 0.0 0.0 0.0]]
            <BLANKLINE>
             [[1.0 1.0 1.0 1.0]
              [0.0 0.0 0.0 0.0]
              [0.0 0.0 0.0 0.0]]],
                         mask =
             [[[False False False False]
              [False False False False]
              [False False False False]]
            <BLANKLINE>
             [[False False False False]
              [False False False False]
              [False False False False]]
            <BLANKLINE>
             [[ True  True  True  True]
              [False False False False]
              [False False False False]]
            <BLANKLINE>
             [[False False False False]
              [False False False False]
              [False False False False]]
            <BLANKLINE>
             [[False False False False]
              [False False False False]
              [False False False False]]],
                   fill_value = 0.0)
            , array([[0, 0],
                   [0, 0],
                   [1, 0],
                   [0, 0],
                   [0, 0]]))
    """

    float_type = numpy.dtype(float_type).type

    # Must be of type float and must be at least 32-bit (smallest complex type
    # uses two 32-bit floats).
    assert issubclass(float_type, numpy.floating)
    assert numpy.dtype(float_type).itemsize >= 4

    # Sadly, there is no easier way to map the two types; so, this is it.
    # The extended precision pair is spelled longdouble/clongdouble because
    # the sized aliases (float128/complex256) are not defined on every
    # platform; where they are defined they are these very same types.
    float_complex_mapping = {
        numpy.float32: numpy.complex64,
        numpy.float64: numpy.complex128,
        numpy.longdouble: numpy.clongdouble
    }
    complex_type = float_complex_mapping[float_type]

    if block_frame_length == -1:
        block_frame_length = len(frames2reg)

    # Decide whether intermediates live on disk. A non-empty tempdir_name is
    # used below as the flag for "work through HDF5 temporaries".
    tempdir_name = ""
    temporaries_filename = ""
    if isinstance(frames2reg, h5py.Dataset):
        # Place the temporaries next to the file holding the input dataset.
        tempdir_name, temporaries_filename = os.path.split(
            os.path.abspath(frames2reg.file.filename)
        )

        temporaries_filename = os.path.splitext(temporaries_filename)[0]
        temporaries_filename += "_".join(
            [frames2reg.name.replace("/", "_"), "temporaries.h5"]
        )
        temporaries_filename = os.path.join(
            tempdir_name, temporaries_filename
        )
    elif (block_frame_length != len(frames2reg)):
        tempdir_name = tempfile.mkdtemp()
        temporaries_filename = os.path.join(tempdir_name, "temporaries.h5")

    frames2reg_fft = None
    space_shift = None
    this_space_shift = None
    if tempdir_name:
        temporaries_file = h5py.File(temporaries_filename, "w")

        frames2reg_fft = temporaries_file.create_dataset(
            "frames2reg_fft", shape=frames2reg.shape, dtype=complex_type
        )
        space_shift = temporaries_file.create_dataset(
            "space_shift",
            shape=(len(frames2reg), len(frames2reg.shape) - 1),
            dtype=int
        )
        this_space_shift = temporaries_file.create_dataset(
            "this_space_shift",
            shape=space_shift.shape,
            dtype=space_shift.dtype
        )
    else:
        frames2reg_fft = numpy.empty(frames2reg.shape, dtype=complex_type)
        space_shift = numpy.zeros(
            (len(frames2reg), len(frames2reg.shape) - 1), dtype=int
        )
        this_space_shift = numpy.empty_like(space_shift)

    # Transform every frame over its spatial axes (all but the first).
    for range_ij in iters.subrange(0, len(frames2reg), block_frame_length):
        frames2reg_fft[range_ij] = fft.fftn(
            frames2reg[range_ij], axes=range(1, len(frames2reg.shape))
        )

    template_fft = numpy.empty(frames2reg.shape[1:], dtype=complex_type)

    this_space_shift_mean = numpy.empty(
        this_space_shift.shape[1:], dtype=this_space_shift.dtype
    )

    # Repeat shift calculation until there is no further adjustment.
    num_iters = 0
    squared_magnitude_delta_space_shift = 1.0
    while (squared_magnitude_delta_space_shift != 0.0):
        squared_magnitude_delta_space_shift = 0.0

        # The template is the mean of the frames under the current shifts.
        template_fft[:] = 0
        for range_ij in iters.subrange(0, len(frames2reg), block_frame_length):
            template_fft += translate_fourier(
                frames2reg_fft[range_ij] / len(frames2reg),
                space_shift[range_ij]
            ).sum(axis=0)

        for range_ij in iters.subrange(0, len(frames2reg), block_frame_length):
            this_space_shift[range_ij] = find_offsets(
                frames2reg_fft[range_ij], template_fft
            )

        # Remove global shifts.
        this_space_shift_mean[...] = 0
        for range_ij in iters.subrange(0, len(frames2reg), block_frame_length):
            this_space_shift_mean += this_space_shift[range_ij].sum(axis=0)
        this_space_shift_mean[...] = numpy.round(
            this_space_shift_mean.astype(float_type) / len(this_space_shift)
        ).astype(this_space_shift_mean.dtype)
        for range_ij in iters.subrange(0, len(frames2reg), block_frame_length):
            this_space_shift[range_ij] = xnumpy.find_relative_offsets(
                this_space_shift[range_ij],
                this_space_shift_mean
            )

        # Find the shortest roll possible (i.e. if it is going over halfway
        # switch direction so it will go less than half).
        # Note all indices by definition were positive semi-definite and upper
        # bounded by the shape. This change will make them bound by
        # the half shape, but with either sign.
        for range_ij in iters.subrange(0, len(frames2reg), block_frame_length):
            this_space_shift[range_ij] = xnumpy.find_shortest_wraparound(
                this_space_shift[range_ij],
                frames2reg_fft.shape[1:]
            )

        # Accumulate the squared L_2 norm of the change in shifts, i.e. the
        # sum of the squares of every component. Summing all entries of
        # `dot(delta, delta.T)` would instead give the squared norm of the
        # *summed* deltas, which is zero whenever the changes cancel out and
        # would stop the iteration before the shifts have settled.
        for range_ij in iters.subrange(0, len(frames2reg), block_frame_length):
            delta_space_shift_ij = this_space_shift[range_ij] - \
                                   space_shift[range_ij]
            squared_magnitude_delta_space_shift += (
                delta_space_shift_ij ** 2
            ).sum()

        for range_ij in iters.subrange(0, len(frames2reg), block_frame_length):
            space_shift[range_ij] = this_space_shift[range_ij]

        num_iters += 1
        logger.info(
            "Completed iteration, %i, " % num_iters +
            "where the L_2 norm squared of the relative shift was, %f." %
            squared_magnitude_delta_space_shift
        )

        if (max_iters != -1) and (num_iters >= max_iters):
            logger.info("Hit maximum number of iterations.")
            break

    reg_frames_shape = frames2reg.shape
    if to_truncate:
        # Find the extreme shifts over all frames (never past zero) so the
        # region that wrapped around in any frame can be cut off.
        space_shift_max = numpy.zeros(
            space_shift.shape[1:], dtype=space_shift.dtype
        )
        space_shift_min = numpy.zeros(
            space_shift.shape[1:], dtype=space_shift.dtype
        )
        for range_ij in iters.subrange(0, len(frames2reg), block_frame_length):
            numpy.maximum(
                space_shift_max,
                space_shift[range_ij].max(axis=0),
                out=space_shift_max
            )
            numpy.minimum(
                space_shift_min,
                space_shift[range_ij].min(axis=0),
                out=space_shift_min
            )

        reg_frames_shape = numpy.asarray(reg_frames_shape)
        reg_frames_shape[1:] -= space_shift_max
        reg_frames_shape[1:] += space_shift_min
        reg_frames_shape = tuple(reg_frames_shape)

        space_shift_max = tuple(space_shift_max)
        # A stop of 0 would yield an empty slice; None means "to the end".
        space_shift_min = space_shift_min.astype(object)
        space_shift_min[space_shift_min == 0] = None
        space_shift_min = tuple(space_shift_min)
        reg_frames_slice = tuple(
            slice(_1, _2) for _1, _2 in iters.izip(
                space_shift_max, space_shift_min
            )
        )

    # Adjust the registered frames using the translations found.
    # Mask rolled values.
    reg_frames = None
    if tempdir_name:
        if to_truncate:
            reg_frames = temporaries_file.create_dataset(
                "reg_frames",
                shape=reg_frames_shape,
                dtype=frames2reg.dtype,
                chunks=True
            )
        else:
            reg_frames = temporaries_file.create_group("reg_frames")
            reg_frames = hdf5.serializers.HDF5MaskedDataset(
                reg_frames, shape=frames2reg.shape, dtype=frames2reg.dtype
            )
    else:
        if to_truncate:
            reg_frames = numpy.empty(reg_frames_shape, dtype=frames2reg.dtype)
        else:
            reg_frames = numpy.ma.empty_like(frames2reg)
            reg_frames.mask = numpy.ma.getmaskarray(reg_frames)
            reg_frames.set_fill_value(reg_frames.dtype.type(0))

    for range_ij in iters.subrange(0, len(frames2reg), block_frame_length):
        for k in range_ij:
            if to_truncate:
                reg_frames[k] = xnumpy.roll(
                    frames2reg[k], space_shift[k]
                )[reg_frames_slice]
            else:
                reg_frames[k] = xnumpy.roll(
                    frames2reg[k], space_shift[k], to_mask=True
                )

    result = None
    results_filename = ""
    if tempdir_name:
        results_filename = os.path.join(tempdir_name, "results.h5")
        results_file = h5py.File(results_filename, "w")
        if to_truncate:
            temporaries_file.copy(reg_frames.name, results_file)
        else:
            temporaries_file.copy(reg_frames.group, results_file)
        if include_shift:
            temporaries_file.copy(space_shift.name, results_file)

        # Drop all references into the temporaries before removing the file.
        frames2reg_fft = None
        reg_frames = None
        space_shift = None
        this_space_shift = None
        temporaries_file.close()
        os.remove(temporaries_filename)
        temporaries_filename = ""
        result = results_filename
    else:
        result = reg_frames
        if include_shift:
            result = (reg_frames, space_shift)

    if tempdir_name:
        results_file.close()
        results_file = None

    return(result)
def generate_neurons_blocks(input_filename,
                            output_filename,
                            num_processes=multiprocessing.cpu_count(),
                            block_shape=None,
                            num_blocks=None,
                            half_window_shape=None,
                            half_border_shape=None,
                            use_drmaa=False,
                            num_drmaa_cores=16,
                            debug=False,
                            **parameters):
    """
        Splits the input image stack into (overlapping) blocks, runs this
        module's ``main`` on each block in a separate Python process and
        merges the neurons found in every block into the output group.

        Notes:
            Intermediate per-block HDF5 files, the shared ``config.json`` and
            each process's redirected stdout/stderr are placed in a directory
            named after the output file with ``_blocks`` appended.

            For each dimension at most one of ``block_shape`` and
            ``num_blocks`` may be given (the other set to -1); if both are -1
            for a dimension, a single block spans it.

        Args:
            input_filename(str):         HDF5 path (file path followed by the
                                         internal dataset path) of the image
                                         stack.

            output_filename(str):        HDF5 path (file path followed by the
                                         internal group path) to store the
                                         results in.

            num_processes(int):          Maximum number of block processes to
                                         run at once when not using DRMAA.
                                         (Default number of CPUs)

            block_shape(sequence):       Shape of each block per dimension,
                                         -1 where unspecified. (Default None)

            num_blocks(sequence):        Number of blocks per dimension, -1
                                         where unspecified. (Default None)

            half_window_shape(sequence): Amount each block is extended by on
                                         either side per dimension. Defaults
                                         to half of the block shape.

            half_border_shape(sequence): Amount trimmed from either side of
                                         the image stack per dimension; must
                                         be 0 for the first dimension.
                                         Defaults to no trimming.

            use_drmaa(bool):             Whether to submit the block runs
                                         through DRMAA instead of starting
                                         local subprocesses. (Default False)

            num_drmaa_cores(int):        Number of cores requested for each
                                         DRMAA job. (Default 16)

            debug(bool):                 Stored in the generated config and
                                         enables the debug recorder used
                                         during merging. (Default False)

            **parameters(dict):          Configuration passed on to each block
                                         run; must contain the key
                                         "generate_neurons".
    """

    # TODO: Move function into new module with its own command line interface.
    # TODO: Heavy refactoring required on this function.

    # Extract and validate file extensions.

    # Parse input filename and validate that the name is acceptable
    input_filename_ext, input_dataset_name = hdf5.serializers.split_hdf5_path(
        input_filename
    )

    # Parse output filename and validate that the name is acceptable
    output_filename_ext, output_group_name = hdf5.serializers.split_hdf5_path(
        output_filename
    )

    # Directory where individual block runs will be stored.
    intermediate_output_dir = output_filename_ext.rsplit(
        os.path.splitext(output_filename_ext)[1], 1)[0] + "_blocks"

    # Read the input data.
    original_images_shape_array = None
    with h5py.File(input_filename_ext, "r") as input_file_handle:
        original_images_shape_array = numpy.array(
            input_file_handle[input_dataset_name].shape
        )

    # Get the amount of the border to slice
    half_border_shape_array = None
    if half_border_shape is None:
        half_border_shape_array = numpy.zeros(
            len(original_images_shape_array), dtype=int
        )
    else:
        # The border (not the window) must have one entry per dimension.
        assert (len(half_border_shape) == len(original_images_shape_array))

        half_border_shape_array = numpy.array(half_border_shape)

        # Should be of type integer
        assert (issubclass(half_border_shape_array.dtype.type, numpy.integer))

        # Should not cut along temporal portion.
        # Maybe replace with a warning.
        assert (half_border_shape[0] == 0)

    # TODO: Refactor to expanded_numpy.
    # Cuts boundaries from original_images_shape
    original_images_pared_shape_array = original_images_shape_array - \
                                        2*half_border_shape_array

    # At least one of them must be specified. If not some mixture of both.
    assert ((block_shape is not None) or (num_blocks is not None))

    # Size of the block to use by pixels
    block_shape_array = None
    block_shape_array_undefined = None
    if block_shape is None:
        block_shape_array = -numpy.ones(
            original_images_pared_shape_array.shape, dtype=int
        )
        block_shape_array_undefined = numpy.ones(
            original_images_pared_shape_array.shape, dtype=bool
        )
    else:
        # Should have the same number of values in each
        assert (len(original_images_pared_shape_array) == len(block_shape))

        block_shape_array = numpy.array(block_shape, dtype=int)

        # Should be of type integer
        assert issubclass(block_shape_array.dtype.type, numpy.integer)

        block_shape_array_undefined = (block_shape_array == -1)

    # Number of blocks along each dimension
    num_blocks_array = None
    num_blocks_array_undefined = None
    if num_blocks is None:
        num_blocks_array = - \
            numpy.ones(original_images_pared_shape_array.shape, dtype=int)
        num_blocks_array_undefined = numpy.ones(
            original_images_pared_shape_array.shape, dtype=bool
        )
    else:
        # Should have the same number of values in each
        assert (len(original_images_pared_shape_array) == len(num_blocks))

        num_blocks_array = numpy.array(num_blocks, dtype=int)

        # Should be of type integer
        assert issubclass(num_blocks_array.dtype.type, numpy.integer)

        num_blocks_array_undefined = (num_blocks_array == -1)

    # Want to ensure that both aren't defined.
    assert ~(~block_shape_array_undefined & ~num_blocks_array_undefined).all()

    # If both are undefined, then the block should span that dimension
    missing_both = (block_shape_array_undefined & num_blocks_array_undefined)
    block_shape_array[missing_both] = \
        original_images_pared_shape_array[missing_both]
    num_blocks_array[missing_both] = 1
    # Thus, we have resolved these values and can continue.
    block_shape_array_undefined[missing_both] = False
    num_blocks_array_undefined[missing_both] = False

    # Replace undefined values in block_shape_array
    missing_block_shape_array, block_shape_array_remainder = divmod(
        original_images_pared_shape_array[block_shape_array_undefined],
        num_blocks_array[block_shape_array_undefined]
    )
    # Block shape must be well defined.
    assert (block_shape_array_remainder == 0).all()
    missing_block_shape_array = missing_block_shape_array.astype(int)
    block_shape_array[block_shape_array_undefined] = missing_block_shape_array

    # Replace undefined values in num_blocks_array
    missing_num_blocks_array, num_blocks_array_remainder = divmod(
        original_images_pared_shape_array[num_blocks_array_undefined],
        block_shape_array[num_blocks_array_undefined]
    )
    # Allow some blocks to be smaller
    missing_num_blocks_array += (num_blocks_array_remainder != 0).astype(int)
    num_blocks_array[num_blocks_array_undefined] = missing_num_blocks_array

    # Get the overlap window
    half_window_shape_array = None
    if half_window_shape is None:
        half_window_shape_array = block_shape_array / 2.0
    else:
        assert (len(half_window_shape) == len(
            original_images_pared_shape_array))

        half_window_shape_array = numpy.array(half_window_shape)

        assert issubclass(half_window_shape_array.dtype.type, numpy.integer)

    # Want to make our window size is at least as large as the one used for
    # the f0 calculation.
    if "extract_f0" in parameters["generate_neurons"]["preprocess_data"]:
        #assert (parameters["generate_neurons"]["preprocess_data"]["extract_f0"]["half_window_size"] == half_window_shape_array[0])
        assert (parameters["generate_neurons"]["preprocess_data"]["extract_f0"]["half_window_size"] <= half_window_shape_array[0])

    # Estimate bounds for each slice. Uses typical python [begin, end) for the
    # indices.
    estimated_bounds = numpy.zeros(
        tuple(num_blocks_array),
        dtype=(int, original_images_pared_shape_array.shape + (2,))
    )

    for each_block_indices in iters.index_generator(*num_blocks_array):
        for each_dim, each_block_dim_index in enumerate(each_block_indices):
            estimated_lower_bound = each_block_dim_index * \
                                    block_shape_array[each_dim]
            estimated_upper_bound = (each_block_dim_index + 1) * \
                                    block_shape_array[each_dim]

            estimated_bounds[each_block_indices][each_dim] = numpy.array([
                estimated_lower_bound, estimated_upper_bound
            ])

    original_images_pared_slices = numpy.zeros(
        estimated_bounds.shape[:-2],
        dtype=[("actual", int, estimated_bounds.shape[-2:]),
               ("windowed", int, estimated_bounds.shape[-2:]),
               ("windowed_stack_selection", int, estimated_bounds.shape[-2:]),
               ("windowed_block_selection", int, estimated_bounds.shape[-2:])]
    )

    # Get the slice that is within bounds
    original_images_pared_slices["actual"] = estimated_bounds
    original_images_pared_slices["actual"][..., 0] = numpy.where(
        0 < original_images_pared_slices["actual"][..., 0],
        original_images_pared_slices["actual"][..., 0],
        0
    )
    original_images_pared_slices["actual"][..., 1] = numpy.where(
        original_images_pared_slices["actual"][..., 1] <
        original_images_pared_shape_array,
        original_images_pared_slices["actual"][..., 1],
        original_images_pared_shape_array
    )

    # Gets the defined half_window_size.
    window_addition = numpy.zeros(estimated_bounds.shape, dtype=int)
    window_addition[..., 0] = -half_window_shape_array
    window_addition[..., 1] = half_window_shape_array

    # Get the slice with a window added.
    original_images_pared_slices["windowed"] = \
        estimated_bounds + window_addition
    original_images_pared_slices["windowed"][..., 0] = numpy.where(
        0 < original_images_pared_slices["windowed"][..., 0],
        original_images_pared_slices["windowed"][..., 0],
        0
    )
    original_images_pared_slices["windowed"][..., 1] = numpy.where(
        original_images_pared_slices["windowed"][..., 1] <
        original_images_pared_shape_array,
        original_images_pared_slices["windowed"][..., 1],
        original_images_pared_shape_array
    )

    # Get the slice information to get the windowed block from the original
    # image stack.
    original_images_pared_slices["windowed_stack_selection"] = \
        original_images_pared_slices["windowed"]
    original_images_pared_slices["windowed_stack_selection"] += \
        xnumpy.expand_view(half_border_shape_array, reps_after=2)

    # Get slice information for the portion within
    # `original_images_pared_slices["windowed"]`, which corresponds to
    # `original_images_pared_slices["actual"]`.
    #original_images_pared_slices["windowed_block_selection"][..., 0] = 0
    original_images_pared_slices["windowed_block_selection"][..., 1] = (
        original_images_pared_slices["actual"][..., 1] -
        original_images_pared_slices["actual"][..., 0]
    )
    original_images_pared_slices["windowed_block_selection"][:] += \
        xnumpy.expand_view(
            original_images_pared_slices["actual"][..., 0] -
            original_images_pared_slices["windowed"][..., 0],
            reps_after=2
        )

    # Get a directory for intermediate results.
    try:
        os.mkdir(intermediate_output_dir)
    except OSError:
        # If it already exists, that is fine. Any other failure (e.g. a
        # missing parent or lack of permission) must not be hidden.
        if not os.path.isdir(intermediate_output_dir):
            raise

    intermediate_config = intermediate_output_dir + "/" + "config.json"

    # Overwrite the config file always
    with open(intermediate_config, "w") as fid:
        json.dump(
            dict(list(parameters.items()) + list({"debug" : debug}.items())),
            fid,
            indent=4,
            separators=(",", " : ")
        )
        fid.write("\n")

    # Construct an HDF5 file for each block
    input_filename_block = []
    output_filename_block = []
    stdout_filename_block = []
    stderr_filename_block = []
    with h5py.File(output_filename_ext, "a") as output_file_handle:
        # Create a new output directory if doesn't exists.
        output_file_handle.require_group(output_group_name)

        output_group = output_file_handle[output_group_name]

        if "original_images" not in output_group:
            if input_filename_ext == output_filename_ext:
                output_group["original_images"] = h5py.SoftLink(
                    input_dataset_name
                )
            else:
                output_group["original_images"] = h5py.ExternalLink(
                    input_filename_ext,
                    "/" + input_dataset_name
                )

        output_group.require_group("blocks")

        output_group_blocks = output_group["blocks"]

        input_file_handle = None
        try:
            # Skipping using region refs.
            input_file_handle = h5py.File(
                input_filename_ext, "r"
            )
        except IOError:
            # File is already open
            input_file_handle = output_file_handle

        for i, i_str, sequential_block_i in iters.filled_stringify_enumerate(
                original_images_pared_slices.flat
        ):
            intermediate_basename_i = intermediate_output_dir + "/" + i_str

            # Hold redirected stdout and stderr for each subprocess.
            stdout_filename_block.append(
                intermediate_basename_i + os.extsep + "out")
            stderr_filename_block.append(
                intermediate_basename_i + os.extsep + "err")

            # Ensure that the blocks are corrected to deal with trimming of
            # the image stack
            # Must be done after the calculation of
            # original_images_pared_slices["windowed_block_selection"].
            sequential_block_i_windowed = \
                sequential_block_i["windowed_stack_selection"]
            slice_i = tuple(
                slice(_1, _2, 1) for _1, _2 in sequential_block_i_windowed
            )

            if i_str not in output_group_blocks:
                # An empty dataset whose attributes describe where the
                # block's data is to be read from.
                output_group_blocks[i_str] = []
                output_group_blocks[i_str].attrs["filename"] = \
                    input_file_handle.filename
                output_group_blocks[i_str].attrs["dataset"] = \
                    input_dataset_name
                output_group_blocks[i_str].attrs["slice"] = str(slice_i)

            block_i = output_group_blocks[i_str]

            with h5py.File(intermediate_basename_i + os.extsep + "h5", "a") \
                    as each_block_file_handle:
                # Create a soft link to the original images. But use the
                # appropriate type of soft link depending on whether
                # the input and output file are the same.
                if "original_images" not in each_block_file_handle:
                    each_block_file_handle["original_images"] = \
                        h5py.ExternalLink(
                            os.path.relpath(
                                block_i.file.filename, intermediate_output_dir
                            ),
                            block_i.name
                        )

                input_filename_block.append(
                    each_block_file_handle.filename + "/" + "original_images"
                )
                output_filename_block.append(
                    each_block_file_handle.filename + "/"
                )

        if input_file_handle != output_file_handle:
            input_file_handle.close()

    cur_module_dirpath = os.path.dirname(os.path.dirname(nanshe.__file__))
    cur_module_filepath = os.path.splitext(os.path.abspath(__file__))[0]
    cur_module_name = os.path.relpath(cur_module_filepath, cur_module_dirpath)
    cur_module_name = cur_module_name.replace(os.path.sep, ".")
    cur_module_filepath += os.extsep + "py"

    import sys

    python = sys.executable

    # Program given to `python -c` for every block: it puts the package's
    # parent directory first on the path and calls this module's `main`.
    executable_run = ""
    executable_run += "from sys import argv, path, exit; "
    executable_run += "path[:] = [\"%s\"] + [_ for _ in path if _ != \"%s\"]; " % \
                      (cur_module_dirpath, cur_module_dirpath,)
    executable_run += "from %s import main; exit(main(*argv))" % \
                      (cur_module_name,)

    # One argument pack per block; the last two entries are the files that
    # the process's stdout and stderr are redirected to.
    block_process_args_gen = iters.izip(
        itertools.repeat(python),
        itertools.repeat("-c"),
        itertools.repeat(executable_run),
        itertools.repeat(intermediate_config),
        input_filename_block,
        output_filename_block,
        stdout_filename_block,
        stderr_filename_block
    )

    if use_drmaa:
        # Attempt to import drmaa.
        # If it fails to import, either the user has no intent in using it or
        # forgot to install it. If it imports, but fails to find symbols,
        # then the user has not set DRMAA_LIBRARY_PATH or
        # does not have libdrmaa.so.
        try:
            import drmaa
        except ImportError:
            # python-drmaa is not installed.
            logger.error(
                "Was not able to import drmaa. " +
                "If this is meant to be run using the OpenGrid submission " +
                "system, then drmaa needs to be installed via pip or " +
                "easy_install."
            )
            raise
        except RuntimeError:
            # The drmaa library was not specified, but python-drmaa is
            # installed.
            logger.error(
                "Was able to import drmaa. " +
                "However, the drmaa library could not be found. Please " +
                "either specify the location of libdrmaa.so using the " +
                "DRMAA_LIBRARY_PATH environment variable or disable/remove " +
                "use_drmaa from the config file."
            )
            raise

        s=drmaa.Session()
        s.initialize()

        ready_processes = []
        for each_arg_pack in block_process_args_gen:
            ready_processes.append((each_arg_pack, s.createJobTemplate()))
            ready_processes[-1][1].jobName = os.path.basename(
                os.path.splitext(cur_module_filepath)[0]
            ) + "-" + os.path.basename(
                os.path.dirname(each_arg_pack[3].split(".h5")[0])
            ) + "-" + os.path.basename(each_arg_pack[3].split(".h5")[0])
            ready_processes[-1][1].remoteCommand = each_arg_pack[0]
            ready_processes[-1][1].args = each_arg_pack[1:-2]
            ready_processes[-1][1].jobEnvironment = os.environ
            ready_processes[-1][1].inputPath = "localhost:" + os.devnull
            ready_processes[-1][1].outputPath = "localhost:" + \
                                                each_arg_pack[-2]
            ready_processes[-1][1].errorPath = "localhost:" + \
                                               each_arg_pack[-1]
            ready_processes[-1][1].workingDirectory = os.getcwd()
            ready_processes[-1][1].nativeSpecification = "-pe batch " + \
                                                         str(num_drmaa_cores)

        running_processes = []
        for each_arg_pack, each_process_template in ready_processes:
            each_process_id = s.runJob(each_process_template)
            running_processes.append(
                (each_arg_pack, each_process_id, each_process_template)
            )
            logger.info(
                "Started new process ( \"" + " ".join(each_arg_pack) + "\" )."
            )

        start_queue_time = time.time()
        logger.info("Waiting for queued jobs to complete.")

        #finished_processes = []
        for each_arg_pack, each_process_id, each_process_template in \
                running_processes:
            each_process_status = s.wait(each_process_id)

            if not each_process_status.hasExited:
                raise RuntimeError(
                    "The process (\"" + " ".join(each_arg_pack) +
                    "\") has exited prematurely."
                )

            logger.info(
                "Finished process ( \"" + " ".join(each_arg_pack) + "\" )."
            )
            s.deleteJobTemplate(each_process_template)
            #finished_processes.append((each_arg_pack, each_process_id))

        s.exit()

        end_queue_time = time.time()
        diff_queue_time = end_queue_time - start_queue_time

        logger.info(
            "Run time for queued jobs to complete is \"" +
            str(diff_queue_time) + " s\"."
        )
    else:
        # TODO: Refactor into a separate class (have it return futures somehow)
        #finished_processes = []

        running_processes = []
        pool_tasks_empty = False
        while (not pool_tasks_empty) or len(running_processes):
            # Start new processes until the pool is full or no blocks remain.
            while (not pool_tasks_empty) and \
                    (len(running_processes) < num_processes):
                try:
                    each_arg_pack = next(block_process_args_gen)
                    each_arg_pack, each_stdout_filename, each_stderr_filename = \
                        each_arg_pack[:-2], each_arg_pack[-2], each_arg_pack[-1]
                    # The child receives its own copies of the descriptors;
                    # so, ours are closed as soon as it has been started
                    # instead of being left open.
                    with open(each_stdout_filename, "w") as each_stdout_file, \
                            open(each_stderr_filename, "w") as each_stderr_file:
                        each_process = subprocess.Popen(
                            each_arg_pack,
                            stdout=each_stdout_file,
                            stderr=each_stderr_file
                        )

                    running_processes.append((each_arg_pack, each_process,))

                    logger.info(
                        "Started new process ( \"" +
                        " ".join(each_arg_pack) + "\" )."
                    )
                except StopIteration:
                    pool_tasks_empty = True

            # Wait for a slot to free up (or for everything to finish once
            # there is nothing left to start).
            while ((not pool_tasks_empty) and
                   (len(running_processes) >= num_processes)) or \
                    (pool_tasks_empty and len(running_processes)):
                time.sleep(1)

                i = 0
                while i < len(running_processes):
                    if running_processes[i][1].poll() is not None:
                        logger.info(
                            "Finished process ( \"" +
                            " ".join(running_processes[i][0]) + "\" )."
                        )

                        #finished_processes.append(running_processes[i])
                        del running_processes[i]
                    else:
                        time.sleep(1)
                        i += 1

        # finished_processes = None

    start_time = time.time()
    logger.info("Starting merge over all blocks.")

    with h5py.File(output_filename_ext, "a") as output_file_handle:
        output_group = output_file_handle[output_group_name]

        new_neurons_set = segment.get_empty_neuron(
            shape=tuple(original_images_shape_array[1:]), dtype=float
        )

        for i, i_str, (output_filename_block_i, sequential_block_i) in \
                iters.filled_stringify_enumerate(
                    iters.izip(output_filename_block,
                               original_images_pared_slices.flat)):
            # Where the block's (windowed) results go in the full frame; the
            # leading axis is taken whole.
            windowed_slice_i = tuple(
                slice(_1, _2, 1) for _1, _2 in [(None, None)] +
                sequential_block_i["windowed_stack_selection"].tolist()[1:]
            )
            window_trimmed_i = tuple(
                slice(_1, _2, 1) for _1, _2 in
                sequential_block_i["windowed_block_selection"].tolist()
            )
            output_filename_block_i = output_filename_block_i.rstrip("/")

            with h5py.File(output_filename_block_i, "r") as \
                    each_block_file_handle:
                if "neurons" in each_block_file_handle:
                    neurons_block_i_smaller = hdf5.serializers.read_numpy_structured_array_from_HDF5(
                        each_block_file_handle, "/neurons"
                    )

                    neurons_block_i_windowed_count = numpy.squeeze(
                        numpy.apply_over_axes(
                            numpy.sum,
                            neurons_block_i_smaller["mask"].astype(float),
                            tuple(iters.irange(
                                1, neurons_block_i_smaller["mask"].ndim
                            ))
                        )
                    )

                    if neurons_block_i_windowed_count.shape == tuple():
                        neurons_block_i_windowed_count = numpy.array(
                            [neurons_block_i_windowed_count])

                    neurons_block_i_non_windowed_count = numpy.squeeze(
                        numpy.apply_over_axes(
                            numpy.sum,
                            neurons_block_i_smaller["mask"][window_trimmed_i].astype(float),
                            tuple(iters.irange(
                                1, neurons_block_i_smaller["mask"].ndim
                            ))
                        )
                    )

                    if neurons_block_i_non_windowed_count.shape == tuple():
                        neurons_block_i_non_windowed_count = numpy.array(
                            [neurons_block_i_non_windowed_count]
                        )

                    if len(neurons_block_i_non_windowed_count):
                        # Find ones that are inside the margins by more than
                        # half
                        neurons_block_i_acceptance = (
                            (neurons_block_i_non_windowed_count /
                             neurons_block_i_windowed_count) > 0.5
                        )

                        logger.info(
                            "Accepted the following neurons %s from block %s."
                            % (
                                str(neurons_block_i_acceptance.nonzero()[0].tolist()),
                                i_str
                            )
                        )

                        # Take a subset of our previous neurons that are within
                        # the margins by half
                        neurons_block_i_accepted = neurons_block_i_smaller[
                            neurons_block_i_acceptance
                        ]

                        neurons_block_i = numpy.zeros(
                            neurons_block_i_accepted.shape,
                            dtype=new_neurons_set.dtype
                        )
                        neurons_block_i["mask"][windowed_slice_i] = \
                            neurons_block_i_accepted["mask"]
                        neurons_block_i["contour"][windowed_slice_i] = \
                            neurons_block_i_accepted["contour"]
                        neurons_block_i["image"][windowed_slice_i] = \
                            neurons_block_i_accepted["image"]

                        # Copy other properties
                        neurons_block_i["area"] = \
                            neurons_block_i_accepted["area"]
                        neurons_block_i["max_F"] = \
                            neurons_block_i_accepted["max_F"]
                        neurons_block_i["gaussian_mean"] = \
                            neurons_block_i_accepted["gaussian_mean"]
                        neurons_block_i["gaussian_cov"] = \
                            neurons_block_i_accepted["gaussian_cov"]
                        # TODO: Correct centroid to larger block position.
                        neurons_block_i["centroid"] = \
                            neurons_block_i_accepted["centroid"]
                        neurons_block_i["centroid"] += \
                            sequential_block_i["windowed_stack_selection"][1:, 0]

                        array_debug_recorder = hdf5.record.generate_HDF5_array_recorder(
                            output_group,
                            group_name="debug",
                            enable=debug,
                            overwrite_group=False,
                            recorder_constructor=hdf5.record.HDF5EnumeratedArrayRecorder
                        )

                        segment.merge_neuron_sets.recorders.array_debug_recorder = array_debug_recorder
                        new_neurons_set = segment.merge_neuron_sets(
                            new_neurons_set,
                            neurons_block_i,
                            **parameters["generate_neurons"]["postprocess_data"]["merge_neuron_sets"]
                        )
                    else:
                        logger.info(
                            "Accepted the following neurons %s from block %s."
                            % (
                                str([]),
                                i_str
                            )
                        )
                else:
                    logger.info(
                        "No neurons accepted as none were found for block"
                        " %s." % i_str
                    )

        hdf5.serializers.create_numpy_structured_array_in_HDF5(
            output_group, "neurons", new_neurons_set, overwrite=True)

        if "parameters" not in output_group["neurons"].attrs:
            output_group["neurons"].attrs["parameters"] = repr(dict(
                list(parameters.items()) +
                [("block_shape", block_shape),
                 ("num_blocks", num_blocks),
                 ("half_window_shape", half_window_shape),
                 ("half_border_shape", half_border_shape),
                 ("use_drmaa", use_drmaa),
                 ("num_drmaa_cores", num_drmaa_cores),
                 ("debug", debug)]
            ))

    logger.info("Finished merge over all blocks.")
    end_time = time.time()

    diff_time = end_time - start_time

    logger.info(
        "Run time for merge over all blocks is \"" + str(diff_time) + " s\"."
    )