Exemple #1
0
def generate_neurons_blocks(input_filename,
                            output_filename,
                            num_processes=multiprocessing.cpu_count(),
                            block_shape=None,
                            num_blocks=None,
                            half_window_shape=None,
                            half_border_shape=None,
                            use_drmaa=False,
                            num_drmaa_cores=16,
                            debug=False,
                            **parameters):
    """
        Splits an image stack into (optionally overlapping) blocks, runs this
        module's ``main`` on every block in a separate Python process (either
        local subprocesses or DRMAA jobs), and merges the neurons found in
        each block into a single ``neurons`` dataset in the output group.

        Args:
            input_filename(str):            HDF5 file path followed by the
                                            internal path of the dataset
                                            holding the image stack (split
                                            with split_hdf5_path).

            output_filename(str):           HDF5 file path followed by the
                                            internal path of the group to
                                            store results in.

            num_processes(int):             maximum number of local
                                            subprocesses to run at once
                                            (ignored when use_drmaa is True).

            block_shape(sequence of ints):  size of a block along each
                                            dimension; -1 marks a dimension
                                            as undefined (default None, all
                                            undefined).

            num_blocks(sequence of ints):   number of blocks along each
                                            dimension; -1 marks a dimension
                                            as undefined (default None, all
                                            undefined). At least one of
                                            block_shape or num_blocks must be
                                            given.

            half_window_shape(sequence of ints):  how far each block is
                                            extended on either side along
                                            each dimension (default is half
                                            of the block shape).

            half_border_shape(sequence of ints):  how much to trim from both
                                            ends of each dimension before
                                            blocking (default no trimming).
                                            The first entry must be 0.

            use_drmaa(bool):                whether to submit blocks through
                                            DRMAA instead of running local
                                            subprocesses.

            num_drmaa_cores(int):           value passed in the DRMAA native
                                            specification ("-pe batch N").

            debug(bool):                    stored in the per-run config file
                                            and used to enable the debug
                                            array recorder during merging.

            **parameters(dict):             configuration written to the
                                            intermediate config.json; must
                                            contain
                                            ["generate_neurons"]
                                            ["preprocess_data"] and
                                            ["generate_neurons"]
                                            ["postprocess_data"]
                                            ["merge_neuron_sets"].

        Raises:
            AssertionError:                 if the shape arguments are
                                            inconsistent with the data.

            OSError:                        if the intermediate directory
                                            can neither be created nor
                                            already exists.
    """

    # TODO: Move function into new module with its own command line interface.
    # TODO: Heavy refactoring required on this function.

    # Parse input filename and validate that the name is acceptable
    input_filename_ext, input_dataset_name = hdf5.serializers.split_hdf5_path(
        input_filename)

    # Parse output filename and validate that the name is acceptable
    output_filename_ext, output_group_name = hdf5.serializers.split_hdf5_path(
        output_filename)

    # Directory where individual block runs will be stored
    # (the output file path with its extension replaced by "_blocks").
    intermediate_output_dir = output_filename_ext.rsplit(
        os.path.splitext(output_filename_ext)[1], 1)[0] + "_blocks"

    # Only the shape of the input data is needed here.
    with h5py.File(input_filename_ext, "r") as input_file_handle:
        original_images_shape_array = numpy.array(
            input_file_handle[input_dataset_name].shape)

    # Get the amount of the border to slice
    if half_border_shape is None:
        half_border_shape_array = numpy.zeros(len(original_images_shape_array),
                                              dtype=int)
    else:
        # Validate the border itself (previously this checked
        # half_window_shape, which may legitimately be None here).
        assert (len(half_border_shape) == len(original_images_shape_array))

        half_border_shape_array = numpy.array(half_border_shape)

        # Should be of type integer
        assert (issubclass(half_border_shape_array.dtype.type, numpy.integer))

        # Should not cut along temporal portion.
        # Maybe replace with a warning.
        assert (half_border_shape[0] == 0)

    # TODO: Refactor to expanded_numpy.
    # Cuts boundaries from original_images_shape
    original_images_pared_shape_array = original_images_shape_array - \
        2 * half_border_shape_array

    # At least one of them must be specified. If not some mixture of both.
    assert ((block_shape is not None) or (num_blocks is not None))

    # Size of the block to use by pixels (-1 means undefined).
    if block_shape is None:
        block_shape_array = -numpy.ones(
            original_images_pared_shape_array.shape, dtype=int)
        block_shape_array_undefined = numpy.ones(
            original_images_pared_shape_array.shape, dtype=bool)
    else:
        # Should have the same number of values in each
        assert (len(original_images_pared_shape_array) == len(block_shape))

        block_shape_array = numpy.array(block_shape, dtype=int)

        # Should be of type integer
        assert issubclass(block_shape_array.dtype.type, numpy.integer)

        block_shape_array_undefined = (block_shape_array == -1)

    # Number of blocks along each dimension (-1 means undefined).
    if num_blocks is None:
        num_blocks_array = -numpy.ones(
            original_images_pared_shape_array.shape, dtype=int)
        num_blocks_array_undefined = numpy.ones(
            original_images_pared_shape_array.shape, dtype=bool)
    else:
        # Should have the same number of values in each
        assert (len(original_images_pared_shape_array) == len(num_blocks))

        num_blocks_array = numpy.array(num_blocks, dtype=int)

        # Should be of type integer
        assert issubclass(num_blocks_array.dtype.type, numpy.integer)

        num_blocks_array_undefined = (num_blocks_array == -1)

    # Want to ensure that both aren't defined.
    # NOTE(review): as written this only fails when *every* dimension has
    # both values defined; a per-dimension check would use any() -- confirm
    # which was intended before changing.
    assert ~(~block_shape_array_undefined & ~num_blocks_array_undefined).all()

    # If both are undefined, then the block should span that dimension
    missing_both = (block_shape_array_undefined & num_blocks_array_undefined)
    block_shape_array[missing_both] = original_images_pared_shape_array[
        missing_both]
    num_blocks_array[missing_both] = 1
    # Thus, we have resolved these values and can continue.
    block_shape_array_undefined[missing_both] = False
    num_blocks_array_undefined[missing_both] = False

    # Replace undefined values in block_shape_array
    missing_block_shape_array, block_shape_array_remainder = divmod(
        original_images_pared_shape_array[block_shape_array_undefined],
        num_blocks_array[block_shape_array_undefined])
    # Block shape must be well defined.
    assert (block_shape_array_remainder == 0).all()
    missing_block_shape_array = missing_block_shape_array.astype(int)
    block_shape_array[block_shape_array_undefined] = missing_block_shape_array

    # Replace undefined values in num_blocks_array
    missing_num_blocks_array, num_blocks_array_remainder = divmod(
        original_images_pared_shape_array[num_blocks_array_undefined],
        block_shape_array[num_blocks_array_undefined])
    # Allow some blocks to be smaller
    missing_num_blocks_array += (num_blocks_array_remainder != 0).astype(int)
    num_blocks_array[num_blocks_array_undefined] = missing_num_blocks_array

    # Get the overlap window
    if half_window_shape is None:
        half_window_shape_array = block_shape_array / 2.0
    else:
        assert (
            len(half_window_shape) == len(original_images_pared_shape_array))

        half_window_shape_array = numpy.array(half_window_shape)

        assert issubclass(half_window_shape_array.dtype.type, numpy.integer)

    # Want to make our window size is at least as large as the one used for
    # the f0 calculation.
    preprocess_parameters = parameters["generate_neurons"]["preprocess_data"]
    if "extract_f0" in preprocess_parameters:
        assert (preprocess_parameters["extract_f0"]["half_window_size"] <=
                half_window_shape_array[0])

    # Estimate bounds for each slice. Uses typical python [begin, end) for the
    # indices. One (ndim, 2) array of bounds is stored per block.
    estimated_bounds = numpy.zeros(
        tuple(num_blocks_array),
        dtype=(int, original_images_pared_shape_array.shape + (2, )))

    for each_block_indices in iters.index_generator(*num_blocks_array):
        for each_dim, each_block_dim_index in enumerate(each_block_indices):
            estimated_lower_bound = each_block_dim_index * block_shape_array[
                each_dim]
            estimated_upper_bound = (each_block_dim_index +
                                     1) * block_shape_array[each_dim]

            estimated_bounds[each_block_indices][each_dim] = numpy.array(
                [estimated_lower_bound, estimated_upper_bound])

    bounds_shape = estimated_bounds.shape[-2:]
    original_images_pared_slices = numpy.zeros(
        estimated_bounds.shape[:-2],
        dtype=[("actual", int, bounds_shape),
               ("windowed", int, bounds_shape),
               ("windowed_stack_selection", int, bounds_shape),
               ("windowed_block_selection", int, bounds_shape)])

    # Get the slice that is within bounds (clamp to [0, pared shape]).
    original_images_pared_slices["actual"] = estimated_bounds
    original_images_pared_slices["actual"][..., 0] = numpy.maximum(
        original_images_pared_slices["actual"][..., 0], 0)
    original_images_pared_slices["actual"][..., 1] = numpy.minimum(
        original_images_pared_slices["actual"][..., 1],
        original_images_pared_shape_array)

    # Gets the defined half_window_size.
    window_addition = numpy.zeros(estimated_bounds.shape, dtype=int)
    window_addition[..., 0] = -half_window_shape_array
    window_addition[..., 1] = half_window_shape_array

    # Get the slice with a window added (also clamped to the pared stack).
    original_images_pared_slices[
        "windowed"] = estimated_bounds + window_addition
    original_images_pared_slices["windowed"][..., 0] = numpy.maximum(
        original_images_pared_slices["windowed"][..., 0], 0)
    original_images_pared_slices["windowed"][..., 1] = numpy.minimum(
        original_images_pared_slices["windowed"][..., 1],
        original_images_pared_shape_array)

    # Get the slice information to get the windowed block from the original
    # image stack (i.e. shift by the trimmed border).
    original_images_pared_slices[
        "windowed_stack_selection"] = original_images_pared_slices["windowed"]
    original_images_pared_slices[
        "windowed_stack_selection"] += xnumpy.expand_view(
            half_border_shape_array, reps_after=2)

    # Get slice information for the portion within
    # `original_images_pared_slices["windowed"]`, which corresponds to
    # `original_images_pared_slices["actual"]`. The lower bound starts at 0
    # (from numpy.zeros), the upper bound is the actual block length, and
    # both are then shifted by the offset of "actual" inside "windowed".
    original_images_pared_slices["windowed_block_selection"][..., 1] = (
        original_images_pared_slices["actual"][..., 1] -
        original_images_pared_slices["actual"][..., 0])
    original_images_pared_slices[
        "windowed_block_selection"][:] += xnumpy.expand_view(
            original_images_pared_slices["actual"][..., 0] -
            original_images_pared_slices["windowed"][..., 0],
            reps_after=2)

    # Get a directory for intermediate results.
    try:
        os.mkdir(intermediate_output_dir)
    except OSError:
        # If it already exists, that is fine. Anything else (permissions,
        # missing parent, ...) is a real failure and should not be hidden.
        if not os.path.isdir(intermediate_output_dir):
            raise

    intermediate_config = intermediate_output_dir + "/" + "config.json"

    # Overwrite the config file always
    with open(intermediate_config, "w") as fid:
        json.dump(
            dict(parameters, debug=debug),
            fid,
            indent=4,
            separators=(",", " : "))
        fid.write("\n")

    # Construct an HDF5 file for each block
    input_filename_block = []
    output_filename_block = []
    stdout_filename_block = []
    stderr_filename_block = []
    with h5py.File(output_filename_ext, "a") as output_file_handle:
        # Create a new output directory if doesn't exists.
        output_file_handle.require_group(output_group_name)

        output_group = output_file_handle[output_group_name]

        if "original_images" not in output_group:
            if input_filename_ext == output_filename_ext:
                output_group["original_images"] = h5py.SoftLink(
                    input_dataset_name)
            else:
                output_group["original_images"] = h5py.ExternalLink(
                    input_filename_ext, "/" + input_dataset_name)

        output_group.require_group("blocks")

        output_group_blocks = output_group["blocks"]

        try:
            # Skipping using region refs.
            input_file_handle = h5py.File(input_filename_ext, "r")
        except IOError:
            # File is already open
            input_file_handle = output_file_handle

        try:
            for i, i_str, sequential_block_i in \
                    iters.filled_stringify_enumerate(
                        original_images_pared_slices.flat):
                intermediate_basename_i = \
                    intermediate_output_dir + "/" + i_str

                # Hold redirected stdout and stderr for each subprocess.
                stdout_filename_block.append(intermediate_basename_i +
                                             os.extsep + "out")
                stderr_filename_block.append(intermediate_basename_i +
                                             os.extsep + "err")

                # Ensure that the blocks are corrected to deal with trimming
                # of the image stack. Must be done after the calculation of
                # original_images_pared_slices["windowed_block_selection"].
                sequential_block_i_windowed = sequential_block_i[
                    "windowed_stack_selection"]
                slice_i = tuple(
                    slice(_1, _2, 1) for _1, _2 in sequential_block_i_windowed)

                # Each block is an empty dataset whose attributes describe
                # where in the input the block's data lives.
                if i_str not in output_group_blocks:
                    output_group_blocks[i_str] = []
                    output_group_blocks[i_str].attrs[
                        "filename"] = input_file_handle.filename
                    output_group_blocks[i_str].attrs[
                        "dataset"] = input_dataset_name
                    output_group_blocks[i_str].attrs["slice"] = str(slice_i)

                block_i = output_group_blocks[i_str]

                with h5py.File(intermediate_basename_i + os.extsep + "h5",
                               "a") as each_block_file_handle:
                    # Link the block file's "original_images" back to the
                    # block entry in the output file (path relative to the
                    # intermediate directory).
                    if "original_images" not in each_block_file_handle:
                        each_block_file_handle[
                            "original_images"] = h5py.ExternalLink(
                                os.path.relpath(block_i.file.filename,
                                                intermediate_output_dir),
                                block_i.name)

                    input_filename_block.append(
                        each_block_file_handle.filename + "/" +
                        "original_images")
                    output_filename_block.append(
                        each_block_file_handle.filename + "/")
        finally:
            # Release the separately opened input handle even if building
            # the block files failed part way through.
            if input_file_handle != output_file_handle:
                input_file_handle.close()

    # Work out this module's dotted name relative to the directory containing
    # the nanshe package so that a fresh interpreter can import it.
    cur_module_dirpath = os.path.dirname(os.path.dirname(nanshe.__file__))
    cur_module_filepath = os.path.splitext(os.path.abspath(__file__))[0]
    cur_module_name = os.path.relpath(cur_module_filepath, cur_module_dirpath)
    cur_module_name = cur_module_name.replace(os.path.sep, ".")
    cur_module_filepath += os.extsep + "py"

    import sys

    python = sys.executable

    # One-liner run with `python -c`: put the package directory first on the
    # path, import this module's main, and call it with the arguments.
    executable_run = ""
    executable_run += "from sys import argv, path, exit; "

    executable_run += "path[:] = [\"%s\"] + [_ for _ in path if _ != \"%s\"]; " % \
                      (cur_module_dirpath, cur_module_dirpath,)
    executable_run += "from %s import main; exit(main(*argv))" % \
                      (cur_module_name,)

    # Each pack is: python, "-c", code, config, input, output, stdout, stderr.
    block_process_args_gen = iters.izip(itertools.repeat(python),
                                        itertools.repeat("-c"),
                                        itertools.repeat(executable_run),
                                        itertools.repeat(intermediate_config),
                                        input_filename_block,
                                        output_filename_block,
                                        stdout_filename_block,
                                        stderr_filename_block)

    if use_drmaa:
        # Attempt to import drmaa.
        # If it fails to import, either the user has no intent in using it or
        # forgot to install it. If it imports, but fails to find symbols,
        # then the user has not set DRMAA_LIBRARY_PATH or
        # does not have libdrmaa.so.
        try:
            import drmaa
        except ImportError:
            # python-drmaa is not installed.
            logger.error(
                "Was not able to import drmaa. " +
                "If this is meant to be run using the OpenGrid submission " +
                "system, then drmaa needs to be installed via pip or " +
                "easy_install.")
            raise
        except RuntimeError:
            # The drmaa library was not specified, but python-drmaa is
            # installed.
            logger.error(
                "Was able to import drmaa. " +
                "However, the drmaa library could not be found. Please " +
                "either specify the location of libdrmaa.so using the " +
                "DRMAA_LIBRARY_PATH environment variable or disable/remove " +
                "use_drmaa from the config file.")
            raise

        s = drmaa.Session()
        s.initialize()

        ready_processes = []
        for each_arg_pack in block_process_args_gen:
            each_template = s.createJobTemplate()

            # NOTE(review): index 3 of the pack is the config file path, not
            # a block file, so the ".h5" split is a no-op and every job gets
            # the same name -- possibly index 4 was intended; confirm.
            job_name_source = each_arg_pack[3].split(".h5")[0]
            each_template.jobName = "-".join([
                os.path.basename(os.path.splitext(cur_module_filepath)[0]),
                os.path.basename(os.path.dirname(job_name_source)),
                os.path.basename(job_name_source)
            ])
            each_template.remoteCommand = each_arg_pack[0]
            each_template.args = each_arg_pack[1:-2]
            each_template.jobEnvironment = os.environ
            each_template.inputPath = "localhost:" + os.devnull
            each_template.outputPath = "localhost:" + each_arg_pack[-2]
            each_template.errorPath = "localhost:" + each_arg_pack[-1]
            each_template.workingDirectory = os.getcwd()
            each_template.nativeSpecification = "-pe batch " + str(
                num_drmaa_cores)

            ready_processes.append((each_arg_pack, each_template))

        running_processes = []
        for each_arg_pack, each_process_template in ready_processes:
            each_process_id = s.runJob(each_process_template)
            running_processes.append(
                (each_arg_pack, each_process_id, each_process_template))
            logger.info("Started new process ( \"" + " ".join(each_arg_pack) +
                        "\" ).")

        start_queue_time = time.time()
        logger.info("Waiting for queued jobs to complete.")

        for each_arg_pack, each_process_id, each_process_template in \
                running_processes:
            each_process_status = s.wait(each_process_id)

            if not each_process_status.hasExited:
                raise RuntimeError("The process (\"" +
                                   " ".join(each_arg_pack) +
                                   "\") has exited prematurely.")

            logger.info("Finished process ( \"" + " ".join(each_arg_pack) +
                        "\" ).")
            s.deleteJobTemplate(each_process_template)

        s.exit()

        end_queue_time = time.time()
        diff_queue_time = end_queue_time - start_queue_time

        logger.info("Run time for queued jobs to complete is \"" +
                    str(diff_queue_time) + " s\".")
    else:
        # TODO: Refactor into a separate class (have it return futures somehow)
        running_processes = []
        pool_tasks_empty = False
        while (not pool_tasks_empty) or len(running_processes):
            # Launch new subprocesses until the pool is full or no blocks
            # remain.
            while (not pool_tasks_empty) and (len(running_processes) <
                                              num_processes):
                try:
                    each_arg_pack = next(block_process_args_gen)
                except StopIteration:
                    pool_tasks_empty = True
                    break

                each_stdout_filename, each_stderr_filename = \
                    each_arg_pack[-2:]
                each_arg_pack = each_arg_pack[:-2]

                # The child receives its own copies of the descriptors, so
                # the parent's handles can (and should) be closed right after
                # the launch rather than leaked.
                with open(each_stdout_filename, "w") as each_stdout, \
                        open(each_stderr_filename, "w") as each_stderr:
                    each_process = subprocess.Popen(each_arg_pack,
                                                    stdout=each_stdout,
                                                    stderr=each_stderr)

                running_processes.append((
                    each_arg_pack,
                    each_process,
                ))

                logger.info("Started new process ( \"" +
                            " ".join(each_arg_pack) + "\" ).")

            # Poll until a slot frees up (or, once all blocks have been
            # launched, until every subprocess has finished).
            while ((not pool_tasks_empty) and
                       (len(running_processes) >= num_processes)) or \
                    (pool_tasks_empty and len(running_processes)):
                time.sleep(1)

                i = 0
                while i < len(running_processes):
                    if running_processes[i][1].poll() is not None:
                        logger.info("Finished process ( \"" +
                                    " ".join(running_processes[i][0]) +
                                    "\" ).")

                        del running_processes[i]
                    else:
                        time.sleep(1)
                        i += 1

    start_time = time.time()
    logger.info("Starting merge over all blocks.")

    with h5py.File(output_filename_ext, "a") as output_file_handle:
        output_group = output_file_handle[output_group_name]

        new_neurons_set = segment.get_empty_neuron(shape=tuple(
            original_images_shape_array[1:]),
                                                   dtype=float)

        for i, i_str, (output_filename_block_i,
                       sequential_block_i) in iters.filled_stringify_enumerate(
                           iters.izip(output_filename_block,
                                      original_images_pared_slices.flat)):
            # Where the windowed block sits in the full frame; the leading
            # axis is left open for the per-neuron axis of the results.
            windowed_slice_i = tuple(
                slice(_1, _2, 1) for _1, _2 in [(None, None)] +
                sequential_block_i["windowed_stack_selection"].tolist()[1:])
            # NOTE(review): unlike windowed_slice_i, the first (temporal)
            # entry is not replaced here, so it ends up slicing the neuron
            # axis of the mask below -- confirm this is intended.
            window_trimmed_i = tuple(
                slice(_1, _2, 1) for _1, _2 in
                sequential_block_i["windowed_block_selection"].tolist())
            output_filename_block_i = output_filename_block_i.rstrip("/")

            with h5py.File(output_filename_block_i,
                           "r") as each_block_file_handle:
                if "neurons" in each_block_file_handle:
                    neurons_block_i_smaller = hdf5.serializers.read_numpy_structured_array_from_HDF5(
                        each_block_file_handle, "/neurons")

                    # Mask area of each neuron over the whole windowed block.
                    neurons_block_i_windowed_count = numpy.squeeze(
                        numpy.apply_over_axes(
                            numpy.sum,
                            neurons_block_i_smaller["mask"].astype(float),
                            tuple(
                                iters.irange(
                                    1, neurons_block_i_smaller["mask"].ndim))))

                    # squeeze collapses a single neuron to a 0-d array.
                    if neurons_block_i_windowed_count.shape == tuple():
                        neurons_block_i_windowed_count = numpy.array(
                            [neurons_block_i_windowed_count])

                    # Mask area of each neuron inside the un-windowed block.
                    neurons_block_i_non_windowed_count = numpy.squeeze(
                        numpy.apply_over_axes(
                            numpy.sum, neurons_block_i_smaller["mask"]
                            [window_trimmed_i].astype(float),
                            tuple(
                                iters.irange(
                                    1, neurons_block_i_smaller["mask"].ndim))))

                    if neurons_block_i_non_windowed_count.shape == tuple():
                        neurons_block_i_non_windowed_count = numpy.array(
                            [neurons_block_i_non_windowed_count])

                    if len(neurons_block_i_non_windowed_count):
                        # Find ones that are inside the margins by more than
                        # half
                        neurons_block_i_acceptance = (
                            (neurons_block_i_non_windowed_count /
                             neurons_block_i_windowed_count) > 0.5)

                        logger.info(
                            "Accepted the following neurons %s from block %s."
                            % (str(neurons_block_i_acceptance.nonzero()
                                   [0].tolist()), i_str))

                        # Take a subset of our previous neurons that are within
                        # the margins by half
                        neurons_block_i_accepted = neurons_block_i_smaller[
                            neurons_block_i_acceptance]

                        # Embed the block-sized images in full-frame arrays.
                        neurons_block_i = numpy.zeros(
                            neurons_block_i_accepted.shape,
                            dtype=new_neurons_set.dtype)
                        neurons_block_i["mask"][
                            windowed_slice_i] = neurons_block_i_accepted[
                                "mask"]
                        neurons_block_i["contour"][
                            windowed_slice_i] = neurons_block_i_accepted[
                                "contour"]
                        neurons_block_i["image"][
                            windowed_slice_i] = neurons_block_i_accepted[
                                "image"]

                        # Copy other properties
                        neurons_block_i["area"] = neurons_block_i_accepted[
                            "area"]
                        neurons_block_i["max_F"] = neurons_block_i_accepted[
                            "max_F"]
                        neurons_block_i[
                            "gaussian_mean"] = neurons_block_i_accepted[
                                "gaussian_mean"]
                        neurons_block_i[
                            "gaussian_cov"] = neurons_block_i_accepted[
                                "gaussian_cov"]
                        # TODO: Correct centroid to larger block position.
                        neurons_block_i["centroid"] = neurons_block_i_accepted[
                            "centroid"]
                        neurons_block_i["centroid"] += sequential_block_i[
                            "windowed_stack_selection"][1:, 0]

                        array_debug_recorder = hdf5.record.generate_HDF5_array_recorder(
                            output_group,
                            group_name="debug",
                            enable=debug,
                            overwrite_group=False,
                            recorder_constructor=hdf5.record.
                            HDF5EnumeratedArrayRecorder)

                        segment.merge_neuron_sets.recorders.array_debug_recorder = array_debug_recorder
                        new_neurons_set = segment.merge_neuron_sets(
                            new_neurons_set, neurons_block_i,
                            **parameters["generate_neurons"]
                            ["postprocess_data"]["merge_neuron_sets"])
                    else:
                        logger.info(
                            "Accepted the following neurons %s from block %s."
                            % (str([]), i_str))
                else:
                    logger.info(
                        "No neurons accepted as none were found for block"
                        " %s." % i_str)

        hdf5.serializers.create_numpy_structured_array_in_HDF5(output_group,
                                                               "neurons",
                                                               new_neurons_set,
                                                               overwrite=True)

        # Record the settings used to produce the result (only once).
        if "parameters" not in output_group["neurons"].attrs:
            output_group["neurons"].attrs["parameters"] = repr(
                dict(
                    list(parameters.items()) +
                    [("block_shape", block_shape), ("num_blocks", num_blocks),
                     ("half_window_shape", half_window_shape),
                     ("half_border_shape",
                      half_border_shape), ("use_drmaa", use_drmaa),
                     ("num_drmaa_cores", num_drmaa_cores), ("debug", debug)]))

    logger.info("Finished merge over all blocks.")
    end_time = time.time()

    diff_time = end_time - start_time

    logger.info("Run time for merge over all blocks is \"" + str(diff_time) +
                " s\".")
Exemple #2
0
def transform(im0,
              scale=5,
              include_intermediates=False,
              include_lower_scales=False,
              out=None):
    """
        Performs integral steps of the wavelet transform on im0 up to the given
        scale. If scale is an iterable, then it must contain one value per
        dimension of im0, and smoothing along a given dimension is only applied
        for steps up to that dimension's scale.

        Args:
            im0(numpy.ndarray):                  the original image.
            scale(int or tuple of ints):         the scale of wavelet transform
                                                 to apply.

            include_intermediates(bool):         whether to return
                                                 intermediates or not
                                                 (default False).

            include_lower_scales(bool):          whether to include lower
                                                 scales or not (default False)
                                                 (ignored if
                                                 include_intermediates is True)

            out(numpy.ndarray):                  holds final result (cannot use
                                                 unless include_intermediates
                                                 is False or an AssertionError
                                                 will be raised.)

        Returns:
            W, out(tuple of numpy.ndarrays):     returns the final result of
                                                 the wavelet transform and
                                                 possibly other scales. Also,
                                                 may return the intermediates.


        Examples:
            >>> transform(numpy.eye(3, dtype = numpy.float32),
            ...     scale = 1,
            ...     include_intermediates = True,
            ...     include_lower_scales = True) # doctest: +NORMALIZE_WHITESPACE
            (array([[[ 0.59375, -0.375  , -0.34375],
                     [-0.375  ,  0.625  , -0.375  ],
                     [-0.34375, -0.375  ,  0.59375]]], dtype=float32),
             array([[[ 1.     ,  0.     ,  0.     ],
                     [ 0.     ,  1.     ,  0.     ],
                     [ 0.     ,  0.     ,  1.     ]],
                    [[ 0.40625,  0.375  ,  0.34375],
                     [ 0.375  ,  0.375  ,  0.375  ],
                     [ 0.34375,  0.375  ,  0.40625]]], dtype=float32))

            >>> transform(numpy.eye(3, dtype = numpy.float32),
            ...     scale = 1,
            ...     include_intermediates = False,
            ...     include_lower_scales = True)
            array([[[ 0.59375, -0.375  , -0.34375],
                    [-0.375  ,  0.625  , -0.375  ],
                    [-0.34375, -0.375  ,  0.59375]]], dtype=float32)

            >>> transform(numpy.eye(3, dtype = numpy.float32),
            ...     scale = 1,
            ...     include_intermediates = False,
            ...     include_lower_scales = False)
            array([[ 0.59375, -0.375  , -0.34375],
                   [-0.375  ,  0.625  , -0.375  ],
                   [-0.34375, -0.375  ,  0.59375]], dtype=float32)

            >>> transform(numpy.eye(3, dtype = numpy.float32),
            ...     scale = (0, 1),
            ...     include_intermediates = False,
            ...     include_lower_scales = False)
            array([[ 0.625, -0.25 , -0.125],
                   [-0.5  ,  0.5  , -0.5  ],
                   [-0.125, -0.25 ,  0.625]], dtype=float32)

            >>> out = numpy.zeros((3, 3), dtype = numpy.float32)
            >>> transform(numpy.eye(3, dtype = numpy.float32),
            ...     scale = 1,
            ...     include_intermediates = False,
            ...     include_lower_scales = False,
            ...     out = out)
            array([[ 0.59375, -0.375  , -0.34375],
                   [-0.375  ,  0.625  , -0.375  ],
                   [-0.34375, -0.375  ,  0.59375]], dtype=float32)
            >>> out
            array([[ 0.59375, -0.375  , -0.34375],
                   [-0.375  ,  0.625  , -0.375  ],
                   [-0.34375, -0.375  ,  0.59375]], dtype=float32)

            >>> out = numpy.eye(3, dtype = numpy.float32)
            >>> transform(out,
            ...     scale = 1,
            ...     include_intermediates = False,
            ...     include_lower_scales = False,
            ...     out = out)
            array([[ 0.59375, -0.375  , -0.34375],
                   [-0.375  ,  0.625  , -0.375  ],
                   [-0.34375, -0.375  ,  0.59375]], dtype=float32)
            >>> out
            array([[ 0.59375, -0.375  , -0.34375],
                   [-0.375  ,  0.625  , -0.375  ],
                   [-0.34375, -0.375  ,  0.59375]], dtype=float32)

            >>> out = numpy.empty((1, 3, 3), dtype = numpy.float32)
            >>> transform(numpy.eye(3, dtype = numpy.float32),
            ...     scale = 1,
            ...     include_intermediates = False,
            ...     include_lower_scales = True,
            ...     out = out) # doctest: +NORMALIZE_WHITESPACE
            array([[[ 0.59375, -0.375  , -0.34375],
                    [-0.375  ,  0.625  , -0.375  ],
                    [-0.34375, -0.375  ,  0.59375]]], dtype=float32)
            >>> out
            array([[[ 0.59375, -0.375  , -0.34375],
                    [-0.375  ,  0.625  , -0.375  ],
                    [-0.34375, -0.375  ,  0.59375]]], dtype=float32)

            >>> out = numpy.empty((1, 3, 3), dtype = numpy.float64)
            >>> transform(numpy.eye(3, dtype = numpy.float32),
            ...     scale = 1,
            ...     include_intermediates = False,
            ...     include_lower_scales = True,
            ...     out = out) # doctest: +NORMALIZE_WHITESPACE
            array([[[ 0.59375, -0.375  , -0.34375],
                    [-0.375  ,  0.625  , -0.375  ],
                    [-0.34375, -0.375  ,  0.59375]]])
            >>> out
            array([[[ 0.59375, -0.375  , -0.34375],
                    [-0.375  ,  0.625  , -0.375  ],
                    [-0.34375, -0.375  ,  0.59375]]])

            >>> out = numpy.eye(3, dtype = numpy.uint8)
            >>> transform(out,
            ...     scale = 1,
            ...     include_intermediates = False,
            ...     include_lower_scales = False,
            ...     out = out)
            array([[ 0.59375, -0.375  , -0.34375],
                   [-0.375  ,  0.625  , -0.375  ],
                   [-0.34375, -0.375  ,  0.59375]], dtype=float32)
            >>> out
            array([[1, 0, 0],
                   [0, 1, 0],
                   [0, 0, 1]], dtype=uint8)
    """

    # All of the arithmetic below is done in float32, so anything else is
    # converted up front (with a warning so the caller knows a copy was made).
    if not issubclass(im0.dtype.type, numpy.float32):
        warnings.warn(
            "Provided im0 with type \"" + repr(im0.dtype.type) + "\". " +
            "Will be cast to type \"" + repr(numpy.float32) + "\"",
            RuntimeWarning)

        im0 = im0.astype(numpy.float32)

    # Make sure that we have scale as a list.
    # If it is not a list, then make a singleton list.
    # (A non-iterable scale makes `list(scale)` raise TypeError; in that case
    # the same scale is repeated for every dimension of im0.)
    try:
        scale = numpy.array(list(scale))

        assert (scale.ndim == 1), \
            "Scale should only have 1 dimension. " + \
            "Instead, got scale.ndim = \"" + str(scale.ndim) + "\"."

        assert (len(scale) == im0.ndim), \
            "Scale should have a value of each dimension of im0. " + \
            "Instead, got len(scale) = \"" + str(len(scale)) + "\" and " + \
            "im0.ndim = \"" + str(im0.ndim) + "\"."

    except TypeError:
        scale = numpy.repeat([scale], im0.ndim)

    # imPrev holds the image before the current smoothing step and imCur the
    # image after it. How they are backed depends on what is being returned.
    imPrev = None
    imCur = None
    if include_intermediates:
        # Returning everything, so a caller-provided buffer is not supported.
        assert (out is None)

        # W[i - 1] is the difference produced by step i; imOut[i] is the
        # smoothed image after step i, with imOut[0] being the original.
        W = numpy.zeros((scale.max(), ) + im0.shape, dtype=numpy.float32)
        imOut = numpy.zeros((scale.max() + 1, ) + im0.shape,
                            dtype=numpy.float32)
        imOut[0] = im0

        # Both names start out as views into imOut rather than copies.
        imCur = imOut[0]
        imPrev = imCur
    else:
        if include_lower_scales:
            # One difference image per step is returned, either in a fresh
            # array or in the caller's `out`.
            if out is None:
                W = numpy.zeros((scale.max(), ) + im0.shape,
                                dtype=numpy.float32)
                out = W
            else:
                assert (out.shape == ((scale.max(), ) + im0.shape))

                # Only a warning here: the provided array is still used as the
                # destination (see `W = out` below).
                if not issubclass(out.dtype.type, numpy.float32):
                    warnings.warn(
                        "Provided out with type \"" + repr(out.dtype.type) +
                        "\". " + "Will be cast to type \"" +
                        repr(numpy.float32) + "\"", RuntimeWarning)

                W = out

            # Scratch buffer; overwritten at the start of every step.
            imPrev = numpy.empty_like(im0)
        else:
            # Only the difference from the last step is returned. `out` doubles
            # as imPrev so that the final subtraction can be done in place.
            if out is not None:
                assert (out.shape == im0.shape)

                if not issubclass(out.dtype.type, numpy.float32):
                    warnings.warn(
                        "Provided out with type \"" + repr(out.dtype.type) +
                        "\". " + "Will be cast to type \"" +
                        repr(numpy.float32) + "\"", RuntimeWarning)

                    # Rebinds `out` to a new float32 array, so the caller's
                    # array is not written to in this case.
                    out = im0.astype(numpy.float32)

                imPrev = out
            else:
                imPrev = numpy.empty_like(im0)
                out = imPrev

        # A separate copy of im0, so the in-place convolution below does not
        # smooth im0 itself.
        imCur = im0.astype(numpy.float32)

    # NOTE(review): if scale.max() is 0 this loop presumably never runs
    # (assuming irange behaves like range), leaving imPrev/out uninitialized
    # when it came from numpy.empty_like -- confirm whether scale 0 is valid.
    for i in irange(1, scale.max() + 1):
        if include_intermediates:
            # Seed slot i with the previous result, then smooth it in place.
            imPrev = imCur
            imOut[i] = imOut[i - 1]
            imCur = imOut[i]
        else:
            # Snapshot the image before this step's smoothing.
            imPrev[:] = imCur

        # Kernel for this step; the helper is defined elsewhere (presumably a
        # binomial smoothing kernel for scale i -- TODO confirm).
        h_ker = binomial_1D_vigra_kernel(i)

        # Smooth in place along each axis that has not yet reached its scale.
        for d in irange(len(scale)):
            if i <= scale[d]:
                vigra.filters.convolveOneDimension(imCur, d, h_ker, out=imCur)

        if include_intermediates or include_lower_scales:
            W[i - 1] = imPrev - imCur

    if include_intermediates:
        return ((W, imOut))
    elif include_lower_scales:
        return (W)
    else:
        # Same as returning imPrev - imCur.
        # Except, it avoids generating another array to hold the result.
        # (This works because `out` and imPrev refer to the same array here.)
        out -= imCur
        return (out)
Exemple #3
0
def get_matching_paths_groups(a_filehandle, a_path_pattern):
    """
        Matches a path pattern against an HDF5 tree one path component at a
        time and collects the names that matched at each depth.

        Note:
            This works best when a tree structure is created systematically in
            HDF5. Then, this will recreate what the tree structure could and
            may contain.

        Args:
            a_filehandle(h5py.File):        an HDF5 file.
            a_path_pattern(str):            an internal path (with patterns for
                                            each group) for the HDF5 file.

        Returns:
            (list):                         a list of lists; the i-th list has
                                            the names that matched the i-th
                                            component of the pattern, in the
                                            order they were first found.
    """

    def get_matching_paths_groups_recursive(a_filehandle, a_path_pattern):
        # Only groups can be descended into, and an empty pattern has nothing
        # left to match.
        if not isinstance(a_filehandle, h5py.Group) or not a_path_pattern:
            return []

        # One ordered "set" of names per depth (OrderedDict keys, values
        # unused). Index 0 is the depth of this call.
        matches_by_depth = [collections.OrderedDict()]

        # Separate the component for this depth from the rest of the pattern.
        trimmed_pattern = a_path_pattern.strip("\b").strip("/")
        head_pattern, _, tail_pattern = trimmed_pattern.partition("/")

        # Wrapping both pattern and name in slashes makes the component
        # pattern match a whole name.
        head_regex = re.compile("/" + head_pattern + "/")

        for child_name in a_filehandle:
            if head_regex.match("/" + child_name + "/") is None:
                continue

            deeper_matches = get_matching_paths_groups_recursive(
                a_filehandle[child_name], tail_pattern
            )

            matches_by_depth[0][child_name] = None

            # Make room for every depth reported by the child.
            while len(matches_by_depth) <= len(deeper_matches):
                matches_by_depth.append(collections.OrderedDict())

            # The child's depth k corresponds to our depth k + 1.
            for depth, names in enumerate(deeper_matches, start=1):
                for name in names:
                    matches_by_depth[depth][name] = None

        return matches_by_depth

    # Hand back plain lists of names instead of the ordered dictionaries.
    return [
        list(each_depth)
        for each_depth in get_matching_paths_groups_recursive(
            a_filehandle, a_path_pattern
        )
    ]
Exemple #4
0
def get_standard_tiff_data(new_tiff_filename,
                           axis_order="tzyxc",
                           pages_to_channel=1,
                           memmap=False):
    """
        Loads a TIFF file as a standard 5D array along with the image
        description of each page.

        Args:
            new_tiff_filename(str):             the TIFF file to read in

            axis_order(str):                    how to order the axes (by
                                                default returns "tzyxc").

            pages_to_channel(int):              if channels are not normally
                                                stored in the channel variable,
                                                but are stored as pages (or as
                                                a mixture), then this will
                                                split neighboring pages into
                                                separate channels. (by default
                                                is 1 so changes nothing)

            memmap(bool):                       allows one to load the array
                                                using a memory mapped file as
                                                opposed to reading it directly.
                                                (by default is False)

        Returns:
            (ndarray/memmap, ndarray):          an array with the axis order
                                                specified and an array of the
                                                page descriptions (empty
                                                string where a page has none).
    """

    assert (pages_to_channel > 0)

    page_descriptions = []
    with tifffile.TiffFile(new_tiff_filename) as tiff_handle:
        if not memmap:
            new_tiff_array = tiff_handle.asarray()
        else:
            try:
                # tifffile >= 0.13.0
                new_tiff_array = tiff_handle.asarray(out="memmap")
            except TypeError:
                # tifffile < 0.13.0
                new_tiff_array = tiff_handle.asarray(memmap=True)

        # Walk the pages in runs of `pages_to_channel`; each run becomes one
        # row of descriptions.
        for first_page in iters.irange(0, len(tiff_handle.pages),
                                       pages_to_channel):
            run_descriptions = []
            page_descriptions.append(run_descriptions)

            for offset in iters.irange(pages_to_channel):
                page_tags = tiff_handle.pages[first_page + offset].tags

                # Pages without a description tag get an empty string.
                try:
                    page_text = unicode(page_tags["image_description"].value)
                except KeyError:
                    page_text = u""

                run_descriptions.append(page_text)

    new_tiff_description = numpy.array(page_descriptions)

    # Add a singleton channel if none is present.
    if new_tiff_array.ndim == 3:
        new_tiff_array = new_tiff_array[None]

    # Fit the old VIGRA style array. (may try to remove in the future)
    # All axes from the last down to the third are reversed and the first two
    # are swapped onto the end.
    reversed_axes = tuple(iters.irange(new_tiff_array.ndim - 1, 1, -1))
    new_tiff_array = new_tiff_array.transpose(reversed_axes + (1, 0))

    # Check to make sure the dimensions are ok
    if new_tiff_array.ndim == 4:
        # Has no z. So, add this.
        new_tiff_array = xnumpy.add_singleton_axis_beginning(new_tiff_array)
    elif new_tiff_array.ndim != 5:
        raise Exception(
            "Invalid dimensionality for TIFF. Found shape to be \"" +
            repr(new_tiff_array.shape) + "\".")

    # Some people use pages to hold time and channel data. So, we need to
    # restructure it. However, if they are properly structuring their TIFF
    # file, then they shouldn't incur a penalty.
    if pages_to_channel > 1:
        leading_shape = new_tiff_array.shape[:-2]
        regrouped_shape = (
            new_tiff_array.shape[-2] // pages_to_channel,
            pages_to_channel * new_tiff_array.shape[-1],
        )
        new_tiff_array = new_tiff_array.reshape(leading_shape +
                                                regrouped_shape)

    new_tiff_array = xnumpy.tagging_reorder_array(
        new_tiff_array,
        from_axis_order="zyxtc",
        to_axis_order=axis_order,
        to_copy=True
    )

    # Currently is `tc` order so convert it to the expected order.
    if axis_order.index("t") > axis_order.index("c"):
        new_tiff_description = new_tiff_description.T.copy()

    return (new_tiff_array, new_tiff_description)
Exemple #5
0
def zeroed_mean_images(input_array, output_array=None):
    """
        Subtracts from each image its own mean. Where each image is
        input_array[i] with some index i.

        Args:
            input_array(numpy.ndarray):         array of images with time as
                                                the first index (must have a
                                                floating point type).

            output_array(numpy.ndarray):        provides a location to store
                                                the result (optional). May be
                                                input_array itself to work in
                                                place.

        Returns:
            result(numpy.ndarray):              The same array with each images
                                                mean removed. Where
                                                means[i] = mean(input_array[i])

        Raises:
            AssertionError:                     if input_array (or a distinct
                                                output_array) is not floating
                                                point, or if their shapes
                                                differ.


        Examples:
            >>> zeroed_mean_images(numpy.array([[0.,0.],[0.,0.]]))
            array([[ 0.,  0.],
                   [ 0.,  0.]])

            >>> zeroed_mean_images(numpy.array([[6.,0.],[0.,0.]]))
            array([[ 3., -3.],
                   [ 0.,  0.]])

            >>> zeroed_mean_images(numpy.array([[0.,0.],[0.,4.]]))
            array([[ 0.,  0.],
                   [-2.,  2.]])

            >>> zeroed_mean_images(numpy.array([[6.,0],[0.,4.]]))
            array([[ 3., -3.],
                   [-2.,  2.]])

            >>> zeroed_mean_images(numpy.array([[1.,2.],[3.,4.]]))
            array([[-0.5,  0.5],
                   [-0.5,  0.5]])

            >>> zeroed_mean_images(
            ...     numpy.array([[1,2],[3,4]])
            ... ) #doctest: +ELLIPSIS
            Traceback (most recent call last):
                ...
            AssertionError

            >>> a = numpy.array([[1.,2.],[3.,4.]])
            >>> zeroed_mean_images(a, output_array=a)
            array([[-0.5,  0.5],
                   [-0.5,  0.5]])
            >>> a
            array([[-0.5,  0.5],
                   [-0.5,  0.5]])

            >>> a = numpy.array([[1.,2.],[3.,4.]]); b = numpy.zeros_like(a)
            >>> zeroed_mean_images(a, output_array=b)
            array([[-0.5,  0.5],
                   [-0.5,  0.5]])
            >>> b
            array([[-0.5,  0.5],
                   [-0.5,  0.5]])

            >>> zeroed_mean_images(
            ...     numpy.array([[1,2],[3,4]]).astype(numpy.float32)
            ... )
            array([[-0.5,  0.5],
                   [-0.5,  0.5]], dtype=float32)

            >>> a = numpy.array([[1.,2.],[3.,4.]])
            >>> numpy.all(a != zeroed_mean_images(a))
            True

            >>> a = numpy.array([[1.,2.],[3.,4.]])
            >>> numpy.all(a == zeroed_mean_images(a, output_array=a))
            True
    """

    # Integer arrays cannot hold the (generally fractional) result.
    assert issubclass(input_array.dtype.type, numpy.floating)

    if output_array is None:
        # Leave the caller's data untouched.
        output_array = input_array.copy()
    elif output_array is not input_array:
        assert issubclass(output_array.dtype.type, numpy.floating)

        assert (input_array.shape == output_array.shape)

        output_array[:] = input_array

    # Find the mean of each frame by averaging over every axis but the first.
    # Keeping the reduced axes (as length 1) lets the means broadcast directly
    # against the frames.
    means = output_array.mean(axis=tuple(range(1, output_array.ndim)),
                              keepdims=True)

    # broadcast and subtract the means so that the mean of each frame is zero
    output_array[:] -= means

    return (output_array)
Exemple #6
0
def get_matching_paths_groups(a_filehandle, a_path_pattern):
    """
        Walks an HDF5 tree following a path pattern, one component per level,
        and reports which names matched at every level.

        Note:
            This works best when a tree structure is created systematically in
            HDF5. Then, this will recreate what the tree structure could and
            may contain.

        Args:
            a_filehandle(h5py.File):        an HDF5 file.
            a_path_pattern(str):            an internal path (with patterns for
                                            each group) for the HDF5 file.

        Returns:
            (list):                         a list with one list per level of
                                            the pattern, holding the names
                                            matched at that level in the order
                                            they were first encountered.
    """
    def get_matching_paths_groups_recursive(a_filehandle, a_path_pattern):
        is_group = isinstance(a_filehandle, h5py.Group)
        if not (is_group and a_path_pattern):
            # Nothing to descend into, or no pattern left to apply.
            return []

        # levels[0] collects names matched here; levels[k] collects names
        # matched k levels further down. OrderedDict keys act as an ordered
        # set (the values are never used).
        levels = [collections.OrderedDict()]

        # Peel off the pattern for this level; the remainder goes to children.
        pieces = a_path_pattern.strip("\b").strip("/").split("/", 1)
        this_level_pattern = pieces[0]
        remaining_pattern = pieces[1] if len(pieces) > 1 else ""

        # The surrounding slashes make the pattern cover the whole name.
        name_regex = re.compile("/" + this_level_pattern + "/")

        for member_name in a_filehandle:
            matched = name_regex.match("/" + member_name + "/")
            if matched is not None:
                member = a_filehandle[member_name]
                below = get_matching_paths_groups_recursive(
                    member, remaining_pattern)

                levels[0][member_name] = None

                # Grow the list so that each level reported from below has a
                # slot one position further along.
                shortfall = len(below) + 1 - len(levels)
                levels.extend(
                    collections.OrderedDict() for _ in range(shortfall))

                for level, names_below in enumerate(below, start=1):
                    levels[level].update(
                        (each_name, None) for each_name in names_below)

        return (levels)

    matched_levels = get_matching_paths_groups_recursive(
        a_filehandle, a_path_pattern)

    # Convert each level's ordered dictionary into a plain list of names.
    groups = [list(each_level) for each_level in matched_levels]

    return (groups)
Exemple #7
0
def get_standard_tiff_data(new_tiff_filename,
                           axis_order="tzyxc",
                           pages_to_channel=1,
                           memmap=False):
    """
        Reads a tiff file and returns a standard 5D array and the metadata.

        Args:
            new_tiff_filename(str):             the TIFF file to read in

            axis_order(str):                    how to order the axes (by
                                                default returns "tzyxc").

            pages_to_channel(int):              if channels are not normally
                                                stored in the channel variable,
                                                but are stored as pages (or as
                                                a mixture), then this will
                                                split neighboring pages into
                                                separate channels. (by default
                                                is 1 so changes nothing)

            memmap(bool):                       allows one to load the array
                                                using a memory mapped file as
                                                opposed to reading it directly.
                                                (by default is False)

        Returns:
            (ndarray/memmap, ndarray):          an array with the axis order
                                                specified and description
                                                metadata.

        Raises:
            Exception:                          if the TIFF data is neither 4D
                                                nor 5D after a singleton
                                                channel has been added to 3D
                                                data.
    """

    assert (pages_to_channel > 0)

    new_tiff_description = []
    with tifffile.TiffFile(new_tiff_filename) as new_tiff_file:
        # The keyword used to request a memory map differs between tifffile
        # releases, so try the newer spelling first.
        if memmap:
            try:
                # tifffile >= 0.13.0
                new_tiff_array = new_tiff_file.asarray(out="memmap")
            except TypeError:
                # tifffile < 0.13.0
                new_tiff_array = new_tiff_file.asarray(memmap=True)
        else:
            new_tiff_array = new_tiff_file.asarray()

        # Collect one row of descriptions per run of `pages_to_channel` pages.
        for i in iters.irange(0, len(new_tiff_file.pages), pages_to_channel):
            new_tiff_description.append([])
            for j in iters.irange(pages_to_channel):
                each_page = new_tiff_file.pages[i + j]
                each_metadata = each_page.tags
                each_desc = u""

                # Pages without a description keep the empty string.
                try:
                    each_desc = unicode(
                        each_metadata["image_description"].value
                    )
                except KeyError:
                    pass

                new_tiff_description[-1].append(each_desc)

    new_tiff_description = numpy.array(new_tiff_description)

    # Add a singleton channel if none is present.
    if new_tiff_array.ndim == 3:
        new_tiff_array = new_tiff_array[None]

    # Fit the old VIGRA style array. (may try to remove in the future)
    new_tiff_array = new_tiff_array.transpose(
        tuple(iters.irange(new_tiff_array.ndim - 1, 1, -1)) + (1, 0)
    )

    # Check to make sure the dimensions are ok
    if (new_tiff_array.ndim == 5):
        pass
    elif (new_tiff_array.ndim == 4):
        # Has no z. So, add this.
        new_tiff_array = xnumpy.add_singleton_axis_beginning(new_tiff_array)
    else:
        raise Exception(
            "Invalid dimensionality for TIFF. Found shape to be \"" +
            repr(new_tiff_array.shape) + "\"."
        )

    # Some people use pages to hold time and channel data. So, we need to
    # restructure it. However, if they are properly structuring their TIFF
    # file, then they shouldn't incur a penalty.
    if pages_to_channel > 1:
        # Floor division keeps the dimension an integer; true division would
        # produce a float, which is not a valid shape entry.
        new_tiff_array = new_tiff_array.reshape(
            new_tiff_array.shape[:-2] +
            (new_tiff_array.shape[-2] // pages_to_channel,
             pages_to_channel * new_tiff_array.shape[-1],)
        )

    new_tiff_array = xnumpy.tagging_reorder_array(
        new_tiff_array,
        from_axis_order="zyxtc",
        to_axis_order=axis_order,
        to_copy=True
    )

    # Currently is `tc` order so convert it to the expected order.
    if axis_order.index("t") > axis_order.index("c"):
        new_tiff_description = new_tiff_description.T.copy()

    return(new_tiff_array, new_tiff_description)
Exemple #8
0
def renormalized_images(input_array, ord=2, output_array=None):
    """
        Divides each image by its norm. Where each image is input_array[i]
        with some index i. Images whose norm is zero are left unchanged.

        Args:
            input_array(numpy.ndarray):         array of images with time as
                                                the first index (must have a
                                                floating point type).

            ord(int):                           Which norm to use. (L_2 or
                                                Euclidean is default)

            output_array(numpy.ndarray):        provides a location to store
                                                the result (optional). May be
                                                input_array itself to work in
                                                place.

        Returns:
            result(numpy.ndarray):              The same array with each images
                                                normalized.

        Raises:
            AssertionError:                     if input_array (or a distinct
                                                output_array) is not floating
                                                point, or if their shapes
                                                differ.


        Examples:
            >>> renormalized_images(numpy.array([[0.,1.],[1.,0.]]))
            array([[ 0.,  1.],
                   [ 1.,  0.]])

            >>> renormalized_images(
            ...     numpy.array([[0,1],[1,0]])
            ... ) #doctest: +ELLIPSIS
            Traceback (most recent call last):
                ...
            AssertionError

            >>> renormalized_images(
            ...     numpy.array([[0,1],[1,0]], dtype=numpy.float32)
            ... )
            array([[ 0.,  1.],
                   [ 1.,  0.]], dtype=float32)

            >>> renormalized_images(numpy.array([[0.,2.],[1.,0.]]))
            array([[ 0.,  1.],
                   [ 1.,  0.]])

            >>> renormalized_images(numpy.array([[2.,2.],[1.,0.]]))
            array([[ 0.70710678,  0.70710678],
                   [ 1.        ,  0.        ]])

            >>> renormalized_images(numpy.array([[1.,2.],[3.,4.]]))
            array([[ 0.4472136 ,  0.89442719],
                   [ 0.6       ,  0.8       ]])

            >>> renormalized_images(numpy.array([[1.,2.],[3.,4.]]), ord=1)
            array([[ 0.33333333,  0.66666667],
                   [ 0.42857143,  0.57142857]])

            >>> a = numpy.array([[1.,2.],[3.,4.]])
            >>> numpy.all(a != renormalized_images(a))
            True

            >>> a = numpy.array([[1.,2.],[3.,4.]])
            >>> numpy.all(a == renormalized_images(a, output_array=a))
            True

            >>> a = numpy.array([[1.,2.],[3.,4.]]); b = numpy.zeros_like(a)
            >>> numpy.all(b == renormalized_images(a, output_array=b))
            True

            >>> renormalized_images(numpy.zeros((2,3,)))
            array([[ 0.,  0.,  0.],
                   [ 0.,  0.,  0.]])
    """

    # Integer arrays cannot hold the (generally fractional) result.
    assert issubclass(input_array.dtype.type, numpy.floating)

    if output_array is None:
        # Leave the caller's data untouched.
        output_array = input_array.copy()
    elif output_array is not input_array:
        assert issubclass(output_array.dtype.type, numpy.floating)

        assert (input_array.shape == output_array.shape)

        output_array[:] = input_array

    # Each image is flattened into a vector so the vector norm applies to
    # images of any dimensionality. The explicit loop also avoids relying on
    # the axis keyword of numpy.linalg.norm, which older numpy releases lack.
    for i in range(output_array.shape[0]):
        output_array_i = output_array[i]
        output_array_i_norm = numpy.linalg.norm(output_array_i.ravel(),
                                                ord=ord)

        # An all-zero image has no direction to preserve; skip it rather than
        # divide by zero.
        if output_array_i_norm != 0:
            output_array_i /= output_array_i_norm

    return (output_array)
Exemple #9
0
def get_standard_tiff_array(new_tiff_filename,
                            axis_order="tzyxc",
                            pages_to_channel=1,
                            memmap=False):
    """
        Reads a tiff file and returns a standard 5D array.

        Args:
            new_tiff_filename(str):             the TIFF file to read in

            axis_order(str):                    how to order the axes (by
                                                default returns "tzyxc").

            pages_to_channel(int):              if channels are not normally
                                                stored in the channel variable,
                                                but are stored as pages (or as
                                                a mixture), then this will
                                                split neighboring pages into
                                                separate channels. Must be
                                                positive. (by default is 1 so
                                                changes nothing)

            memmap(bool):                       allows one to load the array
                                                using a memory mapped file as
                                                opposed to reading it directly.
                                                (by default is False)

        Returns:
            (numpy.ndarray or numpy.memmap):    an array with the axis order
                                                specified.

        Raises:
            AssertionError:                     if pages_to_channel is not
                                                positive.

            Exception:                          if the array read from the
                                                file does not end up with 4 or
                                                5 dimensions.
    """

    assert (pages_to_channel > 0)

    with tifffile.TiffFile(new_tiff_filename) as new_tiff_file:
        new_tiff_array = new_tiff_file.asarray(memmap=memmap)

    # Add a singleton channel if none is present.
    if new_tiff_array.ndim == 3:
        new_tiff_array = new_tiff_array[None]

    # Fit the old VIGRA style array. (may try to remove in the future)
    # The permutation below simply reverses the order of all the axes.
    new_tiff_array = new_tiff_array.transpose(
        tuple(iters.irange(new_tiff_array.ndim - 1, 1, -1)) + (1, 0)
    )

    # Check to make sure the dimensions are ok
    if (new_tiff_array.ndim == 5):
        pass
    elif (new_tiff_array.ndim == 4):
        # Has no z. So, add this.
        new_tiff_array = xnumpy.add_singleton_axis_beginning(new_tiff_array)
    else:
        raise Exception(
            "Invalid dimensionality for TIFF. Found shape to be \"" +
            repr(new_tiff_array.shape) + "\"."
        )

    # Some people use pages to hold time and channel data. So, we need to
    # restructure it. However, if they are properly structuring their TIFF
    # file, then they shouldn't incur a penalty.
    if pages_to_channel > 1:
        # Use floor division so the new shape stays integral. True division
        # would place a float in the shape tuple, which reshape rejects. If
        # the page count is not a multiple of pages_to_channel, reshape
        # raises a ValueError because the total size no longer matches.
        new_tiff_array = new_tiff_array.reshape(
            new_tiff_array.shape[:-2] +
            (new_tiff_array.shape[-2] // pages_to_channel,
             pages_to_channel * new_tiff_array.shape[-1],)
        )

    new_tiff_array = xnumpy.tagging_reorder_array(
        new_tiff_array,
        from_axis_order="zyxtc",
        to_axis_order=axis_order,
        to_copy=True
    )

    return(new_tiff_array)
Exemple #10
0
def renormalized_images(input_array, ord=2, output_array=None):
    """
        Divides every image by its own norm. Here an image is
        input_array[i] for some index i along the first axis, and its norm
        is computed on the flattened image.

        Images whose norm is zero are left unchanged.

        Args:
            input_array(numpy.ndarray):         array of images with time as
                                                the first index (must have a
                                                floating point type).

            ord(int):                           Which norm to use. (L_2 or
                                                Euclidean is default)

            output_array(numpy.ndarray):        provides a location to store
                                                the result (optional). May be
                                                input_array itself to work
                                                in-place.

        Returns:
            result(numpy.ndarray):              The array holding each
                                                normalized image (a new copy
                                                if output_array is None,
                                                otherwise output_array).


        Examples:
            >>> renormalized_images(numpy.array([[0.,1.],[1.,0.]]))
            array([[ 0.,  1.],
                   [ 1.,  0.]])

            >>> renormalized_images(
            ...     numpy.array([[0,1],[1,0]])
            ... ) #doctest: +ELLIPSIS
            Traceback (most recent call last):
                ...
            AssertionError

            >>> renormalized_images(
            ...     numpy.array([[0,1],[1,0]], dtype=numpy.float32)
            ... )
            array([[ 0.,  1.],
                   [ 1.,  0.]], dtype=float32)

            >>> renormalized_images(numpy.array([[0.,2.],[1.,0.]]))
            array([[ 0.,  1.],
                   [ 1.,  0.]])

            >>> renormalized_images(numpy.array([[2.,2.],[1.,0.]]))
            array([[ 0.70710678,  0.70710678],
                   [ 1.        ,  0.        ]])

            >>> renormalized_images(numpy.array([[1.,2.],[3.,4.]]))
            array([[ 0.4472136 ,  0.89442719],
                   [ 0.6       ,  0.8       ]])

            >>> renormalized_images(numpy.array([[1.,2.],[3.,4.]]), ord=1)
            array([[ 0.33333333,  0.66666667],
                   [ 0.42857143,  0.57142857]])

            >>> a = numpy.array([[1.,2.],[3.,4.]])
            >>> numpy.all(a != renormalized_images(a))
            True

            >>> a = numpy.array([[1.,2.],[3.,4.]])
            >>> numpy.all(a == renormalized_images(a, output_array=a))
            True

            >>> a = numpy.array([[1.,2.],[3.,4.]]); b = numpy.zeros_like(a)
            >>> numpy.all(b == renormalized_images(a, output_array=b))
            True

            >>> renormalized_images(numpy.zeros((2,3,)))
            array([[ 0.,  0.,  0.],
                   [ 0.,  0.,  0.]])
    """

    assert issubclass(input_array.dtype.type, numpy.floating)

    # Decide where the result lives: a fresh copy, the caller's separate
    # buffer (seeded with the input), or the input itself (in-place).
    if output_array is None:
        output_array = input_array.copy()
    elif input_array is not output_array:
        assert issubclass(output_array.dtype.type, numpy.floating)
        assert (input_array.shape == output_array.shape)

        output_array[:] = input_array

    # The norm is computed one image at a time on the flattened image rather
    # than through an axis argument of numpy.linalg.norm.
    for frame_index in iters.irange(output_array.shape[0]):
        each_image = output_array[frame_index]
        each_image_norm = numpy.linalg.norm(each_image.ravel(), ord=ord)

        # Nothing sensible to divide by; leave an all-zero image as is.
        if each_image_norm == 0:
            continue

        each_image /= each_image_norm

    return(output_array)
Exemple #11
0
def zeroed_mean_images(input_array, output_array=None):
    """
        Subtracts from every image its own mean. Here an image is
        input_array[i] for some index i along the first axis.

        Args:
            input_array(numpy.ndarray):         array of images with time as
                                                the first index (must have a
                                                floating point type).

            output_array(numpy.ndarray):        provides a location to store
                                                the result (optional). May be
                                                input_array itself to work
                                                in-place.

        Returns:
            result(numpy.ndarray):              The array holding each image
                                                with its mean removed (a new
                                                copy if output_array is None,
                                                otherwise output_array). Where
                                                means[i] = mean(input_array[i])


        Examples:
            >>> zeroed_mean_images(numpy.array([[0.,0.],[0.,0.]]))
            array([[ 0.,  0.],
                   [ 0.,  0.]])

            >>> zeroed_mean_images(numpy.array([[6.,0.],[0.,0.]]))
            array([[ 3., -3.],
                   [ 0.,  0.]])

            >>> zeroed_mean_images(numpy.array([[0.,0.],[0.,4.]]))
            array([[ 0.,  0.],
                   [-2.,  2.]])

            >>> zeroed_mean_images(numpy.array([[6.,0],[0.,4.]]))
            array([[ 3., -3.],
                   [-2.,  2.]])

            >>> zeroed_mean_images(numpy.array([[1.,2.],[3.,4.]]))
            array([[-0.5,  0.5],
                   [-0.5,  0.5]])

            >>> zeroed_mean_images(
            ...     numpy.array([[1,2],[3,4]])
            ... ) #doctest: +ELLIPSIS
            Traceback (most recent call last):
                ...
            AssertionError

            >>> a = numpy.array([[1.,2.],[3.,4.]])
            >>> zeroed_mean_images(a, output_array=a)
            array([[-0.5,  0.5],
                   [-0.5,  0.5]])
            >>> a
            array([[-0.5,  0.5],
                   [-0.5,  0.5]])

            >>> a = numpy.array([[1.,2.],[3.,4.]]); b = numpy.zeros_like(a)
            >>> zeroed_mean_images(a, output_array=b)
            array([[-0.5,  0.5],
                   [-0.5,  0.5]])
            >>> b
            array([[-0.5,  0.5],
                   [-0.5,  0.5]])

            >>> zeroed_mean_images(
            ...     numpy.array([[1,2],[3,4]]).astype(numpy.float32)
            ... )
            array([[-0.5,  0.5],
                   [-0.5,  0.5]], dtype=float32)

            >>> a = numpy.array([[1.,2.],[3.,4.]])
            >>> numpy.all(a != zeroed_mean_images(a))
            True

            >>> a = numpy.array([[1.,2.],[3.,4.]])
            >>> numpy.all(a == zeroed_mean_images(a, output_array=a))
            True
    """

    assert issubclass(input_array.dtype.type, numpy.floating)

    # Decide where the result lives: a fresh copy, the caller's separate
    # buffer (seeded with the input), or the input itself (in-place).
    if output_array is None:
        output_array = input_array.copy()
    elif input_array is not output_array:
        assert issubclass(output_array.dtype.type, numpy.floating)
        assert (input_array.shape == output_array.shape)

        output_array[:] = input_array

    # Average over every axis except the first to get one mean per frame.
    non_frame_axes = tuple(iters.irange(1, output_array.ndim))
    frame_means = output_array.mean(axis=non_frame_axes)

    # Pad the means with trailing singleton axes so they broadcast against
    # the full array.
    trailing_singletons = (1,) * (output_array.ndim - frame_means.ndim)
    frame_means = frame_means.reshape(frame_means.shape + trailing_singletons)

    # Remove each frame's mean in-place so that every frame averages to zero.
    output_array[:] -= frame_means

    return(output_array)
Exemple #12
0
def transform(im0,
              scale=5,
              include_intermediates=False,
              include_lower_scales=False,
              out=None):
    """
        Performs integral steps of the wavelet transform on im0 up to the given
        scale. If scale is an iterable, then it must provide one value for
        each dimension of im0, and each dimension is only smoothed for the
        steps up to its own scale. If scale is a single value, it is used for
        every dimension.

        A RuntimeWarning is issued if im0 is not of type float32, in which
        case a float32 copy is used instead.

        Args:
            im0(numpy.ndarray):                  the original image.
            scale(int or tuple of ints):         the scale of wavelet transform
                                                 to apply.

            include_intermediates(bool):         whether to return
                                                 intermediates or not
                                                 (default False).

            include_lower_scales(bool):          whether to include lower
                                                 scales or not (default False)
                                                 (ignored if
                                                 include_intermediates is True)

            out(numpy.ndarray):                  holds final result (cannot use
                                                 unless include_intermediates
                                                 is False or an AssertionError
                                                 will be raised.) Its shape
                                                 must match the result's shape.
                                                 If it is not float32, a
                                                 RuntimeWarning is issued;
                                                 when include_lower_scales is
                                                 False, the result is then
                                                 returned in a new array and
                                                 out is left untouched.

        Returns:
            W, out(tuple of numpy.ndarrays):     returns the final result of
                                                 the wavelet transform and
                                                 possibly other scales. Also,
                                                 may return the intermediates.


        Examples:
            >>> transform(numpy.eye(3, dtype = numpy.float32),
            ...     scale = 1,
            ...     include_intermediates = True,
            ...     include_lower_scales = True) # doctest: +NORMALIZE_WHITESPACE
            (array([[[ 0.59375, -0.375  , -0.34375],
                     [-0.375  ,  0.625  , -0.375  ],
                     [-0.34375, -0.375  ,  0.59375]]], dtype=float32),
             array([[[ 1.     ,  0.     ,  0.     ],
                     [ 0.     ,  1.     ,  0.     ],
                     [ 0.     ,  0.     ,  1.     ]],
                    [[ 0.40625,  0.375  ,  0.34375],
                     [ 0.375  ,  0.375  ,  0.375  ],
                     [ 0.34375,  0.375  ,  0.40625]]], dtype=float32))

            >>> transform(numpy.eye(3, dtype = numpy.float32),
            ...     scale = 1,
            ...     include_intermediates = False,
            ...     include_lower_scales = True)
            array([[[ 0.59375, -0.375  , -0.34375],
                    [-0.375  ,  0.625  , -0.375  ],
                    [-0.34375, -0.375  ,  0.59375]]], dtype=float32)

            >>> transform(numpy.eye(3, dtype = numpy.float32),
            ...     scale = 1,
            ...     include_intermediates = False,
            ...     include_lower_scales = False)
            array([[ 0.59375, -0.375  , -0.34375],
                   [-0.375  ,  0.625  , -0.375  ],
                   [-0.34375, -0.375  ,  0.59375]], dtype=float32)

            >>> transform(numpy.eye(3, dtype = numpy.float32),
            ...     scale = (0, 1),
            ...     include_intermediates = False,
            ...     include_lower_scales = False)
            array([[ 0.625, -0.25 , -0.125],
                   [-0.5  ,  0.5  , -0.5  ],
                   [-0.125, -0.25 ,  0.625]], dtype=float32)

            >>> out = numpy.zeros((3, 3), dtype = numpy.float32)
            >>> transform(numpy.eye(3, dtype = numpy.float32),
            ...     scale = 1,
            ...     include_intermediates = False,
            ...     include_lower_scales = False,
            ...     out = out)
            array([[ 0.59375, -0.375  , -0.34375],
                   [-0.375  ,  0.625  , -0.375  ],
                   [-0.34375, -0.375  ,  0.59375]], dtype=float32)
            >>> out
            array([[ 0.59375, -0.375  , -0.34375],
                   [-0.375  ,  0.625  , -0.375  ],
                   [-0.34375, -0.375  ,  0.59375]], dtype=float32)

            >>> out = numpy.eye(3, dtype = numpy.float32)
            >>> transform(out,
            ...     scale = 1,
            ...     include_intermediates = False,
            ...     include_lower_scales = False,
            ...     out = out)
            array([[ 0.59375, -0.375  , -0.34375],
                   [-0.375  ,  0.625  , -0.375  ],
                   [-0.34375, -0.375  ,  0.59375]], dtype=float32)
            >>> out
            array([[ 0.59375, -0.375  , -0.34375],
                   [-0.375  ,  0.625  , -0.375  ],
                   [-0.34375, -0.375  ,  0.59375]], dtype=float32)

            >>> out = numpy.empty((1, 3, 3), dtype = numpy.float32)
            >>> transform(numpy.eye(3, dtype = numpy.float32),
            ...     scale = 1,
            ...     include_intermediates = False,
            ...     include_lower_scales = True,
            ...     out = out) # doctest: +NORMALIZE_WHITESPACE
            array([[[ 0.59375, -0.375  , -0.34375],
                    [-0.375  ,  0.625  , -0.375  ],
                    [-0.34375, -0.375  ,  0.59375]]], dtype=float32)
            >>> out
            array([[[ 0.59375, -0.375  , -0.34375],
                    [-0.375  ,  0.625  , -0.375  ],
                    [-0.34375, -0.375  ,  0.59375]]], dtype=float32)

            >>> out = numpy.empty((1, 3, 3), dtype = numpy.float64)
            >>> transform(numpy.eye(3, dtype = numpy.float32),
            ...     scale = 1,
            ...     include_intermediates = False,
            ...     include_lower_scales = True,
            ...     out = out) # doctest: +NORMALIZE_WHITESPACE
            array([[[ 0.59375, -0.375  , -0.34375],
                    [-0.375  ,  0.625  , -0.375  ],
                    [-0.34375, -0.375  ,  0.59375]]])
            >>> out
            array([[[ 0.59375, -0.375  , -0.34375],
                    [-0.375  ,  0.625  , -0.375  ],
                    [-0.34375, -0.375  ,  0.59375]]])

            >>> out = numpy.eye(3, dtype = numpy.uint8)
            >>> transform(out,
            ...     scale = 1,
            ...     include_intermediates = False,
            ...     include_lower_scales = False,
            ...     out = out)
            array([[ 0.59375, -0.375  , -0.34375],
                   [-0.375  ,  0.625  , -0.375  ],
                   [-0.34375, -0.375  ,  0.59375]], dtype=float32)
            >>> out
            array([[1, 0, 0],
                   [0, 1, 0],
                   [0, 0, 1]], dtype=uint8)
    """

    # All work is done in float32. Warn the caller and convert otherwise.
    if not issubclass(im0.dtype.type, numpy.float32):
        warnings.warn(
            "Provided im0 with type \"" + repr(im0.dtype.type) + "\". " +
            "Will be cast to type \"" + repr(numpy.float32) + "\"",
            RuntimeWarning
        )

        im0 = im0.astype(numpy.float32)

    # Make sure that we have scale as a list.
    # If it is not a list, then make a singleton list.
    # (A non-iterable scale makes list(scale) raise TypeError, in which case
    # the same scale is repeated for every dimension of im0.)
    try:
        scale = numpy.array(list(scale))

        assert (scale.ndim == 1), \
            "Scale should only have 1 dimension. " + \
            "Instead, got scale.ndim = \"" + str(scale.ndim) + "\"."

        assert (len(scale) == im0.ndim), \
            "Scale should have a value of each dimension of im0. " + \
            "Instead, got len(scale) = \"" + str(len(scale)) + "\" and " + \
            "im0.ndim = \"" + str(im0.ndim) + "\"."

    except TypeError:
        scale = numpy.repeat([scale], im0.ndim)


    # imPrev holds the image before the current smoothing step and imCur the
    # image after it. Where they are stored depends on the requested output.
    imPrev = None
    imCur = None
    if include_intermediates:
        # Intermediates are always returned in freshly allocated arrays.
        assert (out is None)

        # W gets one entry per step; imOut gets one extra leading entry for
        # the unsmoothed original image.
        W = numpy.zeros((scale.max(),) + im0.shape, dtype=numpy.float32)
        imOut = numpy.zeros(
            (scale.max() + 1,) + im0.shape, dtype=numpy.float32
        )
        imOut[0] = im0

        imCur = imOut[0]
        imPrev = imCur
    else:
        if include_lower_scales:
            # One result per step is kept in W, which is either newly
            # allocated or the caller's out array.
            if out is None:
                W = numpy.zeros(
                    (scale.max(),) + im0.shape, dtype=numpy.float32
                )
                out = W
            else:
                assert (out.shape == ((scale.max(),) + im0.shape))

                # NOTE(review): despite the warning text, out is not replaced
                # here; results are still assigned into the provided array.
                if not issubclass(out.dtype.type, numpy.float32):
                    warnings.warn(
                        "Provided out with type \"" + repr(out.dtype.type) +
                        "\". " +
                        "Will be cast to type \"" + repr(numpy.float32) + "\"",
                        RuntimeWarning
                    )

                W = out

            imPrev = numpy.empty_like(im0)
        else:
            # Only the final result is wanted. It is built up in imPrev, which
            # shares storage with out.
            if out is not None:
                assert (out.shape == im0.shape)

                if not issubclass(out.dtype.type, numpy.float32):
                    warnings.warn(
                        "Provided out with type \"" + repr(out.dtype.type) +
                        "\". " +
                        "Will be cast to type \"" + repr(numpy.float32) + "\"",
                        RuntimeWarning
                    )

                    # Rebinds the local name to a new float32 array. The
                    # array passed in by the caller is not modified.
                    out = im0.astype(numpy.float32)

                imPrev = out
            else:
                imPrev = numpy.empty_like(im0)
                out = imPrev

        # astype makes a copy, so imCur never shares storage with im0 (or
        # with out when the caller passed the same array for both).
        imCur = im0.astype(numpy.float32)


    # NOTE(review): if every scale is 0, this loop never runs. In the final
    # branch below, imPrev would then never have been filled from imCur —
    # confirm whether a scale of 0 is meant to be supported.
    for i in irange(1, scale.max() + 1):
        if include_intermediates:
            # Step i starts from a copy of step i - 1's result and is
            # smoothed in-place within imOut.
            imPrev = imCur
            imOut[i] = imOut[i - 1]
            imCur = imOut[i]
        else:
            # Remember the image as it was before this step's smoothing.
            imPrev[:] = imCur

        # presumably the 1-D smoothing kernel for step i; the helper is
        # defined outside this block — verify there.
        h_ker = binomial_1D_vigra_kernel(i)

        # Smooth in-place along each dimension whose scale has not yet been
        # exceeded.
        for d in irange(len(scale)):
            if i <= scale[d]:
                vigra.filters.convolveOneDimension(imCur, d, h_ker, out=imCur)

        # The result for this step is the difference between the image before
        # and after smoothing.
        if include_intermediates or include_lower_scales:
            W[i - 1] = imPrev - imCur

    if include_intermediates:
        return((W, imOut))
    elif include_lower_scales:
        return(W)
    else:
        # Same as returning imPrev - imCur.
        # Except, it avoids generating another array to hold the result.
        out -= imCur
        return(out)
Exemple #13
0
def generate_neurons_blocks(input_filename,
                            output_filename,
                            num_processes=multiprocessing.cpu_count(),
                            block_shape=None,
                            num_blocks=None,
                            half_window_shape=None,
                            half_border_shape=None,
                            use_drmaa=False,
                            num_drmaa_cores=16,
                            debug=False,
                            **parameters):
    # TODO: Move function into new module with its own command line interface.
    # TODO: Heavy refactoring required on this function.

    # Extract and validate file extensions.

    # Parse input filename and validate that the name is acceptable
    input_filename_ext, input_dataset_name = hdf5.serializers.split_hdf5_path(input_filename)

    # Parse output filename and validate that the name is acceptable
    output_filename_ext, output_group_name = hdf5.serializers.split_hdf5_path(output_filename)


    # Directory where individual block runs will be stored.
    intermediate_output_dir = output_filename_ext.rsplit(
        os.path.splitext(output_filename_ext)[1], 1)[0] + "_blocks"


    # Read the input data.
    original_images_shape_array = None
    with h5py.File(input_filename_ext, "r") as input_file_handle:
        original_images_shape_array = numpy.array(
            input_file_handle[input_dataset_name].shape
        )

    # Get the amount of the border to slice
    half_border_shape_array = None
    if half_border_shape is None:
        half_border_shape_array = numpy.zeros(
            len(original_images_shape_array), dtype=int
        )
    else:
        assert (len(half_window_shape) == len(original_images_shape_array))

        half_border_shape_array = numpy.array(half_border_shape)

        # Should be of type integer
        assert (issubclass(half_border_shape_array.dtype.type, numpy.integer))

        # Should not cut along temporal portion.
        # Maybe replace with a warning.
        assert (half_border_shape[0] == 0)

    # TODO: Refactor to expanded_numpy.
    # Cuts boundaries from original_images_shape
    original_images_pared_shape_array = original_images_shape_array - \
                                        2*half_border_shape_array

    # At least one of them must be specified. If not some mixture of both.
    assert ((block_shape is not None) or (num_blocks is not None))

    # Size of the block to use by pixels
    block_shape_array = None
    block_shape_array_undefined = None
    if block_shape is None:
        block_shape_array = -numpy.ones(
            original_images_pared_shape_array.shape, dtype=int
        )
        block_shape_array_undefined = numpy.ones(
            original_images_pared_shape_array.shape, dtype=bool
        )
    else:
        # Should have the same number of values in each
        assert (len(original_images_pared_shape_array) == len(block_shape))

        block_shape_array = numpy.array(block_shape, dtype=int)

        # Should be of type integer
        assert issubclass(block_shape_array.dtype.type, numpy.integer)

        block_shape_array_undefined = (block_shape_array == -1)

    # Number of
    num_blocks_array = None
    num_blocks_array_undefined = None
    if num_blocks is None:
        num_blocks_array = - \
            numpy.ones(original_images_pared_shape_array.shape, dtype=int)
        num_blocks_array_undefined = numpy.ones(
            original_images_pared_shape_array.shape, dtype=bool)
    else:
        # Should have the same number of values in each
        assert (len(original_images_pared_shape_array) == len(num_blocks))

        num_blocks_array = numpy.array(num_blocks, dtype=int)

        # Should be of type integer
        assert issubclass(num_blocks_array.dtype.type, numpy.integer)

        num_blocks_array_undefined = (num_blocks_array == -1)

    # Want to ensure that both aren't defined.
    assert ~(~block_shape_array_undefined & ~num_blocks_array_undefined).all()

    # If both are undefined, then the block should span that dimension
    missing_both = (block_shape_array_undefined & num_blocks_array_undefined)
    block_shape_array[
        missing_both] = original_images_pared_shape_array[missing_both]
    num_blocks_array[missing_both] = 1
    # Thus, we have resolved these values and can continue.
    block_shape_array_undefined[missing_both] = False
    num_blocks_array_undefined[missing_both] = False

    # Replace undefined values in block_shape_array
    missing_block_shape_array, block_shape_array_remainder = divmod(
        original_images_pared_shape_array[block_shape_array_undefined],
        num_blocks_array[block_shape_array_undefined]
    )
    # Block shape must be well defined.
    assert (block_shape_array_remainder == 0).all()
    missing_block_shape_array = missing_block_shape_array.astype(int)
    block_shape_array[block_shape_array_undefined] = missing_block_shape_array

    # Replace undefined values in num_blocks_array
    missing_num_blocks_array, num_blocks_array_remainder = divmod(
        original_images_pared_shape_array[num_blocks_array_undefined],
        block_shape_array[num_blocks_array_undefined]
    )
    # Allow some blocks to be smaller
    missing_num_blocks_array += (num_blocks_array_remainder != 0).astype(int)
    num_blocks_array[num_blocks_array_undefined] = missing_num_blocks_array
    # Get the overlap window
    half_window_shape_array = None
    if half_window_shape is None:
        half_window_shape_array = block_shape_array / 2.0
    else:
        assert (len(half_window_shape) == len(
            original_images_pared_shape_array))

        half_window_shape_array = numpy.array(half_window_shape)

        assert issubclass(half_window_shape_array.dtype.type, numpy.integer)

    # Want to make our window size is at least as large as the one used for
    # the f0 calculation.
    if "extract_f0" in parameters["generate_neurons"]["preprocess_data"]:
        #assert (parameters["generate_neurons"]["preprocess_data"]["extract_f0"]["half_window_size"] == half_window_shape_array[0])
        assert (parameters["generate_neurons"]["preprocess_data"]["extract_f0"]["half_window_size"] <= half_window_shape_array[0])

    # Estimate bounds for each slice. Uses typical python [begin, end) for the
    # indices.
    estimated_bounds = numpy.zeros(
        tuple(num_blocks_array),
        dtype=(int, original_images_pared_shape_array.shape + (2,))
    )

    for each_block_indices in iters.index_generator(*num_blocks_array):
        for each_dim, each_block_dim_index in enumerate(each_block_indices):
            estimated_lower_bound = each_block_dim_index * block_shape_array[each_dim]
            estimated_upper_bound = (each_block_dim_index + 1) * block_shape_array[each_dim]

            estimated_bounds[each_block_indices][each_dim] = numpy.array([
                estimated_lower_bound, estimated_upper_bound
            ])

    original_images_pared_slices = numpy.zeros(
        estimated_bounds.shape[:-2],
        dtype=[("actual", int, estimated_bounds.shape[-2:]),
               ("windowed", int, estimated_bounds.shape[-2:]),
               ("windowed_stack_selection", int, estimated_bounds.shape[-2:]),
               ("windowed_block_selection", int, estimated_bounds.shape[-2:])])

    # Get the slice that is within bounds
    original_images_pared_slices["actual"] = estimated_bounds
    original_images_pared_slices["actual"][..., 0] = numpy.where(
        0 < original_images_pared_slices["actual"][..., 0],
        original_images_pared_slices["actual"][..., 0],
        0
    )
    original_images_pared_slices["actual"][..., 1] = numpy.where(
        original_images_pared_slices["actual"][..., 1] < original_images_pared_shape_array,
        original_images_pared_slices["actual"][..., 1],
        original_images_pared_shape_array
    )

    # Gets the defined half_window_size.
    window_addition = numpy.zeros(estimated_bounds.shape, dtype=int)
    window_addition[..., 0] = -half_window_shape_array
    window_addition[..., 1] = half_window_shape_array

    # Get the slice with a window added.
    original_images_pared_slices[
        "windowed"] = estimated_bounds + window_addition
    original_images_pared_slices["windowed"][..., 0] = numpy.where(
        0 < original_images_pared_slices["windowed"][..., 0],
        original_images_pared_slices["windowed"][..., 0],
        0
    )
    original_images_pared_slices["windowed"][..., 1] = numpy.where(
        original_images_pared_slices["windowed"][..., 1] < original_images_pared_shape_array,
        original_images_pared_slices["windowed"][..., 1],
        original_images_pared_shape_array
    )

    # Get the slice information to get the windowed block from the original
    # image stack.
    original_images_pared_slices["windowed_stack_selection"] = original_images_pared_slices["windowed"]
    original_images_pared_slices["windowed_stack_selection"] += xnumpy.expand_view(
        half_border_shape_array, reps_after=2
    )

    # Get slice information for the portion within
    # `original_images_pared_slices["windowed"]`, which corresponds to
    # `original_images_pared_slices["actual"]`.
    #original_images_pared_slices["windowed_block_selection"][..., 0] = 0
    original_images_pared_slices["windowed_block_selection"][..., 1] = (
        original_images_pared_slices["actual"][..., 1] - original_images_pared_slices["actual"][..., 0]
    )
    original_images_pared_slices["windowed_block_selection"][:] += xnumpy.expand_view(
        original_images_pared_slices["actual"][..., 0] - original_images_pared_slices["windowed"][..., 0],
        reps_after=2
    )

    # Get a directory for intermediate results.
    try:
        os.mkdir(intermediate_output_dir)
    except OSError:
        # If it already exists, that is fine.
        pass

    intermediate_config = intermediate_output_dir + "/" + "config.json"

    # Overwrite the config file always
    with open(intermediate_config, "w") as fid:
        json.dump(
            dict(list(parameters.items()) + list({"debug" : debug}.items())),
            fid,
            indent=4,
            separators=(",", " : ")
        )
        fid.write("\n")

    # Construct an HDF5 file for each block
    input_filename_block = []
    output_filename_block = []
    stdout_filename_block = []
    stderr_filename_block = []
    with h5py.File(output_filename_ext, "a") as output_file_handle:
        # Create a new output directory if doesn't exists.
        output_file_handle.require_group(output_group_name)

        output_group = output_file_handle[output_group_name]

        if "original_images" not in output_group:
            if input_filename_ext == output_filename_ext:
                output_group["original_images"] = h5py.SoftLink(
                    input_dataset_name
                )
            else:
                output_group["original_images"] = h5py.ExternalLink(
                    input_filename_ext,
                    "/" + input_dataset_name
                )

        output_group.require_group("blocks")

        output_group_blocks = output_group["blocks"]

        input_file_handle = None
        try:
            # Skipping using region refs.
            input_file_handle = h5py.File(
                input_filename_ext, "r"
            )
        except IOError:
            # File is already open
            input_file_handle = output_file_handle

        for i, i_str, sequential_block_i in iters.filled_stringify_enumerate(
                original_images_pared_slices.flat
        ):
            intermediate_basename_i = intermediate_output_dir + "/" + i_str

            # Hold redirected stdout and stderr for each subprocess.
            stdout_filename_block.append(
                intermediate_basename_i + os.extsep + "out")
            stderr_filename_block.append(
                intermediate_basename_i + os.extsep + "err")

            # Ensure that the blocks are corrected to deal with trimming of the image stack
            # Must be done after the calculation of
            # original_images_pared_slices["windowed_block_selection"].
            sequential_block_i_windowed = sequential_block_i["windowed_stack_selection"]
            slice_i = tuple(
                slice(_1, _2, 1) for _1, _2 in sequential_block_i_windowed
            )

            if i_str not in output_group_blocks:
                output_group_blocks[i_str] = []
                output_group_blocks[i_str].attrs["filename"] = input_file_handle.filename
                output_group_blocks[i_str].attrs["dataset"] = input_dataset_name
                output_group_blocks[i_str].attrs["slice"] = str(slice_i)

            block_i = output_group_blocks[i_str]

            with h5py.File(intermediate_basename_i + os.extsep + "h5", "a") as each_block_file_handle:
                # Link "original_images" to this block's entry in the output
                # file, unless the link already exists. An external link is
                # always used here, with the output file's path expressed
                # relative to the intermediate output directory.
                if "original_images" not in each_block_file_handle:
                    each_block_file_handle["original_images"] = h5py.ExternalLink(
                        os.path.relpath(
                            block_i.file.filename, intermediate_output_dir
                        ),
                        block_i.name
                    )

                input_filename_block.append(
                    each_block_file_handle.filename + "/" + "original_images"
                )
                output_filename_block.append(
                    each_block_file_handle.filename + "/"
                )

        if input_file_handle != output_file_handle:
            input_file_handle.close()

    cur_module_dirpath = os.path.dirname(os.path.dirname(nanshe.__file__))
    cur_module_filepath = os.path.splitext(os.path.abspath(__file__))[0]
    cur_module_name = os.path.relpath(cur_module_filepath, cur_module_dirpath)
    cur_module_name = cur_module_name.replace(os.path.sep, ".")
    cur_module_filepath += os.extsep + "py"

    import sys

    python = sys.executable

    executable_run = ""
    executable_run += "from sys import argv, path, exit; "

    executable_run += "path[:] = [\"%s\"] + [_ for _ in path if _ != \"%s\"]; " % \
                      (cur_module_dirpath, cur_module_dirpath,)
    executable_run += "from %s import main; exit(main(*argv))" % \
                      (cur_module_name,)

    block_process_args_gen = iters.izip(
        itertools.repeat(python),
        itertools.repeat("-c"),
        itertools.repeat(executable_run),
        itertools.repeat(intermediate_config),
        input_filename_block,
        output_filename_block,
        stdout_filename_block,
        stderr_filename_block
    )

    if use_drmaa:
        # Attempt to import drmaa.
        # If it fails to import, either the user has no intent in using it or
        # forgot to install it. If it imports, but fails to find symbols,
        # then the user has not set DRMAA_LIBRARY_PATH or
        # does not have libdrmaa.so.
        try:
            import drmaa
        except ImportError:
            # python-drmaa is not installed.
            logger.error(
                "Was not able to import drmaa. " +
                "If this is meant to be run using the OpenGrid submission " +
                "system, then drmaa needs to be installed via pip or " +
                "easy_install."
            )
            raise
        except RuntimeError:
            # The drmaa library was not specified, but python-drmaa is
            # installed.
            logger.error(
                "Was able to import drmaa. " +
                "However, the drmaa library could not be found. Please " +
                "either specify the location of libdrmaa.so using the " +
                "DRMAA_LIBRARY_PATH environment variable or disable/remove " +
                "use_drmaa from the config file."
            )
            raise

        s=drmaa.Session()
        s.initialize()

        ready_processes = []
        for each_arg_pack in block_process_args_gen:
            ready_processes.append((each_arg_pack, s.createJobTemplate()))
            ready_processes[-1][1].jobName = os.path.basename(
                os.path.splitext(cur_module_filepath)[0]
            ) + "-" + os.path.basename(
                os.path.dirname(each_arg_pack[3].split(".h5")[0])
            ) + "-" + os.path.basename(each_arg_pack[3].split(".h5")[0])
            ready_processes[-1][1].remoteCommand = each_arg_pack[0]
            ready_processes[-1][1].args = each_arg_pack[1:-2]
            ready_processes[-1][1].jobEnvironment = os.environ
            ready_processes[-1][1].inputPath = "localhost:" + os.devnull
            ready_processes[-1][1].outputPath = "localhost:" + each_arg_pack[-2]
            ready_processes[-1][1].errorPath = "localhost:" + each_arg_pack[-1]
            ready_processes[-1][1].workingDirectory = os.getcwd()
            ready_processes[-1][1].nativeSpecification = "-pe batch " + str(num_drmaa_cores)


        running_processes = []
        for each_arg_pack, each_process_template in ready_processes:
            each_process_id = s.runJob(each_process_template)
            running_processes.append(
                (each_arg_pack, each_process_id, each_process_template)
            )
            logger.info(
                "Started new process ( \"" + " ".join(each_arg_pack) + "\" )."
            )

        start_queue_time = time.time()
        logger.info("Waiting for queued jobs to complete.")

        #finished_processes = []
        for each_arg_pack, each_process_id, each_process_template in running_processes:
            each_process_status = s.wait(each_process_id)

            if not each_process_status.hasExited:
                raise RuntimeError(
                    "The process (\"" + " ".join(each_arg_pack) +
                    "\") has exited prematurely."
                )

            logger.info(
                "Finished process ( \"" + " ".join(each_arg_pack) + "\" )."
            )
            s.deleteJobTemplate(each_process_template)
            #finished_processes.append((each_arg_pack, each_process_id))

        s.exit()

        end_queue_time = time.time()
        diff_queue_time = end_queue_time - start_queue_time

        logger.info(
            "Run time for queued jobs to complete is \""
            + str(diff_queue_time) + " s\"."
        )
    else:
        # TODO: Refactor into a separate class (have it return futures somehow)
        #finished_processes = []
        running_processes = []
        pool_tasks_empty = False
        while (not pool_tasks_empty) or len(running_processes):
            while (not pool_tasks_empty) and (len(running_processes) < num_processes):
                try:
                    each_arg_pack = next(block_process_args_gen)
                    each_arg_pack, each_stdout_filename, each_stderr_filename = each_arg_pack[:-2], each_arg_pack[-2], each_arg_pack[-1]
                    each_process = subprocess.Popen(
                        each_arg_pack,
                        stdout=open(each_stdout_filename, "w"),
                        stderr=open(each_stderr_filename, "w")
                    )

                    running_processes.append((each_arg_pack, each_process,))

                    logger.info(
                        "Started new process ( \"" + " ".join(each_arg_pack) + "\" )."
                    )
                except StopIteration:
                    pool_tasks_empty = True

            while ((not pool_tasks_empty) and
                       (len(running_processes) >= num_processes)) or \
                    (pool_tasks_empty and len(running_processes)):
                time.sleep(1)

                i = 0
                while i < len(running_processes):
                    if running_processes[i][1].poll() is not None:
                        logger.info(
                            "Finished process ( \"" +
                            " ".join(running_processes[i][0]) + "\" )."
                        )

                        #finished_processes.append(running_processes[i])
                        del running_processes[i]
                    else:
                        time.sleep(1)
                        i += 1

        # finished_processes = None

    start_time = time.time()
    logger.info("Starting merge over all blocks.")

    with h5py.File(output_filename_ext, "a") as output_file_handle:
        output_group = output_file_handle[output_group_name]

        new_neurons_set = segment.get_empty_neuron(
            shape=tuple(original_images_shape_array[1:]), dtype=float
        )

        for i, i_str, (output_filename_block_i, sequential_block_i) in iters.filled_stringify_enumerate(
                iters.izip(output_filename_block, original_images_pared_slices.flat)):
            windowed_slice_i = tuple(
                slice(_1, _2, 1) for _1, _2 in [(None, None)] + sequential_block_i["windowed_stack_selection"].tolist()[1:]
            )
            window_trimmed_i = tuple(
                slice(_1, _2, 1) for _1, _2 in sequential_block_i["windowed_block_selection"].tolist()
            )
            output_filename_block_i = output_filename_block_i.rstrip("/")

            with h5py.File(output_filename_block_i, "r") as each_block_file_handle:
                if "neurons" in each_block_file_handle:
                    neurons_block_i_smaller = hdf5.serializers.read_numpy_structured_array_from_HDF5(
                        each_block_file_handle, "/neurons"
                    )

                    neurons_block_i_windowed_count = numpy.squeeze(
                        numpy.apply_over_axes(
                            numpy.sum,
                            neurons_block_i_smaller["mask"].astype(float),
                            tuple(iters.irange(1, neurons_block_i_smaller["mask"].ndim))
                        )
                    )

                    if neurons_block_i_windowed_count.shape == tuple():
                        neurons_block_i_windowed_count = numpy.array(
                            [neurons_block_i_windowed_count])

                    neurons_block_i_non_windowed_count = numpy.squeeze(
                        numpy.apply_over_axes(
                            numpy.sum,
                            neurons_block_i_smaller["mask"][window_trimmed_i].astype(float),
                            tuple(iters.irange(1, neurons_block_i_smaller["mask"].ndim))
                        )
                    )

                    if neurons_block_i_non_windowed_count.shape == tuple():
                        neurons_block_i_non_windowed_count = numpy.array(
                            [neurons_block_i_non_windowed_count]
                        )

                    if len(neurons_block_i_non_windowed_count):
                        # Find ones that are inside the margins by more than
                        # half
                        neurons_block_i_acceptance = (
                            (neurons_block_i_non_windowed_count / neurons_block_i_windowed_count) > 0.5
                        )

                        logger.info(
                            "Accepted the following neurons %s from block %s."
                            % (
                                str(neurons_block_i_acceptance.nonzero()[0].tolist()),
                                i_str
                            )
                        )

                        # Take a subset of our previous neurons that are within
                        # the margins by half
                        neurons_block_i_accepted = neurons_block_i_smaller[neurons_block_i_acceptance]

                        neurons_block_i = numpy.zeros(
                            neurons_block_i_accepted.shape, dtype=new_neurons_set.dtype
                        )
                        neurons_block_i["mask"][windowed_slice_i] = neurons_block_i_accepted["mask"]
                        neurons_block_i["contour"][windowed_slice_i] = neurons_block_i_accepted["contour"]
                        neurons_block_i["image"][windowed_slice_i] = neurons_block_i_accepted["image"]

                        # Copy other properties
                        neurons_block_i["area"] = neurons_block_i_accepted["area"]
                        neurons_block_i["max_F"] = neurons_block_i_accepted["max_F"]
                        neurons_block_i["gaussian_mean"] = neurons_block_i_accepted["gaussian_mean"]
                        neurons_block_i["gaussian_cov"] = neurons_block_i_accepted["gaussian_cov"]
                        # TODO: Correct centroid to larger block position.
                        neurons_block_i["centroid"] = neurons_block_i_accepted["centroid"]
                        neurons_block_i["centroid"] += sequential_block_i["windowed_stack_selection"][1:, 0]

                        array_debug_recorder = hdf5.record.generate_HDF5_array_recorder(
                            output_group,
                            group_name="debug",
                            enable=debug,
                            overwrite_group=False,
                            recorder_constructor=hdf5.record.HDF5EnumeratedArrayRecorder
                        )

                        segment.merge_neuron_sets.recorders.array_debug_recorder = array_debug_recorder
                        new_neurons_set = segment.merge_neuron_sets(
                            new_neurons_set,
                            neurons_block_i,
                            **parameters["generate_neurons"]["postprocess_data"]["merge_neuron_sets"]
                        )
                    else:
                        logger.info(
                            "Accepted the following neurons %s from block %s." %
                            (
                                str([]),
                                i_str
                            )
                        )
                else:
                    logger.info(
                        "No neurons accepted as none were found for block"
                        " %s." %
                        i_str
                    )

        hdf5.serializers.create_numpy_structured_array_in_HDF5(
            output_group, "neurons", new_neurons_set, overwrite=True)

        if "parameters" not in output_group["neurons"].attrs:
            output_group["neurons"].attrs["parameters"] = repr(dict(
                list(parameters.items()) +
                [("block_shape", block_shape),
                 ("num_blocks", num_blocks),
                 ("half_window_shape", half_window_shape),
                 ("half_border_shape", half_border_shape),
                 ("use_drmaa", use_drmaa),
                 ("num_drmaa_cores", num_drmaa_cores),
                 ("debug", debug)]
            ))

    logger.info("Finished merge over all blocks.")
    end_time = time.time()

    diff_time = end_time - start_time

    logger.info(
        "Run time for merge over all blocks is \"" + str(diff_time) + " s\"."
    )