    def testBasic2(self):
        # Build a 256x256 single-channel label image (one label value per
        # row) and tag its axes for vigra as "xyc".
        a = numpy.arange(256)
        a = expand_view(a, 256)
        a = a[..., None]
        a = vigra.taggedView(a, "xyc")

        graph = Graph()
        op = OpColorizeLabelImageCached(graph=graph)

        opPrep = OpArrayPiper(graph=graph)
        opPrep.Input.setValue(a)

        op.Input.connect(opPrep.Output)

        b = op.Output[...].wait()

        # Each of the 256 labels should map to a distinct color.
        b_colors = set(tuple(_) for _ in array_to_matrix(b.T).T)
        assert (len(b_colors) == 256)
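
    def testColorCountSketch(self):
        # A minimal standalone sketch (not part of the original suite; names
        # here are hypothetical) of the same distinct-color check using plain
        # numpy: flatten to (pixels, channels) and count distinct rows.
        colorized = numpy.zeros((4, 4, 3), dtype=numpy.uint8)
        colorized[..., 0] = numpy.arange(16).reshape(4, 4)  # unique value per pixel
        flat = colorized.reshape(-1, colorized.shape[-1])
        distinct = set(tuple(int(c) for c in px) for px in flat)
        assert (len(distinct) == 16)
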
def generate_neurons_blocks(input_filename,
                            output_filename,
                            num_processes=multiprocessing.cpu_count(),
                            block_shape=None,
                            num_blocks=None,
                            half_window_shape=None,
                            half_border_shape=None,
                            use_drmaa=False,
                            num_drmaa_cores=16,
                            debug=False,
                            **parameters):
    # TODO: Move this function into a new module with its own command line interface.
    # TODO: Heavy refactoring required on this function.

    # Extract and validate file extensions.

    # Parse input filename and validate that the name is acceptable
    input_filename_details = pathHelpers.PathComponents(input_filename)
    # Clean up the extension so it fits the standard.
    if (input_filename_details.extension.lower().lstrip(os.extsep)
            not in ["h5", "hdf5", "he5"]):
        raise Exception(
            "Input file with filename: \"" + input_filename + "\"" +
            " provided with an unknown file extension: \"" +
            input_filename_details.extension +
            "\". If it is a supported format, please run the given file through nanshe_converter first before proceeding."
        )

    # Parse output filename and validate that the name is acceptable
    output_filename_details = pathHelpers.PathComponents(output_filename)
    # Clean up the extension so it fits the standard.
    if (output_filename_details.extension.lower().lstrip(os.extsep)
            not in ["h5", "hdf5", "he5"]):
        raise Exception(
            "Output file with filename: \"" + output_filename + "\"" +
            " provided with an unknown file extension: \"" +
            output_filename_details.extension +
            "\". If it is a supported format, please run the given file through nanshe_converter first before proceeding."
        )

    # Where the original images are.
    input_dataset_name = input_filename_details.internalPath

    # Name of the group where all data will be stored.
    output_group_name = output_filename_details.internalPath

    # Directory where individual block runs will be stored.
    intermediate_output_dir = output_filename_details.externalPath.rsplit(
        output_filename_details.extension, 1)[0] + "_blocks"

    # Read the input data.
    original_images_shape_array = None
    with h5py.File(input_filename_details.externalPath,
                   "r") as input_file_handle:
        original_images_shape_array = numpy.array(
            input_file_handle[input_dataset_name].shape)

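    # Note: axis 0 is time and the remaining axes are spatial. Several checks
    # below (e.g. half_border_shape[0] == 0 and the f0 window comparison)
    # rely on this layout.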
    # Get the amount of the border to slice
    half_border_shape_array = None
    if half_border_shape is None:
        half_border_shape_array = numpy.zeros(len(original_images_shape_array),
                                              dtype=int)
    else:
        assert (len(half_border_shape) == len(original_images_shape_array))

        half_border_shape_array = numpy.array(half_border_shape)

        # Should be of type integer
        assert (issubclass(half_border_shape_array.dtype.type, numpy.integer))

        # Should not cut along temporal portion.
        # Maybe replace with a warning.
        assert (half_border_shape[0] == 0)

    # TODO: Refactor to expanded_numpy.
    # Cuts boundaries from original_images_shape
    original_images_pared_shape_array = original_images_shape_array - 2 * half_border_shape_array

    # At least one of block_shape and num_blocks must be specified; a mixture
    # is allowed, with -1 marking dimensions left for the other to determine.
    assert ((block_shape is not None) or (num_blocks is not None))

    # Size of each block, in pixels (-1 marks a dimension to be derived).
    block_shape_array = None
    block_shape_array_undefined = None
    if block_shape is None:
        block_shape_array = -numpy.ones(
            original_images_pared_shape_array.shape, dtype=int)
        block_shape_array_undefined = numpy.ones(
            original_images_pared_shape_array.shape, dtype=bool)
    else:
        # Should have the same number of values in each
        assert (len(original_images_pared_shape_array) == len(block_shape))

        block_shape_array = numpy.array(block_shape, dtype=int)

        # Should be of type integer
        assert issubclass(block_shape_array.dtype.type, numpy.integer)

        block_shape_array_undefined = (block_shape_array == -1)

    # Number of blocks along each dimension (-1 marks a dimension to be derived).
    num_blocks_array = None
    num_blocks_array_undefined = None
    if num_blocks is None:
        num_blocks_array = -numpy.ones(original_images_pared_shape_array.shape,
                                       dtype=int)
        num_blocks_array_undefined = numpy.ones(
            original_images_pared_shape_array.shape, dtype=bool)
    else:
        # Should have the same number of values in each
        assert (len(original_images_pared_shape_array) == len(num_blocks))

        num_blocks_array = numpy.array(num_blocks, dtype=int)

        # Should be of type integer
        assert issubclass(num_blocks_array.dtype.type, numpy.integer)

        num_blocks_array_undefined = (num_blocks_array == -1)

    # Ensure block_shape and num_blocks are not both fully specified.
    assert ~(~block_shape_array_undefined & ~num_blocks_array_undefined).all()

    # If both are undefined, then the block should span that dimension
    missing_both = (block_shape_array_undefined & num_blocks_array_undefined)
    block_shape_array[missing_both] = original_images_pared_shape_array[
        missing_both]
    num_blocks_array[missing_both] = 1
    # Thus, we have resolved these values and can continue.
    block_shape_array_undefined[missing_both] = False
    num_blocks_array_undefined[missing_both] = False

    # Replace undefined values in block_shape_array
    missing_block_shape_array, block_shape_array_remainder = divmod(
        original_images_pared_shape_array[block_shape_array_undefined],
        num_blocks_array[block_shape_array_undefined])
    # Block shape must be well defined.
    assert (block_shape_array_remainder == 0).all()
    missing_block_shape_array = missing_block_shape_array.astype(int)
    block_shape_array[block_shape_array_undefined] = missing_block_shape_array

    # Replace undefined values in num_blocks_array
    missing_num_blocks_array, num_blocks_array_remainder = divmod(
        original_images_pared_shape_array[num_blocks_array_undefined],
        block_shape_array[num_blocks_array_undefined])
    # Allow some blocks to be smaller
    missing_num_blocks_array += (num_blocks_array_remainder != 0).astype(int)
    num_blocks_array[num_blocks_array_undefined] = missing_num_blocks_array
    # Get the overlap window
    half_window_shape_array = None
    if half_window_shape is None:
        # Default to half the block size (kept integral, since these values
        # are later assigned into an integer array).
        half_window_shape_array = block_shape_array // 2
    else:
        assert (
            len(half_window_shape) == len(original_images_pared_shape_array))

        half_window_shape_array = numpy.array(half_window_shape)

        assert issubclass(half_window_shape_array.dtype.type, numpy.integer)

    # Make sure our window size is at least as large as the one used for the
    # f0 calculation.
    if "extract_f0" in parameters["generate_neurons"]["preprocess_data"]:
        # assert (parameters["generate_neurons"]["preprocess_data"]["extract_f0"]["half_window_size"] == half_window_shape_array[0])
        assert (parameters["generate_neurons"]["preprocess_data"]["extract_f0"]
                ["half_window_size"] <= half_window_shape_array[0])

    # Estimate bounds for each block. Uses typical Python [begin, end) for the
    # indices. The sub-array dtype gives each element one (begin, end) pair
    # per dimension.
    estimated_bounds = numpy.zeros(
        tuple(num_blocks_array),
        dtype=(int, original_images_pared_shape_array.shape + (2, )))

    for each_block_indices in iters.index_generator(*num_blocks_array):
        for each_dim, each_block_dim_index in enumerate(each_block_indices):
            estimated_lower_bound = each_block_dim_index * block_shape_array[
                each_dim]
            estimated_upper_bound = (each_block_dim_index +
                                     1) * block_shape_array[each_dim]

            estimated_bounds[each_block_indices][each_dim] = numpy.array(
                [estimated_lower_bound, estimated_upper_bound])

    original_images_pared_slices = numpy.zeros(
        estimated_bounds.shape[:-2],
        dtype=[("actual", int, estimated_bounds.shape[-2:]),
               ("windowed", int, estimated_bounds.shape[-2:]),
               ("windowed_stack_selection", int, estimated_bounds.shape[-2:]),
               ("windowed_block_selection", int, estimated_bounds.shape[-2:])])

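    # Fields of original_images_pared_slices, each holding one (begin, end)
    # pair per dimension:
    #   "actual"                   - the block's bounds, clipped to the pared
    #                                image shape.
    #   "windowed"                 - the bounds grown by the half window and
    #                                clipped likewise.
    #   "windowed_stack_selection" - the windowed bounds shifted back into
    #                                the untrimmed image stack's coordinates.
    #   "windowed_block_selection" - where "actual" sits inside "windowed";
    #                                used later to trim margins off results.
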
    # Get the slice that is within bounds
    original_images_pared_slices["actual"] = estimated_bounds
    original_images_pared_slices["actual"][..., 0] = numpy.where(
        0 < original_images_pared_slices["actual"][..., 0],
        original_images_pared_slices["actual"][..., 0], 0)
    original_images_pared_slices["actual"][..., 1] = numpy.where(
        original_images_pared_slices["actual"][..., 1] <
        original_images_pared_shape_array,
        original_images_pared_slices["actual"][..., 1],
        original_images_pared_shape_array)

    # Build the window additions from the half window shape: subtract from
    # the lower bounds and add to the upper bounds.
    window_addition = numpy.zeros(estimated_bounds.shape, dtype=int)
    window_addition[..., 0] = -half_window_shape_array
    window_addition[..., 1] = half_window_shape_array

    # Get the slice with a window added.
    original_images_pared_slices[
        "windowed"] = estimated_bounds + window_addition
    original_images_pared_slices["windowed"][..., 0] = numpy.where(
        0 < original_images_pared_slices["windowed"][..., 0],
        original_images_pared_slices["windowed"][..., 0], 0)
    original_images_pared_slices["windowed"][..., 1] = numpy.where(
        original_images_pared_slices["windowed"][..., 1] <
        original_images_pared_shape_array,
        original_images_pared_slices["windowed"][..., 1],
        original_images_pared_shape_array)

    # Get the slice information to get the windowed block from the original image stack.
    original_images_pared_slices[
        "windowed_stack_selection"] = original_images_pared_slices["windowed"]
    original_images_pared_slices[
        "windowed_stack_selection"] += xnumpy.expand_view(
            half_border_shape_array, reps_after=2)

    # Get slice information for the portion within original_images_pared_slices["windowed"],
    # which corresponds to original_images_pared_slices["actual"]
    # original_images_pared_slices["windowed_block_selection"][..., 0] = 0
    original_images_pared_slices["windowed_block_selection"][..., 1] = (
        original_images_pared_slices["actual"][..., 1] -
        original_images_pared_slices["actual"][..., 0])
    original_images_pared_slices[
        "windowed_block_selection"][:] += xnumpy.expand_view(
            (original_images_pared_slices["actual"][..., 0] -
             original_images_pared_slices["windowed"][..., 0]),
            reps_after=2)

    # Get a directory for intermediate results.
    try:
        os.mkdir(intermediate_output_dir)
    except OSError:
        # If it already exists, that is fine.
        pass

    intermediate_config = os.path.join(intermediate_output_dir, "config.json")

    # Always overwrite the config file.
    with open(intermediate_config, "w") as fid:
        # Merge parameters with the debug flag (Python 2 dict construction).
        json.dump(dict(parameters.items() + {"debug": debug}.items()),
                  fid,
                  indent=4,
                  separators=(",", " : "))
        fid.write("\n")

    # Construct an HDF5 file for each block
    input_filename_block = []
    output_filename_block = []
    stdout_filename_block = []
    stderr_filename_block = []
    with h5py.File(output_filename_details.externalPath,
                   "a") as output_file_handle:
        # Create the output group if it doesn't exist.
        if output_group_name not in output_file_handle:
            output_file_handle.create_group(output_group_name)

        output_group = output_file_handle[output_group_name]

        if "original_images" not in output_group:
            if input_filename_details.externalPath == output_filename_details.externalPath:
                output_group["original_images"] = h5py.SoftLink(
                    input_dataset_name)
            else:
                output_group["original_images"] = h5py.ExternalLink(
                    input_filename_details.externalPath,
                    "/" + input_dataset_name)

        if "blocks" not in output_group:
            output_group.create_group("blocks")

        output_group_blocks = output_group["blocks"]

        input_file_handle = None
        try:
            # Skipping using region refs.
            input_file_handle = h5py.File(input_filename_details.externalPath,
                                          "r")
        except IOError:
            # The file is already open (input and output are the same file),
            # so reuse the output handle.
            input_file_handle = output_file_handle

        for i, i_str, sequential_block_i in iters.filled_stringify_enumerate(
                original_images_pared_slices.flat):
            intermediate_basename_i = os.path.join(intermediate_output_dir, i_str)

            # Hold redirected stdout and stderr for each subprocess.
            stdout_filename_block.append(intermediate_basename_i + os.extsep +
                                         "out")
            stderr_filename_block.append(intermediate_basename_i + os.extsep +
                                         "err")

            # Ensure that the blocks are corrected to deal with trimming of the image stack
            # Must be done after the calculation of original_images_pared_slices["windowed_block_selection"]
            sequential_block_i_windowed = sequential_block_i[
                "windowed_stack_selection"]
            slice_i = tuple(
                [slice(_1, _2, 1) for _1, _2 in sequential_block_i_windowed])

            if i_str not in output_group_blocks:
                # A placeholder dataset carrying this block's provenance
                # (source file, dataset, and slice) as attributes.
                output_group_blocks[i_str] = []
                output_group_blocks[i_str].attrs[
                    "filename"] = input_file_handle.filename
                output_group_blocks[i_str].attrs[
                    "dataset"] = input_dataset_name
                output_group_blocks[i_str].attrs["slice"] = str(slice_i)

            block_i = output_group_blocks[i_str]

            with h5py.File(intermediate_basename_i + os.extsep + "h5",
                           "a") as each_block_file_handle:
                # Link this block file back to its entry in the output file's
                # "blocks" group, using a path relative to the block directory.
                if "original_images" not in each_block_file_handle:
                    each_block_file_handle[
                        "original_images"] = h5py.ExternalLink(
                            os.path.relpath(block_i.file.filename,
                                            intermediate_output_dir),
                            block_i.name)

                input_filename_block.append(each_block_file_handle.filename +
                                            "/" + "original_images")
                output_filename_block.append(each_block_file_handle.filename +
                                             "/")

        if input_file_handle != output_file_handle:
            input_file_handle.close()

    # Reconstruct this module's importable name and source path so each
    # subprocess can run "from <module> import main".
    cur_module_dirpath = os.path.dirname(os.path.dirname(nanshe.__file__))
    cur_module_filepath = os.path.splitext(os.path.abspath(__file__))[0]
    cur_module_name = os.path.relpath(cur_module_filepath, cur_module_dirpath)
    cur_module_name = cur_module_name.replace(os.path.sep, ".")
    cur_module_filepath += os.extsep + "py"

    import sys

    python = sys.executable

    # Build a "python -c" one-liner: pin sys.path to the package directory,
    # then dispatch to this module's main() with the remaining arguments.
    executable_run = ""
    executable_run += "from sys import argv, path, exit; "

    executable_run += "path[:] = [\"%s\"] + [_ for _ in path if _ != \"%s\"]; " % \
                      (cur_module_dirpath, cur_module_dirpath,)
    executable_run += "from %s import main; exit(main(*argv))" % \
                      (cur_module_name,)

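    # One argument pack per block:
    # (python, "-c", code, config path, input, output, stdout path, stderr path).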
    block_process_args_gen = itertools.izip(
        itertools.repeat(python), itertools.repeat("-c"),
        itertools.repeat(executable_run),
        itertools.repeat(intermediate_config), input_filename_block,
        output_filename_block, stdout_filename_block, stderr_filename_block)

    if use_drmaa:
        # Attempt to import drmaa.
        # If it fails to import, either the user has no intent in using it or forgot to install it.
        # If it imports, but fails to find symbols, then the user has not set DRMAA_LIBRARY_PATH or does not have libdrmaa.so.
        try:
            import drmaa
        except ImportError:
            # python-drmaa is not installed.
            logger.error(
                "Was not able to import drmaa. If this is meant to be run using the OpenGrid submission system, then drmaa needs to be installed via pip or easy_install."
            )
            raise
        except RuntimeError:
            # The drmaa library was not specified, but python-drmaa is installed.
            logger.error(
                "Was able to import drmaa. However, the drmaa library could not be found. Please either specify the location of libdrmaa.so using the DRMAA_LIBRARY_PATH environment variable or disable/remove use_drmaa from the config file."
            )
            raise

        s = drmaa.Session()
        s.initialize()

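        # One job template per block: stdout/stderr are redirected to the
        # per-block files and num_drmaa_cores slots are requested from the
        # "batch" parallel environment.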
        ready_processes = []
        for each_arg_pack in block_process_args_gen:
            ready_processes.append((each_arg_pack, s.createJobTemplate()))
            ready_processes[-1][1].jobName = os.path.basename(os.path.splitext(cur_module_filepath)[0]) + "-" + \
                                             os.path.basename(os.path.dirname(each_arg_pack[3].split(".h5")[0])) + "-" + \
                                             os.path.basename(each_arg_pack[3].split(".h5")[0])
            ready_processes[-1][1].remoteCommand = each_arg_pack[0]
            ready_processes[-1][1].args = each_arg_pack[1:-2]
            ready_processes[-1][1].jobEnvironment = os.environ
            ready_processes[-1][1].inputPath = "localhost:" + os.devnull
            ready_processes[-1][
                1].outputPath = "localhost:" + each_arg_pack[-2]
            ready_processes[-1][1].errorPath = "localhost:" + each_arg_pack[-1]
            ready_processes[-1][1].workingDirectory = os.getcwd()
            ready_processes[-1][1].nativeSpecification = "-pe batch " + str(
                num_drmaa_cores)

        running_processes = []
        for each_arg_pack, each_process_template in ready_processes:
            each_process_id = s.runJob(each_process_template)
            running_processes.append(
                (each_arg_pack, each_process_id, each_process_template))
            logger.info("Started new process ( \"" + " ".join(each_arg_pack) +
                        "\" ).")

        start_queue_time = time.time()
        logger.info("Waiting for queued jobs to complete.")

        # finished_processes = []
        for each_arg_pack, each_process_id, each_process_template in running_processes:
            each_process_status = s.wait(each_process_id)

            if not each_process_status.hasExited:
                raise RuntimeError("The process (\"" +
                                   " ".join(each_arg_pack) +
                                   "\") has exited prematurely.")

            logger.info("Finished process ( \"" + " ".join(each_arg_pack) +
                        "\" ).")
            s.deleteJobTemplate(each_process_template)
            # finished_processes.append((each_arg_pack, each_process_id))

        s.exit()

        end_queue_time = time.time()
        diff_queue_time = end_queue_time - start_queue_time

        logger.info("Run time for queued jobs to complete is \"" +
                    str(diff_queue_time) + " s\".")
    else:
        # TODO: Refactor into a separate class (have it return futures somehow)
        # finished_processes = []
        # Simple process pool: keep up to num_processes block subprocesses
        # running and poll until both the task list and the pool drain.
        running_processes = []
        pool_tasks_empty = False
        while (not pool_tasks_empty) or len(running_processes):
            while (not pool_tasks_empty) and (len(running_processes) <
                                              num_processes):
                try:
                    each_arg_pack = next(block_process_args_gen)
                    each_arg_pack, each_stdout_filename, each_stderr_filename = each_arg_pack[:-2], each_arg_pack[
                        -2], each_arg_pack[-1]
                    each_process = subprocess.Popen(
                        each_arg_pack,
                        stdout=open(each_stdout_filename, "w"),
                        stderr=open(each_stderr_filename, "w"))

                    running_processes.append((
                        each_arg_pack,
                        each_process,
                    ))

                    logger.info("Started new process ( \"" +
                                " ".join(each_arg_pack) + "\" ).")
                except StopIteration:
                    pool_tasks_empty = True

            while ((not pool_tasks_empty) and
                   (len(running_processes) >= num_processes)) or (
                       pool_tasks_empty and len(running_processes)):
                time.sleep(1)

                i = 0
                while i < len(running_processes):
                    if running_processes[i][1].poll() is not None:
                        logger.info("Finished process ( \"" +
                                    " ".join(running_processes[i][0]) +
                                    "\" ).")

                        # finished_processes.append(running_processes[i])
                        del running_processes[i]
                    else:
                        time.sleep(1)
                        i += 1

        # finished_processes = None

    start_time = time.time()
    logger.info("Starting merge over all blocks.")

    with h5py.File(output_filename_details.externalPath,
                   "a") as output_file_handle:
        output_group = output_file_handle[output_group_name]

        new_neurons_set = segment.get_empty_neuron(
            shape=tuple(original_images_shape_array[1:]), dtype=float)

        for i, i_str, (output_filename_block_i,
                       sequential_block_i) in iters.filled_stringify_enumerate(
                           itertools.izip(output_filename_block,
                                          original_images_pared_slices.flat)):
            # Slice for placing this block back into full-frame arrays: all
            # of time, plus the spatial windowed stack selection.
            windowed_slice_i = tuple([
                slice(_1, _2, 1) for _1, _2 in [(None, None)] +
                sequential_block_i["windowed_stack_selection"].tolist()[1:]
            ])
            # Slice trimming the window margins off the block's results.
            window_trimmed_i = tuple([
                slice(_1, _2, 1) for _1, _2 in
                sequential_block_i["windowed_block_selection"].tolist()
            ])
            output_filename_block_i = output_filename_block_i.rstrip("/")

            with h5py.File(output_filename_block_i,
                           "r") as each_block_file_handle:
                if "neurons" in each_block_file_handle:
                    neurons_block_i_smaller = hdf5.serializers.read_numpy_structured_array_from_HDF5(
                        each_block_file_handle, "/neurons")

                    # Per-neuron pixel count of each mask over the full
                    # (windowed) block.
                    neurons_block_i_windowed_count = numpy.squeeze(
                        numpy.apply_over_axes(
                            numpy.sum,
                            neurons_block_i_smaller["mask"].astype(float),
                            tuple(xrange(1, neurons_block_i_smaller["mask"].ndim))))

                    if neurons_block_i_windowed_count.shape == tuple():
                        neurons_block_i_windowed_count = numpy.array(
                            [neurons_block_i_windowed_count])

                    # Per-neuron pixel count restricted to the trimmed
                    # (non-margin) region.
                    neurons_block_i_non_windowed_count = numpy.squeeze(
                        numpy.apply_over_axes(
                            numpy.sum,
                            neurons_block_i_smaller["mask"][window_trimmed_i].astype(float),
                            tuple(xrange(1, neurons_block_i_smaller["mask"].ndim))))

                    if neurons_block_i_non_windowed_count.shape == tuple():
                        neurons_block_i_non_windowed_count = numpy.array(
                            [neurons_block_i_non_windowed_count])

                    if len(neurons_block_i_non_windowed_count):
                        # Accept neurons with more than half of their mask
                        # inside the trimmed (non-margin) region.
                        neurons_block_i_acceptance = (
                            (neurons_block_i_non_windowed_count /
                             neurons_block_i_windowed_count) > 0.5)

                        # Keep only the accepted neurons from this block.
                        neurons_block_i_accepted = neurons_block_i_smaller[
                            neurons_block_i_acceptance]

                        neurons_block_i = numpy.zeros(
                            neurons_block_i_accepted.shape,
                            dtype=new_neurons_set.dtype)
                        neurons_block_i["mask"][windowed_slice_i] = \
                            neurons_block_i_accepted["mask"]
                        neurons_block_i["contour"][windowed_slice_i] = \
                            neurons_block_i_accepted["contour"]
                        neurons_block_i["image"][windowed_slice_i] = \
                            neurons_block_i_accepted["image"]

                        # Copy other properties
                        neurons_block_i["area"] = neurons_block_i_accepted["area"]
                        neurons_block_i["max_F"] = neurons_block_i_accepted["max_F"]
                        neurons_block_i["gaussian_mean"] = \
                            neurons_block_i_accepted["gaussian_mean"]
                        neurons_block_i["gaussian_cov"] = \
                            neurons_block_i_accepted["gaussian_cov"]
                        # TODO: Correct centroid to larger block position.
                        neurons_block_i["centroid"] = \
                            neurons_block_i_accepted["centroid"]

                        array_debug_recorder = hdf5.record.generate_HDF5_array_recorder(
                            output_group,
                            group_name="debug",
                            enable=debug,
                            overwrite_group=False,
                            recorder_constructor=hdf5.record.HDF5EnumeratedArrayRecorder
                        )

                        segment.merge_neuron_sets.recorders.array_debug_recorder = array_debug_recorder
                        new_neurons_set = segment.merge_neuron_sets(
                            new_neurons_set,
                            neurons_block_i,
                            **parameters["generate_neurons"]["postprocess_data"]["merge_neuron_sets"]
                        )

        hdf5.serializers.create_numpy_structured_array_in_HDF5(
            output_group, "neurons", new_neurons_set, overwrite=True)

        if "parameters" not in output_group["neurons"].attrs:
            output_group["neurons"].attrs["parameters"] = repr(dict(
                list(parameters.items()) +
                [("block_shape", block_shape),
                 ("num_blocks", num_blocks),
                 ("half_window_shape", half_window_shape),
                 ("half_border_shape", half_border_shape),
                 ("use_drmaa", use_drmaa),
                 ("num_drmaa_cores", num_drmaa_cores),
                 ("debug", debug)]
            ))

    logger.info("Finished merge over all blocks.")
    end_time = time.time()

    diff_time = end_time - start_time

    logger.info("Run time for merge over all blocks is \"" + str(diff_time) +
                " s\".")
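
# A hypothetical invocation sketch (not from the source): it assumes the
# module-level imports used above (json, etc.), a 3-D (t, y, x) HDF5 stack at
# "data.h5/images", and nanshe-style parameters in "params.json" containing
# the "generate_neurons" settings the function reads. All filenames and
# values here are illustrative only.
if __name__ == "__main__":
    with open("params.json", "r") as fid:
        params = json.load(fid)

    generate_neurons_blocks(
        "data.h5/images",            # input file plus internal dataset path
        "results.h5/analysis",       # output file plus internal group path
        num_processes=4,
        block_shape=(-1, 128, 128),  # -1: derived from num_blocks
        num_blocks=(1, -1, -1),      # -1: derived from block_shape
        **params
    )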