Exemple #1
0
def segmentation_for_img(raw_xy, predictions_xyc, multicut_workflow):
    """
    Run the multicut workflow on one in-memory image and return the result.

    Parameters
    ----------
    raw_xy : vigra.VigraArray
        Raw image; passed to the workflow under the "Raw Data" role.
    predictions_xyc : vigra.VigraArray
        Prediction image; passed to the workflow under the "Probabilities"
        role. Must satisfy ``are_same_xy`` together with ``raw_xy``.
    multicut_workflow
        Workflow object exposing ``edgeTrainingWithMulticutApplet``,
        ``dataExportApplet`` and ``batchProcessingApplet``.

    Returns
    -------
    The single batch result, re-tagged with axistags 'xy'.
    """
    assert are_same_xy(raw_xy, predictions_xyc)

    # move these into setup_multicut?
    edge_training_op = multicut_workflow.edgeTrainingWithMulticutApplet.topLevelOperator
    assert isinstance(edge_training_op, OpEdgeTrainingWithMulticut)
    multicut_workflow.dataExportApplet.topLevelOperator.OutputAxisOrder.setValue('xy')

    # One single-element lane list per role, in the order the roles are listed.
    inputs_by_role = OrderedDict()
    inputs_by_role["Raw Data"] = [DatasetInfo(preloaded_array=raw_xy)]
    inputs_by_role["Probabilities"] = [DatasetInfo(preloaded_array=predictions_xyc)]

    exported = multicut_workflow.batchProcessingApplet.run_export(
        inputs_by_role, export_to_array=True)
    assert len(exported) == 1

    segmentation_xy = vigra.taggedView(exported[0], axistags='xy')
    assert are_same_xy(segmentation_xy, raw_xy, predictions_xyc)
    return segmentation_xy
Exemple #2
0
def test_h5_stack_via_star_file_glob_and_defined_inner_path(h5_stack_dir, empty_project_file: h5py.File):
    """A '*.h5' glob combined with an explicit inner path yields the expected nickname."""
    star_glob = os.path.join(h5_stack_dir, "*.h5")
    # Use the first internal path that the glob reports.
    first_inner_path = DatasetInfo.globInternalPaths(star_glob, "*")[0]
    info = FilesystemDatasetInfo(
        filePath=os.path.join(star_glob, first_inner_path),
        sequence_axis="z",
        project_file=empty_project_file,
    )
    assert info.nickname == "2d_apoptotic_binary_-volume-data"
    assert info.is_under_project_file()
Exemple #3
0
def test_expand_path(h5_stack_dir):
    """Check DatasetInfo.expand_path for globs, inner-path globs, single files and joined relative paths."""

    def expanded(path_spec, **kwargs):
        # Normalise every expansion to a Path so comparisons are uniform.
        return [Path(item) for item in DatasetInfo.expand_path(path_spec, **kwargs)]

    stack_dir = Path(h5_stack_dir)
    file_names = ["2d_apoptotic_binary_0.h5", "2d_apoptotic_binary_1.h5", "2d_apoptotic_binary_2.h5"]
    expected_file_paths = [stack_dir / name for name in file_names]

    # Plain glob over the directory.
    assert expanded(os.path.join(h5_stack_dir, "*")) == expected_file_paths

    # Glob over the files and over the datasets inside them.
    expected_dataset_paths = [Path(file_path) / "volume/data" for file_path in expected_file_paths]
    assert expanded(os.path.join(h5_stack_dir, "*.h5", "vol*")) == expected_dataset_paths

    # A single concrete file expands to itself.
    assert expanded(os.path.join(h5_stack_dir, "2d_apoptotic_binary_1.h5")) == [expected_file_paths[1]]

    # Relative paths joined with os.path.pathsep are resolved against cwd.
    joined_relative_paths = os.path.pathsep.join(file_names)
    assert expanded(joined_relative_paths, cwd=h5_stack_dir) == expected_file_paths
Exemple #4
0
        def handleImportLabelsAction():
            """
            Let the user pick a label volume and import it into the current view.

            Opens an image file dialog, requires exactly one selected file,
            resolves the internal dataset path (asking the user when the file
            offers several), then reads the volume, reorders its axes to match
            ``LabelInputs`` and hands it to ``importLabels``. The temporary
            operators are cleaned up afterwards, also when the import fails.
            """
            fileNames = ImageFileDialog(
                self,
                preferences_group="DataSelection",
                preferences_setting="recent image").getSelectedPaths()
            fileNames = list(map(str, fileNames))

            # For now, we require a single hdf5 file
            if len(fileNames) > 1:
                QMessageBox.critical(
                    self, "Too many files",
                    "Labels must be contained in a single hdf5 volume.")
                return
            if not fileNames:
                # user cancelled
                return

            file_path = fileNames[0]
            internal_paths = DatasetInfo.getPossibleInternalPathsFor(file_path)
            if len(internal_paths) == 0:
                QMessageBox.critical(
                    self, "No volumes in file",
                    "Couldn't find a suitable dataset in your hdf5 file.")
                return
            if len(internal_paths) == 1:
                internal_path = internal_paths[0]
            else:
                dlg = SubvolumeSelectionDlg(internal_paths, self)
                if dlg.exec_() == QDialog.Rejected:
                    return
                selected_index = dlg.combo.currentIndex()
                internal_path = str(internal_paths[selected_index])

            path_components = PathComponents(file_path)
            path_components.internalPath = str(internal_path)

            # Pre-bind both operators so the cleanup below never touches a name
            # that was not assigned: if construction fails part-way, a NameError
            # in ``finally`` would otherwise hide the real exception.
            opReader = None
            op5 = None
            try:
                top_op = self.topLevelOperatorView
                opReader = OpInputDataReader(parent=top_op.parent)
                opReader.FilePath.setValue(path_components.totalPath())

                # Reorder the axes
                op5 = OpReorderAxes(parent=top_op.parent)
                op5.AxisOrder.setValue(top_op.LabelInputs.meta.getAxisKeys())
                op5.Input.connect(opReader.Output)

                # Finally, import the labels
                top_op.importLabels(top_op.current_view_index(), op5.Output)

            finally:
                # Clean up the downstream operator first, then the reader.
                if op5 is not None:
                    op5.cleanUp()
                if opReader is not None:
                    opReader.cleanUp()
def _append_lane(workflow, input_filepath, axisorder=None):
    """
    Add a lane to the project file for the given input file.

    If axisorder is given, override the default axisorder for
    the file and force the project to use the given one.

    Globstrings are supported, in which case the files are converted to HDF5 first.

    Parameters:
        workflow: object exposing ``dataSelectionApplet`` and ``handleNewLanesAdded()``.
        input_filepath: path, globstring or url of the data for the new lane.
        axisorder: optional axis-order string passed to ``vigra.defaultAxistags``.
    """
    # Local import: the file-level import block is not visible from this chunk.
    import errno

    # If the filepath is a globstring, convert the stack to h5  # todo: skip this?
    tmp_dir = tempfile.mkdtemp()
    input_filepath = DataSelectionApplet.convertStacksToH5([input_filepath],
                                                           tmp_dir)[0]

    try:
        os.rmdir(tmp_dir)
    except OSError as e:
        # rmdir on a non-empty directory fails with ENOTEMPTY (or EEXIST on
        # some platforms). Use the symbolic constants: the numeric value 39 is
        # only correct on Linux.
        if e.errno not in (errno.ENOTEMPTY, errno.EEXIST):
            raise
        logger.warning(
            'Temporary directory {} was populated: should be deleted'.format(
                tmp_dir))

    info = DatasetInfo()
    info.location = DatasetInfo.Location.FileSystem
    info.filePath = input_filepath

    comp = PathComponents(input_filepath)

    # Convert all (non-url) paths to absolute
    # (otherwise they are relative to the project file, which probably isn't what the user meant)
    if not isUrl(input_filepath):
        comp.externalPath = os.path.abspath(comp.externalPath)
        info.filePath = comp.totalPath()
    info.nickname = comp.filenameBase
    if axisorder:
        info.axistags = vigra.defaultAxistags(axisorder)

    logger.debug("adding lane: {}".format(info))

    opDataSelection = workflow.dataSelectionApplet.topLevelOperator

    # Add a lane
    num_lanes = len(opDataSelection.DatasetGroup) + 1
    logger.debug("num_lanes: {}".format(num_lanes))
    opDataSelection.DatasetGroup.resize(num_lanes)

    # Configure it.
    role_index = 0  # raw data
    opDataSelection.DatasetGroup[-1][role_index].setValue(info)

    workflow.handleNewLanesAdded()
def ilastik_multicut(grayscale,
                     bounary_volume,
                     supervoxels,
                     ilp_path,
                     LAZYFLOW_THREADS=1,
                     LAZYFLOW_TOTAL_RAM_MB=None,
                     logfile="/dev/null",
                     extra_cmdline_args=None):
    """
    Using ilastik's python API, open the given project file and run the
    multicut export on the given in-memory volumes.

    grayscale: Raw data array, tagged here with axes 'zyx'.

    bounary_volume: Boundary probability array, tagged here with axes 'zyxc'.

    supervoxels: Supervoxel array, tagged here with axes 'zyx'.

    ilp_path: Path to the project file (converted with str()).

    LAZYFLOW_THREADS, LAZYFLOW_TOTAL_RAM_MB: Passed to ilastik via environment variables.
        If LAZYFLOW_TOTAL_RAM_MB is None, a share of the machine's RAM
        (minus 1 GB) proportional to LAZYFLOW_THREADS is used.

    logfile: Log file path; unless it is "/dev/null", the process name is
        inserted before the extension so each process gets its own file.

    extra_cmdline_args: Optional list of extra ilastik command-line arguments.
        The list is not modified; '--output_axis_order=zyx' is added to a copy.

    Returns the single exported segmentation (asserted to be 3D).
    """
    print('status=multicut')
    print("Starting ilastik_multicut() ...")
    print("grayscale volume: dtype={}, shape={}".format(
        str(grayscale.dtype), grayscale.shape))
    print("boundary volume: dtype={}, shape={}".format(
        str(bounary_volume.dtype), bounary_volume.shape))
    print("supervoxels volume: dtype={}, shape={}".format(
        str(supervoxels.dtype), supervoxels.shape))

    import os
    from collections import OrderedDict

    import uuid
    import multiprocessing
    import platform
    import psutil
    import vigra

    import ilastik_main
    from ilastik.applets.dataSelection import DatasetInfo

    print("ilastik_multicut(): Done with imports")

    if LAZYFLOW_TOTAL_RAM_MB is None:
        # By default, assume our allotted RAM is proportional
        # to the CPUs we've been told to use
        machine_ram = psutil.virtual_memory().total  # bytes
        machine_ram -= 1024**3  # Leave 1 GB RAM for the OS.

        # psutil reports bytes while the setting is expressed in MB, so convert.
        # Integer division keeps the environment variable a plain integer string.
        machine_ram_mb = machine_ram // 1024**2
        LAZYFLOW_TOTAL_RAM_MB = (LAZYFLOW_THREADS * machine_ram_mb
                                 // multiprocessing.cpu_count())

    # Before we start ilastik, prepare the environment variable settings.
    os.environ["LAZYFLOW_THREADS"] = str(LAZYFLOW_THREADS)
    os.environ["LAZYFLOW_TOTAL_RAM_MB"] = str(LAZYFLOW_TOTAL_RAM_MB)
    os.environ["LAZYFLOW_STATUS_MONITOR_SECONDS"] = "10"

    # Work on a copy: appending in place would grow the caller's list
    # (or a shared default list) a little more on every call.
    extra_cmdline_args = list(extra_cmdline_args or []) + ['--output_axis_order=zyx']

    # Prepare ilastik's "command-line" arguments, as if they were already parsed.
    args, extra_workflow_cmdline_args = ilastik_main.parser.parse_known_args(
        extra_cmdline_args)
    args.headless = True
    args.debug = True  # ilastik's 'debug' flag enables special power features, including experimental workflows.
    args.project = str(ilp_path)
    args.readonly = True

    # The process_name argument is prefixed to all log messages.
    # For now, just use the machine name and a uuid
    # FIXME: It would be nice to provide something more descriptive, like the ROI of the current spark job...
    args.process_name = platform.node() + "-" + str(uuid.uuid1())

    # To avoid conflicts between processes, give each process its own logfile to write to.
    if logfile != "/dev/null":
        base, ext = os.path.splitext(logfile)
        logfile = base + '.' + args.process_name + ext

    # By default, all ilastik processes duplicate their console output to ~/.ilastik_log.txt
    # Obviously, having all spark nodes write to a common file is a bad idea.
    # The "/dev/null" setting here is recognized by ilastik and means "Don't write a log file"
    args.logfile = logfile

    print("ilastik_multicut(): Creating shell...")

    # Instantiate the 'shell', (in this case, an instance of ilastik.shell.HeadlessShell)
    # This also loads the project file into shell.projectManager
    shell = ilastik_main.main(args, extra_workflow_cmdline_args)

    ## Need to find a better way to verify the workflow type
    #from ilastik.workflows.multicutWorkflow import MulticutWorkflow
    #assert isinstance(shell.workflow, MulticutWorkflow)

    # Construct an OrderedDict of role-names -> DatasetInfos
    # (See MulticutWorkflow.ROLE_NAMES)
    raw_data_array = vigra.taggedView(grayscale, 'zyx')
    probabilities_array = vigra.taggedView(bounary_volume, 'zyxc')
    superpixels_array = vigra.taggedView(supervoxels, 'zyx')

    role_data_dict = OrderedDict([
        ("Raw Data", [DatasetInfo(preloaded_array=raw_data_array)]),
        ("Probabilities", [DatasetInfo(preloaded_array=probabilities_array)]),
        ("Superpixels", [DatasetInfo(preloaded_array=superpixels_array)])
    ])

    print("ilastik_multicut(): Starting export...")

    # Run the export via the BatchProcessingApplet
    segmentation_list = shell.workflow.batchProcessingApplet.run_export(
        role_data_dict, export_to_array=True)
    assert len(segmentation_list) == 1
    segmentation = segmentation_list[0]

    assert segmentation.ndim == 3
    print('status=multicut finished')
    return segmentation
Exemple #7
0
# Batch-process two in-memory images through the loaded project and collect
# the results as arrays.
# NOTE(review): input_data1, input_data2, opPixelClassification and shell are
# defined before this fragment — confirm against the full script.

# Tagging the data this way ensures that ilastik interprets the axes correctly.
input_data1 = vigra.taggedView(input_data1, 'yxc')
input_data2 = vigra.taggedView(input_data2, 'yxc')

# In case you're curious about which label class is which,
# let's read the label names from the project file.
label_names = opPixelClassification.LabelNames.value
label_colors = opPixelClassification.LabelColors.value
probability_colors = opPixelClassification.PmapColors.value

print label_names, label_colors, probability_colors

# Construct an OrderedDict of role-names -> DatasetInfos
# (See PixelClassificationWorkflow.ROLE_NAMES)
# Both arrays go under the single "Raw Data" role, one list entry per image.
role_data_dict = OrderedDict([("Raw Data", [
    DatasetInfo(preloaded_array=input_data1),
    DatasetInfo(preloaded_array=input_data2)
])])

## Note: If you want to pull your data from disk instead of in-memory, just provide filepaths like so:
# role_data_dict = OrderedDict([ ("Raw Data", [ '/path/to/input-file-1.png',
#                                               '/path/to/input-file-2.h5/mydata' ]) ])

# Run the export via the BatchProcessingApplet
# Note: If you don't provide export_to_array, then the results will
#       be exported to disk according to your project's DataExport settings.
#       In that case, run_export() returns None.
predictions = shell.workflow.batchProcessingApplet.run_export(
    role_data_dict, export_to_array=True)

print "Computed {} result arrays:".format(len(predictions))
Exemple #8
0
def classify_pixel(input_data, classifier, threads=8, ram=4000):
    """
    Runs a pre-trained ilastik classifier on a volume of data
    Adapted from Stuart Berg's example here:
    https://github.com/ilastik/ilastik/blob/master/examples/example_python_client.py
    Arguments:
        input_data: data to be classified - 3D numpy array (tagged as 'xyz')
        classifier: ilastik trained/classified file
        threads: number of thread to use for classifying input data
        ram: RAM to use in MB
    Returns:
        predictions: the exported results of the batch run, passed
            through np.squeeze
    """

    import os
    from collections import OrderedDict

    import numpy as np
    import vigra

    import ilastik_main
    from ilastik.applets.dataSelection import DatasetInfo
    from ilastik.workflows.pixelClassification import PixelClassificationWorkflow

    # Before we start ilastik, prepare these environment variable settings.
    os.environ["LAZYFLOW_THREADS"] = str(threads)
    os.environ["LAZYFLOW_TOTAL_RAM_MB"] = str(ram)

    # Set the command-line arguments directly into argparse.Namespace object
    # Provide your project file, and don't forget to specify headless.
    args = ilastik_main.parser.parse_args([])
    args.headless = True
    args.project = classifier

    # Instantiate the 'shell', (an instance of ilastik.shell.HeadlessShell)
    # This also loads the project file into shell.projectManager
    shell = ilastik_main.main(args)
    assert isinstance(shell.workflow, PixelClassificationWorkflow)

    # Obtain the training operator
    opPixelClassification = shell.workflow.pcApplet.topLevelOperator

    # Sanity checks
    assert len(opPixelClassification.InputImages) > 0
    assert opPixelClassification.Classifier.ready()

    print("input_data.shape", input_data.shape)

    # Tagging the data ensures that ilastik interprets the axes correctly.
    input_data = vigra.taggedView(input_data, 'xyz')

    # In case you're curious about which label class is which,
    # let's read the label names from the project file.
    label_names = opPixelClassification.LabelNames.value
    label_colors = opPixelClassification.LabelColors.value
    probability_colors = opPixelClassification.PmapColors.value

    print("label_names, label_colors, probability_colors", label_names,
          label_colors, probability_colors)

    # Construct an OrderedDict of role-names -> DatasetInfos
    # (See PixelClassificationWorkflow.ROLE_NAMES)
    role_data_dict = OrderedDict([("Raw Data",
                                   [DatasetInfo(preloaded_array=input_data)])])

    # Run the export via the BatchProcessingApplet
    # Note: If you don't provide export_to_array, then the results will
    #       be exported to disk according to project's DataExport settings.
    #       In that case, run_export() returns None.

    predictions = shell.workflow.batchProcessingApplet.\
        run_export(role_data_dict, export_to_array=True)
    # Collapse the result (and any length-1 axes) into a single array.
    predictions = np.squeeze(predictions)
    print("predictions.dtype, predictions.shape", predictions.dtype,
          predictions.shape)

    print("DONE.")

    return predictions
def run_ilastik_stage(stage_num,
                      ilp_path,
                      input_vol,
                      mask,
                      output_path,
                      LAZYFLOW_THREADS=1,
                      LAZYFLOW_TOTAL_RAM_MB=None,
                      logfile="/dev/null",
                      extra_cmdline_args=[]):
    """
    Using ilastik's python API, open the given project file, run one
    prediction stage and export the result to an hdf5 file.

    stage_num: Appended to the process name used in log messages and log file names.

    ilp_path: Path to the project file.

    input_vol: Either a str (used as the input file path) or an in-memory
               array, which is tagged with axes 'zyx'.

    mask: Optional array tagged with axes 'zyx', provided to the workflow
          under the "Prediction Mask" role. 'None' means no mask.

    output_path: Export file name; results are written in hdf5 format
                 under the internal path 'predictions'.

    LAZYFLOW_THREADS, LAZYFLOW_TOTAL_RAM_MB: Passed to ilastik via environment variables.
        If LAZYFLOW_TOTAL_RAM_MB is None, a share of the machine's RAM
        (minus 1 GB) proportional to LAZYFLOW_THREADS is used.

    logfile: Log file path; unless it is "/dev/null", the process name is
             inserted before the extension so each process gets its own file.

    extra_cmdline_args: Extra ilastik command-line arguments (not modified here).

    Returns nothing; asserts that exactly one export was written to
    output_path + '/predictions'.
    """
    import os
    from collections import OrderedDict

    import uuid
    import multiprocessing
    import platform
    import psutil
    import vigra

    import ilastik_main
    from ilastik.applets.dataSelection import DatasetInfo

    if LAZYFLOW_TOTAL_RAM_MB is None:
        # By default, assume our allotted RAM is proportional
        # to the CPUs we've been told to use
        machine_ram = psutil.virtual_memory().total  # bytes
        machine_ram -= 1024**3  # Leave 1 GB RAM for the OS.

        # psutil reports bytes while the setting is expressed in MB, so convert.
        # Integer division keeps the environment variable a plain integer string.
        machine_ram_mb = machine_ram // 1024**2
        LAZYFLOW_TOTAL_RAM_MB = (LAZYFLOW_THREADS * machine_ram_mb
                                 // multiprocessing.cpu_count())

    # Before we start ilastik, prepare the environment variable settings.
    os.environ["LAZYFLOW_THREADS"] = str(LAZYFLOW_THREADS)
    os.environ["LAZYFLOW_TOTAL_RAM_MB"] = str(LAZYFLOW_TOTAL_RAM_MB)
    os.environ["LAZYFLOW_STATUS_MONITOR_SECONDS"] = "10"

    # Prepare ilastik's "command-line" arguments, as if they were already parsed.
    args, extra_workflow_cmdline_args = ilastik_main.parser.parse_known_args(
        extra_cmdline_args)
    args.headless = True
    args.debug = True  # ilastik's 'debug' flag enables special power features, including experimental workflows.
    args.project = ilp_path
    args.readonly = True

    # The process_name argument is prefixed to all log messages.
    # For now, just use the machine name and a uuid
    # FIXME: It would be nice to provide something more descriptive, like the ROI of the current spark job...
    args.process_name = platform.node() + "-" + str(
        uuid.uuid1()) + "-" + str(stage_num)

    # To avoid conflicts between processes, give each process its own logfile to write to.
    if logfile != "/dev/null":
        base, ext = os.path.splitext(logfile)
        logfile = base + '.' + args.process_name + ext

    # By default, all ilastik processes duplicate their console output to ~/.ilastik_log.txt
    # Obviously, having all spark nodes write to a common file is a bad idea.
    # The "/dev/null" setting here is recognized by ilastik and means "Don't write a log file"
    args.logfile = logfile

    # Instantiate the 'shell', (in this case, an instance of ilastik.shell.HeadlessShell)
    # This also loads the project file into shell.projectManager
    shell = ilastik_main.main(args, extra_workflow_cmdline_args)

    ## Need to find a better way to verify the workflow type
    #from ilastik.workflows.pixelClassification import PixelClassificationWorkflow
    #assert isinstance(shell.workflow, PixelClassificationWorkflow)

    opInteractiveExport = shell.workflow.batchProcessingApplet.dataExportApplet.topLevelOperator.getLane(
        0)
    opInteractiveExport.OutputFilenameFormat.setValue(output_path)
    opInteractiveExport.OutputInternalPath.setValue('predictions')
    opInteractiveExport.OutputFormat.setValue('hdf5')

    # NOTE(review): num_channels is not used below; the slot reads are kept
    # in case they act as a readiness check — confirm before removing.
    selected_result = opInteractiveExport.InputSelection.value
    num_channels = opInteractiveExport.Inputs[selected_result].meta.shape[-1]

    # Construct an OrderedDict of role-names -> DatasetInfos
    # (See PixelClassificationWorkflow.ROLE_NAMES)
    if isinstance(input_vol, str):
        role_data_dict = OrderedDict([("Raw Data",
                                       [DatasetInfo(filepath=input_vol)])])
    else:
        # If given raw data, we assume it's grayscale, zyx order (stage 1)
        raw_data_array = vigra.taggedView(input_vol, 'zyx')
        role_data_dict = OrderedDict([
            ("Raw Data", [DatasetInfo(preloaded_array=raw_data_array)])
        ])

    if mask is not None:
        # If there's a mask, we might be able to save some computation time.
        mask = vigra.taggedView(mask, 'zyx')
        role_data_dict["Prediction Mask"] = [DatasetInfo(preloaded_array=mask)]

    # Run the export via the BatchProcessingApplet
    export_paths = shell.workflow.batchProcessingApplet.run_export(
        role_data_dict, export_to_array=False)
    assert len(export_paths) == 1
    assert export_paths[
        0] == output_path + '/predictions', "Output path was {}".format(
            export_paths[0])
def classify_pixel_hdf(hdf_data_set_name, classifier, threads, ram):
    """
    Run a pre-trained ilastik pixel classifier on a dataset stored in an hdf5 file.

    Adapted from Stuart Berg's example here:
    https://github.com/ilastik/ilastik/blob/master/examples/example_python_client.py

    Parameters:
        hdf_data_set_name: dataset to classify; passed to DatasetInfo and
            given 'zyx' axistags
        classifier: ilastik project file holding the trained classifier
        threads: number of threads ilastik may use (LAZYFLOW_THREADS)
        ram: RAM ilastik may use, in MB (LAZYFLOW_TOTAL_RAM_MB)

    Returns:
        Whatever run_export() returns when called with export_to_array=False;
        the results themselves are exported to disk according to the
        project's DataExport settings.
    """
    # ilastik reads these settings from the environment, so set them before it starts.
    os.environ.update(
        LAZYFLOW_THREADS=str(threads),
        LAZYFLOW_TOTAL_RAM_MB=str(ram),
    )

    # Build the argparse.Namespace by hand: headless mode plus the project file.
    cmdline = ilastik_main.parser.parse_args([])
    cmdline.headless = True
    cmdline.project = classifier

    # Creating the shell (an ilastik.shell.HeadlessShell) also loads the
    # project file into shell.projectManager.
    shell = ilastik_main.main(cmdline)
    workflow = shell.workflow
    assert isinstance(workflow, PixelClassificationWorkflow)

    # The training operator must have input images and a ready classifier.
    training_op = workflow.pcApplet.topLevelOperator
    assert len(training_op.InputImages) > 0
    assert training_op.Classifier.ready()

    # Report which label class is which, as stored in the project file.
    names = training_op.LabelNames.value
    colors = training_op.LabelColors.value
    pmap_colors = training_op.PmapColors.value
    print("label_names, label_colors, probability_colors", names, colors, pmap_colors)

    # Map role names to DatasetInfos (see PixelClassificationWorkflow.ROLE_NAMES).
    # The volume is described as slices, rows and columns.
    dataset = DatasetInfo(hdf_data_set_name)
    dataset.axistags = vigra.defaultAxistags('zyx'.encode('ascii'))
    datasets_by_role = OrderedDict()
    datasets_by_role["Raw Data"] = [dataset]

    # Export through the BatchProcessingApplet; without export_to_array the
    # results go to disk according to the project's DataExport settings.
    hdf_dataset_path = workflow.batchProcessingApplet.run_export(
        datasets_by_role, export_to_array=False)

    print("DONE WITH CLASSIFICATION.")

    return hdf_dataset_path
Exemple #11
0
def test_create_nickname_for_single_file_does_not_contain_extension(
        h5_colon_path_stack):
    """A nickname built from a single expanded path carries no file extension."""
    first_path_only = DatasetInfo.expand_path(h5_colon_path_stack)[:1]
    assert DatasetInfo.create_nickname(first_path_only) == "2d_apoptotic_binary_0"
Exemple #12
0
def test_create_nickname(h5_colon_path_stack):
    """The nickname built from the whole expanded stack is the expected shared prefix."""
    all_paths = DatasetInfo.expand_path(h5_colon_path_stack)
    assert DatasetInfo.create_nickname(all_paths) == "2d_apoptotic_binary_"
Exemple #13
0
def test_create_nickname_with_internal_paths(
        h5_colon_path_stack_with_inner_paths):
    """With inner paths present, the nickname gets the expected '-volume-data' suffix."""
    all_paths = DatasetInfo.expand_path(h5_colon_path_stack_with_inner_paths)
    assert DatasetInfo.create_nickname(all_paths) == "2d_apoptotic_binary_-volume-data"
Exemple #14
0
def classify_pixel_hdf(hdf_data_set_name, classifier, threads, ram):
    """
    Run a pre-trained ilastik pixel classifier on a dataset stored in an hdf5 file.

    Adapted from Stuart Berg's example here:
    https://github.com/ilastik/ilastik/blob/master/examples/example_python_client.py

    Parameters:
        hdf_data_set_name: dataset to classify; passed to DatasetInfo and
            given 'tyx' axistags
        classifier: ilastik project file holding the trained classifier
        threads: number of threads ilastik may use (LAZYFLOW_THREADS)
        ram: RAM ilastik may use, in MB (LAZYFLOW_TOTAL_RAM_MB)

    Returns:
        Whatever run_export() returns when called with export_to_array=False;
        the results themselves are exported to disk according to the
        project's DataExport settings.
    """
    # ilastik reads these settings from the environment, so set them before it starts.
    os.environ.update(
        LAZYFLOW_THREADS=str(threads),
        LAZYFLOW_TOTAL_RAM_MB=str(ram),
    )

    # Build the argparse.Namespace by hand: headless mode plus the project file.
    cmdline = ilastik_main.parser.parse_args([])
    cmdline.headless = True
    cmdline.project = classifier

    # Creating the shell (an ilastik.shell.HeadlessShell) also loads the
    # project file into shell.projectManager.
    shell = ilastik_main.main(cmdline)
    workflow = shell.workflow
    assert isinstance(workflow, PixelClassificationWorkflow)

    # The training operator must have input images and a ready classifier.
    training_op = workflow.pcApplet.topLevelOperator
    assert len(training_op.InputImages) > 0
    assert training_op.Classifier.ready()

    # Report which label class is which, as stored in the project file.
    names = training_op.LabelNames.value
    colors = training_op.LabelColors.value
    pmap_colors = training_op.PmapColors.value
    print("label_names, label_colors, probability_colors", names, colors,
          pmap_colors)

    # Map role names to DatasetInfos (see PixelClassificationWorkflow.ROLE_NAMES).
    dataset = DatasetInfo(hdf_data_set_name)
    dataset.axistags = vigra.defaultAxistags('tyx'.encode('ascii'))
    datasets_by_role = OrderedDict()
    datasets_by_role["Raw Data"] = [dataset]

    # Export through the BatchProcessingApplet; without export_to_array the
    # results go to disk according to the project's DataExport settings.
    hdf_dataset_path = workflow.batchProcessingApplet.run_export(
        datasets_by_role, export_to_array=False)

    print("DONE WITH CLASSIFICATION.")

    return hdf_dataset_path
def ilastik_predict_with_array(gray_vol,
                               mask,
                               ilp_path,
                               selected_channels=None,
                               normalize=True,
                               LAZYFLOW_THREADS=1,
                               LAZYFLOW_TOTAL_RAM_MB=None,
                               logfile="/dev/null",
                               extra_cmdline_args=[]):
    """
    Using ilastik's python API, open the given project 
    file and run a prediction on the given raw data array.
    
    Other than the project file, nothing is read or written 
    using the hard disk.
    
    gray_vol: A 3D numpy array with axes zyx

    mask: A binary image where 0 means "no prediction necessary".
         'None' can be given, which means "predict everything".

    ilp_path: Path to the project file.  ilastik also accepts a url to a DVID key-value, which will be downloaded and opened as an ilp
    
    selected_channels: A list of channel indexes to select and return from the prediction results.
                       'None' can also be given, which means "return all prediction channels".
                       You may also pass a *nested* list, in which case groups of channels can be
                       combined (summed) into their respective output channels.
                       For example: selected_channels=[0,3,[2,4],7] means the output will have 4 channels:
                                    0,3,2+4,7 (channels 5 and 6 are simply dropped).
    
    normalize: Renormalize all outputs so the channels sum to 1 everywhere.
               That is, (predictions.sum(axis=-1) == 1.0).all()
               Note: Pixels with 0.0 in all channels will be simply given a value of 1/N in all channels.
    
    LAZYFLOW_THREADS, LAZYFLOW_TOTAL_RAM_MB: Passed to ilastik via environment variables.
        If LAZYFLOW_TOTAL_RAM_MB is None, a share of the machine's RAM
        (minus 1 GB) proportional to LAZYFLOW_THREADS is used.

    logfile: Log file path; unless it is "/dev/null", the process name is
             inserted before the extension so each process gets its own file.

    extra_cmdline_args: Extra ilastik command-line arguments (not modified here).

    Returns the selected (and optionally normalized) prediction channels.
    """
    print("ilastik_predict_with_array(): Starting with raw data: dtype={}, shape={}".format(
        str(gray_vol.dtype), gray_vol.shape))

    import os
    from collections import OrderedDict

    import uuid
    import multiprocessing
    import platform
    import psutil
    import vigra

    import ilastik_main
    from ilastik.applets.dataSelection import DatasetInfo
    from lazyflow.operators.cacheMemoryManager import CacheMemoryManager

    import logging
    logging.getLogger(__name__).info('status=ilastik prediction')
    print("ilastik_predict_with_array(): Done with imports")

    if LAZYFLOW_TOTAL_RAM_MB is None:
        # By default, assume our allotted RAM is proportional
        # to the CPUs we've been told to use
        machine_ram = psutil.virtual_memory().total  # bytes
        machine_ram -= 1024**3  # Leave 1 GB RAM for the OS.

        # psutil reports bytes while the setting is expressed in MB, so convert.
        # Integer division keeps the environment variable a plain integer string.
        machine_ram_mb = machine_ram // 1024**2
        LAZYFLOW_TOTAL_RAM_MB = (LAZYFLOW_THREADS * machine_ram_mb
                                 // multiprocessing.cpu_count())

    # Before we start ilastik, prepare the environment variable settings.
    os.environ["LAZYFLOW_THREADS"] = str(LAZYFLOW_THREADS)
    os.environ["LAZYFLOW_TOTAL_RAM_MB"] = str(LAZYFLOW_TOTAL_RAM_MB)
    os.environ["LAZYFLOW_STATUS_MONITOR_SECONDS"] = "10"

    # Prepare ilastik's "command-line" arguments, as if they were already parsed.
    args, extra_workflow_cmdline_args = ilastik_main.parser.parse_known_args(
        extra_cmdline_args)
    args.headless = True
    args.debug = True  # ilastik's 'debug' flag enables special power features, including experimental workflows.
    args.project = str(ilp_path)
    args.readonly = True

    # The process_name argument is prefixed to all log messages.
    # For now, just use the machine name and a uuid
    # FIXME: It would be nice to provide something more descriptive, like the ROI of the current spark job...
    args.process_name = platform.node() + "-" + str(uuid.uuid1())

    # To avoid conflicts between processes, give each process its own logfile to write to.
    if logfile != "/dev/null":
        base, ext = os.path.splitext(logfile)
        logfile = base + '.' + args.process_name + ext

    # By default, all ilastik processes duplicate their console output to ~/.ilastik_log.txt
    # Obviously, having all spark nodes write to a common file is a bad idea.
    # The "/dev/null" setting here is recognized by ilastik and means "Don't write a log file"
    args.logfile = logfile

    print("ilastik_predict_with_array(): Creating shell...")

    # Instantiate the 'shell', (in this case, an instance of ilastik.shell.HeadlessShell)
    # This also loads the project file into shell.projectManager
    shell = ilastik_main.main(args, extra_workflow_cmdline_args)

    ## Need to find a better way to verify the workflow type
    #from ilastik.workflows.pixelClassification import PixelClassificationWorkflow
    #assert isinstance(shell.workflow, PixelClassificationWorkflow)

    # Construct an OrderedDict of role-names -> DatasetInfos
    # (See PixelClassificationWorkflow.ROLE_NAMES)
    raw_data_array = vigra.taggedView(gray_vol, 'zyx')
    role_data_dict = OrderedDict([
        ("Raw Data", [DatasetInfo(preloaded_array=raw_data_array)])
    ])

    if mask is not None:
        # If there's a mask, we might be able to save some computation time.
        mask = vigra.taggedView(mask, 'zyx')
        role_data_dict["Prediction Mask"] = [DatasetInfo(preloaded_array=mask)]

    print("ilastik_predict_with_array(): Starting export...")

    # Sanity checks
    opInteractiveExport = shell.workflow.batchProcessingApplet.dataExportApplet.topLevelOperator.getLane(
        0)
    selected_result = opInteractiveExport.InputSelection.value
    num_channels = opInteractiveExport.Inputs[selected_result].meta.shape[-1]

    # For convenience, verify the selected channels before we run the export.
    if selected_channels:
        assert isinstance(selected_channels, list)
        for selection in selected_channels:
            if isinstance(selection, list):
                assert all(c < num_channels for c in selection), \
                    "Selected channels ({}) exceed number of prediction classes ({})"\
                    .format( selected_channels, num_channels )
            else:
                assert selection < num_channels, \
                    "Selected channels ({}) exceed number of prediction classes ({})"\
                    .format( selected_channels, num_channels )

    # Run the export via the BatchProcessingApplet
    prediction_list = shell.workflow.batchProcessingApplet.run_export(
        role_data_dict, export_to_array=True)
    assert len(prediction_list) == 1
    predictions = prediction_list[0]

    assert predictions.shape[-1] == num_channels
    selected_predictions = select_channels(predictions, selected_channels)

    if normalize:
        normalize_channels_in_place(selected_predictions)

    # Cleanup: kill cache monitor thread
    CacheMemoryManager().stop()
    CacheMemoryManager.instance = None

    # Cleanup environment
    del os.environ["LAZYFLOW_THREADS"]
    del os.environ["LAZYFLOW_TOTAL_RAM_MB"]
    del os.environ["LAZYFLOW_STATUS_MONITOR_SECONDS"]

    logging.getLogger(__name__).info('status=ilastik prediction finished')
    return selected_predictions