예제 #1
0
def test_select_channels():
    """
    Exercise select_channels() on a synthetic volume in three ways:
    pass-through (None), plain channel selection, and a nested
    selection in which one output channel is the sum of two inputs.
    """
    # Every voxel stores its own channel index, i.e. a[z, y, c] == c.
    a = np.empty((100, 200, 10), dtype=np.float32)
    a[...] = np.arange(10)

    # With no selection, the very same array object must come back.
    untouched = select_channels(a, None)
    assert untouched is a

    # A flat selection yields exactly the requested channel values.
    picked = select_channels(a, [2, 3, 5])
    wanted = np.array([2, 3, 5]).reshape(1, 1, 3)
    assert (picked == wanted).all()

    # A nested entry ([4, 5]) produces one output channel holding the sum.
    combined = select_channels(a, [1, 2, 3, [4, 5]])
    assert combined.shape == (100, 200, 4)
    for out_channel, value in enumerate((1, 2, 3, 4 + 5)):
        assert (combined[..., out_channel] == value).all()
예제 #2
0
def test_select_channels():
    """
    Verify select_channels() for the identity case, a simple list of
    channels, and a nested list whose inner group is summed.
    """
    volume_shape = (100, 200, 10)

    # Fill the volume so that each channel c contains the constant value c.
    a = np.zeros(volume_shape, dtype=np.float32)
    a[:] = np.arange(volume_shape[-1])[None, None, :]

    # Identity case: None means "all channels", returned without copying.
    assert select_channels(a, None) is a

    # Simple selection.
    simple_selection = [2, 3, 5]
    expected_simple = np.array(simple_selection)[None, None, :]
    assert np.all(select_channels(a, simple_selection) == expected_simple)

    # Nested selection: channels 4 and 5 are merged into a single output.
    combined = select_channels(a, [1, 2, 3, [4, 5]])
    assert combined.shape == volume_shape[:-1] + (4, )

    expected_values = [1, 2, 3, (4 + 5)]
    for index in range(len(expected_values)):
        assert (combined[..., index] == expected_values[index]).all()
def ilastik_simple_predict(gray_vol,
                           mask,
                           classifier_path,
                           filter_specs_path,
                           selected_channels=None,
                           normalize=True,
                           LAZYFLOW_THREADS=0,
                           LAZYFLOW_TOTAL_RAM_MB=None,
                           logfile="/dev/null"):
    """
    Predict on an in-memory volume with ilastik's load_and_predict(),
    then select (and optionally renormalize) the output channels.

    gray_vol: A 3D numpy array with axes zyx

    mask: A binary image where 0 means "no prediction necessary", or None.
          NOTE: this function accepts the argument but never references it.

    classifier_path: Path to a vigra RandomForest classifier, in HDF5.
                     Example: /path/to/myclassifier.h5/classifiers/my_rf

    filter_specs_path: Path to a "filter specs" json file, structured like this:
                       [ ['GaussianSmoothing', 0.3],
                         ['GaussianSmoothing', 0.7],
                         ['LaplacianOfGaussian', 1.6] ]
                       (See ilastik's simple_predict.py for valid filter names.)

    selected_channels: A list of channel indexes to keep from the prediction results,
                       or None to keep every channel.
                       The list may be *nested*: an inner list names channels that are
                       combined (summed) into a single output channel.
                       For example: selected_channels=[0,3,[2,4],7] gives 4 output channels:
                                    0,3,2+4,7 (channels 5 and 6 are simply dropped).

    normalize: If true, the selected channels are renormalized in place so that
               they sum to 1 everywhere, i.e. (predictions.sum(axis=-1) == 1.0).all()
               Note: Pixels with 0.0 in all channels are given a value of 1/N in all channels.

    LAZYFLOW_THREADS, LAZYFLOW_TOTAL_RAM_MB: Handed to _prepare_lazyflow_config();
                      LAZYFLOW_THREADS also sizes the lazyflow Request thread pool.

    logfile: Log destination.  Unless it is "/dev/null", a per-process tag is
             inserted before the file extension.

    Returns: The selected prediction channels, as produced by select_channels().
    """
    start_msg = "ilastik_simple_predict(): Starting with raw data: dtype={}, shape={}"
    print(start_msg.format(str(gray_vol.dtype), gray_vol.shape))

    import os
    from collections import OrderedDict

    import uuid
    import platform
    import vigra

    from ilastik.utility.simple_predict import load_and_predict
    from lazyflow.request import Request

    print("ilastik_simple_predict(): Done with imports")

    # The trailing 10 is presumably the status-monitor interval in seconds -- TODO confirm.
    _prepare_lazyflow_config(LAZYFLOW_THREADS, LAZYFLOW_TOTAL_RAM_MB, 10)
    Request.reset_thread_pool(LAZYFLOW_THREADS)

    # This tag is prefixed to all log messages: machine name plus a uuid.
    # FIXME: It would be nice to provide something more descriptive, like the ROI of the current spark job...
    node_tag = "{}-{}".format(platform.node(), uuid.uuid1())

    # Each process gets its own logfile, so concurrent processes don't collide.
    if logfile != "/dev/null":
        stem, suffix = os.path.splitext(logfile)
        logfile = "".join([stem, '.', node_tag, suffix])

    _init_logging(logfile, node_tag)

    # Tag the raw array with its axis order before handing it to ilastik.
    tagged_volume = vigra.taggedView(gray_vol, 'zyx')
    print("ilastik_simple_predict(): Starting export...")

    all_channels = load_and_predict(tagged_volume,
                                    classifier_path,
                                    filter_specs_path,
                                    compute_blockwise=True)
    result = select_channels(all_channels, selected_channels)

    if not normalize:
        return result

    normalize_channels_in_place(result)
    return result
def two_stage_voxel_predictions(gray_vol,
                                mask,
                                stage_1_ilp_path,
                                stage_2_ilp_path,
                                selected_channels=None,
                                normalize=True,
                                LAZYFLOW_THREADS=1,
                                LAZYFLOW_TOTAL_RAM_MB=None,
                                logfile="/dev/null",
                                extra_cmdline_args=[]):
    """
    Run a two-stage voxel prediction using the two given ilastik project files.

    Stage 1 is run on gray_vol and its output is saved to a temporary scratch
    directory on disk; that file is then used as the input to stage 2.
    The predictions of both stages are concatenated along the channel axis
    (stage 1 channels first, then stage 2 channels), and the requested channels
    are extracted from that combined result.
    The scratch directory is removed before returning, even on failure.

    gray_vol: A 3D numpy array with axes zyx

    mask: A binary image where 0 means "no prediction necessary".
         'None' can be given, which means "predict everything".
         (It is only passed to the second stage.)

    stage_1_ilp_path: Path to the project file for the first stage.  Should accept grayscale uint8 data as the input.
                      ilastik also accepts a url to a DVID key-value, which will be downloaded and opened as an ilp

    stage_2_ilp_path: Path to the project file for the second stage.  Should take N input channels (uint8) as input,
                      where N is the number of channels produced in stage 1.

    selected_channels: A list of channel indexes to select and return from the prediction results.
                       Indexes refer to the combined (stage 1 + stage 2) channel axis.
                       'None' can also be given, which means "return all prediction channels".
                       You may also pass a *nested* list, in which case groups of channels can be
                       combined (summed) into their respective output channels.
                       For example: selected_channels=[0,3,[2,4],7] means the output will have 4 channels:
                                    0,3,2+4,7 (channels 5 and 6 are simply dropped).

    normalize: Renormalize all outputs so the channels sum to 1 everywhere.
               That is, (predictions.sum(axis=-1) == 1.0).all()
               Note: Pixels with 0.0 in all channels will be simply given a value of 1/N in all channels.

    LAZYFLOW_THREADS, LAZYFLOW_TOTAL_RAM_MB, logfile, extra_cmdline_args:
               Forwarded unchanged to run_ilastik_stage() for both stages.

    Returns: The selected predictions (the result of select_channels()), normalized
             in place if requested.  The result is asserted to be float32.
    """

    print("two_stage_voxel_predictions(): Starting with raw data: dtype={}, shape={}"
          .format(str(gray_vol.dtype), gray_vol.shape))

    import tempfile
    import shutil
    import numpy as np
    import h5py

    scratch_dir = tempfile.mkdtemp(prefix='voxel_predictions_')
    logger.info("Writing intermediate results to scratch directory: " +
                scratch_dir)

    try:
        # Run predictions on the in-memory data.
        stage_1_output_path = scratch_dir + '/stage_1_predictions.h5'
        run_ilastik_stage(1, stage_1_ilp_path, gray_vol, None,
                          stage_1_output_path, LAZYFLOW_THREADS,
                          LAZYFLOW_TOTAL_RAM_MB, logfile, extra_cmdline_args)

        # The second stage reads the first stage's output file (and honors the mask).
        stage_2_output_path = scratch_dir + '/stage_2_predictions.h5'
        run_ilastik_stage(2, stage_2_ilp_path, stage_1_output_path, mask,
                          stage_2_output_path, LAZYFLOW_THREADS,
                          LAZYFLOW_TOTAL_RAM_MB, logfile, extra_cmdline_args)

        combined_predictions_path = scratch_dir + '/combined_predictions.h5'

        # Sadly, we must rewrite the predictions into a single file, because they might be combined together.
        # Technically, we could avoid this with some fancy logic, but that would be really annoying.
        with h5py.File(combined_predictions_path,
                       'w') as combined_predictions_file:
            with h5py.File(stage_1_output_path, 'r') as stage_1_prediction_file, \
                 h5py.File(stage_2_output_path, 'r') as stage_2_prediction_file:
                stage_1_predictions = stage_1_prediction_file['predictions']
                stage_2_predictions = stage_2_prediction_file['predictions']

                assert stage_1_predictions.dtype == stage_2_predictions.dtype, \
                    "Mismatched dtypes: {} vs {}".format( stage_1_predictions.dtype, stage_2_predictions.dtype )

                stage_1_channels = stage_1_predictions.shape[-1]
                stage_2_channels = stage_2_predictions.shape[-1]

                assert stage_1_predictions.shape[:-1] == stage_2_predictions.shape[:-1], \
                    "Non-channel dimensions must match.  shapes were: {} and {}"\
                    .format(stage_1_predictions.shape, stage_2_predictions.shape)

                combined_shape = stage_1_predictions.shape[:-1] + (
                    (stage_1_channels + stage_2_channels), )

                # h5py rejects a chunk that is larger than the dataset in any dimension,
                # so clamp the 64-voxel chunk edge to the actual extent of small volumes.
                # One channel per chunk keeps the channel-wise copies below cheap.
                chunk_shape = tuple(
                    min(64, extent) for extent in combined_shape[:-1]) + (1, )

                combined_predictions = combined_predictions_file.create_dataset(
                    'predictions',
                    dtype=stage_1_predictions.dtype,
                    shape=combined_shape,
                    chunks=chunk_shape)

                # Do this one channel at a time to save RAM
                for c in range(stage_1_channels):
                    combined_predictions[..., c] = stage_1_predictions[..., c]
                for c in range(stage_2_channels):
                    combined_predictions[..., stage_1_channels +
                                         c] = stage_2_predictions[..., c]

            # The stage files are closed now; the combined file is still open.
            num_channels = combined_predictions.shape[-1]

            # For convenience, verify the selected channels before extracting them.
            if selected_channels:
                assert isinstance(selected_channels, list)
                for selection in selected_channels:
                    if isinstance(selection, list):
                        assert all(c < num_channels for c in selection), \
                            "Selected channels ({}) exceed number of prediction classes ({})"\
                            .format( selected_channels, num_channels )
                    else:
                        assert selection < num_channels, \
                            "Selected channels ({}) exceed number of prediction classes ({})"\
                            .format( selected_channels, num_channels )

            # This will extract the channels we want, converting from hdf5 to numpy along the way.
            # It must happen while the combined file is still open.
            selected_predictions = select_channels(combined_predictions,
                                                   selected_channels)

        if normalize:
            normalize_channels_in_place(selected_predictions)

        assert selected_predictions.dtype == np.float32
        return selected_predictions
    finally:
        # Always discard the intermediate files, whether or not prediction succeeded.
        shutil.rmtree(scratch_dir)
def two_stage_voxel_predictions(gray_vol, mask, stage_1_ilp_path, stage_2_ilp_path, selected_channels=None, normalize=True, 
                                LAZYFLOW_THREADS=1, LAZYFLOW_TOTAL_RAM_MB=None, logfile="/dev/null", extra_cmdline_args=[]):
    """
    Run a two-stage voxel prediction using the two given ilastik project files.

    The output of the first stage is saved to a temporary scratch directory on disk
    and used as the input to the second stage.  Afterwards the predictions of both
    stages are concatenated along the channel axis (stage 1 channels first), and the
    requested channels are extracted from that combined result.
    The scratch directory is removed before returning, even on failure.
    
    gray_vol: A 3D numpy array with axes zyx

    mask: A binary image where 0 means "no prediction necessary".
         'None' can be given, which means "predict everything".
         (It is only passed to the second stage.)

    stage_1_ilp_path: Path to the project file for the first stage.  Should accept grayscale uint8 data as the input.
                      ilastik also accepts a url to a DVID key-value, which will be downloaded and opened as an ilp

    stage_2_ilp_path: Path to the project file for the second stage.  Should take N input channels (uint8) as input, 
                      where N is the number of channels produced in stage 1.
    
    selected_channels: A list of channel indexes to select and return from the prediction results.
                       Indexes refer to the combined (stage 1 + stage 2) channel axis.
                       'None' can also be given, which means "return all prediction channels".
                       You may also pass a *nested* list, in which case groups of channels can be
                       combined (summed) into their respective output channels.
                       For example: selected_channels=[0,3,[2,4],7] means the output will have 4 channels:
                                    0,3,2+4,7 (channels 5 and 6 are simply dropped).
    
    normalize: Renormalize all outputs so the channels sum to 1 everywhere.
               That is, (predictions.sum(axis=-1) == 1.0).all()
               Note: Pixels with 0.0 in all channels will be simply given a value of 1/N in all channels.
    
    LAZYFLOW_THREADS, LAZYFLOW_TOTAL_RAM_MB, logfile, extra_cmdline_args:
               Forwarded unchanged to run_ilastik_stage() for both stages.

    Returns: The selected predictions (the result of select_channels()), normalized
             in place if requested.  The result is asserted to be float32.
    """
    
    print("two_stage_voxel_predictions(): Starting with raw data: dtype={}, shape={}"
          .format(str(gray_vol.dtype), gray_vol.shape))

    import tempfile
    import shutil
    import numpy as np
    import h5py

    scratch_dir = tempfile.mkdtemp(prefix='voxel_predictions_')
    logger.info( "Writing intermediate results to scratch directory: " + scratch_dir )

    try:
        # Run predictions on the in-memory data.
        stage_1_output_path = scratch_dir + '/stage_1_predictions.h5'
        run_ilastik_stage(1, stage_1_ilp_path, gray_vol, None, stage_1_output_path,
                          LAZYFLOW_THREADS, LAZYFLOW_TOTAL_RAM_MB, logfile, extra_cmdline_args)

        # The second stage reads the first stage's output file (and honors the mask).
        stage_2_output_path = scratch_dir + '/stage_2_predictions.h5'
        run_ilastik_stage(2, stage_2_ilp_path, stage_1_output_path, mask, stage_2_output_path,
                          LAZYFLOW_THREADS, LAZYFLOW_TOTAL_RAM_MB, logfile, extra_cmdline_args)
    
        combined_predictions_path = scratch_dir + '/combined_predictions.h5'
    
        # Sadly, we must rewrite the predictions into a single file, because they might be combined together.
        # Technically, we could avoid this with some fancy logic, but that would be really annoying.
        with h5py.File(combined_predictions_path, 'w') as combined_predictions_file:
            with h5py.File(stage_1_output_path, 'r') as stage_1_prediction_file, \
                 h5py.File(stage_2_output_path, 'r') as stage_2_prediction_file:
                stage_1_predictions = stage_1_prediction_file['predictions']
                stage_2_predictions = stage_2_prediction_file['predictions']
    
                assert stage_1_predictions.dtype == stage_2_predictions.dtype, \
                    "Mismatched dtypes: {} vs {}".format( stage_1_predictions.dtype, stage_2_predictions.dtype )
        
                stage_1_channels = stage_1_predictions.shape[-1]
                stage_2_channels = stage_2_predictions.shape[-1]
                
                assert stage_1_predictions.shape[:-1] == stage_2_predictions.shape[:-1], \
                    "Non-channel dimensions must match.  shapes were: {} and {}"\
                    .format(stage_1_predictions.shape, stage_2_predictions.shape)
                
                combined_shape = stage_1_predictions.shape[:-1] + ((stage_1_channels + stage_2_channels),)

                # h5py rejects a chunk that is larger than the dataset in any dimension,
                # so clamp the 64-voxel chunk edge to the actual extent of small volumes.
                # One channel per chunk keeps the channel-wise copies below cheap.
                chunk_shape = tuple(min(64, extent) for extent in combined_shape[:-1]) + (1,)

                combined_predictions = combined_predictions_file.create_dataset('predictions',
                                                                                dtype=stage_1_predictions.dtype,
                                                                                shape=combined_shape,
                                                                                chunks=chunk_shape )

                # Do this one channel at a time to save RAM
                for c in range(stage_1_channels):
                    combined_predictions[..., c] = stage_1_predictions[..., c]
                for c in range(stage_2_channels):
                    combined_predictions[..., stage_1_channels+c] = stage_2_predictions[..., c]
    
            # The stage files are closed now; the combined file is still open.
            num_channels = combined_predictions.shape[-1]
        
            # For convenience, verify the selected channels before extracting them.
            if selected_channels:
                assert isinstance(selected_channels, list)
                for selection in selected_channels:
                    if isinstance(selection, list):
                        assert all(c < num_channels for c in selection), \
                            "Selected channels ({}) exceed number of prediction classes ({})"\
                            .format( selected_channels, num_channels )
                    else:
                        assert selection < num_channels, \
                            "Selected channels ({}) exceed number of prediction classes ({})"\
                            .format( selected_channels, num_channels )
    
            # This will extract the channels we want, converting from hdf5 to numpy along the way.
            # It must happen while the combined file is still open.
            selected_predictions = select_channels(combined_predictions, selected_channels)
        
        if normalize:
            normalize_channels_in_place(selected_predictions)
        
        assert selected_predictions.dtype == np.float32
        return selected_predictions
    finally:
        # Always discard the intermediate files, whether or not prediction succeeded.
        shutil.rmtree(scratch_dir)
def ilastik_predict_with_array(gray_vol, mask, ilp_path, selected_channels=None, normalize=True, 
                               LAZYFLOW_THREADS=1, LAZYFLOW_TOTAL_RAM_MB=None, logfile="/dev/null", extra_cmdline_args=[]):
    """
    Using ilastik's python API, open the given project 
    file and run a prediction on the given raw data array.
    
    The raw data (and mask) are handed to ilastik as preloaded in-memory arrays,
    and the predictions are exported to an in-memory array as well.
    
    gray_vol: A 3D numpy array with axes zyx

    mask: A binary image where 0 means "no prediction necessary".
         'None' can be given, which means "predict everything".

    ilp_path: Path to the project file.  ilastik also accepts a url to a DVID key-value, which will be downloaded and opened as an ilp
    
    selected_channels: A list of channel indexes to select and return from the prediction results.
                       'None' can also be given, which means "return all prediction channels".
                       You may also pass a *nested* list, in which case groups of channels can be
                       combined (summed) into their respective output channels.
                       For example: selected_channels=[0,3,[2,4],7] means the output will have 4 channels:
                                    0,3,2+4,7 (channels 5 and 6 are simply dropped).
    
    normalize: Renormalize all outputs so the channels sum to 1 everywhere.
               That is, (predictions.sum(axis=-1) == 1.0).all()
               Note: Pixels with 0.0 in all channels will be simply given a value of 1/N in all channels.
    
    LAZYFLOW_THREADS, LAZYFLOW_TOTAL_RAM_MB: Passed to ilastik via environment variables.
               If LAZYFLOW_TOTAL_RAM_MB is None, it defaults to this process's share
               (LAZYFLOW_THREADS / cpu_count) of the machine RAM, minus 1 GB, in megabytes.

    logfile: Assigned to ilastik's 'logfile' argument.  "/dev/null" means "don't write a log file".

    extra_cmdline_args: Additional ilastik command-line arguments, parsed with ilastik's own parser.

    Returns: The selected prediction channels, as produced by select_channels().
    """
    # Single-argument print(...) calls behave identically under Python 2 and Python 3.
    print("ilastik_predict_with_array(): Starting with raw data: dtype={}, shape={}".format(str(gray_vol.dtype), gray_vol.shape))

    import os
    from collections import OrderedDict

    import uuid
    import multiprocessing
    import platform
    import psutil
    import vigra

    import ilastik_main
    from ilastik.applets.dataSelection import DatasetInfo

    print("ilastik_predict_with_array(): Done with imports")

    if LAZYFLOW_TOTAL_RAM_MB is None:
        # By default, assume our allotted RAM is proportional 
        # to the CPUs we've been told to use
        machine_ram = psutil.virtual_memory().total  # bytes
        machine_ram -= 1024**3 # Leave 1 GB RAM for the OS.

        allotted_bytes = LAZYFLOW_THREADS * machine_ram // multiprocessing.cpu_count()

        # psutil reports bytes, but this setting is expressed in megabytes.
        # Floor division keeps the value an integer on both Python 2 and 3.
        LAZYFLOW_TOTAL_RAM_MB = allotted_bytes // (1024**2)

    # Before we start ilastik, prepare the environment variable settings.
    os.environ["LAZYFLOW_THREADS"] = str(LAZYFLOW_THREADS)
    os.environ["LAZYFLOW_TOTAL_RAM_MB"] = str(LAZYFLOW_TOTAL_RAM_MB)
    os.environ["LAZYFLOW_STATUS_MONITOR_SECONDS"] = "10"

    # Prepare ilastik's "command-line" arguments, as if they were already parsed.
    args, extra_workflow_cmdline_args = ilastik_main.parser.parse_known_args(extra_cmdline_args)
    args.headless = True
    args.debug = True # ilastik's 'debug' flag enables special power features, including experimental workflows.
    args.project = ilp_path
    args.readonly = True

    # By default, all ilastik processes duplicate their console output to ~/.ilastik_log.txt
    # Obviously, having all spark nodes write to a common file is a bad idea.
    # The "/dev/null" setting here is recognized by ilastik and means "Don't write a log file"
    args.logfile = logfile

    # The process_name argument is prefixed to all log messages.
    # For now, just use the machine name and a uuid
    # FIXME: It would be nice to provide something more descriptive, like the ROI of the current spark job...
    args.process_name = platform.node() + "-" + str(uuid.uuid1())

    print("ilastik_predict_with_array(): Creating shell...")

    # Instantiate the 'shell', (in this case, an instance of ilastik.shell.HeadlessShell)
    # This also loads the project file into shell.projectManager
    shell = ilastik_main.main( args, extra_workflow_cmdline_args )

    ## Need to find a better way to verify the workflow type
    #from ilastik.workflows.pixelClassification import PixelClassificationWorkflow
    #assert isinstance(shell.workflow, PixelClassificationWorkflow)

    # Construct an OrderedDict of role-names -> DatasetInfos
    # (See PixelClassificationWorkflow.ROLE_NAMES)
    raw_data_array = vigra.taggedView(gray_vol, 'zyx')
    role_data_dict = OrderedDict([ ("Raw Data", [ DatasetInfo(preloaded_array=raw_data_array) ]) ])
    
    if mask is not None:
        # If there's a mask, we might be able to save some computation time.
        mask = vigra.taggedView(mask, 'zyx')
        role_data_dict["Prediction Mask"] = [ DatasetInfo(preloaded_array=mask) ]

    print("ilastik_predict_with_array(): Starting export...")

    # Sanity checks: look up how many channels the selected export source will produce.
    opInteractiveExport = shell.workflow.batchProcessingApplet.dataExportApplet.topLevelOperator.getLane(0)
    selected_result = opInteractiveExport.InputSelection.value
    num_channels = opInteractiveExport.Inputs[selected_result].meta.shape[-1]
    
    # For convenience, verify the selected channels before we run the export.
    if selected_channels:
        assert isinstance(selected_channels, list)
        for selection in selected_channels:
            if isinstance(selection, list):
                assert all(c < num_channels for c in selection), \
                    "Selected channels ({}) exceed number of prediction classes ({})"\
                    .format( selected_channels, num_channels )
            else:
                assert selection < num_channels, \
                    "Selected channels ({}) exceed number of prediction classes ({})"\
                    .format( selected_channels, num_channels )

    # Run the export via the BatchProcessingApplet
    prediction_list = shell.workflow.batchProcessingApplet.run_export(role_data_dict, export_to_array=True)
    assert len(prediction_list) == 1
    predictions = prediction_list[0]

    assert predictions.shape[-1] == num_channels
    selected_predictions = select_channels(predictions, selected_channels)

    if normalize:
        normalize_channels_in_place(selected_predictions)
    
    return selected_predictions
def ilastik_predict_with_array(gray_vol,
                               mask,
                               ilp_path,
                               selected_channels=None,
                               normalize=True,
                               LAZYFLOW_THREADS=1,
                               LAZYFLOW_TOTAL_RAM_MB=None,
                               logfile="/dev/null",
                               extra_cmdline_args=[]):
    """
    Using ilastik's python API, open the given project 
    file and run a prediction on the given raw data array.
    
    The raw data (and mask) are handed to ilastik as preloaded in-memory arrays,
    and the predictions are exported to an in-memory array as well.
    
    gray_vol: A 3D numpy array with axes zyx

    mask: A binary image where 0 means "no prediction necessary".
         'None' can be given, which means "predict everything".

    ilp_path: Path to the project file.  ilastik also accepts a url to a DVID key-value, which will be downloaded and opened as an ilp
    
    selected_channels: A list of channel indexes to select and return from the prediction results.
                       'None' can also be given, which means "return all prediction channels".
                       You may also pass a *nested* list, in which case groups of channels can be
                       combined (summed) into their respective output channels.
                       For example: selected_channels=[0,3,[2,4],7] means the output will have 4 channels:
                                    0,3,2+4,7 (channels 5 and 6 are simply dropped).
    
    normalize: Renormalize all outputs so the channels sum to 1 everywhere.
               That is, (predictions.sum(axis=-1) == 1.0).all()
               Note: Pixels with 0.0 in all channels will be simply given a value of 1/N in all channels.
    
    LAZYFLOW_THREADS, LAZYFLOW_TOTAL_RAM_MB: Passed to ilastik via environment variables.
               If LAZYFLOW_TOTAL_RAM_MB is None, it defaults to this process's share
               (LAZYFLOW_THREADS / cpu_count) of the machine RAM, minus 1 GB, in megabytes.
               The environment variables are removed again before this function exits.

    logfile: Log destination.  "/dev/null" means "don't write a log file"; any other
             path gets the per-process name inserted before its extension.

    extra_cmdline_args: Additional ilastik command-line arguments, parsed with ilastik's own parser.

    Returns: The selected prediction channels, as produced by select_channels().
    """
    # Single-argument print(...) calls behave identically under Python 2 and Python 3.
    print("ilastik_predict_with_array(): Starting with raw data: dtype={}, shape={}".format(
        str(gray_vol.dtype), gray_vol.shape))

    import os
    from collections import OrderedDict

    import uuid
    import multiprocessing
    import platform
    import psutil
    import vigra

    import ilastik_main
    from ilastik.applets.dataSelection import DatasetInfo
    from lazyflow.operators.cacheMemoryManager import CacheMemoryManager

    import logging
    logging.getLogger(__name__).info('status=ilastik prediction')
    print("ilastik_predict_with_array(): Done with imports")

    if LAZYFLOW_TOTAL_RAM_MB is None:
        # By default, assume our allotted RAM is proportional
        # to the CPUs we've been told to use
        machine_ram = psutil.virtual_memory().total  # bytes
        machine_ram -= 1024**3  # Leave 1 GB RAM for the OS.

        allotted_bytes = LAZYFLOW_THREADS * machine_ram // multiprocessing.cpu_count()

        # psutil reports bytes, but this setting is expressed in megabytes.
        # Floor division keeps the value an integer on both Python 2 and 3.
        LAZYFLOW_TOTAL_RAM_MB = allotted_bytes // (1024**2)

    # Before we start ilastik, prepare the environment variable settings.
    os.environ["LAZYFLOW_THREADS"] = str(LAZYFLOW_THREADS)
    os.environ["LAZYFLOW_TOTAL_RAM_MB"] = str(LAZYFLOW_TOTAL_RAM_MB)
    os.environ["LAZYFLOW_STATUS_MONITOR_SECONDS"] = "10"

    try:
        # Prepare ilastik's "command-line" arguments, as if they were already parsed.
        args, extra_workflow_cmdline_args = ilastik_main.parser.parse_known_args(
            extra_cmdline_args)
        args.headless = True
        args.debug = True  # ilastik's 'debug' flag enables special power features, including experimental workflows.
        args.project = str(ilp_path)
        args.readonly = True

        # The process_name argument is prefixed to all log messages.
        # For now, just use the machine name and a uuid
        # FIXME: It would be nice to provide something more descriptive, like the ROI of the current spark job...
        args.process_name = platform.node() + "-" + str(uuid.uuid1())

        # To avoid conflicts between processes, give each process its own logfile to write to.
        if logfile != "/dev/null":
            base, ext = os.path.splitext(logfile)
            logfile = base + '.' + args.process_name + ext

        # By default, all ilastik processes duplicate their console output to ~/.ilastik_log.txt
        # Obviously, having all spark nodes write to a common file is a bad idea.
        # The "/dev/null" setting here is recognized by ilastik and means "Don't write a log file"
        args.logfile = logfile

        print("ilastik_predict_with_array(): Creating shell...")

        # Instantiate the 'shell', (in this case, an instance of ilastik.shell.HeadlessShell)
        # This also loads the project file into shell.projectManager
        shell = ilastik_main.main(args, extra_workflow_cmdline_args)

        ## Need to find a better way to verify the workflow type
        #from ilastik.workflows.pixelClassification import PixelClassificationWorkflow
        #assert isinstance(shell.workflow, PixelClassificationWorkflow)

        # Construct an OrderedDict of role-names -> DatasetInfos
        # (See PixelClassificationWorkflow.ROLE_NAMES)
        raw_data_array = vigra.taggedView(gray_vol, 'zyx')
        role_data_dict = OrderedDict([
            ("Raw Data", [DatasetInfo(preloaded_array=raw_data_array)])
        ])

        if mask is not None:
            # If there's a mask, we might be able to save some computation time.
            mask = vigra.taggedView(mask, 'zyx')
            role_data_dict["Prediction Mask"] = [DatasetInfo(preloaded_array=mask)]

        print("ilastik_predict_with_array(): Starting export...")

        # Sanity checks: look up how many channels the selected export source will produce.
        opInteractiveExport = shell.workflow.batchProcessingApplet.dataExportApplet.topLevelOperator.getLane(
            0)
        selected_result = opInteractiveExport.InputSelection.value
        num_channels = opInteractiveExport.Inputs[selected_result].meta.shape[-1]

        # For convenience, verify the selected channels before we run the export.
        if selected_channels:
            assert isinstance(selected_channels, list)
            for selection in selected_channels:
                if isinstance(selection, list):
                    assert all(c < num_channels for c in selection), \
                        "Selected channels ({}) exceed number of prediction classes ({})"\
                        .format( selected_channels, num_channels )
                else:
                    assert selection < num_channels, \
                        "Selected channels ({}) exceed number of prediction classes ({})"\
                        .format( selected_channels, num_channels )

        # Run the export via the BatchProcessingApplet
        prediction_list = shell.workflow.batchProcessingApplet.run_export(
            role_data_dict, export_to_array=True)
        assert len(prediction_list) == 1
        predictions = prediction_list[0]

        assert predictions.shape[-1] == num_channels
        selected_predictions = select_channels(predictions, selected_channels)

        if normalize:
            normalize_channels_in_place(selected_predictions)
    finally:
        # Run the cleanup even when prediction fails, so a failed call does not
        # leave the cache monitor thread or our environment settings behind.

        # Cleanup: kill cache monitor thread
        CacheMemoryManager().stop()
        CacheMemoryManager.instance = None

        # Cleanup environment
        # (pop() rather than del, so a missing key cannot mask the original error.)
        os.environ.pop("LAZYFLOW_THREADS", None)
        os.environ.pop("LAZYFLOW_TOTAL_RAM_MB", None)
        os.environ.pop("LAZYFLOW_STATUS_MONITOR_SECONDS", None)

    logging.getLogger(__name__).info('status=ilastik prediction finished')
    return selected_predictions