Ejemplo n.º 1
0
    def configure_operator_with_parsed_args(self, parsed_args):
        """
        Set up this applet's top-level operator from parsed command-line arguments.
        Intended for headless workflows.

        Every entry of ``parsed_args.input_files`` ends up as one lane of the
        operator's ``DatasetGroup`` (role index 0).

        :param parsed_args: Must be an ``argparse.Namespace`` as returned by :py:meth:`parse_known_cmdline_args()`.
        """
        paths = parsed_args.input_files

        # If the user doesn't want image stacks to be copied into the project file,
        #  hdf5 volumes are generated in the system temp directory and used instead.
        if parsed_args.preconvert_stacks:
            import tempfile
            paths = self.convertStacksToH5(paths, tempfile.gettempdir())

        def build_info(path):
            # One file-system DatasetInfo per input path.
            dataset_info = DatasetInfo()
            dataset_info.location = DatasetInfo.Location.FileSystem

            # Make the external path absolute; a relative path would be taken
            #  relative to the project file, which probably isn't what the user meant.
            components = PathComponents(path)
            components.externalPath = os.path.abspath(components.externalPath)

            dataset_info.filePath = components.totalPath()
            dataset_info.nickname = components.filenameBase
            return dataset_info

        infos = [build_info(path) for path in paths]

        operator = self.topLevelOperator
        operator.DatasetGroup.resize(len(infos))
        for lane, dataset_info in enumerate(infos):
            # Only one dataset role in pixel classification
            operator.DatasetGroup[lane][0].setValue(dataset_info)
Ejemplo n.º 2
0
    def configure_operator_with_parsed_args(self, parsed_args):
        """
        Set up this applet's top-level operator from parsed command-line arguments.
        Intended for headless workflows.

        Every entry of ``parsed_args.input_files`` ends up as one lane of the
        operator's ``DatasetGroup`` (role index 0).

        :param parsed_args: Must be an ``argparse.Namespace`` as returned by :py:meth:`parse_known_cmdline_args()`.
        """
        # TODO: Support image stack inputs by checking for globstrings and converting to hdf5.

        def build_info(path):
            # One file-system DatasetInfo per input path.
            dataset_info = DatasetInfo()
            dataset_info.location = DatasetInfo.Location.FileSystem

            # Make the external path absolute; a relative path would be taken
            #  relative to the project file, which probably isn't what the user meant.
            components = PathComponents(path)
            components.externalPath = os.path.abspath(components.externalPath)

            dataset_info.filePath = components.totalPath()
            dataset_info.nickname = components.filenameBase
            return dataset_info

        infos = [build_info(path) for path in parsed_args.input_files]

        operator = self.topLevelOperator
        operator.DatasetGroup.resize(len(infos))
        for lane, dataset_info in enumerate(infos):
            # Only one dataset role in pixel classification
            operator.DatasetGroup[lane][0].setValue(dataset_info)
Ejemplo n.º 3
0
def _append_lane(workflow, input_filepath, axisorder=None):
    """
    Add a lane to the project file for the given input file.

    If axisorder is given, override the default axisorder for
    the file and force the project to use the given one.

    Globstrings are supported, in which case the files are converted to HDF5 first.

    :param workflow: Workflow whose ``dataSelectionApplet`` receives the new lane;
                     its ``handleNewLanesAdded()`` is called at the end.
    :param input_filepath: Path, URL or globstring of the input data.
    :param axisorder: Optional axis order, passed to ``vigra.defaultAxistags``.
    """
    import errno

    # If the filepath is a globstring, convert the stack to h5  # todo: skip this?
    tmp_dir = tempfile.mkdtemp()
    input_filepath = DataSelectionApplet.convertStacksToH5([input_filepath],
                                                           tmp_dir)[0]

    # Clean up the scratch directory again if the conversion left it empty.
    try:
        os.rmdir(tmp_dir)
    except OSError as e:
        # rmdir() on a non-empty directory fails with ENOTEMPTY (or EEXIST on
        # some systems).  The numeric value is platform-specific, so compare
        # against the symbolic constants instead of a hard-coded number.
        if e.errno in (errno.ENOTEMPTY, errno.EEXIST):
            logger.warning(
                'Temporary directory {} was populated: should be deleted'
                .format(tmp_dir))
        else:
            raise

    info = DatasetInfo()
    info.location = DatasetInfo.Location.FileSystem
    info.filePath = input_filepath

    comp = PathComponents(input_filepath)

    # Convert all (non-url) paths to absolute
    # (otherwise they are relative to the project file, which probably isn't what the user meant)
    if not isUrl(input_filepath):
        comp.externalPath = os.path.abspath(comp.externalPath)
        info.filePath = comp.totalPath()
    info.nickname = comp.filenameBase
    if axisorder:
        info.axistags = vigra.defaultAxistags(axisorder)

    logger.debug("adding lane: {}".format(info))

    opDataSelection = workflow.dataSelectionApplet.topLevelOperator

    # Add a lane
    num_lanes = len(opDataSelection.DatasetGroup) + 1
    logger.debug("num_lanes: {}".format(num_lanes))
    opDataSelection.DatasetGroup.resize(num_lanes)

    # Configure it.
    role_index = 0  # raw data
    opDataSelection.DatasetGroup[-1][role_index].setValue(info)

    workflow.handleNewLanesAdded()
def generateBatchPredictions(workflow, batchInputPaths, batchExportDir, batchOutputSuffix, exportedDatasetName):
    """
    Run the batch export for each of the given input files,
    writing the results to corresponding h5 files.

    Returns the value of the first ``ExportResult`` slot of the batch results operator.
    """
    convertedPaths = convertStacksToH5(batchInputPaths)

    def makeInfo(path):
        datasetInfo = DatasetInfo()
        datasetInfo.location = DatasetInfo.Location.FileSystem

        # Use absolute paths: relative ones would be taken relative to the
        #  project file, which probably isn't what the user meant.
        components = PathComponents(path)
        components.externalPath = os.path.abspath(components.externalPath)

        datasetInfo.filePath = components.totalPath()
        return datasetInfo

    inputInfos = [makeInfo(path) for path in convertedPaths]

    # Feed the inputs to the batch input operator
    inputOperator = workflow.batchInputApplet.topLevelOperator
    inputOperator.Dataset.setValues(inputInfos)

    # Set the export options on the batch results operator
    resultsOperator = workflow.batchResultsApplet.topLevelOperator
    resultsOperator.ExportDirectory.setValue(batchExportDir)
    resultsOperator.Format.setValue(ExportFormat.H5)
    resultsOperator.Suffix.setValue(batchOutputSuffix)
    resultsOperator.InternalPath.setValue(exportedDatasetName)
    resultsOperator.SelectedSlices.setValue([30])

    logger.info("Exporting data to " + resultsOperator.OutputDataPath[0].value)

    # Progress is only logged for now; repeated identical values are skipped.
    lastReported = {'percent': None}

    def handleProgress(percentComplete):
        if lastReported['percent'] == percentComplete:
            return
        lastReported['percent'] = percentComplete
        logger.info("Batch job: {}% complete.".format(percentComplete))

    resultsOperator.ProgressSignal[0].value.subscribe(handleProgress)

    # Reading the result slot triggers the export.
    return resultsOperator.ExportResult[0].value
def append_lane(workflow, input_filepath, axisorder=None):
    """
    Append one lane for ``input_filepath`` to a pixel classification workflow
    and return the workflow's pixel classification operator.

    The workflow must be a ``PixelClassificationWorkflow`` whose classifier slot
    is ready (both are asserted).  If ``axisorder`` is given, it is turned into
    axistags with ``vigra.defaultAxistags`` and stored on the dataset info.
    """
    # Preconditions
    assert isinstance(workflow, PixelClassificationWorkflow)
    pixel_op = workflow.pcApplet.topLevelOperator
    assert pixel_op.Classifier.ready()

    # A globstring input is converted to an h5 stack first
    converted = DataSelectionApplet.convertStacksToH5([input_filepath], TMP_DIR)
    input_filepath = converted[0]

    dataset_info = DatasetInfo()
    dataset_info.location = DatasetInfo.Location.FileSystem
    dataset_info.filePath = input_filepath

    components = PathComponents(input_filepath)

    # Anything that is not a URL is made absolute; a relative path would be
    #  taken relative to the project file, which probably isn't what the user meant.
    if not isUrl(input_filepath):
        components.externalPath = os.path.abspath(components.externalPath)
        dataset_info.filePath = components.totalPath()
    dataset_info.nickname = components.filenameBase
    if axisorder:
        dataset_info.axistags = vigra.defaultAxistags(axisorder)

    logger.debug("adding lane: {}".format(dataset_info))

    selection_op = workflow.dataSelectionApplet.topLevelOperator

    # Grow the dataset group by one lane
    num_lanes = len(selection_op.DatasetGroup) + 1
    logger.debug("num_lanes: {}".format(num_lanes))
    selection_op.DatasetGroup.resize(num_lanes)

    # Fill the new (last) lane; role 0 is the raw data
    raw_role = 0
    selection_op.DatasetGroup[-1][raw_role].setValue(dataset_info)

    # Postcondition: the pixel classification operator saw the new lane, too
    assert len(pixel_op.InputImages) == num_lanes

    return pixel_op
def append_lane(workflow, input_filepath, axisorder=None):
    """
    Append one lane for the given input file to the workflow's data selection operator.

    When ``axisorder`` is given, it replaces the file's default axis order
    and the project is forced to use it.

    Globstrings are accepted; such inputs are converted to HDF5 first.
    """
    # A globstring input is converted to an h5 stack in a fresh temp directory
    converted = DataSelectionApplet.convertStacksToH5([input_filepath], tempfile.mkdtemp())
    input_filepath = converted[0]

    dataset_info = DatasetInfo()
    dataset_info.location = DatasetInfo.Location.FileSystem
    dataset_info.filePath = input_filepath

    components = PathComponents(input_filepath)

    # Anything that is not a URL is made absolute; a relative path would be
    #  taken relative to the project file, which probably isn't what the user meant.
    if not isUrl(input_filepath):
        components.externalPath = os.path.abspath(components.externalPath)
        dataset_info.filePath = components.totalPath()
    dataset_info.nickname = components.filenameBase
    if axisorder:
        dataset_info.axistags = vigra.defaultAxistags(axisorder)

    logger.debug("adding lane: {}".format(dataset_info))

    selection_op = workflow.dataSelectionApplet.topLevelOperator

    # Grow the dataset group by one lane
    num_lanes = len(selection_op.DatasetGroup) + 1
    logger.debug("num_lanes: {}".format(num_lanes))
    selection_op.DatasetGroup.resize(num_lanes)

    # Fill the new (last) lane; role 0 is the raw data
    raw_role = 0
    selection_op.DatasetGroup[-1][raw_role].setValue(dataset_info)
Ejemplo n.º 7
0
    def create_default_headless_dataset_info(cls, filepath):
        """
        Create a ``DatasetInfo`` with file-system location for ``filepath``.

        The nickname is taken from the file name base with globstring syntax removed;
        non-URL paths are stored in absolute form.

        :param filepath: may be a globstring or a full hdf5 path+dataset
        :returns: the new ``DatasetInfo``
        """
        comp = PathComponents(filepath)
        nickname = comp.filenameBase

        # Remove globstring syntax.
        nickname = nickname.replace('*', '')
        if os.path.pathsep in nickname:
            # Several names joined with the path separator: use the first one.
            # The attribute is spelled ``filenameBase`` (same as a few lines up).
            first_name = nickname.split(os.path.pathsep)[0]
            nickname = PathComponents(first_name).filenameBase

        info = DatasetInfo()
        info.location = DatasetInfo.Location.FileSystem
        info.nickname = nickname
        info.filePath = filepath
        # Convert all (non-url) paths to absolute
        # (otherwise they are relative to the project file, which probably isn't what the user meant)
        if not isUrl(filepath):
            comp.externalPath = os.path.abspath(comp.externalPath)
            info.filePath = comp.totalPath()
        return info
def generateBatchPredictions(workflow, batchInputPaths, batchExportDir, batchOutputSuffix, exportedDatasetName, stackVolumeCacheDir):
    """
    Run the batch export for each of the given input files,
    writing the results to corresponding h5 files.

    The inputs are first passed through ``convertStacksToH5`` (with
    ``stackVolumeCacheDir``); the output file names are still derived from
    the paths as originally given.

    Returns the value of the first ``ExportResult`` slot of the batch results operator.
    """
    # Keep the caller's paths around; they are needed for the output names below.
    requestedPaths = list(batchInputPaths)
    convertedPaths = convertStacksToH5(batchInputPaths, stackVolumeCacheDir)

    def makeInfo(path):
        datasetInfo = DatasetInfo()
        datasetInfo.location = DatasetInfo.Location.FileSystem

        # Use absolute paths: relative ones would be taken relative to the
        #  project file, which probably isn't what the user meant.
        components = PathComponents(path)
        components.externalPath = os.path.abspath(components.externalPath)

        datasetInfo.filePath = components.totalPath()
        return datasetInfo

    inputInfos = [makeInfo(path) for path in convertedPaths]

    # The export directory is made absolute for the same reason (unless empty)
    if batchExportDir != '':
        batchExportDir = os.path.abspath(batchExportDir)

    # Feed the inputs to the batch input operator
    inputOperator = workflow.batchInputApplet.topLevelOperator
    inputOperator.DatasetGroup.resize(len(inputInfos))
    for datasetInfo, laneSlot in zip(inputInfos, inputOperator.DatasetGroup):
        # FIXME: This assumes that the workflow has exactly one dataset role.
        laneSlot[0].setValue(datasetInfo)

    # Set the export options on the batch results operator
    resultsOperator = workflow.batchResultsApplet.topLevelOperator

    # Output files are named after their input file by default.  After a
    #  stack-to-hdf5 conversion the user wouldn't recognize that name, so the
    #  names are derived from the *original* input paths instead.
    nameBases = [requested.replace('*', 'STACKED') for requested in requestedPaths]

    resultsOperator.OutputFileNameBase.setValues(nameBases)
    resultsOperator.ExportDirectory.setValue(batchExportDir)
    resultsOperator.Format.setValue(ExportFormat.H5)
    resultsOperator.Suffix.setValue(batchOutputSuffix)
    resultsOperator.InternalPath.setValue(exportedDatasetName)

    logger.info("Exporting data to " + resultsOperator.OutputDataPath[0].value)

    # Progress is only logged for now; repeated identical values are skipped.
    lastReported = {'percent': None}

    def handleProgress(percentComplete):
        if lastReported['percent'] == percentComplete:
            return
        lastReported['percent'] = percentComplete
        logger.info("Batch job: {}% complete.".format(percentComplete))

    resultsOperator.ProgressSignal[0].value.subscribe(handleProgress)

    # Reading the result slot triggers the export.
    return resultsOperator.ExportResult[0].value
Ejemplo n.º 9
0
    def configure_operator_with_parsed_args(self, parsed_args):
        """
        Helper function for headless workflows.
        Configures this applet's top-level operator according to the settings provided in ``parsed_args``.

        Input files are read either from one argument per dataset role, or from the
        plain ``input_files`` list, which is assigned to role 0.  Supplying both is an error.

        :param parsed_args: Must be an ``argparse.Namespace`` as returned by :py:meth:`parse_known_cmdline_args()`.
        :raises Exception: if ``input_files`` is given together with any per-role file list.
        """
        import tempfile

        role_names = self.topLevelOperator.DatasetRoles.value
        role_paths = collections.OrderedDict()
        if role_names:
            for role_index, role_name in enumerate(role_names):
                arg_name = self._role_name_to_arg_name(role_name)
                role_paths[role_index] = getattr(parsed_args, arg_name)

        if parsed_args.input_files:
            # We allow the file list to go to the 'default' role, but only if no other roles were explicitly configured.
            if any(role_paths.values()):
                # FIXME: This error message could be more helpful.
                role_args_str = ", ".join(
                    '--' + self._role_name_to_arg_name(name)
                    for name in role_names)
                raise Exception(
                    "Invalid command line arguments: All roles must be configured explicitly.\n"
                    "Use the following flags to specify which files are matched with which inputs:\n"
                    + role_args_str)
            role_paths = {0: parsed_args.input_files}

        opDataSelection = self.topLevelOperator
        for role_index, input_paths in role_paths.items():
            # If the user doesn't want image stacks to be copied into the project file,
            #  we generate hdf5 volumes in a temporary directory and use those files instead.
            if parsed_args.preconvert_stacks:
                input_paths = self.convertStacksToH5(input_paths,
                                                     tempfile.gettempdir())

            input_infos = []
            for p in input_paths:
                info = DatasetInfo()
                info.location = DatasetInfo.Location.FileSystem
                info.filePath = p

                comp = PathComponents(p)

                # Convert all (non-url) paths to absolute
                # (otherwise they are relative to the project file, which probably isn't what the user meant)
                if not isUrl(p):
                    comp.externalPath = os.path.abspath(comp.externalPath)
                    info.filePath = comp.totalPath()
                info.nickname = comp.filenameBase

                # Remove globstring syntax.
                info.nickname = info.nickname.replace('*', '')
                if os.path.pathsep in info.nickname:
                    # Several names joined with the path separator: use the first one.
                    # The attribute is spelled ``filenameBase`` (same as above).
                    first_name = info.nickname.split(os.path.pathsep)[0]
                    info.nickname = PathComponents(first_name).filenameBase
                input_infos.append(info)

            # Never shrink the group: lanes created for other roles must stay.
            existing_lanes = len(opDataSelection.DatasetGroup)
            opDataSelection.DatasetGroup.resize(
                max(len(input_infos), existing_lanes))
            for lane_index, info in enumerate(input_infos):
                opDataSelection.DatasetGroup[lane_index][role_index].setValue(
                    info)

            # Warn once per role if any of its images is flagged ``prefer_2d``.
            need_warning = any(
                opDataSelection.ImageGroup[lane_index][role_index].meta.prefer_2d
                for lane_index in range(len(input_infos)))

            if need_warning:
                logger.warning(
                    "*******************************************************************************************"
                )
                logger.warning(
                    "Some of your input data is stored in a format that is not efficient for 3D access patterns."
                )
                logger.warning(
                    "Performance may suffer as a result.  For best performance, use a chunked HDF5 volume."
                )
                logger.warning(
                    "*******************************************************************************************"
                )
Ejemplo n.º 10
0
    def configure_operator_with_parsed_args(self, parsed_args):
        """
        Helper function for headless workflows.
        Configures this applet's top-level operator according to the settings provided in ``parsed_args``.

        Input files are read either from one argument per dataset role, or from the
        plain ``input_files`` list, which is assigned to role 0.  Supplying both is an error.

        :param parsed_args: Must be an ``argparse.Namespace`` as returned by :py:meth:`parse_known_cmdline_args()`.
        :raises Exception: if ``input_files`` is given together with any per-role file list.
        """
        import tempfile

        role_names = self.topLevelOperator.DatasetRoles.value
        role_paths = collections.OrderedDict()
        if role_names:
            for role_index, role_name in enumerate(role_names):
                arg_name = self._role_name_to_arg_name(role_name)
                role_paths[role_index] = getattr(parsed_args, arg_name)

        if parsed_args.input_files:
            # We allow the file list to go to the 'default' role, but only if no other roles were explicitly configured.
            if any(role_paths.values()):
                # FIXME: This error message could be more helpful.
                role_args_str = ", ".join( '--' + self._role_name_to_arg_name(name) for name in role_names )
                raise Exception("Invalid command line arguments: All roles must be configured explicitly.\n"
                                "Use the following flags to specify which files are matched with which inputs:\n"
                                + role_args_str )
            role_paths = { 0 : parsed_args.input_files }

        opDataSelection = self.topLevelOperator
        for role_index, input_paths in role_paths.items():
            # If the user doesn't want image stacks to be copied into the project file,
            #  we generate hdf5 volumes in a temporary directory and use those files instead.
            if parsed_args.preconvert_stacks:
                input_paths = self.convertStacksToH5( input_paths, tempfile.gettempdir() )

            input_infos = []
            for p in input_paths:
                info = DatasetInfo()
                info.location = DatasetInfo.Location.FileSystem
                info.filePath = p

                comp = PathComponents(p)

                # Convert all (non-url) paths to absolute
                # (otherwise they are relative to the project file, which probably isn't what the user meant)
                if not isUrl(p):
                    comp.externalPath = os.path.abspath(comp.externalPath)
                    info.filePath = comp.totalPath()
                info.nickname = comp.filenameBase

                # Remove globstring syntax.
                info.nickname = info.nickname.replace('*', '')
                if os.path.pathsep in info.nickname:
                    # Several names joined with the path separator: use the first one.
                    # The attribute is spelled ``filenameBase`` (same as above).
                    first_name = info.nickname.split(os.path.pathsep)[0]
                    info.nickname = PathComponents(first_name).filenameBase
                input_infos.append(info)

            # Never shrink the group: lanes created for other roles must stay.
            existing_lanes = len(opDataSelection.DatasetGroup)
            opDataSelection.DatasetGroup.resize( max(len(input_infos), existing_lanes) )
            for lane_index, info in enumerate(input_infos):
                opDataSelection.DatasetGroup[lane_index][role_index].setValue( info )

            # Warn once per role if any of its images is flagged ``prefer_2d``.
            need_warning = any( opDataSelection.ImageGroup[lane_index][role_index].meta.prefer_2d
                                for lane_index in range(len(input_infos)) )

            if need_warning:
                logger.warning("*******************************************************************************************")
                logger.warning("Some of your input data is stored in a format that is not efficient for 3D access patterns.")
                logger.warning("Performance may suffer as a result.  For best performance, use a chunked HDF5 volume.")
                logger.warning("*******************************************************************************************")
def generateBatchPredictions(workflow, batchInputPaths, batchExportDir,
                             batchOutputSuffix, exportedDatasetName,
                             stackVolumeCacheDir):
    """
    Run the batch export for each of the given input files,
    writing the results to corresponding h5 files.

    The inputs are first passed through ``convertStacksToH5`` (with
    ``stackVolumeCacheDir``); the output file names are still derived from
    the paths as originally given.

    Returns the value of the first ``ExportResult`` slot of the batch results operator.
    """
    # Keep the caller's paths around; they are needed for the output names below.
    requestedPaths = list(batchInputPaths)
    convertedPaths = convertStacksToH5(batchInputPaths, stackVolumeCacheDir)

    def makeInfo(path):
        datasetInfo = DatasetInfo()
        datasetInfo.location = DatasetInfo.Location.FileSystem

        # Use absolute paths: relative ones would be taken relative to the
        #  project file, which probably isn't what the user meant.
        components = PathComponents(path)
        components.externalPath = os.path.abspath(components.externalPath)

        datasetInfo.filePath = components.totalPath()
        return datasetInfo

    inputInfos = [makeInfo(path) for path in convertedPaths]

    # The export directory is made absolute for the same reason (unless empty)
    if batchExportDir != '':
        batchExportDir = os.path.abspath(batchExportDir)

    # Feed the inputs to the batch input operator
    inputOperator = workflow.batchInputApplet.topLevelOperator
    inputOperator.DatasetGroup.resize(len(inputInfos))
    for datasetInfo, laneSlot in zip(inputInfos, inputOperator.DatasetGroup):
        # FIXME: This assumes that the workflow has exactly one dataset role.
        laneSlot[0].setValue(datasetInfo)

    # Set the export options on the batch results operator
    resultsOperator = workflow.batchResultsApplet.topLevelOperator

    # Output files are named after their input file by default.  After a
    #  stack-to-hdf5 conversion the user wouldn't recognize that name, so the
    #  names are derived from the *original* input paths instead.
    nameBases = [
        requested.replace('*', 'STACKED') for requested in requestedPaths
    ]

    resultsOperator.OutputFileNameBase.setValues(nameBases)
    resultsOperator.ExportDirectory.setValue(batchExportDir)
    resultsOperator.Format.setValue(ExportFormat.H5)
    resultsOperator.Suffix.setValue(batchOutputSuffix)
    resultsOperator.InternalPath.setValue(exportedDatasetName)

    logger.info("Exporting data to " + resultsOperator.OutputDataPath[0].value)

    # Progress is only logged for now; repeated identical values are skipped.
    lastReported = {'percent': None}

    def handleProgress(percentComplete):
        if lastReported['percent'] == percentComplete:
            return
        lastReported['percent'] = percentComplete
        logger.info("Batch job: {}% complete.".format(percentComplete))

    resultsOperator.ProgressSignal[0].value.subscribe(handleProgress)

    # Reading the result slot triggers the export.
    return resultsOperator.ExportResult[0].value