Example #1
    def __init__(self, filepath=None, jsonNamespace=None, cwd=None):
        """
        filepath: may be a globstring or a full hdf5 path+dataset
        jsonNamespace: If provided, overrides default settings after filepath is applied
        cwd: The working directory for interpreting relative paths.  If not provided, os.getcwd() is used.
        """
        cwd = cwd or os.getcwd()
        Location = DatasetInfo.Location
        self.location = Location.FileSystem # Whether the data will be found/stored on the filesystem or in the project file
        self._filePath = ""                 # The original path to the data (also used as a fallback if the data isn't in the project yet)
        self._datasetId = ""                # The name of the data within the project file (if it is stored locally)
        self.allowLabels = True             # OBSOLETE: Whether or not this dataset should be used for training a classifier.
        self.drange = None
        self.normalizeDisplay = True
        self.fromstack = False
        self.nickname = ""
        self.axistags = None
        self.subvolume_roi = None

        # Set defaults for location, nickname, filepath, and fromstack
        if filepath:
            # Check for sequences (either a globstring or os.pathsep-separated paths).
            file_list = None
            if '*' in filepath:
                file_list = glob.glob(filepath)
            if not isUrl(filepath) and os.path.pathsep in filepath:
                file_list = filepath.split(os.path.pathsep)
            
            # For stacks, choose nickname based on a common prefix
            if file_list:
                fromstack = True
    
                # Convert all paths to absolute 
                file_list = [make_absolute(f, cwd) for f in file_list]  # list, not a lazy map: joined and indexed below
                filepath = os.path.pathsep.join( file_list )
    
                # Add an underscore for each wildcard digit
                prefix = os.path.commonprefix(file_list)
                num_wildcards = len(file_list[-1]) - len(prefix) - len( os.path.splitext(file_list[-1])[1] )
                nickname = PathComponents(prefix).filenameBase + ("_"*num_wildcards)
            else:
                fromstack = False
                if not isUrl(filepath):
                    # Convert all (non-url) paths to absolute 
                    filepath = make_absolute(filepath, cwd)
                nickname = PathComponents(filepath).filenameBase

            self.location = DatasetInfo.Location.FileSystem
            self.nickname = nickname
            self.filePath = filepath
            self.fromstack = fromstack

        if jsonNamespace is not None:
            self.updateFromJson( jsonNamespace )
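
A hedged usage sketch for the constructor above; the import path for DatasetInfo is an assumption:

import os
from ilastik.applets.dataSelection.opDataSelection import DatasetInfo  # assumed module path

# A globstring yields a stack: fromstack=True, filePath becomes the
# os.path.pathsep-joined absolute matches, and the nickname is the common
# prefix plus one '_' per wildcard digit.
stack_info = DatasetInfo(filepath='/data/slices/img_*.png')

# A single (non-URL) path is made absolute against cwd; nickname is its basename.
single_info = DatasetInfo(filepath='volume.h5/data', cwd='/data')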
Example #2
    def parse_known_cmdline_args(self, cmdline_args):
        """
        Helper function for headless workflows.
        Parses command-line args that can be used to configure the ``DataSelectionApplet`` top-level operator 
        and returns ``(parsed_args, unused_args)``, similar to ``argparse.ArgumentParser.parse_known_args()``
        
        Relative paths are converted to absolute paths **according to ``os.getcwd()``**,
        not according to the project file location, since this is more likely to be what headless users expect.

        .. note:: If the top-level operator was configured with multiple 'roles', then the input files for
                  each role can be configured separately:
                  $ python ilastik.py [other workflow options] --my-role-A inputA1.png inputA2.png --my-role-B inputB1.png inputB2.png
                  If the workflow has only one role (or only one required role), then the role-name flag can be omitted:
                  $ python ilastik.py [other workflow options] input1.png input2.png

        See also: :py:meth:`configure_operator_with_parsed_args()`.
        """
        role_names = self.topLevelOperator.DatasetRoles.value
        arg_parser = argparse.ArgumentParser()
        if role_names:
            for role_name in role_names:
                arg_name = self._role_name_to_arg_name(role_name)
                arg_parser.add_argument('--' + arg_name, nargs='+', help='List of input files for the {} role'.format( role_name ))
        
        # Finally, a catch-all for role 0 (if the workflow only has one role, there's no need to provide role names)
        arg_parser.add_argument('input_files', nargs='*', help='List of input files to process.')
            
        arg_parser.add_argument('--preconvert_stacks', help="Convert image stacks to temporary hdf5 files before loading them.", action='store_true', default=False)
        parsed_args, unused_args = arg_parser.parse_known_args(cmdline_args)

        for i, path in enumerate( parsed_args.input_files ):
            # Replace '~' with home dir
            parsed_args.input_files[i] = os.path.expanduser( path )
        
        # Check for errors: Do all input files exist?
        all_input_paths = list(parsed_args.input_files)
        for role_name in role_names:
            arg_name = self._role_name_to_arg_name(role_name)
            role_paths = getattr(parsed_args, arg_name)
            if role_paths:
                all_input_paths += role_paths
        error = False
        for p in all_input_paths:
            if isUrl(p):
                # Don't error-check urls in advance.
                continue
            p = PathComponents(p).externalPath
            if '*' in p:
                if len(glob.glob(p)) == 0:
                    logger.error("Could not find any files for globstring: {}".format(p))
                    logger.error("Check your quotes!")
                    error = True
            elif not os.path.exists(p):
                logger.error("Input file does not exist: " + p)
                error = True
        if error:
            raise RuntimeError("Could not find one or more input files.  See logged errors.")

        return parsed_args, unused_args
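
A hedged usage sketch; `applet` is assumed to be a configured DataSelectionApplet, and '--raw-data' is a guess at what _role_name_to_arg_name produces for a 'Raw Data' role:

cmdline = ['--raw-data', 'volume.h5/data', '--retrain']
parsed_args, unused_args = applet.parse_known_cmdline_args(cmdline)
# unused_args (here ['--retrain']) are left for the workflow's own parser.
applet.configure_operator_with_parsed_args(parsed_args)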
Example #3
    def _applyStorageComboToTempOps(self, index):
        if index == -1:
            return

        newStorageLocation, goodcast = self.storageComboBox.itemData(
            index).toInt()
        assert goodcast

        # Save a copy of our settings
        oldInfos = {}
        for laneIndex, op in self.tempOps.items():
            oldInfos[laneIndex] = copy.copy(op.Dataset.value)

        # Attempt to apply to all temp operators
        try:
            for laneIndex, op in self.tempOps.items():
                info = copy.copy(op.Dataset.value)

                if info.location == DatasetInfo.Location.ProjectInternal:
                    thisLaneStorage = StorageLocation.ProjectFile
                elif info.location == DatasetInfo.Location.FileSystem:
                    # Determine if the path is relative or absolute
                    if isUrl(info.filePath) or os.path.isabs(info.filePath):
                        thisLaneStorage = StorageLocation.AbsoluteLink
                    else:
                        thisLaneStorage = StorageLocation.RelativeLink

                if thisLaneStorage != newStorageLocation:
                    if newStorageLocation == StorageLocation.ProjectFile:
                        info.location = DatasetInfo.Location.ProjectInternal
                    else:
                        info.location = DatasetInfo.Location.FileSystem
                        cwd = op.WorkingDirectory.value
                        absPath, relPath = getPathVariants(info.filePath, cwd)
                        if relPath is not None and newStorageLocation == StorageLocation.RelativeLink:
                            info.filePath = relPath
                        elif newStorageLocation == StorageLocation.AbsoluteLink:
                            info.filePath = absPath
                        else:
                            assert False, "Unknown storage location setting."
                    op.Dataset.setValue(info)
            self._error_fields.discard('Storage Location')
            return True

        except Exception as e:
            # Revert everything back to the previous state
            for laneIndex, op in self.tempOps.items():
                op.Dataset.setValue(oldInfos[laneIndex])

            msg = "Could not set new storage location settings due to an exception:\n"
            msg += "{}".format(e)
            log_exception(logger, msg)
            QMessageBox.warning(self, "Error", msg)
            self._error_fields.add('Storage Location')
            return False

        finally:
            self._updateStorageCombo()
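
The save/apply/revert pattern above can be captured generically; a minimal, ilastik-independent sketch, where `ops` is any mapping of lane index to objects exposing a Dataset slot with value/setValue:

import copy

def apply_transactionally(ops, mutate):
    """Apply `mutate` to a copy of every op's Dataset value; roll all back on failure."""
    old_infos = {lane: copy.copy(op.Dataset.value) for lane, op in ops.items()}
    try:
        for lane, op in ops.items():
            info = copy.copy(op.Dataset.value)
            mutate(info)
            op.Dataset.setValue(info)
        return True
    except Exception:
        for lane, op in ops.items():
            op.Dataset.setValue(old_infos[lane])
        return False

# e.g.: apply_transactionally(self.tempOps,
#           lambda info: setattr(info, 'location', DatasetInfo.Location.ProjectInternal))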
Example #5
    def create_dataset_info(self,
                            url: Union[Path, str],
                            axistags: Optional[vigra.AxisTags] = None,
                            sequence_axis: str = "z") -> DatasetInfo:
        url = str(url)
        if isUrl(url):
            return UrlDatasetInfo(url=url, axistags=axistags)
        else:
            return RelativeFilesystemDatasetInfo.create_or_fallback_to_absolute(
                filePath=url, axistags=axistags, sequence_axis=sequence_axis)
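
A hedged usage sketch; `shell` stands in for whatever object exposes this helper:

from pathlib import Path

info_from_url = shell.create_dataset_info('http://localhost:8000/datasets/volume')
# -> UrlDatasetInfo

info_from_file = shell.create_dataset_info(Path('slices/img_*.tif'), sequence_axis='t')
# -> RelativeFilesystemDatasetInfo, falling back to an absolute path when the
#    file lies outside the project directory.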
Example #6
def _append_lane(workflow, input_filepath, axisorder=None):
    """
    Add a lane to the project file for the given input file.

    If axisorder is given, override the default axisorder for
    the file and force the project to use the given one.

    Globstrings are supported, in which case the files are converted to HDF5 first.
    """
    # If the filepath is a globstring, convert the stack to h5  # todo: skip this?
    tmp_dir = tempfile.mkdtemp()
    input_filepath = DataSelectionApplet.convertStacksToH5([input_filepath],
                                                           tmp_dir)[0]

    import errno

    try:
        os.rmdir(tmp_dir)
    except OSError as e:
        if e.errno == errno.ENOTEMPTY:  # ENOTEMPTY (39 on Linux); use errno for portability
            logger.warning(
                'Temporary directory {} was populated: should be deleted'.format(tmp_dir))
        else:
            raise

    info = DatasetInfo()
    info.location = DatasetInfo.Location.FileSystem
    info.filePath = input_filepath

    comp = PathComponents(input_filepath)

    # Convert all (non-url) paths to absolute
    # (otherwise they are relative to the project file, which probably isn't what the user meant)
    if not isUrl(input_filepath):
        comp.externalPath = os.path.abspath(comp.externalPath)
        info.filePath = comp.totalPath()
    info.nickname = comp.filenameBase
    if axisorder:
        info.axistags = vigra.defaultAxistags(axisorder)

    logger.debug("adding lane: {}".format(info))

    opDataSelection = workflow.dataSelectionApplet.topLevelOperator

    # Add a lane
    num_lanes = len(opDataSelection.DatasetGroup) + 1
    logger.debug("num_lanes: {}".format(num_lanes))
    opDataSelection.DatasetGroup.resize(num_lanes)

    # Configure it.
    role_index = 0  # raw data
    opDataSelection.DatasetGroup[-1][role_index].setValue(info)

    workflow.handleNewLanesAdded()
Example #8
    def updateFromSlot(self):
        if self._urlSlot.ready():
            # FIXME: Choose a default dvid url...
            file_path = self._urlSlot.value
            if not isUrl(file_path):
                file_path = ""

            # Remove extension
            file_path = os.path.splitext(file_path)[0]
            self.urlLabel.setText(decode_to_qstring(file_path))

            # Re-configure the slot in case we removed the extension
            self._urlSlot.setValue(file_path)
Example #9
    def _updateStorageCombo(self):
        sharedStorageSetting = None
        for laneIndex in self._laneIndexes:
            op = self.tempOps[laneIndex]
            info = op.Dataset.value

            # Determine the current setting
            location = info.location

            if location == DatasetInfo.Location.ProjectInternal:
                storageSetting = StorageLocation.ProjectFile
            elif location == DatasetInfo.Location.FileSystem:
                # Determine if the path is relative or absolute
                if isUrl(info.filePath) or os.path.isabs(info.filePath):
                    storageSetting = StorageLocation.AbsoluteLink
                else:
                    storageSetting = StorageLocation.RelativeLink

            if sharedStorageSetting is None:
                sharedStorageSetting = storageSetting
            elif sharedStorageSetting != storageSetting:
                # Not all lanes have the same setting
                sharedStorageSetting = -1
                break

        if sharedStorageSetting == -1:
            self.storageComboBox.setCurrentIndex(-1)
        else:
            comboIndex = self.storageComboBox.findData(
                QVariant(sharedStorageSetting))
            self.storageComboBox.setCurrentIndex(comboIndex)

        disableLinks = False
        for laneIndex in self._laneIndexes:
            op = self.tempOps[laneIndex]
            info = op.Dataset.value

            disableLinks |= info.fromstack

        if disableLinks:
            # If any of the files were loaded from a stack, then you can't refer to them via a link.
            absIndex = self.storageComboBox.findData(
                QVariant(StorageLocation.AbsoluteLink))
            relIndex = self.storageComboBox.findData(
                QVariant(StorageLocation.RelativeLink))

            # http://theworldwideinternet.blogspot.com/2011/01/disabling-qcombobox-items.html
            model = self.storageComboBox.model()
            model.setData(model.index(absIndex, 0), 0, Qt.UserRole - 1)
            model.setData(model.index(relIndex, 0), 0, Qt.UserRole - 1)
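
A standalone sketch of the combo-item-disabling trick used above, written against PyQt5 (the snippet itself uses the older PyQt4 QVariant API). With a QStandardItemModel, role Qt.UserRole - 1 holds the item's flags, so writing 0 clears Qt.ItemIsEnabled and Qt.ItemIsSelectable:

import sys
from PyQt5.QtCore import Qt
from PyQt5.QtWidgets import QApplication, QComboBox

app = QApplication(sys.argv)
combo = QComboBox()
combo.addItems(['Copied into project file', 'Absolute Link', 'Relative Link'])

model = combo.model()  # a QStandardItemModel by default
for row in (1, 2):     # disable the two link options
    model.setData(model.index(row, 0), 0, Qt.UserRole - 1)

combo.show()
sys.exit(app.exec_())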
def append_lane(workflow, input_filepath, axisorder=None):
    # Sanity checks
    assert isinstance(workflow, PixelClassificationWorkflow)
    opPixelClassification = workflow.pcApplet.topLevelOperator
    assert opPixelClassification.Classifier.ready()

    # If the filepath is a globstring, convert the stack to h5
    input_filepath = DataSelectionApplet.convertStacksToH5( [input_filepath], TMP_DIR )[0]

    info = DatasetInfo()
    info.location = DatasetInfo.Location.FileSystem
    info.filePath = input_filepath

    comp = PathComponents(input_filepath)

    # Convert all (non-url) paths to absolute 
    # (otherwise they are relative to the project file, which probably isn't what the user meant)        
    if not isUrl(input_filepath):
        comp.externalPath = os.path.abspath(comp.externalPath)
        info.filePath = comp.totalPath()
    info.nickname = comp.filenameBase
    if axisorder:
        info.axistags = vigra.defaultAxistags(axisorder)

    logger.debug( "adding lane: {}".format( info ) )

    opDataSelection = workflow.dataSelectionApplet.topLevelOperator

    # Add a lane
    num_lanes = len( opDataSelection.DatasetGroup )+1
    logger.debug( "num_lanes: {}".format( num_lanes ) )
    opDataSelection.DatasetGroup.resize( num_lanes )
    
    # Configure it.
    role_index = 0 # raw data
    opDataSelection.DatasetGroup[-1][role_index].setValue( info )

    # Sanity check
    assert len(opPixelClassification.InputImages) == num_lanes
    
    return opPixelClassification
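
A hedged usage sketch: append a lane, then pull headless predictions for it (slot name and indexing follow ilastik's pixel classification examples, but are assumptions here):

opPixelClassification = append_lane(workflow, 'new_volume.h5/data', axisorder='zyx')
predictions = opPixelClassification.HeadlessPredictionProbabilities[-1][:].wait()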
Example #12
    def parse_known_cmdline_args(self, cmdline_args):
        """
        Helper function for headless workflows.
        Parses command-line args that can be used to configure the ``DataSelectionApplet`` top-level operator 
        and returns ``(parsed_args, unused_args)``, similar to ``argparse.ArgumentParser.parse_known_args()``
        
        Relative paths are converted to absolute paths **according to ``os.getcwd()``**,
        not according to the project file location, since this is more likely to be what headless users expect.

        .. note:: Currently, this command-line interface only supports workflows with a SINGLE dataset role.
                  Workflows that take multiple files per lane will need to configure the data selection applet
                  by some other means.  :py:meth:`DatasetInfo.updateFromJson()` might be useful in that case.
        
        See also: :py:meth:`configure_operator_with_parsed_args()`.
        """
        # Currently, we don't support any special options -- just a list of files        
        arg_parser = argparse.ArgumentParser()
        arg_parser.add_argument('input_files', nargs='*', help='List of input files to process.')
        arg_parser.add_argument('--preconvert_stacks', help="Convert image stacks to temporary hdf5 files before loading them.", action='store_true', default=False)
        parsed_args, unused_args = arg_parser.parse_known_args(cmdline_args)
        
        # Check for errors: Do all input files exist?
        input_paths = parsed_args.input_files
        error = False
        for p in input_paths:
            if isUrl(p):
                # Don't error-check urls in advance.
                continue
            p = PathComponents(p).externalPath
            if '*' in p:
                if len(glob.glob(p)) == 0:
                    logger.error("Could not find any files for globstring: {}".format(p))
                    logger.error("Check your quotes!")
                    error = True
            elif not os.path.exists(p):
                logger.error("Input file does not exist: " + p)
                error = True
        if error:
            raise RuntimeError("Could not find one or more input files.  See logged errors.")

        return parsed_args, unused_args
def append_lane(workflow, input_filepath, axisorder=None):
    """
    Add a lane to the project file for the given input file.

    If axisorder is given, override the default axisorder for
    the file and force the project to use the given one.
    
    Globstrings are supported, in which case the files are converted to HDF5 first.
    """
    # If the filepath is a globstring, convert the stack to h5
    input_filepath = DataSelectionApplet.convertStacksToH5( [input_filepath], tempfile.mkdtemp() )[0]

    info = DatasetInfo()
    info.location = DatasetInfo.Location.FileSystem
    info.filePath = input_filepath

    comp = PathComponents(input_filepath)

    # Convert all (non-url) paths to absolute 
    # (otherwise they are relative to the project file, which probably isn't what the user meant)        
    if not isUrl(input_filepath):
        comp.externalPath = os.path.abspath(comp.externalPath)
        info.filePath = comp.totalPath()
    info.nickname = comp.filenameBase
    if axisorder:
        info.axistags = vigra.defaultAxistags(axisorder)

    logger.debug( "adding lane: {}".format( info ) )

    opDataSelection = workflow.dataSelectionApplet.topLevelOperator

    # Add a lane
    num_lanes = len( opDataSelection.DatasetGroup )+1
    logger.debug( "num_lanes: {}".format( num_lanes ) )
    opDataSelection.DatasetGroup.resize( num_lanes )
    
    # Configure it.
    role_index = 0 # raw data
    opDataSelection.DatasetGroup[-1][role_index].setValue( info )
Example #14
    def create_default_headless_dataset_info(cls, filepath):
        """
        filepath may be a globstring or a full hdf5 path+dataset 
        """
        comp = PathComponents(filepath)
        nickname = comp.filenameBase
        
        # Remove globstring syntax.
        if '*' in nickname:
            nickname = nickname.replace('*', '')
        if os.path.pathsep in nickname:
            nickname = PathComponents(nickname.split(os.path.pathsep)[0]).filenameBase

        info = DatasetInfo()
        info.location = DatasetInfo.Location.FileSystem
        info.nickname = nickname
        info.filePath = filepath
        # Convert all (non-url) paths to absolute 
        # (otherwise they are relative to the project file, which probably isn't what the user meant)
        if not isUrl(filepath):
            comp.externalPath = os.path.abspath(comp.externalPath)
            info.filePath = comp.totalPath()
        return info
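
Hedged expectations for the nickname cleanup above (PathComponents semantics assumed; the owning class is presumably DataSelectionApplet):

import os

info = DataSelectionApplet.create_default_headless_dataset_info('/data/img_*.png')
# info.nickname == 'img_'   (the '*' is stripped)

two_files = os.path.pathsep.join(['/data/a.h5/vol', '/data/b.h5/vol'])
info = DataSelectionApplet.create_default_headless_dataset_info(two_files)
# info.nickname == 'a'      (taken from the first path in the list)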
Example #15
    def configure_operator_with_parsed_args(self, parsed_args):
        """
        Helper function for headless workflows.
        Configures this applet's top-level operator according to the settings provided in ``parsed_args``.
        
        :param parsed_args: Must be an ``argparse.Namespace`` as returned by :py:meth:`parse_known_cmdline_args()`.
        """
        input_paths = parsed_args.input_files

        # If the user doesn't want image stacks to be copied into the project file,
        #  we generate hdf5 volumes in a temporary directory and use those files instead.        
        if parsed_args.preconvert_stacks:
            import tempfile
            input_paths = self.convertStacksToH5( input_paths, tempfile.gettempdir() )
        
        input_infos = []
        for p in input_paths:
            info = DatasetInfo()
            info.location = DatasetInfo.Location.FileSystem
            info.filePath = p

            comp = PathComponents(p)

            # Convert all (non-url) paths to absolute 
            # (otherwise they are relative to the project file, which probably isn't what the user meant)        
            if not isUrl(p):
                comp.externalPath = os.path.abspath(comp.externalPath)
                info.filePath = comp.totalPath()
            info.nickname = comp.filenameBase
            input_infos.append(info)

        opDataSelection = self.topLevelOperator
        opDataSelection.DatasetGroup.resize( len(input_infos) )
        for lane_index, info in enumerate(input_infos):
            # Only one dataset role in pixel classification
            opDataSelection.DatasetGroup[lane_index][0].setValue( info )
    def __init__(self, filepath=None, jsonNamespace=None, cwd=None, preloaded_array=None):
        """
        filepath: may be a globstring or a full hdf5 path+dataset
        
        jsonNamespace: If provided, overrides default settings after filepath is applied
        
        cwd: The working directory for interpreting relative paths.  If not provided, os.getcwd() is used.
        
        preloaded_array: Instead of providing a filePath to read from, a pre-loaded array can be directly provided.
                         In that case, you'll probably want to configure the axistags member, or provide a tagged vigra.VigraArray.
        """
        assert preloaded_array is None or not filepath, "You can't provide filepath and a preloaded_array"
        cwd = cwd or os.getcwd()
        self.preloaded_array = preloaded_array # See description above.
        Location = DatasetInfo.Location
        self._filePath = ""                 # The original path to the data (also used as a fallback if the data isn't in the project yet)
        self._datasetId = ""                # The name of the data within the project file (if it is stored locally)
        self.allowLabels = True             # OBSOLETE: Whether or not this dataset should be used for training a classifier.
        self.drange = None
        self.normalizeDisplay = True
        self.fromstack = False
        self.nickname = ""
        self.axistags = None
        self.subvolume_roi = None
        self.location = Location.FileSystem
        self.display_mode = 'default' # choices: default, grayscale, rgba, random-colortable, binary-mask.

        if self.preloaded_array is not None:
            self.filePath = "" # set property to ensure unique _datasetId
            self.location = Location.PreloadedArray
            self.fromstack = False
            self.nickname = "preloaded-{}-array".format( self.preloaded_array.dtype.name )
            if hasattr(self.preloaded_array, 'axistags'):
                self.axistags = self.preloaded_array.axistags

        # Set defaults for location, nickname, filepath, and fromstack
        if filepath:
            # Check for sequences (either a globstring or os.pathsep-separated paths).
            file_list = None
            if '*' in filepath:
                file_list = glob.glob(filepath)
                file_list = sorted(file_list)
            if not isUrl(filepath) and os.path.pathsep in filepath:
                file_list = filepath.split(os.path.pathsep)
            
            # For stacks, choose nickname based on a common prefix
            if file_list:
                fromstack = True
    
                # Convert all paths to absolute 
                file_list = [make_absolute(f, cwd) for f in file_list]  # list, not a lazy map: joined and indexed below
                if '*' in filepath:
                    filepath = make_absolute(filepath, cwd)
                else:
                    filepath = os.path.pathsep.join( file_list )
    
                # Add an underscore for each wildcard digit
                prefix = os.path.commonprefix(file_list)
                num_wildcards = len(file_list[-1]) - len(prefix) - len( os.path.splitext(file_list[-1])[1] )
                nickname = PathComponents(prefix).filenameBase + ("_"*num_wildcards)
            else:
                fromstack = False
                if not isUrl(filepath):
                    # Convert all (non-url) paths to absolute 
                    filepath = make_absolute(filepath, cwd)
                nickname = PathComponents(filepath).filenameBase

            self.location = DatasetInfo.Location.FileSystem
            self.nickname = nickname
            self.filePath = filepath
            self.fromstack = fromstack

        if jsonNamespace is not None:
            self.updateFromJson( jsonNamespace )
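
A hedged usage sketch for the preloaded_array branch above; vigra.taggedView attaches axistags so the constructor can pick them up (DatasetInfo import path assumed):

import numpy
import vigra
from ilastik.applets.dataSelection.opDataSelection import DatasetInfo  # assumed module path

data = numpy.random.randint(0, 255, (10, 256, 256)).astype(numpy.uint8)
info = DatasetInfo(preloaded_array=vigra.taggedView(data, 'zyx'))
# info.location == DatasetInfo.Location.PreloadedArray
# info.nickname == 'preloaded-uint8-array'
# info.axistags taken from the tagged VigraArray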
Example #17
    def parse_known_cmdline_args(cls, cmdline_args, role_names):
        """
        Helper function for headless workflows.
        Parses command-line args that can be used to configure the ``DataSelectionApplet`` top-level operator 
        and returns ``(parsed_args, unused_args)``, similar to ``argparse.ArgumentParser.parse_known_args()``
        
        Relative paths are converted to absolute paths **according to ``os.getcwd()``**,
        not according to the project file location, since this is more likely to be what headless users expect.

        .. note:: If the top-level operator was configured with multiple 'roles', then the input files for
                  each role can be configured separately:
                  $ python ilastik.py [other workflow options] --my-role-A inputA1.png inputA2.png --my-role-B inputB1.png inputB2.png
                  If the workflow has only one role (or only one required role), then the role-name flag can be omitted:
                  $ python ilastik.py [other workflow options] input1.png input2.png

        See also: :py:meth:`configure_operator_with_parsed_args()`.
        """
        arg_parser = argparse.ArgumentParser()
        if role_names:
            for role_name in role_names:
                arg_name = cls._role_name_to_arg_name(role_name)
                arg_parser.add_argument(
                    '--' + arg_name,
                    nargs='+',
                    help='List of input files for the {} role'.format(
                        role_name))

        # Finally, a catch-all for role 0 (if the workflow only has one role, there's no need to provide role names)
        arg_parser.add_argument('input_files',
                                nargs='*',
                                help='List of input files to process.')

        arg_parser.add_argument(
            '--preconvert_stacks',
            help=
            "Convert image stacks to temporary hdf5 files before loading them.",
            action='store_true',
            default=False)
        parsed_args, unused_args = arg_parser.parse_known_args(cmdline_args)

        for i, path in enumerate(parsed_args.input_files):
            # Replace '~' with home dir
            parsed_args.input_files[i] = os.path.expanduser(path)

        # Check for errors: Do all input files exist?
        all_input_paths = list(parsed_args.input_files)
        for role_name in role_names:
            arg_name = cls._role_name_to_arg_name(role_name)
            role_paths = getattr(parsed_args, arg_name)
            if role_paths:
                all_input_paths += role_paths
        error = False
        for p in all_input_paths:
            if isUrl(p):
                # Don't error-check urls in advance.
                continue
            p = PathComponents(p).externalPath
            if '*' in p:
                if len(glob.glob(p)) == 0:
                    logger.error(
                        "Could not find any files for globstring: {}".format(
                            p))
                    logger.error("Check your quotes!")
                    error = True
            elif not os.path.exists(p):
                logger.error("Input file does not exist: " + p)
                error = True
        if error:
            raise RuntimeError(
                "Could not find one or more input files.  See logged errors.")

        return parsed_args, unused_args
Example #18
    def __init__(self, filepath=None, jsonNamespace=None, cwd=None,
                 preloaded_array=None, sequence_axis=None):
        """
        filepath: may be a globstring or a full hdf5 path+dataset

        jsonNamespace: If provided, overrides default settings after filepath is applied

        cwd: The working directory for interpreting relative paths.  If not provided, os.getcwd() is used.

        preloaded_array: Instead of providing a filePath to read from, a pre-loaded array can be directly provided.
                         In that case, you'll probably want to configure the axistags member, or provide a tagged
                         vigra.VigraArray.

        sequence_axis: Axis along which to stack (only applicable for stacks).
        """
        assert preloaded_array is None or not filepath, "You can't provide filepath and a preloaded_array"
        cwd = cwd or os.getcwd()
        self.preloaded_array = preloaded_array  # See description above.
        Location = DatasetInfo.Location
        # The original path to the data (also used as a fallback if the data isn't in the project yet)
        self._filePath = ""
        self._datasetId = ""                # The name of the data within the project file (if it is stored locally)
        # OBSOLETE: Whether or not this dataset should be used for training a classifier.
        self.allowLabels = True
        self.drange = None
        self.normalizeDisplay = True
        self.sequenceAxis = None
        self.fromstack = False
        self.nickname = ""
        self.axistags = None
        self.original_axistags = None
        # Necessary in headless mode in order to recover the shape of the raw data
        self.laneShape = None
        self.laneDtype = None
        # A flag indicating whether the dataset is backed by a real source (e.g. file)
        # or by the fake provided (e.g. in headless mode when raw data are not necessary)
        self.realDataSource = True
        self.subvolume_roi = None
        self.location = Location.FileSystem
        self.display_mode = 'default'  # choices: default, grayscale, rgba, random-colortable, binary-mask.

        if self.preloaded_array is not None:
            self.filePath = ""  # set property to ensure unique _datasetId
            self.location = Location.PreloadedArray
            self.nickname = "preloaded-{}-array".format(self.preloaded_array.dtype.name)
            if hasattr(self.preloaded_array, 'axistags'):
                self.axistags = self.preloaded_array.axistags

        # Set defaults for location, nickname, filepath, and fromstack
        if filepath:
            # Check for sequences (either a globstring or os.pathsep-separated paths).
            file_list = None

            # To support h5 sequences, filepath may contain external and
            # internal path components
            if not isUrl(filepath):
                file_list = filepath.split(os.path.pathsep)

                pathComponents = [PathComponents(x) for x in file_list]
                externalPaths = [pc.externalPath for pc in pathComponents]
                internalPaths = [pc.internalPath for pc in pathComponents]

                if len(file_list) > 0:
                    if len(externalPaths) == 1:
                        if '*' in externalPaths[0]:
                            if internalPaths[0] is not None:
                                assert ('*' not in internalPaths[0]), (
                                    "Only internal OR external glob placeholder supported"
                                )
                            file_list = sorted(glob.glob(filepath))
                        else:
                            file_list = [externalPaths[0]]
                            if internalPaths[0] is not None:
                                if '*' in internalPaths[0]:
                                    # overwrite internalPaths, will be assembled further down
                                    glob_string = "{}{}".format(externalPaths[0], internalPaths[0])
                                    internalPaths = \
                                        OpStreamingH5N5SequenceReaderS.expandGlobStrings(
                                            externalPaths[0], glob_string)
                                    if internalPaths:
                                        file_list = [externalPaths[0]] * len(internalPaths)
                                    else:
                                        file_list = None

                    else:
                        assert (not any('*' in ep for ep in externalPaths)), (
                            "Multiple glob paths shouldn't be happening"
                        )
                        file_list = [ex for ex in externalPaths]

                    assert all(pc.extension == pathComponents[0].extension
                               for pc in pathComponents[1::]), (
                        "Supplied multiple files with multiple extensions"
                    )
                    # The following is necessary for h5 as well as npz-files
                    internalPathExts = (
                        OpInputDataReader.h5_n5_Exts +
                        OpInputDataReader.npzExts
                    )
                    internalPathExts = [".{}".format(ipx) for ipx in internalPathExts]

                    if pathComponents[0].extension in internalPathExts and internalPaths:
                        if len(file_list) == len(internalPaths):
                            # assuming a matching internal paths to external paths
                            file_list_with_internal = []
                            for external, internal in zip(file_list, internalPaths):
                                if internal:
                                    file_list_with_internal.append('{}/{}'.format(external, internal))
                                else:
                                    file_list_with_internal.append(external)
                            file_list = file_list_with_internal
                        else:
                            # sort of fallback, in case of a mismatch in lengths
                            for i in range(len(file_list)):
                                file_list[i] += '/' + internalPaths[0]

            # For stacks, choose nickname based on a common prefix
            if file_list:
                fromstack = True
                # Convert all paths to absolute
                file_list = [make_absolute(f, cwd) for f in file_list]
                if '*' in filepath:
                    filepath = make_absolute(filepath, cwd)
                else:
                    filepath = os.path.pathsep.join(file_list)

                # Add an underscore for each wildcard digit
                prefix = os.path.commonprefix(file_list)
                num_wildcards = len(file_list[-1]) - len(prefix) - len(os.path.splitext(file_list[-1])[1])
                nickname = PathComponents(prefix).filenameBase + ("_" * num_wildcards)
            else:
                fromstack = False
                if not isUrl(filepath):
                    # Convert all (non-url) paths to absolute
                    filepath = make_absolute(filepath, cwd)
                nickname = PathComponents(filepath).filenameBase

            self.location = DatasetInfo.Location.FileSystem
            self.nickname = nickname
            self.filePath = filepath
            self.fromstack = fromstack
            self.sequenceAxis = sequence_axis

        if jsonNamespace is not None:
            self.updateFromJson(jsonNamespace)
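
A standalone sketch of what expanding an *internal* glob means here, with h5py and fnmatch standing in for OpStreamingH5N5SequenceReaderS.expandGlobStrings:

import fnmatch
import h5py

def expand_internal_glob(external_path, internal_glob):
    """Return internal HDF5 paths inside `external_path` matching `internal_glob`."""
    with h5py.File(external_path, 'r') as f:
        names = []
        f.visit(names.append)  # collects groups and datasets alike
        return sorted('/' + n for n in names
                      if fnmatch.fnmatch('/' + n, internal_glob))

# e.g. expand_internal_glob('timelapse.h5', '/t*/raw')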
    def parse_known_cmdline_args(cls, cmdline_args, role_names):
        """
        Helper function for headless workflows.
        Parses command-line args that can be used to configure the ``DataSelectionApplet`` top-level operator 
        and returns ``(parsed_args, unused_args)``, similar to ``argparse.ArgumentParser.parse_known_args()``
        
        Relative paths are converted to absolute paths **according to ``os.getcwd()``**,
        not according to the project file location, since this is more likely to be what headless users expect.

        .. note:: If the top-level operator was configured with multiple 'roles', then the input files for
                  each role can be configured separately:
                  $ python ilastik.py [other workflow options] --my-role-A inputA1.png inputA2.png --my-role-B inputB1.png inputB2.png
                  If the workflow has only one role (or only one required role), then the role-name flag can be omitted:
                  $ python ilastik.py [other workflow options] input1.png input2.png

        See also: :py:meth:`configure_operator_with_parsed_args()`.
        """
        arg_parser = argparse.ArgumentParser()
        if role_names:
            for role_name in role_names:
                arg_name = cls._role_name_to_arg_name(role_name)
                arg_parser.add_argument('--' + arg_name, nargs='+', help='List of input files for the {} role'.format( role_name ))
        
        # Finally, a catch-all for role 0 (if the workflow only has one role, there's no need to provide role names)
        arg_parser.add_argument('unspecified_input_files', nargs='*', help='List of input files to process.')
            
        arg_parser.add_argument('--preconvert_stacks', help="Convert image stacks to temporary hdf5 files before loading them.", action='store_true', default=False)
        arg_parser.add_argument('--input_axes', help="Explicitly specify the axes of your dataset.", required=False)
        parsed_args, unused_args = arg_parser.parse_known_args(cmdline_args)

        if parsed_args.unspecified_input_files:
            # We allow the file list to go to the 'default' role, 
            # but only if no other roles were explicitly configured.
            arg_names = list(map(cls._role_name_to_arg_name, role_names))
            for arg_name in arg_names:
                if getattr(parsed_args, arg_name):
                    # FIXME: This error message could be more helpful.
                    role_args = list(map( cls._role_name_to_arg_name, role_names ))
                    role_args = ['--' + s for s in role_args]
                    role_args_str = ", ".join( role_args )
                    raise Exception("Invalid command line arguments: All roles must be configured explicitly.\n"
                                    "Use the following flags to specify which files are matched with which inputs:\n"
                                    + role_args_str )
            
            # Relocate to the 'default' role
            arg_name = cls._role_name_to_arg_name(role_names[0])
            setattr(parsed_args, arg_name, parsed_args.unspecified_input_files)
            parsed_args.unspecified_input_files = None

        # Replace '~' with home dir
        for role_name in role_names:
            arg_name = cls._role_name_to_arg_name(role_name)
            paths_for_role = getattr(parsed_args, arg_name)
            if paths_for_role:
                for i, path in enumerate( paths_for_role ):
                    paths_for_role[i] = os.path.expanduser( path )            
        
        # Check for errors: Do all input files exist?
        all_input_paths = []
        for role_name in role_names:
            arg_name = cls._role_name_to_arg_name(role_name)
            role_paths = getattr(parsed_args, arg_name)
            if role_paths:
                all_input_paths += role_paths
        error = False
        for p in all_input_paths:
            if isUrl(p):
                # Don't error-check urls in advance.
                continue
            p = PathComponents(p).externalPath
            if '*' in p:
                if len(glob.glob(p)) == 0:
                    logger.error("Could not find any files for globstring: {}".format(p))
                    logger.error("Check your quotes!")
                    error = True
            elif not os.path.exists(p):
                logger.error("Input file does not exist: " + p)
                error = True
        if error:
            raise RuntimeError("Could not find one or more input files.  See logged errors.")

        return parsed_args, unused_args
Example #20
    def openProjectFile(self, projectFilePath, force_readonly=False):
        # If the user gave a URL to a DVID key, then download the project file from dvid first.
        # (So far, DVID is the only type of URL access we support for project files.)
        if isUrl(projectFilePath):
            projectFilePath = HeadlessShell.downloadProjectFromDvid(projectFilePath)

        # Make sure all workflow sub-classes have been loaded,
        #  so we can detect the workflow type in the project.
        import ilastik.workflows

        try:
            # Open the project file
            hdf5File, workflow_class, readOnly = ProjectManager.openProjectFile(projectFilePath, force_readonly)

            # If there are any "creation-time" command-line args saved to the project file,
            #  load them so that the workflow can be instantiated with the same settings
            #  that were used when the project was first created.
            project_creation_args = []
            if "workflow_cmdline_args" in list(hdf5File.keys()):
                if len(hdf5File["workflow_cmdline_args"]) > 0:
                    project_creation_args = list(map(str, hdf5File["workflow_cmdline_args"][...]))

            if workflow_class is None:
                # If the project file has no known workflow, we assume pixel classification
                import ilastik.workflows

                workflow_class = ilastik.workflows.pixelClassification.PixelClassificationWorkflow
                import warnings

                warnings.warn(
                    "Your project file ({}) does not specify a workflow type.  "
                    "Assuming Pixel Classification".format(projectFilePath)
                )

            # Create our project manager
            # This instantiates the workflow and applies all settings from the project.
            self.projectManager = ProjectManager(
                self,
                workflow_class,
                headless=True,
                workflow_cmdline_args=self._workflow_cmdline_args,
                project_creation_args=project_creation_args,
            )
            self.projectManager._loadProject(hdf5File, projectFilePath, readOnly)

        except ProjectManager.FileMissingError:
            logger.error("Couldn't find project file: {}".format(projectFilePath))
            raise
        except ProjectManager.ProjectVersionError:
            # Couldn't open project.  Try importing it.
            oldProjectFilePath = projectFilePath
            name, ext = os.path.splitext(oldProjectFilePath)

            # Create a brand new project file.
            projectFilePath = name + "_imported" + ext
            logger.info("Importing project as '" + projectFilePath + "'")
            hdf5File = ProjectManager.createBlankProjectFile(projectFilePath)

            # For now, we assume that any imported projects are pixel classification workflow projects.
            import ilastik.workflows

            default_workflow = ilastik.workflows.pixelClassification.PixelClassificationWorkflow

            # Create the project manager.
            self.projectManager = ProjectManager(
                self,
                default_workflow,
                headless=True,
                workflow_cmdline_args=self._workflow_cmdline_args,
                project_creation_args=self._workflow_cmdline_args,
            )

            self.projectManager._importProject(oldProjectFilePath, hdf5File, projectFilePath)
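
A sketch of the "creation-time args" lookup performed above, with h5py standing in for the project-file handle that ProjectManager returns:

import h5py

with h5py.File('MyProject.ilp', 'r') as f:
    creation_args = []
    if 'workflow_cmdline_args' in f and len(f['workflow_cmdline_args']) > 0:
        creation_args = [a.decode() if isinstance(a, bytes) else str(a)
                         for a in f['workflow_cmdline_args'][...]]
print(creation_args)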
Example #21
    def parse_known_cmdline_args(cls, cmdline_args, role_names):
        """
        Helper function for headless workflows.
        Parses command-line args that can be used to configure the ``DataSelectionApplet`` top-level operator
        and returns ``(parsed_args, unused_args)``, similar to ``argparse.ArgumentParser.parse_known_args()``

        Relative paths are converted to absolute paths **according to ``os.getcwd()``**,
        not according to the project file location, since this is more likely to be what headless users expect.

        .. note:: If the top-level operator was configured with multiple 'roles', then the input files for
                  each role can be configured separately:
                  $ python ilastik.py [other workflow options] --my-role-A inputA1.png inputA2.png --my-role-B
                     inputB1.png inputB2.png
                  If the workflow has only one role (or only one required role), then the role-name flag can be omitted:
                  $ python ilastik.py [other workflow options] input1.png input2.png

        See also: :py:meth:`configure_operator_with_parsed_args()`.
        """
        arg_parser = argparse.ArgumentParser()
        if role_names:
            for role_name in role_names:
                arg_name = cls._role_name_to_arg_name(role_name)
                arg_parser.add_argument(
                    '--' + arg_name,
                    nargs='+',
                    help='List of input files for the {} role'.format(
                        role_name))

        # Finally, a catch-all for role 0 (if the workflow only has one role, there's no need to provide role names)
        arg_parser.add_argument('unspecified_input_files',
                                nargs='*',
                                help='List of input files to process.')

        arg_parser.add_argument(
            '--preconvert_stacks',
            help=
            "Convert image stacks to temporary hdf5 files before loading them.",
            action='store_true',
            default=False)
        arg_parser.add_argument(
            '--input_axes',
            help="Explicitly specify the axes of your dataset.",
            required=False)
        arg_parser.add_argument('--stack_along',
                                help="Sequence axis along which to stack",
                                type=str,
                                default='z')

        parsed_args, unused_args = arg_parser.parse_known_args(cmdline_args)

        if parsed_args.unspecified_input_files:
            # We allow the file list to go to the 'default' role,
            # but only if no other roles were explicitly configured.
            arg_names = list(map(cls._role_name_to_arg_name, role_names))
            for arg_name in arg_names:
                if getattr(parsed_args, arg_name):
                    # FIXME: This error message could be more helpful.
                    role_args = list(
                        map(cls._role_name_to_arg_name, role_names))
                    role_args = ['--' + s for s in role_args]
                    role_args_str = ", ".join(role_args)
                    raise Exception(
                        "Invalid command line arguments: All roles must be configured explicitly.\n"
                        "Use the following flags to specify which files are matched with which inputs:\n"
                        "" + role_args_str)

            # Relocate to the 'default' role
            arg_name = cls._role_name_to_arg_name(role_names[0])
            setattr(parsed_args, arg_name, parsed_args.unspecified_input_files)
            parsed_args.unspecified_input_files = None

        # Replace '~' with home dir
        for role_name in role_names:
            arg_name = cls._role_name_to_arg_name(role_name)
            paths_for_role = getattr(parsed_args, arg_name)
            if paths_for_role:
                for i, path in enumerate(paths_for_role):
                    paths_for_role[i] = os.path.expanduser(path)

        # Check for errors: Do all input files exist?
        all_input_paths = []
        for role_name in role_names:
            arg_name = cls._role_name_to_arg_name(role_name)
            role_paths = getattr(parsed_args, arg_name)
            if role_paths:
                all_input_paths += role_paths
        error = False
        for p in all_input_paths:
            if isUrl(p):
                # Don't error-check urls in advance.
                continue
            p = PathComponents(p).externalPath
            if '*' in p:
                if len(glob.glob(p)) == 0:
                    logger.error(
                        "Could not find any files for globstring: {}".format(
                            p))
                    logger.error("Check your quotes!")
                    error = True
            elif not os.path.exists(p):
                logger.error("Input file does not exist: " + p)
                error = True
        if error:
            raise RuntimeError(
                "Could not find one or more input files.  See logged errors.")

        return parsed_args, unused_args
Example #22
    def configure_operator_with_parsed_args(self, parsed_args):
        """
        Helper function for headless workflows.
        Configures this applet's top-level operator according to the settings provided in ``parsed_args``.
        
        :param parsed_args: Must be an ``argparse.Namespace`` as returned by :py:meth:`parse_known_cmdline_args()`.
        """
        role_names = self.topLevelOperator.DatasetRoles.value
        role_paths = collections.OrderedDict()
        if role_names:
            for role_index, role_name in enumerate(role_names):
                arg_name = self._role_name_to_arg_name(role_name)
                input_paths = getattr(parsed_args, arg_name)
                role_paths[role_index] = input_paths

        if parsed_args.input_files:
            # We allow the file list to go to the 'default' role, but only if no other roles were explicitly configured.
            for role_index, input_paths in role_paths.items():
                if input_paths:
                    # FIXME: This error message could be more helpful.
                    role_args = map(self._role_name_to_arg_name, role_names)
                    role_args = map(lambda s: '--' + s, role_args)
                    role_args_str = ", ".join(role_args)
                    raise Exception(
                        "Invalid command line arguments: All roles must be configured explicitly.\n"
                        "Use the following flags to specify which files are matched with which inputs:\n"
                        + role_args_str)
            role_paths = {0: parsed_args.input_files}

        for role_index, input_paths in role_paths.items():
            # If the user doesn't want image stacks to be copied into the project file,
            #  we generate hdf5 volumes in a temporary directory and use those files instead.
            if parsed_args.preconvert_stacks:
                import tempfile
                input_paths = self.convertStacksToH5(input_paths,
                                                     tempfile.gettempdir())

            input_infos = []
            for p in input_paths:
                info = DatasetInfo()
                info.location = DatasetInfo.Location.FileSystem
                info.filePath = p

                comp = PathComponents(p)

                # Convert all (non-url) paths to absolute
                # (otherwise they are relative to the project file, which probably isn't what the user meant)
                if not isUrl(p):
                    comp.externalPath = os.path.abspath(comp.externalPath)
                    info.filePath = comp.totalPath()
                info.nickname = comp.filenameBase

                # Remove globstring syntax.
                if '*' in info.nickname:
                    info.nickname = info.nickname.replace('*', '')
                if os.path.pathsep in info.nickname:
                    info.nickname = PathComponents(
                        info.nickname.split(os.path.pathsep)[0]).filenameBase
                input_infos.append(info)

            opDataSelection = self.topLevelOperator
            existing_lanes = len(opDataSelection.DatasetGroup)
            opDataSelection.DatasetGroup.resize(
                max(len(input_infos), existing_lanes))
            for lane_index, info in enumerate(input_infos):
                opDataSelection.DatasetGroup[lane_index][role_index].setValue(
                    info)

            need_warning = False
            for lane_index in range(len(input_infos)):
                output_slot = opDataSelection.ImageGroup[lane_index][
                    role_index]
                if output_slot.meta.prefer_2d:
                    need_warning = True
                    break

            if need_warning:
                logger.warn(
                    "*******************************************************************************************"
                )
                logger.warn(
                    "Some of your input data is stored in a format that is not efficient for 3D access patterns."
                )
                logger.warn(
                    "Performance may suffer as a result.  For best performance, use a chunked HDF5 volume."
                )
                logger.warn(
                    "*******************************************************************************************"
                )
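
For orientation, here is how the per-role flags consumed above might be registered with argparse. The name-mangling rule ('Raw Data' -> '--raw-data' / parsed_args.raw_data) is an illustrative assumption, not necessarily what _role_name_to_arg_name actually does:

    import argparse

    def role_name_to_arg_name(role_name):
        # Assumed convention: 'Raw Data' -> 'raw_data'
        return role_name.lower().replace(' ', '_').replace('-', '_')

    role_names = ['Raw Data', 'Prediction Mask']   # hypothetical roles
    parser = argparse.ArgumentParser()
    for role_name in role_names:
        arg_name = role_name_to_arg_name(role_name)
        parser.add_argument('--' + arg_name.replace('_', '-'), dest=arg_name, nargs='+')
    parser.add_argument('input_files', nargs='*')  # files for the 'default' role

    parsed = parser.parse_args(['--raw-data', 'a.png', 'b.png'])
    print(parsed.raw_data)  # ['a.png', 'b.png']
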
Example #23
    def configure_operator_with_parsed_args(self, parsed_args):
        """
        Helper function for headless workflows.
        Configures this applet's top-level operator according to the settings provided in ``parsed_args``.
        
        :param parsed_args: Must be an ``argparse.Namespace`` as returned by :py:meth:`parse_known_cmdline_args()`.
        """
        role_names = self.topLevelOperator.DatasetRoles.value
        role_paths = collections.OrderedDict()
        if role_names:
            for role_index, role_name in enumerate(role_names):
                arg_name = self._role_name_to_arg_name(role_name)
                input_paths = getattr(parsed_args, arg_name)
                role_paths[role_index] = input_paths

        if parsed_args.input_files:
            # We allow the file list to go to the 'default' role, but only if no other roles were explicitly configured.
            for role_index, input_paths in role_paths.items():
                if input_paths:
                    # FIXME: This error message could be more helpful.
                    role_args = map( self._role_name_to_arg_name, role_names )
                    role_args = map( lambda s: '--' + s, role_args )
                    role_args_str = ", ".join( role_args )
                    raise Exception("Invalid command line arguments: All roles must be configured explicitly.\n"
                                    "Use the following flags to specify which files are matched with which inputs:\n"
                                    + role_args_str )
            role_paths = { 0 : parsed_args.input_files }

        for role_index, input_paths in role_paths.items():
            # If the user doesn't want image stacks to be copied into the project file,
            #  we generate hdf5 volumes in a temporary directory and use those files instead.        
            if parsed_args.preconvert_stacks:
                import tempfile
                input_paths = self.convertStacksToH5( input_paths, tempfile.gettempdir() )
            
            input_infos = []
            for p in input_paths:
                info = DatasetInfo()
                info.location = DatasetInfo.Location.FileSystem
                info.filePath = p
    
                comp = PathComponents(p)
    
                # Convert all (non-url) paths to absolute 
                # (otherwise they are relative to the project file, which probably isn't what the user meant)        
                if not isUrl(p):
                    comp.externalPath = os.path.abspath(comp.externalPath)
                    info.filePath = comp.totalPath()
                info.nickname = comp.filenameBase
                
                # Remove globstring syntax.
                if '*' in info.nickname:
                    info.nickname = info.nickname.replace('*', '')
                if os.path.pathsep in info.nickname:
                    info.nickname = PathComponents(info.nickname.split(os.path.pathsep)[0]).filenameBase
                input_infos.append(info)
    
            opDataSelection = self.topLevelOperator
            existing_lanes = len(opDataSelection.DatasetGroup)
            opDataSelection.DatasetGroup.resize( max(len(input_infos), existing_lanes) )
            for lane_index, info in enumerate(input_infos):
                opDataSelection.DatasetGroup[lane_index][role_index].setValue( info )
            
            need_warning = False
            for lane_index in range(len(input_infos)):
                output_slot = opDataSelection.ImageGroup[lane_index][role_index]
                if output_slot.meta.prefer_2d:
                    need_warning = True
                    break

            if need_warning:
                logger.warn("*******************************************************************************************")
                logger.warn("Some of your input data is stored in a format that is not efficient for 3D access patterns.")
                logger.warn("Performance may suffer as a result.  For best performance, use a chunked HDF5 volume.")                
                logger.warn("*******************************************************************************************")
Example #24
    def __init__(self, filepath=None, jsonNamespace=None, cwd=None, preloaded_array=None):
        """
        filepath: may be a globstring or a full hdf5 path+dataset
        
        jsonNamespace: If provided, overrides default settings after filepath is applied
        
        cwd: The working directory for interpreting relative paths.  If not provided, os.getcwd() is used.
        
        preloaded_array: Instead of providing a filePath to read from, a pre-loaded array can be directly provided.
                         In that case, you'll probably want to configure the axistags member, or provide a tagged vigra.VigraArray.
        """
        assert preloaded_array is None or not filepath, "You can't provide filepath and a preloaded_array"
        cwd = cwd or os.getcwd()
        self.preloaded_array = preloaded_array # See description above.
        Location = DatasetInfo.Location
        self._filePath = ""                 # The original path to the data (also used as a fallback if the data isn't in the project yet)
        self._datasetId = ""                # The name of the data within the project file (if it is stored locally)
        self.allowLabels = True             # OBSOLETE: Whether or not this dataset should be used for training a classifier.
        self.drange = None
        self.normalizeDisplay = True
        self.fromstack = False
        self.nickname = ""
        self.axistags = None
        self.subvolume_roi = None
        self.location = Location.FileSystem
        self.display_mode = 'default' # choices: default, grayscale, rgba, random-colortable, binary-mask.

        if self.preloaded_array is not None:
            self.filePath = "" # set property to ensure unique _datasetId
            self.location = Location.PreloadedArray
            self.fromstack = False
            self.nickname = "preloaded-{}-array".format( self.preloaded_array.dtype.name )
            if hasattr(self.preloaded_array, 'axistags'):
                self.axistags = self.preloaded_array.axistags

        # Set defaults for location, nickname, filepath, and fromstack
        if filepath:
            # Check for sequences (either globstring or separated paths).
            file_list = None
            if '*' in filepath:
                file_list = glob.glob(filepath)
                file_list = sorted(file_list)
            if not isUrl(filepath) and os.path.pathsep in filepath:
                file_list = filepath.split(os.path.pathsep)
            
            # For stacks, choose nickname based on a common prefix
            if file_list:
                fromstack = True
    
                # Convert all paths to absolute 
                file_list = map(lambda f: make_absolute(f, cwd), file_list)
                if '*' in filepath:
                    filepath = make_absolute(filepath, cwd)
                else:
                    filepath = os.path.pathsep.join( file_list )
    
                # Add an underscore for each wildcard digit
                prefix = os.path.commonprefix(file_list)
                num_wildcards = len(file_list[-1]) - len(prefix) - len( os.path.splitext(file_list[-1])[1] )
                nickname = PathComponents(prefix).filenameBase + ("_"*num_wildcards)
            else:
                fromstack = False
                if not isUrl(filepath):
                    # Convert all (non-url) paths to absolute 
                    filepath = make_absolute(filepath, cwd)
                nickname = PathComponents(filepath).filenameBase

            self.location = DatasetInfo.Location.FileSystem
            self.nickname = nickname
            self.filePath = filepath
            self.fromstack = fromstack

        if jsonNamespace is not None:
            self.updateFromJson( jsonNamespace )
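
The stack-nickname rule used above (common prefix plus one underscore per wildcard character) is easy to reproduce in isolation. A minimal sketch in which os.path.basename stands in for PathComponents(prefix).filenameBase:

    import os

    def stack_nickname(file_list):
        """['slice_00.png', 'slice_19.png'] -> 'slice___' (common prefix + 2 underscores)."""
        prefix = os.path.commonprefix(file_list)
        ext = os.path.splitext(file_list[-1])[1]
        num_wildcards = len(file_list[-1]) - len(prefix) - len(ext)
        return os.path.basename(prefix) + '_' * num_wildcards

    print(stack_nickname(['slice_00.png', 'slice_19.png']))  # slice___
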
Example #25
    def openProjectFile(self, projectFilePath, force_readonly=False):
        # If the user gave a URL to a DVID key, then download the project file from dvid first.
        # (So far, DVID is the only type of URL access we support for project files.)
        if isUrl(projectFilePath):
            projectFilePath = HeadlessShell.downloadProjectFromDvid(projectFilePath)

        # Make sure all workflow sub-classes have been loaded,
        #  so we can detect the workflow type in the project.
        import ilastik.workflows
        try:
            # Open the project file
            hdf5File, workflow_class, readOnly = ProjectManager.openProjectFile(projectFilePath, force_readonly)

            # If there are any "creation-time" command-line args saved to the project file,
            #  load them so that the workflow can be instantiated with the same settings 
            #  that were used when the project was first created. 
            project_creation_args = []
            if "workflow_cmdline_args" in hdf5File.keys():
                if len(hdf5File["workflow_cmdline_args"]) > 0:
                    project_creation_args = map(str, hdf5File["workflow_cmdline_args"][...])

            if workflow_class is None:
                # If the project file has no known workflow, we assume pixel classification
                import ilastik.workflows
                workflow_class = ilastik.workflows.pixelClassification.PixelClassificationWorkflow
                import warnings
                warnings.warn( "Your project file ({}) does not specify a workflow type.  "
                               "Assuming Pixel Classification".format( projectFilePath ) )            
            
            # Create our project manager
            # This instantiates the workflow and applies all settings from the project.
            self.projectManager = ProjectManager( self,
                                                  workflow_class,
                                                  headless=True,
                                                  workflow_cmdline_args=self._workflow_cmdline_args,
                                                  project_creation_args=project_creation_args )
            self.projectManager._loadProject(hdf5File, projectFilePath, readOnly)

        except ProjectManager.FileMissingError:
            logger.error("Couldn't find project file: {}".format( projectFilePath ))
            raise            
        except ProjectManager.ProjectVersionError:
            # Couldn't open project.  Try importing it.
            oldProjectFilePath = projectFilePath
            name, ext = os.path.splitext(oldProjectFilePath)
    
            # Create a brand new project file.
            projectFilePath = name + "_imported" + ext
            logger.info("Importing project as '" + projectFilePath + "'")
            hdf5File = ProjectManager.createBlankProjectFile(projectFilePath)

            # For now, we assume that any imported projects are pixel classification workflow projects.
            import ilastik.workflows
            default_workflow = ilastik.workflows.pixelClassification.PixelClassificationWorkflow

            # Create the project manager.
            self.projectManager = ProjectManager( self,
                                                  default_workflow,
                                                  headless=True,
                                                  workflow_cmdline_args=self._workflow_cmdline_args,
                                                  project_creation_args=self._workflow_cmdline_args )

            self.projectManager._importProject(oldProjectFilePath, hdf5File, projectFilePath)
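
The ProjectVersionError branch follows a conservative fallback: the original project file is left untouched and its contents are imported into a new file with an '_imported' suffix. A minimal sketch of just the renaming step:

    import os

    def imported_project_path(project_path):
        """'cells.ilp' -> 'cells_imported.ilp', leaving the original file alone."""
        name, ext = os.path.splitext(project_path)
        return name + "_imported" + ext

    print(imported_project_path("/projects/cells.ilp"))  # /projects/cells_imported.ilp
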
Example #26
    def __init__(self,
                 filepath=None,
                 jsonNamespace=None,
                 cwd=None,
                 preloaded_array=None,
                 sequence_axis=None):
        """
        filepath: may be a globstring or a full hdf5 path+dataset

        jsonNamespace: If provided, overrides default settings after filepath is applied

        cwd: The working directory for interpreting relative paths.  If not provided, os.getcwd() is used.

        preloaded_array: Instead of providing a filePath to read from, a pre-loaded array can be directly provided.
                         In that case, you'll probably want to configure the axistags member, or provide a tagged
                         vigra.VigraArray.

        sequence_axis: Axis along which to stack (only applicable for stacks).
        """
        assert preloaded_array is None or not filepath, "You can't provide filepath and a preloaded_array"
        cwd = cwd or os.getcwd()
        self.preloaded_array = preloaded_array  # See description above.
        Location = DatasetInfo.Location
        # The original path to the data (also used as a fallback if the data isn't in the project yet)
        self._filePath = ""
        self._datasetId = ""  # The name of the data within the project file (if it is stored locally)
        # OBSOLETE: Whether or not this dataset should be used for training a classifier.
        self.allowLabels = True
        self.drange = None
        self.normalizeDisplay = True
        self.sequenceAxis = None
        self.fromstack = False
        self.nickname = ""
        self.axistags = None
        self.original_axistags = None
        # Necessary in headless mode in order to recover the shape of the raw data
        self.laneShape = None
        self.laneDtype = None
        # A flag indicating whether the dataset is backed by a real source (e.g. file)
        # or by the fake provided (e.g. in headless mode when raw data are not necessary)
        self.realDataSource = True
        self.subvolume_roi = None
        self.location = Location.FileSystem
        self.display_mode = 'default'  # choices: default, grayscale, rgba, random-colortable, binary-mask.

        if self.preloaded_array is not None:
            self.filePath = ""  # set property to ensure unique _datasetId
            self.location = Location.PreloadedArray
            self.nickname = "preloaded-{}-array".format(
                self.preloaded_array.dtype.name)
            if hasattr(self.preloaded_array, 'axistags'):
                self.axistags = self.preloaded_array.axistags

        # Set defaults for location, nickname, filepath, and fromstack
        if filepath:
            # Check for sequences (either globstring or separated paths).
            file_list = None

            # To support h5 sequences, filepath may contain external and
            # internal path components
            if not isUrl(filepath):
                file_list = filepath.split(os.path.pathsep)

                pathComponents = [PathComponents(x) for x in file_list]
                externalPaths = [pc.externalPath for pc in pathComponents]
                internalPaths = [pc.internalPath for pc in pathComponents]

                if len(file_list) > 0:
                    if len(externalPaths) == 1:
                        if '*' in externalPaths[0]:
                            if internalPaths[0] is not None:
                                assert ('*' not in internalPaths[0]), (
                                    "Only internal OR external glob placeholder supported"
                                )
                            file_list = sorted(glob.glob(filepath))
                        else:
                            file_list = [externalPaths[0]]
                            if internalPaths[0] is not None:
                                if '*' in internalPaths[0]:
                                    # overwrite internalPaths, will be assembled further down
                                    glob_string = "{}{}".format(
                                        externalPaths[0], internalPaths[0])
                                    internalPaths = \
                                        OpStreamingH5N5SequenceReaderS.expandGlobStrings(
                                            externalPaths[0], glob_string)
                                    if internalPaths:
                                        file_list = [externalPaths[0]] * len(internalPaths)
                                    else:
                                        file_list = None

                    else:
                        assert (not any('*' in ep for ep in externalPaths)), (
                            "Globs in multiple external paths are not supported")
                        file_list = [ex for ex in externalPaths]

                    assert all(
                        pc.extension == pathComponents[0].extension
                        for pc in pathComponents[1::]), (
                            "Supplied multiple files with multiple extensions")
                    # The following is necessary for h5 as well as npz-files
                    internalPathExts = (OpInputDataReader.h5_n5_Exts +
                                        OpInputDataReader.npzExts)
                    internalPathExts = [
                        ".{}".format(ipx) for ipx in internalPathExts
                    ]

                    if pathComponents[0].extension in internalPathExts and internalPaths:
                        if len(file_list) == len(internalPaths):
                            # assuming internal paths match external paths one-to-one
                            file_list_with_internal = []
                            for external, internal in zip(
                                    file_list, internalPaths):
                                if internal:
                                    file_list_with_internal.append(
                                        '{}/{}'.format(external, internal))
                                else:
                                    file_list_with_internal.append(external)
                            file_list = file_list_with_internal
                        else:
                            # Fallback in case of a length mismatch: reuse the first internal path
                            for i in range(len(file_list)):
                                file_list[i] += '/' + internalPaths[0]

            # For stacks, choose nickname based on a common prefix
            if file_list:
                fromstack = True
                # Convert all paths to absolute
                file_list = [make_absolute(f, cwd) for f in file_list]
                if '*' in filepath:
                    filepath = make_absolute(filepath, cwd)
                else:
                    filepath = os.path.pathsep.join(file_list)

                # Add an underscore for each wildcard digit
                prefix = os.path.commonprefix(file_list)
                num_wildcards = len(file_list[-1]) - len(prefix) - len(
                    os.path.splitext(file_list[-1])[1])
                nickname = PathComponents(prefix).filenameBase + (
                    "_" * num_wildcards)
            else:
                fromstack = False
                if not isUrl(filepath):
                    # Convert all (non-url) paths to absolute
                    filepath = make_absolute(filepath, cwd)
                nickname = PathComponents(filepath).filenameBase

            self.location = DatasetInfo.Location.FileSystem
            self.nickname = nickname
            self.filePath = filepath
            self.fromstack = fromstack
            self.sequenceAxis = sequence_axis

        if jsonNamespace is not None:
            self.updateFromJson(jsonNamespace)
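
When the external path names a single HDF5 file and the internal path carries the wildcard, expandGlobStrings (above) yields one internal dataset name per match. One plausible reading of that behavior for plain HDF5, sketched with h5py and fnmatch; expand_internal_glob is a hypothetical helper, it only matches top-level dataset names, and the real reader also handles N5:

    import fnmatch

    import h5py  # assumed available; ilastik wraps this in its own reader classes

    def expand_internal_glob(external_path, internal_glob):
        """('stack.h5', 'slice_*') -> ['slice_00', 'slice_01', ...]"""
        with h5py.File(external_path, 'r') as f:
            return sorted(name for name in f.keys()
                          if fnmatch.fnmatch(name, internal_glob))
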
Example #27
    def __init__(self, filepath=None, jsonNamespace=None, cwd=None, preloaded_array=None):
        """
        filepath: may be a globstring or a full hdf5 path+dataset
        
        jsonNamespace: If provided, overrides default settings after filepath is applied
        
        cwd: The working directory for interpreting relative paths.  If not provided, os.getcwd() is used.
        
        preloaded_array: Instead of providing a filePath to read from, a pre-loaded array can be directly provided.
                         In that case, you'll probably want to configure the axistags member, or provide a tagged vigra.VigraArray.
        """
        assert preloaded_array is None or not filepath, "You can't provide filepath and a preloaded_array"
        cwd = cwd or os.getcwd()
        self.preloaded_array = preloaded_array # See description above.
        Location = DatasetInfo.Location
        self._filePath = ""                 # The original path to the data (also used as a fallback if the data isn't in the project yet)
        self._datasetId = ""                # The name of the data within the project file (if it is stored locally)
        self.allowLabels = True             # OBSOLETE: Whether or not this dataset should be used for training a classifier.
        self.drange = None
        self.normalizeDisplay = True
        self.fromstack = False
        self.nickname = ""
        self.axistags = None
        self.subvolume_roi = None
        self.location = Location.FileSystem
        self.display_mode = 'default' # choices: default, grayscale, rgba, random-colortable, binary-mask.

        if self.preloaded_array is not None:
            self.filePath = "" # set property to ensure unique _datasetId
            self.location = Location.PreloadedArray
            self.fromstack = False
            self.nickname = "preloaded-{}-array".format( self.preloaded_array.dtype.name )
            if hasattr(self.preloaded_array, 'axistags'):
                self.axistags = self.preloaded_array.axistags

        # Set defaults for location, nickname, filepath, and fromstack
        if filepath:
            # Check for sequences (either globstring or separated paths).
            file_list = None

            # To support h5 sequences, filepath may contain external and
            # internal path components
            if not isUrl(filepath):
                file_list = filepath.split(os.path.pathsep)

                pathComponents = [PathComponents(x) for x in file_list]
                externalPaths = [pc.externalPath for pc in pathComponents]
                internalPaths = [pc.internalPath for pc in pathComponents]

                if len(file_list) > 0:
                    if len(externalPaths) == 1:
                        if '*' in externalPaths[0]:
                            if internalPaths[0] is not None:
                                assert ('*' not in internalPaths[0]), (
                                    "Only internal OR external glob placeholder supported"
                                )
                            file_list = sorted(glob.glob(filepath))
                        else:
                            file_list = [externalPaths[0]]
                            if internalPaths[0] is not None:
                                if '*' in internalPaths[0]:
                                    # TODO single hdf5 file stacks
                                    raise NotImplementedError(
                                        'Single file h5Stack import is not implemented in the GUI yet.')
                    else:
                        assert (not any('*' in ep for ep in externalPaths)), (
                            "Globs in multiple external paths are not supported"
                        )
                        file_list = [ex for ex in externalPaths]

                    assert all(pc.extension == pathComponents[0].extension
                               for pc in pathComponents[1::]), (
                        "Supplied multiple files with multiple extensions"
                    )
                    # The following is necessary for h5 as well as npz-files
                    internalPathExts = (
                        OpInputDataReader.h5Exts +
                        OpInputDataReader.npzExts
                    )
                    internalPathExts = [".{}".format(ipx) for ipx in internalPathExts]
                    if pathComponents[0].extension in internalPathExts and internalPaths[0]:
                        for i in xrange(len(file_list)):
                            file_list[i] += '/' + internalPaths[0]

            # For stacks, choose nickname based on a common prefix
            if file_list:
                fromstack = True
                # Convert all paths to absolute 
                file_list = map(lambda f: make_absolute(f, cwd), file_list)
                if '*' in filepath:
                    filepath = make_absolute(filepath, cwd)
                else:
                    filepath = os.path.pathsep.join( file_list )
    
                # Add an underscore for each wildcard digit
                prefix = os.path.commonprefix(file_list)
                num_wildcards = len(file_list[-1]) - len(prefix) - len( os.path.splitext(file_list[-1])[1] )
                nickname = PathComponents(prefix).filenameBase + ("_"*num_wildcards)
            else:
                fromstack = False
                if not isUrl(filepath):
                    # Convert all (non-url) paths to absolute 
                    filepath = make_absolute(filepath, cwd)
                nickname = PathComponents(filepath).filenameBase

            self.location = DatasetInfo.Location.FileSystem
            self.nickname = nickname
            self.filePath = filepath
            self.fromstack = fromstack

        if jsonNamespace is not None:
            self.updateFromJson( jsonNamespace )
Example #28
    def _getDisplayRoleData(self, index):
        laneIndex = index.row()

        UninitializedDisplayData = {
            DatasetDetailedInfoColumn.Nickname: "<empty>",
            DatasetDetailedInfoColumn.Location: "",
            DatasetDetailedInfoColumn.InternalID: "",
            DatasetDetailedInfoColumn.AxisOrder: "",
            DatasetDetailedInfoColumn.Shape: "",
            DatasetDetailedInfoColumn.Range: ""
        }

        if len( self._op.DatasetGroup ) <= laneIndex \
        or len( self._op.DatasetGroup[laneIndex] ) <= self._roleIndex:
            return UninitializedDisplayData[index.column()]

        datasetSlot = self._op.DatasetGroup[laneIndex][self._roleIndex]

        # Default
        if not datasetSlot.ready():
            return UninitializedDisplayData[index.column()]

        datasetInfo = self._op.DatasetGroup[laneIndex][self._roleIndex].value
        filePathComponents = PathComponents(datasetInfo.filePath)

        ## Input meta-data fields

        # Name
        if index.column() == DatasetDetailedInfoColumn.Nickname:
            return datasetInfo.nickname

        # Location
        if index.column() == DatasetDetailedInfoColumn.Location:
            if datasetInfo.location == DatasetInfo.Location.FileSystem:
                if isUrl(datasetInfo.filePath) or os.path.isabs(
                        datasetInfo.filePath):
                    text = "Absolute Link: {}".format(
                        filePathComponents.externalPath)
                    return text
                else:
                    text = "Relative Link: {}".format(
                        filePathComponents.externalPath)
                    return text
            else:
                return "Project File"

        # Internal ID
        if index.column() == DatasetDetailedInfoColumn.InternalID:
            if datasetInfo.location == DatasetInfo.Location.FileSystem:
                return filePathComponents.internalPath
            return ""

        ## Output meta-data fields

        # Defaults
        imageSlot = self._op.ImageGroup[laneIndex][self._roleIndex]
        if not imageSlot.ready():
            return UninitializedDisplayData[index.column()]

        # Axis order
        if index.column() == DatasetDetailedInfoColumn.AxisOrder:
            if imageSlot.meta.original_axistags is not None:
                assert imageSlot.meta.original_shape is not None, \
                    'if original axistags are available, original shape has to exist as well'
                return "".join(imageSlot.meta.getOriginalAxisKeys())

            if imageSlot.meta.axistags is not None:
                return "".join(imageSlot.meta.getAxisKeys())
            return ""

        # Shape
        if index.column() == DatasetDetailedInfoColumn.Shape:
            original_shape = imageSlot.meta.original_shape
            if original_shape is not None:
                assert imageSlot.meta.original_axistags is not None, \
                    'if original shape is available, original axistags have to exist as well'
                return str(original_shape)
            shape = imageSlot.meta.shape
            if shape is None:
                return ""
            return str(shape)

        # Range
        if index.column() == DatasetDetailedInfoColumn.Range:
            drange = imageSlot.meta.drange
            if drange is None:
                return ""
            return str(drange)

        assert False, "Unknown column: row={}, column={}".format(
            index.row(), index.column())
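
The "<empty>"/blank placeholders above form a small dispatch table: every early-exit path indexes the same per-column dict instead of branching on the column again. A stripped-down sketch with hypothetical column names, independent of Qt:

    UNINITIALIZED = {'Nickname': '<empty>', 'Location': '', 'Shape': ''}

    def display_data(column, dataset_ready, real_value=None):
        if not dataset_ready:
            return UNINITIALIZED[column]  # uniform placeholder per column
        return real_value

    print(display_data('Nickname', dataset_ready=False))  # <empty>
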
Example #29
    def _getDisplayRoleData(self, index):
        laneIndex = index.row()

        UninitializedDisplayData = { DatasetDetailedInfoColumn.Nickname : "<empty>",
                                     DatasetDetailedInfoColumn.Location : "",
                                     DatasetDetailedInfoColumn.InternalID : "",
                                     DatasetDetailedInfoColumn.AxisOrder : "",
                                     DatasetDetailedInfoColumn.Shape : "",
                                     DatasetDetailedInfoColumn.Range : "" }

        if len( self._op.DatasetGroup ) <= laneIndex \
        or len( self._op.DatasetGroup[laneIndex] ) <= self._roleIndex:
            return UninitializedDisplayData[ index.column() ]

        datasetSlot = self._op.DatasetGroup[laneIndex][self._roleIndex]

        # Default
        if not datasetSlot.ready():
            return UninitializedDisplayData[ index.column() ]
        
        datasetInfo = self._op.DatasetGroup[laneIndex][self._roleIndex].value
        filePathComponents = PathComponents( datasetInfo.filePath )

        ## Input meta-data fields

        # Name
        if index.column() == DatasetDetailedInfoColumn.Nickname:
            return decode_to_qstring( datasetInfo.nickname )

        # Location
        if index.column() == DatasetDetailedInfoColumn.Location:
            if datasetInfo.location == DatasetInfo.Location.FileSystem:
                if isUrl(datasetInfo.filePath) or os.path.isabs(datasetInfo.filePath):
                    text = "Absolute Link: {}".format( filePathComponents.externalPath )
                    return decode_to_qstring(text)
                else:
                    text = "Relative Link: {}".format( filePathComponents.externalPath )
                    return decode_to_qstring(text)
            else:
                return "Project File"

        # Internal ID        
        if index.column() == DatasetDetailedInfoColumn.InternalID:
            if datasetInfo.location == DatasetInfo.Location.FileSystem:
                return filePathComponents.internalPath
            return ""

        ## Output meta-data fields
        
        # Defaults        
        imageSlot = self._op.ImageGroup[laneIndex][self._roleIndex]
        if not imageSlot.ready():
            return UninitializedDisplayData[index.column()]

        # Axis order            
        if index.column() == DatasetDetailedInfoColumn.AxisOrder:
            original_axistags = imageSlot.meta.original_axistags
            axistags = imageSlot.meta.axistags
            if original_axistags is not None:
                return "".join( tag.key for tag in original_axistags )            
            if axistags is not None:
                return "".join( imageSlot.meta.getAxisKeys() )
            return ""

        # Shape
        if index.column() == DatasetDetailedInfoColumn.Shape:
            original_shape = imageSlot.meta.original_shape
            shape = imageSlot.meta.shape
            if original_shape is not None:
                return str(original_shape)
            if shape is None:
                return ""
            return str(shape)

        # Range
        if index.column() == DatasetDetailedInfoColumn.Range:
            drange = imageSlot.meta.drange
            if drange is None:
                return ""
            return str(drange)

        assert False, "Unknown column: row={}, column={}".format( index.row(), index.column() )