def __init__(self, filepath=None, jsonNamespace=None, cwd=None):
    """
    filepath: may be a globstring or a full hdf5 path+dataset

    jsonNamespace: If provided, overrides default settings after filepath is applied

    cwd: The working directory for interpreting relative paths.  If not provided, os.getcwd() is used.
    """
    cwd = cwd or os.getcwd()
    Location = DatasetInfo.Location
    self.location = Location.FileSystem  # Whether the data will be found/stored on the filesystem or in the project file
    self._filePath = ""  # The original path to the data (also used as a fallback if the data isn't in the project yet)
    self._datasetId = ""  # The name of the data within the project file (if it is stored locally)
    self.allowLabels = True  # OBSOLETE: Whether or not this dataset should be used for training a classifier.
    self.drange = None
    self.normalizeDisplay = True
    self.fromstack = False
    self.nickname = ""
    self.axistags = None
    self.subvolume_roi = None

    # Set defaults for location, nickname, filepath, and fromstack
    if filepath:
        # Check for sequences (either globstring or separated paths)
        file_list = None
        if '*' in filepath:
            file_list = glob.glob(filepath)
        if not isUrl(filepath) and os.path.pathsep in filepath:
            file_list = filepath.split(os.path.pathsep)

        # For stacks, choose nickname based on a common prefix
        if file_list:
            fromstack = True
            # Convert all paths to absolute (list comprehension keeps this Python-3 safe)
            file_list = [make_absolute(f, cwd) for f in file_list]
            filepath = os.path.pathsep.join(file_list)

            # Add an underscore for each wildcard digit
            prefix = os.path.commonprefix(file_list)
            num_wildcards = len(file_list[-1]) - len(prefix) - len(os.path.splitext(file_list[-1])[1])
            nickname = PathComponents(prefix).filenameBase + ("_" * num_wildcards)
        else:
            fromstack = False
            if not isUrl(filepath):
                # Convert all (non-url) paths to absolute
                filepath = make_absolute(filepath, cwd)
            nickname = PathComponents(filepath).filenameBase

        self.location = DatasetInfo.Location.FileSystem
        self.nickname = nickname
        self.filePath = filepath
        self.fromstack = fromstack

    if jsonNamespace is not None:
        self.updateFromJson(jsonNamespace)
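# Illustrative sketch (not from the source): how the stack-nickname logic above
# behaves for a numbered slice sequence. os.path.basename stands in for
# PathComponents(...).filenameBase; the file names are hypothetical.
import os
file_list = ["/data/stack/slice_00.png", "/data/stack/slice_01.png", "/data/stack/slice_12.png"]
prefix = os.path.commonprefix(file_list)                     # "/data/stack/slice_"
ext = os.path.splitext(file_list[-1])[1]                     # ".png"
num_wildcards = len(file_list[-1]) - len(prefix) - len(ext)  # 2: the digits vary
nickname = os.path.basename(prefix) + "_" * num_wildcards
assert nickname == "slice___"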
def parse_known_cmdline_args(self, cmdline_args):
    """
    Helper function for headless workflows.
    Parses command-line args that can be used to configure the ``DataSelectionApplet`` top-level operator
    and returns ``(parsed_args, unused_args)``, similar to ``argparse.ArgumentParser.parse_known_args()``

    Relative paths are converted to absolute paths **according to ``os.getcwd()``**,
    not according to the project file location, since this is more likely to be what headless users expect.

    .. note:: If the top-level operator was configured with multiple 'roles', then the input files for each
              role can be configured separately:
              $ python ilastik.py [other workflow options] --my-role-A inputA1.png inputA2.png --my-role-B inputB1.png inputB2.png
              If the workflow has only one role (or only one required role), then the role-name flag can be omitted:
              $ python ilastik.py [other workflow options] input1.png input2.png

    See also: :py:meth:`configure_operator_with_parsed_args()`.
    """
    role_names = self.topLevelOperator.DatasetRoles.value
    arg_parser = argparse.ArgumentParser()
    if role_names:
        for role_name in role_names:
            arg_name = self._role_name_to_arg_name(role_name)
            arg_parser.add_argument('--' + arg_name, nargs='+',
                                    help='List of input files for the {} role'.format(role_name))

    # Finally, a catch-all for role 0 (if the workflow only has one role, there's no need to provide role names)
    arg_parser.add_argument('input_files', nargs='*', help='List of input files to process.')
    arg_parser.add_argument('--preconvert_stacks',
                            help="Convert image stacks to temporary hdf5 files before loading them.",
                            action='store_true', default=False)

    parsed_args, unused_args = arg_parser.parse_known_args(cmdline_args)

    # Replace '~' with home dir
    for i, path in enumerate(parsed_args.input_files):
        parsed_args.input_files[i] = os.path.expanduser(path)

    # Check for errors: Do all input files exist?
    all_input_paths = list(parsed_args.input_files)
    for role_name in role_names:
        arg_name = self._role_name_to_arg_name(role_name)
        role_paths = getattr(parsed_args, arg_name)
        if role_paths:
            all_input_paths += role_paths
    error = False
    for p in all_input_paths:
        if isUrl(p):
            # Don't error-check urls in advance.
            continue
        p = PathComponents(p).externalPath
        if '*' in p:
            if len(glob.glob(p)) == 0:
                logger.error("Could not find any files for globstring: {}".format(p))
                logger.error("Check your quotes!")
                error = True
        elif not os.path.exists(p):
            logger.error("Input file does not exist: " + p)
            error = True

    if error:
        raise RuntimeError("Could not find one or more input files. See logged errors.")

    return parsed_args, unused_args
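# Hypothetical invocation of the parser above (the `applet` instance and file
# names are illustrative; parsing raises RuntimeError if a listed file is missing):
parsed_args, unused_args = applet.parse_known_cmdline_args(
    ["--preconvert_stacks", "raw1.png", "raw2.png"])
# parsed_args.input_files == ['raw1.png', 'raw2.png'] (with '~' expanded),
# parsed_args.preconvert_stacks == True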
def _applyStorageComboToTempOps(self, index):
    if index == -1:
        return

    newStorageLocation, goodcast = self.storageComboBox.itemData(index).toInt()
    assert goodcast

    # Save a copy of our settings
    oldInfos = {}
    for laneIndex, op in self.tempOps.items():
        oldInfos[laneIndex] = copy.copy(op.Dataset.value)

    # Attempt to apply to all temp operators
    try:
        for laneIndex, op in self.tempOps.items():
            info = copy.copy(op.Dataset.value)

            if info.location == DatasetInfo.Location.ProjectInternal:
                thisLaneStorage = StorageLocation.ProjectFile
            elif info.location == DatasetInfo.Location.FileSystem:
                # Determine if the path is relative or absolute
                if isUrl(info.filePath) or os.path.isabs(info.filePath):
                    thisLaneStorage = StorageLocation.AbsoluteLink
                else:
                    thisLaneStorage = StorageLocation.RelativeLink

            if thisLaneStorage != newStorageLocation:
                if newStorageLocation == StorageLocation.ProjectFile:
                    info.location = DatasetInfo.Location.ProjectInternal
                else:
                    info.location = DatasetInfo.Location.FileSystem
                    cwd = op.WorkingDirectory.value
                    absPath, relPath = getPathVariants(info.filePath, cwd)
                    if relPath is not None and newStorageLocation == StorageLocation.RelativeLink:
                        info.filePath = relPath
                    elif newStorageLocation == StorageLocation.AbsoluteLink:
                        info.filePath = absPath
                    else:
                        assert False, "Unknown storage location setting."
                op.Dataset.setValue(info)

        self._error_fields.discard('Storage Location')
        return True
    except Exception as e:
        # Revert everything back to the previous state
        for laneIndex, op in self.tempOps.items():
            op.Dataset.setValue(oldInfos[laneIndex])

        msg = "Could not set new storage location settings due to an exception:\n"
        msg += "{}".format(e)
        log_exception(logger, msg)
        QMessageBox.warning(self, "Error", msg)
        self._error_fields.add('Storage Location')
        return False
    finally:
        self._updateStorageCombo()
def create_dataset_info(self, url: Union[Path, str], axistags: Optional[vigra.AxisTags] = None,
                        sequence_axis: str = "z") -> DatasetInfo:
    url = str(url)
    if isUrl(url):
        return UrlDatasetInfo(url=url, axistags=axistags)
    else:
        return RelativeFilesystemDatasetInfo.create_or_fallback_to_absolute(
            filePath=url, axistags=axistags, sequence_axis=sequence_axis)
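# Minimal usage sketch: create_dataset_info dispatches on isUrl(), so an http
# address yields a UrlDatasetInfo while plain paths take the relative-path
# constructor with an absolute fallback. The `applet` instance and paths are illustrative.
from pathlib import Path
remote_info = applet.create_dataset_info("http://example.com/volumes/raw.n5")
stack_info = applet.create_dataset_info(Path("slices/t_*.h5"), sequence_axis="t")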
def _append_lane(workflow, input_filepath, axisorder=None):
    """
    Add a lane to the project file for the given input file.

    If axisorder is given, override the default axisorder for
    the file and force the project to use the given one.

    Globstrings are supported, in which case the files are converted to HDF5 first.
    """
    # If the filepath is a globstring, convert the stack to h5
    # todo: skip this?
    tmp_dir = tempfile.mkdtemp()
    input_filepath = DataSelectionApplet.convertStacksToH5([input_filepath], tmp_dir)[0]
    try:
        os.rmdir(tmp_dir)
    except OSError as e:
        import errno
        if e.errno == errno.ENOTEMPTY:  # was hard-coded 39, which is Linux-specific
            logger.warning('Temporary directory {} was populated: should be deleted'.format(tmp_dir))
        else:
            raise

    info = DatasetInfo()
    info.location = DatasetInfo.Location.FileSystem
    info.filePath = input_filepath

    comp = PathComponents(input_filepath)

    # Convert all (non-url) paths to absolute
    # (otherwise they are relative to the project file, which probably isn't what the user meant)
    if not isUrl(input_filepath):
        comp.externalPath = os.path.abspath(comp.externalPath)
        info.filePath = comp.totalPath()
    info.nickname = comp.filenameBase
    if axisorder:
        info.axistags = vigra.defaultAxistags(axisorder)

    logger.debug("adding lane: {}".format(info))
    opDataSelection = workflow.dataSelectionApplet.topLevelOperator

    # Add a lane
    num_lanes = len(opDataSelection.DatasetGroup) + 1
    logger.debug("num_lanes: {}".format(num_lanes))
    opDataSelection.DatasetGroup.resize(num_lanes)

    # Configure it.
    role_index = 0  # raw data
    opDataSelection.DatasetGroup[-1][role_index].setValue(info)

    workflow.handleNewLanesAdded()
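# Hypothetical driver for the helper above; assumes an already-loaded workflow,
# and the globstring is illustrative.
_append_lane(workflow, "/data/experiment1/slice_*.tif", axisorder="zyx")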
def updateFromSlot(self):
    if self._urlSlot.ready():
        # FIXME: Choose a default dvid url...
        file_path = self._urlSlot.value
        if not isUrl(file_path):
            file_path = ""

        # Remove extension
        file_path = os.path.splitext(file_path)[0]
        self.urlLabel.setText(decode_to_qstring(file_path))

        # Re-configure the slot in case we removed the extension
        self._urlSlot.setValue(file_path)
def _updateStorageCombo(self):
    sharedStorageSetting = None
    for laneIndex in self._laneIndexes:
        op = self.tempOps[laneIndex]
        info = op.Dataset.value

        # Determine the current setting
        location = info.location
        if location == DatasetInfo.Location.ProjectInternal:
            storageSetting = StorageLocation.ProjectFile
        elif location == DatasetInfo.Location.FileSystem:
            # Determine if the path is relative or absolute
            if isUrl(info.filePath) or os.path.isabs(info.filePath):
                storageSetting = StorageLocation.AbsoluteLink
            else:
                storageSetting = StorageLocation.RelativeLink

        if sharedStorageSetting is None:
            sharedStorageSetting = storageSetting
        elif sharedStorageSetting != storageSetting:
            # Not all lanes have the same setting
            sharedStorageSetting = -1
            break

    if sharedStorageSetting == -1:
        self.storageComboBox.setCurrentIndex(-1)
    else:
        comboIndex = self.storageComboBox.findData(QVariant(sharedStorageSetting))
        self.storageComboBox.setCurrentIndex(comboIndex)

    disableLinks = False
    for laneIndex in self._laneIndexes:
        op = self.tempOps[laneIndex]
        info = op.Dataset.value
        disableLinks |= info.fromstack

    if disableLinks:
        # If any of the files were loaded from a stack, then you can't refer to them via a link.
        absIndex = self.storageComboBox.findData(QVariant(StorageLocation.AbsoluteLink))
        relIndex = self.storageComboBox.findData(QVariant(StorageLocation.RelativeLink))

        # http://theworldwideinternet.blogspot.com/2011/01/disabling-qcombobox-items.html
        model = self.storageComboBox.model()
        model.setData(model.index(absIndex, 0), 0, Qt.UserRole - 1)
        model.setData(model.index(relIndex, 0), 0, Qt.UserRole - 1)
def append_lane(workflow, input_filepath, axisorder=None):
    # Sanity checks
    assert isinstance(workflow, PixelClassificationWorkflow)
    opPixelClassification = workflow.pcApplet.topLevelOperator
    assert opPixelClassification.Classifier.ready()

    # If the filepath is a globstring, convert the stack to h5
    input_filepath = DataSelectionApplet.convertStacksToH5([input_filepath], TMP_DIR)[0]

    info = DatasetInfo()
    info.location = DatasetInfo.Location.FileSystem
    info.filePath = input_filepath

    comp = PathComponents(input_filepath)

    # Convert all (non-url) paths to absolute
    # (otherwise they are relative to the project file, which probably isn't what the user meant)
    if not isUrl(input_filepath):
        comp.externalPath = os.path.abspath(comp.externalPath)
        info.filePath = comp.totalPath()
    info.nickname = comp.filenameBase
    if axisorder:
        info.axistags = vigra.defaultAxistags(axisorder)

    logger.debug("adding lane: {}".format(info))
    opDataSelection = workflow.dataSelectionApplet.topLevelOperator

    # Add a lane
    num_lanes = len(opDataSelection.DatasetGroup) + 1
    logger.debug("num_lanes: {}".format(num_lanes))
    opDataSelection.DatasetGroup.resize(num_lanes)

    # Configure it.
    role_index = 0  # raw data
    opDataSelection.DatasetGroup[-1][role_index].setValue(info)

    # Sanity check
    assert len(opPixelClassification.InputImages) == num_lanes

    return opPixelClassification
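# Sketch of how this helper might be driven from a headless session (assumes a
# loaded PixelClassificationWorkflow; the globstring and slot access are illustrative):
opPixelClassification = append_lane(workflow, "/data/new_volume_*.png", axisorder="yxc")
predictions = opPixelClassification.HeadlessPredictionProbabilities[-1][:].wait()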
def parse_known_cmdline_args(self, cmdline_args):
    """
    Helper function for headless workflows.
    Parses command-line args that can be used to configure the ``DataSelectionApplet`` top-level operator
    and returns ``(parsed_args, unused_args)``, similar to ``argparse.ArgumentParser.parse_known_args()``

    Relative paths are converted to absolute paths **according to ``os.getcwd()``**,
    not according to the project file location, since this is more likely to be what headless users expect.

    .. note:: Currently, this command-line interface only supports workflows with a SINGLE dataset role.
              Workflows that take multiple files per lane will need to configure the data selection applet
              by some other means.  :py:meth:`DatasetInfo.updateFromJson()` might be useful in that case.

    See also: :py:meth:`configure_operator_with_parsed_args()`.
    """
    # Currently, we don't support any special options -- just a list of files
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('input_files', nargs='*', help='List of input files to process.')
    arg_parser.add_argument('--preconvert_stacks',
                            help="Convert image stacks to temporary hdf5 files before loading them.",
                            action='store_true', default=False)
    parsed_args, unused_args = arg_parser.parse_known_args(cmdline_args)

    # Check for errors: Do all input files exist?
    input_paths = parsed_args.input_files
    error = False
    for p in input_paths:
        if isUrl(p):
            # Don't error-check urls in advance.
            continue
        p = PathComponents(p).externalPath
        if '*' in p:
            if len(glob.glob(p)) == 0:
                logger.error("Could not find any files for globstring: {}".format(p))
                logger.error("Check your quotes!")
                error = True
        elif not os.path.exists(p):
            logger.error("Input file does not exist: " + p)
            error = True
    if error:
        raise RuntimeError("Could not find one or more input files. See logged errors.")

    return parsed_args, unused_args
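# Why the "Check your quotes!" hint above matters (illustrative shell session):
# an unquoted globstring is expanded by the shell before ilastik ever sees it.
#   $ python ilastik.py --headless stack_*.png     # shell expands to many args
#   $ python ilastik.py --headless "stack_*.png"   # ilastik receives the globstring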
def append_lane(workflow, input_filepath, axisorder=None):
    """
    Add a lane to the project file for the given input file.

    If axisorder is given, override the default axisorder for
    the file and force the project to use the given one.

    Globstrings are supported, in which case the files are converted to HDF5 first.
    """
    # If the filepath is a globstring, convert the stack to h5
    input_filepath = DataSelectionApplet.convertStacksToH5([input_filepath], tempfile.mkdtemp())[0]

    info = DatasetInfo()
    info.location = DatasetInfo.Location.FileSystem
    info.filePath = input_filepath

    comp = PathComponents(input_filepath)

    # Convert all (non-url) paths to absolute
    # (otherwise they are relative to the project file, which probably isn't what the user meant)
    if not isUrl(input_filepath):
        comp.externalPath = os.path.abspath(comp.externalPath)
        info.filePath = comp.totalPath()
    info.nickname = comp.filenameBase
    if axisorder:
        info.axistags = vigra.defaultAxistags(axisorder)

    logger.debug("adding lane: {}".format(info))
    opDataSelection = workflow.dataSelectionApplet.topLevelOperator

    # Add a lane
    num_lanes = len(opDataSelection.DatasetGroup) + 1
    logger.debug("num_lanes: {}".format(num_lanes))
    opDataSelection.DatasetGroup.resize(num_lanes)

    # Configure it.
    role_index = 0  # raw data
    opDataSelection.DatasetGroup[-1][role_index].setValue(info)
def create_default_headless_dataset_info(cls, filepath):
    """
    filepath may be a globstring or a full hdf5 path+dataset
    """
    comp = PathComponents(filepath)
    nickname = comp.filenameBase

    # Remove globstring syntax.
    if '*' in nickname:
        nickname = nickname.replace('*', '')
    if os.path.pathsep in nickname:
        nickname = PathComponents(nickname.split(os.path.pathsep)[0]).filenameBase

    info = DatasetInfo()
    info.location = DatasetInfo.Location.FileSystem
    info.nickname = nickname
    info.filePath = filepath

    # Convert all (non-url) paths to absolute
    # (otherwise they are relative to the project file, which probably isn't what the user meant)
    if not isUrl(filepath):
        comp.externalPath = os.path.abspath(comp.externalPath)
        info.filePath = comp.totalPath()

    return info
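# Illustrative call (classmethod binding and path are assumptions): a globstring
# nickname has the '*' stripped, and the external path is made absolute.
info = DataSelectionApplet.create_default_headless_dataset_info("raw/slice_*.h5/volume")
# info.nickname == "slice_"; info.filePath is the absolute globstring + internal path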
def configure_operator_with_parsed_args(self, parsed_args):
    """
    Helper function for headless workflows.
    Configures this applet's top-level operator according to the settings provided in ``parsed_args``.

    :param parsed_args: Must be an ``argparse.Namespace`` as returned by :py:meth:`parse_known_cmdline_args()`.
    """
    input_paths = parsed_args.input_files

    # If the user doesn't want image stacks to be copied into the project file,
    # we generate hdf5 volumes in a temporary directory and use those files instead.
    if parsed_args.preconvert_stacks:
        import tempfile
        input_paths = self.convertStacksToH5(input_paths, tempfile.gettempdir())

    input_infos = []
    for p in input_paths:
        info = DatasetInfo()
        info.location = DatasetInfo.Location.FileSystem
        info.filePath = p

        comp = PathComponents(p)

        # Convert all (non-url) paths to absolute
        # (otherwise they are relative to the project file, which probably isn't what the user meant)
        if not isUrl(p):
            comp.externalPath = os.path.abspath(comp.externalPath)
            info.filePath = comp.totalPath()
        info.nickname = comp.filenameBase
        input_infos.append(info)

    opDataSelection = self.topLevelOperator
    opDataSelection.DatasetGroup.resize(len(input_infos))
    for lane_index, info in enumerate(input_infos):
        # Only one dataset role in pixel classification
        opDataSelection.DatasetGroup[lane_index][0].setValue(info)
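# Hedged end-to-end sketch for the single-role headless path, combining the
# parser and the configurator above (file names illustrative):
parsed_args, _ = applet.parse_known_cmdline_args(["input1.png", "input2.png"])
applet.configure_operator_with_parsed_args(parsed_args)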
def __init__(self, filepath=None, jsonNamespace=None, cwd=None, preloaded_array=None):
    """
    filepath: may be a globstring or a full hdf5 path+dataset

    jsonNamespace: If provided, overrides default settings after filepath is applied

    cwd: The working directory for interpreting relative paths.  If not provided, os.getcwd() is used.

    preloaded_array: Instead of providing a filePath to read from, a pre-loaded array can be directly provided.
                     In that case, you'll probably want to configure the axistags member, or provide a tagged vigra.VigraArray.
    """
    assert preloaded_array is None or not filepath, "You can't provide filepath and a preloaded_array"
    cwd = cwd or os.getcwd()
    self.preloaded_array = preloaded_array  # See description above.
    Location = DatasetInfo.Location
    self._filePath = ""  # The original path to the data (also used as a fallback if the data isn't in the project yet)
    self._datasetId = ""  # The name of the data within the project file (if it is stored locally)
    self.allowLabels = True  # OBSOLETE: Whether or not this dataset should be used for training a classifier.
    self.drange = None
    self.normalizeDisplay = True
    self.fromstack = False
    self.nickname = ""
    self.axistags = None
    self.subvolume_roi = None
    self.location = Location.FileSystem
    self.display_mode = 'default'  # choices: default, grayscale, rgba, random-colortable, binary-mask.

    if self.preloaded_array is not None:
        self.filePath = ""  # set property to ensure unique _datasetId
        self.location = Location.PreloadedArray
        self.fromstack = False
        self.nickname = "preloaded-{}-array".format(self.preloaded_array.dtype.name)
        if hasattr(self.preloaded_array, 'axistags'):
            self.axistags = self.preloaded_array.axistags

    # Set defaults for location, nickname, filepath, and fromstack
    if filepath:
        # Check for sequences (either globstring or separated paths)
        file_list = None
        if '*' in filepath:
            file_list = sorted(glob.glob(filepath))
        if not isUrl(filepath) and os.path.pathsep in filepath:
            file_list = filepath.split(os.path.pathsep)

        # For stacks, choose nickname based on a common prefix
        if file_list:
            fromstack = True
            # Convert all paths to absolute (list comprehension keeps this Python-3 safe)
            file_list = [make_absolute(f, cwd) for f in file_list]
            if '*' in filepath:
                filepath = make_absolute(filepath, cwd)
            else:
                filepath = os.path.pathsep.join(file_list)

            # Add an underscore for each wildcard digit
            prefix = os.path.commonprefix(file_list)
            num_wildcards = len(file_list[-1]) - len(prefix) - len(os.path.splitext(file_list[-1])[1])
            nickname = PathComponents(prefix).filenameBase + ("_" * num_wildcards)
        else:
            fromstack = False
            if not isUrl(filepath):
                # Convert all (non-url) paths to absolute
                filepath = make_absolute(filepath, cwd)
            nickname = PathComponents(filepath).filenameBase

        self.location = DatasetInfo.Location.FileSystem
        self.nickname = nickname
        self.filePath = filepath
        self.fromstack = fromstack

    if jsonNamespace is not None:
        self.updateFromJson(jsonNamespace)
def parse_known_cmdline_args(cls, cmdline_args, role_names):
    """
    Helper function for headless workflows.
    Parses command-line args that can be used to configure the ``DataSelectionApplet`` top-level operator
    and returns ``(parsed_args, unused_args)``, similar to ``argparse.ArgumentParser.parse_known_args()``

    Relative paths are converted to absolute paths **according to ``os.getcwd()``**,
    not according to the project file location, since this is more likely to be what headless users expect.

    .. note:: If the top-level operator was configured with multiple 'roles', then the input files for each
              role can be configured separately:
              $ python ilastik.py [other workflow options] --my-role-A inputA1.png inputA2.png --my-role-B inputB1.png inputB2.png
              If the workflow has only one role (or only one required role), then the role-name flag can be omitted:
              $ python ilastik.py [other workflow options] input1.png input2.png

    See also: :py:meth:`configure_operator_with_parsed_args()`.
    """
    arg_parser = argparse.ArgumentParser()
    if role_names:
        for role_name in role_names:
            arg_name = cls._role_name_to_arg_name(role_name)
            arg_parser.add_argument('--' + arg_name, nargs='+',
                                    help='List of input files for the {} role'.format(role_name))

    # Finally, a catch-all for role 0 (if the workflow only has one role, there's no need to provide role names)
    arg_parser.add_argument('input_files', nargs='*', help='List of input files to process.')
    arg_parser.add_argument('--preconvert_stacks',
                            help="Convert image stacks to temporary hdf5 files before loading them.",
                            action='store_true', default=False)

    parsed_args, unused_args = arg_parser.parse_known_args(cmdline_args)

    # Replace '~' with home dir
    for i, path in enumerate(parsed_args.input_files):
        parsed_args.input_files[i] = os.path.expanduser(path)

    # Check for errors: Do all input files exist?
    all_input_paths = list(parsed_args.input_files)
    for role_name in role_names:
        arg_name = cls._role_name_to_arg_name(role_name)
        role_paths = getattr(parsed_args, arg_name)
        if role_paths:
            all_input_paths += role_paths
    error = False
    for p in all_input_paths:
        if isUrl(p):
            # Don't error-check urls in advance.
            continue
        p = PathComponents(p).externalPath
        if '*' in p:
            if len(glob.glob(p)) == 0:
                logger.error("Could not find any files for globstring: {}".format(p))
                logger.error("Check your quotes!")
                error = True
        elif not os.path.exists(p):
            logger.error("Input file does not exist: " + p)
            error = True

    if error:
        raise RuntimeError("Could not find one or more input files. See logged errors.")

    return parsed_args, unused_args
def __init__(self, filepath=None, jsonNamespace=None, cwd=None, preloaded_array=None, sequence_axis=None):
    """
    filepath: may be a globstring or a full hdf5 path+dataset

    jsonNamespace: If provided, overrides default settings after filepath is applied

    cwd: The working directory for interpreting relative paths.  If not provided, os.getcwd() is used.

    preloaded_array: Instead of providing a filePath to read from, a pre-loaded array can be directly provided.
                     In that case, you'll probably want to configure the axistags member, or provide a tagged vigra.VigraArray.

    sequence_axis: Axis along which to stack (only applicable for stacks).
    """
    assert preloaded_array is None or not filepath, "You can't provide filepath and a preloaded_array"
    cwd = cwd or os.getcwd()
    self.preloaded_array = preloaded_array  # See description above.
    Location = DatasetInfo.Location
    # The original path to the data (also used as a fallback if the data isn't in the project yet)
    self._filePath = ""
    self._datasetId = ""  # The name of the data within the project file (if it is stored locally)
    # OBSOLETE: Whether or not this dataset should be used for training a classifier.
    self.allowLabels = True
    self.drange = None
    self.normalizeDisplay = True
    self.sequenceAxis = None
    self.fromstack = False
    self.nickname = ""
    self.axistags = None
    self.original_axistags = None
    # Necessary in headless mode in order to recover the shape of the raw data
    self.laneShape = None
    self.laneDtype = None
    # A flag indicating whether the dataset is backed by a real source (e.g. file)
    # or by the fake provided (e.g. in headless mode when raw data are not necessary)
    self.realDataSource = True
    self.subvolume_roi = None
    self.location = Location.FileSystem
    self.display_mode = 'default'  # choices: default, grayscale, rgba, random-colortable, binary-mask.

    if self.preloaded_array is not None:
        self.filePath = ""  # set property to ensure unique _datasetId
        self.location = Location.PreloadedArray
        self.nickname = "preloaded-{}-array".format(self.preloaded_array.dtype.name)
        if hasattr(self.preloaded_array, 'axistags'):
            self.axistags = self.preloaded_array.axistags

    # Set defaults for location, nickname, filepath, and fromstack
    if filepath:
        # Check for sequences (either globstring or separated paths)
        file_list = None

        # To support h5 sequences, filepath may contain external and
        # internal path components
        if not isUrl(filepath):
            file_list = filepath.split(os.path.pathsep)
            pathComponents = [PathComponents(x) for x in file_list]
            externalPaths = [pc.externalPath for pc in pathComponents]
            internalPaths = [pc.internalPath for pc in pathComponents]

            if len(file_list) > 0:
                if len(externalPaths) == 1:
                    if '*' in externalPaths[0]:
                        if internalPaths[0] is not None:
                            assert '*' not in internalPaths[0], \
                                "Only internal OR external glob placeholder supported"
                        file_list = sorted(glob.glob(filepath))
                    else:
                        file_list = [externalPaths[0]]
                        if internalPaths[0] is not None:
                            if '*' in internalPaths[0]:
                                # overwrite internalPaths, will be assembled further down
                                glob_string = "{}{}".format(externalPaths[0], internalPaths[0])
                                internalPaths = OpStreamingH5N5SequenceReaderS.expandGlobStrings(
                                    externalPaths[0], glob_string)
                                if internalPaths:
                                    file_list = [externalPaths[0]] * len(internalPaths)
                                else:
                                    file_list = None
                else:
                    assert not any('*' in ep for ep in externalPaths), \
                        "Multiple glob paths shouldn't be happening"
                    file_list = [ex for ex in externalPaths]

                assert all(pc.extension == pathComponents[0].extension for pc in pathComponents[1::]), \
                    "Supplied multiple files with multiple extensions"

                # The following is necessary for h5 as well as npz-files
                internalPathExts = OpInputDataReader.h5_n5_Exts + OpInputDataReader.npzExts
                internalPathExts = [".{}".format(ipx) for ipx in internalPathExts]

                if pathComponents[0].extension in internalPathExts and internalPaths:
                    if len(file_list) == len(internalPaths):
                        # assuming a matching internal paths to external paths
                        file_list_with_internal = []
                        for external, internal in zip(file_list, internalPaths):
                            if internal:
                                file_list_with_internal.append('{}/{}'.format(external, internal))
                            else:
                                file_list_with_internal.append(external)
                        file_list = file_list_with_internal
                    else:
                        # sort of fallback, in case of a mismatch in lengths
                        for i in range(len(file_list)):
                            file_list[i] += '/' + internalPaths[0]

        # For stacks, choose nickname based on a common prefix
        if file_list:
            fromstack = True
            # Convert all paths to absolute
            file_list = [make_absolute(f, cwd) for f in file_list]
            if '*' in filepath:
                filepath = make_absolute(filepath, cwd)
            else:
                filepath = os.path.pathsep.join(file_list)

            # Add an underscore for each wildcard digit
            prefix = os.path.commonprefix(file_list)
            num_wildcards = len(file_list[-1]) - len(prefix) - len(os.path.splitext(file_list[-1])[1])
            nickname = PathComponents(prefix).filenameBase + ("_" * num_wildcards)
        else:
            fromstack = False
            if not isUrl(filepath):
                # Convert all (non-url) paths to absolute
                filepath = make_absolute(filepath, cwd)
            nickname = PathComponents(filepath).filenameBase

        self.location = DatasetInfo.Location.FileSystem
        self.nickname = nickname
        self.filePath = filepath
        self.fromstack = fromstack
        self.sequenceAxis = sequence_axis

    if jsonNamespace is not None:
        self.updateFromJson(jsonNamespace)
def parse_known_cmdline_args(cls, cmdline_args, role_names):
    """
    Helper function for headless workflows.
    Parses command-line args that can be used to configure the ``DataSelectionApplet`` top-level operator
    and returns ``(parsed_args, unused_args)``, similar to ``argparse.ArgumentParser.parse_known_args()``

    Relative paths are converted to absolute paths **according to ``os.getcwd()``**,
    not according to the project file location, since this is more likely to be what headless users expect.

    .. note:: If the top-level operator was configured with multiple 'roles', then the input files for each
              role can be configured separately:
              $ python ilastik.py [other workflow options] --my-role-A inputA1.png inputA2.png --my-role-B inputB1.png inputB2.png
              If the workflow has only one role (or only one required role), then the role-name flag can be omitted:
              $ python ilastik.py [other workflow options] input1.png input2.png

    See also: :py:meth:`configure_operator_with_parsed_args()`.
    """
    arg_parser = argparse.ArgumentParser()
    if role_names:
        for role_name in role_names:
            arg_name = cls._role_name_to_arg_name(role_name)
            arg_parser.add_argument('--' + arg_name, nargs='+',
                                    help='List of input files for the {} role'.format(role_name))

    # Finally, a catch-all for role 0 (if the workflow only has one role, there's no need to provide role names)
    arg_parser.add_argument('unspecified_input_files', nargs='*', help='List of input files to process.')
    arg_parser.add_argument('--preconvert_stacks',
                            help="Convert image stacks to temporary hdf5 files before loading them.",
                            action='store_true', default=False)
    arg_parser.add_argument('--input_axes', help="Explicitly specify the axes of your dataset.", required=False)

    parsed_args, unused_args = arg_parser.parse_known_args(cmdline_args)

    if parsed_args.unspecified_input_files:
        # We allow the file list to go to the 'default' role,
        # but only if no other roles were explicitly configured.
        arg_names = list(map(cls._role_name_to_arg_name, role_names))
        for arg_name in arg_names:
            if getattr(parsed_args, arg_name):
                # FIXME: This error message could be more helpful.
                role_args = list(map(cls._role_name_to_arg_name, role_names))
                role_args = ['--' + s for s in role_args]
                role_args_str = ", ".join(role_args)
                raise Exception("Invalid command line arguments: All roles must be configured explicitly.\n"
                                "Use the following flags to specify which files are matched with which inputs:\n"
                                + role_args_str)

        # Relocate to the 'default' role
        arg_name = cls._role_name_to_arg_name(role_names[0])
        setattr(parsed_args, arg_name, parsed_args.unspecified_input_files)
        parsed_args.unspecified_input_files = None

    # Replace '~' with home dir
    for role_name in role_names:
        arg_name = cls._role_name_to_arg_name(role_name)
        paths_for_role = getattr(parsed_args, arg_name)
        if paths_for_role:
            for i, path in enumerate(paths_for_role):
                paths_for_role[i] = os.path.expanduser(path)

    # Check for errors: Do all input files exist?
    all_input_paths = []
    for role_name in role_names:
        arg_name = cls._role_name_to_arg_name(role_name)
        role_paths = getattr(parsed_args, arg_name)
        if role_paths:
            all_input_paths += role_paths
    error = False
    for p in all_input_paths:
        if isUrl(p):
            # Don't error-check urls in advance.
            continue
        p = PathComponents(p).externalPath
        if '*' in p:
            if len(glob.glob(p)) == 0:
                logger.error("Could not find any files for globstring: {}".format(p))
                logger.error("Check your quotes!")
                error = True
        elif not os.path.exists(p):
            logger.error("Input file does not exist: " + p)
            error = True

    if error:
        raise RuntimeError("Could not find one or more input files. See logged errors.")

    return parsed_args, unused_args
def openProjectFile(self, projectFilePath, force_readonly=False):
    # If the user gave a URL to a DVID key, then download the project file from dvid first.
    # (So far, DVID is the only type of URL access we support for project files.)
    if isUrl(projectFilePath):
        projectFilePath = HeadlessShell.downloadProjectFromDvid(projectFilePath)

    # Make sure all workflow sub-classes have been loaded,
    # so we can detect the workflow type in the project.
    import ilastik.workflows

    try:
        # Open the project file
        hdf5File, workflow_class, readOnly = ProjectManager.openProjectFile(projectFilePath, force_readonly)

        # If there are any "creation-time" command-line args saved to the project file,
        # load them so that the workflow can be instantiated with the same settings
        # that were used when the project was first created.
        project_creation_args = []
        if "workflow_cmdline_args" in list(hdf5File.keys()):
            if len(hdf5File["workflow_cmdline_args"]) > 0:
                project_creation_args = list(map(str, hdf5File["workflow_cmdline_args"][...]))

        if workflow_class is None:
            # If the project file has no known workflow, we assume pixel classification
            import ilastik.workflows
            workflow_class = ilastik.workflows.pixelClassification.PixelClassificationWorkflow
            import warnings
            warnings.warn(
                "Your project file ({}) does not specify a workflow type. "
                "Assuming Pixel Classification".format(projectFilePath)
            )

        # Create our project manager
        # This instantiates the workflow and applies all settings from the project.
        self.projectManager = ProjectManager(
            self,
            workflow_class,
            headless=True,
            workflow_cmdline_args=self._workflow_cmdline_args,
            project_creation_args=project_creation_args,
        )
        self.projectManager._loadProject(hdf5File, projectFilePath, readOnly)

    except ProjectManager.FileMissingError:
        logger.error("Couldn't find project file: {}".format(projectFilePath))
        raise
    except ProjectManager.ProjectVersionError:
        # Couldn't open project.  Try importing it.
        oldProjectFilePath = projectFilePath
        name, ext = os.path.splitext(oldProjectFilePath)

        # Create a brand new project file.
        projectFilePath = name + "_imported" + ext
        logger.info("Importing project as '" + projectFilePath + "'")
        hdf5File = ProjectManager.createBlankProjectFile(projectFilePath)

        # For now, we assume that any imported projects are pixel classification workflow projects.
        import ilastik.workflows
        default_workflow = ilastik.workflows.pixelClassification.PixelClassificationWorkflow

        # Create the project manager.
        self.projectManager = ProjectManager(
            self,
            default_workflow,
            headless=True,
            workflow_cmdline_args=self._workflow_cmdline_args,
            project_creation_args=self._workflow_cmdline_args,
        )
        self.projectManager._importProject(oldProjectFilePath, hdf5File, projectFilePath)
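# Minimal usage sketch (assumes `shell` is an already-constructed HeadlessShell
# and that the project manager exposes a `workflow` attribute; path illustrative):
shell.openProjectFile("/projects/cells.ilp", force_readonly=True)
workflow = shell.projectManager.workflow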
def parse_known_cmdline_args(cls, cmdline_args, role_names):
    """
    Helper function for headless workflows.
    Parses command-line args that can be used to configure the ``DataSelectionApplet`` top-level operator
    and returns ``(parsed_args, unused_args)``, similar to ``argparse.ArgumentParser.parse_known_args()``

    Relative paths are converted to absolute paths **according to ``os.getcwd()``**,
    not according to the project file location, since this is more likely to be what headless users expect.

    .. note:: If the top-level operator was configured with multiple 'roles', then the input files for each
              role can be configured separately:
              $ python ilastik.py [other workflow options] --my-role-A inputA1.png inputA2.png --my-role-B inputB1.png inputB2.png
              If the workflow has only one role (or only one required role), then the role-name flag can be omitted:
              $ python ilastik.py [other workflow options] input1.png input2.png

    See also: :py:meth:`configure_operator_with_parsed_args()`.
    """
    arg_parser = argparse.ArgumentParser()
    if role_names:
        for role_name in role_names:
            arg_name = cls._role_name_to_arg_name(role_name)
            arg_parser.add_argument('--' + arg_name, nargs='+',
                                    help='List of input files for the {} role'.format(role_name))

    # Finally, a catch-all for role 0 (if the workflow only has one role, there's no need to provide role names)
    arg_parser.add_argument('unspecified_input_files', nargs='*', help='List of input files to process.')
    arg_parser.add_argument('--preconvert_stacks',
                            help="Convert image stacks to temporary hdf5 files before loading them.",
                            action='store_true', default=False)
    arg_parser.add_argument('--input_axes', help="Explicitly specify the axes of your dataset.", required=False)
    arg_parser.add_argument('--stack_along', help="Sequence axis along which to stack", type=str, default='z')

    parsed_args, unused_args = arg_parser.parse_known_args(cmdline_args)

    if parsed_args.unspecified_input_files:
        # We allow the file list to go to the 'default' role,
        # but only if no other roles were explicitly configured.
        arg_names = list(map(cls._role_name_to_arg_name, role_names))
        for arg_name in arg_names:
            if getattr(parsed_args, arg_name):
                # FIXME: This error message could be more helpful.
                role_args = list(map(cls._role_name_to_arg_name, role_names))
                role_args = ['--' + s for s in role_args]
                role_args_str = ", ".join(role_args)
                raise Exception("Invalid command line arguments: All roles must be configured explicitly.\n"
                                "Use the following flags to specify which files are matched with which inputs:\n"
                                + role_args_str)

        # Relocate to the 'default' role
        arg_name = cls._role_name_to_arg_name(role_names[0])
        setattr(parsed_args, arg_name, parsed_args.unspecified_input_files)
        parsed_args.unspecified_input_files = None

    # Replace '~' with home dir
    for role_name in role_names:
        arg_name = cls._role_name_to_arg_name(role_name)
        paths_for_role = getattr(parsed_args, arg_name)
        if paths_for_role:
            for i, path in enumerate(paths_for_role):
                paths_for_role[i] = os.path.expanduser(path)

    # Check for errors: Do all input files exist?
    all_input_paths = []
    for role_name in role_names:
        arg_name = cls._role_name_to_arg_name(role_name)
        role_paths = getattr(parsed_args, arg_name)
        if role_paths:
            all_input_paths += role_paths
    error = False
    for p in all_input_paths:
        if isUrl(p):
            # Don't error-check urls in advance.
            continue
        p = PathComponents(p).externalPath
        if '*' in p:
            if len(glob.glob(p)) == 0:
                logger.error("Could not find any files for globstring: {}".format(p))
                logger.error("Check your quotes!")
                error = True
        elif not os.path.exists(p):
            logger.error("Input file does not exist: " + p)
            error = True

    if error:
        raise RuntimeError("Could not find one or more input files. See logged errors.")

    return parsed_args, unused_args
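# Illustrative headless command line exercising the flags this parser version
# understands (project and file names hypothetical):
#   python ilastik.py --headless --project=pc.ilp \
#       --preconvert_stacks --input_axes zyx --stack_along t "raw_t*.tif"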
def configure_operator_with_parsed_args(self, parsed_args):
    """
    Helper function for headless workflows.
    Configures this applet's top-level operator according to the settings provided in ``parsed_args``.

    :param parsed_args: Must be an ``argparse.Namespace`` as returned by :py:meth:`parse_known_cmdline_args()`.
    """
    role_names = self.topLevelOperator.DatasetRoles.value
    role_paths = collections.OrderedDict()
    if role_names:
        for role_index, role_name in enumerate(role_names):
            arg_name = self._role_name_to_arg_name(role_name)
            input_paths = getattr(parsed_args, arg_name)
            role_paths[role_index] = input_paths

    if parsed_args.input_files:
        # We allow the file list to go to the 'default' role, but only if no other roles were explicitly configured.
        for role_index, input_paths in role_paths.items():
            if input_paths:
                # FIXME: This error message could be more helpful.
                role_args = map(self._role_name_to_arg_name, role_names)
                role_args = map(lambda s: '--' + s, role_args)
                role_args_str = ", ".join(role_args)
                raise Exception("Invalid command line arguments: All roles must be configured explicitly.\n"
                                "Use the following flags to specify which files are matched with which inputs:\n"
                                + role_args_str)
        role_paths = {0: parsed_args.input_files}

    for role_index, input_paths in role_paths.items():
        # If the user doesn't want image stacks to be copied into the project file,
        # we generate hdf5 volumes in a temporary directory and use those files instead.
        if parsed_args.preconvert_stacks:
            import tempfile
            input_paths = self.convertStacksToH5(input_paths, tempfile.gettempdir())

        input_infos = []
        for p in input_paths:
            info = DatasetInfo()
            info.location = DatasetInfo.Location.FileSystem
            info.filePath = p

            comp = PathComponents(p)

            # Convert all (non-url) paths to absolute
            # (otherwise they are relative to the project file, which probably isn't what the user meant)
            if not isUrl(p):
                comp.externalPath = os.path.abspath(comp.externalPath)
                info.filePath = comp.totalPath()
            info.nickname = comp.filenameBase

            # Remove globstring syntax.
            if '*' in info.nickname:
                info.nickname = info.nickname.replace('*', '')
            if os.path.pathsep in info.nickname:
                info.nickname = PathComponents(info.nickname.split(os.path.pathsep)[0]).filenameBase
            input_infos.append(info)

        opDataSelection = self.topLevelOperator
        existing_lanes = len(opDataSelection.DatasetGroup)
        opDataSelection.DatasetGroup.resize(max(len(input_infos), existing_lanes))
        for lane_index, info in enumerate(input_infos):
            opDataSelection.DatasetGroup[lane_index][role_index].setValue(info)

        need_warning = False
        for lane_index in range(len(input_infos)):
            output_slot = opDataSelection.ImageGroup[lane_index][role_index]
            if output_slot.meta.prefer_2d:
                need_warning = True
                break

        if need_warning:
            logger.warning("*******************************************************************************************")
            logger.warning("Some of your input data is stored in a format that is not efficient for 3D access patterns.")
            logger.warning("Performance may suffer as a result. For best performance, use a chunked HDF5 volume.")
            logger.warning("*******************************************************************************************")
def openProjectFile(self, projectFilePath, force_readonly=False):
    # If the user gave a URL to a DVID key, then download the project file from dvid first.
    # (So far, DVID is the only type of URL access we support for project files.)
    if isUrl(projectFilePath):
        projectFilePath = HeadlessShell.downloadProjectFromDvid(projectFilePath)

    # Make sure all workflow sub-classes have been loaded,
    # so we can detect the workflow type in the project.
    import ilastik.workflows

    try:
        # Open the project file
        hdf5File, workflow_class, readOnly = ProjectManager.openProjectFile(projectFilePath, force_readonly)

        # If there are any "creation-time" command-line args saved to the project file,
        # load them so that the workflow can be instantiated with the same settings
        # that were used when the project was first created.
        project_creation_args = []
        if "workflow_cmdline_args" in hdf5File.keys():
            if len(hdf5File["workflow_cmdline_args"]) > 0:
                project_creation_args = map(str, hdf5File["workflow_cmdline_args"][...])

        if workflow_class is None:
            # If the project file has no known workflow, we assume pixel classification
            import ilastik.workflows
            workflow_class = ilastik.workflows.pixelClassification.PixelClassificationWorkflow
            import warnings
            warnings.warn(
                "Your project file ({}) does not specify a workflow type. "
                "Assuming Pixel Classification".format(projectFilePath)
            )

        # Create our project manager
        # This instantiates the workflow and applies all settings from the project.
        self.projectManager = ProjectManager(
            self,
            workflow_class,
            headless=True,
            workflow_cmdline_args=self._workflow_cmdline_args,
            project_creation_args=project_creation_args
        )
        self.projectManager._loadProject(hdf5File, projectFilePath, readOnly)

    except ProjectManager.FileMissingError:
        logger.error("Couldn't find project file: {}".format(projectFilePath))
        raise
    except ProjectManager.ProjectVersionError:
        # Couldn't open project.  Try importing it.
        oldProjectFilePath = projectFilePath
        name, ext = os.path.splitext(oldProjectFilePath)

        # Create a brand new project file.
        projectFilePath = name + "_imported" + ext
        logger.info("Importing project as '" + projectFilePath + "'")
        hdf5File = ProjectManager.createBlankProjectFile(projectFilePath)

        # For now, we assume that any imported projects are pixel classification workflow projects.
        import ilastik.workflows
        default_workflow = ilastik.workflows.pixelClassification.PixelClassificationWorkflow

        # Create the project manager.
        self.projectManager = ProjectManager(
            self,
            default_workflow,
            headless=True,
            workflow_cmdline_args=self._workflow_cmdline_args,
            project_creation_args=self._workflow_cmdline_args
        )
        # (was an undefined name `importFromPath`; the old project path is what gets imported)
        self.projectManager._importProject(oldProjectFilePath, hdf5File, projectFilePath)
def __init__(self, filepath=None, jsonNamespace=None, cwd=None, preloaded_array=None):
    """
    filepath: may be a globstring or a full hdf5 path+dataset

    jsonNamespace: If provided, overrides default settings after filepath is applied

    cwd: The working directory for interpreting relative paths.  If not provided, os.getcwd() is used.

    preloaded_array: Instead of providing a filePath to read from, a pre-loaded array can be directly provided.
                     In that case, you'll probably want to configure the axistags member, or provide a tagged vigra.VigraArray.
    """
    assert preloaded_array is None or not filepath, "You can't provide both a filepath and a preloaded_array"
    cwd = cwd or os.getcwd()
    self.preloaded_array = preloaded_array # See description above.
    Location = DatasetInfo.Location
    self._filePath = ""                 # The original path to the data (also used as a fallback if the data isn't in the project yet)
    self._datasetId = ""                # The name of the data within the project file (if it is stored locally)
    self.allowLabels = True             # OBSOLETE: Whether or not this dataset should be used for training a classifier.
    self.drange = None
    self.normalizeDisplay = True
    self.fromstack = False
    self.nickname = ""
    self.axistags = None
    self.subvolume_roi = None
    self.location = Location.FileSystem
    self.display_mode = 'default'       # choices: default, grayscale, rgba, random-colortable, binary-mask.

    if self.preloaded_array is not None:
        self.filePath = ""              # set property to ensure unique _datasetId
        self.location = Location.PreloadedArray
        self.fromstack = False
        self.nickname = "preloaded-{}-array".format( self.preloaded_array.dtype.name )
        if hasattr(self.preloaded_array, 'axistags'):
            self.axistags = self.preloaded_array.axistags

    # Set defaults for location, nickname, filepath, and fromstack
    if filepath:
        # Check for sequences (either globstring or pathsep-separated paths).
        file_list = None

        # To support h5 sequences, filepath may contain external and
        # internal path components.
        if not isUrl(filepath):
            file_list = filepath.split(os.path.pathsep)
            pathComponents = [PathComponents(x) for x in file_list]
            externalPaths = [pc.externalPath for pc in pathComponents]
            internalPaths = [pc.internalPath for pc in pathComponents]

            if len(file_list) > 0:
                if len(externalPaths) == 1:
                    if '*' in externalPaths[0]:
                        if internalPaths[0] is not None:
                            assert '*' not in internalPaths[0], \
                                "Only internal OR external glob placeholder supported"
                        file_list = sorted(glob.glob(filepath))
                    else:
                        file_list = [externalPaths[0]]
                        if internalPaths[0] is not None:
                            if '*' in internalPaths[0]:
                                # TODO: single hdf5 file stacks
                                raise NotImplementedError(
                                    'Single-file h5 stack import is not implemented in the GUI yet.')
                else:
                    assert not any('*' in ep for ep in externalPaths), \
                        "Multiple glob paths shouldn't be happening"
                    file_list = [ex for ex in externalPaths]

                assert all(pc.extension == pathComponents[0].extension
                           for pc in pathComponents[1:]), \
                    "Supplied multiple files with multiple extensions"

                # The following is necessary for h5 as well as npz files.
                internalPathExts = OpInputDataReader.h5Exts + OpInputDataReader.npzExts
                internalPathExts = [".{}".format(ipx) for ipx in internalPathExts]
                if pathComponents[0].extension in internalPathExts and internalPaths[0]:
                    for i in xrange(len(file_list)):
                        file_list[i] += '/' + internalPaths[0]

        # For stacks, choose nickname based on a common prefix
        if file_list:
            fromstack = True
            # Convert all paths to absolute
            file_list = map(lambda f: make_absolute(f, cwd), file_list)
            if '*' in filepath:
                filepath = make_absolute(filepath, cwd)
            else:
                filepath = os.path.pathsep.join( file_list )

            # Add an underscore for each wildcard digit
            prefix = os.path.commonprefix(file_list)
            num_wildcards = len(file_list[-1]) - len(prefix) - len( os.path.splitext(file_list[-1])[1] )
            nickname = PathComponents(prefix).filenameBase + ("_" * num_wildcards)
        else:
            fromstack = False
            if not isUrl(filepath):
                # Convert all (non-url) paths to absolute
                filepath = make_absolute(filepath, cwd)
            nickname = PathComponents(filepath).filenameBase

        self.location = DatasetInfo.Location.FileSystem
        self.nickname = nickname
        self.filePath = filepath
        self.fromstack = fromstack

    if jsonNamespace is not None:
        self.updateFromJson( jsonNamespace )
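# Usage sketch (a hedged illustration -- the file pattern and array below are
# hypothetical, and the stack branch only triggers if the globstring actually
# matches files on disk):
#
#     info = DatasetInfo( filepath='/data/slices_*.png' )
#     # info.fromstack is True; info.nickname is the common filename prefix
#     # plus one underscore per wildcard digit, e.g. 'slices___'
#
#     import numpy
#     arr = numpy.zeros( (10, 100, 100), dtype=numpy.uint8 )
#     info = DatasetInfo( preloaded_array=arr )
#     # info.location == DatasetInfo.Location.PreloadedArray
#     # info.nickname == 'preloaded-uint8-array'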
def _getDisplayRoleData(self, index):
    laneIndex = index.row()

    UninitializedDisplayData = { DatasetDetailedInfoColumn.Nickname : "<empty>",
                                 DatasetDetailedInfoColumn.Location : "",
                                 DatasetDetailedInfoColumn.InternalID : "",
                                 DatasetDetailedInfoColumn.AxisOrder : "",
                                 DatasetDetailedInfoColumn.Shape : "",
                                 DatasetDetailedInfoColumn.Range : "" }

    if len( self._op.DatasetGroup ) <= laneIndex \
    or len( self._op.DatasetGroup[laneIndex] ) <= self._roleIndex:
        return UninitializedDisplayData[index.column()]

    datasetSlot = self._op.DatasetGroup[laneIndex][self._roleIndex]

    # Default
    if not datasetSlot.ready():
        return UninitializedDisplayData[index.column()]

    datasetInfo = self._op.DatasetGroup[laneIndex][self._roleIndex].value
    filePathComponents = PathComponents(datasetInfo.filePath)

    ## Input meta-data fields

    # Name
    if index.column() == DatasetDetailedInfoColumn.Nickname:
        return datasetInfo.nickname

    # Location
    if index.column() == DatasetDetailedInfoColumn.Location:
        if datasetInfo.location == DatasetInfo.Location.FileSystem:
            if isUrl(datasetInfo.filePath) or os.path.isabs(datasetInfo.filePath):
                return "Absolute Link: {}".format( filePathComponents.externalPath )
            else:
                return "Relative Link: {}".format( filePathComponents.externalPath )
        else:
            return "Project File"

    # Internal ID
    if index.column() == DatasetDetailedInfoColumn.InternalID:
        if datasetInfo.location == DatasetInfo.Location.FileSystem:
            return filePathComponents.internalPath
        return ""

    ## Output meta-data fields

    # Defaults
    imageSlot = self._op.ImageGroup[laneIndex][self._roleIndex]
    if not imageSlot.ready():
        return UninitializedDisplayData[index.column()]

    # Axis order
    if index.column() == DatasetDetailedInfoColumn.AxisOrder:
        if imageSlot.meta.original_axistags is not None:
            assert imageSlot.meta.original_shape is not None, \
                'if original axistags are available, original shape has to exist as well'
            return "".join(imageSlot.meta.getOriginalAxisKeys())
        if imageSlot.meta.axistags is not None:
            return "".join(imageSlot.meta.getAxisKeys())
        return ""

    # Shape
    if index.column() == DatasetDetailedInfoColumn.Shape:
        original_shape = imageSlot.meta.original_shape
        if original_shape is not None:
            assert imageSlot.meta.original_axistags is not None, \
                'if original shape is available, original axistags have to exist as well'
            return str(original_shape)
        shape = imageSlot.meta.shape
        if shape is None:
            return ""
        return str(shape)

    # Range
    if index.column() == DatasetDetailedInfoColumn.Range:
        drange = imageSlot.meta.drange
        if drange is None:
            return ""
        return str(drange)

    assert False, "Unknown column: row={}, column={}".format( index.row(), index.column() )
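# How _getDisplayRoleData is reached (a minimal sketch, assuming the
# surrounding class is a QAbstractItemModel subclass; the class name below is
# hypothetical, not the actual ilastik model):
#
#     from PyQt4.QtCore import Qt, QAbstractItemModel
#
#     class DatasetTableModelSketch( QAbstractItemModel ):
#         def data(self, index, role=Qt.DisplayRole):
#             if role == Qt.DisplayRole:
#                 return self._getDisplayRoleData(index)
#             return None  # other roles (tooltip, decoration, ...) handled elsewhere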
# Variant of the method above with the same dispatch logic, but user-visible
# strings are passed through decode_to_qstring(), presumably so non-ASCII
# nicknames and paths display correctly in the Qt view.
def _getDisplayRoleData(self, index):
    laneIndex = index.row()

    UninitializedDisplayData = { DatasetDetailedInfoColumn.Nickname : "<empty>",
                                 DatasetDetailedInfoColumn.Location : "",
                                 DatasetDetailedInfoColumn.InternalID : "",
                                 DatasetDetailedInfoColumn.AxisOrder : "",
                                 DatasetDetailedInfoColumn.Shape : "",
                                 DatasetDetailedInfoColumn.Range : "" }

    if len( self._op.DatasetGroup ) <= laneIndex \
    or len( self._op.DatasetGroup[laneIndex] ) <= self._roleIndex:
        return UninitializedDisplayData[ index.column() ]

    datasetSlot = self._op.DatasetGroup[laneIndex][self._roleIndex]

    # Default
    if not datasetSlot.ready():
        return UninitializedDisplayData[ index.column() ]

    datasetInfo = self._op.DatasetGroup[laneIndex][self._roleIndex].value
    filePathComponents = PathComponents( datasetInfo.filePath )

    ## Input meta-data fields

    # Name
    if index.column() == DatasetDetailedInfoColumn.Nickname:
        return decode_to_qstring( datasetInfo.nickname )

    # Location
    if index.column() == DatasetDetailedInfoColumn.Location:
        if datasetInfo.location == DatasetInfo.Location.FileSystem:
            if isUrl(datasetInfo.filePath) or os.path.isabs(datasetInfo.filePath):
                text = "Absolute Link: {}".format( filePathComponents.externalPath )
            else:
                text = "Relative Link: {}".format( filePathComponents.externalPath )
            return decode_to_qstring(text)
        else:
            return "Project File"

    # Internal ID
    if index.column() == DatasetDetailedInfoColumn.InternalID:
        if datasetInfo.location == DatasetInfo.Location.FileSystem:
            return filePathComponents.internalPath
        return ""

    ## Output meta-data fields

    # Defaults
    imageSlot = self._op.ImageGroup[laneIndex][self._roleIndex]
    if not imageSlot.ready():
        return UninitializedDisplayData[index.column()]

    # Axis order
    if index.column() == DatasetDetailedInfoColumn.AxisOrder:
        original_axistags = imageSlot.meta.original_axistags
        axistags = imageSlot.meta.axistags
        if original_axistags is not None:
            return "".join( tag.key for tag in original_axistags )
        if axistags is not None:
            return "".join( imageSlot.meta.getAxisKeys() )
        return ""

    # Shape
    if index.column() == DatasetDetailedInfoColumn.Shape:
        original_shape = imageSlot.meta.original_shape
        shape = imageSlot.meta.shape
        if original_shape is not None:
            return str(original_shape)
        if shape is None:
            return ""
        return str(shape)

    # Range
    if index.column() == DatasetDetailedInfoColumn.Range:
        drange = imageSlot.meta.drange
        if drange is None:
            return ""
        return str(drange)

    assert False, "Unknown column: row={}, column={}".format( index.row(), index.column() )
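# decode_to_qstring is an ilastik utility; a minimal sketch of the idea, under
# the assumption that display strings arrive as utf-8 encoded Python 2 byte
# strings (the helper name and encoding choice here are illustrative, not the
# actual ilastik implementation):
#
#     def decode_to_qstring_sketch(s, encoding='utf-8'):
#         """Return unicode so PyQt can convert it to QString without mojibake."""
#         if isinstance(s, str):          # Python 2 byte string
#             return s.decode(encoding)
#         return s                        # already unicode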