def configure_operator_with_parsed_args(self, parsed_args): """ Helper function for headless workflows. Configures this applet's top-level operator according to the settings provided in ``parsed_args``. :param parsed_args: Must be an ``argparse.Namespace`` as returned by :py:meth:`parse_known_cmdline_args()`. """ input_paths = parsed_args.input_files # If the user doesn't want image stacks to be copied inte the project file, # we generate hdf5 volumes in a temporary directory and use those files instead. if parsed_args.preconvert_stacks: import tempfile input_paths = self.convertStacksToH5( input_paths, tempfile.gettempdir() ) input_infos = [] for p in input_paths: info = DatasetInfo() info.location = DatasetInfo.Location.FileSystem # Convert all paths to absolute # (otherwise they are relative to the project file, which probably isn't what the user meant) comp = PathComponents(p) comp.externalPath = os.path.abspath(comp.externalPath) info.filePath = comp.totalPath() info.nickname = comp.filenameBase input_infos.append(info) opDataSelection = self.topLevelOperator opDataSelection.DatasetGroup.resize( len(input_infos) ) for lane_index, info in enumerate(input_infos): # Only one dataset role in pixel classification opDataSelection.DatasetGroup[lane_index][0].setValue( info )
def configure_operator_with_parsed_args(self, parsed_args): """ Helper function for headless workflows. Configures this applet's top-level operator according to the settings provided in ``parsed_args``. :param parsed_args: Must be an ``argparse.Namespace`` as returned by :py:meth:`parse_known_cmdline_args()`. """ # TODO: Support image stack inputs by checking for globstrings and converting to hdf5. input_paths = parsed_args.input_files input_infos = [] for p in input_paths: info = DatasetInfo() info.location = DatasetInfo.Location.FileSystem # Convert all paths to absolute # (otherwise they are relative to the project file, which probably isn't what the user meant) comp = PathComponents(p) comp.externalPath = os.path.abspath(comp.externalPath) info.filePath = comp.totalPath() info.nickname = comp.filenameBase input_infos.append(info) opDataSelection = self.topLevelOperator opDataSelection.DatasetGroup.resize( len(input_infos) ) for lane_index, info in enumerate(input_infos): # Only one dataset role in pixel classification opDataSelection.DatasetGroup[lane_index][0].setValue( info )
def _append_lane(workflow, input_filepath, axisorder=None): """ Add a lane to the project file for the given input file. If axisorder is given, override the default axisorder for the file and force the project to use the given one. Globstrings are supported, in which case the files are converted to HDF5 first. """ # If the filepath is a globstring, convert the stack to h5 # todo: skip this? tmp_dir = tempfile.mkdtemp() input_filepath = DataSelectionApplet.convertStacksToH5([input_filepath], tmp_dir)[0] try: os.rmdir(tmp_dir) except OSError as e: if e.errno == 39: logger.warning( 'Temporary directory {} was populated: should be deleted') else: raise info = DatasetInfo() info.location = DatasetInfo.Location.FileSystem info.filePath = input_filepath comp = PathComponents(input_filepath) # Convert all (non-url) paths to absolute # (otherwise they are relative to the project file, which probably isn't what the user meant) if not isUrl(input_filepath): comp.externalPath = os.path.abspath(comp.externalPath) info.filePath = comp.totalPath() info.nickname = comp.filenameBase if axisorder: info.axistags = vigra.defaultAxistags(axisorder) logger.debug("adding lane: {}".format(info)) opDataSelection = workflow.dataSelectionApplet.topLevelOperator # Add a lane num_lanes = len(opDataSelection.DatasetGroup) + 1 logger.debug("num_lanes: {}".format(num_lanes)) opDataSelection.DatasetGroup.resize(num_lanes) # Configure it. role_index = 0 # raw data opDataSelection.DatasetGroup[-1][role_index].setValue(info) workflow.handleNewLanesAdded()
def generateBatchPredictions(workflow, batchInputPaths, batchExportDir, batchOutputSuffix, exportedDatasetName): """ Compute the predictions for each of the specified batch input files, and export them to corresponding h5 files. """ batchInputPaths = convertStacksToH5(batchInputPaths) batchInputInfos = [] for p in batchInputPaths: info = DatasetInfo() info.location = DatasetInfo.Location.FileSystem # Convert all paths to absolute # (otherwise they are relative to the project file, which probably isn't what the user meant) comp = PathComponents(p) comp.externalPath = os.path.abspath(comp.externalPath) info.filePath = comp.totalPath() batchInputInfos.append(info) # Configure batch input operator opBatchInputs = workflow.batchInputApplet.topLevelOperator opBatchInputs.Dataset.setValues( batchInputInfos ) # Configure batch export operator opBatchResults = workflow.batchResultsApplet.topLevelOperator opBatchResults.ExportDirectory.setValue(batchExportDir) opBatchResults.Format.setValue(ExportFormat.H5) opBatchResults.Suffix.setValue(batchOutputSuffix) opBatchResults.InternalPath.setValue(exportedDatasetName) opBatchResults.SelectedSlices.setValue([30]) logger.info( "Exporting data to " + opBatchResults.OutputDataPath[0].value ) # Set up progress display handling (just logging for now) currentProgress = [None] def handleProgress(percentComplete): if currentProgress[0] != percentComplete: currentProgress[0] = percentComplete logger.info("Batch job: {}% complete.".format(percentComplete)) progressSignal = opBatchResults.ProgressSignal[0].value progressSignal.subscribe( handleProgress ) # Make it happen! result = opBatchResults.ExportResult[0].value return result
def append_lane(workflow, input_filepath, axisorder=None): # Sanity checks assert isinstance(workflow, PixelClassificationWorkflow) opPixelClassification = workflow.pcApplet.topLevelOperator assert opPixelClassification.Classifier.ready() # If the filepath is a globstring, convert the stack to h5 input_filepath = DataSelectionApplet.convertStacksToH5( [input_filepath], TMP_DIR )[0] info = DatasetInfo() info.location = DatasetInfo.Location.FileSystem info.filePath = input_filepath comp = PathComponents(input_filepath) # Convert all (non-url) paths to absolute # (otherwise they are relative to the project file, which probably isn't what the user meant) if not isUrl(input_filepath): comp.externalPath = os.path.abspath(comp.externalPath) info.filePath = comp.totalPath() info.nickname = comp.filenameBase if axisorder: info.axistags = vigra.defaultAxistags(axisorder) logger.debug( "adding lane: {}".format( info ) ) opDataSelection = workflow.dataSelectionApplet.topLevelOperator # Add a lane num_lanes = len( opDataSelection.DatasetGroup )+1 logger.debug( "num_lanes: {}".format( num_lanes ) ) opDataSelection.DatasetGroup.resize( num_lanes ) # Configure it. role_index = 0 # raw data opDataSelection.DatasetGroup[-1][role_index].setValue( info ) # Sanity check assert len(opPixelClassification.InputImages) == num_lanes return opPixelClassification
def append_lane(workflow, input_filepath, axisorder=None): """ Add a lane to the project file for the given input file. If axisorder is given, override the default axisorder for the file and force the project to use the given one. Globstrings are supported, in which case the files are converted to HDF5 first. """ # If the filepath is a globstring, convert the stack to h5 input_filepath = DataSelectionApplet.convertStacksToH5( [input_filepath], tempfile.mkdtemp() )[0] info = DatasetInfo() info.location = DatasetInfo.Location.FileSystem info.filePath = input_filepath comp = PathComponents(input_filepath) # Convert all (non-url) paths to absolute # (otherwise they are relative to the project file, which probably isn't what the user meant) if not isUrl(input_filepath): comp.externalPath = os.path.abspath(comp.externalPath) info.filePath = comp.totalPath() info.nickname = comp.filenameBase if axisorder: info.axistags = vigra.defaultAxistags(axisorder) logger.debug( "adding lane: {}".format( info ) ) opDataSelection = workflow.dataSelectionApplet.topLevelOperator # Add a lane num_lanes = len( opDataSelection.DatasetGroup )+1 logger.debug( "num_lanes: {}".format( num_lanes ) ) opDataSelection.DatasetGroup.resize( num_lanes ) # Configure it. role_index = 0 # raw data opDataSelection.DatasetGroup[-1][role_index].setValue( info )
def create_default_headless_dataset_info(cls, filepath): """ filepath may be a globstring or a full hdf5 path+dataset """ comp = PathComponents(filepath) nickname = comp.filenameBase # Remove globstring syntax. if '*' in nickname: nickname = nickname.replace('*', '') if os.path.pathsep in nickname: nickname = PathComponents(nickname.split(os.path.pathsep)[0]).fileNameBase info = DatasetInfo() info.location = DatasetInfo.Location.FileSystem info.nickname = nickname info.filePath = filepath # Convert all (non-url) paths to absolute # (otherwise they are relative to the project file, which probably isn't what the user meant) if not isUrl(filepath): comp.externalPath = os.path.abspath(comp.externalPath) info.filePath = comp.totalPath() return info
def generateBatchPredictions(workflow, batchInputPaths, batchExportDir, batchOutputSuffix, exportedDatasetName, stackVolumeCacheDir): """ Compute the predictions for each of the specified batch input files, and export them to corresponding h5 files. """ originalBatchInputPaths = list(batchInputPaths) batchInputPaths = convertStacksToH5(batchInputPaths, stackVolumeCacheDir) batchInputInfos = [] for p in batchInputPaths: info = DatasetInfo() info.location = DatasetInfo.Location.FileSystem # Convert all paths to absolute # (otherwise they are relative to the project file, which probably isn't what the user meant) comp = PathComponents(p) comp.externalPath = os.path.abspath(comp.externalPath) info.filePath = comp.totalPath() batchInputInfos.append(info) # Also convert the export dir to absolute (for the same reason) if batchExportDir != '': batchExportDir = os.path.abspath( batchExportDir ) # Configure batch input operator opBatchInputs = workflow.batchInputApplet.topLevelOperator opBatchInputs.DatasetGroup.resize( len(batchInputInfos) ) for info, multislot in zip(batchInputInfos, opBatchInputs.DatasetGroup): # FIXME: This assumes that the workflow has exactly one dataset role. multislot[0].setValue( info ) # Configure batch export operator opBatchResults = workflow.batchResultsApplet.topLevelOperator # By default, the output files from the batch export operator # are named using the input file name. # If we converted any stacks to hdf5, then the user won't recognize the input file name. # Let's override the output file name using the *original* input file names. outputFileNameBases = [] for origPath in originalBatchInputPaths: outputFileNameBases.append( origPath.replace('*', 'STACKED') ) opBatchResults.OutputFileNameBase.setValues( outputFileNameBases ) opBatchResults.ExportDirectory.setValue(batchExportDir) opBatchResults.Format.setValue(ExportFormat.H5) opBatchResults.Suffix.setValue(batchOutputSuffix) opBatchResults.InternalPath.setValue(exportedDatasetName) logger.info( "Exporting data to " + opBatchResults.OutputDataPath[0].value ) # Set up progress display handling (just logging for now) currentProgress = [None] def handleProgress(percentComplete): if currentProgress[0] != percentComplete: currentProgress[0] = percentComplete logger.info("Batch job: {}% complete.".format(percentComplete)) progressSignal = opBatchResults.ProgressSignal[0].value progressSignal.subscribe( handleProgress ) # Make it happen! result = opBatchResults.ExportResult[0].value return result
def configure_operator_with_parsed_args(self, parsed_args): """ Helper function for headless workflows. Configures this applet's top-level operator according to the settings provided in ``parsed_args``. :param parsed_args: Must be an ``argparse.Namespace`` as returned by :py:meth:`parse_known_cmdline_args()`. """ role_names = self.topLevelOperator.DatasetRoles.value role_paths = collections.OrderedDict() if role_names: for role_index, role_name in enumerate(role_names): arg_name = self._role_name_to_arg_name(role_name) input_paths = getattr(parsed_args, arg_name) role_paths[role_index] = input_paths if parsed_args.input_files: # We allow the file list to go to the 'default' role, but only if no other roles were explicitly configured. for role_index, input_paths in role_paths.items(): if input_paths: # FIXME: This error message could be more helpful. role_args = map(self._role_name_to_arg_name, role_names) role_args = map(lambda s: '--' + s, role_args) role_args_str = ", ".join(role_args) raise Exception( "Invalid command line arguments: All roles must be configured explicitly.\n" "Use the following flags to specify which files are matched with which inputs:\n" + role_args_str) role_paths = {0: parsed_args.input_files} for role_index, input_paths in role_paths.items(): # If the user doesn't want image stacks to be copied into the project file, # we generate hdf5 volumes in a temporary directory and use those files instead. if parsed_args.preconvert_stacks: import tempfile input_paths = self.convertStacksToH5(input_paths, tempfile.gettempdir()) input_infos = [] for p in input_paths: info = DatasetInfo() info.location = DatasetInfo.Location.FileSystem info.filePath = p comp = PathComponents(p) # Convert all (non-url) paths to absolute # (otherwise they are relative to the project file, which probably isn't what the user meant) if not isUrl(p): comp.externalPath = os.path.abspath(comp.externalPath) info.filePath = comp.totalPath() info.nickname = comp.filenameBase # Remove globstring syntax. if '*' in info.nickname: info.nickname = info.nickname.replace('*', '') if os.path.pathsep in info.nickname: info.nickname = PathComponents( info.nickname.split(os.path.pathsep)[0]).fileNameBase input_infos.append(info) opDataSelection = self.topLevelOperator existing_lanes = len(opDataSelection.DatasetGroup) opDataSelection.DatasetGroup.resize( max(len(input_infos), existing_lanes)) for lane_index, info in enumerate(input_infos): opDataSelection.DatasetGroup[lane_index][role_index].setValue( info) need_warning = False for lane_index in range(len(input_infos)): output_slot = opDataSelection.ImageGroup[lane_index][ role_index] if output_slot.meta.prefer_2d: need_warning = True break if need_warning: logger.warn( "*******************************************************************************************" ) logger.warn( "Some of your input data is stored in a format that is not efficient for 3D access patterns." ) logger.warn( "Performance may suffer as a result. For best performance, use a chunked HDF5 volume." ) logger.warn( "*******************************************************************************************" )
def configure_operator_with_parsed_args(self, parsed_args): """ Helper function for headless workflows. Configures this applet's top-level operator according to the settings provided in ``parsed_args``. :param parsed_args: Must be an ``argparse.Namespace`` as returned by :py:meth:`parse_known_cmdline_args()`. """ role_names = self.topLevelOperator.DatasetRoles.value role_paths = collections.OrderedDict() if role_names: for role_index, role_name in enumerate(role_names): arg_name = self._role_name_to_arg_name(role_name) input_paths = getattr(parsed_args, arg_name) role_paths[role_index] = input_paths if parsed_args.input_files: # We allow the file list to go to the 'default' role, but only if no other roles were explicitly configured. for role_index, input_paths in role_paths.items(): if input_paths: # FIXME: This error message could be more helpful. role_args = map( self._role_name_to_arg_name, role_names ) role_args = map( lambda s: '--' + s, role_args ) role_args_str = ", ".join( role_args ) raise Exception("Invalid command line arguments: All roles must be configured explicitly.\n" "Use the following flags to specify which files are matched with which inputs:\n" + role_args_str ) role_paths = { 0 : parsed_args.input_files } for role_index, input_paths in role_paths.items(): # If the user doesn't want image stacks to be copied into the project file, # we generate hdf5 volumes in a temporary directory and use those files instead. if parsed_args.preconvert_stacks: import tempfile input_paths = self.convertStacksToH5( input_paths, tempfile.gettempdir() ) input_infos = [] for p in input_paths: info = DatasetInfo() info.location = DatasetInfo.Location.FileSystem info.filePath = p comp = PathComponents(p) # Convert all (non-url) paths to absolute # (otherwise they are relative to the project file, which probably isn't what the user meant) if not isUrl(p): comp.externalPath = os.path.abspath(comp.externalPath) info.filePath = comp.totalPath() info.nickname = comp.filenameBase # Remove globstring syntax. if '*' in info.nickname: info.nickname = info.nickname.replace('*', '') if os.path.pathsep in info.nickname: info.nickname = PathComponents(info.nickname.split(os.path.pathsep)[0]).fileNameBase input_infos.append(info) opDataSelection = self.topLevelOperator existing_lanes = len(opDataSelection.DatasetGroup) opDataSelection.DatasetGroup.resize( max(len(input_infos), existing_lanes) ) for lane_index, info in enumerate(input_infos): opDataSelection.DatasetGroup[lane_index][role_index].setValue( info ) need_warning = False for lane_index in range(len(input_infos)): output_slot = opDataSelection.ImageGroup[lane_index][role_index] if output_slot.meta.prefer_2d: need_warning = True break if need_warning: logger.warn("*******************************************************************************************") logger.warn("Some of your input data is stored in a format that is not efficient for 3D access patterns.") logger.warn("Performance may suffer as a result. For best performance, use a chunked HDF5 volume.") logger.warn("*******************************************************************************************")
def generateBatchPredictions(workflow, batchInputPaths, batchExportDir, batchOutputSuffix, exportedDatasetName, stackVolumeCacheDir): """ Compute the predictions for each of the specified batch input files, and export them to corresponding h5 files. """ originalBatchInputPaths = list(batchInputPaths) batchInputPaths = convertStacksToH5(batchInputPaths, stackVolumeCacheDir) batchInputInfos = [] for p in batchInputPaths: info = DatasetInfo() info.location = DatasetInfo.Location.FileSystem # Convert all paths to absolute # (otherwise they are relative to the project file, which probably isn't what the user meant) comp = PathComponents(p) comp.externalPath = os.path.abspath(comp.externalPath) info.filePath = comp.totalPath() batchInputInfos.append(info) # Also convert the export dir to absolute (for the same reason) if batchExportDir != '': batchExportDir = os.path.abspath(batchExportDir) # Configure batch input operator opBatchInputs = workflow.batchInputApplet.topLevelOperator opBatchInputs.DatasetGroup.resize(len(batchInputInfos)) for info, multislot in zip(batchInputInfos, opBatchInputs.DatasetGroup): # FIXME: This assumes that the workflow has exactly one dataset role. multislot[0].setValue(info) # Configure batch export operator opBatchResults = workflow.batchResultsApplet.topLevelOperator # By default, the output files from the batch export operator # are named using the input file name. # If we converted any stacks to hdf5, then the user won't recognize the input file name. # Let's override the output file name using the *original* input file names. outputFileNameBases = [] for origPath in originalBatchInputPaths: outputFileNameBases.append(origPath.replace('*', 'STACKED')) opBatchResults.OutputFileNameBase.setValues(outputFileNameBases) opBatchResults.ExportDirectory.setValue(batchExportDir) opBatchResults.Format.setValue(ExportFormat.H5) opBatchResults.Suffix.setValue(batchOutputSuffix) opBatchResults.InternalPath.setValue(exportedDatasetName) logger.info("Exporting data to " + opBatchResults.OutputDataPath[0].value) # Set up progress display handling (just logging for now) currentProgress = [None] def handleProgress(percentComplete): if currentProgress[0] != percentComplete: currentProgress[0] = percentComplete logger.info("Batch job: {}% complete.".format(percentComplete)) progressSignal = opBatchResults.ProgressSignal[0].value progressSignal.subscribe(handleProgress) # Make it happen! result = opBatchResults.ExportResult[0].value return result