def testBasic(self):
    graph = Graph()
    op = OpTiledVolumeReader(graph=graph)
    op.DescriptionFilePath.setValue(self.data_setup.VOLUME_DESCRIPTION_FILE)

    roi = numpy.array([(10, 150, 100), (30, 550, 550)])
    result_out = op.Output(*roi).wait()

    # The output shape should exactly match the requested roi.
    assert (result_out.shape == roi[1] - roi[0]).all()

    ref_path_comp = PathComponents(self.data_setup.REFERENCE_VOL_PATH)
    with h5py.File(ref_path_comp.externalPath, "r") as f:
        ref_data = f[ref_path_comp.internalPath][:]

    expected = ref_data[roiToSlice(*roi)]

    # numpy.save('/tmp/expected.npy', expected)
    # numpy.save('/tmp/result_out.npy', result_out)

    # The tiles are written as (lossless) png, so the data should match the
    # reference volume exactly.
    assert (expected == result_out).all()
def test_5_TestExportRoi(self):
    roi = ((0, 25, 25, 25, 0), (1, 75, 75, 75, 1))
    exportDir = tempfile.mkdtemp()
    datasetPath = self.bfs.exportRoiToHdf5(roi, exportDir)
    path_parts = PathComponents(datasetPath)
    try:
        assert path_parts.externalDirectory == exportDir, \
            "Dataset was not exported to the correct directory:\n" \
            "Expected: {}\n" \
            "Got: {}".format(exportDir, path_parts.externalDirectory)

        expected_data = self.data[roiToSlice(*roi)]
        with h5py.File(path_parts.externalPath, 'r') as f:
            read_data = f[path_parts.internalPath][...]
        assert read_data.shape == expected_data.shape, "Exported data had wrong shape"
        assert read_data.dtype == expected_data.dtype, "Exported data had wrong dtype"
        assert (read_data == expected_data).all(), "Exported data did not match expected data"
    finally:
        shutil.rmtree(exportDir)
def test_special_z_translation(self):
    """
    Test the special z_translation_function feature of the volume description.
    The test description uses "lambda z: z+11", so a roi translated by -11
    along z must return the same data as the untranslated reference roi.
    """
    tiled_volume = TiledVolume(self.data_setup.SPECIAL_Z_VOLUME_DESCRIPTION_FILE)
    tiled_volume.TEST_MODE = True

    reference_roi = numpy.array([(20, 150, 100), (40, 550, 550)])
    result_out = numpy.zeros(reference_roi[1] - reference_roi[0],
                             dtype=tiled_volume.description.dtype)

    roi_translated = reference_roi - [11, 0, 0]
    tiled_volume.read(roi_translated, result_out)

    ref_path_comp = PathComponents(self.data_setup.REFERENCE_VOL_PATH)
    with h5py.File(ref_path_comp.externalPath, 'r') as f:
        ref_data = f[ref_path_comp.internalPath][:]

    expected = ref_data[roiToSlice(*reference_roi)]

    # numpy.save('/tmp/expected.npy', expected)
    # numpy.save('/tmp/result_out.npy', result_out)

    assert (expected == result_out).all()
def testRemappedTiles(self):
    # The test config specifies that slices 45, 46, and 47 get their data from
    # slice 44, and that slice 41 is remapped to slice 40.
    tiled_volume = TiledVolume(self.data_setup.VOLUME_DESCRIPTION_FILE)
    tiled_volume.TEST_MODE = True

    roi = numpy.array([(40, 150, 100), (50, 550, 550)])
    result_out = numpy.zeros(roi[1] - roi[0], dtype=tiled_volume.description.dtype)
    tiled_volume.read(roi, result_out)

    ref_path_comp = PathComponents(self.data_setup.REFERENCE_VOL_PATH)
    with h5py.File(ref_path_comp.externalPath, 'r') as f:
        ref_data = f[ref_path_comp.internalPath][:]

    # Within the roi (which starts at z=40), indexes 5,6,7 are missing from the
    # server data and 'filled in' with index 4.
    # Similarly, index 1 is missing and filled in with index 0.
    expected = ref_data[roiToSlice(*roi)]
    expected[5:8] = expected[4]
    expected[1] = expected[0]

    # numpy.save('/tmp/expected.npy', expected)
    # numpy.save('/tmp/result_out.npy', result_out)

    assert (expected == result_out).all()
def testCustomAxes(self):
    tiled_volume = TiledVolume(self.data_setup.TRANSPOSED_VOLUME_DESCRIPTION_FILE)
    tiled_volume.TEST_MODE = True

    roi = numpy.array([(10, 150, 100), (30, 550, 550)])
    result_out = numpy.zeros(roi[1] - roi[0], dtype=tiled_volume.description.dtype)

    # The transposed description uses output_axes == "xyz", so the roi and the
    # output array must be reversed/transposed before reading.
    roi_t = (tuple(reversed(roi[0])), tuple(reversed(roi[1])))
    result_out_t = result_out.transpose()
    tiled_volume.read(roi_t, result_out_t)

    ref_path_comp = PathComponents(self.data_setup.REFERENCE_VOL_PATH)
    with h5py.File(ref_path_comp.externalPath, 'r') as f:
        ref_data = f[ref_path_comp.internalPath][:]

    expected = ref_data[roiToSlice(*roi)]

    # numpy.save('/tmp/expected.npy', expected)
    # numpy.save('/tmp/result_out.npy', result_out)

    assert (expected == result_out).all()
def exportRoiToHdf5(self, roi, exportDirectory, use_view_coordinates=True):
    """
    Export an arbitrary roi to a single hdf5 file.
    The file will be placed in the given exportDirectory,
    and will be named according to the exported roi.

    :param roi: The roi to export
    :param exportDirectory: The directory in which the result should be placed.
    :param use_view_coordinates: If True, assume the roi was given relative to the view start.
                                 Otherwise, assume it was given relative to the on-disk coordinates.
    """
    roi = list(map(TinyVector, roi))
    if not use_view_coordinates:
        abs_roi = roi
        assert (abs_roi[0] >= self.description.view_origin), \
            "Roi {} is out-of-bounds: must not span lower than the view origin: {}".format(
                roi, self.description.view_origin)
        view_roi = roi - self.description.view_origin
    else:
        view_roi = roi
        abs_roi = view_roi + self.description.view_origin

    # Always name the file according to the absolute roi
    roiString = "{}".format((list(abs_roi[0]), list(abs_roi[1])))
    datasetPath = self._description.block_file_name_format.format(roiString=roiString)
    fullDatasetPath = os.path.join(exportDirectory, datasetPath)
    path_parts = PathComponents(fullDatasetPath)

    with h5py.File(path_parts.externalPath, "w") as f:
        self._createDatasetInFile(f, path_parts.internalPath, view_roi)
        dataset = f[path_parts.internalPath]
        self.readData(view_roi, dataset)

    return fullDatasetPath
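# Usage sketch for the API above: 'bfs' stands for an already-configured
# fileset instance that provides exportRoiToHdf5() (as in test_5_TestExportRoi
# above); the roi and directory are only illustrative.
import tempfile

roi = ((0, 25, 25, 25, 0), (1, 75, 75, 75, 1))
export_dir = tempfile.mkdtemp()

# Returns the full path to the exported dataset, including the internal hdf5 name.
dataset_path = bfs.exportRoiToHdf5(roi, export_dir, use_view_coordinates=True)
print(dataset_path)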
def _selectFiles(self):
    # Find the directory of the most recently opened image file
    mostRecentStackDirectory = PreferencesManager().get(
        'DataSelection', 'recent stack directory')
    if mostRecentStackDirectory is not None:
        defaultDirectory = os.path.split(mostRecentStackDirectory)[0]
    else:
        defaultDirectory = os.path.expanduser('~')

    h5exts = [x.lstrip('.') for x in OpStreamingHdf5SequenceReaderM.H5EXTS]

    # Launch the "Open File" dialog
    extensions = vigra.impex.listExtensions().split()
    extensions.extend(h5exts)
    filt = "Image files (" + ' '.join('*.' + x for x in extensions) + ')'
    options = QFileDialog.Options()
    if ilastik.config.cfg.getboolean("ilastik", "debug"):
        options |= QFileDialog.DontUseNativeDialog
    fileNames = QFileDialog.getOpenFileNames(
        self, "Select Images for Stack", defaultDirectory, filt, options=options)
    fileNames = map(encode_from_qstring, fileNames)

    msg = ''
    if len(fileNames) == 0:
        return

    if len(fileNames) == 1:
        msg += 'Cannot create stack: You only chose a single file. '
        msg += 'If your stack is contained in a single file (e.g. a multi-page tiff or '
        msg += 'hdf5 volume), please use the "Add File" button.'
        QMessageBox.warning(self, "Invalid selection", msg)
        return None

    pathComponents = PathComponents(fileNames[0])
    directory = pathComponents.externalPath
    PreferencesManager().set('DataSelection', 'recent stack directory', directory)

    if pathComponents.extension in OpStreamingHdf5SequenceReaderM.H5EXTS:
        # check for internal paths!
        internal_paths = self._findCommonInternal(fileNames)

        if len(internal_paths) == 0:
            msg += 'Could not find a unique common internal path in '
            msg += directory + '\n'
            QMessageBox.warning(self, "Invalid selection", msg)
            return None
        elif len(internal_paths) == 1:
            fileNames = ['{}/{}'.format(fn, internal_paths[0]) for fn in fileNames]
        else:
            # Ask the user which dataset to choose
            dlg = H5VolumeSelectionDlg(internal_paths, self)
            if dlg.exec_() == QDialog.Accepted:
                selected_index = dlg.combo.currentIndex()
                selected_dataset = str(internal_paths[selected_index])
                fileNames = ['{}/{}'.format(fn, selected_dataset) for fn in fileNames]
            else:
                msg = 'No valid internal path selected.'
                QMessageBox.warning(self, "Invalid selection", msg)
                return None

    self._updateFileList(fileNames)
def generateBatchPredictions(workflow, batchInputPaths, batchExportDir,
                             batchOutputSuffix, exportedDatasetName,
                             stackVolumeCacheDir):
    """
    Compute the predictions for each of the specified batch input files,
    and export them to corresponding h5 files.
    """
    originalBatchInputPaths = list(batchInputPaths)
    batchInputPaths = convertStacksToH5(batchInputPaths, stackVolumeCacheDir)

    batchInputInfos = []
    for p in batchInputPaths:
        info = DatasetInfo()
        info.location = DatasetInfo.Location.FileSystem

        # Convert all paths to absolute
        # (otherwise they are relative to the project file, which probably isn't what the user meant)
        comp = PathComponents(p)
        comp.externalPath = os.path.abspath(comp.externalPath)
        info.filePath = comp.totalPath()
        batchInputInfos.append(info)

    # Also convert the export dir to absolute (for the same reason)
    if batchExportDir != '':
        batchExportDir = os.path.abspath(batchExportDir)

    # Configure batch input operator
    opBatchInputs = workflow.batchInputApplet.topLevelOperator
    opBatchInputs.DatasetGroup.resize(len(batchInputInfos))
    for info, multislot in zip(batchInputInfos, opBatchInputs.DatasetGroup):
        # FIXME: This assumes that the workflow has exactly one dataset role.
        multislot[0].setValue(info)

    # Configure batch export operator
    opBatchResults = workflow.batchResultsApplet.topLevelOperator

    # By default, the output files from the batch export operator
    # are named using the input file name.
    # If we converted any stacks to hdf5, then the user won't recognize the input file name.
    # Let's override the output file name using the *original* input file names.
    outputFileNameBases = []
    for origPath in originalBatchInputPaths:
        outputFileNameBases.append(origPath.replace('*', 'STACKED'))

    opBatchResults.OutputFileNameBase.setValues(outputFileNameBases)
    opBatchResults.ExportDirectory.setValue(batchExportDir)
    opBatchResults.Format.setValue(ExportFormat.H5)
    opBatchResults.Suffix.setValue(batchOutputSuffix)
    opBatchResults.InternalPath.setValue(exportedDatasetName)
    logger.info("Exporting data to " + opBatchResults.OutputDataPath[0].value)

    # Set up progress display handling (just logging for now)
    currentProgress = [None]

    def handleProgress(percentComplete):
        if currentProgress[0] != percentComplete:
            currentProgress[0] = percentComplete
            logger.info("Batch job: {}% complete.".format(percentComplete))

    progressSignal = opBatchResults.ProgressSignal[0].value
    progressSignal.subscribe(handleProgress)

    # Make it happen!
    result = opBatchResults.ExportResult[0].value
    return result
def setup(self):
    """
    Generate a directory with all the files needed for this test.
    We use the same temporary directory every time, so we don't
    waste time regenerating the data if the test has already been run recently.

    The directory consists of the following files:
    - reference_volume.h5
    - volume_description.json
    - transposed_volume_description.json
    - [lots of png tiles..]
    """
    global volume_description_text
    global port
    try:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            # allow the socket port to be reused if in TIME_WAIT state
            sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            sock.bind(('localhost', port))  # try default/previous port
    except Exception:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            # allow the socket port to be reused if in TIME_WAIT state
            sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            sock.bind(('localhost', 0))  # find a free port
            port = sock.getsockname()[1]

    volume_description_text = volume_description_text.replace('{port}', str(port))

    tmp = tempfile.gettempdir()
    self.TILE_DIRECTORY = os.path.join(tmp, 'testTiledVolume_data')
    logger.debug("Using test directory: {}".format(self.TILE_DIRECTORY))
    self.REFERENCE_VOL_PATH = os.path.join(self.TILE_DIRECTORY, 'reference_volume.h5/data')
    ref_vol_path_comp = PathComponents(self.REFERENCE_VOL_PATH)
    self.REFERENCE_VOL_FILE = ref_vol_path_comp.externalPath
    self.VOLUME_DESCRIPTION_FILE = os.path.join(self.TILE_DIRECTORY, 'volume_description.json')
    self.LOCAL_VOLUME_DESCRIPTION_FILE = os.path.join(
        self.TILE_DIRECTORY, 'local_volume_description.json')
    self.TRANSPOSED_VOLUME_DESCRIPTION_FILE = os.path.join(
        self.TILE_DIRECTORY, 'transposed_volume_description.json')
    self.TRANSLATED_VOLUME_DESCRIPTION_FILE = os.path.join(
        self.TILE_DIRECTORY, 'translated_volume_description.json')
    self.SPECIAL_Z_VOLUME_DESCRIPTION_FILE = os.path.join(
        self.TILE_DIRECTORY, 'special_z_volume_description.json')

    if not os.path.exists(self.TILE_DIRECTORY):
        print("Creating new tile directory: {}".format(self.TILE_DIRECTORY))
        os.mkdir(self.TILE_DIRECTORY)

    if not os.path.exists(self.REFERENCE_VOL_FILE):
        ref_vol = numpy.random.randint(0, 255, (100, 600, 600)).astype(numpy.uint8)
        with h5py.File(self.REFERENCE_VOL_FILE, 'w') as ref_file:
            ref_file[ref_vol_path_comp.internalPath] = ref_vol
    else:
        with h5py.File(self.REFERENCE_VOL_FILE, 'r') as ref_file:
            ref_vol = ref_file[ref_vol_path_comp.internalPath][:]

    need_rewrite = False
    if not os.path.exists(self.VOLUME_DESCRIPTION_FILE):
        need_rewrite = True
    else:
        with open(self.VOLUME_DESCRIPTION_FILE, 'r') as f:
            if f.read() != volume_description_text:
                need_rewrite = True

    if need_rewrite:
        with open(self.VOLUME_DESCRIPTION_FILE, 'w') as f:
            f.write(volume_description_text)

    # Read the volume description as a JsonConfig Namespace
    volume_description = TiledVolume.readDescription(self.VOLUME_DESCRIPTION_FILE)

    # Write out a copy of the description, but with a local tile path instead of a URL
    config_helper = JsonConfigParser(TiledVolume.DescriptionFields)
    local_description = copy.copy(volume_description)
    local_description.tile_url_format = \
        self.TILE_DIRECTORY + "/tile_z{z_start:05}_y{y_start:05}_x{x_start:05}.png"
    config_helper.writeConfigFile(self.LOCAL_VOLUME_DESCRIPTION_FILE, local_description)

    # Write out a copy of the description, but with custom output axes
    config_helper = JsonConfigParser(TiledVolume.DescriptionFields)
    transposed_description = copy.copy(volume_description)
    transposed_description.output_axes = "xyz"
    config_helper.writeConfigFile(self.TRANSPOSED_VOLUME_DESCRIPTION_FILE,
                                  transposed_description)

    # Write out another copy of the description, but with an origin translation
    config_helper = JsonConfigParser(TiledVolume.DescriptionFields)
    translated_description = copy.copy(volume_description)
    translated_description.view_origin_zyx = [10, 20, 30]
    translated_description.shape_zyx = None
    config_helper.writeConfigFile(self.TRANSLATED_VOLUME_DESCRIPTION_FILE,
                                  translated_description)

    # Write out another copy of the description, but with a special function
    # for translating z-coordinates.
    config_helper = JsonConfigParser(TiledVolume.DescriptionFields)
    special_z_description = copy.copy(volume_description)
    special_z_description.z_translation_function = "lambda z: z+11"
    config_helper.writeConfigFile(self.SPECIAL_Z_VOLUME_DESCRIPTION_FILE,
                                  special_z_description)

    # Remove all old image tiles in the tile directory
    files = os.listdir(self.TILE_DIRECTORY)
    for name in files:
        if os.path.splitext(name)[1] == '.' + volume_description.format:
            os.remove(os.path.join(self.TILE_DIRECTORY, name))

    # Write the new tiles
    export_to_tiles(ref_vol, volume_description.tile_shape_2d_yx[0],
                    self.TILE_DIRECTORY, print_progress=False)

    # To support testMissingTiles (below), remove slice 2
    files = os.listdir(self.TILE_DIRECTORY)
    for name in files:
        if name.startswith("tile_z00002"):
            p = os.path.join(self.TILE_DIRECTORY, name)
            print("removing:", p)
            os.remove(p)

    # lastly, start the server
    self._start_server()
import argparse

# sys.argv += "/tmp/example_slice.h5/data /tmp/example_slice2.h5/data --export_drange=(0,255) --output_format=png --pipeline_result_drange=(1,2)".split()

# Construct a parser with all the 'normal' export options,
# and add an arg for prediction_image_paths.
parser = DataExportApplet.make_cmdline_parser(argparse.ArgumentParser())
parser.add_argument("prediction_image_paths", nargs="+",
                    help="Path(s) to your exported predictions.")
parsed_args = parser.parse_args()
parsed_args, unused_args = DataExportApplet.parse_known_cmdline_args(
    sys.argv[1:], parsed_args)

# As a convenience, auto-determine the internal dataset path if possible.
for index, input_path in enumerate(parsed_args.prediction_image_paths):
    path_comp = PathComponents(input_path, os.getcwd())
    if not parsed_args.output_internal_path:
        parsed_args.output_internal_path = "segmentation"

    if path_comp.extension in PathComponents.HDF5_EXTS and path_comp.internalDatasetName == "":
        with h5py.File(path_comp.externalPath, "r") as f:
            all_internal_paths = all_dataset_internal_paths(f)

        if len(all_internal_paths) == 1:
            path_comp.internalPath = all_internal_paths[0]
            parsed_args.prediction_image_paths[index] = path_comp.totalPath()
        elif len(all_internal_paths) == 0:
            sys.stderr.write(
                "Could not find any datasets in your input file:\n{}\n".format(input_path))
            sys.exit(1)
parser = argparse.ArgumentParser()
parser.add_argument(
    'h5_volume_path',
    help='A path to the hdf5 volume, with internal dataset name, '
         'e.g. /tmp/myfile.h5/myvolume')
parser.add_argument('object_label_1',
                    help='The label value of the first object for comparison')
parser.add_argument('object_label_2',
                    help='The label value of the second object for comparison')
parsed_args = parser.parse_args()

h5_path_comp = PathComponents(parsed_args.h5_volume_path)
object_label_1 = int(parsed_args.object_label_1)
object_label_2 = int(parsed_args.object_label_2)

with h5py.File(h5_path_comp.externalPath, 'r') as f:
    volume = f[h5_path_comp.internalPath][:]

contact_area = measure_surface_contact_A(volume, object_label_1, object_label_2,
                                         contact_distance=1)

# Alternative implementation:
# contact_area = measure_surface_contact_B(volume, object_label_1, object_label_2, contact_distance=1)

print(contact_area)
def setupOutputs(self):
    self.cleanupOnDiskView()

    # FIXME: If RawData becomes unready() at the same time as RawDatasetInfo(), then
    #        we have no guarantees about which one will trigger setupOutputs() first.
    #        It is therefore possible for 'RawDatasetInfo' to appear ready() to us,
    #        even though its upstream partner is UNready. We are about to get the
    #        unready() notification, but it will come too late to prevent our
    #        setupOutputs method from being called.
    #        Without proper graph setup transaction semantics, we have to use this
    #        hack as a workaround.
    try:
        rawInfo = self.RawDatasetInfo.value
    except:
        for oslot in list(self.outputs.values()):
            if oslot.partner is None:
                oslot.meta.NOTREADY = True
        return

    selection_index = self.InputSelection.value
    if not self.Inputs[selection_index].ready():
        for oslot in list(self.outputs.values()):
            if oslot.partner is None:
                oslot.meta.NOTREADY = True
        return
    self._opFormattedExport.Input.connect(self.Inputs[selection_index])

    if os.path.pathsep in rawInfo.filePath:
        first_dataset = rawInfo.filePath.split(os.path.pathsep)[0]
        dataset_dir = PathComponents(first_dataset).externalDirectory
    else:
        dataset_dir = PathComponents(rawInfo.filePath).externalDirectory
    abs_dataset_dir, _ = getPathVariants(dataset_dir, self.WorkingDirectory.value)

    known_keys = {}
    known_keys['dataset_dir'] = abs_dataset_dir
    nickname = rawInfo.nickname.replace('*', '')
    if os.path.pathsep in nickname:
        nickname = PathComponents(nickname.split(os.path.pathsep)[0]).fileNameBase
    known_keys['nickname'] = nickname
    result_types = self.SelectionNames.value
    known_keys['result_type'] = result_types[selection_index]

    # Disconnect to open the 'transaction'
    if self._opImageOnDiskProvider is not None:
        self._opImageOnDiskProvider.TransactionSlot.disconnect()
    self._opFormattedExport.TransactionSlot.disconnect()

    # Blank the internal path while we manipulate the external path
    # to avoid invalid intermediate states of ExportPath
    self._opFormattedExport.OutputInternalPath.setValue("")

    # use partial formatting to fill in non-coordinate name fields
    name_format = self.OutputFilenameFormat.value
    partially_formatted_name = format_known_keys(name_format, known_keys)

    # Convert to absolute path before configuring the internal op
    abs_path, _ = getPathVariants(partially_formatted_name, self.WorkingDirectory.value)
    self._opFormattedExport.OutputFilenameFormat.setValue(abs_path)

    # use partial formatting on the internal dataset name, too
    internal_dataset_format = self.OutputInternalPath.value
    partially_formatted_dataset_name = format_known_keys(internal_dataset_format, known_keys)
    self._opFormattedExport.OutputInternalPath.setValue(partially_formatted_dataset_name)

    # Re-connect to finish the 'transaction'
    self._opFormattedExport.TransactionSlot.connect(self.TransactionSlot)
    if self._opImageOnDiskProvider is not None:
        self._opImageOnDiskProvider.TransactionSlot.connect(self.TransactionSlot)

    self.setupOnDiskView()
def importStackAsLocalDataset(self, info, sequence_axis='t'):
    """
    Add the given stack data to the project file as a local dataset.
    Does not update the topLevelOperator.

    :param info: A DatasetInfo object.
                 Note: info.filePath must be a str which lists the stack files,
                 delimited with os.path.pathsep
                 Note: info will be MODIFIED by this function.
                 Use the modified info when assigning it to a dataset.
    """
    self.progressSignal.emit(0)

    projectFileHdf5 = self.topLevelOperator.ProjectFile.value

    globstring = info.filePath
    info.location = DatasetInfo.Location.ProjectInternal
    firstPathParts = PathComponents(info.filePath.split(os.path.pathsep)[0])
    info.filePath = firstPathParts.externalDirectory + '/??' + firstPathParts.extension
    info.fromstack = True

    # Use absolute path
    cwd = self.topLevelOperator.WorkingDirectory
    if os.path.pathsep not in globstring and not os.path.isabs(globstring):
        globstring = os.path.normpath(os.path.join(cwd, globstring))

    if firstPathParts.extension.lower() in OpTiffReader.TIFF_EXTS:
        # Special loader for TIFFs
        opLoader = OpTiffSequenceReader(parent=self.topLevelOperator.parent)
        opLoader.SequenceAxis.setValue(sequence_axis)
        opLoader.GlobString.setValue(globstring)
        data_slot = opLoader.Output
    elif firstPathParts.extension.lower() in OpStreamingHdf5SequenceReaderM.H5EXTS:
        # Now use the .checkGlobString method of the stack readers
        isSingleFile = True
        try:
            OpStreamingHdf5SequenceReaderS.checkGlobString(globstring)
        except (OpStreamingHdf5SequenceReaderS.NoInternalPlaceholderError,
                OpStreamingHdf5SequenceReaderS.NotTheSameFileError,
                OpStreamingHdf5SequenceReaderS.ExternalPlaceholderError):
            isSingleFile = False

        isMultiFile = True
        try:
            OpStreamingHdf5SequenceReaderM.checkGlobString(globstring)
        except (OpStreamingHdf5SequenceReaderM.NoExternalPlaceholderError,
                OpStreamingHdf5SequenceReaderM.SameFileError,
                OpStreamingHdf5SequenceReaderM.InternalPlaceholderError):
            isMultiFile = False

        assert not (isMultiFile and isSingleFile), \
            "Something is wrong, glob string shouldn't allow both"
        assert isMultiFile or isSingleFile, \
            "Glob string doesn't conform to h5 stack glob string rules"

        if isSingleFile:
            opLoader = OpStreamingHdf5SequenceReaderS(parent=self.topLevelOperator.parent)
        elif isMultiFile:
            opLoader = OpStreamingHdf5SequenceReaderM(parent=self.topLevelOperator.parent)

        opLoader.SequenceAxis.setValue(sequence_axis)
        opLoader.GlobString.setValue(globstring)
        data_slot = opLoader.OutputImage
    else:
        # All other sequences (e.g. pngs, jpegs, etc.)
        opLoader = OpStackLoader(parent=self.topLevelOperator.parent)
        opLoader.SequenceAxis.setValue(sequence_axis)
        opLoader.globstring.setValue(globstring)
        data_slot = opLoader.stack

    try:
        opWriter = OpH5WriterBigDataset(parent=self.topLevelOperator.parent)
        opWriter.hdf5File.setValue(projectFileHdf5)
        opWriter.hdf5Path.setValue(self.topGroupName + '/local_data/' + info.datasetId)
        opWriter.CompressionEnabled.setValue(False)
        # We assume that the main bottleneck is the hard disk,
        # so adding lots of threads to access it at once seems like a bad idea.
        opWriter.BatchSize.setValue(1)
        opWriter.Image.connect(data_slot)

        # Forward progress from the writer directly to our applet
        opWriter.progressSignal.subscribe(self.progressSignal.emit)

        success = opWriter.WriteImage.value
    finally:
        opWriter.cleanUp()
        opLoader.cleanUp()

    self.progressSignal.emit(100)
    return success
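# Usage sketch: the slice paths are hypothetical, and 'serializer' is assumed
# to be the serializer object that owns importStackAsLocalDataset (with its
# project file already open, since the method writes into ProjectFile).
import os

info = DatasetInfo()
info.filePath = os.path.pathsep.join(
    ['/data/slice_00.png', '/data/slice_01.png', '/data/slice_02.png'])

success = serializer.importStackAsLocalDataset(info, sequence_axis='z')

# 'info' has now been modified in-place (location, filePath, fromstack);
# use the modified info when assigning it to a dataset slot.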
def get_settings_and_export_layer(layer, parent_widget=None):
    """
    Prompt the user for layer export settings, and perform the layer export.
    """
    sourceTags = [True for l in layer.datasources]
    for i, source in enumerate(layer.datasources):
        if not hasattr(source, "dataSlot"):
            sourceTags[i] = False
    if not any(sourceTags):
        raise RuntimeError(
            "can not export from a non-lazyflow data source (layer=%r, datasource=%r)"
            % (type(layer), type(layer.datasources[0])))

    if not _has_lazyflow:
        raise RuntimeError("lazyflow not installed")
    import lazyflow

    dataSlots = [slot.dataSlot
                 for (slot, isSlot) in zip(layer.datasources, sourceTags)
                 if isSlot is True]

    opStackChannels = lazyflow.operators.OpMultiArrayStacker(
        dataSlots[0].getRealOperator().parent)
    for slot in dataSlots:
        assert isinstance(slot, lazyflow.graph.Slot), \
            "slot is of type %r" % (type(slot))
        assert isinstance(slot.getRealOperator(), lazyflow.graph.Operator), \
            "slot's operator is of type %r" % (type(slot.getRealOperator()))
    opStackChannels.AxisFlag.setValue("c")
    opStackChannels.Images.resize(len(dataSlots))
    for i, islot in enumerate(opStackChannels.Images):
        islot.connect(dataSlots[i])

    export_dir = PreferencesManager().get("layer", "export-dir",
                                          default=os.path.expanduser("~"))

    # Create an operator to do the work
    from lazyflow.operators.ioOperators import OpFormattedDataExport
    opExport = OpFormattedDataExport(parent=opStackChannels.parent)
    opExport.OutputFilenameFormat.setValue(os.path.join(export_dir, layer.name))
    opExport.Input.connect(opStackChannels.Output)
    opExport.TransactionSlot.setValue(True)

    # Use this dialog to populate the operator's slot settings
    settingsDlg = DataExportOptionsDlg(parent_widget, opExport)

    # If user didn't cancel, run the export now.
    if settingsDlg.exec_() == DataExportOptionsDlg.Accepted:
        export_dir = PathComponents(opExport.ExportPath.value).externalDirectory
        PreferencesManager().set("layer", "export-dir", export_dir)
        helper = ExportHelper(parent_widget)
        helper.run(opExport)

    # Clean up our temporary operators
    opExport.cleanUp()
    opStackChannels.cleanUp()
def setup(self):
    """
    Generate a directory with all the files needed for this test.
    We use the same temporary directory every time, so we don't
    waste time regenerating the data if the test has already been run recently.

    The directory consists of the following files:
    - reference_volume.h5
    - volume_description.json
    - transposed_volume_description.json
    - [lots of png tiles..]
    """
    tmp = tempfile.gettempdir()
    self.TILE_DIRECTORY = os.path.join(tmp, 'testTiledVolume_data')
    logger.debug("Using test directory: {}".format(self.TILE_DIRECTORY))
    self.REFERENCE_VOL_PATH = os.path.join(self.TILE_DIRECTORY, 'reference_volume.h5/data')
    ref_vol_path_comp = PathComponents(self.REFERENCE_VOL_PATH)
    self.REFERENCE_VOL_FILE = ref_vol_path_comp.externalPath
    self.VOLUME_DESCRIPTION_FILE = os.path.join(self.TILE_DIRECTORY, 'volume_description.json')
    self.TRANSPOSED_VOLUME_DESCRIPTION_FILE = os.path.join(
        self.TILE_DIRECTORY, 'transposed_volume_description.json')

    if not os.path.exists(self.TILE_DIRECTORY):
        print("Creating new tile directory: {}".format(self.TILE_DIRECTORY))
        os.mkdir(self.TILE_DIRECTORY)

    if not os.path.exists(self.REFERENCE_VOL_FILE):
        ref_vol = numpy.random.randint(0, 255, (100, 600, 600)).astype(numpy.uint8)
        with h5py.File(self.REFERENCE_VOL_FILE, 'w') as ref_file:
            ref_file[ref_vol_path_comp.internalPath] = ref_vol
    else:
        with h5py.File(self.REFERENCE_VOL_FILE, 'r') as ref_file:
            ref_vol = ref_file[ref_vol_path_comp.internalPath][:]

    need_rewrite = False
    if not os.path.exists(self.VOLUME_DESCRIPTION_FILE):
        need_rewrite = True
    else:
        with open(self.VOLUME_DESCRIPTION_FILE, 'r') as f:
            if f.read() != volume_description_text:
                need_rewrite = True

    if need_rewrite:
        with open(self.VOLUME_DESCRIPTION_FILE, 'w') as f:
            f.write(volume_description_text)

    # Read the volume description as a JsonConfig Namespace
    volume_description = TiledVolume.readDescription(self.VOLUME_DESCRIPTION_FILE)

    # Write out a copy of the description, but with custom output axes
    config_helper = JsonConfigParser(TiledVolume.DescriptionFields)
    transposed_description = copy.copy(volume_description)
    transposed_description.output_axes = "xyz"
    config_helper.writeConfigFile(self.TRANSPOSED_VOLUME_DESCRIPTION_FILE,
                                  transposed_description)

    # Remove all old image tiles in the tile directory
    files = os.listdir(self.TILE_DIRECTORY)
    for name in files:
        if os.path.splitext(name)[1] == '.' + volume_description.format:
            os.remove(os.path.join(self.TILE_DIRECTORY, name))

    # Write the new tiles
    export_to_tiles(ref_vol, volume_description.tile_shape_2d_yx[0],
                    self.TILE_DIRECTORY, print_progress=False)

    # To support testMissingTiles (below), remove slice 2
    files = os.listdir(self.TILE_DIRECTORY)
    for name in files:
        if name.startswith("tile_z00002"):
            p = os.path.join(self.TILE_DIRECTORY, name)
            print("removing:", p)
            os.remove(p)

    # lastly, start the server
    self._start_server()
(This script is not generally useful for most ilastik users or developers.)

Input: hdf5 volume
Output: directory of .png tiles representing the volume.
"""
if __name__ == "__main__":
    import sys
    import h5py
    import logging
    import argparse
    from lazyflow.utility import PathComponents, export_to_tiles

    logger = logging.getLogger()
    logger.addHandler(logging.StreamHandler(sys.stdout))
    logger.setLevel(logging.INFO)

    # Usage: python make_tiles.py --tile_size=250 /path/to/my_vol.h5/some/dataset /path/to/output_dir
    parser = argparse.ArgumentParser()
    parser.add_argument("--tile_size", type=int)
    parser.add_argument("hdf5_dataset_path")
    parser.add_argument("output_dir")

    parsed_args = parser.parse_args(sys.argv[1:])

    path_comp = PathComponents(parsed_args.hdf5_dataset_path)
    with h5py.File(path_comp.externalPath, 'r') as input_file:
        vol_dset = input_file[path_comp.internalPath]
        export_to_tiles(vol_dset, parsed_args.tile_size, parsed_args.output_dir)
def parse_known_cmdline_args(cls, cmdline_args, role_names):
    """
    Helper function for headless workflows.
    Parses command-line args that can be used to configure the ``DataSelectionApplet``
    top-level operator and returns ``(parsed_args, unused_args)``, similar to
    ``argparse.ArgumentParser.parse_known_args()``

    Relative paths are converted to absolute paths **according to ``os.getcwd()``**,
    not according to the project file location, since this is more likely to be what
    headless users expect.

    .. note:: If the top-level operator was configured with multiple 'roles', then
              the input files for each role can be configured separately:

              $ python ilastik.py [other workflow options] --my-role-A inputA1.png inputA2.png --my-role-B inputB1.png inputB2.png

              If the workflow has only one role (or only one required role),
              then the role-name flag can be omitted:

              $ python ilastik.py [other workflow options] input1.png input2.png

    See also: :py:meth:`configure_operator_with_parsed_args()`.
    """
    arg_parser = argparse.ArgumentParser()
    if role_names:
        for role_name in role_names:
            arg_name = cls._role_name_to_arg_name(role_name)
            arg_parser.add_argument(
                '--' + arg_name, nargs='+',
                help='List of input files for the {} role'.format(role_name))

    # Finally, a catch-all for role 0
    # (if the workflow only has one role, there's no need to provide role names)
    arg_parser.add_argument('unspecified_input_files', nargs='*',
                            help='List of input files to process.')
    arg_parser.add_argument(
        '--preconvert_stacks',
        help="Convert image stacks to temporary hdf5 files before loading them.",
        action='store_true', default=False)
    arg_parser.add_argument('--input_axes',
                            help="Explicitly specify the axes of your dataset.",
                            required=False)
    arg_parser.add_argument('--stack_along',
                            help="Sequence axis along which to stack",
                            type=str, default='z')

    parsed_args, unused_args = arg_parser.parse_known_args(cmdline_args)
    if parsed_args.unspecified_input_files:
        # We allow the file list to go to the 'default' role,
        # but only if no other roles were explicitly configured.
        arg_names = list(map(cls._role_name_to_arg_name, role_names))
        for arg_name in arg_names:
            if getattr(parsed_args, arg_name):
                # FIXME: This error message could be more helpful.
                role_args = list(map(cls._role_name_to_arg_name, role_names))
                role_args = ['--' + s for s in role_args]
                role_args_str = ", ".join(role_args)
                raise Exception(
                    "Invalid command line arguments: All roles must be configured explicitly.\n"
                    "Use the following flags to specify which files are matched with which inputs:\n"
                    + role_args_str)

        # Relocate to the 'default' role
        arg_name = cls._role_name_to_arg_name(role_names[0])
        setattr(parsed_args, arg_name, parsed_args.unspecified_input_files)
        parsed_args.unspecified_input_files = None

    # Replace '~' with home dir
    for role_name in role_names:
        arg_name = cls._role_name_to_arg_name(role_name)
        paths_for_role = getattr(parsed_args, arg_name)
        if paths_for_role:
            for i, path in enumerate(paths_for_role):
                paths_for_role[i] = os.path.expanduser(path)

    # Check for errors: Do all input files exist?
    all_input_paths = []
    for role_name in role_names:
        arg_name = cls._role_name_to_arg_name(role_name)
        role_paths = getattr(parsed_args, arg_name)
        if role_paths:
            all_input_paths += role_paths
    error = False
    for p in all_input_paths:
        if isUrl(p):
            # Don't error-check urls in advance.
            continue
        p = PathComponents(p).externalPath
        if '*' in p:
            if len(glob.glob(p)) == 0:
                logger.error("Could not find any files for globstring: {}".format(p))
                logger.error("Check your quotes!")
                error = True
        elif not os.path.exists(p):
            logger.error("Input file does not exist: " + p)
            error = True

    if error:
        raise RuntimeError("Could not find one or more input files. See logged errors.")

    return parsed_args, unused_args
def __init__(self, filepath=None, jsonNamespace=None, cwd=None, preloaded_array=None):
    """
    filepath: may be a globstring or a full hdf5 path+dataset

    jsonNamespace: If provided, overrides default settings after filepath is applied

    cwd: The working directory for interpreting relative paths.  If not provided, os.getcwd() is used.

    preloaded_array: Instead of providing a filePath to read from, a pre-loaded array can be directly provided.
                     In that case, you'll probably want to configure the axistags member,
                     or provide a tagged vigra.VigraArray.
    """
    assert preloaded_array is None or not filepath, \
        "You can't provide filepath and a preloaded_array"
    cwd = cwd or os.getcwd()
    self.preloaded_array = preloaded_array  # See description above.
    Location = DatasetInfo.Location
    self._filePath = ""   # The original path to the data (also used as a fallback if the data isn't in the project yet)
    self._datasetId = ""  # The name of the data within the project file (if it is stored locally)
    self.allowLabels = True  # OBSOLETE: Whether or not this dataset should be used for training a classifier.
    self.drange = None
    self.normalizeDisplay = True
    self.fromstack = False
    self.nickname = ""
    self.axistags = None
    self.subvolume_roi = None
    self.location = Location.FileSystem
    self.display_mode = 'default'  # choices: default, grayscale, rgba, random-colortable, binary-mask.

    if self.preloaded_array is not None:
        self.filePath = ""  # set property to ensure unique _datasetId
        self.location = Location.PreloadedArray
        self.fromstack = False
        self.nickname = "preloaded-{}-array".format(self.preloaded_array.dtype.name)
        if hasattr(self.preloaded_array, 'axistags'):
            self.axistags = self.preloaded_array.axistags

    # Set defaults for location, nickname, filepath, and fromstack
    if filepath:
        # Check for sequences (either globstring or separated paths),
        file_list = None

        # To support h5 sequences, filepath may contain external and
        # internal path components
        if not isUrl(filepath):
            file_list = filepath.split(os.path.pathsep)
            pathComponents = [PathComponents(x) for x in file_list]
            externalPaths = [pc.externalPath for pc in pathComponents]
            internalPaths = [pc.internalPath for pc in pathComponents]

            if len(file_list) > 0:
                if len(externalPaths) == 1:
                    if '*' in externalPaths[0]:
                        if internalPaths[0] is not None:
                            assert '*' not in internalPaths[0], \
                                "Only internal OR external glob placeholder supported"
                        file_list = sorted(glob.glob(filepath))
                    else:
                        file_list = [externalPaths[0]]
                        if internalPaths[0] is not None:
                            if '*' in internalPaths[0]:
                                # TODO single hdf5 file stacks
                                raise NotImplementedError(
                                    'Single file h5Stack import is not implemented in the GUI yet.')
                else:
                    assert not any('*' in ep for ep in externalPaths), \
                        "Multiple glob paths shouldn't be happening"
                    file_list = [ex for ex in externalPaths]

                assert all(pc.extension == pathComponents[0].extension
                           for pc in pathComponents[1::]), \
                    "Supplied multiple files with multiple extensions"

                # The following is necessary for h5 as well as npz-files
                internalPathExts = (OpInputDataReader.h5Exts + OpInputDataReader.npzExts)
                internalPathExts = [".{}".format(ipx) for ipx in internalPathExts]
                if pathComponents[0].extension in internalPathExts and internalPaths[0]:
                    for i in xrange(len(file_list)):
                        file_list[i] += '/' + internalPaths[0]

        # For stacks, choose nickname based on a common prefix
        if file_list:
            fromstack = True

            # Convert all paths to absolute
            file_list = map(lambda f: make_absolute(f, cwd), file_list)
            if '*' in filepath:
                filepath = make_absolute(filepath, cwd)
            else:
                filepath = os.path.pathsep.join(file_list)

            # Add an underscore for each wildcard digit
            prefix = os.path.commonprefix(file_list)
            num_wildcards = len(file_list[-1]) - len(prefix) - \
                len(os.path.splitext(file_list[-1])[1])
            nickname = PathComponents(prefix).filenameBase + ("_" * num_wildcards)
        else:
            fromstack = False
            if not isUrl(filepath):
                # Convert all (non-url) paths to absolute
                filepath = make_absolute(filepath, cwd)
            nickname = PathComponents(filepath).filenameBase

        self.location = DatasetInfo.Location.FileSystem
        self.nickname = nickname
        self.filePath = filepath
        self.fromstack = fromstack

    if jsonNamespace is not None:
        self.updateFromJson(jsonNamespace)
def parse_known_cmdline_args(cls, cmdline_args, role_names):
    """
    Helper function for headless workflows.
    Parses command-line args that can be used to configure the ``DataSelectionApplet``
    top-level operator and returns ``(parsed_args, unused_args)``, similar to
    ``argparse.ArgumentParser.parse_known_args()``

    Relative paths are converted to absolute paths **according to ``os.getcwd()``**,
    not according to the project file location, since this is more likely to be what
    headless users expect.

    .. note:: If the top-level operator was configured with multiple 'roles', then
              the input files for each role can be configured separately:

              $ python ilastik.py [other workflow options] --my-role-A inputA1.png inputA2.png --my-role-B inputB1.png inputB2.png

              If the workflow has only one role (or only one required role),
              then the role-name flag can be omitted:

              $ python ilastik.py [other workflow options] input1.png input2.png

    See also: :py:meth:`configure_operator_with_parsed_args()`.
    """
    arg_parser = argparse.ArgumentParser()
    if role_names:
        for role_name in role_names:
            arg_name = cls._role_name_to_arg_name(role_name)
            arg_parser.add_argument(
                '--' + arg_name, nargs='+',
                help='List of input files for the {} role'.format(role_name))

    # Finally, a catch-all for role 0
    # (if the workflow only has one role, there's no need to provide role names)
    arg_parser.add_argument('input_files', nargs='*',
                            help='List of input files to process.')
    arg_parser.add_argument(
        '--preconvert_stacks',
        help="Convert image stacks to temporary hdf5 files before loading them.",
        action='store_true', default=False)

    parsed_args, unused_args = arg_parser.parse_known_args(cmdline_args)

    for i, path in enumerate(parsed_args.input_files):
        # Replace '~' with home dir
        parsed_args.input_files[i] = os.path.expanduser(path)

    # Check for errors: Do all input files exist?
    all_input_paths = list(parsed_args.input_files)
    for role_name in role_names:
        arg_name = cls._role_name_to_arg_name(role_name)
        role_paths = getattr(parsed_args, arg_name)
        if role_paths:
            all_input_paths += role_paths
    error = False
    for p in all_input_paths:
        if isUrl(p):
            # Don't error-check urls in advance.
            continue
        p = PathComponents(p).externalPath
        if '*' in p:
            if len(glob.glob(p)) == 0:
                logger.error("Could not find any files for globstring: {}".format(p))
                logger.error("Check your quotes!")
                error = True
        elif not os.path.exists(p):
            logger.error("Input file does not exist: " + p)
            error = True

    if error:
        raise RuntimeError("Could not find one or more input files. See logged errors.")

    return parsed_args, unused_args
def configure_operator_with_parsed_args(self, parsed_args):
    """
    Helper function for headless workflows.
    Configures this applet's top-level operator according to the settings
    provided in ``parsed_args``.

    :param parsed_args: Must be an ``argparse.Namespace`` as returned by
                        :py:meth:`parse_known_cmdline_args()`.
    """
    role_names = self.topLevelOperator.DatasetRoles.value
    role_paths = collections.OrderedDict()
    if role_names:
        for role_index, role_name in enumerate(role_names):
            arg_name = self._role_name_to_arg_name(role_name)
            input_paths = getattr(parsed_args, arg_name)
            role_paths[role_index] = input_paths

    if parsed_args.input_files:
        # We allow the file list to go to the 'default' role,
        # but only if no other roles were explicitly configured.
        for role_index, input_paths in role_paths.items():
            if input_paths:
                # FIXME: This error message could be more helpful.
                role_args = map(self._role_name_to_arg_name, role_names)
                role_args = map(lambda s: '--' + s, role_args)
                role_args_str = ", ".join(role_args)
                raise Exception(
                    "Invalid command line arguments: All roles must be configured explicitly.\n"
                    "Use the following flags to specify which files are matched with which inputs:\n"
                    + role_args_str)
        role_paths = {0: parsed_args.input_files}

    for role_index, input_paths in role_paths.items():
        # If the user doesn't want image stacks to be copied into the project file,
        # we generate hdf5 volumes in a temporary directory and use those files instead.
        if parsed_args.preconvert_stacks:
            import tempfile
            input_paths = self.convertStacksToH5(input_paths, tempfile.gettempdir())

        input_infos = []
        for p in input_paths:
            info = DatasetInfo()
            info.location = DatasetInfo.Location.FileSystem
            info.filePath = p

            comp = PathComponents(p)

            # Convert all (non-url) paths to absolute
            # (otherwise they are relative to the project file, which probably isn't what the user meant)
            if not isUrl(p):
                comp.externalPath = os.path.abspath(comp.externalPath)
                info.filePath = comp.totalPath()
            info.nickname = comp.filenameBase

            # Remove globstring syntax.
            if '*' in info.nickname:
                info.nickname = info.nickname.replace('*', '')
            if os.path.pathsep in info.nickname:
                info.nickname = PathComponents(
                    info.nickname.split(os.path.pathsep)[0]).fileNameBase
            input_infos.append(info)

        opDataSelection = self.topLevelOperator
        existing_lanes = len(opDataSelection.DatasetGroup)
        opDataSelection.DatasetGroup.resize(max(len(input_infos), existing_lanes))
        for lane_index, info in enumerate(input_infos):
            opDataSelection.DatasetGroup[lane_index][role_index].setValue(info)

        need_warning = False
        for lane_index in range(len(input_infos)):
            output_slot = opDataSelection.ImageGroup[lane_index][role_index]
            if output_slot.meta.prefer_2d:
                need_warning = True
                break

        if need_warning:
            logger.warn("*******************************************************************************************")
            logger.warn("Some of your input data is stored in a format that is not efficient for 3D access patterns.")
            logger.warn("Performance may suffer as a result. For best performance, use a chunked HDF5 volume.")
            logger.warn("*******************************************************************************************")
def _readDatasetInfo(self, infoGroup, localDataGroup, projectFilePath, headless):
    # Unready datasets are represented with an empty group.
    if len(infoGroup) == 0:
        return None, False
    datasetInfo = DatasetInfo()

    # Make a reverse-lookup of the location storage strings
    LocationLookup = {v: k for k, v in self.LocationStrings.items()}
    datasetInfo.location = LocationLookup[str(infoGroup['location'].value)]

    # Write to the 'private' members to avoid resetting the dataset id
    datasetInfo._filePath = infoGroup['filePath'].value
    datasetInfo._datasetId = infoGroup['datasetId'].value

    try:
        datasetInfo.allowLabels = infoGroup['allowLabels'].value
    except KeyError:
        pass

    try:
        datasetInfo.drange = tuple(infoGroup['drange'].value)
    except KeyError:
        pass

    try:
        datasetInfo.nickname = infoGroup['nickname'].value
    except KeyError:
        datasetInfo.nickname = PathComponents(datasetInfo.filePath).filenameBase

    try:
        datasetInfo.fromstack = infoGroup['fromstack'].value
    except KeyError:
        # Guess based on the storage setting and original filepath
        datasetInfo.fromstack = (
            datasetInfo.location == DatasetInfo.Location.ProjectInternal
            and (('?' in datasetInfo._filePath)
                 or (os.path.pathsep in datasetInfo._filePath)))

    try:
        tags = vigra.AxisTags.fromJSON(infoGroup['axistags'].value)
        datasetInfo.axistags = tags
    except KeyError:
        # Old projects just have an 'axisorder' field instead of full axistags
        try:
            axisorder = infoGroup['axisorder'].value
            datasetInfo.axistags = vigra.defaultAxistags(axisorder)
        except KeyError:
            pass

    try:
        start, stop = map(tuple, infoGroup['subvolume_roi'].value)
        datasetInfo.subvolume_roi = (start, stop)
    except KeyError:
        pass

    # If the data is supposed to be in the project, check for it now.
    if datasetInfo.location == DatasetInfo.Location.ProjectInternal:
        if datasetInfo.datasetId not in localDataGroup.keys():
            raise RuntimeError("Corrupt project file. Could not find data for "
                               + infoGroup.name)

    dirty = False
    # If the data is supposed to exist outside the project, make sure it really does.
    if datasetInfo.location == DatasetInfo.Location.FileSystem \
            and not isUrl(datasetInfo.filePath):
        pathData = PathComponents(datasetInfo.filePath,
                                  os.path.split(projectFilePath)[0])
        filePath = pathData.externalPath
        if not os.path.exists(filePath):
            if headless:
                raise RuntimeError("Could not find data at " + filePath)
            filt = "Image files (" + ' '.join(
                '*.' + x for x in OpDataSelection.SupportedExtensions) + ')'
            newpath = self.repairFile(filePath, filt)
            if pathData.internalPath is not None:
                newpath += pathData.internalPath
            datasetInfo._filePath = getPathVariants(
                newpath, os.path.split(projectFilePath)[0])[0]
            dirty = True

    return datasetInfo, dirty
def _selectFiles(self):
    # Find the directory of the most recently opened image file
    mostRecentStackDirectory = PreferencesManager().get(
        'DataSelection', 'recent stack directory')
    if mostRecentStackDirectory is not None:
        defaultDirectory = os.path.split(mostRecentStackDirectory)[0]
    else:
        defaultDirectory = os.path.expanduser('~')

    h5exts = [x.lstrip('.') for x in OpStreamingH5N5SequenceReaderM.H5EXTS]

    # Launch the "Open File" dialog
    extensions = vigra.impex.listExtensions().split()
    extensions.extend(h5exts)
    extensions.extend(OpInputDataReader.n5Selection)
    filt = "Image files (" + ' '.join('*.' + x for x in extensions) + ')'
    options = QFileDialog.Options()
    if ilastik.config.cfg.getboolean("ilastik", "debug"):
        options |= QFileDialog.DontUseNativeDialog
    fileNames, _filter = QFileDialog.getOpenFileNames(
        self, "Select Images for Stack", defaultDirectory, filt, options=options)

    # For the n5 extension, the attributes.json file has to be selected in the file dialog.
    # However we need just the n5 directory-file.
    for i in range(len(fileNames)):
        if os.path.join("n5", "attributes.json") in fileNames[i]:
            fileNames[i] = fileNames[i].replace(os.path.sep + "attributes.json", "")

    msg = ''
    if len(fileNames) == 0:
        return

    pathComponents = PathComponents(fileNames[0])

    if (len(fileNames) == 1) and pathComponents.extension not in \
            OpStreamingH5N5SequenceReaderM.H5EXTS + OpStreamingH5N5SequenceReaderM.N5EXTS:
        msg += 'Cannot create stack: You only chose a single file. '
        msg += 'If your stack is contained in a single file (e.g. a multi-page tiff) '
        msg += 'please use the "Add File" button.'
        QMessageBox.warning(self, "Invalid selection", msg)
        return None

    directory = pathComponents.externalPath
    PreferencesManager().set('DataSelection', 'recent stack directory', directory)

    if pathComponents.extension in OpStreamingH5N5SequenceReaderM.H5EXTS or \
            pathComponents.extension in OpStreamingH5N5SequenceReaderM.N5EXTS:
        if len(fileNames) == 1:
            # open the dialog for globbing:
            file_name = fileNames[0]
            dlg = H5N5StackingDlg(parent=self,
                                  list_of_paths=self._findInternalStacks(file_name))
            if dlg.exec_() == QDialog.Accepted:
                globstring = '{}/{}'.format(file_name, dlg.get_globstring())
                self.patternEdit.setText(globstring)
                self._applyPattern()
                return None
            else:
                return None
        else:
            # check for internal paths
            internal_paths = self._h5N5FindCommonInternal(fileNames)

            if len(internal_paths) == 0:
                msg += 'Could not find a unique common internal path in '
                msg += directory + '\n'
                QMessageBox.warning(self, "Invalid selection", msg)
                return None
            elif len(internal_paths) == 1:
                fileNames = ['{}/{}'.format(fn, internal_paths[0]) for fn in fileNames]
            else:
                # Ask the user which dataset to choose
                dlg = H5N5VolumeSelectionDlg(internal_paths, self)
                if dlg.exec_() == QDialog.Accepted:
                    selected_index = dlg.combo.currentIndex()
                    selected_dataset = str(internal_paths[selected_index])
                    fileNames = ['{}/{}'.format(fn, selected_dataset)
                                 for fn in fileNames]
                else:
                    msg = 'No valid internal path selected.'
                    QMessageBox.warning(self, "Invalid selection", msg)
                    return None

    self._updateFileList(fileNames)
    dataset_keys = [key for key in allkeys
                    if isinstance(f[key], h5py.Dataset)]
    return dataset_keys


if __name__ == "__main__":
    import sys
    import argparse

    # Construct a parser with all the 'normal' export options, and add arg for input_path.
    parser = DataExportApplet.make_cmdline_parser(argparse.ArgumentParser())
    parser.add_argument("input_path", help="Path to your exported predictions.")
    parsed_args = parser.parse_args()

    # As a convenience, auto-determine the internal dataset path if possible.
    path_comp = PathComponents(parsed_args.input_path, os.getcwd())
    if path_comp.extension in PathComponents.HDF5_EXTS and path_comp.internalDatasetName == "":
        with h5py.File(path_comp.externalPath, "r") as f:
            all_internal_paths = all_dataset_internal_paths(f)

        if len(all_internal_paths) == 1:
            path_comp.internalPath = all_internal_paths[0]
            parsed_args.input_path = path_comp.totalPath()
        elif len(all_internal_paths) == 0:
            sys.stderr.write("Could not find any datasets in your input file.")
            sys.exit(1)
        else:
            sys.stderr.write(
                "Found more than one dataset in your input file.\n"
                "Please specify the dataset name, e.g. /path/to/myfile.h5/internal/dataset_name")
            sys.exit(1)
def __init__(self, filepath=None, jsonNamespace=None, cwd=None, preloaded_array=None):
    """
    filepath: may be a globstring or a full hdf5 path+dataset

    jsonNamespace: If provided, overrides default settings after filepath is applied

    cwd: The working directory for interpreting relative paths.  If not provided, os.getcwd() is used.

    preloaded_array: Instead of providing a filePath to read from, a pre-loaded array can be directly provided.
                     In that case, you'll probably want to configure the axistags member,
                     or provide a tagged vigra.VigraArray.
    """
    assert preloaded_array is None or not filepath, \
        "You can't provide filepath and a preloaded_array"
    cwd = cwd or os.getcwd()
    self.preloaded_array = preloaded_array  # See description above.
    Location = DatasetInfo.Location
    self._filePath = ""   # The original path to the data (also used as a fallback if the data isn't in the project yet)
    self._datasetId = ""  # The name of the data within the project file (if it is stored locally)
    self.allowLabels = True  # OBSOLETE: Whether or not this dataset should be used for training a classifier.
    self.drange = None
    self.normalizeDisplay = True
    self.fromstack = False
    self.nickname = ""
    self.axistags = None
    self.subvolume_roi = None
    self.location = Location.FileSystem
    self.display_mode = 'default'  # choices: default, grayscale, rgba, random-colortable, binary-mask.

    if self.preloaded_array is not None:
        self.filePath = ""  # set property to ensure unique _datasetId
        self.location = Location.PreloadedArray
        self.fromstack = False
        self.nickname = "preloaded-{}-array".format(self.preloaded_array.dtype.name)
        if hasattr(self.preloaded_array, 'axistags'):
            self.axistags = self.preloaded_array.axistags

    # Set defaults for location, nickname, filepath, and fromstack
    if filepath:
        # Check for sequences (either globstring or separated paths),
        file_list = None
        if '*' in filepath:
            file_list = glob.glob(filepath)
            file_list = sorted(file_list)
        if not isUrl(filepath) and os.path.pathsep in filepath:
            file_list = filepath.split(os.path.pathsep)

        # For stacks, choose nickname based on a common prefix
        if file_list:
            fromstack = True

            # Convert all paths to absolute
            file_list = map(lambda f: make_absolute(f, cwd), file_list)
            if '*' in filepath:
                filepath = make_absolute(filepath, cwd)
            else:
                filepath = os.path.pathsep.join(file_list)

            # Add an underscore for each wildcard digit
            prefix = os.path.commonprefix(file_list)
            num_wildcards = len(file_list[-1]) - len(prefix) - \
                len(os.path.splitext(file_list[-1])[1])
            nickname = PathComponents(prefix).filenameBase + ("_" * num_wildcards)
        else:
            fromstack = False
            if not isUrl(filepath):
                # Convert all (non-url) paths to absolute
                filepath = make_absolute(filepath, cwd)
            nickname = PathComponents(filepath).filenameBase

        self.location = DatasetInfo.Location.FileSystem
        self.nickname = nickname
        self.filePath = filepath
        self.fromstack = fromstack

    if jsonNamespace is not None:
        self.updateFromJson(jsonNamespace)
def __init__(self, filepath=None, jsonNamespace=None, cwd=None,
             preloaded_array=None, sequence_axis=None):
    """
    filepath: may be a globstring or a full hdf5 path+dataset

    jsonNamespace: If provided, overrides default settings after filepath is applied

    cwd: The working directory for interpreting relative paths.  If not provided, os.getcwd() is used.

    preloaded_array: Instead of providing a filePath to read from, a pre-loaded array can be directly provided.
                     In that case, you'll probably want to configure the axistags member,
                     or provide a tagged vigra.VigraArray.

    sequence_axis: Axis along which to stack (only applicable for stacks).
    """
    assert preloaded_array is None or not filepath, \
        "You can't provide filepath and a preloaded_array"
    cwd = cwd or os.getcwd()
    self.preloaded_array = preloaded_array  # See description above.
    Location = DatasetInfo.Location
    # The original path to the data (also used as a fallback if the data isn't in the project yet)
    self._filePath = ""
    self._datasetId = ""  # The name of the data within the project file (if it is stored locally)
    # OBSOLETE: Whether or not this dataset should be used for training a classifier.
    self.allowLabels = True
    self.drange = None
    self.normalizeDisplay = True
    self.sequenceAxis = None
    self.fromstack = False
    self.nickname = ""
    self.axistags = None
    self.original_axistags = None
    # Necessary in headless mode in order to recover the shape of the raw data
    self.laneShape = None
    self.laneDtype = None
    # A flag indicating whether the dataset is backed by a real source (e.g. file)
    # or by the fake provided (e.g. in headless mode when raw data are not necessary)
    self.realDataSource = True
    self.subvolume_roi = None
    self.location = Location.FileSystem
    self.display_mode = 'default'  # choices: default, grayscale, rgba, random-colortable, binary-mask.

    if self.preloaded_array is not None:
        self.filePath = ""  # set property to ensure unique _datasetId
        self.location = Location.PreloadedArray
        self.nickname = "preloaded-{}-array".format(self.preloaded_array.dtype.name)
        if hasattr(self.preloaded_array, 'axistags'):
            self.axistags = self.preloaded_array.axistags

    # Set defaults for location, nickname, filepath, and fromstack
    if filepath:
        # Check for sequences (either globstring or separated paths),
        file_list = None

        # To support h5 sequences, filepath may contain external and
        # internal path components
        if not isUrl(filepath):
            file_list = filepath.split(os.path.pathsep)
            pathComponents = [PathComponents(x) for x in file_list]
            externalPaths = [pc.externalPath for pc in pathComponents]
            internalPaths = [pc.internalPath for pc in pathComponents]

            if len(file_list) > 0:
                if len(externalPaths) == 1:
                    if '*' in externalPaths[0]:
                        if internalPaths[0] is not None:
                            assert '*' not in internalPaths[0], \
                                "Only internal OR external glob placeholder supported"
                        file_list = sorted(glob.glob(filepath))
                    else:
                        file_list = [externalPaths[0]]
                        if internalPaths[0] is not None:
                            if '*' in internalPaths[0]:
                                # overwrite internalPaths, will be assembled further down
                                glob_string = "{}{}".format(externalPaths[0], internalPaths[0])
                                internalPaths = OpStreamingH5N5SequenceReaderS.expandGlobStrings(
                                    externalPaths[0], glob_string)
                                if internalPaths:
                                    file_list = [externalPaths[0]] * len(internalPaths)
                                else:
                                    file_list = None
                else:
                    assert not any('*' in ep for ep in externalPaths), \
                        "Multiple glob paths shouldn't be happening"
                    file_list = [ex for ex in externalPaths]

                assert all(pc.extension == pathComponents[0].extension
                           for pc in pathComponents[1::]), \
                    "Supplied multiple files with multiple extensions"

                # The following is necessary for h5 as well as npz-files
                internalPathExts = (OpInputDataReader.h5_n5_Exts + OpInputDataReader.npzExts)
                internalPathExts = [".{}".format(ipx) for ipx in internalPathExts]
                if pathComponents[0].extension in internalPathExts and internalPaths:
                    if len(file_list) == len(internalPaths):
                        # assuming internal paths match external paths one-to-one
                        file_list_with_internal = []
                        for external, internal in zip(file_list, internalPaths):
                            if internal:
                                file_list_with_internal.append(
                                    '{}/{}'.format(external, internal))
                            else:
                                file_list_with_internal.append(external)
                        file_list = file_list_with_internal
                    else:
                        # sort of fallback, in case of a mismatch in lengths
                        for i in range(len(file_list)):
                            file_list[i] += '/' + internalPaths[0]

        # For stacks, choose nickname based on a common prefix
        if file_list:
            fromstack = True

            # Convert all paths to absolute
            file_list = [make_absolute(f, cwd) for f in file_list]
            if '*' in filepath:
                filepath = make_absolute(filepath, cwd)
            else:
                filepath = os.path.pathsep.join(file_list)

            # Add an underscore for each wildcard digit
            prefix = os.path.commonprefix(file_list)
            num_wildcards = len(file_list[-1]) - len(prefix) - \
                len(os.path.splitext(file_list[-1])[1])
            nickname = PathComponents(prefix).filenameBase + ("_" * num_wildcards)
        else:
            fromstack = False
            if not isUrl(filepath):
                # Convert all (non-url) paths to absolute
                filepath = make_absolute(filepath, cwd)
            nickname = PathComponents(filepath).filenameBase

        self.location = DatasetInfo.Location.FileSystem
        self.nickname = nickname
        self.filePath = filepath
        self.fromstack = fromstack
        self.sequenceAxis = sequence_axis

    if jsonNamespace is not None:
        self.updateFromJson(jsonNamespace)
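# Usage sketch for the constructor variants above (paths are hypothetical;
# this targets the sequence_axis-capable __init__ directly above).
import os
import numpy

# Stack given as os.path.pathsep-delimited paths, stacked along z:
stack_path = os.path.pathsep.join(['slices/slice_00.png', 'slices/slice_01.png'])
stack_info = DatasetInfo(filepath=stack_path, sequence_axis='z')

# Globstrings are also accepted, per the docstring:
glob_info = DatasetInfo(filepath='slices/slice_*.png', sequence_axis='z')

# Pre-loaded array instead of a file path:
preloaded_info = DatasetInfo(
    preloaded_array=numpy.zeros((10, 100, 100), dtype=numpy.uint8))
print(preloaded_info.nickname)  # -> "preloaded-uint8-array"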
def post_process_lane_export(self, lane_index, checkOverwriteFiles=False):
    # FIXME: This probably only works for the non-blockwise export slot.
    #        We should assert that the user isn't using the blockwise slot.

    # Plugin export if selected
    logger.info("Export source is: " + self.dataExportTrackingApplet.
                topLevelOperator.SelectedExportSource.value)

    if self.dataExportTrackingApplet.topLevelOperator.SelectedExportSource.value == \
            OpTrackingBaseDataExport.PluginOnlyName:
        logger.info("Export source plugin selected!")
        selectedPlugin = self.dataExportTrackingApplet.topLevelOperator.SelectedPlugin.value

        exportPluginInfo = pluginManager.getPluginByName(
            selectedPlugin, category="TrackingExportFormats")
        if exportPluginInfo is None:
            logger.error("Could not find selected plugin %s" % selectedPlugin)
        else:
            exportPlugin = exportPluginInfo.plugin_object
            logger.info("Exporting tracking result using %s" % selectedPlugin)
            name_format = self.dataExportTrackingApplet.topLevelOperator.getLane(
                lane_index).OutputFilenameFormat.value
            partially_formatted_name = self.getPartiallyFormattedName(
                lane_index, name_format)

            if exportPlugin.exportsToFile:
                filename = partially_formatted_name
                if os.path.basename(filename) == '':
                    filename = os.path.join(filename, 'pluginExport.txt')
            else:
                filename = os.path.dirname(partially_formatted_name)

            if filename is None or len(str(filename)) == 0:
                logger.error("Cannot export from plugin with empty output filename")
                return

            exportStatus = self.trackingApplet.topLevelOperator.getLane(
                lane_index).exportPlugin(filename, exportPlugin, checkOverwriteFiles)
            if not exportStatus:
                return False
            logger.info("Export done")

        return

    # CSV table export (only if no plugin was selected)
    settings, selected_features = self.trackingApplet.topLevelOperator.getLane(
        lane_index).get_table_export_settings()

    from lazyflow.utility import PathComponents, make_absolute, format_known_keys

    if settings:
        self.dataExportTrackingApplet.progressSignal.emit(-1)
        raw_dataset_info = self.dataSelectionApplet.topLevelOperator.DatasetGroup[
            lane_index][0].value

        project_path = self.shell.projectManager.currentProjectPath
        project_dir = os.path.dirname(project_path)
        dataset_dir = PathComponents(raw_dataset_info.filePath).externalDirectory
        abs_dataset_dir = make_absolute(dataset_dir, cwd=project_dir)

        known_keys = {}
        known_keys['dataset_dir'] = abs_dataset_dir
        nickname = raw_dataset_info.nickname.replace('*', '')
        if os.path.pathsep in nickname:
            nickname = PathComponents(nickname.split(os.path.pathsep)[0]).filenameBase
        known_keys['nickname'] = nickname

        # Use partial formatting to fill in the non-coordinate name fields
        name_format = settings['file path']
        partially_formatted_name = format_known_keys(name_format, known_keys)
        settings['file path'] = partially_formatted_name

        req = self.trackingApplet.topLevelOperator.getLane(lane_index).export_object_data(
            lane_index,
            # FIXME: Even in non-headless mode, we can't show the gui
            #        because we're running in a non-main thread.
            #        That's not a huge deal, because there's still a
            #        progress bar for the overall export.
            show_gui=False)
        req.wait()
        self.dataExportTrackingApplet.progressSignal.emit(100)
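
# --- Sketch: the partial name formatting used above ---
# A minimal illustration of the format_known_keys() call in the CSV branch:
# substitute the keys we already know ({dataset_dir}, {nickname}) into the
# user's filename template.  The template and the result shown in the
# trailing comment are assumptions, not verified output.
def _example_partial_formatting():
    from lazyflow.utility import format_known_keys

    name_format = '{dataset_dir}/{nickname}_tracking_export'
    known_keys = {'dataset_dir': '/abs/path/to/data', 'nickname': 'my_volume'}
    partially_formatted = format_known_keys(name_format, known_keys)
    # Presumably: '/abs/path/to/data/my_volume_tracking_export'
    # Any coordinate placeholders would be left for a later formatting pass.
    return partially_formatted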
def _getDisplayRoleData(self, index):
    laneIndex = index.row()

    UninitializedDisplayData = {
        DatasetDetailedInfoColumn.Nickname: "<empty>",
        DatasetDetailedInfoColumn.Location: "",
        DatasetDetailedInfoColumn.InternalID: "",
        DatasetDetailedInfoColumn.AxisOrder: "",
        DatasetDetailedInfoColumn.Shape: "",
        DatasetDetailedInfoColumn.Range: ""
    }

    if len(self._op.DatasetGroup) <= laneIndex \
            or len(self._op.DatasetGroup[laneIndex]) <= self._roleIndex:
        return UninitializedDisplayData[index.column()]

    datasetSlot = self._op.DatasetGroup[laneIndex][self._roleIndex]

    # Default
    if not datasetSlot.ready():
        return UninitializedDisplayData[index.column()]

    datasetInfo = self._op.DatasetGroup[laneIndex][self._roleIndex].value
    filePathComponents = PathComponents(datasetInfo.filePath)

    ## Input meta-data fields

    # Name
    if index.column() == DatasetDetailedInfoColumn.Nickname:
        return datasetInfo.nickname

    # Location
    if index.column() == DatasetDetailedInfoColumn.Location:
        if datasetInfo.location == DatasetInfo.Location.FileSystem:
            if isUrl(datasetInfo.filePath) or os.path.isabs(datasetInfo.filePath):
                return "Absolute Link: {}".format(filePathComponents.externalPath)
            return "Relative Link: {}".format(filePathComponents.externalPath)
        return "Project File"

    # Internal ID
    if index.column() == DatasetDetailedInfoColumn.InternalID:
        if datasetInfo.location == DatasetInfo.Location.FileSystem:
            return filePathComponents.internalPath
        return ""

    ## Output meta-data fields

    # Defaults
    imageSlot = self._op.ImageGroup[laneIndex][self._roleIndex]
    if not imageSlot.ready():
        return UninitializedDisplayData[index.column()]

    # Axis order
    if index.column() == DatasetDetailedInfoColumn.AxisOrder:
        if imageSlot.meta.original_axistags is not None:
            assert imageSlot.meta.original_shape is not None, \
                "if original axistags are available, original shape has to exist as well"
            return "".join(imageSlot.meta.getOriginalAxisKeys())
        if imageSlot.meta.axistags is not None:
            return "".join(imageSlot.meta.getAxisKeys())
        return ""

    # Shape
    if index.column() == DatasetDetailedInfoColumn.Shape:
        original_shape = imageSlot.meta.original_shape
        if original_shape is not None:
            assert imageSlot.meta.original_axistags is not None, \
                "if original shape is available, original axistags have to exist as well"
            return str(original_shape)
        shape = imageSlot.meta.shape
        if shape is None:
            return ""
        return str(shape)

    # Range
    if index.column() == DatasetDetailedInfoColumn.Range:
        drange = imageSlot.meta.drange
        if drange is None:
            return ""
        return str(drange)

    assert False, "Unknown column: row={}, column={}".format(
        index.row(), index.column())
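
# --- Sketch: the Location-column logic above, factored out for clarity ---
# This mirrors the branch in _getDisplayRoleData; isUrl, PathComponents, and
# DatasetInfo are the same names used in this module.  The helper name is
# hypothetical and exists only for illustration.
def _location_text(filePath, location):
    if location != DatasetInfo.Location.FileSystem:
        return "Project File"
    # URLs and absolute paths are displayed as absolute links;
    # everything else is displayed as a relative link.
    kind = "Absolute" if (isUrl(filePath) or os.path.isabs(filePath)) else "Relative"
    return "{} Link: {}".format(kind, PathComponents(filePath).externalPath)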
def runWorkflow(parsed_args):
    args = parsed_args

    # Use a temporary cache dir if none was given
    if args.stack_volume_cache_dir is None:
        args.stack_volume_cache_dir = tempfile.gettempdir()

    # Make sure the project file exists.
    if not os.path.exists(args.project):
        raise RuntimeError("Project file '" + args.project + "' does not exist.")

    # Make sure the batch inputs exist.
    for p in args.batch_inputs:
        error = False
        p = PathComponents(p).externalPath
        if '*' in p:
            if len(glob.glob(p)) == 0:
                logger.error("Could not find any files for globstring: {}".format(p))
                logger.error("Check your quotes!")
                error = True
        elif not os.path.exists(p):
            logger.error("Batch input file does not exist: " + p)
            error = True
        if error:
            raise RuntimeError(
                "Could not find one or more batch inputs.  See logged errors.")

    # Instantiate the 'shell'
    shell = HeadlessShell(
        functools.partial(PixelClassificationWorkflow, appendBatchOperators=True))

    if args.assume_old_ilp_axes:
        # Special hack for Janelia:
        # In some old versions of 0.5, the data was stored in tyxzc order.
        # We have no way of inspecting the data to determine this, so we allow
        # users to specify that their ilp is very old via the
        # assume_old_ilp_axes command-line flag.
        ilastik.utility.globals.ImportOptions.default_axis_order = 'tyxzc'

    # Load the project (auto-import it if necessary)
    logger.info("Opening project: '" + args.project + "'")
    shell.openProjectFile(args.project)

    try:
        if not args.generate_project_predictions and len(args.batch_inputs) == 0:
            logger.error("Command-line arguments didn't specify any classification jobs.")
        else:
            # Predictions for project input datasets
            if args.generate_project_predictions:
                generateProjectPredictions(shell)

            # Predictions for other datasets ('batch datasets')
            result = True
            if len(args.batch_inputs) > 0:
                result = generateBatchPredictions(shell.workflow,
                                                  args.batch_inputs,
                                                  args.batch_export_dir,
                                                  args.batch_output_suffix,
                                                  args.batch_output_dataset_name,
                                                  args.stack_volume_cache_dir)
            assert result
    finally:
        logger.info("Closing project...")
        shell.closeCurrentProject()

    logger.info("FINISHED.")
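
# --- Usage sketch for runWorkflow() ---
# A hedged example of driving runWorkflow() programmatically: an
# argparse.Namespace only needs the attributes the function actually reads.
# All paths below are hypothetical.
def _example_run_workflow():
    import argparse

    parsed = argparse.Namespace(
        project='/path/to/MyProject.ilp',
        batch_inputs=['/data/stack/*.png'],
        batch_export_dir='/data/output',
        batch_output_suffix='_prediction',
        batch_output_dataset_name='/volume/pred',
        stack_volume_cache_dir=None,       # None -> tempfile.gettempdir()
        generate_project_predictions=False,
        assume_old_ilp_axes=False,
    )
    # Raises RuntimeError unless the project file and batch inputs exist.
    runWorkflow(parsed)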