def setUp(self):
    """
    Create a blockwise fileset to test with.
    """
    if platform.system() == 'Windows':
        # On windows, there are errors, and we make no attempt to solve them (at the moment).
        raise nose.SkipTest
    try:
        BlockwiseFileset._prepare_system()
    except ValueError:
        # If the system isn't configured to allow lots of open files, we can't run this test.
        raise nose.SkipTest

    testConfig = \
    """
    {
        "_schema_name" : "blockwise-fileset-description",
        "_schema_version" : 1.0,
        "name" : "synapse_small",
        "format" : "hdf5",
        "axes" : "txyzc",
        "shape" : [1,400,400,100,1],
        "dtype" : "numpy.uint8",
        "block_shape" : [1, 50, 50, 50, 100],
        "block_file_name_format" : "cube{roiString}.h5/volume/data"
    }
    """
    self.tempDir = tempfile.mkdtemp()
    self.configpath = os.path.join(self.tempDir, "config.json")

    logger.debug("Loading config file...")
    with open(self.configpath, 'w') as f:
        f.write(testConfig)

    logger.debug("Creating random test data...")
    bfs = BlockwiseFileset(self.configpath, 'a')
    dataShape = tuple(bfs.description.shape)
    self.data = numpy.random.randint(255, size=dataShape).astype(numpy.uint8)

    logger.debug("Writing test data...")
    datasetRoi = ([0, 0, 0, 0, 0], dataShape)
    bfs.writeData(datasetRoi, self.data)
    block_starts = getIntersectingBlocks(bfs.description.block_shape, datasetRoi)
    for block_start in block_starts:
        bfs.setBlockStatus(block_start, BlockwiseFileset.BLOCK_AVAILABLE)
    bfs.close()
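# A hedged sketch (not part of the original) of a read-back check that could
# follow the setUp above. It assumes BlockwiseFileset exposes a readData(roi)
# counterpart to the writeData() call used in setUp; if that assumption is
# wrong, the same check can be made by opening the block .h5 files directly.
# The ROI values below are arbitrary.
def testReadBack(self):
    bfs = BlockwiseFileset(self.configpath, 'r')
    try:
        roi = ([0, 10, 20, 30, 0], [1, 60, 70, 80, 1])
        readback = bfs.readData(roi)  # readData() is an assumption, see note above
        expected = self.data[0:1, 10:60, 20:70, 30:80, 0:1]
        assert (readback == expected).all(), "Read-back data did not match what was written."
    finally:
        bfs.close()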
def setUp(self):
    """
    Create a blockwise fileset to test with.
    """
    if "Darwin" in platform.platform():
        # For unknown reasons, blockwise fileset tests fail due to strange "too many files" errors on mac
        raise nose.SkipTest

    testConfig = """
    {
        "_schema_name" : "blockwise-fileset-description",
        "_schema_version" : 1.0,
        "name" : "synapse_small",
        "format" : "hdf5",
        "axes" : "txyzc",
        "shape" : [1,400,400,100,1],
        "dtype" : "numpy.uint8",
        "block_shape" : [1, 50, 50, 50, 100],
        "block_file_name_format" : "cube{roiString}.h5/volume/data"
    }
    """
    self.tempDir = tempfile.mkdtemp()
    self.configpath = os.path.join(self.tempDir, "config.json")

    logger.debug("Loading config file...")
    with open(self.configpath, "w") as f:
        f.write(testConfig)

    logger.debug("Creating random test data...")
    bfs = BlockwiseFileset(self.configpath, "a")
    dataShape = tuple(bfs.description.shape)
    self.data = numpy.random.randint(255, size=dataShape).astype(numpy.uint8)

    logger.debug("Writing test data...")
    datasetRoi = ([0, 0, 0, 0, 0], dataShape)
    bfs.writeData(datasetRoi, self.data)
    block_starts = getIntersectingBlocks(bfs.description.block_shape, datasetRoi)
    for block_start in block_starts:
        bfs.setBlockStatus(block_start, BlockwiseFileset.BLOCK_AVAILABLE)
    bfs.close()
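# The setUp variants above leave a temporary directory on disk. A matching
# tearDown is not shown in the original; the following minimal sketch removes
# the directory using the standard library's shutil.rmtree.
def tearDown(self):
    """
    Remove the temporary blockwise fileset created by setUp.
    """
    import shutil
    # ignore_errors=True: lingering HDF5 file handles can make individual
    # block files temporarily un-deletable on some platforms.
    shutil.rmtree(self.tempDir, ignore_errors=True)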
def _prepareDestination(self):
    """
    - If the result file doesn't exist yet, create it (and the dataset)
    - If the result file already exists, return a list of the rois that
      are NOT needed (their data already exists in the final output)
    """
    originalDescription = BlockwiseFileset.readDescription(self.OutputDatasetDescription.value)
    datasetDescription = copy.deepcopy(originalDescription)

    # Modify description fields as needed
    # -- axes
    datasetDescription.axes = "".join( self.Input.meta.getTaggedShape().keys() )
    assert set(originalDescription.axes) == set( datasetDescription.axes ), \
        "Can't prepare destination dataset: original dataset description listed " \
        "axes as {}, but actual output axes are {}".format( originalDescription.axes, datasetDescription.axes )

    # -- shape
    datasetDescription.view_shape = list(self.Input.meta.shape)

    # -- block_shape
    assert originalDescription.block_shape is not None
    originalBlockDims = collections.OrderedDict( zip( originalDescription.axes, originalDescription.block_shape ) )
    datasetDescription.block_shape = map( lambda a: originalBlockDims[a], datasetDescription.axes )
    datasetDescription.block_shape = map( min, zip( datasetDescription.block_shape, self.Input.meta.shape ) )

    # -- chunks
    if originalDescription.chunks is not None:
        originalChunkDims = collections.OrderedDict( zip( originalDescription.axes, originalDescription.chunks ) )
        datasetDescription.chunks = map( lambda a: originalChunkDims[a], datasetDescription.axes )
        datasetDescription.chunks = map( min, zip( datasetDescription.chunks, self.Input.meta.shape ) )

    # -- dtype
    if datasetDescription.dtype != self.Input.meta.dtype:
        dtype = self.Input.meta.dtype
        if type(dtype) is numpy.dtype:
            dtype = dtype.type
        datasetDescription.dtype = dtype().__class__.__name__

    # Create a unique hash for this blocking scheme.
    # If it changes, we can't use any previous data.
    sha = hashlib.sha1()
    sha.update( str( tuple( datasetDescription.block_shape ) ) )
    sha.update( datasetDescription.axes )
    sha.update( datasetDescription.block_file_name_format )
    datasetDescription.hash_id = sha.hexdigest()

    if datasetDescription != originalDescription:
        descriptionFilePath = self.OutputDatasetDescription.value
        logger.info( "Overwriting dataset description: {}".format( descriptionFilePath ) )
        BlockwiseFileset.writeDescription(descriptionFilePath, datasetDescription)
        with open( descriptionFilePath, 'r' ) as f:
            logger.info( f.read() )

    # Now open the dataset
    blockwiseFileset = BlockwiseFileset( self.OutputDatasetDescription.value )

    taskInfos = self._prepareTaskInfos( blockwiseFileset.getAllBlockRois() )

    if blockwiseFileset.description.hash_id != originalDescription.hash_id:
        # Something about our blocking scheme changed.
        # Make sure all blocks are marked as NOT available.
        # (Just in case some were left over from a previous run.)
        for roi in taskInfos.keys():
            blockwiseFileset.setBlockStatus( roi[0], BlockwiseFileset.BLOCK_NOT_AVAILABLE )

    return blockwiseFileset, taskInfos
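# For illustration only: a hedged sketch of how the (blockwiseFileset, taskInfos)
# pair returned by _prepareDestination() might be consumed. The executeTask
# callable and the _runAllTasks name are hypothetical stand-ins; the real task
# objects come from self._prepareTaskInfos(), which is not shown here. Only
# setBlockStatus(), BLOCK_AVAILABLE, and close() are taken from the code above.
def _runAllTasks(self, executeTask):
    blockwiseFileset, taskInfos = self._prepareDestination()
    try:
        # taskInfos is keyed by block roi, as in the loop at the end of
        # _prepareDestination() above.
        for roi, taskInfo in taskInfos.items():
            executeTask(taskInfo)
            # Mark the block as available once its result has been written,
            # so an interrupted export can be resumed later.
            blockwiseFileset.setBlockStatus(roi[0], BlockwiseFileset.BLOCK_AVAILABLE)
    finally:
        blockwiseFileset.close()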