def setUp(self):
    """
    Create a blockwise fileset to test with.
    """
    if platform.system() == 'Windows':
        # On Windows, there are errors, and we make no attempt to solve them (at the moment).
        raise nose.SkipTest
    try:
        BlockwiseFileset._prepare_system()
    except ValueError:
        # If the system isn't configured to allow lots of open files, we can't run this test.
        raise nose.SkipTest

    testConfig = \
    """
    {
        "_schema_name" : "blockwise-fileset-description",
        "_schema_version" : 1.0,

        "name" : "synapse_small",
        "format" : "hdf5",
        "axes" : "txyzc",
        "shape" : [1,400,400,100,1],
        "dtype" : "numpy.uint8",
        "block_shape" : [1, 50, 50, 50, 100],
        "block_file_name_format" : "cube{roiString}.h5/volume/data"
    }
    """
    self.tempDir = tempfile.mkdtemp()
    self.configpath = os.path.join(self.tempDir, "config.json")

    logger.debug("Loading config file...")
    with open(self.configpath, 'w') as f:
        f.write(testConfig)

    logger.debug("Creating random test data...")
    bfs = BlockwiseFileset(self.configpath, 'a')
    dataShape = tuple(bfs.description.shape)
    self.data = numpy.random.randint(255, size=dataShape).astype(numpy.uint8)

    logger.debug("Writing test data...")
    datasetRoi = ([0, 0, 0, 0, 0], dataShape)
    bfs.writeData(datasetRoi, self.data)

    # Mark every block as available so readers will accept the data.
    block_starts = getIntersectingBlocks(bfs.description.block_shape, datasetRoi)
    for block_start in block_starts:
        bfs.setBlockStatus(block_start, BlockwiseFileset.BLOCK_AVAILABLE)
    bfs.close()
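# Hypothetical counterpart (not in the source): setUp never removes the
# temporary directory it creates, so a matching tearDown is the usual
# cleanup.  A minimal sketch using only the standard library:
import shutil

def tearDown(self):
    """
    Remove the temporary directory holding the config file and block files.
    """
    shutil.rmtree(self.tempDir)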
def setUp(self):
    """
    Create a blockwise fileset to test with.
    """
    if "Darwin" in platform.platform():
        # For unknown reasons, blockwise fileset tests fail due to strange "too many files" errors on Mac.
        raise nose.SkipTest

    testConfig = """
    {
        "_schema_name" : "blockwise-fileset-description",
        "_schema_version" : 1.0,

        "name" : "synapse_small",
        "format" : "hdf5",
        "axes" : "txyzc",
        "shape" : [1,400,400,100,1],
        "dtype" : "numpy.uint8",
        "block_shape" : [1, 50, 50, 50, 100],
        "block_file_name_format" : "cube{roiString}.h5/volume/data"
    }
    """
    self.tempDir = tempfile.mkdtemp()
    self.configpath = os.path.join(self.tempDir, "config.json")

    logger.debug("Loading config file...")
    with open(self.configpath, "w") as f:
        f.write(testConfig)

    logger.debug("Creating random test data...")
    bfs = BlockwiseFileset(self.configpath, "a")
    dataShape = tuple(bfs.description.shape)
    self.data = numpy.random.randint(255, size=dataShape).astype(numpy.uint8)

    logger.debug("Writing test data...")
    datasetRoi = ([0, 0, 0, 0, 0], dataShape)
    bfs.writeData(datasetRoi, self.data)

    # Mark every block as available so readers will accept the data.
    block_starts = getIntersectingBlocks(bfs.description.block_shape, datasetRoi)
    for block_start in block_starts:
        bfs.setBlockStatus(block_start, BlockwiseFileset.BLOCK_AVAILABLE)
    bfs.close()
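# Hypothetical sketch (not in the source): a test built on this fixture
# would typically read the data back and compare it against self.data.
# This assumes BlockwiseFileset.readData accepts the same (start, stop)
# roi format that writeData does -- an assumption, not confirmed here.
def testReadBack(self):
    """
    Verify that data read from the fileset matches what setUp wrote.
    """
    bfs = BlockwiseFileset(self.configpath, "r")
    try:
        roi = ([0, 0, 0, 0, 0], tuple(bfs.description.shape))
        read_back = bfs.readData(roi)
        assert (read_back == self.data).all()
    finally:
        bfs.close()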
class OpTaskWorker(Operator):
    Input = InputSlot()
    RoiString = InputSlot(stype='string')
    TaskName = InputSlot(stype='string')
    ConfigFilePath = InputSlot(stype='filestring')
    OutputFilesetDescription = InputSlot(stype='filestring')
    SecondaryInputs = InputSlot(level=1, optional=True)
    SecondaryOutputDescriptions = InputSlot(level=1, optional=True)

    ReturnCode = OutputSlot()

    def __init__(self, *args, **kwargs):
        super(OpTaskWorker, self).__init__(*args, **kwargs)
        self.progressSignal = OrderedSignal()
        self._primaryBlockwiseFileset = None
        self._secondaryBlockwiseFilesets = []

    def setupOutputs(self):
        self.ReturnCode.meta.dtype = bool
        self.ReturnCode.meta.shape = (1,)

        self._closeFiles()
        self._primaryBlockwiseFileset = BlockwiseFileset(self.OutputFilesetDescription.value, 'a')
        self._secondaryBlockwiseFilesets = []
        for slot in self.SecondaryOutputDescriptions:
            descriptionPath = slot.value
            self._secondaryBlockwiseFilesets.append(BlockwiseFileset(descriptionPath, 'a'))

    def cleanUp(self):
        self._closeFiles()
        super(OpTaskWorker, self).cleanUp()

    def _closeFiles(self):
        if self._primaryBlockwiseFileset is not None:
            self._primaryBlockwiseFileset.close()
        for fileset in self._secondaryBlockwiseFilesets:
            fileset.close()
        self._primaryBlockwiseFileset = None
        self._secondaryBlockwiseFilesets = []

    def execute(self, slot, subindex, ignored_roi, result):
        configFilePath = self.ConfigFilePath.value
        config = parseClusterConfigFile(configFilePath)

        blockwiseFileset = self._primaryBlockwiseFileset

        # Check axis compatibility
        inputAxes = self.Input.meta.getTaggedShape().keys()
        outputAxes = list(blockwiseFileset.description.axes)
        assert set(inputAxes) == set(outputAxes), \
            "Output dataset has the wrong set of axes.  Input axes: {}, Output axes: {}".format("".join(inputAxes), "".join(outputAxes))

        roiString = self.RoiString.value
        roi = Roi.loads(roiString)
        if len(roi.start) != len(self.Input.meta.shape):
            assert False, "Task roi: {} is not valid for this input.  Did the master launch this task correctly?".format(roiString)

        logger.info("Executing for roi: {}".format(roi))

        if config.use_node_local_scratch:
            assert False, "FIXME."

        assert (blockwiseFileset.getEntireBlockRoi(roi.start)[1] == roi.stop).all(), \
            "Each task must execute exactly one full block.  ({},{}) is not a valid block roi.".format(roi.start, roi.stop)
        assert self.Input.ready()

        # Convert the task subrequest shape dict into a shape for this dataset (and axis ordering).
        subrequest_shape = map(lambda tag: config.task_subrequest_shape[tag.key], self.Input.meta.axistags)
        primary_subrequest_shape = self._primaryBlockwiseFileset.description.sub_block_shape
        if primary_subrequest_shape is not None:
            # If the output dataset specified a sub_block_shape, override the cluster config.
            subrequest_shape = primary_subrequest_shape

        with Timer() as computeTimer:
            # Stream the data out to disk.
            streamer = BigRequestStreamer(self.Input,
                                          (roi.start, roi.stop),
                                          subrequest_shape,
                                          config.task_parallel_subrequests)
            streamer.progressSignal.subscribe(self.progressSignal)
            streamer.resultSignal.subscribe(self._handlePrimaryResultBlock)
            streamer.execute()

            # Now the block is ready.  Update the status.
            blockwiseFileset.setBlockStatus(roi.start, BlockwiseFileset.BLOCK_AVAILABLE)

        logger.info("Finished task in {} seconds".format(computeTimer.seconds()))
        result[0] = True
        return result

    def propagateDirty(self, slot, subindex, roi):
        self.ReturnCode.setDirty(slice(None))

    def _handlePrimaryResultBlock(self, roi, result):
        # First write the primary.
        self._primaryBlockwiseFileset.writeData(roi, result)

        # Get this block's index with respect to the primary dataset.
        sub_block_index = roi[0] / self._primaryBlockwiseFileset.description.sub_block_shape

        # Now request the secondaries.
        for slot, fileset in zip(self.SecondaryInputs, self._secondaryBlockwiseFilesets):
            # Compute the corresponding sub_block in this output dataset.
            sub_block_shape = fileset.description.sub_block_shape
            sub_block_start = sub_block_index * sub_block_shape
            sub_block_stop = sub_block_start + sub_block_shape
            sub_block_stop = numpy.minimum(sub_block_stop, fileset.description.shape)
            sub_block_roi = (sub_block_start, sub_block_stop)

            secondary_result = slot(*sub_block_roi).wait()
            fileset.writeData(sub_block_roi, secondary_result)
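# Hypothetical driver sketch (not in the source): a cluster master would
# construct one OpTaskWorker per block task, configure its slots, and pull
# ReturnCode to trigger execute().  'opSource' is a placeholder for
# whatever operator provides the input data; connect(), setValue(), and
# slot[:].wait() are standard lazyflow slot usage.
def run_one_task(opSource, roi_string, config_path, output_description_path):
    opWorker = OpTaskWorker(graph=opSource.graph)
    opWorker.Input.connect(opSource.Output)
    opWorker.RoiString.setValue(roi_string)
    opWorker.TaskName.setValue("task_" + roi_string)
    opWorker.ConfigFilePath.setValue(config_path)
    opWorker.OutputFilesetDescription.setValue(output_description_path)

    # Pulling the output slot drives execute() for this task's block.
    success = opWorker.ReturnCode[:].wait()[0]
    opWorker.cleanUp()
    return bool(success)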
class OpTaskWorker(Operator):
    Input = InputSlot()
    RoiString = InputSlot(stype='string')
    TaskName = InputSlot(stype='string')
    ConfigFilePath = InputSlot(stype='filestring')
    OutputFilesetDescription = InputSlot(stype='filestring')

    ReturnCode = OutputSlot()

    def __init__(self, *args, **kwargs):
        super(OpTaskWorker, self).__init__(*args, **kwargs)
        self.progressSignal = OrderedSignal()
        self._primaryBlockwiseFileset = None

    def setupOutputs(self):
        self.ReturnCode.meta.dtype = bool
        self.ReturnCode.meta.shape = (1,)

        self._closeFiles()
        self._primaryBlockwiseFileset = BlockwiseFileset(self.OutputFilesetDescription.value, 'a')

    def cleanUp(self):
        self._closeFiles()
        super(OpTaskWorker, self).cleanUp()

    def _closeFiles(self):
        if self._primaryBlockwiseFileset is not None:
            self._primaryBlockwiseFileset.close()
        self._primaryBlockwiseFileset = None

    def execute(self, slot, subindex, ignored_roi, result):
        configFilePath = self.ConfigFilePath.value
        config = parseClusterConfigFile(configFilePath)

        blockwiseFileset = self._primaryBlockwiseFileset

        # Check axis compatibility
        inputAxes = self.Input.meta.getTaggedShape().keys()
        outputAxes = list(blockwiseFileset.description.axes)
        assert set(inputAxes) == set(outputAxes), \
            "Output dataset has the wrong set of axes.  Input axes: {}, Output axes: {}".format("".join(inputAxes), "".join(outputAxes))

        roiString = self.RoiString.value
        roi = Roi.loads(roiString)
        if len(roi.start) != len(self.Input.meta.shape):
            assert False, "Task roi: {} is not valid for this input.  Did the master launch this task correctly?".format(roiString)

        logger.info("Executing for roi: {}".format(roi))

        if config.use_node_local_scratch:
            assert False, "FIXME."

        assert (blockwiseFileset.getEntireBlockRoi(roi.start)[1] == roi.stop).all(), \
            "Each task must execute exactly one full block.  ({},{}) is not a valid block roi.".format(roi.start, roi.stop)
        assert self.Input.ready()

        with Timer() as computeTimer:
            # Stream the data out to disk.
            request_blockshape = self._primaryBlockwiseFileset.description.sub_block_shape  # Could be None.  That's okay.
            streamer = BigRequestStreamer(self.Input, (roi.start, roi.stop), request_blockshape)
            streamer.progressSignal.subscribe(self.progressSignal)
            streamer.resultSignal.subscribe(self._handlePrimaryResultBlock)
            streamer.execute()

            # Now the block is ready.  Update the status.
            blockwiseFileset.setBlockStatus(roi.start, BlockwiseFileset.BLOCK_AVAILABLE)

        logger.info("Finished task in {} seconds".format(computeTimer.seconds()))
        result[0] = True
        return result

    def propagateDirty(self, slot, subindex, roi):
        self.ReturnCode.setDirty(slice(None))

    def _handlePrimaryResultBlock(self, roi, result):
        # First write the primary
        self._primaryBlockwiseFileset.writeData(roi, result)

        # Ask the workflow if there is any special post-processing to do...
        self.get_workflow().postprocessClusterSubResult(roi, result, self._primaryBlockwiseFileset)

    def get_workflow(self):
        op = self
        while not isinstance(op, Workflow):
            op = op.parent
        return op
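# Hypothetical sketch (not in the source): get_workflow() walks the parent
# chain up to the enclosing Workflow, which must therefore provide a
# postprocessClusterSubResult() hook.  The signature below is inferred from
# the call site in _handlePrimaryResultBlock; the class name and no-op body
# are illustrative only.
class MyClusterWorkflow(Workflow):
    def postprocessClusterSubResult(self, roi, result, blockwise_fileset):
        """
        Called once for each sub-block that OpTaskWorker writes to disk.
        A real workflow might compute per-block statistics here or write
        auxiliary datasets; this default does nothing.
        """
        pass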