Example #1
    def setUp(self):
        """
        Create a blockwise fileset to test with.
        """
        if platform.system() == 'Windows':
            # On Windows, there are errors, and we make no attempt to solve them (at the moment).
            raise nose.SkipTest

        try:
            BlockwiseFileset._prepare_system()
        except ValueError:
            # If the system isn't configured to allow lots of open files, we can't run this test.
            raise nose.SkipTest

        testConfig = \
        """
        {
            "_schema_name" : "blockwise-fileset-description",
            "_schema_version" : 1.0,

            "name" : "synapse_small",
            "format" : "hdf5",
            "axes" : "txyzc",
            "shape" : [1,400,400,100,1],
            "dtype" : "numpy.uint8",
            "block_shape" : [1, 50, 50, 50, 100],
            "block_file_name_format" : "cube{roiString}.h5/volume/data"
        }
        """
        self.tempDir = tempfile.mkdtemp()
        self.configpath = os.path.join(self.tempDir, "config.json")

        logger.debug("Writing config file...")
        with open(self.configpath, 'w') as f:
            f.write(testConfig)

        logger.debug("Creating random test data...")
        bfs = BlockwiseFileset(self.configpath, 'a')
        dataShape = tuple(bfs.description.shape)
        self.data = numpy.random.randint(255,
                                         size=dataShape).astype(numpy.uint8)

        logger.debug("Writing test data...")
        datasetRoi = ([0, 0, 0, 0, 0], dataShape)
        bfs.writeData(datasetRoi, self.data)
        block_starts = getIntersectingBlocks(bfs.description.block_shape,
                                             datasetRoi)
        for block_start in block_starts:
            bfs.setBlockStatus(block_start, BlockwiseFileset.BLOCK_AVAILABLE)
        bfs.close()
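For orientation, here is a minimal companion sketch (not part of the original example) of a tearDown that re-opens the fileset, spot-checks one block against self.data, and removes the temporary directory. The 'r' mode and readData are assumed read-side counterparts of the 'a' mode and writeData shown above.

    def tearDown(self):
        import shutil

        # Re-open the fileset read-only (assumed 'r' mode) and read back the
        # block that starts at the dataset origin.
        bfs = BlockwiseFileset(self.configpath, 'r')
        block_start, block_stop = bfs.getEntireBlockRoi([0, 0, 0, 0, 0])
        block = bfs.readData((block_start, block_stop))  # assumed counterpart of writeData
        expected = self.data[tuple(slice(a, b) for a, b in zip(block_start, block_stop))]
        assert (block == expected).all()
        bfs.close()

        # Clean up the temporary directory created in setUp.
        shutil.rmtree(self.tempDir)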
Example #2
    def setUp(self):
        """
        Create a blockwise fileset to test with.
        """
        if platform.system() == 'Windows':
            # On Windows, there are errors, and we make no attempt to solve them (at the moment).
            raise nose.SkipTest
        
        try:
            BlockwiseFileset._prepare_system()
        except ValueError:
            # If the system isn't configured to allow lots of open files, we can't run this test.
            raise nose.SkipTest
        
        testConfig = \
        """
        {
            "_schema_name" : "blockwise-fileset-description",
            "_schema_version" : 1.0,

            "name" : "synapse_small",
            "format" : "hdf5",
            "axes" : "txyzc",
            "shape" : [1,400,400,100,1],
            "dtype" : "numpy.uint8",
            "block_shape" : [1, 50, 50, 50, 100],
            "block_file_name_format" : "cube{roiString}.h5/volume/data"
        }
        """
        self.tempDir = tempfile.mkdtemp()
        self.configpath = os.path.join(self.tempDir, "config.json")

        logger.debug( "Writing config file..." )
        with open(self.configpath, 'w') as f:
            f.write(testConfig)
        
        logger.debug( "Creating random test data..." )
        bfs = BlockwiseFileset( self.configpath, 'a' )
        dataShape = tuple(bfs.description.shape)
        self.data = numpy.random.randint( 255, size=dataShape ).astype(numpy.uint8)
        
        logger.debug( "Writing test data..." )
        datasetRoi = ([0,0,0,0,0], dataShape)
        bfs.writeData( datasetRoi, self.data )
        block_starts = getIntersectingBlocks(bfs.description.block_shape, datasetRoi)
        for block_start in block_starts:
            bfs.setBlockStatus(block_start, BlockwiseFileset.BLOCK_AVAILABLE)
        bfs.close()
Example #3
    def setUp(self):
        """
        Create a blockwise fileset to test with.
        """
        if "Darwin" in platform.platform():
            # For unknown reasons, blockwise fileset tests fail due to strange "too many files" errors on Mac
            raise nose.SkipTest
        testConfig = """
        {
            "_schema_name" : "blockwise-fileset-description",
            "_schema_version" : 1.0,

            "name" : "synapse_small",
            "format" : "hdf5",
            "axes" : "txyzc",
            "shape" : [1,400,400,100,1],
            "dtype" : "numpy.uint8",
            "block_shape" : [1, 50, 50, 50, 100],
            "block_file_name_format" : "cube{roiString}.h5/volume/data"
        }
        """
        self.tempDir = tempfile.mkdtemp()
        self.configpath = os.path.join(self.tempDir, "config.json")

        logger.debug("Writing config file...")
        with open(self.configpath, "w") as f:
            f.write(testConfig)

        logger.debug("Creating random test data...")
        bfs = BlockwiseFileset(self.configpath, "a")
        dataShape = tuple(bfs.description.shape)
        self.data = numpy.random.randint(255, size=dataShape).astype(numpy.uint8)

        logger.debug("Writing test data...")
        datasetRoi = ([0, 0, 0, 0, 0], dataShape)
        bfs.writeData(datasetRoi, self.data)
        block_starts = getIntersectingBlocks(bfs.description.block_shape, datasetRoi)
        for block_start in block_starts:
            bfs.setBlockStatus(block_start, BlockwiseFileset.BLOCK_AVAILABLE)
        bfs.close()
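As a quick sanity check on the description used in these setUp methods, the sketch below enumerates the block starts that the status loop above iterates over; the getIntersectingBlocks import location is assumed. With shape [1,400,400,100,1] and block_shape [1,50,50,50,100], every 50x50x50 spatial block of the volume gets its status set.

from lazyflow.roi import getIntersectingBlocks  # assumed import location

dataset_roi = ([0, 0, 0, 0, 0], [1, 400, 400, 100, 1])
block_shape = [1, 50, 50, 50, 100]
block_starts = getIntersectingBlocks(block_shape, dataset_roi)

# 1 * 8 * 8 * 2 * 1 = 128 block starts for this configuration.
print(len(block_starts))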
Example #4
class OpTaskWorker(Operator):
    Input = InputSlot()
    RoiString = InputSlot(stype='string')
    TaskName = InputSlot(stype='string')
    ConfigFilePath = InputSlot(stype='filestring')
    OutputFilesetDescription = InputSlot(stype='filestring')

    SecondaryInputs = InputSlot(level=1, optional=True)
    SecondaryOutputDescriptions = InputSlot(level=1, optional=True)
    
    ReturnCode = OutputSlot()

    def __init__(self, *args, **kwargs):
        super( OpTaskWorker, self ).__init__( *args, **kwargs )
        self.progressSignal = OrderedSignal()
        self._primaryBlockwiseFileset = None
        self._secondaryBlockwiseFilesets = []

    def setupOutputs(self):
        self.ReturnCode.meta.dtype = bool
        self.ReturnCode.meta.shape = (1,)
        
        self._closeFiles()
        self._primaryBlockwiseFileset = BlockwiseFileset( self.OutputFilesetDescription.value, 'a' )        
        self._secondaryBlockwiseFilesets = []
        for slot in self.SecondaryOutputDescriptions:
            descriptionPath = slot.value
            self._secondaryBlockwiseFilesets.append( BlockwiseFileset( descriptionPath, 'a' ) )
    
    def cleanUp(self):
        self._closeFiles()
        super( OpTaskWorker, self ).cleanUp()

    def _closeFiles(self):
        if self._primaryBlockwiseFileset is not None:
            self._primaryBlockwiseFileset.close()
        for fileset in self._secondaryBlockwiseFilesets:
            fileset.close()
        self._primaryBlockwiseFileset = None
        self._secondaryBlockwiseFilesets = []

    def execute(self, slot, subindex, ignored_roi, result):
        configFilePath = self.ConfigFilePath.value
        config = parseClusterConfigFile( configFilePath )        
        
        blockwiseFileset = self._primaryBlockwiseFileset
        
        # Check axis compatibility
        inputAxes = self.Input.meta.getTaggedShape().keys()
        outputAxes = list(blockwiseFileset.description.axes)
        assert set(inputAxes) == set(outputAxes), \
            "Output dataset has the wrong set of axes.  Input axes: {}, Output axes: {}".format( "".join(inputAxes), "".join(outputAxes) )
        
        roiString = self.RoiString.value
        roi = Roi.loads(roiString)
        if len( roi.start ) != len( self.Input.meta.shape ):
            assert False, "Task roi: {} is not valid for this input.  Did the master launch this task correctly?".format( roiString )

        logger.info( "Executing for roi: {}".format(roi) )

        if config.use_node_local_scratch:
            assert False, "FIXME."

        assert (blockwiseFileset.getEntireBlockRoi( roi.start )[1] == roi.stop).all(), "Each task must execute exactly one full block.  ({},{}) is not a valid block roi.".format( roi.start, roi.stop )
        assert self.Input.ready()

        # Convert the task subrequest shape dict into a shape for this dataset (and axis ordering)
        subrequest_shape = map( lambda tag: config.task_subrequest_shape[tag.key], self.Input.meta.axistags )
        primary_subrequest_shape = self._primaryBlockwiseFileset.description.sub_block_shape
        if primary_subrequest_shape is not None:
            # If the output dataset specified a sub_block_shape, override the cluster config
            subrequest_shape = primary_subrequest_shape

        with Timer() as computeTimer:
            # Stream the data out to disk.
            streamer = BigRequestStreamer(self.Input, (roi.start, roi.stop), subrequest_shape, config.task_parallel_subrequests )
            streamer.progressSignal.subscribe( self.progressSignal )
            streamer.resultSignal.subscribe( self._handlePrimaryResultBlock )
            streamer.execute()

            # Now the block is ready.  Update the status.
            blockwiseFileset.setBlockStatus( roi.start, BlockwiseFileset.BLOCK_AVAILABLE )

        logger.info( "Finished task in {} seconds".format( computeTimer.seconds() ) )
        result[0] = True
        return result

    def propagateDirty(self, slot, subindex, roi):
        self.ReturnCode.setDirty( slice(None) )
        
    def _handlePrimaryResultBlock(self, roi, result):
        # First write the primary
        self._primaryBlockwiseFileset.writeData(roi, result)

        # Get this block's index with respect to the primary dataset
        sub_block_index = roi[0] / self._primaryBlockwiseFileset.description.sub_block_shape
        
        # Now request the secondaries
        for slot, fileset in zip(self.SecondaryInputs, self._secondaryBlockwiseFilesets):
            # Compute the corresponding sub_block in this output dataset
            sub_block_shape = fileset.description.sub_block_shape
            sub_block_start = sub_block_index * sub_block_shape
            sub_block_stop = sub_block_start + sub_block_shape
            sub_block_stop = numpy.minimum( sub_block_stop, fileset.description.shape )
            sub_block_roi = (sub_block_start, sub_block_stop)
            
            secondary_result = slot( *sub_block_roi ).wait()
            fileset.writeData( sub_block_roi, secondary_result )
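The sub-block bookkeeping in _handlePrimaryResultBlock above boils down to simple integer array arithmetic. The standalone sketch below uses made-up shapes (not taken from the source) to show the mapping; // replicates the Python 2 integer division that the original's / performs on integer arrays.

import numpy

# Primary dataset: where in the primary sub-block grid does this ROI start?
primary_sub_block_shape = numpy.array([1, 50, 50, 50, 1])
primary_roi_start = numpy.array([0, 100, 150, 50, 0])
sub_block_index = primary_roi_start // primary_sub_block_shape  # -> [0, 2, 3, 1, 0]

# Secondary dataset: scale the index by its own sub_block_shape and clip to its shape.
secondary_sub_block_shape = numpy.array([1, 25, 25, 25, 1])
secondary_shape = numpy.array([1, 400, 400, 100, 1])
sub_block_start = sub_block_index * secondary_sub_block_shape  # -> [0, 50, 75, 25, 0]
sub_block_stop = numpy.minimum(sub_block_start + secondary_sub_block_shape, secondary_shape)
sub_block_roi = (sub_block_start, sub_block_stop)  # the ROI requested from the secondary slot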
Example #5
class OpTaskWorker(Operator):
    Input = InputSlot()
    RoiString = InputSlot(stype='string')
    TaskName = InputSlot(stype='string')
    ConfigFilePath = InputSlot(stype='filestring')
    OutputFilesetDescription = InputSlot(stype='filestring')

    SecondaryInputs = InputSlot(level=1, optional=True)
    SecondaryOutputDescriptions = InputSlot(level=1, optional=True)

    ReturnCode = OutputSlot()

    def __init__(self, *args, **kwargs):
        super(OpTaskWorker, self).__init__(*args, **kwargs)
        self.progressSignal = OrderedSignal()
        self._primaryBlockwiseFileset = None
        self._secondaryBlockwiseFilesets = []

    def setupOutputs(self):
        self.ReturnCode.meta.dtype = bool
        self.ReturnCode.meta.shape = (1, )

        self._closeFiles()
        self._primaryBlockwiseFileset = BlockwiseFileset(
            self.OutputFilesetDescription.value, 'a')
        self._secondaryBlockwiseFilesets = []
        for slot in self.SecondaryOutputDescriptions:
            descriptionPath = slot.value
            self._secondaryBlockwiseFilesets.append(
                BlockwiseFileset(descriptionPath, 'a'))

    def cleanUp(self):
        self._closeFiles()
        super(OpTaskWorker, self).cleanUp()

    def _closeFiles(self):
        if self._primaryBlockwiseFileset is not None:
            self._primaryBlockwiseFileset.close()
        for fileset in self._secondaryBlockwiseFilesets:
            fileset.close()
        self._primaryBlockwiseFileset = None
        self._secondaryBlockwiseFilesets = []

    def execute(self, slot, subindex, ignored_roi, result):
        configFilePath = self.ConfigFilePath.value
        config = parseClusterConfigFile(configFilePath)

        blockwiseFileset = self._primaryBlockwiseFileset

        # Check axis compatibility
        inputAxes = self.Input.meta.getTaggedShape().keys()
        outputAxes = list(blockwiseFileset.description.axes)
        assert set(inputAxes) == set(outputAxes), \
            "Output dataset has the wrong set of axes.  Input axes: {}, Output axes: {}".format( "".join(inputAxes), "".join(outputAxes) )

        roiString = self.RoiString.value
        roi = Roi.loads(roiString)
        if len(roi.start) != len(self.Input.meta.shape):
            assert False, "Task roi: {} is not valid for this input.  Did the master launch this task correctly?".format(
                roiString)

        logger.info("Executing for roi: {}".format(roi))

        if config.use_node_local_scratch:
            assert False, "FIXME."

        assert (
            blockwiseFileset.getEntireBlockRoi(roi.start)[1] == roi.stop
        ).all(), (
            "Each task must execute exactly one full block.  "
            "({},{}) is not a valid block roi.".format(roi.start, roi.stop)
        )
        assert self.Input.ready()

        # Convert the task subrequest shape dict into a shape for this dataset (and axis ordering)
        subrequest_shape = map(
            lambda tag: config.task_subrequest_shape[tag.key],
            self.Input.meta.axistags)
        primary_subrequest_shape = self._primaryBlockwiseFileset.description.sub_block_shape
        if primary_subrequest_shape is not None:
            # If the output dataset specified a sub_block_shape, override the cluster config
            subrequest_shape = primary_subrequest_shape

        with Timer() as computeTimer:
            # Stream the data out to disk.
            streamer = BigRequestStreamer(self.Input, (roi.start, roi.stop),
                                          subrequest_shape,
                                          config.task_parallel_subrequests)
            streamer.progressSignal.subscribe(self.progressSignal)
            streamer.resultSignal.subscribe(self._handlePrimaryResultBlock)
            streamer.execute()

            # Now the block is ready.  Update the status.
            blockwiseFileset.setBlockStatus(roi.start,
                                            BlockwiseFileset.BLOCK_AVAILABLE)

        logger.info("Finished task in {} seconds".format(
            computeTimer.seconds()))
        result[0] = True
        return result

    def propagateDirty(self, slot, subindex, roi):
        self.ReturnCode.setDirty(slice(None))

    def _handlePrimaryResultBlock(self, roi, result):
        # First write the primary
        self._primaryBlockwiseFileset.writeData(roi, result)

        # Get this block's index with respect to the primary dataset
        sub_block_index = (
            roi[0] / self._primaryBlockwiseFileset.description.sub_block_shape
        )

        # Now request the secondaries
        for slot, fileset in zip(self.SecondaryInputs,
                                 self._secondaryBlockwiseFilesets):
            # Compute the corresponding sub_block in this output dataset
            sub_block_shape = fileset.description.sub_block_shape
            sub_block_start = sub_block_index * sub_block_shape
            sub_block_stop = sub_block_start + sub_block_shape
            sub_block_stop = numpy.minimum(sub_block_stop,
                                           fileset.description.shape)
            sub_block_roi = (sub_block_start, sub_block_stop)

            secondary_result = slot(*sub_block_roi).wait()
            fileset.writeData(sub_block_roi, secondary_result)
Example #6
class OpTaskWorker(Operator):
    Input = InputSlot()
    RoiString = InputSlot(stype='string')
    TaskName = InputSlot(stype='string')
    ConfigFilePath = InputSlot(stype='filestring')
    OutputFilesetDescription = InputSlot(stype='filestring')

    ReturnCode = OutputSlot()

    def __init__(self, *args, **kwargs):
        super( OpTaskWorker, self ).__init__( *args, **kwargs )
        self.progressSignal = OrderedSignal()
        self._primaryBlockwiseFileset = None

    def setupOutputs(self):
        self.ReturnCode.meta.dtype = bool
        self.ReturnCode.meta.shape = (1,)
        
        self._closeFiles()
        self._primaryBlockwiseFileset = BlockwiseFileset( self.OutputFilesetDescription.value, 'a' )        
    
    def cleanUp(self):
        self._closeFiles()
        super( OpTaskWorker, self ).cleanUp()

    def _closeFiles(self):
        if self._primaryBlockwiseFileset is not None:
            self._primaryBlockwiseFileset.close()
        self._primaryBlockwiseFileset = None

    def execute(self, slot, subindex, ignored_roi, result):
        configFilePath = self.ConfigFilePath.value
        config = parseClusterConfigFile( configFilePath )        
        
        blockwiseFileset = self._primaryBlockwiseFileset
        
        # Check axis compatibility
        inputAxes = self.Input.meta.getTaggedShape().keys()
        outputAxes = list(blockwiseFileset.description.axes)
        assert set(inputAxes) == set(outputAxes), \
            "Output dataset has the wrong set of axes.  Input axes: {}, Output axes: {}".format( "".join(inputAxes), "".join(outputAxes) )
        
        roiString = self.RoiString.value
        roi = Roi.loads(roiString)
        if len( roi.start ) != len( self.Input.meta.shape ):
            assert False, "Task roi: {} is not valid for this input.  Did the master launch this task correctly?".format( roiString )

        logger.info( "Executing for roi: {}".format(roi) )

        if config.use_node_local_scratch:
            assert False, "FIXME."

        assert (blockwiseFileset.getEntireBlockRoi( roi.start )[1] == roi.stop).all(), "Each task must execute exactly one full block.  ({},{}) is not a valid block roi.".format( roi.start, roi.stop )
        assert self.Input.ready()

        with Timer() as computeTimer:
            # Stream the data out to disk.
            request_blockshape = self._primaryBlockwiseFileset.description.sub_block_shape # Could be None.  That's okay.
            streamer = BigRequestStreamer(self.Input, (roi.start, roi.stop), request_blockshape )
            streamer.progressSignal.subscribe( self.progressSignal )
            streamer.resultSignal.subscribe( self._handlePrimaryResultBlock )
            streamer.execute()

            # Now the block is ready.  Update the status.
            blockwiseFileset.setBlockStatus( roi.start, BlockwiseFileset.BLOCK_AVAILABLE )

        logger.info( "Finished task in {} seconds".format( computeTimer.seconds() ) )
        result[0] = True
        return result

    def propagateDirty(self, slot, subindex, roi):
        self.ReturnCode.setDirty( slice(None) )
        
    def _handlePrimaryResultBlock(self, roi, result):
        # First write the primary
        self._primaryBlockwiseFileset.writeData(roi, result)

        # Ask the workflow if there is any special post-processing to do...
        self.get_workflow().postprocessClusterSubResult(roi, result, self._primaryBlockwiseFileset)

    def get_workflow(self):
        op = self
        while not isinstance(op, Workflow):
            op = op.parent
        return op
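The get_workflow helper above is a plain parent-chain walk: follow .parent until an object of the desired type turns up. The self-contained sketch below shows the same pattern in isolation; the _Node and _WorkflowLike classes are stand-ins, not lazyflow API.

class _Node:
    def __init__(self, parent=None):
        self.parent = parent

class _WorkflowLike(_Node):
    pass

def find_ancestor(op, ancestor_type):
    # Walk up the parent chain; fails with AttributeError if no ancestor of
    # the requested type exists (op eventually becomes None).
    while not isinstance(op, ancestor_type):
        op = op.parent
    return op

root = _WorkflowLike()
leaf = _Node(parent=_Node(parent=root))
assert find_ancestor(leaf, _WorkflowLike) is root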
Example #7
class OpTaskWorker(Operator):
    Input = InputSlot()
    RoiString = InputSlot(stype='string')
    TaskName = InputSlot(stype='string')
    ConfigFilePath = InputSlot(stype='filestring')
    OutputFilesetDescription = InputSlot(stype='filestring')

    ReturnCode = OutputSlot()

    def __init__(self, *args, **kwargs):
        super(OpTaskWorker, self).__init__(*args, **kwargs)
        self.progressSignal = OrderedSignal()
        self._primaryBlockwiseFileset = None

    def setupOutputs(self):
        self.ReturnCode.meta.dtype = bool
        self.ReturnCode.meta.shape = (1, )

        self._closeFiles()
        self._primaryBlockwiseFileset = BlockwiseFileset(
            self.OutputFilesetDescription.value, 'a')

    def cleanUp(self):
        self._closeFiles()
        super(OpTaskWorker, self).cleanUp()

    def _closeFiles(self):
        if self._primaryBlockwiseFileset is not None:
            self._primaryBlockwiseFileset.close()
        self._primaryBlockwiseFileset = None

    def execute(self, slot, subindex, ignored_roi, result):
        configFilePath = self.ConfigFilePath.value
        config = parseClusterConfigFile(configFilePath)

        blockwiseFileset = self._primaryBlockwiseFileset

        # Check axis compatibility
        inputAxes = self.Input.meta.getTaggedShape().keys()
        outputAxes = list(blockwiseFileset.description.axes)
        assert set(inputAxes) == set(outputAxes), \
            "Output dataset has the wrong set of axes.  Input axes: {}, Output axes: {}".format( "".join(inputAxes), "".join(outputAxes) )

        roiString = self.RoiString.value
        roi = Roi.loads(roiString)
        if len(roi.start) != len(self.Input.meta.shape):
            assert False, "Task roi: {} is not valid for this input.  Did the master launch this task correctly?".format(
                roiString)

        logger.info("Executing for roi: {}".format(roi))

        if config.use_node_local_scratch:
            assert False, "FIXME."

        assert (
            blockwiseFileset.getEntireBlockRoi(roi.start)[1] == roi.stop
        ).all(), (
            "Each task must execute exactly one full block.  "
            "({},{}) is not a valid block roi.".format(roi.start, roi.stop)
        )
        assert self.Input.ready()

        with Timer() as computeTimer:
            # Stream the data out to disk.
            request_blockshape = self._primaryBlockwiseFileset.description.sub_block_shape  # Could be None.  That's okay.
            streamer = BigRequestStreamer(self.Input, (roi.start, roi.stop),
                                          request_blockshape)
            streamer.progressSignal.subscribe(self.progressSignal)
            streamer.resultSignal.subscribe(self._handlePrimaryResultBlock)
            streamer.execute()

            # Now the block is ready.  Update the status.
            blockwiseFileset.setBlockStatus(roi.start,
                                            BlockwiseFileset.BLOCK_AVAILABLE)

        logger.info("Finished task in {} seconds".format(
            computeTimer.seconds()))
        result[0] = True
        return result

    def propagateDirty(self, slot, subindex, roi):
        self.ReturnCode.setDirty(slice(None))

    def _handlePrimaryResultBlock(self, roi, result):
        # First write the primary
        self._primaryBlockwiseFileset.writeData(roi, result)

        # Ask the workflow if there is any special post-processing to do...
        self.get_workflow().postprocessClusterSubResult(
            roi, result, self._primaryBlockwiseFileset)

    def get_workflow(self):
        op = self
        while not isinstance(op, Workflow):
            op = op.parent
        return op
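Finally, a rough driver sketch showing how this operator's slots might be wired up before ReturnCode is requested. The slot(start, stop).wait() call pattern matches the secondary-slot requests in the examples above; Graph, connect, and setValue are assumed from the standard lazyflow slot API, and the paths, the upstream data_source.Output slot, and the contents of roi_string (which must parse with Roi.loads) are placeholders, not taken from the source.

from lazyflow.graph import Graph  # assumed standard lazyflow import

graph = Graph()
op = OpTaskWorker(graph=graph)

op.Input.connect(data_source.Output)                    # placeholder upstream operator
op.TaskName.setValue("block_task_0")                    # placeholder task name
op.RoiString.setValue(roi_string)                       # must round-trip through Roi.loads
op.ConfigFilePath.setValue("/path/to/cluster_config.json")                 # placeholder path
op.OutputFilesetDescription.setValue("/path/to/output_description.json")   # placeholder path

# Requesting ReturnCode triggers execute(), which streams the task's block to disk.
success = op.ReturnCode([0], [1]).wait()[0]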