Example #1
    def predict_probabilities(self, X):
        logger.debug( "Predicting with parallel vigra RF" )
        X = numpy.asarray(X, dtype=numpy.float32)
        assert X.ndim == 2

        if self._feature_names is not None:
            # For some reason, vigra doesn't seem to check this for us...
            assert X.shape[1] == len(self._feature_names), \
                "Feature count ({}) doesn't match the training feature count ({}).\n"\
                "Expected features: {}".format( X.shape[1], len(self._feature_names), self._feature_names )

        # As each forest completes, aggregate results in a shared array.
        # (Must put in a list so we can update it in this closure.)
        total_predictions = [None]
        prediction_lock = RequestLock()
        def update_predictions(forest, forest_predictions):
            forest_predictions *= forest.treeCount()
            with prediction_lock:
                if total_predictions[0] is None:
                    total_predictions[0] = forest_predictions
                else:
                    total_predictions[0] += forest_predictions

        # Create a request for each forest
        pool = RequestPool()
        for forest in self._forests:
            req = Request( partial( forest.predictProbabilities, X ) )
            req.notify_finished( partial(update_predictions, forest) )
            pool.add( req )
        del req
        pool.wait()

        total_predictions[0] /= self._num_trees
        return total_predictions[0]
    def create_and_train(self, X, y):
        logger.debug( "Training parallel vigra RF" )
        # Save for future reference
        known_labels = numpy.unique(y)

        X = numpy.asarray(X, numpy.float32)
        y = numpy.asarray(y, numpy.uint32)
        if y.ndim == 1:
            y = y[:, numpy.newaxis]

        assert X.ndim == 2
        assert len(X) == len(y)

        # Create N forests
        forests = []
        for _ in range(self._num_forests):
            forest = vigra.learning.RandomForest(self._trees_per_forest, **self._kwargs)
            forests.append( forest )

        # Train them all in parallel
        pool = RequestPool()
        for forest in forests:
            pool.add( Request( partial(forest.learnRF, X, y) ) )
        pool.wait()

        return ParallelVigraRfLazyflowClassifier( forests, known_labels )
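A minimal sketch of the fan-out/aggregate pattern used in predict_probabilities above, assuming only the Request/RequestPool/RequestLock API already imported in these examples; the parallel_sum name and its arguments are illustrative, not part of lazyflow:

from functools import partial
from lazyflow.request import Request, RequestPool, RequestLock

def parallel_sum(compute_part, items):
    # One-element list so the callback closure can rebind the running total.
    total = [None]
    lock = RequestLock()

    def accumulate(part_result):
        # notify_finished() hands each request's return value to this callback.
        with lock:
            total[0] = part_result if total[0] is None else total[0] + part_result

    pool = RequestPool()
    for item in items:
        req = Request(partial(compute_part, item))
        req.notify_finished(accumulate)
        pool.add(req)
    pool.wait()  # block until every request (and its finished-callback) has run
    return total[0]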
Example #3
    def execute(self, slot, subindex, roi, result):
        featMatrix=[]
        labelsMatrix=[]
        for i,labels in enumerate(self.inputs["Labels"]):
            if labels.meta.shape is not None:
                labels=labels[:].wait()

                indexes=numpy.nonzero(labels[...,0].view(numpy.ndarray))
                #Maybe later request only part of the region?

                image=self.inputs["Images"][i][:].wait()

                features=image[indexes]
                labels=labels[indexes]

                featMatrix.append(features)
                labelsMatrix.append(labels)


        featMatrix=numpy.concatenate(featMatrix,axis=0)
        labelsMatrix=numpy.concatenate(labelsMatrix,axis=0)

        # train and store self._forest_count forests in parallel
        pool = RequestPool()
        for i in range(self._forest_count):
            def train_and_store(number):
                result[number] = vigra.learning.RandomForest(self._tree_count)
                result[number].learnRF(featMatrix.astype(numpy.float32),labelsMatrix.astype(numpy.uint32))
            req = pool.request(partial(train_and_store, i))

        pool.wait()

        return result
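Note that the loop above binds the index with partial(train_and_store, i) rather than letting the nested function close over i; by the time the requests run, a plain closure would only see the final loop value. A plain-Python illustration of the difference (no lazyflow involved):

from functools import partial

closures = [lambda: i for i in range(3)]             # all close over the same i
bound = [partial(lambda n: n, i) for i in range(3)]  # i's value is captured per iteration

assert [f() for f in closures] == [2, 2, 2]
assert [f() for f in bound] == [0, 1, 2]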
Example #4
    def predict_probabilities(self, X):
        logger.debug( "Predicting with parallel vigra RF" )
        X = numpy.asarray(X, dtype=numpy.float32)

        # As each forest completes, aggregate results in a shared array.
        # (Must put in a list so we can update it in this closure.)
        total_predictions = [None]
        prediction_lock = RequestLock()
        def update_predictions(forest, forest_predictions):
            forest_predictions *= forest.treeCount()
            with prediction_lock:
                if total_predictions[0] is None:
                    total_predictions[0] = forest_predictions
                else:
                    total_predictions[0] += forest_predictions

        # Create a request for each forest
        pool = RequestPool()
        for forest in self._forests:
            req = Request( partial( forest.predictProbabilities, X ) )
            req.notify_finished( partial(update_predictions, forest) )
            pool.add( req )
        del req
        pool.wait()

        total_predictions[0] /= self._num_trees
        return total_predictions[0]
    def execute(self, slot, subindex, roi, result):
        clipped_block_rois = getIntersectingRois(
            self.Input.meta.shape, self.BlockShape.value, (roi.start, roi.stop), True
        )
        if self._always_request_full_blocks:
            full_block_rois = getIntersectingRois(
                self.Input.meta.shape, self.BlockShape.value, (roi.start, roi.stop), False
            )
        else:
            full_block_rois = clipped_block_rois

        pool = RequestPool()
        for full_block_roi, clipped_block_roi in zip(full_block_rois, clipped_block_rois):
            full_block_roi = numpy.asarray(full_block_roi)
            clipped_block_roi = numpy.asarray(clipped_block_roi)

            req = self.Input(*full_block_roi)
            output_roi = numpy.asarray(clipped_block_roi) - roi.start
            if (full_block_roi == clipped_block_roi).all():
                req.writeInto(result[roiToSlice(*output_roi)])
            else:
                roi_within_block = clipped_block_roi - full_block_roi[0]

                def copy_request_result(output_roi, roi_within_block, request_result):
                    self.Output.stype.copy_data(
                        result[roiToSlice(*output_roi)], request_result[roiToSlice(*roi_within_block)]
                    )

                req.notify_finished(partial(copy_request_result, output_roi, roi_within_block))
            pool.add(req)
            del req
        pool.wait()
Example #6
    def _executeOutput(self, roi, destination):
        assert len(roi.stop) == len(self.Input.meta.shape), "roi: {} has the wrong number of dimensions for Input shape: {}".format( roi, self.Input.meta.shape )
        assert numpy.less_equal(roi.stop, self.Input.meta.shape).all(), "roi: {} is out-of-bounds for Input shape: {}".format( roi, self.Input.meta.shape )
        
        block_starts = getIntersectingBlocks( self._blockshape, (roi.start, roi.stop) )
        block_starts = list(map(tuple, block_starts))  # materialize: the block list is iterated twice below

        # Ensure all block cache files are up-to-date
        reqPool = RequestPool() # (Do the work in parallel.)
        for block_start in block_starts:
            entire_block_roi = getBlockBounds( self.Input.meta.shape, self._blockshape, block_start )
            f = partial( self._ensureCached, entire_block_roi)
            reqPool.add( Request(f) )
        logger.debug( "Waiting for {} blocks...".format( len(block_starts) ) )
        reqPool.wait()

        # Copy data from each block
        # (Parallelism not needed here: h5py will serialize these requests anyway)
        logger.debug( "Copying data from {} blocks...".format( len(block_starts) ) )
        for block_start in block_starts:
            entire_block_roi = getBlockBounds( self.Input.meta.shape, self._blockshape, block_start )

            # This block's portion of the roi
            intersecting_roi = getIntersection( (roi.start, roi.stop), entire_block_roi )
            
            # Compute slicing within destination array and slicing within this block
            destination_relative_intersection = numpy.subtract(intersecting_roi, roi.start)
            block_relative_intersection = numpy.subtract(intersecting_roi, block_start)
            
            # Copy from block to destination
            dataset = self._getBlockDataset( entire_block_roi )
            destination[ roiToSlice(*destination_relative_intersection) ] = dataset[ roiToSlice( *block_relative_intersection ) ]
        return destination
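A worked numeric sketch of the ROI arithmetic above, using a made-up 1D example (a block of size 10 starting at 20, and a requested roi of 25..35):

import numpy

roi = (numpy.array([25]), numpy.array([35]))        # requested region
block_start = numpy.array([20])
entire_block_roi = (block_start, block_start + 10)  # this block covers 20..30

# The part of the request that falls inside this block: 25..30
intersecting_roi = (numpy.maximum(roi[0], entire_block_roi[0]),
                    numpy.minimum(roi[1], entire_block_roi[1]))

destination_relative = numpy.subtract(intersecting_roi, roi[0])    # [[0], [5]]  -> slots 0..5 of the destination
block_relative = numpy.subtract(intersecting_roi, block_start)     # [[5], [10]] -> slots 5..10 of the block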
Example #7
def execute_tasks( tasks ):
    """
    Executes the given list of tasks (functions) in the lazyflow threadpool.
    """
    pool = RequestPool()
    for task in tasks:
        pool.add( Request(task) )
    pool.wait()
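A hypothetical usage sketch for execute_tasks; the save_tile helper and the file names are made up for illustration:

import numpy
from functools import partial

def save_tile(path, data):
    numpy.save(path, data)

tiles = [numpy.zeros((64, 64), dtype=numpy.float32) for _ in range(4)]
tasks = [partial(save_tile, "tile_{:02d}.npy".format(i), tile)
         for i, tile in enumerate(tiles)]
execute_tasks(tasks)  # blocks until all four tasks have run in the lazyflow threadpool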
Example #8
    def read(self, view_roi, result_out):
        """
        roi: (start, stop) tuples, ordered according to description.output_axes
             roi should be relative to the view
        """
        output_axes = self.description.output_axes
        roi_transposed = zip(*view_roi)
        roi_dict = dict( zip(output_axes, roi_transposed) )
        view_roi = list(zip(roi_dict['z'], roi_dict['y'], roi_dict['x']))  # list() so numpy.array() below sees a (2, 3) roi

        # First, normalize roi and result to zyx order
        result_out = vigra.taggedView(result_out, output_axes)
        result_out = result_out.withAxes(*'zyx')
        
        assert numpy.array(view_roi).shape == (2,3), "Invalid roi for 3D volume: {}".format( view_roi )
        view_roi = numpy.array(view_roi)
        assert (result_out.shape == (view_roi[1] - view_roi[0])).all()
        
        # User gave roi according to the view output.
        # Now offset it find global roi.
        roi = view_roi + self.description.view_origin_zyx
        
        tile_blockshape = (1,) + tuple(self.description.tile_shape_2d_yx)
        tile_starts = getIntersectingBlocks( tile_blockshape, roi )

        pool = RequestPool()
        for tile_start in tile_starts:
            tile_roi_in = getBlockBounds( self.description.bounds_zyx, tile_blockshape, tile_start )
            tile_roi_in = numpy.array(tile_roi_in)

            # This tile's portion of the roi
            intersecting_roi = getIntersection( roi, tile_roi_in )
            intersecting_roi = numpy.array( intersecting_roi )

            # Compute slicing within destination array and slicing within this tile
            destination_relative_intersection = numpy.subtract(intersecting_roi, roi[0])
            tile_relative_intersection = intersecting_roi - tile_roi_in[0]
            
            # Get a view to the output slice
            result_region = result_out[roiToSlice(*destination_relative_intersection)]
            
            rest_args = self._get_rest_args(tile_blockshape, tile_roi_in)
            if self.description.tile_url_format.startswith('http'):
                retrieval_fn = partial( self._retrieve_remote_tile, rest_args, tile_relative_intersection, result_region )
            else:
                retrieval_fn = partial( self._retrieve_local_tile, rest_args, tile_relative_intersection, result_region )            

            PARALLEL_REQ = True
            if PARALLEL_REQ:
                pool.add( Request( retrieval_fn ) )
            else:
                # execute serially (leave the pool empty)
                retrieval_fn()

        if PARALLEL_REQ:
            with Timer() as timer:
                pool.wait()
            logger.info("Loading {} tiles took a total of {}".format( len(tile_starts), timer.seconds() ))
Example #9
    def _resolveMergers(self, hypothesesGraph, model):
        '''
        run merger resolution on the hypotheses graph which contains the current solution
        '''
        logger.info("Resolving mergers.")
                
        parameters = self.Parameters.value
        withTracklets = parameters['withTracklets']
        originalGraph = hypothesesGraph.referenceTraxelGraph if withTracklets else hypothesesGraph
        resolvedMergersDict = {}
        
        # Enable full graph computation for animal tracking workflow
        withFullGraph = False
        if 'withAnimalTracking' in parameters and parameters['withAnimalTracking']: # TODO: Setting this parameter outside of the track() function (on AnimalConservationTrackingWorkflow) is not desirable 
            withFullGraph = True
            logger.info("Computing full graph on merger resolver (Only enabled on animal tracking workflow)")
        
        mergerResolver = IlastikMergerResolver(originalGraph, pluginPaths=self.pluginPaths, withFullGraph=withFullGraph)
        
        # Check if graph contains mergers, otherwise skip merger resolving
        if not mergerResolver.mergerNum:
            logger.info("Graph contains no mergers. Skipping merger resolving.")
        else:        
            # Fit and refine merger nodes using a GMM 
            # This has to be done per time-step in order to avoid loading the whole video into RAM
            traxelIdPerTimestepToUniqueIdMap, uuidToTraxelMap = getMappingsBetweenUUIDsAndTraxels(model)
            timesteps = [int(t) for t in traxelIdPerTimestepToUniqueIdMap.keys()]
            timesteps.sort()
            
            timeIndex = self.LabelImage.meta.axistags.index('t')
            
            for timestep in timesteps:
                roi = [slice(None) for i in range(len(self.LabelImage.meta.shape))]
                roi[timeIndex] = slice(timestep, timestep+1)
                roi = tuple(roi)
                
                labelImage = self.LabelImage[roi].wait()
                
                # Get coordinates for object IDs in label image. Used by GMM merger fit.
                objectIds = vigra.analysis.unique(labelImage[0,...,0])
                maxObjectId = max(objectIds)
                
                coordinatesForIds = {}
                
                pool = RequestPool()
                for objectId in objectIds:
                    pool.add(Request(partial(mergerResolver.getCoordinatesForObjectId, coordinatesForIds, labelImage[0, ..., 0], timestep, objectId)))                 

                # Run requests to get object ID coordinates
                pool.wait()              
                
                # Fit mergers and store fit info in nodes  
                if coordinatesForIds:
                    mergerResolver.fitAndRefineNodesForTimestep(coordinatesForIds, maxObjectId, timestep)   
                
            # Compute object features, re-run flow solver, update model and result, and get merger dictionary
            resolvedMergersDict = mergerResolver.run()
        return resolvedMergersDict
    def testBasic(self):
        graph = Graph()
        opDataProvider = OpArrayPiperWithAccessCount(graph=graph)
        opCache = OpUnblockedArrayCache(graph=graph)

        data = np.random.random((100, 100, 100)).astype(np.float32)
        opDataProvider.Input.setValue(vigra.taggedView(data, "zyx"))
        opCache.Input.connect(opDataProvider.Output)

        assert opCache.CleanBlocks.value == []

        roi = ((30, 30, 30), (50, 50, 50))
        cache_data = opCache.Output(*roi).wait()
        assert (cache_data == data[roiToSlice(*roi)]).all()
        assert opDataProvider.accessCount == 1
        assert opCache.CleanBlocks.value == [roiToSlice(*roi)]

        # Request the same data a second time.
        # Access count should not change.
        cache_data = opCache.Output(*roi).wait()
        assert (cache_data == data[roiToSlice(*roi)]).all()
        assert opDataProvider.accessCount == 1
        assert opCache.CleanBlocks.value == [roiToSlice(*roi)]

        # Now invalidate a part of the data
        # The cache will discard it, so the access count should increase.
        opDataProvider.Input.setDirty((30, 30, 30), (31, 31, 31))
        assert opCache.CleanBlocks.value == []
        cache_data = opCache.Output(*roi).wait()
        assert (cache_data == data[roiToSlice(*roi)]).all()
        assert opDataProvider.accessCount == 2

        # Repeat this next part just for safety
        for _ in range(10):
            # Make sure the cache is empty
            opDataProvider.Input.setDirty((30, 30, 30), (31, 31, 31))
            opDataProvider.accessCount = 0

            # Create many requests for the same data.
            # Upstream data should only be accessed ONCE.
            pool = RequestPool()
            for _ in range(10):
                pool.add(opCache.Output(*roi))
            pool.wait()
            assert opDataProvider.accessCount == 1

        # Also, make sure requests for INNER rois of stored blocks are also serviced from memory
        opDataProvider.accessCount = 0
        inner_roi = ((35, 35, 35), (45, 45, 45))
        cache_data = opCache.Output(*inner_roi).wait()
        assert (cache_data == data[roiToSlice(*inner_roi)]).all()
        assert opDataProvider.accessCount == 0
        assert opCache.CleanBlocks.value == [roiToSlice(*roi)]
Example #11
    def _waitForBlocks(self, block_starts):
        """
        Make sure that all blocks in the given list of blocks are present in the cache before returning.
        (Blocks that are not yet present will be requested from our Input slot.)
        """
        reqPool = RequestPool() # (Do the work in parallel.)
        for block_start in block_starts:
            entire_block_roi = getBlockBounds( self.Output.meta.shape, self._blockshape, block_start )
            f = partial( self._ensureCached, entire_block_roi)
            reqPool.add( Request(f) )
        logger.debug( "Waiting for {} blocks...".format( len(block_starts) ) )
        reqPool.wait()
        def export(self, filename, hypothesesGraph, pluginExportContext):
            """Export the tracking solution stored in the hypotheses graph as a sequence of H5 files,
            one per frame, containing the label image of that frame and which objects were part
            of a move or a division.
    
            :param filename: string of the FOLDER where to save the result
            :param hypothesesGraph: hytra.core.hypothesesgraph.HypothesesGraph filled with a solution
            :param pluginExportContext: instance of ilastik.plugins.PluginExportContext containing:
                labelImageSlot (required here) as well as objectFeaturesSlot, rawImageSlot, additionalPluginArgumentsSlot

            :returns: True on success, False otherwise
            """
            labelImageSlot = pluginExportContext.labelImageSlot
            traxelIdPerTimestepToUniqueIdMap, uuidToTraxelMap = hypothesesGraph.getMappingsBetweenUUIDsAndTraxels()
            timesteps = [t for t in traxelIdPerTimestepToUniqueIdMap.keys()]
    
            result = hypothesesGraph.getSolutionDictionary()
            mergers, detections, links, divisions = getMergersDetectionsLinksDivisions(result, uuidToTraxelMap)
    
            # group by timestep for event creation
            mergersPerTimestep = getMergersPerTimestep(mergers, timesteps)
            linksPerTimestep = getLinksPerTimestep(links, timesteps)
            detectionsPerTimestep = getDetectionsPerTimestep(detections, timesteps)
            divisionsPerTimestep = getDivisionsPerTimestep(divisions, linksPerTimestep, timesteps)
    
            # save to disk in parallel
            pool = RequestPool()
    
            timeIndex = labelImageSlot.meta.axistags.index('t')

            if not os.path.exists(filename):
                os.makedirs(filename)
    
            for timestep in traxelIdPerTimestepToUniqueIdMap.keys():
                # extract current frame label image
                roi = [slice(None) for i in range(len(labelImageSlot.meta.shape))]
                roi[timeIndex] = slice(int(timestep), int(timestep)+1)
                roi = tuple(roi)
                labelImage = labelImageSlot[roi].wait()
    
                fn = os.path.join(filename, "{0:05d}.h5".format(int(timestep)))
                pool.add(Request(partial(writeEvents,
                                            int(timestep),
                                             linksPerTimestep[timestep],
                                             divisionsPerTimestep[timestep],
                                             mergersPerTimestep[timestep],
                                             detectionsPerTimestep[timestep],
                                             fn,
                                             labelImage)))
            pool.wait()
    
            return True
    def _execute_Output(self, slot, subindex, roi, result):
        """
        Overridden from OpUnblockedArrayCache
        """

        def copy_block(full_block_roi, clipped_block_roi):
            full_block_roi = numpy.asarray(full_block_roi)
            clipped_block_roi = numpy.asarray(clipped_block_roi)
            output_roi = numpy.asarray(clipped_block_roi) - roi.start

            block_roi = self._get_containing_block_roi(clipped_block_roi)

            # Skip cache and copy full block directly
            if self.BypassModeEnabled.value:
                full_block_data = self.Output.stype.allocateDestination(SubRegion(self.Output, *full_block_roi))

                self.Input(*full_block_roi).writeInto(full_block_data).block()

                roi_within_block = clipped_block_roi - full_block_roi[0]
                self.Output.stype.copy_data(
                    result[roiToSlice(*output_roi)], full_block_data[roiToSlice(*roi_within_block)]
                )
            # If the data exists already or we can just fetch it without needing extra scratch space,
            # just call the base class
            elif block_roi is not None or (full_block_roi == clipped_block_roi).all():
                self._execute_Output_impl(clipped_block_roi, result[roiToSlice(*output_roi)])
            elif self.Input.meta.dontcache:
                # Data isn't in the cache, but we don't need it in the cache anyway.
                self.Input(*clipped_block_roi).writeInto(result[roiToSlice(*output_roi)]).block()
            else:
                # Data doesn't exist yet in the cache.
                # Request the full block, but then discard the parts we don't need.

                # (We use allocateDestination() here to support MaskedArray types.)
                # TODO: We should probably just get rid of MaskedArray support altogether...
                full_block_data = self.Output.stype.allocateDestination(SubRegion(self.Output, *full_block_roi))
                self._execute_Output_impl(full_block_roi, full_block_data)

                roi_within_block = clipped_block_roi - full_block_roi[0]
                self.Output.stype.copy_data(
                    result[roiToSlice(*output_roi)], full_block_data[roiToSlice(*roi_within_block)]
                )

        clipped_block_rois = getIntersectingRois(self.Input.meta.shape, self._blockshape, (roi.start, roi.stop), True)
        full_block_rois = getIntersectingRois(self.Input.meta.shape, self._blockshape, (roi.start, roi.stop), False)

        pool = RequestPool()
        for full_block_roi, clipped_block_roi in zip(full_block_rois, clipped_block_rois):
            req = Request(partial(copy_block, full_block_roi, clipped_block_roi))
            pool.add(req)
        pool.wait()
Example #14
    def execute(self, slot, subindex, roi, result):
        assert len(roi.start) == len(roi.stop) == len(self.Output.meta.shape)
        assert slot == self.Output

        t_ind = self.RawVolume.meta.axistags.index('t')
        assert t_ind < len(self.RawVolume.meta.shape)

        def compute_features_for_time_slice(res_t_ind, t):
            axes4d = [k for k in self.RawVolume.meta.getTaggedShape().keys() if k in 'xyzc']

            # Process entire spatial volume
            s = [slice(None)] * len(self.RawVolume.meta.shape)
            s[t_ind] = slice(t, t+1)
            s = tuple(s)

            # Request in parallel
            raw_req = self.RawVolume[s]
            raw_req.submit()

            label_req = self.LabelVolume[s]
            label_req.submit()

            if self.Atlas.ready():
                atlasVolume = self.Atlas[s].wait()
                atlasVolume = vigra.taggedView(atlasVolume, axistags=self.Atlas.meta.axistags)
                atlasVolume = atlasVolume.withAxes(*axes4d)
            else:
                atlasVolume = None

            # Get results
            rawVolume = raw_req.wait()
            labelVolume = label_req.wait()

            rawVolume = vigra.taggedView(rawVolume, axistags=self.RawVolume.meta.axistags)
            labelVolume = vigra.taggedView(labelVolume, axistags=self.LabelVolume.meta.axistags)

            # Convert to 4D (preserve axis order)
            rawVolume = rawVolume.withAxes(*axes4d)
            labelVolume = labelVolume.withAxes(*axes4d)
            acc = self._extract(rawVolume, labelVolume, atlasVolume)

            # Copy into the result
            result[res_t_ind] = acc

        # loop over requested time slices
        pool = RequestPool()
        for res_t_ind, t in enumerate(range(roi.start[t_ind], roi.stop[t_ind])):
            pool.add( Request( partial(compute_features_for_time_slice, res_t_ind, t) ) )
        
        pool.wait()
        return result
Example #15
    def _label(self, roi, result):
        result = vigra.taggedView(result, axistags=self.Output.meta.axistags)
        # get the background values
        bg = self.Background[...].wait()
        bg = vigra.taggedView(bg, axistags=self.Background.meta.axistags)
        bg = bg.withAxes(*"ct")
        assert np.all(
            self.Background.meta.shape[0] == self.Input.meta.shape[0]
        ), "Shape of background values incompatible to shape of Input"
        assert np.all(
            self.Background.meta.shape[4] == self.Input.meta.shape[4]
        ), "Shape of background values incompatible to shape of Input"

        # do labeling in parallel over channels and time slices
        pool = RequestPool()

        start = np.asarray(roi.start, dtype=int)
        stop = np.asarray(roi.stop, dtype=int)
        for ti, t in enumerate(range(roi.start[0], roi.stop[0])):
            start[0], stop[0] = t, t + 1
            for ci, c in enumerate(range(roi.start[4], roi.stop[4])):
                start[4], stop[4] = c, c + 1
                newRoi = SubRegion(self.Output, start=tuple(start), stop=tuple(stop))
                resView = result[ti, ..., ci].withAxes(*"xyz")
                req = Request(partial(self._label3d, newRoi, bg[c, t], resView))
                pool.add(req)

        logger.debug("{}: Computing connected components for ROI {} ...".format(self.name, roi))
        pool.wait()
        pool.clean()
        logger.debug("{}: Connected components computed.".format(self.name))
Example #16
    def execute(self, slot, subindex, rroi, result):
        key = roiToSlice(rroi.start,rroi.stop)

        cnt = 0
        written = 0
        start, stop = roi.sliceToRoi(key, self.outputs["Output"].meta.shape)
        assert (stop<=self.outputs["Output"].meta.shape).all()
        #axisindex = self.inputs["AxisIndex"].value
        flag = self.inputs["AxisFlag"].value
        axisindex = self.outputs["Output"].meta.axistags.index(flag)
        #ugly-ugly-ugly
        oldkey = list(key)
        oldkey.pop(axisindex)
        
        #print "STACKER: ", flag, axisindex
        #print "requesting an outslot from stacker:", key, result.shape
        #print "input slots total: ", len(self.inputs['Images'])
        requests = []
        
        pool = RequestPool()

        for i, inSlot in enumerate(self.inputs['Images']):
            req = None
            inTagKeys = [ax.key for ax in inSlot.meta.axistags]
            if flag in inTagKeys:
                slices = inSlot.meta.shape[axisindex]
                if cnt + slices >= start[axisindex] and start[axisindex]-cnt<slices and start[axisindex]+written<stop[axisindex]:
                    begin = 0
                    if cnt < start[axisindex]:
                        begin = start[axisindex] - cnt
                    end = slices
                    if cnt + end > stop[axisindex]:
                        end -= cnt + end - stop[axisindex]
                    key_ = copy.copy(oldkey)
                    key_.insert(axisindex, slice(begin, end, None))
                    reskey = [slice(None, None, None) for x in range(len(result.shape))]
                    reskey[axisindex] = slice(written, written+end-begin, None)

                    req = inSlot[tuple(key_)].writeInto(result[tuple(reskey)])
                    written += end - begin
                cnt += slices
            else:
                if cnt>=start[axisindex] and start[axisindex] + written < stop[axisindex]:
                    #print "key: ", key, "reskey: ", reskey, "oldkey: ", oldkey
                    #print "result: ", result.shape, "inslot:", inSlot.meta.shape
                    reskey = [slice(None, None, None) for s in oldkey]
                    reskey.insert(axisindex, written)
                    destArea = result[tuple(reskey)]
                    req = inSlot[tuple(oldkey)].writeInto(destArea)
                    written += 1
                cnt += 1

            if req is not None:
                pool.add(req)

        pool.wait()
        pool.clean()
Example #17
    def execute(self, slot, subindex, roi, result):
        t1 = time.perf_counter()
        key = roi.toSlice()
        nlabels = self.inputs["LabelsCount"].value

        traceLogger.debug(
            "OpPredictRandomForest: Requesting classifier. roi={}".format(roi))
        forests = self.inputs["Classifier"][:].wait()

        if any(forest is None for forest in forests):
            # Training operator may return 'None' if there was no data to train with
            return np.zeros(np.subtract(roi.stop, roi.start),
                            dtype=np.float32)[...]

        traceLogger.debug("OpPredictRandomForest: Got classifier")
        #assert RF.labelCount() == nlabels, "ERROR: OpPredictRandomForest, labelCount differs from true labelCount! %r vs. %r" % (RF.labelCount(), nlabels)

        newKey = key[:-1]
        newKey += (slice(0, self.inputs["Image"].meta.shape[-1], None), )

        res = self.inputs["Image"][newKey].wait()

        shape = res.shape
        prod = np.prod(shape[:-1])
        res.shape = (prod, shape[-1])
        features = res

        predictions = [0] * len(forests)

        t2 = time.perf_counter()

        pool = RequestPool()

        def predict_forest(i):
            predictions[i] = forests[i].predict(
                np.asarray(features, dtype=np.float32))
            predictions[i] = predictions[i].reshape(result.shape[:-1])

        for i, f in enumerate(forests):
            req = pool.request(partial(predict_forest, i))

        pool.wait()
        pool.clean()
        #predictions[0] = forests[0].predict(np.asarray(features, dtype = np.float32), normalize = False)
        #predictions[0] = predictions[0].reshape(result.shape)
        prediction = np.dstack(predictions)
        result[...] = prediction

        # If our LabelsCount is higher than the number of labels in the training set,
        # then our results aren't really valid.  FIXME !!!
        # Duplicate the last label's predictions
        #for c in range(result.shape[-1]):
        #    result[...,c] = prediction[...,min(c+key[-1].start, prediction.shape[-1]-1)]

        t3 = time.perf_counter()

        logger.debug(
            "Predict took %f seconds, actual RF time was %fs, feature time was %fs"
            % (t3 - t1, t3 - t2, t2 - t1))
        return result
Example #18
    def _label(self, roi, result):
        result = vigra.taggedView(result, axistags=self.Output.meta.axistags)
        # get the background values
        bg = self.Background[...].wait()
        bg = vigra.taggedView(bg, axistags=self.Background.meta.axistags)
        bg = bg.withAxes(*'ct')
        assert np.all(self.Background.meta.shape[3:] ==
                      self.Input.meta.shape[3:]),\
            "Shape of background values incompatible to shape of Input"

        # do labeling in parallel over channels and time slices
        pool = RequestPool()

        start = np.asarray(roi.start, dtype=int)
        stop = np.asarray(roi.stop, dtype=int)
        for ti, t in enumerate(range(roi.start[4], roi.stop[4])):
            start[4], stop[4] = t, t+1
            for ci, c in enumerate(range(roi.start[3], roi.stop[3])):
                start[3], stop[3] = c, c+1
                newRoi = SubRegion(self.Output,
                                   start=tuple(start), stop=tuple(stop))
                resView = result[..., ci, ti].withAxes(*'xyz')
                req = Request(partial(self._label3d, newRoi,
                                      bg[c, t], resView))
                pool.add(req)

        logger.debug(
            "{}: Computing connected components for ROI {} ...".format(
                self.name, roi))
        pool.wait()
        pool.clean()
        logger.debug("{}: Connected components computed.".format(
            self.name))
Example #19
    def execute(self, slot, subindex, roi, result):
        assert slot == self._ReorderedOutput
        pool = RequestPool()

        t_ind = 0
        for t in range(roi.start[0], roi.stop[0]):
            c_ind = 0
            for c in range(roi.start[-1], roi.stop[-1]):
                newroi = roi.copy()
                newroi.start[0] = t
                newroi.stop[0] = t+1
                newroi.start[-1] = c
                newroi.stop[-1] = c+1

                req = self._op.Output.get(newroi)
                resView = result[t_ind:t_ind+1, ..., c_ind:c_ind+1]
                req.writeInto(resView)

                pool.add(req)

                c_ind += 1

            t_ind += 1

        pool.wait()
        pool.clean()
    def execute(self, slot, subindex, roi, result):
        assert slot == self.ConcatenatedOutput
        self.progressSignal(0.0)

        num_dirty_slots = len(self._dirty_slots)
        subtask_progress = {}
        progress_lock = RequestLock()

        def forward_progress_updates(feature_slot, progress):
            with progress_lock:
                subtask_progress[feature_slot] = progress
                total_progress = 0.95 * sum(subtask_progress.values()) / num_dirty_slots
            self.progressSignal(total_progress)

        logger.debug(
            "Updating features for {} dirty images out of {}"
            "".format(len(self._dirty_slots), len(self.FeatureMatrices))
        )

        pool = RequestPool()
        subresults = []
        for feature_slot, progress_slot in zip(self.FeatureMatrices, self.ProgressSignals):
            subresults.append([None])
            req = feature_slot[:]
            req.writeInto(subresults[-1])

            # Only use progress for slots that were dirty.
            # The others are going to be really fast.
            if feature_slot in self._dirty_slots:
                sub_progress_signal = progress_slot.value
                sub_progress_signal.subscribe(partial(forward_progress_updates, feature_slot))
            pool.add(req)
        pool.wait()

        # Reset dirty slots
        self._dirty_slots = set()

        # Since the subresults are returned in 'value' slots,
        #  we have to unpack them from their single-element lists.
        subresult_list = list(itertools.chain(*subresults))

        total_matrix = numpy.concatenate(subresult_list, axis=0)
        self.progressSignal(100.0)
        result[0] = total_matrix
Example #21
    def execute(self, slot, subindex, roi, result):
        def compute_for_channel(output_channel, input_channel):
            input_roi = numpy.array((roi.start, roi.stop))
            input_roi[:, -1] = (input_channel, input_channel + 1)
            input_req = self.Input(*input_roi)

            # If possible, use the result array itself as a scratch area
            if self.Input.meta.dtype == result.dtype:
                input_req.writeInto(result[..., output_channel : output_channel + 1])

            input_data = input_req.wait()
            input_data = input_data.astype(numpy.float32, order="C", copy=False)
            input_data = input_data[..., 0]  # drop channel axis
            result[..., output_channel] = computeIntegralImage(input_data)

        pool = RequestPool()
        for output_channel, input_channel in enumerate(range(roi.start[-1], roi.stop[-1])):
            pool.add(Request(partial(compute_for_channel, output_channel, input_channel)))
        pool.wait()
Example #22
    def execute(self, slot, subindex, rroi, result):
        key = roiToSlice(rroi.start,rroi.stop)

        cnt = 0
        written = 0
        start, stop = roi.sliceToRoi(key, self.outputs["Output"].meta.shape)
        assert (stop<=self.outputs["Output"].meta.shape).all()
        #axisindex = self.inputs["AxisIndex"].value
        flag = self.inputs["AxisFlag"].value
        axisindex = self.outputs["Output"].meta.axistags.index(flag)
        #ugly-ugly-ugly
        oldkey = list(key)
        oldkey.pop(axisindex)
        
        #print "STACKER: ", flag, axisindex
        #print "requesting an outslot from stacker:", key, result.shape
        #print "input slots total: ", len(self.inputs['Images'])
        requests = []
        
        pool = RequestPool()

        for i, inSlot in enumerate(self.inputs['Images']):
            req = None
            inTagKeys = [ax.key for ax in inSlot.meta.axistags]
            if flag in inTagKeys:
                slices = inSlot.meta.shape[axisindex]
                if cnt + slices >= start[axisindex] and start[axisindex]-cnt<slices and start[axisindex]+written<stop[axisindex]:
                    begin = 0
                    if cnt < start[axisindex]:
                        begin = start[axisindex] - cnt
                    end = slices
                    if cnt + end > stop[axisindex]:
                        end -= cnt + end - stop[axisindex]
                    key_ = copy.copy(oldkey)
                    key_.insert(axisindex, slice(begin, end, None))
                    reskey = [slice(None, None, None) for x in range(len(result.shape))]
                    reskey[axisindex] = slice(written, written+end-begin, None)

                    req = inSlot[tuple(key_)].writeInto(result[tuple(reskey)])
                    written += end - begin
                cnt += slices
            else:
                if cnt>=start[axisindex] and start[axisindex] + written < stop[axisindex]:
                    #print "key: ", key, "reskey: ", reskey, "oldkey: ", oldkey
                    #print "result: ", result.shape, "inslot:", inSlot.meta.shape
                    reskey = [slice(None, None, None) for s in oldkey]
                    reskey.insert(axisindex, written)
                    destArea = result[tuple(reskey)]
                    req = inSlot[tuple(oldkey)].writeInto(destArea)
                    written += 1
                cnt += 1

            if req is not None:
                pool.add(req)

        pool.wait()
        pool.clean()
    def _train_forests_with_feature_importance(forests, X, y, feature_names, export_path=None):
        """
        Train all RFs (in parallel) and compute feature importances while doing so.
        The importances table will be logged as INFO, and also exported to a file if export_path is given.

        Returns: oobs and importances
        """
        oobs = [None] * len(forests)
        importances = [None] * len(forests)

        def store_training_results(i, training_results):
            oob, importance_results = training_results
            oobs[i] = oob
            importances[i] = importance_results

        with Timer() as train_timer:
            pool = RequestPool()
            for i, forest in enumerate(forests):
                req = Request(partial(forest.learnRFWithFeatureSelection, X, y))
                # save the training results
                req.notify_finished(partial(store_training_results, i))
                pool.add(req)
            pool.wait()

        logger.info("Training took {} seconds".format(train_timer.seconds()))

        # Forests may have different numbers of trees,
        # so take a weighted average of their importances
        tree_counts = [f.treeCount() for f in forests]
        weights = numpy.array(tree_counts).astype(float)
        weights /= weights.sum()

        named_importances = collections.OrderedDict(
            list(zip(feature_names, numpy.average(importances, weights=weights, axis=0)))
        )

        importance_table = generate_importance_table(named_importances, sort="overall", export_path=export_path)

        logger.info("Feature importance measurements during training: \n{}".format(importance_table))

        return oobs, named_importances
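The weighted averaging of per-forest importances above can be checked in isolation; a small numpy-only sketch with made-up numbers (two forests, two features):

import numpy

importances = numpy.array([[0.2, 0.8],   # forest with 10 trees
                           [0.4, 0.6]])  # forest with 30 trees
tree_counts = [10, 30]
weights = numpy.array(tree_counts).astype(float)
weights /= weights.sum()                                   # [0.25, 0.75]
combined = numpy.average(importances, weights=weights, axis=0)
# -> [0.35, 0.65]: forests contributing more trees get proportionally more weight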
    def run(self):
        
#         # Load caches beforehand (To remove overhead of reading frames)
#         with Timer() as timerCaches:    
#             rawVol = self.opCacheRaw.Output([]).wait()
#             binaryVol = self.opCacheBinary.Output([]).wait()
#              
#         print "Caches took {} secs".format(timerCaches.seconds())
#          
#         del rawVol
#         del binaryVol
    
        # Profile object extraction simplified
        print("\nStarting object extraction simplified (single-thread, without cache)")
             
        with Timer() as timerObjectFeaturesSimp:
            featsObjectFeaturesSimp = self.opObjectFeaturesSimp.Features([]).wait()
                 
        print("Simplified object extraction took: {} seconds".format(timerObjectFeaturesSimp.seconds()))     
        
        # Profile object extraction optimized
        print("\nStarting object extraction (multi-thread, without cache)")
          
        with Timer() as timerObjectExtraction:
            featsObjectExtraction = self.opObjectExtraction.RegionFeatures([]).wait()
              
        print("Object extraction took: {} seconds".format(timerObjectExtraction.seconds())) 
    
        # Profile for basic multi-threaded feature computation 
        # just a multi-threaded loop that labels volumes and extracts object features directly (no operators, no plugin system, no overhead, just a loop)
        featsBasicFeatureComp = dict.fromkeys( list(range(self.op5Raw.Output.meta.shape[0])), None)
            
        print("\nStarting basic multi-threaded feature computation")
        pool = RequestPool()    
        for t in range(0, self.op5Raw.Output.meta.shape[0], 1):
            pool.add( Request( partial(self._computeObjectFeatures, t, featsBasicFeatureComp) ) )
                 
        with Timer() as timerBasicFeatureComp:
            pool.wait()
                 
        print("Basic multi-threaded feature extraction took: {} seconds".format( timerBasicFeatureComp.seconds() ))                 
    def create_and_train(self, X, y):
        logger.debug("Training parallel vigra RF")
        # Save for future reference
        known_labels = numpy.unique(y)

        X = numpy.asarray(X, numpy.float32)
        y = numpy.asarray(y, numpy.uint32)
        if y.ndim == 1:
            y = y[:, numpy.newaxis]

        assert X.ndim == 2
        assert len(X) == len(y)

        # Create N forests
        forests = []
        for _ in range(self._num_forests):
            forest = vigra.learning.RandomForest(self._trees_per_forest,
                                                 **self._kwargs)
            forests.append(forest)

        # Train them all in parallel
        pool = RequestPool()
        for forest in forests:
            pool.add(Request(partial(forest.learnRF, X, y)))
        pool.wait()

        return ParallelVigraRfLazyflowClassifier(forests, known_labels)
    def predict_probabilities(self, X):
        logger.debug("Predicting with parallel vigra RF")
        X = numpy.asarray(X, dtype=numpy.float32)
        assert X.ndim == 2

        if self._feature_names is not None:
            # For some reason, vigra doesn't seem to check this for us...
            assert X.shape[1] == len(self._feature_names), \
                "Feature count doesn't match the training data."

        # As each forest completes, aggregate results in a shared array.
        # (Must put in a list so we can update it in this closure.)
        total_predictions = [None]
        prediction_lock = RequestLock()

        def update_predictions(forest, forest_predictions):
            forest_predictions *= forest.treeCount()
            with prediction_lock:
                if total_predictions[0] is None:
                    total_predictions[0] = forest_predictions
                else:
                    total_predictions[0] += forest_predictions

        # Create a request for each forest
        pool = RequestPool()
        for forest in self._forests:
            req = Request(partial(forest.predictProbabilities, X))
            req.notify_finished(partial(update_predictions, forest))
            pool.add(req)
        del req
        pool.wait()

        total_predictions[0] /= self._num_trees
        return total_predictions[0]
Example #27
    def execute(self, slot, subindex, roi, result):
        def compute_for_channel(output_channel, input_channel):
            input_roi = numpy.array((roi.start, roi.stop))
            input_roi[:, -1] = (input_channel, input_channel + 1)
            input_req = self.Input(*input_roi)

            # If possible, use the result array itself as a scratch area
            if self.Input.meta.dtype == result.dtype:
                input_req.writeInto(result[...,
                                           output_channel:output_channel + 1])

            input_data = input_req.wait()
            input_data = input_data.astype(numpy.float32,
                                           order='C',
                                           copy=False)
            input_data = input_data[..., 0]  # drop channel axis
            result[..., output_channel] = computeIntegralImage(input_data)

        pool = RequestPool()
        for output_channel, input_channel in enumerate(
                range(roi.start[-1], roi.stop[-1])):
            pool.add(
                Request(
                    partial(compute_for_channel, output_channel,
                            input_channel)))
        pool.wait()
    def execute(self, slot, subindex, roi, result):
        clipped_block_rois = getIntersectingRois(self.Input.meta.shape,
                                                 self.BlockShape.value,
                                                 (roi.start, roi.stop), True)
        if self._always_request_full_blocks:
            full_block_rois = getIntersectingRois(self.Input.meta.shape,
                                                  self.BlockShape.value,
                                                  (roi.start, roi.stop), False)
        else:
            full_block_rois = clipped_block_rois

        pool = RequestPool()
        for full_block_roi, clipped_block_roi in zip(full_block_rois,
                                                     clipped_block_rois):
            full_block_roi = numpy.asarray(full_block_roi)
            clipped_block_roi = numpy.asarray(clipped_block_roi)

            req = self.Input(*full_block_roi)
            output_roi = numpy.asarray(clipped_block_roi) - roi.start
            if (full_block_roi == clipped_block_roi).all():
                req.writeInto(result[roiToSlice(*output_roi)])
            else:
                roi_within_block = clipped_block_roi - full_block_roi[0]

                def copy_request_result(output_roi, roi_within_block,
                                        request_result):
                    self.Output.stype.copy_data(
                        result[roiToSlice(*output_roi)],
                        request_result[roiToSlice(*roi_within_block)])

                req.notify_finished(
                    partial(copy_request_result, output_roi, roi_within_block))
            pool.add(req)
            del req
        pool.wait()
Example #29
    def execute(self, slot, subindex, roi, result):
        with self._lock:
            if self.cache is None:
                fullBlockShape = numpy.array([self.blockShape.value for i in self.Input.meta.shape])
                fun = self.inputs["Function"].value
                #data = self.inputs["Input"][:].wait()
                #split up requests into blocks
                shape = self.Input.meta.shape
                numBlocks = numpy.ceil(shape/(1.0*fullBlockShape)).astype("int")
                blockCache = numpy.ndarray(shape = numpy.prod(numBlocks), dtype=self.Output.meta.dtype)
                pool = RequestPool()
                #blocks holds the different roi keys for each of the blocks
                blocks = itertools.product(*[range(i) for i in numBlocks])
                blockKeys = []
                for b in blocks:
                    start = b * fullBlockShape
                    stop = b * fullBlockShape + fullBlockShape
                    stop = numpy.min(numpy.vstack((stop, shape)), axis=0)
                    blockKey = roiToSlice(start, stop)
                    blockKeys.append(blockKey)
                
                def predict_block(i):
                    data = self.Input[blockKeys[i]].wait()
                    blockCache[i] = fun(data)
                    
                for i,f in enumerate(blockCache):
                    req = pool.request(partial(predict_block,i))

                pool.wait()
                pool.clean()

                self.cache = [fun(blockCache)]
            return self.cache
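A small standalone sketch of the block-splitting arithmetic used above, with made-up shape and block size:

import itertools
import numpy

shape = numpy.array([100, 90])
blockshape = numpy.array([64, 64])
numBlocks = numpy.ceil(shape / (1.0 * blockshape)).astype(int)   # [2, 2]

block_rois = []
for b in itertools.product(*[range(n) for n in numBlocks]):
    start = numpy.array(b) * blockshape
    stop = numpy.minimum(start + blockshape, shape)   # clip the last block to the volume bounds
    block_rois.append((start, stop))
# block_rois covers (0,0)-(64,64), (0,64)-(64,90), (64,0)-(100,64), (64,64)-(100,90)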
Example #30
    def execute(self, slot, subindex, roi, result):
        stacked_axisindex = self.Images[0].meta.getAxisKeys().index(
            self.AxisFlag.value)

        pool = RequestPool()
        for slot, (slot_output_start,
                   slot_output_stop) in zip(self.Images,
                                            self.stacked_output_ranges):
            if roi.start[stacked_axisindex] >= slot_output_start and roi.stop[
                    stacked_axisindex] <= slot_output_stop:
                output_roi = roi.copy()
                output_roi.start[stacked_axisindex] = max(
                    slot_output_start, roi.start[stacked_axisindex])
                output_roi.stop[stacked_axisindex] = min(
                    slot_output_stop, roi.stop[stacked_axisindex])

                request_roi = roi.copy()
                request_roi.start[stacked_axisindex] = output_roi.start[
                    stacked_axisindex] - slot_output_start
                request_roi.stop[stacked_axisindex] = output_roi.stop[
                    stacked_axisindex] - slot_output_start

                result_roi = roi.copy()
                result_roi.start = output_roi.start - roi.start
                result_roi.stop = output_roi.stop - roi.start

                req = slot(request_roi.start, request_roi.stop)
                req.writeInto(result[roiToSlice(result_roi.start,
                                                result_roi.stop)])
                pool.add(req)
        pool.wait()
Example #31
    def execute(self, slot, subindex, roi, result):
        assert len(roi.start) == len(roi.stop) == len(self.Output.meta.shape)
        assert slot == self.Output

        t_ind = self.RawVolume.meta.axistags.index('t')
        assert t_ind < len(self.RawVolume.meta.shape)

        def compute_features_for_time_slice(res_t_ind, t):
            axes4d = [
                k for k in self.RawVolume.meta.getTaggedShape().keys()
                if k in 'xyzc'
            ]

            # Process entire spatial volume
            s = [slice(None)] * len(self.RawVolume.meta.shape)
            s[t_ind] = slice(t, t + 1)
            s = tuple(s)

            # Request in parallel
            raw_req = self.RawVolume[s]
            raw_req.submit()

            label_req = self.LabelVolume[s]
            label_req.submit()

            if self.Atlas.ready():
                atlasVolume = self.Atlas[s].wait()
                atlasVolume = vigra.taggedView(
                    atlasVolume, axistags=self.Atlas.meta.axistags)
                atlasVolume = atlasVolume.withAxes(*axes4d)
            else:
                atlasVolume = None

            # Get results
            rawVolume = raw_req.wait()
            labelVolume = label_req.wait()

            rawVolume = vigra.taggedView(rawVolume,
                                         axistags=self.RawVolume.meta.axistags)
            labelVolume = vigra.taggedView(
                labelVolume, axistags=self.LabelVolume.meta.axistags)

            # Convert to 4D (preserve axis order)
            rawVolume = rawVolume.withAxes(*axes4d)
            labelVolume = labelVolume.withAxes(*axes4d)
            acc = self._extract(rawVolume, labelVolume, atlasVolume)

            # Copy into the result
            result[res_t_ind] = acc

        # loop over requested time slices
        pool = RequestPool()
        for res_t_ind, t in enumerate(range(roi.start[t_ind],
                                            roi.stop[t_ind])):
            pool.add(
                Request(partial(compute_features_for_time_slice, res_t_ind,
                                t)))

        pool.wait()
        return result
Example #32
    def predict_probabilities(self, X):
        logger.debug( "Predicting with parallel vigra RF" )
        X = numpy.asarray(X, dtype=numpy.float32)

        # As each forest completes, aggregate results in a shared array.
        # (Must put in a list so we can update it in this closure.)
        total_predictions = [None]
        prediction_lock = RequestLock()
        def update_predictions(forest, forest_predictions):
            forest_predictions *= forest.treeCount()
            with prediction_lock:
                if total_predictions[0] is None:
                    total_predictions[0] = forest_predictions
                else:
                    total_predictions[0] += forest_predictions

        # Create a request for each forest
        pool = RequestPool()
        for forest in self._forests:
            req = Request( partial( forest.predictProbabilities, X ) )
            req.notify_finished( partial(update_predictions, forest) )
            pool.add( req )
        del req
        pool.wait()

        total_predictions[0] /= self._num_trees
        return total_predictions[0]
Example #33
    def predict(cls, X, method="classic"):
        """
        predict if the histograms in X correspond to missing regions
        do this for subsets of X in parallel
        """

        if cls._manager is None:
            cls._manager = SVMManager()

        assert len(
            X.shape
        ) == 2, "Prediction data must have shape (nSamples, nHistogramBins)."

        nBins = X.shape[1]

        if method == "classic":
            svm = PseudoSVC()
        else:
            try:
                svm = cls._manager.get(nBins)
            except SVMManager.NotTrainedError:
                # fail gracefully if not trained => responsibility of user!
                svm = PseudoSVC()

        y = np.zeros((len(X), )) * np.nan

        pool = RequestPool()

        chunkSize = 1000  # FIXME magic number??
        nChunks = len(X) // chunkSize + (1 if len(X) % chunkSize > 0 else 0)

        s = [
            slice(k * chunkSize, min((k + 1) * chunkSize, len(X)))
            for k in range(nChunks)
        ]

        def partFun(i):
            y[s[i]] = svm.predict(X[s[i]])

        for i in range(nChunks):
            req = Request(partial(partFun, i))
            pool.add(req)

        pool.wait()
        pool.clean()

        # not necessary
        # assert not np.any(np.isnan(y))
        return np.asarray(y)
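The chunking arithmetic above can be checked on its own; a short sketch with illustrative numbers:

chunkSize = 1000
n = 2500  # pretend len(X) == 2500
nChunks = n // chunkSize + (1 if n % chunkSize > 0 else 0)                       # 3
s = [slice(k * chunkSize, min((k + 1) * chunkSize, n)) for k in range(nChunks)]
# -> [slice(0, 1000), slice(1000, 2000), slice(2000, 2500)]; every sample is covered exactly once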
Example #34
    def execute(self, slot, subindex, roi, result):
        stacked_axisindex = self.Images[0].meta.getAxisKeys().index(self.AxisFlag.value)

        pool = RequestPool()
        for slot, (slot_output_start, slot_output_stop) in zip(self.Images, self.stacked_output_ranges):
            if roi.start[stacked_axisindex] >= slot_output_start and roi.stop[stacked_axisindex] <= slot_output_stop:
                output_roi = roi.copy()
                output_roi.start[stacked_axisindex] = max(slot_output_start, roi.start[stacked_axisindex])
                output_roi.stop[stacked_axisindex] = min(slot_output_stop, roi.stop[stacked_axisindex])

                request_roi = roi.copy()
                request_roi.start[stacked_axisindex] = output_roi.start[stacked_axisindex] - slot_output_start
                request_roi.stop[stacked_axisindex] = output_roi.stop[stacked_axisindex] - slot_output_start

                result_roi = roi.copy()
                result_roi.start = output_roi.start - roi.start
                result_roi.stop = output_roi.stop - roi.start
                
                req = slot(request_roi.start, request_roi.stop)
                req.writeInto( result[roiToSlice(result_roi.start, result_roi.stop)] )
                pool.add( req )
        pool.wait()
Example #35
        def export(self, filename, hypothesesGraph, objectFeaturesSlot,
                   labelImageSlot, rawImageSlot):
            """Export the tracking solution stored in the hypotheses graph as a sequence of H5 files,
            one per frame, containing the label image of that frame and which objects were part
            of a move or a division.
    
            :param filename: path to the FOLDER in which to save the result
            :param hypothesesGraph: hytra.core.hypothesesgraph.HypothesesGraph filled with a solution
            :param objectFeaturesSlot: lazyflow.graph.InputSlot, connected to the RegionFeaturesAll output 
                   of ilastik.applets.trackingFeatureExtraction.opTrackingFeatureExtraction.OpTrackingFeatureExtraction
            
            :returns: True on success, False otherwise
            """
            traxelIdPerTimestepToUniqueIdMap, uuidToTraxelMap = hypothesesGraph.getMappingsBetweenUUIDsAndTraxels()
            timesteps = [t for t in traxelIdPerTimestepToUniqueIdMap.keys()]

            result = hypothesesGraph.getSolutionDictionary()
            mergers, detections, links, divisions = getMergersDetectionsLinksDivisions(
                result, uuidToTraxelMap)

            # group by timestep for event creation
            mergersPerTimestep = getMergersPerTimestep(mergers, timesteps)
            linksPerTimestep = getLinksPerTimestep(links, timesteps)
            detectionsPerTimestep = getDetectionsPerTimestep(
                detections, timesteps)
            divisionsPerTimestep = getDivisionsPerTimestep(
                divisions, linksPerTimestep, timesteps)

            # save to disk in parallel
            pool = RequestPool()

            timeIndex = labelImageSlot.meta.axistags.index('t')

            for timestep in traxelIdPerTimestepToUniqueIdMap.keys():
                # extract current frame label image
                roi = [
                    slice(None) for i in range(len(labelImageSlot.meta.shape))
                ]
                roi[timeIndex] = slice(int(timestep), int(timestep) + 1)
                roi = tuple(roi)
                labelImage = labelImageSlot[roi].wait()

                if not os.path.exists(filename + '/H5-Event-Sequence'):
                    os.makedirs(filename + '/H5-Event-Sequence')
                fn = os.path.join(
                    filename,
                    "H5-Event-Sequence/{0:05d}.h5".format(int(timestep)))
                pool.add(
                    Request(
                        partial(writeEvents, int(timestep),
                                linksPerTimestep[timestep],
                                divisionsPerTimestep[timestep],
                                mergersPerTimestep[timestep],
                                detectionsPerTimestep[timestep], fn,
                                labelImage)))
            pool.wait()

            return True
    def create_and_train(self, X, y, feature_names=None):
        # Distribute trees as evenly as possible
        tree_counts = numpy.array( [self._num_trees // self._num_forests] * self._num_forests )
        tree_counts[:self._num_trees % self._num_forests] += 1
        assert tree_counts.sum() == self._num_trees
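        # Drop forests that would receive zero trees (possible when there are more forests than trees).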
        tree_counts = list(map(int, tree_counts))
        tree_counts[:] = (tree_count for tree_count in tree_counts if tree_count != 0)
        
        logger.debug( "Training parallel vigra RF" )
        # Save for future reference
        known_labels = numpy.unique(y)

        X = numpy.asarray(X, numpy.float32)
        y = numpy.asarray(y, numpy.uint32)
        if y.ndim == 1:
            y = y[:, numpy.newaxis]

        assert X.ndim == 2
        assert len(X) == len(y)

        # Create N forests

        forests = []
        for tree_count in tree_counts:
            forests.append( vigra.learning.RandomForest(tree_count, **self._kwargs) ) # HERE <--- this links to C++ library

        # Train them all in parallel
        oobs = [None] * len(forests)
        pool = RequestPool()
        for i, forest in enumerate(forests):
            req = Request( partial(forest.learnRF, X, y) )
            # save the oobs
            req.notify_finished( partial( oobs.__setitem__, i ) )
            pool.add( req )

        with Timer() as timer:
            pool.wait()
        logger.info( "Training completed in {} seconds. Average OOB: {}".format( timer.seconds(), numpy.average(oobs) ) )
        return ParallelVigraRfLazyflowClassifier( forests, oobs, known_labels, feature_names )
Example #37
def execute_tasks(tasks):
    """
    Executes the given list of tasks (functions) in the lazyflow threadpool.
    """
    pool = RequestPool()
    for task in tasks:
        pool.add(Request(task))
    pool.wait()
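
# Illustrative usage sketch for execute_tasks (not part of the original snippet); it assumes
# functools.partial is imported, as in the other examples on this page:
#
#   tasks = [partial(print, "processing item", i) for i in range(4)]
#   execute_tasks(tasks)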
Example #38
    def _resolveMergers(self, hypothesesGraph, model):
        '''
        run merger resolution on the hypotheses graph which contains the current solution
        '''
        logger.info("Resolving mergers.")
                
        parameters = self.Parameters.value
        withTracklets = parameters['withTracklets']
        originalGraph = hypothesesGraph.referenceTraxelGraph if withTracklets else hypothesesGraph
        resolvedMergersDict = {}
        
        # Enable full graph computation for animal tracking workflow
        withFullGraph = False
        if 'withAnimalTracking' in parameters and parameters['withAnimalTracking']: # TODO: Setting this parameter outside of the track() function (on AnimalConservationTrackingWorkflow) is not desirable 
            withFullGraph = True
            logger.info("Computing full graph on merger resolver (Only enabled on animal tracking workflow)")
        
        mergerResolver = IlastikMergerResolver(originalGraph, pluginPaths=self.pluginPaths, withFullGraph=withFullGraph)
        
        # Check if graph contains mergers, otherwise skip merger resolving
        if not mergerResolver.mergerNum:
            logger.info("Graph contains no mergers. Skipping merger resolving.")
        else:        
            # Fit and refine merger nodes using a GMM 
            # It has to be done per time-step in order to avoid loading the whole video into RAM
            traxelIdPerTimestepToUniqueIdMap, uuidToTraxelMap = getMappingsBetweenUUIDsAndTraxels(model)
            timesteps = [int(t) for t in traxelIdPerTimestepToUniqueIdMap.keys()]
            timesteps.sort()
            
            timeIndex = self.LabelImage.meta.axistags.index('t')
            
            for timestep in timesteps:
                roi = [slice(None) for i in range(len(self.LabelImage.meta.shape))]
                roi[timeIndex] = slice(timestep, timestep+1)
                roi = tuple(roi)
                
                labelImage = self.LabelImage[roi].wait()
                
                # Get coordinates for object IDs in label image. Used by GMM merger fit.
                objectIds = vigra.analysis.unique(labelImage[0,...,0])
                maxObjectId = max(objectIds)
                
                coordinatesForIds = {}
                
                pool = RequestPool()
                for objectId in objectIds:
                    pool.add(Request(partial(mergerResolver.getCoordinatesForObjectId, coordinatesForIds, labelImage[0, ..., 0], timestep, objectId)))                 

                # Run requests to get object ID coordinates
                pool.wait()              
                
                # Fit mergers and store fit info in nodes  
                if coordinatesForIds:
                    mergerResolver.fitAndRefineNodesForTimestep(coordinatesForIds, maxObjectId, timestep)   
                
            # Compute object features, re-run flow solver, update model and result, and get merger dictionary
            resolvedMergersDict = mergerResolver.run()
        return resolvedMergersDict
    def _executePredictionImage(self, slot, roi, destination):
        roi_one_channel = numpy.array( (roi.start, roi.stop) )
        roi_one_channel[...,-1] = (0,1)
        # Determine intersecting blocks
        block_shape = self._getFullShape( self.BlockShape3dDict.value )
        block_starts = getIntersectingBlocks( block_shape, roi_one_channel )
        block_starts = list(map( tuple, block_starts ))

        # Ensure that block pipelines exist (create first if necessary)
        for block_start in block_starts:
            self._ensurePipelineExists(block_start)

        # Retrieve result from each block, and write into the appropriate region of the destination
        pool = RequestPool()
        for block_start in block_starts:
            opBlockPipeline = self._blockPipelines[block_start]
            block_roi = opBlockPipeline.block_roi
            block_intersection = getIntersection( block_roi, roi_one_channel )
            block_relative_intersection = numpy.subtract(block_intersection, block_roi[0])
            destination_relative_intersection = numpy.subtract(block_intersection, roi_one_channel[0])

            block_slot = opBlockPipeline.PredictionImage            
            if slot == self.ProbabilityChannelImage:
                block_slot = opBlockPipeline.ProbabilityChannelImage
                # Add channels back to roi
                # request all channels
                block_relative_intersection[...,-1] = (0, opBlockPipeline.ProbabilityChannelImage.meta.shape[-1])
                # But only write the ones that were specified in the original roi
                destination_relative_intersection[...,-1] = ( roi.start[-1], roi.stop[-1] )

            # Request the data
            destination_slice = roiToSlice( *destination_relative_intersection )
            req = block_slot( *block_relative_intersection )
            req.writeInto( destination[destination_slice] )
            pool.add( req )
        pool.wait()

        return destination
Example #40
    def read(self, view_roi, result_out):
        """
        roi: (start, stop) tuples, ordered according to description.output_axes
             roi should be relative to the view
        """
        output_axes = self.description.output_axes
        roi_transposed = list(zip(*view_roi))
        roi_dict = dict(list(zip(output_axes, roi_transposed)))
        view_roi = list(zip(*(roi_dict["z"], roi_dict["y"], roi_dict["x"])))

        # First, normalize roi and result to zyx order
        result_out = vigra.taggedView(result_out, output_axes)
        result_out = result_out.withAxes(*"zyx")

        assert numpy.array(view_roi).shape == (2, 3), "Invalid roi for 3D volume: {}".format(view_roi)
        view_roi = numpy.array(view_roi)
        assert (result_out.shape == (view_roi[1] - view_roi[0])).all()

        # User gave roi according to the view output.
        # Now offset it find global roi.
        roi = view_roi + self.description.view_origin_zyx

        tile_blockshape = (1,) + tuple(self.description.tile_shape_2d_yx)
        tile_starts = getIntersectingBlocks(tile_blockshape, roi)

        PARALLEL_REQ = True
        pool = RequestPool()
        for tile_start in tile_starts:
            tile_roi_in = getBlockBounds(self.description.bounds_zyx, tile_blockshape, tile_start)
            tile_roi_in = numpy.array(tile_roi_in)

            # This tile's portion of the roi
            intersecting_roi = getIntersection(roi, tile_roi_in)
            intersecting_roi = numpy.array(intersecting_roi)

            # Compute slicing within destination array and slicing within this tile
            destination_relative_intersection = numpy.subtract(intersecting_roi, roi[0])
            tile_relative_intersection = intersecting_roi - tile_roi_in[0]

            # Get a view to the output slice
            result_region = result_out[roiToSlice(*destination_relative_intersection)]

            rest_args = self._get_rest_args(tile_blockshape, tile_roi_in)
            if self.description.tile_url_format.startswith("http"):
                retrieval_fn = partial(self._retrieve_remote_tile, rest_args, tile_relative_intersection, result_region)
            else:
                retrieval_fn = partial(self._retrieve_local_tile, rest_args, tile_relative_intersection, result_region)

            if PARALLEL_REQ:
                pool.add(Request(retrieval_fn))
            else:
                # execute serially (leave the pool empty)
                retrieval_fn()

        if PARALLEL_REQ:
            with Timer() as timer:
                pool.wait()
            logger.info("Loading {} tiles took a total of {}".format(len(tile_starts), timer.seconds()))
    def predict_probabilities(self, X):
        logger.debug( "Predicting with parallel vigra RF" )
        X = numpy.asarray(X, dtype=numpy.float32)

        # Create a request for each forest        
        reqs = []
        for forest in self._forests:
            req = Request( partial( forest.predictProbabilities, X ) )
            reqs.append( req )

        # Execute all requests in a pool        
        pool = RequestPool()
        for req in reqs:
            pool.add( req )
        pool.wait()

        # Aggregate the results
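        # Note: this is an unweighted mean over the forests' probability estimates; it matches a
        # tree-weighted average only if every forest holds the same number of trees.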
        predictions = reqs[0].result
        for req in reqs[1:]:
            predictions += req.result

        predictions /= len(reqs)
        return predictions
Example #42
    def execute(self, slot, subindex, roi, result):
        t1 = time.time()
        key = roi.toSlice()
        nlabels=self.inputs["LabelsCount"].value

        traceLogger.debug("OpPredictRandomForest: Requesting classifier. roi={}".format(roi))
        forests=self.inputs["Classifier"][:].wait()

        if forests is None or any(x is None for x in forests):
            # Training operator may return 'None' if there was no data to train with
            return numpy.zeros(numpy.subtract(roi.stop, roi.start), dtype=numpy.float32)[...]

        traceLogger.debug("OpPredictRandomForest: Got classifier")
        #assert RF.labelCount() == nlabels, "ERROR: OpPredictRandomForest, labelCount differs from true labelCount! %r vs. %r" % (RF.labelCount(), nlabels)

        newKey = key[:-1]
        newKey += (slice(0,self.inputs["Image"].meta.shape[-1],None),)

        res = self.inputs["Image"][newKey].wait()

        shape=res.shape
        prod = numpy.prod(shape[:-1])
        res.shape = (prod, shape[-1])
        features=res
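        # 'features' is now a 2D matrix: one row per pixel, one column per feature channel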

        predictions = [0]*len(forests)

        def predict_forest(number):
            predictions[number] = forests[number].predictProbabilities(numpy.asarray(features, dtype=numpy.float32))

        t2 = time.time()

        # predict the data with all the forests in parallel
        pool = RequestPool()

        for i,f in enumerate(forests):
            req = pool.request(partial(predict_forest, i))

        pool.wait()
        pool.clean()

        prediction=numpy.dstack(predictions)
        prediction = numpy.average(prediction, axis=2)
        prediction.shape =  shape[:-1] + (forests[0].labelCount(),)
        #prediction = prediction.reshape(*(shape[:-1] + (forests[0].labelCount(),)))

        # If our LabelsCount is higher than the number of labels in the training set,
        # then our results aren't really valid.  FIXME !!!
        # Duplicate the last label's predictions
        for c in range(result.shape[-1]):
            result[...,c] = prediction[...,min(c+key[-1].start, prediction.shape[-1]-1)]

        t3 = time.time()

        self.logger.debug("predict roi=%r took %fseconds, actual RF time was %fs, feature time was %fs" % (key, t3-t1, t3-t2, t2-t1))
        
        return result
Example #43
 def compute_all_features():
     # Compute features in parallel
     pool = RequestPool()
     for t in range(tMax):
         pool.add(
             Request(
                 partial(compute_features_for_frame, tIndex, t,
                         features)))
     pool.wait()
Example #44
    def execute(self, slot, subindex, roi, result):
        assert slot == self.LabelAndFeatureMatrix
        self.progressSignal(0.0)

        # Technically, this could result in strange progress reporting if execute() 
        #  is called by multiple threads in parallel.
        # This could be fixed with some fancier progress state, but 
        # (1) We don't expect that to be typical, and
        # (2) progress reporting is merely informational.
        num_dirty_blocks = len( self._dirty_blocks )
        def update_progress( result ):
            remaining_dirty = len( self._dirty_blocks )
            percent_complete = 95.0*(num_dirty_blocks - remaining_dirty)/num_dirty_blocks
            self.progressSignal( percent_complete )

        # Update all dirty blocks in the cache
        logger.debug( "Updating {} dirty blocks".format(num_dirty_blocks) )
        pool = RequestPool()
        for block_start in self._dirty_blocks:
            req = Request( partial(self._update_block, block_start ) )
            req.notify_finished( update_progress )
            pool.add( req )
        pool.wait()

        # Concatenate the all blockwise results
        if self._blockwise_feature_matrices:
            total_feature_matrix = numpy.concatenate( self._blockwise_feature_matrices.values(), axis=0 )
        else:
            # No label points at all.
            # Return an empty label&feature matrix (of the correct shape)
            num_feature_channels = self.FeatureImage.meta.shape[-1]
            total_feature_matrix = numpy.ndarray( shape=(0, 1 + num_feature_channels), dtype=numpy.float64 )

        self.progressSignal(100.0)
        logger.debug( "After update, there are {} clean blocks".format( len(self._blockwise_feature_matrices) ) )
        result[0] = total_feature_matrix
    def execute(self, slot, subindex, roi, result):

        featList = []
        labelsList = []

        for i in range(len(self.Labels)):
            feats = self.Features[i]([]).wait()

            # TODO: we should be able to use self.Labels[i].value,
            # but the current implementation of Slot.value() does not
            # do the right thing.
            labels = self.Labels[i]([]).wait()

            featstmp, labelstmp = make_feature_array(feats, labels)
            featList.append(featstmp)
            labelsList.append(labelstmp)

        featMatrix = _concatenate(featList, axis=0)
        labelsMatrix = _concatenate(labelsList, axis=0)
        print "training on matrix:", featMatrix.shape, featMatrix.dtype

        if len(featMatrix) == 0 or len(labelsMatrix) == 0:
            result[:] = None
            return
        oob = [0] * self.ForestCount.value
        try:
            # Ensure there are no NaNs in the feature matrix
            # TODO: There should probably be a better way to fix this...
            featMatrix = numpy.asarray(featMatrix, dtype=numpy.float32)
            nanFeatMatrix = numpy.isnan(featMatrix)
            if nanFeatMatrix.any():
                warnings.warn("Feature matrix has NaN values!  Replacing with 0.0...")
                featMatrix[numpy.where(nanFeatMatrix)] = 0.0
            # train and store forests in parallel
            pool = RequestPool()
            for i in range(self.ForestCount.value):
                def train_and_store(number):
                    result[number] = vigra.learning.RandomForest(self._tree_count)
                    oob[number] = result[number].learnRF(featMatrix, numpy.asarray(labelsMatrix, dtype=numpy.uint32))
                    print "intermediate oob:", oob[number]
                req = Request( partial(train_and_store, i) )
                pool.add( req )
            pool.wait()
            pool.clean()
        except:
            print ("couldn't learn classifier")
            raise
        oob_total = numpy.mean(oob)
        print "training finished, out of bag error:", oob_total
        return result
Example #46
 def _waitForBlocks(self, block_starts):
     """
     Make sure that all blocks in the given list of blocks are present in the cache before returning.
     (Blocks that are not yet present will be requested from our Input slot.)
     """
     reqPool = RequestPool() # (Do the work in parallel.)
     for block_start in block_starts:
         entire_block_roi = getBlockBounds( self.Output.meta.shape, self._blockshape, block_start )
         f = partial( self._ensureCached, entire_block_roi)
         reqPool.add( Request(f) )
     logger.debug( "Waiting for {} blocks...".format( len(block_starts) ) )
     reqPool.wait()
Example #47
    def testBasic(self):
        graph = Graph()
        opDataProvider = OpArrayPiperWithAccessCount(graph=graph)
        opCache = OpUnblockedArrayCache(graph=graph)

        data = np.random.random((100, 100, 100)).astype(np.float32)
        opDataProvider.Input.setValue(vigra.taggedView(data, 'zyx'))
        opCache.Input.connect(opDataProvider.Output)

        assert opCache.CleanBlocks.value == []

        roi = ((30, 30, 30), (50, 50, 50))
        cache_data = opCache.Output(*roi).wait()
        assert (cache_data == data[roiToSlice(*roi)]).all()
        assert opDataProvider.accessCount == 1
        assert opCache.CleanBlocks.value == [roiToSlice(*roi)]

        # Request the same data a second time.
        # Access count should not change.
        cache_data = opCache.Output(*roi).wait()
        assert (cache_data == data[roiToSlice(*roi)]).all()
        assert opDataProvider.accessCount == 1
        assert opCache.CleanBlocks.value == [roiToSlice(*roi)]

        # Now invalidate a part of the data
        # The cache will discard it, so the access count should increase.
        opDataProvider.Input.setDirty((30, 30, 30), (31, 31, 31))
        assert opCache.CleanBlocks.value == []
        cache_data = opCache.Output(*roi).wait()
        assert (cache_data == data[roiToSlice(*roi)]).all()
        assert opDataProvider.accessCount == 2

        # Repeat this next part just for safety
        for _ in range(10):
            # Make sure the cache is empty
            opDataProvider.Input.setDirty((30, 30, 30), (31, 31, 31))
            opDataProvider.accessCount = 0

            # Create many requests for the same data.
            # Upstream data should only be accessed ONCE.
            pool = RequestPool()
            for _ in range(10):
                pool.add(opCache.Output(*roi))
            pool.wait()
            assert opDataProvider.accessCount == 1

        # Also, make sure requests for INNER rois of stored blocks are also serviced from memory
        opDataProvider.accessCount = 0
        inner_roi = ((35, 35, 35), (45, 45, 45))
        cache_data = opCache.Output(*inner_roi).wait()
        assert (cache_data == data[roiToSlice(*inner_roi)]).all()
        assert opDataProvider.accessCount == 0
        assert opCache.CleanBlocks.value == [roiToSlice(*roi)]
    def run(self):

        #         # Load caches beforehand (To remove overhead of reading frames)
        #         with Timer() as timerCaches:
        #             rawVol = self.opCacheRaw.Output([]).wait()
        #             binaryVol = self.opCacheBinary.Output([]).wait()
        #
        #         print "Caches took {} secs".format(timerCaches.seconds())
        #
        #         del rawVol
        #         del binaryVol

        # Profile object extraction simplified
        print(
            "\nStarting object extraction simplified (single-thread, without cache)"
        )

        with Timer() as timerObjectFeaturesSimp:
            featsObjectFeaturesSimp = self.opObjectFeaturesSimp.Features(
                []).wait()

        print("Simplified object extraction took: {} seconds".format(
            timerObjectFeaturesSimp.seconds()))

        # Profile object extraction optimized
        print("\nStarting object extraction (multi-thread, without cache)")

        with Timer() as timerObjectExtraction:
            featsObjectExtraction = self.opObjectExtraction.RegionFeatures(
                []).wait()

        print("Object extraction took: {} seconds".format(
            timerObjectExtraction.seconds()))

        # Profile for basic multi-threaded feature computation
        # just a multi-threaded loop that labels volumes and extracts object features directly (no operators, no plugin system, no overhead, just a loop)
        featsBasicFeatureComp = dict.fromkeys(
            list(range(self.op5Raw.Output.meta.shape[0])), None)

        print("\nStarting basic multi-threaded feature computation")
        pool = RequestPool()
        for t in range(0, self.op5Raw.Output.meta.shape[0], 1):
            pool.add(
                Request(
                    partial(self._computeObjectFeatures, t,
                            featsBasicFeatureComp)))

        with Timer() as timerBasicFeatureComp:
            pool.wait()

        print(
            "Basic multi-threaded feature extraction took: {} seconds".format(
                timerBasicFeatureComp.seconds()))
Example #49
    def execute(self, slot, subindex, roi, result):
        with self._lock:
            if self.cache is None:
                shape = self.Input.meta.shape
                # self.blockshape has None in the last dimension to indicate that it should not be
                # handled block-wise. None is replaced with the image shape in the respective axis.
                fullBlockShape = []
                for u, v in zip(self.blockShape.value, shape):
                    if u is not None:
                        fullBlockShape.append(u)
                    else:
                        fullBlockShape.append(v)
                fullBlockShape = numpy.array(fullBlockShape,
                                             dtype=numpy.float64)

                # data = self.inputs["Input"][:].wait()
                # split up requests into blocks

                numBlocks = numpy.ceil(shape / fullBlockShape).astype("int")
                blockCache = numpy.ndarray(shape=numpy.prod(numBlocks),
                                           dtype=self.Output.meta.dtype)
                pool = RequestPool()
                # blocks holds the different roi keys for each of the blocks
                blocks = itertools.product(
                    *[list(range(i)) for i in numBlocks])
                blockKeys = []
                for b in blocks:
                    start = b * fullBlockShape
                    stop = b * fullBlockShape + fullBlockShape
                    stop = numpy.min(numpy.vstack((stop, shape)), axis=0)
                    blockKey = roiToSlice(start, stop)
                    blockKeys.append(blockKey)

                fun = self.inputs["Function"].value

                def predict_block(i):
                    data = self.Input[blockKeys[i]].wait()
                    blockCache[i] = fun(data)

                for i, f in enumerate(blockCache):
                    req = pool.request(partial(predict_block, i))

                pool.wait()
                pool.clean()

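                # Reduce once more across the per-block results; this two-stage application of 'fun'
                # matches a single full-volume pass only when 'fun' is an associative reduction (e.g. min or max).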
                self.cache = [fun(blockCache)]
            return self.cache
    def _train_forests_with_feature_importance(forests,
                                               X,
                                               y,
                                               feature_names,
                                               export_path=None):
        """
        Train all RFs (in parallel) and compute feature importances while doing so.
        The importances table will be logged as INFO, and also exported to a file if export_path is given.

        Returns: oobs and importances
        """
        oobs = [None] * len(forests)
        importances = [None] * len(forests)

        def store_training_results(i, training_results):
            oob, importance_results = training_results
            oobs[i] = oob
            importances[i] = importance_results

        with Timer() as train_timer:
            pool = RequestPool()
            for i, forest in enumerate(forests):
                req = Request(partial(forest.learnRFWithFeatureSelection, X,
                                      y))
                # save the training results
                req.notify_finished(partial(store_training_results, i))
                pool.add(req)
            pool.wait()

        logger.info("Training took, {} seconds".format(train_timer.seconds()))

        # Forests may have different numbers of trees,
        # so take a weighted average of their importances
        tree_counts = [f.treeCount() for f in forests]
        weights = numpy.array(tree_counts).astype(float)
        weights /= weights.sum()

        named_importances = collections.OrderedDict(
            list(
                zip(feature_names,
                    numpy.average(importances, weights=weights, axis=0))))

        importance_table = generate_importance_table(named_importances,
                                                     sort="overall",
                                                     export_path=export_path)

        logger.info(
            "Feature importance measurements during training: \n{}".format(
                importance_table))

        return oobs, named_importances
    def _execute_Output(self, slot, subindex, roi, result):
        """
        Overridden from OpUnblockedArrayCache
        """
        def copy_block(full_block_roi, clipped_block_roi):
            full_block_roi = numpy.asarray(full_block_roi)
            clipped_block_roi = numpy.asarray(clipped_block_roi)
            output_roi = numpy.asarray(clipped_block_roi) - roi.start

            # If the data exists already or we can just fetch it without needing extra scratch space,
            # just call the base class
            block_roi = self._get_containing_block_roi(clipped_block_roi)
            if block_roi is not None or (full_block_roi
                                         == clipped_block_roi).all():
                self._execute_Output_impl(clipped_block_roi,
                                          result[roiToSlice(*output_roi)])
            elif self.Input.meta.dontcache:
                # Data isn't in the cache, but we don't need it in the cache anyway.
                self.Input(*clipped_block_roi).writeInto(
                    result[roiToSlice(*output_roi)]).block()
            else:
                # Data doesn't exist yet in the cache.
                # Request the full block, but then discard the parts we don't need.

                # (We use allocateDestination() here to support MaskedArray types.)
                # TODO: We should probably just get rid of MaskedArray support altogether...
                full_block_data = self.Output.stype.allocateDestination(
                    SubRegion(self.Output, *full_block_roi))
                self._execute_Output_impl(full_block_roi, full_block_data)

                roi_within_block = clipped_block_roi - full_block_roi[0]
                self.Output.stype.copy_data(
                    result[roiToSlice(*output_roi)],
                    full_block_data[roiToSlice(*roi_within_block)])

        clipped_block_rois = getIntersectingRois(self.Input.meta.shape,
                                                 self._blockshape,
                                                 (roi.start, roi.stop), True)
        full_block_rois = getIntersectingRois(self.Input.meta.shape,
                                              self._blockshape,
                                              (roi.start, roi.stop), False)

        pool = RequestPool()
        for full_block_roi, clipped_block_roi in zip(full_block_rois,
                                                     clipped_block_rois):
            req = Request(
                partial(copy_block, full_block_roi, clipped_block_roi))
            pool.add(req)
        pool.wait()
    def execute(self, slot, subindex, roi, result):
        assert slot == self.ConcatenatedOutput
        self.progressSignal(0.0)

        num_dirty_slots = len(self._dirty_slots)
        subtask_progress = {}
        progress_lock = RequestLock()

        def forward_progress_updates(feature_slot, progress):
            with progress_lock:
                subtask_progress[feature_slot] = progress
                total_progress = 0.95 * sum(
                    subtask_progress.values()) / num_dirty_slots
            self.progressSignal(total_progress)

        logger.debug( "Updating features for {} dirty images out of {}"\
                      "".format( len(self._dirty_slots), len(self.FeatureMatrices) ) )

        pool = RequestPool()
        subresults = []
        for feature_slot, progress_slot in zip(self.FeatureMatrices,
                                               self.ProgressSignals):
            subresults.append([None])
            req = feature_slot[:]
            req.writeInto(subresults[-1])

            # Only use progress for slots that were dirty.
            # The others are going to be really fast.
            if feature_slot in self._dirty_slots:
                sub_progress_signal = progress_slot.value
                sub_progress_signal.subscribe(
                    partial(forward_progress_updates, feature_slot))
            pool.add(req)
        pool.wait()

        # Reset dirty slots
        self._dirty_slots = set()

        # Since the subresults are returned in 'value' slots,
        #  we have to unpack them from their single-element lists.
        subresult_list = list(itertools.chain(*subresults))

        total_matrix = numpy.concatenate(subresult_list, axis=0)
        self.progressSignal(100.0)
        result[0] = total_matrix
Example #53
    def _train_forests(forests, X, y):
        """
        Train all RFs (in parallel), and return the oobs.
        """
        oobs = [None] * len(forests)
        def store_oob_results(i, oob):
            oobs[i] = oob

        with Timer() as train_timer:
            pool = RequestPool()
            for i, forest in enumerate(forests):
                req = Request( partial(forest.learnRF, X, y) )
                # save the oob results
                req.notify_finished( partial( store_oob_results, i ) )
                pool.add( req )
            pool.wait()          
        logger.info("Training took, {} seconds".format( train_timer.seconds() ) )
        return oobs
Example #54
    def create_and_train(self, X, y, feature_names=None):
        # Distribute trees as evenly as possible
        tree_counts = numpy.array([self._num_trees // self._num_forests] *
                                  self._num_forests)
        tree_counts[:self._num_trees % self._num_forests] += 1
        assert tree_counts.sum() == self._num_trees
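        # Drop forests that would receive zero trees (possible when there are more forests than trees).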
        tree_counts = list(map(int, tree_counts))
        tree_counts[:] = (tree_count for tree_count in tree_counts
                          if tree_count != 0)

        logger.debug("Training parallel vigra RF")
        # Save for future reference
        known_labels = numpy.unique(y)

        X = numpy.asarray(X, numpy.float32)
        y = numpy.asarray(y, numpy.uint32)
        if y.ndim == 1:
            y = y[:, numpy.newaxis]

        assert X.ndim == 2
        assert len(X) == len(y)

        # Create N forests
        forests = []
        for tree_count in tree_counts:
            forests.append(
                vigra.learning.RandomForest(tree_count, **self._kwargs))

        # Train them all in parallel
        oobs = [None] * len(forests)
        pool = RequestPool()
        for i, forest in enumerate(forests):
            req = Request(partial(forest.learnRF, X, y))
            # save the oobs
            req.notify_finished(partial(oobs.__setitem__, i))
            pool.add(req)

        with Timer() as timer:
            pool.wait()
        logger.info("Training completed in {} seconds. Average OOB: {}".format(
            timer.seconds(), numpy.average(oobs)))
        return ParallelVigraRfLazyflowClassifier(forests, oobs, known_labels,
                                                 feature_names)
Example #55
    def execute(self, slot, subindex, roi, result):
        assert slot == self.LabelAndFeatureMatrix
        self.progressSignal(0.0)

        # Technically, this could result in strange progress reporting if execute()
        #  is called by multiple threads in parallel.
        # This could be fixed with some fancier progress state, but
        # (1) We don't expect that to be typical, and
        # (2) progress reporting is merely informational.
        num_dirty_blocks = len(self._dirty_blocks)

        def update_progress(result):
            remaining_dirty = len(self._dirty_blocks)
            percent_complete = 95.0 * (num_dirty_blocks -
                                       remaining_dirty) / num_dirty_blocks
            self.progressSignal(percent_complete)

        # Update all dirty blocks in the cache
        logger.debug("Updating {} dirty blocks".format(num_dirty_blocks))
        pool = RequestPool()
        for block_start in self._dirty_blocks:
            req = Request(partial(self._update_block, block_start))
            req.notify_finished(update_progress)
            pool.add(req)
        pool.wait()

        # Concatenate the all blockwise results
        if self._blockwise_feature_matrices:
            total_feature_matrix = numpy.concatenate(
                self._blockwise_feature_matrices.values(), axis=0)
        else:
            # No label points at all.
            # Return an empty label&feature matrix (of the correct shape)
            num_feature_channels = self.FeatureImage.meta.shape[-1]
            total_feature_matrix = numpy.ndarray(shape=(0, 1 +
                                                        num_feature_channels),
                                                 dtype=numpy.float64)

        self.progressSignal(100.0)
        logger.debug("After update, there are {} clean blocks".format(
            len(self._blockwise_feature_matrices)))
        result[0] = total_feature_matrix
Example #56
    def _executePredictionImage(self, slot, roi, destination):
        roi_one_channel = numpy.array((roi.start, roi.stop))
        roi_one_channel[..., -1] = (0, 1)
        # Determine intersecting blocks
        block_shape = self._getFullShape(self.BlockShape3dDict.value)
        block_starts = getIntersectingBlocks(block_shape, roi_one_channel)
        block_starts = list(map(tuple, block_starts))

        # Ensure that block pipelines exist (create first if necessary)
        for block_start in block_starts:
            self._ensurePipelineExists(block_start)

        # Retrieve result from each block, and write into the appropriate region of the destination
        pool = RequestPool()
        for block_start in block_starts:
            opBlockPipeline = self._blockPipelines[block_start]
            block_roi = opBlockPipeline.block_roi
            block_intersection = getIntersection(block_roi, roi_one_channel)
            block_relative_intersection = numpy.subtract(
                block_intersection, block_roi[0])
            destination_relative_intersection = numpy.subtract(
                block_intersection, roi_one_channel[0])

            block_slot = opBlockPipeline.PredictionImage
            if slot == self.ProbabilityChannelImage:
                block_slot = opBlockPipeline.ProbabilityChannelImage
                # Add channels back to roi
                # request all channels
                block_relative_intersection[..., -1] = (
                    0, opBlockPipeline.ProbabilityChannelImage.meta.shape[-1])
                # But only write the ones that were specified in the original roi
                destination_relative_intersection[..., -1] = (roi.start[-1],
                                                              roi.stop[-1])

            # Request the data
            destination_slice = roiToSlice(*destination_relative_intersection)
            req = block_slot(*block_relative_intersection)
            req.writeInto(destination[destination_slice])
            pool.add(req)
        pool.wait()

        return destination
Example #57
    def execute(self, slot, subindex, roi, result):
        with self._lock:
            if self.cache is None:
                fullBlockShape = numpy.array(
                    [self.blockShape.value for i in self.Input.meta.shape])
                fun = self.inputs["Function"].value
                #data = self.inputs["Input"][:].wait()
                #split up requests into blocks
                shape = self.Input.meta.shape
                numBlocks = numpy.ceil(shape /
                                       (1.0 * fullBlockShape)).astype("int")
                blockCache = numpy.ndarray(shape=numpy.prod(numBlocks),
                                           dtype=self.Output.meta.dtype)
                pool = RequestPool()
                #blocks holds the different roi keys for each of the blocks
                blocks = itertools.product(*[range(i) for i in numBlocks])
                blockKeys = []
                for b in blocks:
                    start = b * fullBlockShape
                    stop = b * fullBlockShape + fullBlockShape
                    stop = numpy.min(numpy.vstack((stop, shape)), axis=0)
                    blockKey = roiToSlice(start, stop)
                    blockKeys.append(blockKey)

                def predict_block(i):
                    data = self.Input[blockKeys[i]].wait()
                    blockCache[i] = fun(data)

                for i, f in enumerate(blockCache):
                    req = pool.request(partial(predict_block, i))

                pool.wait()
                pool.clean()

                self.cache = [fun(blockCache)]
            return self.cache
Example #58
    def predict_probabilities(self, X):
        logger.debug("Predicting with parallel vigra RF")
        X = numpy.asarray(X, dtype=numpy.float32)

        # Create a request for each forest
        reqs = []
        for forest in self._forests:
            req = Request(partial(forest.predictProbabilities, X))
            reqs.append(req)

        # Execute all requests in a pool
        pool = RequestPool()
        for req in reqs:
            pool.add(req)
        pool.wait()

        # Aggregate the results
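        # Weight each forest's class probabilities by its tree count, then divide by the total
        # number of trees to obtain a tree-weighted average across forests.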
        predictions = self._forests[0].treeCount() * reqs[0].result
        for forest, req in zip(self._forests[1:], reqs[1:]):
            predictions += forest.treeCount() * req.result

        predictions /= self._num_trees
        return predictions
    def execute(self, slot, subindex, slot_roi, target):
        assert slot == self.Features or slot == self.Output
        if slot == self.Features:
            feature_slice = roiToSlice(slot_roi.start, slot_roi.stop)
            index = subindex[0]
            feature_slice = list(feature_slice)

            # Translate channel slice of this feature to the channel slice of the output slot.
            output_channel_offset = self.featureOutputChannels[index][0]
            feature_slice[1] = slice(
                output_channel_offset + feature_slice[1].start,
                output_channel_offset + feature_slice[1].stop)
            slot_roi = SubRegion(self.Output, pslice=feature_slice)

            # Get output slot region for this channel
            return self.execute(self.Output, (), slot_roi, target)
        elif slot == self.Output:
            # Correlation of variable 'families' representing reference frames:
            #  ______________________________
            # | input/output frame           |  input/output shape given by slots
            # |  _________________________   |
            # | | smooth frame            |  |  pre-smoothing op needs halo around filter roi
            # | |  ____________________   |  |
            # | | |filter frame        |  |  |  filter needs halo around target roi
            # | | |  _______________   |  |  |
            # | | | | target frame  |  |  |  |  target is given by output_roi

            # note: The 'full_' variable prefix refers to the full 5D shape (tczyx); variables without 'full_'
            #       mostly refer to the 3D spatial subregion (zyx)

            full_output_slice = slot_roi.toSlice()

            logger.debug(
                f"OpPixelFeaturesPresmoothed: request {slot_roi.pprint()}")

            assert (slot_roi.stop <= self.Output.meta.shape).all()

            full_output_shape = self.Output.meta.shape
            full_output_start, full_output_stop = sliceToRoi(
                full_output_slice, full_output_shape)
            assert len(full_output_shape) == 5
            if all(self.ComputeIn2d.value):  # todo: check for this particular slice
                axes2enlarge = (0, 1, 1)
            else:
                axes2enlarge = (1, 1, 1)

            output_shape = full_output_shape[2:]
            output_start = full_output_start[2:]
            output_stop = full_output_stop[2:]

            axistags = self.Output.meta.axistags
            target = target.view(vigra.VigraArray)
            target.axistags = copy.copy(axistags)

            # filter roi in input frame
            # sigma = 0.7, because the features receive a pre-smoothed array and don't need much of a neighborhood
            input_filter_start, input_filter_stop = roi.enlargeRoiForHalo(
                output_start,
                output_stop,
                output_shape,
                0.7,
                self.WINDOW_SIZE,
                enlarge_axes=axes2enlarge)

            # smooth roi in input frame
            input_smooth_start, input_smooth_stop = roi.enlargeRoiForHalo(
                input_filter_start,
                input_filter_stop,
                output_shape,
                self.max_sigma,
                self.WINDOW_SIZE,
                enlarge_axes=axes2enlarge,
            )

            # target roi in filter frame
            filter_target_start = roi.TinyVector(output_start -
                                                 input_filter_start)
            filter_target_stop = roi.TinyVector(output_stop -
                                                input_filter_start)

            # filter roi in smooth frame
            smooth_filter_start = roi.TinyVector(input_filter_start -
                                                 input_smooth_start)
            smooth_filter_stop = roi.TinyVector(input_filter_stop -
                                                input_smooth_start)

            filter_target_slice = roi.roiToSlice(filter_target_start,
                                                 filter_target_stop)
            input_smooth_slice = roi.roiToSlice(input_smooth_start,
                                                input_smooth_stop)

            # pre-smooth for all requested time slices and all channels
            full_input_smooth_slice = (full_output_slice[0], slice(None),
                                       *input_smooth_slice)
            req = self.Input[full_input_smooth_slice]
            source = req.wait()
            req.clean()
            req.destination = None
            if source.dtype != numpy.float32:
                sourceF = source.astype(numpy.float32)
                try:
                    source.resize((1, ), refcheck=False)
                except Exception:
                    pass
                del source
                source = sourceF

            sourceV = source.view(vigra.VigraArray)
            sourceV.axistags = copy.copy(self.Input.meta.axistags)

            dimCol = len(self.scales)
            dimRow = self.matrix.shape[0]

            presmoothed_source = [None] * dimCol

            source_smooth_shape = tuple(smooth_filter_stop -
                                        smooth_filter_start)
            full_source_smooth_shape = (
                full_output_stop[0] - full_output_start[0],
                self.Input.meta.shape[1],
            ) + source_smooth_shape
            try:
                for j in range(dimCol):
                    for i in range(dimRow):
                        if self.matrix[i, j]:
                            # There is at least one filter op with this scale
                            break
                    else:
                        # There is no filter op at this scale
                        continue

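                    # Pre-smooth by only the extra sigma needed on top of an assumed base sigma of 1.0
                    # (Gaussian variances add, hence the square root of the difference of squares).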
                    if self.scales[j] > 1.0:
                        tempSigma = math.sqrt(self.scales[j]**2 - 1.0)
                    else:
                        tempSigma = self.scales[j]

                    presmoothed_source[j] = numpy.ndarray(
                        full_source_smooth_shape, numpy.float32)

                    droi = (
                        (0, *tuple(smooth_filter_start._asint())),
                        (sourceV.shape[1],
                         *tuple(smooth_filter_stop._asint())),
                    )
                    for i, vsa in enumerate(sourceV.timeIter()):
                        presmoothed_source[j][
                            i, ...] = self._computeGaussianSmoothing(
                                vsa,
                                tempSigma,
                                droi,
                                in2d=self.ComputeIn2d.value[j])

            except RuntimeError as e:
                if "kernel longer than line" in str(e):
                    raise RuntimeError(
                        "Feature computation error:\nYour image is too small to apply a filter with "
                        f"sigma={self.scales[j]:.1f}. Please select features with smaller sigmas."
                    )
                else:
                    raise e

            del sourceV
            try:
                source.resize((1, ), refcheck=False)
            except ValueError:
                # Sometimes this fails, but that's okay.
                logger.debug("Failed to free array memory.")
            del source

            cnt = 0
            written = 0
            closures = []
            # connect individual operators
            for i in range(dimRow):
                for j in range(dimCol):
                    if self.matrix[i, j]:
                        oslot = self.featureOps[i][j].Output
                        req = None
                        slices = oslot.meta.shape[1]
                        if (cnt + slices >= slot_roi.start[1]
                                and slot_roi.start[1] - cnt < slices
                                and slot_roi.start[1] + written <
                                slot_roi.stop[1]):
                            begin = 0
                            if cnt < slot_roi.start[1]:
                                begin = slot_roi.start[1] - cnt
                            end = slices
                            if cnt + end > slot_roi.stop[1]:
                                end = slot_roi.stop[1] - cnt

                            # feature slice in output frame
                            feature_slice = (slice(None),
                                             slice(
                                                 written, written + end -
                                                 begin)) + (slice(None), ) * 3

                            subtarget = target[feature_slice]
                            # readjust the roi for the new source array
                            full_filter_target_slice = [
                                full_output_slice[0],
                                slice(begin, end), *filter_target_slice
                            ]
                            filter_target_roi = SubRegion(
                                oslot, pslice=full_filter_target_slice)

                            closure = partial(
                                oslot.operator.execute,
                                oslot,
                                (),
                                filter_target_roi,
                                subtarget,
                                sourceArray=presmoothed_source[j],
                            )
                            closures.append(closure)

                            written += end - begin
                        cnt += slices
            pool = RequestPool()
            for c in closures:
                pool.request(c)
            pool.wait()
            pool.clean()

            for i in range(len(presmoothed_source)):
                if presmoothed_source[i] is not None:
                    try:
                        presmoothed_source[i].resize((1, ))
                    except Exception:
                        presmoothed_source[i] = None