예제 #1
0
    def provide(self, request):
        '''Request upstream batches until one has a non-empty array.

        Batches are requested from ``self.upstream_provider`` with the
        unchanged ``request`` until the array stored under
        ``self.ensure_nonempty`` has a ``data.size`` different from zero.

        While rejecting, an info message is logged whenever the elapsed
        time exceeds the current reporting threshold; the threshold starts
        at 2 and doubles after every report.

        Args:

            request:

                The request that is forwarded upstream on every attempt.

        Returns:

            The first accepted batch. A ``Timing`` covering the whole wait
            is added to its ``profiling_stats``.
        '''

        timer = Timing(self)
        timer.start()

        next_report_after = 2
        rejected = 0

        while True:

            batch = self.upstream_provider.request_batch(request)

            if batch.arrays[self.ensure_nonempty].data.size != 0:
                logger.debug("Accepted batch with shape: %s",
                             batch.arrays[self.ensure_nonempty].data.shape)
                break

            rejected += 1

            # report with exponentially growing intervals to avoid
            # flooding the log
            if timer.elapsed() > next_report_after:
                logger.info(
                    "rejected %s batches, been waiting for a good one "
                    "since %s", rejected, next_report_after)
                next_report_after *= 2

        timer.stop()
        batch.profiling_stats.add(timer)

        return batch
예제 #2
0
    def provide(self, request):
        '''Request upstream batches until one passes the mask criterion.

        A batch is accepted if the mean of the data of the array stored
        under ``self.mask`` is strictly greater than ``self.min_masked``.
        If it is not, and ``self.reject_probability`` is smaller than 1, the
        batch is still accepted whenever ``random.random()`` exceeds
        ``self.reject_probability``.

        While rejecting, a warning is logged whenever the elapsed time
        exceeds the current reporting threshold; the threshold starts at 10
        and doubles after every report.

        Args:

            request:

                The request that is forwarded upstream on every attempt.
                It has to contain ``self.mask``.

        Returns:

            The first accepted batch. A ``Timing`` covering the whole wait
            is added to its ``profiling_stats``.

        Raises:

            AssertionError: If ``self.mask`` is not part of ``request``.
        '''

        timer = Timing(self)
        timer.start()

        assert self.mask in request, (
            "Reject can only be used if a GT mask is requested")

        next_report_after = 10
        rejected = 0

        while True:

            batch = self.upstream_provider.request_batch(request)
            mask_ratio = batch.arrays[self.mask].data.mean()

            accepted = mask_ratio > self.min_masked

            # give batches that failed the criterion a second chance
            if not accepted and self.reject_probability < 1.:
                accepted = random.random() > self.reject_probability

            if accepted:
                logger.debug("accepted batch with mask ratio %f at %s",
                             mask_ratio, batch.arrays[self.mask].spec.roi)
                break

            logger.debug("reject batch with mask ratio %f at %s",
                         mask_ratio, batch.arrays[self.mask].spec.roi)
            rejected += 1

            # report with exponentially growing intervals to avoid
            # flooding the log
            if timer.elapsed() > next_report_after:
                logger.warning(
                    "rejected %d batches, been waiting for a good one "
                    "since %ds", rejected, next_report_after)
                next_report_after *= 2

        timer.stop()
        batch.profiling_stats.add(timer)

        return batch
예제 #3
0
    def provide(self, request):
        '''Request upstream batches until one passes the mask criterion.

        A batch is accepted if the mean of the data of the volume stored
        under ``self.mask_volume_type`` is greater than or equal to
        ``self.min_masked``.

        While rejecting, a warning is logged whenever the elapsed time
        exceeds the current reporting threshold; the threshold starts at 10
        and doubles after every report.

        Args:

            request:

                The request that is forwarded upstream on every attempt.
                ``request.volumes`` has to contain
                ``self.mask_volume_type``.

        Returns:

            The first accepted batch. A ``Timing`` covering the whole wait
            is added to its ``profiling_stats``.

        Raises:

            AssertionError: If ``self.mask_volume_type`` is not part of
                ``request.volumes``.
        '''

        report_next_timeout = 10
        num_rejected = 0

        timing = Timing(self)
        timing.start()

        assert self.mask_volume_type in request.volumes, (
            "Reject can only be used if a GT mask is requested")

        have_good_batch = False
        while not have_good_batch:

            batch = self.upstream_provider.request_batch(request)
            mask_ratio = batch.volumes[self.mask_volume_type].data.mean()
            have_good_batch = mask_ratio >= self.min_masked

            if not have_good_batch:

                # pass the arguments to the logger instead of formatting
                # eagerly, such that the ROI is only converted to a string
                # if the message is actually emitted
                logger.debug(
                    "reject batch with mask ratio %f at %s",
                    mask_ratio, batch.volumes[self.mask_volume_type].roi)
                num_rejected += 1

                # report with exponentially growing intervals to avoid
                # flooding the log
                if timing.elapsed() > report_next_timeout:

                    logger.warning(
                        "rejected %d batches, been waiting for a good one "
                        "since %ds", num_rejected, report_next_timeout)
                    report_next_timeout *= 2

        logger.debug(
            "good batch with mask ratio %f found at %s",
            mask_ratio, batch.volumes[self.mask_volume_type].roi)

        timing.stop()
        batch.profiling_stats.add(timing)

        return batch
예제 #4
0
    def provide(self, request):
        '''Request upstream batches until one passes all enabled criteria.

        The ``random`` module is seeded with ``request.random_seed`` first.
        Two optional criteria are evaluated for every upstream batch:

            * if ``self.mask`` is set, the mean of the data of the array
              stored under ``self.mask`` has to be strictly greater than
              ``self.min_masked``

            * if ``self.ensure_nonempty`` is set, the points stored under
              ``self.ensure_nonempty`` have to contain at least one node

        A batch failing a criterion is still accepted if
        ``self.reject_probability`` is smaller than 1 and
        ``random.random()`` exceeds it.

        While rejecting, a warning is logged whenever the elapsed time
        exceeds the current reporting threshold; the threshold starts at 10
        and doubles after every report.

        Args:

            request:

                The request that is forwarded upstream on every attempt.
                It has to contain ``self.mask`` and
                ``self.ensure_nonempty`` if those are set.

        Returns:

            The first accepted batch. A ``Timing`` covering the whole wait
            is added to its ``profiling_stats``.

        Raises:

            AssertionError: If an enabled criterion refers to a key that is
                not part of ``request``.
        '''
        random.seed(request.random_seed)

        timer = Timing(self)
        timer.start()

        if self.mask:
            assert self.mask in request, (
                "Reject can only be used if %s is provided" % self.mask)
        if self.ensure_nonempty:
            assert self.ensure_nonempty in request, (
                "Reject can only be used if %s is provided" %
                self.ensure_nonempty)

        next_report_after = 10
        rejected = 0

        while True:

            batch = self.upstream_provider.request_batch(request)

            # a value of None marks a criterion as disabled
            mask_ratio = (
                batch.arrays[self.mask].data.mean()
                if self.mask else None)
            num_points = (
                len(list(batch.points[self.ensure_nonempty].nodes))
                if self.ensure_nonempty else None)

            enough_mask = mask_ratio is None or mask_ratio > self.min_masked
            enough_points = num_points is None or num_points > 0

            accepted = enough_mask and enough_points

            # give batches that failed a criterion a second chance
            if not accepted and self.reject_probability < 1.:
                accepted = random.random() > self.reject_probability

            if accepted:
                if self.mask:
                    logger.debug("accepted batch with mask ratio %f at %s",
                                 mask_ratio, batch.arrays[self.mask].spec.roi)
                if self.ensure_nonempty:
                    logger.debug("accepted batch with nonempty points in %s",
                                 self.ensure_nonempty)
                break

            if self.mask:
                logger.debug("reject batch with mask ratio %f at %s",
                             mask_ratio, batch.arrays[self.mask].spec.roi)
            if self.ensure_nonempty:
                logger.debug("reject batch with empty points in %s",
                             batch.points[self.ensure_nonempty].spec.roi)
            rejected += 1

            # report with exponentially growing intervals to avoid
            # flooding the log
            if timer.elapsed() > next_report_after:
                logger.warning(
                    "rejected %d batches, been waiting for a good one "
                    "since %ds", rejected, next_report_after)
                next_report_after *= 2

        timer.stop()
        batch.profiling_stats.add(timer)

        return batch
예제 #5
0
class PrintProfilingStats(BatchFilter):
    '''Print profiling information about nodes upstream of this node in the DAG.

    The output also includes a ``TOTAL`` section, which shows the wall-time
    spent in the upstream and downstream passes. For the downstream pass, this
    information is not available in the first iteration, since the
    request-batch cycle is not completed, yet.

    Args:

        every (``int``):

            Collect statistics about that many batch requests and show min,
            max, mean, and median runtimes.
    '''

    def __init__(self, every=1):

        self.every = every
        # number of batches seen so far
        self.n = 0
        # per-node statistics collected since the last report
        self.accumulated_stats = ProfilingStats()
        # timings of the current upstream/downstream pass and the summaries
        # of all passes since the last report
        self.__upstream_timing = Timing(self)
        self.__upstream_timing_summary = TimingSummary()
        self.__downstream_timing = Timing(self)
        self.__downstream_timing_summary = TimingSummary()

    def prepare(self, request):
        '''Finish timing the downstream pass and start timing upstream.'''

        self.__downstream_timing.stop()
        # skip the first one, where we don't know how much time we spent
        # downstream
        if self.__downstream_timing.elapsed() > 0:
            self.__downstream_timing_summary.add(self.__downstream_timing)
            self.__downstream_timing = Timing(self)

        self.__upstream_timing.start()

    def process(self, batch, request):
        '''Accumulate the stats of ``batch`` and log a report if one is due.

        The upstream timing is finished and the downstream timing started.
        The profiling stats of ``batch`` are merged into the accumulated
        stats. On every ``every``-th call, a table with one row per node and
        method, followed by a ``TOTAL`` section, is logged at info level and
        the accumulated stats and summaries are reset.
        '''

        self.__upstream_timing.stop()
        self.__upstream_timing_summary.add(self.__upstream_timing)
        self.__upstream_timing = Timing(self)

        self.__downstream_timing.start()

        self.n += 1
        print_stats = self.n % self.every == 0

        self.accumulated_stats.merge_with(batch.profiling_stats)

        if not print_stats:
            return

        # NOTE(review): the span is currently not part of the report; the
        # call is kept as it was -- confirm whether it can be dropped
        span_start, span_end = self.accumulated_stats.span()

        header = "".join([
            "NODE".ljust(20),
            "METHOD".ljust(10),
            "COUNTS".ljust(10),
            "MIN".ljust(10),
            "MAX".ljust(10),
            "MEAN".ljust(10),
            "MEDIAN".ljust(10),
        ])

        lines = [
            "",
            "Profiling Stats",
            "===============",
            "",
            header,
        ]

        summaries = list(self.accumulated_stats.get_timing_summaries().items())
        # method names can be None, which can not be compared to strings;
        # use an explicit key that sorts entries without method name first
        summaries.sort(
            key=lambda item: (
                item[0][0],
                item[0][1] is not None,
                item[0][1] or ""))

        for (node_name, method_name), summary in summaries:

            if summary.counts() > 0:
                # truncate to one character less than the column width,
                # such that columns stay aligned and separated
                method_column = "" if method_name is None else method_name[:9]
                lines.append(
                    node_name[:19].ljust(20) +
                    method_column.ljust(10) +
                    self._summary_columns(summary))

        lines.append("")
        lines.append("TOTAL")

        for phase, summary in zip(
            ['upstream', 'downstream'],
            [self.__upstream_timing_summary, self.__downstream_timing_summary
             ]):

            if summary.counts() > 0:
                # the phase name spans the NODE and METHOD columns
                lines.append(
                    phase[:19].ljust(30) + self._summary_columns(summary))

        # two empty entries result in the trailing empty line of the report
        lines.append("")
        lines.append("")

        logger.info("\n".join(lines))

        # reset summaries
        self.accumulated_stats = ProfilingStats()
        self.__upstream_timing_summary = TimingSummary()
        self.__downstream_timing_summary = TimingSummary()

    @staticmethod
    def _summary_columns(summary):
        '''Format counts, min, max, mean, and median of ``summary`` as five
        left-aligned columns of width 10.'''

        cells = ["%d" % summary.counts()]
        cells.extend(
            "%.2f" % value
            for value in (
                summary.min(),
                summary.max(),
                summary.mean(),
                summary.median()))

        return "".join(cell[:9].ljust(10) for cell in cells)