Code Example #1
# Imports assumed from the superbenchmark package layout.
from superbench.benchmarks import BenchmarkType, ReturnCode
from superbench.benchmarks.result import BenchmarkResult


def test_add_result():
    """Test interface BenchmarkResult.add_result()."""
    result = BenchmarkResult('micro', BenchmarkType.MICRO, ReturnCode.SUCCESS)
    result.add_result('metric1', 300)
    result.add_result('metric1', 200)
    assert (result.result['metric1'][0] == 300)
    assert (result.result['metric1'][1] == 200)
Code Example #2
# Imports assumed from the superbenchmark package layout.
from superbench.benchmarks import BenchmarkType, ReturnCode
from superbench.benchmarks.reducer import ReduceType
from superbench.benchmarks.result import BenchmarkResult


def test_serialize_deserialize():
    """Test serialization/deserialization and compare the results."""
    # Result with one metric.
    result = BenchmarkResult('pytorch-bert-base1', BenchmarkType.MICRO, ReturnCode.SUCCESS, run_count=2)
    result.add_result('metric1', 300, ReduceType.MAX)
    result.add_result('metric1', 200, ReduceType.MAX)
    result.add_result('metric2', 100, ReduceType.AVG)
    result.add_raw_data('metric1', [1, 2, 3], False)
    result.add_raw_data('metric1', [4, 5, 6], False)
    result.add_raw_data('metric1', [7, 8, 9], False)
    start_time = '2021-02-03 16:59:49'
    end_time = '2021-02-03 17:00:08'
    result.set_timestamp(start_time, end_time)
    result.set_benchmark_type(BenchmarkType.MICRO)

    expected = (
        '{"name": "pytorch-bert-base1", "type": "micro", "run_count": 2, "return_code": 0, '
        '"start_time": "2021-02-03 16:59:49", "end_time": "2021-02-03 17:00:08", '
        '"raw_data": {"metric1": [[1, 2, 3], [4, 5, 6], [7, 8, 9]]}, '
        '"result": {"return_code": [0], "metric1": [300, 200], "metric2": [100]}, '
        '"reduce_op": {"return_code": null, "metric1": "max", "metric2": "avg"}}'
    )
    assert (result.to_string() == expected)
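
Because to_string() produces plain JSON, the serialized form can also be inspected field by field with the standard-library json module. The following is a small usage sketch against the result object built above; it checks nothing beyond what the expected string already shows.

import json

# Parse the serialized result back into a dict and check individual fields.
parsed = json.loads(result.to_string())
assert parsed['result']['metric1'] == [300, 200]
assert parsed['reduce_op']['metric1'] == 'max'
assert parsed['reduce_op']['return_code'] is None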
Code Example #3
File: base.py  Project: microsoft/superbenchmark
import argparse
import numbers
from abc import ABC, abstractmethod
from datetime import datetime

import numpy as np

# Project-internal imports (module paths assumed from the superbenchmark layout);
# SortedMetavarTypeHelpFormatter is a help-formatter class defined elsewhere in the project.
from superbench.common.utils import logger
from superbench.benchmarks import BenchmarkType, ReturnCode
from superbench.benchmarks.result import BenchmarkResult


class Benchmark(ABC):
    """The base class of all benchmarks."""
    def __init__(self, name, parameters=''):
        """Constructor.

        Args:
            name (str): benchmark name.
            parameters (str): benchmark parameters.
        """
        self._name = name
        self._argv = list(filter(
            None,
            parameters.split(' '))) if parameters is not None else list()
        self._benchmark_type = None
        self._parser = argparse.ArgumentParser(
            add_help=False,
            usage=argparse.SUPPRESS,
            allow_abbrev=False,
            formatter_class=SortedMetavarTypeHelpFormatter,
        )
        self._args = None
        self._curr_run_index = 0
        self._result = None

    def add_parser_arguments(self):
        """Add the specified arguments."""
        self._parser.add_argument(
            '--run_count',
            type=int,
            default=1,
            required=False,
            help='The run count of benchmark.',
        )
        self._parser.add_argument(
            '--duration',
            type=int,
            default=0,
            required=False,
            help='The elapsed time of benchmark in seconds.',
        )
        self._parser.add_argument(
            '--log_raw_data',
            action='store_true',
            default=False,
            help=
            'Log raw data into file instead of saving it into result object.',
        )

    def get_configurable_settings(self):
        """Get all the configurable settings.

        Return:
            All configurable settings in raw string.
        """
        return self._parser.format_help().strip()

    def parse_args(self, ignore_invalid=False):
        """Parse the arguments.

        Return:
            ret (bool): whether parse succeed or not.
            args (argparse.Namespace): parsed arguments.
            unknown (list): unknown arguments.
        """
        try:
            args, unknown = self._parser.parse_known_args(self._argv)
        except BaseException as e:
            if ignore_invalid:
                logger.info(
                    'Missing or invalid parameters, will ignore the error and skip the args checking.'
                )
                return True, None, []
            else:
                logger.error(
                    'Invalid argument - benchmark: {}, message: {}.'.format(
                        self._name, str(e)))
                return False, None, []

        ret = True
        if len(unknown) > 0:
            logger.error(
                'Unknown arguments - benchmark: {}, unknown arguments: {}'.
                format(self._name, ' '.join(unknown)))
            ret = False

        return ret, args, unknown

    def _preprocess(self):
        """Preprocess/preparation operations before the benchmarking.

        Return:
            True if _preprocess() succeed.
        """
        self.add_parser_arguments()
        ret, self._args, unknown = self.parse_args()

        if not ret:
            self._result = BenchmarkResult(self._name, self._benchmark_type,
                                           ReturnCode.INVALID_ARGUMENT)
            return False

        self._result = BenchmarkResult(self._name,
                                       self._benchmark_type,
                                       ReturnCode.SUCCESS,
                                       run_count=self._args.run_count)

        if not isinstance(self._benchmark_type, BenchmarkType):
            logger.error(
                'Invalid benchmark type - benchmark: {}, type: {}'.format(
                    self._name, type(self._benchmark_type)))
            self._result.set_return_code(ReturnCode.INVALID_BENCHMARK_TYPE)
            return False

        return True

    def _postprocess(self):
        """Postprocess/cleanup operations after the benchmarking.

        Return:
            True if _postprocess() succeed.
        """
        return True

    @abstractmethod
    def _benchmark(self):
        """Implementation for benchmarking."""
        pass

    def run(self):
        """Function to launch the benchmarking.

        Return:
            True if run benchmark successfully.
        """
        ret = True
        try:
            ret &= self._preprocess()
            if ret:
                self._start_time = datetime.utcnow().strftime(
                    '%Y-%m-%d %H:%M:%S')
                for self._curr_run_index in range(self._args.run_count):
                    ret &= self._benchmark()
                self._end_time = datetime.utcnow().strftime(
                    '%Y-%m-%d %H:%M:%S')
                self._result.set_timestamp(self._start_time, self._end_time)

                if ret:
                    ret &= self.__check_result_format()
        except BaseException as e:
            self._result.set_return_code(ReturnCode.RUNTIME_EXCEPTION_ERROR)
            logger.error(
                'Run benchmark failed - benchmark: {}, message: {}'.format(
                    self._name, str(e)))
        finally:
            ret &= self._postprocess()

        return ret

    def __check_result_format(self):
        """Check the validation of result object.

        Return:
            True if the result is valid.
        """
        if (not self.__check_result_type()) or (
                not self.__check_summarized_result()) or (
                    not self.__check_raw_data()):
            self._result.set_return_code(ReturnCode.INVALID_BENCHMARK_RESULT)
            return False

        return True

    def __check_result_type(self):
        """Check the type of result object.

        Return:
            True if the result is instance of BenchmarkResult.
        """
        if not isinstance(self._result, BenchmarkResult):
            logger.error(
                'Invalid benchmark result type - benchmark: {}, type: {}'.
                format(self._name, type(self._result)))
            return False

        return True

    def __is_list_type(self, data, t):
        if isinstance(data, list) and all(
                isinstance(item, t) for item in data):
            return True
        return False

    def __is_list_list_type(self, data, t):
        if (self.__is_list_type(data, list) and all(
                isinstance(value, t) for item in data for value in item)):
            return True
        return False

    def __check_summarized_result(self):
        """Check the validation of summary result.

        Return:
            True if the summary result is instance of List[Number].
        """
        for metric in self._result.result:
            if not self.__is_list_type(self._result.result[metric],
                                       numbers.Number):
                logger.error(
                    'Invalid summarized result - benchmark: {}, metric: {}, result: {}.'
                    .format(self._name, metric, self._result.result[metric]))
                return False

        return True

    def __check_raw_data(self):
        """Check the validation of raw data.

        Return:
            True if the raw data is:
              instance of List[List[Number]] for BenchmarkType.MODEL.
              instance of List[str] for BenchmarkType.DOCKER.
              instance of List[List[Number]] or List[str] for BenchmarkType.MICRO.
        """
        for metric in self._result.raw_data:
            is_valid = True
            if self._benchmark_type == BenchmarkType.MODEL:
                is_valid = self.__is_list_list_type(
                    self._result.raw_data[metric], numbers.Number)
            elif self._benchmark_type == BenchmarkType.DOCKER:
                is_valid = self.__is_list_type(self._result.raw_data[metric],
                                               str)
            elif self._benchmark_type == BenchmarkType.MICRO:
                is_valid = self.__is_list_type(
                    self._result.raw_data[metric],
                    str) or self.__is_list_list_type(
                        self._result.raw_data[metric], numbers.Number)
            if not is_valid:
                logger.error(
                    'Invalid raw data type - benchmark: {}, metric: {}, raw data: {}.'
                    .format(self._name, metric, self._result.raw_data[metric]))
                return False

        return True

    def _process_percentile_result(self, metric, result, reduce_type=None):
        """Function to process the percentile results.

        Args:
            metric (str): metric name which is the key.
            result (List[numbers.Number]): numerical result.
            reduce_type (ReduceType): The type of reduce function.
        """
        if len(result) > 0:
            percentile_list = ['50', '90', '95', '99', '99.9']
            for percentile in percentile_list:
                self._result.add_result(
                    '{}_{}'.format(metric, percentile),
                    np.percentile(result,
                                  float(percentile),
                                  interpolation='nearest'), reduce_type)

    def print_env_info(self):
        """Print environments or dependencies information."""
        # TODO: will implement it when add real benchmarks in the future.
        pass

    @property
    def name(self):
        """Decoration function to access benchmark name."""
        return self._result.name

    @property
    def type(self):
        """Decoration function to access benchmark type."""
        return self._result.type

    @property
    def run_count(self):
        """Decoration function to access benchmark run_count."""
        return self._result.run_count

    @property
    def return_code(self):
        """Decoration function to access benchmark return_code."""
        return self._result.return_code

    @property
    def start_time(self):
        """Decoration function to access benchmark start_time."""
        return self._result.start_time

    @property
    def end_time(self):
        """Decoration function to access benchmark end_time."""
        return self._result.end_time

    @property
    def raw_data(self):
        """Decoration function to access benchmark raw_data."""
        return self._result.raw_data

    @property
    def result(self):
        """Decoration function to access benchmark result."""
        return self._result.result

    @property
    def serialized_result(self):
        """Decoration function to access benchmark result."""
        return self._result.to_string()

    @property
    def default_metric_count(self):
        """Decoration function to get the count of default metrics."""
        return self._result.default_metric_count
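
To make the run() control flow concrete, here is a minimal sketch of a concrete subclass and how it would be launched. FakeBenchmark, its 'time' metric, and the recorded values are hypothetical names used only for illustration; the sketch assumes the import paths shown above.

class FakeBenchmark(Benchmark):
    """Toy micro-benchmark that records a constant latency (illustrative sketch only)."""
    def __init__(self, name, parameters=''):
        super().__init__(name, parameters)
        # Subclasses are expected to set the benchmark type before _preprocess() checks it.
        self._benchmark_type = BenchmarkType.MICRO

    def _benchmark(self):
        # For MICRO benchmarks the raw data must be List[List[Number]] or List[str].
        self._result.add_raw_data('time', [1.0, 2.0, 3.0], self._args.log_raw_data)
        self._result.add_result('time', 2.0)
        return True


benchmark = FakeBenchmark('fake-micro', parameters='--run_count 2')
if benchmark.run():
    print(benchmark.serialized_result)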