Ejemplo n.º 1
0
    def _preprocess_logs(self, logs, op_type_list, _nvtx_meta_data_dict):
        """
        get the op_type counted in the profile.
        get the scheduling list that needs to be analyse.
        """
        flag_nvtx_time_start = False
        nvtx_time_start_step = 0

        for i in range(len(logs)):
            line = api_param.parse_string(logs[i])
            if flag_nvtx_time_start:
                infos = line.strip().split()
                if not infos:
                    continue
                nvtx_range_type = infos[-1]
                if nvtx_range_type in ['pybind_imperative_func', 'compute']:
                    op_type = infos[-2]
                    if op_type not in op_type_list and '_grad' not in op_type:
                        op_type_list.append(op_type)
                        _nvtx_meta_data_dict[op_type +
                                             ' pybind_imperative_func'] = None
                        _nvtx_meta_data_dict[op_type] = None
                        _nvtx_meta_data_dict[op_type + ' compute'] = None
                        _nvtx_meta_data_dict[op_type + '_grad'] = None
                        _nvtx_meta_data_dict[op_type + '_grad compute'] = None
            if not flag_nvtx_time_start and 'NVTX Push-Pop Range Statistics:' in line:
                flag_nvtx_time_start = True
                nvtx_time_start_step = i

        return nvtx_time_start_step
Ejemplo n.º 2
0
    def __init__(self, case_detail, compare_framework):
        self.case_name = case_detail["name"]
        self.op_type = parse_op_type(self.case_name)
        self.disabled = case_detail.get("disabled", False)
        self.compare_framework = compare_framework

        if case_detail.get("parameters", None):
            parameters = api_param.parse_string(case_detail["parameters"])
            parameters = parameters.replace(" ", "")
            parameters = parameters.replace("\n", " ")
        else:
            parameters = "--"
        self.parameters = parameters

        self.gpu_forward = {}
        self.gpu_backward = {}
        self.gpu_backward_forward = {}
        self.cpu_forward = {}
        self.cpu_backward = {}
        self.cpu_backward_forward = {}
        for device in ["gpu", "cpu"]:
            for direction in ["forward", "backward", "backward_forward"]:
                attr_name = device + "_" + direction
                result = getattr(self, attr_name)

                paddle_total, paddle_gpu_time, paddle_gflops, paddle_gbs = self._get_case_value(
                    case_detail, "paddle", device, "speed", direction)
                result["paddle"] = {
                    "total": paddle_total,
                    "gpu_time": paddle_gpu_time,
                    "gflops": paddle_gflops,
                    "gbs": paddle_gbs
                }

                competitor_total, competitor_gpu_time, competitor_gflops, competitor_gbs = self._get_case_value(
                    case_detail, self.compare_framework, device, "speed",
                    direction)
                result[self.compare_framework] = {
                    "total": competitor_total,
                    "gpu_time": competitor_gpu_time,
                    "gflops": competitor_gflops,
                    "gbs": competitor_gbs
                }
                total_result, total_ratio = _compare(paddle_total,
                                                     competitor_total)
                gpu_time_result, gpu_time_ratio = _compare(paddle_gpu_time,
                                                           competitor_gpu_time)
                result["compare"] = {
                    "total": total_result,
                    "total_ratio": total_ratio,
                    "gpu_time": gpu_time_result,
                    "gpu_time_ratio": gpu_time_ratio
                }

                accuracy, difference = self._get_case_value(
                    case_detail, "paddle", device, "accuracy", direction)
                result["accuracy"] = str(accuracy)
                result["difference"] = str(difference)
Ejemplo n.º 3
0
    def _parse_logs(self, logs):
        kernel_line_from = None
        kernel_line_to = None
        memcpy_line_from = None
        memcpy_line_to = None
        for i in range(len(logs)):
            line = api_param.parse_string(logs[i])
            if "CUDA Kernel Statistics:" in line:
                kernel_line_from = i
                for j in range(i + 2, len(logs)):
                    if logs[j] == "":
                        kernel_line_to = j
                        break
            if "CUDA Memory Operation Statistics (by time):" in line:
                memcpy_line_from = i
                for j in range(i + 2, len(logs)):
                    if logs[j] == "":
                        memcpy_line_to = j
                        break

        parse_status = False
        kernel_gpu_time = 0.0
        if kernel_line_from is not None and kernel_line_to is not None:
            for i in range(kernel_line_from, kernel_line_to):
                print(logs[i])
            print("")
            parse_status = True
            kernel_gpu_time = self._parse_gpu_time(logs[kernel_line_from + 4])

        memcpy_gpu_time = 0.0
        if memcpy_line_from is not None and memcpy_line_to is not None:
            for i in range(memcpy_line_from, memcpy_line_to):
                print(logs[i])
            print("")
            parse_status = True
            memcpy_gpu_time = self._parse_gpu_time(logs[memcpy_line_from + 4])

        total_gpu_time = kernel_gpu_time + memcpy_gpu_time
        if total_gpu_time != 0.0:
            print(
                "total gpu_time: {:.4f} ms (kernel: {:.4f} ms ({:.2f}%); memcpy: {:.4f} ms ({:.2f}%))"
                .format(total_gpu_time, kernel_gpu_time,
                        kernel_gpu_time * 100 / total_gpu_time,
                        memcpy_gpu_time,
                        memcpy_gpu_time * 100 / total_gpu_time))
        else:
            print(
                "total gpu_time: {:.4f} ms (kernel: {:.4f} ms; memcpy: {:.4f} ms)"
                .format(total_gpu_time, kernel_gpu_time, memcpy_gpu_time))
        print("")
        return parse_status, total_gpu_time
Ejemplo n.º 4
0
def _parse_nvprof_logs(logs):
    line_from = None
    line_to = None
    total_gpu_time = 0.0
    for i in range(len(logs)):
        line = api_param.parse_string(logs[i])
        if "GPU activities:" in line:
            line_from = i - 1
        if line_from is not None and "API calls:" in line:
            line_to = i - 1
    if line_from is not None and line_to is not None:
        for i in range(line_from, line_to):
            print(logs[i])
        print("")
        return True, _parse_gpu_time(logs[line_from + 1])
    else:
        return False, 0.0
Ejemplo n.º 5
0
    def __init__(self, case_detail):
        self.case_name = case_detail["name"]
        self.op_type = parse_op_type(self.case_name)

        if case_detail.get("parameters", None):
            parameters = api_param.parse_string(case_detail["parameters"])
            parameters = parameters.replace(" ", "")
            parameters = parameters.replace("\n", " ")
        else:
            parameters = "--"
        self.parameters = parameters

        self.gpu_forward = {}
        self.gpu_backward = {}
        self.cpu_forward = {}
        self.cpu_backward = {}
        for device in ["gpu", "cpu"]:
            for direction in ["forward", "backward"]:
                attr_name = device + "_" + direction
                result = getattr(self, attr_name)

                paddle_total, paddle_gpu_time = self._get_case_value(
                    case_detail, "paddle", device, "speed", direction)
                result["paddle"] = {
                    "total": paddle_total,
                    "gpu_time": paddle_gpu_time
                }

                tf_total, tf_gpu_time = self._get_case_value(
                    case_detail, "tensorflow", device, "speed", direction)
                result["tensorflow"] = {
                    "total": tf_total,
                    "gpu_time": tf_gpu_time
                }
                result["compare"] = {
                    "total": _compare(paddle_total, tf_total),
                    "gpu_time": _compare(paddle_gpu_time, tf_gpu_time)
                }

                accuracy, difference = self._get_case_value(
                    case_detail, "paddle", device, "accuracy", direction)
                result["accuracy"] = str(accuracy)
                result["difference"] = str(difference)
Ejemplo n.º 6
0
    def _parse_logs(self, logs, profile_start_step, profile_end_step,
                    sync_interval):
        """
        parse logs to analyse performance.
        """
        parse_status = False
        total_step_time = 0.0
        total_op_call_time_per_step = 0.0
        # num step of using profile
        num_step = profile_end_step - profile_start_step
        # Profile data in start_step and end_step may be not correct,
        # so we need to select some reliable data. Number of reliable
        # step data is step_count.
        step_count = 0

        op_type_list = []
        # scheduling time:
        # op_type pybind_imperative_func (imperative_avg_time)
        # op_type (fwd_trace_op_avg_time)
        # op_type compute (fwd_op_compute_avg_time)
        # op_type_grad (bwd_trace_op_avg_time)
        # op_type_grad compute (bwd_op_compute_avg_time)
        _nvtx_meta_data_dict = {}
        scheduling_time_dict = {}

        # preprocess logs to get op_type appeared in logs and
        # initialize data in _nvtx_meta_data_dict to None.
        nvtx_time_start_step = self._preprocess_logs(logs, op_type_list,
                                                     _nvtx_meta_data_dict)

        # parse report to get meta scheduling time
        for i in range(nvtx_time_start_step, len(logs)):
            line = api_param.parse_string(logs[i])
            infos = line.strip().split()
            if not infos:
                continue
            nvtx_range_type = infos[-1]
            if nvtx_range_type in ['pybind_imperative_func', 'compute']:
                nvtx_range_type = infos[-2] + ' ' + nvtx_range_type

            # step time
            if nvtx_range_type.isdigit(
            ) and int(nvtx_range_type) > profile_start_step and int(
                    nvtx_range_type
            ) < profile_end_step - 1 and int(nvtx_range_type) % sync_interval:
                step_count += 1
                step_time = self._to_float(infos[1]) * 1E-6
                total_step_time += step_time

            # nvtx time
            if nvtx_range_type in _nvtx_meta_data_dict:
                avg_time = self._calculate_avg_time_per_op(infos)
                _nvtx_meta_data_dict[nvtx_range_type] = round(avg_time, 6)

                if '_grad' in nvtx_range_type and 'compute' not in nvtx_range_type or 'pybind_imperative_func' in nvtx_range_type:
                    total_op_call_time_per_step += self._calculate_avg_time_per_step(
                        infos, num_step)

        # analyse scheduling time
        scheduling_time_dict['step_time'] = round(
            total_step_time / step_count, 6) if step_count != 0 else None
        scheduling_time_dict['op_call_time_per_step'] = round(
            total_op_call_time_per_step, 6)
        scheduling_time_dict[
            'python_call_time'] = self._calculate_scheduling_time(
                scheduling_time_dict['step_time'],
                scheduling_time_dict['op_call_time_per_step'])
        for op_type in op_type_list:
            scheduling_time_dict[
                op_type] = self._get_scheduling_time_from_meta_data(
                    op_type, _nvtx_meta_data_dict)

        parse_status = True
        return parse_status, op_type_list, scheduling_time_dict