Example #1
    def _search_file_from_data_path(self, profiling_id, device_id):
        """
        Search framework files from data path.

        Args:
            profiling_id (str): The profiling ID.
            device_id (str): The device ID.

        Raises:
            ProfilerRawFileException: If the framework file type is inconsistent.
            ProfilerDeviceIdMismatchException: If the device ID does not
                match the framework files in the raw directory.
        """
        profiling_data_path = os.path.join(self._raw_data_dir, 'container',
                                           device_id, 'data')
        if not os.path.isdir(profiling_data_path):
            return

        files = os.listdir(profiling_data_path)
        for file in files:
            pattern = re.search(self._regex_framework_in_data, file)
            if not pattern or file.endswith('.done') or file.endswith('.zip'):
                continue
            attrs = pattern.groupdict()

            profiling_id_in_path = attrs.get('profiling_id')
            if profiling_id_in_path != profiling_id:
                continue

            device_id_in_path = attrs.get('device_id')
            if device_id_in_path != device_id:
                raise ProfilerDeviceIdMismatchException()

            data_type = attrs.get('data_type')
            if data_type.startswith('vm.'):
                if self._backend_type and self._backend_type != 'vm':
                    raise ProfilerRawFileException(
                        'Backend type is inconsistent.')
                self._backend_type = 'vm'
                data_type = data_type.split('.')[1]
            else:
                if self._backend_type and self._backend_type != 'ge':
                    raise ProfilerRawFileException(
                        'Backend type is inconsistent.')
                self._backend_type = 'ge'
            if data_type.startswith('graph_desc_info'):
                self._framework_path['graph'].append(
                    os.path.join(profiling_data_path, file))
            elif data_type.startswith('task_desc_info'):
                self._framework_path['task'].append(
                    os.path.join(profiling_data_path, file))
            elif data_type.startswith('point'):
                self._framework_path['point'].append(
                    os.path.join(profiling_data_path, file))
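The named groups read from `pattern.groupdict()` imply a filename layout like the one below. `self._regex_framework_in_data` itself is not shown in this snippet, so the pattern here is a hypothetical stand-in that merely exposes the same three groups:

import re

# Hypothetical pattern: the real self._regex_framework_in_data is defined
# elsewhere; it only needs to expose the named groups used above.
REGEX_FRAMEWORK_IN_DATA = re.compile(
    r'Framework\.(?P<data_type>.+)\.(?P<device_id>\d+)'
    r'\.(?P<profiling_id>[A-Za-z0-9]+)$')

match = REGEX_FRAMEWORK_IN_DATA.search('Framework.vm.task_desc_info.0.JOB123')
print(match.groupdict())
# {'data_type': 'vm.task_desc_info', 'device_id': '0',
#  'profiling_id': 'JOB123'}
# The 'vm.' prefix selects the 'vm' backend, and split('.')[1] yields
# 'task_desc_info', which routes the file into self._framework_path['task'].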
Example #2
    def _search_file_from_job_path(self, device_id, search_in_sub_path=False):
        """
        Search framework files from job path.

        Args:
            device_id (str): The device ID.
            search_in_sub_path (bool): If `True`, search in the `data`
                sub directory under the profiling path; otherwise search
                in the profiling path itself. Default: False.

        Raises:
            ProfilerRawFileException: If the framework file type is inconsistent.
            ProfilerDeviceIdMismatchException: If the device ID does not
                match the framework files in the raw directory.
        """
        profiling_dir = os.path.join(self._profiling_path, 'data') \
            if search_in_sub_path else self._profiling_path
        if not os.path.isdir(profiling_dir):
            return

        files = os.listdir(profiling_dir)
        for file in files:
            pattern = re.search(self._regex_framework, file)
            if not pattern or file.endswith('.done'):
                continue
            attrs = pattern.groupdict()

            device_id_in_path = attrs.get('device_id')
            if device_id_in_path != device_id:
                raise ProfilerDeviceIdMismatchException()

            data_type = attrs.get('data_type')
            if data_type.startswith('vm.'):
                if self._backend_type and self._backend_type != 'vm':
                    raise ProfilerRawFileException(
                        'Backend type is inconsistent.')
                self._backend_type = 'vm'
                data_type = data_type.split('.')[1]
            else:
                if self._backend_type and self._backend_type != 'ge':
                    raise ProfilerRawFileException(
                        'Backend type is inconsistent.')
                self._backend_type = 'ge'
            if data_type.startswith('graph_desc_info'):
                self._framework_path['graph'].append(
                    os.path.join(profiling_dir, file))
            elif data_type.startswith('task_desc_info'):
                self._framework_path['task'].append(
                    os.path.join(profiling_dir, file))
            elif data_type.startswith('point'):
                self._framework_path['point'].append(
                    os.path.join(profiling_dir, file))
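The only difference from the data-path variant is where files are looked up; the toggle at the top of the method reduces to the standalone sketch below:

import os

def select_profiling_dir(profiling_path, search_in_sub_path):
    # Mirrors the conditional in _search_file_from_job_path: sub-path mode
    # searches '<profiling_path>/data', otherwise the profiling path itself.
    return (os.path.join(profiling_path, 'data')
            if search_in_sub_path else profiling_path)

print(select_profiling_dir('/tmp/profiler', True))   # /tmp/profiler/data
print(select_profiling_dir('/tmp/profiler', False))  # /tmp/profiler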
Example #3
    def _get_minddata_pipeline_info(self):
        """Get the number of thread cores in minddata pipeline operator"""
        file_name = self._minddata_pipeline_display_filename.format(
            self._device_id)
        file_path = os.path.join(self._profiling_dir, file_name)
        file_path = validate_and_normalize_path(
            file_path, raise_key="Invalid minddata_pipeline_info file path.")
        if not os.path.exists(file_path):
            log.error('Did not find the minddata_pipeline file: %s', file_path)
            raise ProfilerFileNotFoundException(
                msg='Did not find the minddata_pipeline file: {}'.format(
                    file_path))

        with open(file_path, 'r', encoding='utf-8') as file:
            try:
                minddata_pipeline_info = json.load(file)
            except json.JSONDecodeError as err:
                log.exception(err)
                raise ProfilerRawFileException(
                    'Failed to parse the minddata pipeline file.') from err

        minddata_pipeline_op_info = []
        # Guard against a missing or null "op_info" field in the raw file.
        for item in minddata_pipeline_info.get("op_info") or []:
            op_info_dict = dict()
            op_info_dict["op_id"] = item.get("op_id")
            op_info_dict["num_workers"] = item.get("num_workers")
            minddata_pipeline_op_info.append(op_info_dict)
        return minddata_pipeline_op_info
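Only two keys per operator survive the extraction above; a minimal file content that satisfies this reader might look like the following (any other fields are ignored):

import json

# Hypothetical minddata_pipeline display file content; only 'op_info'
# entries with 'op_id' and 'num_workers' are read by the method above.
sample = {
    "op_info": [
        {"op_id": 0, "num_workers": 4},
        {"op_id": 1, "num_workers": 1},
    ]
}
print(json.dumps(sample))
# The method would return:
# [{'op_id': 0, 'num_workers': 4}, {'op_id': 1, 'num_workers': 1}]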
Example #4
    def _parse_and_save_op_info(self, csv_writer, op_id_info_cache,
                                sample_interval):
        """
        Parse and save the minddata pipeline operator information.

        Args:
            csv_writer (csv.writer): The csv writer.
            op_id_info_cache (dict): The operator id and information cache.
            sample_interval (int): The sample interval.

        Raises:
            ProfilerRawFileException: If the operator whose ID is 0 does not exist.
        """
        queue = Queue()
        root_node = op_id_info_cache.get(0)
        if not root_node:
            raise ProfilerRawFileException(
                'The format of the minddata pipeline raw file is wrong: '
                'the operator whose ID is 0 does not exist.')
        root_node['parent_id'] = None
        queue.put_nowait(root_node)

        while not queue.empty():
            node = queue.get_nowait()
            self._update_child_node(node, op_id_info_cache)
            csv_writer.writerow(self._get_op_info(node, sample_interval))

            op_id = node.get('op_id')
            children_ids = node.get('children')
            if not children_ids:
                continue
            for child_op_id in children_ids:
                sub_node = op_id_info_cache.get(child_op_id)
                if sub_node is None:  # malformed raw file: unknown child ID
                    raise ProfilerRawFileException(
                        'Operator {} does not exist.'.format(child_op_id))
                sub_node['parent_id'] = op_id
                queue.put_nowait(sub_node)
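The traversal is an ordinary breadth-first walk that stamps each child with its parent's ID before enqueueing it. A self-contained sketch with a toy cache (field names taken from the method, values invented):

from queue import Queue

# Toy cache: operator 0 is the root with children 1 and 2.
op_id_info_cache = {
    0: {'op_id': 0, 'children': [1, 2]},
    1: {'op_id': 1, 'children': []},
    2: {'op_id': 2, 'children': []},
}

queue = Queue()
root = op_id_info_cache[0]
root['parent_id'] = None
queue.put_nowait(root)

while not queue.empty():
    node = queue.get_nowait()
    print(node['op_id'], '<- parent', node['parent_id'])
    for child_id in node.get('children') or []:
        child = op_id_info_cache[child_id]
        child['parent_id'] = node['op_id']
        queue.put_nowait(child)
# Prints: 0 <- parent None, then 1 <- parent 0 and 2 <- parent 0.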
Example #5
    def _parse_and_save(self, pipeline_info):
        """
        Parse and save the parsed minddata pipeline file.

        Args:
            pipeline_info (dict): The pipeline information read from the raw
                file of the minddata pipeline.

        Raises:
            ProfilerRawFileException: If the format of minddata pipeline raw
                file is wrong.
        """
        sample_interval = pipeline_info.get('sampling_interval')
        op_info = pipeline_info.get('op_info')
        if sample_interval is None or not op_info:
            raise ProfilerRawFileException(
                'The format of minddata pipeline raw file is wrong.')

        op_id_info_cache = {}
        for item in op_info:
            op_id_info_cache[item.get('op_id')] = item

        # newline='' keeps the csv module from writing blank rows on Windows.
        with open(self._save_path, 'w', newline='',
                  encoding='utf-8') as save_file:
            csv_writer = csv.writer(save_file)
            csv_writer.writerow(self._col_names)
            self._parse_and_save_op_info(csv_writer, op_id_info_cache,
                                         sample_interval)
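Combining the validation here with the traversal in _parse_and_save_op_info, the smallest raw file this accepts carries a sampling_interval and a non-empty op_info list whose root operator has ID 0; the concrete values below are invented:

# Hypothetical raw minddata pipeline content. 'sampling_interval' and a
# non-empty 'op_info' are required, and one entry must have op_id 0.
pipeline_info = {
    'sampling_interval': 10,
    'op_info': [
        {'op_id': 0, 'children': [1], 'num_workers': 4},
        {'op_id': 1, 'children': [], 'num_workers': 1},
    ],
}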
Example #6
    def _load_point_info(self):
        """Load point info."""
        file_path = os.path.join(self._profiling_dir,
                                 'step_trace_point_info.json')
        file_path = validate_and_normalize_path(
            file_path, raise_key="Invalid step_trace_point_info file path.")

        # If a step_trace_point_info_{device_id}.json file exists, load it
        # instead of the generic file.
        file_path_new = os.path.join(
            self._profiling_dir,
            f'step_trace_point_info_{self._device_id}.json')
        file_path_new = validate_and_normalize_path(
            file_path_new,
            raise_key="Invalid step_trace_point_info file path.")
        if os.path.isfile(file_path_new):
            file_path = file_path_new

        if os.path.isfile(file_path):
            with open(file_path, 'r', encoding='utf-8') as file:
                try:
                    self._point_info = json.load(file)
                except (json.JSONDecodeError, TypeError) as err:
                    log.exception(err)
                    raise ProfilerRawFileException(
                        'Failed to parse the point info file.') from err
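The per-device file, when present, shadows the generic one. The selection logic reduces to this standalone sketch:

import os

def pick_point_info_file(profiling_dir, device_id):
    # Prefer step_trace_point_info_<device_id>.json; fall back to the
    # generic step_trace_point_info.json otherwise.
    generic = os.path.join(profiling_dir, 'step_trace_point_info.json')
    per_device = os.path.join(
        profiling_dir, f'step_trace_point_info_{device_id}.json')
    return per_device if os.path.isfile(per_device) else generic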
Example #7
    def _load_point_info(self):
        """Load point info."""
        file_path = os.path.join(self._profiling_dir,
                                 'step_trace_point_info.json')
        if os.path.isfile(file_path):
            with open(file_path, 'r', encoding='utf-8') as file:
                try:
                    self._point_info = json.load(file)
                except (json.JSONDecodeError, TypeError) as err:
                    log.exception(err)
                    raise ProfilerRawFileException(
                        'Failed to parse the point info file.') from err
Example #8
    def parse(self):
        """
        Parse the minddata pipeline files.

        Raises:
            ProfilerRawFileException: If the raw file of the minddata
                pipeline cannot be parsed or the file is empty.
        """
        with open(self._pipeline_path, 'r', encoding='utf-8') as file:
            try:
                pipeline_info = json.load(file)
            except (json.JSONDecodeError, TypeError) as err:
                logger.exception(err)
                raise ProfilerRawFileException(
                    'Failed to parse the minddata pipeline file.') from err
        if not pipeline_info:
            logger.warning('The minddata pipeline file is empty.')
            raise ProfilerRawFileException(
                'The minddata pipeline file is empty.')

        self._parse_and_save(pipeline_info)
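A hedged usage sketch: the class name and constructor arguments below are assumptions for illustration, since only the methods appear in these snippets.

# Hypothetical driver; the parser class name and its constructor
# signature are assumptions, not taken from the snippets above.
parser = MinddataPipelineParser(
    source_dir='/tmp/profiler',   # directory holding the raw JSON
    device_id='0',
    output_path='/tmp/profiler')  # where the pipeline CSV is written
parser.parse()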
Example #9
    def _load(self):
        """Load cpu_utilization info."""
        file_name = self._cpu_utilization_display_filename.format(
            self._device_id)
        file_path = os.path.join(self._profiling_dir, file_name)
        file_path = validate_and_normalize_path(
            file_path, raise_key="Invalid cpu_utilization_info file path.")
        if not os.path.exists(file_path):
            log.error('Did not find the cpu utilization file: %s', file_path)
            raise ProfilerFileNotFoundException(
                msg='Did not find the cpu utilization file.')

        with open(file_path, 'r', encoding='utf-8') as src_file:
            try:
                self._data = json.load(src_file)
            except json.JSONDecodeError as err:
                log.exception(err)
                raise ProfilerRawFileException(
                    'Failed to parse the cpu_utilization info file.') from err
Example #10
    def _convert_field_type(self, row):
        """
        Convert the field type to the specific type.

        Args:
            row (list): One row data from parsed data.

        Returns:
            list, the converted data.
        """
        try:
            return [
                row[0], row[1], row[2], row[3],
                int(row[4]),
                self._format_float_data(float(row[5])),
                self._format_float_data(float(row[6])),
                self._format_float_data(float(row[7])), row[8]
            ]
        except (IndexError, ValueError) as err:
            log.exception(err)
            raise ProfilerRawFileException(
                'Failed to get HOST CPU operator detail data.') from err
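A toy row shows the expected nine-field shape; the values are invented, and `_format_float_data` is assumed to be a rounding helper:

# Toy input, as a CSV reader would yield it: nine string fields.
row = ['f0', 'f1', 'f2', 'f3', '7', '1.5', '2.5', '3.5', 'f8']
# Expected conversion (assuming _format_float_data merely rounds):
# ['f0', 'f1', 'f2', 'f3', 7, 1.5, 2.5, 3.5, 'f8']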
Example #11
    @staticmethod
    def _convert_field_type(row):
        """
        Convert the field type to the specific type.

        Args:
            row (list): One row data from parsed data.

        Returns:
            list, the converted data.
        """
        try:
            return [
                row[0],
                int(row[1]),
                int(row[2]),
                float(row[3]),
                float(row[4]),
                float(row[5]) * 100
            ]
        except (IndexError, ValueError) as err:
            log.exception(err)
            raise ProfilerRawFileException(
                'Failed to get HOST CPU operator type data.') from err
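Same pattern with six fields; note that the last field is scaled from a ratio to a percentage (values invented):

# Toy input: six string fields.
row = ['ReLU', '4', '128', '0.5', '1.5', '0.25']
# Expected conversion: ['ReLU', 4, 128, 0.5, 1.5, 25.0]
# (the final 0.25 ratio becomes 25.0 percent)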