def get_flops(self):
    """Get flops for each device."""
    flops_info_list = []
    max_flops = 0
    for host_map_ip, device_id, rank_id in self._host_device_rank_relation:
        host_dir = os.path.join(self._cluster_dir, host_map_ip, 'profiler')
        validate_and_normalize_path(
            host_dir, raise_key='Invalid host directory {}.'.format(host_map_ip))
        file_path = self._get_flops_file_for_each_device(host_dir, device_id)
        # Forward compatible: if the flops file does not exist, return empty data.
        if not os.path.exists(file_path):
            flops_info_list = []
            break
        file_content = self._get_file_content(file_path)
        max_flops = max(max_flops, file_content.get('FLOPs'))
        flops_dict = {
            'host_ip': host_map_ip,
            'device_id': device_id,
            'rank_id': rank_id,
        }
        flops_dict.update(file_content)
        flops_info_list.append(flops_dict)

    # Normalize the flops by dividing by the maximum FLOPs across all devices.
    # Guard against a zero maximum to avoid ZeroDivisionError.
    for flops_info in flops_info_list:
        flops_info['FLOPs_norm'] = flops_info['FLOPs'] / max_flops if max_flops else 0
    return flops_info_list
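# A minimal, self-contained sketch (not part of the analyser) of the
# normalization step above, using made-up FLOPs values: each device's FLOPs
# is divided by the cluster-wide maximum, so 'FLOPs_norm' lies in (0, 1].
def _normalize_flops_sketch():
    infos = [{'FLOPs': 2.0}, {'FLOPs': 8.0}]  # hypothetical per-device data
    max_flops = max(info['FLOPs'] for info in infos)
    for info in infos:
        info['FLOPs_norm'] = info['FLOPs'] / max_flops if max_flops else 0
    return infos  # -> [{'FLOPs': 2.0, 'FLOPs_norm': 0.25}, {'FLOPs': 8.0, 'FLOPs_norm': 1.0}]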
def get_peak_memory(self):
    """Get peak memory for each device."""
    peak_mem_list = []
    for host_map_ip, device_id, rank_id in self._host_device_rank_relation:
        host_dir = os.path.join(self._cluster_dir, host_map_ip, 'profiler')
        validate_and_normalize_path(
            host_dir, raise_key='Invalid host directory {}.'.format(host_map_ip))
        file_path = self._get_memory_file_for_each_device(host_dir, device_id)
        file_content = self._get_file_content(file_path)
        capacity = file_content.get('capacity')
        peak_mem = file_content.get('peak_mem')
        mem_dict = {
            'host_ip': host_map_ip,
            'device_id': device_id,
            'rank_id': rank_id,
            'capacity': capacity,
            'peak_mem': peak_mem
        }
        peak_mem_list.append(mem_dict)
    return peak_mem_list
def _load_point_info(self):
    """Load point info."""
    file_path = os.path.join(self._profiling_dir, 'step_trace_point_info.json')
    file_path = validate_and_normalize_path(
        file_path, raise_key="Invalid step_trace_point_info file path.")
    # If a step_trace_point_info_{device_id}.json file exists, prefer it.
    file_path_new = os.path.join(
        self._profiling_dir,
        f'step_trace_point_info_{self._device_id}.json')
    file_path_new = validate_and_normalize_path(
        file_path_new, raise_key="Invalid step_trace_point_info file path.")
    if os.path.isfile(file_path_new):
        file_path = file_path_new

    if os.path.isfile(file_path):
        with open(file_path, 'r', encoding='utf-8') as file:
            try:
                self._point_info = json.load(file)
            except (json.JSONDecodeError, TypeError) as err:
                log.exception(err)
                raise ProfilerRawFileException('Failed to parse the point info file.')
def _get_flops_file_for_each_device(self, path, device_id):
    """Get the flops file for each device."""
    filename = self._summary_filename.format(device_id)
    file_path = os.path.join(path, filename)
    validate_and_normalize_path(file_path, raise_key='Invalid flops file path.')
    return file_path
def _load(self):
    """Load data according to the parsed AICORE operator file."""
    op_detail_file_path = os.path.join(
        self._profiling_dir,
        self._file_name_aicore_detail_time.format(self._device_id))
    framework_file_path = os.path.join(
        self._profiling_dir,
        self._file_name_framework_info.format(self._device_id))
    flops_file_path = os.path.join(
        self._profiling_dir,
        self._file_name_flops.format(self._device_id))
    op_detail_file_path = validate_and_normalize_path(
        op_detail_file_path, raise_key='Invalid aicore_detail file path.')
    framework_file_path = validate_and_normalize_path(
        framework_file_path, raise_key='Invalid framework file path.')
    flops_file_path = validate_and_normalize_path(
        flops_file_path, raise_key='Invalid flops file path.')

    if not os.path.isfile(op_detail_file_path):
        logger.warning('The file <%s> does not exist.', op_detail_file_path)
        return
    if not os.path.isfile(framework_file_path):
        logger.warning('The file <%s> does not exist.', framework_file_path)
        return

    framework_infos = dict()
    with open(framework_file_path, 'r') as file:
        csv_reader = csv.reader(file)
        next(csv_reader)
        for info in csv_reader:
            framework_infos[info[3]] = self._convert_framework_field_type(info)

    flops_infos = dict()
    if os.path.isfile(flops_file_path):
        with open(flops_file_path, 'r') as f_obj:
            # Skip the first line, which is the header.
            next(f_obj)
            for line in f_obj:
                flops_line = line.strip().split(',')
                # flops_line[0] is full_op_name.
                flops_infos[flops_line[0]] = flops_line[1:]
    else:
        logger.warning('The file <%s> does not exist.', flops_file_path)

    with open(op_detail_file_path, 'r') as file:
        csv_reader = csv.reader(file)
        next(csv_reader)
        for info in csv_reader:
            detail_info = self._get_op_detail_info(info, framework_infos, flops_infos)
            self._data.append(detail_info)

    del framework_infos
    del flops_infos
def get_timeline_summary(self):
    """
    Get timeline summary information for UI display.

    Returns:
        json, the content of timeline summary information.
    """
    summary_filename = self._timeline_summary_filename.format(self._device_id)
    file_path = os.path.join(self._profiling_dir, summary_filename)
    file_path = validate_and_normalize_path(
        file_path, raise_key='Invalid timeline summary path.')

    timeline_summary = {}
    if os.path.exists(file_path):
        try:
            with open(file_path, 'r') as f_obj:
                timeline_summary = json.load(f_obj)
        except (IOError, OSError, json.JSONDecodeError) as err:
            logger.error('Error occurred when reading timeline summary file: %s', err)
            raise ProfilerIOException
    else:
        logger.info('No timeline summary file. Please check the output path.')

    return timeline_summary
def _get_minddata_queue_step_time_info(self):
    """Get the sampling time information at the steps of the host queue."""
    minddata_queue_step_time_info = []
    minddata_analyser = MinddataAnalyser(self._profiling_dir, self._device_id)
    file_path = minddata_analyser.get_device_queue_file_path()
    file_path = validate_and_normalize_path(
        file_path, raise_key="Invalid device_queue file path")
    if not os.path.exists(file_path):
        log.error('Did not find the device queue file: %s', file_path)
        raise ProfilerFileNotFoundException(
            msg='Did not find the device queue file.')

    with open(file_path) as data_file:
        for line in data_file.readlines():
            op_info = line.split()
            # op_info is a list like: ['1', '64', '8', '2', '85406783'].
            # The first element is '0' or '1': '0' means that time
            # information is recorded, '1' means that queue information
            # is recorded.
            # '1': queue info, '64': queue capacity, '8': step_num,
            # '2': queue size, '85406783': sampling time.
            if op_info and op_info[0] == "1":
                minddata_queue_step_time_info.append([op_info[2], op_info[4]])
    return minddata_queue_step_time_info
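# Illustrative only: how a single raw device-queue line maps to the pair
# collected above. The field layout follows the comment in the function;
# the sample values are invented.
def _parse_device_queue_line_sketch():
    line = '1 64 8 2 85406783'  # flag, capacity, step_num, queue size, sampling time
    op_info = line.split()
    if op_info and op_info[0] == '1':
        return [op_info[2], op_info[4]]  # -> ['8', '85406783']
    return []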
def get_cluster_link_info():
    """
    Get cluster link info.

    Returns:
        Response, the cluster link info.

    Raises:
        ParamValueError: If the search condition contains some errors.

    Examples:
        >>> POST http://xxx/v1/mindinsight/profile/search-cluster-link
    """
    train_id = get_train_id(request)
    cluster_profiler_dir = os.path.join(settings.SUMMARY_BASE_DIR, train_id)
    try:
        cluster_profiler_dir = validate_and_normalize_path(
            cluster_profiler_dir, 'cluster_profiler')
    except ValidationError:
        raise ParamValueError('Invalid cluster_profiler dir')
    condition = request.stream.read()
    try:
        condition = json.loads(condition) if condition else {}
    except (json.JSONDecodeError, ValueError):
        raise ParamValueError("Json data parse failed.")
    device_id = condition.get("device_id", "0")
    to_int(device_id, 'device_id')
    analyser = AnalyserFactory.instance().get_analyser(
        'cluster_hccl', cluster_profiler_dir, device_id)
    link_info = analyser.get_cluster_link_info(condition)
    return jsonify(link_info)
def _write_timeline_data_into_file(self, timeline_data):
    """
    Write the timeline information into the file, including operator
    name, stream id, start time and duration.

    Args:
        timeline_data (list): The metadata to be written into the file.
            [
                ['op_name_1', 'stream_id_1', 'start_time_1', 'duration_1'],
                ['op_name_2', 'stream_id_2', 'start_time_2', 'duration_2'],
                [...]
            ]
    """
    # Sort by start time.
    timeline_data.sort(key=lambda x: float(x[2]))
    filename = 'output_timeline_data_{}.txt'.format(self._device_id)
    file_path = os.path.join(self._output_path, filename)
    file_path = validate_and_normalize_path(
        file_path, raise_key='Invalid file path of timeline data.')

    # Write to file.
    try:
        with open(file_path, 'w') as f_obj:
            f_obj.write(TIMELINE_FILE_COLUMN_TITLE + '\n')
            for timeline in timeline_data:
                timeline = [str(item) for item in timeline]
                f_obj.write(','.join(timeline) + '\n')
    except (IOError, OSError) as err:
        logger.error('Error occurred when writing intermediate timeline file: %s', err)
        raise ProfilerIOException
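# For reference, the intermediate file written above looks roughly like this
# (the column title comes from TIMELINE_FILE_COLUMN_TITLE; the data rows here
# are invented):
#
#     op_name,stream_id,start_time,duration
#     Conv2D-op1,5,100.5,12.3
#     MatMul-op2,5,112.8,4.1
#
# Rows are sorted by the float value of the start-time column beforehand, so
# downstream readers can rely on chronological order.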
def _get_file_path(self, device_type, file_type):
    """
    Get the memory usage file path.

    Args:
        device_type (str): Device type, e.g., GPU, Ascend.
        file_type (str): Memory usage file type, e.g., summary, details.

    Returns:
        str, file path of the memory usage file corresponding to its file_type.
    """
    filename = ""
    if device_type == "ascend":
        if file_type == FileType.SUMMARY.value:
            filename = self._summary_filename.format(self._device_id)
        elif file_type == FileType.DETAILS.value:
            filename = self._details_filename.format(self._device_id)
    else:
        logger.error('Memory Usage only supports Ascend for now. Please check the device type.')
        raise ParamValueError("Invalid device type.")

    file_path = os.path.join(self._profiling_dir, filename)
    file_path = validate_and_normalize_path(
        file_path, raise_key='Invalid memory usage file path.')

    return file_path
def get_cluster_flops():
    """
    Get cluster FLOPs.

    Returns:
        str, the cluster FLOPs.

    Raises:
        ParamValueError: If the cluster profiler dir is invalid.

    Examples:
        >>> GET http://xxx/v1/mindinsight/profile/cluster-flops
    """
    train_id = get_train_id(request)
    if not train_id:
        raise ParamValueError('No train id.')
    cluster_profiler_dir = os.path.join(settings.SUMMARY_BASE_DIR, train_id)
    cluster_profiler_dir = validate_and_normalize_path(
        cluster_profiler_dir, 'cluster_profiler')
    check_train_job_and_profiler_dir(cluster_profiler_dir)
    analyser = AnalyserFactory.instance().get_analyser(
        'cluster_flops', cluster_profiler_dir)
    flops = analyser.get_flops()
    return jsonify(flops)
def _get_pipeline_path(self, source_dir):
    """
    Get the minddata pipeline file path.

    Args:
        source_dir (str): The minddata pipeline source dir.

    Returns:
        str, the minddata pipeline file path.
    """
    pipeline_path = os.path.join(
        source_dir,
        self._raw_pipeline_file_name.format(self._device_id))
    try:
        pipeline_path = validate_and_normalize_path(pipeline_path, 'profiler')
    except ValidationError:
        logger.warning('Minddata pipeline file is invalid.')
        raise ProfilerPathErrorException('Minddata pipeline file is invalid.')
    if not os.path.isfile(pipeline_path):
        logger.warning('The minddata pipeline file <%s> not found.', pipeline_path)
        raise ProfilerFileNotFoundException(pipeline_path)

    return pipeline_path
def get_display_timeline(self):
    """
    Get timeline data for UI display.

    Returns:
        json, the content of timeline data.
    """
    # Search for the timeline json file under the profiling dir.
    timeline_filename = self._timeline_filename.format(self._device_id)
    display_filename = self._display_filename.format(self._device_id)
    file_list = [filename for filename in os.listdir(self._profiling_dir)
                 if timeline_filename in filename or display_filename in filename]

    # Check whether there is a timeline json file for display;
    # fall back to the raw timeline file if not.
    file_path = os.path.join(self._profiling_dir, display_filename)
    if display_filename not in file_list:
        file_path = os.path.join(self._profiling_dir, timeline_filename)
    file_path = validate_and_normalize_path(
        file_path, raise_key='Invalid timeline json path.')

    timeline = []
    if os.path.exists(file_path):
        try:
            with open(file_path, 'r') as f_obj:
                timeline = json.load(f_obj)
        except (IOError, OSError, json.JSONDecodeError) as err:
            logger.error('Error occurred when reading timeline display file: %s', err)
            raise ProfilerIOException
    else:
        logger.info('No timeline file. Please check the output path.')

    return timeline
def get_display_timeline(self, device_type):
    """
    Get timeline data for UI display.

    Args:
        device_type (str): Device type, "ascend" or "gpu".

    Returns:
        json, the content of timeline data.
    """
    if device_type == "ascend":
        display_filename = self._ascend_display_filename.format(self._device_id)
    elif device_type == "gpu":
        display_filename = self._gpu_display_filename.format(self._device_id)
    else:
        logger.info('Device type should be ascend or gpu. Please check the device type.')
        raise ParamValueError("Invalid device_type.")
    file_path = os.path.join(self._profiling_dir, display_filename)
    file_path = validate_and_normalize_path(
        file_path, raise_key='Invalid timeline json path.')

    timeline = []
    if os.path.exists(file_path):
        try:
            with open(file_path, 'r') as f_obj:
                timeline = json.load(f_obj)
        except (IOError, OSError, json.JSONDecodeError) as err:
            logger.error('Error occurred when reading timeline display file: %s', err)
            raise ProfilerIOException
    else:
        logger.info('No timeline file. Please check the output path.')

    return timeline
def load_timeline_data(self):
    """Load timeline data from file."""
    file_path = os.path.join(
        self._profiling_dir,
        self._output_timeline_data_file_path.format(self._device_id))
    file_path = validate_and_normalize_path(
        file_path, raise_key='Invalid timeline txt file path.')
    if not os.path.exists(file_path):
        logger.error("Failed to find parsed timeline file.")
        raise ProfilerFileNotFoundException('parsed timeline file')

    stream_count_dict = {}
    try:
        with open(file_path, 'r') as f_obj:
            for line in f_obj:
                if not line.startswith('op_name'):
                    line_list = line.strip('\n').split(',')
                    self._parse_timeline_data(line_list)
                    self._update_num_of_streams(line_list, stream_count_dict)
    except (IOError, OSError) as err:
        logger.error('Error occurred when reading timeline intermediate file: %s', err)
        raise ProfilerIOException

    # Update timeline summary info.
    self._timeline_summary['num_of_streams'] = len(stream_count_dict.keys())
def _get_communication_info(self, host_ip, device_id, step_num):
    """Get communication info."""
    file_name = 'hccl_raw_{}.csv'.format(device_id)
    communication_file_path = os.path.join(
        self._cluster_profiler_dir, 'cluster_profiler', host_ip, 'profiler', file_name)
    communication_file_path = validate_and_normalize_path(
        communication_file_path, raise_key="Invalid communication file path.")
    if not os.path.exists(communication_file_path):
        log.error('Did not find the file: %s', communication_file_path)
        raise ProfilerFileNotFoundException(
            msg='Did not find the file:{}'.format(communication_file_path))
    communication_info = list()
    step_num = str(step_num)
    with open(communication_file_path, 'r') as src_file:
        csv_reader = csv.reader(src_file)
        # When the step_num value is 0, it means the average value.
        # The last line of hccl_raw_{}.csv records the average value,
        # and the first element of that line is '-'.
        step_num = '-' if step_num == '0' else step_num
        for row in csv_reader:
            if row[0] == step_num:
                communication_info = row
                break

    # Convert strings to floating-point numbers and dictionaries.
    if communication_info:
        communication_info[1] = float(communication_info[1])
        communication_info[2] = float(communication_info[2])
        communication_info[3] = json.loads(communication_info[3])
        communication_info[4] = json.loads(communication_info[4])
    return communication_info
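# Hypothetical hccl_raw row to make the conversions above concrete (the
# column layout is inferred from the indices used; all values are invented):
#
#     ['1', '0.12', '0.34', '{"send": 0.05}', '{"wait": 0.02}']
#
# After conversion, columns 1-2 become floats and columns 3-4 become dicts
# parsed from their embedded JSON strings.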
def _get_total_step_num(self):
    """Get the number of train steps."""
    total_step_num = 0
    # Take the data of one of the machines to get the total number of steps.
    host_ip_dir = self._host_ips_dir[0]
    target_dir_path = os.path.join(
        self._cluster_profiler_dir, 'cluster_profiler', host_ip_dir, 'profiler')
    target_dir_path = validate_and_normalize_path(
        target_dir_path, raise_key="Invalid profiler dir path.")
    if not os.path.exists(target_dir_path):
        log.error('Did not find cluster_profiler dir : %s', target_dir_path)
        raise ProfilerDirNotFoundException(
            msg='Did not find cluster_profiler dir:{}'.format(target_dir_path))
    entries = os.scandir(target_dir_path)
    for entry in entries:
        if entry.is_symlink():
            continue
        if entry.is_file() and entry.name.startswith('hccl_raw'):
            file_path = os.path.join(target_dir_path, entry.name)
            with open(file_path, 'r') as src_file:
                lines = src_file.readlines()
            # The first row is col_name, the last row is the average.
            if len(lines) > 2:
                total_step_num = len(lines) - 2
            break
    return total_step_num
def _get_total_step_num(self):
    """Get the number of train steps."""
    total_step_num = 0
    # Take the data of one of the machines to get the total number of steps.
    host_ip_dir = self._host_ips_dir[0]
    target_dir_path = os.path.join(
        self._cluster_profiler_dir, 'cluster_profiler', host_ip_dir, 'profiler')
    target_dir_path = validate_and_normalize_path(
        target_dir_path, raise_key="Invalid profiler dir path.")
    if not os.path.exists(target_dir_path):
        log.error('Did not find cluster_profiler dir : %s', target_dir_path)
        raise ProfilerDirNotFoundException(
            msg='Did not find cluster_profiler dir:{}'.format(target_dir_path))
    entries = os.scandir(target_dir_path)
    for entry in entries:
        if entry.is_symlink():
            continue
        if entry.is_file() and entry.name.startswith('step_trace_raw'):
            file_path = os.path.join(target_dir_path, entry.name)
            with open(file_path, 'r') as src_file:
                lines = src_file.readlines()
            # The penultimate line holds the information of the last step;
            # the step number is at index 0.
            if len(lines) > 1:
                total_step_num = lines[-2].split(',')[0]
            break
    return total_step_num
def _get_step_trace_info(self, host_ip, device_id, step_num):
    """Get step trace info."""
    file_name = 'step_trace_raw_{}_detail_time.csv'.format(device_id)
    step_trace_file_path = os.path.join(
        self._cluster_profiler_dir, 'cluster_profiler', host_ip, 'profiler', file_name)
    step_trace_file_path = validate_and_normalize_path(
        step_trace_file_path, raise_key="Invalid step trace file path.")
    if not os.path.exists(step_trace_file_path):
        log.error('Did not find the file: %s', step_trace_file_path)
        raise ProfilerFileNotFoundException(
            msg='Did not find the file:{}'.format(step_trace_file_path))
    step_trace_info = list()
    step_num = str(step_num)
    with open(step_trace_file_path, 'r') as src_file:
        lines = src_file.readlines()
        # When the step_num value is 0, it means the average value.
        # The last line of step_trace_raw_{}_detail_time.csv records the average value.
        if step_num == '0':
            step_trace_info = lines[-1].strip('\n').split(',')
        else:
            for line in lines:
                line = line.strip('\n').split(',')
                if line[0] == step_num:
                    step_trace_info = line

    # step_trace_info[6]: iteration_interval time
    # step_trace_info[7]: fp_and_bp time
    # step_trace_info[8]: tail time
    # Divided by 1e5, the unit becomes milliseconds.
    iteration_interval = float(step_trace_info[6]) / 1e5
    fp_and_bp = float(step_trace_info[7]) / 1e5
    tail = float(step_trace_info[8]) / 1e5
    step_trace_info = [iteration_interval, fp_and_bp, tail]
    return step_trace_info
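# Unit note (an assumption inferred from the divisor): if the raw fields are
# recorded in 100 MHz system-counter ticks, then 1e5 ticks equal one
# millisecond, so e.g. 1234500 ticks / 1e5 = 12.345 ms.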
def get_min_cycle_counter_from_file(self):
    """
    Get minimum cycle counter.

    Returns:
        float, the minimum value of the cycle counter.
    """
    file_path = os.path.join(
        self._profiling_dir,
        self._min_cycle_counter_file_path.format(self._device_id))
    file_path = validate_and_normalize_path(
        file_path, raise_key='Invalid min cycle counter file path.')
    if os.path.exists(file_path):
        try:
            with open(file_path, 'r') as f_obj:
                min_cycle_counter = f_obj.read()
                min_cycle_counter = float(min_cycle_counter) \
                    if min_cycle_counter != 'inf' else 0
        except (IOError, OSError) as err:
            logger.error('Error occurred when reading minimum cycle counter: %s', err)
            raise ProfilerIOException
    else:
        min_cycle_counter = 0
        logger.info("No min cycle counter recorded.")

    return min_cycle_counter
def write_timeline_to_json_by_limitation(self):
    """Write timeline to json within the size limitation."""
    display_filename = self._display_filename.format(self._device_id)
    display_file_path = os.path.join(self._profiling_dir, display_filename)
    display_file_path = validate_and_normalize_path(
        display_file_path, raise_key='Invalid timeline display json path.')

    length = len(self._timeline_meta)
    try:
        with open(display_file_path, 'w') as json_file:
            json_file.write('[')
            for index, item in enumerate(self._timeline_meta):
                json.dump(item, json_file)
                file_size = os.path.getsize(display_file_path)
                if file_size > SIZE_LIMIT:
                    break
                if index == length - 1:
                    break
                json_file.write(',')
            json_file.write(']')
    except (IOError, OSError) as err:
        logger.error('Error occurred when writing timeline display file: %s', err)
        raise ProfilerIOException
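# Design note: items are dumped one at a time and the on-disk size is
# re-checked after every dump, so the output stops growing shortly after
# SIZE_LIMIT is crossed; the closing ']' is always written, which keeps the
# truncated list parseable as JSON.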
def get_timeline_summary(self):
    """
    Get timeline summary information for UI display.

    Returns:
        json, the content of timeline summary information.
    """
    file_path = None
    summary_file_name = 'timeline_summary_{}.json'.format(self._device_id)
    if summary_file_name in os.listdir(self._profiling_dir):
        file_path = os.path.join(self._profiling_dir, summary_file_name)
        file_path = validate_and_normalize_path(
            file_path, raise_key='Invalid timeline summary path.')

    timeline_summary = {}
    # file_path stays None when no summary file was found.
    if file_path and os.path.exists(file_path):
        try:
            with open(file_path, 'r') as f_obj:
                timeline_summary = json.load(f_obj)
        except (IOError, OSError) as err:
            logger.error('Error occurred when reading timeline summary file: %s', err)
            raise ProfilerIOException

    return timeline_summary
def _load_timeline_data(self):
    """Load timeline data from file."""
    file_path = os.path.join(
        self._profiling_dir,
        self._output_timeline_data_file_path.format(self._device_id))
    file_path = validate_and_normalize_path(
        file_path, raise_key='Invalid timeline txt file path.')
    if not os.path.exists(file_path):
        logger.error("Failed to find parsed timeline file.")
        raise ProfilerFileNotFoundException('parsed timeline file')

    timeline_list = []
    try:
        with open(file_path, 'r') as f_obj:
            for line in f_obj:
                if not line.startswith('op_name'):
                    line_list = line.strip('\n').split(',')
                    timeline_list.append(line_list)
    except (IOError, OSError) as err:
        logger.error('Error occurred when reading timeline intermediate file: %s', err)
        raise ProfilerIOException

    return timeline_list
def get_profile_device_list():
    """
    Get profile device list.

    Returns:
        list, the available device list.

    Raises:
        ParamValueError: If the search condition contains some errors.

    Examples:
        >>> POST http://xxxx/v1/mindinsight/profile/devices
    """
    profiler_dir = get_profiler_dir(request)
    train_id = get_train_id(request)
    if not profiler_dir or not train_id:
        raise ParamValueError("No profiler_dir or train_id.")

    profiler_dir_abs = os.path.join(settings.SUMMARY_BASE_DIR, train_id, profiler_dir)
    try:
        profiler_dir_abs = validate_and_normalize_path(profiler_dir_abs, "profiler")
    except ValidationError:
        raise ParamValueError("Invalid profiler dir")

    check_train_job_and_profiler_dir(profiler_dir_abs)
    device_list, _ = analyse_device_list_from_profiler_dir(profiler_dir_abs)
    return jsonify(device_list)
def _get_minddata_pipeline_info(self):
    """Get the number of workers for each minddata pipeline operator."""
    file_name = self._minddata_pipeline_display_filename.format(self._device_id)
    file_path = os.path.join(self._profiling_dir, file_name)
    file_path = validate_and_normalize_path(
        file_path, raise_key="Invalid minddata_pipeline_info file path.")
    if not os.path.exists(file_path):
        log.error('Did not find the minddata_pipeline file: %s', file_path)
        raise ProfilerFileNotFoundException(
            msg='Did not find the minddata_pipeline file:{}'.format(file_path))

    with open(file_path, 'r', encoding='utf-8') as file:
        try:
            minddata_pipeline_info = json.load(file)
        except json.JSONDecodeError as err:
            log.exception(err)
            raise ProfilerRawFileException("Failed to parse the minddata pipeline file.")

    minddata_pipeline_op_info = []
    for item in minddata_pipeline_info.get("op_info"):
        op_info_dict = dict()
        op_info_dict["op_id"] = item.get("op_id")
        op_info_dict["num_workers"] = item.get("num_workers")
        minddata_pipeline_op_info.append(op_info_dict)
    return minddata_pipeline_op_info
def write_min_cycle_counter_to_file(self):
    """Write the minimum cycle counter into a txt file."""
    min_cycle_counter = self._min_cycle_counter
    file_name = 'min_cycle_counter_' + self._device_id + '.txt'
    file_path = os.path.join(self._output_path, file_name)
    file_path = validate_and_normalize_path(
        file_path, raise_key='Invalid min cycle counter file path.')
    with open(file_path, 'w') as file:
        file.write(str(min_cycle_counter))
def __init__(self, subgraph='all', is_detail=True, is_show_op_path=False,
             output_path='./data', optypes_to_deal='', optypes_not_deal='Variable',
             job_id=""):
    # Get device_id and device_target. Initialize both up front so they are
    # defined even if importing the MindSpore context fails.
    dev_id = ""
    device_target = ""
    try:
        import mindspore.context as context
        dev_id = str(context.get_context("device_id"))
        device_target = context.get_context("device_target")
    except ImportError:
        logger.error("Profiling: fail to import context from mindspore.")
    except ValueError as err:
        logger.error("Profiling: fail to get context, %s", err)

    if not dev_id:
        dev_id = os.getenv('DEVICE_ID')
    if not dev_id:
        dev_id = "0"
        logger.error("Fail to get DEVICE_ID, use 0 instead.")

    if device_target and device_target not in ("Davinci", "Ascend"):
        msg = "Profiling: unsupported backend: %s" % device_target
        raise RuntimeError(msg)

    self._dev_id = dev_id
    self._container_path = os.path.join(self._base_profiling_container_path, dev_id)
    data_path = os.path.join(self._container_path, "data")
    if not os.path.exists(data_path):
        os.makedirs(data_path)
    self._output_path = validate_and_normalize_path(
        output_path, 'Profiler output path (' + output_path + ')')
    self._output_path = os.path.join(self._output_path, "profiler")
    if not os.path.exists(self._output_path):
        os.makedirs(self._output_path)

    os.environ['PROFILING_MODE'] = 'true'
    os.environ['PROFILING_OPTIONS'] = 'training_trace:task_trace'
    # Use the context interface to open profiling, for new MindSpore
    # versions (after 2020.5.21).
    try:
        import mindspore.context as context
        context.set_context(enable_profiling=True,
                            profiling_options="training_trace:task_trace")
    except ImportError:
        logger.error("Profiling: fail to import context from mindspore.")
    except ValueError as err:
        logger.error("Profiling: fail to set context, %s", err)

    os.environ['AICPU_PROFILING_MODE'] = 'true'
    os.environ['PROFILING_DIR'] = str(self._container_path)
    self._subgraph = check_subgraph(subgraph)
    self._valid_optype_name = optypes_to_deal.split(",") if optypes_to_deal else []
    self._filt_optype_names = optypes_not_deal.split(",") if optypes_not_deal else []
    self._detail = check_bool(is_detail, 'is_detail')
    self._withfullpath = check_bool(is_show_op_path, 'is_show_op_path')
    self._profiling_job_id = job_id
    self._start_time = int(time.time() * 10000000)
    logger.info("Profiling: profiling start time: %d", self._start_time)
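# A hedged usage sketch of this constructor; the keyword names come from the
# signature above, while the surrounding training code is only indicative:
#
#     profiler = Profiler(subgraph='all', is_detail=True, output_path='./data')
#     # ... build the network and run training; the profiling environment
#     # variables and MindSpore context are already configured by __init__ ...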
def __init__(self, hwts_output_file, output_filename, op_task_info,
             output_path, device_id):
    hwts_output_file = validate_and_normalize_path(
        hwts_output_file, raise_key='Invalid hwts output file path.')
    self._hwts_output_file = hwts_output_file
    self._output_filename = output_filename
    self._op_task_info = op_task_info
    self._output_path = output_path
    self._device_id = device_id
    self._min_cycle_counter = float("inf")
def _load(self):
    """Load data according to the parsed AICORE operator file."""
    op_detail_file_path = os.path.join(
        self._profiling_dir,
        self._file_name_aicore_detail_time.format(self._device_id))
    framework_file_path = os.path.join(
        self._profiling_dir,
        self._file_name_framework_info.format(self._device_id))
    op_detail_file_path = validate_and_normalize_path(
        op_detail_file_path, raise_key='Invalid aicore_detail file path.')
    framework_file_path = validate_and_normalize_path(
        framework_file_path, raise_key='Invalid framework file path.')
    if not os.path.isfile(op_detail_file_path):
        logger.warning('The file <%s> does not exist.', op_detail_file_path)
        return
    if not os.path.isfile(framework_file_path):
        logger.warning('The file <%s> does not exist.', framework_file_path)
        return

    framework_infos = dict()
    with open(framework_file_path, 'r') as file:
        csv_reader = csv.reader(file)
        _ = next(csv_reader)
        for info in csv_reader:
            framework_infos[info[3]] = self._convert_framework_field_type(info)

    with open(op_detail_file_path, 'r') as file:
        csv_reader = csv.reader(file)
        _ = next(csv_reader)
        for info in csv_reader:
            detail_info = self._get_op_detail_info(info, framework_infos)
            self._data.append(detail_info)

    del framework_infos
def get_profile_summary_proposal():
    """
    Get summary profiling proposal.

    Returns:
        str, the summary profiling proposal.

    Raises:
        ParamValueError: If the parameters contain some errors.

    Examples:
        >>> GET http://xxxx/v1/mindinsight/profile/summary/propose
    """
    profiler_dir = get_profiler_dir(request)
    train_id = get_train_id(request)
    device_id = get_device_id(request)
    if not profiler_dir or not train_id:
        raise ParamValueError("No profiler_dir or train_id.")
    to_int(device_id, 'device_id')

    profiler_dir_abs = os.path.join(settings.SUMMARY_BASE_DIR, train_id, profiler_dir)
    try:
        profiler_dir_abs = validate_and_normalize_path(profiler_dir_abs, "profiler")
    except ValidationError:
        raise ParamValueError("Invalid profiler dir")

    check_train_job_and_profiler_dir(profiler_dir_abs)

    step_trace_condition = {"filter_condition": {"mode": "proc",
                                                 "proc_name": "iteration_interval",
                                                 "step_id": 0}}
    options = {'step_trace': {"iter_interval": step_trace_condition}}

    proposal_type_list = ['step_trace', 'minddata', 'minddata_pipeline', 'common']
    proposal_obj = ComposeProposal(profiler_dir_abs, device_id, proposal_type_list)
    proposal_info = proposal_obj.get_proposal(options)
    # Use json.dumps for orderly return.
    return CustomResponse(json.dumps(proposal_info), mimetype='application/json')