예제 #1
0
def get_profile_summary_proposal():
    """
    Get summary profiling proposal.

    Returns:
        str, the summary profiling proposal.

    Raises:
        ParamValueError: If the parameters contain some errors.

    Examples:
        >>> GET http://xxxx/v1/mindinsight/profile/summary/propose
    """
    profiler_dir = get_profiler_dir(request)
    train_id = get_train_id(request)
    device_id = get_device_id(request)
    if not profiler_dir or not train_id:
        raise ParamValueError("No profiler_dir or train_id.")
    to_int(device_id, 'device_id')

    profiler_dir_abs = os.path.join(settings.SUMMARY_BASE_DIR, train_id,
                                    profiler_dir)
    try:
        profiler_dir_abs = validate_and_normalize_path(profiler_dir_abs,
                                                       "profiler")
    except ValidationError:
        raise ParamValueError("Invalid profiler dir")

    step_trace_condition = {
        "filter_condition": {
            "mode": "proc",
            "proc_name": "iteration_interval",
            "step_id": 0
        }
    }
    options = {'step_trace': {"iter_interval": step_trace_condition}}

    proposal_type_list = [
        'step_trace', 'minddata', 'minddata_pipeline', 'common'
    ]
    proposal_obj = ComposeProposal(profiler_dir_abs, device_id,
                                   proposal_type_list)
    proposal_info = proposal_obj.get_proposal(options)
    # Use json.dumps for orderly return
    return CustomResponse(json.dumps(proposal_info),
                          mimetype='application/json')
예제 #2
0
def get_profile_op_info():
    """
    Get operation profiling info.

    Returns:
        str, the operation profiling information.

    Raises:
        ParamValueError: If the search condition contains some errors.

    Examples:
        >>> POST http://xxxx/v1/mindinsight/profile/ops/search
    """
    profiler_dir = get_profiler_dir(request)
    train_id = get_train_id(request)
    if not profiler_dir or not train_id:
        raise ParamValueError("No profiler_dir or train_id.")

    search_condition = request.stream.read()
    try:
        search_condition = json.loads(
            search_condition if search_condition else "{}")
    except (json.JSONDecodeError, ValueError):
        raise ParamValueError("Json data parse failed.")
    validate_condition(search_condition)

    device_id = search_condition.get("device_id", "0")
    to_int(device_id, 'device_id')
    profiler_dir_abs = os.path.join(settings.SUMMARY_BASE_DIR, train_id,
                                    profiler_dir)
    try:
        profiler_dir_abs = validate_and_normalize_path(profiler_dir_abs,
                                                       "profiler")
    except ValidationError:
        raise ParamValueError("Invalid profiler dir")

    op_type = search_condition.get("op_type")

    analyser = AnalyserFactory.instance().get_analyser(op_type,
                                                       profiler_dir_abs,
                                                       device_id)

    op_info = analyser.query(search_condition)
    return jsonify(op_info)
예제 #3
0
def get_minddata_pipeline_op_queue_info():
    """
    Get minddata pipeline operator info and queue info.

    Returns:
        str, the operation information and queue information.

    Raises:
        ParamValueError: If the search condition contains some errors.

    Examples:
        >>> POST http://xxxx/v1/mindinsight/profile/minddata-pipeline/op-queue
    """
    profiler_dir = get_profiler_dir(request)
    train_id = get_train_id(request)
    if not profiler_dir or not train_id:
        raise ParamValueError("No profiler_dir or train_id.")

    profiler_dir_abs = os.path.join(settings.SUMMARY_BASE_DIR, train_id,
                                    profiler_dir)
    try:
        profiler_dir_abs = validate_and_normalize_path(profiler_dir_abs,
                                                       "profiler")
    except ValidationError:
        raise ParamValueError("Invalid profiler dir.")

    check_train_job_and_profiler_dir(profiler_dir_abs)
    condition = request.stream.read()
    try:
        condition = json.loads(condition) if condition else {}
    except Exception:
        raise ParamValueError("Json data parse failed.")
    validate_minddata_pipeline_condition(condition)

    device_id = condition.get("device_id", "0")
    to_int(device_id, 'device_id')
    analyser = AnalyserFactory.instance().get_analyser('minddata_pipeline',
                                                       profiler_dir_abs,
                                                       device_id)
    op_info = analyser.query(condition)
    return jsonify(op_info)
예제 #4
0
def get_training_trace_graph():
    """
    Get training trace info of one step.

    Returns:
        Response, the training trace info of one step.

    Examples:
        >>> GET http://xxxx/v1/mindinsight/profile/training-trace/graph
    """
    summary_dir = request.args.get("dir")
    profiler_dir_abs = validate_and_normalize_profiler_path(
        summary_dir, settings.SUMMARY_BASE_DIR)
    check_train_job_and_profiler_dir(profiler_dir_abs)

    graph_type = request.args.get("type", default='0')
    graph_type = to_int(graph_type, 'graph_type')
    device_id = request.args.get("device_id", default='0')
    to_int(device_id, 'device_id')
    graph_info = {}
    try:
        analyser = AnalyserFactory.instance().get_analyser(
            'step_trace', profiler_dir_abs, device_id)
    except ProfilerFileNotFoundException:
        return jsonify(graph_info)

    graph_info = analyser.query(
        {'filter_condition': {
            'mode': 'step',
            'step_id': graph_type
        }})
    graph_info['summary'] = analyser.summary
    graph_info['point_info'] = analyser.point_info(graph_type)
    graph_info['is_heterogeneous'] = False

    # In heterogeneous training scene, do not display step trace data.
    cpu_op_type_file_name = f"cpu_op_type_info_{device_id}.csv"
    if cpu_op_type_file_name in os.listdir(profiler_dir_abs):
        graph_info = {'is_heterogeneous': True}

    return jsonify(graph_info)
예제 #5
0
def get_minddata_cpu_utilization_info():
    """
    Get minddata cpu utilization info.

    Returns:
        str, the minddata cpu utilization info.

    Raises:
        ParamValueError: If the search condition contains some errors.

    Examples:
        >>>POST http://xxx/v1/mindinsight/profile/minddata-cpu-utilization-summary
    """
    profiler_dir = get_profiler_dir(request)
    train_id = get_train_id(request)
    if not profiler_dir or not train_id:
        raise ParamValueError("No profiler_dir or train_id.")

    profiler_dir_abs = os.path.join(settings.SUMMARY_BASE_DIR, train_id,
                                    profiler_dir)

    try:
        profiler_dir_abs = validate_and_normalize_path(profiler_dir_abs,
                                                       "profiler")
    except ValidationError:
        raise ParamValueError("Invalid profiler dir.")

    check_train_job_and_profiler_dir(profiler_dir_abs)
    condition = request.stream.read()
    try:
        condition = json.loads(condition) if condition else {}
    except (json.JSONDecodeError, ValueError):
        raise ParamValueError("Json data parse failed.")

    device_id = condition.get("device_id", "0")
    to_int(device_id, 'device_id')
    analyser = AnalyserFactory.instance().get_analyser(
        'minddata_cpu_utilization', profiler_dir_abs, device_id)
    cpu_utilization = analyser.query(condition)
    return jsonify(cpu_utilization)
예제 #6
0
def get_minddata_pipeline_queue_info():
    """
    Get the special minddata pipeline queue info.

    Returns:
        str, the queue information.

    Raises:
        ParamValueError: If the search condition contains some errors.

    Examples:
        >>> GET http://xxxx/v1/mindinsight/profile/minddata-pipeline/queue
    """
    profiler_dir = get_profiler_dir(request)
    train_id = get_train_id(request)
    if not profiler_dir or not train_id:
        raise ParamValueError("No profiler_dir or train_id.")

    profiler_dir_abs = os.path.join(settings.SUMMARY_BASE_DIR, train_id,
                                    profiler_dir)
    try:
        profiler_dir_abs = validate_and_normalize_path(profiler_dir_abs,
                                                       "profiler")
    except ValidationError:
        raise ParamValueError("Invalid profiler dir.")

    check_train_job_and_profiler_dir(profiler_dir_abs)

    device_id = request.args.get('device_id', default='0')
    to_int(device_id, 'device_id')
    op_id = request.args.get('op_id', type=int)
    if op_id is None:
        raise ParamValueError(
            "Invalid operator id or operator id does not exist.")

    analyser = AnalyserFactory.instance().get_analyser('minddata_pipeline',
                                                       profiler_dir_abs,
                                                       device_id)
    op_queue_info = analyser.get_op_and_parent_op_info(op_id)
    return jsonify(op_queue_info)
예제 #7
0
    def _get_train_tensors(self, train_id, tags, step, dims, detail):
        """
        Get tensor data for given train_id, tags, step, dims and detail.

        Args:
            train_id (str): Specify list of train job ID.
            tags (list): Specify list of tag.
            step (int): Specify step of tensor, it's necessary when detail is set to 'data'.
            dims (str): Specify dims of tensor, it's necessary when detail is set to 'data'.
            detail (str): Specify which data to query, available values: 'stats', 'histogram' and 'data'.

        Returns:
            list[dict], a list of dictionaries containing the `train_id`, `tag`, `values`.

        Raises:
            TensorNotExistError, If tensor with specific train_id and tag is not exist in cache.
            ParamValueError, If the value of detail is not within available values:
                            'stats', 'histogram' and 'data'.
        """

        tensors_response = []
        for tag in tags:
            try:
                tensors = self._data_manager.list_tensors(train_id, tag)
            except ParamValueError as err:
                raise TensorNotExistError(err.message)

            if tensors and not isinstance(tensors[0].value, TensorContainer):
                raise TensorNotExistError(
                    "there is no tensor data in this tag: {}".format(tag))

            if detail is None or detail == 'stats':
                values = self._get_tensors_summary(detail, tensors)
            elif detail == 'data':
                Validation.check_param_empty(step=step, dims=dims)
                # Limit to query max two dimensions for tensor in table view.
                dims = TensorUtils.parse_shape(dims,
                                               limit=MAX_DIMENSIONS_FOR_TENSOR)
                step = to_int(step, "step")
                values = self._get_tensors_data(step, dims, tensors)
            elif detail == 'histogram':
                values = self._get_tensors_histogram(tensors)
            else:
                raise ParamValueError(
                    'Can not support this value: {} of detail.'.format(detail))

            tensor = {"train_id": train_id, "tag": tag, "values": values}
            tensors_response.append(tensor)

        return tensors_response
예제 #8
0
def get_flops_summary():
    """
    Get flops summary info.

    Returns:
        Response, the flops summary info.

    Examples:
        >>> GET http://xxxx/v1/mindinsight/profile/flops-summary
    """
    train_id = request.args.get("train_id")
    profiler_dir_abs = validate_and_normalize_profiler_path(
        train_id, settings.SUMMARY_BASE_DIR)
    check_train_job_and_profiler_dir(profiler_dir_abs)

    device_id = request.args.get("device_id", default='0')
    to_int(device_id, 'device_id')

    analyser = AnalyserFactory.instance().get_analyser('flops',
                                                       profiler_dir_abs,
                                                       device_id)
    summary = analyser.get_flops_summary()

    return summary
    def check_offset(cls, offset, default_value=0):
        """
        Check offset parameter, it must be greater or equal 0.

        Args:
            offset (Union[str, int]): Value can be string number or int.
            default_value (int): Default value for checked offset. Default: 0.

        Returns:
            int, offset.
        """

        if offset is None:
            return default_value
        offset = to_int(offset, 'offset')
        if offset < 0:
            raise ParamValueError("'offset' should be greater than or equal to 0.")
        return offset
예제 #10
0
    def _get_info_dict_from_row_data(self, row_info, time_type):
        """
        Get step info in dict format.

        Args:
            row_info (list[str]): Step info, the value is corresponding to `__column__`.
            time_type (str): The value type. `systime` keeps the original value.
                `realtime` transforms the value in millisecond. Default: `realtime`.

        Returns:
            dict, step trace information. The key is in `__column__`.
        """
        row_info_dict = {}
        for key, value in zip(self.__column__, row_info):
            if key == 'step_num':
                continue
            value = to_int(value, key)
            row_info_dict[key] = to_millisecond(value) if time_type == 'realtime' else value
        return row_info_dict
예제 #11
0
def get_field_value(row_info, field_name, header, time_type='realtime'):
    """
    Extract basic info through row_info.

    Args:
        row_info (list): The list of data info in one row.
        field_name (str): The name in header.
        header (list[str]): The list of field names.
        time_type (str): The type of value, `realtime` or `systime`. Default: `realtime`.

    Returns:
        dict, step trace info in dict format.
    """
    field_index = header.index(field_name)
    value = row_info[field_index]
    value = to_int(value, field_name)
    if time_type == 'realtime':
        value = to_millisecond(value)

    return value
예제 #12
0
def get_timeline_detail():
    """
    Get timeline detail.

    Returns:
        Response, the detail information of timeline.

    Examples:
        >>> GET http://xxxx/v1/mindinsight/profile/timeline
    """
    summary_dir = request.args.get("dir")
    profiler_dir = validate_and_normalize_profiler_path(
        summary_dir, settings.SUMMARY_BASE_DIR)
    device_id = request.args.get("device_id", default='0')
    _ = to_int(device_id, 'device_id')

    analyser = AnalyserFactory.instance().get_analyser('timeline',
                                                       profiler_dir, device_id)
    timeline = analyser.get_display_timeline()

    return jsonify(timeline)
예제 #13
0
def get_timeline_summary():
    """
    Get timeline summary info.

    Returns:
        Response, the timeline summary info.

    Examples:
        >>> GET http://xxxx/v1/mindinsight/profile/timeline-summary
    """
    summary_dir = request.args.get("dir")
    profiler_dir = validate_and_normalize_profiler_path(
        summary_dir, settings.SUMMARY_BASE_DIR)
    device_id = request.args.get("device_id", default='0')
    _ = to_int(device_id, 'device_id')

    analyser = AnalyserFactory.instance().get_analyser('timeline',
                                                       profiler_dir, device_id)
    summary = analyser.get_timeline_summary()

    return summary
    def check_limit(cls, limit, min_value=1, max_value=1000, default_value=100):
        """
        Check limit parameter, it should between min_value and max_value.

        Args:
            limit (Union[str, int]): Value can be string number or int.
            min_value (int): Limit should greater or equal this value. Default: 1.
            max_value (int): Limit should less or equal this value. Default: 1000.
            default_value (int): Default value for limit. Default: 100.

        Returns:
            int, limit.
        """

        if limit is None:
            return default_value

        limit = to_int(limit, 'limit')
        if limit < min_value or limit > max_value:
            raise ParamValueError("'limit' should in [{}, {}].".format(min_value, max_value))
        return limit
예제 #15
0
def validate_and_set_job_id_env(job_id_env):
    """
    Validate the job id and set it in environment.

    Args:
        job_id_env (str): The id that to be set in environment parameter `JOB_ID`.

    Returns:
        int, the valid job id env.
    """
    if job_id_env is None:
        return job_id_env
    # get job_id_env in int type
    valid_id = to_int(job_id_env, 'job_id_env')
    # check the range of valid_id
    if valid_id and 255 < valid_id < sys.maxsize:
        os.environ['JOB_ID'] = job_id_env
    else:
        log.warning(
            "Invalid job_id_env %s. The value should be int and between 255 and %s. Use"
            "default job id env instead.", job_id_env, sys.maxsize)
    return valid_id
예제 #16
0
def get_timeline_summary():
    """
    Get timeline summary info.

    Returns:
        Response, the timeline summary info.

    Examples:
        >>> GET http://xxxx/v1/mindinsight/profile/timeline-summary
    """
    summary_dir = request.args.get("dir")
    profiler_dir = validate_and_normalize_profiler_path(
        summary_dir, settings.SUMMARY_BASE_DIR)
    if not os.path.exists(profiler_dir):
        msg = 'The profiler dir is not found!'
        raise ProfilerDirNotFoundException(msg=msg)
    device_id = request.args.get("device_id", default='0')
    _ = to_int(device_id, 'device_id')

    analyser = AnalyserFactory.instance().get_analyser('timeline',
                                                       profiler_dir, device_id)
    summary = analyser.get_timeline_summary()

    return summary