Esempio n. 1
0
def get_memory_usage_graphics():
    """
    Serve the memory usage graphics of one device of a profiling job.

    Query parameters read from the request: ``dir`` (resolved against
    ``settings.SUMMARY_BASE_DIR``), ``device_id`` (default '0') and
    ``device_type`` (default 'ascend').

    Returns:
        The value produced by the memory usage analyser's
        ``get_memory_usage_graphics``, passed through unchanged.

    Raises:
        ParamValueError: If device_type is anything other than 'ascend'.

    Examples:
        >>> GET http://xxxx/v1/mindinsight/profile/memory-graphics
    """
    query = request.args
    profiler_path = validate_and_normalize_profiler_path(
        query.get("dir"), settings.SUMMARY_BASE_DIR)
    check_train_job_and_profiler_dir(profiler_path)

    device_id = query.get("device_id", default='0')
    # Return value intentionally unused; presumably to_int rejects ids that
    # are not integers -- confirm against its definition.
    to_int(device_id, 'device_id')

    device_type = query.get("device_type", default='ascend')
    if device_type != 'ascend':
        logger.info(
            "Invalid device_type, Memory Usage only supports Ascend for now.")
        raise ParamValueError("Invalid device_type.")

    memory_analyser = AnalyserFactory.instance().get_analyser(
        'memory_usage', profiler_path, device_id)
    return memory_analyser.get_memory_usage_graphics(device_type)
Esempio n. 2
0
def get_memory_usage_breakdowns():
    """
    Serve the memory breakdowns of a single node.

    Query parameters read from the request: ``dir`` (resolved against
    ``settings.SUMMARY_BASE_DIR``), ``device_id`` (default '0'),
    ``device_type`` (default 'ascend'), ``graph_id`` (default '0') and
    ``node_id`` (default '0', converted with ``to_int``).

    Returns:
        The value produced by the memory usage analyser's
        ``get_memory_usage_breakdowns``, passed through unchanged.

    Raises:
        ParamValueError: If device_type is anything other than 'ascend'.

    Examples:
        >>> GET http://xxxx/v1/mindinsight/profile/memory-breakdowns
    """
    query = request.args
    profiler_path = validate_and_normalize_profiler_path(
        query.get("dir"), settings.SUMMARY_BASE_DIR)
    check_train_job_and_profiler_dir(profiler_path)

    device_id = query.get("device_id", default='0')
    # Only the check matters here; the raw string is what is forwarded.
    to_int(device_id, 'device_id')

    device_type = query.get("device_type", default='ascend')
    graph_id = query.get("graph_id", default='0')
    # Unlike device_id, the converted node id is the value handed on.
    node_id = to_int(query.get("node_id", default='0'), 'node_id')

    if device_type != 'ascend':
        logger.error(
            "Invalid device_type, Memory Usage only supports Ascend for now.")
        raise ParamValueError("Invalid device_type.")

    memory_analyser = AnalyserFactory.instance().get_analyser(
        'memory_usage', profiler_path, device_id)
    return memory_analyser.get_memory_usage_breakdowns(
        device_type, graph_id, node_id)
Esempio n. 3
0
def get_timeline_detail():
    """
    Serve the detailed timeline of one device.

    Query parameters read from the request: ``dir`` (resolved against
    ``settings.SUMMARY_BASE_DIR``), ``device_id`` (default '0'),
    ``device_type`` (default 'ascend') and ``scope_name_num``
    (default '0', forwarded as received).

    Returns:
        Response, the analyser's display timeline wrapped by ``jsonify``.

    Raises:
        ParamValueError: If device_type is neither 'gpu' nor 'ascend'.

    Examples:
        >>> GET http://xxxx/v1/mindinsight/profile/timeline
    """
    query = request.args
    profiler_path = validate_and_normalize_profiler_path(
        query.get("dir"), settings.SUMMARY_BASE_DIR)
    check_train_job_and_profiler_dir(profiler_path)

    device_id = query.get("device_id", default='0')
    # Result discarded on purpose; the call is kept for its validation.
    to_int(device_id, 'device_id')

    device_type = query.get("device_type", default='ascend')
    scope_name_num = query.get("scope_name_num", default='0')
    if device_type not in ('gpu', 'ascend'):
        logger.info(
            "Invalid device_type, device_type should be gpu or ascend.")
        raise ParamValueError("Invalid device_type.")

    timeline_analyser = AnalyserFactory.instance().get_analyser(
        'timeline', profiler_path, device_id)
    return jsonify(
        timeline_analyser.get_display_timeline(device_type, scope_name_num))
Esempio n. 4
0
def get_profile_device_list():
    """
    Serve the list of devices found in a profiler directory.

    The profiler directory and train id are extracted from the request and
    joined under ``settings.SUMMARY_BASE_DIR``.

    Returns:
        Response, the device list wrapped by ``jsonify``.

    Raises:
        ParamValueError: If the profiler dir or train id is missing, or the
            resulting path fails validation.

    Examples:
        >>> POST http://xxxx/v1/mindinsight/profile/devices
    """
    profiler_dir = get_profiler_dir(request)
    train_id = get_train_id(request)
    if not (profiler_dir and train_id):
        raise ParamValueError("No profiler_dir or train_id.")

    joined_path = os.path.join(
        settings.SUMMARY_BASE_DIR, train_id, profiler_dir)
    try:
        profiler_path = validate_and_normalize_path(joined_path, "profiler")
    except ValidationError:
        raise ParamValueError("Invalid profiler dir")

    check_train_job_and_profiler_dir(profiler_path)

    # The helper returns a pair; only its first element is served.
    devices, _ = analyse_device_list_from_profiler_dir(profiler_path)
    return jsonify(devices)
Esempio n. 5
0
def get_timeline_summary():
    """
    Serve the timeline summary of one device.

    Query parameters read from the request: ``dir`` (resolved against
    ``settings.SUMMARY_BASE_DIR``), ``device_id`` (default '0') and
    ``device_type`` (default 'ascend').

    Returns:
        The value produced by the timeline analyser's
        ``get_timeline_summary``, passed through unchanged.

    Raises:
        ParamValueError: If device_type is neither 'gpu' nor 'ascend'.

    Examples:
        >>> GET http://xxxx/v1/mindinsight/profile/timeline-summary
    """
    query = request.args
    profiler_path = validate_and_normalize_profiler_path(
        query.get("dir"), settings.SUMMARY_BASE_DIR)
    check_train_job_and_profiler_dir(profiler_path)

    device_id = query.get("device_id", default='0')
    # Result discarded on purpose; the call is kept for its validation.
    to_int(device_id, 'device_id')

    device_type = query.get("device_type", default='ascend')
    if device_type not in ('gpu', 'ascend'):
        logger.info(
            "Invalid device_type, device_type should be gpu or ascend.")
        raise ParamValueError("Invalid device_type.")

    timeline_analyser = AnalyserFactory.instance().get_analyser(
        'timeline', profiler_path, device_id)
    return timeline_analyser.get_timeline_summary(device_type)
Esempio n. 6
0
def get_time_info():
    """
    Serve the time information of a minddata operation.

    The ``type`` request argument selects the source: 'get_next' or
    'device_queue'. Any other value yields an empty placeholder structure.

    Returns:
        Response, the time information wrapped by ``jsonify``.

    Examples:
        >>> GET http://xxxx/v1/mindinsight/profile/minddata_op
    """
    profiler_path = get_profiler_abs_dir(request)
    check_train_job_and_profiler_dir(profiler_path)

    device_id = unquote_args(request, "device_id")
    # Result discarded on purpose; the call is kept for its validation.
    to_int(device_id, 'device_id')
    op_type = unquote_args(request, "type")

    # The analyser is created before branching, so it is built even for an
    # unrecognised type.
    analyser = AnalyserFactory.instance().get_analyser(
        'minddata', profiler_path, device_id)

    if op_type == "get_next":
        _, time_info = analyser.analyse_get_next_info(info_type="time")
    elif op_type == "device_queue":
        _, time_info = analyser.analyse_device_queue_info(info_type="time")
    else:
        # Empty skeleton served when the type matches neither source.
        time_info = {
            'size': 0,
            'info': [],
            "summary": {
                "time_summary": {}
            },
            "advise": {}
        }

    return jsonify(time_info)
Esempio n. 7
0
def get_process_summary():
    """
    Serve the queue summary combining get_next and device_queue data.

    Returns:
        Response, the result of ``MinddataAnalyser.analyse_queue_summary``
        wrapped by ``jsonify``.

    Examples:
        >>> GET http://xxxx/v1/mindinsight/profile/process_summary
    """
    profiler_path = get_profiler_abs_dir(request)
    check_train_job_and_profiler_dir(profiler_path)

    device_id = unquote_args(request, "device_id")
    # Result discarded on purpose; the call is kept for its validation.
    to_int(device_id, 'device_id')

    analyser = AnalyserFactory.instance().get_analyser(
        'minddata', profiler_path, device_id)

    # Both calls return a pair; only the first element (queue info) is used.
    next_queue, _ = analyser.analyse_get_next_info(info_type="queue")
    dev_queue, _ = analyser.analyse_device_queue_info(info_type="queue")

    return jsonify(
        MinddataAnalyser.analyse_queue_summary(next_queue, dev_queue))
Esempio n. 8
0
def get_queue_info():
    """
    Serve the queue information of one queue type.

    The ``type`` request argument selects the source: 'get_next' or
    'device_queue'. Any other value yields an empty dict.

    Returns:
        Response, the queue information wrapped by ``jsonify``.

    Examples:
        >>> GET http://xxxx/v1/mindinsight/profile/queue_info
    """
    profiler_path = get_profiler_abs_dir(request)
    check_train_job_and_profiler_dir(profiler_path)

    device_id = unquote_args(request, "device_id")
    # Result discarded on purpose; the call is kept for its validation.
    to_int(device_id, 'device_id')
    queue_type = unquote_args(request, "type")

    # The analyser is created before branching, so it is built even for an
    # unrecognised type.
    analyser = AnalyserFactory.instance().get_analyser(
        'minddata', profiler_path, device_id)

    if queue_type == "get_next":
        queue_info, _ = analyser.analyse_get_next_info(info_type="queue")
    elif queue_type == "device_queue":
        queue_info, _ = analyser.analyse_device_queue_info(info_type="queue")
    else:
        queue_info = {}

    return jsonify(queue_info)
Esempio n. 9
0
def get_target_time_info():
    """
    Serve every time record of the requested step trace column.

    Query parameters read from the request: ``dir`` (resolved against
    ``settings.SUMMARY_BASE_DIR``), ``type`` (the process name, checked by
    ``validate_ui_proc``) and ``device_id`` (default '0').

    Returns:
        Response, the query result extended with the analyser summary,
        wrapped by ``jsonify``.

    Examples:
        >>> GET http://xxxx/v1/mindinsight/profile/training-trace/target-time-info
    """
    query = request.args
    profiler_path = validate_and_normalize_profiler_path(
        query.get("dir"), settings.SUMMARY_BASE_DIR)
    check_train_job_and_profiler_dir(profiler_path)

    proc_name = query.get("type")
    validate_ui_proc(proc_name)

    device_id = query.get("device_id", default='0')
    # Result discarded on purpose; the call is kept for its validation.
    to_int(device_id, 'device_id')

    trace_analyser = AnalyserFactory.instance().get_analyser(
        'step_trace', profiler_path, device_id)

    condition = {
        'filter_condition': {
            'mode': 'proc',
            'proc_name': proc_name
        }
    }
    time_info = trace_analyser.query(condition)
    time_info['summary'] = trace_analyser.summary
    return jsonify(time_info)
Esempio n. 10
0
def get_cluster_flops():
    """
    Get cluster FLOPs.

    The train id is extracted from the request and joined under
    ``settings.SUMMARY_BASE_DIR`` to form the cluster profiler directory.

    Returns:
        Response, the cluster FLOPs wrapped by ``jsonify``.

    Raises:
        ParamValueError: If the train id is missing or the cluster profiler
            dir is invalid.

    Examples:
        >>>GET http://xxx/v1/mindinsight/profile/cluster-flops
    """
    train_id = get_train_id(request)
    if not train_id:
        raise ParamValueError('No train id.')

    cluster_profiler_dir = os.path.join(settings.SUMMARY_BASE_DIR, train_id)
    # Convert a path validation failure into the documented ParamValueError,
    # matching how the other handlers treat validate_and_normalize_path.
    try:
        cluster_profiler_dir = validate_and_normalize_path(
            cluster_profiler_dir, 'cluster_profiler')
    except ValidationError as err:
        raise ParamValueError('Invalid cluster_profiler dir.') from err

    check_train_job_and_profiler_dir(cluster_profiler_dir)

    analyser = AnalyserFactory.instance().get_analyser('cluster_flops',
                                                       cluster_profiler_dir)
    flops = analyser.get_flops()
    return jsonify(flops)
Esempio n. 11
0
def get_training_trace_graph():
    """
    Serve the training trace information of a single step.

    Query parameters read from the request: ``dir`` (resolved against
    ``settings.SUMMARY_BASE_DIR``), ``type`` (default '0', converted with
    ``to_int`` and used as the step id) and ``device_id`` (default '0').

    Returns:
        Response, the step query result extended with the analyser's
        ``summary`` and ``point_info``, wrapped by ``jsonify``. An empty
        JSON object is returned when creating the analyser raises
        ProfilerFileNotFoundException.

    Examples:
        >>> GET http://xxxx/v1/mindinsight/profile/training-trace/graph
    """
    query = request.args
    profiler_path = validate_and_normalize_profiler_path(
        query.get("dir"), settings.SUMMARY_BASE_DIR)
    check_train_job_and_profiler_dir(profiler_path)

    step_id = to_int(query.get("type", default='0'), 'graph_type')
    device_id = query.get("device_id", default='0')
    # Result discarded on purpose; the call is kept for its validation.
    to_int(device_id, 'device_id')

    try:
        trace_analyser = AnalyserFactory.instance().get_analyser(
            'step_trace', profiler_path, device_id)
    except ProfilerFileNotFoundException:
        # Missing profiler file: answer with an empty object, not an error.
        return jsonify({})

    condition = {
        'filter_condition': {
            'mode': 'step',
            'step_id': step_id
        }
    }
    trace_info = trace_analyser.query(condition)
    trace_info['summary'] = trace_analyser.summary
    trace_info['point_info'] = trace_analyser.point_info
    return jsonify(trace_info)
Esempio n. 12
0
def get_profile_summary_proposal():
    """
    Serve the summary profiling proposal.

    The profiler directory, train id and device id are extracted from the
    request; the first two are joined under ``settings.SUMMARY_BASE_DIR``.

    Returns:
        CustomResponse, the proposal serialised with ``json.dumps`` and
        sent with the 'application/json' mimetype.

    Raises:
        ParamValueError: If the profiler dir or train id is missing, or the
            resulting path fails validation.

    Examples:
        >>> GET http://xxxx/v1/mindinsight/profile/summary/propose
    """
    profiler_dir = get_profiler_dir(request)
    train_id = get_train_id(request)
    device_id = get_device_id(request)
    if not (profiler_dir and train_id):
        raise ParamValueError("No profiler_dir or train_id.")
    # Result discarded on purpose; the call is kept for its validation.
    to_int(device_id, 'device_id')

    joined_path = os.path.join(
        settings.SUMMARY_BASE_DIR, train_id, profiler_dir)
    try:
        profiler_path = validate_and_normalize_path(joined_path, "profiler")
    except ValidationError:
        raise ParamValueError("Invalid profiler dir")

    check_train_job_and_profiler_dir(profiler_path)

    # Options handed to the proposer: the step trace proposal receives a
    # query condition for the iteration interval.
    options = {
        'step_trace': {
            "iter_interval": {
                "filter_condition": {
                    "mode": "proc",
                    "proc_name": "iteration_interval",
                    "step_id": 0
                }
            }
        }
    }
    proposal_types = ['step_trace', 'minddata', 'minddata_pipeline', 'common']

    composer = ComposeProposal(profiler_path, device_id, proposal_types)
    proposal = composer.get_proposal(options)

    # Serialised with json.dumps (rather than jsonify) so the proposal is
    # returned in order, as the original implementation notes.
    return CustomResponse(json.dumps(proposal), mimetype='application/json')
Esempio n. 13
0
def get_profile_op_info():
    """
    Serve operator profiling information matching a search condition.

    The search condition is the JSON request body (an empty body is treated
    as ``{}``). Its ``device_id`` (default "0") and ``op_type`` entries
    select the analyser, and the whole condition is passed to its query.

    Returns:
        Response, the query result wrapped by ``jsonify``.

    Raises:
        ParamValueError: If the profiler dir or train id is missing, the
            body is not valid JSON, or the resulting path fails validation.

    Examples:
        >>> POST http://xxxx/v1/mindinsight/profile/ops/search
    """
    profiler_dir = get_profiler_dir(request)
    train_id = get_train_id(request)
    if not (profiler_dir and train_id):
        raise ParamValueError("No profiler_dir or train_id.")

    raw_body = request.stream.read()
    try:
        search_condition = json.loads(raw_body or "{}")
    except (json.JSONDecodeError, ValueError):
        raise ParamValueError("Json data parse failed.")
    validate_condition(search_condition)

    device_id = search_condition.get("device_id", "0")
    # Result discarded on purpose; the call is kept for its validation.
    to_int(device_id, 'device_id')

    joined_path = os.path.join(
        settings.SUMMARY_BASE_DIR, train_id, profiler_dir)
    try:
        profiler_path = validate_and_normalize_path(joined_path, "profiler")
    except ValidationError:
        raise ParamValueError("Invalid profiler dir")

    check_train_job_and_profiler_dir(profiler_path)

    # The analyser kind is chosen by the caller through "op_type".
    op_analyser = AnalyserFactory.instance().get_analyser(
        search_condition.get("op_type"), profiler_path, device_id)
    return jsonify(op_analyser.query(search_condition))
Esempio n. 14
0
def get_minddata_pipeline_op_queue_info():
    """
    Serve minddata pipeline operator and queue information.

    The condition is the JSON request body (an empty body is treated as
    ``{}``) and is checked by ``validate_minddata_pipeline_condition``
    before being passed to the analyser query.

    Returns:
        Response, the query result wrapped by ``jsonify``.

    Raises:
        ParamValueError: If the profiler dir or train id is missing, the
            resulting path fails validation, or parsing the body fails.

    Examples:
        >>> POST http://xxxx/v1/mindinsight/profile/minddata-pipeline/op-queue
    """
    profiler_dir = get_profiler_dir(request)
    train_id = get_train_id(request)
    if not (profiler_dir and train_id):
        raise ParamValueError("No profiler_dir or train_id.")

    joined_path = os.path.join(
        settings.SUMMARY_BASE_DIR, train_id, profiler_dir)
    try:
        profiler_path = validate_and_normalize_path(joined_path, "profiler")
    except ValidationError:
        raise ParamValueError("Invalid profiler dir.")

    check_train_job_and_profiler_dir(profiler_path)

    raw_body = request.stream.read()
    try:
        if raw_body:
            condition = json.loads(raw_body)
        else:
            condition = {}
    except Exception:
        # Any failure while decoding the body is reported as a bad parameter.
        raise ParamValueError("Json data parse failed.")
    validate_minddata_pipeline_condition(condition)

    device_id = condition.get("device_id", "0")
    # Result discarded on purpose; the call is kept for its validation.
    to_int(device_id, 'device_id')

    pipeline_analyser = AnalyserFactory.instance().get_analyser(
        'minddata_pipeline', profiler_path, device_id)
    return jsonify(pipeline_analyser.query(condition))
Esempio n. 15
0
def get_training_trace_graph():
    """
    Serve the training trace information of a single step.

    Query parameters read from the request: ``dir`` (resolved against
    ``settings.SUMMARY_BASE_DIR``), ``type`` (default '0', converted with
    ``to_int`` and used as the step id) and ``device_id`` (default '0').

    Returns:
        Response, one of three JSON payloads: an empty object when creating
        the analyser raises ProfilerFileNotFoundException; only
        ``{'is_heterogeneous': True}`` when a ``cpu_op_type_info_<id>.csv``
        file is present in the profiler directory; otherwise the step query
        result with ``summary``, ``point_info`` and
        ``is_heterogeneous`` set to False.

    Examples:
        >>> GET http://xxxx/v1/mindinsight/profile/training-trace/graph
    """
    query = request.args
    profiler_path = validate_and_normalize_profiler_path(
        query.get("dir"), settings.SUMMARY_BASE_DIR)
    check_train_job_and_profiler_dir(profiler_path)

    step_id = to_int(query.get("type", default='0'), 'graph_type')
    device_id = query.get("device_id", default='0')
    # Result discarded on purpose; the call is kept for its validation.
    to_int(device_id, 'device_id')

    try:
        trace_analyser = AnalyserFactory.instance().get_analyser(
            'step_trace', profiler_path, device_id)
    except ProfilerFileNotFoundException:
        # Missing profiler file: answer with an empty object, not an error.
        return jsonify({})

    condition = {
        'filter_condition': {
            'mode': 'step',
            'step_id': step_id
        }
    }
    trace_info = trace_analyser.query(condition)
    trace_info['summary'] = trace_analyser.summary
    trace_info['point_info'] = trace_analyser.point_info(step_id)
    trace_info['is_heterogeneous'] = False

    # In heterogeneous training scene, do not display step trace data: the
    # presence of the per-device CPU op type file replaces the whole payload.
    cpu_marker = f"cpu_op_type_info_{device_id}.csv"
    if cpu_marker in os.listdir(profiler_path):
        return jsonify({'is_heterogeneous': True})

    return jsonify(trace_info)
Esempio n. 16
0
def get_minddata_cpu_utilization_info():
    """
    Get minddata cpu utilization info.

    The condition is the JSON request body (an empty body is treated as
    ``{}``). Its ``device_id`` entry (default "0") selects the device, and
    the whole condition is passed to the analyser query.

    Returns:
        Response, the minddata cpu utilization info wrapped by ``jsonify``.

    Raises:
        ParamValueError: If the profiler dir or train id is missing, the
            resulting path fails validation, the body is not valid JSON, or
            the body is valid JSON but not a JSON object.

    Examples:
        >>>POST http://xxx/v1/mindinsight/profile/minddata-cpu-utilization-summary
    """
    profiler_dir = get_profiler_dir(request)
    train_id = get_train_id(request)
    if not profiler_dir or not train_id:
        raise ParamValueError("No profiler_dir or train_id.")

    profiler_dir_abs = os.path.join(settings.SUMMARY_BASE_DIR, train_id,
                                    profiler_dir)

    try:
        profiler_dir_abs = validate_and_normalize_path(profiler_dir_abs,
                                                       "profiler")
    except ValidationError as err:
        raise ParamValueError("Invalid profiler dir.") from err

    check_train_job_and_profiler_dir(profiler_dir_abs)
    condition = request.stream.read()
    try:
        condition = json.loads(condition) if condition else {}
    except (json.JSONDecodeError, ValueError) as err:
        raise ParamValueError("Json data parse failed.") from err

    # A body such as `[1]` or `3` is valid JSON but has no `.get`; reject it
    # as a parameter error instead of failing with AttributeError below.
    if not isinstance(condition, dict):
        raise ParamValueError("Invalid condition, it should be a JSON object.")

    device_id = condition.get("device_id", "0")
    # Return value intentionally unused; presumably to_int rejects ids that
    # are not integers -- confirm against its definition.
    to_int(device_id, 'device_id')
    analyser = AnalyserFactory.instance().get_analyser(
        'minddata_cpu_utilization', profiler_dir_abs, device_id)
    cpu_utilization = analyser.query(condition)
    return jsonify(cpu_utilization)
Esempio n. 17
0
def get_minddata_pipeline_queue_info():
    """
    Serve the queue information of one minddata pipeline operator.

    Query parameters read from the request: ``device_id`` (default '0') and
    ``op_id`` (parsed with ``type=int``; the lookup yields None when it is
    absent or cannot be converted).

    Returns:
        Response, the analyser's ``get_op_and_parent_op_info`` result
        wrapped by ``jsonify``.

    Raises:
        ParamValueError: If the profiler dir or train id is missing, the
            resulting path fails validation, or no usable op_id is given.

    Examples:
        >>> GET http://xxxx/v1/mindinsight/profile/minddata-pipeline/queue
    """
    profiler_dir = get_profiler_dir(request)
    train_id = get_train_id(request)
    if not (profiler_dir and train_id):
        raise ParamValueError("No profiler_dir or train_id.")

    joined_path = os.path.join(
        settings.SUMMARY_BASE_DIR, train_id, profiler_dir)
    try:
        profiler_path = validate_and_normalize_path(joined_path, "profiler")
    except ValidationError:
        raise ParamValueError("Invalid profiler dir.")

    check_train_job_and_profiler_dir(profiler_path)

    query = request.args
    device_id = query.get('device_id', default='0')
    # Result discarded on purpose; the call is kept for its validation.
    to_int(device_id, 'device_id')

    op_id = query.get('op_id', type=int)
    if op_id is None:
        raise ParamValueError(
            "Invalid operator id or operator id does not exist.")

    pipeline_analyser = AnalyserFactory.instance().get_analyser(
        'minddata_pipeline', profiler_path, device_id)
    return jsonify(pipeline_analyser.get_op_and_parent_op_info(op_id))
Esempio n. 18
0
def get_flops_summary():
    """
    Serve the FLOPs summary of one device.

    Query parameters read from the request: ``train_id`` (resolved against
    ``settings.SUMMARY_BASE_DIR``) and ``device_id`` (default '0').

    Returns:
        The value produced by the flops analyser's ``get_flops_summary``,
        passed through unchanged.

    Examples:
        >>> GET http://xxxx/v1/mindinsight/profile/flops-summary
    """
    query = request.args
    profiler_path = validate_and_normalize_profiler_path(
        query.get("train_id"), settings.SUMMARY_BASE_DIR)
    check_train_job_and_profiler_dir(profiler_path)

    device_id = query.get("device_id", default='0')
    # Result discarded on purpose; the call is kept for its validation.
    to_int(device_id, 'device_id')

    flops_analyser = AnalyserFactory.instance().get_analyser(
        'flops', profiler_path, device_id)
    return flops_analyser.get_flops_summary()