Esempio n. 1
0
def trace_unhandled(event_name, context, event_fields_dict, perf_sample_dict):
    """Record timing for events that have no dedicated handler.

    An event whose name does not contain ``"exit__return"`` is treated as a
    function entry: its timestamp is stored per event name and pid.  An event
    whose name does contain it is treated as the matching return: the entry
    timestamp stored under the name with ``"_exit__return"`` removed is
    paired with this event's timestamp and appended to ``unhandled_infos`` as
    a ``TransferInfo`` with size and path set to 0.

    A return event for which no entry timestamp was recorded for the same pid
    is ignored rather than raising ``KeyError``.

    Args:
        event_name: Name of the traced event.
        context: Unused here.
        event_fields_dict: Mapping that provides ``"common_pid"``,
            ``"common_s"`` and ``"common_ns"``.
        perf_sample_dict: Unused here.
    """
    global unhandled_enter_secs, unhandled_enter_nsecs
    global unhandled_infos

    # Every event name seen (entry or return) gets its bookkeeping slots.
    if event_name not in unhandled_enter_nsecs:
        unhandled_enter_nsecs[event_name] = {}
        unhandled_enter_secs[event_name] = {}
        unhandled_infos[event_name] = {}

    common_pid = event_fields_dict["common_pid"]
    common_secs = event_fields_dict["common_s"]
    common_nsecs = event_fields_dict["common_ns"]

    if "exit__return" not in event_name:
        # Entry event: remember when this pid entered the function.
        unhandled_enter_nsecs[event_name][common_pid] = common_nsecs
        unhandled_enter_secs[event_name][common_pid] = common_secs
        return

    func_name = event_name.replace("_exit__return", "")

    # A return can arrive without a recorded entry (for example when tracing
    # started while the call was already in flight); there is nothing to
    # measure in that case, so drop the event instead of crashing.
    enter_secs = unhandled_enter_secs.get(func_name, {})
    enter_nsecs = unhandled_enter_nsecs.get(func_name, {})
    if common_pid not in enter_secs or common_pid not in enter_nsecs:
        return

    start_ns = Duration.nanoseconds(enter_secs[common_pid],
                                    enter_nsecs[common_pid])
    end_ns = Duration.nanoseconds(common_secs, common_nsecs)

    per_pid_infos = unhandled_infos.setdefault(func_name, {})
    per_pid_infos.setdefault(common_pid, []).append(
        TransferInfo(start_ns, end_ns, 0, 0, func_name))
Esempio n. 2
0
def probe_libdpu__libdpu_dpu_copy_to_wram_for_dpu_exit__return(
        event_name, context, common_cpu, common_secs, common_nsecs, common_pid,
        common_comm, common_callchain, __probe_func, __probe_ret_ip,
        perf_sample_dict):
    """Record a completed ``dpu_copy_to_wram_for_dpu`` call for a pid.

    Reads the size (index 0) and the ``rank_path`` key (index 1) stored in
    ``wram_pid[common_pid]``, pairs the timestamp saved in
    ``wram_enter_secs`` / ``wram_enter_nsecs`` with this event's timestamp,
    and appends the resulting ``TransferInfo`` to
    ``wram_write_infos[common_pid]``.

    The per-pid dictionaries are indexed directly; no check is made that an
    entry exists for ``common_pid``.
    """
    global wram_enter_nsecs, wram_enter_secs, wram_pid

    # NOTE(review): presumably stored by the matching entry probe -- confirm.
    pending = wram_pid[common_pid]
    xfer_size = pending[0]
    xfer_path = rank_path[pending[1]]

    began_ns = Duration.nanoseconds(wram_enter_secs[common_pid],
                                    wram_enter_nsecs[common_pid])
    ended_ns = Duration.nanoseconds(common_secs, common_nsecs)

    pid_writes = wram_write_infos.setdefault(common_pid, [])
    pid_writes.append(
        TransferInfo(began_ns, ended_ns, xfer_size, xfer_path,
                     "dpu_copy_to_wram_for_dpu"))
Esempio n. 3
0
def probe_libdpu__libdpu_dpu_copy_from_wram_for_matrix_exit__return(
        event_name, context, common_cpu, common_secs, common_nsecs, common_pid,
        common_comm, common_callchain, __probe_func, __probe_ret_ip,
        perf_sample_dict):
    """Record a completed WRAM matrix read for a pid.

    Pairs the timestamp saved in ``wram_enter_secs`` / ``wram_enter_nsecs``
    with this event's timestamp and appends a ``TransferInfo`` to
    ``wram_read_infos[common_pid]``.  The reported size is
    ``wram_pid[common_pid][0]`` multiplied by the number of entries in
    ``wram_xfer_size[common_pid]``, or by 64 when the pid has no entry in
    ``wram_xfer_size``.
    """
    global wram_enter_nsecs, wram_enter_secs, wram_pid, wram_xfer_size

    began_ns = Duration.nanoseconds(wram_enter_secs[common_pid],
                                    wram_enter_nsecs[common_pid])
    ended_ns = Duration.nanoseconds(common_secs, common_nsecs)

    pending = wram_pid[common_pid]
    xfer_path = rank_path[pending[1]]

    # NOTE(review): 64 looks like a default DPU count when no per-DPU
    # sizes were recorded for this pid -- confirm.
    if common_pid in wram_xfer_size:
        dpu_count = len(wram_xfer_size[common_pid])
    else:
        dpu_count = 64

    total_size = pending[0] * dpu_count
    label = "dpu_copy_from_wram_{}dpus".format(dpu_count)

    pid_reads = wram_read_infos.setdefault(common_pid, [])
    pid_reads.append(
        TransferInfo(began_ns, ended_ns, total_size, xfer_path, label))
Esempio n. 4
0
def probe_libdpu__libdpu_dpu_copy_from_mrams_exit__return(
        event_name, context, common_cpu, common_secs, common_nsecs, common_pid,
        common_comm, common_callchain, __probe_func, __probe_ret_ip,
        perf_sample_dict):
    """Record a completed ``dpu_copy_from_mrams`` call for a pid.

    ``mram_pid[common_pid]`` supplies the transfer-matrix key (index 0), the
    ``rank_path`` key (index 1) and a size (index 2).  The timestamp saved
    under that pid and transfer matrix is paired with this event's timestamp,
    the size is multiplied by the number of entries in
    ``mram_xfer_size[transfer_matrix]``, and the resulting ``TransferInfo``
    is appended to ``mram_read_infos[common_pid]``.
    """
    global mram_enter_secs, mram_enter_nsecs, mram_pid, mram_xfer_size
    global mram_read_infos
    global rank_path

    pending = mram_pid[common_pid]
    matrix_key = pending[0]
    xfer_path = rank_path[pending[1]]
    total_size = pending[2]

    began_ns = Duration.nanoseconds(
        mram_enter_secs[common_pid][matrix_key],
        mram_enter_nsecs[common_pid][matrix_key])
    ended_ns = Duration.nanoseconds(common_secs, common_nsecs)

    # Scale the stored size by the number of entries recorded for the matrix.
    dpu_count = len(mram_xfer_size[matrix_key])
    total_size *= dpu_count

    pid_reads = mram_read_infos.setdefault(common_pid, [])
    pid_reads.append(
        TransferInfo(began_ns, ended_ns, total_size, xfer_path,
                     "dpu_copy_from_mrams_{}dpus".format(dpu_count)))
Esempio n. 5
0
def print_transfer_info(size,
                        func_name,
                        total_duration,
                        nb_size,
                        iram_bandwidth=False):
    """Print one tab-separated summary row for a transfer size.

    The row contains, in order: the formatted size, ``nb_size``, the
    formatted bandwidth followed by ``/s``, the formatted average duration,
    and the items of ``func_name`` joined with spaces.  The first four
    columns are centered in a width of 10 characters.

    Args:
        size: Value passed to ``PrintResult.format_size`` and divided by the
            average duration to obtain the bandwidth.
        func_name: Iterable of strings, joined with a single space.
        total_duration: Duration that is averaged over ``nb_size``.
        nb_size: Divisor for the average (presumably the number of
            transfers of this size -- confirm against the caller).
        iram_bandwidth: Forwarded unchanged to ``PrintResult.format_size``.
    """
    mean_seconds = Duration.seconds(total_duration) / nb_size

    duration_text = PrintResult.format_duration(total_duration / nb_size)
    size_text = PrintResult.format_size(size, iram_bandwidth)
    bandwidth_text = PrintResult.format_size(size / mean_seconds,
                                             iram_bandwidth)

    columns = (
        size_text.center(10),
        str(nb_size).center(10),
        bandwidth_text.center(10),
        duration_text.center(10),
        " ".join(func_name),
    )
    print("{}\t{}\t{}/s\t{}\t{}".format(*columns))