def trace_unhandled(event_name, context, event_fields_dict, perf_sample_dict):
    """Time events that have no dedicated handler, per event name and pid.

    An event whose name does not contain ``exit__return`` is treated as a
    function entry: its timestamp is stored under (event name, pid).  An
    event whose name contains ``exit__return`` is treated as the matching
    return: the ``_exit__return`` suffix is removed to recover the entry
    name, and a ``TransferInfo`` spanning entry to return is appended to
    ``unhandled_infos[entry name][pid]`` with size and path both set to 0.

    A return event for which no entry timestamp was recorded (same entry
    name and pid) is dropped instead of raising ``KeyError``.

    Args:
        event_name: Name of the traced event.
        context: Unused here.
        event_fields_dict: Event fields; ``"common_pid"``, ``"common_s"``
            and ``"common_ns"`` are read.
        perf_sample_dict: Unused here.
    """
    global unhandled_enter_secs, unhandled_enter_nsecs
    global unhandled_infos

    # Every event name seen (entry or return) gets its three tables created
    # together the first time it shows up.
    if event_name not in unhandled_enter_nsecs:
        unhandled_enter_nsecs[event_name] = {}
        unhandled_enter_secs[event_name] = {}
        unhandled_infos[event_name] = {}

    common_pid = event_fields_dict["common_pid"]
    common_secs = event_fields_dict["common_s"]
    common_nsecs = event_fields_dict["common_ns"]

    if "exit__return" not in event_name:
        # Entry event: remember when this pid entered the function.
        unhandled_enter_nsecs[event_name][common_pid] = common_nsecs
        unhandled_enter_secs[event_name][common_pid] = common_secs
        return

    # Return event: pair it with the entry recorded under the bare name.
    func_name = event_name.replace("_exit__return", "")
    try:
        enter_secs = unhandled_enter_secs[func_name][common_pid]
        enter_nsecs = unhandled_enter_nsecs[func_name][common_pid]
    except KeyError:
        # No entry was recorded for this function/pid, so there is no start
        # time to pair with; skip the sample rather than abort the script.
        return

    start_ns = Duration.nanoseconds(enter_secs, enter_nsecs)
    end_ns = Duration.nanoseconds(common_secs, common_nsecs)
    unhandled_infos[func_name].setdefault(common_pid, []).append(
        TransferInfo(start_ns, end_ns, 0, 0, func_name))
def probe_libdpu__libdpu_dpu_copy_to_wram_for_dpu_exit__return(
        event_name, context, common_cpu, common_secs, common_nsecs,
        common_pid, common_comm, common_callchain, __probe_func,
        __probe_ret_ip, perf_sample_dict):
    """Record a finished ``dpu_copy_to_wram_for_dpu`` call for ``common_pid``.

    Reads the state stored for this pid in ``wram_pid`` (element 0 is used as
    the transfer size, element 1 as the key into ``rank_path``) together with
    the stored entry timestamp, and appends a ``TransferInfo`` covering entry
    to return to ``wram_write_infos[common_pid]``.

    Only ``common_secs``, ``common_nsecs`` and ``common_pid`` are used; the
    other parameters are ignored.
    """
    global wram_enter_nsecs, wram_enter_secs, wram_pid

    pending = wram_pid[common_pid]
    xfer_size = pending[0]
    target_path = rank_path[pending[1]]

    began_ns = Duration.nanoseconds(wram_enter_secs[common_pid],
                                    wram_enter_nsecs[common_pid])
    ended_ns = Duration.nanoseconds(common_secs, common_nsecs)

    # The per-pid list is created on first use.
    pid_records = wram_write_infos.setdefault(common_pid, [])
    pid_records.append(
        TransferInfo(began_ns, ended_ns, xfer_size, target_path,
                     "dpu_copy_to_wram_for_dpu"))
def probe_libdpu__libdpu_dpu_copy_from_wram_for_matrix_exit__return(
        event_name, context, common_cpu, common_secs, common_nsecs,
        common_pid, common_comm, common_callchain, __probe_func,
        __probe_ret_ip, perf_sample_dict):
    """Record a finished ``dpu_copy_from_wram_for_matrix`` call for a pid.

    The recorded size is ``wram_pid[common_pid][0]`` multiplied by a DPU
    count: the length of ``wram_xfer_size[common_pid]`` when that pid has an
    entry there, otherwise 64.  The resulting ``TransferInfo`` is appended
    to ``wram_read_infos[common_pid]`` and its label embeds the DPU count.

    Only ``common_secs``, ``common_nsecs`` and ``common_pid`` are used; the
    other parameters are ignored.
    """
    global wram_enter_nsecs, wram_enter_secs, wram_pid, wram_xfer_size

    began_ns = Duration.nanoseconds(wram_enter_secs[common_pid],
                                    wram_enter_nsecs[common_pid])
    ended_ns = Duration.nanoseconds(common_secs, common_nsecs)

    pending = wram_pid[common_pid]
    target_path = rank_path[pending[1]]

    if common_pid in wram_xfer_size:
        dpu_count = len(wram_xfer_size[common_pid])
    else:
        # Fallback count used when nothing was recorded for this pid.
        dpu_count = 64
    total_size = pending[0] * dpu_count

    label = "dpu_copy_from_wram_{}dpus".format(dpu_count)
    # The per-pid list is created on first use.
    pid_records = wram_read_infos.setdefault(common_pid, [])
    pid_records.append(
        TransferInfo(began_ns, ended_ns, total_size, target_path, label))
def probe_libdpu__libdpu_dpu_copy_from_mrams_exit__return(
        event_name, context, common_cpu, common_secs, common_nsecs,
        common_pid, common_comm, common_callchain, __probe_func,
        __probe_ret_ip, perf_sample_dict):
    """Record a finished ``dpu_copy_from_mrams`` call for ``common_pid``.

    ``mram_pid[common_pid]`` supplies, by index: 0 the transfer-matrix key,
    1 the key into ``rank_path``, 2 a size.  The entry timestamp is looked
    up per (pid, transfer matrix).  The recorded size is element 2 multiplied
    by the length of ``mram_xfer_size[transfer matrix]``; that length is also
    embedded in the label.  The ``TransferInfo`` is appended to
    ``mram_read_infos[common_pid]``.

    Only ``common_secs``, ``common_nsecs`` and ``common_pid`` are used; the
    other parameters are ignored.
    """
    global mram_enter_secs, mram_enter_nsecs, mram_pid, mram_xfer_size
    global mram_read_infos
    global rank_path

    pending = mram_pid[common_pid]
    matrix_key = pending[0]
    target_path = rank_path[pending[1]]
    unit_size = pending[2]

    began_ns = Duration.nanoseconds(mram_enter_secs[common_pid][matrix_key],
                                    mram_enter_nsecs[common_pid][matrix_key])
    ended_ns = Duration.nanoseconds(common_secs, common_nsecs)

    dpu_count = len(mram_xfer_size[matrix_key])
    total_size = unit_size * dpu_count

    label = "dpu_copy_from_mrams_{}dpus".format(dpu_count)
    # The per-pid list is created on first use.
    pid_records = mram_read_infos.setdefault(common_pid, [])
    pid_records.append(
        TransferInfo(began_ns, ended_ns, total_size, target_path, label))
def print_transfer_info(size, func_name, total_duration, nb_size,
                        iram_bandwidth=False):
    """Print one tab-separated summary row for a group of transfers.

    Columns, in order: formatted size, ``nb_size``, formatted bandwidth
    followed by ``/s``, formatted average duration, and the items of
    ``func_name`` joined by single spaces.  The first four columns are each
    centered in a field of width 10.

    Args:
        size: Value formatted as the size column and used as the numerator
            of the bandwidth.
        func_name: Iterable of strings joined with spaces for the last column.
        total_duration: Cumulated duration; divided by ``nb_size`` to get the
            average.
        nb_size: Divisor for the average; also printed as the second column.
        iram_bandwidth: Forwarded as second argument to
            ``PrintResult.format_size``.
    """
    mean_seconds = Duration.seconds(total_duration) / nb_size
    duration_text = PrintResult.format_duration(total_duration / nb_size)
    size_text = PrintResult.format_size(size, iram_bandwidth)
    bandwidth_text = PrintResult.format_size(size / mean_seconds,
                                             iram_bandwidth)

    column_width = 10
    cells = [
        size_text.center(column_width),
        str(nb_size).center(column_width),
        bandwidth_text.center(column_width),
        duration_text.center(column_width),
        " ".join(func_name),
    ]
    print("{}\t{}\t{}/s\t{}\t{}".format(*cells))