def func(opr, *, f0=TimeFuncHelper.eval_time_func(prof_type, args.top_end_key, np.max)):
    """Return the time of ``opr``, or ``None`` when it must be ignored.

    ``f0`` is the time function applied to the operator.  Its default is
    built once, when this function is defined, from ``prof_type`` and
    ``args.top_end_key``.

    The result is ``None`` when ``f0`` itself yields ``None`` or when the
    time is below ``args.min_time`` or above ``args.max_time``; otherwise
    the time is returned unchanged.
    """
    elapsed = f0(opr)
    if elapsed is None:
        return None
    # Check the lower bound first so the comparisons run in the same order
    # (and short-circuit the same way) as a single ``or`` expression.
    below_minimum = elapsed < args.min_time
    if below_minimum or elapsed > args.max_time:
        return None
    return elapsed
def summary():
    """Build the summary table and return it with the two totals.

    The table always contains a "total device time" and a "total host time"
    row, both computed from ``analyzer_tot``.  When ``args.copy_time`` is
    set, a "copy time" row (device "end" time function) and a
    "copy wait time" row (device "kern" time function) are appended; they
    cover only operators whose ``opr_info["type"]`` equals ``"Copy"`` and
    are computed from ``analyzer``.

    Returns:
        A tuple ``(tab_str, tot_dev_time, tot_host_time)``.  ``tab_str`` is
        rendered by ``_tabulate_confluence`` when ``args.confluence`` is set
        and by ``tabulate`` otherwise.  Each total is the string ``"N/A"``
        when its selection is empty.
    """
    device_end_func = TimeFuncHelper.eval_time_func("device", "end", np.max)
    device_kern_func = TimeFuncHelper.eval_time_func("device", "kern", np.max)
    host_end_func = TimeFuncHelper.eval_time_func("host", "end", np.max)

    def get_tot_time(func):
        """Return the summed time for ``func``, or "N/A" if nothing matched."""
        rec = analyzer_tot.select(func, aggregate=np.sum)
        if not rec:
            return "N/A"
        return rec[0].time

    def is_copy(opr):
        """Tell whether ``opr`` is a "Copy" operator."""
        return opr.opr_info["type"] == "Copy"

    def copy_stat(time_func):
        """Format the summed and mean time of the "Copy" operators.

        Returns "N/A" when either selection is empty, mirroring
        ``get_tot_time``; indexing an empty result would otherwise raise
        ``IndexError`` for profiles without any "Copy" operator.
        """
        total = analyzer.select(time_func, is_copy, aggregate=np.sum)
        mean = analyzer.select(time_func, is_copy, aggregate=np.mean)
        if not total or not mean:
            return "N/A"
        return "tot={:.4f} avg={:.4f}".format(total[0].time, mean[0].time)

    tot_dev_time = get_tot_time(device_end_func)
    tot_host_time = get_tot_time(host_end_func)
    tab = [
        ("total device time", tot_dev_time),
        ("total host time", tot_host_time),
    ]

    if args.copy_time:
        tab.append(("copy time", copy_stat(device_end_func)))
        tab.append(("copy wait time", copy_stat(device_kern_func)))

    if args.confluence:
        tab_str = _tabulate_confluence(tab, headers=["name", "value"])
    else:
        tab_str = tabulate(tab)

    return tab_str, tot_dev_time, tot_host_time