def compare_impl(full_raw_stack,
                 full_actuals,
                 chunk_raw_stack,
                 chunk_actuals,
                 fancy_func=False,
                 external_trim=None):
    """Build an ATen_OP record from a full-input run and a chunk-input run.

    Args:
        full_raw_stack: raw stack of the full-input run; source of the op
            name and of the CPU log.
        full_actuals: actuals of the full-input run, handed to
            ``actual_parser.parse``.
        chunk_raw_stack: raw stack of the chunk-input run; source of the
            HB log.
        chunk_actuals: actuals of the chunk-input run, handed to
            ``actual_parser.parse``.
        fancy_func: forwarded unchanged to the stack parsers.
        external_trim: when not None, every ``@TRIM@`` node of the HB log is
            reset to 0.0, the trim is re-applied, and this value is reported
            as the total time on HB instead of the accumulated node times.

    Returns:
        ``ATen_OP(op_name, cpu_log, hb_log, total_time_on_HB, actuals)``.
    """
    # Both stacks have to agree in shape before either one is parsed.
    cross_check(full_raw_stack, chunk_raw_stack)

    # The full tree is built only to read the aten op name. The root is
    # expected to have exactly two children: the aten op first, then "other".
    full_tree = stack_parser.exec_time_tree(full_raw_stack,
                                            fancy_func=fancy_func)
    op_name = full_tree.children[0].func

    # The CPU log comes from the full input data, the HB log from the chunk
    # input data.
    cpu_log = process_CPU_stack.parse(full_raw_stack, fancy_func=fancy_func)
    hb_log = process_HB_stack.parse(chunk_raw_stack,
                                    fancy_func=fancy_func,
                                    trimming=True)

    total_time_on_HB = 0
    if external_trim is not None:
        # External trimming: there may be more than one @TRIM@ node, so a
        # single node cannot simply be adjusted. Zero all of them, re-apply
        # the trim, and take the externally supplied device time as is.
        def zero_trim(node):
            if node.func == "@TRIM@":
                node.time = float(0)

        hb_log = traversal(hb_log, zero_trim)
        stack_parser.exec_time_apply_trim(hb_log)
        total_time_on_HB = external_trim
    else:
        # No external value: the time on device is the accumulated simulated
        # time of every @TRIM@ node.
        def add_trim(node):
            nonlocal total_time_on_HB
            if node.func == "@TRIM@":
                total_time_on_HB += node.time

        traversal(hb_log, add_trim)

    # debug output
    print(stack_parser.exec_time_print_tree(cpu_log))
    print(stack_parser.exec_time_print_tree(hb_log))
    print(f"total time on HB = {total_time_on_HB!s}")

    # process input tensors
    actuals = actual_parser.parse(full_actuals, chunk_actuals)

    return ATen_OP(op_name, cpu_log, hb_log, total_time_on_HB, actuals)
Example #2
0
def test_compare_aten_op_1():
    """Compare the demo full/chunk runs and check timings and both log trees."""
    demo_dir = current_path + "/demo"
    aten_op = compare(demo_dir + "/full.std",
                      demo_dir + "/chunk.std",
                      demo_dir + "/manycore_stats.log")

    # scalar timings reported on the resulting op
    assert aten_op.hb_device_time == 1204.603
    assert aten_op.hb_host_time == 170.0
    assert aten_op.xeon_time == 34.451

    # printed CPU log tree, one entry per rendered line
    expected_cpu_graph = "\n".join((
        "|- Node(@CPU_LOG@ : 34.451)",
        "  |- Node(at::Tensor at::CPUType::{anonymous}::empty(c10::IntArrayRef, const c10::TensorOptions&, c10::optional<c10::MemoryFormat>) : 0.043)",
        "  |- Node(at::native::add_stub::add_stub() : 34.316)",
    ))
    cpu_graph = stack_parser.exec_time_print_tree(aten_op.cpu_log)
    assert cpu_graph == expected_cpu_graph

    # printed HB log tree, one entry per rendered line
    expected_hb_graph = "\n".join((
        "|- Node(@HB_LOG@ : 170.0)",
        "  |- Node(at::Tensor at::HammerBladeType::{anonymous}::empty(c10::IntArrayRef, const c10::TensorOptions&, c10::optional<c10::MemoryFormat>) : 0.004)",
        "  |- Node(at::native::add_stub::add_stub() : 170.0)",
        "    |- Node(@OFFLOAD_KERNEL@__tensorlib_add : 0.0)",
        "      |- Node(@TRIM@ : 0.0)",
    ))
    hb_graph = stack_parser.exec_time_print_tree(aten_op.hb_log)
    assert hb_graph == expected_hb_graph
Example #3
0
def test_hb_stack_1():
    """Parse raw_stack with process_HB_stack and check the printed HB tree."""
    # raw_stack is defined outside this block.
    expected_graph = "\n".join((
        "|- Node(@HB_LOG@ : 0.234)",
        "  |- Node(at::Tensor at::HammerBladeType::{anonymous}::empty(c10::IntArrayRef, const c10::TensorOptions&, c10::optional<c10::MemoryFormat>) : 0.01)",
        "  |- Node(at::native::add_stub::add_stub() : 0.187)",
        "    |- Node(@OFFLOAD_KERNEL@__tensorlib_add : 0.145)",
        "      |- Node(@TRIM@ : 0.0)",
    ))
    hb_log = process_HB_stack.parse(raw_stack)
    rendered = stack_parser.exec_time_print_tree(hb_log)
    assert rendered == expected_graph
Example #4
0
def test_cpu_stack_1():
    """Parse raw_stack with process_CPU_stack and check the printed CPU tree."""
    # raw_stack is defined outside this block.
    expected_graph = "\n".join((
        "|- Node(@CPU_LOG@ : 0.067)",
        "  |- Node(at::Tensor at::CPUType::{anonymous}::empty(c10::IntArrayRef, const c10::TensorOptions&, c10::optional<c10::MemoryFormat>) : 0.015)",
        "  |- Node(at::native::add_stub::add_stub() : 0.009)",
    ))
    parsed_log = process_CPU_stack.parse(raw_stack)
    rendered = stack_parser.exec_time_print_tree(parsed_log)
    assert rendered == expected_graph
Example #5
0
def test_stack_parsing_1():
    """Build the full execution-time tree from raw_stack and check its dump."""
    # raw_stack is defined outside this block.
    expected_graph = "\n".join((
        "|- Node(time_in_roi : 0.399)",
        "  |- Node(at::Tensor at::CPUType::{anonymous}::add(const at::Tensor&, const at::Tensor&, c10::Scalar) : 0.399)",
        "    |- Node(@CPU_LOG@ : 0.067)",
        "      |- Node(at::Tensor at::CPUType::{anonymous}::empty(c10::IntArrayRef, const c10::TensorOptions&, c10::optional<c10::MemoryFormat>) : 0.015)",
        "      |- Node(at::native::add_stub::add_stub() : 0.009)",
        "    |- Node(@HB_LOG@ : 0.234)",
        "      |- Node(at::Tensor at::HammerBladeType::{anonymous}::empty(c10::IntArrayRef, const c10::TensorOptions&, c10::optional<c10::MemoryFormat>) : 0.01)",
        "      |- Node(at::native::add_stub::add_stub() : 0.187)",
        "        |- Node(@OFFLOAD_KERNEL@__tensorlib_add : 0.145)",
        "          |- Node(@TRIM@ : 0.0)",
        "    |- Node(at::Tensor at::CPUType::{anonymous}::llcopy(const at::Tensor&) : 0.025)",
        "  |- Node(other : 0.0)",
    ))
    tree = stack_parser.exec_time_tree(raw_stack)
    rendered = stack_parser.exec_time_print_tree(tree)
    assert rendered == expected_graph