Beispiel #1
0
def main():
    """Exercise add/check/remove watchpoint calls on a sync-mode dump.

    Four scenarios run in order against one ``DbgServices`` backend:

    1. a MIN_LT watchpoint that is expected to be hit exactly once,
    2. the same check after that watchpoint has been removed,
    3. a MIN_LT watchpoint with a threshold that is not expected to be hit,
    4. a CHANGE_TOO_LARGE watchpoint on a parameter node.

    A mismatch between the expected and observed hits is reported on stdout;
    nothing is raised or returned.
    """
    min_lt = 6  # watch_condition code for MIN_LT
    change_too_large = 18  # watch_condition code for CHANGE_TOO_LARGE

    conv3_node = ("Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/"
                  "Conv2D-op308")
    fc3_bias_node = ("Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/"
                     "Parameter[6]_11/fc3.bias")

    def node_list(node_name, is_parameter):
        # A new mapping is built on every call so that each watchpoint
        # receives its own dict object, exactly as with inline literals.
        return {node_name: {"device_id": [0],
                            "root_graph_id": [0],
                            "is_parameter": is_parameter}}

    backend = d.DbgServices(
        dump_file_path="/home/jtzanaka/dumps/sync_trans_false/032421/alexnet")
    backend.initialize(net_name="Network Name goes here!", is_sync_mode=True)

    # Scenario 1: MIN_LT watchpoint with threshold 0.0 -> expect a single hit.
    threshold_zero = d.Parameter(name="param", disabled=False, value=0.0)
    backend.add_watchpoint(watchpoint_id=1,
                           watch_condition=min_lt,
                           check_node_list=node_list(conv3_node, False),
                           parameter_list=[threshold_zero])
    hits_set = backend.check_watchpoints(iteration=2)
    if len(hits_set) != 1:
        print("ERROR -> test 1: watchpoint set but not hit just once")
    print_watchpoint_hits(hits_set, 1)

    # Scenario 2: after removal the same iteration must report no hits.
    backend.remove_watchpoint(watchpoint_id=1)
    hits_removed = backend.check_watchpoints(iteration=2)
    if hits_removed:
        print("ERROR -> test 2: watchpoint removed but hit")

    # Scenario 3: MIN_LT watchpoint with threshold -1000.0 -> expect no hit.
    threshold_low = d.Parameter(name="param", disabled=False, value=-1000.0)
    backend.add_watchpoint(watchpoint_id=2,
                           watch_condition=min_lt,
                           check_node_list=node_list(conv3_node, False),
                           parameter_list=[threshold_low])
    hits_not_expected = backend.check_watchpoints(iteration=2)
    if hits_not_expected:
        print("ERROR -> test 3: watchpoint set but not supposed to be hit")
    backend.remove_watchpoint(watchpoint_id=2)

    # Scenario 4: weight-change watchpoint on fc3.bias -> expect a single hit.
    update_ratio = d.Parameter(
        name="abs_mean_update_ratio_gt", disabled=False, value=0.0)
    epsilon = d.Parameter(name="epsilon", disabled=True, value=0.0)
    backend.add_watchpoint(watchpoint_id=3,
                           watch_condition=change_too_large,
                           check_node_list=node_list(fc3_bias_node, True),
                           parameter_list=[update_ratio, epsilon])
    hits_weight_change = backend.check_watchpoints(iteration=3)
    if len(hits_weight_change) != 1:
        print("ERROR -> test 4: watchpoint weight change set but not hit just once")
    print_watchpoint_hits(hits_weight_change, 4)
def main():
    """Exercise add/check/remove watchpoint calls on an async-mode dump.

    Three scenarios run in order against one ``DbgServices`` backend:

    1. a MIN_LT watchpoint that is expected to be hit exactly once,
    2. the same check after that watchpoint has been removed,
    3. a MIN_LT watchpoint with a threshold that is not expected to be hit.

    A mismatch between the expected and observed hits is reported on stdout;
    nothing is raised or returned.
    """
    min_lt = 6  # watch_condition code for MIN_LT

    conv3_node = ("Default/network-TrainOneStepCell/network-WithLossCell/"
                  "_backbone-AlexNet/conv3-Conv2d/Conv2D-op169")

    def conv3_node_list():
        # A new mapping is built on every call so that each watchpoint
        # receives its own dict object, exactly as with inline literals.
        return {conv3_node: {"device_id": [0],
                             "root_graph_id": [1],
                             "is_parameter": False}}

    backend = d.DbgServices(
        dump_file_path="/opt/nvme2n1/j00455527/dumps/async_sink_true/032421")
    backend.initialize(net_name="alexnet", is_sync_mode=False)

    # Scenario 1: MIN_LT watchpoint with threshold 0.0 -> expect a single hit.
    threshold_zero = d.Parameter(name="param", disabled=False, value=0.0)
    backend.add_watchpoint(watchpoint_id=1,
                           watch_condition=min_lt,
                           check_node_list=conv3_node_list(),
                           parameter_list=[threshold_zero])
    hits_set = backend.check_watchpoints(iteration=2)
    if len(hits_set) != 1:
        print("ERROR -> test 1: watchpoint set but not hit just once")
    print_watchpoint_hits(hits_set, 1)

    # Scenario 2: after removal the same iteration must report no hits.
    backend.remove_watchpoint(watchpoint_id=1)
    hits_removed = backend.check_watchpoints(iteration=2)
    if hits_removed:
        print("ERROR -> test 2: watchpoint removed but hit")

    # Scenario 3: MIN_LT watchpoint with threshold -1000.0 -> expect no hit.
    threshold_low = d.Parameter(name="param", disabled=False, value=-1000.0)
    backend.add_watchpoint(watchpoint_id=2,
                           watch_condition=min_lt,
                           check_node_list=conv3_node_list(),
                           parameter_list=[threshold_low])
    hits_not_expected = backend.check_watchpoints(iteration=2)
    if hits_not_expected:
        print("ERROR -> test 3: watchpoint set but not supposed to be hit")
    backend.remove_watchpoint(watchpoint_id=2)
Beispiel #3
0
def main():
    """Read three tensors from a sync-mode dump and print them.

    The tensors requested are a parameter, an operator output at slot 0 and
    an operator output at slot 1, all for iteration 2, device 0 and root
    graph 0. The results are handed to ``print_read_tensors``.
    """
    backend = d.DbgServices(
        dump_file_path="/opt/nvme2n1/j00455527/dumps/sync_trans_false/032421/alexnet")
    backend.initialize(net_name="Network Name goes here!", is_sync_mode=True)

    # One row per tensor: (node_name, slot, is_parameter).
    requests = (
        # parameter
        ("Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias",
         0, True),
        # output tensor with zero slot
        ("Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op168",
         0, False),
        # output tensor with non-zero slot
        ("Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op346",
         1, False),
    )

    tensor_info = [
        d.TensorInfo(node_name=node,
                     slot=slot_index,
                     iteration=2,
                     device_id=0,
                     root_graph_id=0,
                     is_parameter=param_flag)
        for node, slot_index, param_flag in requests
    ]

    tensor_data = backend.read_tensors(tensor_info)
    print_read_tensors(tensor_info, tensor_data)
def test_sync_trans_read_tensors():
    """Read three tensors from a sync-mode GPU dump and compare with golden.

    The tensors requested are a parameter, an operator output at slot 0 and
    an operator output at slot 1, all for iteration 2, device 0 and root
    graph 0. After printing via ``print_read_tensors`` the test asserts on
    ``compare_actual_with_expected(test_name)``; ``test_name`` is expected
    to be defined at module level.
    """
    backend = d.DbgServices(
        dump_file_path="../data/dump/gpu_dumps/sync_trans_true/alexnet")
    backend.initialize(net_name="Network Name goes here!", is_sync_mode=True)

    # One row per tensor: (node_name, slot, is_parameter).
    requests = (
        # parameter
        ("Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias",
         0, True),
        # output tensor with zero slot
        ("Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op308",
         0, False),
        # output tensor with non-zero slot
        ("Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op300",
         1, False),
    )

    tensor_info = [
        d.TensorInfo(node_name=node,
                     slot=slot_index,
                     iteration=2,
                     device_id=0,
                     root_graph_id=0,
                     is_parameter=param_flag)
        for node, slot_index, param_flag in requests
    ]

    tensor_data = backend.read_tensors(tensor_info)
    print_read_tensors(tensor_info, tensor_data)

    matches_golden = compare_actual_with_expected(test_name)
    assert matches_golden
Beispiel #5
0
def main():
    """Read two operator-output tensors from an async-mode dump and print them.

    Both tensors are requested for iteration 2, device 0 and root graph 1:
    one at slot 0 and one at slot 1. The results are handed to
    ``print_read_tensors``.
    """
    backend = d.DbgServices(
        dump_file_path="/opt/nvme2n1/j00455527/dumps/async_sink_true/032421")
    backend.initialize(net_name="alexnet", is_sync_mode=False)

    # One row per tensor: (node_name, slot).
    requests = (
        # output tensor with zero slot
        ("Default/network-TrainOneStepCell/network-WithLossCell/_backbone-AlexNet/"
         "conv3-Conv2d/Conv2D-op169", 0),
        # output tensor with non-zero slot
        ("Default/network-TrainOneStepCell/network-WithLossCell/_backbone-AlexNet/"
         "ReLUV2-op348", 1),
    )

    tensor_info = [
        d.TensorInfo(node_name=node,
                     slot=slot_index,
                     iteration=2,
                     device_id=0,
                     root_graph_id=1,
                     is_parameter=False)
        for node, slot_index in requests
    ]

    tensor_data = backend.read_tensors(tensor_info)
    print_read_tensors(tensor_info, tensor_data)
def test_sync_trans_false_watchpoints():
    """Run four watchpoint scenarios on a sync-mode GPU dump and compare output.

    Results and error lines are written to ``<test_name>.expected`` when the
    module-level ``GENERATE_GOLDEN`` flag is truthy, otherwise to
    ``<test_name>.actual``. Once the file is closed the test asserts on
    ``compare_actual_with_expected(test_name)``.

    The output file is managed by a ``with`` block so the handle is closed
    (and buffered output flushed) even if a backend call raises part-way
    through the scenarios.
    """
    output_suffix = ".expected" if GENERATE_GOLDEN else ".actual"

    with open(test_name + output_suffix, "w") as f_write:
        debugger_backend = d.DbgServices(
            dump_file_path="../data/dump/gpu_dumps/sync_trans_false/alexnet")

        _ = debugger_backend.initialize(
            net_name="Alexnet", is_sync_mode=True)

        # NOTES:
        # -> watch_condition=6 is MIN_LT
        # -> watch_condition=18 is CHANGE_TOO_LARGE

        # test 1: watchpoint set and hit (watch_condition=6)
        param1 = d.Parameter(name="param", disabled=False, value=0.0)
        _ = debugger_backend.add_watchpoint(
            watchpoint_id=1, watch_condition=6,
            check_node_list={
                "Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/"
                "Conv2D-op308":
                {"device_id": [0], "root_graph_id": [0], "is_parameter": False}},
            parameter_list=[param1])

        watchpoint_hits_test_1 = debugger_backend.check_watchpoints(iteration=2)
        if len(watchpoint_hits_test_1) != 1:
            f_write.write(
                "ERROR -> test 1: watchpoint set but not hit just once\n")
        print_watchpoint_hits(watchpoint_hits_test_1, 1, f_write)

        # test 2: watchpoint remove and ensure it's not hit
        _ = debugger_backend.remove_watchpoint(watchpoint_id=1)
        watchpoint_hits_test_2 = debugger_backend.check_watchpoints(iteration=2)
        if watchpoint_hits_test_2:
            f_write.write("ERROR -> test 2: watchpoint removed but hit\n")

        # test 3: watchpoint set and not hit, then remove
        param2 = d.Parameter(name="param", disabled=False, value=-1000.0)
        _ = debugger_backend.add_watchpoint(
            watchpoint_id=2, watch_condition=6,
            check_node_list={
                "Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/"
                "Conv2D-op308":
                {"device_id": [0], "root_graph_id": [0], "is_parameter": False}},
            parameter_list=[param2])

        watchpoint_hits_test_3 = debugger_backend.check_watchpoints(iteration=2)
        if watchpoint_hits_test_3:
            f_write.write(
                "ERROR -> test 3: watchpoint set but not supposed to be hit\n")
        _ = debugger_backend.remove_watchpoint(watchpoint_id=2)

        # test 4: weight change watchpoint set and hit
        param_abs_mean_update_ratio_gt = d.Parameter(
            name="abs_mean_update_ratio_gt", disabled=False, value=0.0)
        param_epsilon = d.Parameter(name="epsilon", disabled=True, value=0.0)
        _ = debugger_backend.add_watchpoint(
            watchpoint_id=3, watch_condition=18,
            check_node_list={
                "Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/"
                "Parameter[6]_11/fc3.bias":
                {"device_id": [0], "root_graph_id": [0], "is_parameter": True}},
            parameter_list=[param_abs_mean_update_ratio_gt, param_epsilon])

        watchpoint_hits_test_4 = debugger_backend.check_watchpoints(iteration=3)
        if len(watchpoint_hits_test_4) != 1:
            f_write.write(
                "ERROR -> test 4: watchpoint weight change set but not hit just once\n")
        print_watchpoint_hits(watchpoint_hits_test_4, 4, f_write)

    # The comparison must run only after the file above has been closed so
    # that everything written is on disk.
    assert compare_actual_with_expected(test_name)