Python create_trial_fast_refresh Examples, tests.tensorflow.utils.create_trial_fast_refresh Python Examples

Example #1

0

Show file

File: test_keras_mirrored.py Project: thaisep/sagemaker-debugger

def test_collection_reductions(out_dir, tf_eager_mode):
    """Check reading back a weight tensor whose collection has an l1 ReductionConfig.

    A KerasHook is built for the weights, biases and gradients collections,
    the weights collection is given ``ReductionConfig(norms=["l1"])``, and
    the model is trained. The first weight tensor must then either raise
    ``TensorUnavailableForStep`` when its full value is requested at step 0
    or raise ``TensorUnavailable``; a successful read fails the test.
    """
    tf.keras.backend.clear_session()
    hook = KerasHook(
        out_dir=out_dir,
        save_config=SaveConfig(save_interval=3),
        include_collections=[
            CollectionKeys.WEIGHTS,
            CollectionKeys.BIASES,
            CollectionKeys.GRADIENTS,
        ],
    )
    l1_only = ReductionConfig(norms=["l1"])
    hook.get_collection(CollectionKeys.WEIGHTS).reduction_config = l1_only
    train_model(out_dir, hook=hook, steps=["train"], eager=tf_eager_mode)

    trial = create_trial_fast_refresh(out_dir)
    first_weight = trial.tensor_names(collection=CollectionKeys.WEIGHTS)[0]

    try:
        trial.tensor(first_weight).value(0)
        # Reaching this line means the full value was readable.
        assert False
    except TensorUnavailableForStep:
        try:
            l1_norm = trial.tensor(first_weight).reduction_value(0, "l1")
            assert l1_norm is not None
        except ValueError:
            # The reduction cannot be computed for some tensors.
            pass
    except TensorUnavailable:
        # The tensor may be missing altogether if it was only being saved
        # as a reduction and the reduction computation failed.
        pass

Example #2

0

Show file

File: test_keras_mirrored.py Project: taza1/sagemaker-debugger

def test_save_all_workers(out_dir, zcc=False):
    """Check that ``include_workers="all"`` records tensors for every replica.

    Trains with ``save_all=True`` and ``SaveConfig(save_steps=[5])``, then
    compares the trial's worker count with ``get_available_gpus()`` and the
    per-tensor worker count at step 5 with ``strategy.num_replicas_in_sync``.
    Returns immediately when ``get_available_gpus() == 0``.

    ``zcc`` is accepted but not read by this function.
    """
    # Skip if no GPUS
    if get_available_gpus() == 0:
        return

    strategy = train_model(
        out_dir,
        include_collections=None,
        save_all=True,
        save_config=SaveConfig(save_steps=[5]),
        steps=["train"],
        include_workers="all",
    )
    trial = create_trial_fast_refresh(out_dir)
    assert len(trial.workers()) == get_available_gpus()

    # Weights collection.
    assert len(trial.tensor_names(collection="weights"))
    first_weight = trial.tensor_names(collection="weights")[0]
    assert len(trial.tensor(first_weight).workers(5)) == strategy.num_replicas_in_sync

    kernel_name = "conv2d/weights/conv2d/kernel:0"
    assert kernel_name in trial.tensor_names(collection="weights")
    assert len(trial.tensor(kernel_name).workers(5)) == strategy.num_replicas_in_sync

    # Biases collection.
    assert len(trial.tensor_names(collection="biases"))
    assert "conv2d/weights/conv2d/bias:0" in trial.tensor_names(collection="biases")
    first_bias = trial.tensor_names(collection="biases")[0]
    assert len(trial.tensor(first_bias).workers(5)) == strategy.num_replicas_in_sync

    # Gradients collection only needs to be non-empty.
    assert len(trial.tensor_names(collection="gradients"))

Example #3

0

Show file

File: test_keras_mirrored.py Project: thaisep/sagemaker-debugger

def test_include_regex_opt_var(out_dir, tf_eager_mode, workers):
    """Check a custom collection that includes the pattern "Adam".

    The hook saves only ``custom_optimizer_variables``. After training, the
    number of tensor names in that collection is checked (5 in eager mode,
    otherwise ``4 + 3 * strategy.num_replicas_in_sync``), and every tensor
    must have a value at step 0 and the expected number of workers.
    """
    collection_name = "custom_optimizer_variables"
    hook = KerasHook(
        out_dir=out_dir,
        save_config=SaveConfig(save_interval=3),
        include_collections=[collection_name],
        include_workers=workers,
    )
    hook.get_collection("custom_optimizer_variables").include("Adam")
    strategy, _ = train_model(
        out_dir, hook=hook, steps=["train"], eager=tf_eager_mode
    )

    trial = create_trial_fast_refresh(out_dir)
    names = trial.tensor_names(collection="custom_optimizer_variables")

    if not tf_eager_mode:
        assert len(names) == 4 + 3 * strategy.num_replicas_in_sync
    else:
        assert len(names) == 5

    for name in names:
        assert trial.tensor(name).value(0) is not None
        worker_count = len(trial.tensor(name).workers(0))
        # One worker when only a single worker saves, else one per replica.
        if workers == "one":
            assert worker_count == 1
        else:
            assert worker_count == strategy.num_replicas_in_sync

Example #4

0

Show file

File: test_keras_mirrored.py Project: thaisep/sagemaker-debugger

def test_include_regex(out_dir, tf_eager_mode, workers):
    """Check a custom collection that includes the pattern "dense".

    After training with a hook limited to ``custom_coll``, the collection
    size is compared with a count that depends on eager mode, on
    ``is_tf_2_2()`` and on the replica count. Every tensor must have a value
    at step 0 and the expected number of workers.
    """
    hook = KerasHook(
        out_dir=out_dir,
        save_config=SaveConfig(save_interval=9),
        include_collections=["custom_coll"],
        include_workers=workers,
    )
    hook.get_collection("custom_coll").include("dense")
    strategy, _ = train_model(
        out_dir, hook=hook, steps=["train"], eager=tf_eager_mode
    )

    trial = create_trial_fast_refresh(out_dir)
    names = trial.tensor_names(collection="custom_coll")

    if not tf_eager_mode:
        assert len(names) == 4 + 3 * strategy.num_replicas_in_sync
    else:
        assert len(names) == (12 if is_tf_2_2() else 4)

    for name in names:
        assert trial.tensor(name).value(0) is not None
        worker_count = len(trial.tensor(name).workers(0))
        # One worker when only a single worker saves, else one per replica.
        if workers == "one":
            assert worker_count == 1
        else:
            assert worker_count == strategy.num_replicas_in_sync

Example #5

0

Show file

def verify_shapes(out_dir, step_num, multiworker=False):
    """Assert that each tensor saved at ``step_num`` reports its shape as a tuple.

    For every tensor name available at ``step_num``, the shape must be a
    tuple. Reading the full value must either raise
    ``TensorUnavailableForStep`` or return something ``is_scalar`` accepts.
    When ``multiworker`` is anything other than ``False``, the tensor must
    have more than one worker and the checks are repeated per worker.
    """
    trial = create_trial_fast_refresh(out_dir)
    for name in trial.tensor_names(step=step_num):
        tensor = trial.tensor(name)

        if multiworker is not False:
            worker_names = tensor.workers(step_num)
            assert len(worker_names) > 1
            for worker in worker_names:
                try:
                    if not is_scalar(tensor.value(step_num, worker=worker)):
                        # The test did not save values, so a readable
                        # non-scalar value is a failure.
                        assert False
                except TensorUnavailableForStep:
                    pass

                assert isinstance(tensor.shape(step_num, worker=worker), tuple), (
                    name,
                    worker,
                    tensor.shape(step_num, worker=worker),
                )
            continue

        assert isinstance(tensor.shape(step_num), tuple), (
            name,
            tensor.shape(step_num),
        )
        try:
            if not is_scalar(tensor.value(step_num)):
                # The test saved no values except scalars, which do not use
                # the reduction config, so this read should have raised.
                assert False
        except TensorUnavailableForStep:
            pass

Example #6

0

Show file

def test_basic(out_dir, zcc=False):
    """Check tensors saved by a mirrored-strategy run over train/eval/predict.

    Runs ``helper_mirrored`` with the weights, biases, gradients and losses
    collections and ``eval_distributed=False``, then verifies tensor-name and
    step counts, and per-mode worker counts and values for weights and losses.
    Returns early, without checking the trial, when ``skip_trial_check()`` is
    truthy.

    Args:
        out_dir: Directory the hook writes to and the trial is read from.
        zcc: Forwarded unchanged to ``helper_mirrored``.
    """
    strategy = helper_mirrored(
        out_dir,
        steps=["train", "eval", "predict", "train"],
        include_collections=[
            CollectionKeys.WEIGHTS,
            CollectionKeys.BIASES,
            CollectionKeys.GRADIENTS,
            CollectionKeys.LOSSES,
        ],
        eval_distributed=False,
        zcc=zcc,
    )
    if skip_trial_check():
        return

    tr = create_trial_fast_refresh(out_dir)
    # wts, grads, losses
    print(tr.tensor_names())
    assert len(
        tr.tensor_names()) == 8 + 8 + (1 * strategy.num_replicas_in_sync) + 1
    assert len(tr.steps()) == 7
    assert len(tr.steps(ModeKeys.TRAIN)) == 3
    assert len(tr.steps(ModeKeys.EVAL)) == 2
    assert len(tr.steps(ModeKeys.PREDICT)) == 2

    assert "dense_1/kernel:0" in tr.tensor_names(collection="weights")
    for tname in tr.tensor_names(collection="weights"):
        for s in tr.tensor(tname).steps(ModeKeys.TRAIN):
            assert len(tr.tensor(tname).workers(
                s, ModeKeys.TRAIN)) == strategy.num_replicas_in_sync
            for worker in tr.tensor(tname).workers(s, ModeKeys.TRAIN):
                assert tr.tensor(tname).value(s,
                                              worker=worker,
                                              mode=ModeKeys.TRAIN) is not None
        for s in tr.tensor(tname).steps(ModeKeys.EVAL):
            assert len(tr.tensor(tname).workers(
                s, ModeKeys.EVAL)) == strategy.num_replicas_in_sync
            assert tr.tensor(tname).value(s, mode=ModeKeys.EVAL) is not None

    # Raw string: "\d" in a plain literal is an invalid escape sequence that
    # newer Python versions warn about. The pattern text itself is unchanged.
    tensornames = tr.tensor_names(regex=r"Identity_\d+:0")
    # Fail with a readable message instead of an IndexError on tensornames[0].
    assert tensornames, "no tensor name matched the Identity_<n>:0 pattern"
    for s in tr.tensor(tensornames[0]).steps(ModeKeys.TRAIN):
        for w in tr.tensor(tensornames[0]).workers(s, ModeKeys.TRAIN):
            assert tr.tensor(tensornames[0]).value(
                s, worker=w, mode=ModeKeys.TRAIN) is not None
        assert (len(tr.tensor(tensornames[0]).workers(
            s, ModeKeys.TRAIN)) == strategy.num_replicas_in_sync)

    # Every other loss tensor is expected to come from a single worker.
    for tname in tr.tensor_names(collection="losses"):
        if tname != tensornames[0]:
            for s in tr.tensor(tname).steps(ModeKeys.TRAIN):
                assert len(tr.tensor(tname).workers(s, ModeKeys.TRAIN)) == 1
                assert tr.tensor(tname).value(s,
                                              mode=ModeKeys.TRAIN) is not None

    tname = "sparse_softmax_cross_entropy_loss/value:0"
    for s in tr.tensor(tname).steps(ModeKeys.EVAL):
        assert len(tr.tensor(tname).workers(
            s, ModeKeys.EVAL)) == strategy.num_replicas_in_sync
        assert tr.tensor(tname).value(s, mode=ModeKeys.EVAL) is not None

Example #7

0

Show file

File: test_keras_mirrored.py Project: thaisep/sagemaker-debugger

def test_save_all(out_dir, tf_eager_mode, workers):
    """Check tensor, step and worker counts when every tensor is saved.

    Trains with ``save_all=True`` and ``SaveConfig(save_steps=[5])``. The
    expected number of tensor names depends on eager mode and on
    ``is_tf_2_2()`` / ``is_tf_2_3()``. Three steps are expected, each tensor
    must have the expected worker count at step 0, and ``verify_files`` is
    called on the output.

    Args:
        out_dir: Directory the hook writes to and the trial is read from.
        tf_eager_mode: Forwarded to ``train_model`` as ``eager``; also picks
            the expected tensor count.
        workers: Forwarded as ``include_workers``; ``"one"`` means each
            tensor is expected from a single worker.
    """
    save_config = SaveConfig(save_steps=[5])
    strategy, saved_scalars = train_model(
        out_dir,
        include_collections=None,
        save_all=True,
        save_config=save_config,
        steps=["train"],
        eager=tf_eager_mode,
        include_workers=workers,
    )
    tr = create_trial_fast_refresh(out_dir)
    print(tr.tensor_names())
    if tf_eager_mode:
        # The original re-tested is_tf_2_2() inside the is_tf_2_2() branch,
        # leaving an unreachable alternative; one flat chain selects the
        # same counts.
        if is_tf_2_2():
            # weights, metrics, losses, optimizer variables, scalar, inputs, outputs, gradients, layers
            expected_count = 6 + 2 + 1 + 5 + 1 + 1 + 2 + 8 + 8
        elif is_tf_2_3():
            expected_count = 6 + 2 + 1 + 5 + 1
        else:
            expected_count = 6 + 3 + 1 + 5 + 1
        assert len(tr.tensor_names()) == expected_count
    else:
        assert (len(tr.tensor_names()) == 6 + 6 + 5 + 3 + 1 +
                3 * strategy.num_replicas_in_sync +
                2 * strategy.num_replicas_in_sync)
        # weights, grads, optimizer_variables, metrics, losses, outputs
    assert len(tr.steps()) == 3
    for tname in tr.tensor_names():
        assert len(
            tr.tensor(tname).workers(0)) == (1 if workers == "one" else
                                             strategy.num_replicas_in_sync)
    verify_files(out_dir, save_config, saved_scalars)

Example #8

0

Show file

File: test_keras.py Project: thaisep/sagemaker-debugger

def test_regex_filtering_for_default_collections(out_dir):
    """Check that include-patterns on built-in collections filter saved tensors.

    The LAYERS collection includes "^dense" and the GRADIENTS collection
    includes "gradients/dense". After an eager fit, each collection must
    hold 4 names when ``is_tf_2_2()`` is truthy and 0 otherwise. Every name
    must have a value at step 0 and match its collection's pattern.
    """
    hook = smd.KerasHook(
        out_dir,
        save_config=SaveConfig(save_interval=9),
        include_collections=[CollectionKeys.LAYERS, CollectionKeys.GRADIENTS],
    )
    hook.get_collection(CollectionKeys.LAYERS).include("^dense")
    hook.get_collection(CollectionKeys.GRADIENTS).include("gradients/dense")
    helper_keras_fit(
        out_dir,
        hook=hook,
        save_config=SaveConfig(save_interval=10),
        steps=["train"],
        run_eagerly=True,
    )

    trial = create_trial_fast_refresh(out_dir)
    layer_names = trial.tensor_names(collection=CollectionKeys.LAYERS)
    gradient_names = trial.tensor_names(collection=CollectionKeys.GRADIENTS)

    expected_layers = 4 if is_tf_2_2() else 0
    assert len(layer_names) == expected_layers
    expected_gradients = 4 if is_tf_2_2() else 0
    assert len(gradient_names) == expected_gradients

    layer_pattern = r"^(dense)(_\d+)?\/(inputs|outputs)"
    gradient_pattern = r"gradients/dense"

    for name in layer_names:
        assert trial.tensor(name).value(0) is not None
        assert re.match(pattern=layer_pattern, string=name) is not None

    for name in gradient_names:
        assert trial.tensor(name).value(0) is not None
        assert re.match(pattern=gradient_pattern, string=name) is not None

Example #9

0

Show file

def test_base_reductions(out_dir, tf_eager_mode):
    """Check that weights are stored as reductions while losses and metrics keep values.

    Fits with a ReductionConfig built from ``ALLOWED_NORMS`` and
    ``ALLOWED_REDUCTIONS``. Reading the first weight tensor's full value at
    step 0 must raise ``TensorUnavailableForStep``. Its "l1" reduction must
    exist, and the number of reduction values must equal the combined length
    of the two allowed lists. The first loss and metric tensors must have
    values at step 0.
    """
    collections = [
        CollectionKeys.WEIGHTS,
        CollectionKeys.METRICS,
        CollectionKeys.LOSSES,
    ]
    helper_keras_fit(
        trial_dir=out_dir,
        include_collections=collections,
        reduction_config=ReductionConfig(
            norms=ALLOWED_NORMS, reductions=ALLOWED_REDUCTIONS
        ),
        run_eagerly=tf_eager_mode,
    )
    trial = create_trial_fast_refresh(out_dir)

    first_weight = trial.tensor_names(collection=CollectionKeys.WEIGHTS)[0]
    try:
        trial.tensor(first_weight).value(0)
        # A readable full value means the reduction config was not applied.
        assert False
    except TensorUnavailableForStep:
        assert trial.tensor(first_weight).reduction_value(0, "l1") is not None
        saved_reductions = trial.tensor(first_weight).reduction_values(0)
        assert len(saved_reductions) == len(ALLOWED_REDUCTIONS) + len(ALLOWED_NORMS)

    first_loss = trial.tensor_names(collection=CollectionKeys.LOSSES)[0]
    assert trial.tensor(first_loss).value(0) is not None

    first_metric = trial.tensor_names(collection=CollectionKeys.METRICS)[0]
    assert trial.tensor(first_metric).value(0) is not None

Example #10

0

Show file

File: test_keras_mirrored.py Project: thaisep/sagemaker-debugger

def test_base_reductions(out_dir, tf_eager_mode):
    """Check that weights are stored as reductions while losses and metrics keep values.

    Trains with a ReductionConfig built from ``ALLOWED_NORMS`` and
    ``ALLOWED_REDUCTIONS``. Reading the first weight tensor's full value at
    step 0 must raise ``TensorUnavailableForStep``, and its reduction values
    must be non-empty. The first loss and metric tensors must have values at
    step 0.
    """
    collections = [
        CollectionKeys.WEIGHTS,
        CollectionKeys.BIASES,
        CollectionKeys.METRICS,
        CollectionKeys.LOSSES,
    ]
    train_model(
        out_dir,
        include_collections=collections,
        reduction_config=ReductionConfig(
            norms=ALLOWED_NORMS, reductions=ALLOWED_REDUCTIONS
        ),
        steps=["train"],
        eager=tf_eager_mode,
    )

    trial = create_trial_fast_refresh(out_dir)
    first_weight = trial.tensor_names(collection=CollectionKeys.WEIGHTS)[0]

    try:
        trial.tensor(first_weight).value(0)
        # A readable full value means the reduction config was not applied.
        assert False
    except TensorUnavailableForStep:
        assert trial.tensor(first_weight).reduction_values(0)

    first_loss = trial.tensor_names(collection=CollectionKeys.LOSSES)[0]
    assert trial.tensor(first_loss).value(0) is not None

    first_metric = trial.tensor_names(collection=CollectionKeys.METRICS)[0]
    assert trial.tensor(first_metric).value(0) is not None

Example #11

0

Show file

File: test_keras.py Project: brightsparc/sagemaker-debugger

def exhaustive_check(out_dir, use_tf_keras):
    """Train over train/eval/predict/train and verify counts per collection.

    Checks the total number of tensor names (18 with ``use_tf_keras``, 13
    otherwise), the number of modes and of steps per mode, and name and step
    counts for the gradients, weights, biases, losses and metrics
    collections. Optimizer variables are checked only when ``use_tf_keras``
    is truthy.
    """
    # NOTE(review): LOSSES appears twice; kept as-is because it is unclear
    # from here whether train_model depends on the exact list.
    include_collections = [
        CollectionKeys.WEIGHTS,
        CollectionKeys.BIASES,
        CollectionKeys.GRADIENTS,
        CollectionKeys.LOSSES,
        CollectionKeys.OUTPUTS,
        CollectionKeys.METRICS,
        CollectionKeys.LOSSES,
        CollectionKeys.OPTIMIZER_VARIABLES,
    ]
    train_model(
        out_dir,
        include_collections=include_collections,
        use_tf_keras=use_tf_keras,
        eager=False,
        steps=["train", "eval", "predict", "train"],
    )

    trial = create_trial_fast_refresh(out_dir)
    if not use_tf_keras:
        # can't save optimizer variables in this case
        assert len(trial.tensor_names()) == 13
    else:
        assert len(trial.tensor_names()) == 18

    assert len(trial.modes()) == 3
    # 0, 3, 6, 9, 12, 15, 18, 19(end of epoch)
    assert len(trial.steps(ModeKeys.TRAIN)) == 8
    assert len(trial.steps(ModeKeys.EVAL)) == 4
    # ran 4 steps above
    assert len(trial.steps(ModeKeys.PREDICT)) == 2

    # Gradients.
    assert len(trial.tensor_names(collection=CollectionKeys.GRADIENTS)) == 4
    first_gradient = trial.tensor_names(collection=CollectionKeys.GRADIENTS)[0]
    assert len(trial.tensor(first_gradient).steps(ModeKeys.TRAIN)) == 7
    assert len(trial.tensor(first_gradient).steps(ModeKeys.EVAL)) == 0

    # Weights and biases.
    assert len(trial.tensor_names(collection=CollectionKeys.WEIGHTS)) == 2
    assert len(trial.tensor_names(collection=CollectionKeys.BIASES)) == 2
    first_weight = trial.tensor_names(collection=CollectionKeys.WEIGHTS)[0]

    assert len(trial.tensor(first_weight).steps()) == 13
    assert len(trial.tensor(first_weight).steps(ModeKeys.TRAIN)) == 7
    assert len(trial.tensor(first_weight).steps(ModeKeys.EVAL)) == 4

    # Losses.
    assert len(trial.tensor_names(collection=CollectionKeys.LOSSES)) == 1
    first_loss = trial.tensor_names(collection=CollectionKeys.LOSSES)[0]
    assert len(trial.tensor(first_loss).steps()) == 12

    # Metrics.
    assert len(trial.tensor_names(collection=CollectionKeys.METRICS)) == 3

    if not use_tf_keras:
        return

    # Optimizer variables.
    optimizer_names = trial.tensor_names(
        collection=CollectionKeys.OPTIMIZER_VARIABLES
    )
    assert len(optimizer_names) == 5
    first_opt_var = trial.tensor_names(
        collection=CollectionKeys.OPTIMIZER_VARIABLES
    )[0]
    assert trial.tensor(first_opt_var).value(0) is not None
    assert len(trial.tensor(first_opt_var).steps(ModeKeys.EVAL)) == 0

Example #12

0

Show file

File: test_keras.py Project: brightsparc/sagemaker-debugger

def test_save_all(out_dir):
    """Train with ``save_all=True`` and expect 21 tensor names over 4 steps."""
    train_model(
        out_dir,
        include_collections=None,
        save_all=True,
        steps=["train"],
    )
    trial = create_trial_fast_refresh(out_dir)

    name_count = len(trial.tensor_names())
    assert name_count == 21
    step_count = len(trial.steps())
    assert step_count == 4

Example #13

0

Show file

def test_keras_fit_shapes(out_dir):
    """Fit with ``ReductionConfig(save_shape=True)`` and verify shapes at step 0.

    The hook saves all tensors at step 0 only. The tensor names found at that
    step are printed before ``verify_shapes`` runs its checks.
    """
    shape_only_config = ReductionConfig(save_shape=True)
    hook = smd.KerasHook(
        out_dir=out_dir,
        save_all=True,
        save_config=SaveConfig(save_steps=[0]),
        reduction_config=shape_only_config,
    )
    helper_keras_fit(trial_dir=out_dir, hook=hook)

    trial = create_trial_fast_refresh(out_dir)
    print(trial.tensor_names(step=0))
    verify_shapes(out_dir, 0)

Example #14

0

Show file

File: test_write.py Project: vandanavk/sagemaker-debugger

def test_hook_write(out_dir):
    """Run ``helper_hook_write`` with a save-all SessionHook and expect saved weights."""
    pre_test_clean_up()

    # Hook that saves every tensor, using a save interval of 999.
    hook = SessionHook(
        out_dir,
        save_all=True,
        include_collections=None,
        save_config=SaveConfig(save_interval=999),
    )
    helper_hook_write(out_dir, hook)

    trial = create_trial_fast_refresh(out_dir)
    print(trial.tensor_names(collection="weights"))
    # The weights collection must not be empty.
    assert len(trial.tensor_names(collection="weights"))

Example #15

0

Show file

def test_reductions(out_dir):
    """Check reductions saved by a mirrored-strategy run over train and eval.

    With sum/max reductions, sum/max abs-reductions and the l1 norm
    configured, weight tensors must raise ``TensorUnavailableForStep`` when
    their full value is read and must expose at least 4 reduction values.
    Loss tensors must have a full value and no reduction values. Returns
    early, without checking the trial, when ``skip_trial_check()`` is truthy.
    """
    reduction_config = smd.ReductionConfig(
        reductions=["sum", "max"],
        abs_reductions=["sum", "max"],
        norms=["l1"],
    )
    strategy = helper_mirrored(
        out_dir,
        steps=["train", "eval"],
        reduction_config=reduction_config,
        include_collections=[
            CollectionKeys.WEIGHTS,
            CollectionKeys.BIASES,
            CollectionKeys.LOSSES,
        ],
        eval_distributed=True,
    )
    if skip_trial_check():
        return

    trial = create_trial_fast_refresh(out_dir)
    assert len(trial.tensor_names()) == 8 + 1 * strategy.num_replicas_in_sync + 1
    assert len(trial.steps()) == 4
    assert len(trial.steps(ModeKeys.TRAIN)) == 2
    assert len(trial.steps(ModeKeys.EVAL)) == 2

    # Weights: per tensor, check TRAIN steps first and then EVAL steps.
    for name in trial.tensor_names(collection="weights"):
        for mode in (ModeKeys.TRAIN, ModeKeys.EVAL):
            for step in trial.tensor(name).steps(mode):
                try:
                    trial.tensor(name).value(step, mode=mode)
                    assert False
                except TensorUnavailableForStep:
                    # for some tensors l1 reduction can't be saved due to
                    # improper dimensions for the reduction
                    reductions = trial.tensor(name).reduction_values(
                        step, mode=mode
                    )
                    assert len(reductions) >= 4

    # Losses: all EVAL checks run before the TRAIN checks.
    for mode in (ModeKeys.EVAL, ModeKeys.TRAIN):
        for name in trial.tensor_names(collection="losses"):
            for step in trial.tensor(name).steps(mode):
                reductions = trial.tensor(name).reduction_values(step, mode=mode)
                assert len(reductions) == 0
                assert trial.tensor(name).value(step, mode=mode) is not None

Example #16

0

Show file

def test_save_all(out_dir):
    """Run one mirrored training step with ``save_all=True`` and check the output.

    More than 100 tensor names are expected, along with at least one step
    and non-empty weights, biases and gradients collections. Returns early,
    without checking the trial, when ``skip_trial_check()`` is truthy.
    """
    strategy = helper_mirrored(
        out_dir,
        steps=["train"],
        num_steps=1,
        save_all=True,
        eval_distributed=True,
    )
    if skip_trial_check():
        return

    trial = create_trial_fast_refresh(out_dir)
    assert len(trial.tensor_names()) > 100
    assert len(trial.steps())
    for collection_name in ("weights", "biases", "gradients"):
        assert len(trial.tensor_names(collection=collection_name))

Example #17

0

Show file

def test_include_regex(out_dir):
    """Check a custom collection that includes the pattern "dense".

    Twelve tensor names are expected in ``custom_coll`` after training, each
    with a value at step 0.
    """
    hook = KerasHook(
        out_dir,
        save_config=SaveConfig(save_interval=9),
        include_collections=["custom_coll"],
    )
    hook.get_collection("custom_coll").include("dense")
    train_model(
        out_dir,
        hook=hook,
        save_config=SaveConfig(save_interval=9),
        steps=["train"],
    )

    trial = create_trial_fast_refresh(out_dir)
    names = trial.tensor_names(collection="custom_coll")

    assert len(names) == 12
    for name in names:
        step_zero_value = trial.tensor(name).value(0)
        assert step_zero_value is not None

Example #18

0

Show file

File: test_keras.py Project: sophiayue1116/sagemaker-debugger

def test_layer_names_gradient_tape(out_dir):
    """Check the naming of LAYERS tensors saved by the gradient-tape helper.

    Every name in the LAYERS collection must start with flatten, dense or
    dropout, then an optional ``_<digits>`` suffix, then ``/inputs`` or
    ``/outputs``.
    """
    hook = smd.KerasHook(
        out_dir,
        save_config=SaveConfig(save_interval=9),
        include_collections=[CollectionKeys.LAYERS],
    )
    helper_keras_gradtape(
        out_dir, hook=hook, save_config=SaveConfig(save_interval=9)
    )

    trial = create_trial_fast_refresh(out_dir)
    layer_names = trial.tensor_names(collection=CollectionKeys.LAYERS)
    pattern = r"^(flatten|dense|dropout)(_\d+)?\/(inputs|outputs)"
    for name in layer_names:
        matched = re.match(pattern=pattern, string=name)
        assert matched is not None

Example #19

0

Show file

def test_save_all_full(out_dir, hook=None):
    """Run ``simple_model`` with a save-all hook and check what was recorded.

    When no hook is supplied, a SessionHook with ``save_all=True`` and
    ``SaveConfig(save_interval=2)`` is created. More than 50 tensor names
    are expected, with exactly one each in the weights, gradients and losses
    collections.
    """
    tf.reset_default_graph()
    if hook is None:
        hook = SessionHook(
            out_dir=out_dir,
            save_all=True,
            save_config=SaveConfig(save_interval=2),
        )

    simple_model(hook)

    trial = create_trial_fast_refresh(out_dir)
    assert len(trial.tensor_names()) > 50
    print(trial.tensor_names(collection="weights"))
    for collection_name in ("weights", "gradients", "losses"):
        assert len(trial.tensor_names(collection=collection_name)) == 1

Example #20

0

Show file

File: test_keras.py Project: brightsparc/sagemaker-debugger

def test_collection_add(out_dir):
    """Train with ``create_relu_collection=True`` and check the "relu" collection.

    Exactly two tensor names are expected in the collection, each with a
    value at step 0.
    """
    train_model(
        out_dir,
        include_collections=["relu"],
        save_config=SaveConfig(save_interval=9),
        create_relu_collection=True,
        steps=["train"],
    )

    trial = create_trial_fast_refresh(out_dir)
    relu_names = trial.tensor_names(collection="relu")

    assert len(relu_names) == 2
    for index in (0, 1):
        assert trial.tensor(relu_names[index]).value(0) is not None

Example #21

0

Show file

def test_clash_with_custom_callback(out_dir):
    """Train with the extra "fetch_tensor" callback and check the tensor count.

    The expected number of tensor names is
    ``6 + 6 + strategy.num_replicas_in_sync * 1 + 3``.
    """
    collections = [
        CollectionKeys.WEIGHTS,
        CollectionKeys.BIASES,
        CollectionKeys.OUTPUTS,
        CollectionKeys.GRADIENTS,
    ]
    strategy = train_model(
        out_dir,
        include_collections=collections,
        save_config=SaveConfig(save_interval=9),
        steps=["train"],
        add_callbacks=["fetch_tensor"],
    )
    trial = create_trial_fast_refresh(out_dir)
    name_count = len(trial.tensor_names())
    assert name_count == 6 + 6 + strategy.num_replicas_in_sync * 1 + 3

Example #22

0

Show file

def test_save_one_worker(out_dir):
    """Run one mirrored training step with ``include_workers="one"``.

    The trial must report exactly one worker, at least one step, and
    non-empty weights, biases and gradients collections.
    """
    strategy = helper_mirrored(
        out_dir,
        steps=["train"],
        num_steps=1,
        save_all=True,
        eval_distributed=True,
        include_workers="one",
    )
    trial = create_trial_fast_refresh(out_dir)

    worker_count = len(trial.workers())
    assert worker_count == 1
    assert len(trial.steps())
    for collection_name in ("weights", "biases", "gradients"):
        assert len(trial.tensor_names(collection=collection_name))

Example #23

0

Show file

File: test_keras_mirrored.py Project: taza1/sagemaker-debugger

def test_save_all(out_dir):
    """Train with ``save_all=True`` and check tensor-name and step counts.

    Uses ``SaveConfig(save_steps=[5])``. The expected tensor-name count
    scales with ``strategy.num_replicas_in_sync``, and three steps are
    expected.
    """
    strategy = train_model(
        out_dir,
        include_collections=None,
        save_all=True,
        save_config=SaveConfig(save_steps=[5]),
        steps=["train"],
    )
    trial = create_trial_fast_refresh(out_dir)
    print(trial.tensor_names())

    name_count = len(trial.tensor_names())
    # weights, grads, optimizer_variables, metrics, losses, outputs
    assert name_count == (
        6 + 6 + 5 + 3 + 1
        + 3 * strategy.num_replicas_in_sync
        + 2 * strategy.num_replicas_in_sync
    )
    assert len(trial.steps()) == 3

Example #24

0

Show file

def test_eval_distributed(out_dir):
    """Check tensors saved by a mirrored-strategy run with distributed eval.

    Runs ``helper_mirrored`` over train and eval with the weights, biases
    and losses collections and ``eval_distributed=True``, then verifies
    tensor-name and step counts, and per-mode worker counts and values for
    weights and losses. Returns early, without checking the trial, when
    ``skip_trial_check()`` is truthy.

    Args:
        out_dir: Directory the hook writes to and the trial is read from.
    """
    strategy = helper_mirrored(
        out_dir,
        steps=["train", "eval"],
        include_collections=[
            CollectionKeys.WEIGHTS, CollectionKeys.BIASES,
            CollectionKeys.LOSSES
        ],
        eval_distributed=True,
    )
    if skip_trial_check():
        return
    tr = create_trial_fast_refresh(out_dir)
    assert len(tr.tensor_names()) == 8 + 1 * strategy.num_replicas_in_sync + 1
    assert len(tr.steps()) == 4
    assert len(tr.steps(ModeKeys.TRAIN)) == 2
    assert len(tr.steps(ModeKeys.EVAL)) == 2

    for tname in tr.tensor_names(collection="weights"):
        for s in tr.tensor(tname).steps(ModeKeys.TRAIN):
            assert len(tr.tensor(tname).workers(
                s, ModeKeys.TRAIN)) == strategy.num_replicas_in_sync
            for worker in tr.tensor(tname).workers(s, ModeKeys.TRAIN):
                assert tr.tensor(tname).value(s,
                                              worker=worker,
                                              mode=ModeKeys.TRAIN) is not None
        for s in tr.tensor(tname).steps(ModeKeys.EVAL):
            assert len(tr.tensor(tname).workers(
                s, ModeKeys.EVAL)) == strategy.num_replicas_in_sync
            assert tr.tensor(tname).value(s, mode=ModeKeys.EVAL) is not None

    # Raw string: "\d" in a plain literal is an invalid escape sequence that
    # newer Python versions warn about. The pattern text itself is unchanged.
    tensornames = tr.tensor_names(regex=r"Identity_\d+:0")
    # Fail with a readable message instead of an IndexError on tensornames[0].
    assert tensornames, "no tensor name matched the Identity_<n>:0 pattern"
    for s in tr.tensor(tensornames[0]).steps(ModeKeys.TRAIN):
        for w in tr.tensor(tensornames[0]).workers(s, ModeKeys.TRAIN):
            assert tr.tensor(tensornames[0]).value(
                s, worker=w, mode=ModeKeys.TRAIN) is not None
        assert (len(tr.tensor(tensornames[0]).workers(
            s, ModeKeys.TRAIN)) == strategy.num_replicas_in_sync)

    for tname in tr.tensor_names(collection="losses"):
        for s in tr.tensor(tname).steps(ModeKeys.EVAL):
            assert len(tr.tensor(tname).workers(s, ModeKeys.EVAL)) == 1
            assert tr.tensor(tname).value(s, mode=ModeKeys.EVAL) is not None
        if tname != tensornames[0]:
            # These steps are enumerated in TRAIN mode, so they are queried
            # in TRAIN mode too; the original looked them up under EVAL.
            for s in tr.tensor(tname).steps(ModeKeys.TRAIN):
                assert len(tr.tensor(tname).workers(s, ModeKeys.TRAIN)) == 1
                assert tr.tensor(tname).value(s,
                                              mode=ModeKeys.TRAIN) is not None

Example #25

0

Show file

def test_clash_with_tb_callback(out_dir):
    """Fit with the extra "tensorboard" callback and check the tensor count.

    Seven tensor names are expected when ``is_tf_2_2()`` or ``is_tf_2_3()``
    is truthy, eight otherwise.
    """
    # this test cannot be run in non-eager mode
    collections = [
        CollectionKeys.WEIGHTS,
        CollectionKeys.BIASES,
        CollectionKeys.LOSSES,
        CollectionKeys.METRICS,
    ]
    helper_keras_fit(
        out_dir,
        save_config=SaveConfig(save_interval=9),
        steps=["train"],
        include_collections=collections,
        add_callbacks=["tensorboard"],
    )
    trial = create_trial_fast_refresh(out_dir)
    name_count = len(trial.tensor_names())
    if is_tf_2_2() or is_tf_2_3():
        assert name_count == 7
    else:
        assert name_count == 8

Example #26

0

Show file

File: test_keras.py Project: brightsparc/sagemaker-debugger

def test_clash_with_tb_callback(out_dir):
    """Train with the extra "tensorboard" callback and expect 8 tensor names.

    ``out_dir`` is removed after the assertion passes.
    """
    collections = [
        CollectionKeys.WEIGHTS,
        CollectionKeys.BIASES,
        CollectionKeys.LOSSES,
        CollectionKeys.METRICS,
    ]
    train_model(
        out_dir,
        save_config=SaveConfig(save_interval=9),
        steps=["train"],
        include_collections=collections,
        add_callbacks=["tensorboard"],
    )
    trial = create_trial_fast_refresh(out_dir)
    name_count = len(trial.tensor_names())
    assert name_count == 8
    # Reached only when the assertion above holds.
    shutil.rmtree(out_dir)

Example #27

0

Show file

def test_include_regex(out_dir):
    """Check a "dense" custom collection with ``include_workers="all"``.

    The collection is expected to hold
    ``4 + 4 + 3 * strategy.num_replicas_in_sync`` tensor names, each with a
    value at step 0.
    """
    hook = KerasHook(
        out_dir=out_dir,
        save_config=SaveConfig(save_interval=9),
        include_collections=["custom_coll"],
        include_workers="all",
    )
    hook.get_collection("custom_coll").include("dense")
    strategy = train_model(out_dir, hook=hook, steps=["train"])

    trial = create_trial_fast_refresh(out_dir)
    names = trial.tensor_names(collection="custom_coll")

    name_count = len(names)
    assert name_count == 4 + 4 + 3 * strategy.num_replicas_in_sync
    for name in names:
        step_zero_value = trial.tensor(name).value(0)
        assert step_zero_value is not None

Example #28

0

Show file

File: test_keras.py Project: sophiayue1116/sagemaker-debugger

def test_gradtape_include_regex(out_dir):
    """
    Test a custom collection populated by regex under the gradient-tape helper.

    The collection includes the pattern "dense" and is expected to hold 12
    tensor names when ``is_tf_2_2()`` is truthy, 8 otherwise, each with a
    value at step 0.
    """
    hook = smd.KerasHook(
        out_dir,
        save_config=SaveConfig(save_interval=9),
        include_collections=["custom_coll"],
    )
    hook.get_collection("custom_coll").include("dense")
    helper_keras_gradtape(
        out_dir, hook=hook, save_config=SaveConfig(save_interval=9)
    )

    trial = create_trial_fast_refresh(out_dir)
    names = trial.tensor_names(collection="custom_coll")

    name_count = len(names)
    if is_tf_2_2():
        assert name_count == 12
    else:
        assert name_count == 8
    for name in names:
        assert trial.tensor(name).value(0) is not None

Example #29

0

Show file

def test_save_one_worker(out_dir):
    """Check that ``include_workers="one"`` records tensors from one worker.

    Trains with ``save_all=True`` and ``SaveConfig(save_steps=[5])``. The
    trial must report one worker and at least one step. The weights, biases
    and gradients collections must be non-empty, and the first weight and
    first bias tensor must each come from one worker at step 5.

    Args:
        out_dir: Directory the hook writes to and the trial is read from.
    """
    # The return value is not needed here, so it is not bound to a name.
    train_model(
        out_dir,
        include_collections=None,
        save_all=True,
        save_config=SaveConfig(save_steps=[5]),
        steps=["train"],
        include_workers="one",
    )
    tr = create_trial_fast_refresh(out_dir)
    assert len(tr.workers()) == 1
    assert len(tr.steps())
    # The original asserted this twice in a row; once is enough.
    assert len(tr.tensor_names(collection="weights"))
    assert len(tr.tensor(tr.tensor_names(collection="weights")[0]).workers(5)) == 1
    assert len(tr.tensor_names(collection="biases"))
    assert len(tr.tensor(tr.tensor_names(collection="biases")[0]).workers(5)) == 1
    assert len(tr.tensor_names(collection="gradients"))

Example #30

0

Show file

File: test_keras.py Project: sophiayue1116/sagemaker-debugger

def test_include_regex(out_dir, tf_eager_mode):
    """Check a custom collection that includes the pattern "dense".

    After fitting, ``custom_coll`` is expected to hold 12 tensor names when
    ``is_tf_2_2()`` is truthy, 4 otherwise, each with a value at step 0.
    """
    hook = smd.KerasHook(
        out_dir,
        save_config=SaveConfig(save_interval=9),
        include_collections=["custom_coll"],
    )
    hook.get_collection("custom_coll").include("dense")
    helper_keras_fit(
        out_dir,
        hook=hook,
        save_config=SaveConfig(save_interval=9),
        steps=["train"],
        run_eagerly=tf_eager_mode,
    )

    trial = create_trial_fast_refresh(out_dir)
    names = trial.tensor_names(collection="custom_coll")

    name_count = len(names)
    if is_tf_2_2():
        assert name_count == 12
    else:
        assert name_count == 4
    for name in names:
        assert trial.tensor(name).value(0) is not None