def test_save_all(out_dir, tf_eager_mode, workers):
    """Train with ``save_all=True`` and check what ended up in the trial.

    Asserts the total number of saved tensors for the active TF version and
    execution mode, the number of saved steps, the number of workers that
    wrote each tensor at step 0, and finally calls ``verify_files`` on the
    output directory.

    Args:
        out_dir: Directory the hook writes to and the trial reads from.
        tf_eager_mode: Forwarded to ``train_model`` as ``eager``; also selects
            which tensor-count expectation applies.
        workers: Forwarded to ``train_model`` as ``include_workers``; ``"one"``
            means each tensor is expected from a single worker.
    """
    save_config = SaveConfig(save_steps=[5])
    strategy, saved_scalars = train_model(
        out_dir,
        include_collections=None,
        save_all=True,
        save_config=save_config,
        steps=["train"],
        eager=tf_eager_mode,
        include_workers=workers,
    )
    tr = create_trial_fast_refresh(out_dir)
    print(tr.tensor_names())

    # One flat chain, one expectation per configuration. The previous version
    # re-tested is_tf_2_2() inside a branch already guarded by it, leaving an
    # unreachable alternative in the expression.
    if not tf_eager_mode:
        replicas = strategy.num_replicas_in_sync
        # weights, grads, optimizer_variables, metrics, losses, outputs
        expected_count = 6 + 6 + 5 + 3 + 1 + 3 * replicas + 2 * replicas
    elif is_tf_2_2():
        # weights, metrics, losses, optimizer variables, scalar, inputs,
        # outputs, gradients, layers
        expected_count = 6 + 2 + 1 + 5 + 1 + 1 + 2 + 8 + 8
    elif is_tf_2_3():
        expected_count = 6 + 2 + 1 + 5 + 1
    else:
        expected_count = 6 + 3 + 1 + 5 + 1
    assert len(tr.tensor_names()) == expected_count
    assert len(tr.steps()) == 3

    expected_workers = 1 if workers == "one" else strategy.num_replicas_in_sync
    for tname in tr.tensor_names():
        assert len(tr.tensor(tname).workers(0)) == expected_workers
    verify_files(out_dir, save_config, saved_scalars)
예제 #2
0
def test_regex_filtering_for_default_collections(out_dir):
    """Narrow the LAYERS and GRADIENTS collections with regexes and check the result.

    Both collections are restricted to "dense" tensors before training; after
    training, every saved name must match the corresponding pattern and have a
    value at step 0.
    """
    hook = smd.KerasHook(
        out_dir,
        save_config=SaveConfig(save_interval=9),
        include_collections=[CollectionKeys.LAYERS, CollectionKeys.GRADIENTS],
    )
    layers_collection = hook.get_collection(CollectionKeys.LAYERS)
    layers_collection.include("^dense")
    gradients_collection = hook.get_collection(CollectionKeys.GRADIENTS)
    gradients_collection.include("gradients/dense")
    helper_keras_fit(
        out_dir,
        hook=hook,
        save_config=SaveConfig(save_interval=10),
        steps=["train"],
        run_eagerly=True,
    )

    tr = create_trial_fast_refresh(out_dir)
    layer_tnames = tr.tensor_names(collection=CollectionKeys.LAYERS)
    gradient_tnames = tr.tensor_names(collection=CollectionKeys.GRADIENTS)

    # Both collections are expected to hold 4 tensors on TF 2.2 and none otherwise.
    expected_count = 4 if is_tf_2_2() else 0
    assert len(layer_tnames) == expected_count
    assert len(gradient_tnames) == expected_count

    # (saved names, pattern each name must match), checked in this order.
    name_checks = (
        (layer_tnames, r"^(dense)(_\d+)?\/(inputs|outputs)"),
        (gradient_tnames, r"gradients/dense"),
    )
    for saved_names, name_pattern in name_checks:
        for saved_name in saved_names:
            assert tr.tensor(saved_name).value(0) is not None
            assert re.match(pattern=name_pattern, string=saved_name) is not None
예제 #3
0
def test_keras_fit(out_dir, tf_eager_mode, saveall):
    """Run train/eval/predict/train through ``helper_keras_fit`` and check saved tensors.

    Args:
        out_dir: Directory the hook writes to and the trial reads from.
        tf_eager_mode: Forwarded to ``helper_keras_fit`` as ``eager``; also
            selects the expected tensor counts.
        saveall: Passed to ``KerasHook`` as ``save_all``. When truthy the
            weight, bias and optimizer-variable collections are checked too.
    """
    hook = smd.KerasHook(out_dir=out_dir, save_all=saveall)
    helper_keras_fit(
        trial_dir=out_dir,
        hook=hook,
        eager=tf_eager_mode,
        steps=["train", "eval", "predict", "train"],
    )

    trial = smd.create_trial(path=out_dir)
    # can't save gradients in TF 2.x eager mode
    if saveall:  # save losses, metrics, weights, biases
        if tf_eager_mode:
            assert len(trial.tensor_names()) == (12 if is_tf_2_2() else 13)
        else:
            assert len(trial.tensor_names()) == 21
        assert len(trial.tensor_names(collection=CollectionKeys.BIASES)) == 2
        assert len(trial.tensor_names(collection=CollectionKeys.WEIGHTS)) == 2
        assert len(
            trial.tensor_names(
                collection=CollectionKeys.OPTIMIZER_VARIABLES)) == 5
        # This used to be `assert (condition, message)`: a non-empty tuple is
        # always truthy, so the check could never fail. Condition and message
        # must be separate operands of the assert statement.
        eval_optimizer_vars = trial.tensor_names(
            collection=CollectionKeys.OPTIMIZER_VARIABLES,
            mode=ModeKeys.EVAL)
        assert len(eval_optimizer_vars) == 0, (
            "No Optimizer Variables Should be Saved in EVAL Mode")
    else:  # save the default losses and metrics
        assert len(trial.tensor_names()) == (3 if is_tf_2_2() and tf_eager_mode
                                             else 4)
    assert len(trial.tensor_names(collection=CollectionKeys.LOSSES)) == 1
    assert len(trial.tensor_names(collection=CollectionKeys.METRICS)) == (
        2 if is_tf_2_2() and tf_eager_mode else 3)
예제 #4
0
def test_include_collections(out_dir, tf_eager_mode):
    """Request an explicit list of built-in collections and check per-collection counts."""
    hook = smd.KerasHook(
        out_dir,
        save_config=SaveConfig(save_interval=3),
        include_collections=[
            CollectionKeys.WEIGHTS,
            CollectionKeys.BIASES,
            CollectionKeys.GRADIENTS,
            CollectionKeys.LOSSES,
            CollectionKeys.OUTPUTS,
            CollectionKeys.METRICS,
            CollectionKeys.OPTIMIZER_VARIABLES,
        ],
        reduction_config=ReductionConfig(norms=ALLOWED_NORMS, reductions=ALLOWED_REDUCTIONS),
    )
    helper_keras_fit(out_dir, hook=hook, steps=["train", "eval", "predict"], eager=tf_eager_mode)

    trial = smd.create_trial(path=out_dir)

    def saved_in(collection):
        # Number of tensor names the trial reports for one collection.
        return len(trial.tensor_names(collection=collection))

    if not tf_eager_mode:
        assert len(trial.tensor_names()) == 18
        # Gradients and optimizer variables are only asserted in non-eager mode.
        assert saved_in(CollectionKeys.GRADIENTS) == 4
        assert saved_in(CollectionKeys.OPTIMIZER_VARIABLES) == 5
    else:
        assert len(trial.tensor_names()) == (7 if is_tf_2_2() else 8)

    assert saved_in(CollectionKeys.BIASES) == 2
    assert saved_in(CollectionKeys.WEIGHTS) == 2
    assert saved_in(CollectionKeys.LOSSES) == 1
    expected_metrics = 2 if is_tf_2_2() and tf_eager_mode else 3
    assert saved_in(CollectionKeys.METRICS) == expected_metrics
예제 #5
0
def test_keras_fit_pure_eager(out_dir, tf_eager_mode):
    """
    Run fit() with run_eagerly=True under save_all and check the saved tensor counts.
    """
    hook = smd.KerasHook(
        out_dir=out_dir,
        save_all=True,
        save_config=SaveConfig(save_interval=3),
    )
    helper_keras_fit(
        trial_dir=out_dir,
        hook=hook,
        eager=tf_eager_mode,
        run_eagerly=True,
    )

    trial = smd.create_trial(path=out_dir)

    def saved_in(collection):
        # Number of tensor names the trial reports for one collection.
        return len(trial.tensor_names(collection=collection))

    # Expected total depends on the TF version only.
    if is_tf_2_2():
        expected_total = 27
    elif is_tf_2_3():
        expected_total = 20
    else:
        expected_total = 21
    assert len(trial.tensor_names()) == expected_total

    assert saved_in(CollectionKeys.BIASES) == 2
    assert saved_in(CollectionKeys.WEIGHTS) == 2
    assert saved_in(CollectionKeys.OPTIMIZER_VARIABLES) == 5
    # Model inputs and outputs are only expected on TF 2.2.
    assert saved_in(CollectionKeys.INPUTS) == (1 if is_tf_2_2() else 0)
    assert saved_in(CollectionKeys.OUTPUTS) == (2 if is_tf_2_2() else 0)
def test_keras_fit(out_dir, tf_eager_mode, saveall):
    """Save a custom scalar, run train/eval/predict/train, and check saved tensors.

    Args:
        out_dir: Directory the hook writes to and the trial reads from.
        tf_eager_mode: Forwarded to ``helper_keras_fit`` as ``run_eagerly``;
            also selects the expected tensor counts.
        saveall: Passed to ``KerasHook`` as ``save_all``. When truthy the
            weight, bias and optimizer-variable collections are checked too.
    """
    hook = smd.KerasHook(out_dir=out_dir, save_all=saveall)
    # The scalar is saved before training starts. The dict that used to record
    # it here was never read afterwards, so it has been removed.
    hook.save_scalar("foobar", 1, sm_metric=True, timestamp=time.time())
    helper_keras_fit(
        trial_dir=out_dir,
        hook=hook,
        run_eagerly=tf_eager_mode,
        steps=["train", "eval", "predict", "train"],
    )

    trial = smd.create_trial(path=out_dir)
    # can't save gradients in TF 2.x eager mode
    if saveall:  # save losses, metrics, weights, biases, scalar
        if tf_eager_mode:
            if is_tf_2_2():
                assert len(trial.tensor_names()) == 28
            else:
                assert len(trial.tensor_names()) == (21 if is_tf_2_3() else 14)
            assert len(trial.tensor_names(collection=CollectionKeys.INPUTS)) == (
                1 if is_tf_2_2() else 0
            )
            assert len(trial.tensor_names(collection=CollectionKeys.OUTPUTS)) == (
                2 if is_tf_2_2() else 0
            )
        else:
            assert len(trial.tensor_names()) == 21
        assert len(trial.tensor_names(collection=CollectionKeys.BIASES)) == 2
        assert len(trial.tensor_names(collection=CollectionKeys.WEIGHTS)) == 2
        assert len(trial.tensor_names(collection=CollectionKeys.OPTIMIZER_VARIABLES)) == 5
        # This used to be `assert (condition, message)`: a non-empty tuple is
        # always truthy, so the check could never fail. Condition and message
        # must be separate operands of the assert statement.
        eval_optimizer_vars = trial.tensor_names(
            collection=CollectionKeys.OPTIMIZER_VARIABLES, mode=ModeKeys.EVAL
        )
        assert (
            len(eval_optimizer_vars) == 0
        ), "No Optimizer Variables Should be Saved in EVAL Mode"
    else:  # save the default losses and metrics
        assert len(trial.tensor_names()) == (
            4 if (is_tf_2_2() or is_tf_2_3()) and tf_eager_mode else 5
        )
    assert len(trial.tensor_names(collection=CollectionKeys.LOSSES)) == 1
    assert len(trial.tensor_names(collection=CollectionKeys.METRICS)) == (
        2 if (is_tf_2_2() or is_tf_2_3()) and tf_eager_mode else 3
    )
    for tname in trial.tensor_names():
        assert trial.tensor(tname).value(0) is not None
def test_include_regex(out_dir, tf_eager_mode, workers):
    """Fill a custom collection via the regex "dense" and check what was saved for it.

    Every tensor in the collection must have a value at step 0 and must have
    been written by the expected number of workers.
    """
    hook = KerasHook(
        out_dir=out_dir,
        save_config=SaveConfig(save_interval=9),
        include_collections=["custom_coll"],
        include_workers=workers,
    )
    hook.get_collection("custom_coll").include("dense")
    strategy, _ = train_model(
        out_dir,
        hook=hook,
        steps=["train"],
        eager=tf_eager_mode,
    )

    tr = create_trial_fast_refresh(out_dir)
    tnames = tr.tensor_names(collection="custom_coll")

    if not tf_eager_mode:
        assert len(tnames) == 4 + 3 * strategy.num_replicas_in_sync
    elif is_tf_2_2():
        assert len(tnames) == 12
    else:
        assert len(tnames) == 4

    for saved_name in tnames:
        saved_tensor = tr.tensor(saved_name)
        assert saved_tensor.value(0) is not None
        if workers == "one":
            expected_workers = 1
        else:
            expected_workers = strategy.num_replicas_in_sync
        assert len(tr.tensor(saved_name).workers(0)) == expected_workers
예제 #8
0
def test_gradtape_include_collections(out_dir):
    """
    Check a GradientTape training script whose hook names every collection
    explicitly through include_collections.
    """
    hook = smd.KerasHook(
        out_dir,
        save_config=SaveConfig(save_interval=3),
        include_collections=[
            CollectionKeys.WEIGHTS,
            CollectionKeys.BIASES,
            CollectionKeys.GRADIENTS,
            CollectionKeys.LOSSES,
            CollectionKeys.OUTPUTS,
            CollectionKeys.METRICS,
            CollectionKeys.OPTIMIZER_VARIABLES,
        ],
        reduction_config=ReductionConfig(norms=ALLOWED_NORMS,
                                         reductions=ALLOWED_REDUCTIONS),
    )
    helper_keras_gradtape(out_dir, hook=hook)

    trial = smd.create_trial(path=out_dir)
    expected_total = 16 if is_tf_2_2() else 15
    assert len(trial.tensor_names()) == expected_total

    # (collection, expected number of saved tensors), asserted in this order.
    expected_per_collection = (
        (CollectionKeys.GRADIENTS, 4),
        (CollectionKeys.OPTIMIZER_VARIABLES, 5),
        (CollectionKeys.BIASES, 2),
        (CollectionKeys.WEIGHTS, 2),
        (CollectionKeys.LOSSES, 1),
        (CollectionKeys.METRICS, 1),
    )
    for collection, expected in expected_per_collection:
        assert len(trial.tensor_names(collection=collection)) == expected
def test_hook_from_json(out_dir, tf_eager_mode, monkeypatch):
    """Build the hook from a JSON config named by an environment variable and check the output."""
    json_config_path = "tests/tensorflow/hooks/test_json_configs/test_collection_defaults.json"
    monkeypatch.setenv(CONFIG_FILE_PATH_ENV_STR, json_config_path)
    hook = smd.KerasHook.create_from_json_file()
    helper_keras_fit(out_dir, hook=hook, steps=["train"], run_eagerly=tf_eager_mode)

    trial = smd.create_trial(path=out_dir)

    def saved_in(collection):
        # Number of tensor names the trial reports for one collection.
        return len(trial.tensor_names(collection=collection))

    # Eager runs on TF 2.2 / 2.3 are expected to save one metric fewer.
    if (is_tf_2_2() or is_tf_2_3()) and tf_eager_mode:
        expected_total, expected_metrics = 5, 2
    else:
        expected_total, expected_metrics = 6, 3

    assert len(trial.tensor_names()) == expected_total
    assert saved_in(CollectionKeys.BIASES) == 0
    assert saved_in(CollectionKeys.WEIGHTS) == 2
    assert saved_in(CollectionKeys.LOSSES) == 1
    assert saved_in(CollectionKeys.METRICS) == expected_metrics
예제 #10
0
def test_weights_collections(out_dir, tf_eager_mode):
    """Include only the WEIGHTS collection and check which tensors are saved."""
    weights_only_hook = smd.KerasHook(
        out_dir,
        save_config=SaveConfig(save_interval=3),
        include_collections=[CollectionKeys.WEIGHTS],
    )
    helper_keras_fit(out_dir, hook=weights_only_hook, steps=["train"], run_eagerly=tf_eager_mode)

    trial = smd.create_trial(path=out_dir)

    def saved_in(collection):
        # Number of tensor names the trial reports for one collection.
        return len(trial.tensor_names(collection=collection))

    # Eager runs on TF 2.2 / 2.3 are expected to save one metric fewer.
    if (is_tf_2_2() or is_tf_2_3()) and tf_eager_mode:
        expected_total, expected_metrics = 5, 2
    else:
        expected_total, expected_metrics = 6, 3

    assert len(trial.tensor_names()) == expected_total
    assert saved_in(CollectionKeys.BIASES) == 0
    assert saved_in(CollectionKeys.WEIGHTS) == 2
    assert saved_in(CollectionKeys.LOSSES) == 1
    assert saved_in(CollectionKeys.METRICS) == expected_metrics
예제 #11
0
def test_include_collections(out_dir, tf_eager_mode):
    """Include built-in collections plus a custom "Adam" collection and check the counts."""
    custom_name = "custom_optimizer_variables"
    hook = smd.KerasHook(
        out_dir,
        save_config=SaveConfig(save_interval=3),
        include_collections=[
            CollectionKeys.WEIGHTS,
            CollectionKeys.BIASES,
            CollectionKeys.GRADIENTS,
            CollectionKeys.LOSSES,
            CollectionKeys.METRICS,
            CollectionKeys.OPTIMIZER_VARIABLES,
            custom_name,
        ],
        reduction_config=ReductionConfig(norms=ALLOWED_NORMS,
                                         reductions=ALLOWED_REDUCTIONS),
    )
    hook.get_collection(custom_name).include("Adam")
    helper_keras_fit(
        out_dir,
        hook=hook,
        steps=["train", "eval", "predict"],
        run_eagerly=tf_eager_mode,
    )

    trial = smd.create_trial(path=out_dir)

    def saved_in(collection):
        # Number of tensor names the trial reports for one collection.
        return len(trial.tensor_names(collection=collection))

    if not tf_eager_mode:
        assert len(trial.tensor_names()) == 18
        # Gradients are only asserted in non-eager mode.
        assert saved_in(CollectionKeys.GRADIENTS) == 4
    elif is_tf_2_2():
        assert len(trial.tensor_names()) == 16
    else:
        assert len(trial.tensor_names()) == (12 if is_tf_2_3() else 13)

    assert saved_in(CollectionKeys.OPTIMIZER_VARIABLES) == 5
    assert saved_in(custom_name) == 5
    assert saved_in(CollectionKeys.BIASES) == 2
    assert saved_in(CollectionKeys.WEIGHTS) == 2
    assert saved_in(CollectionKeys.LOSSES) == 1
    expected_metrics = 2 if (is_tf_2_2() or is_tf_2_3()) and tf_eager_mode else 3
    assert saved_in(CollectionKeys.METRICS) == expected_metrics
def helper_test_keras_v2(script_mode: bool = False, eager_mode: bool = True):
    """Run a Keras fit inside SagemakerSimulator and check the default ZCC output.

    Losses and metrics must have been saved; weights and biases must not.
    The hook found via ``smd.get_hook()`` must report the default hook
    configuration.
    """
    smd.del_hook()
    tf.keras.backend.clear_session()
    if not eager_mode:
        if is_tf_2_3() is False and is_tf_2_2() is False:
            # v1 training APIs are currently not supported
            # in ZCC mode with smdebug 0.9 and AWS TF 2.3.0
            tf.compat.v1.disable_eager_execution()

    # TensorBoard is switched off for TF 2.0.2 and for TF 2.3.
    enable_tb = not (tf.__version__ == "2.0.2" or is_tf_2_3())
    # run_eagerly is only passed explicitly on TF 2.2 / 2.3.
    run_eagerly = eager_mode if (is_tf_2_2() or is_tf_2_3()) else None

    with SagemakerSimulator(enable_tb=enable_tb) as sim:
        helper_keras_fit(
            script_mode=script_mode,
            eager_mode=eager_mode,
            run_eagerly=run_eagerly,
            sim=sim,
        )
        hook = smd.get_hook()
        assert hook
        # The hook must have run with the default hook configuration.
        assert hook.has_default_hook_configuration()
        hook.close()

        # The hook was created and tensors were written.
        trial = smd.create_trial(path=sim.out_dir)
        assert len(trial.steps()) > 0, "Nothing saved at any step."
        assert len(trial.tensor_names()) > 0, "Tensors were not saved."

        # Collections saved by default.
        saved_losses = trial.tensor_names(collection=CollectionKeys.LOSSES)
        assert len(saved_losses) > 0, "No Losses Saved"
        saved_metrics = trial.tensor_names(collection=CollectionKeys.METRICS)
        assert len(saved_metrics) > 0, "No Metrics Saved"

        # Collections that must stay empty by default.
        saved_weights = trial.tensor_names(collection=CollectionKeys.WEIGHTS)
        assert len(saved_weights) == 0, "Weights were not expected to be saved by default"
        saved_biases = trial.tensor_names(collection=CollectionKeys.BIASES)
        assert len(saved_biases) == 0, "Biases were not expected to be saved by default"
예제 #13
0
def helper_test_keras_v2_gradienttape(script_mode: bool = False,
                                      json_file_contents="{}",
                                      default=False):
    """Run GradientTape training inside SagemakerSimulator and check what was saved.

    In script mode a hook must exist, and with ``default`` set it must report
    the default configuration. Outside script mode no hook is expected on TF
    older than 2.1.2; on TF 2.2 with ``default`` being False the inputs,
    outputs and ``dense_layers`` collections are checked as well.
    """
    smd.del_hook()
    tf.keras.backend.clear_session()

    with SagemakerSimulator(json_file_contents=json_file_contents) as sim:
        helper_keras_gradienttape_train(
            script_mode=script_mode,
            json_file_contents=json_file_contents,
            sim=sim,
        )
        hook = smd.get_hook()

        if not script_mode and version.parse(tf.__version__) < version.parse("2.1.2"):
            assert not hook  # only supported on TF 2.1.2 and greater
            return

        assert hook
        if script_mode and default:
            assert hook.has_default_hook_configuration()
        hook.close()

        # The hook was created and tensors were written.
        trial = smd.create_trial(path=sim.out_dir)
        assert len(trial.steps()) > 0, "Nothing saved at any step."
        assert len(trial.tensor_names()) > 0, "Tensors were not saved."
        assert len(trial.tensor_names(collection="losses")) > 0

        # The remaining checks apply only outside script mode, on TF 2.2,
        # with a non-default configuration.
        if script_mode or not is_tf_2_2() or default is not False:
            return

        # Inputs and Outputs are not saved with the default collection configurations.
        assert len(trial.tensor_names(collection="inputs")) > 0
        assert len(trial.tensor_names(collection="outputs")) > 0
        assert trial.tensor_names(collection="outputs") == ["predictions"]

        dense_layer_names = trial.tensor_names(collection="dense_layers")
        if "dense_layers" in json_file_contents:
            # Only test_keras_v2_multi_collections defines this custom collection.
            assert len(dense_layer_names) > 0
        else:
            assert len(dense_layer_names) == 0
예제 #14
0
def test_keras_fit(out_dir, tf_eager_mode, saveall):
    """Run train/eval/predict/train and check tensor counts with and without save_all."""
    hook = smd.KerasHook(out_dir=out_dir, save_all=saveall)
    helper_keras_fit(
        trial_dir=out_dir,
        hook=hook,
        eager=tf_eager_mode,
        steps=["train", "eval", "predict", "train"],
    )

    trial = smd.create_trial(path=out_dir)

    def saved_in(collection):
        # Number of tensor names the trial reports for one collection.
        return len(trial.tensor_names(collection=collection))

    # can't save gradients in TF 2.x eager mode
    if not saveall:
        # Only the default losses and metrics are saved.
        expected_default_total = 3 if is_tf_2_2() and tf_eager_mode else 4
        assert len(trial.tensor_names()) == expected_default_total
    else:
        # save_all: losses, metrics, weights, biases
        if tf_eager_mode:
            assert len(trial.tensor_names()) == (7 if is_tf_2_2() else 8)
        else:
            assert len(trial.tensor_names()) == 21
        assert saved_in(CollectionKeys.BIASES) == 2
        assert saved_in(CollectionKeys.WEIGHTS) == 2

    assert saved_in(CollectionKeys.LOSSES) == 1
    expected_metrics = 2 if is_tf_2_2() and tf_eager_mode else 3
    assert saved_in(CollectionKeys.METRICS) == expected_metrics
예제 #15
0
def test_include_only_custom_collection(out_dir, tf_eager_mode):
    """Include just one custom collection (regex "Adam") and check the saved counts."""
    custom_name = "custom_optimizer_variables"
    hook = smd.KerasHook(
        out_dir,
        save_config=SaveConfig(save_interval=3),
        include_collections=[custom_name],
        reduction_config=ReductionConfig(norms=ALLOWED_NORMS, reductions=ALLOWED_REDUCTIONS),
    )
    hook.get_collection(custom_name).include("Adam")
    helper_keras_fit(
        out_dir,
        hook=hook,
        steps=["train", "eval", "predict"],
        eager=tf_eager_mode,
    )

    trial = smd.create_trial(path=out_dir)
    if is_tf_2_2() and tf_eager_mode:
        expected_total = 8
    else:
        expected_total = 9
    assert len(trial.tensor_names()) == expected_total
    assert len(trial.tensor_names(collection=custom_name)) == 5
예제 #16
0
def test_clash_with_tb_callback(out_dir):
    """Train with the "tensorboard" callback added and check the saved tensor count."""
    # this test cannot be run in non-eager mode
    collections_to_save = [
        CollectionKeys.WEIGHTS,
        CollectionKeys.BIASES,
        CollectionKeys.LOSSES,
        CollectionKeys.METRICS,
    ]
    helper_keras_fit(
        out_dir,
        save_config=SaveConfig(save_interval=9),
        steps=["train"],
        include_collections=collections_to_save,
        add_callbacks=["tensorboard"],
    )
    tr = create_trial_fast_refresh(out_dir)
    if is_tf_2_2() or is_tf_2_3():
        expected_total = 7
    else:
        expected_total = 8
    assert len(tr.tensor_names()) == expected_total
예제 #17
0
def test_gradtape_include_regex(out_dir):
    """
    Fill a custom collection through the regex "dense" in a GradientTape run.
    """
    every_ninth_step = SaveConfig(save_interval=9)
    hook = smd.KerasHook(
        out_dir,
        save_config=every_ninth_step,
        include_collections=["custom_coll"],
    )
    hook.get_collection("custom_coll").include("dense")
    helper_keras_gradtape(out_dir, hook=hook, save_config=SaveConfig(save_interval=9))

    tr = create_trial_fast_refresh(out_dir)
    tnames = tr.tensor_names(collection="custom_coll")

    expected_count = 12 if is_tf_2_2() else 8
    assert len(tnames) == expected_count
    # Every tensor in the collection must have a value at step 0.
    for saved_name in tnames:
        assert tr.tensor(saved_name).value(0) is not None
def test_clash_with_tb_callback(out_dir):
    """Run ``train_model`` with the "tensorboard" callback and check the saved tensor count."""
    collections_to_save = [
        CollectionKeys.WEIGHTS,
        CollectionKeys.BIASES,
        CollectionKeys.GRADIENTS,
        CollectionKeys.LOSSES,
        CollectionKeys.METRICS,
    ]
    train_model(
        out_dir,
        save_config=SaveConfig(save_interval=9),
        include_collections=collections_to_save,
        steps=["train"],
        add_callbacks=["tensorboard"],
    )
    tr = create_trial_fast_refresh(out_dir)
    if is_tf_2_2():
        expected_total = 10
    else:
        expected_total = 11
    assert len(tr.tensor_names()) == expected_total
예제 #19
0
def test_gradtape_persistent(out_dir, saveall):
    """
    Run GradientTape training with persistent=True, with and without save_all.
    """
    hook = smd.KerasHook(
        out_dir=out_dir,
        save_all=saveall,
        save_config=SaveConfig(save_interval=3),
    )
    helper_keras_gradtape(trial_dir=out_dir, hook=hook, persistent=True)

    trial = smd.create_trial(path=out_dir)

    def saved_in(collection):
        # Number of tensor names the trial reports for one collection.
        return len(trial.tensor_names(collection=collection))

    if not saveall:
        # Only the default losses and metrics are saved.
        assert len(trial.tensor_names()) == 2
    else:
        # save_all: losses, metrics, weights, biases
        expected_total = 25 if is_tf_2_2() else 15
        assert len(trial.tensor_names()) == expected_total
        assert saved_in(CollectionKeys.BIASES) == 2
        assert saved_in(CollectionKeys.WEIGHTS) == 2
        assert saved_in(CollectionKeys.OPTIMIZER_VARIABLES) == 5

    assert saved_in(CollectionKeys.LOSSES) == 1
    assert saved_in(CollectionKeys.METRICS) == 1
예제 #20
0
def test_include_regex(out_dir, tf_eager_mode):
    """Fill a custom collection through the regex "dense" in a Keras fit run."""
    hook = smd.KerasHook(
        out_dir,
        save_config=SaveConfig(save_interval=9),
        include_collections=["custom_coll"],
    )
    custom_collection = hook.get_collection("custom_coll")
    custom_collection.include("dense")
    helper_keras_fit(
        out_dir,
        hook=hook,
        save_config=SaveConfig(save_interval=9),
        steps=["train"],
        run_eagerly=tf_eager_mode,
    )

    tr = create_trial_fast_refresh(out_dir)
    tnames = tr.tensor_names(collection="custom_coll")
    if is_tf_2_2():
        expected_count = 12
    else:
        expected_count = 4
    assert len(tnames) == expected_count
    # Every tensor in the collection must have a value at step 0.
    for saved_name in tnames:
        assert tr.tensor(saved_name).value(0) is not None
예제 #21
0
def test_subclassed_model(out_dir):
    """Fit a ``MyModel`` instance on MNIST with a save_all hook and check saved collections."""
    # Download and load MNIST dataset.
    train_split, test_split = tf.keras.datasets.mnist.load_data("MNIST-data")
    x_train, y_train = train_split
    x_test, y_test = test_split
    x_train = x_train / 255.0
    x_test = x_test / 255.0

    # Add a channels dimension
    x_train = x_train[..., tf.newaxis]
    x_test = x_test[..., tf.newaxis]

    # Create an instance of the model
    model = MyModel()

    train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    train_ds = train_ds.shuffle(10000, seed=123)
    train_ds = train_ds.batch(2)

    # The hook is stored on the class itself, not on the instance.
    MyModel.hook = smd.KerasHook(
        out_dir,
        save_all=True,
        save_config=smd.SaveConfig(save_steps=list(range(10)), save_interval=1),
    )

    MyModel.hook.register_model(model)
    model.compile(optimizer="Adam", loss="mse", run_eagerly=True)
    model.fit(train_ds, epochs=1, steps_per_epoch=10, callbacks=[MyModel.hook])

    trial = smd.create_trial(out_dir)
    layer_names = trial.tensor_names(collection=smd.CollectionKeys.LAYERS)
    assert len(layer_names) == 8

    assert trial.tensor_names(collection=smd.CollectionKeys.LOSSES) == ["loss"]
    if is_tf_2_2():
        # Feature to save model inputs and outputs was first added for TF 2.2.0
        saved_inputs = trial.tensor_names(collection=smd.CollectionKeys.INPUTS)
        assert saved_inputs == ["model_input"]
        saved_outputs = trial.tensor_names(collection=smd.CollectionKeys.OUTPUTS)
        assert saved_outputs == ["labels", "predictions"]
        saved_gradients = trial.tensor_names(collection=smd.CollectionKeys.GRADIENTS)
        assert len(saved_gradients) == 6
예제 #22
0
def mode_allworkers(out_dir, mode):
    """Launch a Horovod job that saves from all workers and check the resulting trial.

    Args:
        out_dir: Model/output directory handed to the training script and
            read back as a trial.
        mode: Forwarded to ``launch_horovod_job``; ``"cpu"`` additionally
            adds ``--use_only_cpu true`` to the script arguments.
    """
    config_path = build_json(
        out_dir,
        include_workers="all",
        include_collections=["weights", "optimizer_variables"],
    )
    # One worker per available GPU.
    num_workers = len(get_available_gpus())

    script_args = [*HOROVOD_KERAS_TEST_SCRIPT_ARGS, "--model_dir", out_dir]
    if mode == "cpu":
        script_args.extend(["--use_only_cpu", "true"])

    launch_horovod_job(
        script_file_path=HOROVOD_TF2_TEST_MNIST_SCRIPT,
        script_args=script_args,
        num_workers=num_workers,
        config_file_path=config_path,
        mode=mode,
    )

    tr = create_trial(out_dir)
    assert len(tr.workers()) == num_workers
    expected_total = 13 if is_tf_2_2() else 14
    assert len(tr.tensor_names()) == expected_total
    # The first weight tensor must have been written by every worker at step 0.
    first_weight_name = tr.tensor_names(collection="weights")[0]
    assert len(tr.tensor(first_weight_name).workers(0)) == num_workers
예제 #23
0
def test_save_all(out_dir, tf_eager_mode):
    """Train with ``save_all=True`` and check the total tensor count and saved steps."""
    strategy = train_model(
        out_dir,
        include_collections=None,
        save_all=True,
        save_config=SaveConfig(save_steps=[5]),
        steps=["train"],
        eager=tf_eager_mode,
    )
    tr = create_trial_fast_refresh(out_dir)
    print(tr.tensor_names())

    saved_count = len(tr.tensor_names())
    if not tf_eager_mode:
        # weights, grads, optimizer_variables, metrics, losses, outputs
        replicas = strategy.num_replicas_in_sync
        assert saved_count == 6 + 6 + 5 + 3 + 1 + 3 * replicas + 2 * replicas
    elif is_tf_2_2():
        # weights, metrics, losses, optimizer variables
        assert saved_count == 6 + 2 + 1 + 5
    else:
        # weights, metrics, losses, optimizer variables
        assert saved_count == 6 + 3 + 1 + 5
    assert len(tr.steps()) == 3
def exhaustive_check(trial_dir, include_workers="one", eager=True):
    """Train over train/eval/predict/train and assert on everything the trial recorded.

    Args:
        trial_dir: Output directory passed to ``train_model`` and read back.
        include_workers: ``"all"`` expects one worker per strategy replica;
            any other value expects a single worker.
        eager: Forwarded to ``train_model`` as ``eager``; also selects which
            tensor-count expectations apply and whether gradients and
            optimizer variables are inspected.
    """
    strategy, _ = train_model(
        trial_dir,
        include_collections=[
            CollectionKeys.WEIGHTS,
            CollectionKeys.BIASES,
            CollectionKeys.GRADIENTS,
            CollectionKeys.LOSSES,
            CollectionKeys.OUTPUTS,
            CollectionKeys.METRICS,
            CollectionKeys.OPTIMIZER_VARIABLES,
        ],
        steps=["train", "eval", "predict", "train"],
        include_workers=include_workers,
        eager=eager,
    )

    tr = create_trial_fast_refresh(trial_dir)
    print(tr.tensor_names())

    all_workers = include_workers == "all"
    expected_workers = strategy.num_replicas_in_sync if all_workers else 1
    assert len(tr.workers()) == expected_workers

    if not eager:
        expected_total = 6 + 6 + 1 + 3 + expected_workers * 3 + 5
    elif not all_workers:
        expected_total = 6 + 3 + 1
    elif is_tf_2_2():
        # 6 weights, 1 loss, 2 metrics, 5 optimizer variables, 6 gradients, 2 outputs for Tf 2.2, 1 scalar
        expected_total = 6 + 1 + 2 + 5 + 1 + 6 + 2
    elif is_tf_2_3():
        # 6 weights, 1 loss, 2 metrics, 5 optimizer variables for Tf 2.3, 1 scalar
        expected_total = 6 + 1 + 2 + 5 + 1
    else:
        # 6 weights, 1 loss, 3 metrics, 5 optimizer variables for Tf 2.1, 1 scalar
        expected_total = 6 + 1 + 3 + 5 + 1
    assert len(tr.tensor_names()) == expected_total

    # 6 weights, 6 gradients, 1 loss, 3 metrics, 24 outputs (8 for each mode), 5 optimizer variables
    assert len(tr.modes()) == 3
    assert len(tr.steps()) == 14
    # TRAIN: 0, 3, 6, 9, 12, 15, 18, 19(end of epoch); PREDICT: ran 4 steps above
    steps_per_mode = ((ModeKeys.TRAIN, 8), (ModeKeys.EVAL, 4), (ModeKeys.PREDICT, 2))
    for mode, step_count in steps_per_mode:
        assert len(tr.steps(mode)) == step_count

    assert len(tr.tensor_names(collection=CollectionKeys.BIASES)) == 3
    wtnames = tr.tensor_names(collection=CollectionKeys.WEIGHTS)
    assert len(wtnames) == 3

    for wtname in wtnames:
        weight = tr.tensor(wtname)
        assert len(weight.steps()) == 13, wtname
        # TRAIN and EVAL values must be readable both without and with an
        # explicit worker.
        for mode, step_count in ((ModeKeys.TRAIN, 7), (ModeKeys.EVAL, 4)):
            assert len(weight.steps(mode)) == step_count
            for step in weight.steps(mode):
                assert weight.value(step, mode=mode) is not None
                for worker in tr.workers():
                    assert weight.value(step, mode=mode, worker=worker) is not None
        assert len(weight.steps(ModeKeys.PREDICT)) == 2

    if not eager:
        # Gradients and optimizer variables are only inspected in non-eager
        # mode, and are expected in TRAIN steps only.
        train_only_collections = (
            (CollectionKeys.GRADIENTS, 6),
            (CollectionKeys.OPTIMIZER_VARIABLES, 5),
        )
        for collection, expected_names in train_only_collections:
            saved_names = tr.tensor_names(collection=collection)
            assert len(saved_names) == expected_names
            for saved_name in saved_names:
                saved_tensor = tr.tensor(saved_name)
                assert len(saved_tensor.steps(ModeKeys.TRAIN)) == 7
                for step in saved_tensor.steps(ModeKeys.TRAIN):
                    assert saved_tensor.value(step, mode=ModeKeys.TRAIN) is not None
                assert len(saved_tensor.steps(ModeKeys.EVAL)) == 0
                assert len(saved_tensor.steps(ModeKeys.PREDICT)) == 0

    loss_names = tr.tensor_names(collection=CollectionKeys.LOSSES)
    assert len(loss_names) == 1
    loss = tr.tensor(tr.tensor_names(collection=CollectionKeys.LOSSES)[0])
    # loss is not in predict mode (so less 2)
    # add one for end of epoch
    assert len(loss.steps(ModeKeys.TRAIN)) == 8
    assert len(loss.steps(ModeKeys.EVAL)) == 4
    assert len(loss.steps(ModeKeys.PREDICT)) == 0
    assert len(loss.steps()) == 12

    metricnames = tr.tensor_names(collection=CollectionKeys.METRICS)
    if is_tf_2_2() or is_tf_2_3():
        expected_metrics = 2
    else:
        expected_metrics = 3
    assert len(metricnames) == expected_metrics
# Third Party
import numpy as np
import pytest
from tensorflow.python.framework.dtypes import _NP_TO_TF
from tests.tensorflow2.utils import is_tf_2_2

# First Party
from smdebug.core.tfevent.util import _get_proto_dtype


@pytest.mark.skipif(is_tf_2_2() is False,
                    reason="Brain Float Is Unavailable in lower versions of TF"
                    )
def test_tensorflow2_datatypes():
    """Check that ``_get_proto_dtype`` accepts every numpy type in ``_NP_TO_TF``.

    ``_NP_TO_TF`` contains all the mappings of numpy to tf types. The
    bfloat16 entry is left out of the check when TensorFlow exposes it.

    Raises:
        AssertionError: if ``_get_proto_dtype`` raises for any remaining
            numpy type; the original exception is chained as the cause.
    """
    # Iterate over a private copy: popping from the module-level
    # ``_NP_TO_TF`` would remove bfloat16 from TensorFlow's own table for
    # everything else running in the same process.
    np_types = dict(_NP_TO_TF)
    try:
        from tensorflow.python import _pywrap_bfloat16

        # TF 2.x.x Implements a Custom Numpy Datatype for Brain Floating Type
        # Which is currently only supported on TPUs
        _np_bfloat16 = _pywrap_bfloat16.TF_bfloat16_type()
        # A default makes a missing key a no-op instead of a KeyError.
        np_types.pop(_np_bfloat16, None)
    except (ModuleNotFoundError, ValueError, ImportError):
        # Best effort: without the bfloat16 wrapper there is nothing to drop.
        pass

    for _type in np_types:
        try:
            _get_proto_dtype(np.dtype(_type))
        except Exception as err:
            # Keep the underlying failure visible in the test report.
            raise AssertionError(f"{_type} not supported") from err
# Third Party
import numpy as np
import pytest
import tensorflow as tf
from tensorflow.python.framework.dtypes import _NP_TO_TF
from tests.tensorflow2.utils import is_tf_2_2

# First Party
from smdebug.core.tfevent.util import _get_proto_dtype


@pytest.mark.skipif(
    is_tf_2_2() is False, reason="Brain Float Is Unavailable in these versions of TF"
)
def test_tensorflow2_datatypes():
    """Check that ``_get_proto_dtype`` accepts every numpy type in ``_NP_TO_TF``.

    ``_NP_TO_TF`` contains all the mappings of numpy to tf types. The
    bfloat16 entry is left out of the check when TensorFlow exposes it.

    Raises:
        AssertionError: if ``_get_proto_dtype`` raises for any remaining
            numpy type; the original exception is chained as the cause.
    """
    # Iterate over a private copy: popping from the module-level
    # ``_NP_TO_TF`` would remove bfloat16 from TensorFlow's own table for
    # everything else running in the same process.
    np_types = dict(_NP_TO_TF)
    try:
        from tensorflow.python import _pywrap_bfloat16

        # TF 2.x.x Implements a Custom Numpy Datatype for Brain Floating Type
        # Which is currently only supported on TPUs
        _np_bfloat16 = _pywrap_bfloat16.TF_bfloat16_type()
        # A default makes a missing key a no-op instead of a KeyError.
        np_types.pop(_np_bfloat16, None)
    except (ModuleNotFoundError, ValueError, ImportError):
        # Best effort: without the bfloat16 wrapper there is nothing to drop.
        pass

    for _type in np_types:
        try:
            _get_proto_dtype(np.dtype(_type))
        except Exception as err:
            # The handler needs a body to be valid Python; report the
            # failing type and keep the underlying error as the cause.
            raise AssertionError(f"{_type} not supported") from err
def helper_test_keras_v2(script_mode: bool = False, eager_mode: bool = True):
    """Fit and evaluate a Keras model, then check what smdebug saved by default (ZCC).

    Args:
        script_mode: when True, a ``smd.KerasHook`` is created here, wraps the
            optimizer and is passed as a callback to ``fit``/``evaluate``.
            When False no hook is created here and ``smd.get_hook()`` is
            expected to return one after training.
        eager_mode: used as ``run_eagerly`` on TF 2.2 / 2.3. On other
            versions a falsy value triggers
            ``tf.compat.v1.disable_eager_execution()``.
    """
    smd.del_hook()
    tf.keras.backend.clear_session()
    if not eager_mode and is_tf_2_3() is False and is_tf_2_2() is False:
        # v1 training APIs are currently not supported
        # in ZCC mode with smdebug 0.9 and AWS TF 2.3.0
        tf.compat.v1.disable_eager_execution()
    enable_tb = not (tf.__version__ == "2.0.2" or is_tf_2_3())
    run_eagerly = eager_mode if (is_tf_2_2() or is_tf_2_3()) else None

    with SagemakerSimulator(enable_tb=enable_tb) as sim:
        model = get_keras_model_v2()
        (x_train, y_train), (x_test, y_test) = get_keras_data()
        x_train, x_test = x_train / 255, x_test / 255

        optimizer = tf.keras.optimizers.RMSprop()
        explicit_hook = None
        if script_mode:
            explicit_hook = smd.KerasHook(out_dir=sim.out_dir,
                                          export_tensorboard=True)
            optimizer = explicit_hook.wrap_optimizer(optimizer)

        def hook_kwargs():
            # A fresh ``callbacks`` list per call; nothing extra outside script mode.
            return {"callbacks": [explicit_hook]} if script_mode else {}

        model.compile(
            loss="sparse_categorical_crossentropy",
            optimizer=optimizer,
            metrics=["accuracy"],
            run_eagerly=run_eagerly,
        )
        model.fit(x_train,
                  y_train,
                  batch_size=64,
                  epochs=1,
                  validation_split=0.2,
                  **hook_kwargs())
        model.evaluate(x_test, y_test, verbose=2, **hook_kwargs())

        hook = smd.get_hook()
        assert hook
        hook.close()

        # The hook must exist and must have written something.
        trial = smd.create_trial(path=sim.out_dir)
        assert len(trial.steps()) > 0, "Nothing saved at any step."
        assert len(trial.tensor_names()) > 0, "Tensors were not saved."

        # Collections that are saved by default.
        expected_present = (
            (CollectionKeys.LOSSES, "No Losses Saved"),
            (CollectionKeys.METRICS, "No Metrics Saved"),
        )
        for collection_key, failure_message in expected_present:
            saved = trial.tensor_names(collection=collection_key)
            assert len(saved) > 0, failure_message

        # Collections that must stay empty by default.
        expected_absent = (
            (CollectionKeys.WEIGHTS,
             "Weights were not expected to be saved by default"),
            (CollectionKeys.BIASES,
             "Biases were not expected to be saved by default"),
        )
        for collection_key, failure_message in expected_absent:
            saved = trial.tensor_names(collection=collection_key)
            assert len(saved) == 0, failure_message
def helper_test_keras_v2_json_config(json_file_contents,
                                     script_mode: bool = False,
                                     eager_mode: bool = True,
                                     custom_classifier=False):
    """Fit and evaluate a Keras model under a JSON hook config and check the saved collections.

    Args:
        json_file_contents: configuration text handed to ``SagemakerSimulator``.
            It is also searched for the substring ``"dense_layers"`` to decide
            whether that custom collection must be non-empty.
        script_mode: when True, the hook is built with
            ``smd.KerasHook.create_from_json_file()``, wraps the optimizer and
            is passed as a callback. When False no hook is created here.
        eager_mode: used as ``run_eagerly`` on TF 2.2 / 2.3. On other
            versions a falsy value triggers
            ``tf.compat.v1.disable_eager_execution()``.
        custom_classifier: build a ``CustomClassifierModel`` instead of the
            model returned by ``get_keras_model_v2()``.
    """
    smd.del_hook()
    tf.keras.backend.clear_session()
    if not eager_mode and is_tf_2_3() is False and is_tf_2_2() is False:
        # v1 training APIs are currently not supported
        # in ZCC mode with smdebug 0.9 and AWS TF 2.3.0
        tf.compat.v1.disable_eager_execution()
    run_eagerly = eager_mode if (is_tf_2_2() or is_tf_2_3()) else None
    enable_tb = not (tf.__version__ == "2.0.2" or is_tf_2_3())

    with SagemakerSimulator(json_file_contents=json_file_contents,
                            enable_tb=enable_tb) as sim:
        if not custom_classifier:
            model = get_keras_model_v2()
        else:
            model = CustomClassifierModel([
                tf.keras.layers.Flatten(input_shape=(28, 28)),
                tf.keras.layers.Dense(128, activation="relu"),
                tf.keras.layers.Dropout(0.2),
                tf.keras.layers.Dense(10, activation="softmax"),
            ])
        (x_train, y_train), (x_test, y_test) = get_keras_data()
        x_train, x_test = x_train / 255, x_test / 255

        optimizer = tf.keras.optimizers.RMSprop()
        explicit_hook = None
        if script_mode:
            explicit_hook = smd.KerasHook.create_from_json_file()
            optimizer = explicit_hook.wrap_optimizer(optimizer)

        def hook_kwargs():
            # A fresh ``callbacks`` list per call; nothing extra outside script mode.
            return {"callbacks": [explicit_hook]} if script_mode else {}

        model.compile(
            loss="sparse_categorical_crossentropy",
            optimizer=optimizer,
            metrics=["accuracy"],
            run_eagerly=run_eagerly,
        )
        model.fit(x_train,
                  y_train,
                  batch_size=64,
                  epochs=2,
                  validation_split=0.2,
                  **hook_kwargs())
        model.evaluate(x_test, y_test, verbose=2, **hook_kwargs())

        hook = smd.get_hook()
        assert hook
        hook.close()

        # The hook must exist and must have written something.
        trial = smd.create_trial(path=sim.out_dir)
        assert len(trial.steps()) > 0, "Nothing saved at any step."
        assert len(trial.tensor_names()) > 0, "Tensors were not saved."

        def saved_in(collection_name):
            # Number of tensor names recorded under one collection.
            return len(trial.tensor_names(collection=collection_name))

        if not eager_mode and is_tf_2_2():
            assert saved_in("gradients") > 0
        assert saved_in("weights") > 0
        assert saved_in("losses") > 0
        if is_tf_2_2():
            assert saved_in("inputs") > 0
            assert saved_in("outputs") > 0
            if "dense_layers" in json_file_contents:
                # Only assert for test_keras_v2_multi_collections
                # which defines this custom collection
                assert saved_in("dense_layers") > 0
            else:
                assert saved_in("dense_layers") == 0
    hook = smd.KerasHook(trial_dir, save_all=True)
    return hook


def create_model():
    """Build a small functional Keras model: input -> Dense(5) -> softmax.

    Returns:
        A ``tf.keras.models.Model`` whose input layer is named
        ``"Image_input"`` and whose outputs are a one-element list holding
        the ``"output-softmax"`` activation.
    """
    # ``(224)`` is just the integer 224; the trailing comma makes it the
    # one-element shape tuple that ``Input`` documents for ``shape``.
    input_layer = tf.keras.layers.Input(name="Image_input",
                                        shape=(224,),
                                        dtype="float32")
    dense_out = tf.keras.layers.Dense(5)(input_layer)
    probabilities = tf.keras.layers.Activation(
        "softmax", name="output-softmax")(dense_out)
    return tf.keras.models.Model(inputs=input_layer, outputs=[probabilities])


@pytest.mark.skipif(
    is_tf_2_2() is False,
    reason=
    "Feature to save model inputs and outputs was first added for TF 2.2.0",
)
def test_support_dicts(out_dir):
    model = create_model()
    optimizer = tf.keras.optimizers.Adadelta(lr=1.0,
                                             rho=0.95,
                                             epsilon=None,
                                             decay=0.0)
    model.compile(loss="categorical_crossentropy", optimizer=optimizer)
    inputs, labels = get_data()
    smdebug_hook = create_hook(out_dir)
    model.fit(inputs,
              labels,
              batch_size=16,
# Example #30
# 0
def helper_test_keras_v2_gradienttape(script_mode: bool = False,
                                      json_file_contents="{}",
                                      default=False):
    """Run a ``tf.GradientTape`` training loop and check what smdebug saved.

    Args:
        script_mode: when True, a ``smd.KerasHook`` is created here, the tape
            is wrapped with ``hook.wrap_tape`` and accuracy is saved through
            ``hook.save_tensor``. When False a plain persistent tape is used
            and a hook is only expected from ``smd.get_hook()`` on
            TF 2.2 / 2.3 (ZCC).
        json_file_contents: configuration text handed to ``SagemakerSimulator``.
            In script mode the value ``"{}"`` selects a directly constructed
            hook instead of ``create_from_json_file()``. It is also searched
            for the substring ``"dense_layers"``.
        default: when True, the inputs / outputs / ``dense_layers`` checks of
            the non-script branch are skipped.
    """
    smd.del_hook()
    tf.keras.backend.clear_session()

    with SagemakerSimulator(json_file_contents=json_file_contents) as sim:
        model = tf.keras.models.Sequential([
            # WA for TF issue #36279
            tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
            tf.keras.layers.Dense(128, activation="relu"),
            tf.keras.layers.Dropout(0.2),
            tf.keras.layers.Dense(10, activation="softmax"),
        ])
        (x_train, y_train), _ = get_keras_data()
        features = tf.cast(x_train[..., tf.newaxis] / 255, tf.float32)
        targets = tf.cast(y_train, tf.int64)
        batches = tf.data.Dataset.from_tensor_slices((features, targets))
        batches = batches.shuffle(1000).batch(64)

        optimizer = tf.keras.optimizers.RMSprop()
        loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
        accuracy_metric = tf.keras.metrics.SparseCategoricalAccuracy()
        n_epochs = 1
        if script_mode:
            hook = (smd.KerasHook(out_dir=sim.out_dir, export_tensorboard=True)
                    if json_file_contents == "{}" else
                    smd.KerasHook.create_from_json_file())

            for epoch_number in range(1, n_epochs + 1):
                print("Epoch %d/%d" % (epoch_number, n_epochs))
                for data, sparse_labels in batches:
                    one_hot_labels = tf.one_hot(sparse_labels, depth=10)
                    with hook.wrap_tape(tf.GradientTape()) as tape:
                        logits = model(data, training=True)
                        loss_value = loss_fn(one_hot_labels, logits)
                    grads = tape.gradient(loss_value, model.variables)
                    optimizer.apply_gradients(zip(grads, model.variables))
                    batch_accuracy = accuracy_metric(sparse_labels, logits)
                    hook.save_tensor(tensor_name="accuracy",
                                     tensor_value=batch_accuracy,
                                     collections_to_write="metrics")
                print("Epoch %d " % epoch_number +
                      "Accuracy %.4f" % accuracy_metric.result())
                accuracy_metric.reset_states()
            hook = smd.get_hook()
            assert hook
            hook.close()
            # The hook must exist and must have written something.
            trial = smd.create_trial(path=sim.out_dir)
            assert len(trial.steps()) > 0, "Nothing saved at any step."
            assert len(trial.tensor_names()) > 0, "Tensors were not saved."
            assert len(trial.tensor_names(collection="losses")) > 0
        else:
            # ZCC support added from smdebug v0.8.0)
            for epoch_number in range(1, n_epochs + 1):
                print("Epoch %d/%d" % (epoch_number, n_epochs))
                for data, sparse_labels in batches:
                    one_hot_labels = tf.one_hot(sparse_labels, depth=10)
                    with tf.GradientTape(persistent=True) as tape:
                        logits = model(data, training=True)
                        loss_value = loss_fn(one_hot_labels, logits)
                    grads = tape.gradient(loss_value, model.variables)
                    optimizer.apply_gradients(zip(grads, model.variables))
                    # Called for its effect on the running metric; value unused.
                    accuracy_metric(sparse_labels, logits)
                print("Epoch %d " % epoch_number +
                      "Accuracy %.4f" % accuracy_metric.result())
                accuracy_metric.reset_states()
            hook = smd.get_hook()
            if not (is_tf_2_2() or is_tf_2_3()):
                assert not hook  # only supported on TF 2.2 and greater
                return
            assert hook
            hook.close()
            # The hook must exist and must have written something.
            trial = smd.create_trial(path=sim.out_dir)
            assert len(trial.steps()) > 0, "Nothing saved at any step."
            assert len(trial.tensor_names()) > 0, "Tensors were not saved."
            assert len(trial.tensor_names(collection="losses")) > 0
            if is_tf_2_2() and default is False:
                # Inputs and Outputs are not saved with the default collection configurations.
                assert len(trial.tensor_names(collection="inputs")) > 0
                output_names = trial.tensor_names(collection="outputs")
                assert len(output_names) > 0
                assert trial.tensor_names(collection="outputs") == [
                    "predictions"
                ]
                dense_layer_names = trial.tensor_names(
                    collection="dense_layers")
                if "dense_layers" in json_file_contents:
                    # Only assert for test_keras_v2_multi_collections
                    # which defines this custom collection
                    assert len(dense_layer_names) > 0
                else:
                    assert len(dense_layer_names) == 0