예제 #1
0
def test_keras_gradients(script_mode, tf_optimizer):
    """Fit a v1 Keras model and verify that gradient tensors get saved.

    The simulator is configured through JSON with the ``gradients``,
    ``optimizer_variables`` and ``losses`` collections.

    Args:
        script_mode: When true, the test builds an ``smd.KerasHook`` itself,
            wraps the optimizer with it and passes it as a Keras callback.
            When false, the test never creates a hook, yet the final
            assertions still require one to exist.
        tf_optimizer: When true, train with ``tf.train.RMSPropOptimizer``;
            otherwise with ``tf.keras.optimizers.RMSprop``.
    """
    json_file_contents = """
            {
                "S3OutputPath": "s3://sagemaker-test",
                "LocalPath": "/opt/ml/output/tensors",
                "CollectionConfigurations": [
                    {
                        "CollectionName": "gradients"
                    },
                    {
                        "CollectionName": "optimizer_variables"
                    },
                    {
                        "CollectionName": "losses"
                    }
                ]
            }
            """
    # Drop any hook / graph / Keras state left behind by an earlier test.
    smd.del_hook()
    tf.reset_default_graph()
    tf.keras.backend.clear_session()

    with SagemakerSimulator(json_file_contents=json_file_contents) as sim:
        model = get_keras_model_v1()
        (x_train, y_train), (x_test, y_test) = get_keras_data()

        optimizer = (
            tf.train.RMSPropOptimizer(0.1) if tf_optimizer else tf.keras.optimizers.RMSprop()
        )

        script_hook = None
        if script_mode:
            script_hook = smd.KerasHook(
                out_dir=sim.out_dir,
                include_collections=["gradients", "optimizer_variables", "losses"],
            )
            optimizer = script_hook.wrap_optimizer(optimizer)

        def hook_kwargs():
            # Fresh kwargs per call; empty when the test did not build a hook,
            # so the Keras call then receives no ``callbacks`` argument at all.
            return {} if script_hook is None else {"callbacks": [script_hook]}

        model.compile(
            loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"]
        )
        model.fit(
            x_train, y_train, batch_size=16, epochs=5, validation_split=0.2, **hook_kwargs()
        )
        model.evaluate(x_test, y_test, verbose=2, **hook_kwargs())

        # A hook must exist and must have written tensors to the output dir.
        saved = smd.create_trial(path=sim.out_dir)
        assert smd.get_hook() is not None, "Hook was not created."
        assert len(saved.steps()) > 0, "Nothing saved at any step."
        assert len(saved.tensor_names()) > 0, "Tensors were not saved."
        assert len(saved.tensor_names(collection="gradients")) > 0
        if not tf_optimizer:
            # Optimizer variables are only supported for Keras optimizers currently.
            assert len(saved.tensor_names(collection="optimizer_variables")) > 0
def test_linear_classifier(script_mode: bool):
    """Train a ``LinearClassifier`` estimator and check that tensors were saved.

    Args:
        script_mode: When true, an ``smd.EstimatorHook`` writing to the
            simulator's output directory is passed to ``estimator.train``.
            When false, training runs without a ``hooks`` argument; the
            assertions still require a hook to exist afterwards.
    """
    smd.del_hook()
    tf.reset_default_graph()
    with SagemakerSimulator() as sim:
        # Only the training input function is needed in this test.
        train_input_fn, _ = get_input_fns()
        classifier = tf.compat.v1.estimator.LinearClassifier(
            feature_columns=[tf.feature_column.numeric_column("x", shape=(28, 28))],
            model_dir="/tmp/mnist_linear_classifier",
            n_classes=10,
        )

        # Same train() call in both modes; script mode just adds the hook.
        train_kwargs = {"input_fn": train_input_fn, "steps": 100}
        if script_mode:
            train_kwargs["hooks"] = [smd.EstimatorHook(out_dir=sim.out_dir)]
        classifier.train(**train_kwargs)

        # A hook must exist and must have written tensors to the output dir.
        saved = smd.create_trial(path=sim.out_dir)
        assert smd.get_hook() is not None, "Hook was not created."
        assert len(saved.steps()) > 0, "Nothing saved at any step."
        assert len(saved.tensor_names()) > 0, "Tensors were not saved."
예제 #3
0
def test_keras_v1(script_mode):
    """Fit and evaluate a v1 Keras model, then check that tensors were saved.

    Args:
        script_mode: When true, an ``smd.KerasHook`` is created by the test
            and passed as a callback to ``fit`` and ``evaluate``. When false,
            no callback is passed; the assertions still require a hook to
            exist afterwards.
    """
    smd.del_hook()
    tf.reset_default_graph()
    tf.keras.backend.clear_session()
    with SagemakerSimulator() as sim:
        model = get_keras_model_v1()
        (x_train, y_train), (x_test, y_test) = get_keras_data()
        model.compile(
            loss="sparse_categorical_crossentropy",
            optimizer=tf.keras.optimizers.RMSprop(),
            metrics=["accuracy"],
        )

        fit_kwargs = {"batch_size": 64, "epochs": 5, "validation_split": 0.2}
        eval_kwargs = {"verbose": 2}
        if script_mode:
            script_hook = smd.KerasHook(out_dir=sim.out_dir)
            fit_kwargs["callbacks"] = [script_hook]
            eval_kwargs["callbacks"] = [script_hook]

        model.fit(x_train, y_train, **fit_kwargs)
        model.evaluate(x_test, y_test, **eval_kwargs)

        # A hook must exist and must have written tensors to the output dir.
        saved = smd.create_trial(path=sim.out_dir)
        assert smd.get_hook() is not None, "Hook was not created."
        assert len(saved.steps()) > 0, "Nothing saved at any step."
        assert len(saved.tensor_names()) > 0, "Tensors were not saved."
def test_monitored_session(script_mode: bool):
    """Run 100 training steps in a ``MonitoredSession`` and check saved tensors.

    Args:
        script_mode: When true, the session is created with an
            ``smd.SessionHook`` writing to the simulator's output directory.
            When false, the session is created without hooks; the assertions
            still require a hook to exist afterwards.
    """
    smd.del_hook()
    tf.reset_default_graph()
    with SagemakerSimulator() as sim:
        train_op, x_ph, y_ph = get_train_op_and_placeholders()
        # The init op is built before the session is constructed.
        init_op = tf.compat.v1.global_variables_initializer()
        mnist = get_data()

        session = (
            tf.train.MonitoredSession(hooks=[smd.SessionHook(out_dir=sim.out_dir)])
            if script_mode
            else tf.train.MonitoredSession()
        )

        with session:
            session.run(init_op)
            for _ in range(100):
                images, labels = mnist.train.next_batch(32)
                session.run(train_op, feed_dict={x_ph: images, y_ph: labels})

        # A hook must exist and must have written tensors to the output dir.
        saved = smd.create_trial(path=sim.out_dir)
        assert smd.get_hook() is not None, "Hook was not created."
        assert len(saved.steps()) > 0, "Nothing saved at any step."
        assert len(saved.tensor_names()) > 0, "Tensors were not saved."
def test_estimator(script_mode: bool):
    """Train and evaluate an estimator, then check the steps that were saved.

    Args:
        script_mode: When true, one ``smd.EstimatorHook`` is created, switched
            to TRAIN mode for ``train`` and to EVAL mode for ``evaluate``, and
            passed to both calls. When false, both calls run without hooks;
            the assertions still require a hook to exist afterwards.
    """
    smd.del_hook()
    tf.reset_default_graph()
    with SagemakerSimulator() as sim:
        classifier = get_estimator()
        train_input_fn, eval_input_fn = get_input_fns()
        train_steps, eval_steps = 80, 20

        script_hook = smd.EstimatorHook(out_dir=sim.out_dir) if script_mode else None

        def run_phase(phase, mode, input_fn, steps):
            # Run one estimator phase, attaching the hook only in script mode.
            if script_hook is None:
                phase(input_fn=input_fn, steps=steps)
                return
            script_hook.set_mode(mode=mode)
            phase(input_fn=input_fn, steps=steps, hooks=[script_hook])

        run_phase(classifier.train, smd.modes.TRAIN, train_input_fn, train_steps)
        run_phase(classifier.evaluate, smd.modes.EVAL, eval_input_fn, eval_steps)

        # A hook must exist and must have written tensors to the output dir.
        saved = smd.create_trial(path=sim.out_dir)
        print(saved)
        assert smd.get_hook() is not None, "Hook was not created."
        assert len(saved.steps()) > 0, "Nothing saved at any step."
        assert len(saved.tensor_names()) > 0, "Tensors were not saved."
        assert saved.steps() == [0, train_steps], "Wrong step count for trial."
예제 #6
0
def test_monitored_session(script_mode):
    """Run 100 training steps in a ``MonitoredSession`` and check saved tensors.

    The simulator is configured through JSON with a ``save_interval`` hook
    parameter of ``"100"``.

    Args:
        script_mode: When true, the session is created with an
            ``smd.SessionHook`` writing to the simulator's output directory.
            When false, the session is created without hooks; the assertions
            still require a hook to exist afterwards.
    """
    smd.del_hook()
    tf.reset_default_graph()
    json_file_contents = """
            {
                "S3OutputPath": "s3://sagemaker-test",
                "LocalPath": "/opt/ml/output/tensors",
                "HookParameters" : {
                    "save_interval": "100"
                }
            }
            """
    with SagemakerSimulator(json_file_contents=json_file_contents) as sim:
        train_op, x_ph, y_ph = get_train_op_and_placeholders()
        # The init op is built before the session is constructed.
        init_op = tf.global_variables_initializer()
        mnist = get_data()

        session_kwargs = {}
        if script_mode:
            session_kwargs["hooks"] = [smd.SessionHook(out_dir=sim.out_dir)]
        sess = tf.train.MonitoredSession(**session_kwargs)

        with sess:
            sess.run(init_op)
            for _ in range(100):
                images, labels = mnist.train.next_batch(32)
                sess.run(train_op, feed_dict={x_ph: images, y_ph: labels})

        # A hook must exist and must have written tensors to the output dir.
        saved = smd.create_trial(path=sim.out_dir)
        assert smd.get_hook() is not None, "Hook was not created."
        assert len(saved.steps()) > 0, "Nothing saved at any step."
        assert len(saved.tensor_names()) > 0, "Tensors were not saved."
예제 #7
0
def helper_test_keras_v2(script_mode: bool = False, eager_mode: bool = True):
    """Fit a v2 Keras model and check which collections are saved by default.

    After training, losses and metrics must have been saved while weights and
    biases must not.

    Args:
        script_mode: When true, the test creates an ``smd.KerasHook`` (with
            ``export_tensorboard=True``), wraps the optimizer with it and
            passes it as a callback. When false, no hook is created by the
            test; one is still required to exist afterwards.
        eager_mode: When false, eager execution is disabled through
            ``tf.compat.v1.disable_eager_execution()`` before the model is
            built.
    """
    smd.del_hook()
    tf.keras.backend.clear_session()
    if not eager_mode:
        tf.compat.v1.disable_eager_execution()

    with SagemakerSimulator() as sim:
        model = get_keras_model_v2()
        (x_train, y_train), (x_test, y_test) = get_keras_data()
        x_train = x_train / 255
        x_test = x_test / 255

        optimizer = tf.keras.optimizers.RMSprop()
        fit_kwargs = {"batch_size": 64, "epochs": 2, "validation_split": 0.2}
        eval_kwargs = {"verbose": 2}
        if script_mode:
            hook = smd.KerasHook(out_dir=sim.out_dir, export_tensorboard=True)
            optimizer = hook.wrap_optimizer(optimizer)
            fit_kwargs["callbacks"] = [hook]
            eval_kwargs["callbacks"] = [hook]

        model.compile(loss="sparse_categorical_crossentropy",
                      optimizer=optimizer,
                      metrics=["accuracy"])
        model.fit(x_train, y_train, **fit_kwargs)
        model.evaluate(x_test, y_test, **eval_kwargs)

        # Close the registered hook before reading what it wrote.
        hook = smd.get_hook()
        assert hook
        hook.close()

        saved = smd.create_trial(path=sim.out_dir)
        assert len(saved.steps()) > 0, "Nothing saved at any step."
        assert len(saved.tensor_names()) > 0, "Tensors were not saved."

        # Collections expected to be populated by default.
        must_be_saved = (
            (CollectionKeys.LOSSES, "No Losses Saved"),
            (CollectionKeys.METRICS, "No Metrics Saved"),
        )
        for collection, message in must_be_saved:
            assert len(saved.tensor_names(collection=collection)) > 0, message

        # Collections expected to stay empty by default.
        must_be_empty = (
            (CollectionKeys.WEIGHTS, "Weights were not expected to be saved by default"),
            (CollectionKeys.BIASES, "Biases were not expected to be saved by default"),
        )
        for collection, message in must_be_empty:
            assert len(saved.tensor_names(collection=collection)) == 0, message
def helper_test_keras_v2_json_config(json_file_contents,
                                     script_mode: bool = False,
                                     eager_mode: bool = True):
    """Fit a v2 Keras model under a JSON-configured simulator and check output.

    Weights and losses must have been saved; gradients are additionally
    required when ``eager_mode`` is false.

    Args:
        json_file_contents: JSON text handed to ``SagemakerSimulator``.
        script_mode: When true, the hook is built with
            ``smd.KerasHook.create_from_json_file()``, wraps the optimizer and
            is passed as a callback. When false, no hook is created by the
            test; one is still required to exist afterwards.
        eager_mode: When false, eager execution is disabled through
            ``tf.compat.v1.disable_eager_execution()`` before the model is
            built.
    """
    smd.del_hook()
    tf.keras.backend.clear_session()
    if not eager_mode:
        tf.compat.v1.disable_eager_execution()

    with SagemakerSimulator(json_file_contents=json_file_contents) as sim:
        model = get_keras_model_v2()
        (x_train, y_train), (x_test, y_test) = get_keras_data()
        x_train, x_test = x_train / 255, x_test / 255

        optimizer = tf.keras.optimizers.RMSprop()
        script_hook = None
        if script_mode:
            script_hook = smd.KerasHook.create_from_json_file()
            optimizer = script_hook.wrap_optimizer(optimizer)

        def with_hook(**kwargs):
            # Add the callback only when the test built a hook itself.
            if script_hook is not None:
                kwargs["callbacks"] = [script_hook]
            return kwargs

        model.compile(loss="sparse_categorical_crossentropy",
                      optimizer=optimizer,
                      metrics=["accuracy"])
        model.fit(x_train,
                  y_train,
                  **with_hook(batch_size=64, epochs=2, validation_split=0.2))
        model.evaluate(x_test, y_test, **with_hook(verbose=2))

        # Close the registered hook before reading what it wrote.
        hook = smd.get_hook()
        assert hook
        hook.close()

        saved = smd.create_trial(path=sim.out_dir)
        assert len(saved.steps()) > 0, "Nothing saved at any step."
        assert len(saved.tensor_names()) > 0, "Tensors were not saved."

        # Gradients are only checked when not running eagerly.
        required = ["weights", "losses"]
        if not eager_mode:
            required.insert(0, "gradients")
        for collection in required:
            assert len(saved.tensor_names(collection=collection)) > 0
예제 #9
0
def test_keras_to_estimator(script_mode):
    """Convert a Keras model to an estimator, run it, and check saved steps.

    A small Sequential model is converted with
    ``tf.keras.estimator.model_to_estimator``, trained for 25 steps and
    evaluated for 10 steps on the ``iris`` dataset.

    Args:
        script_mode: When true, one ``smd.EstimatorHook`` is created, switched
            between TRAIN and EVAL modes, and passed to both calls. When
            false, both calls run without hooks.
    """
    import tensorflow.compat.v1.keras as keras

    tf.reset_default_graph()
    smd.del_hook()
    keras.backend.clear_session()
    with SagemakerSimulator() as sim:
        layers = [
            keras.layers.Dense(16, activation="relu", input_shape=(4, )),
            keras.layers.Dropout(0.2),
            keras.layers.Dense(1, activation="sigmoid"),
        ]
        model = keras.models.Sequential(layers)

        def input_fn():
            def name_features(features, labels):
                # Key the features by the name used for the model's input.
                return {"dense_input": features}, labels

            iris = tfds.load(
                "iris",
                data_dir=TEST_DATASET_S3_PATH if use_s3_datasets() else None,
                split=tfds.Split.TRAIN,
                as_supervised=True,
            )
            return iris.map(name_features).batch(32).repeat()

        model.compile(loss="categorical_crossentropy", optimizer="adam")
        model.summary()

        keras_estimator = tf.keras.estimator.model_to_estimator(
            keras_model=model, model_dir=sim.out_dir)

        script_hook = smd.EstimatorHook(sim.out_dir) if script_mode else None
        phases = (
            (smd.modes.TRAIN, keras_estimator.train, 25),
            (smd.modes.EVAL, keras_estimator.evaluate, 10),
        )
        for mode, run, steps in phases:
            if script_hook is None:
                run(input_fn=input_fn, steps=steps)
            else:
                script_hook.set_mode(mode)
                run(input_fn=input_fn, steps=steps, hooks=[script_hook])

        saved = smd.create_trial(sim.out_dir)
        assert len(saved.tensor_names()) == 1
        assert saved.steps() == [0, 25]
        assert len(saved.steps(smd.modes.TRAIN)) == 1
        assert len(saved.steps(smd.modes.EVAL)) == 1
예제 #10
0
def helper_test_keras_v2_gradienttape(script_mode: bool = False,
                                      json_file_contents="{}",
                                      default=False):
    """Run the GradientTape training helper and check what the hook saved.

    Training itself is delegated to ``helper_keras_gradienttape_train``; this
    function only validates the hook and the resulting trial.

    Args:
        script_mode: Forwarded to the training helper. When true, a hook must
            exist afterwards. When false, a hook is only expected on
            TensorFlow 2.1.2 or newer; on older versions the function asserts
            that no hook exists and returns early.
        json_file_contents: JSON text handed to ``SagemakerSimulator`` and to
            the training helper. Also inspected for the substring
            ``"dense_layers"`` to decide which custom-collection assertion
            applies.
        default: In script mode, when truthy, the hook must report the
            default hook configuration. Outside script mode, when it is
            exactly ``False`` and ``is_tf_2_2()`` holds, the inputs, outputs
            and ``dense_layers`` collections are checked as well.
    """
    smd.del_hook()
    tf.keras.backend.clear_session()

    with SagemakerSimulator(json_file_contents=json_file_contents) as sim:
        helper_keras_gradienttape_train(script_mode=script_mode,
                                        json_file_contents=json_file_contents,
                                        sim=sim)
        hook = smd.get_hook()

        if script_mode:
            assert hook
            if default:
                assert hook.has_default_hook_configuration()
        else:
            if version.parse(tf.__version__) < version.parse("2.1.2"):
                assert not hook  # only supported on TF 2.1.2 and greater
                return
            assert hook

        # Checks shared by both modes: the hook wrote steps, tensors and losses.
        hook.close()
        saved = smd.create_trial(path=sim.out_dir)
        assert len(saved.steps()) > 0, "Nothing saved at any step."
        assert len(saved.tensor_names()) > 0, "Tensors were not saved."
        assert len(saved.tensor_names(collection="losses")) > 0

        if script_mode:
            return
        if not (is_tf_2_2() and default is False):
            return

        # Inputs and Outputs are not saved with the default collection configurations.
        assert len(saved.tensor_names(collection="inputs")) > 0
        assert len(saved.tensor_names(collection="outputs")) > 0
        assert saved.tensor_names(collection="outputs") == [
            "predictions"
        ]
        dense_layer_names = saved.tensor_names(collection="dense_layers")
        if "dense_layers" in json_file_contents:
            # Only test_keras_v2_multi_collections defines this custom collection.
            assert len(dense_layer_names) > 0
        else:
            assert len(dense_layer_names) == 0
def test_keras_gradients_mirrored(include_workers="one"):
    """Run ``test_tf_keras`` under a simulator configured for ``include_workers``.

    The JSON configuration sets ``include_workers`` and a ``save_interval`` of
    ``"3"`` as hook parameters and lists seven collections.

    Args:
        include_workers: Value written into the JSON ``include_workers`` hook
            parameter and also forwarded to ``test_tf_keras``.
    """
    smd.del_hook()
    tf.reset_default_graph()
    tf.keras.backend.clear_session()
    # The config is assembled from three pieces so that the include_workers
    # value can be interpolated between the static head and tail.
    config_head = """
            {
                "S3OutputPath": "s3://sagemaker-test",
                "LocalPath": "/opt/ml/output/tensors",
                "HookParameters" : {

            """
    config_workers = f'"include_workers": "{include_workers}",'
    config_tail = """
                    "save_interval": "3"
                },
                "CollectionConfigurations": [
                    {
                        "CollectionName": "gradients"
                    },
                    {
                        "CollectionName": "optimizer_variables"
                    },
                    {
                        "CollectionName": "losses"
                    },
                    {
                        "CollectionName": "weights"
                    },
                    {
                        "CollectionName": "biases"
                    },
                    {
                        "CollectionName": "outputs"
                    },
                    {
                        "CollectionName": "metrics"
                    }
                ]
            }
            """
    json_file_contents = "".join((config_head, config_workers, config_tail))
    with SagemakerSimulator(json_file_contents=json_file_contents):
        test_tf_keras("/opt/ml/output/tensors",
                      zcc=True,
                      include_workers=include_workers)
예제 #12
0
def test_estimator(script_mode):
    """Run ``helper_train`` for 80 train / 20 eval steps and check saved steps.

    Args:
        script_mode: Forwarded unchanged to ``helper_train``.
    """
    smd.del_hook()
    tf.reset_default_graph()
    step_counts = {"train_steps": 80, "eval_steps": 20}
    with SagemakerSimulator() as sim:
        helper_train(script_mode=script_mode, sim=sim, **step_counts)

        # A hook must exist and must have written tensors to the output dir.
        saved = smd.create_trial(path=sim.out_dir)
        print(saved)
        assert smd.get_hook() is not None, "Hook was not created."
        assert len(saved.steps()) > 0, "Nothing saved at any step."
        assert len(saved.tensor_names()) > 0, "Tensors were not saved."
        assert saved.steps() == [0, step_counts["train_steps"]], "Wrong step count for trial."
def test_monitored_session_gradients_zcc():
    """Train in a hook-less ``MonitoredSession`` and check gradients were saved.

    The test never creates a hook itself. The simulator is configured through
    JSON with the ``gradients`` and ``losses`` collections, and the assertions
    require that a hook exists and that gradient tensors were written.
    """
    smd.del_hook()
    json_file_contents = """
    {
        "S3OutputPath": "s3://sagemaker-test",
        "LocalPath": "/opt/ml/output/tensors",
        "CollectionConfigurations": [
            {
                "CollectionName": "gradients"
            },
            {
                "CollectionName": "losses"
            }
        ]
    }
    """
    tf.reset_default_graph()
    with SagemakerSimulator(json_file_contents=json_file_contents) as sim:
        train_op, x_ph, y_ph = get_train_op_and_placeholders()
        # The init op is built before the session is constructed.
        init_op = tf.compat.v1.global_variables_initializer()
        mnist = get_data()

        session = tf.train.MonitoredSession()
        with session:
            session.run(init_op)
            for _ in range(100):
                images, labels = mnist.train.next_batch(32)
                session.run(train_op, feed_dict={x_ph: images, y_ph: labels})

        # A hook must exist and must have written tensors to the output dir.
        saved = smd.create_trial(path=sim.out_dir)
        assert smd.get_hook() is not None, "Hook was not created."
        assert len(saved.steps()) > 0, "Nothing saved at any step."
        assert len(saved.tensor_names()) > 0, "Tensors were not saved."
        assert len(saved.tensor_names(collection="gradients")) > 0
def helper_test_keras_v2(script_mode: bool = False, eager_mode: bool = True):
    """Run ``helper_keras_fit`` and check which collections are saved by default.

    After training, the hook must report the default hook configuration;
    losses and metrics must have been saved while weights and biases must not.

    Args:
        script_mode: Forwarded unchanged to ``helper_keras_fit``.
        eager_mode: Forwarded to ``helper_keras_fit``. When false, and neither
            ``is_tf_2_3()`` nor ``is_tf_2_2()`` holds, eager execution is
            disabled first. On TF 2.2 / 2.3 it is also passed on as
            ``run_eagerly``; otherwise ``run_eagerly`` is ``None``.
    """
    smd.del_hook()
    tf.keras.backend.clear_session()
    if not eager_mode:
        # v1 training APIs are currently not supported
        # in ZCC mode with smdebug 0.9 and AWS TF 2.3.0
        if is_tf_2_3() is False and is_tf_2_2() is False:
            tf.compat.v1.disable_eager_execution()

    # TensorBoard is switched off for TF 2.0.2 and for TF 2.3.
    enable_tb = not (tf.__version__ == "2.0.2" or is_tf_2_3())
    run_eagerly = eager_mode if (is_tf_2_2() or is_tf_2_3()) else None

    with SagemakerSimulator(enable_tb=enable_tb) as sim:
        helper_keras_fit(script_mode=script_mode,
                         eager_mode=eager_mode,
                         run_eagerly=run_eagerly,
                         sim=sim)

        hook = smd.get_hook()
        assert hook
        # The hook must have run with the default hook configuration.
        assert hook.has_default_hook_configuration()
        hook.close()

        saved = smd.create_trial(path=sim.out_dir)
        assert len(saved.steps()) > 0, "Nothing saved at any step."
        assert len(saved.tensor_names()) > 0, "Tensors were not saved."

        # Collections expected to be populated by default.
        must_be_saved = (
            (CollectionKeys.LOSSES, "No Losses Saved"),
            (CollectionKeys.METRICS, "No Metrics Saved"),
        )
        for collection, message in must_be_saved:
            assert len(saved.tensor_names(collection=collection)) > 0, message

        # Collections expected to stay empty by default.
        must_be_empty = (
            (CollectionKeys.WEIGHTS, "Weights were not expected to be saved by default"),
            (CollectionKeys.BIASES, "Biases were not expected to be saved by default"),
        )
        for collection, message in must_be_empty:
            assert len(saved.tensor_names(collection=collection)) == 0, message
예제 #15
0
def test_bert_simple():
    """Fit a pretrained BERT classifier briefly and check the default config.

    The model is ``bert-base-uncased`` fitted on GLUE/MRPC features for one
    epoch of 10 steps. The test never creates a hook itself; afterwards a hook
    must exist, must report the default hook configuration, and must have
    saved losses and metrics but no weights or biases.
    """
    smd.del_hook()
    with SagemakerSimulator(enable_tb=False) as sim:
        epochs = 1
        model = TFBertForSequenceClassification.from_pretrained(
            "bert-base-uncased")
        tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
        data = tfds.load("glue/mrpc")
        train_dataset = glue_convert_examples_to_features(data["train"],
                                                          tokenizer,
                                                          max_length=128,
                                                          task="mrpc")
        train_dataset = train_dataset.shuffle(100).batch(32).repeat(2)
        optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5)
        loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
        model.compile(optimizer=optimizer, loss=loss)
        model.fit(train_dataset, epochs=epochs, steps_per_epoch=10)

    # NOTE: the checks below run after the simulator context has exited.
    hook = smd.get_hook()
    # Fail with a readable message rather than an AttributeError on None
    # when no hook was created.
    assert hook is not None, "Hook was not created."
    assert hook.has_default_hook_configuration()
    hook.close()
    # Check that tensors were saved
    trial = smd.create_trial(path=sim.out_dir)
    assert len(trial.steps()) > 0, "Nothing saved at any step."
    assert len(trial.tensor_names()) > 0, "Tensors were not saved."

    # DEFAULT TENSORS SAVED
    assert len(trial.tensor_names(
        collection=CollectionKeys.LOSSES)) > 0, "No Losses Saved"
    assert len(trial.tensor_names(
        collection=CollectionKeys.METRICS)) > 0, "No Metrics Saved"
    assert (len(trial.tensor_names(collection=CollectionKeys.WEIGHTS)) == 0
            ), "Weights were not expected to be saved by default"
    assert (len(trial.tensor_names(collection=CollectionKeys.BIASES)) == 0
            ), "Biases were not expected to be saved by default"
예제 #16
0
def test_sagemaker():
    """Check that a hook built from the simulator's JSON includes ``weights``.

    The JSON lists the ``weights`` and ``losses`` collections and uses
    ``null`` for the hook parameters, the collection parameters and
    ``DebugHookSpecification``. A session hook is then requested with
    ``create_if_not_exists=True``.
    """
    config_json = """
{
    "S3OutputPath": "s3://sagemaker-test",
    "LocalPath": "/opt/ml/output/tensors",
    "HookParameters": null,
    "CollectionConfigurations": [
        {
            "CollectionName": "weights",
            "CollectionParameters": null
        },
        {
            "CollectionName": "losses",
            "CollectionParameters": null
        }
    ],
    "DebugHookSpecification": null
}
"""
    with SagemakerSimulator(json_file_contents=config_json):
        # Remove any existing hook so get_hook has to create a new one.
        smd.del_hook()
        session_hook = smd.get_hook(hook_type="session", create_if_not_exists=True)
        print(session_hook)
        # The hook itself is the assertion message, so a failure shows it.
        assert "weights" in session_hook.include_collections, session_hook
예제 #17
0
def helper_test_keras_v2_gradienttape(script_mode: bool = False,
                                      json_file_contents="{}",
                                      default=False):
    """Train a small Keras model with a manual GradientTape loop and check output.

    A Flatten/Dense/Dropout/Dense model is trained for one epoch over batches
    of 64 samples, computing gradients by hand with ``tf.GradientTape``.

    Args:
        script_mode: When true, the test creates an ``smd.KerasHook`` (directly
            when ``json_file_contents`` is ``"{}"``, otherwise through
            ``create_from_json_file()``), wraps each tape with
            ``hook.wrap_tape`` and saves the accuracy through
            ``hook.save_tensor``. When false, a plain persistent tape is used
            and no hook is created by the test; a hook is then only expected
            when ``is_tf_2_2()`` or ``is_tf_2_3()`` holds.
        json_file_contents: JSON text handed to ``SagemakerSimulator``. Also
            inspected for the substring ``"dense_layers"`` to decide which
            custom-collection assertion applies.
        default: In script mode, when truthy, the hook must report the default
            hook configuration. Outside script mode, when it is exactly
            ``False`` and ``is_tf_2_2()`` holds, the inputs, outputs and
            ``dense_layers`` collections are checked as well.
    """
    smd.del_hook()
    tf.keras.backend.clear_session()

    with SagemakerSimulator(json_file_contents=json_file_contents) as sim:
        model = tf.keras.models.Sequential([
            tf.keras.layers.Flatten(input_shape=(28, 28,
                                                 1)),  # workaround for TF issue #36279
            tf.keras.layers.Dense(128, activation="relu"),
            tf.keras.layers.Dropout(0.2),
            tf.keras.layers.Dense(10, activation="softmax"),
        ])
        (x_train, y_train), _ = get_keras_data()
        # Add a trailing channel axis and scale the inputs by 1/255.
        dataset = tf.data.Dataset.from_tensor_slices(
            (tf.cast(x_train[..., tf.newaxis] / 255,
                     tf.float32), tf.cast(y_train, tf.int64)))
        dataset = dataset.shuffle(1000).batch(64)

        opt = tf.keras.optimizers.RMSprop()
        cce = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
        train_acc_metric = tf.keras.metrics.SparseCategoricalAccuracy()
        n_epochs = 1
        if script_mode:
            # "{}" means no custom JSON config: build the hook directly.
            if json_file_contents == "{}":
                hook = smd.KerasHook(out_dir=sim.out_dir,
                                     export_tensorboard=True)
            else:
                hook = smd.KerasHook.create_from_json_file()

            for epoch in range(n_epochs):
                print("Epoch %d/%d" % (epoch + 1, n_epochs))
                for data, labels in dataset:
                    # Keep the integer labels for the sparse accuracy metric;
                    # the loss gets one-hot labels.
                    dataset_labels = labels
                    labels = tf.one_hot(labels, depth=10)
                    with hook.wrap_tape(tf.GradientTape()) as tape:
                        logits = model(data, training=True)  # (batch, 10); dataset uses batch(64)
                        loss_value = cce(labels, logits)
                    grads = tape.gradient(loss_value, model.variables)
                    opt.apply_gradients(zip(grads, model.variables))
                    acc = train_acc_metric(dataset_labels, logits)
                    hook.save_tensor(tensor_name="accuracy",
                                     tensor_value=acc,
                                     collections_to_write="metrics")
                log = "Epoch %d " % (epoch + 1)
                log += "Accuracy %.4f" % train_acc_metric.result()
                print(log)
                train_acc_metric.reset_states()
            hook = smd.get_hook()
            assert hook
            if default:
                assert hook.has_default_hook_configuration()
            hook.close()
            # Check that hook created and tensors saved
            trial = smd.create_trial(path=sim.out_dir)
            assert len(trial.steps()) > 0, "Nothing saved at any step."
            assert len(trial.tensor_names()) > 0, "Tensors were not saved."
            assert len(trial.tensor_names(collection="losses")) > 0
        else:
            # ZCC support added from smdebug v0.8.0
            for epoch in range(n_epochs):
                print("Epoch %d/%d" % (epoch + 1, n_epochs))
                for data, labels in dataset:
                    # Keep the integer labels for the sparse accuracy metric;
                    # the loss gets one-hot labels.
                    dataset_labels = labels
                    labels = tf.one_hot(labels, depth=10)
                    with tf.GradientTape(persistent=True) as tape:
                        logits = model(data, training=True)  # (batch, 10); dataset uses batch(64)
                        loss_value = cce(labels, logits)
                    grads = tape.gradient(loss_value, model.variables)
                    opt.apply_gradients(zip(grads, model.variables))
                    acc = train_acc_metric(dataset_labels, logits)
                log = "Epoch %d " % (epoch + 1)
                log += "Accuracy %.4f" % train_acc_metric.result()
                print(log)
                train_acc_metric.reset_states()
            hook = smd.get_hook()
            if not (is_tf_2_2() or is_tf_2_3()):
                assert not hook  # only supported on TF 2.2 and greater
                return
            assert hook
            hook.close()
            # Check that hook created and tensors saved
            trial = smd.create_trial(path=sim.out_dir)
            assert len(trial.steps()) > 0, "Nothing saved at any step."
            assert len(trial.tensor_names()) > 0, "Tensors were not saved."
            assert len(trial.tensor_names(collection="losses")) > 0
            if is_tf_2_2() and default is False:
                # Inputs and Outputs are not saved with the default collection configurations.
                assert len(trial.tensor_names(collection="inputs")) > 0
                assert len(trial.tensor_names(collection="outputs")) > 0
                assert trial.tensor_names(collection="outputs") == [
                    "predictions"
                ]
                if "dense_layers" in json_file_contents:
                    # Only assert for test_keras_v2_multi_collections
                    # which defines this custom collection
                    assert len(
                        trial.tensor_names(collection="dense_layers")) > 0
                else:
                    assert len(
                        trial.tensor_names(collection="dense_layers")) == 0
def helper_test_keras_v2_json_config(json_file_contents,
                                     script_mode: bool = False,
                                     eager_mode: bool = True,
                                     custom_classifier=False):
    """Train a Keras model under a JSON-configured hook and verify the output.

    Any existing hook and the Keras session are cleared first. The model is
    trained for two epochs and evaluated inside a ``SagemakerSimulator`` built
    from ``json_file_contents``. Afterwards ``smd.get_hook()`` must return a
    hook, and the trial read from ``sim.out_dir`` must contain steps, tensors,
    weights and losses (plus version-dependent collections, see below).

    Args:
        json_file_contents: JSON text handed to ``SagemakerSimulator``. It is
            also searched for the substring ``"dense_layers"`` to decide
            whether that collection is expected to be non-empty.
        script_mode: When True, a hook is created with
            ``smd.KerasHook.create_from_json_file()``, wraps the optimizer and
            is passed as a callback to ``fit``/``evaluate``. When False no
            hook is created here, yet one is still expected to exist after
            training.
        eager_mode: Forwarded as ``run_eagerly`` to ``model.compile`` when
            ``is_tf_2_2()`` or ``is_tf_2_3()`` holds. When False and both of
            those checks return ``False``, eager execution is disabled.
        custom_classifier: When truthy, build a ``CustomClassifierModel``
            instead of calling ``get_keras_model_v2()``.
    """
    smd.del_hook()
    tf.keras.backend.clear_session()
    if not eager_mode:
        if is_tf_2_3() is False and is_tf_2_2() is False:
            # v1 training APIs are currently not supported
            # in ZCC mode with smdebug 0.9 and AWS TF 2.3.0
            tf.compat.v1.disable_eager_execution()
    run_eagerly = eager_mode if (is_tf_2_2() or is_tf_2_3()) else None
    enable_tb = not (tf.__version__ == "2.0.2" or is_tf_2_3())

    with SagemakerSimulator(json_file_contents=json_file_contents,
                            enable_tb=enable_tb) as sim:
        if custom_classifier:
            layers = [
                tf.keras.layers.Flatten(input_shape=(28, 28)),
                tf.keras.layers.Dense(128, activation="relu"),
                tf.keras.layers.Dropout(0.2),
                tf.keras.layers.Dense(10, activation="softmax"),
            ]
            model = CustomClassifierModel(layers)
        else:
            model = get_keras_model_v2()

        (x_train, y_train), (x_test, y_test) = get_keras_data()
        x_train = x_train / 255
        x_test = x_test / 255

        opt = tf.keras.optimizers.RMSprop()
        script_hook = None
        if script_mode:
            script_hook = smd.KerasHook.create_from_json_file()
            opt = script_hook.wrap_optimizer(opt)

        def callback_kwargs():
            # A fresh list per call, and only in script mode; otherwise the
            # `callbacks` argument is not passed at all.
            return {"callbacks": [script_hook]} if script_mode else {}

        model.compile(
            loss="sparse_categorical_crossentropy",
            optimizer=opt,
            metrics=["accuracy"],
            run_eagerly=run_eagerly,
        )
        model.fit(x_train,
                  y_train,
                  batch_size=64,
                  epochs=2,
                  validation_split=0.2,
                  **callback_kwargs())
        model.evaluate(x_test, y_test, verbose=2, **callback_kwargs())

        hook = smd.get_hook()
        assert hook
        hook.close()

        # Check that hook created and tensors saved
        trial = smd.create_trial(path=sim.out_dir)

        def saved_count(collection_name):
            # Number of tensor names the trial reports for one collection.
            return len(trial.tensor_names(collection=collection_name))

        assert len(trial.steps()) > 0, "Nothing saved at any step."
        assert len(trial.tensor_names()) > 0, "Tensors were not saved."
        if not eager_mode and is_tf_2_2():
            assert saved_count("gradients") > 0
        assert saved_count("weights") > 0
        assert saved_count("losses") > 0
        if is_tf_2_2():
            assert saved_count("inputs") > 0
            assert saved_count("outputs") > 0
            dense_layer_count = saved_count("dense_layers")
            if "dense_layers" in json_file_contents:
                # Only assert for test_keras_v2_multi_collections
                # which defines this custom collection
                assert dense_layer_count > 0
            else:
                assert dense_layer_count == 0
def helper_test_keras_v2(script_mode: bool = False, eager_mode: bool = True):
    """Train a Keras model with the default hook setup and check saved tensors.

    Any existing hook and the Keras session are cleared first. The model is
    trained for one epoch and evaluated inside a ``SagemakerSimulator``. The
    hook returned by ``smd.get_hook()`` must exist and report a default hook
    configuration. The resulting trial must contain losses and metrics, and
    must NOT contain weights or biases.

    Args:
        script_mode: When True, an ``smd.KerasHook`` writing to
            ``sim.out_dir`` is created, wraps the optimizer and is passed as a
            callback to ``fit``/``evaluate``. When False no hook is created
            here, yet one is still expected to exist after training.
        eager_mode: Forwarded as ``run_eagerly`` to ``model.compile`` when
            ``is_tf_2_2()`` or ``is_tf_2_3()`` holds. When False and both of
            those checks return ``False``, eager execution is disabled.
    """
    smd.del_hook()
    tf.keras.backend.clear_session()
    if not eager_mode:
        if is_tf_2_3() is False and is_tf_2_2() is False:
            # v1 training APIs are currently not supported
            # in ZCC mode with smdebug 0.9 and AWS TF 2.3.0
            tf.compat.v1.disable_eager_execution()
    enable_tb = not (tf.__version__ == "2.0.2" or is_tf_2_3())
    run_eagerly = eager_mode if (is_tf_2_2() or is_tf_2_3()) else None

    with SagemakerSimulator(enable_tb=enable_tb) as sim:
        model = get_keras_model_v2()
        (x_train, y_train), (x_test, y_test) = get_keras_data()
        x_train = x_train / 255
        x_test = x_test / 255

        opt = tf.keras.optimizers.RMSprop()
        script_hook = None
        if script_mode:
            script_hook = smd.KerasHook(out_dir=sim.out_dir, export_tensorboard=True)
            opt = script_hook.wrap_optimizer(opt)

        def callback_kwargs():
            # A fresh list per call, and only in script mode; otherwise the
            # `callbacks` argument is not passed at all.
            return {"callbacks": [script_hook]} if script_mode else {}

        model.compile(
            loss="sparse_categorical_crossentropy",
            optimizer=opt,
            metrics=["accuracy"],
            run_eagerly=run_eagerly,
        )
        model.fit(
            x_train, y_train, batch_size=64, epochs=1, validation_split=0.2, **callback_kwargs()
        )
        model.evaluate(x_test, y_test, verbose=2, **callback_kwargs())

        hook = smd.get_hook()
        assert hook
        # Check if the hook was executed with the default
        # hook configuration
        assert hook.has_default_hook_configuration()
        hook.close()

        # Check that hook created and tensors saved
        trial = smd.create_trial(path=sim.out_dir)
        assert len(trial.steps()) > 0, "Nothing saved at any step."
        assert len(trial.tensor_names()) > 0, "Tensors were not saved."

        # DEFAULT TENSORS SAVED
        expected_present = (
            (CollectionKeys.LOSSES, "No Losses Saved"),
            (CollectionKeys.METRICS, "No Metrics Saved"),
        )
        for collection_key, failure_message in expected_present:
            assert len(trial.tensor_names(collection=collection_key)) > 0, failure_message

        expected_absent = (
            (CollectionKeys.WEIGHTS, "Weights were not expected to be saved by default"),
            (CollectionKeys.BIASES, "Biases were not expected to be saved by default"),
        )
        for collection_key, failure_message in expected_absent:
            assert len(trial.tensor_names(collection=collection_key)) == 0, failure_message
def helper_test_keras_v2_gradienttape(script_mode: bool = False,
                                      json_file_contents="{}"):
    """Run a hand-written ``tf.GradientTape`` training loop and check the hook.

    A small Sequential classifier is trained for two epochs on the data from
    ``get_keras_data()`` inside a ``SagemakerSimulator``.

    * ``script_mode=True``: a hook is created explicitly (from
      ``sim.out_dir`` when ``json_file_contents`` is ``"{}"``, otherwise via
      ``smd.KerasHook.create_from_json_file()``), every tape is wrapped with
      ``hook.wrap_tape`` and the per-batch accuracy is recorded on the hook.
      Afterwards the trial must contain steps, tensors and losses.
    * ``script_mode=False``: plain persistent tapes are used and the test
      asserts that ``smd.get_hook()`` returns nothing.

    Args:
        script_mode: Selects between the two paths described above.
        json_file_contents: JSON text handed to ``SagemakerSimulator``.
    """
    smd.del_hook()
    tf.keras.backend.clear_session()

    with SagemakerSimulator(json_file_contents=json_file_contents) as sim:
        model = tf.keras.models.Sequential([
            # WA for TF issue #36279
            tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
            tf.keras.layers.Dense(128, activation="relu"),
            tf.keras.layers.Dropout(0.2),
            tf.keras.layers.Dense(10, activation="softmax"),
        ])
        (x_train, y_train), _ = get_keras_data()
        images = tf.cast(x_train[..., tf.newaxis] / 255, tf.float32)
        targets = tf.cast(y_train, tf.int64)
        dataset = tf.data.Dataset.from_tensor_slices((images, targets))
        dataset = dataset.shuffle(1000).batch(64)

        opt = tf.keras.optimizers.RMSprop()
        cce = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
        train_acc_metric = tf.keras.metrics.SparseCategoricalAccuracy()
        n_epochs = 2

        def run_training(make_tape, on_batch_accuracy=None):
            # Shared loop for both modes: `make_tape` builds the tape used for
            # each batch, `on_batch_accuracy` (optional) receives the value
            # returned by the accuracy metric for that batch.
            for epoch in range(n_epochs):
                print("Epoch %d/%d" % (epoch + 1, n_epochs))
                for data, sparse_labels in dataset:
                    one_hot_labels = tf.one_hot(sparse_labels, depth=10)
                    with make_tape() as tape:
                        logits = model(data, training=True)
                        loss_value = cce(one_hot_labels, logits)
                    grads = tape.gradient(loss_value, model.variables)
                    opt.apply_gradients(zip(grads, model.variables))
                    acc = train_acc_metric(sparse_labels, logits)
                    if on_batch_accuracy is not None:
                        on_batch_accuracy(acc)
                summary = "Epoch %d " % (epoch + 1)
                summary += "Accuracy %.4f" % train_acc_metric.result()
                print(summary)
                train_acc_metric.reset_states()

        if not script_mode:
            # ZCC doesn't support yet (as of smdebug v0.7.2)
            run_training(lambda: tf.GradientTape(persistent=True))
            assert not smd.get_hook()
            return

        if json_file_contents == "{}":
            hook = smd.KerasHook(out_dir=sim.out_dir,
                                 export_tensorboard=True)
        else:
            hook = smd.KerasHook.create_from_json_file()

        run_training(
            lambda: hook.wrap_tape(tf.GradientTape()),
            on_batch_accuracy=lambda acc: hook.record_tensor_value(
                tensor_name="accuracy", tensor_value=acc),
        )

        registered_hook = smd.get_hook()
        assert registered_hook
        registered_hook.close()
        # Check that hook created and tensors saved
        trial = smd.create_trial(path=sim.out_dir)
        assert len(trial.steps()) > 0, "Nothing saved at any step."
        assert len(trial.tensor_names()) > 0, "Tensors were not saved."
        assert len(trial.tensor_names(collection="losses")) > 0
def test_estimator_gradients_zcc(nested=False, mirrored=False):
    """Check that an Estimator run saves tensors under a JSON hook config.

    The JSON config requests the gradients, weights, losses and biases
    collections with ``save_interval`` 2 and ``include_workers`` "all".

    * ``mirrored`` truthy: delegates entirely to ``test_basic`` with
      ``zcc=True`` and performs no further checks here.
    * otherwise: trains for 10 steps and evaluates for 10 steps, then expects
      a hook to exist, the trial's steps to be exactly the even numbers 0..18,
      non-empty gradients/weights/losses collections, and two modes.

    Args:
        nested: Forwarded to ``get_estimator`` as ``nested_optimizer``.
        mirrored: Selects the ``test_basic`` path; also forwarded to
            ``get_estimator`` on the non-mirrored path.
    """
    smd.del_hook()
    tf.reset_default_graph()
    json_file_contents = """
        {
            "S3OutputPath": "s3://sagemaker-test",
            "LocalPath": "/opt/ml/output/tensors",
            "HookParameters" : {
                "save_interval": "2",
                "include_workers": "all"
            },
            "CollectionConfigurations": [
                {
                    "CollectionName": "gradients"
                },
                {
                    "CollectionName": "weights"
                },
                {
                    "CollectionName": "losses"
                },
                {
                    "CollectionName": "biases"
                }
            ]
        }
        """
    with SagemakerSimulator(json_file_contents=json_file_contents) as sim:
        if mirrored:
            test_basic("/opt/ml/output/tensors", zcc=True)
            return

        # Setup
        estimator = get_estimator(nested_optimizer=nested, mirrored=mirrored)
        train_input_fn, eval_input_fn = get_input_fns()

        # Train and evaluate
        train_steps = eval_steps = 10
        estimator.train(input_fn=train_input_fn, steps=train_steps)
        estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)

        # Check that hook created and tensors saved
        trial = smd.create_trial(path=sim.out_dir)
        print(trial)
        assert smd.get_hook() is not None, "Hook was not created."
        assert len(trial.steps()) > 0, "Nothing saved at any step."
        assert len(trial.tensor_names()) > 0, "Tensors were not saved."

        # Every second step across the 10 train + 10 eval steps.
        expected_steps = list(range(0, 20, 2))
        assert trial.steps() == expected_steps, "Wrong step count for trial."

        print(trial.tensor_names(collection="gradients"))
        for collection_name in ("gradients", "weights", "losses"):
            assert len(trial.tensor_names(collection=collection_name)) > 0

        first_gradient = trial.tensor_names(collection="gradients")[0]
        assert len(trial.tensor(first_gradient).steps()) > 0
        assert len(trial.modes()) == 2
def test_tensorflow2_with_unsupported_version(eager_mode: bool = True):
    """Check that no hook is registered after ``helper_keras_fit()`` runs.

    Any existing hook is deleted first; after the helper returns,
    ``smd.get_hook()`` must be ``None``.

    Args:
        eager_mode: Accepted but not used by this test.
    """
    smd.del_hook()
    helper_keras_fit()
    assert smd.get_hook() is None