예제 #1
0
def test_estimator(script_mode):
    """Train and evaluate the MNIST estimator inside a SagemakerSimulator.

    When ``script_mode`` is truthy, an ``smd.EstimatorHook`` writing to the
    simulator's ``out_dir`` is created and passed explicitly to both the
    training and the evaluation call. Otherwise the estimator is run without
    any explicitly supplied hooks. In both cases the test then loads a trial
    from ``sim.out_dir`` and asserts that ``smd.get_hook()`` is not ``None``
    and that tensors were saved at steps ``0`` and ``train_steps``.
    """
    # Remove any existing smd hook and reset the default TF graph first.
    smd.del_hook()
    tf.reset_default_graph()

    with SagemakerSimulator() as sim:
        # Build the model and its input pipelines.
        estimator = get_estimator()
        train_fn, eval_fn = get_input_fns()

        train_steps = 80
        eval_steps = 20

        if not script_mode:
            # No hook is passed here; the assertions below still expect one
            # to exist afterwards.
            estimator.train(input_fn=train_fn, steps=train_steps)
            estimator.evaluate(input_fn=eval_fn, steps=eval_steps)
        else:
            # Explicit hook, switched between TRAIN and EVAL modes by hand.
            debugger_hook = smd.EstimatorHook(out_dir=sim.out_dir)

            debugger_hook.set_mode(mode=smd.modes.TRAIN)
            estimator.train(
                input_fn=train_fn, steps=train_steps, hooks=[debugger_hook]
            )

            debugger_hook.set_mode(mode=smd.modes.EVAL)
            estimator.evaluate(
                input_fn=eval_fn, steps=eval_steps, hooks=[debugger_hook]
            )

        # Verify that a hook exists and that its output can be read back.
        trial = smd.create_trial(path=sim.out_dir)
        print(trial)
        assert smd.get_hook() is not None, "Hook was not created."
        assert len(trial.steps()) > 0, "Nothing saved at any step."
        assert len(trial.tensor_names()) > 0, "Tensors were not saved."
        expected_steps = [0, train_steps]
        assert trial.steps() == expected_steps, "Wrong step count for trial."
예제 #2
0
def test_estimator(out_dir, tf_eager_mode, saveall):
    """Run the MNIST estimator with an explicit hook and check saved tensors.

    An ``smd.EstimatorHook`` writing to ``out_dir`` (with ``save_all`` set
    from ``saveall``) is attached to both the training and evaluation runs.
    Afterwards a trial is loaded from ``out_dir`` and the number of saved
    tensor names is checked, overall and per collection.

    Args:
        out_dir: Directory the hook writes to and the trial is read from.
        tf_eager_mode: When exactly ``False``, eager execution is disabled
            and the default graph is reset before the run.
        saveall: Forwarded to the hook as ``save_all``; also selects which
            set of tensor-count assertions is applied.
    """
    # Identity check on purpose: only a literal False disables eager mode.
    if tf_eager_mode is False:
        tf.compat.v1.disable_eager_execution()
        tf.compat.v1.reset_default_graph()
    tf.keras.backend.clear_session()

    estimator = get_estimator()
    train_fn, eval_fn = get_input_fns()

    # Train and evaluate with the same hook, switching its mode in between.
    train_steps = 8
    eval_steps = 2
    debugger_hook = smd.EstimatorHook(out_dir=out_dir, save_all=saveall)

    debugger_hook.set_mode(mode=smd.modes.TRAIN)
    estimator.train(input_fn=train_fn, steps=train_steps, hooks=[debugger_hook])

    debugger_hook.set_mode(mode=smd.modes.EVAL)
    estimator.evaluate(input_fn=eval_fn, steps=eval_steps, hooks=[debugger_hook])

    # Read back what the hook wrote.
    trial = smd.create_trial(path=out_dir)
    saved_names = trial.tensor_names()
    assert len(trial.steps()) > 0

    if not saveall:
        # Without save_all exactly one tensor, in the losses collection,
        # is expected.
        assert len(saved_names) == 1
        assert len(trial.tensor_names(collection=CollectionKeys.LOSSES)) == 1
    else:
        # Number of tensors in each collection
        # vanilla TF 2.2: all = 300, loss = 1, weights = 4, gradients = 0, biases = 18, optimizer variables = 0, metrics = 0, others = 277
        # AWS-TF 2.2 : all = 300, loss = 1, weights = 4, gradients = 8, biases = 18, optimizer variables = 0, metrics = 0, others = 269
        # AWS-TF 2.1 : all = 309, loss = 1, weights = 4, gradients = 8, biases = 18, optimizer variables = 0, metrics = 0, others = 278
        assert len(saved_names) >= 300

        # Collections whose size is the same in every build listed above.
        exact_counts = (
            (CollectionKeys.LOSSES, 1),
            (CollectionKeys.WEIGHTS, 4),
            (CollectionKeys.BIASES, 18),
        )
        for collection_key, expected in exact_counts:
            assert len(trial.tensor_names(collection=collection_key)) == expected

        # Sizes differ between builds (see the table above), so only the
        # lookups themselves are exercised for these collections.
        for collection_key in (
            CollectionKeys.GRADIENTS,
            CollectionKeys.OPTIMIZER_VARIABLES,
        ):
            assert len(trial.tensor_names(collection=collection_key)) >= 0
예제 #3
0
def helper_train(script_mode=False, sim=None, train_steps=80, eval_steps=20):
    """Train and then evaluate the MNIST estimator.

    Args:
        script_mode: When truthy, an ``smd.EstimatorHook`` writing to
            ``sim.out_dir`` is created and passed to both calls. When falsy,
            no hooks are passed.
        sim: Object whose ``out_dir`` attribute is read in script mode; it is
            not used otherwise.
        train_steps: ``steps`` argument for the training call.
        eval_steps: ``steps`` argument for the evaluation call.
    """
    estimator = get_estimator()
    train_fn, eval_fn = get_input_fns()

    if not script_mode:
        # Plain run: no hook is supplied by this helper.
        estimator.train(input_fn=train_fn, steps=train_steps)
        estimator.evaluate(input_fn=eval_fn, steps=eval_steps)
        return

    # Script mode: one hook instance, switched from TRAIN to EVAL by hand.
    debugger_hook = smd.EstimatorHook(out_dir=sim.out_dir)

    debugger_hook.set_mode(mode=smd.modes.TRAIN)
    estimator.train(input_fn=train_fn, steps=train_steps, hooks=[debugger_hook])

    debugger_hook.set_mode(mode=smd.modes.EVAL)
    estimator.evaluate(input_fn=eval_fn, steps=eval_steps, hooks=[debugger_hook])
예제 #4
0
def helper_test_estimator_gradients_zcc(nested=False, mirrored=False):
    """Check tensors saved for an estimator run under a JSON hook config.

    A ``SagemakerSimulator`` is started with a JSON configuration that sets
    ``save_interval`` to ``"2"`` and lists the gradients, weights, losses and
    biases collections.

    Args:
        nested: Forwarded to ``get_estimator`` as ``nested_optimizer``.
        mirrored: When truthy, the work is delegated to ``test_basic`` and
            nothing else is checked here. Otherwise the estimator is trained
            and evaluated for 10 steps each, and the resulting trial is
            inspected.
    """
    # Remove any existing smd hook and reset the default TF graph first.
    smd.del_hook()
    tf.reset_default_graph()

    simulator_config = """
        {
            "S3OutputPath": "s3://sagemaker-test",
            "LocalPath": "/opt/ml/output/tensors",
            "HookParameters" : {
                "save_interval": "2",
                "include_workers": "all"
            },
            "CollectionConfigurations": [
                {
                    "CollectionName": "gradients"
                },
                {
                    "CollectionName": "weights"
                },
                {
                    "CollectionName": "losses"
                },
                {
                    "CollectionName": "biases"
                }
            ]
        }
        """

    with SagemakerSimulator(json_file_contents=simulator_config) as sim:
        if mirrored:
            # Mirrored runs are covered by test_basic on the same local path.
            test_basic("/opt/ml/output/tensors", zcc=True)
            return

        estimator = get_estimator(nested_optimizer=nested, mirrored=mirrored)
        train_fn, eval_fn = get_input_fns()

        # No hooks are passed explicitly to either call.
        train_steps = 10
        eval_steps = 10
        estimator.train(input_fn=train_fn, steps=train_steps)
        estimator.evaluate(input_fn=eval_fn, steps=eval_steps)

        # Verify that a hook exists and that its output can be read back.
        trial = smd.create_trial(path=sim.out_dir)
        print(trial)
        assert smd.get_hook() is not None, "Hook was not created."
        assert len(trial.steps()) > 0, "Nothing saved at any step."
        assert len(trial.tensor_names()) > 0, "Tensors were not saved."

        # Every second step from 0 through 18 (save_interval is "2" above).
        expected_steps = list(range(0, 20, 2))
        assert trial.steps() == expected_steps, "Wrong step count for trial."

        print(trial.tensor_names(collection="gradients"))
        for collection_name in ("gradients", "weights", "losses"):
            assert len(trial.tensor_names(collection=collection_name)) > 0

        # The first gradient tensor must have been recorded at some step.
        first_gradient_name = trial.tensor_names(collection="gradients")[0]
        first_gradient = trial.tensor(first_gradient_name)
        assert len(first_gradient.steps()) > 0

        assert len(trial.modes()) == 2