def test_estimator(script_mode):
    """Train and evaluate an estimator inside a SagemakerSimulator and check the output.

    When ``script_mode`` is truthy an ``smd.EstimatorHook`` writing to the
    simulator's ``out_dir`` is created and passed to ``train``/``evaluate``;
    otherwise both calls are made without any explicit hook. Either way the
    trial read back from ``sim.out_dir`` must contain saved steps and tensors.
    """
    # Start from a clean slate: no registered hook and a fresh default graph.
    smd.del_hook()
    tf.reset_default_graph()

    with SagemakerSimulator() as sim:
        estimator = get_estimator()
        train_fn, eval_fn = get_input_fns()
        num_train_steps, num_eval_steps = 80, 20

        if not script_mode:
            # No hook is passed explicitly on this path.
            estimator.train(input_fn=train_fn, steps=num_train_steps)
            estimator.evaluate(input_fn=eval_fn, steps=num_eval_steps)
        else:
            # The hook is created by hand and switched between modes.
            debug_hook = smd.EstimatorHook(out_dir=sim.out_dir)
            debug_hook.set_mode(mode=smd.modes.TRAIN)
            estimator.train(
                input_fn=train_fn, steps=num_train_steps, hooks=[debug_hook]
            )
            debug_hook.set_mode(mode=smd.modes.EVAL)
            estimator.evaluate(
                input_fn=eval_fn, steps=num_eval_steps, hooks=[debug_hook]
            )

        # Verify that a hook exists and that it actually wrote tensors.
        trial = smd.create_trial(path=sim.out_dir)
        print(trial)
        assert smd.get_hook() is not None, "Hook was not created."
        assert len(trial.steps()) > 0, "Nothing saved at any step."
        assert len(trial.tensor_names()) > 0, "Tensors were not saved."
        assert trial.steps() == [0, num_train_steps], "Wrong step count for trial."
def test_estimator(out_dir, tf_eager_mode, saveall):
    """Train/evaluate an estimator with an ``smd.EstimatorHook`` and check what was saved.

    ``saveall`` is forwarded to the hook as ``save_all``. When it is truthy the
    per-collection tensor counts are checked; otherwise exactly one tensor,
    belonging to the losses collection, is expected.
    """
    # NOTE(review): the graph reset is assumed to belong to the non-eager
    # branch together with disable_eager_execution() -- confirm.
    if tf_eager_mode is False:
        tf.compat.v1.disable_eager_execution()
        tf.compat.v1.reset_default_graph()
    tf.keras.backend.clear_session()

    estimator = get_estimator()
    train_fn, eval_fn = get_input_fns()
    num_train_steps, num_eval_steps = 8, 2

    # Run training and evaluation with the same hook, switching its mode.
    debug_hook = smd.EstimatorHook(out_dir=out_dir, save_all=saveall)
    debug_hook.set_mode(mode=smd.modes.TRAIN)
    estimator.train(input_fn=train_fn, steps=num_train_steps, hooks=[debug_hook])
    debug_hook.set_mode(mode=smd.modes.EVAL)
    estimator.evaluate(input_fn=eval_fn, steps=num_eval_steps, hooks=[debug_hook])

    # Read the results back and make sure something was written.
    trial = smd.create_trial(path=out_dir)
    saved_names = trial.tensor_names()
    assert len(trial.steps()) > 0

    if not saveall:
        assert len(saved_names) == 1
        assert len(trial.tensor_names(collection=CollectionKeys.LOSSES)) == 1
        return

    # Number of tensors in each collection
    # vanilla TF 2.2: all = 300, loss = 1, weights = 4, gradients = 0, biases = 18, optimizer variables = 0, metrics = 0, others = 277
    # AWS-TF 2.2    : all = 300, loss = 1, weights = 4, gradients = 8, biases = 18, optimizer variables = 0, metrics = 0, others = 269
    # AWS-TF 2.1    : all = 309, loss = 1, weights = 4, gradients = 8, biases = 18, optimizer variables = 0, metrics = 0, others = 278
    assert len(saved_names) >= 300
    assert len(trial.tensor_names(collection=CollectionKeys.LOSSES)) == 1
    assert len(trial.tensor_names(collection=CollectionKeys.WEIGHTS)) == 4
    assert len(trial.tensor_names(collection=CollectionKeys.BIASES)) == 18
    # Gradient / optimizer-variable counts differ between builds (see table
    # above), so these two only check that the queries succeed.
    assert len(trial.tensor_names(collection=CollectionKeys.GRADIENTS)) >= 0
    assert len(trial.tensor_names(collection=CollectionKeys.OPTIMIZER_VARIABLES)) >= 0
def helper_train(script_mode=False, sim=None, train_steps=80, eval_steps=20):
    """Train and then evaluate the estimator returned by ``get_estimator()``.

    Args:
        script_mode: When truthy, an ``smd.EstimatorHook`` writing to
            ``sim.out_dir`` is created, switched to TRAIN/EVAL mode and passed
            to ``train``/``evaluate``. When falsy, both calls are made without
            any explicit hook.
        sim: Object exposing an ``out_dir`` attribute. Only read when
            ``script_mode`` is truthy, in which case it is required.
        train_steps: Value passed as ``steps`` to ``train``.
        eval_steps: Value passed as ``steps`` to ``evaluate``.

    Raises:
        ValueError: If ``script_mode`` is truthy but ``sim`` is ``None``.
    """
    # Fail fast with a clear message instead of building the estimator and
    # then hitting an AttributeError on ``None.out_dir``.
    if script_mode and sim is None:
        raise ValueError(
            "helper_train(script_mode=True) requires `sim` to provide `out_dir`."
        )

    # Setup
    mnist_classifier = get_estimator()
    train_input_fn, eval_input_fn = get_input_fns()

    # Train and evaluate
    if script_mode:
        hook = smd.EstimatorHook(out_dir=sim.out_dir)
        hook.set_mode(mode=smd.modes.TRAIN)
        mnist_classifier.train(input_fn=train_input_fn, steps=train_steps, hooks=[hook])
        hook.set_mode(mode=smd.modes.EVAL)
        mnist_classifier.evaluate(input_fn=eval_input_fn, steps=eval_steps, hooks=[hook])
    else:
        mnist_classifier.train(input_fn=train_input_fn, steps=train_steps)
        mnist_classifier.evaluate(input_fn=eval_input_fn, steps=eval_steps)
def helper_test_estimator_gradients_zcc(nested=False, mirrored=False):
    """Run an estimator under a SagemakerSimulator config and check the saved collections.

    The simulator is given a JSON configuration requesting the gradients,
    weights, losses and biases collections with a save interval of 2.

    Args:
        nested: Forwarded to ``get_estimator`` as ``nested_optimizer``.
        mirrored: When truthy, ``test_basic`` is run instead of the local
            train/evaluate sequence.
    """
    # Start from a clean slate: no registered hook and a fresh default graph.
    smd.del_hook()
    tf.reset_default_graph()
    # Configuration handed to the simulator; no hook is constructed in this function.
    json_file_contents = """ { "S3OutputPath": "s3://sagemaker-test", "LocalPath": "/opt/ml/output/tensors", "HookParameters" : { "save_interval": "2", "include_workers": "all" }, "CollectionConfigurations": [ { "CollectionName": "gradients" }, { "CollectionName": "weights" }, { "CollectionName": "losses" }, { "CollectionName": "biases" } ] } """
    with SagemakerSimulator(json_file_contents=json_file_contents) as sim:
        if mirrored:
            # Path matches "LocalPath" in the JSON configuration above.
            test_basic("/opt/ml/output/tensors", zcc=True)
        else:
            # Setup
            # `mirrored` is always falsy on this branch.
            mnist_classifier = get_estimator(nested_optimizer=nested, mirrored=mirrored)
            train_input_fn, eval_input_fn = get_input_fns()

            # Train and evaluate
            train_steps, eval_steps = 10, 10
            mnist_classifier.train(input_fn=train_input_fn, steps=train_steps)
            mnist_classifier.evaluate(input_fn=eval_input_fn, steps=eval_steps)

        # Check that hook created and tensors saved
        # NOTE(review): these checks are laid out as running for both branches;
        # confirm they are not meant to apply to the non-mirrored branch only.
        trial = smd.create_trial(path=sim.out_dir)
        print(trial)
        assert smd.get_hook() is not None, "Hook was not created."
        assert len(trial.steps()) > 0, "Nothing saved at any step."
        assert len(trial.tensor_names()) > 0, "Tensors were not saved."
        # Every second step from 0 through 18 is expected.
        assert trial.steps() == [
            0,
            2,
            4,
            6,
            8,
            10,
            12,
            14,
            16,
            18,
        ], "Wrong step count for trial."
        print(trial.tensor_names(collection="gradients"))
        assert len(trial.tensor_names(collection="gradients")) > 0
        assert len(trial.tensor_names(collection="weights")) > 0
        assert len(trial.tensor_names(collection="losses")) > 0
        # The first gradient tensor must have been saved at one step or more.
        assert len(
            trial.tensor(
                trial.tensor_names(collection="gradients")[0]).steps()) > 0
        # Exactly two modes are expected in the trial.
        assert len(trial.modes()) == 2