def test_linear_classifier(script_mode: bool):
    """ Works as intended. """
    smd.del_hook()
    tf.reset_default_graph()
    with SagemakerSimulator() as sim:
        # Setup
        train_input_fn, eval_input_fn = get_input_fns()
        x_feature = tf.feature_column.numeric_column("x", shape=(28, 28))
        estimator = tf.compat.v1.estimator.LinearClassifier(
            feature_columns=[x_feature], model_dir="/tmp/mnist_linear_classifier", n_classes=10
        )

        # Train
        if script_mode:
            hook = smd.EstimatorHook(out_dir=sim.out_dir)
            estimator.train(input_fn=train_input_fn, steps=100, hooks=[hook])
        else:
            estimator.train(input_fn=train_input_fn, steps=100)

        # Check that the hook was created and tensors were saved
        trial = smd.create_trial(path=sim.out_dir)
        assert smd.get_hook() is not None, "Hook was not created."
        assert len(trial.steps()) > 0, "Nothing saved at any step."
        assert len(trial.tensor_names()) > 0, "Tensors were not saved."
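# SagemakerSimulator is a test utility, not part of the public smdebug API. A minimal
# sketch of what such a context manager could do: write a hook-config JSON and point
# smdebug at it via an environment variable, so the hook is created automatically in the
# non-script-mode branch, as it would be on SageMaker. The class-name suffix, JSON
# layout, default paths, and env-var name here are assumptions for illustration.
import json
import os
import shutil


class SagemakerSimulatorSketch:
    """Simulates a SageMaker environment by pointing smdebug at a JSON hook config."""

    def __init__(self, out_dir="/tmp/sagemaker_sim/out", config_path="/tmp/sagemaker_sim/config.json"):
        self.out_dir = out_dir
        self.config_path = config_path

    def __enter__(self):
        shutil.rmtree(self.out_dir, ignore_errors=True)
        os.makedirs(os.path.dirname(self.config_path), exist_ok=True)
        with open(self.config_path, "w") as f:
            json.dump({"LocalPath": self.out_dir}, f)  # config schema assumed
        # smdebug reads this env var to build a hook from the JSON config (name assumed)
        os.environ["SMDEBUG_CONFIG_FILE_PATH"] = self.config_path
        return self

    def __exit__(self, *exc):
        os.environ.pop("SMDEBUG_CONFIG_FILE_PATH", None)
        return False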
def test_estimator(script_mode: bool):
    """ Works as intended. """
    smd.del_hook()
    tf.reset_default_graph()
    with SagemakerSimulator() as sim:
        # Setup
        mnist_classifier = get_estimator()
        train_input_fn, eval_input_fn = get_input_fns()

        # Train and evaluate
        train_steps, eval_steps = 80, 20
        if script_mode:
            hook = smd.EstimatorHook(out_dir=sim.out_dir)
            hook.set_mode(mode=smd.modes.TRAIN)
            mnist_classifier.train(input_fn=train_input_fn, steps=train_steps, hooks=[hook])
            hook.set_mode(mode=smd.modes.EVAL)
            mnist_classifier.evaluate(input_fn=eval_input_fn, steps=eval_steps, hooks=[hook])
        else:
            mnist_classifier.train(input_fn=train_input_fn, steps=train_steps)
            mnist_classifier.evaluate(input_fn=eval_input_fn, steps=eval_steps)

        # Check that the hook was created and tensors were saved
        trial = smd.create_trial(path=sim.out_dir)
        print(trial)
        assert smd.get_hook() is not None, "Hook was not created."
        assert len(trial.steps()) > 0, "Nothing saved at any step."
        assert len(trial.tensor_names()) > 0, "Tensors were not saved."
        assert trial.steps() == [0, train_steps], "Wrong steps saved for trial."
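# get_input_fns() is a shared test helper that is not shown here. A plausible sketch,
# assuming it returns a pair of MNIST input functions built with the TF 1.x estimator
# numpy_input_fn API (the batch size and epoch settings are illustrative):
import numpy as np
import tensorflow.compat.v1 as tf


def get_input_fns_sketch(batch_size=128):
    (train_data, train_labels), (eval_data, eval_labels) = tf.keras.datasets.mnist.load_data()
    train_data, eval_data = train_data / np.float32(255), eval_data / np.float32(255)
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": train_data},
        y=train_labels.astype(np.int32),
        batch_size=batch_size,
        num_epochs=None,  # repeat indefinitely; the caller bounds training via steps=
        shuffle=True,
    )
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": eval_data}, y=eval_labels.astype(np.int32), num_epochs=1, shuffle=False
    )
    return train_input_fn, eval_input_fn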
def test_linear_classifier(out_dir, tf_eager_mode, saveall):
    """ Works as intended. """
    if tf_eager_mode is False:
        tf.compat.v1.disable_eager_execution()
        tf.compat.v1.reset_default_graph()
    tf.keras.backend.clear_session()
    train_input_fn, eval_input_fn = get_input_fns()
    x_feature = tf.feature_column.numeric_column("x", shape=(28, 28))
    estimator = tf.estimator.LinearClassifier(
        feature_columns=[x_feature], model_dir="/tmp/mnist_linear_classifier", n_classes=10
    )
    hook = smd.EstimatorHook(out_dir=out_dir, save_all=saveall)
    estimator.train(input_fn=train_input_fn, steps=10, hooks=[hook])

    # Check that the hook was created and tensors were saved
    trial = smd.create_trial(path=out_dir)
    tnames = trial.tensor_names()
    assert len(trial.steps()) > 0
    if saveall:
        # Number of tensors in each collection:
        #   vanilla TF 2.2: all = 214, loss = 2, weights = 1, gradients = 0, biases = 12, optimizer variables = 0, metrics = 0, others = 199
        #   AWS-TF 2.2:     all = 219, loss = 2, weights = 1, gradients = 2, biases = 12, optimizer variables = 5, metrics = 0, others = 197
        #   AWS-TF 2.1:     all = 226, loss = 2, weights = 1, gradients = 2, biases = 12, optimizer variables = 5, metrics = 0, others = 204
        assert len(tnames) >= 214
        assert len(trial.tensor_names(collection=CollectionKeys.LOSSES)) == 2
        assert len(trial.tensor_names(collection=CollectionKeys.WEIGHTS)) == 1
        assert len(trial.tensor_names(collection=CollectionKeys.BIASES)) == 12
        assert len(trial.tensor_names(collection=CollectionKeys.GRADIENTS)) >= 0
        assert len(trial.tensor_names(collection=CollectionKeys.OPTIMIZER_VARIABLES)) >= 0
    else:
        assert len(tnames) == 2
        assert len(trial.tensor_names(collection=CollectionKeys.LOSSES)) == 2
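# For reference, the names used by these TF2 tests are assumed to come from imports
# along these lines (module paths follow the smdebug package layout):
import tensorflow as tf
import smdebug.tensorflow as smd
from smdebug.core.collection import CollectionKeys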
def test_estimator(out_dir, tf_eager_mode, saveall):
    """ Works as intended. """
    if tf_eager_mode is False:
        tf.compat.v1.disable_eager_execution()
        tf.compat.v1.reset_default_graph()
    tf.keras.backend.clear_session()
    mnist_classifier = get_estimator()
    train_input_fn, eval_input_fn = get_input_fns()

    # Train and evaluate
    train_steps, eval_steps = 8, 2
    hook = smd.EstimatorHook(out_dir=out_dir, save_all=saveall)
    hook.set_mode(mode=smd.modes.TRAIN)
    mnist_classifier.train(input_fn=train_input_fn, steps=train_steps, hooks=[hook])
    hook.set_mode(mode=smd.modes.EVAL)
    mnist_classifier.evaluate(input_fn=eval_input_fn, steps=eval_steps, hooks=[hook])

    # Check that the hook was created and tensors were saved
    trial = smd.create_trial(path=out_dir)
    tnames = trial.tensor_names()
    assert len(trial.steps()) > 0
    if saveall:
        # Number of tensors in each collection:
        #   vanilla TF 2.2: all = 300, loss = 1, weights = 4, gradients = 0, biases = 18, optimizer variables = 0, metrics = 0, others = 277
        #   AWS-TF 2.2:     all = 300, loss = 1, weights = 4, gradients = 8, biases = 18, optimizer variables = 0, metrics = 0, others = 269
        #   AWS-TF 2.1:     all = 309, loss = 1, weights = 4, gradients = 8, biases = 18, optimizer variables = 0, metrics = 0, others = 278
        assert len(tnames) >= 300
        assert len(trial.tensor_names(collection=CollectionKeys.LOSSES)) == 1
        assert len(trial.tensor_names(collection=CollectionKeys.WEIGHTS)) == 4
        assert len(trial.tensor_names(collection=CollectionKeys.BIASES)) == 18
        assert len(trial.tensor_names(collection=CollectionKeys.GRADIENTS)) >= 0
        assert len(trial.tensor_names(collection=CollectionKeys.OPTIMIZER_VARIABLES)) >= 0
    else:
        assert len(tnames) == 1
        assert len(trial.tensor_names(collection=CollectionKeys.LOSSES)) == 1
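# out_dir, tf_eager_mode, and saveall arrive as pytest fixtures defined elsewhere
# (typically in conftest.py). A plausible sketch, assuming boolean parametrization and a
# throwaway output directory; the exact fixture bodies are assumptions:
import shutil
import uuid

import pytest


@pytest.fixture
def out_dir():
    path = f"/tmp/smdebug_test_{uuid.uuid4().hex}"
    yield path
    shutil.rmtree(path, ignore_errors=True)


@pytest.fixture(params=[True, False])
def tf_eager_mode(request):
    return request.param


@pytest.fixture(params=[True, False])
def saveall(request):
    return request.param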
def test_keras_to_estimator(script_mode):
    """ Works as intended. """
    import tensorflow.compat.v1.keras as keras

    tf.reset_default_graph()
    smd.del_hook()
    keras.backend.clear_session()
    with SagemakerSimulator() as sim:
        model = keras.models.Sequential(
            [
                keras.layers.Dense(16, activation="relu", input_shape=(4,)),
                keras.layers.Dropout(0.2),
                keras.layers.Dense(1, activation="sigmoid"),
            ]
        )

        def input_fn():
            split = tfds.Split.TRAIN
            data_dir = TEST_DATASET_S3_PATH if use_s3_datasets() else None
            dataset = tfds.load("iris", data_dir=data_dir, split=split, as_supervised=True)
            dataset = dataset.map(lambda features, labels: ({"dense_input": features}, labels))
            dataset = dataset.batch(32).repeat()
            return dataset

        model.compile(loss="categorical_crossentropy", optimizer="adam")
        model.summary()

        keras_estimator = tf.keras.estimator.model_to_estimator(
            keras_model=model, model_dir=sim.out_dir
        )
        if script_mode:
            hook = smd.EstimatorHook(sim.out_dir)
            hook.set_mode(smd.modes.TRAIN)
            keras_estimator.train(input_fn=input_fn, steps=25, hooks=[hook])
            hook.set_mode(smd.modes.EVAL)
            eval_result = keras_estimator.evaluate(input_fn=input_fn, steps=10, hooks=[hook])
        else:
            keras_estimator.train(input_fn=input_fn, steps=25)
            keras_estimator.evaluate(input_fn=input_fn, steps=10)

        tr = smd.create_trial(sim.out_dir)
        assert len(tr.tensor_names()) == 1
        assert tr.steps() == [0, 25]
        assert len(tr.steps(smd.modes.TRAIN)) == 1
        assert len(tr.steps(smd.modes.EVAL)) == 1
def test_keras_to_estimator(out_dir, tf_eager_mode):
    if not tf_eager_mode:
        tf.compat.v1.disable_eager_execution()
        tf.compat.v1.reset_default_graph()
    tf.keras.backend.clear_session()
    model = tf.keras.models.Sequential(
        [
            tf.keras.layers.Dense(16, activation="relu", input_shape=(4,)),
            tf.keras.layers.Dropout(0.2),
            tf.keras.layers.Dense(1, activation="sigmoid"),
        ]
    )

    def input_fn():
        split = tfds.Split.TRAIN
        data_dir = TEST_DATASET_S3_PATH if use_s3_datasets() else None
        dataset = tfds.load("iris", data_dir=data_dir, split=split, as_supervised=True)
        dataset = dataset.map(lambda features, labels: ({"dense_input": features}, labels))
        dataset = dataset.batch(32).repeat()
        return dataset

    model.compile(loss="categorical_crossentropy", optimizer="adam")
    model.summary()

    keras_estimator = tf.keras.estimator.model_to_estimator(keras_model=model, model_dir=out_dir)

    hook = smd.EstimatorHook(out_dir)
    hook.set_mode(smd.modes.TRAIN)
    keras_estimator.train(input_fn=input_fn, steps=25, hooks=[hook])

    hook.set_mode(smd.modes.EVAL)
    eval_result = keras_estimator.evaluate(input_fn=input_fn, steps=10, hooks=[hook])

    from smdebug.trials import create_trial

    tr = create_trial(out_dir)
    assert len(tr.tensor_names()) == 1
    assert len(tr.steps()) == 2
    assert len(tr.steps(smd.modes.TRAIN)) == 1
    assert len(tr.steps(smd.modes.EVAL)) == 1
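# Both keras_to_estimator tests above load the iris dataset through tensorflow_datasets
# (import tensorflow_datasets as tfds) and fall back to a local download when S3 is not
# reachable. A minimal sketch of the use_s3_datasets() helper, under the assumption that
# it simply probes bucket access with boto3 (the bucket name and probe are illustrative):
import boto3
from botocore.exceptions import BotoCoreError, ClientError


def use_s3_datasets_sketch(bucket="smdebug-testing"):
    try:
        # A cheap HEAD-style probe: list at most one key to verify access
        boto3.client("s3").list_objects_v2(Bucket=bucket, MaxKeys=1)
        return True
    except (BotoCoreError, ClientError):
        return False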
def helper_train(script_mode=False, sim=None, train_steps=80, eval_steps=20):
    # Setup
    mnist_classifier = get_estimator()
    train_input_fn, eval_input_fn = get_input_fns()

    # Train and evaluate
    if script_mode:
        hook = smd.EstimatorHook(out_dir=sim.out_dir)
        hook.set_mode(mode=smd.modes.TRAIN)
        mnist_classifier.train(input_fn=train_input_fn, steps=train_steps, hooks=[hook])
        hook.set_mode(mode=smd.modes.EVAL)
        mnist_classifier.evaluate(input_fn=eval_input_fn, steps=eval_steps, hooks=[hook])
    else:
        mnist_classifier.train(input_fn=train_input_fn, steps=train_steps)
        mnist_classifier.evaluate(input_fn=eval_input_fn, steps=eval_steps)
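# One plausible caller for helper_train, mirroring the test pattern used earlier in this
# section (the test name is hypothetical):
def test_estimator_via_helper(script_mode: bool):
    smd.del_hook()
    tf.reset_default_graph()
    with SagemakerSimulator() as sim:
        helper_train(script_mode=script_mode, sim=sim, train_steps=80, eval_steps=20)
        trial = smd.create_trial(path=sim.out_dir)
        assert smd.get_hook() is not None, "Hook was not created."
        assert len(trial.steps()) > 0, "Nothing saved at any step."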
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--lr", type=float, default=0.001)
    parser.add_argument("--random_seed", type=bool, default=False)
    parser.add_argument("--out_dir", type=str)
    parser.add_argument("--save_interval", type=int, default=500)
    parser.add_argument("--num_epochs", type=int, default=5, help="Number of epochs to train for")
    parser.add_argument(
        "--num_steps",
        type=int,
        help="Number of steps to train for. If this is passed, it overrides num_epochs",
    )
    parser.add_argument(
        "--num_eval_steps",
        type=int,
        help="Number of steps to evaluate for. If this is passed, it doesn't evaluate over the full eval set",
    )
    parser.add_argument("--model_dir", type=str, default="/tmp/mnist_model")
    args = parser.parse_args()

    if args.random_seed:
        tf.set_random_seed(2)
        np.random.seed(2)
        random.seed(12)

    ##### Enabling SageMaker Debugger ###########
    # Create the hook
    hook = smd.EstimatorHook(
        out_dir=args.out_dir,
        include_collections=["weights", "gradients"],
        save_config=smd.SaveConfig(save_interval=args.save_interval),
    )

    def cnn_model_fn(features, labels, mode):
        """Model function for CNN."""
        # Input Layer
        input_layer = tf.reshape(features["x"], [-1, 28, 28, 1])

        # Convolutional Layer #1
        conv1 = tf.layers.conv2d(
            inputs=input_layer,
            filters=32,
            kernel_size=[5, 5],
            padding="same",
            activation=tf.nn.relu,
        )

        # Pooling Layer #1
        pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)

        # Convolutional Layer #2 and Pooling Layer #2
        conv2 = tf.layers.conv2d(
            inputs=pool1, filters=64, kernel_size=[5, 5], padding="same", activation=tf.nn.relu
        )
        pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)

        # Dense Layer
        pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])
        dense = tf.layers.dense(inputs=pool2_flat, units=1024, activation=tf.nn.relu)
        dropout = tf.layers.dropout(
            inputs=dense, rate=0.4, training=mode == tf.estimator.ModeKeys.TRAIN
        )

        # Logits Layer
        logits = tf.layers.dense(inputs=dropout, units=10)

        predictions = {
            # Generate predictions (for PREDICT and EVAL mode)
            "classes": tf.argmax(input=logits, axis=1),
            # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
            # `logging_hook`.
            "probabilities": tf.nn.softmax(logits, name="softmax_tensor"),
        }
        if mode == tf.estimator.ModeKeys.PREDICT:
            return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

        # Calculate Loss (for both TRAIN and EVAL modes)
        loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

        # Configure the Training Op (for TRAIN mode)
        if mode == tf.estimator.ModeKeys.TRAIN:
            optimizer = tf.train.GradientDescentOptimizer(learning_rate=args.lr)
            ##### Enabling SageMaker Debugger ###########
            # Wrap your optimizer as follows to help SageMaker Debugger identify gradients.
            # This does not change your optimization logic; it returns the same optimizer.
            optimizer = hook.wrap_optimizer(optimizer)
            train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step())
            return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

        # Add evaluation metrics (for EVAL mode)
        eval_metric_ops = {
            "accuracy": tf.metrics.accuracy(labels=labels, predictions=predictions["classes"])
        }
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)

    # Load training and eval data
    ((train_data, train_labels), (eval_data, eval_labels)) = tf.keras.datasets.mnist.load_data()

    train_data = train_data / np.float32(255)
    train_labels = train_labels.astype(np.int32)  # not required
    eval_data = eval_data / np.float32(255)
    eval_labels = eval_labels.astype(np.int32)  # not required

    mnist_classifier = tf.estimator.Estimator(model_fn=cnn_model_fn, model_dir=args.model_dir)

    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": train_data},
        y=train_labels,
        batch_size=128,
        num_epochs=args.num_epochs,
        shuffle=True,
    )
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": eval_data}, y=eval_labels, num_epochs=1, shuffle=False
    )

    ##### Enabling SageMaker Debugger ###########
    # Set training mode so SMDebug can classify the steps into training mode
    hook.set_mode(smd.modes.TRAIN)
    ##### Enabling SageMaker Debugger ###########
    # Pass the hook to the hooks parameter of the train method
    mnist_classifier.train(input_fn=train_input_fn, steps=args.num_steps, hooks=[hook])

    ##### Enabling SageMaker Debugger ###########
    # Set eval mode so SMDebug can classify the steps into eval mode
    hook.set_mode(smd.modes.EVAL)
    ##### Enabling SageMaker Debugger ###########
    # Pass the hook to the hooks parameter of the evaluate method
    mnist_classifier.evaluate(input_fn=eval_input_fn, steps=args.num_eval_steps, hooks=[hook])
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_dir", type=str, help="S3 path for the model")
    parser.add_argument("--lr", type=float, help="Learning Rate", default=0.001)
    parser.add_argument("--steps", type=int, help="Number of steps to run", default=100)
    parser.add_argument("--scale", type=float, help="Scaling factor for inputs", default=1.0)
    parser.add_argument("--random_seed", type=bool, default=False)
    parser.add_argument("--out_dir", type=str)
    parser.add_argument("--save_interval", type=int, default=500)
    args = parser.parse_args()

    # These random seeds are only intended for testing purposes.
    # For now, 2, 2, 12 guarantee no assert failures when running the tests.
    # If you wish to change them, note that certain steps' tensor values may vary.
    if args.random_seed:
        tf.set_random_seed(2)
        np.random.seed(2)
        random.seed(12)

    hook = smd.EstimatorHook(
        out_dir=args.out_dir,
        include_collections=["weights", "gradients"],
        save_config=smd.SaveConfig(save_interval=args.save_interval),
    )

    # Network definition
    # Note the use of name scopes
    with tf.name_scope("foobar"):
        x = tf.placeholder(shape=(None, 2), dtype=tf.float32)
        w = tf.Variable(initial_value=[[10.0], [10.0]], name="weight1")
    with tf.name_scope("foobaz"):
        w0 = [[1], [1.0]]
        y = tf.matmul(x, w0)
    loss = tf.reduce_mean((tf.matmul(x, w) - y) ** 2, name="loss")
    hook.add_to_collection("losses", loss)

    global_step = tf.Variable(17, name="global_step", trainable=False)
    increment_global_step_op = tf.assign(global_step, global_step + 1)

    optimizer = tf.train.AdamOptimizer(args.lr)
    # Wrap the optimizer with wrap_optimizer so smdebug can find gradients to save
    optimizer = hook.wrap_optimizer(optimizer)

    # Use this wrapped optimizer to minimize the loss
    optimizer_op = optimizer.minimize(loss, global_step=increment_global_step_op)

    # Pass the hook to the hooks parameter of MonitoredSession
    sess = tf.train.MonitoredSession(hooks=[hook])

    # Use this session for running the TensorFlow model
    hook.set_mode(smd.modes.TRAIN)
    for i in range(args.steps):
        x_ = np.random.random((10, 2)) * args.scale
        _loss, opt, gstep = sess.run([loss, optimizer_op, increment_global_step_op], {x: x_})
        print(f"Step={i}, Loss={_loss}")

    hook.set_mode(smd.modes.EVAL)
    for i in range(args.steps):
        x_ = np.random.random((10, 2)) * args.scale
        sess.run([loss, increment_global_step_op], {x: x_})
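# As with the previous script, an assumed entry point. After a run, the saved tensors can
# be inspected offline with smdebug's trial API, e.g.:
#
#   from smdebug.trials import create_trial
#   trial = create_trial("/tmp/your_out_dir")  # the path is whatever --out_dir was
#   print(trial.tensor_names(collection="gradients"))
if __name__ == "__main__":
    main()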