Example #1
def test_monitored_session(script_mode):
    """ Works as intended. """
    smd.del_hook()
    tf.reset_default_graph()
    json_file_contents = """
            {
                "S3OutputPath": "s3://sagemaker-test",
                "LocalPath": "/opt/ml/output/tensors",
                "HookParameters" : {
                    "save_interval": "100"
                }
            }
            """
    with SagemakerSimulator(json_file_contents=json_file_contents) as sim:
        train_op, X, Y = get_train_op_and_placeholders()
        init = tf.global_variables_initializer()
        mnist = get_data()

        if script_mode:
            hook = smd.SessionHook(out_dir=sim.out_dir)
            sess = tf.train.MonitoredSession(hooks=[hook])
        else:
            sess = tf.train.MonitoredSession()

        with sess:
            sess.run(init)
            for step in range(1, 101):
                batch_x, batch_y = mnist.train.next_batch(32)
                sess.run(train_op, feed_dict={X: batch_x, Y: batch_y})

        # Check that the hook was created and tensors were saved
        trial = smd.create_trial(path=sim.out_dir)
        assert smd.get_hook() is not None, "Hook was not created."
        assert len(trial.steps()) > 0, "Nothing saved at any step."
        assert len(trial.tensor_names()) > 0, "Tensors were not saved."
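The assertions above only confirm that something was written. A minimal sketch of inspecting the saved values through smdebug's Trial API follows; the exact accessors can vary by smdebug version, and out_dir here just stands in for the hook's output directory (sim.out_dir in the test).
# Illustrative: inspect what the hook wrote (out_dir is the hook's output directory).
trial = smd.create_trial(path=out_dir)
for name in trial.tensor_names():
    saved_steps = trial.tensor(name).steps()                # steps at which this tensor was saved
    print(name, trial.tensor(name).value(saved_steps[0]))   # numpy value at the first saved step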
Example #2
def test_monitored_session(script_mode: bool):
    """ Works as intended. """
    smd.del_hook()
    tf.reset_default_graph()
    with SagemakerSimulator() as sim:
        train_op, X, Y = get_train_op_and_placeholders()
        init = tf.compat.v1.global_variables_initializer()
        mnist = get_data()

        if script_mode:
            hook = smd.SessionHook(out_dir=sim.out_dir)
            sess = tf.train.MonitoredSession(hooks=[hook])
        else:
            sess = tf.train.MonitoredSession()

        with sess:
            sess.run(init)
            for step in range(1, 101):
                batch_x, batch_y = mnist.train.next_batch(32)
                sess.run(train_op, feed_dict={X: batch_x, Y: batch_y})

        # Check that the hook was created and tensors were saved
        trial = smd.create_trial(path=sim.out_dir)
        assert smd.get_hook() is not None, "Hook was not created."
        assert len(trial.steps()) > 0, "Nothing saved at any step."
        assert len(trial.tensor_names()) > 0, "Tensors were not saved."
Example #3
def test_new_graph(out_dir):
    # tests that we can correctly interpret an explicitly created graph
    g1 = tf.get_default_graph()
    g = tf.Graph()
    with g.as_default():
        assert g != g1
        assert g == tf.get_default_graph()
        hook = smd.SessionHook(
            out_dir,
            include_collections=["weights", "losses", "scalars"],
            save_config=smd.SaveConfig(save_steps=[0, 1, 2, 3]),
        )
        with tf.name_scope("foobar"):
            x = tf.placeholder(shape=(None, 2), dtype=tf.float32)
            w = tf.Variable(initial_value=[[10.0], [10.0]], name="weight1")
        with tf.name_scope("foobaz"):
            w0 = [[1], [1.0]]
            y = tf.matmul(x, w0)
        loss = tf.reduce_mean((tf.matmul(x, w) - y)**2, name="loss")
        hook.get_collection("losses").add(loss)
        global_step = tf.Variable(17, name="global_step", trainable=False)
        increment_global_step_op = tf.assign(global_step, global_step + 1)

        optimizer = tf.train.AdamOptimizer(0.1)
        optimizer = hook.wrap_optimizer(optimizer)
        optimizer_op = optimizer.minimize(loss,
                                          global_step=increment_global_step_op)
        sess = tf.train.MonitoredSession(hooks=[hook])
        for i in range(5):
            x_ = np.random.random((10, 2)) * 0.1
            sess.run([loss, optimizer_op, increment_global_step_op], {x: x_})
        sess.close()
        tr = create_trial(out_dir)
        assert len(tr.tensor_names())
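Because the loss is registered explicitly via hook.get_collection("losses"), a follow-up check could query the trial by collection. A short illustrative sketch, not part of the original test:
# Illustrative: query saved tensors by collection name.
tr = create_trial(out_dir)
assert len(tr.tensor_names(collection="losses")) > 0, "Loss was not saved."
assert len(tr.tensor_names(collection="weights")) > 0, "Weights were not saved."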
Example #4
def test_shapes(out_dir, save_raw_tensor=False):
    pre_test_clean_up()
    rdnc = smd.ReductionConfig(save_shape=True,
                               save_raw_tensor=save_raw_tensor)
    hook = smd.SessionHook(
        out_dir=out_dir,
        save_config=smd.SaveConfig(save_interval=1),
        reduction_config=rdnc,
        include_collections=["weights", "gradients", "losses"],
    )
    simple_model(hook)
    verify_shapes(out_dir, 0)
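With ReductionConfig(save_shape=True), only tensor shapes (and optionally the raw tensors) are recorded. A sketch of the kind of check a helper like verify_shapes might perform; the shape() accessor on the Trial tensor object is an assumption about the installed smdebug version:
# Illustrative: read back shapes only (values are unavailable unless save_raw_tensor=True).
trial = smd.create_trial(path=out_dir)
for name in trial.tensor_names(collection="weights"):
    print(name, trial.tensor(name).shape(step_num=0))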
Example #5
def test_reductions(out_dir, save_raw_tensor=False):
    pre_test_clean_up()
    rdnc = smd.ReductionConfig(
        reductions=ALLOWED_REDUCTIONS,
        abs_reductions=ALLOWED_REDUCTIONS,
        norms=ALLOWED_NORMS,
        abs_norms=ALLOWED_NORMS,
        save_raw_tensor=save_raw_tensor,
    )
    hook = smd.SessionHook(
        out_dir=out_dir,
        save_config=smd.SaveConfig(save_interval=1),
        reduction_config=rdnc,
        include_collections=["weights", "gradients", "losses"],
    )
    helper_test_reductions(out_dir, hook, save_raw_tensor)
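When reductions are configured, per-step reduction values rather than full tensors are stored. An illustrative read-back using the Trial API's reduction_value accessor; the tensor chosen and the exact signature may differ slightly between smdebug versions:
# Illustrative: fetch the saved "mean" reduction of a weight tensor at step 0.
trial = smd.create_trial(path=out_dir)
name = trial.tensor_names(collection="weights")[0]
mean_at_step_0 = trial.tensor(name).reduction_value(step_num=0, reduction_name="mean", abs=False)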
Example #6
def test_uninit_sess_run(out_dir):
    train_op, X, Y = get_train_op_and_placeholders()
    init = tf.global_variables_initializer()
    mnist = get_data()
    hook = smd.SessionHook(out_dir, include_collections=["weights"])
    sess = tf.train.MonitoredSession(hooks=[hook])

    with sess:
        sess.run(init)
        for step in range(1, 101):
            batch_x, batch_y = mnist.train.next_batch(32)
            sess.run(train_op, feed_dict={X: batch_x, Y: batch_y})

    # Check that tensors were saved
    trial = smd.create_trial(path=out_dir)
    assert len(trial.steps()) > 0, "Nothing saved at any step."
    assert len(trial.tensor_names()) > 0, "Tensors were not saved."
    assert len(trial.tensor_names(collection="weights")) > 0
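The helper get_train_op_and_placeholders used by several of these tests is not shown. A hypothetical sketch of what such a helper might return is below; the layer sizes, optimizer, and label dtype are assumptions, not the original implementation:
def get_train_op_and_placeholders():
    # Hypothetical stand-in: a single dense layer over flattened 28x28 MNIST images.
    X = tf.placeholder(tf.float32, shape=(None, 784), name="X")
    Y = tf.placeholder(tf.int64, shape=(None,), name="Y")
    logits = tf.layers.dense(X, units=10)
    loss = tf.losses.sparse_softmax_cross_entropy(labels=Y, logits=logits)
    optimizer = tf.train.AdamOptimizer(0.001)
    train_op = optimizer.minimize(loss, global_step=tf.train.get_or_create_global_step())
    return train_op, X, Y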
Example #7
def helper_mirrored(
    trial_dir,
    save_all=False,
    num_steps=3,
    save_config=None,
    reduction_config=None,
    include_collections=None,
    steps=None,
    zcc=False,
    eval_distributed=False,
    include_workers="all",
):
    num_gpus = get_available_gpus()
    num_devices = num_gpus if num_gpus > 0 else 1
    batch_size = 10 * num_devices

    # input_fn which serves Dataset
    input_fn_provider = InputFnProvider(
        per_device_batch_size(batch_size, num_devices))

    # Use multiple GPUs via MirroredStrategy.
    # All available GPUs will be used if `num_gpus` is omitted.
    distribution = tf.contrib.distribute.MirroredStrategy()

    # Pass the strategy to RunConfig
    config = tf.estimator.RunConfig(
        train_distribute=distribution,
        eval_distribute=distribution if eval_distributed else None,
        model_dir="/tmp/mnist_convnet_model",
    )

    if save_config is None:
        save_config = smd.SaveConfig(save_interval=2)

    if include_collections is None:
        include_collections = [
            CollectionKeys.WEIGHTS,
            CollectionKeys.BIASES,
            CollectionKeys.GRADIENTS,
            CollectionKeys.LOSSES,
        ]

    if not zcc:
        ts_hook = smd.SessionHook(
            out_dir=trial_dir,
            save_all=save_all,
            include_collections=include_collections,
            save_config=save_config,
            reduction_config=reduction_config,
            include_workers=include_workers,
        )
    else:
        print("zcc is passed. ignoring include_collections and save_config")

    mnist_classifier = tf.estimator.Estimator(model_fn=cnn_model_fn,
                                              config=config)
    if steps is None:
        steps = ["train"]

    for s in steps:
        if s == "train":
            print("Starting train")
            if not zcc:
                ts_hook.set_mode(smd.modes.TRAIN)
                # Train the model
                mnist_classifier.train(
                    input_fn=input_fn_provider.train_input_fn,
                    steps=num_steps,
                    hooks=[ts_hook])
            else:
                mnist_classifier.train(
                    input_fn=input_fn_provider.train_input_fn, steps=num_steps)
        elif s == "eval":
            print("Starting eval")

            if not zcc:
                ts_hook.set_mode(smd.modes.EVAL)
                # Evaluate the model and print results
                mnist_classifier.evaluate(
                    input_fn=input_fn_provider.eval_input_fn,
                    steps=num_steps,
                    hooks=[ts_hook])
            else:
                mnist_classifier.evaluate(
                    input_fn=input_fn_provider.eval_input_fn, steps=num_steps)
        elif s == "predict":
            print("Starting predict")
            if not zcc:
                ts_hook.set_mode(smd.modes.PREDICT)
                # Run prediction and consume the results
                p = mnist_classifier.predict(
                    input_fn=input_fn_provider.eval_input_fn, hooks=[ts_hook])
            else:
                p = mnist_classifier.predict(
                    input_fn=input_fn_provider.eval_input_fn)
            for i in range(num_steps):
                next(p)
    get_hook()._cleanup()
    return distribution
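A test built on this helper typically just picks the phases and distribution options. The call below is an illustrative invocation, not one of the original tests; the output path is hypothetical:
# Illustrative: train and run distributed evaluation with the default save/include settings.
distribution = helper_mirrored(
    trial_dir="/tmp/mirrored_trial",
    steps=["train", "eval"],
    num_steps=3,
    eval_distributed=True,
)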
Example #8
def help_test_mnist(
    path,
    save_config=None,
    hook=None,
    set_modes=True,
    num_steps=10,
    num_eval_steps=None,
    steps=None,
    include_collections=None,
):
    trial_dir = path
    tf.reset_default_graph()

    def cnn_model_fn(features, labels, mode):
        """Model function for CNN."""
        # Input Layer
        input_layer = tf.reshape(features["x"], [-1, 28, 28, 1])

        # Convolutional Layer #1
        conv1 = tf.layers.conv2d(
            inputs=input_layer,
            filters=32,
            kernel_size=[5, 5],
            padding="same",
            activation=tf.nn.relu,
        )

        # Pooling Layer #1
        pool1 = tf.layers.max_pooling2d(inputs=conv1,
                                        pool_size=[2, 2],
                                        strides=2)

        # Convolutional Layer #2 and Pooling Layer #2
        conv2 = tf.layers.conv2d(inputs=pool1,
                                 filters=64,
                                 kernel_size=[5, 5],
                                 padding="same",
                                 activation=tf.nn.relu)
        pool2 = tf.layers.max_pooling2d(inputs=conv2,
                                        pool_size=[2, 2],
                                        strides=2)

        # Dense Layer
        pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])
        dense = tf.layers.dense(inputs=pool2_flat,
                                units=1024,
                                activation=tf.nn.relu)
        dropout = tf.layers.dropout(
            inputs=dense,
            rate=0.4,
            training=mode == tf.estimator.ModeKeys.TRAIN)

        # Logits Layer
        logits = tf.layers.dense(inputs=dropout, units=10)

        predictions = {
            # Generate predictions (for PREDICT and EVAL mode)
            "classes": tf.argmax(input=logits, axis=1),
            # Add `softmax_tensor` to the graph; it is used for PREDICT mode.
            "probabilities": tf.nn.softmax(logits, name="softmax_tensor"),
        }

        if mode == tf.estimator.ModeKeys.PREDICT:
            return tf.estimator.EstimatorSpec(mode=mode,
                                              predictions=predictions)

        # Calculate Loss (for both TRAIN and EVAL modes)
        loss = tf.losses.sparse_softmax_cross_entropy(labels=labels,
                                                      logits=logits)

        # Configure the Training Op (for TRAIN mode)
        if mode == tf.estimator.ModeKeys.TRAIN:
            optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
            optimizer = smd.get_hook().wrap_optimizer(optimizer)
            train_op = optimizer.minimize(
                loss=loss, global_step=tf.train.get_global_step())
            return tf.estimator.EstimatorSpec(mode=mode,
                                              loss=loss,
                                              train_op=train_op)

        # Add evaluation metrics (for EVAL mode)
        eval_metric_ops = {
            "accuracy":
            tf.metrics.accuracy(labels=labels,
                                predictions=predictions["classes"])
        }
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          eval_metric_ops=eval_metric_ops)

    # Load training and eval data
    ((train_data, train_labels),
     (eval_data, eval_labels)) = tf.keras.datasets.mnist.load_data()

    train_data = train_data / np.float32(255)
    train_labels = train_labels.astype(np.int32)  # not required

    eval_data = eval_data / np.float32(255)
    eval_labels = eval_labels.astype(np.int32)  # not required

    mnist_classifier = tf.estimator.Estimator(
        model_fn=cnn_model_fn, model_dir="/tmp/mnist_convnet_model")

    train_input_fn = tf.estimator.inputs.numpy_input_fn(x={"x": train_data},
                                                        y=train_labels,
                                                        batch_size=2,
                                                        num_epochs=None,
                                                        shuffle=True)

    eval_input_fn = tf.estimator.inputs.numpy_input_fn(x={"x": eval_data},
                                                       y=eval_labels,
                                                       num_epochs=1,
                                                       batch_size=1,
                                                       shuffle=False)

    if hook is None:
        if include_collections is None:
            include_collections = ["weights", "gradients", "default", "losses"]
        hook = smd.SessionHook(out_dir=trial_dir,
                               save_config=save_config,
                               include_collections=include_collections)

    if num_eval_steps is None:
        num_eval_steps = num_steps

    def train(num_steps):
        if set_modes:
            hook.set_mode(smd.modes.TRAIN)
        mnist_classifier.train(input_fn=train_input_fn,
                               steps=num_steps,
                               hooks=[hook])

    def evaluate(num_eval_steps):
        if set_modes:
            hook.set_mode(smd.modes.EVAL)
        mnist_classifier.evaluate(input_fn=eval_input_fn,
                                  steps=num_eval_steps,
                                  hooks=[hook])

    # def train_and_evaluate(num_steps, num_eval_steps):
    #     tf.estimator.train_and_evaluate(
    #         mnist_classifier,
    #         train_spec=tf.estimator.TrainSpec(input_fn=train_input_fn, max_steps=num_steps),#, hooks=[hook]),
    #         eval_spec=tf.estimator.EvalSpec(input_fn=eval_input_fn, steps=num_eval_steps)#, hooks=[hook]),
    #     )

    if steps is None:
        steps = ["train", "eval", "train"]

    for s in steps:
        if s == "train":
            # train the model for num_steps steps
            train(num_steps)
        elif s == "eval":
            evaluate(num_eval_steps)
        # elif s == "traineval":
        #     train_and_evaluate(num_steps, num_eval_steps)

    hook.close()
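An illustrative call to this helper; the path and SaveConfig values are hypothetical, not taken from the source tests:
# Illustrative: train, evaluate, then train again (the default steps) with a small save interval.
help_test_mnist(
    path="/tmp/mnist_trial",
    save_config=smd.SaveConfig(save_interval=2),
    steps=["train", "eval"],
    num_steps=5,
)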
Example #9
def test_only_w_g(out_dir):
    pre_test_clean_up()
    hook = smd.SessionHook(out_dir,
                           save_all=False,
                           save_config=smd.SaveConfig(save_interval=2))
    helper_test_only_w_g(out_dir, hook)