Esempio n. 1
0
def test_fusedbatchnorm_nchw():
    """Compare the nGraph fused-batch-norm model against the TF model.

    The TF model is run with the bridge disabled and the nGraph model with
    the bridge enabled, both on ``k_np``. The three outputs must agree
    within an absolute tolerance of 1e-02.
    """
    tolerance = dict(rtol=0, atol=1e-02)

    # Reference run: bridge switched off.
    with tf.compat.v1.Session(config=config) as reference_session:
        ngraph_bridge.disable()
        tf_out, in_0 = tf_model()
        tf_outval = reference_session.run(tf_out, feed_dict={in_0: k_np})

    # Run under test: bridge switched on, same input.
    with tf.compat.v1.Session(config=config) as ngraph_session:
        ngraph_bridge.enable()
        ngraph_bridge.update_config(config)
        os.environ['NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS'] = '1'
        ng_out, in_0 = ng_model()
        ng_outval = ngraph_session.run(ng_out, feed_dict={in_0: k_np})

    # The first TF output is permuted with (0, 3, 1, 2) (NHWC -> NCHW) so it
    # lines up with the nGraph output; the other two are one-dimensional
    # and are compared as they are.
    expected = [
        np.transpose(tf_outval[0], (0, 3, 1, 2)),
        tf_outval[1],
        tf_outval[2],
    ]
    matches = [
        np.allclose(expected[idx], ng_outval[idx], **tolerance)
        for idx in range(3)
    ]

    assert all(matches)
Esempio n. 2
0
def run_ngraph_grappler_optimizer(input_gdef, output_nodes):
    """Run the grappler optimizer over a GraphDef with an nGraph config.

    Args:
        input_gdef: GraphDef that is imported, without a name prefix, into a
            fresh graph before optimization.
        output_nodes: Iterable of outputs. Each item is either a
            ``tf.Tensor`` (its ``name`` is used) or a string.

    Returns:
        The result of ``tf_optimizer.OptimizeGraph`` for the meta graph.
    """

    def _utf8(text):
        return text.encode("utf-8", errors="surrogateescape")

    scratch_graph = tf.Graph()
    with scratch_graph.as_default():
        tf.import_graph_def(input_gdef, name="")
    meta_graph = tf.train.export_meta_graph(
        graph_def=scratch_graph.as_graph_def(add_shapes=True),
        graph=scratch_graph)

    outputs = meta_graph_pb2.CollectionDef()
    for node in output_nodes:
        node_name = node.name if isinstance(node, tf.Tensor) else node
        outputs.node_list.value.append(_utf8(node_name))
    # TODO(laigd): use another key as the outputs are really not train_op.
    meta_graph.collection_def["train_op"].CopyFrom(outputs)

    session_config = ngraph_bridge.update_config(tf.ConfigProto())

    return tf_optimizer.OptimizeGraph(session_config,
                                      meta_graph,
                                      graph_id=b"tf_graph")
    def test_ng_serialize_to_json(self):
        """Check that NGRAPH_ENABLE_SERIALIZE=1 produces exactly one JSON file.

        Runs ``l2_loss(abs(x))`` once with the bridge enabled and the
        environment variable set, then verifies that exactly one new file
        named ``tf_function_*json`` appeared in the current directory. The
        file is removed afterwards.
        """
        # Remove leftovers from earlier runs so they are not counted as new.
        for stale in glob.glob("tf_function_ngraph_cluster*.json"):
            os.remove(stale)
        initial_contents = set(os.listdir())

        xshape = (3, 4, 5)
        x = tf.compat.v1.placeholder(tf.float32, shape=xshape)
        out = tf.nn.l2_loss(tf.abs(x))
        values = np.random.rand(*xshape)

        config = ngraph_bridge.update_config(tf.compat.v1.ConfigProto())
        env_key = 'NGRAPH_ENABLE_SERIALIZE'
        previous_value = os.environ.pop(env_key, None)
        os.environ[env_key] = '1'
        try:
            ngraph_bridge.enable()
            with tf.compat.v1.Session(config=config) as sess:
                sess.run(out, feed_dict={x: values})
        finally:
            # Restore the caller's environment even if the run fails, so the
            # setting cannot leak into whatever runs next in this process.
            os.environ.pop(env_key, None)
            if previous_value is not None:
                os.environ[env_key] = previous_value

        # Look at the files that actually appeared instead of comparing
        # directory sizes, which would miscount if a file vanished meanwhile.
        new_files = set(os.listdir()) - initial_contents
        assert len(new_files) == 1
        flname = new_files.pop()
        assert (flname.startswith('tf_function_') and flname.endswith('json'))
        os.remove(flname)
def run_mnist(_):
    """Time forward passes of a single-layer softmax MNIST model.

    Builds ``y = matmul(x, W) + b``, runs ``FLAGS.train_loop_count`` forward
    passes on random 100-image batches of the MNIST training set, writes the
    default graph to ``./mnist_fprop_py.pbtxt`` and prints the elapsed time.

    Args:
        _: Unused.
    """
    # Create the model
    x = tf.compat.v1.placeholder(tf.float32, [None, 784])
    W = tf.Variable(tf.zeros([784, 10]))
    b = tf.Variable(tf.zeros([10]))
    y = tf.matmul(x, W) + b

    # Label placeholder. It is fed below, but no loss or optimizer is built
    # on top of it: only the forward pass ``y`` is ever run.
    y_ = tf.compat.v1.placeholder(tf.float32, [None, 10])

    # Enable soft placement and tracing as needed
    config = tf.compat.v1.ConfigProto(allow_soft_placement=True,
                                      log_device_placement=True,
                                      inter_op_parallelism_threads=1)
    config_ngraph_enabled = ngraph_bridge.update_config(config)

    # The context manager closes the session even when a step raises.
    with tf.compat.v1.Session(config=config_ngraph_enabled) as sess:
        tf.compat.v1.global_variables_initializer().run(session=sess)
        (x_train, y_train), _test_split = mnist.load_data()
        num_examples = len(x_train)
        x_train = np.reshape(x_train, (num_examples, 784))
        x_train = x_train.astype(np.float32) / 255
        y_train = to_categorical(y_train, num_classes=10)

        # Bind `start` before the loop so the final report also works when
        # fewer than two iterations are requested.
        start = time.time()
        for i in range(FLAGS.train_loop_count):
            index = np.random.choice(num_examples, 100)
            batch_xs = x_train[index]
            batch_ys = y_train[index]
            if i == 1:
                # Restart the clock so iteration 0 is left out of the
                # reported time (presumably as a warm-up step).
                start = time.time()
            sess.run(y, feed_dict={x: batch_xs, y_: batch_ys})
            print("Step: ", i)

        end = time.time()

    # Save the TF graph as a text protobuf (pbtxt)
    tf.train.write_graph(tf.compat.v1.get_default_graph(),
                         '.',
                         'mnist_fprop_py.pbtxt',
                         as_text=True)

    print("Inference time: %f seconds" % (end - start))
Esempio n. 5
0
    def test_update_config_adds_optimizer_only_once(self):
        """update_config applied twice must leave one 'ngraph-optimizer'."""

        def count_ng_optimizers(config):
            # Number of custom optimizers registered under the nGraph name.
            optimizers = (
                config.graph_options.rewrite_options.custom_optimizers)
            return sum(
                1 for entry in optimizers if entry.name == 'ngraph-optimizer')

        # allow_soft_placement is set just to simulate
        # a real world non-empty initial ConfigProto
        config = tf.compat.v1.ConfigProto(allow_soft_placement=True)
        assert count_ng_optimizers(config) == 0

        config_new_1 = ngraph_bridge.update_config(config)
        config_new_2 = ngraph_bridge.update_config(config_new_1)

        # The original object and both returned objects must each hold
        # exactly one entry.
        counts = [
            count_ng_optimizers(candidate)
            for candidate in (config, config_new_1, config_new_2)
        ]
        assert counts == [1, 1, 1]
def test_conv2dbackpropinput_nhwc(padding):
    """Compare the nGraph backprop-input model against the TF model.

    Args:
        padding: Key into ``out_backprop_in_sizes``; also passed to both
            model builders.
    """
    # Random float32 inputs shared by both runs.
    filter_values = np.random.rand(*filter_size_hwio).astype('f')
    backprop_values = np.random.rand(
        *out_backprop_in_sizes[padding]).astype('f')

    # Reference run: bridge switched off.
    with tf.compat.v1.Session(config=config) as reference_session:
        ngraph_bridge.disable()
        tf_out, filter_size, out_backprop = tf_model(padding)
        tf_outval = reference_session.run(tf_out,
                                          feed_dict={
                                              filter_size: filter_values,
                                              out_backprop: backprop_values,
                                          })

    # Run under test: bridge switched on, same inputs.
    with tf.compat.v1.Session(config=config) as ngraph_session:
        ngraph_bridge.enable()
        ngraph_bridge.update_config(config)
        os.environ['NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS'] = '1'
        ng_out, filter_size, out_backprop = ng_model(padding)
        ng_outval = ngraph_session.run(ng_out,
                                       feed_dict={
                                           filter_size: filter_values,
                                           out_backprop: backprop_values,
                                       })

    outputs_match = np.allclose(tf_outval, ng_outval, rtol=0, atol=1e-02)
    assert outputs_match
Esempio n. 7
0
def test_conv2d():
    """Compare the nGraph conv2d model against the TF model.

    The TF model is fed ``t_np`` and the nGraph model ``n_np``. The TF result
    is permuted with ``(0, 3, 1, 2)`` before the comparison, which uses an
    absolute tolerance of 1e-02.
    """
    # Reference run: bridge switched off.
    with tf.compat.v1.Session(config=config) as reference_session:
        ngraph_bridge.disable()
        tf_out, input_data = tf_model()
        tf_outval = reference_session.run(tf_out,
                                          feed_dict={input_data: t_np})

    # Run under test: bridge switched on.
    with tf.compat.v1.Session(config=config) as ngraph_session:
        ngraph_bridge.enable()
        ngraph_bridge.update_config(config)
        os.environ['NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS'] = '1'
        ng_out, input_data = ng_model()
        ng_outval = ngraph_session.run(ng_out, feed_dict={input_data: n_np})

    tf_outval_permuted = np.transpose(tf_outval, (0, 3, 1, 2))
    outputs_match = np.allclose(tf_outval_permuted,
                                ng_outval,
                                rtol=0,
                                atol=1e-02)
    assert outputs_match
Esempio n. 8
0
def test_maxpoolbackprop_nhwc(padding):
    """Compare the nGraph max-pool backprop model against the TF model.

    Args:
        padding: Key into ``grad_nhwc`` and ``output_nhwc``; also passed to
            both model builders.
    """
    gradient_values = grad_nhwc[padding]
    pooled_values = output_nhwc[padding]

    # Reference run: bridge switched off.
    with tf.Session(config=config) as reference_session:
        ngraph_bridge.disable()
        tf_out, orig_in, orig_out, grad = tf_model(padding)
        tf_outval = reference_session.run(tf_out,
                                          feed_dict={
                                              orig_in: i_np,
                                              orig_out: pooled_values,
                                              grad: gradient_values,
                                          })

    # Run under test: bridge switched on, same inputs.
    with tf.Session(config=config) as ngraph_session:
        ngraph_bridge.enable()
        ngraph_bridge.update_config(config)
        os.environ['NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS'] = '1'
        ng_out, orig_in, orig_out, grad = ng_model(padding)
        ng_outval = ngraph_session.run(ng_out,
                                       feed_dict={
                                           orig_in: i_np,
                                           orig_out: pooled_values,
                                           grad: gradient_values,
                                       })

    outputs_match = np.allclose(tf_outval, ng_outval, rtol=0, atol=1e-02)
    assert outputs_match
 def test_update_config(self):
     """Check the text form of a config returned by update_config.

     After update_config, the first custom optimizer must be named
     'ngraph-optimizer'. Two extra parameters are then set on it and the
     whole config is compared against its expected text rendering.
     """
     expected_text = 'allow_soft_placement: true\ngraph_options {\n  rewrite_options {\n    meta_optimizer_iterations: ONE\n    min_graph_nodes: -1\n    custom_optimizers {\n      name: "ngraph-optimizer"\n      parameter_map {\n        key: "device_id"\n        value {\n          s: ""\n        }\n      }\n      parameter_map {\n        key: "ice_cores"\n        value {\n          s: "12"\n        }\n      }\n      parameter_map {\n        key: "max_batch_size"\n        value {\n          s: "64"\n        }\n      }\n      parameter_map {\n        key: "ngraph_backend"\n        value {\n          s: "CPU"\n        }\n      }\n    }\n  }\n}\n'

     base_config = tf.ConfigProto(allow_soft_placement=True)
     config_new = ngraph_bridge.update_config(base_config)

     ngraph_optimizer = (
         config_new.graph_options.rewrite_options.custom_optimizers[0])
     assert ngraph_optimizer.name == 'ngraph-optimizer'

     # Extra parameters that must show up in the rendered config.
     ngraph_optimizer.parameter_map["max_batch_size"].s = b'64'
     ngraph_optimizer.parameter_map["ice_cores"].s = b'12'

     assert str(config_new) == expected_text
def test_fusedbatchnorm_nhwc():
    """Compare the nGraph fused-batch-norm model against the TF model.

    Both models are fed ``k_np``. The three outputs are compared element-wise
    with an absolute tolerance of 1e-02 and no permutation.
    """
    # Reference run: bridge switched off.
    with tf.Session(config=config) as reference_session:
        ngraph_bridge.disable()
        tf_out, in_0 = tf_model()
        tf_outval = reference_session.run(tf_out, feed_dict={in_0: k_np})

    # Run under test: bridge switched on, same input.
    with tf.Session(config=config) as ngraph_session:
        ngraph_bridge.enable()
        ngraph_bridge.update_config(config)
        os.environ['NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS'] = '1'
        ng_out, in_0 = ng_model()
        ng_outval = ngraph_session.run(ng_out, feed_dict={in_0: k_np})

    matches = [
        np.allclose(tf_outval[idx], ng_outval[idx], rtol=0, atol=1e-02)
        for idx in range(3)
    ]

    assert all(matches)
Esempio n. 11
0
def test_conv2dbackpropfilter_nchw(padding):
    """Compare the nGraph backprop-filter model against the TF model.

    Random inputs are generated with the ``input_sizes_nchw`` and
    ``out_backprop_in_sizes[padding]`` shapes. The nGraph model gets them
    unchanged; the TF model gets copies permuted with ``(0, 2, 3, 1)``.

    Args:
        padding: Key into ``out_backprop_in_sizes``; also passed to both
            model builders.
    """
    ng_input = np.random.rand(*input_sizes_nchw).astype('f')
    ng_backprop = np.random.rand(*out_backprop_in_sizes[padding]).astype('f')

    # Permute to NHWC for TF
    to_nhwc = (0, 2, 3, 1)
    tf_input = np.transpose(ng_input, to_nhwc)
    tf_backprop = np.transpose(ng_backprop, to_nhwc)

    # Reference run: bridge switched off.
    with tf.compat.v1.Session(config=config) as reference_session:
        ngraph_bridge.disable()
        tf_out, input_data, out_backprop = tf_model(padding)
        tf_outval = reference_session.run(tf_out,
                                          feed_dict={
                                              input_data: tf_input,
                                              out_backprop: tf_backprop,
                                          })

    # Run under test: bridge switched on.
    with tf.compat.v1.Session(config=config) as ngraph_session:
        ngraph_bridge.enable()
        ngraph_bridge.update_config(config)
        os.environ['NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS'] = '1'
        ng_out, input_data, out_backprop = ng_model(padding)
        ng_outval = ngraph_session.run(ng_out,
                                       feed_dict={
                                           input_data: ng_input,
                                           out_backprop: ng_backprop,
                                       })

    outputs_match = np.allclose(tf_outval, ng_outval, rtol=0, atol=1e-02)
    assert outputs_match
Esempio n. 12
0
    def with_ngraph(self, l, config=None):
        """Run ``l(sess)`` in a session with the nGraph bridge enabled.

        NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS is forced to '1' for the duration
        of the call and put back to its previous state afterwards.

        Args:
            l: Callable that receives the open session; its return value is
                handed back to the caller.
            config: Optional ``tf.ConfigProto``. When omitted a fresh one is
                created per call. A ``tf.ConfigProto()`` default in the
                signature would be a single object shared by, and carried
                over between, all calls.

        Returns:
            Whatever ``l(sess)`` returns.
        """
        if config is None:
            config = tf.ConfigProto()
        # TODO: Stop grappler on failure (Add fail_on_optimizer_errors=True)
        config = ngraph_bridge.update_config(config)

        env_key = 'NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS'
        previous_value = os.environ.pop(env_key, None)
        os.environ[env_key] = '1'
        try:
            ngraph_bridge.enable()
            with tf.Session(config=config) as sess:
                return l(sess)
        finally:
            # Runs on success and on failure, so a raising callback cannot
            # leave the override behind in the process environment.
            os.environ.pop(env_key, None)
            if previous_value is not None:
                os.environ[env_key] = previous_value
Esempio n. 13
0
    def with_ngraph(self, l, config=None):
        """Run ``l(sess)`` in a session with the nGraph bridge enabled.

        NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS is forced to '1' while the
        callback runs. Its previous state (set to some value, or unset) is
        restored afterwards, including when the callback or the session
        setup raises.

        Args:
            l: Callable that receives the open session; its return value is
                handed back to the caller.
            config: Optional ``tf.compat.v1.ConfigProto``. ``None`` means
                "create a fresh one"; a ConfigProto is mutable and must not
                be used as a default argument value.

        Returns:
            Whatever ``l(sess)`` returns.
        """
        if config is None:
            config = tf.compat.v1.ConfigProto()
        # TODO: Stop grappler on failure (Add fail_on_optimizer_errors=True)
        config = ngraph_bridge.update_config(config)

        ngraph_tf_disable_deassign_clusters = os.environ.pop(
            'NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS', None)
        os.environ['NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS'] = '1'

        try:
            ngraph_bridge.enable()
            with tf.compat.v1.Session(config=config) as sess:
                retval = l(sess)
        finally:
            # Undo the override first, then put the old value back if one
            # existed before this call.
            os.environ.pop('NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS', None)
            if ngraph_tf_disable_deassign_clusters is not None:
                os.environ['NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS'] = \
                    ngraph_tf_disable_deassign_clusters

        return retval
# Write axpy into x, and expose a no-op that only runs after that assignment.
# NOTE(review): `x` and `axpy` are defined before this excerpt; their
# definitions are not visible here.
train_step = x.assign(axpy)
with tf.control_dependencies([train_step]):
    train_op = tf.no_op('train_op')

# Configure the session
# Graph optimizer level L0 with common-subexpression elimination, constant
# folding and function inlining all switched off.
config = tf.ConfigProto(
    allow_soft_placement=True,
    log_device_placement=False,
    inter_op_parallelism_threads=1,
    graph_options=tf.GraphOptions(optimizer_options=tf.OptimizerOptions(
        opt_level=tf.OptimizerOptions.L0,
        do_common_subexpression_elimination=False,
        do_constant_folding=False,
        do_function_inlining=False,
    )))
config_ngraph_enabled = ngraph_bridge.update_config(config)

# Create session and run
with tf.Session(config=config_ngraph_enabled) as sess:
    print("Python: Running with Session")
    # Request full tracing so every run fills run_metadata.step_stats.
    options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    run_metadata = tf.RunMetadata()

    # One Timeline object is collected per iteration.
    event_times = []
    sess.run(tf.global_variables_initializer())
    for i in range(10):
        # NOTE(review): the trailing comma after the sess.run(...) call makes
        # result_axpy a 1-tuple wrapping the run result - confirm whether
        # that is intended.
        (result_axpy) = sess.run((train_op),
                                 options=options,
                                 run_metadata=run_metadata),
        print(i)
        event_times.append(timeline.Timeline(run_metadata.step_stats))
Esempio n. 15
0
# Setup TensorBoard
# The log directory is per-user: /tmp/<user>/tensorboard-logs/test
graph_location = "/tmp/" + getpass.getuser() + "/tensorboard-logs/test"
print('Saving graph to: %s' % graph_location)
# NOTE(review): this is the only call here that does not go through
# tf.compat.v1 - confirm it is available in the targeted TF version.
train_writer = tf.summary.FileWriter(graph_location)

# Define the data
# `a` is a 2x2 int8 constant filled with 5; `x` and `y` are int8 placeholders.
a = tf.constant(np.full((2, 2), 5, dtype=np.int8), name='alpha')
x = tf.compat.v1.placeholder(tf.int8, [None, 2], name='x')
y = tf.compat.v1.placeholder(tf.int8, shape=(2, 2), name='y')

# axpy = a * x + y, with the product kept as its own tensor `c`.
c = a * x
axpy = c + y

# Configure the session
config = tf.compat.v1.ConfigProto(inter_op_parallelism_threads=1)
# The backend is requested explicitly as 'CPU'.
config_ngraph_enabled = ngraph_bridge.update_config(config, backend_name='CPU')

# Create session and run
with tf.compat.v1.Session(config=config_ngraph_enabled) as sess:
    print("Python: Running with Session")
    options = tf.compat.v1.RunOptions(
        trace_level=tf.compat.v1.RunOptions.FULL_TRACE)
    run_metadata = tf.compat.v1.RunMetadata()

    event_times = []
    for i in range(1):
        (result_axpy, result_c) = sess.run((axpy, c),
                                           feed_dict={
                                               x: np.ones((2, 2)),
                                               y: np.ones((2, 2)),
                                           },
Esempio n. 16
0
def main(FLAGS):
    """Run one batch of inference on a loaded model and report the results.

    When the NGRAPH_ENABLE_CLIENT environment variable is set, random input
    is used and no accuracy is computed. Otherwise validation images and
    labels are loaded and the top-5 predictions are passed to
    ``util.accuracy``.

    Args:
        FLAGS: Settings object. The attributes read here are ``batch_size``,
            ``image_size``, ``ngraph`` and ``model``; the whole object is
            also handed to the validation-data helpers and stored on
            ``util.VAL_IMAGE_FLAGS``.
    """
    using_client = os.environ.get('NGRAPH_ENABLE_CLIENT') is not None
    print('Using client' if using_client else 'Not using client')

    imagenet_inference_labels = get_imagenet_inference_labels()
    imagenet_training_labels = get_imagenet_training_labels()

    util.VAL_IMAGE_FLAGS = FLAGS

    # Both label sets must contain the same entries.
    assert (
        sorted(imagenet_training_labels) == sorted(imagenet_inference_labels))

    if using_client:
        x_test = np.random.rand(FLAGS.batch_size, FLAGS.image_size,
                                FLAGS.image_size, 3)
    else:
        validation_nums = get_validation_labels(FLAGS)
        x_test = get_validation_images(FLAGS)
        validation_labels = imagenet_inference_labels[validation_nums]

    config = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=44,
                                      inter_op_parallelism_threads=44)
    if FLAGS.ngraph:
        # The bridge is imported only when it was asked for.
        import ngraph_bridge
        print(ngraph_bridge.__version__)
        config = ngraph_bridge.update_config(config)

    sess = tf.compat.v1.Session(config=config)
    tf.import_graph_def(load_model(FLAGS.model), name='')

    graph = sess.graph
    input_tensor = graph.get_tensor_by_name('input:0')
    output_tensor = graph.get_tensor_by_name(
        'MobilenetV2/Logits/Conv2d_1c_1x1/BiasAdd:0')

    print('performing inference')
    start_time = time.time()
    y_pred = sess.run(output_tensor, {input_tensor: x_test})
    runtime = time.time() - start_time
    per_image_runtime = runtime / float(FLAGS.batch_size)
    print('performed inference, runtime (s):', np.round(runtime, 2))
    print('runtime per image (s)', np.round(per_image_runtime, 2))
    y_pred = np.squeeze(y_pred)

    # NOTE(review): for a single image the five indices stay in argsort
    # (ascending) order, while the batched branch flips them - confirm that
    # util.accuracy does not depend on the order.
    ranked = y_pred.argsort()
    if FLAGS.batch_size == 1:
        top5 = ranked[-5:]
    else:
        top5 = np.flip(ranked[:, -5:], axis=1)

    if using_client:
        return

    preds = imagenet_training_labels[top5]

    if FLAGS.batch_size < 10:
        print('validation_labels', validation_labels)
        print('validation_labels shape', validation_labels.shape)
        print('preds', preds)
        print('preds shape', preds.shape)

    util.accuracy(preds, validation_labels)
Esempio n. 17
0
def run_mnist(_):
    """Train a softmax-regression MNIST model with a Horovod optimizer.

    The data is read from a per-rank directory. Training runs inside a
    MonitoredTrainingSession whose StopAtStepHook ends it at step 10. After
    each training step the test-set accuracy is printed, and rank 0 prints
    the total elapsed time at the end.

    Args:
        _: Unused.
    """
    # Import data
    data_path = FLAGS.data_dir + 'MNIST-data-%d' % hvd.rank()
    dataset = learn.datasets.mnist.read_data_sets(data_path, one_hot=True)

    # Create the model
    with tf.name_scope("mnist_placholder"):
        images = tf.placeholder(tf.float32, [None, 784])
        weights = tf.Variable(tf.zeros([784, 10]))
        bias = tf.Variable(tf.zeros([10]))
        logits = tf.matmul(images, weights) + bias

        # Define loss and optimizer
        labels = tf.placeholder(tf.float32, [None, 10])

    # tf.nn.softmax_cross_entropy_with_logits is applied to the raw logits
    # and averaged over the batch; spelling the cross-entropy out through
    # tf.log(tf.nn.softmax(...)) can be numerically unstable.
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits(labels=labels,
                                                               logits=logits)
    loss = tf.reduce_mean(per_example_loss)
    global_step = tf.contrib.framework.get_or_create_global_step()
    base_optimizer = tf.train.GradientDescentOptimizer(0.5)
    # Add MPI Distributed Optimizer
    with tf.name_scope("horovod_opt"):
        distributed_optimizer = hvd.DistributedOptimizer(base_optimizer)
    train_step = distributed_optimizer.minimize(loss, global_step=global_step)

    # The StopAtStepHook handles stopping after running given steps.
    hooks = [
        hvd.BroadcastGlobalVariablesHook(0),
        tf.train.StopAtStepHook(last_step=10),
    ]

    # Test trained model
    is_correct = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
    accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

    # Enable soft placement and tracing as needed
    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=True,
                            inter_op_parallelism_threads=1)
    config_ngraph_enabled = ngraph_bridge.update_config(config)

    run_metadata = tf.RunMetadata()

    print("Variables initialized ...")

    # The MonitoredTrainingSession takes care of session initialization
    with tf.train.MonitoredTrainingSession(
            hooks=hooks, config=config_ngraph_enabled) as mon_sess:
        start = time.time()
        train_writer = tf.summary.FileWriter(FLAGS.log_dir, mon_sess.graph)
        while not mon_sess.should_stop():
            # Train
            batch_images, batch_labels = dataset.train.next_batch(100)
            mon_sess.run(train_step,
                         feed_dict={
                             images: batch_images,
                             labels: batch_labels,
                         })

            # Evaluate only while the session still accepts runs.
            if not mon_sess.should_stop():
                test_accuracy = mon_sess.run(accuracy,
                                             feed_dict={
                                                 images: dataset.test.images,
                                                 labels: dataset.test.labels,
                                             })
                print("Accuracy: ", test_accuracy)

        end = time.time()

    if hvd.rank() == 0:
        print("Training time: %f seconds" % (end - start))
Esempio n. 18
0
def calculate_output(param_dict, select_device, input_example):
    """Calculate the outputs of an imported graph for the given input.

    Loads the graph either from a frozen graph file or from a checkpoint,
    feeds ``input_example`` to it and evaluates every requested output
    tensor. Tensors whose evaluation raises are skipped, not fatal.

    Args:
        param_dict: User settings read from the json file. Must contain
            ``output_tensor_name`` and exactly one of ``pb_graph_location``
            or ``checkpoint_graph_location``.
        select_device: "NGRAPH" or "CPU"; passed to ``set_os_env``.
        input_example: Map from input tensor name to the example to feed.

    Returns:
        A tuple ``(tensors, output_tensor_name, skipped_tensors)``: the
        values that could be evaluated, the list of tensor names that were
        requested (every tensor of the graph when none was given), and the
        names whose evaluation failed.

    Raises:
        Exception: if both or neither graph source is given, or if the
            checkpoint's meta file or the checkpoint itself does not exist.
    """
    tf.reset_default_graph()

    has_pb = "pb_graph_location" in param_dict
    is_ckpt = "checkpoint_graph_location" in param_dict

    if has_pb and is_ckpt:
        raise Exception(
            "Only Graph or Checkpoint file can be specified, not both!")
    if not (has_pb or is_ckpt):
        raise Exception(
            "Input graph file OR Input checkpoint file is required!")

    output_tensor_name = param_dict["output_tensor_name"]

    config = tf.ConfigProto(inter_op_parallelism_threads=1,
                            allow_soft_placement=True)
    config_ngraph_enabled = ngraph_bridge.update_config(config)

    sess = tf.Session(config=config_ngraph_enabled)
    try:
        set_os_env(select_device)

        if is_ckpt:
            # Checkpoint: import the meta graph, then restore the variables.
            checkpoint_filename = param_dict["checkpoint_graph_location"]
            meta_filename = checkpoint_filename + '.meta'
            if not tf.gfile.Exists(meta_filename):
                raise Exception("Meta file does not exist")
            saver = tf.train.import_meta_graph(meta_filename)

            if not tf.train.checkpoint_exists(checkpoint_filename):
                raise Exception("Checkpoint with this prefix does not exist")
            saver.restore(sess, checkpoint_filename)

            print("Model restored: " + select_device)
            graph = tf.get_default_graph()
        else:
            # Graph file: text format for *pbtxt, binary otherwise.
            pb_filename = param_dict["pb_graph_location"]
            graph_def = tf.GraphDef()
            if pb_filename.endswith("pbtxt"):
                with open(pb_filename, "r") as f:
                    text_format.Merge(f.read(), graph_def)
            else:
                with open(pb_filename, "rb") as f:
                    graph_def.ParseFromString(f.read())

            with tf.Graph().as_default() as graph:
                tf.import_graph_def(graph_def)
            # The session opened above is not bound to the imported graph.
            # Release it before replacing it so it does not leak.
            sess.close()
            # NOTE(review): this session is built from `config`, not from
            # `config_ngraph_enabled` - confirm both are the same object.
            sess = tf.Session(graph=graph, config=config)

        # if no outputs are specified, then compare for all tensors
        if len(output_tensor_name) == 0:
            output_tensor_name = [
                tensor.name
                for op in graph.get_operations()
                for tensor in op.outputs
            ]

        # Create the tensor to its corresponding example map
        tensor_to_example_map = {
            graph.get_tensor_by_name(item): input_example[item]
            for item in input_example
        }

        # Resolve every requested name up front so that an unknown name
        # raises here and is not recorded as "skipped" below.
        for name in output_tensor_name:
            graph.get_tensor_by_name(name)

        tensors = []
        skipped_tensors = []
        for name in output_tensor_name:
            try:
                tensors.append(
                    sess.run(name, feed_dict=tensor_to_example_map))
            except Exception:
                # Best effort: a tensor that cannot be evaluated is
                # reported back to the caller by name.
                skipped_tensors.append(name)
        return tensors, output_tensor_name, skipped_tensors
    finally:
        # Closes whichever session is current, on success and on failure.
        sess.close()
Esempio n. 19
0
def train_mnist_cnn(FLAGS):
    """Evaluate a restored MNIST convnet against a reference accuracy.

    No training happens here: the model is restored from
    ``FLAGS.model_dir`` and evaluated ``FLAGS.num_eval_cycles`` times on
    random batches of ``FLAGS.batch_size`` images drawn from the MNIST
    training split. The mean accuracy over all cycles is compared with a
    fixed reference value.

    Args:
        FLAGS: Settings object providing ``model_dir``, ``num_eval_cycles``
            and ``batch_size``.

    Returns:
        The mean accuracy over all evaluation cycles.

    Raises:
        ValueError: if ``FLAGS.num_eval_cycles`` is not positive.
        AssertionError: if the mean accuracy is 5% or more below the
            reference accuracy.
    """
    # Defined up front: it was previously assigned inside the evaluation
    # loop and therefore unbound when the loop ran zero times.
    reference_accucary = 0.9612

    num_eval_cycles = FLAGS.num_eval_cycles
    if num_eval_cycles <= 0:
        # The mean below divides by the cycle count.
        raise ValueError("num_eval_cycles must be positive, got %r" %
                         (num_eval_cycles,))

    # Config
    config = tf.compat.v1.ConfigProto(allow_soft_placement=True,
                                      log_device_placement=True,
                                      inter_op_parallelism_threads=4)
    config_ngraph_enabled = ngraph_bridge.update_config(config)

    # Note: Additional configuration option to boost performance is to set the
    # following environment for the run:
    # OMP_NUM_THREADS=44 KMP_AFFINITY=granularity=fine,scatter
    # The OMP_NUM_THREADS number should correspond to the number of
    # cores in the system

    # Create the model
    x = tf.compat.v1.placeholder(tf.float32, [None, 784])

    # Label placeholder
    y_ = tf.compat.v1.placeholder(tf.float32, [None, 10])

    # Build the graph for the deep net
    y_conv, keep_prob = deepnn_inference(x)

    with tf.name_scope('accuracy'):
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
        correct_prediction = tf.cast(correct_prediction, tf.float32)

    accuracy = tf.reduce_mean(correct_prediction)
    tf.compat.v1.summary.scalar('test accuracy', accuracy)

    graph_location = "/tmp/" + getpass.getuser(
    ) + "/tensorboard-logs/mnist-convnet"
    print('Saving graph to: %s' % graph_location)

    merged = tf.compat.v1.summary.merge_all()
    train_writer = tf.compat.v1.summary.FileWriter(graph_location)
    try:
        train_writer.add_graph(tf.compat.v1.get_default_graph())
        saver = tf.compat.v1.train.Saver()
        with tf.compat.v1.Session(config=config_ngraph_enabled) as sess:
            saver.restore(sess, FLAGS.model_dir)
            test_accuracy_final = 0
            (x_train, y_train), _test_split = mnist.load_data()
            num_examples = len(x_train)
            x_train = np.reshape(x_train, (num_examples, 784))
            x_train = x_train.astype(np.float32) / 255
            y_train = to_categorical(y_train, num_classes=10)

            for i in range(num_eval_cycles):
                index = np.random.choice(num_examples, FLAGS.batch_size)
                x_random = x_train[index]
                y_random = y_train[index]
                t = time.time()
                # NOTE(review): keep_prob is fed as 0.5 during evaluation;
                # confirm this is intended, the reference accuracy may have
                # been recorded with this setting.
                summary, test_accuracy = sess.run([merged, accuracy],
                                                  feed_dict={
                                                      x: x_random,
                                                      y_: y_random,
                                                      keep_prob: 0.5
                                                  })
                test_accuracy_final = test_accuracy_final + test_accuracy
                print('step %d, test_accuracy %g, %g sec for infernce step' %
                      (i, test_accuracy, time.time() - t))
                train_writer.add_summary(summary, i)
            test_accuracy_final = test_accuracy_final / num_eval_cycles
            print("Inference  finished")
            print(test_accuracy_final)
            print('Reference accuracy %g' % (reference_accucary))
            assert (reference_accucary -
                    test_accuracy_final) / reference_accucary < 0.05
            print(
                "The validation accuracy is within 5% of the trained reference data!"
            )
            return test_accuracy_final
    finally:
        # Close the summary writer on every exit path.
        train_writer.close()
def train_mnist_cnn(FLAGS):
    """Train the MNIST convnet, evaluate it and save the model.

    Args:
        FLAGS: Settings object providing ``make_deterministic``,
            ``optimizer`` ("adam", "sgd" or "momentum"), ``data_dir``,
            ``train_loop_count``, ``batch_size``, ``test_image_count`` and
            ``model_dir``.

    Returns:
        A tuple ``(loss_values, test_accuracy)``: the loss recorded at each
        training step and the accuracy on the first
        ``FLAGS.test_image_count`` test images.
    """
    # Config, with the custom optimizer enabled through the rewriter
    # config options.
    config = ngraph_bridge.update_config(
        tf.ConfigProto(allow_soft_placement=True,
                       log_device_placement=False,
                       inter_op_parallelism_threads=1))

    # Note: Additional configuration option to boost performance is to set the
    # following environment for the run:
    # OMP_NUM_THREADS=44 KMP_AFFINITY=granularity=fine,scatter
    # The OMP_NUM_THREADS number should correspond to the number of
    # cores in the system

    # A deterministic run fixes the seed and turns batch shuffling off.
    shuffle_batch = True
    if FLAGS.make_deterministic:
        tf.random.set_random_seed(1)
        shuffle_batch = False

    supported_optimizers = ["adam", "sgd", "momentum"]

    assert (FLAGS.optimizer in supported_optimizers), "Optimizer not supported"

    # Import data
    mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)

    # Input and label placeholders
    x = tf.placeholder(tf.float32, [None, 784])
    y_ = tf.placeholder(tf.float32, [None, 10])

    # Build the graph for the deep net
    y_conv, keep_prob = deepnn(x)

    with tf.name_scope('loss'):
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y_,
                                                                logits=y_conv)
    cross_entropy = tf.reduce_mean(cross_entropy)

    # Only the selected optimizer is constructed.
    optimizer_factories = {
        "adam": lambda: tf.train.AdamOptimizer(1e-4),
        "sgd": lambda: tf.train.GradientDescentOptimizer(1e-4),
        "momentum": lambda: tf.train.MomentumOptimizer(1e-4, 0.9),
    }
    with tf.name_scope(FLAGS.optimizer + "_optimizer"):
        make_optimizer = optimizer_factories.get(FLAGS.optimizer)
        if make_optimizer is not None:
            train_step = make_optimizer().minimize(cross_entropy)

    with tf.name_scope('accuracy'):
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
        correct_prediction = tf.cast(correct_prediction, tf.float32)
    accuracy = tf.reduce_mean(correct_prediction)
    tf.summary.scalar('Training accuracy', accuracy)
    tf.summary.scalar('Loss function', cross_entropy)

    graph_location = "/tmp/" + getpass.getuser(
    ) + "/tensorboard-logs/mnist-convnet"
    print('Saving graph to: %s' % graph_location)

    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(graph_location)
    train_writer.add_graph(tf.get_default_graph())

    saver = tf.train.Saver()

    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        loss_values = []
        for step in range(FLAGS.train_loop_count):
            batch_images, batch_labels = mnist.train.next_batch(
                FLAGS.batch_size, shuffle=shuffle_batch)[:2]

            # Every tenth step, report accuracy on the current batch with
            # keep_prob fed as 1.0.
            if step % 10 == 0:
                tick = time.time()
                train_accuracy = accuracy.eval(feed_dict={
                    x: batch_images,
                    y_: batch_labels,
                    keep_prob: 1.0
                })
                print('step %d, training accuracy %g, %g sec to evaluate' %
                      (step, train_accuracy, time.time() - tick))

            tick = time.time()
            _, summary, loss = sess.run([train_step, merged, cross_entropy],
                                        feed_dict={
                                            x: batch_images,
                                            y_: batch_labels,
                                            keep_prob: 0.5
                                        })
            loss_values.append(loss)
            print('step %d, loss %g, %g sec for training step' %
                  (step, loss, time.time() - tick))
            train_writer.add_summary(summary, step)

        print("Training finished. Running test")

        test_count = FLAGS.test_image_count
        test_feed = {
            x: mnist.test.images[:test_count],
            y_: mnist.test.labels[:test_count],
            keep_prob: 1.0
        }
        test_accuracy = accuracy.eval(feed_dict=test_feed)
        print('test accuracy %g' % test_accuracy)
        saver.save(sess, FLAGS.model_dir)
        return loss_values, test_accuracy
Esempio n. 21
0
def train_mnist_cnn(FLAGS):
    """Train the MNIST convnet with a Horovod-wrapped Adam optimizer.

    Builds the graph through ``deepnn``, then trains inside a
    ``tf.train.MonitoredTrainingSession`` until the ``StopAtStepHook``
    (configured with ``FLAGS.train_loop_count``) requests a stop. Summaries
    for accuracy and loss are written to a per-user directory under ``/tmp``.

    Args:
        FLAGS: Parsed options object. This function reads ``data_dir``,
            ``batch_size``, ``train_loop_count``, ``test_image_count`` and
            ``model_dir`` from it.

    Returns:
        A ``(loss_values, test_accuracy)`` tuple. ``loss_values`` holds the
        cross-entropy value fetched at every training step.
        ``test_accuracy`` is a list that receives the evaluated test
        accuracy once, on rank 0 only, at step
        ``train_loop_count // hvd.size() - 1``; it stays empty otherwise.
    """
    # Config
    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False,
                            inter_op_parallelism_threads=1)
    config_ngraph_enabled = ngraph_bridge.update_config(config)

    # Note: Additional configuration option to boost performance is to set the
    # following environment for the run:
    # OMP_NUM_THREADS=44 KMP_AFFINITY=granularity=fine,scatter
    # The OMP_NUM_THREADS number should correspond to the number of
    # cores in the system

    # Import data
    mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)

    # Create the model
    x = tf.placeholder(tf.float32, [None, 784])

    # Define loss and optimizer
    y_ = tf.placeholder(tf.float32, [None, 10])

    # Build the graph for the deep net
    y_conv, keep_prob = deepnn(x)

    with tf.name_scope('loss'):
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y_,
                                                                logits=y_conv)
    cross_entropy = tf.reduce_mean(cross_entropy)

    # add distributed wrapper to "adam_optimizer"
    opt = hvd.DistributedOptimizer(tf.train.AdamOptimizer(1e-4))
    global_step = tf.contrib.framework.get_or_create_global_step()
    with tf.name_scope('distributed_optimizer'):
        train_step = opt.minimize(cross_entropy, global_step=global_step)

    with tf.name_scope('accuracy'):
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
        correct_prediction = tf.cast(correct_prediction, tf.float32)
    accuracy = tf.reduce_mean(correct_prediction)
    tf.summary.scalar('Training accuracy', accuracy)
    tf.summary.scalar('Loss function', cross_entropy)

    graph_location = "/tmp/" + getpass.getuser(
    ) + "/tensorboard-logs/mnist-convnet"
    print('Saving graph to: %s' % graph_location)

    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(graph_location)
    train_writer.add_graph(tf.get_default_graph())

    saver = tf.train.Saver()
    train_loops = FLAGS.train_loop_count
    num_test_images = FLAGS.test_image_count
    hooks = [
        # Horovod: BroadcastGlobalVariablesHook broadcasts initial variable states
        # from rank 0 to all other processes. This is necessary to ensure consistent
        # initialization of all workers when training is started with random weights
        # or restored from a checkpoint.
        hvd.BroadcastGlobalVariablesHook(0),
        # Horovod: adjust number of steps based on number of ranks.
        #tf.train.StopAtStepHook(train_loops // hvd.size())
        tf.train.StopAtStepHook(train_loops)
    ]

    # Only rank 0 prints progress and runs the test evaluation.
    is_root = hvd.rank() == 0
    # Step at which rank 0 evaluates the test set.
    eval_step = train_loops // hvd.size() - 1

    with tf.train.MonitoredTrainingSession(
            hooks=hooks, config=config_ngraph_enabled) as sess:

        step = 0

        loss_values = []
        test_accuracy = []
        while not sess.should_stop():
            batch = mnist.train.next_batch(FLAGS.batch_size)
            step += 1
            if step % 10 == 0 and is_root:
                t = time.time()
                # keep_prob is fed as 1.0 so evaluation never applies dropout.
                train_accuracy = sess.run(accuracy,
                                          feed_dict={
                                              x: batch[0],
                                              y_: batch[1],
                                              keep_prob: 1.0
                                          })
                print('step %d training accuracy %g %g sec to evaluate' %
                      (step, train_accuracy, time.time() - t))

            # Exactly one optimizer update per batch; the summaries and the
            # loss are fetched in the same run call.
            t = time.time()
            _, summary, loss = sess.run([train_step, merged, cross_entropy],
                                        feed_dict={
                                            x: batch[0],
                                            y_: batch[1],
                                            keep_prob: 0.5
                                        })
            loss_values.append(loss)
            if is_root:
                print('step %d, loss %g, %g sec for training step' %
                      (step, loss, time.time() - t))
            train_writer.add_summary(summary, step)

            if step == eval_step and is_root:
                x_test = mnist.test.images[:num_test_images]
                y_test = mnist.test.labels[:num_test_images]
                # Evaluate once and record the numeric result, not the
                # `accuracy` tensor itself.
                test_accuracy_value = sess.run(accuracy,
                                               feed_dict={
                                                   x: x_test,
                                                   y_: y_test,
                                                   keep_prob: 1.0
                                               })
                print('test accuracy: ', test_accuracy_value)
                test_accuracy.append(test_accuracy_value)

        print("Training finished. Running test")
        # NOTE(review): `sess` is the MonitoredTrainingSession wrapper rather
        # than a plain tf.Session -- confirm Saver.save accepts it here.
        saver.save(sess, FLAGS.model_dir)
        return loss_values, test_accuracy