Example #1
def test_fusedbatchnorm_nchw():
    #Test 1: tf_model TF-native
    with tf.compat.v1.Session(config=config) as sess_tf:
        ngraph_bridge.disable()
        tf_out, in_0 = tf_model()
        feed_dict = {in_0: k_np}
        tf_outval = sess_tf.run(tf_out, feed_dict=feed_dict)

    #Test 2: model2 with ngraph, NNP backend
    with tf.compat.v1.Session(config=config) as sess_ng:
        ngraph_bridge.enable()
        ngraph_bridge.update_config(config)
        os.environ['NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS'] = '1'
        ng_out, in_0 = ng_model()
        feed_dict = {in_0: k_np}
        ng_outval = sess_ng.run(ng_out, feed_dict=feed_dict)

    # transpose TF output from NHWC to NCHW for comparison with ngraph output
    result1_bool = np.allclose(
        np.transpose(tf_outval[0], (0, 3, 1, 2)),
        ng_outval[0],
        rtol=0,
        atol=1e-02)
    # these TF outputs do not need to be transposed since they have only 1 dimension
    result2_bool = np.allclose(tf_outval[1], ng_outval[1], rtol=0, atol=1e-02)
    result3_bool = np.allclose(tf_outval[2], ng_outval[2], rtol=0, atol=1e-02)

    assert (result1_bool and result2_bool and result3_bool)
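
The helpers tf_model and ng_model above come from the surrounding test module. A minimal sketch of what they might look like, assuming illustrative shapes and constants (not the original definitions): both build the same FusedBatchNorm graph, one laid out for native TF and one for nGraph.

def tf_model():
    # hypothetical NCHW placeholder, transposed to NHWC because TF's
    # CPU FusedBatchNorm kernel expects NHWC
    x = tf.compat.v1.placeholder(tf.float32, shape=(1, 3, 8, 8))
    x_nhwc = tf.transpose(x, (0, 2, 3, 1))
    scale = tf.constant([1.0, 0.9, 1.1])
    offset = tf.constant([0.1, 0.0, -0.1])
    # returns (normalized output, batch mean, batch variance)
    outs = tf.compat.v1.nn.fused_batch_norm(
        x_nhwc, scale, offset, data_format='NHWC', is_training=True)
    return outs, x

def ng_model():
    # the same computation kept in NCHW, which nGraph handles directly
    x = tf.compat.v1.placeholder(tf.float32, shape=(1, 3, 8, 8))
    scale = tf.constant([1.0, 0.9, 1.1])
    offset = tf.constant([0.1, 0.0, -0.1])
    outs = tf.compat.v1.nn.fused_batch_norm(
        x, scale, offset, data_format='NCHW', is_training=True)
    return outs, x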
Example #2
    def on_ngraph_change(self, change):
        if change['type'] == 'change' and change['name'] == 'value':
            i = self.ngraph_backends.index(change['new'])

            if self.ngraph_backends[i] == 'DISABLED':
                self.use_ngraph = False
                ngraph_bridge.disable()
            else:
                self.use_ngraph = True
                ngraph_bridge.enable()
                ngraph_bridge.set_backend(self.ngraph_backends[i])
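
This handler follows the ipywidgets observer protocol, where the change dict carries 'type', 'name', and 'new'. A hedged wiring sketch, assuming the selector is a Dropdown built over self.ngraph_backends (the widget details are assumptions):

        # e.g., inside the demo class's __init__
        import ipywidgets as widgets
        self.backend_dropdown = widgets.Dropdown(
            options=self.ngraph_backends, description='Backend')
        # calls on_ngraph_change with a change dict on every selection
        self.backend_dropdown.observe(self.on_ngraph_change)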
Example #3
def set_os_env(select_device):
    if select_device == 'CPU':
        # run on TF only
        ngraph_bridge.disable()
    else:
        if not ngraph_bridge.is_enabled():
            ngraph_bridge.enable()
        assert select_device[:7] == "NGRAPH_", \
            "Expecting device name to start with NGRAPH_"
        back_end = select_device.split("NGRAPH_")
        os.environ['NGRAPH_TF_BACKEND'] = back_end[1]
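
Callers pass either plain 'CPU' or an 'NGRAPH_'-prefixed device name; a short usage sketch (the backend names are only examples):

set_os_env('CPU')         # stock TensorFlow, bridge disabled
set_os_env('NGRAPH_CPU')  # bridge enabled, NGRAPH_TF_BACKEND=CPU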
Example #4
    def without_ngraph(self, l, config=tf.ConfigProto()):
        ngraph_tf_disable_deassign_clusters = os.environ.pop(
            'NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS', None)

        ngraph_bridge.disable()
        with tf.Session(config=config) as sess:
            retval = l(sess)

        if ngraph_tf_disable_deassign_clusters is not None:
            os.environ['NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS'] = \
                ngraph_tf_disable_deassign_clusters

        return retval
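
The callable l receives the live session, so a test can run any fetch with the bridge switched off. A usage sketch (runner and out are assumed names):

expected = runner.without_ngraph(lambda sess: sess.run(out))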
Example #5
def test_conv2dbackpropinput_nhwc(padding):
    np_filter = np.random.rand(*filter_size_hwio).astype('f')
    n_np_out = np.random.rand(*out_backprop_in_sizes[padding]).astype('f')

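    #Test 1: tf_model TF-native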
    with tf.compat.v1.Session(config=config) as sess_tf:
        ngraph_bridge.disable()
        tf_out, filter_size, out_backprop = tf_model(padding)
        feed_dict = {filter_size: np_filter, out_backprop: n_np_out}
        tf_outval = sess_tf.run(tf_out, feed_dict=feed_dict)

    #Test 2: model2 with ngraph, NNP backend
    with tf.compat.v1.Session(config=config) as sess_ng:
        ngraph_bridge.enable()
        ngraph_bridge.update_config(config)
        os.environ['NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS'] = '1'
        ng_out, filter_size, out_backprop = ng_model(padding)
        feed_dict = {filter_size: np_filter, out_backprop: n_np_out}
        ng_outval = sess_ng.run(ng_out, feed_dict=feed_dict)

    assert np.allclose(tf_outval, ng_outval, rtol=0, atol=1e-02)
Example #6
    def test_mnist_training(self, optimizer):

        class mnist_training_flags:

            def __init__(self, data_dir, model_dir, training_iterations,
                         training_batch_size, validation_batch_size,
                         make_deterministic, training_optimizer):
                self.data_dir = data_dir
                self.model_dir = model_dir
                self.train_loop_count = training_iterations
                self.batch_size = training_batch_size
                self.test_image_count = validation_batch_size
                self.make_deterministic = make_deterministic
                self.optimizer = training_optimizer

        data_dir = '/tmp/' + getpass.getuser() + '/tensorflow/mnist/input_data'
        train_loop_count = 50
        batch_size = 50
        test_image_count = None
        make_deterministic = True
        model_dir = './mnist_trained/'

        FLAGS = mnist_training_flags(data_dir, model_dir, train_loop_count,
                                     batch_size, test_image_count,
                                     make_deterministic, optimizer)

        # Run on nGraph
        ng_loss_values, ng_test_accuracy = train_mnist_cnn(FLAGS)
        ng_values = ng_loss_values + [ng_test_accuracy]
        # Reset the Graph
        tf.compat.v1.reset_default_graph()

        # disable ngraph-tf
        ngraph_bridge.disable()
        tf_loss_values, tf_test_accuracy = train_mnist_cnn(FLAGS)
        tf_values = tf_loss_values + [tf_test_accuracy]

        # compare values
        assert np.allclose(
            ng_values, tf_values,
            atol=1e-3), "Loss or Accuracy values don't match"
Example #7
def test_conv2d():
    #Test 1: tf_model TF-native
    with tf.compat.v1.Session(config=config) as sess_tf:
        ngraph_bridge.disable()
        tf_out, input_data = tf_model()
        feed_dict = {input_data: t_np}
        tf_outval = sess_tf.run(tf_out, feed_dict=feed_dict)

    #Test 2: model2 with ngraph, NNP backend
    with tf.compat.v1.Session(config=config) as sess_ng:
        ngraph_bridge.enable()
        ngraph_bridge.update_config(config)
        os.environ['NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS'] = '1'
        ng_out, input_data = ng_model()
        feed_dict = {input_data: n_np}
        ng_outval = sess_ng.run(ng_out, feed_dict=feed_dict)

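    # transpose TF output from NHWC to NCHW for comparison with ngraph output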
    assert np.allclose(np.transpose(tf_outval, (0, 3, 1, 2)),
                       ng_outval,
                       rtol=0,
                       atol=1e-02)
Example #8
def test_maxpoolbackprop_nhwc(padding):
    g_np = grad_nhwc[padding]
    o_np = output_nhwc[padding]

    #Test 1: tf_model TF-native
    with tf.Session(config=config) as sess_tf:
        ngraph_bridge.disable()
        tf_out, orig_in, orig_out, grad = tf_model(padding)
        feed_dict = {orig_in: i_np, orig_out: o_np, grad: g_np}
        tf_outval = sess_tf.run(tf_out, feed_dict=feed_dict)

    #Test 2: model2 with ngraph, NNP backend
    with tf.Session(config=config) as sess_ng:
        ngraph_bridge.enable()
        ngraph_bridge.update_config(config)
        os.environ['NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS'] = '1'
        ng_out, orig_in, orig_out, grad = ng_model(padding)
        feed_dict = {orig_in: i_np, orig_out: o_np, grad: g_np}
        ng_outval = sess_ng.run(ng_out, feed_dict=feed_dict)

    assert (np.allclose(tf_outval, ng_outval, rtol=0, atol=1e-02))
Example #9
def test_fusedbatchnorm_nhwc():
    #Test 1: tf_model TF-native
    with tf.Session(config=config) as sess_tf:
        ngraph_bridge.disable()
        tf_out, in_0 = tf_model()
        feed_dict = {in_0: k_np}
        tf_outval = sess_tf.run(tf_out, feed_dict=feed_dict)

    #Test 2: model2 with ngraph, NNP backend
    with tf.Session(config=config) as sess_ng:
        ngraph_bridge.enable()
        ngraph_bridge.update_config(config)
        os.environ['NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS'] = '1'
        ng_out, in_0 = ng_model()
        feed_dict = {in_0: k_np}
        ng_outval = sess_ng.run(ng_out, feed_dict=feed_dict)

    result1_bool = np.allclose(tf_outval[0], ng_outval[0], rtol=0, atol=1e-02)
    result2_bool = np.allclose(tf_outval[1], ng_outval[1], rtol=0, atol=1e-02)
    result3_bool = np.allclose(tf_outval[2], ng_outval[2], rtol=0, atol=1e-02)

    assert (result1_bool and result2_bool and result3_bool)
Example #10
    def setup_ngraph_bridge(self, backend):
        # Environment variables
        os.environ['PLAIDML_USE_STRIPE'] = '1'

        if self.workers < 1:
            os.environ['OMP_NUM_THREADS'] = '1'  # environ values must be strings
        else:
            # Use default
            if os.getenv('OMP_NUM_THREADS') is not None:
                del os.environ['OMP_NUM_THREADS']

        import ngraph_bridge

        if backend == 'DISABLED' or backend == 'TF':
            ngraph_bridge.disable()
        elif backend == 'CPU':
            ngraph_bridge.set_backend('CPU')
            ngraph_bridge.enable()
        elif backend == 'PLAIDML':
            ngraph_bridge.set_backend('PLAIDML')
            ngraph_bridge.enable()
        else:
            print("ERROR: Unsupported backend " + backend + " selected.")
Example #11
def test_conv2dbackpropfilter_nchw(padding):
    n_np_inp = np.random.rand(*input_sizes_nchw).astype('f')
    n_np_out = np.random.rand(*out_backprop_in_sizes[padding]).astype('f')

    #Reshape to NHWC for TF
    t_np_inp = np.transpose(n_np_inp, (0, 2, 3, 1))
    t_np_out = np.transpose(n_np_out, (0, 2, 3, 1))

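    #Test 1: tf_model TF-native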
    with tf.compat.v1.Session(config=config) as sess_tf:
        ngraph_bridge.disable()
        tf_out, input_data, out_backprop = tf_model(padding)
        feed_dict = {input_data: t_np_inp, out_backprop: t_np_out}
        tf_outval = sess_tf.run(tf_out, feed_dict=feed_dict)

    #Test 2: model2 with ngraph, NNP backend
    with tf.compat.v1.Session(config=config) as sess_ng:
        ngraph_bridge.enable()
        ngraph_bridge.update_config(config)
        os.environ['NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS'] = '1'
        ng_out, input_data, out_backprop = ng_model(padding)
        feed_dict = {input_data: n_np_inp, out_backprop: n_np_out}
        ng_outval = sess_ng.run(ng_out, feed_dict=feed_dict)

    assert np.allclose(tf_outval, ng_outval, rtol=0, atol=1e-02)
Example #12
    def test_disable(self):
        ngraph_bridge.disable()
        assert ngraph_bridge.is_enabled() == 0
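
A symmetric check, sketched from the enable()/is_enabled() calls already used in these examples:

    def test_enable(self):
        ngraph_bridge.enable()
        assert ngraph_bridge.is_enabled()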
Example #13
    def run(self):
        """run benchmark with optimized graph"""

        print("Run inference with dummy data")

        config = tf.compat.v1.ConfigProto()
        config.intra_op_parallelism_threads = self.args.num_intra_threads
        config.inter_op_parallelism_threads = self.args.num_inter_threads
        config.use_per_session_threads = True

        data_graph = tf.Graph()
        with data_graph.as_default():
            input_shape = [
                self.args.batch_size, RESNET_IMAGE_SIZE, RESNET_IMAGE_SIZE, 3
            ]
            images = tf.random.uniform(
                input_shape,
                0.0,
                255.0,
                dtype=tf.float32,
                seed=42,
                name='synthetic_images')

        infer_graph = tf.Graph()
        with infer_graph.as_default():
            graph_def = tf.compat.v1.GraphDef()
            with tf.io.gfile.GFile(self.args.input_graph, 'rb') as input_file:
                input_graph_content = input_file.read()
                graph_def.ParseFromString(input_graph_content)
            print(
                "Optimizing graph %s for inference..." % self.args.input_graph)
            output_graph = optimize_for_inference(
                graph_def, [INPUTS], [OUTPUTS], dtypes.float32.as_datatype_enum,
                False)
            tf.import_graph_def(output_graph, name='')

        input_tensor = infer_graph.get_tensor_by_name('input_tensor:0')
        output_tensor = infer_graph.get_tensor_by_name('softmax_tensor:0')

        # Run without nGraph first
        print("Run inference (without nGraph)")
        ngraph_bridge.disable()
        data_sess = tf.compat.v1.Session(graph=data_graph, config=config)
        infer_sess = tf.compat.v1.Session(graph=infer_graph, config=config)

        iteration = 0
        num_processed_images = 0
        num_remaining_images = self.args.num_images
        tf_time = 0.0
        tf_labels = np.array([], dtype=np.int32)
        while num_remaining_images >= self.args.batch_size:
            np_images = data_sess.run(images)
            if iteration > self.args.warmup_iters:
                num_processed_images += self.args.batch_size
                num_remaining_images -= self.args.batch_size

            tf_start_time = time.time()
            predictions = infer_sess.run(output_tensor,
                                         {input_tensor: np_images})
            tf_elapsed_time = time.time() - tf_start_time

            if iteration > self.args.warmup_iters:
                tf_time += tf_elapsed_time
                tf_labels = np.append(tf_labels, np.argmax(
                    predictions, axis=-1))
            iteration += 1

        print("Total execution time (TF): ", tf_time)

        # Run with nGraph now
        print("Run inference (with nGraph)")
        ngraph_bridge.enable()

        data_sess = tf.compat.v1.Session(graph=data_graph, config=config)
        infer_sess = tf.compat.v1.Session(graph=infer_graph, config=config)

        iteration = 0
        num_processed_images = 0
        num_remaining_images = self.args.num_images
        ngtf_time = 0.0
        ngtf_labels = np.array([], dtype=np.int32)
        while num_remaining_images >= self.args.batch_size:
            np_images = data_sess.run(images)
            if iteration > self.args.warmup_iters:
                num_processed_images += self.args.batch_size
                num_remaining_images -= self.args.batch_size

            ngtf_start_time = time.time()
            predictions = infer_sess.run(output_tensor,
                                         {input_tensor: np_images})
            ngtf_elapsed_time = time.time() - ngtf_start_time

            if iteration > self.args.warmup_iters:
                ngtf_time += ngtf_elapsed_time
                ngtf_labels = np.append(ngtf_labels,
                                        np.argmax(predictions, axis=-1))
            iteration += 1

        print("Total execution time (NGTF): ", ngtf_time)

        print("Processed %d images. Batch size = %d" % (num_processed_images,
                                                        self.args.batch_size))
        print("Avg throughput (TF): %0.4f img/s" %
              (num_processed_images / tf_time))
        print("Avg throughput (NGTF): %0.4f img/s" %
              (num_processed_images / ngtf_time))
        assert ((tf_labels == ngtf_labels).all())
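
The runner reads several fields from self.args; a hedged sketch of the matching argparse surface (flag names mirror the attribute reads above, defaults are illustrative):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--input_graph', required=True)  # frozen .pb to optimize
parser.add_argument('--batch_size', type=int, default=128)
parser.add_argument('--num_images', type=int, default=1024)
parser.add_argument('--warmup_iters', type=int, default=10)
parser.add_argument('--num_intra_threads', type=int, default=28)
parser.add_argument('--num_inter_threads', type=int, default=2)
args = parser.parse_args()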