def test_fusedbatchnorm_nchw():
    # Test 1: tf_model TF-native
    with tf.compat.v1.Session(config=config) as sess_tf:
        ngraph_bridge.disable()
        tf_out, in_0 = tf_model()
        feed_dict = {in_0: k_np}
        tf_outval = sess_tf.run(tf_out, feed_dict=feed_dict)

    # Test 2: model2 with ngraph, NNP backend
    with tf.compat.v1.Session(config=config) as sess_ng:
        ngraph_bridge.enable()
        ngraph_bridge.update_config(config)
        os.environ['NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS'] = '1'
        ng_out, in_0 = ng_model()
        feed_dict = {in_0: k_np}
        ng_outval = sess_ng.run(ng_out, feed_dict=feed_dict)

    # Transpose the TF output from NHWC to NCHW for comparison with the ngraph output
    result1_bool = np.allclose(
        np.transpose(tf_outval[0], (0, 3, 1, 2)),
        ng_outval[0],
        rtol=0,
        atol=1e-02)
    # These TF outputs do not need to be transposed since they have only 1 dimension
    result2_bool = np.allclose(tf_outval[1], ng_outval[1], rtol=0, atol=1e-02)
    result3_bool = np.allclose(tf_outval[2], ng_outval[2], rtol=0, atol=1e-02)

    assert (result1_bool and result2_bool and result3_bool)
def on_ngraph_change(self, change):
    if change['type'] == 'change' and change['name'] == 'value':
        i = self.ngraph_backends.index(change['new'])
        if self.ngraph_backends[i] == 'DISABLED':
            self.use_ngraph = False
            ngraph_bridge.disable()
        else:
            self.use_ngraph = True
            ngraph_bridge.enable()
            ngraph_bridge.set_backend(self.ngraph_backends[i])
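# The change dict above matches the ipywidgets/traitlets observer signature. A
# minimal, hypothetical sketch of how such a handler is typically attached; the
# Dropdown widget and backend list here are assumptions, not part of the source.
import ipywidgets as widgets

backends = ['CPU', 'PLAIDML', 'DISABLED']
dropdown = widgets.Dropdown(options=backends, value=backends[0])

def on_change(change):
    # change carries 'type', 'name', 'old', and 'new' keys from traitlets
    if change['type'] == 'change' and change['name'] == 'value':
        print('selected backend:', change['new'])

dropdown.observe(on_change, names='value')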
def set_os_env(select_device):
    if select_device == 'CPU':
        # run on TF only
        ngraph_bridge.disable()
    else:
        if not ngraph_bridge.is_enabled():
            ngraph_bridge.enable()
        assert select_device[:7] == "NGRAPH_", \
            "Expecting device name to start with NGRAPH_"
        back_end = select_device.split("NGRAPH_")
        os.environ['NGRAPH_TF_BACKEND'] = back_end[1]
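# Hedged usage sketch for set_os_env above. The device strings follow the
# NGRAPH_<backend> convention that the assert enforces; they are illustrative
# assumptions, not values taken from the source.
set_os_env('CPU')         # run on stock TensorFlow (ngraph_bridge.disable())
set_os_env('NGRAPH_CPU')  # enable the bridge and set NGRAPH_TF_BACKEND=CPU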
def without_ngraph(self, l, config=tf.ConfigProto()):
    ngraph_tf_disable_deassign_clusters = os.environ.pop(
        'NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS', None)
    ngraph_bridge.disable()
    with tf.Session(config=config) as sess:
        retval = l(sess)
    if ngraph_tf_disable_deassign_clusters is not None:
        os.environ['NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS'] = \
            ngraph_tf_disable_deassign_clusters
    return retval
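# A plausible counterpart to without_ngraph, sketched here as an assumption
# (the real helper is not shown in this collection): enable the bridge, run
# the callable in a fresh session, and return its result.
def with_ngraph(self, l, config=tf.ConfigProto()):
    ngraph_bridge.enable()
    with tf.Session(config=config) as sess:
        retval = l(sess)
    return retval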
def test_conv2dbackpropinput_nhwc(padding):
    np_filter = np.random.rand(*filter_size_hwio).astype('f')
    n_np_out = np.random.rand(*out_backprop_in_sizes[padding]).astype('f')

    with tf.compat.v1.Session(config=config) as sess_tf:
        ngraph_bridge.disable()
        tf_out, filter_size, out_backprop = tf_model(padding)
        feed_dict = {filter_size: np_filter, out_backprop: n_np_out}
        tf_outval = sess_tf.run(tf_out, feed_dict=feed_dict)

    # Test 2: model2 with ngraph, NNP backend
    with tf.compat.v1.Session(config=config) as sess_ng:
        ngraph_bridge.enable()
        ngraph_bridge.update_config(config)
        os.environ['NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS'] = '1'
        ng_out, filter_size, out_backprop = ng_model(padding)
        feed_dict = {filter_size: np_filter, out_backprop: n_np_out}
        ng_outval = sess_ng.run(ng_out, feed_dict=feed_dict)

    assert np.allclose(tf_outval, ng_outval, rtol=0, atol=1e-02)
def test_mnist_training(self, optimizer):

    class mnist_training_flags:

        def __init__(self, data_dir, model_dir, training_iterations,
                     training_batch_size, validation_batch_size,
                     make_deterministic, training_optimizer):
            self.data_dir = data_dir
            self.model_dir = model_dir
            self.train_loop_count = training_iterations
            self.batch_size = training_batch_size
            self.test_image_count = validation_batch_size
            self.make_deterministic = make_deterministic
            self.optimizer = training_optimizer

    data_dir = '/tmp/' + getpass.getuser() + 'tensorflow/mnist/input_data'
    train_loop_count = 50
    batch_size = 50
    test_image_count = None
    make_deterministic = True
    model_dir = './mnist_trained/'

    FLAGS = mnist_training_flags(data_dir, model_dir, train_loop_count,
                                 batch_size, test_image_count,
                                 make_deterministic, optimizer)

    # Run on nGraph
    ng_loss_values, ng_test_accuracy = train_mnist_cnn(FLAGS)
    ng_values = ng_loss_values + [ng_test_accuracy]

    # Reset the Graph
    tf.compat.v1.reset_default_graph()

    # Disable ngraph-tf and repeat the same training on stock TensorFlow
    ngraph_bridge.disable()
    tf_loss_values, tf_test_accuracy = train_mnist_cnn(FLAGS)
    tf_values = tf_loss_values + [tf_test_accuracy]

    # Compare loss and accuracy values between the nGraph and TF runs
    assert np.allclose(
        ng_values, tf_values, atol=1e-3), "Loss or Accuracy values don't match"
def test_conv2d():
    # Test 1: tf_model TF-native
    with tf.compat.v1.Session(config=config) as sess_tf:
        ngraph_bridge.disable()
        tf_out, input_data = tf_model()
        feed_dict = {input_data: t_np}
        tf_outval = sess_tf.run(tf_out, feed_dict=feed_dict)

    # Test 2: model2 with ngraph, NNP backend
    with tf.compat.v1.Session(config=config) as sess_ng:
        ngraph_bridge.enable()
        ngraph_bridge.update_config(config)
        os.environ['NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS'] = '1'
        ng_out, input_data = ng_model()
        feed_dict = {input_data: n_np}
        ng_outval = sess_ng.run(ng_out, feed_dict=feed_dict)

    # TF output is NHWC; transpose to NCHW before comparing with the ngraph output
    assert np.allclose(
        np.transpose(tf_outval, (0, 3, 1, 2)), ng_outval, rtol=0, atol=1e-02)
def test_maxpoolbackprop_nhwc(padding):
    g_np = grad_nhwc[padding]
    o_np = output_nhwc[padding]

    # Test 1: tf_model TF-native
    with tf.Session(config=config) as sess_tf:
        ngraph_bridge.disable()
        tf_out, orig_in, orig_out, grad = tf_model(padding)
        feed_dict = {orig_in: i_np, orig_out: o_np, grad: g_np}
        tf_outval = sess_tf.run(tf_out, feed_dict=feed_dict)

    # Test 2: model2 with ngraph, NNP backend
    with tf.Session(config=config) as sess_ng:
        ngraph_bridge.enable()
        ngraph_bridge.update_config(config)
        os.environ['NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS'] = '1'
        ng_out, orig_in, orig_out, grad = ng_model(padding)
        feed_dict = {orig_in: i_np, orig_out: o_np, grad: g_np}
        ng_outval = sess_ng.run(ng_out, feed_dict=feed_dict)

    assert (np.allclose(tf_outval, ng_outval, rtol=0, atol=1e-02))
def test_fusedbatchnorm_nhwc():
    # Test 1: tf_model TF-native
    with tf.Session(config=config) as sess_tf:
        ngraph_bridge.disable()
        tf_out, in_0 = tf_model()
        feed_dict = {in_0: k_np}
        tf_outval = sess_tf.run(tf_out, feed_dict=feed_dict)

    # Test 2: model2 with ngraph, NNP backend
    with tf.Session(config=config) as sess_ng:
        ngraph_bridge.enable()
        ngraph_bridge.update_config(config)
        os.environ['NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS'] = '1'
        ng_out, in_0 = ng_model()
        feed_dict = {in_0: k_np}
        ng_outval = sess_ng.run(ng_out, feed_dict=feed_dict)

    result1_bool = np.allclose(tf_outval[0], ng_outval[0], rtol=0, atol=1e-02)
    result2_bool = np.allclose(tf_outval[1], ng_outval[1], rtol=0, atol=1e-02)
    result3_bool = np.allclose(tf_outval[2], ng_outval[2], rtol=0, atol=1e-02)

    assert (result1_bool and result2_bool and result3_bool)
def setup_ngraph_bridge(self, backend):
    # Environment variables
    os.environ['PLAIDML_USE_STRIPE'] = '1'
    if self.workers < 1:
        # os.environ values must be strings, not ints
        os.environ['OMP_NUM_THREADS'] = '1'
    else:
        # Use default
        if os.getenv('OMP_NUM_THREADS') is not None:
            del os.environ['OMP_NUM_THREADS']

    import ngraph_bridge
    if backend == 'DISABLED' or backend == 'TF':
        ngraph_bridge.disable()
    elif backend == 'CPU':
        ngraph_bridge.set_backend('CPU')
        ngraph_bridge.enable()
    elif backend == 'PLAIDML':
        ngraph_bridge.set_backend('PLAIDML')
        ngraph_bridge.enable()
    else:
        print("ERROR: Unsupported backend " + backend + " selected.")
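# Hedged usage sketch for setup_ngraph_bridge above. The Runner class and its
# 'workers' attribute are hypothetical stand-ins for the real caller; the
# backend strings come from the branches in the function itself.
class Runner:

    def __init__(self, workers):
        self.workers = workers

runner = Runner(workers=0)
setup_ngraph_bridge(runner, 'PLAIDML')  # nGraph with the PlaidML backend
setup_ngraph_bridge(runner, 'TF')       # fall back to stock TensorFlow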
def test_conv2dbackpropfilter_nchw(padding):
    n_np_inp = np.random.rand(*input_sizes_nchw).astype('f')
    n_np_out = np.random.rand(*out_backprop_in_sizes[padding]).astype('f')

    # Transpose to NHWC for TF
    t_np_inp = np.transpose(n_np_inp, (0, 2, 3, 1))
    t_np_out = np.transpose(n_np_out, (0, 2, 3, 1))

    with tf.compat.v1.Session(config=config) as sess_tf:
        ngraph_bridge.disable()
        tf_out, input_data, out_backprop = tf_model(padding)
        feed_dict = {input_data: t_np_inp, out_backprop: t_np_out}
        tf_outval = sess_tf.run(tf_out, feed_dict=feed_dict)

    # Test 2: model2 with ngraph, NNP backend
    with tf.compat.v1.Session(config=config) as sess_ng:
        ngraph_bridge.enable()
        ngraph_bridge.update_config(config)
        os.environ['NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS'] = '1'
        ng_out, input_data, out_backprop = ng_model(padding)
        feed_dict = {input_data: n_np_inp, out_backprop: n_np_out}
        ng_outval = sess_ng.run(ng_out, feed_dict=feed_dict)

    assert np.allclose(tf_outval, ng_outval, rtol=0, atol=1e-02)
def test_disable(self):
    ngraph_bridge.disable()
    assert ngraph_bridge.is_enabled() == 0
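# A companion check, sketched as an assumption (the enable-side test is not
# shown in this collection): enabling the bridge should make is_enabled()
# report true again.
def test_enable(self):
    ngraph_bridge.enable()
    assert ngraph_bridge.is_enabled()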
def run(self):
    """run benchmark with optimized graph"""

    print("Run inference with dummy data")
    config = tf.compat.v1.ConfigProto()
    config.intra_op_parallelism_threads = self.args.num_intra_threads
    config.inter_op_parallelism_threads = self.args.num_inter_threads
    config.use_per_session_threads = True

    data_graph = tf.Graph()
    with data_graph.as_default():
        input_shape = [
            self.args.batch_size, RESNET_IMAGE_SIZE, RESNET_IMAGE_SIZE, 3
        ]
        images = tf.random.uniform(
            input_shape,
            0.0,
            255.0,
            dtype=tf.float32,
            seed=42,
            name='synthetic_images')

    infer_graph = tf.Graph()
    with infer_graph.as_default():
        graph_def = tf.compat.v1.GraphDef()
        with tf.io.gfile.GFile(self.args.input_graph, 'rb') as input_file:
            input_graph_content = input_file.read()
            graph_def.ParseFromString(input_graph_content)

        print("Optimizing graph %s for inference..." % self.args.input_graph)
        output_graph = optimize_for_inference(graph_def, [INPUTS], [OUTPUTS],
                                              dtypes.float32.as_datatype_enum,
                                              False)
        tf.import_graph_def(output_graph, name='')

    input_tensor = infer_graph.get_tensor_by_name('input_tensor:0')
    output_tensor = infer_graph.get_tensor_by_name('softmax_tensor:0')

    # Run without nGraph first
    print("Run inference (without nGraph)")
    ngraph_bridge.disable()
    data_sess = tf.compat.v1.Session(graph=data_graph, config=config)
    infer_sess = tf.compat.v1.Session(graph=infer_graph, config=config)

    iteration = 0
    num_processed_images = 0
    num_remaining_images = self.args.num_images
    tf_time = 0.0
    tf_labels = np.array([], dtype=np.int32)
    while num_remaining_images >= self.args.batch_size:
        np_images = data_sess.run(images)
        if iteration > self.args.warmup_iters:
            num_processed_images += self.args.batch_size
            num_remaining_images -= self.args.batch_size
        tf_start_time = time.time()
        predictions = infer_sess.run(output_tensor, {input_tensor: np_images})
        tf_elapsed_time = time.time() - tf_start_time
        if iteration > self.args.warmup_iters:
            tf_time += tf_elapsed_time
            tf_labels = np.append(tf_labels, np.argmax(predictions, axis=-1))
        iteration += 1
    print("Total execution time (TF): ", tf_time)

    # Run with nGraph now
    print("Run inference (with nGraph)")
    ngraph_bridge.enable()
    data_sess = tf.compat.v1.Session(graph=data_graph, config=config)
    infer_sess = tf.compat.v1.Session(graph=infer_graph, config=config)

    iteration = 0
    num_processed_images = 0
    num_remaining_images = self.args.num_images
    ngtf_time = 0.0
    ngtf_labels = np.array([], dtype=np.int32)
    while num_remaining_images >= self.args.batch_size:
        np_images = data_sess.run(images)
        if iteration > self.args.warmup_iters:
            num_processed_images += self.args.batch_size
            num_remaining_images -= self.args.batch_size
        ngtf_start_time = time.time()
        predictions = infer_sess.run(output_tensor, {input_tensor: np_images})
        ngtf_elapsed_time = time.time() - ngtf_start_time
        if iteration > self.args.warmup_iters:
            ngtf_time += ngtf_elapsed_time
            ngtf_labels = np.append(ngtf_labels,
                                    np.argmax(predictions, axis=-1))
        iteration += 1
    print("Total execution time (NGTF): ", ngtf_time)

    print("Processed %d images. Batch size = %d" %
          (num_processed_images, self.args.batch_size))
    print("Avg throughput (TF): %0.4f img/s" %
          (num_processed_images / tf_time))
    print("Avg throughput (NGTF): %0.4f img/s" %
          (num_processed_images / ngtf_time))

    assert ((tf_labels == ngtf_labels).all())