def _no_rewrite_session_config(self):
  """Return a session ConfigProto whose rewriter disables model pruning.

  Returns:
    A `config_pb2.ConfigProto` wrapping `GraphOptions` whose
    `rewrite_options` is a `RewriterConfig` with
    `disable_model_pruning=True`.
  """
  return config_pb2.ConfigProto(
      graph_options=config_pb2.GraphOptions(
          rewrite_options=rewriter_config_pb2.RewriterConfig(
              disable_model_pruning=True)))
class RNNTest(test.TestCase):
  """Tests for UnifiedLSTM under a custom grappler rewriter configuration.

  The class-level `config` registers the 'ExperimentalImplementationSelector'
  custom optimizer, turns `function_optimization` off and sets
  `min_graph_nodes` to -1. The graph-mode tests pass it to the session and the
  decorated tests pass it to `run_in_graph_and_eager_modes`.
  """

  # Shared grappler configuration. Built once at class-definition time so it
  # can also be referenced by the decorators below.
  rewrites = rewriter_config_pb2.RewriterConfig()
  rewrites.function_optimization = rewriter_config_pb2.RewriterConfig.OFF
  customer_optimizer = rewrites.custom_optimizers.add()
  customer_optimizer.name = 'ExperimentalImplementationSelector'
  rewrites.min_graph_nodes = -1
  graph_options = config_pb2.GraphOptions(rewrite_options=rewrites)
  config = config_pb2.ConfigProto(graph_options=graph_options)

  def setUp(self):
    """Expose the class-level session config on the instance."""
    self.config = RNNTest.config

  def tearDown(self):
    """Reset the default graph so tests do not share graph state."""
    ops.reset_default_graph()

  def _assert_runtime_matches_device(self, runtime_value):
    """Assert the runtime is b'cudnn' if a GPU is available, else b'cpu'.

    Args:
      runtime_value: The evaluated `runtime` output of the UnifiedLSTM layer.
    """
    expected = b'cudnn' if test.is_gpu_available() else b'cpu'
    # assertEqual, not the deprecated assertEquals alias (removed in
    # Python 3.12).
    self.assertEqual(runtime_value, expected)

  def test_unifiedRNN(self):
    """Train a UnifiedLSTM in graph mode and check runtime and loss update."""
    input_shape = 10
    rnn_state_size = 8
    output_shape = 8
    timestep = 4
    batch = 100
    epoch = 1

    with self.cached_session(config=self.config, use_gpu=True) as sess:
      (x_train, y_train), _ = testing_utils.get_test_data(
          train_samples=batch,
          test_samples=0,
          input_shape=(timestep, input_shape),
          num_classes=output_shape)
      y_train = keras.utils.to_categorical(y_train, output_shape)

      layer = UnifiedLSTM(rnn_state_size)

      inputs = array_ops.placeholder(
          dtypes.float32, shape=(None, timestep, input_shape), name='inputs')
      predict = array_ops.placeholder(
          dtypes.float32, shape=(None, output_shape), name='predict')

      outputs, runtime = layer(inputs)
      loss = losses.softmax_cross_entropy(predict, outputs)
      optimizer = gradient_descent.GradientDescentOptimizer(0.001)
      train_op = optimizer.minimize(loss)

      sess.run([variables.global_variables_initializer()])
      existing_loss = 0
      for _ in range(epoch):
        loss_value, _, runtime_value = sess.run([loss, train_op, runtime], {
            inputs: x_train,
            predict: y_train
        })
        self._assert_runtime_matches_device(runtime_value)
        # Make sure the loss is updated for every epoch
        # (layer weights properly updated).
        self.assertNotEqual(existing_loss, loss_value)
        existing_loss = loss_value

  def test_unifiedRNN_with_cond(self):
    """Same as test_unifiedRNN, with the layer called inside a `cond`."""
    # This test is to demonstrate the graph rewrite of grappler plugin under
    # the condition that the function returns different number of internal
    # states.
    input_shape = 10
    rnn_state_size = 8
    output_shape = 8
    timestep = 4
    batch = 100
    epoch = 1

    with self.cached_session(config=self.config, use_gpu=True) as sess:
      (x_train, y_train), _ = testing_utils.get_test_data(
          train_samples=batch,
          test_samples=0,
          input_shape=(timestep, input_shape),
          num_classes=output_shape)
      y_train = keras.utils.to_categorical(y_train, output_shape)

      layer = UnifiedLSTM(rnn_state_size)

      inputs = array_ops.placeholder(
          dtypes.float32, shape=(None, timestep, input_shape), name='inputs')
      predict = array_ops.placeholder(
          dtypes.float32, shape=(None, output_shape), name='predict')

      # Outputs of the branch that is never taken.
      zeros = array_ops.zeros([batch, output_shape])
      dummy_runtime = constant_op.constant(
          'unknown', dtype=dtypes.string, name='runtime')
      a = constant_op.constant(0)
      b = constant_op.constant(1)
      # Will always run the lstm layer.
      outputs, runtime = control_flow_ops.cond(
          gen_math_ops.less(a, b),
          lambda: layer(inputs),
          lambda: (zeros, dummy_runtime))
      loss = losses.softmax_cross_entropy(predict, outputs)
      optimizer = gradient_descent.GradientDescentOptimizer(0.001)
      train_op = optimizer.minimize(loss)

      sess.run([variables.global_variables_initializer()])
      existing_loss = 0

      for _ in range(epoch):
        loss_value, _, runtime_value = sess.run([loss, train_op, runtime], {
            inputs: x_train,
            predict: y_train
        })
        self._assert_runtime_matches_device(runtime_value)
        # Make sure the loss is updated for every epoch
        # (layer weights properly updated).
        self.assertNotEqual(existing_loss, loss_value)
        existing_loss = loss_value

  @test_util.run_in_graph_and_eager_modes(config=config)
  def test_keras_model_with_lstm(self):
    """Fit a Keras model built around UnifiedLSTM; passes if fit completes."""
    input_shape = 10
    rnn_state_size = 8
    output_shape = 8
    timestep = 4
    batch = 100
    epoch = 10

    (x_train, y_train), _ = testing_utils.get_test_data(
        train_samples=batch,
        test_samples=0,
        input_shape=(timestep, input_shape),
        num_classes=output_shape)
    y_train = keras.utils.to_categorical(y_train, output_shape)

    layer = UnifiedLSTM(rnn_state_size)

    inputs = keras.layers.Input(
        shape=[timestep, input_shape], dtype=dtypes.float32)

    outputs, unused_runtime = layer(inputs)
    model = keras.models.Model(inputs, outputs)
    model.compile('rmsprop', loss='mse')
    model.fit(x_train, y_train, epochs=epoch)

  def _measure_performance(self, test_config, model, x_train, y_train):
    """Return the average wall-clock seconds per epoch of `model.fit`.

    Args:
      test_config: Dict providing the 'batch', 'epoch' and 'warmup_epoch' keys.
      model: Compiled Keras model to fit.
      x_train: Training inputs passed to `model.fit`.
      y_train: Training targets passed to `model.fit`.

    Returns:
      Elapsed time of the timed fit divided by `epoch - warmup_epoch`.
    """
    batch = test_config['batch']
    epoch = test_config['epoch']
    warmup_epoch = test_config['warmup_epoch']

    # warm up the model
    model.fit(x_train, y_train, batch_size=batch, epochs=warmup_epoch)
    start_time = time.time()
    model.fit(x_train, y_train, batch_size=batch, epochs=epoch - warmup_epoch)
    end_time = time.time()
    return (end_time - start_time) / (epoch - warmup_epoch)

  def _time_performance_run_cudnn_lstm(self, test_config, x_train, y_train):
    """Return seconds per epoch for a model built on `CuDNNLSTM`."""
    # Get the performance number for standard Cudnn LSTM
    input_shape = test_config['input_shape']
    rnn_state_size = test_config['rnn_state_size']
    timestep = test_config['timestep']

    cudnn_lstm_layer = CuDNNLSTM(rnn_state_size)
    inputs = keras.layers.Input(
        shape=[timestep, input_shape], dtype=dtypes.float32)

    outputs = cudnn_lstm_layer(inputs)
    model = keras.models.Model(inputs, outputs)
    model.compile('sgd', 'mse')

    sec_per_epoch = self._measure_performance(
        test_config, model, x_train, y_train)
    logging.info('Average performance for %s per epoch is: %s',
                 'CuDNN LSTM', sec_per_epoch)
    return sec_per_epoch

  def _time_performance_run_unifed_lstm_gpu(
      self, test_config, x_train, y_train):
    """Return seconds per epoch for a model built on `UnifiedLSTM`."""
    # Get performance number for Unified_LSTM with grappler swap the impl
    input_shape = test_config['input_shape']
    rnn_state_size = test_config['rnn_state_size']
    timestep = test_config['timestep']

    layer = UnifiedLSTM(rnn_state_size)
    inputs = keras.layers.Input(
        shape=[timestep, input_shape], dtype=dtypes.float32)

    # UnifiedLSTM returns (outputs, runtime); the runtime is not needed here.
    outputs, _ = layer(inputs)
    model = keras.models.Model(inputs, outputs)
    model.compile('sgd', 'mse')

    sec_per_epoch = self._measure_performance(
        test_config, model, x_train, y_train)
    logging.info('Average performance for %s per epoch is: %s',
                 'Unified LSTM', sec_per_epoch)
    return sec_per_epoch

  def _time_performance_run_normal_lstm(
      self, test_config, x_train, y_train):
    """Return seconds per epoch for a model built on `keras.layers.LSTM`."""
    # Get performance number for standard LSTM on GPU.
    input_shape = test_config['input_shape']
    rnn_state_size = test_config['rnn_state_size']
    timestep = test_config['timestep']

    layer = keras.layers.LSTM(rnn_state_size)
    inputs = keras.layers.Input(
        shape=[timestep, input_shape], dtype=dtypes.float32)

    outputs = layer(inputs)
    model = keras.models.Model(inputs, outputs)
    model.compile('sgd', 'mse')

    sec_per_epoch = self._measure_performance(
        test_config, model, x_train, y_train)
    logging.info('Average performance for %s per epoch is: %s',
                 'Normal LSTM', sec_per_epoch)
    return sec_per_epoch

  @test_util.run_in_graph_and_eager_modes(config=config, use_gpu=True)
  def test_performance_with_standard_cudnn_impl(self):
    """Time the three LSTM variants on GPU and log their speed ratios.

    The ratios are only logged; nothing is asserted on them (see the TODO
    below). The test is skipped when no GPU is available.
    """
    if not test.is_gpu_available():
      self.skipTest('performance test will only run on GPU')

    batch = 64
    num_batch = 10
    test_config = {
        'input_shape': 128,
        'rnn_state_size': 64,
        'output_shape': 64,
        'timestep': 50,
        'batch': batch,
        'epoch': 20,
        # The performance for warmup epoch is ignored.
        'warmup_epoch': 1,
    }
    (x_train, y_train), _ = testing_utils.get_test_data(
        train_samples=(batch * num_batch),
        test_samples=0,
        input_shape=(test_config['timestep'], test_config['input_shape']),
        num_classes=test_config['output_shape'])
    y_train = keras.utils.to_categorical(y_train, test_config['output_shape'])

    cudnn_duration = self._time_performance_run_cudnn_lstm(
        test_config, x_train, y_train)
    unified_lstm_gpu_duration = self._time_performance_run_unifed_lstm_gpu(
        test_config, x_train, y_train)
    normal_lstm_duration = self._time_performance_run_normal_lstm(
        test_config, x_train, y_train)

    cudnn_vs_unified = cudnn_duration / unified_lstm_gpu_duration
    unified_vs_normal = normal_lstm_duration / unified_lstm_gpu_duration

    # TODO(scottzhu): re-enable the test after moving it to benchmark test
    # suite. The current test has performance flakiness issue.
    logging.info('Expect the performance of Unified LSTM is within 80% of '
                 'CuDNN LSTM, got {0:.2f}%'.format(cudnn_vs_unified * 100))
    logging.info('Expect the performance of Unified LSTM is more than 5 times'
                 ' of normal LSTM, got {0:.2f}'.format(unified_vs_normal))
def _no_rewrite_session_config(self):
  """Return a session ConfigProto with pin-to-host optimization set to OFF.

  Returns:
    A `config_pb2.ConfigProto` whose graph options carry a `RewriterConfig`
    with `pin_to_host_optimization=RewriterConfig.OFF`.
  """
  off = rewriter_config_pb2.RewriterConfig.OFF
  rewrite_opts = rewriter_config_pb2.RewriterConfig(
      pin_to_host_optimization=off)
  return config_pb2.ConfigProto(
      graph_options=config_pb2.GraphOptions(rewrite_options=rewrite_opts))
def no_rewrite_session_config():
  """Return a ConfigProto with model pruning disabled and constant folding off.

  Returns:
    A `config_pb2.ConfigProto` whose `graph_options.rewrite_options` has
    `disable_model_pruning=True` and `constant_folding=RewriterConfig.OFF`.
  """
  rewriter_kwargs = {
      'disable_model_pruning': True,
      'constant_folding': rewriter_config_pb2.RewriterConfig.OFF,
  }
  options = config_pb2.GraphOptions(
      rewrite_options=rewriter_config_pb2.RewriterConfig(**rewriter_kwargs))
  return config_pb2.ConfigProto(graph_options=options)
from tensorflow.python.keras.utils import np_utils from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import gen_math_ops from tensorflow.python.ops import nn from tensorflow.python.ops import variables from tensorflow.python.platform import test from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import gradient_descent from tensorflow.python.util import nest # Global config for grappler setting that is used for graph mode test. _rewrites = rewriter_config_pb2.RewriterConfig() _rewrites.implementation_selector = rewriter_config_pb2.RewriterConfig.ON _rewrites.min_graph_nodes = -1 _graph_options = config_pb2.GraphOptions(rewrite_options=_rewrites) _config = config_pb2.ConfigProto(graph_options=_graph_options) @keras_parameterized.run_all_keras_modes(config=_config) class LSTMV2Test(keras_parameterized.TestCase): @parameterized.named_parameters( ('non_tan_activation', 'relu', 'sigmoid', 0, False, True), ('non_sigmoid_recur_activation', 'tanh', 'relu', 0, False, True), ('use_recurrent_dropout', 'tanh', 'sigmoid', 0.1, False, True), ('unroll', 'tanh', 'sigmoid', 0, True, True), ('not_use_bias', 'tanh', 'sigmoid', 0, False, False), ) def test_could_use_defun_backend(self, activation, recurrent_activation, recurrent_dropout, unroll, use_bias): layer = rnn.LSTM(1,
def _GetMemoryOptimizerSessionConfig(self):
  """Return a ConfigProto requesting HEURISTICS memory optimization.

  Returns:
    A `config_pb2.ConfigProto` whose rewriter config has
    `disable_model_pruning=True` and
    `memory_optimization=RewriterConfig.HEURISTICS`.
  """
  heuristics = rewriter_config_pb2.RewriterConfig.HEURISTICS
  rewriter = rewriter_config_pb2.RewriterConfig(
      disable_model_pruning=True, memory_optimization=heuristics)
  return config_pb2.ConfigProto(
      graph_options=config_pb2.GraphOptions(rewrite_options=rewriter))
# requests . This is only necessary when the network fabric is experiencing a # significant error rate. Without it we'll fail a step on an network error, # while with it we'll be able to complete long steps (like complex # initializations) in the face of some network errors during RecvTensor. rpc_options.cache_rpc_response = True rewriter_config = rewriter_config_pb2.RewriterConfig( disable_model_pruning=True, disable_meta_optimizer=True, dependency_optimization=rewriter_config_pb2.RewriterConfig.OFF, fail_on_optimizer_errors=True, ) graph_options = config_pb2.GraphOptions( rewrite_options=rewriter_config, place_pruned_graph=True, infer_shapes=True, ) session_config = config_pb2.ConfigProto( graph_options=graph_options, allow_soft_placement=True, isolate_session_state=False, ) # share variables across sessions on TPUs session_config.experimental.share_session_state_in_clusterspec_propagation = True # TODO: research this. What does it do? # session_config.share_cluster_devices_in_session = True master = None
def npu_graph_options(graph_options=None):
  """Return GraphOptions with the global JIT level set to OFF.

  Args:
    graph_options: Optional `config_pb2.GraphOptions` to update in place. Any
      value that is not a `GraphOptions` (including the default None) is
      replaced by a freshly constructed `GraphOptions`.

  Returns:
    The supplied or newly created `GraphOptions`, with
    `optimizer_options.global_jit_level` set to `OptimizerOptions.OFF`.
  """
  options_cls = config_pb2.GraphOptions
  # Same check as the original, expressed positively; `and` short-circuits in
  # the same order as the original `not ... or not ...`.
  usable = (isinstance(graph_options, options_cls) and
            issubclass(type(graph_options), options_cls))
  if not usable:
    graph_options = options_cls()
  jit_off = config_pb2.OptimizerOptions.OFF
  graph_options.optimizer_options.global_jit_level = jit_off
  return graph_options
def test_unifiedRNN_with_cond(self):
  """Train a UnifiedLSTM wrapped in `cond` and check runtime and loss update.

  A fresh graph and session are created with a rewriter config that registers
  the 'ExperimentalImplementationSelector' custom optimizer. The reported
  runtime must be b'cudnn' when a GPU is available and b'cpu' otherwise, and
  the loss must change on every epoch.
  """
  # This test is to demonstrate the graph rewrite of grappler plugin under
  # the condition that the function returns different number of internal
  # states.
  rewrites = rewriter_config_pb2.RewriterConfig()
  rewrites.function_optimization = rewriter_config_pb2.RewriterConfig.OFF
  customer_optimizer = rewrites.custom_optimizers.add()
  customer_optimizer.name = 'ExperimentalImplementationSelector'
  rewrites.min_graph_nodes = -1
  graph_options = config_pb2.GraphOptions(rewrite_options=rewrites)
  config = config_pb2.ConfigProto(graph_options=graph_options)

  input_shape = 10
  rnn_state_size = 8
  output_shape = 8
  timestep = 4
  batch = 100
  epoch = 1

  with ops.Graph().as_default(), session.Session(config=config) as sess:
    (x_train, y_train), _ = testing_utils.get_test_data(
        train_samples=batch,
        test_samples=0,
        input_shape=(timestep, input_shape),
        num_classes=output_shape)
    # Pass the class count explicitly so the one-hot width always matches the
    # `predict` placeholder instead of depending on the sampled labels.
    y_train = keras.utils.to_categorical(y_train, output_shape)

    layer = UnifiedLSTM(rnn_state_size)

    inputs = array_ops.placeholder(
        dtypes.float32, shape=(None, timestep, input_shape), name='inputs')
    predict = array_ops.placeholder(
        dtypes.float32, shape=(None, output_shape), name='predict')

    # Outputs of the branch that is never taken.
    zeros = array_ops.zeros([batch, output_shape])
    dummy_runtime = constant_op.constant(
        'unknown', dtype=dtypes.string, name='runtime')
    a = constant_op.constant(0)
    b = constant_op.constant(1)
    # Will always run the lstm layer.
    outputs, runtime = control_flow_ops.cond(
        gen_math_ops.less(a, b),
        lambda: layer(inputs),
        lambda: (zeros, dummy_runtime))
    loss = losses.softmax_cross_entropy(predict, outputs)
    optimizer = gradient_descent.GradientDescentOptimizer(0.001)
    train_op = optimizer.minimize(loss)

    sess.run([variables.global_variables_initializer()])
    existing_loss = 0

    for _ in range(epoch):
      loss_value, _, runtime_value = sess.run([loss, train_op, runtime], {
          inputs: x_train,
          predict: y_train
      })
      # assertEqual, not the deprecated assertEquals alias (removed in
      # Python 3.12).
      if test.is_gpu_available():
        self.assertEqual(runtime_value, b'cudnn')
      else:
        self.assertEqual(runtime_value, b'cpu')
      # Make sure the loss is updated for every epoch
      # (layer weights properly updated).
      self.assertNotEqual(existing_loss, loss_value)
      existing_loss = loss_value
def _GetMemoryOptimizerConfig(self):
  """Return a ConfigProto whose rewriter uses HEURISTICS memory optimization.

  Returns:
    A `config_pb2.ConfigProto` wrapping `GraphOptions` whose
    `rewrite_options.memory_optimization` is `RewriterConfig.HEURISTICS`.
  """
  return config_pb2.ConfigProto(
      graph_options=config_pb2.GraphOptions(
          rewrite_options=rewriter_config_pb2.RewriterConfig(
              memory_optimization=(
                  rewriter_config_pb2.RewriterConfig.HEURISTICS))))