Example No. 1
def _no_rewrite_session_config(self):
  rewriter_config = rewriter_config_pb2.RewriterConfig(
      disable_model_pruning=True)
  graph_options = config_pb2.GraphOptions(
      rewrite_options=rewriter_config)
  return config_pb2.ConfigProto(graph_options=graph_options)
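For context, a minimal usage sketch of a helper like the one above. Everything except the RewriterConfig wiring is an assumption (TF1-style graph mode, a standalone function instead of the original test method, a toy constant graph), not part of the original snippet.

# Usage sketch (assumptions: tf.compat.v1 graph mode, standalone function;
# only the config wiring mirrors the example above).
import tensorflow.compat.v1 as tf
from tensorflow.core.protobuf import config_pb2
from tensorflow.core.protobuf import rewriter_config_pb2

tf.disable_eager_execution()

def no_rewrite_session_config():
  rewriter_config = rewriter_config_pb2.RewriterConfig(
      disable_model_pruning=True)
  graph_options = config_pb2.GraphOptions(rewrite_options=rewriter_config)
  return config_pb2.ConfigProto(graph_options=graph_options)

x = tf.constant(1.0)
with tf.Session(config=no_rewrite_session_config()) as sess:
  print(sess.run(x))  # the graph runs with Grappler's model pruning disabled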
Example No. 2
class RNNTest(test.TestCase):

  rewrites = rewriter_config_pb2.RewriterConfig()
  rewrites.function_optimization = rewriter_config_pb2.RewriterConfig.OFF
  custom_optimizer = rewrites.custom_optimizers.add()
  custom_optimizer.name = 'ExperimentalImplementationSelector'
  rewrites.min_graph_nodes = -1
  graph_options = config_pb2.GraphOptions(rewrite_options=rewrites)
  config = config_pb2.ConfigProto(graph_options=graph_options)

  def setUp(self):
    self.config = RNNTest.config

  def tearDown(self):
    ops.reset_default_graph()

  def test_unifiedRNN(self):
    input_shape = 10
    rnn_state_size = 8
    output_shape = 8
    timestep = 4
    batch = 100
    epoch = 1

    with self.cached_session(config=self.config, use_gpu=True) as sess:
      (x_train, y_train), _ = testing_utils.get_test_data(
          train_samples=batch,
          test_samples=0,
          input_shape=(timestep, input_shape),
          num_classes=output_shape)
      y_train = keras.utils.to_categorical(y_train, output_shape)

      layer = UnifiedLSTM(rnn_state_size)

      inputs = array_ops.placeholder(
          dtypes.float32, shape=(None, timestep, input_shape), name='inputs')
      predict = array_ops.placeholder(
          dtypes.float32, shape=(None, output_shape), name='predict')

      outputs, runtime = layer(inputs)
      loss = losses.softmax_cross_entropy(predict, outputs)
      optimizer = gradient_descent.GradientDescentOptimizer(0.001)
      train_op = optimizer.minimize(loss)

      sess.run([variables.global_variables_initializer()])
      existing_loss = 0
      for _ in range(epoch):
        loss_value, _, runtime_value = sess.run([loss, train_op, runtime], {
            inputs: x_train,
            predict: y_train
        })
        if test.is_gpu_available():
          self.assertEqual(runtime_value, b'cudnn')
        else:
          self.assertEqual(runtime_value, b'cpu')
        # Make sure the loss is updated for every epoch
        # (layer weights properly updated).
        self.assertNotEqual(existing_loss, loss_value)
        existing_loss = loss_value

  def test_unifiedRNN_with_cond(self):
    # This test is to demonstrate the graph rewrite of grappler plugin under
    # the condition that the function returns different number of internal
    # states.
    input_shape = 10
    rnn_state_size = 8
    output_shape = 8
    timestep = 4
    batch = 100
    epoch = 1

    with self.cached_session(config=self.config, use_gpu=True) as sess:
      (x_train, y_train), _ = testing_utils.get_test_data(
          train_samples=batch,
          test_samples=0,
          input_shape=(timestep, input_shape),
          num_classes=output_shape)
      y_train = keras.utils.to_categorical(y_train, output_shape)

      layer = UnifiedLSTM(rnn_state_size)

      inputs = array_ops.placeholder(
          dtypes.float32, shape=(None, timestep, input_shape), name='inputs')
      predict = array_ops.placeholder(
          dtypes.float32, shape=(None, output_shape), name='predict')

      zeros = array_ops.zeros([batch, output_shape])
      dummy_runtime = constant_op.constant(
          'unknown', dtype=dtypes.string, name='runtime')
      a = constant_op.constant(0)
      b = constant_op.constant(1)
      # Will always run the lstm layer.
      outputs, runtime = control_flow_ops.cond(
          gen_math_ops.less(a, b),
          lambda: layer(inputs),
          lambda: (zeros, dummy_runtime))
      loss = losses.softmax_cross_entropy(predict, outputs)
      optimizer = gradient_descent.GradientDescentOptimizer(0.001)
      train_op = optimizer.minimize(loss)

      sess.run([variables.global_variables_initializer()])
      existing_loss = 0

      for _ in range(epoch):
        loss_value, _, runtime_value = sess.run([loss, train_op, runtime], {
            inputs: x_train,
            predict: y_train
        })
        if test.is_gpu_available():
          self.assertEqual(runtime_value, b'cudnn')
        else:
          self.assertEqual(runtime_value, b'cpu')
        # Make sure the loss is updated for every epoch
        # (layer weights properly updated).
        self.assertNotEqual(existing_loss, loss_value)
        existing_loss = loss_value

  @test_util.run_in_graph_and_eager_modes(config=config)
  def test_keras_model_with_lstm(self):
    input_shape = 10
    rnn_state_size = 8
    output_shape = 8
    timestep = 4
    batch = 100
    epoch = 10

    (x_train, y_train), _ = testing_utils.get_test_data(
        train_samples=batch,
        test_samples=0,
        input_shape=(timestep, input_shape),
        num_classes=output_shape)
    y_train = keras.utils.to_categorical(y_train, output_shape)

    layer = UnifiedLSTM(rnn_state_size)

    inputs = keras.layers.Input(
        shape=[timestep, input_shape], dtype=dtypes.float32)

    outputs, unused_runtime = layer(inputs)
    model = keras.models.Model(inputs, outputs)
    model.compile('rmsprop', loss='mse')
    model.fit(x_train, y_train, epochs=epoch)

  def _measure_performance(self, test_config, model, x_train, y_train):
    batch = test_config['batch']
    epoch = test_config['epoch']
    warmup_epoch = test_config['warmup_epoch']

    # warm up the model
    model.fit(x_train, y_train, batch_size=batch, epochs=warmup_epoch)
    start_time = time.time()
    model.fit(x_train, y_train, batch_size=batch, epochs=epoch - warmup_epoch)
    end_time = time.time()
    return (end_time - start_time) / (epoch - warmup_epoch)

  def _time_performance_run_cudnn_lstm(self, test_config, x_train, y_train):
    # Get the performance number for standard Cudnn LSTM
    input_shape = test_config['input_shape']
    rnn_state_size = test_config['rnn_state_size']
    timestep = test_config['timestep']

    cudnn_lstm_layer = CuDNNLSTM(rnn_state_size)
    inputs = keras.layers.Input(
        shape=[timestep, input_shape], dtype=dtypes.float32)

    outputs = cudnn_lstm_layer(inputs)
    model = keras.models.Model(inputs, outputs)
    model.compile('sgd', 'mse')

    sec_per_epoch = self._measure_performance(
        test_config, model, x_train, y_train)
    logging.info('Average performance for %s per epoch is: %s',
                 'CuDNN LSTM', sec_per_epoch)
    return sec_per_epoch

  def _time_performance_run_unifed_lstm_gpu(
      self, test_config, x_train, y_train):
    # Get the performance number for Unified LSTM, with Grappler swapping in
    # the CuDNN implementation.
    input_shape = test_config['input_shape']
    rnn_state_size = test_config['rnn_state_size']
    timestep = test_config['timestep']

    layer = UnifiedLSTM(rnn_state_size)
    inputs = keras.layers.Input(
        shape=[timestep, input_shape], dtype=dtypes.float32)

    outputs, _ = layer(inputs)
    model = keras.models.Model(inputs, outputs)
    model.compile('sgd', 'mse')

    sec_per_epoch = self._measure_performance(
        test_config, model, x_train, y_train)
    logging.info('Average performance for %s per epoch is: %s',
                 'Unified LSTM', sec_per_epoch)
    return sec_per_epoch

  def _time_performance_run_normal_lstm(
      self, test_config, x_train, y_train):
    # Get performance number for standard LSTM on GPU.
    input_shape = test_config['input_shape']
    rnn_state_size = test_config['rnn_state_size']
    timestep = test_config['timestep']

    layer = keras.layers.LSTM(rnn_state_size)
    inputs = keras.layers.Input(
        shape=[timestep, input_shape], dtype=dtypes.float32)

    outputs = layer(inputs)
    model = keras.models.Model(inputs, outputs)
    model.compile('sgd', 'mse')

    sec_per_epoch = self._measure_performance(
        test_config, model, x_train, y_train)
    logging.info('Average performance for %s per epoch is: %s',
                 'Normal LSTM', sec_per_epoch)
    return sec_per_epoch

  @test_util.run_in_graph_and_eager_modes(config=config, use_gpu=True)
  def test_performance_with_standard_cudnn_impl(self):
    if not test.is_gpu_available():
      self.skipTest('performance test will only run on GPU')

    batch = 64
    num_batch = 10
    test_config = {
        'input_shape': 128,
        'rnn_state_size': 64,
        'output_shape': 64,
        'timestep': 50,
        'batch': batch,
        'epoch': 20,
        # The performance for warmup epoch is ignored.
        'warmup_epoch': 1,
    }
    (x_train, y_train), _ = testing_utils.get_test_data(
        train_samples=(batch * num_batch),
        test_samples=0,
        input_shape=(test_config['timestep'], test_config['input_shape']),
        num_classes=test_config['output_shape'])
    y_train = keras.utils.to_categorical(y_train, test_config['output_shape'])

    cudnn_duration = self._time_performance_run_cudnn_lstm(
        test_config, x_train, y_train)
    unified_lstm_gpu_duration = self._time_performance_run_unifed_lstm_gpu(
        test_config, x_train, y_train)
    normal_lstm_duration = self._time_performance_run_normal_lstm(
        test_config, x_train, y_train)

    cudnn_vs_unified = cudnn_duration / unified_lstm_gpu_duration
    unified_vs_normal = normal_lstm_duration / unified_lstm_gpu_duration

    # TODO(scottzhu): re-enable the test after moving it to the benchmark test
    # suite. The current test has performance flakiness issues.
    logging.info('Expect the performance of Unified LSTM to be within 80% of '
                 'CuDNN LSTM, got {0:.2f}%'.format(cudnn_vs_unified * 100))
    logging.info('Expect the performance of Unified LSTM to be more than 5 '
                 'times that of normal LSTM, got {0:.2f}'.format(
                     unified_vs_normal))
Example No. 3
def _no_rewrite_session_config(self):
  rewriter_config = rewriter_config_pb2.RewriterConfig(
      pin_to_host_optimization=rewriter_config_pb2.RewriterConfig.OFF)
  graph_options = config_pb2.GraphOptions(
      rewrite_options=rewriter_config)
  return config_pb2.ConfigProto(graph_options=graph_options)
Example No. 4
def no_rewrite_session_config():
  rewriter_config = rewriter_config_pb2.RewriterConfig(
      disable_model_pruning=True,
      constant_folding=rewriter_config_pb2.RewriterConfig.OFF)
  graph_options = config_pb2.GraphOptions(rewrite_options=rewriter_config)
  return config_pb2.ConfigProto(graph_options=graph_options)
Example No. 5
# Assumed imports, restored for the code shown below; the original snippet's
# import list is truncated above this point.
from absl.testing import parameterized

from tensorflow.core.protobuf import config_pb2
from tensorflow.core.protobuf import rewriter_config_pb2
from tensorflow.python.keras import keras_parameterized
from tensorflow.python.keras.layers import recurrent_v2 as rnn
from tensorflow.python.keras.utils import np_utils
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import gen_math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import variables
from tensorflow.python.platform import test
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.training import gradient_descent
from tensorflow.python.util import nest

# Global config for grappler setting that is used for graph mode test.
_rewrites = rewriter_config_pb2.RewriterConfig()
_rewrites.implementation_selector = rewriter_config_pb2.RewriterConfig.ON
_rewrites.min_graph_nodes = -1
_graph_options = config_pb2.GraphOptions(rewrite_options=_rewrites)
_config = config_pb2.ConfigProto(graph_options=_graph_options)


@keras_parameterized.run_all_keras_modes(config=_config)
class LSTMV2Test(keras_parameterized.TestCase):
    @parameterized.named_parameters(
        ('non_tan_activation', 'relu', 'sigmoid', 0, False, True),
        ('non_sigmoid_recur_activation', 'tanh', 'relu', 0, False, True),
        ('use_recurrent_dropout', 'tanh', 'sigmoid', 0.1, False, True),
        ('unroll', 'tanh', 'sigmoid', 0, True, True),
        ('not_use_bias', 'tanh', 'sigmoid', 0, False, False),
    )
    def test_could_use_defun_backend(self, activation, recurrent_activation,
                                     recurrent_dropout, unroll, use_bias):
        layer = rnn.LSTM(1,
Example No. 6
def _GetMemoryOptimizerSessionConfig(self):
  rewrite_options = rewriter_config_pb2.RewriterConfig(
      disable_model_pruning=True,
      memory_optimization=rewriter_config_pb2.RewriterConfig.HEURISTICS)
  graph_options = config_pb2.GraphOptions(rewrite_options=rewrite_options)
  return config_pb2.ConfigProto(graph_options=graph_options)
Example No. 7
    # requests. This is only necessary when the network fabric is experiencing
    # a significant error rate. Without it we'll fail a step on a network
    # error, while with it we'll be able to complete long steps (like complex
    # initializations) in the face of some network errors during RecvTensor.
    rpc_options.cache_rpc_response = True

    rewriter_config = rewriter_config_pb2.RewriterConfig(
        disable_model_pruning=True,
        disable_meta_optimizer=True,
        dependency_optimization=rewriter_config_pb2.RewriterConfig.OFF,
        fail_on_optimizer_errors=True,
        )

    graph_options = config_pb2.GraphOptions(
        rewrite_options=rewriter_config,
        place_pruned_graph=True,
        infer_shapes=True,
        )

    session_config = config_pb2.ConfigProto(
        graph_options=graph_options,
        allow_soft_placement=True,
        isolate_session_state=False,
        )
    # share variables across sessions on TPUs
    session_config.experimental.share_session_state_in_clusterspec_propagation = True

    # TODO: research this. What does it do?
    # session_config.share_cluster_devices_in_session = True
    
    master = None
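The snippet above is truncated and builds rpc_options off-screen. Below is a hedged, self-contained sketch of the same RPC response caching, using ConfigProto's rpc_options field; the surrounding setup here is an assumption, not the original's context.

# Standalone sketch (assumption: minimal setup, not the original's context).
from tensorflow.core.protobuf import config_pb2

session_config = config_pb2.ConfigProto(allow_soft_placement=True)
# Cache RecvTensor RPC responses so long steps can survive transient
# network errors instead of failing the step outright.
session_config.rpc_options.cache_rpc_response = True
print(session_config)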
Example No. 8
def npu_graph_options(graph_options=None):
    # isinstance() already accepts subclasses, so the original's extra
    # issubclass() check was redundant.
    if not isinstance(graph_options, config_pb2.GraphOptions):
        graph_options = config_pb2.GraphOptions()
    # Setting global_jit_level to OFF disables XLA auto-clustering.
    graph_options.optimizer_options.global_jit_level = config_pb2.OptimizerOptions.OFF
    return graph_options
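A short usage sketch; only npu_graph_options itself comes from the example above, the rest is assumed.

# Usage sketch (assumption: the resulting GraphOptions feeds a ConfigProto).
from tensorflow.core.protobuf import config_pb2

options = npu_graph_options()  # fresh GraphOptions with the global XLA JIT off
config = config_pb2.ConfigProto(graph_options=options)
# config can then be passed to tf.compat.v1.Session(config=config).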
Example No. 9
  def test_unifiedRNN_with_cond(self):
    # This test is to demonstrate the graph rewrite of grappler plugin under
    # the condition that the function returns different number of internal
    # states.
    rewrites = rewriter_config_pb2.RewriterConfig()
    rewrites.function_optimization = rewriter_config_pb2.RewriterConfig.OFF
    custom_optimizer = rewrites.custom_optimizers.add()
    custom_optimizer.name = 'ExperimentalImplementationSelector'
    rewrites.min_graph_nodes = -1
    graph_options = config_pb2.GraphOptions(rewrite_options=rewrites)
    config = config_pb2.ConfigProto(graph_options=graph_options)

    input_shape = 10
    rnn_state_size = 8
    output_shape = 8
    timestep = 4
    batch = 100
    epoch = 1

    with ops.Graph().as_default(), session.Session(config=config) as sess:
      (x_train, y_train), _ = testing_utils.get_test_data(
          train_samples=batch,
          test_samples=0,
          input_shape=(timestep, input_shape),
          num_classes=output_shape)
      y_train = keras.utils.to_categorical(y_train)

      layer = UnifiedLSTM(rnn_state_size)

      inputs = array_ops.placeholder(
          dtypes.float32, shape=(None, timestep, input_shape), name='inputs')
      predict = array_ops.placeholder(
          dtypes.float32, shape=(None, output_shape), name='predict')

      zeros = array_ops.zeros([batch, output_shape])
      dummy_runtime = constant_op.constant(
          'unknown', dtype=dtypes.string, name='runtime')
      a = constant_op.constant(0)
      b = constant_op.constant(1)
      # Will always run the lstm layer.
      outputs, runtime = control_flow_ops.cond(
          gen_math_ops.less(a, b),
          lambda: layer(inputs),
          lambda: (zeros, dummy_runtime))
      loss = losses.softmax_cross_entropy(predict, outputs)
      optimizer = gradient_descent.GradientDescentOptimizer(0.001)
      train_op = optimizer.minimize(loss)

      sess.run([variables.global_variables_initializer()])
      existing_loss = 0

      for _ in range(epoch):
        loss_value, _, runtime_value = sess.run([loss, train_op, runtime], {
            inputs: x_train,
            predict: y_train
        })
        if test.is_gpu_available():
          self.assertEqual(runtime_value, b'cudnn')
        else:
          self.assertEqual(runtime_value, b'cpu')
        # Make sure the loss is updated for every epoch
        # (layer weights properly updated).
        self.assertNotEqual(existing_loss, loss_value)
        existing_loss = loss_value
Example No. 10
def _GetMemoryOptimizerConfig(self):
  rewrite_options = rewriter_config_pb2.RewriterConfig(
      memory_optimization=rewriter_config_pb2.RewriterConfig.HEURISTICS)
  graph_options = config_pb2.GraphOptions(
      rewrite_options=rewrite_options)
  return config_pb2.ConfigProto(graph_options=graph_options)
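To close, an end-to-end sketch of passing a memory-optimizer config like the one above to a session. The toy matmul graph and TF1-style session are assumptions for illustration; only the RewriterConfig wiring mirrors the example.

# End-to-end sketch (assumptions: TF1 graph mode, toy matmul graph).
import tensorflow.compat.v1 as tf
from tensorflow.core.protobuf import config_pb2
from tensorflow.core.protobuf import rewriter_config_pb2

tf.disable_eager_execution()

rewrite_options = rewriter_config_pb2.RewriterConfig(
    memory_optimization=rewriter_config_pb2.RewriterConfig.HEURISTICS)
graph_options = config_pb2.GraphOptions(rewrite_options=rewrite_options)
config = config_pb2.ConfigProto(graph_options=graph_options)

a = tf.random.normal([256, 256])
b = tf.random.normal([256, 256])
c = tf.matmul(a, b)
with tf.Session(config=config) as sess:
  # Grappler applies heuristic memory optimizations (e.g. swapping or
  # recomputation) before the graph executes.
  sess.run(c)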