Ejemplo n.º 1
0
def gpu_availability():
    """
    Detect gpu on user system

    :return: Whether at least a CUDA compatible GPU is detected and usable
    :rtype: bool
    :History: 2018-Apr-25 - Written - Henry Leung (University of Toronto)
    """
    # A GPU can only be usable when Tensorflow itself was built with CUDA, so
    # the runtime device check is only performed for CUDA builds. For non-CUDA
    # builds the (falsy) result of the build check is returned directly.
    return is_built_with_cuda() and is_gpu_available()
Ejemplo n.º 2
0
 def _testTypes(self, vals):
     """Check assign, assign-add and assign-sub results for several dtypes.

     For every dtype the three update helpers are run with ``use_gpu=False``.
     When TensorFlow is built with CUDA, the float dtypes are additionally
     run with ``use_gpu=True`` for all three helpers.

     Args:
       vals: array exposing ``.shape`` and ``.astype`` (e.g. a numpy array)
         that supplies the values to assign.
     """
     for dtype in [np.float32, np.float64, np.int32, np.int64]:
         x = np.zeros(vals.shape).astype(dtype)
         y = vals.astype(dtype)
         for use_gpu in (False, True):
             # The GPU pass only applies to float dtypes on CUDA builds.
             if use_gpu and not (test.is_built_with_cuda()
                                 and dtype in [np.float32, np.float64]):
                 continue
             var_value, op_value = self._initAssignFetch(
                 x, y, use_gpu=use_gpu)
             self.assertAllEqual(y, var_value)
             self.assertAllEqual(y, op_value)
             var_value, op_value = self._initAssignAddFetch(
                 x, y, use_gpu=use_gpu)
             self.assertAllEqual(x + y, var_value)
             self.assertAllEqual(x + y, op_value)
             # Previously the GPU pass ran this helper with use_gpu=False,
             # so assign-sub was never exercised on the GPU.
             var_value, op_value = self._initAssignSubFetch(
                 x, y, use_gpu=use_gpu)
             self.assertAllEqual(x - y, var_value)
             self.assertAllEqual(x - y, op_value)
Ejemplo n.º 3
0
 def testBuildInfo(self):
     """Check that the recorded build_info flags match the runtime probes."""
     # Each entry pairs a build_info key with the probe it must agree with;
     # the probes are wrapped so they are only evaluated when their turn comes.
     probes = (
         ('is_rocm_build', lambda: test.is_built_with_rocm()),
         ('is_cuda_build', lambda: test.is_built_with_cuda()),
         ('is_tensorrt_build', lambda: is_tensorrt_enabled()),
     )
     for key, probe in probes:
         self.assertEqual(build_info.build_info[key], probe())
Ejemplo n.º 4
0
def gpu_memory_manage(ratio=None, log_device_placement=False):
    """
    Configure how much GPU memory Tensorflow may claim and register a default session with that
    configuration, unless a default session already exists

    :param ratio: Optional, ratio of GPU memory pre-allocating to astroNN; when None the memory is grown on demand
    :type ratio: Union[NoneType, float]
    :param log_device_placement: whether or not log the device placement
    :type log_device_placement: bool
    :History: 2017-Nov-25 - Written - Henry Leung (University of Toronto)
    """
    session_config = tf.compat.v1.ConfigProto()

    if ratio is None:
        # no explicit limit requested: allocate GPU memory on demand
        session_config.gpu_options.allow_growth = True
    elif is_built_with_cuda():
        out_of_range = ratio <= 0. or ratio > 1.
        if out_of_range:
            print(f"Invalid ratio argument -> ratio: {ratio}, it has been reset to ratio=1.0")
            ratio = 1.
        session_config.gpu_options.per_process_gpu_memory_fraction = ratio
    elif isinstance(ratio, float):
        # a memory fraction was requested but this build has no CUDA support
        warnings.warn("You have set GPU memory limit in astroNN config file but you are not using Tensorflow-GPU!")

    session_config.log_device_placement = log_device_placement

    # never replace a session that is already registered as default
    if tf.compat.v1.get_default_session() is not None:
        warnings.warn("A Tensorflow session in use is detected, "
                      "astroNN will use that session to prevent overwriting session!")
        return None

    # entering the session registers it as the tensorflow default session
    new_session = tf.compat.v1.Session(config=session_config)
    new_session.__enter__()
    return None
  def testInvalidLabel(self):
    """Check how out-of-range labels are handled with and without GPU.

    The labels 4 and -1 fall outside the four classes of `features`. With
    the GPU session the test expects NaN loss/backprop for those rows; with
    the CPU session it expects an op error mentioning the label value.
    """
    features = [[1., 1., 1., 1.], [1., 1., 1., 1.], [1., 2., 3., 4.],
                [1., 2., 3., 4.]]
    labels = [4, 3, 0, -1]

    gpu_usable = test.is_built_with_cuda() and test.is_gpu_available()
    if gpu_usable:
      with self.session(use_gpu=True) as sess:
        loss_op, backprop_op = (
            gen_nn_ops.sparse_softmax_cross_entropy_with_logits(
                features, labels))
        loss_value, backprop_value = sess.run([loss_op, backprop_op])

        # Rows with an invalid label are expected to be entirely NaN.
        expected_backprop = [
            [np.nan] * 4,
            [0.25, 0.25, 0.25, -0.75],
            [-0.968, 0.087, 0.237, 0.6439],
            [np.nan] * 4,
        ]
        expected_loss = [np.nan, 1.3862, 3.4420, np.nan]
        self.assertAllClose(
            expected_backprop, backprop_value, rtol=1e-3, atol=1e-3)
        self.assertAllClose(expected_loss, loss_value, rtol=1e-3, atol=1e-3)

    with self.session(use_gpu=False) as sess:
      loss_op, backprop_op = (
          gen_nn_ops.sparse_softmax_cross_entropy_with_logits(features, labels))
      with self.assertRaisesOpError("Received a label value of"):
        sess.run([loss_op, backprop_op])
Ejemplo n.º 6
0
    def testInvalidLabel(self):
        """Check how out-of-range labels are handled with and without GPU.

        The labels 4 and -1 fall outside the four classes of `features`.
        With the GPU session the test expects NaN loss/backprop for those
        rows; with the CPU session it expects an op error mentioning the
        label value.
        """
        features = [[1., 1., 1., 1.], [1., 1., 1., 1.], [1., 2., 3., 4.],
                    [1., 2., 3., 4.]]
        labels = [4, 3, 0, -1]

        gpu_usable = test.is_built_with_cuda() and test.is_gpu_available()
        if gpu_usable:
            with self.session(use_gpu=True) as sess:
                loss_op, backprop_op = (
                    gen_nn_ops.sparse_softmax_cross_entropy_with_logits(
                        features, labels))
                loss_value, backprop_value = self.evaluate(
                    [loss_op, backprop_op])

                # Rows with an invalid label are expected to be all NaN.
                expected_backprop = [
                    [np.nan] * 4,
                    [0.25, 0.25, 0.25, -0.75],
                    [-0.968, 0.087, 0.237, 0.6439],
                    [np.nan] * 4,
                ]
                expected_loss = [np.nan, 1.3862, 3.4420, np.nan]
                self.assertAllClose(expected_backprop,
                                    backprop_value,
                                    rtol=1e-3,
                                    atol=1e-3)
                self.assertAllClose(expected_loss,
                                    loss_value,
                                    rtol=1e-3,
                                    atol=1e-3)

        with self.session(use_gpu=False) as sess:
            loss_op, backprop_op = (
                gen_nn_ops.sparse_softmax_cross_entropy_with_logits(
                    features, labels))
            with self.assertRaisesOpError("Received a label value of"):
                self.evaluate([loss_op, backprop_op])
Ejemplo n.º 7
0
class CudnnRNNTestBasic(TensorFlowTestCase):
    """Basic construction and variable-reuse test for the CudnnLSTM layer."""

    @unittest.skipUnless(test.is_built_with_cuda(),
                         "Test only applicable when running on GPUs")
    def testLayerBasic(self):
        """Build a CudnnLSTM, reuse it three ways and check shared variables.

        The layer is called twice on the same object and once more through a
        second object created under the same variable scope with
        ``reuse=True``. The test asserts that only one trainable variable and
        one saveable object exist, and that every summed output equals 0
        (kernel and bias initializers are constant 0).
        """
        num_layers = 4
        num_units = 2
        batch_size = 8
        dir_count = 1

        with vs.variable_scope("main"):
            # Keyword arguments shared by both layer constructions below.
            lstm_kwargs = dict(
                direction=CUDNN_RNN_UNIDIRECTION,
                kernel_initializer=init_ops.constant_initializer(0.),
                bias_initializer=init_ops.constant_initializer(0.),
                name="awesome_lstm")
            inputs = random_ops.random_uniform(
                [num_layers * dir_count, batch_size, num_units],
                dtype=dtypes.float32)

            first_lstm = cudnn_rnn.CudnnLSTM(num_layers, num_units,
                                             **lstm_kwargs)

            # First call builds the layer, second call reuses it.
            built_outputs, _ = first_lstm(inputs)
            reused_outputs, _ = first_lstm(inputs)

            built_sum = math_ops.reduce_sum(built_outputs)
            reused_sum = math_ops.reduce_sum(reused_outputs)

        with vs.variable_scope("main", reuse=True):
            second_lstm = cudnn_rnn.CudnnLSTM(num_layers, num_units,
                                              **lstm_kwargs)

            # Reuse the layer through the variable scope.
            scoped_outputs, _ = second_lstm(inputs)
            scoped_sum = math_ops.reduce_sum(scoped_outputs)

        self.assertEqual(1, len(variables.trainable_variables()))
        saveables = ops.get_collection(ops.GraphKeys.SAVEABLE_OBJECTS)
        self.assertEqual(1, len(saveables))
        self.assertEqual("main/awesome_lstm/opaque_kernel",
                         variables.trainable_variables()[0].op.name)

        with self.test_session(use_gpu=True) as sess:
            sess.run(variables.global_variables_initializer())
            fetched_sums = sess.run([built_sum, reused_sum, scoped_sum])
            for fetched in fetched_sums:
                self.assertEqual(0, fetched)
Ejemplo n.º 8
0
    def testListLocalDevices(self):
        """Check that local devices start with a CPU and, on CUDA builds, include a GPU."""
        local_devices = device_lib.list_local_devices()
        self.assertGreater(len(local_devices), 0)
        first_device = local_devices[0]
        self.assertEqual(first_device.device_type, "CPU")

        if not test.is_built_with_cuda():
            return

        # GPU test: a CUDA build must report more than the CPU device.
        self.assertGreater(len(local_devices), 1)
        device_types = [dev.device_type for dev in local_devices]
        self.assertTrue("GPU" in device_types)
Ejemplo n.º 9
0
    def testBuildInfo(self):
        """Check that the recorded build_info flags match the runtime probes.

        The TensorRT comparison is skipped when platform.system() reports
        'Windows'.
        """
        for key, probe in (('is_rocm_build', test.is_built_with_rocm),
                           ('is_cuda_build', test.is_built_with_cuda)):
            self.assertEqual(build_info.build_info[key], probe())

        # TODO(b/173044576): make the test work for Windows.
        if platform.system() == 'Windows':
            return

        # pylint: disable=g-import-not-at-top
        from tensorflow.compiler.tf2tensorrt._pywrap_py_utils import is_tensorrt_enabled
        self.assertEqual(build_info.build_info['is_tensorrt_build'],
                         is_tensorrt_enabled())
Ejemplo n.º 10
0
class CudnnRNNTestParamsSize(TensorFlowTestCase):
  """Checks the size of the opaque Cudnn RNN parameter buffer."""

  def _TestOpaqueParamsSize(self, rnn_mode, num_layers, num_units, input_size,
                            direction):
    """Assert the opaque params hold at least all canonical weights/biases.

    Args:
      rnn_mode: RNN mode constant passed through to CudnnTestModel.
      num_layers: number of layers.
      num_units: number of units per layer.
      input_size: size of the input.
      direction: direction constant passed through to CudnnTestModel.
    """
    logging.info("Testing one lstm param size with config: %s", locals())
    dtype = dtypes.float32

    model = CudnnTestModel(
        rnn_mode,
        num_layers,
        num_units,
        input_size,
        dtype=dtype,
        direction=direction)
    rnn = model.rnn

    # Min param size estimate = sum(weights.size) + sum(biases.size)
    # A list is built explicitly: on Python 3 `map` returns an iterator that
    # np.sum does not consume, which made the addition below raise TypeError.
    min_params_size = (
        np.sum([np.prod(shape) for shape in rnn.canonical_weight_shapes]) +
        np.sum([sp[0] for sp in rnn.canonical_bias_shapes]))

    opaque_params = rnn.trainable_variables[0]
    with self.test_session(use_gpu=True, graph=ops.get_default_graph()):
      variables.global_variables_initializer().run()
      opaque_params_size_v = opaque_params.eval().size
      self.assertLessEqual(min_params_size, opaque_params_size_v)

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def testOpaqueParamsSize(self):
    """Run the size check for every RNN mode, shape config and direction."""
    # Each entry is [num_layers, num_units, input_size].
    test_configs = [
        [4, 200, 200],
        [4, 200, 300],
        [4, 200, 100],
        [1, 100, 200],
        [2, 200, 100],
        [3, 200, 400],
    ]
    directions = [CUDNN_RNN_UNIDIRECTION, CUDNN_RNN_BIDIRECTION]
    rnns = [CUDNN_LSTM, CUDNN_GRU, CUDNN_RNN_RELU, CUDNN_RNN_TANH]
    for (rnn, config, direction) in itertools.product(rnns, test_configs,
                                                      directions):
      num_layers, num_units, input_size = config
      # A fresh graph per combination keeps the models independent.
      with ops.Graph().as_default():
        self._TestOpaqueParamsSize(rnn, num_layers, num_units, input_size,
                                   direction)
Ejemplo n.º 11
0
 def _testTypes(self, vals):
   """Check assign, assign-add and assign-sub results for several dtypes.

   For every dtype the three update helpers are run with ``use_gpu=False``.
   When TensorFlow is built with CUDA, the float dtypes are additionally run
   with ``use_gpu=True`` for all three helpers.

   Args:
     vals: array exposing ``.shape`` and ``.astype`` (e.g. a numpy array)
       that supplies the values to assign.
   """
   for dtype in [np.float32, np.float64, np.int32, np.int64]:
     x = np.zeros(vals.shape).astype(dtype)
     y = vals.astype(dtype)
     for use_gpu in (False, True):
       # The GPU pass only applies to float dtypes on CUDA builds.
       if use_gpu and not (test.is_built_with_cuda() and
                           dtype in [np.float32, np.float64]):
         continue
       var_value, op_value = self._initAssignFetch(x, y, use_gpu=use_gpu)
       self.assertAllEqual(y, var_value)
       self.assertAllEqual(y, op_value)
       var_value, op_value = self._initAssignAddFetch(x, y, use_gpu=use_gpu)
       self.assertAllEqual(x + y, var_value)
       self.assertAllEqual(x + y, op_value)
       # Previously the GPU pass ran this helper with use_gpu=False, so
       # assign-sub was never exercised on the GPU.
       var_value, op_value = self._initAssignSubFetch(x, y, use_gpu=use_gpu)
       self.assertAllEqual(x - y, var_value)
       self.assertAllEqual(x - y, op_value)
Ejemplo n.º 12
0
class CudnnRNNTestParamsSize(TensorFlowTestCase):
    """Checks the reported parameter size of Cudnn LSTM models."""

    def _testOneLSTMParamsSize(self, num_layers, num_units, input_size,
                               direction):
        """Assert the model's params size is at least the computed minimum."""
        logging.info("Testing one lstm param size with config: %s", locals())
        lower_bound = _MinLSTMParamSize(num_layers, num_units, input_size,
                                        direction)
        lstm_model = _CreateModel(cudnn_rnn_ops.CUDNN_LSTM,
                                  num_layers,
                                  num_units,
                                  input_size,
                                  direction=direction)
        size_op = lstm_model.params_size()
        current_graph = ops.get_default_graph()
        with self.test_session(use_gpu=True, graph=current_graph) as sess:
            actual_size = sess.run(size_op)
            self.assertLessEqual(lower_bound, actual_size)

    @unittest.skipUnless(test.is_built_with_cuda(),
                         "Test only applicable when running on GPUs")
    def testLSTMParamsSize(self):
        """Run the size check for every shape config and direction."""
        # Each entry is [num_layers, num_units, input_size].
        shape_configs = [
            [4, 200, 200],
            [4, 200, 300],
            [4, 200, 100],
            [1, 100, 200],
            [2, 200, 100],
            [3, 200, 400],
        ]
        all_directions = [
            cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION,
            cudnn_rnn_ops.CUDNN_RNN_BIDIRECTION
        ]
        combos = itertools.product(shape_configs, all_directions)
        for (num_layers, num_units, input_size), direction in combos:
            # A fresh graph per combination keeps the models independent.
            with ops.Graph().as_default():
                self._testOneLSTMParamsSize(num_layers, num_units, input_size,
                                            direction)
Ejemplo n.º 13
0
class CudnnRNNTestTraining(TensorFlowTestCase):
  """Gradient-check based training tests for the Cudnn RNN layers.

  Each test builds a CudnnTestModel, then compares symbolic gradients against
  numeric estimates (or, for float16, against an expected output delta).
  """

  def _ComputeNumericGrad(self, sess, y, x, delta=1e-4, step=1):
    """Compute the numeric gradient of y wrt to x.

    Args:
      sess: The TF session constructed with a graph containing x and y.
      y: A scalar TF Tensor in the graph constructed in sess.
      x: A TF Tensor in the graph constructed in sess.
      delta: Gradient checker's small perturbation of x[i].
      step: Only compute numerical gradients for a subset of x values.
        I.e. dy/dx[i] is computed if i % step == 0.
    Returns:
      A numpy array of the same shape and dtype as the value of x. If x[i] is
      not chosen to compute the numerical gradient dy/x[i], the corresponding
      value is set to 0.
    """

    x_data = sess.run(x)
    x_size = x_data.size
    x_shape = x_data.shape

    numeric_grad = np.zeros(x_size, dtype=x_data.dtype)

    for i in range(0, x_size, step):
      # Central difference: evaluate y at x[i] + delta and x[i] - delta.
      x_pos = x_data.copy()
      if x_size == 1:
        x_pos += delta
      else:
        x_pos.flat[i] += delta
      y_pos = sess.run(y, feed_dict={x.name: x_pos})

      x_neg = x_data.copy()
      if x_size == 1:
        x_neg -= delta
      else:
        x_neg.flat[i] -= delta
      y_neg = sess.run(y, feed_dict={x.name: x_neg})
      numeric_grad[i] = (y_pos - y_neg) / (2 * delta)
    return numeric_grad.reshape(x_shape)

  def _GetShape(self, sess, inputs):
    """Evaluate the shape of `inputs`, or of each element if it is iterable.

    Args:
      sess: session used to run the shape ops.
      inputs: a single tensor or an iterable of tensors.
    Returns:
      The evaluated shape, or a list of evaluated shapes.
    """
    # collections.Iterable was removed in Python 3.10; the ABC lives in
    # collections.abc on Python 3. Fall back to `collections` on Python 2.
    try:
      from collections import abc as collections_abc  # pylint: disable=g-import-not-at-top
    except ImportError:
      collections_abc = collections
    if not isinstance(inputs, collections_abc.Iterable):
      return sess.run(array_ops.shape(inputs))
    else:
      return sess.run([array_ops.shape(x) for x in inputs])

  def _GradientCheckFp16(self, sess, y, xs, num_samples,
                         tolerance=1e-6, delta=1e-4):
    """Gradient check for Fp16.

    Fp16 numerical gradients end up being zeros. Use a new way to check
    gradients:

    Given multi-variant function:
    y = f(x1, x2, ... xn)
    delta_y = f(x1 + delta_x1, x2+delta_x2, ..., xn+delta_xn) -
              f(x1, x2, ..., xn)
            = f'(x1) * delta_x1 + f'(x2) * delta_x2 + .. + f'(xn) * delta_xn
    where:
      delta_xi are very small disturbance.
      f'(xi) is the gradient of y w.r.t xi.

    The gradient check verifies the expected delta_y calculated by the above
    equation is close to the actual delta_y.
    Args:
      sess: tf.Session object.
      y: output tensor.
      xs: a tensor or a list of input tensors.
      num_samples: number of test samples to run.
      tolerance: error tolerance.
      delta: the order of magnitude of input disturbance to apply to calculate
        the output change w.r.t inputs.
    """
    sym_grads = self._ComputeSymGrads(sess, y, xs)
    xs_shapes = self._GetShape(sess, xs)

    x_vals = [sess.run(x) for x in xs]
    for _ in range(num_samples):
      # Random disturbance in [0, delta) for every input element.
      delta_xs = [delta * np.random.rand(*shape.tolist())
                  for shape in xs_shapes]

      feed_dict = {}
      for x, x_val, delta_x in zip(xs, x_vals, delta_xs):
        feed_dict[x] = x_val + delta_x
      actual_delta_y = (float(sess.run(y, feed_dict=feed_dict)) -
                        float(sess.run(y)))

      expected_delta_y = 0.
      for sym_grad, delta_x in zip(sym_grads, delta_xs):
        expected_delta_y += np.dot(
            sym_grad.astype(np.float32).flatten(),
            delta_x.astype(np.float32).flatten())
      self.assertAllClose(expected_delta_y, actual_delta_y,
                          atol=tolerance, rtol=tolerance)

  def _GradientCheck(self, sess, y, xs, tolerance=1e-6, delta=1e-4):
    """Assert symbolic and numeric gradients of y wrt each x agree.

    Args:
      sess: session containing y and xs.
      y: output tensor.
      xs: iterable of input tensors.
      tolerance: used as both atol and rtol for the comparison.
      delta: perturbation size for the numeric gradient.
    """
    sym_grads = self._ComputeSymGrads(sess, y, xs)

    num_grads = [self._ComputeNumericGrad(sess, y, x, delta) for x in xs]
    self.assertEqual(len(sym_grads), len(num_grads))
    for sym, num in zip(sym_grads, num_grads):
      self.assertFalse(np.any(np.isnan(sym)))
      self.assertFalse(np.any(np.isnan(num)))
      self.assertAllClose(sym, num, atol=tolerance, rtol=tolerance)

  def _ComputeSymGrads(self, sess, y, xs):
    """Evaluate the symbolic gradients of y wrt xs in `sess`."""
    sym_grads_t = gradients.gradients(y, xs)
    return sess.run(sym_grads_t)

  def _TestOneSimpleTraining(self, rnn_mode, num_layers, num_units, input_size,
                             batch_size, seq_length, dir_count, dropout, dtype,
                             delta, tolerance):
    """Build one model and gradient-check it for the given configuration.

    Args:
      rnn_mode: RNN mode constant; CUDNN_LSTM additionally gets a c state.
      num_layers: number of layers.
      num_units: number of units per layer.
      input_size: size of the input.
      batch_size: batch size of the random inputs.
      seq_length: sequence length of the random inputs.
      dir_count: 1 selects unidirectional, anything else bidirectional.
      dropout: dropout value passed to the model.
      dtype: dtype of the model and inputs; float16 uses the Fp16 check.
      delta: perturbation size for the gradient check.
      tolerance: tolerance for the gradient check.
    """
    # Gradient checking runs two forward ops with almost the same input. Need to
    # make sure the drop patterns across the two runs are the same.
    logging.info("Training test with config: %s", locals())
    old_env_state = os.environ.get("TF_CUDNN_RESET_RND_GEN_STATE", str(False))
    os.environ["TF_CUDNN_RESET_RND_GEN_STATE"] = str(True)

    try:
      np.random.seed(1234)
      random_seed.set_random_seed(5678)
      has_input_c = (rnn_mode == CUDNN_LSTM)
      direction = (CUDNN_RNN_UNIDIRECTION
                   if dir_count == 1 else CUDNN_RNN_BIDIRECTION)
      model = CudnnTestModel(
          rnn_mode,
          num_layers,
          num_units,
          input_size,
          direction=direction,
          dropout=dropout,
          dtype=dtype,
          training=True,
          bias_initializer=init_ops.random_normal_initializer(
              mean=1., dtype=dtype))
      rnn = model.rnn
      params = rnn.trainable_variables[0]

      inputs = variables.Variable(
          random_ops.random_uniform(
              [seq_length, batch_size, input_size], dtype=dtype),
          dtype=dtype)
      input_h = variables.Variable(
          random_ops.random_uniform(
              [num_layers * dir_count, batch_size, num_units], dtype=dtype),
          dtype=dtype)
      if has_input_c:
        input_c = variables.Variable(
            random_ops.random_uniform(
                [num_layers * dir_count, batch_size, num_units], dtype=dtype),
            dtype=dtype)
        initial_state = (input_h, input_c)
      else:
        initial_state = (input_h,)
      total_sum = model.FProp(inputs, initial_state, training=True)

      with self.test_session(
          use_gpu=True, graph=ops.get_default_graph()) as sess:
        sess.run(variables.global_variables_initializer())
        all_inputs = [inputs, params]
        for s in initial_state:
          all_inputs.append(s)
        if dtype == dtypes.float16:
          self._GradientCheckFp16(
              sess, total_sum, all_inputs,
              num_samples=FLAGS.grad_check_num_samples,
              tolerance=tolerance, delta=delta)
        else:
          for _ in range(FLAGS.grad_check_num_samples):
            # Each time choose a different set of inputs.
            sess.run(variables.global_variables_initializer())
            self._GradientCheck(
                sess, total_sum, all_inputs,
                tolerance=tolerance, delta=delta)
    finally:
      # Restore the flag even when a check fails, so a failing test does not
      # leak the modified environment into later tests.
      os.environ["TF_CUDNN_RESET_RND_GEN_STATE"] = old_env_state

  def _TestSimpleTrainingHelper(self, rnn_mode, test_configs):
    """Run the training check for every config with dropout 0, 0.5 and 1.

    Args:
      rnn_mode: RNN mode constant.
      test_configs: list of dicts with a required "shape" dict and optional
        "dtype", "delta", "tolerance" and "dir_count" entries.
    """
    dropouts = [0, 0.5, 1.]
    for config, dropout in itertools.product(test_configs, dropouts):
      dtype = config.get("dtype", dtypes.float32)
      delta = config.get("delta", 1e-4)
      tolerance = config.get("tolerance", 1e-6)
      dir_count = config.get("dir_count", 1)
      shape = config["shape"]
      # A fresh graph per combination keeps the models independent.
      with ops.Graph().as_default():
        self._TestOneSimpleTraining(rnn_mode, shape["num_layers"],
                                    shape["num_units"], shape["input_size"],
                                    shape["batch_size"], shape["seq_length"],
                                    dir_count, dropout, dtype, delta,
                                    tolerance)

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def testSimpleTrainingLSTMFp64(self):
    """Gradient-check CUDNN_LSTM with float64."""
    test_configs = [
        {
            "dtype": dtypes.float64,
            "tolerance": 5e-6,
            "shape": {
                "num_layers": 2,
                "num_units": 3,
                "input_size": 4,
                "batch_size": 3,
                "seq_length": 4,
            },
        },
    ]
    self._TestSimpleTrainingHelper(CUDNN_LSTM, test_configs)

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def testSimpleTrainingLSTMFp32(self):
    """Gradient-check CUDNN_LSTM with float32."""
    test_configs = [
        {
            "dtype": dtypes.float32,
            "delta": 1e-4,
            "tolerance": 9e-2,
            "shape": {
                "num_layers": 2,
                "num_units": 3,
                "input_size": 4,
                "batch_size": 3,
                "seq_length": 4,
            },
        },
    ]
    self._TestSimpleTrainingHelper(CUDNN_LSTM, test_configs)

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def testSimpleTrainingLSTMFp16(self):
    """Gradient-check CUDNN_LSTM with float16 on two shape configs."""
    test_configs = [
        {
            "dtype": dtypes.float16,
            "delta": 1e-3,
            "tolerance": 9e-2,
            "shape": {
                "num_layers": 2,
                "num_units": 3,
                "input_size": 4,
                "batch_size": 3,
                "seq_length": 4,
            },
        },
        {
            "dtype": dtypes.float16,
            "delta": 1e-2,
            "tolerance": 9e-2,
            "shape": {
                "num_layers": 2,
                "num_units": 6,
                "input_size": 8,
                "batch_size": 6,
                "seq_length": 4,
            },
        },
    ]
    self._TestSimpleTrainingHelper(CUDNN_LSTM, test_configs)

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def testSimpleTrainingGRUFp64(self):
    """Gradient-check CUDNN_GRU with float64."""
    test_configs = [
        {
            "dtype": dtypes.float64,
            "tolerance": 5e-6,
            "shape": {
                "num_layers": 2,
                "num_units": 3,
                "input_size": 4,
                "batch_size": 3,
                "seq_length": 4,
            }
        },
    ]
    self._TestSimpleTrainingHelper(CUDNN_GRU, test_configs)

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def testSimpleTrainingGRUFp32(self):
    """Gradient-check CUDNN_GRU with float32."""
    test_configs = [
        {
            "dtype": dtypes.float32,
            "delta": 1e-3,
            "tolerance": 4e-3,
            "shape": {
                "num_layers": 2,
                "num_units": 3,
                "input_size": 4,
                "batch_size": 3,
                "seq_length": 4,
            },
        },
    ]
    self._TestSimpleTrainingHelper(CUDNN_GRU, test_configs)

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def testSimpleTrainingGRUFp16(self):
    """Gradient-check CUDNN_GRU with float16."""
    test_configs = [
        {
            "dtype": dtypes.float16,
            "delta": 2e-3,
            "tolerance": 6e-2,
            "shape": {
                "num_layers": 2,
                "num_units": 3,
                "input_size": 4,
                "batch_size": 3,
                "seq_length": 4,
            },
        },
    ]
    self._TestSimpleTrainingHelper(CUDNN_GRU, test_configs)

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def testSimpleTrainingRNNTanhFp64(self):
    """Gradient-check CUDNN_RNN_TANH with float64."""
    test_configs = [
        {
            "dtype": dtypes.float64,
            "tolerance": 5e-6,
            "shape": {
                "num_layers": 2,
                "num_units": 3,
                "input_size": 4,
                "batch_size": 3,
                "seq_length": 4,
            },
        },
    ]
    self._TestSimpleTrainingHelper(CUDNN_RNN_TANH, test_configs)

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def testSimpleTrainingRNNTanhFp32(self):
    """Gradient-check CUDNN_RNN_TANH with float32."""
    test_configs = [
        {
            "dtype": dtypes.float32,
            "delta": 1e-3,
            "tolerance": 5e-3,
            "shape": {
                "num_layers": 2,
                "num_units": 3,
                "input_size": 4,
                "batch_size": 3,
                "seq_length": 4,
            },
        },
    ]
    self._TestSimpleTrainingHelper(CUDNN_RNN_TANH, test_configs)

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def testSimpleTrainingRNNTanhFp16(self):
    """Gradient-check CUDNN_RNN_TANH with float16."""
    test_configs = [
        {
            "dtype": dtypes.float16,
            "delta": 1e-3,
            "tolerance": 5e-2,
            "shape": {
                "num_layers": 2,
                "num_units": 3,
                "input_size": 4,
                "batch_size": 3,
                "seq_length": 4,
            },
        },
    ]
    self._TestSimpleTrainingHelper(CUDNN_RNN_TANH, test_configs)

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def testSimpleTrainingRNNReluFp64(self):
    """Gradient-check CUDNN_RNN_RELU with float64."""
    test_configs = [
        {
            "dtype": dtypes.float64,
            "tolerance": 5e-6,
            "shape": {
                "num_layers": 2,
                "num_units": 3,
                "input_size": 4,
                "batch_size": 3,
                "seq_length": 4,
            },
        },
    ]
    self._TestSimpleTrainingHelper(CUDNN_RNN_RELU, test_configs)

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def testSimpleTrainingRNNReluFp32(self):
    """Gradient-check CUDNN_RNN_RELU with float32."""
    test_configs = [
        {
            "dtype": dtypes.float32,
            "delta": 1e-4,
            "tolerance": 3e-1,
            "shape": {
                "num_layers": 2,
                "num_units": 3,
                "input_size": 4,
                "batch_size": 3,
                "seq_length": 4,
            },
        },
    ]
    self._TestSimpleTrainingHelper(CUDNN_RNN_RELU, test_configs)

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def testSimpleTrainingRNNReluFp16(self):
    """Gradient-check CUDNN_RNN_RELU with float16."""
    test_configs = [
        {
            "dtype": dtypes.float16,
            "delta": 1e-3,
            "tolerance": 7e-2,
            "shape": {
                "num_layers": 2,
                "num_units": 3,
                "input_size": 4,
                "batch_size": 3,
                "seq_length": 4,
            },
        },
    ]
    self._TestSimpleTrainingHelper(CUDNN_RNN_RELU, test_configs)
Ejemplo n.º 14
0
class CudnnRNNTestCompatibleRNNCells(TensorFlowTestCase):
  """Compares Cudnn RNN inference with canonical RNN cells.

  Each test trains a `CudnnTestModel`, saves a checkpoint, and restores it into
  both a Cudnn inference graph and a graph built with
  `_CreateCudnnCompatibleCanonicalRNN`, asserting that the two produce close
  outputs and final states.
  """

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def testCudnnCompatibleLSTM(self):
    """Runs the compatibility check in CUDNN_LSTM mode."""
    self._TestCudnnCompatibleRnnCellsHelper(CUDNN_LSTM)

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def testCudnnCompatibleGRU(self):
    """Runs the compatibility check in CUDNN_GRU mode."""
    self._TestCudnnCompatibleRnnCellsHelper(CUDNN_GRU)

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def testCudnnCompatibleRNNTanh(self):
    """Runs the compatibility check in CUDNN_RNN_TANH mode."""
    self._TestCudnnCompatibleRnnCellsHelper(CUDNN_RNN_TANH)

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def testCudnnCompatibleRNNRelu(self):
    """Runs the compatibility check in CUDNN_RNN_RELU mode."""
    self._TestCudnnCompatibleRnnCellsHelper(CUDNN_RNN_RELU)

  def _TestCudnnCompatibleRnnCellsHelper(self, rnn_mode):
    """Runs the compatibility check for every config in both directions.

    Args:
      rnn_mode: Cudnn RNN mode constant forwarded to
        `_TestCudnnCompatibleRnnCells`.
    """
    configs = [
        {
            "num_layers": 1,
            "seq_length": 3,
            "num_units": 4,
            "input_size": 5,
            "batch_size": 6,
        },
        {
            "num_layers": 2,
            "seq_length": 8,
            "num_units": 4,
            "input_size": 8,
            "batch_size": 16,
        },
        {
            "num_layers": 2,
            "seq_length": 3,
            "num_units": 4,
            "input_size": 5,
            "batch_size": 6,
        },
        {
            "num_layers": 1,
            "seq_length": 2,
            "num_units": 2,
            "input_size": 4,
            "batch_size": 1,
        },
    ]
    directions = [CUDNN_RNN_UNIDIRECTION, CUDNN_RNN_BIDIRECTION]
    # Cover the full cross product. Pairing the two lists with zip() stops at
    # the shorter one, which would silently leave the last two configs
    # untested and tie each remaining config to a single direction.
    for cfg in configs:
      for direction in directions:
        self._TestCudnnCompatibleRnnCells(cfg["num_layers"], cfg["seq_length"],
                                          cfg["num_units"], cfg["input_size"],
                                          cfg["batch_size"], rnn_mode,
                                          direction)

  def _TestCudnnCompatibleRnnCells(self, num_layers, seq_length, num_units,
                                   input_size, batch_size, rnn_mode, direction):
    """Trains a Cudnn model and checks canonical-cell inference matches it.

    Three graphs are built in turn: a training graph whose variables are saved
    to a checkpoint, a Cudnn inference graph restored from that checkpoint,
    and a canonical RNN graph restored from the same checkpoint. Outputs and
    final states of the last two are compared with `assertAllClose`.

    Args:
      num_layers: forwarded to `CudnnTestModel`.
      seq_length: first dimension of the synthesized and inference inputs.
      num_units: forwarded to `CudnnTestModel`.
      input_size: forwarded to `CudnnTestModel`; last dimension of the inputs.
      batch_size: second dimension of the synthesized and inference inputs.
      rnn_mode: Cudnn RNN mode constant; CUDNN_LSTM selects the (h, c) state
        comparison, any other mode the single-state comparison.
      direction: CUDNN_RNN_UNIDIRECTION, or any other value for the
        bidirectional code path.
    """
    dtype = dtypes.float32
    # Train graph
    with ops.Graph().as_default() as g:
      model = CudnnTestModel(
          rnn_mode,
          num_layers,
          num_units,
          input_size,
          direction=direction,
          dtype=dtype,
          training=True)
      target_output = array_ops.placeholder(dtype=dtype)
      loss_op = losses.log_loss(
          labels=target_output, predictions=model.total_sum)
      optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=1e-2)
      train_op = optimizer.minimize(loss_op)

      saver = saver_lib.Saver()

      # Train Cudnn model
      seed = 0
      with self.test_session(use_gpu=True, graph=g) as sess:
        sess.run(variables.global_variables_initializer())
        # Train 128 steps
        num_steps = 128
        for _ in range(num_steps):
          # A fresh seed is passed to SynthesizeInput on every step.
          inputs, _ = model.SynthesizeInput(seq_length, batch_size, seed)
          # The training target is a single random scalar.
          targets = np.random.rand()
          sess.run(
              train_op,
              feed_dict={
                  model.inputs: inputs,
                  model.initial_state: model.ZeroState(batch_size),
                  target_output: targets
              })
          seed += 1

        save_path = os.path.join(self.get_temp_dir(),
                                 ("cudnn-rnn-%s-test" % rnn_mode))
        save_v = saver.save(sess, save_path)
        self.assertEqual(save_path, save_v)

    # Cudnn inference graph
    with ops.Graph().as_default() as g:
      model = CudnnTestModel(
          rnn_mode,
          num_layers,
          num_units,
          input_size,
          direction=direction,
          dtype=dtype,
          training=False)
      # Kept for the canonical graph below, which is built from this layer.
      rnn = model.rnn
      saver = saver_lib.Saver()

      inference_input = np.random.rand(seq_length, batch_size,
                                       input_size).astype(np.float32)
      with self.test_session(use_gpu=True, graph=g) as sess:
        sess.run(variables.global_variables_initializer())
        saver.restore(sess, save_path)

        # Cudnn inference
        cudnn_outputs_v, cudnn_output_states_v = model.Feed(
            sess, inference_input, return_sum=False)

    # Canonical RNN inference graph
    with ops.Graph().as_default() as g:
      cell_inputs = array_ops.placeholder(
          dtype, shape=[seq_length, batch_size, input_size])
      if direction == CUDNN_RNN_UNIDIRECTION:
        # outputs is one tensor, states are num_layer tuples, each 2 tensors
        (outputs, states) = _CreateCudnnCompatibleCanonicalRNN(rnn, cell_inputs)
        if rnn_mode == CUDNN_LSTM:
          output_h = array_ops.stack([s.h for s in states])
          output_c = array_ops.stack([s.c for s in states])
        else:
          output_state = array_ops.stack([s for s in states])
      else:
        # outputs is one tensor.
        # states is a tuple of 2 tuples:
        # each sub tuple is num_layer tuples, each with 2 tensors.
        (outputs, states) = _CreateCudnnCompatibleCanonicalRNN(
            rnn, cell_inputs, is_bidi=True)
        output_state_fw, output_state_bw = states
        if rnn_mode == CUDNN_LSTM:
          # Per layer, stack forward then backward state, then concatenate
          # the per-layer stacks along axis 0.
          output_h, output_c = [], []
          for s_fw, s_bw in zip(output_state_fw, output_state_bw):
            output_h.append(array_ops.stack([s_fw.h, s_bw.h]))
            output_c.append(array_ops.stack([s_fw.c, s_bw.c]))
          output_h = array_ops.concat(output_h, axis=0)
          output_c = array_ops.concat(output_c, axis=0)
        else:
          output_state = []
          for s_fw, s_bw in zip(output_state_fw, output_state_bw):
            output_state.append(array_ops.stack([s_fw, s_bw]))
          output_state = array_ops.concat(output_state, axis=0)
      saver = saver_lib.Saver()

      with self.test_session(use_gpu=True, graph=g) as sess:
        # No initializer run here: all values come from the checkpoint.
        saver.restore(sess, save_path)

        # BlockCell inference
        if rnn_mode == CUDNN_LSTM:
          outputs_v, output_h_v, output_c_v = sess.run(
              [outputs, output_h, output_c],
              feed_dict={cell_inputs: inference_input})
          self.assertAllClose(cudnn_outputs_v, outputs_v)
          cudnn_output_h_v, cudnn_output_c_v = cudnn_output_states_v
          self.assertAllClose(cudnn_output_h_v, output_h_v)
          self.assertAllClose(cudnn_output_c_v, output_c_v)
        else:
          outputs_v, output_state_v = sess.run(
              [outputs, output_state],
              feed_dict={cell_inputs: inference_input})
          # Non-LSTM modes are compared with explicit 2e-5 tolerances.
          self.assertAllClose(cudnn_outputs_v, outputs_v, atol=2e-5, rtol=2e-5)
          (cudnn_output_h_v,) = cudnn_output_states_v
          self.assertAllClose(cudnn_output_h_v, output_state_v, atol=2e-5,
                              rtol=2e-5)
Ejemplo n.º 15
0
class CudnnRNNTestSaveRestore(TensorFlowTestCase):
  """Round-trips Cudnn RNN parameters and outputs through a Saver."""

  def _CompareWeights(self, lhs, rhs):
    """Asserts both weight sequences have the same length and equal entries."""
    self.assertEqual(len(lhs), len(rhs))
    for left, right in zip(lhs, rhs):
      self.assertAllEqual(left, right)

  def _CompareBiases(self, lhs, rhs, rnn_mode, num_layers, direction):
    """Asserts two bias sequences agree, one layer (and direction) at a time.

    Args:
      lhs: first sequence of bias values.
      rhs: second sequence of bias values.
      rnn_mode: Cudnn RNN mode constant; selects the per-layer parameter
        count. Any mode other than LSTM, GRU or tanh uses the ReLU count.
      num_layers: number of layers the sequences are expected to cover.
      direction: Cudnn direction constant; anything other than
        CUDNN_RNN_UNIDIRECTION is treated as two directions.
    """
    self.assertEqual(len(lhs), len(rhs))

    per_layer = CUDNN_RNN_RELU_PARAMS_PER_LAYER
    known_counts = (
        (CUDNN_LSTM, CUDNN_LSTM_PARAMS_PER_LAYER),
        (CUDNN_GRU, CUDNN_GRU_PARAMS_PER_LAYER),
        (CUDNN_RNN_TANH, CUDNN_RNN_TANH_PARAMS_PER_LAYER),
    )
    for mode, count in known_counts:
      if rnn_mode == mode:
        per_layer = count
        break

    unidirectional = direction == CUDNN_RNN_UNIDIRECTION
    if not unidirectional:
      per_layer *= 2
    self.assertEqual(per_layer * num_layers, len(lhs))

    for layer in range(num_layers):
      begin, end = layer * per_layer, (layer + 1) * per_layer
      lhs_slice, rhs_slice = lhs[begin:end], rhs[begin:end]
      if unidirectional:
        self._CompareSingleLayerBiases(lhs_slice, rhs_slice)
        continue
      # First half is compared as the forward direction, second half as the
      # backward one. The split point is taken from the lhs slice for both.
      half = len(lhs_slice) // 2
      self._CompareSingleLayerBiases(lhs_slice[:half], rhs_slice[:half])
      self._CompareSingleLayerBiases(lhs_slice[half:], rhs_slice[half:])

  def _CompareSingleLayerBiases(self, lhs, rhs):
    """Asserts the pairwise sums of the two halves of each sequence match.

    Each sequence is cut in the middle, the i-th entries of the two halves
    are added, and only those sums are compared between `lhs` and `rhs`.
    """
    self.assertEqual(len(lhs), len(rhs))

    mid_lhs, mid_rhs = len(lhs) // 2, len(rhs) // 2
    head_lhs, tail_lhs = lhs[:mid_lhs], lhs[mid_lhs:]
    head_rhs, tail_rhs = rhs[:mid_rhs], rhs[mid_rhs:]
    self.assertEqual(len(head_lhs), len(tail_lhs))
    self.assertEqual(len(head_rhs), len(tail_rhs))

    combined_lhs = [head + tail for head, tail in zip(head_lhs, tail_lhs)]
    combined_rhs = [head + tail for head, tail in zip(head_rhs, tail_rhs)]
    self.assertEqual(len(combined_lhs), len(combined_rhs))
    for left, right in zip(combined_lhs, combined_rhs):
      self.assertAllEqual(left, right)

  def _TestSaveRestoreVariable(self, rnn_mode, direction, dtype):
    """Saves one model, zeroes its opaque params, restores, and compares.

    The comparison is done on the canonical weights and biases obtained from
    the layer's saveable before saving and after restoring.
    """
    input_size, num_layers, num_units = 3, 2, 7
    with ops.Graph().as_default() as graph:
      random_seed.set_random_seed(1234)
      model = CudnnTestModel(
          rnn_mode, num_layers, num_units, input_size,
          direction=direction, dtype=dtype)
      rnn = model.rnn
      checkpoint_prefix = os.path.join(self.get_temp_dir(),
                                       "save-restore-variable-test")
      saver = saver_lib.Saver()
      canonical_weights, canonical_biases = (
          model.rnn.saveable._OpaqueParamsToCanonical())
      opaque_params = rnn.trainable_variables[0]
      # CudnnTestModel() creates CudnnOpaqueParamsSaveable that helps saver save
      # Cudnn vars in canonical format.
      zero_params = array_ops.zeros(
          array_ops.shape(opaque_params), dtype=dtype)
      reset_op = state_ops.assign(opaque_params, zero_params)
      # Passing graph explicitly, otherwise an old sess would be reused.
      with self.test_session(use_gpu=True, graph=graph) as sess:
        sess.run(variables.global_variables_initializer())
        saved_to = saver.save(sess, checkpoint_prefix)
        self.assertEqual(checkpoint_prefix, saved_to)
        weights_before, biases_before = sess.run(
            [canonical_weights, canonical_biases])

        # Reset opaque param
        sess.run(reset_op)
        saver.restore(sess, checkpoint_prefix)
        weights_after, biases_after = sess.run(
            [canonical_weights, canonical_biases])

        self._CompareWeights(weights_before, weights_after)
        self._CompareBiases(biases_before, biases_after, rnn_mode, num_layers,
                            direction)

  def _TestSaveRestoreTwoVariables(self, rnn_mode, direction, dtype):
    """Same round trip as the single-model test, with two scoped models.

    The two models live under variable scopes "m1" and "m2" in one graph and
    are saved and restored by a single Saver.
    """
    input_size, num_layers, num_units = 3, 2, 7
    with ops.Graph().as_default() as graph:
      random_seed.set_random_seed(1234)
      models = []
      for scope_name in ("m1", "m2"):
        with vs.variable_scope(scope_name):
          models.append(
              CudnnTestModel(
                  rnn_mode, num_layers, num_units, input_size,
                  direction=direction, dtype=dtype))
      model_a, model_b = models
      opaque_params = (model_a.rnn.trainable_variables[0],
                       model_b.rnn.trainable_variables[0])
      weights_a, biases_a = model_a.rnn.saveable._OpaqueParamsToCanonical()
      weights_b, biases_b = model_b.rnn.saveable._OpaqueParamsToCanonical()
      # One grouped op that zeroes both opaque parameter variables.
      reset_op = control_flow_ops.group(*[
          state_ops.assign(var, array_ops.zeros_like(var, dtype=dtype))
          for var in opaque_params
      ])
      checkpoint_prefix = os.path.join(self.get_temp_dir(),
                                       "save-restore-variable-test2")
      saver = saver_lib.Saver()
      # Passing graph explicitly, otherwise an old sess would be reused.
      with self.test_session(use_gpu=True, graph=graph) as sess:
        sess.run(variables.global_variables_initializer())
        saved_to = saver.save(sess, checkpoint_prefix)
        self.assertEqual(checkpoint_prefix, saved_to)

        weights_a_before, biases_a_before = sess.run([weights_a, biases_a])
        weights_b_before, biases_b_before = sess.run([weights_b, biases_b])

        sess.run(reset_op)
        saver.restore(sess, checkpoint_prefix)
        weights_a_after, biases_a_after = sess.run([weights_a, biases_a])
        weights_b_after, biases_b_after = sess.run([weights_b, biases_b])

        self._CompareWeights(weights_a_before, weights_a_after)
        self._CompareWeights(weights_b_before, weights_b_after)
        self._CompareBiases(biases_a_before, biases_a_after, rnn_mode,
                            num_layers, direction)
        self._CompareBiases(biases_b_before, biases_b_after, rnn_mode,
                            num_layers, direction)

  def _TestSaveRestoreOutput(self, rnn_mode, direction, dtype):
    """Checks the model output is unchanged by a save/zero/restore cycle."""
    with ops.Graph().as_default() as graph:
      num_layers, num_units, input_size = 2, 7, 7
      seq_length, batch_size = 8, 4
      model = CudnnTestModel(
          rnn_mode, num_layers, num_units, input_size,
          direction=direction, dtype=dtype, training=False)
      rnn = model.rnn

      checkpoint_prefix = os.path.join(self.get_temp_dir(),
                                       "save-restore-output-test")
      saver = saver_lib.Saver()

      # Only one opaque var in a cudnn layer.
      assert len(rnn.trainable_variables) == 1
      zero_params = array_ops.zeros(
          array_ops.shape(rnn.trainable_variables[0]), dtype=dtype)
      reset_params = state_ops.assign(rnn.trainable_variables[0], zero_params)

      # Passing graph explicitly, otherwise an old sess would be reused.
      with self.test_session(use_gpu=True, graph=graph) as sess:
        sess.run(variables.global_variables_initializer())
        inputs, initial_state = model.SynthesizeInput(seq_length, batch_size)
        sum_before = model.Feed(sess, inputs, initial_state)
        saved_to = saver.save(sess, checkpoint_prefix)
        self.assertEqual(checkpoint_prefix, saved_to)

        sess.run(reset_params)
        saver.restore(sess, checkpoint_prefix)
        sum_after = model.Feed(sess, inputs, initial_state)
        self.assertAllClose(sum_before, sum_after, atol=1e-5)

  def _TestSaveRestoreHelper(self, rnn_mode):
    """Runs all three save/restore checks for each direction and dtype."""
    for direction in [CUDNN_RNN_UNIDIRECTION, CUDNN_RNN_BIDIRECTION]:
      for dtype in [dtypes.float16, dtypes.float32, dtypes.float64]:
        self._TestSaveRestoreVariable(rnn_mode, direction, dtype)
        self._TestSaveRestoreTwoVariables(rnn_mode, direction, dtype)
        self._TestSaveRestoreOutput(rnn_mode, direction, dtype)

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def testSaveRestoreRepeatedlyCreateCustomSaveable(self):
    """Expects `_create_saveable` to raise once the model has been built."""
    input_size, num_layers, num_units = 3, 2, 7
    with ops.Graph().as_default():
      random_seed.set_random_seed(1234)
      model = CudnnTestModel(
          CUDNN_LSTM, num_layers, num_units, input_size,
          direction=CUDNN_RNN_UNIDIRECTION, dtype=dtypes.float32)
      with self.assertRaisesRegexp(RuntimeError,
                                   "Cudnn saveable already created"):
        model.rnn._create_saveable()

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def testSaveRestoreLSTM(self):
    """Runs the save/restore checks in CUDNN_LSTM mode."""
    self._TestSaveRestoreHelper(CUDNN_LSTM)

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def testSaveRestoreGRU(self):
    """Runs the save/restore checks in CUDNN_GRU mode."""
    self._TestSaveRestoreHelper(CUDNN_GRU)

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def testSaveRestoreRNNTanh(self):
    """Runs the save/restore checks in CUDNN_RNN_TANH mode."""
    self._TestSaveRestoreHelper(CUDNN_RNN_TANH)

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def testSaveRestoreRNNRelu(self):
    """Runs the save/restore checks in CUDNN_RNN_RELU mode."""
    self._TestSaveRestoreHelper(CUDNN_RNN_RELU)
Ejemplo n.º 16
0
class CudnnRNNTestBasic(TensorFlowTestCase):
  """Basic layer-level tests for the `cudnn_rnn` layers."""

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def testLayerBasic(self):
    """Checks that reusing a CudnnLSTM layer keeps a single opaque kernel."""
    num_layers, num_units, batch_size = 4, 2, 8
    dir_count = 1
    input_shape = [num_layers * dir_count, batch_size, num_units]

    with vs.variable_scope("main"):
      layer_kwargs = dict(
          direction=CUDNN_RNN_UNIDIRECTION,
          kernel_initializer=init_ops.constant_initializer(0.),
          bias_initializer=init_ops.constant_initializer(0.),
          name="awesome_lstm")
      inputs = random_ops.random_uniform(input_shape, dtype=dtypes.float32)

      layer = cudnn_rnn.CudnnLSTM(num_layers, num_units, **layer_kwargs)

      # First call builds the layer, second call reuses it.
      built_outputs, _ = layer(inputs)
      reused_outputs, _ = layer(inputs)

      built_sum = math_ops.reduce_sum(built_outputs)
      reused_sum = math_ops.reduce_sum(reused_outputs)

    with vs.variable_scope("main", reuse=True):
      # A second layer object with the same name, under a reusing scope.
      layer = cudnn_rnn.CudnnLSTM(num_layers, num_units, **layer_kwargs)
      scope_reuse_outputs, _ = layer(inputs)
      scope_reuse_sum = math_ops.reduce_sum(scope_reuse_outputs)

    self.assertEqual(1, len(variables.trainable_variables()))
    self.assertEqual(1, len(ops.get_collection(ops.GraphKeys.SAVEABLE_OBJECTS)))
    self.assertEqual("main/awesome_lstm/opaque_kernel",
                     variables.trainable_variables()[0].op.name)

    with self.test_session(use_gpu=True) as sess:
      sess.run(variables.global_variables_initializer())
      # All initializers are constant zero, so every sum is expected to be 0.
      for sum_value in sess.run([built_sum, reused_sum, scope_reuse_sum]):
        self.assertEqual(0, sum_value)

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def testOptimizersSupport(self):
    """Runs one training step with each supported optimizer name."""
    optimizer_names = ("adagrad", "adam", "rmsprop", "momentum", "sgd")
    for name in optimizer_names:
      self._TestOptimizerSupportHelper(name)

  def _GetOptimizer(self, opt):
    """Returns a new optimizer for the given name.

    Args:
      opt: one of "adagrad", "adam", "rmsprop", "momentum" or "sgd".

    Raises:
      ValueError: if `opt` is none of the names above.
    """
    if opt == "adagrad":
      return adagrad.AdagradOptimizer(learning_rate=1e-2)
    if opt == "adam":
      return adam.AdamOptimizer(learning_rate=1e-2)
    if opt == "rmsprop":
      return rmsprop.RMSPropOptimizer(learning_rate=1e-2)
    if opt == "momentum":
      return momentum.MomentumOptimizer(learning_rate=1e-2, momentum=0.9)
    if opt == "sgd":
      return gradient_descent.GradientDescentOptimizer(learning_rate=1e-2)
    raise ValueError("Unsupported optimizer: %s" % opt)

  def _TestOptimizerSupportHelper(self, opt):
    """Builds a CudnnLSTM loss in a fresh graph and runs one minimize step."""
    num_layers, num_units, batch_size = 4, 2, 8
    dir_count = 1

    with ops.Graph().as_default() as graph:
      zero_kernel = init_ops.constant_initializer(0.)
      zero_bias = init_ops.constant_initializer(0.)
      inputs = random_ops.random_uniform(
          [num_layers * dir_count, batch_size, num_units],
          dtype=dtypes.float32)

      layer = cudnn_rnn.CudnnLSTM(
          num_layers,
          num_units,
          direction=CUDNN_RNN_UNIDIRECTION,
          kernel_initializer=zero_kernel,
          bias_initializer=zero_bias,
          name="awesome_lstm")
      layer_outputs, _ = layer(inputs)
      loss = math_ops.reduce_sum(layer_outputs)
      train_op = self._GetOptimizer(opt).minimize(loss)

    with self.test_session(use_gpu=True, graph=graph) as sess:
      sess.run(variables.global_variables_initializer())
      sess.run(train_op)

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def testSaveableGraphDeviceAssignment(self):
    """Saves and restores with variables on CPU and all other ops on GPU."""
    num_layers, num_units, batch_size = 4, 2, 8
    dir_count = 1

    def DeviceFn(op):
      # Variable ops go to the CPU, everything else to the first GPU.
      return "/cpu:0" if op.type in ("Variable", "VariableV2") else "/gpu:0"

    with ops.Graph().as_default() as graph:
      with ops.device(DeviceFn):
        with vs.variable_scope("main"):
          kernel_init = init_ops.constant_initializer(3.14)
          bias_init = init_ops.constant_initializer(1.59)
          inputs = random_ops.random_uniform(
              [num_layers * dir_count, batch_size, num_units],
              dtype=dtypes.float32)

          layer = cudnn_rnn.CudnnLSTM(
              num_layers,
              num_units,
              direction=CUDNN_RNN_UNIDIRECTION,
              kernel_initializer=kernel_init,
              bias_initializer=bias_init,
              name="awesome_lstm")
          outputs = layer(inputs)

        # saver is created in the scope of DeviceFn.
        saver = saver_lib.Saver()

    with self.test_session(use_gpu=True, graph=graph) as sess:
      checkpoint_prefix = os.path.join(self.get_temp_dir(),
                                       "test-saveable-device-assignment")
      sess.run(variables.global_variables_initializer())

      saver.save(sess, checkpoint_prefix)
      saver.restore(sess, checkpoint_prefix)
      sess.run(outputs)

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def testDifferentShapesEager(self):
    """Runs CudnnGRU eagerly on two input widths, forward and backward."""
    # Checks that kernel caching does not cause sharing of temporary storage
    # across different input shapes when executing eagerly.
    with context.eager_mode():
      with ops.device("gpu:0"):

        def _ForwardBothShapes():
          # Two fresh layers, fed inputs whose last dimension is 28 and 100.
          narrow_output, _ = cudnn_rnn.CudnnGRU(1, 100)(
              array_ops.zeros([28, 100, 28]))
          wide_output, _ = cudnn_rnn.CudnnGRU(1, 100)(
              array_ops.zeros([28, 100, 100]))
          return narrow_output, wide_output

        first_output, second_output = _ForwardBothShapes()
        self.assertAllEqual([28, 100, 100], first_output.shape)
        self.assertAllEqual([28, 100, 100], second_output.shape)

        def _LossFunc():
          narrow_output, wide_output = _ForwardBothShapes()
          return (math_ops.reduce_sum(narrow_output) +
                  math_ops.reduce_sum(wide_output))

        backprop.implicit_grad(_LossFunc)()

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def testDifferentShapesGraph(self):
    """Feeds one CudnnGRU layer differently sliced inputs inside a while loop."""
    # Tests that a single kernel instance presented with multiple input shapes
    # does not crash with graph execution.
    with ops.device("gpu:0"):
      gru = cudnn_rnn.CudnnGRU(1, 100)
      gru(array_ops.zeros([28, 100, 100]))

      def _KeepLooping(step, running_total):
        del running_total  # unused
        return math_ops.less(step, 4)

      def _Accumulate(step, running_total):
        # The slice start alternates between 10 and 20 with the step parity.
        sliced_input = running_total[:, :, 10 * (1 + step % 2):]
        step_output, _ = gru(sliced_input)
        return step + 1, running_total + step_output

      start_value = array_ops.zeros([28, 100, 100])
      _, accumulation = control_flow_ops.while_loop(_KeepLooping, _Accumulate,
                                                    [0, start_value])
      grad, = gradients.gradients(
          math_ops.reduce_sum(accumulation), (start_value,))
    init_op = variables.global_variables_initializer()
    with self.test_session() as sess:
      sess.run(init_op)
      for evaluated in sess.run((accumulation, grad)):
        self.assertAllEqual([28, 100, 100], evaluated.shape)
Ejemplo n.º 17
0
class CudnnRNNTest(TensorFlowTestCase):
    def _CreateModel(self,
                     rnn_mode,
                     num_layers,
                     num_units,
                     input_size,
                     input_mode="linear_input",
                     dropout=0.):
        """Builds the `cudnn_rnn_ops` model that matches `rnn_mode`.

        Args:
          rnn_mode: one of "lstm", "gru", "rnn_tanh" or "rnn_relu".
          num_layers: forwarded positionally to the model constructor.
          num_units: forwarded positionally to the model constructor.
          input_size: forwarded positionally to the model constructor.
          input_mode: accepted but not forwarded to the model constructor.
          dropout: forwarded as the `dropout` keyword argument.

        Returns:
          The constructed model.

        Raises:
          ValueError: if `rnn_mode` is not one of the four supported modes.
        """
        class_name_by_mode = (
            ("lstm", "CudnnLSTM"),
            ("gru", "CudnnGRU"),
            ("rnn_tanh", "CudnnRNNTanh"),
            ("rnn_relu", "CudnnRNNRelu"),
        )
        for mode, class_name in class_name_by_mode:
            if rnn_mode == mode:
                model_class = getattr(cudnn_rnn_ops, class_name)
                return model_class(num_layers,
                                   num_units,
                                   input_size,
                                   dropout=dropout)
        raise ValueError("Invalid rnn_mode: %s" % rnn_mode)

    def _create_params_savable(self, params, model):
        """Registers an RNNParamsSaveable for the weight and bias parameters.

        The saveable is added to the SAVEABLE_OBJECTS graph collection.

        Args:
          params: a Variable for weight and bias parameters.
          model: a CudnnRNN model.
        """
        to_canonical = model.params_to_canonical
        from_canonical = model.canonical_to_params
        saveable = cudnn_rnn_ops.RNNParamsSaveable(to_canonical,
                                                   from_canonical, [params])
        ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, saveable)

    def _testSaveRestoreVariable(self, rnn_mode):
        """Saves random params, zeroes them, restores, and compares values."""
        model = self._CreateModel(
            rnn_mode, num_layers=2, num_units=7, input_size=3)
        random_seed.set_random_seed(1234)
        params_size_t = model.params_size()
        initial_value = random_ops.random_uniform([params_size_t])
        params = variables.Variable(initial_value, validate_shape=False)
        self._create_params_savable(params, model)
        checkpoint_prefix = os.path.join(self.get_temp_dir(),
                                         "save-restore-variable-test")
        saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V2)

        with self.test_session(use_gpu=True) as sess:
            sess.run(variables.global_variables_initializer())
            saved_params = sess.run(params)
            saved_to = saver.save(sess, checkpoint_prefix)
            self.assertEqual(checkpoint_prefix, saved_to)

        with self.test_session(use_gpu=True) as sess:
            # Overwrite the params with zeros so that a successful restore is
            # the only way the saved values can come back.
            zeros = array_ops.zeros([params_size_t])
            sess.run(state_ops.assign(params, zeros))
            saver.restore(sess, checkpoint_prefix)
            restored_params = sess.run(params)
            self.assertAllEqual(saved_params, restored_params)

    def _testSaveRestoreOutput(self, rnn_mode):
        """Checks the summed model outputs survive a save/zero/restore cycle."""
        num_layers, num_units, input_size = 2, 7, 7
        seq_length, batch_size, dir_count = 10, 5, 1
        model = self._CreateModel(rnn_mode, num_layers, num_units, input_size)
        params_size_t = model.params_size()
        params = variables.Variable(array_ops.ones([params_size_t]),
                                    validate_shape=False)
        self._create_params_savable(params, model)
        checkpoint_prefix = os.path.join(self.get_temp_dir(),
                                         "save-restore-output-test")
        saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V2)

        state_shape = [num_layers * dir_count, batch_size, num_units]
        call_kwargs = dict(
            input_data=array_ops.ones([seq_length, batch_size, input_size]),
            input_h=array_ops.ones(state_shape),
            params=params,
            is_training=False)
        # Only the LSTM model is given an extra cell-state input.
        if rnn_mode == "lstm":
            call_kwargs["input_c"] = array_ops.ones(state_shape)
        outputs = model(**call_kwargs)
        total_sum = sum(math_ops.reduce_sum(tensor) for tensor in outputs)

        with self.test_session(use_gpu=True) as sess:
            sess.run(variables.global_variables_initializer())
            sum_before = sess.run(total_sum)
            saved_to = saver.save(sess, checkpoint_prefix)
            self.assertEqual(checkpoint_prefix, saved_to)

        with self.test_session(use_gpu=True) as sess:
            zeros = array_ops.zeros([params_size_t])
            sess.run(state_ops.assign(params, zeros))
            saver.restore(sess, checkpoint_prefix)
            sum_after = sess.run(total_sum)
            self.assertAllEqual(sum_before, sum_after)

    @unittest.skipUnless(test.is_built_with_cuda(),
                         "Test only applicable when running on GPUs")
    def testSaveRestore(self):
        """Runs the variable and output save/restore checks for every mode."""
        for mode in ("lstm", "gru", "rnn_tanh", "rnn_relu"):
            self._testSaveRestoreVariable(mode)
            self._testSaveRestoreOutput(mode)

    def _MinLSTMParamSize(self,
                          num_layers,
                          num_units,
                          input_size,
                          input_mode="auto_select",
                          direction="unidirection"):
        if direction != "unidirection":
            # TODO(zhengxq): support bidirection in parameter size estimate.
            raise ValueError("Only unidirection in parameter size estimate")
        first_layer_weights = 4 * num_units * (num_units + input_size)
        higher_layer_weights = 8 * (num_layers - 1) * num_units * num_units
        all_biases = 8 * num_layers * num_units
        return first_layer_weights + higher_layer_weights + all_biases

    def _testOneLSTMParamsSize(self, num_layers, num_units, input_size):
        """Asserts the LSTM's reported params size is at least the estimate."""
        lower_bound = self._MinLSTMParamSize(num_layers, num_units, input_size)
        lstm = self._CreateModel("lstm", num_layers, num_units, input_size)
        params_size_t = lstm.params_size()
        with self.test_session(use_gpu=True) as sess:
            actual_size = sess.run(params_size_t)
            self.assertLessEqual(lower_bound, actual_size)

    @unittest.skipUnless(test.is_built_with_cuda(),
                         "Test only applicable when running on GPUs")
    def testLSTMParamsSize(self):
        """Checks the params-size lower bound on several LSTM configurations."""
        # Each entry is [num_layers, num_units, input_size].
        layer_unit_input_triples = [
            [4, 200, 200],
            [4, 200, 300],
            [4, 200, 100],
            [1, 100, 200],
            [2, 200, 100],
            [3, 200, 400],
        ]
        with ops.Graph().as_default():
            for triple in layer_unit_input_triples:
                num_layers, num_units, input_size = triple
                self._testOneLSTMParamsSize(num_layers, num_units, input_size)

    def _testOneSimpleInference(self, rnn_mode, num_layers, num_units,
                                input_size, batch_size, seq_length, dir_count,
                                dropout, expected, tolerance):
        """Runs inference on all-ones inputs and checks the summed outputs.

        The model is fed all-ones data, state and parameters with
        `is_training=False`. The sum over every returned tensor is compared
        with `expected`, using `tolerance` as both atol and rtol.
        """
        random_seed.set_random_seed(5678)
        model = self._CreateModel(rnn_mode,
                                  num_layers,
                                  num_units,
                                  input_size,
                                  input_mode="auto_select",
                                  dropout=dropout)
        is_lstm = rnn_mode == "lstm"
        params_size_t = model.params_size()
        state_shape = [num_layers * dir_count, batch_size, num_units]
        input_data = array_ops.ones([seq_length, batch_size, input_size])
        input_h = array_ops.ones(state_shape)
        params = variables.Variable(array_ops.ones([params_size_t]),
                                    validate_shape=False)
        call_kwargs = dict(input_data=input_data,
                           input_h=input_h,
                           params=params,
                           is_training=False)
        # The LSTM takes an extra cell-state input and returns a third tensor.
        if is_lstm:
            call_kwargs["input_c"] = array_ops.ones(state_shape)
            output, output_h, output_c = model(**call_kwargs)
        else:
            output, output_h = model(**call_kwargs)

        total_sum = (math_ops.reduce_sum(output) +
                     math_ops.reduce_sum(output_h))
        if is_lstm:
            total_sum += math_ops.reduce_sum(output_c)

        with self.test_session(use_gpu=True) as sess:
            sess.run(variables.global_variables_initializer())
            (total_sum_v,) = sess.run([total_sum])

            self.assertAllClose(total_sum_v,
                                expected,
                                atol=tolerance,
                                rtol=tolerance)

    @unittest.skipUnless(test.is_built_with_cuda(),
                         "Test only applicable when running on GPUs")
    def testSimpleInference(self):
        """Check the inference sums of every RNN mode for several dropouts."""
        # Cudnn scales the result for dropout during training only, therefore
        # dropout has no impact on inference results: every dropout value is
        # checked against the same expected sum.
        # (lstm, gru and rnn_tanh are saturated in this test; the rnn_relu case
        # is the most demonstrative of the dropout invariance of CudnnRnn.)
        dropouts = [0., 0.5, 1.]
        saturated_shape = {
            "num_layers": 4,
            "num_units": 200,
            "input_size": 200,
            "batch_size": 20,
            "seq_length": 10,
            "dir_count": 1,
        }
        relu_shape = {
            "num_layers": 2,
            "num_units": 8,
            "input_size": 4,
            "batch_size": 4,
            "seq_length": 2,
            "dir_count": 1,
        }
        # (rnn_mode, expected sum, tolerance, shape)
        cases = [
            ("lstm", 231833.22, 1e-2, saturated_shape),
            ("gru", 56000, 1e-2, saturated_shape),
            ("rnn_tanh", 56000, 1e-2, saturated_shape),
            ("rnn_relu", 130688, 1e-2, relu_shape),
        ]
        with ops.Graph().as_default():
            for rnn_mode, expected, tolerance, shape in cases:
                for dropout in dropouts:
                    self._testOneSimpleInference(
                        rnn_mode,
                        shape["num_layers"],
                        shape["num_units"],
                        shape["input_size"],
                        shape["batch_size"],
                        shape["seq_length"],
                        shape["dir_count"],
                        dropout,
                        expected,
                        tolerance)

    def _testOneSimpleTraining(self, rnn_mode, num_layers, num_units,
                               input_size, batch_size, seq_length, dir_count,
                               dropout, tolerance):
        """Gradient-check one model configuration with random inputs.

        Builds the model for ``rnn_mode``, feeds it uniformly random input
        data, initial state(s) and parameters, and asserts that the error
        reported by ``gradient_checker.compute_gradient_error`` for the sum of
        all outputs is below ``tolerance``.

        The ``TF_CUDNN_RESET_RND_GEN_STATE`` environment variable is set to
        "True" for the duration of the check and restored afterwards, also
        when the check fails or raises.

        Args:
          rnn_mode: RNN mode name; "lstm" additionally gets a cell state input.
          num_layers: number of layers passed to the model.
          num_units: number of units passed to the model.
          input_size: size of the last input dimension.
          batch_size: size of the batch dimension.
          seq_length: size of the leading input dimension.
          dir_count: multiplier applied to ``num_layers`` for the state shape.
          dropout: dropout value passed to the model.
          tolerance: exclusive upper bound on the gradient error.
        """
        # Gradient checking runs two forward ops with almost the same input.
        # Need to make sure the drop patterns across the two runs are the same.
        env_key = "TF_CUDNN_RESET_RND_GEN_STATE"
        previous_value = os.environ.get(env_key)
        os.environ[env_key] = str(True)
        try:
            has_input_c = (rnn_mode == "lstm")
            random_seed.set_random_seed(1234)
            model = self._CreateModel(rnn_mode,
                                      num_layers,
                                      num_units,
                                      input_size,
                                      dropout=dropout)
            params_size_t = model.params_size()
            state_shape = [num_layers * dir_count, batch_size, num_units]
            input_data = variables.Variable(
                random_ops.random_uniform(
                    [seq_length, batch_size, input_size]))
            input_h = variables.Variable(
                random_ops.random_uniform(state_shape))
            params = variables.Variable(
                random_ops.random_uniform([params_size_t]),
                validate_shape=False)
            if has_input_c:
                input_c = variables.Variable(
                    random_ops.random_uniform(state_shape))
                output, output_h, output_c = model(input_data=input_data,
                                                   input_h=input_h,
                                                   input_c=input_c,
                                                   params=params)
            else:
                output, output_h = model(input_data=input_data,
                                         input_h=input_h,
                                         params=params)
            output_sum = math_ops.reduce_sum(output)
            output_h_sum = math_ops.reduce_sum(output_h)
            total_sum = output_sum + output_h_sum
            if has_input_c:
                output_c_sum = math_ops.reduce_sum(output_c)
                total_sum += output_c_sum

            with self.test_session(use_gpu=True) as sess:
                # The parameter count is only known once the size tensor has
                # been evaluated; it is needed as the shape of `params`.
                params_size_v = sess.run(params_size_t)
                inputs_and_shapes = [
                    (input_data, [seq_length, batch_size, input_size]),
                    (input_h, list(state_shape)),
                    (params, [params_size_v]),
                ]
                if has_input_c:
                    inputs_and_shapes.append((input_c, list(state_shape)))
                sess.run(variables.global_variables_initializer())
                all_inputs = [entry[0] for entry in inputs_and_shapes]
                all_shapes = [entry[1] for entry in inputs_and_shapes]

                err = gradient_checker.compute_gradient_error(
                    all_inputs, all_shapes, total_sum, [1])

                self.assertLess(err, tolerance)
        finally:
            # Restore the variable even when the check fails or raises, so the
            # override cannot leak into other tests. If it was not set before,
            # remove it again instead of leaving a value behind.
            if previous_value is None:
                os.environ.pop(env_key, None)
            else:
                os.environ[env_key] = previous_value

    @unittest.skipUnless(test.is_built_with_cuda(),
                         "Test only applicable when running on GPUs")
    def testSimpleTraining(self):
        """Gradient-check every RNN mode for several dropout values."""
        # All modes use the same small shape and the same dropout values; only
        # the accepted gradient error differs per mode.
        dropouts = [0., 0.5, 1.]
        shape = {
            "num_layers": 2,
            "num_units": 3,
            "input_size": 4,
            "batch_size": 3,
            "seq_length": 4,
            "dir_count": 1,
        }
        # (rnn_mode, tolerance)
        cases = [
            ("lstm", 1e-2),
            ("gru", 4e-3),
            ("rnn_tanh", 5e-3),
            ("rnn_relu", 4e-1),
        ]
        with ops.Graph().as_default():
            for rnn_mode, tolerance in cases:
                for dropout in dropouts:
                    self._testOneSimpleTraining(
                        rnn_mode,
                        shape["num_layers"],
                        shape["num_units"],
                        shape["input_size"],
                        shape["batch_size"],
                        shape["seq_length"],
                        shape["dir_count"],
                        dropout,
                        tolerance)
class SoftmaxTest(test.TestCase):
  """Checks nn_ops.softmax and nn_ops.log_softmax against a NumPy reference."""

  def _npSoftmax(self, features, dim=-1, log=False):
    """NumPy reference implementation of (log-)softmax along `dim`.

    Args:
      features: NumPy array of logits.
      dim: axis to normalise over; -1 selects the last axis.
      log: when True, return log(softmax) instead of softmax.

    Returns:
      An array with the same shape as `features`. float16 input is computed
      in float32 and the result is cast back to float16.
    """
    # Compare by value: `dim is -1` only worked thanks to CPython's small-int
    # cache and raises a SyntaxWarning on Python 3.8+.
    if dim == -1:
      dim = len(features.shape) - 1
    one_only_on_dim = list(features.shape)
    one_only_on_dim[dim] = 1
    is_fp16 = features.dtype == np.float16
    if is_fp16:
      # Do the compute in fp32 and cast the result back to fp16 at the end.
      features = features.astype(np.float32)
    # Subtract the per-slice maximum before exponentiating.
    e = np.exp(features - np.reshape(
        np.amax(
            features, axis=dim), one_only_on_dim))
    softmax = e / np.reshape(np.sum(e, axis=dim), one_only_on_dim)
    if log:
      res = np.log(softmax)
    else:
      res = softmax
    if is_fp16:
      res = res.astype(np.float16)
    return res

  def _testSoftmax(self, np_features, dim=-1, log=False, use_gpu=False):
    """Compares the TF (log-)softmax of `np_features` with `_npSoftmax`.

    Args:
      np_features: NumPy array fed to the op.
      dim: axis passed to both the op and the reference.
      log: when True, test log_softmax instead of softmax.
      use_gpu: forwarded to `test_session`.
    """
    # A previous version of the code checked the op name rather than the op type
    # to distinguish between log and non-log.  Use an arbitrary name to catch
    # this bug in future.
    name = "arbitrary"
    np_softmax = self._npSoftmax(np_features, dim=dim, log=log)
    with self.test_session(use_gpu=use_gpu):
      if log:
        tf_softmax = nn_ops.log_softmax(np_features, axis=dim, name=name)
      else:
        tf_softmax = nn_ops.softmax(np_features, axis=dim, name=name)
      out = tf_softmax.eval()
    self.assertAllCloseAccordingToType(np_softmax, out)
    self.assertShapeEqual(np_softmax, tf_softmax)
    if not log:
      # Bonus check: the softmaxes should add to one in dimension dim.
      sum_along_dim = np.sum(out, axis=dim)
      self.assertAllCloseAccordingToType(
          np.ones(sum_along_dim.shape), sum_along_dim)

  def _testAll(self, features):
    """Runs the softmax, log-softmax and overflow checks with use_gpu=True."""
    self._testSoftmax(features, use_gpu=True)
    self._testSoftmax(features, log=True, use_gpu=True)
    self._testOverflow(use_gpu=True)

  def testNpSoftmax(self):
    """Validates the NumPy reference itself against hand-computed values."""
    features = [[1., 1., 1., 1.], [1., 2., 3., 4.]]
    # Batch 0: All exps are 1.  The expected result is
    # Softmaxes = [0.25, 0.25, 0.25, 0.25]
    # LogSoftmaxes = [-1.386294, -1.386294, -1.386294, -1.386294]
    #
    # Batch 1:
    # exps = [1., 2.718, 7.389, 20.085]
    # sum = 31.192
    # Softmaxes = exps / sum = [0.0320586, 0.08714432, 0.23688282, 0.64391426]
    # LogSoftmaxes = [-3.44019 , -2.44019 , -1.44019 , -0.44019]
    np_sm = self._npSoftmax(np.array(features))
    self.assertAllClose(
        np.array([[0.25, 0.25, 0.25, 0.25],
                  [0.0320586, 0.08714432, 0.23688282, 0.64391426]]),
        np_sm,
        rtol=1.e-5,
        atol=1.e-5)
    np_lsm = self._npSoftmax(np.array(features), log=True)
    self.assertAllClose(
        np.array([[-1.386294, -1.386294, -1.386294, -1.386294],
                  [-3.4401897, -2.4401897, -1.4401897, -0.4401897]]),
        np_lsm,
        rtol=1.e-5,
        atol=1.e-5)

  def _testOverflow(self, use_gpu=False):
    """Checks log_softmax on a row containing the largest finite float.

    float32 is used when `use_gpu` is True, float64 otherwise.
    """
    if use_gpu:
      dtype = np.float32
    else:
      dtype = np.float64
    max_value = np.finfo(dtype).max
    features = np.array(
        [[1., 1., 1., 1.], [max_value, 1., 2., 3.]]).astype(dtype)
    with self.test_session(use_gpu=use_gpu):
      tf_log_softmax = nn_ops.log_softmax(features)
      out = tf_log_softmax.eval()
    self.assertAllClose(
        np.array([[-1.386294, -1.386294, -1.386294, -1.386294],
                  [0, -max_value, -max_value, -max_value]]),
        out,
        rtol=1.e-5,
        atol=1.e-5)

  def testFloat(self):
    """Runs all checks on a small float32 input."""
    self._testAll(
        np.array([[1., 1., 1., 1.], [1., 2., 3., 4.]]).astype(np.float32))

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def testFloatGPU(self):
    """Runs all checks on randomly sized float32 inputs if a CUDA GPU exists."""
    if test.is_gpu_available(cuda_only=True):
      rows = [2**x + np.random.randint(0, 16) for x in range(1, 4)]
      cols = [2**x + np.random.randint(0, 16) for x in range(1, 4)]
      for row, col in zip(rows, cols):
        logging.info("Testing softmax float dtype in shape [%d, %d]", row, col)
        data = np.random.rand(row, col)
        self._testAll(data.astype(np.float32))

  def testHalf(self):
    """Runs all checks on a small float16 input."""
    self._testAll(
        np.array([[1., 1., 1., 1.], [1., 2., 3., 4.]]).astype(np.float16))

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def testHalfGPU(self):
    """Runs all checks on randomly sized float16 inputs if a CUDA GPU exists."""
    if test.is_gpu_available(cuda_only=True):
      rows = [2**x + np.random.randint(0, 16) for x in range(1, 4)]
      cols = [2**x + np.random.randint(0, 16) for x in range(1, 4)]
      for row, col in zip(rows, cols):
        logging.info("Testing softmax half dtype in shape [%d, %d]", row, col)
        data = np.random.rand(row, col)
        self._testAll(data.astype(np.float16))

  def testDouble(self):
    """Checks softmax and overflow handling for float64 input."""
    self._testSoftmax(
        np.array([[1., 1., 1., 1.], [1., 2., 3., 4.]]).astype(np.float64))
    self._testOverflow()

  def test1DTensorAsInput(self):
    """Checks softmax on a rank-1 input."""
    self._testSoftmax(
        np.array([3., 2., 3., 9.]).astype(np.float64), use_gpu=False)
    self._testOverflow(use_gpu=False)

  def test1DTensorAsInputNoReshape(self):
    """Rank-1 check under the 2018-08-27 forward-compatibility horizon."""
    with compat.forward_compatibility_horizon(2018, 8, 27):
      self._testSoftmax(
          np.array([3., 2., 3., 9.]).astype(np.float64), use_gpu=False)
      self._testOverflow(use_gpu=False)

  def test3DTensorAsInput(self):
    """Checks softmax over the last axis of a rank-3 input."""
    self._testSoftmax(
        np.array([[[1., 1., 1., 1.], [1., 2., 3., 4.]],
                  [[2., 3., 4., 5.], [6., 7., 8., 9.]],
                  [[5., 4., 3., 2.], [1., 2., 3., 4.]]]).astype(np.float32),
        use_gpu=False)
    self._testOverflow(use_gpu=False)

  def test3DTensorAsInputNoReshape(self):
    """Rank-3 check under the 2018-08-27 forward-compatibility horizon."""
    with compat.forward_compatibility_horizon(2018, 8, 27):
      self._testSoftmax(
          np.array([[[1., 1., 1., 1.], [1., 2., 3., 4.]],
                    [[2., 3., 4., 5.], [6., 7., 8., 9.]],
                    [[5., 4., 3., 2.], [1., 2., 3., 4.]]]).astype(np.float32),
          use_gpu=False)
      self._testOverflow(use_gpu=False)

  def testAlongFirstDimension(self):
    """Checks softmax over axis 0 of a rank-3 input."""
    self._testSoftmax(
        np.array([[[1., 1., 1., 1.], [1., 2., 3., 4.]],
                  [[2., 3., 4., 5.], [6., 7., 8., 9.]],
                  [[5., 4., 3., 2.], [1., 2., 3., 4.]]]).astype(np.float32),
        dim=0,
        use_gpu=False)
    self._testOverflow(use_gpu=False)

  def testAlongSecondDimension(self):
    """Checks softmax over axis 1 of a rank-3 input."""
    self._testSoftmax(
        np.array([[[1., 1., 1., 1.], [1., 2., 3., 4.]],
                  [[2., 3., 4., 5.], [6., 7., 8., 9.]],
                  [[5., 4., 3., 2.], [1., 2., 3., 4.]]]).astype(np.float32),
        dim=1,
        use_gpu=False)
    self._testOverflow(use_gpu=False)

  def testShapeInference(self):
    """Checks that the static output shape equals the input shape."""
    op = nn_ops.softmax([[[1., 1., 1., 1.], [1., 2., 3., 4.]],
                         [[2., 3., 4., 5.], [6., 7., 8., 9.]],
                         [[5., 4., 3., 2.], [1., 2., 3., 4.]]])
    self.assertEqual([3, 2, 4], op.get_shape())

  def testEmptyInput(self):
    """Expects InvalidArgumentError when evaluating softmax of a [0, 3] input."""
    with self.cached_session():
      x = array_ops.placeholder(dtypes.float32, shape=[0, 3])
      self.assertEqual(0, array_ops.size(x).eval())
      # reshape would raise if logits is empty
      with self.assertRaises(errors_impl.InvalidArgumentError):
        nn_ops.softmax(x, axis=0).eval()

  def testDimTooLarge(self):
    """Expects InvalidArgumentError at run time for an out-of-range axis."""
    with self.cached_session():
      # Use placeholder to make sure we get runtime error instead of shape
      # inference error.
      dim = array_ops.placeholder_with_default(100, shape=[])
      with self.assertRaises(errors_impl.InvalidArgumentError):
        nn_ops.softmax([1., 2., 3., 4.], axis=dim).eval()

  def testLargeDims(self):
    """Checks softmax on 129x129 and 256x256 inputs with and without GPU."""
    # Make sure that we properly handle large inputs. See
    # https://github.com/tensorflow/tensorflow/issues/4425 for details
    for dims in [129, 256]:
      ones = np.random.rand(dims, dims).astype(np.float32)
      np_softmax = self._npSoftmax(ones)

      for use_gpu in [True, False]:
        with self.test_session(use_gpu=use_gpu) as sess:
          x = array_ops.placeholder(dtypes.float32)
          y = nn_ops.softmax(x)
          tf_softmax = sess.run(y, feed_dict={x: ones})
        self.assertAllClose(tf_softmax, np_softmax)
Ejemplo n.º 19
0
class CudnnRNNTest(TensorFlowTestCase):
    def _CreateModel(self,
                     rnn_mode,
                     num_layers,
                     num_units,
                     input_size,
                     input_mode="linear_input",
                     dropout=0.):
        """Instantiate the cudnn_rnn_ops model class matching ``rnn_mode``.

        Args:
          rnn_mode: one of the cudnn_rnn_ops CUDNN_LSTM, CUDNN_GRU,
            CUDNN_RNN_TANH or CUDNN_RNN_RELU constants.
          num_layers: number of layers passed to the model constructor.
          num_units: number of units passed to the model constructor.
          input_size: input size passed to the model constructor.
          input_mode: accepted but not used by this helper.
          dropout: dropout value passed to the model constructor.

        Returns:
          The constructed model.

        Raises:
          ValueError: if ``rnn_mode`` matches none of the four modes.
        """
        # NOTE(review): ``input_mode`` is never forwarded to the constructor;
        # confirm whether that is intentional before wiring it through.
        if rnn_mode == cudnn_rnn_ops.CUDNN_LSTM:
            model_cls = cudnn_rnn_ops.CudnnLSTM
        elif rnn_mode == cudnn_rnn_ops.CUDNN_GRU:
            model_cls = cudnn_rnn_ops.CudnnGRU
        elif rnn_mode == cudnn_rnn_ops.CUDNN_RNN_TANH:
            model_cls = cudnn_rnn_ops.CudnnRNNTanh
        elif rnn_mode == cudnn_rnn_ops.CUDNN_RNN_RELU:
            model_cls = cudnn_rnn_ops.CudnnRNNRelu
        else:
            raise ValueError("Invalid rnn_mode: %s" % rnn_mode)
        return model_cls(num_layers, num_units, input_size, dropout=dropout)

    def _create_params_savable(self, params, model):
        """Register an RNNParamsSaveable for the weight and bias parameters.

        The saveable is built from the model's ``params_to_canonical`` and
        ``canonical_to_params`` converters under the name "rnn" and added to
        the graph's SAVEABLE_OBJECTS collection.

        Args:
          params: a Variable for weight and bias parameters.
          model: a CudnnRNN model.
        """
        saveable = cudnn_rnn_ops.RNNParamsSaveable(
            model,
            model.params_to_canonical,
            model.canonical_to_params,
            [params],
            "rnn")
        ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, saveable)

    def _testSaveRestoreVariable(self, rnn_mode):
        """Save the parameter variable, zero it, restore it and compare.

        Args:
          rnn_mode: RNN mode forwarded to ``_CreateModel``.
        """
        # Disabled alternative configuration:
        # num_layers=2, num_units=7, input_size=3.
        model = self._CreateModel(
            rnn_mode, num_layers=1, num_units=1, input_size=1)
        random_seed.set_random_seed(1234)
        size_tensor = model.params_size()
        params = variables.Variable(
            random_ops.random_uniform([size_tensor]), validate_shape=False)
        self._create_params_savable(params, model)
        checkpoint_path = os.path.join(
            self.get_temp_dir(), "save-restore-variable-test")
        saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V2)

        # First session: initialise, remember the values and save them.
        with self.test_session(use_gpu=True) as session:
            session.run(variables.global_variables_initializer())
            saved_values = session.run(params)
            written_path = saver.save(session, checkpoint_path)
            self.assertEqual(checkpoint_path, written_path)

        # Second session: overwrite with zeros, then restore and compare.
        with self.test_session(use_gpu=True) as session:
            zero_params = state_ops.assign(
                params, array_ops.zeros([size_tensor]))
            session.run(zero_params)
            saver.restore(session, checkpoint_path)
            restored_values = session.run(params)
            self.assertAllEqual(saved_values, restored_values)

    def _build_forward_cudnn_model(self,
                                   rnn_mode,
                                   num_layers,
                                   num_units,
                                   input_data,
                                   is_training=False):
        """Build a forward cuDNN RNN on ``input_data`` with zero initial state.

        Args:
          rnn_mode: RNN mode forwarded to ``_CreateModel``.
          num_layers: number of layers of the model.
          num_units: number of units of the model.
          input_data: rank-3 tensor; dimension 1 is read as the batch size and
            dimension 2 as the input size.
          is_training: forwarded to the model call.

        Returns:
          A tuple ``(output_tuple, model, params)``: the value returned by
          calling the model, the model itself and its parameter variable.
        """
        static_shape = input_data.get_shape().with_rank(3)
        batch_size = static_shape[1].value
        input_size = static_shape[2].value
        model = self._CreateModel(rnn_mode, num_layers, num_units, input_size)

        # Zero initial states; only the LSTM also needs a cell state.
        state_shape = [num_layers, batch_size, num_units]
        is_lstm = rnn_mode == cudnn_rnn_ops.CUDNN_LSTM
        input_h = constant_op.constant(
            np.zeros(state_shape), dtype=dtypes.float32)
        if is_lstm:
            input_c = constant_op.constant(
                np.zeros(state_shape), dtype=dtypes.float32)

        # Randomly initialised parameters; their size is only known as a
        # tensor, hence validate_shape=False.
        size_tensor = model.params_size()
        params = variables.Variable(
            random_ops.random_uniform([size_tensor]), validate_shape=False)

        call_args = dict(input_data=input_data,
                         input_h=input_h,
                         params=params,
                         is_training=is_training)
        if is_lstm:
            call_args["input_c"] = input_c
        output_tuple = model(**call_args)

        # Make the parameters saveable.
        self._create_params_savable(params, model)

        return output_tuple, model, params

    @unittest.skipUnless(test.is_built_with_cuda(),
                         "Test only applicable when running on GPUs")
    def testCudnnCompatibleRnnCells(self):
        """Run the cuDNN-vs-canonical comparison for LSTM and GRU models."""
        # Each entry is
        # (num_layers, seq_length, num_units, input_size, batch_size).
        shapes = (
            (1, 3, 4, 5, 6),
            (2, 8, 4, 8, 16),
            (2, 3, 4, 5, 6),
            (1, 2, 2, 4, 1),
        )
        # LSTM is checked both with and without the block cell.
        for num_layers, seq_length, num_units, input_size, batch_size in shapes:
            for use_block_cell in (True, False):
                self._testCudnnCompatibleRnnCells(
                    num_layers, seq_length, num_units, input_size, batch_size,
                    cudnn_rnn_ops.CUDNN_LSTM, use_block_cell)
        # TODO(jamesqin): Add CudnnCompatibleGRUBlockCell.
        for num_layers, seq_length, num_units, input_size, batch_size in shapes:
            self._testCudnnCompatibleRnnCells(
                num_layers, seq_length, num_units, input_size, batch_size,
                cudnn_rnn_ops.CUDNN_GRU, False)

    def _testCudnnCompatibleRnnCells(self, num_layers, seq_length, num_units,
                                     input_size, batch_size, rnn_mode,
                                     use_block_cell):
        """Train a cuDNN model, then compare it with a canonical RNN.

        Three separate graphs are built in sequence:
          1. a training graph that runs 128 gradient-descent steps on random
             data and saves a checkpoint;
          2. a cuDNN inference graph that restores the checkpoint, checks the
             restored parameters and runs inference on one random input;
          3. a canonical RNN graph, built by
             ``_create_cudnn_compatible_canonical_rnn``, that restores the same
             checkpoint and runs on the same input.
        The outputs and the per-layer states of graphs 2 and 3 must agree
        within atol/rtol of 1e-6.

        Args:
          num_layers: number of RNN layers.
          seq_length: size of the leading input dimension.
          num_units: number of units per layer.
          input_size: size of the last input dimension.
          batch_size: size of the batch dimension.
          rnn_mode: cudnn_rnn_ops mode constant; CUDNN_LSTM additionally
            compares the cell state.
          use_block_cell: forwarded to
            ``_create_cudnn_compatible_canonical_rnn``.
        """
        has_state_c = rnn_mode == cudnn_rnn_ops.CUDNN_LSTM
        # Fix the NumPy seed so the random training and inference data are
        # reproducible.
        np.random.seed(0)
        # Train graph
        with ops.Graph().as_default():
            random_seed.set_random_seed(299)
            input_data = array_ops.placeholder(
                dtypes.float32, shape=[seq_length, batch_size, input_size])
            output_tuple, cudnn_model, cudnn_params = self._build_forward_cudnn_model(
                rnn_mode, num_layers, num_units, input_data, is_training=True)
            target_output = array_ops.placeholder(dtype=dtypes.float32,
                                                  shape=None)
            # Reduce every model output to a scalar and add them up.
            total_sum = sum(map(math_ops.reduce_sum, output_tuple))

            loss_op = losses.log_loss(labels=target_output,
                                      predictions=total_sum)
            optimizer = gradient_descent.GradientDescentOptimizer(
                learning_rate=1e-2)
            train_op = optimizer.minimize(loss_op)

            saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V2)

            # Train Cudnn model
            with self.test_session(use_gpu=True,
                                   graph=ops.get_default_graph()) as sess:
                sess.run(variables.global_variables_initializer())
                # Train 128 steps
                num_steps = 128
                for _ in range(num_steps):
                    inputs = np.random.rand(seq_length, batch_size,
                                            input_size).astype(np.float32)
                    targets = np.random.rand()
                    sess.run(train_op,
                             feed_dict={
                                 input_data: inputs,
                                 target_output: targets
                             })

                save_path = os.path.join(self.get_temp_dir(),
                                         ("cudnn-rnn-%s-test" % rnn_mode))
                save_v = saver.save(sess, save_path)
                self.assertEqual(save_path, save_v)
                # Keep the trained parameter values for the restore check in
                # the inference graph below.
                cudnn_params_v = sess.run(cudnn_params)

        # cuDNN inference graph
        with ops.Graph().as_default():
            random_seed.set_random_seed(299)
            cudnn_inputs = array_ops.placeholder(
                dtypes.float32, shape=[seq_length, batch_size, input_size])
            (cudnn_output_tuple, cudnn_model,
             cudnn_params) = self._build_forward_cudnn_model(rnn_mode,
                                                             num_layers,
                                                             num_units,
                                                             cudnn_inputs,
                                                             is_training=False)
            saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V2)

            # The same input is fed to the cuDNN model here and to the
            # canonical model in the next graph.
            inference_input = np.random.rand(seq_length, batch_size,
                                             input_size).astype(np.float32)
            with self.test_session(use_gpu=True,
                                   graph=ops.get_default_graph()) as sess:
                sess.run(variables.global_variables_initializer())
                saver.restore(sess, save_path)
                restored_cudnn_params_v = sess.run(cudnn_params)
                self.assertAllEqual(cudnn_params_v, restored_cudnn_params_v)

                # Cudnn inference
                cudnn_output = sess.run(
                    cudnn_output_tuple,
                    feed_dict={cudnn_inputs: inference_input})

        # Canonical RNN inference graph
        with ops.Graph().as_default():
            random_seed.set_random_seed(299)
            cell_inputs = array_ops.placeholder(
                dtypes.float32, shape=[seq_length, batch_size, input_size])
            # `cudnn_model` is the model object created for the cuDNN
            # inference graph above.
            (output, states) = _create_cudnn_compatible_canonical_rnn(
                cudnn_model, cell_inputs, use_block_cell)
            saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V2)

            with self.test_session(use_gpu=True,
                                   graph=ops.get_default_graph()) as sess:
                saver.restore(sess, save_path)

                # BlockCell inference
                output_v, states_v = sess.run(
                    [output, states], feed_dict={cell_inputs: inference_input})

                # output across timestamps are packed into one tensor.
                self.assertAllClose(cudnn_output[0],
                                    output_v,
                                    atol=1e-6,
                                    rtol=1e-6)

                # Compare the final states layer by layer.
                for i in range(num_layers):
                    if has_state_c:
                        # output_h
                        self.assertAllClose(cudnn_output[1][i, :],
                                            states_v[i].h,
                                            atol=1e-6,
                                            rtol=1e-6)
                        # output_c
                        self.assertAllClose(cudnn_output[2][i, :],
                                            states_v[i].c,
                                            atol=1e-6,
                                            rtol=1e-6)
                    else:
                        self.assertAllClose(cudnn_output[1][i, :],
                                            states_v[i],
                                            atol=1e-6,
                                            rtol=1e-6)

    def _testSaveRestoreOutput(self, rnn_mode):
        """Check that the model output is unchanged by a save/restore cycle.

        The summed inference output for all-ones inputs is computed and the
        parameters are saved. The parameters are then zeroed and restored, and
        the summed output is recomputed and compared with the first value.

        Args:
          rnn_mode: cudnn_rnn_ops mode constant forwarded to ``_CreateModel``.
        """
        num_layers, num_units, input_size = 2, 7, 7
        seq_length, batch_size, dir_count = 10, 5, 1

        model = self._CreateModel(rnn_mode, num_layers, num_units, input_size)
        size_tensor = model.params_size()
        params = variables.Variable(
            array_ops.ones([size_tensor]), validate_shape=False)
        self._create_params_savable(params, model)
        checkpoint_path = os.path.join(
            self.get_temp_dir(), "save-restore-output-test")
        saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V2)

        is_lstm = rnn_mode == cudnn_rnn_ops.CUDNN_LSTM
        state_shape = [num_layers * dir_count, batch_size, num_units]
        call_args = {
            "input_data": array_ops.ones([seq_length, batch_size, input_size]),
            "input_h": array_ops.ones(state_shape),
            "params": params,
            "is_training": False,
        }
        if is_lstm:
            # Only the LSTM takes a cell state.
            call_args["input_c"] = array_ops.ones(state_shape)
        outputs = model(**call_args)
        total_sum = sum(math_ops.reduce_sum(tensor) for tensor in outputs)

        # First session: compute the reference sum and save the parameters.
        with self.test_session(use_gpu=True) as session:
            session.run(variables.global_variables_initializer())
            sum_before = session.run(total_sum)
            written_path = saver.save(session, checkpoint_path)
            self.assertEqual(checkpoint_path, written_path)

        # Second session: zero the parameters, restore them and recompute.
        with self.test_session(use_gpu=True) as session:
            zero_params = state_ops.assign(
                params, array_ops.zeros([size_tensor]))
            session.run(zero_params)
            saver.restore(session, checkpoint_path)
            sum_after = session.run(total_sum)
            self.assertAllEqual(sum_before, sum_after)

    @unittest.skipUnless(test.is_built_with_cuda(),
                         "Test only applicable when running on GPUs")
    def testSaveRestore(self):
        """Run the variable and output save/restore checks for each RNN mode."""
        for rnn_mode in (cudnn_rnn_ops.CUDNN_LSTM,
                         cudnn_rnn_ops.CUDNN_GRU,
                         cudnn_rnn_ops.CUDNN_RNN_TANH,
                         cudnn_rnn_ops.CUDNN_RNN_RELU):
            self._testSaveRestoreVariable(rnn_mode)
            self._testSaveRestoreOutput(rnn_mode)

    def _MinLSTMParamSize(self,
                          num_layers,
                          num_units,
                          input_size,
                          input_mode="auto_select",
                          direction=cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION):
        """Estimate a lower bound on the LSTM parameter count.

        Args:
            num_layers: Number of stacked layers.
            num_units: Number of units per layer.
            input_size: Size of the input fed to the first layer.
            input_mode: Accepted for signature compatibility; not used by
                the estimate.
            direction: Must be ``cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION``.

        Returns:
            The sum of the first-layer weight count, the weight count of
            the remaining layers, and the bias count.

        Raises:
            ValueError: If ``direction`` is not unidirectional.
        """
        if direction != cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION:
            # TODO(zhengxq): support bidirection in parameter size estimate.
            raise ValueError("Only unidirection in parameter size estimate")
        return (
            # First layer: weights over the recurrent state and the input.
            4 * num_units * (num_units + input_size)
            # Every layer above the first.
            + 8 * (num_layers - 1) * num_units * num_units
            # Biases across all layers.
            + 8 * num_layers * num_units)

    def _testOneLSTMParamsSize(self, num_layers, num_units, input_size):
        """Assert the LSTM model's params size is at least the estimate.

        Args:
            num_layers: Number of layers of the model under test.
            num_units: Number of units per layer.
            input_size: Input size of the model.
        """
        lower_bound = self._MinLSTMParamSize(num_layers, num_units,
                                             input_size)
        lstm = self._CreateModel(cudnn_rnn_ops.CUDNN_LSTM, num_layers,
                                 num_units, input_size)
        size_op = lstm.params_size()
        with self.test_session(use_gpu=True) as sess:
            self.assertLessEqual(lower_bound, sess.run(size_op))

#  @unittest.skipUnless(test.is_built_with_cuda(),
#                       "Test only applicable when running on GPUs")

    def testLSTMParamsSize(self):
        """Check the params-size lower bound for several LSTM shapes."""
        # Each entry is (num_layers, num_units, input_size).
        test_configs = (
            (4, 200, 200),
            (4, 200, 300),
            (4, 200, 100),
            (1, 100, 200),
            (2, 200, 100),
            (3, 200, 400),
        )
        with ops.Graph().as_default():
            for config in test_configs:
                self._testOneLSTMParamsSize(*config)

    def _testOneSimpleInference(self, rnn_mode, num_layers, num_units,
                                input_size, batch_size, seq_length, dir_count,
                                dropout, expected, tolerance):
        """Run one inference pass on all-ones inputs and check the output sum.

        Args:
            rnn_mode: RNN mode forwarded to ``self._CreateModel``.
            num_layers: Number of layers.
            num_units: Number of units per layer.
            input_size: Size of the last dimension of the input data.
            batch_size: Batch dimension of inputs and states.
            seq_length: Leading dimension of the input data.
            dir_count: Multiplier applied to ``num_layers`` for the leading
                dimension of the state tensors.
            dropout: Dropout value forwarded to ``self._CreateModel``.
            expected: Expected sum over all model outputs.
            tolerance: Used as both ``atol`` and ``rtol`` in the comparison.
        """
        random_seed.set_random_seed(5678)
        model = self._CreateModel(rnn_mode,
                                  num_layers,
                                  num_units,
                                  input_size,
                                  input_mode="auto_select",
                                  dropout=dropout)
        is_lstm = rnn_mode == cudnn_rnn_ops.CUDNN_LSTM
        params_size_t = model.params_size()
        state_shape = [num_layers * dir_count, batch_size, num_units]

        # Ops are created in a fixed order: data, h-state, params, c-state.
        call_args = {
            "input_data": array_ops.ones(
                [seq_length, batch_size, input_size]),
            "input_h": array_ops.ones(state_shape),
            "params": variables.Variable(array_ops.ones([params_size_t]),
                                         validate_shape=False),
        }
        if is_lstm:
            call_args["input_c"] = array_ops.ones(state_shape)
        model_outputs = model(is_training=False, **call_args)

        # Only the LSTM mode returns a third (cell state) output.
        if is_lstm:
            output, output_h, output_c = model_outputs
        else:
            output, output_h = model_outputs
        total_sum = (math_ops.reduce_sum(output) +
                     math_ops.reduce_sum(output_h))
        if is_lstm:
            total_sum += math_ops.reduce_sum(output_c)

        with self.test_session(use_gpu=True,
                               graph=ops.get_default_graph()) as sess:
            sess.run(variables.global_variables_initializer())
            (total_sum_v,) = sess.run([total_sum])
            self.assertAllClose(total_sum_v,
                                expected,
                                atol=tolerance,
                                rtol=tolerance)

#  @unittest.skipUnless(test.is_built_with_cuda(),
#                       "Test only applicable when running on GPUs")

    def testSimpleInference(self):
        """Check inference output sums for every RNN mode and dropout value."""
        # Cudnn scales result for dropout during training, therefore dropout has no
        # impact for inference results.
        # (lstm, gru, rnn_tanh are saturated in the test. rnn_relu case is most
        # demonstrative of the dropout-invariant nature of CudnnRnn.)
        dropouts = [0., 0.5, 1.]
        # Shape shared by the LSTM, GRU and RNN_TANH configurations.
        large_shape = {
            "num_layers": 4,
            "num_units": 200,
            "input_size": 200,
            "batch_size": 20,
            "seq_length": 10,
            "dir_count": 1,
        }
        # Smaller shape used for the RNN_RELU configuration.
        small_shape = {
            "num_layers": 2,
            "num_units": 8,
            "input_size": 4,
            "batch_size": 4,
            "seq_length": 2,
            "dir_count": 1,
        }
        # (rnn_mode, expected output sum, shape)
        cases = [
            (cudnn_rnn_ops.CUDNN_LSTM, 231833.22, large_shape),
            (cudnn_rnn_ops.CUDNN_GRU, 56000, large_shape),
            (cudnn_rnn_ops.CUDNN_RNN_TANH, 56000, large_shape),
            (cudnn_rnn_ops.CUDNN_RNN_RELU, 130688, small_shape),
        ]
        test_configs = [{
            "rnn_mode": mode,
            "dropout": dropouts,
            "expected": expected_sum,
            "tolerance": 1e-2,
            "shape": case_shape,
        } for mode, expected_sum, case_shape in cases]

        with ops.Graph().as_default():
            for config in test_configs:
                rnn_mode, expected, tolerance = (config["rnn_mode"],
                                                 config["expected"],
                                                 config["tolerance"])
                shape = config["shape"]
                for dropout in config.get("dropout", [0.]):
                    self._testOneSimpleInference(
                        rnn_mode, shape["num_layers"], shape["num_units"],
                        shape["input_size"], shape["batch_size"],
                        shape["seq_length"], shape["dir_count"], dropout,
                        expected, tolerance)

    def _testOneSimpleTraining(self, rnn_mode, num_layers, num_units,
                               input_size, batch_size, seq_length, dir_count,
                               dropout, tolerance):
        """Gradient-check one RNN configuration.

        Builds the model with random-uniform variables for the input data,
        the states and the parameters, sums every model output into a
        single scalar, and asserts that the error reported by
        ``gradient_checker.compute_gradient_error`` is below ``tolerance``.

        The ``TF_CUDNN_RESET_RND_GEN_STATE`` environment variable is set to
        ``"True"`` for the duration of the check and restored afterwards,
        including when the check fails or raises.

        Args:
            rnn_mode: RNN mode forwarded to ``self._CreateModel``; the LSTM
                mode additionally uses an ``input_c`` state.
            num_layers: Number of layers.
            num_units: Number of units per layer.
            input_size: Size of the last dimension of the input data.
            batch_size: Batch dimension of inputs and states.
            seq_length: Leading dimension of the input data.
            dir_count: Multiplier applied to ``num_layers`` for the leading
                dimension of the state tensors.
            dropout: Dropout value forwarded to ``self._CreateModel``.
            tolerance: Strict upper bound for the gradient error.
        """
        # Gradient checking runs two forward ops with almost the same input. Need to
        # make sure the drop patterns across the two runs are the same.
        env_key = "TF_CUDNN_RESET_RND_GEN_STATE"
        old_env_state = os.environ.get(env_key)
        os.environ[env_key] = str(True)
        try:
            has_input_c = (rnn_mode == cudnn_rnn_ops.CUDNN_LSTM)
            random_seed.set_random_seed(1234)
            model = self._CreateModel(rnn_mode,
                                      num_layers,
                                      num_units,
                                      input_size,
                                      dropout=dropout)
            params_size_t = model.params_size()
            input_data = variables.Variable(
                random_ops.random_uniform(
                    [seq_length, batch_size, input_size]))
            input_h = variables.Variable(
                random_ops.random_uniform(
                    [num_layers * dir_count, batch_size, num_units]))
            params = variables.Variable(
                random_ops.random_uniform([params_size_t]),
                validate_shape=False)
            if has_input_c:
                input_c = variables.Variable(
                    random_ops.random_uniform(
                        [num_layers * dir_count, batch_size, num_units]))

                output, output_h, output_c = model(input_data=input_data,
                                                   input_h=input_h,
                                                   input_c=input_c,
                                                   params=params)
            else:
                output, output_h = model(input_data=input_data,
                                         input_h=input_h,
                                         params=params)
            output_sum = math_ops.reduce_sum(output)
            output_h_sum = math_ops.reduce_sum(output_h)
            total_sum = output_sum + output_h_sum
            if has_input_c:
                output_c_sum = math_ops.reduce_sum(output_c)
                total_sum += output_c_sum

            with self.test_session(use_gpu=True) as sess:
                # The params variable has no static shape
                # (validate_shape=False), so its size is evaluated here.
                params_size_v = sess.run(params_size_t)
                inputs_and_shapes = [
                    (input_data, [seq_length, batch_size, input_size]),
                    (input_h,
                     [num_layers * dir_count, batch_size, num_units]),
                    (params, [params_size_v]),
                ]
                if has_input_c:
                    inputs_and_shapes.append(
                        (input_c,
                         [num_layers * dir_count, batch_size, num_units]))
                sess.run(variables.global_variables_initializer())
                all_inputs = [entry[0] for entry in inputs_and_shapes]
                all_shapes = [entry[1] for entry in inputs_and_shapes]

                err = gradient_checker.compute_gradient_error(
                    all_inputs, all_shapes, total_sum, [1])

                self.assertLess(err, tolerance)
        finally:
            # Restore on every exit path so a failing check cannot leak the
            # override into later tests. If the variable was unset before,
            # remove it again rather than leaving a value behind.
            if old_env_state is None:
                os.environ.pop(env_key, None)
            else:
                os.environ[env_key] = old_env_state


#  @unittest.skipUnless(test.is_built_with_cuda(),
#                       "Test only applicable when running on GPUs")

    def testSimpleTraining(self):
        """Gradient-check every RNN mode for each dropout value."""
        # All modes are checked on the same small shape.
        shape = {
            "num_layers": 2,
            "num_units": 3,
            "input_size": 4,
            "batch_size": 3,
            "seq_length": 4,
            "dir_count": 1,
        }
        # (rnn_mode, tolerance). The LSTM and RNN_TANH tolerances are
        # slightly looser than the earlier 1e-2 and 5e-3 values.
        mode_tolerances = [
            (cudnn_rnn_ops.CUDNN_LSTM, 1.1e-2),
            (cudnn_rnn_ops.CUDNN_RNN_TANH, 5.1e-3),
            (cudnn_rnn_ops.CUDNN_RNN_RELU, 4e-1),
            (cudnn_rnn_ops.CUDNN_GRU, 4e-3),
        ]
        test_configs = [{
            "rnn_mode": mode,
            "dropout": [0., 0.5, 1.],
            "tolerance": max_err,
            "shape": shape,
        } for mode, max_err in mode_tolerances]

        ops.reset_default_graph()
        with ops.Graph().as_default():
            for config in test_configs:
                rnn_mode, tolerance = config["rnn_mode"], config["tolerance"]
                dims = config["shape"]
                for dropout in config.get("dropout", [0.]):
                    self._testOneSimpleTraining(
                        rnn_mode, dims["num_layers"], dims["num_units"],
                        dims["input_size"], dims["batch_size"],
                        dims["seq_length"], dims["dir_count"], dropout,
                        tolerance)
Ejemplo n.º 20
0
class CudnnRNNTestBidirectional(TensorFlowTestCase):
    """Compares a bidirectional Cudnn LSTM with ``static_bidirectional_rnn``."""

    # TODO(jamesqin): Test multi-layer bi-Cudnn.
    @unittest.skipUnless(test.is_built_with_cuda(),
                         "Test only applicable when running on GPUs")
    def testSingleLayerBidirectionalLSTM(self):
        """Run the single-layer comparison over a range of problem sizes."""
        # start with 1 layer.
        test_configs = [{
            "input_size": 1,
            "num_units": 1,
            "seq_length": 1,
            "batch_size": 1
        }, {
            "input_size": 2,
            "num_units": 2,
            "seq_length": 2,
            "batch_size": 2
        }, {
            "input_size": 8,
            "num_units": 4,
            "seq_length": 4,
            "batch_size": 4
        }, {
            "input_size": 32,
            "num_units": 16,
            "seq_length": 16,
            "batch_size": 32
        }]
        for config in test_configs:
            self._testSingleLayerBidirectionalLSTMHelper(
                config["input_size"], config["num_units"],
                config["seq_length"], config["batch_size"])

    def _testSingleLayerBidirectionalLSTMHelper(self, input_size, num_units,
                                                seq_length, batch_size):
        """Compare Cudnn and canonical bidirectional LSTM results.

        Both models are fed the same random input and zero initial states.
        The canonical cells' kernels and biases are overwritten with the
        values converted from the Cudnn parameters, after which the outputs
        and final h/c states of the two models are asserted to be close.

        Args:
            input_size: Size of the last dimension of the input data.
            num_units: Number of units of each LSTM cell.
            seq_length: Leading dimension of the input data.
            batch_size: Batch dimension of inputs and states.
        """
        # Only tests single layer bi-Cudnn LSTM.
        num_layers = 1
        np.random.seed(1234)

        # canonical bidirectional lstm
        param_size = _MinLSTMParamSize(
            num_layers,
            num_units,
            input_size,
            direction=cudnn_rnn_ops.CUDNN_RNN_BIDIRECTION)
        # np data
        input_data = np.random.randn(seq_length, batch_size,
                                     input_size).astype(np.float32)
        # States have a leading dimension of num_layers * 2.
        input_h = np.zeros(
            (num_layers * 2, batch_size, num_units)).astype(np.float32)
        input_c = np.zeros(
            (num_layers * 2, batch_size, num_units)).astype(np.float32)
        cudnn_params = np.random.randn(param_size).astype(np.float32)

        with ops.Graph().as_default():
            # cudnn bidirectional lstm graph
            cudnn_params_t = variables.Variable(cudnn_params)
            input_data_t = constant_op.constant(input_data,
                                                dtype=dtypes.float32)
            input_h_t = constant_op.constant(input_h, dtype=dtypes.float32)
            input_c_t = constant_op.constant(input_c, dtype=dtypes.float32)

            cudnn_lstm = _CreateModel(
                "lstm",
                num_layers,
                num_units,
                input_size,
                direction=cudnn_rnn_ops.CUDNN_RNN_BIDIRECTION)
            cudnn_output, cudnn_output_h, cudnn_output_c = cudnn_lstm(
                input_data=input_data_t,
                input_h=input_h_t,
                input_c=input_c_t,
                params=cudnn_params_t)

            # canonical bidirectional lstm
            cell_fw = rnn_cell_impl.LSTMCell(num_units, forget_bias=0.)
            cell_bw = rnn_cell_impl.LSTMCell(num_units, forget_bias=0.)
            outputs, output_state_fw, output_state_bw = static_bidirectional_rnn(
                cell_fw,
                cell_bw,
                array_ops.unstack(input_data),
                dtype=dtypes.float32)

            weights_list, biases_list = _TransformBidirectionalCudnnLSTMParams(
                cudnn_lstm, cudnn_params_t)
            # Use unittest assertions rather than bare ``assert`` so the
            # checks are not stripped when Python runs with -O.
            self.assertEqual(2, len(weights_list))
            self.assertEqual(2, len(biases_list))

            # Fetch the variables that static_bidirectional_rnn created for
            # the forward and backward cells.
            with vs.variable_scope("", reuse=True):
                cell_fw_kernel = vs.get_variable(
                    "bidirectional_rnn/fw/lstm_cell/kernel")
                cell_fw_bias = vs.get_variable(
                    "bidirectional_rnn/fw/lstm_cell/bias")
                cell_bw_kernel = vs.get_variable(
                    "bidirectional_rnn/bw/lstm_cell/kernel")
                cell_bw_bias = vs.get_variable(
                    "bidirectional_rnn/bw/lstm_cell/bias")

            # Index 0 of the converted lists is assigned to the forward
            # cell, index 1 to the backward cell.
            assign_fw_kernel = state_ops.assign(cell_fw_kernel,
                                                weights_list[0])
            assign_fw_bias = state_ops.assign(cell_fw_bias, biases_list[0])

            assign_bw_kernel = state_ops.assign(cell_bw_kernel,
                                                weights_list[1])
            assign_bw_bias = state_ops.assign(cell_bw_bias, biases_list[1])
            assign_ops = control_flow_ops.group(assign_fw_kernel,
                                                assign_fw_bias,
                                                assign_bw_kernel,
                                                assign_bw_bias)

            with self.test_session(use_gpu=True,
                                   graph=ops.get_default_graph()) as sess:
                sess.run(variables.global_variables_initializer())
                cu_out, cu_h, cu_c = sess.run(
                    [cudnn_output, cudnn_output_h, cudnn_output_c])

                # Overwrite the canonical cells' variables before running
                # the canonical model.
                sess.run(assign_ops)
                out, fwd_s, bak_s = sess.run(
                    [outputs, output_state_fw, output_state_bw])

                out = np.stack(out)
                fwd_h, fwd_c = fwd_s.h, fwd_s.c
                bak_h, bak_c = bak_s.h, bak_s.c
                h = np.concatenate((fwd_h, bak_h), axis=1)
                c = np.concatenate((fwd_c, bak_c), axis=1)

                # Split the Cudnn states along their leading axis and join
                # the pieces along axis 1, mirroring the layout of h and c.
                cu_h = [np.array(x) for x in cu_h]
                cu_c = [np.array(x) for x in cu_c]

                cu_h = np.concatenate(cu_h, axis=1)
                cu_c = np.concatenate(cu_c, axis=1)

                self.assertAllClose(out, cu_out)
                self.assertAllClose(h, cu_h)
                self.assertAllClose(c, cu_c)
Ejemplo n.º 21
0
class CudnnRNNTestSaveRestore(TensorFlowTestCase):
    """Checks that Cudnn RNN parameters survive a checkpoint round trip."""

    def _CompareWeights(self, lhs, rhs):
        """Assert two weight lists have equal length and equal entries."""
        self.assertEqual(len(lhs), len(rhs))
        for before, after in zip(lhs, rhs):
            self.assertAllEqual(before, after)

    def _CompareBiases(self, lhs, rhs, rnn_mode, num_layers, direction):
        """Compare two bias lists layer by layer.

        Args:
            lhs: First list of biases.
            rhs: Second list of biases.
            rnn_mode: Selects the per-layer parameter count constant.
            num_layers: Number of layers the lists are expected to cover.
            direction: Unidirectional lists are compared as one group per
                layer; otherwise each layer is split into two halves that
                are compared separately.
        """
        self.assertEqual(len(lhs), len(rhs))
        if rnn_mode == CUDNN_LSTM:
            per_layer = CUDNN_LSTM_PARAMS_PER_LAYER
        elif rnn_mode == CUDNN_GRU:
            per_layer = CUDNN_GRU_PARAMS_PER_LAYER
        elif rnn_mode == CUDNN_RNN_TANH:
            per_layer = CUDNN_RNN_TANH_PARAMS_PER_LAYER
        else:
            per_layer = CUDNN_RNN_RELU_PARAMS_PER_LAYER
        unidirectional = direction == CUDNN_RNN_UNIDIRECTION
        per_layer *= 1 if unidirectional else 2
        self.assertEqual(per_layer * num_layers, len(lhs))

        for layer in range(num_layers):
            start, stop = layer * per_layer, (layer + 1) * per_layer
            layer_lhs, layer_rhs = lhs[start:stop], rhs[start:stop]
            if unidirectional:
                self._CompareSingleLayerBiases(layer_lhs, layer_rhs)
                continue
            # First half and second half are compared independently.
            half = len(layer_lhs) // 2
            self._CompareSingleLayerBiases(layer_lhs[:half],
                                           layer_rhs[:half])
            self._CompareSingleLayerBiases(layer_lhs[half:],
                                           layer_rhs[half:])

    def _CompareSingleLayerBiases(self, lhs, rhs):
        """Compare the pairwise sums of the two halves of each bias list.

        Each list is split in the middle; element ``i`` of the first half is
        added to element ``i`` of the second half, and the resulting sums of
        ``lhs`` and ``rhs`` are asserted equal.
        """
        self.assertEqual(len(lhs), len(rhs))

        mid_lhs, mid_rhs = len(lhs) // 2, len(rhs) // 2
        lf_lhs, rt_lhs = lhs[:mid_lhs], lhs[mid_lhs:]
        lf_rhs, rt_rhs = rhs[:mid_rhs], rhs[mid_rhs:]
        self.assertEqual(len(lf_lhs), len(rt_lhs))
        self.assertEqual(len(lf_rhs), len(rt_rhs))

        sum_lhs = [lf + rt for lf, rt in zip(lf_lhs, rt_lhs)]
        sum_rhs = [lf + rt for lf, rt in zip(lf_rhs, rt_rhs)]
        self.assertEqual(len(sum_lhs), len(sum_rhs))
        for total_l, total_r in zip(sum_lhs, sum_rhs):
            self.assertAllEqual(total_l, total_r)

    def _testSaveRestoreVariable(self, rnn_mode, direction, dtype):
        """Save, zero and restore one params variable, then compare.

        The canonical weights and biases derived from the variable are
        evaluated before saving and after restoring and must agree.
        """
        num_layers, num_units, input_size = 2, 7, 3
        with ops.Graph().as_default():
            model = _CreateModel(rnn_mode,
                                 num_layers=num_layers,
                                 num_units=num_units,
                                 input_size=input_size,
                                 direction=direction,
                                 dtype=dtype)
            random_seed.set_random_seed(1234)
            params_size_t = model.params_size()
            initial_value = random_ops.random_uniform([params_size_t],
                                                      dtype=dtype)
            params = variables.VariableV1(initial_value,
                                          dtype=dtype,
                                          validate_shape=False)
            saveable = _CreateParamsSavable(params, model)
            converter = saveable.format_converter
            weights, biases = converter._opaque_to_cu_canonical(
                saveable._variables)
            zeros = array_ops.zeros([params_size_t], dtype=dtype)
            reset_params = state_ops.assign(params,
                                            zeros,
                                            validate_shape=False)
            save_path = os.path.join(self.get_temp_dir(),
                                     "save-restore-variable-test")
            saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V2)
            # Passing graph explicitly, otherwise an old sess would be reused.
            with self.test_session(use_gpu=True,
                                   graph=ops.get_default_graph()) as sess:
                sess.run(variables.global_variables_initializer())
                self.assertEqual(save_path, saver.save(sess, save_path))

                weights_v, biases_v = sess.run([weights, biases])

                sess.run(reset_params)
                saver.restore(sess, save_path)
                weights_v_restored, biases_v_restored = sess.run(
                    [weights, biases])

                self._CompareWeights(weights_v, weights_v_restored)
                self._CompareBiases(biases_v, biases_v_restored, rnn_mode,
                                    num_layers, direction)

    def _testSaveRestoreTwoVariables(self, rnn_mode, direction, dtype):
        """Same round trip as the single-variable check, with two variables.

        Two params variables are registered under the names "rnn_1" and
        "rnn_2" and each is compared before saving and after restoring.
        """
        num_layers, num_units, input_size = 2, 7, 3
        with ops.Graph().as_default():
            model = _CreateModel(rnn_mode,
                                 num_layers=num_layers,
                                 num_units=num_units,
                                 input_size=input_size,
                                 direction=direction,
                                 dtype=dtype)
            random_seed.set_random_seed(1234)
            params_size_t = model.params_size()
            names = ["rnn_1", "rnn_2"]
            # One randomly initialised params variable per name.
            param_vars = [
                variables.VariableV1(
                    random_ops.random_uniform([params_size_t], dtype=dtype),
                    dtype=dtype,
                    validate_shape=False) for _ in names
            ]
            saveables = [
                _CreateParamsSavable(params, model, name, name)
                for name, params in zip(names, param_vars)
            ]
            (weights1, biases1), (weights2, biases2) = [
                saveable.format_converter._opaque_to_cu_canonical(
                    saveable._variables) for saveable in saveables
            ]
            reset_params = [
                state_ops.assign(params,
                                 array_ops.zeros([params_size_t], dtype=dtype),
                                 validate_shape=False)
                for params in param_vars
            ]
            save_path = os.path.join(self.get_temp_dir(),
                                     "save-restore-variable-test")
            saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V2)
            # Passing graph explicitly, otherwise an old sess would be reused.
            with self.test_session(use_gpu=True,
                                   graph=ops.get_default_graph()) as sess:
                sess.run(variables.global_variables_initializer())
                self.assertEqual(save_path, saver.save(sess, save_path))
                weights1_v, biases1_v = sess.run([weights1, biases1])
                weights2_v, biases2_v = sess.run([weights2, biases2])

                sess.run(reset_params)
                saver.restore(sess, save_path)
                weights1_v_restored, biases1_v_restored = sess.run(
                    [weights1, biases1])
                weights2_v_restored, biases2_v_restored = sess.run(
                    [weights2, biases2])

                self._CompareWeights(weights1_v, weights1_v_restored)
                self._CompareWeights(weights2_v, weights2_v_restored)
                self._CompareBiases(biases1_v, biases1_v_restored, rnn_mode,
                                    num_layers, direction)
                self._CompareBiases(biases2_v, biases2_v_restored, rnn_mode,
                                    num_layers, direction)

    def _testSaveRestoreOutput(self, rnn_mode, direction, dtype):
        """Check model outputs are unchanged by a save/restore cycle.

        The summed outputs are evaluated, the parameters checkpointed, then
        a second session zeroes the parameters, restores them and asserts
        the summed outputs are close to the first value.
        """
        with ops.Graph().as_default():
            num_layers, num_units, input_size = 2, 7, 7
            seq_length, batch_size = 10, 5
            if direction == cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION:
                dir_count = 1
            else:
                dir_count = 2
            model = _CreateModel(rnn_mode,
                                 num_layers,
                                 num_units,
                                 input_size,
                                 direction=direction,
                                 dtype=dtype)
            params_size_t = model.params_size()
            params = variables.VariableV1(
                array_ops.ones([params_size_t], dtype=dtype),
                validate_shape=False,
                dtype=dtype)
            _CreateParamsSavable(params, model)
            save_path = os.path.join(self.get_temp_dir(),
                                     "save-restore-output-test")
            saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V2)

            np.random.seed(1234)
            state_dims = (num_layers * dir_count, batch_size, num_units)
            # Random draws happen in a fixed order: data, h-state, c-state.
            model_inputs = {
                "input_data": constant_op.constant(
                    np.random.randn(seq_length, batch_size, input_size),
                    dtype=dtype),
                "input_h": constant_op.constant(
                    np.random.randn(*state_dims), dtype=dtype),
            }
            if rnn_mode == cudnn_rnn_ops.CUDNN_LSTM:
                model_inputs["input_c"] = constant_op.constant(
                    np.random.randn(*state_dims), dtype=dtype)
            outputs = model(params=params, is_training=False, **model_inputs)
            total_sum = sum(math_ops.reduce_sum(out) for out in outputs)
            # Passing graph explicitly, otherwise an old sess would be reused.
            with self.test_session(use_gpu=True,
                                   graph=ops.get_default_graph()) as sess:
                sess.run(variables.global_variables_initializer())
                total_sum_v = sess.run(total_sum)
                self.assertEqual(save_path, saver.save(sess, save_path))
            # Passing graph explicitly, otherwise an old sess would be reused.
            with self.test_session(use_gpu=True,
                                   graph=ops.get_default_graph()) as sess:
                zeros = array_ops.zeros([params_size_t], dtype=dtype)
                reset_params = state_ops.assign(params,
                                                zeros,
                                                validate_shape=False)
                sess.run(reset_params)
                saver.restore(sess, save_path)
                self.assertAllClose(total_sum_v,
                                    sess.run(total_sum),
                                    atol=1e-5)

    @unittest.skipUnless(test.is_built_with_cuda(),
                         "Test only applicable when running on GPUs")
    def testSaveRestore(self):
        """Run all save/restore checks for each mode, direction and dtype."""
        rnn_modes = (cudnn_rnn_ops.CUDNN_LSTM, cudnn_rnn_ops.CUDNN_GRU,
                     cudnn_rnn_ops.CUDNN_RNN_TANH,
                     cudnn_rnn_ops.CUDNN_RNN_RELU)
        directions = (cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION,
                      cudnn_rnn_ops.CUDNN_RNN_BIDIRECTION)
        dtype_list = (dtypes.float32, dtypes.float64)
        for rnn_mode in rnn_modes:
            for direction in directions:
                for dtype in dtype_list:
                    self._testSaveRestoreVariable(rnn_mode, direction, dtype)
                    self._testSaveRestoreTwoVariables(rnn_mode, direction,
                                                      dtype)
                    self._testSaveRestoreOutput(rnn_mode, direction, dtype)
Ejemplo n.º 22
0
class CudnnParamsFormatConverterTest(TensorFlowTestCase,
                                     parameterized.TestCase):
  """Class for testing various format converters."""

  def _test_lstm_helper(self, num_units, input_size, num_layers, direction):
    """Checks the LSTM converter round trip and the opaque params size.

    Builds one random kernel/bias pair per layer and direction, converts them
    to the opaque format and back, and asserts the originals are recovered.
    Also asserts that the opaque params size reported by cudnn_rnn_ops is at
    least the total element count of the canonical kernels and biases.

    Args:
      num_units: unit count used in the kernel and bias shapes.
      input_size: input depth used in the kernel shape.
      num_layers: number of layers.
      direction: cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION for one direction; any
        other value is treated as two directions.
    """
    with self.session(use_gpu=True) as sess:
      random_seed.set_random_seed(0)
      np.random.seed(0)

      num_dirs = 1 if direction == cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION else 2
      format_converter = cudnn_rnn_ops.CudnnParamsFormatConverterLSTM(
          num_layers, num_units, input_size, direction=direction)

      ws, bs = [], []
      for _ in range(num_layers * num_dirs):
        w = constant_op.constant(
            np.random.rand(input_size + num_units, 4 * num_units),
            dtype=dtypes.float32)
        b = constant_op.constant(
            np.random.rand(4 * num_units), dtype=dtypes.float32)
        ws.append(w)
        bs.append(b)

      opaque_params = format_converter.tf_canonical_to_opaque(ws + bs)
      opaque_params_size = cudnn_rnn_ops.cudnn_rnn_opaque_params_size(
          cudnn_rnn_ops.CUDNN_LSTM,
          num_layers,
          num_units,
          input_size,
          direction=direction)

      ws_r, bs_r = format_converter.opaque_to_tf_canonical(opaque_params)

      # Test tf_canonical_to_opaque() followed by opaque_to_tf_canonical()
      # returns the original input.
      ws, ws_r, bs, bs_r = sess.run([ws, ws_r, bs, bs_r])
      for w, w_r in zip(ws, ws_r):
        self.assertAllClose(w, w_r)
      for b, b_r in zip(bs, bs_r):
        self.assertAllClose(b, b_r)

      # Test opaque_params size lower bound
      opaque_params_size_v = sess.run(opaque_params_size)
      # Use the builtin sum for both terms: calling np.sum on a generator
      # expression is deprecated by NumPy, and the GRU helper in this class
      # already uses the builtin.
      min_params_size = sum(x.size for x in ws) + sum(x.size for x in bs)
      logging.info("min_parm_size: %d vs actual_opaque_param_size: %d",
                   min_params_size, opaque_params_size_v)
      self.assertLessEqual(min_params_size, opaque_params_size_v)

  @parameterized.named_parameters((c["testcase_name"], c["num_units"],
                                   c["input_size"], c["num_layers"])
                                  for c in NAMED_RNN_TESTCASES)
  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def test_lstm(self, num_units, input_size, num_layers):
    """Runs the LSTM converter check unidirectionally; skips without GPUs."""
    if not context.context().num_gpus():
      self.skipTest("No GPUs found")
    self._test_lstm_helper(num_units, input_size, num_layers,
                           cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION)

  @parameterized.named_parameters((c["testcase_name"], c["num_units"],
                                   c["input_size"], c["num_layers"])
                                  for c in NAMED_RNN_TESTCASES)
  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def test_lstm_bidi(self, num_units, input_size, num_layers):
    """Runs the LSTM converter check bidirectionally; skips without GPUs."""
    if not context.context().num_gpus():
      self.skipTest("No GPUs found")
    self._test_lstm_helper(num_units, input_size, num_layers,
                           cudnn_rnn_ops.CUDNN_RNN_BIDIRECTION)

  def _test_gru_helper(self, num_units, input_size, num_layers, direction):
    """Checks the GRU converter round trip and the opaque params size.

    For every layer and direction, builds a random gate kernel/bias plus
    candidate input and hidden kernels/biases, converts them to the opaque
    format and back, and asserts the originals are recovered. Also asserts
    that the reported opaque params size is at least the total element count
    of the canonical kernels and biases.

    Args:
      num_units: unit count used in the kernel and bias shapes.
      input_size: input depth used in the kernel shapes.
      num_layers: number of layers.
      direction: cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION for one direction; any
        other value is treated as two directions.
    """
    with self.session(use_gpu=True) as sess:
      random_seed.set_random_seed(0)
      np.random.seed(0)

      num_dirs = 1 if direction == cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION else 2
      format_converter = cudnn_rnn_ops.CudnnParamsFormatConverterGRU(
          num_layers, num_units, input_size, direction=direction)

      ws, bs = [], []
      for _ in range(num_layers * num_dirs):
        gate_kernel = constant_op.constant(
            np.random.rand(input_size + num_units, num_units * 2),
            dtype=dtypes.float32)
        gate_bias = constant_op.constant(
            np.random.rand(num_units * 2), dtype=dtypes.float32)
        candidate_inp_kernel = constant_op.constant(
            np.random.rand(input_size, num_units), dtype=dtypes.float32)
        candidate_inp_bias = constant_op.constant(
            np.random.rand(num_units), dtype=dtypes.float32)
        candidate_hid_kernel = constant_op.constant(
            np.random.rand(num_units, num_units), dtype=dtypes.float32)
        candidate_hid_bias = constant_op.constant(
            np.random.rand(num_units), dtype=dtypes.float32)
        ws.extend([gate_kernel, candidate_inp_kernel, candidate_hid_kernel])
        bs.extend([gate_bias, candidate_inp_bias, candidate_hid_bias])

      opaque_params = format_converter.tf_canonical_to_opaque(ws + bs)
      opaque_params_size = cudnn_rnn_ops.cudnn_rnn_opaque_params_size(
          cudnn_rnn_ops.CUDNN_GRU,
          num_layers,
          num_units,
          input_size,
          direction=direction)

      ws_r, bs_r = format_converter.opaque_to_tf_canonical(opaque_params)

      # Test tf_canonical_to_opaque() followed by opaque_to_tf_canonical()
      # returns the original input.
      ws, ws_r, bs, bs_r = sess.run([ws, ws_r, bs, bs_r])
      for w, w_r in zip(ws, ws_r):
        self.assertAllClose(w, w_r)
      for b, b_r in zip(bs, bs_r):
        self.assertAllClose(b, b_r)

      # Test opaque_params size lower bound
      opaque_params_size_v = sess.run(opaque_params_size)
      min_params_size = sum(x.size for x in ws) + sum(x.size for x in bs)
      logging.info("min_parm_size: %d vs actual_opaque_param_size: %d",
                   min_params_size, opaque_params_size_v)
      self.assertLessEqual(min_params_size, opaque_params_size_v)

  @parameterized.named_parameters((c["testcase_name"], c["num_units"],
                                   c["input_size"], c["num_layers"])
                                  for c in NAMED_RNN_TESTCASES)
  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def test_gru(self, num_units, input_size, num_layers):
    """Runs the GRU converter check unidirectionally; skips without GPUs."""
    if not context.context().num_gpus():
      self.skipTest("No GPUs found")
    self._test_gru_helper(num_units, input_size, num_layers,
                          cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION)

  @parameterized.named_parameters((c["testcase_name"], c["num_units"],
                                   c["input_size"], c["num_layers"])
                                  for c in NAMED_RNN_TESTCASES)
  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def test_gru_bidi(self, num_units, input_size, num_layers):
    """Runs the GRU converter check bidirectionally; skips without GPUs."""
    if not context.context().num_gpus():
      self.skipTest("No GPUs found")
    self._test_gru_helper(num_units, input_size, num_layers,
                          cudnn_rnn_ops.CUDNN_RNN_BIDIRECTION)
Ejemplo n.º 23
0
class CudnnGRUTest(TensorFlowTestCase, parameterized.TestCase):
  """Compares the paired results returned by RunGRU for training/inference."""

  def _test_training_helper(self,
                            num_units,
                            input_size,
                            batch_size,
                            time,
                            num_layers,
                            dtype,
                            variable_seq_lengths,
                            time_major,
                            dynamic_shape_input=False,
                            rtol=3e-6,
                            atol=3e-6):
    """Runs RunGRU in training mode and compares each result pair.

    Args:
      num_units: forwarded to RunGRU.
      input_size: forwarded to RunGRU.
      batch_size: forwarded to RunGRU.
      time: forwarded to RunGRU.
      num_layers: forwarded to RunGRU.
      dtype: dtype forwarded to RunGRU as its `dtype` keyword.
      variable_seq_lengths: forwarded to RunGRU.
      time_major: forwarded to RunGRU.
      dynamic_shape_input: forwarded to RunGRU.
      rtol: relative tolerance for every assertAllClose comparison.
      atol: absolute tolerance for every assertAllClose comparison.
    """
    with self.session(use_gpu=True) as sess:
      # `dtype` must be forwarded explicitly. It used to be accepted but
      # dropped, so the fp16 training test silently ran with RunGRU's default
      # dtype instead of float16.
      (outputs, cu_outputs, h, cu_h, inp_grad, cu_inp_grad, hgrad, cu_hgrad,
       wgrad, bgrad, cu_wgrad, cu_bgrad) = RunGRU(
           sess,
           num_units,
           input_size,
           batch_size,
           time,
           num_layers,
           dtype=dtype,
           variable_seq_lengths=variable_seq_lengths,
           time_major=time_major,
           dynamic_shape_input=dynamic_shape_input)

      self.assertAllClose(outputs, cu_outputs, rtol=rtol, atol=atol)
      self.assertAllClose(h, cu_h, rtol=rtol, atol=atol)
      self.assertAllClose(hgrad, cu_hgrad, rtol=rtol, atol=atol)
      self.assertAllClose(inp_grad, cu_inp_grad, rtol=rtol, atol=atol)
      for bg, cu_bg in zip(bgrad, cu_bgrad):
        self.assertAllClose(bg, cu_bg, rtol=rtol, atol=atol)
      for wg, cu_wg in zip(wgrad, cu_wgrad):
        self.assertAllClose(wg, cu_wg, rtol=rtol, atol=atol)

  @parameterized.named_parameters(
      ExpandNamedTestCases(
          NAMED_RNN_TESTCASES, **{
              "variable_seq_lengths": [True, False],
              "time_major": [True, False],
              "dynamic_shape_input": [True, False],
          }))
  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def test_training(self, num_units, input_size, batch_size, time, num_layers,
                    variable_seq_lengths, time_major, dynamic_shape_input):
    """Training comparison in float32 with the helper's default tolerances."""
    if not context.context().num_gpus():
      self.skipTest("No GPUs found")
    self._test_training_helper(
        num_units,
        input_size,
        batch_size,
        time,
        num_layers,
        dtypes.float32,
        variable_seq_lengths=variable_seq_lengths,
        time_major=time_major,
        dynamic_shape_input=dynamic_shape_input)

  @parameterized.named_parameters(
      ExpandNamedTestCases(
          NAMED_RNN_TESTCASES, **{
              "variable_seq_lengths": [True, False],
              "time_major": [True, False],
              "dynamic_shape_input": [True, False],
          }))
  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def test_training_fp16(self, num_units, input_size, batch_size, time,
                         num_layers, variable_seq_lengths, time_major,
                         dynamic_shape_input):
    """Training comparison in float16 with loosened tolerances."""
    if not context.context().num_gpus():
      self.skipTest("No GPUs found")
    self._test_training_helper(
        num_units,
        input_size,
        batch_size,
        time,
        num_layers,
        dtypes.float16,
        rtol=5e-3,
        atol=5e-4,
        variable_seq_lengths=variable_seq_lengths,
        time_major=time_major,
        dynamic_shape_input=dynamic_shape_input)

  @parameterized.named_parameters(
      ExpandNamedTestCases(
          NAMED_RNN_TESTCASES, **{
              "variable_seq_lengths": [True, False],
              "time_major": [True, False],
              "dynamic_shape_input": [True, False],
          }))
  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def test_inference(self, num_units, input_size, batch_size, time, num_layers,
                     variable_seq_lengths, time_major, dynamic_shape_input):
    """Inference comparison using RunGRU's default dtype and tolerances."""
    if not context.context().num_gpus():
      self.skipTest("No GPUs found")
    with self.session(use_gpu=True) as sess:
      (outputs, cu_outputs, h, cu_h) = RunGRU(
          sess,
          num_units,
          input_size,
          batch_size,
          time,
          num_layers,
          is_training=False,
          variable_seq_lengths=variable_seq_lengths,
          time_major=time_major,
          dynamic_shape_input=dynamic_shape_input)
      self.assertAllClose(outputs, cu_outputs)
      self.assertAllClose(h, cu_h)

  @parameterized.named_parameters(
      ExpandNamedTestCases(
          NAMED_RNN_TESTCASES, **{
              "variable_seq_lengths": [True, False],
              "time_major": [True, False],
              "dynamic_shape_input": [True, False],
          }))
  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def test_inference_fp16(self, num_units, input_size, batch_size, time,
                          num_layers, variable_seq_lengths, time_major,
                          dynamic_shape_input):
    """Inference comparison in float16 with loosened tolerances."""
    if not context.context().num_gpus():
      self.skipTest("No GPUs found")
    with self.session(use_gpu=True) as sess:
      (outputs, cu_outputs, h, cu_h) = RunGRU(
          sess,
          num_units,
          input_size,
          batch_size,
          time,
          num_layers,
          is_training=False,
          dtype=dtypes.float16,
          variable_seq_lengths=variable_seq_lengths,
          time_major=time_major,
          dynamic_shape_input=dynamic_shape_input)

      rtol, atol = 5e-3, 5e-4
      self.assertAllClose(outputs, cu_outputs, rtol=rtol, atol=atol)
      self.assertAllClose(h, cu_h, rtol=rtol, atol=atol)

  @parameterized.named_parameters(
      ExpandNamedTestCases(
          NAMED_RNN_TESTCASES, **{
              "variable_seq_lengths": [True, False],
              "time_major": [True, False],
              "dynamic_shape_input": [True, False],
          }))
  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def test_inference_with_dropout(self, num_units, input_size, batch_size, time,
                                  num_layers, variable_seq_lengths, time_major,
                                  dynamic_shape_input):
    """Validates that dropout does not affect Cudnn Rnn inference."""
    # Hand-picked dropouts are used below (0. and 1.)
    if not context.context().num_gpus():
      self.skipTest("No GPUs found")
    # Each run gets its own fresh graph and session.
    with ops.Graph().as_default() as g:
      with self.session(use_gpu=True, graph=g) as sess:
        # 1st time w/o dropout.
        (_, cu_outputs, _, cu_h) = RunGRU(
            sess,
            num_units,
            input_size,
            batch_size,
            time,
            num_layers,
            is_training=False,
            dropout=0.,
            variable_seq_lengths=variable_seq_lengths,
            time_major=time_major,
            dynamic_shape_input=dynamic_shape_input)

    with ops.Graph().as_default() as g:
      with self.session(use_gpu=True, graph=g) as sess:
        # 2nd time with dropout set to 1.
        (_, cu_outputs2, _, cu_h2) = RunGRU(
            sess,
            num_units,
            input_size,
            batch_size,
            time,
            num_layers,
            is_training=False,
            dropout=1.,
            variable_seq_lengths=variable_seq_lengths,
            time_major=time_major,
            dynamic_shape_input=dynamic_shape_input)

    self.assertAllClose(cu_outputs, cu_outputs2)
    self.assertAllClose(cu_h[0], cu_h2[0])
Ejemplo n.º 24
0
class CudnnRnnSaveRestoreTest(TensorFlowTestCase, parameterized.TestCase):
  """Class for testing various Cudnn Rnn SaveableObjects."""

  def _create_opaque_param(self,
                           rnn_mode,
                           num_units,
                           input_size,
                           num_layers,
                           direction,
                           name=None):
    """Creates a randomly initialized opaque parameter variable.

    Args:
      rnn_mode: RNN mode forwarded to cudnn_rnn_opaque_params_size.
      num_units: forwarded to cudnn_rnn_opaque_params_size.
      input_size: forwarded to cudnn_rnn_opaque_params_size.
      num_layers: forwarded to cudnn_rnn_opaque_params_size.
      direction: forwarded to cudnn_rnn_opaque_params_size.
      name: variable name; "opaque_param" is used when it is falsy.

    Returns:
      The variable returned by variable_scope.get_variable.
    """
    param_size_t = cudnn_rnn_ops.cudnn_rnn_opaque_params_size(
        rnn_mode, num_layers, num_units, input_size, direction=direction)
    init_val = random_ops.random_uniform([param_size_t])
    return variable_scope.get_variable(
        name or "opaque_param", initializer=init_val, validate_shape=False)

  def _create_saveable(self, opaque_param, rnn_mode, num_units, input_size,
                       num_layers, direction):
    """Wraps `opaque_param` in the saveable class that matches `rnn_mode`.

    Args:
      opaque_param: the opaque parameter variable to wrap.
      rnn_mode: one of CUDNN_LSTM, CUDNN_GRU, CUDNN_RNN_TANH, CUDNN_RNN_RELU.
      num_units: forwarded to the saveable constructor.
      input_size: forwarded to the saveable constructor.
      num_layers: forwarded to the saveable constructor.
      direction: forwarded to the saveable constructor.

    Returns:
      The constructed saveable object.

    Raises:
      ValueError: if `rnn_mode` is not one of the four supported modes.
    """
    if rnn_mode == CUDNN_LSTM:
      fn = cudnn_rnn_ops.CudnnLSTMSaveable
    elif rnn_mode == CUDNN_GRU:
      fn = cudnn_rnn_ops.CudnnGRUSaveable
    elif rnn_mode == CUDNN_RNN_TANH:
      fn = cudnn_rnn_ops.CudnnRNNTanhSaveable
    elif rnn_mode == CUDNN_RNN_RELU:
      fn = cudnn_rnn_ops.CudnnRNNReluSaveable
    else:
      # Without this branch an unknown mode left `fn` unbound and the call
      # below failed with an unhelpful UnboundLocalError.
      raise ValueError("Invalid rnn_mode: %s" % rnn_mode)
    saveable = fn(
        opaque_param, num_layers, num_units, input_size, direction=direction)
    return saveable

  def _compare_weights(self, lhs, rhs):
    """Asserts both weight sequences have equal length and equal items."""
    self.assertLen(rhs, len(lhs))
    for lw, rw in zip(lhs, rhs):
      self.assertAllEqual(lw, rw)

  def _compare_biases(self, lhs, rhs):
    """Asserts both bias sequences have equal length and equal items."""
    self.assertLen(rhs, len(lhs))
    for lf, rt in zip(lhs, rhs):
      self.assertAllEqual(lf, rt)

  @parameterized.named_parameters(
      ExpandNamedTestCases(
          NAMED_RNN_TESTCASES, "time", "batch_size", **{
              "rnn_mode": [
                  CUDNN_LSTM, CUDNN_GRU, CUDNN_RNN_RELU, CUDNN_RNN_TANH
              ],
              "direction": [CUDNN_RNN_UNIDIRECTION, CUDNN_RNN_BIDIRECTION]
          }))
  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def test_save_restore_variable(self, rnn_mode, num_units, input_size,
                                 num_layers, direction):
    """Saves, zeroes and restores one opaque param, then compares values."""
    # Verify the restored opaque param, once converted to tf_canonical format,
    # is the same as the tf canonicals of the pre-restored param.
    if not context.context().num_gpus():
      self.skipTest("No GPUs found")
    with self.session(use_gpu=True) as sess:
      opaque_param = self._create_opaque_param(rnn_mode, num_units, input_size,
                                               num_layers, direction)
      saveable = self._create_saveable(opaque_param, rnn_mode, num_units,
                                       input_size, num_layers, direction)
      ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, saveable)
      weights_op, biases_op = saveable.format_converter.opaque_to_tf_canonical(
          saveable._variables)

      save_path = os.path.join(self.get_temp_dir(), "save_restore_var_test")
      saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V2)

      init_op = variables.global_variables_initializer()
      reset_op = state_ops.assign(opaque_param,
                                  array_ops.zeros_like(opaque_param))
      sess.run(init_op)
      self.assertEqual(save_path, saver.save(sess, save_path))

      # Get the tf canonical vals before reset-restore
      weights, biases = sess.run([weights_op, biases_op])

      # Reset the opaque param value
      sess.run(reset_op)
      # Assert reset happened.
      weights_z, biases_z = sess.run([weights_op, biases_op])
      for w in weights_z:
        self.assertAllClose(w, np.zeros_like(w))
      for b in biases_z:
        self.assertAllClose(b, np.zeros_like(b))

      # Restore opaque param value from checkpoint.
      saver.restore(sess, save_path)
      weights_r, biases_r = sess.run([weights_op, biases_op])
      self._compare_weights(weights, weights_r)
      self._compare_biases(biases, biases_r)

  @parameterized.named_parameters(
      ExpandNamedTestCases(
          NAMED_RNN_TESTCASES, "time", "batch_size", **{
              "rnn_mode": [
                  CUDNN_LSTM, CUDNN_GRU, CUDNN_RNN_RELU, CUDNN_RNN_TANH
              ],
              "direction": [CUDNN_RNN_UNIDIRECTION, CUDNN_RNN_BIDIRECTION]
          }))
  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def test_save_restore_multi_variables(self, rnn_mode, num_units, input_size,
                                        num_layers, direction):
    """Same as the single-variable test, but with two opaque params."""
    # Verify each restored opaque param, once converted to tf_canonical
    # format, is the same as the tf canonicals of the pre-restored param.
    if not context.context().num_gpus():
      self.skipTest("No GPUs found")
    with self.session(use_gpu=True) as sess:
      opaque_params = []
      saveables = []
      num_opaque_params = 2
      for i in range(num_opaque_params):
        opaque_param = self._create_opaque_param(
            rnn_mode,
            num_units,
            input_size,
            num_layers,
            direction,
            name="opaque_param_%d" % i)
        saveable = self._create_saveable(opaque_param, rnn_mode, num_units,
                                         input_size, num_layers, direction)
        ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, saveable)
        opaque_params.append(opaque_param)
        saveables.append(saveable)

      weights_ops, biases_ops = [], []
      for saveable in saveables:
        weights_op, biases_op = (
            saveable.format_converter.opaque_to_tf_canonical(
                saveable._variables))
        weights_ops.append(weights_op)
        biases_ops.append(biases_op)

      save_path = os.path.join(self.get_temp_dir(), "save_restore_var_test")
      saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V2)

      init_op = variables.global_variables_initializer()
      reset_ops = [
          state_ops.assign(param, array_ops.zeros_like(param))
          for param in opaque_params
      ]
      sess.run(init_op)
      self.assertEqual(save_path, saver.save(sess, save_path))

      for weights_op, biases_op, reset_op in zip(weights_ops, biases_ops,
                                                 reset_ops):
        # Get the tf canonical vals before reset-restore
        weights, biases = sess.run([weights_op, biases_op])

        # Reset the opaque param value
        sess.run(reset_op)

        # Assert reset happened.
        weights_z, biases_z = sess.run([weights_op, biases_op])
        for w in weights_z:
          self.assertAllClose(w, np.zeros_like(w))
        for b in biases_z:
          self.assertAllClose(b, np.zeros_like(b))

        # Restore opaque param value from checkpoint.
        saver.restore(sess, save_path)
        weights_r, biases_r = sess.run([weights_op, biases_op])
        self._compare_weights(weights, weights_r)
        self._compare_biases(biases, biases_r)
Ejemplo n.º 25
0
 def testBuildInfo(self):
     """Check that the build_info flags match the test module's queries."""
     rocm_flag = build_info.is_rocm_build
     self.assertEqual(rocm_flag, test.is_built_with_rocm())
     cuda_flag = build_info.is_cuda_build
     self.assertEqual(cuda_flag, test.is_built_with_cuda())
Ejemplo n.º 26
0
class CudnnRNNTest(TensorFlowTestCase):
    def _CreateModel(self, rnn_mode, num_layers, num_units, input_size):
        if rnn_mode == "lstm":
            model = cudnn_rnn_ops.CudnnLSTM(num_layers, num_units, input_size)
        elif rnn_mode == "gru":
            model = cudnn_rnn_ops.CudnnGRU(num_layers, num_units, input_size)
        elif rnn_mode == "rnn_tanh":
            model = cudnn_rnn_ops.CudnnRNNTanh(num_layers, num_units,
                                               input_size)
        elif rnn_mode == "rnn_relu":
            model = cudnn_rnn_ops.CudnnRNNRelu(num_layers, num_units,
                                               input_size)
        else:
            raise ValueError("Invalid rnn_mode: %s" % rnn_mode)
        return model

    def _create_params_savable(self, params, model):
        """Register a RNNParamsSaveable for `params` as a saveable object.

        The saveable is built from the model's `params_to_canonical` and
        `canonical_to_params` attributes and added to the
        SAVEABLE_OBJECTS graph collection.

        Args:
          params: a Variable for weight and bias parameters.
          model: a CudnnRNN model.
        """
        saveable = cudnn_rnn_ops.RNNParamsSaveable(
            model.params_to_canonical,
            model.canonical_to_params,
            params,
        )
        collection_key = ops.GraphKeys.SAVEABLE_OBJECTS
        ops.add_to_collection(collection_key, saveable)

    def _testSaveRestoreVariable(self, rnn_mode):
        """Save a random params variable, zero it, restore it and compare.

        Args:
          rnn_mode: mode string handed to `_CreateModel`.
        """
        # Graph-building statements keep their original order on purpose.
        rnn_model = self._CreateModel(rnn_mode, 2, 7, 3)
        random_seed.set_random_seed(1234)
        size_t = rnn_model.params_size()
        initial_value = random_ops.random_uniform([size_t])
        params = variables.Variable(initial_value, validate_shape=False)
        self._create_params_savable(params, rnn_model)
        checkpoint = os.path.join(self.get_temp_dir(),
                                  "save-restore-variable-test")
        saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V2)

        # Phase 1: initialise, record the values and write the checkpoint.
        with self.test_session(use_gpu=True) as sess:
            sess.run(variables.global_variables_initializer())
            saved_values = sess.run(params)
            self.assertEqual(checkpoint, saver.save(sess, checkpoint))

        # Phase 2: overwrite with zeros, restore and compare.
        with self.test_session(use_gpu=True) as sess:
            zero_out = state_ops.assign(params, array_ops.zeros([size_t]))
            sess.run(zero_out)
            saver.restore(sess, checkpoint)
            restored_values = sess.run(params)
            self.assertAllEqual(saved_values, restored_values)

    def _testSaveRestoreOutput(self, rnn_mode):
        """Check the summed model output is unchanged by save/zero/restore.

        Args:
          rnn_mode: mode string handed to `_CreateModel`; "lstm" additionally
            feeds an `input_c` tensor to the model.
        """
        num_layers, num_units, input_size = 2, 7, 7
        seq_length, batch_size, dir_count = 10, 5, 1

        rnn_model = self._CreateModel(rnn_mode, num_layers, num_units,
                                      input_size)
        size_t = rnn_model.params_size()
        params = variables.Variable(array_ops.ones([size_t]),
                                    validate_shape=False)
        self._create_params_savable(params, rnn_model)
        checkpoint = os.path.join(self.get_temp_dir(),
                                  "save-restore-output-test")
        saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V2)

        def ones_state():
            # Fresh all-ones tensor with the shape used for input_h/input_c.
            return array_ops.ones(
                [num_layers * dir_count, batch_size, num_units])

        call_kwargs = {
            "input_data": array_ops.ones(
                [seq_length, batch_size, input_size]),
            "input_h": ones_state(),
        }
        if rnn_mode == "lstm":
            call_kwargs["input_c"] = ones_state()
        outputs = rnn_model(params=params, is_training=False, **call_kwargs)
        total_sum = sum(math_ops.reduce_sum(tensor) for tensor in outputs)

        # Phase 1: initialise, evaluate the sum and write the checkpoint.
        with self.test_session(use_gpu=True) as sess:
            sess.run(variables.global_variables_initializer())
            sum_before = sess.run(total_sum)
            self.assertEqual(checkpoint, saver.save(sess, checkpoint))

        # Phase 2: zero the params, restore and re-evaluate the sum.
        with self.test_session(use_gpu=True) as sess:
            zero_out = state_ops.assign(params, array_ops.zeros([size_t]))
            sess.run(zero_out)
            saver.restore(sess, checkpoint)
            sum_after = sess.run(total_sum)
            self.assertAllEqual(sum_before, sum_after)

    @unittest.skipUnless(test.is_built_with_cuda(),
                         "Test only applicable when running on GPUs")
    def testSaveRestore(self):
        """Run the variable and output save/restore checks for each mode."""
        for mode in ("lstm", "gru", "rnn_tanh", "rnn_relu"):
            self._testSaveRestoreVariable(mode)
            self._testSaveRestoreOutput(mode)

    def _MinLSTMParamSize(self,
                          num_layers,
                          num_units,
                          input_size,
                          input_mode="auto_select",
                          direction="unidirection"):
        if direction != "unidirection":
            # TODO(zhengxq): support bidirection in parameter size estimate.
            raise ValueError("Only unidirection in parameter size estimate")
        first_layer_weights = 4 * num_units * (num_units + input_size)
        higher_layer_weights = 8 * (num_layers - 1) * num_units * num_units
        all_biases = 8 * num_layers * num_units
        return first_layer_weights + higher_layer_weights + all_biases

    def _testOneLSTMParamsSize(self, num_layers, num_units, input_size):
        """Assert the LSTM model's params size is at least the estimate.

        Args:
          num_layers: number of layers for the estimate and the model.
          num_units: number of units for the estimate and the model.
          input_size: input size for the estimate and the model.
        """
        lower_bound = self._MinLSTMParamSize(num_layers, num_units,
                                             input_size)
        lstm = self._CreateModel("lstm", num_layers, num_units, input_size)
        size_op = lstm.params_size()
        with self.test_session(use_gpu=True) as sess:
            actual_size = sess.run(size_op)
            self.assertLessEqual(lower_bound, actual_size)

    @unittest.skipUnless(test.is_built_with_cuda(),
                         "Test only applicable when running on GPUs")
    def testLSTMParamsSize(self):
        """Check the params-size lower bound for several LSTM configs."""
        # Each entry is (num_layers, num_units, input_size).
        configs = (
            (4, 200, 200),
            (4, 200, 300),
            (4, 200, 100),
            (1, 100, 200),
            (2, 200, 100),
            (3, 200, 400),
        )
        with ops.Graph().as_default():
            for config in configs:
                self._testOneLSTMParamsSize(*config)

    def _testOneSimpleInference(self, rnn_mode, num_layers, num_units,
                                input_size, batch_size, seq_length, dir_count,
                                expected, tolerance):
        """Run one inference pass on all-ones inputs and check the sum.

        The model outputs (including output_c for "lstm") are each reduced
        with reduce_sum and added together; the total must match `expected`
        within `tolerance`, used as both atol and rtol.

        Args:
          rnn_mode: mode string handed to `_CreateModel`.
          num_layers: number of layers.
          num_units: number of units.
          input_size: size of the input.
          batch_size: batch dimension of the input tensors.
          seq_length: leading dimension of `input_data`.
          dir_count: multiplier applied to `num_layers` in the state shape.
          expected: expected value of the summed outputs.
          tolerance: absolute and relative tolerance for the comparison.
        """
        rnn_model = self._CreateModel(rnn_mode, num_layers, num_units,
                                      input_size)
        is_lstm = (rnn_mode == "lstm")
        size_t = rnn_model.params_size()

        # Tensors are created in the same order as before the rewrite.
        call_kwargs = {
            "input_data": array_ops.ones(
                [seq_length, batch_size, input_size]),
            "input_h": array_ops.ones(
                [num_layers * dir_count, batch_size, num_units]),
            "params": variables.Variable(array_ops.ones([size_t]),
                                         validate_shape=False),
            "is_training": False,
        }
        if is_lstm:
            call_kwargs["input_c"] = array_ops.ones(
                [num_layers * dir_count, batch_size, num_units])

        results = rnn_model(**call_kwargs)
        if is_lstm:
            output, output_h, output_c = results
        else:
            output, output_h = results

        total_sum = (math_ops.reduce_sum(output) +
                     math_ops.reduce_sum(output_h))
        if is_lstm:
            total_sum += math_ops.reduce_sum(output_c)

        with self.test_session(use_gpu=True) as sess:
            sess.run(variables.global_variables_initializer())
            (total_value,) = sess.run([total_sum])
            self.assertAllClose(total_value,
                                expected,
                                atol=tolerance,
                                rtol=tolerance)

    @unittest.skipUnless(test.is_built_with_cuda(),
                         "Test only applicable when running on GPUs")
    def testSimpleInference(self):
        """Check the summed inference output of each mode against a constant.

        Every case is (rnn_mode, expected sum, tolerance, shape settings) and
        is forwarded to `_testOneSimpleInference` inside one shared graph.
        """
        # Shape settings shared by the lstm, gru and rnn_tanh cases.
        large_shapes = {
            "num_layers": 4,
            "num_units": 200,
            "input_size": 200,
            "batch_size": 20,
            "seq_length": 10,
            "dir_count": 1,
        }
        small_shapes = {
            "num_layers": 2,
            "num_units": 8,
            "input_size": 4,
            "batch_size": 4,
            "seq_length": 2,
            "dir_count": 1,
        }
        cases = (
            ("lstm", 231833.22, 1e-2, dict(large_shapes)),
            ("gru", 56000, 1e-2, dict(large_shapes)),
            ("rnn_tanh", 56000, 1e-2, dict(large_shapes)),
            ("rnn_relu", 130688, 1e-2, small_shapes),
        )
        with ops.Graph().as_default():
            for rnn_mode, expected, tolerance, shapes in cases:
                # The shape keys match the helper's parameter names.
                self._testOneSimpleInference(
                    rnn_mode,
                    expected=expected,
                    tolerance=tolerance,
                    **shapes)

    def _testOneSimpleTraining(self, rnn_mode, num_layers, num_units,
                               input_size, batch_size, seq_length, dir_count,
                               tolerance):
        """Gradient-check one RNN configuration and assert a bounded error.

        Builds a model via `self._CreateModel`, feeds it randomly initialised
        variables, reduces all model outputs to a single scalar and asserts
        that the error reported by `gradient_checker.compute_gradient_error`
        is below `tolerance`.

        Args:
            rnn_mode: RNN mode name; only "lstm" adds the extra `input_c`
                input and `output_c` output.
            num_layers: Number of layers passed to the model.
            num_units: Number of units passed to the model.
            input_size: Size of the last dimension of the input data.
            batch_size: Size of the middle dimension of the inputs and states.
            seq_length: Size of the first dimension of the input data.
            dir_count: Multiplier applied to `num_layers` for the first
                dimension of the state variables.
            tolerance: Exclusive upper bound on the gradient error.
        """
        has_input_c = (rnn_mode == "lstm")
        # Fixed graph-level seed so the random variable initialisers below are
        # reproducible.
        random_seed.set_random_seed(1234)
        model = self._CreateModel(rnn_mode, num_layers, num_units, input_size)
        params_size_t = model.params_size()
        input_data = variables.Variable(
            random_ops.random_uniform([seq_length, batch_size, input_size]))
        input_h = variables.Variable(
            random_ops.random_uniform(
                [num_layers * dir_count, batch_size, num_units]))
        # The parameter buffer length is only known once params_size_t is
        # evaluated in a session, hence validate_shape=False.
        params = variables.Variable(random_ops.random_uniform([params_size_t]),
                                    validate_shape=False)
        if has_input_c:
            input_c = variables.Variable(
                random_ops.random_uniform(
                    [num_layers * dir_count, batch_size, num_units]))
            output, output_h, output_c = model(input_data=input_data,
                                               input_h=input_h,
                                               input_c=input_c,
                                               params=params)
        else:
            output, output_h = model(input_data=input_data,
                                     input_h=input_h,
                                     params=params)
        # Collapse every model output into one scalar for the gradient check.
        output_sum = math_ops.reduce_sum(output)
        output_h_sum = math_ops.reduce_sum(output_h)
        total_sum = output_sum + output_h_sum
        if has_input_c:
            output_c_sum = math_ops.reduce_sum(output_c)
            total_sum += output_c_sum

        with self.test_session(use_gpu=True) as sess:
            # Resolve the parameter count to a concrete value so it can be
            # given as a static shape below.
            params_size_v = sess.run(params_size_t)
            inputs_and_shapes = [
                (input_data, [seq_length, batch_size, input_size]),
                (input_h, [num_layers * dir_count, batch_size, num_units]),
                (params, [params_size_v]),
            ]
            if has_input_c:
                inputs_and_shapes.append(
                    (input_c, [num_layers * dir_count, batch_size, num_units
                               ]), )
            sess.run(variables.global_variables_initializer())
            all_inputs = [entry[0] for entry in inputs_and_shapes]
            all_shapes = [entry[1] for entry in inputs_and_shapes]
            # [1] is passed as the shape of the scalar total_sum.
            err = gradient_checker.compute_gradient_error(
                all_inputs, all_shapes, total_sum, [1])
            self.assertLess(err, tolerance)

    @unittest.skipUnless(test.is_built_with_cuda(),
                         "Test only applicable when running on GPUs")
    def testSimpleTraining(self):
        """Run the single-case training check for each RNN mode.

        All modes use the same model dimensions and differ only in the
        tolerance handed to `_testOneSimpleTraining`. All cases are built
        inside one shared graph.
        """
        dims = dict(num_layers=2,
                    num_units=3,
                    input_size=4,
                    batch_size=3,
                    seq_length=4,
                    dir_count=1)
        # Each case: (rnn_mode, tolerance).
        cases = (
            ("lstm", 1e-2),
            ("gru", 4e-3),
            ("rnn_tanh", 5e-3),
            ("rnn_relu", 3e-1),
        )
        with ops.Graph().as_default():
            for mode, tol in cases:
                self._testOneSimpleTraining(mode, dims["num_layers"],
                                            dims["num_units"],
                                            dims["input_size"],
                                            dims["batch_size"],
                                            dims["seq_length"],
                                            dims["dir_count"], tol)
Ejemplo n.º 27
0
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Cuda op Python library."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os.path

#import tensorflow as tf
from tensorflow.python.platform.test import is_built_with_cuda
from tensorflow.python.framework.load_library import load_op_library
from tensorflow.python.platform.resource_loader import get_data_files_path

# Load the custom op library only when TensorFlow reports a CUDA build;
# otherwise `bf16cut_bp` is left undefined in this module.
if is_built_with_cuda():
    _cuda_op_module = load_op_library(
        os.path.join(get_data_files_path(), 'bf16cut_bp.so'))
    # The op must be registered under a CamelCase name (Bf16cutBp); from
    # Python it is invoked through the snake_case form, with an underscore
    # separating each capitalized word.
    bf16cut_bp = _cuda_op_module.bf16cut_bp
Ejemplo n.º 28
0
class CudnnLayersTest(tf.test.TestCase):
    """Tests for `cudnn_layers.stacked_bilstm`."""

    def test_stacked_bilstm(self):
        """Check that the output keeps the leading dims and has 2 * hidden_size features."""
        with tf.Graph().as_default():
            input_emb = tf.random_uniform([3, 5, 8])
            input_len = tf.constant([4, 5, 2])
            output_emb = cudnn_layers.stacked_bilstm(
                input_emb=input_emb,
                input_len=input_len,
                hidden_size=10,
                num_layers=3,
                dropout_ratio=0.2,
                mode=tf.estimator.ModeKeys.TRAIN)
            with tf.Session() as sess:
                sess.run(tf.global_variables_initializer())
                actual_output_emb = sess.run(output_emb)
            self.assertAllEqual(actual_output_emb.shape, [3, 5, 10 * 2])

    @unittest.skipUnless(test.is_built_with_cuda(),
                         "Test only applicable when running on GPUs")
    def test_stacked_bilstm_compatibility(self):
        """Check that a GPU/cuDNN checkpoint reproduces its outputs on CPU.

        The model is first built with `use_cudnn=True` on "/gpu:0", run once
        and checkpointed. The same model is then rebuilt with
        `use_cudnn=False` on "/cpu:0", restored from that checkpoint and run
        on the same input. For every sequence, the outputs up to its length
        must be close between the two runs.
        """
        # Function-scope import: only needed for the temp-dir cleanup below.
        import shutil

        checkpoint_dir = tempfile.mkdtemp(prefix="checkpoint_dir")
        # mkdtemp() never deletes what it creates; remove the directory when
        # the test ends, whether it passes or fails.
        self.addCleanup(shutil.rmtree, checkpoint_dir, ignore_errors=True)
        checkpoint_path = os.path.join(checkpoint_dir, "model.ckpt")
        hidden_size = 10
        num_layers = 3
        # No dropout, so the two runs are directly comparable.
        dropout_ratio = 0.0
        input_emb = np.random.uniform(size=[3, 5, 9]).astype(np.float32)
        input_len = [4, 5, 2]

        # Make sure we fail explicitly if the specified devices can't be used.
        config = tf.ConfigProto(allow_soft_placement=False,
                                log_device_placement=True)

        with tf.Graph().as_default():
            with tf.device("/gpu:0"):
                output_emb = cudnn_layers.stacked_bilstm(
                    input_emb=input_emb,
                    input_len=input_len,
                    hidden_size=hidden_size,
                    num_layers=num_layers,
                    dropout_ratio=dropout_ratio,
                    mode=tf.estimator.ModeKeys.TRAIN,
                    use_cudnn=True)
            saver = tf.train.Saver()
            with tf.Session(config=config) as sess:
                sess.run(tf.global_variables_initializer())
                gpu_output_emb = sess.run(output_emb)
                saver.save(sess, checkpoint_path)

        with tf.Graph().as_default():
            with tf.device("/cpu:0"):
                output_emb = cudnn_layers.stacked_bilstm(
                    input_emb=input_emb,
                    input_len=input_len,
                    hidden_size=hidden_size,
                    num_layers=num_layers,
                    dropout_ratio=dropout_ratio,
                    mode=tf.estimator.ModeKeys.TRAIN,
                    use_cudnn=False)
            saver = tf.train.Saver()
            with tf.Session(config=config) as sess:
                # Variables come from the GPU checkpoint, not an initializer.
                saver.restore(sess, checkpoint_path)
                cpu_output_emb = sess.run(output_emb)

        # Only the first `length` steps of each sequence are compared.
        for cpu_seq, gpu_seq, length in zip(cpu_output_emb, gpu_output_emb,
                                            input_len):
            self.assertAllClose(cpu_seq[:length], gpu_seq[:length])
Ejemplo n.º 29
0
 def test_rocm_cuda_info_matches(self):
     """Check the build-info ROCm/CUDA flags against the `test` module queries."""
     info = sysconfig.get_build_info()
     rocm_flag = info["is_rocm_build"]
     self.assertEqual(rocm_flag, test.is_built_with_rocm())
     cuda_flag = info["is_cuda_build"]
     self.assertEqual(cuda_flag, test.is_built_with_cuda())
Ejemplo n.º 30
0
class CudnnRNNTestInference(TensorFlowTestCase):
    """Inference-mode output checks for models built by `_CreateModel`."""

    def _testOneSimpleInference(self, rnn_mode, num_layers, num_units,
                                input_size, batch_size, seq_length, dir_count,
                                dropout, expected, tolerance):
        """Run one all-ones inference pass and compare the summed outputs.

        The model inputs, states and parameters are all filled with ones. The
        sum over every model output is asserted to be close to `expected`,
        using `tolerance` as both the absolute and relative tolerance.
        """
        random_seed.set_random_seed(5678)
        if dir_count == 1:
            direction = cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION
        else:
            direction = cudnn_rnn_ops.CUDNN_RNN_BIDIRECTION
        model = _CreateModel(rnn_mode,
                             num_layers,
                             num_units,
                             input_size,
                             input_mode="auto_select",
                             direction=direction,
                             dropout=dropout)
        # Only the LSTM mode has the extra cell-state input and output.
        is_lstm = rnn_mode == cudnn_rnn_ops.CUDNN_LSTM
        params_size_t = model.params_size()
        state_shape = [num_layers * dir_count, batch_size, num_units]
        input_data = array_ops.ones([seq_length, batch_size, input_size])
        input_h = array_ops.ones(state_shape)
        params = variables.VariableV1(array_ops.ones([params_size_t]),
                                      validate_shape=False)
        shared_kwargs = dict(input_data=input_data,
                             input_h=input_h,
                             params=params,
                             is_training=False)
        if is_lstm:
            input_c = array_ops.ones(state_shape)
            output, output_h, output_c = model(input_c=input_c,
                                               **shared_kwargs)
        else:
            output, output_h = model(**shared_kwargs)

        total_sum = (math_ops.reduce_sum(output) +
                     math_ops.reduce_sum(output_h))
        if is_lstm:
            total_sum += math_ops.reduce_sum(output_c)

        session = self.test_session(use_gpu=True,
                                    graph=ops.get_default_graph())
        with session as sess:
            sess.run(variables.global_variables_initializer())
            (actual_sum, ) = sess.run([total_sum])

            self.assertAllClose(actual_sum,
                                expected,
                                atol=tolerance,
                                rtol=tolerance)

    @unittest.skipUnless(test.is_built_with_cuda(),
                         "Test only applicable when running on GPUs")
    def testSimpleInference(self):
        """Check every RNN mode at several dropout rates, one graph per case."""
        large_shape = dict(num_layers=4,
                           num_units=200,
                           input_size=200,
                           batch_size=20,
                           seq_length=10,
                           dir_count=1)
        small_shape = dict(num_layers=2,
                           num_units=8,
                           input_size=4,
                           batch_size=4,
                           seq_length=2,
                           dir_count=1)
        # Each case: (rnn_mode, expected, tolerance, shape).
        cases = (
            (cudnn_rnn_ops.CUDNN_LSTM, 231833.22, 1e-2, large_shape),
            (cudnn_rnn_ops.CUDNN_GRU, 56000, 1e-2, large_shape),
            (cudnn_rnn_ops.CUDNN_RNN_TANH, 56000, 1e-2, large_shape),
            (cudnn_rnn_ops.CUDNN_RNN_RELU, 130688, 1e-2, small_shape),
        )
        # Cudnn applies dropout scaling during training, so the dropout rate
        # must not change inference results. The lstm, gru and rnn_tanh cases
        # are saturated in this test; rnn_relu demonstrates the
        # dropout-invariance of CudnnRnn most clearly.
        dropout_rates = (0., 0.5, 1.)
        for mode, expected_sum, tol, dims in cases:
            for rate in dropout_rates:
                with ops.Graph().as_default():
                    self._testOneSimpleInference(mode, dims["num_layers"],
                                                 dims["num_units"],
                                                 dims["input_size"],
                                                 dims["batch_size"],
                                                 dims["seq_length"],
                                                 dims["dir_count"], rate,
                                                 expected_sum, tol)
Ejemplo n.º 31
0
class CudnnRNNTestCompatibleRnnCells(TensorFlowTestCase):
    """Checks that a trained Cudnn RNN checkpoint yields the same inference
    results through the Cudnn model and through the canonical RNN built by
    `_CreateCudnnCompatibleCanonicalRNN`.
    """

    @unittest.skipUnless(test.is_built_with_cuda(),
                         "Test only applicable when running on GPUs")
    def testCudnnCompatibleRnnCells(self):
        """Run the compatibility check for LSTM and GRU over several shapes."""
        configs = [
            {
                "num_layers": 1,
                "seq_length": 3,
                "num_units": 4,
                "input_size": 5,
                "batch_size": 6,
            },
            {
                "num_layers": 2,
                "seq_length": 8,
                "num_units": 4,
                "input_size": 8,
                "batch_size": 16,
            },
            {
                "num_layers": 2,
                "seq_length": 3,
                "num_units": 4,
                "input_size": 5,
                "batch_size": 6,
            },
            {
                "num_layers": 1,
                "seq_length": 2,
                "num_units": 2,
                "input_size": 4,
                "batch_size": 1,
            },
        ]
        # LSTM over every shape configuration.
        for rnn, cfg in itertools.product((cudnn_rnn_ops.CUDNN_LSTM, ),
                                          configs):
            self._testCudnnCompatibleRnnCells(cfg["num_layers"],
                                              cfg["seq_length"],
                                              cfg["num_units"],
                                              cfg["input_size"],
                                              cfg["batch_size"], rnn)
        # TODO(jamesqin): Add CudnnCompatibleGRUBlockCell.
        # NOTE(review): the loop below already exercises the GRU mode, so the
        # TODO above may be stale - confirm.
        for rnn, cfg in itertools.product((cudnn_rnn_ops.CUDNN_GRU, ),
                                          configs):
            self._testCudnnCompatibleRnnCells(cfg["num_layers"],
                                              cfg["seq_length"],
                                              cfg["num_units"],
                                              cfg["input_size"],
                                              cfg["batch_size"], rnn)

    def _testCudnnCompatibleRnnCells(self, num_layers, seq_length, num_units,
                                     input_size, batch_size, rnn_mode):
        """Train a Cudnn model, then compare two inference paths on its checkpoint.

        Three separate graphs are used:
          1. A training graph that runs 128 gradient-descent steps on random
             data and saves a checkpoint.
          2. A Cudnn inference graph restored from that checkpoint.
          3. A canonical RNN inference graph, built from the Cudnn model of
             graph 2 and restored from the same checkpoint.

        The outputs and per-layer states of graphs 2 and 3 must agree within
        1e-6 (absolute and relative).

        Args:
            num_layers: Number of RNN layers.
            seq_length: First dimension of the input placeholder.
            num_units: Number of units passed to the Cudnn model.
            input_size: Last dimension of the input placeholder.
            batch_size: Middle dimension of the input placeholder.
            rnn_mode: `cudnn_rnn_ops.CUDNN_LSTM` or another Cudnn RNN mode;
                only LSTM states are compared through `.h` and `.c`.
        """
        has_state_c = rnn_mode == cudnn_rnn_ops.CUDNN_LSTM
        # Fixed NumPy seed: the training data and the inference input below
        # are drawn from this generator in sequence.
        np.random.seed(0)
        # Train graph
        with ops.Graph().as_default():
            random_seed.set_random_seed(299)
            input_data = array_ops.placeholder(
                dtypes.float32, shape=[seq_length, batch_size, input_size])
            output_tuple, cudnn_model = _BuildCudnnForward(rnn_mode,
                                                           num_layers,
                                                           num_units,
                                                           input_data,
                                                           is_training=True)
            target_output = array_ops.placeholder(dtype=dtypes.float32,
                                                  shape=None)
            # Reduce every forward output to a scalar and add them up.
            total_sum = sum(map(math_ops.reduce_sum, output_tuple))

            loss_op = losses.log_loss(labels=target_output,
                                      predictions=total_sum)
            optimizer = gradient_descent.GradientDescentOptimizer(
                learning_rate=1e-2)
            train_op = optimizer.minimize(loss_op)

            saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V2)

            # Train Cudnn model
            with self.test_session(use_gpu=True,
                                   graph=ops.get_default_graph()) as sess:
                sess.run(variables.global_variables_initializer())
                # Train 128 steps
                num_steps = 128
                for _ in range(num_steps):
                    # Fresh random input and scalar target for every step.
                    inputs = np.random.rand(seq_length, batch_size,
                                            input_size).astype(np.float32)
                    targets = np.random.rand()
                    sess.run(train_op,
                             feed_dict={
                                 input_data: inputs,
                                 target_output: targets
                             })

                # The checkpoint written here is restored by both inference
                # graphs below.
                save_path = os.path.join(self.get_temp_dir(),
                                         ("cudnn-rnn-%s-test" % rnn_mode))
                save_v = saver.save(sess, save_path)
                self.assertEqual(save_path, save_v)

        # cuDNN inference graph
        with ops.Graph().as_default():
            random_seed.set_random_seed(299)
            cudnn_inputs = array_ops.placeholder(
                dtypes.float32, shape=[seq_length, batch_size, input_size])
            (cudnn_output_tuple,
             cudnn_model) = _BuildCudnnForward(rnn_mode,
                                               num_layers,
                                               num_units,
                                               cudnn_inputs,
                                               is_training=False)
            saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V2)

            # The same input is fed to both inference graphs.
            inference_input = np.random.rand(seq_length, batch_size,
                                             input_size).astype(np.float32)
            with self.test_session(use_gpu=True,
                                   graph=ops.get_default_graph()) as sess:
                # Initialise first, then load the trained values from the
                # checkpoint.
                sess.run(variables.global_variables_initializer())
                saver.restore(sess, save_path)

                # Cudnn inference
                cudnn_output = sess.run(
                    cudnn_output_tuple,
                    feed_dict={cudnn_inputs: inference_input})

        # Canonical RNN inference graph
        with ops.Graph().as_default():
            random_seed.set_random_seed(299)
            cell_inputs = array_ops.placeholder(
                dtypes.float32, shape=[seq_length, batch_size, input_size])
            # Built from the Cudnn model created in the inference graph above.
            (output, states) = _CreateCudnnCompatibleCanonicalRNN(
                cudnn_model, cell_inputs)
            saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V2)

            with self.test_session(use_gpu=True,
                                   graph=ops.get_default_graph()) as sess:
                saver.restore(sess, save_path)

                # BlockCell inference
                output_v, states_v = sess.run(
                    [output, states], feed_dict={cell_inputs: inference_input})

                # Outputs across time steps are packed into a single tensor.
                self.assertAllClose(cudnn_output[0],
                                    output_v,
                                    atol=1e-6,
                                    rtol=1e-6)

                # Compare the final states layer by layer.
                for i in range(num_layers):
                    if has_state_c:
                        # output_h
                        self.assertAllClose(cudnn_output[1][i, :],
                                            states_v[i].h,
                                            atol=1e-6,
                                            rtol=1e-6)
                        # output_c
                        self.assertAllClose(cudnn_output[2][i, :],
                                            states_v[i].c,
                                            atol=1e-6,
                                            rtol=1e-6)
                    else:
                        # Non-LSTM modes carry a single state per layer.
                        self.assertAllClose(cudnn_output[1][i, :],
                                            states_v[i],
                                            atol=1e-6,
                                            rtol=1e-6)
Ejemplo n.º 32
0
class CudnnRNNTestTraining(TensorFlowTestCase):
    """Numerical gradient checks for models built by `_CreateModel`."""

    def _testOneSimpleTraining(self, rnn_mode, num_layers, num_units,
                               input_size, batch_size, seq_length, dir_count,
                               dropout, dtype, delta, tolerance):
        """Gradient-check one configuration and assert a bounded error.

        Builds a model with randomly initialised inputs, states and
        parameters, reduces all model outputs to one scalar and asserts that
        `gradient_checker.compute_gradient_error` reports an error below
        `tolerance`.

        While the check runs, the `TF_CUDNN_RESET_RND_GEN_STATE` environment
        variable is set to "True". Its previous value ("False" if it was not
        set) is put back afterwards, even when the check fails.

        Args:
            rnn_mode: Cudnn RNN mode; only `cudnn_rnn_ops.CUDNN_LSTM` adds
                the extra `input_c` input and `output_c` output.
            num_layers: Number of layers passed to the model.
            num_units: Number of units passed to the model.
            input_size: Size of the last dimension of the input data.
            batch_size: Size of the middle dimension of inputs and states.
            seq_length: Size of the first dimension of the input data.
            dir_count: 1 selects the unidirectional model, anything else the
                bidirectional one; also scales the first state dimension.
            dropout: Dropout value passed to the model.
            dtype: dtype used for the model and all variables.
            delta: Perturbation passed to the gradient checker.
            tolerance: Exclusive upper bound on the gradient error.
        """
        # Keep this as the first statement so locals() holds only the
        # arguments of this call.
        logging.info("Training test with config: %s", locals())
        # Gradient checking runs two forward ops with almost the same input.
        # Need to make sure the drop patterns across the two runs are the same.
        old_env_state = os.environ.get("TF_CUDNN_RESET_RND_GEN_STATE",
                                       str(False))
        os.environ["TF_CUDNN_RESET_RND_GEN_STATE"] = str(True)
        try:
            has_input_c = (rnn_mode == cudnn_rnn_ops.CUDNN_LSTM)
            random_seed.set_random_seed(5678)
            direction = (cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION
                         if dir_count == 1 else
                         cudnn_rnn_ops.CUDNN_RNN_BIDIRECTION)
            model = _CreateModel(rnn_mode,
                                 num_layers,
                                 num_units,
                                 input_size,
                                 direction=direction,
                                 dtype=dtype,
                                 dropout=dropout)
            params_size_t = model.params_size()
            input_data = variables.VariableV1(
                random_ops.random_uniform(
                    [seq_length, batch_size, input_size], dtype=dtype),
                dtype=dtype)
            input_h = variables.VariableV1(
                random_ops.random_uniform(
                    [num_layers * dir_count, batch_size, num_units],
                    dtype=dtype),
                dtype=dtype)
            # The parameter buffer length is only known once params_size_t
            # is evaluated in a session, hence validate_shape=False.
            params = variables.VariableV1(
                random_ops.random_uniform([params_size_t], dtype=dtype),
                validate_shape=False,
                dtype=dtype)
            if has_input_c:
                input_c = variables.VariableV1(
                    random_ops.random_uniform(
                        [num_layers * dir_count, batch_size, num_units],
                        dtype=dtype),
                    dtype=dtype)

                output, output_h, output_c = model(input_data=input_data,
                                                   input_h=input_h,
                                                   input_c=input_c,
                                                   params=params)
            else:
                output, output_h = model(input_data=input_data,
                                         input_h=input_h,
                                         params=params)
            # Collapse every model output into one scalar for the check.
            output_sum = math_ops.reduce_sum(output)
            output_h_sum = math_ops.reduce_sum(output_h)
            total_sum = output_sum + output_h_sum
            if has_input_c:
                output_c_sum = math_ops.reduce_sum(output_c)
                total_sum += output_c_sum

            with self.test_session(use_gpu=True,
                                   graph=ops.get_default_graph()) as sess:
                # Resolve the parameter count to a concrete value so it can
                # be given as a static shape below.
                params_size_v = sess.run(params_size_t)
                inputs_and_shapes = [
                    (input_data, [seq_length, batch_size, input_size]),
                    (input_h,
                     [num_layers * dir_count, batch_size, num_units]),
                    (params, [params_size_v]),
                ]
                if has_input_c:
                    inputs_and_shapes.append(
                        (input_c,
                         [num_layers * dir_count, batch_size, num_units]))
                sess.run(variables.global_variables_initializer())
                all_inputs = [entry[0] for entry in inputs_and_shapes]
                all_shapes = [entry[1] for entry in inputs_and_shapes]

                err = gradient_checker.compute_gradient_error(all_inputs,
                                                              all_shapes,
                                                              total_sum, [1],
                                                              delta=delta)

                self.assertLess(err, tolerance)
        finally:
            # Always undo the environment change so a failing case cannot
            # leak the setting into later tests.
            os.environ["TF_CUDNN_RESET_RND_GEN_STATE"] = old_env_state

    @unittest.skipUnless(test.is_built_with_cuda(),
                         "Test only applicable when running on GPUs")
    def DISABLED_testSimpleTraining(self):
        """Gradient-check every RNN mode in float64 and float32.

        Each configuration is combined with every dropout value and
        direction count, and each combination runs in its own graph.
        """
        # TODO(jamesqin): fix b/117989214
        # All configurations share the same model dimensions.
        shape = {
            "num_layers": 2,
            "num_units": 3,
            "input_size": 4,
            "batch_size": 3,
            "seq_length": 4,
        }
        # Each config: (rnn_mode, dtype, delta, tolerance).
        test_configs = [
            (cudnn_rnn_ops.CUDNN_LSTM, dtypes.float64, 1e-4, 5e-6),
            (cudnn_rnn_ops.CUDNN_GRU, dtypes.float64, 1e-4, 5e-6),
            (cudnn_rnn_ops.CUDNN_RNN_TANH, dtypes.float64, 1e-4, 5e-6),
            (cudnn_rnn_ops.CUDNN_RNN_RELU, dtypes.float64, 1e-4, 5e-6),
            (cudnn_rnn_ops.CUDNN_LSTM, dtypes.float32, 1e-3, 1.5e-2),
            (cudnn_rnn_ops.CUDNN_GRU, dtypes.float32, 1e-3, 4e-3),
            (cudnn_rnn_ops.CUDNN_RNN_TANH, dtypes.float32, 1e-3, 5e-3),
            (cudnn_rnn_ops.CUDNN_RNN_RELU, dtypes.float32, 1e-3, 5e-1),
        ]
        dropouts = [0., 0.5, 1.]
        dir_counts = [1]
        for config, dropout, dir_count in itertools.product(
                test_configs, dropouts, dir_counts):
            rnn_mode, dtype, delta, tolerance = config
            with ops.Graph().as_default():
                self._testOneSimpleTraining(rnn_mode, shape["num_layers"],
                                            shape["num_units"],
                                            shape["input_size"],
                                            shape["batch_size"],
                                            shape["seq_length"], dir_count,
                                            dropout, dtype, delta, tolerance)
Ejemplo n.º 33
0
 def testBuildInfo(self):
   """Check `build_info.is_cuda_build` against `test.is_built_with_cuda()`."""
   cuda_build_flag = build_info.is_cuda_build
   self.assertEqual(cuda_build_flag, test.is_built_with_cuda())