Example #1
 def _ParamsSize(self, input_dim, cell_dim, direction, dtype=tf.float32):
     return cudnn_rnn_ops.cudnn_rnn_opaque_params_size(
         rnn_mode=cudnn_rnn_ops.CUDNN_LSTM,
         num_layers=1,
         num_units=cell_dim,
         input_size=input_dim,
         input_mode=cudnn_rnn_ops.CUDNN_INPUT_LINEAR_MODE,
         direction=direction,
         dtype=dtype)
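This helper is a thin wrapper around cudnn_rnn_opaque_params_size, which returns a scalar tensor holding the number of elements cuDNN needs for its flattened ("opaque") parameter buffer. A minimal sketch of how that size tensor is typically consumed, assuming the TF 1.x contrib module tensorflow.contrib.cudnn_rnn.python.ops.cudnn_rnn_ops and a CUDA-enabled build (the variable name and dimensions below are made up for illustration):

import tensorflow as tf
from tensorflow.contrib.cudnn_rnn.python.ops import cudnn_rnn_ops

# Scalar tensor: number of float32 elements in the cuDNN LSTM parameter blob.
params_size_t = cudnn_rnn_ops.cudnn_rnn_opaque_params_size(
    rnn_mode=cudnn_rnn_ops.CUDNN_LSTM,
    num_layers=1,
    num_units=128,   # cell_dim in the helper above
    input_size=32,   # input_dim in the helper above
    input_mode=cudnn_rnn_ops.CUDNN_INPUT_LINEAR_MODE,
    direction=cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION,
    dtype=tf.float32)

# The size is only known when the graph runs, so the flat buffer is created
# with validate_shape=False (same pattern as Example #4 below).
opaque_params = tf.get_variable(
    'lstm_opaque_params',
    initializer=tf.random_uniform([params_size_t]),
    validate_shape=False)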
Example #2
 def OpaqueParamsShape(self, dtype):
     return cudnn_rnn_ops.cudnn_rnn_opaque_params_size(
         rnn_mode='lstm',
         num_layers=1,
         num_units=self._cell_nodes,
         input_size=self._input_nodes,
         input_mode='linear_input',
         direction=self._direction,
         dtype=dtype)
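The same call also accepts the plain string spellings ('lstm', 'linear_input', 'unidirectional'/'bidirectional') instead of the module constants used in Example #1. The result is still a tensor, so it has to be evaluated before it can be used as a Python integer. A hedged sketch with hypothetical dimensions, assuming a TF 1.x GPU build (the size op is backed by cuDNN):

import tensorflow as tf
from tensorflow.contrib.cudnn_rnn.python.ops import cudnn_rnn_ops

# Opaque-buffer sizes for a unidirectional vs. bidirectional single-layer LSTM.
sizes = {
    direction: cudnn_rnn_ops.cudnn_rnn_opaque_params_size(
        rnn_mode='lstm',
        num_layers=1,
        num_units=64,
        input_size=16,
        input_mode='linear_input',
        direction=direction,
        dtype=tf.float32)
    for direction in ('unidirectional', 'bidirectional')
}

with tf.Session() as sess:
    uni, bi = sess.run([sizes['unidirectional'], sizes['bidirectional']])
    # The bidirectional buffer holds a forward and a backward parameter set,
    # so it is roughly twice the unidirectional size.
    print(uni, bi)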
Example #3
    def _test_gru_helper(self, num_units, input_size, num_layers, direction):
        with self.session(use_gpu=True) as sess:
            random_seed.set_random_seed(0)
            np.random.seed(0)

            num_dirs = 1 if direction == cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION else 2
            format_converter = cudnn_rnn_ops.CudnnParamsFormatConverterGRU(
                num_layers, num_units, input_size, direction=direction)

            ws, bs = [], []
            for _ in range(num_layers * num_dirs):
                gate_kernel = constant_op.constant(np.random.rand(
                    input_size + num_units, num_units * 2),
                                                   dtype=dtypes.float32)
                gate_bias = constant_op.constant(np.random.rand(num_units * 2),
                                                 dtype=dtypes.float32)
                candidate_inp_kernel = constant_op.constant(
                    np.random.rand(input_size, num_units),
                    dtype=dtypes.float32)
                candidate_inp_bias = constant_op.constant(
                    np.random.rand(num_units), dtype=dtypes.float32)
                candidate_hid_kernel = constant_op.constant(
                    np.random.rand(num_units, num_units), dtype=dtypes.float32)
                candidate_hid_bias = constant_op.constant(
                    np.random.rand(num_units), dtype=dtypes.float32)
                ws.extend(
                    [gate_kernel, candidate_inp_kernel, candidate_hid_kernel])
                bs.extend([gate_bias, candidate_inp_bias, candidate_hid_bias])

            opaque_params = format_converter.tf_canonical_to_opaque(ws + bs)
            opaque_params_size = cudnn_rnn_ops.cudnn_rnn_opaque_params_size(
                cudnn_rnn_ops.CUDNN_GRU,
                num_layers,
                num_units,
                input_size,
                direction=direction)

            ws_r, bs_r = format_converter.opaque_to_tf_canonical(opaque_params)

            # Test tf_canonical_to_opaque() followed by opaque_to_tf_canonical()
            # returns the original input.
            ws, ws_r, bs, bs_r = sess.run([ws, ws_r, bs, bs_r])
            for w, w_r in zip(ws, ws_r):
                self.assertAllClose(w, w_r)
            for b, b_r in zip(bs, bs_r):
                self.assertAllClose(b, b_r)

            # Test opaque_params size lower bound
            opaque_params_size_v = sess.run(opaque_params_size)
            min_params_size = (np.sum([x.size for x in ws]) +
                               np.sum([x.size for x in bs]))
            logging.info("min_parm_size: %d vs actual_opaque_param_size: %d",
                         min_params_size, opaque_params_size_v)
            self.assertLessEqual(min_params_size, opaque_params_size_v)
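For reference, the six tensors built per layer and direction above follow cuDNN's canonical GRU layout: a fused reset/update gate kernel of shape [input_size + num_units, 2 * num_units] with its bias, plus separate input and hidden candidate kernels and biases (for stacked or bidirectional nets, layers above the first see the previous layer's output as input, so their counts differ). The combined element count is only a lower bound on the opaque size, since cuDNN may lay the buffer out with extra padding, which is why the test ends with assertLessEqual rather than assertEqual. A small hand calculation of that lower bound (hypothetical helper, single layer, unidirectional, illustrative sizes):

# Hypothetical helper mirroring the sum the test computes from ws and bs.
def min_gru_param_count(num_units, input_size):
    return ((input_size + num_units) * 2 * num_units  # fused gate kernel
            + 2 * num_units                           # gate bias
            + input_size * num_units                  # candidate input kernel
            + num_units                               # candidate input bias
            + num_units * num_units                   # candidate hidden kernel
            + num_units)                              # candidate hidden bias

print(min_gru_param_count(num_units=4, input_size=3))  # prints 100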
Example #4
 def _create_opaque_param(self,
                          rnn_mode,
                          num_units,
                          input_size,
                          num_layers,
                          direction,
                          name=None):
   param_size_t = cudnn_rnn_ops.cudnn_rnn_opaque_params_size(
       rnn_mode, num_layers, num_units, input_size, direction=direction)
   init_val = random_ops.random_uniform([param_size_t])
   return variable_scope.get_variable(
       name or "opaque_param", initializer=init_val, validate_shape=False)
  def _test_gru_helper(self, num_units, input_size, num_layers, direction):
    with self.session(use_gpu=True) as sess:
      random_seed.set_random_seed(0)
      np.random.seed(0)

      num_dirs = 1 if direction == cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION else 2
      format_converter = cudnn_rnn_ops.CudnnParamsFormatConverterGRU(
          num_layers, num_units, input_size, direction=direction)

      ws, bs = [], []
      for _ in range(num_layers * num_dirs):
        gate_kernel = constant_op.constant(
            np.random.rand(input_size + num_units, num_units * 2),
            dtype=dtypes.float32)
        gate_bias = constant_op.constant(
            np.random.rand(num_units * 2), dtype=dtypes.float32)
        candidate_inp_kernel = constant_op.constant(
            np.random.rand(input_size, num_units), dtype=dtypes.float32)
        candidate_inp_bias = constant_op.constant(
            np.random.rand(num_units), dtype=dtypes.float32)
        candidate_hid_kernel = constant_op.constant(
            np.random.rand(num_units, num_units), dtype=dtypes.float32)
        candidate_hid_bias = constant_op.constant(
            np.random.rand(num_units), dtype=dtypes.float32)
        ws.extend([gate_kernel, candidate_inp_kernel, candidate_hid_kernel])
        bs.extend([gate_bias, candidate_inp_bias, candidate_hid_bias])

      opaque_params = format_converter.tf_canonical_to_opaque(ws + bs)
      opaque_params_size = cudnn_rnn_ops.cudnn_rnn_opaque_params_size(
          cudnn_rnn_ops.CUDNN_GRU,
          num_layers,
          num_units,
          input_size,
          direction=direction)

      ws_r, bs_r = format_converter.opaque_to_tf_canonical(opaque_params)

      # Test tf_canonical_to_opaque() followed by opaque_to_tf_canonical()
      # returns the original input.
      ws, ws_r, bs, bs_r = sess.run([ws, ws_r, bs, bs_r])
      for w, w_r in zip(ws, ws_r):
        self.assertAllClose(w, w_r)
      for b, b_r in zip(bs, bs_r):
        self.assertAllClose(b, b_r)

      # Test opaque_params size lower bound
      opaque_params_size_v = sess.run(opaque_params_size)
      min_params_size = (
          np.sum([x.size for x in ws]) + np.sum([x.size for x in bs]))
      logging.info("min_parm_size: %d vs actual_opaque_param_size: %d",
                   min_params_size, opaque_params_size_v)
      self.assertLessEqual(min_params_size, opaque_params_size_v)
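These two helpers show both halves of the workflow: allocating the flat buffer (validate_shape=False is needed because its length is a tensor whose value is only known when the graph runs) and round-tripping canonical GRU tensors through the opaque format. Condensed to the essential calls, assuming the same TF 1.x contrib imports and a GPU session, with illustrative sizes:

import numpy as np
from tensorflow.contrib.cudnn_rnn.python.ops import cudnn_rnn_ops
from tensorflow.python.framework import constant_op, dtypes

num_units, input_size = 4, 3
converter = cudnn_rnn_ops.CudnnParamsFormatConverterGRU(
    1, num_units, input_size, direction=cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION)

def rnd(*shape):
  # Random canonical tensor of the given shape.
  return constant_op.constant(np.random.rand(*shape), dtype=dtypes.float32)

ws = [rnd(input_size + num_units, num_units * 2),  # fused gate kernel
      rnd(input_size, num_units),                  # candidate input kernel
      rnd(num_units, num_units)]                   # candidate hidden kernel
bs = [rnd(num_units * 2), rnd(num_units), rnd(num_units)]

opaque = converter.tf_canonical_to_opaque(ws + bs)     # flatten for cuDNN
ws_r, bs_r = converter.opaque_to_tf_canonical(opaque)  # and recover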
Example #6
def test_cudnn_rnn(self):
  if get_ngpu() == 0:
    return
  print()
  batch_size = 2
  time_steps = 5
  input_dim = 12
  hidden_dim = 8
  X = K.variable(value=np.random.rand(batch_size, time_steps, input_dim),
                 dtype='float32', name='X')
  for rnn_mode in ('lstm', 'rnn_relu', 'gru'):
    for num_layers in [1, 2]:
      for W_init in [init_ops.glorot_uniform_initializer(seed=1234),
                     init_ops.random_normal_initializer(seed=1234)]:
        for b_init in [0, 1]:
          for bidirectional in (True, False):
            for skip_input in (False,):
              print('RNNmode:%s' % rnn_mode,
                    '#Layers:%d' % num_layers,
                    'Bidirectional:%s' % bidirectional,
                    'SkipInput:%s' % skip_input)
              weights, biases = K.init_rnn(input_dim=input_dim,
                                           hidden_dim=hidden_dim,
                                           num_gates=rnn_mode,
                                           num_layers=num_layers,
                                           W_init=W_init,
                                           b_init=b_init,
                                           skip_input=skip_input,
                                           cudnn_vector=False,
                                           is_bidirectional=bidirectional,
                                           name=None)
              # ====== check number of params ====== #
              params1 = K.params_to_cudnn(weights, biases)
              n = params1.shape[0].value
              nb_params = cudnn_rnn_ops.cudnn_rnn_opaque_params_size(
                  rnn_mode=rnn_mode,
                  num_layers=num_layers,
                  num_units=hidden_dim,
                  input_size=input_dim,
                  input_mode='skip_input' if skip_input else 'linear_input',
                  direction='bidirectional' if bidirectional else 'unidirectional')
              nb_params = K.eval(nb_params)
              assert n == nb_params
              # ====== check canonical shape match ====== #
              kwargs = {
                  'num_layers': num_layers,
                  'num_units': hidden_dim,
                  'input_mode': 'skip_input' if skip_input else 'linear_input',
                  'direction': 'bidirectional' if bidirectional else 'unidirectional',
              }
              if rnn_mode == 'lstm':
                rnn = cudnn_rnn.CudnnLSTM(**kwargs)
              elif rnn_mode == 'gru':
                rnn = cudnn_rnn.CudnnGRU(**kwargs)
              elif rnn_mode == 'rnn_relu':
                rnn = cudnn_rnn.CudnnRNNRelu(**kwargs)
              elif rnn_mode == 'rnn_tanh':
                rnn = cudnn_rnn.CudnnRNNTanh(**kwargs)
              rnn.build(input_shape=(None, None, input_dim))
              assert len(weights) == len(rnn.canonical_weight_shapes)
              assert len(biases) == len(rnn.canonical_bias_shapes)
              for w, s in zip(weights, rnn.canonical_weight_shapes):
                assert tuple(w.shape.as_list()) == s
              # ====== check params conversion ====== #
              K.initialize_all_variables()
              params2 = cudnn_rnn_ops.cudnn_rnn_canonical_to_opaque_params(
                  rnn_mode=rnn_mode,
                  num_layers=num_layers,
                  num_units=hidden_dim,
                  input_size=input_dim,
                  input_mode='skip_input' if skip_input else 'linear_input',
                  direction='bidirectional' if bidirectional else 'unidirectional',
                  weights=weights,
                  biases=biases)
              assert np.all(K.eval(params1) == K.eval(params2))
              # ====== odin cudnn implementation ====== #
              name = 'TEST' + uuid(length=25)
              outputs = K.cudnn_rnn(X=X,
                                    num_units=hidden_dim,
                                    rnn_mode=rnn_mode,
                                    num_layers=num_layers,
                                    parameters=None,
                                    skip_input=skip_input,
                                    is_bidirectional=bidirectional,
                                    dropout=0.1,
                                    name=name)
              K.initialize_all_variables()
              s0 = K.eval(outputs[0]).sum()
              s1 = K.eval(outputs[1]).sum()
              all_variables = K.get_all_variables(scope=name)
              new_weights = [i for i in all_variables
                             if K.role.has_roles(i, roles=K.role.Weight)]
              new_biases = [i for i in all_variables
                            if K.role.has_roles(i, roles=K.role.Bias)]
              new_weights, new_biases = K.sort_cudnn_params(
                  new_weights, new_biases, rnn_mode=rnn_mode)
              assert len(weights) == len(new_weights)
              assert len(biases) == len(new_biases)
              for i, j in zip(weights + biases, new_weights + new_biases):
                assert i.name.split('/')[-1] == j.name.split('/')[-1]
              # ====== CudnnRNN wrapper ====== #
              rnn = N.CudnnRNN(num_units=hidden_dim,
                               W_init=new_weights,
                               b_init=new_biases,
                               rnn_mode=rnn_mode,
                               num_layers=num_layers,
                               skip_input=skip_input,
                               is_bidirectional=bidirectional,
                               return_states=True,
                               dropout=0.)
              outputs = rnn(X)
              K.initialize_all_variables()
              y0 = K.eval(outputs[0]).sum()
              y1 = K.eval(outputs[1]).sum()
              assert y0 == s0
              assert y1 == s1
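The assertions against rnn.canonical_weight_shapes and rnn.canonical_bias_shapes in this test rely on the tf.contrib.cudnn_rnn layer classes exposing the canonical parameter layout they expect. A short hedged sketch of just that inspection step, assuming the TF 1.x contrib layers module (illustrative sizes, GPU build assumed):

from tensorflow.contrib.cudnn_rnn.python.layers import cudnn_rnn

layer = cudnn_rnn.CudnnGRU(num_layers=1, num_units=8,
                           input_mode='linear_input',
                           direction='unidirectional')
layer.build(input_shape=(None, None, 12))

# One entry per canonical weight/bias tensor, in cuDNN's ordering; these are
# the shapes the test compares K.init_rnn's outputs against.
print(layer.canonical_weight_shapes)
print(layer.canonical_bias_shapes)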