Code Example #1
File: rnn.py  Project: yangcaot/trax
def GeneralGRUCell(candidate_transform,
                   memory_transform_fn=None,
                   gate_nonlinearity=activation_fns.Sigmoid,
                   candidate_nonlinearity=activation_fns.Tanh,
                   dropout_rate_c=0.1,
                   sigmoid_bias=0.5):
  r"""Parametrized Gated Recurrent Unit (GRU) cell construction.

  GRU update equations for update gate, reset gate, candidate memory, and new
  state:

  .. math::
    u_t &= \sigma(U' \times s_{t-1} + B') \\
    r_t &= \sigma(U'' \times s_{t-1} + B'') \\
    c_t &= \tanh(U \times (r_t \odot s_{t-1}) + B) \\
    s_t &= u_t \odot s_{t-1} + (1 - u_t) \odot c_t

  See `combinators.Gate` for details on the gating function.


  Args:
    candidate_transform: Transform to apply inside the Candidate branch. Applied
      before nonlinearities.
    memory_transform_fn: Optional transformation on the memory before gating.
    gate_nonlinearity: Function to use as gate activation; allows trying
      alternatives to `Sigmoid`, such as `HardSigmoid`.
    candidate_nonlinearity: Nonlinearity to apply after candidate branch; allows
      trying alternatives to traditional `Tanh`, such as `HardTanh`.
    dropout_rate_c: Amount of dropout on the transform (c) gate. Dropout works
      best in a GRU when applied exclusively to this branch.
    sigmoid_bias: Constant to add before sigmoid gates. Generally want to start
      off with a positive bias.

  Returns:
    A model representing a GRU cell with specified transforms.
  """
  gate_block = [  # u_t
      candidate_transform(),
      _AddSigmoidBias(sigmoid_bias),
      gate_nonlinearity(),
  ]
  reset_block = [  # r_t
      candidate_transform(),
      _AddSigmoidBias(sigmoid_bias),  # Want bias to start positive.
      gate_nonlinearity(),
  ]
  candidate_block = [
      cb.Dup(),
      reset_block,
      cb.Multiply(),  # Gate S{t-1} with sigmoid(candidate_transform(S{t-1}))
      candidate_transform(),  # Final projection + tanh to get Ct
      candidate_nonlinearity(),  # Candidate gate

      # Only apply dropout on the C gate. Paper reports 0.1 as a good default.
      core.Dropout(rate=dropout_rate_c)
  ]
  memory_transform = memory_transform_fn() if memory_transform_fn else []
  return cb.Serial(
      cb.Branch(memory_transform, gate_block, candidate_block),
      cb.Gate(),
  )
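Usage note: in the trax rnn.py shown above, the library's concrete GRUCell is obtained by specializing GeneralGRUCell with a Dense candidate transform. A minimal sketch of that construction, assuming the same module imports (core, activation_fns) as the file above; treat it as illustrative rather than the exact library source:

def GRUCell(n_units):
  """GRU cell: GeneralGRUCell with a Dense layer as the candidate transform."""
  return GeneralGRUCell(
      candidate_transform=lambda: core.Dense(n_units),
      memory_transform_fn=None,
      gate_nonlinearity=activation_fns.Sigmoid,
      candidate_nonlinearity=activation_fns.Tanh)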
Code Example #2
 def test_serial_dup_dup(self):
     layer = cb.Serial(cb.Dup(), cb.Dup())
     input_signature = ShapeDtype((3, 2))
     expected_shape = ((3, 2), (3, 2), (3, 2))
     output_shape = base.check_shape_agreement(layer, input_signature)
     self.assertEqual(output_shape, expected_shape)
Code Example #3
 def test_parallel_dup_dup(self):
     layer = cb.Parallel(cb.Dup(), cb.Dup())
     input_signature = (ShapeDtype((3, 2)), ShapeDtype((4, 7)))
     expected_shape = ((3, 2), (3, 2), (4, 7), (4, 7))
     output_shape = base.check_shape_agreement(layer, input_signature)
     self.assertEqual(output_shape, expected_shape)
Code Example #4
File: rnn.py  Project: Bangrejas/trax
def GeneralGRUCell(candidate_transform,
                   memory_transform_fn=None,
                   gate_nonlinearity=core.Sigmoid,
                   candidate_nonlinearity=core.Tanh,
                   dropout_rate_c=0.1,
                   sigmoid_bias=0.5):
    r"""Parametrized Gated Recurrent Unit (GRU) cell construction.

  GRU update equations:
  $$ Update gate: u_t = \sigma(U' * s_{t-1} + B') $$
  $$ Reset gate: r_t = \sigma(U'' * s_{t-1} + B'') $$
  $$ Candidate memory: c_t = \tanh(U * (r_t \odot s_{t-1}) + B) $$
  $$ New State: s_t = u_t \odot s_{t-1} + (1 - u_t) \odot c_t $$

  See combinators.Gate for details on the gating function.


  Args:
    candidate_transform: Transform to apply inside the Candidate branch. Applied
      before nonlinearities.
    memory_transform_fn: Optional transformation on the memory before gating.
    gate_nonlinearity: Function to use as gate activation. Allows trying
      alternatives to Sigmoid, such as HardSigmoid.
    candidate_nonlinearity: Nonlinearity to apply after candidate branch. Allows
      trying alternatives to traditional Tanh, such as HardTanh.
    dropout_rate_c: Amount of dropout on the transform (c) gate. Dropout works
      best in a GRU when applied exclusively to this branch.
    sigmoid_bias: Constant to add before sigmoid gates. Generally want to start
      off with a positive bias.

  Returns:
    A model representing a GRU cell with specified transforms.
  """
    gate_block = [  # u_t
        candidate_transform(),
        core.AddConstant(constant=sigmoid_bias),
        gate_nonlinearity(),
    ]
    reset_block = [  # r_t
        candidate_transform(),
        core.AddConstant(
            constant=sigmoid_bias),  # Want bias to start positive.
        gate_nonlinearity(),
    ]
    candidate_block = [
        cb.Dup(),
        reset_block,
        cb.Multiply(),  # Gate S{t-1} with sigmoid(candidate_transform(S{t-1}))
        candidate_transform(),  # Final projection + tanh to get Ct
        candidate_nonlinearity(),  # Candidate gate

        # Only apply dropout on the C gate. Paper reports 0.1 as a good default.
        core.Dropout(rate=dropout_rate_c)
    ]
    memory_transform = memory_transform_fn() if memory_transform_fn else []
    return cb.Serial(
        cb.Dup(),
        cb.Dup(),
        cb.Parallel(memory_transform, gate_block, candidate_block),
        cb.Gate(),
    )
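Structural note: Code Example #4 wires the three branches with cb.Dup(), cb.Dup() followed by cb.Parallel(...), while Code Example #1 achieves the same routing with cb.Branch(...). For single-input branches the two compositions copy the input in the same way. A rough shape-level sketch of that equivalence, reusing the test helpers from the surrounding examples (import paths are assumptions, not taken from these files):

from trax.layers import base
from trax.layers import combinators as cb
from trax.shapes import ShapeDtype

# Each Dup consumes one input and emits two copies, so both compositions
# end with four (3, 2)-shaped outputs.
explicit = cb.Serial(cb.Dup(), cb.Parallel(cb.Dup(), cb.Dup()))
branched = cb.Branch(cb.Dup(), cb.Dup())

sig = ShapeDtype((3, 2))
print(base.check_shape_agreement(explicit, sig))  # ((3, 2), (3, 2), (3, 2), (3, 2))
print(base.check_shape_agreement(branched, sig))  # ((3, 2), (3, 2), (3, 2), (3, 2))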
Code Example #5
 def test_dup(self):
     layer = cb.Dup()
     input_shape = (3, 2)
     expected_shape = ((3, 2), (3, 2))
     output_shape = base.check_shape_agreement(layer, input_shape)
     self.assertEqual(output_shape, expected_shape)
Code Example #6
File: combinators_test.py  Project: huyunzhi/trax
    def test_serial_custom_name(self):
        layer = cb.Serial(cb.Dup(), cb.Dup())  # pylint: disable=no-value-for-parameter
        self.assertIn('Serial', str(layer))

        layer = cb.Serial(cb.Dup(), cb.Dup(), name='Branch')  # pylint: disable=no-value-for-parameter
        self.assertIn('Branch', str(layer))
Code Example #7
File: combinators_test.py  Project: huyunzhi/trax
    def test_parallel_custom_name(self):
        layer = cb.Parallel(cb.Dup(), cb.Dup())  # pylint: disable=no-value-for-parameter
        self.assertIn('Parallel', str(layer))

        layer = cb.Parallel(cb.Dup(), cb.Dup(), name='DupDup')  # pylint: disable=no-value-for-parameter
        self.assertIn('DupDup', str(layer))